masster 0.3.10__py3-none-any.whl → 0.3.11__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of masster might be problematic. Click here for more details.

masster/sample/sample.py CHANGED
@@ -333,15 +333,15 @@ class Sample:
333
333
  if module_name.startswith(study_module_prefix) and module_name != current_module:
334
334
  study_modules.append(module_name)
335
335
 
336
- ''' # Add parameters submodules
336
+ """ # Add parameters submodules
337
337
  parameters_modules = []
338
338
  parameters_module_prefix = f"{base_modname}.parameters."
339
339
  for module_name in sys.modules:
340
340
  if module_name.startswith(parameters_module_prefix) and module_name != current_module:
341
341
  parameters_modules.append(module_name)
342
- '''
343
-
344
- all_modules_to_reload = core_modules + sample_modules + study_modules #+ parameters_modules
342
+ """
343
+
344
+ all_modules_to_reload = core_modules + sample_modules + study_modules # + parameters_modules
345
345
 
346
346
  # Reload all discovered modules
347
347
  for full_module_name in all_modules_to_reload:
@@ -1,44 +1,41 @@
1
1
  {
2
2
  "features_df": {
3
3
  "columns": {
4
- "feature_uid": {
5
- "dtype": "pl.Int64"
6
- },
7
- "feature_id": {
4
+ "adduct": {
8
5
  "dtype": "pl.Utf8"
9
6
  },
10
- "mz": {
11
- "dtype": "pl.Float64"
7
+ "adduct_group": {
8
+ "dtype": "pl.Int64"
12
9
  },
13
- "rt": {
10
+ "adduct_mass": {
14
11
  "dtype": "pl.Float64"
15
12
  },
16
- "rt_original": {
17
- "dtype": "pl.Float64"
13
+ "charge": {
14
+ "dtype": "pl.Int32"
18
15
  },
19
- "rt_start": {
20
- "dtype": "pl.Float64"
16
+ "chrom": {
17
+ "dtype": "pl.Object"
21
18
  },
22
- "rt_end": {
19
+ "chrom_coherence": {
23
20
  "dtype": "pl.Float64"
24
21
  },
25
- "rt_delta": {
22
+ "chrom_height_scaled": {
26
23
  "dtype": "pl.Float64"
27
24
  },
28
- "mz_start": {
25
+ "chrom_prominence": {
29
26
  "dtype": "pl.Float64"
30
27
  },
31
- "mz_end": {
28
+ "chrom_prominence_scaled": {
32
29
  "dtype": "pl.Float64"
33
30
  },
34
- "inty": {
35
- "dtype": "pl.Float64"
31
+ "feature_id": {
32
+ "dtype": "pl.Utf8"
36
33
  },
37
- "quality": {
38
- "dtype": "pl.Float64"
34
+ "feature_uid": {
35
+ "dtype": "pl.Int64"
39
36
  },
40
- "charge": {
41
- "dtype": "pl.Int32"
37
+ "inty": {
38
+ "dtype": "pl.Float64"
42
39
  },
43
40
  "iso": {
44
41
  "dtype": "pl.Int64"
@@ -46,35 +43,38 @@
46
43
  "iso_of": {
47
44
  "dtype": "pl.Int64"
48
45
  },
49
- "adduct_group": {
50
- "dtype": "pl.Int64"
46
+ "ms2_scans": {
47
+ "dtype": "pl.Object"
51
48
  },
52
- "adduct": {
53
- "dtype": "pl.Utf8"
49
+ "ms2_specs": {
50
+ "dtype": "pl.Object"
54
51
  },
55
- "adduct_mass": {
52
+ "mz": {
56
53
  "dtype": "pl.Float64"
57
54
  },
58
- "chrom": {
59
- "dtype": "pl.Object"
55
+ "mz_end": {
56
+ "dtype": "pl.Float64"
60
57
  },
61
- "chrom_coherence": {
58
+ "mz_start": {
62
59
  "dtype": "pl.Float64"
63
60
  },
64
- "chrom_prominence": {
61
+ "quality": {
65
62
  "dtype": "pl.Float64"
66
63
  },
67
- "chrom_prominence_scaled": {
64
+ "rt": {
68
65
  "dtype": "pl.Float64"
69
66
  },
70
- "chrom_height_scaled": {
67
+ "rt_delta": {
71
68
  "dtype": "pl.Float64"
72
69
  },
73
- "ms2_scans": {
74
- "dtype": "pl.Object"
70
+ "rt_end": {
71
+ "dtype": "pl.Float64"
75
72
  },
76
- "ms2_specs": {
77
- "dtype": "pl.Object"
73
+ "rt_original": {
74
+ "dtype": "pl.Float64"
75
+ },
76
+ "rt_start": {
77
+ "dtype": "pl.Float64"
78
78
  }
79
79
  }
80
80
  },
@@ -100,64 +100,64 @@
100
100
  },
101
101
  "scans_df": {
102
102
  "columns": {
103
- "scan_uid": {
104
- "dtype": "pl.Int64"
103
+ "bl": {
104
+ "dtype": "pl.Float64"
105
+ },
106
+ "comment": {
107
+ "dtype": "pl.Utf8"
105
108
  },
106
109
  "cycle": {
107
110
  "dtype": "pl.Int64"
108
111
  },
109
- "ms_level": {
112
+ "energy": {
113
+ "dtype": "pl.Float64"
114
+ },
115
+ "feature_uid": {
110
116
  "dtype": "pl.Int64"
111
117
  },
112
- "rt": {
113
- "dtype": "pl.Float64"
118
+ "id": {
119
+ "dtype": "pl.Utf8"
114
120
  },
115
- "inty_tot": {
121
+ "inty_max": {
116
122
  "dtype": "pl.Float64"
117
123
  },
118
124
  "inty_min": {
119
125
  "dtype": "pl.Float64"
120
126
  },
121
- "inty_max": {
127
+ "inty_tot": {
122
128
  "dtype": "pl.Float64"
123
129
  },
124
- "bl": {
125
- "dtype": "pl.Float64"
130
+ "ms2_n": {
131
+ "dtype": "pl.Int64"
126
132
  },
127
- "mz_min": {
128
- "dtype": "pl.Float64"
133
+ "ms_level": {
134
+ "dtype": "pl.Int64"
129
135
  },
130
136
  "mz_max": {
131
137
  "dtype": "pl.Float64"
132
138
  },
133
- "comment": {
134
- "dtype": "pl.Utf8"
139
+ "mz_min": {
140
+ "dtype": "pl.Float64"
135
141
  },
136
142
  "name": {
137
143
  "dtype": "pl.Utf8"
138
144
  },
139
- "id": {
140
- "dtype": "pl.Utf8"
141
- },
142
- "prec_mz": {
145
+ "prec_inty": {
143
146
  "dtype": "pl.Float64"
144
147
  },
145
- "prec_mz_min": {
148
+ "prec_mz": {
146
149
  "dtype": "pl.Float64"
147
150
  },
148
151
  "prec_mz_max": {
149
152
  "dtype": "pl.Float64"
150
153
  },
151
- "prec_inty": {
154
+ "prec_mz_min": {
152
155
  "dtype": "pl.Float64"
153
156
  },
154
- "energy": {
157
+ "rt": {
155
158
  "dtype": "pl.Float64"
156
159
  },
157
- "feature_uid": {
158
- "dtype": "pl.Int64"
159
- },
160
- "ms2_n": {
160
+ "scan_uid": {
161
161
  "dtype": "pl.Int64"
162
162
  },
163
163
  "time_cycle": {
@@ -169,10 +169,10 @@
169
169
  "time_ms1_to_ms2": {
170
170
  "dtype": "pl.Float64"
171
171
  },
172
- "time_ms2_to_ms2": {
172
+ "time_ms2_to_ms1": {
173
173
  "dtype": "pl.Float64"
174
174
  },
175
- "time_ms2_to_ms1": {
175
+ "time_ms2_to_ms2": {
176
176
  "dtype": "pl.Float64"
177
177
  }
178
178
  }
masster/sample/save.py CHANGED
@@ -134,10 +134,10 @@ def export_features(self, filename="features.csv"):
134
134
  # clone df
135
135
  clean_df = self.features_df.clone()
136
136
  filename = os.path.abspath(filename)
137
- # add a column has_ms2=True if colum ms2_scans is not None
137
+ # add a column has_ms2=True if column ms2_scans is not None
138
138
  if "ms2_scans" in clean_df.columns:
139
139
  clean_df = clean_df.with_columns(
140
- (pl.col("ms2_scans").is_not_null()).alias("has_ms2")
140
+ (pl.col("ms2_scans").is_not_null()).alias("has_ms2"),
141
141
  )
142
142
  clean_df = self.features_df.select([
143
143
  col for col in self.features_df.columns if self.features_df[col].dtype not in (pl.List, pl.Object)
@@ -215,7 +215,7 @@ def export_mgf(
215
215
  return
216
216
  else:
217
217
  self.features_df = self.features.get_df()
218
-
218
+
219
219
  # Apply filtering at DataFrame level for better performance
220
220
  features = self.features_df
221
221
  if mz_start is not None:
@@ -228,7 +228,7 @@ def export_mgf(
228
228
  features = features.filter(pl.col("rt") <= rt_end)
229
229
  if not include_all_ms1:
230
230
  features = features.filter(pl.col("ms2_scans").is_not_null())
231
-
231
+
232
232
  # Convert to list of dictionaries for faster iteration
233
233
  features_list = features.to_dicts()
234
234
 
@@ -286,7 +286,10 @@ def export_mgf(
286
286
  centroid_algo = "cr"
287
287
 
288
288
  # count how many features have charge < 0
289
- if self.features_df.filter(pl.col("charge") < 0).shape[0]- self.features_df.filter(pl.col("charge") > 0).shape[0] > 0:
289
+ if (
290
+ self.features_df.filter(pl.col("charge") < 0).shape[0] - self.features_df.filter(pl.col("charge") > 0).shape[0]
291
+ > 0
292
+ ):
290
293
  preferred_charge = -1
291
294
  else:
292
295
  preferred_charge = 1
@@ -312,7 +315,7 @@ def export_mgf(
312
315
  rt = row["rt"]
313
316
  rt_str = f"{rt:.2f}"
314
317
  mz_str = f"{mz:.4f}"
315
-
318
+
316
319
  # Filtering is now done at DataFrame level, so we can skip these checks
317
320
  if row["ms2_scans"] is None and not include_all_ms1:
318
321
  skip = skip + 1
@@ -338,7 +341,7 @@ def export_mgf(
338
341
 
339
342
  charge = preferred_charge
340
343
  if row["charge"] is not None and row["charge"] != 0:
341
- charge = row["charge"]
344
+ charge = row["charge"]
342
345
 
343
346
  write_ion(
344
347
  f,
@@ -397,7 +400,7 @@ def export_mgf(
397
400
  )
398
401
  c += 1
399
402
  continue # Skip the rest of the processing for this feature
400
-
403
+
401
404
  # If we reach here, either use_cache=False or no cached spectra were available
402
405
  if split_energy:
403
406
  # get energy of all scans with scan_uid in ms2_scans by fetching them
@@ -408,20 +411,20 @@ def export_mgf(
408
411
  for scan_uid in ms2_scan_uids:
409
412
  spec = self.get_spectrum(scan_uid)
410
413
  if spec is not None:
411
- spectra_with_energy.append((scan_uid, spec.energy if hasattr(spec, 'energy') else 0))
412
-
414
+ spectra_with_energy.append((scan_uid, spec.energy if hasattr(spec, "energy") else 0))
415
+
413
416
  # Group by energy
414
417
  energy_groups: dict[float, list[int]] = {}
415
418
  for scan_uid, energy in spectra_with_energy:
416
419
  if energy not in energy_groups:
417
420
  energy_groups[energy] = []
418
421
  energy_groups[energy].append(scan_uid)
419
-
422
+
420
423
  for energy, scan_uids_for_energy in energy_groups.items():
421
424
  if selection == "best":
422
425
  # Keep only the first scan for this energy
423
426
  scan_uids_for_energy = [scan_uids_for_energy[0]]
424
-
427
+
425
428
  for scan_uid in scan_uids_for_energy:
426
429
  spect = self.get_spectrum(
427
430
  scan_uid,
@@ -556,7 +559,7 @@ def export_mgf(
556
559
  inty_min_str = f"{inty_min:.3f}" if inty_min != float("-inf") else "None"
557
560
  q1_ratio_min_str = f"{q1_ratio_min:.3f}" if q1_ratio_min is not None else "None"
558
561
  eic_corr_min_str = f"{eic_corr_min:.3f}" if eic_corr_min is not None else "None"
559
-
562
+
560
563
  self.logger.debug(
561
564
  f"MGF created with int>{inty_min_str}, q1_ratio>{q1_ratio_min_str}, eic_corr>{eic_corr_min_str}",
562
565
  )