masster 0.5.22-py3-none-any.whl → 0.5.24-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of masster might be problematic.
- masster/_version.py +1 -1
- masster/logger.py +35 -19
- masster/sample/adducts.py +15 -29
- masster/sample/defaults/find_adducts_def.py +1 -3
- masster/sample/defaults/sample_def.py +4 -4
- masster/sample/h5.py +203 -361
- masster/sample/helpers.py +14 -30
- masster/sample/lib.py +3 -3
- masster/sample/load.py +21 -29
- masster/sample/plot.py +222 -132
- masster/sample/processing.py +42 -55
- masster/sample/sample.py +37 -46
- masster/sample/save.py +37 -61
- masster/sample/sciex.py +13 -11
- masster/sample/thermo.py +69 -74
- masster/spectrum.py +15 -15
- masster/study/analysis.py +650 -586
- masster/study/defaults/identify_def.py +1 -3
- masster/study/defaults/merge_def.py +6 -7
- masster/study/defaults/study_def.py +1 -5
- masster/study/export.py +35 -96
- masster/study/h5.py +134 -211
- masster/study/helpers.py +385 -459
- masster/study/id.py +239 -290
- masster/study/importers.py +84 -93
- masster/study/load.py +159 -178
- masster/study/merge.py +1112 -1098
- masster/study/plot.py +195 -149
- masster/study/processing.py +144 -191
- masster/study/save.py +14 -13
- masster/study/study.py +89 -130
- masster/wizard/wizard.py +764 -714
- {masster-0.5.22.dist-info → masster-0.5.24.dist-info}/METADATA +27 -1
- {masster-0.5.22.dist-info → masster-0.5.24.dist-info}/RECORD +37 -37
- {masster-0.5.22.dist-info → masster-0.5.24.dist-info}/WHEEL +0 -0
- {masster-0.5.22.dist-info → masster-0.5.24.dist-info}/entry_points.txt +0 -0
- {masster-0.5.22.dist-info → masster-0.5.24.dist-info}/licenses/LICENSE +0 -0
masster/sample/processing.py
CHANGED

@@ -253,8 +253,7 @@ def get_spectrum(self, scan, **kwargs):
         spec=spect,
         scan_uid=scan_uid,
         feature_uid=scan_info["feature_uid"][0]
-        if "feature_uid" in scan_info
-        and scan_info["feature_uid"][0] is not None
+        if "feature_uid" in scan_info and scan_info["feature_uid"][0] is not None
         else feature_uid,
         q1_step=2,
         deisotope=deisotope,

@@ -447,9 +446,7 @@ def _spec_to_mat(
         closest_index = np.argmin(np.abs(ar2 - val1))
         closest_indices.append((i, closest_index))
     # filter out pairs that are not within the specified tolerance
-    closest_indices = [
-        (i, j) for i, j in closest_indices if np.abs(ar1[i] - ar2[j]) <= tol
-    ]
+    closest_indices = [(i, j) for i, j in closest_indices if np.abs(ar1[i] - ar2[j]) <= tol]
     # remove duplicates from the list of indices
     closest_indices = list(set(closest_indices))
     # sort the list of indices by the first element (i) in ascending order
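The _spec_to_mat hunk above only reflows the tolerance filter; the surrounding logic is a small nearest-neighbor peak matcher. A self-contained sketch of that pairing step, built from the lines visible in this diff (the function name and example arrays are illustrative):

```python
import numpy as np

def match_peaks(ar1: np.ndarray, ar2: np.ndarray, tol: float = 0.01) -> list[tuple[int, int]]:
    """Pair each value in ar1 with its closest value in ar2, keeping
    only pairs that agree within an absolute tolerance."""
    closest_indices = []
    for i, val1 in enumerate(ar1):
        closest_index = np.argmin(np.abs(ar2 - val1))
        closest_indices.append((i, int(closest_index)))
    # filter out pairs that are not within the specified tolerance
    closest_indices = [(i, j) for i, j in closest_indices if np.abs(ar1[i] - ar2[j]) <= tol]
    # remove duplicates, then sort by the first index, as the diffed code does
    return sorted(set(closest_indices))

print(match_peaks(np.array([100.001, 150.5, 200.2]), np.array([100.0, 200.25])))
# -> [(0, 0)]: only 100.001 <-> 100.0 falls within the tolerance
```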
@@ -621,8 +618,7 @@ def find_features(self, **kwargs):
     mtd_par.setValue("noise_threshold_int", float(params.get("noise")))
     mtd_par.setValue(
         "min_trace_length",
-        float(params.get("min_trace_length_multiplier"))
-        * float(params.get("chrom_fwhm_min")),
+        float(params.get("min_trace_length_multiplier")) * float(params.get("chrom_fwhm_min")),
     )
     mtd_par.setValue(
         "trace_termination_outliers",

@@ -801,7 +797,7 @@ def find_features(self, **kwargs):
     )

     self.features_df = df
-    #self._features_sync()
+    # self._features_sync()
     self.logger.success(f"Feature detection completed. Total features: {len(df)}")

     # store params

@@ -1134,9 +1130,7 @@ def find_ms2(self, **kwargs):
     feature_rt_start = features_subset.select("rt_start").to_numpy().flatten()
     feature_rt_end = features_subset.select("rt_end").to_numpy().flatten()
     feature_uids = features_subset.select("feature_uid").to_numpy().flatten()
-    feature_indices = (
-        features_subset.with_row_index().select("index").to_numpy().flatten()
-    )
+    feature_indices = features_subset.with_row_index().select("index").to_numpy().flatten()

     # Pre-compute RT radius for all features
     rt_radius = np.minimum(feature_rt - feature_rt_start, feature_rt_end - feature_rt)

@@ -1283,16 +1277,16 @@ def find_ms2(self, **kwargs):

 def find_iso(self, rt_tolerance: float = 0.1, **kwargs):
     """Extract isotopic distributions from MS1 data and add to features_df.
-
+
     This method processes each feature to find isotopic distributions from MS1 data,
     similar to the study.find_iso() method but for individual samples. The method
     adds a new 'ms1_spec' column to features_df containing numpy arrays with
     isotopic distribution data.
-
+
     Args:
         rt_tolerance (float): RT tolerance in minutes for matching MS1 scans. Default 0.1.
         **kwargs: Additional parameters
-
+
     Notes:
         - Adds a new 'ms1_spec' column to features_df containing numpy arrays
        - Each array contains [mz, intensity] pairs for the isotopic distribution

@@ -1302,11 +1296,11 @@ def find_iso(self, rt_tolerance: float = 0.1, **kwargs):
     if self.features_df is None or self.features_df.is_empty():
         self.logger.warning("No features found. Run find_features() first.")
         return
-
+
     if self.ms1_df is None or self.ms1_df.is_empty():
         self.logger.warning("No MS1 data found.")
         return
-
+
     # Check if ms1_spec column already exists
     if "ms1_spec" in self.features_df.columns:
         features_without_spec = self.features_df.filter(pl.col("ms1_spec").is_null())

@@ -1316,9 +1310,7 @@ def find_iso(self, rt_tolerance: float = 0.1, **kwargs):
         self.logger.info(f"Processing {len(features_without_spec)} features without isotopic distributions.")
     else:
         # Add the ms1_spec column with None values
-        self.features_df = self.features_df.with_columns(
-            pl.lit(None, dtype=pl.Object).alias("ms1_spec")
-        )
+        self.features_df = self.features_df.with_columns(pl.lit(None, dtype=pl.Object).alias("ms1_spec"))
         features_without_spec = self.features_df
         self.logger.info(f"Processing {len(features_without_spec)} features for isotopic distributions.")

@@ -1336,60 +1328,59 @@ def find_iso(self, rt_tolerance: float = 0.1, **kwargs):
         6.02010,
         7.02345,
     ])
-
+
     # Convert rt_tolerance from minutes to seconds
     rt_tolerance_s = rt_tolerance * 60
-
+
     # Process each feature
     ms1_specs = []
    feature_indices = []
-
-    for i, row in enumerate(
-        tqdm(features_without_spec.rows(named=True),
-             desc=f"{datetime.now().strftime('%Y-%m-%d %H:%M:%S.%f')[:-3]} | INFO | {self.log_label}Extracting isotope patterns")
-    ):
+
+    for i, row in enumerate(
+        tqdm(
+            features_without_spec.rows(named=True),
+            desc=f"{datetime.now().strftime('%Y-%m-%d %H:%M:%S.%f')[:-3]} | INFO | {self.log_label}Extracting isotope patterns",
+        )
+    ):
         feature_rt = row["rt"]
         feature_mz = row["mz"]
-
+
         # Find MS1 scans within RT tolerance
-        rt_mask = (
-            (self.ms1_df["rt"] >= (feature_rt - rt_tolerance_s)) &
-            (self.ms1_df["rt"] <= (feature_rt + rt_tolerance_s))
+        rt_mask = (self.ms1_df["rt"] >= (feature_rt - rt_tolerance_s)) & (
+            self.ms1_df["rt"] <= (feature_rt + rt_tolerance_s)
         )
         ms1_in_range = self.ms1_df.filter(rt_mask)
-
+
         if ms1_in_range.is_empty():
             ms1_specs.append(None)
             feature_indices.append(row["feature_uid"])
             continue
-
+
         # Extract isotopic pattern
         isotope_pattern = []
-
+
         # Start with the monoisotopic peak (M+0)
         base_intensity = 0
         mz_tolerance = 0.01  # 10 ppm at 1000 Da
-
+
         # Find the base peak intensity
-        base_mask = (
-            (ms1_in_range["mz"] >= (feature_mz - mz_tolerance)) &
-            (ms1_in_range["mz"] <= (feature_mz + mz_tolerance))
+        base_mask = (ms1_in_range["mz"] >= (feature_mz - mz_tolerance)) & (
+            ms1_in_range["mz"] <= (feature_mz + mz_tolerance)
        )
        base_peaks = ms1_in_range.filter(base_mask)
-
+
        if not base_peaks.is_empty():
            base_intensity = base_peaks["inty"].max()
            isotope_pattern.append([feature_mz, base_intensity])
-
+
        # Look for isotope peaks
        for shift in isotope_shifts:
            isotope_mz = feature_mz + shift
-            isotope_mask = (
-                (ms1_in_range["mz"] >= (isotope_mz - mz_tolerance)) &
-                (ms1_in_range["mz"] <= (isotope_mz + mz_tolerance))
+            isotope_mask = (ms1_in_range["mz"] >= (isotope_mz - mz_tolerance)) & (
+                ms1_in_range["mz"] <= (isotope_mz + mz_tolerance)
            )
            isotope_peaks = ms1_in_range.filter(isotope_mask)
-
+
            if not isotope_peaks.is_empty():
                max_intensity = isotope_peaks["inty"].max()
                # Only keep isotope peaks that are at least 1% of base peak

@@ -1397,29 +1388,25 @@ def find_iso(self, rt_tolerance: float = 0.1, **kwargs):
                    # Get the mz of the most intense peak
                    max_peak = isotope_peaks.filter(pl.col("inty") == max_intensity).row(0, named=True)
                    isotope_pattern.append([max_peak["mz"], max_intensity])
-
+
        # Convert to numpy array or None if empty
        if len(isotope_pattern) > 1:  # Need at least 2 points (monoisotopic + 1 isotope)
            ms1_spec = np.array(isotope_pattern, dtype=np.float64)
        else:
            ms1_spec = None
-
+
        ms1_specs.append(ms1_spec)
        feature_indices.append(row["feature_uid"])
-
+
    # Update the features_df with the isotopic spectra
    update_df = pl.DataFrame({
        "feature_uid": feature_indices,
-        "ms1_spec_new": pl.Series("ms1_spec_new", ms1_specs, dtype=pl.Object)
+        "ms1_spec_new": pl.Series("ms1_spec_new", ms1_specs, dtype=pl.Object),
    })
-
+
    # Join and update
    self.features_df = (
-        self.features_df.join(
-            update_df,
-            on="feature_uid",
-            how="left"
-        )
+        self.features_df.join(update_df, on="feature_uid", how="left")
        .with_columns([
            pl.when(pl.col("ms1_spec_new").is_not_null())
            .then(pl.col("ms1_spec_new"))

@@ -1428,11 +1415,11 @@ def find_iso(self, rt_tolerance: float = 0.1, **kwargs):
        ])
        .drop("ms1_spec_new")
    )
-
+
    # Log results
    non_null_count = len([spec for spec in ms1_specs if spec is not None])
    self.logger.success(f"Extracted isotopic distributions for {non_null_count}/{len(ms1_specs)} features.")
-
+
    # Store parameters in history
    params_dict = {"rt_tolerance": rt_tolerance}
    params_dict.update(kwargs)
masster/sample/sample.py
CHANGED

@@ -1,8 +1,8 @@
 """
 sample.py - Mass Spectrometry Sample Analysis Module

-This module provides comprehensive tools for processing and analyzing Data-Dependent Acquisition (DDA)
-mass spectrometry data. It defines the `Sample` class, which offers methods to load, process, analyze,
+This module provides comprehensive tools for processing and analyzing Data-Dependent Acquisition (DDA)
+mass spectrometry data. It defines the `Sample` class, which offers methods to load, process, analyze,
 and visualize mass spectrometry data from various file formats.

 Supported File Formats:

@@ -31,7 +31,7 @@ Core Dependencies:
 - `h5py`: HDF5 file format support for Sample5 files

 Classes:
-    Sample: Main class for handling DDA mass spectrometry data, providing methods for
+    Sample: Main class for handling DDA mass spectrometry data, providing methods for
     data import, processing, analysis, and visualization.

 Typical Workflow:

@@ -43,43 +43,43 @@ Typical Workflow:

 Example Usage:
     Basic analysis workflow:
-
+
     ```python
     from masster.sample import Sample
-
+
     # Load a mass spectrometry file
     sample = Sample(filename="experiment.mzML")
-
+
     # Detect features
     sample.find_features()
-
+
     # Find MS2 spectra for features
     sample.find_ms2()
-
+
     # Generate 2D visualization
     sample.plot_2d()
-
+
     # Export results
     sample.export_features("features.xlsx")
     ```
-
+
     Advanced usage with custom parameters:
-
+
     ```python
     from masster.sample import Sample
     from masster.sample.defaults import sample_defaults, find_features_defaults
-
+
     # Create custom parameters
     params = sample_defaults(log_level="DEBUG", label="My Experiment")
     ff_params = find_features_defaults(noise_threshold_int=1000)
-
+
     # Initialize with custom parameters
     sample = Sample(params=params)
     sample.load("data.raw")
-
+
     # Feature detection with custom parameters
     sample.find_features(params=ff_params)
-
+
     # Generate comprehensive statistics
     stats = sample.get_dda_stats()
     sample.plot_dda_stats()

@@ -275,7 +275,7 @@ class Sample:
     save = save
     find_features = find_features
     find_adducts = find_adducts
-    _get_adducts= _get_adducts
+    _get_adducts = _get_adducts
     find_iso = find_iso
     find_ms2 = find_ms2
     get_spectrum = get_spectrum

@@ -348,45 +348,44 @@ class Sample:

     def __dir__(self):
         """
-        Custom __dir__ implementation to hide internal methods starting with '_'
-        and backward compatibility aliases from tab completion and dir() calls,
+        Custom __dir__ implementation to hide internal methods starting with '_'
+        and backward compatibility aliases from tab completion and dir() calls,
         while keeping them accessible to class methods.
-
+
         Returns:
             list: List of public attribute and method names (excluding internal and deprecated methods)
         """
         # Define backward compatibility aliases to hide
         backward_compatibility_aliases = {
-            'load_study',  # deprecated alias for _load_ms1
-            'filter_features',  # alias for filter (deprecated naming)
-            'select_features',  # alias for select (deprecated naming)
-            'features_filter',  # confusing duplicate of filter
-            'features_select',  # confusing duplicate of select
-            'merge_defaults',  # alias for find_features_defaults (confusing)
-            'plot_feature_stats',  # backward compatibility for plot_features_stats
-            'store_history',  # deprecated alias for update_history
+            "load_study",  # deprecated alias for _load_ms1
+            "filter_features",  # alias for filter (deprecated naming)
+            "select_features",  # alias for select (deprecated naming)
+            "features_filter",  # confusing duplicate of filter
+            "features_select",  # confusing duplicate of select
+            "merge_defaults",  # alias for find_features_defaults (confusing)
+            "plot_feature_stats",  # backward compatibility for plot_features_stats
+            "store_history",  # deprecated alias for update_history
         }
-
+
         # Get all attributes from the class
         all_attrs = set()
-
+
         # Add attributes from the class and all its bases
         for cls in self.__class__.__mro__:
             all_attrs.update(cls.__dict__.keys())
-
+
         # Add instance attributes
         all_attrs.update(self.__dict__.keys())
-
+
         # Filter out attributes starting with '_' (but keep special methods like __init__, __str__, etc.)
         # Also filter out backward compatibility aliases
         public_attrs = [
-            attr for attr in all_attrs
-            if not attr.startswith('_') or attr.startswith('__') and attr.endswith('__')
+            attr for attr in all_attrs if not attr.startswith("_") or attr.startswith("__") and attr.endswith("__")
         ]
-
+
         # Remove backward compatibility aliases from the public attributes
         public_attrs = [attr for attr in public_attrs if attr not in backward_compatibility_aliases]
-
+
         return sorted(public_attrs)

     def logger_update(
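The __dir__ hunk collapses the attribute filter into one comprehension without changing behavior: walk the MRO, gather attribute names, keep dunders, and drop both _private names and a fixed set of deprecated aliases. A minimal standalone illustration of the same pattern (class and alias names are made up):

```python
class Demo:
    """Hides deprecated aliases and _internal names from dir()/tab completion."""

    _backward_compatibility_aliases = {"old_name"}  # hypothetical alias set

    def old_name(self):  # deprecated alias, still callable
        return self.new_name()

    def new_name(self):
        return 42

    def __dir__(self):
        attrs = set()
        for cls in self.__class__.__mro__:  # the class plus all its bases
            attrs.update(cls.__dict__.keys())
        attrs.update(self.__dict__.keys())  # instance attributes
        # keep dunders, drop _private names, then drop the deprecated aliases
        public = [a for a in attrs if not a.startswith("_") or a.startswith("__") and a.endswith("__")]
        return sorted(a for a in public if a not in self._backward_compatibility_aliases)


d = Demo()
assert "new_name" in dir(d) and "old_name" not in dir(d)
assert d.old_name() == 42  # the alias still works, it is just not advertised
```

Since dir() and IPython tab completion both consult __dir__, the aliases vanish from completion while remaining callable for old code.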
@@ -442,10 +441,7 @@ class Sample:

         # Get all currently loaded modules that are part of the sample package
         for module_name in sys.modules:
-            if (
-                module_name.startswith(sample_module_prefix)
-                and module_name != current_module
-            ):
+            if module_name.startswith(sample_module_prefix) and module_name != current_module:
                 sample_modules.append(module_name)

         # Add core masster modules

@@ -461,15 +457,10 @@ class Sample:
         study_modules = []
         study_module_prefix = f"{base_modname}.study."
         for module_name in sys.modules:
-            if (
-                module_name.startswith(study_module_prefix)
-                and module_name != current_module
-            ):
+            if module_name.startswith(study_module_prefix) and module_name != current_module:
                 study_modules.append(module_name)

-        all_modules_to_reload = (
-            core_modules + sample_modules + study_modules
-        )
+        all_modules_to_reload = core_modules + sample_modules + study_modules

         # Reload all discovered modules
         for full_module_name in all_modules_to_reload:
masster/sample/save.py
CHANGED

@@ -105,7 +105,8 @@ def save(self, filename=None):
     self._save_sample5(filename=filename)
     self.file_path = filename

-
+
+"""
 def _save_featureXML(self, filename="features.featureXML"):
     if self._oms_features_map is None:
         self.logger.warning("No features found.")

@@ -114,7 +115,9 @@ def _save_featureXML(self, filename="features.featureXML"):
     fh.store(filename, self._oms_features_map)
     self.logger.debug(f"Features Map saved to {filename}")

-
+"""
+
+
 def export_features(self, filename="features.csv"):
     """
     Export the features DataFrame to a CSV or Excel file.

@@ -140,11 +143,7 @@ def export_features(self, filename="features.csv"):
         (pl.col("ms2_scans").is_not_null()).alias("has_ms2"),
     )
     clean_df = self.features_df.select(
-        [
-            col
-            for col in self.features_df.columns
-            if self.features_df[col].dtype not in (pl.List, pl.Object)
-        ],
+        [col for col in self.features_df.columns if self.features_df[col].dtype not in (pl.List, pl.Object)],
     )
     if filename.lower().endswith((".xls", ".xlsx")):
         clean_df.to_pandas().to_excel(filename, index=False)

@@ -231,7 +230,7 @@ def export_mgf(
     if rt_end is not None:
         features = features.filter(pl.col("rt") <= rt_end)
     # Note: We no longer filter out features without MS2 data here since we want to export
-    # MS1 spectra for ALL features with isotope data. The MS2 filtering is done in the
+    # MS1 spectra for ALL features with isotope data. The MS2 filtering is done in the
     # second pass where we specifically check for ms2_scans.

     # Convert to list of dictionaries for faster iteration

@@ -269,26 +268,26 @@ def export_mgf(
     def write_ion(f, title, fuid, fid, mz, rt, charge, spect):
         if spect is None:
             return "none"
-
+
         # For MSLEVEL=2 ions, don't write empty spectra
         ms_level = spect.ms_level if spect.ms_level is not None else 1
         if ms_level > 1 and (len(spect.mz) == 0 or len(spect.inty) == 0):
             return "empty_ms2"
-
+
         # Create dynamic title based on MS level
         if ms_level == 1:
             # MS1: uid, rt, mz
             dynamic_title = f"uid:{fuid}, rt:{rt:.2f}, mz:{mz:.4f}"
         else:
             # MS2: uid, rt, mz, energy
-            energy = spect.energy if hasattr(spect, 'energy') else 0
+            energy = spect.energy if hasattr(spect, "energy") else 0
             dynamic_title = f"uid:{fuid}, rt:{rt:.2f}, mz:{mz:.4f}, energy:{energy}"
-
+
         f.write(f"BEGIN IONS\nTITLE={dynamic_title}\n")
         f.write(f"FEATURE_UID={fuid}\n")
         f.write(f"FEATURE_ID={fid}\n")
         f.write(f"CHARGE={charge}\nPEPMASS={mz}\nRTINSECONDS={rt}\n")
-
+
         if spect.ms_level is None:
             f.write("MSLEVEL=1\n")
             # Add PRECURSORINTENSITY for MS1 spectra

@@ -301,15 +300,12 @@ def export_mgf(
         if spect.ms_level == 1 and len(spect.inty) > 0:
             precursor_intensity = max(spect.inty)
             f.write(f"PRECURSORINTENSITY={precursor_intensity:.0f}\n")
-
+
         if spect.ms_level is not None:
             if spect.ms_level > 1 and hasattr(spect, "energy"):
                 f.write(f"ENERGY={spect.energy}\n")
         # Use list comprehension for better performance
-        peak_lines = [
-            f"{mz_val:.5f} {inty_val:.0f}\n"
-            for mz_val, inty_val in zip(spect.mz, spect.inty, strict=False)
-        ]
+        peak_lines = [f"{mz_val:.5f} {inty_val:.0f}\n" for mz_val, inty_val in zip(spect.mz, spect.inty, strict=False)]
         f.writelines(peak_lines)
         f.write("END IONS\n\n")
         return "written"
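write_ion() above serializes one feature per MGF ion block. A trimmed sketch of just the output format it produces (field set simplified; values are hypothetical):

```python
def write_mgf_ion(f, title, mz, rt, charge, peaks):
    """Write one BEGIN IONS ... END IONS block in the style of write_ion()."""
    f.write(f"BEGIN IONS\nTITLE={title}\n")
    f.write(f"CHARGE={charge}\nPEPMASS={mz}\nRTINSECONDS={rt}\n")
    # one "m/z intensity" line per peak, matching the diff's number formatting
    f.writelines(f"{mz_val:.5f} {inty_val:.0f}\n" for mz_val, inty_val in peaks)
    f.write("END IONS\n\n")


with open("example.mgf", "w", encoding="utf-8") as f:
    write_mgf_ion(
        f, "uid:42, rt:123.45, mz:301.1234",
        mz=301.1234, rt=123.45, charge=-1,
        peaks=[(301.1234, 12000), (302.1268, 1300)],
    )
```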
@@ -322,8 +318,7 @@ def export_mgf(

     # count how many features have charge < 0
     if (
-        self.features_df.filter(pl.col("charge") < 0).shape[0]
-        - self.features_df.filter(pl.col("charge") > 0).shape[0]
+        self.features_df.filter(pl.col("charge") < 0).shape[0] - self.features_df.filter(pl.col("charge") > 0).shape[0]
         > 0
     ):
         preferred_charge = -1

@@ -342,7 +337,7 @@ def export_mgf(
     filename = os.path.abspath(filename)
     with open(filename, "w", encoding="utf-8") as f:
         tdqm_disable = self.log_level not in ["TRACE", "DEBUG", "INFO"]
-
+
         # First pass: Export MS1 spectra for ALL features with ms1_spec data
         for row in tqdm(
             features_list,

@@ -362,19 +357,15 @@ def export_mgf(
             if "ms1_spec" in row and row["ms1_spec"] is not None:
                 # Create spectrum from ms1_spec isotope pattern data
                 from masster.spectrum import Spectrum
-
+
                 iso_data = row["ms1_spec"]
                 if len(iso_data) >= 2:  # Ensure we have mz and intensity arrays
                     ms1_mz = iso_data[0]
                     ms1_inty = iso_data[1]
-
+
                     # Create a Spectrum object from the isotope data
-                    spect = Spectrum(
-                        mz=np.array(ms1_mz),
-                        inty=np.array(ms1_inty),
-                        ms_level=1
-                    )
-
+                    spect = Spectrum(mz=np.array(ms1_mz), inty=np.array(ms1_inty), ms_level=1)
+
                     charge = preferred_charge
                     if row["charge"] is not None and row["charge"] != 0:
                         charge = row["charge"]

@@ -395,7 +386,7 @@ def export_mgf(
             else:
                 # No MS1 spectrum exported for features without ms1_spec data
                 ms1_fallback_count += 1
-
+
         # Second pass: Export MS2 spectra for features with MS2 data
         for row in tqdm(
             features_list,

@@ -453,9 +444,7 @@ def export_mgf(
                         q1_max=q1_ratio_max,
                     )
                     # Get the corresponding scan_uid from the list
-                    current_scan_uid = (
-                        scan_uids[i] if i < len(scan_uids) else "unknown"
-                    )
+                    current_scan_uid = scan_uids[i] if i < len(scan_uids) else "unknown"
                     result = write_ion(
                         f,
                         f"uid:{feature_uid}",

@@ -580,18 +569,14 @@ def export_mgf(
                 spect = spect.centroid(
                     tolerance=self.parameters["mz_tol_ms1_da"],
                     ppm=self.parameters["mz_tol_ms1_ppm"],
-                    min_points=self.parameters[
-                        "centroid_min_points_ms1"
-                    ],
+                    min_points=self.parameters["centroid_min_points_ms1"],
                     algo=centroid_algo,
                 )
             elif spect.ms_level == 2:
                 spect = spect.centroid(
                     tolerance=self.parameters["mz_tol_ms2_da"],
                     ppm=self.parameters["mz_tol_ms2_ppm"],
-                    min_points=self.parameters[
-                        "centroid_min_points_ms2"
-                    ],
+                    min_points=self.parameters["centroid_min_points_ms2"],
                     algo=centroid_algo,
                 )
             if deisotope:

@@ -654,7 +639,7 @@ def export_mgf(
     self.logger.info(f"Skipped {empty_ms2_count} empty MS2 spectra")
     if ms1_fallback_count > 0:
         self.logger.info(f"Skipped MS1 export for {ms1_fallback_count} features without isotope patterns")
-
+
     # Handle None values in logging
     inty_min_str = f"{inty_min:.3f}" if inty_min != float("-inf") else "None"
     q1_ratio_min_str = f"{q1_ratio_min:.3f}" if q1_ratio_min is not None else "None"

@@ -695,9 +680,7 @@ def export_dda_stats(self, filename="stats.csv"):
     ms2_count = len(self.scans_df.filter(pl.col("ms_level") == 2))
     features_count = len(self.features_df) if self.features_df is not None else 0
     features_with_ms2 = (
-        self.features_df.filter(pl.col("ms2_scans").is_not_null()).height
-        if self.features_df is not None
-        else 0
+        self.features_df.filter(pl.col("ms2_scans").is_not_null()).height if self.features_df is not None else 0
     )

     # Initialize a dictionary to hold statistics

@@ -712,9 +695,7 @@ def export_dda_stats(self, filename="stats.csv"):
     if "time_cycle" in self.scans_df.columns:
         ms1_df = self.scans_df.filter(pl.col("ms_level") == 1)
         avg_cycle_time = ms1_df["time_cycle"].mean()
-        stats["Average_cycle_time"] = (
-            avg_cycle_time if avg_cycle_time is not None else ""
-        )
+        stats["Average_cycle_time"] = avg_cycle_time if avg_cycle_time is not None else ""
     else:
         stats["Average_cycle_time"] = 0

@@ -851,32 +832,27 @@ def export_xlsx(self, filename="features.xlsx"):
        return

    # Validate filename extension
-    if not filename.lower().endswith(('.xlsx', '.xls')):
+    if not filename.lower().endswith((".xlsx", ".xls")):
        raise ValueError("Filename must end with '.xlsx' or '.xls' for Excel export")
-
+
    filename = os.path.abspath(filename)
-
+
    # Clone the DataFrame to avoid modifying the original
    clean_df = self.features_df.clone()
-
+
    # Add a column has_ms2=True if column ms2_scans is not None
    if "ms2_scans" in clean_df.columns:
-        clean_df = clean_df.with_columns(
-            (pl.col("ms2_scans").is_not_null()).alias("has_ms2")
-        )
-
+        clean_df = clean_df.with_columns((pl.col("ms2_scans").is_not_null()).alias("has_ms2"))
+
    # Filter out columns with List or Object data types that can't be exported to Excel
-    exportable_columns = [
-        col for col in clean_df.columns
-        if clean_df[col].dtype not in (pl.List, pl.Object)
-    ]
-
+    exportable_columns = [col for col in clean_df.columns if clean_df[col].dtype not in (pl.List, pl.Object)]
+
    clean_df = clean_df.select(exportable_columns)
-
+
    # Convert to pandas and export to Excel
    pandas_df = clean_df.to_pandas()
    pandas_df.to_excel(filename, index=False)
-
+
    self.logger.success(f"Features exported to {filename} (Excel format)")
    self.logger.debug(f"Exported {len(clean_df)} features with {len(exportable_columns)} columns")
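export_xlsx() (like export_features() earlier in this file) drops polars List and Object columns before converting to pandas, because nested values do not survive the Excel round trip. A minimal sketch of that guard on a hypothetical frame:

```python
import polars as pl

df = pl.DataFrame({
    "mz": [100.0, 200.0],
    "rt": [12.3, 45.6],
    "ms2_scans": [[1, 2], None],  # a List column that Excel export cannot take
})

# Keep only scalar-typed columns, using the same dtype test as the diff
exportable = [col for col in df.columns if df[col].dtype not in (pl.List, pl.Object)]
clean = df.select(exportable)  # -> columns: mz, rt
# clean.to_pandas().to_excel("features.xlsx", index=False)  # needs openpyxl installed
```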