masster 0.5.8__py3-none-any.whl → 0.5.10__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of masster might be problematic. Click here for more details.
- masster/_version.py +1 -1
- masster/logger.py +58 -43
- masster/sample/adducts.py +2 -2
- masster/sample/h5.py +1 -1
- masster/sample/helpers.py +47 -15
- masster/sample/plot.py +706 -578
- masster/sample/processing.py +4 -4
- masster/sample/sample.py +91 -48
- masster/sample/save.py +5 -5
- masster/study/h5.py +32 -14
- masster/study/helpers.py +27 -8
- masster/study/id.py +3 -3
- masster/study/load.py +1 -164
- masster/study/merge.py +6 -12
- masster/study/plot.py +105 -35
- masster/study/processing.py +7 -7
- masster/study/study5_schema.json +3 -0
- {masster-0.5.8.dist-info → masster-0.5.10.dist-info}/METADATA +3 -1
- {masster-0.5.8.dist-info → masster-0.5.10.dist-info}/RECORD +22 -22
- {masster-0.5.8.dist-info → masster-0.5.10.dist-info}/WHEEL +0 -0
- {masster-0.5.8.dist-info → masster-0.5.10.dist-info}/entry_points.txt +0 -0
- {masster-0.5.8.dist-info → masster-0.5.10.dist-info}/licenses/LICENSE +0 -0
masster/sample/processing.py
CHANGED
|
@@ -796,7 +796,7 @@ def find_features(self, **kwargs):
|
|
|
796
796
|
|
|
797
797
|
self.features_df = df
|
|
798
798
|
#self._features_sync()
|
|
799
|
-
self.logger.
|
|
799
|
+
self.logger.success(f"Feature detection completed. Total features: {len(df)}")
|
|
800
800
|
|
|
801
801
|
# store params
|
|
802
802
|
self.update_history(["find_features"], params.to_dict())
|
|
@@ -1263,8 +1263,8 @@ def find_ms2(self, **kwargs):
|
|
|
1263
1263
|
)
|
|
1264
1264
|
|
|
1265
1265
|
# Log completion
|
|
1266
|
-
self.logger.
|
|
1267
|
-
f"MS2 linking completed.
|
|
1266
|
+
self.logger.success(
|
|
1267
|
+
f"MS2 linking completed. Features with MS2 data: {c}.",
|
|
1268
1268
|
)
|
|
1269
1269
|
self.features_df = features_df
|
|
1270
1270
|
|
|
@@ -1425,7 +1425,7 @@ def find_iso(self, rt_tolerance: float = 0.1, **kwargs):
|
|
|
1425
1425
|
|
|
1426
1426
|
# Log results
|
|
1427
1427
|
non_null_count = len([spec for spec in ms1_specs if spec is not None])
|
|
1428
|
-
self.logger.
|
|
1428
|
+
self.logger.success(f"Extracted isotopic distributions for {non_null_count}/{len(ms1_specs)} features.")
|
|
1429
1429
|
|
|
1430
1430
|
# Store parameters in history
|
|
1431
1431
|
params_dict = {"rt_tolerance": rt_tolerance}
|
masster/sample/sample.py
CHANGED
|
@@ -1,35 +1,98 @@
|
|
|
1
1
|
"""
|
|
2
|
-
sample.py
|
|
2
|
+
sample.py - Mass Spectrometry Sample Analysis Module
|
|
3
3
|
|
|
4
|
-
This module provides tools for processing and analyzing Data-Dependent Acquisition (DDA)
|
|
5
|
-
It defines the `Sample` class, which offers methods to load, process, analyze,
|
|
6
|
-
|
|
4
|
+
This module provides comprehensive tools for processing and analyzing Data-Dependent Acquisition (DDA)
|
|
5
|
+
mass spectrometry data. It defines the `Sample` class, which offers methods to load, process, analyze,
|
|
6
|
+
and visualize mass spectrometry data from various file formats.
|
|
7
|
+
|
|
8
|
+
Supported File Formats:
|
|
9
|
+
- mzML (standard XML format for mass spectrometry data)
|
|
10
|
+
- Thermo RAW (native Thermo Fisher Scientific format)
|
|
11
|
+
- Sciex WIFF (native Sciex format)
|
|
12
|
+
- Sample5 (MASSter's native HDF5-based format for optimized storage)
|
|
7
13
|
|
|
8
14
|
Key Features:
|
|
9
|
-
- **File Handling**: Load and save data in multiple formats
|
|
10
|
-
- **Feature Detection**: Detect and process mass spectrometry features
|
|
11
|
-
- **Spectrum Analysis**: Retrieve and analyze MS1/MS2 spectra
|
|
12
|
-
- **
|
|
13
|
-
- **
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
-
|
|
17
|
-
-
|
|
18
|
-
-
|
|
19
|
-
|
|
15
|
+
- **File Handling**: Load and save data in multiple formats with automatic format detection
|
|
16
|
+
- **Feature Detection**: Detect and process mass spectrometry features using advanced algorithms
|
|
17
|
+
- **Spectrum Analysis**: Retrieve and analyze MS1/MS2 spectra with comprehensive metadata
|
|
18
|
+
- **Adduct Detection**: Find and annotate adducts and in-source fragments
|
|
19
|
+
- **Isotope Analysis**: Detect and process isotopic patterns
|
|
20
|
+
- **Chromatogram Extraction**: Extract and analyze chromatograms (EIC, BPC, TIC)
|
|
21
|
+
- **Visualization**: Generate interactive and static plots for spectra, chromatograms, and 2D maps
|
|
22
|
+
- **Statistics**: Compute and export detailed DDA run statistics and quality metrics
|
|
23
|
+
- **Data Export**: Export processed data to various formats (XLSX, MGF, etc.)
|
|
24
|
+
- **Memory Management**: Efficient handling of large datasets with on-disk storage options
|
|
25
|
+
|
|
26
|
+
Core Dependencies:
|
|
27
|
+
- `pyopenms`: OpenMS library for file handling and feature detection algorithms
|
|
28
|
+
- `polars`: High-performance data manipulation and analysis
|
|
29
|
+
- `numpy`: Numerical computations and array operations
|
|
30
|
+
- `bokeh`, `panel`, `holoviews`, `datashader`: Interactive visualizations and dashboards
|
|
31
|
+
- `h5py`: HDF5 file format support for Sample5 files
|
|
20
32
|
|
|
21
33
|
Classes:
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
Example Usage:
|
|
25
|
-
```python
|
|
26
|
-
from masster.sample import Sample
|
|
34
|
+
Sample: Main class for handling DDA mass spectrometry data, providing methods for
|
|
35
|
+
data import, processing, analysis, and visualization.
|
|
27
36
|
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
37
|
+
Typical Workflow:
|
|
38
|
+
1. Load mass spectrometry data file
|
|
39
|
+
2. Detect features using find_features()
|
|
40
|
+
3. Optionally find MS2 spectra with find_ms2()
|
|
41
|
+
4. Analyze and visualize results
|
|
42
|
+
5. Export processed data
|
|
32
43
|
|
|
44
|
+
Example Usage:
|
|
45
|
+
Basic analysis workflow:
|
|
46
|
+
|
|
47
|
+
```python
|
|
48
|
+
from masster.sample import Sample
|
|
49
|
+
|
|
50
|
+
# Load a mass spectrometry file
|
|
51
|
+
sample = Sample(filename="experiment.mzML")
|
|
52
|
+
|
|
53
|
+
# Detect features
|
|
54
|
+
sample.find_features()
|
|
55
|
+
|
|
56
|
+
# Find MS2 spectra for features
|
|
57
|
+
sample.find_ms2()
|
|
58
|
+
|
|
59
|
+
# Generate 2D visualization
|
|
60
|
+
sample.plot_2d()
|
|
61
|
+
|
|
62
|
+
# Export results
|
|
63
|
+
sample.export_features("features.xlsx")
|
|
64
|
+
```
|
|
65
|
+
|
|
66
|
+
Advanced usage with custom parameters:
|
|
67
|
+
|
|
68
|
+
```python
|
|
69
|
+
from masster.sample import Sample
|
|
70
|
+
from masster.sample.defaults import sample_defaults, find_features_defaults
|
|
71
|
+
|
|
72
|
+
# Create custom parameters
|
|
73
|
+
params = sample_defaults(log_level="DEBUG", label="My Experiment")
|
|
74
|
+
ff_params = find_features_defaults(noise_threshold_int=1000)
|
|
75
|
+
|
|
76
|
+
# Initialize with custom parameters
|
|
77
|
+
sample = Sample(params=params)
|
|
78
|
+
sample.load("data.raw")
|
|
79
|
+
|
|
80
|
+
# Feature detection with custom parameters
|
|
81
|
+
sample.find_features(params=ff_params)
|
|
82
|
+
|
|
83
|
+
# Generate comprehensive statistics
|
|
84
|
+
stats = sample.get_dda_stats()
|
|
85
|
+
sample.plot_dda_stats()
|
|
86
|
+
```
|
|
87
|
+
|
|
88
|
+
Notes:
|
|
89
|
+
- The Sample class maintains processing history and parameters for reproducibility
|
|
90
|
+
- Large files can be processed with on-disk storage to manage memory usage
|
|
91
|
+
- All visualizations are interactive by default and can be exported as static images
|
|
92
|
+
- The module supports both individual sample analysis and batch processing workflows
|
|
93
|
+
|
|
94
|
+
Version: Part of the MASSter mass spectrometry analysis framework
|
|
95
|
+
Author: Zamboni Lab, ETH Zurich
|
|
33
96
|
"""
|
|
34
97
|
|
|
35
98
|
import importlib
|
|
@@ -49,16 +112,12 @@ from masster.sample.defaults.get_spectrum_def import get_spectrum_defaults
|
|
|
49
112
|
|
|
50
113
|
# Sample-specific imports - keeping these private, only for internal use
|
|
51
114
|
from masster.sample.h5 import _load_sample5
|
|
52
|
-
# from masster.sample.h5 import _load_sample5_study
|
|
53
115
|
from masster.sample.h5 import _save_sample5
|
|
54
|
-
# from masster.sample.helpers import _delete_ms2
|
|
55
116
|
from masster.sample.helpers import _estimate_memory_usage
|
|
56
117
|
from masster.sample.helpers import _get_scan_uids
|
|
57
118
|
from masster.sample.helpers import _get_feature_uids
|
|
58
|
-
# from masster.sample.helpers import _features_sync - made internal only
|
|
59
119
|
from masster.sample.adducts import find_adducts
|
|
60
120
|
from masster.sample.adducts import _get_adducts
|
|
61
|
-
# Removed _get_adducts - only used in study modules
|
|
62
121
|
from masster.sample.helpers import features_delete
|
|
63
122
|
from masster.sample.helpers import features_filter
|
|
64
123
|
from masster.sample.helpers import features_select
|
|
@@ -70,23 +129,17 @@ from masster.sample.helpers import get_eic
|
|
|
70
129
|
from masster.sample.helpers import set_source
|
|
71
130
|
from masster.sample.helpers import _recreate_feature_map
|
|
72
131
|
from masster.sample.helpers import _get_feature_map
|
|
73
|
-
# Load functions - keeping only specific ones needed for external API
|
|
74
|
-
# from masster.sample.load import _load_featureXML - made internal only
|
|
75
|
-
# from masster.sample.load import _load_ms2data - made internal only
|
|
76
|
-
# from masster.sample.load import _load_mzML - made internal only
|
|
77
|
-
# from masster.sample.load import _load_raw - made internal only
|
|
78
|
-
# from masster.sample.load import _load_wiff - made internal only
|
|
79
132
|
from masster.sample.load import chrom_extract
|
|
80
133
|
from masster.sample.load import _index_file
|
|
81
134
|
from masster.sample.load import load
|
|
82
135
|
from masster.sample.load import load_noms1
|
|
83
|
-
from masster.sample.load import _load_ms1
|
|
136
|
+
from masster.sample.load import _load_ms1
|
|
84
137
|
from masster.sample.load import sanitize
|
|
85
138
|
from masster.sample.plot import plot_2d
|
|
86
139
|
from masster.sample.plot import plot_2d_oracle
|
|
87
140
|
from masster.sample.plot import plot_dda_stats
|
|
88
141
|
from masster.sample.plot import plot_chrom
|
|
89
|
-
from masster.sample.plot import plot_features_stats
|
|
142
|
+
from masster.sample.plot import plot_features_stats
|
|
90
143
|
from masster.sample.plot import plot_ms2_cycle
|
|
91
144
|
from masster.sample.plot import plot_ms2_eic
|
|
92
145
|
from masster.sample.plot import plot_ms2_q1
|
|
@@ -113,7 +166,6 @@ from masster.sample.save import export_features
|
|
|
113
166
|
from masster.sample.save import export_mgf
|
|
114
167
|
from masster.sample.save import export_xlsx
|
|
115
168
|
from masster.sample.save import save
|
|
116
|
-
# Removed internal-only import: _save_featureXML
|
|
117
169
|
|
|
118
170
|
|
|
119
171
|
class Sample:
|
|
@@ -402,6 +454,7 @@ class Sample:
|
|
|
402
454
|
f"{base_modname}.chromatogram",
|
|
403
455
|
f"{base_modname}.spectrum",
|
|
404
456
|
f"{base_modname}.logger",
|
|
457
|
+
f"{base_modname}.lib",
|
|
405
458
|
]
|
|
406
459
|
|
|
407
460
|
# Add study submodules
|
|
@@ -414,17 +467,9 @@ class Sample:
|
|
|
414
467
|
):
|
|
415
468
|
study_modules.append(module_name)
|
|
416
469
|
|
|
417
|
-
""" # Add parameters submodules
|
|
418
|
-
parameters_modules = []
|
|
419
|
-
parameters_module_prefix = f"{base_modname}.parameters."
|
|
420
|
-
for module_name in sys.modules:
|
|
421
|
-
if module_name.startswith(parameters_module_prefix) and module_name != current_module:
|
|
422
|
-
parameters_modules.append(module_name)
|
|
423
|
-
"""
|
|
424
|
-
|
|
425
470
|
all_modules_to_reload = (
|
|
426
471
|
core_modules + sample_modules + study_modules
|
|
427
|
-
)
|
|
472
|
+
)
|
|
428
473
|
|
|
429
474
|
# Reload all discovered modules
|
|
430
475
|
for full_module_name in all_modules_to_reload:
|
|
@@ -466,8 +511,6 @@ class Sample:
|
|
|
466
511
|
else:
|
|
467
512
|
str += "Features: 0\n"
|
|
468
513
|
str += "Features with MS2 spectra: 0\n"
|
|
469
|
-
|
|
470
|
-
# estimate memory usage
|
|
471
514
|
mem_usage = self._estimate_memory_usage()
|
|
472
515
|
str += f"Estimated memory usage: {mem_usage:.2f} MB\n"
|
|
473
516
|
|
masster/sample/save.py
CHANGED
|
@@ -148,10 +148,10 @@ def export_features(self, filename="features.csv"):
|
|
|
148
148
|
)
|
|
149
149
|
if filename.lower().endswith((".xls", ".xlsx")):
|
|
150
150
|
clean_df.to_pandas().to_excel(filename, index=False)
|
|
151
|
-
self.logger.
|
|
151
|
+
self.logger.success(f"Features exported to {filename} (Excel format)")
|
|
152
152
|
else:
|
|
153
153
|
clean_df.write_csv(filename)
|
|
154
|
-
self.logger.
|
|
154
|
+
self.logger.success(f"Features exported to {filename}")
|
|
155
155
|
|
|
156
156
|
|
|
157
157
|
def export_mgf(
|
|
@@ -649,7 +649,7 @@ def export_mgf(
|
|
|
649
649
|
elif result == "empty_ms2":
|
|
650
650
|
empty_ms2_count += 1
|
|
651
651
|
|
|
652
|
-
self.logger.
|
|
652
|
+
self.logger.success(f"Exported {ms1_spec_used_count} MS1 spectra and {c} MS2 spectra to {filename}")
|
|
653
653
|
if empty_ms2_count > 0:
|
|
654
654
|
self.logger.info(f"Skipped {empty_ms2_count} empty MS2 spectra")
|
|
655
655
|
if ms1_fallback_count > 0:
|
|
@@ -824,7 +824,7 @@ def export_dda_stats(self, filename="stats.csv"):
|
|
|
824
824
|
for line in lines:
|
|
825
825
|
f.write(line + "\n")
|
|
826
826
|
|
|
827
|
-
self.logger.
|
|
827
|
+
self.logger.success(f"DDA statistics exported to {filename}")
|
|
828
828
|
|
|
829
829
|
|
|
830
830
|
def export_xlsx(self, filename="features.xlsx"):
|
|
@@ -877,7 +877,7 @@ def export_xlsx(self, filename="features.xlsx"):
|
|
|
877
877
|
pandas_df = clean_df.to_pandas()
|
|
878
878
|
pandas_df.to_excel(filename, index=False)
|
|
879
879
|
|
|
880
|
-
self.logger.
|
|
880
|
+
self.logger.success(f"Features exported to {filename} (Excel format)")
|
|
881
881
|
self.logger.debug(f"Exported {len(clean_df)} features with {len(exportable_columns)} columns")
|
|
882
882
|
|
|
883
883
|
|
masster/study/h5.py
CHANGED
|
@@ -818,6 +818,19 @@ def _reorder_columns_by_schema(
|
|
|
818
818
|
|
|
819
819
|
def _create_dataframe_with_objects(data: dict, object_columns: list) -> pl.DataFrame:
|
|
820
820
|
"""Create DataFrame handling Object columns properly."""
|
|
821
|
+
# First check all data for numpy object arrays and move them to object columns
|
|
822
|
+
additional_object_cols = []
|
|
823
|
+
for k, v in data.items():
|
|
824
|
+
if k not in object_columns and hasattr(v, 'dtype') and str(v.dtype) == 'object':
|
|
825
|
+
# This is a numpy object array that should be treated as object
|
|
826
|
+
additional_object_cols.append(k)
|
|
827
|
+
object_columns.append(k)
|
|
828
|
+
|
|
829
|
+
if additional_object_cols:
|
|
830
|
+
# Re-run reconstruction for these columns
|
|
831
|
+
for col in additional_object_cols:
|
|
832
|
+
data[col] = _reconstruct_object_column(data[col], col)
|
|
833
|
+
|
|
821
834
|
object_data = {k: v for k, v in data.items() if k in object_columns}
|
|
822
835
|
regular_data = {k: v for k, v in data.items() if k not in object_columns}
|
|
823
836
|
|
|
@@ -1103,11 +1116,18 @@ def _load_dataframe_from_group(
|
|
|
1103
1116
|
logger.info(f"Loading extra column '{col}' not in schema for {df_name}")
|
|
1104
1117
|
column_data = group[col][:]
|
|
1105
1118
|
|
|
1106
|
-
#
|
|
1107
|
-
|
|
1108
|
-
|
|
1119
|
+
# Check if this is a known object column by name
|
|
1120
|
+
known_object_columns = {"ms1_spec", "chrom", "ms2_scans", "ms2_specs", "spec", "adducts", "iso"}
|
|
1121
|
+
is_known_object = col in known_object_columns
|
|
1122
|
+
|
|
1123
|
+
if is_known_object:
|
|
1124
|
+
# Known object column, always reconstruct
|
|
1125
|
+
data[col] = _reconstruct_object_column(column_data, col)
|
|
1126
|
+
if col not in object_columns:
|
|
1127
|
+
object_columns.append(col)
|
|
1128
|
+
elif len(column_data) > 0 and isinstance(column_data[0], bytes):
|
|
1109
1129
|
try:
|
|
1110
|
-
# Check if it looks like JSON
|
|
1130
|
+
# Check if it looks like JSON for unknown columns
|
|
1111
1131
|
test_decode = column_data[0].decode("utf-8")
|
|
1112
1132
|
if test_decode.startswith("[") or test_decode.startswith("{"):
|
|
1113
1133
|
# Looks like JSON, treat as object column
|
|
@@ -1738,9 +1758,7 @@ def _save_study5(self, filename):
|
|
|
1738
1758
|
)
|
|
1739
1759
|
pbar.update(1)
|
|
1740
1760
|
|
|
1741
|
-
self.logger.
|
|
1742
|
-
self.logger.debug(f"Save completed for {filename}")
|
|
1743
|
-
self.logger.debug(f"Save completed for {filename}")
|
|
1761
|
+
self.logger.success(f"Study saved to {filename}")
|
|
1744
1762
|
|
|
1745
1763
|
|
|
1746
1764
|
def _load_study5(self, filename=None):
|
|
@@ -1859,7 +1877,7 @@ def _load_study5(self, filename=None):
|
|
|
1859
1877
|
)
|
|
1860
1878
|
else:
|
|
1861
1879
|
self.logger.debug(
|
|
1862
|
-
"
|
|
1880
|
+
"Updated parameters from loaded history",
|
|
1863
1881
|
)
|
|
1864
1882
|
else:
|
|
1865
1883
|
self.logger.debug(
|
|
@@ -2093,8 +2111,8 @@ def _load_study5(self, filename=None):
|
|
|
2093
2111
|
# Ensure the column is Int64 type
|
|
2094
2112
|
self.samples_df = self.samples_df.cast({"map_id": pl.Int64})
|
|
2095
2113
|
|
|
2096
|
-
self.logger.
|
|
2097
|
-
f"
|
|
2114
|
+
self.logger.debug(
|
|
2115
|
+
f"Sanitized {sample_count} samples to indexed map_id format (0 to {sample_count - 1})",
|
|
2098
2116
|
)
|
|
2099
2117
|
|
|
2100
2118
|
# Sanitize null feature_id and consensus_id values with new UIDs (same method as merge)
|
|
@@ -2218,7 +2236,7 @@ def _sanitize_nulls(self):
|
|
|
2218
2236
|
pl.Series("feature_id", feature_ids, dtype=pl.Utf8)
|
|
2219
2237
|
)
|
|
2220
2238
|
|
|
2221
|
-
self.logger.debug(f"
|
|
2239
|
+
self.logger.debug(f"Sanitized {null_feature_ids} feature_id values")
|
|
2222
2240
|
|
|
2223
2241
|
# Sanitize consensus_df consensus_id column
|
|
2224
2242
|
if hasattr(self, 'consensus_df') and self.consensus_df is not None and not self.consensus_df.is_empty():
|
|
@@ -2244,8 +2262,8 @@ def _sanitize_nulls(self):
|
|
|
2244
2262
|
self.consensus_df = self.consensus_df.with_columns(
|
|
2245
2263
|
pl.Series("consensus_id", consensus_ids, dtype=pl.Utf8)
|
|
2246
2264
|
)
|
|
2247
|
-
|
|
2248
|
-
self.logger.debug(f"
|
|
2265
|
+
|
|
2266
|
+
self.logger.debug(f"Sanitized {null_consensus_ids} consensus_id values")
|
|
2249
2267
|
|
|
2250
2268
|
# Sanitize rt_original in features_df by replacing null or NaN values with rt values
|
|
2251
2269
|
if hasattr(self, 'features_df') and self.features_df is not None and not self.features_df.is_empty():
|
|
@@ -2262,4 +2280,4 @@ def _sanitize_nulls(self):
|
|
|
2262
2280
|
.otherwise(pl.col("rt_original"))
|
|
2263
2281
|
.alias("rt_original")
|
|
2264
2282
|
)
|
|
2265
|
-
self.logger.debug(f"
|
|
2283
|
+
self.logger.debug(f"Sanitized {null_or_nan_rt_original} rt_original values")
|
masster/study/helpers.py
CHANGED
|
@@ -1440,7 +1440,7 @@ def compress(self, features=True, ms2=True, chrom=False, ms2_max=5):
|
|
|
1440
1440
|
self.compress_ms2(max_replicates=ms2_max)
|
|
1441
1441
|
if chrom:
|
|
1442
1442
|
self.compress_chrom()
|
|
1443
|
-
self.logger.
|
|
1443
|
+
self.logger.success("Compression completed")
|
|
1444
1444
|
|
|
1445
1445
|
|
|
1446
1446
|
def compress_features(self):
|
|
@@ -1630,7 +1630,7 @@ def restore_features(self, samples=None, maps=False):
|
|
|
1630
1630
|
self.logger.error(f"Failed to load sample {sample_name}: {e}")
|
|
1631
1631
|
continue
|
|
1632
1632
|
|
|
1633
|
-
self.logger.
|
|
1633
|
+
self.logger.success(
|
|
1634
1634
|
f"Completed restoring columns {columns_to_update} from {len(sample_uids)} samples",
|
|
1635
1635
|
)
|
|
1636
1636
|
|
|
@@ -1886,7 +1886,7 @@ def restore_chrom(self, samples=None, mz_tol=0.010, rt_tol=10.0):
|
|
|
1886
1886
|
self.logger.error(f"Failed to gap-fill sample {sample_name}: {e}")
|
|
1887
1887
|
continue
|
|
1888
1888
|
|
|
1889
|
-
self.logger.
|
|
1889
|
+
self.logger.success(f"Phase 2 complete: Gap-filled {filled_count} chromatograms")
|
|
1890
1890
|
|
|
1891
1891
|
# Final summary
|
|
1892
1892
|
final_non_null = self.features_df.filter(pl.col("chrom").is_not_null()).height
|
|
@@ -2051,7 +2051,7 @@ def sample_name_replace(self, replace_dict):
|
|
|
2051
2051
|
pl.Series("sample_name", new_names).alias("sample_name"),
|
|
2052
2052
|
)
|
|
2053
2053
|
|
|
2054
|
-
self.logger.
|
|
2054
|
+
self.logger.success(f"Successfully replaced {replaced_count} sample names")
|
|
2055
2055
|
|
|
2056
2056
|
|
|
2057
2057
|
def sample_name_reset(self):
|
|
@@ -2940,6 +2940,7 @@ def features_delete(self, features):
|
|
|
2940
2940
|
|
|
2941
2941
|
def consensus_select(
|
|
2942
2942
|
self,
|
|
2943
|
+
uid=None,
|
|
2943
2944
|
mz=None,
|
|
2944
2945
|
rt=None,
|
|
2945
2946
|
inty_mean=None,
|
|
@@ -2956,14 +2957,12 @@ def consensus_select(
|
|
|
2956
2957
|
rt_delta_mean=None,
|
|
2957
2958
|
id_top_score=None,
|
|
2958
2959
|
identified=None,
|
|
2959
|
-
# New adduct filter parameters
|
|
2960
2960
|
adduct_top=None,
|
|
2961
2961
|
adduct_charge_top=None,
|
|
2962
2962
|
adduct_mass_neutral_top=None,
|
|
2963
2963
|
adduct_mass_shift_top=None,
|
|
2964
2964
|
adduct_group=None,
|
|
2965
2965
|
adduct_of=None,
|
|
2966
|
-
# New identification filter parameters
|
|
2967
2966
|
id_top_name=None,
|
|
2968
2967
|
id_top_class=None,
|
|
2969
2968
|
id_top_adduct=None,
|
|
@@ -2976,6 +2975,11 @@ def consensus_select(
|
|
|
2976
2975
|
OPTIMIZED VERSION: Enhanced performance with lazy evaluation, vectorized operations, and efficient filtering.
|
|
2977
2976
|
|
|
2978
2977
|
Parameters:
|
|
2978
|
+
uid: consensus UID filter with flexible formats:
|
|
2979
|
+
- None: include all consensus features (default)
|
|
2980
|
+
- int: single specific consensus_uid
|
|
2981
|
+
- tuple: inclusive range of consensus_uids (consensus_uid_min, consensus_uid_max)
|
|
2982
|
+
- list: specific list of consensus_uid values
|
|
2979
2983
|
mz: m/z filter with flexible formats:
|
|
2980
2984
|
- float: m/z value ± default tolerance (uses study.parameters.eic_mz_tol)
|
|
2981
2985
|
- tuple (mz_min, mz_max): range where mz_max > mz_min
|
|
@@ -3023,7 +3027,7 @@ def consensus_select(
|
|
|
3023
3027
|
return pl.DataFrame()
|
|
3024
3028
|
|
|
3025
3029
|
# Early return optimization - check if any filters are provided
|
|
3026
|
-
filter_params = [mz, rt, inty_mean, consensus_uid, consensus_id, number_samples,
|
|
3030
|
+
filter_params = [uid, mz, rt, inty_mean, consensus_uid, consensus_id, number_samples,
|
|
3027
3031
|
number_ms2, quality, bl, chrom_coherence_mean, chrom_prominence_mean,
|
|
3028
3032
|
chrom_prominence_scaled_mean, chrom_height_scaled_mean,
|
|
3029
3033
|
rt_delta_mean, id_top_score, identified,
|
|
@@ -3044,6 +3048,21 @@ def consensus_select(
|
|
|
3044
3048
|
warnings = []
|
|
3045
3049
|
|
|
3046
3050
|
# Build all filter conditions efficiently
|
|
3051
|
+
# Handle uid parameter first (consensus_uid filter with flexible formats)
|
|
3052
|
+
if uid is not None:
|
|
3053
|
+
if isinstance(uid, int):
|
|
3054
|
+
# Single specific consensus_uid
|
|
3055
|
+
filter_conditions.append(pl.col("consensus_uid") == uid)
|
|
3056
|
+
elif isinstance(uid, tuple) and len(uid) == 2:
|
|
3057
|
+
# Range of consensus_uids (consensus_uid_min, consensus_uid_max)
|
|
3058
|
+
min_uid, max_uid = uid
|
|
3059
|
+
filter_conditions.append((pl.col("consensus_uid") >= min_uid) & (pl.col("consensus_uid") <= max_uid))
|
|
3060
|
+
elif isinstance(uid, list):
|
|
3061
|
+
# Specific list of consensus_uid values
|
|
3062
|
+
filter_conditions.append(pl.col("consensus_uid").is_in(uid))
|
|
3063
|
+
else:
|
|
3064
|
+
self.logger.warning(f"Invalid uid parameter type: {type(uid)}. Expected int, tuple, or list.")
|
|
3065
|
+
|
|
3047
3066
|
if mz is not None:
|
|
3048
3067
|
if isinstance(mz, tuple) and len(mz) == 2:
|
|
3049
3068
|
if mz[1] < mz[0]:
|
|
@@ -4622,7 +4641,7 @@ def decompress(self, features=True, ms2=True, chrom=True, samples=None, **kwargs
|
|
|
4622
4641
|
|
|
4623
4642
|
self.restore_ms2(samples=samples, **ms2_kwargs)
|
|
4624
4643
|
|
|
4625
|
-
self.logger.
|
|
4644
|
+
self.logger.success("Adaptive decompression completed successfully")
|
|
4626
4645
|
|
|
4627
4646
|
except Exception as e:
|
|
4628
4647
|
self.logger.error(f"Decompression failed: {e}")
|
masster/study/id.py
CHANGED
|
@@ -1093,7 +1093,7 @@ def id_reset(study):
|
|
|
1093
1093
|
del study.history["identify"]
|
|
1094
1094
|
|
|
1095
1095
|
if logger:
|
|
1096
|
-
logger.
|
|
1096
|
+
logger.success("Identification data reset completed")
|
|
1097
1097
|
|
|
1098
1098
|
|
|
1099
1099
|
def lib_reset(study):
|
|
@@ -1198,7 +1198,7 @@ def lib_reset(study):
|
|
|
1198
1198
|
del study.history["lib_to_consensus"]
|
|
1199
1199
|
|
|
1200
1200
|
if logger:
|
|
1201
|
-
logger.
|
|
1201
|
+
logger.success("Library and identification data reset completed")
|
|
1202
1202
|
|
|
1203
1203
|
|
|
1204
1204
|
def _get_adducts(study, adducts_list: list | None = None, **kwargs):
|
|
@@ -1978,4 +1978,4 @@ def lib_to_consensus(study, chrom_fhwm: float = 5.0, mz_tol: float = 0.01, rt_to
|
|
|
1978
1978
|
logger.warning(f"find_ms2 failed: {e}")
|
|
1979
1979
|
|
|
1980
1980
|
if logger:
|
|
1981
|
-
logger.
|
|
1981
|
+
logger.success(f"lib_to_consensus completed: {len(consensus_metadata)} features added")
|