masster 0.5.3__tar.gz → 0.5.4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of masster might be problematic. Click here for more details.
- {masster-0.5.3 → masster-0.5.4}/PKG-INFO +1 -1
- {masster-0.5.3 → masster-0.5.4}/pyproject.toml +1 -1
- {masster-0.5.3 → masster-0.5.4}/src/masster/_version.py +1 -1
- {masster-0.5.3 → masster-0.5.4}/src/masster/sample/adducts.py +1 -1
- {masster-0.5.3 → masster-0.5.4}/src/masster/sample/h5.py +11 -11
- {masster-0.5.3 → masster-0.5.4}/src/masster/sample/helpers.py +2 -2
- {masster-0.5.3 → masster-0.5.4}/src/masster/sample/load.py +5 -4
- {masster-0.5.3 → masster-0.5.4}/src/masster/sample/processing.py +1 -1
- {masster-0.5.3 → masster-0.5.4}/src/masster/sample/sample.py +7 -3
- {masster-0.5.3 → masster-0.5.4}/src/masster/study/h5.py +70 -0
- {masster-0.5.3 → masster-0.5.4}/src/masster/study/plot.py +68 -51
- {masster-0.5.3 → masster-0.5.4}/uv.lock +1 -1
- {masster-0.5.3 → masster-0.5.4}/.github/workflows/publish.yml +0 -0
- {masster-0.5.3 → masster-0.5.4}/.github/workflows/security.yml +0 -0
- {masster-0.5.3 → masster-0.5.4}/.github/workflows/test.yml +0 -0
- {masster-0.5.3 → masster-0.5.4}/.gitignore +0 -0
- {masster-0.5.3 → masster-0.5.4}/.pre-commit-config.yaml +0 -0
- {masster-0.5.3 → masster-0.5.4}/LICENSE +0 -0
- {masster-0.5.3 → masster-0.5.4}/Makefile +0 -0
- {masster-0.5.3 → masster-0.5.4}/README.md +0 -0
- {masster-0.5.3 → masster-0.5.4}/TESTING.md +0 -0
- {masster-0.5.3 → masster-0.5.4}/demo/example_batch_process.py +0 -0
- {masster-0.5.3 → masster-0.5.4}/demo/example_sample_process.py +0 -0
- {masster-0.5.3 → masster-0.5.4}/src/masster/__init__.py +0 -0
- {masster-0.5.3 → masster-0.5.4}/src/masster/chromatogram.py +0 -0
- {masster-0.5.3 → masster-0.5.4}/src/masster/data/dda/20250530_VH_IQX_KW_RP_HSST3_100mm_12min_pos_v4_DDA_OT_C-MiLUT_QC_dil2_01_20250602151849.sample5 +0 -0
- {masster-0.5.3 → masster-0.5.4}/src/masster/data/dda/20250530_VH_IQX_KW_RP_HSST3_100mm_12min_pos_v4_DDA_OT_C-MiLUT_QC_dil3_01_20250602150634.sample5 +0 -0
- {masster-0.5.3 → masster-0.5.4}/src/masster/data/dda/20250530_VH_IQX_KW_RP_HSST3_100mm_12min_pos_v4_MS1_C-MiLUT_C008_v6_r38_01.sample5 +0 -0
- {masster-0.5.3 → masster-0.5.4}/src/masster/data/dda/20250530_VH_IQX_KW_RP_HSST3_100mm_12min_pos_v4_MS1_C-MiLUT_C008_v7_r37_01.sample5 +0 -0
- {masster-0.5.3 → masster-0.5.4}/src/masster/data/dda/20250530_VH_IQX_KW_RP_HSST3_100mm_12min_pos_v4_MS1_C-MiLUT_C017_v5_r99_01.sample5 +0 -0
- {masster-0.5.3 → masster-0.5.4}/src/masster/data/libs/aa.csv +0 -0
- {masster-0.5.3 → masster-0.5.4}/src/masster/data/libs/ccm.csv +0 -0
- {masster-0.5.3 → masster-0.5.4}/src/masster/data/libs/urine.csv +0 -0
- {masster-0.5.3 → masster-0.5.4}/src/masster/data/wiff/2025_01_14_VW_7600_LpMx_DBS_CID_2min_TOP15_030msecMS1_005msecReac_CE35_DBS-ON_3.timeseries.data +0 -0
- {masster-0.5.3 → masster-0.5.4}/src/masster/data/wiff/2025_01_14_VW_7600_LpMx_DBS_CID_2min_TOP15_030msecMS1_005msecReac_CE35_DBS-ON_3.wiff +0 -0
- {masster-0.5.3 → masster-0.5.4}/src/masster/data/wiff/2025_01_14_VW_7600_LpMx_DBS_CID_2min_TOP15_030msecMS1_005msecReac_CE35_DBS-ON_3.wiff.scan +0 -0
- {masster-0.5.3 → masster-0.5.4}/src/masster/data/wiff/2025_01_14_VW_7600_LpMx_DBS_CID_2min_TOP15_030msecMS1_005msecReac_CE35_DBS-ON_3.wiff2 +0 -0
- {masster-0.5.3 → masster-0.5.4}/src/masster/lib/__init__.py +0 -0
- {masster-0.5.3 → masster-0.5.4}/src/masster/lib/lib.py +0 -0
- {masster-0.5.3 → masster-0.5.4}/src/masster/logger.py +0 -0
- {masster-0.5.3 → masster-0.5.4}/src/masster/sample/__init__.py +0 -0
- {masster-0.5.3 → masster-0.5.4}/src/masster/sample/defaults/__init__.py +0 -0
- {masster-0.5.3 → masster-0.5.4}/src/masster/sample/defaults/find_adducts_def.py +0 -0
- {masster-0.5.3 → masster-0.5.4}/src/masster/sample/defaults/find_features_def.py +0 -0
- {masster-0.5.3 → masster-0.5.4}/src/masster/sample/defaults/find_ms2_def.py +0 -0
- {masster-0.5.3 → masster-0.5.4}/src/masster/sample/defaults/get_spectrum_def.py +0 -0
- {masster-0.5.3 → masster-0.5.4}/src/masster/sample/defaults/sample_def.py +0 -0
- {masster-0.5.3 → masster-0.5.4}/src/masster/sample/lib.py +0 -0
- {masster-0.5.3 → masster-0.5.4}/src/masster/sample/parameters.py +0 -0
- {masster-0.5.3 → masster-0.5.4}/src/masster/sample/plot.py +0 -0
- {masster-0.5.3 → masster-0.5.4}/src/masster/sample/quant.py +0 -0
- {masster-0.5.3 → masster-0.5.4}/src/masster/sample/sample5_schema.json +0 -0
- {masster-0.5.3 → masster-0.5.4}/src/masster/sample/save.py +0 -0
- {masster-0.5.3 → masster-0.5.4}/src/masster/sample/sciex.py +0 -0
- {masster-0.5.3 → masster-0.5.4}/src/masster/spectrum.py +0 -0
- {masster-0.5.3 → masster-0.5.4}/src/masster/study/__init__.py +0 -0
- {masster-0.5.3 → masster-0.5.4}/src/masster/study/analysis.py +0 -0
- {masster-0.5.3 → masster-0.5.4}/src/masster/study/defaults/__init__.py +0 -0
- {masster-0.5.3 → masster-0.5.4}/src/masster/study/defaults/align_def.py +0 -0
- {masster-0.5.3 → masster-0.5.4}/src/masster/study/defaults/export_def.py +0 -0
- {masster-0.5.3 → masster-0.5.4}/src/masster/study/defaults/fill_def.py +0 -0
- {masster-0.5.3 → masster-0.5.4}/src/masster/study/defaults/find_consensus_def.py +0 -0
- {masster-0.5.3 → masster-0.5.4}/src/masster/study/defaults/find_ms2_def.py +0 -0
- {masster-0.5.3 → masster-0.5.4}/src/masster/study/defaults/identify_def.py +0 -0
- {masster-0.5.3 → masster-0.5.4}/src/masster/study/defaults/integrate_chrom_def.py +0 -0
- {masster-0.5.3 → masster-0.5.4}/src/masster/study/defaults/integrate_def.py +0 -0
- {masster-0.5.3 → masster-0.5.4}/src/masster/study/defaults/merge_def.py +0 -0
- {masster-0.5.3 → masster-0.5.4}/src/masster/study/defaults/study_def.py +0 -0
- {masster-0.5.3 → masster-0.5.4}/src/masster/study/export.py +0 -0
- {masster-0.5.3 → masster-0.5.4}/src/masster/study/helpers.py +0 -0
- {masster-0.5.3 → masster-0.5.4}/src/masster/study/id.py +0 -0
- {masster-0.5.3 → masster-0.5.4}/src/masster/study/load.py +0 -0
- {masster-0.5.3 → masster-0.5.4}/src/masster/study/merge.py +0 -0
- {masster-0.5.3 → masster-0.5.4}/src/masster/study/parameters.py +0 -0
- {masster-0.5.3 → masster-0.5.4}/src/masster/study/processing.py +0 -0
- {masster-0.5.3 → masster-0.5.4}/src/masster/study/save.py +0 -0
- {masster-0.5.3 → masster-0.5.4}/src/masster/study/study.py +0 -0
- {masster-0.5.3 → masster-0.5.4}/src/masster/study/study5_schema.json +0 -0
- {masster-0.5.3 → masster-0.5.4}/src/masster/wizard/README.md +0 -0
- {masster-0.5.3 → masster-0.5.4}/src/masster/wizard/__init__.py +0 -0
- {masster-0.5.3 → masster-0.5.4}/src/masster/wizard/example.py +0 -0
- {masster-0.5.3 → masster-0.5.4}/src/masster/wizard/wizard.py +0 -0
- {masster-0.5.3 → masster-0.5.4}/tests/conftest.py +0 -0
- {masster-0.5.3 → masster-0.5.4}/tests/test_chromatogram.py +0 -0
- {masster-0.5.3 → masster-0.5.4}/tests/test_defaults.py +0 -0
- {masster-0.5.3 → masster-0.5.4}/tests/test_imports.py +0 -0
- {masster-0.5.3 → masster-0.5.4}/tests/test_integration.py +0 -0
- {masster-0.5.3 → masster-0.5.4}/tests/test_logger.py +0 -0
- {masster-0.5.3 → masster-0.5.4}/tests/test_parameters.py +0 -0
- {masster-0.5.3 → masster-0.5.4}/tests/test_sample.py +0 -0
- {masster-0.5.3 → masster-0.5.4}/tests/test_spectrum.py +0 -0
- {masster-0.5.3 → masster-0.5.4}/tests/test_study.py +0 -0
- {masster-0.5.3 → masster-0.5.4}/tests/test_version.py +0 -0
- {masster-0.5.3 → masster-0.5.4}/tox.ini +0 -0
|
@@ -473,7 +473,7 @@ def find_adducts(self, **kwargs):
|
|
|
473
473
|
self.logger.debug(f"Min probability threshold: {min_probability}")
|
|
474
474
|
|
|
475
475
|
# Generate comprehensive adduct specifications using the Sample method
|
|
476
|
-
adducts_df =
|
|
476
|
+
adducts_df = _get_adducts(self,
|
|
477
477
|
adducts_list=adducts_list,
|
|
478
478
|
charge_min=charge_min,
|
|
479
479
|
charge_max=charge_max,
|
|
@@ -62,8 +62,8 @@ def _save_sample5(
|
|
|
62
62
|
return
|
|
63
63
|
|
|
64
64
|
# synchronize feature_map if it exists
|
|
65
|
-
if hasattr(self, "_feature_map") and self._feature_map is not None:
|
|
66
|
-
|
|
65
|
+
#if hasattr(self, "_feature_map") and self._feature_map is not None:
|
|
66
|
+
# self._features_sync()
|
|
67
67
|
|
|
68
68
|
# if no extension is given, add .sample5
|
|
69
69
|
if not filename.endswith(".sample5"):
|
|
@@ -1057,15 +1057,15 @@ def _load_sample5(self, filename: str, map: bool = False):
|
|
|
1057
1057
|
# Parameters are now loaded from metadata JSON (see above)
|
|
1058
1058
|
# Lib and lib_match are no longer saved/loaded
|
|
1059
1059
|
|
|
1060
|
-
if map:
|
|
1061
|
-
|
|
1062
|
-
|
|
1063
|
-
|
|
1064
|
-
|
|
1065
|
-
|
|
1066
|
-
|
|
1067
|
-
|
|
1068
|
-
|
|
1060
|
+
#if map:
|
|
1061
|
+
# featureXML = filename.replace(".sample5", ".featureXML")
|
|
1062
|
+
# if os.path.exists(featureXML):
|
|
1063
|
+
# self._load_featureXML(featureXML)
|
|
1064
|
+
# #self._features_sync()
|
|
1065
|
+
# else:
|
|
1066
|
+
# self.logger.warning(
|
|
1067
|
+
# f"Feature XML file {featureXML} not found, skipping loading.",
|
|
1068
|
+
# )
|
|
1069
1069
|
|
|
1070
1070
|
# set self.file_path to *.sample5
|
|
1071
1071
|
self.file_path = filename
|
|
@@ -569,7 +569,7 @@ def select(
|
|
|
569
569
|
self.logger.info(f"Selected features. Features remaining: {len(feats)}")
|
|
570
570
|
return feats
|
|
571
571
|
|
|
572
|
-
|
|
572
|
+
'''
|
|
573
573
|
def _features_sync(self):
|
|
574
574
|
"""
|
|
575
575
|
Synchronizes the cached FeatureMap with features_df.
|
|
@@ -675,7 +675,7 @@ def _features_sync(self):
|
|
|
675
675
|
self.logger.warning("PyOpenMS not available, cannot sync FeatureMap")
|
|
676
676
|
except Exception as e:
|
|
677
677
|
self.logger.error(f"Error during feature synchronization: {e}")
|
|
678
|
-
|
|
678
|
+
'''
|
|
679
679
|
|
|
680
680
|
def features_delete(self, features: list | None = None):
|
|
681
681
|
"""
|
|
@@ -46,6 +46,7 @@ import polars as pl
|
|
|
46
46
|
from tqdm import tqdm
|
|
47
47
|
|
|
48
48
|
from masster.chromatogram import Chromatogram
|
|
49
|
+
from .h5 import _load_sample5
|
|
49
50
|
from masster.spectrum import Spectrum
|
|
50
51
|
|
|
51
52
|
# Suppress pyOpenMS warnings globally
|
|
@@ -96,13 +97,13 @@ def load(
|
|
|
96
97
|
|
|
97
98
|
# check if file is mzML
|
|
98
99
|
if filename.lower().endswith(".mzml"):
|
|
99
|
-
|
|
100
|
+
_load_mzML(self, filename)
|
|
100
101
|
elif filename.lower().endswith(".wiff") or filename.lower().endswith(".wiff2"):
|
|
101
|
-
|
|
102
|
+
_load_wiff(self, filename)
|
|
102
103
|
elif filename.lower().endswith(".raw"):
|
|
103
|
-
|
|
104
|
+
_load_raw(self, filename)
|
|
104
105
|
elif filename.lower().endswith(".sample5"):
|
|
105
|
-
|
|
106
|
+
_load_sample5(self, filename)
|
|
106
107
|
# elif filename.lower().endswith(".h5"):
|
|
107
108
|
# self._load_h5(filename)
|
|
108
109
|
else:
|
|
@@ -48,9 +48,9 @@ from masster.sample.defaults.find_ms2_def import find_ms2_defaults
|
|
|
48
48
|
from masster.sample.defaults.get_spectrum_def import get_spectrum_defaults
|
|
49
49
|
|
|
50
50
|
# Sample-specific imports - keeping these private, only for internal use
|
|
51
|
-
|
|
51
|
+
from masster.sample.h5 import _load_sample5
|
|
52
52
|
# from masster.sample.h5 import _load_sample5_study
|
|
53
|
-
|
|
53
|
+
from masster.sample.h5 import _save_sample5
|
|
54
54
|
# from masster.sample.helpers import _delete_ms2
|
|
55
55
|
from masster.sample.helpers import _estimate_memory_usage
|
|
56
56
|
from masster.sample.helpers import _get_scan_uids
|
|
@@ -263,12 +263,16 @@ class Sample:
|
|
|
263
263
|
_get_feature_map = _get_feature_map
|
|
264
264
|
|
|
265
265
|
# Additional method assignments for all imported functions
|
|
266
|
-
# Removed internal-only methods:
|
|
266
|
+
# Removed internal-only methods: _load_sample5_study, _delete_ms2, _features_sync
|
|
267
267
|
_estimate_memory_usage = _estimate_memory_usage
|
|
268
268
|
_get_scan_uids = _get_scan_uids
|
|
269
269
|
_get_feature_uids = _get_feature_uids
|
|
270
270
|
features_delete = features_delete
|
|
271
271
|
features_filter = features_filter
|
|
272
|
+
_save_sample5 = _save_sample5
|
|
273
|
+
_load_sample5 = _load_sample5
|
|
274
|
+
|
|
275
|
+
|
|
272
276
|
# Removed internal-only load methods: _load_featureXML, _load_ms2data, _load_mzML, _load_raw, _load_wiff
|
|
273
277
|
chrom_extract = chrom_extract
|
|
274
278
|
_index_file = _index_file # Renamed from index_file to be internal-only
|
|
@@ -304,6 +304,30 @@ def _save_object_columns_optimized(group, df, object_cols, logger, chunk_size):
|
|
|
304
304
|
serialized_chunk.append(item.to_json())
|
|
305
305
|
else:
|
|
306
306
|
serialized_chunk.append("None")
|
|
307
|
+
elif col_name == "iso":
|
|
308
|
+
# Handle isotope patterns (numpy arrays with [mz, intensity] data)
|
|
309
|
+
for item in chunk_data:
|
|
310
|
+
if item is not None:
|
|
311
|
+
try:
|
|
312
|
+
# Convert numpy array to nested list for JSON serialization
|
|
313
|
+
serialized_chunk.append(json.dumps(item.tolist()))
|
|
314
|
+
except (AttributeError, TypeError):
|
|
315
|
+
# Fallback for non-numpy data
|
|
316
|
+
serialized_chunk.append(json.dumps(list(item) if hasattr(item, '__iter__') else []))
|
|
317
|
+
else:
|
|
318
|
+
serialized_chunk.append("None")
|
|
319
|
+
elif col_name == "ms1_spec":
|
|
320
|
+
# Handle MS1 spectra patterns (numpy arrays with [mz, intensity] data)
|
|
321
|
+
for item in chunk_data:
|
|
322
|
+
if item is not None:
|
|
323
|
+
try:
|
|
324
|
+
# Convert numpy array to nested list for JSON serialization
|
|
325
|
+
serialized_chunk.append(json.dumps(item.tolist()))
|
|
326
|
+
except (AttributeError, TypeError):
|
|
327
|
+
# Fallback for non-numpy data
|
|
328
|
+
serialized_chunk.append(json.dumps(list(item) if hasattr(item, '__iter__') else []))
|
|
329
|
+
else:
|
|
330
|
+
serialized_chunk.append("None")
|
|
307
331
|
else:
|
|
308
332
|
logger.warning(
|
|
309
333
|
f"Unknown object column '{col_name}', using default serialization",
|
|
@@ -564,6 +588,34 @@ def _save_dataframe_column_legacy(
|
|
|
564
588
|
else:
|
|
565
589
|
data_as_str.append("None")
|
|
566
590
|
group.create_dataset(col, data=data_as_str, compression=compression)
|
|
591
|
+
elif col == "iso":
|
|
592
|
+
# Handle isotope patterns (numpy arrays with [mz, intensity] data)
|
|
593
|
+
data_as_json_strings = []
|
|
594
|
+
for item in data:
|
|
595
|
+
if item is not None:
|
|
596
|
+
try:
|
|
597
|
+
# Convert numpy array to nested list for JSON serialization
|
|
598
|
+
data_as_json_strings.append(json.dumps(item.tolist()))
|
|
599
|
+
except (AttributeError, TypeError):
|
|
600
|
+
# Fallback for non-numpy data
|
|
601
|
+
data_as_json_strings.append(json.dumps(list(item) if hasattr(item, '__iter__') else []))
|
|
602
|
+
else:
|
|
603
|
+
data_as_json_strings.append("None")
|
|
604
|
+
group.create_dataset(col, data=data_as_json_strings, **optimal_compression)
|
|
605
|
+
elif col == "ms1_spec":
|
|
606
|
+
# Handle MS1 spectra patterns (numpy arrays with [mz, intensity] data)
|
|
607
|
+
data_as_json_strings = []
|
|
608
|
+
for item in data:
|
|
609
|
+
if item is not None:
|
|
610
|
+
try:
|
|
611
|
+
# Convert numpy array to nested list for JSON serialization
|
|
612
|
+
data_as_json_strings.append(json.dumps(item.tolist()))
|
|
613
|
+
except (AttributeError, TypeError):
|
|
614
|
+
# Fallback for non-numpy data
|
|
615
|
+
data_as_json_strings.append(json.dumps(list(item) if hasattr(item, '__iter__') else []))
|
|
616
|
+
else:
|
|
617
|
+
data_as_json_strings.append("None")
|
|
618
|
+
group.create_dataset(col, data=data_as_json_strings, **optimal_compression)
|
|
567
619
|
else:
|
|
568
620
|
logger.warning(
|
|
569
621
|
f"Unexpectedly, column '{col}' has dtype '{dtype}'. Implement serialization for this column.",
|
|
@@ -666,6 +718,24 @@ def _reconstruct_object_column(data_col, col_name: str):
|
|
|
666
718
|
},
|
|
667
719
|
)
|
|
668
720
|
reconstructed_data.append(converted_adducts)
|
|
721
|
+
elif col_name == "iso":
|
|
722
|
+
# Handle isotope patterns (numpy arrays with [mz, intensity] data)
|
|
723
|
+
try:
|
|
724
|
+
import numpy as np
|
|
725
|
+
iso_data = json.loads(item)
|
|
726
|
+
# Convert back to numpy array
|
|
727
|
+
reconstructed_data.append(np.array(iso_data) if iso_data else None)
|
|
728
|
+
except (json.JSONDecodeError, ValueError, ImportError):
|
|
729
|
+
reconstructed_data.append(None)
|
|
730
|
+
elif col_name == "ms1_spec":
|
|
731
|
+
# Handle MS1 spectra patterns (numpy arrays with [mz, intensity] data)
|
|
732
|
+
try:
|
|
733
|
+
import numpy as np
|
|
734
|
+
ms1_spec_data = json.loads(item)
|
|
735
|
+
# Convert back to numpy array
|
|
736
|
+
reconstructed_data.append(np.array(ms1_spec_data) if ms1_spec_data else None)
|
|
737
|
+
except (json.JSONDecodeError, ValueError, ImportError):
|
|
738
|
+
reconstructed_data.append(None)
|
|
669
739
|
else:
|
|
670
740
|
# Unknown object column
|
|
671
741
|
reconstructed_data.append(None)
|
|
@@ -603,7 +603,7 @@ def plot_consensus_2d(
|
|
|
603
603
|
pl.when(
|
|
604
604
|
(pl.col(sizeby).is_not_null()) & (pl.col(sizeby).is_finite()) & (pl.col(sizeby) > 0),
|
|
605
605
|
)
|
|
606
|
-
.then((pl.col(sizeby).log10() * markersize / 12).pow(
|
|
606
|
+
.then((pl.col(sizeby).log10() * markersize / 12).pow(1.5))
|
|
607
607
|
.otherwise(markersize)
|
|
608
608
|
.alias("markersize"),
|
|
609
609
|
])
|
|
@@ -1421,46 +1421,60 @@ def plot_rt_correction(
|
|
|
1421
1421
|
p.xaxis.axis_label = f"Retention Time ({rt_unit})"
|
|
1422
1422
|
p.yaxis.axis_label = "RT - RT_original (s)"
|
|
1423
1423
|
|
|
1424
|
-
|
|
1424
|
+
# Create sample name lookup dictionary from samples_df (all in Polars)
|
|
1425
|
+
sample_names_dict = {}
|
|
1425
1426
|
if hasattr(self, "samples_df") and self.samples_df is not None:
|
|
1426
1427
|
try:
|
|
1427
|
-
|
|
1428
|
+
sample_name_mapping = (
|
|
1429
|
+
self.samples_df
|
|
1430
|
+
.filter(pl.col("sample_uid").is_in(sample_uids))
|
|
1431
|
+
.select(["sample_uid", "sample_name"])
|
|
1432
|
+
)
|
|
1433
|
+
sample_names_dict = dict(zip(
|
|
1434
|
+
sample_name_mapping["sample_uid"].to_list(),
|
|
1435
|
+
sample_name_mapping["sample_name"].to_list()
|
|
1436
|
+
))
|
|
1428
1437
|
except Exception:
|
|
1429
|
-
|
|
1438
|
+
pass
|
|
1430
1439
|
|
|
1431
1440
|
renderers = []
|
|
1432
1441
|
|
|
1433
|
-
#
|
|
1434
|
-
|
|
1435
|
-
|
|
1436
|
-
|
|
1437
|
-
|
|
1438
|
-
|
|
1439
|
-
|
|
1440
|
-
|
|
1441
|
-
|
|
1442
|
-
self.logger.debug("No sample identifier column in features_df; skipping sample filtering")
|
|
1443
|
-
continue
|
|
1444
|
-
except Exception as e:
|
|
1445
|
-
self.logger.debug(f"Error filtering features for sample {uid}: {e}")
|
|
1446
|
-
continue
|
|
1442
|
+
# Check sample identifier column
|
|
1443
|
+
if "sample_uid" not in self.features_df.columns:
|
|
1444
|
+
if "sample_name" in self.features_df.columns:
|
|
1445
|
+
sample_id_col = "sample_name"
|
|
1446
|
+
else:
|
|
1447
|
+
self.logger.debug("No sample identifier column in features_df")
|
|
1448
|
+
return
|
|
1449
|
+
else:
|
|
1450
|
+
sample_id_col = "sample_uid"
|
|
1447
1451
|
|
|
1448
|
-
|
|
1449
|
-
|
|
1452
|
+
# OPTIMIZED: Filter once, group once instead of per-sample filtering
|
|
1453
|
+
try:
|
|
1454
|
+
# Filter all data once for selected samples and required conditions
|
|
1455
|
+
all_sample_feats = self.features_df.filter(
|
|
1456
|
+
pl.col(sample_id_col).is_in(sample_uids)
|
|
1457
|
+
)
|
|
1458
|
+
|
|
1459
|
+
if all_sample_feats.is_empty():
|
|
1460
|
+
self.logger.warning("No features found for the selected samples.")
|
|
1461
|
+
return
|
|
1450
1462
|
|
|
1451
|
-
# Filter to only use features with filled==False
|
|
1452
|
-
if "filled" in
|
|
1453
|
-
|
|
1454
|
-
if
|
|
1455
|
-
|
|
1463
|
+
# Filter to only use features with filled==False if column exists
|
|
1464
|
+
if "filled" in all_sample_feats.columns:
|
|
1465
|
+
all_sample_feats = all_sample_feats.filter(~pl.col("filled"))
|
|
1466
|
+
if all_sample_feats.is_empty():
|
|
1467
|
+
self.logger.warning("No non-filled features found for the selected samples.")
|
|
1468
|
+
return
|
|
1456
1469
|
|
|
1457
|
-
#
|
|
1458
|
-
if "rt" not in
|
|
1459
|
-
|
|
1470
|
+
# Check required columns
|
|
1471
|
+
if "rt" not in all_sample_feats.columns or "rt_original" not in all_sample_feats.columns:
|
|
1472
|
+
self.logger.error("Required columns 'rt' or 'rt_original' not found in features_df.")
|
|
1473
|
+
return
|
|
1460
1474
|
|
|
1461
|
-
# Filter nulls
|
|
1462
|
-
|
|
1463
|
-
|
|
1475
|
+
# Filter nulls, add delta column, and sort - all in one operation
|
|
1476
|
+
all_sample_feats = (
|
|
1477
|
+
all_sample_feats
|
|
1464
1478
|
.filter(
|
|
1465
1479
|
pl.col("rt").is_not_null() &
|
|
1466
1480
|
pl.col("rt_original").is_not_null()
|
|
@@ -1468,33 +1482,36 @@ def plot_rt_correction(
|
|
|
1468
1482
|
.with_columns([
|
|
1469
1483
|
(pl.col("rt") - pl.col("rt_original")).alias("delta")
|
|
1470
1484
|
])
|
|
1471
|
-
.sort("rt")
|
|
1485
|
+
.sort([sample_id_col, "rt"])
|
|
1472
1486
|
)
|
|
1473
1487
|
|
|
1474
|
-
if
|
|
1475
|
-
|
|
1488
|
+
if all_sample_feats.is_empty():
|
|
1489
|
+
self.logger.warning("No valid RT data found for the selected samples.")
|
|
1490
|
+
return
|
|
1476
1491
|
|
|
1477
|
-
#
|
|
1478
|
-
|
|
1479
|
-
|
|
1492
|
+
# Group by sample and process each group (much faster than individual filtering)
|
|
1493
|
+
for (sample_uid,), sample_group in all_sample_feats.group_by(sample_id_col):
|
|
1494
|
+
if sample_group.is_empty():
|
|
1495
|
+
continue
|
|
1480
1496
|
|
|
1481
|
-
|
|
1482
|
-
|
|
1483
|
-
|
|
1484
|
-
row = samples_info[samples_info["sample_uid"] == uid]
|
|
1485
|
-
if not row.empty:
|
|
1486
|
-
sample_name = row.iloc[0].get("sample_name", sample_name)
|
|
1487
|
-
except Exception:
|
|
1488
|
-
pass
|
|
1497
|
+
# Extract arrays directly from Polars
|
|
1498
|
+
rt = sample_group["rt"].to_numpy()
|
|
1499
|
+
delta = sample_group["delta"].to_numpy()
|
|
1489
1500
|
|
|
1490
|
-
|
|
1501
|
+
# Get sample name efficiently from pre-built dictionary
|
|
1502
|
+
sample_name = sample_names_dict.get(sample_uid, str(sample_uid))
|
|
1503
|
+
color = color_map.get(sample_uid, "#000000")
|
|
1491
1504
|
|
|
1492
|
-
|
|
1493
|
-
|
|
1505
|
+
data = {"rt": rt, "delta": delta, "sample": [sample_name] * len(rt), "sample_color": [color] * len(rt)}
|
|
1506
|
+
src = ColumnDataSource(data)
|
|
1494
1507
|
|
|
1495
|
-
|
|
1496
|
-
|
|
1497
|
-
|
|
1508
|
+
r_line = p.line("rt", "delta", source=src, line_width=1, color=color)
|
|
1509
|
+
p.scatter("rt", "delta", source=src, size=2, color=color, alpha=0.6)
|
|
1510
|
+
renderers.append(r_line)
|
|
1511
|
+
|
|
1512
|
+
except Exception as e:
|
|
1513
|
+
self.logger.error(f"Error in optimized RT correction plotting: {e}")
|
|
1514
|
+
return
|
|
1498
1515
|
|
|
1499
1516
|
if not renderers:
|
|
1500
1517
|
self.logger.warning("No RT correction curves to plot for the selected samples.")
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|