masster 0.4.18__py3-none-any.whl → 0.4.20__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of masster has been flagged as possibly problematic; see the registry's advisory page for details.
- masster/__init__.py +0 -1
- masster/_version.py +1 -1
- masster/logger.py +42 -0
- masster/sample/load.py +6 -5
- masster/sample/sample.py +0 -9
- masster/study/defaults/merge_def.py +43 -2
- masster/study/helpers.py +52 -11
- masster/study/merge.py +1418 -105
- masster/study/plot.py +11 -5
- masster/study/study.py +18 -0
- masster/wizard/__init__.py +5 -2
- masster/wizard/wizard.py +1199 -27
- {masster-0.4.18.dist-info → masster-0.4.20.dist-info}/METADATA +1 -1
- {masster-0.4.18.dist-info → masster-0.4.20.dist-info}/RECORD +17 -18
- masster/wizard.py +0 -1175
- {masster-0.4.18.dist-info → masster-0.4.20.dist-info}/WHEEL +0 -0
- {masster-0.4.18.dist-info → masster-0.4.20.dist-info}/entry_points.txt +0 -0
- {masster-0.4.18.dist-info → masster-0.4.20.dist-info}/licenses/LICENSE +0 -0
masster/__init__.py
CHANGED
masster/_version.py
CHANGED
masster/logger.py
CHANGED
|
@@ -55,6 +55,9 @@ class MassterLogger:
|
|
|
55
55
|
# Convert string sink to actual object
|
|
56
56
|
if sink == "sys.stdout" or sink is None:
|
|
57
57
|
self.sink = sys.stdout
|
|
58
|
+
elif isinstance(sink, str) and sink != "sys.stdout":
|
|
59
|
+
# If it's a file path string, open the file for writing
|
|
60
|
+
self.sink = open(sink, "a", encoding="utf-8")
|
|
58
61
|
else:
|
|
59
62
|
self.sink = sink
|
|
60
63
|
|
|
@@ -67,6 +70,21 @@ class MassterLogger:
|
|
|
67
70
|
# Remove any existing handlers to prevent duplicates
|
|
68
71
|
if self.logger_instance.hasHandlers():
|
|
69
72
|
self.logger_instance.handlers.clear()
|
|
73
|
+
|
|
74
|
+
# Also ensure no duplicate handlers on parent loggers
|
|
75
|
+
parent = self.logger_instance.parent
|
|
76
|
+
while parent:
|
|
77
|
+
if parent.name == "masster" and parent.hasHandlers():
|
|
78
|
+
# Remove duplicate handlers from masster parent logger
|
|
79
|
+
unique_handlers = []
|
|
80
|
+
handler_types = set()
|
|
81
|
+
for handler in parent.handlers:
|
|
82
|
+
handler_type = type(handler)
|
|
83
|
+
if handler_type not in handler_types:
|
|
84
|
+
unique_handlers.append(handler)
|
|
85
|
+
handler_types.add(handler_type)
|
|
86
|
+
parent.handlers = unique_handlers
|
|
87
|
+
parent = parent.parent
|
|
70
88
|
|
|
71
89
|
self.logger_instance.setLevel(getattr(logging, self.level))
|
|
72
90
|
|
|
@@ -129,6 +147,17 @@ class MassterLogger:
|
|
|
129
147
|
|
|
130
148
|
# Prevent propagation to avoid duplicate messages
|
|
131
149
|
self.logger_instance.propagate = False
|
|
150
|
+
|
|
151
|
+
# Additional fix: ensure no duplicate handlers in the entire logging hierarchy
|
|
152
|
+
masster_logger = logging.getLogger("masster")
|
|
153
|
+
if masster_logger.hasHandlers():
|
|
154
|
+
# Keep only one handler per type
|
|
155
|
+
unique_handlers = {}
|
|
156
|
+
for handler in masster_logger.handlers:
|
|
157
|
+
handler_key = (type(handler).__name__, getattr(handler, 'stream', None))
|
|
158
|
+
if handler_key not in unique_handlers:
|
|
159
|
+
unique_handlers[handler_key] = handler
|
|
160
|
+
masster_logger.handlers = list(unique_handlers.values())
|
|
132
161
|
|
|
133
162
|
def update_level(self, level: str):
|
|
134
163
|
"""Update the logging level."""
|
|
@@ -326,7 +355,20 @@ class MassterLogger:
|
|
|
326
355
|
"""Remove this logger's handler."""
|
|
327
356
|
if self.handler:
|
|
328
357
|
self.logger_instance.removeHandler(self.handler)
|
|
358
|
+
# Close the file handle if it's not stdout
|
|
359
|
+
if hasattr(self.sink, 'close') and self.sink != sys.stdout:
|
|
360
|
+
try:
|
|
361
|
+
self.sink.close()
|
|
362
|
+
except Exception:
|
|
363
|
+
pass # Ignore close errors
|
|
329
364
|
self.handler = None
|
|
330
365
|
|
|
366
|
+
def __del__(self):
|
|
367
|
+
"""Cleanup when the logger is destroyed."""
|
|
368
|
+
try:
|
|
369
|
+
self.remove()
|
|
370
|
+
except Exception:
|
|
371
|
+
pass # Ignore cleanup errors during destruction
|
|
372
|
+
|
|
331
373
|
def __repr__(self):
|
|
332
374
|
return f"MassterLogger(type={self.instance_type}, id={self.instance_id}, level={self.level})"
|
masster/sample/load.py
CHANGED
|
@@ -37,21 +37,22 @@ See Also:
|
|
|
37
37
|
"""
|
|
38
38
|
|
|
39
39
|
import os
|
|
40
|
-
|
|
40
|
+
import warnings
|
|
41
41
|
from datetime import datetime
|
|
42
42
|
|
|
43
43
|
import numpy as np
|
|
44
44
|
import pandas as pd
|
|
45
45
|
import polars as pl
|
|
46
|
-
import pyopenms as oms
|
|
47
|
-
|
|
48
46
|
from tqdm import tqdm
|
|
49
47
|
|
|
50
48
|
from masster.chromatogram import Chromatogram
|
|
51
|
-
|
|
52
|
-
# Parameters removed - using hardcoded defaults
|
|
53
49
|
from masster.spectrum import Spectrum
|
|
54
50
|
|
|
51
|
+
# Import pyopenms with suppressed warnings
|
|
52
|
+
with warnings.catch_warnings():
|
|
53
|
+
warnings.filterwarnings("ignore", message="Warning: OPENMS_DATA_PATH environment variable already exists.*", category=UserWarning)
|
|
54
|
+
import pyopenms as oms
|
|
55
|
+
|
|
55
56
|
|
|
56
57
|
def load(
|
|
57
58
|
self,
|
masster/sample/sample.py
CHANGED
|
@@ -56,15 +56,6 @@ from masster.sample.helpers import _estimate_memory_usage
|
|
|
56
56
|
from masster.sample.helpers import _get_scan_uids
|
|
57
57
|
from masster.sample.helpers import _get_feature_uids
|
|
58
58
|
from masster.sample.helpers import _features_sync
|
|
59
|
-
|
|
60
|
-
# from masster.sample.helpers import _parse_adduct_specs
|
|
61
|
-
# from masster.sample.helpers import _calculate_adduct_mass_shift
|
|
62
|
-
# from masster.sample.helpers import _parse_formula_expression
|
|
63
|
-
# from masster.sample.helpers import _calculate_molecular_mass
|
|
64
|
-
# from masster.sample.helpers import _parse_legacy_adduct_format
|
|
65
|
-
# from masster.sample.helpers import _extract_adduct_probability
|
|
66
|
-
# from masster.sample.helpers import _detect_adduct_groups_direct
|
|
67
|
-
# from masster.sample.helpers import _check_adduct_relationship
|
|
68
59
|
from masster.sample.adducts import _get_adducts
|
|
69
60
|
from masster.sample.adducts import find_adducts
|
|
70
61
|
from masster.sample.helpers import features_delete
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
"""Parameter class for Study merge method."""
|
|
2
2
|
|
|
3
3
|
from dataclasses import dataclass, field
|
|
4
|
-
from typing import Any
|
|
4
|
+
from typing import Any, Optional
|
|
5
5
|
|
|
6
6
|
|
|
7
7
|
@dataclass
|
|
@@ -36,6 +36,9 @@ class merge_defaults:
|
|
|
36
36
|
max_nr_conflicts: int = 0
|
|
37
37
|
link_ms2: bool = True
|
|
38
38
|
|
|
39
|
+
# Parallel processing parameters
|
|
40
|
+
threads: Optional[int] = None
|
|
41
|
+
|
|
39
42
|
# KD-Strict specific parameters
|
|
40
43
|
optimize_rt_tol: bool = False
|
|
41
44
|
rt_tol_range: tuple = (0.5, 4.0)
|
|
@@ -115,6 +118,14 @@ class merge_defaults:
|
|
|
115
118
|
"description": "Whether to link MS2 spectra to consensus features",
|
|
116
119
|
"default": True,
|
|
117
120
|
},
|
|
121
|
+
# Parallel processing parameters
|
|
122
|
+
"threads": {
|
|
123
|
+
"dtype": [int, type(None)],
|
|
124
|
+
"description": "Number of parallel threads/processes for chunked methods (None=original sequential)",
|
|
125
|
+
"default": None,
|
|
126
|
+
"min_value": 1,
|
|
127
|
+
"max_value": 32,
|
|
128
|
+
},
|
|
118
129
|
# KD-Strict specific parameters
|
|
119
130
|
"optimize_rt_tol": {
|
|
120
131
|
"dtype": bool,
|
|
@@ -217,7 +228,37 @@ class merge_defaults:
|
|
|
217
228
|
metadata = self._param_metadata[param_name]
|
|
218
229
|
expected_dtype = metadata["dtype"]
|
|
219
230
|
|
|
220
|
-
#
|
|
231
|
+
# Handle Optional types (list of types including None)
|
|
232
|
+
if isinstance(expected_dtype, list):
|
|
233
|
+
# Check if value matches any of the allowed types
|
|
234
|
+
valid_type = False
|
|
235
|
+
for dtype in expected_dtype:
|
|
236
|
+
if dtype is type(None) and value is None:
|
|
237
|
+
return True # None is explicitly allowed
|
|
238
|
+
elif dtype is int and isinstance(value, int):
|
|
239
|
+
valid_type = True
|
|
240
|
+
break
|
|
241
|
+
elif dtype is float and isinstance(value, (int, float)):
|
|
242
|
+
valid_type = True
|
|
243
|
+
break
|
|
244
|
+
elif dtype is bool and isinstance(value, bool):
|
|
245
|
+
valid_type = True
|
|
246
|
+
break
|
|
247
|
+
elif dtype is str and isinstance(value, str):
|
|
248
|
+
valid_type = True
|
|
249
|
+
break
|
|
250
|
+
|
|
251
|
+
if not valid_type:
|
|
252
|
+
return False
|
|
253
|
+
|
|
254
|
+
# For None values, skip further validation
|
|
255
|
+
if value is None:
|
|
256
|
+
return True
|
|
257
|
+
|
|
258
|
+
# Use the first non-None type for range validation
|
|
259
|
+
expected_dtype = next((dt for dt in expected_dtype if dt is not type(None)), expected_dtype[0])
|
|
260
|
+
|
|
261
|
+
# Type checking for non-Optional types
|
|
221
262
|
if expected_dtype is int:
|
|
222
263
|
if not isinstance(value, int):
|
|
223
264
|
try:
|
masster/study/helpers.py
CHANGED
|
@@ -641,20 +641,61 @@ def get_gaps_stats(self, uids=None):
|
|
|
641
641
|
|
|
642
642
|
|
|
643
643
|
# TODO is uid not supposed to be a list anymore?
|
|
644
|
-
def get_consensus_matches(self, uids=None):
|
|
644
|
+
def get_consensus_matches(self, uids=None, filled=True):
|
|
645
|
+
"""
|
|
646
|
+
Get feature matches for consensus UIDs with optimized join operation.
|
|
647
|
+
|
|
648
|
+
Parameters:
|
|
649
|
+
uids: Consensus UID(s) to get matches for. Can be:
|
|
650
|
+
- None: get matches for all consensus features
|
|
651
|
+
- int: single consensus UID (converted to list)
|
|
652
|
+
- list: multiple consensus UIDs
|
|
653
|
+
filled (bool): Whether to include filled rows (True) or exclude them (False).
|
|
654
|
+
Default is True to maintain backward compatibility.
|
|
655
|
+
|
|
656
|
+
Returns:
|
|
657
|
+
pl.DataFrame: Feature matches for the specified consensus UIDs
|
|
658
|
+
"""
|
|
659
|
+
# Handle single int by converting to list
|
|
660
|
+
if isinstance(uids, int):
|
|
661
|
+
uids = [uids]
|
|
662
|
+
|
|
645
663
|
uids = self._get_consensus_uids(uids)
|
|
646
|
-
|
|
647
|
-
|
|
648
|
-
|
|
649
|
-
|
|
650
|
-
|
|
664
|
+
|
|
665
|
+
if not uids:
|
|
666
|
+
return pl.DataFrame()
|
|
667
|
+
|
|
668
|
+
# Early validation checks
|
|
669
|
+
if self.consensus_mapping_df is None or self.consensus_mapping_df.is_empty():
|
|
670
|
+
self.logger.warning("No consensus mapping data available")
|
|
671
|
+
return pl.DataFrame()
|
|
672
|
+
|
|
673
|
+
if self.features_df is None or self.features_df.is_empty():
|
|
674
|
+
self.logger.warning("No feature data available")
|
|
675
|
+
return pl.DataFrame()
|
|
676
|
+
|
|
677
|
+
# Build the query with optional filled filter
|
|
678
|
+
features_query = self.features_df.lazy()
|
|
679
|
+
|
|
680
|
+
# Apply filled filter if specified
|
|
681
|
+
if not filled and "filled" in self.features_df.columns:
|
|
682
|
+
features_query = features_query.filter(~pl.col("filled"))
|
|
683
|
+
|
|
684
|
+
# Optimized single-pass operation using join instead of two separate filters
|
|
685
|
+
# This avoids creating intermediate Python lists and leverages Polars' optimized joins
|
|
686
|
+
matches = (
|
|
687
|
+
features_query
|
|
688
|
+
.join(
|
|
689
|
+
self.consensus_mapping_df
|
|
690
|
+
.lazy()
|
|
691
|
+
.filter(pl.col("consensus_uid").is_in(uids))
|
|
692
|
+
.select("feature_uid"), # Only select what we need for the join
|
|
693
|
+
on="feature_uid",
|
|
694
|
+
how="inner"
|
|
651
695
|
)
|
|
652
|
-
.
|
|
653
|
-
.to_series()
|
|
654
|
-
.to_list()
|
|
696
|
+
.collect(streaming=True) # Use streaming for memory efficiency with large datasets
|
|
655
697
|
)
|
|
656
|
-
|
|
657
|
-
matches = self.features_df.filter(pl.col("feature_uid").is_in(fid)).clone()
|
|
698
|
+
|
|
658
699
|
return matches
|
|
659
700
|
|
|
660
701
|
|