masster 0.4.19__py3-none-any.whl → 0.4.20__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of masster might be problematic; see the release notes on the package registry for more details.

masster/__init__.py CHANGED
@@ -27,5 +27,4 @@ __all__ = [
27
27
  "Study",
28
28
  "Wizard",
29
29
  "__version__",
30
- # "get_version",
31
30
  ]
masster/_version.py CHANGED
@@ -1,7 +1,7 @@
1
1
  from __future__ import annotations
2
2
 
3
3
 
4
- __version__ = "0.4.19"
4
+ __version__ = "0.4.20"
5
5
 
6
6
 
7
7
  def get_version():
masster/logger.py CHANGED
@@ -55,6 +55,9 @@ class MassterLogger:
55
55
  # Convert string sink to actual object
56
56
  if sink == "sys.stdout" or sink is None:
57
57
  self.sink = sys.stdout
58
+ elif isinstance(sink, str) and sink != "sys.stdout":
59
+ # If it's a file path string, open the file for writing
60
+ self.sink = open(sink, "a", encoding="utf-8")
58
61
  else:
59
62
  self.sink = sink
60
63
 
@@ -67,6 +70,21 @@ class MassterLogger:
67
70
  # Remove any existing handlers to prevent duplicates
68
71
  if self.logger_instance.hasHandlers():
69
72
  self.logger_instance.handlers.clear()
73
+
74
+ # Also ensure no duplicate handlers on parent loggers
75
+ parent = self.logger_instance.parent
76
+ while parent:
77
+ if parent.name == "masster" and parent.hasHandlers():
78
+ # Remove duplicate handlers from masster parent logger
79
+ unique_handlers = []
80
+ handler_types = set()
81
+ for handler in parent.handlers:
82
+ handler_type = type(handler)
83
+ if handler_type not in handler_types:
84
+ unique_handlers.append(handler)
85
+ handler_types.add(handler_type)
86
+ parent.handlers = unique_handlers
87
+ parent = parent.parent
70
88
 
71
89
  self.logger_instance.setLevel(getattr(logging, self.level))
72
90
 
@@ -129,6 +147,17 @@ class MassterLogger:
129
147
 
130
148
  # Prevent propagation to avoid duplicate messages
131
149
  self.logger_instance.propagate = False
150
+
151
+ # Additional fix: ensure no duplicate handlers in the entire logging hierarchy
152
+ masster_logger = logging.getLogger("masster")
153
+ if masster_logger.hasHandlers():
154
+ # Keep only one handler per type
155
+ unique_handlers = {}
156
+ for handler in masster_logger.handlers:
157
+ handler_key = (type(handler).__name__, getattr(handler, 'stream', None))
158
+ if handler_key not in unique_handlers:
159
+ unique_handlers[handler_key] = handler
160
+ masster_logger.handlers = list(unique_handlers.values())
132
161
 
133
162
  def update_level(self, level: str):
134
163
  """Update the logging level."""
@@ -326,7 +355,20 @@ class MassterLogger:
326
355
  """Remove this logger's handler."""
327
356
  if self.handler:
328
357
  self.logger_instance.removeHandler(self.handler)
358
+ # Close the file handle if it's not stdout
359
+ if hasattr(self.sink, 'close') and self.sink != sys.stdout:
360
+ try:
361
+ self.sink.close()
362
+ except Exception:
363
+ pass # Ignore close errors
329
364
  self.handler = None
330
365
 
366
+ def __del__(self):
367
+ """Cleanup when the logger is destroyed."""
368
+ try:
369
+ self.remove()
370
+ except Exception:
371
+ pass # Ignore cleanup errors during destruction
372
+
331
373
  def __repr__(self):
332
374
  return f"MassterLogger(type={self.instance_type}, id={self.instance_id}, level={self.level})"
masster/sample/load.py CHANGED
@@ -37,21 +37,22 @@ See Also:
37
37
  """
38
38
 
39
39
  import os
40
-
40
+ import warnings
41
41
  from datetime import datetime
42
42
 
43
43
  import numpy as np
44
44
  import pandas as pd
45
45
  import polars as pl
46
- import pyopenms as oms
47
-
48
46
  from tqdm import tqdm
49
47
 
50
48
  from masster.chromatogram import Chromatogram
51
-
52
- # Parameters removed - using hardcoded defaults
53
49
  from masster.spectrum import Spectrum
54
50
 
51
+ # Import pyopenms with suppressed warnings
52
+ with warnings.catch_warnings():
53
+ warnings.filterwarnings("ignore", message="Warning: OPENMS_DATA_PATH environment variable already exists.*", category=UserWarning)
54
+ import pyopenms as oms
55
+
55
56
 
56
57
  def load(
57
58
  self,
masster/sample/sample.py CHANGED
@@ -56,15 +56,6 @@ from masster.sample.helpers import _estimate_memory_usage
56
56
  from masster.sample.helpers import _get_scan_uids
57
57
  from masster.sample.helpers import _get_feature_uids
58
58
  from masster.sample.helpers import _features_sync
59
-
60
- # from masster.sample.helpers import _parse_adduct_specs
61
- # from masster.sample.helpers import _calculate_adduct_mass_shift
62
- # from masster.sample.helpers import _parse_formula_expression
63
- # from masster.sample.helpers import _calculate_molecular_mass
64
- # from masster.sample.helpers import _parse_legacy_adduct_format
65
- # from masster.sample.helpers import _extract_adduct_probability
66
- # from masster.sample.helpers import _detect_adduct_groups_direct
67
- # from masster.sample.helpers import _check_adduct_relationship
68
59
  from masster.sample.adducts import _get_adducts
69
60
  from masster.sample.adducts import find_adducts
70
61
  from masster.sample.helpers import features_delete
masster/study/helpers.py CHANGED
@@ -641,20 +641,61 @@ def get_gaps_stats(self, uids=None):
641
641
 
642
642
 
643
643
  # TODO is uid not supposed to be a list anymore?
644
- def get_consensus_matches(self, uids=None):
644
+ def get_consensus_matches(self, uids=None, filled=True):
645
+ """
646
+ Get feature matches for consensus UIDs with optimized join operation.
647
+
648
+ Parameters:
649
+ uids: Consensus UID(s) to get matches for. Can be:
650
+ - None: get matches for all consensus features
651
+ - int: single consensus UID (converted to list)
652
+ - list: multiple consensus UIDs
653
+ filled (bool): Whether to include filled rows (True) or exclude them (False).
654
+ Default is True to maintain backward compatibility.
655
+
656
+ Returns:
657
+ pl.DataFrame: Feature matches for the specified consensus UIDs
658
+ """
659
+ # Handle single int by converting to list
660
+ if isinstance(uids, int):
661
+ uids = [uids]
662
+
645
663
  uids = self._get_consensus_uids(uids)
646
-
647
- # find all rows in consensus_mapping_df with consensus_id=id - use Polars filtering
648
- fid = (
649
- self.consensus_mapping_df.filter(
650
- pl.col("consensus_uid").is_in(uids),
664
+
665
+ if not uids:
666
+ return pl.DataFrame()
667
+
668
+ # Early validation checks
669
+ if self.consensus_mapping_df is None or self.consensus_mapping_df.is_empty():
670
+ self.logger.warning("No consensus mapping data available")
671
+ return pl.DataFrame()
672
+
673
+ if self.features_df is None or self.features_df.is_empty():
674
+ self.logger.warning("No feature data available")
675
+ return pl.DataFrame()
676
+
677
+ # Build the query with optional filled filter
678
+ features_query = self.features_df.lazy()
679
+
680
+ # Apply filled filter if specified
681
+ if not filled and "filled" in self.features_df.columns:
682
+ features_query = features_query.filter(~pl.col("filled"))
683
+
684
+ # Optimized single-pass operation using join instead of two separate filters
685
+ # This avoids creating intermediate Python lists and leverages Polars' optimized joins
686
+ matches = (
687
+ features_query
688
+ .join(
689
+ self.consensus_mapping_df
690
+ .lazy()
691
+ .filter(pl.col("consensus_uid").is_in(uids))
692
+ .select("feature_uid"), # Only select what we need for the join
693
+ on="feature_uid",
694
+ how="inner"
651
695
  )
652
- .select("feature_uid")
653
- .to_series()
654
- .to_list()
696
+ .collect(streaming=True) # Use streaming for memory efficiency with large datasets
655
697
  )
656
- # select all rows in features_df with uid in fid
657
- matches = self.features_df.filter(pl.col("feature_uid").is_in(fid)).clone()
698
+
658
699
  return matches
659
700
 
660
701