masster-0.5.9-py3-none-any.whl → masster-0.5.10-py3-none-any.whl

This diff shows the content of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in the public registry.

Potentially problematic release.

This version of masster might be problematic.

@@ -1264,7 +1264,7 @@ def find_ms2(self, **kwargs):
 
         # Log completion
         self.logger.success(
-            f"MS2 linking completed. Total features with MS2 data: {c}",
+            f"MS2 linking completed. Features with MS2 data: {c}.",
         )
         self.features_df = features_df
 
masster/sample/sample.py CHANGED
@@ -1,35 +1,98 @@
 """
-sample.py
+sample.py - Mass Spectrometry Sample Analysis Module
 
-This module provides tools for processing and analyzing Data-Dependent Acquisition (DDA) mass spectrometry data.
-It defines the `Sample` class, which offers methods to load, process, analyze, and visualize mass spectrometry data
-from various file formats, including mzML, Thermo RAW, and Sciex WIFF formats.
+This module provides comprehensive tools for processing and analyzing Data-Dependent Acquisition (DDA)
+mass spectrometry data. It defines the `Sample` class, which offers methods to load, process, analyze,
+and visualize mass spectrometry data from various file formats.
+
+Supported File Formats:
+- mzML (standard XML format for mass spectrometry data)
+- Thermo RAW (native Thermo Fisher Scientific format)
+- Sciex WIFF (native Sciex format)
+- Sample5 (MASSter's native HDF5-based format for optimized storage)
 
 Key Features:
-- **File Handling**: Load and save data in multiple formats.
-- **Feature Detection**: Detect and process mass spectrometry features.
-- **Spectrum Analysis**: Retrieve and analyze MS1/MS2 spectra.
-- **Visualization**: Generate interactive and static plots for spectra and chromatograms.
-- **Statistics**: Compute and export detailed DDA run statistics.
-
-Dependencies:
-- `pyopenms`: For file handling and feature detection.
-- `polars` and `pandas`: For data manipulation.
-- `numpy`: For numerical computations.
-- `bokeh`, `panel`, `holoviews`, `datashader`: For interactive visualizations.
+- **File Handling**: Load and save data in multiple formats with automatic format detection
+- **Feature Detection**: Detect and process mass spectrometry features using advanced algorithms
+- **Spectrum Analysis**: Retrieve and analyze MS1/MS2 spectra with comprehensive metadata
+- **Adduct Detection**: Find and annotate adducts and in-source fragments
+- **Isotope Analysis**: Detect and process isotopic patterns
+- **Chromatogram Extraction**: Extract and analyze chromatograms (EIC, BPC, TIC)
+- **Visualization**: Generate interactive and static plots for spectra, chromatograms, and 2D maps
+- **Statistics**: Compute and export detailed DDA run statistics and quality metrics
+- **Data Export**: Export processed data to various formats (XLSX, MGF, etc.)
+- **Memory Management**: Efficient handling of large datasets with on-disk storage options
+
+Core Dependencies:
+- `pyopenms`: OpenMS library for file handling and feature detection algorithms
+- `polars`: High-performance data manipulation and analysis
+- `numpy`: Numerical computations and array operations
+- `bokeh`, `panel`, `holoviews`, `datashader`: Interactive visualizations and dashboards
+- `h5py`: HDF5 file format support for Sample5 files
 
 Classes:
-- `Sample`: Main class for handling DDA data, providing methods for data import, processing, and visualization.
-
-Example Usage:
-```python
-from masster.sample import Sample
+    Sample: Main class for handling DDA mass spectrometry data, providing methods for
+        data import, processing, analysis, and visualization.
 
-sample = Sample(file="example.mzML")
-sample.find_features()
-sample.plot_2d()
-```
+Typical Workflow:
+    1. Load mass spectrometry data file
+    2. Detect features using find_features()
+    3. Optionally find MS2 spectra with find_ms2()
+    4. Analyze and visualize results
+    5. Export processed data
 
+Example Usage:
+    Basic analysis workflow:
+
+    ```python
+    from masster.sample import Sample
+
+    # Load a mass spectrometry file
+    sample = Sample(filename="experiment.mzML")
+
+    # Detect features
+    sample.find_features()
+
+    # Find MS2 spectra for features
+    sample.find_ms2()
+
+    # Generate 2D visualization
+    sample.plot_2d()
+
+    # Export results
+    sample.export_features("features.xlsx")
+    ```
+
+    Advanced usage with custom parameters:
+
+    ```python
+    from masster.sample import Sample
+    from masster.sample.defaults import sample_defaults, find_features_defaults
+
+    # Create custom parameters
+    params = sample_defaults(log_level="DEBUG", label="My Experiment")
+    ff_params = find_features_defaults(noise_threshold_int=1000)
+
+    # Initialize with custom parameters
+    sample = Sample(params=params)
+    sample.load("data.raw")
+
+    # Feature detection with custom parameters
+    sample.find_features(params=ff_params)
+
+    # Generate comprehensive statistics
+    stats = sample.get_dda_stats()
+    sample.plot_dda_stats()
+    ```
+
+Notes:
+    - The Sample class maintains processing history and parameters for reproducibility
+    - Large files can be processed with on-disk storage to manage memory usage
+    - All visualizations are interactive by default and can be exported as static images
+    - The module supports both individual sample analysis and batch processing workflows
+
+Version: Part of the MASSter mass spectrometry analysis framework
+Author: Zamboni Lab, ETH Zurich
 """
 
 import importlib
@@ -49,16 +112,12 @@ from masster.sample.defaults.get_spectrum_def import get_spectrum_defaults
 
 # Sample-specific imports - keeping these private, only for internal use
 from masster.sample.h5 import _load_sample5
-# from masster.sample.h5 import _load_sample5_study
 from masster.sample.h5 import _save_sample5
-# from masster.sample.helpers import _delete_ms2
 from masster.sample.helpers import _estimate_memory_usage
 from masster.sample.helpers import _get_scan_uids
 from masster.sample.helpers import _get_feature_uids
-# from masster.sample.helpers import _features_sync - made internal only
 from masster.sample.adducts import find_adducts
 from masster.sample.adducts import _get_adducts
-# Removed _get_adducts - only used in study modules
 from masster.sample.helpers import features_delete
 from masster.sample.helpers import features_filter
 from masster.sample.helpers import features_select
@@ -70,23 +129,17 @@ from masster.sample.helpers import get_eic
 from masster.sample.helpers import set_source
 from masster.sample.helpers import _recreate_feature_map
 from masster.sample.helpers import _get_feature_map
-# Load functions - keeping only specific ones needed for external API
-# from masster.sample.load import _load_featureXML - made internal only
-# from masster.sample.load import _load_ms2data - made internal only
-# from masster.sample.load import _load_mzML - made internal only
-# from masster.sample.load import _load_raw - made internal only
-# from masster.sample.load import _load_wiff - made internal only
 from masster.sample.load import chrom_extract
 from masster.sample.load import _index_file
 from masster.sample.load import load
 from masster.sample.load import load_noms1
-from masster.sample.load import _load_ms1  # Renamed from load_study
+from masster.sample.load import _load_ms1
 from masster.sample.load import sanitize
 from masster.sample.plot import plot_2d
 from masster.sample.plot import plot_2d_oracle
 from masster.sample.plot import plot_dda_stats
 from masster.sample.plot import plot_chrom
-from masster.sample.plot import plot_features_stats  # Renamed from plot_feature_stats
+from masster.sample.plot import plot_features_stats
 from masster.sample.plot import plot_ms2_cycle
 from masster.sample.plot import plot_ms2_eic
 from masster.sample.plot import plot_ms2_q1
@@ -113,7 +166,6 @@ from masster.sample.save import export_features
 from masster.sample.save import export_mgf
 from masster.sample.save import export_xlsx
 from masster.sample.save import save
-# Removed internal-only import: _save_featureXML
 
 
 class Sample:
@@ -402,6 +454,7 @@ class Sample:
             f"{base_modname}.chromatogram",
             f"{base_modname}.spectrum",
             f"{base_modname}.logger",
+            f"{base_modname}.lib",
         ]
 
         # Add study submodules
@@ -414,17 +467,9 @@ class Sample:
             ):
                 study_modules.append(module_name)
 
-        """ # Add parameters submodules
-        parameters_modules = []
-        parameters_module_prefix = f"{base_modname}.parameters."
-        for module_name in sys.modules:
-            if module_name.startswith(parameters_module_prefix) and module_name != current_module:
-                parameters_modules.append(module_name)
-        """
-
         all_modules_to_reload = (
             core_modules + sample_modules + study_modules
-        )  # + parameters_modules
+        )
 
         # Reload all discovered modules
         for full_module_name in all_modules_to_reload:
@@ -466,8 +511,6 @@ class Sample:
         else:
             str += "Features: 0\n"
             str += "Features with MS2 spectra: 0\n"
-
-        # estimate memory usage
         mem_usage = self._estimate_memory_usage()
         str += f"Estimated memory usage: {mem_usage:.2f} MB\n"
 
masster/study/h5.py CHANGED
@@ -818,6 +818,19 @@ def _reorder_columns_by_schema(
 
 def _create_dataframe_with_objects(data: dict, object_columns: list) -> pl.DataFrame:
     """Create DataFrame handling Object columns properly."""
+    # First check all data for numpy object arrays and move them to object columns
+    additional_object_cols = []
+    for k, v in data.items():
+        if k not in object_columns and hasattr(v, 'dtype') and str(v.dtype) == 'object':
+            # This is a numpy object array that should be treated as object
+            additional_object_cols.append(k)
+            object_columns.append(k)
+
+    if additional_object_cols:
+        # Re-run reconstruction for these columns
+        for col in additional_object_cols:
+            data[col] = _reconstruct_object_column(data[col], col)
+
     object_data = {k: v for k, v in data.items() if k in object_columns}
     regular_data = {k: v for k, v in data.items() if k not in object_columns}
 
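Note on the hunk above: the new guard routes any NumPy object-dtype column through `_reconstruct_object_column` before the Polars DataFrame is assembled. A minimal standalone sketch of just the dtype check (the sample column names and values here are hypothetical, not taken from the package):

```python
import numpy as np

# Ragged or mixed-type columns come back from HDF5 as dtype('O'),
# while flat numeric columns keep a concrete dtype such as float64.
regular = np.array([100.05, 250.12, 377.89])
ragged = np.array([np.array([1, 2]), np.array([3, 4, 5])], dtype=object)

for name, col in {"mz": regular, "ms2_specs": ragged}.items():
    if hasattr(col, "dtype") and str(col.dtype) == "object":
        print(f"{name}: object column, route through reconstruction")
    else:
        print(f"{name}: regular column ({col.dtype})")
```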
@@ -1103,11 +1116,18 @@ def _load_dataframe_from_group(
             logger.info(f"Loading extra column '{col}' not in schema for {df_name}")
             column_data = group[col][:]
 
-            # Try to determine if this should be treated as an object column
-            # by checking if the data looks like JSON strings
-            if len(column_data) > 0 and isinstance(column_data[0], bytes):
+            # Check if this is a known object column by name
+            known_object_columns = {"ms1_spec", "chrom", "ms2_scans", "ms2_specs", "spec", "adducts", "iso"}
+            is_known_object = col in known_object_columns
+
+            if is_known_object:
+                # Known object column, always reconstruct
+                data[col] = _reconstruct_object_column(column_data, col)
+                if col not in object_columns:
+                    object_columns.append(col)
+            elif len(column_data) > 0 and isinstance(column_data[0], bytes):
                 try:
-                    # Check if it looks like JSON
+                    # Check if it looks like JSON for unknown columns
                     test_decode = column_data[0].decode("utf-8")
                     if test_decode.startswith("[") or test_decode.startswith("{"):
                         # Looks like JSON, treat as object column
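For unknown columns, the fallback branch keeps the JSON-sniffing heuristic shown in context. A self-contained sketch of that check, under the same assumption as the code above (serialized objects are stored as UTF-8 JSON bytes; the sample inputs are illustrative):

```python
# Bytes that decode to text starting with '[' or '{' are treated as
# serialized JSON objects; anything else stays a regular column.
def looks_like_json(column_data) -> bool:
    if len(column_data) == 0 or not isinstance(column_data[0], bytes):
        return False
    try:
        text = column_data[0].decode("utf-8")
    except UnicodeDecodeError:
        return False
    return text.startswith("[") or text.startswith("{")

print(looks_like_json([b'[{"mz": 100.05, "inty": 2300.0}]']))  # True
print(looks_like_json([b"plain label"]))                       # False
```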
@@ -1738,9 +1758,7 @@ def _save_study5(self, filename):
             )
             pbar.update(1)
 
-    self.logger.success(f"Study saved successfully to {filename}")
-    self.logger.debug(f"Save completed for {filename}")
-    self.logger.debug(f"Save completed for {filename}")
+    self.logger.success(f"Study saved to {filename}")
 
 
 def _load_study5(self, filename=None):
@@ -1859,7 +1877,7 @@ def _load_study5(self, filename=None):
                 )
             else:
                 self.logger.debug(
-                    "Successfully updated parameters from loaded history",
+                    "Updated parameters from loaded history",
                 )
         else:
             self.logger.debug(
@@ -2093,8 +2111,8 @@ def _load_study5(self, filename=None):
         # Ensure the column is Int64 type
         self.samples_df = self.samples_df.cast({"map_id": pl.Int64})
 
-        self.logger.info(
-            f"Successfully migrated {sample_count} samples to indexed map_id format (0 to {sample_count - 1})",
+        self.logger.debug(
+            f"Sanitized {sample_count} samples to indexed map_id format (0 to {sample_count - 1})",
         )
 
     # Sanitize null feature_id and consensus_id values with new UIDs (same method as merge)
@@ -2218,7 +2236,7 @@ def _sanitize_nulls(self):
             pl.Series("feature_id", feature_ids, dtype=pl.Utf8)
         )
 
-        self.logger.debug(f"Successfully sanitized {null_feature_ids} feature_id values")
+        self.logger.debug(f"Sanitized {null_feature_ids} feature_id values")
 
     # Sanitize consensus_df consensus_id column
     if hasattr(self, 'consensus_df') and self.consensus_df is not None and not self.consensus_df.is_empty():
@@ -2244,8 +2262,8 @@ def _sanitize_nulls(self):
         self.consensus_df = self.consensus_df.with_columns(
             pl.Series("consensus_id", consensus_ids, dtype=pl.Utf8)
         )
-
-        self.logger.debug(f"Successfully sanitized {null_consensus_ids} consensus_id values")
+
+        self.logger.debug(f"Sanitized {null_consensus_ids} consensus_id values")
 
     # Sanitize rt_original in features_df by replacing null or NaN values with rt values
     if hasattr(self, 'features_df') and self.features_df is not None and not self.features_df.is_empty():
@@ -2262,4 +2280,4 @@ def _sanitize_nulls(self):
             .otherwise(pl.col("rt_original"))
             .alias("rt_original")
         )
-        self.logger.debug(f"Successfully sanitized {null_or_nan_rt_original} rt_original values")
+        self.logger.debug(f"Sanitized {null_or_nan_rt_original} rt_original values")
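The `rt_original` sanitization visible in the context lines above is a standard Polars null/NaN backfill. A runnable sketch with toy values (the three-row frame is illustrative only):

```python
import polars as pl

# Null or NaN rt_original values are backfilled from rt.
df = pl.DataFrame({"rt": [10.0, 20.0, 30.0], "rt_original": [None, float("nan"), 29.5]})
df = df.with_columns(
    pl.when(pl.col("rt_original").is_null() | pl.col("rt_original").is_nan())
    .then(pl.col("rt"))
    .otherwise(pl.col("rt_original"))
    .alias("rt_original")
)
print(df["rt_original"].to_list())  # [10.0, 20.0, 29.5]
```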
masster/study/helpers.py CHANGED
@@ -1630,7 +1630,7 @@ def restore_features(self, samples=None, maps=False):
             self.logger.error(f"Failed to load sample {sample_name}: {e}")
             continue
 
-    self.logger.info(
+    self.logger.success(
         f"Completed restoring columns {columns_to_update} from {len(sample_uids)} samples",
     )
 
@@ -2940,6 +2940,7 @@ def features_delete(self, features):
 
 def consensus_select(
     self,
+    uid=None,
     mz=None,
     rt=None,
     inty_mean=None,
@@ -2956,14 +2957,12 @@
     rt_delta_mean=None,
     id_top_score=None,
     identified=None,
-    # New adduct filter parameters
    adduct_top=None,
    adduct_charge_top=None,
    adduct_mass_neutral_top=None,
    adduct_mass_shift_top=None,
    adduct_group=None,
    adduct_of=None,
-    # New identification filter parameters
    id_top_name=None,
    id_top_class=None,
    id_top_adduct=None,
@@ -2976,6 +2975,11 @@
     OPTIMIZED VERSION: Enhanced performance with lazy evaluation, vectorized operations, and efficient filtering.
 
     Parameters:
+        uid: consensus UID filter with flexible formats:
+            - None: include all consensus features (default)
+            - int: single specific consensus_uid
+            - tuple: range of consensus_uids (consensus_uid_min, consensus_uid_max)
+            - list: specific list of consensus_uid values
         mz: m/z filter with flexible formats:
             - float: m/z value ± default tolerance (uses study.parameters.eic_mz_tol)
             - tuple (mz_min, mz_max): range where mz_max > mz_min
@@ -3023,7 +3027,7 @@
         return pl.DataFrame()
 
     # Early return optimization - check if any filters are provided
-    filter_params = [mz, rt, inty_mean, consensus_uid, consensus_id, number_samples,
+    filter_params = [uid, mz, rt, inty_mean, consensus_uid, consensus_id, number_samples,
                      number_ms2, quality, bl, chrom_coherence_mean, chrom_prominence_mean,
                      chrom_prominence_scaled_mean, chrom_height_scaled_mean,
                      rt_delta_mean, id_top_score, identified,
@@ -3044,6 +3048,21 @@
     warnings = []
 
     # Build all filter conditions efficiently
+    # Handle uid parameter first (consensus_uid filter with flexible formats)
+    if uid is not None:
+        if isinstance(uid, int):
+            # Single specific consensus_uid
+            filter_conditions.append(pl.col("consensus_uid") == uid)
+        elif isinstance(uid, tuple) and len(uid) == 2:
+            # Range of consensus_uids (consensus_uid_min, consensus_uid_max)
+            min_uid, max_uid = uid
+            filter_conditions.append((pl.col("consensus_uid") >= min_uid) & (pl.col("consensus_uid") <= max_uid))
+        elif isinstance(uid, list):
+            # Specific list of consensus_uid values
+            filter_conditions.append(pl.col("consensus_uid").is_in(uid))
+        else:
+            self.logger.warning(f"Invalid uid parameter type: {type(uid)}. Expected int, tuple, or list.")
+
     if mz is not None:
         if isinstance(mz, tuple) and len(mz) == 2:
             if mz[1] < mz[0]:
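The three accepted `uid` shapes map directly onto Polars expressions. A standalone sketch of the same dispatch against a toy frame (the helper name `uid_condition` and the sample data are illustrative, not part of masster):

```python
import polars as pl

df = pl.DataFrame({"consensus_uid": [1, 2, 3, 4, 5], "mz": [101.1, 152.2, 203.3, 254.4, 305.5]})

def uid_condition(uid):
    # Mirrors the int / tuple / list dispatch added in consensus_select.
    if isinstance(uid, int):
        return pl.col("consensus_uid") == uid
    if isinstance(uid, tuple) and len(uid) == 2:
        lo, hi = uid
        return (pl.col("consensus_uid") >= lo) & (pl.col("consensus_uid") <= hi)
    if isinstance(uid, list):
        return pl.col("consensus_uid").is_in(uid)
    raise TypeError(f"Expected int, tuple, or list, got {type(uid)}")

print(df.filter(uid_condition(3)))        # single consensus_uid
print(df.filter(uid_condition((2, 4))))   # inclusive range
print(df.filter(uid_condition([1, 5])))   # explicit list
```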
masster/study/load.py CHANGED
@@ -139,7 +139,7 @@ def add(
             f"No files found in {folder}. Please check the folder path or file patterns.",
         )
     else:
-        self.logger.debug(f"Successfully added {counter} samples to the study.")
+        self.logger.debug(f"Added {counter} samples to the study.")
 
     # Return a simple summary to suppress marimo's automatic object display
     return f"Added {counter} samples to study"
@@ -2055,169 +2055,6 @@ def _sanitize(self):
         except Exception as e:
             self.logger.error(f"Failed to recreate sanitized DataFrame: {e}")
 
-'''
-def _load_features(self):
-    """
-    Load features by reconstructing FeatureMaps from the processed features_df data.
-
-    This ensures that the loaded FeatureMaps contain the same processed features
-    as stored in features_df, rather than loading raw features from .featureXML files
-    which may not match the processed data after filtering, alignment, etc.
-    """
-    import polars as pl
-    import pyopenms as oms
-    from tqdm import tqdm
-    from datetime import datetime
-
-    self.features_maps = []
-
-    # Check if features_df exists and is not empty
-    if self.features_df is None:
-        self.logger.warning("features_df is None. Falling back to XML loading.")
-        self._load_features_from_xml()
-        return
-
-    if len(self.features_df) == 0:
-        self.logger.warning("features_df is empty. Falling back to XML loading.")
-        self._load_features_from_xml()
-        return
-
-    # If we get here, we should use the new method
-    self.logger.debug("Reconstructing FeatureMaps from features_df.")
-
-    tdqm_disable = self.log_level not in ["TRACE", "DEBUG", "INFO"]
-
-    # Process each sample in order
-    for sample_index, row_dict in tqdm(
-        enumerate(self.samples_df.iter_rows(named=True)),
-        total=len(self.samples_df),
-        desc=f"{datetime.now().strftime('%Y-%m-%d %H:%M:%S.%f')[:-3]} | INFO | {self.log_label}Reconstruct FeatureMaps from DataFrame",
-        disable=tdqm_disable,
-    ):
-        sample_uid = row_dict["sample_uid"]
-        sample_name = row_dict["sample_name"]
-
-        # Get features for this sample from features_df
-        sample_features = self.features_df.filter(pl.col("sample_uid") == sample_uid)
-
-        # Create new FeatureMap
-        feature_map = oms.FeatureMap()
-
-        # Convert DataFrame features to OpenMS Features
-        # Keep track of next available feature_id for this sample
-        next_feature_id = 1
-        used_feature_ids = set()
-
-        # First pass: collect existing feature_ids to avoid conflicts
-        for feature_row in sample_features.iter_rows(named=True):
-            if feature_row["feature_id"] is not None:
-                used_feature_ids.add(int(feature_row["feature_id"]))
-
-        # Find the next available feature_id
-        while next_feature_id in used_feature_ids:
-            next_feature_id += 1
-
-        for feature_row in sample_features.iter_rows(named=True):
-            feature = oms.Feature()
-
-            # Set properties from DataFrame (handle missing values gracefully)
-            try:
-                # Skip features with missing critical data
-                if feature_row["mz"] is None:
-                    self.logger.warning("Skipping feature due to missing mz")
-                    continue
-                if feature_row["rt"] is None:
-                    self.logger.warning("Skipping feature due to missing rt")
-                    continue
-                if feature_row["inty"] is None:
-                    self.logger.warning("Skipping feature due to missing inty")
-                    continue
-
-                # Handle missing feature_id by generating a new one
-                if feature_row["feature_id"] is None:
-                    feature_id = next_feature_id
-                    next_feature_id += 1
-                    self.logger.debug(f"Generated new feature_id {feature_id} for feature with missing ID")
-                else:
-                    feature_id = int(feature_row["feature_id"])
-
-                feature.setUniqueId(feature_id)
-                feature.setMZ(float(feature_row["mz"]))
-                feature.setRT(float(feature_row["rt"]))
-                feature.setIntensity(float(feature_row["inty"]))
-
-                # Handle optional fields that might be None
-                if feature_row.get("quality") is not None:
-                    feature.setOverallQuality(float(feature_row["quality"]))
-                if feature_row.get("charge") is not None:
-                    feature.setCharge(int(feature_row["charge"]))
-
-                # Add to feature map
-                feature_map.push_back(feature)
-            except (ValueError, TypeError) as e:
-                self.logger.warning(f"Skipping feature due to conversion error: {e}")
-                continue
-
-        self.features_maps.append(feature_map)
-
-    self.logger.debug(
-        f"Successfully reconstructed {len(self.features_maps)} FeatureMaps from features_df.",
-    )
-'''
-
-'''
-def _load_features_from_xml(self):
-    """
-    Original load_features method that loads from .featureXML files.
-    Used as fallback when features_df is not available.
-    """
-    self.features_maps = []
-    self.logger.debug("Loading features from featureXML files.")
-    tdqm_disable = self.log_level not in ["TRACE", "DEBUG", "INFO"]
-    for _index, row_dict in tqdm(
-        enumerate(self.samples_df.iter_rows(named=True)),
-        total=len(self.samples_df),
-        desc=f"{datetime.now().strftime('%Y-%m-%d %H:%M:%S.%f')[:-3]} | INFO | {self.log_label}Load feature maps from XML",
-        disable=tdqm_disable,
-    ):
-        if self.folder is not None:
-            filename = os.path.join(
-                self.folder,
-                row_dict["sample_name"] + ".featureXML",
-            )
-        else:
-            filename = os.path.join(
-                os.getcwd(),
-                row_dict["sample_name"] + ".featureXML",
-            )
-        # check if file exists
-        if not os.path.exists(filename):
-            filename = row_dict["sample_path"].replace(".sample5", ".featureXML")
-
-        if not os.path.exists(filename):
-            self.features_maps.append(None)
-            continue
-
-        fh = oms.FeatureXMLFile()
-        fm = oms.FeatureMap()
-        fh.load(filename, fm)
-        self.features_maps.append(fm)
-    self.logger.debug("Features loaded successfully.")
-'''
-'''
-def _load_consensusXML(self, filename="alignment.consensusXML"):
-    """
-    Load a consensus map from a file.
-    """
-    if not os.path.exists(filename):
-        self.logger.error(f"File {filename} does not exist.")
-        return
-    fh = oms.ConsensusXMLFile()
-    self.consensus_map = oms.ConsensusMap()
-    fh.load(filename, self.consensus_map)
-    self.logger.debug(f"Loaded consensus map from {filename}.")
-'''
-
 def _add_samples_batch(
     self,
     files,
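The hunk above deletes three long-commented-out fallbacks rather than live code. For reference, the core pattern the removed `_load_features` used, rebuilding OpenMS FeatureMaps row by row, looks like this minimal pyopenms sketch (the two rows are made up; the real code iterated over `features_df`):

```python
import pyopenms as oms

# DataFrame-row -> oms.Feature conversion, as in the removed fallback.
rows = [
    {"feature_id": 1, "mz": 301.1412, "rt": 182.4, "inty": 5.2e5},
    {"feature_id": 2, "mz": 455.2903, "rt": 240.9, "inty": 1.1e6},
]

feature_map = oms.FeatureMap()
for row in rows:
    feature = oms.Feature()
    feature.setUniqueId(row["feature_id"])
    feature.setMZ(float(row["mz"]))
    feature.setRT(float(row["rt"]))
    feature.setIntensity(float(row["inty"]))
    feature_map.push_back(feature)

print(feature_map.size())  # 2
```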
masster/study/merge.py CHANGED
@@ -340,8 +340,6 @@ def merge(study, **kwargs) -> None:
     - MS2 spectra are automatically linked when link_ms2=True
     - Adduct relationships are identified and stored after merging
     """
-    start_time = time.time()
-
     # Initialize with defaults and override with kwargs
     params = merge_defaults()
 
@@ -486,10 +484,6 @@ def merge(study, **kwargs) -> None:
 
     # Finalize merge: filter by min_samples and add isotope/MS2 data
     __finalize_merge(study, params.link_ms2, params.extract_ms1, params.min_samples)
-
-    # Log completion without the misleading feature count
-    elapsed = time.time() - start_time
-    study.logger.debug(f"Merge process completed in {elapsed:.1f}s")
 
 
 def _merge_kd(study, params: merge_defaults) -> oms.ConsensusMap:
@@ -3082,9 +3076,9 @@ def __finalize_merge(study, link_ms2, extract_ms1, min_samples):
         # Count tight clusters with specified thresholds
         tight_clusters = _count_tight_clusters(study,mz_tol=0.04, rt_tol=0.3)
 
-        study.logger.info(
+        study.logger.success(
             f"Merging completed. Consensus features: {len(study.consensus_df)}. "
-            f"Completeness: {c:.2f}. Tight clusters left: {tight_clusters}.",
+            f"Completeness: {c:.2f}. Tight clusters: {tight_clusters}.",
         )
     else:
         study.logger.warning(