masster 0.4.16__py3-none-any.whl → 0.4.18__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of masster might be problematic.

masster/_version.py CHANGED
@@ -1,7 +1,7 @@
  from __future__ import annotations
 
 
- __version__ = "0.4.16"
+ __version__ = "0.4.18"
 
 
  def get_version():
masster/study/defaults/merge_def.py CHANGED
@@ -25,24 +25,36 @@ class merge_defaults:
  link_ms2 (bool): Whether to link MS2 spectra to consensus features. Default is True.
  """
 
- method: str = "kd"
- min_samples: int = 10
+ method: str = "qt"
+ min_samples: int = 2
  rt_tol: float = 5.0
  mz_tol: float = 0.01
- chunk_size: int = 300
+ chunk_size: int = 500
  nr_partitions: int = 1000
- min_rel_cc_size: float = 0.2
+ min_rel_cc_size: float = 0.1
  max_pairwise_log_fc: float = -1.0
  max_nr_conflicts: int = 0
  link_ms2: bool = True
+
+ # KD-Strict specific parameters
+ optimize_rt_tol: bool = False
+ rt_tol_range: tuple = (0.5, 4.0)
+ rt_tol_steps: int = 7
+ secondary_merge_rt_tol: float = 1.0
+ secondary_merge_mz_tol: float = 0.005
+ min_sample_overlap: float = 0.8
+ max_rt_spread: float = 2.0 # Will default to 2x rt_tol
+ min_coherence: float = 0.0
 
  _param_metadata: dict[str, dict[str, Any]] = field(
  default_factory=lambda: {
  "method": {
  "dtype": str,
  "description": "Merge method (algorithm) to use",
- "default": "kd",
- "allowed_values": ["kd", "qt", "kd-nowarp", "chunked"],
+ "default": "quality",
+ "allowed_values": ["sensitivity", "qt", "nowarp", "chunked", "quality",
+ "kd", "kd-nowarp", "kd_nowarp", "kd-strict", "kd_strict",
+ "kd_chunked", "kd-chunked", "qt_chunked", "qt-chunked"],
  },
  "min_samples": {
  "dtype": int,
@@ -103,6 +115,58 @@ class merge_defaults:
  "description": "Whether to link MS2 spectra to consensus features",
  "default": True,
  },
+ # KD-Strict specific parameters
+ "optimize_rt_tol": {
+ "dtype": bool,
+ "description": "Enable RT tolerance optimization for kd-strict method",
+ "default": False,
+ },
+ "rt_tol_range": {
+ "dtype": tuple,
+ "description": "RT tolerance range for optimization (min, max) in seconds",
+ "default": (0.8, 2.0),
+ },
+ "rt_tol_steps": {
+ "dtype": int,
+ "description": "Number of steps for RT tolerance optimization",
+ "default": 5,
+ "min_value": 3,
+ "max_value": 20,
+ },
+ "secondary_merge_rt_tol": {
+ "dtype": float,
+ "description": "RT tolerance for secondary clustering in kd-strict (seconds)",
+ "default": 0.5,
+ "min_value": 0.1,
+ "max_value": 5.0,
+ },
+ "secondary_merge_mz_tol": {
+ "dtype": float,
+ "description": "m/z tolerance for secondary clustering in kd-strict (Da)",
+ "default": 0.005,
+ "min_value": 0.001,
+ "max_value": 0.1,
+ },
+ "min_sample_overlap": {
+ "dtype": float,
+ "description": "Minimum sample overlap ratio for merging features (0.0-1.0)",
+ "default": 0.8,
+ "min_value": 0.0,
+ "max_value": 1.0,
+ },
+ "max_rt_spread": {
+ "dtype": float,
+ "description": "Maximum allowed RT spread in seconds (None = 3x rt_tol)",
+ "default": None,
+ "min_value": 0.1,
+ },
+ "min_coherence": {
+ "dtype": float,
+ "description": "Minimum chromatographic coherence score (0.0 = disabled)",
+ "default": 0.0,
+ "min_value": 0.0,
+ "max_value": 1.0,
+ },
  },
  repr=False,
  )
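
The new dataclass fields and their _param_metadata entries above are exposed as keyword arguments of Study.merge() (see the masster/study/merge.py diff below). A minimal usage sketch, assuming a Study object named study whose features have already been detected and aligned (its construction is not part of this diff); the values shown simply restate the 0.4.18 defaults:

# Hypothetical call illustrating the new 0.4.18 merge parameters; `study` is assumed.
study.merge(
    method="quality",              # documented default in 0.4.18; legacy names such as "kd" or "kd-strict" are remapped
    min_samples=2,                 # was 10 in 0.4.16
    rt_tol=5.0,
    mz_tol=0.01,
    optimize_rt_tol=False,         # quality ("kd-strict") post-processing options
    secondary_merge_rt_tol=1.0,
    secondary_merge_mz_tol=0.005,
    min_sample_overlap=0.8,
    max_rt_spread=2.0,
    min_coherence=0.0,
)
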
masster/study/merge.py CHANGED
@@ -1,6 +1,6 @@
  """
  Unified merge module for the Study class.
- Supports multiple merge methods: 'kd', 'qt', 'kd-nowarp', 'chunked'
+ Supports multiple merge methods: 'kd', 'qt', 'kd-nowarp', 'kd_chunked', 'qt_chunked'
  """
 
  import time
@@ -24,8 +24,8 @@ def merge(self, **kwargs) -> None:
  ----------
  **kwargs : dict
  Parameters from merge_defaults class:
- - method : str, default 'kd'
- Merge algorithm: 'kd', 'qt', 'kd-nowarp', 'chunked'
+ - method : str, default 'quality'
+ Merge algorithm: 'sensitivity', 'qt', 'nowarp', 'kd_chunked', 'qt_chunked', 'quality'
  - min_samples : int, default 10
  Minimum number of samples for consensus feature
  - rt_tol : float, default 2.0
@@ -47,12 +47,16 @@ def merge(self, **kwargs) -> None:
 
  Algorithm Guidelines
  -------------------
- - KD: Best general purpose, O(n log n), recommended default
+ - Quality: KD with post-processing quality control to reduce oversegmentation (RECOMMENDED DEFAULT)
+ Includes RT tolerance optimization, secondary clustering, and quality filtering
+ - Sensitivity: Best raw sensitivity, O(n log n), maximum feature detection
  - QT: Thorough but slow O(n²), good for <1000 samples
- - KD-NoWarp: Memory efficient KD without RT warping for large datasets
- - Chunked: Memory-optimized KD algorithm for very large datasets (>5000 samples)
+ - NoWarp: Memory efficient KD without RT warping for large datasets
+ - KD-Chunked: Memory-optimized KD algorithm for very large datasets (>5000 samples)
  Uses optimized partitioning for better memory management while maintaining
  full cross-sample consensus feature detection.
+ - QT-Chunked: Memory-optimized QT algorithm for very large datasets (>5000 samples)
+ Uses QT clustering in first stage with optimized cross-chunk consensus building.
  """
  start_time = time.time()
 
@@ -67,9 +71,29 @@ def merge(self, **kwargs) -> None:
  else:
  self.logger.warning(f"Unknown parameter '{key}' ignored")
 
+ # Backward compatibility: Map old method names to new names
+ method_mapping = {
+ 'kd': 'sensitivity',
+ 'kd-nowarp': 'nowarp',
+ 'kd_nowarp': 'nowarp',
+ 'kd-strict': 'quality',
+ 'kd_strict': 'quality',
+ 'kdstrict': 'quality',
+ 'chunked': 'kd_chunked', # Map old 'chunked' to 'kd_chunked'
+ 'qtchunked': 'qt_chunked', # QT chunked variants
+ 'qt-chunked': 'qt_chunked',
+ 'kdchunked': 'kd_chunked', # KD chunked variants
+ 'kd-chunked': 'kd_chunked'
+ }
+
+ if params.method in method_mapping:
+ old_method = params.method
+ params.method = method_mapping[old_method]
+ self.logger.info(f"Method '{old_method}' is deprecated. Using '{params.method}' instead.")
+
  # Validate method
- if params.method not in ['kd', 'qt', 'kd-nowarp', 'chunked']:
- raise ValueError(f"Invalid method '{params.method}'. Must be one of: ['kd', 'qt', 'kd-nowarp', 'chunked']")
+ if params.method not in ['sensitivity', 'qt', 'nowarp', 'kd_chunked', 'qt_chunked', 'quality']:
+ raise ValueError(f"Invalid method '{params.method}'. Must be one of: ['sensitivity', 'qt', 'nowarp', 'kd_chunked', 'qt_chunked', 'quality']")
 
  # Persist last used params for diagnostics
  try:
@@ -77,6 +101,15 @@ def merge(self, **kwargs) -> None:
  except Exception:
  self._merge_params_last = {}
 
+ # Store merge parameters in history
+ try:
+ if hasattr(self, 'store_history'):
+ self.store_history(['merge'], params.to_dict())
+ else:
+ self.logger.warning("History storage not available - parameters not saved to history")
+ except Exception as e:
+ self.logger.warning(f"Failed to store merge parameters in history: {e}")
+
  # Ensure feature maps are available for merging (regenerate if needed)
  if len(self.features_maps) < len(self.samples_df):
  self.features_maps = []
@@ -106,7 +139,7 @@ def merge(self, **kwargs) -> None:
  cached_valid_adducts.add("?")
 
  # Route to algorithm implementation
- if params.method == 'kd':
+ if params.method == 'sensitivity':
  consensus_map = _merge_kd(self, params)
  # Extract consensus features
  self._extract_consensus_features(consensus_map, params.min_samples, cached_adducts_df, cached_valid_adducts)
@@ -114,13 +147,19 @@ def merge(self, **kwargs) -> None:
  consensus_map = _merge_qt(self, params)
  # Extract consensus features
  self._extract_consensus_features(consensus_map, params.min_samples, cached_adducts_df, cached_valid_adducts)
- elif params.method == 'kd-nowarp':
+ elif params.method == 'nowarp':
  consensus_map = _merge_kd_nowarp(self, params)
  # Extract consensus features
  self._extract_consensus_features(consensus_map, params.min_samples, cached_adducts_df, cached_valid_adducts)
- elif params.method == 'chunked':
- consensus_map = _merge_chunked(self, params, cached_adducts_df, cached_valid_adducts)
- # Note: _merge_chunked populates consensus_df directly, no need to extract
+ elif params.method == 'quality':
+ consensus_map = _merge_kd_strict(self, params)
+ # Note: _merge_kd_strict handles both consensus_df and consensus_mapping_df directly
+ elif params.method == 'kd_chunked':
+ consensus_map = _merge_kd_chunked(self, params, cached_adducts_df, cached_valid_adducts)
+ # Note: _merge_kd_chunked populates consensus_df directly, no need to extract
+ elif params.method == 'qt_chunked':
+ consensus_map = _merge_qt_chunked(self, params, cached_adducts_df, cached_valid_adducts)
+ # Note: _merge_qt_chunked populates consensus_df directly, no need to extract
 
  # Perform adduct grouping
  self._perform_adduct_grouping(params.rt_tol, params.mz_tol)
@@ -160,9 +199,9 @@ def _merge_kd(self, params: merge_defaults) -> oms.ConsensusMap:
  params_oms.setValue("warp:mz_tol", params.mz_tol)
  params_oms.setValue("link:rt_tol", params.rt_tol)
  params_oms.setValue("link:mz_tol", params.mz_tol)
- params_oms.setValue("link:min_rel_cc_size", params.min_rel_cc_size)
- params_oms.setValue("link:max_pairwise_log_fc", params.max_pairwise_log_fc)
- params_oms.setValue("link:max_nr_conflicts", params.max_nr_conflicts)
+ #params_oms.setValue("link:min_rel_cc_size", params.min_rel_cc_size)
+ #params_oms.setValue("link:max_pairwise_log_fc", params.max_pairwise_log_fc)
+ #params_oms.setValue("link:max_nr_conflicts", params.max_nr_conflicts)
  #params_oms.setValue("link:charge_merging", "With_charge_zero") THIS LEADS TO A CRASH
 
  grouper.setParameters(params_oms)
@@ -198,9 +237,9 @@ def _merge_qt(self, params: merge_defaults) -> oms.ConsensusMap:
  params_oms.setValue("distance_MZ:max_difference", params.mz_tol)
  params_oms.setValue("distance_MZ:unit", "Da") # QT now uses Da like all other methods
  params_oms.setValue("ignore_charge", "true")
- params_oms.setValue("min_rel_cc_size", params.min_rel_cc_size)
- params_oms.setValue("max_pairwise_log_fc", params.max_pairwise_log_fc)
- params_oms.setValue("max_nr_conflicts", params.max_nr_conflicts)
+ #params_oms.setValue("min_rel_cc_size", params.min_rel_cc_size)
+ #params_oms.setValue("max_pairwise_log_fc", params.max_pairwise_log_fc)
+ #params_oms.setValue("max_nr_conflicts", params.max_nr_conflicts)
  params_oms.setValue("nr_partitions", params.nr_partitions)
 
  grouper.setParameters(params_oms)
@@ -209,6 +248,496 @@ def _merge_qt(self, params: merge_defaults) -> oms.ConsensusMap:
  return consensus_map
 
 
+ def _merge_kd_strict(self, params: merge_defaults) -> oms.ConsensusMap:
+ """
+ Quality merge: Standard KD algorithm with post-processing quality control.
+
+ This method combines the sensitivity of KD clustering with post-processing steps
+ to reduce oversegmentation while maintaining high-quality consensus features.
+ This is the recommended default method.
+
+ Post-processing features:
+ 1. RT tolerance optimization (optional)
+ 2. Secondary clustering for close features
+ 3. Sample overlap validation
+ 4. RT spread quality filtering
+ 5. Chromatographic coherence validation
+
+ Additional parameters supported in params:
+ - optimize_rt_tol: bool - Enable RT tolerance optimization
+ - rt_tol_range: tuple - RT tolerance range for optimization (min, max)
+ - secondary_merge_rt_tol: float - Secondary merge RT tolerance (default: 0.5s)
+ - secondary_merge_mz_tol: float - Secondary merge m/z tolerance (default: 0.005)
+ - min_sample_overlap: float - Minimum sample overlap for merging (0.0-1.0, default: 0.8)
+ - max_rt_spread: float - Maximum RT spread allowed (default: 2x rt_tol)
+ - min_coherence: float - Minimum chromatographic coherence (default: 0.0, disabled)
+ """
+
+ # Check for RT tolerance optimization
+ optimize_rt_tol = getattr(params, 'optimize_rt_tol', False)
+
+ if optimize_rt_tol:
+ # Optimize RT tolerance first
+ optimal_rt_tol = _optimize_rt_tolerance(self, params)
+ self.logger.info(f"RT tolerance optimization: {params.rt_tol}s → {optimal_rt_tol}s")
+ # Create modified params with optimal RT tolerance
+ import copy
+ optimized_params = copy.deepcopy(params)
+ optimized_params.rt_tol = optimal_rt_tol
+ else:
+ optimized_params = params
+
+ # Phase 1: Standard KD clustering
+ self.logger.info("Initial KD clustering")
+ consensus_map = _merge_kd(self, optimized_params)
+
+ # Phase 2: Post-processing quality control
+ self.logger.info("Post-processing quality control")
+ consensus_map = _apply_kd_strict_postprocessing(self, consensus_map, optimized_params)
+
+ return consensus_map
+
+
+ def _optimize_rt_tolerance(self, params: merge_defaults) -> float:
+ """
+ Optimize RT tolerance by testing different values and measuring oversegmentation.
+
+ Args:
+ self: Study object
+ params: Merge parameters
+
+ Returns:
+ Optimal RT tolerance value
+ """
+ rt_tol_range = getattr(params, 'rt_tol_range', (0.8, 2.0))
+ rt_tol_steps = getattr(params, 'rt_tol_steps', 5)
+
+ self.logger.info(f"Optimizing RT tolerance in range {rt_tol_range} with {rt_tol_steps} steps")
+
+ # Generate test values
+ test_rt_tols = [rt_tol_range[0] + i * (rt_tol_range[1] - rt_tol_range[0]) / (rt_tol_steps - 1)
+ for i in range(rt_tol_steps)]
+
+ best_rt_tol = params.rt_tol
+ best_score = float('inf')
+
+ # Store original features for restoration
+ original_consensus_df = getattr(self, 'consensus_df', pl.DataFrame())
+ original_consensus_mapping_df = getattr(self, 'consensus_mapping_df', pl.DataFrame())
+
+ for test_rt_tol in test_rt_tols:
+ try:
+ # Create test parameters
+ import copy
+ test_params = copy.deepcopy(params)
+ test_params.rt_tol = test_rt_tol
+
+ # Run KD merge with test parameters
+ test_consensus_map = _merge_kd(self, test_params)
+
+ # Extract consensus features temporarily for analysis
+ self._extract_consensus_features(test_consensus_map, test_params.min_samples)
+
+ if len(self.consensus_df) == 0:
+ continue
+
+ # Calculate oversegmentation metrics
+ oversegmentation_score = _calculate_oversegmentation_score(self, test_rt_tol)
+
+ self.logger.debug(f"RT tol {test_rt_tol:.1f}s: {len(self.consensus_df)} features, score: {oversegmentation_score:.3f}")
+
+ # Lower score is better (less oversegmentation)
+ if oversegmentation_score < best_score:
+ best_score = oversegmentation_score
+ best_rt_tol = test_rt_tol
+
+ except Exception as e:
+ self.logger.warning(f"RT tolerance optimization failed for {test_rt_tol}s: {e}")
+ continue
+
+ # Restore original consensus data
+ self.consensus_df = original_consensus_df
+ self.consensus_mapping_df = original_consensus_mapping_df
+
+ self.logger.info(f"Optimal RT tolerance: {best_rt_tol:.1f}s (score: {best_score:.3f})")
+ return best_rt_tol
+
+
+ def _calculate_oversegmentation_score(self, rt_tol: float) -> float:
+ """
+ Calculate oversegmentation score based on feature density and RT spread metrics.
+ Lower scores indicate less oversegmentation.
+
+ Args:
+ self: Study object
+ rt_tol: RT tolerance used
+
+ Returns:
+ Oversegmentation score (lower = better)
+ """
+ if len(self.consensus_df) == 0:
+ return float('inf')
+
+ # Metric 1: Feature density (features per RT second)
+ rt_range = self.consensus_df['rt'].max() - self.consensus_df['rt'].min()
+ if rt_range <= 0:
+ return float('inf')
+
+ feature_density = len(self.consensus_df) / rt_range
+
+ # Metric 2: Average RT spread relative to tolerance
+ rt_spreads = (self.consensus_df['rt_max'] - self.consensus_df['rt_min'])
+ avg_rt_spread_ratio = rt_spreads.mean() / rt_tol if rt_tol > 0 else float('inf')
+
+ # Metric 3: Proportion of features with low sample counts (indicates fragmentation)
+ low_sample_features = len(self.consensus_df.filter(pl.col('number_samples') <= 5))
+ low_sample_ratio = low_sample_features / len(self.consensus_df)
+
+ # Metric 4: Number of features with excessive RT spread
+ excessive_spread_features = len(rt_spreads.filter(rt_spreads > rt_tol * 2))
+ excessive_spread_ratio = excessive_spread_features / len(self.consensus_df)
+
+ # Combined score (weighted combination)
+ oversegmentation_score = (
+ 0.4 * (feature_density / 10.0) + # Normalize to reasonable scale
+ 0.3 * avg_rt_spread_ratio +
+ 0.2 * low_sample_ratio +
+ 0.1 * excessive_spread_ratio
+ )
+
+ return oversegmentation_score
+
+
+ def _apply_kd_strict_postprocessing(self, consensus_map: oms.ConsensusMap, params: merge_defaults) -> oms.ConsensusMap:
+ """
+ Apply post-processing quality control to KD consensus map.
+
+ Args:
+ consensus_map: Initial consensus map from KD
+ params: Merge parameters with kd-strict options
+
+ Returns:
+ Processed consensus map with reduced oversegmentation
+ """
+ if consensus_map.size() == 0:
+ self.logger.warning("Empty consensus map provided to post-processing")
+ return consensus_map
+
+ self.logger.debug(f"Post-processing {consensus_map.size()} initial consensus features")
+
+ # Step 1: Extract initial consensus features
+ original_min_samples = params.min_samples
+ params.min_samples = 1 # Extract all features initially
+
+ self._extract_consensus_features(consensus_map, params.min_samples)
+ initial_feature_count = len(self.consensus_df)
+
+ if initial_feature_count == 0:
+ self.logger.warning("No consensus features extracted for post-processing")
+ params.min_samples = original_min_samples
+ return consensus_map
+
+ # Step 2: Secondary clustering for close features
+ secondary_merge_rt_tol = getattr(params, 'secondary_merge_rt_tol', 0.5)
+ secondary_merge_mz_tol = getattr(params, 'secondary_merge_mz_tol', 0.005)
+
+ self.logger.debug(f"Secondary clustering with RT≤{secondary_merge_rt_tol}s, m/z≤{secondary_merge_mz_tol}")
+ merged_features = _perform_secondary_clustering(self, secondary_merge_rt_tol, secondary_merge_mz_tol)
+
+ # Step 3: Sample overlap validation
+ min_sample_overlap = getattr(params, 'min_sample_overlap', 0.8)
+ if min_sample_overlap > 0:
+ self.logger.debug(f"Sample overlap validation (threshold: {min_sample_overlap})")
+ merged_features = _validate_sample_overlap(self, merged_features, min_sample_overlap)
+
+ # Step 4: RT spread quality filtering
+ if params.rt_tol is not None:
+ max_rt_spread = getattr(params, 'max_rt_spread', params.rt_tol * 2)
+ if max_rt_spread is not None:
+ self.logger.debug(f"RT spread filtering (max: {max_rt_spread:.1f}s)")
+ merged_features = _filter_rt_spread(self, merged_features, max_rt_spread)
+ else:
+ self.logger.debug("Skipping RT spread filtering - max_rt_spread is None")
+ else:
+ self.logger.debug("Skipping RT spread filtering - rt_tol is None")
+
+ # Step 5: Chromatographic coherence filtering (optional)
+ min_coherence = getattr(params, 'min_coherence', 0.0)
+ if min_coherence > 0:
+ self.logger.debug(f"Chromatographic coherence filtering (min: {min_coherence})")
+ merged_features = _filter_coherence(self, merged_features, min_coherence)
+
+ # Step 6: Rebuild consensus_df with filtered features and preserve mapping
+ original_mapping_df = self.consensus_mapping_df.clone() # Save original mapping
+ self.consensus_df = pl.DataFrame(merged_features, strict=False)
+
+ # Step 7: Apply original min_samples filter
+ params.min_samples = original_min_samples
+ if params.min_samples > 1:
+ l1 = len(self.consensus_df)
+ self.consensus_df = self.consensus_df.filter(
+ pl.col("number_samples") >= params.min_samples
+ )
+ filtered_count = l1 - len(self.consensus_df)
+ if filtered_count > 0:
+ self.logger.debug(f"Filtered {filtered_count} features below min_samples threshold ({params.min_samples})")
+
+ # Step 8: Update consensus_mapping_df to match final consensus_df
+ if len(self.consensus_df) > 0 and len(original_mapping_df) > 0:
+ valid_consensus_ids = set(self.consensus_df['consensus_uid'].to_list())
+ self.consensus_mapping_df = original_mapping_df.filter(
+ pl.col('consensus_uid').is_in(list(valid_consensus_ids))
+ )
+ else:
+ self.consensus_mapping_df = pl.DataFrame()
+
+ final_feature_count = len(self.consensus_df)
+ reduction_pct = ((initial_feature_count - final_feature_count) / initial_feature_count * 100) if initial_feature_count > 0 else 0
+
+ self.logger.info(f"Post-processing complete: {initial_feature_count} → {final_feature_count} features ({reduction_pct:.1f}% reduction)")
+
+ # Create a new consensus map for compatibility (the processed data is in consensus_df)
+ processed_consensus_map = oms.ConsensusMap()
+ return processed_consensus_map
+
+
+ def _perform_secondary_clustering(self, rt_tol: float, mz_tol: float) -> list:
+ """
+ Perform secondary clustering to merge very close features.
+
+ Args:
+ rt_tol: RT tolerance for secondary clustering
+ mz_tol: m/z tolerance for secondary clustering
+
+ Returns:
+ List of merged consensus feature dictionaries
+ """
+ if len(self.consensus_df) == 0:
+ return []
+
+ # Convert consensus_df to list of dictionaries for clustering
+ consensus_features = []
+ for i, row in enumerate(self.consensus_df.iter_rows(named=True)):
+ consensus_features.append(dict(row))
+
+ # Use Union-Find for efficient clustering
+ class UnionFind:
+ def __init__(self, n):
+ self.parent = list(range(n))
+ self.rank = [0] * n
+
+ def find(self, x):
+ if self.parent[x] != x:
+ self.parent[x] = self.find(self.parent[x])
+ return self.parent[x]
+
+ def union(self, x, y):
+ px, py = self.find(x), self.find(y)
+ if px == py:
+ return
+ if self.rank[px] < self.rank[py]:
+ px, py = py, px
+ self.parent[py] = px
+ if self.rank[px] == self.rank[py]:
+ self.rank[px] += 1
+
+ n_features = len(consensus_features)
+ uf = UnionFind(n_features)
+
+ # Find features to merge based on proximity
+ merge_count = 0
+ for i in range(n_features):
+ for j in range(i + 1, n_features):
+ feat_i = consensus_features[i]
+ feat_j = consensus_features[j]
+
+ rt_diff = abs(feat_i['rt'] - feat_j['rt'])
+ mz_diff = abs(feat_i['mz'] - feat_j['mz'])
+
+ if rt_diff <= rt_tol and mz_diff <= mz_tol:
+ uf.union(i, j)
+ merge_count += 1
+
+ # Group features by their root
+ groups_by_root = defaultdict(list)
+ for i in range(n_features):
+ root = uf.find(i)
+ groups_by_root[root].append(consensus_features[i])
+
+ # Merge features within each group
+ merged_features = []
+ for group in groups_by_root.values():
+ if len(group) == 1:
+ # Single feature - keep as is
+ merged_features.append(group[0])
+ else:
+ # Multiple features - merge them
+ merged_feature = _merge_feature_group(group)
+ merged_features.append(merged_feature)
+
+ self.logger.debug(f"Secondary clustering: {n_features} → {len(merged_features)} features ({n_features - len(merged_features)} merged)")
+ return merged_features
+
+
+ def _merge_feature_group(feature_group: list) -> dict:
+ """
+ Merge a group of similar consensus features into one.
+
+ Args:
+ feature_group: List of consensus feature dictionaries to merge
+
+ Returns:
+ Merged consensus feature dictionary
+ """
+ if not feature_group:
+ return {}
+
+ if len(feature_group) == 1:
+ return feature_group[0]
+
+ # Use the feature with highest sample count as base
+ base_feature = max(feature_group, key=lambda f: f.get('number_samples', 0))
+ merged = base_feature.copy()
+
+ # Aggregate numeric statistics
+ rt_values = [f['rt'] for f in feature_group if f.get('rt') is not None]
+ mz_values = [f['mz'] for f in feature_group if f.get('mz') is not None]
+ sample_counts = [f.get('number_samples', 0) for f in feature_group]
+ intensities = [f.get('inty_mean', 0) for f in feature_group if f.get('inty_mean') is not None]
+
+ # Update merged feature statistics
+ if rt_values:
+ merged['rt'] = float(np.mean(rt_values))
+ merged['rt_min'] = min([f.get('rt_min', f['rt']) for f in feature_group])
+ merged['rt_max'] = max([f.get('rt_max', f['rt']) for f in feature_group])
+ merged['rt_mean'] = float(np.mean(rt_values))
+
+ if mz_values:
+ merged['mz'] = float(np.mean(mz_values))
+ merged['mz_min'] = min([f.get('mz_min', f['mz']) for f in feature_group])
+ merged['mz_max'] = max([f.get('mz_max', f['mz']) for f in feature_group])
+ merged['mz_mean'] = float(np.mean(mz_values))
+
+ # Use maximum sample count (features might be detected in overlapping but different samples)
+ merged['number_samples'] = max(sample_counts)
+
+ # Use weighted average intensity (by sample count)
+ if intensities and sample_counts:
+ total_weight = sum(sample_counts)
+ if total_weight > 0:
+ weighted_intensity = sum(inty * count for inty, count in zip(intensities, sample_counts)) / total_weight
+ merged['inty_mean'] = float(weighted_intensity)
+
+ # Aggregate chromatographic quality metrics if available
+ coherence_values = [f.get('chrom_coherence_mean', 0) for f in feature_group if f.get('chrom_coherence_mean') is not None]
+ prominence_values = [f.get('chrom_prominence_mean', 0) for f in feature_group if f.get('chrom_prominence_mean') is not None]
+
+ if coherence_values:
+ merged['chrom_coherence_mean'] = float(np.mean(coherence_values))
+ if prominence_values:
+ merged['chrom_prominence_mean'] = float(np.mean(prominence_values))
+
+ # Merge MS2 counts
+ ms2_counts = [f.get('number_ms2', 0) for f in feature_group]
+ merged['number_ms2'] = sum(ms2_counts)
+
+ # Keep the best quality score
+ quality_scores = [f.get('quality', 1.0) for f in feature_group if f.get('quality') is not None]
+ if quality_scores:
+ merged['quality'] = max(quality_scores)
+
+ return merged
+
+
+ def _validate_sample_overlap(self, features: list, min_overlap: float) -> list:
+ """
+ Validate that merged features have sufficient sample overlap.
+
+ Args:
+ features: List of consensus feature dictionaries
+ min_overlap: Minimum sample overlap ratio (0.0-1.0)
+
+ Returns:
+ List of validated features
+ """
+ # This is a placeholder for sample overlap validation
+ # Implementation would require access to which samples each feature appears in
+ # For now, we'll use a simple heuristic based on feature statistics
+
+ validated_features = []
+ for feature in features:
+ # Simple validation based on RT spread and sample count ratio
+ rt_spread = feature.get('rt_max', feature['rt']) - feature.get('rt_min', feature['rt'])
+ sample_count = feature.get('number_samples', 1)
+
+ # Features with very tight RT spread and high sample counts are more reliable
+ if rt_spread <= 2.0 or sample_count >= 10: # More permissive validation
+ validated_features.append(feature)
+ else:
+ # Could implement more sophisticated sample overlap checking here
+ validated_features.append(feature) # Keep for now
+
+ return validated_features
+
+
+ def _filter_rt_spread(self, features: list, max_rt_spread: float) -> list:
+ """
+ Filter out features with excessive RT spread.
+
+ Args:
+ features: List of consensus feature dictionaries
+ max_rt_spread: Maximum allowed RT spread in seconds
+
+ Returns:
+ List of filtered features
+ """
+ filtered_features = []
+ filtered_count = 0
+
+ for feature in features:
+ rt_min = feature.get('rt_min', feature['rt'])
+ rt_max = feature.get('rt_max', feature['rt'])
+ rt_spread = rt_max - rt_min
+
+ if rt_spread <= max_rt_spread:
+ filtered_features.append(feature)
+ else:
+ filtered_count += 1
+
+ if filtered_count > 0:
+ self.logger.debug(f"Filtered {filtered_count} features with excessive RT spread (>{max_rt_spread:.1f}s)")
+
+ return filtered_features
+
+
+ def _filter_coherence(self, features: list, min_coherence: float) -> list:
+ """
+ Filter out features with low chromatographic coherence.
+
+ Args:
+ features: List of consensus feature dictionaries
+ min_coherence: Minimum chromatographic coherence score
+
+ Returns:
+ List of filtered features
+ """
+ filtered_features = []
+ filtered_count = 0
+
+ for feature in features:
+ coherence = feature.get('chrom_coherence_mean', 1.0) # Default to high coherence if missing
+
+ if coherence >= min_coherence:
+ filtered_features.append(feature)
+ else:
+ filtered_count += 1
+
+ if filtered_count > 0:
+ self.logger.debug(f"Filtered {filtered_count} features with low coherence (<{min_coherence})")
+
+ return filtered_features
+
+
  def _merge_kd_nowarp(self, params: merge_defaults) -> oms.ConsensusMap:
  """KD-tree based merge without RT warping"""
 
@@ -244,8 +773,8 @@ def _merge_kd_nowarp(self, params: merge_defaults) -> oms.ConsensusMap:
  return consensus_map
 
 
- def _merge_chunked(self, params: merge_defaults, cached_adducts_df=None, cached_valid_adducts=None) -> oms.ConsensusMap:
- """Chunked merge with proper cross-chunk consensus building"""
+ def _merge_kd_chunked(self, params: merge_defaults, cached_adducts_df=None, cached_valid_adducts=None) -> oms.ConsensusMap:
+ """KD-based chunked merge with proper cross-chunk consensus building"""
 
  n_samples = len(self.features_maps)
  if n_samples <= params.chunk_size:
@@ -307,6 +836,64 @@ def _merge_chunked(self, params: merge_defaults, cached_adducts_df=None, cached_
  return consensus_map
 
 
+ def _merge_qt_chunked(self, params: merge_defaults, cached_adducts_df=None, cached_valid_adducts=None) -> oms.ConsensusMap:
+ """QT-based chunked merge with proper cross-chunk consensus building"""
+
+ n_samples = len(self.features_maps)
+ if n_samples <= params.chunk_size:
+ self.logger.info(f"Dataset size ({n_samples}) ≤ chunk_size, using QT merge")
+ consensus_map = _merge_qt(self, params)
+ # Extract consensus features to populate consensus_df for chunked method consistency
+ self._extract_consensus_features(consensus_map, params.min_samples, cached_adducts_df, cached_valid_adducts)
+ return consensus_map
+
+ # Process in chunks
+ chunks = []
+ for i in range(0, n_samples, params.chunk_size):
+ chunk_end = min(i + params.chunk_size, n_samples)
+ chunks.append((i, self.features_maps[i:chunk_end]))
+
+ self.logger.debug(f"Processing {len(chunks)} chunks of max {params.chunk_size} samples")
+
+ # Process each chunk to create chunk consensus maps
+ chunk_consensus_maps = []
+
+ for chunk_idx, (chunk_start_idx, chunk_maps) in enumerate(tqdm(chunks, desc="Chunk", disable=self.log_level not in ["TRACE", "DEBUG", "INFO"])):
+ chunk_consensus_map = oms.ConsensusMap()
+
+ # Set up file descriptions for chunk
+ file_descriptions = chunk_consensus_map.getColumnHeaders()
+ for j, feature_map in enumerate(chunk_maps):
+ file_description = file_descriptions.get(j, oms.ColumnHeader())
+ file_description.filename = self.samples_df.row(chunk_start_idx + j, named=True)["sample_name"]
+ file_description.size = feature_map.size()
+ file_description.unique_id = feature_map.getUniqueId()
+ file_descriptions[j] = file_description
+
+ chunk_consensus_map.setColumnHeaders(file_descriptions)
+
+ # Use QT algorithm for chunk (main difference from KD chunked)
+ grouper = oms.FeatureGroupingAlgorithmQT()
+ chunk_params = grouper.getParameters()
+ chunk_params.setValue("distance_RT:max_difference", params.rt_tol)
+ chunk_params.setValue("distance_MZ:max_difference", params.mz_tol)
+ chunk_params.setValue("distance_MZ:unit", "Da")
+ chunk_params.setValue("ignore_charge", "true")
+ chunk_params.setValue("nr_partitions", params.nr_partitions)
+
+ grouper.setParameters(chunk_params)
+ grouper.group(chunk_maps, chunk_consensus_map)
+
+ chunk_consensus_maps.append((chunk_start_idx, chunk_consensus_map))
+
+ # Merge chunk results with proper cross-chunk consensus building
+ _merge_chunk_results(self, chunk_consensus_maps, params, cached_adducts_df, cached_valid_adducts)
+
+ # Create a dummy consensus map for compatibility (since other functions expect it)
+ consensus_map = oms.ConsensusMap()
+ return consensus_map
+
+
  def _merge_chunk_results(self, chunk_consensus_maps: list, params: merge_defaults, cached_adducts_df=None, cached_valid_adducts=None) -> None:
  """
  Scalable aggregation of chunk consensus maps into final consensus_df.
@@ -470,11 +1057,19 @@ def _merge_chunk_results(self, chunk_consensus_maps: list, params: merge_default
  b = chunk_consensus_list[j]
  if a['chunk_idx'] == b['chunk_idx']:
  continue
- # Centroid checks
+
+ # Primary check: centroid distance (strict)
  centroid_close = (abs(a['rt']-b['rt']) <= rt_tol and abs(a['mz']-b['mz']) <= mz_tol)
- # Interval overlap checks (expanded by tolerance)
- rt_overlap = (a['rt_min'] - rt_tol) <= (b['rt_max'] + rt_tol) and (b['rt_min'] - rt_tol) <= (a['rt_max'] + rt_tol)
- mz_overlap = (a['mz_min'] - mz_tol) <= (b['mz_max'] + mz_tol) and (b['mz_min'] - mz_tol) <= (a['mz_max'] + mz_tol)
+
+ # Secondary check: interval overlap (more conservative)
+ # Only allow interval overlap if centroids are reasonably close (within 2x tolerance)
+ centroids_reasonable = (abs(a['rt']-b['rt']) <= 2 * rt_tol and abs(a['mz']-b['mz']) <= 2 * mz_tol)
+ if centroids_reasonable:
+ rt_overlap = (a['rt_min'] - rt_tol/2) <= (b['rt_max'] + rt_tol/2) and (b['rt_min'] - rt_tol/2) <= (a['rt_max'] + rt_tol/2)
+ mz_overlap = (a['mz_min'] - mz_tol/2) <= (b['mz_max'] + mz_tol/2) and (b['mz_min'] - mz_tol/2) <= (a['mz_max'] + mz_tol/2)
+ else:
+ rt_overlap = mz_overlap = False
+
  if centroid_close or (rt_overlap and mz_overlap):
  uf.union(i,j)
 
@@ -611,6 +1206,17 @@ def _merge_chunk_results(self, chunk_consensus_maps: list, params: merge_default
  cached_adducts_df=cached_adducts_df,
  cached_valid_adducts=cached_valid_adducts,
  )
+
+ # Validate RT spread doesn't exceed tolerance (with some flexibility for chunked merge)
+ rt_spread = metadata.get('rt_max', 0) - metadata.get('rt_min', 0)
+ max_allowed_spread = params.rt_tol * 2 # Allow 2x tolerance for chunked method
+
+ if rt_spread > max_allowed_spread:
+ # Skip consensus features with excessive RT spread
+ self.logger.debug(f"Skipping consensus feature {consensus_uid_counter} with RT spread {rt_spread:.3f}s > {max_allowed_spread:.3f}s")
+ consensus_uid_counter += 1
+ continue
+
  consensus_metadata.append(metadata)
 
  # Build mapping rows (deduplicated)
@@ -689,8 +1295,8 @@ def _calculate_consensus_statistics(study_obj, consensus_uid: int, feature_data_
  inty_values = np.array([fd.get("inty", 0) for fd in feature_data_list if fd.get("inty") is not None])
  coherence_values = np.array([fd.get("chrom_coherence", 0) for fd in feature_data_list if fd.get("chrom_coherence") is not None])
  prominence_values = np.array([fd.get("chrom_prominence", 0) for fd in feature_data_list if fd.get("chrom_prominence") is not None])
- prominence_scaled_values = np.array([fd.get("chrom_prominence_scaled", 0) for fd in feature_data_list if fd.get("chrom_prominence_scaled") is not None])
- height_scaled_values = np.array([fd.get("chrom_height_scaled", 0) for fd in feature_data_list if fd.get("chrom_height_scaled") is not None])
+ prominence_scaled_values = np.array([fd.get("chrom_height_scaled", 0) for fd in feature_data_list if fd.get("chrom_height_scaled") is not None])
+ height_scaled_values = np.array([fd.get("chrom_prominence_scaled", 0) for fd in feature_data_list if fd.get("chrom_prominence_scaled") is not None])
  iso_values = np.array([fd.get("iso", 0) for fd in feature_data_list if fd.get("iso") is not None])
  charge_values = np.array([fd.get("charge", 0) for fd in feature_data_list if fd.get("charge") is not None])
 
@@ -1006,16 +1612,16 @@ def _extract_consensus_features(self, consensus_map, min_samples, cached_adducts
  )
  prominence_scaled_values = np.array(
  [
- fd.get("chrom_prominence_scaled", 0)
+ fd.get("chrom_height_scaled", 0)
  for fd in feature_data_list
- if fd.get("chrom_prominence_scaled") is not None
+ if fd.get("chrom_height_scaled") is not None
  ],
  )
  height_scaled_values = np.array(
  [
- fd.get("chrom_height_scaled", 0)
+ fd.get("chrom_prominence_scaled", 0)
  for fd in feature_data_list
- if fd.get("chrom_height_scaled") is not None
+ if fd.get("chrom_prominence_scaled") is not None
  ],
  )
  iso_values = np.array(
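
For orientation, the weighted score computed by _calculate_oversegmentation_score in the diff above combines four ratios with fixed weights (0.4, 0.3, 0.2, 0.1) and normalizes feature density by 10. A small standalone sketch with invented input numbers, independent of the Study object:

# Standalone illustration of the oversegmentation score; the weights and the /10.0
# density normalization are taken from the diff, the example inputs are made up.
def oversegmentation_score(n_features, rt_range_s, mean_rt_spread_s, rt_tol_s,
                           n_low_sample, n_excessive_spread):
    feature_density = n_features / rt_range_s           # features per RT second
    avg_spread_ratio = mean_rt_spread_s / rt_tol_s      # mean RT spread vs. tolerance
    low_sample_ratio = n_low_sample / n_features        # features seen in <= 5 samples
    excessive_ratio = n_excessive_spread / n_features   # features with spread > 2 * rt_tol
    return (0.4 * (feature_density / 10.0)
            + 0.3 * avg_spread_ratio
            + 0.2 * low_sample_ratio
            + 0.1 * excessive_ratio)

# 6000 features over 600 s, mean spread 2.5 s at rt_tol = 5 s,
# 1200 low-sample features, 300 with excessive spread -> 0.595 (lower is better)
print(oversegmentation_score(6000, 600.0, 2.5, 5.0, 1200, 300))
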
masster/study/plot.py CHANGED
@@ -310,8 +310,22 @@ def plot_alignment(
  max_inty = sample_data.select(pl.col("inty").max()).item() or 1
 
  # Get sample information
- sample_name = str(sample)
  sample_uid = sample if sample_col == "sample_uid" else sample_data.select(pl.col("sample_uid")).item() if "sample_uid" in sample_data.columns else sample
+
+ # Try to get actual sample name from samples_df if available
+ sample_name = str(sample) # fallback
+ if hasattr(self, "samples_df") and self.samples_df is not None and sample_uid is not None:
+ try:
+ sample_name_result = (
+ self.samples_df.filter(pl.col("sample_uid") == sample_uid)
+ .select("sample_name")
+ .to_series()
+ )
+ if len(sample_name_result) > 0 and sample_name_result[0] is not None:
+ sample_name = str(sample_name_result[0])
+ except Exception:
+ # Keep the fallback value
+ pass
 
  # Select columns to process
  cols_to_select = ["rt", "mz", "inty"]
masster/study/processing.py CHANGED
@@ -97,7 +97,6 @@ def align(self, **kwargs):
  _align_kd_algorithm(self, fmaps, params)
  else:
  self.logger.error(f"Unknown alignment algorithm '{algorithm}'")
- self.logger.error(f"Unknown alignment algorithm '{algorithm}'")
 
  # check if rt_original exists in features_df, if not, add it after rt
  if "rt_original" not in self.features_df.columns:
masster-0.4.16.dist-info/METADATA → masster-0.4.18.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: masster
- Version: 0.4.16
+ Version: 0.4.18
  Summary: Mass spectrometry data analysis package
  Project-URL: homepage, https://github.com/zamboni-lab/masster
  Project-URL: repository, https://github.com/zamboni-lab/masster
@@ -767,7 +767,8 @@ study.integrate()
  # export results
  study.export_mgf()
  study.export_mztab()
- study.export_consensus()
+ study.export_xlsx()
+ study.export_parquet()
 
  # Save the study to .study5
  study.save()
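
The quick-start excerpt above comes from the package METADATA (the README long description). Its export step changed between the two versions; a short before/after sketch, assuming the same `study` object the quick-start builds earlier (whether export_consensus() itself still exists in 0.4.18 is not shown by this diff):

# Export step of the quick-start, 0.4.16 vs. 0.4.18; `study` is assumed.
# study.export_consensus()   # called by the 0.4.16 quick-start
study.export_xlsx()          # the 0.4.18 quick-start calls these two exporters instead
study.export_parquet()
study.save()                 # unchanged final step
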
masster-0.4.16.dist-info/RECORD → masster-0.4.18.dist-info/RECORD CHANGED
@@ -1,5 +1,5 @@
  masster/__init__.py,sha256=HHjKhCjkAc98LhoQfu4C6L-W2vfTEc1iXaPTxxcl_4A,800
- masster/_version.py,sha256=zMjCN14DFC1TYYvoTFcnuHINoESJ3g5QeRaN-wLn-U0,257
+ masster/_version.py,sha256=OUcHIwT4wa5AqV46S88edNYE4u4sKsoESNk3lFdxs_c,257
  masster/chromatogram.py,sha256=iYpdv8C17zVnlWvOFgAn9ns2uFGiF-GgoYf5QVVAbHs,19319
  masster/logger.py,sha256=W50V_uh8RSYwGxDrDFhOuj5jpu2tKJyt_16lMw9kQwA,14755
  masster/spectrum.py,sha256=_upC_g2N9gwTaflXAugs9pSXpKUmzbIehofDordk7WI,47718
@@ -43,10 +43,10 @@ masster/study/h5.py,sha256=LiVGUAtULyPpZIUmKVJSaV38huJb8FsKOUWBOqiv0QU,82363
  masster/study/helpers.py,sha256=M5_q8O5tuFchKPW04PTuj3X335lDA2VZqcs4D8ZQJEk,158604
  masster/study/id.py,sha256=6NUBBKZCFOU1wlDKM0eXQeOIStSZCRNJ_3x7ZaIHzmM,55263
  masster/study/load.py,sha256=CQQY_7BzagE3oQTdDlqNyfuMdVWIAft-M4a2WCFnxp0,70695
- masster/study/merge.py,sha256=7ezv9GauDCw3M4wcskjQnQ3zszWap-5MvDUR4nSa6EM,69628
+ masster/study/merge.py,sha256=2Vqj0OaTZxwtjYu1l5PmRpMmT8_cHh-R761FUvBE_Sk,95741
  masster/study/parameters.py,sha256=0elaF7YspTsB7qyajWAbRNL2VfKlGz5GJLifmO8IGkk,3276
- masster/study/plot.py,sha256=Wp48DH5x1t8w6R67AMjxLaUIKZpDa82fnUoAgEeNY5E,87564
- masster/study/processing.py,sha256=pm98FrQHoM3ov6qmjKuVN9h2KBhGgCLEZCRS7zpmJFM,41104
+ masster/study/plot.py,sha256=SimX-IlqISEItAnTBsx4xsdYHRAevfN41cCENVns1lw,88236
+ masster/study/processing.py,sha256=u1MSRKTzcqHNz_dClSUSfgTxkNRdBLXtVyO5LXuW_uk,41031
  masster/study/save.py,sha256=YCvp4xhnG16sNXaT2mFDBoCrIMub0Es61B97qLo0maw,6705
  masster/study/study.py,sha256=LO_hbJOOCZzeA3uterPKImFgPG6fCNQKMSVMtEwW3DU,38815
  masster/study/study5_schema.json,sha256=c0w24QdHak01m04I1VPu97KvF2468FcaqROhf6pmLk4,7507
@@ -60,7 +60,7 @@ masster/study/defaults/find_ms2_def.py,sha256=RL0DFG41wQ05U8UQKUGr3vzSl3mU0m0knQ
  masster/study/defaults/identify_def.py,sha256=96rxoCAPQj_yX-3mRoD2LTkTLJgG27eJQqwarLv5jL0,10580
  masster/study/defaults/integrate_chrom_def.py,sha256=0MNIWGTjty-Zu-NTQsIweuj3UVqEY3x1x8pK0mPwYak,7264
  masster/study/defaults/integrate_def.py,sha256=Vf4SAzdBfnsSZ3IRaF0qZvWu3gMDPHdgPfMYoPKeWv8,7246
- masster/study/defaults/merge_def.py,sha256=R-BbhfgThjOwb2QEZKYO2jdhDxxTaSDau-NXkWRO3-U,10609
+ masster/study/defaults/merge_def.py,sha256=X7mTCgtQhglOTjwg06oSMFSbLBJSKsHmJeVVfYE2qHE,13272
  masster/study/defaults/study_def.py,sha256=h8dYbi9xv0sesCSQik49Z53IkskMmNtW6ixl7it5pL0,16033
  masster/wizard/README.md,sha256=mL1A3YWJZOefpJ6D0-HqGLkVRmUlOpwyVFdvJBeeoZM,14149
  masster/wizard/__init__.py,sha256=A9GHQvkq4lSRIA8V6AKB-TJy8s_npH8i1baUGdkw_is,364
@@ -68,8 +68,8 @@ masster/wizard/example.py,sha256=xEZFTH9UZ8HKOm6s3JL8Js0Uw5ChnISWBHSZCL32vsM,798
  masster/wizard/test_structure.py,sha256=h88gsYYCG6iDRjqPZC_r1H1T8y79j0E-K6OrwuHaSCU,1586
  masster/wizard/test_wizard.py,sha256=CMp1cpjH3iYYC5Fy6puF_K0kfwwk3bgOsSbUGW-t7Xk,8986
  masster/wizard/wizard.py,sha256=jMLHy4cXgNEE_-vshFmA7BNEByhfA6tV7O91jhiMYuw,48054
- masster-0.4.16.dist-info/METADATA,sha256=gNDP1Gnpz65g1WR0OGzazi2ikrRngHlIBvReOHlxYiQ,44189
- masster-0.4.16.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
- masster-0.4.16.dist-info/entry_points.txt,sha256=ZHguQ_vPmdbpqq2uGtmEOLJfgP-DQ1T0c07Lxh30wc8,58
- masster-0.4.16.dist-info/licenses/LICENSE,sha256=bx5iLIKjgAdYQ7sISn7DsfHRKkoCUm1154sJJKhgqnU,35184
- masster-0.4.16.dist-info/RECORD,,
+ masster-0.4.18.dist-info/METADATA,sha256=pn-XNHgHqlY1KgiYkQ2Dyke9E1nnCP3mn-ja5W5QPyM,44207
+ masster-0.4.18.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+ masster-0.4.18.dist-info/entry_points.txt,sha256=ZHguQ_vPmdbpqq2uGtmEOLJfgP-DQ1T0c07Lxh30wc8,58
+ masster-0.4.18.dist-info/licenses/LICENSE,sha256=bx5iLIKjgAdYQ7sISn7DsfHRKkoCUm1154sJJKhgqnU,35184
+ masster-0.4.18.dist-info/RECORD,,