masster-0.2.5-py3-none-any.whl → masster-0.3.1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.

Files changed (55)
  1. masster/__init__.py +27 -27
  2. masster/_version.py +17 -17
  3. masster/chromatogram.py +497 -503
  4. masster/data/examples/2025_01_14_VW_7600_LpMx_DBS_CID_2min_TOP15_030msecMS1_005msecReac_CE35_DBS-ON_3.featureXML +199787 -0
  5. masster/data/examples/2025_01_14_VW_7600_LpMx_DBS_CID_2min_TOP15_030msecMS1_005msecReac_CE35_DBS-ON_3.sample5 +0 -0
  6. masster/logger.py +318 -244
  7. masster/sample/__init__.py +9 -9
  8. masster/sample/defaults/__init__.py +15 -15
  9. masster/sample/defaults/find_adducts_def.py +325 -325
  10. masster/sample/defaults/find_features_def.py +366 -366
  11. masster/sample/defaults/find_ms2_def.py +285 -285
  12. masster/sample/defaults/get_spectrum_def.py +314 -318
  13. masster/sample/defaults/sample_def.py +374 -378
  14. masster/sample/h5.py +1321 -1297
  15. masster/sample/helpers.py +833 -364
  16. masster/sample/lib.py +762 -0
  17. masster/sample/load.py +1220 -1187
  18. masster/sample/parameters.py +131 -131
  19. masster/sample/plot.py +1685 -1622
  20. masster/sample/processing.py +1402 -1416
  21. masster/sample/quant.py +209 -0
  22. masster/sample/sample.py +393 -387
  23. masster/sample/sample5_schema.json +181 -181
  24. masster/sample/save.py +737 -736
  25. masster/sample/sciex.py +1213 -0
  26. masster/spectrum.py +1287 -1319
  27. masster/study/__init__.py +9 -9
  28. masster/study/defaults/__init__.py +21 -19
  29. masster/study/defaults/align_def.py +267 -267
  30. masster/study/defaults/export_def.py +41 -40
  31. masster/study/defaults/fill_chrom_def.py +264 -264
  32. masster/study/defaults/fill_def.py +260 -0
  33. masster/study/defaults/find_consensus_def.py +256 -256
  34. masster/study/defaults/find_ms2_def.py +163 -163
  35. masster/study/defaults/integrate_chrom_def.py +225 -225
  36. masster/study/defaults/integrate_def.py +221 -0
  37. masster/study/defaults/merge_def.py +256 -0
  38. masster/study/defaults/study_def.py +272 -269
  39. masster/study/export.py +674 -287
  40. masster/study/h5.py +1406 -886
  41. masster/study/helpers.py +1713 -433
  42. masster/study/helpers_optimized.py +317 -0
  43. masster/study/load.py +1231 -1078
  44. masster/study/parameters.py +99 -99
  45. masster/study/plot.py +632 -645
  46. masster/study/processing.py +1057 -1046
  47. masster/study/save.py +161 -134
  48. masster/study/study.py +612 -522
  49. masster/study/study5_schema.json +253 -241
  50. {masster-0.2.5.dist-info → masster-0.3.1.dist-info}/METADATA +15 -10
  51. masster-0.3.1.dist-info/RECORD +59 -0
  52. {masster-0.2.5.dist-info → masster-0.3.1.dist-info}/licenses/LICENSE +661 -661
  53. masster-0.2.5.dist-info/RECORD +0 -50
  54. {masster-0.2.5.dist-info → masster-0.3.1.dist-info}/WHEEL +0 -0
  55. {masster-0.2.5.dist-info → masster-0.3.1.dist-info}/entry_points.txt +0 -0
masster/study/study.py CHANGED
@@ -1,522 +1,612 @@
1
- """
2
- study.py
3
-
4
- This module provides tools for multi-sample mass spectrometry data analysis and cross-sample feature alignment.
5
- It defines the `study` class, which manages collections of DDA files, performs feature alignment across samples,
6
- generates consensus features, and provides study-level visualization and reporting capabilities.
7
-
8
- Key Features:
9
- - **Multi-Sample Management**: Handle collections of mass spectrometry files with metadata.
10
- - **Feature Alignment**: Align features across multiple samples using retention time and m/z tolerances.
11
- - **Consensus Features**: Generate consensus feature tables from aligned data.
12
- - **Batch Processing**: Automated processing of entire studies with configurable parameters.
13
- - **Study Visualization**: Generate comparative plots and alignment visualizations.
14
- - **Export Capabilities**: Export study results in various formats for downstream analysis.
15
-
16
- Dependencies:
17
- - `pyopenms`: For mass spectrometry data handling and algorithms.
18
- - `polars` and `pandas`: For efficient data manipulation and analysis.
19
- - `bokeh`, `holoviews`, `panel`: For interactive visualizations and dashboards.
20
- - `numpy`: For numerical computations and array operations.
21
-
22
- Classes:
23
- - `study`: Main class for multi-sample study management, providing methods for file loading,
24
- feature alignment, consensus generation, and study-level analysis.
25
-
26
- Example Usage:
27
- ```python
28
- from study import study
29
-
30
- # Create study from multiple files
31
- study_obj = study()
32
- study_obj.load_files(["sample1.mzML", "sample2.mzML", "sample3.mzML"])
33
- study_obj.process_all()
34
- study_obj.align()
35
- study_obj.plot_alignment_bokeh()
36
- study_obj.export_consensus()
37
- ```
38
-
39
- See Also:
40
- - `single.py`: For individual file processing before study-level analysis.
41
- - `parameters.study_parameters`: For study-specific parameter configuration.
42
-
43
-
44
- """
45
-
46
- from __future__ import annotations
47
-
48
- import importlib
49
- import os
50
- import sys
51
-
52
- import polars as pl
53
-
54
- # Study-specific imports
55
- from masster.study.h5 import _load_study5
56
- from masster.study.h5 import _save_study5
57
- from masster.study.helpers import _get_consensus_uids
58
- from masster.study.helpers import _get_feature_uids
59
- from masster.study.helpers import _get_sample_uids
60
- from masster.study.helpers import compress
61
- from masster.study.helpers import fill_reset
62
- from masster.study.helpers import get_chrom
63
- from masster.study.helpers import get_consensus
64
- from masster.study.helpers import get_consensus_matches
65
- from masster.study.helpers import get_consensus_matrix
66
- from masster.study.helpers import get_orphans
67
- from masster.study.helpers import get_gaps_matrix
68
- from masster.study.helpers import get_gaps_stats
69
- from masster.study.helpers import align_reset
70
- from masster.study.helpers import set_default_folder
71
- from masster.study.load import add_folder
72
- from masster.study.load import add_sample
73
- from masster.study.load import (
74
- fill_chrom_single,
75
- fill_chrom,
76
- _process_sample_for_parallel_fill,
77
- )
78
- from masster.study.load import _get_missing_consensus_sample_combinations
79
- from masster.study.load import load
80
- from masster.study.load import _load_consensusXML
81
- from masster.study.load import load_features
82
- from masster.study.load import sanitize
83
-
84
- from masster.study.plot import plot_alignment
85
- from masster.study.plot import plot_alignment_bokeh
86
- from masster.study.plot import plot_chrom
87
- from masster.study.plot import plot_consensus_2d
88
- from masster.study.plot import plot_samples_2d
89
- from masster.study.processing import align
90
- from masster.study.processing import filter_consensus
91
- from masster.study.processing import filter_features
92
- from masster.study.processing import find_consensus
93
- from masster.study.processing import integrate_chrom
94
- from masster.study.processing import find_ms2
95
- from masster.study.parameters import store_history
96
- from masster.study.parameters import get_parameters
97
- from masster.study.parameters import update_parameters
98
- from masster.study.parameters import get_parameters_property
99
- from masster.study.parameters import set_parameters_property
100
- from masster.study.save import save
101
- from masster.study.save import save_consensus
102
- from masster.study.save import _save_consensusXML
103
- from masster.study.save import save_samples
104
- from masster.study.export import export_mgf
105
-
106
- from masster.logger import MassterLogger
107
- from masster.study.defaults.study_def import study_defaults
108
-
109
-
110
- class Study:
111
- """
112
- A class for managing and analyzing multi-sample mass spectrometry studies.
113
-
114
- The `study` class provides comprehensive tools for handling collections of DDA
115
- (Data-Dependent Acquisition) mass spectrometry files, performing cross-sample
116
- feature alignment, generating consensus features, and conducting study-level
117
- analysis and visualization.
118
-
119
- Attributes:
120
- default_folder (str): Default directory for study files and outputs.
121
- ddafiles (dict): Dictionary containing loaded ddafile objects keyed by sample names.
122
- features_df (pl.DataFrame): Combined features from all samples in the study.
123
- consensus_df (pl.DataFrame): Consensus features generated from alignment.
124
- metadata_df (pl.DataFrame): Sample metadata and experimental information.
125
-
126
- Key Methods:
127
- - `add_folder()`: Load all files from a directory into the study.
128
- - `add_sample()`: Add individual sample files to the study.
129
- - `process_all()`: Batch process all samples with feature detection.
130
- - `align()`: Perform cross-sample feature alignment.
131
- - `plot_alignment_bokeh()`: Visualize alignment results.
132
- - `export_consensus()`: Export consensus features for downstream analysis.
133
-
134
- Example Usage:
135
- >>> from masster import study
136
- >>> study_obj = study(default_folder="./data")
137
- >>> study_obj.load_folder("./mzml_files")
138
- >>> study_obj.process_all()
139
- >>> study_obj.align()
140
- >>> study_obj.plot_alignment_bokeh()
141
- >>> study_obj.export_consensus("consensus_features.csv")
142
-
143
- See Also:
144
- - `ddafile`: For individual sample processing before study-level analysis.
145
- - `StudyParameters`: For configuring study-specific parameters.
146
- """
147
-
148
- def __init__(
149
- self,
150
- filename=None,
151
- **kwargs,
152
- ):
153
- """
154
- Initialize a Study instance for multi-sample mass spectrometry analysis.
155
-
156
- This constructor initializes various attributes related to file handling,
157
- data storage, and processing parameters used for study-level analysis.
158
-
159
- Parameters:
160
- filename (str, optional): Path to a .study5 file to load automatically.
161
- If provided, the default_folder will be set to the
162
- directory containing this file, and the study will
163
- be loaded automatically.
164
- **kwargs: Keyword arguments for setting study parameters. Can include:
165
- - A study_defaults instance to set all parameters at once (pass as params=study_defaults(...))
166
- - Individual parameter names and values (see study_defaults for available parameters)
167
-
168
- Core initialization parameters:
169
- - default_folder (str, optional): Default directory for study files and outputs
170
- - label (str, optional): An optional label to identify the study
171
- - log_level (str): The logging level to be set for the logger. Defaults to 'INFO'
172
- - log_label (str, optional): Optional label for the logger
173
- - log_sink (str): Output sink for logging. Default is "sys.stdout"
174
-
175
- For backward compatibility, original signature is supported:
176
- Study(default_folder=..., label=..., log_level=..., log_label=..., log_sink=...)
177
- """
178
- # Initialize default parameters
179
-
180
- # Handle filename parameter for automatic loading
181
- auto_load_filename = None
182
- if filename is not None:
183
- if not filename.endswith('.study5'):
184
- raise ValueError("filename must be a .study5 file")
185
- if not os.path.exists(filename):
186
- raise FileNotFoundError(f"Study file not found: {filename}")
187
-
188
- # Set default_folder to the directory containing the file if not already specified
189
- if 'default_folder' not in kwargs:
190
- kwargs['default_folder'] = os.path.dirname(os.path.abspath(filename))
191
-
192
- auto_load_filename = filename
193
-
194
- # Check if a study_defaults instance was passed
195
- if "params" in kwargs and isinstance(kwargs["params"], study_defaults):
196
- params = kwargs.pop("params")
197
- else:
198
- # Create default parameters and update with provided values
199
- params = study_defaults()
200
-
201
- # Update with any provided parameters
202
- for key, value in kwargs.items():
203
- if hasattr(params, key):
204
- params.set(key, value, validate=True)
205
-
206
- # Store parameter instance for method access
207
- self.parameters = params
208
- self.history = {}
209
- self.store_history(["study"], params.to_dict())
210
-
211
- # Set instance attributes (ensure proper string values for logger)
212
- self.default_folder = params.default_folder
213
- self.label = params.label
214
- self.log_level = params.log_level
215
- self.log_label = (params.log_label + " | " if params.log_label else "")
216
- self.log_sink = params.log_sink
217
-
218
- if self.default_folder is not None and not os.path.exists(self.default_folder):
219
- # create the folder if it does not exist
220
- os.makedirs(self.default_folder)
221
-
222
- self.samples_df = pl.DataFrame(
223
- {
224
- "sample_uid": [],
225
- "sample_name": [],
226
- "sample_path": [],
227
- "sample_type": [],
228
- "size": [],
229
- "map_id": [],
230
- },
231
- schema={
232
- "sample_uid": pl.Int64,
233
- "sample_name": pl.Utf8,
234
- "sample_path": pl.Utf8,
235
- "sample_type": pl.Utf8,
236
- "size": pl.Int64,
237
- "map_id": pl.Utf8,
238
- },
239
- )
240
- self.features_maps = []
241
- self.features_df = pl.DataFrame()
242
- self.consensus_ms2 = pl.DataFrame()
243
- self.consensus_df = pl.DataFrame()
244
- self.consensus_map = None
245
- self.consensus_mapping_df = pl.DataFrame()
246
- self.alignment_ref_index = None
247
-
248
- # Initialize independent logger
249
- self.logger = MassterLogger(
250
- instance_type="study",
251
- level=self.log_level,
252
- label=self.log_label,
253
- sink=self.log_sink
254
- )
255
-
256
- # Auto-load study file if filename was provided
257
- if auto_load_filename is not None:
258
- self.load(filename=auto_load_filename)
259
-
260
-
261
-
262
- # Attach module functions as class methods
263
- load = load
264
- save = save
265
- save_consensus = save_consensus
266
- save_samples = save_samples
267
- align = align
268
- fill_chrom_single = fill_chrom_single
269
- find_consensus = find_consensus
270
- find_ms2 = find_ms2
271
- integrate_chrom = integrate_chrom
272
- store_history = store_history
273
- get_parameters = get_parameters
274
- update_parameters = update_parameters
275
- get_parameters_property = get_parameters_property
276
- set_parameters_property = set_parameters_property
277
- plot_alignment = plot_alignment
278
- plot_alignment_bokeh = plot_alignment_bokeh
279
- plot_chrom = plot_chrom
280
- plot_consensus_2d = plot_consensus_2d
281
- plot_samples_2d = plot_samples_2d
282
- get_consensus = get_consensus
283
- get_chrom = get_chrom
284
- get_consensus_matches = get_consensus_matches
285
- compress = compress
286
- fill_reset = fill_reset
287
- align_reset = align_reset
288
-
289
- # Additional method assignments for all imported functions
290
- add_folder = add_folder
291
- add_sample = add_sample
292
- _load_study5 = _load_study5
293
- _save_study5 = _save_study5
294
- _get_consensus_uids = _get_consensus_uids
295
- _get_feature_uids = _get_feature_uids
296
- _get_sample_uids = _get_sample_uids
297
- get_consensus_matrix = get_consensus_matrix
298
- get_gaps_matrix = get_gaps_matrix
299
- get_gaps_stats = get_gaps_stats
300
- get_orphans = get_orphans
301
- set_default_folder = set_default_folder
302
- fill_chrom = fill_chrom
303
- _process_sample_for_parallel_fill = _process_sample_for_parallel_fill
304
- _get_missing_consensus_sample_combinations = _get_missing_consensus_sample_combinations
305
- _load_consensusXML = _load_consensusXML
306
- load_features = load_features
307
- sanitize = sanitize
308
- filter_consensus = filter_consensus
309
- filter_features = filter_features
310
- _save_consensusXML = _save_consensusXML
311
- export_mgf = export_mgf
312
-
313
-
314
- def reload(self):
315
- """
316
- Reloads all masster modules to pick up any changes to their source code,
317
- and updates the instance's class reference to the newly reloaded class version.
318
- This ensures that the instance uses the latest implementation without restarting the interpreter.
319
- """
320
- # Reset logger configuration flags to allow proper reconfiguration after reload
321
- try:
322
- import masster.sample.logger as logger_module
323
-
324
- if hasattr(logger_module, "_STUDY_LOGGER_CONFIGURED"):
325
- logger_module._STUDY_LOGGER_CONFIGURED = False
326
- except Exception:
327
- pass
328
-
329
- # Get the base module name (masster)
330
- base_modname = self.__class__.__module__.split(".")[0]
331
- current_module = self.__class__.__module__
332
-
333
- # Dynamically find all study submodules
334
- study_modules = []
335
- study_module_prefix = f"{base_modname}.study."
336
-
337
- # Get all currently loaded modules that are part of the study package
338
- for module_name in sys.modules:
339
- if (
340
- module_name.startswith(study_module_prefix)
341
- and module_name != current_module
342
- ):
343
- study_modules.append(module_name)
344
-
345
- # Add core masster modules
346
- core_modules = [
347
- f"{base_modname}._version",
348
- f"{base_modname}.chromatogram",
349
- f"{base_modname}.spectrum",
350
- f"{base_modname}.parameters",
351
- ]
352
-
353
- # Add any parameters submodules that are loaded
354
- for module_name in sys.modules:
355
- if (
356
- module_name.startswith(f"{base_modname}.parameters.")
357
- and module_name not in core_modules
358
- ):
359
- core_modules.append(module_name)
360
-
361
- all_modules_to_reload = core_modules + study_modules
362
-
363
- # Reload all discovered modules
364
- for full_module_name in all_modules_to_reload:
365
- try:
366
- if full_module_name in sys.modules:
367
- mod = sys.modules[full_module_name]
368
- importlib.reload(mod)
369
- self.logger.debug(f"Reloaded module: {full_module_name}")
370
- except Exception as e:
371
- self.logger.warning(f"Failed to reload module {full_module_name}: {e}")
372
-
373
- # Finally, reload the current module (sample.py)
374
- try:
375
- mod = __import__(current_module, fromlist=[current_module.split(".")[0]])
376
- importlib.reload(mod)
377
-
378
- # Get the updated class reference from the reloaded module
379
- new = getattr(mod, self.__class__.__name__)
380
- # Update the class reference of the instance
381
- self.__class__ = new
382
-
383
- self.logger.debug("Module reload completed")
384
- except Exception as e:
385
- self.logger.error(f"Failed to reload current module {current_module}: {e}")
386
-
387
- def __str__(self):
388
- """
389
- Returns a string representation of the study.
390
-
391
- Returns:
392
- str: A summary string of the study.
393
- """
394
- return ""
395
-
396
- def logger_update(self, level: str | None = None, label: str | None = None, sink: str | None = None):
397
- """Update the logging configuration for this Study instance.
398
-
399
- Args:
400
- level: New logging level ("DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL")
401
- label: New label for log messages
402
- sink: New output sink (file path, file object, or "sys.stdout")
403
- """
404
- if level is not None:
405
- self.log_level = level.upper()
406
- self.logger.update_level(level)
407
-
408
- if label is not None:
409
- self.log_label = label + " | " if len(label) > 0 else ""
410
- self.logger.update_label(self.log_label)
411
-
412
- if sink is not None:
413
- if sink == "sys.stdout":
414
- self.log_sink = sys.stdout
415
- else:
416
- self.log_sink = sink
417
- self.logger.update_sink(self.log_sink)
418
-
419
- def info(self):
420
- """
421
- Display study information with optimized performance.
422
-
423
- Returns a summary string of the study including folder, features count,
424
- samples count, and various statistics.
425
- """
426
- # Cache DataFrame lengths and existence checks
427
- consensus_df_len = (
428
- len(self.consensus_df) if not self.consensus_df.is_empty() else 0
429
- )
430
- samples_df_len = len(self.samples_df) if not self.samples_df.is_empty() else 0
431
- consensus_ms2_len = len(self.consensus_ms2) if not self.consensus_ms2.is_empty() else 0
432
-
433
- # Calculate consensus statistics only if consensus_df exists and has data
434
- if consensus_df_len > 0:
435
- # Execute the aggregation once
436
- stats_result = self.consensus_df.select([
437
- pl.col("number_samples").min().alias("min_samples"),
438
- pl.col("number_samples").mean().alias("mean_samples"),
439
- pl.col("number_samples").max().alias("max_samples")
440
-
441
- ]).row(0)
442
-
443
- min_samples = stats_result[0] if stats_result[0] is not None else 0
444
- mean_samples = stats_result[1] if stats_result[1] is not None else 0
445
- max_samples = stats_result[2] if stats_result[2] is not None else 0
446
- else:
447
- min_samples = 0
448
- mean_samples = 0
449
- max_samples = 0
450
-
451
- # Count only features where 'filled' == False
452
- if not self.features_df.is_empty() and 'filled' in self.features_df.columns:
453
- unfilled_features_count = self.features_df.filter(~self.features_df['filled']).height
454
- else:
455
- unfilled_features_count = 0
456
-
457
- # Optimize chrom completeness calculation
458
- if consensus_df_len > 0 and samples_df_len > 0 and not self.features_df.is_empty():
459
-
460
- # Use more efficient counting - count non-null chroms only for features in consensus mapping
461
- if not self.consensus_mapping_df.is_empty():
462
- non_null_chroms = self.features_df.join(
463
- self.consensus_mapping_df.select("feature_uid"),
464
- on="feature_uid",
465
- how="inner"
466
- ).select(
467
- pl.col("chrom").is_not_null().sum().alias("count")
468
- ).item()
469
- else:
470
- non_null_chroms = 0
471
- total_possible = samples_df_len * consensus_df_len
472
- chrom_completeness = (
473
- non_null_chroms / total_possible if total_possible > 0 else 0
474
- )
475
- not_in_consensus = len(self.features_df.filter(~self.features_df['feature_uid'].is_in(self.consensus_mapping_df['feature_uid'].to_list())))
476
- ratio_not_in_consensus_to_total = not_in_consensus / unfilled_features_count if unfilled_features_count > 0 else 0
477
- ratio_in_consensus_to_total = (unfilled_features_count- not_in_consensus) / len(self.features_df) if len(self.features_df) > 0 else 0
478
-
479
- else:
480
- chrom_completeness = 0
481
- not_in_consensus = 0
482
- ratio_not_in_consensus_to_total = 0
483
- ratio_in_consensus_to_total = 0
484
-
485
-
486
-
487
- # calculate for how many consensus features there is at least one MS2 spectrum linked
488
- consensus_with_ms2 = self.consensus_ms2.select(
489
- pl.col("consensus_uid").is_not_null().sum().alias("count")
490
- ).item() if not self.consensus_ms2.is_empty() else 0
491
-
492
- # estimate memory usage
493
- memory_usage = (
494
- self.samples_df.estimated_size() +
495
- self.features_df.estimated_size() +
496
- self.consensus_df.estimated_size() +
497
- self.consensus_ms2.estimated_size() +
498
- self.consensus_mapping_df.estimated_size()
499
- )
500
-
501
- summary = (
502
- f"Default folder: {self.default_folder}\n"
503
- f"Samples: {samples_df_len}\n"
504
- f"Features: {unfilled_features_count}\n"
505
- f"- in consensus: {ratio_in_consensus_to_total*100:.0f}%\n"
506
- f"- non in consensus: {ratio_not_in_consensus_to_total*100:.0f}%\n"
507
- f"Consensus: {consensus_df_len}\n"
508
- f"- Min samples count: {min_samples:.0f}\n"
509
- f"- Mean samples count: {mean_samples:.0f}\n"
510
- f"- Max samples count: {max_samples:.0f}\n"
511
- f"- with MS2: {consensus_with_ms2}\n"
512
- f"Chrom completeness: {chrom_completeness*100:.0f}%\n"
513
- f"Memory usage: {memory_usage / (1024 ** 2):.2f} MB\n"
514
- )
515
-
516
- print(summary)
517
-
518
-
519
-
520
- if __name__ == "__main__":
521
- # This block is executed when the script is run directly
522
- pass
1
+ """
2
+ study.py
3
+
4
+ This module provides tools for multi-sample mass spectrometry data analysis and cross-sample feature alignment.
5
+ It defines the `study` class, which manages collections of DDA files, performs feature alignment across samples,
6
+ generates consensus features, and provides study-level visualization and reporting capabilities.
7
+
8
+ Key Features:
9
+ - **Multi-Sample Management**: Handle collections of mass spectrometry files with metadata.
10
+ - **Feature Alignment**: Align features across multiple samples using retention time and m/z tolerances.
11
+ - **Consensus Features**: Generate consensus feature tables from aligned data.
12
+ - **Batch Processing**: Automated processing of entire studies with configurable parameters.
13
+ - **Study Visualization**: Generate comparative plots and alignment visualizations.
14
+ - **Export Capabilities**: Export study results in various formats for downstream analysis.
15
+
16
+ Dependencies:
17
+ - `pyopenms`: For mass spectrometry data handling and algorithms.
18
+ - `polars` and `pandas`: For efficient data manipulation and analysis.
19
+ - `bokeh`, `holoviews`, `panel`: For interactive visualizations and dashboards.
20
+ - `numpy`: For numerical computations and array operations.
21
+
22
+ Classes:
23
+ - `study`: Main class for multi-sample study management, providing methods for file loading,
24
+ feature alignment, consensus generation, and study-level analysis.
25
+
26
+ Example Usage:
27
+ ```python
28
+ from study import study
29
+
30
+ # Create study from multiple files
31
+ study_obj = study()
32
+ study_obj.load_files(["sample1.mzML", "sample2.mzML", "sample3.mzML"])
33
+ study_obj.process_all()
34
+ study_obj.align()
35
+ study_obj.plot_alignment_bokeh()
36
+ study_obj.export_consensus()
37
+ ```
38
+
39
+ See Also:
40
+ - `single.py`: For individual file processing before study-level analysis.
41
+ - `parameters.study_parameters`: For study-specific parameter configuration.
42
+
43
+
44
+ """
45
+
46
+ from __future__ import annotations
47
+
48
+ import importlib
49
+ import os
50
+ import sys
51
+
52
+ import polars as pl
53
+
54
+ # Study-specific imports
55
+ from masster.study.h5 import _load_study5
56
+ from masster.study.h5 import _save_study5
57
+ from masster.study.h5 import _save_study5_compressed
58
+ from masster.study.helpers import _get_consensus_uids
59
+ from masster.study.helpers import _get_feature_uids
60
+ from masster.study.helpers import _get_sample_uids
61
+ from masster.study.helpers import compress
62
+ from masster.study.helpers import compress_features
63
+ from masster.study.helpers import compress_ms2
64
+ from masster.study.helpers import compress_chrom
65
+ from masster.study.helpers import restore_features
66
+ from masster.study.helpers import restore_chrom
67
+ from masster.study.helpers import fill_reset
68
+ from masster.study.helpers import get_chrom
69
+ from masster.study.helpers import get_consensus
70
+ from masster.study.helpers import get_consensus_matches
71
+ from masster.study.helpers import get_consensus_matrix
72
+ from masster.study.helpers import get_orphans
73
+ from masster.study.helpers import get_gaps_matrix
74
+ from masster.study.helpers import get_gaps_stats
75
+ from masster.study.helpers import align_reset
76
+ from masster.study.helpers import set_folder
77
+ from masster.study.helpers import set_source
78
+ from masster.study.helpers import features_select
79
+ from masster.study.helpers import features_filter
80
+ from masster.study.helpers import features_delete
81
+ from masster.study.helpers import consensus_select
82
+ from masster.study.helpers import consensus_filter
83
+ from masster.study.helpers import consensus_delete
84
+ from masster.study.load import add
85
+ from masster.study.load import add_sample
86
+ from masster.study.load import fill_single
87
+ from masster.study.load import fill
88
+ from masster.study.load import _process_sample_for_parallel_fill
89
+ from masster.study.load import _get_missing_consensus_sample_combinations
90
+ from masster.study.load import load
91
+ from masster.study.load import _load_consensusXML
92
+ from masster.study.load import load_features
93
+ from masster.study.load import sanitize
94
+ from masster.study.plot import plot_alignment
95
+ from masster.study.plot import plot_alignment_bokeh
96
+ from masster.study.plot import plot_chrom
97
+ from masster.study.plot import plot_consensus_2d
98
+ from masster.study.plot import plot_samples_2d
99
+ from masster.study.processing import align
100
+ from masster.study.processing import filter_consensus
101
+ from masster.study.processing import merge
102
+ from masster.study.processing import integrate
103
+ from masster.study.processing import find_ms2
104
+ from masster.study.parameters import store_history
105
+ from masster.study.parameters import get_parameters
106
+ from masster.study.parameters import update_parameters
107
+ from masster.study.parameters import get_parameters_property
108
+ from masster.study.parameters import set_parameters_property
109
+ from masster.study.save import save
110
+ from masster.study.save import save_consensus
111
+ from masster.study.save import _save_consensusXML
112
+ from masster.study.save import save_samples
113
+ from masster.study.export import export_mgf
114
+ from masster.study.export import export_mztab
115
+ from masster.study.export import _get_mgf_df
116
+
117
+ from masster.logger import MassterLogger
118
+ from masster.study.defaults.study_def import study_defaults
119
+
120
+
121
+ class Study:
122
+ """
123
+ A class for managing and analyzing multi-sample mass spectrometry studies.
124
+
125
+ The `study` class provides comprehensive tools for handling collections of DDA
126
+ (Data-Dependent Acquisition) mass spectrometry files, performing cross-sample
127
+ feature alignment, generating consensus features, and conducting study-level
128
+ analysis and visualization.
129
+
130
+ Attributes:
131
+ folder (str): Default directory for study files and outputs.
132
+ ddafiles (dict): Dictionary containing loaded ddafile objects keyed by sample names.
133
+ features_df (pl.DataFrame): Combined features from all samples in the study.
134
+ consensus_df (pl.DataFrame): Consensus features generated from alignment.
135
+ metadata_df (pl.DataFrame): Sample metadata and experimental information.
136
+
137
+ Key Methods:
138
+ - `add_folder()`: Load all files from a directory into the study.
139
+ - `add_sample()`: Add individual sample files to the study.
140
+ - `process_all()`: Batch process all samples with feature detection.
141
+ - `align()`: Perform cross-sample feature alignment.
142
+ - `plot_alignment_bokeh()`: Visualize alignment results.
143
+ - `export_consensus()`: Export consensus features for downstream analysis.
144
+
145
+ Example Usage:
146
+ >>> from masster import study
147
+ >>> study_obj = study(folder="./data")
148
+ >>> study_obj.load_folder("./mzml_files")
149
+ >>> study_obj.process_all()
150
+ >>> study_obj.align()
151
+ >>> study_obj.plot_alignment_bokeh()
152
+ >>> study_obj.export_consensus("consensus_features.csv")
153
+
154
+ See Also:
155
+ - `ddafile`: For individual sample processing before study-level analysis.
156
+ - `StudyParameters`: For configuring study-specific parameters.
157
+ """
158
+
159
+ def __init__(
160
+ self,
161
+ filename=None,
162
+ **kwargs,
163
+ ):
164
+ """
165
+ Initialize a Study instance for multi-sample mass spectrometry analysis.
166
+
167
+ This constructor initializes various attributes related to file handling,
168
+ data storage, and processing parameters used for study-level analysis.
169
+
170
+ Parameters:
171
+ filename (str, optional): Path to a .study5 file to load automatically.
172
+ If provided, the folder will be set to the
173
+ directory containing this file, and the study will
174
+ be loaded automatically.
175
+ **kwargs: Keyword arguments for setting study parameters. Can include:
176
+ - A study_defaults instance to set all parameters at once (pass as params=study_defaults(...))
177
+ - Individual parameter names and values (see study_defaults for available parameters)
178
+
179
+ Core initialization parameters:
180
+ - folder (str, optional): Default directory for study files and outputs
181
+ - label (str, optional): An optional label to identify the study
182
+ - log_level (str): The logging level to be set for the logger. Defaults to 'INFO'
183
+ - log_label (str, optional): Optional label for the logger
184
+ - log_sink (str): Output sink for logging. Default is "sys.stdout"
185
+
186
+ For backward compatibility, original signature is supported:
187
+ Study(folder=..., label=..., log_level=..., log_label=..., log_sink=...)
188
+ """
189
+ # Initialize default parameters
190
+
191
+ # Handle filename parameter for automatic loading
192
+ auto_load_filename = None
193
+ if filename is not None:
194
+ if not filename.endswith('.study5'):
195
+ raise ValueError("filename must be a .study5 file")
196
+ if not os.path.exists(filename):
197
+ raise FileNotFoundError(f"Study file not found: {filename}")
198
+
199
+ # Set folder to the directory containing the file if not already specified
200
+ if 'folder' not in kwargs:
201
+ kwargs['folder'] = os.path.dirname(os.path.abspath(filename))
202
+
203
+ auto_load_filename = filename
204
+
205
+ # Check if a study_defaults instance was passed
206
+ if "params" in kwargs and isinstance(kwargs["params"], study_defaults):
207
+ params = kwargs.pop("params")
208
+ else:
209
+ # Create default parameters and update with provided values
210
+ params = study_defaults()
211
+
212
+ # Update with any provided parameters
213
+ for key, value in kwargs.items():
214
+ if hasattr(params, key):
215
+ params.set(key, value, validate=True)
216
+
217
+ # Keeps a pointer to study5 whenever it's saved or loaded
218
+ self.filename = None
219
+
220
+ # Store parameter instance for method access
221
+ self.parameters = params
222
+ self.history = {}
223
+ self.store_history(["study"], params.to_dict())
224
+
225
+ # Set instance attributes (ensure proper string values for logger)
226
+ self.folder = params.folder
227
+ self.label = params.label
228
+ self.polarity = params.polarity if params.polarity in ["positive", "negative", "pos", "neg"] else "positive"
229
+ self.log_level = params.log_level.upper() if params.log_level else "INFO"
230
+ self.log_label = params.log_label + " | " if params.log_label else ""
231
+ self.log_sink = params.log_sink
232
+
233
+ if self.folder is not None and not os.path.exists(self.folder):
234
+ # create the folder if it does not exist
235
+ os.makedirs(self.folder)
236
+
237
+ self.samples_df = pl.DataFrame(
238
+ {
239
+ "sample_uid": [],
240
+ "sample_name": [],
241
+ "sample_path": [],
242
+ "sample_type": [],
243
+ "size": [],
244
+ "map_id": [],
245
+ "file_source": [],
246
+ "ms1": [],
247
+ "ms2": [],
248
+ },
249
+ schema={
250
+ "sample_uid": pl.Int64,
251
+ "sample_name": pl.Utf8,
252
+ "sample_path": pl.Utf8,
253
+ "sample_type": pl.Utf8,
254
+ "size": pl.Int64,
255
+ "map_id": pl.Utf8,
256
+ "file_source": pl.Utf8,
257
+ "ms1": pl.Int64,
258
+ "ms2": pl.Int64,
259
+ },
260
+ )
261
+ self.features_maps = []
262
+ self.features_df = pl.DataFrame()
263
+ self.consensus_ms2 = pl.DataFrame()
264
+ self.consensus_df = pl.DataFrame()
265
+ self.consensus_map = None
266
+ self.consensus_mapping_df = pl.DataFrame()
267
+ self.alignment_ref_index = None
268
+
269
+ # Initialize independent logger
270
+ self.logger = MassterLogger(
271
+ instance_type="study",
272
+ level=self.log_level.upper(),
273
+ label=self.log_label,
274
+ sink=self.log_sink,
275
+ )
276
+ self.logger.debug(f"Study folder: {self.folder}")
277
+ self.logger.debug(f"Polarity: {self.polarity}")
278
+
279
+ # Auto-load study file if filename was provided
280
+ if auto_load_filename is not None:
281
+ self.load(filename=auto_load_filename)
282
+
283
+
284
+
285
+ # Attach module functions as class methods
286
+ load = load
287
+ save = save
288
+ save_consensus = save_consensus
289
+ save_samples = save_samples
290
+ align = align
291
+ fill_single = fill_single
292
+ fill_chrom_single = fill_single # Backward compatibility alias
293
+ merge = merge
294
+ find_consensus = merge # Backward compatibility alias
295
+ find_ms2 = find_ms2
296
+ integrate = integrate
297
+ integrate_chrom = integrate # Backward compatibility alias
298
+ store_history = store_history
299
+ get_parameters = get_parameters
300
+ update_parameters = update_parameters
301
+ get_parameters_property = get_parameters_property
302
+ set_parameters_property = set_parameters_property
303
+ plot_alignment = plot_alignment
304
+ plot_alignment_bokeh = plot_alignment_bokeh
305
+ plot_chrom = plot_chrom
306
+ plot_consensus_2d = plot_consensus_2d
307
+ plot_samples_2d = plot_samples_2d
308
+ get_consensus = get_consensus
309
+ get_chrom = get_chrom
310
+ get_consensus_matches = get_consensus_matches
311
+ compress = compress
312
+ compress_features = compress_features
313
+ compress_ms2 = compress_ms2
314
+ compress_chrom = compress_chrom
315
+ restore_features = restore_features
316
+ restore_chrom = restore_chrom
317
+ fill_reset = fill_reset
318
+ align_reset = align_reset
319
+ set_source = set_source
320
+ features_select = features_select
321
+ features_filter = features_filter
322
+ features_delete = features_delete
323
+ consensus_select = consensus_select
324
+ consensus_filter = consensus_filter
325
+ consensus_delete = consensus_delete
326
+ filter_consensus = consensus_filter
327
+ select_consensus = consensus_select
328
+ filter_features = features_filter
329
+ select_features = features_select
330
+ consensus_find = merge
331
+ filter_features = features_filter
332
+
333
+ # Additional method assignments for all imported functions
334
+ add_folder = add # backward compatibility alias
335
+ add = add
336
+ add_sample = add_sample
337
+ _load_study5 = _load_study5
338
+ _save_study5 = _save_study5
339
+ _save_study5_compressed = _save_study5_compressed
340
+ _get_consensus_uids = _get_consensus_uids
341
+ _get_feature_uids = _get_feature_uids
342
+ _get_sample_uids = _get_sample_uids
343
+ get_consensus_matrix = get_consensus_matrix
344
+ get_gaps_matrix = get_gaps_matrix
345
+ get_gaps_stats = get_gaps_stats
346
+ get_orphans = get_orphans
347
+ set_folder = set_folder
348
+ fill = fill
349
+ fill_chrom = fill # Backward compatibility alias
350
+ _process_sample_for_parallel_fill = _process_sample_for_parallel_fill
351
+ _get_missing_consensus_sample_combinations = _get_missing_consensus_sample_combinations
352
+ _load_consensusXML = _load_consensusXML
353
+ load_features = load_features
354
+ sanitize = sanitize
355
+ _save_consensusXML = _save_consensusXML
356
+ export_mgf = export_mgf
357
+ export_mztab = export_mztab
358
+ _get_mgf_df = _get_mgf_df # New function for MGF data extraction
359
+
360
+
361
+ def _reload(self):
362
+ """
363
+ Reloads all masster modules to pick up any changes to their source code,
364
+ and updates the instance's class reference to the newly reloaded class version.
365
+ This ensures that the instance uses the latest implementation without restarting the interpreter.
366
+ """
367
+ # Reset logger configuration flags to allow proper reconfiguration after reload
368
+ ''' try:
369
+ import masster.sample.logger as logger_module
370
+
371
+ if hasattr(logger_module, "_STUDY_LOGGER_CONFIGURED"):
372
+ logger_module._STUDY_LOGGER_CONFIGURED = False
373
+ except Exception:
374
+ pass'''
375
+
376
+ # Get the base module name (masster)
377
+ base_modname = self.__class__.__module__.split(".")[0]
378
+ current_module = self.__class__.__module__
379
+
380
+ # Dynamically find all study submodules
381
+ study_modules = []
382
+ study_module_prefix = f"{base_modname}.study."
383
+
384
+ # Get all currently loaded modules that are part of the study package
385
+ for module_name in sys.modules:
386
+ if module_name.startswith(study_module_prefix) and module_name != current_module:
387
+ study_modules.append(module_name)
388
+
389
+ # Add core masster modules
390
+ core_modules = [
391
+ f"{base_modname}._version",
392
+ f"{base_modname}.chromatogram",
393
+ f"{base_modname}.spectrum",
394
+ f"{base_modname}.logger",
395
+ ]
396
+
397
+ # Add sample submodules
398
+ sample_modules = []
399
+ sample_module_prefix = f"{base_modname}.sample."
400
+ for module_name in sys.modules:
401
+ if module_name.startswith(sample_module_prefix) and module_name != current_module:
402
+ sample_modules.append(module_name)
403
+
404
+ all_modules_to_reload = core_modules + sample_modules + study_modules
405
+
406
+ # Reload all discovered modules
407
+ for full_module_name in all_modules_to_reload:
408
+ try:
409
+ if full_module_name in sys.modules:
410
+ mod = sys.modules[full_module_name]
411
+ importlib.reload(mod)
412
+ self.logger.debug(f"Reloaded module: {full_module_name}")
413
+ except Exception as e:
414
+ self.logger.warning(f"Failed to reload module {full_module_name}: {e}")
415
+
416
+ # Finally, reload the current module (sample.py)
417
+ try:
418
+ mod = __import__(current_module, fromlist=[current_module.split(".")[0]])
419
+ importlib.reload(mod)
420
+
421
+ # Get the updated class reference from the reloaded module
422
+ new = getattr(mod, self.__class__.__name__)
423
+ # Update the class reference of the instance
424
+ self.__class__ = new
425
+
426
+ self.logger.debug("Module reload completed")
427
+ except Exception as e:
428
+ self.logger.error(f"Failed to reload current module {current_module}: {e}")
429
+
430
+ def __str__(self):
431
+ """
432
+ Returns a string representation of the study.
433
+
434
+ Returns:
435
+ str: A summary string of the study.
436
+ """
437
+ return ""
438
+
439
+ def logger_update(self, level: str | None = None, label: str | None = None, sink: str | None = None):
440
+ """Update the logging configuration for this Study instance.
441
+
442
+ Args:
443
+ level: New logging level ("DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL")
444
+ label: New label for log messages
445
+ sink: New output sink (file path, file object, or "sys.stdout")
446
+ """
447
+ if level is not None:
448
+ self.log_level = level.upper()
449
+ self.logger.update_level(level)
450
+
451
+ if label is not None:
452
+ self.log_label = label + " | " if len(label) > 0 else ""
453
+ self.logger.update_label(self.log_label)
454
+
455
+ if sink is not None:
456
+ if sink == "sys.stdout":
457
+ self.log_sink = sys.stdout
458
+ else:
459
+ self.log_sink = sink
460
+ self.logger.update_sink(self.log_sink)
461
+
462
+ def info(self):
463
+ """
464
+ Display study information with optimized performance.
465
+
466
+ Returns a summary string of the study including folder, features count,
467
+ samples count, and various statistics.
468
+ """
469
+ # Cache DataFrame lengths and existence checks
470
+ consensus_df_len = len(self.consensus_df) if not self.consensus_df.is_empty() else 0
471
+ samples_df_len = len(self.samples_df) if not self.samples_df.is_empty() else 0
472
+
473
+ # Calculate consensus statistics only if consensus_df exists and has data
474
+ if consensus_df_len > 0:
475
+ # Execute the aggregation once
476
+ stats_result = self.consensus_df.select([
477
+ pl.col("number_samples").min().alias("min_samples"),
478
+ pl.col("number_samples").mean().alias("mean_samples"),
479
+ pl.col("number_samples").max().alias("max_samples"),
480
+ ]).row(0)
481
+
482
+ min_samples = stats_result[0] if stats_result[0] is not None else 0
483
+ mean_samples = stats_result[1] if stats_result[1] is not None else 0
484
+ max_samples = stats_result[2] if stats_result[2] is not None else 0
485
+ else:
486
+ min_samples = 0
487
+ mean_samples = 0
488
+ max_samples = 0
489
+
490
+ # Count only features where 'filled' == False
491
+ if not self.features_df.is_empty() and 'filled' in self.features_df.columns:
492
+ unfilled_features_count = self.features_df.filter(~self.features_df['filled']).height
493
+ else:
494
+ unfilled_features_count = 0
495
+
496
+ # Calculate features in consensus vs not in consensus (only for unfilled features)
497
+ if not self.features_df.is_empty() and not self.consensus_mapping_df.is_empty():
498
+ # Get unfilled features only
499
+ unfilled_features = self.features_df.filter(~self.features_df['filled']) if 'filled' in self.features_df.columns else self.features_df
500
+
501
+ # Ensure the column and list have matching data types
502
+ consensus_feature_uids = self.consensus_mapping_df['feature_uid'].to_list()
503
+
504
+ # Check if we need to cast either side to match types
505
+ unfilled_dtype = unfilled_features['feature_uid'].dtype
506
+ consensus_dtype = self.consensus_mapping_df['feature_uid'].dtype
507
+
508
+ if unfilled_dtype != consensus_dtype:
509
+ # Cast both to Int64 if possible, otherwise keep as string
510
+ try:
511
+ unfilled_features = unfilled_features.with_columns(pl.col('feature_uid').cast(pl.Int64))
512
+ consensus_feature_uids = [int(uid) for uid in consensus_feature_uids]
513
+ except Exception:
514
+ # If casting fails, ensure both are strings
515
+ unfilled_features = unfilled_features.with_columns(pl.col('feature_uid').cast(pl.Utf8))
516
+ consensus_feature_uids = [str(uid) for uid in consensus_feature_uids]
517
+
518
+ # Count unfilled features that are in consensus
519
+ in_consensus_count = unfilled_features.filter(
520
+ pl.col('feature_uid').is_in(consensus_feature_uids)
521
+ ).height
522
+
523
+ # Calculate ratios that sum to 100%
524
+ total_unfilled = unfilled_features.height
525
+ ratio_in_consensus_to_total = (in_consensus_count / total_unfilled * 100) if total_unfilled > 0 else 0
526
+ ratio_not_in_consensus_to_total = 100 - ratio_in_consensus_to_total if total_unfilled > 0 else 0
527
+ else:
528
+ ratio_in_consensus_to_total = 0
529
+ ratio_not_in_consensus_to_total = 0
530
+
531
+ # Optimize chrom completeness calculation
532
+ if consensus_df_len > 0 and samples_df_len > 0 and not self.features_df.is_empty():
533
+ # Ensure matching data types for join keys
534
+ features_dtype = self.features_df["feature_uid"].dtype
535
+ consensus_dtype = self.consensus_mapping_df["feature_uid"].dtype
536
+
537
+ if features_dtype != consensus_dtype:
538
+ # Try to cast both to Int64, fallback to string if needed
539
+ try:
540
+ self.features_df = self.features_df.with_columns(pl.col("feature_uid").cast(pl.Int64))
541
+ self.consensus_mapping_df = self.consensus_mapping_df.with_columns(pl.col("feature_uid").cast(pl.Int64))
542
+ except Exception:
543
+ # If casting to Int64 fails, cast both to string
544
+ self.features_df = self.features_df.with_columns(pl.col("feature_uid").cast(pl.Utf8))
545
+ self.consensus_mapping_df = self.consensus_mapping_df.with_columns(pl.col("feature_uid").cast(pl.Utf8))
546
+
547
+ # Use more efficient counting - count non-null chroms only for features in consensus mapping
548
+ if not self.consensus_mapping_df.is_empty():
549
+ non_null_chroms = (
550
+ self.features_df.join(
551
+ self.consensus_mapping_df.select("feature_uid"),
552
+ on="feature_uid",
553
+ how="inner",
554
+ )
555
+ .select(
556
+ pl.col("chrom").is_not_null().sum().alias("count"),
557
+ )
558
+ .item()
559
+ )
560
+ else:
561
+ non_null_chroms = 0
562
+ total_possible = samples_df_len * consensus_df_len
563
+ chrom_completeness = (
564
+ non_null_chroms / total_possible if total_possible > 0 else 0
565
+ )
566
+ else:
567
+ chrom_completeness = 0
568
+
569
+ # Calculate consensus features with MS2 (count unique consensus_uids with MS2)
570
+ if not self.consensus_ms2.is_empty():
571
+ consensus_with_ms2_count = self.consensus_ms2["consensus_uid"].n_unique()
572
+ else:
573
+ consensus_with_ms2_count = 0
574
+
575
+ # Calculate percentage of consensus features with MS2
576
+ consensus_with_ms2_percentage = (consensus_with_ms2_count / consensus_df_len * 100) if consensus_df_len > 0 else 0
577
+
578
+ # Total MS2 spectra count
579
+ total_ms2_count = len(self.consensus_ms2) if not self.consensus_ms2.is_empty() else 0
580
+
581
+ # estimate memory usage
582
+ memory_usage = (
583
+ self.samples_df.estimated_size()
584
+ + self.features_df.estimated_size()
585
+ + self.consensus_df.estimated_size()
586
+ + self.consensus_ms2.estimated_size()
587
+ + self.consensus_mapping_df.estimated_size()
588
+ )
589
+
590
+ summary = (
591
+ f"Study folder: {self.folder}\n"
592
+ f"Last save: {self.filename}\n"
593
+ f"Samples: {samples_df_len}\n"
594
+ f"Features: {unfilled_features_count}\n"
595
+ f"- in consensus: {ratio_in_consensus_to_total:.0f}%\n"
596
+ f"- not in consensus: {ratio_not_in_consensus_to_total:.0f}%\n"
597
+ f"Consensus: {consensus_df_len}\n"
598
+ f"- Min samples count: {min_samples:.0f}\n"
599
+ f"- Mean samples count: {mean_samples:.0f}\n"
600
+ f"- Max samples count: {max_samples:.0f}\n"
601
+ f"- with MS2: {consensus_with_ms2_percentage:.0f}%\n"
602
+ f"- total MS2: {total_ms2_count}\n"
603
+ f"Chrom completeness: {chrom_completeness*100:.0f}%\n"
604
+ f"Memory usage: {memory_usage / (1024 ** 2):.2f} MB\n"
605
+ )
606
+
607
+ print(summary)
608
+
609
+
610
+ if __name__ == "__main__":
611
+ # This block is executed when the script is run directly
612
+ pass
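The most visible change to `study.py` in 0.3.1 is a set of renames on the `Study` class, with backward-compatibility aliases kept in place: `find_consensus` → `merge`, `integrate_chrom` → `integrate`, `fill_chrom` → `fill`, `fill_chrom_single` → `fill_single`, `add_folder` → `add`, `set_default_folder` → `set_folder`, and the `default_folder` constructor parameter → `folder`, plus new exports such as `export_mztab`. The sketch below is a minimal, hypothetical 0.3.1-style workflow assembled only from names visible in this diff; the import path mirrors the file location shown above, and all argument values and call signatures (the paths, the directory passed to `add`, the no-argument calls) are illustrative assumptions, not documented API.

```python
# Minimal migration sketch for masster 0.2.5 -> 0.3.1.
# Method names come from this diff; paths and call signatures are placeholders.
from masster.study.study import Study

study = Study(folder="./data")   # 0.2.5: Study(default_folder="./data")
study.add("./mzml_files")        # 0.2.5: add_folder(); old name kept as alias
study.align()                    # cross-sample feature alignment
study.merge()                    # 0.2.5: find_consensus(); alias kept
study.fill()                     # 0.2.5: fill_chrom(); alias kept
study.integrate()                # 0.2.5: integrate_chrom(); alias kept
study.find_ms2()                 # link MS2 spectra to consensus features
study.export_mgf()               # export_mztab() is newly available in 0.3.1
study.save()                     # Study(filename=...) can reload a .study5 later
study.info()                     # prints the summary built in Study.info()
```

Because the old names are bound as aliases on the class, existing 0.2.5 scripts should keep working against 0.3.1, while new code can use the shorter method names directly.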