masster 0.4.3__py3-none-any.whl → 0.4.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of masster might be problematic. Click here for more details.

Files changed (54)
  1. masster/__init__.py +8 -8
  2. masster/_version.py +1 -1
  3. masster/chromatogram.py +1 -1
  4. masster/data/dda/20250530_VH_IQX_KW_RP_HSST3_100mm_12min_pos_v4_DDA_OT_C-MiLUT_QC_dil2_01_20250602151849.sample5 +0 -0
  5. masster/data/dda/20250530_VH_IQX_KW_RP_HSST3_100mm_12min_pos_v4_DDA_OT_C-MiLUT_QC_dil3_01_20250602150634.sample5 +0 -0
  6. masster/data/dda/20250530_VH_IQX_KW_RP_HSST3_100mm_12min_pos_v4_MS1_C-MiLUT_C008_v6_r38_01.sample5 +0 -0
  7. masster/data/dda/20250530_VH_IQX_KW_RP_HSST3_100mm_12min_pos_v4_MS1_C-MiLUT_C008_v7_r37_01.sample5 +0 -0
  8. masster/data/dda/20250530_VH_IQX_KW_RP_HSST3_100mm_12min_pos_v4_MS1_C-MiLUT_C017_v5_r99_01.sample5 +0 -0
  9. masster/data/libs/__pycache__/ccm.cpython-312.pyc +0 -0
  10. masster/data/libs/__pycache__/urine.cpython-312.pyc +0 -0
  11. masster/data/libs/ccm.csv +120 -0
  12. masster/data/libs/urine.csv +4693 -0
  13. masster/data/wiff/2025_01_14_VW_7600_LpMx_DBS_CID_2min_TOP15_030msecMS1_005msecReac_CE35_DBS-ON_3.timeseries.data +0 -0
  14. masster/data/wiff/2025_01_14_VW_7600_LpMx_DBS_CID_2min_TOP15_030msecMS1_005msecReac_CE35_DBS-ON_3.wiff +0 -0
  15. masster/data/wiff/2025_01_14_VW_7600_LpMx_DBS_CID_2min_TOP15_030msecMS1_005msecReac_CE35_DBS-ON_3.wiff.scan +0 -0
  16. masster/data/wiff/2025_01_14_VW_7600_LpMx_DBS_CID_2min_TOP15_030msecMS1_005msecReac_CE35_DBS-ON_3.wiff2 +0 -0
  17. masster/logger.py +11 -11
  18. masster/sample/__init__.py +1 -1
  19. masster/sample/adducts.py +338 -264
  20. masster/sample/defaults/find_adducts_def.py +21 -8
  21. masster/sample/h5.py +561 -282
  22. masster/sample/helpers.py +131 -75
  23. masster/sample/lib.py +4 -4
  24. masster/sample/load.py +31 -17
  25. masster/sample/parameters.py +1 -1
  26. masster/sample/plot.py +7 -7
  27. masster/sample/processing.py +117 -87
  28. masster/sample/sample.py +103 -90
  29. masster/sample/sample5_schema.json +196 -0
  30. masster/sample/save.py +35 -12
  31. masster/spectrum.py +1 -1
  32. masster/study/__init__.py +1 -1
  33. masster/study/defaults/align_def.py +5 -1
  34. masster/study/defaults/identify_def.py +3 -1
  35. masster/study/defaults/study_def.py +58 -25
  36. masster/study/export.py +360 -210
  37. masster/study/h5.py +560 -158
  38. masster/study/helpers.py +496 -203
  39. masster/study/helpers_optimized.py +1 -1
  40. masster/study/id.py +538 -349
  41. masster/study/load.py +233 -143
  42. masster/study/plot.py +71 -71
  43. masster/study/processing.py +456 -254
  44. masster/study/save.py +15 -5
  45. masster/study/study.py +213 -131
  46. masster/study/study5_schema.json +360 -0
  47. masster-0.4.5.dist-info/METADATA +131 -0
  48. masster-0.4.5.dist-info/RECORD +71 -0
  49. masster-0.4.3.dist-info/METADATA +0 -791
  50. masster-0.4.3.dist-info/RECORD +0 -56
  51. {masster-0.4.3.dist-info → masster-0.4.5.dist-info}/WHEEL +0 -0
  52. {masster-0.4.3.dist-info → masster-0.4.5.dist-info}/entry_points.txt +0 -0
  53. {masster-0.4.3.dist-info → masster-0.4.5.dist-info}/licenses/LICENSE +0 -0
  54. {masster-0.4.3.dist-info → masster-0.4.5.dist-info}/top_level.txt +0 -0
masster/study/save.py CHANGED
@@ -9,7 +9,7 @@ import pyopenms as oms
9
9
 
10
10
  from tqdm import tqdm
11
11
 
12
- from masster.sample.sample import Sample
12
+ from master.sample.sample import Sample
13
13
 
14
14
 
15
15
  def save(self, filename=None, add_timestamp=True, compress=False):
@@ -48,8 +48,14 @@ def save(self, filename=None, add_timestamp=True, compress=False):
48
48
  # Log file size information for performance monitoring
49
49
  if hasattr(self, "features_df") and not self.features_df.is_empty():
50
50
  feature_count = len(self.features_df)
51
- sample_count = len(self.samples_df) if hasattr(self, "samples_df") and not self.samples_df.is_empty() else 0
52
- self.logger.info(f"Saving study with {sample_count} samples and {feature_count} features to {filename}")
51
+ sample_count = (
52
+ len(self.samples_df)
53
+ if hasattr(self, "samples_df") and not self.samples_df.is_empty()
54
+ else 0
55
+ )
56
+ self.logger.info(
57
+ f"Saving study with {sample_count} samples and {feature_count} features to {filename}",
58
+ )
53
59
 
54
60
  # Use compressed mode for large datasets
55
61
  if compress:
@@ -121,7 +127,9 @@ def save_samples(self, samples=None):
121
127
  if sample_path.endswith(".sample5"):
122
128
  # If sample_path is a .sample5 file, save featureXML in the same directory
123
129
  featurexml_filename = sample_path.replace(".sample5", ".featureXML")
124
- self.logger.debug(f"Saving featureXML alongside .sample5 file: {featurexml_filename}")
130
+ self.logger.debug(
131
+ f"Saving featureXML alongside .sample5 file: {featurexml_filename}",
132
+ )
125
133
  else:
126
134
  # Fallback to study folder or current directory (original behavior)
127
135
  if self.folder is not None:
@@ -134,7 +142,9 @@ def save_samples(self, samples=None):
134
142
  os.getcwd(),
135
143
  sample_name + ".featureXML",
136
144
  )
137
- self.logger.debug(f"Saving featureXML to default location: {featurexml_filename}")
145
+ self.logger.debug(
146
+ f"Saving featureXML to default location: {featurexml_filename}",
147
+ )
138
148
 
139
149
  fh = oms.FeatureXMLFile()
140
150
  if sample_index is not None and sample_index < len(self.features_maps):
masster/study/study.py CHANGED
@@ -52,100 +52,111 @@ import sys
52
52
  import polars as pl
53
53
 
54
54
  # Study-specific imports
55
- from masster.study.h5 import _load_study5
56
- from masster.study.h5 import _save_study5
57
- from masster.study.h5 import _save_study5_compressed
58
- from masster.study.helpers import _get_consensus_uids
59
- from masster.study.helpers import _get_feature_uids
60
- from masster.study.helpers import _get_sample_uids
61
- from masster.study.helpers import _ensure_features_df_schema_order
62
- from masster.study.helpers import compress
63
- from masster.study.helpers import compress_features
64
- from masster.study.helpers import compress_ms2
65
- from masster.study.helpers import compress_chrom
66
- from masster.study.helpers import restore_features
67
- from masster.study.helpers import restore_chrom
68
- from masster.study.helpers import restore_ms2
69
- from masster.study.helpers import decompress
70
- from masster.study.helpers import fill_reset
71
- from masster.study.helpers import get_chrom
72
- from masster.study.helpers import get_sample
73
- from masster.study.helpers import get_consensus
74
- from masster.study.helpers import get_consensus_matches
75
- from masster.study.helpers import get_consensus_matrix
76
- from masster.study.helpers import get_orphans
77
- from masster.study.helpers import get_gaps_matrix
78
- from masster.study.helpers import get_gaps_stats
79
- from masster.study.helpers import align_reset
80
- from masster.study.helpers import set_folder
81
- from masster.study.helpers import set_source
82
- from masster.study.helpers import sample_color
83
- from masster.study.helpers import sample_color_reset
84
- from masster.study.helpers import sample_name_replace
85
- from masster.study.helpers import sample_name_reset
86
- from masster.study.helpers import samples_select
87
- from masster.study.helpers import samples_delete
88
- from masster.study.helpers import features_select
89
- from masster.study.helpers import features_filter
90
- from masster.study.helpers import features_delete
91
- from masster.study.helpers import consensus_select
92
- from masster.study.helpers import consensus_filter
93
- from masster.study.helpers import consensus_delete
94
- from masster.study.load import add
95
- from masster.study.load import add_sample
96
- from masster.study.load import _add_samples_batch
97
- from masster.study.load import _add_sample_optimized
98
- from masster.study.load import _add_sample_standard
99
- from masster.study.load import _sample_color_reset_optimized
100
- from masster.study.load import fill_single
101
- from masster.study.load import fill
102
- from masster.study.load import _process_sample_for_parallel_fill
103
- from masster.study.load import _get_missing_consensus_sample_combinations
104
- from masster.study.load import load
105
- from masster.study.load import _load_consensusXML
106
- from masster.study.load import load_features
107
- from masster.study.load import sanitize
108
- from masster.study.plot import plot_alignment
109
- from masster.study.plot import plot_consensus_2d
110
- from masster.study.plot import plot_samples_2d
111
- from masster.study.plot import plot_consensus_stats
112
- from masster.study.plot import plot_chrom
113
- from masster.study.plot import plot_pca
114
- from masster.study.plot import plot_bpc
115
- from masster.study.plot import plot_tic
116
- from masster.study.plot import plot_eic
117
- from masster.study.plot import plot_rt_correction
118
- from masster.study.processing import align
119
- from masster.study.processing import merge
120
- from masster.study.processing import integrate
121
- from masster.study.processing import find_ms2
122
- from masster.study.parameters import store_history
123
- from masster.study.parameters import get_parameters
124
- from masster.study.parameters import update_parameters
125
- from masster.study.parameters import get_parameters_property
126
- from masster.study.parameters import set_parameters_property
127
- from masster.study.save import save, save_consensus, _save_consensusXML, save_samples
128
- from masster.study.export import export_mgf, export_mztab, export_xlsx, export_parquet, _get_mgf_df
129
- from masster.study.id import lib_load, identify, get_id, id_reset, lib_reset
130
- from masster.study.id import _get_adducts, _calculate_formula_mass_shift, _format_adduct_name, _parse_element_counts
131
-
132
- from masster.logger import MassterLogger
133
- from masster.study.defaults.study_def import study_defaults
134
- from masster.study.defaults.align_def import align_defaults
135
- from masster.study.defaults.export_def import export_mgf_defaults
136
- from masster.study.defaults.fill_chrom_def import fill_chrom_defaults
137
- from masster.study.defaults.fill_def import fill_defaults
138
- from masster.study.defaults.find_consensus_def import find_consensus_defaults
139
- from masster.study.defaults.find_ms2_def import find_ms2_defaults
140
- from masster.study.defaults.integrate_chrom_def import integrate_chrom_defaults
141
- from masster.study.defaults.integrate_def import integrate_defaults
142
- from masster.study.defaults.merge_def import merge_defaults
55
+ from master.study.h5 import _load_study5
56
+ from master.study.h5 import _save_study5
57
+ from master.study.h5 import _save_study5_compressed
58
+ from master.study.helpers import _get_consensus_uids
59
+ from master.study.helpers import _get_feature_uids
60
+ from master.study.helpers import _get_sample_uids
61
+ from master.study.helpers import _ensure_features_df_schema_order
62
+ from master.study.helpers import compress
63
+ from master.study.helpers import compress_features
64
+ from master.study.helpers import compress_ms2
65
+ from master.study.helpers import compress_chrom
66
+ from master.study.helpers import restore_features
67
+ from master.study.helpers import restore_chrom
68
+ from master.study.helpers import restore_ms2
69
+ from master.study.helpers import decompress
70
+ from master.study.helpers import fill_reset
71
+ from master.study.helpers import get_chrom
72
+ from master.study.helpers import get_sample
73
+ from master.study.helpers import get_consensus
74
+ from master.study.helpers import get_consensus_matches
75
+ from master.study.helpers import get_consensus_matrix
76
+ from master.study.helpers import get_orphans
77
+ from master.study.helpers import get_gaps_matrix
78
+ from master.study.helpers import get_gaps_stats
79
+ from master.study.helpers import align_reset
80
+ from master.study.helpers import set_folder
81
+ from master.study.helpers import set_source
82
+ from master.study.helpers import sample_color
83
+ from master.study.helpers import sample_color_reset
84
+ from master.study.helpers import sample_name_replace
85
+ from master.study.helpers import sample_name_reset
86
+ from master.study.helpers import samples_select
87
+ from master.study.helpers import samples_delete
88
+ from master.study.helpers import features_select
89
+ from master.study.helpers import features_filter
90
+ from master.study.helpers import features_delete
91
+ from master.study.helpers import consensus_select
92
+ from master.study.helpers import consensus_filter
93
+ from master.study.helpers import consensus_delete
94
+ from master.study.load import add
95
+ from master.study.load import add_sample
96
+ from master.study.load import _add_samples_batch
97
+ from master.study.load import _add_sample_optimized
98
+ from master.study.load import _add_sample_standard
99
+ from master.study.load import _sample_color_reset_optimized
100
+ from master.study.load import fill_single
101
+ from master.study.load import fill
102
+ from master.study.load import _process_sample_for_parallel_fill
103
+ from master.study.load import _get_missing_consensus_sample_combinations
104
+ from master.study.load import load
105
+ from master.study.load import _load_consensusXML
106
+ from master.study.load import load_features
107
+ from master.study.load import sanitize
108
+ from master.study.plot import plot_alignment
109
+ from master.study.plot import plot_consensus_2d
110
+ from master.study.plot import plot_samples_2d
111
+ from master.study.plot import plot_consensus_stats
112
+ from master.study.plot import plot_chrom
113
+ from master.study.plot import plot_pca
114
+ from master.study.plot import plot_bpc
115
+ from master.study.plot import plot_tic
116
+ from master.study.plot import plot_eic
117
+ from master.study.plot import plot_rt_correction
118
+ from master.study.processing import align
119
+ from master.study.processing import merge
120
+ from master.study.processing import integrate
121
+ from master.study.processing import find_ms2
122
+ from master.study.parameters import store_history
123
+ from master.study.parameters import get_parameters
124
+ from master.study.parameters import update_parameters
125
+ from master.study.parameters import get_parameters_property
126
+ from master.study.parameters import set_parameters_property
127
+ from master.study.save import save, save_consensus, _save_consensusXML, save_samples
128
+ from master.study.export import (
129
+ export_mgf,
130
+ export_mztab,
131
+ export_xlsx,
132
+ export_parquet,
133
+ _get_mgf_df,
134
+ )
135
+ from master.study.id import lib_load, identify, get_id, id_reset, lib_reset
136
+ from master.study.id import (
137
+ _get_adducts,
138
+ _calculate_formula_mass_shift,
139
+ _format_adduct_name,
140
+ _parse_element_counts,
141
+ )
142
+
143
+ from master.logger import MasterLogger
144
+ from master.study.defaults.study_def import study_defaults
145
+ from master.study.defaults.align_def import align_defaults
146
+ from master.study.defaults.export_def import export_mgf_defaults
147
+ from master.study.defaults.fill_chrom_def import fill_chrom_defaults
148
+ from master.study.defaults.fill_def import fill_defaults
149
+ from master.study.defaults.find_consensus_def import find_consensus_defaults
150
+ from master.study.defaults.find_ms2_def import find_ms2_defaults
151
+ from master.study.defaults.integrate_chrom_def import integrate_chrom_defaults
152
+ from master.study.defaults.integrate_def import integrate_defaults
153
+ from master.study.defaults.merge_def import merge_defaults
143
154
 
144
155
  # Import sample defaults
145
- from masster.sample.defaults.sample_def import sample_defaults
146
- from masster.sample.defaults.find_features_def import find_features_defaults
147
- from masster.sample.defaults.find_adducts_def import find_adducts_defaults
148
- from masster.sample.defaults.get_spectrum_def import get_spectrum_defaults
156
+ from master.sample.defaults.sample_def import sample_defaults
157
+ from master.sample.defaults.find_features_def import find_features_defaults
158
+ from master.sample.defaults.find_adducts_def import find_adducts_defaults
159
+ from master.sample.defaults.get_spectrum_def import get_spectrum_defaults
149
160
 
150
161
  # Warning symbols for info display
151
162
  _WARNING_SYMBOL = "⚠️" # Yellow warning triangle
@@ -177,7 +188,7 @@ class Study:
177
188
  - `export_consensus()`: Export consensus features for downstream analysis.
178
189
 
179
190
  Example Usage:
180
- >>> from masster import study
191
+ >>> from master import study
181
192
  >>> study_obj = study(folder="./data")
182
193
  >>> study_obj.load_folder("./mzml_files")
183
194
  >>> study_obj.process_all()
@@ -272,7 +283,11 @@ class Study:
272
283
  # Set instance attributes (ensure proper string values for logger)
273
284
  self.folder = params.folder
274
285
  self.label = params.label
275
- self.polarity = params.polarity if params.polarity in ["positive", "negative", "pos", "neg"] else "positive"
286
+ self.polarity = (
287
+ params.polarity
288
+ if params.polarity in ["positive", "negative", "pos", "neg"]
289
+ else "positive"
290
+ )
276
291
  self.log_level = params.log_level.upper() if params.log_level else "INFO"
277
292
  self.log_label = params.log_label + " | " if params.log_label else ""
278
293
  self.log_sink = params.log_sink
@@ -327,7 +342,7 @@ class Study:
327
342
  self.id_df = pl.DataFrame()
328
343
 
329
344
  # Initialize independent logger
330
- self.logger = MassterLogger(
345
+ self.logger = MasterLogger(
331
346
  instance_type="study",
332
347
  level=self.log_level.upper(),
333
348
  label=self.log_label,
@@ -427,7 +442,9 @@ class Study:
427
442
  fill = fill
428
443
  fill_chrom = fill # Backward compatibility alias
429
444
  _process_sample_for_parallel_fill = _process_sample_for_parallel_fill
430
- _get_missing_consensus_sample_combinations = _get_missing_consensus_sample_combinations
445
+ _get_missing_consensus_sample_combinations = (
446
+ _get_missing_consensus_sample_combinations
447
+ )
431
448
  _load_consensusXML = _load_consensusXML
432
449
  load_features = load_features
433
450
  sanitize = sanitize
@@ -462,20 +479,20 @@ class Study:
462
479
 
463
480
  def _reload(self):
464
481
  """
465
- Reloads all masster modules to pick up any changes to their source code,
482
+ Reloads all master modules to pick up any changes to their source code,
466
483
  and updates the instance's class reference to the newly reloaded class version.
467
484
  This ensures that the instance uses the latest implementation without restarting the interpreter.
468
485
  """
469
486
  # Reset logger configuration flags to allow proper reconfiguration after reload
470
487
  """ try:
471
- import masster.sample.logger as logger_module
488
+ import master.sample.logger as logger_module
472
489
 
473
490
  if hasattr(logger_module, "_STUDY_LOGGER_CONFIGURED"):
474
491
  logger_module._STUDY_LOGGER_CONFIGURED = False
475
492
  except Exception:
476
493
  pass"""
477
494
 
478
- # Get the base module name (masster)
495
+ # Get the base module name (master)
479
496
  base_modname = self.__class__.__module__.split(".")[0]
480
497
  current_module = self.__class__.__module__
481
498
 
@@ -485,10 +502,13 @@ class Study:
485
502
 
486
503
  # Get all currently loaded modules that are part of the study package
487
504
  for module_name in sys.modules:
488
- if module_name.startswith(study_module_prefix) and module_name != current_module:
505
+ if (
506
+ module_name.startswith(study_module_prefix)
507
+ and module_name != current_module
508
+ ):
489
509
  study_modules.append(module_name)
490
510
 
491
- # Add core masster modules
511
+ # Add core master modules
492
512
  core_modules = [
493
513
  f"{base_modname}._version",
494
514
  f"{base_modname}.chromatogram",
@@ -500,7 +520,10 @@ class Study:
500
520
  sample_modules = []
501
521
  sample_module_prefix = f"{base_modname}.sample."
502
522
  for module_name in sys.modules:
503
- if module_name.startswith(sample_module_prefix) and module_name != current_module:
523
+ if (
524
+ module_name.startswith(sample_module_prefix)
525
+ and module_name != current_module
526
+ ):
504
527
  sample_modules.append(module_name)
505
528
 
506
529
  all_modules_to_reload = core_modules + sample_modules + study_modules
@@ -538,7 +561,12 @@ class Study:
538
561
  """
539
562
  return ""
540
563
 
541
- def logger_update(self, level: str | None = None, label: str | None = None, sink: str | None = None):
564
+ def logger_update(
565
+ self,
566
+ level: str | None = None,
567
+ label: str | None = None,
568
+ sink: str | None = None,
569
+ ):
542
570
  """Update the logging configuration for this Study instance.
543
571
 
544
572
  Args:
@@ -570,17 +598,21 @@ class Study:
570
598
  that are out of normal range.
571
599
  """
572
600
  # Cache DataFrame lengths and existence checks
573
- consensus_df_len = len(self.consensus_df) if not self.consensus_df.is_empty() else 0
601
+ consensus_df_len = (
602
+ len(self.consensus_df) if not self.consensus_df.is_empty() else 0
603
+ )
574
604
  samples_df_len = len(self.samples_df) if not self.samples_df.is_empty() else 0
575
605
 
576
606
  # Calculate consensus statistics only if consensus_df exists and has data
577
607
  if consensus_df_len > 0:
578
608
  # Execute the aggregation once
579
- stats_result = self.consensus_df.select([
580
- pl.col("number_samples").min().alias("min_samples"),
581
- pl.col("number_samples").mean().alias("mean_samples"),
582
- pl.col("number_samples").max().alias("max_samples"),
583
- ]).row(0)
609
+ stats_result = self.consensus_df.select(
610
+ [
611
+ pl.col("number_samples").min().alias("min_samples"),
612
+ pl.col("number_samples").mean().alias("mean_samples"),
613
+ pl.col("number_samples").max().alias("max_samples"),
614
+ ],
615
+ ).row(0)
584
616
 
585
617
  min_samples = stats_result[0] if stats_result[0] is not None else 0
586
618
  mean_samples = stats_result[1] if stats_result[1] is not None else 0
@@ -592,7 +624,9 @@ class Study:
592
624
 
593
625
  # Count only features where 'filled' == False
594
626
  if not self.features_df.is_empty() and "filled" in self.features_df.columns:
595
- unfilled_features_count = self.features_df.filter(~self.features_df["filled"]).height
627
+ unfilled_features_count = self.features_df.filter(
628
+ ~self.features_df["filled"],
629
+ ).height
596
630
  else:
597
631
  unfilled_features_count = 0
598
632
 
@@ -615,12 +649,20 @@ class Study:
615
649
  if unfilled_dtype != consensus_dtype:
616
650
  # Cast both to Int64 if possible, otherwise keep as string
617
651
  try:
618
- unfilled_features = unfilled_features.with_columns(pl.col("feature_uid").cast(pl.Int64))
619
- consensus_feature_uids = [int(uid) for uid in consensus_feature_uids]
652
+ unfilled_features = unfilled_features.with_columns(
653
+ pl.col("feature_uid").cast(pl.Int64),
654
+ )
655
+ consensus_feature_uids = [
656
+ int(uid) for uid in consensus_feature_uids
657
+ ]
620
658
  except Exception:
621
659
  # If casting fails, ensure both are strings
622
- unfilled_features = unfilled_features.with_columns(pl.col("feature_uid").cast(pl.Utf8))
623
- consensus_feature_uids = [str(uid) for uid in consensus_feature_uids]
660
+ unfilled_features = unfilled_features.with_columns(
661
+ pl.col("feature_uid").cast(pl.Utf8),
662
+ )
663
+ consensus_feature_uids = [
664
+ str(uid) for uid in consensus_feature_uids
665
+ ]
624
666
 
625
667
  # Count unfilled features that are in consensus
626
668
  in_consensus_count = unfilled_features.filter(
@@ -629,14 +671,22 @@ class Study:
629
671
 
630
672
  # Calculate ratios that sum to 100%
631
673
  total_unfilled = unfilled_features.height
632
- ratio_in_consensus_to_total = (in_consensus_count / total_unfilled * 100) if total_unfilled > 0 else 0
633
- ratio_not_in_consensus_to_total = 100 - ratio_in_consensus_to_total if total_unfilled > 0 else 0
674
+ ratio_in_consensus_to_total = (
675
+ (in_consensus_count / total_unfilled * 100) if total_unfilled > 0 else 0
676
+ )
677
+ ratio_not_in_consensus_to_total = (
678
+ 100 - ratio_in_consensus_to_total if total_unfilled > 0 else 0
679
+ )
634
680
  else:
635
681
  ratio_in_consensus_to_total = 0
636
682
  ratio_not_in_consensus_to_total = 0
637
683
 
638
684
  # Optimize chrom completeness calculation
639
- if consensus_df_len > 0 and samples_df_len > 0 and not self.features_df.is_empty():
685
+ if (
686
+ consensus_df_len > 0
687
+ and samples_df_len > 0
688
+ and not self.features_df.is_empty()
689
+ ):
640
690
  # Ensure matching data types for join keys
641
691
  features_dtype = self.features_df["feature_uid"].dtype
642
692
  consensus_dtype = self.consensus_mapping_df["feature_uid"].dtype
@@ -644,13 +694,17 @@ class Study:
644
694
  if features_dtype != consensus_dtype:
645
695
  # Try to cast both to Int64, fallback to string if needed
646
696
  try:
647
- self.features_df = self.features_df.with_columns(pl.col("feature_uid").cast(pl.Int64))
697
+ self.features_df = self.features_df.with_columns(
698
+ pl.col("feature_uid").cast(pl.Int64),
699
+ )
648
700
  self.consensus_mapping_df = self.consensus_mapping_df.with_columns(
649
701
  pl.col("feature_uid").cast(pl.Int64),
650
702
  )
651
703
  except Exception:
652
704
  # If casting to Int64 fails, cast both to string
653
- self.features_df = self.features_df.with_columns(pl.col("feature_uid").cast(pl.Utf8))
705
+ self.features_df = self.features_df.with_columns(
706
+ pl.col("feature_uid").cast(pl.Utf8),
707
+ )
654
708
  self.consensus_mapping_df = self.consensus_mapping_df.with_columns(
655
709
  pl.col("feature_uid").cast(pl.Utf8),
656
710
  )
@@ -671,7 +725,9 @@ class Study:
671
725
  else:
672
726
  non_null_chroms = 0
673
727
  total_possible = samples_df_len * consensus_df_len
674
- chrom_completeness = non_null_chroms / total_possible if total_possible > 0 else 0
728
+ chrom_completeness = (
729
+ non_null_chroms / total_possible if total_possible > 0 else 0
730
+ )
675
731
  else:
676
732
  chrom_completeness = 0
677
733
 
@@ -683,23 +739,37 @@ class Study:
683
739
 
684
740
  if not self.consensus_df.is_empty():
685
741
  # Compute RT spread using only consensus rows with number_samples >= half the number of samples
686
- threshold = self.consensus_df.select(pl.col("number_samples").max()).item() / 2 if not self.samples_df.is_empty() else 0
742
+ threshold = (
743
+ self.consensus_df.select(pl.col("number_samples").max()).item() / 2
744
+ if not self.samples_df.is_empty()
745
+ else 0
746
+ )
687
747
  filtered = self.consensus_df.filter(pl.col("number_samples") >= threshold)
688
748
  if filtered.is_empty():
689
749
  rt_spread = -1.0
690
750
  else:
691
- rt_spread_row = filtered.select((pl.col("rt_max") - pl.col("rt_min")).mean()).row(0)
692
- rt_spread = float(rt_spread_row[0]) if rt_spread_row and rt_spread_row[0] is not None else 0.0
751
+ rt_spread_row = filtered.select(
752
+ (pl.col("rt_max") - pl.col("rt_min")).mean(),
753
+ ).row(0)
754
+ rt_spread = (
755
+ float(rt_spread_row[0])
756
+ if rt_spread_row and rt_spread_row[0] is not None
757
+ else 0.0
758
+ )
693
759
  else:
694
760
  rt_spread = -1.0
695
761
 
696
762
  # Calculate percentage of consensus features with MS2
697
763
  consensus_with_ms2_percentage = (
698
- (consensus_with_ms2_count / consensus_df_len * 100) if consensus_df_len > 0 else 0
764
+ (consensus_with_ms2_count / consensus_df_len * 100)
765
+ if consensus_df_len > 0
766
+ else 0
699
767
  )
700
768
 
701
769
  # Total MS2 spectra count
702
- total_ms2_count = len(self.consensus_ms2) if not self.consensus_ms2.is_empty() else 0
770
+ total_ms2_count = (
771
+ len(self.consensus_ms2) if not self.consensus_ms2.is_empty() else 0
772
+ )
703
773
 
704
774
  # Estimate memory usage
705
775
  memory_usage = (
@@ -712,15 +782,27 @@ class Study:
712
782
 
713
783
  # Add warning symbols for out-of-range values
714
784
  consensus_warning = f" {_WARNING_SYMBOL}" if consensus_df_len < 50 else ""
715
-
785
+
716
786
  rt_spread_text = "N/A" if rt_spread < 0 else f"{rt_spread:.3f}s"
717
- rt_spread_warning = f" {_WARNING_SYMBOL}" if rt_spread >= 0 and (rt_spread > 5 or rt_spread < 0.1) else ""
718
-
787
+ rt_spread_warning = (
788
+ f" {_WARNING_SYMBOL}"
789
+ if rt_spread >= 0 and (rt_spread > 5 or rt_spread < 0.1)
790
+ else ""
791
+ )
792
+
719
793
  chrom_completeness_pct = chrom_completeness * 100
720
- chrom_warning = f" {_WARNING_SYMBOL}" if chrom_completeness_pct < 10 and chrom_completeness_pct >= 0 else ""
721
-
794
+ chrom_warning = (
795
+ f" {_WARNING_SYMBOL}"
796
+ if chrom_completeness_pct < 10 and chrom_completeness_pct >= 0
797
+ else ""
798
+ )
799
+
722
800
  max_samples_warning = ""
723
- if isinstance(max_samples, (int, float)) and samples_df_len > 0 and max_samples > 0:
801
+ if (
802
+ isinstance(max_samples, (int, float))
803
+ and samples_df_len > 0
804
+ and max_samples > 0
805
+ ):
724
806
  if max_samples < samples_df_len / 3.0:
725
807
  max_samples_warning = f" {_WARNING_SYMBOL}"
726
808
  elif max_samples < samples_df_len * 0.8: