masster 0.4.0__py3-none-any.whl → 0.4.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- masster/__init__.py +8 -8
- masster/_version.py +1 -1
- masster/chromatogram.py +3 -9
- masster/data/libs/README.md +1 -1
- masster/data/libs/ccm.csv +120 -120
- masster/data/libs/ccm.py +116 -62
- masster/data/libs/central_carbon_README.md +1 -1
- masster/data/libs/urine.py +161 -65
- masster/data/libs/urine_metabolites.csv +4693 -4693
- masster/data/wiff/2025_01_14_VW_7600_LpMx_DBS_CID_2min_TOP15_030msecMS1_005msecReac_CE35_DBS-ON_3.mzML +2 -2
- masster/logger.py +43 -78
- masster/sample/__init__.py +1 -1
- masster/sample/adducts.py +264 -338
- masster/sample/defaults/find_adducts_def.py +8 -21
- masster/sample/defaults/find_features_def.py +1 -6
- masster/sample/defaults/get_spectrum_def.py +1 -5
- masster/sample/defaults/sample_def.py +1 -5
- masster/sample/h5.py +282 -561
- masster/sample/helpers.py +75 -131
- masster/sample/lib.py +17 -42
- masster/sample/load.py +17 -31
- masster/sample/parameters.py +2 -6
- masster/sample/plot.py +27 -88
- masster/sample/processing.py +87 -117
- masster/sample/quant.py +51 -57
- masster/sample/sample.py +90 -103
- masster/sample/sample5_schema.json +44 -44
- masster/sample/save.py +12 -35
- masster/sample/sciex.py +19 -66
- masster/spectrum.py +20 -58
- masster/study/__init__.py +1 -1
- masster/study/defaults/align_def.py +1 -5
- masster/study/defaults/fill_chrom_def.py +1 -5
- masster/study/defaults/fill_def.py +1 -5
- masster/study/defaults/integrate_chrom_def.py +1 -5
- masster/study/defaults/integrate_def.py +1 -5
- masster/study/defaults/study_def.py +25 -58
- masster/study/export.py +207 -233
- masster/study/h5.py +136 -470
- masster/study/helpers.py +202 -495
- masster/study/helpers_optimized.py +13 -40
- masster/study/id.py +110 -213
- masster/study/load.py +143 -230
- masster/study/plot.py +257 -518
- masster/study/processing.py +257 -469
- masster/study/save.py +5 -15
- masster/study/study.py +276 -379
- masster/study/study5_schema.json +96 -96
- {masster-0.4.0.dist-info → masster-0.4.1.dist-info}/METADATA +1 -1
- masster-0.4.1.dist-info/RECORD +67 -0
- masster-0.4.0.dist-info/RECORD +0 -67
- {masster-0.4.0.dist-info → masster-0.4.1.dist-info}/WHEEL +0 -0
- {masster-0.4.0.dist-info → masster-0.4.1.dist-info}/entry_points.txt +0 -0
- {masster-0.4.0.dist-info → masster-0.4.1.dist-info}/licenses/LICENSE +0 -0
masster/study/study.py
CHANGED
|
@@ -52,104 +52,104 @@ import sys
|
|
|
52
52
|
import polars as pl
|
|
53
53
|
|
|
54
54
|
# Study-specific imports
|
|
55
|
-
from
|
|
56
|
-
from
|
|
57
|
-
from
|
|
58
|
-
from
|
|
59
|
-
from
|
|
60
|
-
from
|
|
61
|
-
from
|
|
62
|
-
from
|
|
63
|
-
from
|
|
64
|
-
from
|
|
65
|
-
from
|
|
66
|
-
from
|
|
67
|
-
from
|
|
68
|
-
from
|
|
69
|
-
from
|
|
70
|
-
from
|
|
71
|
-
from
|
|
72
|
-
from
|
|
73
|
-
from
|
|
74
|
-
from
|
|
75
|
-
from
|
|
76
|
-
from
|
|
77
|
-
from
|
|
78
|
-
from
|
|
79
|
-
from
|
|
80
|
-
from
|
|
81
|
-
from
|
|
82
|
-
from
|
|
83
|
-
from
|
|
84
|
-
from
|
|
85
|
-
from
|
|
86
|
-
from
|
|
87
|
-
from
|
|
88
|
-
from
|
|
89
|
-
from
|
|
90
|
-
from
|
|
91
|
-
from
|
|
92
|
-
from
|
|
93
|
-
from
|
|
94
|
-
from
|
|
95
|
-
from
|
|
96
|
-
from
|
|
97
|
-
from
|
|
98
|
-
from
|
|
99
|
-
from
|
|
100
|
-
from
|
|
101
|
-
from
|
|
102
|
-
from
|
|
103
|
-
from
|
|
104
|
-
from
|
|
105
|
-
from
|
|
106
|
-
from
|
|
107
|
-
from
|
|
108
|
-
from
|
|
109
|
-
from
|
|
110
|
-
from
|
|
111
|
-
from
|
|
112
|
-
from
|
|
113
|
-
from
|
|
114
|
-
from
|
|
115
|
-
from
|
|
116
|
-
from
|
|
117
|
-
from
|
|
118
|
-
from
|
|
119
|
-
from
|
|
120
|
-
from
|
|
121
|
-
from
|
|
122
|
-
from
|
|
123
|
-
from
|
|
124
|
-
from
|
|
125
|
-
from
|
|
126
|
-
from
|
|
127
|
-
from
|
|
128
|
-
from
|
|
129
|
-
from
|
|
130
|
-
from
|
|
131
|
-
from
|
|
132
|
-
from
|
|
133
|
-
from
|
|
134
|
-
from
|
|
135
|
-
|
|
136
|
-
from
|
|
137
|
-
from
|
|
138
|
-
from
|
|
139
|
-
from
|
|
140
|
-
from
|
|
141
|
-
from
|
|
142
|
-
from
|
|
143
|
-
from
|
|
144
|
-
from
|
|
145
|
-
from
|
|
146
|
-
from
|
|
55
|
+
from masster.study.h5 import _load_study5
|
|
56
|
+
from masster.study.h5 import _save_study5
|
|
57
|
+
from masster.study.h5 import _save_study5_compressed
|
|
58
|
+
from masster.study.helpers import _get_consensus_uids
|
|
59
|
+
from masster.study.helpers import _get_feature_uids
|
|
60
|
+
from masster.study.helpers import _get_sample_uids
|
|
61
|
+
from masster.study.helpers import _ensure_features_df_schema_order
|
|
62
|
+
from masster.study.helpers import compress
|
|
63
|
+
from masster.study.helpers import compress_features
|
|
64
|
+
from masster.study.helpers import compress_ms2
|
|
65
|
+
from masster.study.helpers import compress_chrom
|
|
66
|
+
from masster.study.helpers import restore_features
|
|
67
|
+
from masster.study.helpers import restore_chrom
|
|
68
|
+
from masster.study.helpers import restore_ms2
|
|
69
|
+
from masster.study.helpers import decompress
|
|
70
|
+
from masster.study.helpers import fill_reset
|
|
71
|
+
from masster.study.helpers import get_chrom
|
|
72
|
+
from masster.study.helpers import get_sample
|
|
73
|
+
from masster.study.helpers import get_consensus
|
|
74
|
+
from masster.study.helpers import get_consensus_matches
|
|
75
|
+
from masster.study.helpers import get_consensus_matrix
|
|
76
|
+
from masster.study.helpers import get_orphans
|
|
77
|
+
from masster.study.helpers import get_gaps_matrix
|
|
78
|
+
from masster.study.helpers import get_gaps_stats
|
|
79
|
+
from masster.study.helpers import align_reset
|
|
80
|
+
from masster.study.helpers import set_folder
|
|
81
|
+
from masster.study.helpers import set_source
|
|
82
|
+
from masster.study.helpers import sample_color
|
|
83
|
+
from masster.study.helpers import sample_color_reset
|
|
84
|
+
from masster.study.helpers import sample_name_replace
|
|
85
|
+
from masster.study.helpers import sample_name_reset
|
|
86
|
+
from masster.study.helpers import samples_select
|
|
87
|
+
from masster.study.helpers import samples_delete
|
|
88
|
+
from masster.study.helpers import features_select
|
|
89
|
+
from masster.study.helpers import features_filter
|
|
90
|
+
from masster.study.helpers import features_delete
|
|
91
|
+
from masster.study.helpers import consensus_select
|
|
92
|
+
from masster.study.helpers import consensus_filter
|
|
93
|
+
from masster.study.helpers import consensus_delete
|
|
94
|
+
from masster.study.load import add
|
|
95
|
+
from masster.study.load import add_sample
|
|
96
|
+
from masster.study.load import _add_samples_batch
|
|
97
|
+
from masster.study.load import _add_sample_optimized
|
|
98
|
+
from masster.study.load import _add_sample_standard
|
|
99
|
+
from masster.study.load import _sample_color_reset_optimized
|
|
100
|
+
from masster.study.load import fill_single
|
|
101
|
+
from masster.study.load import fill
|
|
102
|
+
from masster.study.load import _process_sample_for_parallel_fill
|
|
103
|
+
from masster.study.load import _get_missing_consensus_sample_combinations
|
|
104
|
+
from masster.study.load import load
|
|
105
|
+
from masster.study.load import _load_consensusXML
|
|
106
|
+
from masster.study.load import load_features
|
|
107
|
+
from masster.study.load import sanitize
|
|
108
|
+
from masster.study.plot import plot_alignment
|
|
109
|
+
from masster.study.plot import plot_consensus_2d
|
|
110
|
+
from masster.study.plot import plot_samples_2d
|
|
111
|
+
from masster.study.plot import plot_consensus_stats
|
|
112
|
+
from masster.study.plot import plot_chrom
|
|
113
|
+
from masster.study.plot import plot_pca
|
|
114
|
+
from masster.study.plot import plot_bpc
|
|
115
|
+
from masster.study.plot import plot_tic
|
|
116
|
+
from masster.study.plot import plot_eic
|
|
117
|
+
from masster.study.plot import plot_rt_correction
|
|
118
|
+
from masster.study.processing import align
|
|
119
|
+
from masster.study.processing import merge
|
|
120
|
+
from masster.study.processing import integrate
|
|
121
|
+
from masster.study.processing import find_ms2
|
|
122
|
+
from masster.study.parameters import store_history
|
|
123
|
+
from masster.study.parameters import get_parameters
|
|
124
|
+
from masster.study.parameters import update_parameters
|
|
125
|
+
from masster.study.parameters import get_parameters_property
|
|
126
|
+
from masster.study.parameters import set_parameters_property
|
|
127
|
+
from masster.study.save import save
|
|
128
|
+
from masster.study.save import save_consensus
|
|
129
|
+
from masster.study.save import _save_consensusXML
|
|
130
|
+
from masster.study.save import save_samples
|
|
131
|
+
from masster.study.export import export_mgf
|
|
132
|
+
from masster.study.export import export_mztab
|
|
133
|
+
from masster.study.export import _get_mgf_df
|
|
134
|
+
from masster.study.id import lib_load, identify, get_id
|
|
135
|
+
|
|
136
|
+
from masster.logger import MassterLogger
|
|
137
|
+
from masster.study.defaults.study_def import study_defaults
|
|
138
|
+
from masster.study.defaults.align_def import align_defaults
|
|
139
|
+
from masster.study.defaults.export_def import export_mgf_defaults
|
|
140
|
+
from masster.study.defaults.fill_chrom_def import fill_chrom_defaults
|
|
141
|
+
from masster.study.defaults.fill_def import fill_defaults
|
|
142
|
+
from masster.study.defaults.find_consensus_def import find_consensus_defaults
|
|
143
|
+
from masster.study.defaults.find_ms2_def import find_ms2_defaults
|
|
144
|
+
from masster.study.defaults.integrate_chrom_def import integrate_chrom_defaults
|
|
145
|
+
from masster.study.defaults.integrate_def import integrate_defaults
|
|
146
|
+
from masster.study.defaults.merge_def import merge_defaults
|
|
147
147
|
|
|
148
148
|
# Import sample defaults
|
|
149
|
-
from
|
|
150
|
-
from
|
|
151
|
-
from
|
|
152
|
-
from
|
|
149
|
+
from masster.sample.defaults.sample_def import sample_defaults
|
|
150
|
+
from masster.sample.defaults.find_features_def import find_features_defaults
|
|
151
|
+
from masster.sample.defaults.find_adducts_def import find_adducts_defaults
|
|
152
|
+
from masster.sample.defaults.get_spectrum_def import get_spectrum_defaults
|
|
153
153
|
|
|
154
154
|
# Warning symbols for info display
|
|
155
155
|
_WARNING_SYMBOL = "⚠️" # Yellow warning triangle
|
|
@@ -181,7 +181,7 @@ class Study:
|
|
|
181
181
|
- `export_consensus()`: Export consensus features for downstream analysis.
|
|
182
182
|
|
|
183
183
|
Example Usage:
|
|
184
|
-
>>> from
|
|
184
|
+
>>> from masster import study
|
|
185
185
|
>>> study_obj = study(folder="./data")
|
|
186
186
|
>>> study_obj.load_folder("./mzml_files")
|
|
187
187
|
>>> study_obj.process_all()
|
|
@@ -276,11 +276,7 @@ class Study:
|
|
|
276
276
|
# Set instance attributes (ensure proper string values for logger)
|
|
277
277
|
self.folder = params.folder
|
|
278
278
|
self.label = params.label
|
|
279
|
-
self.polarity =
|
|
280
|
-
params.polarity
|
|
281
|
-
if params.polarity in ["positive", "negative", "pos", "neg"]
|
|
282
|
-
else "positive"
|
|
283
|
-
)
|
|
279
|
+
self.polarity = params.polarity if params.polarity in ["positive", "negative", "pos", "neg"] else "positive"
|
|
284
280
|
self.log_level = params.log_level.upper() if params.log_level else "INFO"
|
|
285
281
|
self.log_label = params.log_label + " | " if params.log_label else ""
|
|
286
282
|
self.log_sink = params.log_sink
|
|
@@ -335,7 +331,7 @@ class Study:
|
|
|
335
331
|
self.id_df = pl.DataFrame()
|
|
336
332
|
|
|
337
333
|
# Initialize independent logger
|
|
338
|
-
self.logger =
|
|
334
|
+
self.logger = MassterLogger(
|
|
339
335
|
instance_type="study",
|
|
340
336
|
level=self.log_level.upper(),
|
|
341
337
|
label=self.log_label,
|
|
@@ -436,9 +432,7 @@ class Study:
|
|
|
436
432
|
fill = fill
|
|
437
433
|
fill_chrom = fill # Backward compatibility alias
|
|
438
434
|
_process_sample_for_parallel_fill = _process_sample_for_parallel_fill
|
|
439
|
-
_get_missing_consensus_sample_combinations =
|
|
440
|
-
_get_missing_consensus_sample_combinations
|
|
441
|
-
)
|
|
435
|
+
_get_missing_consensus_sample_combinations = _get_missing_consensus_sample_combinations
|
|
442
436
|
_load_consensusXML = _load_consensusXML
|
|
443
437
|
load_features = load_features
|
|
444
438
|
sanitize = sanitize
|
|
@@ -465,20 +459,20 @@ class Study:
|
|
|
465
459
|
|
|
466
460
|
def _reload(self):
|
|
467
461
|
"""
|
|
468
|
-
Reloads all
|
|
462
|
+
Reloads all masster modules to pick up any changes to their source code,
|
|
469
463
|
and updates the instance's class reference to the newly reloaded class version.
|
|
470
464
|
This ensures that the instance uses the latest implementation without restarting the interpreter.
|
|
471
465
|
"""
|
|
472
466
|
# Reset logger configuration flags to allow proper reconfiguration after reload
|
|
473
467
|
""" try:
|
|
474
|
-
import
|
|
468
|
+
import masster.sample.logger as logger_module
|
|
475
469
|
|
|
476
470
|
if hasattr(logger_module, "_STUDY_LOGGER_CONFIGURED"):
|
|
477
471
|
logger_module._STUDY_LOGGER_CONFIGURED = False
|
|
478
472
|
except Exception:
|
|
479
473
|
pass"""
|
|
480
474
|
|
|
481
|
-
# Get the base module name (
|
|
475
|
+
# Get the base module name (masster)
|
|
482
476
|
base_modname = self.__class__.__module__.split(".")[0]
|
|
483
477
|
current_module = self.__class__.__module__
|
|
484
478
|
|
|
@@ -488,13 +482,10 @@ class Study:
|
|
|
488
482
|
|
|
489
483
|
# Get all currently loaded modules that are part of the study package
|
|
490
484
|
for module_name in sys.modules:
|
|
491
|
-
if (
|
|
492
|
-
module_name.startswith(study_module_prefix)
|
|
493
|
-
and module_name != current_module
|
|
494
|
-
):
|
|
485
|
+
if module_name.startswith(study_module_prefix) and module_name != current_module:
|
|
495
486
|
study_modules.append(module_name)
|
|
496
487
|
|
|
497
|
-
# Add core
|
|
488
|
+
# Add core masster modules
|
|
498
489
|
core_modules = [
|
|
499
490
|
f"{base_modname}._version",
|
|
500
491
|
f"{base_modname}.chromatogram",
|
|
@@ -506,10 +497,7 @@ class Study:
|
|
|
506
497
|
sample_modules = []
|
|
507
498
|
sample_module_prefix = f"{base_modname}.sample."
|
|
508
499
|
for module_name in sys.modules:
|
|
509
|
-
if (
|
|
510
|
-
module_name.startswith(sample_module_prefix)
|
|
511
|
-
and module_name != current_module
|
|
512
|
-
):
|
|
500
|
+
if module_name.startswith(sample_module_prefix) and module_name != current_module:
|
|
513
501
|
sample_modules.append(module_name)
|
|
514
502
|
|
|
515
503
|
all_modules_to_reload = core_modules + sample_modules + study_modules
|
|
@@ -541,11 +529,11 @@ class Study:
|
|
|
541
529
|
def _get_adducts(self, adducts_list: list = None, **kwargs):
|
|
542
530
|
"""
|
|
543
531
|
Generate comprehensive adduct specifications for study-level adduct filtering.
|
|
544
|
-
|
|
532
|
+
|
|
545
533
|
This method creates a DataFrame of adduct combinations that will be used to filter
|
|
546
534
|
and score adducts at the study level. Similar to sample._get_adducts() but uses
|
|
547
535
|
study-level parameters and constraints.
|
|
548
|
-
|
|
536
|
+
|
|
549
537
|
Parameters
|
|
550
538
|
----------
|
|
551
539
|
adducts_list : List[str], optional
|
|
@@ -554,10 +542,10 @@ class Study:
|
|
|
554
542
|
**kwargs : dict
|
|
555
543
|
Override parameters, including:
|
|
556
544
|
- charge_min: Minimum charge to consider (default 1)
|
|
557
|
-
- charge_max: Maximum charge to consider (default 3)
|
|
545
|
+
- charge_max: Maximum charge to consider (default 3)
|
|
558
546
|
- max_combinations: Maximum number of adduct components to combine (default 3)
|
|
559
547
|
- min_probability: Minimum probability threshold (default from study parameters)
|
|
560
|
-
|
|
548
|
+
|
|
561
549
|
Returns
|
|
562
550
|
-------
|
|
563
551
|
pl.DataFrame
|
|
@@ -569,304 +557,272 @@ class Study:
|
|
|
569
557
|
- complexity: Number of adduct components (1-3)
|
|
570
558
|
"""
|
|
571
559
|
# Import required modules
|
|
572
|
-
|
|
560
|
+
from collections import Counter
|
|
561
|
+
from itertools import combinations
|
|
562
|
+
import numpy as np
|
|
563
|
+
|
|
573
564
|
# Use provided adducts list or get from study parameters
|
|
574
565
|
if adducts_list is None:
|
|
575
|
-
adducts_list = (
|
|
576
|
-
|
|
577
|
-
if hasattr(self.parameters, "adducts") and self.parameters.adducts
|
|
578
|
-
else []
|
|
579
|
-
)
|
|
580
|
-
|
|
566
|
+
adducts_list = self.parameters.adducts if hasattr(self.parameters, 'adducts') and self.parameters.adducts else []
|
|
567
|
+
|
|
581
568
|
# Get parameters with study-specific defaults
|
|
582
|
-
charge_min = kwargs.get(
|
|
583
|
-
charge_max = kwargs.get(
|
|
584
|
-
max_combinations = kwargs.get(
|
|
585
|
-
min_probability = kwargs.get(
|
|
586
|
-
|
|
587
|
-
getattr(self.parameters, "adduct_min_probability", 0.04),
|
|
588
|
-
)
|
|
589
|
-
|
|
569
|
+
charge_min = kwargs.get('charge_min', -3) # Allow negative charges
|
|
570
|
+
charge_max = kwargs.get('charge_max', 3) # Study uses up to charge ±3
|
|
571
|
+
max_combinations = kwargs.get('max_combinations', 3) # Up to 3 combinations
|
|
572
|
+
min_probability = kwargs.get('min_probability', getattr(self.parameters, 'adduct_min_probability', 0.04))
|
|
573
|
+
|
|
590
574
|
# Parse base adduct specifications
|
|
591
575
|
base_specs = []
|
|
592
|
-
|
|
576
|
+
|
|
593
577
|
for adduct_str in adducts_list:
|
|
594
|
-
if not isinstance(adduct_str, str) or
|
|
578
|
+
if not isinstance(adduct_str, str) or ':' not in adduct_str:
|
|
595
579
|
continue
|
|
596
|
-
|
|
580
|
+
|
|
597
581
|
try:
|
|
598
|
-
parts = adduct_str.split(
|
|
582
|
+
parts = adduct_str.split(':')
|
|
599
583
|
if len(parts) != 3:
|
|
600
584
|
continue
|
|
601
|
-
|
|
585
|
+
|
|
602
586
|
formula_part = parts[0]
|
|
603
|
-
charge = int(parts[1])
|
|
587
|
+
charge = int(parts[1])
|
|
604
588
|
probability = float(parts[2])
|
|
605
|
-
|
|
589
|
+
|
|
606
590
|
# Calculate mass shift from formula
|
|
607
591
|
mass_shift = self._calculate_formula_mass_shift(formula_part)
|
|
608
|
-
|
|
609
|
-
base_specs.append(
|
|
610
|
-
|
|
611
|
-
|
|
612
|
-
|
|
613
|
-
|
|
614
|
-
|
|
615
|
-
|
|
616
|
-
|
|
617
|
-
)
|
|
618
|
-
|
|
592
|
+
|
|
593
|
+
base_specs.append({
|
|
594
|
+
'formula': formula_part,
|
|
595
|
+
'charge': charge,
|
|
596
|
+
'mass_shift': mass_shift,
|
|
597
|
+
'probability': probability,
|
|
598
|
+
'raw_string': adduct_str
|
|
599
|
+
})
|
|
600
|
+
|
|
619
601
|
except (ValueError, IndexError):
|
|
620
602
|
continue
|
|
621
|
-
|
|
603
|
+
|
|
622
604
|
if not base_specs:
|
|
623
605
|
# Return empty DataFrame with correct schema
|
|
624
|
-
return pl.DataFrame(
|
|
625
|
-
|
|
626
|
-
|
|
627
|
-
|
|
628
|
-
|
|
629
|
-
|
|
630
|
-
|
|
631
|
-
|
|
632
|
-
)
|
|
633
|
-
|
|
606
|
+
return pl.DataFrame({
|
|
607
|
+
'name': [],
|
|
608
|
+
'charge': [],
|
|
609
|
+
'mass_shift': [],
|
|
610
|
+
'probability': [],
|
|
611
|
+
'complexity': []
|
|
612
|
+
})
|
|
613
|
+
|
|
634
614
|
# Generate all valid combinations
|
|
635
615
|
combinations_list = []
|
|
636
|
-
|
|
616
|
+
|
|
637
617
|
# Separate specs by charge type
|
|
638
|
-
positive_specs = [spec for spec in base_specs if spec[
|
|
639
|
-
negative_specs = [spec for spec in base_specs if spec[
|
|
640
|
-
neutral_specs = [spec for spec in base_specs if spec[
|
|
641
|
-
|
|
618
|
+
positive_specs = [spec for spec in base_specs if spec['charge'] > 0]
|
|
619
|
+
negative_specs = [spec for spec in base_specs if spec['charge'] < 0]
|
|
620
|
+
neutral_specs = [spec for spec in base_specs if spec['charge'] == 0]
|
|
621
|
+
|
|
642
622
|
# 1. Single adducts (filter out neutral adducts with charge == 0)
|
|
643
623
|
for spec in base_specs:
|
|
644
|
-
if charge_min <= spec[
|
|
624
|
+
if charge_min <= spec['charge'] <= charge_max and spec['charge'] != 0:
|
|
645
625
|
formatted_name = self._format_adduct_name([spec])
|
|
646
|
-
combinations_list.append(
|
|
647
|
-
|
|
648
|
-
|
|
649
|
-
|
|
650
|
-
|
|
651
|
-
|
|
652
|
-
|
|
653
|
-
|
|
654
|
-
|
|
655
|
-
)
|
|
656
|
-
|
|
626
|
+
combinations_list.append({
|
|
627
|
+
'components': [spec],
|
|
628
|
+
'formatted_name': formatted_name,
|
|
629
|
+
'total_mass_shift': spec['mass_shift'],
|
|
630
|
+
'total_charge': spec['charge'],
|
|
631
|
+
'combined_probability': spec['probability'],
|
|
632
|
+
'complexity': 1
|
|
633
|
+
})
|
|
634
|
+
|
|
657
635
|
# 2. Generate multiply charged versions (2H+, 3H+, etc.) - already excludes charge==0
|
|
658
636
|
for spec in positive_specs + negative_specs:
|
|
659
|
-
base_charge = spec[
|
|
660
|
-
for multiplier in range(
|
|
661
|
-
2,
|
|
662
|
-
min(max_combinations + 1, 4),
|
|
663
|
-
): # Up to 3x multiplier
|
|
637
|
+
base_charge = spec['charge']
|
|
638
|
+
for multiplier in range(2, min(max_combinations + 1, 4)): # Up to 3x multiplier
|
|
664
639
|
total_charge = base_charge * multiplier
|
|
665
640
|
if charge_min <= total_charge <= charge_max and total_charge != 0:
|
|
666
641
|
components = [spec] * multiplier
|
|
667
642
|
formatted_name = self._format_adduct_name(components)
|
|
668
|
-
|
|
669
|
-
combinations_list.append(
|
|
670
|
-
|
|
671
|
-
|
|
672
|
-
|
|
673
|
-
|
|
674
|
-
|
|
675
|
-
|
|
676
|
-
|
|
677
|
-
|
|
678
|
-
)
|
|
679
|
-
|
|
643
|
+
|
|
644
|
+
combinations_list.append({
|
|
645
|
+
'components': components,
|
|
646
|
+
'formatted_name': formatted_name,
|
|
647
|
+
'total_mass_shift': spec['mass_shift'] * multiplier,
|
|
648
|
+
'total_charge': total_charge,
|
|
649
|
+
'combined_probability': spec['probability'] ** multiplier,
|
|
650
|
+
'complexity': multiplier
|
|
651
|
+
})
|
|
652
|
+
|
|
680
653
|
# 3. Mixed combinations (2-component) - limited for study level, filter out charge==0
|
|
681
654
|
if max_combinations >= 2:
|
|
682
655
|
# Positive + Neutral (1 neutral loss only) - but exclude if total charge == 0
|
|
683
656
|
for pos_spec in positive_specs[:2]: # Limit to first 2 positive specs
|
|
684
657
|
for neut_spec in neutral_specs[:1]: # Only 1 neutral loss
|
|
685
|
-
total_charge = pos_spec[
|
|
658
|
+
total_charge = pos_spec['charge'] + neut_spec['charge']
|
|
686
659
|
if charge_min <= total_charge <= charge_max and total_charge != 0:
|
|
687
660
|
components = [pos_spec, neut_spec]
|
|
688
661
|
formatted_name = self._format_adduct_name(components)
|
|
689
|
-
combinations_list.append(
|
|
690
|
-
|
|
691
|
-
|
|
692
|
-
|
|
693
|
-
|
|
694
|
-
|
|
695
|
-
|
|
696
|
-
|
|
697
|
-
|
|
698
|
-
"complexity": 2,
|
|
699
|
-
},
|
|
700
|
-
)
|
|
701
|
-
|
|
662
|
+
combinations_list.append({
|
|
663
|
+
'components': components,
|
|
664
|
+
'formatted_name': formatted_name,
|
|
665
|
+
'total_mass_shift': pos_spec['mass_shift'] + neut_spec['mass_shift'],
|
|
666
|
+
'total_charge': total_charge,
|
|
667
|
+
'combined_probability': pos_spec['probability'] * neut_spec['probability'],
|
|
668
|
+
'complexity': 2
|
|
669
|
+
})
|
|
670
|
+
|
|
702
671
|
# Convert to polars DataFrame
|
|
703
672
|
if combinations_list:
|
|
704
|
-
combinations_list.sort(
|
|
705
|
-
|
|
706
|
-
|
|
707
|
-
|
|
708
|
-
|
|
709
|
-
|
|
710
|
-
|
|
711
|
-
|
|
712
|
-
|
|
713
|
-
|
|
714
|
-
|
|
715
|
-
|
|
716
|
-
|
|
717
|
-
for combo in combinations_list
|
|
718
|
-
],
|
|
719
|
-
)
|
|
720
|
-
|
|
673
|
+
combinations_list.sort(key=lambda x: (-x['combined_probability'], x['complexity']))
|
|
674
|
+
|
|
675
|
+
adducts_df = pl.DataFrame([
|
|
676
|
+
{
|
|
677
|
+
'name': combo['formatted_name'],
|
|
678
|
+
'charge': combo['total_charge'],
|
|
679
|
+
'mass_shift': combo['total_mass_shift'],
|
|
680
|
+
'probability': combo['combined_probability'],
|
|
681
|
+
'complexity': combo['complexity']
|
|
682
|
+
}
|
|
683
|
+
for combo in combinations_list
|
|
684
|
+
])
|
|
685
|
+
|
|
721
686
|
# Filter by minimum probability threshold
|
|
722
687
|
if min_probability > 0.0:
|
|
723
688
|
adducts_before_filter = len(adducts_df)
|
|
724
689
|
adducts_df = adducts_df.filter(pl.col("probability") >= min_probability)
|
|
725
690
|
adducts_after_filter = len(adducts_df)
|
|
726
|
-
|
|
727
|
-
self.logger.debug(
|
|
728
|
-
|
|
729
|
-
)
|
|
730
|
-
|
|
691
|
+
|
|
692
|
+
self.logger.debug(f"Study adducts: generated {adducts_before_filter}, filtered to {adducts_after_filter} (min_prob={min_probability})")
|
|
693
|
+
|
|
731
694
|
else:
|
|
732
695
|
# Return empty DataFrame with correct schema
|
|
733
|
-
adducts_df = pl.DataFrame(
|
|
734
|
-
|
|
735
|
-
|
|
736
|
-
|
|
737
|
-
|
|
738
|
-
|
|
739
|
-
|
|
740
|
-
|
|
741
|
-
)
|
|
742
|
-
|
|
696
|
+
adducts_df = pl.DataFrame({
|
|
697
|
+
'name': [],
|
|
698
|
+
'charge': [],
|
|
699
|
+
'mass_shift': [],
|
|
700
|
+
'probability': [],
|
|
701
|
+
'complexity': []
|
|
702
|
+
})
|
|
703
|
+
|
|
743
704
|
return adducts_df
|
|
744
705
|
|
|
745
706
|
def _calculate_formula_mass_shift(self, formula: str) -> float:
|
|
746
707
|
"""Calculate mass shift from formula string like "+H", "-H2O", "+Na-H", etc."""
|
|
747
708
|
# Standard atomic masses
|
|
748
709
|
atomic_masses = {
|
|
749
|
-
|
|
750
|
-
|
|
751
|
-
|
|
752
|
-
|
|
753
|
-
|
|
754
|
-
|
|
755
|
-
|
|
756
|
-
|
|
757
|
-
|
|
758
|
-
|
|
759
|
-
|
|
760
|
-
|
|
761
|
-
|
|
762
|
-
|
|
763
|
-
|
|
710
|
+
'H': 1.007825,
|
|
711
|
+
'C': 12.0,
|
|
712
|
+
'N': 14.003074,
|
|
713
|
+
'O': 15.994915,
|
|
714
|
+
'Na': 22.989769,
|
|
715
|
+
'K': 38.963707,
|
|
716
|
+
'Li': 7.016003,
|
|
717
|
+
'Ca': 39.962591,
|
|
718
|
+
'Mg': 23.985042,
|
|
719
|
+
'Fe': 55.934938,
|
|
720
|
+
'Cl': 34.968853,
|
|
721
|
+
'Br': 78.918336,
|
|
722
|
+
'I': 126.904473,
|
|
723
|
+
'P': 30.973762,
|
|
724
|
+
'S': 31.972071
|
|
764
725
|
}
|
|
765
|
-
|
|
726
|
+
|
|
766
727
|
total_mass = 0.0
|
|
767
|
-
|
|
728
|
+
|
|
768
729
|
# Parse formula by splitting on + and - while preserving the operators
|
|
769
730
|
parts = []
|
|
770
731
|
current_part = ""
|
|
771
732
|
current_sign = 1
|
|
772
|
-
|
|
733
|
+
|
|
773
734
|
for char in formula:
|
|
774
|
-
if char ==
|
|
735
|
+
if char == '+':
|
|
775
736
|
if current_part:
|
|
776
737
|
parts.append((current_sign, current_part))
|
|
777
738
|
current_part = ""
|
|
778
739
|
current_sign = 1
|
|
779
|
-
elif char ==
|
|
740
|
+
elif char == '-':
|
|
780
741
|
if current_part:
|
|
781
742
|
parts.append((current_sign, current_part))
|
|
782
743
|
current_part = ""
|
|
783
744
|
current_sign = -1
|
|
784
745
|
else:
|
|
785
746
|
current_part += char
|
|
786
|
-
|
|
747
|
+
|
|
787
748
|
if current_part:
|
|
788
749
|
parts.append((current_sign, current_part))
|
|
789
|
-
|
|
750
|
+
|
|
790
751
|
# Process each part
|
|
791
752
|
for sign, part in parts:
|
|
792
753
|
if not part:
|
|
793
754
|
continue
|
|
794
|
-
|
|
755
|
+
|
|
795
756
|
# Parse element and count (e.g., "H2O" -> H:2, O:1)
|
|
796
757
|
elements = self._parse_element_counts(part)
|
|
797
|
-
|
|
758
|
+
|
|
798
759
|
for element, count in elements.items():
|
|
799
760
|
if element in atomic_masses:
|
|
800
761
|
total_mass += sign * atomic_masses[element] * count
|
|
801
|
-
|
|
762
|
+
|
|
802
763
|
return total_mass
|
|
803
764
|
|
|
804
765
|
def _parse_element_counts(self, formula_part: str) -> dict[str, int]:
|
|
805
766
|
"""Parse element counts from a formula part like 'H2O' -> {'H': 2, 'O': 1}"""
|
|
806
767
|
elements = {}
|
|
807
768
|
i = 0
|
|
808
|
-
|
|
769
|
+
|
|
809
770
|
while i < len(formula_part):
|
|
810
771
|
# Get element (uppercase letter, possibly followed by lowercase)
|
|
811
772
|
element = formula_part[i]
|
|
812
773
|
i += 1
|
|
813
|
-
|
|
774
|
+
|
|
814
775
|
while i < len(formula_part) and formula_part[i].islower():
|
|
815
776
|
element += formula_part[i]
|
|
816
777
|
i += 1
|
|
817
|
-
|
|
778
|
+
|
|
818
779
|
# Get count (digits following element)
|
|
819
780
|
count_str = ""
|
|
820
781
|
while i < len(formula_part) and formula_part[i].isdigit():
|
|
821
782
|
count_str += formula_part[i]
|
|
822
783
|
i += 1
|
|
823
|
-
|
|
784
|
+
|
|
824
785
|
count = int(count_str) if count_str else 1
|
|
825
786
|
elements[element] = elements.get(element, 0) + count
|
|
826
|
-
|
|
787
|
+
|
|
827
788
|
return elements
|
|
828
789
|
|
|
829
790
|
def _format_adduct_name(self, components: list[dict]) -> str:
|
|
830
791
|
"""Format adduct name from components like [M+H]1+ or [M+2H]2+"""
|
|
831
792
|
if not components:
|
|
832
793
|
return "[M]"
|
|
833
|
-
|
|
794
|
+
|
|
834
795
|
# Count occurrences of each formula
|
|
835
796
|
from collections import Counter
|
|
836
|
-
|
|
837
|
-
|
|
838
|
-
|
|
839
|
-
|
|
797
|
+
formula_counts = Counter(comp['formula'] for comp in components)
|
|
798
|
+
total_charge = sum(comp['charge'] for comp in components)
|
|
799
|
+
|
|
840
800
|
# Build formula part with proper multipliers
|
|
841
801
|
formula_parts = []
|
|
842
|
-
for formula, count in sorted(
|
|
843
|
-
formula_counts.items(),
|
|
844
|
-
): # Sort for consistent ordering
|
|
802
|
+
for formula, count in sorted(formula_counts.items()): # Sort for consistent ordering
|
|
845
803
|
if count == 1:
|
|
846
804
|
formula_parts.append(formula)
|
|
847
805
|
else:
|
|
848
806
|
# For multiple occurrences, use count prefix (e.g., 2H, 3Na)
|
|
849
807
|
# Handle special case where formula might already start with + or -
|
|
850
|
-
if formula.startswith((
|
|
808
|
+
if formula.startswith(('+', '-')):
|
|
851
809
|
sign = formula[0]
|
|
852
810
|
base_formula = formula[1:]
|
|
853
811
|
formula_parts.append(f"{sign}{count}{base_formula}")
|
|
854
812
|
else:
|
|
855
813
|
formula_parts.append(f"{count}{formula}")
|
|
856
|
-
|
|
814
|
+
|
|
857
815
|
# Combine formula parts
|
|
858
816
|
formula = "".join(formula_parts)
|
|
859
|
-
|
|
817
|
+
|
|
860
818
|
# Format charge
|
|
861
819
|
if total_charge == 0:
|
|
862
820
|
charge_str = ""
|
|
863
821
|
elif abs(total_charge) == 1:
|
|
864
822
|
charge_str = "1+" if total_charge > 0 else "1-"
|
|
865
823
|
else:
|
|
866
|
-
charge_str = (
|
|
867
|
-
|
|
868
|
-
)
|
|
869
|
-
|
|
824
|
+
charge_str = f"{abs(total_charge)}+" if total_charge > 0 else f"{abs(total_charge)}-"
|
|
825
|
+
|
|
870
826
|
return f"[M{formula}]{charge_str}"
|
|
871
827
|
|
|
872
828
|
def __str__(self):
|
|
@@ -878,12 +834,7 @@ class Study:
|
|
|
878
834
|
"""
|
|
879
835
|
return ""
|
|
880
836
|
|
|
881
|
-
def logger_update(
|
|
882
|
-
self,
|
|
883
|
-
level: str | None = None,
|
|
884
|
-
label: str | None = None,
|
|
885
|
-
sink: str | None = None,
|
|
886
|
-
):
|
|
837
|
+
def logger_update(self, level: str | None = None, label: str | None = None, sink: str | None = None):
|
|
887
838
|
"""Update the logging configuration for this Study instance.
|
|
888
839
|
|
|
889
840
|
Args:
|
|
@@ -915,21 +866,17 @@ class Study:
|
|
|
915
866
|
that are out of normal range.
|
|
916
867
|
"""
|
|
917
868
|
# Cache DataFrame lengths and existence checks
|
|
918
|
-
consensus_df_len = (
|
|
919
|
-
len(self.consensus_df) if not self.consensus_df.is_empty() else 0
|
|
920
|
-
)
|
|
869
|
+
consensus_df_len = len(self.consensus_df) if not self.consensus_df.is_empty() else 0
|
|
921
870
|
samples_df_len = len(self.samples_df) if not self.samples_df.is_empty() else 0
|
|
922
871
|
|
|
923
872
|
# Calculate consensus statistics only if consensus_df exists and has data
|
|
924
873
|
if consensus_df_len > 0:
|
|
925
874
|
# Execute the aggregation once
|
|
926
|
-
stats_result = self.consensus_df.select(
|
|
927
|
-
|
|
928
|
-
|
|
929
|
-
|
|
930
|
-
|
|
931
|
-
],
|
|
932
|
-
).row(0)
|
|
875
|
+
stats_result = self.consensus_df.select([
|
|
876
|
+
pl.col("number_samples").min().alias("min_samples"),
|
|
877
|
+
pl.col("number_samples").mean().alias("mean_samples"),
|
|
878
|
+
pl.col("number_samples").max().alias("max_samples"),
|
|
879
|
+
]).row(0)
|
|
933
880
|
|
|
934
881
|
min_samples = stats_result[0] if stats_result[0] is not None else 0
|
|
935
882
|
mean_samples = stats_result[1] if stats_result[1] is not None else 0
|
|
@@ -941,9 +888,7 @@ class Study:
|
|
|
941
888
|
|
|
942
889
|
# Count only features where 'filled' == False
|
|
943
890
|
if not self.features_df.is_empty() and "filled" in self.features_df.columns:
|
|
944
|
-
unfilled_features_count = self.features_df.filter(
|
|
945
|
-
~self.features_df["filled"],
|
|
946
|
-
).height
|
|
891
|
+
unfilled_features_count = self.features_df.filter(~self.features_df["filled"]).height
|
|
947
892
|
else:
|
|
948
893
|
unfilled_features_count = 0
|
|
949
894
|
|
|
@@ -966,20 +911,12 @@ class Study:
|
|
|
966
911
|
if unfilled_dtype != consensus_dtype:
|
|
967
912
|
# Cast both to Int64 if possible, otherwise keep as string
|
|
968
913
|
try:
|
|
969
|
-
unfilled_features = unfilled_features.with_columns(
|
|
970
|
-
|
|
971
|
-
)
|
|
972
|
-
consensus_feature_uids = [
|
|
973
|
-
int(uid) for uid in consensus_feature_uids
|
|
974
|
-
]
|
|
914
|
+
unfilled_features = unfilled_features.with_columns(pl.col("feature_uid").cast(pl.Int64))
|
|
915
|
+
consensus_feature_uids = [int(uid) for uid in consensus_feature_uids]
|
|
975
916
|
except Exception:
|
|
976
917
|
# If casting fails, ensure both are strings
|
|
977
|
-
unfilled_features = unfilled_features.with_columns(
|
|
978
|
-
|
|
979
|
-
)
|
|
980
|
-
consensus_feature_uids = [
|
|
981
|
-
str(uid) for uid in consensus_feature_uids
|
|
982
|
-
]
|
|
918
|
+
unfilled_features = unfilled_features.with_columns(pl.col("feature_uid").cast(pl.Utf8))
|
|
919
|
+
consensus_feature_uids = [str(uid) for uid in consensus_feature_uids]
|
|
983
920
|
|
|
984
921
|
# Count unfilled features that are in consensus
|
|
985
922
|
in_consensus_count = unfilled_features.filter(
|
|
@@ -988,22 +925,14 @@ class Study:
|
|
|
988
925
|
|
|
989
926
|
# Calculate ratios that sum to 100%
|
|
990
927
|
total_unfilled = unfilled_features.height
|
|
991
|
-
ratio_in_consensus_to_total = (
|
|
992
|
-
|
|
993
|
-
)
|
|
994
|
-
ratio_not_in_consensus_to_total = (
|
|
995
|
-
100 - ratio_in_consensus_to_total if total_unfilled > 0 else 0
|
|
996
|
-
)
|
|
928
|
+
ratio_in_consensus_to_total = (in_consensus_count / total_unfilled * 100) if total_unfilled > 0 else 0
|
|
929
|
+
ratio_not_in_consensus_to_total = 100 - ratio_in_consensus_to_total if total_unfilled > 0 else 0
|
|
997
930
|
else:
|
|
998
931
|
ratio_in_consensus_to_total = 0
|
|
999
932
|
ratio_not_in_consensus_to_total = 0
|
|
1000
933
|
|
|
1001
934
|
# Optimize chrom completeness calculation
|
|
1002
|
-
if (
|
|
1003
|
-
consensus_df_len > 0
|
|
1004
|
-
and samples_df_len > 0
|
|
1005
|
-
and not self.features_df.is_empty()
|
|
1006
|
-
):
|
|
935
|
+
if consensus_df_len > 0 and samples_df_len > 0 and not self.features_df.is_empty():
|
|
1007
936
|
# Ensure matching data types for join keys
|
|
1008
937
|
features_dtype = self.features_df["feature_uid"].dtype
|
|
1009
938
|
consensus_dtype = self.consensus_mapping_df["feature_uid"].dtype
|
|
@@ -1011,17 +940,13 @@ class Study:
|
|
|
1011
940
|
if features_dtype != consensus_dtype:
|
|
1012
941
|
# Try to cast both to Int64, fallback to string if needed
|
|
1013
942
|
try:
|
|
1014
|
-
self.features_df = self.features_df.with_columns(
|
|
1015
|
-
pl.col("feature_uid").cast(pl.Int64),
|
|
1016
|
-
)
|
|
943
|
+
self.features_df = self.features_df.with_columns(pl.col("feature_uid").cast(pl.Int64))
|
|
1017
944
|
self.consensus_mapping_df = self.consensus_mapping_df.with_columns(
|
|
1018
945
|
pl.col("feature_uid").cast(pl.Int64),
|
|
1019
946
|
)
|
|
1020
947
|
except Exception:
|
|
1021
948
|
# If casting to Int64 fails, cast both to string
|
|
1022
|
-
self.features_df = self.features_df.with_columns(
|
|
1023
|
-
pl.col("feature_uid").cast(pl.Utf8),
|
|
1024
|
-
)
|
|
949
|
+
self.features_df = self.features_df.with_columns(pl.col("feature_uid").cast(pl.Utf8))
|
|
1025
950
|
self.consensus_mapping_df = self.consensus_mapping_df.with_columns(
|
|
1026
951
|
pl.col("feature_uid").cast(pl.Utf8),
|
|
1027
952
|
)
|
|
@@ -1042,9 +967,7 @@ class Study:
|
|
|
1042
967
|
else:
|
|
1043
968
|
non_null_chroms = 0
|
|
1044
969
|
total_possible = samples_df_len * consensus_df_len
|
|
1045
|
-
chrom_completeness =
|
|
1046
|
-
non_null_chroms / total_possible if total_possible > 0 else 0
|
|
1047
|
-
)
|
|
970
|
+
chrom_completeness = non_null_chroms / total_possible if total_possible > 0 else 0
|
|
1048
971
|
else:
|
|
1049
972
|
chrom_completeness = 0
|
|
1050
973
|
|
|
@@ -1056,37 +979,23 @@ class Study:
|
|
|
1056
979
|
|
|
1057
980
|
if not self.consensus_df.is_empty():
|
|
1058
981
|
# Compute RT spread using only consensus rows with number_samples >= half the number of samples
|
|
1059
|
-
threshold = (
|
|
1060
|
-
self.consensus_df.select(pl.col("number_samples").max()).item() / 2
|
|
1061
|
-
if not self.samples_df.is_empty()
|
|
1062
|
-
else 0
|
|
1063
|
-
)
|
|
982
|
+
threshold = self.consensus_df.select(pl.col("number_samples").max()).item() / 2 if not self.samples_df.is_empty() else 0
|
|
1064
983
|
filtered = self.consensus_df.filter(pl.col("number_samples") >= threshold)
|
|
1065
984
|
if filtered.is_empty():
|
|
1066
985
|
rt_spread = -1.0
|
|
1067
986
|
else:
|
|
1068
|
-
rt_spread_row = filtered.select(
|
|
1069
|
-
|
|
1070
|
-
).row(0)
|
|
1071
|
-
rt_spread = (
|
|
1072
|
-
float(rt_spread_row[0])
|
|
1073
|
-
if rt_spread_row and rt_spread_row[0] is not None
|
|
1074
|
-
else 0.0
|
|
1075
|
-
)
|
|
987
|
+
rt_spread_row = filtered.select((pl.col("rt_max") - pl.col("rt_min")).mean()).row(0)
|
|
988
|
+
rt_spread = float(rt_spread_row[0]) if rt_spread_row and rt_spread_row[0] is not None else 0.0
|
|
1076
989
|
else:
|
|
1077
990
|
rt_spread = -1.0
|
|
1078
991
|
|
|
1079
992
|
# Calculate percentage of consensus features with MS2
|
|
1080
993
|
consensus_with_ms2_percentage = (
|
|
1081
|
-
(consensus_with_ms2_count / consensus_df_len * 100)
|
|
1082
|
-
if consensus_df_len > 0
|
|
1083
|
-
else 0
|
|
994
|
+
(consensus_with_ms2_count / consensus_df_len * 100) if consensus_df_len > 0 else 0
|
|
1084
995
|
)
|
|
1085
996
|
|
|
1086
997
|
# Total MS2 spectra count
|
|
1087
|
-
total_ms2_count = (
|
|
1088
|
-
len(self.consensus_ms2) if not self.consensus_ms2.is_empty() else 0
|
|
1089
|
-
)
|
|
998
|
+
total_ms2_count = len(self.consensus_ms2) if not self.consensus_ms2.is_empty() else 0
|
|
1090
999
|
|
|
1091
1000
|
# Estimate memory usage
|
|
1092
1001
|
memory_usage = (
|
|
@@ -1099,27 +1008,15 @@ class Study:
|
|
|
1099
1008
|
|
|
1100
1009
|
# Add warning symbols for out-of-range values
|
|
1101
1010
|
consensus_warning = f" {_WARNING_SYMBOL}" if consensus_df_len < 50 else ""
|
|
1102
|
-
|
|
1011
|
+
|
|
1103
1012
|
rt_spread_text = "N/A" if rt_spread < 0 else f"{rt_spread:.3f}s"
|
|
1104
|
-
rt_spread_warning = (
|
|
1105
|
-
|
|
1106
|
-
if rt_spread >= 0 and (rt_spread > 5 or rt_spread < 0.1)
|
|
1107
|
-
else ""
|
|
1108
|
-
)
|
|
1109
|
-
|
|
1013
|
+
rt_spread_warning = f" {_WARNING_SYMBOL}" if rt_spread >= 0 and (rt_spread > 5 or rt_spread < 0.1) else ""
|
|
1014
|
+
|
|
1110
1015
|
chrom_completeness_pct = chrom_completeness * 100
|
|
1111
|
-
chrom_warning =
|
|
1112
|
-
|
|
1113
|
-
if chrom_completeness_pct < 10 and chrom_completeness_pct >= 0
|
|
1114
|
-
else ""
|
|
1115
|
-
)
|
|
1116
|
-
|
|
1016
|
+
chrom_warning = f" {_WARNING_SYMBOL}" if chrom_completeness_pct < 10 and chrom_completeness_pct >= 0 else ""
|
|
1017
|
+
|
|
1117
1018
|
max_samples_warning = ""
|
|
1118
|
-
if (
|
|
1119
|
-
isinstance(max_samples, (int, float))
|
|
1120
|
-
and samples_df_len > 0
|
|
1121
|
-
and max_samples > 0
|
|
1122
|
-
):
|
|
1019
|
+
if isinstance(max_samples, (int, float)) and samples_df_len > 0 and max_samples > 0:
|
|
1123
1020
|
if max_samples < samples_df_len / 3.0:
|
|
1124
1021
|
max_samples_warning = f" {_WARNING_SYMBOL}"
|
|
1125
1022
|
elif max_samples < samples_df_len * 0.8:
|