masster 0.4.4__py3-none-any.whl → 0.4.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of masster might be problematic. Click here for more details.
- masster/__init__.py +8 -8
- masster/chromatogram.py +1 -1
- masster/data/libs/urine.csv +3 -3
- masster/logger.py +11 -11
- masster/sample/__init__.py +1 -1
- masster/sample/adducts.py +338 -264
- masster/sample/defaults/find_adducts_def.py +21 -8
- masster/sample/h5.py +561 -282
- masster/sample/helpers.py +131 -75
- masster/sample/lib.py +4 -4
- masster/sample/load.py +31 -17
- masster/sample/parameters.py +1 -1
- masster/sample/plot.py +7 -7
- masster/sample/processing.py +117 -87
- masster/sample/sample.py +103 -90
- masster/sample/sample5_schema.json +44 -44
- masster/sample/save.py +35 -12
- masster/spectrum.py +1 -1
- masster/study/__init__.py +1 -1
- masster/study/defaults/align_def.py +5 -1
- masster/study/defaults/identify_def.py +3 -1
- masster/study/defaults/study_def.py +58 -25
- masster/study/export.py +360 -210
- masster/study/h5.py +560 -158
- masster/study/helpers.py +496 -203
- masster/study/helpers_optimized.py +1 -1
- masster/study/id.py +538 -349
- masster/study/load.py +233 -143
- masster/study/plot.py +71 -71
- masster/study/processing.py +456 -254
- masster/study/save.py +15 -5
- masster/study/study.py +213 -131
- masster/study/study5_schema.json +149 -149
- {masster-0.4.4.dist-info → masster-0.4.5.dist-info}/METADATA +3 -1
- {masster-0.4.4.dist-info → masster-0.4.5.dist-info}/RECORD +39 -39
- {masster-0.4.4.dist-info → masster-0.4.5.dist-info}/WHEEL +0 -0
- {masster-0.4.4.dist-info → masster-0.4.5.dist-info}/entry_points.txt +0 -0
- {masster-0.4.4.dist-info → masster-0.4.5.dist-info}/licenses/LICENSE +0 -0
- {masster-0.4.4.dist-info → masster-0.4.5.dist-info}/top_level.txt +0 -0
masster/study/save.py
CHANGED
|
@@ -9,7 +9,7 @@ import pyopenms as oms
|
|
|
9
9
|
|
|
10
10
|
from tqdm import tqdm
|
|
11
11
|
|
|
12
|
-
from
|
|
12
|
+
from master.sample.sample import Sample
|
|
13
13
|
|
|
14
14
|
|
|
15
15
|
def save(self, filename=None, add_timestamp=True, compress=False):
|
|
@@ -48,8 +48,14 @@ def save(self, filename=None, add_timestamp=True, compress=False):
|
|
|
48
48
|
# Log file size information for performance monitoring
|
|
49
49
|
if hasattr(self, "features_df") and not self.features_df.is_empty():
|
|
50
50
|
feature_count = len(self.features_df)
|
|
51
|
-
sample_count =
|
|
52
|
-
|
|
51
|
+
sample_count = (
|
|
52
|
+
len(self.samples_df)
|
|
53
|
+
if hasattr(self, "samples_df") and not self.samples_df.is_empty()
|
|
54
|
+
else 0
|
|
55
|
+
)
|
|
56
|
+
self.logger.info(
|
|
57
|
+
f"Saving study with {sample_count} samples and {feature_count} features to {filename}",
|
|
58
|
+
)
|
|
53
59
|
|
|
54
60
|
# Use compressed mode for large datasets
|
|
55
61
|
if compress:
|
|
@@ -121,7 +127,9 @@ def save_samples(self, samples=None):
|
|
|
121
127
|
if sample_path.endswith(".sample5"):
|
|
122
128
|
# If sample_path is a .sample5 file, save featureXML in the same directory
|
|
123
129
|
featurexml_filename = sample_path.replace(".sample5", ".featureXML")
|
|
124
|
-
self.logger.debug(
|
|
130
|
+
self.logger.debug(
|
|
131
|
+
f"Saving featureXML alongside .sample5 file: {featurexml_filename}",
|
|
132
|
+
)
|
|
125
133
|
else:
|
|
126
134
|
# Fallback to study folder or current directory (original behavior)
|
|
127
135
|
if self.folder is not None:
|
|
@@ -134,7 +142,9 @@ def save_samples(self, samples=None):
|
|
|
134
142
|
os.getcwd(),
|
|
135
143
|
sample_name + ".featureXML",
|
|
136
144
|
)
|
|
137
|
-
self.logger.debug(
|
|
145
|
+
self.logger.debug(
|
|
146
|
+
f"Saving featureXML to default location: {featurexml_filename}",
|
|
147
|
+
)
|
|
138
148
|
|
|
139
149
|
fh = oms.FeatureXMLFile()
|
|
140
150
|
if sample_index is not None and sample_index < len(self.features_maps):
|
masster/study/study.py
CHANGED
|
@@ -52,100 +52,111 @@ import sys
|
|
|
52
52
|
import polars as pl
|
|
53
53
|
|
|
54
54
|
# Study-specific imports
|
|
55
|
-
from
|
|
56
|
-
from
|
|
57
|
-
from
|
|
58
|
-
from
|
|
59
|
-
from
|
|
60
|
-
from
|
|
61
|
-
from
|
|
62
|
-
from
|
|
63
|
-
from
|
|
64
|
-
from
|
|
65
|
-
from
|
|
66
|
-
from
|
|
67
|
-
from
|
|
68
|
-
from
|
|
69
|
-
from
|
|
70
|
-
from
|
|
71
|
-
from
|
|
72
|
-
from
|
|
73
|
-
from
|
|
74
|
-
from
|
|
75
|
-
from
|
|
76
|
-
from
|
|
77
|
-
from
|
|
78
|
-
from
|
|
79
|
-
from
|
|
80
|
-
from
|
|
81
|
-
from
|
|
82
|
-
from
|
|
83
|
-
from
|
|
84
|
-
from
|
|
85
|
-
from
|
|
86
|
-
from
|
|
87
|
-
from
|
|
88
|
-
from
|
|
89
|
-
from
|
|
90
|
-
from
|
|
91
|
-
from
|
|
92
|
-
from
|
|
93
|
-
from
|
|
94
|
-
from
|
|
95
|
-
from
|
|
96
|
-
from
|
|
97
|
-
from
|
|
98
|
-
from
|
|
99
|
-
from
|
|
100
|
-
from
|
|
101
|
-
from
|
|
102
|
-
from
|
|
103
|
-
from
|
|
104
|
-
from
|
|
105
|
-
from
|
|
106
|
-
from
|
|
107
|
-
from
|
|
108
|
-
from
|
|
109
|
-
from
|
|
110
|
-
from
|
|
111
|
-
from
|
|
112
|
-
from
|
|
113
|
-
from
|
|
114
|
-
from
|
|
115
|
-
from
|
|
116
|
-
from
|
|
117
|
-
from
|
|
118
|
-
from
|
|
119
|
-
from
|
|
120
|
-
from
|
|
121
|
-
from
|
|
122
|
-
from
|
|
123
|
-
from
|
|
124
|
-
from
|
|
125
|
-
from
|
|
126
|
-
from
|
|
127
|
-
from
|
|
128
|
-
from
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
from
|
|
136
|
-
from
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
55
|
+
from master.study.h5 import _load_study5
|
|
56
|
+
from master.study.h5 import _save_study5
|
|
57
|
+
from master.study.h5 import _save_study5_compressed
|
|
58
|
+
from master.study.helpers import _get_consensus_uids
|
|
59
|
+
from master.study.helpers import _get_feature_uids
|
|
60
|
+
from master.study.helpers import _get_sample_uids
|
|
61
|
+
from master.study.helpers import _ensure_features_df_schema_order
|
|
62
|
+
from master.study.helpers import compress
|
|
63
|
+
from master.study.helpers import compress_features
|
|
64
|
+
from master.study.helpers import compress_ms2
|
|
65
|
+
from master.study.helpers import compress_chrom
|
|
66
|
+
from master.study.helpers import restore_features
|
|
67
|
+
from master.study.helpers import restore_chrom
|
|
68
|
+
from master.study.helpers import restore_ms2
|
|
69
|
+
from master.study.helpers import decompress
|
|
70
|
+
from master.study.helpers import fill_reset
|
|
71
|
+
from master.study.helpers import get_chrom
|
|
72
|
+
from master.study.helpers import get_sample
|
|
73
|
+
from master.study.helpers import get_consensus
|
|
74
|
+
from master.study.helpers import get_consensus_matches
|
|
75
|
+
from master.study.helpers import get_consensus_matrix
|
|
76
|
+
from master.study.helpers import get_orphans
|
|
77
|
+
from master.study.helpers import get_gaps_matrix
|
|
78
|
+
from master.study.helpers import get_gaps_stats
|
|
79
|
+
from master.study.helpers import align_reset
|
|
80
|
+
from master.study.helpers import set_folder
|
|
81
|
+
from master.study.helpers import set_source
|
|
82
|
+
from master.study.helpers import sample_color
|
|
83
|
+
from master.study.helpers import sample_color_reset
|
|
84
|
+
from master.study.helpers import sample_name_replace
|
|
85
|
+
from master.study.helpers import sample_name_reset
|
|
86
|
+
from master.study.helpers import samples_select
|
|
87
|
+
from master.study.helpers import samples_delete
|
|
88
|
+
from master.study.helpers import features_select
|
|
89
|
+
from master.study.helpers import features_filter
|
|
90
|
+
from master.study.helpers import features_delete
|
|
91
|
+
from master.study.helpers import consensus_select
|
|
92
|
+
from master.study.helpers import consensus_filter
|
|
93
|
+
from master.study.helpers import consensus_delete
|
|
94
|
+
from master.study.load import add
|
|
95
|
+
from master.study.load import add_sample
|
|
96
|
+
from master.study.load import _add_samples_batch
|
|
97
|
+
from master.study.load import _add_sample_optimized
|
|
98
|
+
from master.study.load import _add_sample_standard
|
|
99
|
+
from master.study.load import _sample_color_reset_optimized
|
|
100
|
+
from master.study.load import fill_single
|
|
101
|
+
from master.study.load import fill
|
|
102
|
+
from master.study.load import _process_sample_for_parallel_fill
|
|
103
|
+
from master.study.load import _get_missing_consensus_sample_combinations
|
|
104
|
+
from master.study.load import load
|
|
105
|
+
from master.study.load import _load_consensusXML
|
|
106
|
+
from master.study.load import load_features
|
|
107
|
+
from master.study.load import sanitize
|
|
108
|
+
from master.study.plot import plot_alignment
|
|
109
|
+
from master.study.plot import plot_consensus_2d
|
|
110
|
+
from master.study.plot import plot_samples_2d
|
|
111
|
+
from master.study.plot import plot_consensus_stats
|
|
112
|
+
from master.study.plot import plot_chrom
|
|
113
|
+
from master.study.plot import plot_pca
|
|
114
|
+
from master.study.plot import plot_bpc
|
|
115
|
+
from master.study.plot import plot_tic
|
|
116
|
+
from master.study.plot import plot_eic
|
|
117
|
+
from master.study.plot import plot_rt_correction
|
|
118
|
+
from master.study.processing import align
|
|
119
|
+
from master.study.processing import merge
|
|
120
|
+
from master.study.processing import integrate
|
|
121
|
+
from master.study.processing import find_ms2
|
|
122
|
+
from master.study.parameters import store_history
|
|
123
|
+
from master.study.parameters import get_parameters
|
|
124
|
+
from master.study.parameters import update_parameters
|
|
125
|
+
from master.study.parameters import get_parameters_property
|
|
126
|
+
from master.study.parameters import set_parameters_property
|
|
127
|
+
from master.study.save import save, save_consensus, _save_consensusXML, save_samples
|
|
128
|
+
from master.study.export import (
|
|
129
|
+
export_mgf,
|
|
130
|
+
export_mztab,
|
|
131
|
+
export_xlsx,
|
|
132
|
+
export_parquet,
|
|
133
|
+
_get_mgf_df,
|
|
134
|
+
)
|
|
135
|
+
from master.study.id import lib_load, identify, get_id, id_reset, lib_reset
|
|
136
|
+
from master.study.id import (
|
|
137
|
+
_get_adducts,
|
|
138
|
+
_calculate_formula_mass_shift,
|
|
139
|
+
_format_adduct_name,
|
|
140
|
+
_parse_element_counts,
|
|
141
|
+
)
|
|
142
|
+
|
|
143
|
+
from master.logger import MasterLogger
|
|
144
|
+
from master.study.defaults.study_def import study_defaults
|
|
145
|
+
from master.study.defaults.align_def import align_defaults
|
|
146
|
+
from master.study.defaults.export_def import export_mgf_defaults
|
|
147
|
+
from master.study.defaults.fill_chrom_def import fill_chrom_defaults
|
|
148
|
+
from master.study.defaults.fill_def import fill_defaults
|
|
149
|
+
from master.study.defaults.find_consensus_def import find_consensus_defaults
|
|
150
|
+
from master.study.defaults.find_ms2_def import find_ms2_defaults
|
|
151
|
+
from master.study.defaults.integrate_chrom_def import integrate_chrom_defaults
|
|
152
|
+
from master.study.defaults.integrate_def import integrate_defaults
|
|
153
|
+
from master.study.defaults.merge_def import merge_defaults
|
|
143
154
|
|
|
144
155
|
# Import sample defaults
|
|
145
|
-
from
|
|
146
|
-
from
|
|
147
|
-
from
|
|
148
|
-
from
|
|
156
|
+
from master.sample.defaults.sample_def import sample_defaults
|
|
157
|
+
from master.sample.defaults.find_features_def import find_features_defaults
|
|
158
|
+
from master.sample.defaults.find_adducts_def import find_adducts_defaults
|
|
159
|
+
from master.sample.defaults.get_spectrum_def import get_spectrum_defaults
|
|
149
160
|
|
|
150
161
|
# Warning symbols for info display
|
|
151
162
|
_WARNING_SYMBOL = "⚠️" # Yellow warning triangle
|
|
@@ -177,7 +188,7 @@ class Study:
|
|
|
177
188
|
- `export_consensus()`: Export consensus features for downstream analysis.
|
|
178
189
|
|
|
179
190
|
Example Usage:
|
|
180
|
-
>>> from
|
|
191
|
+
>>> from master import study
|
|
181
192
|
>>> study_obj = study(folder="./data")
|
|
182
193
|
>>> study_obj.load_folder("./mzml_files")
|
|
183
194
|
>>> study_obj.process_all()
|
|
@@ -272,7 +283,11 @@ class Study:
|
|
|
272
283
|
# Set instance attributes (ensure proper string values for logger)
|
|
273
284
|
self.folder = params.folder
|
|
274
285
|
self.label = params.label
|
|
275
|
-
self.polarity =
|
|
286
|
+
self.polarity = (
|
|
287
|
+
params.polarity
|
|
288
|
+
if params.polarity in ["positive", "negative", "pos", "neg"]
|
|
289
|
+
else "positive"
|
|
290
|
+
)
|
|
276
291
|
self.log_level = params.log_level.upper() if params.log_level else "INFO"
|
|
277
292
|
self.log_label = params.log_label + " | " if params.log_label else ""
|
|
278
293
|
self.log_sink = params.log_sink
|
|
@@ -327,7 +342,7 @@ class Study:
|
|
|
327
342
|
self.id_df = pl.DataFrame()
|
|
328
343
|
|
|
329
344
|
# Initialize independent logger
|
|
330
|
-
self.logger =
|
|
345
|
+
self.logger = MasterLogger(
|
|
331
346
|
instance_type="study",
|
|
332
347
|
level=self.log_level.upper(),
|
|
333
348
|
label=self.log_label,
|
|
@@ -427,7 +442,9 @@ class Study:
|
|
|
427
442
|
fill = fill
|
|
428
443
|
fill_chrom = fill # Backward compatibility alias
|
|
429
444
|
_process_sample_for_parallel_fill = _process_sample_for_parallel_fill
|
|
430
|
-
_get_missing_consensus_sample_combinations =
|
|
445
|
+
_get_missing_consensus_sample_combinations = (
|
|
446
|
+
_get_missing_consensus_sample_combinations
|
|
447
|
+
)
|
|
431
448
|
_load_consensusXML = _load_consensusXML
|
|
432
449
|
load_features = load_features
|
|
433
450
|
sanitize = sanitize
|
|
@@ -462,20 +479,20 @@ class Study:
|
|
|
462
479
|
|
|
463
480
|
def _reload(self):
|
|
464
481
|
"""
|
|
465
|
-
Reloads all
|
|
482
|
+
Reloads all master modules to pick up any changes to their source code,
|
|
466
483
|
and updates the instance's class reference to the newly reloaded class version.
|
|
467
484
|
This ensures that the instance uses the latest implementation without restarting the interpreter.
|
|
468
485
|
"""
|
|
469
486
|
# Reset logger configuration flags to allow proper reconfiguration after reload
|
|
470
487
|
""" try:
|
|
471
|
-
import
|
|
488
|
+
import master.sample.logger as logger_module
|
|
472
489
|
|
|
473
490
|
if hasattr(logger_module, "_STUDY_LOGGER_CONFIGURED"):
|
|
474
491
|
logger_module._STUDY_LOGGER_CONFIGURED = False
|
|
475
492
|
except Exception:
|
|
476
493
|
pass"""
|
|
477
494
|
|
|
478
|
-
# Get the base module name (
|
|
495
|
+
# Get the base module name (master)
|
|
479
496
|
base_modname = self.__class__.__module__.split(".")[0]
|
|
480
497
|
current_module = self.__class__.__module__
|
|
481
498
|
|
|
@@ -485,10 +502,13 @@ class Study:
|
|
|
485
502
|
|
|
486
503
|
# Get all currently loaded modules that are part of the study package
|
|
487
504
|
for module_name in sys.modules:
|
|
488
|
-
if
|
|
505
|
+
if (
|
|
506
|
+
module_name.startswith(study_module_prefix)
|
|
507
|
+
and module_name != current_module
|
|
508
|
+
):
|
|
489
509
|
study_modules.append(module_name)
|
|
490
510
|
|
|
491
|
-
# Add core
|
|
511
|
+
# Add core master modules
|
|
492
512
|
core_modules = [
|
|
493
513
|
f"{base_modname}._version",
|
|
494
514
|
f"{base_modname}.chromatogram",
|
|
@@ -500,7 +520,10 @@ class Study:
|
|
|
500
520
|
sample_modules = []
|
|
501
521
|
sample_module_prefix = f"{base_modname}.sample."
|
|
502
522
|
for module_name in sys.modules:
|
|
503
|
-
if
|
|
523
|
+
if (
|
|
524
|
+
module_name.startswith(sample_module_prefix)
|
|
525
|
+
and module_name != current_module
|
|
526
|
+
):
|
|
504
527
|
sample_modules.append(module_name)
|
|
505
528
|
|
|
506
529
|
all_modules_to_reload = core_modules + sample_modules + study_modules
|
|
@@ -538,7 +561,12 @@ class Study:
|
|
|
538
561
|
"""
|
|
539
562
|
return ""
|
|
540
563
|
|
|
541
|
-
def logger_update(
|
|
564
|
+
def logger_update(
|
|
565
|
+
self,
|
|
566
|
+
level: str | None = None,
|
|
567
|
+
label: str | None = None,
|
|
568
|
+
sink: str | None = None,
|
|
569
|
+
):
|
|
542
570
|
"""Update the logging configuration for this Study instance.
|
|
543
571
|
|
|
544
572
|
Args:
|
|
@@ -570,17 +598,21 @@ class Study:
|
|
|
570
598
|
that are out of normal range.
|
|
571
599
|
"""
|
|
572
600
|
# Cache DataFrame lengths and existence checks
|
|
573
|
-
consensus_df_len =
|
|
601
|
+
consensus_df_len = (
|
|
602
|
+
len(self.consensus_df) if not self.consensus_df.is_empty() else 0
|
|
603
|
+
)
|
|
574
604
|
samples_df_len = len(self.samples_df) if not self.samples_df.is_empty() else 0
|
|
575
605
|
|
|
576
606
|
# Calculate consensus statistics only if consensus_df exists and has data
|
|
577
607
|
if consensus_df_len > 0:
|
|
578
608
|
# Execute the aggregation once
|
|
579
|
-
stats_result = self.consensus_df.select(
|
|
580
|
-
|
|
581
|
-
|
|
582
|
-
|
|
583
|
-
|
|
609
|
+
stats_result = self.consensus_df.select(
|
|
610
|
+
[
|
|
611
|
+
pl.col("number_samples").min().alias("min_samples"),
|
|
612
|
+
pl.col("number_samples").mean().alias("mean_samples"),
|
|
613
|
+
pl.col("number_samples").max().alias("max_samples"),
|
|
614
|
+
],
|
|
615
|
+
).row(0)
|
|
584
616
|
|
|
585
617
|
min_samples = stats_result[0] if stats_result[0] is not None else 0
|
|
586
618
|
mean_samples = stats_result[1] if stats_result[1] is not None else 0
|
|
@@ -592,7 +624,9 @@ class Study:
|
|
|
592
624
|
|
|
593
625
|
# Count only features where 'filled' == False
|
|
594
626
|
if not self.features_df.is_empty() and "filled" in self.features_df.columns:
|
|
595
|
-
unfilled_features_count = self.features_df.filter(
|
|
627
|
+
unfilled_features_count = self.features_df.filter(
|
|
628
|
+
~self.features_df["filled"],
|
|
629
|
+
).height
|
|
596
630
|
else:
|
|
597
631
|
unfilled_features_count = 0
|
|
598
632
|
|
|
@@ -615,12 +649,20 @@ class Study:
|
|
|
615
649
|
if unfilled_dtype != consensus_dtype:
|
|
616
650
|
# Cast both to Int64 if possible, otherwise keep as string
|
|
617
651
|
try:
|
|
618
|
-
unfilled_features = unfilled_features.with_columns(
|
|
619
|
-
|
|
652
|
+
unfilled_features = unfilled_features.with_columns(
|
|
653
|
+
pl.col("feature_uid").cast(pl.Int64),
|
|
654
|
+
)
|
|
655
|
+
consensus_feature_uids = [
|
|
656
|
+
int(uid) for uid in consensus_feature_uids
|
|
657
|
+
]
|
|
620
658
|
except Exception:
|
|
621
659
|
# If casting fails, ensure both are strings
|
|
622
|
-
unfilled_features = unfilled_features.with_columns(
|
|
623
|
-
|
|
660
|
+
unfilled_features = unfilled_features.with_columns(
|
|
661
|
+
pl.col("feature_uid").cast(pl.Utf8),
|
|
662
|
+
)
|
|
663
|
+
consensus_feature_uids = [
|
|
664
|
+
str(uid) for uid in consensus_feature_uids
|
|
665
|
+
]
|
|
624
666
|
|
|
625
667
|
# Count unfilled features that are in consensus
|
|
626
668
|
in_consensus_count = unfilled_features.filter(
|
|
@@ -629,14 +671,22 @@ class Study:
|
|
|
629
671
|
|
|
630
672
|
# Calculate ratios that sum to 100%
|
|
631
673
|
total_unfilled = unfilled_features.height
|
|
632
|
-
ratio_in_consensus_to_total = (
|
|
633
|
-
|
|
674
|
+
ratio_in_consensus_to_total = (
|
|
675
|
+
(in_consensus_count / total_unfilled * 100) if total_unfilled > 0 else 0
|
|
676
|
+
)
|
|
677
|
+
ratio_not_in_consensus_to_total = (
|
|
678
|
+
100 - ratio_in_consensus_to_total if total_unfilled > 0 else 0
|
|
679
|
+
)
|
|
634
680
|
else:
|
|
635
681
|
ratio_in_consensus_to_total = 0
|
|
636
682
|
ratio_not_in_consensus_to_total = 0
|
|
637
683
|
|
|
638
684
|
# Optimize chrom completeness calculation
|
|
639
|
-
if
|
|
685
|
+
if (
|
|
686
|
+
consensus_df_len > 0
|
|
687
|
+
and samples_df_len > 0
|
|
688
|
+
and not self.features_df.is_empty()
|
|
689
|
+
):
|
|
640
690
|
# Ensure matching data types for join keys
|
|
641
691
|
features_dtype = self.features_df["feature_uid"].dtype
|
|
642
692
|
consensus_dtype = self.consensus_mapping_df["feature_uid"].dtype
|
|
@@ -644,13 +694,17 @@ class Study:
|
|
|
644
694
|
if features_dtype != consensus_dtype:
|
|
645
695
|
# Try to cast both to Int64, fallback to string if needed
|
|
646
696
|
try:
|
|
647
|
-
self.features_df = self.features_df.with_columns(
|
|
697
|
+
self.features_df = self.features_df.with_columns(
|
|
698
|
+
pl.col("feature_uid").cast(pl.Int64),
|
|
699
|
+
)
|
|
648
700
|
self.consensus_mapping_df = self.consensus_mapping_df.with_columns(
|
|
649
701
|
pl.col("feature_uid").cast(pl.Int64),
|
|
650
702
|
)
|
|
651
703
|
except Exception:
|
|
652
704
|
# If casting to Int64 fails, cast both to string
|
|
653
|
-
self.features_df = self.features_df.with_columns(
|
|
705
|
+
self.features_df = self.features_df.with_columns(
|
|
706
|
+
pl.col("feature_uid").cast(pl.Utf8),
|
|
707
|
+
)
|
|
654
708
|
self.consensus_mapping_df = self.consensus_mapping_df.with_columns(
|
|
655
709
|
pl.col("feature_uid").cast(pl.Utf8),
|
|
656
710
|
)
|
|
@@ -671,7 +725,9 @@ class Study:
|
|
|
671
725
|
else:
|
|
672
726
|
non_null_chroms = 0
|
|
673
727
|
total_possible = samples_df_len * consensus_df_len
|
|
674
|
-
chrom_completeness =
|
|
728
|
+
chrom_completeness = (
|
|
729
|
+
non_null_chroms / total_possible if total_possible > 0 else 0
|
|
730
|
+
)
|
|
675
731
|
else:
|
|
676
732
|
chrom_completeness = 0
|
|
677
733
|
|
|
@@ -683,23 +739,37 @@ class Study:
|
|
|
683
739
|
|
|
684
740
|
if not self.consensus_df.is_empty():
|
|
685
741
|
# Compute RT spread using only consensus rows with number_samples >= half the number of samples
|
|
686
|
-
threshold =
|
|
742
|
+
threshold = (
|
|
743
|
+
self.consensus_df.select(pl.col("number_samples").max()).item() / 2
|
|
744
|
+
if not self.samples_df.is_empty()
|
|
745
|
+
else 0
|
|
746
|
+
)
|
|
687
747
|
filtered = self.consensus_df.filter(pl.col("number_samples") >= threshold)
|
|
688
748
|
if filtered.is_empty():
|
|
689
749
|
rt_spread = -1.0
|
|
690
750
|
else:
|
|
691
|
-
rt_spread_row = filtered.select(
|
|
692
|
-
|
|
751
|
+
rt_spread_row = filtered.select(
|
|
752
|
+
(pl.col("rt_max") - pl.col("rt_min")).mean(),
|
|
753
|
+
).row(0)
|
|
754
|
+
rt_spread = (
|
|
755
|
+
float(rt_spread_row[0])
|
|
756
|
+
if rt_spread_row and rt_spread_row[0] is not None
|
|
757
|
+
else 0.0
|
|
758
|
+
)
|
|
693
759
|
else:
|
|
694
760
|
rt_spread = -1.0
|
|
695
761
|
|
|
696
762
|
# Calculate percentage of consensus features with MS2
|
|
697
763
|
consensus_with_ms2_percentage = (
|
|
698
|
-
(consensus_with_ms2_count / consensus_df_len * 100)
|
|
764
|
+
(consensus_with_ms2_count / consensus_df_len * 100)
|
|
765
|
+
if consensus_df_len > 0
|
|
766
|
+
else 0
|
|
699
767
|
)
|
|
700
768
|
|
|
701
769
|
# Total MS2 spectra count
|
|
702
|
-
total_ms2_count =
|
|
770
|
+
total_ms2_count = (
|
|
771
|
+
len(self.consensus_ms2) if not self.consensus_ms2.is_empty() else 0
|
|
772
|
+
)
|
|
703
773
|
|
|
704
774
|
# Estimate memory usage
|
|
705
775
|
memory_usage = (
|
|
@@ -712,15 +782,27 @@ class Study:
|
|
|
712
782
|
|
|
713
783
|
# Add warning symbols for out-of-range values
|
|
714
784
|
consensus_warning = f" {_WARNING_SYMBOL}" if consensus_df_len < 50 else ""
|
|
715
|
-
|
|
785
|
+
|
|
716
786
|
rt_spread_text = "N/A" if rt_spread < 0 else f"{rt_spread:.3f}s"
|
|
717
|
-
rt_spread_warning =
|
|
718
|
-
|
|
787
|
+
rt_spread_warning = (
|
|
788
|
+
f" {_WARNING_SYMBOL}"
|
|
789
|
+
if rt_spread >= 0 and (rt_spread > 5 or rt_spread < 0.1)
|
|
790
|
+
else ""
|
|
791
|
+
)
|
|
792
|
+
|
|
719
793
|
chrom_completeness_pct = chrom_completeness * 100
|
|
720
|
-
chrom_warning =
|
|
721
|
-
|
|
794
|
+
chrom_warning = (
|
|
795
|
+
f" {_WARNING_SYMBOL}"
|
|
796
|
+
if chrom_completeness_pct < 10 and chrom_completeness_pct >= 0
|
|
797
|
+
else ""
|
|
798
|
+
)
|
|
799
|
+
|
|
722
800
|
max_samples_warning = ""
|
|
723
|
-
if
|
|
801
|
+
if (
|
|
802
|
+
isinstance(max_samples, (int, float))
|
|
803
|
+
and samples_df_len > 0
|
|
804
|
+
and max_samples > 0
|
|
805
|
+
):
|
|
724
806
|
if max_samples < samples_df_len / 3.0:
|
|
725
807
|
max_samples_warning = f" {_WARNING_SYMBOL}"
|
|
726
808
|
elif max_samples < samples_df_len * 0.8:
|