masster 0.5.22__py3-none-any.whl → 0.5.24__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of masster might be problematic. Click here for more details.
- masster/_version.py +1 -1
- masster/logger.py +35 -19
- masster/sample/adducts.py +15 -29
- masster/sample/defaults/find_adducts_def.py +1 -3
- masster/sample/defaults/sample_def.py +4 -4
- masster/sample/h5.py +203 -361
- masster/sample/helpers.py +14 -30
- masster/sample/lib.py +3 -3
- masster/sample/load.py +21 -29
- masster/sample/plot.py +222 -132
- masster/sample/processing.py +42 -55
- masster/sample/sample.py +37 -46
- masster/sample/save.py +37 -61
- masster/sample/sciex.py +13 -11
- masster/sample/thermo.py +69 -74
- masster/spectrum.py +15 -15
- masster/study/analysis.py +650 -586
- masster/study/defaults/identify_def.py +1 -3
- masster/study/defaults/merge_def.py +6 -7
- masster/study/defaults/study_def.py +1 -5
- masster/study/export.py +35 -96
- masster/study/h5.py +134 -211
- masster/study/helpers.py +385 -459
- masster/study/id.py +239 -290
- masster/study/importers.py +84 -93
- masster/study/load.py +159 -178
- masster/study/merge.py +1112 -1098
- masster/study/plot.py +195 -149
- masster/study/processing.py +144 -191
- masster/study/save.py +14 -13
- masster/study/study.py +89 -130
- masster/wizard/wizard.py +764 -714
- {masster-0.5.22.dist-info → masster-0.5.24.dist-info}/METADATA +27 -1
- {masster-0.5.22.dist-info → masster-0.5.24.dist-info}/RECORD +37 -37
- {masster-0.5.22.dist-info → masster-0.5.24.dist-info}/WHEEL +0 -0
- {masster-0.5.22.dist-info → masster-0.5.24.dist-info}/entry_points.txt +0 -0
- {masster-0.5.22.dist-info → masster-0.5.24.dist-info}/licenses/LICENSE +0 -0
masster/study/save.py
CHANGED
|
@@ -48,11 +48,7 @@ def save(self, filename=None, add_timestamp=True, compress=False):
|
|
|
48
48
|
# Log file size information for performance monitoring
|
|
49
49
|
if hasattr(self, "features_df") and not self.features_df.is_empty():
|
|
50
50
|
feature_count = len(self.features_df)
|
|
51
|
-
sample_count = (
|
|
52
|
-
len(self.samples_df)
|
|
53
|
-
if hasattr(self, "samples_df") and not self.samples_df.is_empty()
|
|
54
|
-
else 0
|
|
55
|
-
)
|
|
51
|
+
sample_count = len(self.samples_df) if hasattr(self, "samples_df") and not self.samples_df.is_empty() else 0
|
|
56
52
|
self.logger.debug(
|
|
57
53
|
f"Saving study with {sample_count} samples and {feature_count} features to {filename}",
|
|
58
54
|
)
|
|
@@ -60,14 +56,17 @@ def save(self, filename=None, add_timestamp=True, compress=False):
|
|
|
60
56
|
# Use compressed mode for large datasets
|
|
61
57
|
if compress:
|
|
62
58
|
from masster.study.h5 import _save_study5_compressed
|
|
59
|
+
|
|
63
60
|
_save_study5_compressed(self, filename)
|
|
64
61
|
else:
|
|
65
62
|
from masster.study.h5 import _save_study5
|
|
63
|
+
|
|
66
64
|
_save_study5(self, filename)
|
|
67
65
|
|
|
68
66
|
if self.consensus_map is not None:
|
|
69
67
|
# save the features as a separate file
|
|
70
68
|
from masster.study.save import _save_consensusXML
|
|
69
|
+
|
|
71
70
|
_save_consensusXML(self, filename=filename.replace(".study5", ".consensusXML"))
|
|
72
71
|
self.filename = filename
|
|
73
72
|
|
|
@@ -160,14 +159,15 @@ def _save_consensusXML(self, filename: str):
|
|
|
160
159
|
if self.consensus_df is None or self.consensus_df.is_empty():
|
|
161
160
|
self.logger.error("No consensus features found.")
|
|
162
161
|
return
|
|
163
|
-
|
|
162
|
+
|
|
164
163
|
# Build consensus map from consensus_df with proper consensus_id values
|
|
165
164
|
import pyopenms as oms
|
|
165
|
+
|
|
166
166
|
consensus_map = oms.ConsensusMap()
|
|
167
|
-
|
|
167
|
+
|
|
168
168
|
# Set up file descriptions for all samples
|
|
169
169
|
file_descriptions = consensus_map.getColumnHeaders()
|
|
170
|
-
if hasattr(self,
|
|
170
|
+
if hasattr(self, "samples_df") and not self.samples_df.is_empty():
|
|
171
171
|
for i, sample_row in enumerate(self.samples_df.iter_rows(named=True)):
|
|
172
172
|
file_description = file_descriptions.get(i, oms.ColumnHeader())
|
|
173
173
|
file_description.filename = sample_row.get("sample_name", f"sample_{i}")
|
|
@@ -175,17 +175,17 @@ def _save_consensusXML(self, filename: str):
|
|
|
175
175
|
file_description.unique_id = i + 1
|
|
176
176
|
file_descriptions[i] = file_description
|
|
177
177
|
consensus_map.setColumnHeaders(file_descriptions)
|
|
178
|
-
|
|
178
|
+
|
|
179
179
|
# Add consensus features to the map (simplified version without individual features)
|
|
180
180
|
for consensus_row in self.consensus_df.iter_rows(named=True):
|
|
181
181
|
consensus_feature = oms.ConsensusFeature()
|
|
182
|
-
|
|
182
|
+
|
|
183
183
|
# Set basic properties
|
|
184
184
|
consensus_feature.setRT(float(consensus_row.get("rt", 0.0)))
|
|
185
185
|
consensus_feature.setMZ(float(consensus_row.get("mz", 0.0)))
|
|
186
186
|
consensus_feature.setIntensity(float(consensus_row.get("inty_mean", 0.0)))
|
|
187
187
|
consensus_feature.setQuality(float(consensus_row.get("quality", 1.0)))
|
|
188
|
-
|
|
188
|
+
|
|
189
189
|
# Set the unique consensus_id as the unique ID
|
|
190
190
|
consensus_id_str = consensus_row.get("consensus_id", "")
|
|
191
191
|
if consensus_id_str and len(consensus_id_str) == 16:
|
|
@@ -199,9 +199,9 @@ def _save_consensusXML(self, filename: str):
|
|
|
199
199
|
else:
|
|
200
200
|
# Fallback to consensus_uid
|
|
201
201
|
consensus_feature.setUniqueId(consensus_row.get("consensus_uid", 0))
|
|
202
|
-
|
|
202
|
+
|
|
203
203
|
consensus_map.push_back(consensus_feature)
|
|
204
|
-
|
|
204
|
+
|
|
205
205
|
# Save the consensus map
|
|
206
206
|
fh = oms.ConsensusXMLFile()
|
|
207
207
|
fh.store(filename, consensus_map)
|
|
@@ -215,4 +215,5 @@ def save_consensus(self, **kwargs):
|
|
|
215
215
|
self.logger.error("No consensus map found.")
|
|
216
216
|
return
|
|
217
217
|
from masster.study.save import _save_consensusXML
|
|
218
|
+
|
|
218
219
|
_save_consensusXML(self, **kwargs)
|
masster/study/study.py
CHANGED
|
@@ -80,7 +80,8 @@ from masster.study.load import add
|
|
|
80
80
|
from masster.study.load import add_sample
|
|
81
81
|
from masster.study.load import fill
|
|
82
82
|
from masster.study.load import load
|
|
83
|
-
|
|
83
|
+
|
|
84
|
+
# from masster.study.load import _load_features
|
|
84
85
|
from masster.study.h5 import _load_ms1
|
|
85
86
|
from masster.study.h5 import _load_study5
|
|
86
87
|
from masster.study.h5 import _save_study5
|
|
@@ -215,13 +216,13 @@ class Study:
|
|
|
215
216
|
"""
|
|
216
217
|
# ===== PARAMETER INITIALIZATION =====
|
|
217
218
|
auto_load_filename = self._init_parameters(filename, kwargs)
|
|
218
|
-
|
|
219
|
+
|
|
219
220
|
# ===== DATA STRUCTURES INITIALIZATION =====
|
|
220
221
|
self._init_data_structures()
|
|
221
|
-
|
|
222
|
+
|
|
222
223
|
# ===== LOGGER INITIALIZATION =====
|
|
223
224
|
self._init_logger()
|
|
224
|
-
|
|
225
|
+
|
|
225
226
|
# ===== AUTO-LOAD FILE IF PROVIDED =====
|
|
226
227
|
if auto_load_filename is not None:
|
|
227
228
|
self.load(filename=auto_load_filename)
|
|
@@ -266,11 +267,7 @@ class Study:
|
|
|
266
267
|
# Set instance attributes (ensure proper string values for logger)
|
|
267
268
|
self.folder = params.folder
|
|
268
269
|
self.label = params.label
|
|
269
|
-
self.polarity = (
|
|
270
|
-
params.polarity
|
|
271
|
-
if params.polarity in ["positive", "negative", "pos", "neg"]
|
|
272
|
-
else "positive"
|
|
273
|
-
)
|
|
270
|
+
self.polarity = params.polarity if params.polarity in ["positive", "negative", "pos", "neg"] else "positive"
|
|
274
271
|
self.log_level = params.log_level.upper() if params.log_level else "INFO"
|
|
275
272
|
self.log_label = params.log_label + " | " if params.log_label else ""
|
|
276
273
|
self.log_sink = params.log_sink
|
|
@@ -278,7 +275,7 @@ class Study:
|
|
|
278
275
|
# Create folder if it doesn't exist
|
|
279
276
|
if self.folder is not None and not os.path.exists(self.folder):
|
|
280
277
|
os.makedirs(self.folder)
|
|
281
|
-
|
|
278
|
+
|
|
282
279
|
return auto_load_filename
|
|
283
280
|
|
|
284
281
|
def _init_data_structures(self):
|
|
@@ -316,11 +313,11 @@ class Study:
|
|
|
316
313
|
"num_ms2": pl.Int64,
|
|
317
314
|
},
|
|
318
315
|
)
|
|
319
|
-
|
|
316
|
+
|
|
320
317
|
# Feature-related data structures
|
|
321
318
|
self.features_maps = []
|
|
322
319
|
self.features_df = pl.DataFrame()
|
|
323
|
-
|
|
320
|
+
|
|
324
321
|
# Consensus-related data structures
|
|
325
322
|
self.consensus_ms2 = pl.DataFrame()
|
|
326
323
|
self.consensus_df = pl.DataFrame()
|
|
@@ -330,7 +327,7 @@ class Study:
|
|
|
330
327
|
|
|
331
328
|
# Library and identification data structures
|
|
332
329
|
self.lib_df = pl.DataFrame() # populated by lib_load
|
|
333
|
-
self.id_df = pl.DataFrame()
|
|
330
|
+
self.id_df = pl.DataFrame() # populated by identify
|
|
334
331
|
|
|
335
332
|
def _init_logger(self):
|
|
336
333
|
"""Initialize the logger for this Study instance."""
|
|
@@ -352,24 +349,24 @@ class Study:
|
|
|
352
349
|
_load_ms1 = _load_ms1
|
|
353
350
|
_load_study5 = _load_study5
|
|
354
351
|
_save_study5 = _save_study5
|
|
355
|
-
|
|
352
|
+
|
|
356
353
|
# === Sample Management ===
|
|
357
354
|
add = add
|
|
358
355
|
add_sample = add_sample
|
|
359
|
-
|
|
356
|
+
|
|
360
357
|
# === Core Processing Operations ===
|
|
361
358
|
align = align
|
|
362
|
-
merge = merge
|
|
363
|
-
|
|
359
|
+
merge = merge
|
|
360
|
+
|
|
364
361
|
find_ms2 = find_ms2
|
|
365
362
|
find_iso = find_iso
|
|
366
363
|
reset_iso = reset_iso
|
|
367
364
|
iso_reset = reset_iso
|
|
368
365
|
integrate = integrate
|
|
369
|
-
|
|
366
|
+
|
|
370
367
|
fill = fill
|
|
371
|
-
#_estimate_rt_original_for_filled_feature = _estimate_rt_original_for_filled_feature
|
|
372
|
-
|
|
368
|
+
# _estimate_rt_original_for_filled_feature = _estimate_rt_original_for_filled_feature
|
|
369
|
+
|
|
373
370
|
# === Data Retrieval and Access ===
|
|
374
371
|
get_consensus = get_consensus
|
|
375
372
|
get_chrom = get_chrom
|
|
@@ -382,11 +379,11 @@ class Study:
|
|
|
382
379
|
get_sample_stats = get_sample_stats
|
|
383
380
|
get_consensus_stats = get_consensus_stats
|
|
384
381
|
_get_adducts = _get_adducts
|
|
385
|
-
|
|
382
|
+
|
|
386
383
|
# === Data Selection and Filtering ===
|
|
387
384
|
samples_select = samples_select
|
|
388
385
|
samples_delete = samples_delete
|
|
389
|
-
|
|
386
|
+
|
|
390
387
|
features_select = features_select
|
|
391
388
|
features_filter = features_filter
|
|
392
389
|
features_delete = features_delete
|
|
@@ -397,22 +394,22 @@ class Study:
|
|
|
397
394
|
# === Sample Metadata and Styling ===
|
|
398
395
|
set_samples_source = set_samples_source
|
|
399
396
|
set_samples_color = set_samples_color
|
|
400
|
-
|
|
397
|
+
|
|
401
398
|
samples_name_replace = sample_name_replace
|
|
402
399
|
samples_name_reset = sample_name_reset
|
|
403
|
-
|
|
400
|
+
|
|
404
401
|
# Backward compatibility aliases for renamed methods
|
|
405
402
|
set_folder = set_study_folder
|
|
406
|
-
set_source = set_samples_source
|
|
407
|
-
#sample_color = set_samples_color
|
|
408
|
-
#get_sample = get_samples
|
|
409
|
-
#load_features = _load_features
|
|
403
|
+
set_source = set_samples_source
|
|
404
|
+
# sample_color = set_samples_color
|
|
405
|
+
# get_sample = get_samples
|
|
406
|
+
# load_features = _load_features
|
|
410
407
|
store_history = update_history
|
|
411
|
-
|
|
408
|
+
|
|
412
409
|
# === Data Compression and Storage ===
|
|
413
410
|
compress = compress
|
|
414
411
|
decompress = decompress
|
|
415
|
-
|
|
412
|
+
|
|
416
413
|
# === Reset Operations ===
|
|
417
414
|
consensus_reset = consensus_reset
|
|
418
415
|
fill_reset = fill_reset
|
|
@@ -435,27 +432,29 @@ class Study:
|
|
|
435
432
|
|
|
436
433
|
# === Analysis Operations ===
|
|
437
434
|
analyze_umap = analyze_umap
|
|
438
|
-
|
|
435
|
+
|
|
439
436
|
# === Export Operations ===
|
|
440
437
|
export_mgf = export_mgf
|
|
441
438
|
export_mztab = export_mztab
|
|
442
439
|
export_xlsx = export_xlsx
|
|
443
440
|
export_parquet = export_parquet
|
|
444
|
-
|
|
441
|
+
|
|
445
442
|
# === Identification and Library Matching ===
|
|
446
443
|
lib_load = lib_load
|
|
447
|
-
|
|
444
|
+
|
|
448
445
|
def lib_to_consensus(self, **kwargs):
|
|
449
446
|
"""Create consensus features from library entries."""
|
|
450
447
|
from masster.study.id import lib_to_consensus as _lib_to_consensus
|
|
448
|
+
|
|
451
449
|
return _lib_to_consensus(self, **kwargs)
|
|
450
|
+
|
|
452
451
|
identify = identify
|
|
453
452
|
get_id = get_id
|
|
454
453
|
id_reset = id_reset
|
|
455
454
|
reset_id = id_reset
|
|
456
455
|
lib_reset = lib_reset
|
|
457
456
|
reset_lib = lib_reset
|
|
458
|
-
|
|
457
|
+
|
|
459
458
|
# === Oracle Import Operations ===
|
|
460
459
|
import_oracle = import_oracle
|
|
461
460
|
|
|
@@ -465,12 +464,12 @@ class Study:
|
|
|
465
464
|
update_parameters = update_parameters
|
|
466
465
|
get_parameters_property = get_parameters_property
|
|
467
466
|
set_parameters_property = set_parameters_property
|
|
468
|
-
|
|
467
|
+
|
|
469
468
|
# === Private/Internal Methods ===
|
|
470
469
|
_get_consensus_uids = _get_consensus_uids
|
|
471
470
|
_get_features_uids = _get_features_uids
|
|
472
471
|
_get_samples_uids = _get_samples_uids
|
|
473
|
-
|
|
472
|
+
|
|
474
473
|
# === Default Parameters ===
|
|
475
474
|
study_defaults = study_defaults
|
|
476
475
|
align_defaults = align_defaults
|
|
@@ -506,10 +505,7 @@ class Study:
|
|
|
506
505
|
|
|
507
506
|
# Get all currently loaded modules that are part of the study package
|
|
508
507
|
for module_name in sys.modules:
|
|
509
|
-
if (
|
|
510
|
-
module_name.startswith(study_module_prefix)
|
|
511
|
-
and module_name != current_module
|
|
512
|
-
):
|
|
508
|
+
if module_name.startswith(study_module_prefix) and module_name != current_module:
|
|
513
509
|
study_modules.append(module_name)
|
|
514
510
|
|
|
515
511
|
# Add core masster modules
|
|
@@ -524,20 +520,14 @@ class Study:
|
|
|
524
520
|
sample_modules = []
|
|
525
521
|
sample_module_prefix = f"{base_modname}.sample."
|
|
526
522
|
for module_name in sys.modules:
|
|
527
|
-
if (
|
|
528
|
-
module_name.startswith(sample_module_prefix)
|
|
529
|
-
and module_name != current_module
|
|
530
|
-
):
|
|
523
|
+
if module_name.startswith(sample_module_prefix) and module_name != current_module:
|
|
531
524
|
sample_modules.append(module_name)
|
|
532
525
|
|
|
533
526
|
# Add lib submodules
|
|
534
527
|
lib_modules = []
|
|
535
528
|
lib_module_prefix = f"{base_modname}.lib."
|
|
536
529
|
for module_name in sys.modules:
|
|
537
|
-
if (
|
|
538
|
-
module_name.startswith(lib_module_prefix)
|
|
539
|
-
and module_name != current_module
|
|
540
|
-
):
|
|
530
|
+
if module_name.startswith(lib_module_prefix) and module_name != current_module:
|
|
541
531
|
lib_modules.append(module_name)
|
|
542
532
|
|
|
543
533
|
all_modules_to_reload = core_modules + sample_modules + study_modules + lib_modules
|
|
@@ -565,73 +555,66 @@ class Study:
|
|
|
565
555
|
self.logger.debug("Module reload completed")
|
|
566
556
|
except Exception as e:
|
|
567
557
|
self.logger.error(f"Failed to reload current module {current_module}: {e}")
|
|
568
|
-
|
|
558
|
+
|
|
569
559
|
def __dir__(self):
|
|
570
560
|
"""
|
|
571
|
-
Custom __dir__ implementation to hide internal methods starting with '_'
|
|
572
|
-
and backward compatibility aliases from tab completion and dir() calls,
|
|
561
|
+
Custom __dir__ implementation to hide internal methods starting with '_'
|
|
562
|
+
and backward compatibility aliases from tab completion and dir() calls,
|
|
573
563
|
while keeping them accessible to class methods.
|
|
574
|
-
|
|
564
|
+
|
|
575
565
|
Returns:
|
|
576
566
|
list: List of public attribute and method names (excluding internal and deprecated methods)
|
|
577
567
|
"""
|
|
578
568
|
# Define backward compatibility aliases to hide
|
|
579
569
|
backward_compatibility_aliases = {
|
|
580
|
-
|
|
581
|
-
|
|
582
|
-
|
|
583
|
-
|
|
584
|
-
|
|
585
|
-
|
|
586
|
-
|
|
587
|
-
|
|
588
|
-
|
|
570
|
+
"add_folder", # alias for add
|
|
571
|
+
"find_consensus", # alias for merge
|
|
572
|
+
"integrate_chrom", # alias for integrate
|
|
573
|
+
"fill_chrom", # alias for fill
|
|
574
|
+
"filter_consensus", # alias for consensus_filter
|
|
575
|
+
"select_consensus", # alias for consensus_select
|
|
576
|
+
"filter_features", # alias for features_filter
|
|
577
|
+
"select_features", # alias for features_select
|
|
578
|
+
"consensus_find", # alias for merge
|
|
589
579
|
# Backward compatibility for renamed methods
|
|
590
|
-
|
|
591
|
-
|
|
592
|
-
|
|
593
|
-
|
|
594
|
-
|
|
595
|
-
|
|
596
|
-
|
|
597
|
-
|
|
580
|
+
"set_folder", # alias for set_study_folder
|
|
581
|
+
"set_source", # alias for set_samples_source
|
|
582
|
+
"sample_color", # alias for set_samples_color
|
|
583
|
+
"get_sample", # alias for get_samples
|
|
584
|
+
"load_features", # alias for _load_features
|
|
585
|
+
"store_history", # alias for update_history
|
|
586
|
+
"sample_color_reset", # alias for set_samples_color(by=None)
|
|
587
|
+
"reset_sample_color", # alias for sample_color_reset
|
|
598
588
|
}
|
|
599
|
-
|
|
589
|
+
|
|
600
590
|
# Get all attributes from the class
|
|
601
591
|
all_attrs: set[str] = set()
|
|
602
|
-
|
|
592
|
+
|
|
603
593
|
# Add attributes from the class and all its bases
|
|
604
594
|
for cls in self.__class__.__mro__:
|
|
605
595
|
all_attrs.update(cls.__dict__.keys())
|
|
606
|
-
|
|
596
|
+
|
|
607
597
|
# Add instance attributes
|
|
608
598
|
all_attrs.update(self.__dict__.keys())
|
|
609
|
-
|
|
599
|
+
|
|
610
600
|
# Filter out attributes starting with '_' (but keep special methods like __init__, __str__, etc.)
|
|
611
601
|
# Also filter out backward compatibility aliases
|
|
612
602
|
public_attrs = [
|
|
613
|
-
attr for attr in all_attrs
|
|
614
|
-
if not attr.startswith('_') or attr.startswith('__') and attr.endswith('__')
|
|
603
|
+
attr for attr in all_attrs if not attr.startswith("_") or attr.startswith("__") and attr.endswith("__")
|
|
615
604
|
]
|
|
616
|
-
|
|
605
|
+
|
|
617
606
|
# Remove backward compatibility aliases from the public attributes
|
|
618
607
|
public_attrs = [attr for attr in public_attrs if attr not in backward_compatibility_aliases]
|
|
619
|
-
|
|
608
|
+
|
|
620
609
|
return sorted(public_attrs)
|
|
621
610
|
|
|
622
611
|
def __str__(self):
|
|
623
612
|
"""
|
|
624
613
|
Return a short summary string with number of samples and consensus features.
|
|
625
614
|
"""
|
|
626
|
-
samples = (
|
|
627
|
-
len(self.samples_df)
|
|
628
|
-
if (self.samples_df is not None and not self.samples_df.is_empty())
|
|
629
|
-
else 0
|
|
630
|
-
)
|
|
615
|
+
samples = len(self.samples_df) if (self.samples_df is not None and not self.samples_df.is_empty()) else 0
|
|
631
616
|
consensus = (
|
|
632
|
-
len(self.consensus_df)
|
|
633
|
-
if (self.consensus_df is not None and not self.consensus_df.is_empty())
|
|
634
|
-
else 0
|
|
617
|
+
len(self.consensus_df) if (self.consensus_df is not None and not self.consensus_df.is_empty()) else 0
|
|
635
618
|
)
|
|
636
619
|
return f"{samples} samples, {consensus} consensus"
|
|
637
620
|
|
|
@@ -699,8 +682,7 @@ class Study:
|
|
|
699
682
|
max_samples = 0
|
|
700
683
|
|
|
701
684
|
# Count only features where 'filled' == False
|
|
702
|
-
if
|
|
703
|
-
"filled" in self.features_df.columns):
|
|
685
|
+
if self.features_df is not None and not self.features_df.is_empty() and "filled" in self.features_df.columns:
|
|
704
686
|
unfilled_features_count = self.features_df.filter(
|
|
705
687
|
~self.features_df["filled"],
|
|
706
688
|
).height
|
|
@@ -708,8 +690,12 @@ class Study:
|
|
|
708
690
|
unfilled_features_count = 0
|
|
709
691
|
|
|
710
692
|
# Calculate features in consensus vs not in consensus (only for unfilled features)
|
|
711
|
-
if (
|
|
712
|
-
self.
|
|
693
|
+
if (
|
|
694
|
+
self.features_df is not None
|
|
695
|
+
and not self.features_df.is_empty()
|
|
696
|
+
and self.consensus_mapping_df is not None
|
|
697
|
+
and not self.consensus_mapping_df.is_empty()
|
|
698
|
+
):
|
|
713
699
|
# Get unfilled features only
|
|
714
700
|
unfilled_features = (
|
|
715
701
|
self.features_df.filter(~self.features_df["filled"])
|
|
@@ -730,17 +716,13 @@ class Study:
|
|
|
730
716
|
unfilled_features = unfilled_features.with_columns(
|
|
731
717
|
pl.col("feature_uid").cast(pl.Int64),
|
|
732
718
|
)
|
|
733
|
-
consensus_feature_uids = [
|
|
734
|
-
int(uid) for uid in consensus_feature_uids
|
|
735
|
-
]
|
|
719
|
+
consensus_feature_uids = [int(uid) for uid in consensus_feature_uids]
|
|
736
720
|
except Exception:
|
|
737
721
|
# If casting fails, ensure both are strings
|
|
738
722
|
unfilled_features = unfilled_features.with_columns(
|
|
739
723
|
pl.col("feature_uid").cast(pl.Utf8),
|
|
740
724
|
)
|
|
741
|
-
consensus_feature_uids = [
|
|
742
|
-
str(uid) for uid in consensus_feature_uids
|
|
743
|
-
]
|
|
725
|
+
consensus_feature_uids = [str(uid) for uid in consensus_feature_uids]
|
|
744
726
|
|
|
745
727
|
# Count unfilled features that are in consensus
|
|
746
728
|
in_consensus_count = unfilled_features.filter(
|
|
@@ -749,12 +731,8 @@ class Study:
|
|
|
749
731
|
|
|
750
732
|
# Calculate ratios that sum to 100%
|
|
751
733
|
total_unfilled = unfilled_features.height
|
|
752
|
-
ratio_in_consensus_to_total = (
|
|
753
|
-
(in_consensus_count / total_unfilled * 100) if total_unfilled > 0 else 0
|
|
754
|
-
)
|
|
755
|
-
ratio_not_in_consensus_to_total = (
|
|
756
|
-
100 - ratio_in_consensus_to_total if total_unfilled > 0 else 0
|
|
757
|
-
)
|
|
734
|
+
ratio_in_consensus_to_total = (in_consensus_count / total_unfilled * 100) if total_unfilled > 0 else 0
|
|
735
|
+
ratio_not_in_consensus_to_total = 100 - ratio_in_consensus_to_total if total_unfilled > 0 else 0
|
|
758
736
|
else:
|
|
759
737
|
ratio_in_consensus_to_total = 0
|
|
760
738
|
ratio_not_in_consensus_to_total = 0
|
|
@@ -789,8 +767,7 @@ class Study:
|
|
|
789
767
|
)
|
|
790
768
|
|
|
791
769
|
# Use more efficient counting - count non-null chroms only for features in consensus mapping
|
|
792
|
-
if
|
|
793
|
-
not self.consensus_mapping_df.is_empty()):
|
|
770
|
+
if self.consensus_mapping_df is not None and not self.consensus_mapping_df.is_empty():
|
|
794
771
|
non_null_chroms = (
|
|
795
772
|
self.features_df.join(
|
|
796
773
|
self.consensus_mapping_df.select("feature_uid"),
|
|
@@ -805,9 +782,7 @@ class Study:
|
|
|
805
782
|
else:
|
|
806
783
|
non_null_chroms = 0
|
|
807
784
|
total_possible = samples_df_len * consensus_df_len
|
|
808
|
-
chrom_completeness = (
|
|
809
|
-
non_null_chroms / total_possible if total_possible > 0 else 0
|
|
810
|
-
)
|
|
785
|
+
chrom_completeness = non_null_chroms / total_possible if total_possible > 0 else 0
|
|
811
786
|
else:
|
|
812
787
|
chrom_completeness = 0
|
|
813
788
|
|
|
@@ -831,19 +806,13 @@ class Study:
|
|
|
831
806
|
rt_spread_row = filtered.select(
|
|
832
807
|
(pl.col("rt_max") - pl.col("rt_min")).mean(),
|
|
833
808
|
).row(0)
|
|
834
|
-
rt_spread = (
|
|
835
|
-
float(rt_spread_row[0])
|
|
836
|
-
if rt_spread_row and rt_spread_row[0] is not None
|
|
837
|
-
else 0.0
|
|
838
|
-
)
|
|
809
|
+
rt_spread = float(rt_spread_row[0]) if rt_spread_row and rt_spread_row[0] is not None else 0.0
|
|
839
810
|
else:
|
|
840
811
|
rt_spread = -1.0
|
|
841
812
|
|
|
842
813
|
# Calculate percentage of consensus features with MS2
|
|
843
814
|
consensus_with_ms2_percentage = (
|
|
844
|
-
(consensus_with_ms2_count / consensus_df_len * 100)
|
|
845
|
-
if consensus_df_len > 0
|
|
846
|
-
else 0
|
|
815
|
+
(consensus_with_ms2_count / consensus_df_len * 100) if consensus_df_len > 0 else 0
|
|
847
816
|
)
|
|
848
817
|
|
|
849
818
|
# Total MS2 spectra count
|
|
@@ -865,6 +834,7 @@ class Study:
|
|
|
865
834
|
if consensus_df_len > 0:
|
|
866
835
|
try:
|
|
867
836
|
from masster.study.merge import _count_tight_clusters
|
|
837
|
+
|
|
868
838
|
tight_clusters_count = _count_tight_clusters(self, mz_tol=0.04, rt_tol=0.3)
|
|
869
839
|
except Exception:
|
|
870
840
|
# If tight clusters calculation fails, just use 0
|
|
@@ -874,25 +844,13 @@ class Study:
|
|
|
874
844
|
consensus_warning = f" {_WARNING_SYMBOL}" if consensus_df_len < 50 else ""
|
|
875
845
|
|
|
876
846
|
rt_spread_text = "N/A" if rt_spread < 0 else f"{rt_spread:.3f}s"
|
|
877
|
-
rt_spread_warning = (
|
|
878
|
-
f" {_WARNING_SYMBOL}"
|
|
879
|
-
if rt_spread >= 0 and (rt_spread > 5 or rt_spread < 0.1)
|
|
880
|
-
else ""
|
|
881
|
-
)
|
|
847
|
+
rt_spread_warning = f" {_WARNING_SYMBOL}" if rt_spread >= 0 and (rt_spread > 5 or rt_spread < 0.1) else ""
|
|
882
848
|
|
|
883
849
|
chrom_completeness_pct = chrom_completeness * 100
|
|
884
|
-
chrom_warning = (
|
|
885
|
-
f" {_WARNING_SYMBOL}"
|
|
886
|
-
if chrom_completeness_pct < 10 and chrom_completeness_pct >= 0
|
|
887
|
-
else ""
|
|
888
|
-
)
|
|
850
|
+
chrom_warning = f" {_WARNING_SYMBOL}" if chrom_completeness_pct < 10 and chrom_completeness_pct >= 0 else ""
|
|
889
851
|
|
|
890
852
|
max_samples_warning = ""
|
|
891
|
-
if (
|
|
892
|
-
isinstance(max_samples, (int, float))
|
|
893
|
-
and samples_df_len > 0
|
|
894
|
-
and max_samples > 0
|
|
895
|
-
):
|
|
853
|
+
if isinstance(max_samples, (int, float)) and samples_df_len > 0 and max_samples > 0:
|
|
896
854
|
if max_samples < samples_df_len / 3.0:
|
|
897
855
|
max_samples_warning = f" {_WARNING_SYMBOL}"
|
|
898
856
|
elif max_samples < samples_df_len * 0.8:
|
|
@@ -923,5 +881,6 @@ class Study:
|
|
|
923
881
|
|
|
924
882
|
print(summary)
|
|
925
883
|
|
|
926
|
-
|
|
884
|
+
|
|
885
|
+
if __name__ == "__main__":
|
|
927
886
|
pass
|