masster 0.3.10__py3-none-any.whl → 0.3.12__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of masster might be problematic. Click here for more details.
- masster/docs/SCX_API_Documentation.md +0 -0
- masster/docs/SCX_DLL_Analysis.md +0 -0
- masster/logger.py +92 -78
- masster/sample/defaults/find_features_def.py +16 -6
- masster/sample/defaults/sample_def.py +1 -1
- masster/sample/h5.py +2 -2
- masster/sample/helpers.py +190 -140
- masster/sample/load.py +13 -9
- masster/sample/plot.py +256 -147
- masster/sample/processing.py +18 -12
- masster/sample/sample.py +10 -4
- masster/sample/sample5_schema.json +38 -29
- masster/sample/save.py +16 -13
- masster/sample/sciex.py +187 -176
- masster/study/defaults/align_def.py +231 -13
- masster/study/defaults/fill_chrom_def.py +1 -5
- masster/study/defaults/integrate_chrom_def.py +1 -5
- masster/study/defaults/study_def.py +2 -2
- masster/study/export.py +144 -131
- masster/study/h5.py +193 -133
- masster/study/helpers.py +757 -246
- masster/study/helpers_optimized.py +99 -57
- masster/study/load.py +57 -25
- masster/study/plot.py +1244 -129
- masster/study/processing.py +194 -86
- masster/study/save.py +7 -7
- masster/study/study.py +154 -89
- masster/study/study5_schema.json +15 -15
- {masster-0.3.10.dist-info → masster-0.3.12.dist-info}/METADATA +1 -1
- {masster-0.3.10.dist-info → masster-0.3.12.dist-info}/RECORD +33 -31
- {masster-0.3.10.dist-info → masster-0.3.12.dist-info}/WHEEL +0 -0
- {masster-0.3.10.dist-info → masster-0.3.12.dist-info}/entry_points.txt +0 -0
- {masster-0.3.10.dist-info → masster-0.3.12.dist-info}/licenses/LICENSE +0 -0
masster/study/study.py
CHANGED
|
@@ -50,6 +50,7 @@ import os
|
|
|
50
50
|
import sys
|
|
51
51
|
|
|
52
52
|
import polars as pl
|
|
53
|
+
import numpy as np
|
|
53
54
|
|
|
54
55
|
# Study-specific imports
|
|
55
56
|
from masster.study.h5 import _load_study5
|
|
@@ -66,6 +67,7 @@ from masster.study.helpers import restore_features
|
|
|
66
67
|
from masster.study.helpers import restore_chrom
|
|
67
68
|
from masster.study.helpers import fill_reset
|
|
68
69
|
from masster.study.helpers import get_chrom
|
|
70
|
+
from masster.study.helpers import get_sample
|
|
69
71
|
from masster.study.helpers import get_consensus
|
|
70
72
|
from masster.study.helpers import get_consensus_matches
|
|
71
73
|
from masster.study.helpers import get_consensus_matrix
|
|
@@ -75,6 +77,8 @@ from masster.study.helpers import get_gaps_stats
|
|
|
75
77
|
from masster.study.helpers import align_reset
|
|
76
78
|
from masster.study.helpers import set_folder
|
|
77
79
|
from masster.study.helpers import set_source
|
|
80
|
+
from masster.study.helpers import name_replace
|
|
81
|
+
from masster.study.helpers import name_reset
|
|
78
82
|
from masster.study.helpers import features_select
|
|
79
83
|
from masster.study.helpers import features_filter
|
|
80
84
|
from masster.study.helpers import features_delete
|
|
@@ -92,12 +96,16 @@ from masster.study.load import _load_consensusXML
|
|
|
92
96
|
from masster.study.load import load_features
|
|
93
97
|
from masster.study.load import sanitize
|
|
94
98
|
from masster.study.plot import plot_alignment
|
|
95
|
-
from masster.study.plot import plot_alignment_bokeh
|
|
96
|
-
from masster.study.plot import plot_chrom
|
|
97
99
|
from masster.study.plot import plot_consensus_2d
|
|
98
100
|
from masster.study.plot import plot_samples_2d
|
|
101
|
+
from masster.study.plot import plot_consensus_stats
|
|
102
|
+
from masster.study.plot import plot_chrom
|
|
103
|
+
from masster.study.plot import plot_pca
|
|
104
|
+
from masster.study.plot import plot_bpc
|
|
105
|
+
from masster.study.plot import plot_tic
|
|
106
|
+
from masster.study.plot import plot_eic
|
|
107
|
+
from masster.study.plot import plot_rt_correction
|
|
99
108
|
from masster.study.processing import align
|
|
100
|
-
from masster.study.processing import filter_consensus
|
|
101
109
|
from masster.study.processing import merge
|
|
102
110
|
from masster.study.processing import integrate
|
|
103
111
|
from masster.study.processing import find_ms2
|
|
@@ -170,7 +178,7 @@ class Study:
|
|
|
170
178
|
- `ddafile`: For individual sample processing before study-level analysis.
|
|
171
179
|
- `StudyParameters`: For configuring study-specific parameters.
|
|
172
180
|
"""
|
|
173
|
-
|
|
181
|
+
|
|
174
182
|
# Defaults class attributes
|
|
175
183
|
study_defaults = study_defaults
|
|
176
184
|
sample_defaults = sample_defaults
|
|
@@ -219,15 +227,15 @@ class Study:
|
|
|
219
227
|
# Handle filename parameter for automatic loading
|
|
220
228
|
auto_load_filename = None
|
|
221
229
|
if filename is not None:
|
|
222
|
-
if not filename.endswith(
|
|
230
|
+
if not filename.endswith(".study5"):
|
|
223
231
|
raise ValueError("filename must be a .study5 file")
|
|
224
232
|
if not os.path.exists(filename):
|
|
225
233
|
raise FileNotFoundError(f"Study file not found: {filename}")
|
|
226
|
-
|
|
234
|
+
|
|
227
235
|
# Set folder to the directory containing the file if not already specified
|
|
228
|
-
if
|
|
229
|
-
kwargs[
|
|
230
|
-
|
|
236
|
+
if "folder" not in kwargs:
|
|
237
|
+
kwargs["folder"] = os.path.dirname(os.path.abspath(filename))
|
|
238
|
+
|
|
231
239
|
auto_load_filename = filename
|
|
232
240
|
|
|
233
241
|
# Check if a study_defaults instance was passed
|
|
@@ -257,7 +265,7 @@ class Study:
|
|
|
257
265
|
self.log_level = params.log_level.upper() if params.log_level else "INFO"
|
|
258
266
|
self.log_label = params.log_label + " | " if params.log_label else ""
|
|
259
267
|
self.log_sink = params.log_sink
|
|
260
|
-
|
|
268
|
+
|
|
261
269
|
if self.folder is not None and not os.path.exists(self.folder):
|
|
262
270
|
# create the folder if it does not exist
|
|
263
271
|
os.makedirs(self.folder)
|
|
@@ -308,80 +316,91 @@ class Study:
|
|
|
308
316
|
if auto_load_filename is not None:
|
|
309
317
|
self.load(filename=auto_load_filename)
|
|
310
318
|
|
|
311
|
-
|
|
319
|
+
# cache for Sample instances created/loaded by this Study
|
|
320
|
+
self._samples_cache = {}
|
|
321
|
+
|
|
312
322
|
|
|
313
|
-
|
|
323
|
+
|
|
324
|
+
# Attach module functions as class methods
|
|
314
325
|
load = load
|
|
315
|
-
save = save
|
|
316
|
-
save_consensus = save_consensus
|
|
317
|
-
save_samples = save_samples
|
|
318
|
-
align = align
|
|
319
|
-
fill_single = fill_single
|
|
326
|
+
save = save
|
|
327
|
+
save_consensus = save_consensus
|
|
328
|
+
save_samples = save_samples
|
|
329
|
+
align = align
|
|
330
|
+
fill_single = fill_single
|
|
320
331
|
fill_chrom_single = fill_single # Backward compatibility alias
|
|
321
|
-
merge = merge
|
|
332
|
+
merge = merge
|
|
322
333
|
find_consensus = merge # Backward compatibility alias
|
|
323
|
-
find_ms2 = find_ms2
|
|
334
|
+
find_ms2 = find_ms2
|
|
324
335
|
integrate = integrate
|
|
325
336
|
integrate_chrom = integrate # Backward compatibility alias
|
|
326
|
-
store_history = store_history
|
|
327
|
-
get_parameters = get_parameters
|
|
328
|
-
update_parameters = update_parameters
|
|
329
|
-
get_parameters_property = get_parameters_property
|
|
330
|
-
set_parameters_property = set_parameters_property
|
|
331
|
-
plot_alignment = plot_alignment
|
|
332
|
-
|
|
333
|
-
|
|
334
|
-
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
|
|
338
|
-
|
|
339
|
-
|
|
337
|
+
store_history = store_history
|
|
338
|
+
get_parameters = get_parameters
|
|
339
|
+
update_parameters = update_parameters
|
|
340
|
+
get_parameters_property = get_parameters_property
|
|
341
|
+
set_parameters_property = set_parameters_property
|
|
342
|
+
plot_alignment = plot_alignment
|
|
343
|
+
plot_chrom = plot_chrom
|
|
344
|
+
plot_consensus_2d = plot_consensus_2d
|
|
345
|
+
plot_consensus_stats = plot_consensus_stats
|
|
346
|
+
plot_pca = plot_pca
|
|
347
|
+
plot_samples_2d = plot_samples_2d
|
|
348
|
+
plot_bpc = plot_bpc
|
|
349
|
+
plot_rt_correction = plot_rt_correction
|
|
350
|
+
plot_tic = plot_tic
|
|
351
|
+
plot_eic = plot_eic
|
|
352
|
+
get_consensus = get_consensus
|
|
353
|
+
get_chrom = get_chrom
|
|
354
|
+
get_sample = get_sample
|
|
355
|
+
get_consensus_matches = get_consensus_matches
|
|
356
|
+
compress = compress
|
|
340
357
|
compress_features = compress_features
|
|
341
358
|
compress_ms2 = compress_ms2
|
|
342
359
|
compress_chrom = compress_chrom
|
|
343
|
-
restore_features = restore_features
|
|
360
|
+
restore_features = restore_features
|
|
344
361
|
restore_chrom = restore_chrom
|
|
345
|
-
fill_reset = fill_reset
|
|
346
|
-
align_reset = align_reset
|
|
362
|
+
fill_reset = fill_reset
|
|
363
|
+
align_reset = align_reset
|
|
347
364
|
set_source = set_source
|
|
365
|
+
name_replace = name_replace
|
|
366
|
+
name_reset = name_reset
|
|
348
367
|
features_select = features_select
|
|
349
368
|
features_filter = features_filter
|
|
350
369
|
features_delete = features_delete
|
|
351
370
|
consensus_select = consensus_select
|
|
352
371
|
consensus_filter = consensus_filter
|
|
353
372
|
consensus_delete = consensus_delete
|
|
354
|
-
filter_consensus = consensus_filter
|
|
373
|
+
filter_consensus = consensus_filter
|
|
355
374
|
select_consensus = consensus_select
|
|
356
375
|
filter_features = features_filter
|
|
357
376
|
select_features = features_select
|
|
358
377
|
consensus_find = merge
|
|
359
|
-
filter_features = features_filter
|
|
378
|
+
filter_features = features_filter
|
|
360
379
|
|
|
361
380
|
# Additional method assignments for all imported functions
|
|
362
|
-
add_folder = add
|
|
363
|
-
add = add
|
|
381
|
+
add_folder = add # backward compatibility alias
|
|
382
|
+
add = add
|
|
364
383
|
add_sample = add_sample
|
|
365
|
-
_load_study5 = _load_study5
|
|
366
|
-
_save_study5 = _save_study5
|
|
367
|
-
_save_study5_compressed = _save_study5_compressed
|
|
368
|
-
_get_consensus_uids = _get_consensus_uids
|
|
369
|
-
_get_feature_uids = _get_feature_uids
|
|
370
|
-
_get_sample_uids = _get_sample_uids
|
|
371
|
-
get_consensus_matrix = get_consensus_matrix
|
|
372
|
-
get_gaps_matrix = get_gaps_matrix
|
|
373
|
-
get_gaps_stats = get_gaps_stats
|
|
384
|
+
_load_study5 = _load_study5
|
|
385
|
+
_save_study5 = _save_study5
|
|
386
|
+
_save_study5_compressed = _save_study5_compressed
|
|
387
|
+
_get_consensus_uids = _get_consensus_uids
|
|
388
|
+
_get_feature_uids = _get_feature_uids
|
|
389
|
+
_get_sample_uids = _get_sample_uids
|
|
390
|
+
get_consensus_matrix = get_consensus_matrix
|
|
391
|
+
get_gaps_matrix = get_gaps_matrix
|
|
392
|
+
get_gaps_stats = get_gaps_stats
|
|
374
393
|
get_orphans = get_orphans
|
|
375
|
-
set_folder = set_folder
|
|
376
|
-
fill = fill
|
|
394
|
+
set_folder = set_folder
|
|
395
|
+
fill = fill
|
|
377
396
|
fill_chrom = fill # Backward compatibility alias
|
|
378
|
-
_process_sample_for_parallel_fill = _process_sample_for_parallel_fill
|
|
379
|
-
_get_missing_consensus_sample_combinations = _get_missing_consensus_sample_combinations
|
|
380
|
-
_load_consensusXML = _load_consensusXML
|
|
381
|
-
load_features = load_features
|
|
382
|
-
sanitize = sanitize
|
|
383
|
-
_save_consensusXML = _save_consensusXML
|
|
384
|
-
export_mgf = export_mgf
|
|
397
|
+
_process_sample_for_parallel_fill = _process_sample_for_parallel_fill
|
|
398
|
+
_get_missing_consensus_sample_combinations = _get_missing_consensus_sample_combinations
|
|
399
|
+
_load_consensusXML = _load_consensusXML
|
|
400
|
+
load_features = load_features
|
|
401
|
+
sanitize = sanitize
|
|
402
|
+
_save_consensusXML = _save_consensusXML
|
|
403
|
+
export_mgf = export_mgf
|
|
385
404
|
export_mztab = export_mztab
|
|
386
405
|
_get_mgf_df = _get_mgf_df # New function for MGF data extraction
|
|
387
406
|
|
|
@@ -404,13 +423,13 @@ class Study:
|
|
|
404
423
|
This ensures that the instance uses the latest implementation without restarting the interpreter.
|
|
405
424
|
"""
|
|
406
425
|
# Reset logger configuration flags to allow proper reconfiguration after reload
|
|
407
|
-
|
|
426
|
+
""" try:
|
|
408
427
|
import masster.sample.logger as logger_module
|
|
409
428
|
|
|
410
429
|
if hasattr(logger_module, "_STUDY_LOGGER_CONFIGURED"):
|
|
411
430
|
logger_module._STUDY_LOGGER_CONFIGURED = False
|
|
412
431
|
except Exception:
|
|
413
|
-
pass
|
|
432
|
+
pass"""
|
|
414
433
|
|
|
415
434
|
# Get the base module name (masster)
|
|
416
435
|
base_modname = self.__class__.__module__.split(".")[0]
|
|
@@ -508,7 +527,7 @@ class Study:
|
|
|
508
527
|
# Cache DataFrame lengths and existence checks
|
|
509
528
|
consensus_df_len = len(self.consensus_df) if not self.consensus_df.is_empty() else 0
|
|
510
529
|
samples_df_len = len(self.samples_df) if not self.samples_df.is_empty() else 0
|
|
511
|
-
|
|
530
|
+
|
|
512
531
|
# Calculate consensus statistics only if consensus_df exists and has data
|
|
513
532
|
if consensus_df_len > 0:
|
|
514
533
|
# Execute the aggregation once
|
|
@@ -525,40 +544,44 @@ class Study:
|
|
|
525
544
|
min_samples = 0
|
|
526
545
|
mean_samples = 0
|
|
527
546
|
max_samples = 0
|
|
528
|
-
|
|
547
|
+
|
|
529
548
|
# Count only features where 'filled' == False
|
|
530
|
-
if not self.features_df.is_empty() and
|
|
531
|
-
unfilled_features_count = self.features_df.filter(~self.features_df[
|
|
549
|
+
if not self.features_df.is_empty() and "filled" in self.features_df.columns:
|
|
550
|
+
unfilled_features_count = self.features_df.filter(~self.features_df["filled"]).height
|
|
532
551
|
else:
|
|
533
552
|
unfilled_features_count = 0
|
|
534
553
|
|
|
535
554
|
# Calculate features in consensus vs not in consensus (only for unfilled features)
|
|
536
555
|
if not self.features_df.is_empty() and not self.consensus_mapping_df.is_empty():
|
|
537
556
|
# Get unfilled features only
|
|
538
|
-
unfilled_features =
|
|
539
|
-
|
|
557
|
+
unfilled_features = (
|
|
558
|
+
self.features_df.filter(~self.features_df["filled"])
|
|
559
|
+
if "filled" in self.features_df.columns
|
|
560
|
+
else self.features_df
|
|
561
|
+
)
|
|
562
|
+
|
|
540
563
|
# Ensure the column and list have matching data types
|
|
541
|
-
consensus_feature_uids = self.consensus_mapping_df[
|
|
542
|
-
|
|
564
|
+
consensus_feature_uids = self.consensus_mapping_df["feature_uid"].to_list()
|
|
565
|
+
|
|
543
566
|
# Check if we need to cast either side to match types
|
|
544
|
-
unfilled_dtype = unfilled_features[
|
|
545
|
-
consensus_dtype = self.consensus_mapping_df[
|
|
546
|
-
|
|
567
|
+
unfilled_dtype = unfilled_features["feature_uid"].dtype
|
|
568
|
+
consensus_dtype = self.consensus_mapping_df["feature_uid"].dtype
|
|
569
|
+
|
|
547
570
|
if unfilled_dtype != consensus_dtype:
|
|
548
571
|
# Cast both to Int64 if possible, otherwise keep as string
|
|
549
572
|
try:
|
|
550
|
-
unfilled_features = unfilled_features.with_columns(pl.col(
|
|
573
|
+
unfilled_features = unfilled_features.with_columns(pl.col("feature_uid").cast(pl.Int64))
|
|
551
574
|
consensus_feature_uids = [int(uid) for uid in consensus_feature_uids]
|
|
552
575
|
except Exception:
|
|
553
576
|
# If casting fails, ensure both are strings
|
|
554
|
-
unfilled_features = unfilled_features.with_columns(pl.col(
|
|
577
|
+
unfilled_features = unfilled_features.with_columns(pl.col("feature_uid").cast(pl.Utf8))
|
|
555
578
|
consensus_feature_uids = [str(uid) for uid in consensus_feature_uids]
|
|
556
579
|
|
|
557
580
|
# Count unfilled features that are in consensus
|
|
558
581
|
in_consensus_count = unfilled_features.filter(
|
|
559
|
-
pl.col(
|
|
582
|
+
pl.col("feature_uid").is_in(consensus_feature_uids),
|
|
560
583
|
).height
|
|
561
|
-
|
|
584
|
+
|
|
562
585
|
# Calculate ratios that sum to 100%
|
|
563
586
|
total_unfilled = unfilled_features.height
|
|
564
587
|
ratio_in_consensus_to_total = (in_consensus_count / total_unfilled * 100) if total_unfilled > 0 else 0
|
|
@@ -572,16 +595,20 @@ class Study:
|
|
|
572
595
|
# Ensure matching data types for join keys
|
|
573
596
|
features_dtype = self.features_df["feature_uid"].dtype
|
|
574
597
|
consensus_dtype = self.consensus_mapping_df["feature_uid"].dtype
|
|
575
|
-
|
|
598
|
+
|
|
576
599
|
if features_dtype != consensus_dtype:
|
|
577
600
|
# Try to cast both to Int64, fallback to string if needed
|
|
578
601
|
try:
|
|
579
602
|
self.features_df = self.features_df.with_columns(pl.col("feature_uid").cast(pl.Int64))
|
|
580
|
-
self.consensus_mapping_df = self.consensus_mapping_df.with_columns(
|
|
603
|
+
self.consensus_mapping_df = self.consensus_mapping_df.with_columns(
|
|
604
|
+
pl.col("feature_uid").cast(pl.Int64)
|
|
605
|
+
)
|
|
581
606
|
except Exception:
|
|
582
607
|
# If casting to Int64 fails, cast both to string
|
|
583
608
|
self.features_df = self.features_df.with_columns(pl.col("feature_uid").cast(pl.Utf8))
|
|
584
|
-
self.consensus_mapping_df = self.consensus_mapping_df.with_columns(
|
|
609
|
+
self.consensus_mapping_df = self.consensus_mapping_df.with_columns(
|
|
610
|
+
pl.col("feature_uid").cast(pl.Utf8)
|
|
611
|
+
)
|
|
585
612
|
|
|
586
613
|
# Use more efficient counting - count non-null chroms only for features in consensus mapping
|
|
587
614
|
if not self.consensus_mapping_df.is_empty():
|
|
@@ -599,21 +626,33 @@ class Study:
|
|
|
599
626
|
else:
|
|
600
627
|
non_null_chroms = 0
|
|
601
628
|
total_possible = samples_df_len * consensus_df_len
|
|
602
|
-
chrom_completeness =
|
|
603
|
-
non_null_chroms / total_possible if total_possible > 0 else 0
|
|
604
|
-
)
|
|
629
|
+
chrom_completeness = non_null_chroms / total_possible if total_possible > 0 else 0
|
|
605
630
|
else:
|
|
606
631
|
chrom_completeness = 0
|
|
607
|
-
|
|
632
|
+
|
|
608
633
|
# Calculate consensus features with MS2 (count unique consensus_uids with MS2)
|
|
609
634
|
if not self.consensus_ms2.is_empty():
|
|
610
635
|
consensus_with_ms2_count = self.consensus_ms2["consensus_uid"].n_unique()
|
|
611
636
|
else:
|
|
612
637
|
consensus_with_ms2_count = 0
|
|
613
|
-
|
|
638
|
+
|
|
639
|
+
if not self.consensus_df.is_empty():
|
|
640
|
+
# Compute RT spread using only consensus rows with number_samples >= half the number of samples
|
|
641
|
+
threshold = len(self.samples_df) / 2 if not self.samples_df.is_empty() else 0
|
|
642
|
+
filtered = self.consensus_df.filter(pl.col("number_samples") >= threshold)
|
|
643
|
+
if filtered.is_empty():
|
|
644
|
+
rt_spread = -1.0
|
|
645
|
+
else:
|
|
646
|
+
rt_spread_row = filtered.select((pl.col("rt_max") - pl.col("rt_min")).mean()).row(0)
|
|
647
|
+
rt_spread = float(rt_spread_row[0]) if rt_spread_row and rt_spread_row[0] is not None else 0.0
|
|
648
|
+
else:
|
|
649
|
+
rt_spread = -1.0
|
|
650
|
+
|
|
614
651
|
# Calculate percentage of consensus features with MS2
|
|
615
|
-
consensus_with_ms2_percentage = (
|
|
616
|
-
|
|
652
|
+
consensus_with_ms2_percentage = (
|
|
653
|
+
(consensus_with_ms2_count / consensus_df_len * 100) if consensus_df_len > 0 else 0
|
|
654
|
+
)
|
|
655
|
+
|
|
617
656
|
# Total MS2 spectra count
|
|
618
657
|
total_ms2_count = len(self.consensus_ms2) if not self.consensus_ms2.is_empty() else 0
|
|
619
658
|
|
|
@@ -632,19 +671,45 @@ class Study:
|
|
|
632
671
|
f"Samples: {samples_df_len}\n"
|
|
633
672
|
f"Features: {unfilled_features_count}\n"
|
|
634
673
|
f"- in consensus: {ratio_in_consensus_to_total:.0f}%\n"
|
|
635
|
-
f"- not in consensus: {ratio_not_in_consensus_to_total:.0f}%\n"
|
|
674
|
+
f"- not in consensus: {ratio_not_in_consensus_to_total:.0f}%\n"
|
|
636
675
|
f"Consensus: {consensus_df_len}\n"
|
|
676
|
+
f"- RT spread: {rt_spread:.3f}s\n"
|
|
637
677
|
f"- Min samples count: {min_samples:.0f}\n"
|
|
638
678
|
f"- Mean samples count: {mean_samples:.0f}\n"
|
|
639
|
-
f"- Max samples count: {max_samples:.0f}\n"
|
|
679
|
+
f"- Max samples count: {max_samples:.0f}\n"
|
|
640
680
|
f"- with MS2: {consensus_with_ms2_percentage:.0f}%\n"
|
|
641
681
|
f"- total MS2: {total_ms2_count}\n"
|
|
642
|
-
f"Chrom completeness: {chrom_completeness*100:.0f}%\n"
|
|
643
|
-
f"Memory usage: {memory_usage / (1024
|
|
682
|
+
f"Chrom completeness: {chrom_completeness * 100:.0f}%\n"
|
|
683
|
+
f"Memory usage: {memory_usage / (1024**2):.2f} MB\n"
|
|
644
684
|
)
|
|
645
685
|
|
|
646
686
|
print(summary)
|
|
647
687
|
|
|
688
|
+
def _ensure_features_df_schema_order(self):
|
|
689
|
+
"""
|
|
690
|
+
Ensure features_df columns are ordered according to study5_schema.json.
|
|
691
|
+
|
|
692
|
+
This method should be called after operations that might scramble the column order.
|
|
693
|
+
"""
|
|
694
|
+
if self.features_df is None or self.features_df.is_empty():
|
|
695
|
+
return
|
|
696
|
+
|
|
697
|
+
try:
|
|
698
|
+
import os
|
|
699
|
+
import json
|
|
700
|
+
from masster.study.h5 import _reorder_columns_by_schema
|
|
701
|
+
|
|
702
|
+
# Load schema
|
|
703
|
+
schema_path = os.path.join(os.path.dirname(__file__), "study5_schema.json")
|
|
704
|
+
with open(schema_path, 'r') as f:
|
|
705
|
+
schema = json.load(f)
|
|
706
|
+
|
|
707
|
+
# Reorder columns to match schema
|
|
708
|
+
self.features_df = _reorder_columns_by_schema(self.features_df, schema, 'features_df')
|
|
709
|
+
|
|
710
|
+
except Exception as e:
|
|
711
|
+
self.logger.warning(f"Failed to reorder features_df columns: {e}")
|
|
712
|
+
|
|
648
713
|
|
|
649
714
|
if __name__ == "__main__":
|
|
650
715
|
# This block is executed when the script is run directly
|
masster/study/study5_schema.json
CHANGED
|
@@ -136,12 +136,12 @@
|
|
|
136
136
|
"feature_uid": {
|
|
137
137
|
"dtype": "pl.Int64"
|
|
138
138
|
},
|
|
139
|
-
"sample_uid": {
|
|
140
|
-
"dtype": "pl.Int32"
|
|
141
|
-
},
|
|
142
139
|
"feature_id": {
|
|
143
140
|
"dtype": "pl.Utf8"
|
|
144
141
|
},
|
|
142
|
+
"sample_uid": {
|
|
143
|
+
"dtype": "pl.Int32"
|
|
144
|
+
},
|
|
145
145
|
"mz": {
|
|
146
146
|
"dtype": "pl.Float64"
|
|
147
147
|
},
|
|
@@ -181,24 +181,24 @@
|
|
|
181
181
|
"iso_of": {
|
|
182
182
|
"dtype": "pl.Int64"
|
|
183
183
|
},
|
|
184
|
-
"adduct_group": {
|
|
185
|
-
"dtype": "pl.Int64"
|
|
186
|
-
},
|
|
187
184
|
"adduct": {
|
|
188
185
|
"dtype": "pl.Utf8"
|
|
189
186
|
},
|
|
190
187
|
"adduct_mass": {
|
|
191
188
|
"dtype": "pl.Float64"
|
|
192
189
|
},
|
|
190
|
+
"adduct_group": {
|
|
191
|
+
"dtype": "pl.Int64"
|
|
192
|
+
},
|
|
193
|
+
"chrom": {
|
|
194
|
+
"dtype": "pl.Object"
|
|
195
|
+
},
|
|
193
196
|
"filled": {
|
|
194
197
|
"dtype": "pl.Boolean"
|
|
195
198
|
},
|
|
196
199
|
"chrom_area": {
|
|
197
200
|
"dtype": "pl.Float64"
|
|
198
201
|
},
|
|
199
|
-
"chrom": {
|
|
200
|
-
"dtype": "pl.Object"
|
|
201
|
-
},
|
|
202
202
|
"chrom_coherence": {
|
|
203
203
|
"dtype": "pl.Float64"
|
|
204
204
|
},
|
|
@@ -221,24 +221,24 @@
|
|
|
221
221
|
},
|
|
222
222
|
"samples_df": {
|
|
223
223
|
"columns": {
|
|
224
|
+
"sample_uid": {
|
|
225
|
+
"dtype": "pl.Int64"
|
|
226
|
+
},
|
|
224
227
|
"map_id": {
|
|
225
228
|
"dtype": "pl.Utf8"
|
|
226
229
|
},
|
|
227
|
-
"
|
|
230
|
+
"file_source": {
|
|
228
231
|
"dtype": "pl.Utf8"
|
|
229
232
|
},
|
|
230
|
-
"
|
|
233
|
+
"sample_name": {
|
|
231
234
|
"dtype": "pl.Utf8"
|
|
232
235
|
},
|
|
233
|
-
"
|
|
236
|
+
"sample_path": {
|
|
234
237
|
"dtype": "pl.Utf8"
|
|
235
238
|
},
|
|
236
239
|
"sample_type": {
|
|
237
240
|
"dtype": "pl.Utf8"
|
|
238
241
|
},
|
|
239
|
-
"sample_uid": {
|
|
240
|
-
"dtype": "pl.Int64"
|
|
241
|
-
},
|
|
242
242
|
"size": {
|
|
243
243
|
"dtype": "pl.Int64"
|
|
244
244
|
},
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
masster/__init__.py,sha256=G7hbKO8F_o1wFwQlvO25M8JYGka_YSAVU2_O__2rjlI,697
|
|
2
2
|
masster/_version.py,sha256=ioQa4W_2pWdKSoU7hw7Pn6WMBm3nMuuLKfSR4f8171A,256
|
|
3
3
|
masster/chromatogram.py,sha256=NgPr1uLGJHjRu6PWZZGOrS3pCl7sye1yQCJjlRi9ZSY,19305
|
|
4
|
-
masster/logger.py,sha256=
|
|
4
|
+
masster/logger.py,sha256=W50V_uh8RSYwGxDrDFhOuj5jpu2tKJyt_16lMw9kQwA,14755
|
|
5
5
|
masster/spectrum.py,sha256=LuDa7qP_JInctzkmxC9c5468opHOholy321KpUgyW2U,47550
|
|
6
6
|
masster/data/examples/2025_01_14_VW_7600_LpMx_DBS_CID_2min_TOP15_030msecMS1_005msecReac_CE35_DBS-ON_3.featureXML,sha256=033IjCWBaYVymnPhVHneytilC-XIa6T-6wkeBB0BXvc,10980374
|
|
7
7
|
masster/data/examples/2025_01_14_VW_7600_LpMx_DBS_CID_2min_TOP15_030msecMS1_005msecReac_CE35_DBS-ON_3.mzML,sha256=3RS_crLN-aoPSacMYaQ45sxszmp_EcQElrg8tiuAQyA,39741920
|
|
@@ -10,50 +10,52 @@ masster/data/examples/2025_01_14_VW_7600_LpMx_DBS_CID_2min_TOP15_030msecMS1_005m
|
|
|
10
10
|
masster/data/examples/2025_01_14_VW_7600_LpMx_DBS_CID_2min_TOP15_030msecMS1_005msecReac_CE35_DBS-ON_3.wiff,sha256=go5N9gAM1rn4PZAVaoCmdteY9f7YGEM9gyPdSmkQ8PE,1447936
|
|
11
11
|
masster/data/examples/2025_01_14_VW_7600_LpMx_DBS_CID_2min_TOP15_030msecMS1_005msecReac_CE35_DBS-ON_3.wiff.scan,sha256=ahi1Y3UhAj9Bj4Q2MlbgPekNdkJvMOoMXVOoR6CeIxc,13881220
|
|
12
12
|
masster/data/examples/2025_01_14_VW_7600_LpMx_DBS_CID_2min_TOP15_030msecMS1_005msecReac_CE35_DBS-ON_3.wiff2,sha256=TFB0HW4Agkig6yht7FtgjUdbXax8jjKaHpSZSvuU5vs,3252224
|
|
13
|
+
masster/docs/SCX_API_Documentation.md,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
14
|
+
masster/docs/SCX_DLL_Analysis.md,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
13
15
|
masster/sample/__init__.py,sha256=HL0m1ept0PMAYUCQtDDnkdOS12IFl6oLAq4TZQz83uY,170
|
|
14
|
-
masster/sample/h5.py,sha256=
|
|
15
|
-
masster/sample/helpers.py,sha256=
|
|
16
|
+
masster/sample/h5.py,sha256=IdfbdkDgKcij-jMQTxnjW-gsBhb6vwi8w1XXL795yEs,63793
|
|
17
|
+
masster/sample/helpers.py,sha256=U2VyboRdTsQbOefCy7VXh6PlSQtEsR6BK5QF2jGUd94,36208
|
|
16
18
|
masster/sample/lib.py,sha256=l5YdU9TxEWJI0kJxXxrRCxgDDwbzO5zBf1_Qi_HY87w,33556
|
|
17
|
-
masster/sample/load.py,sha256=
|
|
19
|
+
masster/sample/load.py,sha256=5Ig94gEWBfdxR86yNwaPj9mdNvPI1nVCp0LXcFJt05U,47577
|
|
18
20
|
masster/sample/parameters.py,sha256=Gg2KcuNbV_wZ_Wwv93QlM5J19ji0oSIvZLPV1NoBmq0,4456
|
|
19
|
-
masster/sample/plot.py,sha256=
|
|
20
|
-
masster/sample/processing.py,sha256
|
|
21
|
+
masster/sample/plot.py,sha256=C8Y2DLLHw3Bo_U0MPhKMI3AKOkqj0zMUQxasRVWp26U,77964
|
|
22
|
+
masster/sample/processing.py,sha256=-H93MEUysA-B9PB4nU31WFjtaU_flqbu2gY35ce4vVs,57827
|
|
21
23
|
masster/sample/quant.py,sha256=tHNjvUFTdehKR31BXBZnVsBxMD9XJHgaltITOjr71uE,7562
|
|
22
|
-
masster/sample/sample.py,sha256=
|
|
23
|
-
masster/sample/sample5_schema.json,sha256=
|
|
24
|
-
masster/sample/save.py,sha256=
|
|
25
|
-
masster/sample/sciex.py,sha256=
|
|
24
|
+
masster/sample/sample.py,sha256=QxKjXPO5lWRrIq5eDsYzNQPjp0zI_vuPdPlRQe1y3uI,16925
|
|
25
|
+
masster/sample/sample5_schema.json,sha256=4g_uBMODLvxnhZU6iCK61W3fbaT6sTgOvBhEFc3U1nI,3772
|
|
26
|
+
masster/sample/save.py,sha256=kQULZkuhx2ED8jMlQsi5yK98dRKlQKvQf5uJDvI01ZA,31896
|
|
27
|
+
masster/sample/sciex.py,sha256=vnbxsq_qnAQVuzcpziP1o3IC4kM5amGBcPmC2TAuDLw,46319
|
|
26
28
|
masster/sample/defaults/__init__.py,sha256=A09AOP44cxD_oYohyt7XFUho0zndRcrzVD4DUaGnKH4,447
|
|
27
29
|
masster/sample/defaults/find_adducts_def.py,sha256=6CcGRlz4VeosoBT_W0bkR1Kjj11Rq3QvuuOnkizIZyk,11630
|
|
28
|
-
masster/sample/defaults/find_features_def.py,sha256=
|
|
30
|
+
masster/sample/defaults/find_features_def.py,sha256=Bcd39uav1BniwKgrsB-I1maF3ljf4Wb1f5yv0pDYfts,17745
|
|
29
31
|
masster/sample/defaults/find_ms2_def.py,sha256=KTELMAnioGLYbhzAwOgK14TZqboPEvzeBN0HC-v0Z5A,9872
|
|
30
32
|
masster/sample/defaults/get_spectrum_def.py,sha256=o62p31PhGd-LiIkTOzKQhwPtnO2AtQDHcPu-O-YoQPs,11460
|
|
31
|
-
masster/sample/defaults/sample_def.py,sha256=
|
|
33
|
+
masster/sample/defaults/sample_def.py,sha256=keoXyMyrm_iLgbYqfIbqCpJ3XHBVlNwCNmb5iMQL0iY,14579
|
|
32
34
|
masster/study/__init__.py,sha256=Zspv6U8jFqjkHGYdNdDy1rfUnCSolCzUdgSSg98PRgE,166
|
|
33
|
-
masster/study/export.py,sha256=
|
|
34
|
-
masster/study/h5.py,sha256=
|
|
35
|
-
masster/study/helpers.py,sha256=
|
|
36
|
-
masster/study/helpers_optimized.py,sha256=
|
|
37
|
-
masster/study/load.py,sha256=
|
|
35
|
+
masster/study/export.py,sha256=9Bhz8wpO3ZHdwV0iWSX0E38GS3UfqfAFlW9VN8ht2-Y,28845
|
|
36
|
+
masster/study/h5.py,sha256=UOc4tbeWr8Xa_5Aescz7rMMnkzpu8PSTsOAnTfPv0-E,67109
|
|
37
|
+
masster/study/helpers.py,sha256=qf5_4DVW5X3MI3TgoZes5bNzuIt-r_esSeJoNOO8a6A,92540
|
|
38
|
+
masster/study/helpers_optimized.py,sha256=sd87kNPIEPdMijekXzZWSyeZzJ_DTAW8HQjAry-jVyY,13922
|
|
39
|
+
masster/study/load.py,sha256=w-HhyQqShgc2R2b3ko3Yy7t-MHLiO9HlSpuOMAdpnZQ,48429
|
|
38
40
|
masster/study/parameters.py,sha256=0elaF7YspTsB7qyajWAbRNL2VfKlGz5GJLifmO8IGkk,3276
|
|
39
|
-
masster/study/plot.py,sha256=
|
|
40
|
-
masster/study/processing.py,sha256=
|
|
41
|
-
masster/study/save.py,sha256=
|
|
42
|
-
masster/study/study.py,sha256=
|
|
43
|
-
masster/study/study5_schema.json,sha256=
|
|
41
|
+
masster/study/plot.py,sha256=Vvbh19f94X8D4aX5mLVyPso-iC0ZpsyQCe3NTib34VU,68389
|
|
42
|
+
masster/study/processing.py,sha256=EZlzcMmMWcCW0dMdC3tc4r8ii6kncWQC2_Lc_ybfLFc,52286
|
|
43
|
+
masster/study/save.py,sha256=YjFEiuiB4OFLVvW_AX4-kgnsbjCWrYZeqF85VNEtbdw,6560
|
|
44
|
+
masster/study/study.py,sha256=ZeCwKgZfAIrvEywxn7HbtBFssWCYxgkRnsNzgmfApyo,31343
|
|
45
|
+
masster/study/study5_schema.json,sha256=Grm2vfi2NnfNfcqKndz3IX9JNyhgwh92T8x-IofLay4,5103
|
|
44
46
|
masster/study/defaults/__init__.py,sha256=m3Z5KXGqsTdh7GjYzZoENERt39yRg0ceVRV1DeCt1P0,610
|
|
45
|
-
masster/study/defaults/align_def.py,sha256=
|
|
47
|
+
masster/study/defaults/align_def.py,sha256=QSJXfe5kAtYp_IN8LUuXjq61IkxT74ml84k5kmmRjqM,19846
|
|
46
48
|
masster/study/defaults/export_def.py,sha256=eXl3h4aoLX88XkHTpqahLd-QZ2gjUqrmjq8IJULXeWo,1203
|
|
47
|
-
masster/study/defaults/fill_chrom_def.py,sha256=
|
|
49
|
+
masster/study/defaults/fill_chrom_def.py,sha256=hB6-tyC9bhx-IpGj2HC8FinQdW4VLYj_pn5t1rlj-Ew,8887
|
|
48
50
|
masster/study/defaults/fill_def.py,sha256=5B7-iNCngdwHPbf0146LzrqxKCi7_g5OC1XtkxvckeQ,8869
|
|
49
51
|
masster/study/defaults/find_consensus_def.py,sha256=uWB4NKCXDMQgNp4BaQUExkDofnXz0ZDffsxH1tvH2_Q,8599
|
|
50
52
|
masster/study/defaults/find_ms2_def.py,sha256=RL0DFG41wQ05U8UQKUGr3vzSl3mU0m0knQus8DpSoJE,5070
|
|
51
|
-
masster/study/defaults/integrate_chrom_def.py,sha256=
|
|
53
|
+
masster/study/defaults/integrate_chrom_def.py,sha256=0MNIWGTjty-Zu-NTQsIweuj3UVqEY3x1x8pK0mPwYak,7264
|
|
52
54
|
masster/study/defaults/integrate_def.py,sha256=Vf4SAzdBfnsSZ3IRaF0qZvWu3gMDPHdgPfMYoPKeWv8,7246
|
|
53
55
|
masster/study/defaults/merge_def.py,sha256=EBsKE3hsAkTEzN9dpdRD5W3_suTKy_WZ_96rwS0uBuE,8572
|
|
54
|
-
masster/study/defaults/study_def.py,sha256=
|
|
55
|
-
masster-0.3.
|
|
56
|
-
masster-0.3.
|
|
57
|
-
masster-0.3.
|
|
58
|
-
masster-0.3.
|
|
59
|
-
masster-0.3.
|
|
56
|
+
masster/study/defaults/study_def.py,sha256=d8mQWIpvWEWI8grPTAcQa4jKTG7QrM98RRgHZVoh134,9519
|
|
57
|
+
masster-0.3.12.dist-info/METADATA,sha256=gb3nbkBNDa0TARJGbCHRjoOym_L4bMX6mT6415BBvDg,44293
|
|
58
|
+
masster-0.3.12.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
59
|
+
masster-0.3.12.dist-info/entry_points.txt,sha256=ZHguQ_vPmdbpqq2uGtmEOLJfgP-DQ1T0c07Lxh30wc8,58
|
|
60
|
+
masster-0.3.12.dist-info/licenses/LICENSE,sha256=bx5iLIKjgAdYQ7sISn7DsfHRKkoCUm1154sJJKhgqnU,35184
|
|
61
|
+
masster-0.3.12.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|