masster 0.5.1__py3-none-any.whl → 0.5.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of masster might be problematic. Click here for more details.
- masster/_version.py +1 -1
- masster/sample/load.py +5 -4
- masster/study/defaults/align_def.py +0 -204
- masster/study/defaults/fill_def.py +9 -1
- masster/study/defaults/merge_def.py +20 -69
- masster/study/export.py +25 -5
- masster/study/h5.py +160 -42
- masster/study/helpers.py +430 -53
- masster/study/load.py +986 -158
- masster/study/merge.py +683 -1076
- masster/study/plot.py +43 -38
- masster/study/processing.py +337 -280
- masster/study/study.py +58 -135
- masster/wizard/wizard.py +20 -6
- {masster-0.5.1.dist-info → masster-0.5.3.dist-info}/METADATA +1 -1
- {masster-0.5.1.dist-info → masster-0.5.3.dist-info}/RECORD +19 -20
- masster/study/defaults/fill_chrom_def.py +0 -260
- {masster-0.5.1.dist-info → masster-0.5.3.dist-info}/WHEEL +0 -0
- {masster-0.5.1.dist-info → masster-0.5.3.dist-info}/entry_points.txt +0 -0
- {masster-0.5.1.dist-info → masster-0.5.3.dist-info}/licenses/LICENSE +0 -0
masster/study/study.py
CHANGED
|
@@ -1,46 +1,38 @@
|
|
|
1
1
|
"""
|
|
2
2
|
study.py
|
|
3
3
|
|
|
4
|
-
|
|
5
|
-
It
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
-
|
|
11
|
-
-
|
|
12
|
-
-
|
|
13
|
-
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
-
|
|
19
|
-
-
|
|
20
|
-
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
study_obj.export_consensus()
|
|
37
|
-
```
|
|
38
|
-
|
|
39
|
-
See Also:
|
|
40
|
-
- `single.py`: For individual file processing before study-level analysis.
|
|
41
|
-
- `parameters.study_parameters`: For study-specific parameter configuration.
|
|
42
|
-
|
|
43
|
-
|
|
4
|
+
Module providing the Study class, the main entry point for multi-sample mass spectrometry
|
|
5
|
+
studies. It manages loading and metadata, cross-sample feature alignment, consensus
|
|
6
|
+
generation, integration, MS2 association, plotting, exporting, and parameter/history
|
|
7
|
+
management.
|
|
8
|
+
|
|
9
|
+
Main class:
|
|
10
|
+
- Study: high-level orchestrator. Key operations include:
|
|
11
|
+
- I/O: load/save .study5, add/add_sample, set_study_folder
|
|
12
|
+
- Processing: align, merge (consensus), fill, integrate, find_ms2, find_iso/reset_iso
|
|
13
|
+
- Selection/filtering: samples_select/delete, features_select/filter/delete,
|
|
14
|
+
consensus_select/filter/delete
|
|
15
|
+
- Retrieval: get_consensus, get_chrom, get_samples, get_*_stats, get_*_matrix
|
|
16
|
+
- Plotting: plot_alignment, plot_samples_pca/umap/2d, plot_tic/bpc/eic, plot_chrom,
|
|
17
|
+
plot_rt_correction, plot_consensus_2d/stats
|
|
18
|
+
- Export: export_mgf, export_mztab, export_xlsx, export_parquet
|
|
19
|
+
- Identification: lib_load, identify, get_id, id_reset, lib_reset
|
|
20
|
+
- Parameters: get/update parameters, update_history
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
Quickstart:
|
|
24
|
+
>>> from masster import Study
|
|
25
|
+
>>> s = Study(folder="./study")
|
|
26
|
+
>>> s.add("/data/mzML/*.mzML") # or s.add_sample("sample.mzML", name="S1")
|
|
27
|
+
>>> s.align()
|
|
28
|
+
>>> s.merge()
|
|
29
|
+
>>> s.plot_alignment()
|
|
30
|
+
>>> s.export_parquet("consensus.parquet")
|
|
31
|
+
>>> s.save("project.study5")
|
|
32
|
+
|
|
33
|
+
Notes:
|
|
34
|
+
- This module re-exports many functions from masster.study.* as Study methods.
|
|
35
|
+
- Use Study.info() for a concise study summary.
|
|
44
36
|
"""
|
|
45
37
|
|
|
46
38
|
from __future__ import annotations
|
|
@@ -57,6 +49,7 @@ from masster.study.helpers import _get_consensus_uids
|
|
|
57
49
|
from masster.study.helpers import _get_features_uids
|
|
58
50
|
from masster.study.helpers import _get_samples_uids
|
|
59
51
|
from masster.study.helpers import compress
|
|
52
|
+
from masster.study.helpers import consensus_reset
|
|
60
53
|
from masster.study.helpers import decompress
|
|
61
54
|
from masster.study.helpers import fill_reset
|
|
62
55
|
from masster.study.helpers import get_chrom
|
|
@@ -66,6 +59,7 @@ from masster.study.helpers import get_consensus_matches
|
|
|
66
59
|
from masster.study.helpers import get_consensus_matrix
|
|
67
60
|
from masster.study.helpers import get_orphans
|
|
68
61
|
from masster.study.helpers import get_sample_stats
|
|
62
|
+
from masster.study.helpers import get_consensus_stats
|
|
69
63
|
from masster.study.helpers import get_gaps_matrix
|
|
70
64
|
from masster.study.helpers import get_gaps_stats
|
|
71
65
|
from masster.study.helpers import align_reset
|
|
@@ -86,7 +80,10 @@ from masster.study.load import add
|
|
|
86
80
|
from masster.study.load import add_sample
|
|
87
81
|
from masster.study.load import fill
|
|
88
82
|
from masster.study.load import load
|
|
89
|
-
from masster.study.load import _load_features
|
|
83
|
+
#from masster.study.load import _load_features
|
|
84
|
+
from masster.study.h5 import _load_ms1
|
|
85
|
+
from masster.study.h5 import _load_study5
|
|
86
|
+
from masster.study.h5 import _save_study5
|
|
90
87
|
from masster.study.plot import plot_alignment
|
|
91
88
|
from masster.study.plot import plot_consensus_2d
|
|
92
89
|
from masster.study.plot import plot_samples_2d
|
|
@@ -117,7 +114,6 @@ from masster.logger import MassterLogger
|
|
|
117
114
|
from masster.study.defaults.study_def import study_defaults
|
|
118
115
|
from masster.study.defaults.align_def import align_defaults
|
|
119
116
|
from masster.study.defaults.export_def import export_mgf_defaults
|
|
120
|
-
from masster.study.defaults.fill_chrom_def import fill_chrom_defaults
|
|
121
117
|
from masster.study.defaults.fill_def import fill_defaults
|
|
122
118
|
from masster.study.defaults.find_ms2_def import find_ms2_defaults
|
|
123
119
|
from masster.study.defaults.integrate_chrom_def import integrate_chrom_defaults
|
|
@@ -352,6 +348,9 @@ class Study:
|
|
|
352
348
|
save_consensus = save_consensus
|
|
353
349
|
save_samples = save_samples
|
|
354
350
|
set_study_folder = set_study_folder
|
|
351
|
+
_load_ms1 = _load_ms1
|
|
352
|
+
_load_study5 = _load_study5
|
|
353
|
+
_save_study5 = _save_study5
|
|
355
354
|
|
|
356
355
|
# === Sample Management ===
|
|
357
356
|
add = add
|
|
@@ -368,6 +367,7 @@ class Study:
|
|
|
368
367
|
integrate = integrate
|
|
369
368
|
|
|
370
369
|
fill = fill
|
|
370
|
+
#_estimate_rt_original_for_filled_feature = _estimate_rt_original_for_filled_feature
|
|
371
371
|
|
|
372
372
|
# === Data Retrieval and Access ===
|
|
373
373
|
get_consensus = get_consensus
|
|
@@ -379,6 +379,7 @@ class Study:
|
|
|
379
379
|
get_gaps_stats = get_gaps_stats
|
|
380
380
|
get_orphans = get_orphans
|
|
381
381
|
get_sample_stats = get_sample_stats
|
|
382
|
+
get_consensus_stats = get_consensus_stats
|
|
382
383
|
|
|
383
384
|
# === Data Selection and Filtering ===
|
|
384
385
|
samples_select = samples_select
|
|
@@ -411,6 +412,7 @@ class Study:
|
|
|
411
412
|
decompress = decompress
|
|
412
413
|
|
|
413
414
|
# === Reset Operations ===
|
|
415
|
+
consensus_reset = consensus_reset
|
|
414
416
|
fill_reset = fill_reset
|
|
415
417
|
reset_fill = fill_reset
|
|
416
418
|
align_reset = align_reset
|
|
@@ -463,19 +465,11 @@ class Study:
|
|
|
463
465
|
_get_consensus_uids = _get_consensus_uids
|
|
464
466
|
_get_features_uids = _get_features_uids
|
|
465
467
|
_get_samples_uids = _get_samples_uids
|
|
466
|
-
_load_features = _load_features
|
|
467
|
-
|
|
468
|
-
# Note: _load_study5 and _save_study5 are not exposed as class methods
|
|
469
|
-
# They are used internally by load() and save() methods only
|
|
470
|
-
|
|
471
|
-
# === Merge Helper Methods ===
|
|
472
|
-
# (All merge helper methods are now internal to the merge module)
|
|
473
468
|
|
|
474
469
|
# === Default Parameters ===
|
|
475
470
|
study_defaults = study_defaults
|
|
476
471
|
align_defaults = align_defaults
|
|
477
472
|
export_mgf_defaults = export_mgf_defaults
|
|
478
|
-
fill_chrom_defaults = fill_chrom_defaults
|
|
479
473
|
fill_defaults = fill_defaults
|
|
480
474
|
find_ms2_defaults = find_ms2_defaults
|
|
481
475
|
integrate_chrom_defaults = integrate_chrom_defaults
|
|
@@ -566,84 +560,7 @@ class Study:
|
|
|
566
560
|
self.logger.debug("Module reload completed")
|
|
567
561
|
except Exception as e:
|
|
568
562
|
self.logger.error(f"Failed to reload current module {current_module}: {e}")
|
|
569
|
-
|
|
570
|
-
def _sanitize_null_ids(self):
|
|
571
|
-
"""
|
|
572
|
-
Sanitize null feature_id and consensus_id values by replacing them with new integer IDs.
|
|
573
|
-
For feature_id: generates large sequential integers that can be converted by merge/align functions.
|
|
574
|
-
For consensus_id: uses 16-character UUID strings (as expected by merge function).
|
|
575
|
-
"""
|
|
576
|
-
import uuid
|
|
577
|
-
import polars as pl
|
|
578
|
-
import time
|
|
579
|
-
|
|
580
|
-
# Sanitize features_df feature_id column
|
|
581
|
-
if hasattr(self, 'features_df') and self.features_df is not None and not self.features_df.is_empty():
|
|
582
|
-
# Check for null feature_ids
|
|
583
|
-
null_feature_ids = self.features_df.filter(pl.col("feature_id").is_null()).shape[0]
|
|
584
|
-
if null_feature_ids > 0:
|
|
585
|
-
self.logger.info(f"Sanitizing {null_feature_ids} null feature_id values with new integer IDs")
|
|
586
|
-
|
|
587
|
-
# Find the maximum existing feature_id (convert strings to int if possible)
|
|
588
|
-
max_existing_id = 0
|
|
589
|
-
existing_ids = self.features_df.filter(pl.col("feature_id").is_not_null())["feature_id"].to_list()
|
|
590
|
-
for fid in existing_ids:
|
|
591
|
-
try:
|
|
592
|
-
int_id = int(fid)
|
|
593
|
-
max_existing_id = max(max_existing_id, int_id)
|
|
594
|
-
except (ValueError, TypeError):
|
|
595
|
-
# Skip non-integer IDs
|
|
596
|
-
pass
|
|
597
|
-
|
|
598
|
-
# Generate new sequential integer IDs starting from max + timestamp offset
|
|
599
|
-
# Use timestamp to ensure uniqueness across different sanitization runs
|
|
600
|
-
base_id = max(max_existing_id + 1, int(time.time() * 1000000)) # Microsecond timestamp
|
|
601
|
-
new_int_ids = [str(base_id + i) for i in range(null_feature_ids)]
|
|
602
|
-
uid_index = 0
|
|
603
|
-
|
|
604
|
-
# Create a list to store all feature_ids
|
|
605
|
-
feature_ids = []
|
|
606
|
-
for feature_id in self.features_df["feature_id"].to_list():
|
|
607
|
-
if feature_id is None:
|
|
608
|
-
feature_ids.append(new_int_ids[uid_index])
|
|
609
|
-
uid_index += 1
|
|
610
|
-
else:
|
|
611
|
-
feature_ids.append(feature_id)
|
|
612
|
-
|
|
613
|
-
# Update the DataFrame with sanitized feature_ids
|
|
614
|
-
self.features_df = self.features_df.with_columns(
|
|
615
|
-
pl.Series("feature_id", feature_ids, dtype=pl.Utf8)
|
|
616
|
-
)
|
|
617
|
-
|
|
618
|
-
self.logger.info(f"Successfully sanitized {null_feature_ids} feature_id values")
|
|
619
|
-
|
|
620
|
-
# Sanitize consensus_df consensus_id column
|
|
621
|
-
if hasattr(self, 'consensus_df') and self.consensus_df is not None and not self.consensus_df.is_empty():
|
|
622
|
-
if "consensus_id" in self.consensus_df.columns:
|
|
623
|
-
null_consensus_ids = self.consensus_df.filter(pl.col("consensus_id").is_null()).shape[0]
|
|
624
|
-
if null_consensus_ids > 0:
|
|
625
|
-
self.logger.info(f"Sanitizing {null_consensus_ids} null consensus_id values with new UIDs")
|
|
626
|
-
|
|
627
|
-
# Generate new UIDs for null values using the same method as merge()
|
|
628
|
-
new_uids = [str(uuid.uuid4()).replace('-', '')[:16] for _ in range(null_consensus_ids)]
|
|
629
|
-
uid_index = 0
|
|
630
|
-
|
|
631
|
-
# Create a list to store all consensus_ids
|
|
632
|
-
consensus_ids = []
|
|
633
|
-
for consensus_id in self.consensus_df["consensus_id"].to_list():
|
|
634
|
-
if consensus_id is None:
|
|
635
|
-
consensus_ids.append(new_uids[uid_index])
|
|
636
|
-
uid_index += 1
|
|
637
|
-
else:
|
|
638
|
-
consensus_ids.append(consensus_id)
|
|
639
|
-
|
|
640
|
-
# Update the DataFrame with sanitized consensus_ids
|
|
641
|
-
self.consensus_df = self.consensus_df.with_columns(
|
|
642
|
-
pl.Series("consensus_id", consensus_ids, dtype=pl.Utf8)
|
|
643
|
-
)
|
|
644
|
-
|
|
645
|
-
self.logger.info(f"Successfully sanitized {null_consensus_ids} consensus_id values")
|
|
646
|
-
|
|
563
|
+
|
|
647
564
|
def __dir__(self):
|
|
648
565
|
"""
|
|
649
566
|
Custom __dir__ implementation to hide internal methods starting with '_'
|
|
@@ -699,12 +616,19 @@ class Study:
|
|
|
699
616
|
|
|
700
617
|
def __str__(self):
|
|
701
618
|
"""
|
|
702
|
-
|
|
703
|
-
|
|
704
|
-
Returns:
|
|
705
|
-
str: A summary string of the study.
|
|
619
|
+
Return a short summary string with number of samples and consensus features.
|
|
706
620
|
"""
|
|
707
|
-
|
|
621
|
+
samples = (
|
|
622
|
+
len(self.samples_df)
|
|
623
|
+
if (self.samples_df is not None and not self.samples_df.is_empty())
|
|
624
|
+
else 0
|
|
625
|
+
)
|
|
626
|
+
consensus = (
|
|
627
|
+
len(self.consensus_df)
|
|
628
|
+
if (self.consensus_df is not None and not self.consensus_df.is_empty())
|
|
629
|
+
else 0
|
|
630
|
+
)
|
|
631
|
+
return f"{samples} samples, {consensus} consensus"
|
|
708
632
|
|
|
709
633
|
def logger_update(
|
|
710
634
|
self,
|
|
@@ -993,6 +917,5 @@ class Study:
|
|
|
993
917
|
|
|
994
918
|
print(summary)
|
|
995
919
|
|
|
996
|
-
if __name__ == "__main__":
|
|
997
|
-
# This block is executed when the script is run directly
|
|
920
|
+
if __name__ == "__main__":
|
|
998
921
|
pass
|
masster/wizard/wizard.py
CHANGED
|
@@ -652,12 +652,26 @@ class Wizard:
|
|
|
652
652
|
' )',
|
|
653
653
|
' ',
|
|
654
654
|
' # Merge and create consensus features',
|
|
655
|
-
'
|
|
656
|
-
'
|
|
657
|
-
'
|
|
658
|
-
'
|
|
659
|
-
'
|
|
660
|
-
'
|
|
655
|
+
' # Use optimized method for large datasets (>500 samples)',
|
|
656
|
+
' num_samples = len(study.samples)',
|
|
657
|
+
' if num_samples > 500:',
|
|
658
|
+
' print(f" Large dataset detected ({num_samples} samples), using optimized qt_chunked + hierarchical method")',
|
|
659
|
+
' study.merge(',
|
|
660
|
+
' method="qt_chunked",',
|
|
661
|
+
' dechunking="hierarchical",',
|
|
662
|
+
' min_samples=PARAMS[\'min_samples_per_feature\'],',
|
|
663
|
+
' threads=PARAMS[\'num_cores\'],',
|
|
664
|
+
' rt_tol=PARAMS[\'rt_tol\'],',
|
|
665
|
+
' mz_tol=PARAMS[\'mz_tol\']',
|
|
666
|
+
' )',
|
|
667
|
+
' else:',
|
|
668
|
+
' print(f" Using standard merge method for {num_samples} samples")',
|
|
669
|
+
' study.merge(',
|
|
670
|
+
' min_samples=PARAMS[\'min_samples_per_feature\'],',
|
|
671
|
+
' threads=PARAMS[\'num_cores\'],',
|
|
672
|
+
' rt_tol=PARAMS[\'rt_tol\'],',
|
|
673
|
+
' mz_tol=PARAMS[\'mz_tol\']',
|
|
674
|
+
' )',
|
|
661
675
|
' study.find_iso()',
|
|
662
676
|
' study.fill(min_samples_rel=0.0)',
|
|
663
677
|
' study.integrate()',
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
masster/__init__.py,sha256=ueZ224WPNRRjQEYTaQUol818nwQgJwB93HbEfmtPRmg,1041
|
|
2
|
-
masster/_version.py,sha256=
|
|
2
|
+
masster/_version.py,sha256=ugCZC9n8XchZWXe4FUzXQCUPt7xKbTwRFRBFdsN0tkI,256
|
|
3
3
|
masster/chromatogram.py,sha256=iYpdv8C17zVnlWvOFgAn9ns2uFGiF-GgoYf5QVVAbHs,19319
|
|
4
4
|
masster/logger.py,sha256=tR65N23zfrNpcZNbZm2ot_Aual9XrGB1MWjLrovZkMs,16749
|
|
5
5
|
masster/spectrum.py,sha256=XJSUrqXZSzfpWnD8v5IMClXMRZLKLYIk014qaMOS9_k,49738
|
|
@@ -22,7 +22,7 @@ masster/sample/adducts.py,sha256=S7meba3L1tSdjoDhkSiTI71H2NJLu4i1dtJwfDKWI1M,325
|
|
|
22
22
|
masster/sample/h5.py,sha256=B0gAmhrnoFoybotqsqiT8s-PkeZWUdIQfI-4cnM52Zc,115430
|
|
23
23
|
masster/sample/helpers.py,sha256=JhzFpNh7j7YVUibIMuPQ50hBcGDEBCaBbmwA3Z5OhgM,41336
|
|
24
24
|
masster/sample/lib.py,sha256=E-j9c3Wd8f9a-H8xj7CAOwlA8KcyXPoFyYm3c8r7LtI,33755
|
|
25
|
-
masster/sample/load.py,sha256=
|
|
25
|
+
masster/sample/load.py,sha256=jVgni8eFG8ubxX2K0ygzHQuMbZv0hfRP1kFomVTe9t4,51784
|
|
26
26
|
masster/sample/parameters.py,sha256=Gg2KcuNbV_wZ_Wwv93QlM5J19ji0oSIvZLPV1NoBmq0,4456
|
|
27
27
|
masster/sample/plot.py,sha256=0v4FzO_yzpUdATjdHZU4YO7UoW73Xlf51vVQByQ33X0,82574
|
|
28
28
|
masster/sample/processing.py,sha256=Sh6IFxuFcmCspyizUPghd4Qqqk1mTPCNtvdxmyy0eKQ,55914
|
|
@@ -39,36 +39,35 @@ masster/sample/defaults/get_spectrum_def.py,sha256=o62p31PhGd-LiIkTOzKQhwPtnO2At
|
|
|
39
39
|
masster/sample/defaults/sample_def.py,sha256=keoXyMyrm_iLgbYqfIbqCpJ3XHBVlNwCNmb5iMQL0iY,14579
|
|
40
40
|
masster/study/__init__.py,sha256=55axdFuqRX4aXtJ8ocnhcLB32fNtmmJpCi58moO0r4g,237
|
|
41
41
|
masster/study/analysis.py,sha256=L-wXBnGZCLB5UUDrjIdOiMG9zdej3Tw_SftcEmmTukM,84264
|
|
42
|
-
masster/study/export.py,sha256=
|
|
43
|
-
masster/study/h5.py,sha256=
|
|
44
|
-
masster/study/helpers.py,sha256=
|
|
42
|
+
masster/study/export.py,sha256=joFK9jip2UM4lVAvhkdKVeUdNdM4D8uP2WE49IaVJgw,60172
|
|
43
|
+
masster/study/h5.py,sha256=rMY8lgXPrU41L_bgzVl7J-uDWzb6thG2-ibw71JP0Ss,91376
|
|
44
|
+
masster/study/helpers.py,sha256=s5jLUmxDAs_Qn6dVwpkwlwuwliMDEBjmeikS6OrxdSE,183137
|
|
45
45
|
masster/study/id.py,sha256=r_vZQYNxqNXf_pjgk_CLkl1doLnLa956mTuVmlHN52o,80075
|
|
46
|
-
masster/study/load.py,sha256=
|
|
47
|
-
masster/study/merge.py,sha256=
|
|
46
|
+
masster/study/load.py,sha256=7d11294YYEGrSKox3cwvetv2vqcstYT1SnyAhHH5V_Q,107706
|
|
47
|
+
masster/study/merge.py,sha256=D9xNRlEaMPTPZQAZhiBBSzQ-27lD60fCDmKb0cYST-M,149764
|
|
48
48
|
masster/study/parameters.py,sha256=bTvmcwX9INxzcrEAmTiFH8qeWVhwkvMTZjuP394pz5o,3279
|
|
49
|
-
masster/study/plot.py,sha256=
|
|
50
|
-
masster/study/processing.py,sha256=
|
|
49
|
+
masster/study/plot.py,sha256=cS4haKL8PA9q7qgkv_0S7JY6PfFV5nKZH33thSnjwtM,102113
|
|
50
|
+
masster/study/processing.py,sha256=O6X7wgeq0kXSyMO12g23cqB8cYO60gLRMxuJC2uhSMY,58644
|
|
51
51
|
masster/study/save.py,sha256=47AP518epJJ9TjaGGyrLKsMsyjIk8_J4ka7bmsnRtFQ,9268
|
|
52
|
-
masster/study/study.py,sha256=
|
|
52
|
+
masster/study/study.py,sha256=TnZkTLB8Z5R-AVqoHfUNvmkTthfUI4OPmBo_LYR_e8g,38654
|
|
53
53
|
masster/study/study5_schema.json,sha256=0IZxM9VVI0TUlx74BPzJDT44kySi6NZZ6iLR0j8bU_s,7736
|
|
54
54
|
masster/study/defaults/__init__.py,sha256=m3Z5KXGqsTdh7GjYzZoENERt39yRg0ceVRV1DeCt1P0,610
|
|
55
|
-
masster/study/defaults/align_def.py,sha256=
|
|
55
|
+
masster/study/defaults/align_def.py,sha256=Du0F592ej2einT8kOx8EUs610axSvur8_-6N19O-uJY,10209
|
|
56
56
|
masster/study/defaults/export_def.py,sha256=eXl3h4aoLX88XkHTpqahLd-QZ2gjUqrmjq8IJULXeWo,1203
|
|
57
|
-
masster/study/defaults/
|
|
58
|
-
masster/study/defaults/fill_def.py,sha256=H-ZNKyiXxBLWdLoCMqxfvphNyc9wrDVFMC7TyRNYEm0,8869
|
|
57
|
+
masster/study/defaults/fill_def.py,sha256=pdH-lwknaMqnB_lXdGmeSqk9KgLMoOgNlp82eEunx6U,9124
|
|
59
58
|
masster/study/defaults/find_consensus_def.py,sha256=2KRRMsCDP7pwNrLCC6eI5uQgMXqiNdiI6pSvxNJ8L5M,8598
|
|
60
59
|
masster/study/defaults/find_ms2_def.py,sha256=RL0DFG41wQ05U8UQKUGr3vzSl3mU0m0knQus8DpSoJE,5070
|
|
61
60
|
masster/study/defaults/identify_def.py,sha256=96rxoCAPQj_yX-3mRoD2LTkTLJgG27eJQqwarLv5jL0,10580
|
|
62
61
|
masster/study/defaults/integrate_chrom_def.py,sha256=0MNIWGTjty-Zu-NTQsIweuj3UVqEY3x1x8pK0mPwYak,7264
|
|
63
62
|
masster/study/defaults/integrate_def.py,sha256=Vf4SAzdBfnsSZ3IRaF0qZvWu3gMDPHdgPfMYoPKeWv8,7246
|
|
64
|
-
masster/study/defaults/merge_def.py,sha256=
|
|
63
|
+
masster/study/defaults/merge_def.py,sha256=3W13QSZaIzxHTDFifW-Nncu_phIqZgf0TpcllaKwbHE,12978
|
|
65
64
|
masster/study/defaults/study_def.py,sha256=h8dYbi9xv0sesCSQik49Z53IkskMmNtW6ixl7it5pL0,16033
|
|
66
65
|
masster/wizard/README.md,sha256=mL1A3YWJZOefpJ6D0-HqGLkVRmUlOpwyVFdvJBeeoZM,14149
|
|
67
66
|
masster/wizard/__init__.py,sha256=a2hcZnHASjfuw1lqZhZnvTR58rc33rRnoGAY_JfvGhI,683
|
|
68
67
|
masster/wizard/example.py,sha256=xEZFTH9UZ8HKOm6s3JL8Js0Uw5ChnISWBHSZCL32vsM,7983
|
|
69
|
-
masster/wizard/wizard.py,sha256=
|
|
70
|
-
masster-0.5.
|
|
71
|
-
masster-0.5.
|
|
72
|
-
masster-0.5.
|
|
73
|
-
masster-0.5.
|
|
74
|
-
masster-0.5.
|
|
68
|
+
masster/wizard/wizard.py,sha256=6VqeOyKJ-9n0376CVbNuQo4vKLFjE0Sl2KexWZclQew,38580
|
|
69
|
+
masster-0.5.3.dist-info/METADATA,sha256=qYfl6QVz7POBKlmRQ0UDR6ZR9SdUgjaXw3LFpajZKHM,45113
|
|
70
|
+
masster-0.5.3.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
71
|
+
masster-0.5.3.dist-info/entry_points.txt,sha256=ZHguQ_vPmdbpqq2uGtmEOLJfgP-DQ1T0c07Lxh30wc8,58
|
|
72
|
+
masster-0.5.3.dist-info/licenses/LICENSE,sha256=bx5iLIKjgAdYQ7sISn7DsfHRKkoCUm1154sJJKhgqnU,35184
|
|
73
|
+
masster-0.5.3.dist-info/RECORD,,
|
|
@@ -1,260 +0,0 @@
|
|
|
1
|
-
"""Parameter class for Study fill_chrom method."""
|
|
2
|
-
|
|
3
|
-
from dataclasses import dataclass, field
|
|
4
|
-
from typing import Optional, Any
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
@dataclass
|
|
8
|
-
class fill_chrom_defaults:
|
|
9
|
-
"""
|
|
10
|
-
Parameter class for Study fill_chrom method.
|
|
11
|
-
|
|
12
|
-
This class encapsulates parameters for filling missing chromatograms
|
|
13
|
-
by extracting them from raw data across samples.
|
|
14
|
-
|
|
15
|
-
Attributes:
|
|
16
|
-
uids (Optional[list]): List of consensus UIDs to process. Default is None (all).
|
|
17
|
-
mz_tol (float): m/z tolerance for chromatogram extraction (Da). Default is 0.010.
|
|
18
|
-
rt_tol (float): RT tolerance for chromatogram extraction (seconds). Default is 10.0.
|
|
19
|
-
min_samples_rel (float): Minimum relative samples threshold. Default is 0.05.
|
|
20
|
-
min_samples_abs (int): Minimum absolute samples threshold. Default is 5.
|
|
21
|
-
"""
|
|
22
|
-
|
|
23
|
-
uids: Optional[list] = None
|
|
24
|
-
mz_tol: float = 0.010
|
|
25
|
-
rt_tol: float = 10.0
|
|
26
|
-
min_samples_rel: float = 0.05
|
|
27
|
-
min_samples_abs: int = 5
|
|
28
|
-
|
|
29
|
-
_param_metadata: dict[str, dict[str, Any]] = field(
|
|
30
|
-
default_factory=lambda: {
|
|
31
|
-
"uids": {
|
|
32
|
-
"dtype": "Optional[list]",
|
|
33
|
-
"description": "List of consensus UIDs to process (None for all)",
|
|
34
|
-
"default": None,
|
|
35
|
-
},
|
|
36
|
-
"mz_tol": {
|
|
37
|
-
"dtype": float,
|
|
38
|
-
"description": "m/z tolerance for chromatogram extraction (Da)",
|
|
39
|
-
"default": 0.010,
|
|
40
|
-
"min_value": 0.001,
|
|
41
|
-
"max_value": 0.1,
|
|
42
|
-
},
|
|
43
|
-
"rt_tol": {
|
|
44
|
-
"dtype": float,
|
|
45
|
-
"description": "RT tolerance for chromatogram extraction (seconds)",
|
|
46
|
-
"default": 10.0,
|
|
47
|
-
"min_value": 1.0,
|
|
48
|
-
"max_value": 300.0,
|
|
49
|
-
},
|
|
50
|
-
"min_samples_rel": {
|
|
51
|
-
"dtype": float,
|
|
52
|
-
"description": "Minimum relative samples threshold (fraction)",
|
|
53
|
-
"default": 0.05,
|
|
54
|
-
"min_value": 0.01,
|
|
55
|
-
"max_value": 1.0,
|
|
56
|
-
},
|
|
57
|
-
"min_samples_abs": {
|
|
58
|
-
"dtype": int,
|
|
59
|
-
"description": "Minimum absolute samples threshold",
|
|
60
|
-
"default": 5,
|
|
61
|
-
"min_value": 1,
|
|
62
|
-
"max_value": 100,
|
|
63
|
-
},
|
|
64
|
-
},
|
|
65
|
-
repr=False,
|
|
66
|
-
)
|
|
67
|
-
|
|
68
|
-
def get_info(self, param_name: str) -> dict[str, Any]:
|
|
69
|
-
"""
|
|
70
|
-
Get information about a specific parameter.
|
|
71
|
-
|
|
72
|
-
Args:
|
|
73
|
-
param_name: Name of the parameter
|
|
74
|
-
|
|
75
|
-
Returns:
|
|
76
|
-
Dictionary containing parameter metadata
|
|
77
|
-
|
|
78
|
-
Raises:
|
|
79
|
-
KeyError: If parameter name is not found
|
|
80
|
-
"""
|
|
81
|
-
if param_name not in self._param_metadata:
|
|
82
|
-
raise KeyError(f"Parameter '{param_name}' not found")
|
|
83
|
-
return self._param_metadata[param_name]
|
|
84
|
-
|
|
85
|
-
def get_description(self, param_name: str) -> str:
|
|
86
|
-
"""
|
|
87
|
-
Get description for a specific parameter.
|
|
88
|
-
|
|
89
|
-
Args:
|
|
90
|
-
param_name: Name of the parameter
|
|
91
|
-
|
|
92
|
-
Returns:
|
|
93
|
-
Parameter description string
|
|
94
|
-
"""
|
|
95
|
-
return str(self.get_info(param_name)["description"])
|
|
96
|
-
|
|
97
|
-
def validate(self, param_name: str, value: Any) -> bool:
|
|
98
|
-
"""
|
|
99
|
-
Validate a parameter value against its constraints.
|
|
100
|
-
|
|
101
|
-
Args:
|
|
102
|
-
param_name: Name of the parameter
|
|
103
|
-
value: Value to validate
|
|
104
|
-
|
|
105
|
-
Returns:
|
|
106
|
-
True if value is valid, False otherwise
|
|
107
|
-
"""
|
|
108
|
-
if param_name not in self._param_metadata:
|
|
109
|
-
return False
|
|
110
|
-
|
|
111
|
-
metadata = self._param_metadata[param_name]
|
|
112
|
-
expected_dtype = metadata["dtype"]
|
|
113
|
-
|
|
114
|
-
# Handle optional types
|
|
115
|
-
if isinstance(expected_dtype, str) and expected_dtype.startswith("Optional"):
|
|
116
|
-
if value is None:
|
|
117
|
-
return True
|
|
118
|
-
# Extract the inner type for validation
|
|
119
|
-
if "list" in expected_dtype:
|
|
120
|
-
expected_dtype = list
|
|
121
|
-
|
|
122
|
-
# Type checking
|
|
123
|
-
if expected_dtype is int:
|
|
124
|
-
if not isinstance(value, int):
|
|
125
|
-
try:
|
|
126
|
-
value = int(value)
|
|
127
|
-
except (ValueError, TypeError):
|
|
128
|
-
return False
|
|
129
|
-
elif expected_dtype is float:
|
|
130
|
-
if not isinstance(value, (int, float)):
|
|
131
|
-
try:
|
|
132
|
-
value = float(value)
|
|
133
|
-
except (ValueError, TypeError):
|
|
134
|
-
return False
|
|
135
|
-
elif expected_dtype is list:
|
|
136
|
-
if not isinstance(value, list):
|
|
137
|
-
return False
|
|
138
|
-
|
|
139
|
-
# Range validation for numeric types
|
|
140
|
-
if expected_dtype in (int, float) and isinstance(value, (int, float)):
|
|
141
|
-
if "min_value" in metadata and value < metadata["min_value"]:
|
|
142
|
-
return False
|
|
143
|
-
if "max_value" in metadata and value > metadata["max_value"]:
|
|
144
|
-
return False
|
|
145
|
-
|
|
146
|
-
return True
|
|
147
|
-
|
|
148
|
-
def set(self, param_name: str, value: Any, validate: bool = True) -> bool:
|
|
149
|
-
"""
|
|
150
|
-
Set a parameter value with optional validation.
|
|
151
|
-
|
|
152
|
-
Args:
|
|
153
|
-
param_name: Name of the parameter
|
|
154
|
-
value: New value for the parameter
|
|
155
|
-
validate: Whether to validate the value before setting
|
|
156
|
-
|
|
157
|
-
Returns:
|
|
158
|
-
True if parameter was set successfully, False otherwise
|
|
159
|
-
"""
|
|
160
|
-
if not hasattr(self, param_name):
|
|
161
|
-
return False
|
|
162
|
-
|
|
163
|
-
if validate and not self.validate(param_name, value):
|
|
164
|
-
return False
|
|
165
|
-
|
|
166
|
-
# Convert to expected type if needed
|
|
167
|
-
if param_name in self._param_metadata:
|
|
168
|
-
expected_dtype = self._param_metadata[param_name]["dtype"]
|
|
169
|
-
|
|
170
|
-
# Handle optional types
|
|
171
|
-
if isinstance(expected_dtype, str) and expected_dtype.startswith("Optional") and value is not None:
|
|
172
|
-
if "int" in expected_dtype and not isinstance(value, int):
|
|
173
|
-
try:
|
|
174
|
-
value = int(value)
|
|
175
|
-
except (ValueError, TypeError):
|
|
176
|
-
if validate:
|
|
177
|
-
return False
|
|
178
|
-
elif "float" in expected_dtype and not isinstance(value, float):
|
|
179
|
-
try:
|
|
180
|
-
value = float(value)
|
|
181
|
-
except (ValueError, TypeError):
|
|
182
|
-
if validate:
|
|
183
|
-
return False
|
|
184
|
-
|
|
185
|
-
setattr(self, param_name, value)
|
|
186
|
-
return True
|
|
187
|
-
|
|
188
|
-
def get(self, param_name: str) -> Any:
|
|
189
|
-
"""
|
|
190
|
-
Get the value of a parameter by name.
|
|
191
|
-
|
|
192
|
-
Args:
|
|
193
|
-
param_name: Name of the parameter
|
|
194
|
-
|
|
195
|
-
Returns:
|
|
196
|
-
Current value of the parameter
|
|
197
|
-
"""
|
|
198
|
-
if not hasattr(self, param_name):
|
|
199
|
-
raise KeyError(f"Parameter '{param_name}' not found")
|
|
200
|
-
return getattr(self, param_name)
|
|
201
|
-
|
|
202
|
-
def set_from_dict(
|
|
203
|
-
self,
|
|
204
|
-
param_dict: dict[str, Any],
|
|
205
|
-
validate: bool = True,
|
|
206
|
-
) -> list[str]:
|
|
207
|
-
"""
|
|
208
|
-
Update multiple parameters from a dictionary.
|
|
209
|
-
|
|
210
|
-
Args:
|
|
211
|
-
param_dict: Dictionary of parameter names and values
|
|
212
|
-
validate: Whether to validate values before setting
|
|
213
|
-
|
|
214
|
-
Returns:
|
|
215
|
-
List of parameter names that could not be set
|
|
216
|
-
"""
|
|
217
|
-
failed_params = []
|
|
218
|
-
|
|
219
|
-
for param_name, value in param_dict.items():
|
|
220
|
-
if not self.set(param_name, value, validate):
|
|
221
|
-
failed_params.append(param_name)
|
|
222
|
-
|
|
223
|
-
return failed_params
|
|
224
|
-
|
|
225
|
-
def to_dict(self) -> dict[str, Any]:
|
|
226
|
-
"""
|
|
227
|
-
Convert parameters to dictionary, excluding metadata.
|
|
228
|
-
|
|
229
|
-
Returns:
|
|
230
|
-
Dictionary of parameter names and values
|
|
231
|
-
"""
|
|
232
|
-
return {k: v for k, v in self.__dict__.items() if not k.startswith("_")}
|
|
233
|
-
|
|
234
|
-
def list_parameters(self) -> list[str]:
|
|
235
|
-
"""
|
|
236
|
-
Get list of all parameter names.
|
|
237
|
-
|
|
238
|
-
Returns:
|
|
239
|
-
List of parameter names
|
|
240
|
-
"""
|
|
241
|
-
return [k for k in self.__dict__.keys() if not k.startswith("_")]
|
|
242
|
-
|
|
243
|
-
def validate_all(self) -> tuple[bool, list[str]]:
|
|
244
|
-
"""
|
|
245
|
-
Validate all parameters in the instance.
|
|
246
|
-
|
|
247
|
-
Returns:
|
|
248
|
-
Tuple of (all_valid, list_of_invalid_params)
|
|
249
|
-
- all_valid: True if all parameters are valid, False otherwise
|
|
250
|
-
- list_of_invalid_params: List of parameter names that failed validation
|
|
251
|
-
"""
|
|
252
|
-
invalid_params = []
|
|
253
|
-
|
|
254
|
-
for param_name in self.list_parameters():
|
|
255
|
-
if param_name in self._param_metadata:
|
|
256
|
-
current_value = getattr(self, param_name)
|
|
257
|
-
if not self.validate(param_name, current_value):
|
|
258
|
-
invalid_params.append(param_name)
|
|
259
|
-
|
|
260
|
-
return len(invalid_params) == 0, invalid_params
|
|
File without changes
|
|
File without changes
|
|
File without changes
|