masster-0.5.1-py3-none-any.whl → masster-0.5.3-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of masster might be problematic. Click here for more details.

masster/study/study.py CHANGED
@@ -1,46 +1,38 @@
1
1
  """
2
2
  study.py
3
3
 
4
- This module provides tools for multi-sample mass spectrometry data analysis and cross-sample feature alignment.
5
- It defines the `study` class, which manages collections of DDA files, performs feature alignment across samples,
6
- generates consensus features, and provides study-level visualization and reporting capabilities.
7
-
8
- Key Features:
9
- - **Multi-Sample Management**: Handle collections of mass spectrometry files with metadata.
10
- - **Feature Alignment**: Align features across multiple samples using retention time and m/z tolerances.
11
- - **Consensus Features**: Generate consensus feature tables from aligned data.
12
- - **Batch Processing**: Automated processing of entire studies with configurable parameters.
13
- - **Study Visualization**: Generate comparative plots and alignment visualizations.
14
- - **Export Capabilities**: Export study results in various formats for downstream analysis.
15
-
16
- Dependencies:
17
- - `pyopenms`: For mass spectrometry data handling and algorithms.
18
- - `polars` and `pandas`: For efficient data manipulation and analysis.
19
- - `bokeh`, `holoviews`, `panel`: For interactive visualizations and dashboards.
20
- - `numpy`: For numerical computations and array operations.
21
-
22
- Classes:
23
- - `study`: Main class for multi-sample study management, providing methods for file loading,
24
- feature alignment, consensus generation, and study-level analysis.
25
-
26
- Example Usage:
27
- ```python
28
- from study import study
29
-
30
- # Create study from multiple files
31
- study_obj = study()
32
- study_obj.load_files(["sample1.mzML", "sample2.mzML", "sample3.mzML"])
33
- study_obj.process_all()
34
- study_obj.align()
35
- study_obj.plot_alignment_bokeh()
36
- study_obj.export_consensus()
37
- ```
38
-
39
- See Also:
40
- - `single.py`: For individual file processing before study-level analysis.
41
- - `parameters.study_parameters`: For study-specific parameter configuration.
42
-
43
-
4
+ Module providing the Study class, the main entry point for multi-sample mass spectrometry
5
+ studies. It manages loading and metadata, cross-sample feature alignment, consensus
6
+ generation, integration, MS2 association, plotting, exporting, and parameter/history
7
+ management.
8
+
9
+ Main class:
10
+ - Study: high-level orchestrator. Key operations include:
11
+ - I/O: load/save .study5, add/add_sample, set_study_folder
12
+ - Processing: align, merge (consensus), fill, integrate, find_ms2, find_iso/reset_iso
13
+ - Selection/filtering: samples_select/delete, features_select/filter/delete,
14
+ consensus_select/filter/delete
15
+ - Retrieval: get_consensus, get_chrom, get_samples, get_*_stats, get_*_matrix
16
+ - Plotting: plot_alignment, plot_samples_pca/umap/2d, plot_tic/bpc/eic, plot_chrom,
17
+ plot_rt_correction, plot_consensus_2d/stats
18
+ - Export: export_mgf, export_mztab, export_xlsx, export_parquet
19
+ - Identification: lib_load, identify, get_id, id_reset, lib_reset
20
+ - Parameters: get/update parameters, update_history
21
+
22
+
23
+ Quickstart:
24
+ >>> from masster import Study
25
+ >>> s = Study(folder="./study")
26
+ >>> s.add("/data/mzML/*.mzML") # or s.add_sample("sample.mzML", name="S1")
27
+ >>> s.align()
28
+ >>> s.merge()
29
+ >>> s.plot_alignment()
30
+ >>> s.export_parquet("consensus.parquet")
31
+ >>> s.save("project.study5")
32
+
33
+ Notes:
34
+ - This module re-exports many functions from masster.study.* as Study methods.
35
+ - Use Study.info() for a concise study summary.
44
36
  """
45
37
 
46
38
  from __future__ import annotations
@@ -57,6 +49,7 @@ from masster.study.helpers import _get_consensus_uids
57
49
  from masster.study.helpers import _get_features_uids
58
50
  from masster.study.helpers import _get_samples_uids
59
51
  from masster.study.helpers import compress
52
+ from masster.study.helpers import consensus_reset
60
53
  from masster.study.helpers import decompress
61
54
  from masster.study.helpers import fill_reset
62
55
  from masster.study.helpers import get_chrom
@@ -66,6 +59,7 @@ from masster.study.helpers import get_consensus_matches
66
59
  from masster.study.helpers import get_consensus_matrix
67
60
  from masster.study.helpers import get_orphans
68
61
  from masster.study.helpers import get_sample_stats
62
+ from masster.study.helpers import get_consensus_stats
69
63
  from masster.study.helpers import get_gaps_matrix
70
64
  from masster.study.helpers import get_gaps_stats
71
65
  from masster.study.helpers import align_reset
@@ -86,7 +80,10 @@ from masster.study.load import add
86
80
  from masster.study.load import add_sample
87
81
  from masster.study.load import fill
88
82
  from masster.study.load import load
89
- from masster.study.load import _load_features
83
+ #from masster.study.load import _load_features
84
+ from masster.study.h5 import _load_ms1
85
+ from masster.study.h5 import _load_study5
86
+ from masster.study.h5 import _save_study5
90
87
  from masster.study.plot import plot_alignment
91
88
  from masster.study.plot import plot_consensus_2d
92
89
  from masster.study.plot import plot_samples_2d
@@ -117,7 +114,6 @@ from masster.logger import MassterLogger
117
114
  from masster.study.defaults.study_def import study_defaults
118
115
  from masster.study.defaults.align_def import align_defaults
119
116
  from masster.study.defaults.export_def import export_mgf_defaults
120
- from masster.study.defaults.fill_chrom_def import fill_chrom_defaults
121
117
  from masster.study.defaults.fill_def import fill_defaults
122
118
  from masster.study.defaults.find_ms2_def import find_ms2_defaults
123
119
  from masster.study.defaults.integrate_chrom_def import integrate_chrom_defaults
@@ -352,6 +348,9 @@ class Study:
352
348
  save_consensus = save_consensus
353
349
  save_samples = save_samples
354
350
  set_study_folder = set_study_folder
351
+ _load_ms1 = _load_ms1
352
+ _load_study5 = _load_study5
353
+ _save_study5 = _save_study5
355
354
 
356
355
  # === Sample Management ===
357
356
  add = add
@@ -368,6 +367,7 @@ class Study:
368
367
  integrate = integrate
369
368
 
370
369
  fill = fill
370
+ #_estimate_rt_original_for_filled_feature = _estimate_rt_original_for_filled_feature
371
371
 
372
372
  # === Data Retrieval and Access ===
373
373
  get_consensus = get_consensus
@@ -379,6 +379,7 @@ class Study:
379
379
  get_gaps_stats = get_gaps_stats
380
380
  get_orphans = get_orphans
381
381
  get_sample_stats = get_sample_stats
382
+ get_consensus_stats = get_consensus_stats
382
383
 
383
384
  # === Data Selection and Filtering ===
384
385
  samples_select = samples_select
@@ -411,6 +412,7 @@ class Study:
411
412
  decompress = decompress
412
413
 
413
414
  # === Reset Operations ===
415
+ consensus_reset = consensus_reset
414
416
  fill_reset = fill_reset
415
417
  reset_fill = fill_reset
416
418
  align_reset = align_reset
@@ -463,19 +465,11 @@ class Study:
463
465
  _get_consensus_uids = _get_consensus_uids
464
466
  _get_features_uids = _get_features_uids
465
467
  _get_samples_uids = _get_samples_uids
466
- _load_features = _load_features
467
-
468
- # Note: _load_study5 and _save_study5 are not exposed as class methods
469
- # They are used internally by load() and save() methods only
470
-
471
- # === Merge Helper Methods ===
472
- # (All merge helper methods are now internal to the merge module)
473
468
 
474
469
  # === Default Parameters ===
475
470
  study_defaults = study_defaults
476
471
  align_defaults = align_defaults
477
472
  export_mgf_defaults = export_mgf_defaults
478
- fill_chrom_defaults = fill_chrom_defaults
479
473
  fill_defaults = fill_defaults
480
474
  find_ms2_defaults = find_ms2_defaults
481
475
  integrate_chrom_defaults = integrate_chrom_defaults
@@ -566,84 +560,7 @@ class Study:
566
560
  self.logger.debug("Module reload completed")
567
561
  except Exception as e:
568
562
  self.logger.error(f"Failed to reload current module {current_module}: {e}")
569
-
570
- def _sanitize_null_ids(self):
571
- """
572
- Sanitize null feature_id and consensus_id values by replacing them with new integer IDs.
573
- For feature_id: generates large sequential integers that can be converted by merge/align functions.
574
- For consensus_id: uses 16-character UUID strings (as expected by merge function).
575
- """
576
- import uuid
577
- import polars as pl
578
- import time
579
-
580
- # Sanitize features_df feature_id column
581
- if hasattr(self, 'features_df') and self.features_df is not None and not self.features_df.is_empty():
582
- # Check for null feature_ids
583
- null_feature_ids = self.features_df.filter(pl.col("feature_id").is_null()).shape[0]
584
- if null_feature_ids > 0:
585
- self.logger.info(f"Sanitizing {null_feature_ids} null feature_id values with new integer IDs")
586
-
587
- # Find the maximum existing feature_id (convert strings to int if possible)
588
- max_existing_id = 0
589
- existing_ids = self.features_df.filter(pl.col("feature_id").is_not_null())["feature_id"].to_list()
590
- for fid in existing_ids:
591
- try:
592
- int_id = int(fid)
593
- max_existing_id = max(max_existing_id, int_id)
594
- except (ValueError, TypeError):
595
- # Skip non-integer IDs
596
- pass
597
-
598
- # Generate new sequential integer IDs starting from max + timestamp offset
599
- # Use timestamp to ensure uniqueness across different sanitization runs
600
- base_id = max(max_existing_id + 1, int(time.time() * 1000000)) # Microsecond timestamp
601
- new_int_ids = [str(base_id + i) for i in range(null_feature_ids)]
602
- uid_index = 0
603
-
604
- # Create a list to store all feature_ids
605
- feature_ids = []
606
- for feature_id in self.features_df["feature_id"].to_list():
607
- if feature_id is None:
608
- feature_ids.append(new_int_ids[uid_index])
609
- uid_index += 1
610
- else:
611
- feature_ids.append(feature_id)
612
-
613
- # Update the DataFrame with sanitized feature_ids
614
- self.features_df = self.features_df.with_columns(
615
- pl.Series("feature_id", feature_ids, dtype=pl.Utf8)
616
- )
617
-
618
- self.logger.info(f"Successfully sanitized {null_feature_ids} feature_id values")
619
-
620
- # Sanitize consensus_df consensus_id column
621
- if hasattr(self, 'consensus_df') and self.consensus_df is not None and not self.consensus_df.is_empty():
622
- if "consensus_id" in self.consensus_df.columns:
623
- null_consensus_ids = self.consensus_df.filter(pl.col("consensus_id").is_null()).shape[0]
624
- if null_consensus_ids > 0:
625
- self.logger.info(f"Sanitizing {null_consensus_ids} null consensus_id values with new UIDs")
626
-
627
- # Generate new UIDs for null values using the same method as merge()
628
- new_uids = [str(uuid.uuid4()).replace('-', '')[:16] for _ in range(null_consensus_ids)]
629
- uid_index = 0
630
-
631
- # Create a list to store all consensus_ids
632
- consensus_ids = []
633
- for consensus_id in self.consensus_df["consensus_id"].to_list():
634
- if consensus_id is None:
635
- consensus_ids.append(new_uids[uid_index])
636
- uid_index += 1
637
- else:
638
- consensus_ids.append(consensus_id)
639
-
640
- # Update the DataFrame with sanitized consensus_ids
641
- self.consensus_df = self.consensus_df.with_columns(
642
- pl.Series("consensus_id", consensus_ids, dtype=pl.Utf8)
643
- )
644
-
645
- self.logger.info(f"Successfully sanitized {null_consensus_ids} consensus_id values")
646
-
563
+
647
564
  def __dir__(self):
648
565
  """
649
566
  Custom __dir__ implementation to hide internal methods starting with '_'
@@ -699,12 +616,19 @@ class Study:
699
616
 
700
617
  def __str__(self):
701
618
  """
702
- Returns a string representation of the study.
703
-
704
- Returns:
705
- str: A summary string of the study.
619
+ Return a short summary string with number of samples and consensus features.
706
620
  """
707
- return ""
621
+ samples = (
622
+ len(self.samples_df)
623
+ if (self.samples_df is not None and not self.samples_df.is_empty())
624
+ else 0
625
+ )
626
+ consensus = (
627
+ len(self.consensus_df)
628
+ if (self.consensus_df is not None and not self.consensus_df.is_empty())
629
+ else 0
630
+ )
631
+ return f"{samples} samples, {consensus} consensus"
708
632
 
709
633
  def logger_update(
710
634
  self,
@@ -993,6 +917,5 @@ class Study:
993
917
 
994
918
  print(summary)
995
919
 
996
- if __name__ == "__main__":
997
- # This block is executed when the script is run directly
920
+ if __name__ == "__main__":
998
921
  pass
masster/wizard/wizard.py CHANGED
@@ -652,12 +652,26 @@ class Wizard:
652
652
  ' )',
653
653
  ' ',
654
654
  ' # Merge and create consensus features',
655
- ' study.merge(',
656
- ' min_samples=PARAMS[\'min_samples_per_feature\'],',
657
- ' threads=PARAMS[\'num_cores\'],',
658
- ' rt_tol=PARAMS[\'rt_tol\'],',
659
- ' mz_tol=PARAMS[\'mz_tol\']',
660
- ' )',
655
+ ' # Use optimized method for large datasets (>500 samples)',
656
+ ' num_samples = len(study.samples)',
657
+ ' if num_samples > 500:',
658
+ ' print(f" Large dataset detected ({num_samples} samples), using optimized qt_chunked + hierarchical method")',
659
+ ' study.merge(',
660
+ ' method="qt_chunked",',
661
+ ' dechunking="hierarchical",',
662
+ ' min_samples=PARAMS[\'min_samples_per_feature\'],',
663
+ ' threads=PARAMS[\'num_cores\'],',
664
+ ' rt_tol=PARAMS[\'rt_tol\'],',
665
+ ' mz_tol=PARAMS[\'mz_tol\']',
666
+ ' )',
667
+ ' else:',
668
+ ' print(f" Using standard merge method for {num_samples} samples")',
669
+ ' study.merge(',
670
+ ' min_samples=PARAMS[\'min_samples_per_feature\'],',
671
+ ' threads=PARAMS[\'num_cores\'],',
672
+ ' rt_tol=PARAMS[\'rt_tol\'],',
673
+ ' mz_tol=PARAMS[\'mz_tol\']',
674
+ ' )',
661
675
  ' study.find_iso()',
662
676
  ' study.fill(min_samples_rel=0.0)',
663
677
  ' study.integrate()',
masster-0.5.1.dist-info/METADATA → masster-0.5.3.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: masster
3
- Version: 0.5.1
3
+ Version: 0.5.3
4
4
  Summary: Mass spectrometry data analysis package
5
5
  Project-URL: homepage, https://github.com/zamboni-lab/masster
6
6
  Project-URL: repository, https://github.com/zamboni-lab/masster
masster-0.5.1.dist-info/RECORD → masster-0.5.3.dist-info/RECORD CHANGED
@@ -1,5 +1,5 @@
1
1
  masster/__init__.py,sha256=ueZ224WPNRRjQEYTaQUol818nwQgJwB93HbEfmtPRmg,1041
2
- masster/_version.py,sha256=dkqPLCQGfsGL65orxLHNgDpbEE9aMOWq4b_vYspojyk,256
2
+ masster/_version.py,sha256=ugCZC9n8XchZWXe4FUzXQCUPt7xKbTwRFRBFdsN0tkI,256
3
3
  masster/chromatogram.py,sha256=iYpdv8C17zVnlWvOFgAn9ns2uFGiF-GgoYf5QVVAbHs,19319
4
4
  masster/logger.py,sha256=tR65N23zfrNpcZNbZm2ot_Aual9XrGB1MWjLrovZkMs,16749
5
5
  masster/spectrum.py,sha256=XJSUrqXZSzfpWnD8v5IMClXMRZLKLYIk014qaMOS9_k,49738
@@ -22,7 +22,7 @@ masster/sample/adducts.py,sha256=S7meba3L1tSdjoDhkSiTI71H2NJLu4i1dtJwfDKWI1M,325
22
22
  masster/sample/h5.py,sha256=B0gAmhrnoFoybotqsqiT8s-PkeZWUdIQfI-4cnM52Zc,115430
23
23
  masster/sample/helpers.py,sha256=JhzFpNh7j7YVUibIMuPQ50hBcGDEBCaBbmwA3Z5OhgM,41336
24
24
  masster/sample/lib.py,sha256=E-j9c3Wd8f9a-H8xj7CAOwlA8KcyXPoFyYm3c8r7LtI,33755
25
- masster/sample/load.py,sha256=CSJnNfJ6EpWEIbDj92mJK0qFao0ycxuGf_w9DzWhlnY,51721
25
+ masster/sample/load.py,sha256=jVgni8eFG8ubxX2K0ygzHQuMbZv0hfRP1kFomVTe9t4,51784
26
26
  masster/sample/parameters.py,sha256=Gg2KcuNbV_wZ_Wwv93QlM5J19ji0oSIvZLPV1NoBmq0,4456
27
27
  masster/sample/plot.py,sha256=0v4FzO_yzpUdATjdHZU4YO7UoW73Xlf51vVQByQ33X0,82574
28
28
  masster/sample/processing.py,sha256=Sh6IFxuFcmCspyizUPghd4Qqqk1mTPCNtvdxmyy0eKQ,55914
@@ -39,36 +39,35 @@ masster/sample/defaults/get_spectrum_def.py,sha256=o62p31PhGd-LiIkTOzKQhwPtnO2At
39
39
  masster/sample/defaults/sample_def.py,sha256=keoXyMyrm_iLgbYqfIbqCpJ3XHBVlNwCNmb5iMQL0iY,14579
40
40
  masster/study/__init__.py,sha256=55axdFuqRX4aXtJ8ocnhcLB32fNtmmJpCi58moO0r4g,237
41
41
  masster/study/analysis.py,sha256=L-wXBnGZCLB5UUDrjIdOiMG9zdej3Tw_SftcEmmTukM,84264
42
- masster/study/export.py,sha256=Rp1vc5iDl-XFWo_RBVCJDGBNSKakq9f8aC2FeUCP9GA,59398
43
- masster/study/h5.py,sha256=6_nyjMGg_dkKkrx_Mv77wGg5SmWsVOZxu7HZasoXbRU,84916
44
- masster/study/helpers.py,sha256=dU2YxAGPmu1w55mpcgNoHPpg2fNW-vK944aJy3YwLsU,163555
42
+ masster/study/export.py,sha256=joFK9jip2UM4lVAvhkdKVeUdNdM4D8uP2WE49IaVJgw,60172
43
+ masster/study/h5.py,sha256=rMY8lgXPrU41L_bgzVl7J-uDWzb6thG2-ibw71JP0Ss,91376
44
+ masster/study/helpers.py,sha256=s5jLUmxDAs_Qn6dVwpkwlwuwliMDEBjmeikS6OrxdSE,183137
45
45
  masster/study/id.py,sha256=r_vZQYNxqNXf_pjgk_CLkl1doLnLa956mTuVmlHN52o,80075
46
- masster/study/load.py,sha256=W4mljmYVR71sas4no7vKWIVfdnQjb-rTcEUhE0ZMr0k,71696
47
- masster/study/merge.py,sha256=XF4NxNuLSxwf2j1__ReIInXVRGDRoSHFeKdcCSayKU4,164298
46
+ masster/study/load.py,sha256=7d11294YYEGrSKox3cwvetv2vqcstYT1SnyAhHH5V_Q,107706
47
+ masster/study/merge.py,sha256=D9xNRlEaMPTPZQAZhiBBSzQ-27lD60fCDmKb0cYST-M,149764
48
48
  masster/study/parameters.py,sha256=bTvmcwX9INxzcrEAmTiFH8qeWVhwkvMTZjuP394pz5o,3279
49
- masster/study/plot.py,sha256=pAN5uQKYPUpupQVtKBloWjKOKpM_C9o2e3VWkJ-aZN8,102041
50
- masster/study/processing.py,sha256=TKeTzRLmaMxUKCt66pXPfx_7xc-R5__ZwEZdFHOxg6A,55916
49
+ masster/study/plot.py,sha256=cS4haKL8PA9q7qgkv_0S7JY6PfFV5nKZH33thSnjwtM,102113
50
+ masster/study/processing.py,sha256=O6X7wgeq0kXSyMO12g23cqB8cYO60gLRMxuJC2uhSMY,58644
51
51
  masster/study/save.py,sha256=47AP518epJJ9TjaGGyrLKsMsyjIk8_J4ka7bmsnRtFQ,9268
52
- masster/study/study.py,sha256=vbP_bPa62-KYN0OTUN6PpSyCoFcW-TdbLbx67ShkEx0,42930
52
+ masster/study/study.py,sha256=TnZkTLB8Z5R-AVqoHfUNvmkTthfUI4OPmBo_LYR_e8g,38654
53
53
  masster/study/study5_schema.json,sha256=0IZxM9VVI0TUlx74BPzJDT44kySi6NZZ6iLR0j8bU_s,7736
54
54
  masster/study/defaults/__init__.py,sha256=m3Z5KXGqsTdh7GjYzZoENERt39yRg0ceVRV1DeCt1P0,610
55
- masster/study/defaults/align_def.py,sha256=hHQbGgsOqMRHHr0Wn8Onr8XeaRz3-fFE0qGE-OMst80,20324
55
+ masster/study/defaults/align_def.py,sha256=Du0F592ej2einT8kOx8EUs610axSvur8_-6N19O-uJY,10209
56
56
  masster/study/defaults/export_def.py,sha256=eXl3h4aoLX88XkHTpqahLd-QZ2gjUqrmjq8IJULXeWo,1203
57
- masster/study/defaults/fill_chrom_def.py,sha256=hB6-tyC9bhx-IpGj2HC8FinQdW4VLYj_pn5t1rlj-Ew,8887
58
- masster/study/defaults/fill_def.py,sha256=H-ZNKyiXxBLWdLoCMqxfvphNyc9wrDVFMC7TyRNYEm0,8869
57
+ masster/study/defaults/fill_def.py,sha256=pdH-lwknaMqnB_lXdGmeSqk9KgLMoOgNlp82eEunx6U,9124
59
58
  masster/study/defaults/find_consensus_def.py,sha256=2KRRMsCDP7pwNrLCC6eI5uQgMXqiNdiI6pSvxNJ8L5M,8598
60
59
  masster/study/defaults/find_ms2_def.py,sha256=RL0DFG41wQ05U8UQKUGr3vzSl3mU0m0knQus8DpSoJE,5070
61
60
  masster/study/defaults/identify_def.py,sha256=96rxoCAPQj_yX-3mRoD2LTkTLJgG27eJQqwarLv5jL0,10580
62
61
  masster/study/defaults/integrate_chrom_def.py,sha256=0MNIWGTjty-Zu-NTQsIweuj3UVqEY3x1x8pK0mPwYak,7264
63
62
  masster/study/defaults/integrate_def.py,sha256=Vf4SAzdBfnsSZ3IRaF0qZvWu3gMDPHdgPfMYoPKeWv8,7246
64
- masster/study/defaults/merge_def.py,sha256=K7sfwEGfgcWU85zorbWNFaxDhqRH52pxQoKv9Jn2qhY,15030
63
+ masster/study/defaults/merge_def.py,sha256=3W13QSZaIzxHTDFifW-Nncu_phIqZgf0TpcllaKwbHE,12978
65
64
  masster/study/defaults/study_def.py,sha256=h8dYbi9xv0sesCSQik49Z53IkskMmNtW6ixl7it5pL0,16033
66
65
  masster/wizard/README.md,sha256=mL1A3YWJZOefpJ6D0-HqGLkVRmUlOpwyVFdvJBeeoZM,14149
67
66
  masster/wizard/__init__.py,sha256=a2hcZnHASjfuw1lqZhZnvTR58rc33rRnoGAY_JfvGhI,683
68
67
  masster/wizard/example.py,sha256=xEZFTH9UZ8HKOm6s3JL8Js0Uw5ChnISWBHSZCL32vsM,7983
69
- masster/wizard/wizard.py,sha256=esgaifLRyaGxytif9qOkTy-21VxlUQxrvl47K-l-BpE,37666
70
- masster-0.5.1.dist-info/METADATA,sha256=01v713yHW9RJPqFXY89wd5e21Ls3crfs6kEBDhDrUlc,45113
71
- masster-0.5.1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
72
- masster-0.5.1.dist-info/entry_points.txt,sha256=ZHguQ_vPmdbpqq2uGtmEOLJfgP-DQ1T0c07Lxh30wc8,58
73
- masster-0.5.1.dist-info/licenses/LICENSE,sha256=bx5iLIKjgAdYQ7sISn7DsfHRKkoCUm1154sJJKhgqnU,35184
74
- masster-0.5.1.dist-info/RECORD,,
68
+ masster/wizard/wizard.py,sha256=6VqeOyKJ-9n0376CVbNuQo4vKLFjE0Sl2KexWZclQew,38580
69
+ masster-0.5.3.dist-info/METADATA,sha256=qYfl6QVz7POBKlmRQ0UDR6ZR9SdUgjaXw3LFpajZKHM,45113
70
+ masster-0.5.3.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
71
+ masster-0.5.3.dist-info/entry_points.txt,sha256=ZHguQ_vPmdbpqq2uGtmEOLJfgP-DQ1T0c07Lxh30wc8,58
72
+ masster-0.5.3.dist-info/licenses/LICENSE,sha256=bx5iLIKjgAdYQ7sISn7DsfHRKkoCUm1154sJJKhgqnU,35184
73
+ masster-0.5.3.dist-info/RECORD,,
masster/study/defaults/fill_chrom_def.py REMOVED
@@ -1,260 +0,0 @@
1
- """Parameter class for Study fill_chrom method."""
2
-
3
- from dataclasses import dataclass, field
4
- from typing import Optional, Any
5
-
6
-
7
- @dataclass
8
- class fill_chrom_defaults:
9
- """
10
- Parameter class for Study fill_chrom method.
11
-
12
- This class encapsulates parameters for filling missing chromatograms
13
- by extracting them from raw data across samples.
14
-
15
- Attributes:
16
- uids (Optional[list]): List of consensus UIDs to process. Default is None (all).
17
- mz_tol (float): m/z tolerance for chromatogram extraction (Da). Default is 0.010.
18
- rt_tol (float): RT tolerance for chromatogram extraction (seconds). Default is 10.0.
19
- min_samples_rel (float): Minimum relative samples threshold. Default is 0.05.
20
- min_samples_abs (int): Minimum absolute samples threshold. Default is 5.
21
- """
22
-
23
- uids: Optional[list] = None
24
- mz_tol: float = 0.010
25
- rt_tol: float = 10.0
26
- min_samples_rel: float = 0.05
27
- min_samples_abs: int = 5
28
-
29
- _param_metadata: dict[str, dict[str, Any]] = field(
30
- default_factory=lambda: {
31
- "uids": {
32
- "dtype": "Optional[list]",
33
- "description": "List of consensus UIDs to process (None for all)",
34
- "default": None,
35
- },
36
- "mz_tol": {
37
- "dtype": float,
38
- "description": "m/z tolerance for chromatogram extraction (Da)",
39
- "default": 0.010,
40
- "min_value": 0.001,
41
- "max_value": 0.1,
42
- },
43
- "rt_tol": {
44
- "dtype": float,
45
- "description": "RT tolerance for chromatogram extraction (seconds)",
46
- "default": 10.0,
47
- "min_value": 1.0,
48
- "max_value": 300.0,
49
- },
50
- "min_samples_rel": {
51
- "dtype": float,
52
- "description": "Minimum relative samples threshold (fraction)",
53
- "default": 0.05,
54
- "min_value": 0.01,
55
- "max_value": 1.0,
56
- },
57
- "min_samples_abs": {
58
- "dtype": int,
59
- "description": "Minimum absolute samples threshold",
60
- "default": 5,
61
- "min_value": 1,
62
- "max_value": 100,
63
- },
64
- },
65
- repr=False,
66
- )
67
-
68
- def get_info(self, param_name: str) -> dict[str, Any]:
69
- """
70
- Get information about a specific parameter.
71
-
72
- Args:
73
- param_name: Name of the parameter
74
-
75
- Returns:
76
- Dictionary containing parameter metadata
77
-
78
- Raises:
79
- KeyError: If parameter name is not found
80
- """
81
- if param_name not in self._param_metadata:
82
- raise KeyError(f"Parameter '{param_name}' not found")
83
- return self._param_metadata[param_name]
84
-
85
- def get_description(self, param_name: str) -> str:
86
- """
87
- Get description for a specific parameter.
88
-
89
- Args:
90
- param_name: Name of the parameter
91
-
92
- Returns:
93
- Parameter description string
94
- """
95
- return str(self.get_info(param_name)["description"])
96
-
97
- def validate(self, param_name: str, value: Any) -> bool:
98
- """
99
- Validate a parameter value against its constraints.
100
-
101
- Args:
102
- param_name: Name of the parameter
103
- value: Value to validate
104
-
105
- Returns:
106
- True if value is valid, False otherwise
107
- """
108
- if param_name not in self._param_metadata:
109
- return False
110
-
111
- metadata = self._param_metadata[param_name]
112
- expected_dtype = metadata["dtype"]
113
-
114
- # Handle optional types
115
- if isinstance(expected_dtype, str) and expected_dtype.startswith("Optional"):
116
- if value is None:
117
- return True
118
- # Extract the inner type for validation
119
- if "list" in expected_dtype:
120
- expected_dtype = list
121
-
122
- # Type checking
123
- if expected_dtype is int:
124
- if not isinstance(value, int):
125
- try:
126
- value = int(value)
127
- except (ValueError, TypeError):
128
- return False
129
- elif expected_dtype is float:
130
- if not isinstance(value, (int, float)):
131
- try:
132
- value = float(value)
133
- except (ValueError, TypeError):
134
- return False
135
- elif expected_dtype is list:
136
- if not isinstance(value, list):
137
- return False
138
-
139
- # Range validation for numeric types
140
- if expected_dtype in (int, float) and isinstance(value, (int, float)):
141
- if "min_value" in metadata and value < metadata["min_value"]:
142
- return False
143
- if "max_value" in metadata and value > metadata["max_value"]:
144
- return False
145
-
146
- return True
147
-
148
- def set(self, param_name: str, value: Any, validate: bool = True) -> bool:
149
- """
150
- Set a parameter value with optional validation.
151
-
152
- Args:
153
- param_name: Name of the parameter
154
- value: New value for the parameter
155
- validate: Whether to validate the value before setting
156
-
157
- Returns:
158
- True if parameter was set successfully, False otherwise
159
- """
160
- if not hasattr(self, param_name):
161
- return False
162
-
163
- if validate and not self.validate(param_name, value):
164
- return False
165
-
166
- # Convert to expected type if needed
167
- if param_name in self._param_metadata:
168
- expected_dtype = self._param_metadata[param_name]["dtype"]
169
-
170
- # Handle optional types
171
- if isinstance(expected_dtype, str) and expected_dtype.startswith("Optional") and value is not None:
172
- if "int" in expected_dtype and not isinstance(value, int):
173
- try:
174
- value = int(value)
175
- except (ValueError, TypeError):
176
- if validate:
177
- return False
178
- elif "float" in expected_dtype and not isinstance(value, float):
179
- try:
180
- value = float(value)
181
- except (ValueError, TypeError):
182
- if validate:
183
- return False
184
-
185
- setattr(self, param_name, value)
186
- return True
187
-
188
- def get(self, param_name: str) -> Any:
189
- """
190
- Get the value of a parameter by name.
191
-
192
- Args:
193
- param_name: Name of the parameter
194
-
195
- Returns:
196
- Current value of the parameter
197
- """
198
- if not hasattr(self, param_name):
199
- raise KeyError(f"Parameter '{param_name}' not found")
200
- return getattr(self, param_name)
201
-
202
- def set_from_dict(
203
- self,
204
- param_dict: dict[str, Any],
205
- validate: bool = True,
206
- ) -> list[str]:
207
- """
208
- Update multiple parameters from a dictionary.
209
-
210
- Args:
211
- param_dict: Dictionary of parameter names and values
212
- validate: Whether to validate values before setting
213
-
214
- Returns:
215
- List of parameter names that could not be set
216
- """
217
- failed_params = []
218
-
219
- for param_name, value in param_dict.items():
220
- if not self.set(param_name, value, validate):
221
- failed_params.append(param_name)
222
-
223
- return failed_params
224
-
225
- def to_dict(self) -> dict[str, Any]:
226
- """
227
- Convert parameters to dictionary, excluding metadata.
228
-
229
- Returns:
230
- Dictionary of parameter names and values
231
- """
232
- return {k: v for k, v in self.__dict__.items() if not k.startswith("_")}
233
-
234
- def list_parameters(self) -> list[str]:
235
- """
236
- Get list of all parameter names.
237
-
238
- Returns:
239
- List of parameter names
240
- """
241
- return [k for k in self.__dict__.keys() if not k.startswith("_")]
242
-
243
- def validate_all(self) -> tuple[bool, list[str]]:
244
- """
245
- Validate all parameters in the instance.
246
-
247
- Returns:
248
- Tuple of (all_valid, list_of_invalid_params)
249
- - all_valid: True if all parameters are valid, False otherwise
250
- - list_of_invalid_params: List of parameter names that failed validation
251
- """
252
- invalid_params = []
253
-
254
- for param_name in self.list_parameters():
255
- if param_name in self._param_metadata:
256
- current_value = getattr(self, param_name)
257
- if not self.validate(param_name, current_value):
258
- invalid_params.append(param_name)
259
-
260
- return len(invalid_params) == 0, invalid_params