masster 0.4.2__py3-none-any.whl → 0.4.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of masster might be problematic. Click here for more details.

masster/study/helpers.py CHANGED
@@ -1599,11 +1599,12 @@ def features_select(
1599
1599
  Returns:
1600
1600
  polars.DataFrame: Filtered features DataFrame
1601
1601
  """
1602
+ # Consolidated optimized implementation (previously in helpers_optimized.py)
1602
1603
  if self.features_df is None or self.features_df.is_empty():
1603
1604
  self.logger.warning("No features found in study.")
1604
1605
  return pl.DataFrame()
1605
1606
 
1606
- # Early return if no filters provided - performance optimization
1607
+ # Early return if no filters provided
1607
1608
  filter_params = [
1608
1609
  mz,
1609
1610
  rt,
@@ -1624,10 +1625,10 @@ def features_select(
1624
1625
 
1625
1626
  initial_count = len(self.features_df)
1626
1627
 
1627
- # Pre-check available columns once for efficiency
1628
+ # Pre-check available columns once
1628
1629
  available_columns = set(self.features_df.columns)
1629
1630
 
1630
- # Build all filter conditions first, then apply them all at once
1631
+ # Build all filter conditions
1631
1632
  filter_conditions = []
1632
1633
  warnings = []
1633
1634
 
@@ -1786,18 +1787,18 @@ def features_select(
1786
1787
  else:
1787
1788
  warnings.append("'chrom_height_scaled' column not found in features_df")
1788
1789
 
1789
- # Log all warnings once at the end for efficiency
1790
+ # Log warnings once at the end
1790
1791
  for warning in warnings:
1791
1792
  self.logger.warning(warning)
1792
1793
 
1793
- # Apply all filters at once using lazy evaluation for optimal performance
1794
+ # Apply all filters at once if any exist
1794
1795
  if filter_conditions:
1795
1796
  # Combine all conditions with AND
1796
1797
  combined_filter = filter_conditions[0]
1797
1798
  for condition in filter_conditions[1:]:
1798
1799
  combined_filter = combined_filter & condition
1799
1800
 
1800
- # Apply the combined filter using lazy evaluation
1801
+ # Apply the combined filter using lazy evaluation for better performance
1801
1802
  feats = self.features_df.lazy().filter(combined_filter).collect()
1802
1803
  else:
1803
1804
  feats = self.features_df.clone()
@@ -1807,12 +1808,106 @@ def features_select(
1807
1808
  if final_count == 0:
1808
1809
  self.logger.warning("No features remaining after applying selection criteria.")
1809
1810
  else:
1810
- # removed_count = initial_count - final_count
1811
- self.logger.info(f"Features selected: {final_count} (out of {initial_count})")
1811
+ removed_count = initial_count - final_count
1812
+ self.logger.info(f"Features selected: {final_count} (removed: {removed_count})")
1812
1813
 
1813
1814
  return feats
1814
1815
 
1815
1816
 
1817
+ def features_select_benchmarked(
1818
+ self,
1819
+ mz=None,
1820
+ rt=None,
1821
+ inty=None,
1822
+ sample_uid=None,
1823
+ sample_name=None,
1824
+ consensus_uid=None,
1825
+ feature_uid=None,
1826
+ filled=None,
1827
+ quality=None,
1828
+ chrom_coherence=None,
1829
+ chrom_prominence=None,
1830
+ chrom_prominence_scaled=None,
1831
+ chrom_height_scaled=None,
1832
+ ):
1833
+ """
1834
+ Benchmarked version that compares old vs new implementation performance.
1835
+ If an original implementation is available as `features_select_original` on the Study
1836
+ instance, it will be used for comparison; otherwise only the optimized run is timed.
1837
+ """
1838
+ import time
1839
+
1840
+ original_time = None
1841
+ # If an original implementation was stored, call it for comparison
1842
+ original_impl = getattr(self, "features_select_original", None)
1843
+ if callable(original_impl):
1844
+ start_time = time.perf_counter()
1845
+ _ = original_impl(
1846
+ mz=mz,
1847
+ rt=rt,
1848
+ inty=inty,
1849
+ sample_uid=sample_uid,
1850
+ sample_name=sample_name,
1851
+ consensus_uid=consensus_uid,
1852
+ feature_uid=feature_uid,
1853
+ filled=filled,
1854
+ quality=quality,
1855
+ chrom_coherence=chrom_coherence,
1856
+ chrom_prominence=chrom_prominence,
1857
+ chrom_prominence_scaled=chrom_prominence_scaled,
1858
+ chrom_height_scaled=chrom_height_scaled,
1859
+ )
1860
+ original_time = time.perf_counter() - start_time
1861
+
1862
+ # Call the optimized method
1863
+ start_time = time.perf_counter()
1864
+ result_optimized = self.features_select(
1865
+ mz=mz,
1866
+ rt=rt,
1867
+ inty=inty,
1868
+ sample_uid=sample_uid,
1869
+ sample_name=sample_name,
1870
+ consensus_uid=consensus_uid,
1871
+ feature_uid=feature_uid,
1872
+ filled=filled,
1873
+ quality=quality,
1874
+ chrom_coherence=chrom_coherence,
1875
+ chrom_prominence=chrom_prominence,
1876
+ chrom_prominence_scaled=chrom_prominence_scaled,
1877
+ chrom_height_scaled=chrom_height_scaled,
1878
+ )
1879
+ optimized_time = time.perf_counter() - start_time
1880
+
1881
+ # Log performance comparison when possible
1882
+ if original_time is not None:
1883
+ speedup = original_time / optimized_time if optimized_time > 0 else float("inf")
1884
+ self.logger.info(
1885
+ f"Performance comparison - Original: {original_time:.4f}s, Optimized: {optimized_time:.4f}s, Speedup: {speedup:.2f}x",
1886
+ )
1887
+ else:
1888
+ self.logger.info(f"Optimized features_select executed in {optimized_time:.4f}s")
1889
+
1890
+ return result_optimized
1891
+
1892
+
1893
+ def monkey_patch_study():
1894
+ """
1895
+ (Optional) Monkey-patch helper for Study. Stores the current Study.features_select
1896
+ as `features_select_original` if not already set, then replaces Study.features_select
1897
+ with the optimized `features_select` defined above. This function is idempotent.
1898
+ """
1899
+ from masster.study.study import Study
1900
+
1901
+ # Only set original if it doesn't exist yet
1902
+ if not hasattr(Study, "features_select_original"):
1903
+ Study.features_select_original = Study.features_select
1904
+
1905
+ Study.features_select = features_select
1906
+ Study.features_select_benchmarked = features_select_benchmarked
1907
+
1908
+ print("Patched Study.features_select with consolidated optimized implementation")
1909
+
1910
+
1816
1911
  def features_filter(self, features):
1817
1912
  """
1818
1913
  Filter features_df by keeping only features that match the given criteria.
@@ -0,0 +1,129 @@
1
+ Metadata-Version: 2.4
2
+ Name: masster
3
+ Version: 0.4.4
4
+ Summary: Mass spectrometry data analysis package
5
+ Author: Zamboni Lab
6
+ License-Expression: AGPL-3.0-only
7
+ Project-URL: homepage, https://github.com/zamboni-lab/masster
8
+ Project-URL: repository, https://github.com/zamboni-lab/masster
9
+ Project-URL: documentation, https://github.com/zamboni-lab/masster#readme
10
+ Keywords: mass spectrometry,metabolomics,lc-ms,chromatography
11
+ Classifier: Development Status :: 3 - Alpha
12
+ Classifier: Intended Audience :: Science/Research
13
+ Classifier: Operating System :: OS Independent
14
+ Classifier: Programming Language :: Python :: 3
15
+ Classifier: Programming Language :: Python :: 3.11
16
+ Classifier: Programming Language :: Python :: 3.12
17
+ Classifier: Programming Language :: Python :: 3.13
18
+ Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
19
+ Classifier: Topic :: Scientific/Engineering :: Chemistry
20
+ Requires-Python: >=3.11
21
+ Description-Content-Type: text/markdown
22
+ License-File: LICENSE
23
+ Requires-Dist: alpharaw>=0.4.8
24
+ Requires-Dist: bokeh>=3.7.3
25
+ Requires-Dist: datashader>=0.18.1
26
+ Requires-Dist: holoviews>=1.21.0
27
+ Requires-Dist: h5py>=3.14.0
28
+ Requires-Dist: hvplot>=0.11.3
29
+ Requires-Dist: loguru>=0.7.3
30
+ Requires-Dist: numpy>=2.0.0
31
+ Requires-Dist: marimo>=0.14.16
32
+ Requires-Dist: matplotlib>=3.8.0
33
+ Requires-Dist: pandas>=2.2.0
34
+ Requires-Dist: panel>=1.7.0
35
+ Requires-Dist: polars>=1.0.0
36
+ Requires-Dist: pyopenms>=3.3.0
37
+ Requires-Dist: pyteomics>=4.7.0
38
+ Requires-Dist: pythonnet>=3.0.0
39
+ Requires-Dist: scipy>=1.12.0
40
+ Requires-Dist: tqdm>=4.65.0
41
+ Requires-Dist: openpyxl>=3.1.5
42
+ Requires-Dist: cmap>=0.6.2
43
+ Requires-Dist: altair>=5.5.0
44
+ Provides-Extra: dev
45
+ Requires-Dist: pytest>=7.0.0; extra == "dev"
46
+ Requires-Dist: pytest-cov>=4.0.0; extra == "dev"
47
+ Requires-Dist: pytest-mock>=3.10.0; extra == "dev"
48
+ Requires-Dist: black>=23.0.0; extra == "dev"
49
+ Requires-Dist: flake8>=5.0.0; extra == "dev"
50
+ Requires-Dist: mypy>=1.0.0; extra == "dev"
51
+ Requires-Dist: pre-commit>=3.0.0; extra == "dev"
52
+ Requires-Dist: twine>=4.0.0; extra == "dev"
53
+ Requires-Dist: build>=0.10.0; extra == "dev"
54
+ Requires-Dist: safety>=2.0.0; extra == "dev"
55
+ Requires-Dist: bandit>=1.7.0; extra == "dev"
56
+ Requires-Dist: pyyaml>=6.0; extra == "dev"
57
+ Provides-Extra: docs
58
+ Requires-Dist: sphinx>=5.0.0; extra == "docs"
59
+ Requires-Dist: sphinx-rtd-theme>=1.2.0; extra == "docs"
60
+ Requires-Dist: sphinxcontrib-napoleon>=0.7; extra == "docs"
61
+ Provides-Extra: test
62
+ Requires-Dist: pytest>=7.0.0; extra == "test"
63
+ Requires-Dist: pytest-cov>=4.0.0; extra == "test"
64
+ Requires-Dist: pytest-mock>=3.10.0; extra == "test"
65
+ Requires-Dist: coverage>=7.0.0; extra == "test"
66
+ Dynamic: license-file
67
+
68
+ # MASSter
69
+
70
+ **MASSter** is a comprehensive Python package for mass spectrometry data analysis, designed for metabolomics and LC-MS data processing. It provides tools for feature detection, alignment, consensus building, and interactive visualization of mass spectrometry datasets. It is designed to handle DDA data and also contains hidden functionality for DIA and ZTScan DIA data.
71
+
72
+ This is a poorly documented, stable branch of the development codebase in use in the Zamboni lab.
73
+
74
+ Some of the core processing functions are derived from OpenMS. We use the same nomenclature and refer to their documentation for an explanation of the parameters. To a large extent, however, you should be able to use the defaults (=no parameters) when calling processing steps.
75
+
76
+
77
+ ## Installation
78
+
79
+ ```bash
80
+ pip install masster
81
+ ```
82
+
83
+ ### Basic Workflow for analyzing LC-MS study with 2-... samples
84
+
85
+ ```python
86
+ import masster
87
+
88
+ # Initialize the Study object with the default folder
89
+ study = masster.Study(default_folder=r'D:\...\mylcms')
90
+
91
+ # Load data from folder with raw data, here: WIFF
92
+ study.add(r'D:\...\...\...\*.wiff')
93
+
94
+ # Perform retention time correction
95
+ study.align(rt_max_diff=2.0)
96
+ study.plot_alignment()
97
+
98
+ # Find consensus features
99
+ study.merge(min_samples=3)
100
+ study.plot_consensus_2d()
101
+
102
+ # Retrieve missing data for quantification
103
+ study.fill()
104
+
105
+ # Integrate according to consensus metadata
106
+ study.integrate()
107
+
108
+ # export results
109
+ study.export_mgf()
110
+ study.export_mztab()
111
+ study.export_consensus()
112
+
113
+ # Save the study to .study5
114
+ study.save()
115
+ ```
116
+
117
+ ## Requirements
118
+
119
+ - Python ≥ 3.11
120
+ - Key dependencies: pandas, polars, numpy, scipy, matplotlib, bokeh, holoviews, panel
121
+ - See `pyproject.toml` for complete dependency list
122
+
123
+ ## License
124
+
125
+ GNU Affero General Public License v3
126
+
127
+ ## Citation
128
+
129
+ If you use MASSter in your research, please cite this repository.
@@ -1,5 +1,5 @@
1
1
  masster/__init__.py,sha256=8U4cIteNlYyHDrxWSbB_MsDKCX9tds07SJG8-vh8Oa8,738
2
- masster/_version.py,sha256=3PLvtFFtatOSA_8w-RwjzMhdV4Khv39uFcfxtrt2xrE,256
2
+ masster/_version.py,sha256=VO89cZ_6MtW0W_P2yFvZpdGOhoWdoBYiY0efEf1SqsA,256
3
3
  masster/chromatogram.py,sha256=NgPr1uLGJHjRu6PWZZGOrS3pCl7sye1yQCJjlRi9ZSY,19305
4
4
  masster/logger.py,sha256=W50V_uh8RSYwGxDrDFhOuj5jpu2tKJyt_16lMw9kQwA,14755
5
5
  masster/spectrum.py,sha256=7wKQes97sI-O2lxrkQnYuoroXFyAWgwSzS4TnjUX8RY,47710
@@ -10,10 +10,14 @@ masster/data/dda/20250530_VH_IQX_KW_RP_HSST3_100mm_12min_pos_v4_MS1_C-MiLUT_C008
10
10
  masster/data/dda/20250530_VH_IQX_KW_RP_HSST3_100mm_12min_pos_v4_MS1_C-MiLUT_C017_v5_r99_01.sample5,sha256=h2OOAWWTwKXzTNewhiYeL-cMYdp_JYLPya8Q9Nv9Lvw,12389587
11
11
  masster/data/libs/ccm.csv,sha256=Q6nylV1152uTpX-ydqWeGrc6L9kgv45xN_fBZ4f7Tvo,12754
12
12
  masster/data/libs/urine.csv,sha256=98L11DolzWzAumTbtKdNNkpDbp-sNDzwAXypgqSZcik,653733
13
+ masster/data/libs/__pycache__/ccm.cpython-312.pyc,sha256=krce-0iqL50kmK1hItP4KWHGCs6M5T10-rgRZynP01w,18597
14
+ masster/data/libs/__pycache__/urine.cpython-312.pyc,sha256=yz16lRCULWE-FDWkkVQCWfJLDKpN6906vm6emeDxu6A,10921
13
15
  masster/data/wiff/2025_01_14_VW_7600_LpMx_DBS_CID_2min_TOP15_030msecMS1_005msecReac_CE35_DBS-ON_3.timeseries.data,sha256=01vC6m__Qqm2rLvlTMZoeKIKowFvovBTUnrNl8Uav3E,24576
14
16
  masster/data/wiff/2025_01_14_VW_7600_LpMx_DBS_CID_2min_TOP15_030msecMS1_005msecReac_CE35_DBS-ON_3.wiff,sha256=go5N9gAM1rn4PZAVaoCmdteY9f7YGEM9gyPdSmkQ8PE,1447936
15
17
  masster/data/wiff/2025_01_14_VW_7600_LpMx_DBS_CID_2min_TOP15_030msecMS1_005msecReac_CE35_DBS-ON_3.wiff.scan,sha256=ahi1Y3UhAj9Bj4Q2MlbgPekNdkJvMOoMXVOoR6CeIxc,13881220
16
18
  masster/data/wiff/2025_01_14_VW_7600_LpMx_DBS_CID_2min_TOP15_030msecMS1_005msecReac_CE35_DBS-ON_3.wiff2,sha256=TFB0HW4Agkig6yht7FtgjUdbXax8jjKaHpSZSvuU5vs,3252224
19
+ masster/lib/__init__.py,sha256=TcePNx3SYZHz6763TL9Sg4gUNXaRWjlrOtyS6vsu-hg,178
20
+ masster/lib/lib.py,sha256=seAOqzat74iF6YnbNakU3rF5MN5t9WABIpcIPTvU1q8,24987
17
21
  masster/sample/__init__.py,sha256=HL0m1ept0PMAYUCQtDDnkdOS12IFl6oLAq4TZQz83uY,170
18
22
  masster/sample/adducts.py,sha256=6LvMrrsFjvr4OCuNx6N9qCE1vlvRGEgXtmMz8iwMMQg,31675
19
23
  masster/sample/h5.py,sha256=ZAGPJzPXkytmBOlpZXbyRM0K3buzxWd9f4a0hz85NMA,100475
@@ -37,7 +41,7 @@ masster/sample/defaults/sample_def.py,sha256=keoXyMyrm_iLgbYqfIbqCpJ3XHBVlNwCNmb
37
41
  masster/study/__init__.py,sha256=Zspv6U8jFqjkHGYdNdDy1rfUnCSolCzUdgSSg98PRgE,166
38
42
  masster/study/export.py,sha256=JVsOj-m5aHi4i6M6_Mq1nS_YaukT9E17hdyZtVQJ6FQ,53030
39
43
  masster/study/h5.py,sha256=OLSCBfeBrXGibbSogs8YEaqP-3kqcJbP5yIJ4dfPgrc,76339
40
- masster/study/helpers.py,sha256=tcbDU0QucGA4CyC4ur2MwfInl-cmmehKivVBLw8dInk,144787
44
+ masster/study/helpers.py,sha256=FbHoy2GfArozl1OmxAMsuUjsjhfiobwlOwc1AyEYFaE,148057
41
45
  masster/study/helpers_optimized.py,sha256=5qf6tiLPgsOSna9XVtTrx2B0GJ1wI8ZTrSv8n8MtxNg,13927
42
46
  masster/study/id.py,sha256=YlrvLhMXQcXAQgkZTYZ6Ih9y2nw0MGIq2on_wx7Oibc,42211
43
47
  masster/study/load.py,sha256=V_XHHxPTQaCu1RACliDVpuGoLh7DxQ6gcLTuctRcc6M,69034
@@ -59,8 +63,9 @@ masster/study/defaults/integrate_chrom_def.py,sha256=0MNIWGTjty-Zu-NTQsIweuj3UVq
59
63
  masster/study/defaults/integrate_def.py,sha256=Vf4SAzdBfnsSZ3IRaF0qZvWu3gMDPHdgPfMYoPKeWv8,7246
60
64
  masster/study/defaults/merge_def.py,sha256=EBsKE3hsAkTEzN9dpdRD5W3_suTKy_WZ_96rwS0uBuE,8572
61
65
  masster/study/defaults/study_def.py,sha256=5rHxs4rMP2IuBCDEeOoFyjqY8t-Gx54IuQ4Jam3AglM,15510
62
- masster-0.4.2.dist-info/METADATA,sha256=2LOAc8KioZbeKbp4xFimSmddU7fWceDMfKOdtyqvHyE,44221
63
- masster-0.4.2.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
64
- masster-0.4.2.dist-info/entry_points.txt,sha256=ZHguQ_vPmdbpqq2uGtmEOLJfgP-DQ1T0c07Lxh30wc8,58
65
- masster-0.4.2.dist-info/licenses/LICENSE,sha256=bx5iLIKjgAdYQ7sISn7DsfHRKkoCUm1154sJJKhgqnU,35184
66
- masster-0.4.2.dist-info/RECORD,,
66
+ masster-0.4.4.dist-info/licenses/LICENSE,sha256=bx5iLIKjgAdYQ7sISn7DsfHRKkoCUm1154sJJKhgqnU,35184
67
+ masster-0.4.4.dist-info/METADATA,sha256=kRUmKm5X-73tccWrIBt5IO1_lBjfgtzelbuL4Ws3ogo,4518
68
+ masster-0.4.4.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
69
+ masster-0.4.4.dist-info/entry_points.txt,sha256=ZHguQ_vPmdbpqq2uGtmEOLJfgP-DQ1T0c07Lxh30wc8,58
70
+ masster-0.4.4.dist-info/top_level.txt,sha256=MPZokk6zWIP_UhQ_VkKxSTG63eM4WGu9oTcMpQXp7NI,8
71
+ masster-0.4.4.dist-info/RECORD,,
@@ -1,4 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: hatchling 1.27.0
2
+ Generator: setuptools (80.9.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
+
@@ -0,0 +1 @@
1
+ masster