masster 0.4.6__py3-none-any.whl → 0.4.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of masster might be problematic. Click here for more details.

@@ -1,359 +0,0 @@
1
- """
2
- Optimized features_select method for improved performance.
3
-
4
- This module contains the optimized version of features_select that:
5
- 1. Combines all filters into a single expression
6
- 2. Uses lazy evaluation
7
- 3. Reduces logging overhead
8
- 4. Pre-checks column existence
9
- 5. Implements early returns
10
- """
11
-
12
- import polars as pl
13
-
14
-
15
def features_select_optimized(
    self,
    mz=None,
    rt=None,
    inty=None,
    sample_uid=None,
    sample_name=None,
    consensus_uid=None,
    feature_uid=None,
    filled=None,
    quality=None,
    chrom_coherence=None,
    chrom_prominence=None,
    chrom_prominence_scaled=None,
    chrom_height_scaled=None,
):
    """
    Select rows of ``self.features_df`` that satisfy every supplied filter.

    All requested filters are turned into polars expressions, AND-ed into a
    single predicate and applied in one lazy ``filter(...).collect()`` pass.

    Args:
        mz: 2-tuple ``(min, max)`` for an inclusive range, or a single value
            used as an inclusive minimum.
        rt: same convention as ``mz``, applied to the ``rt`` column.
        inty: same convention as ``mz``, applied to the ``inty`` column.
        sample_uid: 2-tuple for an inclusive range, a list (or a tuple whose
            length is not 2) for membership, or a single value for equality.
        sample_name: a list/tuple of names, or a single name. Names are
            resolved to sample UIDs through ``self.samples_df``; if nothing
            matches, the result is empty.
        consensus_uid: same convention as ``sample_uid``.
        feature_uid: same convention as ``sample_uid``.
        filled: ``True`` keeps rows whose ``filled`` column is true;
            ``False`` keeps rows where it is false or null.
        quality: same convention as ``mz``, applied to the ``quality`` column.
        chrom_coherence: same convention as ``mz``.
        chrom_prominence: same convention as ``mz``.
        chrom_prominence_scaled: same convention as ``mz``.
        chrom_height_scaled: same convention as ``mz``.

    Returns:
        polars.DataFrame: the filtered features. An empty ``pl.DataFrame()``
        is returned when there are no features at all, and a clone of
        ``self.features_df`` when no filter is supplied.

    Notes:
        Filters on ``filled``, ``quality`` and the ``chrom_*`` columns are
        skipped with a logged warning when the column is absent. The other
        columns are not pre-checked.
    """
    if self.features_df is None or self.features_df.is_empty():
        self.logger.warning("No features found in study.")
        return pl.DataFrame()

    # Nothing to filter on: hand back a copy without building any expression.
    requested = (
        mz,
        rt,
        inty,
        sample_uid,
        sample_name,
        consensus_uid,
        feature_uid,
        filled,
        quality,
        chrom_coherence,
        chrom_prominence,
        chrom_prominence_scaled,
        chrom_height_scaled,
    )
    if all(value is None for value in requested):
        return self.features_df.clone()

    initial_count = len(self.features_df)

    # Column names are looked up once and reused for every optional filter.
    available_columns = set(self.features_df.columns)

    conditions = []
    # Named to avoid shadowing the stdlib ``warnings`` module.
    missing_column_messages = []

    # Range filters on columns that are not pre-checked for existence.
    for column, value in (("mz", mz), ("rt", rt), ("inty", inty)):
        if value is not None:
            conditions.append(_range_condition(column, value))

    if sample_uid is not None:
        conditions.append(_uid_condition("sample_uid", sample_uid))

    if sample_name is not None:
        # Sample names live in samples_df; translate them to sample UIDs first.
        # Tuples are treated like lists so they are matched by membership
        # instead of being compared to the column as a single value.
        if isinstance(sample_name, (list, tuple)):
            name_match = pl.col("sample_name").is_in(sample_name)
        else:
            name_match = pl.col("sample_name") == sample_name
        matching_uids = self.samples_df.filter(name_match)["sample_uid"].to_list()

        if matching_uids:
            conditions.append(pl.col("sample_uid").is_in(matching_uids))
        else:
            # No matching samples: force an empty result.
            conditions.append(pl.lit(False))

    if consensus_uid is not None:
        conditions.append(_uid_condition("consensus_uid", consensus_uid))

    if feature_uid is not None:
        conditions.append(_uid_condition("feature_uid", feature_uid))

    if filled is not None:
        if "filled" in available_columns:
            is_filled = pl.col("filled")
            if filled:
                conditions.append(is_filled)
            else:
                # Null is counted as "not filled".
                conditions.append(~is_filled | is_filled.is_null())
        else:
            missing_column_messages.append("'filled' column not found in features_df")

    # Optional score columns: filter when present, otherwise warn and skip.
    optional_ranges = (
        ("quality", quality),
        ("chrom_coherence", chrom_coherence),
        ("chrom_prominence", chrom_prominence),
        ("chrom_prominence_scaled", chrom_prominence_scaled),
        ("chrom_height_scaled", chrom_height_scaled),
    )
    for column, value in optional_ranges:
        if value is None:
            continue
        if column in available_columns:
            conditions.append(_range_condition(column, value))
        else:
            missing_column_messages.append(f"'{column}' column not found in features_df")

    # Emit the collected warnings in one place, after all filters are built.
    for message in missing_column_messages:
        self.logger.warning(message)

    if conditions:
        # AND all conditions into one predicate and apply it in a single pass.
        combined_filter = conditions[0]
        for condition in conditions[1:]:
            combined_filter = combined_filter & condition
        feats = self.features_df.lazy().filter(combined_filter).collect()
    else:
        # Every requested filter referred to a missing column.
        feats = self.features_df.clone()

    final_count = len(feats)

    if final_count == 0:
        self.logger.warning("No features remaining after applying selection criteria.")
    else:
        removed_count = initial_count - final_count
        self.logger.info(f"Features selected: {final_count} (removed: {removed_count})")

    return feats


def _range_condition(column, value):
    """Return an inclusive ``(min, max)`` range test for a 2-tuple, else ``column >= value``."""
    if isinstance(value, tuple) and len(value) == 2:
        lower, upper = value
        return (pl.col(column) >= lower) & (pl.col(column) <= upper)
    return pl.col(column) >= value


def _uid_condition(column, value):
    """Return a UID test: 2-tuple is an inclusive range, other list/tuple is membership, scalar is equality."""
    if isinstance(value, tuple) and len(value) == 2:
        lower, upper = value
        return (pl.col(column) >= lower) & (pl.col(column) <= upper)
    if isinstance(value, (list, tuple)):
        return pl.col(column).is_in(value)
    return pl.col(column) == value
271
-
272
-
273
def features_select_benchmarked(
    self,
    mz=None,
    rt=None,
    inty=None,
    sample_uid=None,
    sample_name=None,
    consensus_uid=None,
    feature_uid=None,
    filled=None,
    quality=None,
    chrom_coherence=None,
    chrom_prominence=None,
    chrom_prominence_scaled=None,
    chrom_height_scaled=None,
):
    """
    Run the original and the optimized selection back to back and log timings.

    Every filter argument is forwarded unchanged to both
    ``self.features_select_original`` and ``features_select_optimized``.
    The result of the original call is discarded; only its duration is used.

    Returns:
        The value returned by ``features_select_optimized``.
    """
    import time

    # One shared keyword mapping keeps both calls on identical arguments.
    filters = {
        "mz": mz,
        "rt": rt,
        "inty": inty,
        "sample_uid": sample_uid,
        "sample_name": sample_name,
        "consensus_uid": consensus_uid,
        "feature_uid": feature_uid,
        "filled": filled,
        "quality": quality,
        "chrom_coherence": chrom_coherence,
        "chrom_prominence": chrom_prominence,
        "chrom_prominence_scaled": chrom_prominence_scaled,
        "chrom_height_scaled": chrom_height_scaled,
    }

    # Time the original implementation first.
    tick = time.perf_counter()
    self.features_select_original(**filters)
    original_time = time.perf_counter() - tick

    # Then time the optimized implementation.
    tick = time.perf_counter()
    result_optimized = features_select_optimized(self, **filters)
    optimized_time = time.perf_counter() - tick

    # Guard against a zero denominator when the optimized call is too fast to measure.
    if optimized_time > 0:
        speedup = original_time / optimized_time
    else:
        speedup = float("inf")

    self.logger.info(
        f"Performance comparison - Original: {original_time:.4f}s, Optimized: {optimized_time:.4f}s, Speedup: {speedup:.2f}x",
    )

    return result_optimized
340
-
341
-
342
def monkey_patch_study():
    """
    Apply the optimized features_select method to the Study class.

    After this call:
      * ``Study.features_select_original`` holds the method that was in place
        before patching,
      * ``Study.features_select`` is ``features_select_optimized``,
      * ``Study.features_select_benchmarked`` is available for timing both.

    The function is safe to call more than once: the original method is only
    captured while ``Study.features_select`` is not yet the optimized version.
    Without that guard, a second call would store the optimized function as
    the "original" and the benchmark would compare it against itself.
    """
    from masster.study.study import Study

    # Store original method for benchmarking (first patch only).
    if Study.features_select is not features_select_optimized:
        Study.features_select_original = Study.features_select

    # Replace with optimized version
    Study.features_select = features_select_optimized

    # Add benchmarked version as an option
    Study.features_select_benchmarked = features_select_benchmarked

    print("Successfully patched Study.features_select with optimized version")
@@ -1,131 +0,0 @@
1
- Metadata-Version: 2.4
2
- Name: masster
3
- Version: 0.4.6
4
- Summary: Mass spectrometry data analysis package
5
- Author: Zamboni Lab
6
- License-Expression: AGPL-3.0-only
7
- Project-URL: homepage, https://github.com/zamboni-lab/masster
8
- Project-URL: repository, https://github.com/zamboni-lab/masster
9
- Project-URL: documentation, https://github.com/zamboni-lab/masster#readme
10
- Keywords: mass spectrometry,metabolomics,lc-ms,chromatography
11
- Classifier: Development Status :: 3 - Alpha
12
- Classifier: Intended Audience :: Science/Research
13
- Classifier: Operating System :: OS Independent
14
- Classifier: Programming Language :: Python :: 3
15
- Classifier: Programming Language :: Python :: 3.11
16
- Classifier: Programming Language :: Python :: 3.12
17
- Classifier: Programming Language :: Python :: 3.13
18
- Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
19
- Classifier: Topic :: Scientific/Engineering :: Chemistry
20
- Requires-Python: >=3.11
21
- Description-Content-Type: text/markdown
22
- License-File: LICENSE
23
- Requires-Dist: alpharaw>=0.4.8
24
- Requires-Dist: bokeh>=3.7.3
25
- Requires-Dist: datashader>=0.18.1
26
- Requires-Dist: holoviews>=1.21.0
27
- Requires-Dist: h5py>=3.14.0
28
- Requires-Dist: hvplot>=0.11.3
29
- Requires-Dist: loguru>=0.7.3
30
- Requires-Dist: numpy>=2.0.0
31
- Requires-Dist: marimo>=0.14.16
32
- Requires-Dist: matplotlib>=3.8.0
33
- Requires-Dist: pandas>=2.2.0
34
- Requires-Dist: panel>=1.7.0
35
- Requires-Dist: pyopenms>=3.3.0
36
- Requires-Dist: pyteomics>=4.7.0
37
- Requires-Dist: pythonnet>=3.0.0
38
- Requires-Dist: tqdm>=4.65.0
39
- Requires-Dist: openpyxl>=3.1.5
40
- Requires-Dist: cmap>=0.6.2
41
- Requires-Dist: altair>=5.5.0
42
- Requires-Dist: scikit-learn>=1.7.1
43
- Requires-Dist: ipython>=9.4.0
44
- Requires-Dist: scipy>=1.14.1
45
- Requires-Dist: polars>=1.32.3
46
- Provides-Extra: dev
47
- Requires-Dist: pytest>=7.0.0; extra == "dev"
48
- Requires-Dist: pytest-cov>=4.0.0; extra == "dev"
49
- Requires-Dist: pytest-mock>=3.10.0; extra == "dev"
50
- Requires-Dist: black>=23.0.0; extra == "dev"
51
- Requires-Dist: flake8>=5.0.0; extra == "dev"
52
- Requires-Dist: mypy>=1.0.0; extra == "dev"
53
- Requires-Dist: pre-commit>=3.0.0; extra == "dev"
54
- Requires-Dist: twine>=4.0.0; extra == "dev"
55
- Requires-Dist: build>=0.10.0; extra == "dev"
56
- Requires-Dist: safety>=2.0.0; extra == "dev"
57
- Requires-Dist: bandit>=1.7.0; extra == "dev"
58
- Requires-Dist: pyyaml>=6.0; extra == "dev"
59
- Provides-Extra: docs
60
- Requires-Dist: sphinx>=5.0.0; extra == "docs"
61
- Requires-Dist: sphinx-rtd-theme>=1.2.0; extra == "docs"
62
- Requires-Dist: sphinxcontrib-napoleon>=0.7; extra == "docs"
63
- Provides-Extra: test
64
- Requires-Dist: pytest>=7.0.0; extra == "test"
65
- Requires-Dist: pytest-cov>=4.0.0; extra == "test"
66
- Requires-Dist: pytest-mock>=3.10.0; extra == "test"
67
- Requires-Dist: coverage>=7.0.0; extra == "test"
68
- Dynamic: license-file
69
-
70
- # MASSter
71
-
72
- **MASSter** is a comprehensive Python package for mass spectrometry data analysis, designed for metabolomics and LC-MS data processing. It provides tools for feature detection, alignment, consensus building, and interactive visualization of mass spectrometry datasets. It is designed for DDA data and also contains hidden functionality for DIA and ZTScan DIA data.
73
-
74
- This is a poorly documented, stable branch of the development codebase in use in the Zamboni lab.
75
-
76
- Some of the core processing functions are derived from OpenMS. We use the same nomenclature and refer to their documentation for an explanation of the parameters. To a large extent, however, you should be able to use the defaults (=no parameters) when calling processing steps.
77
-
78
-
79
- ## Installation
80
-
81
- ```bash
82
- pip install masster
83
- ```
84
-
85
- ### Basic workflow for analyzing an LC-MS study with two or more samples
86
-
87
- ```python
88
- import masster
89
-
90
- # Initialize the Study object with the default folder
91
- study = masster.Study(default_folder=r'D:\...\mylcms')
92
-
93
- # Load data from folder with raw data, here: WIFF
94
- study.add(r'D:\...\...\...\*.wiff')
95
-
96
- # Perform retention time correction
97
- study.align(rt_max_diff=2.0)
98
- study.plot_alignment()
99
-
100
- # Find consensus features
101
- study.merge(min_samples=3)
102
- study.plot_consensus_2d()
103
-
104
- # Retrieve missing data for quantification
105
- study.fill()
106
-
107
- # Integrate according to consensus metadata
108
- study.integrate()
109
-
110
- # export results
111
- study.export_mgf()
112
- study.export_mztab()
113
- study.export_consensus()
114
-
115
- # Save the study to .study5
116
- study.save()
117
- ```
118
-
119
- ## Requirements
120
-
121
- - Python ≥ 3.11
122
- - Key dependencies: pandas, polars, numpy, scipy, matplotlib, bokeh, holoviews, panel
123
- - See `pyproject.toml` for complete dependency list
124
-
125
- ## License
126
-
127
- GNU Affero General Public License v3
128
-
129
- ## Citation
130
-
131
- If you use MASSter in your research, please cite this repository.
@@ -1 +0,0 @@
1
- masster