masster 0.2.2.tar.gz → 0.2.4.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (72)
  1. {masster-0.2.2 → masster-0.2.4}/PKG-INFO +1 -120
  2. masster-0.2.4/README.md +58 -0
  3. {masster-0.2.2 → masster-0.2.4}/pyproject.toml +1 -1
  4. {masster-0.2.2 → masster-0.2.4}/src/masster/_version.py +1 -1
  5. {masster-0.2.2 → masster-0.2.4}/src/masster/sample/save.py +149 -89
  6. {masster-0.2.2 → masster-0.2.4}/src/masster/study/save.py +0 -6
  7. {masster-0.2.2 → masster-0.2.4}/src/masster/study/study.py +23 -0
  8. {masster-0.2.2 → masster-0.2.4}/uv.lock +1 -1
  9. masster-0.2.2/README.md +0 -177
  10. {masster-0.2.2 → masster-0.2.4}/.github/workflows/publish.yml +0 -0
  11. {masster-0.2.2 → masster-0.2.4}/.github/workflows/security.yml +0 -0
  12. {masster-0.2.2 → masster-0.2.4}/.github/workflows/test.yml +0 -0
  13. {masster-0.2.2 → masster-0.2.4}/.gitignore +0 -0
  14. {masster-0.2.2 → masster-0.2.4}/.pre-commit-config.yaml +0 -0
  15. {masster-0.2.2 → masster-0.2.4}/LICENSE +0 -0
  16. {masster-0.2.2 → masster-0.2.4}/Makefile +0 -0
  17. {masster-0.2.2 → masster-0.2.4}/TESTING.md +0 -0
  18. {masster-0.2.2 → masster-0.2.4}/demo/example_batch_process.py +0 -0
  19. {masster-0.2.2 → masster-0.2.4}/demo/example_sample_process.py +0 -0
  20. {masster-0.2.2 → masster-0.2.4}/src/masster/__init__.py +0 -0
  21. {masster-0.2.2 → masster-0.2.4}/src/masster/chromatogram.py +0 -0
  22. {masster-0.2.2 → masster-0.2.4}/src/masster/data/examples/2025_01_14_VW_7600_LpMx_DBS_CID_2min_TOP15_030msecMS1_005msecReac_CE35_DBS-ON_3.mzML +0 -0
  23. {masster-0.2.2 → masster-0.2.4}/src/masster/data/examples/2025_01_14_VW_7600_LpMx_DBS_CID_2min_TOP15_030msecMS1_005msecReac_CE35_DBS-ON_3.timeseries.data +0 -0
  24. {masster-0.2.2 → masster-0.2.4}/src/masster/data/examples/2025_01_14_VW_7600_LpMx_DBS_CID_2min_TOP15_030msecMS1_005msecReac_CE35_DBS-ON_3.wiff +0 -0
  25. {masster-0.2.2 → masster-0.2.4}/src/masster/data/examples/2025_01_14_VW_7600_LpMx_DBS_CID_2min_TOP15_030msecMS1_005msecReac_CE35_DBS-ON_3.wiff.scan +0 -0
  26. {masster-0.2.2 → masster-0.2.4}/src/masster/data/examples/2025_01_14_VW_7600_LpMx_DBS_CID_2min_TOP15_030msecMS1_005msecReac_CE35_DBS-ON_3.wiff2 +0 -0
  27. {masster-0.2.2 → masster-0.2.4}/src/masster/logger.py +0 -0
  28. {masster-0.2.2 → masster-0.2.4}/src/masster/sample/__init__.py +0 -0
  29. {masster-0.2.2 → masster-0.2.4}/src/masster/sample/defaults/__init__.py +0 -0
  30. {masster-0.2.2 → masster-0.2.4}/src/masster/sample/defaults/find_adducts_def.py +0 -0
  31. {masster-0.2.2 → masster-0.2.4}/src/masster/sample/defaults/find_features_def.py +0 -0
  32. {masster-0.2.2 → masster-0.2.4}/src/masster/sample/defaults/find_ms2_def.py +0 -0
  33. {masster-0.2.2 → masster-0.2.4}/src/masster/sample/defaults/get_spectrum_def.py +0 -0
  34. {masster-0.2.2 → masster-0.2.4}/src/masster/sample/defaults/sample_def.py +0 -0
  35. {masster-0.2.2 → masster-0.2.4}/src/masster/sample/h5.py +0 -0
  36. {masster-0.2.2 → masster-0.2.4}/src/masster/sample/helpers.py +0 -0
  37. {masster-0.2.2 → masster-0.2.4}/src/masster/sample/load.py +0 -0
  38. {masster-0.2.2 → masster-0.2.4}/src/masster/sample/parameters.py +0 -0
  39. {masster-0.2.2 → masster-0.2.4}/src/masster/sample/plot.py +0 -0
  40. {masster-0.2.2 → masster-0.2.4}/src/masster/sample/processing.py +0 -0
  41. {masster-0.2.2 → masster-0.2.4}/src/masster/sample/sample.py +0 -0
  42. {masster-0.2.2 → masster-0.2.4}/src/masster/sample/sample5_schema.json +0 -0
  43. {masster-0.2.2 → masster-0.2.4}/src/masster/spectrum.py +0 -0
  44. {masster-0.2.2 → masster-0.2.4}/src/masster/study/__init__.py +0 -0
  45. {masster-0.2.2 → masster-0.2.4}/src/masster/study/defaults/__init__.py +0 -0
  46. {masster-0.2.2 → masster-0.2.4}/src/masster/study/defaults/align_def.py +0 -0
  47. {masster-0.2.2 → masster-0.2.4}/src/masster/study/defaults/export_def.py +0 -0
  48. {masster-0.2.2 → masster-0.2.4}/src/masster/study/defaults/fill_chrom_def.py +0 -0
  49. {masster-0.2.2 → masster-0.2.4}/src/masster/study/defaults/find_consensus_def.py +0 -0
  50. {masster-0.2.2 → masster-0.2.4}/src/masster/study/defaults/find_ms2_def.py +0 -0
  51. {masster-0.2.2 → masster-0.2.4}/src/masster/study/defaults/integrate_chrom_def.py +0 -0
  52. {masster-0.2.2 → masster-0.2.4}/src/masster/study/defaults/study_def.py +0 -0
  53. {masster-0.2.2 → masster-0.2.4}/src/masster/study/export.py +0 -0
  54. {masster-0.2.2 → masster-0.2.4}/src/masster/study/h5.py +0 -0
  55. {masster-0.2.2 → masster-0.2.4}/src/masster/study/helpers.py +0 -0
  56. {masster-0.2.2 → masster-0.2.4}/src/masster/study/load.py +0 -0
  57. {masster-0.2.2 → masster-0.2.4}/src/masster/study/parameters.py +0 -0
  58. {masster-0.2.2 → masster-0.2.4}/src/masster/study/plot.py +0 -0
  59. {masster-0.2.2 → masster-0.2.4}/src/masster/study/processing.py +0 -0
  60. {masster-0.2.2 → masster-0.2.4}/src/masster/study/study5_schema.json +0 -0
  61. {masster-0.2.2 → masster-0.2.4}/tests/conftest.py +0 -0
  62. {masster-0.2.2 → masster-0.2.4}/tests/test_chromatogram.py +0 -0
  63. {masster-0.2.2 → masster-0.2.4}/tests/test_defaults.py +0 -0
  64. {masster-0.2.2 → masster-0.2.4}/tests/test_imports.py +0 -0
  65. {masster-0.2.2 → masster-0.2.4}/tests/test_integration.py +0 -0
  66. {masster-0.2.2 → masster-0.2.4}/tests/test_logger.py +0 -0
  67. {masster-0.2.2 → masster-0.2.4}/tests/test_parameters.py +0 -0
  68. {masster-0.2.2 → masster-0.2.4}/tests/test_sample.py +0 -0
  69. {masster-0.2.2 → masster-0.2.4}/tests/test_spectrum.py +0 -0
  70. {masster-0.2.2 → masster-0.2.4}/tests/test_study.py +0 -0
  71. {masster-0.2.2 → masster-0.2.4}/tests/test_version.py +0 -0
  72. {masster-0.2.2 → masster-0.2.4}/tox.ini +0 -0
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: masster
- Version: 0.2.2
+ Version: 0.2.4
  Summary: Mass spectrometry data analysis package
  Project-URL: homepage, https://github.com/zamboni-lab/masster
  Project-URL: repository, https://github.com/zamboni-lab/masster
@@ -736,23 +736,12 @@ Most core processing functions are derived from OpenMS. We use the same nomencla

  This is a poorly documented, stable branch of the development codebase in use in the Zamboni lab. Novel functionalities will be added based on need and requests.

- ## Features
-
- - **Mass spectrometry data processing**: Support for multiple file formats (.wiff, .mzML, .raw, .mzpkl)
- - **Feature detection and alignment**: Automated chromatographic peak detection and retention time alignment
- - **Consensus feature building**: Identification of features across multiple samples
- - **Interactive visualizations**: 2D plots, chromatograms, and statistical dashboards
- - **Batch processing**: Process entire studies with multiple samples
- - **Export capabilities**: MGF export for spectral library searches
-
  ## Installation

  ```bash
  pip install masster
  ```

- ## Quick Start
-
  ### Basic Workflow for analyzing LC-MS study with 2-... samples

  ```python
@@ -784,114 +773,6 @@ study.export_mgf()
  study.save()
  ```

- ### Study-Level Plots
-
- ```python
- # Plot features from multiple samples
- study.plot_samples_2d()
-
- # Plot consensus features
- study.plot_consensus_2d()
-
- # Plot overlaid chromatograms for specific consensus features (use their uid)
- study.plot_chrom(uids=[1, 2, 3])
- ```
-
-
- ### Single Sample Processing
-
- ```python
- from masster.sample import Sample
-
- # Load a single sample (mzML, RAW, WIFF)
- sample = Sample("path/to/your/file.mzML")
-
- # Detect features
- sample.find_features(chrom_peak_snr=10, noise=500, chrom_fwhm=1.0)
-
- # Detect adducts
- sample.find_adducts()
-
- # Find MS2 spectra
- sample.find_ms2()
-
- # Save results to .sample5
- sample.save()
- ```
-
- Masster provides extensive plotting capabilities for data exploration and quality control:
-
- ### Single sample visualization
-
- ```python
- # Plot 2D overview of MS data with detected features
- sample.plot_2d(
- filename="overview_2d.html",
- show_features=True,
- show_ms2=True,
- title="MS Data Overview"
- )
-
- # Plot with feature filtering
- sample.plot_2d(
- filename="features_ms2_only.html"
- )
-
- # Plot extracted ion chromatogram
- sample.plot_eic(
- feature_uid=123,
- rt_tol=10,
- mz_tol=0.005
- )
- ```
-
- ### Quality Control Plots
-
- ```python
- # Plot DDA acquisition statistics
- sample.plot_dda_stats(filename="dda_stats.html")
-
- # Plot feature statistics
- sample.plot_feature_stats(filename="feature_stats.html")
-
- # Plot total ion chromatogram
- sample.plot_tic(filename="tic.html")
- ```
-
- ### Advanced Plotting Options
-
- ```python
- # Plot with Oracle annotation data
- sample.plot_2d_oracle(
- oracle_folder="path/to/oracle/results",
- colorby="hg", # Color by chemical class
- filename="annotated_features.html"
- )
-
- # Plot MS2 cycle view
- sample.plot_ms2_cycle(
- cycle=100,
- filename="ms2_cycle.html",
- centroid=True
- )
- ```
-
- ## File Format Support
-
- - **Input formats**: .wiff, .mzML, .raw files
- - **Intermediate formats**: .sample5 and .study5 (HDF5) for fast loading
- - **Export formats**: .mgf, .csv
- - **Visualization**: .html (interactive), .png, .svg
-
- ## Advanced Features
-
- ### Batch Processing
- Use the command-line interface for processing multiple files:
-
- ```bash
- python -m masster.demo.example_batch_process input_directory --recursive --dest output_directory
- ```
-
  ## Requirements

  - Python ≥ 3.11
@@ -0,0 +1,58 @@
+ # MASSter
+
+ **MASSter** is a comprehensive Python package for mass spectrometry data analysis, designed for metabolomics and LC-MS data processing. It provides tools for feature detection, alignment, consensus building, and interactive visualization of mass spectrometry datasets. It is designed to deal with DDA, and hides functionalities for DIA and ZTScan DIA data.
+
+ Most core processing functions are derived from OpenMS. We use the same nomenclature and refer to their documentation for an explanation of the parameters. To a large extent, however, you should be able to use the defaults (=no parameters) when calling processing steps.
+
+ This is a poorly documented, stable branch of the development codebase in use in the Zamboni lab. Novel functionalities will be added based on need and requests.
+
+ ## Installation
+
+ ```bash
+ pip install masster
+ ```
+
+ ### Basic Workflow for analyzing LC-MS study with 2-... samples
+
+ ```python
+ import masster
+
+ # Initialize the Study object with the default folder
+ study = masster.Study(default_folder=r'D:\...\mylcms')
+
+ # Load data from folder with raw data, here: WIFF
+ study.add_folder(r'D:\...\...\...\*.wiff')
+
+ # Align maps
+ study.align(rt_max_diff=2.0)
+
+ # Find consensus features
+ study.find_consensus(min_samples=3)
+
+ # Retrieve missing data for quantification
+ study.fill_chrom(abs_)
+
+ # Integrate according to consensus metadata
+ study.integrate_chrom()
+
+ # link MS2 across the whole study and export them
+ study.find_ms2()
+ study.export_mgf()
+
+ # Save the study to .study5
+ study.save()
+ ```
+
+ ## Requirements
+
+ - Python ≥ 3.11
+ - Key dependencies: pandas, polars, numpy, scipy, matplotlib, bokeh, holoviews, panel
+ - See `pyproject.toml` for complete dependency list
+
+ ## License
+
+ GNU Affero General Public License v3
+
+ ## Citation
+
+ If you use Masster in your research, please cite this repository.
@@ -1,7 +1,7 @@

  [project]
  name = "masster"
- version = "0.2.2"
+ version = "0.2.4"
  description = "Mass spectrometry data analysis package"
  authors = [
  { name = "Zamboni Lab" }
@@ -1,7 +1,7 @@
  from __future__ import annotations


- __version__ = "0.2.2"
+ __version__ = "0.2.4"


  def get_version():
@@ -71,6 +71,23 @@ from masster.spectrum import combine_peaks


  def save(self, filename=None):
+ """
+ Save the current object to a file in the '.sample5' format.
+
+ If `filename` is not provided, the method attempts to use `self.file_path` as the base name,
+ replacing its extension with '.sample5'. If neither `filename` nor `self.file_path` is available,
+ a ValueError is raised.
+
+ If `filename` is provided and `self.file_path` is an absolute path, the extension of `filename`
+ is replaced with '.sample5'. Otherwise, if `self.file_path` is available, its extension is replaced
+ with '.sample5'. If neither is available, a ValueError is raised.
+
+ Parameters:
+ filename (str, optional): The name of the file to save to. If not provided, uses `self.file_path`.
+
+ Returns:
+ None
+ """
  if filename is None:
  # save to default file name
  if self.file_path is not None:
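The new docstring spells out how the target path is resolved. As a rough, standalone restatement of that rule (not masster's actual implementation; `file_path` is the attribute the docstring refers to, everything else is illustrative):

```python
import os

def resolve_sample5_path(filename=None, file_path=None):
    """Sketch of the fallback rule described in the docstring above."""
    if filename is None:
        if file_path is None:
            raise ValueError("either filename or file_path must be available")
        # default: reuse the raw-data path and swap the extension
        return os.path.splitext(file_path)[0] + ".sample5"
    # an explicit filename is normalised to the .sample5 extension
    return os.path.splitext(filename)[0] + ".sample5"

# e.g. resolve_sample5_path(file_path="run01.mzML") -> "run01.sample5"
```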
@@ -98,20 +115,43 @@ def _save_featureXML(self, filename="features.featureXML"):


  def export_features(self, filename="features.csv"):
- # COMMENT: cannot export lists to CSV. Could be exported to Parquet
- # COMMENT: removing problematic columns for now
+ """
+ Export the features DataFrame to a CSV or Excel file.
+
+ This method clones the internal features DataFrame, adds a boolean column 'has_ms2' indicating
+ whether the 'ms2_scans' column is not null, and exports the resulting DataFrame to the specified file.
+ Columns with data types 'List' or 'Object' are excluded from the export.
+
+ Parameters:
+ filename (str): The path to the output file. If the filename ends with '.xls' or '.xlsx',
+ the data is exported in Excel format; otherwise, it is exported as CSV.
+ Defaults to 'features.csv'.
+
+ Side Effects:
+ Writes the exported data to the specified file and logs the export operation.
+ """
+ # clone df
+ clean_df = self.features_df.clone()
+ filename = os.path.abspath(filename)
+ # add a column has_ms2=True if colum ms2_scans is not None
+ if "ms2_scans" in clean_df.columns:
+ clean_df = clean_df.with_columns(
+ (pl.col("ms2_scans").is_not_null()).alias("has_ms2")
+ )
  clean_df = self.features_df.select([
- col
- for col in self.features_df.columns
- if self.features_df[col].dtype not in (pl.List, pl.Object)
+ col for col in self.features_df.columns if self.features_df[col].dtype not in (pl.List, pl.Object)
  ])
- clean_df.write_csv(filename)
- self.logger.info(f"Features exported to {filename}")
+ if filename.lower().endswith((".xls", ".xlsx")):
+ clean_df.to_pandas().to_excel(filename, index=False)
+ self.logger.info(f"Features exported to {filename} (Excel format)")
+ else:
+ clean_df.write_csv(filename)
+ self.logger.info(f"Features exported to {filename}")


  def export_mgf(
  self,
- filename:str="features.mgf",
+ filename: str = "features.mgf",
  use_cache=True,
  selection="best",
  split_energy=True,
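The reworked `export_features` keeps only flat columns before writing, and switches to Excel when the filename asks for it. A small self-contained illustration of the same polars pattern (toy DataFrame; only the idiom is taken from the hunk above):

```python
import polars as pl

df = pl.DataFrame({
    "mz": [100.05, 200.10],
    "rt": [12.3, 45.6],
    "ms2_scans": [[1, 2], None],   # nested column that CSV cannot hold
})

# flag rows that carry MS2 scans, then keep only flat (non-List, non-Object) columns
df = df.with_columns(pl.col("ms2_scans").is_not_null().alias("has_ms2"))
flat = df.select([c for c in df.columns if df[c].dtype not in (pl.List, pl.Object)])
flat.write_csv("features.csv")   # an .xlsx target would go through pandas instead
```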
@@ -128,7 +168,6 @@ def export_mgf(
  q1_ratio_max=None,
  eic_corr_min=None,
  deisotope=True,
- verbose=False,
  precursor_trim=-(-10.0),
  centroid_algo=None,
  ):
@@ -175,14 +214,28 @@
  return
  else:
  self.features_df = self.features.get_df()
+
+ # Apply filtering at DataFrame level for better performance
  features = self.features_df
- # iterate over all features
+ if mz_start is not None:
+ features = features.filter(pl.col("mz") >= mz_start)
+ if mz_end is not None:
+ features = features.filter(pl.col("mz") <= mz_end)
+ if rt_start is not None:
+ features = features.filter(pl.col("rt") >= rt_start)
+ if rt_end is not None:
+ features = features.filter(pl.col("rt") <= rt_end)
+ if not include_all_ms1:
+ features = features.filter(pl.col("ms2_scans").is_not_null())
+
+ # Convert to list of dictionaries for faster iteration
+ features_list = features.to_dicts()

  def filter_peaks(spec, inty_min=None, q1_min=None, eic_min=None, q1_max=None):
  # create a copy of the spectrum
  spec = spec.copy()
- l = len(spec.mz)
- mask = [True] * l
+ spec_len = len(spec.mz)
+ mask = [True] * spec_len
  if inty_min is not None and inty_min > 0:
  mask = np.array(mask) & (spec.inty >= inty_min)
  # check if q1_ratio is an attribute of spec
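The added block moves the m/z and retention-time windows from per-row `continue` checks into vectorised polars filters before the loop starts. A minimal standalone sketch of that change (toy data, hypothetical bounds):

```python
import polars as pl

features = pl.DataFrame({
    "mz": [150.0, 400.2, 801.1],
    "rt": [30.0, 95.5, 240.0],
    "ms2_scans": [[5], None, [9, 10]],
})
mz_start, mz_end, rt_start, rt_end = 100.0, 800.0, 0.0, 200.0

# one pass of column-wise predicates instead of `continue` inside the loop
kept = features.filter(
    (pl.col("mz") >= mz_start)
    & (pl.col("mz") <= mz_end)
    & (pl.col("rt") >= rt_start)
    & (pl.col("rt") <= rt_end)
    & pl.col("ms2_scans").is_not_null()
)
rows = kept.to_dicts()   # plain dicts, as iterated by the rewritten loop
```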
@@ -201,9 +254,9 @@ def export_mgf(
  getattr(spec, attr),
  np.ndarray,
  ):
- # check if attr has attribute 0 and its length is equal to l:
+ # check if attr has attribute 0 and its length is equal to spec_len:
  if hasattr(getattr(spec, attr), "__len__"):
- if len(getattr(spec, attr)) == l:
+ if len(getattr(spec, attr)) == spec_len:
  setattr(spec, attr, getattr(spec, attr)[mask])
  return spec

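`filter_peaks` builds one boolean mask from the thresholds and then applies it to every array-valued attribute whose length matches the peak list; the rename from `l` to `spec_len` only makes that length explicit. A standalone sketch of the mask-and-apply pattern (the `Spec` class is a stand-in, not masster's Spectrum):

```python
from dataclasses import dataclass
import numpy as np

@dataclass
class Spec:                      # stand-in for a spectrum object
    mz: np.ndarray
    inty: np.ndarray
    q1_ratio: np.ndarray

spec = Spec(
    mz=np.array([100.0, 150.1, 200.2]),
    inty=np.array([50.0, 500.0, 5000.0]),
    q1_ratio=np.array([0.1, 0.4, 0.9]),
)

spec_len = len(spec.mz)
mask = np.ones(spec_len, dtype=bool)
mask &= spec.inty >= 100.0       # intensity threshold
mask &= spec.q1_ratio >= 0.2     # q1-ratio threshold

# apply the mask to every array attribute of matching length
for attr, value in vars(spec).items():
    if isinstance(value, np.ndarray) and len(value) == spec_len:
        setattr(spec, attr, value[mask])
```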
@@ -218,47 +271,54 @@ def export_mgf(
  else:
  f.write(f"MSLEVEL={spect.ms_level}\n")
  if spect.ms_level is not None:
- if spect.ms_level > 1 and "energy" in spect.__dict__:
+ if spect.ms_level > 1 and hasattr(spect, "energy"):
  f.write(f"ENERGY={spect.energy}\n")
- for mz, inty in zip(spect.mz, spect.inty, strict=False):
- f.write(f"{mz:.5f} {inty:.0f}\n")
+ # Use list comprehension for better performance
+ peak_lines = [f"{mz_val:.5f} {inty_val:.0f}\n" for mz_val, inty_val in zip(spect.mz, spect.inty, strict=False)]
+ f.writelines(peak_lines)
  f.write("END IONS\n\n")

  if centroid_algo is None:
- if "centroid_algo" in self.parameters:
- centroid_algo = self.parameters["centroid_algo"]
+ if hasattr(self.parameters, "centroid_algo"):
+ centroid_algo = self.parameters.centroid_algo
  else:
  centroid_algo = "cr"

+ # count how many features have charge < 0
+ if self.features_df.filter(pl.col("charge") < 0).shape[0]- self.features_df.filter(pl.col("charge") > 0).shape[0] > 0:
+ preferred_charge = -1
+ else:
+ preferred_charge = 1
+
  c = 0
  skip = 0
  # check if features is empty
- if len(features) == 0:
+ if len(features_list) == 0:
  self.logger.warning("No features found.")
  return
+ filename = os.path.abspath(filename)
  with open(filename, "w", encoding="utf-8") as f:
  tdqm_disable = self.log_level not in ["TRACE", "DEBUG", "INFO"]
- for i in tqdm(
- range(len(features)),
- total=len(features),
+ for row in tqdm(
+ features_list,
+ total=len(features_list),
  desc=f"{datetime.now().strftime('%Y-%m-%d %H:%M:%S.%f')[:-3]} | INFO | {self.log_label}Export MGF",
  disable=tdqm_disable,
  ):
- row = features.row(i, named=True)
- if mz_start is not None and row["mz"] < mz_start:
- continue
- if mz_end is not None and row["mz"] > mz_end:
- continue
- if rt_start is not None and row["rt"] < rt_start:
- continue
- if rt_end is not None and row["rt"] > rt_end:
- continue
+ # Pre-calculate common values
+ feature_uid = row["feature_uid"]
+ mz = row["mz"]
+ rt = row["rt"]
+ rt_str = f"{rt:.2f}"
+ mz_str = f"{mz:.4f}"
+
+ # Filtering is now done at DataFrame level, so we can skip these checks
  if row["ms2_scans"] is None and not include_all_ms1:
  skip = skip + 1
  continue

  # write MS1 spectrum
- ms1_scan_uid = self.find_closest_scan(rt=row["rt"])["scan_uid"]
+ ms1_scan_uid = self.find_closest_scan(rt=rt)["scan_uid"]
  spect = self.get_spectrum(
  ms1_scan_uid,
  centroid=centroid,
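Only part of `write_ion` is visible in the hunks above (the MSLEVEL/ENERGY keys, the batched peak lines, and the END IONS terminator). For orientation, a hedged sketch of the kind of MGF entry such a writer emits — the other header keys here are typical MGF fields, not confirmed by this diff:

```python
import numpy as np

mz = np.array([150.055, 301.141])
inty = np.array([2345.0, 12345.0])

with open("demo.mgf", "w", encoding="utf-8") as f:
    f.write("BEGIN IONS\n")
    f.write("TITLE=fid:42, rt:123.45, mz:301.1412\n")   # illustrative title line
    f.write("MSLEVEL=2\n")
    f.write("ENERGY=35\n")
    # batched peak writing, as in the rewritten hunk
    f.writelines(f"{m:.5f} {i:.0f}\n" for m, i in zip(mz, inty))
    f.write("END IONS\n\n")
```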
@@ -271,17 +331,21 @@ def export_mgf(
  if not full_ms1:
  # trim spectrum to region around the precursor, it's wide to potentially identify adducts
  spect = spect.trim(
- mz_min=row["mz"] - 50,
- mz_max=row["mz"] + 50,
+ mz_min=mz - 50,
+ mz_max=mz + 50,
  )

+ charge = preferred_charge
+ if row["charge"] is not None and row["charge"] != 0:
+ charge = row["charge"]
+
  write_ion(
  f,
- f"fid:{row['feature_uid']}, rt:{row['rt']:.2f}, mz:{row['mz']:.4f}",
- row["feature_uid"],
- row["mz"],
- row["rt"],
- row["charge"],
+ f"feature_uid:{feature_uid}, rt:{rt_str}, mz:{mz_str}",
+ feature_uid,
+ mz,
+ rt,
+ charge,
  spect,
  )

@@ -319,29 +383,24 @@
  q1_max=q1_ratio_max,
  )
  # Get the corresponding scan_uid from the list
- current_scan_uid = (
- scan_uids[i] if i < len(scan_uids) else "unknown"
- )
+ current_scan_uid = scan_uids[i] if i < len(scan_uids) else "unknown"
  write_ion(
  f,
- f"fid:{row['feature_uid']}, rt:{row['rt']:.2f}, mz:{row['mz']:.4f}, scan_uid:{current_scan_uid}",
- row["feature_uid"],
- row["mz"],
- row["rt"],
- row["charge"],
+ f"fid:{feature_uid}, rt:{rt_str}, mz:{mz_str}, scan_uid:{current_scan_uid}",
+ feature_uid,
+ mz,
+ rt,
+ charge,
  s,
  )
+ c += 1

  elif split_energy:
  # get energy of all scans with scan_uid in ms2_scans
  energy = [s.energy for s in row["ms2_specs"]]
  # find unique energies
  unique_energies = list(set(energy))
  for e in unique_energies:
- ms2_scans = [
- row["ms2_scans"][i]
- for i, s in enumerate(row["ms2_specs"])
- if s.energy == e
- ]
+ ms2_scans = [row["ms2_scans"][i] for i, s in enumerate(row["ms2_specs"]) if s.energy == e]
  if selection == "best":
  # Keep as list with single element
@@ -362,13 +421,14 @@
  )
  write_ion(
  f,
- f"fid:{row['feature_uid']}, rt:{row['rt']:.2f}, mz:{row['mz']:.4f}, scan_uid:{scan_uid}, energy:{e}",
- row["feature_uid"],
- row["mz"],
- row["rt"],
- row["charge"],
+ f"fid:{feature_uid}, rt:{rt_str}, mz:{mz_str}, scan_uid:{scan_uid}, energy:{e}",
+ feature_uid,
+ mz,
+ rt,
+ charge,
  spect,
  )
+ c += 1
  else:
  if selection == "best":
  ms2_scans = row["ms2_scans"][0]
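In the `split_energy` branch shown above, MS2 scans are regrouped by their collision energy and, with `selection="best"`, only the first scan per energy is kept. A toy sketch of that grouping (stand-in objects; real ones come from `row["ms2_specs"]` and `row["ms2_scans"]`):

```python
from types import SimpleNamespace

ms2_specs = [SimpleNamespace(energy=e) for e in (20, 35, 20)]
ms2_scans = [101, 102, 103]

for e in set(s.energy for s in ms2_specs):
    scans_at_e = [ms2_scans[i] for i, s in enumerate(ms2_specs) if s.energy == e]
    best = scans_at_e[0]          # selection == "best" keeps a single scan
    print(e, scans_at_e, best)
```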
@@ -388,13 +448,14 @@
  )
  write_ion(
  f,
- f"fid:{row['feature_uid']}, rt:{row['rt']:.2f}, mz:{row['mz']:.4f}, scan_uid:{ms2_scans}",
- row["feature_uid"],
- row["mz"],
- row["rt"],
- row["charge"],
+ f"fid:{feature_uid}, rt:{rt_str}, mz:{mz_str}, scan_uid:{ms2_scans}",
+ feature_uid,
+ mz,
+ rt,
+ charge,
  spect,
  )
+ c += 1
  elif selection == "all":
  if merge:
  specs = []
@@ -414,23 +475,19 @@
  spect = spect.centroid(
  tolerance=self.parameters["mz_tol_ms1_da"],
  ppm=self.parameters["mz_tol_ms1_ppm"],
- min_points=self.parameters[
- "centroid_min_points_ms1"
- ],
+ min_points=self.parameters["centroid_min_points_ms1"],
  algo=centroid_algo,
  )
  elif spect.ms_level == 2:
  spect = spect.centroid(
  tolerance=self.parameters["mz_tol_ms2_da"],
  ppm=self.parameters["mz_tol_ms2_ppm"],
- min_points=self.parameters[
- "centroid_min_points_ms2"
- ],
+ min_points=self.parameters["centroid_min_points_ms2"],
  algo=centroid_algo,
  )
  if deisotope:
  spect = spect.deisotope()
- title = f"fid:{row['fid']}, rt:{row['rt']:.2f}, mz:{row['mz']:.4f}, merged"
+ title = f"fid:{feature_uid}, rt:{rt_str}, mz:{mz_str}, merged"
  spect = filter_peaks(
  spect,
  inty_min=inty_min,
@@ -441,12 +498,13 @@
  write_ion(
  f,
  title,
- row["feature_uid"],
- row["mz"],
- row["rt"],
- row["charge"],
+ feature_uid,
+ mz,
+ rt,
+ charge,
  spect,
  )
+ c += 1
  else:
  for ms2_scans in row["ms2_scans"]:
  spect = self.get_spectrum(
@@ -465,24 +523,30 @@
  )
  write_ion(
  f,
- f"fid:{row['feature_uid']}, rt:{row['rt']:.2f}, mz:{row['mz']:.4f}, scan_uid:{ms2_scans}",
- row["feature_uid"],
- row["mz"],
- row["rt"],
- row["charge"],
+ f"fid:{feature_uid}, rt:{rt_str}, mz:{mz_str}, scan_uid:{ms2_scans}",
+ feature_uid,
+ mz,
+ rt,
+ charge,
  spect,
  )
+ c += 1

- self.logger.info(f"Exported {c - skip} features to {filename}")
+ self.logger.info(f"Exported {c} features to {filename}")

+ # Handle None values in logging
+ inty_min_str = f"{inty_min:.3f}" if inty_min != float("-inf") else "None"
+ q1_ratio_min_str = f"{q1_ratio_min:.3f}" if q1_ratio_min is not None else "None"
+ eic_corr_min_str = f"{eic_corr_min:.3f}" if eic_corr_min is not None else "None"
+
  self.logger.debug(
- f"MGF created with int>{inty_min:.3f}, q1_ratio>{q1_ratio_min:.3f}, eic_corr>{eic_corr_min:.3f}",
+ f"MGF created with int>{inty_min_str}, q1_ratio>{q1_ratio_min_str}, eic_corr>{eic_corr_min_str}",
  )
  self.logger.debug(
- f"- Exported {c} MS2 features for {len(features) - skip} precursors. Average peaks/feature is {c / (len(features) - skip + 0.000000001):.0f}",
+ f"- Exported {c} MS2 spectra for {len(features_list) - skip} precursors. Average spectra/feature is {c / (len(features_list) - skip + 0.000000001):.0f}",
  )
  self.logger.debug(
- f"- Skipped {skip} features because no MS2 peaks were left after filtering.",
+ f"- Skipped {skip} features because no MS2 scans were available.",
  )

@@ -510,9 +574,7 @@ def export_dda_stats(self, filename="stats.csv"):
  ms2_count = len(self.scans_df.filter(pl.col("ms_level") == 2))
  features_count = len(self.features_df) if self.features_df is not None else 0
  features_with_ms2 = (
- self.features_df.filter(pl.col("ms2_scans").is_not_null()).height
- if self.features_df is not None
- else 0
+ self.features_df.filter(pl.col("ms2_scans").is_not_null()).height if self.features_df is not None else 0
  )

  # Initialize a dictionary to hold statistics
@@ -527,9 +589,7 @@ def export_dda_stats(self, filename="stats.csv"):
  if "time_cycle" in self.scans_df.columns:
  ms1_df = self.scans_df.filter(pl.col("ms_level") == 1)
  avg_cycle_time = ms1_df["time_cycle"].mean()
- stats["Average_cycle_time"] = (
- avg_cycle_time if avg_cycle_time is not None else ""
- )
+ stats["Average_cycle_time"] = avg_cycle_time if avg_cycle_time is not None else ""
  else:
  stats["Average_cycle_time"] = 0

@@ -122,12 +122,6 @@ def _save_consensusXML(self, filename:str):
  return

  fh = oms.ConsensusXMLFile()
- # check if filename includes any path
- if not os.path.isabs(filename):
- if self.default_folder is not None:
- filename = os.path.join(self.default_folder, filename)
- else:
- filename = os.path.join(os.getcwd(), filename)
  fh.store(filename, self.consensus_map)
  self.logger.info(f"Saved consensus map to {filename}")

@@ -147,6 +147,7 @@ class Study:

  def __init__(
  self,
+ filename=None,
  **kwargs,
  ):
  """
@@ -156,6 +157,10 @@ class Study:
  data storage, and processing parameters used for study-level analysis.

  Parameters:
+ filename (str, optional): Path to a .study5 file to load automatically.
+ If provided, the default_folder will be set to the
+ directory containing this file, and the study will
+ be loaded automatically.
  **kwargs: Keyword arguments for setting study parameters. Can include:
  - A study_defaults instance to set all parameters at once (pass as params=study_defaults(...))
  - Individual parameter names and values (see study_defaults for available parameters)
@@ -172,6 +177,20 @@ class Study:
  """
  # Initialize default parameters

+ # Handle filename parameter for automatic loading
+ auto_load_filename = None
+ if filename is not None:
+ if not filename.endswith('.study5'):
+ raise ValueError("filename must be a .study5 file")
+ if not os.path.exists(filename):
+ raise FileNotFoundError(f"Study file not found: {filename}")
+
+ # Set default_folder to the directory containing the file if not already specified
+ if 'default_folder' not in kwargs:
+ kwargs['default_folder'] = os.path.dirname(os.path.abspath(filename))
+
+ auto_load_filename = filename
+
  # Check if a study_defaults instance was passed
  if "params" in kwargs and isinstance(kwargs["params"], study_defaults):
  params = kwargs.pop("params")
@@ -234,6 +253,10 @@ class Study:
  sink=self.log_sink
  )

+ # Auto-load study file if filename was provided
+ if auto_load_filename is not None:
+ self.load(filename=auto_load_filename)
+


  # Attach module functions as class methods
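Taken together with the validation block added to `__init__`, this means an existing study can be reopened in a single call; a hedged usage example (the path is a placeholder for a .study5 file written by an earlier `study.save()`):

```python
import masster

# default_folder is inferred from the file's directory, then the study is loaded
study = masster.Study(filename=r"D:\...\mylcms\mylcms.study5")
```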
@@ -1585,7 +1585,7 @@ wheels = [

  [[package]]
  name = "masster"
- version = "0.2.2"
+ version = "0.2.4"
  source = { editable = "." }
  dependencies = [
  { name = "alphabase" },
masster-0.2.2/README.md DELETED
@@ -1,177 +0,0 @@
- # MASSter
-
- **MASSter** is a comprehensive Python package for mass spectrometry data analysis, designed for metabolomics and LC-MS data processing. It provides tools for feature detection, alignment, consensus building, and interactive visualization of mass spectrometry datasets. It is designed to deal with DDA, and hides functionalities for DIA and ZTScan DIA data.
-
- Most core processing functions are derived from OpenMS. We use the same nomenclature and refer to their documentation for an explanation of the parameters. To a large extent, however, you should be able to use the defaults (=no parameters) when calling processing steps.
-
- This is a poorly documented, stable branch of the development codebase in use in the Zamboni lab. Novel functionalities will be added based on need and requests.
-
- ## Features
-
- - **Mass spectrometry data processing**: Support for multiple file formats (.wiff, .mzML, .raw, .mzpkl)
- - **Feature detection and alignment**: Automated chromatographic peak detection and retention time alignment
- - **Consensus feature building**: Identification of features across multiple samples
- - **Interactive visualizations**: 2D plots, chromatograms, and statistical dashboards
- - **Batch processing**: Process entire studies with multiple samples
- - **Export capabilities**: MGF export for spectral library searches
-
- ## Installation
-
- ```bash
- pip install masster
- ```
-
- ## Quick Start
-
- ### Basic Workflow for analyzing LC-MS study with 2-... samples
-
- ```python
- import masster
-
- # Initialize the Study object with the default folder
- study = masster.Study(default_folder=r'D:\...\mylcms')
-
- # Load data from folder with raw data, here: WIFF
- study.add_folder(r'D:\...\...\...\*.wiff')
-
- # Align maps
- study.align(rt_max_diff=2.0)
-
- # Find consensus features
- study.find_consensus(min_samples=3)
-
- # Retrieve missing data for quantification
- study.fill_chrom(abs_)
-
- # Integrate according to consensus metadata
- study.integrate_chrom()
-
- # link MS2 across the whole study and export them
- study.find_ms2()
- study.export_mgf()
-
- # Save the study to .study5
- study.save()
- ```
-
- ### Study-Level Plots
-
- ```python
- # Plot features from multiple samples
- study.plot_samples_2d()
-
- # Plot consensus features
- study.plot_consensus_2d()
-
- # Plot overlaid chromatograms for specific consensus features (use their uid)
- study.plot_chrom(uids=[1, 2, 3])
- ```
-
-
- ### Single Sample Processing
-
- ```python
- from masster.sample import Sample
-
- # Load a single sample (mzML, RAW, WIFF)
- sample = Sample("path/to/your/file.mzML")
-
- # Detect features
- sample.find_features(chrom_peak_snr=10, noise=500, chrom_fwhm=1.0)
-
- # Detect adducts
- sample.find_adducts()
-
- # Find MS2 spectra
- sample.find_ms2()
-
- # Save results to .sample5
- sample.save()
- ```
-
- Masster provides extensive plotting capabilities for data exploration and quality control:
-
- ### Single sample visualization
-
- ```python
- # Plot 2D overview of MS data with detected features
- sample.plot_2d(
- filename="overview_2d.html",
- show_features=True,
- show_ms2=True,
- title="MS Data Overview"
- )
-
- # Plot with feature filtering
- sample.plot_2d(
- filename="features_ms2_only.html"
- )
-
- # Plot extracted ion chromatogram
- sample.plot_eic(
- feature_uid=123,
- rt_tol=10,
- mz_tol=0.005
- )
- ```
-
- ### Quality Control Plots
-
- ```python
- # Plot DDA acquisition statistics
- sample.plot_dda_stats(filename="dda_stats.html")
-
- # Plot feature statistics
- sample.plot_feature_stats(filename="feature_stats.html")
-
- # Plot total ion chromatogram
- sample.plot_tic(filename="tic.html")
- ```
-
- ### Advanced Plotting Options
-
- ```python
- # Plot with Oracle annotation data
- sample.plot_2d_oracle(
- oracle_folder="path/to/oracle/results",
- colorby="hg", # Color by chemical class
- filename="annotated_features.html"
- )
-
- # Plot MS2 cycle view
- sample.plot_ms2_cycle(
- cycle=100,
- filename="ms2_cycle.html",
- centroid=True
- )
- ```
-
- ## File Format Support
-
- - **Input formats**: .wiff, .mzML, .raw files
- - **Intermediate formats**: .sample5 and .study5 (HDF5) for fast loading
- - **Export formats**: .mgf, .csv
- - **Visualization**: .html (interactive), .png, .svg
-
- ## Advanced Features
-
- ### Batch Processing
- Use the command-line interface for processing multiple files:
-
- ```bash
- python -m masster.demo.example_batch_process input_directory --recursive --dest output_directory
- ```
-
- ## Requirements
-
- - Python ≥ 3.11
- - Key dependencies: pandas, polars, numpy, scipy, matplotlib, bokeh, holoviews, panel
- - See `pyproject.toml` for complete dependency list
-
- ## License
-
- GNU Affero General Public License v3
-
- ## Citation
-
- If you use Masster in your research, please cite this repository.