masster 0.5.13__tar.gz → 0.5.14__tar.gz

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of masster might be problematic. Click here for more details.

Files changed (97)
  1. {masster-0.5.13 → masster-0.5.14}/PKG-INFO +1 -1
  2. {masster-0.5.13 → masster-0.5.14}/pyproject.toml +1 -1
  3. {masster-0.5.13 → masster-0.5.14}/src/masster/_version.py +1 -1
  4. {masster-0.5.13 → masster-0.5.14}/src/masster/lib/lib.py +371 -57
  5. {masster-0.5.13 → masster-0.5.14}/src/masster/study/helpers.py +1 -0
  6. {masster-0.5.13 → masster-0.5.14}/src/masster/study/id.py +233 -36
  7. masster-0.5.14/src/masster/study/importers.py +331 -0
  8. {masster-0.5.13 → masster-0.5.14}/src/masster/study/merge.py +1 -1
  9. {masster-0.5.13 → masster-0.5.14}/src/masster/study/plot.py +10 -18
  10. {masster-0.5.13 → masster-0.5.14}/src/masster/study/study5_schema.json +9 -0
  11. {masster-0.5.13 → masster-0.5.14}/src/masster/wizard/__init__.py +4 -4
  12. {masster-0.5.13 → masster-0.5.14}/src/masster/wizard/wizard.py +437 -19
  13. {masster-0.5.13 → masster-0.5.14}/uv.lock +1 -1
  14. masster-0.5.13/src/masster/study/importers.py +0 -222
  15. {masster-0.5.13 → masster-0.5.14}/.github/workflows/publish.yml +0 -0
  16. {masster-0.5.13 → masster-0.5.14}/.github/workflows/security.yml +0 -0
  17. {masster-0.5.13 → masster-0.5.14}/.github/workflows/test.yml +0 -0
  18. {masster-0.5.13 → masster-0.5.14}/.gitignore +0 -0
  19. {masster-0.5.13 → masster-0.5.14}/.pre-commit-config.yaml +0 -0
  20. {masster-0.5.13 → masster-0.5.14}/LICENSE +0 -0
  21. {masster-0.5.13 → masster-0.5.14}/Makefile +0 -0
  22. {masster-0.5.13 → masster-0.5.14}/README.md +0 -0
  23. {masster-0.5.13 → masster-0.5.14}/TESTING.md +0 -0
  24. {masster-0.5.13 → masster-0.5.14}/demo/example_batch_process.py +0 -0
  25. {masster-0.5.13 → masster-0.5.14}/demo/example_sample_process.py +0 -0
  26. {masster-0.5.13 → masster-0.5.14}/src/masster/__init__.py +0 -0
  27. {masster-0.5.13 → masster-0.5.14}/src/masster/chromatogram.py +0 -0
  28. {masster-0.5.13 → masster-0.5.14}/src/masster/data/dda/20250530_VH_IQX_KW_RP_HSST3_100mm_12min_pos_v4_DDA_OT_C-MiLUT_QC_dil2_01_20250602151849.sample5 +0 -0
  29. {masster-0.5.13 → masster-0.5.14}/src/masster/data/dda/20250530_VH_IQX_KW_RP_HSST3_100mm_12min_pos_v4_DDA_OT_C-MiLUT_QC_dil3_01_20250602150634.sample5 +0 -0
  30. {masster-0.5.13 → masster-0.5.14}/src/masster/data/dda/20250530_VH_IQX_KW_RP_HSST3_100mm_12min_pos_v4_MS1_C-MiLUT_C008_v6_r38_01.sample5 +0 -0
  31. {masster-0.5.13 → masster-0.5.14}/src/masster/data/dda/20250530_VH_IQX_KW_RP_HSST3_100mm_12min_pos_v4_MS1_C-MiLUT_C008_v7_r37_01.sample5 +0 -0
  32. {masster-0.5.13 → masster-0.5.14}/src/masster/data/dda/20250530_VH_IQX_KW_RP_HSST3_100mm_12min_pos_v4_MS1_C-MiLUT_C017_v5_r99_01.sample5 +0 -0
  33. {masster-0.5.13 → masster-0.5.14}/src/masster/data/libs/aa.csv +0 -0
  34. {masster-0.5.13 → masster-0.5.14}/src/masster/data/libs/ccm.csv +0 -0
  35. {masster-0.5.13 → masster-0.5.14}/src/masster/data/libs/hilic.csv +0 -0
  36. {masster-0.5.13 → masster-0.5.14}/src/masster/data/libs/urine.csv +0 -0
  37. {masster-0.5.13 → masster-0.5.14}/src/masster/data/wiff/2025_01_14_VW_7600_LpMx_DBS_CID_2min_TOP15_030msecMS1_005msecReac_CE35_DBS-ON_3.timeseries.data +0 -0
  38. {masster-0.5.13 → masster-0.5.14}/src/masster/data/wiff/2025_01_14_VW_7600_LpMx_DBS_CID_2min_TOP15_030msecMS1_005msecReac_CE35_DBS-ON_3.wiff +0 -0
  39. {masster-0.5.13 → masster-0.5.14}/src/masster/data/wiff/2025_01_14_VW_7600_LpMx_DBS_CID_2min_TOP15_030msecMS1_005msecReac_CE35_DBS-ON_3.wiff.scan +0 -0
  40. {masster-0.5.13 → masster-0.5.14}/src/masster/data/wiff/2025_01_14_VW_7600_LpMx_DBS_CID_2min_TOP15_030msecMS1_005msecReac_CE35_DBS-ON_3.wiff2 +0 -0
  41. {masster-0.5.13 → masster-0.5.14}/src/masster/lib/__init__.py +0 -0
  42. {masster-0.5.13 → masster-0.5.14}/src/masster/logger.py +0 -0
  43. {masster-0.5.13 → masster-0.5.14}/src/masster/sample/__init__.py +0 -0
  44. {masster-0.5.13 → masster-0.5.14}/src/masster/sample/adducts.py +0 -0
  45. {masster-0.5.13 → masster-0.5.14}/src/masster/sample/defaults/__init__.py +0 -0
  46. {masster-0.5.13 → masster-0.5.14}/src/masster/sample/defaults/find_adducts_def.py +0 -0
  47. {masster-0.5.13 → masster-0.5.14}/src/masster/sample/defaults/find_features_def.py +0 -0
  48. {masster-0.5.13 → masster-0.5.14}/src/masster/sample/defaults/find_ms2_def.py +0 -0
  49. {masster-0.5.13 → masster-0.5.14}/src/masster/sample/defaults/get_spectrum_def.py +0 -0
  50. {masster-0.5.13 → masster-0.5.14}/src/masster/sample/defaults/sample_def.py +0 -0
  51. {masster-0.5.13 → masster-0.5.14}/src/masster/sample/h5.py +0 -0
  52. {masster-0.5.13 → masster-0.5.14}/src/masster/sample/helpers.py +0 -0
  53. {masster-0.5.13 → masster-0.5.14}/src/masster/sample/lib.py +0 -0
  54. {masster-0.5.13 → masster-0.5.14}/src/masster/sample/load.py +0 -0
  55. {masster-0.5.13 → masster-0.5.14}/src/masster/sample/parameters.py +0 -0
  56. {masster-0.5.13 → masster-0.5.14}/src/masster/sample/plot.py +0 -0
  57. {masster-0.5.13 → masster-0.5.14}/src/masster/sample/processing.py +0 -0
  58. {masster-0.5.13 → masster-0.5.14}/src/masster/sample/quant.py +0 -0
  59. {masster-0.5.13 → masster-0.5.14}/src/masster/sample/sample.py +0 -0
  60. {masster-0.5.13 → masster-0.5.14}/src/masster/sample/sample5_schema.json +0 -0
  61. {masster-0.5.13 → masster-0.5.14}/src/masster/sample/save.py +0 -0
  62. {masster-0.5.13 → masster-0.5.14}/src/masster/sample/sciex.py +0 -0
  63. {masster-0.5.13 → masster-0.5.14}/src/masster/spectrum.py +0 -0
  64. {masster-0.5.13 → masster-0.5.14}/src/masster/study/__init__.py +0 -0
  65. {masster-0.5.13 → masster-0.5.14}/src/masster/study/analysis.py +0 -0
  66. {masster-0.5.13 → masster-0.5.14}/src/masster/study/defaults/__init__.py +0 -0
  67. {masster-0.5.13 → masster-0.5.14}/src/masster/study/defaults/align_def.py +0 -0
  68. {masster-0.5.13 → masster-0.5.14}/src/masster/study/defaults/export_def.py +0 -0
  69. {masster-0.5.13 → masster-0.5.14}/src/masster/study/defaults/fill_def.py +0 -0
  70. {masster-0.5.13 → masster-0.5.14}/src/masster/study/defaults/find_consensus_def.py +0 -0
  71. {masster-0.5.13 → masster-0.5.14}/src/masster/study/defaults/find_ms2_def.py +0 -0
  72. {masster-0.5.13 → masster-0.5.14}/src/masster/study/defaults/identify_def.py +0 -0
  73. {masster-0.5.13 → masster-0.5.14}/src/masster/study/defaults/integrate_chrom_def.py +0 -0
  74. {masster-0.5.13 → masster-0.5.14}/src/masster/study/defaults/integrate_def.py +0 -0
  75. {masster-0.5.13 → masster-0.5.14}/src/masster/study/defaults/merge_def.py +0 -0
  76. {masster-0.5.13 → masster-0.5.14}/src/masster/study/defaults/study_def.py +0 -0
  77. {masster-0.5.13 → masster-0.5.14}/src/masster/study/export.py +0 -0
  78. {masster-0.5.13 → masster-0.5.14}/src/masster/study/h5.py +0 -0
  79. {masster-0.5.13 → masster-0.5.14}/src/masster/study/load.py +0 -0
  80. {masster-0.5.13 → masster-0.5.14}/src/masster/study/parameters.py +0 -0
  81. {masster-0.5.13 → masster-0.5.14}/src/masster/study/processing.py +0 -0
  82. {masster-0.5.13 → masster-0.5.14}/src/masster/study/save.py +0 -0
  83. {masster-0.5.13 → masster-0.5.14}/src/masster/study/study.py +0 -0
  84. {masster-0.5.13 → masster-0.5.14}/src/masster/wizard/README.md +0 -0
  85. {masster-0.5.13 → masster-0.5.14}/src/masster/wizard/example.py +0 -0
  86. {masster-0.5.13 → masster-0.5.14}/tests/conftest.py +0 -0
  87. {masster-0.5.13 → masster-0.5.14}/tests/test_chromatogram.py +0 -0
  88. {masster-0.5.13 → masster-0.5.14}/tests/test_defaults.py +0 -0
  89. {masster-0.5.13 → masster-0.5.14}/tests/test_imports.py +0 -0
  90. {masster-0.5.13 → masster-0.5.14}/tests/test_integration.py +0 -0
  91. {masster-0.5.13 → masster-0.5.14}/tests/test_logger.py +0 -0
  92. {masster-0.5.13 → masster-0.5.14}/tests/test_parameters.py +0 -0
  93. {masster-0.5.13 → masster-0.5.14}/tests/test_sample.py +0 -0
  94. {masster-0.5.13 → masster-0.5.14}/tests/test_spectrum.py +0 -0
  95. {masster-0.5.13 → masster-0.5.14}/tests/test_study.py +0 -0
  96. {masster-0.5.13 → masster-0.5.14}/tests/test_version.py +0 -0
  97. {masster-0.5.13 → masster-0.5.14}/tox.ini +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: masster
3
- Version: 0.5.13
3
+ Version: 0.5.14
4
4
  Summary: Mass spectrometry data analysis package
5
5
  Project-URL: homepage, https://github.com/zamboni-lab/masster
6
6
  Project-URL: repository, https://github.com/zamboni-lab/masster
@@ -1,7 +1,7 @@
1
1
 
2
2
  [project]
3
3
  name = "masster"
4
- version = "0.5.13"
4
+ version = "0.5.14"
5
5
  description = "Mass spectrometry data analysis package"
6
6
  authors = [
7
7
  { name = "Zamboni Lab" }
@@ -1,7 +1,7 @@
1
1
  from __future__ import annotations
2
2
 
3
3
 
4
- __version__ = "0.5.13"
4
+ __version__ = "0.5.14"
5
5
 
6
6
 
7
7
  def get_version():
@@ -46,11 +46,9 @@ annotations = lib.annotate_features(sample.features_df)
46
46
  """
47
47
 
48
48
  import os
49
- import uuid
50
49
  from typing import Optional, Union, List, Dict, Any, TYPE_CHECKING
51
50
  import warnings
52
51
 
53
- import numpy as np
54
52
  import polars as pl
55
53
  import pyopenms as oms
56
54
 
@@ -58,6 +56,148 @@ if TYPE_CHECKING:
58
56
  import pandas as pd
59
57
 
60
58
 
59
+ def _calculate_formula_mass_shift(formula: str) -> float:
60
+ """
61
+ Calculate mass shift from formula string like "+H", "-H2O", "+Na-H", etc.
62
+
63
+ Parameters
64
+ ----------
65
+ formula : str
66
+ Formula string (e.g., "+H", "-H2O", "+Na-H")
67
+
68
+ Returns
69
+ -------
70
+ float
71
+ Mass shift in Daltons
72
+ """
73
+ # Standard atomic masses
74
+ atomic_masses = {
75
+ "H": 1.007825,
76
+ "C": 12.0,
77
+ "N": 14.003074,
78
+ "O": 15.994915,
79
+ "Na": 22.989769,
80
+ "K": 38.963707,
81
+ "Li": 7.016003,
82
+ "Ca": 39.962591,
83
+ "Mg": 23.985042,
84
+ "Fe": 55.934938,
85
+ "Cl": 34.968853,
86
+ "Br": 78.918336,
87
+ "I": 126.904473,
88
+ "P": 30.973762,
89
+ "S": 31.972071,
90
+ }
91
+
92
+ total_mass = 0.0
93
+
94
+ # Parse formula by splitting on + and - while preserving the operators
95
+ parts = []
96
+ current_part = ""
97
+ current_sign = 1
98
+
99
+ for char in formula:
100
+ if char == "+":
101
+ if current_part:
102
+ parts.append((current_sign, current_part))
103
+ current_part = ""
104
+ current_sign = 1
105
+ elif char == "-":
106
+ if current_part:
107
+ parts.append((current_sign, current_part))
108
+ current_part = ""
109
+ current_sign = -1
110
+ else:
111
+ current_part += char
112
+
113
+ if current_part:
114
+ parts.append((current_sign, current_part))
115
+
116
+ # Process each part
117
+ for sign, part in parts:
118
+ if not part:
119
+ continue
120
+
121
+ # Parse element and count (e.g., "H2O" -> H:2, O:1)
122
+ elements = _parse_element_counts(part)
123
+
124
+ for element, count in elements.items():
125
+ if element in atomic_masses:
126
+ total_mass += sign * atomic_masses[element] * count
127
+
128
+ return total_mass
129
+
130
+
131
+ def _parse_element_counts(formula_part: str) -> Dict[str, int]:
132
+ """Parse element counts from a formula part like 'H2O' -> {'H': 2, 'O': 1}"""
133
+ elements = {}
134
+ i = 0
135
+
136
+ while i < len(formula_part):
137
+ # Get element (uppercase letter, possibly followed by lowercase)
138
+ element = formula_part[i]
139
+ i += 1
140
+
141
+ while i < len(formula_part) and formula_part[i].islower():
142
+ element += formula_part[i]
143
+ i += 1
144
+
145
+ # Get count (digits following element)
146
+ count_str = ""
147
+ while i < len(formula_part) and formula_part[i].isdigit():
148
+ count_str += formula_part[i]
149
+ i += 1
150
+
151
+ count = int(count_str) if count_str else 1
152
+ elements[element] = elements.get(element, 0) + count
153
+
154
+ return elements
155
+
156
+
157
+ def _format_adduct_name(components: List[Dict]) -> str:
158
+ """Format adduct name from components like [M+H]1+ or [M+2H]2+"""
159
+ if not components:
160
+ return "[M]"
161
+
162
+ # Count occurrences of each formula
163
+ from collections import Counter
164
+
165
+ formula_counts = Counter(comp["formula"] for comp in components)
166
+ total_charge = sum(comp["charge"] for comp in components)
167
+
168
+ # Build formula part with proper multipliers
169
+ formula_parts = []
170
+ for formula, count in sorted(
171
+ formula_counts.items(),
172
+ ): # Sort for consistent ordering
173
+ if count == 1:
174
+ formula_parts.append(formula)
175
+ else:
176
+ # For multiple occurrences, use count prefix (e.g., 2H, 3Na)
177
+ # Handle special case where formula might already start with + or -
178
+ if formula.startswith(("+", "-")):
179
+ sign = formula[0]
180
+ base_formula = formula[1:]
181
+ formula_parts.append(f"{sign}{count}{base_formula}")
182
+ else:
183
+ formula_parts.append(f"{count}{formula}")
184
+
185
+ # Combine formula parts
186
+ formula = "".join(formula_parts)
187
+
188
+ # Format charge
189
+ if total_charge == 0:
190
+ charge_str = ""
191
+ elif abs(total_charge) == 1:
192
+ charge_str = "1+" if total_charge > 0 else "1-"
193
+ else:
194
+ charge_str = (
195
+ f"{abs(total_charge)}+" if total_charge > 0 else f"{abs(total_charge)}-"
196
+ )
197
+
198
+ return f"[M{formula}]{charge_str}"
199
+
200
+
61
201
  class Lib:
62
202
  """
63
203
  A class for managing compound libraries and feature annotation in mass spectrometry data.
@@ -89,22 +229,21 @@ class Lib:
89
229
  >>> print(f"Loaded {len(lib.lib_df)} library entries")
90
230
  """
91
231
 
92
- # Define supported adducts and their properties
93
- ADDUCT_DEFINITIONS = {
94
- # Positive mode adducts
95
- "[M+H]1+": {"delta_m": 1.007276, "delta_z": 1, "polarity": "positive"},
96
- "[M+Na]1+": {"delta_m": 22.989218, "delta_z": 1, "polarity": "positive"},
97
- "[M+K]1+": {"delta_m": 38.962383, "delta_z": 1, "polarity": "positive"},
98
- "[M+NH4]1+": {"delta_m": 18.033823, "delta_z": 1, "polarity": "positive"},
99
- "[M+H-H2O]1+": {"delta_m": -17.00329, "delta_z": 1, "polarity": "positive"},
100
- "[M+2H]2+": {"delta_m": 2.014552, "delta_z": 2, "polarity": "positive"},
101
-
102
- # Negative mode adducts
103
- "[M-H]1-": {"delta_m": -1.007276, "delta_z": -1, "polarity": "negative"},
104
- "[M+CH3COO]1-": {"delta_m": 59.013852, "delta_z": -1, "polarity": "negative"},
105
- "[M+HCOO]1-": {"delta_m": 44.998203, "delta_z": -1, "polarity": "negative"},
106
- "[M+Cl]1-": {"delta_m": 34.968853, "delta_z": -1, "polarity": "negative"},
107
- "[M-2H]2-": {"delta_m": -2.014552, "delta_z": -2, "polarity": "negative"},
232
+ # Default adduct definitions using OpenMS format
233
+ DEFAULT_ADDUCTS = {
234
+ "positive": [
235
+ "+H:1:0.65",
236
+ "+Na:1:0.15",
237
+ "+K:1:0.05",
238
+ "+NH4:1:0.15",
239
+ "-H2O:0:0.15",
240
+ ],
241
+ "negative": [
242
+ "-H:-1:0.9",
243
+ "+Cl:-1:0.1",
244
+ "+CH2O2:0:0.15",
245
+ "-H2O:0:0.15",
246
+ ]
108
247
  }
109
248
 
110
249
  def __init__(self):
@@ -119,12 +258,15 @@ class Lib:
119
258
  "cmpd_uid": pl.Series([], dtype=pl.Int64),
120
259
  "source_id": pl.Series([], dtype=pl.Utf8),
121
260
  "name": pl.Series([], dtype=pl.Utf8),
261
+ "shortname": pl.Series([], dtype=pl.Utf8),
262
+ "class": pl.Series([], dtype=pl.Utf8),
122
263
  "smiles": pl.Series([], dtype=pl.Utf8),
123
264
  "inchi": pl.Series([], dtype=pl.Utf8),
124
265
  "inchikey": pl.Series([], dtype=pl.Utf8),
125
266
  "formula": pl.Series([], dtype=pl.Utf8),
126
267
  "iso": pl.Series([], dtype=pl.Int64),
127
268
  "adduct": pl.Series([], dtype=pl.Utf8),
269
+ "probability": pl.Series([], dtype=pl.Float64),
128
270
  "m": pl.Series([], dtype=pl.Float64),
129
271
  "z": pl.Series([], dtype=pl.Int8),
130
272
  "mz": pl.Series([], dtype=pl.Float64),
@@ -134,6 +276,177 @@ class Lib:
134
276
  "db": pl.Series([], dtype=pl.Utf8),
135
277
  })
136
278
 
279
+ def _get_adducts(self,
280
+ adducts_list: Optional[List[str]] = None,
281
+ polarity: Optional[str] = None,
282
+ min_probability: float = 0.03,
283
+ **kwargs) -> pl.DataFrame:
284
+ """
285
+ Generate comprehensive adduct specifications for the library.
286
+
287
+ This method creates a DataFrame of adduct combinations following the same
288
+ syntax as Study() and Sample() classes.
289
+
290
+ Args:
291
+ adducts_list: List of adduct specifications in OpenMS format (e.g., "+H:1:0.65")
292
+ polarity: "positive", "negative", or None for both
293
+ min_probability: Minimum probability threshold to filter adducts
294
+ **kwargs: Additional parameters for adduct generation
295
+
296
+ Returns:
297
+ DataFrame with columns:
298
+ - name: Formatted adduct name like "[M+H]1+"
299
+ - charge: Total charge of the adduct
300
+ - mass_shift: Total mass shift in Da
301
+ - probability: Combined probability score
302
+ - complexity: Number of adduct components
303
+ """
304
+ # Get adduct specifications
305
+ if adducts_list is None:
306
+ if polarity is None:
307
+ # Use positive by default
308
+ adducts_list = self.DEFAULT_ADDUCTS["positive"]
309
+ elif polarity.lower() in ["positive", "pos"]:
310
+ adducts_list = self.DEFAULT_ADDUCTS["positive"]
311
+ elif polarity.lower() in ["negative", "neg"]:
312
+ adducts_list = self.DEFAULT_ADDUCTS["negative"]
313
+ else:
314
+ raise ValueError(f"Unknown polarity: {polarity}")
315
+
316
+ # Parameters
317
+ charge_min = kwargs.get("charge_min", -2)
318
+ charge_max = kwargs.get("charge_max", 2)
319
+ max_combinations = kwargs.get("max_combinations", 2)
320
+
321
+ # Parse base adduct specifications
322
+ base_specs = []
323
+
324
+ for adduct_str in adducts_list:
325
+ if not isinstance(adduct_str, str) or ":" not in adduct_str:
326
+ continue
327
+
328
+ try:
329
+ parts = adduct_str.split(":")
330
+ if len(parts) != 3:
331
+ continue
332
+
333
+ formula_part = parts[0]
334
+ charge = int(parts[1])
335
+ probability = float(parts[2])
336
+
337
+ # Calculate mass shift from formula
338
+ mass_shift = _calculate_formula_mass_shift(formula_part)
339
+
340
+ base_specs.append({
341
+ "formula": formula_part,
342
+ "charge": charge,
343
+ "mass_shift": mass_shift,
344
+ "probability": probability,
345
+ "raw_string": adduct_str,
346
+ })
347
+
348
+ except (ValueError, IndexError):
349
+ continue
350
+
351
+ if not base_specs:
352
+ # Return empty DataFrame with correct schema
353
+ return pl.DataFrame({
354
+ "name": [],
355
+ "charge": [],
356
+ "mass_shift": [],
357
+ "probability": [],
358
+ "complexity": [],
359
+ })
360
+
361
+ # Generate all valid combinations
362
+ combinations_list = []
363
+
364
+ # Separate specs by charge type
365
+ positive_specs = [spec for spec in base_specs if spec["charge"] > 0]
366
+ negative_specs = [spec for spec in base_specs if spec["charge"] < 0]
367
+ neutral_specs = [spec for spec in base_specs if spec["charge"] == 0]
368
+
369
+ # 1. Single adducts
370
+ for spec in base_specs:
371
+ if charge_min <= spec["charge"] <= charge_max:
372
+ formatted_name = _format_adduct_name([spec])
373
+ combinations_list.append({
374
+ "components": [spec],
375
+ "formatted_name": formatted_name,
376
+ "total_mass_shift": spec["mass_shift"],
377
+ "total_charge": spec["charge"],
378
+ "combined_probability": spec["probability"],
379
+ "complexity": 1,
380
+ })
381
+
382
+ # 2. Generate multiply charged versions (2H+, 3H+, etc.)
383
+ if max_combinations >= 2:
384
+ for spec in positive_specs + negative_specs:
385
+ base_charge = spec["charge"]
386
+ for multiplier in range(2, min(max_combinations + 1, 4)):
387
+ total_charge = base_charge * multiplier
388
+ if charge_min <= total_charge <= charge_max:
389
+ components = [spec] * multiplier
390
+ formatted_name = _format_adduct_name(components)
391
+
392
+ combinations_list.append({
393
+ "components": components,
394
+ "formatted_name": formatted_name,
395
+ "total_mass_shift": spec["mass_shift"] * multiplier,
396
+ "total_charge": total_charge,
397
+ "combined_probability": spec["probability"] ** multiplier,
398
+ "complexity": multiplier,
399
+ })
400
+
401
+ # 3. Mixed combinations (positive + neutral)
402
+ if max_combinations >= 2:
403
+ for pos_spec in positive_specs:
404
+ for neut_spec in neutral_specs:
405
+ total_charge = pos_spec["charge"] + neut_spec["charge"]
406
+ if charge_min <= total_charge <= charge_max:
407
+ components = [pos_spec, neut_spec]
408
+ formatted_name = _format_adduct_name(components)
409
+ combinations_list.append({
410
+ "components": components,
411
+ "formatted_name": formatted_name,
412
+ "total_mass_shift": pos_spec["mass_shift"] + neut_spec["mass_shift"],
413
+ "total_charge": total_charge,
414
+ "combined_probability": pos_spec["probability"] * neut_spec["probability"],
415
+ "complexity": 2,
416
+ })
417
+
418
+ # Convert to polars DataFrame
419
+ if combinations_list:
420
+ combinations_list.sort(
421
+ key=lambda x: (-x["combined_probability"], x["complexity"])
422
+ )
423
+
424
+ adducts_df = pl.DataFrame([
425
+ {
426
+ "name": combo["formatted_name"],
427
+ "charge": combo["total_charge"],
428
+ "mass_shift": combo["total_mass_shift"],
429
+ "probability": combo["combined_probability"],
430
+ "complexity": combo["complexity"],
431
+ }
432
+ for combo in combinations_list
433
+ ])
434
+ else:
435
+ # Return empty DataFrame with correct schema
436
+ adducts_df = pl.DataFrame({
437
+ "name": [],
438
+ "charge": [],
439
+ "mass_shift": [],
440
+ "probability": [],
441
+ "complexity": [],
442
+ })
443
+
444
+ # Filter by minimum probability
445
+ if min_probability > 0.0 and len(adducts_df) > 0:
446
+ adducts_df = adducts_df.filter(pl.col("probability") >= min_probability)
447
+
448
+ return adducts_df
449
+
137
450
  def _calculate_accurate_mass(self, formula: str) -> Optional[float]:
138
451
  """
139
452
  Calculate the accurate mass for a molecular formula using PyOpenMS.
@@ -185,15 +498,17 @@ class Lib:
185
498
  compound_data: Dict[str, Any],
186
499
  adducts: Optional[List[str]] = None,
187
500
  polarity: Optional[str] = None,
188
- lib_id_counter: Optional[int] = None) -> tuple[List[Dict[str, Any]], int]:
501
+ lib_id_counter: Optional[int] = None,
502
+ min_probability: float = 0.03) -> tuple[List[Dict[str, Any]], int]:
189
503
  """
190
- Generate adduct variants for a given compound.
504
+ Generate adduct variants for a given compound using the new adduct system.
191
505
 
192
506
  Args:
193
507
  compound_data: Dictionary containing compound information
194
- adducts: List of specific adducts to generate. If None, uses all adducts for polarity
195
- polarity: Ionization polarity ("positive", "negative", or None for both)
508
+ adducts: List of specific adducts to generate. If None, uses defaults for polarity
509
+ polarity: Ionization polarity ("positive", "negative", or None for positive)
196
510
  lib_id_counter: Counter for generating unique lib_uid values
511
+ min_probability: Minimum probability threshold for adduct filtering
197
512
 
198
513
  Returns:
199
514
  Tuple of (list of dictionaries representing adduct variants, updated counter)
@@ -206,35 +521,25 @@ class Lib:
206
521
  if accurate_mass is None:
207
522
  return variants, counter
208
523
 
209
- # Determine which adducts to use
210
- if adducts is None:
211
- if polarity is None:
212
- # Use all adducts
213
- selected_adducts = list(self.ADDUCT_DEFINITIONS.keys())
214
- else:
215
- # Filter by polarity
216
- selected_adducts = [
217
- adduct for adduct, props in self.ADDUCT_DEFINITIONS.items()
218
- if props["polarity"] == polarity.lower()
219
- ]
220
- else:
221
- selected_adducts = adducts
524
+ # Get adduct specifications using _get_adducts
525
+ adducts_df = self._get_adducts(
526
+ adducts_list=adducts,
527
+ polarity=polarity,
528
+ min_probability=min_probability
529
+ )
530
+
531
+ if len(adducts_df) == 0:
532
+ return variants, counter
222
533
 
223
534
  # Generate variants for each adduct
224
- for adduct in selected_adducts:
225
- if adduct not in self.ADDUCT_DEFINITIONS:
226
- warnings.warn(f"Unknown adduct: {adduct}")
227
- continue
228
-
229
- adduct_props = self.ADDUCT_DEFINITIONS[adduct]
230
-
231
- # Skip if polarity doesn't match
232
- if polarity is not None and adduct_props["polarity"] != polarity.lower():
233
- continue
535
+ for adduct_row in adducts_df.iter_rows(named=True):
536
+ adduct_name = adduct_row["name"]
537
+ charge = adduct_row["charge"]
538
+ mass_shift = adduct_row["mass_shift"]
539
+ probability = adduct_row["probability"]
234
540
 
235
541
  # Calculate adducted mass and m/z
236
- adducted_mass = accurate_mass + adduct_props["delta_m"]
237
- charge = adduct_props["delta_z"]
542
+ adducted_mass = accurate_mass + mass_shift
238
543
  mz = abs(adducted_mass / charge) if charge != 0 else adducted_mass
239
544
 
240
545
  # Create variant entry
@@ -243,12 +548,15 @@ class Lib:
243
548
  "cmpd_uid": compound_data.get("cmpd_uid", None),
244
549
  "source_id": compound_data.get("source_id", None),
245
550
  "name": compound_data.get("name", ""),
551
+ "shortname": compound_data.get("shortname", ""),
552
+ "class": compound_data.get("class", ""),
246
553
  "smiles": compound_data.get("smiles", ""),
247
554
  "inchi": compound_data.get("inchi", ""),
248
555
  "inchikey": compound_data.get("inchikey", ""),
249
556
  "formula": compound_data["formula"],
250
557
  "iso": 0, # Default to zero
251
- "adduct": adduct,
558
+ "adduct": adduct_name,
559
+ "probability": probability,
252
560
  "m": adducted_mass,
253
561
  "z": charge,
254
562
  "mz": mz,
@@ -265,7 +573,8 @@ class Lib:
265
573
  def import_csv(self,
266
574
  csvfile: str,
267
575
  polarity: Optional[str] = None,
268
- adducts: Optional[List[str]] = None) -> None:
576
+ adducts: Optional[List[str]] = None,
577
+ min_probability: float = 0.03) -> None:
269
578
  """
270
579
  Import compound library from a CSV file.
271
580
 
@@ -274,8 +583,9 @@ class Lib:
274
583
 
275
584
  Args:
276
585
  csvfile: Path to the CSV file
277
- polarity: Ionization polarity ("positive", "negative", or None for both)
278
- adducts: Specific adducts to generate. If None, generates all for the polarity
586
+ polarity: Ionization polarity ("positive", "negative", or None for positive)
587
+ adducts: Specific adducts to generate. If None, generates defaults for the polarity
588
+ min_probability: Minimum probability threshold for adduct filtering
279
589
 
280
590
  Expected CSV columns (case-insensitive):
281
591
  - Required: Formula (or formula)
@@ -319,6 +629,8 @@ class Lib:
319
629
 
320
630
  compound_data = {
321
631
  "name": row.get(column_mapping.get("name", ""), ""),
632
+ "shortname": row.get(column_mapping.get("shortname", ""), ""),
633
+ "class": row.get(column_mapping.get("class", ""), ""),
322
634
  "smiles": row.get(column_mapping.get("smiles", ""), ""),
323
635
  "inchi": row.get(column_mapping.get("inchi", ""), ""),
324
636
  "inchikey": row.get(column_mapping.get("inchikey", ""), ""),
@@ -331,7 +643,8 @@ class Lib:
331
643
 
332
644
  # Generate adduct variants
333
645
  variants, lib_id_counter = self._generate_adduct_variants(
334
- compound_data, adducts=adducts, polarity=polarity, lib_id_counter=lib_id_counter
646
+ compound_data, adducts=adducts, polarity=polarity,
647
+ lib_id_counter=lib_id_counter, min_probability=min_probability
335
648
  )
336
649
  all_variants.extend(variants)
337
650
 
@@ -349,7 +662,8 @@ class Lib:
349
662
  compound_data_rt2["name"] = compound_data["name"] + " II"
350
663
 
351
664
  variants_rt2, lib_id_counter = self._generate_adduct_variants(
352
- compound_data_rt2, adducts=adducts, polarity=polarity, lib_id_counter=lib_id_counter
665
+ compound_data_rt2, adducts=adducts, polarity=polarity,
666
+ lib_id_counter=lib_id_counter, min_probability=min_probability
353
667
  )
354
668
  all_variants.extend(variants_rt2)
355
669
 
@@ -529,6 +843,8 @@ class Lib:
529
843
  "cmpd_uid": match_row.get("cmpd_uid"),
530
844
  "source_id": match_row.get("source_id"),
531
845
  "name": match_row["name"],
846
+ "shortname": match_row["shortname"],
847
+ "class": match_row["class"],
532
848
  "formula": match_row["formula"],
533
849
  "iso": match_row.get("iso", 0),
534
850
  "adduct": match_row["adduct"],
@@ -555,10 +871,8 @@ class Lib:
555
871
  Returns:
556
872
  List of adduct names
557
873
  """
558
- return [
559
- adduct for adduct, props in self.ADDUCT_DEFINITIONS.items()
560
- if props["polarity"] == polarity.lower()
561
- ]
874
+ adducts_df = self._get_adducts(polarity=polarity, min_probability=0.0)
875
+ return adducts_df.select("name").to_series().to_list()
562
876
 
563
877
  def __len__(self) -> int:
564
878
  """Return number of library entries."""
@@ -490,6 +490,7 @@ def align_reset(self):
490
490
  # Ensure column order is maintained after with_columns operation
491
491
  from masster.study.helpers import _ensure_features_df_schema_order
492
492
  _ensure_features_df_schema_order(self)
493
+ self.logger.info("Alignment reset: all feature RTs set to original_RT.")
493
494
 
494
495
 
495
496
  # =====================================================================================