masster 0.4.11__py3-none-any.whl → 0.4.13__py3-none-any.whl
This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
Potentially problematic release.
This version of masster might be problematic. Click here for more details.
- masster/_version.py +1 -1
- masster/lib/lib.py +45 -3
- masster/study/helpers.py +262 -310
- masster/study/id.py +564 -324
- masster/study/plot.py +38 -23
- masster/study/processing.py +268 -178
- masster/study/study.py +95 -60
- masster/study/study5_schema.json +12 -0
- {masster-0.4.11.dist-info → masster-0.4.13.dist-info}/METADATA +1 -1
- {masster-0.4.11.dist-info → masster-0.4.13.dist-info}/RECORD +13 -13
- {masster-0.4.11.dist-info → masster-0.4.13.dist-info}/WHEEL +0 -0
- {masster-0.4.11.dist-info → masster-0.4.13.dist-info}/entry_points.txt +0 -0
- {masster-0.4.11.dist-info → masster-0.4.13.dist-info}/licenses/LICENSE +0 -0
masster/_version.py
CHANGED
masster/lib/lib.py
CHANGED
|
@@ -142,11 +142,41 @@ class Lib:
|
|
|
142
142
|
Returns:
|
|
143
143
|
Accurate mass as float, or None if calculation fails
|
|
144
144
|
"""
|
|
145
|
+
# Skip obviously invalid formulas
|
|
146
|
+
if not formula or not isinstance(formula, str):
|
|
147
|
+
return None
|
|
148
|
+
|
|
149
|
+
# Clean up whitespace
|
|
150
|
+
formula = formula.strip()
|
|
151
|
+
|
|
152
|
+
# Skip formulas that are obviously invalid
|
|
153
|
+
invalid_patterns = [
|
|
154
|
+
# Contains parentheses with multipliers like (C12H19NO19S3)nH2O
|
|
155
|
+
lambda f: '(' in f and ')' in f and any(c.isalpha() and not c.isupper() for c in f.split(')')[1:]),
|
|
156
|
+
# Contains words instead of chemical symbols
|
|
157
|
+
lambda f: any(word in f.lower() for word in ['and', 'or', 'not', 'with', 'without']),
|
|
158
|
+
# Contains lowercase letters at the start (element symbols should be uppercase)
|
|
159
|
+
lambda f: f and f[0].islower(),
|
|
160
|
+
# Contains unusual characters that shouldn't be in formulas
|
|
161
|
+
lambda f: any(char in f for char in ['@', '#', '$', '%', '^', '&', '*', '=', '+', '?', '/', '\\', '|']),
|
|
162
|
+
# Empty or very short non-standard formulas
|
|
163
|
+
lambda f: len(f) < 2 and not f.isupper(),
|
|
164
|
+
]
|
|
165
|
+
|
|
166
|
+
for pattern_check in invalid_patterns:
|
|
167
|
+
try:
|
|
168
|
+
if pattern_check(formula):
|
|
169
|
+
warnings.warn(f"Skipping obviously invalid formula: '{formula}'")
|
|
170
|
+
return None
|
|
171
|
+
except Exception:
|
|
172
|
+
# If pattern checking fails, continue to PyOpenMS parsing
|
|
173
|
+
pass
|
|
174
|
+
|
|
145
175
|
try:
|
|
146
176
|
empirical_formula = oms.EmpiricalFormula(formula)
|
|
147
177
|
return empirical_formula.getMonoWeight()
|
|
148
178
|
except Exception as e:
|
|
149
|
-
warnings.warn(f"Error calculating accurate mass for formula {formula}: {e}")
|
|
179
|
+
warnings.warn(f"Error calculating accurate mass for formula '{formula}': {e}")
|
|
150
180
|
return None
|
|
151
181
|
|
|
152
182
|
def _generate_adduct_variants(self,
|
|
@@ -272,8 +302,12 @@ class Lib:
|
|
|
272
302
|
all_variants = []
|
|
273
303
|
cmpd_id_counter = 1
|
|
274
304
|
lib_id_counter = 1
|
|
305
|
+
total_compounds = 0
|
|
306
|
+
skipped_compounds = 0
|
|
275
307
|
|
|
276
308
|
for row in df.iter_rows(named=True):
|
|
309
|
+
total_compounds += 1
|
|
310
|
+
|
|
277
311
|
# Extract compound data
|
|
278
312
|
# assign a compound-level uid so all adducts share the same cmpd_uid
|
|
279
313
|
compound_level_uid = cmpd_id_counter
|
|
@@ -297,8 +331,12 @@ class Lib:
|
|
|
297
331
|
)
|
|
298
332
|
all_variants.extend(variants)
|
|
299
333
|
|
|
334
|
+
# Track if compound was skipped due to invalid formula
|
|
335
|
+
if len(variants) == 0:
|
|
336
|
+
skipped_compounds += 1
|
|
337
|
+
|
|
300
338
|
# Handle RT2 column if present
|
|
301
|
-
if "rt2" in column_mapping:
|
|
339
|
+
if "rt2" in column_mapping and len(variants) > 0: # Only if main variants were created
|
|
302
340
|
rt2_value = self._safe_float_conversion(row.get(column_mapping["rt2"], None))
|
|
303
341
|
if rt2_value is not None:
|
|
304
342
|
# Create additional variants with RT2
|
|
@@ -321,9 +359,13 @@ class Lib:
|
|
|
321
359
|
else:
|
|
322
360
|
self.lib_df = new_lib_df
|
|
323
361
|
|
|
324
|
-
|
|
362
|
+
#successful_compounds = total_compounds - skipped_compounds
|
|
363
|
+
print(f"Imported {len(all_variants)} library entries from {csvfile}")
|
|
364
|
+
#print(f"Processed {total_compounds} compounds: {successful_compounds} successful, {skipped_compounds} skipped due to invalid formulas")
|
|
325
365
|
else:
|
|
326
366
|
print(f"No valid compounds found in {csvfile}")
|
|
367
|
+
if skipped_compounds > 0:
|
|
368
|
+
print(f"All {total_compounds} compounds were skipped due to invalid formulas")
|
|
327
369
|
|
|
328
370
|
def _map_csv_columns(self, columns: List[str]) -> Dict[str, str]:
|
|
329
371
|
"""
|