masster 0.5.22__py3-none-any.whl → 0.5.24__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of masster might be problematic. Click here for more details.
- masster/_version.py +1 -1
- masster/logger.py +35 -19
- masster/sample/adducts.py +15 -29
- masster/sample/defaults/find_adducts_def.py +1 -3
- masster/sample/defaults/sample_def.py +4 -4
- masster/sample/h5.py +203 -361
- masster/sample/helpers.py +14 -30
- masster/sample/lib.py +3 -3
- masster/sample/load.py +21 -29
- masster/sample/plot.py +222 -132
- masster/sample/processing.py +42 -55
- masster/sample/sample.py +37 -46
- masster/sample/save.py +37 -61
- masster/sample/sciex.py +13 -11
- masster/sample/thermo.py +69 -74
- masster/spectrum.py +15 -15
- masster/study/analysis.py +650 -586
- masster/study/defaults/identify_def.py +1 -3
- masster/study/defaults/merge_def.py +6 -7
- masster/study/defaults/study_def.py +1 -5
- masster/study/export.py +35 -96
- masster/study/h5.py +134 -211
- masster/study/helpers.py +385 -459
- masster/study/id.py +239 -290
- masster/study/importers.py +84 -93
- masster/study/load.py +159 -178
- masster/study/merge.py +1112 -1098
- masster/study/plot.py +195 -149
- masster/study/processing.py +144 -191
- masster/study/save.py +14 -13
- masster/study/study.py +89 -130
- masster/wizard/wizard.py +764 -714
- {masster-0.5.22.dist-info → masster-0.5.24.dist-info}/METADATA +27 -1
- {masster-0.5.22.dist-info → masster-0.5.24.dist-info}/RECORD +37 -37
- {masster-0.5.22.dist-info → masster-0.5.24.dist-info}/WHEEL +0 -0
- {masster-0.5.22.dist-info → masster-0.5.24.dist-info}/entry_points.txt +0 -0
- {masster-0.5.22.dist-info → masster-0.5.24.dist-info}/licenses/LICENSE +0 -0
masster/sample/sciex.py
CHANGED
|
@@ -31,16 +31,16 @@ def naive_centroid(
|
|
|
31
31
|
) -> tuple[np.ndarray, np.ndarray]:
|
|
32
32
|
"""
|
|
33
33
|
Simplified naive centroiding implementation.
|
|
34
|
-
|
|
34
|
+
|
|
35
35
|
Parameters
|
|
36
36
|
----------
|
|
37
37
|
peak_mzs : np.ndarray
|
|
38
38
|
Array of m/z values
|
|
39
|
-
peak_intensities : np.ndarray
|
|
39
|
+
peak_intensities : np.ndarray
|
|
40
40
|
Array of intensity values
|
|
41
41
|
centroiding_ppm : float, default 20.0
|
|
42
42
|
PPM tolerance for combining peaks
|
|
43
|
-
|
|
43
|
+
|
|
44
44
|
Returns
|
|
45
45
|
-------
|
|
46
46
|
tuple[np.ndarray, np.ndarray]
|
|
@@ -150,20 +150,20 @@ except Exception as e:
|
|
|
150
150
|
def dot_net_array_to_np_array(src) -> np.ndarray:
|
|
151
151
|
"""
|
|
152
152
|
Convert .NET array to NumPy array.
|
|
153
|
-
|
|
153
|
+
|
|
154
154
|
Parameters
|
|
155
155
|
----------
|
|
156
156
|
src : .NET array or None
|
|
157
157
|
Source .NET array to convert
|
|
158
|
-
|
|
158
|
+
|
|
159
159
|
Returns
|
|
160
160
|
-------
|
|
161
161
|
np.ndarray
|
|
162
162
|
Converted NumPy array
|
|
163
|
-
|
|
163
|
+
|
|
164
164
|
Notes
|
|
165
165
|
-----
|
|
166
|
-
Based on approach from:
|
|
166
|
+
Based on approach from:
|
|
167
167
|
https://mail.python.org/pipermail/pythondotnet/2014-May/001527.html
|
|
168
168
|
"""
|
|
169
169
|
if src is None:
|
|
@@ -204,7 +204,7 @@ class SciexWiffFileReader:
|
|
|
204
204
|
"""Close the file and clean up resources."""
|
|
205
205
|
self._wiffDataProvider.Close()
|
|
206
206
|
|
|
207
|
-
def __enter__(self) ->
|
|
207
|
+
def __enter__(self) -> "SciexWiffFileReader":
|
|
208
208
|
"""Context manager entry."""
|
|
209
209
|
return self
|
|
210
210
|
|
|
@@ -272,11 +272,11 @@ class SciexWiffFileReader:
|
|
|
272
272
|
if ms_level > 1 and not details.IsSwath and mass_spectrum.NumDataPoints <= 0 and ignore_empty_scans:
|
|
273
273
|
continue
|
|
274
274
|
if exp.Details.Polarity == exp.Details.Polarity.Positive:
|
|
275
|
-
pol =
|
|
275
|
+
pol = "positive"
|
|
276
276
|
elif exp.Details.Polarity == exp.Details.Polarity.Negative:
|
|
277
|
-
pol =
|
|
277
|
+
pol = "negative"
|
|
278
278
|
else:
|
|
279
|
-
pol =
|
|
279
|
+
pol = ""
|
|
280
280
|
polarity_list.append(pol)
|
|
281
281
|
|
|
282
282
|
mz_array = dot_net_array_to_np_array(mass_spectrum.GetActualXValues())
|
|
@@ -553,6 +553,7 @@ def load_wiff_file(filename: str, **kwargs) -> SciexWiffData:
|
|
|
553
553
|
wiff_data.import_raw(filename)
|
|
554
554
|
return wiff_data
|
|
555
555
|
|
|
556
|
+
|
|
556
557
|
def get_sample_names(filename: str) -> list[str]:
|
|
557
558
|
"""
|
|
558
559
|
Get the sample names from a WIFF file.
|
|
@@ -570,6 +571,7 @@ def get_sample_names(filename: str) -> list[str]:
|
|
|
570
571
|
with SciexWiffFileReader(filename) as reader:
|
|
571
572
|
return list(reader.sample_names)
|
|
572
573
|
|
|
574
|
+
|
|
573
575
|
# Example usage and testing
|
|
574
576
|
if __name__ == "__main__":
|
|
575
577
|
print("Standalone Sciex WIFF reader implementation")
|
masster/sample/thermo.py
CHANGED
|
@@ -30,8 +30,8 @@ Example:
|
|
|
30
30
|
>>> mz, intensity = raw_data.get_peaks(0) # Get first spectrum peaks
|
|
31
31
|
|
|
32
32
|
Note:
|
|
33
|
-
The .NET imports (System, ThermoFisher) will only work when pythonnet
|
|
34
|
-
is properly installed and configured. Without these dependencies, the
|
|
33
|
+
The .NET imports (System, ThermoFisher) will only work when pythonnet
|
|
34
|
+
is properly installed and configured. Without these dependencies, the
|
|
35
35
|
module will still import but Thermo RAW file reading will be disabled.
|
|
36
36
|
"""
|
|
37
37
|
|
|
@@ -54,23 +54,23 @@ def naive_centroid(
|
|
|
54
54
|
) -> tuple[np.ndarray, np.ndarray]:
|
|
55
55
|
"""
|
|
56
56
|
Simplified naive centroiding implementation.
|
|
57
|
-
|
|
57
|
+
|
|
58
58
|
Combines nearby peaks within a PPM tolerance using intensity-weighted averaging.
|
|
59
|
-
|
|
59
|
+
|
|
60
60
|
Parameters
|
|
61
61
|
----------
|
|
62
62
|
peak_mzs : np.ndarray
|
|
63
63
|
Array of m/z values (must be sorted)
|
|
64
|
-
peak_intensities : np.ndarray
|
|
64
|
+
peak_intensities : np.ndarray
|
|
65
65
|
Array of intensity values corresponding to peak_mzs
|
|
66
66
|
centroiding_ppm : float, default 20.0
|
|
67
67
|
PPM tolerance for combining peaks
|
|
68
|
-
|
|
68
|
+
|
|
69
69
|
Returns
|
|
70
70
|
-------
|
|
71
71
|
tuple[np.ndarray, np.ndarray]
|
|
72
72
|
Centroided m/z and intensity arrays
|
|
73
|
-
|
|
73
|
+
|
|
74
74
|
Notes
|
|
75
75
|
-----
|
|
76
76
|
This is a simple implementation that assumes input peaks are sorted by m/z.
|
|
@@ -78,7 +78,7 @@ def naive_centroid(
|
|
|
78
78
|
"""
|
|
79
79
|
if len(peak_mzs) == 0:
|
|
80
80
|
return np.array([]), np.array([])
|
|
81
|
-
|
|
81
|
+
|
|
82
82
|
if len(peak_mzs) != len(peak_intensities):
|
|
83
83
|
raise ValueError("peak_mzs and peak_intensities must have the same length")
|
|
84
84
|
|
|
@@ -89,7 +89,7 @@ def naive_centroid(
|
|
|
89
89
|
while i < len(peak_mzs):
|
|
90
90
|
current_mz = peak_mzs[i]
|
|
91
91
|
current_intensity = peak_intensities[i]
|
|
92
|
-
|
|
92
|
+
|
|
93
93
|
# Calculate tolerance for current m/z
|
|
94
94
|
tolerance = current_mz * centroiding_ppm * 1e-6
|
|
95
95
|
|
|
@@ -144,6 +144,7 @@ try:
|
|
|
144
144
|
# Try alternative locations
|
|
145
145
|
try:
|
|
146
146
|
import alpharaw
|
|
147
|
+
|
|
147
148
|
alpharaw_dir = os.path.dirname(alpharaw.__file__)
|
|
148
149
|
ext_dir = os.path.join(alpharaw_dir, "ext")
|
|
149
150
|
except ImportError:
|
|
@@ -156,9 +157,7 @@ try:
|
|
|
156
157
|
clr.AddReference(
|
|
157
158
|
os.path.join(ext_dir, "thermo_fisher", "ThermoFisher.CommonCore.Data.dll"),
|
|
158
159
|
)
|
|
159
|
-
clr.AddReference(
|
|
160
|
-
os.path.join(ext_dir, "thermo_fisher", "ThermoFisher.CommonCore.RawFileReader.dll")
|
|
161
|
-
)
|
|
160
|
+
clr.AddReference(os.path.join(ext_dir, "thermo_fisher", "ThermoFisher.CommonCore.RawFileReader.dll"))
|
|
162
161
|
|
|
163
162
|
import ThermoFisher # noqa: F401
|
|
164
163
|
|
|
@@ -189,43 +188,43 @@ except Exception as e:
|
|
|
189
188
|
def dot_net_array_to_np_array(src) -> np.ndarray:
|
|
190
189
|
"""
|
|
191
190
|
Convert .NET array to NumPy array with efficient memory handling.
|
|
192
|
-
|
|
191
|
+
|
|
193
192
|
This function performs a zero-copy conversion from .NET arrays to NumPy arrays
|
|
194
193
|
by directly accessing the underlying memory buffer. This is much faster than
|
|
195
194
|
iterating through elements.
|
|
196
|
-
|
|
195
|
+
|
|
197
196
|
Parameters
|
|
198
197
|
----------
|
|
199
198
|
src : .NET array or None
|
|
200
199
|
Source .NET array to convert (typically double[])
|
|
201
|
-
|
|
200
|
+
|
|
202
201
|
Returns
|
|
203
202
|
-------
|
|
204
203
|
np.ndarray
|
|
205
204
|
Converted NumPy array with dtype float64. Returns empty array if src is None.
|
|
206
|
-
|
|
205
|
+
|
|
207
206
|
Notes
|
|
208
207
|
-----
|
|
209
|
-
Based on the approach from:
|
|
208
|
+
Based on the approach from:
|
|
210
209
|
https://mail.python.org/pipermail/pythondotnet/2014-May/001527.html
|
|
211
|
-
|
|
210
|
+
|
|
212
211
|
The function uses GCHandle.Alloc to pin the .NET array in memory, allowing
|
|
213
212
|
direct access to its underlying buffer via ctypes. The buffer is then
|
|
214
213
|
wrapped as a NumPy array and copied to ensure memory safety.
|
|
215
214
|
"""
|
|
216
215
|
if src is None:
|
|
217
216
|
return np.array([], dtype=np.float64)
|
|
218
|
-
|
|
217
|
+
|
|
219
218
|
# Pin the .NET array in memory to prevent garbage collection
|
|
220
219
|
src_hndl = GCHandle.Alloc(src, GCHandleType.Pinned)
|
|
221
220
|
try:
|
|
222
221
|
# Get pointer to the pinned memory
|
|
223
222
|
src_ptr = src_hndl.AddrOfPinnedObject().ToInt64()
|
|
224
|
-
|
|
223
|
+
|
|
225
224
|
# Create ctypes buffer pointing to the same memory
|
|
226
225
|
buf_type = ctypes.c_double * len(src)
|
|
227
226
|
cbuf = buf_type.from_address(src_ptr)
|
|
228
|
-
|
|
227
|
+
|
|
229
228
|
# Convert to NumPy array and make a copy for safety
|
|
230
229
|
dest = np.frombuffer(cbuf, dtype="float64").copy() # type: ignore[call-overload]
|
|
231
230
|
finally:
|
|
@@ -247,7 +246,7 @@ class ThermoRawFileReader:
|
|
|
247
246
|
"Install pythonnet (pip install pythonnet) and ensure Thermo Fisher DLLs "
|
|
248
247
|
"are available in alpharaw's ext/thermo_fisher directory."
|
|
249
248
|
)
|
|
250
|
-
|
|
249
|
+
|
|
251
250
|
if not os.path.exists(filename):
|
|
252
251
|
raise FileNotFoundError(f"RAW file not found: {filename}")
|
|
253
252
|
|
|
@@ -255,7 +254,7 @@ class ThermoRawFileReader:
|
|
|
255
254
|
self._raw_file = RawFileReaderAdapter.FileFactory(filename)
|
|
256
255
|
except Exception as e:
|
|
257
256
|
raise ValueError(f"Failed to create RAW file reader for '{filename}': {e}") from e
|
|
258
|
-
|
|
257
|
+
|
|
259
258
|
if not self._raw_file.IsOpen:
|
|
260
259
|
raise ValueError(f"Could not open RAW file: {filename}")
|
|
261
260
|
|
|
@@ -271,10 +270,10 @@ class ThermoRawFileReader:
|
|
|
271
270
|
|
|
272
271
|
def close(self) -> None:
|
|
273
272
|
"""Close the file and clean up resources."""
|
|
274
|
-
if hasattr(self,
|
|
273
|
+
if hasattr(self, "_raw_file") and self._raw_file is not None:
|
|
275
274
|
self._raw_file.Dispose()
|
|
276
275
|
|
|
277
|
-
def __enter__(self) ->
|
|
276
|
+
def __enter__(self) -> "ThermoRawFileReader":
|
|
278
277
|
"""Context manager entry."""
|
|
279
278
|
return self
|
|
280
279
|
|
|
@@ -285,12 +284,12 @@ class ThermoRawFileReader:
|
|
|
285
284
|
def get_polarity_from_scan_event(self, scan_number: int) -> str:
|
|
286
285
|
"""
|
|
287
286
|
Extract polarity information from scan event.
|
|
288
|
-
|
|
287
|
+
|
|
289
288
|
Parameters
|
|
290
289
|
----------
|
|
291
290
|
scan_number : int
|
|
292
291
|
Scan number to extract polarity from
|
|
293
|
-
|
|
292
|
+
|
|
294
293
|
Returns
|
|
295
294
|
-------
|
|
296
295
|
str
|
|
@@ -299,28 +298,28 @@ class ThermoRawFileReader:
|
|
|
299
298
|
try:
|
|
300
299
|
scan_event = self._raw_file.GetScanEventForScanNumber(scan_number)
|
|
301
300
|
if scan_event is None:
|
|
302
|
-
return
|
|
301
|
+
return ""
|
|
303
302
|
|
|
304
303
|
# Try the direct Polarity property first (most reliable)
|
|
305
|
-
if hasattr(scan_event,
|
|
304
|
+
if hasattr(scan_event, "Polarity"):
|
|
306
305
|
polarity_str = str(scan_event.Polarity).lower()
|
|
307
|
-
if
|
|
308
|
-
return
|
|
309
|
-
elif
|
|
310
|
-
return
|
|
311
|
-
|
|
306
|
+
if "positive" in polarity_str:
|
|
307
|
+
return "positive"
|
|
308
|
+
elif "negative" in polarity_str:
|
|
309
|
+
return "negative"
|
|
310
|
+
|
|
312
311
|
# Fallback: parse the scan filter string
|
|
313
312
|
filter_string = str(scan_event.ToString()).lower()
|
|
314
|
-
if
|
|
315
|
-
return
|
|
316
|
-
elif
|
|
317
|
-
return
|
|
318
|
-
|
|
313
|
+
if "+" in filter_string or "positive" in filter_string:
|
|
314
|
+
return "positive"
|
|
315
|
+
elif "-" in filter_string or "negative" in filter_string:
|
|
316
|
+
return "negative"
|
|
317
|
+
|
|
319
318
|
except Exception:
|
|
320
319
|
# Log the exception if needed, but don't raise
|
|
321
320
|
pass
|
|
322
|
-
|
|
323
|
-
return
|
|
321
|
+
|
|
322
|
+
return "" # Unknown polarity
|
|
324
323
|
|
|
325
324
|
def _extract_precursor_info(self, scan_event, ms_level: int) -> tuple[float, int, float, float, float]:
|
|
326
325
|
"""Extract precursor information from scan event for MS2+ scans."""
|
|
@@ -333,17 +332,19 @@ class ThermoRawFileReader:
|
|
|
333
332
|
precursor_mz = -1.0
|
|
334
333
|
|
|
335
334
|
try:
|
|
336
|
-
precursor_charge = int(scan_event.GetChargeState(0)) if hasattr(scan_event,
|
|
335
|
+
precursor_charge = int(scan_event.GetChargeState(0)) if hasattr(scan_event, "GetChargeState") else 0
|
|
337
336
|
except Exception:
|
|
338
337
|
precursor_charge = 0
|
|
339
338
|
|
|
340
339
|
try:
|
|
341
|
-
collision_energy = float(scan_event.GetEnergy(0)) if hasattr(scan_event,
|
|
340
|
+
collision_energy = float(scan_event.GetEnergy(0)) if hasattr(scan_event, "GetEnergy") else 0.0
|
|
342
341
|
except Exception:
|
|
343
342
|
collision_energy = 0.0
|
|
344
343
|
|
|
345
344
|
try:
|
|
346
|
-
isolation_window =
|
|
345
|
+
isolation_window = (
|
|
346
|
+
float(scan_event.GetIsolationWidth(0)) if hasattr(scan_event, "GetIsolationWidth") else 3.0
|
|
347
|
+
)
|
|
347
348
|
except Exception:
|
|
348
349
|
isolation_window = 3.0
|
|
349
350
|
|
|
@@ -353,11 +354,7 @@ class ThermoRawFileReader:
|
|
|
353
354
|
return precursor_mz, precursor_charge, collision_energy, isolation_lower, isolation_upper
|
|
354
355
|
|
|
355
356
|
def _process_scan_data(
|
|
356
|
-
self,
|
|
357
|
-
scan_data,
|
|
358
|
-
centroid: bool,
|
|
359
|
-
centroid_ppm: float,
|
|
360
|
-
keep_k_peaks: int
|
|
357
|
+
self, scan_data, centroid: bool, centroid_ppm: float, keep_k_peaks: int
|
|
361
358
|
) -> tuple[np.ndarray, np.ndarray]:
|
|
362
359
|
"""Process scan data to extract and optionally centroid peaks."""
|
|
363
360
|
if scan_data.Positions is not None and scan_data.Intensities is not None:
|
|
@@ -434,30 +431,29 @@ class ThermoRawFileReader:
|
|
|
434
431
|
continue
|
|
435
432
|
|
|
436
433
|
scan_event = self._raw_file.GetScanEventForScanNumber(scan_num)
|
|
437
|
-
|
|
434
|
+
|
|
438
435
|
# Extract basic scan information
|
|
439
436
|
rt = scan_stats.StartTime # in minutes
|
|
440
437
|
ms_level = int(scan_event.MSOrder) if scan_event else 1
|
|
441
438
|
polarity = self.get_polarity_from_scan_event(scan_num)
|
|
442
439
|
|
|
443
440
|
# Process peak data
|
|
444
|
-
mz_array, int_array = self._process_scan_data(
|
|
445
|
-
scan_data, centroid, centroid_ppm, keep_k_peaks
|
|
446
|
-
)
|
|
441
|
+
mz_array, int_array = self._process_scan_data(scan_data, centroid, centroid_ppm, keep_k_peaks)
|
|
447
442
|
|
|
448
443
|
# Store scan data
|
|
449
444
|
peak_mz_arrays.append(mz_array)
|
|
450
445
|
peak_intensity_arrays.append(int_array)
|
|
451
446
|
peak_indices_list.append(len(mz_array))
|
|
452
|
-
|
|
447
|
+
|
|
453
448
|
rt_list.append(rt)
|
|
454
449
|
ms_level_list.append(ms_level)
|
|
455
450
|
polarity_list.append(polarity)
|
|
456
451
|
|
|
457
452
|
# Extract precursor information
|
|
458
|
-
precursor_mz, precursor_charge, collision_energy, isolation_lower, isolation_upper =
|
|
453
|
+
precursor_mz, precursor_charge, collision_energy, isolation_lower, isolation_upper = (
|
|
459
454
|
self._extract_precursor_info(scan_event, ms_level)
|
|
460
|
-
|
|
455
|
+
)
|
|
456
|
+
|
|
461
457
|
precursor_mz_list.append(precursor_mz)
|
|
462
458
|
precursor_charge_list.append(precursor_charge)
|
|
463
459
|
ce_list.append(collision_energy)
|
|
@@ -510,7 +506,7 @@ class ThermoRawData:
|
|
|
510
506
|
def __init__(self, centroided: bool = True) -> None:
|
|
511
507
|
"""
|
|
512
508
|
Initialize ThermoRawData reader.
|
|
513
|
-
|
|
509
|
+
|
|
514
510
|
Parameters
|
|
515
511
|
----------
|
|
516
512
|
centroided : bool, optional
|
|
@@ -520,13 +516,13 @@ class ThermoRawData:
|
|
|
520
516
|
# Initialize dataframes
|
|
521
517
|
self.spectrum_df: pd.DataFrame = pd.DataFrame()
|
|
522
518
|
self.peak_df: pd.DataFrame = pd.DataFrame()
|
|
523
|
-
|
|
519
|
+
|
|
524
520
|
# File and instrument information
|
|
525
521
|
self._raw_file_path = ""
|
|
526
522
|
self.creation_time = ""
|
|
527
523
|
self.type = "thermo"
|
|
528
524
|
self.instrument = "thermo"
|
|
529
|
-
|
|
525
|
+
|
|
530
526
|
# Processing parameters
|
|
531
527
|
self.centroided = centroided
|
|
532
528
|
self.centroid_ppm = 20.0
|
|
@@ -537,8 +533,7 @@ class ThermoRawData:
|
|
|
537
533
|
if self.centroided:
|
|
538
534
|
self.centroided = False
|
|
539
535
|
warnings.warn(
|
|
540
|
-
"Centroiding for Thermo data is not well implemented yet. "
|
|
541
|
-
"Data will be processed in profile mode.",
|
|
536
|
+
"Centroiding for Thermo data is not well implemented yet. Data will be processed in profile mode.",
|
|
542
537
|
UserWarning,
|
|
543
538
|
stacklevel=2,
|
|
544
539
|
)
|
|
@@ -587,14 +582,14 @@ class ThermoRawData:
|
|
|
587
582
|
ignore_empty_scans=self.ignore_empty_scans,
|
|
588
583
|
keep_k_peaks=self.keep_k_peaks_per_spec,
|
|
589
584
|
)
|
|
590
|
-
|
|
585
|
+
|
|
591
586
|
# Try to get file creation time
|
|
592
587
|
try:
|
|
593
588
|
creation_info = raw_reader._raw_file.GetCreationDate()
|
|
594
589
|
self.creation_time = creation_info.ToString("O") if creation_info else ""
|
|
595
590
|
except Exception:
|
|
596
591
|
self.creation_time = ""
|
|
597
|
-
|
|
592
|
+
|
|
598
593
|
return data_dict
|
|
599
594
|
|
|
600
595
|
def _set_dataframes(self, raw_data: dict[str, Any]) -> None:
|
|
@@ -607,16 +602,16 @@ class ThermoRawData:
|
|
|
607
602
|
Dictionary containing the raw spectral data with keys like 'rt', 'peak_mz', etc.
|
|
608
603
|
"""
|
|
609
604
|
num_spectra = len(raw_data["rt"])
|
|
610
|
-
|
|
605
|
+
|
|
611
606
|
# Create spectrum dataframe
|
|
612
607
|
self.create_spectrum_df(num_spectra)
|
|
613
|
-
|
|
608
|
+
|
|
614
609
|
# Create peak dataframe with indexed arrays
|
|
615
610
|
self.set_peak_df_by_indexed_array(
|
|
616
611
|
raw_data["peak_mz"],
|
|
617
612
|
raw_data["peak_intensity"],
|
|
618
613
|
raw_data["peak_indices"][:-1], # start indices
|
|
619
|
-
raw_data["peak_indices"][1:],
|
|
614
|
+
raw_data["peak_indices"][1:], # end indices
|
|
620
615
|
)
|
|
621
616
|
|
|
622
617
|
# Add spectrum-level data to spectrum dataframe
|
|
@@ -741,17 +736,17 @@ def get_file_info(filename: str) -> dict[str, Any]:
|
|
|
741
736
|
def main() -> None:
|
|
742
737
|
"""
|
|
743
738
|
Main function for testing and demonstrating the module functionality.
|
|
744
|
-
|
|
739
|
+
|
|
745
740
|
This function provides usage examples and tests basic module functionality
|
|
746
741
|
when the script is run directly.
|
|
747
742
|
"""
|
|
748
743
|
print("Standalone Thermo RAW Reader")
|
|
749
744
|
print("=" * 40)
|
|
750
|
-
|
|
745
|
+
|
|
751
746
|
# Display usage example
|
|
752
747
|
print("\nUsage Example:")
|
|
753
748
|
print("-" * 20)
|
|
754
|
-
example_code =
|
|
749
|
+
example_code = """
|
|
755
750
|
from thermo import ThermoRawData, load_raw_file
|
|
756
751
|
|
|
757
752
|
# Method 1: Create reader instance
|
|
@@ -771,18 +766,18 @@ mz, intensity = raw_data.get_peaks(0)
|
|
|
771
766
|
# Check available polarities
|
|
772
767
|
polarities = raw_data.spectrum_df['polarity'].unique()
|
|
773
768
|
print(f"Polarities: {polarities}")
|
|
774
|
-
|
|
769
|
+
"""
|
|
775
770
|
print(example_code)
|
|
776
771
|
|
|
777
772
|
# Test module functionality
|
|
778
773
|
print("\nModule Status:")
|
|
779
774
|
print("-" * 20)
|
|
780
|
-
|
|
775
|
+
|
|
781
776
|
try:
|
|
782
777
|
# Test class instantiation
|
|
783
778
|
test_data = ThermoRawData()
|
|
784
779
|
print("✓ ThermoRawData instantiated successfully")
|
|
785
|
-
|
|
780
|
+
|
|
786
781
|
# Check .NET support
|
|
787
782
|
if HAS_DOTNET:
|
|
788
783
|
print("✓ .NET support available")
|
|
@@ -792,10 +787,10 @@ print(f"Polarities: {polarities}")
|
|
|
792
787
|
print("⚠ .NET support not available")
|
|
793
788
|
print(" • Install pythonnet to enable RAW file reading")
|
|
794
789
|
print(" • Ensure Thermo Fisher DLLs are in alpharaw ext directory")
|
|
795
|
-
|
|
790
|
+
|
|
796
791
|
except Exception as e:
|
|
797
792
|
print(f"✗ Error during module testing: {e}")
|
|
798
793
|
|
|
799
794
|
|
|
800
795
|
if __name__ == "__main__":
|
|
801
|
-
main()
|
|
796
|
+
main()
|
masster/spectrum.py
CHANGED
|
@@ -199,61 +199,61 @@ class Spectrum:
|
|
|
199
199
|
def check_if_centroided(self) -> bool:
|
|
200
200
|
"""
|
|
201
201
|
Fast determination if spectrum data is centroided or profile.
|
|
202
|
-
|
|
202
|
+
|
|
203
203
|
Uses optimized statistical approaches with early exits for speed:
|
|
204
204
|
1. Fast median difference check (most decisive)
|
|
205
|
-
2. Small gap ratio (profile characteristic)
|
|
205
|
+
2. Small gap ratio (profile characteristic)
|
|
206
206
|
3. Density check (fallback)
|
|
207
|
-
|
|
207
|
+
|
|
208
208
|
Returns:
|
|
209
209
|
bool: True if centroided, False if profile
|
|
210
210
|
"""
|
|
211
211
|
if self.mz.size < 5:
|
|
212
212
|
return True # Too few points to determine, assume centroided
|
|
213
|
-
|
|
213
|
+
|
|
214
214
|
# Fast path: check if mz is already sorted to avoid sorting cost
|
|
215
215
|
if np.all(self.mz[:-1] <= self.mz[1:]):
|
|
216
216
|
sorted_mz = self.mz
|
|
217
217
|
else:
|
|
218
218
|
sorted_mz = np.sort(self.mz)
|
|
219
|
-
|
|
219
|
+
|
|
220
220
|
# Calculate differences efficiently
|
|
221
221
|
mz_diffs = np.diff(sorted_mz)
|
|
222
|
-
|
|
222
|
+
|
|
223
223
|
# Remove zeros efficiently (keep positive differences)
|
|
224
224
|
mz_diffs = mz_diffs[mz_diffs > 0]
|
|
225
|
-
|
|
225
|
+
|
|
226
226
|
if mz_diffs.size == 0:
|
|
227
227
|
return True # All identical m/z values
|
|
228
|
-
|
|
228
|
+
|
|
229
229
|
# Fast approach 1: Median difference (most decisive, compute once)
|
|
230
230
|
median_diff = np.median(mz_diffs)
|
|
231
|
-
|
|
231
|
+
|
|
232
232
|
# Early exits for clear cases (>90% of cases)
|
|
233
233
|
if median_diff > 0.02:
|
|
234
234
|
return True # Clearly centroided
|
|
235
235
|
elif median_diff < 0.005:
|
|
236
236
|
return False # Clearly profile
|
|
237
|
-
|
|
237
|
+
|
|
238
238
|
# Fast approach 2: Small gap ratio (for borderline cases)
|
|
239
239
|
# Use vectorized comparison instead of creating new array
|
|
240
240
|
small_gap_count = np.sum(mz_diffs < 0.005)
|
|
241
241
|
small_gap_ratio = small_gap_count / mz_diffs.size
|
|
242
|
-
|
|
242
|
+
|
|
243
243
|
if small_gap_ratio > 0.7:
|
|
244
244
|
return False # High ratio of small gaps = profile
|
|
245
245
|
elif small_gap_ratio < 0.1:
|
|
246
|
-
return True
|
|
247
|
-
|
|
246
|
+
return True # Low ratio of small gaps = centroided
|
|
247
|
+
|
|
248
248
|
# Fast approach 3: Density check (final fallback)
|
|
249
|
-
mz_range = sorted_mz[-1] - sorted_mz[0]
|
|
249
|
+
mz_range = sorted_mz[-1] - sorted_mz[0]
|
|
250
250
|
if mz_range > 0:
|
|
251
251
|
density = sorted_mz.size / mz_range
|
|
252
252
|
if density > 100: # High density = profile
|
|
253
253
|
return False
|
|
254
254
|
elif density < 10: # Low density = centroided
|
|
255
255
|
return True
|
|
256
|
-
|
|
256
|
+
|
|
257
257
|
# Final fallback: median threshold
|
|
258
258
|
return median_diff > 0.01
|
|
259
259
|
|