masster 0.5.16__py3-none-any.whl → 0.5.18__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of masster might be problematic. Click here for more details.
- masster/_version.py +1 -1
- masster/sample/adducts.py +12 -0
- masster/sample/defaults/sample_def.py +30 -6
- masster/sample/h5.py +52 -6
- masster/sample/lib.py +9 -3
- masster/sample/load.py +47 -120
- masster/sample/processing.py +1 -1
- masster/sample/sample.py +5 -3
- masster/sample/sciex.py +60 -646
- masster/sample/thermo.py +801 -0
- masster/study/id.py +3 -1
- masster/study/load.py +15 -792
- masster/study/study.py +1 -0
- masster/wizard/wizard.py +178 -225
- {masster-0.5.16.dist-info → masster-0.5.18.dist-info}/METADATA +3 -4
- {masster-0.5.16.dist-info → masster-0.5.18.dist-info}/RECORD +19 -18
- {masster-0.5.16.dist-info → masster-0.5.18.dist-info}/WHEEL +0 -0
- {masster-0.5.16.dist-info → masster-0.5.18.dist-info}/entry_points.txt +0 -0
- {masster-0.5.16.dist-info → masster-0.5.18.dist-info}/licenses/LICENSE +0 -0
masster/sample/sciex.py
CHANGED
|
@@ -2,12 +2,11 @@
|
|
|
2
2
|
Standalone Sciex WIFF file reader module.
|
|
3
3
|
|
|
4
4
|
This module provides a standalone implementation of Sciex WIFF file reading
|
|
5
|
-
functionality that uses the DLLs
|
|
6
|
-
without importing from the alpharaw package.
|
|
5
|
+
functionality that uses the Sciex DLLs directly.
|
|
7
6
|
|
|
8
7
|
Requirements:
|
|
9
8
|
- pythonnet (pip install pythonnet)
|
|
10
|
-
-
|
|
9
|
+
- Sciex DLLs must be available in the ext/sciex directory
|
|
11
10
|
- On Linux/macOS: mono runtime must be installed
|
|
12
11
|
|
|
13
12
|
The .NET imports (System, Clearcore2, WiffOps4Python) will only work when
|
|
@@ -32,6 +31,20 @@ def naive_centroid(
|
|
|
32
31
|
) -> tuple[np.ndarray, np.ndarray]:
|
|
33
32
|
"""
|
|
34
33
|
Simplified naive centroiding implementation.
|
|
34
|
+
|
|
35
|
+
Parameters
|
|
36
|
+
----------
|
|
37
|
+
peak_mzs : np.ndarray
|
|
38
|
+
Array of m/z values
|
|
39
|
+
peak_intensities : np.ndarray
|
|
40
|
+
Array of intensity values
|
|
41
|
+
centroiding_ppm : float, default 20.0
|
|
42
|
+
PPM tolerance for combining peaks
|
|
43
|
+
|
|
44
|
+
Returns
|
|
45
|
+
-------
|
|
46
|
+
tuple[np.ndarray, np.ndarray]
|
|
47
|
+
Centroided m/z and intensity arrays
|
|
35
48
|
"""
|
|
36
49
|
if len(peak_mzs) == 0:
|
|
37
50
|
return np.array([]), np.array([])
|
|
@@ -134,10 +147,24 @@ except Exception as e:
|
|
|
134
147
|
HAS_DOTNET = False
|
|
135
148
|
|
|
136
149
|
|
|
137
|
-
def dot_net_array_to_np_array(src):
|
|
150
|
+
def dot_net_array_to_np_array(src) -> np.ndarray:
|
|
138
151
|
"""
|
|
139
152
|
Convert .NET array to NumPy array.
|
|
140
|
-
|
|
153
|
+
|
|
154
|
+
Parameters
|
|
155
|
+
----------
|
|
156
|
+
src : .NET array or None
|
|
157
|
+
Source .NET array to convert
|
|
158
|
+
|
|
159
|
+
Returns
|
|
160
|
+
-------
|
|
161
|
+
np.ndarray
|
|
162
|
+
Converted NumPy array
|
|
163
|
+
|
|
164
|
+
Notes
|
|
165
|
+
-----
|
|
166
|
+
Based on approach from:
|
|
167
|
+
https://mail.python.org/pipermail/pythondotnet/2014-May/001527.html
|
|
141
168
|
"""
|
|
142
169
|
if src is None:
|
|
143
170
|
return np.array([], dtype=np.float64)
|
|
@@ -153,482 +180,9 @@ def dot_net_array_to_np_array(src):
|
|
|
153
180
|
return dest # noqa: B012
|
|
154
181
|
|
|
155
182
|
|
|
156
|
-
class SciexWiff2FileReader:
|
|
157
|
-
"""
|
|
158
|
-
Specialized reader for Sciex WIFF2 files using optimal DLL combination.
|
|
159
|
-
|
|
160
|
-
WIFF2 is a newer format from Sciex that may have enhanced capabilities
|
|
161
|
-
compared to the original WIFF format. This reader is optimized specifically
|
|
162
|
-
for WIFF2 files and uses the most appropriate DLLs for maximum information extraction.
|
|
163
|
-
|
|
164
|
-
Based on comprehensive DLL analysis, WIFF2 files require specific handling and
|
|
165
|
-
may use different underlying storage mechanisms than regular WIFF files.
|
|
166
|
-
"""
|
|
167
|
-
|
|
168
|
-
def __init__(self, filename: str):
|
|
169
|
-
"""
|
|
170
|
-
Initialize WIFF2 reader with file path.
|
|
171
|
-
|
|
172
|
-
Parameters
|
|
173
|
-
----------
|
|
174
|
-
filename : str
|
|
175
|
-
Path to the WIFF2 file
|
|
176
|
-
"""
|
|
177
|
-
if not HAS_DOTNET:
|
|
178
|
-
raise ValueError(
|
|
179
|
-
"Dotnet-based dependencies are required for reading Sciex WIFF2 files. "
|
|
180
|
-
"Install pythonnet and ensure alpharaw DLLs are available.",
|
|
181
|
-
)
|
|
182
|
-
|
|
183
|
-
self.filename = filename
|
|
184
|
-
self.ext_dir = self._find_dll_directory()
|
|
185
|
-
self._ensure_wiff2_dlls_loaded()
|
|
186
|
-
|
|
187
|
-
# Try different initialization strategies for WIFF2
|
|
188
|
-
self._initialize_wiff2_reader()
|
|
189
|
-
|
|
190
|
-
def _find_dll_directory(self):
|
|
191
|
-
"""Find the alpharaw DLL directory using the same discovery pattern."""
|
|
192
|
-
for site_dir in site.getsitepackages():
|
|
193
|
-
potential_ext_dir = os.path.join(site_dir, "alpharaw", "ext", "sciex")
|
|
194
|
-
if os.path.exists(potential_ext_dir):
|
|
195
|
-
return potential_ext_dir
|
|
196
|
-
|
|
197
|
-
# Fallback to alpharaw module location
|
|
198
|
-
try:
|
|
199
|
-
import alpharaw
|
|
200
|
-
|
|
201
|
-
alpharaw_dir = os.path.dirname(alpharaw.__file__)
|
|
202
|
-
return os.path.join(alpharaw_dir, "ext", "sciex")
|
|
203
|
-
except ImportError:
|
|
204
|
-
raise ImportError("Could not find alpharaw DLL directory")
|
|
205
|
-
|
|
206
|
-
def _ensure_wiff2_dlls_loaded(self):
|
|
207
|
-
"""Ensure all necessary WIFF2 DLLs are loaded."""
|
|
208
|
-
# Key DLLs identified through comprehensive analysis
|
|
209
|
-
required_dlls = [
|
|
210
|
-
"Clearcore2.Data.Wiff2.dll", # Primary WIFF2 support
|
|
211
|
-
"Clearcore2.Data.AnalystDataProvider.dll",
|
|
212
|
-
"Clearcore2.Data.dll",
|
|
213
|
-
"Clearcore2.Data.Common.dll",
|
|
214
|
-
"Clearcore2.Data.Core.dll",
|
|
215
|
-
"Clearcore2.StructuredStorage.dll", # For WIFF2 storage format
|
|
216
|
-
"WiffOps4Python.dll",
|
|
217
|
-
]
|
|
218
|
-
|
|
219
|
-
for dll in required_dlls:
|
|
220
|
-
dll_path = os.path.join(self.ext_dir, dll)
|
|
221
|
-
if os.path.exists(dll_path):
|
|
222
|
-
try:
|
|
223
|
-
clr.AddReference(dll_path)
|
|
224
|
-
except:
|
|
225
|
-
pass # May already be loaded
|
|
226
|
-
else:
|
|
227
|
-
warnings.warn(f"WIFF2 DLL not found: {dll}", stacklevel=2)
|
|
228
|
-
|
|
229
|
-
def _initialize_wiff2_reader(self):
|
|
230
|
-
"""
|
|
231
|
-
Initialize WIFF2 reader with fallback strategies.
|
|
232
|
-
|
|
233
|
-
WIFF2 files may require different initialization approaches than WIFF files.
|
|
234
|
-
We try multiple strategies based on the comprehensive DLL analysis.
|
|
235
|
-
"""
|
|
236
|
-
initialization_errors = []
|
|
237
|
-
|
|
238
|
-
# Strategy 1: Try standard AnalystDataProvider (may work for some WIFF2)
|
|
239
|
-
try:
|
|
240
|
-
from Clearcore2.Data.AnalystDataProvider import AnalystDataProviderFactory
|
|
241
|
-
from Clearcore2.Data.AnalystDataProvider import AnalystWiffDataProvider
|
|
242
|
-
|
|
243
|
-
self._wiffDataProvider = AnalystWiffDataProvider()
|
|
244
|
-
self._wiff_file = AnalystDataProviderFactory.CreateBatch(
|
|
245
|
-
self.filename,
|
|
246
|
-
self._wiffDataProvider,
|
|
247
|
-
)
|
|
248
|
-
|
|
249
|
-
self.sample_names = self._wiff_file.GetSampleNames()
|
|
250
|
-
self.sample_count = len(self.sample_names)
|
|
251
|
-
self.initialization_method = "AnalystDataProvider"
|
|
252
|
-
return
|
|
253
|
-
|
|
254
|
-
except Exception as e:
|
|
255
|
-
initialization_errors.append(f"AnalystDataProvider: {e}")
|
|
256
|
-
|
|
257
|
-
# Strategy 2: Try alpharaw's SciexWiffData (correct API)
|
|
258
|
-
try:
|
|
259
|
-
from alpharaw.sciex import SciexWiffData
|
|
260
|
-
|
|
261
|
-
self._alpharaw_reader = SciexWiffData()
|
|
262
|
-
self._alpharaw_reader.import_raw(self.filename)
|
|
263
|
-
|
|
264
|
-
# Extract basic information (SciexWiffData doesn't have sample_names property)
|
|
265
|
-
self.sample_names = ["Sample_0"] # Default since WIFF2 format needs investigation
|
|
266
|
-
self.sample_count = 1
|
|
267
|
-
self.initialization_method = "alpharaw_SciexWiffData"
|
|
268
|
-
|
|
269
|
-
# Store the reader for later use
|
|
270
|
-
self._wiff_data = self._alpharaw_reader
|
|
271
|
-
return
|
|
272
|
-
|
|
273
|
-
except Exception as e:
|
|
274
|
-
initialization_errors.append(f"alpharaw_SciexWiffData: {e}")
|
|
275
|
-
|
|
276
|
-
# Strategy 3: Try direct WIFF2 DLL approach
|
|
277
|
-
try:
|
|
278
|
-
# Check if file is recognized as WIFF2
|
|
279
|
-
from Clearcore2.Data.AnalystDataProvider import DataProviderHelper
|
|
280
|
-
|
|
281
|
-
is_wiff2 = DataProviderHelper.IsMdWiffFile(self.filename)
|
|
282
|
-
if is_wiff2:
|
|
283
|
-
# Try specialized WIFF2 handling
|
|
284
|
-
warnings.warn(
|
|
285
|
-
"File detected as WIFF2 format but specialized reader not fully implemented. "
|
|
286
|
-
"Consider using alpharaw.ms_data_from_file() directly.",
|
|
287
|
-
stacklevel=2,
|
|
288
|
-
)
|
|
289
|
-
# For now, fall back to treating as regular WIFF with enhanced parameters
|
|
290
|
-
self._initialize_as_enhanced_wiff()
|
|
291
|
-
return
|
|
292
|
-
|
|
293
|
-
except Exception as e:
|
|
294
|
-
initialization_errors.append(f"WIFF2 detection: {e}")
|
|
295
|
-
|
|
296
|
-
# If all strategies fail, provide comprehensive error information with helpful suggestions
|
|
297
|
-
error_summary = "; ".join(initialization_errors)
|
|
298
|
-
|
|
299
|
-
# Check if this is a WIFF2 format issue specifically
|
|
300
|
-
if "could not be opened (result = -2147286960)" in error_summary:
|
|
301
|
-
raise RuntimeError(
|
|
302
|
-
f"WIFF2 file format is not supported by the current DLL combination. "
|
|
303
|
-
f"Error code -2147286960 (0x80030050) indicates format incompatibility. "
|
|
304
|
-
f"The file '{self.filename}' appears to be a valid WIFF2 file but requires "
|
|
305
|
-
f"newer or different DLLs than currently available. "
|
|
306
|
-
f"Try converting the WIFF2 file to WIFF format or use alternative tools. "
|
|
307
|
-
f"Full error details: {error_summary}",
|
|
308
|
-
)
|
|
309
|
-
else:
|
|
310
|
-
raise RuntimeError(
|
|
311
|
-
f"Failed to initialize WIFF2 reader with any strategy. "
|
|
312
|
-
f"Errors: {error_summary}. "
|
|
313
|
-
f"The file may be corrupted, locked, or require different dependencies.",
|
|
314
|
-
)
|
|
315
|
-
|
|
316
|
-
def _initialize_as_enhanced_wiff(self):
|
|
317
|
-
"""Fallback: Initialize as enhanced WIFF with WIFF2-optimized parameters."""
|
|
318
|
-
# Use the same initialization as regular WIFF but with warnings
|
|
319
|
-
try:
|
|
320
|
-
from Clearcore2.Data.AnalystDataProvider import AnalystDataProviderFactory
|
|
321
|
-
from Clearcore2.Data.AnalystDataProvider import AnalystWiffDataProvider
|
|
322
|
-
|
|
323
|
-
self._wiffDataProvider = AnalystWiffDataProvider()
|
|
324
|
-
self._wiff_file = AnalystDataProviderFactory.CreateBatch(
|
|
325
|
-
self.filename,
|
|
326
|
-
self._wiffDataProvider,
|
|
327
|
-
)
|
|
328
|
-
|
|
329
|
-
self.sample_names = self._wiff_file.GetSampleNames()
|
|
330
|
-
self.sample_count = len(self.sample_names)
|
|
331
|
-
self.initialization_method = "enhanced_wiff_fallback"
|
|
332
|
-
|
|
333
|
-
warnings.warn(
|
|
334
|
-
"WIFF2 file opened using WIFF reader fallback. Some WIFF2-specific features may not be available.",
|
|
335
|
-
stacklevel=2,
|
|
336
|
-
)
|
|
337
|
-
|
|
338
|
-
except Exception as e:
|
|
339
|
-
raise RuntimeError(f"Enhanced WIFF fallback also failed: {e}")
|
|
340
|
-
|
|
341
|
-
def get_file_metadata(self) -> dict[str, Any]:
|
|
342
|
-
"""Get comprehensive file metadata for WIFF2 format."""
|
|
343
|
-
metadata: dict[str, Any] = {
|
|
344
|
-
"format": "WIFF2",
|
|
345
|
-
"sample_count": self.sample_count,
|
|
346
|
-
"sample_names": list(self.sample_names),
|
|
347
|
-
"file_size": os.path.getsize(self.filename),
|
|
348
|
-
"file_path": self.filename,
|
|
349
|
-
"initialization_method": self.initialization_method,
|
|
350
|
-
"samples": [], # Initialize samples list
|
|
351
|
-
}
|
|
352
|
-
|
|
353
|
-
if self.initialization_method == "alpharaw":
|
|
354
|
-
# Get metadata from alpharaw reader
|
|
355
|
-
try:
|
|
356
|
-
if hasattr(self._alpharaw_reader, "get_spectrum_count"):
|
|
357
|
-
metadata["total_spectra"] = self._alpharaw_reader.get_spectrum_count()
|
|
358
|
-
|
|
359
|
-
# Add alpharaw-specific metadata
|
|
360
|
-
for attr in ["creation_time", "instrument_model", "ms_levels"]:
|
|
361
|
-
if hasattr(self._alpharaw_reader, attr):
|
|
362
|
-
try:
|
|
363
|
-
value = getattr(self._alpharaw_reader, attr)
|
|
364
|
-
if callable(value):
|
|
365
|
-
metadata[attr] = value()
|
|
366
|
-
else:
|
|
367
|
-
metadata[attr] = value
|
|
368
|
-
except:
|
|
369
|
-
pass
|
|
370
|
-
|
|
371
|
-
except Exception as e:
|
|
372
|
-
metadata["metadata_error"] = str(e)
|
|
373
|
-
|
|
374
|
-
elif hasattr(self, "_wiff_file"):
|
|
375
|
-
# Get metadata from standard WIFF reader
|
|
376
|
-
try:
|
|
377
|
-
for i in range(self.sample_count):
|
|
378
|
-
sample = self._wiff_file.GetSample(i)
|
|
379
|
-
sample_info = {
|
|
380
|
-
"index": i,
|
|
381
|
-
"name": str(self.sample_names[i]),
|
|
382
|
-
}
|
|
383
|
-
|
|
384
|
-
if hasattr(sample, "Details"):
|
|
385
|
-
details = sample.Details
|
|
386
|
-
if hasattr(details, "AcquisitionDateTime"):
|
|
387
|
-
sample_info["acquisition_time"] = str(details.AcquisitionDateTime.ToString("O"))
|
|
388
|
-
|
|
389
|
-
if hasattr(sample, "MassSpectrometerSample"):
|
|
390
|
-
ms_sample = sample.MassSpectrometerSample
|
|
391
|
-
sample_info["experiment_count"] = ms_sample.ExperimentCount
|
|
392
|
-
|
|
393
|
-
metadata["samples"].append(sample_info)
|
|
394
|
-
|
|
395
|
-
except Exception as e:
|
|
396
|
-
metadata["metadata_error"] = str(e)
|
|
397
|
-
|
|
398
|
-
return metadata
|
|
399
|
-
|
|
400
|
-
def load_sample(self, sample_id: int = 0, **kwargs):
|
|
401
|
-
"""
|
|
402
|
-
Load sample data with WIFF2-optimized settings.
|
|
403
|
-
|
|
404
|
-
Parameters
|
|
405
|
-
----------
|
|
406
|
-
sample_id : int
|
|
407
|
-
Sample index to load
|
|
408
|
-
**kwargs
|
|
409
|
-
Additional parameters for data loading
|
|
410
|
-
|
|
411
|
-
Returns
|
|
412
|
-
-------
|
|
413
|
-
dict
|
|
414
|
-
Comprehensive spectral data dictionary
|
|
415
|
-
"""
|
|
416
|
-
if self.initialization_method == "alpharaw":
|
|
417
|
-
return self._load_sample_alpharaw(sample_id, **kwargs)
|
|
418
|
-
else:
|
|
419
|
-
return self._load_sample_standard(sample_id, **kwargs)
|
|
420
|
-
|
|
421
|
-
def _load_sample_alpharaw(self, sample_id: int, **kwargs):
|
|
422
|
-
"""Load sample using alpharaw reader."""
|
|
423
|
-
# Enhanced parameters for WIFF2
|
|
424
|
-
enhanced_params = {
|
|
425
|
-
"centroid": kwargs.get("centroid", True),
|
|
426
|
-
"centroid_ppm": kwargs.get("centroid_ppm", 15.0),
|
|
427
|
-
"keep_k_peaks": kwargs.get("keep_k_peaks", 3000),
|
|
428
|
-
}
|
|
429
|
-
|
|
430
|
-
try:
|
|
431
|
-
# Use alpharaw's data extraction
|
|
432
|
-
spectrum_df = self._alpharaw_reader.spectrum_df
|
|
433
|
-
peak_df = self._alpharaw_reader.peak_df
|
|
434
|
-
|
|
435
|
-
# Convert to the expected format
|
|
436
|
-
spectral_data = {
|
|
437
|
-
"peak_indices": spectrum_df[["peak_start_idx", "peak_stop_idx"]].values.flatten(),
|
|
438
|
-
"peak_mz": peak_df["mz"].values,
|
|
439
|
-
"peak_intensity": peak_df["intensity"].values,
|
|
440
|
-
"rt": spectrum_df["rt"].values,
|
|
441
|
-
"ms_level": spectrum_df["ms_level"].values,
|
|
442
|
-
"precursor_mz": spectrum_df.get("precursor_mz", np.full(len(spectrum_df), -1.0)).values,
|
|
443
|
-
"precursor_charge": spectrum_df.get("precursor_charge", np.full(len(spectrum_df), 0)).values,
|
|
444
|
-
"isolation_lower_mz": spectrum_df.get("isolation_lower_mz", np.full(len(spectrum_df), -1.0)).values,
|
|
445
|
-
"isolation_upper_mz": spectrum_df.get("isolation_upper_mz", np.full(len(spectrum_df), -1.0)).values,
|
|
446
|
-
"nce": spectrum_df.get("nce", np.full(len(spectrum_df), 0.0)).values,
|
|
447
|
-
"metadata": {
|
|
448
|
-
"format": "WIFF2",
|
|
449
|
-
"sample_id": sample_id,
|
|
450
|
-
"sample_name": str(self.sample_names[sample_id])
|
|
451
|
-
if sample_id < len(self.sample_names)
|
|
452
|
-
else f"Sample_{sample_id}",
|
|
453
|
-
"loading_params": enhanced_params,
|
|
454
|
-
"total_spectra": len(spectrum_df),
|
|
455
|
-
"total_peaks": len(peak_df),
|
|
456
|
-
"ms1_count": np.sum(spectrum_df["ms_level"] == 1),
|
|
457
|
-
"ms2_count": np.sum(spectrum_df["ms_level"] > 1),
|
|
458
|
-
"rt_range": [float(spectrum_df["rt"].min()), float(spectrum_df["rt"].max())]
|
|
459
|
-
if len(spectrum_df) > 0
|
|
460
|
-
else [0, 0],
|
|
461
|
-
"reader_method": "alpharaw",
|
|
462
|
-
},
|
|
463
|
-
}
|
|
464
|
-
|
|
465
|
-
return spectral_data
|
|
466
|
-
|
|
467
|
-
except Exception as e:
|
|
468
|
-
raise RuntimeError(f"Failed to load WIFF2 sample via alpharaw: {e}")
|
|
469
|
-
|
|
470
|
-
def _load_sample_standard(self, sample_id: int, **kwargs):
|
|
471
|
-
"""Load sample using standard WIFF reader with WIFF2 enhancements."""
|
|
472
|
-
# Use enhanced parameters optimized for WIFF2
|
|
473
|
-
enhanced_params = {
|
|
474
|
-
"centroid": kwargs.get("centroid", True),
|
|
475
|
-
"centroid_ppm": kwargs.get("centroid_ppm", 15.0), # Tighter for WIFF2
|
|
476
|
-
"ignore_empty_scans": kwargs.get("ignore_empty_scans", True),
|
|
477
|
-
"keep_k_peaks": kwargs.get("keep_k_peaks", 3000), # More peaks for WIFF2
|
|
478
|
-
}
|
|
479
|
-
|
|
480
|
-
if sample_id < 0 or sample_id >= self.sample_count:
|
|
481
|
-
raise ValueError(f"Sample ID {sample_id} out of range (0-{self.sample_count - 1})")
|
|
482
|
-
|
|
483
|
-
# Use the same loading approach as SciexWiffFileReader but with enhancements
|
|
484
|
-
sample = self._wiff_file.GetSample(sample_id)
|
|
485
|
-
ms_sample = sample.MassSpectrometerSample
|
|
486
|
-
|
|
487
|
-
# Process data (same as SciexWiffFileReader.load_sample but with enhanced params)
|
|
488
|
-
_peak_indices: list[int] = []
|
|
489
|
-
peak_mz_list: list[np.ndarray] = []
|
|
490
|
-
peak_intensity_list: list[np.ndarray] = []
|
|
491
|
-
rt_list: list[float] = []
|
|
492
|
-
ms_level_list: list[int] = []
|
|
493
|
-
precursor_mz_list: list[float] = []
|
|
494
|
-
precursor_charge_list: list[int] = []
|
|
495
|
-
nce_list: list[float] = []
|
|
496
|
-
isolation_lower_list: list[float] = []
|
|
497
|
-
isolation_upper_list: list[float] = []
|
|
498
|
-
|
|
499
|
-
exp_list = [ms_sample.GetMSExperiment(i) for i in range(ms_sample.ExperimentCount)]
|
|
500
|
-
|
|
501
|
-
for j in range(exp_list[0].Details.NumberOfScans):
|
|
502
|
-
for i in range(ms_sample.ExperimentCount):
|
|
503
|
-
exp = exp_list[i]
|
|
504
|
-
mass_spectrum = exp.GetMassSpectrum(j)
|
|
505
|
-
mass_spectrum_info = exp.GetMassSpectrumInfo(j)
|
|
506
|
-
details = exp.Details
|
|
507
|
-
ms_level = mass_spectrum_info.MSLevel
|
|
508
|
-
|
|
509
|
-
if (
|
|
510
|
-
ms_level > 1
|
|
511
|
-
and not details.IsSwath
|
|
512
|
-
and mass_spectrum.NumDataPoints <= 0
|
|
513
|
-
and enhanced_params["ignore_empty_scans"]
|
|
514
|
-
):
|
|
515
|
-
continue
|
|
516
|
-
|
|
517
|
-
mz_array = dot_net_array_to_np_array(mass_spectrum.GetActualXValues())
|
|
518
|
-
int_array = dot_net_array_to_np_array(mass_spectrum.GetActualYValues()).astype(np.float32)
|
|
519
|
-
|
|
520
|
-
if enhanced_params["centroid"]:
|
|
521
|
-
mz_array, int_array = naive_centroid(
|
|
522
|
-
mz_array,
|
|
523
|
-
int_array,
|
|
524
|
-
centroiding_ppm=enhanced_params["centroid_ppm"],
|
|
525
|
-
)
|
|
526
|
-
|
|
527
|
-
if len(mz_array) > enhanced_params["keep_k_peaks"]:
|
|
528
|
-
top_indices = np.argsort(int_array)[-enhanced_params["keep_k_peaks"] :]
|
|
529
|
-
top_indices = np.sort(top_indices)
|
|
530
|
-
mz_array = mz_array[top_indices]
|
|
531
|
-
int_array = int_array[top_indices]
|
|
532
|
-
|
|
533
|
-
peak_mz_list.append(mz_array)
|
|
534
|
-
peak_intensity_list.append(int_array)
|
|
535
|
-
_peak_indices.append(len(peak_mz_list[-1]))
|
|
536
|
-
|
|
537
|
-
rt_list.append(exp.GetRTFromExperimentCycle(j))
|
|
538
|
-
ms_level_list.append(ms_level)
|
|
539
|
-
|
|
540
|
-
# Enhanced precursor handling for WIFF2
|
|
541
|
-
center_mz = -1.0
|
|
542
|
-
isolation_window = 0.0
|
|
543
|
-
|
|
544
|
-
if ms_level > 1:
|
|
545
|
-
if details.IsSwath and details.MassRangeInfo.Length > 0:
|
|
546
|
-
try:
|
|
547
|
-
from WiffOps4Python import WiffOps as DotNetWiffOps
|
|
548
|
-
|
|
549
|
-
center_mz = DotNetWiffOps.get_center_mz(details)
|
|
550
|
-
isolation_window = DotNetWiffOps.get_isolation_window(details)
|
|
551
|
-
except:
|
|
552
|
-
center_mz = mass_spectrum_info.ParentMZ
|
|
553
|
-
isolation_window = 3.0
|
|
554
|
-
|
|
555
|
-
if isolation_window <= 0:
|
|
556
|
-
isolation_window = 3.0
|
|
557
|
-
if center_mz <= 0:
|
|
558
|
-
center_mz = mass_spectrum_info.ParentMZ
|
|
559
|
-
|
|
560
|
-
precursor_mz_list.append(center_mz)
|
|
561
|
-
precursor_charge_list.append(mass_spectrum_info.ParentChargeState)
|
|
562
|
-
nce_list.append(float(mass_spectrum_info.CollisionEnergy))
|
|
563
|
-
isolation_lower_list.append(center_mz - isolation_window / 2)
|
|
564
|
-
isolation_upper_list.append(center_mz + isolation_window / 2)
|
|
565
|
-
else:
|
|
566
|
-
precursor_mz_list.append(-1.0)
|
|
567
|
-
precursor_charge_list.append(0)
|
|
568
|
-
nce_list.append(0.0)
|
|
569
|
-
isolation_lower_list.append(-1.0)
|
|
570
|
-
isolation_upper_list.append(-1.0)
|
|
571
|
-
|
|
572
|
-
# Finalize arrays
|
|
573
|
-
peak_indices = np.empty(len(rt_list) + 1, np.int64)
|
|
574
|
-
peak_indices[0] = 0
|
|
575
|
-
peak_indices[1:] = np.cumsum(_peak_indices)
|
|
576
|
-
|
|
577
|
-
return {
|
|
578
|
-
"peak_indices": peak_indices,
|
|
579
|
-
"peak_mz": np.concatenate(peak_mz_list) if peak_mz_list else np.array([]),
|
|
580
|
-
"peak_intensity": np.concatenate(peak_intensity_list) if peak_intensity_list else np.array([]),
|
|
581
|
-
"rt": np.array(rt_list, dtype=np.float64),
|
|
582
|
-
"ms_level": np.array(ms_level_list, dtype=np.int8),
|
|
583
|
-
"precursor_mz": np.array(precursor_mz_list, dtype=np.float64),
|
|
584
|
-
"precursor_charge": np.array(precursor_charge_list, dtype=np.int8),
|
|
585
|
-
"isolation_lower_mz": np.array(isolation_lower_list, dtype=np.float64),
|
|
586
|
-
"isolation_upper_mz": np.array(isolation_upper_list, dtype=np.float64),
|
|
587
|
-
"nce": np.array(nce_list, dtype=np.float32),
|
|
588
|
-
"metadata": {
|
|
589
|
-
"format": "WIFF2",
|
|
590
|
-
"sample_id": sample_id,
|
|
591
|
-
"sample_name": str(self.sample_names[sample_id]),
|
|
592
|
-
"loading_params": enhanced_params,
|
|
593
|
-
"total_spectra": len(rt_list),
|
|
594
|
-
"total_peaks": sum(_peak_indices),
|
|
595
|
-
"ms1_count": np.sum(np.array(ms_level_list) == 1),
|
|
596
|
-
"ms2_count": np.sum(np.array(ms_level_list) > 1),
|
|
597
|
-
"rt_range": [float(np.min(rt_list)), float(np.max(rt_list))] if rt_list else [0, 0],
|
|
598
|
-
"creation_time": str(sample.Details.AcquisitionDateTime.ToString("O"))
|
|
599
|
-
if hasattr(sample, "Details")
|
|
600
|
-
else "",
|
|
601
|
-
"reader_method": "standard_enhanced",
|
|
602
|
-
},
|
|
603
|
-
}
|
|
604
|
-
|
|
605
|
-
def close(self):
|
|
606
|
-
"""Close the WIFF2 file and clean up resources."""
|
|
607
|
-
if hasattr(self, "_wiffDataProvider"):
|
|
608
|
-
try:
|
|
609
|
-
self._wiffDataProvider.Close()
|
|
610
|
-
except:
|
|
611
|
-
pass
|
|
612
|
-
|
|
613
|
-
if hasattr(self, "_alpharaw_reader"):
|
|
614
|
-
try:
|
|
615
|
-
self._alpharaw_reader.close()
|
|
616
|
-
except:
|
|
617
|
-
pass
|
|
618
|
-
|
|
619
|
-
def __enter__(self):
|
|
620
|
-
return self
|
|
621
|
-
|
|
622
|
-
def __exit__(self, exc_type, exc_val, exc_tb):
|
|
623
|
-
self.close()
|
|
624
|
-
|
|
625
|
-
def __repr__(self):
|
|
626
|
-
return f"SciexWiff2FileReader(file='{self.filename}', samples={self.sample_count}, method={self.initialization_method})"
|
|
627
|
-
|
|
628
|
-
|
|
629
183
|
class SciexWiffFileReader:
|
|
630
184
|
"""
|
|
631
|
-
Direct implementation of Sciex WIFF file reader using the DLLs
|
|
185
|
+
Direct implementation of Sciex WIFF file reader using the Sciex DLLs.
|
|
632
186
|
"""
|
|
633
187
|
|
|
634
188
|
def __init__(self, filename: str):
|
|
@@ -636,7 +190,7 @@ class SciexWiffFileReader:
|
|
|
636
190
|
raise ValueError(
|
|
637
191
|
"Dotnet-based dependencies are required for reading Sciex files. "
|
|
638
192
|
"Do you have pythonnet and/or mono installed? "
|
|
639
|
-
"
|
|
193
|
+
"Please ensure pythonnet and Sciex DLLs are properly installed.",
|
|
640
194
|
)
|
|
641
195
|
|
|
642
196
|
self._wiffDataProvider = AnalystWiffDataProvider()
|
|
@@ -646,10 +200,18 @@ class SciexWiffFileReader:
|
|
|
646
200
|
)
|
|
647
201
|
self.sample_names = self._wiff_file.GetSampleNames()
|
|
648
202
|
|
|
649
|
-
def close(self):
|
|
203
|
+
def close(self) -> None:
|
|
650
204
|
"""Close the file and clean up resources."""
|
|
651
205
|
self._wiffDataProvider.Close()
|
|
652
206
|
|
|
207
|
+
def __enter__(self) -> 'SciexWiffFileReader':
|
|
208
|
+
"""Context manager entry."""
|
|
209
|
+
return self
|
|
210
|
+
|
|
211
|
+
def __exit__(self, exc_type, exc_val, exc_tb) -> None:
|
|
212
|
+
"""Context manager exit."""
|
|
213
|
+
self.close()
|
|
214
|
+
|
|
653
215
|
def load_sample(
|
|
654
216
|
self,
|
|
655
217
|
sample_id: int,
|
|
@@ -690,6 +252,7 @@ class SciexWiffFileReader:
|
|
|
690
252
|
peak_intensity_array_list: list[np.ndarray] = []
|
|
691
253
|
rt_list: list[float] = []
|
|
692
254
|
ms_level_list: list[int] = []
|
|
255
|
+
polarity_list: list[str] = []
|
|
693
256
|
precursor_mz_list: list[float] = []
|
|
694
257
|
precursor_charge_list: list[int] = []
|
|
695
258
|
ce_list: list[float] = []
|
|
@@ -708,6 +271,13 @@ class SciexWiffFileReader:
|
|
|
708
271
|
|
|
709
272
|
if ms_level > 1 and not details.IsSwath and mass_spectrum.NumDataPoints <= 0 and ignore_empty_scans:
|
|
710
273
|
continue
|
|
274
|
+
if exp.Details.Polarity == exp.Details.Polarity.Positive:
|
|
275
|
+
pol = 'positive'
|
|
276
|
+
elif exp.Details.Polarity == exp.Details.Polarity.Negative:
|
|
277
|
+
pol = 'negative'
|
|
278
|
+
else:
|
|
279
|
+
pol = ''
|
|
280
|
+
polarity_list.append(pol)
|
|
711
281
|
|
|
712
282
|
mz_array = dot_net_array_to_np_array(mass_spectrum.GetActualXValues())
|
|
713
283
|
int_array = dot_net_array_to_np_array(
|
|
@@ -768,6 +338,7 @@ class SciexWiffFileReader:
|
|
|
768
338
|
"peak_intensity": np.concatenate(peak_intensity_array_list),
|
|
769
339
|
"rt": np.array(rt_list, dtype=np.float64),
|
|
770
340
|
"ms_level": np.array(ms_level_list, dtype=np.int8),
|
|
341
|
+
"polarity": np.array(polarity_list, dtype="U8"),
|
|
771
342
|
"precursor_mz": np.array(precursor_mz_list, dtype=np.float64),
|
|
772
343
|
"precursor_charge": np.array(precursor_charge_list, dtype=np.int8),
|
|
773
344
|
"isolation_lower_mz": np.array(isolation_lower_mz_list),
|
|
@@ -778,14 +349,15 @@ class SciexWiffFileReader:
|
|
|
778
349
|
|
|
779
350
|
class SciexWiffData:
|
|
780
351
|
"""
|
|
781
|
-
Standalone Sciex WIFF data reader class that
|
|
782
|
-
functionality
|
|
352
|
+
Standalone Sciex WIFF data reader class that provides WIFF data reading
|
|
353
|
+
functionality using Sciex DLLs directly.
|
|
783
354
|
"""
|
|
784
355
|
|
|
785
356
|
# Column data types mapping
|
|
786
357
|
column_dtypes: ClassVar[dict[str, Any]] = {
|
|
787
358
|
"rt": np.float64,
|
|
788
359
|
"ms_level": np.int8,
|
|
360
|
+
"polarity": "U8",
|
|
789
361
|
"precursor_mz": np.float64,
|
|
790
362
|
"isolation_lower_mz": np.float64,
|
|
791
363
|
"isolation_upper_mz": np.float64,
|
|
@@ -795,20 +367,17 @@ class SciexWiffData:
|
|
|
795
367
|
"activation": "U",
|
|
796
368
|
}
|
|
797
369
|
|
|
798
|
-
def __init__(self, centroided: bool = True
|
|
370
|
+
def __init__(self, centroided: bool = True) -> None:
|
|
799
371
|
"""
|
|
800
372
|
Parameters
|
|
801
373
|
----------
|
|
802
374
|
centroided : bool, optional
|
|
803
375
|
If peaks will be centroided after loading, by default True.
|
|
804
|
-
save_as_hdf : bool, optional
|
|
805
|
-
Automatically save hdf after load raw data, by default False.
|
|
806
376
|
"""
|
|
807
377
|
self.spectrum_df: pd.DataFrame = pd.DataFrame()
|
|
808
378
|
self.peak_df: pd.DataFrame = pd.DataFrame()
|
|
809
379
|
self._raw_file_path = ""
|
|
810
380
|
self.centroided = centroided
|
|
811
|
-
self._save_as_hdf = save_as_hdf
|
|
812
381
|
self.creation_time = ""
|
|
813
382
|
self.file_type = "sciex"
|
|
814
383
|
self.instrument = "sciex"
|
|
@@ -953,42 +522,12 @@ class SciexWiffData:
|
|
|
953
522
|
tuple
|
|
954
523
|
(mz_array, intensity_array)
|
|
955
524
|
"""
|
|
956
|
-
start, end = self.spectrum_df[["peak_start_idx", "peak_stop_idx"]].values
|
|
957
|
-
spec_idx,
|
|
958
|
-
:,
|
|
959
|
-
]
|
|
525
|
+
start, end = self.spectrum_df.iloc[spec_idx][["peak_start_idx", "peak_stop_idx"]].values
|
|
960
526
|
return (
|
|
961
527
|
self.peak_df.mz.values[start:end],
|
|
962
528
|
self.peak_df.intensity.values[start:end],
|
|
963
529
|
)
|
|
964
530
|
|
|
965
|
-
def save_hdf(self, hdf_file_path: str) -> None:
|
|
966
|
-
"""
|
|
967
|
-
Save data to HDF5 file (placeholder implementation).
|
|
968
|
-
|
|
969
|
-
Parameters
|
|
970
|
-
----------
|
|
971
|
-
hdf_file_path : str
|
|
972
|
-
Path to save the HDF5 file
|
|
973
|
-
"""
|
|
974
|
-
# This would require implementing HDF5 saving functionality
|
|
975
|
-
# For now, just save as pickle or implement as needed
|
|
976
|
-
import pickle
|
|
977
|
-
|
|
978
|
-
with open(hdf_file_path.replace(".hdf", ".pkl"), "wb") as f:
|
|
979
|
-
pickle.dump(
|
|
980
|
-
{
|
|
981
|
-
"spectrum_df": self.spectrum_df,
|
|
982
|
-
"peak_df": self.peak_df,
|
|
983
|
-
"creation_time": self.creation_time,
|
|
984
|
-
"raw_file_path": self.raw_file_path,
|
|
985
|
-
"file_type": self.file_type,
|
|
986
|
-
"centroided": self.centroided,
|
|
987
|
-
"instrument": self.instrument,
|
|
988
|
-
},
|
|
989
|
-
f,
|
|
990
|
-
)
|
|
991
|
-
|
|
992
531
|
def __repr__(self) -> str:
|
|
993
532
|
return f"SciexWiffData(file_path='{self.raw_file_path}', spectra={len(self.spectrum_df)})"
|
|
994
533
|
|
|
@@ -1014,92 +553,7 @@ def load_wiff_file(filename: str, **kwargs) -> SciexWiffData:
|
|
|
1014
553
|
wiff_data.import_raw(filename)
|
|
1015
554
|
return wiff_data
|
|
1016
555
|
|
|
1017
|
-
|
|
1018
|
-
def load_wiff2_file(filename: str, **kwargs) -> dict[str, Any]:
|
|
1019
|
-
"""
|
|
1020
|
-
Load a WIFF2 file and return spectral data.
|
|
1021
|
-
|
|
1022
|
-
Note: WIFF2 format support is limited with current DLL versions.
|
|
1023
|
-
If you encounter format incompatibility errors, try using the regular
|
|
1024
|
-
WIFF file instead or convert WIFF2 to WIFF format.
|
|
1025
|
-
|
|
1026
|
-
Parameters
|
|
1027
|
-
----------
|
|
1028
|
-
filename : str
|
|
1029
|
-
Path to the WIFF2 file
|
|
1030
|
-
**kwargs
|
|
1031
|
-
Additional arguments for WIFF2 loading (sample_id, centroid, etc.)
|
|
1032
|
-
|
|
1033
|
-
Returns
|
|
1034
|
-
-------
|
|
1035
|
-
dict
|
|
1036
|
-
Spectral data dictionary with enhanced WIFF2 information
|
|
1037
|
-
|
|
1038
|
-
Raises
|
|
1039
|
-
------
|
|
1040
|
-
RuntimeError
|
|
1041
|
-
If WIFF2 format is not supported by current DLL combination
|
|
1042
|
-
"""
|
|
1043
|
-
sample_id = kwargs.pop("sample_id", 0)
|
|
1044
|
-
|
|
1045
|
-
try:
|
|
1046
|
-
with SciexWiff2FileReader(filename) as reader:
|
|
1047
|
-
return reader.load_sample(sample_id, **kwargs) # type: ignore[no-any-return]
|
|
1048
|
-
except RuntimeError as e:
|
|
1049
|
-
if "format is not supported" in str(e):
|
|
1050
|
-
# Suggest using regular WIFF file if available
|
|
1051
|
-
wiff_file = filename.replace(".wiff2", ".wiff")
|
|
1052
|
-
if os.path.exists(wiff_file):
|
|
1053
|
-
raise RuntimeError(
|
|
1054
|
-
f"WIFF2 format not supported. However, a regular WIFF file was found: "
|
|
1055
|
-
f"'{wiff_file}'. Try using load_wiff_file('{wiff_file}') instead.",
|
|
1056
|
-
) from e
|
|
1057
|
-
else:
|
|
1058
|
-
raise RuntimeError(
|
|
1059
|
-
f"WIFF2 format not supported and no corresponding WIFF file found. Original error: {e}",
|
|
1060
|
-
) from e
|
|
1061
|
-
else:
|
|
1062
|
-
raise
|
|
1063
|
-
|
|
1064
|
-
|
|
1065
|
-
def load_wiff_file_smart(filename: str, **kwargs) -> dict[str, Any] | SciexWiffData:
|
|
1066
|
-
"""
|
|
1067
|
-
Smart WIFF file loader that automatically handles WIFF and WIFF2 formats.
|
|
1068
|
-
|
|
1069
|
-
This function will first try to load the file as specified, and if it's a WIFF2
|
|
1070
|
-
file that fails due to format incompatibility, it will suggest alternatives.
|
|
1071
|
-
|
|
1072
|
-
Parameters
|
|
1073
|
-
----------
|
|
1074
|
-
filename : str
|
|
1075
|
-
Path to the WIFF or WIFF2 file
|
|
1076
|
-
**kwargs
|
|
1077
|
-
Additional arguments for loading (sample_id, centroid, etc.)
|
|
1078
|
-
|
|
1079
|
-
Returns
|
|
1080
|
-
-------
|
|
1081
|
-
dict
|
|
1082
|
-
Spectral data dictionary
|
|
1083
|
-
"""
|
|
1084
|
-
if filename.lower().endswith(".wiff2"):
|
|
1085
|
-
try:
|
|
1086
|
-
return load_wiff2_file(filename, **kwargs)
|
|
1087
|
-
except RuntimeError as e:
|
|
1088
|
-
if "format is not supported" in str(e):
|
|
1089
|
-
# Check if regular WIFF file exists
|
|
1090
|
-
wiff_file = filename.replace(".wiff2", ".wiff")
|
|
1091
|
-
if os.path.exists(wiff_file):
|
|
1092
|
-
warnings.warn(
|
|
1093
|
-
f"WIFF2 format not supported, falling back to WIFF file: {wiff_file}",
|
|
1094
|
-
stacklevel=2,
|
|
1095
|
-
)
|
|
1096
|
-
return load_wiff_file(wiff_file, **kwargs)
|
|
1097
|
-
raise
|
|
1098
|
-
else:
|
|
1099
|
-
return load_wiff_file(filename, **kwargs)
|
|
1100
|
-
|
|
1101
|
-
|
|
1102
|
-
def get_sample_names(filename: str) -> list:
|
|
556
|
+
def get_sample_names(filename: str) -> list[str]:
|
|
1103
557
|
"""
|
|
1104
558
|
Get the sample names from a WIFF file.
|
|
1105
559
|
|
|
@@ -1110,51 +564,11 @@ def get_sample_names(filename: str) -> list:
|
|
|
1110
564
|
|
|
1111
565
|
Returns
|
|
1112
566
|
-------
|
|
1113
|
-
list
|
|
567
|
+
list[str]
|
|
1114
568
|
List of sample names
|
|
1115
569
|
"""
|
|
1116
|
-
|
|
1117
|
-
try:
|
|
570
|
+
with SciexWiffFileReader(filename) as reader:
|
|
1118
571
|
return list(reader.sample_names)
|
|
1119
|
-
finally:
|
|
1120
|
-
reader.close()
|
|
1121
|
-
|
|
1122
|
-
|
|
1123
|
-
def get_wiff2_sample_names(filename: str) -> list:
|
|
1124
|
-
"""
|
|
1125
|
-
Get the sample names from a WIFF2 file.
|
|
1126
|
-
|
|
1127
|
-
Parameters
|
|
1128
|
-
----------
|
|
1129
|
-
filename : str
|
|
1130
|
-
Path to the WIFF2 file
|
|
1131
|
-
|
|
1132
|
-
Returns
|
|
1133
|
-
-------
|
|
1134
|
-
list
|
|
1135
|
-
List of sample names
|
|
1136
|
-
"""
|
|
1137
|
-
with SciexWiff2FileReader(filename) as reader:
|
|
1138
|
-
return list(reader.sample_names)
|
|
1139
|
-
|
|
1140
|
-
|
|
1141
|
-
def get_wiff2_metadata(filename: str) -> dict[str, Any]:
|
|
1142
|
-
"""
|
|
1143
|
-
Get comprehensive metadata from a WIFF2 file.
|
|
1144
|
-
|
|
1145
|
-
Parameters
|
|
1146
|
-
----------
|
|
1147
|
-
filename : str
|
|
1148
|
-
Path to the WIFF2 file
|
|
1149
|
-
|
|
1150
|
-
Returns
|
|
1151
|
-
-------
|
|
1152
|
-
dict
|
|
1153
|
-
Comprehensive WIFF2 file metadata
|
|
1154
|
-
"""
|
|
1155
|
-
with SciexWiff2FileReader(filename) as reader:
|
|
1156
|
-
return reader.get_file_metadata() # type: ignore[no-any-return]
|
|
1157
|
-
|
|
1158
572
|
|
|
1159
573
|
# Example usage and testing
|
|
1160
574
|
if __name__ == "__main__":
|