masster 0.3.10__py3-none-any.whl → 0.3.11__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of masster might be problematic. Click here for more details.
- masster/docs/SCX_API_Documentation.md +0 -0
- masster/docs/SCX_DLL_Analysis.md +0 -0
- masster/logger.py +92 -78
- masster/sample/defaults/find_features_def.py +16 -6
- masster/sample/defaults/sample_def.py +1 -1
- masster/sample/h5.py +2 -2
- masster/sample/helpers.py +137 -136
- masster/sample/load.py +13 -9
- masster/sample/plot.py +156 -131
- masster/sample/processing.py +18 -12
- masster/sample/sample.py +4 -4
- masster/sample/sample5_schema.json +62 -62
- masster/sample/save.py +16 -13
- masster/sample/sciex.py +187 -176
- masster/study/defaults/align_def.py +224 -6
- masster/study/defaults/fill_chrom_def.py +1 -5
- masster/study/defaults/integrate_chrom_def.py +1 -5
- masster/study/defaults/study_def.py +2 -2
- masster/study/export.py +144 -131
- masster/study/h5.py +193 -133
- masster/study/helpers.py +293 -245
- masster/study/helpers_optimized.py +99 -57
- masster/study/load.py +51 -25
- masster/study/plot.py +453 -17
- masster/study/processing.py +159 -76
- masster/study/save.py +7 -7
- masster/study/study.py +97 -88
- masster/study/study5_schema.json +82 -82
- {masster-0.3.10.dist-info → masster-0.3.11.dist-info}/METADATA +1 -1
- {masster-0.3.10.dist-info → masster-0.3.11.dist-info}/RECORD +33 -31
- {masster-0.3.10.dist-info → masster-0.3.11.dist-info}/WHEEL +0 -0
- {masster-0.3.10.dist-info → masster-0.3.11.dist-info}/entry_points.txt +0 -0
- {masster-0.3.10.dist-info → masster-0.3.11.dist-info}/licenses/LICENSE +0 -0
masster/sample/sciex.py
CHANGED
|
@@ -146,7 +146,7 @@ def dot_net_array_to_np_array(src):
|
|
|
146
146
|
src_ptr = src_hndl.AddrOfPinnedObject().ToInt64()
|
|
147
147
|
buf_type = ctypes.c_double * len(src)
|
|
148
148
|
cbuf = buf_type.from_address(src_ptr)
|
|
149
|
-
dest = np.frombuffer(cbuf, dtype=
|
|
149
|
+
dest = np.frombuffer(cbuf, dtype="float64").copy() # type: ignore[call-overload]
|
|
150
150
|
finally:
|
|
151
151
|
if src_hndl.IsAllocated:
|
|
152
152
|
src_hndl.Free()
|
|
@@ -156,19 +156,19 @@ def dot_net_array_to_np_array(src):
|
|
|
156
156
|
class SciexWiff2FileReader:
|
|
157
157
|
"""
|
|
158
158
|
Specialized reader for Sciex WIFF2 files using optimal DLL combination.
|
|
159
|
-
|
|
159
|
+
|
|
160
160
|
WIFF2 is a newer format from Sciex that may have enhanced capabilities
|
|
161
161
|
compared to the original WIFF format. This reader is optimized specifically
|
|
162
162
|
for WIFF2 files and uses the most appropriate DLLs for maximum information extraction.
|
|
163
|
-
|
|
163
|
+
|
|
164
164
|
Based on comprehensive DLL analysis, WIFF2 files require specific handling and
|
|
165
165
|
may use different underlying storage mechanisms than regular WIFF files.
|
|
166
166
|
"""
|
|
167
|
-
|
|
167
|
+
|
|
168
168
|
def __init__(self, filename: str):
|
|
169
169
|
"""
|
|
170
170
|
Initialize WIFF2 reader with file path.
|
|
171
|
-
|
|
171
|
+
|
|
172
172
|
Parameters
|
|
173
173
|
----------
|
|
174
174
|
filename : str
|
|
@@ -177,44 +177,45 @@ class SciexWiff2FileReader:
|
|
|
177
177
|
if not HAS_DOTNET:
|
|
178
178
|
raise ValueError(
|
|
179
179
|
"Dotnet-based dependencies are required for reading Sciex WIFF2 files. "
|
|
180
|
-
"Install pythonnet and ensure alpharaw DLLs are available."
|
|
180
|
+
"Install pythonnet and ensure alpharaw DLLs are available.",
|
|
181
181
|
)
|
|
182
|
-
|
|
182
|
+
|
|
183
183
|
self.filename = filename
|
|
184
184
|
self.ext_dir = self._find_dll_directory()
|
|
185
185
|
self._ensure_wiff2_dlls_loaded()
|
|
186
|
-
|
|
186
|
+
|
|
187
187
|
# Try different initialization strategies for WIFF2
|
|
188
188
|
self._initialize_wiff2_reader()
|
|
189
|
-
|
|
189
|
+
|
|
190
190
|
def _find_dll_directory(self):
|
|
191
191
|
"""Find the alpharaw DLL directory using the same discovery pattern."""
|
|
192
192
|
for site_dir in site.getsitepackages():
|
|
193
193
|
potential_ext_dir = os.path.join(site_dir, "alpharaw", "ext", "sciex")
|
|
194
194
|
if os.path.exists(potential_ext_dir):
|
|
195
195
|
return potential_ext_dir
|
|
196
|
-
|
|
196
|
+
|
|
197
197
|
# Fallback to alpharaw module location
|
|
198
198
|
try:
|
|
199
199
|
import alpharaw
|
|
200
|
+
|
|
200
201
|
alpharaw_dir = os.path.dirname(alpharaw.__file__)
|
|
201
202
|
return os.path.join(alpharaw_dir, "ext", "sciex")
|
|
202
203
|
except ImportError:
|
|
203
204
|
raise ImportError("Could not find alpharaw DLL directory")
|
|
204
|
-
|
|
205
|
+
|
|
205
206
|
def _ensure_wiff2_dlls_loaded(self):
|
|
206
207
|
"""Ensure all necessary WIFF2 DLLs are loaded."""
|
|
207
208
|
# Key DLLs identified through comprehensive analysis
|
|
208
209
|
required_dlls = [
|
|
209
210
|
"Clearcore2.Data.Wiff2.dll", # Primary WIFF2 support
|
|
210
|
-
"Clearcore2.Data.AnalystDataProvider.dll",
|
|
211
|
+
"Clearcore2.Data.AnalystDataProvider.dll",
|
|
211
212
|
"Clearcore2.Data.dll",
|
|
212
213
|
"Clearcore2.Data.Common.dll",
|
|
213
214
|
"Clearcore2.Data.Core.dll",
|
|
214
215
|
"Clearcore2.StructuredStorage.dll", # For WIFF2 storage format
|
|
215
|
-
"WiffOps4Python.dll"
|
|
216
|
+
"WiffOps4Python.dll",
|
|
216
217
|
]
|
|
217
|
-
|
|
218
|
+
|
|
218
219
|
for dll in required_dlls:
|
|
219
220
|
dll_path = os.path.join(self.ext_dir, dll)
|
|
220
221
|
if os.path.exists(dll_path):
|
|
@@ -224,77 +225,77 @@ class SciexWiff2FileReader:
|
|
|
224
225
|
pass # May already be loaded
|
|
225
226
|
else:
|
|
226
227
|
warnings.warn(f"WIFF2 DLL not found: {dll}", stacklevel=2)
|
|
227
|
-
|
|
228
|
+
|
|
228
229
|
def _initialize_wiff2_reader(self):
|
|
229
230
|
"""
|
|
230
231
|
Initialize WIFF2 reader with fallback strategies.
|
|
231
|
-
|
|
232
|
+
|
|
232
233
|
WIFF2 files may require different initialization approaches than WIFF files.
|
|
233
234
|
We try multiple strategies based on the comprehensive DLL analysis.
|
|
234
235
|
"""
|
|
235
236
|
initialization_errors = []
|
|
236
|
-
|
|
237
|
+
|
|
237
238
|
# Strategy 1: Try standard AnalystDataProvider (may work for some WIFF2)
|
|
238
239
|
try:
|
|
239
240
|
from Clearcore2.Data.AnalystDataProvider import AnalystDataProviderFactory
|
|
240
241
|
from Clearcore2.Data.AnalystDataProvider import AnalystWiffDataProvider
|
|
241
|
-
|
|
242
|
+
|
|
242
243
|
self._wiffDataProvider = AnalystWiffDataProvider()
|
|
243
244
|
self._wiff_file = AnalystDataProviderFactory.CreateBatch(
|
|
244
245
|
self.filename,
|
|
245
246
|
self._wiffDataProvider,
|
|
246
247
|
)
|
|
247
|
-
|
|
248
|
+
|
|
248
249
|
self.sample_names = self._wiff_file.GetSampleNames()
|
|
249
250
|
self.sample_count = len(self.sample_names)
|
|
250
251
|
self.initialization_method = "AnalystDataProvider"
|
|
251
252
|
return
|
|
252
|
-
|
|
253
|
+
|
|
253
254
|
except Exception as e:
|
|
254
255
|
initialization_errors.append(f"AnalystDataProvider: {e}")
|
|
255
|
-
|
|
256
|
+
|
|
256
257
|
# Strategy 2: Try alpharaw's SciexWiffData (correct API)
|
|
257
258
|
try:
|
|
258
259
|
from alpharaw.sciex import SciexWiffData
|
|
259
|
-
|
|
260
|
+
|
|
260
261
|
self._alpharaw_reader = SciexWiffData()
|
|
261
262
|
self._alpharaw_reader.import_raw(self.filename)
|
|
262
|
-
|
|
263
|
+
|
|
263
264
|
# Extract basic information (SciexWiffData doesn't have sample_names property)
|
|
264
|
-
self.sample_names = [
|
|
265
|
+
self.sample_names = ["Sample_0"] # Default since WIFF2 format needs investigation
|
|
265
266
|
self.sample_count = 1
|
|
266
267
|
self.initialization_method = "alpharaw_SciexWiffData"
|
|
267
|
-
|
|
268
|
+
|
|
268
269
|
# Store the reader for later use
|
|
269
270
|
self._wiff_data = self._alpharaw_reader
|
|
270
271
|
return
|
|
271
|
-
|
|
272
|
+
|
|
272
273
|
except Exception as e:
|
|
273
274
|
initialization_errors.append(f"alpharaw_SciexWiffData: {e}")
|
|
274
|
-
|
|
275
|
+
|
|
275
276
|
# Strategy 3: Try direct WIFF2 DLL approach
|
|
276
277
|
try:
|
|
277
278
|
# Check if file is recognized as WIFF2
|
|
278
279
|
from Clearcore2.Data.AnalystDataProvider import DataProviderHelper
|
|
279
|
-
|
|
280
|
+
|
|
280
281
|
is_wiff2 = DataProviderHelper.IsMdWiffFile(self.filename)
|
|
281
282
|
if is_wiff2:
|
|
282
283
|
# Try specialized WIFF2 handling
|
|
283
284
|
warnings.warn(
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
stacklevel=2
|
|
285
|
+
"File detected as WIFF2 format but specialized reader not fully implemented. "
|
|
286
|
+
"Consider using alpharaw.ms_data_from_file() directly.",
|
|
287
|
+
stacklevel=2,
|
|
287
288
|
)
|
|
288
289
|
# For now, fall back to treating as regular WIFF with enhanced parameters
|
|
289
290
|
self._initialize_as_enhanced_wiff()
|
|
290
291
|
return
|
|
291
|
-
|
|
292
|
+
|
|
292
293
|
except Exception as e:
|
|
293
294
|
initialization_errors.append(f"WIFF2 detection: {e}")
|
|
294
|
-
|
|
295
|
+
|
|
295
296
|
# If all strategies fail, provide comprehensive error information with helpful suggestions
|
|
296
297
|
error_summary = "; ".join(initialization_errors)
|
|
297
|
-
|
|
298
|
+
|
|
298
299
|
# Check if this is a WIFF2 format issue specifically
|
|
299
300
|
if "could not be opened (result = -2147286960)" in error_summary:
|
|
300
301
|
raise RuntimeError(
|
|
@@ -303,61 +304,60 @@ class SciexWiff2FileReader:
|
|
|
303
304
|
f"The file '{self.filename}' appears to be a valid WIFF2 file but requires "
|
|
304
305
|
f"newer or different DLLs than currently available. "
|
|
305
306
|
f"Try converting the WIFF2 file to WIFF format or use alternative tools. "
|
|
306
|
-
f"Full error details: {error_summary}"
|
|
307
|
+
f"Full error details: {error_summary}",
|
|
307
308
|
)
|
|
308
309
|
else:
|
|
309
310
|
raise RuntimeError(
|
|
310
311
|
f"Failed to initialize WIFF2 reader with any strategy. "
|
|
311
312
|
f"Errors: {error_summary}. "
|
|
312
|
-
f"The file may be corrupted, locked, or require different dependencies."
|
|
313
|
+
f"The file may be corrupted, locked, or require different dependencies.",
|
|
313
314
|
)
|
|
314
|
-
|
|
315
|
+
|
|
315
316
|
def _initialize_as_enhanced_wiff(self):
|
|
316
317
|
"""Fallback: Initialize as enhanced WIFF with WIFF2-optimized parameters."""
|
|
317
318
|
# Use the same initialization as regular WIFF but with warnings
|
|
318
319
|
try:
|
|
319
320
|
from Clearcore2.Data.AnalystDataProvider import AnalystDataProviderFactory
|
|
320
321
|
from Clearcore2.Data.AnalystDataProvider import AnalystWiffDataProvider
|
|
321
|
-
|
|
322
|
+
|
|
322
323
|
self._wiffDataProvider = AnalystWiffDataProvider()
|
|
323
324
|
self._wiff_file = AnalystDataProviderFactory.CreateBatch(
|
|
324
325
|
self.filename,
|
|
325
326
|
self._wiffDataProvider,
|
|
326
327
|
)
|
|
327
|
-
|
|
328
|
+
|
|
328
329
|
self.sample_names = self._wiff_file.GetSampleNames()
|
|
329
330
|
self.sample_count = len(self.sample_names)
|
|
330
331
|
self.initialization_method = "enhanced_wiff_fallback"
|
|
331
|
-
|
|
332
|
+
|
|
332
333
|
warnings.warn(
|
|
333
|
-
"WIFF2 file opened using WIFF reader fallback. "
|
|
334
|
-
|
|
335
|
-
stacklevel=2
|
|
334
|
+
"WIFF2 file opened using WIFF reader fallback. Some WIFF2-specific features may not be available.",
|
|
335
|
+
stacklevel=2,
|
|
336
336
|
)
|
|
337
|
-
|
|
337
|
+
|
|
338
338
|
except Exception as e:
|
|
339
339
|
raise RuntimeError(f"Enhanced WIFF fallback also failed: {e}")
|
|
340
340
|
|
|
341
341
|
def get_file_metadata(self) -> dict[str, Any]:
|
|
342
342
|
"""Get comprehensive file metadata for WIFF2 format."""
|
|
343
343
|
metadata: dict[str, Any] = {
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
|
|
347
|
-
|
|
348
|
-
|
|
349
|
-
|
|
350
|
-
|
|
344
|
+
"format": "WIFF2",
|
|
345
|
+
"sample_count": self.sample_count,
|
|
346
|
+
"sample_names": list(self.sample_names),
|
|
347
|
+
"file_size": os.path.getsize(self.filename),
|
|
348
|
+
"file_path": self.filename,
|
|
349
|
+
"initialization_method": self.initialization_method,
|
|
350
|
+
"samples": [], # Initialize samples list
|
|
351
351
|
}
|
|
352
|
-
|
|
352
|
+
|
|
353
353
|
if self.initialization_method == "alpharaw":
|
|
354
354
|
# Get metadata from alpharaw reader
|
|
355
355
|
try:
|
|
356
|
-
if hasattr(self._alpharaw_reader,
|
|
357
|
-
metadata[
|
|
358
|
-
|
|
356
|
+
if hasattr(self._alpharaw_reader, "get_spectrum_count"):
|
|
357
|
+
metadata["total_spectra"] = self._alpharaw_reader.get_spectrum_count()
|
|
358
|
+
|
|
359
359
|
# Add alpharaw-specific metadata
|
|
360
|
-
for attr in [
|
|
360
|
+
for attr in ["creation_time", "instrument_model", "ms_levels"]:
|
|
361
361
|
if hasattr(self._alpharaw_reader, attr):
|
|
362
362
|
try:
|
|
363
363
|
value = getattr(self._alpharaw_reader, attr)
|
|
@@ -367,47 +367,47 @@ class SciexWiff2FileReader:
|
|
|
367
367
|
metadata[attr] = value
|
|
368
368
|
except:
|
|
369
369
|
pass
|
|
370
|
-
|
|
370
|
+
|
|
371
371
|
except Exception as e:
|
|
372
|
-
metadata[
|
|
373
|
-
|
|
374
|
-
elif hasattr(self,
|
|
372
|
+
metadata["metadata_error"] = str(e)
|
|
373
|
+
|
|
374
|
+
elif hasattr(self, "_wiff_file"):
|
|
375
375
|
# Get metadata from standard WIFF reader
|
|
376
376
|
try:
|
|
377
377
|
for i in range(self.sample_count):
|
|
378
378
|
sample = self._wiff_file.GetSample(i)
|
|
379
379
|
sample_info = {
|
|
380
|
-
|
|
381
|
-
|
|
380
|
+
"index": i,
|
|
381
|
+
"name": str(self.sample_names[i]),
|
|
382
382
|
}
|
|
383
|
-
|
|
384
|
-
if hasattr(sample,
|
|
383
|
+
|
|
384
|
+
if hasattr(sample, "Details"):
|
|
385
385
|
details = sample.Details
|
|
386
|
-
if hasattr(details,
|
|
387
|
-
sample_info[
|
|
388
|
-
|
|
389
|
-
if hasattr(sample,
|
|
386
|
+
if hasattr(details, "AcquisitionDateTime"):
|
|
387
|
+
sample_info["acquisition_time"] = str(details.AcquisitionDateTime.ToString("O"))
|
|
388
|
+
|
|
389
|
+
if hasattr(sample, "MassSpectrometerSample"):
|
|
390
390
|
ms_sample = sample.MassSpectrometerSample
|
|
391
|
-
sample_info[
|
|
392
|
-
|
|
393
|
-
metadata[
|
|
394
|
-
|
|
391
|
+
sample_info["experiment_count"] = ms_sample.ExperimentCount
|
|
392
|
+
|
|
393
|
+
metadata["samples"].append(sample_info)
|
|
394
|
+
|
|
395
395
|
except Exception as e:
|
|
396
|
-
metadata[
|
|
397
|
-
|
|
396
|
+
metadata["metadata_error"] = str(e)
|
|
397
|
+
|
|
398
398
|
return metadata
|
|
399
399
|
|
|
400
400
|
def load_sample(self, sample_id: int = 0, **kwargs):
|
|
401
401
|
"""
|
|
402
402
|
Load sample data with WIFF2-optimized settings.
|
|
403
|
-
|
|
403
|
+
|
|
404
404
|
Parameters
|
|
405
405
|
----------
|
|
406
406
|
sample_id : int
|
|
407
407
|
Sample index to load
|
|
408
408
|
**kwargs
|
|
409
409
|
Additional parameters for data loading
|
|
410
|
-
|
|
410
|
+
|
|
411
411
|
Returns
|
|
412
412
|
-------
|
|
413
413
|
dict
|
|
@@ -417,69 +417,73 @@ class SciexWiff2FileReader:
|
|
|
417
417
|
return self._load_sample_alpharaw(sample_id, **kwargs)
|
|
418
418
|
else:
|
|
419
419
|
return self._load_sample_standard(sample_id, **kwargs)
|
|
420
|
-
|
|
420
|
+
|
|
421
421
|
def _load_sample_alpharaw(self, sample_id: int, **kwargs):
|
|
422
422
|
"""Load sample using alpharaw reader."""
|
|
423
423
|
# Enhanced parameters for WIFF2
|
|
424
424
|
enhanced_params = {
|
|
425
|
-
|
|
426
|
-
|
|
427
|
-
|
|
425
|
+
"centroid": kwargs.get("centroid", True),
|
|
426
|
+
"centroid_ppm": kwargs.get("centroid_ppm", 15.0),
|
|
427
|
+
"keep_k_peaks": kwargs.get("keep_k_peaks", 3000),
|
|
428
428
|
}
|
|
429
|
-
|
|
429
|
+
|
|
430
430
|
try:
|
|
431
431
|
# Use alpharaw's data extraction
|
|
432
432
|
spectrum_df = self._alpharaw_reader.spectrum_df
|
|
433
433
|
peak_df = self._alpharaw_reader.peak_df
|
|
434
|
-
|
|
434
|
+
|
|
435
435
|
# Convert to the expected format
|
|
436
436
|
spectral_data = {
|
|
437
|
-
|
|
438
|
-
|
|
439
|
-
|
|
440
|
-
|
|
441
|
-
|
|
442
|
-
|
|
443
|
-
|
|
444
|
-
|
|
445
|
-
|
|
446
|
-
|
|
447
|
-
|
|
448
|
-
|
|
449
|
-
|
|
450
|
-
|
|
451
|
-
|
|
452
|
-
|
|
453
|
-
|
|
454
|
-
|
|
455
|
-
|
|
456
|
-
|
|
457
|
-
|
|
458
|
-
|
|
437
|
+
"peak_indices": spectrum_df[["peak_start_idx", "peak_stop_idx"]].values.flatten(),
|
|
438
|
+
"peak_mz": peak_df["mz"].values,
|
|
439
|
+
"peak_intensity": peak_df["intensity"].values,
|
|
440
|
+
"rt": spectrum_df["rt"].values,
|
|
441
|
+
"ms_level": spectrum_df["ms_level"].values,
|
|
442
|
+
"precursor_mz": spectrum_df.get("precursor_mz", np.full(len(spectrum_df), -1.0)).values,
|
|
443
|
+
"precursor_charge": spectrum_df.get("precursor_charge", np.full(len(spectrum_df), 0)).values,
|
|
444
|
+
"isolation_lower_mz": spectrum_df.get("isolation_lower_mz", np.full(len(spectrum_df), -1.0)).values,
|
|
445
|
+
"isolation_upper_mz": spectrum_df.get("isolation_upper_mz", np.full(len(spectrum_df), -1.0)).values,
|
|
446
|
+
"nce": spectrum_df.get("nce", np.full(len(spectrum_df), 0.0)).values,
|
|
447
|
+
"metadata": {
|
|
448
|
+
"format": "WIFF2",
|
|
449
|
+
"sample_id": sample_id,
|
|
450
|
+
"sample_name": str(self.sample_names[sample_id])
|
|
451
|
+
if sample_id < len(self.sample_names)
|
|
452
|
+
else f"Sample_{sample_id}",
|
|
453
|
+
"loading_params": enhanced_params,
|
|
454
|
+
"total_spectra": len(spectrum_df),
|
|
455
|
+
"total_peaks": len(peak_df),
|
|
456
|
+
"ms1_count": np.sum(spectrum_df["ms_level"] == 1),
|
|
457
|
+
"ms2_count": np.sum(spectrum_df["ms_level"] > 1),
|
|
458
|
+
"rt_range": [float(spectrum_df["rt"].min()), float(spectrum_df["rt"].max())]
|
|
459
|
+
if len(spectrum_df) > 0
|
|
460
|
+
else [0, 0],
|
|
461
|
+
"reader_method": "alpharaw",
|
|
462
|
+
},
|
|
459
463
|
}
|
|
460
|
-
|
|
464
|
+
|
|
461
465
|
return spectral_data
|
|
462
|
-
|
|
466
|
+
|
|
463
467
|
except Exception as e:
|
|
464
468
|
raise RuntimeError(f"Failed to load WIFF2 sample via alpharaw: {e}")
|
|
465
|
-
|
|
469
|
+
|
|
466
470
|
def _load_sample_standard(self, sample_id: int, **kwargs):
|
|
467
471
|
"""Load sample using standard WIFF reader with WIFF2 enhancements."""
|
|
468
472
|
# Use enhanced parameters optimized for WIFF2
|
|
469
473
|
enhanced_params = {
|
|
470
|
-
|
|
471
|
-
|
|
472
|
-
|
|
473
|
-
|
|
474
|
+
"centroid": kwargs.get("centroid", True),
|
|
475
|
+
"centroid_ppm": kwargs.get("centroid_ppm", 15.0), # Tighter for WIFF2
|
|
476
|
+
"ignore_empty_scans": kwargs.get("ignore_empty_scans", True),
|
|
477
|
+
"keep_k_peaks": kwargs.get("keep_k_peaks", 3000), # More peaks for WIFF2
|
|
474
478
|
}
|
|
475
|
-
|
|
479
|
+
|
|
476
480
|
if sample_id < 0 or sample_id >= self.sample_count:
|
|
477
|
-
raise ValueError(f"Sample ID {sample_id} out of range (0-{self.sample_count-1})")
|
|
478
|
-
|
|
481
|
+
raise ValueError(f"Sample ID {sample_id} out of range (0-{self.sample_count - 1})")
|
|
482
|
+
|
|
479
483
|
# Use the same loading approach as SciexWiffFileReader but with enhancements
|
|
480
484
|
sample = self._wiff_file.GetSample(sample_id)
|
|
481
485
|
ms_sample = sample.MassSpectrometerSample
|
|
482
|
-
|
|
486
|
+
|
|
483
487
|
# Process data (same as SciexWiffFileReader.load_sample but with enhanced params)
|
|
484
488
|
_peak_indices: list[int] = []
|
|
485
489
|
peak_mz_list: list[np.ndarray] = []
|
|
@@ -491,9 +495,9 @@ class SciexWiff2FileReader:
|
|
|
491
495
|
nce_list: list[float] = []
|
|
492
496
|
isolation_lower_list: list[float] = []
|
|
493
497
|
isolation_upper_list: list[float] = []
|
|
494
|
-
|
|
498
|
+
|
|
495
499
|
exp_list = [ms_sample.GetMSExperiment(i) for i in range(ms_sample.ExperimentCount)]
|
|
496
|
-
|
|
500
|
+
|
|
497
501
|
for j in range(exp_list[0].Details.NumberOfScans):
|
|
498
502
|
for i in range(ms_sample.ExperimentCount):
|
|
499
503
|
exp = exp_list[i]
|
|
@@ -501,52 +505,58 @@ class SciexWiff2FileReader:
|
|
|
501
505
|
mass_spectrum_info = exp.GetMassSpectrumInfo(j)
|
|
502
506
|
details = exp.Details
|
|
503
507
|
ms_level = mass_spectrum_info.MSLevel
|
|
504
|
-
|
|
505
|
-
if (
|
|
506
|
-
|
|
508
|
+
|
|
509
|
+
if (
|
|
510
|
+
ms_level > 1
|
|
511
|
+
and not details.IsSwath
|
|
512
|
+
and mass_spectrum.NumDataPoints <= 0
|
|
513
|
+
and enhanced_params["ignore_empty_scans"]
|
|
514
|
+
):
|
|
507
515
|
continue
|
|
508
|
-
|
|
516
|
+
|
|
509
517
|
mz_array = dot_net_array_to_np_array(mass_spectrum.GetActualXValues())
|
|
510
518
|
int_array = dot_net_array_to_np_array(mass_spectrum.GetActualYValues()).astype(np.float32)
|
|
511
|
-
|
|
512
|
-
if enhanced_params[
|
|
519
|
+
|
|
520
|
+
if enhanced_params["centroid"]:
|
|
513
521
|
mz_array, int_array = naive_centroid(
|
|
514
|
-
mz_array,
|
|
515
|
-
|
|
522
|
+
mz_array,
|
|
523
|
+
int_array,
|
|
524
|
+
centroiding_ppm=enhanced_params["centroid_ppm"],
|
|
516
525
|
)
|
|
517
|
-
|
|
518
|
-
if len(mz_array) > enhanced_params[
|
|
519
|
-
top_indices = np.argsort(int_array)[-enhanced_params[
|
|
526
|
+
|
|
527
|
+
if len(mz_array) > enhanced_params["keep_k_peaks"]:
|
|
528
|
+
top_indices = np.argsort(int_array)[-enhanced_params["keep_k_peaks"] :]
|
|
520
529
|
top_indices = np.sort(top_indices)
|
|
521
530
|
mz_array = mz_array[top_indices]
|
|
522
531
|
int_array = int_array[top_indices]
|
|
523
|
-
|
|
532
|
+
|
|
524
533
|
peak_mz_list.append(mz_array)
|
|
525
534
|
peak_intensity_list.append(int_array)
|
|
526
535
|
_peak_indices.append(len(peak_mz_list[-1]))
|
|
527
|
-
|
|
536
|
+
|
|
528
537
|
rt_list.append(exp.GetRTFromExperimentCycle(j))
|
|
529
538
|
ms_level_list.append(ms_level)
|
|
530
|
-
|
|
539
|
+
|
|
531
540
|
# Enhanced precursor handling for WIFF2
|
|
532
541
|
center_mz = -1.0
|
|
533
542
|
isolation_window = 0.0
|
|
534
|
-
|
|
543
|
+
|
|
535
544
|
if ms_level > 1:
|
|
536
545
|
if details.IsSwath and details.MassRangeInfo.Length > 0:
|
|
537
546
|
try:
|
|
538
547
|
from WiffOps4Python import WiffOps as DotNetWiffOps
|
|
548
|
+
|
|
539
549
|
center_mz = DotNetWiffOps.get_center_mz(details)
|
|
540
550
|
isolation_window = DotNetWiffOps.get_isolation_window(details)
|
|
541
551
|
except:
|
|
542
552
|
center_mz = mass_spectrum_info.ParentMZ
|
|
543
553
|
isolation_window = 3.0
|
|
544
|
-
|
|
554
|
+
|
|
545
555
|
if isolation_window <= 0:
|
|
546
556
|
isolation_window = 3.0
|
|
547
557
|
if center_mz <= 0:
|
|
548
558
|
center_mz = mass_spectrum_info.ParentMZ
|
|
549
|
-
|
|
559
|
+
|
|
550
560
|
precursor_mz_list.append(center_mz)
|
|
551
561
|
precursor_charge_list.append(mass_spectrum_info.ParentChargeState)
|
|
552
562
|
nce_list.append(float(mass_spectrum_info.CollisionEnergy))
|
|
@@ -558,58 +568,60 @@ class SciexWiff2FileReader:
|
|
|
558
568
|
nce_list.append(0.0)
|
|
559
569
|
isolation_lower_list.append(-1.0)
|
|
560
570
|
isolation_upper_list.append(-1.0)
|
|
561
|
-
|
|
571
|
+
|
|
562
572
|
# Finalize arrays
|
|
563
573
|
peak_indices = np.empty(len(rt_list) + 1, np.int64)
|
|
564
574
|
peak_indices[0] = 0
|
|
565
575
|
peak_indices[1:] = np.cumsum(_peak_indices)
|
|
566
|
-
|
|
576
|
+
|
|
567
577
|
return {
|
|
568
|
-
|
|
569
|
-
|
|
570
|
-
|
|
571
|
-
|
|
572
|
-
|
|
573
|
-
|
|
574
|
-
|
|
575
|
-
|
|
576
|
-
|
|
577
|
-
|
|
578
|
-
|
|
579
|
-
|
|
580
|
-
|
|
581
|
-
|
|
582
|
-
|
|
583
|
-
|
|
584
|
-
|
|
585
|
-
|
|
586
|
-
|
|
587
|
-
|
|
588
|
-
|
|
589
|
-
|
|
590
|
-
|
|
578
|
+
"peak_indices": peak_indices,
|
|
579
|
+
"peak_mz": np.concatenate(peak_mz_list) if peak_mz_list else np.array([]),
|
|
580
|
+
"peak_intensity": np.concatenate(peak_intensity_list) if peak_intensity_list else np.array([]),
|
|
581
|
+
"rt": np.array(rt_list, dtype=np.float64),
|
|
582
|
+
"ms_level": np.array(ms_level_list, dtype=np.int8),
|
|
583
|
+
"precursor_mz": np.array(precursor_mz_list, dtype=np.float64),
|
|
584
|
+
"precursor_charge": np.array(precursor_charge_list, dtype=np.int8),
|
|
585
|
+
"isolation_lower_mz": np.array(isolation_lower_list, dtype=np.float64),
|
|
586
|
+
"isolation_upper_mz": np.array(isolation_upper_list, dtype=np.float64),
|
|
587
|
+
"nce": np.array(nce_list, dtype=np.float32),
|
|
588
|
+
"metadata": {
|
|
589
|
+
"format": "WIFF2",
|
|
590
|
+
"sample_id": sample_id,
|
|
591
|
+
"sample_name": str(self.sample_names[sample_id]),
|
|
592
|
+
"loading_params": enhanced_params,
|
|
593
|
+
"total_spectra": len(rt_list),
|
|
594
|
+
"total_peaks": sum(_peak_indices),
|
|
595
|
+
"ms1_count": np.sum(np.array(ms_level_list) == 1),
|
|
596
|
+
"ms2_count": np.sum(np.array(ms_level_list) > 1),
|
|
597
|
+
"rt_range": [float(np.min(rt_list)), float(np.max(rt_list))] if rt_list else [0, 0],
|
|
598
|
+
"creation_time": str(sample.Details.AcquisitionDateTime.ToString("O"))
|
|
599
|
+
if hasattr(sample, "Details")
|
|
600
|
+
else "",
|
|
601
|
+
"reader_method": "standard_enhanced",
|
|
602
|
+
},
|
|
591
603
|
}
|
|
592
604
|
|
|
593
605
|
def close(self):
|
|
594
606
|
"""Close the WIFF2 file and clean up resources."""
|
|
595
|
-
if hasattr(self,
|
|
607
|
+
if hasattr(self, "_wiffDataProvider"):
|
|
596
608
|
try:
|
|
597
609
|
self._wiffDataProvider.Close()
|
|
598
610
|
except:
|
|
599
611
|
pass
|
|
600
|
-
|
|
601
|
-
if hasattr(self,
|
|
612
|
+
|
|
613
|
+
if hasattr(self, "_alpharaw_reader"):
|
|
602
614
|
try:
|
|
603
615
|
self._alpharaw_reader.close()
|
|
604
616
|
except:
|
|
605
617
|
pass
|
|
606
|
-
|
|
618
|
+
|
|
607
619
|
def __enter__(self):
|
|
608
620
|
return self
|
|
609
|
-
|
|
621
|
+
|
|
610
622
|
def __exit__(self, exc_type, exc_val, exc_tb):
|
|
611
623
|
self.close()
|
|
612
|
-
|
|
624
|
+
|
|
613
625
|
def __repr__(self):
|
|
614
626
|
return f"SciexWiff2FileReader(file='{self.filename}', samples={self.sample_count}, method={self.initialization_method})"
|
|
615
627
|
|
|
@@ -1022,30 +1034,29 @@ def load_wiff2_file(filename: str, **kwargs) -> dict[str, Any]:
|
|
|
1022
1034
|
-------
|
|
1023
1035
|
dict
|
|
1024
1036
|
Spectral data dictionary with enhanced WIFF2 information
|
|
1025
|
-
|
|
1037
|
+
|
|
1026
1038
|
Raises
|
|
1027
1039
|
------
|
|
1028
1040
|
RuntimeError
|
|
1029
1041
|
If WIFF2 format is not supported by current DLL combination
|
|
1030
1042
|
"""
|
|
1031
|
-
sample_id = kwargs.pop(
|
|
1032
|
-
|
|
1043
|
+
sample_id = kwargs.pop("sample_id", 0)
|
|
1044
|
+
|
|
1033
1045
|
try:
|
|
1034
1046
|
with SciexWiff2FileReader(filename) as reader:
|
|
1035
1047
|
return reader.load_sample(sample_id, **kwargs) # type: ignore[no-any-return]
|
|
1036
1048
|
except RuntimeError as e:
|
|
1037
1049
|
if "format is not supported" in str(e):
|
|
1038
1050
|
# Suggest using regular WIFF file if available
|
|
1039
|
-
wiff_file = filename.replace(
|
|
1051
|
+
wiff_file = filename.replace(".wiff2", ".wiff")
|
|
1040
1052
|
if os.path.exists(wiff_file):
|
|
1041
1053
|
raise RuntimeError(
|
|
1042
1054
|
f"WIFF2 format not supported. However, a regular WIFF file was found: "
|
|
1043
|
-
f"'{wiff_file}'. Try using load_wiff_file('{wiff_file}') instead."
|
|
1055
|
+
f"'{wiff_file}'. Try using load_wiff_file('{wiff_file}') instead.",
|
|
1044
1056
|
) from e
|
|
1045
1057
|
else:
|
|
1046
1058
|
raise RuntimeError(
|
|
1047
|
-
f"WIFF2 format not supported and no corresponding WIFF file found. "
|
|
1048
|
-
f"Original error: {e}"
|
|
1059
|
+
f"WIFF2 format not supported and no corresponding WIFF file found. Original error: {e}",
|
|
1049
1060
|
) from e
|
|
1050
1061
|
else:
|
|
1051
1062
|
raise
|
|
@@ -1054,7 +1065,7 @@ def load_wiff2_file(filename: str, **kwargs) -> dict[str, Any]:
|
|
|
1054
1065
|
def load_wiff_file_smart(filename: str, **kwargs) -> dict[str, Any] | SciexWiffData:
|
|
1055
1066
|
"""
|
|
1056
1067
|
Smart WIFF file loader that automatically handles WIFF and WIFF2 formats.
|
|
1057
|
-
|
|
1068
|
+
|
|
1058
1069
|
This function will first try to load the file as specified, and if it's a WIFF2
|
|
1059
1070
|
file that fails due to format incompatibility, it will suggest alternatives.
|
|
1060
1071
|
|
|
@@ -1070,17 +1081,17 @@ def load_wiff_file_smart(filename: str, **kwargs) -> dict[str, Any] | SciexWiffD
|
|
|
1070
1081
|
dict
|
|
1071
1082
|
Spectral data dictionary
|
|
1072
1083
|
"""
|
|
1073
|
-
if filename.lower().endswith(
|
|
1084
|
+
if filename.lower().endswith(".wiff2"):
|
|
1074
1085
|
try:
|
|
1075
1086
|
return load_wiff2_file(filename, **kwargs)
|
|
1076
1087
|
except RuntimeError as e:
|
|
1077
1088
|
if "format is not supported" in str(e):
|
|
1078
1089
|
# Check if regular WIFF file exists
|
|
1079
|
-
wiff_file = filename.replace(
|
|
1090
|
+
wiff_file = filename.replace(".wiff2", ".wiff")
|
|
1080
1091
|
if os.path.exists(wiff_file):
|
|
1081
1092
|
warnings.warn(
|
|
1082
1093
|
f"WIFF2 format not supported, falling back to WIFF file: {wiff_file}",
|
|
1083
|
-
stacklevel=2
|
|
1094
|
+
stacklevel=2,
|
|
1084
1095
|
)
|
|
1085
1096
|
return load_wiff_file(wiff_file, **kwargs)
|
|
1086
1097
|
raise
|