masster 0.2.5__py3-none-any.whl → 0.3.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of masster might be problematic. Click here for more details.

Files changed (55)
  1. masster/__init__.py +27 -27
  2. masster/_version.py +17 -17
  3. masster/chromatogram.py +497 -503
  4. masster/data/examples/2025_01_14_VW_7600_LpMx_DBS_CID_2min_TOP15_030msecMS1_005msecReac_CE35_DBS-ON_3.featureXML +199787 -0
  5. masster/data/examples/2025_01_14_VW_7600_LpMx_DBS_CID_2min_TOP15_030msecMS1_005msecReac_CE35_DBS-ON_3.sample5 +0 -0
  6. masster/logger.py +318 -244
  7. masster/sample/__init__.py +9 -9
  8. masster/sample/defaults/__init__.py +15 -15
  9. masster/sample/defaults/find_adducts_def.py +325 -325
  10. masster/sample/defaults/find_features_def.py +366 -366
  11. masster/sample/defaults/find_ms2_def.py +285 -285
  12. masster/sample/defaults/get_spectrum_def.py +314 -318
  13. masster/sample/defaults/sample_def.py +374 -378
  14. masster/sample/h5.py +1321 -1297
  15. masster/sample/helpers.py +833 -364
  16. masster/sample/lib.py +762 -0
  17. masster/sample/load.py +1220 -1187
  18. masster/sample/parameters.py +131 -131
  19. masster/sample/plot.py +1685 -1622
  20. masster/sample/processing.py +1402 -1416
  21. masster/sample/quant.py +209 -0
  22. masster/sample/sample.py +393 -387
  23. masster/sample/sample5_schema.json +181 -181
  24. masster/sample/save.py +737 -736
  25. masster/sample/sciex.py +1213 -0
  26. masster/spectrum.py +1287 -1319
  27. masster/study/__init__.py +9 -9
  28. masster/study/defaults/__init__.py +21 -19
  29. masster/study/defaults/align_def.py +267 -267
  30. masster/study/defaults/export_def.py +41 -40
  31. masster/study/defaults/fill_chrom_def.py +264 -264
  32. masster/study/defaults/fill_def.py +260 -0
  33. masster/study/defaults/find_consensus_def.py +256 -256
  34. masster/study/defaults/find_ms2_def.py +163 -163
  35. masster/study/defaults/integrate_chrom_def.py +225 -225
  36. masster/study/defaults/integrate_def.py +221 -0
  37. masster/study/defaults/merge_def.py +256 -0
  38. masster/study/defaults/study_def.py +272 -269
  39. masster/study/export.py +674 -287
  40. masster/study/h5.py +1406 -886
  41. masster/study/helpers.py +1713 -433
  42. masster/study/helpers_optimized.py +317 -0
  43. masster/study/load.py +1231 -1078
  44. masster/study/parameters.py +99 -99
  45. masster/study/plot.py +632 -645
  46. masster/study/processing.py +1057 -1046
  47. masster/study/save.py +161 -134
  48. masster/study/study.py +612 -522
  49. masster/study/study5_schema.json +253 -241
  50. {masster-0.2.5.dist-info → masster-0.3.1.dist-info}/METADATA +15 -10
  51. masster-0.3.1.dist-info/RECORD +59 -0
  52. {masster-0.2.5.dist-info → masster-0.3.1.dist-info}/licenses/LICENSE +661 -661
  53. masster-0.2.5.dist-info/RECORD +0 -50
  54. {masster-0.2.5.dist-info → masster-0.3.1.dist-info}/WHEEL +0 -0
  55. {masster-0.2.5.dist-info → masster-0.3.1.dist-info}/entry_points.txt +0 -0
@@ -0,0 +1,1213 @@
1
+ """
2
+ Standalone Sciex WIFF file reader module.
3
+
4
+ This module provides a standalone implementation of Sciex WIFF file reading
5
+ functionality that uses the DLLs from alpharaw's ext/sciex directory directly
6
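+ rather than going through alpharaw's Python readers; alpharaw itself is still imported as a fallback to locate the DLL directory and as one of the WIFF2 loading strategies.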
7
+
8
+ Requirements:
9
+ - pythonnet (pip install pythonnet)
10
+ - alpharaw package must be installed to access the DLLs in site-packages/alpharaw/ext/sciex/
11
+ - On Linux/macOS: mono runtime must be installed
12
+
13
+ The .NET imports (System, Clearcore2, WiffOps4Python) will only work when
14
+ pythonnet is properly installed and configured.
15
+ """
16
+
17
+ import os
18
+ import site
19
+ import warnings
20
+
21
+ from typing import Any, ClassVar
22
+
23
+ import numpy as np
24
+ import pandas as pd
25
+
26
+
27
+ # Import centroiding functionality (simplified naive centroid implementation)
28
def naive_centroid(
    peak_mzs: np.ndarray,
    peak_intensities: np.ndarray,
    centroiding_ppm: float = 20.0,
) -> tuple[np.ndarray, np.ndarray]:
    """
    Merge runs of neighbouring peaks into intensity-weighted centroids.

    Peaks are scanned in the order given. A run starts at an anchor peak and
    absorbs every directly following peak whose m/z lies within
    ``centroiding_ppm`` of the *anchor* (the window does not slide along the
    run). Runs whose summed intensity is not positive are dropped.

    Parameters
    ----------
    peak_mzs : np.ndarray
        Peak m/z values; consecutive entries are compared, so the caller is
        expected to pass them in m/z order.
    peak_intensities : np.ndarray
        Intensities aligned with ``peak_mzs``.
    centroiding_ppm : float
        Relative tolerance (parts per million of the anchor m/z).

    Returns
    -------
    tuple[np.ndarray, np.ndarray]
        Centroid m/z values and the summed intensity of each run.
    """
    n_peaks = len(peak_mzs)
    if n_peaks == 0:
        return np.array([]), np.array([])

    merged_mzs = []
    merged_intensities = []

    start = 0
    while start < n_peaks:
        anchor_mz = peak_mzs[start]
        # Absolute window derived from the anchor peak only.
        window = anchor_mz * centroiding_ppm * 1e-6

        intensity_sum = peak_intensities[start]
        mz_moment = anchor_mz * peak_intensities[start]

        # Extend the run while the next peak stays inside the anchor window.
        stop = start + 1
        while stop < n_peaks and abs(peak_mzs[stop] - anchor_mz) <= window:
            intensity_sum += peak_intensities[stop]
            mz_moment += peak_mzs[stop] * peak_intensities[stop]
            stop += 1

        # Zero-intensity runs carry no signal and would divide by zero.
        if intensity_sum > 0:
            merged_mzs.append(mz_moment / intensity_sum)
            merged_intensities.append(intensity_sum)

        start = stop

    return np.array(merged_mzs), np.array(merged_intensities)
71
+
72
+
73
+ # CLR utilities implementation
74
# Bootstrap of the .NET bridge. Everything below runs at import time; if any
# step fails the module still imports, but HAS_DOTNET is False and the reader
# classes refuse to open files.
try:
    # require pythonnet, pip install pythonnet on Windows
    import clr

    clr.AddReference("System")

    import ctypes

    import System  # noqa: F401

    from System.Globalization import CultureInfo
    from System.Runtime.InteropServices import GCHandle
    from System.Runtime.InteropServices import GCHandleType
    from System.Threading import Thread

    # NOTE(review): `de_fr` is created but not referenced again in the visible
    # part of this module; only the en-US culture is applied to the thread.
    de_fr = CultureInfo("fr-FR")
    other = CultureInfo("en-US")

    # Pin the current thread's .NET culture to en-US.
    # presumably to keep number formatting/parsing locale-independent - TODO confirm
    Thread.CurrentThread.CurrentCulture = other
    Thread.CurrentThread.CurrentUICulture = other

    # Find the alpharaw ext/sciex directory in site-packages
    ext_dir = None
    for site_dir in site.getsitepackages():
        potential_ext_dir = os.path.join(site_dir, "alpharaw", "ext", "sciex")
        if os.path.exists(potential_ext_dir):
            ext_dir = potential_ext_dir
            break

    if ext_dir is None:
        # Try alternative locations: derive the path from the imported
        # alpharaw package itself (raises ImportError if it is not installed,
        # which the outer handler turns into HAS_DOTNET = False).
        import alpharaw

        alpharaw_dir = os.path.dirname(alpharaw.__file__)
        ext_dir = os.path.join(alpharaw_dir, "ext", "sciex")

    if not os.path.exists(ext_dir):
        raise ImportError("Could not find alpharaw ext/sciex directory with DLLs")

    # Add Sciex DLL references
    clr.AddReference(
        os.path.join(ext_dir, "Clearcore2.Data.AnalystDataProvider.dll"),
    )
    clr.AddReference(os.path.join(ext_dir, "Clearcore2.Data.dll"))
    clr.AddReference(os.path.join(ext_dir, "WiffOps4Python.dll"))

    # These imports only resolve after the assemblies above are referenced.
    import Clearcore2  # noqa: F401
    import WiffOps4Python  # noqa: F401

    from Clearcore2.Data.AnalystDataProvider import AnalystDataProviderFactory
    from Clearcore2.Data.AnalystDataProvider import AnalystWiffDataProvider
    from WiffOps4Python import WiffOps as DotNetWiffOps

    HAS_DOTNET = True
except Exception as e:
    # allows to use the rest of the code without clr
    # (deliberately broad: any failure above only disables Sciex support)
    warnings.warn(
        f"Dotnet-based dependencies could not be loaded. Sciex support is disabled. Error: {e}",
        stacklevel=2,
    )
    HAS_DOTNET = False
135
+
136
+
137
def dot_net_array_to_np_array(src):
    """
    Convert a .NET array of doubles to a NumPy ``float64`` array.

    The .NET array is pinned, its memory is viewed through ``ctypes`` and then
    copied, so the returned array does not depend on the .NET object staying
    alive. See https://mail.python.org/pipermail/pythondotnet/2014-May/001527.html

    Parameters
    ----------
    src : .NET array or None
        Source array; its elements are read as C doubles.

    Returns
    -------
    np.ndarray
        A ``float64`` copy of the data, or an empty array when ``src`` is None.
    """
    if src is None:
        return np.array([], dtype=np.float64)
    src_hndl = GCHandle.Alloc(src, GCHandleType.Pinned)
    try:
        src_ptr = src_hndl.AddrOfPinnedObject().ToInt64()
        buf_type = ctypes.c_double * len(src)
        cbuf = buf_type.from_address(src_ptr)
        # Copy while the array is still pinned; the view is invalid afterwards.
        dest = np.frombuffer(cbuf, dtype='float64').copy()  # type: ignore[call-overload]
    finally:
        if src_hndl.IsAllocated:
            src_hndl.Free()
    # Returning after (not inside) ``finally`` lets a failure in the copy
    # propagate instead of being replaced by an UnboundLocalError on ``dest``.
    return dest
154
+
155
+
156
class SciexWiff2FileReader:
    """
    Reader for Sciex WIFF2 files that tries several backends in turn.

    On construction the reader locates the alpharaw DLL directory, references
    the WIFF2-related assemblies and then attempts, in order:

    1. the Clearcore2 ``AnalystWiffDataProvider`` (``"AnalystDataProvider"``),
    2. ``alpharaw.sciex.SciexWiffData`` (``"alpharaw_SciexWiffData"``),
    3. WIFF2 detection followed by the plain WIFF path
       (``"enhanced_wiff_fallback"``).

    The backend that succeeded is recorded in ``initialization_method`` and
    decides how ``load_sample`` and ``get_file_metadata`` obtain their data.
    The instance can be used as a context manager; ``close`` is called on exit.
    """

    def __init__(self, filename: str):
        """
        Initialize WIFF2 reader with file path.

        Parameters
        ----------
        filename : str
            Path to the WIFF2 file

        Raises
        ------
        ValueError
            If the .NET dependencies could not be loaded at import time.
        RuntimeError
            If none of the initialization strategies can open the file.
        """
        if not HAS_DOTNET:
            raise ValueError(
                "Dotnet-based dependencies are required for reading Sciex WIFF2 files. "
                "Install pythonnet and ensure alpharaw DLLs are available."
            )

        self.filename = filename
        self.ext_dir = self._find_dll_directory()
        self._ensure_wiff2_dlls_loaded()

        # Try different initialization strategies for WIFF2
        self._initialize_wiff2_reader()

    def _uses_alpharaw(self) -> bool:
        """Return True when the alpharaw backend opened the file."""
        # The initializer stores "alpharaw_SciexWiffData"; a prefix test keeps
        # the plain "alpharaw" label working as well.
        return str(self.initialization_method).startswith("alpharaw")

    def _find_dll_directory(self):
        """
        Find the alpharaw DLL directory.

        Searches every site-packages directory for ``alpharaw/ext/sciex`` and
        falls back to the location of the imported ``alpharaw`` package.

        Raises
        ------
        ImportError
            If no site-packages hit exists and alpharaw cannot be imported.
        """
        for site_dir in site.getsitepackages():
            potential_ext_dir = os.path.join(site_dir, "alpharaw", "ext", "sciex")
            if os.path.exists(potential_ext_dir):
                return potential_ext_dir

        # Fallback to alpharaw module location
        try:
            import alpharaw
        except ImportError as err:
            raise ImportError("Could not find alpharaw DLL directory") from err
        alpharaw_dir = os.path.dirname(alpharaw.__file__)
        return os.path.join(alpharaw_dir, "ext", "sciex")

    def _ensure_wiff2_dlls_loaded(self):
        """Reference every WIFF2-related DLL present in ``self.ext_dir``; warn about missing ones."""
        required_dlls = [
            "Clearcore2.Data.Wiff2.dll",  # Primary WIFF2 support
            "Clearcore2.Data.AnalystDataProvider.dll",
            "Clearcore2.Data.dll",
            "Clearcore2.Data.Common.dll",
            "Clearcore2.Data.Core.dll",
            "Clearcore2.StructuredStorage.dll",  # For WIFF2 storage format
            "WiffOps4Python.dll",
        ]

        for dll in required_dlls:
            dll_path = os.path.join(self.ext_dir, dll)
            if os.path.exists(dll_path):
                try:
                    clr.AddReference(dll_path)
                except Exception:
                    # Best effort: the assembly may already be loaded.
                    pass
            else:
                warnings.warn(f"WIFF2 DLL not found: {dll}", stacklevel=2)

    def _initialize_wiff2_reader(self):
        """
        Initialize WIFF2 reader with fallback strategies.

        Each strategy that fails contributes its error text to the final
        ``RuntimeError`` raised when no strategy succeeds.
        """
        initialization_errors = []

        # Strategy 1: Try standard AnalystDataProvider (may work for some WIFF2)
        try:
            from Clearcore2.Data.AnalystDataProvider import AnalystDataProviderFactory
            from Clearcore2.Data.AnalystDataProvider import AnalystWiffDataProvider

            self._wiffDataProvider = AnalystWiffDataProvider()
            self._wiff_file = AnalystDataProviderFactory.CreateBatch(
                self.filename,
                self._wiffDataProvider,
            )

            self.sample_names = self._wiff_file.GetSampleNames()
            self.sample_count = len(self.sample_names)
            self.initialization_method = "AnalystDataProvider"
            return

        except Exception as e:
            initialization_errors.append(f"AnalystDataProvider: {e}")

        # Strategy 2: Try alpharaw's SciexWiffData (correct API)
        try:
            from alpharaw.sciex import SciexWiffData

            self._alpharaw_reader = SciexWiffData()
            self._alpharaw_reader.import_raw(self.filename)

            # SciexWiffData exposes no sample names, so a single placeholder is used.
            self.sample_names = ['Sample_0']
            self.sample_count = 1
            self.initialization_method = "alpharaw_SciexWiffData"

            # Store the reader for later use
            self._wiff_data = self._alpharaw_reader
            return

        except Exception as e:
            initialization_errors.append(f"alpharaw_SciexWiffData: {e}")

        # Strategy 3: Try direct WIFF2 DLL approach
        try:
            # Check if file is recognized as WIFF2
            from Clearcore2.Data.AnalystDataProvider import DataProviderHelper

            is_wiff2 = DataProviderHelper.IsMdWiffFile(self.filename)
            if is_wiff2:
                warnings.warn(
                    "File detected as WIFF2 format but specialized reader not fully implemented. "
                    "Consider using alpharaw.ms_data_from_file() directly.",
                    stacklevel=2
                )
                # For now, fall back to treating as regular WIFF
                self._initialize_as_enhanced_wiff()
                return

        except Exception as e:
            initialization_errors.append(f"WIFF2 detection: {e}")

        # All strategies failed: report every collected error.
        error_summary = "; ".join(initialization_errors)

        # This specific result code gets a dedicated, more actionable message.
        if "could not be opened (result = -2147286960)" in error_summary:
            raise RuntimeError(
                "WIFF2 file format is not supported by the current DLL combination. "
                "Error code -2147286960 (0x80030050) indicates format incompatibility. "
                f"The file '{self.filename}' appears to be a valid WIFF2 file but requires "
                "newer or different DLLs than currently available. "
                "Try converting the WIFF2 file to WIFF format or use alternative tools. "
                f"Full error details: {error_summary}"
            )
        else:
            raise RuntimeError(
                "Failed to initialize WIFF2 reader with any strategy. "
                f"Errors: {error_summary}. "
                "The file may be corrupted, locked, or require different dependencies."
            )

    def _initialize_as_enhanced_wiff(self):
        """
        Fallback: open the file through the regular WIFF data provider.

        Raises
        ------
        RuntimeError
            If the regular WIFF provider cannot open the file either.
        """
        try:
            from Clearcore2.Data.AnalystDataProvider import AnalystDataProviderFactory
            from Clearcore2.Data.AnalystDataProvider import AnalystWiffDataProvider

            self._wiffDataProvider = AnalystWiffDataProvider()
            self._wiff_file = AnalystDataProviderFactory.CreateBatch(
                self.filename,
                self._wiffDataProvider,
            )

            self.sample_names = self._wiff_file.GetSampleNames()
            self.sample_count = len(self.sample_names)
            self.initialization_method = "enhanced_wiff_fallback"

            warnings.warn(
                "WIFF2 file opened using WIFF reader fallback. "
                "Some WIFF2-specific features may not be available.",
                stacklevel=2
            )

        except Exception as e:
            raise RuntimeError(f"Enhanced WIFF fallback also failed: {e}") from e

    def get_file_metadata(self) -> dict[str, Any]:
        """
        Get file metadata for the opened WIFF2 file.

        Returns
        -------
        dict
            Always contains format, sample count/names, file size, path and
            initialization method. Backend-specific fields are added on a
            best-effort basis; a failure while collecting them is reported
            under ``'metadata_error'`` instead of being raised.
        """
        metadata: dict[str, Any] = {
            'format': 'WIFF2',
            'sample_count': self.sample_count,
            'sample_names': list(self.sample_names),
            'file_size': os.path.getsize(self.filename),
            'file_path': self.filename,
            'initialization_method': self.initialization_method,
            'samples': []  # Initialize samples list
        }

        if self._uses_alpharaw():
            # Get metadata from alpharaw reader
            try:
                if hasattr(self._alpharaw_reader, 'get_spectrum_count'):
                    metadata['total_spectra'] = self._alpharaw_reader.get_spectrum_count()

                # Add alpharaw-specific metadata
                for attr in ['creation_time', 'instrument_model', 'ms_levels']:
                    if hasattr(self._alpharaw_reader, attr):
                        try:
                            value = getattr(self._alpharaw_reader, attr)
                            metadata[attr] = value() if callable(value) else value
                        except Exception:
                            # Optional field: skip it if it cannot be read.
                            pass

            except Exception as e:
                metadata['metadata_error'] = str(e)

        elif hasattr(self, '_wiff_file'):
            # Get metadata from standard WIFF reader
            try:
                for i in range(self.sample_count):
                    sample = self._wiff_file.GetSample(i)
                    sample_info = {
                        'index': i,
                        'name': str(self.sample_names[i]),
                    }

                    if hasattr(sample, 'Details'):
                        details = sample.Details
                        if hasattr(details, 'AcquisitionDateTime'):
                            sample_info['acquisition_time'] = str(details.AcquisitionDateTime.ToString("O"))

                    if hasattr(sample, 'MassSpectrometerSample'):
                        ms_sample = sample.MassSpectrometerSample
                        sample_info['experiment_count'] = ms_sample.ExperimentCount

                    metadata['samples'].append(sample_info)

            except Exception as e:
                metadata['metadata_error'] = str(e)

        return metadata

    def load_sample(self, sample_id: int = 0, **kwargs):
        """
        Load sample data through the backend that opened the file.

        Parameters
        ----------
        sample_id : int
            Sample index to load
        **kwargs
            Optional ``centroid``, ``centroid_ppm``, ``keep_k_peaks`` and
            (standard backend only) ``ignore_empty_scans``.

        Returns
        -------
        dict
            Spectral data dictionary with a ``'metadata'`` entry.
        """
        if self._uses_alpharaw():
            return self._load_sample_alpharaw(sample_id, **kwargs)
        return self._load_sample_standard(sample_id, **kwargs)

    def _load_sample_alpharaw(self, sample_id: int, **kwargs):
        """
        Build the spectral data dictionary from the alpharaw reader's frames.

        The loading parameters are only recorded in the metadata here; the
        data is taken as already imported by the alpharaw reader.
        ``'peak_indices'`` is the flattened (start, stop) pair per spectrum.

        Raises
        ------
        RuntimeError
            If the frames cannot be converted.
        """
        enhanced_params = {
            'centroid': kwargs.get('centroid', True),
            'centroid_ppm': kwargs.get('centroid_ppm', 15.0),
            'keep_k_peaks': kwargs.get('keep_k_peaks', 3000),
        }

        try:
            # Use alpharaw's data extraction
            spectrum_df = self._alpharaw_reader.spectrum_df
            peak_df = self._alpharaw_reader.peak_df
            n_spectra = len(spectrum_df)

            def column_or_fill(name, fill_value):
                # Optional columns: use the stored values when present,
                # otherwise a constant array of matching length.
                if name in spectrum_df:
                    return spectrum_df[name].values
                return np.full(n_spectra, fill_value)

            if sample_id < len(self.sample_names):
                sample_name = str(self.sample_names[sample_id])
            else:
                sample_name = f'Sample_{sample_id}'

            if n_spectra > 0:
                rt_range = [float(spectrum_df['rt'].min()), float(spectrum_df['rt'].max())]
            else:
                rt_range = [0, 0]

            # Convert to the expected format
            return {
                'peak_indices': spectrum_df[['peak_start_idx', 'peak_stop_idx']].values.flatten(),
                'peak_mz': peak_df['mz'].values,
                'peak_intensity': peak_df['intensity'].values,
                'rt': spectrum_df['rt'].values,
                'ms_level': spectrum_df['ms_level'].values,
                'precursor_mz': column_or_fill('precursor_mz', -1.0),
                'precursor_charge': column_or_fill('precursor_charge', 0),
                'isolation_lower_mz': column_or_fill('isolation_lower_mz', -1.0),
                'isolation_upper_mz': column_or_fill('isolation_upper_mz', -1.0),
                'nce': column_or_fill('nce', 0.0),
                'metadata': {
                    'format': 'WIFF2',
                    'sample_id': sample_id,
                    'sample_name': sample_name,
                    'loading_params': enhanced_params,
                    'total_spectra': n_spectra,
                    'total_peaks': len(peak_df),
                    'ms1_count': np.sum(spectrum_df['ms_level'] == 1),
                    'ms2_count': np.sum(spectrum_df['ms_level'] > 1),
                    'rt_range': rt_range,
                    'reader_method': 'alpharaw'
                }
            }

        except Exception as e:
            raise RuntimeError(f"Failed to load WIFF2 sample via alpharaw: {e}") from e

    def _load_sample_standard(self, sample_id: int, **kwargs):
        """
        Load a sample through the Clearcore2 WIFF provider.

        Iterates cycle by cycle over all experiments, optionally centroids
        each spectrum, keeps at most ``keep_k_peaks`` most intense peaks and
        collects precursor information for MS level > 1 scans.

        Raises
        ------
        ValueError
            If ``sample_id`` is outside the available sample range.
        """
        enhanced_params = {
            'centroid': kwargs.get('centroid', True),
            'centroid_ppm': kwargs.get('centroid_ppm', 15.0),
            'ignore_empty_scans': kwargs.get('ignore_empty_scans', True),
            'keep_k_peaks': kwargs.get('keep_k_peaks', 3000),
        }

        if sample_id < 0 or sample_id >= self.sample_count:
            raise ValueError(f"Sample ID {sample_id} out of range (0-{self.sample_count-1})")

        sample = self._wiff_file.GetSample(sample_id)
        ms_sample = sample.MassSpectrometerSample

        _peak_indices: list[int] = []
        peak_mz_list: list[np.ndarray] = []
        peak_intensity_list: list[np.ndarray] = []
        rt_list: list[float] = []
        ms_level_list: list[int] = []
        precursor_mz_list: list[float] = []
        precursor_charge_list: list[int] = []
        nce_list: list[float] = []
        isolation_lower_list: list[float] = []
        isolation_upper_list: list[float] = []

        exp_list = [ms_sample.GetMSExperiment(i) for i in range(ms_sample.ExperimentCount)]
        # The first experiment defines the number of cycles; a sample without
        # experiments yields an empty result instead of an IndexError.
        n_scans = exp_list[0].Details.NumberOfScans if exp_list else 0

        for j in range(n_scans):
            for exp in exp_list:
                mass_spectrum = exp.GetMassSpectrum(j)
                mass_spectrum_info = exp.GetMassSpectrumInfo(j)
                details = exp.Details
                ms_level = mass_spectrum_info.MSLevel

                # Skip empty non-SWATH fragment scans when requested.
                if (ms_level > 1 and not details.IsSwath and
                        mass_spectrum.NumDataPoints <= 0 and enhanced_params['ignore_empty_scans']):
                    continue

                mz_array = dot_net_array_to_np_array(mass_spectrum.GetActualXValues())
                int_array = dot_net_array_to_np_array(mass_spectrum.GetActualYValues()).astype(np.float32)

                if enhanced_params['centroid']:
                    mz_array, int_array = naive_centroid(
                        mz_array, int_array,
                        centroiding_ppm=enhanced_params['centroid_ppm']
                    )

                if len(mz_array) > enhanced_params['keep_k_peaks']:
                    # Keep the most intense peaks, restored to their original order.
                    top_indices = np.argsort(int_array)[-enhanced_params['keep_k_peaks']:]
                    top_indices = np.sort(top_indices)
                    mz_array = mz_array[top_indices]
                    int_array = int_array[top_indices]

                peak_mz_list.append(mz_array)
                peak_intensity_list.append(int_array)
                _peak_indices.append(len(peak_mz_list[-1]))

                rt_list.append(exp.GetRTFromExperimentCycle(j))
                ms_level_list.append(ms_level)

                center_mz = -1.0
                isolation_window = 0.0

                if ms_level > 1:
                    if details.IsSwath and details.MassRangeInfo.Length > 0:
                        try:
                            from WiffOps4Python import WiffOps as DotNetWiffOps
                            center_mz = DotNetWiffOps.get_center_mz(details)
                            isolation_window = DotNetWiffOps.get_isolation_window(details)
                        except Exception:
                            # Helper unavailable or failed: use the reported
                            # parent m/z and the default window width.
                            center_mz = mass_spectrum_info.ParentMZ
                            isolation_window = 3.0

                    if isolation_window <= 0:
                        isolation_window = 3.0
                    if center_mz <= 0:
                        center_mz = mass_spectrum_info.ParentMZ

                    precursor_mz_list.append(center_mz)
                    precursor_charge_list.append(mass_spectrum_info.ParentChargeState)
                    nce_list.append(float(mass_spectrum_info.CollisionEnergy))
                    isolation_lower_list.append(center_mz - isolation_window / 2)
                    isolation_upper_list.append(center_mz + isolation_window / 2)
                else:
                    precursor_mz_list.append(-1.0)
                    precursor_charge_list.append(0)
                    nce_list.append(0.0)
                    isolation_lower_list.append(-1.0)
                    isolation_upper_list.append(-1.0)

        # Cumulative peak offsets: spectrum k owns peaks [idx[k], idx[k + 1]).
        peak_indices = np.empty(len(rt_list) + 1, np.int64)
        peak_indices[0] = 0
        peak_indices[1:] = np.cumsum(_peak_indices)

        return {
            'peak_indices': peak_indices,
            'peak_mz': np.concatenate(peak_mz_list) if peak_mz_list else np.array([]),
            'peak_intensity': np.concatenate(peak_intensity_list) if peak_intensity_list else np.array([]),
            'rt': np.array(rt_list, dtype=np.float64),
            'ms_level': np.array(ms_level_list, dtype=np.int8),
            'precursor_mz': np.array(precursor_mz_list, dtype=np.float64),
            'precursor_charge': np.array(precursor_charge_list, dtype=np.int8),
            'isolation_lower_mz': np.array(isolation_lower_list, dtype=np.float64),
            'isolation_upper_mz': np.array(isolation_upper_list, dtype=np.float64),
            'nce': np.array(nce_list, dtype=np.float32),
            'metadata': {
                'format': 'WIFF2',
                'sample_id': sample_id,
                'sample_name': str(self.sample_names[sample_id]),
                'loading_params': enhanced_params,
                'total_spectra': len(rt_list),
                'total_peaks': sum(_peak_indices),
                'ms1_count': np.sum(np.array(ms_level_list) == 1),
                'ms2_count': np.sum(np.array(ms_level_list) > 1),
                'rt_range': [float(np.min(rt_list)), float(np.max(rt_list))] if rt_list else [0, 0],
                'creation_time': str(sample.Details.AcquisitionDateTime.ToString("O")) if hasattr(sample, 'Details') else '',
                'reader_method': 'standard_enhanced'
            }
        }

    def close(self):
        """Close whichever backends were opened; cleanup errors are ignored."""
        if hasattr(self, '_wiffDataProvider'):
            try:
                self._wiffDataProvider.Close()
            except Exception:
                pass  # best-effort cleanup

        if hasattr(self, '_alpharaw_reader'):
            try:
                self._alpharaw_reader.close()
            except Exception:
                pass  # the alpharaw reader may not offer close()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.close()

    def __repr__(self):
        return f"SciexWiff2FileReader(file='{self.filename}', samples={self.sample_count}, method={self.initialization_method})"
615
+
616
+
617
class SciexWiffFileReader:
    """
    Sciex WIFF file reader that talks to the Clearcore2 DLLs directly.

    The constructor opens the file through ``AnalystWiffDataProvider`` and
    reads the sample names; ``load_sample`` extracts the spectra of one
    sample. After a successful ``load_sample`` the loaded sample objects stay
    available as ``wiffSample`` and ``msSample``.
    """

    def __init__(self, filename: str):
        """
        Open ``filename`` and read its sample names.

        Raises
        ------
        ValueError
            If the .NET dependencies could not be loaded at import time.
        """
        if not HAS_DOTNET:
            raise ValueError(
                "Dotnet-based dependencies are required for reading Sciex files. "
                "Do you have pythonnet and/or mono installed? "
                "See the alpharaw documentation for details.",
            )

        self._wiffDataProvider = AnalystWiffDataProvider()
        self._wiff_file = AnalystDataProviderFactory.CreateBatch(
            filename,
            self._wiffDataProvider,
        )
        self.sample_names = self._wiff_file.GetSampleNames()

    def close(self):
        """Close the file and clean up resources."""
        self._wiffDataProvider.Close()

    def load_sample(
        self,
        sample_id: int,
        centroid: bool = True,
        centroid_ppm: float = 20.0,
        ignore_empty_scans: bool = True,
        keep_k_peaks: int = 2000,
    ) -> dict[str, Any]:
        """
        Load a sample from the WIFF file and extract spectral data.

        Parameters
        ----------
        sample_id : int
            ID of the sample to load
        centroid : bool
            Whether to centroid the data
        centroid_ppm : float
            PPM tolerance for centroiding
        ignore_empty_scans : bool
            Whether to skip empty non-SWATH scans with MS level > 1
        keep_k_peaks : int
            Maximum number of peaks to keep per spectrum

        Returns
        -------
        dict
            Dictionary containing spectral data. ``peak_indices`` holds
            cumulative peak offsets (one more entry than spectra); all arrays
            are empty when the sample contains no spectra.

        Raises
        ------
        ValueError
            If ``sample_id`` is not a valid sample index.
        """
        if sample_id < 0 or sample_id >= len(self.sample_names):
            raise ValueError("Incorrect sample number.")

        self.wiffSample = self._wiff_file.GetSample(sample_id)
        self.msSample = self.wiffSample.MassSpectrometerSample

        _peak_indices: list[int] = []
        peak_mz_array_list: list[np.ndarray] = []
        peak_intensity_array_list: list[np.ndarray] = []
        rt_list: list[float] = []
        ms_level_list: list[int] = []
        precursor_mz_list: list[float] = []
        precursor_charge_list: list[int] = []
        ce_list: list[float] = []
        isolation_lower_mz_list: list[float] = []
        isolation_upper_mz_list: list[float] = []

        exp_list = [self.msSample.GetMSExperiment(i) for i in range(self.msSample.ExperimentCount)]
        # The first experiment defines the number of cycles; without any
        # experiment there is nothing to iterate.
        n_scans = exp_list[0].Details.NumberOfScans if exp_list else 0

        for j in range(n_scans):
            for exp in exp_list:
                mass_spectrum = exp.GetMassSpectrum(j)
                mass_spectrum_info = exp.GetMassSpectrumInfo(j)
                details = exp.Details
                ms_level = mass_spectrum_info.MSLevel

                if ms_level > 1 and not details.IsSwath and mass_spectrum.NumDataPoints <= 0 and ignore_empty_scans:
                    continue

                mz_array = dot_net_array_to_np_array(mass_spectrum.GetActualXValues())
                int_array = dot_net_array_to_np_array(
                    mass_spectrum.GetActualYValues(),
                ).astype(np.float32)

                if centroid:
                    (mz_array, int_array) = naive_centroid(
                        mz_array,
                        int_array,
                        centroiding_ppm=centroid_ppm,
                    )

                if len(mz_array) > keep_k_peaks:
                    # Keep the most intense peaks, restored to their original order.
                    idxes = np.argsort(int_array)[-keep_k_peaks:]
                    idxes = np.sort(idxes)
                    mz_array = mz_array[idxes]
                    int_array = int_array[idxes]

                peak_mz_array_list.append(mz_array)
                peak_intensity_array_list.append(int_array)

                _peak_indices.append(len(peak_mz_array_list[-1]))
                rt_list.append(exp.GetRTFromExperimentCycle(j))

                ms_level_list.append(ms_level)

                center_mz = -1.0
                isolation_window = 0.0

                if ms_level > 1:
                    if details.IsSwath and details.MassRangeInfo.Length > 0:
                        center_mz = DotNetWiffOps.get_center_mz(details)
                        isolation_window = DotNetWiffOps.get_isolation_window(details)
                    # Fall back to a default window / the reported parent m/z
                    # when no usable value was obtained above.
                    if isolation_window <= 0:
                        isolation_window = 3.0
                    if center_mz <= 0:
                        center_mz = mass_spectrum_info.ParentMZ
                    precursor_mz_list.append(center_mz)
                    precursor_charge_list.append(mass_spectrum_info.ParentChargeState)
                    ce_list.append(float(mass_spectrum_info.CollisionEnergy))
                    isolation_lower_mz_list.append(center_mz - isolation_window / 2)
                    isolation_upper_mz_list.append(center_mz + isolation_window / 2)
                else:
                    precursor_mz_list.append(-1.0)
                    precursor_charge_list.append(0)
                    ce_list.append(0.0)
                    isolation_lower_mz_list.append(-1.0)
                    isolation_upper_mz_list.append(-1.0)

        # Cumulative peak offsets: spectrum k owns peaks [idx[k], idx[k + 1]).
        peak_indices = np.empty(len(rt_list) + 1, np.int64)
        peak_indices[0] = 0
        peak_indices[1:] = np.cumsum(_peak_indices)

        # np.concatenate rejects an empty sequence, so an empty sample is
        # returned as empty arrays instead.
        if peak_mz_array_list:
            peak_mz = np.concatenate(peak_mz_array_list)
            peak_intensity = np.concatenate(peak_intensity_array_list)
        else:
            peak_mz = np.array([], dtype=np.float64)
            peak_intensity = np.array([], dtype=np.float32)

        return {
            "peak_indices": peak_indices,
            "peak_mz": peak_mz,
            "peak_intensity": peak_intensity,
            "rt": np.array(rt_list, dtype=np.float64),
            "ms_level": np.array(ms_level_list, dtype=np.int8),
            "precursor_mz": np.array(precursor_mz_list, dtype=np.float64),
            "precursor_charge": np.array(precursor_charge_list, dtype=np.int8),
            "isolation_lower_mz": np.array(isolation_lower_mz_list),
            "isolation_upper_mz": np.array(isolation_upper_mz_list),
            "nce": np.array(ce_list, dtype=np.float32),
        }
765
+
766
+
767
class SciexWiffData:
    """
    Standalone Sciex WIFF data reader class that mimics alpharaw.sciex.SciexWiffData
    functionality but uses DLLs directly without importing from alpharaw.

    Spectrum-level values are kept in ``spectrum_df`` (one row per spectrum)
    and the concatenated peak lists in ``peak_df``. Each spectrum row stores
    ``peak_start_idx`` / ``peak_stop_idx``, the half-open slice of ``peak_df``
    that holds its peaks.
    """

    # Column data types mapping: keys of the raw data dict that are copied
    # into ``spectrum_df`` and the dtype each one is converted to.
    column_dtypes: ClassVar[dict[str, Any]] = {
        "rt": np.float64,
        "ms_level": np.int8,
        "precursor_mz": np.float64,
        "isolation_lower_mz": np.float64,
        "isolation_upper_mz": np.float64,
        "precursor_charge": np.int8,
        "nce": np.float32,
        "injection_time": np.float32,
        "activation": "U",
    }

    def __init__(self, centroided: bool = True, save_as_hdf: bool = False, **kwargs):
        """
        Parameters
        ----------
        centroided : bool, optional
            Whether peaks should be centroided after loading, by default True.
            A truthy value is currently reset to False and a warning is
            emitted instead.
        save_as_hdf : bool, optional
            Stored as ``_save_as_hdf``, by default False. No method of this
            class reads the flag, so nothing is saved automatically; call
            ``save_hdf`` explicitly.
        **kwargs
            Accepted and ignored.
        """
        self.spectrum_df: pd.DataFrame = pd.DataFrame()
        self.peak_df: pd.DataFrame = pd.DataFrame()
        self._raw_file_path = ""
        self.centroided = centroided
        self._save_as_hdf = save_as_hdf
        self.creation_time = ""
        self.file_type = "sciex"
        self.instrument = "sciex"

        if self.centroided:
            self.centroided = False
            warnings.warn(
                "Centroiding for Sciex data is not well implemented yet",
                stacklevel=2,
            )

        # Settings forwarded to the reader's load_sample() call in _import().
        self.centroid_ppm = 20.0
        self.ignore_empty_scans = True
        self.keep_k_peaks_per_spec = 2000
        self.sample_id = 0

    @property
    def raw_file_path(self) -> str:
        """Get the raw file path."""
        return self._raw_file_path

    @raw_file_path.setter
    def raw_file_path(self, value: str):
        """Set the raw file path."""
        self._raw_file_path = value

    def import_raw(self, wiff_file_path: str) -> None:
        """
        Import raw data from a WIFF file.

        Records the path, reads the sample selected by ``self.sample_id`` and
        rebuilds ``spectrum_df`` and ``peak_df`` from it.

        Parameters
        ----------
        wiff_file_path : str
            Path to the WIFF file
        """
        self.raw_file_path = wiff_file_path
        data_dict = self._import(wiff_file_path)
        self._set_dataframes(data_dict)

    def _import(self, _wiff_file_path: str) -> dict[str, Any]:
        """
        Implementation of data import interface.

        Also sets ``self.creation_time`` from the sample's acquisition time.

        Parameters
        ----------
        _wiff_file_path : str
            Absolute or relative path of the sciex wiff file.

        Returns
        -------
        dict
            Spectrum information dict.
        """
        wiff_reader = SciexWiffFileReader(_wiff_file_path)
        try:
            data_dict = wiff_reader.load_sample(
                self.sample_id,
                centroid=self.centroided,
                centroid_ppm=self.centroid_ppm,
                ignore_empty_scans=self.ignore_empty_scans,
                keep_k_peaks=self.keep_k_peaks_per_spec,
            )
            self.creation_time = wiff_reader.wiffSample.Details.AcquisitionDateTime.ToString("O")
        finally:
            # Release the reader even when loading fails part-way.
            wiff_reader.close()
        return data_dict

    def _set_dataframes(self, raw_data: dict[str, Any]) -> None:
        """
        Set the spectrum and peak dataframes from raw data dictionary.

        ``raw_data`` must provide ``rt``, ``peak_mz``, ``peak_intensity`` and
        ``peak_indices``; consecutive ``peak_indices`` entries are used as the
        start/stop offsets of each spectrum. Any other key listed in
        ``column_dtypes`` becomes a ``spectrum_df`` column.

        Parameters
        ----------
        raw_data : dict
            Dictionary containing the raw spectral data
        """
        self.create_spectrum_df(len(raw_data["rt"]))
        self.set_peak_df_by_indexed_array(
            raw_data["peak_mz"],
            raw_data["peak_intensity"],
            raw_data["peak_indices"][:-1],
            raw_data["peak_indices"][1:],
        )

        for col, val in raw_data.items():
            if col not in self.column_dtypes:
                continue
            dtype = self.column_dtypes[col]
            if dtype == "O":
                # Object columns are stored as plain lists; this class's
                # column_dtypes declares no "O" entry at present.
                self.spectrum_df[col] = list(val)
            else:
                self.spectrum_df[col] = np.array(val, dtype=dtype)

    def create_spectrum_df(self, spectrum_num: int) -> None:
        """
        Create an empty spectrum dataframe from the number of spectra.

        The frame gets an int64 index ``0..spectrum_num-1`` and a ``spec_idx``
        column holding the same values.

        Parameters
        ----------
        spectrum_num : int
            The number of spectra.
        """
        self.spectrum_df = pd.DataFrame(index=np.arange(spectrum_num, dtype=np.int64))
        self.spectrum_df["spec_idx"] = self.spectrum_df.index.values

    def set_peak_df_by_indexed_array(
        self,
        mz_array: np.ndarray,
        intensity_array: np.ndarray,
        peak_start_indices: np.ndarray,
        peak_stop_indices: np.ndarray,
    ) -> None:
        """
        Set peak dataframe using indexed arrays.

        Replaces ``peak_df`` and writes the start/stop offsets into the
        existing ``spectrum_df``, so ``create_spectrum_df`` must run first.

        Parameters
        ----------
        mz_array : np.ndarray
            Array of m/z values
        intensity_array : np.ndarray
            Array of intensity values
        peak_start_indices : np.ndarray
            Array of start indices for each spectrum
        peak_stop_indices : np.ndarray
            Array of stop indices for each spectrum
        """
        self.peak_df = pd.DataFrame()
        self.peak_df["mz"] = mz_array.astype(np.float64)
        self.peak_df["intensity"] = intensity_array.astype(np.float32)

        # Set peak start and stop indices in spectrum df
        self.spectrum_df["peak_start_idx"] = peak_start_indices
        self.spectrum_df["peak_stop_idx"] = peak_stop_indices

    def get_peaks(self, spec_idx: int) -> tuple[np.ndarray, np.ndarray]:
        """
        Get peaks for a specific spectrum.

        Parameters
        ----------
        spec_idx : int
            Positional index of the spectrum row in ``spectrum_df``

        Returns
        -------
        tuple
            (mz_array, intensity_array)
        """
        # Read the two offsets straight from their columns rather than
        # materialising a two-column frame on every call.
        start = self.spectrum_df["peak_start_idx"].values[spec_idx]
        end = self.spectrum_df["peak_stop_idx"].values[spec_idx]
        return (
            self.peak_df.mz.values[start:end],
            self.peak_df.intensity.values[start:end],
        )

    def save_hdf(self, hdf_file_path: str) -> None:
        """
        Save data to HDF5 file (placeholder implementation).

        No HDF5 is written: the dataframes and basic metadata are pickled
        instead. ``.hdf`` in the file name is replaced by ``.pkl``; directory
        components of the path are left untouched.

        Parameters
        ----------
        hdf_file_path : str
            Path to save the HDF5 file
        """
        # This would require implementing HDF5 saving functionality
        # For now, just save as pickle or implement as needed
        import os
        import pickle

        # Rewrite the extension on the file name only, so a directory whose
        # name contains ".hdf" is not turned into a non-existent path.
        directory, file_name = os.path.split(hdf_file_path)
        pickle_path = os.path.join(directory, file_name.replace(".hdf", ".pkl"))

        # NOTE: pickle files must only be loaded back from trusted sources.
        with open(pickle_path, "wb") as f:
            pickle.dump(
                {
                    "spectrum_df": self.spectrum_df,
                    "peak_df": self.peak_df,
                    "creation_time": self.creation_time,
                    "raw_file_path": self.raw_file_path,
                    "file_type": self.file_type,
                    "centroided": self.centroided,
                    "instrument": self.instrument,
                },
                f,
            )

    def __repr__(self) -> str:
        return f"SciexWiffData(file_path='{self.raw_file_path}', spectra={len(self.spectrum_df)})"
982
+
983
+
984
+ # Convenience functions to maintain compatibility with existing code
985
def load_wiff_file(filename: str, **kwargs) -> SciexWiffData:
    """
    Create a SciexWiffData object and import a WIFF file into it.

    Parameters
    ----------
    filename : str
        Path to the WIFF file
    **kwargs
        Forwarded unchanged to the SciexWiffData constructor

    Returns
    -------
    SciexWiffData
        The object after ``import_raw(filename)`` has been called on it
    """
    loaded = SciexWiffData(**kwargs)
    loaded.import_raw(filename)
    return loaded
1004
+
1005
+
1006
def load_wiff2_file(filename: str, **kwargs) -> dict[str, Any]:
    """
    Load a WIFF2 file and return spectral data.

    Note: WIFF2 format support is limited with current DLL versions.
    If you encounter format incompatibility errors, try using the regular
    WIFF file instead or convert WIFF2 to WIFF format.

    Parameters
    ----------
    filename : str
        Path to the WIFF2 file
    **kwargs
        Additional arguments for WIFF2 loading. ``sample_id`` (default 0) is
        taken out and passed positionally; the rest is forwarded to the
        reader's ``load_sample``.

    Returns
    -------
    dict
        Spectral data dictionary as returned by the reader's ``load_sample``

    Raises
    ------
    RuntimeError
        If the reader reports that the format is not supported. The error is
        re-raised with a message saying whether a sibling ``.wiff`` file
        exists, chained to the original error. Any other RuntimeError
        propagates unchanged.
    """
    sample_id = kwargs.pop("sample_id", 0)

    try:
        with SciexWiff2FileReader(filename) as reader:
            return reader.load_sample(sample_id, **kwargs)  # type: ignore[no-any-return]
    except RuntimeError as e:
        if "format is not supported" not in str(e):
            raise

        # Derive the sibling WIFF path by dropping the trailing "2" of the
        # extension. A whole-path str.replace would also rewrite directory
        # names, and would leave names without a lower-case ".wiff2"
        # unchanged, which would then recommend the file that just failed.
        wiff_file = filename[:-1] if filename.lower().endswith(".wiff2") else None

        if wiff_file is not None and os.path.exists(wiff_file):
            raise RuntimeError(
                f"WIFF2 format not supported. However, a regular WIFF file was found: "
                f"'{wiff_file}'. Try using load_wiff_file('{wiff_file}') instead."
            ) from e
        raise RuntimeError(
            f"WIFF2 format not supported and no corresponding WIFF file found. "
            f"Original error: {e}"
        ) from e
1052
+
1053
+
1054
def load_wiff_file_smart(filename: str, **kwargs) -> dict[str, Any] | SciexWiffData:
    """
    Smart WIFF file loader that automatically handles WIFF and WIFF2 formats.

    A path ending in ``.wiff2`` (case-insensitive) is loaded with
    ``load_wiff2_file``. If that fails because the format is not supported and
    a sibling ``.wiff`` file exists, a warning is issued and the sibling is
    loaded with ``load_wiff_file``. Any other path goes straight to
    ``load_wiff_file``.

    Parameters
    ----------
    filename : str
        Path to the WIFF or WIFF2 file
    **kwargs
        Additional arguments for loading (sample_id, centroid, etc.). The same
        kwargs are forwarded to whichever loader runs.

    Returns
    -------
    dict or SciexWiffData
        Spectral data dictionary when the WIFF2 loader succeeds, otherwise the
        SciexWiffData object returned by ``load_wiff_file``

    Raises
    ------
    RuntimeError
        Re-raised from ``load_wiff2_file`` when no fallback is possible.
    """
    if not filename.lower().endswith(".wiff2"):
        return load_wiff_file(filename, **kwargs)

    try:
        return load_wiff2_file(filename, **kwargs)
    except RuntimeError as e:
        # load_wiff2_file re-raises an unsupported-format error with a
        # reworded message and chains the original via "from", so the marker
        # text may only be present on the cause.
        cause = e.__cause__
        unsupported = "format is not supported" in str(e) or (
            cause is not None and "format is not supported" in str(cause)
        )
        if unsupported:
            # Drop the trailing "2" of the extension; this keeps the
            # extension's case and leaves directory names untouched.
            wiff_file = filename[:-1]
            if os.path.exists(wiff_file):
                warnings.warn(
                    f"WIFF2 format not supported, falling back to WIFF file: {wiff_file}",
                    stacklevel=2,
                )
                return load_wiff_file(wiff_file, **kwargs)
        raise
1089
+
1090
+
1091
def get_sample_names(filename: str) -> list:
    """
    Return the sample names stored in a WIFF file.

    The reader is closed before returning, also when reading the names fails.

    Parameters
    ----------
    filename : str
        Path to the WIFF file

    Returns
    -------
    list
        List of sample names
    """
    wiff_reader = SciexWiffFileReader(filename)
    try:
        names = list(wiff_reader.sample_names)
    finally:
        wiff_reader.close()
    return names
1110
+
1111
+
1112
def get_wiff2_sample_names(filename: str) -> list:
    """
    Return the sample names stored in a WIFF2 file.

    The reader is used as a context manager for the duration of the call.

    Parameters
    ----------
    filename : str
        Path to the WIFF2 file

    Returns
    -------
    list
        List of sample names
    """
    with SciexWiff2FileReader(filename) as wiff2_reader:
        sample_names = wiff2_reader.sample_names
        return list(sample_names)
1128
+
1129
+
1130
def get_wiff2_metadata(filename: str) -> dict[str, Any]:
    """
    Return the file-level metadata of a WIFF2 file.

    The reader is used as a context manager for the duration of the call.

    Parameters
    ----------
    filename : str
        Path to the WIFF2 file

    Returns
    -------
    dict
        Whatever the reader's ``get_file_metadata()`` returns
    """
    with SciexWiff2FileReader(filename) as wiff2_reader:
        metadata = wiff2_reader.get_file_metadata()
        return metadata  # type: ignore[no-any-return]
1146
+
1147
+
1148
+ # Example usage and testing
1149
if __name__ == "__main__":
    # Manual smoke test: prints a usage example, instantiates the data class
    # and, when the bundled example WIFF file can be found, loads it and
    # reports a few basic statistics.
    print("Standalone Sciex WIFF reader implementation")
    print("Usage example:")
    print("""
from sciex import SciexWiffData, load_wiff_file

# Create reader instance
wiff_data = SciexWiffData(centroided=False)
wiff_data.import_raw("path/to/file.wiff")

# Or use convenience function
wiff_data = load_wiff_file("path/to/file.wiff")

# Access spectrum and peak data
print(f"Number of spectra: {len(wiff_data.spectrum_df)}")
print(f"Number of peaks: {len(wiff_data.peak_df)}")

# Get peaks for first spectrum
mz, intensity = wiff_data.get_peaks(0)
""")

    # Test that the module can be imported and classes instantiated
    try:
        test_data = SciexWiffData()
        print(f"✓ SciexWiffData class instantiated successfully: {test_data}")
        print(f"✓ Has dotnet support: {HAS_DOTNET}")

        # Test with example WIFF file if available
        example_name = (
            "2025_01_14_VW_7600_LpMx_DBS_CID_2min_TOP15_030msecMS1_005msecReac_CE35_DBS-ON_3.wiff"
        )
        # Look next to this module first (the original location), then in the
        # parent package directory.
        # NOTE(review): the fallback assumes examples ship one level up
        # (<package>/data/examples) - confirm against the package layout.
        module_dir = os.path.dirname(__file__)
        package_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
        candidates = [
            os.path.join(module_dir, "data", "examples", example_name),
            os.path.join(package_dir, "data", "examples", example_name),
        ]
        example_file = next(
            (path for path in candidates if os.path.exists(path)),
            candidates[0],
        )

        if os.path.exists(example_file):
            print(f"\n✓ Found example WIFF file: {example_file}")
            print("Testing WIFF file loading...")

            # Test loading the example file
            wiff_data = load_wiff_file(example_file)
            print("✓ Successfully loaded WIFF file")
            print(f" - Number of spectra: {len(wiff_data.spectrum_df)}")
            print(f" - Number of peaks: {len(wiff_data.peak_df)}")
            print(f" - Creation time: {wiff_data.creation_time}")
            print(f" - File type: {wiff_data.file_type}")
            print(f" - Instrument: {wiff_data.instrument}")

            # Test getting peaks from first spectrum
            if len(wiff_data.spectrum_df) > 0:
                mz, intensity = wiff_data.get_peaks(0)
                print(f" - First spectrum has {len(mz)} peaks")
                if len(mz) > 0:
                    print(f" - m/z range: {mz.min():.2f} - {mz.max():.2f}")
                    print(
                        f" - Intensity range: {intensity.min():.0f} - {intensity.max():.0f}",
                    )
        else:
            print(f"\n⚠ Example WIFF file not found at: {example_file}")

    except Exception as e:
        # Top-level boundary of a diagnostic script: report and show the
        # traceback instead of aborting with an unformatted error.
        print(f"✗ Error during testing: {e}")
        import traceback

        traceback.print_exc()