masster 0.5.16__py3-none-any.whl → 0.5.18__py3-none-any.whl

This diff shows the changes between publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Potentially problematic release.


This version of masster might be problematic. Click here for more details.

masster/sample/sciex.py CHANGED
@@ -2,12 +2,11 @@
2
2
  Standalone Sciex WIFF file reader module.
3
3
 
4
4
  This module provides a standalone implementation of Sciex WIFF file reading
5
- functionality that uses the DLLs from alpharaw's ext/sciex directory directly
6
- without importing from the alpharaw package.
5
+ functionality that uses the Sciex DLLs directly.
7
6
 
8
7
  Requirements:
9
8
  - pythonnet (pip install pythonnet)
10
- - alpharaw package must be installed to access the DLLs in site-packages/alpharaw/ext/sciex/
9
+ - Sciex DLLs must be available in the ext/sciex directory
11
10
  - On Linux/macOS: mono runtime must be installed
12
11
 
13
12
  The .NET imports (System, Clearcore2, WiffOps4Python) will only work when
@@ -32,6 +31,20 @@ def naive_centroid(
32
31
  ) -> tuple[np.ndarray, np.ndarray]:
33
32
  """
34
33
  Simplified naive centroiding implementation.
34
+
35
+ Parameters
36
+ ----------
37
+ peak_mzs : np.ndarray
38
+ Array of m/z values
39
+ peak_intensities : np.ndarray
40
+ Array of intensity values
41
+ centroiding_ppm : float, default 20.0
42
+ PPM tolerance for combining peaks
43
+
44
+ Returns
45
+ -------
46
+ tuple[np.ndarray, np.ndarray]
47
+ Centroided m/z and intensity arrays
35
48
  """
36
49
  if len(peak_mzs) == 0:
37
50
  return np.array([]), np.array([])
@@ -134,10 +147,24 @@ except Exception as e:
134
147
  HAS_DOTNET = False
135
148
 
136
149
 
137
- def dot_net_array_to_np_array(src):
150
+ def dot_net_array_to_np_array(src) -> np.ndarray:
138
151
  """
139
152
  Convert .NET array to NumPy array.
140
- See https://mail.python.org/pipermail/pythondotnet/2014-May/001527.html
153
+
154
+ Parameters
155
+ ----------
156
+ src : .NET array or None
157
+ Source .NET array to convert
158
+
159
+ Returns
160
+ -------
161
+ np.ndarray
162
+ Converted NumPy array
163
+
164
+ Notes
165
+ -----
166
+ Based on approach from:
167
+ https://mail.python.org/pipermail/pythondotnet/2014-May/001527.html
141
168
  """
142
169
  if src is None:
143
170
  return np.array([], dtype=np.float64)
@@ -153,482 +180,9 @@ def dot_net_array_to_np_array(src):
153
180
  return dest # noqa: B012
154
181
 
155
182
 
156
- class SciexWiff2FileReader:
157
- """
158
- Specialized reader for Sciex WIFF2 files using optimal DLL combination.
159
-
160
- WIFF2 is a newer format from Sciex that may have enhanced capabilities
161
- compared to the original WIFF format. This reader is optimized specifically
162
- for WIFF2 files and uses the most appropriate DLLs for maximum information extraction.
163
-
164
- Based on comprehensive DLL analysis, WIFF2 files require specific handling and
165
- may use different underlying storage mechanisms than regular WIFF files.
166
- """
167
-
168
- def __init__(self, filename: str):
169
- """
170
- Initialize WIFF2 reader with file path.
171
-
172
- Parameters
173
- ----------
174
- filename : str
175
- Path to the WIFF2 file
176
- """
177
- if not HAS_DOTNET:
178
- raise ValueError(
179
- "Dotnet-based dependencies are required for reading Sciex WIFF2 files. "
180
- "Install pythonnet and ensure alpharaw DLLs are available.",
181
- )
182
-
183
- self.filename = filename
184
- self.ext_dir = self._find_dll_directory()
185
- self._ensure_wiff2_dlls_loaded()
186
-
187
- # Try different initialization strategies for WIFF2
188
- self._initialize_wiff2_reader()
189
-
190
- def _find_dll_directory(self):
191
- """Find the alpharaw DLL directory using the same discovery pattern."""
192
- for site_dir in site.getsitepackages():
193
- potential_ext_dir = os.path.join(site_dir, "alpharaw", "ext", "sciex")
194
- if os.path.exists(potential_ext_dir):
195
- return potential_ext_dir
196
-
197
- # Fallback to alpharaw module location
198
- try:
199
- import alpharaw
200
-
201
- alpharaw_dir = os.path.dirname(alpharaw.__file__)
202
- return os.path.join(alpharaw_dir, "ext", "sciex")
203
- except ImportError:
204
- raise ImportError("Could not find alpharaw DLL directory")
205
-
206
- def _ensure_wiff2_dlls_loaded(self):
207
- """Ensure all necessary WIFF2 DLLs are loaded."""
208
- # Key DLLs identified through comprehensive analysis
209
- required_dlls = [
210
- "Clearcore2.Data.Wiff2.dll", # Primary WIFF2 support
211
- "Clearcore2.Data.AnalystDataProvider.dll",
212
- "Clearcore2.Data.dll",
213
- "Clearcore2.Data.Common.dll",
214
- "Clearcore2.Data.Core.dll",
215
- "Clearcore2.StructuredStorage.dll", # For WIFF2 storage format
216
- "WiffOps4Python.dll",
217
- ]
218
-
219
- for dll in required_dlls:
220
- dll_path = os.path.join(self.ext_dir, dll)
221
- if os.path.exists(dll_path):
222
- try:
223
- clr.AddReference(dll_path)
224
- except:
225
- pass # May already be loaded
226
- else:
227
- warnings.warn(f"WIFF2 DLL not found: {dll}", stacklevel=2)
228
-
229
- def _initialize_wiff2_reader(self):
230
- """
231
- Initialize WIFF2 reader with fallback strategies.
232
-
233
- WIFF2 files may require different initialization approaches than WIFF files.
234
- We try multiple strategies based on the comprehensive DLL analysis.
235
- """
236
- initialization_errors = []
237
-
238
- # Strategy 1: Try standard AnalystDataProvider (may work for some WIFF2)
239
- try:
240
- from Clearcore2.Data.AnalystDataProvider import AnalystDataProviderFactory
241
- from Clearcore2.Data.AnalystDataProvider import AnalystWiffDataProvider
242
-
243
- self._wiffDataProvider = AnalystWiffDataProvider()
244
- self._wiff_file = AnalystDataProviderFactory.CreateBatch(
245
- self.filename,
246
- self._wiffDataProvider,
247
- )
248
-
249
- self.sample_names = self._wiff_file.GetSampleNames()
250
- self.sample_count = len(self.sample_names)
251
- self.initialization_method = "AnalystDataProvider"
252
- return
253
-
254
- except Exception as e:
255
- initialization_errors.append(f"AnalystDataProvider: {e}")
256
-
257
- # Strategy 2: Try alpharaw's SciexWiffData (correct API)
258
- try:
259
- from alpharaw.sciex import SciexWiffData
260
-
261
- self._alpharaw_reader = SciexWiffData()
262
- self._alpharaw_reader.import_raw(self.filename)
263
-
264
- # Extract basic information (SciexWiffData doesn't have sample_names property)
265
- self.sample_names = ["Sample_0"] # Default since WIFF2 format needs investigation
266
- self.sample_count = 1
267
- self.initialization_method = "alpharaw_SciexWiffData"
268
-
269
- # Store the reader for later use
270
- self._wiff_data = self._alpharaw_reader
271
- return
272
-
273
- except Exception as e:
274
- initialization_errors.append(f"alpharaw_SciexWiffData: {e}")
275
-
276
- # Strategy 3: Try direct WIFF2 DLL approach
277
- try:
278
- # Check if file is recognized as WIFF2
279
- from Clearcore2.Data.AnalystDataProvider import DataProviderHelper
280
-
281
- is_wiff2 = DataProviderHelper.IsMdWiffFile(self.filename)
282
- if is_wiff2:
283
- # Try specialized WIFF2 handling
284
- warnings.warn(
285
- "File detected as WIFF2 format but specialized reader not fully implemented. "
286
- "Consider using alpharaw.ms_data_from_file() directly.",
287
- stacklevel=2,
288
- )
289
- # For now, fall back to treating as regular WIFF with enhanced parameters
290
- self._initialize_as_enhanced_wiff()
291
- return
292
-
293
- except Exception as e:
294
- initialization_errors.append(f"WIFF2 detection: {e}")
295
-
296
- # If all strategies fail, provide comprehensive error information with helpful suggestions
297
- error_summary = "; ".join(initialization_errors)
298
-
299
- # Check if this is a WIFF2 format issue specifically
300
- if "could not be opened (result = -2147286960)" in error_summary:
301
- raise RuntimeError(
302
- f"WIFF2 file format is not supported by the current DLL combination. "
303
- f"Error code -2147286960 (0x80030050) indicates format incompatibility. "
304
- f"The file '{self.filename}' appears to be a valid WIFF2 file but requires "
305
- f"newer or different DLLs than currently available. "
306
- f"Try converting the WIFF2 file to WIFF format or use alternative tools. "
307
- f"Full error details: {error_summary}",
308
- )
309
- else:
310
- raise RuntimeError(
311
- f"Failed to initialize WIFF2 reader with any strategy. "
312
- f"Errors: {error_summary}. "
313
- f"The file may be corrupted, locked, or require different dependencies.",
314
- )
315
-
316
- def _initialize_as_enhanced_wiff(self):
317
- """Fallback: Initialize as enhanced WIFF with WIFF2-optimized parameters."""
318
- # Use the same initialization as regular WIFF but with warnings
319
- try:
320
- from Clearcore2.Data.AnalystDataProvider import AnalystDataProviderFactory
321
- from Clearcore2.Data.AnalystDataProvider import AnalystWiffDataProvider
322
-
323
- self._wiffDataProvider = AnalystWiffDataProvider()
324
- self._wiff_file = AnalystDataProviderFactory.CreateBatch(
325
- self.filename,
326
- self._wiffDataProvider,
327
- )
328
-
329
- self.sample_names = self._wiff_file.GetSampleNames()
330
- self.sample_count = len(self.sample_names)
331
- self.initialization_method = "enhanced_wiff_fallback"
332
-
333
- warnings.warn(
334
- "WIFF2 file opened using WIFF reader fallback. Some WIFF2-specific features may not be available.",
335
- stacklevel=2,
336
- )
337
-
338
- except Exception as e:
339
- raise RuntimeError(f"Enhanced WIFF fallback also failed: {e}")
340
-
341
- def get_file_metadata(self) -> dict[str, Any]:
342
- """Get comprehensive file metadata for WIFF2 format."""
343
- metadata: dict[str, Any] = {
344
- "format": "WIFF2",
345
- "sample_count": self.sample_count,
346
- "sample_names": list(self.sample_names),
347
- "file_size": os.path.getsize(self.filename),
348
- "file_path": self.filename,
349
- "initialization_method": self.initialization_method,
350
- "samples": [], # Initialize samples list
351
- }
352
-
353
- if self.initialization_method == "alpharaw":
354
- # Get metadata from alpharaw reader
355
- try:
356
- if hasattr(self._alpharaw_reader, "get_spectrum_count"):
357
- metadata["total_spectra"] = self._alpharaw_reader.get_spectrum_count()
358
-
359
- # Add alpharaw-specific metadata
360
- for attr in ["creation_time", "instrument_model", "ms_levels"]:
361
- if hasattr(self._alpharaw_reader, attr):
362
- try:
363
- value = getattr(self._alpharaw_reader, attr)
364
- if callable(value):
365
- metadata[attr] = value()
366
- else:
367
- metadata[attr] = value
368
- except:
369
- pass
370
-
371
- except Exception as e:
372
- metadata["metadata_error"] = str(e)
373
-
374
- elif hasattr(self, "_wiff_file"):
375
- # Get metadata from standard WIFF reader
376
- try:
377
- for i in range(self.sample_count):
378
- sample = self._wiff_file.GetSample(i)
379
- sample_info = {
380
- "index": i,
381
- "name": str(self.sample_names[i]),
382
- }
383
-
384
- if hasattr(sample, "Details"):
385
- details = sample.Details
386
- if hasattr(details, "AcquisitionDateTime"):
387
- sample_info["acquisition_time"] = str(details.AcquisitionDateTime.ToString("O"))
388
-
389
- if hasattr(sample, "MassSpectrometerSample"):
390
- ms_sample = sample.MassSpectrometerSample
391
- sample_info["experiment_count"] = ms_sample.ExperimentCount
392
-
393
- metadata["samples"].append(sample_info)
394
-
395
- except Exception as e:
396
- metadata["metadata_error"] = str(e)
397
-
398
- return metadata
399
-
400
- def load_sample(self, sample_id: int = 0, **kwargs):
401
- """
402
- Load sample data with WIFF2-optimized settings.
403
-
404
- Parameters
405
- ----------
406
- sample_id : int
407
- Sample index to load
408
- **kwargs
409
- Additional parameters for data loading
410
-
411
- Returns
412
- -------
413
- dict
414
- Comprehensive spectral data dictionary
415
- """
416
- if self.initialization_method == "alpharaw":
417
- return self._load_sample_alpharaw(sample_id, **kwargs)
418
- else:
419
- return self._load_sample_standard(sample_id, **kwargs)
420
-
421
- def _load_sample_alpharaw(self, sample_id: int, **kwargs):
422
- """Load sample using alpharaw reader."""
423
- # Enhanced parameters for WIFF2
424
- enhanced_params = {
425
- "centroid": kwargs.get("centroid", True),
426
- "centroid_ppm": kwargs.get("centroid_ppm", 15.0),
427
- "keep_k_peaks": kwargs.get("keep_k_peaks", 3000),
428
- }
429
-
430
- try:
431
- # Use alpharaw's data extraction
432
- spectrum_df = self._alpharaw_reader.spectrum_df
433
- peak_df = self._alpharaw_reader.peak_df
434
-
435
- # Convert to the expected format
436
- spectral_data = {
437
- "peak_indices": spectrum_df[["peak_start_idx", "peak_stop_idx"]].values.flatten(),
438
- "peak_mz": peak_df["mz"].values,
439
- "peak_intensity": peak_df["intensity"].values,
440
- "rt": spectrum_df["rt"].values,
441
- "ms_level": spectrum_df["ms_level"].values,
442
- "precursor_mz": spectrum_df.get("precursor_mz", np.full(len(spectrum_df), -1.0)).values,
443
- "precursor_charge": spectrum_df.get("precursor_charge", np.full(len(spectrum_df), 0)).values,
444
- "isolation_lower_mz": spectrum_df.get("isolation_lower_mz", np.full(len(spectrum_df), -1.0)).values,
445
- "isolation_upper_mz": spectrum_df.get("isolation_upper_mz", np.full(len(spectrum_df), -1.0)).values,
446
- "nce": spectrum_df.get("nce", np.full(len(spectrum_df), 0.0)).values,
447
- "metadata": {
448
- "format": "WIFF2",
449
- "sample_id": sample_id,
450
- "sample_name": str(self.sample_names[sample_id])
451
- if sample_id < len(self.sample_names)
452
- else f"Sample_{sample_id}",
453
- "loading_params": enhanced_params,
454
- "total_spectra": len(spectrum_df),
455
- "total_peaks": len(peak_df),
456
- "ms1_count": np.sum(spectrum_df["ms_level"] == 1),
457
- "ms2_count": np.sum(spectrum_df["ms_level"] > 1),
458
- "rt_range": [float(spectrum_df["rt"].min()), float(spectrum_df["rt"].max())]
459
- if len(spectrum_df) > 0
460
- else [0, 0],
461
- "reader_method": "alpharaw",
462
- },
463
- }
464
-
465
- return spectral_data
466
-
467
- except Exception as e:
468
- raise RuntimeError(f"Failed to load WIFF2 sample via alpharaw: {e}")
469
-
470
- def _load_sample_standard(self, sample_id: int, **kwargs):
471
- """Load sample using standard WIFF reader with WIFF2 enhancements."""
472
- # Use enhanced parameters optimized for WIFF2
473
- enhanced_params = {
474
- "centroid": kwargs.get("centroid", True),
475
- "centroid_ppm": kwargs.get("centroid_ppm", 15.0), # Tighter for WIFF2
476
- "ignore_empty_scans": kwargs.get("ignore_empty_scans", True),
477
- "keep_k_peaks": kwargs.get("keep_k_peaks", 3000), # More peaks for WIFF2
478
- }
479
-
480
- if sample_id < 0 or sample_id >= self.sample_count:
481
- raise ValueError(f"Sample ID {sample_id} out of range (0-{self.sample_count - 1})")
482
-
483
- # Use the same loading approach as SciexWiffFileReader but with enhancements
484
- sample = self._wiff_file.GetSample(sample_id)
485
- ms_sample = sample.MassSpectrometerSample
486
-
487
- # Process data (same as SciexWiffFileReader.load_sample but with enhanced params)
488
- _peak_indices: list[int] = []
489
- peak_mz_list: list[np.ndarray] = []
490
- peak_intensity_list: list[np.ndarray] = []
491
- rt_list: list[float] = []
492
- ms_level_list: list[int] = []
493
- precursor_mz_list: list[float] = []
494
- precursor_charge_list: list[int] = []
495
- nce_list: list[float] = []
496
- isolation_lower_list: list[float] = []
497
- isolation_upper_list: list[float] = []
498
-
499
- exp_list = [ms_sample.GetMSExperiment(i) for i in range(ms_sample.ExperimentCount)]
500
-
501
- for j in range(exp_list[0].Details.NumberOfScans):
502
- for i in range(ms_sample.ExperimentCount):
503
- exp = exp_list[i]
504
- mass_spectrum = exp.GetMassSpectrum(j)
505
- mass_spectrum_info = exp.GetMassSpectrumInfo(j)
506
- details = exp.Details
507
- ms_level = mass_spectrum_info.MSLevel
508
-
509
- if (
510
- ms_level > 1
511
- and not details.IsSwath
512
- and mass_spectrum.NumDataPoints <= 0
513
- and enhanced_params["ignore_empty_scans"]
514
- ):
515
- continue
516
-
517
- mz_array = dot_net_array_to_np_array(mass_spectrum.GetActualXValues())
518
- int_array = dot_net_array_to_np_array(mass_spectrum.GetActualYValues()).astype(np.float32)
519
-
520
- if enhanced_params["centroid"]:
521
- mz_array, int_array = naive_centroid(
522
- mz_array,
523
- int_array,
524
- centroiding_ppm=enhanced_params["centroid_ppm"],
525
- )
526
-
527
- if len(mz_array) > enhanced_params["keep_k_peaks"]:
528
- top_indices = np.argsort(int_array)[-enhanced_params["keep_k_peaks"] :]
529
- top_indices = np.sort(top_indices)
530
- mz_array = mz_array[top_indices]
531
- int_array = int_array[top_indices]
532
-
533
- peak_mz_list.append(mz_array)
534
- peak_intensity_list.append(int_array)
535
- _peak_indices.append(len(peak_mz_list[-1]))
536
-
537
- rt_list.append(exp.GetRTFromExperimentCycle(j))
538
- ms_level_list.append(ms_level)
539
-
540
- # Enhanced precursor handling for WIFF2
541
- center_mz = -1.0
542
- isolation_window = 0.0
543
-
544
- if ms_level > 1:
545
- if details.IsSwath and details.MassRangeInfo.Length > 0:
546
- try:
547
- from WiffOps4Python import WiffOps as DotNetWiffOps
548
-
549
- center_mz = DotNetWiffOps.get_center_mz(details)
550
- isolation_window = DotNetWiffOps.get_isolation_window(details)
551
- except:
552
- center_mz = mass_spectrum_info.ParentMZ
553
- isolation_window = 3.0
554
-
555
- if isolation_window <= 0:
556
- isolation_window = 3.0
557
- if center_mz <= 0:
558
- center_mz = mass_spectrum_info.ParentMZ
559
-
560
- precursor_mz_list.append(center_mz)
561
- precursor_charge_list.append(mass_spectrum_info.ParentChargeState)
562
- nce_list.append(float(mass_spectrum_info.CollisionEnergy))
563
- isolation_lower_list.append(center_mz - isolation_window / 2)
564
- isolation_upper_list.append(center_mz + isolation_window / 2)
565
- else:
566
- precursor_mz_list.append(-1.0)
567
- precursor_charge_list.append(0)
568
- nce_list.append(0.0)
569
- isolation_lower_list.append(-1.0)
570
- isolation_upper_list.append(-1.0)
571
-
572
- # Finalize arrays
573
- peak_indices = np.empty(len(rt_list) + 1, np.int64)
574
- peak_indices[0] = 0
575
- peak_indices[1:] = np.cumsum(_peak_indices)
576
-
577
- return {
578
- "peak_indices": peak_indices,
579
- "peak_mz": np.concatenate(peak_mz_list) if peak_mz_list else np.array([]),
580
- "peak_intensity": np.concatenate(peak_intensity_list) if peak_intensity_list else np.array([]),
581
- "rt": np.array(rt_list, dtype=np.float64),
582
- "ms_level": np.array(ms_level_list, dtype=np.int8),
583
- "precursor_mz": np.array(precursor_mz_list, dtype=np.float64),
584
- "precursor_charge": np.array(precursor_charge_list, dtype=np.int8),
585
- "isolation_lower_mz": np.array(isolation_lower_list, dtype=np.float64),
586
- "isolation_upper_mz": np.array(isolation_upper_list, dtype=np.float64),
587
- "nce": np.array(nce_list, dtype=np.float32),
588
- "metadata": {
589
- "format": "WIFF2",
590
- "sample_id": sample_id,
591
- "sample_name": str(self.sample_names[sample_id]),
592
- "loading_params": enhanced_params,
593
- "total_spectra": len(rt_list),
594
- "total_peaks": sum(_peak_indices),
595
- "ms1_count": np.sum(np.array(ms_level_list) == 1),
596
- "ms2_count": np.sum(np.array(ms_level_list) > 1),
597
- "rt_range": [float(np.min(rt_list)), float(np.max(rt_list))] if rt_list else [0, 0],
598
- "creation_time": str(sample.Details.AcquisitionDateTime.ToString("O"))
599
- if hasattr(sample, "Details")
600
- else "",
601
- "reader_method": "standard_enhanced",
602
- },
603
- }
604
-
605
- def close(self):
606
- """Close the WIFF2 file and clean up resources."""
607
- if hasattr(self, "_wiffDataProvider"):
608
- try:
609
- self._wiffDataProvider.Close()
610
- except:
611
- pass
612
-
613
- if hasattr(self, "_alpharaw_reader"):
614
- try:
615
- self._alpharaw_reader.close()
616
- except:
617
- pass
618
-
619
- def __enter__(self):
620
- return self
621
-
622
- def __exit__(self, exc_type, exc_val, exc_tb):
623
- self.close()
624
-
625
- def __repr__(self):
626
- return f"SciexWiff2FileReader(file='{self.filename}', samples={self.sample_count}, method={self.initialization_method})"
627
-
628
-
629
183
  class SciexWiffFileReader:
630
184
  """
631
- Direct implementation of Sciex WIFF file reader using the DLLs without alpharaw dependency.
185
+ Direct implementation of Sciex WIFF file reader using the Sciex DLLs.
632
186
  """
633
187
 
634
188
  def __init__(self, filename: str):
@@ -636,7 +190,7 @@ class SciexWiffFileReader:
636
190
  raise ValueError(
637
191
  "Dotnet-based dependencies are required for reading Sciex files. "
638
192
  "Do you have pythonnet and/or mono installed? "
639
- "See the alpharaw documentation for details.",
193
+ "Please ensure pythonnet and Sciex DLLs are properly installed.",
640
194
  )
641
195
 
642
196
  self._wiffDataProvider = AnalystWiffDataProvider()
@@ -646,10 +200,18 @@ class SciexWiffFileReader:
646
200
  )
647
201
  self.sample_names = self._wiff_file.GetSampleNames()
648
202
 
649
- def close(self):
203
+ def close(self) -> None:
650
204
  """Close the file and clean up resources."""
651
205
  self._wiffDataProvider.Close()
652
206
 
207
+ def __enter__(self) -> 'SciexWiffFileReader':
208
+ """Context manager entry."""
209
+ return self
210
+
211
+ def __exit__(self, exc_type, exc_val, exc_tb) -> None:
212
+ """Context manager exit."""
213
+ self.close()
214
+
653
215
  def load_sample(
654
216
  self,
655
217
  sample_id: int,
@@ -690,6 +252,7 @@ class SciexWiffFileReader:
690
252
  peak_intensity_array_list: list[np.ndarray] = []
691
253
  rt_list: list[float] = []
692
254
  ms_level_list: list[int] = []
255
+ polarity_list: list[str] = []
693
256
  precursor_mz_list: list[float] = []
694
257
  precursor_charge_list: list[int] = []
695
258
  ce_list: list[float] = []
@@ -708,6 +271,13 @@ class SciexWiffFileReader:
708
271
 
709
272
  if ms_level > 1 and not details.IsSwath and mass_spectrum.NumDataPoints <= 0 and ignore_empty_scans:
710
273
  continue
274
+ if exp.Details.Polarity == exp.Details.Polarity.Positive:
275
+ pol = 'positive'
276
+ elif exp.Details.Polarity == exp.Details.Polarity.Negative:
277
+ pol = 'negative'
278
+ else:
279
+ pol = ''
280
+ polarity_list.append(pol)
711
281
 
712
282
  mz_array = dot_net_array_to_np_array(mass_spectrum.GetActualXValues())
713
283
  int_array = dot_net_array_to_np_array(
@@ -768,6 +338,7 @@ class SciexWiffFileReader:
768
338
  "peak_intensity": np.concatenate(peak_intensity_array_list),
769
339
  "rt": np.array(rt_list, dtype=np.float64),
770
340
  "ms_level": np.array(ms_level_list, dtype=np.int8),
341
+ "polarity": np.array(polarity_list, dtype="U8"),
771
342
  "precursor_mz": np.array(precursor_mz_list, dtype=np.float64),
772
343
  "precursor_charge": np.array(precursor_charge_list, dtype=np.int8),
773
344
  "isolation_lower_mz": np.array(isolation_lower_mz_list),
@@ -778,14 +349,15 @@ class SciexWiffFileReader:
778
349
 
779
350
  class SciexWiffData:
780
351
  """
781
- Standalone Sciex WIFF data reader class that mimics alpharaw.sciex.SciexWiffData
782
- functionality but uses DLLs directly without importing from alpharaw.
352
+ Standalone Sciex WIFF data reader class that provides WIFF data reading
353
+ functionality using Sciex DLLs directly.
783
354
  """
784
355
 
785
356
  # Column data types mapping
786
357
  column_dtypes: ClassVar[dict[str, Any]] = {
787
358
  "rt": np.float64,
788
359
  "ms_level": np.int8,
360
+ "polarity": "U8",
789
361
  "precursor_mz": np.float64,
790
362
  "isolation_lower_mz": np.float64,
791
363
  "isolation_upper_mz": np.float64,
@@ -795,20 +367,17 @@ class SciexWiffData:
795
367
  "activation": "U",
796
368
  }
797
369
 
798
- def __init__(self, centroided: bool = True, save_as_hdf: bool = False, **kwargs):
370
+ def __init__(self, centroided: bool = True) -> None:
799
371
  """
800
372
  Parameters
801
373
  ----------
802
374
  centroided : bool, optional
803
375
  If peaks will be centroided after loading, by default True.
804
- save_as_hdf : bool, optional
805
- Automatically save hdf after load raw data, by default False.
806
376
  """
807
377
  self.spectrum_df: pd.DataFrame = pd.DataFrame()
808
378
  self.peak_df: pd.DataFrame = pd.DataFrame()
809
379
  self._raw_file_path = ""
810
380
  self.centroided = centroided
811
- self._save_as_hdf = save_as_hdf
812
381
  self.creation_time = ""
813
382
  self.file_type = "sciex"
814
383
  self.instrument = "sciex"
@@ -953,42 +522,12 @@ class SciexWiffData:
953
522
  tuple
954
523
  (mz_array, intensity_array)
955
524
  """
956
- start, end = self.spectrum_df[["peak_start_idx", "peak_stop_idx"]].values[
957
- spec_idx,
958
- :,
959
- ]
525
+ start, end = self.spectrum_df.iloc[spec_idx][["peak_start_idx", "peak_stop_idx"]].values
960
526
  return (
961
527
  self.peak_df.mz.values[start:end],
962
528
  self.peak_df.intensity.values[start:end],
963
529
  )
964
530
 
965
- def save_hdf(self, hdf_file_path: str) -> None:
966
- """
967
- Save data to HDF5 file (placeholder implementation).
968
-
969
- Parameters
970
- ----------
971
- hdf_file_path : str
972
- Path to save the HDF5 file
973
- """
974
- # This would require implementing HDF5 saving functionality
975
- # For now, just save as pickle or implement as needed
976
- import pickle
977
-
978
- with open(hdf_file_path.replace(".hdf", ".pkl"), "wb") as f:
979
- pickle.dump(
980
- {
981
- "spectrum_df": self.spectrum_df,
982
- "peak_df": self.peak_df,
983
- "creation_time": self.creation_time,
984
- "raw_file_path": self.raw_file_path,
985
- "file_type": self.file_type,
986
- "centroided": self.centroided,
987
- "instrument": self.instrument,
988
- },
989
- f,
990
- )
991
-
992
531
  def __repr__(self) -> str:
993
532
  return f"SciexWiffData(file_path='{self.raw_file_path}', spectra={len(self.spectrum_df)})"
994
533
 
@@ -1014,92 +553,7 @@ def load_wiff_file(filename: str, **kwargs) -> SciexWiffData:
1014
553
  wiff_data.import_raw(filename)
1015
554
  return wiff_data
1016
555
 
1017
-
1018
- def load_wiff2_file(filename: str, **kwargs) -> dict[str, Any]:
1019
- """
1020
- Load a WIFF2 file and return spectral data.
1021
-
1022
- Note: WIFF2 format support is limited with current DLL versions.
1023
- If you encounter format incompatibility errors, try using the regular
1024
- WIFF file instead or convert WIFF2 to WIFF format.
1025
-
1026
- Parameters
1027
- ----------
1028
- filename : str
1029
- Path to the WIFF2 file
1030
- **kwargs
1031
- Additional arguments for WIFF2 loading (sample_id, centroid, etc.)
1032
-
1033
- Returns
1034
- -------
1035
- dict
1036
- Spectral data dictionary with enhanced WIFF2 information
1037
-
1038
- Raises
1039
- ------
1040
- RuntimeError
1041
- If WIFF2 format is not supported by current DLL combination
1042
- """
1043
- sample_id = kwargs.pop("sample_id", 0)
1044
-
1045
- try:
1046
- with SciexWiff2FileReader(filename) as reader:
1047
- return reader.load_sample(sample_id, **kwargs) # type: ignore[no-any-return]
1048
- except RuntimeError as e:
1049
- if "format is not supported" in str(e):
1050
- # Suggest using regular WIFF file if available
1051
- wiff_file = filename.replace(".wiff2", ".wiff")
1052
- if os.path.exists(wiff_file):
1053
- raise RuntimeError(
1054
- f"WIFF2 format not supported. However, a regular WIFF file was found: "
1055
- f"'{wiff_file}'. Try using load_wiff_file('{wiff_file}') instead.",
1056
- ) from e
1057
- else:
1058
- raise RuntimeError(
1059
- f"WIFF2 format not supported and no corresponding WIFF file found. Original error: {e}",
1060
- ) from e
1061
- else:
1062
- raise
1063
-
1064
-
1065
- def load_wiff_file_smart(filename: str, **kwargs) -> dict[str, Any] | SciexWiffData:
1066
- """
1067
- Smart WIFF file loader that automatically handles WIFF and WIFF2 formats.
1068
-
1069
- This function will first try to load the file as specified, and if it's a WIFF2
1070
- file that fails due to format incompatibility, it will suggest alternatives.
1071
-
1072
- Parameters
1073
- ----------
1074
- filename : str
1075
- Path to the WIFF or WIFF2 file
1076
- **kwargs
1077
- Additional arguments for loading (sample_id, centroid, etc.)
1078
-
1079
- Returns
1080
- -------
1081
- dict
1082
- Spectral data dictionary
1083
- """
1084
- if filename.lower().endswith(".wiff2"):
1085
- try:
1086
- return load_wiff2_file(filename, **kwargs)
1087
- except RuntimeError as e:
1088
- if "format is not supported" in str(e):
1089
- # Check if regular WIFF file exists
1090
- wiff_file = filename.replace(".wiff2", ".wiff")
1091
- if os.path.exists(wiff_file):
1092
- warnings.warn(
1093
- f"WIFF2 format not supported, falling back to WIFF file: {wiff_file}",
1094
- stacklevel=2,
1095
- )
1096
- return load_wiff_file(wiff_file, **kwargs)
1097
- raise
1098
- else:
1099
- return load_wiff_file(filename, **kwargs)
1100
-
1101
-
1102
- def get_sample_names(filename: str) -> list:
556
+ def get_sample_names(filename: str) -> list[str]:
1103
557
  """
1104
558
  Get the sample names from a WIFF file.
1105
559
 
@@ -1110,51 +564,11 @@ def get_sample_names(filename: str) -> list:
1110
564
 
1111
565
  Returns
1112
566
  -------
1113
- list
567
+ list[str]
1114
568
  List of sample names
1115
569
  """
1116
- reader = SciexWiffFileReader(filename)
1117
- try:
570
+ with SciexWiffFileReader(filename) as reader:
1118
571
  return list(reader.sample_names)
1119
- finally:
1120
- reader.close()
1121
-
1122
-
1123
- def get_wiff2_sample_names(filename: str) -> list:
1124
- """
1125
- Get the sample names from a WIFF2 file.
1126
-
1127
- Parameters
1128
- ----------
1129
- filename : str
1130
- Path to the WIFF2 file
1131
-
1132
- Returns
1133
- -------
1134
- list
1135
- List of sample names
1136
- """
1137
- with SciexWiff2FileReader(filename) as reader:
1138
- return list(reader.sample_names)
1139
-
1140
-
1141
- def get_wiff2_metadata(filename: str) -> dict[str, Any]:
1142
- """
1143
- Get comprehensive metadata from a WIFF2 file.
1144
-
1145
- Parameters
1146
- ----------
1147
- filename : str
1148
- Path to the WIFF2 file
1149
-
1150
- Returns
1151
- -------
1152
- dict
1153
- Comprehensive WIFF2 file metadata
1154
- """
1155
- with SciexWiff2FileReader(filename) as reader:
1156
- return reader.get_file_metadata() # type: ignore[no-any-return]
1157
-
1158
572
 
1159
573
  # Example usage and testing
1160
574
  if __name__ == "__main__":