masster 0.5.22__py3-none-any.whl → 0.5.24__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release. This version of masster might be problematic.

masster/sample/sciex.py CHANGED
@@ -31,16 +31,16 @@ def naive_centroid(
  ) -> tuple[np.ndarray, np.ndarray]:
  """
  Simplified naive centroiding implementation.
-
+
  Parameters
  ----------
  peak_mzs : np.ndarray
  Array of m/z values
- peak_intensities : np.ndarray
+ peak_intensities : np.ndarray
  Array of intensity values
  centroiding_ppm : float, default 20.0
  PPM tolerance for combining peaks
-
+
  Returns
  -------
  tuple[np.ndarray, np.ndarray]
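The hunk above only touches docstring whitespace, but it documents the `naive_centroid` signature. A minimal usage sketch, assuming the function is importable from `masster.sample.sciex` (import path inferred from the file location); the exact merged intensity is not specified by this diff:

```python
import numpy as np
from masster.sample.sciex import naive_centroid  # assumed import path

# Two peaks 10 ppm apart at m/z 500, plus an isolated peak at m/z 600.
mzs = np.array([500.000, 500.005, 600.000])
intensities = np.array([100.0, 300.0, 50.0])

# With the default 20 ppm tolerance the first two peaks should collapse into a single
# centroid; the thermo twin of this function documents intensity-weighted averaging,
# which would place it near m/z 500.004.
centroid_mz, centroid_int = naive_centroid(mzs, intensities, centroiding_ppm=20.0)
print(centroid_mz, centroid_int)
```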
@@ -150,20 +150,20 @@ except Exception as e:
  def dot_net_array_to_np_array(src) -> np.ndarray:
  """
  Convert .NET array to NumPy array.
-
+
  Parameters
  ----------
  src : .NET array or None
  Source .NET array to convert
-
+
  Returns
  -------
  np.ndarray
  Converted NumPy array
-
+
  Notes
  -----
- Based on approach from:
+ Based on approach from:
  https://mail.python.org/pipermail/pythondotnet/2014-May/001527.html
  """
  if src is None:
@@ -204,7 +204,7 @@ class SciexWiffFileReader:
  """Close the file and clean up resources."""
  self._wiffDataProvider.Close()

- def __enter__(self) -> 'SciexWiffFileReader':
+ def __enter__(self) -> "SciexWiffFileReader":
  """Context manager entry."""
  return self

@@ -272,11 +272,11 @@ class SciexWiffFileReader:
  if ms_level > 1 and not details.IsSwath and mass_spectrum.NumDataPoints <= 0 and ignore_empty_scans:
  continue
  if exp.Details.Polarity == exp.Details.Polarity.Positive:
- pol = 'positive'
+ pol = "positive"
  elif exp.Details.Polarity == exp.Details.Polarity.Negative:
- pol = 'negative'
+ pol = "negative"
  else:
- pol = ''
+ pol = ""
  polarity_list.append(pol)

  mz_array = dot_net_array_to_np_array(mass_spectrum.GetActualXValues())
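This hunk and the `__enter__` change above are both inside `SciexWiffFileReader`, which is designed to be used as a context manager. A short usage sketch; the `.wiff` path is hypothetical, the import path is assumed from the file location, and `sample_names` is the attribute used by `get_sample_names()` further down:

```python
from masster.sample.sciex import SciexWiffFileReader  # assumed import path

with SciexWiffFileReader("example.wiff") as reader:   # hypothetical file
    print(reader.sample_names)                        # same attribute get_sample_names() returns
# on exit the reader is expected to close the underlying .NET wiff data provider
```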
@@ -553,6 +553,7 @@ def load_wiff_file(filename: str, **kwargs) -> SciexWiffData:
  wiff_data.import_raw(filename)
  return wiff_data

+
  def get_sample_names(filename: str) -> list[str]:
  """
  Get the sample names from a WIFF file.
@@ -570,6 +571,7 @@ def get_sample_names(filename: str) -> list[str]:
  with SciexWiffFileReader(filename) as reader:
  return list(reader.sample_names)

+
  # Example usage and testing
  if __name__ == "__main__":
  print("Standalone Sciex WIFF reader implementation")
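The blank lines added in the last two hunks are purely cosmetic (PEP 8 spacing before top-level definitions). For reference, the module-level helpers touched here would be used roughly like this; the file path is hypothetical and the keyword arguments accepted by `load_wiff_file` are not enumerated in this diff:

```python
from masster.sample.sciex import get_sample_names, load_wiff_file  # assumed import path

names = get_sample_names("run01.wiff")    # opens the file via SciexWiffFileReader
wiff_data = load_wiff_file("run01.wiff")  # returns a SciexWiffData with the raw data imported
print(names)
```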
masster/sample/thermo.py CHANGED
@@ -30,8 +30,8 @@ Example:
  >>> mz, intensity = raw_data.get_peaks(0) # Get first spectrum peaks

  Note:
- The .NET imports (System, ThermoFisher) will only work when pythonnet
- is properly installed and configured. Without these dependencies, the
+ The .NET imports (System, ThermoFisher) will only work when pythonnet
+ is properly installed and configured. Without these dependencies, the
  module will still import but Thermo RAW file reading will be disabled.
  """

@@ -54,23 +54,23 @@ def naive_centroid(
  ) -> tuple[np.ndarray, np.ndarray]:
  """
  Simplified naive centroiding implementation.
-
+
  Combines nearby peaks within a PPM tolerance using intensity-weighted averaging.
-
+
  Parameters
  ----------
  peak_mzs : np.ndarray
  Array of m/z values (must be sorted)
- peak_intensities : np.ndarray
+ peak_intensities : np.ndarray
  Array of intensity values corresponding to peak_mzs
  centroiding_ppm : float, default 20.0
  PPM tolerance for combining peaks
-
+
  Returns
  -------
  tuple[np.ndarray, np.ndarray]
  Centroided m/z and intensity arrays
-
+
  Notes
  -----
  This is a simple implementation that assumes input peaks are sorted by m/z.
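Only trailing whitespace changes here, but the docstring does describe the algorithm: walk the sorted m/z array, group peaks that fall within `centroiding_ppm` of the current peak, and replace each group with an intensity-weighted average. A minimal sketch of that kind of merge loop (a sketch only; the package's full implementation is not shown in this diff, and the summed group intensity is an assumption):

```python
import numpy as np


def ppm_merge(peak_mzs: np.ndarray, peak_intensities: np.ndarray, centroiding_ppm: float = 20.0):
    """Group sorted peaks within a ppm window and average their m/z weighted by intensity."""
    out_mz, out_int = [], []
    i = 0
    while i < len(peak_mzs):
        tolerance = peak_mzs[i] * centroiding_ppm * 1e-6   # same tolerance formula as in the diff
        j = i
        while j + 1 < len(peak_mzs) and peak_mzs[j + 1] - peak_mzs[i] <= tolerance:
            j += 1                                          # extend the group while within tolerance
        group_mz = peak_mzs[i : j + 1]
        group_int = peak_intensities[i : j + 1]
        out_mz.append(np.average(group_mz, weights=group_int))  # intensity-weighted m/z (assumes > 0)
        out_int.append(group_int.sum())                         # summed intensity (assumption)
        i = j + 1
    return np.array(out_mz), np.array(out_int)
```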
@@ -78,7 +78,7 @@ def naive_centroid(
  """
  if len(peak_mzs) == 0:
  return np.array([]), np.array([])
-
+
  if len(peak_mzs) != len(peak_intensities):
  raise ValueError("peak_mzs and peak_intensities must have the same length")

@@ -89,7 +89,7 @@ def naive_centroid(
  while i < len(peak_mzs):
  current_mz = peak_mzs[i]
  current_intensity = peak_intensities[i]
-
+
  # Calculate tolerance for current m/z
  tolerance = current_mz * centroiding_ppm * 1e-6

@@ -144,6 +144,7 @@ try:
  # Try alternative locations
  try:
  import alpharaw
+
  alpharaw_dir = os.path.dirname(alpharaw.__file__)
  ext_dir = os.path.join(alpharaw_dir, "ext")
  except ImportError:
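This hunk and the next only reformat the DLL-loading block. In the surrounding (unchanged) code that block sits inside a try/except so the module still imports when pythonnet or the Thermo DLLs are unavailable. A condensed sketch of that guard, using the `HAS_DOTNET` flag referenced later in this diff; where exactly the flag is set is an assumption, as is the directory layout beyond what the hunks show:

```python
import os

try:
    import clr        # pythonnet
    import alpharaw   # DLLs are looked up in alpharaw's ext directory

    ext_dir = os.path.join(os.path.dirname(alpharaw.__file__), "ext")
    clr.AddReference(os.path.join(ext_dir, "thermo_fisher", "ThermoFisher.CommonCore.Data.dll"))
    clr.AddReference(os.path.join(ext_dir, "thermo_fisher", "ThermoFisher.CommonCore.RawFileReader.dll"))

    import ThermoFisher  # noqa: F401

    HAS_DOTNET = True
except Exception:
    # Without pythonnet / the DLLs, RAW reading is disabled but the module still imports.
    HAS_DOTNET = False
```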
@@ -156,9 +157,7 @@ try:
  clr.AddReference(
  os.path.join(ext_dir, "thermo_fisher", "ThermoFisher.CommonCore.Data.dll"),
  )
- clr.AddReference(
- os.path.join(ext_dir, "thermo_fisher", "ThermoFisher.CommonCore.RawFileReader.dll")
- )
+ clr.AddReference(os.path.join(ext_dir, "thermo_fisher", "ThermoFisher.CommonCore.RawFileReader.dll"))

  import ThermoFisher # noqa: F401

@@ -189,43 +188,43 @@ except Exception as e:
  def dot_net_array_to_np_array(src) -> np.ndarray:
  """
  Convert .NET array to NumPy array with efficient memory handling.
-
+
  This function performs a zero-copy conversion from .NET arrays to NumPy arrays
  by directly accessing the underlying memory buffer. This is much faster than
  iterating through elements.
-
+
  Parameters
  ----------
  src : .NET array or None
  Source .NET array to convert (typically double[])
-
+
  Returns
  -------
  np.ndarray
  Converted NumPy array with dtype float64. Returns empty array if src is None.
-
+
  Notes
  -----
- Based on the approach from:
+ Based on the approach from:
  https://mail.python.org/pipermail/pythondotnet/2014-May/001527.html
-
+
  The function uses GCHandle.Alloc to pin the .NET array in memory, allowing
  direct access to its underlying buffer via ctypes. The buffer is then
  wrapped as a NumPy array and copied to ensure memory safety.
  """
  if src is None:
  return np.array([], dtype=np.float64)
-
+
  # Pin the .NET array in memory to prevent garbage collection
  src_hndl = GCHandle.Alloc(src, GCHandleType.Pinned)
  try:
  # Get pointer to the pinned memory
  src_ptr = src_hndl.AddrOfPinnedObject().ToInt64()
-
+
  # Create ctypes buffer pointing to the same memory
  buf_type = ctypes.c_double * len(src)
  cbuf = buf_type.from_address(src_ptr)
-
+
  # Convert to NumPy array and make a copy for safety
  dest = np.frombuffer(cbuf, dtype="float64").copy() # type: ignore[call-overload]
  finally:
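The changes in this hunk are again whitespace-only, but the function is the core .NET-to-NumPy bridge. A self-contained sketch of the pattern its docstring and body describe, assuming `GCHandle`/`GCHandleType` come from `System.Runtime.InteropServices` via pythonnet and that the handle is released in the `finally` block (the release itself is not visible in this diff):

```python
import ctypes

import numpy as np
import clr  # noqa: F401  # pythonnet; required before System namespaces can be imported
from System.Runtime.InteropServices import GCHandle, GCHandleType


def dotnet_doubles_to_numpy(src) -> np.ndarray:
    """Copy a .NET double[] into a NumPy float64 array via a pinned buffer."""
    if src is None:
        return np.array([], dtype=np.float64)

    handle = GCHandle.Alloc(src, GCHandleType.Pinned)    # pin so the GC cannot move the array
    try:
        address = handle.AddrOfPinnedObject().ToInt64()  # raw pointer to the pinned data
        cbuf = (ctypes.c_double * len(src)).from_address(address)
        return np.frombuffer(cbuf, dtype="float64").copy()  # copy before the pin is released
    finally:
        handle.Free()                                    # assumed cleanup; standard GCHandle API
```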
@@ -247,7 +246,7 @@ class ThermoRawFileReader:
  "Install pythonnet (pip install pythonnet) and ensure Thermo Fisher DLLs "
  "are available in alpharaw's ext/thermo_fisher directory."
  )
-
+
  if not os.path.exists(filename):
  raise FileNotFoundError(f"RAW file not found: {filename}")

@@ -255,7 +254,7 @@ class ThermoRawFileReader:
  self._raw_file = RawFileReaderAdapter.FileFactory(filename)
  except Exception as e:
  raise ValueError(f"Failed to create RAW file reader for '{filename}': {e}") from e
-
+
  if not self._raw_file.IsOpen:
  raise ValueError(f"Could not open RAW file: {filename}")

@@ -271,10 +270,10 @@ class ThermoRawFileReader:

  def close(self) -> None:
  """Close the file and clean up resources."""
- if hasattr(self, '_raw_file') and self._raw_file is not None:
+ if hasattr(self, "_raw_file") and self._raw_file is not None:
  self._raw_file.Dispose()

- def __enter__(self) -> 'ThermoRawFileReader':
+ def __enter__(self) -> "ThermoRawFileReader":
  """Context manager entry."""
  return self

@@ -285,12 +284,12 @@ class ThermoRawFileReader:
  def get_polarity_from_scan_event(self, scan_number: int) -> str:
  """
  Extract polarity information from scan event.
-
+
  Parameters
  ----------
  scan_number : int
  Scan number to extract polarity from
-
+
  Returns
  -------
  str
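With `close()`, `__enter__()`, and the `get_polarity_from_scan_event()` signature above (its body follows in the next hunk), the reader is meant to be used as a context manager. A short sketch; the RAW path is hypothetical and the import path is assumed from the file location:

```python
from masster.sample.thermo import ThermoRawFileReader  # assumed import path

with ThermoRawFileReader("example.raw") as reader:      # hypothetical RAW file
    print(reader.get_polarity_from_scan_event(1))       # 'positive', 'negative', or ''
# on exit, close() is expected to Dispose() the underlying .NET raw file object
```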
@@ -299,28 +298,28 @@ class ThermoRawFileReader:
  try:
  scan_event = self._raw_file.GetScanEventForScanNumber(scan_number)
  if scan_event is None:
- return ''
+ return ""

  # Try the direct Polarity property first (most reliable)
- if hasattr(scan_event, 'Polarity'):
+ if hasattr(scan_event, "Polarity"):
  polarity_str = str(scan_event.Polarity).lower()
- if 'positive' in polarity_str:
- return 'positive'
- elif 'negative' in polarity_str:
- return 'negative'
-
+ if "positive" in polarity_str:
+ return "positive"
+ elif "negative" in polarity_str:
+ return "negative"
+
  # Fallback: parse the scan filter string
  filter_string = str(scan_event.ToString()).lower()
- if '+' in filter_string or 'positive' in filter_string:
- return 'positive'
- elif '-' in filter_string or 'negative' in filter_string:
- return 'negative'
-
+ if "+" in filter_string or "positive" in filter_string:
+ return "positive"
+ elif "-" in filter_string or "negative" in filter_string:
+ return "negative"
+
  except Exception:
  # Log the exception if needed, but don't raise
  pass
-
- return '' # Unknown polarity
+
+ return "" # Unknown polarity

  def _extract_precursor_info(self, scan_event, ms_level: int) -> tuple[float, int, float, float, float]:
  """Extract precursor information from scan event for MS2+ scans."""
@@ -333,17 +332,19 @@ class ThermoRawFileReader:
  precursor_mz = -1.0

  try:
- precursor_charge = int(scan_event.GetChargeState(0)) if hasattr(scan_event, 'GetChargeState') else 0
+ precursor_charge = int(scan_event.GetChargeState(0)) if hasattr(scan_event, "GetChargeState") else 0
  except Exception:
  precursor_charge = 0

  try:
- collision_energy = float(scan_event.GetEnergy(0)) if hasattr(scan_event, 'GetEnergy') else 0.0
+ collision_energy = float(scan_event.GetEnergy(0)) if hasattr(scan_event, "GetEnergy") else 0.0
  except Exception:
  collision_energy = 0.0

  try:
- isolation_window = float(scan_event.GetIsolationWidth(0)) if hasattr(scan_event, 'GetIsolationWidth') else 3.0
+ isolation_window = (
+ float(scan_event.GetIsolationWidth(0)) if hasattr(scan_event, "GetIsolationWidth") else 3.0
+ )
  except Exception:
  isolation_window = 3.0

@@ -353,11 +354,7 @@ class ThermoRawFileReader:
  return precursor_mz, precursor_charge, collision_energy, isolation_lower, isolation_upper

  def _process_scan_data(
- self,
- scan_data,
- centroid: bool,
- centroid_ppm: float,
- keep_k_peaks: int
+ self, scan_data, centroid: bool, centroid_ppm: float, keep_k_peaks: int
  ) -> tuple[np.ndarray, np.ndarray]:
  """Process scan data to extract and optionally centroid peaks."""
  if scan_data.Positions is not None and scan_data.Intensities is not None:
@@ -434,30 +431,29 @@ class ThermoRawFileReader:
  continue

  scan_event = self._raw_file.GetScanEventForScanNumber(scan_num)
-
+
  # Extract basic scan information
  rt = scan_stats.StartTime # in minutes
  ms_level = int(scan_event.MSOrder) if scan_event else 1
  polarity = self.get_polarity_from_scan_event(scan_num)

  # Process peak data
- mz_array, int_array = self._process_scan_data(
- scan_data, centroid, centroid_ppm, keep_k_peaks
- )
+ mz_array, int_array = self._process_scan_data(scan_data, centroid, centroid_ppm, keep_k_peaks)

  # Store scan data
  peak_mz_arrays.append(mz_array)
  peak_intensity_arrays.append(int_array)
  peak_indices_list.append(len(mz_array))
-
+
  rt_list.append(rt)
  ms_level_list.append(ms_level)
  polarity_list.append(polarity)

  # Extract precursor information
- precursor_mz, precursor_charge, collision_energy, isolation_lower, isolation_upper = \
+ precursor_mz, precursor_charge, collision_energy, isolation_lower, isolation_upper = (
  self._extract_precursor_info(scan_event, ms_level)
-
+ )
+
  precursor_mz_list.append(precursor_mz)
  precursor_charge_list.append(precursor_charge)
  ce_list.append(collision_energy)
@@ -510,7 +506,7 @@ class ThermoRawData:
  def __init__(self, centroided: bool = True) -> None:
  """
  Initialize ThermoRawData reader.
-
+
  Parameters
  ----------
  centroided : bool, optional
@@ -520,13 +516,13 @@ class ThermoRawData:
  # Initialize dataframes
  self.spectrum_df: pd.DataFrame = pd.DataFrame()
  self.peak_df: pd.DataFrame = pd.DataFrame()
-
+
  # File and instrument information
  self._raw_file_path = ""
  self.creation_time = ""
  self.type = "thermo"
  self.instrument = "thermo"
-
+
  # Processing parameters
  self.centroided = centroided
  self.centroid_ppm = 20.0
@@ -537,8 +533,7 @@ class ThermoRawData:
  if self.centroided:
  self.centroided = False
  warnings.warn(
- "Centroiding for Thermo data is not well implemented yet. "
- "Data will be processed in profile mode.",
+ "Centroiding for Thermo data is not well implemented yet. Data will be processed in profile mode.",
  UserWarning,
  stacklevel=2,
  )
@@ -587,14 +582,14 @@ class ThermoRawData:
  ignore_empty_scans=self.ignore_empty_scans,
  keep_k_peaks=self.keep_k_peaks_per_spec,
  )
-
+
  # Try to get file creation time
  try:
  creation_info = raw_reader._raw_file.GetCreationDate()
  self.creation_time = creation_info.ToString("O") if creation_info else ""
  except Exception:
  self.creation_time = ""
-
+
  return data_dict

  def _set_dataframes(self, raw_data: dict[str, Any]) -> None:
@@ -607,16 +602,16 @@ class ThermoRawData:
  Dictionary containing the raw spectral data with keys like 'rt', 'peak_mz', etc.
  """
  num_spectra = len(raw_data["rt"])
-
+
  # Create spectrum dataframe
  self.create_spectrum_df(num_spectra)
-
+
  # Create peak dataframe with indexed arrays
  self.set_peak_df_by_indexed_array(
  raw_data["peak_mz"],
  raw_data["peak_intensity"],
  raw_data["peak_indices"][:-1], # start indices
- raw_data["peak_indices"][1:], # end indices
+ raw_data["peak_indices"][1:], # end indices
  )

  # Add spectrum-level data to spectrum dataframe
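The only change here is comment alignment, but the call documents the peak storage layout: one flat m/z array, one flat intensity array, and an offsets vector whose consecutive entries bound each spectrum (`peak_indices[:-1]` as starts, `peak_indices[1:]` as ends). A small sketch of that layout; building the offsets from the per-scan peak counts collected earlier presumably uses a cumulative sum, which this diff does not show:

```python
import numpy as np

# Flat arrays holding the peaks of three spectra with 2, 3, and 1 peaks respectively.
peak_mz = np.array([100.1, 200.2, 150.0, 151.0, 152.0, 300.3])
peak_counts = np.array([2, 3, 1])
peak_indices = np.concatenate(([0], np.cumsum(peak_counts)))  # -> [0, 2, 5, 6]

starts, ends = peak_indices[:-1], peak_indices[1:]
for i, (s, e) in enumerate(zip(starts, ends)):
    print(f"spectrum {i}: {peak_mz[s:e]}")
# spectrum 0: [100.1 200.2]
# spectrum 1: [150. 151. 152.]
# spectrum 2: [300.3]
```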
@@ -741,17 +736,17 @@ def get_file_info(filename: str) -> dict[str, Any]:
  def main() -> None:
  """
  Main function for testing and demonstrating the module functionality.
-
+
  This function provides usage examples and tests basic module functionality
  when the script is run directly.
  """
  print("Standalone Thermo RAW Reader")
  print("=" * 40)
-
+
  # Display usage example
  print("\nUsage Example:")
  print("-" * 20)
- example_code = '''
+ example_code = """
  from thermo import ThermoRawData, load_raw_file

  # Method 1: Create reader instance
@@ -771,18 +766,18 @@ mz, intensity = raw_data.get_peaks(0)
  # Check available polarities
  polarities = raw_data.spectrum_df['polarity'].unique()
  print(f"Polarities: {polarities}")
- '''
+ """
  print(example_code)

  # Test module functionality
  print("\nModule Status:")
  print("-" * 20)
-
+
  try:
  # Test class instantiation
  test_data = ThermoRawData()
  print("✓ ThermoRawData instantiated successfully")
-
+
  # Check .NET support
  if HAS_DOTNET:
  print("✓ .NET support available")
@@ -792,10 +787,10 @@ print(f"Polarities: {polarities}")
  print("⚠ .NET support not available")
  print(" • Install pythonnet to enable RAW file reading")
  print(" • Ensure Thermo Fisher DLLs are in alpharaw ext directory")
-
+
  except Exception as e:
  print(f"✗ Error during module testing: {e}")


  if __name__ == "__main__":
- main()
+ main()
masster/spectrum.py CHANGED
@@ -199,61 +199,61 @@ class Spectrum:
  def check_if_centroided(self) -> bool:
  """
  Fast determination if spectrum data is centroided or profile.
-
+
  Uses optimized statistical approaches with early exits for speed:
  1. Fast median difference check (most decisive)
- 2. Small gap ratio (profile characteristic)
+ 2. Small gap ratio (profile characteristic)
  3. Density check (fallback)
-
+
  Returns:
  bool: True if centroided, False if profile
  """
  if self.mz.size < 5:
  return True # Too few points to determine, assume centroided
-
+
  # Fast path: check if mz is already sorted to avoid sorting cost
  if np.all(self.mz[:-1] <= self.mz[1:]):
  sorted_mz = self.mz
  else:
  sorted_mz = np.sort(self.mz)
-
+
  # Calculate differences efficiently
  mz_diffs = np.diff(sorted_mz)
-
+
  # Remove zeros efficiently (keep positive differences)
  mz_diffs = mz_diffs[mz_diffs > 0]
-
+
  if mz_diffs.size == 0:
  return True # All identical m/z values
-
+
  # Fast approach 1: Median difference (most decisive, compute once)
  median_diff = np.median(mz_diffs)
-
+
  # Early exits for clear cases (>90% of cases)
  if median_diff > 0.02:
  return True # Clearly centroided
  elif median_diff < 0.005:
  return False # Clearly profile
-
+
  # Fast approach 2: Small gap ratio (for borderline cases)
  # Use vectorized comparison instead of creating new array
  small_gap_count = np.sum(mz_diffs < 0.005)
  small_gap_ratio = small_gap_count / mz_diffs.size
-
+
  if small_gap_ratio > 0.7:
  return False # High ratio of small gaps = profile
  elif small_gap_ratio < 0.1:
- return True # Low ratio of small gaps = centroided
-
+ return True # Low ratio of small gaps = centroided
+
  # Fast approach 3: Density check (final fallback)
- mz_range = sorted_mz[-1] - sorted_mz[0]
+ mz_range = sorted_mz[-1] - sorted_mz[0]
  if mz_range > 0:
  density = sorted_mz.size / mz_range
  if density > 100: # High density = profile
  return False
  elif density < 10: # Low density = centroided
  return True
-
+
  # Final fallback: median threshold
  return median_diff > 0.01
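The whitespace-only changes above leave the heuristic intact: the median gap between adjacent m/z values decides most cases (above 0.02 means centroided, below 0.005 means profile), with the small-gap ratio and point density as tie-breakers. A tiny standalone illustration of the decisive first check on synthetic data (it does not use the `Spectrum` class itself, whose constructor is not shown in this diff):

```python
import numpy as np

profile_like = np.arange(400.0, 401.0, 0.001)        # ~0.001 Da spacing, typical of profile data
centroid_like = np.array([400.05, 400.57, 401.30])   # sparse sticks, typical of centroided data

for name, mz in [("profile", profile_like), ("centroid", centroid_like)]:
    median_diff = np.median(np.diff(np.sort(mz)))
    verdict = "centroided" if median_diff > 0.02 else "profile" if median_diff < 0.005 else "borderline"
    print(name, round(float(median_diff), 4), verdict)
# profile 0.001 profile
# centroid 0.625 centroided
```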