imspy-core 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,604 @@
1
+ import pandas as pd
2
+
3
+ from typing import List, Tuple, Optional, TYPE_CHECKING
4
+ from numpy.typing import NDArray
5
+
6
+ import numpy as np
7
+ from imspy_core.data.spectrum import TimsSpectrum, IndexedMzSpectrum
8
+ from imspy_core.core.base import RustWrapperObject
9
+ from imspy_core.utility.utilities import re_index_indices
10
+
11
+ if TYPE_CHECKING:
12
+ from imspy_simulation.annotation import TimsFrameAnnotated
13
+
14
+ import imspy_connector
15
+ ims = imspy_connector.py_tims_frame
16
+
17
+
18
+ def _get_tims_frame_annotated():
19
+ """Lazy import of TimsFrameAnnotated to avoid circular imports.
20
+
21
+ Requires imspy-simulation package to be installed.
22
+ """
23
+ try:
24
+ from imspy_simulation.annotation import TimsFrameAnnotated
25
+ return TimsFrameAnnotated
26
+ except ImportError:
27
+ raise ImportError(
28
+ "TimsFrameAnnotated requires the imspy-simulation package. "
29
+ "Install it with: pip install imspy-simulation"
30
+ )
31
+
32
+
33
class TimsFrame(RustWrapperObject):
    """Python wrapper around an ``ims.PyTimsFrame`` binding object.

    The wrapped object is stored in a private attribute; the properties and
    methods of this class forward to it and re-wrap any frame or spectrum
    objects it returns.
    """

    def __init__(self, frame_id: int, ms_type: int, retention_time: float, scan: NDArray[np.int32],
                 mobility: NDArray[np.float64], tof: NDArray[np.int32],
                 mz: NDArray[np.float64], intensity: NDArray[np.float64]):
        """TimsFrame class.

        Args:
            frame_id (int): Frame ID.
            ms_type (int): MS type.
            retention_time (float): Retention time.
            scan (NDArray[np.int32]): Scan.
            mobility (NDArray[np.float64]): Inverse mobility.
            tof (NDArray[np.int32]): Time of flight.
            mz (NDArray[np.float64]): m/z.
            intensity (NDArray[np.float64]): Intensity.

        Raises:
            AssertionError: If the length of the scan, mobility, tof, mz and intensity arrays are not equal.
        """
        # NOTE: kept as an assert because AssertionError is the documented
        # contract; be aware the check is skipped when Python runs with -O.
        assert len(scan) == len(mobility) == len(tof) == len(mz) == len(intensity), \
            "The length of the scan, mobility, tof, mz and intensity arrays must be equal."

        self.__frame_ptr = ims.PyTimsFrame(frame_id, ms_type, retention_time, scan, mobility, tof, mz, intensity)

    def __add__(self, other: 'TimsFrame') -> 'TimsFrame':
        """Add two TimsFrames together.

        The addition itself is delegated to the ``+`` operator of the wrapped
        binding objects.

        Args:
            other (TimsFrame): TimsFrame to add.

        Returns:
            TimsFrame: Sum of the two TimsFrames. ``NotImplemented`` is
            returned for operands that are not a TimsFrame, so Python can try
            the reflected operation or raise a ``TypeError``.
        """
        if not isinstance(other, TimsFrame):
            return NotImplemented
        return TimsFrame.from_py_ptr(self.__frame_ptr + other.__frame_ptr)

    @classmethod
    def from_py_ptr(cls, frame: ims.PyTimsFrame):
        """Create a TimsFrame from a PyTimsFrame.

        ``__init__`` is bypassed: the given binding object is stored as-is.

        Args:
            frame (ims.PyTimsFrame): PyTimsFrame to create the TimsFrame from.

        Returns:
            TimsFrame: TimsFrame created from the PyTimsFrame.
        """
        instance = cls.__new__(cls)
        instance.__frame_ptr = frame
        return instance

    @property
    def frame_id(self) -> int:
        """Frame ID.

        Returns:
            int: Frame ID.
        """
        return self.__frame_ptr.frame_id

    @property
    def ms_type_as_string(self) -> str:
        """MS type as a string.

        Forwards the ``ms_type`` attribute of the wrapped object.

        Returns:
            str: MS type.
        """
        return self.__frame_ptr.ms_type

    @property
    def ms_type(self) -> int:
        """MS type as a number.

        Forwards the ``ms_type_numeric`` attribute of the wrapped object.

        Returns:
            int: MS type.
        """
        return self.__frame_ptr.ms_type_numeric

    @property
    def retention_time(self) -> float:
        """Retention time.

        Returns:
            float: Retention time.
        """
        return self.__frame_ptr.retention_time

    @property
    def scan(self) -> NDArray[np.int32]:
        """Scan.

        Returns:
            NDArray[np.int32]: Scan.
        """
        return self.__frame_ptr.scan

    @property
    def mobility(self) -> NDArray[np.float64]:
        """Inverse mobility.

        Returns:
            NDArray[np.float64]: Inverse mobility.
        """
        return self.__frame_ptr.mobility

    @property
    def tof(self) -> NDArray[np.int32]:
        """Time of flight.

        Returns:
            NDArray[np.int32]: Time of flight.
        """
        return self.__frame_ptr.tof

    @tof.setter
    def tof(self, tof: NDArray[np.int32]):
        """Replace the time-of-flight values on the wrapped object.

        Args:
            tof (NDArray[np.int32]): New time-of-flight values.
        """
        self.__frame_ptr.tof = tof

    @property
    def mz(self) -> NDArray[np.float64]:
        """m/z.

        Returns:
            NDArray[np.float64]: m/z.
        """
        return self.__frame_ptr.mz

    @property
    def intensity(self) -> NDArray[np.float64]:
        """Intensity.

        Returns:
            NDArray[np.float64]: Intensity.
        """
        return self.__frame_ptr.intensity

    @property
    def df(self) -> pd.DataFrame:
        """Data as a pandas DataFrame.

        Returns:
            pd.DataFrame: One row per peak with the columns ``frame``,
            ``retention_time``, ``scan``, ``mobility``, ``tof``, ``mz`` and
            ``intensity``. Frame ID and retention time are repeated on every
            row.
        """
        # Read the scan array once instead of querying the wrapped object
        # three times for the same data.
        scan = self.scan
        num_peaks = len(scan)

        return pd.DataFrame({
            'frame': np.repeat(self.frame_id, num_peaks),
            'retention_time': np.repeat(self.retention_time, num_peaks),
            'scan': scan,
            'mobility': self.mobility,
            'tof': self.tof,
            'mz': self.mz,
            'intensity': self.intensity})

    def filter(self,
               mz_min: float = 0.0,
               mz_max: float = 2000.0,
               scan_min: int = 0,
               scan_max: int = 1000,
               mobility_min: float = 0.0,
               mobility_max: float = 2.0,
               intensity_min: float = 0.0,
               intensity_max: float = 1e9,
               tof_min: int = 0,
               tof_max: int = 400000,
               ) -> 'TimsFrame':
        """Filter the frame by m/z, scan, inverse mobility, intensity and TOF ranges.

        Args:
            mz_min (float, optional): Minimum m/z value. Defaults to 0.0.
            mz_max (float, optional): Maximum m/z value. Defaults to 2000.0.
            scan_min (int, optional): Minimum scan value. Defaults to 0.
            scan_max (int, optional): Maximum scan value. Defaults to 1000.
            mobility_min (float, optional): Minimum inverse mobility value. Defaults to 0.0.
            mobility_max (float, optional): Maximum inverse mobility value. Defaults to 2.0.
            intensity_min (float, optional): Minimum intensity value. Defaults to 0.0.
            intensity_max (float, optional): Maximum intensity value. Defaults to 1e9.
            tof_min (int, optional): Minimum TOF value. Defaults to 0.
            tof_max (int, optional): Maximum TOF value. Defaults to 400000.

        Returns:
            TimsFrame: Filtered frame.
        """
        filtered = self.__frame_ptr.filter_ranged(
            mz_min, mz_max, scan_min, scan_max, mobility_min, mobility_max,
            intensity_min, intensity_max, tof_min, tof_max)
        return TimsFrame.from_py_ptr(filtered)

    def to_indexed_mz_spectrum(self) -> 'IndexedMzSpectrum':
        """Convert the frame to an IndexedMzSpectrum.

        Returns:
            IndexedMzSpectrum: IndexedMzSpectrum.
        """
        return IndexedMzSpectrum.from_py_ptr(self.__frame_ptr.to_indexed_mz_spectrum())

    def to_resolution(self, resolution: int) -> 'TimsFrame':
        """Convert the frame to a given resolution.

        Args:
            resolution (int): Resolution.

        Returns:
            TimsFrame: Frame with the given resolution.
        """
        return TimsFrame.from_py_ptr(self.__frame_ptr.to_resolution(resolution))

    def vectorized(self, resolution: int = 2) -> 'TimsFrameVectorized':
        """Convert the frame to a vectorized frame.

        Args:
            resolution (int, optional): Resolution. Defaults to 2.

        Returns:
            TimsFrameVectorized: Vectorized frame.
        """
        return TimsFrameVectorized.from_py_ptr(self.__frame_ptr.vectorized(resolution))

    def to_tims_spectra(self) -> List['TimsSpectrum']:
        """Convert the frame to a list of TimsSpectrum.

        Returns:
            List[TimsSpectrum]: List of TimsSpectrum.
        """
        # NOTE(review): this uses TimsSpectrum.from_py_tims_spectrum while
        # __getitem__ uses TimsSpectrum.from_py_ptr -- confirm both exist.
        return [TimsSpectrum.from_py_tims_spectrum(spec) for spec in self.__frame_ptr.to_tims_spectra()]

    def to_windows(self, window_length: float = 10, overlapping: bool = True, min_num_peaks: int = 5,
                   min_intensity: float = 1) -> List[TimsSpectrum]:
        """Convert the frame to a list of windows.

        Args:
            window_length (float, optional): Window length. Defaults to 10.
            overlapping (bool, optional): Whether the windows should overlap. Defaults to True.
            min_num_peaks (int, optional): Minimum number of peaks in a window. Defaults to 5.
            min_intensity (float, optional): Minimum intensity of a peak in a window. Defaults to 1.

        Returns:
            List[TimsSpectrum]: List of windows.
        """
        py_windows = self.__frame_ptr.to_windows(window_length, overlapping, min_num_peaks, min_intensity)
        return [TimsSpectrum.from_py_tims_spectrum(spec) for spec in py_windows]

    @classmethod
    def from_windows(cls, windows: List[TimsSpectrum]) -> 'TimsFrame':
        """Create a TimsFrame from a list of windows.

        Args:
            windows (List[TimsSpectrum]): List of windows.

        Returns:
            TimsFrame: TimsFrame created from the windows.
        """
        # Build through cls so a subclass gets an instance of its own type,
        # matching what from_py_ptr already does.
        return cls.from_py_ptr(ims.PyTimsFrame.from_windows(
            [spec.get_py_ptr() for spec in windows]
        ))

    @classmethod
    def from_tims_spectra(cls, spectra: List[TimsSpectrum]) -> 'TimsFrame':
        """Create a TimsFrame from a list of TimsSpectrum.

        Args:
            spectra (List[TimsSpectrum]): List of TimsSpectrum.

        Returns:
            TimsFrame: TimsFrame created from the TimsSpectrum.
        """
        return cls.from_py_ptr(ims.PyTimsFrame.from_tims_spectra(
            [spec.get_py_ptr() for spec in spectra]
        ))

    def to_dense_windows(self, window_length: float = 10, resolution: int = 1, overlapping: bool = True,
                         min_num_peaks: int = 5, min_intensity: float = 0.0):
        """Convert the frame to dense windows.

        Args:
            window_length (float, optional): Window length. Defaults to 10.
            resolution (int, optional): Resolution. Defaults to 1.
            overlapping (bool, optional): Whether the windows should overlap. Defaults to True.
            min_num_peaks (int, optional): Minimum number of peaks in a window. Defaults to 5.
            min_intensity (float, optional): Minimum intensity. Defaults to 0.0.

        Returns:
            tuple: ``(ims, mzs, scans, window_indices, dense)`` where the first
            four entries are passed through unchanged from the wrapped object
            and ``dense`` is its flat value buffer reshaped to
            ``(rows, cols)``. The first entry presumably holds ion-mobility
            values -- TODO confirm against the binding.
        """
        # The local is deliberately not called `ims`, which would shadow the
        # module-level binding alias of the same name.
        rows, cols, values, ims_values, mzs, scans, window_indices = self.__frame_ptr.to_dense_windows(
            window_length, resolution, overlapping, min_num_peaks, min_intensity)

        return ims_values, mzs, scans, window_indices, np.reshape(values, (rows, cols))

    def to_noise_annotated_tims_frame(self) -> 'TimsFrameAnnotated':
        """Convert the frame to a noise annotated frame.

        Returns:
            TimsFrameAnnotated: Noise annotated frame.

        Note:
            Requires imspy-simulation package to be installed.
        """
        TimsFrameAnnotated = _get_tims_frame_annotated()
        return TimsFrameAnnotated.from_py_ptr(self.__frame_ptr.to_noise_annotated_tims_frame())

    def get_inverse_mobility_along_scan_marginal(self) -> float:
        """Get the inverse mobility along the scan marginal.

        Returns:
            float: Inverse mobility.
        """
        return self.__frame_ptr.get_inverse_mobility_along_scan_marginal()

    def get_mobility_mean_and_variance(self) -> Tuple[float, float]:
        """Get the mean and variance of the inverse mobility.

        Returns:
            Tuple[float, float]: Mean and variance of the inverse mobility.
        """
        return self.__frame_ptr.get_mobility_mean_and_variance()

    def get_py_ptr(self):
        """Return the wrapped ``ims.PyTimsFrame`` object."""
        return self.__frame_ptr

    def __repr__(self):
        """Return a summary with frame ID, MS type, peak count and rounded intensity sum."""
        return (f"TimsFrame(frame_id={self.__frame_ptr.frame_id}, ms_type={self.__frame_ptr.ms_type}, "
                f"num_peaks={len(self.__frame_ptr.mz)}, intensity_sum={np.round(np.sum(self.__frame_ptr.intensity))})")

    def random_subsample_frame(self, take_probability: float) -> 'TimsFrame':
        """Randomly subsample the frame.

        Args:
            take_probability (float): Take probability, between 0 and 1 inclusive.

        Returns:
            TimsFrame: Subsampled frame.

        Raises:
            AssertionError: If ``take_probability`` is outside ``[0, 1]``.
        """
        assert 0.0 <= take_probability <= 1.0, "The take probability must be between 0 and 1."
        return TimsFrame.from_py_ptr(self.__frame_ptr.random_subsample_frame(take_probability))

    def fold_along_scan_axis(self, fold_width: int = 4) -> 'TimsFrame':
        """Fold the frame along the scan axis.

        Args:
            fold_width (int): Width of the fold. Default to 4.

        Returns:
            TimsFrame: Folded frame.
        """
        return TimsFrame.from_py_ptr(self.__frame_ptr.fold_along_scan_axis(fold_width))

    def __getitem__(self, index: int) -> Optional[TimsSpectrum]:
        """Get the TimsSpectrum at a given index.

        Args:
            index (int): Index.

        Returns:
            TimsSpectrum: TimsSpectrum at the index, or None if the wrapped
            object returns None for that index.
        """
        maybe_spectrum = self.__frame_ptr.get_tims_spectrum(index)
        if maybe_spectrum is None:
            return None
        # NOTE(review): to_tims_spectra/to_windows wrap spectra with
        # TimsSpectrum.from_py_tims_spectrum instead -- confirm both exist.
        return TimsSpectrum.from_py_ptr(maybe_spectrum)
382
+
383
+
384
class TimsFrameVectorized(RustWrapperObject):
    """Python wrapper around an ``ims.PyTimsFrameVectorized`` binding object.

    The wrapped object is stored in a private attribute; the properties and
    methods of this class forward to it.
    """

    def __init__(self, frame_id: int, ms_type: int, retention_time: float, scan: NDArray[np.int32],
                 mobility: NDArray[np.float64], tof: NDArray[np.int32],
                 indices: NDArray[np.int32], intensity: NDArray[np.float64]):
        """TimsFrameVectorized class.

        Args:
            frame_id (int): Frame ID.
            ms_type (int): MS type.
            retention_time (float): Retention time.
            scan (NDArray[np.int32]): Scan.
            mobility (NDArray[np.float64]): Inverse mobility.
            tof (NDArray[np.int32]): Time of flight.
            indices (NDArray[np.int32]): Indices.
            intensity (NDArray[np.float64]): Intensity.

        Raises:
            AssertionError: If the length of the scan, mobility, tof, indices and intensity arrays are not equal.
        """
        # NOTE: kept as an assert because AssertionError is the documented
        # contract; be aware the check is skipped when Python runs with -O.
        assert len(scan) == len(mobility) == len(tof) == len(indices) == len(intensity), \
            "The length of the scan, mobility, tof, indices and intensity arrays must be equal."

        self.__frame_ptr = ims.PyTimsFrameVectorized(frame_id, ms_type, retention_time, scan, mobility, tof, indices,
                                                     intensity)

    @classmethod
    def from_py_ptr(cls, frame: ims.PyTimsFrameVectorized):
        """Create a TimsFrameVectorized from a PyTimsFrameVectorized.

        ``__init__`` is bypassed: the given binding object is stored as-is.

        Args:
            frame (ims.PyTimsFrameVectorized): PyTimsFrameVectorized to create the TimsFrameVectorized from.

        Returns:
            TimsFrameVectorized: TimsFrameVectorized created from the PyTimsFrameVectorized.
        """
        instance = cls.__new__(cls)
        instance.__frame_ptr = frame
        return instance

    @property
    def frame_id(self) -> int:
        """Frame ID.

        Returns:
            int: Frame ID.
        """
        return self.__frame_ptr.frame_id

    @property
    def ms_type(self) -> str:
        """MS type as a string.

        Forwards the ``ms_type_as_string`` attribute of the wrapped object.

        Returns:
            str: MS type.
        """
        return self.__frame_ptr.ms_type_as_string

    @property
    def retention_time(self) -> float:
        """Retention time.

        Returns:
            float: Retention time.
        """
        return self.__frame_ptr.retention_time

    @property
    def scan(self) -> NDArray[np.int32]:
        """Scan.

        Returns:
            NDArray[np.int32]: Scan.
        """
        return self.__frame_ptr.scan

    @property
    def mobility(self) -> NDArray[np.float64]:
        """Inverse mobility.

        Returns:
            NDArray[np.float64]: Inverse mobility.
        """
        return self.__frame_ptr.mobility

    @property
    def tof(self) -> NDArray[np.int32]:
        """Time of flight.

        Returns:
            NDArray[np.int32]: Time of flight.
        """
        return self.__frame_ptr.tof

    @property
    def indices(self) -> NDArray[np.int32]:
        """Indices.

        Returns:
            NDArray[np.int32]: Indices.
        """
        return self.__frame_ptr.indices

    @property
    def intensity(self) -> NDArray[np.float64]:
        """Intensity.

        Forwards the ``values`` attribute of the wrapped object.

        Returns:
            NDArray[np.float64]: Intensity.
        """
        return self.__frame_ptr.values

    @property
    def df(self) -> pd.DataFrame:
        """Data as a pandas DataFrame.

        Returns:
            pd.DataFrame: One row per entry with the columns ``frame``,
            ``retention_time``, ``scan``, ``mobility``, ``tof``, ``indices``
            and ``intensity``. Frame ID and retention time are repeated on
            every row.
        """
        # Read the scan array once instead of querying the wrapped object
        # three times for the same data.
        scan = self.scan
        num_entries = len(scan)

        return pd.DataFrame({
            'frame': np.repeat(self.frame_id, num_entries),
            'retention_time': np.repeat(self.retention_time, num_entries),
            'scan': scan,
            'mobility': self.mobility,
            'tof': self.tof,
            'indices': self.indices,
            'intensity': self.intensity})

    def __repr__(self):
        """Return a summary with frame ID, MS type and number of entries."""
        # NOTE(review): this reads `ms_type` from the wrapped object while the
        # ms_type property reads `ms_type_as_string` -- confirm both exist.
        return (f"TimsFrameVectorized(frame_id={self.__frame_ptr.frame_id}, ms_type={self.__frame_ptr.ms_type}, "
                f"num_peaks={len(self.__frame_ptr.indices)})")

    def get_tensor_repr(self, dense=True, zero_indexed=True, re_index=True, scan_max=None, index_max=None):
        """Get a tensor representation of the frame.

        Builds a scan-by-index matrix with one row per scan value and one
        column per index value, filled with the intensities.

        Args:
            dense: If True, return dense numpy array. If False, return scipy sparse matrix.
            zero_indexed: If True, shift scans and indices so each starts at 0
                (skipped for empty arrays).
            re_index: If True, pass the indices through ``re_index_indices``.
            scan_max: Maximum scan value for matrix shape; the row count is
                ``scan_max + 1``. If None, it is derived from the data
                (1 for an empty frame).
            index_max: Maximum index value for matrix shape; the column count
                is ``index_max + 1``. If None, it is derived from the data
                (1 for an empty frame).

        Returns:
            numpy array (if dense=True) or scipy sparse COO matrix (if dense=False)
        """
        from scipy import sparse as sp

        row_idx = self.scan.copy()
        col_idx = self.indices.copy()
        values = self.intensity.copy()

        if zero_indexed:
            # np.min raises on empty input, hence the length guards.
            if len(row_idx) > 0:
                row_idx = row_idx - np.min(row_idx)
            if len(col_idx) > 0:
                col_idx = col_idx - np.min(col_idx)

        if re_index:
            col_idx = re_index_indices(col_idx)

        if scan_max is None:
            num_rows = int(np.max(row_idx) + 1) if len(row_idx) > 0 else 1
        else:
            num_rows = int(scan_max + 1)

        if index_max is None:
            num_cols = int(np.max(col_idx) + 1) if len(col_idx) > 0 else 1
        else:
            num_cols = int(index_max + 1)

        # Create scipy sparse COO matrix
        matrix = sp.coo_matrix((values, (row_idx, col_idx)), shape=(num_rows, num_cols))

        return matrix.toarray() if dense else matrix

    def filter(self,
               mz_min: float = 0.0,
               mz_max: float = 2000.0,
               scan_min: int = 0,
               scan_max: int = 1000,
               mobility_min: float = 0.0,
               mobility_max: float = 2.0,
               intensity_min: float = 0.0,
               intensity_max: float = 1e9,
               ) -> 'TimsFrameVectorized':
        """Filter the frame by m/z, scan, inverse mobility and intensity ranges.

        Args:
            mz_min (float, optional): Minimum m/z value. Defaults to 0.0.
            mz_max (float, optional): Maximum m/z value. Defaults to 2000.0.
            scan_min (int, optional): Minimum scan value. Defaults to 0.
            scan_max (int, optional): Maximum scan value. Defaults to 1000.
            mobility_min (float, optional): Minimum inverse mobility value. Defaults to 0.0.
            mobility_max (float, optional): Maximum inverse mobility value. Defaults to 2.0.
            intensity_min (float, optional): Minimum intensity value. Defaults to 0.0.
            intensity_max (float, optional): Maximum intensity value. Defaults to 1e9.

        Returns:
            TimsFrameVectorized: Filtered frame.
        """
        filtered = self.__frame_ptr.filter_ranged(
            mz_min, mz_max, scan_min, scan_max, mobility_min, mobility_max, intensity_min, intensity_max)
        return TimsFrameVectorized.from_py_ptr(filtered)

    def get_py_ptr(self):
        """Return the wrapped ``ims.PyTimsFrameVectorized`` object."""
        return self.__frame_ptr

    def get_arrays_at_index(self, index: int) -> Tuple[NDArray[np.int32], NDArray[np.int32], NDArray[np.float32]]:
        """Get the arrays at a given index.

        Args:
            index (int): Index.

        Returns:
            Tuple[NDArray[np.int32], NDArray[np.int32], NDArray[np.float32]]:
            Arrays at the index, as returned by the wrapped object.
        """
        return self.__frame_ptr.get_arrays_at_index(index)