imspy-core 0.4.0__py3-none-any.whl

@@ -0,0 +1,429 @@
+ import platform
+ import sqlite3
+ import warnings
+ from pathlib import Path
+
+ import numpy as np
+ import pandas as pd
+
+ from typing import Dict, List, Tuple
+
+ from numpy.typing import NDArray
+ import opentims_bruker_bridge as obb
+
+ from abc import ABC
+
+ from imspy_core.core.base import RustWrapperObject
+ from imspy_core.timstof.frame import TimsFrame
+ from imspy_core.timstof.slice import TimsSlice
+
+ import imspy_connector
+ ims = imspy_connector.py_dataset
+
+
+ def is_amd64():
+     """Return True if the interpreter runs on an x86_64/AMD64 machine."""
+     arch = platform.machine().lower()
+     return arch in ("amd64", "x86_64")
+
+
+ class AcquisitionMode(RustWrapperObject):
+     def __init__(self, mode: str):
+         """AcquisitionMode class.
+
+         Args:
+             mode (str): Acquisition mode.
+         """
+         allowed_modes = ["DDA", "DIA", "UNKNOWN", "PRECURSOR"]
+         assert mode in allowed_modes, f"Unknown acquisition mode, use one of {allowed_modes}"
+         self.__mode_ptr = ims.PyAcquisitionMode.from_string(mode)
+
+     @property
+     def mode(self) -> str:
+         """Get the acquisition mode.
+
+         Returns:
+             str: Acquisition mode.
+         """
+         return self.__mode_ptr.acquisition_mode
+
+     @classmethod
+     def from_py_ptr(cls, ptr: ims.PyAcquisitionMode):
+         """Create an AcquisitionMode from a raw pointer.
+
+         Args:
+             ptr (ims.PyAcquisitionMode): Pointer to an acquisition mode.
+
+         Returns:
+             AcquisitionMode: Acquisition mode.
+         """
+         instance = cls.__new__(cls)
+         instance.__mode_ptr = ptr
+         return instance
+
+     def __repr__(self):
+         return f"AcquisitionMode({self.mode})"
+
+     def get_py_ptr(self):
+         return self.__mode_ptr
+
+
+ class TimsDataset(ABC):
+     def __init__(self, data_path: str, in_memory: bool = False, use_bruker_sdk: bool = True):
+         """TimsDataset base class.
+
+         Args:
+             data_path (str): Path to the raw data directory (.d).
+             in_memory (bool): Whether to load the dataset into memory.
+             use_bruker_sdk (bool): Whether to use the Bruker SDK for data access.
+         """
+         self.__dataset = None
+         self.binary_path = None
+         self.use_bruker_sdk = use_bruker_sdk
+
+         if not self.use_bruker_sdk:
+             warnings.warn(
+                 "Warning: SDK-free mode requested. On Linux, calibration will be derived from SDK automatically. "
+                 "On macOS (no SDK), a simple boundary model is used which may have ~5 Da m/z error on some datasets."
+             )
+
+         if data_path.endswith("/"):
+             data_path = data_path[:-1]
+
+         self.data_path = data_path
+         self.experiment_name = data_path.split("/")[-1]
+
+         self.meta_data = self.__load_meta_data()
+         self.global_meta_data_pandas = self.__load_global_meta_data_pandas()
+         self.global_meta_data = self.__load_global_meta_data()
+         self.tims_calibration = self.__load_tims_calibration()
+         self.mz_calibration = self.__load_mz_calibration()
+
+         try:
+             self.precursor_frames = self.meta_data[self.meta_data["MsMsType"] == 0].Id.values.astype(np.int32)
+             self.fragment_frames = self.meta_data[self.meta_data["MsMsType"] > 0].Id.values.astype(np.int32)
+
+         except AttributeError:
+             self.precursor_frames = self.meta_data[self.meta_data["MsMsType"] == 0].frame_id.values.astype(np.int32)
+             self.fragment_frames = self.meta_data[self.meta_data["MsMsType"] > 0].frame_id.values.astype(np.int32)
+
+         self.__current_index = 1
+
+         # The Bruker SDK is only available on x86_64 Linux and Windows, so determine the current OS first.
+         current_os = platform.system()
+
+         # If SDK mode was requested, check for cases where the vendor binary cannot be used.
+         if self.use_bruker_sdk:
+             if current_os == "Darwin":
+                 warnings.warn("Warning: macOS does not support the Bruker SDK, setting use_bruker_sdk to False.")
+                 self.use_bruker_sdk = False
+
+             if not is_amd64():
+                 warnings.warn("Warning: Only the x86_64 architecture is supported by the Bruker SDK, setting use_bruker_sdk to False.")
+                 self.use_bruker_sdk = False
+
+         # Try to find an SDK path (needed for calibration even when use_bruker_sdk is False).
+         sdk_path = "NO_SDK"
+         try:
+             for so_path in obb.get_so_paths():
+                 if Path(so_path).exists():
+                     sdk_path = so_path
+                     break
+         except Exception:
+             pass
+
+         if not self.use_bruker_sdk:
+             # Pass the SDK path for calibration derivation, but keep use_bruker_sdk=False for fast parallel access.
+             self.__dataset = ims.PyTimsDataset(self.data_path, sdk_path, in_memory, self.use_bruker_sdk)
+             self.binary_path = sdk_path
+
+         else:
+             # Try to load the data with the first Bruker binary that works.
+             appropriate_found = False
+             for so_path in obb.get_so_paths():
+                 try:
+                     self.__dataset = ims.PyTimsDataset(self.data_path, so_path, in_memory, self.use_bruker_sdk)
+                     self.binary_path = so_path
+                     appropriate_found = True
+                     break
+                 except Exception:
+                     continue
+             assert appropriate_found is True, ("No appropriate Bruker binary could be found, please check if your "
+                                                "operating system is supported by opentims-bruker-bridge.")
+
+     @property
+     def acquisition_mode(self) -> str:
+         """Get the acquisition mode.
+
+         Returns:
+             str: Acquisition mode.
+         """
+         return self.__dataset.get_acquisition_mode()
+
+     @property
+     def num_scans(self) -> int:
+         """Get the number of scans.
+
+         Returns:
+             int: Number of scans.
+         """
+         return self.tims_calibration.C1.values[0] + 1
+
+     @property
+     def acquisition_mode_numeric(self) -> int:
+         """Get the acquisition mode as a numerical value.
+
+         Returns:
+             int: Acquisition mode as a numerical value.
+         """
+         return self.__dataset.get_acquisition_mode_numeric()
+
+     @property
+     def frame_count(self) -> int:
+         """Get the number of frames.
+
+         Returns:
+             int: Number of frames.
+         """
+         return self.__dataset.frame_count()
+
+     def __load_tims_calibration(self) -> pd.DataFrame:
+         """Get the TIMS calibration table.
+
+         Returns:
+             pd.DataFrame: Calibration.
+         """
+         return pd.read_sql_query("SELECT * from TimsCalibration", sqlite3.connect(self.data_path + "/analysis.tdf"))
+
+     def __load_mz_calibration(self) -> pd.DataFrame:
+         """Get the m/z calibration table.
+
+         Returns:
+             pd.DataFrame: m/z calibration.
+         """
+         return pd.read_sql_query("SELECT * from MzCalibration", sqlite3.connect(self.data_path + "/analysis.tdf"))
+
+     def __load_meta_data(self) -> pd.DataFrame:
+         """Get the frame metadata.
+
+         Returns:
+             pd.DataFrame: Frame metadata.
+         """
+         return pd.read_sql_query("SELECT * from Frames", sqlite3.connect(self.data_path + "/analysis.tdf"))
+
+     def __load_global_meta_data(self) -> Dict[str, str]:
+         """Get the global metadata as a dictionary.
+
+         Returns:
+             Dict[str, str]: Global metadata key-value pairs.
+         """
+         d = pd.read_sql_query("SELECT * from GlobalMetadata", sqlite3.connect(self.data_path + "/analysis.tdf"))
+         return dict(zip(d.Key, d.Value))
+
+     def __load_global_meta_data_pandas(self) -> pd.DataFrame:
+         """Get the global metadata as a DataFrame.
+
+         Returns:
+             pd.DataFrame: Global metadata.
+         """
+         return pd.read_sql_query("SELECT * from GlobalMetadata", sqlite3.connect(self.data_path + "/analysis.tdf"))
+
+     def get_table(self, table_name: str) -> pd.DataFrame:
+         """Get a table from analysis.tdf by name.
+
+         Args:
+             table_name (str): Table name.
+
+         Returns:
+             pd.DataFrame: Table.
+         """
+         return pd.read_sql_query(f"SELECT * from {table_name}", sqlite3.connect(self.data_path + "/analysis.tdf"))
+
+     @property
+     def im_lower(self):
+         return float(self.global_meta_data["OneOverK0AcqRangeLower"])
+
+     @property
+     def im_upper(self):
+         return float(self.global_meta_data["OneOverK0AcqRangeUpper"])
+
+     @property
+     def mz_lower(self):
+         return float(self.global_meta_data["MzAcqRangeLower"])
+
+     @property
+     def mz_upper(self):
+         return float(self.global_meta_data["MzAcqRangeUpper"])
+
+     @property
+     def average_cycle_length(self) -> float:
+         return np.mean(np.diff(self.meta_data.Time.values))
+
+     @property
+     def description(self) -> str:
+         return self.global_meta_data["Description"]
+
+     def get_tims_frame(self, frame_id: int) -> TimsFrame:
+         """Get a TimsFrame.
+
+         Args:
+             frame_id (int): Frame ID.
+
+         Returns:
+             TimsFrame: TimsFrame.
+         """
+         return TimsFrame.from_py_ptr(self.__dataset.get_frame(frame_id))
+
+     def get_tims_slice(self, frame_ids: NDArray[np.int32], num_threads: int = 8) -> TimsSlice:
+         """Get a TimsSlice spanning the given frames.
+
+         Args:
+             frame_ids (NDArray[np.int32]): Frame IDs.
+             num_threads (int): Number of threads.
+
+         Returns:
+             TimsSlice: TimsSlice.
+         """
+         return TimsSlice.from_py_tims_slice(self.__dataset.get_slice(frame_ids, num_threads))
+
+     def tof_to_mz(self, frame_id: int, tof_values: NDArray[np.int32]) -> NDArray[np.float64]:
+         """Convert TOF values to m/z values.
+
+         Args:
+             frame_id (int): Frame ID.
+             tof_values (NDArray[np.int32]): TOF values.
+
+         Returns:
+             NDArray[np.float64]: m/z values.
+         """
+         return self.__dataset.tof_to_mz(frame_id, tof_values)
+
+     def mz_to_tof(self, frame_id: int, mz_values: NDArray[np.float64]) -> NDArray[np.int32]:
+         """Convert m/z values to TOF values.
+
+         Args:
+             frame_id (int): Frame ID.
+             mz_values (NDArray[np.float64]): m/z values.
+
+         Returns:
+             NDArray[np.int32]: TOF values.
+         """
+         return self.__dataset.mz_to_tof(frame_id, mz_values)
+
+     def scan_to_inverse_mobility(self, frame_id: int, scan_values: NDArray[np.int32]) -> NDArray[np.float64]:
+         """Convert scan values to inverse mobility values.
+
+         Args:
+             frame_id (int): Frame ID.
+             scan_values (NDArray[np.int32]): Scan values.
+
+         Returns:
+             NDArray[np.float64]: Inverse mobility values.
+         """
+         return self.__dataset.scan_to_inverse_mobility(frame_id, scan_values)
+
+     def inverse_mobility_to_scan(self, frame_id: int, im_values: NDArray[np.float64]) -> NDArray[np.int32]:
+         """Convert inverse mobility values to scan values.
+
+         Args:
+             frame_id (int): Frame ID.
+             im_values (NDArray[np.float64]): Inverse mobility values.
+
+         Returns:
+             NDArray[np.int32]: Scan values.
+         """
+         return self.__dataset.inverse_mobility_to_scan(frame_id, im_values)
+
+     def compress_zstd(self, values: NDArray[np.uint8]) -> NDArray[np.uint8]:
+         """Compress values using ZSTD.
+
+         Args:
+             values (NDArray[np.uint8]): Bytes to compress.
+
+         Returns:
+             NDArray[np.uint8]: Compressed values.
+         """
+         return self.__dataset.compress_bytes_zstd(values)
+
+     def decompress_zstd(self, values: NDArray[np.uint8], ignore_first_n: int = 8) -> NDArray[np.uint8]:
+         """Decompress values using ZSTD.
+
+         Args:
+             values (NDArray[np.uint8]): Bytes to decompress.
+             ignore_first_n (int): Number of leading bytes to skip before decompression.
+
+         Returns:
+             NDArray[np.uint8]: Decompressed values.
+         """
+         return self.__dataset.decompress_bytes_zstd(values[ignore_first_n:])
+
+     def indexed_values_to_compressed_bytes(self,
+                                            scan_values: NDArray[np.int32],
+                                            tof_values: NDArray[np.int32],
+                                            intensity_values: NDArray[np.float64],
+                                            total_scans: int) -> NDArray[np.uint8]:
+         """Convert scan, TOF, and intensity values to compressed bytes.
+
+         Args:
+             scan_values (NDArray[np.int32]): Scan values.
+             tof_values (NDArray[np.int32]): TOF values.
+             intensity_values (NDArray[np.float64]): Intensity values.
+             total_scans (int): Total number of scans.
+
+         Returns:
+             NDArray[np.uint8]: Compressed bytes.
+         """
+         return self.__dataset.scan_tof_intensities_to_compressed_u8(
+             scan_values,
+             tof_values,
+             intensity_values.astype(np.int32),
+             total_scans
+         )
+
+     def compress_frames(self, frames: List[TimsFrame], num_threads: int = 4) -> List[NDArray[np.uint8]]:
+         """Compress a collection of frames.
+
+         Args:
+             frames (List[TimsFrame]): List of frames.
+             num_threads (int): Number of threads to use.
+
+         Returns:
+             List[NDArray[np.uint8]]: List of compressed bytes.
+         """
+         return self.__dataset.compress_frames([f.get_py_ptr() for f in frames], self.num_scans, num_threads)
+
+     def bytes_to_indexed_values(self, values: NDArray[np.uint8]) \
+             -> Tuple[NDArray[np.int32], NDArray[np.int32], NDArray[np.float64]]:
+         """Convert bytes to scan, TOF, and intensity values.
+
+         Args:
+             values (NDArray[np.uint8]): Bytes.
+
+         Returns:
+             NDArray[np.int32]: Scan values.
+             NDArray[np.int32]: TOF values.
+             NDArray[np.float64]: Intensity values.
+         """
+         scan_values, tof_values, intensity_values = self.__dataset.u8_to_scan_tof_intensities(values)
+         return scan_values, tof_values, intensity_values.astype(np.float64)
+
+     def __iter__(self):
+         return self
+
+     def __next__(self):
+         if self.__current_index <= self.frame_count:
+             frame_ptr = self.__dataset.get_frame(self.__current_index)
+             self.__current_index += 1
+             if frame_ptr is not None:
+                 return TimsFrame.from_py_ptr(frame_ptr)
+             else:
+                 raise ValueError(f"Frame pointer is None for valid index: {self.__current_index - 1}")
+         else:
+             self.__current_index = 1  # Reset for the next iteration
+             raise StopIteration
+
+     def __getitem__(self, index):
+         if isinstance(index, slice):
+             return self.get_tims_slice(np.arange(index.start, index.stop, index.step).astype(np.int32))
+         return self.get_tims_frame(index)
+
+     def __repr__(self):
+         return f"TimsDataset({self.data_path})"