imspy-core 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,364 @@
1
+ import sqlite3
2
+ from typing import List, Optional
3
+
4
+ import pandas as pd
5
+
6
+ from imspy_core.core.base import RustWrapperObject
7
+ from imspy_core.timstof.data import TimsDataset
8
+ from imspy_core.timstof.frame import TimsFrame
9
+
10
+ import imspy_connector
11
+ ims = imspy_connector.py_dda
12
+ import warnings
13
+
14
+
15
+
16
class PrecursorDDA(RustWrapperObject):
    """DDA Precursor class.

    Thin Python wrapper around the Rust-side ``PyDDAPrecursor``; all state
    lives in the Rust object and every property delegates straight to it.

    Note:
        The to_sage_precursor() method has been moved to imspy-search package.
        Use imspy_search.dda_extensions.to_sage_precursor(precursor) instead.
    """

    def __init__(self, frame_id: int, precursor_id: int, highest_intensity_mz: float, average_mz: float,
                 inverse_ion_mobility: float, collision_energy: float, precuror_total_intensity: float,
                 isolation_mz: float, isolation_width: float, mono_mz: Optional[float] = None,
                 charge: Optional[int] = None):
        # Nothing is cached on the Python side; the Rust object is the single
        # source of truth for every field below.
        self._precursor_ptr = ims.PyDDAPrecursor(
            frame_id,
            precursor_id,
            highest_intensity_mz,
            average_mz,
            inverse_ion_mobility,
            collision_energy,
            precuror_total_intensity,
            isolation_mz,
            isolation_width,
            mono_mz,
            charge,
        )

    @classmethod
    def from_py_ptr(cls, precursor: ims.PyDDAPrecursor):
        """Wrap an already-existing Rust precursor without re-constructing it."""
        wrapped = cls.__new__(cls)
        wrapped._precursor_ptr = precursor
        return wrapped

    @property
    def frame_id(self) -> int:
        """Frame id the precursor belongs to."""
        return self._precursor_ptr.frame_id

    @property
    def precursor_id(self) -> int:
        """Unique id of the selected precursor."""
        return self._precursor_ptr.precursor_id

    @property
    def mono_mz(self) -> Optional[float]:
        """Monoisotopic m/z, if determined."""
        return self._precursor_ptr.mono_mz

    @property
    def highest_intensity_mz(self) -> float:
        """m/z of the most intense peak of the precursor."""
        return self._precursor_ptr.highest_intensity_mz

    @property
    def average_mz(self) -> float:
        """Intensity-averaged m/z of the precursor."""
        return self._precursor_ptr.average_mz

    @property
    def charge(self) -> Optional[int]:
        """Charge state, if determined."""
        return self._precursor_ptr.charge

    @property
    def inverse_ion_mobility(self) -> float:
        """Inverse ion mobility (1/K0) of the precursor."""
        return self._precursor_ptr.inverse_ion_mobility

    @property
    def collision_energy(self) -> float:
        """Collision energy used for fragmentation."""
        return self._precursor_ptr.collision_energy

    @property
    def precuror_total_intensity(self) -> float:
        # NOTE: the name carries a historical typo ("precuror"); it is kept
        # because it is part of the public interface.
        return self._precursor_ptr.precuror_total_intensity

    @property
    def isolation_mz(self) -> float:
        """Center m/z of the isolation window."""
        return self._precursor_ptr.isolation_mz

    @property
    def isolation_width(self) -> float:
        """Width of the isolation window."""
        return self._precursor_ptr.isolation_width

    def __repr__(self):
        return (f"DDAPrecursor(frame_id={self.frame_id}, precursor_id={self.precursor_id}, "
                f"highest_intensity_mz={self.highest_intensity_mz}, average_mz={self.average_mz}, "
                f"inverse_ion_mobility={self.inverse_ion_mobility}, collision_energy={self.collision_energy}, "
                f"precuror_total_intensity={self.precuror_total_intensity}, isolation_mz={self.isolation_mz}, "
                f"isolation_width={self.isolation_width}, mono_mz={self.mono_mz}, charge={self.charge})")

    def get_py_ptr(self):
        """Return the underlying Rust object."""
        return self._precursor_ptr
90
+
91
+
92
class TimsDatasetDDA(TimsDataset, RustWrapperObject):
    """DDA TimsDataset class.

    Note:
        The get_sage_processed_precursors() method has been moved to imspy-search package.
        Use imspy_search.dda_extensions.get_sage_processed_precursors(dataset, ...) instead.
    """

    # snake_case renames for the raw TDF tables, shared between __init__ and
    # with_mz_calibration so the two construction paths cannot drift apart.
    _PRECURSOR_COLUMNS = {
        'Id': 'precursor_id',
        'LargestPeakMz': 'largest_peak_mz',
        'AverageMz': 'average_mz',
        'MonoisotopicMz': 'monoisotopic_mz',
        'Charge': 'charge',
        'ScanNumber': 'average_scan',
        'Intensity': 'intensity',
        'Parent': 'parent_id',
    }
    _PASEF_COLUMNS = {
        'Frame': 'frame_id',
        'ScanNumBegin': 'scan_begin',
        'ScanNumEnd': 'scan_end',
        'IsolationMz': 'isolation_mz',
        'IsolationWidth': 'isolation_width',
        'CollisionEnergy': 'collision_energy',
        'Precursor': 'precursor_id',
    }

    @staticmethod
    def _read_tdf_table(data_path: str, table: str) -> pd.DataFrame:
        """Read one table from ``<data_path>/analysis.tdf``.

        The sqlite connection is explicitly closed; the previous inline
        ``sqlite3.connect(...)`` calls leaked one connection per read.

        Args:
            data_path: Path to the .d folder.
            table: Name of the TDF table to read (internal, trusted values only).

        Returns:
            pd.DataFrame: The full table contents.
        """
        connection = sqlite3.connect(data_path + "/analysis.tdf")
        try:
            return pd.read_sql_query(f"SELECT * from {table}", connection)
        finally:
            connection.close()

    def __init__(self, data_path: str, in_memory: bool = False, use_bruker_sdk: bool = True, rename_id: bool = True):
        """Open a DDA dataset.

        Args:
            data_path: Path to the .d folder.
            in_memory: Whether to load all data into memory.
            use_bruker_sdk: Whether to use the Bruker SDK for data access.
            rename_id: Whether to rename the meta data 'Id' column to 'frame_id'.
        """
        super().__init__(data_path=data_path, in_memory=in_memory, use_bruker_sdk=use_bruker_sdk)
        self.__dataset = ims.PyTimsDatasetDDA(self.data_path, self.binary_path, in_memory, self.use_bruker_sdk)
        if rename_id:
            self.meta_data = self.meta_data.rename(columns={"Id": "frame_id"})
        self.fragmented_precursors = self._load_selected_precursors().rename(columns=self._PRECURSOR_COLUMNS)
        self.pasef_meta_data = self._load_pasef_meta_data().rename(columns=self._PASEF_COLUMNS)

    def _load_selected_precursors(self):
        """Get precursors selected for fragmentation.

        Returns:
            pd.DataFrame: Precursors selected for fragmentation (raw column names).
        """
        return self._read_tdf_table(self.data_path, "Precursors")

    def _load_pasef_meta_data(self):
        """Get PASEF meta data for DDA.

        Returns:
            pd.DataFrame: PASEF meta data (raw column names).
        """
        return self._read_tdf_table(self.data_path, "PasefFrameMsMsInfo")

    def get_pasef_fragments(self, num_threads: int = 1) -> pd.DataFrame:
        """Get PASEF fragments joined with PASEF meta data, precursor meta data and retention time.

        Args:
            num_threads (int, optional): Number of threads. Defaults to 1. CAUTION: As long as connection to
                datasets is established via bruker so / dll, using multiple threads is unstable.

        Returns:
            pd.DataFrame: One row per (frame_id, precursor_id) with the selected raw fragment data.
        """
        if self.use_bruker_sdk and num_threads > 1:
            # Only warn when multi-threading was actually requested; previously
            # the warning fired even for num_threads == 1.
            warnings.warn("Using multiple threads is currently not supported when using Bruker SDK, "
                          "setting num_threads to 1.")
            num_threads = 1

        fragments = [FragmentDDA.from_py_ptr(fragment)
                     for fragment in self.__dataset.get_pasef_fragments(num_threads)]

        fragment_table = pd.DataFrame({
            'frame_id': [f.frame_id for f in fragments],
            'precursor_id': [f.precursor_id for f in fragments],
            'raw_data': [f.selected_fragment for f in fragments],
        })

        merged = pd.merge(
            fragment_table, self.pasef_meta_data,
            on=['precursor_id', 'frame_id'],
            how='inner',
        )
        merged = pd.merge(
            merged, self.fragmented_precursors,
            on=['precursor_id'],
            how='inner',
        )

        # Work on an explicit copy: the previous code inserted into a
        # column-slice view of meta_data, which is pandas chained assignment.
        time = self.meta_data[['frame_id']].copy()
        time['time'] = self.meta_data['Time'] / 60  # retention time in minutes

        return pd.merge(time, merged, on=['frame_id'], how='inner')

    def get_precursor_frames(self, min_intensity: float = 75, max_peaks: int = 500, num_threads: int = 4) -> List[TimsFrame]:
        """
        Get precursor frames.

        Args:
            min_intensity: minimum intensity a peak must have to be considered
            max_peaks: maximum number of peaks to consider, frames will be sorted by intensity and only the top max_peaks will be considered
            num_threads: number of threads to use for processing

        Returns:
            List[TimsFrame]: List of all precursor frames
        """
        return [TimsFrame.from_py_ptr(frame)
                for frame in self.__dataset.get_precursor_frames(min_intensity, max_peaks, num_threads)]

    def get_selected_precursors(self) -> List[PrecursorDDA]:
        """
        Get meta data for all selected precursors.

        Returns:
            List[PrecursorDDA]: List of all selected precursors
        """
        return [PrecursorDDA.from_py_ptr(precursor) for precursor in self.__dataset.get_selected_precursors()]

    def sample_pasef_fragments_random(self,
                                      scan_apex_values: List[int],
                                      scan_max_value: int,
                                      ) -> TimsFrame:
        """
        Sample PASEF fragments randomly from the dataset.

        Args:
            scan_apex_values: List of scan apex values to sample from
            scan_max_value: maximum scan value to sample from

        Returns:
            TimsFrame: sampled PASEF fragments
        """
        return TimsFrame.from_py_ptr(
            self.__dataset.sample_pasef_fragments_random(scan_apex_values, scan_max_value)
        )

    def sample_precursor_signal(self, num_frames: int, max_intensity: float, take_probability: float) -> TimsFrame:
        """
        Sample precursor signal from the dataset.

        Args:
            num_frames: number of frames to sample
            max_intensity: maximum intensity of the sampled frames
            take_probability: probability of taking a frame

        Returns:
            TimsFrame: sampled precursor signal
        """
        return TimsFrame.from_py_ptr(
            self.__dataset.sample_precursor_signal(num_frames, max_intensity, take_probability)
        )

    def __repr__(self):
        return (f"TimsDatasetDDA(data_path={self.data_path}, num_frames={self.frame_count}, "
                f"fragmented_precursors={self.fragmented_precursors.shape[0]})")

    def get_py_ptr(self):
        """Return the underlying Rust dataset object."""
        return self.__dataset

    @classmethod
    def from_py_ptr(cls, ptr):
        """Wrap an existing Rust dataset object (no metadata tables are loaded)."""
        instance = cls.__new__(cls)
        instance.__dataset = ptr
        return instance

    @classmethod
    def with_mz_calibration(cls, data_path: str, in_memory: bool, tof_intercept: float, tof_slope: float):
        """Create a DDA dataset with custom m/z calibration coefficients.

        This method allows providing externally-derived m/z calibration coefficients
        (e.g., from linear regression on SDK data) for accurate m/z conversion without
        requiring the Bruker SDK at runtime.

        The calibration formula is: sqrt(mz) = tof_intercept + tof_slope * tof_index

        Args:
            data_path: Path to the .d folder
            in_memory: Whether to load all data into memory
            tof_intercept: Intercept for sqrt(mz) = intercept + slope * tof
            tof_slope: Slope for sqrt(mz) = intercept + slope * tof

        Returns:
            TimsDatasetDDA with custom m/z calibration

        Example:
            # Derive calibration from SDK (e.g., on Linux)
            sdk_data = TimsDatasetDDA(path, use_bruker_sdk=True)
            frame = sdk_data.get_tims_frame(1)
            coeffs = np.polyfit(frame.tof, np.sqrt(frame.mz), 1)
            slope, intercept = coeffs[0], coeffs[1]

            # Use calibration on macOS (or for parallel processing)
            dataset = TimsDatasetDDA.with_mz_calibration(path, False, intercept, slope)
        """
        # TimsDataset.__init__ is deliberately bypassed here; the required
        # attributes are set up by hand.
        instance = cls.__new__(cls)
        instance.data_path = data_path
        instance.binary_path = "CALIBRATED"
        instance.use_bruker_sdk = False
        instance._TimsDatasetDDA__dataset = ims.PyTimsDatasetDDA.with_mz_calibration(
            data_path, in_memory, tof_intercept, tof_slope
        )

        # Load metadata tables, applying the same renames as __init__.
        instance.meta_data = cls._read_tdf_table(data_path, "Frames").rename(columns={"Id": "frame_id"})
        instance.fragmented_precursors = cls._read_tdf_table(data_path, "Precursors").rename(
            columns=cls._PRECURSOR_COLUMNS
        )
        instance.pasef_meta_data = cls._read_tdf_table(data_path, "PasefFrameMsMsInfo").rename(
            columns=cls._PASEF_COLUMNS
        )

        return instance
330
+
331
+
332
class FragmentDDA(RustWrapperObject):
    """A fragmented DDA precursor: frame and precursor ids, collision energy,
    and the selected fragment spectrum, backed by a Rust ``PyTimsFragmentDDA``."""

    def __init__(self, frame_id: int, precursor_id: int, collision_energy: float, selected_fragment: TimsFrame):
        # Unwrap the TimsFrame and hand everything to the Rust-side object.
        self._fragment_ptr = ims.PyTimsFragmentDDA(
            frame_id, precursor_id, collision_energy, selected_fragment.get_py_ptr()
        )

    @classmethod
    def from_py_ptr(cls, fragment: ims.PyTimsFragmentDDA):
        """Wrap an already-existing Rust fragment object."""
        wrapped = cls.__new__(cls)
        wrapped._fragment_ptr = fragment
        return wrapped

    @property
    def frame_id(self) -> int:
        """Frame id the fragment was recorded in."""
        return self._fragment_ptr.frame_id

    @property
    def precursor_id(self) -> int:
        """Id of the precursor that was fragmented."""
        return self._fragment_ptr.precursor_id

    @property
    def collision_energy(self) -> float:
        """Collision energy used for fragmentation."""
        return self._fragment_ptr.collision_energy

    @property
    def selected_fragment(self) -> TimsFrame:
        # A fresh Python-level TimsFrame wrapper is created on every access.
        return TimsFrame.from_py_ptr(self._fragment_ptr.selected_fragment)

    def __repr__(self):
        return (f"FragmentDDA(frame_id={self.frame_id}, precursor_id={self.precursor_id}, "
                f"collision_energy={self.collision_energy}, "
                f"selected_fragment={self.selected_fragment})")

    def get_py_ptr(self):
        """Return the underlying Rust object."""
        return self._fragment_ptr
@@ -0,0 +1,131 @@
1
+ import sqlite3
2
+ from typing import List
3
+
4
+ from imspy_core.core.base import RustWrapperObject
5
+ from imspy_core.timstof.data import TimsDataset
6
+ import pandas as pd
7
+
8
+ import imspy_connector
9
+
10
+ from imspy_core.timstof.frame import TimsFrame
11
+
12
+ ims = imspy_connector.py_dia
13
+
14
+
15
class TimsDatasetDIA(TimsDataset, RustWrapperObject):
    """DIA TimsDataset class backed by the Rust ``PyTimsDatasetDIA`` object."""

    @staticmethod
    def _read_tdf_table(data_path: str, table: str) -> pd.DataFrame:
        """Read one table from ``<data_path>/analysis.tdf``.

        The sqlite connection is explicitly closed; the previous inline
        ``sqlite3.connect(...)`` calls leaked one connection per read.

        Args:
            data_path: Path to the .d folder.
            table: Name of the TDF table to read (internal, trusted values only).

        Returns:
            pd.DataFrame: The full table contents.
        """
        connection = sqlite3.connect(data_path + "/analysis.tdf")
        try:
            return pd.read_sql_query(f"SELECT * from {table}", connection)
        finally:
            connection.close()

    def __init__(self, data_path: str, in_memory: bool = False, use_bruker_sdk: bool = True):
        """Open a DIA dataset.

        Args:
            data_path: Path to the .d folder.
            in_memory: Whether to load all data into memory.
            use_bruker_sdk: Whether to use the Bruker SDK for data access.
        """
        super().__init__(data_path=data_path, in_memory=in_memory, use_bruker_sdk=use_bruker_sdk)
        self.__dataset = ims.PyTimsDatasetDIA(self.data_path, self.binary_path, in_memory, self.use_bruker_sdk)

    @property
    def dia_ms_ms_windows(self):
        """Get PASEF meta data for DIA.

        Returns:
            pd.DataFrame: PASEF meta data.
        """
        return self._read_tdf_table(self.data_path, "DiaFrameMsMsWindows")

    @property
    def dia_ms_ms_info(self):
        """Get DIA MS/MS info.

        Returns:
            pd.DataFrame: DIA MS/MS info.
        """
        return self._read_tdf_table(self.data_path, "DiaFrameMsMsInfo")

    def sample_precursor_signal(self, num_frames: int, max_intensity: float = 25.0, take_probability: float = 0.5) -> TimsFrame:
        """Sample precursor signal.

        Args:
            num_frames: Number of frames.
            max_intensity: Maximum intensity.
            take_probability: Probability to take signals from sampled frames.

        Returns:
            TimsFrame: Frame.
        """
        # NOTE(review): asserts are stripped under `python -O`; kept for
        # backward compatibility (callers may rely on AssertionError).
        assert num_frames > 0, "Number of frames must be greater than 0."
        assert 0 < take_probability <= 1, " Probability to take signals from sampled frames must be between 0 and 1."

        return TimsFrame.from_py_ptr(self.__dataset.sample_precursor_signal(num_frames, max_intensity, take_probability))

    def sample_fragment_signal(self, num_frames: int, window_group: int, max_intensity: float = 25.0, take_probability: float = 0.5) -> TimsFrame:
        """Sample fragment signal.

        Args:
            num_frames: Number of frames.
            window_group: Window group to take frames from.
            max_intensity: Maximum intensity.
            take_probability: Probability to take signals from sampled frames.

        Returns:
            TimsFrame: Frame.
        """
        assert num_frames > 0, "Number of frames must be greater than 0."
        assert 0 < take_probability <= 1, " Probability to take signals from sampled frames must be between 0 and 1."

        return TimsFrame.from_py_ptr(self.__dataset.sample_fragment_signal(num_frames, window_group, max_intensity, take_probability))

    def read_compressed_data_full(self) -> List[bytes]:
        """Read compressed data.

        Returns:
            List[bytes]: Compressed data.
        """
        return self.__dataset.read_compressed_data_full()

    @classmethod
    def from_py_ptr(cls, obj):
        """Wrap an existing Rust dataset object (no metadata tables are loaded)."""
        instance = cls.__new__(cls)
        instance.__dataset = obj
        return instance

    @classmethod
    def with_mz_calibration(cls, data_path: str, in_memory: bool, tof_intercept: float, tof_slope: float):
        """Create a DIA dataset with custom m/z calibration coefficients.

        This method allows providing externally-derived m/z calibration coefficients
        (e.g., from linear regression on SDK data) for accurate m/z conversion without
        requiring the Bruker SDK at runtime.

        The calibration formula is: sqrt(mz) = tof_intercept + tof_slope * tof_index

        Args:
            data_path: Path to the .d folder
            in_memory: Whether to load all data into memory
            tof_intercept: Intercept for sqrt(mz) = intercept + slope * tof
            tof_slope: Slope for sqrt(mz) = intercept + slope * tof

        Returns:
            TimsDatasetDIA with custom m/z calibration
        """
        # TimsDataset.__init__ is deliberately bypassed here; the required
        # attributes are set up by hand.
        instance = cls.__new__(cls)
        instance.data_path = data_path
        instance.binary_path = "CALIBRATED"
        instance.use_bruker_sdk = False
        instance._TimsDatasetDIA__dataset = ims.PyTimsDatasetDIA.with_mz_calibration(
            data_path, in_memory, tof_intercept, tof_slope
        )

        # Load metadata (needed for some properties).
        instance.meta_data = cls._read_tdf_table(data_path, "Frames")
        # GlobalMetadata is a two-column key/value table; turn it into a dict.
        instance.global_meta_data = dict(zip(
            *cls._read_tdf_table(data_path, "GlobalMetadata").values.T
        ))

        return instance

    def get_py_ptr(self):
        """Return the underlying Rust dataset object."""
        return self.__dataset