shadowfax 0.1.0__py3-none-manylinux_2_24_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
shadowfax/__init__.py ADDED
@@ -0,0 +1 @@
1
+ from .shadowfax import PileupStream, count_bases
shadowfax/shadowfax.py ADDED
@@ -0,0 +1,374 @@
1
+ import ctypes
2
+ import queue
3
+ import threading
4
+ from concurrent.futures import ThreadPoolExecutor
5
+ from abc import ABC, abstractmethod
6
+ from pathlib import Path
7
+
8
+ WINDOW_SIZE = 16384
9
+
10
class Context(ctypes.Structure):
    """Opaque native handle type.

    No fields are declared; this module only ever uses it through
    ``ctypes.POINTER(Context)``.
    """
12
+
13
class Flagstat(ctypes.Structure):
    """One set of flagstat counters: sixteen consecutive unsigned 64-bit fields.

    The field order fixes the in-memory layout of the structure, so the names
    below must stay in exactly this sequence.
    """

    _fields_ = [
        (counter, ctypes.c_uint64)
        for counter in (
            "n_reads",
            "n_mapped",
            "n_pair_all",
            "n_pair_map",
            "n_pair_good",
            "n_sgltn",
            "n_read1",
            "n_read2",
            "n_dup",
            "n_diffchr",
            "n_diffhigh",
            "n_secondary",
            "n_supp",
            "n_primary",
            "n_pmapped",
            "n_pdup",
        )
    ]
32
+
33
class FlagstatStream(ctypes.Structure):
    """Opaque native handle type.

    No fields are declared; this module only ever uses it through
    ``ctypes.POINTER(FlagstatStream)``.
    """
35
+
36
class PileupStreamC(ctypes.Structure):
    """Opaque native handle type.

    No fields are declared; this module only ever uses it through
    ``ctypes.POINTER(PileupStreamC)``.
    """
38
+
39
class DepthStream(ctypes.Structure):
    """Opaque native handle type.

    No fields are declared; this module only ever uses it through
    ``ctypes.POINTER(DepthStream)``.
    """
41
+
42
class _PileupBatchC(ctypes.Structure):
    """ctypes layout of one pileup batch record.

    Three 64-bit counters/positions come first, followed by a 32-bit
    reference id and an untyped data pointer.
    """

    _fields_ = [
        (u64_name, ctypes.c_uint64)
        for u64_name in ("n_windows", "start_pos", "end_pos")
    ] + [
        ("ref_id", ctypes.c_uint32),
        # c_void_p reads back as an integer address, or None for NULL.
        ("data", ctypes.c_void_p),
    ]

    def to_dict(self):
        """Return the fields as a plain dict via ``ctypes_to_dict``."""
        return ctypes_to_dict(self)
53
+
54
+
55
class PileupBatch:
    """Python-side owner of one native pileup batch.

    Holds the stream handle and the batch pointer it was produced from, and
    hands both to ``shadowfax_pileup_stream_batch_destroy`` when the Python
    object is finalized. Unknown attribute reads are forwarded to the
    structure the pointer refers to (e.g. ``batch.n_windows``).
    """

    __slots__ = ("_stream", "_ptr")

    def __init__(self, stream, ptr):
        """Store the owning stream handle and the batch pointer."""
        self._stream = stream
        self._ptr = ptr

    def __del__(self):
        """Release the native batch, if this object was fully initialised."""
        stream = getattr(self, "_stream", None)
        ptr = getattr(self, "_ptr", None)
        if stream is None or ptr is None:
            return
        lib.shadowfax_pileup_stream_batch_destroy(stream, ptr)

    def __getattr__(self, name):
        """Forward unknown attribute reads to the pointed-to structure.

        Raises:
            AttributeError: if ``name`` is one of this class's own slots and
                that slot has not been assigned yet.
        """
        # __getattr__ only runs after normal lookup failed. If the missing
        # name is one of our own slots, touching self._ptr below would
        # re-enter this method forever, so fail with a plain AttributeError.
        if name in ("_stream", "_ptr"):
            raise AttributeError(name)
        return getattr(self._ptr.contents, name)

    @property
    def has_data(self):
        """True when the batch's ``data`` field is non-NULL."""
        return bool(self._ptr.contents.data)
75
+
76
class DepthBatch(ctypes.Structure):
    """Opaque native handle type.

    No fields are declared; this module only ever uses it through
    ``ctypes.POINTER(DepthBatch)``.
    """
78
+
79
class DepthBatchData(ctypes.Structure):
    """Pointer-plus-count view of a depth batch's ``uint32`` buffer."""

    _fields_ = [
        ("buffer", ctypes.POINTER(ctypes.c_uint32)),
        ("n_regions", ctypes.c_uint64),
    ]

    def to_buffer(self):
        """Reinterpret ``buffer`` as a nested ctypes array and return it.

        The array type is ``WINDOW_SIZE`` rows of ``n_regions`` ``c_uint32``
        values each. The result is obtained by casting the existing pointer,
        not by copying element by element.
        """
        row_type = ctypes.c_uint32 * self.n_regions
        grid_type = row_type * WINDOW_SIZE
        grid_ptr = ctypes.cast(self.buffer, ctypes.POINTER(grid_type))
        return grid_ptr.contents
89
+
90
+
91
class _VariantC(ctypes.Structure):
    """ctypes record for one variant: position plus ref and alt bases."""

    _fields_ = [
        ("pos", ctypes.c_uint32),
        # Single-byte fields: each holds exactly one character.
        ("ref", ctypes.c_char),
        ("alt", ctypes.c_char),
        # Two explicit filler bytes after the two one-byte fields.
        ("_alignment_padding", ctypes.c_char * 2),
    ]
98
+
99
+
100
class _BaseCountsC(ctypes.Structure):
    """ctypes record of four 32-bit counters, one per base (a, c, g, t)."""

    # Expands to a_count, c_count, g_count, t_count - in that order.
    _fields_ = [(f"{base}_count", ctypes.c_uint32) for base in "acgt"]
107
+
108
+
109
class BaseCounts:
    """Plain Python holder for per-base counts (a, c, g, t)."""

    __slots__ = ("a_count", "c_count", "g_count", "t_count")

    def __init__(self, a_count=0, c_count=0, g_count=0, t_count=0):
        """Store the four counts; each defaults to 0."""
        self.a_count, self.c_count = a_count, c_count
        self.g_count, self.t_count = g_count, t_count

    def to_dict(self):
        """Return the four counts as a dict keyed by attribute name."""
        return {slot: getattr(self, slot) for slot in BaseCounts.__slots__}
125
+
126
+
127
class BamFlagstat(ctypes.Structure):
    """Two ``Flagstat`` counter sets stored back to back: passed, then failed."""

    _fields_ = [
        ("passed", Flagstat),
        ("failed", Flagstat),
    ]

    def to_dict(self):
        """Return a nested dict of both counter sets via ``ctypes_to_dict``."""
        return ctypes_to_dict(self)
135
+
136
class FlagstatBatch(ctypes.Structure):
    """A ``BamFlagstat`` payload followed by two 64-bit byte counters."""

    _fields_ = [
        ("data", BamFlagstat),
        ("bytes_processed", ctypes.c_uint64),
        ("total_bytes", ctypes.c_uint64),
    ]

    def to_dict(self):
        """Return the batch, including the nested payload, as plain dicts."""
        return ctypes_to_dict(self)
145
+
146
def ctypes_to_dict(obj):
    """Convert a ctypes structure into a dict keyed by field name.

    Fields whose value is itself a ``ctypes.Structure`` are converted
    recursively; every other value is stored exactly as ctypes returns it.
    ``obj._fields_`` must consist of ``(name, type)`` pairs.
    """

    def _plain(value):
        # Only nested structures need unpacking; scalars pass straight through.
        if isinstance(value, ctypes.Structure):
            return ctypes_to_dict(value)
        return value

    return {
        name: _plain(getattr(obj, name))
        for name, _ctype in obj._fields_
    }
155
+
156
# The native library is looked up inside this package, relative to this file.
lib_path = Path(__file__).resolve().parent.joinpath('_native', 'linux_x86_64', 'libshadowfax.so')

lib = ctypes.cdll.LoadLibrary(str(lib_path))

# ---------------------------------------------------------------------------
# Foreign-function prototypes.
#
# Functions that get no explicit ``restype`` below keep the ctypes default
# return type (C int).
# ---------------------------------------------------------------------------

# -- context ----------------------------------------------------------------
lib.shadowfax_context_create.restype = ctypes.POINTER(Context)

# -- flagstat stream --------------------------------------------------------
lib.shadowfax_flagstat_stream_create.argtypes = [
    ctypes.c_char_p,  # bam_path
    ctypes.c_uint64,  # input_batch_size
    ctypes.c_uint64,  # data_limit
]
lib.shadowfax_flagstat_stream_create.restype = ctypes.POINTER(FlagstatStream)

lib.shadowfax_flagstat_stream_next.argtypes = [
    ctypes.POINTER(FlagstatStream),
    ctypes.POINTER(FlagstatBatch),
]

lib.shadowfax_flagstat_stream_done.argtypes = [ctypes.POINTER(FlagstatStream)]
lib.shadowfax_flagstat_stream_done.restype = ctypes.c_bool

# -- pileup stream ----------------------------------------------------------
lib.shadowfax_pileup_stream_create.argtypes = [
    ctypes.POINTER(Context),
    ctypes.c_char_p,  # bam_path
    ctypes.c_uint64,  # input_batch_size
    ctypes.c_uint64,  # data_limit
]
lib.shadowfax_pileup_stream_create.restype = ctypes.POINTER(PileupStreamC)

lib.shadowfax_pileup_stream_done.argtypes = [ctypes.POINTER(PileupStreamC)]
lib.shadowfax_pileup_stream_done.restype = ctypes.c_bool

lib.shadowfax_pileup_stream_next.argtypes = [ctypes.POINTER(PileupStreamC)]
lib.shadowfax_pileup_stream_next.restype = ctypes.POINTER(_PileupBatchC)

lib.shadowfax_pileup_stream_batch_destroy.argtypes = [
    ctypes.POINTER(PileupStreamC),
    ctypes.POINTER(_PileupBatchC),
]

# -- depth stream -----------------------------------------------------------
lib.shadowfax_depth_stream_create.argtypes = [ctypes.POINTER(Context)]
lib.shadowfax_depth_stream_create.restype = ctypes.POINTER(DepthStream)

lib.shadowfax_depth_batch_create.argtypes = []
lib.shadowfax_depth_batch_create.restype = ctypes.POINTER(DepthBatch)

lib.shadowfax_depth_stream_done.argtypes = [ctypes.POINTER(DepthStream)]
lib.shadowfax_depth_stream_done.restype = ctypes.c_bool

lib.shadowfax_depth_stream_next.argtypes = [
    ctypes.POINTER(DepthStream),
    ctypes.c_void_p,
    ctypes.c_uint64,
    ctypes.POINTER(DepthBatch),
]

lib.shadowfax_depth_batch_get_data.argtypes = [
    ctypes.POINTER(DepthBatch),
    ctypes.POINTER(DepthBatchData),
]

# -- base counting ----------------------------------------------------------
lib.shadowfax_count_bases.argtypes = [
    ctypes.POINTER(Context),
    ctypes.POINTER(_VariantC),
    ctypes.c_uint64,
    ctypes.c_void_p,
    ctypes.c_uint64,
    ctypes.POINTER(_BaseCountsC),
]
lib.shadowfax_count_bases.restype = None
242
+
243
+
244
# Shared worker pool; flagstat() submits its work here.
executor = ThreadPoolExecutor()

# Lazily created native context, shared by every caller of get_context().
ctx = None


def get_context():
    """Return the module-wide native context, creating it on first use.

    Returns:
        The ``POINTER(Context)`` handle produced by
        ``shadowfax_context_create``; the same object on every later call.

    Raises:
        RuntimeError: if the native constructor returns a NULL pointer.
    """
    global ctx
    if ctx is None:
        handle = lib.shadowfax_context_create()
        # A NULL ctypes pointer is falsy but is not None, so without this
        # check it would be cached and passed to every later native call.
        if not handle:
            raise RuntimeError("shadowfax_context_create returned a NULL context")
        ctx = handle
    return ctx
252
+
253
+
254
def _flagstat_thread(q, bam_path):
    """Create a native flagstat stream for ``bam_path`` and return its handle.

    ``q`` is accepted but not used. The return value is the pointer produced
    by ``shadowfax_flagstat_stream_create``, not a set of statistics.
    """
    # NOTE(review): 128 and 1 look like placeholder sizes - confirm against
    # the native API before relying on this helper.
    input_batch_size = ctypes.c_uint64(128)
    data_limit = ctypes.c_uint64(1)
    return lib.shadowfax_flagstat_stream_create(
        bam_path.encode(), input_batch_size, data_limit
    )
258
+
259
+
260
def flagstat(bam_path):
    """Submit ``_flagstat_thread`` to the shared executor and return its Future.

    A fresh ``queue.Queue`` is passed along as the worker's first argument.
    """
    handoff = queue.Queue()
    return executor.submit(_flagstat_thread, handoff, bam_path)
266
+
267
def flagstat_stream(bam_path='', batch_size=128*1024*1024):
    """Open a native flagstat stream and return a generator over its batches.

    The stream is created immediately when this function is called; only
    the batch iteration is lazy.

    Args:
        bam_path: path of the BAM file, as ``str``, ``bytes`` or
            ``os.PathLike``.
        batch_size: passed to the native create call as ``input_batch_size``.

    Returns:
        A generator yielding ``FlagstatBatch``. The SAME object is yielded
        each time and is refilled in place by the next native call, so call
        ``to_dict()`` on it to keep a snapshot.

    Raises:
        RuntimeError: if the native create call returns a NULL stream.
    """
    import os

    stream = lib.shadowfax_flagstat_stream_create(
        os.fsencode(bam_path), batch_size, 1*1024*1024*1024
    )
    # Fail here instead of handing a NULL handle to the native done/next calls.
    if not stream:
        raise RuntimeError(f"could not open flagstat stream for {bam_path!r}")

    def gen():
        batch = FlagstatBatch()

        while not lib.shadowfax_flagstat_stream_done(stream):
            lib.shadowfax_flagstat_stream_next(stream, ctypes.byref(batch))
            yield batch

    return gen()
279
+
280
class Stream(ABC):
    """Iterator fed by a background producer thread through a one-slot queue.

    Subclasses implement ``thread()``, which puts items on ``self.q`` and
    then puts ``None`` to mark the end of the stream. The producer thread is
    started on the first ``next()`` call. Streams can be chained with
    ``upstream | stream``, which stores ``upstream`` as ``input_stream``.
    """

    # Class-level defaults so these exist even before __ror__ / __next__ run.
    input_stream = None
    _exhausted = False
    _error = None

    def __init__(self):
        self.q = queue.Queue(maxsize=1)
        self.started = False

    def __iter__(self):
        return self

    def __next__(self):
        """Return the next produced item.

        Raises:
            StopIteration: once the end-of-stream sentinel has been seen,
                and on every call after that.
            Exception: whatever the producer thread raised, re-raised once
                in the consumer.
        """
        # An exhausted iterator must keep raising StopIteration; waiting on
        # the queue again would block forever because nothing produces now.
        if self._exhausted:
            raise StopIteration
        if not self.started:
            self.started = True
            threading.Thread(target=self._run, daemon=True).start()
        item = self.q.get()
        if item is None:
            self._exhausted = True
            if self._error is not None:
                error, self._error = self._error, None
                raise error
            raise StopIteration
        return item

    def __ror__(self, input_stream):
        """Support ``input_stream | self``: remember the upstream, return self."""
        self.input_stream = input_stream
        return self

    def _run(self):
        """Thread entry point: run ``thread()`` and report a failure.

        On normal completion nothing is added here - ``thread()`` itself is
        responsible for the sentinel. If it raises, the exception is stored
        and a sentinel is queued so the consumer wakes up instead of hanging.
        """
        try:
            self.thread()
        except Exception as exc:
            # Store the error before queueing the sentinel so the consumer
            # is guaranteed to see it when it receives None.
            self._error = exc
            self.q.put(None)

    @abstractmethod
    def thread(self):
        """Produce items onto ``self.q``, ending with ``None``."""
305
+
306
+
307
class PileupStream(Stream):
    """Stream of ``PileupBatch`` objects read from a BAM file."""

    def __init__(self, bam_path='', input_batch_size=1*1024*1024*1024, output_batch_size=1*1024*1024*1024):
        """Open the native pileup stream.

        Args:
            bam_path: path of the BAM file, as ``str``, ``bytes`` or
                ``os.PathLike``.
            input_batch_size: third argument of the native create call.
            output_batch_size: fourth argument of the native create call.
                NOTE(review): the prototype comment labels that slot
                ``data_limit`` - confirm which meaning is intended.

        Raises:
            RuntimeError: if the native create call returns a NULL stream.
        """
        import os

        super().__init__()

        context = get_context()
        self.stream = lib.shadowfax_pileup_stream_create(
            context, os.fsencode(bam_path), input_batch_size, output_batch_size
        )
        # Fail here instead of letting the producer thread hand a NULL
        # handle to the native done/next calls.
        if not self.stream:
            raise RuntimeError(f"could not open pileup stream for {bam_path!r}")

    def thread(self):
        """Producer loop: wrap each native batch and queue it, then end."""
        while not lib.shadowfax_pileup_stream_done(self.stream):
            batch_ptr = lib.shadowfax_pileup_stream_next(self.stream)
            # A NULL batch is skipped; the loop re-checks the done flag.
            if not batch_ptr:
                continue
            batch = PileupBatch(self.stream, batch_ptr)
            self.q.put(batch)
        # End-of-stream sentinel for the consumer.
        self.q.put(None)
321
+
322
+
323
class ReadDepthStream(Stream):
    """Stream stage that turns upstream pileup batches into depth data.

    Attach the upstream with ``pileup_stream | ReadDepthStream()``.
    """

    def __init__(self):
        super().__init__()

        context = get_context()
        self.stream = lib.shadowfax_depth_stream_create(context)

    def thread(self):
        """Producer loop: one depth result per upstream pileup batch.

        A single native batch and a single ``DepthBatchData`` are reused for
        every iteration, so each queued item is the same Python object
        refreshed in place.
        """
        batch = lib.shadowfax_depth_batch_create()
        batch_data = DepthBatchData()

        for pileup_batch in self.input_stream:
            lib.shadowfax_depth_stream_next(self.stream, pileup_batch.data, pileup_batch.n_windows, batch)
            lib.shadowfax_depth_batch_get_data(batch, ctypes.byref(batch_data))
            self.q.put(batch_data)

        # End-of-stream sentinel: without it the consumer's q.get() never
        # returns once the upstream is exhausted.
        self.q.put(None)
338
+
339
+
340
def count_bases(variants, pileup_batch):
    """Count bases for each variant against one pileup batch.

    Args:
        variants: sized sequence of ``(pos, ref, alt)`` triples, where
            ``ref`` and ``alt`` are single ASCII characters.
        pileup_batch: the ``PileupBatch`` to count in.

    Returns:
        A list with one ``BaseCounts`` per variant, in input order. The list
        is empty when ``variants`` is empty or the batch's ``data`` field is
        falsy.

    Raises:
        TypeError: if ``variants`` is non-empty and ``pileup_batch`` is not
            a ``PileupBatch``.
    """
    if not variants:
        return []
    if not isinstance(pileup_batch, PileupBatch):
        raise TypeError("pileup_batch must be a PileupBatch")
    window_data = pileup_batch.data
    if not window_data:
        return []

    n_variants = len(variants)
    variant_arr = (_VariantC * n_variants)()
    counts_arr = (_BaseCountsC * n_variants)()

    # Fill the input array; counts_arr starts zeroed and is written by the
    # native call below.
    for record, (pos, ref, alt) in zip(variant_arr, variants):
        record.pos = pos
        record.ref = ref.encode("ascii")
        record.alt = alt.encode("ascii")

    lib.shadowfax_count_bases(
        get_context(),
        variant_arr,
        n_variants,
        window_data,
        pileup_batch.n_windows,
        counts_arr,
    )

    return [
        BaseCounts(tally.a_count, tally.c_count, tally.g_count, tally.t_count)
        for tally in counts_arr
    ]
@@ -0,0 +1,5 @@
1
+ Metadata-Version: 2.1
2
+ Name: shadowfax
3
+ Version: 0.1.0
4
+ Requires-Python: >=3.9
5
+
@@ -0,0 +1,13 @@
1
+ shadowfax/shadowfax.py,sha256=MHEQ8nZMDJ4dDtFTWFuS_upv-Dvh2AqZTAkikaRGkus,9965
2
+ shadowfax/__init__.py,sha256=X5VrJjRIvbbinoC5mgzw91iA5iKmeojE5n39QPLSdGQ,49
3
+ shadowfax/_native/linux_x86_64/libnvcomp_gdeflate.so,sha256=ts1NpPaBGQ75Z2l6p4e50-9P84Z0nfiPT9BWfGKYTgg,20066664
4
+ shadowfax/_native/linux_x86_64/libshadowfax.so,sha256=kV8Ps8VDBkF0XAxj1pTDEwSzfpeKqiYiSMsqOpTfcyM,354600
5
+ shadowfax/_native/linux_x86_64/libcudart.so,sha256=0NpBrhMjz07rYQEj1p13FBJM_l6_zE5F8CuRDlHFfuY,679264
6
+ shadowfax/_native/linux_x86_64/libnvcomp_bitcomp.so,sha256=Q9nX6cMeJMg4MJFN0Nz_70j3duoE920QvUUg4JN5ays,20588320
7
+ shadowfax/_native/linux_x86_64/libcudart.so.11.0,sha256=0NpBrhMjz07rYQEj1p13FBJM_l6_zE5F8CuRDlHFfuY,679264
8
+ shadowfax/_native/linux_x86_64/libnvcomp.so,sha256=GKortofAEEaUXZbE4jNl-cgpG8ewLnQBUgZr8ny_1Mw,18066760
9
+ shadowfax/_native/linux_x86_64/libcudart.so.11.8.89,sha256=0NpBrhMjz07rYQEj1p13FBJM_l6_zE5F8CuRDlHFfuY,679264
10
+ shadowfax-0.1.0.dist-info/WHEEL,sha256=m3DgTshqvfz_3GrnNNgvVzF0ic6QslWdsD5HlQXcacU,132
11
+ shadowfax-0.1.0.dist-info/METADATA,sha256=k1C98NbUp30NXH4xRFmjDo-pcsJz3BrfgsVXn075yYM,77
12
+ shadowfax-0.1.0.dist-info/RECORD,,
13
+ shadowfax-0.1.0.dist-info/top_level.txt,sha256=9mpv42kNxlTaBGh-MwqspTmLa7kwomz1EiuEk2eXwGA,10
@@ -0,0 +1,6 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (75.3.3)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-manylinux_2_24_x86_64
5
+ Root-Is-Purelib: False
6
+
@@ -0,0 +1 @@
1
+ shadowfax