shadowfax 0.1.0__py3-none-manylinux_2_24_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- shadowfax/__init__.py +1 -0
- shadowfax/_native/linux_x86_64/libcudart.so +0 -0
- shadowfax/_native/linux_x86_64/libcudart.so.11.0 +0 -0
- shadowfax/_native/linux_x86_64/libcudart.so.11.8.89 +0 -0
- shadowfax/_native/linux_x86_64/libnvcomp.so +0 -0
- shadowfax/_native/linux_x86_64/libnvcomp_bitcomp.so +0 -0
- shadowfax/_native/linux_x86_64/libnvcomp_gdeflate.so +0 -0
- shadowfax/_native/linux_x86_64/libshadowfax.so +0 -0
- shadowfax/shadowfax.py +374 -0
- shadowfax-0.1.0.dist-info/METADATA +5 -0
- shadowfax-0.1.0.dist-info/RECORD +13 -0
- shadowfax-0.1.0.dist-info/WHEEL +6 -0
- shadowfax-0.1.0.dist-info/top_level.txt +1 -0
shadowfax/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
from .shadowfax import PileupStream, count_bases
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
shadowfax/shadowfax.py
ADDED
|
@@ -0,0 +1,374 @@
|
|
|
1
|
+
import ctypes
|
|
2
|
+
import queue
|
|
3
|
+
import threading
|
|
4
|
+
from concurrent.futures import ThreadPoolExecutor
|
|
5
|
+
from abc import ABC, abstractmethod
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
|
|
8
|
+
WINDOW_SIZE = 16384
|
|
9
|
+
|
|
10
|
+
class Context(ctypes.Structure):
    """Opaque handle type for the native shadowfax context.

    No fields are declared here; the type exists so that
    ``ctypes.POINTER(Context)`` can be used in native function prototypes.
    """
|
|
12
|
+
|
|
13
|
+
class Flagstat(ctypes.Structure):
    """Sixteen unsigned 64-bit counters, laid out in declaration order.

    NOTE(review): the field names look like samtools-flagstat style
    counters (reads, mapped, pairs, duplicates, ...) -- confirm against the
    native header before relying on their exact meaning.
    """

    # Every counter has the same C type, so the layout is fully described
    # by the ordered list of names.
    _fields_ = [
        (counter, ctypes.c_uint64)
        for counter in (
            "n_reads",
            "n_mapped",
            "n_pair_all",
            "n_pair_map",
            "n_pair_good",
            "n_sgltn",
            "n_read1",
            "n_read2",
            "n_dup",
            "n_diffchr",
            "n_diffhigh",
            "n_secondary",
            "n_supp",
            "n_primary",
            "n_pmapped",
            "n_pdup",
        )
    ]
|
|
32
|
+
|
|
33
|
+
class FlagstatStream(ctypes.Structure):
    """Opaque handle type for a native flagstat stream.

    Declared without fields; only pointers to it are passed to and
    returned from the native library.
    """
|
|
35
|
+
|
|
36
|
+
class PileupStreamC(ctypes.Structure):
    """Opaque handle type for a native pileup stream.

    Declared without fields; only pointers to it are passed to and
    returned from the native library.
    """
|
|
38
|
+
|
|
39
|
+
class DepthStream(ctypes.Structure):
    """Opaque handle type for a native depth stream.

    Declared without fields; only pointers to it are passed to and
    returned from the native library.
    """
|
|
41
|
+
|
|
42
|
+
class _PileupBatchC(ctypes.Structure):
|
|
43
|
+
_fields_ = [
|
|
44
|
+
('n_windows', ctypes.c_uint64),
|
|
45
|
+
('start_pos', ctypes.c_uint64),
|
|
46
|
+
('end_pos', ctypes.c_uint64),
|
|
47
|
+
('ref_id', ctypes.c_uint32),
|
|
48
|
+
('data', ctypes.c_void_p),
|
|
49
|
+
]
|
|
50
|
+
|
|
51
|
+
def to_dict(self):
|
|
52
|
+
return ctypes_to_dict(self)
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
class PileupBatch:
    """Owning Python wrapper around a native pileup batch pointer.

    Holds the native stream pointer together with the batch pointer and
    releases the batch through ``shadowfax_pileup_stream_batch_destroy``
    when the wrapper is finalized. Attributes that are not found on the
    wrapper itself are forwarded to the pointed-to struct (for example
    ``n_windows`` or ``data``).

    Args:
        stream: Native stream pointer the batch was obtained from.
        ptr: Pointer to the native batch struct.
    """

    __slots__ = ("_stream", "_ptr")

    def __init__(self, stream, ptr):
        self._stream = stream
        self._ptr = ptr

    def __del__(self):
        # The finalizer can run on an instance whose __init__ never
        # completed, so read the slots defensively.
        stream = getattr(self, "_stream", None)
        ptr = getattr(self, "_ptr", None)
        if stream is None or ptr is None:
            return
        # Drop our reference first so the native batch cannot be released
        # a second time through this wrapper.
        self._ptr = None
        lib.shadowfax_pileup_stream_batch_destroy(stream, ptr)

    def __getattr__(self, name):
        # __getattr__ only runs after normal lookup failed. If the missing
        # name is one of our own slots, forwarding would read self._ptr,
        # re-enter this method and recurse without bound.
        if name in PileupBatch.__slots__:
            raise AttributeError(name)
        return getattr(self._ptr.contents, name)

    @property
    def has_data(self):
        """``True`` when the batch's ``data`` pointer is non-NULL."""
        return bool(self._ptr.contents.data)
|
|
75
|
+
|
|
76
|
+
class DepthBatch(ctypes.Structure):
    """Opaque handle type for a native depth batch.

    Declared without fields; only pointers to it are passed to and
    returned from the native library.
    """
|
|
78
|
+
|
|
79
|
+
class DepthBatchData(ctypes.Structure):
    """Pointer to a ``uint32`` buffer plus the number of regions it covers."""

    _fields_ = [
        ("buffer", ctypes.POINTER(ctypes.c_uint32)),
        ("n_regions", ctypes.c_uint64),
    ]

    def to_buffer(self):
        """Return the buffer reinterpreted as a two-dimensional ctypes array.

        The result is a view onto the memory ``buffer`` points at (no copy)
        holding ``n_regions * WINDOW_SIZE`` ``uint32`` values. Accessing a
        NULL ``buffer`` raises ``ValueError`` from ctypes.
        """
        # NOTE(review): this yields WINDOW_SIZE rows of n_regions values
        # each. If the native layout is n_regions rows of WINDOW_SIZE
        # values, the two dimensions should be swapped -- confirm against
        # the library.
        row_type = ctypes.c_uint32 * self.n_regions
        grid_type = row_type * WINDOW_SIZE
        grid_ptr = ctypes.cast(self.buffer, ctypes.POINTER(grid_type))
        return grid_ptr.contents
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
class _VariantC(ctypes.Structure):
|
|
92
|
+
_fields_ = [
|
|
93
|
+
("pos", ctypes.c_uint32),
|
|
94
|
+
("ref", ctypes.c_char),
|
|
95
|
+
("alt", ctypes.c_char),
|
|
96
|
+
("_alignment_padding", ctypes.c_char * 2),
|
|
97
|
+
]
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
class _BaseCountsC(ctypes.Structure):
|
|
101
|
+
_fields_ = [
|
|
102
|
+
("a_count", ctypes.c_uint32),
|
|
103
|
+
("c_count", ctypes.c_uint32),
|
|
104
|
+
("g_count", ctypes.c_uint32),
|
|
105
|
+
("t_count", ctypes.c_uint32),
|
|
106
|
+
]
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
class BaseCounts:
    """Plain-Python record of per-base counts for one variant.

    Args:
        a_count: Count for base A (default 0).
        c_count: Count for base C (default 0).
        g_count: Count for base G (default 0).
        t_count: Count for base T (default 0).
    """

    __slots__ = ("a_count", "c_count", "g_count", "t_count")

    def __init__(self, a_count=0, c_count=0, g_count=0, t_count=0):
        self.a_count = a_count
        self.c_count = c_count
        self.g_count = g_count
        self.t_count = t_count

    def __repr__(self):
        # Readable form for logs and interactive sessions; mirrors the
        # constructor's keyword arguments.
        fields = ", ".join(
            f"{name}={getattr(self, name)!r}" for name in self.__slots__
        )
        return f"{type(self).__name__}({fields})"

    def to_dict(self):
        """Return the four counts as a ``dict`` keyed by attribute name."""
        return {name: getattr(self, name) for name in self.__slots__}
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
class BamFlagstat(ctypes.Structure):
    """Two ``Flagstat`` counter sets, named ``passed`` and ``failed``.

    NOTE(review): presumably the counters for reads that passed / failed
    QC -- confirm against the native header.
    """

    _fields_ = [(label, Flagstat) for label in ("passed", "failed")]

    def to_dict(self):
        """Return both counter sets as nested plain dicts."""
        as_mapping = ctypes_to_dict(self)
        return as_mapping
|
|
135
|
+
|
|
136
|
+
class FlagstatBatch(ctypes.Structure):
    """Output struct filled by ``shadowfax_flagstat_stream_next``.

    Carries a ``BamFlagstat`` payload plus two ``uint64`` progress fields
    (``bytes_processed`` and ``total_bytes``).
    """

    _fields_ = [
        ("data", BamFlagstat),
        ("bytes_processed", ctypes.c_uint64),
        ("total_bytes", ctypes.c_uint64),
    ]

    def to_dict(self):
        """Return the batch, including the nested stats, as plain dicts."""
        as_mapping = ctypes_to_dict(self)
        return as_mapping
|
|
145
|
+
|
|
146
|
+
def ctypes_to_dict(obj):
    """Convert a ctypes structure into a plain ``dict``, recursively.

    Args:
        obj: A ``ctypes.Structure`` instance with a ``_fields_`` list.

    Returns:
        A dict mapping each field name to its value. Nested structures are
        converted to dicts as well; every other value is stored as ctypes
        returns it from attribute access.
    """
    result = {}
    # _fields_ entries are (name, type) or, for bit fields,
    # (name, type, bit_width); only the name is needed here.
    for field, *_ in obj._fields_:
        value = getattr(obj, field)
        if isinstance(value, ctypes.Structure):
            value = ctypes_to_dict(value)
        result[field] = value
    return result
|
|
155
|
+
|
|
156
|
+
# The native library ships inside the package, next to this module.
lib_path = Path(__file__).resolve().parent / "_native" / "linux_x86_64" / "libshadowfax.so"
lib = ctypes.cdll.LoadLibrary(str(lib_path))

# NOTE(review): functions below that get no explicit ``restype`` keep the
# ctypes default (C int) -- confirm that matches the native declarations.

# --- context -----------------------------------------------------------
lib.shadowfax_context_create.restype = ctypes.POINTER(Context)

# --- flagstat stream ---------------------------------------------------
lib.shadowfax_flagstat_stream_create.restype = ctypes.POINTER(FlagstatStream)
lib.shadowfax_flagstat_stream_create.argtypes = [
    ctypes.c_char_p,  # bam_path
    ctypes.c_uint64,  # input_batch_size
    ctypes.c_uint64,  # data_limit
]

lib.shadowfax_flagstat_stream_next.argtypes = [ctypes.POINTER(FlagstatStream), ctypes.POINTER(FlagstatBatch)]

lib.shadowfax_flagstat_stream_done.restype = ctypes.c_bool
lib.shadowfax_flagstat_stream_done.argtypes = [ctypes.POINTER(FlagstatStream)]

# --- pileup stream -----------------------------------------------------
lib.shadowfax_pileup_stream_create.restype = ctypes.POINTER(PileupStreamC)
lib.shadowfax_pileup_stream_create.argtypes = [
    ctypes.POINTER(Context),
    ctypes.c_char_p,  # bam_path
    ctypes.c_uint64,  # input_batch_size
    ctypes.c_uint64,  # data_limit
]

lib.shadowfax_pileup_stream_done.restype = ctypes.c_bool
lib.shadowfax_pileup_stream_done.argtypes = [ctypes.POINTER(PileupStreamC)]

lib.shadowfax_pileup_stream_next.restype = ctypes.POINTER(_PileupBatchC)
lib.shadowfax_pileup_stream_next.argtypes = [ctypes.POINTER(PileupStreamC)]

lib.shadowfax_pileup_stream_batch_destroy.argtypes = [ctypes.POINTER(PileupStreamC), ctypes.POINTER(_PileupBatchC)]

# --- depth stream ------------------------------------------------------
lib.shadowfax_depth_stream_create.restype = ctypes.POINTER(DepthStream)
lib.shadowfax_depth_stream_create.argtypes = [ctypes.POINTER(Context)]

lib.shadowfax_depth_batch_create.restype = ctypes.POINTER(DepthBatch)
lib.shadowfax_depth_batch_create.argtypes = []

lib.shadowfax_depth_stream_done.restype = ctypes.c_bool
lib.shadowfax_depth_stream_done.argtypes = [ctypes.POINTER(DepthStream)]

lib.shadowfax_depth_stream_next.argtypes = [
    ctypes.POINTER(DepthStream),
    ctypes.c_void_p,
    ctypes.c_uint64,
    ctypes.POINTER(DepthBatch),
]

lib.shadowfax_depth_batch_get_data.argtypes = [ctypes.POINTER(DepthBatch), ctypes.POINTER(DepthBatchData)]

# --- base counting -----------------------------------------------------
lib.shadowfax_count_bases.restype = None
lib.shadowfax_count_bases.argtypes = [
    ctypes.POINTER(Context),
    ctypes.POINTER(_VariantC),
    ctypes.c_uint64,
    ctypes.c_void_p,
    ctypes.c_uint64,
    ctypes.POINTER(_BaseCountsC),
]
|
|
242
|
+
|
|
243
|
+
|
|
244
|
+
# Worker pool used by flagstat() to run native calls off the caller's thread.
executor = ThreadPoolExecutor()

# Lazily created native context, cached for the lifetime of the module.
ctx = None


def get_context():
    """Return the shared native context, creating it on first use.

    Returns:
        The ``POINTER(Context)`` produced by ``shadowfax_context_create``.

    Raises:
        RuntimeError: If the native library returns a NULL context.
    """
    global ctx
    if ctx is None:
        created = lib.shadowfax_context_create()
        # A POINTER restype always yields a pointer object, never None, so
        # a NULL result is only visible through its truth value. Do not
        # cache it; a later call may retry.
        if not created:
            raise RuntimeError("shadowfax_context_create() returned NULL")
        ctx = created
    return ctx
|
|
252
|
+
|
|
253
|
+
|
|
254
|
+
def _flagstat_thread(q, bam_path):
    """Create a native flagstat stream for *bam_path* and return its handle.

    ``q`` is accepted but not used. The stream is created with an input
    batch size of 128 and a data limit of 1.
    """
    encoded_path = bam_path.encode()
    input_batch_size = ctypes.c_uint64(128)
    data_limit = ctypes.c_uint64(1)
    return lib.shadowfax_flagstat_stream_create(encoded_path, input_batch_size, data_limit)
|
|
258
|
+
|
|
259
|
+
|
|
260
|
+
def flagstat(bam_path):
    """Schedule ``_flagstat_thread`` for *bam_path* on the module executor.

    Returns:
        The ``concurrent.futures.Future`` for the submitted call.
    """
    # A fresh queue is handed to the worker on every call.
    return executor.submit(_flagstat_thread, queue.Queue(), bam_path)
|
|
266
|
+
|
|
267
|
+
def flagstat_stream(bam_path='', batch_size=128*1024*1024):
    """Open a native flagstat stream and return a generator over its batches.

    The native stream is created immediately, with a data limit of 1 GiB;
    the native ``next`` call is only made as the generator is consumed.

    NOTE: the generator yields the *same* ``FlagstatBatch`` object on every
    step, refilled in place by the native call. Read or copy what you need
    (e.g. ``batch.to_dict()``) before advancing.
    """
    data_limit = 1 << 30
    handle = lib.shadowfax_flagstat_stream_create(bam_path.encode(), batch_size, data_limit)

    def _batches():
        shared_batch = FlagstatBatch()
        while True:
            if lib.shadowfax_flagstat_stream_done(handle):
                return
            lib.shadowfax_flagstat_stream_next(handle, ctypes.byref(shared_batch))
            yield shared_batch

    return _batches()
|
|
279
|
+
|
|
280
|
+
class Stream(ABC):
    """Iterator base class fed by a background producer thread.

    Subclasses implement :meth:`thread`. It runs in a daemon thread that is
    started lazily on the first ``next()`` call, pushes items onto
    ``self.q`` and must push ``None`` once to mark the end of the stream.

    Once the end marker has been seen, every further ``next()`` raises
    ``StopIteration`` instead of blocking on the empty queue. If
    :meth:`thread` raises, the exception is re-raised in the consumer on
    the ``next()`` call that would otherwise have waited forever.
    """

    def __init__(self):
        # Single-slot queue: at most one item is buffered ahead of the consumer.
        self.q = queue.Queue(maxsize=1)
        self.started = False
        self._exhausted = False
        self._error = None

    def __iter__(self):
        return self

    def __next__(self):
        # getattr defaults keep subclasses working even if they skipped
        # Stream.__init__.
        if getattr(self, "_exhausted", False):
            raise StopIteration
        if not self.started:
            worker = threading.Thread(target=self._run, daemon=True)
            worker.start()
            self.started = True
        item = self.q.get()
        if item is None:
            self._exhausted = True
            error = getattr(self, "_error", None)
            if error is not None:
                raise error
            raise StopIteration
        return item

    def __ror__(self, input_stream):
        """Support ``source | self``: remember *source* as ``input_stream``."""
        self.input_stream = input_stream
        return self

    def _run(self):
        """Thread entry point: run the producer and surface its failure."""
        try:
            self.thread()
        except Exception as exc:
            # Record the error before waking the consumer with the end
            # marker, so __next__ sees it when it dequeues the marker.
            self._error = exc
            self.q.put(None)

    @abstractmethod
    def thread(self):
        """Produce items onto ``self.q``; put ``None`` when finished."""
|
|
305
|
+
|
|
306
|
+
|
|
307
|
+
class PileupStream(Stream):
    """Stream of ``PileupBatch`` objects read from a BAM file.

    Args:
        bam_path: Path of the BAM file; encoded with ``str.encode()`` before
            being passed to the native library.
        input_batch_size: Forwarded as the native ``input_batch_size``.
        output_batch_size: Forwarded as the fourth native argument (named
            ``data_limit`` in the prototype comments).
    """

    def __init__(self, bam_path='', input_batch_size=1*1024*1024*1024, output_batch_size=1*1024*1024*1024):
        super().__init__()
        self.stream = lib.shadowfax_pileup_stream_create(
            get_context(),
            bam_path.encode(),
            input_batch_size,
            output_batch_size,
        )

    def thread(self):
        """Producer loop: enqueue batches until the native stream is done."""
        while True:
            if lib.shadowfax_pileup_stream_done(self.stream):
                break
            raw_batch = lib.shadowfax_pileup_stream_next(self.stream)
            # A NULL batch carries nothing to hand on; poll again.
            if raw_batch:
                self.q.put(PileupBatch(self.stream, raw_batch))
        # End-of-stream marker for the consumer.
        self.q.put(None)
|
|
321
|
+
|
|
322
|
+
|
|
323
|
+
class ReadDepthStream(Stream):
    """Stream stage that turns pileup batches into ``DepthBatchData``.

    The upstream iterable is attached with the pipe operator
    (``source | ReadDepthStream()``), which stores it as ``input_stream``.
    """

    def __init__(self):
        super().__init__()

        ctx = get_context()
        self.stream = lib.shadowfax_depth_stream_create(ctx)

    def thread(self):
        """Producer loop: one ``DepthBatchData`` per upstream pileup batch."""
        batch = lib.shadowfax_depth_batch_create()

        for pileup_batch in self.input_stream:
            lib.shadowfax_depth_stream_next(self.stream, pileup_batch.data, pileup_batch.n_windows, batch)
            # Use a fresh descriptor for every item so one already handed to
            # the consumer is not overwritten by the next iteration.
            # NOTE(review): the buffer it points at still belongs to the
            # native batch and may be reused -- confirm its lifetime.
            batch_data = DepthBatchData()
            lib.shadowfax_depth_batch_get_data(batch, ctypes.byref(batch_data))
            self.q.put(batch_data)

        # End-of-stream marker; without it the consumer blocks forever in
        # Stream.__next__ once the input is exhausted.
        self.q.put(None)
|
|
338
|
+
|
|
339
|
+
|
|
340
|
+
def count_bases(variants, pileup_batch):
    """Count A/C/G/T observations for each variant within a pileup batch.

    Args:
        variants: Iterable of ``(pos, ref, alt)`` triples. ``ref`` and
            ``alt`` are strings that must each encode to exactly one ASCII
            byte, because they are stored in ``c_char`` fields.
        pileup_batch: The ``PileupBatch`` to count in.

    Returns:
        A list with one ``BaseCounts`` per variant, in input order. The
        list is empty when there are no variants or when the batch has no
        data pointer.

    Raises:
        TypeError: If ``pileup_batch`` is not a ``PileupBatch`` (only
            checked when at least one variant is given).
    """
    # Materialize once so generators and other one-shot iterables work with
    # the emptiness test, len() and the fill loop below.
    variants = list(variants)
    if not variants:
        return []

    if not isinstance(pileup_batch, PileupBatch):
        raise TypeError("pileup_batch must be a PileupBatch")

    if not pileup_batch.data:
        return []

    n_variants = len(variants)
    variant_arr = (_VariantC * n_variants)()
    counts_arr = (_BaseCountsC * n_variants)()
    for idx, (pos, ref, alt) in enumerate(variants):
        entry = variant_arr[idx]  # view onto the array element, not a copy
        entry.pos = pos
        entry.ref = ref.encode("ascii")
        entry.alt = alt.encode("ascii")

    # The native call fills counts_arr in place, one slot per variant.
    lib.shadowfax_count_bases(
        get_context(),
        variant_arr,
        n_variants,
        pileup_batch.data,
        pileup_batch.n_windows,
        counts_arr,
    )

    return [
        BaseCounts(slot.a_count, slot.c_count, slot.g_count, slot.t_count)
        for slot in counts_arr
    ]
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
shadowfax/shadowfax.py,sha256=MHEQ8nZMDJ4dDtFTWFuS_upv-Dvh2AqZTAkikaRGkus,9965
|
|
2
|
+
shadowfax/__init__.py,sha256=X5VrJjRIvbbinoC5mgzw91iA5iKmeojE5n39QPLSdGQ,49
|
|
3
|
+
shadowfax/_native/linux_x86_64/libnvcomp_gdeflate.so,sha256=ts1NpPaBGQ75Z2l6p4e50-9P84Z0nfiPT9BWfGKYTgg,20066664
|
|
4
|
+
shadowfax/_native/linux_x86_64/libshadowfax.so,sha256=kV8Ps8VDBkF0XAxj1pTDEwSzfpeKqiYiSMsqOpTfcyM,354600
|
|
5
|
+
shadowfax/_native/linux_x86_64/libcudart.so,sha256=0NpBrhMjz07rYQEj1p13FBJM_l6_zE5F8CuRDlHFfuY,679264
|
|
6
|
+
shadowfax/_native/linux_x86_64/libnvcomp_bitcomp.so,sha256=Q9nX6cMeJMg4MJFN0Nz_70j3duoE920QvUUg4JN5ays,20588320
|
|
7
|
+
shadowfax/_native/linux_x86_64/libcudart.so.11.0,sha256=0NpBrhMjz07rYQEj1p13FBJM_l6_zE5F8CuRDlHFfuY,679264
|
|
8
|
+
shadowfax/_native/linux_x86_64/libnvcomp.so,sha256=GKortofAEEaUXZbE4jNl-cgpG8ewLnQBUgZr8ny_1Mw,18066760
|
|
9
|
+
shadowfax/_native/linux_x86_64/libcudart.so.11.8.89,sha256=0NpBrhMjz07rYQEj1p13FBJM_l6_zE5F8CuRDlHFfuY,679264
|
|
10
|
+
shadowfax-0.1.0.dist-info/WHEEL,sha256=m3DgTshqvfz_3GrnNNgvVzF0ic6QslWdsD5HlQXcacU,132
|
|
11
|
+
shadowfax-0.1.0.dist-info/METADATA,sha256=k1C98NbUp30NXH4xRFmjDo-pcsJz3BrfgsVXn075yYM,77
|
|
12
|
+
shadowfax-0.1.0.dist-info/RECORD,,
|
|
13
|
+
shadowfax-0.1.0.dist-info/top_level.txt,sha256=9mpv42kNxlTaBGh-MwqspTmLa7kwomz1EiuEk2eXwGA,10
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
shadowfax
|