bio-twobit 0.2.2 → 0.2.4
This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +2 -1
- data/ext/bio/twobit/2bit.c +23 -17
- data/lib/bio/twobit/version.rb +1 -1
- metadata +3 -6
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 4578e19f1827f09cf1ca7398ddd0576296044a5e789081d3d4950137ee75d01b
|
|
4
|
+
data.tar.gz: c33949f1f4eeea07f383f2ba0f3b5f9a0cfb37b4d82d2a9c399ac5143ee84c11
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: a137044cd596bcee5b56bf5d164fb281937e8264f708c61176b443d5a9ef9a3eff8591d0fcd2377bea5fc3accefc0c3ab22e6f80b9658744baa2d1d3731ddeb2
|
|
7
|
+
data.tar.gz: 6200a2c8733b961f3a937cf4bb32271b53a1dc9186d48fed764a51fb31b75e4e6c16481f5547bfc765916a5b5bb57aaed9bd012948bd096689b7b632d669eaee
|
data/README.md
CHANGED
|
@@ -2,7 +2,8 @@
|
|
|
2
2
|
|
|
3
3
|
[](https://badge.fury.io/rb/bio-twobit)
|
|
4
4
|
[](https://github.com/kojix2/bio-twobit/actions/workflows/ci.yml)
|
|
5
|
-
[](https://rubydoc.info/gems/bio-twobit)
|
|
6
|
+
[](https://tokei.kojix2.net/github/kojix2/bio-twobit)
|
|
6
7
|
[](https://zenodo.org/badge/latestdoi/436454379)
|
|
7
8
|
|
|
8
9
|
Bio::TwoBit is a Ruby interface to 2bit files.
|
data/ext/bio/twobit/2bit.c
CHANGED
|
@@ -18,7 +18,8 @@ uint64_t twobitTell(TwoBit *tb);
|
|
|
18
18
|
*/
|
|
19
19
|
size_t twobitRead(void *data, size_t sz, size_t nmemb, TwoBit *tb) {
|
|
20
20
|
if(tb->data) {
|
|
21
|
-
|
|
21
|
+
// tb->data is a void*, so perform byte-wise pointer arithmetic via a char* cast
|
|
22
|
+
if(memcpy(data, (const char*)tb->data + tb->offset, nmemb * sz) == NULL) return 0;
|
|
22
23
|
tb->offset += nmemb * sz;
|
|
23
24
|
return nmemb;
|
|
24
25
|
} else {
|
|
@@ -69,7 +70,7 @@ void bytes2bases(char *seq, uint8_t *byte, uint32_t sz, int offset) {
|
|
|
69
70
|
|
|
70
71
|
// Deal with the first partial byte
|
|
71
72
|
if(offset != 0) {
|
|
72
|
-
while(offset < 4) {
|
|
73
|
+
while(offset < 4 && pos < sz) {
|
|
73
74
|
seq[pos++] = byte2base(foo, offset++);
|
|
74
75
|
}
|
|
75
76
|
if(pos >= sz) return;
|
|
@@ -204,15 +205,17 @@ error:
|
|
|
204
205
|
*/
|
|
205
206
|
char *twobitSequence(TwoBit *tb, char *chrom, uint32_t start, uint32_t end) {
|
|
206
207
|
uint32_t i, tid=0;
|
|
208
|
+
int found = 0;
|
|
207
209
|
|
|
208
210
|
//Get the chromosome ID
|
|
209
211
|
for(i=0; i<tb->hdr->nChroms; i++) {
|
|
210
212
|
if(strcmp(tb->cl->chrom[i], chrom) == 0) {
|
|
211
213
|
tid = i;
|
|
214
|
+
found = 1;
|
|
212
215
|
break;
|
|
213
216
|
}
|
|
214
217
|
}
|
|
215
|
-
if(
|
|
218
|
+
if(!found) return NULL;
|
|
216
219
|
|
|
217
220
|
//Get the start/end if not specified
|
|
218
221
|
if(start == end && end == 0) {
|
|
@@ -309,13 +312,13 @@ void *twobitBasesWorker(TwoBit *tb, uint32_t tid, uint32_t start, uint32_t end,
|
|
|
309
312
|
|
|
310
313
|
while(i < len) {
|
|
311
314
|
// Check if we need to jump
|
|
312
|
-
if(maskIdx != -1 && start + i + 4 >= maskStart) {
|
|
315
|
+
if(maskIdx != (uint32_t)-1 && start + i + 4 >= maskStart) {
|
|
313
316
|
if(start + i >= maskStart || start + i + 4 - offset > maskStart) {
|
|
314
317
|
//Jump iff the whole byte is inside an N block
|
|
315
318
|
if(start + i >= maskStart && start + i + 4 - offset < maskEnd) {
|
|
316
319
|
//iff we're fully in an N block then jump
|
|
317
320
|
i = maskEnd - start;
|
|
318
|
-
getMask(tb, tid, i, end, &maskIdx, &maskStart, &maskEnd);
|
|
321
|
+
getMask(tb, tid, start + i, end, &maskIdx, &maskStart, &maskEnd);
|
|
319
322
|
offset = (start + i) % 4;
|
|
320
323
|
j = i / 4;
|
|
321
324
|
mask = getByteMaskFromOffset(offset);
|
|
@@ -331,7 +334,7 @@ void *twobitBasesWorker(TwoBit *tb, uint32_t tid, uint32_t start, uint32_t end,
|
|
|
331
334
|
if(mask & 4 && (foo + 1 >= maskStart && foo + 1 < maskEnd)) mask -= 4;
|
|
332
335
|
if(mask & 8 && (foo >= maskStart && foo < maskEnd)) mask -= 8;
|
|
333
336
|
if(foo + 4 > maskEnd) {
|
|
334
|
-
getMask(tb, tid, i, end, &maskIdx, &maskStart, &maskEnd);
|
|
337
|
+
getMask(tb, tid, start + i, end, &maskIdx, &maskStart, &maskEnd);
|
|
335
338
|
continue;
|
|
336
339
|
}
|
|
337
340
|
}
|
|
@@ -397,16 +400,18 @@ error:
|
|
|
397
400
|
|
|
398
401
|
void *twobitBases(TwoBit *tb, char *chrom, uint32_t start, uint32_t end, int fraction) {
|
|
399
402
|
uint32_t tid = 0, i;
|
|
403
|
+
int found = 0;
|
|
400
404
|
|
|
401
405
|
//Get the chromosome ID
|
|
402
406
|
for(i=0; i<tb->hdr->nChroms; i++) {
|
|
403
407
|
if(strcmp(tb->cl->chrom[i], chrom) == 0) {
|
|
404
408
|
tid = i;
|
|
409
|
+
found = 1;
|
|
405
410
|
break;
|
|
406
411
|
}
|
|
407
412
|
}
|
|
408
413
|
|
|
409
|
-
if(
|
|
414
|
+
if(!found) return NULL;
|
|
410
415
|
|
|
411
416
|
//Get the start/end if not specified
|
|
412
417
|
if(start == end && end == 0) {
|
|
@@ -510,13 +515,13 @@ error:
|
|
|
510
515
|
for(i=0; i<tb->hdr->nChroms; i++) {
|
|
511
516
|
if(idx->nBlockStart[i]) free(idx->nBlockStart[i]);
|
|
512
517
|
}
|
|
513
|
-
free(idx->nBlockStart
|
|
518
|
+
free(idx->nBlockStart);
|
|
514
519
|
}
|
|
515
520
|
if(idx->nBlockSizes) {
|
|
516
521
|
for(i=0; i<tb->hdr->nChroms; i++) {
|
|
517
522
|
if(idx->nBlockSizes[i]) free(idx->nBlockSizes[i]);
|
|
518
523
|
}
|
|
519
|
-
free(idx->nBlockSizes
|
|
524
|
+
free(idx->nBlockSizes);
|
|
520
525
|
}
|
|
521
526
|
|
|
522
527
|
if(idx->maskBlockCount) free(idx->maskBlockCount);
|
|
@@ -524,13 +529,13 @@ error:
|
|
|
524
529
|
for(i=0; i<tb->hdr->nChroms; i++) {
|
|
525
530
|
if(idx->maskBlockStart[i]) free(idx->maskBlockStart[i]);
|
|
526
531
|
}
|
|
527
|
-
free(idx->maskBlockStart
|
|
532
|
+
free(idx->maskBlockStart);
|
|
528
533
|
}
|
|
529
534
|
if(idx->maskBlockSizes) {
|
|
530
535
|
for(i=0; i<tb->hdr->nChroms; i++) {
|
|
531
536
|
if(idx->maskBlockSizes[i]) free(idx->maskBlockSizes[i]);
|
|
532
537
|
}
|
|
533
|
-
free(idx->maskBlockSizes
|
|
538
|
+
free(idx->maskBlockSizes);
|
|
534
539
|
}
|
|
535
540
|
|
|
536
541
|
if(idx->offset) free(idx->offset);
|
|
@@ -706,12 +711,13 @@ TwoBit* twobitOpen(char *fname, int storeMasked) {
|
|
|
706
711
|
fd = fileno(tb->fp);
|
|
707
712
|
if(fstat(fd, &fs) == 0) {
|
|
708
713
|
tb->sz = (uint64_t) fs.st_size;
|
|
709
|
-
|
|
710
|
-
if(
|
|
711
|
-
|
|
712
|
-
|
|
713
|
-
|
|
714
|
-
|
|
714
|
+
void *map = mmap(NULL, fs.st_size, PROT_READ, MAP_SHARED, fd, 0);
|
|
715
|
+
if(map != MAP_FAILED) {
|
|
716
|
+
tb->data = map;
|
|
717
|
+
/* madvise is an optional optimization; ignore failures */
|
|
718
|
+
madvise(tb->data, fs.st_size, MADV_RANDOM);
|
|
719
|
+
} else {
|
|
720
|
+
tb->data = NULL; /* mmap failed */
|
|
715
721
|
}
|
|
716
722
|
}
|
|
717
723
|
|
data/lib/bio/twobit/version.rb
CHANGED
metadata
CHANGED
|
@@ -1,14 +1,13 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: bio-twobit
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.2.
|
|
4
|
+
version: 0.2.4
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- kojix2
|
|
8
|
-
autorequire:
|
|
9
8
|
bindir: bin
|
|
10
9
|
cert_chain: []
|
|
11
|
-
date:
|
|
10
|
+
date: 1980-01-02 00:00:00.000000000 Z
|
|
12
11
|
dependencies: []
|
|
13
12
|
description: This is a Ruby binding for lib2bit(https://github.com/dpryan79/lib2bit),
|
|
14
13
|
which provides high-speed access to genomic data in 2bit file format.
|
|
@@ -146,7 +145,6 @@ homepage: https://github.com/kojix2/bio-twobit
|
|
|
146
145
|
licenses:
|
|
147
146
|
- MIT
|
|
148
147
|
metadata: {}
|
|
149
|
-
post_install_message:
|
|
150
148
|
rdoc_options: []
|
|
151
149
|
require_paths:
|
|
152
150
|
- lib
|
|
@@ -161,8 +159,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
|
161
159
|
- !ruby/object:Gem::Version
|
|
162
160
|
version: '0'
|
|
163
161
|
requirements: []
|
|
164
|
-
rubygems_version: 3.
|
|
165
|
-
signing_key:
|
|
162
|
+
rubygems_version: 3.6.9
|
|
166
163
|
specification_version: 4
|
|
167
164
|
summary: A ruby library for accessing 2bit files
|
|
168
165
|
test_files: []
|