bio-twobit 0.1.3 → 0.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile +0 -3
- data/Rakefile +1 -1
- data/ext/bio/twobit/twobit.c +409 -412
- data/lib/bio/twobit/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 1f0ce7759c6099bf6f2675eb467722473f06040a583cad13f2400c1d50f4f3b7
|
4
|
+
data.tar.gz: '09bdb93292cc70fa665a73890d8500b56084639cc3e18d6a1602bdf62d943620'
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: ec2fb805c335562a64eca155c5806f3d004b8c4d3a2f6c2363eeb0c529749bb3f220bea4cf626d8d1618d6bdd061b247ae5a50a3f6d94314fbf6d0752bed281e
|
7
|
+
data.tar.gz: 96784953366c86e9e8e3d215a809faf0f23e6f012d3ff158ad47e80651fa2bea8cd0f87a3ca630579f7bb8e1aa41c2a30e449ba8af612ab8921d62e41521d293
|
data/Gemfile
CHANGED
data/Rakefile
CHANGED
data/ext/bio/twobit/twobit.c
CHANGED
@@ -32,7 +32,7 @@
|
|
32
32
|
#define NUM2INT64 NUM2LONG
|
33
33
|
#define UINT64_2NUM ULONG2NUM
|
34
34
|
#define INT64_2NUM LONG2NUM
|
35
|
-
#elif
|
35
|
+
#elif SIZEOF_LONG_LONG == SIZEOF_INT64
|
36
36
|
#define NUM2UINT64 NUM2ULL
|
37
37
|
#define NUM2INT64 NUM2LL
|
38
38
|
#define UINT64_2NUM ULL2NUM
|
@@ -48,494 +48,491 @@ static void TwoBit_free(void *ptr);
|
|
48
48
|
static size_t TwoBit_memsize(const void *ptr);
|
49
49
|
|
50
50
|
static const rb_data_type_t TwoBit_type = {
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
0,
|
58
|
-
0,
|
59
|
-
RUBY_TYPED_FREE_IMMEDIATELY,
|
51
|
+
.wrap_struct_name = "TwoBit",
|
52
|
+
.function = {
|
53
|
+
.dfree = TwoBit_free,
|
54
|
+
.dsize = TwoBit_memsize,
|
55
|
+
},
|
56
|
+
.flags = RUBY_TYPED_FREE_IMMEDIATELY,
|
60
57
|
};
|
61
58
|
|
62
59
|
static void
|
63
60
|
TwoBit_free(void *ptr)
|
64
61
|
{
|
65
|
-
|
66
|
-
|
62
|
+
// twobitClose checks for null
|
63
|
+
twobitClose(ptr);
|
67
64
|
}
|
68
65
|
|
69
66
|
static size_t
|
70
67
|
TwoBit_memsize(const void *ptr)
|
71
68
|
{
|
72
|
-
|
69
|
+
const TwoBit *data = ptr;
|
73
70
|
|
74
|
-
|
71
|
+
return data ? sizeof(*data) : 0;
|
75
72
|
}
|
76
73
|
|
77
74
|
/*
 * Fetch the TwoBit pointer wrapped by self, type-checked against
 * TwoBit_type by TypedData_Get_Struct.
 *
 * The result is NULL when no handle is attached: twobit_allocate wraps
 * NULL and twobit_close resets the pointer to NULL.
 */
static TwoBit *getTwoBit(VALUE self)
{
    TwoBit *handle = NULL;

    TypedData_Get_Struct(self, TwoBit, &TwoBit_type, handle);
    return handle;
}
|
84
81
|
|
85
82
|
static VALUE
|
86
83
|
twobit_allocate(VALUE klass)
|
87
84
|
{
|
88
|
-
|
85
|
+
TwoBit *tb = NULL;
|
89
86
|
|
90
|
-
|
87
|
+
return TypedData_Wrap_Struct(klass, &TwoBit_type, tb);
|
91
88
|
}
|
92
89
|
|
93
90
|
static VALUE
|
94
91
|
twobit_init(VALUE klass, VALUE fpath, VALUE storeMasked)
|
95
92
|
{
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
93
|
+
char *path = NULL;
|
94
|
+
int mask = 0;
|
95
|
+
TwoBit *tb = NULL;
|
96
|
+
|
97
|
+
path = StringValueCStr(fpath);
|
98
|
+
mask = NUM2INT(storeMasked);
|
99
|
+
|
100
|
+
tb = twobitOpen(path, mask);
|
101
|
+
if (!tb)
|
102
|
+
{
|
103
|
+
twobitClose(tb);
|
104
|
+
rb_raise(rb_eRuntimeError, "Could not open file %s", path);
|
105
|
+
return Qnil;
|
106
|
+
}
|
107
|
+
DATA_PTR(klass) = tb;
|
108
|
+
|
109
|
+
return klass;
|
113
110
|
}
|
114
111
|
|
115
112
|
static VALUE
|
116
113
|
twobit_close(VALUE self)
|
117
114
|
{
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
115
|
+
TwoBit *tb = getTwoBit(self);
|
116
|
+
if (tb)
|
117
|
+
{
|
118
|
+
twobitClose(tb);
|
119
|
+
DATA_PTR(self) = NULL;
|
120
|
+
}
|
121
|
+
|
122
|
+
return Qnil;
|
126
123
|
}
|
127
124
|
|
128
125
|
static VALUE
|
129
126
|
twobit_closed_question_mark(VALUE self)
|
130
127
|
{
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
128
|
+
TwoBit *tb = getTwoBit(self);
|
129
|
+
if (tb)
|
130
|
+
{
|
131
|
+
return Qfalse;
|
132
|
+
}
|
133
|
+
else
|
134
|
+
{
|
135
|
+
return Qtrue;
|
136
|
+
}
|
140
137
|
}
|
141
138
|
|
142
139
|
static VALUE
|
143
140
|
twobit_info(VALUE self)
|
144
141
|
{
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
-
|
189
|
-
|
190
|
-
|
191
|
-
|
192
|
-
|
193
|
-
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
-
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
|
207
|
-
|
208
|
-
|
209
|
-
|
210
|
-
|
211
|
-
|
142
|
+
TwoBit *tb = getTwoBit(self);
|
143
|
+
|
144
|
+
if (!tb)
|
145
|
+
{
|
146
|
+
rb_raise(rb_eRuntimeError, "The 2bit file handle is not open!");
|
147
|
+
return Qnil;
|
148
|
+
}
|
149
|
+
|
150
|
+
uint32_t i, j, foo;
|
151
|
+
VALUE val;
|
152
|
+
VALUE info = rb_hash_new();
|
153
|
+
|
154
|
+
// file size
|
155
|
+
val = UINT64_2NUM(tb->sz);
|
156
|
+
if (!val)
|
157
|
+
goto error;
|
158
|
+
rb_hash_aset(info, rb_str_new2("file_size"), val);
|
159
|
+
|
160
|
+
// nContigs
|
161
|
+
val = UINT32_2NUM(tb->hdr->nChroms);
|
162
|
+
if (!val)
|
163
|
+
goto error;
|
164
|
+
rb_hash_aset(info, rb_str_new2("nChroms"), val);
|
165
|
+
|
166
|
+
// sequence length
|
167
|
+
foo = 0;
|
168
|
+
for (i = 0; i < tb->hdr->nChroms; i++)
|
169
|
+
{
|
170
|
+
foo += tb->idx->size[i];
|
171
|
+
}
|
172
|
+
val = UINT32_2NUM(foo);
|
173
|
+
if (!val)
|
174
|
+
goto error;
|
175
|
+
rb_hash_aset(info, rb_str_new2("sequence_length"), val);
|
176
|
+
|
177
|
+
// hard-masked length
|
178
|
+
foo = 0;
|
179
|
+
for (i = 0; i < tb->hdr->nChroms; i++)
|
180
|
+
{
|
181
|
+
for (j = 0; j < tb->idx->nBlockCount[i]; j++)
|
182
|
+
{
|
183
|
+
foo += tb->idx->nBlockSizes[i][j];
|
184
|
+
}
|
185
|
+
}
|
186
|
+
val = UINT32_2NUM(foo);
|
187
|
+
if (!val)
|
188
|
+
goto error;
|
189
|
+
rb_hash_aset(info, rb_str_new2("hard_masked_length"), val);
|
190
|
+
|
191
|
+
// soft-masked length
|
192
|
+
if (tb->idx->maskBlockStart)
|
193
|
+
{
|
194
|
+
foo = 0;
|
195
|
+
for (i = 0; i < tb->hdr->nChroms; i++)
|
196
|
+
{
|
197
|
+
for (j = 0; j < tb->idx->maskBlockCount[i]; j++)
|
198
|
+
{
|
199
|
+
foo += tb->idx->maskBlockSizes[i][j];
|
200
|
+
}
|
201
|
+
}
|
202
|
+
val = UINT32_2NUM(foo);
|
203
|
+
if (!val)
|
204
|
+
goto error;
|
205
|
+
rb_hash_aset(info, rb_str_new2("soft_masked_length"), val);
|
206
|
+
}
|
207
|
+
|
208
|
+
return info;
|
212
209
|
|
213
210
|
error:
|
214
|
-
|
215
|
-
|
211
|
+
rb_raise(rb_eRuntimeError, "Received an error while gathering information on the 2bit file!");
|
212
|
+
return Qnil;
|
216
213
|
}
|
217
214
|
|
218
215
|
static VALUE
|
219
216
|
twobit_chroms(VALUE self)
|
220
217
|
{
|
221
|
-
|
218
|
+
TwoBit *tb = getTwoBit(self);
|
222
219
|
|
223
|
-
|
224
|
-
|
225
|
-
|
226
|
-
|
227
|
-
|
220
|
+
if (!tb)
|
221
|
+
{
|
222
|
+
rb_raise(rb_eRuntimeError, "The 2bit file handle is not open!");
|
223
|
+
return Qnil;
|
224
|
+
}
|
228
225
|
|
229
|
-
|
230
|
-
|
231
|
-
|
226
|
+
uint32_t i;
|
227
|
+
VALUE val;
|
228
|
+
VALUE chroms = rb_hash_new();
|
232
229
|
|
233
|
-
|
234
|
-
|
235
|
-
|
236
|
-
|
237
|
-
|
238
|
-
|
239
|
-
|
230
|
+
for (i = 0; i < tb->hdr->nChroms; i++)
|
231
|
+
{
|
232
|
+
val = UINT32_2NUM(tb->idx->size[i]);
|
233
|
+
if (!val)
|
234
|
+
goto error;
|
235
|
+
rb_hash_aset(chroms, rb_str_new2(tb->cl->chrom[i]), val);
|
236
|
+
}
|
240
237
|
|
241
|
-
|
238
|
+
return chroms;
|
242
239
|
|
243
240
|
error:
|
244
|
-
|
245
|
-
|
241
|
+
rb_raise(rb_eRuntimeError, "Received an error while adding an item to the output hash!");
|
242
|
+
return Qnil;
|
246
243
|
}
|
247
244
|
|
248
245
|
static VALUE
|
249
246
|
twobit_sequence(VALUE self, VALUE chrom, VALUE rbstart, VALUE rbend)
|
250
247
|
{
|
251
|
-
|
252
|
-
|
253
|
-
|
254
|
-
|
255
|
-
|
256
|
-
|
257
|
-
|
258
|
-
|
259
|
-
|
260
|
-
|
261
|
-
|
262
|
-
|
263
|
-
|
264
|
-
|
265
|
-
|
266
|
-
|
267
|
-
|
268
|
-
|
269
|
-
|
270
|
-
|
271
|
-
|
272
|
-
|
273
|
-
|
274
|
-
|
275
|
-
|
276
|
-
|
277
|
-
|
278
|
-
|
279
|
-
|
280
|
-
|
281
|
-
|
282
|
-
|
283
|
-
|
284
|
-
|
285
|
-
|
248
|
+
char *ch, *str;
|
249
|
+
unsigned long startl = 0, endl = 0;
|
250
|
+
uint32_t start, end, len;
|
251
|
+
TwoBit *tb;
|
252
|
+
|
253
|
+
ch = StringValueCStr(chrom);
|
254
|
+
startl = NUM2UINT32(rbstart);
|
255
|
+
endl = NUM2UINT32(rbend);
|
256
|
+
tb = getTwoBit(self);
|
257
|
+
|
258
|
+
if (!tb)
|
259
|
+
{
|
260
|
+
rb_raise(rb_eRuntimeError, "The 2bit file handle is not open!");
|
261
|
+
return Qnil;
|
262
|
+
}
|
263
|
+
|
264
|
+
len = twobitChromLen(tb, ch);
|
265
|
+
if (len == 0)
|
266
|
+
{
|
267
|
+
rb_raise(rb_eRuntimeError, "The chromosome %s does not exist in the 2bit file!", ch);
|
268
|
+
return Qnil;
|
269
|
+
}
|
270
|
+
if (endl > len)
|
271
|
+
endl = len;
|
272
|
+
end = (uint32_t)endl;
|
273
|
+
if (startl >= endl && startl > 0)
|
274
|
+
{
|
275
|
+
rb_raise(rb_eRuntimeError, "The start position %lu is greater than the end position %lu!", startl, endl);
|
276
|
+
return Qnil;
|
277
|
+
}
|
278
|
+
start = (uint32_t)startl;
|
279
|
+
|
280
|
+
str = twobitSequence(tb, ch, start, end);
|
281
|
+
|
282
|
+
return rb_str_new2(str);
|
286
283
|
}
|
287
284
|
|
288
285
|
static VALUE
|
289
286
|
twobit_bases(VALUE self, VALUE chrom, VALUE start, VALUE end, VALUE fraction)
|
290
287
|
{
|
291
|
-
|
292
|
-
|
293
|
-
|
294
|
-
|
295
|
-
|
296
|
-
|
297
|
-
|
298
|
-
|
299
|
-
|
300
|
-
|
301
|
-
|
302
|
-
|
303
|
-
|
304
|
-
|
305
|
-
|
306
|
-
|
307
|
-
|
308
|
-
|
309
|
-
|
310
|
-
|
311
|
-
|
312
|
-
|
313
|
-
|
314
|
-
|
315
|
-
|
316
|
-
|
317
|
-
|
318
|
-
|
319
|
-
|
320
|
-
|
321
|
-
|
322
|
-
|
323
|
-
|
324
|
-
|
325
|
-
|
326
|
-
|
327
|
-
|
328
|
-
|
329
|
-
|
330
|
-
|
331
|
-
|
332
|
-
|
333
|
-
|
334
|
-
|
335
|
-
|
336
|
-
|
337
|
-
|
338
|
-
|
339
|
-
|
340
|
-
|
341
|
-
|
342
|
-
|
343
|
-
|
344
|
-
|
345
|
-
|
346
|
-
|
347
|
-
|
348
|
-
|
349
|
-
|
350
|
-
|
351
|
-
|
352
|
-
|
353
|
-
|
354
|
-
|
355
|
-
|
356
|
-
|
357
|
-
|
358
|
-
|
359
|
-
|
360
|
-
|
288
|
+
char *ch;
|
289
|
+
uint32_t st, en, fr;
|
290
|
+
TwoBit *tb;
|
291
|
+
void *o = NULL;
|
292
|
+
VALUE val, hash;
|
293
|
+
|
294
|
+
tb = getTwoBit(self);
|
295
|
+
if (!tb)
|
296
|
+
{
|
297
|
+
rb_raise(rb_eRuntimeError, "The 2bit file handle is not open!");
|
298
|
+
return Qnil;
|
299
|
+
}
|
300
|
+
|
301
|
+
ch = StringValueCStr(chrom);
|
302
|
+
st = NUM2UINT32(start);
|
303
|
+
en = NUM2UINT32(end);
|
304
|
+
fr = NUM2INT(fraction);
|
305
|
+
|
306
|
+
o = twobitBases(tb, ch, st, en, fr);
|
307
|
+
if (!o)
|
308
|
+
{
|
309
|
+
rb_raise(rb_eRuntimeError, "Received an error while determining the per-base metrics.");
|
310
|
+
return Qnil;
|
311
|
+
}
|
312
|
+
|
313
|
+
hash = rb_hash_new();
|
314
|
+
|
315
|
+
if (fr)
|
316
|
+
{
|
317
|
+
val = DBL2NUM(((double *)o)[0]);
|
318
|
+
}
|
319
|
+
else
|
320
|
+
{
|
321
|
+
val = UINT32_2NUM(((uint32_t *)o)[0]);
|
322
|
+
}
|
323
|
+
rb_hash_aset(hash, rb_str_new2("A"), val);
|
324
|
+
|
325
|
+
if (fr)
|
326
|
+
{
|
327
|
+
val = DBL2NUM(((double *)o)[1]);
|
328
|
+
}
|
329
|
+
else
|
330
|
+
{
|
331
|
+
val = UINT32_2NUM(((uint32_t *)o)[1]);
|
332
|
+
}
|
333
|
+
rb_hash_aset(hash, rb_str_new2("C"), val);
|
334
|
+
|
335
|
+
if (fr)
|
336
|
+
{
|
337
|
+
val = DBL2NUM(((double *)o)[2]);
|
338
|
+
}
|
339
|
+
else
|
340
|
+
{
|
341
|
+
val = UINT32_2NUM(((uint32_t *)o)[2]);
|
342
|
+
}
|
343
|
+
rb_hash_aset(hash, rb_str_new2("T"), val);
|
344
|
+
|
345
|
+
if (fr)
|
346
|
+
{
|
347
|
+
val = DBL2NUM(((double *)o)[3]);
|
348
|
+
}
|
349
|
+
else
|
350
|
+
{
|
351
|
+
val = UINT32_2NUM(((uint32_t *)o)[3]);
|
352
|
+
}
|
353
|
+
rb_hash_aset(hash, rb_str_new2("G"), val);
|
354
|
+
|
355
|
+
free(o);
|
356
|
+
|
357
|
+
return hash;
|
361
358
|
}
|
362
359
|
|
363
360
|
static VALUE
|
364
361
|
twobit_hard_masked_blocks(VALUE self, VALUE chrom, VALUE rbstart, VALUE rbend)
|
365
362
|
{
|
366
|
-
|
367
|
-
|
368
|
-
|
369
|
-
|
370
|
-
|
371
|
-
|
372
|
-
|
373
|
-
|
374
|
-
|
375
|
-
|
376
|
-
|
377
|
-
|
378
|
-
|
379
|
-
|
380
|
-
|
381
|
-
|
382
|
-
|
383
|
-
|
384
|
-
|
385
|
-
|
386
|
-
|
387
|
-
|
388
|
-
|
389
|
-
|
390
|
-
|
391
|
-
|
392
|
-
|
393
|
-
|
394
|
-
|
395
|
-
|
396
|
-
|
397
|
-
|
398
|
-
|
399
|
-
|
400
|
-
|
401
|
-
|
402
|
-
|
403
|
-
|
404
|
-
|
405
|
-
|
406
|
-
|
407
|
-
|
408
|
-
|
409
|
-
|
410
|
-
|
411
|
-
|
412
|
-
|
413
|
-
|
414
|
-
|
415
|
-
|
416
|
-
|
417
|
-
|
418
|
-
|
419
|
-
|
420
|
-
|
421
|
-
|
422
|
-
|
423
|
-
|
424
|
-
|
425
|
-
|
426
|
-
|
427
|
-
|
428
|
-
|
429
|
-
|
430
|
-
|
431
|
-
|
432
|
-
|
433
|
-
|
434
|
-
|
435
|
-
|
436
|
-
|
437
|
-
|
438
|
-
|
363
|
+
char *ch;
|
364
|
+
TwoBit *tb;
|
365
|
+
long tid = -1;
|
366
|
+
unsigned long startl = 0, endl = 0, totalBlocks = 0;
|
367
|
+
uint32_t i, len, start, end, blockStart, blockEnd;
|
368
|
+
VALUE val, ary;
|
369
|
+
|
370
|
+
tb = getTwoBit(self);
|
371
|
+
ch = StringValueCStr(chrom);
|
372
|
+
startl = NUM2UINT32(rbstart);
|
373
|
+
endl = NUM2UINT32(rbend);
|
374
|
+
|
375
|
+
if (!tb)
|
376
|
+
{
|
377
|
+
rb_raise(rb_eRuntimeError, "The 2bit file handle is not open!");
|
378
|
+
return Qnil;
|
379
|
+
}
|
380
|
+
|
381
|
+
// Get the chromosome ID
|
382
|
+
for (i = 0; i < tb->hdr->nChroms; i++)
|
383
|
+
{
|
384
|
+
if (strcmp(tb->cl->chrom[i], ch) == 0)
|
385
|
+
{
|
386
|
+
tid = i;
|
387
|
+
break;
|
388
|
+
}
|
389
|
+
}
|
390
|
+
|
391
|
+
len = twobitChromLen(tb, ch);
|
392
|
+
if (len == 0)
|
393
|
+
{
|
394
|
+
rb_raise(rb_eRuntimeError, "The chromosome %s doesn't exist in the 2bit file!", ch);
|
395
|
+
return Qnil;
|
396
|
+
}
|
397
|
+
if (endl == 0)
|
398
|
+
endl = len;
|
399
|
+
if (endl > len)
|
400
|
+
endl = len;
|
401
|
+
end = (uint32_t)endl;
|
402
|
+
if (startl > endl && startl > 0)
|
403
|
+
{
|
404
|
+
rb_raise(rb_eRuntimeError, "The start value must be less then the end value (and the end of the chromosome!");
|
405
|
+
return Qnil;
|
406
|
+
}
|
407
|
+
start = (uint32_t)startl;
|
408
|
+
|
409
|
+
// Count the total number of overlapping N-masked blocks
|
410
|
+
for (i = 0; i < tb->idx->nBlockCount[tid]; i++)
|
411
|
+
{
|
412
|
+
blockStart = tb->idx->nBlockStart[tid][i];
|
413
|
+
blockEnd = blockStart + tb->idx->nBlockSizes[tid][i];
|
414
|
+
if (blockStart < end && blockEnd > start)
|
415
|
+
{
|
416
|
+
totalBlocks++;
|
417
|
+
}
|
418
|
+
}
|
419
|
+
|
420
|
+
// Form the output
|
421
|
+
ary = rb_ary_new2(totalBlocks);
|
422
|
+
if (totalBlocks == 0)
|
423
|
+
return ary;
|
424
|
+
for (i = 0; i < tb->idx->nBlockCount[tid]; i++)
|
425
|
+
{
|
426
|
+
blockStart = tb->idx->nBlockStart[tid][i];
|
427
|
+
blockEnd = blockStart + tb->idx->nBlockSizes[tid][i];
|
428
|
+
if (blockStart < end && blockEnd > start)
|
429
|
+
{
|
430
|
+
val = rb_ary_new3(2, UINT32_2NUM(blockStart), UINT32_2NUM(blockEnd));
|
431
|
+
rb_ary_push(ary, val);
|
432
|
+
}
|
433
|
+
}
|
434
|
+
|
435
|
+
return ary;
|
439
436
|
}
|
440
437
|
|
441
438
|
static VALUE
|
442
439
|
twobit_soft_masked_blocks(VALUE self, VALUE chrom, VALUE rbstart, VALUE rbend)
|
443
440
|
{
|
444
|
-
|
445
|
-
|
446
|
-
|
447
|
-
|
448
|
-
|
449
|
-
|
450
|
-
|
451
|
-
|
452
|
-
|
453
|
-
|
454
|
-
|
455
|
-
|
456
|
-
|
457
|
-
|
458
|
-
|
459
|
-
|
460
|
-
|
461
|
-
|
462
|
-
|
463
|
-
|
464
|
-
|
465
|
-
|
466
|
-
|
467
|
-
|
468
|
-
|
469
|
-
|
470
|
-
|
471
|
-
|
472
|
-
|
473
|
-
|
474
|
-
|
475
|
-
|
476
|
-
|
477
|
-
|
478
|
-
|
479
|
-
|
480
|
-
|
481
|
-
|
482
|
-
|
483
|
-
|
484
|
-
|
485
|
-
|
486
|
-
|
487
|
-
|
488
|
-
|
489
|
-
|
490
|
-
|
491
|
-
|
492
|
-
|
493
|
-
|
494
|
-
|
495
|
-
|
496
|
-
|
497
|
-
|
498
|
-
|
499
|
-
|
500
|
-
|
501
|
-
|
502
|
-
|
503
|
-
|
504
|
-
|
505
|
-
|
506
|
-
|
507
|
-
|
508
|
-
|
509
|
-
|
510
|
-
|
511
|
-
|
512
|
-
|
513
|
-
|
514
|
-
|
515
|
-
|
516
|
-
|
517
|
-
|
518
|
-
|
519
|
-
|
520
|
-
|
521
|
-
|
522
|
-
|
441
|
+
char *ch;
|
442
|
+
TwoBit *tb;
|
443
|
+
long tid = -1;
|
444
|
+
unsigned long startl = 0, endl = 0, totalBlocks = 0;
|
445
|
+
uint32_t i, len, start, end, blockStart, blockEnd;
|
446
|
+
VALUE val, ary;
|
447
|
+
|
448
|
+
tb = getTwoBit(self);
|
449
|
+
ch = StringValueCStr(chrom);
|
450
|
+
startl = NUM2UINT32(rbstart);
|
451
|
+
endl = NUM2UINT32(rbend);
|
452
|
+
|
453
|
+
if (!tb)
|
454
|
+
{
|
455
|
+
rb_raise(rb_eRuntimeError, "The 2bit file handle is not open!");
|
456
|
+
return Qnil;
|
457
|
+
}
|
458
|
+
|
459
|
+
// Get the chromosome ID
|
460
|
+
for (i = 0; i < tb->hdr->nChroms; i++)
|
461
|
+
{
|
462
|
+
if (strcmp(tb->cl->chrom[i], ch) == 0)
|
463
|
+
{
|
464
|
+
tid = i;
|
465
|
+
break;
|
466
|
+
}
|
467
|
+
}
|
468
|
+
|
469
|
+
len = twobitChromLen(tb, ch);
|
470
|
+
if (len == 0)
|
471
|
+
{
|
472
|
+
rb_raise(rb_eRuntimeError, "The chromosome %s doesn't exist in the 2bit file!", ch);
|
473
|
+
return Qnil;
|
474
|
+
}
|
475
|
+
if (endl == 0)
|
476
|
+
endl = len;
|
477
|
+
if (endl > len)
|
478
|
+
endl = len;
|
479
|
+
end = (uint32_t)endl;
|
480
|
+
if (startl >= endl && startl > 0)
|
481
|
+
{
|
482
|
+
rb_raise(rb_eRuntimeError, "The start value must be less then the end value (and the end of the chromosome!");
|
483
|
+
return Qnil;
|
484
|
+
}
|
485
|
+
start = (uint32_t)startl;
|
486
|
+
|
487
|
+
if (!tb->idx->maskBlockStart)
|
488
|
+
{
|
489
|
+
rb_raise(rb_eRuntimeError, "The file was not opened with storeMasked=True! Consequently, there are no stored soft-masked regions.");
|
490
|
+
return Qnil;
|
491
|
+
}
|
492
|
+
|
493
|
+
// Count the total number of overlapping N-masked blocks
|
494
|
+
for (i = 0; i < tb->idx->maskBlockCount[tid]; i++)
|
495
|
+
{
|
496
|
+
blockStart = tb->idx->maskBlockStart[tid][i];
|
497
|
+
blockEnd = blockStart + tb->idx->maskBlockSizes[tid][i];
|
498
|
+
if (blockStart < end && blockEnd > start)
|
499
|
+
{
|
500
|
+
totalBlocks++;
|
501
|
+
}
|
502
|
+
}
|
503
|
+
|
504
|
+
// Form the output
|
505
|
+
ary = rb_ary_new2(totalBlocks);
|
506
|
+
if (totalBlocks == 0)
|
507
|
+
return ary;
|
508
|
+
for (i = 0; i < tb->idx->maskBlockCount[tid]; i++)
|
509
|
+
{
|
510
|
+
blockStart = tb->idx->maskBlockStart[tid][i];
|
511
|
+
blockEnd = blockStart + tb->idx->maskBlockSizes[tid][i];
|
512
|
+
if (blockStart < end && blockEnd > start)
|
513
|
+
{
|
514
|
+
val = rb_ary_new3(2, UINT32_2NUM(blockStart), UINT32_2NUM(blockEnd));
|
515
|
+
rb_ary_push(ary, val);
|
516
|
+
}
|
517
|
+
}
|
518
|
+
|
519
|
+
return ary;
|
523
520
|
}
|
524
521
|
|
525
522
|
void Init_twobit(void)
|
526
523
|
{
|
527
|
-
|
528
|
-
|
529
|
-
|
530
|
-
|
531
|
-
|
532
|
-
|
533
|
-
|
534
|
-
|
535
|
-
|
536
|
-
|
537
|
-
|
538
|
-
|
539
|
-
|
540
|
-
|
524
|
+
mBio = rb_define_module("Bio");
|
525
|
+
mTwoBit = rb_define_class_under(mBio, "TwoBit", rb_cObject);
|
526
|
+
|
527
|
+
rb_define_alloc_func(mTwoBit, twobit_allocate);
|
528
|
+
|
529
|
+
rb_define_private_method(mTwoBit, "initialize_raw", twobit_init, 2);
|
530
|
+
rb_define_method(mTwoBit, "close", twobit_close, 0);
|
531
|
+
rb_define_method(mTwoBit, "closed?", twobit_closed_question_mark, 0);
|
532
|
+
rb_define_method(mTwoBit, "info", twobit_info, 0);
|
533
|
+
rb_define_method(mTwoBit, "chroms", twobit_chroms, 0);
|
534
|
+
rb_define_private_method(mTwoBit, "sequence_raw", twobit_sequence, 3);
|
535
|
+
rb_define_private_method(mTwoBit, "bases_raw", twobit_bases, 4);
|
536
|
+
rb_define_private_method(mTwoBit, "hard_masked_blocks_raw", twobit_hard_masked_blocks, 3);
|
537
|
+
rb_define_private_method(mTwoBit, "soft_masked_blocks_raw", twobit_soft_masked_blocks, 3);
|
541
538
|
}
|
data/lib/bio/twobit/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: bio-twobit
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.3
|
4
|
+
version: 0.1.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- kojix2
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-
|
11
|
+
date: 2022-11-03 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: This is a Ruby binding for lib2bit(https://github.com/dpryan79/lib2bit),
|
14
14
|
which provides high-speed access to genomic data in 2bit file format.
|