bio-twobit 0.1.3 → 0.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 13e0df3d77589aedc725b0cffa8c5362ac749990a5eef6e1c10d0d137ae8e4d3
4
- data.tar.gz: 56e92c1e74b96a040cc8c22c786f2c03d1f6fac7aabb969c34e9f0e81b496055
3
+ metadata.gz: 1f0ce7759c6099bf6f2675eb467722473f06040a583cad13f2400c1d50f4f3b7
4
+ data.tar.gz: '09bdb93292cc70fa665a73890d8500b56084639cc3e18d6a1602bdf62d943620'
5
5
  SHA512:
6
- metadata.gz: d4b7fa2a849a682739b2ae9686000adf397ad972412df8ee965feb0c165df7743a94f1f4d1b7b7d6fba0066e4b7a12b9d5fff87f5db47c7d8a76bf6bd2753456
7
- data.tar.gz: 593012ebcbb5eed16157ab6990db9f6cebcb9164c2b171eff04f3e5ebc4f88360c079c118d4e064f63d7205e113a1c889c3897cfdc92a59de8915d7b57df0aeb
6
+ metadata.gz: ec2fb805c335562a64eca155c5806f3d004b8c4d3a2f6c2363eeb0c529749bb3f220bea4cf626d8d1618d6bdd061b247ae5a50a3f6d94314fbf6d0752bed281e
7
+ data.tar.gz: 96784953366c86e9e8e3d215a809faf0f23e6f012d3ff158ad47e80651fa2bea8cd0f87a3ca630579f7bb8e1aa41c2a30e449ba8af612ab8921d62e41521d293
data/Gemfile CHANGED
@@ -6,9 +6,6 @@ source "https://rubygems.org"
6
6
  gemspec
7
7
 
8
8
  gem "rake"
9
-
10
9
  gem "rake-compiler"
11
-
12
10
  gem "test-unit"
13
-
14
11
  gem "rubocop"
data/Rakefile CHANGED
@@ -22,4 +22,4 @@ Rake::ExtensionTask.new("twobit") do |ext|
22
22
  ext.ext_dir = "ext/bio/twobit"
23
23
  end
24
24
 
25
- task default: %i[clobber compile test rubocop]
25
+ task default: %i[clobber compile test]
@@ -32,7 +32,7 @@
32
32
  #define NUM2INT64 NUM2LONG
33
33
  #define UINT64_2NUM ULONG2NUM
34
34
  #define INT64_2NUM LONG2NUM
35
- #elif SIZEOF_LONGLONG == SIZEOF_INT64
35
+ #elif SIZEOF_LONG_LONG == SIZEOF_INT64
36
36
  #define NUM2UINT64 NUM2ULL
37
37
  #define NUM2INT64 NUM2LL
38
38
  #define UINT64_2NUM ULL2NUM
@@ -48,494 +48,491 @@ static void TwoBit_free(void *ptr);
48
48
  static size_t TwoBit_memsize(const void *ptr);
49
49
 
50
50
  static const rb_data_type_t TwoBit_type = {
51
- "TwoBit",
52
- {
53
- 0,
54
- TwoBit_free,
55
- TwoBit_memsize,
56
- },
57
- 0,
58
- 0,
59
- RUBY_TYPED_FREE_IMMEDIATELY,
51
+ .wrap_struct_name = "TwoBit",
52
+ .function = {
53
+ .dfree = TwoBit_free,
54
+ .dsize = TwoBit_memsize,
55
+ },
56
+ .flags = RUBY_TYPED_FREE_IMMEDIATELY,
60
57
  };
61
58
 
62
59
  static void
63
60
  TwoBit_free(void *ptr)
64
61
  {
65
- // twobitClose checks for null
66
- twobitClose(ptr);
62
+ // twobitClose checks for null
63
+ twobitClose(ptr);
67
64
  }
68
65
 
69
66
  static size_t
70
67
  TwoBit_memsize(const void *ptr)
71
68
  {
72
- const TwoBit *data = ptr;
69
+ const TwoBit *data = ptr;
73
70
 
74
- return data ? sizeof(*data) : 0;
71
+ return data ? sizeof(*data) : 0;
75
72
  }
76
73
 
77
74
  static TwoBit *getTwoBit(VALUE self)
78
75
  {
79
- TwoBit *ptr = NULL;
80
- TypedData_Get_Struct(self, TwoBit, &TwoBit_type, ptr);
76
+ TwoBit *ptr = NULL;
77
+ TypedData_Get_Struct(self, TwoBit, &TwoBit_type, ptr);
81
78
 
82
- return ptr;
79
+ return ptr;
83
80
  }
84
81
 
85
82
  static VALUE
86
83
  twobit_allocate(VALUE klass)
87
84
  {
88
- TwoBit *tb = NULL;
85
+ TwoBit *tb = NULL;
89
86
 
90
- return TypedData_Wrap_Struct(klass, &TwoBit_type, tb);
87
+ return TypedData_Wrap_Struct(klass, &TwoBit_type, tb);
91
88
  }
92
89
 
93
90
  static VALUE
94
91
  twobit_init(VALUE klass, VALUE fpath, VALUE storeMasked)
95
92
  {
96
- char *path = NULL;
97
- int mask = 0;
98
- TwoBit *tb = NULL;
99
-
100
- path = StringValueCStr(fpath);
101
- mask = NUM2INT(storeMasked);
102
-
103
- tb = twobitOpen(path, mask);
104
- if (!tb)
105
- {
106
- twobitClose(tb);
107
- rb_raise(rb_eRuntimeError, "Could not open file %s", path);
108
- return Qnil;
109
- }
110
- DATA_PTR(klass) = tb;
111
-
112
- return klass;
93
+ char *path = NULL;
94
+ int mask = 0;
95
+ TwoBit *tb = NULL;
96
+
97
+ path = StringValueCStr(fpath);
98
+ mask = NUM2INT(storeMasked);
99
+
100
+ tb = twobitOpen(path, mask);
101
+ if (!tb)
102
+ {
103
+ twobitClose(tb);
104
+ rb_raise(rb_eRuntimeError, "Could not open file %s", path);
105
+ return Qnil;
106
+ }
107
+ DATA_PTR(klass) = tb;
108
+
109
+ return klass;
113
110
  }
114
111
 
115
112
  static VALUE
116
113
  twobit_close(VALUE self)
117
114
  {
118
- TwoBit *tb = getTwoBit(self);
119
- if (tb)
120
- {
121
- twobitClose(tb);
122
- DATA_PTR(self) = NULL;
123
- }
124
-
125
- return Qnil;
115
+ TwoBit *tb = getTwoBit(self);
116
+ if (tb)
117
+ {
118
+ twobitClose(tb);
119
+ DATA_PTR(self) = NULL;
120
+ }
121
+
122
+ return Qnil;
126
123
  }
127
124
 
128
125
  static VALUE
129
126
  twobit_closed_question_mark(VALUE self)
130
127
  {
131
- TwoBit *tb = getTwoBit(self);
132
- if (tb)
133
- {
134
- return Qfalse;
135
- }
136
- else
137
- {
138
- return Qtrue;
139
- }
128
+ TwoBit *tb = getTwoBit(self);
129
+ if (tb)
130
+ {
131
+ return Qfalse;
132
+ }
133
+ else
134
+ {
135
+ return Qtrue;
136
+ }
140
137
  }
141
138
 
142
139
  static VALUE
143
140
  twobit_info(VALUE self)
144
141
  {
145
- TwoBit *tb = getTwoBit(self);
146
-
147
- if (!tb)
148
- {
149
- rb_raise(rb_eRuntimeError, "The 2bit file handle is not open!");
150
- return Qnil;
151
- }
152
-
153
- uint32_t i, j, foo;
154
- VALUE val;
155
- VALUE info = rb_hash_new();
156
-
157
- //file size
158
- val = UINT64_2NUM(tb->sz);
159
- if (!val)
160
- goto error;
161
- rb_hash_aset(info, rb_str_new2("file_size"), val);
162
-
163
- //nContigs
164
- val = UINT32_2NUM(tb->hdr->nChroms);
165
- if (!val)
166
- goto error;
167
- rb_hash_aset(info, rb_str_new2("nChroms"), val);
168
-
169
- //sequence length
170
- foo = 0;
171
- for (i = 0; i < tb->hdr->nChroms; i++)
172
- {
173
- foo += tb->idx->size[i];
174
- }
175
- val = UINT32_2NUM(foo);
176
- if (!val)
177
- goto error;
178
- rb_hash_aset(info, rb_str_new2("sequence_length"), val);
179
-
180
- //hard-masked length
181
- foo = 0;
182
- for (i = 0; i < tb->hdr->nChroms; i++)
183
- {
184
- for (j = 0; j < tb->idx->nBlockCount[i]; j++)
185
- {
186
- foo += tb->idx->nBlockSizes[i][j];
187
- }
188
- }
189
- val = UINT32_2NUM(foo);
190
- if (!val)
191
- goto error;
192
- rb_hash_aset(info, rb_str_new2("hard_masked_length"), val);
193
-
194
- //soft-masked length
195
- if (tb->idx->maskBlockStart)
196
- {
197
- foo = 0;
198
- for (i = 0; i < tb->hdr->nChroms; i++)
199
- {
200
- for (j = 0; j < tb->idx->maskBlockCount[i]; j++)
201
- {
202
- foo += tb->idx->maskBlockSizes[i][j];
203
- }
204
- }
205
- val = UINT32_2NUM(foo);
206
- if (!val)
207
- goto error;
208
- rb_hash_aset(info, rb_str_new2("soft_masked_length"), val);
209
- }
210
-
211
- return info;
142
+ TwoBit *tb = getTwoBit(self);
143
+
144
+ if (!tb)
145
+ {
146
+ rb_raise(rb_eRuntimeError, "The 2bit file handle is not open!");
147
+ return Qnil;
148
+ }
149
+
150
+ uint32_t i, j, foo;
151
+ VALUE val;
152
+ VALUE info = rb_hash_new();
153
+
154
+ // file size
155
+ val = UINT64_2NUM(tb->sz);
156
+ if (!val)
157
+ goto error;
158
+ rb_hash_aset(info, rb_str_new2("file_size"), val);
159
+
160
+ // nContigs
161
+ val = UINT32_2NUM(tb->hdr->nChroms);
162
+ if (!val)
163
+ goto error;
164
+ rb_hash_aset(info, rb_str_new2("nChroms"), val);
165
+
166
+ // sequence length
167
+ foo = 0;
168
+ for (i = 0; i < tb->hdr->nChroms; i++)
169
+ {
170
+ foo += tb->idx->size[i];
171
+ }
172
+ val = UINT32_2NUM(foo);
173
+ if (!val)
174
+ goto error;
175
+ rb_hash_aset(info, rb_str_new2("sequence_length"), val);
176
+
177
+ // hard-masked length
178
+ foo = 0;
179
+ for (i = 0; i < tb->hdr->nChroms; i++)
180
+ {
181
+ for (j = 0; j < tb->idx->nBlockCount[i]; j++)
182
+ {
183
+ foo += tb->idx->nBlockSizes[i][j];
184
+ }
185
+ }
186
+ val = UINT32_2NUM(foo);
187
+ if (!val)
188
+ goto error;
189
+ rb_hash_aset(info, rb_str_new2("hard_masked_length"), val);
190
+
191
+ // soft-masked length
192
+ if (tb->idx->maskBlockStart)
193
+ {
194
+ foo = 0;
195
+ for (i = 0; i < tb->hdr->nChroms; i++)
196
+ {
197
+ for (j = 0; j < tb->idx->maskBlockCount[i]; j++)
198
+ {
199
+ foo += tb->idx->maskBlockSizes[i][j];
200
+ }
201
+ }
202
+ val = UINT32_2NUM(foo);
203
+ if (!val)
204
+ goto error;
205
+ rb_hash_aset(info, rb_str_new2("soft_masked_length"), val);
206
+ }
207
+
208
+ return info;
212
209
 
213
210
  error:
214
- rb_raise(rb_eRuntimeError, "Received an error while gathering information on the 2bit file!");
215
- return Qnil;
211
+ rb_raise(rb_eRuntimeError, "Received an error while gathering information on the 2bit file!");
212
+ return Qnil;
216
213
  }
217
214
 
218
215
  static VALUE
219
216
  twobit_chroms(VALUE self)
220
217
  {
221
- TwoBit *tb = getTwoBit(self);
218
+ TwoBit *tb = getTwoBit(self);
222
219
 
223
- if (!tb)
224
- {
225
- rb_raise(rb_eRuntimeError, "The 2bit file handle is not open!");
226
- return Qnil;
227
- }
220
+ if (!tb)
221
+ {
222
+ rb_raise(rb_eRuntimeError, "The 2bit file handle is not open!");
223
+ return Qnil;
224
+ }
228
225
 
229
- uint32_t i;
230
- VALUE val;
231
- VALUE chroms = rb_hash_new();
226
+ uint32_t i;
227
+ VALUE val;
228
+ VALUE chroms = rb_hash_new();
232
229
 
233
- for (i = 0; i < tb->hdr->nChroms; i++)
234
- {
235
- val = UINT32_2NUM(tb->idx->size[i]);
236
- if (!val)
237
- goto error;
238
- rb_hash_aset(chroms, rb_str_new2(tb->cl->chrom[i]), val);
239
- }
230
+ for (i = 0; i < tb->hdr->nChroms; i++)
231
+ {
232
+ val = UINT32_2NUM(tb->idx->size[i]);
233
+ if (!val)
234
+ goto error;
235
+ rb_hash_aset(chroms, rb_str_new2(tb->cl->chrom[i]), val);
236
+ }
240
237
 
241
- return chroms;
238
+ return chroms;
242
239
 
243
240
  error:
244
- rb_raise(rb_eRuntimeError, "Received an error while adding an item to the output hash!");
245
- return Qnil;
241
+ rb_raise(rb_eRuntimeError, "Received an error while adding an item to the output hash!");
242
+ return Qnil;
246
243
  }
247
244
 
248
245
  static VALUE
249
246
  twobit_sequence(VALUE self, VALUE chrom, VALUE rbstart, VALUE rbend)
250
247
  {
251
- char *ch, *str;
252
- unsigned long startl = 0, endl = 0;
253
- uint32_t start, end, len;
254
- TwoBit *tb;
255
-
256
- ch = StringValueCStr(chrom);
257
- startl = NUM2UINT32(rbstart);
258
- endl = NUM2UINT32(rbend);
259
- tb = getTwoBit(self);
260
-
261
- if (!tb)
262
- {
263
- rb_raise(rb_eRuntimeError, "The 2bit file handle is not open!");
264
- return Qnil;
265
- }
266
-
267
- len = twobitChromLen(tb, ch);
268
- if (len == 0)
269
- {
270
- rb_raise(rb_eRuntimeError, "The chromosome %s does not exist in the 2bit file!", ch);
271
- return Qnil;
272
- }
273
- if (endl > len)
274
- endl = len;
275
- end = (uint32_t)endl;
276
- if (startl >= endl && startl > 0)
277
- {
278
- rb_raise(rb_eRuntimeError, "The start position %lu is greater than the end position %lu!", startl, endl);
279
- return Qnil;
280
- }
281
- start = (uint32_t)startl;
282
-
283
- str = twobitSequence(tb, ch, start, end);
284
-
285
- return rb_str_new2(str);
248
+ char *ch, *str;
249
+ unsigned long startl = 0, endl = 0;
250
+ uint32_t start, end, len;
251
+ TwoBit *tb;
252
+
253
+ ch = StringValueCStr(chrom);
254
+ startl = NUM2UINT32(rbstart);
255
+ endl = NUM2UINT32(rbend);
256
+ tb = getTwoBit(self);
257
+
258
+ if (!tb)
259
+ {
260
+ rb_raise(rb_eRuntimeError, "The 2bit file handle is not open!");
261
+ return Qnil;
262
+ }
263
+
264
+ len = twobitChromLen(tb, ch);
265
+ if (len == 0)
266
+ {
267
+ rb_raise(rb_eRuntimeError, "The chromosome %s does not exist in the 2bit file!", ch);
268
+ return Qnil;
269
+ }
270
+ if (endl > len)
271
+ endl = len;
272
+ end = (uint32_t)endl;
273
+ if (startl >= endl && startl > 0)
274
+ {
275
+ rb_raise(rb_eRuntimeError, "The start position %lu is greater than the end position %lu!", startl, endl);
276
+ return Qnil;
277
+ }
278
+ start = (uint32_t)startl;
279
+
280
+ str = twobitSequence(tb, ch, start, end);
281
+
282
+ return rb_str_new2(str);
286
283
  }
287
284
 
288
285
  static VALUE
289
286
  twobit_bases(VALUE self, VALUE chrom, VALUE start, VALUE end, VALUE fraction)
290
287
  {
291
- char *ch;
292
- uint32_t st, en, fr;
293
- TwoBit *tb;
294
- void *o = NULL;
295
- VALUE val, hash;
296
-
297
- tb = getTwoBit(self);
298
- if (!tb)
299
- {
300
- rb_raise(rb_eRuntimeError, "The 2bit file handle is not open!");
301
- return Qnil;
302
- }
303
-
304
- ch = StringValueCStr(chrom);
305
- st = NUM2UINT32(start);
306
- en = NUM2UINT32(end);
307
- fr = NUM2INT(fraction);
308
-
309
- o = twobitBases(tb, ch, st, en, fr);
310
- if (!o)
311
- {
312
- rb_raise(rb_eRuntimeError, "Received an error while determining the per-base metrics.");
313
- return Qnil;
314
- }
315
-
316
- hash = rb_hash_new();
317
-
318
- if (fr)
319
- {
320
- val = DBL2NUM(((double *)o)[0]);
321
- }
322
- else
323
- {
324
- val = UINT32_2NUM(((uint32_t *)o)[0]);
325
- }
326
- rb_hash_aset(hash, rb_str_new2("A"), val);
327
-
328
- if (fr)
329
- {
330
- val = DBL2NUM(((double *)o)[1]);
331
- }
332
- else
333
- {
334
- val = UINT32_2NUM(((uint32_t *)o)[1]);
335
- }
336
- rb_hash_aset(hash, rb_str_new2("C"), val);
337
-
338
- if (fr)
339
- {
340
- val = DBL2NUM(((double *)o)[2]);
341
- }
342
- else
343
- {
344
- val = UINT32_2NUM(((uint32_t *)o)[2]);
345
- }
346
- rb_hash_aset(hash, rb_str_new2("T"), val);
347
-
348
- if (fr)
349
- {
350
- val = DBL2NUM(((double *)o)[3]);
351
- }
352
- else
353
- {
354
- val = UINT32_2NUM(((uint32_t *)o)[3]);
355
- }
356
- rb_hash_aset(hash, rb_str_new2("G"), val);
357
-
358
- free(o);
359
-
360
- return hash;
288
+ char *ch;
289
+ uint32_t st, en, fr;
290
+ TwoBit *tb;
291
+ void *o = NULL;
292
+ VALUE val, hash;
293
+
294
+ tb = getTwoBit(self);
295
+ if (!tb)
296
+ {
297
+ rb_raise(rb_eRuntimeError, "The 2bit file handle is not open!");
298
+ return Qnil;
299
+ }
300
+
301
+ ch = StringValueCStr(chrom);
302
+ st = NUM2UINT32(start);
303
+ en = NUM2UINT32(end);
304
+ fr = NUM2INT(fraction);
305
+
306
+ o = twobitBases(tb, ch, st, en, fr);
307
+ if (!o)
308
+ {
309
+ rb_raise(rb_eRuntimeError, "Received an error while determining the per-base metrics.");
310
+ return Qnil;
311
+ }
312
+
313
+ hash = rb_hash_new();
314
+
315
+ if (fr)
316
+ {
317
+ val = DBL2NUM(((double *)o)[0]);
318
+ }
319
+ else
320
+ {
321
+ val = UINT32_2NUM(((uint32_t *)o)[0]);
322
+ }
323
+ rb_hash_aset(hash, rb_str_new2("A"), val);
324
+
325
+ if (fr)
326
+ {
327
+ val = DBL2NUM(((double *)o)[1]);
328
+ }
329
+ else
330
+ {
331
+ val = UINT32_2NUM(((uint32_t *)o)[1]);
332
+ }
333
+ rb_hash_aset(hash, rb_str_new2("C"), val);
334
+
335
+ if (fr)
336
+ {
337
+ val = DBL2NUM(((double *)o)[2]);
338
+ }
339
+ else
340
+ {
341
+ val = UINT32_2NUM(((uint32_t *)o)[2]);
342
+ }
343
+ rb_hash_aset(hash, rb_str_new2("T"), val);
344
+
345
+ if (fr)
346
+ {
347
+ val = DBL2NUM(((double *)o)[3]);
348
+ }
349
+ else
350
+ {
351
+ val = UINT32_2NUM(((uint32_t *)o)[3]);
352
+ }
353
+ rb_hash_aset(hash, rb_str_new2("G"), val);
354
+
355
+ free(o);
356
+
357
+ return hash;
361
358
  }
362
359
 
363
360
  static VALUE
364
361
  twobit_hard_masked_blocks(VALUE self, VALUE chrom, VALUE rbstart, VALUE rbend)
365
362
  {
366
- char *ch;
367
- TwoBit *tb;
368
- long tid = -1;
369
- unsigned long startl = 0, endl = 0, totalBlocks = 0;
370
- uint32_t i, len, start, end, blockStart, blockEnd;
371
- VALUE val, ary;
372
-
373
- tb = getTwoBit(self);
374
- ch = StringValueCStr(chrom);
375
- startl = NUM2UINT32(rbstart);
376
- endl = NUM2UINT32(rbend);
377
-
378
- if (!tb)
379
- {
380
- rb_raise(rb_eRuntimeError, "The 2bit file handle is not open!");
381
- return Qnil;
382
- }
383
-
384
- //Get the chromosome ID
385
- for (i = 0; i < tb->hdr->nChroms; i++)
386
- {
387
- if (strcmp(tb->cl->chrom[i], ch) == 0)
388
- {
389
- tid = i;
390
- break;
391
- }
392
- }
393
-
394
- len = twobitChromLen(tb, ch);
395
- if (len == 0)
396
- {
397
- rb_raise(rb_eRuntimeError, "The chromosome %s doesn't exist in the 2bit file!", ch);
398
- return Qnil;
399
- }
400
- if (endl == 0)
401
- endl = len;
402
- if (endl > len)
403
- endl = len;
404
- end = (uint32_t)endl;
405
- if (startl > endl && startl > 0)
406
- {
407
- rb_raise(rb_eRuntimeError, "The start value must be less then the end value (and the end of the chromosome!");
408
- return Qnil;
409
- }
410
- start = (uint32_t)startl;
411
-
412
- //Count the total number of overlapping N-masked blocks
413
- for (i = 0; i < tb->idx->nBlockCount[tid]; i++)
414
- {
415
- blockStart = tb->idx->nBlockStart[tid][i];
416
- blockEnd = blockStart + tb->idx->nBlockSizes[tid][i];
417
- if (blockStart < end && blockEnd > start)
418
- {
419
- totalBlocks++;
420
- }
421
- }
422
-
423
- //Form the output
424
- ary = rb_ary_new2(totalBlocks);
425
- if (totalBlocks == 0)
426
- return ary;
427
- for (i = 0; i < tb->idx->nBlockCount[tid]; i++)
428
- {
429
- blockStart = tb->idx->nBlockStart[tid][i];
430
- blockEnd = blockStart + tb->idx->nBlockSizes[tid][i];
431
- if (blockStart < end && blockEnd > start)
432
- {
433
- val = rb_ary_new3(2, UINT32_2NUM(blockStart), UINT32_2NUM(blockEnd));
434
- rb_ary_push(ary, val);
435
- }
436
- }
437
-
438
- return ary;
363
+ char *ch;
364
+ TwoBit *tb;
365
+ long tid = -1;
366
+ unsigned long startl = 0, endl = 0, totalBlocks = 0;
367
+ uint32_t i, len, start, end, blockStart, blockEnd;
368
+ VALUE val, ary;
369
+
370
+ tb = getTwoBit(self);
371
+ ch = StringValueCStr(chrom);
372
+ startl = NUM2UINT32(rbstart);
373
+ endl = NUM2UINT32(rbend);
374
+
375
+ if (!tb)
376
+ {
377
+ rb_raise(rb_eRuntimeError, "The 2bit file handle is not open!");
378
+ return Qnil;
379
+ }
380
+
381
+ // Get the chromosome ID
382
+ for (i = 0; i < tb->hdr->nChroms; i++)
383
+ {
384
+ if (strcmp(tb->cl->chrom[i], ch) == 0)
385
+ {
386
+ tid = i;
387
+ break;
388
+ }
389
+ }
390
+
391
+ len = twobitChromLen(tb, ch);
392
+ if (len == 0)
393
+ {
394
+ rb_raise(rb_eRuntimeError, "The chromosome %s doesn't exist in the 2bit file!", ch);
395
+ return Qnil;
396
+ }
397
+ if (endl == 0)
398
+ endl = len;
399
+ if (endl > len)
400
+ endl = len;
401
+ end = (uint32_t)endl;
402
+ if (startl > endl && startl > 0)
403
+ {
404
+ rb_raise(rb_eRuntimeError, "The start value must be less then the end value (and the end of the chromosome!");
405
+ return Qnil;
406
+ }
407
+ start = (uint32_t)startl;
408
+
409
+ // Count the total number of overlapping N-masked blocks
410
+ for (i = 0; i < tb->idx->nBlockCount[tid]; i++)
411
+ {
412
+ blockStart = tb->idx->nBlockStart[tid][i];
413
+ blockEnd = blockStart + tb->idx->nBlockSizes[tid][i];
414
+ if (blockStart < end && blockEnd > start)
415
+ {
416
+ totalBlocks++;
417
+ }
418
+ }
419
+
420
+ // Form the output
421
+ ary = rb_ary_new2(totalBlocks);
422
+ if (totalBlocks == 0)
423
+ return ary;
424
+ for (i = 0; i < tb->idx->nBlockCount[tid]; i++)
425
+ {
426
+ blockStart = tb->idx->nBlockStart[tid][i];
427
+ blockEnd = blockStart + tb->idx->nBlockSizes[tid][i];
428
+ if (blockStart < end && blockEnd > start)
429
+ {
430
+ val = rb_ary_new3(2, UINT32_2NUM(blockStart), UINT32_2NUM(blockEnd));
431
+ rb_ary_push(ary, val);
432
+ }
433
+ }
434
+
435
+ return ary;
439
436
  }
440
437
 
441
438
  static VALUE
442
439
  twobit_soft_masked_blocks(VALUE self, VALUE chrom, VALUE rbstart, VALUE rbend)
443
440
  {
444
- char *ch;
445
- TwoBit *tb;
446
- long tid = -1;
447
- unsigned long startl = 0, endl = 0, totalBlocks = 0;
448
- uint32_t i, len, start, end, blockStart, blockEnd;
449
- VALUE val, ary;
450
-
451
- tb = getTwoBit(self);
452
- ch = StringValueCStr(chrom);
453
- startl = NUM2UINT32(rbstart);
454
- endl = NUM2UINT32(rbend);
455
-
456
- if (!tb)
457
- {
458
- rb_raise(rb_eRuntimeError, "The 2bit file handle is not open!");
459
- return Qnil;
460
- }
461
-
462
- //Get the chromosome ID
463
- for (i = 0; i < tb->hdr->nChroms; i++)
464
- {
465
- if (strcmp(tb->cl->chrom[i], ch) == 0)
466
- {
467
- tid = i;
468
- break;
469
- }
470
- }
471
-
472
- len = twobitChromLen(tb, ch);
473
- if (len == 0)
474
- {
475
- rb_raise(rb_eRuntimeError, "The chromosome %s doesn't exist in the 2bit file!", ch);
476
- return Qnil;
477
- }
478
- if (endl == 0)
479
- endl = len;
480
- if (endl > len)
481
- endl = len;
482
- end = (uint32_t)endl;
483
- if (startl >= endl && startl > 0)
484
- {
485
- rb_raise(rb_eRuntimeError, "The start value must be less then the end value (and the end of the chromosome!");
486
- return Qnil;
487
- }
488
- start = (uint32_t)startl;
489
-
490
- if (!tb->idx->maskBlockStart)
491
- {
492
- rb_raise(rb_eRuntimeError, "The file was not opened with storeMasked=True! Consequently, there are no stored soft-masked regions.");
493
- return Qnil;
494
- }
495
-
496
- //Count the total number of overlapping N-masked blocks
497
- for (i = 0; i < tb->idx->maskBlockCount[tid]; i++)
498
- {
499
- blockStart = tb->idx->maskBlockStart[tid][i];
500
- blockEnd = blockStart + tb->idx->maskBlockSizes[tid][i];
501
- if (blockStart < end && blockEnd > start)
502
- {
503
- totalBlocks++;
504
- }
505
- }
506
-
507
- //Form the output
508
- ary = rb_ary_new2(totalBlocks);
509
- if (totalBlocks == 0)
510
- return ary;
511
- for (i = 0; i < tb->idx->maskBlockCount[tid]; i++)
512
- {
513
- blockStart = tb->idx->maskBlockStart[tid][i];
514
- blockEnd = blockStart + tb->idx->maskBlockSizes[tid][i];
515
- if (blockStart < end && blockEnd > start)
516
- {
517
- val = rb_ary_new3(2, UINT32_2NUM(blockStart), UINT32_2NUM(blockEnd));
518
- rb_ary_push(ary, val);
519
- }
520
- }
521
-
522
- return ary;
441
+ char *ch;
442
+ TwoBit *tb;
443
+ long tid = -1;
444
+ unsigned long startl = 0, endl = 0, totalBlocks = 0;
445
+ uint32_t i, len, start, end, blockStart, blockEnd;
446
+ VALUE val, ary;
447
+
448
+ tb = getTwoBit(self);
449
+ ch = StringValueCStr(chrom);
450
+ startl = NUM2UINT32(rbstart);
451
+ endl = NUM2UINT32(rbend);
452
+
453
+ if (!tb)
454
+ {
455
+ rb_raise(rb_eRuntimeError, "The 2bit file handle is not open!");
456
+ return Qnil;
457
+ }
458
+
459
+ // Get the chromosome ID
460
+ for (i = 0; i < tb->hdr->nChroms; i++)
461
+ {
462
+ if (strcmp(tb->cl->chrom[i], ch) == 0)
463
+ {
464
+ tid = i;
465
+ break;
466
+ }
467
+ }
468
+
469
+ len = twobitChromLen(tb, ch);
470
+ if (len == 0)
471
+ {
472
+ rb_raise(rb_eRuntimeError, "The chromosome %s doesn't exist in the 2bit file!", ch);
473
+ return Qnil;
474
+ }
475
+ if (endl == 0)
476
+ endl = len;
477
+ if (endl > len)
478
+ endl = len;
479
+ end = (uint32_t)endl;
480
+ if (startl >= endl && startl > 0)
481
+ {
482
+ rb_raise(rb_eRuntimeError, "The start value must be less then the end value (and the end of the chromosome!");
483
+ return Qnil;
484
+ }
485
+ start = (uint32_t)startl;
486
+
487
+ if (!tb->idx->maskBlockStart)
488
+ {
489
+ rb_raise(rb_eRuntimeError, "The file was not opened with storeMasked=True! Consequently, there are no stored soft-masked regions.");
490
+ return Qnil;
491
+ }
492
+
493
+ // Count the total number of overlapping N-masked blocks
494
+ for (i = 0; i < tb->idx->maskBlockCount[tid]; i++)
495
+ {
496
+ blockStart = tb->idx->maskBlockStart[tid][i];
497
+ blockEnd = blockStart + tb->idx->maskBlockSizes[tid][i];
498
+ if (blockStart < end && blockEnd > start)
499
+ {
500
+ totalBlocks++;
501
+ }
502
+ }
503
+
504
+ // Form the output
505
+ ary = rb_ary_new2(totalBlocks);
506
+ if (totalBlocks == 0)
507
+ return ary;
508
+ for (i = 0; i < tb->idx->maskBlockCount[tid]; i++)
509
+ {
510
+ blockStart = tb->idx->maskBlockStart[tid][i];
511
+ blockEnd = blockStart + tb->idx->maskBlockSizes[tid][i];
512
+ if (blockStart < end && blockEnd > start)
513
+ {
514
+ val = rb_ary_new3(2, UINT32_2NUM(blockStart), UINT32_2NUM(blockEnd));
515
+ rb_ary_push(ary, val);
516
+ }
517
+ }
518
+
519
+ return ary;
523
520
  }
524
521
 
525
522
  void Init_twobit(void)
526
523
  {
527
- mBio = rb_define_module("Bio");
528
- mTwoBit = rb_define_class_under(mBio, "TwoBit", rb_cObject);
529
-
530
- rb_define_alloc_func(mTwoBit, twobit_allocate);
531
-
532
- rb_define_private_method(mTwoBit, "initialize_raw", twobit_init, 2);
533
- rb_define_method(mTwoBit, "close", twobit_close, 0);
534
- rb_define_method(mTwoBit, "closed?", twobit_closed_question_mark, 0);
535
- rb_define_method(mTwoBit, "info", twobit_info, 0);
536
- rb_define_method(mTwoBit, "chroms", twobit_chroms, 0);
537
- rb_define_private_method(mTwoBit, "sequence_raw", twobit_sequence, 3);
538
- rb_define_private_method(mTwoBit, "bases_raw", twobit_bases, 4);
539
- rb_define_private_method(mTwoBit, "hard_masked_blocks_raw", twobit_hard_masked_blocks, 3);
540
- rb_define_private_method(mTwoBit, "soft_masked_blocks_raw", twobit_soft_masked_blocks, 3);
524
+ mBio = rb_define_module("Bio");
525
+ mTwoBit = rb_define_class_under(mBio, "TwoBit", rb_cObject);
526
+
527
+ rb_define_alloc_func(mTwoBit, twobit_allocate);
528
+
529
+ rb_define_private_method(mTwoBit, "initialize_raw", twobit_init, 2);
530
+ rb_define_method(mTwoBit, "close", twobit_close, 0);
531
+ rb_define_method(mTwoBit, "closed?", twobit_closed_question_mark, 0);
532
+ rb_define_method(mTwoBit, "info", twobit_info, 0);
533
+ rb_define_method(mTwoBit, "chroms", twobit_chroms, 0);
534
+ rb_define_private_method(mTwoBit, "sequence_raw", twobit_sequence, 3);
535
+ rb_define_private_method(mTwoBit, "bases_raw", twobit_bases, 4);
536
+ rb_define_private_method(mTwoBit, "hard_masked_blocks_raw", twobit_hard_masked_blocks, 3);
537
+ rb_define_private_method(mTwoBit, "soft_masked_blocks_raw", twobit_soft_masked_blocks, 3);
541
538
  }
@@ -2,6 +2,6 @@
2
2
 
3
3
  module Bio
4
4
  class TwoBit
5
- VERSION = "0.1.3"
5
+ VERSION = "0.1.4"
6
6
  end
7
7
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: bio-twobit
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.3
4
+ version: 0.1.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - kojix2
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2022-07-29 00:00:00.000000000 Z
11
+ date: 2022-11-03 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: This is a Ruby binding for lib2bit(https://github.com/dpryan79/lib2bit),
14
14
  which provides high-speed access to genomic data in 2bit file format.