bio-twobit 0.1.2 → 0.1.4

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: ec7f8c720eed6e69a7e0829e7a46237e49532a5dc3124c57ee0c574f36e79a46
4
- data.tar.gz: b46dcf07bd2c34993b57a91e4743d45579285727146fe5ea269dab848e6e6a7f
3
+ metadata.gz: 1f0ce7759c6099bf6f2675eb467722473f06040a583cad13f2400c1d50f4f3b7
4
+ data.tar.gz: '09bdb93292cc70fa665a73890d8500b56084639cc3e18d6a1602bdf62d943620'
5
5
  SHA512:
6
- metadata.gz: 071f082fca0cde51ff22be152f5b6a33837dbf9acdc39dfa9c7caa96fe141a065290febb17f1c5fc45b26a01caacd8d350334f9950885bd5f8a1fe7fae0e5ee4
7
- data.tar.gz: 7a6753959f1a7bd188341e64c3dc9176de7f240cc214334ba155a9e706a5cbb997a02c2355626831435bf2b5b0755a31e412902f095a9900cd87a3413043d6cc
6
+ metadata.gz: ec2fb805c335562a64eca155c5806f3d004b8c4d3a2f6c2363eeb0c529749bb3f220bea4cf626d8d1618d6bdd061b247ae5a50a3f6d94314fbf6d0752bed281e
7
+ data.tar.gz: 96784953366c86e9e8e3d215a809faf0f23e6f012d3ff158ad47e80651fa2bea8cd0f87a3ca630579f7bb8e1aa41c2a30e449ba8af612ab8921d62e41521d293
data/Gemfile CHANGED
@@ -6,9 +6,6 @@ source "https://rubygems.org"
6
6
  gemspec
7
7
 
8
8
  gem "rake"
9
-
10
9
  gem "rake-compiler"
11
-
12
10
  gem "test-unit"
13
-
14
11
  gem "rubocop"
data/README.md CHANGED
@@ -18,8 +18,7 @@ gem install bio-twobit
18
18
  Download BSgenome.Hsapiens.UCSC.hg38
19
19
 
20
20
  ```sh
21
- wget https://bioconductor.org/packages/release/data/annotation/src/contrib/BSgenome.Hsapiens.UCSC.hg38_1.4.4.tar.gz
22
- tar xvf BSgenome.Hsapiens.UCSC.hg38_1.4.4.tar.gz
21
+ wget http://hgdownload.soe.ucsc.edu/goldenPath/hg38/bigZips/hg38.2bit
23
22
  ```
24
23
 
25
24
  Quick Start
@@ -27,7 +26,10 @@ Quick Start
27
26
  ```ruby
28
27
  require 'bio/twobit'
29
28
 
30
- hg38 = Bio::TwoBit.open("BSgenome.Hsapiens.UCSC.hg38/inst/extdata/single_sequences.2bit")
29
+ hg38 = Bio::TwoBit.open("hg38.2bit")
30
+
31
+ hg38.path
32
+ # "hg38.2bit"
31
33
 
32
34
  hg38.info
33
35
  # {"file_size"=>818064875,
@@ -75,6 +77,10 @@ Bio::TwoBit.open("test/fixtures/foo.2bit") do |t|
75
77
  end
76
78
  ```
77
79
 
80
+ ```ruby
81
+ tb.closed? # true / false
82
+ ```
83
+
78
84
  If you would like to include information about soft-masked bases, you need to manually specify `masked: true`
79
85
 
80
86
  ```ruby
@@ -89,10 +95,18 @@ tb.soft_masked_blocks("chr1")
89
95
  # => [[62, 70]]
90
96
  ```
91
97
 
98
+ ```ruby
99
+ tb.masked? # true / false
100
+ ```
101
+
92
102
  ## Development
93
103
 
94
104
  Bug reports and pull requests are welcome on GitHub at https://github.com/ruby-on-bioc/bio-twobit.
95
105
 
106
+ Do you need commit rights to my repository?
107
+ Do you want to get admin rights and take over the project?
108
+ If so, please feel free to contact me at @kojix2.
109
+
96
110
  ## License
97
111
 
98
112
  The gem is available as open source under the terms of the [MIT License](https://opensource.org/licenses/MIT).
data/Rakefile CHANGED
@@ -22,4 +22,4 @@ Rake::ExtensionTask.new("twobit") do |ext|
22
22
  ext.ext_dir = "ext/bio/twobit"
23
23
  end
24
24
 
25
- task default: %i[clobber compile test rubocop]
25
+ task default: %i[clobber compile test]
data/bio-twobit.gemspec CHANGED
@@ -10,7 +10,7 @@ Gem::Specification.new do |spec|
10
10
 
11
11
  spec.summary = "A ruby library for accessing 2bit files"
12
12
  spec.description = "This is a Ruby binding for lib2bit(https://github.com/dpryan79/lib2bit), " \
13
- "which provides high-speed access to genomic data in 2bit file format."
13
+ "which provides high-speed access to genomic data in 2bit file format."
14
14
  spec.homepage = "https://github.com/ruby-on-bioc/bio-twobit"
15
15
  spec.license = "MIT"
16
16
  spec.required_ruby_version = ">= 2.6.0"
@@ -20,8 +20,6 @@ Gem::Specification.new do |spec|
20
20
  (f == __FILE__) || f.match(%r{\A(?:(?:test|spec|features)/|\.(?:git|travis|circleci)|appveyor)})
21
21
  end
22
22
  end
23
- spec.bindir = "exe"
24
- spec.executables = spec.files.grep(%r{\Aexe/}) { |f| File.basename(f) }
25
23
  spec.require_paths = ["lib"]
26
24
  spec.extensions = ["ext/bio/twobit/extconf.rb"]
27
25
  end
@@ -278,6 +278,7 @@ uint8_t getByteMaskFromOffset(int offset) {
278
278
  void *twobitBasesWorker(TwoBit *tb, uint32_t tid, uint32_t start, uint32_t end, int fraction) {
279
279
  void *out;
280
280
  uint32_t tmp[4] = {0, 0, 0, 0}, len = end - start + (start % 4), i = 0, j = 0;
281
+ uint32_t seqLen = end - start;
281
282
  uint32_t blockStart, blockEnd, maskIdx = (uint32_t) -1, maskStart, maskEnd, foo;
282
283
  uint8_t *bytes = NULL, mask = 0, offset;
283
284
 
@@ -375,10 +376,10 @@ void *twobitBasesWorker(TwoBit *tb, uint32_t tid, uint32_t start, uint32_t end,
375
376
  //out is in TCAG order, since that's how 2bit is stored.
376
377
  //However, for whatever reason I went with ACTG in the first release...
377
378
  if(fraction) {
378
- ((double*) out)[0] = ((double) tmp[2])/((double) len);
379
- ((double*) out)[1] = ((double) tmp[1])/((double) len);
380
- ((double*) out)[2] = ((double) tmp[0])/((double) len);
381
- ((double*) out)[3] = ((double) tmp[3])/((double) len);
379
+ ((double*) out)[0] = ((double) tmp[2])/((double) seqLen);
380
+ ((double*) out)[1] = ((double) tmp[1])/((double) seqLen);
381
+ ((double*) out)[2] = ((double) tmp[0])/((double) seqLen);
382
+ ((double*) out)[3] = ((double) tmp[3])/((double) seqLen);
382
383
  } else {
383
384
  ((uint32_t*) out)[0] = tmp[2];
384
385
  ((uint32_t*) out)[1] = tmp[1];
@@ -32,7 +32,7 @@
32
32
  #define NUM2INT64 NUM2LONG
33
33
  #define UINT64_2NUM ULONG2NUM
34
34
  #define INT64_2NUM LONG2NUM
35
- #elif SIZEOF_LONGLONG == SIZEOF_INT64
35
+ #elif SIZEOF_LONG_LONG == SIZEOF_INT64
36
36
  #define NUM2UINT64 NUM2ULL
37
37
  #define NUM2INT64 NUM2LL
38
38
  #define UINT64_2NUM ULL2NUM
@@ -48,494 +48,491 @@ static void TwoBit_free(void *ptr);
48
48
  static size_t TwoBit_memsize(const void *ptr);
49
49
 
50
50
  static const rb_data_type_t TwoBit_type = {
51
- "TwoBit",
52
- {
53
- 0,
54
- TwoBit_free,
55
- TwoBit_memsize,
56
- },
57
- 0,
58
- 0,
59
- RUBY_TYPED_FREE_IMMEDIATELY,
51
+ .wrap_struct_name = "TwoBit",
52
+ .function = {
53
+ .dfree = TwoBit_free,
54
+ .dsize = TwoBit_memsize,
55
+ },
56
+ .flags = RUBY_TYPED_FREE_IMMEDIATELY,
60
57
  };
61
58
 
62
59
  static void
63
60
  TwoBit_free(void *ptr)
64
61
  {
65
- // twobitClose checks for null
66
- twobitClose(ptr);
62
+ // twobitClose checks for null
63
+ twobitClose(ptr);
67
64
  }
68
65
 
69
66
  static size_t
70
67
  TwoBit_memsize(const void *ptr)
71
68
  {
72
- const TwoBit *data = ptr;
69
+ const TwoBit *data = ptr;
73
70
 
74
- return data ? sizeof(*data) : 0;
71
+ return data ? sizeof(*data) : 0;
75
72
  }
76
73
 
77
74
  static TwoBit *getTwoBit(VALUE self)
78
75
  {
79
- TwoBit *ptr = NULL;
80
- TypedData_Get_Struct(self, TwoBit, &TwoBit_type, ptr);
76
+ TwoBit *ptr = NULL;
77
+ TypedData_Get_Struct(self, TwoBit, &TwoBit_type, ptr);
81
78
 
82
- return ptr;
79
+ return ptr;
83
80
  }
84
81
 
85
82
  static VALUE
86
83
  twobit_allocate(VALUE klass)
87
84
  {
88
- TwoBit *tb = NULL;
85
+ TwoBit *tb = NULL;
89
86
 
90
- return TypedData_Wrap_Struct(klass, &TwoBit_type, tb);
87
+ return TypedData_Wrap_Struct(klass, &TwoBit_type, tb);
91
88
  }
92
89
 
93
90
  static VALUE
94
91
  twobit_init(VALUE klass, VALUE fpath, VALUE storeMasked)
95
92
  {
96
- char *path = NULL;
97
- int mask = 0;
98
- TwoBit *tb = NULL;
99
-
100
- path = StringValueCStr(fpath);
101
- mask = NUM2INT(storeMasked);
102
-
103
- tb = twobitOpen(path, mask);
104
- if (!tb)
105
- {
106
- twobitClose(tb);
107
- rb_raise(rb_eRuntimeError, "Could not open file %s", path);
108
- return Qnil;
109
- }
110
- DATA_PTR(klass) = tb;
111
-
112
- return klass;
93
+ char *path = NULL;
94
+ int mask = 0;
95
+ TwoBit *tb = NULL;
96
+
97
+ path = StringValueCStr(fpath);
98
+ mask = NUM2INT(storeMasked);
99
+
100
+ tb = twobitOpen(path, mask);
101
+ if (!tb)
102
+ {
103
+ twobitClose(tb);
104
+ rb_raise(rb_eRuntimeError, "Could not open file %s", path);
105
+ return Qnil;
106
+ }
107
+ DATA_PTR(klass) = tb;
108
+
109
+ return klass;
113
110
  }
114
111
 
115
112
  static VALUE
116
113
  twobit_close(VALUE self)
117
114
  {
118
- TwoBit *tb = getTwoBit(self);
119
- if (tb)
120
- {
121
- twobitClose(tb);
122
- DATA_PTR(self) = NULL;
123
- }
124
-
125
- return Qnil;
115
+ TwoBit *tb = getTwoBit(self);
116
+ if (tb)
117
+ {
118
+ twobitClose(tb);
119
+ DATA_PTR(self) = NULL;
120
+ }
121
+
122
+ return Qnil;
126
123
  }
127
124
 
128
125
  static VALUE
129
126
  twobit_closed_question_mark(VALUE self)
130
127
  {
131
- TwoBit *tb = getTwoBit(self);
132
- if (tb)
133
- {
134
- return Qfalse;
135
- }
136
- else
137
- {
138
- return Qtrue;
139
- }
128
+ TwoBit *tb = getTwoBit(self);
129
+ if (tb)
130
+ {
131
+ return Qfalse;
132
+ }
133
+ else
134
+ {
135
+ return Qtrue;
136
+ }
140
137
  }
141
138
 
142
139
  static VALUE
143
140
  twobit_info(VALUE self)
144
141
  {
145
- TwoBit *tb = getTwoBit(self);
146
-
147
- if (!tb)
148
- {
149
- rb_raise(rb_eRuntimeError, "The 2bit file handle is not open!");
150
- return Qnil;
151
- }
152
-
153
- uint32_t i, j, foo;
154
- VALUE val;
155
- VALUE info = rb_hash_new();
156
-
157
- //file size
158
- val = UINT64_2NUM(tb->sz);
159
- if (!val)
160
- goto error;
161
- rb_hash_aset(info, rb_str_new2("file_size"), val);
162
-
163
- //nContigs
164
- val = UINT32_2NUM(tb->hdr->nChroms);
165
- if (!val)
166
- goto error;
167
- rb_hash_aset(info, rb_str_new2("nChroms"), val);
168
-
169
- //sequence length
170
- foo = 0;
171
- for (i = 0; i < tb->hdr->nChroms; i++)
172
- {
173
- foo += tb->idx->size[i];
174
- }
175
- val = UINT32_2NUM(foo);
176
- if (!val)
177
- goto error;
178
- rb_hash_aset(info, rb_str_new2("sequence_length"), val);
179
-
180
- //hard-masked length
181
- foo = 0;
182
- for (i = 0; i < tb->hdr->nChroms; i++)
183
- {
184
- for (j = 0; j < tb->idx->nBlockCount[i]; j++)
185
- {
186
- foo += tb->idx->nBlockSizes[i][j];
187
- }
188
- }
189
- val = UINT32_2NUM(foo);
190
- if (!val)
191
- goto error;
192
- rb_hash_aset(info, rb_str_new2("hard_masked_length"), val);
193
-
194
- //soft-masked length
195
- if (tb->idx->maskBlockStart)
196
- {
197
- foo = 0;
198
- for (i = 0; i < tb->hdr->nChroms; i++)
199
- {
200
- for (j = 0; j < tb->idx->maskBlockCount[i]; j++)
201
- {
202
- foo += tb->idx->maskBlockSizes[i][j];
203
- }
204
- }
205
- val = UINT32_2NUM(foo);
206
- if (!val)
207
- goto error;
208
- rb_hash_aset(info, rb_str_new2("soft_masked_length"), val);
209
- }
210
-
211
- return info;
142
+ TwoBit *tb = getTwoBit(self);
143
+
144
+ if (!tb)
145
+ {
146
+ rb_raise(rb_eRuntimeError, "The 2bit file handle is not open!");
147
+ return Qnil;
148
+ }
149
+
150
+ uint32_t i, j, foo;
151
+ VALUE val;
152
+ VALUE info = rb_hash_new();
153
+
154
+ // file size
155
+ val = UINT64_2NUM(tb->sz);
156
+ if (!val)
157
+ goto error;
158
+ rb_hash_aset(info, rb_str_new2("file_size"), val);
159
+
160
+ // nContigs
161
+ val = UINT32_2NUM(tb->hdr->nChroms);
162
+ if (!val)
163
+ goto error;
164
+ rb_hash_aset(info, rb_str_new2("nChroms"), val);
165
+
166
+ // sequence length
167
+ foo = 0;
168
+ for (i = 0; i < tb->hdr->nChroms; i++)
169
+ {
170
+ foo += tb->idx->size[i];
171
+ }
172
+ val = UINT32_2NUM(foo);
173
+ if (!val)
174
+ goto error;
175
+ rb_hash_aset(info, rb_str_new2("sequence_length"), val);
176
+
177
+ // hard-masked length
178
+ foo = 0;
179
+ for (i = 0; i < tb->hdr->nChroms; i++)
180
+ {
181
+ for (j = 0; j < tb->idx->nBlockCount[i]; j++)
182
+ {
183
+ foo += tb->idx->nBlockSizes[i][j];
184
+ }
185
+ }
186
+ val = UINT32_2NUM(foo);
187
+ if (!val)
188
+ goto error;
189
+ rb_hash_aset(info, rb_str_new2("hard_masked_length"), val);
190
+
191
+ // soft-masked length
192
+ if (tb->idx->maskBlockStart)
193
+ {
194
+ foo = 0;
195
+ for (i = 0; i < tb->hdr->nChroms; i++)
196
+ {
197
+ for (j = 0; j < tb->idx->maskBlockCount[i]; j++)
198
+ {
199
+ foo += tb->idx->maskBlockSizes[i][j];
200
+ }
201
+ }
202
+ val = UINT32_2NUM(foo);
203
+ if (!val)
204
+ goto error;
205
+ rb_hash_aset(info, rb_str_new2("soft_masked_length"), val);
206
+ }
207
+
208
+ return info;
212
209
 
213
210
  error:
214
- rb_raise(rb_eRuntimeError, "Received an error while gathering information on the 2bit file!");
215
- return Qnil;
211
+ rb_raise(rb_eRuntimeError, "Received an error while gathering information on the 2bit file!");
212
+ return Qnil;
216
213
  }
217
214
 
218
215
  static VALUE
219
216
  twobit_chroms(VALUE self)
220
217
  {
221
- TwoBit *tb = getTwoBit(self);
218
+ TwoBit *tb = getTwoBit(self);
222
219
 
223
- if (!tb)
224
- {
225
- rb_raise(rb_eRuntimeError, "The 2bit file handle is not open!");
226
- return Qnil;
227
- }
220
+ if (!tb)
221
+ {
222
+ rb_raise(rb_eRuntimeError, "The 2bit file handle is not open!");
223
+ return Qnil;
224
+ }
228
225
 
229
- uint32_t i;
230
- VALUE val;
231
- VALUE chroms = rb_hash_new();
226
+ uint32_t i;
227
+ VALUE val;
228
+ VALUE chroms = rb_hash_new();
232
229
 
233
- for (i = 0; i < tb->hdr->nChroms; i++)
234
- {
235
- val = UINT32_2NUM(tb->idx->size[i]);
236
- if (!val)
237
- goto error;
238
- rb_hash_aset(chroms, rb_str_new2(tb->cl->chrom[i]), val);
239
- }
230
+ for (i = 0; i < tb->hdr->nChroms; i++)
231
+ {
232
+ val = UINT32_2NUM(tb->idx->size[i]);
233
+ if (!val)
234
+ goto error;
235
+ rb_hash_aset(chroms, rb_str_new2(tb->cl->chrom[i]), val);
236
+ }
240
237
 
241
- return chroms;
238
+ return chroms;
242
239
 
243
240
  error:
244
- rb_raise(rb_eRuntimeError, "Received an error while adding an item to the output hash!");
245
- return Qnil;
241
+ rb_raise(rb_eRuntimeError, "Received an error while adding an item to the output hash!");
242
+ return Qnil;
246
243
  }
247
244
 
248
245
  static VALUE
249
246
  twobit_sequence(VALUE self, VALUE chrom, VALUE rbstart, VALUE rbend)
250
247
  {
251
- char *ch, *str;
252
- unsigned long startl = 0, endl = 0;
253
- uint32_t start, end, len;
254
- TwoBit *tb;
255
-
256
- ch = StringValueCStr(chrom);
257
- startl = NUM2UINT32(rbstart);
258
- endl = NUM2UINT32(rbend);
259
- tb = getTwoBit(self);
260
-
261
- if (!tb)
262
- {
263
- rb_raise(rb_eRuntimeError, "The 2bit file handle is not open!");
264
- return Qnil;
265
- }
266
-
267
- len = twobitChromLen(tb, ch);
268
- if (len == 0)
269
- {
270
- rb_raise(rb_eRuntimeError, "The chromosome %s does not exist in the 2bit file!", ch);
271
- return Qnil;
272
- }
273
- if (endl > len)
274
- endl = len;
275
- end = (uint32_t)endl;
276
- if (startl >= endl && startl > 0)
277
- {
278
- rb_raise(rb_eRuntimeError, "The start position %lu is greater than the end position %lu!", startl, endl);
279
- return Qnil;
280
- }
281
- start = (uint32_t)startl;
282
-
283
- str = twobitSequence(tb, ch, start, end);
284
-
285
- return rb_str_new2(str);
248
+ char *ch, *str;
249
+ unsigned long startl = 0, endl = 0;
250
+ uint32_t start, end, len;
251
+ TwoBit *tb;
252
+
253
+ ch = StringValueCStr(chrom);
254
+ startl = NUM2UINT32(rbstart);
255
+ endl = NUM2UINT32(rbend);
256
+ tb = getTwoBit(self);
257
+
258
+ if (!tb)
259
+ {
260
+ rb_raise(rb_eRuntimeError, "The 2bit file handle is not open!");
261
+ return Qnil;
262
+ }
263
+
264
+ len = twobitChromLen(tb, ch);
265
+ if (len == 0)
266
+ {
267
+ rb_raise(rb_eRuntimeError, "The chromosome %s does not exist in the 2bit file!", ch);
268
+ return Qnil;
269
+ }
270
+ if (endl > len)
271
+ endl = len;
272
+ end = (uint32_t)endl;
273
+ if (startl >= endl && startl > 0)
274
+ {
275
+ rb_raise(rb_eRuntimeError, "The start position %lu is greater than the end position %lu!", startl, endl);
276
+ return Qnil;
277
+ }
278
+ start = (uint32_t)startl;
279
+
280
+ str = twobitSequence(tb, ch, start, end);
281
+
282
+ return rb_str_new2(str);
286
283
  }
287
284
 
288
285
  static VALUE
289
286
  twobit_bases(VALUE self, VALUE chrom, VALUE start, VALUE end, VALUE fraction)
290
287
  {
291
- char *ch;
292
- uint32_t st, en, fr;
293
- TwoBit *tb;
294
- void *o = NULL;
295
- VALUE val, hash;
296
-
297
- tb = getTwoBit(self);
298
- if (!tb)
299
- {
300
- rb_raise(rb_eRuntimeError, "The 2bit file handle is not open!");
301
- return Qnil;
302
- }
303
-
304
- ch = StringValueCStr(chrom);
305
- st = NUM2UINT32(start);
306
- en = NUM2UINT32(end);
307
- fr = NUM2INT(fraction);
308
-
309
- o = twobitBases(tb, ch, st, en, fr);
310
- if (!o)
311
- {
312
- rb_raise(rb_eRuntimeError, "Received an error while determining the per-base metrics.");
313
- return Qnil;
314
- }
315
-
316
- hash = rb_hash_new();
317
-
318
- if (fr)
319
- {
320
- val = DBL2NUM(((double *)o)[0]);
321
- }
322
- else
323
- {
324
- val = UINT32_2NUM(((uint32_t *)o)[0]);
325
- }
326
- rb_hash_aset(hash, rb_str_new2("A"), val);
327
-
328
- if (fr)
329
- {
330
- val = DBL2NUM(((double *)o)[1]);
331
- }
332
- else
333
- {
334
- val = UINT32_2NUM(((uint32_t *)o)[1]);
335
- }
336
- rb_hash_aset(hash, rb_str_new2("C"), val);
337
-
338
- if (fr)
339
- {
340
- val = DBL2NUM(((double *)o)[2]);
341
- }
342
- else
343
- {
344
- val = UINT32_2NUM(((uint32_t *)o)[2]);
345
- }
346
- rb_hash_aset(hash, rb_str_new2("T"), val);
347
-
348
- if (fr)
349
- {
350
- val = DBL2NUM(((double *)o)[3]);
351
- }
352
- else
353
- {
354
- val = UINT32_2NUM(((uint32_t *)o)[3]);
355
- }
356
- rb_hash_aset(hash, rb_str_new2("G"), val);
357
-
358
- free(o);
359
-
360
- return hash;
288
+ char *ch;
289
+ uint32_t st, en, fr;
290
+ TwoBit *tb;
291
+ void *o = NULL;
292
+ VALUE val, hash;
293
+
294
+ tb = getTwoBit(self);
295
+ if (!tb)
296
+ {
297
+ rb_raise(rb_eRuntimeError, "The 2bit file handle is not open!");
298
+ return Qnil;
299
+ }
300
+
301
+ ch = StringValueCStr(chrom);
302
+ st = NUM2UINT32(start);
303
+ en = NUM2UINT32(end);
304
+ fr = NUM2INT(fraction);
305
+
306
+ o = twobitBases(tb, ch, st, en, fr);
307
+ if (!o)
308
+ {
309
+ rb_raise(rb_eRuntimeError, "Received an error while determining the per-base metrics.");
310
+ return Qnil;
311
+ }
312
+
313
+ hash = rb_hash_new();
314
+
315
+ if (fr)
316
+ {
317
+ val = DBL2NUM(((double *)o)[0]);
318
+ }
319
+ else
320
+ {
321
+ val = UINT32_2NUM(((uint32_t *)o)[0]);
322
+ }
323
+ rb_hash_aset(hash, rb_str_new2("A"), val);
324
+
325
+ if (fr)
326
+ {
327
+ val = DBL2NUM(((double *)o)[1]);
328
+ }
329
+ else
330
+ {
331
+ val = UINT32_2NUM(((uint32_t *)o)[1]);
332
+ }
333
+ rb_hash_aset(hash, rb_str_new2("C"), val);
334
+
335
+ if (fr)
336
+ {
337
+ val = DBL2NUM(((double *)o)[2]);
338
+ }
339
+ else
340
+ {
341
+ val = UINT32_2NUM(((uint32_t *)o)[2]);
342
+ }
343
+ rb_hash_aset(hash, rb_str_new2("T"), val);
344
+
345
+ if (fr)
346
+ {
347
+ val = DBL2NUM(((double *)o)[3]);
348
+ }
349
+ else
350
+ {
351
+ val = UINT32_2NUM(((uint32_t *)o)[3]);
352
+ }
353
+ rb_hash_aset(hash, rb_str_new2("G"), val);
354
+
355
+ free(o);
356
+
357
+ return hash;
361
358
  }
362
359
 
363
360
  static VALUE
364
361
  twobit_hard_masked_blocks(VALUE self, VALUE chrom, VALUE rbstart, VALUE rbend)
365
362
  {
366
- char *ch;
367
- TwoBit *tb;
368
- long tid = -1;
369
- unsigned long startl = 0, endl = 0, totalBlocks = 0;
370
- uint32_t i, len, start, end, blockStart, blockEnd;
371
- VALUE val, ary;
372
-
373
- tb = getTwoBit(self);
374
- ch = StringValueCStr(chrom);
375
- startl = NUM2UINT32(rbstart);
376
- endl = NUM2UINT32(rbend);
377
-
378
- if (!tb)
379
- {
380
- rb_raise(rb_eRuntimeError, "The 2bit file handle is not open!");
381
- return Qnil;
382
- }
383
-
384
- //Get the chromosome ID
385
- for (i = 0; i < tb->hdr->nChroms; i++)
386
- {
387
- if (strcmp(tb->cl->chrom[i], ch) == 0)
388
- {
389
- tid = i;
390
- break;
391
- }
392
- }
393
-
394
- len = twobitChromLen(tb, ch);
395
- if (len == 0)
396
- {
397
- rb_raise(rb_eRuntimeError, "The chromosome %s doesn't exist in the 2bit file!", ch);
398
- return Qnil;
399
- }
400
- if (endl == 0)
401
- endl = len;
402
- if (endl > len)
403
- endl = len;
404
- end = (uint32_t)endl;
405
- if (startl > endl && startl > 0)
406
- {
407
- rb_raise(rb_eRuntimeError, "The start value must be less then the end value (and the end of the chromosome!");
408
- return Qnil;
409
- }
410
- start = (uint32_t)startl;
411
-
412
- //Count the total number of overlapping N-masked blocks
413
- for (i = 0; i < tb->idx->nBlockCount[tid]; i++)
414
- {
415
- blockStart = tb->idx->nBlockStart[tid][i];
416
- blockEnd = blockStart + tb->idx->nBlockSizes[tid][i];
417
- if (blockStart < end && blockEnd > start)
418
- {
419
- totalBlocks++;
420
- }
421
- }
422
-
423
- //Form the output
424
- ary = rb_ary_new2(totalBlocks);
425
- if (totalBlocks == 0)
426
- return ary;
427
- for (i = 0; i < tb->idx->nBlockCount[tid]; i++)
428
- {
429
- blockStart = tb->idx->nBlockStart[tid][i];
430
- blockEnd = blockStart + tb->idx->nBlockSizes[tid][i];
431
- if (blockStart < end && blockEnd > start)
432
- {
433
- val = rb_ary_new3(2, UINT32_2NUM(blockStart), UINT32_2NUM(blockEnd));
434
- rb_ary_push(ary, val);
435
- }
436
- }
437
-
438
- return ary;
363
+ char *ch;
364
+ TwoBit *tb;
365
+ long tid = -1;
366
+ unsigned long startl = 0, endl = 0, totalBlocks = 0;
367
+ uint32_t i, len, start, end, blockStart, blockEnd;
368
+ VALUE val, ary;
369
+
370
+ tb = getTwoBit(self);
371
+ ch = StringValueCStr(chrom);
372
+ startl = NUM2UINT32(rbstart);
373
+ endl = NUM2UINT32(rbend);
374
+
375
+ if (!tb)
376
+ {
377
+ rb_raise(rb_eRuntimeError, "The 2bit file handle is not open!");
378
+ return Qnil;
379
+ }
380
+
381
+ // Get the chromosome ID
382
+ for (i = 0; i < tb->hdr->nChroms; i++)
383
+ {
384
+ if (strcmp(tb->cl->chrom[i], ch) == 0)
385
+ {
386
+ tid = i;
387
+ break;
388
+ }
389
+ }
390
+
391
+ len = twobitChromLen(tb, ch);
392
+ if (len == 0)
393
+ {
394
+ rb_raise(rb_eRuntimeError, "The chromosome %s doesn't exist in the 2bit file!", ch);
395
+ return Qnil;
396
+ }
397
+ if (endl == 0)
398
+ endl = len;
399
+ if (endl > len)
400
+ endl = len;
401
+ end = (uint32_t)endl;
402
+ if (startl > endl && startl > 0)
403
+ {
404
+ rb_raise(rb_eRuntimeError, "The start value must be less then the end value (and the end of the chromosome!");
405
+ return Qnil;
406
+ }
407
+ start = (uint32_t)startl;
408
+
409
+ // Count the total number of overlapping N-masked blocks
410
+ for (i = 0; i < tb->idx->nBlockCount[tid]; i++)
411
+ {
412
+ blockStart = tb->idx->nBlockStart[tid][i];
413
+ blockEnd = blockStart + tb->idx->nBlockSizes[tid][i];
414
+ if (blockStart < end && blockEnd > start)
415
+ {
416
+ totalBlocks++;
417
+ }
418
+ }
419
+
420
+ // Form the output
421
+ ary = rb_ary_new2(totalBlocks);
422
+ if (totalBlocks == 0)
423
+ return ary;
424
+ for (i = 0; i < tb->idx->nBlockCount[tid]; i++)
425
+ {
426
+ blockStart = tb->idx->nBlockStart[tid][i];
427
+ blockEnd = blockStart + tb->idx->nBlockSizes[tid][i];
428
+ if (blockStart < end && blockEnd > start)
429
+ {
430
+ val = rb_ary_new3(2, UINT32_2NUM(blockStart), UINT32_2NUM(blockEnd));
431
+ rb_ary_push(ary, val);
432
+ }
433
+ }
434
+
435
+ return ary;
439
436
  }
440
437
 
441
438
  static VALUE
442
439
  twobit_soft_masked_blocks(VALUE self, VALUE chrom, VALUE rbstart, VALUE rbend)
443
440
  {
444
- char *ch;
445
- TwoBit *tb;
446
- long tid = -1;
447
- unsigned long startl = 0, endl = 0, totalBlocks = 0;
448
- uint32_t i, len, start, end, blockStart, blockEnd;
449
- VALUE val, ary;
450
-
451
- tb = getTwoBit(self);
452
- ch = StringValueCStr(chrom);
453
- startl = NUM2UINT32(rbstart);
454
- endl = NUM2UINT32(rbend);
455
-
456
- if (!tb)
457
- {
458
- rb_raise(rb_eRuntimeError, "The 2bit file handle is not open!");
459
- return Qnil;
460
- }
461
-
462
- //Get the chromosome ID
463
- for (i = 0; i < tb->hdr->nChroms; i++)
464
- {
465
- if (strcmp(tb->cl->chrom[i], ch) == 0)
466
- {
467
- tid = i;
468
- break;
469
- }
470
- }
471
-
472
- len = twobitChromLen(tb, ch);
473
- if (len == 0)
474
- {
475
- rb_raise(rb_eRuntimeError, "The chromosome %s doesn't exist in the 2bit file!", ch);
476
- return Qnil;
477
- }
478
- if (endl == 0)
479
- endl = len;
480
- if (endl > len)
481
- endl = len;
482
- end = (uint32_t)endl;
483
- if (startl >= endl && startl > 0)
484
- {
485
- rb_raise(rb_eRuntimeError, "The start value must be less then the end value (and the end of the chromosome!");
486
- return Qnil;
487
- }
488
- start = (uint32_t)startl;
489
-
490
- if (!tb->idx->maskBlockStart)
491
- {
492
- rb_raise(rb_eRuntimeError, "The file was not opened with storeMasked=True! Consequently, there are no stored soft-masked regions.");
493
- return Qnil;
494
- }
495
-
496
- //Count the total number of overlapping N-masked blocks
497
- for (i = 0; i < tb->idx->maskBlockCount[tid]; i++)
498
- {
499
- blockStart = tb->idx->maskBlockStart[tid][i];
500
- blockEnd = blockStart + tb->idx->maskBlockSizes[tid][i];
501
- if (blockStart < end && blockEnd > start)
502
- {
503
- totalBlocks++;
504
- }
505
- }
506
-
507
- //Form the output
508
- ary = rb_ary_new2(totalBlocks);
509
- if (totalBlocks == 0)
510
- return ary;
511
- for (i = 0; i < tb->idx->maskBlockCount[tid]; i++)
512
- {
513
- blockStart = tb->idx->maskBlockStart[tid][i];
514
- blockEnd = blockStart + tb->idx->maskBlockSizes[tid][i];
515
- if (blockStart < end && blockEnd > start)
516
- {
517
- val = rb_ary_new3(2, UINT32_2NUM(blockStart), UINT32_2NUM(blockEnd));
518
- rb_ary_push(ary, val);
519
- }
520
- }
521
-
522
- return ary;
441
+ char *ch;
442
+ TwoBit *tb;
443
+ long tid = -1;
444
+ unsigned long startl = 0, endl = 0, totalBlocks = 0;
445
+ uint32_t i, len, start, end, blockStart, blockEnd;
446
+ VALUE val, ary;
447
+
448
+ tb = getTwoBit(self);
449
+ ch = StringValueCStr(chrom);
450
+ startl = NUM2UINT32(rbstart);
451
+ endl = NUM2UINT32(rbend);
452
+
453
+ if (!tb)
454
+ {
455
+ rb_raise(rb_eRuntimeError, "The 2bit file handle is not open!");
456
+ return Qnil;
457
+ }
458
+
459
+ // Get the chromosome ID
460
+ for (i = 0; i < tb->hdr->nChroms; i++)
461
+ {
462
+ if (strcmp(tb->cl->chrom[i], ch) == 0)
463
+ {
464
+ tid = i;
465
+ break;
466
+ }
467
+ }
468
+
469
+ len = twobitChromLen(tb, ch);
470
+ if (len == 0)
471
+ {
472
+ rb_raise(rb_eRuntimeError, "The chromosome %s doesn't exist in the 2bit file!", ch);
473
+ return Qnil;
474
+ }
475
+ if (endl == 0)
476
+ endl = len;
477
+ if (endl > len)
478
+ endl = len;
479
+ end = (uint32_t)endl;
480
+ if (startl >= endl && startl > 0)
481
+ {
482
+ rb_raise(rb_eRuntimeError, "The start value must be less then the end value (and the end of the chromosome!");
483
+ return Qnil;
484
+ }
485
+ start = (uint32_t)startl;
486
+
487
+ if (!tb->idx->maskBlockStart)
488
+ {
489
+ rb_raise(rb_eRuntimeError, "The file was not opened with storeMasked=True! Consequently, there are no stored soft-masked regions.");
490
+ return Qnil;
491
+ }
492
+
493
+ // Count the total number of overlapping N-masked blocks
494
+ for (i = 0; i < tb->idx->maskBlockCount[tid]; i++)
495
+ {
496
+ blockStart = tb->idx->maskBlockStart[tid][i];
497
+ blockEnd = blockStart + tb->idx->maskBlockSizes[tid][i];
498
+ if (blockStart < end && blockEnd > start)
499
+ {
500
+ totalBlocks++;
501
+ }
502
+ }
503
+
504
+ // Form the output
505
+ ary = rb_ary_new2(totalBlocks);
506
+ if (totalBlocks == 0)
507
+ return ary;
508
+ for (i = 0; i < tb->idx->maskBlockCount[tid]; i++)
509
+ {
510
+ blockStart = tb->idx->maskBlockStart[tid][i];
511
+ blockEnd = blockStart + tb->idx->maskBlockSizes[tid][i];
512
+ if (blockStart < end && blockEnd > start)
513
+ {
514
+ val = rb_ary_new3(2, UINT32_2NUM(blockStart), UINT32_2NUM(blockEnd));
515
+ rb_ary_push(ary, val);
516
+ }
517
+ }
518
+
519
+ return ary;
523
520
  }
524
521
 
525
522
  void Init_twobit(void)
526
523
  {
527
- mBio = rb_define_module("Bio");
528
- mTwoBit = rb_define_class_under(mBio, "TwoBit", rb_cObject);
529
-
530
- rb_define_alloc_func(mTwoBit, twobit_allocate);
531
-
532
- rb_define_private_method(mTwoBit, "initialize_raw", twobit_init, 2);
533
- rb_define_method(mTwoBit, "close", twobit_close, 0);
534
- rb_define_method(mTwoBit, "closed?", twobit_closed_question_mark, 0);
535
- rb_define_method(mTwoBit, "info", twobit_info, 0);
536
- rb_define_method(mTwoBit, "chroms", twobit_chroms, 0);
537
- rb_define_private_method(mTwoBit, "sequence_raw", twobit_sequence, 3);
538
- rb_define_private_method(mTwoBit, "bases_raw", twobit_bases, 4);
539
- rb_define_private_method(mTwoBit, "hard_masked_blocks_raw", twobit_hard_masked_blocks, 3);
540
- rb_define_private_method(mTwoBit, "soft_masked_blocks_raw", twobit_soft_masked_blocks, 3);
524
+ mBio = rb_define_module("Bio");
525
+ mTwoBit = rb_define_class_under(mBio, "TwoBit", rb_cObject);
526
+
527
+ rb_define_alloc_func(mTwoBit, twobit_allocate);
528
+
529
+ rb_define_private_method(mTwoBit, "initialize_raw", twobit_init, 2);
530
+ rb_define_method(mTwoBit, "close", twobit_close, 0);
531
+ rb_define_method(mTwoBit, "closed?", twobit_closed_question_mark, 0);
532
+ rb_define_method(mTwoBit, "info", twobit_info, 0);
533
+ rb_define_method(mTwoBit, "chroms", twobit_chroms, 0);
534
+ rb_define_private_method(mTwoBit, "sequence_raw", twobit_sequence, 3);
535
+ rb_define_private_method(mTwoBit, "bases_raw", twobit_bases, 4);
536
+ rb_define_private_method(mTwoBit, "hard_masked_blocks_raw", twobit_hard_masked_blocks, 3);
537
+ rb_define_private_method(mTwoBit, "soft_masked_blocks_raw", twobit_soft_masked_blocks, 3);
541
538
  }
@@ -2,6 +2,6 @@
2
2
 
3
3
  module Bio
4
4
  class TwoBit
5
- VERSION = "0.1.2"
5
+ VERSION = "0.1.4"
6
6
  end
7
7
  end
data/lib/bio/twobit.rb CHANGED
@@ -40,19 +40,33 @@ module Bio
40
40
  @masked
41
41
  end
42
42
 
43
+ # Since "end" is a reserved word in Ruby, use "stop" instead.
44
+
43
45
  def sequence(chrom, start = 0, stop = 0)
46
+ raise ArgumentError, "negative start position" if start.negative?
47
+ raise ArgumentError, "negative stop position" if stop.negative?
48
+
44
49
  sequence_raw(chrom, start, stop)
45
50
  end
46
51
 
47
52
  def bases(chrom, start = 0, stop = 0, fraction: true)
53
+ raise ArgumentError, "negative start position" if start.negative?
54
+ raise ArgumentError, "negative stop position" if stop.negative?
55
+
48
56
  bases_raw(chrom, start, stop, fraction ? 1 : 0)
49
57
  end
50
58
 
51
59
  def hard_masked_blocks(chrom, start = 0, stop = 0)
60
+ raise ArgumentError, "negative start position" if start.negative?
61
+ raise ArgumentError, "negative stop position" if stop.negative?
62
+
52
63
  hard_masked_blocks_raw(chrom, start, stop)
53
64
  end
54
65
 
55
66
  def soft_masked_blocks(chrom, start = 0, stop = 0)
67
+ raise ArgumentError, "negative start position" if start.negative?
68
+ raise ArgumentError, "negative stop position" if stop.negative?
69
+
56
70
  soft_masked_blocks_raw(chrom, start, stop)
57
71
  end
58
72
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: bio-twobit
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.2
4
+ version: 0.1.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - kojix2
8
8
  autorequire:
9
- bindir: exe
9
+ bindir: bin
10
10
  cert_chain: []
11
- date: 2022-01-14 00:00:00.000000000 Z
11
+ date: 2022-11-03 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: This is a Ruby binding for lib2bit(https://github.com/dpryan79/lib2bit),
14
14
  which provides high-speed access to genomic data in 2bit file format.
@@ -52,7 +52,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
52
52
  - !ruby/object:Gem::Version
53
53
  version: '0'
54
54
  requirements: []
55
- rubygems_version: 3.3.3
55
+ rubygems_version: 3.3.7
56
56
  signing_key:
57
57
  specification_version: 4
58
58
  summary: A ruby library for accessing 2bit files