bio-cgranges 0.0.0 → 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +23 -13
- data/ext/bio/cgranges/cgranges.c +158 -30
- data/lib/bio/cgranges/version.rb +1 -1
- data/lib/bio/cgranges.rb +20 -0
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 333cfe3994ee4f8b4698b4f9ff0b594f45bd2c1f973c0b596b49a0ba51af9e25
|
4
|
+
data.tar.gz: 80f494f56add4046be7d5e2aee66c4e1496e2e4a84ca9670298d6d05587f7e19
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: f40f2343621f7568848ea8cce89fa84de2b162dd4378b9224b5587cc1ff3a4badf04a4ada19e9286efa3f0d177e200212eb5dbc5cb4ad8c9c882ba55ac045e90
|
7
|
+
data.tar.gz: 258167bdbb5453857a1c3e4f2c8b51729de5d6f43ef8e5d24c74b6d2d0b95c4e2e58cabb14ec07d795bdb2e6949514369b8441c759395a6b0b399dd7dba741de
|
data/README.md
CHANGED
@@ -2,7 +2,8 @@
|
|
2
2
|
|
3
3
|
[Gem Version](https://badge.fury.io/rb/bio-cgranges)
|
4
4
|
[CI](https://github.com/ruby-on-bioc/bio-cgranges/actions/workflows/ci.yml)
|
5
|
-
[Docs](https://rubydoc.info/gems/bio-cgranges)
|
6
|
+
[Docs](https://ruby-on-bioc.github.io/bio-cgranges/)
|
6
7
|
|
7
8
|
Ruby bindings to [lh3/cgranges](https://github.com/lh3/cgranges).
|
8
9
|
|
@@ -27,15 +28,6 @@ granges.add("chr1", 10, 20, 0)
|
|
27
28
|
.add("chr1", 15, 20, 4)
|
28
29
|
.add("chr2", 10, 20, 5)
|
29
30
|
.index
|
30
|
-
|
31
|
-
granges.overlap("chr1", 12, 22)
|
32
|
-
# [["chr1", 10, 20, 0],
|
33
|
-
# ["chr1", 10, 25, 3],
|
34
|
-
# ["chr1", 15, 25, 1],
|
35
|
-
# ["chr1", 15, 20, 4]]
|
36
|
-
|
37
|
-
granges.contain("chr1", 12, 22)
|
38
|
-
# [["chr1", 15, 20, 4]]
|
39
31
|
```
|
40
32
|
|
41
33
|
```
|
@@ -47,18 +39,36 @@ granges.contain("chr1", 12, 22)
|
|
47
39
|
3-3-3-3-3-3-3-3-3-3-3-3-3-3-3
|
48
40
|
4-4-4-4-4
|
49
41
|
5-5-5-5-5
|
50
|
-
Q-Q-Q-Q-Q-Q-Q-Q-Q-Q
|
51
42
|
|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|
|
52
43
|
0.........5.........10........15........20........25........30........35........40
|
53
44
|
```
|
54
45
|
|
46
|
+
```ruby
|
47
|
+
granges.overlap("chr1", 12, 22)
|
48
|
+
# [["chr1", 10, 20, 0],
|
49
|
+
# ["chr1", 10, 25, 3],
|
50
|
+
# ["chr1", 15, 25, 1],
|
51
|
+
# ["chr1", 15, 20, 4]]
|
52
|
+
|
53
|
+
granges.contain("chr1", 12, 22)
|
54
|
+
# [["chr1", 15, 20, 4]]
|
55
|
+
|
56
|
+
granges.coverage("chr1", 20, 35)
|
57
|
+
# [10, 3] # cov, n
|
58
|
+
|
59
|
+
granges.coverage("chr1", 12, 22, mode: :contain)
|
60
|
+
# [5, 1]
|
61
|
+
```
|
62
|
+
|
63
|
+
See the [API documentation](https://ruby-on-bioc.github.io/bio-cgranges/) for details.
|
64
|
+
|
55
65
|
## Development
|
56
66
|
|
57
67
|
Bug reports and pull requests are welcome on GitHub at https://github.com/ruby-on-bioc/bio-cgranges.
|
58
68
|
|
59
|
-
Do you need commit rights to
|
69
|
+
Do you need commit rights to this repository?
|
60
70
|
Do you want to get admin rights and take over the project?
|
61
|
-
If so, please feel free to contact us
|
71
|
+
If so, please feel free to contact us.
|
62
72
|
|
63
73
|
## License
|
64
74
|
|
data/ext/bio/cgranges/cgranges.c
CHANGED
@@ -64,7 +64,7 @@ static const rb_data_type_t cgranges_type = {
|
|
64
64
|
static void
|
65
65
|
cgranges_free(void *ptr)
|
66
66
|
{
|
67
|
-
if(ptr)
|
67
|
+
if (ptr)
|
68
68
|
{
|
69
69
|
cr_destroy(ptr);
|
70
70
|
}
|
@@ -94,6 +94,11 @@ cgranges_allocate(VALUE klass)
|
|
94
94
|
return TypedData_Wrap_Struct(klass, &cgranges_type, ptr);
|
95
95
|
}
|
96
96
|
|
97
|
+
/* Create a new cgranges object
|
98
|
+
*
|
99
|
+
* @return [Bio::CGRanges]
|
100
|
+
*/
|
101
|
+
|
97
102
|
static VALUE
|
98
103
|
cgranges_init(VALUE self)
|
99
104
|
{
|
@@ -107,6 +112,14 @@ cgranges_init(VALUE self)
|
|
107
112
|
return self;
|
108
113
|
}
|
109
114
|
|
115
|
+
/* Add a genomic interval to the cgranges object.
|
116
|
+
* @param [String] contig The contig name
|
117
|
+
* @param [Fixnum] start The start position of the interval.
|
118
|
+
* @param [Fixnum] end The end position of the interval.
|
119
|
+
* @param [Fixnum] label The label of the interval.
|
120
|
+
* @return [Bio::CGRanges] self
|
121
|
+
*/
|
122
|
+
|
110
123
|
static VALUE
|
111
124
|
cgranges_add(VALUE self, VALUE rb_ctg, VALUE rb_st, VALUE rb_en, VALUE rb_label)
|
112
125
|
{
|
@@ -123,25 +136,10 @@ cgranges_add(VALUE self, VALUE rb_ctg, VALUE rb_st, VALUE rb_en, VALUE rb_label)
|
|
123
136
|
return Qnil;
|
124
137
|
}
|
125
138
|
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
if (rb_st != Qnil)
|
132
|
-
{
|
133
|
-
st = NUM2INT32(rb_st);
|
134
|
-
}
|
135
|
-
|
136
|
-
if (rb_en != Qnil)
|
137
|
-
{
|
138
|
-
en = NUM2INT32(rb_en);
|
139
|
-
}
|
140
|
-
|
141
|
-
if (rb_label != Qnil)
|
142
|
-
{
|
143
|
-
label = NUM2INT32(rb_label);
|
144
|
-
}
|
139
|
+
ctg = StringValueCStr(rb_ctg);
|
140
|
+
st = NUM2INT32(rb_st);
|
141
|
+
en = NUM2INT32(rb_en);
|
142
|
+
label = NUM2INT32(rb_label);
|
145
143
|
|
146
144
|
intv = cr_add(cr, ctg, st, en, label);
|
147
145
|
|
@@ -154,10 +152,14 @@ cgranges_add(VALUE self, VALUE rb_ctg, VALUE rb_st, VALUE rb_en, VALUE rb_label)
|
|
154
152
|
return self;
|
155
153
|
}
|
156
154
|
|
155
|
+
/* Index.
|
156
|
+
* @return [Bio::CGRanges] self
|
157
|
+
*/
|
158
|
+
|
157
159
|
static VALUE
|
158
160
|
cgranges_index(VALUE self)
|
159
161
|
{
|
160
|
-
if(RTEST(rb_ivar_get(self, rb_intern("@indexed"))))
|
162
|
+
if (RTEST(rb_ivar_get(self, rb_intern("@indexed"))))
|
161
163
|
{
|
162
164
|
rb_raise(rb_eIndexedError, "CGRanges already indexed");
|
163
165
|
return Qnil;
|
@@ -165,12 +167,19 @@ cgranges_index(VALUE self)
|
|
165
167
|
|
166
168
|
cgranges_t *cr = get_cganges(self);
|
167
169
|
cr_index(cr);
|
168
|
-
|
170
|
+
|
169
171
|
rb_ivar_set(self, rb_intern("@indexed"), Qtrue);
|
170
172
|
|
171
173
|
return self;
|
172
174
|
}
|
173
175
|
|
176
|
+
/* Overlap query.
|
177
|
+
* @param [String] contig The contig name
|
178
|
+
* @param [Fixnum] start The start position of the interval.
|
179
|
+
* @param [Fixnum] end The end position of the interval.
|
180
|
+
* @return [Array] An array of [contig, start, end, label] arrays.
|
181
|
+
*/
|
182
|
+
|
174
183
|
static VALUE
|
175
184
|
cgranges_overlap(VALUE self, VALUE rb_ctg, VALUE rb_st, VALUE rb_en)
|
176
185
|
{
|
@@ -192,7 +201,7 @@ cgranges_overlap(VALUE self, VALUE rb_ctg, VALUE rb_st, VALUE rb_en)
|
|
192
201
|
ctg = StringValueCStr(rb_ctg);
|
193
202
|
st = NUM2INT32(rb_st);
|
194
203
|
en = NUM2INT32(rb_en);
|
195
|
-
|
204
|
+
|
196
205
|
n = cr_overlap(cr, ctg, st, en, &b, &m_b);
|
197
206
|
|
198
207
|
if (n < 0)
|
@@ -206,14 +215,21 @@ cgranges_overlap(VALUE self, VALUE rb_ctg, VALUE rb_st, VALUE rb_en)
|
|
206
215
|
for (int64_t i = 0; i < n; i++)
|
207
216
|
{
|
208
217
|
VALUE rb_intv = rb_ary_new3(
|
209
|
-
|
210
|
-
);
|
218
|
+
4, rb_ctg, INT32_2NUM(cr_start(cr, b[i])), INT32_2NUM(cr_end(cr, b[i])), INT32_2NUM(cr_label(cr, b[i])));
|
211
219
|
rb_ary_push(result, rb_intv);
|
212
220
|
}
|
221
|
+
free(b);
|
213
222
|
|
214
223
|
return result;
|
215
224
|
}
|
216
225
|
|
226
|
+
/* Get the number of overlapping intervals.
|
227
|
+
* @param [String] contig The contig name
|
228
|
+
* @param [Fixnum] start The start position of the interval.
|
229
|
+
* @param [Fixnum] end The end position of the interval.
|
230
|
+
* @return [Fixnum] The number of overlapping intervals.
|
231
|
+
*/
|
232
|
+
|
217
233
|
static VALUE
|
218
234
|
cgranges_count_overlap(VALUE self, VALUE rb_ctg, VALUE rb_st, VALUE rb_en)
|
219
235
|
{
|
@@ -235,7 +251,7 @@ cgranges_count_overlap(VALUE self, VALUE rb_ctg, VALUE rb_st, VALUE rb_en)
|
|
235
251
|
ctg = StringValueCStr(rb_ctg);
|
236
252
|
st = NUM2INT32(rb_st);
|
237
253
|
en = NUM2INT32(rb_en);
|
238
|
-
|
254
|
+
|
239
255
|
n = cr_overlap(cr, ctg, st, en, &b, &m_b);
|
240
256
|
|
241
257
|
if (n < 0)
|
@@ -243,10 +259,18 @@ cgranges_count_overlap(VALUE self, VALUE rb_ctg, VALUE rb_st, VALUE rb_en)
|
|
243
259
|
rb_raise(rb_eRuntimeError, "Error finding overlaps");
|
244
260
|
return Qnil;
|
245
261
|
}
|
262
|
+
free(b);
|
246
263
|
|
247
264
|
return INT64_2NUM(n);
|
248
265
|
}
|
249
266
|
|
267
|
+
/* Containment query.
|
268
|
+
* @param [String] contig The contig name
|
269
|
+
* @param [Fixnum] start The start position of the interval.
|
270
|
+
* @param [Fixnum] end The end position of the interval.
|
271
|
+
* @return [Array] An array of [contig, start, end, label] arrays.
|
272
|
+
*/
|
273
|
+
|
250
274
|
static VALUE
|
251
275
|
cgranges_contain(VALUE self, VALUE rb_ctg, VALUE rb_st, VALUE rb_en)
|
252
276
|
{
|
@@ -268,7 +292,7 @@ cgranges_contain(VALUE self, VALUE rb_ctg, VALUE rb_st, VALUE rb_en)
|
|
268
292
|
ctg = StringValueCStr(rb_ctg);
|
269
293
|
st = NUM2INT32(rb_st);
|
270
294
|
en = NUM2INT32(rb_en);
|
271
|
-
|
295
|
+
|
272
296
|
n = cr_contain(cr, ctg, st, en, &b, &m_b);
|
273
297
|
|
274
298
|
if (n < 0)
|
@@ -282,14 +306,21 @@ cgranges_contain(VALUE self, VALUE rb_ctg, VALUE rb_st, VALUE rb_en)
|
|
282
306
|
for (int64_t i = 0; i < n; i++)
|
283
307
|
{
|
284
308
|
VALUE rb_intv = rb_ary_new3(
|
285
|
-
|
286
|
-
);
|
309
|
+
4, rb_ctg, INT32_2NUM(cr_start(cr, b[i])), INT32_2NUM(cr_end(cr, b[i])), INT32_2NUM(cr_label(cr, b[i])));
|
287
310
|
rb_ary_push(result, rb_intv);
|
288
311
|
}
|
312
|
+
free(b);
|
289
313
|
|
290
314
|
return result;
|
291
315
|
}
|
292
316
|
|
317
|
+
/* Get the number of contained intervals.
|
318
|
+
* @param [String] contig The contig name
|
319
|
+
* @param [Fixnum] start The start position of the interval.
|
320
|
+
* @param [Fixnum] end The end position of the interval.
|
321
|
+
* @return [Fixnum] The number of contained intervals.
|
322
|
+
*/
|
323
|
+
|
293
324
|
static VALUE
|
294
325
|
cgranges_count_contain(VALUE self, VALUE rb_ctg, VALUE rb_st, VALUE rb_en)
|
295
326
|
{
|
@@ -311,7 +342,7 @@ cgranges_count_contain(VALUE self, VALUE rb_ctg, VALUE rb_st, VALUE rb_en)
|
|
311
342
|
ctg = StringValueCStr(rb_ctg);
|
312
343
|
st = NUM2INT32(rb_st);
|
313
344
|
en = NUM2INT32(rb_en);
|
314
|
-
|
345
|
+
|
315
346
|
n = cr_contain(cr, ctg, st, en, &b, &m_b);
|
316
347
|
|
317
348
|
if (n < 0)
|
@@ -319,10 +350,105 @@ cgranges_count_contain(VALUE self, VALUE rb_ctg, VALUE rb_st, VALUE rb_en)
|
|
319
350
|
rb_raise(rb_eRuntimeError, "Error finding contained");
|
320
351
|
return Qnil;
|
321
352
|
}
|
353
|
+
free(b);
|
322
354
|
|
323
355
|
return INT64_2NUM(n);
|
324
356
|
}
|
325
357
|
|
358
|
+
static VALUE
|
359
|
+
cgranges_coverage(VALUE self, VALUE rb_ctg, VALUE rb_st, VALUE rb_en, int contain)
|
360
|
+
{
|
361
|
+
cgranges_t *cr = get_cganges(self);
|
362
|
+
char *ctg = NULL;
|
363
|
+
int32_t st1 = 0;
|
364
|
+
int32_t en1 = 0;
|
365
|
+
|
366
|
+
int64_t *b = NULL;
|
367
|
+
int64_t m_b = 0;
|
368
|
+
int64_t n = 0;
|
369
|
+
int64_t cov = 0, cov_st = 0, cov_en = 0;
|
370
|
+
|
371
|
+
if (!RTEST(rb_ivar_get(self, rb_intern("@indexed"))))
|
372
|
+
{
|
373
|
+
rb_raise(rb_eNoIndexError, "CGRanges not indexed");
|
374
|
+
return Qnil;
|
375
|
+
}
|
376
|
+
|
377
|
+
ctg = StringValueCStr(rb_ctg);
|
378
|
+
st1 = NUM2INT32(rb_st);
|
379
|
+
en1 = NUM2INT32(rb_en);
|
380
|
+
|
381
|
+
if (contain)
|
382
|
+
{
|
383
|
+
n = cr_contain(cr, ctg, st1, en1, &b, &m_b);
|
384
|
+
}
|
385
|
+
else
|
386
|
+
{
|
387
|
+
n = cr_overlap(cr, ctg, st1, en1, &b, &m_b);
|
388
|
+
}
|
389
|
+
|
390
|
+
if (n < 0)
|
391
|
+
{
|
392
|
+
rb_raise(rb_eRuntimeError, "Error finding overlaps");
|
393
|
+
return Qnil;
|
394
|
+
}
|
395
|
+
|
396
|
+
for (int64_t j = 0; j < n; j++)
|
397
|
+
{
|
398
|
+
cr_intv_t *r = &cr->r[b[j]];
|
399
|
+
int32_t st0 = cr_st(r), en0 = cr_en(r);
|
400
|
+
if (st0 < st1)
|
401
|
+
st0 = st1;
|
402
|
+
if (en0 > en1)
|
403
|
+
en0 = en1;
|
404
|
+
if (st0 > cov_en)
|
405
|
+
{
|
406
|
+
cov += cov_en - cov_st;
|
407
|
+
cov_st = st0, cov_en = en0;
|
408
|
+
}
|
409
|
+
else
|
410
|
+
cov_en = cov_en > en0 ? cov_en : en0;
|
411
|
+
}
|
412
|
+
cov += cov_en - cov_st;
|
413
|
+
free(b);
|
414
|
+
|
415
|
+
return rb_ary_new3(2, INT64_2NUM(cov), INT64_2NUM(n));
|
416
|
+
}
|
417
|
+
|
418
|
+
/* Calculate breadth of coverage. (Overlap)
|
419
|
+
* Same as coverage(contig, start, end, mode: overlap)
|
420
|
+
* @param [String] contig The contig name
|
421
|
+
* @param [Fixnum] start The start position of the interval.
|
422
|
+
* @param [Fixnum] end The end position of the interval.
|
423
|
+
* @return [Array] The breadth of coverage and the number of intervals.
|
424
|
+
* @see coverage
|
425
|
+
*/
|
426
|
+
|
427
|
+
static VALUE
|
428
|
+
cgranges_coverage_overlap(VALUE self, VALUE rb_ctg, VALUE rb_st, VALUE rb_en)
|
429
|
+
{
|
430
|
+
VALUE result;
|
431
|
+
result = cgranges_coverage(self, rb_ctg, rb_st, rb_en, 0);
|
432
|
+
return result;
|
433
|
+
}
|
434
|
+
|
435
|
+
/* Calculate breadth of coverage. (Containment)
|
436
|
+
* same as coverage(contig, start, end, mode: contain)
|
437
|
+
* @param [String] contig The contig name
|
438
|
+
* @param [Fixnum] start The start position of the interval.
|
439
|
+
* @param [Fixnum] end The end position of the interval.
|
440
|
+
* @return [Array] The breadth of coverage and the number of intervals.
|
441
|
+
* @see coverage
|
442
|
+
*/
|
443
|
+
|
444
|
+
static VALUE
|
445
|
+
cgranges_coverage_contain(VALUE self, VALUE rb_ctg, VALUE rb_st, VALUE rb_en)
|
446
|
+
{
|
447
|
+
VALUE result;
|
448
|
+
result = cgranges_coverage(self, rb_ctg, rb_st, rb_en, 1);
|
449
|
+
return result;
|
450
|
+
}
|
451
|
+
|
326
452
|
void Init_cgranges(void)
|
327
453
|
{
|
328
454
|
rb_Bio = rb_define_module("Bio");
|
@@ -339,4 +465,6 @@ void Init_cgranges(void)
|
|
339
465
|
rb_define_method(rb_CGRanges, "count_overlap", cgranges_count_overlap, 3);
|
340
466
|
rb_define_method(rb_CGRanges, "contain", cgranges_contain, 3);
|
341
467
|
rb_define_method(rb_CGRanges, "count_contain", cgranges_count_contain, 3);
|
468
|
+
rb_define_method(rb_CGRanges, "coverage_overlap", cgranges_coverage_overlap, 3);
|
469
|
+
rb_define_method(rb_CGRanges, "coverage_contain", cgranges_coverage_contain, 3);
|
342
470
|
}
|
data/lib/bio/cgranges/version.rb
CHANGED
data/lib/bio/cgranges.rb
CHANGED
@@ -6,5 +6,25 @@ require_relative "cgranges/cgranges"
|
|
6
6
|
module Bio
|
7
7
|
# Reader for .2bit files (i.e., from UCSC genome browser)
|
8
8
|
class CGRanges
|
9
|
+
# Calculate breadth of coverage.
|
10
|
+
# This is a wrapper method for `coverage_overlap` and `coverage_contain`.
|
11
|
+
# @param [String] contig The contig name
|
12
|
+
# @param [Fixnum] start The start position of the interval.
|
13
|
+
# @param [Fixnum] end The end position of the interval.
|
14
|
+
# @param [Symbol] mode :overlap or :contain (default: :overlap)
|
15
|
+
# @return [Array] The breadth of coverage and the number of intervals.
|
16
|
+
# @see coverage_overlap
|
17
|
+
# @see coverage_contain
|
18
|
+
|
19
|
+
def coverage(ctg, rb_st, rb_en, mode: :overlap)
|
20
|
+
case mode
|
21
|
+
when :overlap
|
22
|
+
coverage_overlap(ctg, rb_st, rb_en)
|
23
|
+
when :contain
|
24
|
+
coverage_contain(ctg, rb_st, rb_en)
|
25
|
+
else
|
26
|
+
raise ArgumentError, "unknown mode: #{mode}"
|
27
|
+
end
|
28
|
+
end
|
9
29
|
end
|
10
30
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: bio-cgranges
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.0
|
4
|
+
version: 0.0.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- kojix2
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-05-
|
11
|
+
date: 2022-05-15 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: Genomic interval overlap queries
|
14
14
|
email:
|