bio-cgranges 0.0.0 → 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +23 -13
- data/ext/bio/cgranges/cgranges.c +158 -30
- data/lib/bio/cgranges/version.rb +1 -1
- data/lib/bio/cgranges.rb +20 -0
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 333cfe3994ee4f8b4698b4f9ff0b594f45bd2c1f973c0b596b49a0ba51af9e25
|
4
|
+
data.tar.gz: 80f494f56add4046be7d5e2aee66c4e1496e2e4a84ca9670298d6d05587f7e19
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: f40f2343621f7568848ea8cce89fa84de2b162dd4378b9224b5587cc1ff3a4badf04a4ada19e9286efa3f0d177e200212eb5dbc5cb4ad8c9c882ba55ac045e90
|
7
|
+
data.tar.gz: 258167bdbb5453857a1c3e4f2c8b51729de5d6f43ef8e5d24c74b6d2d0b95c4e2e58cabb14ec07d795bdb2e6949514369b8441c759395a6b0b399dd7dba741de
|
data/README.md
CHANGED
@@ -2,7 +2,8 @@
|
|
2
2
|
|
3
3
|
[![Gem Version](https://badge.fury.io/rb/bio-cgranges.svg)](https://badge.fury.io/rb/bio-cgranges)
|
4
4
|
[![test](https://github.com/ruby-on-bioc/bio-cgranges/actions/workflows/ci.yml/badge.svg)](https://github.com/ruby-on-bioc/bio-cgranges/actions/workflows/ci.yml)
|
5
|
-
[![
|
5
|
+
[![docs](https://img.shields.io/badge/docs-stable-blue.svg)](https://rubydoc.info/gems/bio-cgranges)
|
6
|
+
[![docs](https://img.shields.io/badge/docs-latest-blue.svg)](https://ruby-on-bioc.github.io/bio-cgranges/)
|
6
7
|
|
7
8
|
Ruby bindings to [lh3/cgranges](https://github.com/lh3/cgranges).
|
8
9
|
|
@@ -27,15 +28,6 @@ granges.add("chr1", 10, 20, 0)
|
|
27
28
|
.add("chr1", 15, 20, 4)
|
28
29
|
.add("chr2", 10, 20, 5)
|
29
30
|
.index
|
30
|
-
|
31
|
-
granges.overlap("chr1", 12, 22)
|
32
|
-
# [["chr1", 10, 20, 0],
|
33
|
-
# ["chr1", 10, 25, 3],
|
34
|
-
# ["chr1", 15, 25, 1],
|
35
|
-
# ["chr1", 15, 20, 4]]
|
36
|
-
|
37
|
-
granges.contain("chr1", 12, 22)
|
38
|
-
# [["chr1", 15, 20, 4]]
|
39
31
|
```
|
40
32
|
|
41
33
|
```
|
@@ -47,18 +39,36 @@ granges.contain("chr1", 12, 22)
|
|
47
39
|
3-3-3-3-3-3-3-3-3-3-3-3-3-3-3
|
48
40
|
4-4-4-4-4
|
49
41
|
5-5-5-5-5
|
50
|
-
Q-Q-Q-Q-Q-Q-Q-Q-Q-Q
|
51
42
|
|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|
|
52
43
|
0.........5.........10........15........20........25........30........35........40
|
53
44
|
```
|
54
45
|
|
46
|
+
```ruby
|
47
|
+
granges.overlap("chr1", 12, 22)
|
48
|
+
# [["chr1", 10, 20, 0],
|
49
|
+
# ["chr1", 10, 25, 3],
|
50
|
+
# ["chr1", 15, 25, 1],
|
51
|
+
# ["chr1", 15, 20, 4]]
|
52
|
+
|
53
|
+
granges.contain("chr1", 12, 22)
|
54
|
+
# [["chr1", 15, 20, 4]]
|
55
|
+
|
56
|
+
granges.coverage("chr1", 20, 35)
|
57
|
+
# [10, 3] # [breadth of coverage, number of intervals]
|
58
|
+
|
59
|
+
granges.coverage("chr1", 12, 22, mode: :contain)
|
60
|
+
# [5, 1]
|
61
|
+
```
|
62
|
+
|
63
|
+
See [![docs](https://img.shields.io/badge/docs-latest-blue.svg)](https://ruby-on-bioc.github.io/bio-cgranges/) for details.
|
64
|
+
|
55
65
|
## Development
|
56
66
|
|
57
67
|
Bug reports and pull requests are welcome on GitHub at https://github.com/ruby-on-bioc/bio-cgranges.
|
58
68
|
|
59
|
-
Do you need commit rights to
|
69
|
+
Do you need commit rights to this repository?
|
60
70
|
Do you want to get admin rights and take over the project?
|
61
|
-
If so, please feel free to contact us
|
71
|
+
If so, please feel free to contact us.
|
62
72
|
|
63
73
|
## License
|
64
74
|
|
data/ext/bio/cgranges/cgranges.c
CHANGED
@@ -64,7 +64,7 @@ static const rb_data_type_t cgranges_type = {
|
|
64
64
|
static void
|
65
65
|
cgranges_free(void *ptr)
|
66
66
|
{
|
67
|
-
if(ptr)
|
67
|
+
if (ptr)
|
68
68
|
{
|
69
69
|
cr_destroy(ptr);
|
70
70
|
}
|
@@ -94,6 +94,11 @@ cgranges_allocate(VALUE klass)
|
|
94
94
|
return TypedData_Wrap_Struct(klass, &cgranges_type, ptr);
|
95
95
|
}
|
96
96
|
|
97
|
+
/* Create a new cgranges object
|
98
|
+
*
|
99
|
+
* @return [Bio::CGRanges]
|
100
|
+
*/
|
101
|
+
|
97
102
|
static VALUE
|
98
103
|
cgranges_init(VALUE self)
|
99
104
|
{
|
@@ -107,6 +112,14 @@ cgranges_init(VALUE self)
|
|
107
112
|
return self;
|
108
113
|
}
|
109
114
|
|
115
|
+
/* Add a genomic interval to the cgranges object.
|
116
|
+
* @param [String] contig The contig name
|
117
|
+
* @param [Fixnum] start The start position of the interval.
|
118
|
+
* @param [Fixnum] end The end position of the interval.
|
119
|
+
* @param [Fixnum] label The label of the interval.
|
120
|
+
* @return [Bio::CGRanges] self
|
121
|
+
*/
|
122
|
+
|
110
123
|
static VALUE
|
111
124
|
cgranges_add(VALUE self, VALUE rb_ctg, VALUE rb_st, VALUE rb_en, VALUE rb_label)
|
112
125
|
{
|
@@ -123,25 +136,10 @@ cgranges_add(VALUE self, VALUE rb_ctg, VALUE rb_st, VALUE rb_en, VALUE rb_label)
|
|
123
136
|
return Qnil;
|
124
137
|
}
|
125
138
|
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
if (rb_st != Qnil)
|
132
|
-
{
|
133
|
-
st = NUM2INT32(rb_st);
|
134
|
-
}
|
135
|
-
|
136
|
-
if (rb_en != Qnil)
|
137
|
-
{
|
138
|
-
en = NUM2INT32(rb_en);
|
139
|
-
}
|
140
|
-
|
141
|
-
if (rb_label != Qnil)
|
142
|
-
{
|
143
|
-
label = NUM2INT32(rb_label);
|
144
|
-
}
|
139
|
+
ctg = StringValueCStr(rb_ctg);
|
140
|
+
st = NUM2INT32(rb_st);
|
141
|
+
en = NUM2INT32(rb_en);
|
142
|
+
label = NUM2INT32(rb_label);
|
145
143
|
|
146
144
|
intv = cr_add(cr, ctg, st, en, label);
|
147
145
|
|
@@ -154,10 +152,14 @@ cgranges_add(VALUE self, VALUE rb_ctg, VALUE rb_st, VALUE rb_en, VALUE rb_label)
|
|
154
152
|
return self;
|
155
153
|
}
|
156
154
|
|
155
|
+
/* Build the index. Raises an error if the object has already been indexed.
|
156
|
+
* @return [Bio::CGRanges] self
|
157
|
+
*/
|
158
|
+
|
157
159
|
static VALUE
|
158
160
|
cgranges_index(VALUE self)
|
159
161
|
{
|
160
|
-
if(RTEST(rb_ivar_get(self, rb_intern("@indexed"))))
|
162
|
+
if (RTEST(rb_ivar_get(self, rb_intern("@indexed"))))
|
161
163
|
{
|
162
164
|
rb_raise(rb_eIndexedError, "CGRanges already indexed");
|
163
165
|
return Qnil;
|
@@ -165,12 +167,19 @@ cgranges_index(VALUE self)
|
|
165
167
|
|
166
168
|
cgranges_t *cr = get_cganges(self);
|
167
169
|
cr_index(cr);
|
168
|
-
|
170
|
+
|
169
171
|
rb_ivar_set(self, rb_intern("@indexed"), Qtrue);
|
170
172
|
|
171
173
|
return self;
|
172
174
|
}
|
173
175
|
|
176
|
+
/* Overlap query.
|
177
|
+
* @param [String] contig The contig name
|
178
|
+
* @param [Fixnum] start The start position of the interval.
|
179
|
+
* @param [Fixnum] end The end position of the interval.
|
180
|
+
* @return [Array] An array of [contig, start, end, label] arrays.
|
181
|
+
*/
|
182
|
+
|
174
183
|
static VALUE
|
175
184
|
cgranges_overlap(VALUE self, VALUE rb_ctg, VALUE rb_st, VALUE rb_en)
|
176
185
|
{
|
@@ -192,7 +201,7 @@ cgranges_overlap(VALUE self, VALUE rb_ctg, VALUE rb_st, VALUE rb_en)
|
|
192
201
|
ctg = StringValueCStr(rb_ctg);
|
193
202
|
st = NUM2INT32(rb_st);
|
194
203
|
en = NUM2INT32(rb_en);
|
195
|
-
|
204
|
+
|
196
205
|
n = cr_overlap(cr, ctg, st, en, &b, &m_b);
|
197
206
|
|
198
207
|
if (n < 0)
|
@@ -206,14 +215,21 @@ cgranges_overlap(VALUE self, VALUE rb_ctg, VALUE rb_st, VALUE rb_en)
|
|
206
215
|
for (int64_t i = 0; i < n; i++)
|
207
216
|
{
|
208
217
|
VALUE rb_intv = rb_ary_new3(
|
209
|
-
|
210
|
-
);
|
218
|
+
4, rb_ctg, INT32_2NUM(cr_start(cr, b[i])), INT32_2NUM(cr_end(cr, b[i])), INT32_2NUM(cr_label(cr, b[i])));
|
211
219
|
rb_ary_push(result, rb_intv);
|
212
220
|
}
|
221
|
+
free(b);
|
213
222
|
|
214
223
|
return result;
|
215
224
|
}
|
216
225
|
|
226
|
+
/* Get the number of overlapping intervals.
|
227
|
+
* @param [String] contig The contig name
|
228
|
+
* @param [Fixnum] start The start position of the interval.
|
229
|
+
* @param [Fixnum] end The end position of the interval.
|
230
|
+
* @return [Fixnum] The number of overlapping intervals.
|
231
|
+
*/
|
232
|
+
|
217
233
|
static VALUE
|
218
234
|
cgranges_count_overlap(VALUE self, VALUE rb_ctg, VALUE rb_st, VALUE rb_en)
|
219
235
|
{
|
@@ -235,7 +251,7 @@ cgranges_count_overlap(VALUE self, VALUE rb_ctg, VALUE rb_st, VALUE rb_en)
|
|
235
251
|
ctg = StringValueCStr(rb_ctg);
|
236
252
|
st = NUM2INT32(rb_st);
|
237
253
|
en = NUM2INT32(rb_en);
|
238
|
-
|
254
|
+
|
239
255
|
n = cr_overlap(cr, ctg, st, en, &b, &m_b);
|
240
256
|
|
241
257
|
if (n < 0)
|
@@ -243,10 +259,18 @@ cgranges_count_overlap(VALUE self, VALUE rb_ctg, VALUE rb_st, VALUE rb_en)
|
|
243
259
|
rb_raise(rb_eRuntimeError, "Error finding overlaps");
|
244
260
|
return Qnil;
|
245
261
|
}
|
262
|
+
free(b);
|
246
263
|
|
247
264
|
return INT64_2NUM(n);
|
248
265
|
}
|
249
266
|
|
267
|
+
/* Containment query.
|
268
|
+
* @param [String] contig The contig name
|
269
|
+
* @param [Fixnum] start The start position of the interval.
|
270
|
+
* @param [Fixnum] end The end position of the interval.
|
271
|
+
* @return [Array] An array of [contig, start, end, label] arrays.
|
272
|
+
*/
|
273
|
+
|
250
274
|
static VALUE
|
251
275
|
cgranges_contain(VALUE self, VALUE rb_ctg, VALUE rb_st, VALUE rb_en)
|
252
276
|
{
|
@@ -268,7 +292,7 @@ cgranges_contain(VALUE self, VALUE rb_ctg, VALUE rb_st, VALUE rb_en)
|
|
268
292
|
ctg = StringValueCStr(rb_ctg);
|
269
293
|
st = NUM2INT32(rb_st);
|
270
294
|
en = NUM2INT32(rb_en);
|
271
|
-
|
295
|
+
|
272
296
|
n = cr_contain(cr, ctg, st, en, &b, &m_b);
|
273
297
|
|
274
298
|
if (n < 0)
|
@@ -282,14 +306,21 @@ cgranges_contain(VALUE self, VALUE rb_ctg, VALUE rb_st, VALUE rb_en)
|
|
282
306
|
for (int64_t i = 0; i < n; i++)
|
283
307
|
{
|
284
308
|
VALUE rb_intv = rb_ary_new3(
|
285
|
-
|
286
|
-
);
|
309
|
+
4, rb_ctg, INT32_2NUM(cr_start(cr, b[i])), INT32_2NUM(cr_end(cr, b[i])), INT32_2NUM(cr_label(cr, b[i])));
|
287
310
|
rb_ary_push(result, rb_intv);
|
288
311
|
}
|
312
|
+
free(b);
|
289
313
|
|
290
314
|
return result;
|
291
315
|
}
|
292
316
|
|
317
|
+
/* Get the number of contained intervals.
|
318
|
+
* @param [String] contig The contig name
|
319
|
+
* @param [Fixnum] start The start position of the interval.
|
320
|
+
* @param [Fixnum] end The end position of the interval.
|
321
|
+
* @return [Fixnum] The number of contained intervals.
|
322
|
+
*/
|
323
|
+
|
293
324
|
static VALUE
|
294
325
|
cgranges_count_contain(VALUE self, VALUE rb_ctg, VALUE rb_st, VALUE rb_en)
|
295
326
|
{
|
@@ -311,7 +342,7 @@ cgranges_count_contain(VALUE self, VALUE rb_ctg, VALUE rb_st, VALUE rb_en)
|
|
311
342
|
ctg = StringValueCStr(rb_ctg);
|
312
343
|
st = NUM2INT32(rb_st);
|
313
344
|
en = NUM2INT32(rb_en);
|
314
|
-
|
345
|
+
|
315
346
|
n = cr_contain(cr, ctg, st, en, &b, &m_b);
|
316
347
|
|
317
348
|
if (n < 0)
|
@@ -319,10 +350,105 @@ cgranges_count_contain(VALUE self, VALUE rb_ctg, VALUE rb_st, VALUE rb_en)
|
|
319
350
|
rb_raise(rb_eRuntimeError, "Error finding contained");
|
320
351
|
return Qnil;
|
321
352
|
}
|
353
|
+
free(b);
|
322
354
|
|
323
355
|
return INT64_2NUM(n);
|
324
356
|
}
|
325
357
|
|
358
|
+
static VALUE
|
359
|
+
cgranges_coverage(VALUE self, VALUE rb_ctg, VALUE rb_st, VALUE rb_en, int contain)
|
360
|
+
{
|
361
|
+
cgranges_t *cr = get_cganges(self);
|
362
|
+
char *ctg = NULL;
|
363
|
+
int32_t st1 = 0;
|
364
|
+
int32_t en1 = 0;
|
365
|
+
|
366
|
+
int64_t *b = NULL;
|
367
|
+
int64_t m_b = 0;
|
368
|
+
int64_t n = 0;
|
369
|
+
int64_t cov = 0, cov_st = 0, cov_en = 0;
|
370
|
+
|
371
|
+
if (!RTEST(rb_ivar_get(self, rb_intern("@indexed"))))
|
372
|
+
{
|
373
|
+
rb_raise(rb_eNoIndexError, "CGRanges not indexed");
|
374
|
+
return Qnil;
|
375
|
+
}
|
376
|
+
|
377
|
+
ctg = StringValueCStr(rb_ctg);
|
378
|
+
st1 = NUM2INT32(rb_st);
|
379
|
+
en1 = NUM2INT32(rb_en);
|
380
|
+
|
381
|
+
if (contain)
|
382
|
+
{
|
383
|
+
n = cr_contain(cr, ctg, st1, en1, &b, &m_b);
|
384
|
+
}
|
385
|
+
else
|
386
|
+
{
|
387
|
+
n = cr_overlap(cr, ctg, st1, en1, &b, &m_b);
|
388
|
+
}
|
389
|
+
|
390
|
+
if (n < 0)
|
391
|
+
{
|
392
|
+
rb_raise(rb_eRuntimeError, "Error finding overlaps");
|
393
|
+
return Qnil;
|
394
|
+
}
|
395
|
+
|
396
|
+
for (int64_t j = 0; j < n; j++)
|
397
|
+
{
|
398
|
+
cr_intv_t *r = &cr->r[b[j]];
|
399
|
+
int32_t st0 = cr_st(r), en0 = cr_en(r);
|
400
|
+
if (st0 < st1)
|
401
|
+
st0 = st1;
|
402
|
+
if (en0 > en1)
|
403
|
+
en0 = en1;
|
404
|
+
if (st0 > cov_en)
|
405
|
+
{
|
406
|
+
cov += cov_en - cov_st;
|
407
|
+
cov_st = st0, cov_en = en0;
|
408
|
+
}
|
409
|
+
else
|
410
|
+
cov_en = cov_en > en0 ? cov_en : en0;
|
411
|
+
}
|
412
|
+
cov += cov_en - cov_st;
|
413
|
+
free(b);
|
414
|
+
|
415
|
+
return rb_ary_new3(2, INT64_2NUM(cov), INT64_2NUM(n));
|
416
|
+
}
|
417
|
+
|
418
|
+
/* Calculate breadth of coverage. (Overlap)
|
419
|
+
* Same as coverage(contig, start, end, mode: :overlap)
|
420
|
+
* @param [String] contig The contig name
|
421
|
+
* @param [Fixnum] start The start position of the interval.
|
422
|
+
* @param [Fixnum] end The end position of the interval.
|
423
|
+
* @return [Array] The breadth of coverage and the number of intervals.
|
424
|
+
* @see coverage
|
425
|
+
*/
|
426
|
+
|
427
|
+
static VALUE
|
428
|
+
cgranges_coverage_overlap(VALUE self, VALUE rb_ctg, VALUE rb_st, VALUE rb_en)
|
429
|
+
{
|
430
|
+
VALUE result;
|
431
|
+
result = cgranges_coverage(self, rb_ctg, rb_st, rb_en, 0);
|
432
|
+
return result;
|
433
|
+
}
|
434
|
+
|
435
|
+
/* Calculate breadth of coverage. (Containment)
|
436
|
+
* Same as coverage(contig, start, end, mode: :contain)
|
437
|
+
* @param [String] contig The contig name
|
438
|
+
* @param [Fixnum] start The start position of the interval.
|
439
|
+
* @param [Fixnum] end The end position of the interval.
|
440
|
+
* @return [Array] The breadth of coverage and the number of intervals.
|
441
|
+
* @see coverage
|
442
|
+
*/
|
443
|
+
|
444
|
+
static VALUE
|
445
|
+
cgranges_coverage_contain(VALUE self, VALUE rb_ctg, VALUE rb_st, VALUE rb_en)
|
446
|
+
{
|
447
|
+
VALUE result;
|
448
|
+
result = cgranges_coverage(self, rb_ctg, rb_st, rb_en, 1);
|
449
|
+
return result;
|
450
|
+
}
|
451
|
+
|
326
452
|
void Init_cgranges(void)
|
327
453
|
{
|
328
454
|
rb_Bio = rb_define_module("Bio");
|
@@ -339,4 +465,6 @@ void Init_cgranges(void)
|
|
339
465
|
rb_define_method(rb_CGRanges, "count_overlap", cgranges_count_overlap, 3);
|
340
466
|
rb_define_method(rb_CGRanges, "contain", cgranges_contain, 3);
|
341
467
|
rb_define_method(rb_CGRanges, "count_contain", cgranges_count_contain, 3);
|
468
|
+
rb_define_method(rb_CGRanges, "coverage_overlap", cgranges_coverage_overlap, 3);
|
469
|
+
rb_define_method(rb_CGRanges, "coverage_contain", cgranges_coverage_contain, 3);
|
342
470
|
}
|
data/lib/bio/cgranges/version.rb
CHANGED
data/lib/bio/cgranges.rb
CHANGED
@@ -6,5 +6,25 @@ require_relative "cgranges/cgranges"
|
|
6
6
|
module Bio
|
7
7
|
# Ruby bindings to lh3/cgranges for genomic interval overlap queries
|
8
8
|
class CGRanges
|
9
|
+
# Calculate breadth of coverage.
|
10
|
+
# This is a wrapper method for `coverage_overlap` and `coverage_contain`.
|
11
|
+
# @param [String] contig The contig name
|
12
|
+
# @param [Fixnum] start The start position of the interval.
|
13
|
+
# @param [Fixnum] end The end position of the interval.
|
14
|
+
# @param [Symbol] mode :overlap or :contain (default: :overlap)
|
15
|
+
# @return [Array] The breadth of coverage and the number of intervals.
|
16
|
+
# @see coverage_overlap
|
17
|
+
# @see coverage_contain
|
18
|
+
|
19
|
+
def coverage(ctg, rb_st, rb_en, mode: :overlap)
|
20
|
+
case mode
|
21
|
+
when :overlap
|
22
|
+
coverage_overlap(ctg, rb_st, rb_en)
|
23
|
+
when :contain
|
24
|
+
coverage_contain(ctg, rb_st, rb_en)
|
25
|
+
else
|
26
|
+
raise ArgumentError, "unknown mode: #{mode}"
|
27
|
+
end
|
28
|
+
end
|
9
29
|
end
|
10
30
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: bio-cgranges
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- kojix2
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-05-
|
11
|
+
date: 2022-05-15 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: Genomic interval overlap queries
|
14
14
|
email:
|