bio-cgranges 0.0.0 → 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 618c6866b3f708b148682a529b73efda7b782bfc567f92949f72c237fd98435b
4
- data.tar.gz: 0ebbc69b7858f934fd98f60500e49b7300774b21e05ee04c654d55746e18f00b
3
+ metadata.gz: 333cfe3994ee4f8b4698b4f9ff0b594f45bd2c1f973c0b596b49a0ba51af9e25
4
+ data.tar.gz: 80f494f56add4046be7d5e2aee66c4e1496e2e4a84ca9670298d6d05587f7e19
5
5
  SHA512:
6
- metadata.gz: 0444fad48ec7b6266072f2b7fb23684bf295703121c6d0b5b392e3c150de6dda02adbd1ecff161793b5f1a821f8d10cac564c4c60042f5fc4cc191bb26181620
7
- data.tar.gz: 7f783598ec7ed7937c8163593901f055463175e4b16ab0073cf41bb96341f858077877524c5e1409369822e1efd5eaed80a11acc6c7678d9e27dee4ee3e14dec
6
+ metadata.gz: f40f2343621f7568848ea8cce89fa84de2b162dd4378b9224b5587cc1ff3a4badf04a4ada19e9286efa3f0d177e200212eb5dbc5cb4ad8c9c882ba55ac045e90
7
+ data.tar.gz: 258167bdbb5453857a1c3e4f2c8b51729de5d6f43ef8e5d24c74b6d2d0b95c4e2e58cabb14ec07d795bdb2e6949514369b8441c759395a6b0b399dd7dba741de
data/README.md CHANGED
@@ -2,7 +2,8 @@
2
2
 
3
3
  [![Gem Version](https://badge.fury.io/rb/bio-cgranges.svg)](https://badge.fury.io/rb/bio-cgranges)
4
4
  [![test](https://github.com/ruby-on-bioc/bio-cgranges/actions/workflows/ci.yml/badge.svg)](https://github.com/ruby-on-bioc/bio-cgranges/actions/workflows/ci.yml)
5
- [![dics](https://img.shields.io/badge/docs-stable-blue.svg)](https://rubydoc.info/gems/bio-cgranges)
5
+ [![docs](https://img.shields.io/badge/docs-stable-blue.svg)](https://rubydoc.info/gems/bio-cgranges)
6
+ [![docs](https://img.shields.io/badge/docs-latest-blue.svg)](https://ruby-on-bioc.github.io/bio-cgranges/)
6
7
 
7
8
  Ruby bindings to [lh3/cgranges](https://github.com/lh3/cgranges).
8
9
 
@@ -27,15 +28,6 @@ granges.add("chr1", 10, 20, 0)
27
28
  .add("chr1", 15, 20, 4)
28
29
  .add("chr2", 10, 20, 5)
29
30
  .index
30
-
31
- granges.overlap("chr1", 12, 22)
32
- # [["chr1", 10, 20, 0],
33
- # ["chr1", 10, 25, 3],
34
- # ["chr1", 15, 25, 1],
35
- # ["chr1", 15, 20, 4]]
36
-
37
- granges.contain("chr1", 12, 22)
38
- # [["chr1", 15, 20, 4]]
39
31
  ```
40
32
 
41
33
  ```
@@ -47,18 +39,36 @@ granges.contain("chr1", 12, 22)
47
39
  3-3-3-3-3-3-3-3-3-3-3-3-3-3-3
48
40
  4-4-4-4-4
49
41
  5-5-5-5-5
50
- Q-Q-Q-Q-Q-Q-Q-Q-Q-Q
51
42
  |-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|
52
43
  0.........5.........10........15........20........25........30........35........40
53
44
  ```
54
45
 
46
+ ```ruby
47
+ granges.overlap("chr1", 12, 22)
48
+ # [["chr1", 10, 20, 0],
49
+ # ["chr1", 10, 25, 3],
50
+ # ["chr1", 15, 25, 1],
51
+ # ["chr1", 15, 20, 4]]
52
+
53
+ granges.contain("chr1", 12, 22)
54
+ # [["chr1", 15, 20, 4]]
55
+
56
+ granges.coverage("chr1", 20, 35)
57
+ # [10, 3] # [breadth of coverage, number of intervals]
58
+
59
+ granges.coverage("chr1", 12, 22, mode: :contain)
60
+ # [5, 1]
61
+ ```
62
+
63
+ See [![docs](https://img.shields.io/badge/docs-latest-blue.svg)](https://ruby-on-bioc.github.io/bio-cgranges/) for details.
64
+
55
65
  ## Development
56
66
 
57
67
  Bug reports and pull requests are welcome on GitHub at https://github.com/ruby-on-bioc/bio-cgranges.
58
68
 
59
- Do you need commit rights to my repository?
69
+ Do you need commit rights to this repository?
60
70
  Do you want to get admin rights and take over the project?
61
- If so, please feel free to contact us @kojix2.
71
+ If so, please feel free to contact us.
62
72
 
63
73
  ## License
64
74
 
@@ -64,7 +64,7 @@ static const rb_data_type_t cgranges_type = {
64
64
  static void
65
65
  cgranges_free(void *ptr)
66
66
  {
67
- if(ptr)
67
+ if (ptr)
68
68
  {
69
69
  cr_destroy(ptr);
70
70
  }
@@ -94,6 +94,11 @@ cgranges_allocate(VALUE klass)
94
94
  return TypedData_Wrap_Struct(klass, &cgranges_type, ptr);
95
95
  }
96
96
 
97
+ /* Create a new cgranges object
98
+ *
99
+ * @return [Bio::CGRanges]
100
+ */
101
+
97
102
  static VALUE
98
103
  cgranges_init(VALUE self)
99
104
  {
@@ -107,6 +112,14 @@ cgranges_init(VALUE self)
107
112
  return self;
108
113
  }
109
114
 
115
+ /* Add a genomic interval to the cgranges object.
116
+ * @param [String] contig The contig name
117
+ * @param [Integer] start The start position of the interval.
118
+ * @param [Integer] end The end position of the interval.
119
+ * @param [Integer] label The label of the interval.
120
+ * @return [Bio::CGRanges] self
121
+ */
122
+
110
123
  static VALUE
111
124
  cgranges_add(VALUE self, VALUE rb_ctg, VALUE rb_st, VALUE rb_en, VALUE rb_label)
112
125
  {
@@ -123,25 +136,10 @@ cgranges_add(VALUE self, VALUE rb_ctg, VALUE rb_st, VALUE rb_en, VALUE rb_label)
123
136
  return Qnil;
124
137
  }
125
138
 
126
- if (rb_ctg != Qnil)
127
- {
128
- ctg = StringValueCStr(rb_ctg);
129
- }
130
-
131
- if (rb_st != Qnil)
132
- {
133
- st = NUM2INT32(rb_st);
134
- }
135
-
136
- if (rb_en != Qnil)
137
- {
138
- en = NUM2INT32(rb_en);
139
- }
140
-
141
- if (rb_label != Qnil)
142
- {
143
- label = NUM2INT32(rb_label);
144
- }
139
+ ctg = StringValueCStr(rb_ctg);
140
+ st = NUM2INT32(rb_st);
141
+ en = NUM2INT32(rb_en);
142
+ label = NUM2INT32(rb_label);
145
143
 
146
144
  intv = cr_add(cr, ctg, st, en, label);
147
145
 
@@ -154,10 +152,14 @@ cgranges_add(VALUE self, VALUE rb_ctg, VALUE rb_st, VALUE rb_en, VALUE rb_label)
154
152
  return self;
155
153
  }
156
154
 
155
+ /* Build the index. Raises an error if the object has already been indexed.
156
+ * @return [Bio::CGRanges] self
157
+ */
158
+
157
159
  static VALUE
158
160
  cgranges_index(VALUE self)
159
161
  {
160
- if(RTEST(rb_ivar_get(self, rb_intern("@indexed"))))
162
+ if (RTEST(rb_ivar_get(self, rb_intern("@indexed"))))
161
163
  {
162
164
  rb_raise(rb_eIndexedError, "CGRanges already indexed");
163
165
  return Qnil;
@@ -165,12 +167,19 @@ cgranges_index(VALUE self)
165
167
 
166
168
  cgranges_t *cr = get_cganges(self);
167
169
  cr_index(cr);
168
-
170
+
169
171
  rb_ivar_set(self, rb_intern("@indexed"), Qtrue);
170
172
 
171
173
  return self;
172
174
  }
173
175
 
176
+ /* Overlap query.
177
+ * @param [String] contig The contig name
178
+ * @param [Integer] start The start position of the interval.
179
+ * @param [Integer] end The end position of the interval.
180
+ * @return [Array] An array of [contig, start, end, label] arrays.
181
+ */
182
+
174
183
  static VALUE
175
184
  cgranges_overlap(VALUE self, VALUE rb_ctg, VALUE rb_st, VALUE rb_en)
176
185
  {
@@ -192,7 +201,7 @@ cgranges_overlap(VALUE self, VALUE rb_ctg, VALUE rb_st, VALUE rb_en)
192
201
  ctg = StringValueCStr(rb_ctg);
193
202
  st = NUM2INT32(rb_st);
194
203
  en = NUM2INT32(rb_en);
195
-
204
+
196
205
  n = cr_overlap(cr, ctg, st, en, &b, &m_b);
197
206
 
198
207
  if (n < 0)
@@ -206,14 +215,21 @@ cgranges_overlap(VALUE self, VALUE rb_ctg, VALUE rb_st, VALUE rb_en)
206
215
  for (int64_t i = 0; i < n; i++)
207
216
  {
208
217
  VALUE rb_intv = rb_ary_new3(
209
- 4, rb_ctg, INT32_2NUM(cr_start(cr, b[i])), INT32_2NUM(cr_end(cr, b[i])), INT32_2NUM(cr_label(cr, b[i]))
210
- );
218
+ 4, rb_ctg, INT32_2NUM(cr_start(cr, b[i])), INT32_2NUM(cr_end(cr, b[i])), INT32_2NUM(cr_label(cr, b[i])));
211
219
  rb_ary_push(result, rb_intv);
212
220
  }
221
+ free(b);
213
222
 
214
223
  return result;
215
224
  }
216
225
 
226
+ /* Get the number of overlapping intervals.
227
+ * @param [String] contig The contig name
228
+ * @param [Integer] start The start position of the interval.
229
+ * @param [Integer] end The end position of the interval.
230
+ * @return [Integer] The number of overlapping intervals.
231
+ */
232
+
217
233
  static VALUE
218
234
  cgranges_count_overlap(VALUE self, VALUE rb_ctg, VALUE rb_st, VALUE rb_en)
219
235
  {
@@ -235,7 +251,7 @@ cgranges_count_overlap(VALUE self, VALUE rb_ctg, VALUE rb_st, VALUE rb_en)
235
251
  ctg = StringValueCStr(rb_ctg);
236
252
  st = NUM2INT32(rb_st);
237
253
  en = NUM2INT32(rb_en);
238
-
254
+
239
255
  n = cr_overlap(cr, ctg, st, en, &b, &m_b);
240
256
 
241
257
  if (n < 0)
@@ -243,10 +259,18 @@ cgranges_count_overlap(VALUE self, VALUE rb_ctg, VALUE rb_st, VALUE rb_en)
243
259
  rb_raise(rb_eRuntimeError, "Error finding overlaps");
244
260
  return Qnil;
245
261
  }
262
+ free(b);
246
263
 
247
264
  return INT64_2NUM(n);
248
265
  }
249
266
 
267
+ /* Containment query.
268
+ * @param [String] contig The contig name
269
+ * @param [Integer] start The start position of the interval.
270
+ * @param [Integer] end The end position of the interval.
271
+ * @return [Array] An array of [contig, start, end, label] arrays.
272
+ */
273
+
250
274
  static VALUE
251
275
  cgranges_contain(VALUE self, VALUE rb_ctg, VALUE rb_st, VALUE rb_en)
252
276
  {
@@ -268,7 +292,7 @@ cgranges_contain(VALUE self, VALUE rb_ctg, VALUE rb_st, VALUE rb_en)
268
292
  ctg = StringValueCStr(rb_ctg);
269
293
  st = NUM2INT32(rb_st);
270
294
  en = NUM2INT32(rb_en);
271
-
295
+
272
296
  n = cr_contain(cr, ctg, st, en, &b, &m_b);
273
297
 
274
298
  if (n < 0)
@@ -282,14 +306,21 @@ cgranges_contain(VALUE self, VALUE rb_ctg, VALUE rb_st, VALUE rb_en)
282
306
  for (int64_t i = 0; i < n; i++)
283
307
  {
284
308
  VALUE rb_intv = rb_ary_new3(
285
- 4, rb_ctg, INT32_2NUM(cr_start(cr, b[i])), INT32_2NUM(cr_end(cr, b[i])), INT32_2NUM(cr_label(cr, b[i]))
286
- );
309
+ 4, rb_ctg, INT32_2NUM(cr_start(cr, b[i])), INT32_2NUM(cr_end(cr, b[i])), INT32_2NUM(cr_label(cr, b[i])));
287
310
  rb_ary_push(result, rb_intv);
288
311
  }
312
+ free(b);
289
313
 
290
314
  return result;
291
315
  }
292
316
 
317
+ /* Get the number of contained intervals.
318
+ * @param [String] contig The contig name
319
+ * @param [Integer] start The start position of the interval.
320
+ * @param [Integer] end The end position of the interval.
321
+ * @return [Integer] The number of contained intervals.
322
+ */
323
+
293
324
  static VALUE
294
325
  cgranges_count_contain(VALUE self, VALUE rb_ctg, VALUE rb_st, VALUE rb_en)
295
326
  {
@@ -311,7 +342,7 @@ cgranges_count_contain(VALUE self, VALUE rb_ctg, VALUE rb_st, VALUE rb_en)
311
342
  ctg = StringValueCStr(rb_ctg);
312
343
  st = NUM2INT32(rb_st);
313
344
  en = NUM2INT32(rb_en);
314
-
345
+
315
346
  n = cr_contain(cr, ctg, st, en, &b, &m_b);
316
347
 
317
348
  if (n < 0)
@@ -319,10 +350,105 @@ cgranges_count_contain(VALUE self, VALUE rb_ctg, VALUE rb_st, VALUE rb_en)
319
350
  rb_raise(rb_eRuntimeError, "Error finding contained");
320
351
  return Qnil;
321
352
  }
353
+ free(b);
322
354
 
323
355
  return INT64_2NUM(n);
324
356
  }
325
357
 
358
+ static VALUE
359
+ cgranges_coverage(VALUE self, VALUE rb_ctg, VALUE rb_st, VALUE rb_en, int contain)
360
+ {
361
+ cgranges_t *cr = get_cganges(self);
362
+ char *ctg = NULL;
363
+ int32_t st1 = 0;
364
+ int32_t en1 = 0;
365
+
366
+ int64_t *b = NULL;
367
+ int64_t m_b = 0;
368
+ int64_t n = 0;
369
+ int64_t cov = 0, cov_st = 0, cov_en = 0;
370
+
371
+ if (!RTEST(rb_ivar_get(self, rb_intern("@indexed"))))
372
+ {
373
+ rb_raise(rb_eNoIndexError, "CGRanges not indexed");
374
+ return Qnil;
375
+ }
376
+
377
+ ctg = StringValueCStr(rb_ctg);
378
+ st1 = NUM2INT32(rb_st);
379
+ en1 = NUM2INT32(rb_en);
380
+
381
+ if (contain)
382
+ {
383
+ n = cr_contain(cr, ctg, st1, en1, &b, &m_b);
384
+ }
385
+ else
386
+ {
387
+ n = cr_overlap(cr, ctg, st1, en1, &b, &m_b);
388
+ }
389
+
390
+ if (n < 0)
391
+ {
392
+ rb_raise(rb_eRuntimeError, "Error finding overlaps");
393
+ return Qnil;
394
+ }
395
+
396
+ for (int64_t j = 0; j < n; j++)
397
+ {
398
+ cr_intv_t *r = &cr->r[b[j]];
399
+ int32_t st0 = cr_st(r), en0 = cr_en(r);
400
+ if (st0 < st1)
401
+ st0 = st1;
402
+ if (en0 > en1)
403
+ en0 = en1;
404
+ if (st0 > cov_en)
405
+ {
406
+ cov += cov_en - cov_st;
407
+ cov_st = st0, cov_en = en0;
408
+ }
409
+ else
410
+ cov_en = cov_en > en0 ? cov_en : en0;
411
+ }
412
+ cov += cov_en - cov_st;
413
+ free(b);
414
+
415
+ return rb_ary_new3(2, INT64_2NUM(cov), INT64_2NUM(n));
416
+ }
417
+
418
+ /* Calculate breadth of coverage. (Overlap)
419
+ * Same as coverage(contig, start, end, mode: overlap)
420
+ * @param [String] contig The contig name
421
+ * @param [Fixnum] start The start position of the interval.
422
+ * @param [Fixnum] end The end position of the interval.
423
+ * @return [Array] The breadth of coverage and the number of intervals.
424
+ * @see coverage
425
+ */
426
+
427
+ static VALUE
428
+ cgranges_coverage_overlap(VALUE self, VALUE rb_ctg, VALUE rb_st, VALUE rb_en)
429
+ {
430
+ VALUE result;
431
+ result = cgranges_coverage(self, rb_ctg, rb_st, rb_en, 0);
432
+ return result;
433
+ }
434
+
435
+ /* Calculate breadth of coverage. (Containment)
436
+ * same as coverage(contig, start, end, mode: contain)
437
+ * @param [String] contig The contig name
438
+ * @param [Fixnum] start The start position of the interval.
439
+ * @param [Fixnum] end The end position of the interval.
440
+ * @return [Array] The breadth of coverage and the number of intervals.
441
+ * @see coverage
442
+ */
443
+
444
+ static VALUE
445
+ cgranges_coverage_contain(VALUE self, VALUE rb_ctg, VALUE rb_st, VALUE rb_en)
446
+ {
447
+ VALUE result;
448
+ result = cgranges_coverage(self, rb_ctg, rb_st, rb_en, 1);
449
+ return result;
450
+ }
451
+
326
452
  void Init_cgranges(void)
327
453
  {
328
454
  rb_Bio = rb_define_module("Bio");
@@ -339,4 +465,6 @@ void Init_cgranges(void)
339
465
  rb_define_method(rb_CGRanges, "count_overlap", cgranges_count_overlap, 3);
340
466
  rb_define_method(rb_CGRanges, "contain", cgranges_contain, 3);
341
467
  rb_define_method(rb_CGRanges, "count_contain", cgranges_count_contain, 3);
468
+ rb_define_method(rb_CGRanges, "coverage_overlap", cgranges_coverage_overlap, 3);
469
+ rb_define_method(rb_CGRanges, "coverage_contain", cgranges_coverage_contain, 3);
342
470
  }
@@ -2,6 +2,6 @@
2
2
 
3
3
  module Bio
4
4
  class CGRanges
5
- VERSION = "0.0.0"
5
+ VERSION = "0.0.1"
6
6
  end
7
7
  end
data/lib/bio/cgranges.rb CHANGED
@@ -6,5 +6,25 @@ require_relative "cgranges/cgranges"
6
6
  module Bio
7
7
  # Reader for .2bit files (i.e., from UCSC genome browser)
8
8
  class CGRanges
9
+ # Calculate breadth of coverage.
10
+ # This is a wrapper method for `coverage_overlap` and `coverage_contain`.
11
+ # @param [String] contig The contig name
12
+ # @param [Fixnum] start The start position of the interval.
13
+ # @param [Fixnum] end The end position of the interval.
14
+ # @param [Symbol] mode :overlap or :contain (default: :overlap)
15
+ # @return [Array] The breadth of coverage and the number of intervals.
16
+ # @see coverage_overlap
17
+ # @see coverage_contain
18
+
19
+ def coverage(ctg, rb_st, rb_en, mode: :overlap)
20
+ case mode
21
+ when :overlap
22
+ coverage_overlap(ctg, rb_st, rb_en)
23
+ when :contain
24
+ coverage_contain(ctg, rb_st, rb_en)
25
+ else
26
+ raise ArgumentError, "unknown mode: #{mode}"
27
+ end
28
+ end
9
29
  end
10
30
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: bio-cgranges
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.0
4
+ version: 0.0.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - kojix2
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2022-05-13 00:00:00.000000000 Z
11
+ date: 2022-05-15 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: Genomic interval overlap queries
14
14
  email: