bio-cgranges 0.0.0 → 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 618c6866b3f708b148682a529b73efda7b782bfc567f92949f72c237fd98435b
4
- data.tar.gz: 0ebbc69b7858f934fd98f60500e49b7300774b21e05ee04c654d55746e18f00b
3
+ metadata.gz: 333cfe3994ee4f8b4698b4f9ff0b594f45bd2c1f973c0b596b49a0ba51af9e25
4
+ data.tar.gz: 80f494f56add4046be7d5e2aee66c4e1496e2e4a84ca9670298d6d05587f7e19
5
5
  SHA512:
6
- metadata.gz: 0444fad48ec7b6266072f2b7fb23684bf295703121c6d0b5b392e3c150de6dda02adbd1ecff161793b5f1a821f8d10cac564c4c60042f5fc4cc191bb26181620
7
- data.tar.gz: 7f783598ec7ed7937c8163593901f055463175e4b16ab0073cf41bb96341f858077877524c5e1409369822e1efd5eaed80a11acc6c7678d9e27dee4ee3e14dec
6
+ metadata.gz: f40f2343621f7568848ea8cce89fa84de2b162dd4378b9224b5587cc1ff3a4badf04a4ada19e9286efa3f0d177e200212eb5dbc5cb4ad8c9c882ba55ac045e90
7
+ data.tar.gz: 258167bdbb5453857a1c3e4f2c8b51729de5d6f43ef8e5d24c74b6d2d0b95c4e2e58cabb14ec07d795bdb2e6949514369b8441c759395a6b0b399dd7dba741de
data/README.md CHANGED
@@ -2,7 +2,8 @@
2
2
 
3
3
  [![Gem Version](https://badge.fury.io/rb/bio-cgranges.svg)](https://badge.fury.io/rb/bio-cgranges)
4
4
  [![test](https://github.com/ruby-on-bioc/bio-cgranges/actions/workflows/ci.yml/badge.svg)](https://github.com/ruby-on-bioc/bio-cgranges/actions/workflows/ci.yml)
5
- [![dics](https://img.shields.io/badge/docs-stable-blue.svg)](https://rubydoc.info/gems/bio-cgranges)
5
+ [![docs](https://img.shields.io/badge/docs-stable-blue.svg)](https://rubydoc.info/gems/bio-cgranges)
6
+ [![docs](https://img.shields.io/badge/docs-latest-blue.svg)](https://ruby-on-bioc.github.io/bio-cgranges/)
6
7
 
7
8
  Ruby bindings to [lh3/cgranges](https://github.com/lh3/cgranges).
8
9
 
@@ -27,15 +28,6 @@ granges.add("chr1", 10, 20, 0)
27
28
  .add("chr1", 15, 20, 4)
28
29
  .add("chr2", 10, 20, 5)
29
30
  .index
30
-
31
- granges.overlap("chr1", 12, 22)
32
- # [["chr1", 10, 20, 0],
33
- # ["chr1", 10, 25, 3],
34
- # ["chr1", 15, 25, 1],
35
- # ["chr1", 15, 20, 4]]
36
-
37
- granges.contain("chr1", 12, 22)
38
- # [["chr1", 15, 20, 4]]
39
31
  ```
40
32
 
41
33
  ```
@@ -47,18 +39,36 @@ granges.contain("chr1", 12, 22)
47
39
  3-3-3-3-3-3-3-3-3-3-3-3-3-3-3
48
40
  4-4-4-4-4
49
41
  5-5-5-5-5
50
- Q-Q-Q-Q-Q-Q-Q-Q-Q-Q
51
42
  |-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|
52
43
  0.........5.........10........15........20........25........30........35........40
53
44
  ```
54
45
 
46
+ ```ruby
47
+ granges.overlap("chr1", 12, 22)
48
+ # [["chr1", 10, 20, 0],
49
+ # ["chr1", 10, 25, 3],
50
+ # ["chr1", 15, 25, 1],
51
+ # ["chr1", 15, 20, 4]]
52
+
53
+ granges.contain("chr1", 12, 22)
54
+ # [["chr1", 15, 20, 4]]
55
+
56
+ granges.coverage("chr1", 20, 35)
57
+ # [10, 3] # [breadth of coverage, number of intervals]
58
+
59
+ granges.coverage("chr1", 12, 22, mode: :contain)
60
+ # [5, 1]
61
+ ```
62
+
63
+ See [![docs](https://img.shields.io/badge/docs-latest-blue.svg)](https://ruby-on-bioc.github.io/bio-cgranges/) for details.
64
+
55
65
  ## Development
56
66
 
57
67
  Bug reports and pull requests are welcome on GitHub at https://github.com/ruby-on-bioc/bio-cgranges.
58
68
 
59
- Do you need commit rights to my repository?
69
+ Do you need commit rights to this repository?
60
70
  Do you want to get admin rights and take over the project?
61
- If so, please feel free to contact us @kojix2.
71
+ If so, please feel free to contact us.
62
72
 
63
73
  ## License
64
74
 
@@ -64,7 +64,7 @@ static const rb_data_type_t cgranges_type = {
64
64
  static void
65
65
  cgranges_free(void *ptr)
66
66
  {
67
- if(ptr)
67
+ if (ptr)
68
68
  {
69
69
  cr_destroy(ptr);
70
70
  }
@@ -94,6 +94,11 @@ cgranges_allocate(VALUE klass)
94
94
  return TypedData_Wrap_Struct(klass, &cgranges_type, ptr);
95
95
  }
96
96
 
97
+ /* Create a new cgranges object
98
+ *
99
+ * @return [Bio::CGRanges]
100
+ */
101
+
97
102
  static VALUE
98
103
  cgranges_init(VALUE self)
99
104
  {
@@ -107,6 +112,14 @@ cgranges_init(VALUE self)
107
112
  return self;
108
113
  }
109
114
 
115
+ /* Add a genomic interval to the cgranges object.
116
+ * @param [String] contig The contig name
117
+ * @param [Fixnum] start The start position of the interval.
118
+ * @param [Fixnum] end The end position of the interval.
119
+ * @param [Fixnum] label The label of the interval.
120
+ * @return [Bio::CGRanges] self
121
+ */
122
+
110
123
  static VALUE
111
124
  cgranges_add(VALUE self, VALUE rb_ctg, VALUE rb_st, VALUE rb_en, VALUE rb_label)
112
125
  {
@@ -123,25 +136,10 @@ cgranges_add(VALUE self, VALUE rb_ctg, VALUE rb_st, VALUE rb_en, VALUE rb_label)
123
136
  return Qnil;
124
137
  }
125
138
 
126
- if (rb_ctg != Qnil)
127
- {
128
- ctg = StringValueCStr(rb_ctg);
129
- }
130
-
131
- if (rb_st != Qnil)
132
- {
133
- st = NUM2INT32(rb_st);
134
- }
135
-
136
- if (rb_en != Qnil)
137
- {
138
- en = NUM2INT32(rb_en);
139
- }
140
-
141
- if (rb_label != Qnil)
142
- {
143
- label = NUM2INT32(rb_label);
144
- }
139
+ ctg = StringValueCStr(rb_ctg);
140
+ st = NUM2INT32(rb_st);
141
+ en = NUM2INT32(rb_en);
142
+ label = NUM2INT32(rb_label);
145
143
 
146
144
  intv = cr_add(cr, ctg, st, en, label);
147
145
 
@@ -154,10 +152,14 @@ cgranges_add(VALUE self, VALUE rb_ctg, VALUE rb_st, VALUE rb_en, VALUE rb_label)
154
152
  return self;
155
153
  }
156
154
 
155
+ /* Build the index. Required before running queries; raises if the object is already indexed.
156
+ * @return [Bio::CGRanges] self
157
+ */
158
+
157
159
  static VALUE
158
160
  cgranges_index(VALUE self)
159
161
  {
160
- if(RTEST(rb_ivar_get(self, rb_intern("@indexed"))))
162
+ if (RTEST(rb_ivar_get(self, rb_intern("@indexed"))))
161
163
  {
162
164
  rb_raise(rb_eIndexedError, "CGRanges already indexed");
163
165
  return Qnil;
@@ -165,12 +167,19 @@ cgranges_index(VALUE self)
165
167
 
166
168
  cgranges_t *cr = get_cganges(self);
167
169
  cr_index(cr);
168
-
170
+
169
171
  rb_ivar_set(self, rb_intern("@indexed"), Qtrue);
170
172
 
171
173
  return self;
172
174
  }
173
175
 
176
+ /* Overlap query.
177
+ * @param [String] contig The contig name
178
+ * @param [Fixnum] start The start position of the interval.
179
+ * @param [Fixnum] end The end position of the interval.
180
+ * @return [Array] An array of [contig, start, end, label] arrays.
181
+ */
182
+
174
183
  static VALUE
175
184
  cgranges_overlap(VALUE self, VALUE rb_ctg, VALUE rb_st, VALUE rb_en)
176
185
  {
@@ -192,7 +201,7 @@ cgranges_overlap(VALUE self, VALUE rb_ctg, VALUE rb_st, VALUE rb_en)
192
201
  ctg = StringValueCStr(rb_ctg);
193
202
  st = NUM2INT32(rb_st);
194
203
  en = NUM2INT32(rb_en);
195
-
204
+
196
205
  n = cr_overlap(cr, ctg, st, en, &b, &m_b);
197
206
 
198
207
  if (n < 0)
@@ -206,14 +215,21 @@ cgranges_overlap(VALUE self, VALUE rb_ctg, VALUE rb_st, VALUE rb_en)
206
215
  for (int64_t i = 0; i < n; i++)
207
216
  {
208
217
  VALUE rb_intv = rb_ary_new3(
209
- 4, rb_ctg, INT32_2NUM(cr_start(cr, b[i])), INT32_2NUM(cr_end(cr, b[i])), INT32_2NUM(cr_label(cr, b[i]))
210
- );
218
+ 4, rb_ctg, INT32_2NUM(cr_start(cr, b[i])), INT32_2NUM(cr_end(cr, b[i])), INT32_2NUM(cr_label(cr, b[i])));
211
219
  rb_ary_push(result, rb_intv);
212
220
  }
221
+ free(b);
213
222
 
214
223
  return result;
215
224
  }
216
225
 
226
+ /* Get the number of overlapping intervals.
227
+ * @param [String] contig The contig name
228
+ * @param [Fixnum] start The start position of the interval.
229
+ * @param [Fixnum] end The end position of the interval.
230
+ * @return [Fixnum] The number of overlapping intervals.
231
+ */
232
+
217
233
  static VALUE
218
234
  cgranges_count_overlap(VALUE self, VALUE rb_ctg, VALUE rb_st, VALUE rb_en)
219
235
  {
@@ -235,7 +251,7 @@ cgranges_count_overlap(VALUE self, VALUE rb_ctg, VALUE rb_st, VALUE rb_en)
235
251
  ctg = StringValueCStr(rb_ctg);
236
252
  st = NUM2INT32(rb_st);
237
253
  en = NUM2INT32(rb_en);
238
-
254
+
239
255
  n = cr_overlap(cr, ctg, st, en, &b, &m_b);
240
256
 
241
257
  if (n < 0)
@@ -243,10 +259,18 @@ cgranges_count_overlap(VALUE self, VALUE rb_ctg, VALUE rb_st, VALUE rb_en)
243
259
  rb_raise(rb_eRuntimeError, "Error finding overlaps");
244
260
  return Qnil;
245
261
  }
262
+ free(b);
246
263
 
247
264
  return INT64_2NUM(n);
248
265
  }
249
266
 
267
+ /* Containment query.
268
+ * @param [String] contig The contig name
269
+ * @param [Fixnum] start The start position of the interval.
270
+ * @param [Fixnum] end The end position of the interval.
271
+ * @return [Array] An array of [contig, start, end, label] arrays.
272
+ */
273
+
250
274
  static VALUE
251
275
  cgranges_contain(VALUE self, VALUE rb_ctg, VALUE rb_st, VALUE rb_en)
252
276
  {
@@ -268,7 +292,7 @@ cgranges_contain(VALUE self, VALUE rb_ctg, VALUE rb_st, VALUE rb_en)
268
292
  ctg = StringValueCStr(rb_ctg);
269
293
  st = NUM2INT32(rb_st);
270
294
  en = NUM2INT32(rb_en);
271
-
295
+
272
296
  n = cr_contain(cr, ctg, st, en, &b, &m_b);
273
297
 
274
298
  if (n < 0)
@@ -282,14 +306,21 @@ cgranges_contain(VALUE self, VALUE rb_ctg, VALUE rb_st, VALUE rb_en)
282
306
  for (int64_t i = 0; i < n; i++)
283
307
  {
284
308
  VALUE rb_intv = rb_ary_new3(
285
- 4, rb_ctg, INT32_2NUM(cr_start(cr, b[i])), INT32_2NUM(cr_end(cr, b[i])), INT32_2NUM(cr_label(cr, b[i]))
286
- );
309
+ 4, rb_ctg, INT32_2NUM(cr_start(cr, b[i])), INT32_2NUM(cr_end(cr, b[i])), INT32_2NUM(cr_label(cr, b[i])));
287
310
  rb_ary_push(result, rb_intv);
288
311
  }
312
+ free(b);
289
313
 
290
314
  return result;
291
315
  }
292
316
 
317
+ /* Get the number of contained intervals.
318
+ * @param [String] contig The contig name
319
+ * @param [Fixnum] start The start position of the interval.
320
+ * @param [Fixnum] end The end position of the interval.
321
+ * @return [Fixnum] The number of contained intervals.
322
+ */
323
+
293
324
  static VALUE
294
325
  cgranges_count_contain(VALUE self, VALUE rb_ctg, VALUE rb_st, VALUE rb_en)
295
326
  {
@@ -311,7 +342,7 @@ cgranges_count_contain(VALUE self, VALUE rb_ctg, VALUE rb_st, VALUE rb_en)
311
342
  ctg = StringValueCStr(rb_ctg);
312
343
  st = NUM2INT32(rb_st);
313
344
  en = NUM2INT32(rb_en);
314
-
345
+
315
346
  n = cr_contain(cr, ctg, st, en, &b, &m_b);
316
347
 
317
348
  if (n < 0)
@@ -319,10 +350,105 @@ cgranges_count_contain(VALUE self, VALUE rb_ctg, VALUE rb_st, VALUE rb_en)
319
350
  rb_raise(rb_eRuntimeError, "Error finding contained");
320
351
  return Qnil;
321
352
  }
353
+ free(b);
322
354
 
323
355
  return INT64_2NUM(n);
324
356
  }
325
357
 
358
+ static VALUE
359
+ cgranges_coverage(VALUE self, VALUE rb_ctg, VALUE rb_st, VALUE rb_en, int contain)
360
+ {
361
+ cgranges_t *cr = get_cganges(self);
362
+ char *ctg = NULL;
363
+ int32_t st1 = 0;
364
+ int32_t en1 = 0;
365
+
366
+ int64_t *b = NULL;
367
+ int64_t m_b = 0;
368
+ int64_t n = 0;
369
+ int64_t cov = 0, cov_st = 0, cov_en = 0;
370
+
371
+ if (!RTEST(rb_ivar_get(self, rb_intern("@indexed"))))
372
+ {
373
+ rb_raise(rb_eNoIndexError, "CGRanges not indexed");
374
+ return Qnil;
375
+ }
376
+
377
+ ctg = StringValueCStr(rb_ctg);
378
+ st1 = NUM2INT32(rb_st);
379
+ en1 = NUM2INT32(rb_en);
380
+
381
+ if (contain)
382
+ {
383
+ n = cr_contain(cr, ctg, st1, en1, &b, &m_b);
384
+ }
385
+ else
386
+ {
387
+ n = cr_overlap(cr, ctg, st1, en1, &b, &m_b);
388
+ }
389
+
390
+ if (n < 0)
391
+ {
392
+ rb_raise(rb_eRuntimeError, "Error finding overlaps");
393
+ return Qnil;
394
+ }
395
+
396
+ for (int64_t j = 0; j < n; j++)
397
+ {
398
+ cr_intv_t *r = &cr->r[b[j]];
399
+ int32_t st0 = cr_st(r), en0 = cr_en(r);
400
+ if (st0 < st1)
401
+ st0 = st1;
402
+ if (en0 > en1)
403
+ en0 = en1;
404
+ if (st0 > cov_en)
405
+ {
406
+ cov += cov_en - cov_st;
407
+ cov_st = st0, cov_en = en0;
408
+ }
409
+ else
410
+ cov_en = cov_en > en0 ? cov_en : en0;
411
+ }
412
+ cov += cov_en - cov_st;
413
+ free(b);
414
+
415
+ return rb_ary_new3(2, INT64_2NUM(cov), INT64_2NUM(n));
416
+ }
417
+
418
+ /* Calculate breadth of coverage. (Overlap)
419
+ * Same as coverage(contig, start, end, mode: :overlap)
420
+ * @param [String] contig The contig name
421
+ * @param [Fixnum] start The start position of the interval.
422
+ * @param [Fixnum] end The end position of the interval.
423
+ * @return [Array] The breadth of coverage and the number of intervals.
424
+ * @see coverage
425
+ */
426
+
427
+ static VALUE
428
+ cgranges_coverage_overlap(VALUE self, VALUE rb_ctg, VALUE rb_st, VALUE rb_en)
429
+ {
430
+ VALUE result;
431
+ result = cgranges_coverage(self, rb_ctg, rb_st, rb_en, 0);
432
+ return result;
433
+ }
434
+
435
+ /* Calculate breadth of coverage. (Containment)
436
+ * Same as coverage(contig, start, end, mode: :contain)
437
+ * @param [String] contig The contig name
438
+ * @param [Fixnum] start The start position of the interval.
439
+ * @param [Fixnum] end The end position of the interval.
440
+ * @return [Array] The breadth of coverage and the number of intervals.
441
+ * @see coverage
442
+ */
443
+
444
+ static VALUE
445
+ cgranges_coverage_contain(VALUE self, VALUE rb_ctg, VALUE rb_st, VALUE rb_en)
446
+ {
447
+ VALUE result;
448
+ result = cgranges_coverage(self, rb_ctg, rb_st, rb_en, 1);
449
+ return result;
450
+ }
451
+
326
452
  void Init_cgranges(void)
327
453
  {
328
454
  rb_Bio = rb_define_module("Bio");
@@ -339,4 +465,6 @@ void Init_cgranges(void)
339
465
  rb_define_method(rb_CGRanges, "count_overlap", cgranges_count_overlap, 3);
340
466
  rb_define_method(rb_CGRanges, "contain", cgranges_contain, 3);
341
467
  rb_define_method(rb_CGRanges, "count_contain", cgranges_count_contain, 3);
468
+ rb_define_method(rb_CGRanges, "coverage_overlap", cgranges_coverage_overlap, 3);
469
+ rb_define_method(rb_CGRanges, "coverage_contain", cgranges_coverage_contain, 3);
342
470
  }
@@ -2,6 +2,6 @@
2
2
 
3
3
  module Bio
4
4
  class CGRanges
5
- VERSION = "0.0.0"
5
+ VERSION = "0.0.1"
6
6
  end
7
7
  end
data/lib/bio/cgranges.rb CHANGED
@@ -6,5 +6,25 @@ require_relative "cgranges/cgranges"
6
6
  module Bio
7
7
  # Ruby bindings to cgranges for genomic interval overlap and containment queries
8
8
  class CGRanges
9
+ # Calculate breadth of coverage.
10
+ # This is a wrapper method for `coverage_overlap` and `coverage_contain`.
11
+ # @param [String] contig The contig name
12
+ # @param [Fixnum] start The start position of the interval.
13
+ # @param [Fixnum] end The end position of the interval.
14
+ # @param [Symbol] mode :overlap or :contain (default: :overlap)
15
+ # @return [Array] The breadth of coverage and the number of intervals.
16
+ # @see coverage_overlap
17
+ # @see coverage_contain
18
+
19
+ def coverage(ctg, rb_st, rb_en, mode: :overlap)
20
+ case mode
21
+ when :overlap
22
+ coverage_overlap(ctg, rb_st, rb_en)
23
+ when :contain
24
+ coverage_contain(ctg, rb_st, rb_en)
25
+ else
26
+ raise ArgumentError, "unknown mode: #{mode}"
27
+ end
28
+ end
9
29
  end
10
30
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: bio-cgranges
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.0
4
+ version: 0.0.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - kojix2
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2022-05-13 00:00:00.000000000 Z
11
+ date: 2022-05-15 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: Genomic interval overlap queries
14
14
  email: