bio-cgranges 0.0.0 → 0.0.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 618c6866b3f708b148682a529b73efda7b782bfc567f92949f72c237fd98435b
4
- data.tar.gz: 0ebbc69b7858f934fd98f60500e49b7300774b21e05ee04c654d55746e18f00b
3
+ metadata.gz: 85f780637e2d11f0b7fac69bcf7434584c1200f0b341413da6d499ad0bd88f1a
4
+ data.tar.gz: 98a8b1897c11d91f999f2cb2af1cf6d1a4e6f268e1ba7877d51ac6b2d771d5a2
5
5
  SHA512:
6
- metadata.gz: 0444fad48ec7b6266072f2b7fb23684bf295703121c6d0b5b392e3c150de6dda02adbd1ecff161793b5f1a821f8d10cac564c4c60042f5fc4cc191bb26181620
7
- data.tar.gz: 7f783598ec7ed7937c8163593901f055463175e4b16ab0073cf41bb96341f858077877524c5e1409369822e1efd5eaed80a11acc6c7678d9e27dee4ee3e14dec
6
+ metadata.gz: 4d94f5de2f40b2ca280fa508c6ba7b0784b8d11428edf9cabd084fb002c3989b7efcaf6646d0362d702d7c8ba6307c46a5565d1099907f182e0c106db4e94515
7
+ data.tar.gz: dd98e66a0962bec279c8c39a55c5c93fdfa5705a1560830f0efe415029bfa3d8bd1c1a57f57261cedf22c2defbe8d8a65aaef453cc35fdf23ad572f5b72d6ff2
data/README.md CHANGED
@@ -1,8 +1,9 @@
1
1
  # bio-cgranges
2
2
 
3
3
  [![Gem Version](https://badge.fury.io/rb/bio-cgranges.svg)](https://badge.fury.io/rb/bio-cgranges)
4
- [![test](https://github.com/ruby-on-bioc/bio-cgranges/actions/workflows/ci.yml/badge.svg)](https://github.com/ruby-on-bioc/bio-cgranges/actions/workflows/ci.yml)
5
- [![dics](https://img.shields.io/badge/docs-stable-blue.svg)](https://rubydoc.info/gems/bio-cgranges)
4
+ [![test](https://github.com/kojix2/bio-cgranges/actions/workflows/ci.yml/badge.svg)](https://github.com/kojix2/bio-cgranges/actions/workflows/ci.yml)
5
+ [![docs](https://img.shields.io/badge/docs-stable-blue.svg)](https://rubydoc.info/gems/bio-cgranges)
6
+ [![docs](https://img.shields.io/badge/docs-latest-blue.svg)](https://kojix2.github.io/bio-cgranges/)
6
7
 
7
8
  Ruby bindings to [lh3/cgranges](https://github.com/lh3/cgranges).
8
9
 
@@ -27,15 +28,6 @@ granges.add("chr1", 10, 20, 0)
27
28
  .add("chr1", 15, 20, 4)
28
29
  .add("chr2", 10, 20, 5)
29
30
  .index
30
-
31
- granges.overlap("chr1", 12, 22)
32
- # [["chr1", 10, 20, 0],
33
- # ["chr1", 10, 25, 3],
34
- # ["chr1", 15, 25, 1],
35
- # ["chr1", 15, 20, 4]]
36
-
37
- granges.contain("chr1", 12, 22)
38
- # [["chr1", 15, 20, 4]]
39
31
  ```
40
32
 
41
33
  ```
@@ -47,18 +39,43 @@ granges.contain("chr1", 12, 22)
47
39
  3-3-3-3-3-3-3-3-3-3-3-3-3-3-3
48
40
  4-4-4-4-4
49
41
  5-5-5-5-5
50
- Q-Q-Q-Q-Q-Q-Q-Q-Q-Q
51
42
  |-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|
52
43
  0.........5.........10........15........20........25........30........35........40
53
44
  ```
54
45
 
46
+ ```ruby
47
+ granges.overlap("chr1", 12, 22)
48
+ # [["chr1", 10, 20, 0],
49
+ # ["chr1", 10, 25, 3],
50
+ # ["chr1", 15, 25, 1],
51
+ # ["chr1", 15, 20, 4]]
52
+
53
+ granges.contain("chr1", 12, 22)
54
+ # [["chr1", 15, 20, 4]]
55
+
56
+ granges.coverage("chr1", 20, 35)
57
+ # [10, 3] # [breadth of coverage, number of intervals]
58
+
59
+ granges.coverage("chr1", 12, 22, mode: :contain)
60
+ # [5, 1]
61
+ ```
62
+
63
+ See [![docs](https://img.shields.io/badge/docs-latest-blue.svg)](https://kojix2.github.io/bio-cgranges/) for details.
64
+
55
65
  ## Development
56
66
 
57
- Bug reports and pull requests are welcome on GitHub at https://github.com/ruby-on-bioc/bio-cgranges.
67
+ ```sh
68
+ git clone https://github.com/kojix2/bio-cgranges
69
+ bundle install
70
+ bundle exec rake compile
71
+ bundle exec rake test
72
+ ```
73
+
74
+ Bug reports and pull requests are welcome on GitHub at https://github.com/kojix2/bio-cgranges.
58
75
 
59
- Do you need commit rights to my repository?
76
+ Do you need commit rights to this repository?
60
77
  Do you want to get admin rights and take over the project?
61
- If so, please feel free to contact us @kojix2.
78
+ If so, please feel free to contact us.
62
79
 
63
80
  ## License
64
81
 
data/Rakefile CHANGED
@@ -9,10 +9,6 @@ Rake::TestTask.new(:test) do |t|
9
9
  t.test_files = FileList["test/**/*_test.rb"]
10
10
  end
11
11
 
12
- require "rubocop/rake_task"
13
-
14
- RuboCop::RakeTask.new
15
-
16
12
  require "rake/extensiontask"
17
13
 
18
14
  task build: :compile
@@ -34,7 +30,6 @@ task default: %i[
34
30
  compile
35
31
  remove_object_file
36
32
  test
37
- rubocop
38
33
  ]
39
34
 
40
35
  task cleanall: %i[
@@ -32,7 +32,7 @@
32
32
  #define NUM2INT64 NUM2LONG
33
33
  #define UINT64_2NUM ULONG2NUM
34
34
  #define INT64_2NUM LONG2NUM
35
- #elif SIZEOF_LONGLONG == SIZEOF_INT64
35
+ #elif SIZEOF_LONG_LONG == SIZEOF_INT64
36
36
  #define NUM2UINT64 NUM2ULL
37
37
  #define NUM2INT64 NUM2LL
38
38
  #define UINT64_2NUM ULL2NUM
@@ -50,21 +50,18 @@ static void cgranges_free(void *ptr);
50
50
  static size_t cgranges_memsize(const void *ptr);
51
51
 
52
52
  static const rb_data_type_t cgranges_type = {
53
- "cgranges",
54
- {
55
- 0,
56
- cgranges_free,
57
- cgranges_memsize,
53
+ .wrap_struct_name = "cgranges",
54
+ .function = {
55
+ .dfree = cgranges_free,
56
+ .dsize = cgranges_memsize,
58
57
  },
59
- 0,
60
- 0,
61
- RUBY_TYPED_FREE_IMMEDIATELY,
58
+ .flags = RUBY_TYPED_FREE_IMMEDIATELY,
62
59
  };
63
60
 
64
61
  static void
65
62
  cgranges_free(void *ptr)
66
63
  {
67
- if(ptr)
64
+ if (ptr)
68
65
  {
69
66
  cr_destroy(ptr);
70
67
  }
@@ -94,6 +91,11 @@ cgranges_allocate(VALUE klass)
94
91
  return TypedData_Wrap_Struct(klass, &cgranges_type, ptr);
95
92
  }
96
93
 
94
+ /* Create a new cgranges object
95
+ *
96
+ * @return [Bio::CGRanges]
97
+ */
98
+
97
99
  static VALUE
98
100
  cgranges_init(VALUE self)
99
101
  {
@@ -107,6 +109,14 @@ cgranges_init(VALUE self)
107
109
  return self;
108
110
  }
109
111
 
112
+ /* Add a genomic interval to the cgranges object.
113
+ * @param [String] contig The contig name
114
+ * @param [Fixnum] start The start position of the interval.
115
+ * @param [Fixnum] end The end position of the interval.
116
+ * @param [Fixnum] label The label of the interval.
117
+ * @return [Bio::CGRanges] self
118
+ */
119
+
110
120
  static VALUE
111
121
  cgranges_add(VALUE self, VALUE rb_ctg, VALUE rb_st, VALUE rb_en, VALUE rb_label)
112
122
  {
@@ -123,25 +133,10 @@ cgranges_add(VALUE self, VALUE rb_ctg, VALUE rb_st, VALUE rb_en, VALUE rb_label)
123
133
  return Qnil;
124
134
  }
125
135
 
126
- if (rb_ctg != Qnil)
127
- {
128
- ctg = StringValueCStr(rb_ctg);
129
- }
130
-
131
- if (rb_st != Qnil)
132
- {
133
- st = NUM2INT32(rb_st);
134
- }
135
-
136
- if (rb_en != Qnil)
137
- {
138
- en = NUM2INT32(rb_en);
139
- }
140
-
141
- if (rb_label != Qnil)
142
- {
143
- label = NUM2INT32(rb_label);
144
- }
136
+ ctg = StringValueCStr(rb_ctg);
137
+ st = NUM2INT32(rb_st);
138
+ en = NUM2INT32(rb_en);
139
+ label = NUM2INT32(rb_label);
145
140
 
146
141
  intv = cr_add(cr, ctg, st, en, label);
147
142
 
@@ -154,10 +149,14 @@ cgranges_add(VALUE self, VALUE rb_ctg, VALUE rb_st, VALUE rb_en, VALUE rb_label)
154
149
  return self;
155
150
  }
156
151
 
152
+ /* Build the index. Must be called once before running queries.
153
+ * @return [Bio::CGRanges] self
154
+ */
155
+
157
156
  static VALUE
158
157
  cgranges_index(VALUE self)
159
158
  {
160
- if(RTEST(rb_ivar_get(self, rb_intern("@indexed"))))
159
+ if (RTEST(rb_ivar_get(self, rb_intern("@indexed"))))
161
160
  {
162
161
  rb_raise(rb_eIndexedError, "CGRanges already indexed");
163
162
  return Qnil;
@@ -165,12 +164,19 @@ cgranges_index(VALUE self)
165
164
 
166
165
  cgranges_t *cr = get_cganges(self);
167
166
  cr_index(cr);
168
-
167
+
169
168
  rb_ivar_set(self, rb_intern("@indexed"), Qtrue);
170
169
 
171
170
  return self;
172
171
  }
173
172
 
173
+ /* Overlap query: find the intervals that overlap the given interval.
174
+ * @param [String] contig The contig name
175
+ * @param [Fixnum] start The start position of the interval.
176
+ * @param [Fixnum] end The end position of the interval.
177
+ * @return [Array] An array of [contig, start, end, label] arrays.
178
+ */
179
+
174
180
  static VALUE
175
181
  cgranges_overlap(VALUE self, VALUE rb_ctg, VALUE rb_st, VALUE rb_en)
176
182
  {
@@ -192,7 +198,7 @@ cgranges_overlap(VALUE self, VALUE rb_ctg, VALUE rb_st, VALUE rb_en)
192
198
  ctg = StringValueCStr(rb_ctg);
193
199
  st = NUM2INT32(rb_st);
194
200
  en = NUM2INT32(rb_en);
195
-
201
+
196
202
  n = cr_overlap(cr, ctg, st, en, &b, &m_b);
197
203
 
198
204
  if (n < 0)
@@ -206,14 +212,21 @@ cgranges_overlap(VALUE self, VALUE rb_ctg, VALUE rb_st, VALUE rb_en)
206
212
  for (int64_t i = 0; i < n; i++)
207
213
  {
208
214
  VALUE rb_intv = rb_ary_new3(
209
- 4, rb_ctg, INT32_2NUM(cr_start(cr, b[i])), INT32_2NUM(cr_end(cr, b[i])), INT32_2NUM(cr_label(cr, b[i]))
210
- );
215
+ 4, rb_ctg, INT32_2NUM(cr_start(cr, b[i])), INT32_2NUM(cr_end(cr, b[i])), INT32_2NUM(cr_label(cr, b[i])));
211
216
  rb_ary_push(result, rb_intv);
212
217
  }
218
+ free(b);
213
219
 
214
220
  return result;
215
221
  }
216
222
 
223
+ /* Get the number of overlapping intervals.
224
+ * @param [String] contig The contig name
225
+ * @param [Fixnum] start The start position of the interval.
226
+ * @param [Fixnum] end The end position of the interval.
227
+ * @return [Fixnum] The number of overlapping intervals.
228
+ */
229
+
217
230
  static VALUE
218
231
  cgranges_count_overlap(VALUE self, VALUE rb_ctg, VALUE rb_st, VALUE rb_en)
219
232
  {
@@ -235,7 +248,7 @@ cgranges_count_overlap(VALUE self, VALUE rb_ctg, VALUE rb_st, VALUE rb_en)
235
248
  ctg = StringValueCStr(rb_ctg);
236
249
  st = NUM2INT32(rb_st);
237
250
  en = NUM2INT32(rb_en);
238
-
251
+
239
252
  n = cr_overlap(cr, ctg, st, en, &b, &m_b);
240
253
 
241
254
  if (n < 0)
@@ -243,10 +256,18 @@ cgranges_count_overlap(VALUE self, VALUE rb_ctg, VALUE rb_st, VALUE rb_en)
243
256
  rb_raise(rb_eRuntimeError, "Error finding overlaps");
244
257
  return Qnil;
245
258
  }
259
+ free(b);
246
260
 
247
261
  return INT64_2NUM(n);
248
262
  }
249
263
 
264
+ /* Containment query: find the intervals contained in the given interval.
265
+ * @param [String] contig The contig name
266
+ * @param [Fixnum] start The start position of the interval.
267
+ * @param [Fixnum] end The end position of the interval.
268
+ * @return [Array] An array of [contig, start, end, label] arrays.
269
+ */
270
+
250
271
  static VALUE
251
272
  cgranges_contain(VALUE self, VALUE rb_ctg, VALUE rb_st, VALUE rb_en)
252
273
  {
@@ -268,7 +289,7 @@ cgranges_contain(VALUE self, VALUE rb_ctg, VALUE rb_st, VALUE rb_en)
268
289
  ctg = StringValueCStr(rb_ctg);
269
290
  st = NUM2INT32(rb_st);
270
291
  en = NUM2INT32(rb_en);
271
-
292
+
272
293
  n = cr_contain(cr, ctg, st, en, &b, &m_b);
273
294
 
274
295
  if (n < 0)
@@ -282,14 +303,21 @@ cgranges_contain(VALUE self, VALUE rb_ctg, VALUE rb_st, VALUE rb_en)
282
303
  for (int64_t i = 0; i < n; i++)
283
304
  {
284
305
  VALUE rb_intv = rb_ary_new3(
285
- 4, rb_ctg, INT32_2NUM(cr_start(cr, b[i])), INT32_2NUM(cr_end(cr, b[i])), INT32_2NUM(cr_label(cr, b[i]))
286
- );
306
+ 4, rb_ctg, INT32_2NUM(cr_start(cr, b[i])), INT32_2NUM(cr_end(cr, b[i])), INT32_2NUM(cr_label(cr, b[i])));
287
307
  rb_ary_push(result, rb_intv);
288
308
  }
309
+ free(b);
289
310
 
290
311
  return result;
291
312
  }
292
313
 
314
+ /* Get the number of contained intervals.
315
+ * @param [String] contig The contig name
316
+ * @param [Fixnum] start The start position of the interval.
317
+ * @param [Fixnum] end The end position of the interval.
318
+ * @return [Fixnum] The number of contained intervals.
319
+ */
320
+
293
321
  static VALUE
294
322
  cgranges_count_contain(VALUE self, VALUE rb_ctg, VALUE rb_st, VALUE rb_en)
295
323
  {
@@ -311,7 +339,7 @@ cgranges_count_contain(VALUE self, VALUE rb_ctg, VALUE rb_st, VALUE rb_en)
311
339
  ctg = StringValueCStr(rb_ctg);
312
340
  st = NUM2INT32(rb_st);
313
341
  en = NUM2INT32(rb_en);
314
-
342
+
315
343
  n = cr_contain(cr, ctg, st, en, &b, &m_b);
316
344
 
317
345
  if (n < 0)
@@ -319,10 +347,105 @@ cgranges_count_contain(VALUE self, VALUE rb_ctg, VALUE rb_st, VALUE rb_en)
319
347
  rb_raise(rb_eRuntimeError, "Error finding contained");
320
348
  return Qnil;
321
349
  }
350
+ free(b);
322
351
 
323
352
  return INT64_2NUM(n);
324
353
  }
325
354
 
355
+ static VALUE
356
+ cgranges_coverage(VALUE self, VALUE rb_ctg, VALUE rb_st, VALUE rb_en, int contain)
357
+ {
358
+ cgranges_t *cr = get_cganges(self);
359
+ char *ctg = NULL;
360
+ int32_t st1 = 0;
361
+ int32_t en1 = 0;
362
+
363
+ int64_t *b = NULL;
364
+ int64_t m_b = 0;
365
+ int64_t n = 0;
366
+ int64_t cov = 0, cov_st = 0, cov_en = 0;
367
+
368
+ if (!RTEST(rb_ivar_get(self, rb_intern("@indexed"))))
369
+ {
370
+ rb_raise(rb_eNoIndexError, "CGRanges not indexed");
371
+ return Qnil;
372
+ }
373
+
374
+ ctg = StringValueCStr(rb_ctg);
375
+ st1 = NUM2INT32(rb_st);
376
+ en1 = NUM2INT32(rb_en);
377
+
378
+ if (contain)
379
+ {
380
+ n = cr_contain(cr, ctg, st1, en1, &b, &m_b);
381
+ }
382
+ else
383
+ {
384
+ n = cr_overlap(cr, ctg, st1, en1, &b, &m_b);
385
+ }
386
+
387
+ if (n < 0)
388
+ {
389
+ rb_raise(rb_eRuntimeError, "Error finding overlaps");
390
+ return Qnil;
391
+ }
392
+
393
+ for (int64_t j = 0; j < n; j++)
394
+ {
395
+ cr_intv_t *r = &cr->r[b[j]];
396
+ int32_t st0 = cr_st(r), en0 = cr_en(r);
397
+ if (st0 < st1)
398
+ st0 = st1;
399
+ if (en0 > en1)
400
+ en0 = en1;
401
+ if (st0 > cov_en)
402
+ {
403
+ cov += cov_en - cov_st;
404
+ cov_st = st0, cov_en = en0;
405
+ }
406
+ else
407
+ cov_en = cov_en > en0 ? cov_en : en0;
408
+ }
409
+ cov += cov_en - cov_st;
410
+ free(b);
411
+
412
+ return rb_ary_new3(2, INT64_2NUM(cov), INT64_2NUM(n));
413
+ }
414
+
415
+ /* Calculate breadth of coverage. (Overlap)
416
+ * Same as coverage(contig, start, end, mode: :overlap).
417
+ * @param [String] contig The contig name
418
+ * @param [Fixnum] start The start position of the interval.
419
+ * @param [Fixnum] end The end position of the interval.
420
+ * @return [Array] The breadth of coverage and the number of intervals.
421
+ * @see coverage
422
+ */
423
+
424
+ static VALUE
425
+ cgranges_coverage_overlap(VALUE self, VALUE rb_ctg, VALUE rb_st, VALUE rb_en)
426
+ {
427
+ VALUE result;
428
+ result = cgranges_coverage(self, rb_ctg, rb_st, rb_en, 0);
429
+ return result;
430
+ }
431
+
432
+ /* Calculate breadth of coverage. (Containment)
433
+ * Same as coverage(contig, start, end, mode: :contain).
434
+ * @param [String] contig The contig name
435
+ * @param [Fixnum] start The start position of the interval.
436
+ * @param [Fixnum] end The end position of the interval.
437
+ * @return [Array] The breadth of coverage and the number of intervals.
438
+ * @see coverage
439
+ */
440
+
441
+ static VALUE
442
+ cgranges_coverage_contain(VALUE self, VALUE rb_ctg, VALUE rb_st, VALUE rb_en)
443
+ {
444
+ VALUE result;
445
+ result = cgranges_coverage(self, rb_ctg, rb_st, rb_en, 1);
446
+ return result;
447
+ }
448
+
326
449
  void Init_cgranges(void)
327
450
  {
328
451
  rb_Bio = rb_define_module("Bio");
@@ -339,4 +462,6 @@ void Init_cgranges(void)
339
462
  rb_define_method(rb_CGRanges, "count_overlap", cgranges_count_overlap, 3);
340
463
  rb_define_method(rb_CGRanges, "contain", cgranges_contain, 3);
341
464
  rb_define_method(rb_CGRanges, "count_contain", cgranges_count_contain, 3);
342
- }
465
+ rb_define_method(rb_CGRanges, "coverage_overlap", cgranges_coverage_overlap, 3);
466
+ rb_define_method(rb_CGRanges, "coverage_contain", cgranges_coverage_contain, 3);
467
+ }
@@ -2,6 +2,6 @@
2
2
 
3
3
  module Bio
4
4
  class CGRanges
5
- VERSION = "0.0.0"
5
+ VERSION = "0.0.2"
6
6
  end
7
7
  end
data/lib/bio/cgranges.rb CHANGED
@@ -6,5 +6,25 @@ require_relative "cgranges/cgranges"
6
6
  module Bio
7
7
  # Ruby bindings to cgranges for genomic interval overlap queries
8
8
  class CGRanges
9
+ # Calculate breadth of coverage.
10
+ # This is a wrapper method for `coverage_overlap` and `coverage_contain`.
11
+ # @param [String] contig The contig name
12
+ # @param [Fixnum] start The start position of the interval.
13
+ # @param [Fixnum] end The end position of the interval.
14
+ # @param [Symbol] mode :overlap or :contain (default: :overlap)
15
+ # @return [Array] The breadth of coverage and the number of intervals.
16
+ # @see coverage_overlap
17
+ # @see coverage_contain
18
+
19
+ def coverage(ctg, rb_st, rb_en, mode: :overlap)
20
+ case mode
21
+ when :overlap
22
+ coverage_overlap(ctg, rb_st, rb_en)
23
+ when :contain
24
+ coverage_contain(ctg, rb_st, rb_en)
25
+ else
26
+ raise ArgumentError, "unknown mode: #{mode}"
27
+ end
28
+ end
9
29
  end
10
30
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: bio-cgranges
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.0
4
+ version: 0.0.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - kojix2
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2022-05-13 00:00:00.000000000 Z
11
+ date: 2022-11-03 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: Genomic interval overlap queries
14
14
  email:
@@ -31,7 +31,7 @@ files:
31
31
  - ext/bio/cgranges/extconf.rb
32
32
  - lib/bio/cgranges.rb
33
33
  - lib/bio/cgranges/version.rb
34
- homepage: https://github.com/ruby-on-bioc/bio-cgranges
34
+ homepage: https://github.com/kojix2/bio-cgranges
35
35
  licenses:
36
36
  - MIT
37
37
  metadata: {}