bio-cgranges 0.0.0 → 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 618c6866b3f708b148682a529b73efda7b782bfc567f92949f72c237fd98435b
4
- data.tar.gz: 0ebbc69b7858f934fd98f60500e49b7300774b21e05ee04c654d55746e18f00b
3
+ metadata.gz: 85f780637e2d11f0b7fac69bcf7434584c1200f0b341413da6d499ad0bd88f1a
4
+ data.tar.gz: 98a8b1897c11d91f999f2cb2af1cf6d1a4e6f268e1ba7877d51ac6b2d771d5a2
5
5
  SHA512:
6
- metadata.gz: 0444fad48ec7b6266072f2b7fb23684bf295703121c6d0b5b392e3c150de6dda02adbd1ecff161793b5f1a821f8d10cac564c4c60042f5fc4cc191bb26181620
7
- data.tar.gz: 7f783598ec7ed7937c8163593901f055463175e4b16ab0073cf41bb96341f858077877524c5e1409369822e1efd5eaed80a11acc6c7678d9e27dee4ee3e14dec
6
+ metadata.gz: 4d94f5de2f40b2ca280fa508c6ba7b0784b8d11428edf9cabd084fb002c3989b7efcaf6646d0362d702d7c8ba6307c46a5565d1099907f182e0c106db4e94515
7
+ data.tar.gz: dd98e66a0962bec279c8c39a55c5c93fdfa5705a1560830f0efe415029bfa3d8bd1c1a57f57261cedf22c2defbe8d8a65aaef453cc35fdf23ad572f5b72d6ff2
data/README.md CHANGED
@@ -1,8 +1,9 @@
1
1
  # bio-cgranges
2
2
 
3
3
  [![Gem Version](https://badge.fury.io/rb/bio-cgranges.svg)](https://badge.fury.io/rb/bio-cgranges)
4
- [![test](https://github.com/ruby-on-bioc/bio-cgranges/actions/workflows/ci.yml/badge.svg)](https://github.com/ruby-on-bioc/bio-cgranges/actions/workflows/ci.yml)
5
- [![dics](https://img.shields.io/badge/docs-stable-blue.svg)](https://rubydoc.info/gems/bio-cgranges)
4
+ [![test](https://github.com/kojix2/bio-cgranges/actions/workflows/ci.yml/badge.svg)](https://github.com/kojix2/bio-cgranges/actions/workflows/ci.yml)
5
+ [![docs](https://img.shields.io/badge/docs-stable-blue.svg)](https://rubydoc.info/gems/bio-cgranges)
6
+ [![docs](https://img.shields.io/badge/docs-latest-blue.svg)](https://kojix2.github.io/bio-cgranges/)
6
7
 
7
8
  Ruby bindings to [lh3/cgranges](https://github.com/lh3/cgranges).
8
9
 
@@ -27,15 +28,6 @@ granges.add("chr1", 10, 20, 0)
27
28
  .add("chr1", 15, 20, 4)
28
29
  .add("chr2", 10, 20, 5)
29
30
  .index
30
-
31
- granges.overlap("chr1", 12, 22)
32
- # [["chr1", 10, 20, 0],
33
- # ["chr1", 10, 25, 3],
34
- # ["chr1", 15, 25, 1],
35
- # ["chr1", 15, 20, 4]]
36
-
37
- granges.contain("chr1", 12, 22)
38
- # [["chr1", 15, 20, 4]]
39
31
  ```
40
32
 
41
33
  ```
@@ -47,18 +39,43 @@ granges.contain("chr1", 12, 22)
47
39
  3-3-3-3-3-3-3-3-3-3-3-3-3-3-3
48
40
  4-4-4-4-4
49
41
  5-5-5-5-5
50
- Q-Q-Q-Q-Q-Q-Q-Q-Q-Q
51
42
  |-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|-|
52
43
  0.........5.........10........15........20........25........30........35........40
53
44
  ```
54
45
 
46
+ ```ruby
47
+ granges.overlap("chr1", 12, 22)
48
+ # [["chr1", 10, 20, 0],
49
+ # ["chr1", 10, 25, 3],
50
+ # ["chr1", 15, 25, 1],
51
+ # ["chr1", 15, 20, 4]]
52
+
53
+ granges.contain("chr1", 12, 22)
54
+ # [["chr1", 15, 20, 4]]
55
+
56
+ granges.coverage("chr1", 20, 35)
57
+ # [10, 3] # cov, n
58
+
59
+ granges.coverage("chr1", 12, 22, mode: :contain)
60
+ # [5, 1]
61
+ ```
62
+
63
+ See [![docs](https://img.shields.io/badge/docs-latest-blue.svg)](https://kojix2.github.io/bio-cgranges/) for details.
64
+
55
65
  ## Development
56
66
 
57
- Bug reports and pull requests are welcome on GitHub at https://github.com/ruby-on-bioc/bio-cgranges.
67
+ ```sh
68
+ git clone https://github.com/kojix2/bio-cgranges
69
+ bundle install
70
+ bundle exec rake compile
71
+ bundle exec rake test
72
+ ```
73
+
74
+ Bug reports and pull requests are welcome on GitHub at https://github.com/kojix2/bio-cgranges.
58
75
 
59
- Do you need commit rights to my repository?
76
+ Do you need commit rights to this repository?
60
77
  Do you want to get admin rights and take over the project?
61
- If so, please feel free to contact us @kojix2.
78
+ If so, please feel free to contact us.
62
79
 
63
80
  ## License
64
81
 
data/Rakefile CHANGED
@@ -9,10 +9,6 @@ Rake::TestTask.new(:test) do |t|
9
9
  t.test_files = FileList["test/**/*_test.rb"]
10
10
  end
11
11
 
12
- require "rubocop/rake_task"
13
-
14
- RuboCop::RakeTask.new
15
-
16
12
  require "rake/extensiontask"
17
13
 
18
14
  task build: :compile
@@ -34,7 +30,6 @@ task default: %i[
34
30
  compile
35
31
  remove_object_file
36
32
  test
37
- rubocop
38
33
  ]
39
34
 
40
35
  task cleanall: %i[
@@ -32,7 +32,7 @@
32
32
  #define NUM2INT64 NUM2LONG
33
33
  #define UINT64_2NUM ULONG2NUM
34
34
  #define INT64_2NUM LONG2NUM
35
- #elif SIZEOF_LONGLONG == SIZEOF_INT64
35
+ #elif SIZEOF_LONG_LONG == SIZEOF_INT64
36
36
  #define NUM2UINT64 NUM2ULL
37
37
  #define NUM2INT64 NUM2LL
38
38
  #define UINT64_2NUM ULL2NUM
@@ -50,21 +50,18 @@ static void cgranges_free(void *ptr);
50
50
  static size_t cgranges_memsize(const void *ptr);
51
51
 
52
52
  static const rb_data_type_t cgranges_type = {
53
- "cgranges",
54
- {
55
- 0,
56
- cgranges_free,
57
- cgranges_memsize,
53
+ .wrap_struct_name = "cgranges",
54
+ .function = {
55
+ .dfree = cgranges_free,
56
+ .dsize = cgranges_memsize,
58
57
  },
59
- 0,
60
- 0,
61
- RUBY_TYPED_FREE_IMMEDIATELY,
58
+ .flags = RUBY_TYPED_FREE_IMMEDIATELY,
62
59
  };
63
60
 
64
61
  static void
65
62
  cgranges_free(void *ptr)
66
63
  {
67
- if(ptr)
64
+ if (ptr)
68
65
  {
69
66
  cr_destroy(ptr);
70
67
  }
@@ -94,6 +91,11 @@ cgranges_allocate(VALUE klass)
94
91
  return TypedData_Wrap_Struct(klass, &cgranges_type, ptr);
95
92
  }
96
93
 
94
+ /* Create a new cgranges object
95
+ *
96
+ * @return [Bio::CGRanges]
97
+ */
98
+
97
99
  static VALUE
98
100
  cgranges_init(VALUE self)
99
101
  {
@@ -107,6 +109,14 @@ cgranges_init(VALUE self)
107
109
  return self;
108
110
  }
109
111
 
112
+ /* Add a genomic interval to the cgranges object.
113
+ * @param [String] contig The contig name
114
+ * @param [Fixnum] start The start position of the interval.
115
+ * @param [Fixnum] end The end position of the interval.
116
+ * @param [Fixnum] label The label of the interval.
117
+ * @return [Bio::CGRanges] self
118
+ */
119
+
110
120
  static VALUE
111
121
  cgranges_add(VALUE self, VALUE rb_ctg, VALUE rb_st, VALUE rb_en, VALUE rb_label)
112
122
  {
@@ -123,25 +133,10 @@ cgranges_add(VALUE self, VALUE rb_ctg, VALUE rb_st, VALUE rb_en, VALUE rb_label)
123
133
  return Qnil;
124
134
  }
125
135
 
126
- if (rb_ctg != Qnil)
127
- {
128
- ctg = StringValueCStr(rb_ctg);
129
- }
130
-
131
- if (rb_st != Qnil)
132
- {
133
- st = NUM2INT32(rb_st);
134
- }
135
-
136
- if (rb_en != Qnil)
137
- {
138
- en = NUM2INT32(rb_en);
139
- }
140
-
141
- if (rb_label != Qnil)
142
- {
143
- label = NUM2INT32(rb_label);
144
- }
136
+ ctg = StringValueCStr(rb_ctg);
137
+ st = NUM2INT32(rb_st);
138
+ en = NUM2INT32(rb_en);
139
+ label = NUM2INT32(rb_label);
145
140
 
146
141
  intv = cr_add(cr, ctg, st, en, label);
147
142
 
@@ -154,10 +149,14 @@ cgranges_add(VALUE self, VALUE rb_ctg, VALUE rb_st, VALUE rb_en, VALUE rb_label)
154
149
  return self;
155
150
  }
156
151
 
152
+ /* Build the index required by the query methods; raises if the object is already indexed.
153
+ * @return [Bio::CGRanges] self
154
+ */
155
+
157
156
  static VALUE
158
157
  cgranges_index(VALUE self)
159
158
  {
160
- if(RTEST(rb_ivar_get(self, rb_intern("@indexed"))))
159
+ if (RTEST(rb_ivar_get(self, rb_intern("@indexed"))))
161
160
  {
162
161
  rb_raise(rb_eIndexedError, "CGRanges already indexed");
163
162
  return Qnil;
@@ -165,12 +164,19 @@ cgranges_index(VALUE self)
165
164
 
166
165
  cgranges_t *cr = get_cganges(self);
167
166
  cr_index(cr);
168
-
167
+
169
168
  rb_ivar_set(self, rb_intern("@indexed"), Qtrue);
170
169
 
171
170
  return self;
172
171
  }
173
172
 
173
+ /* Overlap query.
174
+ * @param [String] contig The contig name
175
+ * @param [Fixnum] start The start position of the interval.
176
+ * @param [Fixnum] end The end position of the interval.
177
+ * @return [Array] An array of [contig, start, end, label] arrays.
178
+ */
179
+
174
180
  static VALUE
175
181
  cgranges_overlap(VALUE self, VALUE rb_ctg, VALUE rb_st, VALUE rb_en)
176
182
  {
@@ -192,7 +198,7 @@ cgranges_overlap(VALUE self, VALUE rb_ctg, VALUE rb_st, VALUE rb_en)
192
198
  ctg = StringValueCStr(rb_ctg);
193
199
  st = NUM2INT32(rb_st);
194
200
  en = NUM2INT32(rb_en);
195
-
201
+
196
202
  n = cr_overlap(cr, ctg, st, en, &b, &m_b);
197
203
 
198
204
  if (n < 0)
@@ -206,14 +212,21 @@ cgranges_overlap(VALUE self, VALUE rb_ctg, VALUE rb_st, VALUE rb_en)
206
212
  for (int64_t i = 0; i < n; i++)
207
213
  {
208
214
  VALUE rb_intv = rb_ary_new3(
209
- 4, rb_ctg, INT32_2NUM(cr_start(cr, b[i])), INT32_2NUM(cr_end(cr, b[i])), INT32_2NUM(cr_label(cr, b[i]))
210
- );
215
+ 4, rb_ctg, INT32_2NUM(cr_start(cr, b[i])), INT32_2NUM(cr_end(cr, b[i])), INT32_2NUM(cr_label(cr, b[i])));
211
216
  rb_ary_push(result, rb_intv);
212
217
  }
218
+ free(b);
213
219
 
214
220
  return result;
215
221
  }
216
222
 
223
+ /* Get the number of overlapping intervals.
224
+ * @param [String] contig The contig name
225
+ * @param [Fixnum] start The start position of the interval.
226
+ * @param [Fixnum] end The end position of the interval.
227
+ * @return [Fixnum] The number of overlapping intervals.
228
+ */
229
+
217
230
  static VALUE
218
231
  cgranges_count_overlap(VALUE self, VALUE rb_ctg, VALUE rb_st, VALUE rb_en)
219
232
  {
@@ -235,7 +248,7 @@ cgranges_count_overlap(VALUE self, VALUE rb_ctg, VALUE rb_st, VALUE rb_en)
235
248
  ctg = StringValueCStr(rb_ctg);
236
249
  st = NUM2INT32(rb_st);
237
250
  en = NUM2INT32(rb_en);
238
-
251
+
239
252
  n = cr_overlap(cr, ctg, st, en, &b, &m_b);
240
253
 
241
254
  if (n < 0)
@@ -243,10 +256,18 @@ cgranges_count_overlap(VALUE self, VALUE rb_ctg, VALUE rb_st, VALUE rb_en)
243
256
  rb_raise(rb_eRuntimeError, "Error finding overlaps");
244
257
  return Qnil;
245
258
  }
259
+ free(b);
246
260
 
247
261
  return INT64_2NUM(n);
248
262
  }
249
263
 
264
+ /* Containment query.
265
+ * @param [String] contig The contig name
266
+ * @param [Fixnum] start The start position of the interval.
267
+ * @param [Fixnum] end The end position of the interval.
268
+ * @return [Array] An array of [contig, start, end, label] arrays.
269
+ */
270
+
250
271
  static VALUE
251
272
  cgranges_contain(VALUE self, VALUE rb_ctg, VALUE rb_st, VALUE rb_en)
252
273
  {
@@ -268,7 +289,7 @@ cgranges_contain(VALUE self, VALUE rb_ctg, VALUE rb_st, VALUE rb_en)
268
289
  ctg = StringValueCStr(rb_ctg);
269
290
  st = NUM2INT32(rb_st);
270
291
  en = NUM2INT32(rb_en);
271
-
292
+
272
293
  n = cr_contain(cr, ctg, st, en, &b, &m_b);
273
294
 
274
295
  if (n < 0)
@@ -282,14 +303,21 @@ cgranges_contain(VALUE self, VALUE rb_ctg, VALUE rb_st, VALUE rb_en)
282
303
  for (int64_t i = 0; i < n; i++)
283
304
  {
284
305
  VALUE rb_intv = rb_ary_new3(
285
- 4, rb_ctg, INT32_2NUM(cr_start(cr, b[i])), INT32_2NUM(cr_end(cr, b[i])), INT32_2NUM(cr_label(cr, b[i]))
286
- );
306
+ 4, rb_ctg, INT32_2NUM(cr_start(cr, b[i])), INT32_2NUM(cr_end(cr, b[i])), INT32_2NUM(cr_label(cr, b[i])));
287
307
  rb_ary_push(result, rb_intv);
288
308
  }
309
+ free(b);
289
310
 
290
311
  return result;
291
312
  }
292
313
 
314
+ /* Get the number of contained intervals.
315
+ * @param [String] contig The contig name
316
+ * @param [Fixnum] start The start position of the interval.
317
+ * @param [Fixnum] end The end position of the interval.
318
+ * @return [Fixnum] The number of contained intervals.
319
+ */
320
+
293
321
  static VALUE
294
322
  cgranges_count_contain(VALUE self, VALUE rb_ctg, VALUE rb_st, VALUE rb_en)
295
323
  {
@@ -311,7 +339,7 @@ cgranges_count_contain(VALUE self, VALUE rb_ctg, VALUE rb_st, VALUE rb_en)
311
339
  ctg = StringValueCStr(rb_ctg);
312
340
  st = NUM2INT32(rb_st);
313
341
  en = NUM2INT32(rb_en);
314
-
342
+
315
343
  n = cr_contain(cr, ctg, st, en, &b, &m_b);
316
344
 
317
345
  if (n < 0)
@@ -319,10 +347,105 @@ cgranges_count_contain(VALUE self, VALUE rb_ctg, VALUE rb_st, VALUE rb_en)
319
347
  rb_raise(rb_eRuntimeError, "Error finding contained");
320
348
  return Qnil;
321
349
  }
350
+ free(b);
322
351
 
323
352
  return INT64_2NUM(n);
324
353
  }
325
354
 
355
+ static VALUE
356
+ cgranges_coverage(VALUE self, VALUE rb_ctg, VALUE rb_st, VALUE rb_en, int contain)
357
+ {
358
+ cgranges_t *cr = get_cganges(self);
359
+ char *ctg = NULL;
360
+ int32_t st1 = 0;
361
+ int32_t en1 = 0;
362
+
363
+ int64_t *b = NULL;
364
+ int64_t m_b = 0;
365
+ int64_t n = 0;
366
+ int64_t cov = 0, cov_st = 0, cov_en = 0;
367
+
368
+ if (!RTEST(rb_ivar_get(self, rb_intern("@indexed"))))
369
+ {
370
+ rb_raise(rb_eNoIndexError, "CGRanges not indexed");
371
+ return Qnil;
372
+ }
373
+
374
+ ctg = StringValueCStr(rb_ctg);
375
+ st1 = NUM2INT32(rb_st);
376
+ en1 = NUM2INT32(rb_en);
377
+
378
+ if (contain)
379
+ {
380
+ n = cr_contain(cr, ctg, st1, en1, &b, &m_b);
381
+ }
382
+ else
383
+ {
384
+ n = cr_overlap(cr, ctg, st1, en1, &b, &m_b);
385
+ }
386
+
387
+ if (n < 0)
388
+ {
389
+ rb_raise(rb_eRuntimeError, "Error finding overlaps");
390
+ return Qnil;
391
+ }
392
+
393
+ for (int64_t j = 0; j < n; j++)
394
+ {
395
+ cr_intv_t *r = &cr->r[b[j]];
396
+ int32_t st0 = cr_st(r), en0 = cr_en(r);
397
+ if (st0 < st1)
398
+ st0 = st1;
399
+ if (en0 > en1)
400
+ en0 = en1;
401
+ if (st0 > cov_en)
402
+ {
403
+ cov += cov_en - cov_st;
404
+ cov_st = st0, cov_en = en0;
405
+ }
406
+ else
407
+ cov_en = cov_en > en0 ? cov_en : en0;
408
+ }
409
+ cov += cov_en - cov_st;
410
+ free(b);
411
+
412
+ return rb_ary_new3(2, INT64_2NUM(cov), INT64_2NUM(n));
413
+ }
414
+
415
+ /* Calculate breadth of coverage. (Overlap)
416
+ * Same as coverage(contig, start, end, mode: :overlap)
417
+ * @param [String] contig The contig name
418
+ * @param [Fixnum] start The start position of the interval.
419
+ * @param [Fixnum] end The end position of the interval.
420
+ * @return [Array] The breadth of coverage and the number of intervals.
421
+ * @see coverage
422
+ */
423
+
424
+ static VALUE
425
+ cgranges_coverage_overlap(VALUE self, VALUE rb_ctg, VALUE rb_st, VALUE rb_en)
426
+ {
427
+ VALUE result;
428
+ result = cgranges_coverage(self, rb_ctg, rb_st, rb_en, 0);
429
+ return result;
430
+ }
431
+
432
+ /* Calculate breadth of coverage. (Containment)
433
+ * Same as coverage(contig, start, end, mode: :contain)
434
+ * @param [String] contig The contig name
435
+ * @param [Fixnum] start The start position of the interval.
436
+ * @param [Fixnum] end The end position of the interval.
437
+ * @return [Array] The breadth of coverage and the number of intervals.
438
+ * @see coverage
439
+ */
440
+
441
+ static VALUE
442
+ cgranges_coverage_contain(VALUE self, VALUE rb_ctg, VALUE rb_st, VALUE rb_en)
443
+ {
444
+ VALUE result;
445
+ result = cgranges_coverage(self, rb_ctg, rb_st, rb_en, 1);
446
+ return result;
447
+ }
448
+
326
449
  void Init_cgranges(void)
327
450
  {
328
451
  rb_Bio = rb_define_module("Bio");
@@ -339,4 +462,6 @@ void Init_cgranges(void)
339
462
  rb_define_method(rb_CGRanges, "count_overlap", cgranges_count_overlap, 3);
340
463
  rb_define_method(rb_CGRanges, "contain", cgranges_contain, 3);
341
464
  rb_define_method(rb_CGRanges, "count_contain", cgranges_count_contain, 3);
342
- }
465
+ rb_define_method(rb_CGRanges, "coverage_overlap", cgranges_coverage_overlap, 3);
466
+ rb_define_method(rb_CGRanges, "coverage_contain", cgranges_coverage_contain, 3);
467
+ }
@@ -2,6 +2,6 @@
2
2
 
3
3
  module Bio
4
4
  class CGRanges
5
- VERSION = "0.0.0"
5
+ VERSION = "0.0.2"
6
6
  end
7
7
  end
data/lib/bio/cgranges.rb CHANGED
@@ -6,5 +6,25 @@ require_relative "cgranges/cgranges"
6
6
  module Bio
7
7
  # Reader for .2bit files (i.e., from UCSC genome browser)
8
8
  class CGRanges
9
+ # Calculate breadth of coverage.
10
+ # This is a wrapper method for `coverage_overlap` and `coverage_contain`.
11
+ # @param [String] contig The contig name
12
+ # @param [Fixnum] start The start position of the interval.
13
+ # @param [Fixnum] end The end position of the interval.
14
+ # @param [Symbol] mode :overlap or :contain (default: :overlap)
15
+ # @return [Array] The breadth of coverage and the number of intervals.
16
+ # @see coverage_overlap
17
+ # @see coverage_contain
18
+
19
+ def coverage(ctg, rb_st, rb_en, mode: :overlap)
20
+ case mode
21
+ when :overlap
22
+ coverage_overlap(ctg, rb_st, rb_en)
23
+ when :contain
24
+ coverage_contain(ctg, rb_st, rb_en)
25
+ else
26
+ raise ArgumentError, "unknown mode: #{mode}"
27
+ end
28
+ end
9
29
  end
10
30
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: bio-cgranges
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.0
4
+ version: 0.0.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - kojix2
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2022-05-13 00:00:00.000000000 Z
11
+ date: 2022-11-03 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: Genomic interval overlap queries
14
14
  email:
@@ -31,7 +31,7 @@ files:
31
31
  - ext/bio/cgranges/extconf.rb
32
32
  - lib/bio/cgranges.rb
33
33
  - lib/bio/cgranges/version.rb
34
- homepage: https://github.com/ruby-on-bioc/bio-cgranges
34
+ homepage: https://github.com/kojix2/bio-cgranges
35
35
  licenses:
36
36
  - MIT
37
37
  metadata: {}