bio-cgranges 0.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,342 @@
1
#include "cgranges.h"

#include <stdlib.h> /* free() */
2
+
3
+ #define SIZEOF_INT32 4
4
+ #define SIZEOF_INT64 8
5
+
6
+ #if SIZEOF_SHORT == SIZEOF_INT32
7
+ #define NUM2UINT32 NUM2USHORT
8
+ #define NUM2INT32 NUM2SHORT
9
+ #define UINT32_2NUM USHORT2NUM
10
+ #define INT32_2NUM SHORT2NUM
11
+ #elif SIZEOF_INT == SIZEOF_INT32
12
+ #define NUM2UINT32 NUM2UINT
13
+ #define NUM2INT32 NUM2INT
14
+ #define UINT32_2NUM UINT2NUM
15
+ #define INT32_2NUM INT2NUM
16
+ #elif SIZEOF_LONG == SIZEOF_INT32
17
+ #define NUM2UINT32 NUM2ULONG
18
+ #define NUM2INT32 NUM2LONG
19
+ #define UINT32_2NUM ULONG2NUM
20
+ #define INT32_2NUM LONG2NUM
21
+ #else
22
+ #error "Neither int, long, nor short is the same size as int32_t"
23
+ #endif
24
+
25
/*
 * Pick the Ruby <-> C conversion macros whose native C type is exactly as
 * wide as int64_t. The candidates, in order, are int, long and long long.
 * NOTE(review): SIZEOF_INT / SIZEOF_LONG / SIZEOF_LONGLONG are not defined in
 * this file; presumably they come from Ruby's headers via cgranges.h.
 */
#if SIZEOF_INT == SIZEOF_INT64
#define NUM2UINT64 NUM2UINT
#define NUM2INT64 NUM2INT
#define UINT64_2NUM UINT2NUM
#define INT64_2NUM INT2NUM
#elif SIZEOF_LONG == SIZEOF_INT64
#define NUM2UINT64 NUM2ULONG
#define NUM2INT64 NUM2LONG
#define UINT64_2NUM ULONG2NUM
#define INT64_2NUM LONG2NUM
#elif SIZEOF_LONGLONG == SIZEOF_INT64
#define NUM2UINT64 NUM2ULL
#define NUM2INT64 NUM2LL
#define UINT64_2NUM ULL2NUM
#define INT64_2NUM LL2NUM
#else
/* The diagnostic names the types actually tested above (int, long, long long). */
#error "Neither int, long, nor long long is the same size as int64_t"
#endif
43
+
44
+ VALUE rb_Bio;
45
+ VALUE rb_CGRanges;
46
+ VALUE rb_eNoIndexError;
47
+ VALUE rb_eIndexedError;
48
+
49
+ static void cgranges_free(void *ptr);
50
+ static size_t cgranges_memsize(const void *ptr);
51
+
52
+ static const rb_data_type_t cgranges_type = {
53
+ "cgranges",
54
+ {
55
+ 0,
56
+ cgranges_free,
57
+ cgranges_memsize,
58
+ },
59
+ 0,
60
+ 0,
61
+ RUBY_TYPED_FREE_IMMEDIATELY,
62
+ };
63
+
64
/*
 * dfree callback of cgranges_type: releases the wrapped container with
 * cr_destroy(). Objects that never had a container attached carry a NULL
 * pointer, in which case nothing is done.
 */
static void
cgranges_free(void *ptr)
{
    if (ptr == NULL)
    {
        return;
    }

    cr_destroy(ptr);
}
72
+
73
+ static size_t
74
+ cgranges_memsize(const void *ptr)
75
+ {
76
+ const cgranges_t *data = ptr;
77
+
78
+ return data ? sizeof(*data) : 0;
79
+ }
80
+
81
+ static cgranges_t *get_cganges(VALUE self)
82
+ {
83
+ cgranges_t *ptr = NULL;
84
+ TypedData_Get_Struct(self, cgranges_t, &cgranges_type, ptr);
85
+
86
+ return ptr;
87
+ }
88
+
89
+ static VALUE
90
+ cgranges_allocate(VALUE klass)
91
+ {
92
+ cgranges_t *ptr = NULL;
93
+
94
+ return TypedData_Wrap_Struct(klass, &cgranges_type, ptr);
95
+ }
96
+
97
+ static VALUE
98
+ cgranges_init(VALUE self)
99
+ {
100
+ cgranges_t *cr = NULL;
101
+
102
+ cr = cr_init();
103
+ DATA_PTR(self) = cr;
104
+
105
+ rb_ivar_set(self, rb_intern("@indexed"), Qfalse);
106
+
107
+ return self;
108
+ }
109
+
110
+ static VALUE
111
+ cgranges_add(VALUE self, VALUE rb_ctg, VALUE rb_st, VALUE rb_en, VALUE rb_label)
112
+ {
113
+ cgranges_t *cr = get_cganges(self);
114
+ cr_intv_t *intv = NULL;
115
+ char *ctg = NULL;
116
+ int32_t st = 0;
117
+ int32_t en = 0;
118
+ int32_t label = 0;
119
+
120
+ if (RTEST(rb_ivar_get(self, rb_intern("@indexed"))))
121
+ {
122
+ rb_raise(rb_eIndexedError, "Cannot add intervals to an indexed CGRanges");
123
+ return Qnil;
124
+ }
125
+
126
+ if (rb_ctg != Qnil)
127
+ {
128
+ ctg = StringValueCStr(rb_ctg);
129
+ }
130
+
131
+ if (rb_st != Qnil)
132
+ {
133
+ st = NUM2INT32(rb_st);
134
+ }
135
+
136
+ if (rb_en != Qnil)
137
+ {
138
+ en = NUM2INT32(rb_en);
139
+ }
140
+
141
+ if (rb_label != Qnil)
142
+ {
143
+ label = NUM2INT32(rb_label);
144
+ }
145
+
146
+ intv = cr_add(cr, ctg, st, en, label);
147
+
148
+ if (!intv)
149
+ {
150
+ rb_raise(rb_eRuntimeError, "Error adding interval");
151
+ return Qnil;
152
+ }
153
+
154
+ return self;
155
+ }
156
+
157
+ static VALUE
158
+ cgranges_index(VALUE self)
159
+ {
160
+ if(RTEST(rb_ivar_get(self, rb_intern("@indexed"))))
161
+ {
162
+ rb_raise(rb_eIndexedError, "CGRanges already indexed");
163
+ return Qnil;
164
+ }
165
+
166
+ cgranges_t *cr = get_cganges(self);
167
+ cr_index(cr);
168
+
169
+ rb_ivar_set(self, rb_intern("@indexed"), Qtrue);
170
+
171
+ return self;
172
+ }
173
+
174
+ static VALUE
175
+ cgranges_overlap(VALUE self, VALUE rb_ctg, VALUE rb_st, VALUE rb_en)
176
+ {
177
+ cgranges_t *cr = get_cganges(self);
178
+ char *ctg = NULL;
179
+ int32_t st = 0;
180
+ int32_t en = 0;
181
+
182
+ int64_t *b = NULL;
183
+ int64_t m_b = 0;
184
+ int64_t n = 0;
185
+
186
+ if (!RTEST(rb_ivar_get(self, rb_intern("@indexed"))))
187
+ {
188
+ rb_raise(rb_eNoIndexError, "CGRanges not indexed");
189
+ return Qnil;
190
+ }
191
+
192
+ ctg = StringValueCStr(rb_ctg);
193
+ st = NUM2INT32(rb_st);
194
+ en = NUM2INT32(rb_en);
195
+
196
+ n = cr_overlap(cr, ctg, st, en, &b, &m_b);
197
+
198
+ if (n < 0)
199
+ {
200
+ rb_raise(rb_eRuntimeError, "Error finding overlaps");
201
+ return Qnil;
202
+ }
203
+
204
+ VALUE result = rb_ary_new2(n);
205
+
206
+ for (int64_t i = 0; i < n; i++)
207
+ {
208
+ VALUE rb_intv = rb_ary_new3(
209
+ 4, rb_ctg, INT32_2NUM(cr_start(cr, b[i])), INT32_2NUM(cr_end(cr, b[i])), INT32_2NUM(cr_label(cr, b[i]))
210
+ );
211
+ rb_ary_push(result, rb_intv);
212
+ }
213
+
214
+ return result;
215
+ }
216
+
217
+ static VALUE
218
+ cgranges_count_overlap(VALUE self, VALUE rb_ctg, VALUE rb_st, VALUE rb_en)
219
+ {
220
+ cgranges_t *cr = get_cganges(self);
221
+ char *ctg = NULL;
222
+ int32_t st = 0;
223
+ int32_t en = 0;
224
+
225
+ int64_t *b = NULL;
226
+ int64_t m_b = 0;
227
+ int64_t n = 0;
228
+
229
+ if (!RTEST(rb_ivar_get(self, rb_intern("@indexed"))))
230
+ {
231
+ rb_raise(rb_eNoIndexError, "CGRanges not indexed");
232
+ return Qnil;
233
+ }
234
+
235
+ ctg = StringValueCStr(rb_ctg);
236
+ st = NUM2INT32(rb_st);
237
+ en = NUM2INT32(rb_en);
238
+
239
+ n = cr_overlap(cr, ctg, st, en, &b, &m_b);
240
+
241
+ if (n < 0)
242
+ {
243
+ rb_raise(rb_eRuntimeError, "Error finding overlaps");
244
+ return Qnil;
245
+ }
246
+
247
+ return INT64_2NUM(n);
248
+ }
249
+
250
+ static VALUE
251
+ cgranges_contain(VALUE self, VALUE rb_ctg, VALUE rb_st, VALUE rb_en)
252
+ {
253
+ cgranges_t *cr = get_cganges(self);
254
+ char *ctg = NULL;
255
+ int32_t st = 0;
256
+ int32_t en = 0;
257
+
258
+ int64_t *b = NULL;
259
+ int64_t m_b = 0;
260
+ int64_t n = 0;
261
+
262
+ if (!RTEST(rb_ivar_get(self, rb_intern("@indexed"))))
263
+ {
264
+ rb_raise(rb_eNoIndexError, "CGRanges not indexed");
265
+ return Qnil;
266
+ }
267
+
268
+ ctg = StringValueCStr(rb_ctg);
269
+ st = NUM2INT32(rb_st);
270
+ en = NUM2INT32(rb_en);
271
+
272
+ n = cr_contain(cr, ctg, st, en, &b, &m_b);
273
+
274
+ if (n < 0)
275
+ {
276
+ rb_raise(rb_eRuntimeError, "Error finding contained");
277
+ return Qnil;
278
+ }
279
+
280
+ VALUE result = rb_ary_new2(n);
281
+
282
+ for (int64_t i = 0; i < n; i++)
283
+ {
284
+ VALUE rb_intv = rb_ary_new3(
285
+ 4, rb_ctg, INT32_2NUM(cr_start(cr, b[i])), INT32_2NUM(cr_end(cr, b[i])), INT32_2NUM(cr_label(cr, b[i]))
286
+ );
287
+ rb_ary_push(result, rb_intv);
288
+ }
289
+
290
+ return result;
291
+ }
292
+
293
+ static VALUE
294
+ cgranges_count_contain(VALUE self, VALUE rb_ctg, VALUE rb_st, VALUE rb_en)
295
+ {
296
+ cgranges_t *cr = get_cganges(self);
297
+ char *ctg = NULL;
298
+ int32_t st = 0;
299
+ int32_t en = 0;
300
+
301
+ int64_t *b = NULL;
302
+ int64_t m_b = 0;
303
+ int64_t n = 0;
304
+
305
+ if (!RTEST(rb_ivar_get(self, rb_intern("@indexed"))))
306
+ {
307
+ rb_raise(rb_eNoIndexError, "CGRanges not indexed");
308
+ return Qnil;
309
+ }
310
+
311
+ ctg = StringValueCStr(rb_ctg);
312
+ st = NUM2INT32(rb_st);
313
+ en = NUM2INT32(rb_en);
314
+
315
+ n = cr_contain(cr, ctg, st, en, &b, &m_b);
316
+
317
+ if (n < 0)
318
+ {
319
+ rb_raise(rb_eRuntimeError, "Error finding contained");
320
+ return Qnil;
321
+ }
322
+
323
+ return INT64_2NUM(n);
324
+ }
325
+
326
+ void Init_cgranges(void)
327
+ {
328
+ rb_Bio = rb_define_module("Bio");
329
+ rb_CGRanges = rb_define_class_under(rb_Bio, "CGRanges", rb_cObject);
330
+ rb_eNoIndexError = rb_define_class_under(rb_CGRanges, "NoIndexError", rb_eStandardError);
331
+ rb_eIndexedError = rb_define_class_under(rb_CGRanges, "IndexedError", rb_eStandardError);
332
+
333
+ rb_define_alloc_func(rb_CGRanges, cgranges_allocate);
334
+
335
+ rb_define_method(rb_CGRanges, "initialize", cgranges_init, 0);
336
+ rb_define_method(rb_CGRanges, "add", cgranges_add, 4);
337
+ rb_define_method(rb_CGRanges, "index", cgranges_index, 0);
338
+ rb_define_method(rb_CGRanges, "overlap", cgranges_overlap, 3);
339
+ rb_define_method(rb_CGRanges, "count_overlap", cgranges_count_overlap, 3);
340
+ rb_define_method(rb_CGRanges, "contain", cgranges_contain, 3);
341
+ rb_define_method(rb_CGRanges, "count_contain", cgranges_count_contain, 3);
342
+ }
@@ -0,0 +1,7 @@
1
+ #ifndef BIO_CGRanges_H
2
+ #define BIO_CGRanges_H 1
3
+
4
+ #include "ruby.h"
5
+ #include "cgranges/cgranges.h"
6
+
7
+ #endif /* BIO_CGRanges_H */
@@ -0,0 +1,9 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "mkmf"
4
+
5
# Object files to build: one per C source found in this directory and in the
# cgranges/ subdirectory, expressed as absolute paths with a .o suffix.
c_sources = Dir.glob(["{.,cgranges}/*.c"], base: __dir__)
$objs = c_sources.map do |source|
  File.expand_path(source, __dir__).sub(/\.c$/, ".o")
end

create_makefile("bio/cgranges/cgranges")
@@ -0,0 +1,7 @@
1
+ # frozen_string_literal: true
2
+
3
module Bio
  # Holds the gem's version constant.
  class CGRanges
    # Version string of this gem release.
    VERSION = "0.0.0"
  end
end
@@ -0,0 +1,10 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "cgranges/version"
4
+ require_relative "cgranges/cgranges"
5
+
6
+ module Bio
7
+ # Ruby bindings for lh3/cgranges: genomic interval overlap queries
8
+ class CGRanges
9
+ end
10
+ end
metadata ADDED
@@ -0,0 +1,57 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: bio-cgranges
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.0
5
+ platform: ruby
6
+ authors:
7
+ - kojix2
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2022-05-13 00:00:00.000000000 Z
12
+ dependencies: []
13
+ description: Genomic interval overlap queries
14
+ email:
15
+ - 2xijok@gmail.com
16
+ executables: []
17
+ extensions:
18
+ - ext/bio/cgranges/extconf.rb
19
+ extra_rdoc_files: []
20
+ files:
21
+ - LICENSE.txt
22
+ - README.md
23
+ - Rakefile
24
+ - ext/bio/cgranges/cgranges.c
25
+ - ext/bio/cgranges/cgranges.h
26
+ - ext/bio/cgranges/cgranges/LICENSE.txt
27
+ - ext/bio/cgranges/cgranges/README.md
28
+ - ext/bio/cgranges/cgranges/cgranges.c
29
+ - ext/bio/cgranges/cgranges/cgranges.h
30
+ - ext/bio/cgranges/cgranges/khash.h
31
+ - ext/bio/cgranges/extconf.rb
32
+ - lib/bio/cgranges.rb
33
+ - lib/bio/cgranges/version.rb
34
+ homepage: https://github.com/ruby-on-bioc/bio-cgranges
35
+ licenses:
36
+ - MIT
37
+ metadata: {}
38
+ post_install_message:
39
+ rdoc_options: []
40
+ require_paths:
41
+ - lib
42
+ required_ruby_version: !ruby/object:Gem::Requirement
43
+ requirements:
44
+ - - ">="
45
+ - !ruby/object:Gem::Version
46
+ version: 2.7.0
47
+ required_rubygems_version: !ruby/object:Gem::Requirement
48
+ requirements:
49
+ - - ">="
50
+ - !ruby/object:Gem::Version
51
+ version: '0'
52
+ requirements: []
53
+ rubygems_version: 3.3.7
54
+ signing_key:
55
+ specification_version: 4
56
+ summary: Ruby bindings for lh3/cgranges
57
+ test_files: []