bio-cgranges 0.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/LICENSE.txt +21 -0
- data/README.md +65 -0
- data/Rakefile +44 -0
- data/ext/bio/cgranges/cgranges/LICENSE.txt +23 -0
- data/ext/bio/cgranges/cgranges/README.md +133 -0
- data/ext/bio/cgranges/cgranges/cgranges.c +330 -0
- data/ext/bio/cgranges/cgranges/cgranges.h +87 -0
- data/ext/bio/cgranges/cgranges/khash.h +627 -0
- data/ext/bio/cgranges/cgranges.c +342 -0
- data/ext/bio/cgranges/cgranges.h +7 -0
- data/ext/bio/cgranges/extconf.rb +9 -0
- data/lib/bio/cgranges/version.rb +7 -0
- data/lib/bio/cgranges.rb +10 -0
- metadata +57 -0
@@ -0,0 +1,342 @@
|
|
1
|
+
#include "cgranges.h"

#include <stdlib.h>
|
2
|
+
|
3
|
+
#define SIZEOF_INT32 4
|
4
|
+
#define SIZEOF_INT64 8
|
5
|
+
|
6
|
+
#if SIZEOF_SHORT == SIZEOF_INT32
|
7
|
+
#define NUM2UINT32 NUM2USHORT
|
8
|
+
#define NUM2INT32 NUM2SHORT
|
9
|
+
#define UINT32_2NUM USHORT2NUM
|
10
|
+
#define INT32_2NUM SHORT2NUM
|
11
|
+
#elif SIZEOF_INT == SIZEOF_INT32
|
12
|
+
#define NUM2UINT32 NUM2UINT
|
13
|
+
#define NUM2INT32 NUM2INT
|
14
|
+
#define UINT32_2NUM UINT2NUM
|
15
|
+
#define INT32_2NUM INT2NUM
|
16
|
+
#elif SIZEOF_LONG == SIZEOF_INT32
|
17
|
+
#define NUM2UINT32 NUM2ULONG
|
18
|
+
#define NUM2INT32 NUM2LONG
|
19
|
+
#define UINT32_2NUM ULONG2NUM
|
20
|
+
#define INT32_2NUM LONG2NUM
|
21
|
+
#else
|
22
|
+
#error "Neither int, long, nor short is the same size as int32_t"
|
23
|
+
#endif
|
24
|
+
|
25
|
+
#if SIZEOF_INT == SIZEOF_INT64
|
26
|
+
#define NUM2UINT64 NUM2UINT
|
27
|
+
#define NUM2INT64 NUM2INT
|
28
|
+
#define UINT64_2NUM UINT2NUM
|
29
|
+
#define INT64_2NUM INT2NUM
|
30
|
+
#elif SIZEOF_LONG == SIZEOF_INT64
|
31
|
+
#define NUM2UINT64 NUM2ULONG
|
32
|
+
#define NUM2INT64 NUM2LONG
|
33
|
+
#define UINT64_2NUM ULONG2NUM
|
34
|
+
#define INT64_2NUM LONG2NUM
|
35
|
+
#elif SIZEOF_LONGLONG == SIZEOF_INT64
|
36
|
+
#define NUM2UINT64 NUM2ULL
|
37
|
+
#define NUM2INT64 NUM2LL
|
38
|
+
#define UINT64_2NUM ULL2NUM
|
39
|
+
#define INT64_2NUM LL2NUM
|
40
|
+
#else
|
41
|
+
#error "Neither int, long, nor short is the same size as int64_t"
|
42
|
+
#endif
|
43
|
+
|
44
|
+
/* Ruby-side handles; each one is assigned in Init_cgranges(). */
VALUE rb_Bio;           /* module Bio */
VALUE rb_CGRanges;      /* class Bio::CGRanges */
VALUE rb_eNoIndexError; /* class Bio::CGRanges::NoIndexError */
VALUE rb_eIndexedError; /* class Bio::CGRanges::IndexedError */

/* Typed-data callbacks; declared early so cgranges_type can name them. */
static size_t cgranges_memsize(const void *ptr);
static void cgranges_free(void *ptr);
|
51
|
+
|
52
|
+
static const rb_data_type_t cgranges_type = {
|
53
|
+
"cgranges",
|
54
|
+
{
|
55
|
+
0,
|
56
|
+
cgranges_free,
|
57
|
+
cgranges_memsize,
|
58
|
+
},
|
59
|
+
0,
|
60
|
+
0,
|
61
|
+
RUBY_TYPED_FREE_IMMEDIATELY,
|
62
|
+
};
|
63
|
+
|
64
|
+
/*
 * Free callback registered in cgranges_type.
 * Hands the wrapped pointer to cr_destroy(); a NULL pointer is ignored.
 */
static void
cgranges_free(void *ptr)
{
  if (ptr == NULL)
  {
    return;
  }

  cr_destroy(ptr);
}
|
72
|
+
|
73
|
+
static size_t
|
74
|
+
cgranges_memsize(const void *ptr)
|
75
|
+
{
|
76
|
+
const cgranges_t *data = ptr;
|
77
|
+
|
78
|
+
return data ? sizeof(*data) : 0;
|
79
|
+
}
|
80
|
+
|
81
|
+
/*
 * Unwrap the cgranges_t attached to `self`.
 *
 * cgranges_allocate() wraps a NULL pointer and only cgranges_init() replaces
 * it, so an object whose initialize never ran would otherwise hand NULL to
 * every caller.  Raise instead of returning NULL.
 *
 * Returns: the non-NULL wrapped pointer.
 * Raises:  RuntimeError when no cgranges_t is attached (plus whatever
 *          TypedData_Get_Struct raises for a mismatched object).
 */
static cgranges_t *get_cganges(VALUE self)
{
  cgranges_t *ptr = NULL;
  TypedData_Get_Struct(self, cgranges_t, &cgranges_type, ptr);

  if (ptr == NULL)
  {
    rb_raise(rb_eRuntimeError, "uninitialized CGRanges");
  }

  return ptr;
}
|
88
|
+
|
89
|
+
static VALUE
|
90
|
+
cgranges_allocate(VALUE klass)
|
91
|
+
{
|
92
|
+
cgranges_t *ptr = NULL;
|
93
|
+
|
94
|
+
return TypedData_Wrap_Struct(klass, &cgranges_type, ptr);
|
95
|
+
}
|
96
|
+
|
97
|
+
static VALUE
|
98
|
+
cgranges_init(VALUE self)
|
99
|
+
{
|
100
|
+
cgranges_t *cr = NULL;
|
101
|
+
|
102
|
+
cr = cr_init();
|
103
|
+
DATA_PTR(self) = cr;
|
104
|
+
|
105
|
+
rb_ivar_set(self, rb_intern("@indexed"), Qfalse);
|
106
|
+
|
107
|
+
return self;
|
108
|
+
}
|
109
|
+
|
110
|
+
static VALUE
|
111
|
+
cgranges_add(VALUE self, VALUE rb_ctg, VALUE rb_st, VALUE rb_en, VALUE rb_label)
|
112
|
+
{
|
113
|
+
cgranges_t *cr = get_cganges(self);
|
114
|
+
cr_intv_t *intv = NULL;
|
115
|
+
char *ctg = NULL;
|
116
|
+
int32_t st = 0;
|
117
|
+
int32_t en = 0;
|
118
|
+
int32_t label = 0;
|
119
|
+
|
120
|
+
if (RTEST(rb_ivar_get(self, rb_intern("@indexed"))))
|
121
|
+
{
|
122
|
+
rb_raise(rb_eIndexedError, "Cannot add intervals to an indexed CGRanges");
|
123
|
+
return Qnil;
|
124
|
+
}
|
125
|
+
|
126
|
+
if (rb_ctg != Qnil)
|
127
|
+
{
|
128
|
+
ctg = StringValueCStr(rb_ctg);
|
129
|
+
}
|
130
|
+
|
131
|
+
if (rb_st != Qnil)
|
132
|
+
{
|
133
|
+
st = NUM2INT32(rb_st);
|
134
|
+
}
|
135
|
+
|
136
|
+
if (rb_en != Qnil)
|
137
|
+
{
|
138
|
+
en = NUM2INT32(rb_en);
|
139
|
+
}
|
140
|
+
|
141
|
+
if (rb_label != Qnil)
|
142
|
+
{
|
143
|
+
label = NUM2INT32(rb_label);
|
144
|
+
}
|
145
|
+
|
146
|
+
intv = cr_add(cr, ctg, st, en, label);
|
147
|
+
|
148
|
+
if (!intv)
|
149
|
+
{
|
150
|
+
rb_raise(rb_eRuntimeError, "Error adding interval");
|
151
|
+
return Qnil;
|
152
|
+
}
|
153
|
+
|
154
|
+
return self;
|
155
|
+
}
|
156
|
+
|
157
|
+
static VALUE
|
158
|
+
cgranges_index(VALUE self)
|
159
|
+
{
|
160
|
+
if(RTEST(rb_ivar_get(self, rb_intern("@indexed"))))
|
161
|
+
{
|
162
|
+
rb_raise(rb_eIndexedError, "CGRanges already indexed");
|
163
|
+
return Qnil;
|
164
|
+
}
|
165
|
+
|
166
|
+
cgranges_t *cr = get_cganges(self);
|
167
|
+
cr_index(cr);
|
168
|
+
|
169
|
+
rb_ivar_set(self, rb_intern("@indexed"), Qtrue);
|
170
|
+
|
171
|
+
return self;
|
172
|
+
}
|
173
|
+
|
174
|
+
static VALUE
|
175
|
+
cgranges_overlap(VALUE self, VALUE rb_ctg, VALUE rb_st, VALUE rb_en)
|
176
|
+
{
|
177
|
+
cgranges_t *cr = get_cganges(self);
|
178
|
+
char *ctg = NULL;
|
179
|
+
int32_t st = 0;
|
180
|
+
int32_t en = 0;
|
181
|
+
|
182
|
+
int64_t *b = NULL;
|
183
|
+
int64_t m_b = 0;
|
184
|
+
int64_t n = 0;
|
185
|
+
|
186
|
+
if (!RTEST(rb_ivar_get(self, rb_intern("@indexed"))))
|
187
|
+
{
|
188
|
+
rb_raise(rb_eNoIndexError, "CGRanges not indexed");
|
189
|
+
return Qnil;
|
190
|
+
}
|
191
|
+
|
192
|
+
ctg = StringValueCStr(rb_ctg);
|
193
|
+
st = NUM2INT32(rb_st);
|
194
|
+
en = NUM2INT32(rb_en);
|
195
|
+
|
196
|
+
n = cr_overlap(cr, ctg, st, en, &b, &m_b);
|
197
|
+
|
198
|
+
if (n < 0)
|
199
|
+
{
|
200
|
+
rb_raise(rb_eRuntimeError, "Error finding overlaps");
|
201
|
+
return Qnil;
|
202
|
+
}
|
203
|
+
|
204
|
+
VALUE result = rb_ary_new2(n);
|
205
|
+
|
206
|
+
for (int64_t i = 0; i < n; i++)
|
207
|
+
{
|
208
|
+
VALUE rb_intv = rb_ary_new3(
|
209
|
+
4, rb_ctg, INT32_2NUM(cr_start(cr, b[i])), INT32_2NUM(cr_end(cr, b[i])), INT32_2NUM(cr_label(cr, b[i]))
|
210
|
+
);
|
211
|
+
rb_ary_push(result, rb_intv);
|
212
|
+
}
|
213
|
+
|
214
|
+
return result;
|
215
|
+
}
|
216
|
+
|
217
|
+
static VALUE
|
218
|
+
cgranges_count_overlap(VALUE self, VALUE rb_ctg, VALUE rb_st, VALUE rb_en)
|
219
|
+
{
|
220
|
+
cgranges_t *cr = get_cganges(self);
|
221
|
+
char *ctg = NULL;
|
222
|
+
int32_t st = 0;
|
223
|
+
int32_t en = 0;
|
224
|
+
|
225
|
+
int64_t *b = NULL;
|
226
|
+
int64_t m_b = 0;
|
227
|
+
int64_t n = 0;
|
228
|
+
|
229
|
+
if (!RTEST(rb_ivar_get(self, rb_intern("@indexed"))))
|
230
|
+
{
|
231
|
+
rb_raise(rb_eNoIndexError, "CGRanges not indexed");
|
232
|
+
return Qnil;
|
233
|
+
}
|
234
|
+
|
235
|
+
ctg = StringValueCStr(rb_ctg);
|
236
|
+
st = NUM2INT32(rb_st);
|
237
|
+
en = NUM2INT32(rb_en);
|
238
|
+
|
239
|
+
n = cr_overlap(cr, ctg, st, en, &b, &m_b);
|
240
|
+
|
241
|
+
if (n < 0)
|
242
|
+
{
|
243
|
+
rb_raise(rb_eRuntimeError, "Error finding overlaps");
|
244
|
+
return Qnil;
|
245
|
+
}
|
246
|
+
|
247
|
+
return INT64_2NUM(n);
|
248
|
+
}
|
249
|
+
|
250
|
+
static VALUE
|
251
|
+
cgranges_contain(VALUE self, VALUE rb_ctg, VALUE rb_st, VALUE rb_en)
|
252
|
+
{
|
253
|
+
cgranges_t *cr = get_cganges(self);
|
254
|
+
char *ctg = NULL;
|
255
|
+
int32_t st = 0;
|
256
|
+
int32_t en = 0;
|
257
|
+
|
258
|
+
int64_t *b = NULL;
|
259
|
+
int64_t m_b = 0;
|
260
|
+
int64_t n = 0;
|
261
|
+
|
262
|
+
if (!RTEST(rb_ivar_get(self, rb_intern("@indexed"))))
|
263
|
+
{
|
264
|
+
rb_raise(rb_eNoIndexError, "CGRanges not indexed");
|
265
|
+
return Qnil;
|
266
|
+
}
|
267
|
+
|
268
|
+
ctg = StringValueCStr(rb_ctg);
|
269
|
+
st = NUM2INT32(rb_st);
|
270
|
+
en = NUM2INT32(rb_en);
|
271
|
+
|
272
|
+
n = cr_contain(cr, ctg, st, en, &b, &m_b);
|
273
|
+
|
274
|
+
if (n < 0)
|
275
|
+
{
|
276
|
+
rb_raise(rb_eRuntimeError, "Error finding contained");
|
277
|
+
return Qnil;
|
278
|
+
}
|
279
|
+
|
280
|
+
VALUE result = rb_ary_new2(n);
|
281
|
+
|
282
|
+
for (int64_t i = 0; i < n; i++)
|
283
|
+
{
|
284
|
+
VALUE rb_intv = rb_ary_new3(
|
285
|
+
4, rb_ctg, INT32_2NUM(cr_start(cr, b[i])), INT32_2NUM(cr_end(cr, b[i])), INT32_2NUM(cr_label(cr, b[i]))
|
286
|
+
);
|
287
|
+
rb_ary_push(result, rb_intv);
|
288
|
+
}
|
289
|
+
|
290
|
+
return result;
|
291
|
+
}
|
292
|
+
|
293
|
+
static VALUE
|
294
|
+
cgranges_count_contain(VALUE self, VALUE rb_ctg, VALUE rb_st, VALUE rb_en)
|
295
|
+
{
|
296
|
+
cgranges_t *cr = get_cganges(self);
|
297
|
+
char *ctg = NULL;
|
298
|
+
int32_t st = 0;
|
299
|
+
int32_t en = 0;
|
300
|
+
|
301
|
+
int64_t *b = NULL;
|
302
|
+
int64_t m_b = 0;
|
303
|
+
int64_t n = 0;
|
304
|
+
|
305
|
+
if (!RTEST(rb_ivar_get(self, rb_intern("@indexed"))))
|
306
|
+
{
|
307
|
+
rb_raise(rb_eNoIndexError, "CGRanges not indexed");
|
308
|
+
return Qnil;
|
309
|
+
}
|
310
|
+
|
311
|
+
ctg = StringValueCStr(rb_ctg);
|
312
|
+
st = NUM2INT32(rb_st);
|
313
|
+
en = NUM2INT32(rb_en);
|
314
|
+
|
315
|
+
n = cr_contain(cr, ctg, st, en, &b, &m_b);
|
316
|
+
|
317
|
+
if (n < 0)
|
318
|
+
{
|
319
|
+
rb_raise(rb_eRuntimeError, "Error finding contained");
|
320
|
+
return Qnil;
|
321
|
+
}
|
322
|
+
|
323
|
+
return INT64_2NUM(n);
|
324
|
+
}
|
325
|
+
|
326
|
+
void Init_cgranges(void)
|
327
|
+
{
|
328
|
+
rb_Bio = rb_define_module("Bio");
|
329
|
+
rb_CGRanges = rb_define_class_under(rb_Bio, "CGRanges", rb_cObject);
|
330
|
+
rb_eNoIndexError = rb_define_class_under(rb_CGRanges, "NoIndexError", rb_eStandardError);
|
331
|
+
rb_eIndexedError = rb_define_class_under(rb_CGRanges, "IndexedError", rb_eStandardError);
|
332
|
+
|
333
|
+
rb_define_alloc_func(rb_CGRanges, cgranges_allocate);
|
334
|
+
|
335
|
+
rb_define_method(rb_CGRanges, "initialize", cgranges_init, 0);
|
336
|
+
rb_define_method(rb_CGRanges, "add", cgranges_add, 4);
|
337
|
+
rb_define_method(rb_CGRanges, "index", cgranges_index, 0);
|
338
|
+
rb_define_method(rb_CGRanges, "overlap", cgranges_overlap, 3);
|
339
|
+
rb_define_method(rb_CGRanges, "count_overlap", cgranges_count_overlap, 3);
|
340
|
+
rb_define_method(rb_CGRanges, "contain", cgranges_contain, 3);
|
341
|
+
rb_define_method(rb_CGRanges, "count_contain", cgranges_count_contain, 3);
|
342
|
+
}
|
data/lib/bio/cgranges.rb
ADDED
metadata
ADDED
@@ -0,0 +1,57 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: bio-cgranges
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- kojix2
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2022-05-13 00:00:00.000000000 Z
|
12
|
+
dependencies: []
|
13
|
+
description: Genomic interval overlap queries
|
14
|
+
email:
|
15
|
+
- 2xijok@gmail.com
|
16
|
+
executables: []
|
17
|
+
extensions:
|
18
|
+
- ext/bio/cgranges/extconf.rb
|
19
|
+
extra_rdoc_files: []
|
20
|
+
files:
|
21
|
+
- LICENSE.txt
|
22
|
+
- README.md
|
23
|
+
- Rakefile
|
24
|
+
- ext/bio/cgranges/cgranges.c
|
25
|
+
- ext/bio/cgranges/cgranges.h
|
26
|
+
- ext/bio/cgranges/cgranges/LICENSE.txt
|
27
|
+
- ext/bio/cgranges/cgranges/README.md
|
28
|
+
- ext/bio/cgranges/cgranges/cgranges.c
|
29
|
+
- ext/bio/cgranges/cgranges/cgranges.h
|
30
|
+
- ext/bio/cgranges/cgranges/khash.h
|
31
|
+
- ext/bio/cgranges/extconf.rb
|
32
|
+
- lib/bio/cgranges.rb
|
33
|
+
- lib/bio/cgranges/version.rb
|
34
|
+
homepage: https://github.com/ruby-on-bioc/bio-cgranges
|
35
|
+
licenses:
|
36
|
+
- MIT
|
37
|
+
metadata: {}
|
38
|
+
post_install_message:
|
39
|
+
rdoc_options: []
|
40
|
+
require_paths:
|
41
|
+
- lib
|
42
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
43
|
+
requirements:
|
44
|
+
- - ">="
|
45
|
+
- !ruby/object:Gem::Version
|
46
|
+
version: 2.7.0
|
47
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
48
|
+
requirements:
|
49
|
+
- - ">="
|
50
|
+
- !ruby/object:Gem::Version
|
51
|
+
version: '0'
|
52
|
+
requirements: []
|
53
|
+
rubygems_version: 3.3.7
|
54
|
+
signing_key:
|
55
|
+
specification_version: 4
|
56
|
+
summary: Ruby bindings for lh3/cgranges
|
57
|
+
test_files: []
|