edlib 0.0.5 → 0.0.7
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +1 -0
- data/ext/edlib/edlibext.c +101 -98
- data/ext/edlib/edlibext.h +15 -0
- data/ext/edlib/extconf.rb +2 -0
- data/lib/edlib/version.rb +3 -1
- data/lib/edlib.rb +65 -0
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 72ca7381d858e17e8ba5e2641d072cb4f391410b97ef9138504efa6851020373
|
4
|
+
data.tar.gz: 180e15f4a09149af7031536817a635be580d55afd25896341cb44cf54a9902be
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 5f53003d5e12d9d55b40201f435a278b4a99d6e01569cf0f5e96a263822058257d3dd1b65c7c0f48d39a92177c271131d7752bf982c68893316e5dccbdeefc9f
|
7
|
+
data.tar.gz: 27e1ec9237c5f86fe8775100dbda7e2b9851071a9043cace94b927a65e372afa0b7cbbca741d51266b1f2a0493917eeaf1b4b60da92600def529b2e4ac724edb
|
data/README.md
CHANGED
@@ -2,6 +2,7 @@
|
|
2
2
|
|
3
3
|
[![Gem Version](https://badge.fury.io/rb/edlib.svg)](https://badge.fury.io/rb/edlib)
|
4
4
|
[![test](https://github.com/kojix2/ruby-edlib/actions/workflows/ci.yml/badge.svg)](https://github.com/kojix2/ruby-edlib/actions/workflows/ci.yml)
|
5
|
+
[![DOI](https://zenodo.org/badge/559318048.svg)](https://zenodo.org/badge/latestdoi/559318048)
|
5
6
|
|
6
7
|
[Edlib](https://github.com/Martinsos/edlib) - A lightweight and super fast C/C++ library for sequence alignment using edit distance
|
7
8
|
|
data/ext/edlib/edlibext.c
CHANGED
@@ -1,70 +1,98 @@
|
|
1
1
|
#include "ruby.h"
|
2
|
-
#include "
|
2
|
+
#include "edlibext.h"
|
3
|
+
|
4
|
+
#define ALIGNER_GET_(name) \
|
5
|
+
static VALUE \
|
6
|
+
aligner_get_##name(VALUE self) \
|
7
|
+
{ \
|
8
|
+
EdlibAlignConfig *config = aligner_get_config(self); \
|
9
|
+
return get_##name(config); \
|
10
|
+
}
|
11
|
+
|
12
|
+
#define ALIGNER_SET_(name) \
|
13
|
+
static VALUE \
|
14
|
+
aligner_set_##name(VALUE self, VALUE value) \
|
15
|
+
{ \
|
16
|
+
EdlibAlignConfig *config = aligner_get_config(self); \
|
17
|
+
return set_##name(config, value); \
|
18
|
+
}
|
3
19
|
|
4
20
|
VALUE mEdlib;
|
5
21
|
VALUE cAligner;
|
6
|
-
EdlibEqualityPair *eqpairs;
|
7
22
|
|
8
|
-
|
9
|
-
|
23
|
+
// Aligner class
|
24
|
+
|
25
|
+
static size_t aligner_config_memsize(const void *ptr);
|
26
|
+
static void aligner_config_free(void *ptr);
|
10
27
|
|
11
28
|
static const rb_data_type_t config_type = {
|
12
|
-
.wrap_struct_name = "
|
29
|
+
.wrap_struct_name = "RbAlignConfig",
|
13
30
|
.function = {
|
14
|
-
.dfree =
|
15
|
-
.dsize =
|
31
|
+
.dfree = aligner_config_free,
|
32
|
+
.dsize = aligner_config_memsize,
|
16
33
|
},
|
17
34
|
.flags = RUBY_TYPED_FREE_IMMEDIATELY,
|
18
35
|
};
|
19
36
|
|
20
37
|
static VALUE
|
21
|
-
|
38
|
+
aligner_config_allocate(VALUE klass)
|
22
39
|
{
|
23
|
-
|
24
|
-
|
25
|
-
|
40
|
+
RbAlignConfig *aligner_config;
|
41
|
+
|
42
|
+
VALUE obj = TypedData_Make_Struct(klass, RbAlignConfig, &config_type, aligner_config);
|
43
|
+
aligner_config->config = (EdlibAlignConfig *)malloc(sizeof(EdlibAlignConfig));
|
44
|
+
aligner_config->equalityPairs = NULL;
|
26
45
|
return obj;
|
27
46
|
}
|
28
47
|
|
29
48
|
static void
|
30
|
-
|
49
|
+
aligner_config_free(void *ptr)
|
31
50
|
{
|
32
|
-
|
51
|
+
RbAlignConfig *aligner_config = ptr;
|
52
|
+
if (aligner_config->config != NULL)
|
33
53
|
{
|
34
|
-
free(
|
35
|
-
|
54
|
+
free(aligner_config->config);
|
55
|
+
}
|
56
|
+
if (aligner_config->equalityPairs != NULL)
|
57
|
+
{
|
58
|
+
free(aligner_config->equalityPairs);
|
36
59
|
}
|
37
|
-
|
60
|
+
|
61
|
+
free(ptr);
|
38
62
|
}
|
39
63
|
|
40
64
|
static size_t
|
41
|
-
|
65
|
+
aligner_config_memsize(const void *ptr)
|
42
66
|
{
|
43
|
-
const
|
44
|
-
return sizeof(ptr) + 2 * sizeof(char) *
|
67
|
+
const RbAlignConfig *aligner_config = ptr;
|
68
|
+
return sizeof(ptr) + sizeof(aligner_config->config) + 2 * sizeof(char) * aligner_config->config->additionalEqualitiesLength;
|
45
69
|
}
|
46
70
|
|
47
71
|
static EdlibAlignConfig *
|
48
|
-
|
72
|
+
aligner_get_config(VALUE self)
|
49
73
|
{
|
50
|
-
|
51
|
-
TypedData_Get_Struct(self,
|
74
|
+
RbAlignConfig *aligner_config = NULL;
|
75
|
+
TypedData_Get_Struct(self, RbAlignConfig, &config_type, aligner_config);
|
76
|
+
return aligner_config->config;
|
77
|
+
}
|
52
78
|
|
53
|
-
|
79
|
+
static EdlibEqualityPair *
|
80
|
+
aligner_get_equalityPairs(VALUE self)
|
81
|
+
{
|
82
|
+
RbAlignConfig *aligner_config = NULL;
|
83
|
+
TypedData_Get_Struct(self, RbAlignConfig, &config_type, aligner_config);
|
84
|
+
return aligner_config->equalityPairs;
|
54
85
|
}
|
55
86
|
|
87
|
+
// Config
|
88
|
+
|
56
89
|
static VALUE
|
57
90
|
get_k(EdlibAlignConfig *config)
|
58
91
|
{
|
59
92
|
return INT2NUM(config->k);
|
60
93
|
}
|
61
94
|
|
62
|
-
|
63
|
-
aligner_get_k(VALUE self)
|
64
|
-
{
|
65
|
-
EdlibAlignConfig *config = get_config(self);
|
66
|
-
return get_k(config);
|
67
|
-
}
|
95
|
+
ALIGNER_GET_(k)
|
68
96
|
|
69
97
|
static VALUE
|
70
98
|
set_k(EdlibAlignConfig *config, VALUE k)
|
@@ -73,35 +101,25 @@ set_k(EdlibAlignConfig *config, VALUE k)
|
|
73
101
|
return k;
|
74
102
|
}
|
75
103
|
|
76
|
-
|
77
|
-
aligner_set_k(VALUE self, VALUE k)
|
78
|
-
{
|
79
|
-
EdlibAlignConfig *config = get_config(self);
|
80
|
-
return set_k(config, k);
|
81
|
-
}
|
104
|
+
ALIGNER_SET_(k)
|
82
105
|
|
83
106
|
static VALUE
|
84
107
|
get_mode(EdlibAlignConfig *config)
|
85
108
|
{
|
86
109
|
switch (config->mode)
|
87
110
|
{
|
88
|
-
case
|
111
|
+
case EDLIB_MODE_NW:
|
89
112
|
return rb_str_new2("NW");
|
90
|
-
case
|
113
|
+
case EDLIB_MODE_SHW:
|
91
114
|
return rb_str_new2("SHW");
|
92
|
-
case
|
115
|
+
case EDLIB_MODE_HW:
|
93
116
|
return rb_str_new2("HW");
|
94
117
|
default:
|
95
118
|
return Qnil;
|
96
119
|
}
|
97
120
|
}
|
98
121
|
|
99
|
-
|
100
|
-
aligner_get_mode(VALUE self)
|
101
|
-
{
|
102
|
-
EdlibAlignConfig *config = get_config(self);
|
103
|
-
return get_mode(config);
|
104
|
-
}
|
122
|
+
ALIGNER_GET_(mode)
|
105
123
|
|
106
124
|
static VALUE
|
107
125
|
set_mode(EdlibAlignConfig *config, VALUE mode)
|
@@ -113,18 +131,19 @@ set_mode(EdlibAlignConfig *config, VALUE mode)
|
|
113
131
|
switch (TYPE(mode))
|
114
132
|
{
|
115
133
|
case T_STRING:;
|
116
|
-
|
117
|
-
|
134
|
+
VALUE mode_str = rb_funcall(mode, rb_intern("upcase"), 0);
|
135
|
+
char *mode_s = RSTRING_PTR(mode_str);
|
136
|
+
if (strcmp(mode_s, "NW") == 0)
|
118
137
|
{
|
119
|
-
config->mode =
|
138
|
+
config->mode = EDLIB_MODE_NW;
|
120
139
|
}
|
121
|
-
else if (strcmp(
|
140
|
+
else if (strcmp(mode_s, "SHW") == 0)
|
122
141
|
{
|
123
|
-
config->mode =
|
142
|
+
config->mode = EDLIB_MODE_SHW;
|
124
143
|
}
|
125
|
-
else if (strcmp(
|
144
|
+
else if (strcmp(mode_s, "HW") == 0)
|
126
145
|
{
|
127
|
-
config->mode =
|
146
|
+
config->mode = EDLIB_MODE_HW;
|
128
147
|
}
|
129
148
|
else
|
130
149
|
{
|
@@ -145,35 +164,25 @@ set_mode(EdlibAlignConfig *config, VALUE mode)
|
|
145
164
|
return mode;
|
146
165
|
}
|
147
166
|
|
148
|
-
|
149
|
-
aligner_set_mode(VALUE self, VALUE mode)
|
150
|
-
{
|
151
|
-
EdlibAlignConfig *config = get_config(self);
|
152
|
-
return set_mode(config, mode);
|
153
|
-
}
|
167
|
+
ALIGNER_SET_(mode)
|
154
168
|
|
155
169
|
static VALUE
|
156
170
|
get_task(EdlibAlignConfig *config)
|
157
171
|
{
|
158
172
|
switch (config->task)
|
159
173
|
{
|
160
|
-
case
|
174
|
+
case EDLIB_TASK_DISTANCE:
|
161
175
|
return rb_str_new2("DISTANCE");
|
162
|
-
case
|
176
|
+
case EDLIB_TASK_LOC:
|
163
177
|
return rb_str_new2("LOC");
|
164
|
-
case
|
178
|
+
case EDLIB_TASK_PATH:
|
165
179
|
return rb_str_new2("PATH");
|
166
180
|
default:
|
167
181
|
return Qnil;
|
168
182
|
}
|
169
183
|
}
|
170
184
|
|
171
|
-
|
172
|
-
aligner_get_task(VALUE self)
|
173
|
-
{
|
174
|
-
EdlibAlignConfig *config = get_config(self);
|
175
|
-
return get_task(config);
|
176
|
-
}
|
185
|
+
ALIGNER_GET_(task)
|
177
186
|
|
178
187
|
static VALUE
|
179
188
|
set_task(EdlibAlignConfig *config, VALUE task)
|
@@ -185,18 +194,19 @@ set_task(EdlibAlignConfig *config, VALUE task)
|
|
185
194
|
switch (TYPE(task))
|
186
195
|
{
|
187
196
|
case T_STRING:;
|
188
|
-
rb_funcall(task, rb_intern("upcase
|
189
|
-
|
197
|
+
VALUE task_str = rb_funcall(task, rb_intern("upcase"), 0);
|
198
|
+
char *task_s = RSTRING_PTR(task_str);
|
199
|
+
if (strcmp(task_s, "DISTANCE") == 0)
|
190
200
|
{
|
191
|
-
config->task =
|
201
|
+
config->task = EDLIB_TASK_DISTANCE;
|
192
202
|
}
|
193
|
-
else if (strcmp(
|
203
|
+
else if (strcmp(task_s, "LOC") == 0)
|
194
204
|
{
|
195
|
-
config->task =
|
205
|
+
config->task = EDLIB_TASK_LOC;
|
196
206
|
}
|
197
|
-
else if (strcmp(
|
207
|
+
else if (strcmp(task_s, "PATH") == 0)
|
198
208
|
{
|
199
|
-
config->task =
|
209
|
+
config->task = EDLIB_TASK_PATH;
|
200
210
|
}
|
201
211
|
else
|
202
212
|
{
|
@@ -217,12 +227,7 @@ set_task(EdlibAlignConfig *config, VALUE task)
|
|
217
227
|
return task;
|
218
228
|
}
|
219
229
|
|
220
|
-
|
221
|
-
aligner_set_task(VALUE self, VALUE task)
|
222
|
-
{
|
223
|
-
EdlibAlignConfig *config = get_config(self);
|
224
|
-
return set_task(config, task);
|
225
|
-
}
|
230
|
+
ALIGNER_SET_(task)
|
226
231
|
|
227
232
|
static VALUE
|
228
233
|
get_additional_equalities(EdlibAlignConfig *config)
|
@@ -241,21 +246,17 @@ get_additional_equalities(EdlibAlignConfig *config)
|
|
241
246
|
return equalities;
|
242
247
|
}
|
243
248
|
|
244
|
-
|
245
|
-
aligner_get_additional_equalities(VALUE self)
|
246
|
-
{
|
247
|
-
EdlibAlignConfig *config = get_config(self);
|
248
|
-
return get_additional_equalities(config);
|
249
|
-
}
|
249
|
+
ALIGNER_GET_(additional_equalities)
|
250
250
|
|
251
251
|
static VALUE
|
252
|
-
set_additional_equalities(EdlibAlignConfig *config, VALUE equalities)
|
252
|
+
set_additional_equalities(EdlibAlignConfig *config, EdlibEqualityPair *eqpairs, VALUE equalities)
|
253
253
|
{
|
254
254
|
Check_Type(equalities, T_ARRAY);
|
255
255
|
int len = RARRAY_LEN(equalities);
|
256
256
|
if (len == 0)
|
257
257
|
{
|
258
|
-
if(eqpairs != NULL)
|
258
|
+
if (eqpairs != NULL)
|
259
|
+
{
|
259
260
|
free(eqpairs);
|
260
261
|
eqpairs = NULL;
|
261
262
|
}
|
@@ -312,14 +313,15 @@ set_additional_equalities(EdlibAlignConfig *config, VALUE equalities)
|
|
312
313
|
static VALUE
|
313
314
|
aligner_set_additional_equalities(VALUE self, VALUE equalities)
|
314
315
|
{
|
315
|
-
EdlibAlignConfig *config =
|
316
|
-
|
316
|
+
EdlibAlignConfig *config = aligner_get_config(self);
|
317
|
+
EdlibEqualityPair *eqpairs = aligner_get_equalityPairs(self);
|
318
|
+
return set_additional_equalities(config, eqpairs, equalities);
|
317
319
|
}
|
318
320
|
|
319
321
|
static VALUE
|
320
322
|
aligner_config_hash(VALUE self)
|
321
323
|
{
|
322
|
-
EdlibAlignConfig *config =
|
324
|
+
EdlibAlignConfig *config = aligner_get_config(self);
|
323
325
|
|
324
326
|
VALUE hash = rb_hash_new();
|
325
327
|
|
@@ -332,16 +334,17 @@ aligner_config_hash(VALUE self)
|
|
332
334
|
}
|
333
335
|
|
334
336
|
static VALUE
|
335
|
-
|
337
|
+
aligner_initialize_raw(VALUE self, VALUE k, VALUE mode, VALUE task, VALUE additional_equalities)
|
336
338
|
{
|
337
|
-
EdlibAlignConfig *config =
|
338
|
-
|
339
|
+
EdlibAlignConfig *config = aligner_get_config(self);
|
340
|
+
EdlibEqualityPair *eqpairs = aligner_get_equalityPairs(self);
|
339
341
|
config->k = NUM2INT(k);
|
340
342
|
set_mode(config, mode);
|
341
343
|
set_task(config, task);
|
344
|
+
|
342
345
|
if (additional_equalities != Qnil)
|
343
346
|
{
|
344
|
-
set_additional_equalities(config, additional_equalities);
|
347
|
+
set_additional_equalities(config, eqpairs, additional_equalities);
|
345
348
|
}
|
346
349
|
else
|
347
350
|
{
|
@@ -355,7 +358,7 @@ aligner_initialize(VALUE self, VALUE k, VALUE mode, VALUE task, VALUE additional
|
|
355
358
|
static VALUE
|
356
359
|
aligner_align(VALUE self, VALUE query, VALUE target)
|
357
360
|
{
|
358
|
-
EdlibAlignConfig *config =
|
361
|
+
EdlibAlignConfig *config = aligner_get_config(self);
|
359
362
|
if (!config)
|
360
363
|
{
|
361
364
|
rb_raise(rb_eRuntimeError, "config is NULL");
|
@@ -427,8 +430,8 @@ void Init_edlibext(void)
|
|
427
430
|
{
|
428
431
|
mEdlib = rb_define_module("Edlib");
|
429
432
|
cAligner = rb_define_class_under(mEdlib, "Aligner", rb_cObject);
|
430
|
-
rb_define_alloc_func(cAligner,
|
431
|
-
rb_define_private_method(cAligner, "initialize_raw",
|
433
|
+
rb_define_alloc_func(cAligner, aligner_config_allocate);
|
434
|
+
rb_define_private_method(cAligner, "initialize_raw", aligner_initialize_raw, 4);
|
432
435
|
rb_define_method(cAligner, "k", aligner_get_k, 0);
|
433
436
|
rb_define_method(cAligner, "k=", aligner_set_k, 1);
|
434
437
|
rb_define_method(cAligner, "mode", aligner_get_mode, 0);
|
@@ -438,5 +441,5 @@ void Init_edlibext(void)
|
|
438
441
|
rb_define_method(cAligner, "additional_equalities", aligner_get_additional_equalities, 0);
|
439
442
|
rb_define_method(cAligner, "additional_equalities=", aligner_set_additional_equalities, 1);
|
440
443
|
rb_define_method(cAligner, "config", aligner_config_hash, 0);
|
441
|
-
rb_define_method(cAligner, "
|
444
|
+
rb_define_method(cAligner, "align_raw", aligner_align, 2);
|
442
445
|
}
|
@@ -0,0 +1,15 @@
|
|
1
|
+
#ifndef EDLIBEXT_H
|
2
|
+
#define EDLIBEXT_H
|
3
|
+
|
4
|
+
#include "edlib.h"
|
5
|
+
|
6
|
+
typedef struct
|
7
|
+
{
|
8
|
+
EdlibAlignConfig *config;
|
9
|
+
EdlibEqualityPair *equalityPairs;
|
10
|
+
} RbAlignConfig;
|
11
|
+
|
12
|
+
static EdlibAlignConfig *aligner_get_config(VALUE self);
|
13
|
+
static EdlibEqualityPair *aligner_get_equalityPairs(VALUE self);
|
14
|
+
|
15
|
+
#endif // EDLIBEXT_H
|
data/ext/edlib/extconf.rb
CHANGED
data/lib/edlib/version.rb
CHANGED
data/lib/edlib.rb
CHANGED
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require_relative 'edlib/edlibext'
|
2
4
|
|
3
5
|
module Edlib
|
@@ -9,5 +11,68 @@ module Edlib
|
|
9
11
|
task = task.upcase if task.is_a? String
|
10
12
|
initialize_raw(k, mode, task, additional_equalities)
|
11
13
|
end
|
14
|
+
|
15
|
+
def align(query, target, nice: false)
|
16
|
+
if nice
|
17
|
+
align_nice(query, target)
|
18
|
+
else
|
19
|
+
align_raw(query, target)
|
20
|
+
end
|
21
|
+
end
|
22
|
+
|
23
|
+
private
|
24
|
+
|
25
|
+
def align_nice(query, target)
|
26
|
+
result = align(query, target)
|
27
|
+
result.merge(nice(result, query, target))
|
28
|
+
end
|
29
|
+
|
30
|
+
# Render an alignment result as three equally long strings.
#
# result     - Hash that must have the keys :locations and :cigar.
#              The first element of the first location is used as the
#              starting offset into +target+; :cigar is parsed as a run of
#              <count><op> pairs with op being one of "=", "X", "D" or "I".
# query      - the query sequence that was aligned.
# target     - the target sequence that was aligned.
# gap_symbol - string used to fill gaps (default "-").
#
# Returns a Hash with :query_aligned, :match_aligned and :target_aligned.
# In :match_aligned "|" marks a match, "." a mismatch and gap_symbol a gap.
#
# Raises RuntimeError when the keys are missing, when there is nothing to
# format (:cigar is nil or :locations is nil/empty), or when the CIGAR
# string contains an unknown operation.
def nice(result, query, target, gap_symbol: '-')
  raise 'result does not have :locations and :cigar' unless result.key?(:locations) && result.key?(:cigar)

  locations = result[:locations]
  cigar = result[:cigar]
  # Fail with a readable message instead of a NoMethodError on nil.
  if cigar.nil? || locations.nil? || locations.empty?
    raise 'result has no alignment to format (:locations is empty or :cigar is nil)'
  end

  target_pos = locations[0][0]
  query_pos = 0
  query_aln = String.new
  match_aln = String.new
  target_aln = String.new

  cigar.scan(/(\d+)(\D)/).each do |num, op|
    num = num.to_i
    case op
    when '=', 'X'
      # Match or mismatch: both sequences advance.
      target_aln << target[target_pos, num]
      query_aln << query[query_pos, num]
      match_aln << (op == '=' ? '|' : '.') * num
      target_pos += num
      query_pos += num
    when 'D'
      # Only the target advances; the query gets a gap.
      target_aln << target[target_pos, num]
      query_aln << gap_symbol * num
      match_aln << gap_symbol * num
      target_pos += num
    when 'I'
      # Only the query advances; the target gets a gap.
      target_aln << gap_symbol * num
      query_aln << query[query_pos, num]
      match_aln << gap_symbol * num
      query_pos += num
    else
      raise "Unknown CIGAR operation: #{op}"
    end
  end

  {
    query_aligned: query_aln,
    match_aligned: match_aln,
    target_aligned: target_aln
  }
end
|
12
77
|
end
|
13
78
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: edlib
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.7
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- kojix2
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-
|
11
|
+
date: 2022-11-07 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: 'Lightweight, super fast C/C++ library for sequence alignment using edit
|
14
14
|
(Levenshtein) distance. '
|
@@ -23,6 +23,7 @@ files:
|
|
23
23
|
- ext/edlib/edlib.cpp
|
24
24
|
- ext/edlib/edlib.h
|
25
25
|
- ext/edlib/edlibext.c
|
26
|
+
- ext/edlib/edlibext.h
|
26
27
|
- ext/edlib/extconf.rb
|
27
28
|
- lib/edlib.rb
|
28
29
|
- lib/edlib/version.rb
|