edlib 0.0.5 → 0.0.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +1 -0
- data/ext/edlib/edlibext.c +101 -98
- data/ext/edlib/edlibext.h +15 -0
- data/ext/edlib/extconf.rb +2 -0
- data/lib/edlib/version.rb +3 -1
- data/lib/edlib.rb +65 -0
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 72ca7381d858e17e8ba5e2641d072cb4f391410b97ef9138504efa6851020373
|
4
|
+
data.tar.gz: 180e15f4a09149af7031536817a635be580d55afd25896341cb44cf54a9902be
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 5f53003d5e12d9d55b40201f435a278b4a99d6e01569cf0f5e96a263822058257d3dd1b65c7c0f48d39a92177c271131d7752bf982c68893316e5dccbdeefc9f
|
7
|
+
data.tar.gz: 27e1ec9237c5f86fe8775100dbda7e2b9851071a9043cace94b927a65e372afa0b7cbbca741d51266b1f2a0493917eeaf1b4b60da92600def529b2e4ac724edb
|
data/README.md
CHANGED
@@ -2,6 +2,7 @@
|
|
2
2
|
|
3
3
|
[![Gem Version](https://badge.fury.io/rb/edlib.svg)](https://badge.fury.io/rb/edlib)
|
4
4
|
[![test](https://github.com/kojix2/ruby-edlib/actions/workflows/ci.yml/badge.svg)](https://github.com/kojix2/ruby-edlib/actions/workflows/ci.yml)
|
5
|
+
[![DOI](https://zenodo.org/badge/559318048.svg)](https://zenodo.org/badge/latestdoi/559318048)
|
5
6
|
|
6
7
|
[Edlib](https://github.com/Martinsos/edlib) - A lightweight and super fast C/C++ library for sequence alignment using edit distance
|
7
8
|
|
data/ext/edlib/edlibext.c
CHANGED
@@ -1,70 +1,98 @@
|
|
1
1
|
#include "ruby.h"
|
2
|
-
#include "
|
2
|
+
#include "edlibext.h"
|
3
|
+
|
4
|
+
#define ALIGNER_GET_(name) \
|
5
|
+
static VALUE \
|
6
|
+
aligner_get_##name(VALUE self) \
|
7
|
+
{ \
|
8
|
+
EdlibAlignConfig *config = aligner_get_config(self); \
|
9
|
+
return get_##name(config); \
|
10
|
+
}
|
11
|
+
|
12
|
+
#define ALIGNER_SET_(name) \
|
13
|
+
static VALUE \
|
14
|
+
aligner_set_##name(VALUE self, VALUE value) \
|
15
|
+
{ \
|
16
|
+
EdlibAlignConfig *config = aligner_get_config(self); \
|
17
|
+
return set_##name(config, value); \
|
18
|
+
}
|
3
19
|
|
4
20
|
VALUE mEdlib;
|
5
21
|
VALUE cAligner;
|
6
|
-
EdlibEqualityPair *eqpairs;
|
7
22
|
|
8
|
-
|
9
|
-
|
23
|
+
// Aligner class
|
24
|
+
|
25
|
+
static size_t aligner_config_memsize(const void *ptr);
|
26
|
+
static void aligner_config_free(void *ptr);
|
10
27
|
|
11
28
|
static const rb_data_type_t config_type = {
|
12
|
-
.wrap_struct_name = "
|
29
|
+
.wrap_struct_name = "RbAlignConfig",
|
13
30
|
.function = {
|
14
|
-
.dfree =
|
15
|
-
.dsize =
|
31
|
+
.dfree = aligner_config_free,
|
32
|
+
.dsize = aligner_config_memsize,
|
16
33
|
},
|
17
34
|
.flags = RUBY_TYPED_FREE_IMMEDIATELY,
|
18
35
|
};
|
19
36
|
|
20
37
|
static VALUE
|
21
|
-
|
38
|
+
aligner_config_allocate(VALUE klass)
|
22
39
|
{
|
23
|
-
|
24
|
-
|
25
|
-
|
40
|
+
RbAlignConfig *aligner_config;
|
41
|
+
|
42
|
+
VALUE obj = TypedData_Make_Struct(klass, RbAlignConfig, &config_type, aligner_config);
|
43
|
+
aligner_config->config = (EdlibAlignConfig *)malloc(sizeof(EdlibAlignConfig));
|
44
|
+
aligner_config->equalityPairs = NULL;
|
26
45
|
return obj;
|
27
46
|
}
|
28
47
|
|
29
48
|
static void
|
30
|
-
|
49
|
+
aligner_config_free(void *ptr)
|
31
50
|
{
|
32
|
-
|
51
|
+
RbAlignConfig *aligner_config = ptr;
|
52
|
+
if (aligner_config->config != NULL)
|
33
53
|
{
|
34
|
-
free(
|
35
|
-
|
54
|
+
free(aligner_config->config);
|
55
|
+
}
|
56
|
+
if (aligner_config->equalityPairs != NULL)
|
57
|
+
{
|
58
|
+
free(aligner_config->equalityPairs);
|
36
59
|
}
|
37
|
-
|
60
|
+
|
61
|
+
free(ptr);
|
38
62
|
}
|
39
63
|
|
40
64
|
static size_t
|
41
|
-
|
65
|
+
aligner_config_memsize(const void *ptr)
|
42
66
|
{
|
43
|
-
const
|
44
|
-
return sizeof(ptr) + 2 * sizeof(char) *
|
67
|
+
const RbAlignConfig *aligner_config = ptr;
|
68
|
+
return sizeof(ptr) + sizeof(aligner_config->config) + 2 * sizeof(char) * aligner_config->config->additionalEqualitiesLength;
|
45
69
|
}
|
46
70
|
|
47
71
|
static EdlibAlignConfig *
|
48
|
-
|
72
|
+
aligner_get_config(VALUE self)
|
49
73
|
{
|
50
|
-
|
51
|
-
TypedData_Get_Struct(self,
|
74
|
+
RbAlignConfig *aligner_config = NULL;
|
75
|
+
TypedData_Get_Struct(self, RbAlignConfig, &config_type, aligner_config);
|
76
|
+
return aligner_config->config;
|
77
|
+
}
|
52
78
|
|
53
|
-
|
79
|
+
static EdlibEqualityPair *
|
80
|
+
aligner_get_equalityPairs(VALUE self)
|
81
|
+
{
|
82
|
+
RbAlignConfig *aligner_config = NULL;
|
83
|
+
TypedData_Get_Struct(self, RbAlignConfig, &config_type, aligner_config);
|
84
|
+
return aligner_config->equalityPairs;
|
54
85
|
}
|
55
86
|
|
87
|
+
// Config
|
88
|
+
|
56
89
|
static VALUE
|
57
90
|
get_k(EdlibAlignConfig *config)
|
58
91
|
{
|
59
92
|
return INT2NUM(config->k);
|
60
93
|
}
|
61
94
|
|
62
|
-
|
63
|
-
aligner_get_k(VALUE self)
|
64
|
-
{
|
65
|
-
EdlibAlignConfig *config = get_config(self);
|
66
|
-
return get_k(config);
|
67
|
-
}
|
95
|
+
ALIGNER_GET_(k)
|
68
96
|
|
69
97
|
static VALUE
|
70
98
|
set_k(EdlibAlignConfig *config, VALUE k)
|
@@ -73,35 +101,25 @@ set_k(EdlibAlignConfig *config, VALUE k)
|
|
73
101
|
return k;
|
74
102
|
}
|
75
103
|
|
76
|
-
|
77
|
-
aligner_set_k(VALUE self, VALUE k)
|
78
|
-
{
|
79
|
-
EdlibAlignConfig *config = get_config(self);
|
80
|
-
return set_k(config, k);
|
81
|
-
}
|
104
|
+
ALIGNER_SET_(k)
|
82
105
|
|
83
106
|
static VALUE
|
84
107
|
get_mode(EdlibAlignConfig *config)
|
85
108
|
{
|
86
109
|
switch (config->mode)
|
87
110
|
{
|
88
|
-
case
|
111
|
+
case EDLIB_MODE_NW:
|
89
112
|
return rb_str_new2("NW");
|
90
|
-
case
|
113
|
+
case EDLIB_MODE_SHW:
|
91
114
|
return rb_str_new2("SHW");
|
92
|
-
case
|
115
|
+
case EDLIB_MODE_HW:
|
93
116
|
return rb_str_new2("HW");
|
94
117
|
default:
|
95
118
|
return Qnil;
|
96
119
|
}
|
97
120
|
}
|
98
121
|
|
99
|
-
|
100
|
-
aligner_get_mode(VALUE self)
|
101
|
-
{
|
102
|
-
EdlibAlignConfig *config = get_config(self);
|
103
|
-
return get_mode(config);
|
104
|
-
}
|
122
|
+
ALIGNER_GET_(mode)
|
105
123
|
|
106
124
|
static VALUE
|
107
125
|
set_mode(EdlibAlignConfig *config, VALUE mode)
|
@@ -113,18 +131,19 @@ set_mode(EdlibAlignConfig *config, VALUE mode)
|
|
113
131
|
switch (TYPE(mode))
|
114
132
|
{
|
115
133
|
case T_STRING:;
|
116
|
-
|
117
|
-
|
134
|
+
VALUE mode_str = rb_funcall(mode, rb_intern("upcase"), 0);
|
135
|
+
char *mode_s = RSTRING_PTR(mode_str);
|
136
|
+
if (strcmp(mode_s, "NW") == 0)
|
118
137
|
{
|
119
|
-
config->mode =
|
138
|
+
config->mode = EDLIB_MODE_NW;
|
120
139
|
}
|
121
|
-
else if (strcmp(
|
140
|
+
else if (strcmp(mode_s, "SHW") == 0)
|
122
141
|
{
|
123
|
-
config->mode =
|
142
|
+
config->mode = EDLIB_MODE_SHW;
|
124
143
|
}
|
125
|
-
else if (strcmp(
|
144
|
+
else if (strcmp(mode_s, "HW") == 0)
|
126
145
|
{
|
127
|
-
config->mode =
|
146
|
+
config->mode = EDLIB_MODE_HW;
|
128
147
|
}
|
129
148
|
else
|
130
149
|
{
|
@@ -145,35 +164,25 @@ set_mode(EdlibAlignConfig *config, VALUE mode)
|
|
145
164
|
return mode;
|
146
165
|
}
|
147
166
|
|
148
|
-
|
149
|
-
aligner_set_mode(VALUE self, VALUE mode)
|
150
|
-
{
|
151
|
-
EdlibAlignConfig *config = get_config(self);
|
152
|
-
return set_mode(config, mode);
|
153
|
-
}
|
167
|
+
ALIGNER_SET_(mode)
|
154
168
|
|
155
169
|
static VALUE
|
156
170
|
get_task(EdlibAlignConfig *config)
|
157
171
|
{
|
158
172
|
switch (config->task)
|
159
173
|
{
|
160
|
-
case
|
174
|
+
case EDLIB_TASK_DISTANCE:
|
161
175
|
return rb_str_new2("DISTANCE");
|
162
|
-
case
|
176
|
+
case EDLIB_TASK_LOC:
|
163
177
|
return rb_str_new2("LOC");
|
164
|
-
case
|
178
|
+
case EDLIB_TASK_PATH:
|
165
179
|
return rb_str_new2("PATH");
|
166
180
|
default:
|
167
181
|
return Qnil;
|
168
182
|
}
|
169
183
|
}
|
170
184
|
|
171
|
-
|
172
|
-
aligner_get_task(VALUE self)
|
173
|
-
{
|
174
|
-
EdlibAlignConfig *config = get_config(self);
|
175
|
-
return get_task(config);
|
176
|
-
}
|
185
|
+
ALIGNER_GET_(task)
|
177
186
|
|
178
187
|
static VALUE
|
179
188
|
set_task(EdlibAlignConfig *config, VALUE task)
|
@@ -185,18 +194,19 @@ set_task(EdlibAlignConfig *config, VALUE task)
|
|
185
194
|
switch (TYPE(task))
|
186
195
|
{
|
187
196
|
case T_STRING:;
|
188
|
-
rb_funcall(task, rb_intern("upcase
|
189
|
-
|
197
|
+
VALUE task_str = rb_funcall(task, rb_intern("upcase"), 0);
|
198
|
+
char *task_s = RSTRING_PTR(task_str);
|
199
|
+
if (strcmp(task_s, "DISTANCE") == 0)
|
190
200
|
{
|
191
|
-
config->task =
|
201
|
+
config->task = EDLIB_TASK_DISTANCE;
|
192
202
|
}
|
193
|
-
else if (strcmp(
|
203
|
+
else if (strcmp(task_s, "LOC") == 0)
|
194
204
|
{
|
195
|
-
config->task =
|
205
|
+
config->task = EDLIB_TASK_LOC;
|
196
206
|
}
|
197
|
-
else if (strcmp(
|
207
|
+
else if (strcmp(task_s, "PATH") == 0)
|
198
208
|
{
|
199
|
-
config->task =
|
209
|
+
config->task = EDLIB_TASK_PATH;
|
200
210
|
}
|
201
211
|
else
|
202
212
|
{
|
@@ -217,12 +227,7 @@ set_task(EdlibAlignConfig *config, VALUE task)
|
|
217
227
|
return task;
|
218
228
|
}
|
219
229
|
|
220
|
-
|
221
|
-
aligner_set_task(VALUE self, VALUE task)
|
222
|
-
{
|
223
|
-
EdlibAlignConfig *config = get_config(self);
|
224
|
-
return set_task(config, task);
|
225
|
-
}
|
230
|
+
ALIGNER_SET_(task)
|
226
231
|
|
227
232
|
static VALUE
|
228
233
|
get_additional_equalities(EdlibAlignConfig *config)
|
@@ -241,21 +246,17 @@ get_additional_equalities(EdlibAlignConfig *config)
|
|
241
246
|
return equalities;
|
242
247
|
}
|
243
248
|
|
244
|
-
|
245
|
-
aligner_get_additional_equalities(VALUE self)
|
246
|
-
{
|
247
|
-
EdlibAlignConfig *config = get_config(self);
|
248
|
-
return get_additional_equalities(config);
|
249
|
-
}
|
249
|
+
ALIGNER_GET_(additional_equalities)
|
250
250
|
|
251
251
|
static VALUE
|
252
|
-
set_additional_equalities(EdlibAlignConfig *config, VALUE equalities)
|
252
|
+
set_additional_equalities(EdlibAlignConfig *config, EdlibEqualityPair *eqpairs, VALUE equalities)
|
253
253
|
{
|
254
254
|
Check_Type(equalities, T_ARRAY);
|
255
255
|
int len = RARRAY_LEN(equalities);
|
256
256
|
if (len == 0)
|
257
257
|
{
|
258
|
-
if(eqpairs != NULL)
|
258
|
+
if (eqpairs != NULL)
|
259
|
+
{
|
259
260
|
free(eqpairs);
|
260
261
|
eqpairs = NULL;
|
261
262
|
}
|
@@ -312,14 +313,15 @@ set_additional_equalities(EdlibAlignConfig *config, VALUE equalities)
|
|
312
313
|
static VALUE
|
313
314
|
aligner_set_additional_equalities(VALUE self, VALUE equalities)
|
314
315
|
{
|
315
|
-
EdlibAlignConfig *config =
|
316
|
-
|
316
|
+
EdlibAlignConfig *config = aligner_get_config(self);
|
317
|
+
EdlibEqualityPair *eqpairs = aligner_get_equalityPairs(self);
|
318
|
+
return set_additional_equalities(config, eqpairs, equalities);
|
317
319
|
}
|
318
320
|
|
319
321
|
static VALUE
|
320
322
|
aligner_config_hash(VALUE self)
|
321
323
|
{
|
322
|
-
EdlibAlignConfig *config =
|
324
|
+
EdlibAlignConfig *config = aligner_get_config(self);
|
323
325
|
|
324
326
|
VALUE hash = rb_hash_new();
|
325
327
|
|
@@ -332,16 +334,17 @@ aligner_config_hash(VALUE self)
|
|
332
334
|
}
|
333
335
|
|
334
336
|
static VALUE
|
335
|
-
|
337
|
+
aligner_initialize_raw(VALUE self, VALUE k, VALUE mode, VALUE task, VALUE additional_equalities)
|
336
338
|
{
|
337
|
-
EdlibAlignConfig *config =
|
338
|
-
|
339
|
+
EdlibAlignConfig *config = aligner_get_config(self);
|
340
|
+
EdlibEqualityPair *eqpairs = aligner_get_equalityPairs(self);
|
339
341
|
config->k = NUM2INT(k);
|
340
342
|
set_mode(config, mode);
|
341
343
|
set_task(config, task);
|
344
|
+
|
342
345
|
if (additional_equalities != Qnil)
|
343
346
|
{
|
344
|
-
set_additional_equalities(config, additional_equalities);
|
347
|
+
set_additional_equalities(config, eqpairs, additional_equalities);
|
345
348
|
}
|
346
349
|
else
|
347
350
|
{
|
@@ -355,7 +358,7 @@ aligner_initialize(VALUE self, VALUE k, VALUE mode, VALUE task, VALUE additional
|
|
355
358
|
static VALUE
|
356
359
|
aligner_align(VALUE self, VALUE query, VALUE target)
|
357
360
|
{
|
358
|
-
EdlibAlignConfig *config =
|
361
|
+
EdlibAlignConfig *config = aligner_get_config(self);
|
359
362
|
if (!config)
|
360
363
|
{
|
361
364
|
rb_raise(rb_eRuntimeError, "config is NULL");
|
@@ -427,8 +430,8 @@ void Init_edlibext(void)
|
|
427
430
|
{
|
428
431
|
mEdlib = rb_define_module("Edlib");
|
429
432
|
cAligner = rb_define_class_under(mEdlib, "Aligner", rb_cObject);
|
430
|
-
rb_define_alloc_func(cAligner,
|
431
|
-
rb_define_private_method(cAligner, "initialize_raw",
|
433
|
+
rb_define_alloc_func(cAligner, aligner_config_allocate);
|
434
|
+
rb_define_private_method(cAligner, "initialize_raw", aligner_initialize_raw, 4);
|
432
435
|
rb_define_method(cAligner, "k", aligner_get_k, 0);
|
433
436
|
rb_define_method(cAligner, "k=", aligner_set_k, 1);
|
434
437
|
rb_define_method(cAligner, "mode", aligner_get_mode, 0);
|
@@ -438,5 +441,5 @@ void Init_edlibext(void)
|
|
438
441
|
rb_define_method(cAligner, "additional_equalities", aligner_get_additional_equalities, 0);
|
439
442
|
rb_define_method(cAligner, "additional_equalities=", aligner_set_additional_equalities, 1);
|
440
443
|
rb_define_method(cAligner, "config", aligner_config_hash, 0);
|
441
|
-
rb_define_method(cAligner, "
|
444
|
+
rb_define_method(cAligner, "align_raw", aligner_align, 2);
|
442
445
|
}
|
@@ -0,0 +1,15 @@
|
|
1
|
+
#ifndef EDLIBEXT_H
|
2
|
+
#define EDLIBEXT_H
|
3
|
+
|
4
|
+
#include "edlib.h"
|
5
|
+
|
6
|
+
typedef struct
|
7
|
+
{
|
8
|
+
EdlibAlignConfig *config;
|
9
|
+
EdlibEqualityPair *equalityPairs;
|
10
|
+
} RbAlignConfig;
|
11
|
+
|
12
|
+
static EdlibAlignConfig *aligner_get_config(VALUE self);
|
13
|
+
static EdlibEqualityPair *aligner_get_equalityPairs(VALUE self);
|
14
|
+
|
15
|
+
#endif // EDLIBEXT_H
|
data/ext/edlib/extconf.rb
CHANGED
data/lib/edlib/version.rb
CHANGED
data/lib/edlib.rb
CHANGED
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require_relative 'edlib/edlibext'
|
2
4
|
|
3
5
|
module Edlib
|
@@ -9,5 +11,68 @@ module Edlib
|
|
9
11
|
task = task.upcase if task.is_a? String
|
10
12
|
initialize_raw(k, mode, task, additional_equalities)
|
11
13
|
end
|
14
|
+
|
15
|
+
def align(query, target, nice: false)
|
16
|
+
if nice
|
17
|
+
align_nice(query, target)
|
18
|
+
else
|
19
|
+
align_raw(query, target)
|
20
|
+
end
|
21
|
+
end
|
22
|
+
|
23
|
+
private
|
24
|
+
|
25
|
+
def align_nice(query, target)
|
26
|
+
result = align(query, target)
|
27
|
+
result.merge(nice(result, query, target))
|
28
|
+
end
|
29
|
+
|
30
|
+
def nice(result, query, target, gap_symbol: '-')
|
31
|
+
raise 'result does not have :locations and :cigar' unless result.key?(:locations) && result.key?(:cigar)
|
32
|
+
|
33
|
+
target_pos = result[:locations][0][0]
|
34
|
+
query_pos = 0
|
35
|
+
query_aln = String.new
|
36
|
+
match_aln = String.new
|
37
|
+
target_aln = String.new
|
38
|
+
cigar = result[:cigar]
|
39
|
+
cigar.scan(/(\d+)(\D)/).each do |num, op|
|
40
|
+
num = num.to_i
|
41
|
+
case op
|
42
|
+
when '='
|
43
|
+
target_aln << target[target_pos, num]
|
44
|
+
target_pos += num
|
45
|
+
query_aln << query[query_pos, num]
|
46
|
+
query_pos += num
|
47
|
+
match_aln << '|' * num
|
48
|
+
when 'X'
|
49
|
+
target_aln << target[target_pos, num]
|
50
|
+
target_pos += num
|
51
|
+
query_aln << query[query_pos, num]
|
52
|
+
query_pos += num
|
53
|
+
match_aln << '.' * num
|
54
|
+
when 'D'
|
55
|
+
target_aln << target[target_pos, num]
|
56
|
+
target_pos += num
|
57
|
+
query_aln << gap_symbol * num
|
58
|
+
query_pos += 0
|
59
|
+
match_aln << gap_symbol * num
|
60
|
+
when 'I'
|
61
|
+
target_aln << gap_symbol * num
|
62
|
+
target_pos += 0
|
63
|
+
query_aln << query[query_pos, num]
|
64
|
+
query_pos += num
|
65
|
+
match_aln << gap_symbol * num
|
66
|
+
else
|
67
|
+
raise "Unknown CIGAR operation: #{op}"
|
68
|
+
end
|
69
|
+
end
|
70
|
+
|
71
|
+
{
|
72
|
+
query_aligned: query_aln,
|
73
|
+
match_aligned: match_aln,
|
74
|
+
target_aligned: target_aln
|
75
|
+
}
|
76
|
+
end
|
12
77
|
end
|
13
78
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: edlib
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.5
|
4
|
+
version: 0.0.7
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- kojix2
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-
|
11
|
+
date: 2022-11-07 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: 'Lightweight, super fast C/C++ library for sequence alignment using edit
|
14
14
|
(Levenshtein) distance. '
|
@@ -23,6 +23,7 @@ files:
|
|
23
23
|
- ext/edlib/edlib.cpp
|
24
24
|
- ext/edlib/edlib.h
|
25
25
|
- ext/edlib/edlibext.c
|
26
|
+
- ext/edlib/edlibext.h
|
26
27
|
- ext/edlib/extconf.rb
|
27
28
|
- lib/edlib.rb
|
28
29
|
- lib/edlib/version.rb
|