edlib 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: ad1a2f931b436577baac1923b324837f50aa133efbe48843f0956711b6e3fe4c
4
+ data.tar.gz: 62c9924efcf428902528e6ce29e1be6d7d874bd5db406b29bdeee00b3491e800
5
+ SHA512:
6
+ metadata.gz: ca119f2d471a21d8cb8da7820d2a1723396040d8a62c18c4cbcdb9b7c8879ed7a8792d57193e110a122c3b9d2d5b883aeab869cdc7d465e4aeec687c23896c5d
7
+ data.tar.gz: 596c89f8fe1112bb876edfb89849d6f634b4aa1d84ea3f47a6a24dbdb78070f86f063a0d38ef5aaccc4e4ea66edfd5d9c699ccb778dcf0f8812794018be944e7
data/README.md ADDED
@@ -0,0 +1,39 @@
1
+ # ruby-edlib
2
+
3
+ [Edlib](https://github.com/Martinsos/edlib) - A lightweight and super fast C/C++ library for sequence alignment using edit distance
4
+
5
+ ## Installation
6
+
7
+ ```
8
+ sudo apt install edlib-dev
9
+ gem install edlib
10
+ ```
11
+
12
+ ## API
13
+
14
+ ```ruby
15
+ require "edlib"
16
+
17
+ a = Edlib::Aligner.new(mode: :hw, task: :path)
18
+ a.align("AACG", "TCAACCTG")
19
+ # => {:edit_distance=>1, :alphabet_length=>4, :locations=>[[2, 4], [2, 5]], :alignment=>[0, 0, 0, 1], :cigar=>"3=1I"}
20
+ ```
21
+
22
+ |keyword argument |description|
23
+ |---------------------|-----------------------------------------------------------------------------|
24
+ |k |Maximum edit distance to search for; a negative value means no limit [-1] |
25
+ |mode |global (NW), prefix (SHW), infix (HW) ["NW"] |
26
+ |task |DISTANCE, LOC, PATH ["DISTANCE"] |
27
+ |additional_equalities|List of pairs of characters, where each pair defines two characters as equal. [NULL]|
28
+
29
+ ## Development
30
+
31
+ * Pull requests welcome
32
+
33
+ ```sh
34
+ git clone https://github.com/kojix2/ruby-edlib # Please fork repo
35
+ cd ruby-edlib
36
+ bundle install
37
+ bundle exec rake compile
38
+ bundle exec rake test
39
+ ```
data/ext/edlib/edlib.c ADDED
@@ -0,0 +1,400 @@
1
+ #include "ruby.h"
2
+ #include "edlib.h"
3
+
4
+ VALUE mEdlib;
5
+ VALUE cAligner;
6
+
7
+ static size_t config_memsize(const void *ptr);
8
+
9
+ static const rb_data_type_t config_type = {
10
+ .wrap_struct_name = "EdlibAlignConfig",
11
+ .function = {
12
+ .dfree = RUBY_DEFAULT_FREE,
13
+ .dsize = config_memsize,
14
+ },
15
+ .flags = RUBY_TYPED_FREE_IMMEDIATELY,
16
+ };
17
+
18
+ static VALUE
19
+ config_allocate(VALUE klass)
20
+ {
21
+ EdlibAlignConfig *config;
22
+ VALUE obj = TypedData_Make_Struct(klass, EdlibAlignConfig,
23
+ &config_type, config);
24
+ return obj;
25
+ }
26
+
27
+ static size_t
28
+ config_memsize(const void *ptr)
29
+ {
30
+ const EdlibAlignConfig *config = ptr;
31
+ return sizeof(ptr) + 2 * sizeof(char) * (config->additionalEqualitiesLength);
32
+ }
33
+
34
+ static EdlibAlignConfig *get_config(VALUE self)
35
+ {
36
+ EdlibAlignConfig *ptr = NULL;
37
+ TypedData_Get_Struct(self, EdlibAlignConfig, &config_type, ptr);
38
+
39
+ return ptr;
40
+ }
41
+
42
+ static VALUE
43
+ get_k(EdlibAlignConfig *config)
44
+ {
45
+ return INT2NUM(config->k);
46
+ }
47
+
48
+ static VALUE
49
+ aligner_get_k(VALUE self)
50
+ {
51
+ EdlibAlignConfig *config = get_config(self);
52
+ return get_k(config);
53
+ }
54
+
55
+ static VALUE
56
+ set_k(EdlibAlignConfig *config, VALUE k)
57
+ {
58
+ config->k = NUM2INT(k);
59
+ return k;
60
+ }
61
+
62
+ static VALUE
63
+ aligner_set_k(VALUE self, VALUE k)
64
+ {
65
+ EdlibAlignConfig *config = get_config(self);
66
+ return set_k(config, k);
67
+ }
68
+
69
+ static VALUE
70
+ get_mode(EdlibAlignConfig *config)
71
+ {
72
+ switch (config->mode)
73
+ {
74
+ case 0:
75
+ return rb_str_new2("NW");
76
+ case 1:
77
+ return rb_str_new2("SHW");
78
+ case 2:
79
+ return rb_str_new2("HW");
80
+ default:
81
+ return Qnil;
82
+ }
83
+ }
84
+
85
+ static VALUE
86
+ aligner_get_mode(VALUE self)
87
+ {
88
+ EdlibAlignConfig *config = get_config(self);
89
+ return get_mode(config);
90
+ }
91
+
92
+ static VALUE
93
+ set_mode(EdlibAlignConfig *config, VALUE mode)
94
+ {
95
+ switch (TYPE(mode))
96
+ {
97
+ case T_STRING:
98
+ if (strcmp(RSTRING_PTR(mode), "NW") == 0)
99
+ {
100
+ config->mode = 0;
101
+ }
102
+ else if (strcmp(RSTRING_PTR(mode), "SHW") == 0)
103
+ {
104
+ config->mode = 1;
105
+ }
106
+ else if (strcmp(RSTRING_PTR(mode), "HW") == 0)
107
+ {
108
+ config->mode = 2;
109
+ }
110
+ else
111
+ {
112
+ rb_raise(rb_eArgError, "Invalid mode");
113
+ }
114
+ break;
115
+ case T_FIXNUM:;
116
+ int m = NUM2INT(mode);
117
+ if (m < 0 || m > 2)
118
+ {
119
+ rb_raise(rb_eArgError, "Invalid mode");
120
+ }
121
+ config->mode = NUM2INT(mode);
122
+ break;
123
+ default:
124
+ rb_raise(rb_eArgError, "Invalid mode");
125
+ }
126
+ return mode;
127
+ }
128
+
129
+ static VALUE
130
+ aligner_set_mode(VALUE self, VALUE mode)
131
+ {
132
+ EdlibAlignConfig *config = get_config(self);
133
+ return set_mode(config, mode);
134
+ }
135
+
136
+ static VALUE
137
+ get_task(EdlibAlignConfig *config)
138
+ {
139
+ switch (config->task)
140
+ {
141
+ case 0:
142
+ return rb_str_new2("DISTANCE");
143
+ case 1:
144
+ return rb_str_new2("LOC");
145
+ case 2:
146
+ return rb_str_new2("PATH");
147
+ default:
148
+ return Qnil;
149
+ }
150
+ }
151
+
152
+ static VALUE
153
+ aligner_get_task(VALUE self)
154
+ {
155
+ EdlibAlignConfig *config = get_config(self);
156
+ return get_task(config);
157
+ }
158
+
159
+ static VALUE
160
+ set_task(EdlibAlignConfig *config, VALUE task)
161
+ {
162
+ switch (TYPE(task))
163
+ {
164
+ case T_STRING:
165
+ if (strcmp(RSTRING_PTR(task), "DISTANCE") == 0)
166
+ {
167
+ config->task = 0;
168
+ }
169
+ else if (strcmp(RSTRING_PTR(task), "LOC") == 0)
170
+ {
171
+ config->task = 1;
172
+ }
173
+ else if (strcmp(RSTRING_PTR(task), "PATH") == 0)
174
+ {
175
+ config->task = 2;
176
+ }
177
+ else
178
+ {
179
+ rb_raise(rb_eArgError, "Invalid task");
180
+ }
181
+ break;
182
+ case T_FIXNUM:;
183
+ int t = NUM2INT(task);
184
+ if (t < 0 || t > 2)
185
+ {
186
+ rb_raise(rb_eArgError, "Invalid task");
187
+ }
188
+ config->task = NUM2INT(task);
189
+ break;
190
+ default:
191
+ rb_raise(rb_eArgError, "Invalid task");
192
+ }
193
+ return task;
194
+ }
195
+
196
+ static VALUE
197
+ aligner_set_task(VALUE self, VALUE task)
198
+ {
199
+ EdlibAlignConfig *config = get_config(self);
200
+ return set_task(config, task);
201
+ }
202
+
203
+ static VALUE
204
+ get_additional_equalities(EdlibAlignConfig *config)
205
+ {
206
+ VALUE equalities = rb_ary_new();
207
+
208
+ for (int i = 0; i < config->additionalEqualitiesLength; i++)
209
+ {
210
+ EdlibEqualityPair pair = config->additionalEqualities[i];
211
+ VALUE pair_ary = rb_ary_new();
212
+ rb_ary_push(pair_ary, rb_str_new(&pair.first, 1));
213
+ rb_ary_push(pair_ary, rb_str_new(&pair.second, 1));
214
+ rb_ary_push(equalities, pair_ary);
215
+ }
216
+
217
+ return equalities;
218
+ }
219
+
220
+ static VALUE
221
+ aligner_get_additional_equalities(VALUE self)
222
+ {
223
+ EdlibAlignConfig *config = get_config(self);
224
+ return get_additional_equalities(config);
225
+ }
226
+
227
+ static VALUE
228
+ set_additional_equalities(EdlibAlignConfig *config, VALUE equalities)
229
+ {
230
+ Check_Type(equalities, T_ARRAY);
231
+ EdlibEqualityPair *pairs = (EdlibEqualityPair *)malloc(sizeof(EdlibEqualityPair) * RARRAY_LEN(equalities));
232
+
233
+ for (int i = 0; i < RARRAY_LEN(equalities); i++)
234
+ {
235
+ VALUE pair = rb_ary_entry(equalities, i);
236
+ Check_Type(pair, T_ARRAY);
237
+ if (RARRAY_LEN(pair) != 2)
238
+ {
239
+ rb_raise(rb_eArgError, "Invalid equality pair");
240
+ }
241
+ VALUE s1 = rb_ary_entry(pair, 0);
242
+ VALUE s2 = rb_ary_entry(pair, 1);
243
+ Check_Type(s1, T_STRING);
244
+ Check_Type(s2, T_STRING);
245
+ if (RSTRING_LEN(s1) != 1 || RSTRING_LEN(s2) != 1)
246
+ {
247
+ rb_raise(rb_eArgError, "String length must be 1");
248
+ }
249
+ char c1 = RSTRING_PTR(s1)[0];
250
+ char c2 = RSTRING_PTR(s2)[0];
251
+
252
+ pairs[i].first = c1;
253
+ pairs[i].second = c2;
254
+ }
255
+
256
+ config->additionalEqualities = pairs;
257
+ free(pairs);
258
+
259
+ config->additionalEqualitiesLength = RARRAY_LEN(equalities);
260
+
261
+ return equalities;
262
+ }
263
+
264
+ static VALUE
265
+ aligner_set_additional_equalities(VALUE self, VALUE equalities)
266
+ {
267
+ EdlibAlignConfig *config = get_config(self);
268
+ return set_additional_equalities(config, equalities);
269
+ }
270
+
271
+ static VALUE
272
+ aligner_config_hash(VALUE self)
273
+ {
274
+ EdlibAlignConfig *config = get_config(self);
275
+
276
+ VALUE hash = rb_hash_new();
277
+
278
+ rb_hash_aset(hash, ID2SYM(rb_intern("k")), get_k(config));
279
+ rb_hash_aset(hash, ID2SYM(rb_intern("mode")), get_mode(config));
280
+ rb_hash_aset(hash, ID2SYM(rb_intern("task")), get_task(config));
281
+ rb_hash_aset(hash, ID2SYM(rb_intern("additional_equalities")), get_additional_equalities(config));
282
+
283
+ return hash;
284
+ }
285
+
286
+ static VALUE
287
+ aligner_initialize(VALUE self, VALUE k, VALUE mode, VALUE task, VALUE additional_equalities)
288
+ {
289
+ EdlibAlignConfig *config = get_config(self);
290
+
291
+ config->k = NUM2INT(k);
292
+ set_mode(config, mode);
293
+ set_task(config, task);
294
+ if (additional_equalities != Qnil)
295
+ {
296
+ set_additional_equalities(config, additional_equalities);
297
+ }
298
+ else
299
+ {
300
+ config->additionalEqualities = NULL;
301
+ config->additionalEqualitiesLength = 0;
302
+ }
303
+
304
+ return self;
305
+ }
306
+
307
+ static VALUE
308
+ aligner_align(VALUE self, VALUE query, VALUE target)
309
+ {
310
+ EdlibAlignConfig *config = get_config(self);
311
+ if (!config)
312
+ {
313
+ rb_raise(rb_eRuntimeError, "config is NULL");
314
+ }
315
+ EdlibAlignConfig cfg = edlibNewAlignConfig(
316
+ config->k,
317
+ config->mode,
318
+ config->task,
319
+ config->additionalEqualities,
320
+ config->additionalEqualitiesLength);
321
+
322
+ EdlibAlignResult result = edlibAlign(
323
+ StringValueCStr(query),
324
+ RSTRING_LEN(query),
325
+ StringValueCStr(target),
326
+ RSTRING_LEN(target),
327
+ cfg);
328
+
329
+ if (result.status != 0)
330
+ {
331
+ rb_raise(rb_eRuntimeError, "edlibAlign failed");
332
+ }
333
+
334
+ VALUE edit_distance = INT2NUM(result.editDistance);
335
+ VALUE alphabet_length = INT2NUM(result.alphabetLength);
336
+ VALUE locations = rb_ary_new();
337
+ VALUE alignment = rb_ary_new();
338
+ VALUE cigar;
339
+
340
+ int *el = result.endLocations;
341
+ int *sl = result.startLocations;
342
+ for (int i = 0; i < result.numLocations; i++)
343
+ {
344
+ VALUE ary = rb_ary_new();
345
+ if (sl)
346
+ {
347
+ rb_ary_push(ary, INT2NUM(sl[i]));
348
+ }
349
+ else
350
+ {
351
+ rb_ary_push(ary, Qnil);
352
+ }
353
+ if (el)
354
+ {
355
+ rb_ary_push(ary, INT2NUM(el[i]));
356
+ }
357
+ else
358
+ {
359
+ rb_ary_push(ary, Qnil);
360
+ }
361
+ rb_ary_push(locations, ary);
362
+ }
363
+
364
+ for (int i = 0; i < result.alignmentLength; i++)
365
+ {
366
+ rb_ary_push(alignment, UINT2NUM(result.alignment[i]));
367
+ }
368
+
369
+ char *ccigar = edlibAlignmentToCigar(result.alignment, result.alignmentLength, 1); // EDLIB_CIGAR_EXTENDED
370
+ cigar = rb_str_new2(ccigar);
371
+
372
+ VALUE hash = rb_hash_new();
373
+ rb_hash_aset(hash, ID2SYM(rb_intern("edit_distance")), edit_distance);
374
+ rb_hash_aset(hash, ID2SYM(rb_intern("alphabet_length")), alphabet_length);
375
+ rb_hash_aset(hash, ID2SYM(rb_intern("locations")), locations);
376
+ rb_hash_aset(hash, ID2SYM(rb_intern("alignment")), alignment);
377
+ rb_hash_aset(hash, ID2SYM(rb_intern("cigar")), cigar);
378
+
379
+ edlibFreeAlignResult(result);
380
+
381
+ return hash;
382
+ }
383
+
384
+ void Init_edlib(void)
385
+ {
386
+ mEdlib = rb_define_module("Edlib");
387
+ cAligner = rb_define_class_under(mEdlib, "Aligner", rb_cObject);
388
+ rb_define_alloc_func(cAligner, config_allocate);
389
+ rb_define_private_method(cAligner, "initialize_raw", aligner_initialize, 4);
390
+ rb_define_method(cAligner, "k", aligner_get_k, 0);
391
+ rb_define_method(cAligner, "k=", aligner_set_k, 1);
392
+ rb_define_method(cAligner, "mode", aligner_get_mode, 0);
393
+ rb_define_method(cAligner, "mode=", aligner_set_mode, 1);
394
+ rb_define_method(cAligner, "task", aligner_get_task, 0);
395
+ rb_define_method(cAligner, "task=", aligner_set_task, 1);
396
+ rb_define_method(cAligner, "additional_equalities", aligner_get_additional_equalities, 0);
397
+ rb_define_method(cAligner, "additional_equalities=", aligner_set_additional_equalities, 1);
398
+ rb_define_method(cAligner, "config", aligner_config_hash, 0);
399
+ rb_define_method(cAligner, "align", aligner_align, 2);
400
+ }
@@ -0,0 +1,6 @@
1
require "mkmf"

# Lets the user point the build at a custom install via --with-edlib-dir
# (or --with-edlib-include / --with-edlib-lib).
dir_config('edlib')

# Stop with an actionable message when edlib is missing. Previously the
# script finished without writing a Makefile and without saying why.
unless have_header('edlib.h') && have_library('edlib')
  abort 'edlib was not found. Install its development files ' \
        '(e.g. `sudo apt install edlib-dev`) or pass --with-edlib-dir=PATH.'
end

create_makefile('edlib/edlib')
@@ -0,0 +1,3 @@
1
# Top-level namespace of the gem.
module Edlib
  # Version of the gem.
  VERSION = '0.0.1'
end
data/lib/edlib.rb ADDED
@@ -0,0 +1,13 @@
1
+ require_relative 'edlib/edlib'
2
+
3
module Edlib
  class Aligner
    # Builds an aligner from keyword options.
    #
    # Symbols given for +mode+ or +task+ are turned into strings, and
    # strings are upcased, before everything is passed on to the private
    # +initialize_raw+ method. Other values are passed through untouched.
    def initialize(k: -1, mode: 'NW', task: 'DISTANCE', additional_equalities: nil)
      normalized_mode, normalized_task = [mode, task].map do |option|
        option = option.to_s if option.is_a?(Symbol)
        option.is_a?(String) ? option.upcase : option
      end

      initialize_raw(k, normalized_mode, normalized_task, additional_equalities)
    end
  end
end
metadata ADDED
@@ -0,0 +1,50 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: edlib
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - kojix2
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2022-10-30 00:00:00.000000000 Z
12
+ dependencies: []
13
+ description: 'Lightweight, super fast C/C++ library for sequence alignment using edit
14
+ (Levenshtein) distance. '
15
+ email:
16
+ - 2xijok@gmail.com
17
+ executables: []
18
+ extensions:
19
+ - ext/edlib/extconf.rb
20
+ extra_rdoc_files: []
21
+ files:
22
+ - README.md
23
+ - ext/edlib/edlib.c
24
+ - ext/edlib/extconf.rb
25
+ - lib/edlib.rb
26
+ - lib/edlib/version.rb
27
+ homepage: https://github.com/kojix2/ruby-edlib
28
+ licenses:
29
+ - MIT
30
+ metadata: {}
31
+ post_install_message:
32
+ rdoc_options: []
33
+ require_paths:
34
+ - lib
35
+ required_ruby_version: !ruby/object:Gem::Requirement
36
+ requirements:
37
+ - - ">="
38
+ - !ruby/object:Gem::Version
39
+ version: 2.7.0
40
+ required_rubygems_version: !ruby/object:Gem::Requirement
41
+ requirements:
42
+ - - ">="
43
+ - !ruby/object:Gem::Version
44
+ version: '0'
45
+ requirements: []
46
+ rubygems_version: 3.3.7
47
+ signing_key:
48
+ specification_version: 4
49
+ summary: ruby-edlib is a wrapper for edlib.
50
+ test_files: []