edlib 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: ad1a2f931b436577baac1923b324837f50aa133efbe48843f0956711b6e3fe4c
4
+ data.tar.gz: 62c9924efcf428902528e6ce29e1be6d7d874bd5db406b29bdeee00b3491e800
5
+ SHA512:
6
+ metadata.gz: ca119f2d471a21d8cb8da7820d2a1723396040d8a62c18c4cbcdb9b7c8879ed7a8792d57193e110a122c3b9d2d5b883aeab869cdc7d465e4aeec687c23896c5d
7
+ data.tar.gz: 596c89f8fe1112bb876edfb89849d6f634b4aa1d84ea3f47a6a24dbdb78070f86f063a0d38ef5aaccc4e4ea66edfd5d9c699ccb778dcf0f8812794018be944e7
data/README.md ADDED
@@ -0,0 +1,39 @@
1
+ # ruby-edlib
2
+
3
+ [Edlib](https://github.com/Martinsos/edlib) - A lightweight and super fast C/C++ library for sequence alignment using edit distance
4
+
5
+ ## Installation
6
+
7
+ ```
8
+ sudo apt install edlib-dev
9
+ gem install edlib
10
+ ```
11
+
12
+ ## API
13
+
14
+ ```ruby
15
+ require "edlib"
16
+
17
+ a = Edlib::Aligner.new(mode: :hw, task: :path)
18
+ a.align("AACG", "TCAACCTG")
19
+ # => {:edit_distance=>1, :alphabet_length=>4, :locations=>[[2, 4], [2, 5]], :alignment=>[0, 0, 0, 1], :cigar=>"3=1I"}
20
+ ```
21
+
22
+ |keyword argument |description|
23
+ |---------------------|-----------------------------------------------------------------------------|
24
+ |k |upper bound on the edit distance: edlib only looks for alignments with distance <= k; a negative value means no bound [-1] |
25
+ |mode |global (NW) , prefix (SHW) , infix (HW) ["NW"] |
26
+ |task |DISTANCE, LOC, PATH ["DISTANCE"] |
27
+ |additional_equalities|List of pairs of characters, where each pair defines two characters as equal. [NULL]|
28
+
29
+ ## Development
30
+
31
+ * Pull requests welcome
32
+
33
+ ```sh
34
+ git clone https://github.com/kojix2/ruby-edlib # Please fork repo
35
+ cd ruby-edlib
36
+ bundle install
37
+ bundle exec rake compile
38
+ bundle exec rake test
39
+ ```
data/ext/edlib/edlib.c ADDED
@@ -0,0 +1,400 @@
1
+ #include "ruby.h"
2
+ #include "edlib.h"
3
+
4
+ VALUE mEdlib;
5
+ VALUE cAligner;
6
+
7
+ static size_t config_memsize(const void *ptr);
8
+
9
+ static const rb_data_type_t config_type = {
10
+ .wrap_struct_name = "EdlibAlignConfig",
11
+ .function = {
12
+ .dfree = RUBY_DEFAULT_FREE,
13
+ .dsize = config_memsize,
14
+ },
15
+ .flags = RUBY_TYPED_FREE_IMMEDIATELY,
16
+ };
17
+
18
+ static VALUE
19
+ config_allocate(VALUE klass)
20
+ {
21
+ EdlibAlignConfig *config;
22
+ VALUE obj = TypedData_Make_Struct(klass, EdlibAlignConfig,
23
+ &config_type, config);
24
+ return obj;
25
+ }
26
+
27
+ static size_t
28
+ config_memsize(const void *ptr)
29
+ {
30
+ const EdlibAlignConfig *config = ptr;
31
+ return sizeof(ptr) + 2 * sizeof(char) * (config->additionalEqualitiesLength);
32
+ }
33
+
34
+ static EdlibAlignConfig *get_config(VALUE self)
35
+ {
36
+ EdlibAlignConfig *ptr = NULL;
37
+ TypedData_Get_Struct(self, EdlibAlignConfig, &config_type, ptr);
38
+
39
+ return ptr;
40
+ }
41
+
42
+ static VALUE
43
+ get_k(EdlibAlignConfig *config)
44
+ {
45
+ return INT2NUM(config->k);
46
+ }
47
+
48
+ static VALUE
49
+ aligner_get_k(VALUE self)
50
+ {
51
+ EdlibAlignConfig *config = get_config(self);
52
+ return get_k(config);
53
+ }
54
+
55
+ static VALUE
56
+ set_k(EdlibAlignConfig *config, VALUE k)
57
+ {
58
+ config->k = NUM2INT(k);
59
+ return k;
60
+ }
61
+
62
+ static VALUE
63
+ aligner_set_k(VALUE self, VALUE k)
64
+ {
65
+ EdlibAlignConfig *config = get_config(self);
66
+ return set_k(config, k);
67
+ }
68
+
69
+ static VALUE
70
+ get_mode(EdlibAlignConfig *config)
71
+ {
72
+ switch (config->mode)
73
+ {
74
+ case 0:
75
+ return rb_str_new2("NW");
76
+ case 1:
77
+ return rb_str_new2("SHW");
78
+ case 2:
79
+ return rb_str_new2("HW");
80
+ default:
81
+ return Qnil;
82
+ }
83
+ }
84
+
85
+ static VALUE
86
+ aligner_get_mode(VALUE self)
87
+ {
88
+ EdlibAlignConfig *config = get_config(self);
89
+ return get_mode(config);
90
+ }
91
+
92
+ static VALUE
93
+ set_mode(EdlibAlignConfig *config, VALUE mode)
94
+ {
95
+ switch (TYPE(mode))
96
+ {
97
+ case T_STRING:
98
+ if (strcmp(RSTRING_PTR(mode), "NW") == 0)
99
+ {
100
+ config->mode = 0;
101
+ }
102
+ else if (strcmp(RSTRING_PTR(mode), "SHW") == 0)
103
+ {
104
+ config->mode = 1;
105
+ }
106
+ else if (strcmp(RSTRING_PTR(mode), "HW") == 0)
107
+ {
108
+ config->mode = 2;
109
+ }
110
+ else
111
+ {
112
+ rb_raise(rb_eArgError, "Invalid mode");
113
+ }
114
+ break;
115
+ case T_FIXNUM:;
116
+ int m = NUM2INT(mode);
117
+ if (m < 0 || m > 2)
118
+ {
119
+ rb_raise(rb_eArgError, "Invalid mode");
120
+ }
121
+ config->mode = NUM2INT(mode);
122
+ break;
123
+ default:
124
+ rb_raise(rb_eArgError, "Invalid mode");
125
+ }
126
+ return mode;
127
+ }
128
+
129
+ static VALUE
130
+ aligner_set_mode(VALUE self, VALUE mode)
131
+ {
132
+ EdlibAlignConfig *config = get_config(self);
133
+ return set_mode(config, mode);
134
+ }
135
+
136
+ static VALUE
137
+ get_task(EdlibAlignConfig *config)
138
+ {
139
+ switch (config->task)
140
+ {
141
+ case 0:
142
+ return rb_str_new2("DISTANCE");
143
+ case 1:
144
+ return rb_str_new2("LOC");
145
+ case 2:
146
+ return rb_str_new2("PATH");
147
+ default:
148
+ return Qnil;
149
+ }
150
+ }
151
+
152
+ static VALUE
153
+ aligner_get_task(VALUE self)
154
+ {
155
+ EdlibAlignConfig *config = get_config(self);
156
+ return get_task(config);
157
+ }
158
+
159
+ static VALUE
160
+ set_task(EdlibAlignConfig *config, VALUE task)
161
+ {
162
+ switch (TYPE(task))
163
+ {
164
+ case T_STRING:
165
+ if (strcmp(RSTRING_PTR(task), "DISTANCE") == 0)
166
+ {
167
+ config->task = 0;
168
+ }
169
+ else if (strcmp(RSTRING_PTR(task), "LOC") == 0)
170
+ {
171
+ config->task = 1;
172
+ }
173
+ else if (strcmp(RSTRING_PTR(task), "PATH") == 0)
174
+ {
175
+ config->task = 2;
176
+ }
177
+ else
178
+ {
179
+ rb_raise(rb_eArgError, "Invalid task");
180
+ }
181
+ break;
182
+ case T_FIXNUM:;
183
+ int t = NUM2INT(task);
184
+ if (t < 0 || t > 2)
185
+ {
186
+ rb_raise(rb_eArgError, "Invalid task");
187
+ }
188
+ config->task = NUM2INT(task);
189
+ break;
190
+ default:
191
+ rb_raise(rb_eArgError, "Invalid task");
192
+ }
193
+ return task;
194
+ }
195
+
196
+ static VALUE
197
+ aligner_set_task(VALUE self, VALUE task)
198
+ {
199
+ EdlibAlignConfig *config = get_config(self);
200
+ return set_task(config, task);
201
+ }
202
+
203
+ static VALUE
204
+ get_additional_equalities(EdlibAlignConfig *config)
205
+ {
206
+ VALUE equalities = rb_ary_new();
207
+
208
+ for (int i = 0; i < config->additionalEqualitiesLength; i++)
209
+ {
210
+ EdlibEqualityPair pair = config->additionalEqualities[i];
211
+ VALUE pair_ary = rb_ary_new();
212
+ rb_ary_push(pair_ary, rb_str_new(&pair.first, 1));
213
+ rb_ary_push(pair_ary, rb_str_new(&pair.second, 1));
214
+ rb_ary_push(equalities, pair_ary);
215
+ }
216
+
217
+ return equalities;
218
+ }
219
+
220
+ static VALUE
221
+ aligner_get_additional_equalities(VALUE self)
222
+ {
223
+ EdlibAlignConfig *config = get_config(self);
224
+ return get_additional_equalities(config);
225
+ }
226
+
227
+ static VALUE
228
+ set_additional_equalities(EdlibAlignConfig *config, VALUE equalities)
229
+ {
230
+ Check_Type(equalities, T_ARRAY);
231
+ EdlibEqualityPair *pairs = (EdlibEqualityPair *)malloc(sizeof(EdlibEqualityPair) * RARRAY_LEN(equalities));
232
+
233
+ for (int i = 0; i < RARRAY_LEN(equalities); i++)
234
+ {
235
+ VALUE pair = rb_ary_entry(equalities, i);
236
+ Check_Type(pair, T_ARRAY);
237
+ if (RARRAY_LEN(pair) != 2)
238
+ {
239
+ rb_raise(rb_eArgError, "Invalid equality pair");
240
+ }
241
+ VALUE s1 = rb_ary_entry(pair, 0);
242
+ VALUE s2 = rb_ary_entry(pair, 1);
243
+ Check_Type(s1, T_STRING);
244
+ Check_Type(s2, T_STRING);
245
+ if (RSTRING_LEN(s1) != 1 || RSTRING_LEN(s2) != 1)
246
+ {
247
+ rb_raise(rb_eArgError, "String length must be 1");
248
+ }
249
+ char c1 = RSTRING_PTR(s1)[0];
250
+ char c2 = RSTRING_PTR(s2)[0];
251
+
252
+ pairs[i].first = c1;
253
+ pairs[i].second = c2;
254
+ }
255
+
256
+ config->additionalEqualities = pairs;
257
+ free(pairs);
258
+
259
+ config->additionalEqualitiesLength = RARRAY_LEN(equalities);
260
+
261
+ return equalities;
262
+ }
263
+
264
+ static VALUE
265
+ aligner_set_additional_equalities(VALUE self, VALUE equalities)
266
+ {
267
+ EdlibAlignConfig *config = get_config(self);
268
+ return set_additional_equalities(config, equalities);
269
+ }
270
+
271
+ static VALUE
272
+ aligner_config_hash(VALUE self)
273
+ {
274
+ EdlibAlignConfig *config = get_config(self);
275
+
276
+ VALUE hash = rb_hash_new();
277
+
278
+ rb_hash_aset(hash, ID2SYM(rb_intern("k")), get_k(config));
279
+ rb_hash_aset(hash, ID2SYM(rb_intern("mode")), get_mode(config));
280
+ rb_hash_aset(hash, ID2SYM(rb_intern("task")), get_task(config));
281
+ rb_hash_aset(hash, ID2SYM(rb_intern("additional_equalities")), get_additional_equalities(config));
282
+
283
+ return hash;
284
+ }
285
+
286
+ static VALUE
287
+ aligner_initialize(VALUE self, VALUE k, VALUE mode, VALUE task, VALUE additional_equalities)
288
+ {
289
+ EdlibAlignConfig *config = get_config(self);
290
+
291
+ config->k = NUM2INT(k);
292
+ set_mode(config, mode);
293
+ set_task(config, task);
294
+ if (additional_equalities != Qnil)
295
+ {
296
+ set_additional_equalities(config, additional_equalities);
297
+ }
298
+ else
299
+ {
300
+ config->additionalEqualities = NULL;
301
+ config->additionalEqualitiesLength = 0;
302
+ }
303
+
304
+ return self;
305
+ }
306
+
307
+ static VALUE
308
+ aligner_align(VALUE self, VALUE query, VALUE target)
309
+ {
310
+ EdlibAlignConfig *config = get_config(self);
311
+ if (!config)
312
+ {
313
+ rb_raise(rb_eRuntimeError, "config is NULL");
314
+ }
315
+ EdlibAlignConfig cfg = edlibNewAlignConfig(
316
+ config->k,
317
+ config->mode,
318
+ config->task,
319
+ config->additionalEqualities,
320
+ config->additionalEqualitiesLength);
321
+
322
+ EdlibAlignResult result = edlibAlign(
323
+ StringValueCStr(query),
324
+ RSTRING_LEN(query),
325
+ StringValueCStr(target),
326
+ RSTRING_LEN(target),
327
+ cfg);
328
+
329
+ if (result.status != 0)
330
+ {
331
+ rb_raise(rb_eRuntimeError, "edlibAlign failed");
332
+ }
333
+
334
+ VALUE edit_distance = INT2NUM(result.editDistance);
335
+ VALUE alphabet_length = INT2NUM(result.alphabetLength);
336
+ VALUE locations = rb_ary_new();
337
+ VALUE alignment = rb_ary_new();
338
+ VALUE cigar;
339
+
340
+ int *el = result.endLocations;
341
+ int *sl = result.startLocations;
342
+ for (int i = 0; i < result.numLocations; i++)
343
+ {
344
+ VALUE ary = rb_ary_new();
345
+ if (sl)
346
+ {
347
+ rb_ary_push(ary, INT2NUM(sl[i]));
348
+ }
349
+ else
350
+ {
351
+ rb_ary_push(ary, Qnil);
352
+ }
353
+ if (el)
354
+ {
355
+ rb_ary_push(ary, INT2NUM(el[i]));
356
+ }
357
+ else
358
+ {
359
+ rb_ary_push(ary, Qnil);
360
+ }
361
+ rb_ary_push(locations, ary);
362
+ }
363
+
364
+ for (int i = 0; i < result.alignmentLength; i++)
365
+ {
366
+ rb_ary_push(alignment, UINT2NUM(result.alignment[i]));
367
+ }
368
+
369
+ char *ccigar = edlibAlignmentToCigar(result.alignment, result.alignmentLength, 1); // EDLIB_CIGAR_EXTENDED
370
+ cigar = rb_str_new2(ccigar);
371
+
372
+ VALUE hash = rb_hash_new();
373
+ rb_hash_aset(hash, ID2SYM(rb_intern("edit_distance")), edit_distance);
374
+ rb_hash_aset(hash, ID2SYM(rb_intern("alphabet_length")), alphabet_length);
375
+ rb_hash_aset(hash, ID2SYM(rb_intern("locations")), locations);
376
+ rb_hash_aset(hash, ID2SYM(rb_intern("alignment")), alignment);
377
+ rb_hash_aset(hash, ID2SYM(rb_intern("cigar")), cigar);
378
+
379
+ edlibFreeAlignResult(result);
380
+
381
+ return hash;
382
+ }
383
+
384
+ void Init_edlib(void)
385
+ {
386
+ mEdlib = rb_define_module("Edlib");
387
+ cAligner = rb_define_class_under(mEdlib, "Aligner", rb_cObject);
388
+ rb_define_alloc_func(cAligner, config_allocate);
389
+ rb_define_private_method(cAligner, "initialize_raw", aligner_initialize, 4);
390
+ rb_define_method(cAligner, "k", aligner_get_k, 0);
391
+ rb_define_method(cAligner, "k=", aligner_set_k, 1);
392
+ rb_define_method(cAligner, "mode", aligner_get_mode, 0);
393
+ rb_define_method(cAligner, "mode=", aligner_set_mode, 1);
394
+ rb_define_method(cAligner, "task", aligner_get_task, 0);
395
+ rb_define_method(cAligner, "task=", aligner_set_task, 1);
396
+ rb_define_method(cAligner, "additional_equalities", aligner_get_additional_equalities, 0);
397
+ rb_define_method(cAligner, "additional_equalities=", aligner_set_additional_equalities, 1);
398
+ rb_define_method(cAligner, "config", aligner_config_hash, 0);
399
+ rb_define_method(cAligner, "align", aligner_align, 2);
400
+ }
@@ -0,0 +1,6 @@
1
# Build configuration for the edlib C extension.
#
# Looks for the edlib header and library (honouring the --with-edlib-dir,
# --with-edlib-include and --with-edlib-lib options set up by dir_config)
# and generates the Makefile.  Aborts with an explanatory message when
# edlib cannot be found, instead of exiting silently without a Makefile.
require "mkmf"

dir_config("edlib")

unless have_header("edlib.h") && have_library("edlib")
  abort "edlib.h or libedlib was not found. Install the edlib development " \
        "files or pass --with-edlib-dir=PATH."
end

create_makefile("edlib/edlib")
@@ -0,0 +1,3 @@
1
# Namespace of the edlib bindings.
module Edlib
  # Version of the gem.
  VERSION = "0.0.1"
end
data/lib/edlib.rb ADDED
@@ -0,0 +1,13 @@
1
+ require_relative 'edlib/edlib'
2
+
3
module Edlib
  class Aligner
    # Creates an aligner.
    #
    # k::                     maximum edit distance (default -1)
    # mode::                  "NW", "SHW" or "HW"; Symbols and lower-case
    #                         Strings are accepted and upcased
    # task::                  "DISTANCE", "LOC" or "PATH"; normalised like mode
    # additional_equalities:: Array of character pairs, or nil
    #
    # Values that are neither Symbol nor String are passed to
    # initialize_raw untouched.
    def initialize(k: -1, mode: 'NW', task: 'DISTANCE', additional_equalities: nil)
      mode, task = [mode, task].map do |option|
        option = option.to_s if option.is_a?(Symbol)
        option.is_a?(String) ? option.upcase : option
      end

      initialize_raw(k, mode, task, additional_equalities)
    end
  end
end
metadata ADDED
@@ -0,0 +1,50 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: edlib
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - kojix2
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2022-10-30 00:00:00.000000000 Z
12
+ dependencies: []
13
+ description: 'Lightweight, super fast C/C++ library for sequence alignment using edit
14
+ (Levenshtein) distance. '
15
+ email:
16
+ - 2xijok@gmail.com
17
+ executables: []
18
+ extensions:
19
+ - ext/edlib/extconf.rb
20
+ extra_rdoc_files: []
21
+ files:
22
+ - README.md
23
+ - ext/edlib/edlib.c
24
+ - ext/edlib/extconf.rb
25
+ - lib/edlib.rb
26
+ - lib/edlib/version.rb
27
+ homepage: https://github.com/kojix2/ruby-edlib
28
+ licenses:
29
+ - MIT
30
+ metadata: {}
31
+ post_install_message:
32
+ rdoc_options: []
33
+ require_paths:
34
+ - lib
35
+ required_ruby_version: !ruby/object:Gem::Requirement
36
+ requirements:
37
+ - - ">="
38
+ - !ruby/object:Gem::Version
39
+ version: 2.7.0
40
+ required_rubygems_version: !ruby/object:Gem::Requirement
41
+ requirements:
42
+ - - ">="
43
+ - !ruby/object:Gem::Version
44
+ version: '0'
45
+ requirements: []
46
+ rubygems_version: 3.3.7
47
+ signing_key:
48
+ specification_version: 4
49
+ summary: ruby-edlib is a wrapper for edlib.
50
+ test_files: []