edlib 0.0.5 → 0.0.7

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 5c9cf154ed1668d991a4476b501981a245a9b758950699fcba36062f73d312dd
4
- data.tar.gz: 716d869b8fc5dc870b9bbcdf3bfb2bceb99567b4e22197214cd99a948e850b86
3
+ metadata.gz: 72ca7381d858e17e8ba5e2641d072cb4f391410b97ef9138504efa6851020373
4
+ data.tar.gz: 180e15f4a09149af7031536817a635be580d55afd25896341cb44cf54a9902be
5
5
  SHA512:
6
- metadata.gz: b32f9755d963c17fba0c1b76364f9969f3a84fdb5c5e123020b02733699b313ff4d46e6203956640ad795c5686b0d00ff914645cd68a711add89e9d8250430eb
7
- data.tar.gz: a776f6b8c67c9f9a7f16440cd8630688f2bba4bca8822e2b76b32f4b6f2095cf31ce259dbc92b6ea923503403c135becd85a8e7e953caa52a4c51df16e2bcbfa
6
+ metadata.gz: 5f53003d5e12d9d55b40201f435a278b4a99d6e01569cf0f5e96a263822058257d3dd1b65c7c0f48d39a92177c271131d7752bf982c68893316e5dccbdeefc9f
7
+ data.tar.gz: 27e1ec9237c5f86fe8775100dbda7e2b9851071a9043cace94b927a65e372afa0b7cbbca741d51266b1f2a0493917eeaf1b4b60da92600def529b2e4ac724edb
data/README.md CHANGED
@@ -2,6 +2,7 @@
2
2
 
3
3
  [![Gem Version](https://badge.fury.io/rb/edlib.svg)](https://badge.fury.io/rb/edlib)
4
4
  [![test](https://github.com/kojix2/ruby-edlib/actions/workflows/ci.yml/badge.svg)](https://github.com/kojix2/ruby-edlib/actions/workflows/ci.yml)
5
+ [![DOI](https://zenodo.org/badge/559318048.svg)](https://zenodo.org/badge/latestdoi/559318048)
5
6
 
6
7
  [Edlib](https://github.com/Martinsos/edlib) - A lightweight and super fast C/C++ library for sequence alignment using edit distance
7
8
 
data/ext/edlib/edlibext.c CHANGED
@@ -1,70 +1,98 @@
1
1
  #include "ruby.h"
2
- #include "edlib.h"
2
+ #include "edlibext.h"
3
+
4
+ #define ALIGNER_GET_(name) \
5
+ static VALUE \
6
+ aligner_get_##name(VALUE self) \
7
+ { \
8
+ EdlibAlignConfig *config = aligner_get_config(self); \
9
+ return get_##name(config); \
10
+ }
11
+
12
+ #define ALIGNER_SET_(name) \
13
+ static VALUE \
14
+ aligner_set_##name(VALUE self, VALUE value) \
15
+ { \
16
+ EdlibAlignConfig *config = aligner_get_config(self); \
17
+ return set_##name(config, value); \
18
+ }
3
19
 
4
20
  VALUE mEdlib;
5
21
  VALUE cAligner;
6
- EdlibEqualityPair *eqpairs;
7
22
 
8
- static size_t config_memsize(const void *ptr);
9
- static void config_free(void *ptr);
23
+ // Aligner class
24
+
25
+ static size_t aligner_config_memsize(const void *ptr);
26
+ static void aligner_config_free(void *ptr);
10
27
 
11
28
  static const rb_data_type_t config_type = {
12
- .wrap_struct_name = "EdlibAlignConfig",
29
+ .wrap_struct_name = "RbAlignConfig",
13
30
  .function = {
14
- .dfree = config_free,
15
- .dsize = config_memsize,
31
+ .dfree = aligner_config_free,
32
+ .dsize = aligner_config_memsize,
16
33
  },
17
34
  .flags = RUBY_TYPED_FREE_IMMEDIATELY,
18
35
  };
19
36
 
20
37
  static VALUE
21
- config_allocate(VALUE klass)
38
+ aligner_config_allocate(VALUE klass)
22
39
  {
23
- EdlibAlignConfig *config;
24
- VALUE obj = TypedData_Make_Struct(klass, EdlibAlignConfig,
25
- &config_type, config);
40
+ RbAlignConfig *aligner_config;
41
+
42
+ VALUE obj = TypedData_Make_Struct(klass, RbAlignConfig, &config_type, aligner_config);
43
+ aligner_config->config = (EdlibAlignConfig *)malloc(sizeof(EdlibAlignConfig));
44
+ aligner_config->equalityPairs = NULL;
26
45
  return obj;
27
46
  }
28
47
 
29
48
  static void
30
- config_free(void *ptr)
49
+ aligner_config_free(void *ptr)
31
50
  {
32
- if (eqpairs != NULL)
51
+ RbAlignConfig *aligner_config = ptr;
52
+ if (aligner_config->config != NULL)
33
53
  {
34
- free(eqpairs);
35
- eqpairs = NULL;
54
+ free(aligner_config->config);
55
+ }
56
+ if (aligner_config->equalityPairs != NULL)
57
+ {
58
+ free(aligner_config->equalityPairs);
36
59
  }
37
- xfree(ptr);
60
+
61
+ free(ptr);
38
62
  }
39
63
 
40
64
  static size_t
41
- config_memsize(const void *ptr)
65
+ aligner_config_memsize(const void *ptr)
42
66
  {
43
- const EdlibAlignConfig *config = ptr;
44
- return sizeof(ptr) + 2 * sizeof(char) * (config->additionalEqualitiesLength);
67
+ const RbAlignConfig *aligner_config = ptr;
68
+ return sizeof(ptr) + sizeof(aligner_config->config) + 2 * sizeof(char) * aligner_config->config->additionalEqualitiesLength;
45
69
  }
46
70
 
47
71
  static EdlibAlignConfig *
48
- get_config(VALUE self)
72
+ aligner_get_config(VALUE self)
49
73
  {
50
- EdlibAlignConfig *ptr = NULL;
51
- TypedData_Get_Struct(self, EdlibAlignConfig, &config_type, ptr);
74
+ RbAlignConfig *aligner_config = NULL;
75
+ TypedData_Get_Struct(self, RbAlignConfig, &config_type, aligner_config);
76
+ return aligner_config->config;
77
+ }
52
78
 
53
- return ptr;
79
+ static EdlibEqualityPair *
80
+ aligner_get_equalityPairs(VALUE self)
81
+ {
82
+ RbAlignConfig *aligner_config = NULL;
83
+ TypedData_Get_Struct(self, RbAlignConfig, &config_type, aligner_config);
84
+ return aligner_config->equalityPairs;
54
85
  }
55
86
 
87
+ // Config
88
+
56
89
  static VALUE
57
90
  get_k(EdlibAlignConfig *config)
58
91
  {
59
92
  return INT2NUM(config->k);
60
93
  }
61
94
 
62
- static VALUE
63
- aligner_get_k(VALUE self)
64
- {
65
- EdlibAlignConfig *config = get_config(self);
66
- return get_k(config);
67
- }
95
+ ALIGNER_GET_(k)
68
96
 
69
97
  static VALUE
70
98
  set_k(EdlibAlignConfig *config, VALUE k)
@@ -73,35 +101,25 @@ set_k(EdlibAlignConfig *config, VALUE k)
73
101
  return k;
74
102
  }
75
103
 
76
- static VALUE
77
- aligner_set_k(VALUE self, VALUE k)
78
- {
79
- EdlibAlignConfig *config = get_config(self);
80
- return set_k(config, k);
81
- }
104
+ ALIGNER_SET_(k)
82
105
 
83
106
  static VALUE
84
107
  get_mode(EdlibAlignConfig *config)
85
108
  {
86
109
  switch (config->mode)
87
110
  {
88
- case 0:
111
+ case EDLIB_MODE_NW:
89
112
  return rb_str_new2("NW");
90
- case 1:
113
+ case EDLIB_MODE_SHW:
91
114
  return rb_str_new2("SHW");
92
- case 2:
115
+ case EDLIB_MODE_HW:
93
116
  return rb_str_new2("HW");
94
117
  default:
95
118
  return Qnil;
96
119
  }
97
120
  }
98
121
 
99
- static VALUE
100
- aligner_get_mode(VALUE self)
101
- {
102
- EdlibAlignConfig *config = get_config(self);
103
- return get_mode(config);
104
- }
122
+ ALIGNER_GET_(mode)
105
123
 
106
124
  static VALUE
107
125
  set_mode(EdlibAlignConfig *config, VALUE mode)
@@ -113,18 +131,19 @@ set_mode(EdlibAlignConfig *config, VALUE mode)
113
131
  switch (TYPE(mode))
114
132
  {
115
133
  case T_STRING:;
116
- rb_funcall(mode, rb_intern("upcase!"), 0);
117
- if (strcmp(RSTRING_PTR(mode), "NW") == 0)
134
+ VALUE mode_str = rb_funcall(mode, rb_intern("upcase"), 0);
135
+ char *mode_s = RSTRING_PTR(mode_str);
136
+ if (strcmp(mode_s, "NW") == 0)
118
137
  {
119
- config->mode = 0;
138
+ config->mode = EDLIB_MODE_NW;
120
139
  }
121
- else if (strcmp(RSTRING_PTR(mode), "SHW") == 0)
140
+ else if (strcmp(mode_s, "SHW") == 0)
122
141
  {
123
- config->mode = 1;
142
+ config->mode = EDLIB_MODE_SHW;
124
143
  }
125
- else if (strcmp(RSTRING_PTR(mode), "HW") == 0)
144
+ else if (strcmp(mode_s, "HW") == 0)
126
145
  {
127
- config->mode = 2;
146
+ config->mode = EDLIB_MODE_HW;
128
147
  }
129
148
  else
130
149
  {
@@ -145,35 +164,25 @@ set_mode(EdlibAlignConfig *config, VALUE mode)
145
164
  return mode;
146
165
  }
147
166
 
148
- static VALUE
149
- aligner_set_mode(VALUE self, VALUE mode)
150
- {
151
- EdlibAlignConfig *config = get_config(self);
152
- return set_mode(config, mode);
153
- }
167
+ ALIGNER_SET_(mode)
154
168
 
155
169
  static VALUE
156
170
  get_task(EdlibAlignConfig *config)
157
171
  {
158
172
  switch (config->task)
159
173
  {
160
- case 0:
174
+ case EDLIB_TASK_DISTANCE:
161
175
  return rb_str_new2("DISTANCE");
162
- case 1:
176
+ case EDLIB_TASK_LOC:
163
177
  return rb_str_new2("LOC");
164
- case 2:
178
+ case EDLIB_TASK_PATH:
165
179
  return rb_str_new2("PATH");
166
180
  default:
167
181
  return Qnil;
168
182
  }
169
183
  }
170
184
 
171
- static VALUE
172
- aligner_get_task(VALUE self)
173
- {
174
- EdlibAlignConfig *config = get_config(self);
175
- return get_task(config);
176
- }
185
+ ALIGNER_GET_(task)
177
186
 
178
187
  static VALUE
179
188
  set_task(EdlibAlignConfig *config, VALUE task)
@@ -185,18 +194,19 @@ set_task(EdlibAlignConfig *config, VALUE task)
185
194
  switch (TYPE(task))
186
195
  {
187
196
  case T_STRING:;
188
- rb_funcall(task, rb_intern("upcase!"), 0);
189
- if (strcmp(RSTRING_PTR(task), "DISTANCE") == 0)
197
+ VALUE task_str = rb_funcall(task, rb_intern("upcase"), 0);
198
+ char *task_s = RSTRING_PTR(task_str);
199
+ if (strcmp(task_s, "DISTANCE") == 0)
190
200
  {
191
- config->task = 0;
201
+ config->task = EDLIB_TASK_DISTANCE;
192
202
  }
193
- else if (strcmp(RSTRING_PTR(task), "LOC") == 0)
203
+ else if (strcmp(task_s, "LOC") == 0)
194
204
  {
195
- config->task = 1;
205
+ config->task = EDLIB_TASK_LOC;
196
206
  }
197
- else if (strcmp(RSTRING_PTR(task), "PATH") == 0)
207
+ else if (strcmp(task_s, "PATH") == 0)
198
208
  {
199
- config->task = 2;
209
+ config->task = EDLIB_TASK_PATH;
200
210
  }
201
211
  else
202
212
  {
@@ -217,12 +227,7 @@ set_task(EdlibAlignConfig *config, VALUE task)
217
227
  return task;
218
228
  }
219
229
 
220
- static VALUE
221
- aligner_set_task(VALUE self, VALUE task)
222
- {
223
- EdlibAlignConfig *config = get_config(self);
224
- return set_task(config, task);
225
- }
230
+ ALIGNER_SET_(task)
226
231
 
227
232
  static VALUE
228
233
  get_additional_equalities(EdlibAlignConfig *config)
@@ -241,21 +246,17 @@ get_additional_equalities(EdlibAlignConfig *config)
241
246
  return equalities;
242
247
  }
243
248
 
244
- static VALUE
245
- aligner_get_additional_equalities(VALUE self)
246
- {
247
- EdlibAlignConfig *config = get_config(self);
248
- return get_additional_equalities(config);
249
- }
249
+ ALIGNER_GET_(additional_equalities)
250
250
 
251
251
  static VALUE
252
- set_additional_equalities(EdlibAlignConfig *config, VALUE equalities)
252
+ set_additional_equalities(EdlibAlignConfig *config, EdlibEqualityPair *eqpairs, VALUE equalities)
253
253
  {
254
254
  Check_Type(equalities, T_ARRAY);
255
255
  int len = RARRAY_LEN(equalities);
256
256
  if (len == 0)
257
257
  {
258
- if(eqpairs != NULL) {
258
+ if (eqpairs != NULL)
259
+ {
259
260
  free(eqpairs);
260
261
  eqpairs = NULL;
261
262
  }
@@ -312,14 +313,15 @@ set_additional_equalities(EdlibAlignConfig *config, VALUE equalities)
312
313
  static VALUE
313
314
  aligner_set_additional_equalities(VALUE self, VALUE equalities)
314
315
  {
315
- EdlibAlignConfig *config = get_config(self);
316
- return set_additional_equalities(config, equalities);
316
+ EdlibAlignConfig *config = aligner_get_config(self);
317
+ EdlibEqualityPair *eqpairs = aligner_get_equalityPairs(self);
318
+ return set_additional_equalities(config, eqpairs, equalities);
317
319
  }
318
320
 
319
321
  static VALUE
320
322
  aligner_config_hash(VALUE self)
321
323
  {
322
- EdlibAlignConfig *config = get_config(self);
324
+ EdlibAlignConfig *config = aligner_get_config(self);
323
325
 
324
326
  VALUE hash = rb_hash_new();
325
327
 
@@ -332,16 +334,17 @@ aligner_config_hash(VALUE self)
332
334
  }
333
335
 
334
336
  static VALUE
335
- aligner_initialize(VALUE self, VALUE k, VALUE mode, VALUE task, VALUE additional_equalities)
337
+ aligner_initialize_raw(VALUE self, VALUE k, VALUE mode, VALUE task, VALUE additional_equalities)
336
338
  {
337
- EdlibAlignConfig *config = get_config(self);
338
-
339
+ EdlibAlignConfig *config = aligner_get_config(self);
340
+ EdlibEqualityPair *eqpairs = aligner_get_equalityPairs(self);
339
341
  config->k = NUM2INT(k);
340
342
  set_mode(config, mode);
341
343
  set_task(config, task);
344
+
342
345
  if (additional_equalities != Qnil)
343
346
  {
344
- set_additional_equalities(config, additional_equalities);
347
+ set_additional_equalities(config, eqpairs, additional_equalities);
345
348
  }
346
349
  else
347
350
  {
@@ -355,7 +358,7 @@ aligner_initialize(VALUE self, VALUE k, VALUE mode, VALUE task, VALUE additional
355
358
  static VALUE
356
359
  aligner_align(VALUE self, VALUE query, VALUE target)
357
360
  {
358
- EdlibAlignConfig *config = get_config(self);
361
+ EdlibAlignConfig *config = aligner_get_config(self);
359
362
  if (!config)
360
363
  {
361
364
  rb_raise(rb_eRuntimeError, "config is NULL");
@@ -427,8 +430,8 @@ void Init_edlibext(void)
427
430
  {
428
431
  mEdlib = rb_define_module("Edlib");
429
432
  cAligner = rb_define_class_under(mEdlib, "Aligner", rb_cObject);
430
- rb_define_alloc_func(cAligner, config_allocate);
431
- rb_define_private_method(cAligner, "initialize_raw", aligner_initialize, 4);
433
+ rb_define_alloc_func(cAligner, aligner_config_allocate);
434
+ rb_define_private_method(cAligner, "initialize_raw", aligner_initialize_raw, 4);
432
435
  rb_define_method(cAligner, "k", aligner_get_k, 0);
433
436
  rb_define_method(cAligner, "k=", aligner_set_k, 1);
434
437
  rb_define_method(cAligner, "mode", aligner_get_mode, 0);
@@ -438,5 +441,5 @@ void Init_edlibext(void)
438
441
  rb_define_method(cAligner, "additional_equalities", aligner_get_additional_equalities, 0);
439
442
  rb_define_method(cAligner, "additional_equalities=", aligner_set_additional_equalities, 1);
440
443
  rb_define_method(cAligner, "config", aligner_config_hash, 0);
441
- rb_define_method(cAligner, "align", aligner_align, 2);
444
+ rb_define_method(cAligner, "align_raw", aligner_align, 2);
442
445
  }
@@ -0,0 +1,15 @@
1
+ #ifndef EDLIBEXT_H
2
+ #define EDLIBEXT_H
3
+
4
+ #include "edlib.h"
5
+
6
+ typedef struct
7
+ {
8
+ EdlibAlignConfig *config;
9
+ EdlibEqualityPair *equalityPairs;
10
+ } RbAlignConfig;
11
+
12
+ static EdlibAlignConfig *aligner_get_config(VALUE self);
13
+ static EdlibEqualityPair *aligner_get_equalityPairs(VALUE self);
14
+
15
+ #endif // EDLIBEXT_H
data/ext/edlib/extconf.rb CHANGED
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'mkmf'
2
4
 
3
5
  create_makefile('edlib/edlibext')
data/lib/edlib/version.rb CHANGED
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Edlib
2
- VERSION = '0.0.5'
4
+ VERSION = '0.0.7'
3
5
  end
data/lib/edlib.rb CHANGED
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require_relative 'edlib/edlibext'
2
4
 
3
5
  module Edlib
@@ -9,5 +11,68 @@ module Edlib
9
11
  task = task.upcase if task.is_a? String
10
12
  initialize_raw(k, mode, task, additional_equalities)
11
13
  end
14
+
15
+ def align(query, target, nice: false)
16
+ if nice
17
+ align_nice(query, target)
18
+ else
19
+ align_raw(query, target)
20
+ end
21
+ end
22
+
23
+ private
24
+
25
+ def align_nice(query, target)
26
+ result = align(query, target)
27
+ result.merge(nice(result, query, target))
28
+ end
29
+
30
+ def nice(result, query, target, gap_symbol: '-')
31
+ raise 'result does not have :locations and :cigar' unless result.key?(:locations) && result.key?(:cigar)
32
+
33
+ target_pos = result[:locations][0][0]
34
+ query_pos = 0
35
+ query_aln = String.new
36
+ match_aln = String.new
37
+ target_aln = String.new
38
+ cigar = result[:cigar]
39
+ cigar.scan(/(\d+)(\D)/).each do |num, op|
40
+ num = num.to_i
41
+ case op
42
+ when '='
43
+ target_aln << target[target_pos, num]
44
+ target_pos += num
45
+ query_aln << query[query_pos, num]
46
+ query_pos += num
47
+ match_aln << '|' * num
48
+ when 'X'
49
+ target_aln << target[target_pos, num]
50
+ target_pos += num
51
+ query_aln << query[query_pos, num]
52
+ query_pos += num
53
+ match_aln << '.' * num
54
+ when 'D'
55
+ target_aln << target[target_pos, num]
56
+ target_pos += num
57
+ query_aln << gap_symbol * num
58
+ query_pos += 0
59
+ match_aln << gap_symbol * num
60
+ when 'I'
61
+ target_aln << gap_symbol * num
62
+ target_pos += 0
63
+ query_aln << query[query_pos, num]
64
+ query_pos += num
65
+ match_aln << gap_symbol * num
66
+ else
67
+ raise "Unknown CIGAR operation: #{op}"
68
+ end
69
+ end
70
+
71
+ {
72
+ query_aligned: query_aln,
73
+ match_aligned: match_aln,
74
+ target_aligned: target_aln
75
+ }
76
+ end
12
77
  end
13
78
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: edlib
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.5
4
+ version: 0.0.7
5
5
  platform: ruby
6
6
  authors:
7
7
  - kojix2
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2022-10-31 00:00:00.000000000 Z
11
+ date: 2022-11-07 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: 'Lightweight, super fast C/C++ library for sequence alignment using edit
14
14
  (Levenshtein) distance. '
@@ -23,6 +23,7 @@ files:
23
23
  - ext/edlib/edlib.cpp
24
24
  - ext/edlib/edlib.h
25
25
  - ext/edlib/edlibext.c
26
+ - ext/edlib/edlibext.h
26
27
  - ext/edlib/extconf.rb
27
28
  - lib/edlib.rb
28
29
  - lib/edlib/version.rb