edlib 0.0.5 → 0.0.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 5c9cf154ed1668d991a4476b501981a245a9b758950699fcba36062f73d312dd
4
- data.tar.gz: 716d869b8fc5dc870b9bbcdf3bfb2bceb99567b4e22197214cd99a948e850b86
3
+ metadata.gz: 72ca7381d858e17e8ba5e2641d072cb4f391410b97ef9138504efa6851020373
4
+ data.tar.gz: 180e15f4a09149af7031536817a635be580d55afd25896341cb44cf54a9902be
5
5
  SHA512:
6
- metadata.gz: b32f9755d963c17fba0c1b76364f9969f3a84fdb5c5e123020b02733699b313ff4d46e6203956640ad795c5686b0d00ff914645cd68a711add89e9d8250430eb
7
- data.tar.gz: a776f6b8c67c9f9a7f16440cd8630688f2bba4bca8822e2b76b32f4b6f2095cf31ce259dbc92b6ea923503403c135becd85a8e7e953caa52a4c51df16e2bcbfa
6
+ metadata.gz: 5f53003d5e12d9d55b40201f435a278b4a99d6e01569cf0f5e96a263822058257d3dd1b65c7c0f48d39a92177c271131d7752bf982c68893316e5dccbdeefc9f
7
+ data.tar.gz: 27e1ec9237c5f86fe8775100dbda7e2b9851071a9043cace94b927a65e372afa0b7cbbca741d51266b1f2a0493917eeaf1b4b60da92600def529b2e4ac724edb
data/README.md CHANGED
@@ -2,6 +2,7 @@
2
2
 
3
3
  [![Gem Version](https://badge.fury.io/rb/edlib.svg)](https://badge.fury.io/rb/edlib)
4
4
  [![test](https://github.com/kojix2/ruby-edlib/actions/workflows/ci.yml/badge.svg)](https://github.com/kojix2/ruby-edlib/actions/workflows/ci.yml)
5
+ [![DOI](https://zenodo.org/badge/559318048.svg)](https://zenodo.org/badge/latestdoi/559318048)
5
6
 
6
7
  [Edlib](https://github.com/Martinsos/edlib) - A lightweight and super fast C/C++ library for sequence alignment using edit distance
7
8
 
data/ext/edlib/edlibext.c CHANGED
@@ -1,70 +1,98 @@
1
1
  #include "ruby.h"
2
- #include "edlib.h"
2
+ #include "edlibext.h"
3
+
4
+ #define ALIGNER_GET_(name) \
5
+ static VALUE \
6
+ aligner_get_##name(VALUE self) \
7
+ { \
8
+ EdlibAlignConfig *config = aligner_get_config(self); \
9
+ return get_##name(config); \
10
+ }
11
+
12
+ #define ALIGNER_SET_(name) \
13
+ static VALUE \
14
+ aligner_set_##name(VALUE self, VALUE value) \
15
+ { \
16
+ EdlibAlignConfig *config = aligner_get_config(self); \
17
+ return set_##name(config, value); \
18
+ }
3
19
 
4
20
  VALUE mEdlib;
5
21
  VALUE cAligner;
6
- EdlibEqualityPair *eqpairs;
7
22
 
8
- static size_t config_memsize(const void *ptr);
9
- static void config_free(void *ptr);
23
+ // Aligner class
24
+
25
+ static size_t aligner_config_memsize(const void *ptr);
26
+ static void aligner_config_free(void *ptr);
10
27
 
11
28
  static const rb_data_type_t config_type = {
12
- .wrap_struct_name = "EdlibAlignConfig",
29
+ .wrap_struct_name = "RbAlignConfig",
13
30
  .function = {
14
- .dfree = config_free,
15
- .dsize = config_memsize,
31
+ .dfree = aligner_config_free,
32
+ .dsize = aligner_config_memsize,
16
33
  },
17
34
  .flags = RUBY_TYPED_FREE_IMMEDIATELY,
18
35
  };
19
36
 
20
37
  static VALUE
21
- config_allocate(VALUE klass)
38
+ aligner_config_allocate(VALUE klass)
22
39
  {
23
- EdlibAlignConfig *config;
24
- VALUE obj = TypedData_Make_Struct(klass, EdlibAlignConfig,
25
- &config_type, config);
40
+ RbAlignConfig *aligner_config;
41
+
42
+ VALUE obj = TypedData_Make_Struct(klass, RbAlignConfig, &config_type, aligner_config);
43
+ aligner_config->config = (EdlibAlignConfig *)malloc(sizeof(EdlibAlignConfig));
44
+ aligner_config->equalityPairs = NULL;
26
45
  return obj;
27
46
  }
28
47
 
29
48
  static void
30
- config_free(void *ptr)
49
+ aligner_config_free(void *ptr)
31
50
  {
32
- if (eqpairs != NULL)
51
+ RbAlignConfig *aligner_config = ptr;
52
+ if (aligner_config->config != NULL)
33
53
  {
34
- free(eqpairs);
35
- eqpairs = NULL;
54
+ free(aligner_config->config);
55
+ }
56
+ if (aligner_config->equalityPairs != NULL)
57
+ {
58
+ free(aligner_config->equalityPairs);
36
59
  }
37
- xfree(ptr);
60
+
61
+ free(ptr);
38
62
  }
39
63
 
40
64
  static size_t
41
- config_memsize(const void *ptr)
65
+ aligner_config_memsize(const void *ptr)
42
66
  {
43
- const EdlibAlignConfig *config = ptr;
44
- return sizeof(ptr) + 2 * sizeof(char) * (config->additionalEqualitiesLength);
67
+ const RbAlignConfig *aligner_config = ptr;
68
+ return sizeof(ptr) + sizeof(aligner_config->config) + 2 * sizeof(char) * aligner_config->config->additionalEqualitiesLength;
45
69
  }
46
70
 
47
71
  static EdlibAlignConfig *
48
- get_config(VALUE self)
72
+ aligner_get_config(VALUE self)
49
73
  {
50
- EdlibAlignConfig *ptr = NULL;
51
- TypedData_Get_Struct(self, EdlibAlignConfig, &config_type, ptr);
74
+ RbAlignConfig *aligner_config = NULL;
75
+ TypedData_Get_Struct(self, RbAlignConfig, &config_type, aligner_config);
76
+ return aligner_config->config;
77
+ }
52
78
 
53
- return ptr;
79
+ static EdlibEqualityPair *
80
+ aligner_get_equalityPairs(VALUE self)
81
+ {
82
+ RbAlignConfig *aligner_config = NULL;
83
+ TypedData_Get_Struct(self, RbAlignConfig, &config_type, aligner_config);
84
+ return aligner_config->equalityPairs;
54
85
  }
55
86
 
87
+ // Config
88
+
56
89
  static VALUE
57
90
  get_k(EdlibAlignConfig *config)
58
91
  {
59
92
  return INT2NUM(config->k);
60
93
  }
61
94
 
62
- static VALUE
63
- aligner_get_k(VALUE self)
64
- {
65
- EdlibAlignConfig *config = get_config(self);
66
- return get_k(config);
67
- }
95
+ ALIGNER_GET_(k)
68
96
 
69
97
  static VALUE
70
98
  set_k(EdlibAlignConfig *config, VALUE k)
@@ -73,35 +101,25 @@ set_k(EdlibAlignConfig *config, VALUE k)
73
101
  return k;
74
102
  }
75
103
 
76
- static VALUE
77
- aligner_set_k(VALUE self, VALUE k)
78
- {
79
- EdlibAlignConfig *config = get_config(self);
80
- return set_k(config, k);
81
- }
104
+ ALIGNER_SET_(k)
82
105
 
83
106
  static VALUE
84
107
  get_mode(EdlibAlignConfig *config)
85
108
  {
86
109
  switch (config->mode)
87
110
  {
88
- case 0:
111
+ case EDLIB_MODE_NW:
89
112
  return rb_str_new2("NW");
90
- case 1:
113
+ case EDLIB_MODE_SHW:
91
114
  return rb_str_new2("SHW");
92
- case 2:
115
+ case EDLIB_MODE_HW:
93
116
  return rb_str_new2("HW");
94
117
  default:
95
118
  return Qnil;
96
119
  }
97
120
  }
98
121
 
99
- static VALUE
100
- aligner_get_mode(VALUE self)
101
- {
102
- EdlibAlignConfig *config = get_config(self);
103
- return get_mode(config);
104
- }
122
+ ALIGNER_GET_(mode)
105
123
 
106
124
  static VALUE
107
125
  set_mode(EdlibAlignConfig *config, VALUE mode)
@@ -113,18 +131,19 @@ set_mode(EdlibAlignConfig *config, VALUE mode)
113
131
  switch (TYPE(mode))
114
132
  {
115
133
  case T_STRING:;
116
- rb_funcall(mode, rb_intern("upcase!"), 0);
117
- if (strcmp(RSTRING_PTR(mode), "NW") == 0)
134
+ VALUE mode_str = rb_funcall(mode, rb_intern("upcase"), 0);
135
+ char *mode_s = RSTRING_PTR(mode_str);
136
+ if (strcmp(mode_s, "NW") == 0)
118
137
  {
119
- config->mode = 0;
138
+ config->mode = EDLIB_MODE_NW;
120
139
  }
121
- else if (strcmp(RSTRING_PTR(mode), "SHW") == 0)
140
+ else if (strcmp(mode_s, "SHW") == 0)
122
141
  {
123
- config->mode = 1;
142
+ config->mode = EDLIB_MODE_SHW;
124
143
  }
125
- else if (strcmp(RSTRING_PTR(mode), "HW") == 0)
144
+ else if (strcmp(mode_s, "HW") == 0)
126
145
  {
127
- config->mode = 2;
146
+ config->mode = EDLIB_MODE_HW;
128
147
  }
129
148
  else
130
149
  {
@@ -145,35 +164,25 @@ set_mode(EdlibAlignConfig *config, VALUE mode)
145
164
  return mode;
146
165
  }
147
166
 
148
- static VALUE
149
- aligner_set_mode(VALUE self, VALUE mode)
150
- {
151
- EdlibAlignConfig *config = get_config(self);
152
- return set_mode(config, mode);
153
- }
167
+ ALIGNER_SET_(mode)
154
168
 
155
169
  static VALUE
156
170
  get_task(EdlibAlignConfig *config)
157
171
  {
158
172
  switch (config->task)
159
173
  {
160
- case 0:
174
+ case EDLIB_TASK_DISTANCE:
161
175
  return rb_str_new2("DISTANCE");
162
- case 1:
176
+ case EDLIB_TASK_LOC:
163
177
  return rb_str_new2("LOC");
164
- case 2:
178
+ case EDLIB_TASK_PATH:
165
179
  return rb_str_new2("PATH");
166
180
  default:
167
181
  return Qnil;
168
182
  }
169
183
  }
170
184
 
171
- static VALUE
172
- aligner_get_task(VALUE self)
173
- {
174
- EdlibAlignConfig *config = get_config(self);
175
- return get_task(config);
176
- }
185
+ ALIGNER_GET_(task)
177
186
 
178
187
  static VALUE
179
188
  set_task(EdlibAlignConfig *config, VALUE task)
@@ -185,18 +194,19 @@ set_task(EdlibAlignConfig *config, VALUE task)
185
194
  switch (TYPE(task))
186
195
  {
187
196
  case T_STRING:;
188
- rb_funcall(task, rb_intern("upcase!"), 0);
189
- if (strcmp(RSTRING_PTR(task), "DISTANCE") == 0)
197
+ VALUE task_str = rb_funcall(task, rb_intern("upcase"), 0);
198
+ char *task_s = RSTRING_PTR(task_str);
199
+ if (strcmp(task_s, "DISTANCE") == 0)
190
200
  {
191
- config->task = 0;
201
+ config->task = EDLIB_TASK_DISTANCE;
192
202
  }
193
- else if (strcmp(RSTRING_PTR(task), "LOC") == 0)
203
+ else if (strcmp(task_s, "LOC") == 0)
194
204
  {
195
- config->task = 1;
205
+ config->task = EDLIB_TASK_LOC;
196
206
  }
197
- else if (strcmp(RSTRING_PTR(task), "PATH") == 0)
207
+ else if (strcmp(task_s, "PATH") == 0)
198
208
  {
199
- config->task = 2;
209
+ config->task = EDLIB_TASK_PATH;
200
210
  }
201
211
  else
202
212
  {
@@ -217,12 +227,7 @@ set_task(EdlibAlignConfig *config, VALUE task)
217
227
  return task;
218
228
  }
219
229
 
220
- static VALUE
221
- aligner_set_task(VALUE self, VALUE task)
222
- {
223
- EdlibAlignConfig *config = get_config(self);
224
- return set_task(config, task);
225
- }
230
+ ALIGNER_SET_(task)
226
231
 
227
232
  static VALUE
228
233
  get_additional_equalities(EdlibAlignConfig *config)
@@ -241,21 +246,17 @@ get_additional_equalities(EdlibAlignConfig *config)
241
246
  return equalities;
242
247
  }
243
248
 
244
- static VALUE
245
- aligner_get_additional_equalities(VALUE self)
246
- {
247
- EdlibAlignConfig *config = get_config(self);
248
- return get_additional_equalities(config);
249
- }
249
+ ALIGNER_GET_(additional_equalities)
250
250
 
251
251
  static VALUE
252
- set_additional_equalities(EdlibAlignConfig *config, VALUE equalities)
252
+ set_additional_equalities(EdlibAlignConfig *config, EdlibEqualityPair *eqpairs, VALUE equalities)
253
253
  {
254
254
  Check_Type(equalities, T_ARRAY);
255
255
  int len = RARRAY_LEN(equalities);
256
256
  if (len == 0)
257
257
  {
258
- if(eqpairs != NULL) {
258
+ if (eqpairs != NULL)
259
+ {
259
260
  free(eqpairs);
260
261
  eqpairs = NULL;
261
262
  }
@@ -312,14 +313,15 @@ set_additional_equalities(EdlibAlignConfig *config, VALUE equalities)
312
313
  static VALUE
313
314
  aligner_set_additional_equalities(VALUE self, VALUE equalities)
314
315
  {
315
- EdlibAlignConfig *config = get_config(self);
316
- return set_additional_equalities(config, equalities);
316
+ EdlibAlignConfig *config = aligner_get_config(self);
317
+ EdlibEqualityPair *eqpairs = aligner_get_equalityPairs(self);
318
+ return set_additional_equalities(config, eqpairs, equalities);
317
319
  }
318
320
 
319
321
  static VALUE
320
322
  aligner_config_hash(VALUE self)
321
323
  {
322
- EdlibAlignConfig *config = get_config(self);
324
+ EdlibAlignConfig *config = aligner_get_config(self);
323
325
 
324
326
  VALUE hash = rb_hash_new();
325
327
 
@@ -332,16 +334,17 @@ aligner_config_hash(VALUE self)
332
334
  }
333
335
 
334
336
  static VALUE
335
- aligner_initialize(VALUE self, VALUE k, VALUE mode, VALUE task, VALUE additional_equalities)
337
+ aligner_initialize_raw(VALUE self, VALUE k, VALUE mode, VALUE task, VALUE additional_equalities)
336
338
  {
337
- EdlibAlignConfig *config = get_config(self);
338
-
339
+ EdlibAlignConfig *config = aligner_get_config(self);
340
+ EdlibEqualityPair *eqpairs = aligner_get_equalityPairs(self);
339
341
  config->k = NUM2INT(k);
340
342
  set_mode(config, mode);
341
343
  set_task(config, task);
344
+
342
345
  if (additional_equalities != Qnil)
343
346
  {
344
- set_additional_equalities(config, additional_equalities);
347
+ set_additional_equalities(config, eqpairs, additional_equalities);
345
348
  }
346
349
  else
347
350
  {
@@ -355,7 +358,7 @@ aligner_initialize(VALUE self, VALUE k, VALUE mode, VALUE task, VALUE additional
355
358
  static VALUE
356
359
  aligner_align(VALUE self, VALUE query, VALUE target)
357
360
  {
358
- EdlibAlignConfig *config = get_config(self);
361
+ EdlibAlignConfig *config = aligner_get_config(self);
359
362
  if (!config)
360
363
  {
361
364
  rb_raise(rb_eRuntimeError, "config is NULL");
@@ -427,8 +430,8 @@ void Init_edlibext(void)
427
430
  {
428
431
  mEdlib = rb_define_module("Edlib");
429
432
  cAligner = rb_define_class_under(mEdlib, "Aligner", rb_cObject);
430
- rb_define_alloc_func(cAligner, config_allocate);
431
- rb_define_private_method(cAligner, "initialize_raw", aligner_initialize, 4);
433
+ rb_define_alloc_func(cAligner, aligner_config_allocate);
434
+ rb_define_private_method(cAligner, "initialize_raw", aligner_initialize_raw, 4);
432
435
  rb_define_method(cAligner, "k", aligner_get_k, 0);
433
436
  rb_define_method(cAligner, "k=", aligner_set_k, 1);
434
437
  rb_define_method(cAligner, "mode", aligner_get_mode, 0);
@@ -438,5 +441,5 @@ void Init_edlibext(void)
438
441
  rb_define_method(cAligner, "additional_equalities", aligner_get_additional_equalities, 0);
439
442
  rb_define_method(cAligner, "additional_equalities=", aligner_set_additional_equalities, 1);
440
443
  rb_define_method(cAligner, "config", aligner_config_hash, 0);
441
- rb_define_method(cAligner, "align", aligner_align, 2);
444
+ rb_define_method(cAligner, "align_raw", aligner_align, 2);
442
445
  }
@@ -0,0 +1,15 @@
1
+ #ifndef EDLIBEXT_H
2
+ #define EDLIBEXT_H
3
+
4
+ #include "edlib.h"
5
+
6
+ typedef struct
7
+ {
8
+ EdlibAlignConfig *config;
9
+ EdlibEqualityPair *equalityPairs;
10
+ } RbAlignConfig;
11
+
12
+ static EdlibAlignConfig *aligner_get_config(VALUE self);
13
+ static EdlibEqualityPair *aligner_get_equalityPairs(VALUE self);
14
+
15
+ #endif // EDLIBEXT_H
data/ext/edlib/extconf.rb CHANGED
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'mkmf'
2
4
 
3
5
  create_makefile('edlib/edlibext')
data/lib/edlib/version.rb CHANGED
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Edlib
2
- VERSION = '0.0.5'
4
+ VERSION = '0.0.7'
3
5
  end
data/lib/edlib.rb CHANGED
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require_relative 'edlib/edlibext'
2
4
 
3
5
  module Edlib
@@ -9,5 +11,68 @@ module Edlib
9
11
  task = task.upcase if task.is_a? String
10
12
  initialize_raw(k, mode, task, additional_equalities)
11
13
  end
14
+
15
+ def align(query, target, nice: false)
16
+ if nice
17
+ align_nice(query, target)
18
+ else
19
+ align_raw(query, target)
20
+ end
21
+ end
22
+
23
+ private
24
+
25
+ def align_nice(query, target)
26
+ result = align(query, target)
27
+ result.merge(nice(result, query, target))
28
+ end
29
+
30
+ def nice(result, query, target, gap_symbol: '-')
31
+ raise 'result does not have :locations and :cigar' unless result.key?(:locations) && result.key?(:cigar)
32
+
33
+ target_pos = result[:locations][0][0]
34
+ query_pos = 0
35
+ query_aln = String.new
36
+ match_aln = String.new
37
+ target_aln = String.new
38
+ cigar = result[:cigar]
39
+ cigar.scan(/(\d+)(\D)/).each do |num, op|
40
+ num = num.to_i
41
+ case op
42
+ when '='
43
+ target_aln << target[target_pos, num]
44
+ target_pos += num
45
+ query_aln << query[query_pos, num]
46
+ query_pos += num
47
+ match_aln << '|' * num
48
+ when 'X'
49
+ target_aln << target[target_pos, num]
50
+ target_pos += num
51
+ query_aln << query[query_pos, num]
52
+ query_pos += num
53
+ match_aln << '.' * num
54
+ when 'D'
55
+ target_aln << target[target_pos, num]
56
+ target_pos += num
57
+ query_aln << gap_symbol * num
58
+ query_pos += 0
59
+ match_aln << gap_symbol * num
60
+ when 'I'
61
+ target_aln << gap_symbol * num
62
+ target_pos += 0
63
+ query_aln << query[query_pos, num]
64
+ query_pos += num
65
+ match_aln << gap_symbol * num
66
+ else
67
+ raise "Unknown CIGAR operation: #{op}"
68
+ end
69
+ end
70
+
71
+ {
72
+ query_aligned: query_aln,
73
+ match_aligned: match_aln,
74
+ target_aligned: target_aln
75
+ }
76
+ end
12
77
  end
13
78
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: edlib
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.5
4
+ version: 0.0.7
5
5
  platform: ruby
6
6
  authors:
7
7
  - kojix2
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2022-10-31 00:00:00.000000000 Z
11
+ date: 2022-11-07 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: 'Lightweight, super fast C/C++ library for sequence alignment using edit
14
14
  (Levenshtein) distance. '
@@ -23,6 +23,7 @@ files:
23
23
  - ext/edlib/edlib.cpp
24
24
  - ext/edlib/edlib.h
25
25
  - ext/edlib/edlibext.c
26
+ - ext/edlib/edlibext.h
26
27
  - ext/edlib/extconf.rb
27
28
  - lib/edlib.rb
28
29
  - lib/edlib/version.rb