edlib 0.0.6 → 0.0.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 41a9c8bdf279436f3ca2da4840a62d875c62ab5630c0411c7751446dd7c6db48
4
- data.tar.gz: 62436d4433d308d71e76ec5095a57d85c1da88a3a6addf1531cb00e65ed37d72
3
+ metadata.gz: b4014e8ff49033ae36755f0752dba89486afc104ad67cf528358a3e1d12d2cfd
4
+ data.tar.gz: 4241313686e900f0d5751f10375f5afc347208737f66d19830c8b89add5e02f0
5
5
  SHA512:
6
- metadata.gz: 9afed6c6ab4e8a59906a98441c464f093ad45a4b92464b0efba67166d925cf0243e4e484b063264940a2afb4867a181deb7dc55733cfb8ae0f082b472f47f286
7
- data.tar.gz: c472407d2aa613f12c75c554d525c3adafa21f689921c82e9fac3d43395ce3759f5240fafa06fe4c6985bac7d6ba5bf42944c746e55c0584b067ba57d8b455fb
6
+ metadata.gz: 586d9f3730505eb43b61eee2d875af40d8aa0f2cc852fd342ebea69566e83f8255f291d4f29b2c731591ec0c29d7057cfff3065707e537acedcf7fcd66cb4cd2
7
+ data.tar.gz: f18b99c6456cc9c62a052ea0cf873832d537001a8d92add2073faf14f77fe103b09de3d0888d02a2ab92d4ce4c5982ea4125dccd93263b150172db6af925788c
data/README.md CHANGED
@@ -38,6 +38,26 @@ a.align("AACG", "TCAACCTG")
38
38
  |task |DISTANCE, LOC, PATH ["DISTANCE"] |
39
39
  |additional_equalities|List of pairs of characters, where each pair defines two characters as equal. [NULL]|
40
40
 
41
+
42
+ ```ruby
43
+ a.align("AACG", "TCAACCTG", nice: true)
44
+
45
+ # {
46
+ # :edit_distance=>1,
47
+ # :alphabet_length=>4,
48
+ # :locations=>[[2, 4], [2, 5]],
49
+ # :alignment=>[0, 0, 0, 1],
50
+ # :cigar=>"3=1I",
51
+ # :query_aligned=>"AACG",
52
+ # :match_aligned=>"|||-",
53
+ # :target_aligned=>"AAC-"
54
+ # }
55
+ ```
56
+
57
+ ## Documentation
58
+
59
+ https://kojix2.github.io/ruby-edlib/
60
+
41
61
  ## Development
42
62
 
43
63
  Pull requests welcome!
data/ext/edlib/edlib.cpp CHANGED
@@ -889,10 +889,10 @@ static int myersCalcEditDistanceNW(const Word* const Peq, const int W, const int
889
889
  (*alignData)->Ps[maxNumBlocks * c + b] = bl->P;
890
890
  (*alignData)->Ms[maxNumBlocks * c + b] = bl->M;
891
891
  (*alignData)->scores[maxNumBlocks * c + b] = bl->score;
892
- (*alignData)->firstBlocks[c] = firstBlock;
893
- (*alignData)->lastBlocks[c] = lastBlock;
894
892
  bl++;
895
893
  }
894
+ (*alignData)->firstBlocks[c] = firstBlock;
895
+ (*alignData)->lastBlocks[c] = lastBlock;
896
896
  }
897
897
  //----------------------------------------------------------//
898
898
  //---- If this is stop column, save it and finish ----//
@@ -901,9 +901,9 @@ static int myersCalcEditDistanceNW(const Word* const Peq, const int W, const int
901
901
  (*alignData)->Ps[b] = (blocks + b)->P;
902
902
  (*alignData)->Ms[b] = (blocks + b)->M;
903
903
  (*alignData)->scores[b] = (blocks + b)->score;
904
- (*alignData)->firstBlocks[0] = firstBlock;
905
- (*alignData)->lastBlocks[0] = lastBlock;
906
904
  }
905
+ (*alignData)->firstBlocks[0] = firstBlock;
906
+ (*alignData)->lastBlocks[0] = lastBlock;
907
907
  *bestScore_ = -1;
908
908
  *position_ = targetStopPosition;
909
909
  delete[] blocks;
data/ext/edlib/edlibext.c CHANGED
@@ -1,18 +1,37 @@
1
1
  #include "ruby.h"
2
2
  #include "edlibext.h"
3
3
 
4
+ #define ALIGNER_GET_(name) \
5
+ static VALUE \
6
+ aligner_get_##name(VALUE self) \
7
+ { \
8
+ EdlibAlignConfig *config = aligner_get_config(self); \
9
+ return get_##name(config); \
10
+ }
11
+
12
+ #define ALIGNER_SET_(name) \
13
+ static VALUE \
14
+ aligner_set_##name(VALUE self, VALUE value) \
15
+ { \
16
+ EdlibAlignConfig *config = aligner_get_config(self); \
17
+ return set_##name(config, value); \
18
+ }
19
+
4
20
  VALUE mEdlib;
5
21
  VALUE cAligner;
6
22
 
7
23
  // Aligner class
8
24
 
25
+ static size_t aligner_config_memsize(const void *ptr);
26
+ static void aligner_config_free(void *ptr);
27
+
9
28
  static const rb_data_type_t config_type = {
10
- .wrap_struct_name = "RbAlignConfig",
11
- .function = {
12
- .dfree = aligner_config_free,
13
- .dsize = aligner_config_memsize,
14
- },
15
- .flags = RUBY_TYPED_FREE_IMMEDIATELY,
29
+ .wrap_struct_name = "RbAlignConfig",
30
+ .function = {
31
+ .dfree = aligner_config_free,
32
+ .dsize = aligner_config_memsize,
33
+ },
34
+ .flags = RUBY_TYPED_FREE_IMMEDIATELY,
16
35
  };
17
36
 
18
37
  static VALUE
@@ -73,12 +92,7 @@ get_k(EdlibAlignConfig *config)
73
92
  return INT2NUM(config->k);
74
93
  }
75
94
 
76
- static VALUE
77
- aligner_get_k(VALUE self)
78
- {
79
- EdlibAlignConfig *config = aligner_get_config(self);
80
- return get_k(config);
81
- }
95
+ ALIGNER_GET_(k)
82
96
 
83
97
  static VALUE
84
98
  set_k(EdlibAlignConfig *config, VALUE k)
@@ -87,35 +101,25 @@ set_k(EdlibAlignConfig *config, VALUE k)
87
101
  return k;
88
102
  }
89
103
 
90
- static VALUE
91
- aligner_set_k(VALUE self, VALUE k)
92
- {
93
- EdlibAlignConfig *config = aligner_get_config(self);
94
- return set_k(config, k);
95
- }
104
+ ALIGNER_SET_(k)
96
105
 
97
106
  static VALUE
98
107
  get_mode(EdlibAlignConfig *config)
99
108
  {
100
109
  switch (config->mode)
101
110
  {
102
- case 0:
111
+ case EDLIB_MODE_NW:
103
112
  return rb_str_new2("NW");
104
- case 1:
113
+ case EDLIB_MODE_SHW:
105
114
  return rb_str_new2("SHW");
106
- case 2:
115
+ case EDLIB_MODE_HW:
107
116
  return rb_str_new2("HW");
108
117
  default:
109
118
  return Qnil;
110
119
  }
111
120
  }
112
121
 
113
- static VALUE
114
- aligner_get_mode(VALUE self)
115
- {
116
- EdlibAlignConfig *config = aligner_get_config(self);
117
- return get_mode(config);
118
- }
122
+ ALIGNER_GET_(mode)
119
123
 
120
124
  static VALUE
121
125
  set_mode(EdlibAlignConfig *config, VALUE mode)
@@ -127,18 +131,19 @@ set_mode(EdlibAlignConfig *config, VALUE mode)
127
131
  switch (TYPE(mode))
128
132
  {
129
133
  case T_STRING:;
130
- rb_funcall(mode, rb_intern("upcase!"), 0);
131
- if (strcmp(RSTRING_PTR(mode), "NW") == 0)
134
+ VALUE mode_str = rb_funcall(mode, rb_intern("upcase"), 0);
135
+ char *mode_s = RSTRING_PTR(mode_str);
136
+ if (strcmp(mode_s, "NW") == 0)
132
137
  {
133
- config->mode = 0;
138
+ config->mode = EDLIB_MODE_NW;
134
139
  }
135
- else if (strcmp(RSTRING_PTR(mode), "SHW") == 0)
140
+ else if (strcmp(mode_s, "SHW") == 0)
136
141
  {
137
- config->mode = 1;
142
+ config->mode = EDLIB_MODE_SHW;
138
143
  }
139
- else if (strcmp(RSTRING_PTR(mode), "HW") == 0)
144
+ else if (strcmp(mode_s, "HW") == 0)
140
145
  {
141
- config->mode = 2;
146
+ config->mode = EDLIB_MODE_HW;
142
147
  }
143
148
  else
144
149
  {
@@ -159,35 +164,25 @@ set_mode(EdlibAlignConfig *config, VALUE mode)
159
164
  return mode;
160
165
  }
161
166
 
162
- static VALUE
163
- aligner_set_mode(VALUE self, VALUE mode)
164
- {
165
- EdlibAlignConfig *config = aligner_get_config(self);
166
- return set_mode(config, mode);
167
- }
167
+ ALIGNER_SET_(mode)
168
168
 
169
169
  static VALUE
170
170
  get_task(EdlibAlignConfig *config)
171
171
  {
172
172
  switch (config->task)
173
173
  {
174
- case 0:
174
+ case EDLIB_TASK_DISTANCE:
175
175
  return rb_str_new2("DISTANCE");
176
- case 1:
176
+ case EDLIB_TASK_LOC:
177
177
  return rb_str_new2("LOC");
178
- case 2:
178
+ case EDLIB_TASK_PATH:
179
179
  return rb_str_new2("PATH");
180
180
  default:
181
181
  return Qnil;
182
182
  }
183
183
  }
184
184
 
185
- static VALUE
186
- aligner_get_task(VALUE self)
187
- {
188
- EdlibAlignConfig *config = aligner_get_config(self);
189
- return get_task(config);
190
- }
185
+ ALIGNER_GET_(task)
191
186
 
192
187
  static VALUE
193
188
  set_task(EdlibAlignConfig *config, VALUE task)
@@ -199,18 +194,19 @@ set_task(EdlibAlignConfig *config, VALUE task)
199
194
  switch (TYPE(task))
200
195
  {
201
196
  case T_STRING:;
202
- rb_funcall(task, rb_intern("upcase!"), 0);
203
- if (strcmp(RSTRING_PTR(task), "DISTANCE") == 0)
197
+ VALUE task_str = rb_funcall(task, rb_intern("upcase"), 0);
198
+ char *task_s = RSTRING_PTR(task_str);
199
+ if (strcmp(task_s, "DISTANCE") == 0)
204
200
  {
205
- config->task = 0;
201
+ config->task = EDLIB_TASK_DISTANCE;
206
202
  }
207
- else if (strcmp(RSTRING_PTR(task), "LOC") == 0)
203
+ else if (strcmp(task_s, "LOC") == 0)
208
204
  {
209
- config->task = 1;
205
+ config->task = EDLIB_TASK_LOC;
210
206
  }
211
- else if (strcmp(RSTRING_PTR(task), "PATH") == 0)
207
+ else if (strcmp(task_s, "PATH") == 0)
212
208
  {
213
- config->task = 2;
209
+ config->task = EDLIB_TASK_PATH;
214
210
  }
215
211
  else
216
212
  {
@@ -231,12 +227,7 @@ set_task(EdlibAlignConfig *config, VALUE task)
231
227
  return task;
232
228
  }
233
229
 
234
- static VALUE
235
- aligner_set_task(VALUE self, VALUE task)
236
- {
237
- EdlibAlignConfig *config = aligner_get_config(self);
238
- return set_task(config, task);
239
- }
230
+ ALIGNER_SET_(task)
240
231
 
241
232
  static VALUE
242
233
  get_additional_equalities(EdlibAlignConfig *config)
@@ -255,18 +246,13 @@ get_additional_equalities(EdlibAlignConfig *config)
255
246
  return equalities;
256
247
  }
257
248
 
258
- static VALUE
259
- aligner_get_additional_equalities(VALUE self)
260
- {
261
- EdlibAlignConfig *config = aligner_get_config(self);
262
- return get_additional_equalities(config);
263
- }
249
+ ALIGNER_GET_(additional_equalities)
264
250
 
265
251
  static VALUE
266
252
  set_additional_equalities(EdlibAlignConfig *config, EdlibEqualityPair *eqpairs, VALUE equalities)
267
253
  {
268
254
  Check_Type(equalities, T_ARRAY);
269
- int len = RARRAY_LEN(equalities);
255
+ size_t len = RARRAY_LEN(equalities);
270
256
  if (len == 0)
271
257
  {
272
258
  if (eqpairs != NULL)
@@ -278,9 +264,25 @@ set_additional_equalities(EdlibAlignConfig *config, EdlibEqualityPair *eqpairs,
278
264
  config->additionalEqualitiesLength = 0;
279
265
  return equalities;
280
266
  }
267
+
268
+ // Check if len is too large
269
+ if (len > SIZE_MAX / sizeof(EdlibEqualityPair))
270
+ {
271
+ rb_raise(rb_eArgError, "Requested array is too large");
272
+ }
273
+
281
274
  char *first_arr = malloc(len * sizeof(char));
282
275
  char *second_arr = malloc(len * sizeof(char));
283
- for (int i = 0; i < len; i++)
276
+ if (first_arr == NULL || second_arr == NULL)
277
+ {
278
+ if (first_arr != NULL)
279
+ free(first_arr);
280
+ if (second_arr != NULL)
281
+ free(second_arr);
282
+ rb_raise(rb_eNoMemError, "Failed to allocate memory for equality pairs");
283
+ }
284
+
285
+ for (size_t i = 0; i < len; i++)
284
286
  {
285
287
  VALUE pair = rb_ary_entry(equalities, i);
286
288
  Check_Type(pair, T_ARRAY);
@@ -309,7 +311,7 @@ set_additional_equalities(EdlibAlignConfig *config, EdlibEqualityPair *eqpairs,
309
311
 
310
312
  eqpairs = (EdlibEqualityPair *)malloc(sizeof(EdlibEqualityPair) * len);
311
313
 
312
- for (int i = 0; i < len; i++)
314
+ for (size_t i = 0; i < len; i++)
313
315
  {
314
316
  eqpairs[i].first = first_arr[i];
315
317
  eqpairs[i].second = second_arr[i];
@@ -355,7 +357,7 @@ aligner_initialize_raw(VALUE self, VALUE k, VALUE mode, VALUE task, VALUE additi
355
357
  config->k = NUM2INT(k);
356
358
  set_mode(config, mode);
357
359
  set_task(config, task);
358
-
360
+
359
361
  if (additional_equalities != Qnil)
360
362
  {
361
363
  set_additional_equalities(config, eqpairs, additional_equalities);
@@ -379,11 +381,11 @@ aligner_align(VALUE self, VALUE query, VALUE target)
379
381
  }
380
382
 
381
383
  EdlibAlignResult result = edlibAlign(
382
- StringValueCStr(query),
383
- RSTRING_LEN(query),
384
- StringValueCStr(target),
385
- RSTRING_LEN(target),
386
- *config);
384
+ StringValueCStr(query),
385
+ RSTRING_LEN(query),
386
+ StringValueCStr(target),
387
+ RSTRING_LEN(target),
388
+ *config);
387
389
 
388
390
  if (result.status != 0)
389
391
  {
@@ -455,5 +457,5 @@ void Init_edlibext(void)
455
457
  rb_define_method(cAligner, "additional_equalities", aligner_get_additional_equalities, 0);
456
458
  rb_define_method(cAligner, "additional_equalities=", aligner_set_additional_equalities, 1);
457
459
  rb_define_method(cAligner, "config", aligner_config_hash, 0);
458
- rb_define_method(cAligner, "align", aligner_align, 2);
460
+ rb_define_method(cAligner, "align_raw", aligner_align, 2);
459
461
  }
data/ext/edlib/edlibext.h CHANGED
@@ -9,34 +9,7 @@ typedef struct
9
9
  EdlibEqualityPair *equalityPairs;
10
10
  } RbAlignConfig;
11
11
 
12
- static VALUE aligner_config_allocate(VALUE klass);
13
- static size_t aligner_config_memsize(const void *ptr);
14
- static void aligner_config_free(void *ptr);
15
-
16
12
  static EdlibAlignConfig *aligner_get_config(VALUE self);
17
13
  static EdlibEqualityPair *aligner_get_equalityPairs(VALUE self);
18
14
 
19
- static VALUE get_k(EdlibAlignConfig *config);
20
- static VALUE aligner_get_k(VALUE self);
21
- static VALUE set_k(EdlibAlignConfig *config, VALUE k);
22
- static VALUE aligner_set_k(VALUE self, VALUE k);
23
- static VALUE get_mode(EdlibAlignConfig *config);
24
- static VALUE aligner_get_mode(VALUE self);
25
- static VALUE set_mode(EdlibAlignConfig *config, VALUE mode);
26
- static VALUE aligner_set_mode(VALUE self, VALUE mode);
27
- static VALUE get_task(EdlibAlignConfig *config);
28
- static VALUE aligner_get_task(VALUE self);
29
- static VALUE set_task(EdlibAlignConfig *config, VALUE task);
30
- static VALUE aligner_set_task(VALUE self, VALUE task);
31
- static VALUE get_additional_equalities(EdlibAlignConfig *config);
32
- static VALUE aligner_get_additional_equalities(VALUE self);
33
- static VALUE set_additional_equalities(EdlibAlignConfig *config, EdlibEqualityPair *eqpairs, VALUE equalities);
34
- static VALUE aligner_set_additional_equalities(VALUE self, VALUE equalities);
35
-
36
- static VALUE aligner_config_hash(VALUE self);
37
-
38
- static VALUE aligner_initialize_raw(VALUE self, VALUE k, VALUE mode, VALUE task, VALUE additional_equalities);
39
- static VALUE aligner_align(VALUE self, VALUE query, VALUE target);
40
- void Init_edlibext(void);
41
-
42
15
  #endif // EDLIBEXT_H
data/ext/edlib/extconf.rb CHANGED
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'mkmf'
2
4
 
3
5
  create_makefile('edlib/edlibext')
data/lib/edlib/version.rb CHANGED
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Edlib
2
- VERSION = '0.0.6'
4
+ VERSION = '0.0.8'
3
5
  end
data/lib/edlib.rb CHANGED
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require_relative 'edlib/edlibext'
2
4
 
3
5
  module Edlib
@@ -9,5 +11,68 @@ module Edlib
9
11
  task = task.upcase if task.is_a? String
10
12
  initialize_raw(k, mode, task, additional_equalities)
11
13
  end
14
+
15
+ def align(query, target, nice: false)
16
+ if nice
17
+ align_nice(query, target)
18
+ else
19
+ align_raw(query, target)
20
+ end
21
+ end
22
+
23
+ private
24
+
25
+ def align_nice(query, target)
26
+ result = align(query, target)
27
+ result.merge(nice(result, query, target))
28
+ end
29
+
30
+ def nice(result, query, target, gap_symbol: '-')
31
+ raise 'result does not have :locations and :cigar' unless result.key?(:locations) && result.key?(:cigar)
32
+
33
+ target_pos = result[:locations][0][0]
34
+ query_pos = 0
35
+ query_aln = String.new
36
+ match_aln = String.new
37
+ target_aln = String.new
38
+ cigar = result[:cigar]
39
+ cigar.scan(/(\d+)(\D)/).each do |num, op|
40
+ num = num.to_i
41
+ case op
42
+ when '='
43
+ target_aln << target[target_pos, num]
44
+ target_pos += num
45
+ query_aln << query[query_pos, num]
46
+ query_pos += num
47
+ match_aln << '|' * num
48
+ when 'X'
49
+ target_aln << target[target_pos, num]
50
+ target_pos += num
51
+ query_aln << query[query_pos, num]
52
+ query_pos += num
53
+ match_aln << '.' * num
54
+ when 'D'
55
+ target_aln << target[target_pos, num]
56
+ target_pos += num
57
+ query_aln << gap_symbol * num
58
+ query_pos += 0
59
+ match_aln << gap_symbol * num
60
+ when 'I'
61
+ target_aln << gap_symbol * num
62
+ target_pos += 0
63
+ query_aln << query[query_pos, num]
64
+ query_pos += num
65
+ match_aln << gap_symbol * num
66
+ else
67
+ raise "Unknown CIGAR operation: #{op}"
68
+ end
69
+ end
70
+
71
+ {
72
+ query_aligned: query_aln,
73
+ match_aligned: match_aln,
74
+ target_aligned: target_aln
75
+ }
76
+ end
12
77
  end
13
78
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: edlib
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.6
4
+ version: 0.0.8
5
5
  platform: ruby
6
6
  authors:
7
7
  - kojix2
8
- autorequire:
8
+ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2022-11-02 00:00:00.000000000 Z
11
+ date: 2024-08-17 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: 'Lightweight, super fast C/C++ library for sequence alignment using edit
14
14
  (Levenshtein) distance. '
@@ -31,7 +31,7 @@ homepage: https://github.com/kojix2/ruby-edlib
31
31
  licenses:
32
32
  - MIT
33
33
  metadata: {}
34
- post_install_message:
34
+ post_install_message:
35
35
  rdoc_options: []
36
36
  require_paths:
37
37
  - lib
@@ -46,8 +46,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
46
46
  - !ruby/object:Gem::Version
47
47
  version: '0'
48
48
  requirements: []
49
- rubygems_version: 3.3.7
50
- signing_key:
49
+ rubygems_version: 3.5.11
50
+ signing_key:
51
51
  specification_version: 4
52
52
  summary: ruby-edlib is a wrapper for edlib.
53
53
  test_files: []