edlib 0.0.6 → 0.0.8

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 41a9c8bdf279436f3ca2da4840a62d875c62ab5630c0411c7751446dd7c6db48
4
- data.tar.gz: 62436d4433d308d71e76ec5095a57d85c1da88a3a6addf1531cb00e65ed37d72
3
+ metadata.gz: b4014e8ff49033ae36755f0752dba89486afc104ad67cf528358a3e1d12d2cfd
4
+ data.tar.gz: 4241313686e900f0d5751f10375f5afc347208737f66d19830c8b89add5e02f0
5
5
  SHA512:
6
- metadata.gz: 9afed6c6ab4e8a59906a98441c464f093ad45a4b92464b0efba67166d925cf0243e4e484b063264940a2afb4867a181deb7dc55733cfb8ae0f082b472f47f286
7
- data.tar.gz: c472407d2aa613f12c75c554d525c3adafa21f689921c82e9fac3d43395ce3759f5240fafa06fe4c6985bac7d6ba5bf42944c746e55c0584b067ba57d8b455fb
6
+ metadata.gz: 586d9f3730505eb43b61eee2d875af40d8aa0f2cc852fd342ebea69566e83f8255f291d4f29b2c731591ec0c29d7057cfff3065707e537acedcf7fcd66cb4cd2
7
+ data.tar.gz: f18b99c6456cc9c62a052ea0cf873832d537001a8d92add2073faf14f77fe103b09de3d0888d02a2ab92d4ce4c5982ea4125dccd93263b150172db6af925788c
data/README.md CHANGED
@@ -38,6 +38,26 @@ a.align("AACG", "TCAACCTG")
38
38
  |task |DISTANCE, LOC, PATH ["DISTANCE"] |
39
39
  |additional_equalities|List of pairs of characters, where each pair defines two characters as equal. [NULL]|
40
40
 
41
+
42
+ ```ruby
43
+ a.align("AACG", "TCAACCTG", nice: true)
44
+
45
+ # {
46
+ # :edit_distance=>1,
47
+ # :alphabet_length=>4,
48
+ # :locations=>[[2, 4], [2, 5]],
49
+ # :alignment=>[0, 0, 0, 1],
50
+ # :cigar=>"3=1I",
51
+ # :query_aligned=>"AACG",
52
+ # :match_aligned=>"|||-",
53
+ # :target_aligned=>"AAC-"
54
+ # }
55
+ ```
56
+
57
+ ## Documentation
58
+
59
+ https://kojix2.github.io/ruby-edlib/
60
+
41
61
  ## Development
42
62
 
43
63
  Pull requests welcome!
data/ext/edlib/edlib.cpp CHANGED
@@ -889,10 +889,10 @@ static int myersCalcEditDistanceNW(const Word* const Peq, const int W, const int
889
889
  (*alignData)->Ps[maxNumBlocks * c + b] = bl->P;
890
890
  (*alignData)->Ms[maxNumBlocks * c + b] = bl->M;
891
891
  (*alignData)->scores[maxNumBlocks * c + b] = bl->score;
892
- (*alignData)->firstBlocks[c] = firstBlock;
893
- (*alignData)->lastBlocks[c] = lastBlock;
894
892
  bl++;
895
893
  }
894
+ (*alignData)->firstBlocks[c] = firstBlock;
895
+ (*alignData)->lastBlocks[c] = lastBlock;
896
896
  }
897
897
  //----------------------------------------------------------//
898
898
  //---- If this is stop column, save it and finish ----//
@@ -901,9 +901,9 @@ static int myersCalcEditDistanceNW(const Word* const Peq, const int W, const int
901
901
  (*alignData)->Ps[b] = (blocks + b)->P;
902
902
  (*alignData)->Ms[b] = (blocks + b)->M;
903
903
  (*alignData)->scores[b] = (blocks + b)->score;
904
- (*alignData)->firstBlocks[0] = firstBlock;
905
- (*alignData)->lastBlocks[0] = lastBlock;
906
904
  }
905
+ (*alignData)->firstBlocks[0] = firstBlock;
906
+ (*alignData)->lastBlocks[0] = lastBlock;
907
907
  *bestScore_ = -1;
908
908
  *position_ = targetStopPosition;
909
909
  delete[] blocks;
data/ext/edlib/edlibext.c CHANGED
@@ -1,18 +1,37 @@
1
1
  #include "ruby.h"
2
2
  #include "edlibext.h"
3
3
 
4
+ #define ALIGNER_GET_(name) \
5
+ static VALUE \
6
+ aligner_get_##name(VALUE self) \
7
+ { \
8
+ EdlibAlignConfig *config = aligner_get_config(self); \
9
+ return get_##name(config); \
10
+ }
11
+
12
+ #define ALIGNER_SET_(name) \
13
+ static VALUE \
14
+ aligner_set_##name(VALUE self, VALUE value) \
15
+ { \
16
+ EdlibAlignConfig *config = aligner_get_config(self); \
17
+ return set_##name(config, value); \
18
+ }
19
+
4
20
  VALUE mEdlib;
5
21
  VALUE cAligner;
6
22
 
7
23
  // Aligner class
8
24
 
25
+ static size_t aligner_config_memsize(const void *ptr);
26
+ static void aligner_config_free(void *ptr);
27
+
9
28
  static const rb_data_type_t config_type = {
10
- .wrap_struct_name = "RbAlignConfig",
11
- .function = {
12
- .dfree = aligner_config_free,
13
- .dsize = aligner_config_memsize,
14
- },
15
- .flags = RUBY_TYPED_FREE_IMMEDIATELY,
29
+ .wrap_struct_name = "RbAlignConfig",
30
+ .function = {
31
+ .dfree = aligner_config_free,
32
+ .dsize = aligner_config_memsize,
33
+ },
34
+ .flags = RUBY_TYPED_FREE_IMMEDIATELY,
16
35
  };
17
36
 
18
37
  static VALUE
@@ -73,12 +92,7 @@ get_k(EdlibAlignConfig *config)
73
92
  return INT2NUM(config->k);
74
93
  }
75
94
 
76
- static VALUE
77
- aligner_get_k(VALUE self)
78
- {
79
- EdlibAlignConfig *config = aligner_get_config(self);
80
- return get_k(config);
81
- }
95
+ ALIGNER_GET_(k)
82
96
 
83
97
  static VALUE
84
98
  set_k(EdlibAlignConfig *config, VALUE k)
@@ -87,35 +101,25 @@ set_k(EdlibAlignConfig *config, VALUE k)
87
101
  return k;
88
102
  }
89
103
 
90
- static VALUE
91
- aligner_set_k(VALUE self, VALUE k)
92
- {
93
- EdlibAlignConfig *config = aligner_get_config(self);
94
- return set_k(config, k);
95
- }
104
+ ALIGNER_SET_(k)
96
105
 
97
106
  static VALUE
98
107
  get_mode(EdlibAlignConfig *config)
99
108
  {
100
109
  switch (config->mode)
101
110
  {
102
- case 0:
111
+ case EDLIB_MODE_NW:
103
112
  return rb_str_new2("NW");
104
- case 1:
113
+ case EDLIB_MODE_SHW:
105
114
  return rb_str_new2("SHW");
106
- case 2:
115
+ case EDLIB_MODE_HW:
107
116
  return rb_str_new2("HW");
108
117
  default:
109
118
  return Qnil;
110
119
  }
111
120
  }
112
121
 
113
- static VALUE
114
- aligner_get_mode(VALUE self)
115
- {
116
- EdlibAlignConfig *config = aligner_get_config(self);
117
- return get_mode(config);
118
- }
122
+ ALIGNER_GET_(mode)
119
123
 
120
124
  static VALUE
121
125
  set_mode(EdlibAlignConfig *config, VALUE mode)
@@ -127,18 +131,19 @@ set_mode(EdlibAlignConfig *config, VALUE mode)
127
131
  switch (TYPE(mode))
128
132
  {
129
133
  case T_STRING:;
130
- rb_funcall(mode, rb_intern("upcase!"), 0);
131
- if (strcmp(RSTRING_PTR(mode), "NW") == 0)
134
+ VALUE mode_str = rb_funcall(mode, rb_intern("upcase"), 0);
135
+ char *mode_s = RSTRING_PTR(mode_str);
136
+ if (strcmp(mode_s, "NW") == 0)
132
137
  {
133
- config->mode = 0;
138
+ config->mode = EDLIB_MODE_NW;
134
139
  }
135
- else if (strcmp(RSTRING_PTR(mode), "SHW") == 0)
140
+ else if (strcmp(mode_s, "SHW") == 0)
136
141
  {
137
- config->mode = 1;
142
+ config->mode = EDLIB_MODE_SHW;
138
143
  }
139
- else if (strcmp(RSTRING_PTR(mode), "HW") == 0)
144
+ else if (strcmp(mode_s, "HW") == 0)
140
145
  {
141
- config->mode = 2;
146
+ config->mode = EDLIB_MODE_HW;
142
147
  }
143
148
  else
144
149
  {
@@ -159,35 +164,25 @@ set_mode(EdlibAlignConfig *config, VALUE mode)
159
164
  return mode;
160
165
  }
161
166
 
162
- static VALUE
163
- aligner_set_mode(VALUE self, VALUE mode)
164
- {
165
- EdlibAlignConfig *config = aligner_get_config(self);
166
- return set_mode(config, mode);
167
- }
167
+ ALIGNER_SET_(mode)
168
168
 
169
169
  static VALUE
170
170
  get_task(EdlibAlignConfig *config)
171
171
  {
172
172
  switch (config->task)
173
173
  {
174
- case 0:
174
+ case EDLIB_TASK_DISTANCE:
175
175
  return rb_str_new2("DISTANCE");
176
- case 1:
176
+ case EDLIB_TASK_LOC:
177
177
  return rb_str_new2("LOC");
178
- case 2:
178
+ case EDLIB_TASK_PATH:
179
179
  return rb_str_new2("PATH");
180
180
  default:
181
181
  return Qnil;
182
182
  }
183
183
  }
184
184
 
185
- static VALUE
186
- aligner_get_task(VALUE self)
187
- {
188
- EdlibAlignConfig *config = aligner_get_config(self);
189
- return get_task(config);
190
- }
185
+ ALIGNER_GET_(task)
191
186
 
192
187
  static VALUE
193
188
  set_task(EdlibAlignConfig *config, VALUE task)
@@ -199,18 +194,19 @@ set_task(EdlibAlignConfig *config, VALUE task)
199
194
  switch (TYPE(task))
200
195
  {
201
196
  case T_STRING:;
202
- rb_funcall(task, rb_intern("upcase!"), 0);
203
- if (strcmp(RSTRING_PTR(task), "DISTANCE") == 0)
197
+ VALUE task_str = rb_funcall(task, rb_intern("upcase"), 0);
198
+ char *task_s = RSTRING_PTR(task_str);
199
+ if (strcmp(task_s, "DISTANCE") == 0)
204
200
  {
205
- config->task = 0;
201
+ config->task = EDLIB_TASK_DISTANCE;
206
202
  }
207
- else if (strcmp(RSTRING_PTR(task), "LOC") == 0)
203
+ else if (strcmp(task_s, "LOC") == 0)
208
204
  {
209
- config->task = 1;
205
+ config->task = EDLIB_TASK_LOC;
210
206
  }
211
- else if (strcmp(RSTRING_PTR(task), "PATH") == 0)
207
+ else if (strcmp(task_s, "PATH") == 0)
212
208
  {
213
- config->task = 2;
209
+ config->task = EDLIB_TASK_PATH;
214
210
  }
215
211
  else
216
212
  {
@@ -231,12 +227,7 @@ set_task(EdlibAlignConfig *config, VALUE task)
231
227
  return task;
232
228
  }
233
229
 
234
- static VALUE
235
- aligner_set_task(VALUE self, VALUE task)
236
- {
237
- EdlibAlignConfig *config = aligner_get_config(self);
238
- return set_task(config, task);
239
- }
230
+ ALIGNER_SET_(task)
240
231
 
241
232
  static VALUE
242
233
  get_additional_equalities(EdlibAlignConfig *config)
@@ -255,18 +246,13 @@ get_additional_equalities(EdlibAlignConfig *config)
255
246
  return equalities;
256
247
  }
257
248
 
258
- static VALUE
259
- aligner_get_additional_equalities(VALUE self)
260
- {
261
- EdlibAlignConfig *config = aligner_get_config(self);
262
- return get_additional_equalities(config);
263
- }
249
+ ALIGNER_GET_(additional_equalities)
264
250
 
265
251
  static VALUE
266
252
  set_additional_equalities(EdlibAlignConfig *config, EdlibEqualityPair *eqpairs, VALUE equalities)
267
253
  {
268
254
  Check_Type(equalities, T_ARRAY);
269
- int len = RARRAY_LEN(equalities);
255
+ size_t len = RARRAY_LEN(equalities);
270
256
  if (len == 0)
271
257
  {
272
258
  if (eqpairs != NULL)
@@ -278,9 +264,25 @@ set_additional_equalities(EdlibAlignConfig *config, EdlibEqualityPair *eqpairs,
278
264
  config->additionalEqualitiesLength = 0;
279
265
  return equalities;
280
266
  }
267
+
268
+ // Check if len is too large
269
+ if (len > SIZE_MAX / sizeof(EdlibEqualityPair))
270
+ {
271
+ rb_raise(rb_eArgError, "Requested array is too large");
272
+ }
273
+
281
274
  char *first_arr = malloc(len * sizeof(char));
282
275
  char *second_arr = malloc(len * sizeof(char));
283
- for (int i = 0; i < len; i++)
276
+ if (first_arr == NULL || second_arr == NULL)
277
+ {
278
+ if (first_arr != NULL)
279
+ free(first_arr);
280
+ if (second_arr != NULL)
281
+ free(second_arr);
282
+ rb_raise(rb_eNoMemError, "Failed to allocate memory for equality pairs");
283
+ }
284
+
285
+ for (size_t i = 0; i < len; i++)
284
286
  {
285
287
  VALUE pair = rb_ary_entry(equalities, i);
286
288
  Check_Type(pair, T_ARRAY);
@@ -309,7 +311,7 @@ set_additional_equalities(EdlibAlignConfig *config, EdlibEqualityPair *eqpairs,
309
311
 
310
312
  eqpairs = (EdlibEqualityPair *)malloc(sizeof(EdlibEqualityPair) * len);
311
313
 
312
- for (int i = 0; i < len; i++)
314
+ for (size_t i = 0; i < len; i++)
313
315
  {
314
316
  eqpairs[i].first = first_arr[i];
315
317
  eqpairs[i].second = second_arr[i];
@@ -355,7 +357,7 @@ aligner_initialize_raw(VALUE self, VALUE k, VALUE mode, VALUE task, VALUE additi
355
357
  config->k = NUM2INT(k);
356
358
  set_mode(config, mode);
357
359
  set_task(config, task);
358
-
360
+
359
361
  if (additional_equalities != Qnil)
360
362
  {
361
363
  set_additional_equalities(config, eqpairs, additional_equalities);
@@ -379,11 +381,11 @@ aligner_align(VALUE self, VALUE query, VALUE target)
379
381
  }
380
382
 
381
383
  EdlibAlignResult result = edlibAlign(
382
- StringValueCStr(query),
383
- RSTRING_LEN(query),
384
- StringValueCStr(target),
385
- RSTRING_LEN(target),
386
- *config);
384
+ StringValueCStr(query),
385
+ RSTRING_LEN(query),
386
+ StringValueCStr(target),
387
+ RSTRING_LEN(target),
388
+ *config);
387
389
 
388
390
  if (result.status != 0)
389
391
  {
@@ -455,5 +457,5 @@ void Init_edlibext(void)
455
457
  rb_define_method(cAligner, "additional_equalities", aligner_get_additional_equalities, 0);
456
458
  rb_define_method(cAligner, "additional_equalities=", aligner_set_additional_equalities, 1);
457
459
  rb_define_method(cAligner, "config", aligner_config_hash, 0);
458
- rb_define_method(cAligner, "align", aligner_align, 2);
460
+ rb_define_method(cAligner, "align_raw", aligner_align, 2);
459
461
  }
data/ext/edlib/edlibext.h CHANGED
@@ -9,34 +9,7 @@ typedef struct
9
9
  EdlibEqualityPair *equalityPairs;
10
10
  } RbAlignConfig;
11
11
 
12
- static VALUE aligner_config_allocate(VALUE klass);
13
- static size_t aligner_config_memsize(const void *ptr);
14
- static void aligner_config_free(void *ptr);
15
-
16
12
  static EdlibAlignConfig *aligner_get_config(VALUE self);
17
13
  static EdlibEqualityPair *aligner_get_equalityPairs(VALUE self);
18
14
 
19
- static VALUE get_k(EdlibAlignConfig *config);
20
- static VALUE aligner_get_k(VALUE self);
21
- static VALUE set_k(EdlibAlignConfig *config, VALUE k);
22
- static VALUE aligner_set_k(VALUE self, VALUE k);
23
- static VALUE get_mode(EdlibAlignConfig *config);
24
- static VALUE aligner_get_mode(VALUE self);
25
- static VALUE set_mode(EdlibAlignConfig *config, VALUE mode);
26
- static VALUE aligner_set_mode(VALUE self, VALUE mode);
27
- static VALUE get_task(EdlibAlignConfig *config);
28
- static VALUE aligner_get_task(VALUE self);
29
- static VALUE set_task(EdlibAlignConfig *config, VALUE task);
30
- static VALUE aligner_set_task(VALUE self, VALUE task);
31
- static VALUE get_additional_equalities(EdlibAlignConfig *config);
32
- static VALUE aligner_get_additional_equalities(VALUE self);
33
- static VALUE set_additional_equalities(EdlibAlignConfig *config, EdlibEqualityPair *eqpairs, VALUE equalities);
34
- static VALUE aligner_set_additional_equalities(VALUE self, VALUE equalities);
35
-
36
- static VALUE aligner_config_hash(VALUE self);
37
-
38
- static VALUE aligner_initialize_raw(VALUE self, VALUE k, VALUE mode, VALUE task, VALUE additional_equalities);
39
- static VALUE aligner_align(VALUE self, VALUE query, VALUE target);
40
- void Init_edlibext(void);
41
-
42
15
  #endif // EDLIBEXT_H
data/ext/edlib/extconf.rb CHANGED
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'mkmf'
2
4
 
3
5
  create_makefile('edlib/edlibext')
data/lib/edlib/version.rb CHANGED
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Edlib
2
- VERSION = '0.0.6'
4
+ VERSION = '0.0.8'
3
5
  end
data/lib/edlib.rb CHANGED
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require_relative 'edlib/edlibext'
2
4
 
3
5
  module Edlib
@@ -9,5 +11,68 @@ module Edlib
9
11
  task = task.upcase if task.is_a? String
10
12
  initialize_raw(k, mode, task, additional_equalities)
11
13
  end
14
+
15
+ def align(query, target, nice: false)
16
+ if nice
17
+ align_nice(query, target)
18
+ else
19
+ align_raw(query, target)
20
+ end
21
+ end
22
+
23
+ private
24
+
25
+ def align_nice(query, target)
26
+ result = align(query, target)
27
+ result.merge(nice(result, query, target))
28
+ end
29
+
30
+ def nice(result, query, target, gap_symbol: '-')
31
+ raise 'result does not have :locations and :cigar' unless result.key?(:locations) && result.key?(:cigar)
32
+
33
+ target_pos = result[:locations][0][0]
34
+ query_pos = 0
35
+ query_aln = String.new
36
+ match_aln = String.new
37
+ target_aln = String.new
38
+ cigar = result[:cigar]
39
+ cigar.scan(/(\d+)(\D)/).each do |num, op|
40
+ num = num.to_i
41
+ case op
42
+ when '='
43
+ target_aln << target[target_pos, num]
44
+ target_pos += num
45
+ query_aln << query[query_pos, num]
46
+ query_pos += num
47
+ match_aln << '|' * num
48
+ when 'X'
49
+ target_aln << target[target_pos, num]
50
+ target_pos += num
51
+ query_aln << query[query_pos, num]
52
+ query_pos += num
53
+ match_aln << '.' * num
54
+ when 'D'
55
+ target_aln << target[target_pos, num]
56
+ target_pos += num
57
+ query_aln << gap_symbol * num
58
+ query_pos += 0
59
+ match_aln << gap_symbol * num
60
+ when 'I'
61
+ target_aln << gap_symbol * num
62
+ target_pos += 0
63
+ query_aln << query[query_pos, num]
64
+ query_pos += num
65
+ match_aln << gap_symbol * num
66
+ else
67
+ raise "Unknown CIGAR operation: #{op}"
68
+ end
69
+ end
70
+
71
+ {
72
+ query_aligned: query_aln,
73
+ match_aligned: match_aln,
74
+ target_aligned: target_aln
75
+ }
76
+ end
12
77
  end
13
78
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: edlib
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.6
4
+ version: 0.0.8
5
5
  platform: ruby
6
6
  authors:
7
7
  - kojix2
8
- autorequire:
8
+ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2022-11-02 00:00:00.000000000 Z
11
+ date: 2024-08-17 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: 'Lightweight, super fast C/C++ library for sequence alignment using edit
14
14
  (Levenshtein) distance. '
@@ -31,7 +31,7 @@ homepage: https://github.com/kojix2/ruby-edlib
31
31
  licenses:
32
32
  - MIT
33
33
  metadata: {}
34
- post_install_message:
34
+ post_install_message:
35
35
  rdoc_options: []
36
36
  require_paths:
37
37
  - lib
@@ -46,8 +46,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
46
46
  - !ruby/object:Gem::Version
47
47
  version: '0'
48
48
  requirements: []
49
- rubygems_version: 3.3.7
50
- signing_key:
49
+ rubygems_version: 3.5.11
50
+ signing_key:
51
51
  specification_version: 4
52
52
  summary: ruby-edlib is a wrapper for edlib.
53
53
  test_files: []