edlib 0.0.7 → 0.0.9
This diff compares the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +20 -0
- data/ext/edlib/LICENSE +20 -0
- data/ext/edlib/edlib.cpp +4 -4
- data/ext/edlib/edlib.h +1 -1
- data/ext/edlib/edlibext.c +46 -29
- data/lib/edlib/version.rb +1 -1
- metadata +9 -8
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: ef5b15bb5b7c58bee55633b12fb1ad4cf6effb44eafa00b8c077933480b710b1
|
4
|
+
data.tar.gz: da2dc59b146e9d52498b936ea4e4db2045c6e230fe5ed93b0bf921988576ab59
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 0e890af5ec844e45693bf2a4e022768f6b965714c04e9dd05f6a58f9adf683cf2980e394afe8bb1a478caddc722750e4d5b8bac292cacdaa7357dcf1520f165f
|
7
|
+
data.tar.gz: 106e704858775d74353f59ab5d53375c36873857ee7193daf180013ab0405fec89a7ce26e7e1a1eebc85d07f0e335d4350a2addd115a5a7c73c15ff5130c7571
|
data/README.md
CHANGED
@@ -38,6 +38,26 @@ a.align("AACG", "TCAACCTG")
|
|
38
38
|
|task |DISTANCE, LOC, PATH ["DISTANCE"] |
|
39
39
|
|additional_equalities|List of pairs of characters, where each pair defines two characters as equal. [NULL]|
|
40
40
|
|
41
|
+
|
42
|
+
```ruby
|
43
|
+
a.align("AACG", "TCAACCTG", nice: true)
|
44
|
+
|
45
|
+
# {
|
46
|
+
# :edit_distance=>1,
|
47
|
+
# :alphabet_length=>4,
|
48
|
+
# :locations=>[[2, 4], [2, 5]],
|
49
|
+
# :alignment=>[0, 0, 0, 1],
|
50
|
+
# :cigar=>"3=1I",
|
51
|
+
# :query_aligned=>"AACG",
|
52
|
+
# :match_aligned=>"|||-",
|
53
|
+
# :target_aligned=>"AAC-"
|
54
|
+
# }
|
55
|
+
```
|
56
|
+
|
57
|
+
## Documentation
|
58
|
+
|
59
|
+
https://kojix2.github.io/ruby-edlib/
|
60
|
+
|
41
61
|
## Development
|
42
62
|
|
43
63
|
Pull requests welcome!
|
data/ext/edlib/LICENSE
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
The MIT License (MIT)
|
2
|
+
|
3
|
+
Copyright (c) 2014 Martin Šošić
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy of
|
6
|
+
this software and associated documentation files (the "Software"), to deal in
|
7
|
+
the Software without restriction, including without limitation the rights to
|
8
|
+
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
|
9
|
+
the Software, and to permit persons to whom the Software is furnished to do so,
|
10
|
+
subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
13
|
+
copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
|
17
|
+
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
|
18
|
+
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
|
19
|
+
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
20
|
+
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/ext/edlib/edlib.cpp
CHANGED
@@ -889,10 +889,10 @@ static int myersCalcEditDistanceNW(const Word* const Peq, const int W, const int
|
|
889
889
|
(*alignData)->Ps[maxNumBlocks * c + b] = bl->P;
|
890
890
|
(*alignData)->Ms[maxNumBlocks * c + b] = bl->M;
|
891
891
|
(*alignData)->scores[maxNumBlocks * c + b] = bl->score;
|
892
|
-
(*alignData)->firstBlocks[c] = firstBlock;
|
893
|
-
(*alignData)->lastBlocks[c] = lastBlock;
|
894
892
|
bl++;
|
895
893
|
}
|
894
|
+
(*alignData)->firstBlocks[c] = firstBlock;
|
895
|
+
(*alignData)->lastBlocks[c] = lastBlock;
|
896
896
|
}
|
897
897
|
//----------------------------------------------------------//
|
898
898
|
//---- If this is stop column, save it and finish ----//
|
@@ -901,9 +901,9 @@ static int myersCalcEditDistanceNW(const Word* const Peq, const int W, const int
|
|
901
901
|
(*alignData)->Ps[b] = (blocks + b)->P;
|
902
902
|
(*alignData)->Ms[b] = (blocks + b)->M;
|
903
903
|
(*alignData)->scores[b] = (blocks + b)->score;
|
904
|
-
(*alignData)->firstBlocks[0] = firstBlock;
|
905
|
-
(*alignData)->lastBlocks[0] = lastBlock;
|
906
904
|
}
|
905
|
+
(*alignData)->firstBlocks[0] = firstBlock;
|
906
|
+
(*alignData)->lastBlocks[0] = lastBlock;
|
907
907
|
*bestScore_ = -1;
|
908
908
|
*position_ = targetStopPosition;
|
909
909
|
delete[] blocks;
|
data/ext/edlib/edlib.h
CHANGED
@@ -200,7 +200,7 @@ extern "C" {
|
|
200
200
|
* 1 stands for insertion to target.
|
201
201
|
* 2 stands for insertion to query.
|
202
202
|
* 3 stands for mismatch.
|
203
|
-
* Alignment aligns query to target from
|
203
|
+
* Alignment aligns query to target from beginning of query till end of query.
|
204
204
|
* If gaps are not penalized, they are not in alignment.
|
205
205
|
* If you do not free whole result object using edlibFreeAlignResult(), do not forget to use free().
|
206
206
|
*/
|
data/ext/edlib/edlibext.c
CHANGED
@@ -1,20 +1,20 @@
|
|
1
1
|
#include "ruby.h"
|
2
2
|
#include "edlibext.h"
|
3
3
|
|
4
|
-
#define ALIGNER_GET_(name)
|
5
|
-
static VALUE
|
6
|
-
|
7
|
-
{
|
8
|
-
|
9
|
-
return get_##name(config);
|
4
|
+
#define ALIGNER_GET_(name) \
|
5
|
+
static VALUE \
|
6
|
+
aligner_get_##name(VALUE self) \
|
7
|
+
{ \
|
8
|
+
EdlibAlignConfig *config = aligner_get_config(self); \
|
9
|
+
return get_##name(config); \
|
10
10
|
}
|
11
11
|
|
12
|
-
#define ALIGNER_SET_(name)
|
13
|
-
static VALUE
|
14
|
-
|
15
|
-
{
|
16
|
-
|
17
|
-
return set_##name(config, value);
|
12
|
+
#define ALIGNER_SET_(name) \
|
13
|
+
static VALUE \
|
14
|
+
aligner_set_##name(VALUE self, VALUE value) \
|
15
|
+
{ \
|
16
|
+
EdlibAlignConfig *config = aligner_get_config(self); \
|
17
|
+
return set_##name(config, value); \
|
18
18
|
}
|
19
19
|
|
20
20
|
VALUE mEdlib;
|
@@ -26,12 +26,12 @@ static size_t aligner_config_memsize(const void *ptr);
|
|
26
26
|
static void aligner_config_free(void *ptr);
|
27
27
|
|
28
28
|
static const rb_data_type_t config_type = {
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
29
|
+
.wrap_struct_name = "RbAlignConfig",
|
30
|
+
.function = {
|
31
|
+
.dfree = aligner_config_free,
|
32
|
+
.dsize = aligner_config_memsize,
|
33
|
+
},
|
34
|
+
.flags = RUBY_TYPED_FREE_IMMEDIATELY,
|
35
35
|
};
|
36
36
|
|
37
37
|
static VALUE
|
@@ -131,8 +131,8 @@ set_mode(EdlibAlignConfig *config, VALUE mode)
|
|
131
131
|
switch (TYPE(mode))
|
132
132
|
{
|
133
133
|
case T_STRING:;
|
134
|
-
|
135
|
-
|
134
|
+
VALUE mode_str = rb_funcall(mode, rb_intern("upcase"), 0);
|
135
|
+
char *mode_s = RSTRING_PTR(mode_str);
|
136
136
|
if (strcmp(mode_s, "NW") == 0)
|
137
137
|
{
|
138
138
|
config->mode = EDLIB_MODE_NW;
|
@@ -252,7 +252,7 @@ static VALUE
|
|
252
252
|
set_additional_equalities(EdlibAlignConfig *config, EdlibEqualityPair *eqpairs, VALUE equalities)
|
253
253
|
{
|
254
254
|
Check_Type(equalities, T_ARRAY);
|
255
|
-
|
255
|
+
size_t len = RARRAY_LEN(equalities);
|
256
256
|
if (len == 0)
|
257
257
|
{
|
258
258
|
if (eqpairs != NULL)
|
@@ -264,9 +264,25 @@ set_additional_equalities(EdlibAlignConfig *config, EdlibEqualityPair *eqpairs,
|
|
264
264
|
config->additionalEqualitiesLength = 0;
|
265
265
|
return equalities;
|
266
266
|
}
|
267
|
+
|
268
|
+
// Check if len is too large
|
269
|
+
if (len > SIZE_MAX / sizeof(EdlibEqualityPair))
|
270
|
+
{
|
271
|
+
rb_raise(rb_eArgError, "Requested array is too large");
|
272
|
+
}
|
273
|
+
|
267
274
|
char *first_arr = malloc(len * sizeof(char));
|
268
275
|
char *second_arr = malloc(len * sizeof(char));
|
269
|
-
|
276
|
+
if (first_arr == NULL || second_arr == NULL)
|
277
|
+
{
|
278
|
+
if (first_arr != NULL)
|
279
|
+
free(first_arr);
|
280
|
+
if (second_arr != NULL)
|
281
|
+
free(second_arr);
|
282
|
+
rb_raise(rb_eNoMemError, "Failed to allocate memory for equality pairs");
|
283
|
+
}
|
284
|
+
|
285
|
+
for (size_t i = 0; i < len; i++)
|
270
286
|
{
|
271
287
|
VALUE pair = rb_ary_entry(equalities, i);
|
272
288
|
Check_Type(pair, T_ARRAY);
|
@@ -295,7 +311,7 @@ set_additional_equalities(EdlibAlignConfig *config, EdlibEqualityPair *eqpairs,
|
|
295
311
|
|
296
312
|
eqpairs = (EdlibEqualityPair *)malloc(sizeof(EdlibEqualityPair) * len);
|
297
313
|
|
298
|
-
for (
|
314
|
+
for (size_t i = 0; i < len; i++)
|
299
315
|
{
|
300
316
|
eqpairs[i].first = first_arr[i];
|
301
317
|
eqpairs[i].second = second_arr[i];
|
@@ -341,7 +357,7 @@ aligner_initialize_raw(VALUE self, VALUE k, VALUE mode, VALUE task, VALUE additi
|
|
341
357
|
config->k = NUM2INT(k);
|
342
358
|
set_mode(config, mode);
|
343
359
|
set_task(config, task);
|
344
|
-
|
360
|
+
|
345
361
|
if (additional_equalities != Qnil)
|
346
362
|
{
|
347
363
|
set_additional_equalities(config, eqpairs, additional_equalities);
|
@@ -365,11 +381,11 @@ aligner_align(VALUE self, VALUE query, VALUE target)
|
|
365
381
|
}
|
366
382
|
|
367
383
|
EdlibAlignResult result = edlibAlign(
|
368
|
-
|
369
|
-
|
370
|
-
|
371
|
-
|
372
|
-
|
384
|
+
StringValueCStr(query),
|
385
|
+
RSTRING_LEN(query),
|
386
|
+
StringValueCStr(target),
|
387
|
+
RSTRING_LEN(target),
|
388
|
+
*config);
|
373
389
|
|
374
390
|
if (result.status != 0)
|
375
391
|
{
|
@@ -413,6 +429,7 @@ aligner_align(VALUE self, VALUE query, VALUE target)
|
|
413
429
|
|
414
430
|
char *ccigar = edlibAlignmentToCigar(result.alignment, result.alignmentLength, 1); // EDLIB_CIGAR_EXTENDED
|
415
431
|
cigar = rb_str_new2(ccigar);
|
432
|
+
free(ccigar);
|
416
433
|
|
417
434
|
VALUE hash = rb_hash_new();
|
418
435
|
rb_hash_aset(hash, ID2SYM(rb_intern("edit_distance")), edit_distance);
|
data/lib/edlib/version.rb
CHANGED
metadata
CHANGED
@@ -1,17 +1,17 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: edlib
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.7
|
4
|
+
version: 0.0.9
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- kojix2
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2024-09-20 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
|
-
description:
|
14
|
-
(Levenshtein) distance.
|
13
|
+
description: Lightweight, super fast C/C++ library for sequence alignment using edit
|
14
|
+
(Levenshtein) distance.
|
15
15
|
email:
|
16
16
|
- 2xijok@gmail.com
|
17
17
|
executables: []
|
@@ -20,6 +20,7 @@ extensions:
|
|
20
20
|
extra_rdoc_files: []
|
21
21
|
files:
|
22
22
|
- README.md
|
23
|
+
- ext/edlib/LICENSE
|
23
24
|
- ext/edlib/edlib.cpp
|
24
25
|
- ext/edlib/edlib.h
|
25
26
|
- ext/edlib/edlibext.c
|
@@ -31,7 +32,7 @@ homepage: https://github.com/kojix2/ruby-edlib
|
|
31
32
|
licenses:
|
32
33
|
- MIT
|
33
34
|
metadata: {}
|
34
|
-
post_install_message:
|
35
|
+
post_install_message:
|
35
36
|
rdoc_options: []
|
36
37
|
require_paths:
|
37
38
|
- lib
|
@@ -46,8 +47,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
46
47
|
- !ruby/object:Gem::Version
|
47
48
|
version: '0'
|
48
49
|
requirements: []
|
49
|
-
rubygems_version: 3.
|
50
|
-
signing_key:
|
50
|
+
rubygems_version: 3.5.16
|
51
|
+
signing_key:
|
51
52
|
specification_version: 4
|
52
53
|
summary: ruby-edlib is a wrapper for edlib.
|
53
54
|
test_files: []
|