divsufsort 0.1.0-mswin32
Sign up to get free protection for your applications and to get access to all the features.
- data/README.txt +77 -0
- data/ext/divsufsort_ruby.c +227 -0
- data/lib/i386-mswin32/divsufsort.so +0 -0
- metadata +57 -0
data/README.txt
ADDED
@@ -0,0 +1,77 @@
|
|
1
|
+
= divsufsort
|
2
|
+
|
3
|
+
Copyright (c) 2008 SUGAWARA Genki <sgwr_dts@yahoo.co.jp>
|
4
|
+
|
5
|
+
== Description
|
6
|
+
|
7
|
+
Ruby bindings for libdivsufsort.
|
8
|
+
|
9
|
+
libdivsufsort is a C API library to construct the suffix array and the Burrows-Wheeler transformed string.
|
10
|
+
|
11
|
+
== Project Page
|
12
|
+
|
13
|
+
http://rubyforge.org/projects/divsufsort
|
14
|
+
|
15
|
+
== Install
|
16
|
+
|
17
|
+
gem install divsufsort
|
18
|
+
|
19
|
+
== Example
|
20
|
+
=== Burrows-Wheeler Transform/Inverse Burrows-Wheeler Transform
|
21
|
+
|
22
|
+
require 'divsufsort'
|
23
|
+
include Divsufsort
|
24
|
+
|
25
|
+
bwt = divbwt(<<-EOS)
|
26
|
+
London bridge is falling down,
|
27
|
+
Falling down, falling down,
|
28
|
+
London bridge is falling down,
|
29
|
+
My fair Lady.
|
30
|
+
EOS
|
31
|
+
|
32
|
+
unbwt = inverse_bw_transform(bwt)
|
33
|
+
|
34
|
+
=== Construct the suffix array
|
35
|
+
|
36
|
+
require 'divsufsort'
|
37
|
+
include Divsufsort
|
38
|
+
|
39
|
+
sa = divsufsort(<<-EOS)
|
40
|
+
London bridge is falling down,
|
41
|
+
Falling down, falling down,
|
42
|
+
London bridge is falling down,
|
43
|
+
My fair Lady.
|
44
|
+
EOS
|
45
|
+
|
46
|
+
== License
|
47
|
+
Copyright (c) 2008 SUGAWARA Genki <sgwr_dts@yahoo.co.jp>
|
48
|
+
All rights reserved.
|
49
|
+
|
50
|
+
Redistribution and use in source and binary forms, with or without modification,
|
51
|
+
are permitted provided that the following conditions are met:
|
52
|
+
|
53
|
+
* Redistributions of source code must retain the above copyright notice,
|
54
|
+
this list of conditions and the following disclaimer.
|
55
|
+
* Redistributions in binary form must reproduce the above copyright notice,
|
56
|
+
this list of conditions and the following disclaimer in the documentation
|
57
|
+
and/or other materials provided with the distribution.
|
58
|
+
* The names of its contributors may be used to endorse or promote products
|
59
|
+
derived from this software without specific prior written permission.
|
60
|
+
|
61
|
+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
62
|
+
ANY EXPRESS OR IMPLIED WARRANTIES,
|
63
|
+
INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
|
64
|
+
FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
65
|
+
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
66
|
+
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
|
67
|
+
OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
68
|
+
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
|
69
|
+
STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
70
|
+
OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
|
71
|
+
DAMAGE.
|
72
|
+
|
73
|
+
=== libdivsufsort
|
74
|
+
divsufsort contains libdivsufsort.
|
75
|
+
|
76
|
+
* libdivsufsort is a lightweight suffix-sorting library.
|
77
|
+
* http://code.google.com/p/libdivsufsort/
|
@@ -0,0 +1,227 @@
|
|
1
|
+
/*
 * DLLEXPORT expands to the MSVC export attribute when the extension itself is
 * being built (DIVSUFSORT_EXPORTS defined) and to nothing otherwise.
 */
#if defined(DIVSUFSORT_EXPORTS)
#  define DLLEXPORT __declspec(dllexport)
#else
#  define DLLEXPORT
#endif
|
6
|
+
|
7
|
+
/* Version string of this extension; published to Ruby as Divsufsort::VERSION. */
#define VERSION "0.1.0"
|
8
|
+
|
9
|
+
#include "divsufsort.h"
|
10
|
+
#include "ruby.h"
|
11
|
+
#include "rubysig.h"
|
12
|
+
|
13
|
+
/*
 * Fallback string accessors: when the included Ruby headers do not define
 * RSTRING_PTR / RSTRING_LEN, read the `ptr` and `len` members through
 * RSTRING() directly.
 */
#if !defined(RSTRING_PTR)
#  define RSTRING_PTR(s) (RSTRING(s)->ptr)
#endif

#if !defined(RSTRING_LEN)
#  define RSTRING_LEN(s) (RSTRING(s)->len)
#endif
|
19
|
+
|
20
|
+
/*
 * Append the low 32 bits of the integer `n` to the Ruby string `o` as four
 * bytes, least significant byte first (little-endian).
 *
 *   o - Ruby String VALUE to append to (passed straight to rb_str_cat).
 *   n - integer expression; evaluated exactly once.
 *
 * `n` is converted to unsigned before shifting so a negative value is written
 * as its two's-complement bit pattern without right-shifting a signed value.
 * The temporaries carry a macro-specific name so that an argument spelled `c`
 * (or any other short identifier) is not captured by the macro body, and the
 * byte buffer is cast to the `const char *` that rb_str_cat expects.
 */
#define RB_STR_CAT_INT(o, n) do { \
  const unsigned long rb_str_cat_int_v_ = (unsigned long) (n); \
  unsigned char rb_str_cat_int_b_[4]; \
  rb_str_cat_int_b_[0] = (unsigned char) ((rb_str_cat_int_v_ >>  0) & 0xffUL); \
  rb_str_cat_int_b_[1] = (unsigned char) ((rb_str_cat_int_v_ >>  8) & 0xffUL); \
  rb_str_cat_int_b_[2] = (unsigned char) ((rb_str_cat_int_v_ >> 16) & 0xffUL); \
  rb_str_cat_int_b_[3] = (unsigned char) ((rb_str_cat_int_v_ >> 24) & 0xffUL); \
  rb_str_cat((o), (const char *) rb_str_cat_int_b_, 4); \
} while(0)
|
28
|
+
|
29
|
+
/*
 * Decode a 32-bit little-endian signed integer from the four bytes at `p`
 * into `x`, then advance `p` by 4 and reduce the remaining-byte counter `n`
 * by 4. The caller must have checked that at least four bytes are available.
 *
 *   p - byte pointer lvalue; read and then advanced.
 *   n - integer lvalue holding the number of bytes left; decremented by 4.
 *   x - integer lvalue receiving the decoded value.
 *
 * The value is assembled in unsigned arithmetic: shifting a promoted
 * `unsigned char` left by 24 as an `int` would overflow into the sign bit
 * whenever the top byte is >= 0x80. Values above 0x7fffffff are mapped to the
 * corresponding negative number explicitly. Temporaries use macro-specific
 * names so an argument called `c` is not captured.
 */
#define PTR_READ_INT(p, n, x) do { \
  const unsigned char *ptr_read_int_b_ = (const unsigned char *) (p); \
  const unsigned long ptr_read_int_v_ = \
      ((unsigned long) ptr_read_int_b_[0] <<  0) | \
      ((unsigned long) ptr_read_int_b_[1] <<  8) | \
      ((unsigned long) ptr_read_int_b_[2] << 16) | \
      ((unsigned long) ptr_read_int_b_[3] << 24); \
  if (ptr_read_int_v_ > 0x7fffffffUL) { \
    (x) = -(long) (0xffffffffUL - ptr_read_int_v_) - 1; \
  } else { \
    (x) = (long) ptr_read_int_v_; \
  } \
  (p) += 4; \
  (n) -= 4; \
} while(0)
|
39
|
+
|
40
|
+
/* Ruby module object for `Divsufsort`; assigned in Init_divsufsort(). */
static VALUE Divsufsort;
|
41
|
+
|
42
|
+
/* */
|
43
|
+
static VALUE divsufsort_divsufsort(VALUE self, VALUE src) {
|
44
|
+
VALUE dst;
|
45
|
+
sauchar_t *T;
|
46
|
+
saidx_t *SA;
|
47
|
+
char *p;
|
48
|
+
long n, i;
|
49
|
+
saint_t err;
|
50
|
+
|
51
|
+
Check_Type(src, T_STRING);
|
52
|
+
p = RSTRING_PTR(src);
|
53
|
+
n = RSTRING_LEN(src);
|
54
|
+
|
55
|
+
if(n >= 0x7fffffff) {
|
56
|
+
rb_raise(rb_eRuntimeError, "Input data is too big.");
|
57
|
+
}
|
58
|
+
|
59
|
+
T = (sauchar_t *) xmalloc((size_t) n * sizeof(sauchar_t));
|
60
|
+
SA = (saidx_t *) xmalloc((size_t) n * sizeof(saidx_t));
|
61
|
+
dst = rb_ary_new();
|
62
|
+
memcpy(T, p, (size_t) n * sizeof(sauchar_t));
|
63
|
+
|
64
|
+
TRAP_BEG;
|
65
|
+
err = divsufsort(T, SA, (saidx_t) n);
|
66
|
+
TRAP_END;
|
67
|
+
|
68
|
+
if(err != 0) {
|
69
|
+
xfree(SA);
|
70
|
+
xfree(T);
|
71
|
+
rb_raise(rb_eRuntimeError, "Cannot allocate memory.");
|
72
|
+
}
|
73
|
+
|
74
|
+
TRAP_BEG;
|
75
|
+
err = sufcheck(T, SA, (saidx_t) n, 0);
|
76
|
+
TRAP_END;
|
77
|
+
|
78
|
+
if(err != 0) {
|
79
|
+
xfree(SA);
|
80
|
+
xfree(T);
|
81
|
+
rb_raise(rb_eRuntimeError, "Wrong suffix array.");
|
82
|
+
}
|
83
|
+
|
84
|
+
for(i = 0; i < n; i++) {
|
85
|
+
saidx_t SA_i = SA[i];
|
86
|
+
rb_ary_push(dst, LONG2NUM(SA_i));
|
87
|
+
}
|
88
|
+
|
89
|
+
xfree(SA);
|
90
|
+
xfree(T);
|
91
|
+
|
92
|
+
if(err != 0) {
|
93
|
+
rb_raise(rb_eRuntimeError, "Cannot allocate memory.");
|
94
|
+
}
|
95
|
+
|
96
|
+
return dst;
|
97
|
+
}
|
98
|
+
|
99
|
+
/* */
|
100
|
+
static VALUE divsufsort_divbwt(int argc, VALUE *argv, VALUE self) {
|
101
|
+
VALUE src, v_blocksize, transformed;
|
102
|
+
sauchar_t *T;
|
103
|
+
saidx_t *SA;
|
104
|
+
char *p;
|
105
|
+
long n, T_len;
|
106
|
+
saint_t blocksize = 32;
|
107
|
+
saidx_t pidx;
|
108
|
+
|
109
|
+
rb_scan_args(argc, argv, "11", &src, &v_blocksize);
|
110
|
+
Check_Type(src, T_STRING);
|
111
|
+
|
112
|
+
if (!NIL_P(v_blocksize)) {
|
113
|
+
blocksize = (saint_t) NUM2INT(v_blocksize);
|
114
|
+
}
|
115
|
+
|
116
|
+
blocksize <<= 20;
|
117
|
+
p = RSTRING_PTR(src);
|
118
|
+
n = RSTRING_LEN(src);
|
119
|
+
|
120
|
+
if(n > 0x20000000L) {
|
121
|
+
n = 0x20000000L;
|
122
|
+
}
|
123
|
+
|
124
|
+
if(blocksize == 0 || n < blocksize) {
|
125
|
+
blocksize = (saidx_t) n;
|
126
|
+
}
|
127
|
+
|
128
|
+
T_len = blocksize * sizeof(sauchar_t);
|
129
|
+
T = (sauchar_t *) xmalloc(T_len);
|
130
|
+
SA = (saidx_t *) xmalloc(blocksize * sizeof(saidx_t));
|
131
|
+
transformed = rb_str_new("", 0);
|
132
|
+
|
133
|
+
RB_STR_CAT_INT(transformed, blocksize);
|
134
|
+
|
135
|
+
while (n > 0) {
|
136
|
+
int m = (n < T_len) ? n : T_len;
|
137
|
+
|
138
|
+
memcpy(T, p, m);
|
139
|
+
p += m; n -= m;
|
140
|
+
|
141
|
+
TRAP_BEG;
|
142
|
+
pidx = divbwt(T, T, SA, m);
|
143
|
+
TRAP_END;
|
144
|
+
|
145
|
+
if(pidx < 0) {
|
146
|
+
break;
|
147
|
+
}
|
148
|
+
|
149
|
+
RB_STR_CAT_INT(transformed, pidx);
|
150
|
+
rb_str_cat(transformed, T, m);
|
151
|
+
}
|
152
|
+
|
153
|
+
xfree(SA);
|
154
|
+
xfree(T);
|
155
|
+
|
156
|
+
if(pidx < 0) {
|
157
|
+
rb_raise(rb_eRuntimeError, "bw_transform: %s.", (pidx == -1) ? "Invalid arguments" : "Cannot allocate memory");
|
158
|
+
}
|
159
|
+
|
160
|
+
return transformed;
|
161
|
+
}
|
162
|
+
|
163
|
+
/* */
|
164
|
+
static VALUE divsufsort_inverse_bw_transform(VALUE self, VALUE transformed) {
|
165
|
+
VALUE dst;
|
166
|
+
sauchar_t *T;
|
167
|
+
saidx_t *A;
|
168
|
+
char *p;
|
169
|
+
long n;
|
170
|
+
saint_t blocksize;
|
171
|
+
int err = 0;
|
172
|
+
|
173
|
+
Check_Type(transformed, T_STRING);
|
174
|
+
p = RSTRING_PTR(transformed);
|
175
|
+
n = RSTRING_LEN(transformed);
|
176
|
+
|
177
|
+
if (n < 4) {
|
178
|
+
rb_raise(rb_eRuntimeError, "reverseBWT: Invalid data.\n");
|
179
|
+
}
|
180
|
+
|
181
|
+
PTR_READ_INT(p, n, blocksize);
|
182
|
+
T = (sauchar_t *) xmalloc(blocksize * sizeof(sauchar_t));
|
183
|
+
A = (saidx_t *) xmalloc(blocksize * sizeof(saidx_t));
|
184
|
+
dst = rb_str_new("", 0);
|
185
|
+
|
186
|
+
while (n > 0) {
|
187
|
+
int m;
|
188
|
+
saidx_t pidx;
|
189
|
+
|
190
|
+
if (n < 4) {
|
191
|
+
err = -1;
|
192
|
+
break;
|
193
|
+
}
|
194
|
+
|
195
|
+
PTR_READ_INT(p, n, pidx);
|
196
|
+
m = (n < blocksize) ? n : blocksize;
|
197
|
+
memcpy(T, p, m);
|
198
|
+
p += m; n -= m;
|
199
|
+
|
200
|
+
TRAP_BEG;
|
201
|
+
err = inverse_bw_transform(T, T, A, m, pidx);
|
202
|
+
TRAP_END;
|
203
|
+
|
204
|
+
if(err != 0) {
|
205
|
+
break;
|
206
|
+
}
|
207
|
+
|
208
|
+
rb_str_cat(dst, T, m);
|
209
|
+
}
|
210
|
+
|
211
|
+
xfree(A);
|
212
|
+
xfree(T);
|
213
|
+
|
214
|
+
if (err != 0) {
|
215
|
+
rb_raise(rb_eRuntimeError, "reverseBWT: %s.\n", (err == -1) ? "Invalid data" : "Cannot allocate memory");
|
216
|
+
}
|
217
|
+
|
218
|
+
return dst;
|
219
|
+
}
|
220
|
+
|
221
|
+
/*
 * Extension initialiser (named after Ruby's Init_<extension> convention).
 *
 * Defines the module `Divsufsort`, its `VERSION` constant, and the module
 * functions `divsufsort` (1 argument), `divbwt` (variable arguments) and
 * `inverse_bw_transform` (1 argument).
 */
void DLLEXPORT Init_divsufsort(void) {
  Divsufsort = rb_define_module("Divsufsort");
  rb_define_const(Divsufsort, "VERSION", rb_str_new2(VERSION));
  rb_define_module_function(Divsufsort, "divsufsort", divsufsort_divsufsort, 1);
  rb_define_module_function(Divsufsort, "divbwt", divsufsort_divbwt, -1);
  rb_define_module_function(Divsufsort, "inverse_bw_transform", divsufsort_inverse_bw_transform, 1);
}
|
Binary file
|
metadata
ADDED
@@ -0,0 +1,57 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: divsufsort
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
platform: mswin32
|
6
|
+
authors:
|
7
|
+
- winebarrel
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
|
12
|
+
date: 2008-10-22 00:00:00 +09:00
|
13
|
+
default_executable:
|
14
|
+
dependencies: []
|
15
|
+
|
16
|
+
description:
|
17
|
+
email: sgwr_dts@yahoo.co.jp
|
18
|
+
executables: []
|
19
|
+
|
20
|
+
extensions: []
|
21
|
+
|
22
|
+
extra_rdoc_files:
|
23
|
+
- README.txt
|
24
|
+
- ext/divsufsort_ruby.c
|
25
|
+
files:
|
26
|
+
- lib/i386-mswin32/divsufsort.so
|
27
|
+
- README.txt
|
28
|
+
- ext/divsufsort_ruby.c
|
29
|
+
has_rdoc: true
|
30
|
+
homepage: http://divsufsort.rubyforge.org
|
31
|
+
post_install_message:
|
32
|
+
rdoc_options:
|
33
|
+
- --title
|
34
|
+
- divsufsort - Ruby bindings for libdivsufsort.
|
35
|
+
require_paths:
|
36
|
+
- lib/i386-mswin32
|
37
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
38
|
+
requirements:
|
39
|
+
- - ">="
|
40
|
+
- !ruby/object:Gem::Version
|
41
|
+
version: "0"
|
42
|
+
version:
|
43
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - ">="
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: "0"
|
48
|
+
version:
|
49
|
+
requirements: []
|
50
|
+
|
51
|
+
rubyforge_project: divsufsort
|
52
|
+
rubygems_version: 1.2.0
|
53
|
+
signing_key:
|
54
|
+
specification_version: 2
|
55
|
+
summary: Ruby bindings for libdivsufsort.
|
56
|
+
test_files: []
|
57
|
+
|