divsufsort 0.1.0-mswin32
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.txt +77 -0
- data/ext/divsufsort_ruby.c +227 -0
- data/lib/i386-mswin32/divsufsort.so +0 -0
- metadata +57 -0
data/README.txt
ADDED
@@ -0,0 +1,77 @@
|
|
1
|
+
= divsufsort
|
2
|
+
|
3
|
+
Copyright (c) 2008 SUGAWARA Genki <sgwr_dts@yahoo.co.jp>
|
4
|
+
|
5
|
+
== Description
|
6
|
+
|
7
|
+
Ruby bindings for libdivsufsort.
|
8
|
+
|
9
|
+
libdivsufsort is a C API library to construct the suffix array and the Burrows-Wheeler transformed string.
|
10
|
+
|
11
|
+
== Project Page
|
12
|
+
|
13
|
+
http://rubyforge.org/projects/divsufsort
|
14
|
+
|
15
|
+
== Install
|
16
|
+
|
17
|
+
gem install divsufsort
|
18
|
+
|
19
|
+
== Example
|
20
|
+
=== Burrows-Wheeler Transform/Inverse Burrows-Wheeler Transform
|
21
|
+
|
22
|
+
require 'divsufsort'
|
23
|
+
include Divsufsort
|
24
|
+
|
25
|
+
bwt = divbwt(<<-EOS)
|
26
|
+
London bridge is falling down,
|
27
|
+
Falling down, falling down,
|
28
|
+
London bridge is falling down,
|
29
|
+
My fair Lady.
|
30
|
+
EOS
|
31
|
+
|
32
|
+
unbwt = inverse_bw_transform(bwt)
|
33
|
+
|
34
|
+
=== Construct the suffix array
|
35
|
+
|
36
|
+
require 'divsufsort'
|
37
|
+
include Divsufsort
|
38
|
+
|
39
|
+
sa = divsufsort(<<-EOS)
|
40
|
+
London bridge is falling down,
|
41
|
+
Falling down, falling down,
|
42
|
+
London bridge is falling down,
|
43
|
+
My fair Lady.
|
44
|
+
EOS
|
45
|
+
|
46
|
+
== License
|
47
|
+
Copyright (c) 2008 SUGAWARA Genki <sgwr_dts@yahoo.co.jp>
|
48
|
+
All rights reserved.
|
49
|
+
|
50
|
+
Redistribution and use in source and binary forms, with or without modification,
|
51
|
+
are permitted provided that the following conditions are met:
|
52
|
+
|
53
|
+
* Redistributions of source code must retain the above copyright notice,
|
54
|
+
this list of conditions and the following disclaimer.
|
55
|
+
* Redistributions in binary form must reproduce the above copyright notice,
|
56
|
+
this list of conditions and the following disclaimer in the documentation
|
57
|
+
and/or other materials provided with the distribution.
|
58
|
+
* The names of its contributors may be used to endorse or promote products
|
59
|
+
derived from this software without specific prior written permission.
|
60
|
+
|
61
|
+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
62
|
+
ANY EXPRESS OR IMPLIED WARRANTIES,
|
63
|
+
INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
|
64
|
+
FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
65
|
+
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
66
|
+
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
|
67
|
+
OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
68
|
+
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
|
69
|
+
STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
70
|
+
OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
|
71
|
+
DAMAGE.
|
72
|
+
|
73
|
+
=== libdivsufsort
|
74
|
+
divsufsort contains libdivsufsort.
|
75
|
+
|
76
|
+
* libdivsufsort is a lightweight suffix-sorting library.
|
77
|
+
* http://code.google.com/p/libdivsufsort/
|
@@ -0,0 +1,227 @@
|
|
1
|
+
/*
 * DLLEXPORT expands to __declspec(dllexport) when the build defines
 * DIVSUFSORT_EXPORTS and to nothing otherwise.
 */
#if defined(DIVSUFSORT_EXPORTS)
#  define DLLEXPORT __declspec(dllexport)
#else
#  define DLLEXPORT
#endif

/* Version string published to Ruby as the VERSION constant of the module. */
#define VERSION "0.1.0"
|
8
|
+
|
9
|
+
#include "divsufsort.h"
|
10
|
+
#include "ruby.h"
|
11
|
+
#include "rubysig.h"
|
12
|
+
|
13
|
+
/*
 * Fallback accessors: used only when the included headers do not already
 * provide RSTRING_PTR / RSTRING_LEN. They read the ptr and len members of
 * the structure returned by RSTRING().
 */
#if !defined(RSTRING_PTR)
#  define RSTRING_PTR(s) (RSTRING(s)->ptr)
#endif

#if !defined(RSTRING_LEN)
#  define RSTRING_LEN(s) (RSTRING(s)->len)
#endif
|
19
|
+
|
20
|
+
/*
 * Append the low 32 bits of the integer n to the Ruby string o as four
 * bytes, least-significant byte first.
 *
 * n is evaluated exactly once and converted to unsigned long before
 * shifting, so negative values are encoded without shifting a signed
 * quantity. The scratch names carry a macro-specific suffix so they cannot
 * capture identifiers used in the arguments.
 */
#define RB_STR_CAT_INT(o, n) do { \
  const unsigned long rb_str_cat_int_val_ = (unsigned long) (n); \
  unsigned char rb_str_cat_int_buf_[4]; \
  rb_str_cat_int_buf_[0] = (unsigned char) ((rb_str_cat_int_val_ >>  0) & 0xff); \
  rb_str_cat_int_buf_[1] = (unsigned char) ((rb_str_cat_int_val_ >>  8) & 0xff); \
  rb_str_cat_int_buf_[2] = (unsigned char) ((rb_str_cat_int_val_ >> 16) & 0xff); \
  rb_str_cat_int_buf_[3] = (unsigned char) ((rb_str_cat_int_val_ >> 24) & 0xff); \
  /* rb_str_cat takes a char pointer; cast to avoid a signedness mismatch. */ \
  rb_str_cat((o), (const char *) rb_str_cat_int_buf_, 4); \
} while(0)
|
28
|
+
|
29
|
+
/*
 * Decode the four bytes at p (least-significant byte first) into x, then
 * advance p by 4 and decrease the remaining-byte counter n by 4.
 *
 * The macro performs no bounds check: the caller must make sure at least
 * four bytes are readable at p before invoking it.
 *
 * The bytes are combined as unsigned values and converted to int at the end,
 * so a top byte >= 0x80 no longer left-shifts into the sign bit of an int.
 */
#define PTR_READ_INT(p, n, x) do { \
  const unsigned char *ptr_read_int_src_ = (const unsigned char *) (p); \
  (x) = (int) (((unsigned long) ptr_read_int_src_[0] <<  0) | \
               ((unsigned long) ptr_read_int_src_[1] <<  8) | \
               ((unsigned long) ptr_read_int_src_[2] << 16) | \
               ((unsigned long) ptr_read_int_src_[3] << 24)); \
  (p) += 4; \
  (n) -= 4; \
} while(0)
|
39
|
+
|
40
|
+
/* Handle to the Ruby "Divsufsort" module; assigned in Init_divsufsort(). */
static VALUE Divsufsort;
|
41
|
+
|
42
|
+
/* */
|
43
|
+
static VALUE divsufsort_divsufsort(VALUE self, VALUE src) {
|
44
|
+
VALUE dst;
|
45
|
+
sauchar_t *T;
|
46
|
+
saidx_t *SA;
|
47
|
+
char *p;
|
48
|
+
long n, i;
|
49
|
+
saint_t err;
|
50
|
+
|
51
|
+
Check_Type(src, T_STRING);
|
52
|
+
p = RSTRING_PTR(src);
|
53
|
+
n = RSTRING_LEN(src);
|
54
|
+
|
55
|
+
if(n >= 0x7fffffff) {
|
56
|
+
rb_raise(rb_eRuntimeError, "Input data is too big.");
|
57
|
+
}
|
58
|
+
|
59
|
+
T = (sauchar_t *) xmalloc((size_t) n * sizeof(sauchar_t));
|
60
|
+
SA = (saidx_t *) xmalloc((size_t) n * sizeof(saidx_t));
|
61
|
+
dst = rb_ary_new();
|
62
|
+
memcpy(T, p, (size_t) n * sizeof(sauchar_t));
|
63
|
+
|
64
|
+
TRAP_BEG;
|
65
|
+
err = divsufsort(T, SA, (saidx_t) n);
|
66
|
+
TRAP_END;
|
67
|
+
|
68
|
+
if(err != 0) {
|
69
|
+
xfree(SA);
|
70
|
+
xfree(T);
|
71
|
+
rb_raise(rb_eRuntimeError, "Cannot allocate memory.");
|
72
|
+
}
|
73
|
+
|
74
|
+
TRAP_BEG;
|
75
|
+
err = sufcheck(T, SA, (saidx_t) n, 0);
|
76
|
+
TRAP_END;
|
77
|
+
|
78
|
+
if(err != 0) {
|
79
|
+
xfree(SA);
|
80
|
+
xfree(T);
|
81
|
+
rb_raise(rb_eRuntimeError, "Wrong suffix array.");
|
82
|
+
}
|
83
|
+
|
84
|
+
for(i = 0; i < n; i++) {
|
85
|
+
saidx_t SA_i = SA[i];
|
86
|
+
rb_ary_push(dst, LONG2NUM(SA_i));
|
87
|
+
}
|
88
|
+
|
89
|
+
xfree(SA);
|
90
|
+
xfree(T);
|
91
|
+
|
92
|
+
if(err != 0) {
|
93
|
+
rb_raise(rb_eRuntimeError, "Cannot allocate memory.");
|
94
|
+
}
|
95
|
+
|
96
|
+
return dst;
|
97
|
+
}
|
98
|
+
|
99
|
+
/* */
|
100
|
+
static VALUE divsufsort_divbwt(int argc, VALUE *argv, VALUE self) {
|
101
|
+
VALUE src, v_blocksize, transformed;
|
102
|
+
sauchar_t *T;
|
103
|
+
saidx_t *SA;
|
104
|
+
char *p;
|
105
|
+
long n, T_len;
|
106
|
+
saint_t blocksize = 32;
|
107
|
+
saidx_t pidx;
|
108
|
+
|
109
|
+
rb_scan_args(argc, argv, "11", &src, &v_blocksize);
|
110
|
+
Check_Type(src, T_STRING);
|
111
|
+
|
112
|
+
if (!NIL_P(v_blocksize)) {
|
113
|
+
blocksize = (saint_t) NUM2INT(v_blocksize);
|
114
|
+
}
|
115
|
+
|
116
|
+
blocksize <<= 20;
|
117
|
+
p = RSTRING_PTR(src);
|
118
|
+
n = RSTRING_LEN(src);
|
119
|
+
|
120
|
+
if(n > 0x20000000L) {
|
121
|
+
n = 0x20000000L;
|
122
|
+
}
|
123
|
+
|
124
|
+
if(blocksize == 0 || n < blocksize) {
|
125
|
+
blocksize = (saidx_t) n;
|
126
|
+
}
|
127
|
+
|
128
|
+
T_len = blocksize * sizeof(sauchar_t);
|
129
|
+
T = (sauchar_t *) xmalloc(T_len);
|
130
|
+
SA = (saidx_t *) xmalloc(blocksize * sizeof(saidx_t));
|
131
|
+
transformed = rb_str_new("", 0);
|
132
|
+
|
133
|
+
RB_STR_CAT_INT(transformed, blocksize);
|
134
|
+
|
135
|
+
while (n > 0) {
|
136
|
+
int m = (n < T_len) ? n : T_len;
|
137
|
+
|
138
|
+
memcpy(T, p, m);
|
139
|
+
p += m; n -= m;
|
140
|
+
|
141
|
+
TRAP_BEG;
|
142
|
+
pidx = divbwt(T, T, SA, m);
|
143
|
+
TRAP_END;
|
144
|
+
|
145
|
+
if(pidx < 0) {
|
146
|
+
break;
|
147
|
+
}
|
148
|
+
|
149
|
+
RB_STR_CAT_INT(transformed, pidx);
|
150
|
+
rb_str_cat(transformed, T, m);
|
151
|
+
}
|
152
|
+
|
153
|
+
xfree(SA);
|
154
|
+
xfree(T);
|
155
|
+
|
156
|
+
if(pidx < 0) {
|
157
|
+
rb_raise(rb_eRuntimeError, "bw_transform: %s.", (pidx == -1) ? "Invalid arguments" : "Cannot allocate memory");
|
158
|
+
}
|
159
|
+
|
160
|
+
return transformed;
|
161
|
+
}
|
162
|
+
|
163
|
+
/* */
|
164
|
+
static VALUE divsufsort_inverse_bw_transform(VALUE self, VALUE transformed) {
|
165
|
+
VALUE dst;
|
166
|
+
sauchar_t *T;
|
167
|
+
saidx_t *A;
|
168
|
+
char *p;
|
169
|
+
long n;
|
170
|
+
saint_t blocksize;
|
171
|
+
int err = 0;
|
172
|
+
|
173
|
+
Check_Type(transformed, T_STRING);
|
174
|
+
p = RSTRING_PTR(transformed);
|
175
|
+
n = RSTRING_LEN(transformed);
|
176
|
+
|
177
|
+
if (n < 4) {
|
178
|
+
rb_raise(rb_eRuntimeError, "reverseBWT: Invalid data.\n");
|
179
|
+
}
|
180
|
+
|
181
|
+
PTR_READ_INT(p, n, blocksize);
|
182
|
+
T = (sauchar_t *) xmalloc(blocksize * sizeof(sauchar_t));
|
183
|
+
A = (saidx_t *) xmalloc(blocksize * sizeof(saidx_t));
|
184
|
+
dst = rb_str_new("", 0);
|
185
|
+
|
186
|
+
while (n > 0) {
|
187
|
+
int m;
|
188
|
+
saidx_t pidx;
|
189
|
+
|
190
|
+
if (n < 4) {
|
191
|
+
err = -1;
|
192
|
+
break;
|
193
|
+
}
|
194
|
+
|
195
|
+
PTR_READ_INT(p, n, pidx);
|
196
|
+
m = (n < blocksize) ? n : blocksize;
|
197
|
+
memcpy(T, p, m);
|
198
|
+
p += m; n -= m;
|
199
|
+
|
200
|
+
TRAP_BEG;
|
201
|
+
err = inverse_bw_transform(T, T, A, m, pidx);
|
202
|
+
TRAP_END;
|
203
|
+
|
204
|
+
if(err != 0) {
|
205
|
+
break;
|
206
|
+
}
|
207
|
+
|
208
|
+
rb_str_cat(dst, T, m);
|
209
|
+
}
|
210
|
+
|
211
|
+
xfree(A);
|
212
|
+
xfree(T);
|
213
|
+
|
214
|
+
if (err != 0) {
|
215
|
+
rb_raise(rb_eRuntimeError, "reverseBWT: %s.\n", (err == -1) ? "Invalid data" : "Cannot allocate memory");
|
216
|
+
}
|
217
|
+
|
218
|
+
return dst;
|
219
|
+
}
|
220
|
+
|
221
|
+
/*
 * Extension entry point. Defines the Ruby module "Divsufsort", its VERSION
 * constant, and the module functions divsufsort (1 argument), divbwt
 * (variable arguments) and inverse_bw_transform (1 argument).
 */
void DLLEXPORT Init_divsufsort() {
  VALUE mod = rb_define_module("Divsufsort");

  Divsufsort = mod;

  rb_define_const(mod, "VERSION", rb_str_new2(VERSION));

  rb_define_module_function(mod, "divsufsort", divsufsort_divsufsort, 1);
  rb_define_module_function(mod, "divbwt", divsufsort_divbwt, -1);
  rb_define_module_function(mod, "inverse_bw_transform", divsufsort_inverse_bw_transform, 1);
}
|
Binary file
|
metadata
ADDED
@@ -0,0 +1,57 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: divsufsort
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
platform: mswin32
|
6
|
+
authors:
|
7
|
+
- winebarrel
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
|
12
|
+
date: 2008-10-22 00:00:00 +09:00
|
13
|
+
default_executable:
|
14
|
+
dependencies: []
|
15
|
+
|
16
|
+
description:
|
17
|
+
email: sgwr_dts@yahoo.co.jp
|
18
|
+
executables: []
|
19
|
+
|
20
|
+
extensions: []
|
21
|
+
|
22
|
+
extra_rdoc_files:
|
23
|
+
- README.txt
|
24
|
+
- ext/divsufsort_ruby.c
|
25
|
+
files:
|
26
|
+
- lib/i386-mswin32/divsufsort.so
|
27
|
+
- README.txt
|
28
|
+
- ext/divsufsort_ruby.c
|
29
|
+
has_rdoc: true
|
30
|
+
homepage: http://divsufsort.rubyforge.org
|
31
|
+
post_install_message:
|
32
|
+
rdoc_options:
|
33
|
+
- --title
|
34
|
+
- divsufsort - Ruby bindings for libdivsufsort.
|
35
|
+
require_paths:
|
36
|
+
- lib/i386-mswin32
|
37
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
38
|
+
requirements:
|
39
|
+
- - ">="
|
40
|
+
- !ruby/object:Gem::Version
|
41
|
+
version: "0"
|
42
|
+
version:
|
43
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - ">="
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: "0"
|
48
|
+
version:
|
49
|
+
requirements: []
|
50
|
+
|
51
|
+
rubyforge_project: divsufsort
|
52
|
+
rubygems_version: 1.2.0
|
53
|
+
signing_key:
|
54
|
+
specification_version: 2
|
55
|
+
summary: Ruby bindings for libdivsufsort.
|
56
|
+
test_files: []
|
57
|
+
|