divsufsort 0.1.0-mswin32

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,77 @@
1
+ = divsufsort
2
+
3
+ Copyright (c) 2008 SUGAWARA Genki <sgwr_dts@yahoo.co.jp>
4
+
5
+ == Description
6
+
7
+ Ruby bindings for libdivsufsort.
8
+
9
+ libdivsufsort is a C library for constructing suffix arrays and Burrows-Wheeler transformed strings.
10
+
11
+ == Project Page
12
+
13
+ http://rubyforge.org/projects/divsufsort
14
+
15
+ == Install
16
+
17
+ gem install divsufsort
18
+
19
+ == Example
20
+ === Burrows-Wheeler Transform/Inverse Burrows-Wheeler Transform
21
+
22
+ require 'divsufsort'
23
+ include Divsufsort
24
+
25
+ bwt = divbwt(<<-EOS)
26
+ London bridge is falling down,
27
+ Falling down, falling down,
28
+ London bridge is falling down,
29
+ My fair Lady.
30
+ EOS
31
+
32
+ unbwt = inverse_bw_transform(bwt)
33
+
34
+ === Construct the suffix array
35
+
36
+ require 'divsufsort'
37
+ include Divsufsort
38
+
39
+ sa = divsufsort(<<-EOS)
40
+ London bridge is falling down,
41
+ Falling down, falling down,
42
+ London bridge is falling down,
43
+ My fair Lady.
44
+ EOS
45
+
46
+ == License
47
+ Copyright (c) 2008 SUGAWARA Genki <sgwr_dts@yahoo.co.jp>
48
+ All rights reserved.
49
+
50
+ Redistribution and use in source and binary forms, with or without modification,
51
+ are permitted provided that the following conditions are met:
52
+
53
+ * Redistributions of source code must retain the above copyright notice,
54
+ this list of conditions and the following disclaimer.
55
+ * Redistributions in binary form must reproduce the above copyright notice,
56
+ this list of conditions and the following disclaimer in the documentation
57
+ and/or other materials provided with the distribution.
58
+ * The names of its contributors may be used to endorse or promote products
59
+ derived from this software without specific prior written permission.
60
+
61
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
62
+ ANY EXPRESS OR IMPLIED WARRANTIES,
63
+ INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
64
+ FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
65
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
66
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
67
+ OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
68
+ INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
69
+ STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
70
+ OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
71
+ DAMAGE.
72
+
73
+ === libdivsufsort
74
+ divsufsort contains libdivsufsort.
75
+
76
+ * libdivsufsort is a lightweight suffix-sorting library.
77
+ * http://code.google.com/p/libdivsufsort/
@@ -0,0 +1,227 @@
1
/*
 * DLLEXPORT decorates the extension entry point. It expands to
 * __declspec(dllexport) when DIVSUFSORT_EXPORTS is defined and to nothing
 * otherwise.
 */
#if defined(DIVSUFSORT_EXPORTS)
#  define DLLEXPORT __declspec(dllexport)
#else
#  define DLLEXPORT
#endif

/* Version string published to Ruby as the module's VERSION constant. */
#define VERSION "0.1.0"
8
+
9
+ #include "divsufsort.h"
10
+ #include "ruby.h"
11
+ #include "rubysig.h"
12
+
13
/*
 * Fallback string accessors: only defined when the included Ruby headers do
 * not already provide RSTRING_PTR / RSTRING_LEN. They read the ptr / len
 * members of the RSTRING() structure directly.
 */
#if !defined(RSTRING_PTR)
#  define RSTRING_PTR(s) (RSTRING(s)->ptr)
#endif

#if !defined(RSTRING_LEN)
#  define RSTRING_LEN(s) (RSTRING(s)->len)
#endif
19
+
20
/*
 * Append the low 32 bits of integer `n` to Ruby string `o` as four bytes in
 * little-endian order (least significant byte first).
 *
 * `n` is evaluated exactly once and converted to unsigned before shifting, so
 * expressions with side effects and negative values are handled predictably.
 * The buffer is handed to rb_str_cat() as `const char *`, the pointer type
 * that function expects.
 */
#define RB_STR_CAT_INT(o, n) do { \
  const unsigned long rb_str_cat_int_v_ = (unsigned long) (n); \
  unsigned char rb_str_cat_int_buf_[4]; \
  rb_str_cat_int_buf_[0] = (unsigned char) ((rb_str_cat_int_v_ >>  0) & 0xffUL); \
  rb_str_cat_int_buf_[1] = (unsigned char) ((rb_str_cat_int_v_ >>  8) & 0xffUL); \
  rb_str_cat_int_buf_[2] = (unsigned char) ((rb_str_cat_int_v_ >> 16) & 0xffUL); \
  rb_str_cat_int_buf_[3] = (unsigned char) ((rb_str_cat_int_v_ >> 24) & 0xffUL); \
  rb_str_cat((o), (const char *) rb_str_cat_int_buf_, 4); \
} while(0)
28
+
29
/*
 * Read a 32-bit little-endian integer from the byte pointer `p` into `x`,
 * then advance `p` by four bytes and decrease the remaining-length counter
 * `n` by four. The caller must ensure at least four bytes are available.
 *
 * The bytes are combined in unsigned arithmetic: shifting a promoted
 * (signed) int left by 24 is undefined when the top byte is >= 0x80.
 * Values above 0x7fffffff are then mapped explicitly onto the matching
 * negative int (two's complement), so whatever RB_STR_CAT_INT wrote is read
 * back unchanged.
 */
#define PTR_READ_INT(p, n, x) do { \
  const unsigned long ptr_read_int_v_ = \
      ((unsigned long) (unsigned char) (p)[0] <<  0) | \
      ((unsigned long) (unsigned char) (p)[1] <<  8) | \
      ((unsigned long) (unsigned char) (p)[2] << 16) | \
      ((unsigned long) (unsigned char) (p)[3] << 24); \
  (x) = (ptr_read_int_v_ <= 0x7fffffffUL) \
      ? (int) ptr_read_int_v_ \
      : -(int) (0xffffffffUL - ptr_read_int_v_) - 1; \
  (p) += 4; \
  (n) -= 4; \
} while(0)
39
+
40
+ static VALUE Divsufsort;
41
+
42
+ /* */
43
+ static VALUE divsufsort_divsufsort(VALUE self, VALUE src) {
44
+ VALUE dst;
45
+ sauchar_t *T;
46
+ saidx_t *SA;
47
+ char *p;
48
+ long n, i;
49
+ saint_t err;
50
+
51
+ Check_Type(src, T_STRING);
52
+ p = RSTRING_PTR(src);
53
+ n = RSTRING_LEN(src);
54
+
55
+ if(n >= 0x7fffffff) {
56
+ rb_raise(rb_eRuntimeError, "Input data is too big.");
57
+ }
58
+
59
+ T = (sauchar_t *) xmalloc((size_t) n * sizeof(sauchar_t));
60
+ SA = (saidx_t *) xmalloc((size_t) n * sizeof(saidx_t));
61
+ dst = rb_ary_new();
62
+ memcpy(T, p, (size_t) n * sizeof(sauchar_t));
63
+
64
+ TRAP_BEG;
65
+ err = divsufsort(T, SA, (saidx_t) n);
66
+ TRAP_END;
67
+
68
+ if(err != 0) {
69
+ xfree(SA);
70
+ xfree(T);
71
+ rb_raise(rb_eRuntimeError, "Cannot allocate memory.");
72
+ }
73
+
74
+ TRAP_BEG;
75
+ err = sufcheck(T, SA, (saidx_t) n, 0);
76
+ TRAP_END;
77
+
78
+ if(err != 0) {
79
+ xfree(SA);
80
+ xfree(T);
81
+ rb_raise(rb_eRuntimeError, "Wrong suffix array.");
82
+ }
83
+
84
+ for(i = 0; i < n; i++) {
85
+ saidx_t SA_i = SA[i];
86
+ rb_ary_push(dst, LONG2NUM(SA_i));
87
+ }
88
+
89
+ xfree(SA);
90
+ xfree(T);
91
+
92
+ if(err != 0) {
93
+ rb_raise(rb_eRuntimeError, "Cannot allocate memory.");
94
+ }
95
+
96
+ return dst;
97
+ }
98
+
99
+ /* */
100
+ static VALUE divsufsort_divbwt(int argc, VALUE *argv, VALUE self) {
101
+ VALUE src, v_blocksize, transformed;
102
+ sauchar_t *T;
103
+ saidx_t *SA;
104
+ char *p;
105
+ long n, T_len;
106
+ saint_t blocksize = 32;
107
+ saidx_t pidx;
108
+
109
+ rb_scan_args(argc, argv, "11", &src, &v_blocksize);
110
+ Check_Type(src, T_STRING);
111
+
112
+ if (!NIL_P(v_blocksize)) {
113
+ blocksize = (saint_t) NUM2INT(v_blocksize);
114
+ }
115
+
116
+ blocksize <<= 20;
117
+ p = RSTRING_PTR(src);
118
+ n = RSTRING_LEN(src);
119
+
120
+ if(n > 0x20000000L) {
121
+ n = 0x20000000L;
122
+ }
123
+
124
+ if(blocksize == 0 || n < blocksize) {
125
+ blocksize = (saidx_t) n;
126
+ }
127
+
128
+ T_len = blocksize * sizeof(sauchar_t);
129
+ T = (sauchar_t *) xmalloc(T_len);
130
+ SA = (saidx_t *) xmalloc(blocksize * sizeof(saidx_t));
131
+ transformed = rb_str_new("", 0);
132
+
133
+ RB_STR_CAT_INT(transformed, blocksize);
134
+
135
+ while (n > 0) {
136
+ int m = (n < T_len) ? n : T_len;
137
+
138
+ memcpy(T, p, m);
139
+ p += m; n -= m;
140
+
141
+ TRAP_BEG;
142
+ pidx = divbwt(T, T, SA, m);
143
+ TRAP_END;
144
+
145
+ if(pidx < 0) {
146
+ break;
147
+ }
148
+
149
+ RB_STR_CAT_INT(transformed, pidx);
150
+ rb_str_cat(transformed, T, m);
151
+ }
152
+
153
+ xfree(SA);
154
+ xfree(T);
155
+
156
+ if(pidx < 0) {
157
+ rb_raise(rb_eRuntimeError, "bw_transform: %s.", (pidx == -1) ? "Invalid arguments" : "Cannot allocate memory");
158
+ }
159
+
160
+ return transformed;
161
+ }
162
+
163
+ /* */
164
+ static VALUE divsufsort_inverse_bw_transform(VALUE self, VALUE transformed) {
165
+ VALUE dst;
166
+ sauchar_t *T;
167
+ saidx_t *A;
168
+ char *p;
169
+ long n;
170
+ saint_t blocksize;
171
+ int err = 0;
172
+
173
+ Check_Type(transformed, T_STRING);
174
+ p = RSTRING_PTR(transformed);
175
+ n = RSTRING_LEN(transformed);
176
+
177
+ if (n < 4) {
178
+ rb_raise(rb_eRuntimeError, "reverseBWT: Invalid data.\n");
179
+ }
180
+
181
+ PTR_READ_INT(p, n, blocksize);
182
+ T = (sauchar_t *) xmalloc(blocksize * sizeof(sauchar_t));
183
+ A = (saidx_t *) xmalloc(blocksize * sizeof(saidx_t));
184
+ dst = rb_str_new("", 0);
185
+
186
+ while (n > 0) {
187
+ int m;
188
+ saidx_t pidx;
189
+
190
+ if (n < 4) {
191
+ err = -1;
192
+ break;
193
+ }
194
+
195
+ PTR_READ_INT(p, n, pidx);
196
+ m = (n < blocksize) ? n : blocksize;
197
+ memcpy(T, p, m);
198
+ p += m; n -= m;
199
+
200
+ TRAP_BEG;
201
+ err = inverse_bw_transform(T, T, A, m, pidx);
202
+ TRAP_END;
203
+
204
+ if(err != 0) {
205
+ break;
206
+ }
207
+
208
+ rb_str_cat(dst, T, m);
209
+ }
210
+
211
+ xfree(A);
212
+ xfree(T);
213
+
214
+ if (err != 0) {
215
+ rb_raise(rb_eRuntimeError, "reverseBWT: %s.\n", (err == -1) ? "Invalid data" : "Cannot allocate memory");
216
+ }
217
+
218
+ return dst;
219
+ }
220
+
221
+ void DLLEXPORT Init_divsufsort() {
222
+ Divsufsort = rb_define_module("Divsufsort");
223
+ rb_define_const(Divsufsort, "VERSION", rb_str_new2(VERSION));
224
+ rb_define_module_function(Divsufsort, "divsufsort", divsufsort_divsufsort, 1);
225
+ rb_define_module_function(Divsufsort, "divbwt", divsufsort_divbwt, -1);
226
+ rb_define_module_function(Divsufsort, "inverse_bw_transform", divsufsort_inverse_bw_transform, 1);
227
+ }
metadata ADDED
@@ -0,0 +1,57 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: divsufsort
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: mswin32
6
+ authors:
7
+ - winebarrel
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+
12
+ date: 2008-10-22 00:00:00 +09:00
13
+ default_executable:
14
+ dependencies: []
15
+
16
+ description:
17
+ email: sgwr_dts@yahoo.co.jp
18
+ executables: []
19
+
20
+ extensions: []
21
+
22
+ extra_rdoc_files:
23
+ - README.txt
24
+ - ext/divsufsort_ruby.c
25
+ files:
26
+ - lib/i386-mswin32/divsufsort.so
27
+ - README.txt
28
+ - ext/divsufsort_ruby.c
29
+ has_rdoc: true
30
+ homepage: http://divsufsort.rubyforge.org
31
+ post_install_message:
32
+ rdoc_options:
33
+ - --title
34
+ - divsufsort - Ruby bindings for libdivsufsort.
35
+ require_paths:
36
+ - lib/i386-mswin32
37
+ required_ruby_version: !ruby/object:Gem::Requirement
38
+ requirements:
39
+ - - ">="
40
+ - !ruby/object:Gem::Version
41
+ version: "0"
42
+ version:
43
+ required_rubygems_version: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: "0"
48
+ version:
49
+ requirements: []
50
+
51
+ rubyforge_project: divsufsort
52
+ rubygems_version: 1.2.0
53
+ signing_key:
54
+ specification_version: 2
55
+ summary: Ruby bindings for libdivsufsort.
56
+ test_files: []
57
+