divsufsort 0.1.0-mswin32

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,77 @@
1
+ = divsufsort
2
+
3
+ Copyright (c) 2008 SUGAWARA Genki <sgwr_dts@yahoo.co.jp>
4
+
5
+ == Description
6
+
7
+ Ruby bindings for libdivsufsort.
8
+
9
+ libdivsufsort is a C library for constructing the suffix array and the Burrows-Wheeler transformed string.
10
+
11
+ == Project Page
12
+
13
+ http://rubyforge.org/projects/divsufsort
14
+
15
+ == Install
16
+
17
+ gem install divsufsort
18
+
19
+ == Example
20
+ === Burrows-Wheeler Transform/Inverse Burrows-Wheeler Transform
21
+
22
+ require 'divsufsort'
23
+ include Divsufsort
24
+
25
+ bwt = divbwt(<<-EOS)
26
+ London bridge is falling down,
27
+ Falling down, falling down,
28
+ London bridge is falling down,
29
+ My fair Lady.
30
+ EOS
31
+
32
+ unbwt = inverse_bw_transform(bwt)
33
+
34
+ === Construct the suffix array
35
+
36
+ require 'divsufsort'
37
+ include Divsufsort
38
+
39
+ sa = divsufsort(<<-EOS)
40
+ London bridge is falling down,
41
+ Falling down, falling down,
42
+ London bridge is falling down,
43
+ My fair Lady.
44
+ EOS
45
+
46
+ == License
47
+ Copyright (c) 2008 SUGAWARA Genki <sgwr_dts@yahoo.co.jp>
48
+ All rights reserved.
49
+
50
+ Redistribution and use in source and binary forms, with or without modification,
51
+ are permitted provided that the following conditions are met:
52
+
53
+ * Redistributions of source code must retain the above copyright notice,
54
+ this list of conditions and the following disclaimer.
55
+ * Redistributions in binary form must reproduce the above copyright notice,
56
+ this list of conditions and the following disclaimer in the documentation
57
+ and/or other materials provided with the distribution.
58
+ * The names of its contributors may be used to endorse or promote products
59
+ derived from this software without specific prior written permission.
60
+
61
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
62
+ ANY EXPRESS OR IMPLIED WARRANTIES,
63
+ INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
64
+ FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
65
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
66
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
67
+ OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
68
+ INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
69
+ STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
70
+ OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
71
+ DAMAGE.
72
+
73
+ === libdivsufsort
74
+ divsufsort contains libdivsufsort.
75
+
76
+ * libdivsufsort is a lightweight suffix-sorting library.
77
+ * http://code.google.com/p/libdivsufsort/
@@ -0,0 +1,227 @@
1
+ #ifdef DIVSUFSORT_EXPORTS
2
+ #define DLLEXPORT __declspec(dllexport)
3
+ #else
4
+ #define DLLEXPORT
5
+ #endif
6
+
7
+ #define VERSION "0.1.0"
8
+
9
+ #include "divsufsort.h"
10
+ #include "ruby.h"
11
+ #include "rubysig.h"
12
+
13
+ #ifndef RSTRING_PTR
14
+ #define RSTRING_PTR(s) (RSTRING(s)->ptr)
15
+ #endif
16
+ #ifndef RSTRING_LEN
17
+ #define RSTRING_LEN(s) (RSTRING(s)->len)
18
+ #endif
19
+ /* RB_STR_CAT_INT(o, n): appends the low 32 bits of integer n to Ruby string o as four bytes, least significant byte first, via rb_str_cat(). n is evaluated four times, so pass an expression without side effects. */
20
+ #define RB_STR_CAT_INT(o, n) do { \
21
+ unsigned char c[4]; \
22
+ c[0] = (unsigned char) (((n) >> 0) & 0xff); \
23
+ c[1] = (unsigned char) (((n) >> 8) & 0xff); \
24
+ c[2] = (unsigned char) (((n) >> 16) & 0xff); \
25
+ c[3] = (unsigned char) (((n) >> 24) & 0xff); \
26
+ rb_str_cat((o), c, 4); \
27
+ } while(0)
28
+ /* PTR_READ_INT(p, n, x): reads the four bytes at p as a least-significant-byte-first integer into x, then advances p by 4 and decreases n by 4; p, n and x must be lvalues and the caller must have checked that at least 4 bytes remain. NOTE(review): c[3] << 24 is evaluated in int, so a top byte >= 0x80 shifts into the sign bit - treat the decoded value as untrusted and range-check it. */
29
+ #define PTR_READ_INT(p, n, x) do { \
30
+ unsigned char c[4]; \
31
+ c[0] = (unsigned char) (p)[0]; \
32
+ c[1] = (unsigned char) (p)[1]; \
33
+ c[2] = (unsigned char) (p)[2]; \
34
+ c[3] = (unsigned char) (p)[3]; \
35
+ (x) = (c[0] << 0) | (c[1] << 8) | (c[2] << 16) | (c[3] << 24); \
36
+ (p) += 4; \
37
+ (n) -= 4; \
38
+ } while(0)
39
+
40
+ static VALUE Divsufsort;
41
+
42
+ /* */
43
+ static VALUE divsufsort_divsufsort(VALUE self, VALUE src) {
44
+ VALUE dst;
45
+ sauchar_t *T;
46
+ saidx_t *SA;
47
+ char *p;
48
+ long n, i;
49
+ saint_t err;
50
+
51
+ Check_Type(src, T_STRING);
52
+ p = RSTRING_PTR(src);
53
+ n = RSTRING_LEN(src);
54
+
55
+ if(n >= 0x7fffffff) {
56
+ rb_raise(rb_eRuntimeError, "Input data is too big.");
57
+ }
58
+
59
+ T = (sauchar_t *) xmalloc((size_t) n * sizeof(sauchar_t));
60
+ SA = (saidx_t *) xmalloc((size_t) n * sizeof(saidx_t));
61
+ dst = rb_ary_new();
62
+ memcpy(T, p, (size_t) n * sizeof(sauchar_t));
63
+
64
+ TRAP_BEG;
65
+ err = divsufsort(T, SA, (saidx_t) n);
66
+ TRAP_END;
67
+
68
+ if(err != 0) {
69
+ xfree(SA);
70
+ xfree(T);
71
+ rb_raise(rb_eRuntimeError, "Cannot allocate memory.");
72
+ }
73
+
74
+ TRAP_BEG;
75
+ err = sufcheck(T, SA, (saidx_t) n, 0);
76
+ TRAP_END;
77
+
78
+ if(err != 0) {
79
+ xfree(SA);
80
+ xfree(T);
81
+ rb_raise(rb_eRuntimeError, "Wrong suffix array.");
82
+ }
83
+
84
+ for(i = 0; i < n; i++) {
85
+ saidx_t SA_i = SA[i];
86
+ rb_ary_push(dst, LONG2NUM(SA_i));
87
+ }
88
+
89
+ xfree(SA);
90
+ xfree(T);
91
+
92
+ if(err != 0) {
93
+ rb_raise(rb_eRuntimeError, "Cannot allocate memory.");
94
+ }
95
+
96
+ return dst;
97
+ }
98
+
99
+ /* Divsufsort.divbwt(src[, blocksize]) -> String: Burrows-Wheeler transforms the String src block by block with libdivsufsort's divbwt(). blocksize is in MiB (default 32; 0 or anything above 512 selects a single block size of up to 512 MiB). The result is a 4-byte block size in bytes followed, for every block, by its 4-byte primary index and the transformed bytes (integers written by RB_STR_CAT_INT). Raises RuntimeError for a negative blocksize or when divbwt() fails. */
100
+ static VALUE divsufsort_divbwt(int argc, VALUE *argv, VALUE self) {
101
+ VALUE src, v_blocksize, transformed;
102
+ sauchar_t *T;
103
+ saidx_t *SA;
104
+ char *p;
105
+ long n, T_len;
106
+ saint_t blocksize = 32; /* default block size, in MiB */
107
+ saidx_t pidx = 0; /* must be defined for the check after the loop when src is empty */
108
+
109
+ rb_scan_args(argc, argv, "11", &src, &v_blocksize);
110
+ Check_Type(src, T_STRING);
111
+
112
+ if (!NIL_P(v_blocksize)) {
113
+ blocksize = (saint_t) NUM2INT(v_blocksize);
114
+ }
115
+
116
+ if(blocksize < 0) {
117
+ rb_raise(rb_eRuntimeError, "bw_transform: Invalid arguments.");
118
+ }
119
+
120
+ p = RSTRING_PTR(src);
121
+ n = RSTRING_LEN(src);
122
+ /* MiB -> bytes. Only the block size is capped at 512 MiB; the input length n is left alone so no data is dropped, and the shift can no longer overflow. */
123
+ blocksize = (blocksize == 0 || blocksize > 512) ? 0x20000000 : (blocksize << 20);
124
+ if(n < blocksize) {
125
+ blocksize = (saint_t) n;
126
+ }
127
+
128
+ T_len = blocksize * sizeof(sauchar_t);
129
+ transformed = rb_str_new("", 0); /* created before the raw buffers so a failure here cannot leak them */
130
+ T = (sauchar_t *) xmalloc(T_len);
131
+ SA = (saidx_t *) xmalloc(blocksize * sizeof(saidx_t));
132
+
133
+ RB_STR_CAT_INT(transformed, blocksize);
134
+
135
+ while (n > 0) {
136
+ int m = (int) ((n < T_len) ? n : T_len); /* bytes in this block; at most T_len <= 0x20000000 */
137
+
138
+ memcpy(T, p, m);
139
+ p += m; n -= m;
140
+
141
+ TRAP_BEG;
142
+ pidx = divbwt(T, T, SA, m); /* transforms T in place; a negative result is an error code */
143
+ TRAP_END;
144
+
145
+ if(pidx < 0) {
146
+ break;
147
+ }
148
+
149
+ RB_STR_CAT_INT(transformed, pidx);
150
+ rb_str_cat(transformed, (const char *) T, m);
151
+ }
152
+
153
+ xfree(SA);
154
+ xfree(T);
155
+
156
+ if(pidx < 0) {
157
+ rb_raise(rb_eRuntimeError, "bw_transform: %s.", (pidx == -1) ? "Invalid arguments" : "Cannot allocate memory");
158
+ }
159
+
160
+ return transformed;
161
+ }
162
+
163
+ /* */
164
+ static VALUE divsufsort_inverse_bw_transform(VALUE self, VALUE transformed) {
165
+ VALUE dst;
166
+ sauchar_t *T;
167
+ saidx_t *A;
168
+ char *p;
169
+ long n;
170
+ saint_t blocksize;
171
+ int err = 0;
172
+
173
+ Check_Type(transformed, T_STRING);
174
+ p = RSTRING_PTR(transformed);
175
+ n = RSTRING_LEN(transformed);
176
+
177
+ if (n < 4) {
178
+ rb_raise(rb_eRuntimeError, "reverseBWT: Invalid data.\n");
179
+ }
180
+
181
+ PTR_READ_INT(p, n, blocksize);
182
+ T = (sauchar_t *) xmalloc(blocksize * sizeof(sauchar_t));
183
+ A = (saidx_t *) xmalloc(blocksize * sizeof(saidx_t));
184
+ dst = rb_str_new("", 0);
185
+
186
+ while (n > 0) {
187
+ int m;
188
+ saidx_t pidx;
189
+
190
+ if (n < 4) {
191
+ err = -1;
192
+ break;
193
+ }
194
+
195
+ PTR_READ_INT(p, n, pidx);
196
+ m = (n < blocksize) ? n : blocksize;
197
+ memcpy(T, p, m);
198
+ p += m; n -= m;
199
+
200
+ TRAP_BEG;
201
+ err = inverse_bw_transform(T, T, A, m, pidx);
202
+ TRAP_END;
203
+
204
+ if(err != 0) {
205
+ break;
206
+ }
207
+
208
+ rb_str_cat(dst, T, m);
209
+ }
210
+
211
+ xfree(A);
212
+ xfree(T);
213
+
214
+ if (err != 0) {
215
+ rb_raise(rb_eRuntimeError, "reverseBWT: %s.\n", (err == -1) ? "Invalid data" : "Cannot allocate memory");
216
+ }
217
+
218
+ return dst;
219
+ }
220
+ /* Init_divsufsort: defines the Divsufsort module, its VERSION constant, and the three module functions implemented in this file. */
221
+ void DLLEXPORT Init_divsufsort() {
222
+ Divsufsort = rb_define_module("Divsufsort");
223
+ rb_define_const(Divsufsort, "VERSION", rb_str_new2(VERSION));
224
+ rb_define_module_function(Divsufsort, "divsufsort", divsufsort_divsufsort, 1); /* takes one argument: src */
225
+ rb_define_module_function(Divsufsort, "divbwt", divsufsort_divbwt, -1); /* -1: arguments arrive as argc/argv (src plus optional blocksize) */
226
+ rb_define_module_function(Divsufsort, "inverse_bw_transform", divsufsort_inverse_bw_transform, 1); /* takes one argument: transformed */
227
+ }
metadata ADDED
@@ -0,0 +1,57 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: divsufsort
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: mswin32
6
+ authors:
7
+ - winebarrel
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+
12
+ date: 2008-10-22 00:00:00 +09:00
13
+ default_executable:
14
+ dependencies: []
15
+
16
+ description:
17
+ email: sgwr_dts@yahoo.co.jp
18
+ executables: []
19
+
20
+ extensions: []
21
+
22
+ extra_rdoc_files:
23
+ - README.txt
24
+ - ext/divsufsort_ruby.c
25
+ files:
26
+ - lib/i386-mswin32/divsufsort.so
27
+ - README.txt
28
+ - ext/divsufsort_ruby.c
29
+ has_rdoc: true
30
+ homepage: http://divsufsort.rubyforge.org
31
+ post_install_message:
32
+ rdoc_options:
33
+ - --title
34
+ - divsufsort - Ruby bindings for libdivsufsort.
35
+ require_paths:
36
+ - lib/i386-mswin32
37
+ required_ruby_version: !ruby/object:Gem::Requirement
38
+ requirements:
39
+ - - ">="
40
+ - !ruby/object:Gem::Version
41
+ version: "0"
42
+ version:
43
+ required_rubygems_version: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: "0"
48
+ version:
49
+ requirements: []
50
+
51
+ rubyforge_project: divsufsort
52
+ rubygems_version: 1.2.0
53
+ signing_key:
54
+ specification_version: 2
55
+ summary: Ruby bindings for libdivsufsort.
56
+ test_files: []
57
+