divsufsort 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,77 @@
1
+ = divsufsort
2
+
3
+ Copyright (c) 2008 SUGAWARA Genki <sgwr_dts@yahoo.co.jp>
4
+
5
+ == Description
6
+
7
+ Ruby bindings for libdivsufsort.
8
+
9
+ libdivsufsort is a C library that constructs the suffix array and the Burrows-Wheeler transformed string of a given text.
10
+
11
+ == Project Page
12
+
13
+ http://rubyforge.org/projects/divsufsort
14
+
15
+ == Install
16
+
17
+ gem install divsufsort
18
+
19
+ == Example
20
+ === Burrows-Wheeler Transform/Inverse Burrows-Wheeler Transform
21
+
22
+ require 'divsufsort'
23
+ include Divsufsort
24
+
25
+ bwt = divbwt(<<-EOS)
26
+ London bridge is falling down,
27
+ Falling down, falling down,
28
+ London bridge is falling down,
29
+ My fair Lady.
30
+ EOS
31
+
32
+ unbwt = inverse_bw_transform(bwt)
33
+
34
+ === Construct the suffix array
35
+
36
+ require 'divsufsort'
37
+ include Divsufsort
38
+
39
+ sa = divsufsort(<<-EOS)
40
+ London bridge is falling down,
41
+ Falling down, falling down,
42
+ London bridge is falling down,
43
+ My fair Lady.
44
+ EOS
45
+
46
+ == License
47
+ Copyright (c) 2008 SUGAWARA Genki <sgwr_dts@yahoo.co.jp>
48
+ All rights reserved.
49
+
50
+ Redistribution and use in source and binary forms, with or without modification,
51
+ are permitted provided that the following conditions are met:
52
+
53
+ * Redistributions of source code must retain the above copyright notice,
54
+ this list of conditions and the following disclaimer.
55
+ * Redistributions in binary form must reproduce the above copyright notice,
56
+ this list of conditions and the following disclaimer in the documentation
57
+ and/or other materials provided with the distribution.
58
+ * The names of its contributors may be used to endorse or promote products
59
+ derived from this software without specific prior written permission.
60
+
61
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
62
+ ANY EXPRESS OR IMPLIED WARRANTIES,
63
+ INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
64
+ FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
65
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
66
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
67
+ OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
68
+ INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
69
+ STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
70
+ OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
71
+ DAMAGE.
72
+
73
+ === libdivsufsort
74
+ The divsufsort gem bundles the libdivsufsort sources.
75
+
76
+ * libdivsufsort is a lightweight suffix-sorting library.
77
+ * http://code.google.com/p/libdivsufsort/
@@ -0,0 +1,149 @@
1
+
2
+ SHELL = /bin/sh
3
+
4
+ #### Start of system configuration section. ####
5
+
6
+ srcdir = .
7
+ topdir = /usr/lib/ruby/1.8/i486-linux
8
+ hdrdir = $(topdir)
9
+ VPATH = $(srcdir):$(topdir):$(hdrdir)
10
+ prefix = $(DESTDIR)/usr
11
+ exec_prefix = $(DESTDIR)/usr
12
+ sitedir = $(DESTDIR)/usr/local/lib/site_ruby
13
+ rubylibdir = $(libdir)/ruby/$(ruby_version)
14
+ docdir = $(datarootdir)/doc/$(PACKAGE)
15
+ dvidir = $(docdir)
16
+ datarootdir = $(prefix)/share
17
+ archdir = $(rubylibdir)/$(arch)
18
+ sbindir = $(exec_prefix)/sbin
19
+ psdir = $(docdir)
20
+ localedir = $(datarootdir)/locale
21
+ htmldir = $(docdir)
22
+ datadir = $(datarootdir)
23
+ includedir = $(prefix)/include
24
+ infodir = $(prefix)/share/info
25
+ sysconfdir = $(DESTDIR)/etc
26
+ mandir = $(prefix)/share/man
27
+ libdir = $(DESTDIR)/usr/lib
28
+ sharedstatedir = $(prefix)/com
29
+ oldincludedir = $(DESTDIR)/usr/include
30
+ pdfdir = $(docdir)
31
+ sitearchdir = $(sitelibdir)/$(sitearch)
32
+ bindir = $(exec_prefix)/bin
33
+ localstatedir = $(DESTDIR)/var
34
+ sitelibdir = $(sitedir)/$(ruby_version)
35
+ libexecdir = $(prefix)/lib/ruby1.8
36
+
37
+ CC = cc
38
+ LIBRUBY = $(LIBRUBY_SO)
39
+ LIBRUBY_A = lib$(RUBY_SO_NAME)-static.a
40
+ LIBRUBYARG_SHARED = -l$(RUBY_SO_NAME)
41
+ LIBRUBYARG_STATIC = -l$(RUBY_SO_NAME)-static
42
+
43
+ RUBY_EXTCONF_H =
44
+ CFLAGS = -fPIC -fno-strict-aliasing -g -O2 -fPIC
45
+ INCFLAGS = -I. -I. -I/usr/lib/ruby/1.8/i486-linux -I.
46
+ CPPFLAGS = -DHAVE_DLFCN_H -DHAVE_FCNTL_H -DHAVE_INTTYPES_H -DHAVE_MEMORY_H -DHAVE_STDDEF_H -DHAVE_STDINT_H -DHAVE_STDLIB_H -DHAVE_STRINGS_H -DHAVE_STRING_H -DHAVE_SYS_STAT_H -DHAVE_SYS_TYPES_H -DHAVE_UNISTD_H
47
+ CXXFLAGS = $(CFLAGS)
48
+ DLDFLAGS = -L. -rdynamic -Wl,-export-dynamic
49
+ LDSHARED = $(CC) -shared
50
+ AR = ar
51
+ EXEEXT =
52
+
53
+ RUBY_INSTALL_NAME = ruby1.8
54
+ RUBY_SO_NAME = ruby1.8
55
+ arch = i486-linux
56
+ sitearch = i486-linux
57
+ ruby_version = 1.8
58
+ ruby = /usr/bin/ruby1.8
59
+ RUBY = $(ruby)
60
+ RM = rm -f
61
+ MAKEDIRS = mkdir -p
62
+ INSTALL = /usr/bin/install -c
63
+ INSTALL_PROG = $(INSTALL) -m 0755
64
+ INSTALL_DATA = $(INSTALL) -m 644
65
+ COPY = cp
66
+
67
+ #### End of system configuration section. ####
68
+
69
+ preload =
70
+
71
+ libpath = . $(libdir)
72
+ LIBPATH = -L"." -L"$(libdir)"
73
+ DEFFILE =
74
+
75
+ CLEANFILES =
76
+ DISTCLEANFILES =
77
+
78
+ extout =
79
+ extout_prefix =
80
+ target_prefix =
81
+ LOCAL_LIBS =
82
+ LIBS = $(LIBRUBYARG_SHARED) -lpthread -ldl -lcrypt -lm -lc
83
+ SRCS = divsufsort.c divsufsort_ruby.c sssort.c trsort.c utils.c
84
+ OBJS = divsufsort.o divsufsort_ruby.o sssort.o trsort.o utils.o
85
+ TARGET = divsufsort
86
+ DLLIB = $(TARGET).so
87
+ EXTSTATIC =
88
+ STATIC_LIB =
89
+
90
+ RUBYCOMMONDIR = $(sitedir)$(target_prefix)
91
+ RUBYLIBDIR = $(sitelibdir)$(target_prefix)
92
+ RUBYARCHDIR = $(sitearchdir)$(target_prefix)
93
+
94
+ TARGET_SO = $(DLLIB)
95
+ CLEANLIBS = $(TARGET).so $(TARGET).il? $(TARGET).tds $(TARGET).map
96
+ CLEANOBJS = *.o *.a *.s[ol] *.pdb *.exp *.bak
97
+
98
+ all: $(DLLIB)
99
+ static: $(STATIC_LIB)
100
+
101
+ clean:
102
+ @-$(RM) $(CLEANLIBS) $(CLEANOBJS) $(CLEANFILES)
103
+
104
+ distclean: clean
105
+ @-$(RM) Makefile $(RUBY_EXTCONF_H) conftest.* mkmf.log
106
+ @-$(RM) core ruby$(EXEEXT) *~ $(DISTCLEANFILES)
107
+
108
+ realclean: distclean
109
+ install: install-so install-rb
110
+
111
+ install-so: $(RUBYARCHDIR)
112
+ install-so: $(RUBYARCHDIR)/$(DLLIB)
113
+ $(RUBYARCHDIR)/$(DLLIB): $(DLLIB)
114
+ $(INSTALL_PROG) $(DLLIB) $(RUBYARCHDIR)
115
+ install-rb: pre-install-rb install-rb-default
116
+ install-rb-default: pre-install-rb-default
117
+ pre-install-rb: Makefile
118
+ pre-install-rb-default: Makefile
119
+ $(RUBYARCHDIR):
120
+ $(MAKEDIRS) $@
121
+
122
+ site-install: site-install-so site-install-rb
123
+ site-install-so: install-so
124
+ site-install-rb: install-rb
125
+
126
+ .SUFFIXES: .c .m .cc .cxx .cpp .C .o
127
+
128
+ .cc.o:
129
+ $(CXX) $(INCFLAGS) $(CPPFLAGS) $(CXXFLAGS) -c $<
130
+
131
+ .cxx.o:
132
+ $(CXX) $(INCFLAGS) $(CPPFLAGS) $(CXXFLAGS) -c $<
133
+
134
+ .cpp.o:
135
+ $(CXX) $(INCFLAGS) $(CPPFLAGS) $(CXXFLAGS) -c $<
136
+
137
+ .C.o:
138
+ $(CXX) $(INCFLAGS) $(CPPFLAGS) $(CXXFLAGS) -c $<
139
+
140
+ .c.o:
141
+ $(CC) $(INCFLAGS) $(CPPFLAGS) $(CFLAGS) -c $<
142
+
143
+ $(DLLIB): $(OBJS)
144
+ @-$(RM) $@
145
+ $(LDSHARED) -o $@ $(OBJS) $(LIBPATH) $(DLDFLAGS) $(LOCAL_LIBS) $(LIBS)
146
+
147
+
148
+
149
+ $(OBJS): ruby.h defines.h
@@ -0,0 +1,398 @@
1
+ /*
2
+ * divsufsort.c for libdivsufsort
3
+ * Copyright (c) 2003-2008 Yuta Mori All Rights Reserved.
4
+ *
5
+ * Permission is hereby granted, free of charge, to any person
6
+ * obtaining a copy of this software and associated documentation
7
+ * files (the "Software"), to deal in the Software without
8
+ * restriction, including without limitation the rights to use,
9
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
10
+ * copies of the Software, and to permit persons to whom the
11
+ * Software is furnished to do so, subject to the following
12
+ * conditions:
13
+ *
14
+ * The above copyright notice and this permission notice shall be
15
+ * included in all copies or substantial portions of the Software.
16
+ *
17
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
18
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
19
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
20
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
21
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
22
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
23
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
24
+ * OTHER DEALINGS IN THE SOFTWARE.
25
+ */
26
+
27
+ #include "divsufsort_private.h"
28
+ #ifdef _OPENMP
29
+ # include <omp.h>
30
+ #endif
31
+
32
+
33
+ /*- Private Functions -*/
34
+
35
+ /* Sorts suffixes of type B*.
+    Counts the type A, B and B* suffixes of T[0..n-1] through the BUCKET_*
+    macros, gathers the B* positions in SA, orders them with sssort and trsort,
+    and finally turns the bucket counts into start/end indices.
+    Returns m, the number of type B* suffixes that were found. */
36
+ static
37
+ saidx_t
38
+ sort_typeBstar(const sauchar_t *T, saidx_t *SA,
39
+ saidx_t *bucket_A, saidx_t *bucket_B,
40
+ saidx_t n) {
41
+ saidx_t *PAb, *ISAb, *buf;
42
+ #ifdef _OPENMP
43
+ saidx_t *curbuf;
44
+ saidx_t l;
45
+ #endif
46
+ saidx_t i, j, k, t, m, bufsize;
47
+ saint_t c0, c1;
48
+ #ifdef _OPENMP
49
+ saint_t d0, d1;
50
+ int tmp;
51
+ #endif
52
+
53
+ /* Initialize bucket arrays (all counters start at zero). */
54
+ for(i = 0; i < BUCKET_A_SIZE; ++i) { bucket_A[i] = 0; }
55
+ for(i = 0; i < BUCKET_B_SIZE; ++i) { bucket_B[i] = 0; }
56
+
57
+ /* Count the number of occurrences of the first one or two characters of each
58
+ type A, B and B* suffix. Moreover, store the beginning position of all
59
+ type B* suffixes into the array SA. */
60
+ for(i = n - 1, m = n, c0 = T[n - 1]; 0 <= i;) {
61
+ /* type A suffix: keep walking left while the characters do not decrease. */
62
+ do { ++BUCKET_A(c1 = c0); } while((0 <= --i) && ((c0 = T[i]) >= c1));
63
+ if(0 <= i) {
64
+ /* type B* suffix: T[i] < T[i + 1]; its position is pushed at the tail of SA. */
65
+ ++BUCKET_BSTAR(c0, c1);
66
+ SA[--m] = i;
67
+ /* type B suffix: keep walking left while the characters do not increase. */
68
+ for(--i, c1 = c0; (0 <= i) && ((c0 = T[i]) <= c1); --i, c1 = c0) {
69
+ ++BUCKET_B(c0, c1);
70
+ }
71
+ }
72
+ }
73
+ m = n - m;
74
+ /*
75
+ m now holds the number of type B* suffixes; their positions occupy
76
+ SA[n - m .. n - 1].
+
+ note:
+ A type B* suffix is lexicographically smaller than a type B suffix that
77
+ begins with the same first two characters.
78
+ */
79
+
80
+ /* Calculate the index of start/end point of each bucket. */
81
+ for(c0 = 0, i = 0, j = 0; c0 < ALPHABET_SIZE; ++c0) {
82
+ t = i + BUCKET_A(c0);
83
+ BUCKET_A(c0) = i + j; /* start point */
84
+ i = t + BUCKET_B(c0, c0);
85
+ for(c1 = c0 + 1; c1 < ALPHABET_SIZE; ++c1) {
86
+ j += BUCKET_BSTAR(c0, c1);
87
+ BUCKET_BSTAR(c0, c1) = j; /* end point */
88
+ i += BUCKET_B(c0, c1);
89
+ }
90
+ }
91
+
92
+ if(0 < m) {
93
+ /* Sort the type B* suffixes by their first two characters.
+    PAb points at the B* positions stored at the tail of SA; ISAb is the
+    region that starts at SA + m. Indices into PAb (not text positions) are
+    scattered into SA[0 .. m - 1] by decrementing the bucket end points. */
94
+ PAb = SA + n - m; ISAb = SA + m;
95
+ for(i = m - 2; 0 <= i; --i) {
96
+ t = PAb[i], c0 = T[t], c1 = T[t + 1];
97
+ SA[--BUCKET_BSTAR(c0, c1)] = i;
98
+ }
99
+ t = PAb[m - 1], c0 = T[t], c1 = T[t + 1];
100
+ SA[--BUCKET_BSTAR(c0, c1)] = m - 1;
101
+
102
+ /* Sort the type B* substrings using sssort.
+    The n - 2m slots starting at SA + m are handed to sssort as its buffer.
+    With OpenMP that buffer is divided evenly between the threads, and each
+    thread picks the next bucket holding more than one entry inside the
+    sssort_lock critical section. */
103
+ #ifdef _OPENMP
104
+ tmp = omp_get_max_threads();
105
+ buf = SA + m, bufsize = (n - (2 * m)) / tmp;
106
+ c0 = ALPHABET_SIZE - 2, c1 = ALPHABET_SIZE - 1, j = m;
107
+ #pragma omp parallel default(shared) private(curbuf, k, l, d0, d1, tmp)
108
+ {
109
+ tmp = omp_get_thread_num();
110
+ curbuf = buf + tmp * bufsize;
111
+ k = 0;
112
+ for(;;) {
113
+ #pragma omp critical(sssort_lock)
114
+ {
115
+ if(0 < (l = j)) {
116
+ d0 = c0, d1 = c1;
117
+ do {
118
+ k = BUCKET_BSTAR(d0, d1);
119
+ if(--d1 <= d0) {
120
+ d1 = ALPHABET_SIZE - 1;
121
+ if(--d0 < 0) { break; }
122
+ }
123
+ } while(((l - k) <= 1) && (0 < (l = k)));
124
+ c0 = d0, c1 = d1, j = k;
125
+ }
126
+ }
127
+ if(l == 0) { break; }
128
+ sssort(T, PAb, SA + k, SA + l,
129
+ curbuf, bufsize, 2, n, *(SA + k) == (m - 1));
130
+ }
131
+ }
132
+ #else
133
+ buf = SA + m, bufsize = n - (2 * m);
134
+ for(c0 = ALPHABET_SIZE - 2, j = m; 0 < j; --c0) {
135
+ for(c1 = ALPHABET_SIZE - 1; c0 < c1; j = i, --c1) {
136
+ i = BUCKET_BSTAR(c0, c1);
137
+ if(1 < (j - i)) {
138
+ sssort(T, PAb, SA + i, SA + j,
139
+ buf, bufsize, 2, n, *(SA + i) == (m - 1));
140
+ }
141
+ }
142
+ }
143
+ #endif
144
+
145
+ /* Compute ranks of type B* substrings.
+    Scanning from the right: every entry of a run of non-negative values gets
+    its own index as rank, and the negated run length is written into the
+    run's first slot. A run of negative entries is flipped back with ~ and,
+    together with the non-negative entry in front of it, receives the index
+    of the run's rightmost element as a shared rank. */
146
+ for(i = m - 1; 0 <= i; --i) {
147
+ if(0 <= SA[i]) {
148
+ j = i;
149
+ do { ISAb[SA[i]] = i; } while((0 <= --i) && (0 <= SA[i]));
150
+ SA[i + 1] = i - j;
151
+ if(i <= 0) { break; }
152
+ }
153
+ j = i;
154
+ do { ISAb[SA[i] = ~SA[i]] = j; } while(SA[--i] < 0);
155
+ ISAb[SA[i]] = j;
156
+ }
157
+
158
+ /* Construct the inverse suffix array of type B* suffixes using trsort. */
159
+ trsort(ISAb, SA, m, 1);
160
+
161
+ /* Set the sorted order of type B* suffixes.
+    T is re-scanned to enumerate the B* positions again; position t is written
+    to SA[ISAb[--j]] as ~t when t > 0 and T[t - 1] > T[t], otherwise as t. */
162
+ for(i = n - 1, j = m, c0 = T[n - 1]; 0 <= i;) {
163
+ for(--i, c1 = c0; (0 <= i) && ((c0 = T[i]) >= c1); --i, c1 = c0) { }
164
+ if(0 <= i) {
165
+ t = i;
166
+ for(--i, c1 = c0; (0 <= i) && ((c0 = T[i]) <= c1); --i, c1 = c0) { }
167
+ SA[ISAb[--j]] = ((t == 0) || (1 < (t - i))) ? t : ~t;
168
+ }
169
+ }
170
+
171
+ /* Calculate the index of start/end point of each bucket. */
172
+ BUCKET_B(ALPHABET_SIZE - 1, ALPHABET_SIZE - 1) = n; /* end point */
173
+ for(c0 = ALPHABET_SIZE - 2, k = m - 1; 0 <= c0; --c0) {
174
+ i = BUCKET_A(c0 + 1) - 1;
175
+ for(c1 = ALPHABET_SIZE - 1; c0 < c1; --c1) {
176
+ t = i - BUCKET_B(c0, c1);
177
+ BUCKET_B(c0, c1) = i; /* end point */
178
+
179
+ /* Move all type B* suffixes to the correct position. */
180
+ for(i = t, j = BUCKET_BSTAR(c0, c1);
181
+ j <= k;
182
+ --i, --k) { SA[i] = SA[k]; }
183
+ }
184
+ BUCKET_BSTAR(c0, c0 + 1) = i - BUCKET_B(c0, c0) + 1; /* start point */
185
+ BUCKET_B(c0, c0) = i; /* end point */
186
+ }
187
+ }
188
+
189
+ return m;
190
+ }
191
+
192
+ /* Constructs the suffix array by using the sorted order of type B* suffixes.
+    T is the text of length n, SA the array being completed, and m the number
+    of type B* suffixes (divsufsort passes the value returned by
+    sort_typeBstar). The bucket boundaries are read and rewritten through the
+    BUCKET_* macros while scanning. T[n - 2] is read, so n must be at least 2. */
193
+ static
194
+ void
195
+ construct_SA(const sauchar_t *T, saidx_t *SA,
196
+ saidx_t *bucket_A, saidx_t *bucket_B,
197
+ saidx_t n, saidx_t m) {
198
+ saidx_t *i, *j, *k;
199
+ saidx_t s;
200
+ saint_t c0, c1, c2;
201
+
202
+ if(0 < m) {
203
+ /* Construct the sorted order of type B suffixes by using
204
+ the sorted order of type B* suffixes. */
205
+ for(c1 = ALPHABET_SIZE - 2; 0 <= c1; --c1) {
206
+ /* Scan the suffix array from right to left.
+    Each positive entry s is flipped to ~s and its predecessor s - 1 is
+    written to the current end of the (T[s - 1], c1) bucket, bit-flipped when
+    s - 1 > 0 and T[s - 2] > T[s - 1]. Non-positive entries are flipped back
+    with ~. */
207
+ for(i = SA + BUCKET_BSTAR(c1, c1 + 1),
208
+ j = SA + BUCKET_A(c1 + 1) - 1, k = NULL, c2 = -1;
209
+ i <= j;
210
+ --j) {
211
+ if(0 < (s = *j)) {
212
+ assert(T[s] == c1);
213
+ assert(((s + 1) < n) && (T[s] <= T[s + 1]));
214
+ assert(T[s - 1] <= T[s]);
215
+ *j = ~s;
216
+ c0 = T[--s];
217
+ if((0 < s) && (T[s - 1] > c0)) { s = ~s; }
218
+ if(c0 != c2) {
219
+ if(0 <= c2) { BUCKET_B(c2, c1) = k - SA; }
220
+ k = SA + BUCKET_B(c2 = c0, c1);
221
+ }
222
+ assert(k < j);
223
+ *k-- = s;
224
+ } else {
225
+ assert(((s == 0) && (T[s] == c1)) || (s < 0));
226
+ *j = ~s;
227
+ }
228
+ }
229
+ }
230
+ }
231
+
232
+ /* Construct the suffix array by using
233
+ the sorted order of type B suffixes.
+    The last suffix n - 1 is seeded at the BUCKET_A slot of its first
+    character, bit-flipped when T[n - 2] < T[n - 1]. */
234
+ k = SA + BUCKET_A(c2 = T[n - 1]);
235
+ *k++ = (T[n - 2] < c2) ? ~(n - 1) : (n - 1);
236
+ /* Scan the suffix array from left to right.
+    For each positive entry s the predecessor s - 1 is appended to the
+    BUCKET_A region of T[s - 1], bit-flipped when s - 1 == 0 or
+    T[s - 2] < T[s - 1]. Non-positive entries are restored with ~. */
237
+ for(i = SA, j = SA + n; i < j; ++i) {
238
+ if(0 < (s = *i)) {
239
+ assert(T[s - 1] >= T[s]);
240
+ c0 = T[--s];
241
+ if((s == 0) || (T[s - 1] < c0)) { s = ~s; }
242
+ if(c0 != c2) {
243
+ BUCKET_A(c2) = k - SA;
244
+ k = SA + BUCKET_A(c2 = c0);
245
+ }
246
+ assert(i < k);
247
+ *k++ = s;
248
+ } else {
249
+ assert(s < 0);
250
+ *i = ~s;
251
+ }
252
+ }
253
+ }
254
+
255
+ /* Constructs the burrows-wheeler transformed string directly
256
+ by using the sorted order of type B* suffixes.
+    Has the same two-pass structure as construct_SA, but the slots of SA end
+    up holding preceding characters instead of suffix positions.
+    Returns the offset within SA of the last slot whose value was 0 during the
+    final left-to-right scan (0 if no such slot is seen); divbwt adds one to
+    this value before returning it. T[n - 2] is read, so n must be at least 2. */
257
+ static
258
+ saidx_t
259
+ construct_BWT(const sauchar_t *T, saidx_t *SA,
260
+ saidx_t *bucket_A, saidx_t *bucket_B,
261
+ saidx_t n, saidx_t m) {
262
+ saidx_t *i, *j, *k, *orig;
263
+ saidx_t s;
264
+ saint_t c0, c1, c2;
265
+
266
+ if(0 < m) {
267
+ /* Construct the sorted order of type B suffixes by using
268
+ the sorted order of type B* suffixes. */
269
+ for(c1 = ALPHABET_SIZE - 2; 0 <= c1; --c1) {
270
+ /* Scan the suffix array from right to left.
+    A positive entry s is replaced by the bit-flipped preceding character
+    ~T[s - 1], and the predecessor s - 1 is written to the current end of the
+    (T[s - 1], c1) bucket, bit-flipped when s - 1 > 0 and T[s - 2] > T[s - 1].
+    Negative entries are flipped back with ~; a zero entry is left as is. */
271
+ for(i = SA + BUCKET_BSTAR(c1, c1 + 1),
272
+ j = SA + BUCKET_A(c1 + 1) - 1, k = NULL, c2 = -1;
273
+ i <= j;
274
+ --j) {
275
+ if(0 < (s = *j)) {
276
+ assert(T[s] == c1);
277
+ assert(((s + 1) < n) && (T[s] <= T[s + 1]));
278
+ assert(T[s - 1] <= T[s]);
279
+ c0 = T[--s];
280
+ *j = ~((saidx_t)c0);
281
+ if((0 < s) && (T[s - 1] > c0)) { s = ~s; }
282
+ if(c0 != c2) {
283
+ if(0 <= c2) { BUCKET_B(c2, c1) = k - SA; }
284
+ k = SA + BUCKET_B(c2 = c0, c1);
285
+ }
286
+ assert(k < j);
287
+ *k-- = s;
288
+ } else if(s != 0) {
289
+ *j = ~s;
290
+ #ifndef NDEBUG
291
+ } else {
292
+ assert(T[s] == c1);
293
+ #endif
294
+ }
295
+ }
296
+ }
297
+ }
298
+
299
+ /* Construct the BWTed string by using
300
+ the sorted order of type B suffixes.
+    The slot for the last suffix is seeded with ~T[n - 2] when
+    T[n - 2] < T[n - 1], otherwise with the position n - 1. */
301
+ k = SA + BUCKET_A(c2 = T[n - 1]);
302
+ *k++ = (T[n - 2] < c2) ? ~((saidx_t)T[n - 2]) : (n - 1);
303
+ /* Scan the suffix array from left to right.
+    A positive entry s is overwritten with the preceding character T[s - 1];
+    the value appended to the BUCKET_A region of that character is s - 1, or
+    ~T[s - 2] when s - 1 > 0 and T[s - 2] < T[s - 1]. Negative entries are
+    restored with ~, and the slot holding 0 is remembered in orig. */
304
+ for(i = SA, j = SA + n, orig = SA; i < j; ++i) {
305
+ if(0 < (s = *i)) {
306
+ assert(T[s - 1] >= T[s]);
307
+ c0 = T[--s];
308
+ *i = c0;
309
+ if((0 < s) && (T[s - 1] < c0)) { s = ~((saidx_t)T[s - 1]); }
310
+ if(c0 != c2) {
311
+ BUCKET_A(c2) = k - SA;
312
+ k = SA + BUCKET_A(c2 = c0);
313
+ }
314
+ assert(i < k);
315
+ *k++ = s;
316
+ } else if(s != 0) {
317
+ *i = ~s;
318
+ } else {
319
+ orig = i;
320
+ }
321
+ }
322
+
323
+ return orig - SA;
324
+ }
325
+
326
+
327
+ /*---------------------------------------------------------------------------*/
328
+
329
+ /*- Function -*/
330
+
331
+ saint_t
332
+ divsufsort(const sauchar_t *T, saidx_t *SA, saidx_t n) {
333
+ saidx_t *bucket_A, *bucket_B;
334
+ saidx_t m;
335
+ saint_t err = 0;
336
+
337
+ /* Check arguments. */
338
+ if((T == NULL) || (SA == NULL) || (n < 0)) { return -1; }
339
+ else if(n == 0) { return 0; }
340
+ else if(n == 1) { SA[0] = 0; return 0; }
341
+ else if(n == 2) { m = (T[0] < T[1]); SA[m ^ 1] = 0, SA[m] = 1; return 0; }
342
+
343
+ bucket_A = (saidx_t *)malloc(BUCKET_A_SIZE * sizeof(saidx_t));
344
+ bucket_B = (saidx_t *)malloc(BUCKET_B_SIZE * sizeof(saidx_t));
345
+
346
+ /* Suffixsort. */
347
+ if((bucket_A != NULL) && (bucket_B != NULL)) {
348
+ m = sort_typeBstar(T, SA, bucket_A, bucket_B, n);
349
+ construct_SA(T, SA, bucket_A, bucket_B, n, m);
350
+ } else {
351
+ err = -2;
352
+ }
353
+
354
+ free(bucket_B);
355
+ free(bucket_A);
356
+
357
+ return err;
358
+ }
359
+
360
+ saidx_t
361
+ divbwt(const sauchar_t *T, sauchar_t *U, saidx_t *A, saidx_t n) {
362
+ saidx_t *B;
363
+ saidx_t *bucket_A, *bucket_B;
364
+ saidx_t m, pidx, i;
365
+
366
+ /* Check arguments. */
367
+ if((T == NULL) || (U == NULL) || (n < 0)) { return -1; }
368
+ else if(n <= 1) { if(n == 1) { U[0] = T[0]; } return n; }
369
+
370
+ if((B = A) == NULL) { B = (saidx_t *)malloc((size_t)(n + 1) * sizeof(saidx_t)); }
371
+ bucket_A = (saidx_t *)malloc(BUCKET_A_SIZE * sizeof(saidx_t));
372
+ bucket_B = (saidx_t *)malloc(BUCKET_B_SIZE * sizeof(saidx_t));
373
+
374
+ /* Burrows-Wheeler Transform. */
375
+ if((B != NULL) && (bucket_A != NULL) && (bucket_B != NULL)) {
376
+ m = sort_typeBstar(T, B, bucket_A, bucket_B, n);
377
+ pidx = construct_BWT(T, B, bucket_A, bucket_B, n, m);
378
+
379
+ /* Copy to output string. */
380
+ U[0] = T[n - 1];
381
+ for(i = 0; i < pidx; ++i) { U[i + 1] = (sauchar_t)B[i]; }
382
+ for(i += 1; i < n; ++i) { U[i] = (sauchar_t)B[i]; }
383
+ pidx += 1;
384
+ } else {
385
+ pidx = -2;
386
+ }
387
+
388
+ free(bucket_B);
389
+ free(bucket_A);
390
+ if(A == NULL) { free(B); }
391
+
392
+ return pidx;
393
+ }
394
+
395
+ const char *
396
+ divsufsort_version(void) {
397
+ return PROJECT_VERSION_FULL;
398
+ }