divsufsort 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/README.txt +77 -0
- data/ext/Makefile +149 -0
- data/ext/divsufsort.c +398 -0
- data/ext/divsufsort.h +191 -0
- data/ext/divsufsort.o +0 -0
- data/ext/divsufsort.so +0 -0
- data/ext/divsufsort_private.h +207 -0
- data/ext/divsufsort_ruby.c +227 -0
- data/ext/divsufsort_ruby.o +0 -0
- data/ext/extconf.rb +18 -0
- data/ext/lfs.h +56 -0
- data/ext/mkmf.log +266 -0
- data/ext/sssort.c +815 -0
- data/ext/sssort.o +0 -0
- data/ext/trsort.c +586 -0
- data/ext/trsort.o +0 -0
- data/ext/utils.c +381 -0
- data/ext/utils.o +0 -0
- data/libdivsufsort/COPYING +27 -0
- data/libdivsufsort/divsufsort.c +398 -0
- data/libdivsufsort/divsufsort.h +191 -0
- data/libdivsufsort/divsufsort_private.h +207 -0
- data/libdivsufsort/lfs.h +56 -0
- data/libdivsufsort/sssort.c +815 -0
- data/libdivsufsort/trsort.c +586 -0
- data/libdivsufsort/utils.c +381 -0
- metadata +80 -0
data/README.txt
ADDED
@@ -0,0 +1,77 @@
|
|
1
|
+
= divsufsort
|
2
|
+
|
3
|
+
Copyright (c) 2008 SUGAWARA Genki <sgwr_dts@yahoo.co.jp>
|
4
|
+
|
5
|
+
== Description
|
6
|
+
|
7
|
+
Ruby bindings for libdivsufsort.
|
8
|
+
|
9
|
+
libdivsufsort is a C library for constructing suffix arrays and Burrows-Wheeler transformed strings.
|
10
|
+
|
11
|
+
== Project Page
|
12
|
+
|
13
|
+
http://rubyforge.org/projects/divsufsort
|
14
|
+
|
15
|
+
== Install
|
16
|
+
|
17
|
+
gem install divsufsort
|
18
|
+
|
19
|
+
== Example
|
20
|
+
=== Burrows-Wheeler Transform/Inverse Burrows-Wheeler Transform
|
21
|
+
|
22
|
+
require 'divsufsort'
|
23
|
+
include Divsufsort
|
24
|
+
|
25
|
+
bwt = divbwt(<<-EOS)
|
26
|
+
London bridge is falling down,
|
27
|
+
Falling down, falling down,
|
28
|
+
London bridge is falling down,
|
29
|
+
My fair Lady.
|
30
|
+
EOS
|
31
|
+
|
32
|
+
unbwt = inverse_bw_transform(bwt)
|
33
|
+
|
34
|
+
=== Construct the suffix array
|
35
|
+
|
36
|
+
require 'divsufsort'
|
37
|
+
include Divsufsort
|
38
|
+
|
39
|
+
sa = divsufsort(<<-EOS)
|
40
|
+
London bridge is falling down,
|
41
|
+
Falling down, falling down,
|
42
|
+
London bridge is falling down,
|
43
|
+
My fair Lady.
|
44
|
+
EOS
|
45
|
+
|
46
|
+
== License
|
47
|
+
Copyright (c) 2008 SUGAWARA Genki <sgwr_dts@yahoo.co.jp>
|
48
|
+
All rights reserved.
|
49
|
+
|
50
|
+
Redistribution and use in source and binary forms, with or without modification,
|
51
|
+
are permitted provided that the following conditions are met:
|
52
|
+
|
53
|
+
* Redistributions of source code must retain the above copyright notice,
|
54
|
+
this list of conditions and the following disclaimer.
|
55
|
+
* Redistributions in binary form must reproduce the above copyright notice,
|
56
|
+
this list of conditions and the following disclaimer in the documentation
|
57
|
+
and/or other materials provided with the distribution.
|
58
|
+
* The names of its contributors may be used to endorse or promote products
|
59
|
+
derived from this software without specific prior written permission.
|
60
|
+
|
61
|
+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
62
|
+
ANY EXPRESS OR IMPLIED WARRANTIES,
|
63
|
+
INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
|
64
|
+
FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
65
|
+
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
66
|
+
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
|
67
|
+
OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
68
|
+
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
|
69
|
+
STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
70
|
+
OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
|
71
|
+
DAMAGE.
|
72
|
+
|
73
|
+
=== libdivsufsort
|
74
|
+
The divsufsort gem bundles a copy of libdivsufsort.
|
75
|
+
|
76
|
+
* libdivsufsort is a lightweight suffix-sorting library.
|
77
|
+
* http://code.google.com/p/libdivsufsort/
|
data/ext/Makefile
ADDED
@@ -0,0 +1,149 @@
|
|
1
|
+
|
2
|
+
SHELL = /bin/sh
|
3
|
+
|
4
|
+
#### Start of system configuration section. ####
|
5
|
+
|
6
|
+
srcdir = .
|
7
|
+
topdir = /usr/lib/ruby/1.8/i486-linux
|
8
|
+
hdrdir = $(topdir)
|
9
|
+
VPATH = $(srcdir):$(topdir):$(hdrdir)
|
10
|
+
prefix = $(DESTDIR)/usr
|
11
|
+
exec_prefix = $(DESTDIR)/usr
|
12
|
+
sitedir = $(DESTDIR)/usr/local/lib/site_ruby
|
13
|
+
rubylibdir = $(libdir)/ruby/$(ruby_version)
|
14
|
+
docdir = $(datarootdir)/doc/$(PACKAGE)
|
15
|
+
dvidir = $(docdir)
|
16
|
+
datarootdir = $(prefix)/share
|
17
|
+
archdir = $(rubylibdir)/$(arch)
|
18
|
+
sbindir = $(exec_prefix)/sbin
|
19
|
+
psdir = $(docdir)
|
20
|
+
localedir = $(datarootdir)/locale
|
21
|
+
htmldir = $(docdir)
|
22
|
+
datadir = $(datarootdir)
|
23
|
+
includedir = $(prefix)/include
|
24
|
+
infodir = $(prefix)/share/info
|
25
|
+
sysconfdir = $(DESTDIR)/etc
|
26
|
+
mandir = $(prefix)/share/man
|
27
|
+
libdir = $(DESTDIR)/usr/lib
|
28
|
+
sharedstatedir = $(prefix)/com
|
29
|
+
oldincludedir = $(DESTDIR)/usr/include
|
30
|
+
pdfdir = $(docdir)
|
31
|
+
sitearchdir = $(sitelibdir)/$(sitearch)
|
32
|
+
bindir = $(exec_prefix)/bin
|
33
|
+
localstatedir = $(DESTDIR)/var
|
34
|
+
sitelibdir = $(sitedir)/$(ruby_version)
|
35
|
+
libexecdir = $(prefix)/lib/ruby1.8
|
36
|
+
|
37
|
+
CC = cc
|
38
|
+
LIBRUBY = $(LIBRUBY_SO)
|
39
|
+
LIBRUBY_A = lib$(RUBY_SO_NAME)-static.a
|
40
|
+
LIBRUBYARG_SHARED = -l$(RUBY_SO_NAME)
|
41
|
+
LIBRUBYARG_STATIC = -l$(RUBY_SO_NAME)-static
|
42
|
+
|
43
|
+
RUBY_EXTCONF_H =
|
44
|
+
CFLAGS = -fPIC -fno-strict-aliasing -g -O2 -fPIC
|
45
|
+
INCFLAGS = -I. -I. -I/usr/lib/ruby/1.8/i486-linux -I.
|
46
|
+
CPPFLAGS = -DHAVE_DLFCN_H -DHAVE_FCNTL_H -DHAVE_INTTYPES_H -DHAVE_MEMORY_H -DHAVE_STDDEF_H -DHAVE_STDINT_H -DHAVE_STDLIB_H -DHAVE_STRINGS_H -DHAVE_STRING_H -DHAVE_SYS_STAT_H -DHAVE_SYS_TYPES_H -DHAVE_UNISTD_H
|
47
|
+
CXXFLAGS = $(CFLAGS)
|
48
|
+
DLDFLAGS = -L. -rdynamic -Wl,-export-dynamic
|
49
|
+
LDSHARED = $(CC) -shared
|
50
|
+
AR = ar
|
51
|
+
EXEEXT =
|
52
|
+
|
53
|
+
RUBY_INSTALL_NAME = ruby1.8
|
54
|
+
RUBY_SO_NAME = ruby1.8
|
55
|
+
arch = i486-linux
|
56
|
+
sitearch = i486-linux
|
57
|
+
ruby_version = 1.8
|
58
|
+
ruby = /usr/bin/ruby1.8
|
59
|
+
RUBY = $(ruby)
|
60
|
+
RM = rm -f
|
61
|
+
MAKEDIRS = mkdir -p
|
62
|
+
INSTALL = /usr/bin/install -c
|
63
|
+
INSTALL_PROG = $(INSTALL) -m 0755
|
64
|
+
INSTALL_DATA = $(INSTALL) -m 644
|
65
|
+
COPY = cp
|
66
|
+
|
67
|
+
#### End of system configuration section. ####
|
68
|
+
|
69
|
+
preload =
|
70
|
+
|
71
|
+
libpath = . $(libdir)
|
72
|
+
LIBPATH = -L"." -L"$(libdir)"
|
73
|
+
DEFFILE =
|
74
|
+
|
75
|
+
CLEANFILES =
|
76
|
+
DISTCLEANFILES =
|
77
|
+
|
78
|
+
extout =
|
79
|
+
extout_prefix =
|
80
|
+
target_prefix =
|
81
|
+
LOCAL_LIBS =
|
82
|
+
LIBS = $(LIBRUBYARG_SHARED) -lpthread -ldl -lcrypt -lm -lc
|
83
|
+
SRCS = divsufsort.c divsufsort_ruby.c sssort.c trsort.c utils.c
|
84
|
+
OBJS = divsufsort.o divsufsort_ruby.o sssort.o trsort.o utils.o
|
85
|
+
TARGET = divsufsort
|
86
|
+
DLLIB = $(TARGET).so
|
87
|
+
EXTSTATIC =
|
88
|
+
STATIC_LIB =
|
89
|
+
|
90
|
+
RUBYCOMMONDIR = $(sitedir)$(target_prefix)
|
91
|
+
RUBYLIBDIR = $(sitelibdir)$(target_prefix)
|
92
|
+
RUBYARCHDIR = $(sitearchdir)$(target_prefix)
|
93
|
+
|
94
|
+
TARGET_SO = $(DLLIB)
|
95
|
+
CLEANLIBS = $(TARGET).so $(TARGET).il? $(TARGET).tds $(TARGET).map
|
96
|
+
CLEANOBJS = *.o *.a *.s[ol] *.pdb *.exp *.bak
|
97
|
+
|
98
|
+
all: $(DLLIB)
|
99
|
+
static: $(STATIC_LIB)
|
100
|
+
|
101
|
+
clean:
|
102
|
+
@-$(RM) $(CLEANLIBS) $(CLEANOBJS) $(CLEANFILES)
|
103
|
+
|
104
|
+
distclean: clean
|
105
|
+
@-$(RM) Makefile $(RUBY_EXTCONF_H) conftest.* mkmf.log
|
106
|
+
@-$(RM) core ruby$(EXEEXT) *~ $(DISTCLEANFILES)
|
107
|
+
|
108
|
+
realclean: distclean
|
109
|
+
install: install-so install-rb
|
110
|
+
|
111
|
+
install-so: $(RUBYARCHDIR)
|
112
|
+
install-so: $(RUBYARCHDIR)/$(DLLIB)
|
113
|
+
$(RUBYARCHDIR)/$(DLLIB): $(DLLIB)
|
114
|
+
$(INSTALL_PROG) $(DLLIB) $(RUBYARCHDIR)
|
115
|
+
install-rb: pre-install-rb install-rb-default
|
116
|
+
install-rb-default: pre-install-rb-default
|
117
|
+
pre-install-rb: Makefile
|
118
|
+
pre-install-rb-default: Makefile
|
119
|
+
$(RUBYARCHDIR):
|
120
|
+
$(MAKEDIRS) $@
|
121
|
+
|
122
|
+
site-install: site-install-so site-install-rb
|
123
|
+
site-install-so: install-so
|
124
|
+
site-install-rb: install-rb
|
125
|
+
|
126
|
+
.SUFFIXES: .c .m .cc .cxx .cpp .C .o
|
127
|
+
|
128
|
+
.cc.o:
|
129
|
+
$(CXX) $(INCFLAGS) $(CPPFLAGS) $(CXXFLAGS) -c $<
|
130
|
+
|
131
|
+
.cxx.o:
|
132
|
+
$(CXX) $(INCFLAGS) $(CPPFLAGS) $(CXXFLAGS) -c $<
|
133
|
+
|
134
|
+
.cpp.o:
|
135
|
+
$(CXX) $(INCFLAGS) $(CPPFLAGS) $(CXXFLAGS) -c $<
|
136
|
+
|
137
|
+
.C.o:
|
138
|
+
$(CXX) $(INCFLAGS) $(CPPFLAGS) $(CXXFLAGS) -c $<
|
139
|
+
|
140
|
+
.c.o:
|
141
|
+
$(CC) $(INCFLAGS) $(CPPFLAGS) $(CFLAGS) -c $<
|
142
|
+
|
143
|
+
$(DLLIB): $(OBJS)
|
144
|
+
@-$(RM) $@
|
145
|
+
$(LDSHARED) -o $@ $(OBJS) $(LIBPATH) $(DLDFLAGS) $(LOCAL_LIBS) $(LIBS)
|
146
|
+
|
147
|
+
|
148
|
+
|
149
|
+
$(OBJS): ruby.h defines.h
|
data/ext/divsufsort.c
ADDED
@@ -0,0 +1,398 @@
|
|
1
|
+
/*
|
2
|
+
* divsufsort.c for libdivsufsort
|
3
|
+
* Copyright (c) 2003-2008 Yuta Mori All Rights Reserved.
|
4
|
+
*
|
5
|
+
* Permission is hereby granted, free of charge, to any person
|
6
|
+
* obtaining a copy of this software and associated documentation
|
7
|
+
* files (the "Software"), to deal in the Software without
|
8
|
+
* restriction, including without limitation the rights to use,
|
9
|
+
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
10
|
+
* copies of the Software, and to permit persons to whom the
|
11
|
+
* Software is furnished to do so, subject to the following
|
12
|
+
* conditions:
|
13
|
+
*
|
14
|
+
* The above copyright notice and this permission notice shall be
|
15
|
+
* included in all copies or substantial portions of the Software.
|
16
|
+
*
|
17
|
+
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
18
|
+
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
19
|
+
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
20
|
+
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
21
|
+
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
22
|
+
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
23
|
+
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
24
|
+
* OTHER DEALINGS IN THE SOFTWARE.
|
25
|
+
*/
|
26
|
+
|
27
|
+
#include "divsufsort_private.h"
|
28
|
+
#ifdef _OPENMP
|
29
|
+
# include <omp.h>
|
30
|
+
#endif
|
31
|
+
|
32
|
+
|
33
|
+
/*- Private Functions -*/
|
34
|
+
|
35
|
+
/* Sorts suffixes of type B*. */
|
36
|
+
static
|
37
|
+
saidx_t
|
38
|
+
sort_typeBstar(const sauchar_t *T, saidx_t *SA,
|
39
|
+
saidx_t *bucket_A, saidx_t *bucket_B,
|
40
|
+
saidx_t n) {
|
41
|
+
saidx_t *PAb, *ISAb, *buf;
|
42
|
+
#ifdef _OPENMP
|
43
|
+
saidx_t *curbuf;
|
44
|
+
saidx_t l;
|
45
|
+
#endif
|
46
|
+
saidx_t i, j, k, t, m, bufsize;
|
47
|
+
saint_t c0, c1;
|
48
|
+
#ifdef _OPENMP
|
49
|
+
saint_t d0, d1;
|
50
|
+
int tmp;
|
51
|
+
#endif
|
52
|
+
|
53
|
+
/* Initialize bucket arrays. */
|
54
|
+
for(i = 0; i < BUCKET_A_SIZE; ++i) { bucket_A[i] = 0; }
|
55
|
+
for(i = 0; i < BUCKET_B_SIZE; ++i) { bucket_B[i] = 0; }
|
56
|
+
|
57
|
+
/* Count the number of occurrences of the first one or two characters of each
|
58
|
+
type A, B and B* suffix. Moreover, store the beginning position of all
|
59
|
+
type B* suffixes into the array SA. */
|
60
|
+
for(i = n - 1, m = n, c0 = T[n - 1]; 0 <= i;) {
|
61
|
+
/* type A suffix. */
|
62
|
+
do { ++BUCKET_A(c1 = c0); } while((0 <= --i) && ((c0 = T[i]) >= c1));
|
63
|
+
if(0 <= i) {
|
64
|
+
/* type B* suffix. */
|
65
|
+
++BUCKET_BSTAR(c0, c1);
|
66
|
+
SA[--m] = i;
|
67
|
+
/* type B suffix. */
|
68
|
+
for(--i, c1 = c0; (0 <= i) && ((c0 = T[i]) <= c1); --i, c1 = c0) {
|
69
|
+
++BUCKET_B(c0, c1);
|
70
|
+
}
|
71
|
+
}
|
72
|
+
}
|
73
|
+
m = n - m;
|
74
|
+
/*
|
75
|
+
note:
|
76
|
+
A type B* suffix is lexicographically smaller than a type B suffix that
|
77
|
+
begins with the same first two characters.
|
78
|
+
*/
|
79
|
+
|
80
|
+
/* Calculate the index of start/end point of each bucket. */
|
81
|
+
for(c0 = 0, i = 0, j = 0; c0 < ALPHABET_SIZE; ++c0) {
|
82
|
+
t = i + BUCKET_A(c0);
|
83
|
+
BUCKET_A(c0) = i + j; /* start point */
|
84
|
+
i = t + BUCKET_B(c0, c0);
|
85
|
+
for(c1 = c0 + 1; c1 < ALPHABET_SIZE; ++c1) {
|
86
|
+
j += BUCKET_BSTAR(c0, c1);
|
87
|
+
BUCKET_BSTAR(c0, c1) = j; /* end point */
|
88
|
+
i += BUCKET_B(c0, c1);
|
89
|
+
}
|
90
|
+
}
|
91
|
+
|
92
|
+
if(0 < m) {
|
93
|
+
/* Sort the type B* suffixes by their first two characters. */
|
94
|
+
PAb = SA + n - m; ISAb = SA + m;
|
95
|
+
for(i = m - 2; 0 <= i; --i) {
|
96
|
+
t = PAb[i], c0 = T[t], c1 = T[t + 1];
|
97
|
+
SA[--BUCKET_BSTAR(c0, c1)] = i;
|
98
|
+
}
|
99
|
+
t = PAb[m - 1], c0 = T[t], c1 = T[t + 1];
|
100
|
+
SA[--BUCKET_BSTAR(c0, c1)] = m - 1;
|
101
|
+
|
102
|
+
/* Sort the type B* substrings using sssort. */
|
103
|
+
#ifdef _OPENMP
|
104
|
+
tmp = omp_get_max_threads();
|
105
|
+
buf = SA + m, bufsize = (n - (2 * m)) / tmp;
|
106
|
+
c0 = ALPHABET_SIZE - 2, c1 = ALPHABET_SIZE - 1, j = m;
|
107
|
+
#pragma omp parallel default(shared) private(curbuf, k, l, d0, d1, tmp)
|
108
|
+
{
|
109
|
+
tmp = omp_get_thread_num();
|
110
|
+
curbuf = buf + tmp * bufsize;
|
111
|
+
k = 0;
|
112
|
+
for(;;) {
|
113
|
+
#pragma omp critical(sssort_lock)
|
114
|
+
{
|
115
|
+
if(0 < (l = j)) {
|
116
|
+
d0 = c0, d1 = c1;
|
117
|
+
do {
|
118
|
+
k = BUCKET_BSTAR(d0, d1);
|
119
|
+
if(--d1 <= d0) {
|
120
|
+
d1 = ALPHABET_SIZE - 1;
|
121
|
+
if(--d0 < 0) { break; }
|
122
|
+
}
|
123
|
+
} while(((l - k) <= 1) && (0 < (l = k)));
|
124
|
+
c0 = d0, c1 = d1, j = k;
|
125
|
+
}
|
126
|
+
}
|
127
|
+
if(l == 0) { break; }
|
128
|
+
sssort(T, PAb, SA + k, SA + l,
|
129
|
+
curbuf, bufsize, 2, n, *(SA + k) == (m - 1));
|
130
|
+
}
|
131
|
+
}
|
132
|
+
#else
|
133
|
+
buf = SA + m, bufsize = n - (2 * m);
|
134
|
+
for(c0 = ALPHABET_SIZE - 2, j = m; 0 < j; --c0) {
|
135
|
+
for(c1 = ALPHABET_SIZE - 1; c0 < c1; j = i, --c1) {
|
136
|
+
i = BUCKET_BSTAR(c0, c1);
|
137
|
+
if(1 < (j - i)) {
|
138
|
+
sssort(T, PAb, SA + i, SA + j,
|
139
|
+
buf, bufsize, 2, n, *(SA + i) == (m - 1));
|
140
|
+
}
|
141
|
+
}
|
142
|
+
}
|
143
|
+
#endif
|
144
|
+
|
145
|
+
/* Compute ranks of type B* substrings. */
|
146
|
+
for(i = m - 1; 0 <= i; --i) {
|
147
|
+
if(0 <= SA[i]) {
|
148
|
+
j = i;
|
149
|
+
do { ISAb[SA[i]] = i; } while((0 <= --i) && (0 <= SA[i]));
|
150
|
+
SA[i + 1] = i - j;
|
151
|
+
if(i <= 0) { break; }
|
152
|
+
}
|
153
|
+
j = i;
|
154
|
+
do { ISAb[SA[i] = ~SA[i]] = j; } while(SA[--i] < 0);
|
155
|
+
ISAb[SA[i]] = j;
|
156
|
+
}
|
157
|
+
|
158
|
+
/* Construct the inverse suffix array of type B* suffixes using trsort. */
|
159
|
+
trsort(ISAb, SA, m, 1);
|
160
|
+
|
161
|
+
/* Set the sorted order of tyoe B* suffixes. */
|
162
|
+
for(i = n - 1, j = m, c0 = T[n - 1]; 0 <= i;) {
|
163
|
+
for(--i, c1 = c0; (0 <= i) && ((c0 = T[i]) >= c1); --i, c1 = c0) { }
|
164
|
+
if(0 <= i) {
|
165
|
+
t = i;
|
166
|
+
for(--i, c1 = c0; (0 <= i) && ((c0 = T[i]) <= c1); --i, c1 = c0) { }
|
167
|
+
SA[ISAb[--j]] = ((t == 0) || (1 < (t - i))) ? t : ~t;
|
168
|
+
}
|
169
|
+
}
|
170
|
+
|
171
|
+
/* Calculate the index of start/end point of each bucket. */
|
172
|
+
BUCKET_B(ALPHABET_SIZE - 1, ALPHABET_SIZE - 1) = n; /* end point */
|
173
|
+
for(c0 = ALPHABET_SIZE - 2, k = m - 1; 0 <= c0; --c0) {
|
174
|
+
i = BUCKET_A(c0 + 1) - 1;
|
175
|
+
for(c1 = ALPHABET_SIZE - 1; c0 < c1; --c1) {
|
176
|
+
t = i - BUCKET_B(c0, c1);
|
177
|
+
BUCKET_B(c0, c1) = i; /* end point */
|
178
|
+
|
179
|
+
/* Move all type B* suffixes to the correct position. */
|
180
|
+
for(i = t, j = BUCKET_BSTAR(c0, c1);
|
181
|
+
j <= k;
|
182
|
+
--i, --k) { SA[i] = SA[k]; }
|
183
|
+
}
|
184
|
+
BUCKET_BSTAR(c0, c0 + 1) = i - BUCKET_B(c0, c0) + 1; /* start point */
|
185
|
+
BUCKET_B(c0, c0) = i; /* end point */
|
186
|
+
}
|
187
|
+
}
|
188
|
+
|
189
|
+
return m;
|
190
|
+
}
|
191
|
+
|
192
|
+
/* Constructs the suffix array by using the sorted order of type B* suffixes. */
|
193
|
+
static
|
194
|
+
void
|
195
|
+
construct_SA(const sauchar_t *T, saidx_t *SA,
|
196
|
+
saidx_t *bucket_A, saidx_t *bucket_B,
|
197
|
+
saidx_t n, saidx_t m) {
|
198
|
+
saidx_t *i, *j, *k;
|
199
|
+
saidx_t s;
|
200
|
+
saint_t c0, c1, c2;
|
201
|
+
|
202
|
+
if(0 < m) {
|
203
|
+
/* Construct the sorted order of type B suffixes by using
|
204
|
+
the sorted order of type B* suffixes. */
|
205
|
+
for(c1 = ALPHABET_SIZE - 2; 0 <= c1; --c1) {
|
206
|
+
/* Scan the suffix array from right to left. */
|
207
|
+
for(i = SA + BUCKET_BSTAR(c1, c1 + 1),
|
208
|
+
j = SA + BUCKET_A(c1 + 1) - 1, k = NULL, c2 = -1;
|
209
|
+
i <= j;
|
210
|
+
--j) {
|
211
|
+
if(0 < (s = *j)) {
|
212
|
+
assert(T[s] == c1);
|
213
|
+
assert(((s + 1) < n) && (T[s] <= T[s + 1]));
|
214
|
+
assert(T[s - 1] <= T[s]);
|
215
|
+
*j = ~s;
|
216
|
+
c0 = T[--s];
|
217
|
+
if((0 < s) && (T[s - 1] > c0)) { s = ~s; }
|
218
|
+
if(c0 != c2) {
|
219
|
+
if(0 <= c2) { BUCKET_B(c2, c1) = k - SA; }
|
220
|
+
k = SA + BUCKET_B(c2 = c0, c1);
|
221
|
+
}
|
222
|
+
assert(k < j);
|
223
|
+
*k-- = s;
|
224
|
+
} else {
|
225
|
+
assert(((s == 0) && (T[s] == c1)) || (s < 0));
|
226
|
+
*j = ~s;
|
227
|
+
}
|
228
|
+
}
|
229
|
+
}
|
230
|
+
}
|
231
|
+
|
232
|
+
/* Construct the suffix array by using
|
233
|
+
the sorted order of type B suffixes. */
|
234
|
+
k = SA + BUCKET_A(c2 = T[n - 1]);
|
235
|
+
*k++ = (T[n - 2] < c2) ? ~(n - 1) : (n - 1);
|
236
|
+
/* Scan the suffix array from left to right. */
|
237
|
+
for(i = SA, j = SA + n; i < j; ++i) {
|
238
|
+
if(0 < (s = *i)) {
|
239
|
+
assert(T[s - 1] >= T[s]);
|
240
|
+
c0 = T[--s];
|
241
|
+
if((s == 0) || (T[s - 1] < c0)) { s = ~s; }
|
242
|
+
if(c0 != c2) {
|
243
|
+
BUCKET_A(c2) = k - SA;
|
244
|
+
k = SA + BUCKET_A(c2 = c0);
|
245
|
+
}
|
246
|
+
assert(i < k);
|
247
|
+
*k++ = s;
|
248
|
+
} else {
|
249
|
+
assert(s < 0);
|
250
|
+
*i = ~s;
|
251
|
+
}
|
252
|
+
}
|
253
|
+
}
|
254
|
+
|
255
|
+
/* Constructs the burrows-wheeler transformed string directly
|
256
|
+
by using the sorted order of type B* suffixes. */
|
257
|
+
static
|
258
|
+
saidx_t
|
259
|
+
construct_BWT(const sauchar_t *T, saidx_t *SA,
|
260
|
+
saidx_t *bucket_A, saidx_t *bucket_B,
|
261
|
+
saidx_t n, saidx_t m) {
|
262
|
+
saidx_t *i, *j, *k, *orig;
|
263
|
+
saidx_t s;
|
264
|
+
saint_t c0, c1, c2;
|
265
|
+
|
266
|
+
if(0 < m) {
|
267
|
+
/* Construct the sorted order of type B suffixes by using
|
268
|
+
the sorted order of type B* suffixes. */
|
269
|
+
for(c1 = ALPHABET_SIZE - 2; 0 <= c1; --c1) {
|
270
|
+
/* Scan the suffix array from right to left. */
|
271
|
+
for(i = SA + BUCKET_BSTAR(c1, c1 + 1),
|
272
|
+
j = SA + BUCKET_A(c1 + 1) - 1, k = NULL, c2 = -1;
|
273
|
+
i <= j;
|
274
|
+
--j) {
|
275
|
+
if(0 < (s = *j)) {
|
276
|
+
assert(T[s] == c1);
|
277
|
+
assert(((s + 1) < n) && (T[s] <= T[s + 1]));
|
278
|
+
assert(T[s - 1] <= T[s]);
|
279
|
+
c0 = T[--s];
|
280
|
+
*j = ~((saidx_t)c0);
|
281
|
+
if((0 < s) && (T[s - 1] > c0)) { s = ~s; }
|
282
|
+
if(c0 != c2) {
|
283
|
+
if(0 <= c2) { BUCKET_B(c2, c1) = k - SA; }
|
284
|
+
k = SA + BUCKET_B(c2 = c0, c1);
|
285
|
+
}
|
286
|
+
assert(k < j);
|
287
|
+
*k-- = s;
|
288
|
+
} else if(s != 0) {
|
289
|
+
*j = ~s;
|
290
|
+
#ifndef NDEBUG
|
291
|
+
} else {
|
292
|
+
assert(T[s] == c1);
|
293
|
+
#endif
|
294
|
+
}
|
295
|
+
}
|
296
|
+
}
|
297
|
+
}
|
298
|
+
|
299
|
+
/* Construct the BWTed string by using
|
300
|
+
the sorted order of type B suffixes. */
|
301
|
+
k = SA + BUCKET_A(c2 = T[n - 1]);
|
302
|
+
*k++ = (T[n - 2] < c2) ? ~((saidx_t)T[n - 2]) : (n - 1);
|
303
|
+
/* Scan the suffix array from left to right. */
|
304
|
+
for(i = SA, j = SA + n, orig = SA; i < j; ++i) {
|
305
|
+
if(0 < (s = *i)) {
|
306
|
+
assert(T[s - 1] >= T[s]);
|
307
|
+
c0 = T[--s];
|
308
|
+
*i = c0;
|
309
|
+
if((0 < s) && (T[s - 1] < c0)) { s = ~((saidx_t)T[s - 1]); }
|
310
|
+
if(c0 != c2) {
|
311
|
+
BUCKET_A(c2) = k - SA;
|
312
|
+
k = SA + BUCKET_A(c2 = c0);
|
313
|
+
}
|
314
|
+
assert(i < k);
|
315
|
+
*k++ = s;
|
316
|
+
} else if(s != 0) {
|
317
|
+
*i = ~s;
|
318
|
+
} else {
|
319
|
+
orig = i;
|
320
|
+
}
|
321
|
+
}
|
322
|
+
|
323
|
+
return orig - SA;
|
324
|
+
}
|
325
|
+
|
326
|
+
|
327
|
+
/*---------------------------------------------------------------------------*/
|
328
|
+
|
329
|
+
/*- Function -*/
|
330
|
+
|
331
|
+
saint_t
|
332
|
+
divsufsort(const sauchar_t *T, saidx_t *SA, saidx_t n) {
|
333
|
+
saidx_t *bucket_A, *bucket_B;
|
334
|
+
saidx_t m;
|
335
|
+
saint_t err = 0;
|
336
|
+
|
337
|
+
/* Check arguments. */
|
338
|
+
if((T == NULL) || (SA == NULL) || (n < 0)) { return -1; }
|
339
|
+
else if(n == 0) { return 0; }
|
340
|
+
else if(n == 1) { SA[0] = 0; return 0; }
|
341
|
+
else if(n == 2) { m = (T[0] < T[1]); SA[m ^ 1] = 0, SA[m] = 1; return 0; }
|
342
|
+
|
343
|
+
bucket_A = (saidx_t *)malloc(BUCKET_A_SIZE * sizeof(saidx_t));
|
344
|
+
bucket_B = (saidx_t *)malloc(BUCKET_B_SIZE * sizeof(saidx_t));
|
345
|
+
|
346
|
+
/* Suffixsort. */
|
347
|
+
if((bucket_A != NULL) && (bucket_B != NULL)) {
|
348
|
+
m = sort_typeBstar(T, SA, bucket_A, bucket_B, n);
|
349
|
+
construct_SA(T, SA, bucket_A, bucket_B, n, m);
|
350
|
+
} else {
|
351
|
+
err = -2;
|
352
|
+
}
|
353
|
+
|
354
|
+
free(bucket_B);
|
355
|
+
free(bucket_A);
|
356
|
+
|
357
|
+
return err;
|
358
|
+
}
|
359
|
+
|
360
|
+
saidx_t
|
361
|
+
divbwt(const sauchar_t *T, sauchar_t *U, saidx_t *A, saidx_t n) {
|
362
|
+
saidx_t *B;
|
363
|
+
saidx_t *bucket_A, *bucket_B;
|
364
|
+
saidx_t m, pidx, i;
|
365
|
+
|
366
|
+
/* Check arguments. */
|
367
|
+
if((T == NULL) || (U == NULL) || (n < 0)) { return -1; }
|
368
|
+
else if(n <= 1) { if(n == 1) { U[0] = T[0]; } return n; }
|
369
|
+
|
370
|
+
if((B = A) == NULL) { B = (saidx_t *)malloc((size_t)(n + 1) * sizeof(saidx_t)); }
|
371
|
+
bucket_A = (saidx_t *)malloc(BUCKET_A_SIZE * sizeof(saidx_t));
|
372
|
+
bucket_B = (saidx_t *)malloc(BUCKET_B_SIZE * sizeof(saidx_t));
|
373
|
+
|
374
|
+
/* Burrows-Wheeler Transform. */
|
375
|
+
if((B != NULL) && (bucket_A != NULL) && (bucket_B != NULL)) {
|
376
|
+
m = sort_typeBstar(T, B, bucket_A, bucket_B, n);
|
377
|
+
pidx = construct_BWT(T, B, bucket_A, bucket_B, n, m);
|
378
|
+
|
379
|
+
/* Copy to output string. */
|
380
|
+
U[0] = T[n - 1];
|
381
|
+
for(i = 0; i < pidx; ++i) { U[i + 1] = (sauchar_t)B[i]; }
|
382
|
+
for(i += 1; i < n; ++i) { U[i] = (sauchar_t)B[i]; }
|
383
|
+
pidx += 1;
|
384
|
+
} else {
|
385
|
+
pidx = -2;
|
386
|
+
}
|
387
|
+
|
388
|
+
free(bucket_B);
|
389
|
+
free(bucket_A);
|
390
|
+
if(A == NULL) { free(B); }
|
391
|
+
|
392
|
+
return pidx;
|
393
|
+
}
|
394
|
+
|
395
|
+
const char *
|
396
|
+
divsufsort_version(void) {
|
397
|
+
return PROJECT_VERSION_FULL;
|
398
|
+
}
|