divsufsort 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.txt +77 -0
- data/ext/Makefile +149 -0
- data/ext/divsufsort.c +398 -0
- data/ext/divsufsort.h +191 -0
- data/ext/divsufsort.o +0 -0
- data/ext/divsufsort.so +0 -0
- data/ext/divsufsort_private.h +207 -0
- data/ext/divsufsort_ruby.c +227 -0
- data/ext/divsufsort_ruby.o +0 -0
- data/ext/extconf.rb +18 -0
- data/ext/lfs.h +56 -0
- data/ext/mkmf.log +266 -0
- data/ext/sssort.c +815 -0
- data/ext/sssort.o +0 -0
- data/ext/trsort.c +586 -0
- data/ext/trsort.o +0 -0
- data/ext/utils.c +381 -0
- data/ext/utils.o +0 -0
- data/libdivsufsort/COPYING +27 -0
- data/libdivsufsort/divsufsort.c +398 -0
- data/libdivsufsort/divsufsort.h +191 -0
- data/libdivsufsort/divsufsort_private.h +207 -0
- data/libdivsufsort/lfs.h +56 -0
- data/libdivsufsort/sssort.c +815 -0
- data/libdivsufsort/trsort.c +586 -0
- data/libdivsufsort/utils.c +381 -0
- metadata +80 -0
data/ext/sssort.o
ADDED
Binary file
|
data/ext/trsort.c
ADDED
@@ -0,0 +1,586 @@
|
|
1
|
+
/*
|
2
|
+
* trsort.c for libdivsufsort
|
3
|
+
* Copyright (c) 2003-2008 Yuta Mori All Rights Reserved.
|
4
|
+
*
|
5
|
+
* Permission is hereby granted, free of charge, to any person
|
6
|
+
* obtaining a copy of this software and associated documentation
|
7
|
+
* files (the "Software"), to deal in the Software without
|
8
|
+
* restriction, including without limitation the rights to use,
|
9
|
+
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
10
|
+
* copies of the Software, and to permit persons to whom the
|
11
|
+
* Software is furnished to do so, subject to the following
|
12
|
+
* conditions:
|
13
|
+
*
|
14
|
+
* The above copyright notice and this permission notice shall be
|
15
|
+
* included in all copies or substantial portions of the Software.
|
16
|
+
*
|
17
|
+
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
18
|
+
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
19
|
+
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
20
|
+
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
21
|
+
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
22
|
+
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
23
|
+
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
24
|
+
* OTHER DEALINGS IN THE SOFTWARE.
|
25
|
+
*/
|
26
|
+
|
27
|
+
#include "divsufsort_private.h"
|
28
|
+
|
29
|
+
|
30
|
+
/*- Private Functions -*/
|
31
|
+
|
32
|
+
static const saint_t lg_table[256]= {
|
33
|
+
-1,0,1,1,2,2,2,2,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
|
34
|
+
5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
|
35
|
+
6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
|
36
|
+
6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
|
37
|
+
7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
|
38
|
+
7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
|
39
|
+
7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
|
40
|
+
7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7
|
41
|
+
};
|
42
|
+
|
43
|
+
static INLINE
|
44
|
+
saint_t
|
45
|
+
tr_ilg(saidx_t n) {
|
46
|
+
#if defined(BUILD_DIVSUFSORT64)
|
47
|
+
return (n >> 32) ?
|
48
|
+
((n >> 48) ?
|
49
|
+
((n >> 56) ?
|
50
|
+
56 + lg_table[(n >> 56) & 0xff] :
|
51
|
+
48 + lg_table[(n >> 48) & 0xff]) :
|
52
|
+
((n >> 40) ?
|
53
|
+
40 + lg_table[(n >> 40) & 0xff] :
|
54
|
+
32 + lg_table[(n >> 32) & 0xff])) :
|
55
|
+
((n & 0xffff0000) ?
|
56
|
+
((n & 0xff000000) ?
|
57
|
+
24 + lg_table[(n >> 24) & 0xff] :
|
58
|
+
16 + lg_table[(n >> 16) & 0xff]) :
|
59
|
+
((n & 0x0000ff00) ?
|
60
|
+
8 + lg_table[(n >> 8) & 0xff] :
|
61
|
+
0 + lg_table[(n >> 0) & 0xff]));
|
62
|
+
#else
|
63
|
+
return (n & 0xffff0000) ?
|
64
|
+
((n & 0xff000000) ?
|
65
|
+
24 + lg_table[(n >> 24) & 0xff] :
|
66
|
+
16 + lg_table[(n >> 16) & 0xff]) :
|
67
|
+
((n & 0x0000ff00) ?
|
68
|
+
8 + lg_table[(n >> 8) & 0xff] :
|
69
|
+
0 + lg_table[(n >> 0) & 0xff]);
|
70
|
+
#endif
|
71
|
+
}
|
72
|
+
|
73
|
+
|
74
|
+
/*---------------------------------------------------------------------------*/
|
75
|
+
|
76
|
+
/* Simple insertionsort for small size groups. */
|
77
|
+
static
|
78
|
+
void
|
79
|
+
tr_insertionsort(const saidx_t *ISAd, saidx_t *first, saidx_t *last) {
|
80
|
+
saidx_t *a, *b;
|
81
|
+
saidx_t t, r;
|
82
|
+
|
83
|
+
for(a = first + 1; a < last; ++a) {
|
84
|
+
for(t = *a, b = a - 1; 0 > (r = ISAd[t] - ISAd[*b]);) {
|
85
|
+
do { *(b + 1) = *b; } while((first <= --b) && (*b < 0));
|
86
|
+
if(b < first) { break; }
|
87
|
+
}
|
88
|
+
if(r == 0) { *b = ~*b; }
|
89
|
+
*(b + 1) = t;
|
90
|
+
}
|
91
|
+
}
|
92
|
+
|
93
|
+
|
94
|
+
/*---------------------------------------------------------------------------*/
|
95
|
+
|
96
|
+
static INLINE
|
97
|
+
void
|
98
|
+
tr_fixdown(const saidx_t *ISAd, saidx_t *SA, saidx_t i, saidx_t size) {
|
99
|
+
saidx_t j, k;
|
100
|
+
saidx_t v;
|
101
|
+
saidx_t c, d, e;
|
102
|
+
|
103
|
+
for(v = SA[i], c = ISAd[v]; (j = 2 * i + 1) < size; SA[i] = SA[k], i = k) {
|
104
|
+
d = ISAd[SA[k = j++]];
|
105
|
+
if(d < (e = ISAd[SA[j]])) { k = j; d = e; }
|
106
|
+
if(d <= c) { break; }
|
107
|
+
}
|
108
|
+
SA[i] = v;
|
109
|
+
}
|
110
|
+
|
111
|
+
/* Simple top-down heapsort. */
|
112
|
+
static
|
113
|
+
void
|
114
|
+
tr_heapsort(const saidx_t *ISAd, saidx_t *SA, saidx_t size) {
|
115
|
+
saidx_t i, m;
|
116
|
+
saidx_t t;
|
117
|
+
|
118
|
+
m = size;
|
119
|
+
if((size % 2) == 0) {
|
120
|
+
m--;
|
121
|
+
if(ISAd[SA[m / 2]] < ISAd[SA[m]]) { SWAP(SA[m], SA[m / 2]); }
|
122
|
+
}
|
123
|
+
|
124
|
+
for(i = m / 2 - 1; 0 <= i; --i) { tr_fixdown(ISAd, SA, i, m); }
|
125
|
+
if((size % 2) == 0) { SWAP(SA[0], SA[m]); tr_fixdown(ISAd, SA, 0, m); }
|
126
|
+
for(i = m - 1; 0 < i; --i) {
|
127
|
+
t = SA[0], SA[0] = SA[i];
|
128
|
+
tr_fixdown(ISAd, SA, 0, i);
|
129
|
+
SA[i] = t;
|
130
|
+
}
|
131
|
+
}
|
132
|
+
|
133
|
+
|
134
|
+
/*---------------------------------------------------------------------------*/
|
135
|
+
|
136
|
+
/* Returns the median of three elements. */
|
137
|
+
static INLINE
|
138
|
+
saidx_t *
|
139
|
+
tr_median3(const saidx_t *ISAd, saidx_t *v1, saidx_t *v2, saidx_t *v3) {
|
140
|
+
saidx_t *t;
|
141
|
+
if(ISAd[*v1] > ISAd[*v2]) { SWAP(v1, v2); }
|
142
|
+
if(ISAd[*v2] > ISAd[*v3]) {
|
143
|
+
if(ISAd[*v1] > ISAd[*v3]) { return v1; }
|
144
|
+
else { return v3; }
|
145
|
+
}
|
146
|
+
return v2;
|
147
|
+
}
|
148
|
+
|
149
|
+
/* Returns the median of five elements. */
|
150
|
+
static INLINE
|
151
|
+
saidx_t *
|
152
|
+
tr_median5(const saidx_t *ISAd,
|
153
|
+
saidx_t *v1, saidx_t *v2, saidx_t *v3, saidx_t *v4, saidx_t *v5) {
|
154
|
+
saidx_t *t;
|
155
|
+
if(ISAd[*v2] > ISAd[*v3]) { SWAP(v2, v3); }
|
156
|
+
if(ISAd[*v4] > ISAd[*v5]) { SWAP(v4, v5); }
|
157
|
+
if(ISAd[*v2] > ISAd[*v4]) { SWAP(v2, v4); SWAP(v3, v5); }
|
158
|
+
if(ISAd[*v1] > ISAd[*v3]) { SWAP(v1, v3); }
|
159
|
+
if(ISAd[*v1] > ISAd[*v4]) { SWAP(v1, v4); SWAP(v3, v5); }
|
160
|
+
if(ISAd[*v3] > ISAd[*v4]) { return v4; }
|
161
|
+
return v3;
|
162
|
+
}
|
163
|
+
|
164
|
+
/* Returns the pivot element. */
|
165
|
+
static INLINE
|
166
|
+
saidx_t *
|
167
|
+
tr_pivot(const saidx_t *ISAd, saidx_t *first, saidx_t *last) {
|
168
|
+
saidx_t *middle;
|
169
|
+
saidx_t t;
|
170
|
+
|
171
|
+
t = last - first;
|
172
|
+
middle = first + t / 2;
|
173
|
+
|
174
|
+
if(t <= 512) {
|
175
|
+
if(t <= 32) {
|
176
|
+
return tr_median3(ISAd, first, middle, last - 1);
|
177
|
+
} else {
|
178
|
+
t >>= 2;
|
179
|
+
return tr_median5(ISAd, first, first + t, middle, last - 1 - t, last - 1);
|
180
|
+
}
|
181
|
+
}
|
182
|
+
t >>= 3;
|
183
|
+
first = tr_median3(ISAd, first, first + t, first + (t << 1));
|
184
|
+
middle = tr_median3(ISAd, middle - t, middle, middle + t);
|
185
|
+
last = tr_median3(ISAd, last - 1 - (t << 1), last - 1 - t, last - 1);
|
186
|
+
return tr_median3(ISAd, first, middle, last);
|
187
|
+
}
|
188
|
+
|
189
|
+
|
190
|
+
/*---------------------------------------------------------------------------*/
|
191
|
+
|
192
|
+
typedef struct _trbudget_t trbudget_t;
|
193
|
+
struct _trbudget_t {
|
194
|
+
saidx_t chance;
|
195
|
+
saidx_t remain;
|
196
|
+
saidx_t incval;
|
197
|
+
saidx_t count;
|
198
|
+
};
|
199
|
+
|
200
|
+
static INLINE
|
201
|
+
void
|
202
|
+
trbudget_init(trbudget_t *budget, saidx_t chance, saidx_t incval) {
|
203
|
+
budget->chance = chance;
|
204
|
+
budget->remain = budget->incval = incval;
|
205
|
+
}
|
206
|
+
|
207
|
+
static INLINE
|
208
|
+
saint_t
|
209
|
+
trbudget_check(trbudget_t *budget, saidx_t size) {
|
210
|
+
if(size <= budget->remain) { budget->remain -= size; return 1; }
|
211
|
+
if(budget->chance == 0) { budget->count += size; return 0; }
|
212
|
+
budget->remain += budget->incval - size;
|
213
|
+
budget->chance -= 1;
|
214
|
+
return 1;
|
215
|
+
}
|
216
|
+
|
217
|
+
|
218
|
+
/*---------------------------------------------------------------------------*/
|
219
|
+
|
220
|
+
static INLINE
|
221
|
+
void
|
222
|
+
tr_partition(const saidx_t *ISAd,
|
223
|
+
saidx_t *first, saidx_t *middle, saidx_t *last,
|
224
|
+
saidx_t **pa, saidx_t **pb, saidx_t v) {
|
225
|
+
saidx_t *a, *b, *c, *d, *e, *f;
|
226
|
+
saidx_t t, s;
|
227
|
+
saidx_t x = 0;
|
228
|
+
|
229
|
+
for(b = middle - 1; (++b < last) && ((x = ISAd[*b]) == v);) { }
|
230
|
+
if(((a = b) < last) && (x < v)) {
|
231
|
+
for(; (++b < last) && ((x = ISAd[*b]) <= v);) {
|
232
|
+
if(x == v) { SWAP(*b, *a); ++a; }
|
233
|
+
}
|
234
|
+
}
|
235
|
+
for(c = last; (b < --c) && ((x = ISAd[*c]) == v);) { }
|
236
|
+
if((b < (d = c)) && (x > v)) {
|
237
|
+
for(; (b < --c) && ((x = ISAd[*c]) >= v);) {
|
238
|
+
if(x == v) { SWAP(*c, *d); --d; }
|
239
|
+
}
|
240
|
+
}
|
241
|
+
for(; b < c;) {
|
242
|
+
SWAP(*b, *c);
|
243
|
+
for(; (++b < c) && ((x = ISAd[*b]) <= v);) {
|
244
|
+
if(x == v) { SWAP(*b, *a); ++a; }
|
245
|
+
}
|
246
|
+
for(; (b < --c) && ((x = ISAd[*c]) >= v);) {
|
247
|
+
if(x == v) { SWAP(*c, *d); --d; }
|
248
|
+
}
|
249
|
+
}
|
250
|
+
|
251
|
+
if(a <= d) {
|
252
|
+
c = b - 1;
|
253
|
+
if((s = a - first) > (t = b - a)) { s = t; }
|
254
|
+
for(e = first, f = b - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); }
|
255
|
+
if((s = d - c) > (t = last - d - 1)) { s = t; }
|
256
|
+
for(e = b, f = last - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); }
|
257
|
+
first += (b - a), last -= (d - c);
|
258
|
+
}
|
259
|
+
*pa = first, *pb = last;
|
260
|
+
}
|
261
|
+
|
262
|
+
static
|
263
|
+
void
|
264
|
+
tr_copy(saidx_t *ISA, const saidx_t *SA,
|
265
|
+
saidx_t *first, saidx_t *a, saidx_t *b, saidx_t *last,
|
266
|
+
saidx_t depth) {
|
267
|
+
/* sort suffixes of middle partition
|
268
|
+
by using sorted order of suffixes of left and right partition. */
|
269
|
+
saidx_t *c, *d, *e;
|
270
|
+
saidx_t s, v;
|
271
|
+
|
272
|
+
v = b - SA - 1;
|
273
|
+
for(c = first, d = a - 1; c <= d; ++c) {
|
274
|
+
if((0 <= (s = *c - depth)) && (ISA[s] == v)) {
|
275
|
+
*++d = s;
|
276
|
+
ISA[s] = d - SA;
|
277
|
+
}
|
278
|
+
}
|
279
|
+
for(c = last - 1, e = d + 1, d = b; e < d; --c) {
|
280
|
+
if((0 <= (s = *c - depth)) && (ISA[s] == v)) {
|
281
|
+
*--d = s;
|
282
|
+
ISA[s] = d - SA;
|
283
|
+
}
|
284
|
+
}
|
285
|
+
}
|
286
|
+
|
287
|
+
static
|
288
|
+
void
|
289
|
+
tr_partialcopy(saidx_t *ISA, const saidx_t *SA,
|
290
|
+
saidx_t *first, saidx_t *a, saidx_t *b, saidx_t *last,
|
291
|
+
saidx_t depth) {
|
292
|
+
saidx_t *c, *d, *e;
|
293
|
+
saidx_t s, v;
|
294
|
+
saidx_t rank, lastrank, newrank = -1;
|
295
|
+
|
296
|
+
v = b - SA - 1;
|
297
|
+
lastrank = -1;
|
298
|
+
for(c = first, d = a - 1; c <= d; ++c) {
|
299
|
+
if((0 <= (s = *c - depth)) && (ISA[s] == v)) {
|
300
|
+
*++d = s;
|
301
|
+
rank = ISA[s + depth];
|
302
|
+
if(lastrank != rank) { lastrank = rank; newrank = d - SA; }
|
303
|
+
ISA[s] = newrank;
|
304
|
+
}
|
305
|
+
}
|
306
|
+
|
307
|
+
lastrank = -1;
|
308
|
+
for(e = d; first <= e; --e) {
|
309
|
+
rank = ISA[*e];
|
310
|
+
if(lastrank != rank) { lastrank = rank; newrank = e - SA; }
|
311
|
+
if(newrank != rank) { ISA[*e] = newrank; }
|
312
|
+
}
|
313
|
+
|
314
|
+
lastrank = -1;
|
315
|
+
for(c = last - 1, e = d + 1, d = b; e < d; --c) {
|
316
|
+
if((0 <= (s = *c - depth)) && (ISA[s] == v)) {
|
317
|
+
*--d = s;
|
318
|
+
rank = ISA[s + depth];
|
319
|
+
if(lastrank != rank) { lastrank = rank; newrank = d - SA; }
|
320
|
+
ISA[s] = newrank;
|
321
|
+
}
|
322
|
+
}
|
323
|
+
}
|
324
|
+
|
325
|
+
static
|
326
|
+
void
|
327
|
+
tr_introsort(saidx_t *ISA, const saidx_t *ISAd,
|
328
|
+
saidx_t *SA, saidx_t *first, saidx_t *last,
|
329
|
+
trbudget_t *budget) {
|
330
|
+
#define STACK_SIZE TR_STACKSIZE
|
331
|
+
struct { const saidx_t *a; saidx_t *b, *c; saint_t d, e; }stack[STACK_SIZE];
|
332
|
+
saidx_t *a, *b, *c;
|
333
|
+
saidx_t t;
|
334
|
+
saidx_t v, x = 0;
|
335
|
+
saidx_t incr = ISAd - ISA;
|
336
|
+
saint_t limit, next;
|
337
|
+
saint_t ssize, trlink = -1;
|
338
|
+
|
339
|
+
for(ssize = 0, limit = tr_ilg(last - first);;) {
|
340
|
+
|
341
|
+
if(limit < 0) {
|
342
|
+
if(limit == -1) {
|
343
|
+
/* tandem repeat partition */
|
344
|
+
tr_partition(ISAd - incr, first, first, last, &a, &b, last - SA - 1);
|
345
|
+
|
346
|
+
/* update ranks */
|
347
|
+
if(a < last) {
|
348
|
+
for(c = first, v = a - SA - 1; c < a; ++c) { ISA[*c] = v; }
|
349
|
+
}
|
350
|
+
if(b < last) {
|
351
|
+
for(c = a, v = b - SA - 1; c < b; ++c) { ISA[*c] = v; }
|
352
|
+
}
|
353
|
+
|
354
|
+
/* push */
|
355
|
+
if(1 < (b - a)) {
|
356
|
+
STACK_PUSH5(NULL, a, b, 0, 0);
|
357
|
+
STACK_PUSH5(ISAd - incr, first, last, -2, trlink);
|
358
|
+
trlink = ssize - 2;
|
359
|
+
}
|
360
|
+
if((a - first) <= (last - b)) {
|
361
|
+
if(1 < (a - first)) {
|
362
|
+
STACK_PUSH5(ISAd, b, last, tr_ilg(last - b), trlink);
|
363
|
+
last = a, limit = tr_ilg(a - first);
|
364
|
+
} else if(1 < (last - b)) {
|
365
|
+
first = b, limit = tr_ilg(last - b);
|
366
|
+
} else {
|
367
|
+
STACK_POP5(ISAd, first, last, limit, trlink);
|
368
|
+
}
|
369
|
+
} else {
|
370
|
+
if(1 < (last - b)) {
|
371
|
+
STACK_PUSH5(ISAd, first, a, tr_ilg(a - first), trlink);
|
372
|
+
first = b, limit = tr_ilg(last - b);
|
373
|
+
} else if(1 < (a - first)) {
|
374
|
+
last = a, limit = tr_ilg(a - first);
|
375
|
+
} else {
|
376
|
+
STACK_POP5(ISAd, first, last, limit, trlink);
|
377
|
+
}
|
378
|
+
}
|
379
|
+
} else if(limit == -2) {
|
380
|
+
/* tandem repeat copy */
|
381
|
+
a = stack[--ssize].b, b = stack[ssize].c;
|
382
|
+
if(stack[ssize].d == 0) {
|
383
|
+
tr_copy(ISA, SA, first, a, b, last, ISAd - ISA);
|
384
|
+
} else {
|
385
|
+
if(0 <= trlink) { stack[trlink].d = -1; }
|
386
|
+
tr_partialcopy(ISA, SA, first, a, b, last, ISAd - ISA);
|
387
|
+
}
|
388
|
+
STACK_POP5(ISAd, first, last, limit, trlink);
|
389
|
+
} else {
|
390
|
+
/* sorted partition */
|
391
|
+
if(0 <= *first) {
|
392
|
+
a = first;
|
393
|
+
do { ISA[*a] = a - SA; } while((++a < last) && (0 <= *a));
|
394
|
+
first = a;
|
395
|
+
}
|
396
|
+
if(first < last) {
|
397
|
+
a = first; do { *a = ~*a; } while(*++a < 0);
|
398
|
+
next = (ISA[*a] != ISAd[*a]) ? tr_ilg(a - first + 1) : -1;
|
399
|
+
if(++a < last) { for(b = first, v = a - SA - 1; b < a; ++b) { ISA[*b] = v; } }
|
400
|
+
|
401
|
+
/* push */
|
402
|
+
if(trbudget_check(budget, a - first)) {
|
403
|
+
if((a - first) <= (last - a)) {
|
404
|
+
STACK_PUSH5(ISAd, a, last, -3, trlink);
|
405
|
+
ISAd += incr, last = a, limit = next;
|
406
|
+
} else {
|
407
|
+
if(1 < (last - a)) {
|
408
|
+
STACK_PUSH5(ISAd + incr, first, a, next, trlink);
|
409
|
+
first = a, limit = -3;
|
410
|
+
} else {
|
411
|
+
ISAd += incr, last = a, limit = next;
|
412
|
+
}
|
413
|
+
}
|
414
|
+
} else {
|
415
|
+
if(0 <= trlink) { stack[trlink].d = -1; }
|
416
|
+
if(1 < (last - a)) {
|
417
|
+
first = a, limit = -3;
|
418
|
+
} else {
|
419
|
+
STACK_POP5(ISAd, first, last, limit, trlink);
|
420
|
+
}
|
421
|
+
}
|
422
|
+
} else {
|
423
|
+
STACK_POP5(ISAd, first, last, limit, trlink);
|
424
|
+
}
|
425
|
+
}
|
426
|
+
continue;
|
427
|
+
}
|
428
|
+
|
429
|
+
if((last - first) <= TR_INSERTIONSORT_THRESHOLD) {
|
430
|
+
tr_insertionsort(ISAd, first, last);
|
431
|
+
limit = -3;
|
432
|
+
continue;
|
433
|
+
}
|
434
|
+
|
435
|
+
if(limit-- == 0) {
|
436
|
+
tr_heapsort(ISAd, first, last - first);
|
437
|
+
for(a = last - 1; first < a; a = b) {
|
438
|
+
for(x = ISAd[*a], b = a - 1; (first <= b) && (ISAd[*b] == x); --b) { *b = ~*b; }
|
439
|
+
}
|
440
|
+
limit = -3;
|
441
|
+
continue;
|
442
|
+
}
|
443
|
+
|
444
|
+
/* choose pivot */
|
445
|
+
a = tr_pivot(ISAd, first, last);
|
446
|
+
SWAP(*first, *a);
|
447
|
+
v = ISAd[*first];
|
448
|
+
|
449
|
+
/* partition */
|
450
|
+
tr_partition(ISAd, first, first + 1, last, &a, &b, v);
|
451
|
+
if((last - first) != (b - a)) {
|
452
|
+
next = (ISA[*a] != v) ? tr_ilg(b - a) : -1;
|
453
|
+
|
454
|
+
/* update ranks */
|
455
|
+
for(c = first, v = a - SA - 1; c < a; ++c) { ISA[*c] = v; }
|
456
|
+
if(b < last) { for(c = a, v = b - SA - 1; c < b; ++c) { ISA[*c] = v; } }
|
457
|
+
|
458
|
+
/* push */
|
459
|
+
if((1 < (b - a)) && (trbudget_check(budget, b - a))) {
|
460
|
+
if((a - first) <= (last - b)) {
|
461
|
+
if((last - b) <= (b - a)) {
|
462
|
+
if(1 < (a - first)) {
|
463
|
+
STACK_PUSH5(ISAd + incr, a, b, next, trlink);
|
464
|
+
STACK_PUSH5(ISAd, b, last, limit, trlink);
|
465
|
+
last = a;
|
466
|
+
} else if(1 < (last - b)) {
|
467
|
+
STACK_PUSH5(ISAd + incr, a, b, next, trlink);
|
468
|
+
first = b;
|
469
|
+
} else {
|
470
|
+
ISAd += incr, first = a, last = b, limit = next;
|
471
|
+
}
|
472
|
+
} else if((a - first) <= (b - a)) {
|
473
|
+
if(1 < (a - first)) {
|
474
|
+
STACK_PUSH5(ISAd, b, last, limit, trlink);
|
475
|
+
STACK_PUSH5(ISAd + incr, a, b, next, trlink);
|
476
|
+
last = a;
|
477
|
+
} else {
|
478
|
+
STACK_PUSH5(ISAd, b, last, limit, trlink);
|
479
|
+
ISAd += incr, first = a, last = b, limit = next;
|
480
|
+
}
|
481
|
+
} else {
|
482
|
+
STACK_PUSH5(ISAd, b, last, limit, trlink);
|
483
|
+
STACK_PUSH5(ISAd, first, a, limit, trlink);
|
484
|
+
ISAd += incr, first = a, last = b, limit = next;
|
485
|
+
}
|
486
|
+
} else {
|
487
|
+
if((a - first) <= (b - a)) {
|
488
|
+
if(1 < (last - b)) {
|
489
|
+
STACK_PUSH5(ISAd + incr, a, b, next, trlink);
|
490
|
+
STACK_PUSH5(ISAd, first, a, limit, trlink);
|
491
|
+
first = b;
|
492
|
+
} else if(1 < (a - first)) {
|
493
|
+
STACK_PUSH5(ISAd + incr, a, b, next, trlink);
|
494
|
+
last = a;
|
495
|
+
} else {
|
496
|
+
ISAd += incr, first = a, last = b, limit = next;
|
497
|
+
}
|
498
|
+
} else if((last - b) <= (b - a)) {
|
499
|
+
if(1 < (last - b)) {
|
500
|
+
STACK_PUSH5(ISAd, first, a, limit, trlink);
|
501
|
+
STACK_PUSH5(ISAd + incr, a, b, next, trlink);
|
502
|
+
first = b;
|
503
|
+
} else {
|
504
|
+
STACK_PUSH5(ISAd, first, a, limit, trlink);
|
505
|
+
ISAd += incr, first = a, last = b, limit = next;
|
506
|
+
}
|
507
|
+
} else {
|
508
|
+
STACK_PUSH5(ISAd, first, a, limit, trlink);
|
509
|
+
STACK_PUSH5(ISAd, b, last, limit, trlink);
|
510
|
+
ISAd += incr, first = a, last = b, limit = next;
|
511
|
+
}
|
512
|
+
}
|
513
|
+
} else {
|
514
|
+
if((1 < (b - a)) && (0 <= trlink)) { stack[trlink].d = -1; }
|
515
|
+
if((a - first) <= (last - b)) {
|
516
|
+
if(1 < (a - first)) {
|
517
|
+
STACK_PUSH5(ISAd, b, last, limit, trlink);
|
518
|
+
last = a;
|
519
|
+
} else if(1 < (last - b)) {
|
520
|
+
first = b;
|
521
|
+
} else {
|
522
|
+
STACK_POP5(ISAd, first, last, limit, trlink);
|
523
|
+
}
|
524
|
+
} else {
|
525
|
+
if(1 < (last - b)) {
|
526
|
+
STACK_PUSH5(ISAd, first, a, limit, trlink);
|
527
|
+
first = b;
|
528
|
+
} else if(1 < (a - first)) {
|
529
|
+
last = a;
|
530
|
+
} else {
|
531
|
+
STACK_POP5(ISAd, first, last, limit, trlink);
|
532
|
+
}
|
533
|
+
}
|
534
|
+
}
|
535
|
+
} else {
|
536
|
+
if(trbudget_check(budget, last - first)) {
|
537
|
+
limit = tr_ilg(last - first), ISAd += incr;
|
538
|
+
} else {
|
539
|
+
if(0 <= trlink) { stack[trlink].d = -1; }
|
540
|
+
STACK_POP5(ISAd, first, last, limit, trlink);
|
541
|
+
}
|
542
|
+
}
|
543
|
+
}
|
544
|
+
#undef STACK_SIZE
|
545
|
+
}
|
546
|
+
|
547
|
+
|
548
|
+
|
549
|
+
/*---------------------------------------------------------------------------*/
|
550
|
+
|
551
|
+
/*- Function -*/
|
552
|
+
|
553
|
+
/* Tandem repeat sort */
|
554
|
+
void
|
555
|
+
trsort(saidx_t *ISA, saidx_t *SA, saidx_t n, saidx_t depth) {
|
556
|
+
saidx_t *ISAd;
|
557
|
+
saidx_t *first, *last;
|
558
|
+
trbudget_t budget;
|
559
|
+
saidx_t t, skip, unsorted;
|
560
|
+
|
561
|
+
trbudget_init(&budget, tr_ilg(n) * 2 / 3, n);
|
562
|
+
/* trbudget_init(&budget, tr_ilg(n) * 3 / 4, n); */
|
563
|
+
for(ISAd = ISA + depth; -n < *SA; ISAd += ISAd - ISA) {
|
564
|
+
first = SA;
|
565
|
+
skip = 0;
|
566
|
+
unsorted = 0;
|
567
|
+
do {
|
568
|
+
if((t = *first) < 0) { first -= t; skip += t; }
|
569
|
+
else {
|
570
|
+
if(skip != 0) { *(first + skip) = skip; skip = 0; }
|
571
|
+
last = SA + ISA[t] + 1;
|
572
|
+
if(1 < (last - first)) {
|
573
|
+
budget.count = 0;
|
574
|
+
tr_introsort(ISA, ISAd, SA, first, last, &budget);
|
575
|
+
if(budget.count != 0) { unsorted += budget.count; }
|
576
|
+
else { skip = first - last; }
|
577
|
+
} else if((last - first) == 1) {
|
578
|
+
skip = -1;
|
579
|
+
}
|
580
|
+
first = last;
|
581
|
+
}
|
582
|
+
} while(first < (SA + n));
|
583
|
+
if(skip != 0) { *(first + skip) = skip; }
|
584
|
+
if(unsorted == 0) { break; }
|
585
|
+
}
|
586
|
+
}
|