divsufsort 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/README.txt +77 -0
- data/ext/Makefile +149 -0
- data/ext/divsufsort.c +398 -0
- data/ext/divsufsort.h +191 -0
- data/ext/divsufsort.o +0 -0
- data/ext/divsufsort.so +0 -0
- data/ext/divsufsort_private.h +207 -0
- data/ext/divsufsort_ruby.c +227 -0
- data/ext/divsufsort_ruby.o +0 -0
- data/ext/extconf.rb +18 -0
- data/ext/lfs.h +56 -0
- data/ext/mkmf.log +266 -0
- data/ext/sssort.c +815 -0
- data/ext/sssort.o +0 -0
- data/ext/trsort.c +586 -0
- data/ext/trsort.o +0 -0
- data/ext/utils.c +381 -0
- data/ext/utils.o +0 -0
- data/libdivsufsort/COPYING +27 -0
- data/libdivsufsort/divsufsort.c +398 -0
- data/libdivsufsort/divsufsort.h +191 -0
- data/libdivsufsort/divsufsort_private.h +207 -0
- data/libdivsufsort/lfs.h +56 -0
- data/libdivsufsort/sssort.c +815 -0
- data/libdivsufsort/trsort.c +586 -0
- data/libdivsufsort/utils.c +381 -0
- metadata +80 -0
data/ext/sssort.o
ADDED
Binary file
|
data/ext/trsort.c
ADDED
@@ -0,0 +1,586 @@
|
|
1
|
+
/*
|
2
|
+
* trsort.c for libdivsufsort
|
3
|
+
* Copyright (c) 2003-2008 Yuta Mori All Rights Reserved.
|
4
|
+
*
|
5
|
+
* Permission is hereby granted, free of charge, to any person
|
6
|
+
* obtaining a copy of this software and associated documentation
|
7
|
+
* files (the "Software"), to deal in the Software without
|
8
|
+
* restriction, including without limitation the rights to use,
|
9
|
+
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
10
|
+
* copies of the Software, and to permit persons to whom the
|
11
|
+
* Software is furnished to do so, subject to the following
|
12
|
+
* conditions:
|
13
|
+
*
|
14
|
+
* The above copyright notice and this permission notice shall be
|
15
|
+
* included in all copies or substantial portions of the Software.
|
16
|
+
*
|
17
|
+
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
18
|
+
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
19
|
+
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
20
|
+
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
21
|
+
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
22
|
+
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
23
|
+
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
24
|
+
* OTHER DEALINGS IN THE SOFTWARE.
|
25
|
+
*/
|
26
|
+
|
27
|
+
#include "divsufsort_private.h"
|
28
|
+
|
29
|
+
|
30
|
+
/*- Private Functions -*/
|
31
|
+
|
32
|
+
static const saint_t lg_table[256]= {
|
33
|
+
-1,0,1,1,2,2,2,2,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
|
34
|
+
5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
|
35
|
+
6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
|
36
|
+
6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
|
37
|
+
7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
|
38
|
+
7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
|
39
|
+
7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
|
40
|
+
7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7
|
41
|
+
};
|
42
|
+
|
43
|
+
static INLINE
|
44
|
+
saint_t
|
45
|
+
tr_ilg(saidx_t n) {
|
46
|
+
#if defined(BUILD_DIVSUFSORT64)
|
47
|
+
return (n >> 32) ?
|
48
|
+
((n >> 48) ?
|
49
|
+
((n >> 56) ?
|
50
|
+
56 + lg_table[(n >> 56) & 0xff] :
|
51
|
+
48 + lg_table[(n >> 48) & 0xff]) :
|
52
|
+
((n >> 40) ?
|
53
|
+
40 + lg_table[(n >> 40) & 0xff] :
|
54
|
+
32 + lg_table[(n >> 32) & 0xff])) :
|
55
|
+
((n & 0xffff0000) ?
|
56
|
+
((n & 0xff000000) ?
|
57
|
+
24 + lg_table[(n >> 24) & 0xff] :
|
58
|
+
16 + lg_table[(n >> 16) & 0xff]) :
|
59
|
+
((n & 0x0000ff00) ?
|
60
|
+
8 + lg_table[(n >> 8) & 0xff] :
|
61
|
+
0 + lg_table[(n >> 0) & 0xff]));
|
62
|
+
#else
|
63
|
+
return (n & 0xffff0000) ?
|
64
|
+
((n & 0xff000000) ?
|
65
|
+
24 + lg_table[(n >> 24) & 0xff] :
|
66
|
+
16 + lg_table[(n >> 16) & 0xff]) :
|
67
|
+
((n & 0x0000ff00) ?
|
68
|
+
8 + lg_table[(n >> 8) & 0xff] :
|
69
|
+
0 + lg_table[(n >> 0) & 0xff]);
|
70
|
+
#endif
|
71
|
+
}
|
72
|
+
|
73
|
+
|
74
|
+
/*---------------------------------------------------------------------------*/
|
75
|
+
|
76
|
+
/* Simple insertionsort for small size groups. */
|
77
|
+
static
|
78
|
+
void
|
79
|
+
tr_insertionsort(const saidx_t *ISAd, saidx_t *first, saidx_t *last) {
|
80
|
+
saidx_t *a, *b;
|
81
|
+
saidx_t t, r;
|
82
|
+
|
83
|
+
for(a = first + 1; a < last; ++a) {
|
84
|
+
for(t = *a, b = a - 1; 0 > (r = ISAd[t] - ISAd[*b]);) {
|
85
|
+
do { *(b + 1) = *b; } while((first <= --b) && (*b < 0));
|
86
|
+
if(b < first) { break; }
|
87
|
+
}
|
88
|
+
if(r == 0) { *b = ~*b; }
|
89
|
+
*(b + 1) = t;
|
90
|
+
}
|
91
|
+
}
|
92
|
+
|
93
|
+
|
94
|
+
/*---------------------------------------------------------------------------*/
|
95
|
+
|
96
|
+
static INLINE
|
97
|
+
void
|
98
|
+
tr_fixdown(const saidx_t *ISAd, saidx_t *SA, saidx_t i, saidx_t size) {
|
99
|
+
saidx_t j, k;
|
100
|
+
saidx_t v;
|
101
|
+
saidx_t c, d, e;
|
102
|
+
|
103
|
+
for(v = SA[i], c = ISAd[v]; (j = 2 * i + 1) < size; SA[i] = SA[k], i = k) {
|
104
|
+
d = ISAd[SA[k = j++]];
|
105
|
+
if(d < (e = ISAd[SA[j]])) { k = j; d = e; }
|
106
|
+
if(d <= c) { break; }
|
107
|
+
}
|
108
|
+
SA[i] = v;
|
109
|
+
}
|
110
|
+
|
111
|
+
/* Simple top-down heapsort. */
|
112
|
+
static
|
113
|
+
void
|
114
|
+
tr_heapsort(const saidx_t *ISAd, saidx_t *SA, saidx_t size) {
|
115
|
+
saidx_t i, m;
|
116
|
+
saidx_t t;
|
117
|
+
|
118
|
+
m = size;
|
119
|
+
if((size % 2) == 0) {
|
120
|
+
m--;
|
121
|
+
if(ISAd[SA[m / 2]] < ISAd[SA[m]]) { SWAP(SA[m], SA[m / 2]); }
|
122
|
+
}
|
123
|
+
|
124
|
+
for(i = m / 2 - 1; 0 <= i; --i) { tr_fixdown(ISAd, SA, i, m); }
|
125
|
+
if((size % 2) == 0) { SWAP(SA[0], SA[m]); tr_fixdown(ISAd, SA, 0, m); }
|
126
|
+
for(i = m - 1; 0 < i; --i) {
|
127
|
+
t = SA[0], SA[0] = SA[i];
|
128
|
+
tr_fixdown(ISAd, SA, 0, i);
|
129
|
+
SA[i] = t;
|
130
|
+
}
|
131
|
+
}
|
132
|
+
|
133
|
+
|
134
|
+
/*---------------------------------------------------------------------------*/
|
135
|
+
|
136
|
+
/* Returns the median of three elements. */
|
137
|
+
static INLINE
|
138
|
+
saidx_t *
|
139
|
+
tr_median3(const saidx_t *ISAd, saidx_t *v1, saidx_t *v2, saidx_t *v3) {
|
140
|
+
saidx_t *t;
|
141
|
+
if(ISAd[*v1] > ISAd[*v2]) { SWAP(v1, v2); }
|
142
|
+
if(ISAd[*v2] > ISAd[*v3]) {
|
143
|
+
if(ISAd[*v1] > ISAd[*v3]) { return v1; }
|
144
|
+
else { return v3; }
|
145
|
+
}
|
146
|
+
return v2;
|
147
|
+
}
|
148
|
+
|
149
|
+
/* Returns the median of five elements. */
|
150
|
+
static INLINE
|
151
|
+
saidx_t *
|
152
|
+
tr_median5(const saidx_t *ISAd,
|
153
|
+
saidx_t *v1, saidx_t *v2, saidx_t *v3, saidx_t *v4, saidx_t *v5) {
|
154
|
+
saidx_t *t;
|
155
|
+
if(ISAd[*v2] > ISAd[*v3]) { SWAP(v2, v3); }
|
156
|
+
if(ISAd[*v4] > ISAd[*v5]) { SWAP(v4, v5); }
|
157
|
+
if(ISAd[*v2] > ISAd[*v4]) { SWAP(v2, v4); SWAP(v3, v5); }
|
158
|
+
if(ISAd[*v1] > ISAd[*v3]) { SWAP(v1, v3); }
|
159
|
+
if(ISAd[*v1] > ISAd[*v4]) { SWAP(v1, v4); SWAP(v3, v5); }
|
160
|
+
if(ISAd[*v3] > ISAd[*v4]) { return v4; }
|
161
|
+
return v3;
|
162
|
+
}
|
163
|
+
|
164
|
+
/* Returns the pivot element. */
|
165
|
+
static INLINE
|
166
|
+
saidx_t *
|
167
|
+
tr_pivot(const saidx_t *ISAd, saidx_t *first, saidx_t *last) {
|
168
|
+
saidx_t *middle;
|
169
|
+
saidx_t t;
|
170
|
+
|
171
|
+
t = last - first;
|
172
|
+
middle = first + t / 2;
|
173
|
+
|
174
|
+
if(t <= 512) {
|
175
|
+
if(t <= 32) {
|
176
|
+
return tr_median3(ISAd, first, middle, last - 1);
|
177
|
+
} else {
|
178
|
+
t >>= 2;
|
179
|
+
return tr_median5(ISAd, first, first + t, middle, last - 1 - t, last - 1);
|
180
|
+
}
|
181
|
+
}
|
182
|
+
t >>= 3;
|
183
|
+
first = tr_median3(ISAd, first, first + t, first + (t << 1));
|
184
|
+
middle = tr_median3(ISAd, middle - t, middle, middle + t);
|
185
|
+
last = tr_median3(ISAd, last - 1 - (t << 1), last - 1 - t, last - 1);
|
186
|
+
return tr_median3(ISAd, first, middle, last);
|
187
|
+
}
|
188
|
+
|
189
|
+
|
190
|
+
/*---------------------------------------------------------------------------*/
|
191
|
+
|
192
|
+
typedef struct _trbudget_t trbudget_t;
|
193
|
+
struct _trbudget_t {
|
194
|
+
saidx_t chance;
|
195
|
+
saidx_t remain;
|
196
|
+
saidx_t incval;
|
197
|
+
saidx_t count;
|
198
|
+
};
|
199
|
+
|
200
|
+
static INLINE
|
201
|
+
void
|
202
|
+
trbudget_init(trbudget_t *budget, saidx_t chance, saidx_t incval) {
|
203
|
+
budget->chance = chance;
|
204
|
+
budget->remain = budget->incval = incval;
|
205
|
+
}
|
206
|
+
|
207
|
+
static INLINE
|
208
|
+
saint_t
|
209
|
+
trbudget_check(trbudget_t *budget, saidx_t size) {
|
210
|
+
if(size <= budget->remain) { budget->remain -= size; return 1; }
|
211
|
+
if(budget->chance == 0) { budget->count += size; return 0; }
|
212
|
+
budget->remain += budget->incval - size;
|
213
|
+
budget->chance -= 1;
|
214
|
+
return 1;
|
215
|
+
}
|
216
|
+
|
217
|
+
|
218
|
+
/*---------------------------------------------------------------------------*/
|
219
|
+
|
220
|
+
static INLINE
|
221
|
+
void
|
222
|
+
tr_partition(const saidx_t *ISAd,
|
223
|
+
saidx_t *first, saidx_t *middle, saidx_t *last,
|
224
|
+
saidx_t **pa, saidx_t **pb, saidx_t v) {
|
225
|
+
saidx_t *a, *b, *c, *d, *e, *f;
|
226
|
+
saidx_t t, s;
|
227
|
+
saidx_t x = 0;
|
228
|
+
|
229
|
+
for(b = middle - 1; (++b < last) && ((x = ISAd[*b]) == v);) { }
|
230
|
+
if(((a = b) < last) && (x < v)) {
|
231
|
+
for(; (++b < last) && ((x = ISAd[*b]) <= v);) {
|
232
|
+
if(x == v) { SWAP(*b, *a); ++a; }
|
233
|
+
}
|
234
|
+
}
|
235
|
+
for(c = last; (b < --c) && ((x = ISAd[*c]) == v);) { }
|
236
|
+
if((b < (d = c)) && (x > v)) {
|
237
|
+
for(; (b < --c) && ((x = ISAd[*c]) >= v);) {
|
238
|
+
if(x == v) { SWAP(*c, *d); --d; }
|
239
|
+
}
|
240
|
+
}
|
241
|
+
for(; b < c;) {
|
242
|
+
SWAP(*b, *c);
|
243
|
+
for(; (++b < c) && ((x = ISAd[*b]) <= v);) {
|
244
|
+
if(x == v) { SWAP(*b, *a); ++a; }
|
245
|
+
}
|
246
|
+
for(; (b < --c) && ((x = ISAd[*c]) >= v);) {
|
247
|
+
if(x == v) { SWAP(*c, *d); --d; }
|
248
|
+
}
|
249
|
+
}
|
250
|
+
|
251
|
+
if(a <= d) {
|
252
|
+
c = b - 1;
|
253
|
+
if((s = a - first) > (t = b - a)) { s = t; }
|
254
|
+
for(e = first, f = b - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); }
|
255
|
+
if((s = d - c) > (t = last - d - 1)) { s = t; }
|
256
|
+
for(e = b, f = last - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); }
|
257
|
+
first += (b - a), last -= (d - c);
|
258
|
+
}
|
259
|
+
*pa = first, *pb = last;
|
260
|
+
}
|
261
|
+
|
262
|
+
static
|
263
|
+
void
|
264
|
+
tr_copy(saidx_t *ISA, const saidx_t *SA,
|
265
|
+
saidx_t *first, saidx_t *a, saidx_t *b, saidx_t *last,
|
266
|
+
saidx_t depth) {
|
267
|
+
/* sort suffixes of middle partition
|
268
|
+
by using sorted order of suffixes of left and right partition. */
|
269
|
+
saidx_t *c, *d, *e;
|
270
|
+
saidx_t s, v;
|
271
|
+
|
272
|
+
v = b - SA - 1;
|
273
|
+
for(c = first, d = a - 1; c <= d; ++c) {
|
274
|
+
if((0 <= (s = *c - depth)) && (ISA[s] == v)) {
|
275
|
+
*++d = s;
|
276
|
+
ISA[s] = d - SA;
|
277
|
+
}
|
278
|
+
}
|
279
|
+
for(c = last - 1, e = d + 1, d = b; e < d; --c) {
|
280
|
+
if((0 <= (s = *c - depth)) && (ISA[s] == v)) {
|
281
|
+
*--d = s;
|
282
|
+
ISA[s] = d - SA;
|
283
|
+
}
|
284
|
+
}
|
285
|
+
}
|
286
|
+
|
287
|
+
static
|
288
|
+
void
|
289
|
+
tr_partialcopy(saidx_t *ISA, const saidx_t *SA,
|
290
|
+
saidx_t *first, saidx_t *a, saidx_t *b, saidx_t *last,
|
291
|
+
saidx_t depth) {
|
292
|
+
saidx_t *c, *d, *e;
|
293
|
+
saidx_t s, v;
|
294
|
+
saidx_t rank, lastrank, newrank = -1;
|
295
|
+
|
296
|
+
v = b - SA - 1;
|
297
|
+
lastrank = -1;
|
298
|
+
for(c = first, d = a - 1; c <= d; ++c) {
|
299
|
+
if((0 <= (s = *c - depth)) && (ISA[s] == v)) {
|
300
|
+
*++d = s;
|
301
|
+
rank = ISA[s + depth];
|
302
|
+
if(lastrank != rank) { lastrank = rank; newrank = d - SA; }
|
303
|
+
ISA[s] = newrank;
|
304
|
+
}
|
305
|
+
}
|
306
|
+
|
307
|
+
lastrank = -1;
|
308
|
+
for(e = d; first <= e; --e) {
|
309
|
+
rank = ISA[*e];
|
310
|
+
if(lastrank != rank) { lastrank = rank; newrank = e - SA; }
|
311
|
+
if(newrank != rank) { ISA[*e] = newrank; }
|
312
|
+
}
|
313
|
+
|
314
|
+
lastrank = -1;
|
315
|
+
for(c = last - 1, e = d + 1, d = b; e < d; --c) {
|
316
|
+
if((0 <= (s = *c - depth)) && (ISA[s] == v)) {
|
317
|
+
*--d = s;
|
318
|
+
rank = ISA[s + depth];
|
319
|
+
if(lastrank != rank) { lastrank = rank; newrank = d - SA; }
|
320
|
+
ISA[s] = newrank;
|
321
|
+
}
|
322
|
+
}
|
323
|
+
}
|
324
|
+
|
325
|
+
static
|
326
|
+
void
|
327
|
+
tr_introsort(saidx_t *ISA, const saidx_t *ISAd,
|
328
|
+
saidx_t *SA, saidx_t *first, saidx_t *last,
|
329
|
+
trbudget_t *budget) {
|
330
|
+
#define STACK_SIZE TR_STACKSIZE
|
331
|
+
struct { const saidx_t *a; saidx_t *b, *c; saint_t d, e; }stack[STACK_SIZE];
|
332
|
+
saidx_t *a, *b, *c;
|
333
|
+
saidx_t t;
|
334
|
+
saidx_t v, x = 0;
|
335
|
+
saidx_t incr = ISAd - ISA;
|
336
|
+
saint_t limit, next;
|
337
|
+
saint_t ssize, trlink = -1;
|
338
|
+
|
339
|
+
for(ssize = 0, limit = tr_ilg(last - first);;) {
|
340
|
+
|
341
|
+
if(limit < 0) {
|
342
|
+
if(limit == -1) {
|
343
|
+
/* tandem repeat partition */
|
344
|
+
tr_partition(ISAd - incr, first, first, last, &a, &b, last - SA - 1);
|
345
|
+
|
346
|
+
/* update ranks */
|
347
|
+
if(a < last) {
|
348
|
+
for(c = first, v = a - SA - 1; c < a; ++c) { ISA[*c] = v; }
|
349
|
+
}
|
350
|
+
if(b < last) {
|
351
|
+
for(c = a, v = b - SA - 1; c < b; ++c) { ISA[*c] = v; }
|
352
|
+
}
|
353
|
+
|
354
|
+
/* push */
|
355
|
+
if(1 < (b - a)) {
|
356
|
+
STACK_PUSH5(NULL, a, b, 0, 0);
|
357
|
+
STACK_PUSH5(ISAd - incr, first, last, -2, trlink);
|
358
|
+
trlink = ssize - 2;
|
359
|
+
}
|
360
|
+
if((a - first) <= (last - b)) {
|
361
|
+
if(1 < (a - first)) {
|
362
|
+
STACK_PUSH5(ISAd, b, last, tr_ilg(last - b), trlink);
|
363
|
+
last = a, limit = tr_ilg(a - first);
|
364
|
+
} else if(1 < (last - b)) {
|
365
|
+
first = b, limit = tr_ilg(last - b);
|
366
|
+
} else {
|
367
|
+
STACK_POP5(ISAd, first, last, limit, trlink);
|
368
|
+
}
|
369
|
+
} else {
|
370
|
+
if(1 < (last - b)) {
|
371
|
+
STACK_PUSH5(ISAd, first, a, tr_ilg(a - first), trlink);
|
372
|
+
first = b, limit = tr_ilg(last - b);
|
373
|
+
} else if(1 < (a - first)) {
|
374
|
+
last = a, limit = tr_ilg(a - first);
|
375
|
+
} else {
|
376
|
+
STACK_POP5(ISAd, first, last, limit, trlink);
|
377
|
+
}
|
378
|
+
}
|
379
|
+
} else if(limit == -2) {
|
380
|
+
/* tandem repeat copy */
|
381
|
+
a = stack[--ssize].b, b = stack[ssize].c;
|
382
|
+
if(stack[ssize].d == 0) {
|
383
|
+
tr_copy(ISA, SA, first, a, b, last, ISAd - ISA);
|
384
|
+
} else {
|
385
|
+
if(0 <= trlink) { stack[trlink].d = -1; }
|
386
|
+
tr_partialcopy(ISA, SA, first, a, b, last, ISAd - ISA);
|
387
|
+
}
|
388
|
+
STACK_POP5(ISAd, first, last, limit, trlink);
|
389
|
+
} else {
|
390
|
+
/* sorted partition */
|
391
|
+
if(0 <= *first) {
|
392
|
+
a = first;
|
393
|
+
do { ISA[*a] = a - SA; } while((++a < last) && (0 <= *a));
|
394
|
+
first = a;
|
395
|
+
}
|
396
|
+
if(first < last) {
|
397
|
+
a = first; do { *a = ~*a; } while(*++a < 0);
|
398
|
+
next = (ISA[*a] != ISAd[*a]) ? tr_ilg(a - first + 1) : -1;
|
399
|
+
if(++a < last) { for(b = first, v = a - SA - 1; b < a; ++b) { ISA[*b] = v; } }
|
400
|
+
|
401
|
+
/* push */
|
402
|
+
if(trbudget_check(budget, a - first)) {
|
403
|
+
if((a - first) <= (last - a)) {
|
404
|
+
STACK_PUSH5(ISAd, a, last, -3, trlink);
|
405
|
+
ISAd += incr, last = a, limit = next;
|
406
|
+
} else {
|
407
|
+
if(1 < (last - a)) {
|
408
|
+
STACK_PUSH5(ISAd + incr, first, a, next, trlink);
|
409
|
+
first = a, limit = -3;
|
410
|
+
} else {
|
411
|
+
ISAd += incr, last = a, limit = next;
|
412
|
+
}
|
413
|
+
}
|
414
|
+
} else {
|
415
|
+
if(0 <= trlink) { stack[trlink].d = -1; }
|
416
|
+
if(1 < (last - a)) {
|
417
|
+
first = a, limit = -3;
|
418
|
+
} else {
|
419
|
+
STACK_POP5(ISAd, first, last, limit, trlink);
|
420
|
+
}
|
421
|
+
}
|
422
|
+
} else {
|
423
|
+
STACK_POP5(ISAd, first, last, limit, trlink);
|
424
|
+
}
|
425
|
+
}
|
426
|
+
continue;
|
427
|
+
}
|
428
|
+
|
429
|
+
if((last - first) <= TR_INSERTIONSORT_THRESHOLD) {
|
430
|
+
tr_insertionsort(ISAd, first, last);
|
431
|
+
limit = -3;
|
432
|
+
continue;
|
433
|
+
}
|
434
|
+
|
435
|
+
if(limit-- == 0) {
|
436
|
+
tr_heapsort(ISAd, first, last - first);
|
437
|
+
for(a = last - 1; first < a; a = b) {
|
438
|
+
for(x = ISAd[*a], b = a - 1; (first <= b) && (ISAd[*b] == x); --b) { *b = ~*b; }
|
439
|
+
}
|
440
|
+
limit = -3;
|
441
|
+
continue;
|
442
|
+
}
|
443
|
+
|
444
|
+
/* choose pivot */
|
445
|
+
a = tr_pivot(ISAd, first, last);
|
446
|
+
SWAP(*first, *a);
|
447
|
+
v = ISAd[*first];
|
448
|
+
|
449
|
+
/* partition */
|
450
|
+
tr_partition(ISAd, first, first + 1, last, &a, &b, v);
|
451
|
+
if((last - first) != (b - a)) {
|
452
|
+
next = (ISA[*a] != v) ? tr_ilg(b - a) : -1;
|
453
|
+
|
454
|
+
/* update ranks */
|
455
|
+
for(c = first, v = a - SA - 1; c < a; ++c) { ISA[*c] = v; }
|
456
|
+
if(b < last) { for(c = a, v = b - SA - 1; c < b; ++c) { ISA[*c] = v; } }
|
457
|
+
|
458
|
+
/* push */
|
459
|
+
if((1 < (b - a)) && (trbudget_check(budget, b - a))) {
|
460
|
+
if((a - first) <= (last - b)) {
|
461
|
+
if((last - b) <= (b - a)) {
|
462
|
+
if(1 < (a - first)) {
|
463
|
+
STACK_PUSH5(ISAd + incr, a, b, next, trlink);
|
464
|
+
STACK_PUSH5(ISAd, b, last, limit, trlink);
|
465
|
+
last = a;
|
466
|
+
} else if(1 < (last - b)) {
|
467
|
+
STACK_PUSH5(ISAd + incr, a, b, next, trlink);
|
468
|
+
first = b;
|
469
|
+
} else {
|
470
|
+
ISAd += incr, first = a, last = b, limit = next;
|
471
|
+
}
|
472
|
+
} else if((a - first) <= (b - a)) {
|
473
|
+
if(1 < (a - first)) {
|
474
|
+
STACK_PUSH5(ISAd, b, last, limit, trlink);
|
475
|
+
STACK_PUSH5(ISAd + incr, a, b, next, trlink);
|
476
|
+
last = a;
|
477
|
+
} else {
|
478
|
+
STACK_PUSH5(ISAd, b, last, limit, trlink);
|
479
|
+
ISAd += incr, first = a, last = b, limit = next;
|
480
|
+
}
|
481
|
+
} else {
|
482
|
+
STACK_PUSH5(ISAd, b, last, limit, trlink);
|
483
|
+
STACK_PUSH5(ISAd, first, a, limit, trlink);
|
484
|
+
ISAd += incr, first = a, last = b, limit = next;
|
485
|
+
}
|
486
|
+
} else {
|
487
|
+
if((a - first) <= (b - a)) {
|
488
|
+
if(1 < (last - b)) {
|
489
|
+
STACK_PUSH5(ISAd + incr, a, b, next, trlink);
|
490
|
+
STACK_PUSH5(ISAd, first, a, limit, trlink);
|
491
|
+
first = b;
|
492
|
+
} else if(1 < (a - first)) {
|
493
|
+
STACK_PUSH5(ISAd + incr, a, b, next, trlink);
|
494
|
+
last = a;
|
495
|
+
} else {
|
496
|
+
ISAd += incr, first = a, last = b, limit = next;
|
497
|
+
}
|
498
|
+
} else if((last - b) <= (b - a)) {
|
499
|
+
if(1 < (last - b)) {
|
500
|
+
STACK_PUSH5(ISAd, first, a, limit, trlink);
|
501
|
+
STACK_PUSH5(ISAd + incr, a, b, next, trlink);
|
502
|
+
first = b;
|
503
|
+
} else {
|
504
|
+
STACK_PUSH5(ISAd, first, a, limit, trlink);
|
505
|
+
ISAd += incr, first = a, last = b, limit = next;
|
506
|
+
}
|
507
|
+
} else {
|
508
|
+
STACK_PUSH5(ISAd, first, a, limit, trlink);
|
509
|
+
STACK_PUSH5(ISAd, b, last, limit, trlink);
|
510
|
+
ISAd += incr, first = a, last = b, limit = next;
|
511
|
+
}
|
512
|
+
}
|
513
|
+
} else {
|
514
|
+
if((1 < (b - a)) && (0 <= trlink)) { stack[trlink].d = -1; }
|
515
|
+
if((a - first) <= (last - b)) {
|
516
|
+
if(1 < (a - first)) {
|
517
|
+
STACK_PUSH5(ISAd, b, last, limit, trlink);
|
518
|
+
last = a;
|
519
|
+
} else if(1 < (last - b)) {
|
520
|
+
first = b;
|
521
|
+
} else {
|
522
|
+
STACK_POP5(ISAd, first, last, limit, trlink);
|
523
|
+
}
|
524
|
+
} else {
|
525
|
+
if(1 < (last - b)) {
|
526
|
+
STACK_PUSH5(ISAd, first, a, limit, trlink);
|
527
|
+
first = b;
|
528
|
+
} else if(1 < (a - first)) {
|
529
|
+
last = a;
|
530
|
+
} else {
|
531
|
+
STACK_POP5(ISAd, first, last, limit, trlink);
|
532
|
+
}
|
533
|
+
}
|
534
|
+
}
|
535
|
+
} else {
|
536
|
+
if(trbudget_check(budget, last - first)) {
|
537
|
+
limit = tr_ilg(last - first), ISAd += incr;
|
538
|
+
} else {
|
539
|
+
if(0 <= trlink) { stack[trlink].d = -1; }
|
540
|
+
STACK_POP5(ISAd, first, last, limit, trlink);
|
541
|
+
}
|
542
|
+
}
|
543
|
+
}
|
544
|
+
#undef STACK_SIZE
|
545
|
+
}
|
546
|
+
|
547
|
+
|
548
|
+
|
549
|
+
/*---------------------------------------------------------------------------*/
|
550
|
+
|
551
|
+
/*- Function -*/
|
552
|
+
|
553
|
+
/* Tandem repeat sort */
|
554
|
+
void
|
555
|
+
trsort(saidx_t *ISA, saidx_t *SA, saidx_t n, saidx_t depth) {
|
556
|
+
saidx_t *ISAd;
|
557
|
+
saidx_t *first, *last;
|
558
|
+
trbudget_t budget;
|
559
|
+
saidx_t t, skip, unsorted;
|
560
|
+
|
561
|
+
trbudget_init(&budget, tr_ilg(n) * 2 / 3, n);
|
562
|
+
/* trbudget_init(&budget, tr_ilg(n) * 3 / 4, n); */
|
563
|
+
for(ISAd = ISA + depth; -n < *SA; ISAd += ISAd - ISA) {
|
564
|
+
first = SA;
|
565
|
+
skip = 0;
|
566
|
+
unsorted = 0;
|
567
|
+
do {
|
568
|
+
if((t = *first) < 0) { first -= t; skip += t; }
|
569
|
+
else {
|
570
|
+
if(skip != 0) { *(first + skip) = skip; skip = 0; }
|
571
|
+
last = SA + ISA[t] + 1;
|
572
|
+
if(1 < (last - first)) {
|
573
|
+
budget.count = 0;
|
574
|
+
tr_introsort(ISA, ISAd, SA, first, last, &budget);
|
575
|
+
if(budget.count != 0) { unsorted += budget.count; }
|
576
|
+
else { skip = first - last; }
|
577
|
+
} else if((last - first) == 1) {
|
578
|
+
skip = -1;
|
579
|
+
}
|
580
|
+
first = last;
|
581
|
+
}
|
582
|
+
} while(first < (SA + n));
|
583
|
+
if(skip != 0) { *(first + skip) = skip; }
|
584
|
+
if(unsorted == 0) { break; }
|
585
|
+
}
|
586
|
+
}
|