divsufsort 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,815 @@
1
+ /*
2
+ * sssort.c for libdivsufsort
3
+ * Copyright (c) 2003-2008 Yuta Mori All Rights Reserved.
4
+ *
5
+ * Permission is hereby granted, free of charge, to any person
6
+ * obtaining a copy of this software and associated documentation
7
+ * files (the "Software"), to deal in the Software without
8
+ * restriction, including without limitation the rights to use,
9
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
10
+ * copies of the Software, and to permit persons to whom the
11
+ * Software is furnished to do so, subject to the following
12
+ * conditions:
13
+ *
14
+ * The above copyright notice and this permission notice shall be
15
+ * included in all copies or substantial portions of the Software.
16
+ *
17
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
18
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
19
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
20
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
21
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
22
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
23
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
24
+ * OTHER DEALINGS IN THE SOFTWARE.
25
+ */
26
+
27
+ #include "divsufsort_private.h"
28
+
29
+
30
+ /*- Private Functions -*/
31
+
32
+ static const saint_t lg_table[256]= {
33
+ -1,0,1,1,2,2,2,2,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
34
+ 5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
35
+ 6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
36
+ 6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
37
+ 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
38
+ 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
39
+ 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
40
+ 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7
41
+ };
42
+
43
+ #if (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE)
44
+
45
+ static INLINE
46
+ saint_t
47
+ ss_ilg(saidx_t n) {
48
+ #if SS_BLOCKSIZE == 0
49
+ # if defined(BUILD_DIVSUFSORT64)
50
+ return (n >> 32) ?
51
+ ((n >> 48) ?
52
+ ((n >> 56) ?
53
+ 56 + lg_table[(n >> 56) & 0xff] :
54
+ 48 + lg_table[(n >> 48) & 0xff]) :
55
+ ((n >> 40) ?
56
+ 40 + lg_table[(n >> 40) & 0xff] :
57
+ 32 + lg_table[(n >> 32) & 0xff])) :
58
+ ((n & 0xffff0000) ?
59
+ ((n & 0xff000000) ?
60
+ 24 + lg_table[(n >> 24) & 0xff] :
61
+ 16 + lg_table[(n >> 16) & 0xff]) :
62
+ ((n & 0x0000ff00) ?
63
+ 8 + lg_table[(n >> 8) & 0xff] :
64
+ 0 + lg_table[(n >> 0) & 0xff]));
65
+ # else
66
+ return (n & 0xffff0000) ?
67
+ ((n & 0xff000000) ?
68
+ 24 + lg_table[(n >> 24) & 0xff] :
69
+ 16 + lg_table[(n >> 16) & 0xff]) :
70
+ ((n & 0x0000ff00) ?
71
+ 8 + lg_table[(n >> 8) & 0xff] :
72
+ 0 + lg_table[(n >> 0) & 0xff]);
73
+ # endif
74
+ #elif SS_BLOCKSIZE < 256
75
+ return lg_table[n];
76
+ #else
77
+ return (n & 0xff00) ?
78
+ 8 + lg_table[(n >> 8) & 0xff] :
79
+ 0 + lg_table[(n >> 0) & 0xff];
80
+ #endif
81
+ }
82
+
83
+ #endif /* (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE) */
84
+
85
+ #if SS_BLOCKSIZE != 0
86
+
87
+ static const saint_t sqq_table[256] = {
88
+ 0, 16, 22, 27, 32, 35, 39, 42, 45, 48, 50, 53, 55, 57, 59, 61,
89
+ 64, 65, 67, 69, 71, 73, 75, 76, 78, 80, 81, 83, 84, 86, 87, 89,
90
+ 90, 91, 93, 94, 96, 97, 98, 99, 101, 102, 103, 104, 106, 107, 108, 109,
91
+ 110, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126,
92
+ 128, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142,
93
+ 143, 144, 144, 145, 146, 147, 148, 149, 150, 150, 151, 152, 153, 154, 155, 155,
94
+ 156, 157, 158, 159, 160, 160, 161, 162, 163, 163, 164, 165, 166, 167, 167, 168,
95
+ 169, 170, 170, 171, 172, 173, 173, 174, 175, 176, 176, 177, 178, 178, 179, 180,
96
+ 181, 181, 182, 183, 183, 184, 185, 185, 186, 187, 187, 188, 189, 189, 190, 191,
97
+ 192, 192, 193, 193, 194, 195, 195, 196, 197, 197, 198, 199, 199, 200, 201, 201,
98
+ 202, 203, 203, 204, 204, 205, 206, 206, 207, 208, 208, 209, 209, 210, 211, 211,
99
+ 212, 212, 213, 214, 214, 215, 215, 216, 217, 217, 218, 218, 219, 219, 220, 221,
100
+ 221, 222, 222, 223, 224, 224, 225, 225, 226, 226, 227, 227, 228, 229, 229, 230,
101
+ 230, 231, 231, 232, 232, 233, 234, 234, 235, 235, 236, 236, 237, 237, 238, 238,
102
+ 239, 240, 240, 241, 241, 242, 242, 243, 243, 244, 244, 245, 245, 246, 246, 247,
103
+ 247, 248, 248, 249, 249, 250, 250, 251, 251, 252, 252, 253, 253, 254, 254, 255
104
+ };
105
+
106
+ static INLINE
107
+ saidx_t
108
+ ss_isqrt(saidx_t x) {
109
+ saidx_t y, e;
110
+
111
+ if(x >= (SS_BLOCKSIZE * SS_BLOCKSIZE)) { return SS_BLOCKSIZE; }
112
+ e = (x & 0xffff0000) ?
113
+ ((x & 0xff000000) ?
114
+ 24 + lg_table[(x >> 24) & 0xff] :
115
+ 16 + lg_table[(x >> 16) & 0xff]) :
116
+ ((x & 0x0000ff00) ?
117
+ 8 + lg_table[(x >> 8) & 0xff] :
118
+ 0 + lg_table[(x >> 0) & 0xff]);
119
+
120
+ if(e >= 16) {
121
+ y = sqq_table[x >> ((e - 6) - (e & 1))] << ((e >> 1) - 7);
122
+ if(e >= 24) { y = (y + 1 + x / y) >> 1; }
123
+ y = (y + 1 + x / y) >> 1;
124
+ } else if(e >= 8) {
125
+ y = (sqq_table[x >> ((e - 6) - (e & 1))] >> (7 - (e >> 1))) + 1;
126
+ } else {
127
+ return sqq_table[x] >> 4;
128
+ }
129
+
130
+ return (x < (y * y)) ? y - 1 : y;
131
+ }
132
+
133
+ #endif /* SS_BLOCKSIZE != 0 */
134
+
135
+
136
+ /*---------------------------------------------------------------------------*/
137
+
138
+ /* Compares two suffixes. */
139
+ static INLINE
140
+ saint_t
141
+ ss_compare(const sauchar_t *T,
142
+ const saidx_t *p1, const saidx_t *p2,
143
+ saidx_t depth) {
144
+ const sauchar_t *U1, *U2, *U1n, *U2n;
145
+
146
+ for(U1 = T + depth + *p1,
147
+ U2 = T + depth + *p2,
148
+ U1n = T + *(p1 + 1) + 2,
149
+ U2n = T + *(p2 + 1) + 2;
150
+ (U1 < U1n) && (U2 < U2n) && (*U1 == *U2);
151
+ ++U1, ++U2) {
152
+ }
153
+
154
+ return U1 < U1n ?
155
+ (U2 < U2n ? *U1 - *U2 : 1) :
156
+ (U2 < U2n ? -1 : 0);
157
+ }
158
+
159
+
160
+ /*---------------------------------------------------------------------------*/
161
+
162
+ #if (SS_BLOCKSIZE != 1) && (SS_INSERTIONSORT_THRESHOLD != 1)
163
+
164
+ /* Insertionsort for small size groups */
165
+ static
166
+ void
167
+ ss_insertionsort(const sauchar_t *T, const saidx_t *PA,
168
+ saidx_t *first, saidx_t *last, saidx_t depth) {
169
+ saidx_t *i, *j;
170
+ saidx_t t;
171
+ saint_t r;
172
+
173
+ for(i = last - 2; first <= i; --i) {
174
+ for(t = *i, j = i + 1; 0 < (r = ss_compare(T, PA + t, PA + *j, depth));) {
175
+ do { *(j - 1) = *j; } while((++j < last) && (*j < 0));
176
+ if(last <= j) { break; }
177
+ }
178
+ if(r == 0) { *j = ~*j; }
179
+ *(j - 1) = t;
180
+ }
181
+ }
182
+
183
+ #endif /* (SS_BLOCKSIZE != 1) && (SS_INSERTIONSORT_THRESHOLD != 1) */
184
+
185
+
186
+ /*---------------------------------------------------------------------------*/
187
+
188
+ #if (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE)
189
+
190
+ static INLINE
191
+ void
192
+ ss_fixdown(const sauchar_t *Td, const saidx_t *PA,
193
+ saidx_t *SA, saidx_t i, saidx_t size) {
194
+ saidx_t j, k;
195
+ saidx_t v;
196
+ saint_t c, d, e;
197
+
198
+ for(v = SA[i], c = Td[PA[v]]; (j = 2 * i + 1) < size; SA[i] = SA[k], i = k) {
199
+ d = Td[PA[SA[k = j++]]];
200
+ if(d < (e = Td[PA[SA[j]]])) { k = j; d = e; }
201
+ if(d <= c) { break; }
202
+ }
203
+ SA[i] = v;
204
+ }
205
+
206
+ /* Simple top-down heapsort. */
207
+ static
208
+ void
209
+ ss_heapsort(const sauchar_t *Td, const saidx_t *PA, saidx_t *SA, saidx_t size) {
210
+ saidx_t i, m;
211
+ saidx_t t;
212
+
213
+ m = size;
214
+ if((size % 2) == 0) {
215
+ m--;
216
+ if(Td[PA[SA[m / 2]]] < Td[PA[SA[m]]]) { SWAP(SA[m], SA[m / 2]); }
217
+ }
218
+
219
+ for(i = m / 2 - 1; 0 <= i; --i) { ss_fixdown(Td, PA, SA, i, m); }
220
+ if((size % 2) == 0) { SWAP(SA[0], SA[m]); ss_fixdown(Td, PA, SA, 0, m); }
221
+ for(i = m - 1; 0 < i; --i) {
222
+ t = SA[0], SA[0] = SA[i];
223
+ ss_fixdown(Td, PA, SA, 0, i);
224
+ SA[i] = t;
225
+ }
226
+ }
227
+
228
+
229
+ /*---------------------------------------------------------------------------*/
230
+
231
+ /* Returns the median of three elements. */
232
+ static INLINE
233
+ saidx_t *
234
+ ss_median3(const sauchar_t *Td, const saidx_t *PA,
235
+ saidx_t *v1, saidx_t *v2, saidx_t *v3) {
236
+ saidx_t *t;
237
+ if(Td[PA[*v1]] > Td[PA[*v2]]) { SWAP(v1, v2); }
238
+ if(Td[PA[*v2]] > Td[PA[*v3]]) {
239
+ if(Td[PA[*v1]] > Td[PA[*v3]]) { return v1; }
240
+ else { return v3; }
241
+ }
242
+ return v2;
243
+ }
244
+
245
+ /* Returns the median of five elements. */
246
+ static INLINE
247
+ saidx_t *
248
+ ss_median5(const sauchar_t *Td, const saidx_t *PA,
249
+ saidx_t *v1, saidx_t *v2, saidx_t *v3, saidx_t *v4, saidx_t *v5) {
250
+ saidx_t *t;
251
+ if(Td[PA[*v2]] > Td[PA[*v3]]) { SWAP(v2, v3); }
252
+ if(Td[PA[*v4]] > Td[PA[*v5]]) { SWAP(v4, v5); }
253
+ if(Td[PA[*v2]] > Td[PA[*v4]]) { SWAP(v2, v4); SWAP(v3, v5); }
254
+ if(Td[PA[*v1]] > Td[PA[*v3]]) { SWAP(v1, v3); }
255
+ if(Td[PA[*v1]] > Td[PA[*v4]]) { SWAP(v1, v4); SWAP(v3, v5); }
256
+ if(Td[PA[*v3]] > Td[PA[*v4]]) { return v4; }
257
+ return v3;
258
+ }
259
+
260
+ /* Returns the pivot element. */
261
+ static INLINE
262
+ saidx_t *
263
+ ss_pivot(const sauchar_t *Td, const saidx_t *PA, saidx_t *first, saidx_t *last) {
264
+ saidx_t *middle;
265
+ saidx_t t;
266
+
267
+ t = last - first;
268
+ middle = first + t / 2;
269
+
270
+ if(t <= 512) {
271
+ if(t <= 32) {
272
+ return ss_median3(Td, PA, first, middle, last - 1);
273
+ } else {
274
+ t >>= 2;
275
+ return ss_median5(Td, PA, first, first + t, middle, last - 1 - t, last - 1);
276
+ }
277
+ }
278
+ t >>= 3;
279
+ first = ss_median3(Td, PA, first, first + t, first + (t << 1));
280
+ middle = ss_median3(Td, PA, middle - t, middle, middle + t);
281
+ last = ss_median3(Td, PA, last - 1 - (t << 1), last - 1 - t, last - 1);
282
+ return ss_median3(Td, PA, first, middle, last);
283
+ }
284
+
285
+
286
+ /*---------------------------------------------------------------------------*/
287
+
288
+ /* Binary partition for substrings. */
289
+ static INLINE
290
+ saidx_t *
291
+ ss_partition(const saidx_t *PA,
292
+ saidx_t *first, saidx_t *last, saidx_t depth) {
293
+ saidx_t *a, *b;
294
+ saidx_t t;
295
+ for(a = first - 1, b = last;;) {
296
+ for(; (++a < b) && ((PA[*a] + depth) >= (PA[*a + 1] + 1));) { *a = ~*a; }
297
+ for(; (a < --b) && ((PA[*b] + depth) < (PA[*b + 1] + 1));) { }
298
+ if(b <= a) { break; }
299
+ t = ~*b;
300
+ *b = *a;
301
+ *a = t;
302
+ }
303
+ if(first < a) { *first = ~*first; }
304
+ return a;
305
+ }
306
+
307
+ /* Multikey introsort for medium size groups. */
308
+ static
309
+ void
310
+ ss_mintrosort(const sauchar_t *T, const saidx_t *PA,
311
+ saidx_t *first, saidx_t *last,
312
+ saidx_t depth) {
313
+ #define STACK_SIZE SS_MISORT_STACKSIZE
314
+ struct { saidx_t *a, *b, c; saint_t d; } stack[STACK_SIZE];
315
+ const sauchar_t *Td;
316
+ saidx_t *a, *b, *c, *d, *e, *f;
317
+ saidx_t s, t;
318
+ saint_t ssize;
319
+ saint_t limit;
320
+ saint_t v, x = 0;
321
+
322
+ for(ssize = 0, limit = ss_ilg(last - first);;) {
323
+
324
+ if((last - first) <= SS_INSERTIONSORT_THRESHOLD) {
325
+ #if 1 < SS_INSERTIONSORT_THRESHOLD
326
+ if(1 < (last - first)) { ss_insertionsort(T, PA, first, last, depth); }
327
+ #endif
328
+ STACK_POP(first, last, depth, limit);
329
+ continue;
330
+ }
331
+
332
+ Td = T + depth;
333
+ if(limit-- == 0) { ss_heapsort(Td, PA, first, last - first); }
334
+ if(limit < 0) {
335
+ for(a = first + 1, v = Td[PA[*first]]; a < last; ++a) {
336
+ if((x = Td[PA[*a]]) != v) {
337
+ if(1 < (a - first)) { break; }
338
+ v = x;
339
+ first = a;
340
+ }
341
+ }
342
+ if(Td[PA[*first] - 1] < v) {
343
+ first = ss_partition(PA, first, a, depth);
344
+ }
345
+ if((a - first) <= (last - a)) {
346
+ if(1 < (a - first)) {
347
+ STACK_PUSH(a, last, depth, -1);
348
+ last = a, depth += 1, limit = ss_ilg(a - first);
349
+ } else {
350
+ first = a, limit = -1;
351
+ }
352
+ } else {
353
+ if(1 < (last - a)) {
354
+ STACK_PUSH(first, a, depth + 1, ss_ilg(a - first));
355
+ first = a, limit = -1;
356
+ } else {
357
+ last = a, depth += 1, limit = ss_ilg(a - first);
358
+ }
359
+ }
360
+ continue;
361
+ }
362
+
363
+ /* choose pivot */
364
+ a = ss_pivot(Td, PA, first, last);
365
+ v = Td[PA[*a]];
366
+ SWAP(*first, *a);
367
+
368
+ /* partition */
369
+ for(b = first; (++b < last) && ((x = Td[PA[*b]]) == v);) { }
370
+ if(((a = b) < last) && (x < v)) {
371
+ for(; (++b < last) && ((x = Td[PA[*b]]) <= v);) {
372
+ if(x == v) { SWAP(*b, *a); ++a; }
373
+ }
374
+ }
375
+ for(c = last; (b < --c) && ((x = Td[PA[*c]]) == v);) { }
376
+ if((b < (d = c)) && (x > v)) {
377
+ for(; (b < --c) && ((x = Td[PA[*c]]) >= v);) {
378
+ if(x == v) { SWAP(*c, *d); --d; }
379
+ }
380
+ }
381
+ for(; b < c;) {
382
+ SWAP(*b, *c);
383
+ for(; (++b < c) && ((x = Td[PA[*b]]) <= v);) {
384
+ if(x == v) { SWAP(*b, *a); ++a; }
385
+ }
386
+ for(; (b < --c) && ((x = Td[PA[*c]]) >= v);) {
387
+ if(x == v) { SWAP(*c, *d); --d; }
388
+ }
389
+ }
390
+
391
+ if(a <= d) {
392
+ c = b - 1;
393
+
394
+ if((s = a - first) > (t = b - a)) { s = t; }
395
+ for(e = first, f = b - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); }
396
+ if((s = d - c) > (t = last - d - 1)) { s = t; }
397
+ for(e = b, f = last - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); }
398
+
399
+ a = first + (b - a), c = last - (d - c);
400
+ b = (v <= Td[PA[*a] - 1]) ? a : ss_partition(PA, a, c, depth);
401
+
402
+ if((a - first) <= (last - c)) {
403
+ if((last - c) <= (c - b)) {
404
+ STACK_PUSH(b, c, depth + 1, ss_ilg(c - b));
405
+ STACK_PUSH(c, last, depth, limit);
406
+ last = a;
407
+ } else if((a - first) <= (c - b)) {
408
+ STACK_PUSH(c, last, depth, limit);
409
+ STACK_PUSH(b, c, depth + 1, ss_ilg(c - b));
410
+ last = a;
411
+ } else {
412
+ STACK_PUSH(c, last, depth, limit);
413
+ STACK_PUSH(first, a, depth, limit);
414
+ first = b, last = c, depth += 1, limit = ss_ilg(c - b);
415
+ }
416
+ } else {
417
+ if((a - first) <= (c - b)) {
418
+ STACK_PUSH(b, c, depth + 1, ss_ilg(c - b));
419
+ STACK_PUSH(first, a, depth, limit);
420
+ first = c;
421
+ } else if((last - c) <= (c - b)) {
422
+ STACK_PUSH(first, a, depth, limit);
423
+ STACK_PUSH(b, c, depth + 1, ss_ilg(c - b));
424
+ first = c;
425
+ } else {
426
+ STACK_PUSH(first, a, depth, limit);
427
+ STACK_PUSH(c, last, depth, limit);
428
+ first = b, last = c, depth += 1, limit = ss_ilg(c - b);
429
+ }
430
+ }
431
+ } else {
432
+ limit += 1;
433
+ if(Td[PA[*first] - 1] < v) {
434
+ first = ss_partition(PA, first, last, depth);
435
+ limit = ss_ilg(last - first);
436
+ }
437
+ depth += 1;
438
+ }
439
+ }
440
+ #undef STACK_SIZE
441
+ }
442
+
443
+ #endif /* (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE) */
444
+
445
+
446
+ /*---------------------------------------------------------------------------*/
447
+
448
+ #if SS_BLOCKSIZE != 0
449
+
450
+ static INLINE
451
+ void
452
+ ss_blockswap(saidx_t *a, saidx_t *b, saidx_t n) {
453
+ saidx_t t;
454
+ for(; 0 < n; --n, ++a, ++b) {
455
+ t = *a, *a = *b, *b = t;
456
+ }
457
+ }
458
+
459
+ static INLINE
460
+ void
461
+ ss_rotate(saidx_t *first, saidx_t *middle, saidx_t *last) {
462
+ saidx_t *a, *b, t;
463
+ saidx_t l, r;
464
+ l = middle - first, r = last - middle;
465
+ for(; (0 < l) && (0 < r);) {
466
+ if(l == r) { ss_blockswap(first, middle, l); break; }
467
+ if(l < r) {
468
+ a = last - 1, b = middle - 1;
469
+ t = *a;
470
+ do {
471
+ *a-- = *b, *b-- = *a;
472
+ if(b < first) {
473
+ *a = t;
474
+ last = a;
475
+ if((r -= l + 1) <= l) { break; }
476
+ a -= 1, b = middle - 1;
477
+ t = *a;
478
+ }
479
+ } while(1);
480
+ } else {
481
+ a = first, b = middle;
482
+ t = *a;
483
+ do {
484
+ *a++ = *b, *b++ = *a;
485
+ if(last <= b) {
486
+ *a = t;
487
+ first = a + 1;
488
+ if((l -= r + 1) <= r) { break; }
489
+ a += 1, b = middle;
490
+ t = *a;
491
+ }
492
+ } while(1);
493
+ }
494
+ }
495
+ }
496
+
497
+
498
+ /*---------------------------------------------------------------------------*/
499
+
500
+ static
501
+ void
502
+ ss_inplacemerge(const sauchar_t *T, const saidx_t *PA,
503
+ saidx_t *first, saidx_t *middle, saidx_t *last,
504
+ saidx_t depth) {
505
+ const saidx_t *p;
506
+ saidx_t *a, *b;
507
+ saidx_t len, half;
508
+ saint_t q, r;
509
+ saint_t x;
510
+
511
+ for(;;) {
512
+ if(*(last - 1) < 0) { x = 1; p = PA + ~*(last - 1); }
513
+ else { x = 0; p = PA + *(last - 1); }
514
+ for(a = first, len = middle - first, half = len >> 1, r = -1;
515
+ 0 < len;
516
+ len = half, half >>= 1) {
517
+ b = a + half;
518
+ q = ss_compare(T, PA + ((0 <= *b) ? *b : ~*b), p, depth);
519
+ if(q < 0) {
520
+ a = b + 1;
521
+ half -= (len & 1) ^ 1;
522
+ } else {
523
+ r = q;
524
+ }
525
+ }
526
+ if(a < middle) {
527
+ if(r == 0) { *a = ~*a; }
528
+ ss_rotate(a, middle, last);
529
+ last -= middle - a;
530
+ middle = a;
531
+ if(first == middle) { break; }
532
+ }
533
+ --last;
534
+ if(x != 0) { while(*--last < 0) { } }
535
+ if(middle == last) { break; }
536
+ }
537
+ }
538
+
539
+
540
+ /*---------------------------------------------------------------------------*/
541
+
542
+ /* Merge-forward with internal buffer. */
543
+ static
544
+ void
545
+ ss_mergeforward(const sauchar_t *T, const saidx_t *PA,
546
+ saidx_t *first, saidx_t *middle, saidx_t *last,
547
+ saidx_t *buf, saidx_t depth) {
548
+ saidx_t *a, *b, *c, *bufend;
549
+ saidx_t t;
550
+ saint_t r;
551
+
552
+ bufend = buf + (middle - first) - 1;
553
+ ss_blockswap(buf, first, middle - first);
554
+
555
+ for(t = *(a = first), b = buf, c = middle;;) {
556
+ r = ss_compare(T, PA + *b, PA + *c, depth);
557
+ if(r < 0) {
558
+ do {
559
+ *a++ = *b;
560
+ if(bufend <= b) { *bufend = t; return; }
561
+ *b++ = *a;
562
+ } while(*b < 0);
563
+ } else if(r > 0) {
564
+ do {
565
+ *a++ = *c, *c++ = *a;
566
+ if(last <= c) {
567
+ while(b < bufend) { *a++ = *b, *b++ = *a; }
568
+ *a = *b, *b = t;
569
+ return;
570
+ }
571
+ } while(*c < 0);
572
+ } else {
573
+ *c = ~*c;
574
+ do {
575
+ *a++ = *b;
576
+ if(bufend <= b) { *bufend = t; return; }
577
+ *b++ = *a;
578
+ } while(*b < 0);
579
+
580
+ do {
581
+ *a++ = *c, *c++ = *a;
582
+ if(last <= c) {
583
+ while(b < bufend) { *a++ = *b, *b++ = *a; }
584
+ *a = *b, *b = t;
585
+ return;
586
+ }
587
+ } while(*c < 0);
588
+ }
589
+ }
590
+ }
591
+
592
+ /* Merge-backward with internal buffer. */
593
+ static
594
+ void
595
+ ss_mergebackward(const sauchar_t *T, const saidx_t *PA,
596
+ saidx_t *first, saidx_t *middle, saidx_t *last,
597
+ saidx_t *buf, saidx_t depth) {
598
+ const saidx_t *p1, *p2;
599
+ saidx_t *a, *b, *c, *bufend;
600
+ saidx_t t;
601
+ saint_t r;
602
+ saint_t x;
603
+
604
+ bufend = buf + (last - middle) - 1;
605
+ ss_blockswap(buf, middle, last - middle);
606
+
607
+ x = 0;
608
+ if(*bufend < 0) { p1 = PA + ~*bufend; x |= 1; }
609
+ else { p1 = PA + *bufend; }
610
+ if(*(middle - 1) < 0) { p2 = PA + ~*(middle - 1); x |= 2; }
611
+ else { p2 = PA + *(middle - 1); }
612
+ for(t = *(a = last - 1), b = bufend, c = middle - 1;;) {
613
+ r = ss_compare(T, p1, p2, depth);
614
+ if(0 < r) {
615
+ if(x & 1) { do { *a-- = *b, *b-- = *a; } while(*b < 0); x ^= 1; }
616
+ *a-- = *b;
617
+ if(b <= buf) { *buf = t; break; }
618
+ *b-- = *a;
619
+ if(*b < 0) { p1 = PA + ~*b; x |= 1; }
620
+ else { p1 = PA + *b; }
621
+ } else if(r < 0) {
622
+ if(x & 2) { do { *a-- = *c, *c-- = *a; } while(*c < 0); x ^= 2; }
623
+ *a-- = *c, *c-- = *a;
624
+ if(c < first) {
625
+ while(buf < b) { *a-- = *b, *b-- = *a; }
626
+ *a = *b, *b = t;
627
+ break;
628
+ }
629
+ if(*c < 0) { p2 = PA + ~*c; x |= 2; }
630
+ else { p2 = PA + *c; }
631
+ } else {
632
+ if(x & 1) { do { *a-- = *b, *b-- = *a; } while(*b < 0); x ^= 1; }
633
+ *a-- = ~*b;
634
+ if(b <= buf) { *buf = t; break; }
635
+ *b-- = *a;
636
+ if(x & 2) { do { *a-- = *c, *c-- = *a; } while(*c < 0); x ^= 2; }
637
+ *a-- = *c, *c-- = *a;
638
+ if(c < first) {
639
+ while(buf < b) { *a-- = *b, *b-- = *a; }
640
+ *a = *b, *b = t;
641
+ break;
642
+ }
643
+ if(*b < 0) { p1 = PA + ~*b; x |= 1; }
644
+ else { p1 = PA + *b; }
645
+ if(*c < 0) { p2 = PA + ~*c; x |= 2; }
646
+ else { p2 = PA + *c; }
647
+ }
648
+ }
649
+ }
650
+
651
+ /* D&C based merge. */
652
+ static
653
+ void
654
+ ss_swapmerge(const sauchar_t *T, const saidx_t *PA,
655
+ saidx_t *first, saidx_t *middle, saidx_t *last,
656
+ saidx_t *buf, saidx_t bufsize, saidx_t depth) {
657
+ #define STACK_SIZE SS_SMERGE_STACKSIZE
658
+ #define GETIDX(a) ((0 <= (a)) ? (a) : (~(a)))
659
+ #define MERGE_CHECK(a, b, c)\
660
+ do {\
661
+ if(((c) & 1) ||\
662
+ (((c) & 2) && (ss_compare(T, PA + GETIDX(*((a) - 1)), PA + *(a), depth) == 0))) {\
663
+ *(a) = ~*(a);\
664
+ }\
665
+ if(((c) & 4) && ((ss_compare(T, PA + GETIDX(*((b) - 1)), PA + *(b), depth) == 0))) {\
666
+ *(b) = ~*(b);\
667
+ }\
668
+ } while(0)
669
+ struct { saidx_t *a, *b, *c; saint_t d; } stack[STACK_SIZE];
670
+ saidx_t *l, *r, *lm, *rm;
671
+ saidx_t m, len, half;
672
+ saint_t ssize;
673
+ saint_t check, next;
674
+
675
+ for(check = 0, ssize = 0;;) {
676
+ if((last - middle) <= bufsize) {
677
+ if((first < middle) && (middle < last)) {
678
+ ss_mergebackward(T, PA, first, middle, last, buf, depth);
679
+ }
680
+ MERGE_CHECK(first, last, check);
681
+ STACK_POP(first, middle, last, check);
682
+ continue;
683
+ }
684
+
685
+ if((middle - first) <= bufsize) {
686
+ if(first < middle) {
687
+ ss_mergeforward(T, PA, first, middle, last, buf, depth);
688
+ }
689
+ MERGE_CHECK(first, last, check);
690
+ STACK_POP(first, middle, last, check);
691
+ continue;
692
+ }
693
+
694
+ for(m = 0, len = MIN(middle - first, last - middle), half = len >> 1;
695
+ 0 < len;
696
+ len = half, half >>= 1) {
697
+ if(ss_compare(T, PA + GETIDX(*(middle + m + half)),
698
+ PA + GETIDX(*(middle - m - half - 1)), depth) < 0) {
699
+ m += half + 1;
700
+ half -= (len & 1) ^ 1;
701
+ }
702
+ }
703
+
704
+ if(0 < m) {
705
+ lm = middle - m, rm = middle + m;
706
+ ss_blockswap(lm, middle, m);
707
+ l = r = middle, next = 0;
708
+ if(rm < last) {
709
+ if(*rm < 0) {
710
+ *rm = ~*rm;
711
+ if(first < lm) { for(; *--l < 0;) { } next |= 4; }
712
+ next |= 1;
713
+ } else if(first < lm) {
714
+ for(; *r < 0; ++r) { }
715
+ next |= 2;
716
+ }
717
+ }
718
+
719
+ if((l - first) <= (last - r)) {
720
+ STACK_PUSH(r, rm, last, (next & 3) | (check & 4));
721
+ middle = lm, last = l, check = (check & 3) | (next & 4);
722
+ } else {
723
+ if((next & 2) && (r == middle)) { next ^= 6; }
724
+ STACK_PUSH(first, lm, l, (check & 3) | (next & 4));
725
+ first = r, middle = rm, check = (next & 3) | (check & 4);
726
+ }
727
+ } else {
728
+ if(ss_compare(T, PA + GETIDX(*(middle - 1)), PA + *middle, depth) == 0) {
729
+ *middle = ~*middle;
730
+ }
731
+ MERGE_CHECK(first, last, check);
732
+ STACK_POP(first, middle, last, check);
733
+ }
734
+ }
735
+ #undef STACK_SIZE
736
+ }
737
+
738
+ #endif /* SS_BLOCKSIZE != 0 */
739
+
740
+
741
+ /*---------------------------------------------------------------------------*/
742
+
743
+ /*- Function -*/
744
+
745
+ /* Substring sort */
746
+ void
747
+ sssort(const sauchar_t *T, const saidx_t *PA,
748
+ saidx_t *first, saidx_t *last,
749
+ saidx_t *buf, saidx_t bufsize,
750
+ saidx_t depth, saidx_t n, saint_t lastsuffix) {
751
+ saidx_t *a;
752
+ #if SS_BLOCKSIZE != 0
753
+ saidx_t *b, *middle, *curbuf;
754
+ saidx_t j, k, curbufsize, limit;
755
+ #endif
756
+ saidx_t i;
757
+
758
+ if(lastsuffix != 0) { ++first; }
759
+
760
+ #if SS_BLOCKSIZE == 0
761
+ ss_mintrosort(T, PA, first, last, depth);
762
+ #else
763
+ if((bufsize < SS_BLOCKSIZE) &&
764
+ (bufsize < (last - first)) &&
765
+ (bufsize < (limit = ss_isqrt(last - first)))) {
766
+ if(SS_BLOCKSIZE < limit) { limit = SS_BLOCKSIZE; }
767
+ buf = middle = last - limit, bufsize = limit;
768
+ } else {
769
+ middle = last, limit = 0;
770
+ }
771
+ for(a = first, i = 0; SS_BLOCKSIZE < (middle - a); a += SS_BLOCKSIZE, ++i) {
772
+ #if SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE
773
+ ss_mintrosort(T, PA, a, a + SS_BLOCKSIZE, depth);
774
+ #elif 1 < SS_BLOCKSIZE
775
+ ss_insertionsort(T, PA, a, a + SS_BLOCKSIZE, depth);
776
+ #endif
777
+ curbufsize = last - (a + SS_BLOCKSIZE);
778
+ curbuf = a + SS_BLOCKSIZE;
779
+ if(curbufsize <= bufsize) { curbufsize = bufsize, curbuf = buf; }
780
+ for(b = a, k = SS_BLOCKSIZE, j = i; j & 1; b -= k, k <<= 1, j >>= 1) {
781
+ ss_swapmerge(T, PA, b - k, b, b + k, curbuf, curbufsize, depth);
782
+ }
783
+ }
784
+ #if SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE
785
+ ss_mintrosort(T, PA, a, middle, depth);
786
+ #elif 1 < SS_BLOCKSIZE
787
+ ss_insertionsort(T, PA, a, middle, depth);
788
+ #endif
789
+ for(k = SS_BLOCKSIZE; i != 0; k <<= 1, i >>= 1) {
790
+ if(i & 1) {
791
+ ss_swapmerge(T, PA, a - k, a, middle, buf, bufsize, depth);
792
+ a -= k;
793
+ }
794
+ }
795
+ if(limit != 0) {
796
+ #if SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE
797
+ ss_mintrosort(T, PA, middle, last, depth);
798
+ #elif 1 < SS_BLOCKSIZE
799
+ ss_insertionsort(T, PA, middle, last, depth);
800
+ #endif
801
+ ss_inplacemerge(T, PA, first, middle, last, depth);
802
+ }
803
+ #endif
804
+
805
+ if(lastsuffix != 0) {
806
+ /* Insert last type B* suffix. */
807
+ saidx_t PAi[2]; PAi[0] = PA[*(first - 1)], PAi[1] = n - 2;
808
+ for(a = first, i = *(first - 1);
809
+ (a < last) && ((*a < 0) || (0 < ss_compare(T, &(PAi[0]), PA + *a, depth)));
810
+ ++a) {
811
+ *(a - 1) = *a;
812
+ }
813
+ *(a - 1) = i;
814
+ }
815
+ }