zstd 1.1.2.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (71) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +15 -0
  3. data/.travis.yml +5 -0
  4. data/Gemfile +4 -0
  5. data/LICENSE.txt +21 -0
  6. data/README.md +64 -0
  7. data/Rakefile +19 -0
  8. data/bin/console +14 -0
  9. data/bin/setup +8 -0
  10. data/exe/zstd +3 -0
  11. data/ext/zstd/extconf.rb +20 -0
  12. data/ext/zstd/libzstd/.gitignore +2 -0
  13. data/ext/zstd/libzstd/LICENSE +1262 -0
  14. data/ext/zstd/libzstd/Makefile +133 -0
  15. data/ext/zstd/libzstd/PATENTS +1272 -0
  16. data/ext/zstd/libzstd/README.md +77 -0
  17. data/ext/zstd/libzstd/common/bitstream.h +414 -0
  18. data/ext/zstd/libzstd/common/entropy_common.c +227 -0
  19. data/ext/zstd/libzstd/common/error_private.c +43 -0
  20. data/ext/zstd/libzstd/common/error_private.h +76 -0
  21. data/ext/zstd/libzstd/common/fse.h +668 -0
  22. data/ext/zstd/libzstd/common/fse_decompress.c +329 -0
  23. data/ext/zstd/libzstd/common/huf.h +238 -0
  24. data/ext/zstd/libzstd/common/mem.h +372 -0
  25. data/ext/zstd/libzstd/common/xxhash.c +867 -0
  26. data/ext/zstd/libzstd/common/xxhash.h +309 -0
  27. data/ext/zstd/libzstd/common/zstd_common.c +77 -0
  28. data/ext/zstd/libzstd/common/zstd_errors.h +60 -0
  29. data/ext/zstd/libzstd/common/zstd_internal.h +270 -0
  30. data/ext/zstd/libzstd/compress/fse_compress.c +850 -0
  31. data/ext/zstd/libzstd/compress/huf_compress.c +609 -0
  32. data/ext/zstd/libzstd/compress/zstd_compress.c +3291 -0
  33. data/ext/zstd/libzstd/compress/zstd_opt.h +919 -0
  34. data/ext/zstd/libzstd/decompress/huf_decompress.c +885 -0
  35. data/ext/zstd/libzstd/decompress/zstd_decompress.c +2154 -0
  36. data/ext/zstd/libzstd/deprecated/zbuff.h +210 -0
  37. data/ext/zstd/libzstd/deprecated/zbuff_compress.c +145 -0
  38. data/ext/zstd/libzstd/deprecated/zbuff_decompress.c +74 -0
  39. data/ext/zstd/libzstd/dictBuilder/divsufsort.c +1913 -0
  40. data/ext/zstd/libzstd/dictBuilder/divsufsort.h +67 -0
  41. data/ext/zstd/libzstd/dictBuilder/zdict.c +1012 -0
  42. data/ext/zstd/libzstd/dictBuilder/zdict.h +111 -0
  43. data/ext/zstd/libzstd/dll/example/Makefile +47 -0
  44. data/ext/zstd/libzstd/dll/example/README.md +69 -0
  45. data/ext/zstd/libzstd/dll/example/build_package.bat +17 -0
  46. data/ext/zstd/libzstd/dll/example/fullbench-dll.sln +25 -0
  47. data/ext/zstd/libzstd/dll/example/fullbench-dll.vcxproj +179 -0
  48. data/ext/zstd/libzstd/dll/libzstd.def +86 -0
  49. data/ext/zstd/libzstd/legacy/zstd_legacy.h +259 -0
  50. data/ext/zstd/libzstd/legacy/zstd_v01.c +2095 -0
  51. data/ext/zstd/libzstd/legacy/zstd_v01.h +80 -0
  52. data/ext/zstd/libzstd/legacy/zstd_v02.c +3518 -0
  53. data/ext/zstd/libzstd/legacy/zstd_v02.h +79 -0
  54. data/ext/zstd/libzstd/legacy/zstd_v03.c +3159 -0
  55. data/ext/zstd/libzstd/legacy/zstd_v03.h +79 -0
  56. data/ext/zstd/libzstd/legacy/zstd_v04.c +3795 -0
  57. data/ext/zstd/libzstd/legacy/zstd_v04.h +128 -0
  58. data/ext/zstd/libzstd/legacy/zstd_v05.c +4056 -0
  59. data/ext/zstd/libzstd/legacy/zstd_v05.h +149 -0
  60. data/ext/zstd/libzstd/legacy/zstd_v06.c +4167 -0
  61. data/ext/zstd/libzstd/legacy/zstd_v06.h +159 -0
  62. data/ext/zstd/libzstd/legacy/zstd_v07.c +4540 -0
  63. data/ext/zstd/libzstd/legacy/zstd_v07.h +173 -0
  64. data/ext/zstd/libzstd/libzstd.pc.in +14 -0
  65. data/ext/zstd/libzstd/zstd.h +673 -0
  66. data/ext/zstd/zstd.c +185 -0
  67. data/ext/zstd/zstd.h +7 -0
  68. data/lib/zstd/version.rb +3 -0
  69. data/lib/zstd.rb +6 -0
  70. data/zstd.gemspec +38 -0
  71. metadata +172 -0
@@ -0,0 +1,1913 @@
1
+ /*
2
+ * divsufsort.c for libdivsufsort-lite
3
+ * Copyright (c) 2003-2008 Yuta Mori All Rights Reserved.
4
+ *
5
+ * Permission is hereby granted, free of charge, to any person
6
+ * obtaining a copy of this software and associated documentation
7
+ * files (the "Software"), to deal in the Software without
8
+ * restriction, including without limitation the rights to use,
9
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
10
+ * copies of the Software, and to permit persons to whom the
11
+ * Software is furnished to do so, subject to the following
12
+ * conditions:
13
+ *
14
+ * The above copyright notice and this permission notice shall be
15
+ * included in all copies or substantial portions of the Software.
16
+ *
17
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
18
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
19
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
20
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
21
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
22
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
23
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
24
+ * OTHER DEALINGS IN THE SOFTWARE.
25
+ */
26
+
27
+ /*- Compiler specifics -*/
28
+ #ifdef __clang__
29
+ #pragma clang diagnostic ignored "-Wshorten-64-to-32"
30
+ #endif
31
+
32
+ #if defined(_MSC_VER)
33
+ # pragma warning(disable : 4244)
34
+ # pragma warning(disable : 4127) /* C4127 : Condition expression is constant */
35
+ #endif
36
+
37
+
38
+ /*- Dependencies -*/
39
+ #include <assert.h>
40
+ #include <stdio.h>
41
+ #include <stdlib.h>
42
+
43
+ #include "divsufsort.h"
44
+
45
+ /*- Constants -*/
46
+ #if defined(INLINE)
47
+ # undef INLINE
48
+ #endif
49
+ #if !defined(INLINE)
50
+ # define INLINE __inline
51
+ #endif
52
+ #if defined(ALPHABET_SIZE) && (ALPHABET_SIZE < 1)
53
+ # undef ALPHABET_SIZE
54
+ #endif
55
+ #if !defined(ALPHABET_SIZE)
56
+ # define ALPHABET_SIZE (256)
57
+ #endif
58
+ #define BUCKET_A_SIZE (ALPHABET_SIZE)
59
+ #define BUCKET_B_SIZE (ALPHABET_SIZE * ALPHABET_SIZE)
60
+ #if defined(SS_INSERTIONSORT_THRESHOLD)
61
+ # if SS_INSERTIONSORT_THRESHOLD < 1
62
+ # undef SS_INSERTIONSORT_THRESHOLD
63
+ # define SS_INSERTIONSORT_THRESHOLD (1)
64
+ # endif
65
+ #else
66
+ # define SS_INSERTIONSORT_THRESHOLD (8)
67
+ #endif
68
+ #if defined(SS_BLOCKSIZE)
69
+ # if SS_BLOCKSIZE < 0
70
+ # undef SS_BLOCKSIZE
71
+ # define SS_BLOCKSIZE (0)
72
+ # elif 32768 <= SS_BLOCKSIZE
73
+ # undef SS_BLOCKSIZE
74
+ # define SS_BLOCKSIZE (32767)
75
+ # endif
76
+ #else
77
+ # define SS_BLOCKSIZE (1024)
78
+ #endif
79
+ /* minstacksize = log(SS_BLOCKSIZE) / log(3) * 2 */
80
+ #if SS_BLOCKSIZE == 0
81
+ # define SS_MISORT_STACKSIZE (96)
82
+ #elif SS_BLOCKSIZE <= 4096
83
+ # define SS_MISORT_STACKSIZE (16)
84
+ #else
85
+ # define SS_MISORT_STACKSIZE (24)
86
+ #endif
87
+ #define SS_SMERGE_STACKSIZE (32)
88
+ #define TR_INSERTIONSORT_THRESHOLD (8)
89
+ #define TR_STACKSIZE (64)
90
+
91
+
92
+ /*- Macros -*/
93
+ #ifndef SWAP
94
+ # define SWAP(_a, _b) do { t = (_a); (_a) = (_b); (_b) = t; } while(0)
95
+ #endif /* SWAP */
96
+ #ifndef MIN
97
+ # define MIN(_a, _b) (((_a) < (_b)) ? (_a) : (_b))
98
+ #endif /* MIN */
99
+ #ifndef MAX
100
+ # define MAX(_a, _b) (((_a) > (_b)) ? (_a) : (_b))
101
+ #endif /* MAX */
102
+ #define STACK_PUSH(_a, _b, _c, _d)\
103
+ do {\
104
+ assert(ssize < STACK_SIZE);\
105
+ stack[ssize].a = (_a), stack[ssize].b = (_b),\
106
+ stack[ssize].c = (_c), stack[ssize++].d = (_d);\
107
+ } while(0)
108
+ #define STACK_PUSH5(_a, _b, _c, _d, _e)\
109
+ do {\
110
+ assert(ssize < STACK_SIZE);\
111
+ stack[ssize].a = (_a), stack[ssize].b = (_b),\
112
+ stack[ssize].c = (_c), stack[ssize].d = (_d), stack[ssize++].e = (_e);\
113
+ } while(0)
114
+ #define STACK_POP(_a, _b, _c, _d)\
115
+ do {\
116
+ assert(0 <= ssize);\
117
+ if(ssize == 0) { return; }\
118
+ (_a) = stack[--ssize].a, (_b) = stack[ssize].b,\
119
+ (_c) = stack[ssize].c, (_d) = stack[ssize].d;\
120
+ } while(0)
121
+ #define STACK_POP5(_a, _b, _c, _d, _e)\
122
+ do {\
123
+ assert(0 <= ssize);\
124
+ if(ssize == 0) { return; }\
125
+ (_a) = stack[--ssize].a, (_b) = stack[ssize].b,\
126
+ (_c) = stack[ssize].c, (_d) = stack[ssize].d, (_e) = stack[ssize].e;\
127
+ } while(0)
128
+ #define BUCKET_A(_c0) bucket_A[(_c0)]
129
+ #if ALPHABET_SIZE == 256
130
+ #define BUCKET_B(_c0, _c1) (bucket_B[((_c1) << 8) | (_c0)])
131
+ #define BUCKET_BSTAR(_c0, _c1) (bucket_B[((_c0) << 8) | (_c1)])
132
+ #else
133
+ #define BUCKET_B(_c0, _c1) (bucket_B[(_c1) * ALPHABET_SIZE + (_c0)])
134
+ #define BUCKET_BSTAR(_c0, _c1) (bucket_B[(_c0) * ALPHABET_SIZE + (_c1)])
135
+ #endif
136
+
137
+
138
+ /*- Private Functions -*/
139
+
140
+ static const int lg_table[256]= {
141
+ -1,0,1,1,2,2,2,2,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
142
+ 5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
143
+ 6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
144
+ 6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
145
+ 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
146
+ 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
147
+ 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
148
+ 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7
149
+ };
150
+
151
+ #if (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE)
152
+
153
+ static INLINE
154
+ int
155
+ ss_ilg(int n) {
156
+ #if SS_BLOCKSIZE == 0
157
+ return (n & 0xffff0000) ?
158
+ ((n & 0xff000000) ?
159
+ 24 + lg_table[(n >> 24) & 0xff] :
160
+ 16 + lg_table[(n >> 16) & 0xff]) :
161
+ ((n & 0x0000ff00) ?
162
+ 8 + lg_table[(n >> 8) & 0xff] :
163
+ 0 + lg_table[(n >> 0) & 0xff]);
164
+ #elif SS_BLOCKSIZE < 256
165
+ return lg_table[n];
166
+ #else
167
+ return (n & 0xff00) ?
168
+ 8 + lg_table[(n >> 8) & 0xff] :
169
+ 0 + lg_table[(n >> 0) & 0xff];
170
+ #endif
171
+ }
172
+
173
+ #endif /* (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE) */
174
+
175
+ #if SS_BLOCKSIZE != 0
176
+
177
+ static const int sqq_table[256] = {
178
+ 0, 16, 22, 27, 32, 35, 39, 42, 45, 48, 50, 53, 55, 57, 59, 61,
179
+ 64, 65, 67, 69, 71, 73, 75, 76, 78, 80, 81, 83, 84, 86, 87, 89,
180
+ 90, 91, 93, 94, 96, 97, 98, 99, 101, 102, 103, 104, 106, 107, 108, 109,
181
+ 110, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126,
182
+ 128, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142,
183
+ 143, 144, 144, 145, 146, 147, 148, 149, 150, 150, 151, 152, 153, 154, 155, 155,
184
+ 156, 157, 158, 159, 160, 160, 161, 162, 163, 163, 164, 165, 166, 167, 167, 168,
185
+ 169, 170, 170, 171, 172, 173, 173, 174, 175, 176, 176, 177, 178, 178, 179, 180,
186
+ 181, 181, 182, 183, 183, 184, 185, 185, 186, 187, 187, 188, 189, 189, 190, 191,
187
+ 192, 192, 193, 193, 194, 195, 195, 196, 197, 197, 198, 199, 199, 200, 201, 201,
188
+ 202, 203, 203, 204, 204, 205, 206, 206, 207, 208, 208, 209, 209, 210, 211, 211,
189
+ 212, 212, 213, 214, 214, 215, 215, 216, 217, 217, 218, 218, 219, 219, 220, 221,
190
+ 221, 222, 222, 223, 224, 224, 225, 225, 226, 226, 227, 227, 228, 229, 229, 230,
191
+ 230, 231, 231, 232, 232, 233, 234, 234, 235, 235, 236, 236, 237, 237, 238, 238,
192
+ 239, 240, 240, 241, 241, 242, 242, 243, 243, 244, 244, 245, 245, 246, 246, 247,
193
+ 247, 248, 248, 249, 249, 250, 250, 251, 251, 252, 252, 253, 253, 254, 254, 255
194
+ };
195
+
196
+ static INLINE
197
+ int
198
+ ss_isqrt(int x) {
199
+ int y, e;
200
+
201
+ if(x >= (SS_BLOCKSIZE * SS_BLOCKSIZE)) { return SS_BLOCKSIZE; }
202
+ e = (x & 0xffff0000) ?
203
+ ((x & 0xff000000) ?
204
+ 24 + lg_table[(x >> 24) & 0xff] :
205
+ 16 + lg_table[(x >> 16) & 0xff]) :
206
+ ((x & 0x0000ff00) ?
207
+ 8 + lg_table[(x >> 8) & 0xff] :
208
+ 0 + lg_table[(x >> 0) & 0xff]);
209
+
210
+ if(e >= 16) {
211
+ y = sqq_table[x >> ((e - 6) - (e & 1))] << ((e >> 1) - 7);
212
+ if(e >= 24) { y = (y + 1 + x / y) >> 1; }
213
+ y = (y + 1 + x / y) >> 1;
214
+ } else if(e >= 8) {
215
+ y = (sqq_table[x >> ((e - 6) - (e & 1))] >> (7 - (e >> 1))) + 1;
216
+ } else {
217
+ return sqq_table[x] >> 4;
218
+ }
219
+
220
+ return (x < (y * y)) ? y - 1 : y;
221
+ }
222
+
223
+ #endif /* SS_BLOCKSIZE != 0 */
224
+
225
+
226
+ /*---------------------------------------------------------------------------*/
227
+
228
+ /* Compares two suffixes. */
229
+ static INLINE
230
+ int
231
+ ss_compare(const unsigned char *T,
232
+ const int *p1, const int *p2,
233
+ int depth) {
234
+ const unsigned char *U1, *U2, *U1n, *U2n;
235
+
236
+ for(U1 = T + depth + *p1,
237
+ U2 = T + depth + *p2,
238
+ U1n = T + *(p1 + 1) + 2,
239
+ U2n = T + *(p2 + 1) + 2;
240
+ (U1 < U1n) && (U2 < U2n) && (*U1 == *U2);
241
+ ++U1, ++U2) {
242
+ }
243
+
244
+ return U1 < U1n ?
245
+ (U2 < U2n ? *U1 - *U2 : 1) :
246
+ (U2 < U2n ? -1 : 0);
247
+ }
248
+
249
+
250
+ /*---------------------------------------------------------------------------*/
251
+
252
+ #if (SS_BLOCKSIZE != 1) && (SS_INSERTIONSORT_THRESHOLD != 1)
253
+
254
+ /* Insertionsort for small size groups */
255
+ static
256
+ void
257
+ ss_insertionsort(const unsigned char *T, const int *PA,
258
+ int *first, int *last, int depth) {
259
+ int *i, *j;
260
+ int t;
261
+ int r;
262
+
263
+ for(i = last - 2; first <= i; --i) {
264
+ for(t = *i, j = i + 1; 0 < (r = ss_compare(T, PA + t, PA + *j, depth));) {
265
+ do { *(j - 1) = *j; } while((++j < last) && (*j < 0));
266
+ if(last <= j) { break; }
267
+ }
268
+ if(r == 0) { *j = ~*j; }
269
+ *(j - 1) = t;
270
+ }
271
+ }
272
+
273
+ #endif /* (SS_BLOCKSIZE != 1) && (SS_INSERTIONSORT_THRESHOLD != 1) */
274
+
275
+
276
+ /*---------------------------------------------------------------------------*/
277
+
278
+ #if (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE)
279
+
280
+ static INLINE
281
+ void
282
+ ss_fixdown(const unsigned char *Td, const int *PA,
283
+ int *SA, int i, int size) {
284
+ int j, k;
285
+ int v;
286
+ int c, d, e;
287
+
288
+ for(v = SA[i], c = Td[PA[v]]; (j = 2 * i + 1) < size; SA[i] = SA[k], i = k) {
289
+ d = Td[PA[SA[k = j++]]];
290
+ if(d < (e = Td[PA[SA[j]]])) { k = j; d = e; }
291
+ if(d <= c) { break; }
292
+ }
293
+ SA[i] = v;
294
+ }
295
+
296
+ /* Simple top-down heapsort. */
297
+ static
298
+ void
299
+ ss_heapsort(const unsigned char *Td, const int *PA, int *SA, int size) {
300
+ int i, m;
301
+ int t;
302
+
303
+ m = size;
304
+ if((size % 2) == 0) {
305
+ m--;
306
+ if(Td[PA[SA[m / 2]]] < Td[PA[SA[m]]]) { SWAP(SA[m], SA[m / 2]); }
307
+ }
308
+
309
+ for(i = m / 2 - 1; 0 <= i; --i) { ss_fixdown(Td, PA, SA, i, m); }
310
+ if((size % 2) == 0) { SWAP(SA[0], SA[m]); ss_fixdown(Td, PA, SA, 0, m); }
311
+ for(i = m - 1; 0 < i; --i) {
312
+ t = SA[0], SA[0] = SA[i];
313
+ ss_fixdown(Td, PA, SA, 0, i);
314
+ SA[i] = t;
315
+ }
316
+ }
317
+
318
+
319
+ /*---------------------------------------------------------------------------*/
320
+
321
+ /* Returns the median of three elements. */
322
+ static INLINE
323
+ int *
324
+ ss_median3(const unsigned char *Td, const int *PA,
325
+ int *v1, int *v2, int *v3) {
326
+ int *t;
327
+ if(Td[PA[*v1]] > Td[PA[*v2]]) { SWAP(v1, v2); }
328
+ if(Td[PA[*v2]] > Td[PA[*v3]]) {
329
+ if(Td[PA[*v1]] > Td[PA[*v3]]) { return v1; }
330
+ else { return v3; }
331
+ }
332
+ return v2;
333
+ }
334
+
335
+ /* Returns the median of five elements. */
336
+ static INLINE
337
+ int *
338
+ ss_median5(const unsigned char *Td, const int *PA,
339
+ int *v1, int *v2, int *v3, int *v4, int *v5) {
340
+ int *t;
341
+ if(Td[PA[*v2]] > Td[PA[*v3]]) { SWAP(v2, v3); }
342
+ if(Td[PA[*v4]] > Td[PA[*v5]]) { SWAP(v4, v5); }
343
+ if(Td[PA[*v2]] > Td[PA[*v4]]) { SWAP(v2, v4); SWAP(v3, v5); }
344
+ if(Td[PA[*v1]] > Td[PA[*v3]]) { SWAP(v1, v3); }
345
+ if(Td[PA[*v1]] > Td[PA[*v4]]) { SWAP(v1, v4); SWAP(v3, v5); }
346
+ if(Td[PA[*v3]] > Td[PA[*v4]]) { return v4; }
347
+ return v3;
348
+ }
349
+
350
+ /* Returns the pivot element. */
351
+ static INLINE
352
+ int *
353
+ ss_pivot(const unsigned char *Td, const int *PA, int *first, int *last) {
354
+ int *middle;
355
+ int t;
356
+
357
+ t = last - first;
358
+ middle = first + t / 2;
359
+
360
+ if(t <= 512) {
361
+ if(t <= 32) {
362
+ return ss_median3(Td, PA, first, middle, last - 1);
363
+ } else {
364
+ t >>= 2;
365
+ return ss_median5(Td, PA, first, first + t, middle, last - 1 - t, last - 1);
366
+ }
367
+ }
368
+ t >>= 3;
369
+ first = ss_median3(Td, PA, first, first + t, first + (t << 1));
370
+ middle = ss_median3(Td, PA, middle - t, middle, middle + t);
371
+ last = ss_median3(Td, PA, last - 1 - (t << 1), last - 1 - t, last - 1);
372
+ return ss_median3(Td, PA, first, middle, last);
373
+ }
374
+
375
+
376
+ /*---------------------------------------------------------------------------*/
377
+
378
+ /* Binary partition for substrings. */
379
+ static INLINE
380
+ int *
381
+ ss_partition(const int *PA,
382
+ int *first, int *last, int depth) {
383
+ int *a, *b;
384
+ int t;
385
+ for(a = first - 1, b = last;;) {
386
+ for(; (++a < b) && ((PA[*a] + depth) >= (PA[*a + 1] + 1));) { *a = ~*a; }
387
+ for(; (a < --b) && ((PA[*b] + depth) < (PA[*b + 1] + 1));) { }
388
+ if(b <= a) { break; }
389
+ t = ~*b;
390
+ *b = *a;
391
+ *a = t;
392
+ }
393
+ if(first < a) { *first = ~*first; }
394
+ return a;
395
+ }
396
+
397
+ /* Multikey introsort for medium size groups. */
398
+ static
399
+ void
400
+ ss_mintrosort(const unsigned char *T, const int *PA,
401
+ int *first, int *last,
402
+ int depth) {
403
+ #define STACK_SIZE SS_MISORT_STACKSIZE
404
+ struct { int *a, *b, c; int d; } stack[STACK_SIZE];
405
+ const unsigned char *Td;
406
+ int *a, *b, *c, *d, *e, *f;
407
+ int s, t;
408
+ int ssize;
409
+ int limit;
410
+ int v, x = 0;
411
+
412
+ for(ssize = 0, limit = ss_ilg(last - first);;) {
413
+
414
+ if((last - first) <= SS_INSERTIONSORT_THRESHOLD) {
415
+ #if 1 < SS_INSERTIONSORT_THRESHOLD
416
+ if(1 < (last - first)) { ss_insertionsort(T, PA, first, last, depth); }
417
+ #endif
418
+ STACK_POP(first, last, depth, limit);
419
+ continue;
420
+ }
421
+
422
+ Td = T + depth;
423
+ if(limit-- == 0) { ss_heapsort(Td, PA, first, last - first); }
424
+ if(limit < 0) {
425
+ for(a = first + 1, v = Td[PA[*first]]; a < last; ++a) {
426
+ if((x = Td[PA[*a]]) != v) {
427
+ if(1 < (a - first)) { break; }
428
+ v = x;
429
+ first = a;
430
+ }
431
+ }
432
+ if(Td[PA[*first] - 1] < v) {
433
+ first = ss_partition(PA, first, a, depth);
434
+ }
435
+ if((a - first) <= (last - a)) {
436
+ if(1 < (a - first)) {
437
+ STACK_PUSH(a, last, depth, -1);
438
+ last = a, depth += 1, limit = ss_ilg(a - first);
439
+ } else {
440
+ first = a, limit = -1;
441
+ }
442
+ } else {
443
+ if(1 < (last - a)) {
444
+ STACK_PUSH(first, a, depth + 1, ss_ilg(a - first));
445
+ first = a, limit = -1;
446
+ } else {
447
+ last = a, depth += 1, limit = ss_ilg(a - first);
448
+ }
449
+ }
450
+ continue;
451
+ }
452
+
453
+ /* choose pivot */
454
+ a = ss_pivot(Td, PA, first, last);
455
+ v = Td[PA[*a]];
456
+ SWAP(*first, *a);
457
+
458
+ /* partition */
459
+ for(b = first; (++b < last) && ((x = Td[PA[*b]]) == v);) { }
460
+ if(((a = b) < last) && (x < v)) {
461
+ for(; (++b < last) && ((x = Td[PA[*b]]) <= v);) {
462
+ if(x == v) { SWAP(*b, *a); ++a; }
463
+ }
464
+ }
465
+ for(c = last; (b < --c) && ((x = Td[PA[*c]]) == v);) { }
466
+ if((b < (d = c)) && (x > v)) {
467
+ for(; (b < --c) && ((x = Td[PA[*c]]) >= v);) {
468
+ if(x == v) { SWAP(*c, *d); --d; }
469
+ }
470
+ }
471
+ for(; b < c;) {
472
+ SWAP(*b, *c);
473
+ for(; (++b < c) && ((x = Td[PA[*b]]) <= v);) {
474
+ if(x == v) { SWAP(*b, *a); ++a; }
475
+ }
476
+ for(; (b < --c) && ((x = Td[PA[*c]]) >= v);) {
477
+ if(x == v) { SWAP(*c, *d); --d; }
478
+ }
479
+ }
480
+
481
+ if(a <= d) {
482
+ c = b - 1;
483
+
484
+ if((s = a - first) > (t = b - a)) { s = t; }
485
+ for(e = first, f = b - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); }
486
+ if((s = d - c) > (t = last - d - 1)) { s = t; }
487
+ for(e = b, f = last - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); }
488
+
489
+ a = first + (b - a), c = last - (d - c);
490
+ b = (v <= Td[PA[*a] - 1]) ? a : ss_partition(PA, a, c, depth);
491
+
492
+ if((a - first) <= (last - c)) {
493
+ if((last - c) <= (c - b)) {
494
+ STACK_PUSH(b, c, depth + 1, ss_ilg(c - b));
495
+ STACK_PUSH(c, last, depth, limit);
496
+ last = a;
497
+ } else if((a - first) <= (c - b)) {
498
+ STACK_PUSH(c, last, depth, limit);
499
+ STACK_PUSH(b, c, depth + 1, ss_ilg(c - b));
500
+ last = a;
501
+ } else {
502
+ STACK_PUSH(c, last, depth, limit);
503
+ STACK_PUSH(first, a, depth, limit);
504
+ first = b, last = c, depth += 1, limit = ss_ilg(c - b);
505
+ }
506
+ } else {
507
+ if((a - first) <= (c - b)) {
508
+ STACK_PUSH(b, c, depth + 1, ss_ilg(c - b));
509
+ STACK_PUSH(first, a, depth, limit);
510
+ first = c;
511
+ } else if((last - c) <= (c - b)) {
512
+ STACK_PUSH(first, a, depth, limit);
513
+ STACK_PUSH(b, c, depth + 1, ss_ilg(c - b));
514
+ first = c;
515
+ } else {
516
+ STACK_PUSH(first, a, depth, limit);
517
+ STACK_PUSH(c, last, depth, limit);
518
+ first = b, last = c, depth += 1, limit = ss_ilg(c - b);
519
+ }
520
+ }
521
+ } else {
522
+ limit += 1;
523
+ if(Td[PA[*first] - 1] < v) {
524
+ first = ss_partition(PA, first, last, depth);
525
+ limit = ss_ilg(last - first);
526
+ }
527
+ depth += 1;
528
+ }
529
+ }
530
+ #undef STACK_SIZE
531
+ }
532
+
533
+ #endif /* (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE) */
534
+
535
+
536
+ /*---------------------------------------------------------------------------*/
537
+
538
+ #if SS_BLOCKSIZE != 0
539
+
540
+ static INLINE
541
+ void
542
+ ss_blockswap(int *a, int *b, int n) {
543
+ int t;
544
+ for(; 0 < n; --n, ++a, ++b) {
545
+ t = *a, *a = *b, *b = t;
546
+ }
547
+ }
548
+
549
+ static INLINE
550
+ void
551
+ ss_rotate(int *first, int *middle, int *last) {
552
+ int *a, *b, t;
553
+ int l, r;
554
+ l = middle - first, r = last - middle;
555
+ for(; (0 < l) && (0 < r);) {
556
+ if(l == r) { ss_blockswap(first, middle, l); break; }
557
+ if(l < r) {
558
+ a = last - 1, b = middle - 1;
559
+ t = *a;
560
+ do {
561
+ *a-- = *b, *b-- = *a;
562
+ if(b < first) {
563
+ *a = t;
564
+ last = a;
565
+ if((r -= l + 1) <= l) { break; }
566
+ a -= 1, b = middle - 1;
567
+ t = *a;
568
+ }
569
+ } while(1);
570
+ } else {
571
+ a = first, b = middle;
572
+ t = *a;
573
+ do {
574
+ *a++ = *b, *b++ = *a;
575
+ if(last <= b) {
576
+ *a = t;
577
+ first = a + 1;
578
+ if((l -= r + 1) <= r) { break; }
579
+ a += 1, b = middle;
580
+ t = *a;
581
+ }
582
+ } while(1);
583
+ }
584
+ }
585
+ }
586
+
587
+
588
+ /*---------------------------------------------------------------------------*/
589
+
590
+ static
591
+ void
592
+ ss_inplacemerge(const unsigned char *T, const int *PA,
593
+ int *first, int *middle, int *last,
594
+ int depth) {
595
+ const int *p;
596
+ int *a, *b;
597
+ int len, half;
598
+ int q, r;
599
+ int x;
600
+
601
+ for(;;) {
602
+ if(*(last - 1) < 0) { x = 1; p = PA + ~*(last - 1); }
603
+ else { x = 0; p = PA + *(last - 1); }
604
+ for(a = first, len = middle - first, half = len >> 1, r = -1;
605
+ 0 < len;
606
+ len = half, half >>= 1) {
607
+ b = a + half;
608
+ q = ss_compare(T, PA + ((0 <= *b) ? *b : ~*b), p, depth);
609
+ if(q < 0) {
610
+ a = b + 1;
611
+ half -= (len & 1) ^ 1;
612
+ } else {
613
+ r = q;
614
+ }
615
+ }
616
+ if(a < middle) {
617
+ if(r == 0) { *a = ~*a; }
618
+ ss_rotate(a, middle, last);
619
+ last -= middle - a;
620
+ middle = a;
621
+ if(first == middle) { break; }
622
+ }
623
+ --last;
624
+ if(x != 0) { while(*--last < 0) { } }
625
+ if(middle == last) { break; }
626
+ }
627
+ }
628
+
629
+
630
+ /*---------------------------------------------------------------------------*/
631
+
632
+ /* Merge-forward with internal buffer. */
633
+ static
634
+ void
635
+ ss_mergeforward(const unsigned char *T, const int *PA,
636
+ int *first, int *middle, int *last,
637
+ int *buf, int depth) {
638
+ int *a, *b, *c, *bufend;
639
+ int t;
640
+ int r;
641
+
642
+ bufend = buf + (middle - first) - 1;
643
+ ss_blockswap(buf, first, middle - first);
644
+
645
+ for(t = *(a = first), b = buf, c = middle;;) {
646
+ r = ss_compare(T, PA + *b, PA + *c, depth);
647
+ if(r < 0) {
648
+ do {
649
+ *a++ = *b;
650
+ if(bufend <= b) { *bufend = t; return; }
651
+ *b++ = *a;
652
+ } while(*b < 0);
653
+ } else if(r > 0) {
654
+ do {
655
+ *a++ = *c, *c++ = *a;
656
+ if(last <= c) {
657
+ while(b < bufend) { *a++ = *b, *b++ = *a; }
658
+ *a = *b, *b = t;
659
+ return;
660
+ }
661
+ } while(*c < 0);
662
+ } else {
663
+ *c = ~*c;
664
+ do {
665
+ *a++ = *b;
666
+ if(bufend <= b) { *bufend = t; return; }
667
+ *b++ = *a;
668
+ } while(*b < 0);
669
+
670
+ do {
671
+ *a++ = *c, *c++ = *a;
672
+ if(last <= c) {
673
+ while(b < bufend) { *a++ = *b, *b++ = *a; }
674
+ *a = *b, *b = t;
675
+ return;
676
+ }
677
+ } while(*c < 0);
678
+ }
679
+ }
680
+ }
681
+
682
+ /* Merge-backward with internal buffer. */
683
+ static
684
+ void
685
+ ss_mergebackward(const unsigned char *T, const int *PA,
686
+ int *first, int *middle, int *last,
687
+ int *buf, int depth) {
688
+ const int *p1, *p2;
689
+ int *a, *b, *c, *bufend;
690
+ int t;
691
+ int r;
692
+ int x;
693
+
694
+ bufend = buf + (last - middle) - 1;
695
+ ss_blockswap(buf, middle, last - middle);
696
+
697
+ x = 0;
698
+ if(*bufend < 0) { p1 = PA + ~*bufend; x |= 1; }
699
+ else { p1 = PA + *bufend; }
700
+ if(*(middle - 1) < 0) { p2 = PA + ~*(middle - 1); x |= 2; }
701
+ else { p2 = PA + *(middle - 1); }
702
+ for(t = *(a = last - 1), b = bufend, c = middle - 1;;) {
703
+ r = ss_compare(T, p1, p2, depth);
704
+ if(0 < r) {
705
+ if(x & 1) { do { *a-- = *b, *b-- = *a; } while(*b < 0); x ^= 1; }
706
+ *a-- = *b;
707
+ if(b <= buf) { *buf = t; break; }
708
+ *b-- = *a;
709
+ if(*b < 0) { p1 = PA + ~*b; x |= 1; }
710
+ else { p1 = PA + *b; }
711
+ } else if(r < 0) {
712
+ if(x & 2) { do { *a-- = *c, *c-- = *a; } while(*c < 0); x ^= 2; }
713
+ *a-- = *c, *c-- = *a;
714
+ if(c < first) {
715
+ while(buf < b) { *a-- = *b, *b-- = *a; }
716
+ *a = *b, *b = t;
717
+ break;
718
+ }
719
+ if(*c < 0) { p2 = PA + ~*c; x |= 2; }
720
+ else { p2 = PA + *c; }
721
+ } else {
722
+ if(x & 1) { do { *a-- = *b, *b-- = *a; } while(*b < 0); x ^= 1; }
723
+ *a-- = ~*b;
724
+ if(b <= buf) { *buf = t; break; }
725
+ *b-- = *a;
726
+ if(x & 2) { do { *a-- = *c, *c-- = *a; } while(*c < 0); x ^= 2; }
727
+ *a-- = *c, *c-- = *a;
728
+ if(c < first) {
729
+ while(buf < b) { *a-- = *b, *b-- = *a; }
730
+ *a = *b, *b = t;
731
+ break;
732
+ }
733
+ if(*b < 0) { p1 = PA + ~*b; x |= 1; }
734
+ else { p1 = PA + *b; }
735
+ if(*c < 0) { p2 = PA + ~*c; x |= 2; }
736
+ else { p2 = PA + *c; }
737
+ }
738
+ }
739
+ }
740
+
741
+ /* D&C based merge. */
742
+ static
743
+ void
744
+ ss_swapmerge(const unsigned char *T, const int *PA,
745
+ int *first, int *middle, int *last,
746
+ int *buf, int bufsize, int depth) {
747
+ #define STACK_SIZE SS_SMERGE_STACKSIZE
748
+ #define GETIDX(a) ((0 <= (a)) ? (a) : (~(a)))
749
+ #define MERGE_CHECK(a, b, c)\
750
+ do {\
751
+ if(((c) & 1) ||\
752
+ (((c) & 2) && (ss_compare(T, PA + GETIDX(*((a) - 1)), PA + *(a), depth) == 0))) {\
753
+ *(a) = ~*(a);\
754
+ }\
755
+ if(((c) & 4) && ((ss_compare(T, PA + GETIDX(*((b) - 1)), PA + *(b), depth) == 0))) {\
756
+ *(b) = ~*(b);\
757
+ }\
758
+ } while(0)
759
+ struct { int *a, *b, *c; int d; } stack[STACK_SIZE];
760
+ int *l, *r, *lm, *rm;
761
+ int m, len, half;
762
+ int ssize;
763
+ int check, next;
764
+
765
+ for(check = 0, ssize = 0;;) {
766
+ if((last - middle) <= bufsize) {
767
+ if((first < middle) && (middle < last)) {
768
+ ss_mergebackward(T, PA, first, middle, last, buf, depth);
769
+ }
770
+ MERGE_CHECK(first, last, check);
771
+ STACK_POP(first, middle, last, check);
772
+ continue;
773
+ }
774
+
775
+ if((middle - first) <= bufsize) {
776
+ if(first < middle) {
777
+ ss_mergeforward(T, PA, first, middle, last, buf, depth);
778
+ }
779
+ MERGE_CHECK(first, last, check);
780
+ STACK_POP(first, middle, last, check);
781
+ continue;
782
+ }
783
+
784
+ for(m = 0, len = MIN(middle - first, last - middle), half = len >> 1;
785
+ 0 < len;
786
+ len = half, half >>= 1) {
787
+ if(ss_compare(T, PA + GETIDX(*(middle + m + half)),
788
+ PA + GETIDX(*(middle - m - half - 1)), depth) < 0) {
789
+ m += half + 1;
790
+ half -= (len & 1) ^ 1;
791
+ }
792
+ }
793
+
794
+ if(0 < m) {
795
+ lm = middle - m, rm = middle + m;
796
+ ss_blockswap(lm, middle, m);
797
+ l = r = middle, next = 0;
798
+ if(rm < last) {
799
+ if(*rm < 0) {
800
+ *rm = ~*rm;
801
+ if(first < lm) { for(; *--l < 0;) { } next |= 4; }
802
+ next |= 1;
803
+ } else if(first < lm) {
804
+ for(; *r < 0; ++r) { }
805
+ next |= 2;
806
+ }
807
+ }
808
+
809
+ if((l - first) <= (last - r)) {
810
+ STACK_PUSH(r, rm, last, (next & 3) | (check & 4));
811
+ middle = lm, last = l, check = (check & 3) | (next & 4);
812
+ } else {
813
+ if((next & 2) && (r == middle)) { next ^= 6; }
814
+ STACK_PUSH(first, lm, l, (check & 3) | (next & 4));
815
+ first = r, middle = rm, check = (next & 3) | (check & 4);
816
+ }
817
+ } else {
818
+ if(ss_compare(T, PA + GETIDX(*(middle - 1)), PA + *middle, depth) == 0) {
819
+ *middle = ~*middle;
820
+ }
821
+ MERGE_CHECK(first, last, check);
822
+ STACK_POP(first, middle, last, check);
823
+ }
824
+ }
825
+ #undef STACK_SIZE
826
+ }
827
+
828
+ #endif /* SS_BLOCKSIZE != 0 */
829
+
830
+
831
+ /*---------------------------------------------------------------------------*/
832
+
833
+ /* Substring sort */
834
+ static
835
+ void
836
+ sssort(const unsigned char *T, const int *PA,
837
+ int *first, int *last,
838
+ int *buf, int bufsize,
839
+ int depth, int n, int lastsuffix) {
840
+ int *a;
841
+ #if SS_BLOCKSIZE != 0
842
+ int *b, *middle, *curbuf;
843
+ int j, k, curbufsize, limit;
844
+ #endif
845
+ int i;
846
+
847
+ if(lastsuffix != 0) { ++first; }
848
+
849
+ #if SS_BLOCKSIZE == 0
850
+ ss_mintrosort(T, PA, first, last, depth);
851
+ #else
852
+ if((bufsize < SS_BLOCKSIZE) &&
853
+ (bufsize < (last - first)) &&
854
+ (bufsize < (limit = ss_isqrt(last - first)))) {
855
+ if(SS_BLOCKSIZE < limit) { limit = SS_BLOCKSIZE; }
856
+ buf = middle = last - limit, bufsize = limit;
857
+ } else {
858
+ middle = last, limit = 0;
859
+ }
860
+ for(a = first, i = 0; SS_BLOCKSIZE < (middle - a); a += SS_BLOCKSIZE, ++i) {
861
+ #if SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE
862
+ ss_mintrosort(T, PA, a, a + SS_BLOCKSIZE, depth);
863
+ #elif 1 < SS_BLOCKSIZE
864
+ ss_insertionsort(T, PA, a, a + SS_BLOCKSIZE, depth);
865
+ #endif
866
+ curbufsize = last - (a + SS_BLOCKSIZE);
867
+ curbuf = a + SS_BLOCKSIZE;
868
+ if(curbufsize <= bufsize) { curbufsize = bufsize, curbuf = buf; }
869
+ for(b = a, k = SS_BLOCKSIZE, j = i; j & 1; b -= k, k <<= 1, j >>= 1) {
870
+ ss_swapmerge(T, PA, b - k, b, b + k, curbuf, curbufsize, depth);
871
+ }
872
+ }
873
+ #if SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE
874
+ ss_mintrosort(T, PA, a, middle, depth);
875
+ #elif 1 < SS_BLOCKSIZE
876
+ ss_insertionsort(T, PA, a, middle, depth);
877
+ #endif
878
+ for(k = SS_BLOCKSIZE; i != 0; k <<= 1, i >>= 1) {
879
+ if(i & 1) {
880
+ ss_swapmerge(T, PA, a - k, a, middle, buf, bufsize, depth);
881
+ a -= k;
882
+ }
883
+ }
884
+ if(limit != 0) {
885
+ #if SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE
886
+ ss_mintrosort(T, PA, middle, last, depth);
887
+ #elif 1 < SS_BLOCKSIZE
888
+ ss_insertionsort(T, PA, middle, last, depth);
889
+ #endif
890
+ ss_inplacemerge(T, PA, first, middle, last, depth);
891
+ }
892
+ #endif
893
+
894
+ if(lastsuffix != 0) {
895
+ /* Insert last type B* suffix. */
896
+ int PAi[2]; PAi[0] = PA[*(first - 1)], PAi[1] = n - 2;
897
+ for(a = first, i = *(first - 1);
898
+ (a < last) && ((*a < 0) || (0 < ss_compare(T, &(PAi[0]), PA + *a, depth)));
899
+ ++a) {
900
+ *(a - 1) = *a;
901
+ }
902
+ *(a - 1) = i;
903
+ }
904
+ }
905
+
906
+
907
+ /*---------------------------------------------------------------------------*/
908
+
909
+ static INLINE
910
+ int
911
+ tr_ilg(int n) {
912
+ return (n & 0xffff0000) ?
913
+ ((n & 0xff000000) ?
914
+ 24 + lg_table[(n >> 24) & 0xff] :
915
+ 16 + lg_table[(n >> 16) & 0xff]) :
916
+ ((n & 0x0000ff00) ?
917
+ 8 + lg_table[(n >> 8) & 0xff] :
918
+ 0 + lg_table[(n >> 0) & 0xff]);
919
+ }
920
+
921
+
922
+ /*---------------------------------------------------------------------------*/
923
+
924
+ /* Simple insertionsort for small size groups. */
925
+ static
926
+ void
927
+ tr_insertionsort(const int *ISAd, int *first, int *last) {
928
+ int *a, *b;
929
+ int t, r;
930
+
931
+ for(a = first + 1; a < last; ++a) {
932
+ for(t = *a, b = a - 1; 0 > (r = ISAd[t] - ISAd[*b]);) {
933
+ do { *(b + 1) = *b; } while((first <= --b) && (*b < 0));
934
+ if(b < first) { break; }
935
+ }
936
+ if(r == 0) { *b = ~*b; }
937
+ *(b + 1) = t;
938
+ }
939
+ }
940
+
941
+
942
+ /*---------------------------------------------------------------------------*/
943
+
944
+ static INLINE
945
+ void
946
+ tr_fixdown(const int *ISAd, int *SA, int i, int size) {
947
+ int j, k;
948
+ int v;
949
+ int c, d, e;
950
+
951
+ for(v = SA[i], c = ISAd[v]; (j = 2 * i + 1) < size; SA[i] = SA[k], i = k) {
952
+ d = ISAd[SA[k = j++]];
953
+ if(d < (e = ISAd[SA[j]])) { k = j; d = e; }
954
+ if(d <= c) { break; }
955
+ }
956
+ SA[i] = v;
957
+ }
958
+
959
+ /* Simple top-down heapsort. */
960
+ static
961
+ void
962
+ tr_heapsort(const int *ISAd, int *SA, int size) {
963
+ int i, m;
964
+ int t;
965
+
966
+ m = size;
967
+ if((size % 2) == 0) {
968
+ m--;
969
+ if(ISAd[SA[m / 2]] < ISAd[SA[m]]) { SWAP(SA[m], SA[m / 2]); }
970
+ }
971
+
972
+ for(i = m / 2 - 1; 0 <= i; --i) { tr_fixdown(ISAd, SA, i, m); }
973
+ if((size % 2) == 0) { SWAP(SA[0], SA[m]); tr_fixdown(ISAd, SA, 0, m); }
974
+ for(i = m - 1; 0 < i; --i) {
975
+ t = SA[0], SA[0] = SA[i];
976
+ tr_fixdown(ISAd, SA, 0, i);
977
+ SA[i] = t;
978
+ }
979
+ }
980
+
981
+
982
+ /*---------------------------------------------------------------------------*/
983
+
984
+ /* Returns the median of three elements. */
985
+ static INLINE
986
+ int *
987
+ tr_median3(const int *ISAd, int *v1, int *v2, int *v3) {
988
+ int *t;
989
+ if(ISAd[*v1] > ISAd[*v2]) { SWAP(v1, v2); }
990
+ if(ISAd[*v2] > ISAd[*v3]) {
991
+ if(ISAd[*v1] > ISAd[*v3]) { return v1; }
992
+ else { return v3; }
993
+ }
994
+ return v2;
995
+ }
996
+
997
+ /* Returns the median of five elements. */
998
+ static INLINE
999
+ int *
1000
+ tr_median5(const int *ISAd,
1001
+ int *v1, int *v2, int *v3, int *v4, int *v5) {
1002
+ int *t;
1003
+ if(ISAd[*v2] > ISAd[*v3]) { SWAP(v2, v3); }
1004
+ if(ISAd[*v4] > ISAd[*v5]) { SWAP(v4, v5); }
1005
+ if(ISAd[*v2] > ISAd[*v4]) { SWAP(v2, v4); SWAP(v3, v5); }
1006
+ if(ISAd[*v1] > ISAd[*v3]) { SWAP(v1, v3); }
1007
+ if(ISAd[*v1] > ISAd[*v4]) { SWAP(v1, v4); SWAP(v3, v5); }
1008
+ if(ISAd[*v3] > ISAd[*v4]) { return v4; }
1009
+ return v3;
1010
+ }
1011
+
1012
+ /* Returns the pivot element. */
1013
+ static INLINE
1014
+ int *
1015
+ tr_pivot(const int *ISAd, int *first, int *last) {
1016
+ int *middle;
1017
+ int t;
1018
+
1019
+ t = last - first;
1020
+ middle = first + t / 2;
1021
+
1022
+ if(t <= 512) {
1023
+ if(t <= 32) {
1024
+ return tr_median3(ISAd, first, middle, last - 1);
1025
+ } else {
1026
+ t >>= 2;
1027
+ return tr_median5(ISAd, first, first + t, middle, last - 1 - t, last - 1);
1028
+ }
1029
+ }
1030
+ t >>= 3;
1031
+ first = tr_median3(ISAd, first, first + t, first + (t << 1));
1032
+ middle = tr_median3(ISAd, middle - t, middle, middle + t);
1033
+ last = tr_median3(ISAd, last - 1 - (t << 1), last - 1 - t, last - 1);
1034
+ return tr_median3(ISAd, first, middle, last);
1035
+ }
1036
+
1037
+
1038
+ /*---------------------------------------------------------------------------*/
1039
+
1040
+ typedef struct _trbudget_t trbudget_t;
1041
+ struct _trbudget_t {
1042
+ int chance;
1043
+ int remain;
1044
+ int incval;
1045
+ int count;
1046
+ };
1047
+
1048
+ static INLINE
1049
+ void
1050
+ trbudget_init(trbudget_t *budget, int chance, int incval) {
1051
+ budget->chance = chance;
1052
+ budget->remain = budget->incval = incval;
1053
+ }
1054
+
1055
+ static INLINE
1056
+ int
1057
+ trbudget_check(trbudget_t *budget, int size) {
1058
+ if(size <= budget->remain) { budget->remain -= size; return 1; }
1059
+ if(budget->chance == 0) { budget->count += size; return 0; }
1060
+ budget->remain += budget->incval - size;
1061
+ budget->chance -= 1;
1062
+ return 1;
1063
+ }
1064
+
1065
+
1066
+ /*---------------------------------------------------------------------------*/
1067
+
1068
+ static INLINE
1069
+ void
1070
+ tr_partition(const int *ISAd,
1071
+ int *first, int *middle, int *last,
1072
+ int **pa, int **pb, int v) {
1073
+ int *a, *b, *c, *d, *e, *f;
1074
+ int t, s;
1075
+ int x = 0;
1076
+
1077
+ for(b = middle - 1; (++b < last) && ((x = ISAd[*b]) == v);) { }
1078
+ if(((a = b) < last) && (x < v)) {
1079
+ for(; (++b < last) && ((x = ISAd[*b]) <= v);) {
1080
+ if(x == v) { SWAP(*b, *a); ++a; }
1081
+ }
1082
+ }
1083
+ for(c = last; (b < --c) && ((x = ISAd[*c]) == v);) { }
1084
+ if((b < (d = c)) && (x > v)) {
1085
+ for(; (b < --c) && ((x = ISAd[*c]) >= v);) {
1086
+ if(x == v) { SWAP(*c, *d); --d; }
1087
+ }
1088
+ }
1089
+ for(; b < c;) {
1090
+ SWAP(*b, *c);
1091
+ for(; (++b < c) && ((x = ISAd[*b]) <= v);) {
1092
+ if(x == v) { SWAP(*b, *a); ++a; }
1093
+ }
1094
+ for(; (b < --c) && ((x = ISAd[*c]) >= v);) {
1095
+ if(x == v) { SWAP(*c, *d); --d; }
1096
+ }
1097
+ }
1098
+
1099
+ if(a <= d) {
1100
+ c = b - 1;
1101
+ if((s = a - first) > (t = b - a)) { s = t; }
1102
+ for(e = first, f = b - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); }
1103
+ if((s = d - c) > (t = last - d - 1)) { s = t; }
1104
+ for(e = b, f = last - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); }
1105
+ first += (b - a), last -= (d - c);
1106
+ }
1107
+ *pa = first, *pb = last;
1108
+ }
1109
+
1110
+ static
1111
+ void
1112
+ tr_copy(int *ISA, const int *SA,
1113
+ int *first, int *a, int *b, int *last,
1114
+ int depth) {
1115
+ /* sort suffixes of middle partition
1116
+ by using sorted order of suffixes of left and right partition. */
1117
+ int *c, *d, *e;
1118
+ int s, v;
1119
+
1120
+ v = b - SA - 1;
1121
+ for(c = first, d = a - 1; c <= d; ++c) {
1122
+ if((0 <= (s = *c - depth)) && (ISA[s] == v)) {
1123
+ *++d = s;
1124
+ ISA[s] = d - SA;
1125
+ }
1126
+ }
1127
+ for(c = last - 1, e = d + 1, d = b; e < d; --c) {
1128
+ if((0 <= (s = *c - depth)) && (ISA[s] == v)) {
1129
+ *--d = s;
1130
+ ISA[s] = d - SA;
1131
+ }
1132
+ }
1133
+ }
1134
+
1135
+ static
1136
+ void
1137
+ tr_partialcopy(int *ISA, const int *SA,
1138
+ int *first, int *a, int *b, int *last,
1139
+ int depth) {
1140
+ int *c, *d, *e;
1141
+ int s, v;
1142
+ int rank, lastrank, newrank = -1;
1143
+
1144
+ v = b - SA - 1;
1145
+ lastrank = -1;
1146
+ for(c = first, d = a - 1; c <= d; ++c) {
1147
+ if((0 <= (s = *c - depth)) && (ISA[s] == v)) {
1148
+ *++d = s;
1149
+ rank = ISA[s + depth];
1150
+ if(lastrank != rank) { lastrank = rank; newrank = d - SA; }
1151
+ ISA[s] = newrank;
1152
+ }
1153
+ }
1154
+
1155
+ lastrank = -1;
1156
+ for(e = d; first <= e; --e) {
1157
+ rank = ISA[*e];
1158
+ if(lastrank != rank) { lastrank = rank; newrank = e - SA; }
1159
+ if(newrank != rank) { ISA[*e] = newrank; }
1160
+ }
1161
+
1162
+ lastrank = -1;
1163
+ for(c = last - 1, e = d + 1, d = b; e < d; --c) {
1164
+ if((0 <= (s = *c - depth)) && (ISA[s] == v)) {
1165
+ *--d = s;
1166
+ rank = ISA[s + depth];
1167
+ if(lastrank != rank) { lastrank = rank; newrank = d - SA; }
1168
+ ISA[s] = newrank;
1169
+ }
1170
+ }
1171
+ }
1172
+
1173
+ static
1174
+ void
1175
+ tr_introsort(int *ISA, const int *ISAd,
1176
+ int *SA, int *first, int *last,
1177
+ trbudget_t *budget) {
1178
+ #define STACK_SIZE TR_STACKSIZE
1179
+ struct { const int *a; int *b, *c; int d, e; }stack[STACK_SIZE];
1180
+ int *a, *b, *c;
1181
+ int t;
1182
+ int v, x = 0;
1183
+ int incr = ISAd - ISA;
1184
+ int limit, next;
1185
+ int ssize, trlink = -1;
1186
+
1187
+ for(ssize = 0, limit = tr_ilg(last - first);;) {
1188
+
1189
+ if(limit < 0) {
1190
+ if(limit == -1) {
1191
+ /* tandem repeat partition */
1192
+ tr_partition(ISAd - incr, first, first, last, &a, &b, last - SA - 1);
1193
+
1194
+ /* update ranks */
1195
+ if(a < last) {
1196
+ for(c = first, v = a - SA - 1; c < a; ++c) { ISA[*c] = v; }
1197
+ }
1198
+ if(b < last) {
1199
+ for(c = a, v = b - SA - 1; c < b; ++c) { ISA[*c] = v; }
1200
+ }
1201
+
1202
+ /* push */
1203
+ if(1 < (b - a)) {
1204
+ STACK_PUSH5(NULL, a, b, 0, 0);
1205
+ STACK_PUSH5(ISAd - incr, first, last, -2, trlink);
1206
+ trlink = ssize - 2;
1207
+ }
1208
+ if((a - first) <= (last - b)) {
1209
+ if(1 < (a - first)) {
1210
+ STACK_PUSH5(ISAd, b, last, tr_ilg(last - b), trlink);
1211
+ last = a, limit = tr_ilg(a - first);
1212
+ } else if(1 < (last - b)) {
1213
+ first = b, limit = tr_ilg(last - b);
1214
+ } else {
1215
+ STACK_POP5(ISAd, first, last, limit, trlink);
1216
+ }
1217
+ } else {
1218
+ if(1 < (last - b)) {
1219
+ STACK_PUSH5(ISAd, first, a, tr_ilg(a - first), trlink);
1220
+ first = b, limit = tr_ilg(last - b);
1221
+ } else if(1 < (a - first)) {
1222
+ last = a, limit = tr_ilg(a - first);
1223
+ } else {
1224
+ STACK_POP5(ISAd, first, last, limit, trlink);
1225
+ }
1226
+ }
1227
+ } else if(limit == -2) {
1228
+ /* tandem repeat copy */
1229
+ a = stack[--ssize].b, b = stack[ssize].c;
1230
+ if(stack[ssize].d == 0) {
1231
+ tr_copy(ISA, SA, first, a, b, last, ISAd - ISA);
1232
+ } else {
1233
+ if(0 <= trlink) { stack[trlink].d = -1; }
1234
+ tr_partialcopy(ISA, SA, first, a, b, last, ISAd - ISA);
1235
+ }
1236
+ STACK_POP5(ISAd, first, last, limit, trlink);
1237
+ } else {
1238
+ /* sorted partition */
1239
+ if(0 <= *first) {
1240
+ a = first;
1241
+ do { ISA[*a] = a - SA; } while((++a < last) && (0 <= *a));
1242
+ first = a;
1243
+ }
1244
+ if(first < last) {
1245
+ a = first; do { *a = ~*a; } while(*++a < 0);
1246
+ next = (ISA[*a] != ISAd[*a]) ? tr_ilg(a - first + 1) : -1;
1247
+ if(++a < last) { for(b = first, v = a - SA - 1; b < a; ++b) { ISA[*b] = v; } }
1248
+
1249
+ /* push */
1250
+ if(trbudget_check(budget, a - first)) {
1251
+ if((a - first) <= (last - a)) {
1252
+ STACK_PUSH5(ISAd, a, last, -3, trlink);
1253
+ ISAd += incr, last = a, limit = next;
1254
+ } else {
1255
+ if(1 < (last - a)) {
1256
+ STACK_PUSH5(ISAd + incr, first, a, next, trlink);
1257
+ first = a, limit = -3;
1258
+ } else {
1259
+ ISAd += incr, last = a, limit = next;
1260
+ }
1261
+ }
1262
+ } else {
1263
+ if(0 <= trlink) { stack[trlink].d = -1; }
1264
+ if(1 < (last - a)) {
1265
+ first = a, limit = -3;
1266
+ } else {
1267
+ STACK_POP5(ISAd, first, last, limit, trlink);
1268
+ }
1269
+ }
1270
+ } else {
1271
+ STACK_POP5(ISAd, first, last, limit, trlink);
1272
+ }
1273
+ }
1274
+ continue;
1275
+ }
1276
+
1277
+ if((last - first) <= TR_INSERTIONSORT_THRESHOLD) {
1278
+ tr_insertionsort(ISAd, first, last);
1279
+ limit = -3;
1280
+ continue;
1281
+ }
1282
+
1283
+ if(limit-- == 0) {
1284
+ tr_heapsort(ISAd, first, last - first);
1285
+ for(a = last - 1; first < a; a = b) {
1286
+ for(x = ISAd[*a], b = a - 1; (first <= b) && (ISAd[*b] == x); --b) { *b = ~*b; }
1287
+ }
1288
+ limit = -3;
1289
+ continue;
1290
+ }
1291
+
1292
+ /* choose pivot */
1293
+ a = tr_pivot(ISAd, first, last);
1294
+ SWAP(*first, *a);
1295
+ v = ISAd[*first];
1296
+
1297
+ /* partition */
1298
+ tr_partition(ISAd, first, first + 1, last, &a, &b, v);
1299
+ if((last - first) != (b - a)) {
1300
+ next = (ISA[*a] != v) ? tr_ilg(b - a) : -1;
1301
+
1302
+ /* update ranks */
1303
+ for(c = first, v = a - SA - 1; c < a; ++c) { ISA[*c] = v; }
1304
+ if(b < last) { for(c = a, v = b - SA - 1; c < b; ++c) { ISA[*c] = v; } }
1305
+
1306
+ /* push */
1307
+ if((1 < (b - a)) && (trbudget_check(budget, b - a))) {
1308
+ if((a - first) <= (last - b)) {
1309
+ if((last - b) <= (b - a)) {
1310
+ if(1 < (a - first)) {
1311
+ STACK_PUSH5(ISAd + incr, a, b, next, trlink);
1312
+ STACK_PUSH5(ISAd, b, last, limit, trlink);
1313
+ last = a;
1314
+ } else if(1 < (last - b)) {
1315
+ STACK_PUSH5(ISAd + incr, a, b, next, trlink);
1316
+ first = b;
1317
+ } else {
1318
+ ISAd += incr, first = a, last = b, limit = next;
1319
+ }
1320
+ } else if((a - first) <= (b - a)) {
1321
+ if(1 < (a - first)) {
1322
+ STACK_PUSH5(ISAd, b, last, limit, trlink);
1323
+ STACK_PUSH5(ISAd + incr, a, b, next, trlink);
1324
+ last = a;
1325
+ } else {
1326
+ STACK_PUSH5(ISAd, b, last, limit, trlink);
1327
+ ISAd += incr, first = a, last = b, limit = next;
1328
+ }
1329
+ } else {
1330
+ STACK_PUSH5(ISAd, b, last, limit, trlink);
1331
+ STACK_PUSH5(ISAd, first, a, limit, trlink);
1332
+ ISAd += incr, first = a, last = b, limit = next;
1333
+ }
1334
+ } else {
1335
+ if((a - first) <= (b - a)) {
1336
+ if(1 < (last - b)) {
1337
+ STACK_PUSH5(ISAd + incr, a, b, next, trlink);
1338
+ STACK_PUSH5(ISAd, first, a, limit, trlink);
1339
+ first = b;
1340
+ } else if(1 < (a - first)) {
1341
+ STACK_PUSH5(ISAd + incr, a, b, next, trlink);
1342
+ last = a;
1343
+ } else {
1344
+ ISAd += incr, first = a, last = b, limit = next;
1345
+ }
1346
+ } else if((last - b) <= (b - a)) {
1347
+ if(1 < (last - b)) {
1348
+ STACK_PUSH5(ISAd, first, a, limit, trlink);
1349
+ STACK_PUSH5(ISAd + incr, a, b, next, trlink);
1350
+ first = b;
1351
+ } else {
1352
+ STACK_PUSH5(ISAd, first, a, limit, trlink);
1353
+ ISAd += incr, first = a, last = b, limit = next;
1354
+ }
1355
+ } else {
1356
+ STACK_PUSH5(ISAd, first, a, limit, trlink);
1357
+ STACK_PUSH5(ISAd, b, last, limit, trlink);
1358
+ ISAd += incr, first = a, last = b, limit = next;
1359
+ }
1360
+ }
1361
+ } else {
1362
+ if((1 < (b - a)) && (0 <= trlink)) { stack[trlink].d = -1; }
1363
+ if((a - first) <= (last - b)) {
1364
+ if(1 < (a - first)) {
1365
+ STACK_PUSH5(ISAd, b, last, limit, trlink);
1366
+ last = a;
1367
+ } else if(1 < (last - b)) {
1368
+ first = b;
1369
+ } else {
1370
+ STACK_POP5(ISAd, first, last, limit, trlink);
1371
+ }
1372
+ } else {
1373
+ if(1 < (last - b)) {
1374
+ STACK_PUSH5(ISAd, first, a, limit, trlink);
1375
+ first = b;
1376
+ } else if(1 < (a - first)) {
1377
+ last = a;
1378
+ } else {
1379
+ STACK_POP5(ISAd, first, last, limit, trlink);
1380
+ }
1381
+ }
1382
+ }
1383
+ } else {
1384
+ if(trbudget_check(budget, last - first)) {
1385
+ limit = tr_ilg(last - first), ISAd += incr;
1386
+ } else {
1387
+ if(0 <= trlink) { stack[trlink].d = -1; }
1388
+ STACK_POP5(ISAd, first, last, limit, trlink);
1389
+ }
1390
+ }
1391
+ }
1392
+ #undef STACK_SIZE
1393
+ }
1394
+
1395
+
1396
+
1397
+ /*---------------------------------------------------------------------------*/
1398
+
1399
+ /* Tandem repeat sort */
1400
+ static
1401
+ void
1402
+ trsort(int *ISA, int *SA, int n, int depth) {
1403
+ int *ISAd;
1404
+ int *first, *last;
1405
+ trbudget_t budget;
1406
+ int t, skip, unsorted;
1407
+
1408
+ trbudget_init(&budget, tr_ilg(n) * 2 / 3, n);
1409
+ /* trbudget_init(&budget, tr_ilg(n) * 3 / 4, n); */
1410
+ for(ISAd = ISA + depth; -n < *SA; ISAd += ISAd - ISA) {
1411
+ first = SA;
1412
+ skip = 0;
1413
+ unsorted = 0;
1414
+ do {
1415
+ if((t = *first) < 0) { first -= t; skip += t; }
1416
+ else {
1417
+ if(skip != 0) { *(first + skip) = skip; skip = 0; }
1418
+ last = SA + ISA[t] + 1;
1419
+ if(1 < (last - first)) {
1420
+ budget.count = 0;
1421
+ tr_introsort(ISA, ISAd, SA, first, last, &budget);
1422
+ if(budget.count != 0) { unsorted += budget.count; }
1423
+ else { skip = first - last; }
1424
+ } else if((last - first) == 1) {
1425
+ skip = -1;
1426
+ }
1427
+ first = last;
1428
+ }
1429
+ } while(first < (SA + n));
1430
+ if(skip != 0) { *(first + skip) = skip; }
1431
+ if(unsorted == 0) { break; }
1432
+ }
1433
+ }
1434
+
1435
+
1436
+ /*---------------------------------------------------------------------------*/
1437
+
1438
+ /* Sorts suffixes of type B*. */
1439
+ static
1440
+ int
1441
+ sort_typeBstar(const unsigned char *T, int *SA,
1442
+ int *bucket_A, int *bucket_B,
1443
+ int n, int openMP) {
1444
+ int *PAb, *ISAb, *buf;
1445
+ #ifdef LIBBSC_OPENMP
1446
+ int *curbuf;
1447
+ int l;
1448
+ #endif
1449
+ int i, j, k, t, m, bufsize;
1450
+ int c0, c1;
1451
+ #ifdef LIBBSC_OPENMP
1452
+ int d0, d1;
1453
+ #endif
1454
+ (void)openMP;
1455
+
1456
+ /* Initialize bucket arrays. */
1457
+ for(i = 0; i < BUCKET_A_SIZE; ++i) { bucket_A[i] = 0; }
1458
+ for(i = 0; i < BUCKET_B_SIZE; ++i) { bucket_B[i] = 0; }
1459
+
1460
+ /* Count the number of occurrences of the first one or two characters of each
1461
+ type A, B and B* suffix. Moreover, store the beginning position of all
1462
+ type B* suffixes into the array SA. */
1463
+ for(i = n - 1, m = n, c0 = T[n - 1]; 0 <= i;) {
1464
+ /* type A suffix. */
1465
+ do { ++BUCKET_A(c1 = c0); } while((0 <= --i) && ((c0 = T[i]) >= c1));
1466
+ if(0 <= i) {
1467
+ /* type B* suffix. */
1468
+ ++BUCKET_BSTAR(c0, c1);
1469
+ SA[--m] = i;
1470
+ /* type B suffix. */
1471
+ for(--i, c1 = c0; (0 <= i) && ((c0 = T[i]) <= c1); --i, c1 = c0) {
1472
+ ++BUCKET_B(c0, c1);
1473
+ }
1474
+ }
1475
+ }
1476
+ m = n - m;
1477
+ /*
1478
+ note:
1479
+ A type B* suffix is lexicographically smaller than a type B suffix that
1480
+ begins with the same first two characters.
1481
+ */
1482
+
1483
+ /* Calculate the index of start/end point of each bucket. */
1484
+ for(c0 = 0, i = 0, j = 0; c0 < ALPHABET_SIZE; ++c0) {
1485
+ t = i + BUCKET_A(c0);
1486
+ BUCKET_A(c0) = i + j; /* start point */
1487
+ i = t + BUCKET_B(c0, c0);
1488
+ for(c1 = c0 + 1; c1 < ALPHABET_SIZE; ++c1) {
1489
+ j += BUCKET_BSTAR(c0, c1);
1490
+ BUCKET_BSTAR(c0, c1) = j; /* end point */
1491
+ i += BUCKET_B(c0, c1);
1492
+ }
1493
+ }
1494
+
1495
+ if(0 < m) {
1496
+ /* Sort the type B* suffixes by their first two characters. */
1497
+ PAb = SA + n - m; ISAb = SA + m;
1498
+ for(i = m - 2; 0 <= i; --i) {
1499
+ t = PAb[i], c0 = T[t], c1 = T[t + 1];
1500
+ SA[--BUCKET_BSTAR(c0, c1)] = i;
1501
+ }
1502
+ t = PAb[m - 1], c0 = T[t], c1 = T[t + 1];
1503
+ SA[--BUCKET_BSTAR(c0, c1)] = m - 1;
1504
+
1505
+ /* Sort the type B* substrings using sssort. */
1506
+ #ifdef LIBBSC_OPENMP
1507
+ if (openMP)
1508
+ {
1509
+ buf = SA + m;
1510
+ c0 = ALPHABET_SIZE - 2, c1 = ALPHABET_SIZE - 1, j = m;
1511
+ #pragma omp parallel default(shared) private(bufsize, curbuf, k, l, d0, d1)
1512
+ {
1513
+ bufsize = (n - (2 * m)) / omp_get_num_threads();
1514
+ curbuf = buf + omp_get_thread_num() * bufsize;
1515
+ k = 0;
1516
+ for(;;) {
1517
+ #pragma omp critical(sssort_lock)
1518
+ {
1519
+ if(0 < (l = j)) {
1520
+ d0 = c0, d1 = c1;
1521
+ do {
1522
+ k = BUCKET_BSTAR(d0, d1);
1523
+ if(--d1 <= d0) {
1524
+ d1 = ALPHABET_SIZE - 1;
1525
+ if(--d0 < 0) { break; }
1526
+ }
1527
+ } while(((l - k) <= 1) && (0 < (l = k)));
1528
+ c0 = d0, c1 = d1, j = k;
1529
+ }
1530
+ }
1531
+ if(l == 0) { break; }
1532
+ sssort(T, PAb, SA + k, SA + l,
1533
+ curbuf, bufsize, 2, n, *(SA + k) == (m - 1));
1534
+ }
1535
+ }
1536
+ }
1537
+ else
1538
+ {
1539
+ buf = SA + m, bufsize = n - (2 * m);
1540
+ for(c0 = ALPHABET_SIZE - 2, j = m; 0 < j; --c0) {
1541
+ for(c1 = ALPHABET_SIZE - 1; c0 < c1; j = i, --c1) {
1542
+ i = BUCKET_BSTAR(c0, c1);
1543
+ if(1 < (j - i)) {
1544
+ sssort(T, PAb, SA + i, SA + j,
1545
+ buf, bufsize, 2, n, *(SA + i) == (m - 1));
1546
+ }
1547
+ }
1548
+ }
1549
+ }
1550
+ #else
1551
+ buf = SA + m, bufsize = n - (2 * m);
1552
+ for(c0 = ALPHABET_SIZE - 2, j = m; 0 < j; --c0) {
1553
+ for(c1 = ALPHABET_SIZE - 1; c0 < c1; j = i, --c1) {
1554
+ i = BUCKET_BSTAR(c0, c1);
1555
+ if(1 < (j - i)) {
1556
+ sssort(T, PAb, SA + i, SA + j,
1557
+ buf, bufsize, 2, n, *(SA + i) == (m - 1));
1558
+ }
1559
+ }
1560
+ }
1561
+ #endif
1562
+
1563
+ /* Compute ranks of type B* substrings. */
1564
+ for(i = m - 1; 0 <= i; --i) {
1565
+ if(0 <= SA[i]) {
1566
+ j = i;
1567
+ do { ISAb[SA[i]] = i; } while((0 <= --i) && (0 <= SA[i]));
1568
+ SA[i + 1] = i - j;
1569
+ if(i <= 0) { break; }
1570
+ }
1571
+ j = i;
1572
+ do { ISAb[SA[i] = ~SA[i]] = j; } while(SA[--i] < 0);
1573
+ ISAb[SA[i]] = j;
1574
+ }
1575
+
1576
+ /* Construct the inverse suffix array of type B* suffixes using trsort. */
1577
+ trsort(ISAb, SA, m, 1);
1578
+
1579
+ /* Set the sorted order of tyoe B* suffixes. */
1580
+ for(i = n - 1, j = m, c0 = T[n - 1]; 0 <= i;) {
1581
+ for(--i, c1 = c0; (0 <= i) && ((c0 = T[i]) >= c1); --i, c1 = c0) { }
1582
+ if(0 <= i) {
1583
+ t = i;
1584
+ for(--i, c1 = c0; (0 <= i) && ((c0 = T[i]) <= c1); --i, c1 = c0) { }
1585
+ SA[ISAb[--j]] = ((t == 0) || (1 < (t - i))) ? t : ~t;
1586
+ }
1587
+ }
1588
+
1589
+ /* Calculate the index of start/end point of each bucket. */
1590
+ BUCKET_B(ALPHABET_SIZE - 1, ALPHABET_SIZE - 1) = n; /* end point */
1591
+ for(c0 = ALPHABET_SIZE - 2, k = m - 1; 0 <= c0; --c0) {
1592
+ i = BUCKET_A(c0 + 1) - 1;
1593
+ for(c1 = ALPHABET_SIZE - 1; c0 < c1; --c1) {
1594
+ t = i - BUCKET_B(c0, c1);
1595
+ BUCKET_B(c0, c1) = i; /* end point */
1596
+
1597
+ /* Move all type B* suffixes to the correct position. */
1598
+ for(i = t, j = BUCKET_BSTAR(c0, c1);
1599
+ j <= k;
1600
+ --i, --k) { SA[i] = SA[k]; }
1601
+ }
1602
+ BUCKET_BSTAR(c0, c0 + 1) = i - BUCKET_B(c0, c0) + 1; /* start point */
1603
+ BUCKET_B(c0, c0) = i; /* end point */
1604
+ }
1605
+ }
1606
+
1607
+ return m;
1608
+ }
1609
+
1610
+ /* Constructs the suffix array by using the sorted order of type B* suffixes. */
1611
+ static
1612
+ void
1613
+ construct_SA(const unsigned char *T, int *SA,
1614
+ int *bucket_A, int *bucket_B,
1615
+ int n, int m) {
1616
+ int *i, *j, *k;
1617
+ int s;
1618
+ int c0, c1, c2;
1619
+
1620
+ if(0 < m) {
1621
+ /* Construct the sorted order of type B suffixes by using
1622
+ the sorted order of type B* suffixes. */
1623
+ for(c1 = ALPHABET_SIZE - 2; 0 <= c1; --c1) {
1624
+ /* Scan the suffix array from right to left. */
1625
+ for(i = SA + BUCKET_BSTAR(c1, c1 + 1),
1626
+ j = SA + BUCKET_A(c1 + 1) - 1, k = NULL, c2 = -1;
1627
+ i <= j;
1628
+ --j) {
1629
+ if(0 < (s = *j)) {
1630
+ assert(T[s] == c1);
1631
+ assert(((s + 1) < n) && (T[s] <= T[s + 1]));
1632
+ assert(T[s - 1] <= T[s]);
1633
+ *j = ~s;
1634
+ c0 = T[--s];
1635
+ if((0 < s) && (T[s - 1] > c0)) { s = ~s; }
1636
+ if(c0 != c2) {
1637
+ if(0 <= c2) { BUCKET_B(c2, c1) = k - SA; }
1638
+ k = SA + BUCKET_B(c2 = c0, c1);
1639
+ }
1640
+ assert(k < j);
1641
+ *k-- = s;
1642
+ } else {
1643
+ assert(((s == 0) && (T[s] == c1)) || (s < 0));
1644
+ *j = ~s;
1645
+ }
1646
+ }
1647
+ }
1648
+ }
1649
+
1650
+ /* Construct the suffix array by using
1651
+ the sorted order of type B suffixes. */
1652
+ k = SA + BUCKET_A(c2 = T[n - 1]);
1653
+ *k++ = (T[n - 2] < c2) ? ~(n - 1) : (n - 1);
1654
+ /* Scan the suffix array from left to right. */
1655
+ for(i = SA, j = SA + n; i < j; ++i) {
1656
+ if(0 < (s = *i)) {
1657
+ assert(T[s - 1] >= T[s]);
1658
+ c0 = T[--s];
1659
+ if((s == 0) || (T[s - 1] < c0)) { s = ~s; }
1660
+ if(c0 != c2) {
1661
+ BUCKET_A(c2) = k - SA;
1662
+ k = SA + BUCKET_A(c2 = c0);
1663
+ }
1664
+ assert(i < k);
1665
+ *k++ = s;
1666
+ } else {
1667
+ assert(s < 0);
1668
+ *i = ~s;
1669
+ }
1670
+ }
1671
+ }
1672
+
1673
+ /* Constructs the burrows-wheeler transformed string directly
1674
+ by using the sorted order of type B* suffixes. */
1675
+ static
1676
+ int
1677
+ construct_BWT(const unsigned char *T, int *SA,
1678
+ int *bucket_A, int *bucket_B,
1679
+ int n, int m) {
1680
+ int *i, *j, *k, *orig;
1681
+ int s;
1682
+ int c0, c1, c2;
1683
+
1684
+ if(0 < m) {
1685
+ /* Construct the sorted order of type B suffixes by using
1686
+ the sorted order of type B* suffixes. */
1687
+ for(c1 = ALPHABET_SIZE - 2; 0 <= c1; --c1) {
1688
+ /* Scan the suffix array from right to left. */
1689
+ for(i = SA + BUCKET_BSTAR(c1, c1 + 1),
1690
+ j = SA + BUCKET_A(c1 + 1) - 1, k = NULL, c2 = -1;
1691
+ i <= j;
1692
+ --j) {
1693
+ if(0 < (s = *j)) {
1694
+ assert(T[s] == c1);
1695
+ assert(((s + 1) < n) && (T[s] <= T[s + 1]));
1696
+ assert(T[s - 1] <= T[s]);
1697
+ c0 = T[--s];
1698
+ *j = ~((int)c0);
1699
+ if((0 < s) && (T[s - 1] > c0)) { s = ~s; }
1700
+ if(c0 != c2) {
1701
+ if(0 <= c2) { BUCKET_B(c2, c1) = k - SA; }
1702
+ k = SA + BUCKET_B(c2 = c0, c1);
1703
+ }
1704
+ assert(k < j);
1705
+ *k-- = s;
1706
+ } else if(s != 0) {
1707
+ *j = ~s;
1708
+ #ifndef NDEBUG
1709
+ } else {
1710
+ assert(T[s] == c1);
1711
+ #endif
1712
+ }
1713
+ }
1714
+ }
1715
+ }
1716
+
1717
+ /* Construct the BWTed string by using
1718
+ the sorted order of type B suffixes. */
1719
+ k = SA + BUCKET_A(c2 = T[n - 1]);
1720
+ *k++ = (T[n - 2] < c2) ? ~((int)T[n - 2]) : (n - 1);
1721
+ /* Scan the suffix array from left to right. */
1722
+ for(i = SA, j = SA + n, orig = SA; i < j; ++i) {
1723
+ if(0 < (s = *i)) {
1724
+ assert(T[s - 1] >= T[s]);
1725
+ c0 = T[--s];
1726
+ *i = c0;
1727
+ if((0 < s) && (T[s - 1] < c0)) { s = ~((int)T[s - 1]); }
1728
+ if(c0 != c2) {
1729
+ BUCKET_A(c2) = k - SA;
1730
+ k = SA + BUCKET_A(c2 = c0);
1731
+ }
1732
+ assert(i < k);
1733
+ *k++ = s;
1734
+ } else if(s != 0) {
1735
+ *i = ~s;
1736
+ } else {
1737
+ orig = i;
1738
+ }
1739
+ }
1740
+
1741
+ return orig - SA;
1742
+ }
1743
+
1744
+ /* Constructs the burrows-wheeler transformed string directly
1745
+ by using the sorted order of type B* suffixes. */
1746
+ static
1747
+ int
1748
+ construct_BWT_indexes(const unsigned char *T, int *SA,
1749
+ int *bucket_A, int *bucket_B,
1750
+ int n, int m,
1751
+ unsigned char * num_indexes, int * indexes) {
1752
+ int *i, *j, *k, *orig;
1753
+ int s;
1754
+ int c0, c1, c2;
1755
+
1756
+ int mod = n / 8;
1757
+ {
1758
+ mod |= mod >> 1; mod |= mod >> 2;
1759
+ mod |= mod >> 4; mod |= mod >> 8;
1760
+ mod |= mod >> 16; mod >>= 1;
1761
+
1762
+ *num_indexes = (unsigned char)((n - 1) / (mod + 1));
1763
+ }
1764
+
1765
+ if(0 < m) {
1766
+ /* Construct the sorted order of type B suffixes by using
1767
+ the sorted order of type B* suffixes. */
1768
+ for(c1 = ALPHABET_SIZE - 2; 0 <= c1; --c1) {
1769
+ /* Scan the suffix array from right to left. */
1770
+ for(i = SA + BUCKET_BSTAR(c1, c1 + 1),
1771
+ j = SA + BUCKET_A(c1 + 1) - 1, k = NULL, c2 = -1;
1772
+ i <= j;
1773
+ --j) {
1774
+ if(0 < (s = *j)) {
1775
+ assert(T[s] == c1);
1776
+ assert(((s + 1) < n) && (T[s] <= T[s + 1]));
1777
+ assert(T[s - 1] <= T[s]);
1778
+
1779
+ if ((s & mod) == 0) indexes[s / (mod + 1) - 1] = j - SA;
1780
+
1781
+ c0 = T[--s];
1782
+ *j = ~((int)c0);
1783
+ if((0 < s) && (T[s - 1] > c0)) { s = ~s; }
1784
+ if(c0 != c2) {
1785
+ if(0 <= c2) { BUCKET_B(c2, c1) = k - SA; }
1786
+ k = SA + BUCKET_B(c2 = c0, c1);
1787
+ }
1788
+ assert(k < j);
1789
+ *k-- = s;
1790
+ } else if(s != 0) {
1791
+ *j = ~s;
1792
+ #ifndef NDEBUG
1793
+ } else {
1794
+ assert(T[s] == c1);
1795
+ #endif
1796
+ }
1797
+ }
1798
+ }
1799
+ }
1800
+
1801
+ /* Construct the BWTed string by using
1802
+ the sorted order of type B suffixes. */
1803
+ k = SA + BUCKET_A(c2 = T[n - 1]);
1804
+ if (T[n - 2] < c2) {
1805
+ if (((n - 1) & mod) == 0) indexes[(n - 1) / (mod + 1) - 1] = k - SA;
1806
+ *k++ = ~((int)T[n - 2]);
1807
+ }
1808
+ else {
1809
+ *k++ = n - 1;
1810
+ }
1811
+
1812
+ /* Scan the suffix array from left to right. */
1813
+ for(i = SA, j = SA + n, orig = SA; i < j; ++i) {
1814
+ if(0 < (s = *i)) {
1815
+ assert(T[s - 1] >= T[s]);
1816
+
1817
+ if ((s & mod) == 0) indexes[s / (mod + 1) - 1] = i - SA;
1818
+
1819
+ c0 = T[--s];
1820
+ *i = c0;
1821
+ if(c0 != c2) {
1822
+ BUCKET_A(c2) = k - SA;
1823
+ k = SA + BUCKET_A(c2 = c0);
1824
+ }
1825
+ assert(i < k);
1826
+ if((0 < s) && (T[s - 1] < c0)) {
1827
+ if ((s & mod) == 0) indexes[s / (mod + 1) - 1] = k - SA;
1828
+ *k++ = ~((int)T[s - 1]);
1829
+ } else
1830
+ *k++ = s;
1831
+ } else if(s != 0) {
1832
+ *i = ~s;
1833
+ } else {
1834
+ orig = i;
1835
+ }
1836
+ }
1837
+
1838
+ return orig - SA;
1839
+ }
1840
+
1841
+
1842
+ /*---------------------------------------------------------------------------*/
1843
+
1844
+ /*- Function -*/
1845
+
1846
+ int
1847
+ divsufsort(const unsigned char *T, int *SA, int n, int openMP) {
1848
+ int *bucket_A, *bucket_B;
1849
+ int m;
1850
+ int err = 0;
1851
+
1852
+ /* Check arguments. */
1853
+ if((T == NULL) || (SA == NULL) || (n < 0)) { return -1; }
1854
+ else if(n == 0) { return 0; }
1855
+ else if(n == 1) { SA[0] = 0; return 0; }
1856
+ else if(n == 2) { m = (T[0] < T[1]); SA[m ^ 1] = 0, SA[m] = 1; return 0; }
1857
+
1858
+ bucket_A = (int *)malloc(BUCKET_A_SIZE * sizeof(int));
1859
+ bucket_B = (int *)malloc(BUCKET_B_SIZE * sizeof(int));
1860
+
1861
+ /* Suffixsort. */
1862
+ if((bucket_A != NULL) && (bucket_B != NULL)) {
1863
+ m = sort_typeBstar(T, SA, bucket_A, bucket_B, n, openMP);
1864
+ construct_SA(T, SA, bucket_A, bucket_B, n, m);
1865
+ } else {
1866
+ err = -2;
1867
+ }
1868
+
1869
+ free(bucket_B);
1870
+ free(bucket_A);
1871
+
1872
+ return err;
1873
+ }
1874
+
1875
+ int
1876
+ divbwt(const unsigned char *T, unsigned char *U, int *A, int n, unsigned char * num_indexes, int * indexes, int openMP) {
1877
+ int *B;
1878
+ int *bucket_A, *bucket_B;
1879
+ int m, pidx, i;
1880
+
1881
+ /* Check arguments. */
1882
+ if((T == NULL) || (U == NULL) || (n < 0)) { return -1; }
1883
+ else if(n <= 1) { if(n == 1) { U[0] = T[0]; } return n; }
1884
+
1885
+ if((B = A) == NULL) { B = (int *)malloc((size_t)(n + 1) * sizeof(int)); }
1886
+ bucket_A = (int *)malloc(BUCKET_A_SIZE * sizeof(int));
1887
+ bucket_B = (int *)malloc(BUCKET_B_SIZE * sizeof(int));
1888
+
1889
+ /* Burrows-Wheeler Transform. */
1890
+ if((B != NULL) && (bucket_A != NULL) && (bucket_B != NULL)) {
1891
+ m = sort_typeBstar(T, B, bucket_A, bucket_B, n, openMP);
1892
+
1893
+ if (num_indexes == NULL || indexes == NULL) {
1894
+ pidx = construct_BWT(T, B, bucket_A, bucket_B, n, m);
1895
+ } else {
1896
+ pidx = construct_BWT_indexes(T, B, bucket_A, bucket_B, n, m, num_indexes, indexes);
1897
+ }
1898
+
1899
+ /* Copy to output string. */
1900
+ U[0] = T[n - 1];
1901
+ for(i = 0; i < pidx; ++i) { U[i + 1] = (unsigned char)B[i]; }
1902
+ for(i += 1; i < n; ++i) { U[i] = (unsigned char)B[i]; }
1903
+ pidx += 1;
1904
+ } else {
1905
+ pidx = -2;
1906
+ }
1907
+
1908
+ free(bucket_B);
1909
+ free(bucket_A);
1910
+ if(A == NULL) { free(B); }
1911
+
1912
+ return pidx;
1913
+ }