bsdiff 0.0.2 → 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,1094 @@
1
+
2
+ /*-------------------------------------------------------------*/
3
+ /*--- Block sorting machinery ---*/
4
+ /*--- blocksort.c ---*/
5
+ /*-------------------------------------------------------------*/
6
+
7
+ /* ------------------------------------------------------------------
8
+ This file is part of bzip2/libbzip2, a program and library for
9
+ lossless, block-sorting data compression.
10
+
11
+ bzip2/libbzip2 version 1.0.8 of 13 July 2019
12
+ Copyright (C) 1996-2019 Julian Seward <jseward@acm.org>
13
+
14
+ Please read the WARNING, DISCLAIMER and PATENTS sections in the
15
+ README file.
16
+
17
+ This program is released under the terms of the license contained
18
+ in the file LICENSE.
19
+ ------------------------------------------------------------------ */
20
+
21
+
22
+ #include "bzlib_private.h"
23
+
24
+ /*---------------------------------------------*/
25
+ /*--- Fallback O(N log(N)^2) sorting ---*/
26
+ /*--- algorithm, for repetitive blocks ---*/
27
+ /*---------------------------------------------*/
28
+
29
+ /*---------------------------------------------*/
30
+ static
31
+ __inline__
32
+ void fallbackSimpleSort ( UInt32* fmap,
33
+ UInt32* eclass,
34
+ Int32 lo,
35
+ Int32 hi )
36
+ {
37
+ Int32 i, j, tmp;
38
+ UInt32 ec_tmp;
39
+
40
+ if (lo == hi) return;
41
+
42
+ if (hi - lo > 3) {
43
+ for ( i = hi-4; i >= lo; i-- ) {
44
+ tmp = fmap[i];
45
+ ec_tmp = eclass[tmp];
46
+ for ( j = i+4; j <= hi && ec_tmp > eclass[fmap[j]]; j += 4 )
47
+ fmap[j-4] = fmap[j];
48
+ fmap[j-4] = tmp;
49
+ }
50
+ }
51
+
52
+ for ( i = hi-1; i >= lo; i-- ) {
53
+ tmp = fmap[i];
54
+ ec_tmp = eclass[tmp];
55
+ for ( j = i+1; j <= hi && ec_tmp > eclass[fmap[j]]; j++ )
56
+ fmap[j-1] = fmap[j];
57
+ fmap[j-1] = tmp;
58
+ }
59
+ }
60
+
61
+
62
+ /*---------------------------------------------*/
63
/* Helper macros private to fallbackQSort3.  They refer to that
   function's locals by name: fvswap uses `fmap`; fpush/fpop use
   `stackLo`, `stackHi` and `sp`.  The statement-like macros are
   wrapped in do { } while (0) so they act as a single statement and
   must be followed by a semicolon at the point of use. */

/* Exchange two lvalues via an Int32 temporary. */
#define fswap(zz1, zz2) \
   do { Int32 zztmp = zz1; zz1 = zz2; zz2 = zztmp; } while (0)

/* Exchange the zzn entries starting at fmap[zzp1] with the zzn
   entries starting at fmap[zzp2]. */
#define fvswap(zzp1, zzp2, zzn)       \
   do {                               \
      Int32 yyp1 = (zzp1);            \
      Int32 yyp2 = (zzp2);            \
      Int32 yyn  = (zzn);             \
      while (yyn > 0) {               \
         fswap(fmap[yyp1], fmap[yyp2]); \
         yyp1++; yyp2++; yyn--;       \
      }                               \
   } while (0)

/* Smaller of two values.  The whole expansion is parenthesised so
   the macro can be used inside a larger expression; each argument
   may be evaluated twice, so pass side-effect-free expressions. */
#define fmin(a,b) (((a) < (b)) ? (a) : (b))

/* Push the range [lz, hz] onto the explicit work stack. */
#define fpush(lz,hz) do { stackLo[sp] = lz; \
                          stackHi[sp] = hz; \
                          sp++; } while (0)

/* Pop the most recently pushed range into lz and hz. */
#define fpop(lz,hz) do { sp--;              \
                         lz = stackLo[sp];  \
                         hz = stackHi[sp]; } while (0)

/* Ranges with hi - lo below this go to fallbackSimpleSort. */
#define FALLBACK_QSORT_SMALL_THRESH 10
/* Number of entries in each work-stack array. */
#define FALLBACK_QSORT_STACK_SIZE   100
90
+
91
+
92
+ static
93
+ void fallbackQSort3 ( UInt32* fmap,
94
+ UInt32* eclass,
95
+ Int32 loSt,
96
+ Int32 hiSt )
97
+ {
98
+ Int32 unLo, unHi, ltLo, gtHi, n, m;
99
+ Int32 sp, lo, hi;
100
+ UInt32 med, r, r3;
101
+ Int32 stackLo[FALLBACK_QSORT_STACK_SIZE];
102
+ Int32 stackHi[FALLBACK_QSORT_STACK_SIZE];
103
+
104
+ r = 0;
105
+
106
+ sp = 0;
107
+ fpush ( loSt, hiSt );
108
+
109
+ while (sp > 0) {
110
+
111
+ AssertH ( sp < FALLBACK_QSORT_STACK_SIZE - 1, 1004 );
112
+
113
+ fpop ( lo, hi );
114
+ if (hi - lo < FALLBACK_QSORT_SMALL_THRESH) {
115
+ fallbackSimpleSort ( fmap, eclass, lo, hi );
116
+ continue;
117
+ }
118
+
119
+ /* Random partitioning. Median of 3 sometimes fails to
120
+ avoid bad cases. Median of 9 seems to help but
121
+ looks rather expensive. This too seems to work but
122
+ is cheaper. Guidance for the magic constants
123
+ 7621 and 32768 is taken from Sedgewick's algorithms
124
+ book, chapter 35.
125
+ */
126
+ r = ((r * 7621) + 1) % 32768;
127
+ r3 = r % 3;
128
+ if (r3 == 0) med = eclass[fmap[lo]]; else
129
+ if (r3 == 1) med = eclass[fmap[(lo+hi)>>1]]; else
130
+ med = eclass[fmap[hi]];
131
+
132
+ unLo = ltLo = lo;
133
+ unHi = gtHi = hi;
134
+
135
+ while (1) {
136
+ while (1) {
137
+ if (unLo > unHi) break;
138
+ n = (Int32)eclass[fmap[unLo]] - (Int32)med;
139
+ if (n == 0) {
140
+ fswap(fmap[unLo], fmap[ltLo]);
141
+ ltLo++; unLo++;
142
+ continue;
143
+ };
144
+ if (n > 0) break;
145
+ unLo++;
146
+ }
147
+ while (1) {
148
+ if (unLo > unHi) break;
149
+ n = (Int32)eclass[fmap[unHi]] - (Int32)med;
150
+ if (n == 0) {
151
+ fswap(fmap[unHi], fmap[gtHi]);
152
+ gtHi--; unHi--;
153
+ continue;
154
+ };
155
+ if (n < 0) break;
156
+ unHi--;
157
+ }
158
+ if (unLo > unHi) break;
159
+ fswap(fmap[unLo], fmap[unHi]); unLo++; unHi--;
160
+ }
161
+
162
+ AssertD ( unHi == unLo-1, "fallbackQSort3(2)" );
163
+
164
+ if (gtHi < ltLo) continue;
165
+
166
+ n = fmin(ltLo-lo, unLo-ltLo); fvswap(lo, unLo-n, n);
167
+ m = fmin(hi-gtHi, gtHi-unHi); fvswap(unLo, hi-m+1, m);
168
+
169
+ n = lo + unLo - ltLo - 1;
170
+ m = hi - (gtHi - unHi) + 1;
171
+
172
+ if (n - lo > hi - m) {
173
+ fpush ( lo, n );
174
+ fpush ( m, hi );
175
+ } else {
176
+ fpush ( m, hi );
177
+ fpush ( lo, n );
178
+ }
179
+ }
180
+ }
181
+
182
+ #undef fmin
183
+ #undef fpush
184
+ #undef fpop
185
+ #undef fswap
186
+ #undef fvswap
187
+ #undef FALLBACK_QSORT_SMALL_THRESH
188
+ #undef FALLBACK_QSORT_STACK_SIZE
189
+
190
+
191
+ /*---------------------------------------------*/
192
+ /* Pre:
193
+ nblock > 0
194
+ eclass exists for [0 .. nblock-1]
195
+ ((UChar*)eclass) [0 .. nblock-1] holds block
196
+ fmap exists for [0 .. nblock-1]
197
+
198
+ Post:
199
+ ((UChar*)eclass) [0 .. nblock-1] holds block
200
+ All other areas of eclass destroyed
201
+ fmap [0 .. nblock-1] holds sorted order
202
+ bhtab [ 0 .. 2+(nblock/32) ] destroyed
203
+ */
204
+
205
/* Bit-vector helpers used by fallbackSort.  `bhtab` is an array of
   32-bit words; bit zz lives in word (zz >> 5) at position (zz & 31).
   All of them refer to the caller's `bhtab` by name. */

/* Set bit zz. */
#define SET_BH(zz)       (bhtab[(zz) >> 5] |=  ((UInt32)1 << ((zz) & 31)))
/* Clear bit zz. */
#define CLEAR_BH(zz)     (bhtab[(zz) >> 5] &= ~((UInt32)1 << ((zz) & 31)))
/* Non-zero iff bit zz is set. */
#define ISSET_BH(zz)     (bhtab[(zz) >> 5] &   ((UInt32)1 << ((zz) & 31)))
/* The whole 32-bit word that contains bit zz. */
#define WORD_BH(zz)      (bhtab[(zz) >> 5])
/* Non-zero iff zz is not the first bit of its word. */
#define UNALIGNED_BH(zz) ((zz) & 31)
210
+
211
+ static
212
+ void fallbackSort ( UInt32* fmap,
213
+ UInt32* eclass,
214
+ UInt32* bhtab,
215
+ Int32 nblock,
216
+ Int32 verb )
217
+ {
218
+ Int32 ftab[257];
219
+ Int32 ftabCopy[256];
220
+ Int32 H, i, j, k, l, r, cc, cc1;
221
+ Int32 nNotDone;
222
+ Int32 nBhtab;
223
+ UChar* eclass8 = (UChar*)eclass;
224
+
225
+ /*--
226
+ Initial 1-char radix sort to generate
227
+ initial fmap and initial BH bits.
228
+ --*/
229
+ if (verb >= 4)
230
+ VPrintf0 ( " bucket sorting ...\n" );
231
+ for (i = 0; i < 257; i++) ftab[i] = 0;
232
+ for (i = 0; i < nblock; i++) ftab[eclass8[i]]++;
233
+ for (i = 0; i < 256; i++) ftabCopy[i] = ftab[i];
234
+ for (i = 1; i < 257; i++) ftab[i] += ftab[i-1];
235
+
236
+ for (i = 0; i < nblock; i++) {
237
+ j = eclass8[i];
238
+ k = ftab[j] - 1;
239
+ ftab[j] = k;
240
+ fmap[k] = i;
241
+ }
242
+
243
+ nBhtab = 2 + (nblock / 32);
244
+ for (i = 0; i < nBhtab; i++) bhtab[i] = 0;
245
+ for (i = 0; i < 256; i++) SET_BH(ftab[i]);
246
+
247
+ /*--
248
+ Inductively refine the buckets. Kind-of an
249
+ "exponential radix sort" (!), inspired by the
250
+ Manber-Myers suffix array construction algorithm.
251
+ --*/
252
+
253
+ /*-- set sentinel bits for block-end detection --*/
254
+ for (i = 0; i < 32; i++) {
255
+ SET_BH(nblock + 2*i);
256
+ CLEAR_BH(nblock + 2*i + 1);
257
+ }
258
+
259
+ /*-- the log(N) loop --*/
260
+ H = 1;
261
+ while (1) {
262
+
263
+ if (verb >= 4)
264
+ VPrintf1 ( " depth %6d has ", H );
265
+
266
+ j = 0;
267
+ for (i = 0; i < nblock; i++) {
268
+ if (ISSET_BH(i)) j = i;
269
+ k = fmap[i] - H; if (k < 0) k += nblock;
270
+ eclass[k] = j;
271
+ }
272
+
273
+ nNotDone = 0;
274
+ r = -1;
275
+ while (1) {
276
+
277
+ /*-- find the next non-singleton bucket --*/
278
+ k = r + 1;
279
+ while (ISSET_BH(k) && UNALIGNED_BH(k)) k++;
280
+ if (ISSET_BH(k)) {
281
+ while (WORD_BH(k) == 0xffffffff) k += 32;
282
+ while (ISSET_BH(k)) k++;
283
+ }
284
+ l = k - 1;
285
+ if (l >= nblock) break;
286
+ while (!ISSET_BH(k) && UNALIGNED_BH(k)) k++;
287
+ if (!ISSET_BH(k)) {
288
+ while (WORD_BH(k) == 0x00000000) k += 32;
289
+ while (!ISSET_BH(k)) k++;
290
+ }
291
+ r = k - 1;
292
+ if (r >= nblock) break;
293
+
294
+ /*-- now [l, r] bracket current bucket --*/
295
+ if (r > l) {
296
+ nNotDone += (r - l + 1);
297
+ fallbackQSort3 ( fmap, eclass, l, r );
298
+
299
+ /*-- scan bucket and generate header bits-- */
300
+ cc = -1;
301
+ for (i = l; i <= r; i++) {
302
+ cc1 = eclass[fmap[i]];
303
+ if (cc != cc1) { SET_BH(i); cc = cc1; };
304
+ }
305
+ }
306
+ }
307
+
308
+ if (verb >= 4)
309
+ VPrintf1 ( "%6d unresolved strings\n", nNotDone );
310
+
311
+ H *= 2;
312
+ if (H > nblock || nNotDone == 0) break;
313
+ }
314
+
315
+ /*--
316
+ Reconstruct the original block in
317
+ eclass8 [0 .. nblock-1], since the
318
+ previous phase destroyed it.
319
+ --*/
320
+ if (verb >= 4)
321
+ VPrintf0 ( " reconstructing block ...\n" );
322
+ j = 0;
323
+ for (i = 0; i < nblock; i++) {
324
+ while (ftabCopy[j] == 0) j++;
325
+ ftabCopy[j]--;
326
+ eclass8[fmap[i]] = (UChar)j;
327
+ }
328
+ AssertH ( j < 256, 1005 );
329
+ }
330
+
331
+ #undef SET_BH
332
+ #undef CLEAR_BH
333
+ #undef ISSET_BH
334
+ #undef WORD_BH
335
+ #undef UNALIGNED_BH
336
+
337
+
338
+ /*---------------------------------------------*/
339
+ /*--- The main, O(N^2 log(N)) sorting ---*/
340
+ /*--- algorithm. Faster for "normal" ---*/
341
+ /*--- non-repetitive blocks. ---*/
342
+ /*---------------------------------------------*/
343
+
344
+ /*---------------------------------------------*/
345
+ static
346
+ __inline__
347
+ Bool mainGtU ( UInt32 i1,
348
+ UInt32 i2,
349
+ UChar* block,
350
+ UInt16* quadrant,
351
+ UInt32 nblock,
352
+ Int32* budget )
353
+ {
354
+ Int32 k;
355
+ UChar c1, c2;
356
+ UInt16 s1, s2;
357
+
358
+ AssertD ( i1 != i2, "mainGtU" );
359
+ /* 1 */
360
+ c1 = block[i1]; c2 = block[i2];
361
+ if (c1 != c2) return (c1 > c2);
362
+ i1++; i2++;
363
+ /* 2 */
364
+ c1 = block[i1]; c2 = block[i2];
365
+ if (c1 != c2) return (c1 > c2);
366
+ i1++; i2++;
367
+ /* 3 */
368
+ c1 = block[i1]; c2 = block[i2];
369
+ if (c1 != c2) return (c1 > c2);
370
+ i1++; i2++;
371
+ /* 4 */
372
+ c1 = block[i1]; c2 = block[i2];
373
+ if (c1 != c2) return (c1 > c2);
374
+ i1++; i2++;
375
+ /* 5 */
376
+ c1 = block[i1]; c2 = block[i2];
377
+ if (c1 != c2) return (c1 > c2);
378
+ i1++; i2++;
379
+ /* 6 */
380
+ c1 = block[i1]; c2 = block[i2];
381
+ if (c1 != c2) return (c1 > c2);
382
+ i1++; i2++;
383
+ /* 7 */
384
+ c1 = block[i1]; c2 = block[i2];
385
+ if (c1 != c2) return (c1 > c2);
386
+ i1++; i2++;
387
+ /* 8 */
388
+ c1 = block[i1]; c2 = block[i2];
389
+ if (c1 != c2) return (c1 > c2);
390
+ i1++; i2++;
391
+ /* 9 */
392
+ c1 = block[i1]; c2 = block[i2];
393
+ if (c1 != c2) return (c1 > c2);
394
+ i1++; i2++;
395
+ /* 10 */
396
+ c1 = block[i1]; c2 = block[i2];
397
+ if (c1 != c2) return (c1 > c2);
398
+ i1++; i2++;
399
+ /* 11 */
400
+ c1 = block[i1]; c2 = block[i2];
401
+ if (c1 != c2) return (c1 > c2);
402
+ i1++; i2++;
403
+ /* 12 */
404
+ c1 = block[i1]; c2 = block[i2];
405
+ if (c1 != c2) return (c1 > c2);
406
+ i1++; i2++;
407
+
408
+ k = nblock + 8;
409
+
410
+ do {
411
+ /* 1 */
412
+ c1 = block[i1]; c2 = block[i2];
413
+ if (c1 != c2) return (c1 > c2);
414
+ s1 = quadrant[i1]; s2 = quadrant[i2];
415
+ if (s1 != s2) return (s1 > s2);
416
+ i1++; i2++;
417
+ /* 2 */
418
+ c1 = block[i1]; c2 = block[i2];
419
+ if (c1 != c2) return (c1 > c2);
420
+ s1 = quadrant[i1]; s2 = quadrant[i2];
421
+ if (s1 != s2) return (s1 > s2);
422
+ i1++; i2++;
423
+ /* 3 */
424
+ c1 = block[i1]; c2 = block[i2];
425
+ if (c1 != c2) return (c1 > c2);
426
+ s1 = quadrant[i1]; s2 = quadrant[i2];
427
+ if (s1 != s2) return (s1 > s2);
428
+ i1++; i2++;
429
+ /* 4 */
430
+ c1 = block[i1]; c2 = block[i2];
431
+ if (c1 != c2) return (c1 > c2);
432
+ s1 = quadrant[i1]; s2 = quadrant[i2];
433
+ if (s1 != s2) return (s1 > s2);
434
+ i1++; i2++;
435
+ /* 5 */
436
+ c1 = block[i1]; c2 = block[i2];
437
+ if (c1 != c2) return (c1 > c2);
438
+ s1 = quadrant[i1]; s2 = quadrant[i2];
439
+ if (s1 != s2) return (s1 > s2);
440
+ i1++; i2++;
441
+ /* 6 */
442
+ c1 = block[i1]; c2 = block[i2];
443
+ if (c1 != c2) return (c1 > c2);
444
+ s1 = quadrant[i1]; s2 = quadrant[i2];
445
+ if (s1 != s2) return (s1 > s2);
446
+ i1++; i2++;
447
+ /* 7 */
448
+ c1 = block[i1]; c2 = block[i2];
449
+ if (c1 != c2) return (c1 > c2);
450
+ s1 = quadrant[i1]; s2 = quadrant[i2];
451
+ if (s1 != s2) return (s1 > s2);
452
+ i1++; i2++;
453
+ /* 8 */
454
+ c1 = block[i1]; c2 = block[i2];
455
+ if (c1 != c2) return (c1 > c2);
456
+ s1 = quadrant[i1]; s2 = quadrant[i2];
457
+ if (s1 != s2) return (s1 > s2);
458
+ i1++; i2++;
459
+
460
+ if (i1 >= nblock) i1 -= nblock;
461
+ if (i2 >= nblock) i2 -= nblock;
462
+
463
+ k -= 8;
464
+ (*budget)--;
465
+ }
466
+ while (k >= 0);
467
+
468
+ return False;
469
+ }
470
+
471
+
472
+ /*---------------------------------------------*/
473
+ /*--
474
+ Knuth's increments seem to work better
475
+ than Incerpi-Sedgewick here. Possibly
476
+ because the number of elems to sort is
477
+ usually small, typically <= 20.
478
+ --*/
479
+ static
480
+ Int32 incs[14] = { 1, 4, 13, 40, 121, 364, 1093, 3280,
481
+ 9841, 29524, 88573, 265720,
482
+ 797161, 2391484 };
483
+
484
+ static
485
+ void mainSimpleSort ( UInt32* ptr,
486
+ UChar* block,
487
+ UInt16* quadrant,
488
+ Int32 nblock,
489
+ Int32 lo,
490
+ Int32 hi,
491
+ Int32 d,
492
+ Int32* budget )
493
+ {
494
+ Int32 i, j, h, bigN, hp;
495
+ UInt32 v;
496
+
497
+ bigN = hi - lo + 1;
498
+ if (bigN < 2) return;
499
+
500
+ hp = 0;
501
+ while (incs[hp] < bigN) hp++;
502
+ hp--;
503
+
504
+ for (; hp >= 0; hp--) {
505
+ h = incs[hp];
506
+
507
+ i = lo + h;
508
+ while (True) {
509
+
510
+ /*-- copy 1 --*/
511
+ if (i > hi) break;
512
+ v = ptr[i];
513
+ j = i;
514
+ while ( mainGtU (
515
+ ptr[j-h]+d, v+d, block, quadrant, nblock, budget
516
+ ) ) {
517
+ ptr[j] = ptr[j-h];
518
+ j = j - h;
519
+ if (j <= (lo + h - 1)) break;
520
+ }
521
+ ptr[j] = v;
522
+ i++;
523
+
524
+ /*-- copy 2 --*/
525
+ if (i > hi) break;
526
+ v = ptr[i];
527
+ j = i;
528
+ while ( mainGtU (
529
+ ptr[j-h]+d, v+d, block, quadrant, nblock, budget
530
+ ) ) {
531
+ ptr[j] = ptr[j-h];
532
+ j = j - h;
533
+ if (j <= (lo + h - 1)) break;
534
+ }
535
+ ptr[j] = v;
536
+ i++;
537
+
538
+ /*-- copy 3 --*/
539
+ if (i > hi) break;
540
+ v = ptr[i];
541
+ j = i;
542
+ while ( mainGtU (
543
+ ptr[j-h]+d, v+d, block, quadrant, nblock, budget
544
+ ) ) {
545
+ ptr[j] = ptr[j-h];
546
+ j = j - h;
547
+ if (j <= (lo + h - 1)) break;
548
+ }
549
+ ptr[j] = v;
550
+ i++;
551
+
552
+ if (*budget < 0) return;
553
+ }
554
+ }
555
+ }
556
+
557
+
558
+ /*---------------------------------------------*/
559
+ /*--
560
+ The following is an implementation of
561
+ an elegant 3-way quicksort for strings,
562
+ described in a paper "Fast Algorithms for
563
+ Sorting and Searching Strings", by Robert
564
+ Sedgewick and Jon L. Bentley.
565
+ --*/
566
+
567
/* Swap helpers for mainQSort3.  mvswap refers to the caller's `ptr`
   array by name.  Both macros expand to a single statement and must
   be followed by a semicolon at the point of use. */

/* Exchange two lvalues via an Int32 temporary. */
#define mswap(zz1, zz2) \
   do { Int32 zztmp = zz1; zz1 = zz2; zz2 = zztmp; } while (0)

/* Exchange the zzn entries starting at ptr[zzp1] with the zzn
   entries starting at ptr[zzp2]. */
#define mvswap(zzp1, zzp2, zzn)       \
   do {                               \
      Int32 yyp1 = (zzp1);            \
      Int32 yyp2 = (zzp2);            \
      Int32 yyn  = (zzn);             \
      while (yyn > 0) {               \
         mswap(ptr[yyp1], ptr[yyp2]); \
         yyp1++; yyp2++; yyn--;       \
      }                               \
   } while (0)
580
+
581
+ static
582
+ __inline__
583
+ UChar mmed3 ( UChar a, UChar b, UChar c )
584
+ {
585
+ UChar t;
586
+ if (a > b) { t = a; a = b; b = t; };
587
+ if (b > c) {
588
+ b = c;
589
+ if (a > b) b = a;
590
+ }
591
+ return b;
592
+ }
593
+
594
/* Helper macros private to mainQSort3.  They refer to that
   function's locals by name: mpush/mpop use `stackLo`, `stackHi`,
   `stackD` and `sp`; mnextsize/mnextswap use `nextLo`, `nextHi` and
   `nextD`.  The statement-like macros are wrapped in
   do { } while (0) so they act as a single statement and must be
   followed by a semicolon at the point of use. */

/* Smaller of two values.  The whole expansion is parenthesised so
   the macro can be used inside a larger expression; each argument
   may be evaluated twice, so pass side-effect-free expressions. */
#define mmin(a,b) (((a) < (b)) ? (a) : (b))

/* Push the range [lz, hz] with comparison depth dz. */
#define mpush(lz,hz,dz) do { stackLo[sp] = lz; \
                             stackHi[sp] = hz; \
                             stackD [sp] = dz; \
                             sp++; } while (0)

/* Pop the most recently pushed range and depth. */
#define mpop(lz,hz,dz) do { sp--;             \
                            lz = stackLo[sp]; \
                            hz = stackHi[sp]; \
                            dz = stackD [sp]; } while (0)

/* hi - lo of candidate sub-range az. */
#define mnextsize(az) (nextHi[az]-nextLo[az])

/* Exchange candidate sub-ranges az and bz (lo, hi and depth). */
#define mnextswap(az,bz)                                           \
   do { Int32 tz;                                                  \
        tz = nextLo[az]; nextLo[az] = nextLo[bz]; nextLo[bz] = tz; \
        tz = nextHi[az]; nextHi[az] = nextHi[bz]; nextHi[bz] = tz; \
        tz = nextD [az]; nextD [az] = nextD [bz]; nextD [bz] = tz; \
   } while (0)

/* Ranges with hi - lo below this go to mainSimpleSort. */
#define MAIN_QSORT_SMALL_THRESH 20
/* Ranges whose depth exceeds this also go to mainSimpleSort. */
#define MAIN_QSORT_DEPTH_THRESH (BZ_N_RADIX + BZ_N_QSORT)
/* Number of entries in each work-stack array. */
#define MAIN_QSORT_STACK_SIZE   100
619
+
620
+ static
621
+ void mainQSort3 ( UInt32* ptr,
622
+ UChar* block,
623
+ UInt16* quadrant,
624
+ Int32 nblock,
625
+ Int32 loSt,
626
+ Int32 hiSt,
627
+ Int32 dSt,
628
+ Int32* budget )
629
+ {
630
+ Int32 unLo, unHi, ltLo, gtHi, n, m, med;
631
+ Int32 sp, lo, hi, d;
632
+
633
+ Int32 stackLo[MAIN_QSORT_STACK_SIZE];
634
+ Int32 stackHi[MAIN_QSORT_STACK_SIZE];
635
+ Int32 stackD [MAIN_QSORT_STACK_SIZE];
636
+
637
+ Int32 nextLo[3];
638
+ Int32 nextHi[3];
639
+ Int32 nextD [3];
640
+
641
+ sp = 0;
642
+ mpush ( loSt, hiSt, dSt );
643
+
644
+ while (sp > 0) {
645
+
646
+ AssertH ( sp < MAIN_QSORT_STACK_SIZE - 2, 1001 );
647
+
648
+ mpop ( lo, hi, d );
649
+ if (hi - lo < MAIN_QSORT_SMALL_THRESH ||
650
+ d > MAIN_QSORT_DEPTH_THRESH) {
651
+ mainSimpleSort ( ptr, block, quadrant, nblock, lo, hi, d, budget );
652
+ if (*budget < 0) return;
653
+ continue;
654
+ }
655
+
656
+ med = (Int32)
657
+ mmed3 ( block[ptr[ lo ]+d],
658
+ block[ptr[ hi ]+d],
659
+ block[ptr[ (lo+hi)>>1 ]+d] );
660
+
661
+ unLo = ltLo = lo;
662
+ unHi = gtHi = hi;
663
+
664
+ while (True) {
665
+ while (True) {
666
+ if (unLo > unHi) break;
667
+ n = ((Int32)block[ptr[unLo]+d]) - med;
668
+ if (n == 0) {
669
+ mswap(ptr[unLo], ptr[ltLo]);
670
+ ltLo++; unLo++; continue;
671
+ };
672
+ if (n > 0) break;
673
+ unLo++;
674
+ }
675
+ while (True) {
676
+ if (unLo > unHi) break;
677
+ n = ((Int32)block[ptr[unHi]+d]) - med;
678
+ if (n == 0) {
679
+ mswap(ptr[unHi], ptr[gtHi]);
680
+ gtHi--; unHi--; continue;
681
+ };
682
+ if (n < 0) break;
683
+ unHi--;
684
+ }
685
+ if (unLo > unHi) break;
686
+ mswap(ptr[unLo], ptr[unHi]); unLo++; unHi--;
687
+ }
688
+
689
+ AssertD ( unHi == unLo-1, "mainQSort3(2)" );
690
+
691
+ if (gtHi < ltLo) {
692
+ mpush(lo, hi, d+1 );
693
+ continue;
694
+ }
695
+
696
+ n = mmin(ltLo-lo, unLo-ltLo); mvswap(lo, unLo-n, n);
697
+ m = mmin(hi-gtHi, gtHi-unHi); mvswap(unLo, hi-m+1, m);
698
+
699
+ n = lo + unLo - ltLo - 1;
700
+ m = hi - (gtHi - unHi) + 1;
701
+
702
+ nextLo[0] = lo; nextHi[0] = n; nextD[0] = d;
703
+ nextLo[1] = m; nextHi[1] = hi; nextD[1] = d;
704
+ nextLo[2] = n+1; nextHi[2] = m-1; nextD[2] = d+1;
705
+
706
+ if (mnextsize(0) < mnextsize(1)) mnextswap(0,1);
707
+ if (mnextsize(1) < mnextsize(2)) mnextswap(1,2);
708
+ if (mnextsize(0) < mnextsize(1)) mnextswap(0,1);
709
+
710
+ AssertD (mnextsize(0) >= mnextsize(1), "mainQSort3(8)" );
711
+ AssertD (mnextsize(1) >= mnextsize(2), "mainQSort3(9)" );
712
+
713
+ mpush (nextLo[0], nextHi[0], nextD[0]);
714
+ mpush (nextLo[1], nextHi[1], nextD[1]);
715
+ mpush (nextLo[2], nextHi[2], nextD[2]);
716
+ }
717
+ }
718
+
719
+ #undef mswap
720
+ #undef mvswap
721
+ #undef mpush
722
+ #undef mpop
723
+ #undef mmin
724
+ #undef mnextsize
725
+ #undef mnextswap
726
+ #undef MAIN_QSORT_SMALL_THRESH
727
+ #undef MAIN_QSORT_DEPTH_THRESH
728
+ #undef MAIN_QSORT_STACK_SIZE
729
+
730
+
731
+ /*---------------------------------------------*/
732
+ /* Pre:
733
+ nblock > N_OVERSHOOT
734
+ block32 exists for [0 .. nblock-1 +N_OVERSHOOT]
735
+ ((UChar*)block32) [0 .. nblock-1] holds block
736
+ ptr exists for [0 .. nblock-1]
737
+
738
+ Post:
739
+ ((UChar*)block32) [0 .. nblock-1] holds block
740
+ All other areas of block32 destroyed
741
+ ftab [0 .. 65536 ] destroyed
742
+ ptr [0 .. nblock-1] holds sorted order
743
+ if (*budget < 0), sorting was abandoned
744
+ */
745
+
746
/* Helpers for mainSort.  BIGFREQ refers to the caller's `ftab` by
   name. */

/* Difference between the ftab entries at the start of big bucket
   b+1 and the start of big bucket b. */
#define BIGFREQ(b) (ftab[((b)+1) << 8] - ftab[(b) << 8])
/* Flag bit OR-ed into an ftab entry to mark a small bucket as done. */
#define SETMASK    (1 << 21)
/* Mask that strips the flag bit from an ftab entry. */
#define CLEARMASK  (~SETMASK)
749
+
750
+ static
751
+ void mainSort ( UInt32* ptr,
752
+ UChar* block,
753
+ UInt16* quadrant,
754
+ UInt32* ftab,
755
+ Int32 nblock,
756
+ Int32 verb,
757
+ Int32* budget )
758
+ {
759
+ Int32 i, j, k, ss, sb;
760
+ Int32 runningOrder[256];
761
+ Bool bigDone[256];
762
+ Int32 copyStart[256];
763
+ Int32 copyEnd [256];
764
+ UChar c1;
765
+ Int32 numQSorted;
766
+ UInt16 s;
767
+ if (verb >= 4) VPrintf0 ( " main sort initialise ...\n" );
768
+
769
+ /*-- set up the 2-byte frequency table --*/
770
+ for (i = 65536; i >= 0; i--) ftab[i] = 0;
771
+
772
+ j = block[0] << 8;
773
+ i = nblock-1;
774
+ for (; i >= 3; i -= 4) {
775
+ quadrant[i] = 0;
776
+ j = (j >> 8) | ( ((UInt16)block[i]) << 8);
777
+ ftab[j]++;
778
+ quadrant[i-1] = 0;
779
+ j = (j >> 8) | ( ((UInt16)block[i-1]) << 8);
780
+ ftab[j]++;
781
+ quadrant[i-2] = 0;
782
+ j = (j >> 8) | ( ((UInt16)block[i-2]) << 8);
783
+ ftab[j]++;
784
+ quadrant[i-3] = 0;
785
+ j = (j >> 8) | ( ((UInt16)block[i-3]) << 8);
786
+ ftab[j]++;
787
+ }
788
+ for (; i >= 0; i--) {
789
+ quadrant[i] = 0;
790
+ j = (j >> 8) | ( ((UInt16)block[i]) << 8);
791
+ ftab[j]++;
792
+ }
793
+
794
+ /*-- (emphasises close relationship of block & quadrant) --*/
795
+ for (i = 0; i < BZ_N_OVERSHOOT; i++) {
796
+ block [nblock+i] = block[i];
797
+ quadrant[nblock+i] = 0;
798
+ }
799
+
800
+ if (verb >= 4) VPrintf0 ( " bucket sorting ...\n" );
801
+
802
+ /*-- Complete the initial radix sort --*/
803
+ for (i = 1; i <= 65536; i++) ftab[i] += ftab[i-1];
804
+
805
+ s = block[0] << 8;
806
+ i = nblock-1;
807
+ for (; i >= 3; i -= 4) {
808
+ s = (s >> 8) | (block[i] << 8);
809
+ j = ftab[s] -1;
810
+ ftab[s] = j;
811
+ ptr[j] = i;
812
+ s = (s >> 8) | (block[i-1] << 8);
813
+ j = ftab[s] -1;
814
+ ftab[s] = j;
815
+ ptr[j] = i-1;
816
+ s = (s >> 8) | (block[i-2] << 8);
817
+ j = ftab[s] -1;
818
+ ftab[s] = j;
819
+ ptr[j] = i-2;
820
+ s = (s >> 8) | (block[i-3] << 8);
821
+ j = ftab[s] -1;
822
+ ftab[s] = j;
823
+ ptr[j] = i-3;
824
+ }
825
+ for (; i >= 0; i--) {
826
+ s = (s >> 8) | (block[i] << 8);
827
+ j = ftab[s] -1;
828
+ ftab[s] = j;
829
+ ptr[j] = i;
830
+ }
831
+
832
+ /*--
833
+ Now ftab contains the first loc of every small bucket.
834
+ Calculate the running order, from smallest to largest
835
+ big bucket.
836
+ --*/
837
+ for (i = 0; i <= 255; i++) {
838
+ bigDone [i] = False;
839
+ runningOrder[i] = i;
840
+ }
841
+
842
+ {
843
+ Int32 vv;
844
+ Int32 h = 1;
845
+ do h = 3 * h + 1; while (h <= 256);
846
+ do {
847
+ h = h / 3;
848
+ for (i = h; i <= 255; i++) {
849
+ vv = runningOrder[i];
850
+ j = i;
851
+ while ( BIGFREQ(runningOrder[j-h]) > BIGFREQ(vv) ) {
852
+ runningOrder[j] = runningOrder[j-h];
853
+ j = j - h;
854
+ if (j <= (h - 1)) goto zero;
855
+ }
856
+ zero:
857
+ runningOrder[j] = vv;
858
+ }
859
+ } while (h != 1);
860
+ }
861
+
862
+ /*--
863
+ The main sorting loop.
864
+ --*/
865
+
866
+ numQSorted = 0;
867
+
868
+ for (i = 0; i <= 255; i++) {
869
+
870
+ /*--
871
+ Process big buckets, starting with the least full.
872
+ Basically this is a 3-step process in which we call
873
+ mainQSort3 to sort the small buckets [ss, j], but
874
+ also make a big effort to avoid the calls if we can.
875
+ --*/
876
+ ss = runningOrder[i];
877
+
878
+ /*--
879
+ Step 1:
880
+ Complete the big bucket [ss] by quicksorting
881
+ any unsorted small buckets [ss, j], for j != ss.
882
+ Hopefully previous pointer-scanning phases have already
883
+ completed many of the small buckets [ss, j], so
884
+ we don't have to sort them at all.
885
+ --*/
886
+ for (j = 0; j <= 255; j++) {
887
+ if (j != ss) {
888
+ sb = (ss << 8) + j;
889
+ if ( ! (ftab[sb] & SETMASK) ) {
890
+ Int32 lo = ftab[sb] & CLEARMASK;
891
+ Int32 hi = (ftab[sb+1] & CLEARMASK) - 1;
892
+ if (hi > lo) {
893
+ if (verb >= 4)
894
+ VPrintf4 ( " qsort [0x%x, 0x%x] "
895
+ "done %d this %d\n",
896
+ ss, j, numQSorted, hi - lo + 1 );
897
+ mainQSort3 (
898
+ ptr, block, quadrant, nblock,
899
+ lo, hi, BZ_N_RADIX, budget
900
+ );
901
+ numQSorted += (hi - lo + 1);
902
+ if (*budget < 0) return;
903
+ }
904
+ }
905
+ ftab[sb] |= SETMASK;
906
+ }
907
+ }
908
+
909
+ AssertH ( !bigDone[ss], 1006 );
910
+
911
+ /*--
912
+ Step 2:
913
+ Now scan this big bucket [ss] so as to synthesise the
914
+ sorted order for small buckets [t, ss] for all t,
915
+ including, magically, the bucket [ss,ss] too.
916
+ This will avoid doing Real Work in subsequent Step 1's.
917
+ --*/
918
+ {
919
+ for (j = 0; j <= 255; j++) {
920
+ copyStart[j] = ftab[(j << 8) + ss] & CLEARMASK;
921
+ copyEnd [j] = (ftab[(j << 8) + ss + 1] & CLEARMASK) - 1;
922
+ }
923
+ for (j = ftab[ss << 8] & CLEARMASK; j < copyStart[ss]; j++) {
924
+ k = ptr[j]-1; if (k < 0) k += nblock;
925
+ c1 = block[k];
926
+ if (!bigDone[c1])
927
+ ptr[ copyStart[c1]++ ] = k;
928
+ }
929
+ for (j = (ftab[(ss+1) << 8] & CLEARMASK) - 1; j > copyEnd[ss]; j--) {
930
+ k = ptr[j]-1; if (k < 0) k += nblock;
931
+ c1 = block[k];
932
+ if (!bigDone[c1])
933
+ ptr[ copyEnd[c1]-- ] = k;
934
+ }
935
+ }
936
+
937
+ AssertH ( (copyStart[ss]-1 == copyEnd[ss])
938
+ ||
939
+ /* Extremely rare case missing in bzip2-1.0.0 and 1.0.1.
940
+ Necessity for this case is demonstrated by compressing
941
+ a sequence of approximately 48.5 million of character
942
+ 251; 1.0.0/1.0.1 will then die here. */
943
+ (copyStart[ss] == 0 && copyEnd[ss] == nblock-1),
944
+ 1007 )
945
+
946
+ for (j = 0; j <= 255; j++) ftab[(j << 8) + ss] |= SETMASK;
947
+
948
+ /*--
949
+ Step 3:
950
+ The [ss] big bucket is now done. Record this fact,
951
+ and update the quadrant descriptors. Remember to
952
+ update quadrants in the overshoot area too, if
953
+ necessary. The "if (i < 255)" test merely skips
954
+ this updating for the last bucket processed, since
955
+ updating for the last bucket is pointless.
956
+
957
+ The quadrant array provides a way to incrementally
958
+ cache sort orderings, as they appear, so as to
959
+ make subsequent comparisons in fullGtU() complete
960
+ faster. For repetitive blocks this makes a big
961
+ difference (but not big enough to be able to avoid
962
+ the fallback sorting mechanism, exponential radix sort).
963
+
964
+ The precise meaning is: at all times:
965
+
966
+ for 0 <= i < nblock and 0 <= j <= nblock
967
+
968
+ if block[i] != block[j],
969
+
970
+ then the relative values of quadrant[i] and
971
+ quadrant[j] are meaningless.
972
+
973
+ else {
974
+ if quadrant[i] < quadrant[j]
975
+ then the string starting at i lexicographically
976
+ precedes the string starting at j
977
+
978
+ else if quadrant[i] > quadrant[j]
979
+ then the string starting at j lexicographically
980
+ precedes the string starting at i
981
+
982
+ else
983
+ the relative ordering of the strings starting
984
+ at i and j has not yet been determined.
985
+ }
986
+ --*/
987
+ bigDone[ss] = True;
988
+
989
+ if (i < 255) {
990
+ Int32 bbStart = ftab[ss << 8] & CLEARMASK;
991
+ Int32 bbSize = (ftab[(ss+1) << 8] & CLEARMASK) - bbStart;
992
+ Int32 shifts = 0;
993
+
994
+ while ((bbSize >> shifts) > 65534) shifts++;
995
+
996
+ for (j = bbSize-1; j >= 0; j--) {
997
+ Int32 a2update = ptr[bbStart + j];
998
+ UInt16 qVal = (UInt16)(j >> shifts);
999
+ quadrant[a2update] = qVal;
1000
+ if (a2update < BZ_N_OVERSHOOT)
1001
+ quadrant[a2update + nblock] = qVal;
1002
+ }
1003
+ AssertH ( ((bbSize-1) >> shifts) <= 65535, 1002 );
1004
+ }
1005
+
1006
+ }
1007
+
1008
+ if (verb >= 4)
1009
+ VPrintf3 ( " %d pointers, %d sorted, %d scanned\n",
1010
+ nblock, numQSorted, nblock - numQSorted );
1011
+ }
1012
+
1013
+ #undef BIGFREQ
1014
+ #undef SETMASK
1015
+ #undef CLEARMASK
1016
+
1017
+
1018
+ /*---------------------------------------------*/
1019
+ /* Pre:
1020
+ nblock > 0
1021
+ arr2 exists for [0 .. nblock-1 +N_OVERSHOOT]
1022
+ ((UChar*)arr2) [0 .. nblock-1] holds block
1023
+ arr1 exists for [0 .. nblock-1]
1024
+
1025
+ Post:
1026
+ ((UChar*)arr2) [0 .. nblock-1] holds block
1027
+ All other areas of block destroyed
1028
+ ftab [ 0 .. 65536 ] destroyed
1029
+ arr1 [0 .. nblock-1] holds sorted order
1030
+ */
1031
+ void BZ2_blockSort ( EState* s )
1032
+ {
1033
+ UInt32* ptr = s->ptr;
1034
+ UChar* block = s->block;
1035
+ UInt32* ftab = s->ftab;
1036
+ Int32 nblock = s->nblock;
1037
+ Int32 verb = s->verbosity;
1038
+ Int32 wfact = s->workFactor;
1039
+ UInt16* quadrant;
1040
+ Int32 budget;
1041
+ Int32 budgetInit;
1042
+ Int32 i;
1043
+
1044
+ if (nblock < 10000) {
1045
+ fallbackSort ( s->arr1, s->arr2, ftab, nblock, verb );
1046
+ } else {
1047
+ /* Calculate the location for quadrant, remembering to get
1048
+ the alignment right. Assumes that &(block[0]) is at least
1049
+ 2-byte aligned -- this should be ok since block is really
1050
+ the first section of arr2.
1051
+ */
1052
+ i = nblock+BZ_N_OVERSHOOT;
1053
+ if (i & 1) i++;
1054
+ quadrant = (UInt16*)(&(block[i]));
1055
+
1056
+ /* (wfact-1) / 3 puts the default-factor-30
1057
+ transition point at very roughly the same place as
1058
+ with v0.1 and v0.9.0.
1059
+ Not that it particularly matters any more, since the
1060
+ resulting compressed stream is now the same regardless
1061
+ of whether or not we use the main sort or fallback sort.
1062
+ */
1063
+ if (wfact < 1 ) wfact = 1;
1064
+ if (wfact > 100) wfact = 100;
1065
+ budgetInit = nblock * ((wfact-1) / 3);
1066
+ budget = budgetInit;
1067
+
1068
+ mainSort ( ptr, block, quadrant, ftab, nblock, verb, &budget );
1069
+ if (verb >= 3)
1070
+ VPrintf3 ( " %d work, %d block, ratio %5.2f\n",
1071
+ budgetInit - budget,
1072
+ nblock,
1073
+ (float)(budgetInit - budget) /
1074
+ (float)(nblock==0 ? 1 : nblock) );
1075
+ if (budget < 0) {
1076
+ if (verb >= 2)
1077
+ VPrintf0 ( " too repetitive; using fallback"
1078
+ " sorting algorithm\n" );
1079
+ fallbackSort ( s->arr1, s->arr2, ftab, nblock, verb );
1080
+ }
1081
+ }
1082
+
1083
+ s->origPtr = -1;
1084
+ for (i = 0; i < s->nblock; i++)
1085
+ if (ptr[i] == 0)
1086
+ { s->origPtr = i; break; };
1087
+
1088
+ AssertH( s->origPtr != -1, 1003 );
1089
+ }
1090
+
1091
+
1092
+ /*-------------------------------------------------------------*/
1093
+ /*--- end blocksort.c ---*/
1094
+ /*-------------------------------------------------------------*/