bsdiff 0.0.2 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,1094 @@
1
+
2
+ /*-------------------------------------------------------------*/
3
+ /*--- Block sorting machinery ---*/
4
+ /*--- blocksort.c ---*/
5
+ /*-------------------------------------------------------------*/
6
+
7
+ /* ------------------------------------------------------------------
8
+ This file is part of bzip2/libbzip2, a program and library for
9
+ lossless, block-sorting data compression.
10
+
11
+ bzip2/libbzip2 version 1.0.8 of 13 July 2019
12
+ Copyright (C) 1996-2019 Julian Seward <jseward@acm.org>
13
+
14
+ Please read the WARNING, DISCLAIMER and PATENTS sections in the
15
+ README file.
16
+
17
+ This program is released under the terms of the license contained
18
+ in the file LICENSE.
19
+ ------------------------------------------------------------------ */
20
+
21
+
22
+ #include "bzlib_private.h"
23
+
24
+ /*---------------------------------------------*/
25
+ /*--- Fallback O(N log(N)^2) sorting ---*/
26
+ /*--- algorithm, for repetitive blocks ---*/
27
+ /*---------------------------------------------*/
28
+
29
+ /*---------------------------------------------*/
30
+ static
31
+ __inline__
32
+ void fallbackSimpleSort ( UInt32* fmap,
33
+ UInt32* eclass,
34
+ Int32 lo,
35
+ Int32 hi )
36
+ {
37
+ Int32 i, j, tmp;
38
+ UInt32 ec_tmp;
39
+
40
+ if (lo == hi) return;
41
+
42
+ if (hi - lo > 3) {
43
+ for ( i = hi-4; i >= lo; i-- ) {
44
+ tmp = fmap[i];
45
+ ec_tmp = eclass[tmp];
46
+ for ( j = i+4; j <= hi && ec_tmp > eclass[fmap[j]]; j += 4 )
47
+ fmap[j-4] = fmap[j];
48
+ fmap[j-4] = tmp;
49
+ }
50
+ }
51
+
52
+ for ( i = hi-1; i >= lo; i-- ) {
53
+ tmp = fmap[i];
54
+ ec_tmp = eclass[tmp];
55
+ for ( j = i+1; j <= hi && ec_tmp > eclass[fmap[j]]; j++ )
56
+ fmap[j-1] = fmap[j];
57
+ fmap[j-1] = tmp;
58
+ }
59
+ }
60
+
61
+
62
+ /*---------------------------------------------*/
63
+ #define fswap(zz1, zz2) \
64
+ { Int32 zztmp = zz1; zz1 = zz2; zz2 = zztmp; }
65
+
66
+ #define fvswap(zzp1, zzp2, zzn) \
67
+ { \
68
+ Int32 yyp1 = (zzp1); \
69
+ Int32 yyp2 = (zzp2); \
70
+ Int32 yyn = (zzn); \
71
+ while (yyn > 0) { \
72
+ fswap(fmap[yyp1], fmap[yyp2]); \
73
+ yyp1++; yyp2++; yyn--; \
74
+ } \
75
+ }
76
+
77
+
78
+ #define fmin(a,b) ((a) < (b)) ? (a) : (b)
79
+
80
+ #define fpush(lz,hz) { stackLo[sp] = lz; \
81
+ stackHi[sp] = hz; \
82
+ sp++; }
83
+
84
+ #define fpop(lz,hz) { sp--; \
85
+ lz = stackLo[sp]; \
86
+ hz = stackHi[sp]; }
87
+
88
+ #define FALLBACK_QSORT_SMALL_THRESH 10
89
+ #define FALLBACK_QSORT_STACK_SIZE 100
90
+
91
+
92
+ static
93
+ void fallbackQSort3 ( UInt32* fmap,
94
+ UInt32* eclass,
95
+ Int32 loSt,
96
+ Int32 hiSt )
97
+ {
98
+ Int32 unLo, unHi, ltLo, gtHi, n, m;
99
+ Int32 sp, lo, hi;
100
+ UInt32 med, r, r3;
101
+ Int32 stackLo[FALLBACK_QSORT_STACK_SIZE];
102
+ Int32 stackHi[FALLBACK_QSORT_STACK_SIZE];
103
+
104
+ r = 0;
105
+
106
+ sp = 0;
107
+ fpush ( loSt, hiSt );
108
+
109
+ while (sp > 0) {
110
+
111
+ AssertH ( sp < FALLBACK_QSORT_STACK_SIZE - 1, 1004 );
112
+
113
+ fpop ( lo, hi );
114
+ if (hi - lo < FALLBACK_QSORT_SMALL_THRESH) {
115
+ fallbackSimpleSort ( fmap, eclass, lo, hi );
116
+ continue;
117
+ }
118
+
119
+ /* Random partitioning. Median of 3 sometimes fails to
120
+ avoid bad cases. Median of 9 seems to help but
121
+ looks rather expensive. This too seems to work but
122
+ is cheaper. Guidance for the magic constants
123
+ 7621 and 32768 is taken from Sedgewick's algorithms
124
+ book, chapter 35.
125
+ */
126
+ r = ((r * 7621) + 1) % 32768;
127
+ r3 = r % 3;
128
+ if (r3 == 0) med = eclass[fmap[lo]]; else
129
+ if (r3 == 1) med = eclass[fmap[(lo+hi)>>1]]; else
130
+ med = eclass[fmap[hi]];
131
+
132
+ unLo = ltLo = lo;
133
+ unHi = gtHi = hi;
134
+
135
+ while (1) {
136
+ while (1) {
137
+ if (unLo > unHi) break;
138
+ n = (Int32)eclass[fmap[unLo]] - (Int32)med;
139
+ if (n == 0) {
140
+ fswap(fmap[unLo], fmap[ltLo]);
141
+ ltLo++; unLo++;
142
+ continue;
143
+ };
144
+ if (n > 0) break;
145
+ unLo++;
146
+ }
147
+ while (1) {
148
+ if (unLo > unHi) break;
149
+ n = (Int32)eclass[fmap[unHi]] - (Int32)med;
150
+ if (n == 0) {
151
+ fswap(fmap[unHi], fmap[gtHi]);
152
+ gtHi--; unHi--;
153
+ continue;
154
+ };
155
+ if (n < 0) break;
156
+ unHi--;
157
+ }
158
+ if (unLo > unHi) break;
159
+ fswap(fmap[unLo], fmap[unHi]); unLo++; unHi--;
160
+ }
161
+
162
+ AssertD ( unHi == unLo-1, "fallbackQSort3(2)" );
163
+
164
+ if (gtHi < ltLo) continue;
165
+
166
+ n = fmin(ltLo-lo, unLo-ltLo); fvswap(lo, unLo-n, n);
167
+ m = fmin(hi-gtHi, gtHi-unHi); fvswap(unLo, hi-m+1, m);
168
+
169
+ n = lo + unLo - ltLo - 1;
170
+ m = hi - (gtHi - unHi) + 1;
171
+
172
+ if (n - lo > hi - m) {
173
+ fpush ( lo, n );
174
+ fpush ( m, hi );
175
+ } else {
176
+ fpush ( m, hi );
177
+ fpush ( lo, n );
178
+ }
179
+ }
180
+ }
181
+
182
+ #undef fmin
183
+ #undef fpush
184
+ #undef fpop
185
+ #undef fswap
186
+ #undef fvswap
187
+ #undef FALLBACK_QSORT_SMALL_THRESH
188
+ #undef FALLBACK_QSORT_STACK_SIZE
189
+
190
+
191
+ /*---------------------------------------------*/
192
+ /* Pre:
193
+ nblock > 0
194
+ eclass exists for [0 .. nblock-1]
195
+ ((UChar*)eclass) [0 .. nblock-1] holds block
196
+ ptr exists for [0 .. nblock-1]
197
+
198
+ Post:
199
+ ((UChar*)eclass) [0 .. nblock-1] holds block
200
+ All other areas of eclass destroyed
201
+ fmap [0 .. nblock-1] holds sorted order
202
+ bhtab [ 0 .. 2+(nblock/32) ] destroyed
203
+ */
204
+
205
+ #define SET_BH(zz) bhtab[(zz) >> 5] |= ((UInt32)1 << ((zz) & 31))
206
+ #define CLEAR_BH(zz) bhtab[(zz) >> 5] &= ~((UInt32)1 << ((zz) & 31))
207
+ #define ISSET_BH(zz) (bhtab[(zz) >> 5] & ((UInt32)1 << ((zz) & 31)))
208
+ #define WORD_BH(zz) bhtab[(zz) >> 5]
209
+ #define UNALIGNED_BH(zz) ((zz) & 0x01f)
210
+
211
+ static
212
+ void fallbackSort ( UInt32* fmap,
213
+ UInt32* eclass,
214
+ UInt32* bhtab,
215
+ Int32 nblock,
216
+ Int32 verb )
217
+ {
218
+ Int32 ftab[257];
219
+ Int32 ftabCopy[256];
220
+ Int32 H, i, j, k, l, r, cc, cc1;
221
+ Int32 nNotDone;
222
+ Int32 nBhtab;
223
+ UChar* eclass8 = (UChar*)eclass;
224
+
225
+ /*--
226
+ Initial 1-char radix sort to generate
227
+ initial fmap and initial BH bits.
228
+ --*/
229
+ if (verb >= 4)
230
+ VPrintf0 ( " bucket sorting ...\n" );
231
+ for (i = 0; i < 257; i++) ftab[i] = 0;
232
+ for (i = 0; i < nblock; i++) ftab[eclass8[i]]++;
233
+ for (i = 0; i < 256; i++) ftabCopy[i] = ftab[i];
234
+ for (i = 1; i < 257; i++) ftab[i] += ftab[i-1];
235
+
236
+ for (i = 0; i < nblock; i++) {
237
+ j = eclass8[i];
238
+ k = ftab[j] - 1;
239
+ ftab[j] = k;
240
+ fmap[k] = i;
241
+ }
242
+
243
+ nBhtab = 2 + (nblock / 32);
244
+ for (i = 0; i < nBhtab; i++) bhtab[i] = 0;
245
+ for (i = 0; i < 256; i++) SET_BH(ftab[i]);
246
+
247
+ /*--
248
+ Inductively refine the buckets. Kind-of an
249
+ "exponential radix sort" (!), inspired by the
250
+ Manber-Myers suffix array construction algorithm.
251
+ --*/
252
+
253
+ /*-- set sentinel bits for block-end detection --*/
254
+ for (i = 0; i < 32; i++) {
255
+ SET_BH(nblock + 2*i);
256
+ CLEAR_BH(nblock + 2*i + 1);
257
+ }
258
+
259
+ /*-- the log(N) loop --*/
260
+ H = 1;
261
+ while (1) {
262
+
263
+ if (verb >= 4)
264
+ VPrintf1 ( " depth %6d has ", H );
265
+
266
+ j = 0;
267
+ for (i = 0; i < nblock; i++) {
268
+ if (ISSET_BH(i)) j = i;
269
+ k = fmap[i] - H; if (k < 0) k += nblock;
270
+ eclass[k] = j;
271
+ }
272
+
273
+ nNotDone = 0;
274
+ r = -1;
275
+ while (1) {
276
+
277
+ /*-- find the next non-singleton bucket --*/
278
+ k = r + 1;
279
+ while (ISSET_BH(k) && UNALIGNED_BH(k)) k++;
280
+ if (ISSET_BH(k)) {
281
+ while (WORD_BH(k) == 0xffffffff) k += 32;
282
+ while (ISSET_BH(k)) k++;
283
+ }
284
+ l = k - 1;
285
+ if (l >= nblock) break;
286
+ while (!ISSET_BH(k) && UNALIGNED_BH(k)) k++;
287
+ if (!ISSET_BH(k)) {
288
+ while (WORD_BH(k) == 0x00000000) k += 32;
289
+ while (!ISSET_BH(k)) k++;
290
+ }
291
+ r = k - 1;
292
+ if (r >= nblock) break;
293
+
294
+ /*-- now [l, r] bracket current bucket --*/
295
+ if (r > l) {
296
+ nNotDone += (r - l + 1);
297
+ fallbackQSort3 ( fmap, eclass, l, r );
298
+
299
+ /*-- scan bucket and generate header bits-- */
300
+ cc = -1;
301
+ for (i = l; i <= r; i++) {
302
+ cc1 = eclass[fmap[i]];
303
+ if (cc != cc1) { SET_BH(i); cc = cc1; };
304
+ }
305
+ }
306
+ }
307
+
308
+ if (verb >= 4)
309
+ VPrintf1 ( "%6d unresolved strings\n", nNotDone );
310
+
311
+ H *= 2;
312
+ if (H > nblock || nNotDone == 0) break;
313
+ }
314
+
315
+ /*--
316
+ Reconstruct the original block in
317
+ eclass8 [0 .. nblock-1], since the
318
+ previous phase destroyed it.
319
+ --*/
320
+ if (verb >= 4)
321
+ VPrintf0 ( " reconstructing block ...\n" );
322
+ j = 0;
323
+ for (i = 0; i < nblock; i++) {
324
+ while (ftabCopy[j] == 0) j++;
325
+ ftabCopy[j]--;
326
+ eclass8[fmap[i]] = (UChar)j;
327
+ }
328
+ AssertH ( j < 256, 1005 );
329
+ }
330
+
331
+ #undef SET_BH
332
+ #undef CLEAR_BH
333
+ #undef ISSET_BH
334
+ #undef WORD_BH
335
+ #undef UNALIGNED_BH
336
+
337
+
338
+ /*---------------------------------------------*/
339
+ /*--- The main, O(N^2 log(N)) sorting ---*/
340
+ /*--- algorithm. Faster for "normal" ---*/
341
+ /*--- non-repetitive blocks. ---*/
342
+ /*---------------------------------------------*/
343
+
344
+ /*---------------------------------------------*/
345
+ static
346
+ __inline__
347
+ Bool mainGtU ( UInt32 i1,
348
+ UInt32 i2,
349
+ UChar* block,
350
+ UInt16* quadrant,
351
+ UInt32 nblock,
352
+ Int32* budget )
353
+ {
354
+ Int32 k;
355
+ UChar c1, c2;
356
+ UInt16 s1, s2;
357
+
358
+ AssertD ( i1 != i2, "mainGtU" );
359
+ /* 1 */
360
+ c1 = block[i1]; c2 = block[i2];
361
+ if (c1 != c2) return (c1 > c2);
362
+ i1++; i2++;
363
+ /* 2 */
364
+ c1 = block[i1]; c2 = block[i2];
365
+ if (c1 != c2) return (c1 > c2);
366
+ i1++; i2++;
367
+ /* 3 */
368
+ c1 = block[i1]; c2 = block[i2];
369
+ if (c1 != c2) return (c1 > c2);
370
+ i1++; i2++;
371
+ /* 4 */
372
+ c1 = block[i1]; c2 = block[i2];
373
+ if (c1 != c2) return (c1 > c2);
374
+ i1++; i2++;
375
+ /* 5 */
376
+ c1 = block[i1]; c2 = block[i2];
377
+ if (c1 != c2) return (c1 > c2);
378
+ i1++; i2++;
379
+ /* 6 */
380
+ c1 = block[i1]; c2 = block[i2];
381
+ if (c1 != c2) return (c1 > c2);
382
+ i1++; i2++;
383
+ /* 7 */
384
+ c1 = block[i1]; c2 = block[i2];
385
+ if (c1 != c2) return (c1 > c2);
386
+ i1++; i2++;
387
+ /* 8 */
388
+ c1 = block[i1]; c2 = block[i2];
389
+ if (c1 != c2) return (c1 > c2);
390
+ i1++; i2++;
391
+ /* 9 */
392
+ c1 = block[i1]; c2 = block[i2];
393
+ if (c1 != c2) return (c1 > c2);
394
+ i1++; i2++;
395
+ /* 10 */
396
+ c1 = block[i1]; c2 = block[i2];
397
+ if (c1 != c2) return (c1 > c2);
398
+ i1++; i2++;
399
+ /* 11 */
400
+ c1 = block[i1]; c2 = block[i2];
401
+ if (c1 != c2) return (c1 > c2);
402
+ i1++; i2++;
403
+ /* 12 */
404
+ c1 = block[i1]; c2 = block[i2];
405
+ if (c1 != c2) return (c1 > c2);
406
+ i1++; i2++;
407
+
408
+ k = nblock + 8;
409
+
410
+ do {
411
+ /* 1 */
412
+ c1 = block[i1]; c2 = block[i2];
413
+ if (c1 != c2) return (c1 > c2);
414
+ s1 = quadrant[i1]; s2 = quadrant[i2];
415
+ if (s1 != s2) return (s1 > s2);
416
+ i1++; i2++;
417
+ /* 2 */
418
+ c1 = block[i1]; c2 = block[i2];
419
+ if (c1 != c2) return (c1 > c2);
420
+ s1 = quadrant[i1]; s2 = quadrant[i2];
421
+ if (s1 != s2) return (s1 > s2);
422
+ i1++; i2++;
423
+ /* 3 */
424
+ c1 = block[i1]; c2 = block[i2];
425
+ if (c1 != c2) return (c1 > c2);
426
+ s1 = quadrant[i1]; s2 = quadrant[i2];
427
+ if (s1 != s2) return (s1 > s2);
428
+ i1++; i2++;
429
+ /* 4 */
430
+ c1 = block[i1]; c2 = block[i2];
431
+ if (c1 != c2) return (c1 > c2);
432
+ s1 = quadrant[i1]; s2 = quadrant[i2];
433
+ if (s1 != s2) return (s1 > s2);
434
+ i1++; i2++;
435
+ /* 5 */
436
+ c1 = block[i1]; c2 = block[i2];
437
+ if (c1 != c2) return (c1 > c2);
438
+ s1 = quadrant[i1]; s2 = quadrant[i2];
439
+ if (s1 != s2) return (s1 > s2);
440
+ i1++; i2++;
441
+ /* 6 */
442
+ c1 = block[i1]; c2 = block[i2];
443
+ if (c1 != c2) return (c1 > c2);
444
+ s1 = quadrant[i1]; s2 = quadrant[i2];
445
+ if (s1 != s2) return (s1 > s2);
446
+ i1++; i2++;
447
+ /* 7 */
448
+ c1 = block[i1]; c2 = block[i2];
449
+ if (c1 != c2) return (c1 > c2);
450
+ s1 = quadrant[i1]; s2 = quadrant[i2];
451
+ if (s1 != s2) return (s1 > s2);
452
+ i1++; i2++;
453
+ /* 8 */
454
+ c1 = block[i1]; c2 = block[i2];
455
+ if (c1 != c2) return (c1 > c2);
456
+ s1 = quadrant[i1]; s2 = quadrant[i2];
457
+ if (s1 != s2) return (s1 > s2);
458
+ i1++; i2++;
459
+
460
+ if (i1 >= nblock) i1 -= nblock;
461
+ if (i2 >= nblock) i2 -= nblock;
462
+
463
+ k -= 8;
464
+ (*budget)--;
465
+ }
466
+ while (k >= 0);
467
+
468
+ return False;
469
+ }
470
+
471
+
472
+ /*---------------------------------------------*/
473
+ /*--
474
+ Knuth's increments seem to work better
475
+ than Incerpi-Sedgewick here. Possibly
476
+ because the number of elems to sort is
477
+ usually small, typically <= 20.
478
+ --*/
479
+ static
480
+ Int32 incs[14] = { 1, 4, 13, 40, 121, 364, 1093, 3280,
481
+ 9841, 29524, 88573, 265720,
482
+ 797161, 2391484 };
483
+
484
+ static
485
+ void mainSimpleSort ( UInt32* ptr,
486
+ UChar* block,
487
+ UInt16* quadrant,
488
+ Int32 nblock,
489
+ Int32 lo,
490
+ Int32 hi,
491
+ Int32 d,
492
+ Int32* budget )
493
+ {
494
+ Int32 i, j, h, bigN, hp;
495
+ UInt32 v;
496
+
497
+ bigN = hi - lo + 1;
498
+ if (bigN < 2) return;
499
+
500
+ hp = 0;
501
+ while (incs[hp] < bigN) hp++;
502
+ hp--;
503
+
504
+ for (; hp >= 0; hp--) {
505
+ h = incs[hp];
506
+
507
+ i = lo + h;
508
+ while (True) {
509
+
510
+ /*-- copy 1 --*/
511
+ if (i > hi) break;
512
+ v = ptr[i];
513
+ j = i;
514
+ while ( mainGtU (
515
+ ptr[j-h]+d, v+d, block, quadrant, nblock, budget
516
+ ) ) {
517
+ ptr[j] = ptr[j-h];
518
+ j = j - h;
519
+ if (j <= (lo + h - 1)) break;
520
+ }
521
+ ptr[j] = v;
522
+ i++;
523
+
524
+ /*-- copy 2 --*/
525
+ if (i > hi) break;
526
+ v = ptr[i];
527
+ j = i;
528
+ while ( mainGtU (
529
+ ptr[j-h]+d, v+d, block, quadrant, nblock, budget
530
+ ) ) {
531
+ ptr[j] = ptr[j-h];
532
+ j = j - h;
533
+ if (j <= (lo + h - 1)) break;
534
+ }
535
+ ptr[j] = v;
536
+ i++;
537
+
538
+ /*-- copy 3 --*/
539
+ if (i > hi) break;
540
+ v = ptr[i];
541
+ j = i;
542
+ while ( mainGtU (
543
+ ptr[j-h]+d, v+d, block, quadrant, nblock, budget
544
+ ) ) {
545
+ ptr[j] = ptr[j-h];
546
+ j = j - h;
547
+ if (j <= (lo + h - 1)) break;
548
+ }
549
+ ptr[j] = v;
550
+ i++;
551
+
552
+ if (*budget < 0) return;
553
+ }
554
+ }
555
+ }
556
+
557
+
558
+ /*---------------------------------------------*/
559
+ /*--
560
+ The following is an implementation of
561
+ an elegant 3-way quicksort for strings,
562
+ described in a paper "Fast Algorithms for
563
+ Sorting and Searching Strings", by Robert
564
+ Sedgewick and Jon L. Bentley.
565
+ --*/
566
+
567
+ #define mswap(zz1, zz2) \
568
+ { Int32 zztmp = zz1; zz1 = zz2; zz2 = zztmp; }
569
+
570
+ #define mvswap(zzp1, zzp2, zzn) \
571
+ { \
572
+ Int32 yyp1 = (zzp1); \
573
+ Int32 yyp2 = (zzp2); \
574
+ Int32 yyn = (zzn); \
575
+ while (yyn > 0) { \
576
+ mswap(ptr[yyp1], ptr[yyp2]); \
577
+ yyp1++; yyp2++; yyn--; \
578
+ } \
579
+ }
580
+
581
+ static
582
+ __inline__
583
+ UChar mmed3 ( UChar a, UChar b, UChar c )
584
+ {
585
+ UChar t;
586
+ if (a > b) { t = a; a = b; b = t; };
587
+ if (b > c) {
588
+ b = c;
589
+ if (a > b) b = a;
590
+ }
591
+ return b;
592
+ }
593
+
594
+ #define mmin(a,b) ((a) < (b)) ? (a) : (b)
595
+
596
+ #define mpush(lz,hz,dz) { stackLo[sp] = lz; \
597
+ stackHi[sp] = hz; \
598
+ stackD [sp] = dz; \
599
+ sp++; }
600
+
601
+ #define mpop(lz,hz,dz) { sp--; \
602
+ lz = stackLo[sp]; \
603
+ hz = stackHi[sp]; \
604
+ dz = stackD [sp]; }
605
+
606
+
607
+ #define mnextsize(az) (nextHi[az]-nextLo[az])
608
+
609
+ #define mnextswap(az,bz) \
610
+ { Int32 tz; \
611
+ tz = nextLo[az]; nextLo[az] = nextLo[bz]; nextLo[bz] = tz; \
612
+ tz = nextHi[az]; nextHi[az] = nextHi[bz]; nextHi[bz] = tz; \
613
+ tz = nextD [az]; nextD [az] = nextD [bz]; nextD [bz] = tz; }
614
+
615
+
616
+ #define MAIN_QSORT_SMALL_THRESH 20
617
+ #define MAIN_QSORT_DEPTH_THRESH (BZ_N_RADIX + BZ_N_QSORT)
618
+ #define MAIN_QSORT_STACK_SIZE 100
619
+
620
+ static
621
+ void mainQSort3 ( UInt32* ptr,
622
+ UChar* block,
623
+ UInt16* quadrant,
624
+ Int32 nblock,
625
+ Int32 loSt,
626
+ Int32 hiSt,
627
+ Int32 dSt,
628
+ Int32* budget )
629
+ {
630
+ Int32 unLo, unHi, ltLo, gtHi, n, m, med;
631
+ Int32 sp, lo, hi, d;
632
+
633
+ Int32 stackLo[MAIN_QSORT_STACK_SIZE];
634
+ Int32 stackHi[MAIN_QSORT_STACK_SIZE];
635
+ Int32 stackD [MAIN_QSORT_STACK_SIZE];
636
+
637
+ Int32 nextLo[3];
638
+ Int32 nextHi[3];
639
+ Int32 nextD [3];
640
+
641
+ sp = 0;
642
+ mpush ( loSt, hiSt, dSt );
643
+
644
+ while (sp > 0) {
645
+
646
+ AssertH ( sp < MAIN_QSORT_STACK_SIZE - 2, 1001 );
647
+
648
+ mpop ( lo, hi, d );
649
+ if (hi - lo < MAIN_QSORT_SMALL_THRESH ||
650
+ d > MAIN_QSORT_DEPTH_THRESH) {
651
+ mainSimpleSort ( ptr, block, quadrant, nblock, lo, hi, d, budget );
652
+ if (*budget < 0) return;
653
+ continue;
654
+ }
655
+
656
+ med = (Int32)
657
+ mmed3 ( block[ptr[ lo ]+d],
658
+ block[ptr[ hi ]+d],
659
+ block[ptr[ (lo+hi)>>1 ]+d] );
660
+
661
+ unLo = ltLo = lo;
662
+ unHi = gtHi = hi;
663
+
664
+ while (True) {
665
+ while (True) {
666
+ if (unLo > unHi) break;
667
+ n = ((Int32)block[ptr[unLo]+d]) - med;
668
+ if (n == 0) {
669
+ mswap(ptr[unLo], ptr[ltLo]);
670
+ ltLo++; unLo++; continue;
671
+ };
672
+ if (n > 0) break;
673
+ unLo++;
674
+ }
675
+ while (True) {
676
+ if (unLo > unHi) break;
677
+ n = ((Int32)block[ptr[unHi]+d]) - med;
678
+ if (n == 0) {
679
+ mswap(ptr[unHi], ptr[gtHi]);
680
+ gtHi--; unHi--; continue;
681
+ };
682
+ if (n < 0) break;
683
+ unHi--;
684
+ }
685
+ if (unLo > unHi) break;
686
+ mswap(ptr[unLo], ptr[unHi]); unLo++; unHi--;
687
+ }
688
+
689
+ AssertD ( unHi == unLo-1, "mainQSort3(2)" );
690
+
691
+ if (gtHi < ltLo) {
692
+ mpush(lo, hi, d+1 );
693
+ continue;
694
+ }
695
+
696
+ n = mmin(ltLo-lo, unLo-ltLo); mvswap(lo, unLo-n, n);
697
+ m = mmin(hi-gtHi, gtHi-unHi); mvswap(unLo, hi-m+1, m);
698
+
699
+ n = lo + unLo - ltLo - 1;
700
+ m = hi - (gtHi - unHi) + 1;
701
+
702
+ nextLo[0] = lo; nextHi[0] = n; nextD[0] = d;
703
+ nextLo[1] = m; nextHi[1] = hi; nextD[1] = d;
704
+ nextLo[2] = n+1; nextHi[2] = m-1; nextD[2] = d+1;
705
+
706
+ if (mnextsize(0) < mnextsize(1)) mnextswap(0,1);
707
+ if (mnextsize(1) < mnextsize(2)) mnextswap(1,2);
708
+ if (mnextsize(0) < mnextsize(1)) mnextswap(0,1);
709
+
710
+ AssertD (mnextsize(0) >= mnextsize(1), "mainQSort3(8)" );
711
+ AssertD (mnextsize(1) >= mnextsize(2), "mainQSort3(9)" );
712
+
713
+ mpush (nextLo[0], nextHi[0], nextD[0]);
714
+ mpush (nextLo[1], nextHi[1], nextD[1]);
715
+ mpush (nextLo[2], nextHi[2], nextD[2]);
716
+ }
717
+ }
718
+
719
+ #undef mswap
720
+ #undef mvswap
721
+ #undef mpush
722
+ #undef mpop
723
+ #undef mmin
724
+ #undef mnextsize
725
+ #undef mnextswap
726
+ #undef MAIN_QSORT_SMALL_THRESH
727
+ #undef MAIN_QSORT_DEPTH_THRESH
728
+ #undef MAIN_QSORT_STACK_SIZE
729
+
730
+
731
+ /*---------------------------------------------*/
732
+ /* Pre:
733
+ nblock > N_OVERSHOOT
734
+ block32 exists for [0 .. nblock-1 +N_OVERSHOOT]
735
+ ((UChar*)block32) [0 .. nblock-1] holds block
736
+ ptr exists for [0 .. nblock-1]
737
+
738
+ Post:
739
+ ((UChar*)block32) [0 .. nblock-1] holds block
740
+ All other areas of block32 destroyed
741
+ ftab [0 .. 65536 ] destroyed
742
+ ptr [0 .. nblock-1] holds sorted order
743
+ if (*budget < 0), sorting was abandoned
744
+ */
745
+
746
+ #define BIGFREQ(b) (ftab[((b)+1) << 8] - ftab[(b) << 8])
747
+ #define SETMASK (1 << 21)
748
+ #define CLEARMASK (~(SETMASK))
749
+
750
+ static
751
+ void mainSort ( UInt32* ptr,
752
+ UChar* block,
753
+ UInt16* quadrant,
754
+ UInt32* ftab,
755
+ Int32 nblock,
756
+ Int32 verb,
757
+ Int32* budget )
758
+ {
759
+ Int32 i, j, k, ss, sb;
760
+ Int32 runningOrder[256];
761
+ Bool bigDone[256];
762
+ Int32 copyStart[256];
763
+ Int32 copyEnd [256];
764
+ UChar c1;
765
+ Int32 numQSorted;
766
+ UInt16 s;
767
+ if (verb >= 4) VPrintf0 ( " main sort initialise ...\n" );
768
+
769
+ /*-- set up the 2-byte frequency table --*/
770
+ for (i = 65536; i >= 0; i--) ftab[i] = 0;
771
+
772
+ j = block[0] << 8;
773
+ i = nblock-1;
774
+ for (; i >= 3; i -= 4) {
775
+ quadrant[i] = 0;
776
+ j = (j >> 8) | ( ((UInt16)block[i]) << 8);
777
+ ftab[j]++;
778
+ quadrant[i-1] = 0;
779
+ j = (j >> 8) | ( ((UInt16)block[i-1]) << 8);
780
+ ftab[j]++;
781
+ quadrant[i-2] = 0;
782
+ j = (j >> 8) | ( ((UInt16)block[i-2]) << 8);
783
+ ftab[j]++;
784
+ quadrant[i-3] = 0;
785
+ j = (j >> 8) | ( ((UInt16)block[i-3]) << 8);
786
+ ftab[j]++;
787
+ }
788
+ for (; i >= 0; i--) {
789
+ quadrant[i] = 0;
790
+ j = (j >> 8) | ( ((UInt16)block[i]) << 8);
791
+ ftab[j]++;
792
+ }
793
+
794
+ /*-- (emphasises close relationship of block & quadrant) --*/
795
+ for (i = 0; i < BZ_N_OVERSHOOT; i++) {
796
+ block [nblock+i] = block[i];
797
+ quadrant[nblock+i] = 0;
798
+ }
799
+
800
+ if (verb >= 4) VPrintf0 ( " bucket sorting ...\n" );
801
+
802
+ /*-- Complete the initial radix sort --*/
803
+ for (i = 1; i <= 65536; i++) ftab[i] += ftab[i-1];
804
+
805
+ s = block[0] << 8;
806
+ i = nblock-1;
807
+ for (; i >= 3; i -= 4) {
808
+ s = (s >> 8) | (block[i] << 8);
809
+ j = ftab[s] -1;
810
+ ftab[s] = j;
811
+ ptr[j] = i;
812
+ s = (s >> 8) | (block[i-1] << 8);
813
+ j = ftab[s] -1;
814
+ ftab[s] = j;
815
+ ptr[j] = i-1;
816
+ s = (s >> 8) | (block[i-2] << 8);
817
+ j = ftab[s] -1;
818
+ ftab[s] = j;
819
+ ptr[j] = i-2;
820
+ s = (s >> 8) | (block[i-3] << 8);
821
+ j = ftab[s] -1;
822
+ ftab[s] = j;
823
+ ptr[j] = i-3;
824
+ }
825
+ for (; i >= 0; i--) {
826
+ s = (s >> 8) | (block[i] << 8);
827
+ j = ftab[s] -1;
828
+ ftab[s] = j;
829
+ ptr[j] = i;
830
+ }
831
+
832
+ /*--
833
+ Now ftab contains the first loc of every small bucket.
834
+ Calculate the running order, from smallest to largest
835
+ big bucket.
836
+ --*/
837
+ for (i = 0; i <= 255; i++) {
838
+ bigDone [i] = False;
839
+ runningOrder[i] = i;
840
+ }
841
+
842
+ {
843
+ Int32 vv;
844
+ Int32 h = 1;
845
+ do h = 3 * h + 1; while (h <= 256);
846
+ do {
847
+ h = h / 3;
848
+ for (i = h; i <= 255; i++) {
849
+ vv = runningOrder[i];
850
+ j = i;
851
+ while ( BIGFREQ(runningOrder[j-h]) > BIGFREQ(vv) ) {
852
+ runningOrder[j] = runningOrder[j-h];
853
+ j = j - h;
854
+ if (j <= (h - 1)) goto zero;
855
+ }
856
+ zero:
857
+ runningOrder[j] = vv;
858
+ }
859
+ } while (h != 1);
860
+ }
861
+
862
+ /*--
863
+ The main sorting loop.
864
+ --*/
865
+
866
+ numQSorted = 0;
867
+
868
+ for (i = 0; i <= 255; i++) {
869
+
870
+ /*--
871
+ Process big buckets, starting with the least full.
872
+ Basically this is a 3-step process in which we call
873
+ mainQSort3 to sort the small buckets [ss, j], but
874
+ also make a big effort to avoid the calls if we can.
875
+ --*/
876
+ ss = runningOrder[i];
877
+
878
+ /*--
879
+ Step 1:
880
+ Complete the big bucket [ss] by quicksorting
881
+ any unsorted small buckets [ss, j], for j != ss.
882
+ Hopefully previous pointer-scanning phases have already
883
+ completed many of the small buckets [ss, j], so
884
+ we don't have to sort them at all.
885
+ --*/
886
+ for (j = 0; j <= 255; j++) {
887
+ if (j != ss) {
888
+ sb = (ss << 8) + j;
889
+ if ( ! (ftab[sb] & SETMASK) ) {
890
+ Int32 lo = ftab[sb] & CLEARMASK;
891
+ Int32 hi = (ftab[sb+1] & CLEARMASK) - 1;
892
+ if (hi > lo) {
893
+ if (verb >= 4)
894
+ VPrintf4 ( " qsort [0x%x, 0x%x] "
895
+ "done %d this %d\n",
896
+ ss, j, numQSorted, hi - lo + 1 );
897
+ mainQSort3 (
898
+ ptr, block, quadrant, nblock,
899
+ lo, hi, BZ_N_RADIX, budget
900
+ );
901
+ numQSorted += (hi - lo + 1);
902
+ if (*budget < 0) return;
903
+ }
904
+ }
905
+ ftab[sb] |= SETMASK;
906
+ }
907
+ }
908
+
909
+ AssertH ( !bigDone[ss], 1006 );
910
+
911
+ /*--
912
+ Step 2:
913
+ Now scan this big bucket [ss] so as to synthesise the
914
+ sorted order for small buckets [t, ss] for all t,
915
+ including, magically, the bucket [ss,ss] too.
916
+ This will avoid doing Real Work in subsequent Step 1's.
917
+ --*/
918
+ {
919
+ for (j = 0; j <= 255; j++) {
920
+ copyStart[j] = ftab[(j << 8) + ss] & CLEARMASK;
921
+ copyEnd [j] = (ftab[(j << 8) + ss + 1] & CLEARMASK) - 1;
922
+ }
923
+ for (j = ftab[ss << 8] & CLEARMASK; j < copyStart[ss]; j++) {
924
+ k = ptr[j]-1; if (k < 0) k += nblock;
925
+ c1 = block[k];
926
+ if (!bigDone[c1])
927
+ ptr[ copyStart[c1]++ ] = k;
928
+ }
929
+ for (j = (ftab[(ss+1) << 8] & CLEARMASK) - 1; j > copyEnd[ss]; j--) {
930
+ k = ptr[j]-1; if (k < 0) k += nblock;
931
+ c1 = block[k];
932
+ if (!bigDone[c1])
933
+ ptr[ copyEnd[c1]-- ] = k;
934
+ }
935
+ }
936
+
937
+ AssertH ( (copyStart[ss]-1 == copyEnd[ss])
938
+ ||
939
+ /* Extremely rare case missing in bzip2-1.0.0 and 1.0.1.
940
+ Necessity for this case is demonstrated by compressing
941
+ a sequence of approximately 48.5 million of character
942
+ 251; 1.0.0/1.0.1 will then die here. */
943
+ (copyStart[ss] == 0 && copyEnd[ss] == nblock-1),
944
+ 1007 )
945
+
946
+ for (j = 0; j <= 255; j++) ftab[(j << 8) + ss] |= SETMASK;
947
+
948
+ /*--
949
+ Step 3:
950
+ The [ss] big bucket is now done. Record this fact,
951
+ and update the quadrant descriptors. Remember to
952
+ update quadrants in the overshoot area too, if
953
+ necessary. The "if (i < 255)" test merely skips
954
+ this updating for the last bucket processed, since
955
+ updating for the last bucket is pointless.
956
+
957
+ The quadrant array provides a way to incrementally
958
+ cache sort orderings, as they appear, so as to
959
+ make subsequent comparisons in fullGtU() complete
960
+ faster. For repetitive blocks this makes a big
961
+ difference (but not big enough to be able to avoid
962
+ the fallback sorting mechanism, exponential radix sort).
963
+
964
+ The precise meaning is: at all times:
965
+
966
+ for 0 <= i < nblock and 0 <= j <= nblock
967
+
968
+ if block[i] != block[j],
969
+
970
+ then the relative values of quadrant[i] and
971
+ quadrant[j] are meaningless.
972
+
973
+ else {
974
+ if quadrant[i] < quadrant[j]
975
+ then the string starting at i lexicographically
976
+ precedes the string starting at j
977
+
978
+ else if quadrant[i] > quadrant[j]
979
+ then the string starting at j lexicographically
980
+ precedes the string starting at i
981
+
982
+ else
983
+ the relative ordering of the strings starting
984
+ at i and j has not yet been determined.
985
+ }
986
+ --*/
987
+ bigDone[ss] = True;
988
+
989
+ if (i < 255) {
990
+ Int32 bbStart = ftab[ss << 8] & CLEARMASK;
991
+ Int32 bbSize = (ftab[(ss+1) << 8] & CLEARMASK) - bbStart;
992
+ Int32 shifts = 0;
993
+
994
+ while ((bbSize >> shifts) > 65534) shifts++;
995
+
996
+ for (j = bbSize-1; j >= 0; j--) {
997
+ Int32 a2update = ptr[bbStart + j];
998
+ UInt16 qVal = (UInt16)(j >> shifts);
999
+ quadrant[a2update] = qVal;
1000
+ if (a2update < BZ_N_OVERSHOOT)
1001
+ quadrant[a2update + nblock] = qVal;
1002
+ }
1003
+ AssertH ( ((bbSize-1) >> shifts) <= 65535, 1002 );
1004
+ }
1005
+
1006
+ }
1007
+
1008
+ if (verb >= 4)
1009
+ VPrintf3 ( " %d pointers, %d sorted, %d scanned\n",
1010
+ nblock, numQSorted, nblock - numQSorted );
1011
+ }
1012
+
1013
+ #undef BIGFREQ
1014
+ #undef SETMASK
1015
+ #undef CLEARMASK
1016
+
1017
+
1018
+ /*---------------------------------------------*/
1019
+ /* Pre:
1020
+ nblock > 0
1021
+ arr2 exists for [0 .. nblock-1 +N_OVERSHOOT]
1022
+ ((UChar*)arr2) [0 .. nblock-1] holds block
1023
+ arr1 exists for [0 .. nblock-1]
1024
+
1025
+ Post:
1026
+ ((UChar*)arr2) [0 .. nblock-1] holds block
1027
+ All other areas of block destroyed
1028
+ ftab [ 0 .. 65536 ] destroyed
1029
+ arr1 [0 .. nblock-1] holds sorted order
1030
+ */
1031
+ void BZ2_blockSort ( EState* s )
1032
+ {
1033
+ UInt32* ptr = s->ptr;
1034
+ UChar* block = s->block;
1035
+ UInt32* ftab = s->ftab;
1036
+ Int32 nblock = s->nblock;
1037
+ Int32 verb = s->verbosity;
1038
+ Int32 wfact = s->workFactor;
1039
+ UInt16* quadrant;
1040
+ Int32 budget;
1041
+ Int32 budgetInit;
1042
+ Int32 i;
1043
+
1044
+ if (nblock < 10000) {
1045
+ fallbackSort ( s->arr1, s->arr2, ftab, nblock, verb );
1046
+ } else {
1047
+ /* Calculate the location for quadrant, remembering to get
1048
+ the alignment right. Assumes that &(block[0]) is at least
1049
+ 2-byte aligned -- this should be ok since block is really
1050
+ the first section of arr2.
1051
+ */
1052
+ i = nblock+BZ_N_OVERSHOOT;
1053
+ if (i & 1) i++;
1054
+ quadrant = (UInt16*)(&(block[i]));
1055
+
1056
+ /* (wfact-1) / 3 puts the default-factor-30
1057
+ transition point at very roughly the same place as
1058
+ with v0.1 and v0.9.0.
1059
+ Not that it particularly matters any more, since the
1060
+ resulting compressed stream is now the same regardless
1061
+ of whether or not we use the main sort or fallback sort.
1062
+ */
1063
+ if (wfact < 1 ) wfact = 1;
1064
+ if (wfact > 100) wfact = 100;
1065
+ budgetInit = nblock * ((wfact-1) / 3);
1066
+ budget = budgetInit;
1067
+
1068
+ mainSort ( ptr, block, quadrant, ftab, nblock, verb, &budget );
1069
+ if (verb >= 3)
1070
+ VPrintf3 ( " %d work, %d block, ratio %5.2f\n",
1071
+ budgetInit - budget,
1072
+ nblock,
1073
+ (float)(budgetInit - budget) /
1074
+ (float)(nblock==0 ? 1 : nblock) );
1075
+ if (budget < 0) {
1076
+ if (verb >= 2)
1077
+ VPrintf0 ( " too repetitive; using fallback"
1078
+ " sorting algorithm\n" );
1079
+ fallbackSort ( s->arr1, s->arr2, ftab, nblock, verb );
1080
+ }
1081
+ }
1082
+
1083
+ s->origPtr = -1;
1084
+ for (i = 0; i < s->nblock; i++)
1085
+ if (ptr[i] == 0)
1086
+ { s->origPtr = i; break; };
1087
+
1088
+ AssertH( s->origPtr != -1, 1003 );
1089
+ }
1090
+
1091
+
1092
+ /*-------------------------------------------------------------*/
1093
+ /*--- end blocksort.c ---*/
1094
+ /*-------------------------------------------------------------*/