zstd-ruby 1.5.0.0 → 1.5.1.0
This diff shows the published contents of the two package versions as they appear in their respective public registries. It is provided for informational purposes only.
- checksums.yaml +4 -4
- data/.github/workflows/ruby.yml +2 -2
- data/README.md +1 -1
- data/ext/zstdruby/extconf.rb +1 -0
- data/ext/zstdruby/libzstd/Makefile +50 -175
- data/ext/zstdruby/libzstd/README.md +7 -1
- data/ext/zstdruby/libzstd/common/bitstream.h +24 -9
- data/ext/zstdruby/libzstd/common/compiler.h +89 -43
- data/ext/zstdruby/libzstd/common/entropy_common.c +11 -5
- data/ext/zstdruby/libzstd/common/error_private.h +79 -0
- data/ext/zstdruby/libzstd/common/fse.h +2 -1
- data/ext/zstdruby/libzstd/common/fse_decompress.c +1 -1
- data/ext/zstdruby/libzstd/common/huf.h +24 -22
- data/ext/zstdruby/libzstd/common/mem.h +18 -0
- data/ext/zstdruby/libzstd/common/portability_macros.h +131 -0
- data/ext/zstdruby/libzstd/common/xxhash.c +5 -805
- data/ext/zstdruby/libzstd/common/xxhash.h +5568 -167
- data/ext/zstdruby/libzstd/common/zstd_internal.h +92 -88
- data/ext/zstdruby/libzstd/common/zstd_trace.h +12 -3
- data/ext/zstdruby/libzstd/compress/clevels.h +134 -0
- data/ext/zstdruby/libzstd/compress/fse_compress.c +63 -27
- data/ext/zstdruby/libzstd/compress/huf_compress.c +537 -104
- data/ext/zstdruby/libzstd/compress/zstd_compress.c +194 -278
- data/ext/zstdruby/libzstd/compress/zstd_compress_internal.h +102 -44
- data/ext/zstdruby/libzstd/compress/zstd_compress_literals.c +4 -3
- data/ext/zstdruby/libzstd/compress/zstd_compress_literals.h +3 -1
- data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.c +5 -4
- data/ext/zstdruby/libzstd/compress/zstd_compress_superblock.c +3 -2
- data/ext/zstdruby/libzstd/compress/zstd_cwksp.h +3 -3
- data/ext/zstdruby/libzstd/compress/zstd_double_fast.c +289 -114
- data/ext/zstdruby/libzstd/compress/zstd_fast.c +302 -123
- data/ext/zstdruby/libzstd/compress/zstd_lazy.c +418 -502
- data/ext/zstdruby/libzstd/compress/zstd_ldm.c +4 -4
- data/ext/zstdruby/libzstd/compress/zstd_ldm.h +1 -1
- data/ext/zstdruby/libzstd/compress/zstd_ldm_geartab.h +4 -1
- data/ext/zstdruby/libzstd/compress/zstd_opt.c +186 -108
- data/ext/zstdruby/libzstd/compress/zstdmt_compress.c +59 -29
- data/ext/zstdruby/libzstd/decompress/huf_decompress.c +727 -189
- data/ext/zstdruby/libzstd/decompress/huf_decompress_amd64.S +571 -0
- data/ext/zstdruby/libzstd/decompress/zstd_decompress.c +85 -22
- data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.c +744 -220
- data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.h +8 -2
- data/ext/zstdruby/libzstd/decompress/zstd_decompress_internal.h +34 -3
- data/ext/zstdruby/libzstd/deprecated/zbuff_compress.c +23 -3
- data/ext/zstdruby/libzstd/dictBuilder/cover.c +9 -2
- data/ext/zstdruby/libzstd/dictBuilder/fastcover.c +11 -4
- data/ext/zstdruby/libzstd/dictBuilder/zdict.c +99 -28
- data/ext/zstdruby/libzstd/legacy/zstd_v01.c +2 -6
- data/ext/zstdruby/libzstd/legacy/zstd_v02.c +3 -7
- data/ext/zstdruby/libzstd/legacy/zstd_v03.c +3 -7
- data/ext/zstdruby/libzstd/legacy/zstd_v04.c +3 -7
- data/ext/zstdruby/libzstd/legacy/zstd_v05.c +3 -7
- data/ext/zstdruby/libzstd/legacy/zstd_v06.c +3 -7
- data/ext/zstdruby/libzstd/legacy/zstd_v07.c +3 -7
- data/ext/zstdruby/libzstd/libzstd.mk +185 -0
- data/ext/zstdruby/libzstd/libzstd.pc.in +1 -0
- data/ext/zstdruby/libzstd/modulemap/module.modulemap +4 -0
- data/ext/zstdruby/libzstd/zdict.h +4 -4
- data/ext/zstdruby/libzstd/zstd.h +179 -136
- data/ext/zstdruby/zstdruby.c +2 -2
- data/lib/zstd-ruby/version.rb +1 -1
- metadata +8 -3
@@ -53,6 +53,28 @@ unsigned HUF_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxS
 /* *******************************************************
 * HUF : Huffman block compression
 *********************************************************/
+#define HUF_WORKSPACE_MAX_ALIGNMENT 8
+
+static void* HUF_alignUpWorkspace(void* workspace, size_t* workspaceSizePtr, size_t align)
+{
+    size_t const mask = align - 1;
+    size_t const rem = (size_t)workspace & mask;
+    size_t const add = (align - rem) & mask;
+    BYTE* const aligned = (BYTE*)workspace + add;
+    assert((align & (align - 1)) == 0); /* pow 2 */
+    assert(align <= HUF_WORKSPACE_MAX_ALIGNMENT);
+    if (*workspaceSizePtr >= add) {
+        assert(add < align);
+        assert(((size_t)aligned & mask) == 0);
+        *workspaceSizePtr -= add;
+        return aligned;
+    } else {
+        *workspaceSizePtr = 0;
+        return NULL;
+    }
+}
+
+
 /* HUF_compressWeights() :
  * Same as FSE_compress(), but dedicated to huff0's weights compression.
  * The use case needs much less stack memory.
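The new HUF_alignUpWorkspace() helper above rounds a caller-supplied workspace pointer up to the next aligned address and shrinks the reported size accordingly. A minimal standalone sketch of the same align-up arithmetic (illustrative names, not part of the diff):

```c
#include <assert.h>
#include <stddef.h>
#include <stdint.h>

/* Round `ptr` up to the next multiple of `align` (a power of two), shrinking
 * `*sizePtr` by the number of bytes skipped. Mirrors the arithmetic used by
 * HUF_alignUpWorkspace() in this diff; returns NULL if the buffer is too small. */
static void* align_up(void* ptr, size_t* sizePtr, size_t align) {
    size_t const mask = align - 1;
    size_t const rem  = (uintptr_t)ptr & mask;
    size_t const add  = (align - rem) & mask;   /* 0 when already aligned */
    assert((align & mask) == 0);                /* power of two */
    if (*sizePtr < add) { *sizePtr = 0; return NULL; }
    *sizePtr -= add;
    return (char*)ptr + add;
}
```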
@@ -75,7 +97,7 @@ static size_t HUF_compressWeights(void* dst, size_t dstSize, const void* weightT

     unsigned maxSymbolValue = HUF_TABLELOG_MAX;
     U32 tableLog = MAX_FSE_TABLELOG_FOR_HUFF_HEADER;
-    HUF_CompressWeightsWksp* wksp = (HUF_CompressWeightsWksp*)workspace;
+    HUF_CompressWeightsWksp* wksp = (HUF_CompressWeightsWksp*)HUF_alignUpWorkspace(workspace, &workspaceSize, ZSTD_ALIGNOF(U32));

     if (workspaceSize < sizeof(HUF_CompressWeightsWksp)) return ERROR(GENERIC);

@@ -106,6 +128,40 @@ static size_t HUF_compressWeights(void* dst, size_t dstSize, const void* weightT
     return (size_t)(op-ostart);
 }

+static size_t HUF_getNbBits(HUF_CElt elt)
+{
+    return elt & 0xFF;
+}
+
+static size_t HUF_getNbBitsFast(HUF_CElt elt)
+{
+    return elt;
+}
+
+static size_t HUF_getValue(HUF_CElt elt)
+{
+    return elt & ~0xFF;
+}
+
+static size_t HUF_getValueFast(HUF_CElt elt)
+{
+    return elt;
+}
+
+static void HUF_setNbBits(HUF_CElt* elt, size_t nbBits)
+{
+    assert(nbBits <= HUF_TABLELOG_ABSOLUTEMAX);
+    *elt = nbBits;
+}
+
+static void HUF_setValue(HUF_CElt* elt, size_t value)
+{
+    size_t const nbBits = HUF_getNbBits(*elt);
+    if (nbBits > 0) {
+        assert((value >> nbBits) == 0);
+        *elt |= value << (sizeof(HUF_CElt) * 8 - nbBits);
+    }
+}

 typedef struct {
     HUF_CompressWeightsWksp wksp;
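These accessors reflect the central change running through this file: a CTable entry (HUF_CElt, a size_t in this release) now packs (nbBits, value) in one word, with the code length in the low 8 bits and the code value left-aligned in the top nbBits bits, while CTable[0] stores the table log so the per-symbol entries start at CTable + 1. A small self-contained sketch of that layout, using hypothetical names:

```c
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* Sketch of the packed CTable entry layout used by the accessors above:
 * nbBits in the low 8 bits, the code value left-aligned in the top nbBits
 * bits, zeros in between. Names here are illustrative, not the library's. */
typedef uint64_t elt_t;

static elt_t pack_elt(unsigned nbBits, uint64_t value) {
    elt_t e = (elt_t)nbBits;                 /* low byte: code length */
    assert(nbBits <= 12);                    /* zstd Huffman codes are at most 12 bits */
    if (nbBits > 0) {
        assert((value >> nbBits) == 0);      /* value must fit in nbBits */
        e |= value << (64 - nbBits);         /* value in the top nbBits bits */
    }
    return e;
}

static unsigned elt_nbBits(elt_t e) { return (unsigned)(e & 0xFF); }
static uint64_t elt_value(elt_t e) {
    unsigned const n = elt_nbBits(e);
    return n ? (e >> (64 - n)) : 0;
}

int main(void) {
    elt_t const e = pack_elt(5, 0x13);       /* a 5-bit code, 0b10011 */
    printf("nbBits=%u value=0x%llx\n", elt_nbBits(e), (unsigned long long)elt_value(e));
    return 0;
}
```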
@@ -117,9 +173,10 @@ size_t HUF_writeCTable_wksp(void* dst, size_t maxDstSize,
                             const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog,
                             void* workspace, size_t workspaceSize)
 {
+    HUF_CElt const* const ct = CTable + 1;
     BYTE* op = (BYTE*)dst;
     U32 n;
-    HUF_WriteCTableWksp* wksp = (HUF_WriteCTableWksp*)workspace;
+    HUF_WriteCTableWksp* wksp = (HUF_WriteCTableWksp*)HUF_alignUpWorkspace(workspace, &workspaceSize, ZSTD_ALIGNOF(U32));

     /* check conditions */
     if (workspaceSize < sizeof(HUF_WriteCTableWksp)) return ERROR(GENERIC);
@@ -130,9 +187,10 @@ size_t HUF_writeCTable_wksp(void* dst, size_t maxDstSize,
     for (n=1; n<huffLog+1; n++)
         wksp->bitsToWeight[n] = (BYTE)(huffLog + 1 - n);
     for (n=0; n<maxSymbolValue; n++)
-        wksp->huffWeight[n] = wksp->bitsToWeight[
+        wksp->huffWeight[n] = wksp->bitsToWeight[HUF_getNbBits(ct[n])];

     /* attempt weights compression by FSE */
+    if (maxDstSize < 1) return ERROR(dstSize_tooSmall);
     { CHECK_V_F(hSize, HUF_compressWeights(op+1, maxDstSize-1, wksp->huffWeight, maxSymbolValue, &wksp->wksp, sizeof(wksp->wksp)) );
       if ((hSize>1) & (hSize < maxSymbolValue/2)) { /* FSE compressed */
           op[0] = (BYTE)hSize;
@@ -166,6 +224,7 @@ size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void
     U32 rankVal[HUF_TABLELOG_ABSOLUTEMAX + 1]; /* large enough for values from 0 to 16 */
     U32 tableLog = 0;
     U32 nbSymbols = 0;
+    HUF_CElt* const ct = CTable + 1;

     /* get symbol weights */
     CHECK_V_F(readSize, HUF_readStats(huffWeight, HUF_SYMBOLVALUE_MAX+1, rankVal, &nbSymbols, &tableLog, src, srcSize));
@@ -175,6 +234,8 @@ size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void
     if (tableLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge);
     if (nbSymbols > *maxSymbolValuePtr+1) return ERROR(maxSymbolValue_tooSmall);

+    CTable[0] = tableLog;
+
     /* Prepare base value per rank */
     { U32 n, nextRankStart = 0;
       for (n=1; n<=tableLog; n++) {
@@ -186,13 +247,13 @@ size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void
     /* fill nbBits */
     { U32 n; for (n=0; n<nbSymbols; n++) {
           const U32 w = huffWeight[n];
-
+          HUF_setNbBits(ct + n, (BYTE)(tableLog + 1 - w) & -(w != 0));
     } }

     /* fill val */
     { U16 nbPerRank[HUF_TABLELOG_MAX+2] = {0}; /* support w=0=>n=tableLog+1 */
       U16 valPerRank[HUF_TABLELOG_MAX+2] = {0};
-      { U32 n; for (n=0; n<nbSymbols; n++) nbPerRank[
+      { U32 n; for (n=0; n<nbSymbols; n++) nbPerRank[HUF_getNbBits(ct[n])]++; }
       /* determine stating value per rank */
       valPerRank[tableLog+1] = 0; /* for w==0 */
       { U16 min = 0;
@@ -202,18 +263,18 @@ size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void
             min >>= 1;
       } }
       /* assign value within rank, symbol order */
-      { U32 n; for (n=0; n<nbSymbols; n++)
+      { U32 n; for (n=0; n<nbSymbols; n++) HUF_setValue(ct + n, valPerRank[HUF_getNbBits(ct[n])]++); }
     }

     *maxSymbolValuePtr = nbSymbols - 1;
     return readSize;
 }

-U32
+U32 HUF_getNbBitsFromCTable(HUF_CElt const* CTable, U32 symbolValue)
 {
-    const HUF_CElt*
+    const HUF_CElt* ct = CTable + 1;
     assert(symbolValue <= HUF_SYMBOLVALUE_MAX);
-    return
+    return (U32)HUF_getNbBits(ct[symbolValue]);
 }


@@ -367,22 +428,118 @@ static U32 HUF_setMaxHeight(nodeElt* huffNode, U32 lastNonNull, U32 maxNbBits)
 }

 typedef struct {
-
-
+    U16 base;
+    U16 curr;
 } rankPos;

 typedef nodeElt huffNodeTable[HUF_CTABLE_WORKSPACE_SIZE_U32];

-
+/* Number of buckets available for HUF_sort() */
+#define RANK_POSITION_TABLE_SIZE 192

 typedef struct {
     huffNodeTable huffNodeTbl;
     rankPos rankPosition[RANK_POSITION_TABLE_SIZE];
 } HUF_buildCTable_wksp_tables;

+/* RANK_POSITION_DISTINCT_COUNT_CUTOFF == Cutoff point in HUF_sort() buckets for which we use log2 bucketing.
+ * Strategy is to use as many buckets as possible for representing distinct
+ * counts while using the remainder to represent all "large" counts.
+ *
+ * To satisfy this requirement for 192 buckets, we can do the following:
+ * Let buckets 0-166 represent distinct counts of [0, 166]
+ * Let buckets 166 to 192 represent all remaining counts up to RANK_POSITION_MAX_COUNT_LOG using log2 bucketing.
+ */
+#define RANK_POSITION_MAX_COUNT_LOG 32
+#define RANK_POSITION_LOG_BUCKETS_BEGIN (RANK_POSITION_TABLE_SIZE - 1) - RANK_POSITION_MAX_COUNT_LOG - 1 /* == 158 */
+#define RANK_POSITION_DISTINCT_COUNT_CUTOFF RANK_POSITION_LOG_BUCKETS_BEGIN + BIT_highbit32(RANK_POSITION_LOG_BUCKETS_BEGIN) /* == 166 */
+
+/* Return the appropriate bucket index for a given count. See definition of
+ * RANK_POSITION_DISTINCT_COUNT_CUTOFF for explanation of bucketing strategy.
+ */
+static U32 HUF_getIndex(U32 const count) {
+    return (count < RANK_POSITION_DISTINCT_COUNT_CUTOFF)
+        ? count
+        : BIT_highbit32(count) + RANK_POSITION_LOG_BUCKETS_BEGIN;
+}
+
+/* Helper swap function for HUF_quickSortPartition() */
+static void HUF_swapNodes(nodeElt* a, nodeElt* b) {
+    nodeElt tmp = *a;
+    *a = *b;
+    *b = tmp;
+}
+
+/* Returns 0 if the huffNode array is not sorted by descending count */
+MEM_STATIC int HUF_isSorted(nodeElt huffNode[], U32 const maxSymbolValue1) {
+    U32 i;
+    for (i = 1; i < maxSymbolValue1; ++i) {
+        if (huffNode[i].count > huffNode[i-1].count) {
+            return 0;
+        }
+    }
+    return 1;
+}
+
+/* Insertion sort by descending order */
+HINT_INLINE void HUF_insertionSort(nodeElt huffNode[], int const low, int const high) {
+    int i;
+    int const size = high-low+1;
+    huffNode += low;
+    for (i = 1; i < size; ++i) {
+        nodeElt const key = huffNode[i];
+        int j = i - 1;
+        while (j >= 0 && huffNode[j].count < key.count) {
+            huffNode[j + 1] = huffNode[j];
+            j--;
+        }
+        huffNode[j + 1] = key;
+    }
+}
+
+/* Pivot helper function for quicksort. */
+static int HUF_quickSortPartition(nodeElt arr[], int const low, int const high) {
+    /* Simply select rightmost element as pivot. "Better" selectors like
+     * median-of-three don't experimentally appear to have any benefit.
+     */
+    U32 const pivot = arr[high].count;
+    int i = low - 1;
+    int j = low;
+    for ( ; j < high; j++) {
+        if (arr[j].count > pivot) {
+            i++;
+            HUF_swapNodes(&arr[i], &arr[j]);
+        }
+    }
+    HUF_swapNodes(&arr[i + 1], &arr[high]);
+    return i + 1;
+}
+
+/* Classic quicksort by descending with partially iterative calls
+ * to reduce worst case callstack size.
+ */
+static void HUF_simpleQuickSort(nodeElt arr[], int low, int high) {
+    int const kInsertionSortThreshold = 8;
+    if (high - low < kInsertionSortThreshold) {
+        HUF_insertionSort(arr, low, high);
+        return;
+    }
+    while (low < high) {
+        int const idx = HUF_quickSortPartition(arr, low, high);
+        if (idx - low < high - idx) {
+            HUF_simpleQuickSort(arr, low, idx - 1);
+            low = idx + 1;
+        } else {
+            HUF_simpleQuickSort(arr, idx + 1, high);
+            high = idx - 1;
+        }
+    }
+}
+
 /**
  * HUF_sort():
  * Sorts the symbols [0, maxSymbolValue] by count[symbol] in decreasing order.
+ * This is a typical bucket sorting strategy that uses either quicksort or insertion sort to sort each bucket.
  *
  * @param[out] huffNode Sorted symbols by decreasing count. Only members `.count` and `.byte` are filled.
  * Must have (maxSymbolValue + 1) entries.
@@ -390,44 +547,52 @@ typedef struct {
  * @param[in] maxSymbolValue Maximum symbol value.
  * @param rankPosition This is a scratch workspace. Must have RANK_POSITION_TABLE_SIZE entries.
  */
-static void HUF_sort(nodeElt
-
-
-    int const maxSymbolValue1 = (int)maxSymbolValue + 1;
+static void HUF_sort(nodeElt huffNode[], const unsigned count[], U32 const maxSymbolValue, rankPos rankPosition[]) {
+    U32 n;
+    U32 const maxSymbolValue1 = maxSymbolValue+1;

     /* Compute base and set curr to base.
-     * For symbol s let lowerRank =
-     *
+     * For symbol s let lowerRank = HUF_getIndex(count[n]) and rank = lowerRank + 1.
+     * See HUF_getIndex to see bucketing strategy.
     * We attribute each symbol to lowerRank's base value, because we want to know where
     * each rank begins in the output, so for rank R we want to count ranks R+1 and above.
     */
    ZSTD_memset(rankPosition, 0, sizeof(*rankPosition) * RANK_POSITION_TABLE_SIZE);
    for (n = 0; n < maxSymbolValue1; ++n) {
-        U32 lowerRank =
+        U32 lowerRank = HUF_getIndex(count[n]);
+        assert(lowerRank < RANK_POSITION_TABLE_SIZE - 1);
         rankPosition[lowerRank].base++;
     }
+
     assert(rankPosition[RANK_POSITION_TABLE_SIZE - 1].base == 0);
+    /* Set up the rankPosition table */
     for (n = RANK_POSITION_TABLE_SIZE - 1; n > 0; --n) {
         rankPosition[n-1].base += rankPosition[n].base;
         rankPosition[n-1].curr = rankPosition[n-1].base;
     }
-
+
+    /* Insert each symbol into their appropriate bucket, setting up rankPosition table. */
     for (n = 0; n < maxSymbolValue1; ++n) {
         U32 const c = count[n];
-        U32 const r =
-        U32 pos = rankPosition[r].curr++;
-
-         * We have at most 256 symbols, so this insertion should be fine.
-         */
-        while ((pos > rankPosition[r].base) && (c > huffNode[pos-1].count)) {
-            huffNode[pos] = huffNode[pos-1];
-            pos--;
-        }
+        U32 const r = HUF_getIndex(c) + 1;
+        U32 const pos = rankPosition[r].curr++;
+        assert(pos < maxSymbolValue1);
         huffNode[pos].count = c;
         huffNode[pos].byte = (BYTE)n;
     }
-    }

+    /* Sort each bucket. */
+    for (n = RANK_POSITION_DISTINCT_COUNT_CUTOFF; n < RANK_POSITION_TABLE_SIZE - 1; ++n) {
+        U32 const bucketSize = rankPosition[n].curr-rankPosition[n].base;
+        U32 const bucketStartIdx = rankPosition[n].base;
+        if (bucketSize > 1) {
+            assert(bucketStartIdx < maxSymbolValue1);
+            HUF_simpleQuickSort(huffNode + bucketStartIdx, 0, bucketSize-1);
+        }
+    }
+
+    assert(HUF_isSorted(huffNode, maxSymbolValue1));
+}

 /** HUF_buildCTable_wksp() :
  * Same as HUF_buildCTable(), but using externally allocated scratch buffer.
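HUF_sort() now bucket-sorts the symbols by count: each count below RANK_POSITION_DISTINCT_COUNT_CUTOFF gets its own bucket, larger counts share log2-sized buckets, and only those log2 buckets need an explicit quicksort afterwards, replacing the old per-symbol insertion into ranks. A rough standalone sketch of the bucket-index rule (constants copied from the hunk above; helper names are illustrative, with highbit32() standing in for BIT_highbit32()):

```c
#include <stdint.h>
#include <stdio.h>

/* Sketch of the HUF_sort() bucketing rule: small counts map to their own
 * bucket, large counts share log2 buckets near the end of the table. */
#define RANK_POSITION_TABLE_SIZE      192
#define RANK_POSITION_MAX_COUNT_LOG    32
#define RANK_POSITION_LOG_BUCKETS_BEGIN ((RANK_POSITION_TABLE_SIZE - 1) - RANK_POSITION_MAX_COUNT_LOG - 1)  /* 158 */
#define RANK_POSITION_DISTINCT_COUNT_CUTOFF (RANK_POSITION_LOG_BUCKETS_BEGIN + 7)  /* 7 == highbit32(158) */

static unsigned highbit32(uint32_t v) {   /* index of the highest set bit, v > 0 */
    unsigned r = 0;
    while (v >>= 1) r++;
    return r;
}

static uint32_t bucket_index(uint32_t count) {
    return (count < RANK_POSITION_DISTINCT_COUNT_CUTOFF)
         ? count
         : highbit32(count) + RANK_POSITION_LOG_BUCKETS_BEGIN;
}

int main(void) {
    uint32_t const samples[] = { 0, 1, 100, 200, 1000, 1u << 20 };
    size_t i;
    for (i = 0; i < sizeof(samples)/sizeof(samples[0]); i++)
        printf("count %-8u -> bucket %u\n", samples[i], bucket_index(samples[i]));
    return 0;
}
```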
@@ -490,6 +655,7 @@ static int HUF_buildTree(nodeElt* huffNode, U32 maxSymbolValue)
  */
 static void HUF_buildCTableFromTree(HUF_CElt* CTable, nodeElt const* huffNode, int nonNullRank, U32 maxSymbolValue, U32 maxNbBits)
 {
+    HUF_CElt* const ct = CTable + 1;
     /* fill result into ctable (val, nbBits) */
     int n;
     U16 nbPerRank[HUF_TABLELOG_MAX+1] = {0};
@@ -505,20 +671,20 @@ static void HUF_buildCTableFromTree(HUF_CElt* CTable, nodeElt const* huffNode, i
         min >>= 1;
     } }
     for (n=0; n<alphabetSize; n++)
-
+        HUF_setNbBits(ct + huffNode[n].byte, huffNode[n].nbBits); /* push nbBits per symbol, symbol order */
     for (n=0; n<alphabetSize; n++)
-
+        HUF_setValue(ct + n, valPerRank[HUF_getNbBits(ct[n])]++); /* assign value within rank, symbol order */
+    CTable[0] = maxNbBits;
 }

-size_t HUF_buildCTable_wksp (HUF_CElt*
+size_t HUF_buildCTable_wksp (HUF_CElt* CTable, const unsigned* count, U32 maxSymbolValue, U32 maxNbBits, void* workSpace, size_t wkspSize)
 {
-    HUF_buildCTable_wksp_tables* const wksp_tables = (HUF_buildCTable_wksp_tables*)workSpace;
+    HUF_buildCTable_wksp_tables* const wksp_tables = (HUF_buildCTable_wksp_tables*)HUF_alignUpWorkspace(workSpace, &wkspSize, ZSTD_ALIGNOF(U32));
     nodeElt* const huffNode0 = wksp_tables->huffNodeTbl;
     nodeElt* const huffNode = huffNode0+1;
     int nonNullRank;

     /* safety checks */
-    if (((size_t)workSpace & 3) != 0) return ERROR(GENERIC); /* must be aligned on 4-bytes boundaries */
     if (wkspSize < sizeof(HUF_buildCTable_wksp_tables))
         return ERROR(workSpace_tooSmall);
     if (maxNbBits == 0) maxNbBits = HUF_TABLELOG_DEFAULT;
@@ -536,96 +702,334 @@ size_t HUF_buildCTable_wksp (HUF_CElt* tree, const unsigned* count, U32 maxSymbo
     maxNbBits = HUF_setMaxHeight(huffNode, (U32)nonNullRank, maxNbBits);
     if (maxNbBits > HUF_TABLELOG_MAX) return ERROR(GENERIC); /* check fit into table */

-    HUF_buildCTableFromTree(
+    HUF_buildCTableFromTree(CTable, huffNode, nonNullRank, maxSymbolValue, maxNbBits);

     return maxNbBits;
 }

 size_t HUF_estimateCompressedSize(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue)
 {
+    HUF_CElt const* ct = CTable + 1;
     size_t nbBits = 0;
     int s;
     for (s = 0; s <= (int)maxSymbolValue; ++s) {
-        nbBits +=
+        nbBits += HUF_getNbBits(ct[s]) * count[s];
     }
     return nbBits >> 3;
 }

 int HUF_validateCTable(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue) {
+    HUF_CElt const* ct = CTable + 1;
     int bad = 0;
     int s;
     for (s = 0; s <= (int)maxSymbolValue; ++s) {
-        bad |= (count[s] != 0) & (
+        bad |= (count[s] != 0) & (HUF_getNbBits(ct[s]) == 0);
     }
     return !bad;
 }

 size_t HUF_compressBound(size_t size) { return HUF_COMPRESSBOUND(size); }

+/** HUF_CStream_t:
+ * Huffman uses its own BIT_CStream_t implementation.
+ * There are three major differences from BIT_CStream_t:
+ * 1. HUF_addBits() takes a HUF_CElt (size_t) which is
+ *    the pair (nbBits, value) in the format:
+ *    format:
+ *    - Bits [0, 4) = nbBits
+ *    - Bits [4, 64 - nbBits) = 0
+ *    - Bits [64 - nbBits, 64) = value
+ * 2. The bitContainer is built from the upper bits and
+ *    right shifted. E.g. to add a new value of N bits
+ *    you right shift the bitContainer by N, then or in
+ *    the new value into the N upper bits.
+ * 3. The bitstream has two bit containers. You can add
+ *    bits to the second container and merge them into
+ *    the first container.
+ */
+
+#define HUF_BITS_IN_CONTAINER (sizeof(size_t) * 8)
+
+typedef struct {
+    size_t bitContainer[2];
+    size_t bitPos[2];
+
+    BYTE* startPtr;
+    BYTE* ptr;
+    BYTE* endPtr;
+} HUF_CStream_t;
+
+/**! HUF_initCStream():
+ * Initializes the bitstream.
+ * @returns 0 or an error code.
+ */
+static size_t HUF_initCStream(HUF_CStream_t* bitC,
+                              void* startPtr, size_t dstCapacity)
+{
+    ZSTD_memset(bitC, 0, sizeof(*bitC));
+    bitC->startPtr = (BYTE*)startPtr;
+    bitC->ptr = bitC->startPtr;
+    bitC->endPtr = bitC->startPtr + dstCapacity - sizeof(bitC->bitContainer[0]);
+    if (dstCapacity <= sizeof(bitC->bitContainer[0])) return ERROR(dstSize_tooSmall);
+    return 0;
+}
+
+/*! HUF_addBits():
+ * Adds the symbol stored in HUF_CElt elt to the bitstream.
+ *
+ * @param elt The element we're adding. This is a (nbBits, value) pair.
+ *            See the HUF_CStream_t docs for the format.
+ * @param idx Insert into the bitstream at this idx.
+ * @param kFast This is a template parameter. If the bitstream is guaranteed
+ *              to have at least 4 unused bits after this call it may be 1,
+ *              otherwise it must be 0. HUF_addBits() is faster when fast is set.
+ */
+FORCE_INLINE_TEMPLATE void HUF_addBits(HUF_CStream_t* bitC, HUF_CElt elt, int idx, int kFast)
+{
+    assert(idx <= 1);
+    assert(HUF_getNbBits(elt) <= HUF_TABLELOG_ABSOLUTEMAX);
+    /* This is efficient on x86-64 with BMI2 because shrx
+     * only reads the low 6 bits of the register. The compiler
+     * knows this and elides the mask. When fast is set,
+     * every operation can use the same value loaded from elt.
+     */
+    bitC->bitContainer[idx] >>= HUF_getNbBits(elt);
+    bitC->bitContainer[idx] |= kFast ? HUF_getValueFast(elt) : HUF_getValue(elt);
+    /* We only read the low 8 bits of bitC->bitPos[idx] so it
+     * doesn't matter that the high bits have noise from the value.
+     */
+    bitC->bitPos[idx] += HUF_getNbBitsFast(elt);
+    assert((bitC->bitPos[idx] & 0xFF) <= HUF_BITS_IN_CONTAINER);
+    /* The last 4-bits of elt are dirty if fast is set,
+     * so we must not be overwriting bits that have already been
+     * inserted into the bit container.
+     */
+#if DEBUGLEVEL >= 1
+    {
+        size_t const nbBits = HUF_getNbBits(elt);
+        size_t const dirtyBits = nbBits == 0 ? 0 : BIT_highbit32((U32)nbBits) + 1;
+        (void)dirtyBits;
+        /* Middle bits are 0. */
+        assert(((elt >> dirtyBits) << (dirtyBits + nbBits)) == 0);
+        /* We didn't overwrite any bits in the bit container. */
+        assert(!kFast || (bitC->bitPos[idx] & 0xFF) <= HUF_BITS_IN_CONTAINER);
+        (void)dirtyBits;
+    }
+#endif
+}
+
+FORCE_INLINE_TEMPLATE void HUF_zeroIndex1(HUF_CStream_t* bitC)
+{
+    bitC->bitContainer[1] = 0;
+    bitC->bitPos[1] = 0;
+}
+
+/*! HUF_mergeIndex1() :
+ * Merges the bit container @ index 1 into the bit container @ index 0
+ * and zeros the bit container @ index 1.
+ */
+FORCE_INLINE_TEMPLATE void HUF_mergeIndex1(HUF_CStream_t* bitC)
+{
+    assert((bitC->bitPos[1] & 0xFF) < HUF_BITS_IN_CONTAINER);
+    bitC->bitContainer[0] >>= (bitC->bitPos[1] & 0xFF);
+    bitC->bitContainer[0] |= bitC->bitContainer[1];
+    bitC->bitPos[0] += bitC->bitPos[1];
+    assert((bitC->bitPos[0] & 0xFF) <= HUF_BITS_IN_CONTAINER);
+}
+
+/*! HUF_flushBits() :
+ * Flushes the bits in the bit container @ index 0.
+ *
+ * @post bitPos will be < 8.
+ * @param kFast If kFast is set then we must know a-priori that
+ *              the bit container will not overflow.
+ */
+FORCE_INLINE_TEMPLATE void HUF_flushBits(HUF_CStream_t* bitC, int kFast)
+{
+    /* The upper bits of bitPos are noisy, so we must mask by 0xFF. */
+    size_t const nbBits = bitC->bitPos[0] & 0xFF;
+    size_t const nbBytes = nbBits >> 3;
+    /* The top nbBits bits of bitContainer are the ones we need. */
+    size_t const bitContainer = bitC->bitContainer[0] >> (HUF_BITS_IN_CONTAINER - nbBits);
+    /* Mask bitPos to account for the bytes we consumed. */
+    bitC->bitPos[0] &= 7;
+    assert(nbBits > 0);
+    assert(nbBits <= sizeof(bitC->bitContainer[0]) * 8);
+    assert(bitC->ptr <= bitC->endPtr);
+    MEM_writeLEST(bitC->ptr, bitContainer);
+    bitC->ptr += nbBytes;
+    assert(!kFast || bitC->ptr <= bitC->endPtr);
+    if (!kFast && bitC->ptr > bitC->endPtr) bitC->ptr = bitC->endPtr;
+    /* bitContainer doesn't need to be modified because the leftover
+     * bits are already the top bitPos bits. And we don't care about
+     * noise in the lower values.
+     */
+}
+
+/*! HUF_endMark()
+ * @returns The Huffman stream end mark: A 1-bit value = 1.
+ */
+static HUF_CElt HUF_endMark(void)
+{
+    HUF_CElt endMark;
+    HUF_setNbBits(&endMark, 1);
+    HUF_setValue(&endMark, 1);
+    return endMark;
+}
+
+/*! HUF_closeCStream() :
+ * @return Size of CStream, in bytes,
+ *         or 0 if it could not fit into dstBuffer */
+static size_t HUF_closeCStream(HUF_CStream_t* bitC)
+{
+    HUF_addBits(bitC, HUF_endMark(), /* idx */ 0, /* kFast */ 0);
+    HUF_flushBits(bitC, /* kFast */ 0);
+    {
+        size_t const nbBits = bitC->bitPos[0] & 0xFF;
+        if (bitC->ptr >= bitC->endPtr) return 0; /* overflow detected */
+        return (bitC->ptr - bitC->startPtr) + (nbBits > 0);
+    }
+}
+
 FORCE_INLINE_TEMPLATE void
-HUF_encodeSymbol(
+HUF_encodeSymbol(HUF_CStream_t* bitCPtr, U32 symbol, const HUF_CElt* CTable, int idx, int fast)
 {
-
+    HUF_addBits(bitCPtr, CTable[symbol], idx, fast);
 }

-
+FORCE_INLINE_TEMPLATE void
+HUF_compress1X_usingCTable_internal_body_loop(HUF_CStream_t* bitC,
+                                              const BYTE* ip, size_t srcSize,
+                                              const HUF_CElt* ct,
+                                              int kUnroll, int kFastFlush, int kLastFast)
+{
+    /* Join to kUnroll */
+    int n = (int)srcSize;
+    int rem = n % kUnroll;
+    if (rem > 0) {
+        for (; rem > 0; --rem) {
+            HUF_encodeSymbol(bitC, ip[--n], ct, 0, /* fast */ 0);
+        }
+        HUF_flushBits(bitC, kFastFlush);
+    }
+    assert(n % kUnroll == 0);
+
+    /* Join to 2 * kUnroll */
+    if (n % (2 * kUnroll)) {
+        int u;
+        for (u = 1; u < kUnroll; ++u) {
+            HUF_encodeSymbol(bitC, ip[n - u], ct, 0, 1);
+        }
+        HUF_encodeSymbol(bitC, ip[n - kUnroll], ct, 0, kLastFast);
+        HUF_flushBits(bitC, kFastFlush);
+        n -= kUnroll;
+    }
+    assert(n % (2 * kUnroll) == 0);
+
+    for (; n>0; n-= 2 * kUnroll) {
+        /* Encode kUnroll symbols into the bitstream @ index 0. */
+        int u;
+        for (u = 1; u < kUnroll; ++u) {
+            HUF_encodeSymbol(bitC, ip[n - u], ct, /* idx */ 0, /* fast */ 1);
+        }
+        HUF_encodeSymbol(bitC, ip[n - kUnroll], ct, /* idx */ 0, /* fast */ kLastFast);
+        HUF_flushBits(bitC, kFastFlush);
+        /* Encode kUnroll symbols into the bitstream @ index 1.
+         * This allows us to start filling the bit container
+         * without any data dependencies.
+         */
+        HUF_zeroIndex1(bitC);
+        for (u = 1; u < kUnroll; ++u) {
+            HUF_encodeSymbol(bitC, ip[n - kUnroll - u], ct, /* idx */ 1, /* fast */ 1);
+        }
+        HUF_encodeSymbol(bitC, ip[n - kUnroll - kUnroll], ct, /* idx */ 1, /* fast */ kLastFast);
+        /* Merge bitstream @ index 1 into the bitstream @ index 0 */
+        HUF_mergeIndex1(bitC);
+        HUF_flushBits(bitC, kFastFlush);
+    }
+    assert(n == 0);
+
+}

-
-
+/**
+ * Returns a tight upper bound on the output space needed by Huffman
+ * with 8 bytes buffer to handle over-writes. If the output is at least
+ * this large we don't need to do bounds checks during Huffman encoding.
+ */
+static size_t HUF_tightCompressBound(size_t srcSize, size_t tableLog)
+{
+    return ((srcSize * tableLog) >> 3) + 8;
+}

-#define HUF_FLUSHBITS_2(stream) \
-    if (sizeof((stream)->bitContainer)*8 < HUF_TABLELOG_MAX*4+7) HUF_FLUSHBITS(stream)

 FORCE_INLINE_TEMPLATE size_t
 HUF_compress1X_usingCTable_internal_body(void* dst, size_t dstSize,
                                          const void* src, size_t srcSize,
                                          const HUF_CElt* CTable)
 {
+    U32 const tableLog = (U32)CTable[0];
+    HUF_CElt const* ct = CTable + 1;
     const BYTE* ip = (const BYTE*) src;
     BYTE* const ostart = (BYTE*)dst;
     BYTE* const oend = ostart + dstSize;
     BYTE* op = ostart;
-
-    BIT_CStream_t bitC;
+    HUF_CStream_t bitC;

     /* init */
     if (dstSize < 8) return 0; /* not enough space to compress */
-    { size_t const initErr =
+    { size_t const initErr = HUF_initCStream(&bitC, op, (size_t)(oend-op));
       if (HUF_isError(initErr)) return 0; }

-
-
-    {
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+    if (dstSize < HUF_tightCompressBound(srcSize, (size_t)tableLog) || tableLog > 11)
+        HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ MEM_32bits() ? 2 : 4, /* kFast */ 0, /* kLastFast */ 0);
+    else {
+        if (MEM_32bits()) {
+            switch (tableLog) {
+            case 11:
+                HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 2, /* kFastFlush */ 1, /* kLastFast */ 0);
+                break;
+            case 10: ZSTD_FALLTHROUGH;
+            case 9: ZSTD_FALLTHROUGH;
+            case 8:
+                HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 2, /* kFastFlush */ 1, /* kLastFast */ 1);
+                break;
+            case 7: ZSTD_FALLTHROUGH;
+            default:
+                HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 3, /* kFastFlush */ 1, /* kLastFast */ 1);
+                break;
+            }
+        } else {
+            switch (tableLog) {
+            case 11:
+                HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 5, /* kFastFlush */ 1, /* kLastFast */ 0);
+                break;
+            case 10:
+                HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 5, /* kFastFlush */ 1, /* kLastFast */ 1);
+                break;
+            case 9:
+                HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 6, /* kFastFlush */ 1, /* kLastFast */ 0);
+                break;
+            case 8:
+                HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 7, /* kFastFlush */ 1, /* kLastFast */ 0);
+                break;
+            case 7:
+                HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 8, /* kFastFlush */ 1, /* kLastFast */ 0);
+                break;
+            case 6: ZSTD_FALLTHROUGH;
+            default:
+                HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 9, /* kFastFlush */ 1, /* kLastFast */ 1);
+                break;
+            }
+        }
     }
+    assert(bitC.ptr <= bitC.endPtr);

-    return
+    return HUF_closeCStream(&bitC);
 }

 #if DYNAMIC_BMI2

-static
+static BMI2_TARGET_ATTRIBUTE size_t
 HUF_compress1X_usingCTable_internal_bmi2(void* dst, size_t dstSize,
                                          const void* src, size_t srcSize,
                                          const HUF_CElt* CTable)
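The new HUF_CStream_t fills its word-sized container from the top: each symbol right-shifts the container by nbBits and ORs the left-aligned value into the freed upper bits, and a second container can be filled independently and folded in with HUF_mergeIndex1(), which removes the data dependency between the two unrolled halves of the encode loop. A minimal sketch of the shift-and-merge idea (hypothetical names; the real stream also tracks output pointers and flushes whole bytes):

```c
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* Two 64-bit accumulators filled from the top, as in the HUF_CStream_t
 * comment above. Purely illustrative; no flushing or error handling. */
typedef struct { uint64_t bits[2]; unsigned pos[2]; } stream2_t;

static void add_bits(stream2_t* s, int idx, uint64_t value, unsigned nbBits) {
    assert(nbBits > 0 && nbBits <= 12 && (value >> nbBits) == 0);
    s->bits[idx] >>= nbBits;                  /* make room at the top */
    s->bits[idx]  |= value << (64 - nbBits);  /* new value in the top nbBits bits */
    s->pos[idx]   += nbBits;
}

static void merge_index1(stream2_t* s) {      /* fold container 1 into container 0 */
    s->bits[0] >>= s->pos[1];
    s->bits[0]  |= s->bits[1];
    s->pos[0]   += s->pos[1];
    s->bits[1] = 0; s->pos[1] = 0;
}

int main(void) {
    stream2_t s = { {0, 0}, {0, 0} };
    add_bits(&s, 0, 0x5, 3);   /* container 0: 3 bits */
    add_bits(&s, 1, 0x3, 2);   /* container 1: 2 bits, filled independently */
    merge_index1(&s);          /* container 0 now holds all 5 bits */
    printf("bits used: %u, top bits: 0x%llx\n",
           s.pos[0], (unsigned long long)(s.bits[0] >> (64 - s.pos[0])));
    return 0;
}
```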
@@ -667,9 +1071,13 @@ HUF_compress1X_usingCTable_internal(void* dst, size_t dstSize,

 size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable)
 {
-    return
+    return HUF_compress1X_usingCTable_bmi2(dst, dstSize, src, srcSize, CTable, /* bmi2 */ 0);
 }

+size_t HUF_compress1X_usingCTable_bmi2(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable, int bmi2)
+{
+    return HUF_compress1X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, bmi2);
+}

 static size_t
 HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize,
@@ -689,8 +1097,7 @@ HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize,

     assert(op <= oend);
     { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, bmi2) );
-      if (cSize==0) return 0;
-      assert(cSize <= 65535);
+      if (cSize == 0 || cSize > 65535) return 0;
       MEM_writeLE16(ostart, (U16)cSize);
       op += cSize;
     }
@@ -698,8 +1105,7 @@ HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize,
     ip += segmentSize;
     assert(op <= oend);
     { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, bmi2) );
-      if (cSize==0) return 0;
-      assert(cSize <= 65535);
+      if (cSize == 0 || cSize > 65535) return 0;
       MEM_writeLE16(ostart+2, (U16)cSize);
       op += cSize;
     }
@@ -707,8 +1113,7 @@ HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize,
     ip += segmentSize;
     assert(op <= oend);
     { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, bmi2) );
-      if (cSize==0) return 0;
-      assert(cSize <= 65535);
+      if (cSize == 0 || cSize > 65535) return 0;
       MEM_writeLE16(ostart+4, (U16)cSize);
       op += cSize;
     }
@@ -717,7 +1122,7 @@ HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize,
     assert(op <= oend);
     assert(ip <= iend);
     { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, (size_t)(iend-ip), CTable, bmi2) );
-      if (cSize==0) return 0;
+      if (cSize == 0 || cSize > 65535) return 0;
       op += cSize;
     }

@@ -726,7 +1131,12 @@ HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize,

 size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable)
 {
-    return
+    return HUF_compress4X_usingCTable_bmi2(dst, dstSize, src, srcSize, CTable, /* bmi2 */ 0);
+}
+
+size_t HUF_compress4X_usingCTable_bmi2(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable, int bmi2)
+{
+    return HUF_compress4X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, bmi2);
 }

 typedef enum { HUF_singleStream, HUF_fourStreams } HUF_nbStreams_e;
@@ -750,35 +1160,38 @@ static size_t HUF_compressCTable_internal(

 typedef struct {
     unsigned count[HUF_SYMBOLVALUE_MAX + 1];
-    HUF_CElt CTable[HUF_SYMBOLVALUE_MAX
+    HUF_CElt CTable[HUF_CTABLE_SIZE_ST(HUF_SYMBOLVALUE_MAX)];
     union {
         HUF_buildCTable_wksp_tables buildCTable_wksp;
         HUF_WriteCTableWksp writeCTable_wksp;
+        U32 hist_wksp[HIST_WKSP_SIZE_U32];
     } wksps;
 } HUF_compress_tables_t;

+#define SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE 4096
+#define SUSPECT_INCOMPRESSIBLE_SAMPLE_RATIO 10 /* Must be >= 2 */
+
 /* HUF_compress_internal() :
  * `workSpace_align4` must be aligned on 4-bytes boundaries,
- * and occupies the same space as a table of
+ * and occupies the same space as a table of HUF_WORKSPACE_SIZE_U64 unsigned */
 static size_t
 HUF_compress_internal (void* dst, size_t dstSize,
                        const void* src, size_t srcSize,
                        unsigned maxSymbolValue, unsigned huffLog,
                        HUF_nbStreams_e nbStreams,
-                       void*
+                       void* workSpace, size_t wkspSize,
                        HUF_CElt* oldHufTable, HUF_repeat* repeat, int preferRepeat,
-                       const int bmi2)
+                       const int bmi2, unsigned suspectUncompressible)
 {
-    HUF_compress_tables_t* const table = (HUF_compress_tables_t*)
+    HUF_compress_tables_t* const table = (HUF_compress_tables_t*)HUF_alignUpWorkspace(workSpace, &wkspSize, ZSTD_ALIGNOF(size_t));
     BYTE* const ostart = (BYTE*)dst;
     BYTE* const oend = ostart + dstSize;
     BYTE* op = ostart;

-    HUF_STATIC_ASSERT(sizeof(*table) <= HUF_WORKSPACE_SIZE);
-    assert(((size_t)workSpace_align4 & 3) == 0); /* must be aligned on 4-bytes boundaries */
+    HUF_STATIC_ASSERT(sizeof(*table) + HUF_WORKSPACE_MAX_ALIGNMENT <= HUF_WORKSPACE_SIZE);

     /* checks & inits */
-    if (wkspSize <
+    if (wkspSize < sizeof(*table)) return ERROR(workSpace_tooSmall);
     if (!srcSize) return 0; /* Uncompressed */
     if (!dstSize) return 0; /* cannot fit anything within dst budget */
     if (srcSize > HUF_BLOCKSIZE_MAX) return ERROR(srcSize_wrong); /* current block size limit */
@@ -794,8 +1207,23 @@ HUF_compress_internal (void* dst, size_t dstSize,
                                          nbStreams, oldHufTable, bmi2);
     }

+    /* If uncompressible data is suspected, do a smaller sampling first */
+    DEBUG_STATIC_ASSERT(SUSPECT_INCOMPRESSIBLE_SAMPLE_RATIO >= 2);
+    if (suspectUncompressible && srcSize >= (SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE * SUSPECT_INCOMPRESSIBLE_SAMPLE_RATIO)) {
+        size_t largestTotal = 0;
+        { unsigned maxSymbolValueBegin = maxSymbolValue;
+          CHECK_V_F(largestBegin, HIST_count_simple (table->count, &maxSymbolValueBegin, (const BYTE*)src, SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE) );
+          largestTotal += largestBegin;
+        }
+        { unsigned maxSymbolValueEnd = maxSymbolValue;
+          CHECK_V_F(largestEnd, HIST_count_simple (table->count, &maxSymbolValueEnd, (const BYTE*)src + srcSize - SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE, SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE) );
+          largestTotal += largestEnd;
+        }
+        if (largestTotal <= ((2 * SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE) >> 7)+4) return 0; /* heuristic : probably not compressible enough */
+    }
+
     /* Scan input and build symbol stats */
-    { CHECK_V_F(largest, HIST_count_wksp (table->count, &maxSymbolValue, (const BYTE*)src, srcSize,
+    { CHECK_V_F(largest, HIST_count_wksp (table->count, &maxSymbolValue, (const BYTE*)src, srcSize, table->wksps.hist_wksp, sizeof(table->wksps.hist_wksp)) );
       if (largest == srcSize) { *ostart = ((const BYTE*)src)[0]; return 1; } /* single symbol, rle */
       if (largest <= (srcSize >> 7)+4) return 0; /* heuristic : probably not compressible enough */
     }
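HUF_compress_internal() can now bail out early on data that is probably incompressible: when the caller sets suspectUncompressible and the block is at least ten times the 4 KiB sample size, it histograms 4 KiB from each end of the block and gives up if even the most frequent byte stays close to uniform. A standalone sketch of that heuristic (hypothetical helper; the real code uses HIST_count_simple() and zstd's error macros):

```c
#include <stddef.h>
#include <stdint.h>

/* Sketch of the "suspect incompressible" pre-check added in this diff. */
#define SAMPLE_SIZE 4096

static size_t max_count(const uint8_t* src, size_t size) {
    size_t count[256] = {0}, largest = 0, i;
    for (i = 0; i < size; i++) count[src[i]]++;
    for (i = 0; i < 256; i++) if (count[i] > largest) largest = count[i];
    return largest;
}

/* Returns 1 when the block looks incompressible and Huffman should be skipped. */
static int probably_incompressible(const uint8_t* src, size_t srcSize) {
    size_t largestTotal;
    if (srcSize < 10 * SAMPLE_SIZE) return 0;              /* sample ratio of 10, as in the diff */
    largestTotal  = max_count(src, SAMPLE_SIZE);
    largestTotal += max_count(src + srcSize - SAMPLE_SIZE, SAMPLE_SIZE);
    return largestTotal <= ((2 * SAMPLE_SIZE) >> 7) + 4;   /* close to uniform => skip */
}
```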
@@ -820,9 +1248,12 @@ HUF_compress_internal (void* dst, size_t dstSize,
                                             &table->wksps.buildCTable_wksp, sizeof(table->wksps.buildCTable_wksp));
         CHECK_F(maxBits);
         huffLog = (U32)maxBits;
-
-
-
+    }
+    /* Zero unused symbols in CTable, so we can check it for validity */
+    {
+        size_t const ctableSize = HUF_CTABLE_SIZE_ST(maxSymbolValue);
+        size_t const unusedSize = sizeof(table->CTable) - ctableSize * sizeof(HUF_CElt);
+        ZSTD_memset(table->CTable + ctableSize, 0, unusedSize);
     }

     /* Write table description header */
@@ -859,19 +1290,20 @@ size_t HUF_compress1X_wksp (void* dst, size_t dstSize,
     return HUF_compress_internal(dst, dstSize, src, srcSize,
                                  maxSymbolValue, huffLog, HUF_singleStream,
                                  workSpace, wkspSize,
-                                 NULL, NULL, 0, 0 /*bmi2
+                                 NULL, NULL, 0, 0 /*bmi2*/, 0);
 }

 size_t HUF_compress1X_repeat (void* dst, size_t dstSize,
                       const void* src, size_t srcSize,
                       unsigned maxSymbolValue, unsigned huffLog,
                       void* workSpace, size_t wkspSize,
-                      HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat,
+                      HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat,
+                      int bmi2, unsigned suspectUncompressible)
 {
     return HUF_compress_internal(dst, dstSize, src, srcSize,
                                  maxSymbolValue, huffLog, HUF_singleStream,
                                  workSpace, wkspSize, hufTable,
-                                 repeat, preferRepeat, bmi2);
+                                 repeat, preferRepeat, bmi2, suspectUncompressible);
 }

 /* HUF_compress4X_repeat():
@@ -885,22 +1317,23 @@ size_t HUF_compress4X_wksp (void* dst, size_t dstSize,
     return HUF_compress_internal(dst, dstSize, src, srcSize,
                                  maxSymbolValue, huffLog, HUF_fourStreams,
                                  workSpace, wkspSize,
-                                 NULL, NULL, 0, 0 /*bmi2
+                                 NULL, NULL, 0, 0 /*bmi2*/, 0);
 }

 /* HUF_compress4X_repeat():
  * compress input using 4 streams.
+ * consider skipping quickly
  * re-use an existing huffman compression table */
 size_t HUF_compress4X_repeat (void* dst, size_t dstSize,
                       const void* src, size_t srcSize,
                       unsigned maxSymbolValue, unsigned huffLog,
                       void* workSpace, size_t wkspSize,
-                      HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2)
+                      HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2, unsigned suspectUncompressible)
 {
     return HUF_compress_internal(dst, dstSize, src, srcSize,
                                  maxSymbolValue, huffLog, HUF_fourStreams,
                                  workSpace, wkspSize,
-                                 hufTable, repeat, preferRepeat, bmi2);
+                                 hufTable, repeat, preferRepeat, bmi2, suspectUncompressible);
 }

 #ifndef ZSTD_NO_UNUSED_FUNCTIONS
@@ -918,7 +1351,7 @@ size_t HUF_compress1X (void* dst, size_t dstSize,
                  const void* src, size_t srcSize,
                  unsigned maxSymbolValue, unsigned huffLog)
 {
-
+    U64 workSpace[HUF_WORKSPACE_SIZE_U64];
     return HUF_compress1X_wksp(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, workSpace, sizeof(workSpace));
 }

@@ -926,7 +1359,7 @@ size_t HUF_compress2 (void* dst, size_t dstSize,
                 const void* src, size_t srcSize,
                 unsigned maxSymbolValue, unsigned huffLog)
 {
-
+    U64 workSpace[HUF_WORKSPACE_SIZE_U64];
     return HUF_compress4X_wksp(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, workSpace, sizeof(workSpace));
 }
