extzstd 0.1 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/HISTORY.ja +5 -0
- data/README.md +5 -5
- data/contrib/zstd/CONTRIBUTING.md +42 -0
- data/contrib/zstd/LICENSE-examples +11 -0
- data/contrib/zstd/Makefile +315 -0
- data/contrib/zstd/NEWS +261 -0
- data/contrib/zstd/PATENTS +33 -0
- data/contrib/zstd/README.md +121 -41
- data/contrib/zstd/TESTING.md +44 -0
- data/contrib/zstd/appveyor.yml +178 -0
- data/contrib/zstd/circle.yml +75 -0
- data/contrib/zstd/lib/BUCK +186 -0
- data/contrib/zstd/lib/Makefile +163 -0
- data/contrib/zstd/lib/README.md +77 -0
- data/contrib/zstd/{common → lib/common}/bitstream.h +7 -4
- data/contrib/zstd/{common → lib/common}/entropy_common.c +19 -23
- data/contrib/zstd/{common → lib/common}/error_private.c +0 -0
- data/contrib/zstd/{common → lib/common}/error_private.h +0 -0
- data/contrib/zstd/{common → lib/common}/fse.h +94 -34
- data/contrib/zstd/{common → lib/common}/fse_decompress.c +18 -19
- data/contrib/zstd/{common → lib/common}/huf.h +52 -20
- data/contrib/zstd/{common → lib/common}/mem.h +17 -13
- data/contrib/zstd/lib/common/pool.c +194 -0
- data/contrib/zstd/lib/common/pool.h +56 -0
- data/contrib/zstd/lib/common/threading.c +80 -0
- data/contrib/zstd/lib/common/threading.h +104 -0
- data/contrib/zstd/{common → lib/common}/xxhash.c +3 -1
- data/contrib/zstd/{common → lib/common}/xxhash.h +11 -15
- data/contrib/zstd/{common → lib/common}/zstd_common.c +1 -11
- data/contrib/zstd/{common → lib/common}/zstd_errors.h +16 -2
- data/contrib/zstd/{common → lib/common}/zstd_internal.h +17 -1
- data/contrib/zstd/{compress → lib/compress}/fse_compress.c +138 -91
- data/contrib/zstd/{compress → lib/compress}/huf_compress.c +218 -67
- data/contrib/zstd/{compress → lib/compress}/zstd_compress.c +231 -108
- data/contrib/zstd/{compress → lib/compress}/zstd_opt.h +44 -25
- data/contrib/zstd/lib/compress/zstdmt_compress.c +739 -0
- data/contrib/zstd/lib/compress/zstdmt_compress.h +78 -0
- data/contrib/zstd/{decompress → lib/decompress}/huf_decompress.c +28 -23
- data/contrib/zstd/{decompress → lib/decompress}/zstd_decompress.c +814 -176
- data/contrib/zstd/{common → lib/deprecated}/zbuff.h +60 -39
- data/contrib/zstd/lib/deprecated/zbuff_common.c +26 -0
- data/contrib/zstd/lib/deprecated/zbuff_compress.c +145 -0
- data/contrib/zstd/lib/deprecated/zbuff_decompress.c +74 -0
- data/contrib/zstd/lib/dictBuilder/cover.c +1029 -0
- data/contrib/zstd/{dictBuilder → lib/dictBuilder}/divsufsort.c +0 -0
- data/contrib/zstd/{dictBuilder → lib/dictBuilder}/divsufsort.h +0 -0
- data/contrib/zstd/{dictBuilder → lib/dictBuilder}/zdict.c +68 -18
- data/contrib/zstd/lib/dictBuilder/zdict.h +201 -0
- data/contrib/zstd/{legacy → lib/legacy}/zstd_legacy.h +122 -7
- data/contrib/zstd/{legacy → lib/legacy}/zstd_v01.c +34 -3
- data/contrib/zstd/{legacy → lib/legacy}/zstd_v01.h +8 -0
- data/contrib/zstd/{legacy → lib/legacy}/zstd_v02.c +45 -12
- data/contrib/zstd/{legacy → lib/legacy}/zstd_v02.h +8 -0
- data/contrib/zstd/{legacy → lib/legacy}/zstd_v03.c +45 -12
- data/contrib/zstd/{legacy → lib/legacy}/zstd_v03.h +8 -0
- data/contrib/zstd/{legacy → lib/legacy}/zstd_v04.c +56 -33
- data/contrib/zstd/{legacy → lib/legacy}/zstd_v04.h +8 -0
- data/contrib/zstd/{legacy → lib/legacy}/zstd_v05.c +45 -18
- data/contrib/zstd/{legacy → lib/legacy}/zstd_v05.h +7 -0
- data/contrib/zstd/{legacy → lib/legacy}/zstd_v06.c +43 -16
- data/contrib/zstd/{legacy → lib/legacy}/zstd_v06.h +7 -0
- data/contrib/zstd/{legacy → lib/legacy}/zstd_v07.c +57 -23
- data/contrib/zstd/{legacy → lib/legacy}/zstd_v07.h +8 -0
- data/contrib/zstd/lib/libzstd.pc.in +14 -0
- data/contrib/zstd/{zstd.h → lib/zstd.h} +206 -71
- data/ext/depend +2 -0
- data/ext/extconf.rb +4 -4
- data/ext/extzstd.c +1 -1
- data/ext/zstd_common.c +5 -5
- data/ext/zstd_compress.c +3 -3
- data/ext/zstd_decompress.c +2 -2
- data/ext/zstd_dictbuilder.c +2 -2
- data/ext/zstd_legacy_v01.c +1 -1
- data/ext/zstd_legacy_v02.c +1 -1
- data/ext/zstd_legacy_v03.c +1 -1
- data/ext/zstd_legacy_v04.c +1 -1
- data/ext/zstd_legacy_v05.c +1 -1
- data/ext/zstd_legacy_v06.c +1 -1
- data/ext/zstd_legacy_v07.c +1 -1
- data/gemstub.rb +9 -5
- data/lib/extzstd/version.rb +1 -1
- metadata +73 -51
- data/contrib/zstd/compress/zbuff_compress.c +0 -319
- data/contrib/zstd/decompress/zbuff_decompress.c +0 -252
- data/contrib/zstd/dictBuilder/zdict.h +0 -111
|
@@ -70,12 +70,6 @@
|
|
|
70
70
|
#define FSE_STATIC_ASSERT(c) { enum { FSE_static_assert = 1/(int)(!!(c)) }; } /* use only *after* variable declarations */
|
|
71
71
|
|
|
72
72
|
|
|
73
|
-
/* **************************************************************
|
|
74
|
-
* Complex types
|
|
75
|
-
****************************************************************/
|
|
76
|
-
typedef U32 CTable_max_t[FSE_CTABLE_SIZE_U32(FSE_MAX_TABLELOG, FSE_MAX_SYMBOL_VALUE)];
|
|
77
|
-
|
|
78
|
-
|
|
79
73
|
/* **************************************************************
|
|
80
74
|
* Templates
|
|
81
75
|
****************************************************************/
|
|
@@ -100,7 +94,13 @@ typedef U32 CTable_max_t[FSE_CTABLE_SIZE_U32(FSE_MAX_TABLELOG, FSE_MAX_SYMBOL_VA
|
|
|
100
94
|
|
|
101
95
|
|
|
102
96
|
/* Function templates */
|
|
103
|
-
|
|
97
|
+
|
|
98
|
+
/* FSE_buildCTable_wksp() :
|
|
99
|
+
* Same as FSE_buildCTable(), but using an externally allocated scratch buffer (`workSpace`).
|
|
100
|
+
* wkspSize should be sized to handle worst case situation, which is `1<<max_tableLog * sizeof(FSE_FUNCTION_TYPE)`
|
|
101
|
+
* workSpace must also be properly aligned with FSE_FUNCTION_TYPE requirements
|
|
102
|
+
*/
|
|
103
|
+
size_t FSE_buildCTable_wksp(FSE_CTable* ct, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize)
|
|
104
104
|
{
|
|
105
105
|
U32 const tableSize = 1 << tableLog;
|
|
106
106
|
U32 const tableMask = tableSize - 1;
|
|
@@ -111,10 +111,11 @@ size_t FSE_buildCTable(FSE_CTable* ct, const short* normalizedCounter, unsigned
|
|
|
111
111
|
U32 const step = FSE_TABLESTEP(tableSize);
|
|
112
112
|
U32 cumul[FSE_MAX_SYMBOL_VALUE+2];
|
|
113
113
|
|
|
114
|
-
FSE_FUNCTION_TYPE tableSymbol
|
|
114
|
+
FSE_FUNCTION_TYPE* const tableSymbol = (FSE_FUNCTION_TYPE*)workSpace;
|
|
115
115
|
U32 highThreshold = tableSize-1;
|
|
116
116
|
|
|
117
117
|
/* CTable header */
|
|
118
|
+
if (((size_t)1 << tableLog) * sizeof(FSE_FUNCTION_TYPE) > wkspSize) return ERROR(tableLog_tooLarge);
|
|
118
119
|
tableU16[-2] = (U16) tableLog;
|
|
119
120
|
tableU16[-1] = (U16) maxSymbolValue;
|
|
120
121
|
|
|
@@ -181,6 +182,13 @@ size_t FSE_buildCTable(FSE_CTable* ct, const short* normalizedCounter, unsigned
|
|
|
181
182
|
}
|
|
182
183
|
|
|
183
184
|
|
|
185
|
+
size_t FSE_buildCTable(FSE_CTable* ct, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog)
|
|
186
|
+
{
|
|
187
|
+
FSE_FUNCTION_TYPE tableSymbol[FSE_MAX_TABLESIZE]; /* memset() is not necessary, even if static analyzer complain about it */
|
|
188
|
+
return FSE_buildCTable_wksp(ct, normalizedCounter, maxSymbolValue, tableLog, tableSymbol, sizeof(tableSymbol));
|
|
189
|
+
}
|
|
190
|
+
|
|
191
|
+
|
|
184
192
|
|
|
185
193
|
#ifndef FSE_COMMONDEFS_ONLY
|
|
186
194
|
|
|
@@ -189,12 +197,10 @@ size_t FSE_buildCTable(FSE_CTable* ct, const short* normalizedCounter, unsigned
|
|
|
189
197
|
****************************************************************/
|
|
190
198
|
size_t FSE_NCountWriteBound(unsigned maxSymbolValue, unsigned tableLog)
|
|
191
199
|
{
|
|
192
|
-
size_t maxHeaderSize = (((maxSymbolValue+1) * tableLog) >> 3) + 3;
|
|
200
|
+
size_t const maxHeaderSize = (((maxSymbolValue+1) * tableLog) >> 3) + 3;
|
|
193
201
|
return maxSymbolValue ? maxHeaderSize : FSE_NCOUNTBOUND; /* maxSymbolValue==0 ? use default */
|
|
194
202
|
}
|
|
195
203
|
|
|
196
|
-
static short FSE_abs(short a) { return (short)(a<0 ? -a : a); }
|
|
197
|
-
|
|
198
204
|
static size_t FSE_writeNCount_generic (void* header, size_t headerBufferSize,
|
|
199
205
|
const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog,
|
|
200
206
|
unsigned writeIsSafe)
|
|
@@ -250,16 +256,16 @@ static size_t FSE_writeNCount_generic (void* header, size_t headerBufferSize,
|
|
|
250
256
|
bitStream >>= 16;
|
|
251
257
|
bitCount -= 16;
|
|
252
258
|
} }
|
|
253
|
-
{
|
|
254
|
-
const
|
|
255
|
-
remaining -=
|
|
256
|
-
if (remaining<1) return ERROR(GENERIC);
|
|
259
|
+
{ int count = normalizedCounter[charnum++];
|
|
260
|
+
int const max = (2*threshold-1)-remaining;
|
|
261
|
+
remaining -= count < 0 ? -count : count;
|
|
257
262
|
count++; /* +1 for extra accuracy */
|
|
258
263
|
if (count>=threshold) count += max; /* [0..max[ [max..threshold[ (...) [threshold+max 2*threshold[ */
|
|
259
264
|
bitStream += count << bitCount;
|
|
260
265
|
bitCount += nbBits;
|
|
261
266
|
bitCount -= (count<max);
|
|
262
267
|
previous0 = (count==1);
|
|
268
|
+
if (remaining<1) return ERROR(GENERIC);
|
|
263
269
|
while (remaining<threshold) nbBits--, threshold>>=1;
|
|
264
270
|
}
|
|
265
271
|
if (bitCount>16) {
|
|
@@ -300,21 +306,20 @@ size_t FSE_writeNCount (void* buffer, size_t bufferSize, const short* normalized
|
|
|
300
306
|
* Counting histogram
|
|
301
307
|
****************************************************************/
|
|
302
308
|
/*! FSE_count_simple
|
|
303
|
-
This function
|
|
304
|
-
|
|
305
|
-
|
|
309
|
+
This function counts byte values within `src`, and store the histogram into table `count`.
|
|
310
|
+
It doesn't use any additional memory.
|
|
311
|
+
But this function is unsafe : it doesn't check that all values within `src` can fit into `count`.
|
|
306
312
|
For this reason, prefer using a table `count` with 256 elements.
|
|
307
313
|
@return : count of most numerous element
|
|
308
314
|
*/
|
|
309
|
-
|
|
310
|
-
|
|
315
|
+
size_t FSE_count_simple(unsigned* count, unsigned* maxSymbolValuePtr,
|
|
316
|
+
const void* src, size_t srcSize)
|
|
311
317
|
{
|
|
312
318
|
const BYTE* ip = (const BYTE*)src;
|
|
313
319
|
const BYTE* const end = ip + srcSize;
|
|
314
320
|
unsigned maxSymbolValue = *maxSymbolValuePtr;
|
|
315
321
|
unsigned max=0;
|
|
316
322
|
|
|
317
|
-
|
|
318
323
|
memset(count, 0, (maxSymbolValue+1)*sizeof(*count));
|
|
319
324
|
if (srcSize==0) { *maxSymbolValuePtr = 0; return 0; }
|
|
320
325
|
|
|
@@ -329,20 +334,24 @@ static size_t FSE_count_simple(unsigned* count, unsigned* maxSymbolValuePtr,
|
|
|
329
334
|
}
|
|
330
335
|
|
|
331
336
|
|
|
332
|
-
|
|
337
|
+
/* FSE_count_parallel_wksp() :
|
|
338
|
+
* Same as FSE_count_parallel(), but using an externally provided scratch buffer.
|
|
339
|
+
* `workSpace` size must be a minimum of `1024 * sizeof(unsigned)`` */
|
|
340
|
+
static size_t FSE_count_parallel_wksp(
|
|
341
|
+
unsigned* count, unsigned* maxSymbolValuePtr,
|
|
333
342
|
const void* source, size_t sourceSize,
|
|
334
|
-
unsigned checkMax)
|
|
343
|
+
unsigned checkMax, unsigned* const workSpace)
|
|
335
344
|
{
|
|
336
345
|
const BYTE* ip = (const BYTE*)source;
|
|
337
346
|
const BYTE* const iend = ip+sourceSize;
|
|
338
347
|
unsigned maxSymbolValue = *maxSymbolValuePtr;
|
|
339
348
|
unsigned max=0;
|
|
349
|
+
U32* const Counting1 = workSpace;
|
|
350
|
+
U32* const Counting2 = Counting1 + 256;
|
|
351
|
+
U32* const Counting3 = Counting2 + 256;
|
|
352
|
+
U32* const Counting4 = Counting3 + 256;
|
|
340
353
|
|
|
341
|
-
|
|
342
|
-
U32 Counting1[256] = { 0 };
|
|
343
|
-
U32 Counting2[256] = { 0 };
|
|
344
|
-
U32 Counting3[256] = { 0 };
|
|
345
|
-
U32 Counting4[256] = { 0 };
|
|
354
|
+
memset(Counting1, 0, 4*256*sizeof(unsigned));
|
|
346
355
|
|
|
347
356
|
/* safety checks */
|
|
348
357
|
if (!sourceSize) {
|
|
@@ -388,31 +397,51 @@ static size_t FSE_count_parallel(unsigned* count, unsigned* maxSymbolValuePtr,
|
|
|
388
397
|
if (Counting1[s]) return ERROR(maxSymbolValue_tooSmall);
|
|
389
398
|
} }
|
|
390
399
|
|
|
391
|
-
{
|
|
392
|
-
|
|
393
|
-
|
|
394
|
-
}}
|
|
400
|
+
{ U32 s; for (s=0; s<=maxSymbolValue; s++) {
|
|
401
|
+
count[s] = Counting1[s] + Counting2[s] + Counting3[s] + Counting4[s];
|
|
402
|
+
if (count[s] > max) max = count[s];
|
|
403
|
+
} }
|
|
395
404
|
|
|
396
405
|
while (!count[maxSymbolValue]) maxSymbolValue--;
|
|
397
406
|
*maxSymbolValuePtr = maxSymbolValue;
|
|
398
407
|
return (size_t)max;
|
|
399
408
|
}
|
|
400
409
|
|
|
410
|
+
/* FSE_countFast_wksp() :
|
|
411
|
+
* Same as FSE_countFast(), but using an externally provided scratch buffer.
|
|
412
|
+
* `workSpace` size must be table of >= `1024` unsigned */
|
|
413
|
+
size_t FSE_countFast_wksp(unsigned* count, unsigned* maxSymbolValuePtr,
|
|
414
|
+
const void* source, size_t sourceSize, unsigned* workSpace)
|
|
415
|
+
{
|
|
416
|
+
if (sourceSize < 1500) return FSE_count_simple(count, maxSymbolValuePtr, source, sourceSize);
|
|
417
|
+
return FSE_count_parallel_wksp(count, maxSymbolValuePtr, source, sourceSize, 0, workSpace);
|
|
418
|
+
}
|
|
419
|
+
|
|
401
420
|
/* fast variant (unsafe : won't check if src contains values beyond count[] limit) */
|
|
402
421
|
size_t FSE_countFast(unsigned* count, unsigned* maxSymbolValuePtr,
|
|
403
422
|
const void* source, size_t sourceSize)
|
|
404
423
|
{
|
|
405
|
-
|
|
406
|
-
return
|
|
424
|
+
unsigned tmpCounters[1024];
|
|
425
|
+
return FSE_countFast_wksp(count, maxSymbolValuePtr, source, sourceSize, tmpCounters);
|
|
407
426
|
}
|
|
408
427
|
|
|
409
|
-
|
|
410
|
-
|
|
428
|
+
/* FSE_count_wksp() :
|
|
429
|
+
* Same as FSE_count(), but using an externally provided scratch buffer.
|
|
430
|
+
* `workSpace` size must be table of >= `1024` unsigned */
|
|
431
|
+
size_t FSE_count_wksp(unsigned* count, unsigned* maxSymbolValuePtr,
|
|
432
|
+
const void* source, size_t sourceSize, unsigned* workSpace)
|
|
411
433
|
{
|
|
412
|
-
if (*maxSymbolValuePtr <255)
|
|
413
|
-
return
|
|
434
|
+
if (*maxSymbolValuePtr < 255)
|
|
435
|
+
return FSE_count_parallel_wksp(count, maxSymbolValuePtr, source, sourceSize, 1, workSpace);
|
|
414
436
|
*maxSymbolValuePtr = 255;
|
|
415
|
-
return
|
|
437
|
+
return FSE_countFast_wksp(count, maxSymbolValuePtr, source, sourceSize, workSpace);
|
|
438
|
+
}
|
|
439
|
+
|
|
440
|
+
size_t FSE_count(unsigned* count, unsigned* maxSymbolValuePtr,
|
|
441
|
+
const void* src, size_t srcSize)
|
|
442
|
+
{
|
|
443
|
+
unsigned tmpCounters[1024];
|
|
444
|
+
return FSE_count_wksp(count, maxSymbolValuePtr, src, srcSize, tmpCounters);
|
|
416
445
|
}
|
|
417
446
|
|
|
418
447
|
|
|
@@ -428,14 +457,10 @@ size_t FSE_count(unsigned* count, unsigned* maxSymbolValuePtr,
|
|
|
428
457
|
`FSE_symbolCompressionTransform symbolTT[maxSymbolValue+1];` // This size is variable
|
|
429
458
|
Allocation is manual (C standard does not support variable-size structures).
|
|
430
459
|
*/
|
|
431
|
-
|
|
432
460
|
size_t FSE_sizeof_CTable (unsigned maxSymbolValue, unsigned tableLog)
|
|
433
461
|
{
|
|
434
|
-
|
|
435
|
-
|
|
436
|
-
if (tableLog > FSE_MAX_TABLELOG) return ERROR(GENERIC);
|
|
437
|
-
size = FSE_CTABLE_SIZE_U32 (tableLog, maxSymbolValue) * sizeof(U32);
|
|
438
|
-
return size;
|
|
462
|
+
if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge);
|
|
463
|
+
return FSE_CTABLE_SIZE_U32 (tableLog, maxSymbolValue) * sizeof(U32);
|
|
439
464
|
}
|
|
440
465
|
|
|
441
466
|
FSE_CTable* FSE_createCTable (unsigned maxSymbolValue, unsigned tableLog)
|
|
@@ -481,12 +506,13 @@ unsigned FSE_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxS
|
|
|
481
506
|
|
|
482
507
|
static size_t FSE_normalizeM2(short* norm, U32 tableLog, const unsigned* count, size_t total, U32 maxSymbolValue)
|
|
483
508
|
{
|
|
509
|
+
short const NOT_YET_ASSIGNED = -2;
|
|
484
510
|
U32 s;
|
|
485
511
|
U32 distributed = 0;
|
|
486
512
|
U32 ToDistribute;
|
|
487
513
|
|
|
488
514
|
/* Init */
|
|
489
|
-
U32 lowThreshold = (U32)(total >> tableLog);
|
|
515
|
+
U32 const lowThreshold = (U32)(total >> tableLog);
|
|
490
516
|
U32 lowOne = (U32)((total * 3) >> (tableLog + 1));
|
|
491
517
|
|
|
492
518
|
for (s=0; s<=maxSymbolValue; s++) {
|
|
@@ -506,7 +532,8 @@ static size_t FSE_normalizeM2(short* norm, U32 tableLog, const unsigned* count,
|
|
|
506
532
|
total -= count[s];
|
|
507
533
|
continue;
|
|
508
534
|
}
|
|
509
|
-
|
|
535
|
+
|
|
536
|
+
norm[s]=NOT_YET_ASSIGNED;
|
|
510
537
|
}
|
|
511
538
|
ToDistribute = (1 << tableLog) - distributed;
|
|
512
539
|
|
|
@@ -514,7 +541,7 @@ static size_t FSE_normalizeM2(short* norm, U32 tableLog, const unsigned* count,
|
|
|
514
541
|
/* risk of rounding to zero */
|
|
515
542
|
lowOne = (U32)((total * 3) / (ToDistribute * 2));
|
|
516
543
|
for (s=0; s<=maxSymbolValue; s++) {
|
|
517
|
-
if ((norm[s] ==
|
|
544
|
+
if ((norm[s] == NOT_YET_ASSIGNED) && (count[s] <= lowOne)) {
|
|
518
545
|
norm[s] = 1;
|
|
519
546
|
distributed++;
|
|
520
547
|
total -= count[s];
|
|
@@ -534,17 +561,23 @@ static size_t FSE_normalizeM2(short* norm, U32 tableLog, const unsigned* count,
|
|
|
534
561
|
return 0;
|
|
535
562
|
}
|
|
536
563
|
|
|
537
|
-
{
|
|
538
|
-
|
|
564
|
+
if (total == 0) {
|
|
565
|
+
/* all of the symbols were low enough for the lowOne or lowThreshold */
|
|
566
|
+
for (s=0; ToDistribute > 0; s = (s+1)%(maxSymbolValue+1))
|
|
567
|
+
if (norm[s] > 0) ToDistribute--, norm[s]++;
|
|
568
|
+
return 0;
|
|
569
|
+
}
|
|
570
|
+
|
|
571
|
+
{ U64 const vStepLog = 62 - tableLog;
|
|
539
572
|
U64 const mid = (1ULL << (vStepLog-1)) - 1;
|
|
540
573
|
U64 const rStep = ((((U64)1<<vStepLog) * ToDistribute) + mid) / total; /* scale on remaining */
|
|
541
574
|
U64 tmpTotal = mid;
|
|
542
575
|
for (s=0; s<=maxSymbolValue; s++) {
|
|
543
|
-
if (norm[s]
|
|
544
|
-
U64 end = tmpTotal + (count[s] * rStep);
|
|
545
|
-
U32 sStart = (U32)(tmpTotal >> vStepLog);
|
|
546
|
-
U32 sEnd = (U32)(end >> vStepLog);
|
|
547
|
-
U32 weight = sEnd - sStart;
|
|
576
|
+
if (norm[s]==NOT_YET_ASSIGNED) {
|
|
577
|
+
U64 const end = tmpTotal + (count[s] * rStep);
|
|
578
|
+
U32 const sStart = (U32)(tmpTotal >> vStepLog);
|
|
579
|
+
U32 const sEnd = (U32)(end >> vStepLog);
|
|
580
|
+
U32 const weight = sEnd - sStart;
|
|
548
581
|
if (weight < 1)
|
|
549
582
|
return ERROR(GENERIC);
|
|
550
583
|
norm[s] = (short)weight;
|
|
@@ -566,7 +599,6 @@ size_t FSE_normalizeCount (short* normalizedCounter, unsigned tableLog,
|
|
|
566
599
|
if (tableLog < FSE_minTableLog(total, maxSymbolValue)) return ERROR(GENERIC); /* Too small tableLog, compression potentially impossible */
|
|
567
600
|
|
|
568
601
|
{ U32 const rtbTable[] = { 0, 473195, 504333, 520860, 550000, 700000, 750000, 830000 };
|
|
569
|
-
|
|
570
602
|
U64 const scale = 62 - tableLog;
|
|
571
603
|
U64 const step = ((U64)1<<62) / total; /* <== here, one division ! */
|
|
572
604
|
U64 const vStep = 1ULL<<(scale-20);
|
|
@@ -594,7 +626,7 @@ size_t FSE_normalizeCount (short* normalizedCounter, unsigned tableLog,
|
|
|
594
626
|
} }
|
|
595
627
|
if (-stillToDistribute >= (normalizedCounter[largest] >> 1)) {
|
|
596
628
|
/* corner case, need another normalization method */
|
|
597
|
-
size_t errorCode = FSE_normalizeM2(normalizedCounter, tableLog, count, total, maxSymbolValue);
|
|
629
|
+
size_t const errorCode = FSE_normalizeM2(normalizedCounter, tableLog, count, total, maxSymbolValue);
|
|
598
630
|
if (FSE_isError(errorCode)) return errorCode;
|
|
599
631
|
}
|
|
600
632
|
else normalizedCounter[largest] += (short)stillToDistribute;
|
|
@@ -643,17 +675,15 @@ size_t FSE_buildCTable_raw (FSE_CTable* ct, unsigned nbBits)
|
|
|
643
675
|
|
|
644
676
|
/* Build Symbol Transformation Table */
|
|
645
677
|
{ const U32 deltaNbBits = (nbBits << 16) - (1 << nbBits);
|
|
646
|
-
|
|
647
678
|
for (s=0; s<=maxSymbolValue; s++) {
|
|
648
679
|
symbolTT[s].deltaNbBits = deltaNbBits;
|
|
649
680
|
symbolTT[s].deltaFindState = s-1;
|
|
650
681
|
} }
|
|
651
682
|
|
|
652
|
-
|
|
653
683
|
return 0;
|
|
654
684
|
}
|
|
655
685
|
|
|
656
|
-
/* fake FSE_CTable, for rle (
|
|
686
|
+
/* fake FSE_CTable, for rle input (always same symbol) */
|
|
657
687
|
size_t FSE_buildCTable_rle (FSE_CTable* ct, BYTE symbolValue)
|
|
658
688
|
{
|
|
659
689
|
void* ptr = ct;
|
|
@@ -685,14 +715,13 @@ static size_t FSE_compress_usingCTable_generic (void* dst, size_t dstSize,
|
|
|
685
715
|
const BYTE* const iend = istart + srcSize;
|
|
686
716
|
const BYTE* ip=iend;
|
|
687
717
|
|
|
688
|
-
|
|
689
718
|
BIT_CStream_t bitC;
|
|
690
719
|
FSE_CState_t CState1, CState2;
|
|
691
720
|
|
|
692
721
|
/* init */
|
|
693
722
|
if (srcSize <= 2) return 0;
|
|
694
|
-
{ size_t const
|
|
695
|
-
if (FSE_isError(
|
|
723
|
+
{ size_t const initError = BIT_initCStream(&bitC, dst, dstSize);
|
|
724
|
+
if (FSE_isError(initError)) return 0; /* not enough space available to write a bitstream */ }
|
|
696
725
|
|
|
697
726
|
#define FSE_FLUSHBITS(s) (fast ? BIT_flushBitsFast(s) : BIT_flushBits(s))
|
|
698
727
|
|
|
@@ -715,7 +744,7 @@ static size_t FSE_compress_usingCTable_generic (void* dst, size_t dstSize,
|
|
|
715
744
|
}
|
|
716
745
|
|
|
717
746
|
/* 2 or 4 encoding per loop */
|
|
718
|
-
|
|
747
|
+
while ( ip>istart ) {
|
|
719
748
|
|
|
720
749
|
FSE_encodeSymbol(&bitC, &CState2, *--ip);
|
|
721
750
|
|
|
@@ -741,7 +770,7 @@ size_t FSE_compress_usingCTable (void* dst, size_t dstSize,
|
|
|
741
770
|
const void* src, size_t srcSize,
|
|
742
771
|
const FSE_CTable* ct)
|
|
743
772
|
{
|
|
744
|
-
const
|
|
773
|
+
unsigned const fast = (dstSize >= FSE_BLOCKBOUND(srcSize));
|
|
745
774
|
|
|
746
775
|
if (fast)
|
|
747
776
|
return FSE_compress_usingCTable_generic(dst, dstSize, src, srcSize, ct, 1);
|
|
@@ -752,58 +781,76 @@ size_t FSE_compress_usingCTable (void* dst, size_t dstSize,
|
|
|
752
781
|
|
|
753
782
|
size_t FSE_compressBound(size_t size) { return FSE_COMPRESSBOUND(size); }
|
|
754
783
|
|
|
755
|
-
|
|
756
|
-
{
|
|
757
|
-
const BYTE* const istart = (const BYTE*) src;
|
|
758
|
-
const BYTE* ip = istart;
|
|
784
|
+
#define CHECK_V_F(e, f) size_t const e = f; if (ERR_isError(e)) return f
|
|
785
|
+
#define CHECK_F(f) { CHECK_V_F(_var_err__, f); }
|
|
759
786
|
|
|
787
|
+
/* FSE_compress_wksp() :
|
|
788
|
+
* Same as FSE_compress2(), but using an externally allocated scratch buffer (`workSpace`).
|
|
789
|
+
* `wkspSize` size must be `(1<<tableLog)`.
|
|
790
|
+
*/
|
|
791
|
+
size_t FSE_compress_wksp (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize)
|
|
792
|
+
{
|
|
760
793
|
BYTE* const ostart = (BYTE*) dst;
|
|
761
794
|
BYTE* op = ostart;
|
|
762
795
|
BYTE* const oend = ostart + dstSize;
|
|
763
796
|
|
|
764
797
|
U32 count[FSE_MAX_SYMBOL_VALUE+1];
|
|
765
798
|
S16 norm[FSE_MAX_SYMBOL_VALUE+1];
|
|
766
|
-
|
|
767
|
-
size_t
|
|
799
|
+
FSE_CTable* CTable = (FSE_CTable*)workSpace;
|
|
800
|
+
size_t const CTableSize = FSE_CTABLE_SIZE_U32(tableLog, maxSymbolValue);
|
|
801
|
+
void* scratchBuffer = (void*)(CTable + CTableSize);
|
|
802
|
+
size_t const scratchBufferSize = wkspSize - (CTableSize * sizeof(FSE_CTable));
|
|
768
803
|
|
|
769
804
|
/* init conditions */
|
|
770
|
-
if (
|
|
805
|
+
if (wkspSize < FSE_WKSP_SIZE_U32(tableLog, maxSymbolValue)) return ERROR(tableLog_tooLarge);
|
|
806
|
+
if (srcSize <= 1) return 0; /* Not compressible */
|
|
771
807
|
if (!maxSymbolValue) maxSymbolValue = FSE_MAX_SYMBOL_VALUE;
|
|
772
808
|
if (!tableLog) tableLog = FSE_DEFAULT_TABLELOG;
|
|
773
809
|
|
|
774
810
|
/* Scan input and build symbol stats */
|
|
775
|
-
|
|
776
|
-
|
|
777
|
-
|
|
778
|
-
|
|
779
|
-
|
|
811
|
+
{ CHECK_V_F(maxCount, FSE_count(count, &maxSymbolValue, src, srcSize) );
|
|
812
|
+
if (maxCount == srcSize) return 1; /* only a single symbol in src : rle */
|
|
813
|
+
if (maxCount == 1) return 0; /* each symbol present maximum once => not compressible */
|
|
814
|
+
if (maxCount < (srcSize >> 7)) return 0; /* Heuristic : not compressible enough */
|
|
815
|
+
}
|
|
780
816
|
|
|
781
817
|
tableLog = FSE_optimalTableLog(tableLog, srcSize, maxSymbolValue);
|
|
782
|
-
|
|
783
|
-
if (FSE_isError(errorCode)) return errorCode;
|
|
818
|
+
CHECK_F( FSE_normalizeCount(norm, tableLog, count, srcSize, maxSymbolValue) );
|
|
784
819
|
|
|
785
820
|
/* Write table description header */
|
|
786
|
-
|
|
787
|
-
|
|
788
|
-
|
|
821
|
+
{ CHECK_V_F(nc_err, FSE_writeNCount(op, oend-op, norm, maxSymbolValue, tableLog) );
|
|
822
|
+
op += nc_err;
|
|
823
|
+
}
|
|
789
824
|
|
|
790
825
|
/* Compress */
|
|
791
|
-
|
|
792
|
-
|
|
793
|
-
|
|
794
|
-
|
|
795
|
-
|
|
826
|
+
CHECK_F( FSE_buildCTable_wksp(CTable, norm, maxSymbolValue, tableLog, scratchBuffer, scratchBufferSize) );
|
|
827
|
+
{ CHECK_V_F(cSize, FSE_compress_usingCTable(op, oend - op, src, srcSize, CTable) );
|
|
828
|
+
if (cSize == 0) return 0; /* not enough space for compressed data */
|
|
829
|
+
op += cSize;
|
|
830
|
+
}
|
|
796
831
|
|
|
797
832
|
/* check compressibility */
|
|
798
|
-
if ( (size_t)(op-ostart) >= srcSize-1 )
|
|
799
|
-
return 0;
|
|
833
|
+
if ( (size_t)(op-ostart) >= srcSize-1 ) return 0;
|
|
800
834
|
|
|
801
835
|
return op-ostart;
|
|
802
836
|
}
|
|
803
837
|
|
|
804
|
-
|
|
838
|
+
typedef struct {
|
|
839
|
+
FSE_CTable CTable_max[FSE_CTABLE_SIZE_U32(FSE_MAX_TABLELOG, FSE_MAX_SYMBOL_VALUE)];
|
|
840
|
+
BYTE scratchBuffer[1 << FSE_MAX_TABLELOG];
|
|
841
|
+
} fseWkspMax_t;
|
|
842
|
+
|
|
843
|
+
size_t FSE_compress2 (void* dst, size_t dstCapacity, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog)
|
|
844
|
+
{
|
|
845
|
+
fseWkspMax_t scratchBuffer;
|
|
846
|
+
FSE_STATIC_ASSERT(sizeof(scratchBuffer) >= FSE_WKSP_SIZE_U32(FSE_MAX_TABLELOG, FSE_MAX_SYMBOL_VALUE)); /* compilation failures here means scratchBuffer is not large enough */
|
|
847
|
+
if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge);
|
|
848
|
+
return FSE_compress_wksp(dst, dstCapacity, src, srcSize, maxSymbolValue, tableLog, &scratchBuffer, sizeof(scratchBuffer));
|
|
849
|
+
}
|
|
850
|
+
|
|
851
|
+
size_t FSE_compress (void* dst, size_t dstCapacity, const void* src, size_t srcSize)
|
|
805
852
|
{
|
|
806
|
-
return FSE_compress2(dst,
|
|
853
|
+
return FSE_compress2(dst, dstCapacity, src, srcSize, FSE_MAX_SYMBOL_VALUE, FSE_DEFAULT_TABLELOG);
|
|
807
854
|
}
|
|
808
855
|
|
|
809
856
|
|