zstd-ruby 1.1.4.0 → 1.2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.travis.yml +1 -1
- data/README.md +1 -1
- data/ext/zstdruby/libzstd/Makefile +11 -1
- data/ext/zstdruby/libzstd/README.md +8 -0
- data/ext/zstdruby/libzstd/common/bitstream.h +56 -27
- data/ext/zstdruby/libzstd/common/error_private.c +2 -1
- data/ext/zstdruby/libzstd/common/fse.h +7 -3
- data/ext/zstdruby/libzstd/common/huf.h +42 -19
- data/ext/zstdruby/libzstd/common/mem.h +2 -3
- data/ext/zstdruby/libzstd/common/zstd_errors.h +1 -0
- data/ext/zstdruby/libzstd/common/zstd_internal.h +3 -2
- data/ext/zstdruby/libzstd/compress/fse_compress.c +10 -10
- data/ext/zstdruby/libzstd/compress/zstd_compress.c +455 -244
- data/ext/zstdruby/libzstd/compress/zstd_opt.h +6 -4
- data/ext/zstdruby/libzstd/compress/zstdmt_compress.c +40 -28
- data/ext/zstdruby/libzstd/decompress/zstd_decompress.c +115 -219
- data/ext/zstdruby/libzstd/dictBuilder/cover.c +34 -13
- data/ext/zstdruby/libzstd/dictBuilder/zdict.c +65 -43
- data/ext/zstdruby/libzstd/dictBuilder/zdict.h +7 -7
- data/ext/zstdruby/libzstd/dll/example/README.md +5 -5
- data/ext/zstdruby/libzstd/dll/example/build_package.bat +1 -0
- data/ext/zstdruby/libzstd/legacy/zstd_v01.c +1 -1
- data/ext/zstdruby/libzstd/legacy/zstd_v02.c +21 -21
- data/ext/zstdruby/libzstd/legacy/zstd_v03.c +20 -20
- data/ext/zstdruby/libzstd/legacy/zstd_v04.c +4 -4
- data/ext/zstdruby/libzstd/legacy/zstd_v05.c +2 -2
- data/ext/zstdruby/libzstd/legacy/zstd_v06.c +2 -2
- data/ext/zstdruby/libzstd/zstd.h +88 -68
- data/lib/zstd-ruby/version.rb +1 -1
- metadata +3 -3
@@ -59,8 +59,6 @@ static int g_displayLevel = 2;
|
|
59
59
|
if ((clock() - g_time > refreshRate) || (displayLevel >= 4)) { \
|
60
60
|
g_time = clock(); \
|
61
61
|
DISPLAY(__VA_ARGS__); \
|
62
|
-
if (displayLevel >= 4) \
|
63
|
-
fflush(stdout); \
|
64
62
|
} \
|
65
63
|
}
|
66
64
|
#define DISPLAYUPDATE(l, ...) LOCALDISPLAYUPDATE(g_displayLevel, l, __VA_ARGS__)
|
@@ -236,10 +234,22 @@ static size_t COVER_sum(const size_t *samplesSizes, unsigned nbSamples) {
|
|
236
234
|
* Returns 1 if the dmer at lp is greater than the dmer at rp.
|
237
235
|
*/
|
238
236
|
static int COVER_cmp(COVER_ctx_t *ctx, const void *lp, const void *rp) {
|
239
|
-
const
|
240
|
-
const
|
237
|
+
U32 const lhs = *(U32 const *)lp;
|
238
|
+
U32 const rhs = *(U32 const *)rp;
|
241
239
|
return memcmp(ctx->samples + lhs, ctx->samples + rhs, ctx->d);
|
242
240
|
}
|
241
|
+
/**
|
242
|
+
* Faster version for d <= 8.
|
243
|
+
*/
|
244
|
+
static int COVER_cmp8(COVER_ctx_t *ctx, const void *lp, const void *rp) {
|
245
|
+
U64 const mask = (ctx->d == 8) ? (U64)-1 : (((U64)1 << (8 * ctx->d)) - 1);
|
246
|
+
U64 const lhs = MEM_readLE64(ctx->samples + *(U32 const *)lp) & mask;
|
247
|
+
U64 const rhs = MEM_readLE64(ctx->samples + *(U32 const *)rp) & mask;
|
248
|
+
if (lhs < rhs) {
|
249
|
+
return -1;
|
250
|
+
}
|
251
|
+
return (lhs > rhs);
|
252
|
+
}
|
243
253
|
|
244
254
|
/**
|
245
255
|
* Same as COVER_cmp() except ties are broken by pointer value
|
@@ -253,6 +263,16 @@ static int COVER_strict_cmp(const void *lp, const void *rp) {
|
|
253
263
|
}
|
254
264
|
return result;
|
255
265
|
}
|
266
|
+
/**
|
267
|
+
* Faster version for d <= 8.
|
268
|
+
*/
|
269
|
+
static int COVER_strict_cmp8(const void *lp, const void *rp) {
|
270
|
+
int result = COVER_cmp8(g_ctx, lp, rp);
|
271
|
+
if (result == 0) {
|
272
|
+
result = lp < rp ? -1 : 1;
|
273
|
+
}
|
274
|
+
return result;
|
275
|
+
}
|
256
276
|
|
257
277
|
/**
|
258
278
|
* Returns the first pointer in [first, last) whose element does not compare
|
@@ -508,7 +528,7 @@ static int COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer,
|
|
508
528
|
const BYTE *const samples = (const BYTE *)samplesBuffer;
|
509
529
|
const size_t totalSamplesSize = COVER_sum(samplesSizes, nbSamples);
|
510
530
|
/* Checks */
|
511
|
-
if (totalSamplesSize < d ||
|
531
|
+
if (totalSamplesSize < MAX(d, sizeof(U64)) ||
|
512
532
|
totalSamplesSize >= (size_t)COVER_MAX_SAMPLES_SIZE) {
|
513
533
|
DISPLAYLEVEL(1, "Total samples size is too large, maximum size is %u MB\n",
|
514
534
|
(COVER_MAX_SAMPLES_SIZE >> 20));
|
@@ -522,7 +542,7 @@ static int COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer,
|
|
522
542
|
ctx->samplesSizes = samplesSizes;
|
523
543
|
ctx->nbSamples = nbSamples;
|
524
544
|
/* Partial suffix array */
|
525
|
-
ctx->suffixSize = totalSamplesSize - d + 1;
|
545
|
+
ctx->suffixSize = totalSamplesSize - MAX(d, sizeof(U64)) + 1;
|
526
546
|
ctx->suffix = (U32 *)malloc(ctx->suffixSize * sizeof(U32));
|
527
547
|
/* Maps index to the dmerID */
|
528
548
|
ctx->dmerAt = (U32 *)malloc(ctx->suffixSize * sizeof(U32));
|
@@ -556,7 +576,8 @@ static int COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer,
|
|
556
576
|
}
|
557
577
|
/* qsort doesn't take an opaque pointer, so pass as a global */
|
558
578
|
g_ctx = ctx;
|
559
|
-
qsort(ctx->suffix, ctx->suffixSize, sizeof(U32),
|
579
|
+
qsort(ctx->suffix, ctx->suffixSize, sizeof(U32),
|
580
|
+
(ctx->d <= 8 ? &COVER_strict_cmp8 : &COVER_strict_cmp));
|
560
581
|
}
|
561
582
|
DISPLAYLEVEL(2, "Computing frequencies\n");
|
562
583
|
/* For each dmer group (group of positions with the same first d bytes):
|
@@ -566,8 +587,8 @@ static int COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer,
|
|
566
587
|
* 2. We calculate how many samples the dmer occurs in and save it in
|
567
588
|
* freqs[dmerId].
|
568
589
|
*/
|
569
|
-
COVER_groupBy(ctx->suffix, ctx->suffixSize, sizeof(U32), ctx,
|
570
|
-
&COVER_group);
|
590
|
+
COVER_groupBy(ctx->suffix, ctx->suffixSize, sizeof(U32), ctx,
|
591
|
+
(ctx->d <= 8 ? &COVER_cmp8 : &COVER_cmp), &COVER_group);
|
571
592
|
ctx->freqs = ctx->suffix;
|
572
593
|
ctx->suffix = NULL;
|
573
594
|
return 1;
|
@@ -918,10 +939,10 @@ ZDICTLIB_API size_t COVER_optimizeTrainFromBuffer(void *dictBuffer,
|
|
918
939
|
/* constants */
|
919
940
|
const unsigned nbThreads = parameters->nbThreads;
|
920
941
|
const unsigned kMinD = parameters->d == 0 ? 6 : parameters->d;
|
921
|
-
const unsigned kMaxD = parameters->d == 0 ?
|
922
|
-
const unsigned kMinK = parameters->k == 0 ?
|
923
|
-
const unsigned kMaxK = parameters->k == 0 ?
|
924
|
-
const unsigned kSteps = parameters->steps == 0 ?
|
942
|
+
const unsigned kMaxD = parameters->d == 0 ? 8 : parameters->d;
|
943
|
+
const unsigned kMinK = parameters->k == 0 ? 50 : parameters->k;
|
944
|
+
const unsigned kMaxK = parameters->k == 0 ? 2000 : parameters->k;
|
945
|
+
const unsigned kSteps = parameters->steps == 0 ? 40 : parameters->steps;
|
925
946
|
const unsigned kStepSize = MAX((kMaxK - kMinK) / kSteps, 1);
|
926
947
|
const unsigned kIterations =
|
927
948
|
(1 + (kMaxD - kMinD) / 2) * (1 + (kMaxK - kMinK) / kStepSize);
|
@@ -11,8 +11,9 @@
|
|
11
11
|
/*-**************************************
|
12
12
|
* Tuning parameters
|
13
13
|
****************************************/
|
14
|
+
#define MINRATIO 4 /* minimum nb of apparition to be selected in dictionary */
|
14
15
|
#define ZDICT_MAX_SAMPLES_SIZE (2000U << 20)
|
15
|
-
#define ZDICT_MIN_SAMPLES_SIZE
|
16
|
+
#define ZDICT_MIN_SAMPLES_SIZE (ZDICT_CONTENTSIZE_MIN * MINRATIO)
|
16
17
|
|
17
18
|
|
18
19
|
/*-**************************************
|
@@ -59,11 +60,8 @@
|
|
59
60
|
|
60
61
|
#define NOISELENGTH 32
|
61
62
|
|
62
|
-
#define MINRATIO 4
|
63
63
|
static const int g_compressionLevel_default = 6;
|
64
64
|
static const U32 g_selectivity_default = 9;
|
65
|
-
static const size_t g_provision_entropySize = 200;
|
66
|
-
static const size_t g_min_fast_dictContent = 192;
|
67
65
|
|
68
66
|
|
69
67
|
/*-*************************************
|
@@ -308,10 +306,10 @@ static dictItem ZDICT_analyzePos(
|
|
308
306
|
/* look backward */
|
309
307
|
length = MINMATCHLENGTH;
|
310
308
|
while ((length >= MINMATCHLENGTH) & (start > 0)) {
|
311
|
-
|
312
|
-
|
313
|
-
|
314
|
-
|
309
|
+
length = ZDICT_count(b + pos, b + suffix[start - 1]);
|
310
|
+
if (length >= LLIMIT) length = LLIMIT - 1;
|
311
|
+
lengthList[length]++;
|
312
|
+
if (length >= MINMATCHLENGTH) start--;
|
315
313
|
}
|
316
314
|
|
317
315
|
/* largest useful length */
|
@@ -363,21 +361,35 @@ static dictItem ZDICT_analyzePos(
|
|
363
361
|
}
|
364
362
|
|
365
363
|
|
364
|
+
static int isIncluded(const void* in, const void* container, size_t length)
|
365
|
+
{
|
366
|
+
const char* const ip = (const char*) in;
|
367
|
+
const char* const into = (const char*) container;
|
368
|
+
size_t u;
|
369
|
+
|
370
|
+
for (u=0; u<length; u++) { /* works because end of buffer is a noisy guard band */
|
371
|
+
if (ip[u] != into[u]) break;
|
372
|
+
}
|
373
|
+
|
374
|
+
return u==length;
|
375
|
+
}
|
376
|
+
|
366
377
|
/*! ZDICT_checkMerge
|
367
378
|
check if dictItem can be merged, do it if possible
|
368
379
|
@return : id of destination elt, 0 if not merged
|
369
380
|
*/
|
370
|
-
static U32
|
381
|
+
static U32 ZDICT_tryMerge(dictItem* table, dictItem elt, U32 eltNbToSkip, const void* buffer)
|
371
382
|
{
|
372
383
|
const U32 tableSize = table->pos;
|
373
384
|
const U32 eltEnd = elt.pos + elt.length;
|
385
|
+
const char* const buf = (const char*) buffer;
|
374
386
|
|
375
387
|
/* tail overlap */
|
376
388
|
U32 u; for (u=1; u<tableSize; u++) {
|
377
389
|
if (u==eltNbToSkip) continue;
|
378
390
|
if ((table[u].pos > elt.pos) && (table[u].pos <= eltEnd)) { /* overlap, existing > new */
|
379
391
|
/* append */
|
380
|
-
U32 addedLength = table[u].pos - elt.pos;
|
392
|
+
U32 const addedLength = table[u].pos - elt.pos;
|
381
393
|
table[u].length += addedLength;
|
382
394
|
table[u].pos = elt.pos;
|
383
395
|
table[u].savings += elt.savings * addedLength / elt.length; /* rough approx */
|
@@ -393,9 +405,10 @@ static U32 ZDICT_checkMerge(dictItem* table, dictItem elt, U32 eltNbToSkip)
|
|
393
405
|
/* front overlap */
|
394
406
|
for (u=1; u<tableSize; u++) {
|
395
407
|
if (u==eltNbToSkip) continue;
|
408
|
+
|
396
409
|
if ((table[u].pos + table[u].length >= elt.pos) && (table[u].pos < elt.pos)) { /* overlap, existing < new */
|
397
410
|
/* append */
|
398
|
-
int addedLength = (int)eltEnd - (table[u].pos + table[u].length);
|
411
|
+
int const addedLength = (int)eltEnd - (table[u].pos + table[u].length);
|
399
412
|
table[u].savings += elt.length / 8; /* rough approx bonus */
|
400
413
|
if (addedLength > 0) { /* otherwise, elt fully included into existing */
|
401
414
|
table[u].length += addedLength;
|
@@ -407,7 +420,18 @@ static U32 ZDICT_checkMerge(dictItem* table, dictItem elt, U32 eltNbToSkip)
|
|
407
420
|
table[u] = table[u-1], u--;
|
408
421
|
table[u] = elt;
|
409
422
|
return u;
|
410
|
-
|
423
|
+
}
|
424
|
+
|
425
|
+
if (MEM_read64(buf + table[u].pos) == MEM_read64(buf + elt.pos + 1)) {
|
426
|
+
if (isIncluded(buf + table[u].pos, buf + elt.pos + 1, table[u].length)) {
|
427
|
+
size_t const addedLength = MAX( (int)elt.length - (int)table[u].length , 1 );
|
428
|
+
table[u].pos = elt.pos;
|
429
|
+
table[u].savings += (U32)(elt.savings * addedLength / elt.length);
|
430
|
+
table[u].length = MIN(elt.length, table[u].length + 1);
|
431
|
+
return u;
|
432
|
+
}
|
433
|
+
}
|
434
|
+
}
|
411
435
|
|
412
436
|
return 0;
|
413
437
|
}
|
@@ -425,14 +449,14 @@ static void ZDICT_removeDictItem(dictItem* table, U32 id)
|
|
425
449
|
}
|
426
450
|
|
427
451
|
|
428
|
-
static void ZDICT_insertDictItem(dictItem* table, U32 maxSize, dictItem elt)
|
452
|
+
static void ZDICT_insertDictItem(dictItem* table, U32 maxSize, dictItem elt, const void* buffer)
|
429
453
|
{
|
430
454
|
/* merge if possible */
|
431
|
-
U32 mergeId =
|
455
|
+
U32 mergeId = ZDICT_tryMerge(table, elt, 0, buffer);
|
432
456
|
if (mergeId) {
|
433
457
|
U32 newMerge = 1;
|
434
458
|
while (newMerge) {
|
435
|
-
newMerge =
|
459
|
+
newMerge = ZDICT_tryMerge(table, table[mergeId], mergeId, buffer);
|
436
460
|
if (newMerge) ZDICT_removeDictItem(table, mergeId);
|
437
461
|
mergeId = newMerge;
|
438
462
|
}
|
@@ -480,7 +504,7 @@ static size_t ZDICT_trainBuffer(dictItem* dictList, U32 dictListSize,
|
|
480
504
|
# define DISPLAYUPDATE(l, ...) if (notificationLevel>=l) { \
|
481
505
|
if (ZDICT_clockSpan(displayClock) > refreshRate) \
|
482
506
|
{ displayClock = clock(); DISPLAY(__VA_ARGS__); \
|
483
|
-
if (notificationLevel>=4) fflush(
|
507
|
+
if (notificationLevel>=4) fflush(stderr); } }
|
484
508
|
|
485
509
|
/* init */
|
486
510
|
DISPLAYLEVEL(2, "\r%70s\r", ""); /* clean display line */
|
@@ -521,7 +545,7 @@ static size_t ZDICT_trainBuffer(dictItem* dictList, U32 dictListSize,
|
|
521
545
|
if (doneMarks[cursor]) { cursor++; continue; }
|
522
546
|
solution = ZDICT_analyzePos(doneMarks, suffix, reverseSuffix[cursor], buffer, minRatio, notificationLevel);
|
523
547
|
if (solution.length==0) { cursor++; continue; }
|
524
|
-
ZDICT_insertDictItem(dictList, dictListSize, solution);
|
548
|
+
ZDICT_insertDictItem(dictList, dictListSize, solution, buffer);
|
525
549
|
cursor += solution.length;
|
526
550
|
DISPLAYUPDATE(2, "\r%4.2f %% \r", (double)cursor / bufferSize * 100);
|
527
551
|
} }
|
@@ -683,19 +707,19 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
|
|
683
707
|
goto _cleanup;
|
684
708
|
}
|
685
709
|
if (offcodeMax>OFFCODE_MAX) { eSize = ERROR(dictionary_wrong); goto _cleanup; } /* too large dictionary */
|
686
|
-
for (u=0; u<256; u++) countLit[u]=1; /* any character must be described */
|
687
|
-
for (u=0; u<=offcodeMax; u++) offcodeCount[u]=1;
|
688
|
-
for (u=0; u<=MaxML; u++) matchLengthCount[u]=1;
|
689
|
-
for (u=0; u<=MaxLL; u++) litLengthCount[u]=1;
|
710
|
+
for (u=0; u<256; u++) countLit[u] = 1; /* any character must be described */
|
711
|
+
for (u=0; u<=offcodeMax; u++) offcodeCount[u] = 1;
|
712
|
+
for (u=0; u<=MaxML; u++) matchLengthCount[u] = 1;
|
713
|
+
for (u=0; u<=MaxLL; u++) litLengthCount[u] = 1;
|
690
714
|
memset(repOffset, 0, sizeof(repOffset));
|
691
715
|
repOffset[1] = repOffset[4] = repOffset[8] = 1;
|
692
716
|
memset(bestRepOffset, 0, sizeof(bestRepOffset));
|
693
|
-
if (compressionLevel==0) compressionLevel=g_compressionLevel_default;
|
717
|
+
if (compressionLevel==0) compressionLevel = g_compressionLevel_default;
|
694
718
|
params = ZSTD_getParams(compressionLevel, averageSampleSize, dictBufferSize);
|
695
719
|
{ size_t const beginResult = ZSTD_compressBegin_advanced(esr.ref, dictBuffer, dictBufferSize, params, 0);
|
696
|
-
|
720
|
+
if (ZSTD_isError(beginResult)) {
|
721
|
+
DISPLAYLEVEL(1, "error : ZSTD_compressBegin_advanced() failed : %s \n", ZSTD_getErrorName(beginResult));
|
697
722
|
eSize = ERROR(GENERIC);
|
698
|
-
DISPLAYLEVEL(1, "error : ZSTD_compressBegin_advanced failed \n");
|
699
723
|
goto _cleanup;
|
700
724
|
} }
|
701
725
|
|
@@ -812,7 +836,6 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
|
|
812
836
|
MEM_writeLE32(dstPtr+4, repStartValue[1]);
|
813
837
|
MEM_writeLE32(dstPtr+8, repStartValue[2]);
|
814
838
|
#endif
|
815
|
-
//dstPtr += 12;
|
816
839
|
eSize += 12;
|
817
840
|
|
818
841
|
_cleanup:
|
@@ -831,7 +854,7 @@ size_t ZDICT_finalizeDictionary(void* dictBuffer, size_t dictBufferCapacity,
|
|
831
854
|
ZDICT_params_t params)
|
832
855
|
{
|
833
856
|
size_t hSize;
|
834
|
-
#define HBUFFSIZE 256
|
857
|
+
#define HBUFFSIZE 256 /* should prove large enough for all entropy headers */
|
835
858
|
BYTE header[HBUFFSIZE];
|
836
859
|
int const compressionLevel = (params.compressionLevel <= 0) ? g_compressionLevel_default : params.compressionLevel;
|
837
860
|
U32 const notificationLevel = params.notificationLevel;
|
@@ -877,20 +900,11 @@ size_t ZDICT_addEntropyTablesFromBuffer_advanced(void* dictBuffer, size_t dictCo
|
|
877
900
|
const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
|
878
901
|
ZDICT_params_t params)
|
879
902
|
{
|
880
|
-
size_t hSize;
|
881
903
|
int const compressionLevel = (params.compressionLevel <= 0) ? g_compressionLevel_default : params.compressionLevel;
|
882
904
|
U32 const notificationLevel = params.notificationLevel;
|
905
|
+
size_t hSize = 8;
|
883
906
|
|
884
|
-
/*
|
885
|
-
MEM_writeLE32(dictBuffer, ZSTD_DICT_MAGIC);
|
886
|
-
{ U64 const randomID = XXH64((char*)dictBuffer + dictBufferCapacity - dictContentSize, dictContentSize, 0);
|
887
|
-
U32 const compliantID = (randomID % ((1U<<31)-32768)) + 32768;
|
888
|
-
U32 const dictID = params.dictID ? params.dictID : compliantID;
|
889
|
-
MEM_writeLE32((char*)dictBuffer+4, dictID);
|
890
|
-
}
|
891
|
-
hSize = 8;
|
892
|
-
|
893
|
-
/* entropy tables */
|
907
|
+
/* calculate entropy tables */
|
894
908
|
DISPLAYLEVEL(2, "\r%70s\r", ""); /* clean display line */
|
895
909
|
DISPLAYLEVEL(2, "statistics ... \n");
|
896
910
|
{ size_t const eSize = ZDICT_analyzeEntropy((char*)dictBuffer+hSize, dictBufferCapacity-hSize,
|
@@ -902,6 +916,13 @@ size_t ZDICT_addEntropyTablesFromBuffer_advanced(void* dictBuffer, size_t dictCo
|
|
902
916
|
hSize += eSize;
|
903
917
|
}
|
904
918
|
|
919
|
+
/* add dictionary header (after entropy tables) */
|
920
|
+
MEM_writeLE32(dictBuffer, ZSTD_DICT_MAGIC);
|
921
|
+
{ U64 const randomID = XXH64((char*)dictBuffer + dictBufferCapacity - dictContentSize, dictContentSize, 0);
|
922
|
+
U32 const compliantID = (randomID % ((1U<<31)-32768)) + 32768;
|
923
|
+
U32 const dictID = params.dictID ? params.dictID : compliantID;
|
924
|
+
MEM_writeLE32((char*)dictBuffer+4, dictID);
|
925
|
+
}
|
905
926
|
|
906
927
|
if (hSize + dictContentSize < dictBufferCapacity)
|
907
928
|
memmove((char*)dictBuffer + hSize, (char*)dictBuffer + dictBufferCapacity - dictContentSize, dictContentSize);
|
@@ -929,8 +950,8 @@ size_t ZDICT_trainFromBuffer_unsafe(
|
|
929
950
|
|
930
951
|
/* checks */
|
931
952
|
if (!dictList) return ERROR(memory_allocation);
|
932
|
-
if (maxDictSize
|
933
|
-
if (samplesBuffSize < ZDICT_MIN_SAMPLES_SIZE) { free(dictList); return
|
953
|
+
if (maxDictSize < ZDICT_DICTSIZE_MIN) { free(dictList); return ERROR(dstSize_tooSmall); } /* requested dictionary size is too small */
|
954
|
+
if (samplesBuffSize < ZDICT_MIN_SAMPLES_SIZE) { free(dictList); return ERROR(dictionaryCreation_failed); } /* not enough source to create dictionary */
|
934
955
|
|
935
956
|
/* init */
|
936
957
|
ZDICT_initDictItem(dictList);
|
@@ -963,14 +984,15 @@ size_t ZDICT_trainFromBuffer_unsafe(
|
|
963
984
|
|
964
985
|
/* create dictionary */
|
965
986
|
{ U32 dictContentSize = ZDICT_dictSize(dictList);
|
966
|
-
if (dictContentSize <
|
987
|
+
if (dictContentSize < ZDICT_CONTENTSIZE_MIN) { free(dictList); return ERROR(dictionaryCreation_failed); } /* dictionary content too small */
|
988
|
+
if (dictContentSize < targetDictSize/4) {
|
967
989
|
DISPLAYLEVEL(2, "! warning : selected content significantly smaller than requested (%u < %u) \n", dictContentSize, (U32)maxDictSize);
|
990
|
+
if (samplesBuffSize < 10 * targetDictSize)
|
991
|
+
DISPLAYLEVEL(2, "! consider increasing the number of samples (total size : %u MB)\n", (U32)(samplesBuffSize>>20));
|
968
992
|
if (minRep > MINRATIO) {
|
969
993
|
DISPLAYLEVEL(2, "! consider increasing selectivity to produce larger dictionary (-s%u) \n", selectivity+1);
|
970
994
|
DISPLAYLEVEL(2, "! note : larger dictionaries are not necessarily better, test its efficiency on samples \n");
|
971
995
|
}
|
972
|
-
if (samplesBuffSize < 10 * targetDictSize)
|
973
|
-
DISPLAYLEVEL(2, "! consider increasing the number of samples (total size : %u MB)\n", (U32)(samplesBuffSize>>20));
|
974
996
|
}
|
975
997
|
|
976
998
|
if ((dictContentSize > targetDictSize*3) && (nbSamples > 2*MINRATIO) && (selectivity>1)) {
|
@@ -978,7 +1000,7 @@ size_t ZDICT_trainFromBuffer_unsafe(
|
|
978
1000
|
while ((nbSamples >> proposedSelectivity) <= MINRATIO) { proposedSelectivity--; }
|
979
1001
|
DISPLAYLEVEL(2, "! note : calculated dictionary significantly larger than requested (%u > %u) \n", dictContentSize, (U32)maxDictSize);
|
980
1002
|
DISPLAYLEVEL(2, "! consider increasing dictionary size, or produce denser dictionary (-s%u) \n", proposedSelectivity);
|
981
|
-
DISPLAYLEVEL(2, "! always test dictionary efficiency on samples \n");
|
1003
|
+
DISPLAYLEVEL(2, "! always test dictionary efficiency on real samples \n");
|
982
1004
|
}
|
983
1005
|
|
984
1006
|
/* limit dictionary size */
|
@@ -88,7 +88,7 @@ ZDICTLIB_API size_t ZDICT_trainFromBuffer_advanced(void* dictBuffer, size_t dict
|
|
88
88
|
|
89
89
|
/*! COVER_params_t :
|
90
90
|
For all values 0 means default.
|
91
|
-
|
91
|
+
k and d are the only required parameters.
|
92
92
|
*/
|
93
93
|
typedef struct {
|
94
94
|
unsigned k; /* Segment size : constraint: 0 < k : Reasonable range [16, 2048+] */
|
@@ -147,18 +147,18 @@ ZDICTLIB_API size_t COVER_optimizeTrainFromBuffer(void* dictBuffer, size_t dictB
|
|
147
147
|
Samples must be stored concatenated in a flat buffer `samplesBuffer`,
|
148
148
|
supplied with an array of sizes `samplesSizes`, providing the size of each sample in order.
|
149
149
|
|
150
|
-
dictContentSize must be
|
151
|
-
maxDictSize must be >= dictContentSize, and must be
|
150
|
+
dictContentSize must be >= ZDICT_CONTENTSIZE_MIN bytes.
|
151
|
+
maxDictSize must be >= dictContentSize, and must be >= ZDICT_DICTSIZE_MIN bytes.
|
152
152
|
|
153
153
|
@return : size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`),
|
154
154
|
or an error code, which can be tested by ZDICT_isError().
|
155
155
|
note : ZDICT_finalizeDictionary() will push notifications into stderr if instructed to, using notificationLevel>0.
|
156
|
-
note 2 : dictBuffer and
|
156
|
+
note 2 : dictBuffer and dictContent can overlap
|
157
157
|
*/
|
158
|
-
#define ZDICT_CONTENTSIZE_MIN
|
159
|
-
#define ZDICT_DICTSIZE_MIN
|
158
|
+
#define ZDICT_CONTENTSIZE_MIN 128
|
159
|
+
#define ZDICT_DICTSIZE_MIN 256
|
160
160
|
ZDICTLIB_API size_t ZDICT_finalizeDictionary(void* dictBuffer, size_t dictBufferCapacity,
|
161
|
-
const void*
|
161
|
+
const void* dictContent, size_t dictContentSize,
|
162
162
|
const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
|
163
163
|
ZDICT_params_t parameters);
|
164
164
|
|
@@ -4,11 +4,11 @@ ZSTD Windows binary package
|
|
4
4
|
#### The package contents
|
5
5
|
|
6
6
|
- `zstd.exe` : Command Line Utility, supporting gzip-like arguments
|
7
|
-
- `dll\libzstd.dll` : The
|
8
|
-
- `dll\libzstd.lib` : The import library of ZSTD library for Visual C++
|
9
|
-
- `example\` : The example of usage of ZSTD library
|
10
|
-
- `include\` : Header files required
|
11
|
-
- `static\libzstd_static.lib` : The static ZSTD library
|
7
|
+
- `dll\libzstd.dll` : The ZSTD dynamic library (DLL)
|
8
|
+
- `dll\libzstd.lib` : The import library of the ZSTD dynamic library (DLL) for Visual C++
|
9
|
+
- `example\` : The example of usage of the ZSTD library
|
10
|
+
- `include\` : Header files required by the ZSTD library
|
11
|
+
- `static\libzstd_static.lib` : The static ZSTD library (LIB)
|
12
12
|
|
13
13
|
|
14
14
|
#### Usage of Command Line Interface
|
@@ -9,6 +9,7 @@ COPY lib\common\mem.h bin\example\
|
|
9
9
|
COPY lib\common\zstd_errors.h bin\example\
|
10
10
|
COPY lib\common\zstd_internal.h bin\example\
|
11
11
|
COPY lib\common\error_private.h bin\example\
|
12
|
+
COPY lib\common\xxhash.h bin\example\
|
12
13
|
COPY lib\zstd.h bin\include\
|
13
14
|
COPY lib\libzstd.a bin\static\libzstd_static.lib
|
14
15
|
COPY lib\dll\libzstd.* bin\dll\
|
@@ -475,8 +475,8 @@ MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, U32 nbBits)
|
|
475
475
|
|
476
476
|
MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD)
|
477
477
|
{
|
478
|
-
|
479
|
-
|
478
|
+
if (bitD->bitsConsumed > (sizeof(bitD->bitContainer)*8)) /* should never happen */
|
479
|
+
return BIT_DStream_overflow;
|
480
480
|
|
481
481
|
if (bitD->ptr >= bitD->start + sizeof(bitD->bitContainer))
|
482
482
|
{
|
@@ -1334,8 +1334,8 @@ static size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsi
|
|
1334
1334
|
else
|
1335
1335
|
{
|
1336
1336
|
bitCount -= (int)(8 * (iend - 4 - ip));
|
1337
|
-
|
1338
|
-
|
1337
|
+
ip = iend - 4;
|
1338
|
+
}
|
1339
1339
|
bitStream = MEM_readLE32(ip) >> (bitCount & 31);
|
1340
1340
|
}
|
1341
1341
|
}
|
@@ -2040,7 +2040,7 @@ static size_t HUF_readDTableX4 (U32* DTable, const void* src, size_t srcSize)
|
|
2040
2040
|
rankStart[0] = 0; /* forget 0w symbols; this is beginning of weight(1) */
|
2041
2041
|
}
|
2042
2042
|
|
2043
|
-
|
2043
|
+
/* Build rankVal */
|
2044
2044
|
{
|
2045
2045
|
const U32 minBits = tableLog+1 - maxW;
|
2046
2046
|
U32 nextRankVal = 0;
|
@@ -2374,7 +2374,7 @@ static size_t HUF_readDTableX6 (U32* DTable, const void* src, size_t srcSize)
|
|
2374
2374
|
rankStart[0] = 0; /* forget 0w symbols; this is beginning of weight(1) */
|
2375
2375
|
}
|
2376
2376
|
|
2377
|
-
|
2377
|
+
/* Build rankVal */
|
2378
2378
|
{
|
2379
2379
|
const U32 minBits = tableLog+1 - maxW;
|
2380
2380
|
U32 nextRankVal = 0;
|
@@ -2948,14 +2948,14 @@ static size_t ZSTD_decodeLiteralsBlock(void* ctx,
|
|
2948
2948
|
const size_t litSize = (MEM_readLE32(istart) & 0xFFFFFF) >> 2; /* no buffer issue : srcSize >= MIN_CBLOCK_SIZE */
|
2949
2949
|
if (litSize > srcSize-11) /* risk of reading too far with wildcopy */
|
2950
2950
|
{
|
2951
|
-
|
2952
|
-
|
2953
|
-
|
2954
|
-
|
2955
|
-
|
2956
|
-
|
2957
|
-
|
2958
|
-
|
2951
|
+
if (litSize > srcSize-3) return ERROR(corruption_detected);
|
2952
|
+
memcpy(dctx->litBuffer, istart, litSize);
|
2953
|
+
dctx->litPtr = dctx->litBuffer;
|
2954
|
+
dctx->litSize = litSize;
|
2955
|
+
memset(dctx->litBuffer + dctx->litSize, 0, 8);
|
2956
|
+
return litSize+3;
|
2957
|
+
}
|
2958
|
+
/* direct reference into compressed stream */
|
2959
2959
|
dctx->litPtr = istart+3;
|
2960
2960
|
dctx->litSize = litSize;
|
2961
2961
|
return litSize+3;
|
@@ -3515,13 +3515,13 @@ static size_t ZSTD_decompressContinue(ZSTD_DCtx* ctx, void* dst, size_t maxDstSi
|
|
3515
3515
|
|
3516
3516
|
unsigned ZSTDv02_isError(size_t code)
|
3517
3517
|
{
|
3518
|
-
|
3518
|
+
return ZSTD_isError(code);
|
3519
3519
|
}
|
3520
3520
|
|
3521
3521
|
size_t ZSTDv02_decompress( void* dst, size_t maxOriginalSize,
|
3522
3522
|
const void* src, size_t compressedSize)
|
3523
3523
|
{
|
3524
|
-
|
3524
|
+
return ZSTD_decompress(dst, maxOriginalSize, src, compressedSize);
|
3525
3525
|
}
|
3526
3526
|
|
3527
3527
|
size_t ZSTDv02_findFrameCompressedSize(const void *src, size_t compressedSize)
|
@@ -3531,25 +3531,25 @@ size_t ZSTDv02_findFrameCompressedSize(const void *src, size_t compressedSize)
|
|
3531
3531
|
|
3532
3532
|
ZSTDv02_Dctx* ZSTDv02_createDCtx(void)
|
3533
3533
|
{
|
3534
|
-
|
3534
|
+
return (ZSTDv02_Dctx*)ZSTD_createDCtx();
|
3535
3535
|
}
|
3536
3536
|
|
3537
3537
|
size_t ZSTDv02_freeDCtx(ZSTDv02_Dctx* dctx)
|
3538
3538
|
{
|
3539
|
-
|
3539
|
+
return ZSTD_freeDCtx((ZSTD_DCtx*)dctx);
|
3540
3540
|
}
|
3541
3541
|
|
3542
3542
|
size_t ZSTDv02_resetDCtx(ZSTDv02_Dctx* dctx)
|
3543
3543
|
{
|
3544
|
-
|
3544
|
+
return ZSTD_resetDCtx((ZSTD_DCtx*)dctx);
|
3545
3545
|
}
|
3546
3546
|
|
3547
3547
|
size_t ZSTDv02_nextSrcSizeToDecompress(ZSTDv02_Dctx* dctx)
|
3548
3548
|
{
|
3549
|
-
|
3549
|
+
return ZSTD_nextSrcSizeToDecompress((ZSTD_DCtx*)dctx);
|
3550
3550
|
}
|
3551
3551
|
|
3552
3552
|
size_t ZSTDv02_decompressContinue(ZSTDv02_Dctx* dctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize)
|
3553
3553
|
{
|
3554
|
-
|
3554
|
+
return ZSTD_decompressContinue((ZSTD_DCtx*)dctx, dst, maxDstSize, src, srcSize);
|
3555
3555
|
}
|