zstd-ruby 1.1.4.0 → 1.2.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.travis.yml +1 -1
- data/README.md +1 -1
- data/ext/zstdruby/libzstd/Makefile +11 -1
- data/ext/zstdruby/libzstd/README.md +8 -0
- data/ext/zstdruby/libzstd/common/bitstream.h +56 -27
- data/ext/zstdruby/libzstd/common/error_private.c +2 -1
- data/ext/zstdruby/libzstd/common/fse.h +7 -3
- data/ext/zstdruby/libzstd/common/huf.h +42 -19
- data/ext/zstdruby/libzstd/common/mem.h +2 -3
- data/ext/zstdruby/libzstd/common/zstd_errors.h +1 -0
- data/ext/zstdruby/libzstd/common/zstd_internal.h +3 -2
- data/ext/zstdruby/libzstd/compress/fse_compress.c +10 -10
- data/ext/zstdruby/libzstd/compress/zstd_compress.c +455 -244
- data/ext/zstdruby/libzstd/compress/zstd_opt.h +6 -4
- data/ext/zstdruby/libzstd/compress/zstdmt_compress.c +40 -28
- data/ext/zstdruby/libzstd/decompress/zstd_decompress.c +115 -219
- data/ext/zstdruby/libzstd/dictBuilder/cover.c +34 -13
- data/ext/zstdruby/libzstd/dictBuilder/zdict.c +65 -43
- data/ext/zstdruby/libzstd/dictBuilder/zdict.h +7 -7
- data/ext/zstdruby/libzstd/dll/example/README.md +5 -5
- data/ext/zstdruby/libzstd/dll/example/build_package.bat +1 -0
- data/ext/zstdruby/libzstd/legacy/zstd_v01.c +1 -1
- data/ext/zstdruby/libzstd/legacy/zstd_v02.c +21 -21
- data/ext/zstdruby/libzstd/legacy/zstd_v03.c +20 -20
- data/ext/zstdruby/libzstd/legacy/zstd_v04.c +4 -4
- data/ext/zstdruby/libzstd/legacy/zstd_v05.c +2 -2
- data/ext/zstdruby/libzstd/legacy/zstd_v06.c +2 -2
- data/ext/zstdruby/libzstd/zstd.h +88 -68
- data/lib/zstd-ruby/version.rb +1 -1
- metadata +3 -3
@@ -59,8 +59,6 @@ static int g_displayLevel = 2;
|
|
59
59
|
if ((clock() - g_time > refreshRate) || (displayLevel >= 4)) { \
|
60
60
|
g_time = clock(); \
|
61
61
|
DISPLAY(__VA_ARGS__); \
|
62
|
-
if (displayLevel >= 4) \
|
63
|
-
fflush(stdout); \
|
64
62
|
} \
|
65
63
|
}
|
66
64
|
#define DISPLAYUPDATE(l, ...) LOCALDISPLAYUPDATE(g_displayLevel, l, __VA_ARGS__)
|
@@ -236,10 +234,22 @@ static size_t COVER_sum(const size_t *samplesSizes, unsigned nbSamples) {
|
|
236
234
|
* Returns 1 if the dmer at lp is greater than the dmer at rp.
|
237
235
|
*/
|
238
236
|
static int COVER_cmp(COVER_ctx_t *ctx, const void *lp, const void *rp) {
|
239
|
-
const
|
240
|
-
const
|
237
|
+
U32 const lhs = *(U32 const *)lp;
|
238
|
+
U32 const rhs = *(U32 const *)rp;
|
241
239
|
return memcmp(ctx->samples + lhs, ctx->samples + rhs, ctx->d);
|
242
240
|
}
|
241
|
+
/**
|
242
|
+
* Faster version for d <= 8.
|
243
|
+
*/
|
244
|
+
static int COVER_cmp8(COVER_ctx_t *ctx, const void *lp, const void *rp) {
|
245
|
+
U64 const mask = (ctx->d == 8) ? (U64)-1 : (((U64)1 << (8 * ctx->d)) - 1);
|
246
|
+
U64 const lhs = MEM_readLE64(ctx->samples + *(U32 const *)lp) & mask;
|
247
|
+
U64 const rhs = MEM_readLE64(ctx->samples + *(U32 const *)rp) & mask;
|
248
|
+
if (lhs < rhs) {
|
249
|
+
return -1;
|
250
|
+
}
|
251
|
+
return (lhs > rhs);
|
252
|
+
}
|
243
253
|
|
244
254
|
/**
|
245
255
|
* Same as COVER_cmp() except ties are broken by pointer value
|
@@ -253,6 +263,16 @@ static int COVER_strict_cmp(const void *lp, const void *rp) {
|
|
253
263
|
}
|
254
264
|
return result;
|
255
265
|
}
|
266
|
+
/**
|
267
|
+
* Faster version for d <= 8.
|
268
|
+
*/
|
269
|
+
static int COVER_strict_cmp8(const void *lp, const void *rp) {
|
270
|
+
int result = COVER_cmp8(g_ctx, lp, rp);
|
271
|
+
if (result == 0) {
|
272
|
+
result = lp < rp ? -1 : 1;
|
273
|
+
}
|
274
|
+
return result;
|
275
|
+
}
|
256
276
|
|
257
277
|
/**
|
258
278
|
* Returns the first pointer in [first, last) whose element does not compare
|
@@ -508,7 +528,7 @@ static int COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer,
|
|
508
528
|
const BYTE *const samples = (const BYTE *)samplesBuffer;
|
509
529
|
const size_t totalSamplesSize = COVER_sum(samplesSizes, nbSamples);
|
510
530
|
/* Checks */
|
511
|
-
if (totalSamplesSize < d ||
|
531
|
+
if (totalSamplesSize < MAX(d, sizeof(U64)) ||
|
512
532
|
totalSamplesSize >= (size_t)COVER_MAX_SAMPLES_SIZE) {
|
513
533
|
DISPLAYLEVEL(1, "Total samples size is too large, maximum size is %u MB\n",
|
514
534
|
(COVER_MAX_SAMPLES_SIZE >> 20));
|
@@ -522,7 +542,7 @@ static int COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer,
|
|
522
542
|
ctx->samplesSizes = samplesSizes;
|
523
543
|
ctx->nbSamples = nbSamples;
|
524
544
|
/* Partial suffix array */
|
525
|
-
ctx->suffixSize = totalSamplesSize - d + 1;
|
545
|
+
ctx->suffixSize = totalSamplesSize - MAX(d, sizeof(U64)) + 1;
|
526
546
|
ctx->suffix = (U32 *)malloc(ctx->suffixSize * sizeof(U32));
|
527
547
|
/* Maps index to the dmerID */
|
528
548
|
ctx->dmerAt = (U32 *)malloc(ctx->suffixSize * sizeof(U32));
|
@@ -556,7 +576,8 @@ static int COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer,
|
|
556
576
|
}
|
557
577
|
/* qsort doesn't take an opaque pointer, so pass as a global */
|
558
578
|
g_ctx = ctx;
|
559
|
-
qsort(ctx->suffix, ctx->suffixSize, sizeof(U32),
|
579
|
+
qsort(ctx->suffix, ctx->suffixSize, sizeof(U32),
|
580
|
+
(ctx->d <= 8 ? &COVER_strict_cmp8 : &COVER_strict_cmp));
|
560
581
|
}
|
561
582
|
DISPLAYLEVEL(2, "Computing frequencies\n");
|
562
583
|
/* For each dmer group (group of positions with the same first d bytes):
|
@@ -566,8 +587,8 @@ static int COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer,
|
|
566
587
|
* 2. We calculate how many samples the dmer occurs in and save it in
|
567
588
|
* freqs[dmerId].
|
568
589
|
*/
|
569
|
-
COVER_groupBy(ctx->suffix, ctx->suffixSize, sizeof(U32), ctx,
|
570
|
-
&COVER_group);
|
590
|
+
COVER_groupBy(ctx->suffix, ctx->suffixSize, sizeof(U32), ctx,
|
591
|
+
(ctx->d <= 8 ? &COVER_cmp8 : &COVER_cmp), &COVER_group);
|
571
592
|
ctx->freqs = ctx->suffix;
|
572
593
|
ctx->suffix = NULL;
|
573
594
|
return 1;
|
@@ -918,10 +939,10 @@ ZDICTLIB_API size_t COVER_optimizeTrainFromBuffer(void *dictBuffer,
|
|
918
939
|
/* constants */
|
919
940
|
const unsigned nbThreads = parameters->nbThreads;
|
920
941
|
const unsigned kMinD = parameters->d == 0 ? 6 : parameters->d;
|
921
|
-
const unsigned kMaxD = parameters->d == 0 ?
|
922
|
-
const unsigned kMinK = parameters->k == 0 ?
|
923
|
-
const unsigned kMaxK = parameters->k == 0 ?
|
924
|
-
const unsigned kSteps = parameters->steps == 0 ?
|
942
|
+
const unsigned kMaxD = parameters->d == 0 ? 8 : parameters->d;
|
943
|
+
const unsigned kMinK = parameters->k == 0 ? 50 : parameters->k;
|
944
|
+
const unsigned kMaxK = parameters->k == 0 ? 2000 : parameters->k;
|
945
|
+
const unsigned kSteps = parameters->steps == 0 ? 40 : parameters->steps;
|
925
946
|
const unsigned kStepSize = MAX((kMaxK - kMinK) / kSteps, 1);
|
926
947
|
const unsigned kIterations =
|
927
948
|
(1 + (kMaxD - kMinD) / 2) * (1 + (kMaxK - kMinK) / kStepSize);
|
@@ -11,8 +11,9 @@
|
|
11
11
|
/*-**************************************
|
12
12
|
* Tuning parameters
|
13
13
|
****************************************/
|
14
|
+
#define MINRATIO 4 /* minimum nb of apparition to be selected in dictionary */
|
14
15
|
#define ZDICT_MAX_SAMPLES_SIZE (2000U << 20)
|
15
|
-
#define ZDICT_MIN_SAMPLES_SIZE
|
16
|
+
#define ZDICT_MIN_SAMPLES_SIZE (ZDICT_CONTENTSIZE_MIN * MINRATIO)
|
16
17
|
|
17
18
|
|
18
19
|
/*-**************************************
|
@@ -59,11 +60,8 @@
|
|
59
60
|
|
60
61
|
#define NOISELENGTH 32
|
61
62
|
|
62
|
-
#define MINRATIO 4
|
63
63
|
static const int g_compressionLevel_default = 6;
|
64
64
|
static const U32 g_selectivity_default = 9;
|
65
|
-
static const size_t g_provision_entropySize = 200;
|
66
|
-
static const size_t g_min_fast_dictContent = 192;
|
67
65
|
|
68
66
|
|
69
67
|
/*-*************************************
|
@@ -308,10 +306,10 @@ static dictItem ZDICT_analyzePos(
|
|
308
306
|
/* look backward */
|
309
307
|
length = MINMATCHLENGTH;
|
310
308
|
while ((length >= MINMATCHLENGTH) & (start > 0)) {
|
311
|
-
|
312
|
-
|
313
|
-
|
314
|
-
|
309
|
+
length = ZDICT_count(b + pos, b + suffix[start - 1]);
|
310
|
+
if (length >= LLIMIT) length = LLIMIT - 1;
|
311
|
+
lengthList[length]++;
|
312
|
+
if (length >= MINMATCHLENGTH) start--;
|
315
313
|
}
|
316
314
|
|
317
315
|
/* largest useful length */
|
@@ -363,21 +361,35 @@ static dictItem ZDICT_analyzePos(
|
|
363
361
|
}
|
364
362
|
|
365
363
|
|
364
|
+
static int isIncluded(const void* in, const void* container, size_t length)
|
365
|
+
{
|
366
|
+
const char* const ip = (const char*) in;
|
367
|
+
const char* const into = (const char*) container;
|
368
|
+
size_t u;
|
369
|
+
|
370
|
+
for (u=0; u<length; u++) { /* works because end of buffer is a noisy guard band */
|
371
|
+
if (ip[u] != into[u]) break;
|
372
|
+
}
|
373
|
+
|
374
|
+
return u==length;
|
375
|
+
}
|
376
|
+
|
366
377
|
/*! ZDICT_checkMerge
|
367
378
|
check if dictItem can be merged, do it if possible
|
368
379
|
@return : id of destination elt, 0 if not merged
|
369
380
|
*/
|
370
|
-
static U32
|
381
|
+
static U32 ZDICT_tryMerge(dictItem* table, dictItem elt, U32 eltNbToSkip, const void* buffer)
|
371
382
|
{
|
372
383
|
const U32 tableSize = table->pos;
|
373
384
|
const U32 eltEnd = elt.pos + elt.length;
|
385
|
+
const char* const buf = (const char*) buffer;
|
374
386
|
|
375
387
|
/* tail overlap */
|
376
388
|
U32 u; for (u=1; u<tableSize; u++) {
|
377
389
|
if (u==eltNbToSkip) continue;
|
378
390
|
if ((table[u].pos > elt.pos) && (table[u].pos <= eltEnd)) { /* overlap, existing > new */
|
379
391
|
/* append */
|
380
|
-
U32 addedLength = table[u].pos - elt.pos;
|
392
|
+
U32 const addedLength = table[u].pos - elt.pos;
|
381
393
|
table[u].length += addedLength;
|
382
394
|
table[u].pos = elt.pos;
|
383
395
|
table[u].savings += elt.savings * addedLength / elt.length; /* rough approx */
|
@@ -393,9 +405,10 @@ static U32 ZDICT_checkMerge(dictItem* table, dictItem elt, U32 eltNbToSkip)
|
|
393
405
|
/* front overlap */
|
394
406
|
for (u=1; u<tableSize; u++) {
|
395
407
|
if (u==eltNbToSkip) continue;
|
408
|
+
|
396
409
|
if ((table[u].pos + table[u].length >= elt.pos) && (table[u].pos < elt.pos)) { /* overlap, existing < new */
|
397
410
|
/* append */
|
398
|
-
int addedLength = (int)eltEnd - (table[u].pos + table[u].length);
|
411
|
+
int const addedLength = (int)eltEnd - (table[u].pos + table[u].length);
|
399
412
|
table[u].savings += elt.length / 8; /* rough approx bonus */
|
400
413
|
if (addedLength > 0) { /* otherwise, elt fully included into existing */
|
401
414
|
table[u].length += addedLength;
|
@@ -407,7 +420,18 @@ static U32 ZDICT_checkMerge(dictItem* table, dictItem elt, U32 eltNbToSkip)
|
|
407
420
|
table[u] = table[u-1], u--;
|
408
421
|
table[u] = elt;
|
409
422
|
return u;
|
410
|
-
|
423
|
+
}
|
424
|
+
|
425
|
+
if (MEM_read64(buf + table[u].pos) == MEM_read64(buf + elt.pos + 1)) {
|
426
|
+
if (isIncluded(buf + table[u].pos, buf + elt.pos + 1, table[u].length)) {
|
427
|
+
size_t const addedLength = MAX( (int)elt.length - (int)table[u].length , 1 );
|
428
|
+
table[u].pos = elt.pos;
|
429
|
+
table[u].savings += (U32)(elt.savings * addedLength / elt.length);
|
430
|
+
table[u].length = MIN(elt.length, table[u].length + 1);
|
431
|
+
return u;
|
432
|
+
}
|
433
|
+
}
|
434
|
+
}
|
411
435
|
|
412
436
|
return 0;
|
413
437
|
}
|
@@ -425,14 +449,14 @@ static void ZDICT_removeDictItem(dictItem* table, U32 id)
|
|
425
449
|
}
|
426
450
|
|
427
451
|
|
428
|
-
static void ZDICT_insertDictItem(dictItem* table, U32 maxSize, dictItem elt)
|
452
|
+
static void ZDICT_insertDictItem(dictItem* table, U32 maxSize, dictItem elt, const void* buffer)
|
429
453
|
{
|
430
454
|
/* merge if possible */
|
431
|
-
U32 mergeId =
|
455
|
+
U32 mergeId = ZDICT_tryMerge(table, elt, 0, buffer);
|
432
456
|
if (mergeId) {
|
433
457
|
U32 newMerge = 1;
|
434
458
|
while (newMerge) {
|
435
|
-
newMerge =
|
459
|
+
newMerge = ZDICT_tryMerge(table, table[mergeId], mergeId, buffer);
|
436
460
|
if (newMerge) ZDICT_removeDictItem(table, mergeId);
|
437
461
|
mergeId = newMerge;
|
438
462
|
}
|
@@ -480,7 +504,7 @@ static size_t ZDICT_trainBuffer(dictItem* dictList, U32 dictListSize,
|
|
480
504
|
# define DISPLAYUPDATE(l, ...) if (notificationLevel>=l) { \
|
481
505
|
if (ZDICT_clockSpan(displayClock) > refreshRate) \
|
482
506
|
{ displayClock = clock(); DISPLAY(__VA_ARGS__); \
|
483
|
-
if (notificationLevel>=4) fflush(
|
507
|
+
if (notificationLevel>=4) fflush(stderr); } }
|
484
508
|
|
485
509
|
/* init */
|
486
510
|
DISPLAYLEVEL(2, "\r%70s\r", ""); /* clean display line */
|
@@ -521,7 +545,7 @@ static size_t ZDICT_trainBuffer(dictItem* dictList, U32 dictListSize,
|
|
521
545
|
if (doneMarks[cursor]) { cursor++; continue; }
|
522
546
|
solution = ZDICT_analyzePos(doneMarks, suffix, reverseSuffix[cursor], buffer, minRatio, notificationLevel);
|
523
547
|
if (solution.length==0) { cursor++; continue; }
|
524
|
-
ZDICT_insertDictItem(dictList, dictListSize, solution);
|
548
|
+
ZDICT_insertDictItem(dictList, dictListSize, solution, buffer);
|
525
549
|
cursor += solution.length;
|
526
550
|
DISPLAYUPDATE(2, "\r%4.2f %% \r", (double)cursor / bufferSize * 100);
|
527
551
|
} }
|
@@ -683,19 +707,19 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
|
|
683
707
|
goto _cleanup;
|
684
708
|
}
|
685
709
|
if (offcodeMax>OFFCODE_MAX) { eSize = ERROR(dictionary_wrong); goto _cleanup; } /* too large dictionary */
|
686
|
-
for (u=0; u<256; u++) countLit[u]=1; /* any character must be described */
|
687
|
-
for (u=0; u<=offcodeMax; u++) offcodeCount[u]=1;
|
688
|
-
for (u=0; u<=MaxML; u++) matchLengthCount[u]=1;
|
689
|
-
for (u=0; u<=MaxLL; u++) litLengthCount[u]=1;
|
710
|
+
for (u=0; u<256; u++) countLit[u] = 1; /* any character must be described */
|
711
|
+
for (u=0; u<=offcodeMax; u++) offcodeCount[u] = 1;
|
712
|
+
for (u=0; u<=MaxML; u++) matchLengthCount[u] = 1;
|
713
|
+
for (u=0; u<=MaxLL; u++) litLengthCount[u] = 1;
|
690
714
|
memset(repOffset, 0, sizeof(repOffset));
|
691
715
|
repOffset[1] = repOffset[4] = repOffset[8] = 1;
|
692
716
|
memset(bestRepOffset, 0, sizeof(bestRepOffset));
|
693
|
-
if (compressionLevel==0) compressionLevel=g_compressionLevel_default;
|
717
|
+
if (compressionLevel==0) compressionLevel = g_compressionLevel_default;
|
694
718
|
params = ZSTD_getParams(compressionLevel, averageSampleSize, dictBufferSize);
|
695
719
|
{ size_t const beginResult = ZSTD_compressBegin_advanced(esr.ref, dictBuffer, dictBufferSize, params, 0);
|
696
|
-
|
720
|
+
if (ZSTD_isError(beginResult)) {
|
721
|
+
DISPLAYLEVEL(1, "error : ZSTD_compressBegin_advanced() failed : %s \n", ZSTD_getErrorName(beginResult));
|
697
722
|
eSize = ERROR(GENERIC);
|
698
|
-
DISPLAYLEVEL(1, "error : ZSTD_compressBegin_advanced failed \n");
|
699
723
|
goto _cleanup;
|
700
724
|
} }
|
701
725
|
|
@@ -812,7 +836,6 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
|
|
812
836
|
MEM_writeLE32(dstPtr+4, repStartValue[1]);
|
813
837
|
MEM_writeLE32(dstPtr+8, repStartValue[2]);
|
814
838
|
#endif
|
815
|
-
//dstPtr += 12;
|
816
839
|
eSize += 12;
|
817
840
|
|
818
841
|
_cleanup:
|
@@ -831,7 +854,7 @@ size_t ZDICT_finalizeDictionary(void* dictBuffer, size_t dictBufferCapacity,
|
|
831
854
|
ZDICT_params_t params)
|
832
855
|
{
|
833
856
|
size_t hSize;
|
834
|
-
#define HBUFFSIZE 256
|
857
|
+
#define HBUFFSIZE 256 /* should prove large enough for all entropy headers */
|
835
858
|
BYTE header[HBUFFSIZE];
|
836
859
|
int const compressionLevel = (params.compressionLevel <= 0) ? g_compressionLevel_default : params.compressionLevel;
|
837
860
|
U32 const notificationLevel = params.notificationLevel;
|
@@ -877,20 +900,11 @@ size_t ZDICT_addEntropyTablesFromBuffer_advanced(void* dictBuffer, size_t dictCo
|
|
877
900
|
const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
|
878
901
|
ZDICT_params_t params)
|
879
902
|
{
|
880
|
-
size_t hSize;
|
881
903
|
int const compressionLevel = (params.compressionLevel <= 0) ? g_compressionLevel_default : params.compressionLevel;
|
882
904
|
U32 const notificationLevel = params.notificationLevel;
|
905
|
+
size_t hSize = 8;
|
883
906
|
|
884
|
-
/*
|
885
|
-
MEM_writeLE32(dictBuffer, ZSTD_DICT_MAGIC);
|
886
|
-
{ U64 const randomID = XXH64((char*)dictBuffer + dictBufferCapacity - dictContentSize, dictContentSize, 0);
|
887
|
-
U32 const compliantID = (randomID % ((1U<<31)-32768)) + 32768;
|
888
|
-
U32 const dictID = params.dictID ? params.dictID : compliantID;
|
889
|
-
MEM_writeLE32((char*)dictBuffer+4, dictID);
|
890
|
-
}
|
891
|
-
hSize = 8;
|
892
|
-
|
893
|
-
/* entropy tables */
|
907
|
+
/* calculate entropy tables */
|
894
908
|
DISPLAYLEVEL(2, "\r%70s\r", ""); /* clean display line */
|
895
909
|
DISPLAYLEVEL(2, "statistics ... \n");
|
896
910
|
{ size_t const eSize = ZDICT_analyzeEntropy((char*)dictBuffer+hSize, dictBufferCapacity-hSize,
|
@@ -902,6 +916,13 @@ size_t ZDICT_addEntropyTablesFromBuffer_advanced(void* dictBuffer, size_t dictCo
|
|
902
916
|
hSize += eSize;
|
903
917
|
}
|
904
918
|
|
919
|
+
/* add dictionary header (after entropy tables) */
|
920
|
+
MEM_writeLE32(dictBuffer, ZSTD_DICT_MAGIC);
|
921
|
+
{ U64 const randomID = XXH64((char*)dictBuffer + dictBufferCapacity - dictContentSize, dictContentSize, 0);
|
922
|
+
U32 const compliantID = (randomID % ((1U<<31)-32768)) + 32768;
|
923
|
+
U32 const dictID = params.dictID ? params.dictID : compliantID;
|
924
|
+
MEM_writeLE32((char*)dictBuffer+4, dictID);
|
925
|
+
}
|
905
926
|
|
906
927
|
if (hSize + dictContentSize < dictBufferCapacity)
|
907
928
|
memmove((char*)dictBuffer + hSize, (char*)dictBuffer + dictBufferCapacity - dictContentSize, dictContentSize);
|
@@ -929,8 +950,8 @@ size_t ZDICT_trainFromBuffer_unsafe(
|
|
929
950
|
|
930
951
|
/* checks */
|
931
952
|
if (!dictList) return ERROR(memory_allocation);
|
932
|
-
if (maxDictSize
|
933
|
-
if (samplesBuffSize < ZDICT_MIN_SAMPLES_SIZE) { free(dictList); return
|
953
|
+
if (maxDictSize < ZDICT_DICTSIZE_MIN) { free(dictList); return ERROR(dstSize_tooSmall); } /* requested dictionary size is too small */
|
954
|
+
if (samplesBuffSize < ZDICT_MIN_SAMPLES_SIZE) { free(dictList); return ERROR(dictionaryCreation_failed); } /* not enough source to create dictionary */
|
934
955
|
|
935
956
|
/* init */
|
936
957
|
ZDICT_initDictItem(dictList);
|
@@ -963,14 +984,15 @@ size_t ZDICT_trainFromBuffer_unsafe(
|
|
963
984
|
|
964
985
|
/* create dictionary */
|
965
986
|
{ U32 dictContentSize = ZDICT_dictSize(dictList);
|
966
|
-
if (dictContentSize <
|
987
|
+
if (dictContentSize < ZDICT_CONTENTSIZE_MIN) { free(dictList); return ERROR(dictionaryCreation_failed); } /* dictionary content too small */
|
988
|
+
if (dictContentSize < targetDictSize/4) {
|
967
989
|
DISPLAYLEVEL(2, "! warning : selected content significantly smaller than requested (%u < %u) \n", dictContentSize, (U32)maxDictSize);
|
990
|
+
if (samplesBuffSize < 10 * targetDictSize)
|
991
|
+
DISPLAYLEVEL(2, "! consider increasing the number of samples (total size : %u MB)\n", (U32)(samplesBuffSize>>20));
|
968
992
|
if (minRep > MINRATIO) {
|
969
993
|
DISPLAYLEVEL(2, "! consider increasing selectivity to produce larger dictionary (-s%u) \n", selectivity+1);
|
970
994
|
DISPLAYLEVEL(2, "! note : larger dictionaries are not necessarily better, test its efficiency on samples \n");
|
971
995
|
}
|
972
|
-
if (samplesBuffSize < 10 * targetDictSize)
|
973
|
-
DISPLAYLEVEL(2, "! consider increasing the number of samples (total size : %u MB)\n", (U32)(samplesBuffSize>>20));
|
974
996
|
}
|
975
997
|
|
976
998
|
if ((dictContentSize > targetDictSize*3) && (nbSamples > 2*MINRATIO) && (selectivity>1)) {
|
@@ -978,7 +1000,7 @@ size_t ZDICT_trainFromBuffer_unsafe(
|
|
978
1000
|
while ((nbSamples >> proposedSelectivity) <= MINRATIO) { proposedSelectivity--; }
|
979
1001
|
DISPLAYLEVEL(2, "! note : calculated dictionary significantly larger than requested (%u > %u) \n", dictContentSize, (U32)maxDictSize);
|
980
1002
|
DISPLAYLEVEL(2, "! consider increasing dictionary size, or produce denser dictionary (-s%u) \n", proposedSelectivity);
|
981
|
-
DISPLAYLEVEL(2, "! always test dictionary efficiency on samples \n");
|
1003
|
+
DISPLAYLEVEL(2, "! always test dictionary efficiency on real samples \n");
|
982
1004
|
}
|
983
1005
|
|
984
1006
|
/* limit dictionary size */
|
@@ -88,7 +88,7 @@ ZDICTLIB_API size_t ZDICT_trainFromBuffer_advanced(void* dictBuffer, size_t dict
|
|
88
88
|
|
89
89
|
/*! COVER_params_t :
|
90
90
|
For all values 0 means default.
|
91
|
-
|
91
|
+
k and d are the only required parameters.
|
92
92
|
*/
|
93
93
|
typedef struct {
|
94
94
|
unsigned k; /* Segment size : constraint: 0 < k : Reasonable range [16, 2048+] */
|
@@ -147,18 +147,18 @@ ZDICTLIB_API size_t COVER_optimizeTrainFromBuffer(void* dictBuffer, size_t dictB
|
|
147
147
|
Samples must be stored concatenated in a flat buffer `samplesBuffer`,
|
148
148
|
supplied with an array of sizes `samplesSizes`, providing the size of each sample in order.
|
149
149
|
|
150
|
-
dictContentSize must be
|
151
|
-
maxDictSize must be >= dictContentSize, and must be
|
150
|
+
dictContentSize must be >= ZDICT_CONTENTSIZE_MIN bytes.
|
151
|
+
maxDictSize must be >= dictContentSize, and must be >= ZDICT_DICTSIZE_MIN bytes.
|
152
152
|
|
153
153
|
@return : size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`),
|
154
154
|
or an error code, which can be tested by ZDICT_isError().
|
155
155
|
note : ZDICT_finalizeDictionary() will push notifications into stderr if instructed to, using notificationLevel>0.
|
156
|
-
note 2 : dictBuffer and
|
156
|
+
note 2 : dictBuffer and dictContent can overlap
|
157
157
|
*/
|
158
|
-
#define ZDICT_CONTENTSIZE_MIN
|
159
|
-
#define ZDICT_DICTSIZE_MIN
|
158
|
+
#define ZDICT_CONTENTSIZE_MIN 128
|
159
|
+
#define ZDICT_DICTSIZE_MIN 256
|
160
160
|
ZDICTLIB_API size_t ZDICT_finalizeDictionary(void* dictBuffer, size_t dictBufferCapacity,
|
161
|
-
const void*
|
161
|
+
const void* dictContent, size_t dictContentSize,
|
162
162
|
const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
|
163
163
|
ZDICT_params_t parameters);
|
164
164
|
|
@@ -4,11 +4,11 @@ ZSTD Windows binary package
|
|
4
4
|
#### The package contents
|
5
5
|
|
6
6
|
- `zstd.exe` : Command Line Utility, supporting gzip-like arguments
|
7
|
-
- `dll\libzstd.dll` : The
|
8
|
-
- `dll\libzstd.lib` : The import library of ZSTD library for Visual C++
|
9
|
-
- `example\` : The example of usage of ZSTD library
|
10
|
-
- `include\` : Header files required
|
11
|
-
- `static\libzstd_static.lib` : The static ZSTD library
|
7
|
+
- `dll\libzstd.dll` : The ZSTD dynamic library (DLL)
|
8
|
+
- `dll\libzstd.lib` : The import library of the ZSTD dynamic library (DLL) for Visual C++
|
9
|
+
- `example\` : The example of usage of the ZSTD library
|
10
|
+
- `include\` : Header files required by the ZSTD library
|
11
|
+
- `static\libzstd_static.lib` : The static ZSTD library (LIB)
|
12
12
|
|
13
13
|
|
14
14
|
#### Usage of Command Line Interface
|
@@ -9,6 +9,7 @@ COPY lib\common\mem.h bin\example\
|
|
9
9
|
COPY lib\common\zstd_errors.h bin\example\
|
10
10
|
COPY lib\common\zstd_internal.h bin\example\
|
11
11
|
COPY lib\common\error_private.h bin\example\
|
12
|
+
COPY lib\common\xxhash.h bin\example\
|
12
13
|
COPY lib\zstd.h bin\include\
|
13
14
|
COPY lib\libzstd.a bin\static\libzstd_static.lib
|
14
15
|
COPY lib\dll\libzstd.* bin\dll\
|
@@ -475,8 +475,8 @@ MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, U32 nbBits)
|
|
475
475
|
|
476
476
|
MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD)
|
477
477
|
{
|
478
|
-
|
479
|
-
|
478
|
+
if (bitD->bitsConsumed > (sizeof(bitD->bitContainer)*8)) /* should never happen */
|
479
|
+
return BIT_DStream_overflow;
|
480
480
|
|
481
481
|
if (bitD->ptr >= bitD->start + sizeof(bitD->bitContainer))
|
482
482
|
{
|
@@ -1334,8 +1334,8 @@ static size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsi
|
|
1334
1334
|
else
|
1335
1335
|
{
|
1336
1336
|
bitCount -= (int)(8 * (iend - 4 - ip));
|
1337
|
-
|
1338
|
-
|
1337
|
+
ip = iend - 4;
|
1338
|
+
}
|
1339
1339
|
bitStream = MEM_readLE32(ip) >> (bitCount & 31);
|
1340
1340
|
}
|
1341
1341
|
}
|
@@ -2040,7 +2040,7 @@ static size_t HUF_readDTableX4 (U32* DTable, const void* src, size_t srcSize)
|
|
2040
2040
|
rankStart[0] = 0; /* forget 0w symbols; this is beginning of weight(1) */
|
2041
2041
|
}
|
2042
2042
|
|
2043
|
-
|
2043
|
+
/* Build rankVal */
|
2044
2044
|
{
|
2045
2045
|
const U32 minBits = tableLog+1 - maxW;
|
2046
2046
|
U32 nextRankVal = 0;
|
@@ -2374,7 +2374,7 @@ static size_t HUF_readDTableX6 (U32* DTable, const void* src, size_t srcSize)
|
|
2374
2374
|
rankStart[0] = 0; /* forget 0w symbols; this is beginning of weight(1) */
|
2375
2375
|
}
|
2376
2376
|
|
2377
|
-
|
2377
|
+
/* Build rankVal */
|
2378
2378
|
{
|
2379
2379
|
const U32 minBits = tableLog+1 - maxW;
|
2380
2380
|
U32 nextRankVal = 0;
|
@@ -2948,14 +2948,14 @@ static size_t ZSTD_decodeLiteralsBlock(void* ctx,
|
|
2948
2948
|
const size_t litSize = (MEM_readLE32(istart) & 0xFFFFFF) >> 2; /* no buffer issue : srcSize >= MIN_CBLOCK_SIZE */
|
2949
2949
|
if (litSize > srcSize-11) /* risk of reading too far with wildcopy */
|
2950
2950
|
{
|
2951
|
-
|
2952
|
-
|
2953
|
-
|
2954
|
-
|
2955
|
-
|
2956
|
-
|
2957
|
-
|
2958
|
-
|
2951
|
+
if (litSize > srcSize-3) return ERROR(corruption_detected);
|
2952
|
+
memcpy(dctx->litBuffer, istart, litSize);
|
2953
|
+
dctx->litPtr = dctx->litBuffer;
|
2954
|
+
dctx->litSize = litSize;
|
2955
|
+
memset(dctx->litBuffer + dctx->litSize, 0, 8);
|
2956
|
+
return litSize+3;
|
2957
|
+
}
|
2958
|
+
/* direct reference into compressed stream */
|
2959
2959
|
dctx->litPtr = istart+3;
|
2960
2960
|
dctx->litSize = litSize;
|
2961
2961
|
return litSize+3;
|
@@ -3515,13 +3515,13 @@ static size_t ZSTD_decompressContinue(ZSTD_DCtx* ctx, void* dst, size_t maxDstSi
|
|
3515
3515
|
|
3516
3516
|
unsigned ZSTDv02_isError(size_t code)
|
3517
3517
|
{
|
3518
|
-
|
3518
|
+
return ZSTD_isError(code);
|
3519
3519
|
}
|
3520
3520
|
|
3521
3521
|
size_t ZSTDv02_decompress( void* dst, size_t maxOriginalSize,
|
3522
3522
|
const void* src, size_t compressedSize)
|
3523
3523
|
{
|
3524
|
-
|
3524
|
+
return ZSTD_decompress(dst, maxOriginalSize, src, compressedSize);
|
3525
3525
|
}
|
3526
3526
|
|
3527
3527
|
size_t ZSTDv02_findFrameCompressedSize(const void *src, size_t compressedSize)
|
@@ -3531,25 +3531,25 @@ size_t ZSTDv02_findFrameCompressedSize(const void *src, size_t compressedSize)
|
|
3531
3531
|
|
3532
3532
|
ZSTDv02_Dctx* ZSTDv02_createDCtx(void)
|
3533
3533
|
{
|
3534
|
-
|
3534
|
+
return (ZSTDv02_Dctx*)ZSTD_createDCtx();
|
3535
3535
|
}
|
3536
3536
|
|
3537
3537
|
size_t ZSTDv02_freeDCtx(ZSTDv02_Dctx* dctx)
|
3538
3538
|
{
|
3539
|
-
|
3539
|
+
return ZSTD_freeDCtx((ZSTD_DCtx*)dctx);
|
3540
3540
|
}
|
3541
3541
|
|
3542
3542
|
size_t ZSTDv02_resetDCtx(ZSTDv02_Dctx* dctx)
|
3543
3543
|
{
|
3544
|
-
|
3544
|
+
return ZSTD_resetDCtx((ZSTD_DCtx*)dctx);
|
3545
3545
|
}
|
3546
3546
|
|
3547
3547
|
size_t ZSTDv02_nextSrcSizeToDecompress(ZSTDv02_Dctx* dctx)
|
3548
3548
|
{
|
3549
|
-
|
3549
|
+
return ZSTD_nextSrcSizeToDecompress((ZSTD_DCtx*)dctx);
|
3550
3550
|
}
|
3551
3551
|
|
3552
3552
|
size_t ZSTDv02_decompressContinue(ZSTDv02_Dctx* dctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize)
|
3553
3553
|
{
|
3554
|
-
|
3554
|
+
return ZSTD_decompressContinue((ZSTD_DCtx*)dctx, dst, maxDstSize, src, srcSize);
|
3555
3555
|
}
|