zstd-ruby 1.3.3.0 → 1.3.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/README.md +1 -1
- data/ext/zstdruby/libzstd/BUCK +13 -0
- data/ext/zstdruby/libzstd/README.md +32 -25
- data/ext/zstdruby/libzstd/common/bitstream.h +1 -1
- data/ext/zstdruby/libzstd/common/compiler.h +25 -0
- data/ext/zstdruby/libzstd/common/cpu.h +216 -0
- data/ext/zstdruby/libzstd/common/error_private.c +1 -0
- data/ext/zstdruby/libzstd/common/fse.h +1 -1
- data/ext/zstdruby/libzstd/common/fse_decompress.c +2 -2
- data/ext/zstdruby/libzstd/common/huf.h +114 -89
- data/ext/zstdruby/libzstd/common/pool.c +46 -17
- data/ext/zstdruby/libzstd/common/pool.h +18 -9
- data/ext/zstdruby/libzstd/common/threading.h +12 -12
- data/ext/zstdruby/libzstd/common/zstd_errors.h +16 -7
- data/ext/zstdruby/libzstd/common/zstd_internal.h +4 -5
- data/ext/zstdruby/libzstd/compress/fse_compress.c +19 -11
- data/ext/zstdruby/libzstd/compress/huf_compress.c +160 -62
- data/ext/zstdruby/libzstd/compress/zstd_compress.c +973 -644
- data/ext/zstdruby/libzstd/compress/zstd_compress_internal.h +281 -34
- data/ext/zstdruby/libzstd/compress/zstd_double_fast.c +80 -62
- data/ext/zstdruby/libzstd/compress/zstd_double_fast.h +11 -4
- data/ext/zstdruby/libzstd/compress/zstd_fast.c +87 -71
- data/ext/zstdruby/libzstd/compress/zstd_fast.h +10 -6
- data/ext/zstdruby/libzstd/compress/zstd_lazy.c +333 -274
- data/ext/zstdruby/libzstd/compress/zstd_lazy.h +33 -16
- data/ext/zstdruby/libzstd/compress/zstd_ldm.c +305 -359
- data/ext/zstdruby/libzstd/compress/zstd_ldm.h +64 -21
- data/ext/zstdruby/libzstd/compress/zstd_opt.c +194 -56
- data/ext/zstdruby/libzstd/compress/zstd_opt.h +17 -5
- data/ext/zstdruby/libzstd/compress/zstdmt_compress.c +1131 -449
- data/ext/zstdruby/libzstd/compress/zstdmt_compress.h +32 -16
- data/ext/zstdruby/libzstd/decompress/huf_decompress.c +390 -290
- data/ext/zstdruby/libzstd/decompress/zstd_decompress.c +777 -439
- data/ext/zstdruby/libzstd/dictBuilder/cover.c +11 -8
- data/ext/zstdruby/libzstd/dictBuilder/zdict.c +83 -50
- data/ext/zstdruby/libzstd/dictBuilder/zdict.h +44 -43
- data/ext/zstdruby/libzstd/legacy/zstd_legacy.h +2 -0
- data/ext/zstdruby/libzstd/legacy/zstd_v04.c +42 -118
- data/ext/zstdruby/libzstd/legacy/zstd_v06.c +2 -2
- data/ext/zstdruby/libzstd/legacy/zstd_v07.c +2 -2
- data/ext/zstdruby/libzstd/zstd.h +254 -254
- data/lib/zstd-ruby/version.rb +1 -1
- metadata +4 -3
data/ext/zstdruby/libzstd/dictBuilder/cover.c

@@ -537,8 +537,8 @@ static int COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer,
   /* Checks */
   if (totalSamplesSize < MAX(d, sizeof(U64)) ||
       totalSamplesSize >= (size_t)COVER_MAX_SAMPLES_SIZE) {
-    DISPLAYLEVEL(1, "Total samples size is too large, maximum size is %u MB\n",
-                 (COVER_MAX_SAMPLES_SIZE >> 20));
+    DISPLAYLEVEL(1, "Total samples size is too large (%u MB), maximum size is %u MB\n",
+                 (U32)(totalSamplesSize>>20), (COVER_MAX_SAMPLES_SIZE >> 20));
     return 0;
   }
   /* Zero the context */
@@ -651,12 +651,16 @@ static size_t COVER_buildDictionary(const COVER_ctx_t *ctx, U32 *freqs,
 }

 ZDICTLIB_API size_t ZDICT_trainFromBuffer_cover(
-    void *dictBuffer, size_t dictBufferCapacity,
-    const size_t *samplesSizes, unsigned nbSamples,
-    ZDICT_cover_params_t parameters)
-
+    void *dictBuffer, size_t dictBufferCapacity,
+    const void *samplesBuffer, const size_t *samplesSizes, unsigned nbSamples,
+    ZDICT_cover_params_t parameters)
+{
+  BYTE* const dict = (BYTE*)dictBuffer;
   COVER_ctx_t ctx;
   COVER_map_t activeDmers;
+
+  /* Initialize global data */
+  g_displayLevel = parameters.zParams.notificationLevel;
   /* Checks */
   if (!COVER_checkParameters(parameters, dictBufferCapacity)) {
     DISPLAYLEVEL(1, "Cover parameters incorrect\n");
@@ -671,8 +675,6 @@ ZDICTLIB_API size_t ZDICT_trainFromBuffer_cover(
                   ZDICT_DICTSIZE_MIN);
     return ERROR(dstSize_tooSmall);
   }
-  /* Initialize global data */
-  g_displayLevel = parameters.zParams.notificationLevel;
   /* Initialize context and activeDmers */
   if (!COVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples,
                       parameters.d)) {
@@ -947,6 +949,7 @@ ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover(
   unsigned k;
   COVER_best_t best;
   POOL_ctx *pool = NULL;
+
   /* Checks */
   if (kMinK < kMaxD || kMaxK < kMinK) {
     LOCALDISPLAYLEVEL(displayLevel, 1, "Incorrect parameters\n");
data/ext/zstdruby/libzstd/dictBuilder/zdict.c

@@ -207,7 +207,6 @@ static dictItem ZDICT_analyzePos(
     U32 cumulLength[LLIMIT] = {0};
     U32 savings[LLIMIT] = {0};
     const BYTE* b = (const BYTE*)buffer;
-    size_t length;
     size_t maxLength = LLIMIT;
     size_t pos = suffix[start];
     U32 end = start;
@@ -222,26 +221,30 @@ static dictItem ZDICT_analyzePos(
         ||(MEM_read16(b+pos+1) == MEM_read16(b+pos+3))
         ||(MEM_read16(b+pos+2) == MEM_read16(b+pos+4)) ) {
         /* skip and mark segment */
-        U16
-        U32 u,
-        while (MEM_read16(b+pos+
-        if (b[pos+
-        for (u=1; u<
+        U16 const pattern16 = MEM_read16(b+pos+4);
+        U32 u, patternEnd = 6;
+        while (MEM_read16(b+pos+patternEnd) == pattern16) patternEnd+=2 ;
+        if (b[pos+patternEnd] == b[pos+patternEnd-1]) patternEnd++;
+        for (u=1; u<patternEnd; u++)
            doneMarks[pos+u] = 1;
        return solution;
    }

    /* look forward */
-
-
-
-
+    {   size_t length;
+        do {
+            end++;
+            length = ZDICT_count(b + pos, b + suffix[end]);
+        } while (length >= MINMATCHLENGTH);
+    }

    /* look backward */
-
-
-
-
+    {   size_t length;
+        do {
+            length = ZDICT_count(b + pos, b + *(suffix+start-1));
+            if (length >=MINMATCHLENGTH) start--;
+        } while(length >= MINMATCHLENGTH);
+    }

    /* exit if not found a minimum nb of repetitions */
    if (end-start < minRatio) {
@@ -268,7 +271,7 @@ static dictItem ZDICT_analyzePos(
            U32 selectedCount = 0;
            U32 selectedID = currentID;
            for (id =refinedStart; id < refinedEnd; id++) {
-                if (b[
+                if (b[suffix[id] + searchLength] != currentChar) {
                    if (currentCount > selectedCount) {
                        selectedCount = currentCount;
                        selectedID = currentID;
@@ -297,20 +300,23 @@ static dictItem ZDICT_analyzePos(
        memset(lengthList, 0, sizeof(lengthList));

        /* look forward */
-
-
-
-
-
-
+        {   size_t length;
+            do {
+                end++;
+                length = ZDICT_count(b + pos, b + suffix[end]);
+                if (length >= LLIMIT) length = LLIMIT-1;
+                lengthList[length]++;
+            } while (length >=MINMATCHLENGTH);
+        }

        /* look backward */
-        length = MINMATCHLENGTH;
-
-
-
-
-
+        {   size_t length = MINMATCHLENGTH;
+            while ((length >= MINMATCHLENGTH) & (start > 0)) {
+                length = ZDICT_count(b + pos, b + suffix[start - 1]);
+                if (length >= LLIMIT) length = LLIMIT - 1;
+                lengthList[length]++;
+                if (length >= MINMATCHLENGTH) start--;
+            }
        }

        /* largest useful length */
@@ -345,12 +351,12 @@ static dictItem ZDICT_analyzePos(
    /* mark positions done */
    {   U32 id;
        for (id=start; id<end; id++) {
-            U32 p, pEnd;
+            U32 p, pEnd, length;
            U32 const testedPos = suffix[id];
            if (testedPos == pos)
                length = solution.length;
            else {
-                length = ZDICT_count(b+pos, b+testedPos);
+                length = (U32)ZDICT_count(b+pos, b+testedPos);
                if (length > solution.length) length = solution.length;
            }
            pEnd = (U32)(testedPos + length);
@@ -575,29 +581,30 @@ static void ZDICT_fillNoise(void* buffer, size_t length)

 typedef struct
 {
-    ZSTD_CCtx* ref;
-    ZSTD_CCtx* zc;
+    ZSTD_CCtx* ref;    /* contains reference to dictionary */
+    ZSTD_CCtx* zc;     /* working context */
     void* workPlace;   /* must be ZSTD_BLOCKSIZE_MAX allocated */
 } EStats_ress_t;

 #define MAXREPOFFSET 1024

 static void ZDICT_countEStats(EStats_ress_t esr, ZSTD_parameters params,
-
-
+                              U32* countLit, U32* offsetcodeCount, U32* matchlengthCount, U32* litlengthCount, U32* repOffsets,
+                              const void* src, size_t srcSize,
+                              U32 notificationLevel)
 {
     size_t const blockSizeMax = MIN (ZSTD_BLOCKSIZE_MAX, 1 << params.cParams.windowLog);
     size_t cSize;

     if (srcSize > blockSizeMax) srcSize = blockSizeMax;   /* protection vs large samples */
-    {
-
+    {   size_t const errorCode = ZSTD_copyCCtx(esr.zc, esr.ref, 0);
+        if (ZSTD_isError(errorCode)) { DISPLAYLEVEL(1, "warning : ZSTD_copyCCtx failed \n"); return; }
     }
     cSize = ZSTD_compressBlock(esr.zc, esr.workPlace, ZSTD_BLOCKSIZE_MAX, src, srcSize);
     if (ZSTD_isError(cSize)) { DISPLAYLEVEL(3, "warning : could not compress sample size %u \n", (U32)srcSize); return; }

     if (cSize) {  /* if == 0; block is not compressible */
-        const seqStore_t* seqStorePtr = ZSTD_getSeqStore(esr.zc);
+        const seqStore_t* const seqStorePtr = ZSTD_getSeqStore(esr.zc);

         /* literals stats */
         {   const BYTE* bytePtr;
@@ -659,6 +666,18 @@ static void ZDICT_insertSortCount(offsetCount_t table[ZSTD_REP_NUM+1], U32 val,
     }
 }

+/* ZDICT_flatLit() :
+ * rewrite `countLit` to contain a mostly flat but still compressible distribution of literals.
+ * necessary to avoid generating a non-compressible distribution that HUF_writeCTable() cannot encode.
+ */
+static void ZDICT_flatLit(U32* countLit)
+{
+    int u;
+    for (u=1; u<256; u++) countLit[u] = 2;
+    countLit[0]   = 4;
+    countLit[253] = 1;
+    countLit[254] = 1;
+}

 #define OFFCODE_MAX 30  /* only applicable to first block */
 static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
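The new ZDICT_flatLit() helper above replaces a pathological literal histogram with a near-flat one. The short standalone sketch below (plain C, not part of the package; the printed total is computed here purely for illustration) rebuilds those counts and shows why the fallback remains compressible: symbol 0 is slightly heavier and symbols 253/254 slightly lighter than the rest, so a Huffman table built from it is not the degenerate 8-bits-per-symbol case that HUF_writeCTable() rejects.

```c
#include <stdio.h>

/* Rebuild the literal counts that ZDICT_flatLit() writes (values taken from
 * the hunk above): count[0]=4, count[253]=count[254]=1, all others = 2. */
int main(void)
{
    unsigned count[256];
    unsigned total = 0;
    int u;
    for (u = 1; u < 256; u++) count[u] = 2;
    count[0]   = 4;
    count[253] = 1;
    count[254] = 1;
    for (u = 0; u < 256; u++) total += count[u];
    /* mostly flat (min weight 1, max weight 4), hence still compressible */
    printf("total weight = %u over 256 symbols\n", total);
    return 0;
}
```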
@@ -688,6 +707,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
     BYTE* dstPtr = (BYTE*)dstBuffer;

     /* init */
+    DEBUGLOG(4, "ZDICT_analyzeEntropy");
     esr.ref = ZSTD_createCCtx();
     esr.zc = ZSTD_createCCtx();
     esr.workPlace = malloc(ZSTD_BLOCKSIZE_MAX);
@@ -713,7 +733,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
         goto _cleanup;
     }   }

-    /* collect stats on all
+    /* collect stats on all samples */
     for (u=0; u<nbFiles; u++) {
         ZDICT_countEStats(esr, params,
                           countLit, offcodeCount, matchLengthCount, litLengthCount, repOffset,
@@ -722,14 +742,21 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
         pos += fileSizes[u];
     }

-    /* analyze */
-
-
-
-
-
+    /* analyze, build stats, starting with literals */
+    {   size_t maxNbBits = HUF_buildCTable (hufTable, countLit, 255, huffLog);
+        if (HUF_isError(maxNbBits)) {
+            eSize = ERROR(GENERIC);
+            DISPLAYLEVEL(1, " HUF_buildCTable error \n");
+            goto _cleanup;
+        }
+        if (maxNbBits==8) {  /* not compressible : will fail on HUF_writeCTable() */
+            DISPLAYLEVEL(2, "warning : pathological dataset : literals are not compressible : samples are noisy or too regular \n");
+            ZDICT_flatLit(countLit);  /* replace distribution by a fake "mostly flat but still compressible" distribution, that HUF_writeCTable() can encode */
+            maxNbBits = HUF_buildCTable (hufTable, countLit, 255, huffLog);
+            assert(maxNbBits==9);
+        }
+        huffLog = (U32)maxNbBits;
     }
-    huffLog = (U32)errorCode;

     /* looking for most common first offsets */
     {   U32 offset;
@@ -850,6 +877,7 @@ size_t ZDICT_finalizeDictionary(void* dictBuffer, size_t dictBufferCapacity,
     U32 const notificationLevel = params.notificationLevel;

     /* check conditions */
+    DEBUGLOG(4, "ZDICT_finalizeDictionary");
     if (dictBufferCapacity < dictContentSize) return ERROR(dstSize_tooSmall);
     if (dictContentSize < ZDICT_CONTENTSIZE_MIN) return ERROR(srcSize_wrong);
     if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) return ERROR(dstSize_tooSmall);
@@ -1025,8 +1053,9 @@ size_t ZDICT_trainFromBuffer_unsafe_legacy(
 }


-/*
- *
+/* ZDICT_trainFromBuffer_legacy() :
+ * issue : samplesBuffer need to be followed by a noisy guard band.
+ * work around : duplicate the buffer, and add the noise */
 size_t ZDICT_trainFromBuffer_legacy(void* dictBuffer, size_t dictBufferCapacity,
                                     const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
                                     ZDICT_legacy_params_t params)
@@ -1054,18 +1083,22 @@ size_t ZDICT_trainFromBuffer(void* dictBuffer, size_t dictBufferCapacity,
                              const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples)
 {
     ZDICT_cover_params_t params;
+    DEBUGLOG(3, "ZDICT_trainFromBuffer");
     memset(&params, 0, sizeof(params));
     params.d = 8;
     params.steps = 4;
-    /* Default to level 6 since no compression level information is
+    /* Default to level 6 since no compression level information is available */
     params.zParams.compressionLevel = 6;
+#if defined(ZSTD_DEBUG) && (ZSTD_DEBUG>=1)
+    params.zParams.notificationLevel = ZSTD_DEBUG;
+#endif
     return ZDICT_optimizeTrainFromBuffer_cover(dictBuffer, dictBufferCapacity,
-                                               samplesBuffer, samplesSizes,
-
+                                               samplesBuffer, samplesSizes, nbSamples,
+                                               &params);
 }

 size_t ZDICT_addEntropyTablesFromBuffer(void* dictBuffer, size_t dictContentSize, size_t dictBufferCapacity,
-
+                                        const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples)
 {
     ZDICT_params_t params;
     memset(&params, 0, sizeof(params));
data/ext/zstdruby/libzstd/zdict.h

@@ -38,21 +38,21 @@ extern "C" {


 /*! ZDICT_trainFromBuffer():
- *
- *
- *
- *
- *
+ *  Train a dictionary from an array of samples.
+ *  Redirect towards ZDICT_optimizeTrainFromBuffer_cover() single-threaded, with d=8 and steps=4.
+ *  Samples must be stored concatenated in a single flat buffer `samplesBuffer`,
+ *  supplied with an array of sizes `samplesSizes`, providing the size of each sample, in order.
+ *  The resulting dictionary will be saved into `dictBuffer`.
 * @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
- *
- *
- *
- *  It's
- *  In general, it's recommended to provide a few thousands samples,
+ *          or an error code, which can be tested with ZDICT_isError().
+ *  Note: ZDICT_trainFromBuffer() requires about 9 bytes of memory for each input byte.
+ *  Tips: In general, a reasonable dictionary has a size of ~ 100 KB.
+ *        It's possible to select smaller or larger size, just by specifying `dictBufferCapacity`.
+ *        In general, it's recommended to provide a few thousands samples, though this can vary a lot.
 *  It's recommended that total size of all samples be about ~x100 times the target size of dictionary.
 */
 ZDICTLIB_API size_t ZDICT_trainFromBuffer(void* dictBuffer, size_t dictBufferCapacity,
-
+                                          const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples);


 /*====== Helper functions ======*/
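The rewritten comment spells out the calling convention: samples concatenated into one flat buffer, with a parallel array of their sizes. A minimal usage sketch in C follows; the wrapper name and the "return 0 on error" convention are illustrative choices, and only entry points named in this header (ZDICT_trainFromBuffer, ZDICT_isError, ZDICT_getErrorName) are used.

```c
#include <stdio.h>
#include "zdict.h"   /* ZDICT_trainFromBuffer, ZDICT_isError, ZDICT_getErrorName */

/* Train a dictionary from `nbSamples` samples already copied back-to-back into
 * `samplesBuffer`, with their individual sizes listed, in order, in
 * `samplesSizes`. Returns the dictionary size, or 0 on error. */
static size_t train_dictionary(void* dictBuffer, size_t dictCapacity,
                               const void* samplesBuffer,
                               const size_t* samplesSizes, unsigned nbSamples)
{
    size_t const dictSize = ZDICT_trainFromBuffer(dictBuffer, dictCapacity,
                                                  samplesBuffer, samplesSizes, nbSamples);
    if (ZDICT_isError(dictSize)) {
        fprintf(stderr, "training failed: %s\n", ZDICT_getErrorName(dictSize));
        return 0;
    }
    return dictSize;   /* <= dictCapacity */
}
```

Per the comment text, a `dictCapacity` around 100 KB and a sample set roughly 100x that size are reasonable starting points.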
@@ -72,14 +72,14 @@ ZDICTLIB_API const char* ZDICT_getErrorName(size_t errorCode);
 * ==================================================================================== */

 typedef struct {
-    int      compressionLevel;   /*
-    unsigned notificationLevel;  /* Write to stderr; 0 = none (default); 1 = errors; 2 = progression; 3 = details; 4 = debug; */
-    unsigned dictID;             /* 0 means auto mode (32-bits random value)
+    int      compressionLevel;   /* optimize for a specific zstd compression level; 0 means default */
+    unsigned notificationLevel;  /* Write log to stderr; 0 = none (default); 1 = errors; 2 = progression; 3 = details; 4 = debug; */
+    unsigned dictID;             /* force dictID value; 0 means auto mode (32-bits random value) */
 } ZDICT_params_t;

 /*! ZDICT_cover_params_t:
- *  For all values 0 means default.
 *  k and d are the only required parameters.
+ *  For others, value 0 means default.
 */
 typedef struct {
     unsigned k;      /* Segment size : constraint: 0 < k : Reasonable range [16, 2048+] */
@@ -91,28 +91,28 @@ typedef struct {


 /*! ZDICT_trainFromBuffer_cover():
- *
- *
- *
- *
+ *  Train a dictionary from an array of samples using the COVER algorithm.
+ *  Samples must be stored concatenated in a single flat buffer `samplesBuffer`,
+ *  supplied with an array of sizes `samplesSizes`, providing the size of each sample, in order.
+ *  The resulting dictionary will be saved into `dictBuffer`.
 * @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
- *
- *
- *
- *  It's
- *  In general, it's recommended to provide a few thousands samples,
+ *          or an error code, which can be tested with ZDICT_isError().
+ *  Note: ZDICT_trainFromBuffer_cover() requires about 9 bytes of memory for each input byte.
+ *  Tips: In general, a reasonable dictionary has a size of ~ 100 KB.
+ *        It's possible to select smaller or larger size, just by specifying `dictBufferCapacity`.
+ *        In general, it's recommended to provide a few thousands samples, though this can vary a lot.
 *  It's recommended that total size of all samples be about ~x100 times the target size of dictionary.
 */
 ZDICTLIB_API size_t ZDICT_trainFromBuffer_cover(
-
-          const size_t *samplesSizes, unsigned nbSamples,
-
+          void *dictBuffer, size_t dictBufferCapacity,
+          const void *samplesBuffer, const size_t *samplesSizes, unsigned nbSamples,
+          ZDICT_cover_params_t parameters);

 /*! ZDICT_optimizeTrainFromBuffer_cover():
 *  The same requirements as above hold for all the parameters except `parameters`.
 *  This function tries many parameter combinations and picks the best parameters.
- * `*parameters` is filled with the best parameters found,
- *  constructed with those parameters is stored in `dictBuffer`.
+ * `*parameters` is filled with the best parameters found,
+ *  dictionary constructed with those parameters is stored in `dictBuffer`.
 *
 *  All of the parameters d, k, steps are optional.
 *  If d is non-zero then we don't check multiple values of d, otherwise we check d = {6, 8, 10, 12, 14, 16}.
@@ -125,9 +125,9 @@ ZDICTLIB_API size_t ZDICT_trainFromBuffer_cover(
 * Note: ZDICT_optimizeTrainFromBuffer_cover() requires about 8 bytes of memory for each input byte and additionally another 5 bytes of memory for each byte of memory for each thread.
 */
 ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover(
-
-          const size_t
-
+          void* dictBuffer, size_t dictBufferCapacity,
+          const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
+          ZDICT_cover_params_t* parameters);

 /*! ZDICT_finalizeDictionary():
 *  Given a custom content as a basis for dictionary, and a set of samples,
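ZDICT_optimizeTrainFromBuffer_cover() takes a pointer to a ZDICT_cover_params_t and writes the winning parameters back into it. The sketch below is an illustration built on stated assumptions rather than code from the package: it assumes the cover API sits behind ZDICT_STATIC_LINKING_ONLY (the header's experimental section), fixes d=8 as this header's own documented default, and leaves k and steps at 0 so the library searches for them.

```c
#define ZDICT_STATIC_LINKING_ONLY   /* assumption: cover API lives in the experimental section */
#include <stdio.h>
#include <string.h>
#include "zdict.h"

/* Let the library search k (and keep steps at its default): a zeroed parameter
 * block means "default" for every optional field, as the comment above states. */
static size_t train_cover_auto(void* dictBuffer, size_t dictCapacity,
                               const void* samplesBuffer,
                               const size_t* samplesSizes, unsigned nbSamples)
{
    ZDICT_cover_params_t params;
    memset(&params, 0, sizeof(params));
    params.d = 8;                         /* fix d; k and steps stay 0 => optimized/default */
    params.zParams.notificationLevel = 2; /* progression messages on stderr */

    {   size_t const dictSize = ZDICT_optimizeTrainFromBuffer_cover(
                dictBuffer, dictCapacity,
                samplesBuffer, samplesSizes, nbSamples,
                &params);                 /* params is filled with the best k/d found */
        if (ZDICT_isError(dictSize)) {
            fprintf(stderr, "cover training failed: %s\n", ZDICT_getErrorName(dictSize));
            return 0;
        }
        return dictSize;
    }
}
```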
@@ -157,22 +157,23 @@ typedef struct {
 } ZDICT_legacy_params_t;

 /*! ZDICT_trainFromBuffer_legacy():
- *
- *
- *
- *
+ *  Train a dictionary from an array of samples.
+ *  Samples must be stored concatenated in a single flat buffer `samplesBuffer`,
+ *  supplied with an array of sizes `samplesSizes`, providing the size of each sample, in order.
+ *  The resulting dictionary will be saved into `dictBuffer`.
 * `parameters` is optional and can be provided with values set to 0 to mean "default".
 * @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
- *
- *
- *  It's
- *  In general, it's recommended to provide a few thousands samples,
+ *          or an error code, which can be tested with ZDICT_isError().
+ *  Tips: In general, a reasonable dictionary has a size of ~ 100 KB.
+ *        It's possible to select smaller or larger size, just by specifying `dictBufferCapacity`.
+ *        In general, it's recommended to provide a few thousands samples, though this can vary a lot.
 *  It's recommended that total size of all samples be about ~x100 times the target size of dictionary.
- *
+ *  Note: ZDICT_trainFromBuffer_legacy() will send notifications into stderr if instructed to, using notificationLevel>0.
 */
 ZDICTLIB_API size_t ZDICT_trainFromBuffer_legacy(
-    void *dictBuffer, size_t dictBufferCapacity,
-    const size_t *samplesSizes, unsigned nbSamples,
+    void *dictBuffer, size_t dictBufferCapacity,
+    const void *samplesBuffer, const size_t *samplesSizes, unsigned nbSamples,
+    ZDICT_legacy_params_t parameters);

 /* Deprecation warnings */
 /* It is generally possible to disable deprecation warnings from compiler,
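For the legacy trainer the only structural difference is the ZDICT_legacy_params_t argument. The fragment below is a hypothetical sketch: it assumes ZDICT_legacy_params_t embeds a ZDICT_params_t member named zParams (consistent with the "notificationLevel>0" remark above) and that, like the other trainers, zeroed fields mean "default"; it also assumes the legacy trainer is declared in the same experimental section of zdict.h.

```c
#define ZDICT_STATIC_LINKING_ONLY   /* assumption: legacy trainer is in the experimental section */
#include <string.h>
#include "zdict.h"

/* Hypothetical wrapper around the 1.3.4 prototype shown above.  All fields of
 * `params` except zParams.notificationLevel are left at 0 ("default").       */
static size_t train_legacy(void* dictBuffer, size_t dictCapacity,
                           const void* samplesBuffer,
                           const size_t* samplesSizes, unsigned nbSamples)
{
    ZDICT_legacy_params_t params;
    memset(&params, 0, sizeof(params));   /* 0 == "default" for every field */
    params.zParams.notificationLevel = 1; /* errors only, printed to stderr */

    /* returns the dictionary size, or an error code (test with ZDICT_isError()) */
    return ZDICT_trainFromBuffer_legacy(dictBuffer, dictCapacity,
                                        samplesBuffer, samplesSizes, nbSamples,
                                        params);
}
```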
data/ext/zstdruby/libzstd/legacy/zstd_legacy.h

@@ -246,6 +246,7 @@ MEM_STATIC size_t ZSTD_freeLegacyStreamContext(void* legacyContext, U32 version)
 MEM_STATIC size_t ZSTD_initLegacyStream(void** legacyContext, U32 prevVersion, U32 newVersion,
                                         const void* dict, size_t dictSize)
 {
+    DEBUGLOG(5, "ZSTD_initLegacyStream for v0.%u", newVersion);
     if (prevVersion != newVersion) ZSTD_freeLegacyStreamContext(*legacyContext, prevVersion);
     switch(newVersion)
     {
@@ -304,6 +305,7 @@ MEM_STATIC size_t ZSTD_initLegacyStream(void** legacyContext, U32 prevVersion, U
 MEM_STATIC size_t ZSTD_decompressLegacyStream(void* legacyContext, U32 version,
                                               ZSTD_outBuffer* output, ZSTD_inBuffer* input)
 {
+    DEBUGLOG(5, "ZSTD_decompressLegacyStream for v0.%u", version);
     switch(version)
     {
     default :