zstd-ruby 1.3.3.0 → 1.3.4.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/README.md +1 -1
- data/ext/zstdruby/libzstd/BUCK +13 -0
- data/ext/zstdruby/libzstd/README.md +32 -25
- data/ext/zstdruby/libzstd/common/bitstream.h +1 -1
- data/ext/zstdruby/libzstd/common/compiler.h +25 -0
- data/ext/zstdruby/libzstd/common/cpu.h +216 -0
- data/ext/zstdruby/libzstd/common/error_private.c +1 -0
- data/ext/zstdruby/libzstd/common/fse.h +1 -1
- data/ext/zstdruby/libzstd/common/fse_decompress.c +2 -2
- data/ext/zstdruby/libzstd/common/huf.h +114 -89
- data/ext/zstdruby/libzstd/common/pool.c +46 -17
- data/ext/zstdruby/libzstd/common/pool.h +18 -9
- data/ext/zstdruby/libzstd/common/threading.h +12 -12
- data/ext/zstdruby/libzstd/common/zstd_errors.h +16 -7
- data/ext/zstdruby/libzstd/common/zstd_internal.h +4 -5
- data/ext/zstdruby/libzstd/compress/fse_compress.c +19 -11
- data/ext/zstdruby/libzstd/compress/huf_compress.c +160 -62
- data/ext/zstdruby/libzstd/compress/zstd_compress.c +973 -644
- data/ext/zstdruby/libzstd/compress/zstd_compress_internal.h +281 -34
- data/ext/zstdruby/libzstd/compress/zstd_double_fast.c +80 -62
- data/ext/zstdruby/libzstd/compress/zstd_double_fast.h +11 -4
- data/ext/zstdruby/libzstd/compress/zstd_fast.c +87 -71
- data/ext/zstdruby/libzstd/compress/zstd_fast.h +10 -6
- data/ext/zstdruby/libzstd/compress/zstd_lazy.c +333 -274
- data/ext/zstdruby/libzstd/compress/zstd_lazy.h +33 -16
- data/ext/zstdruby/libzstd/compress/zstd_ldm.c +305 -359
- data/ext/zstdruby/libzstd/compress/zstd_ldm.h +64 -21
- data/ext/zstdruby/libzstd/compress/zstd_opt.c +194 -56
- data/ext/zstdruby/libzstd/compress/zstd_opt.h +17 -5
- data/ext/zstdruby/libzstd/compress/zstdmt_compress.c +1131 -449
- data/ext/zstdruby/libzstd/compress/zstdmt_compress.h +32 -16
- data/ext/zstdruby/libzstd/decompress/huf_decompress.c +390 -290
- data/ext/zstdruby/libzstd/decompress/zstd_decompress.c +777 -439
- data/ext/zstdruby/libzstd/dictBuilder/cover.c +11 -8
- data/ext/zstdruby/libzstd/dictBuilder/zdict.c +83 -50
- data/ext/zstdruby/libzstd/dictBuilder/zdict.h +44 -43
- data/ext/zstdruby/libzstd/legacy/zstd_legacy.h +2 -0
- data/ext/zstdruby/libzstd/legacy/zstd_v04.c +42 -118
- data/ext/zstdruby/libzstd/legacy/zstd_v06.c +2 -2
- data/ext/zstdruby/libzstd/legacy/zstd_v07.c +2 -2
- data/ext/zstdruby/libzstd/zstd.h +254 -254
- data/lib/zstd-ruby/version.rb +1 -1
- metadata +4 -3
@@ -537,8 +537,8 @@ static int COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer,
|
|
537
537
|
/* Checks */
|
538
538
|
if (totalSamplesSize < MAX(d, sizeof(U64)) ||
|
539
539
|
totalSamplesSize >= (size_t)COVER_MAX_SAMPLES_SIZE) {
|
540
|
-
DISPLAYLEVEL(1, "Total samples size is too large, maximum size is %u MB\n",
|
541
|
-
(COVER_MAX_SAMPLES_SIZE >> 20));
|
540
|
+
DISPLAYLEVEL(1, "Total samples size is too large (%u MB), maximum size is %u MB\n",
|
541
|
+
(U32)(totalSamplesSize>>20), (COVER_MAX_SAMPLES_SIZE >> 20));
|
542
542
|
return 0;
|
543
543
|
}
|
544
544
|
/* Zero the context */
|
@@ -651,12 +651,16 @@ static size_t COVER_buildDictionary(const COVER_ctx_t *ctx, U32 *freqs,
|
|
651
651
|
}
|
652
652
|
|
653
653
|
ZDICTLIB_API size_t ZDICT_trainFromBuffer_cover(
|
654
|
-
void *dictBuffer, size_t dictBufferCapacity,
|
655
|
-
const size_t *samplesSizes, unsigned nbSamples,
|
656
|
-
ZDICT_cover_params_t parameters)
|
657
|
-
|
654
|
+
void *dictBuffer, size_t dictBufferCapacity,
|
655
|
+
const void *samplesBuffer, const size_t *samplesSizes, unsigned nbSamples,
|
656
|
+
ZDICT_cover_params_t parameters)
|
657
|
+
{
|
658
|
+
BYTE* const dict = (BYTE*)dictBuffer;
|
658
659
|
COVER_ctx_t ctx;
|
659
660
|
COVER_map_t activeDmers;
|
661
|
+
|
662
|
+
/* Initialize global data */
|
663
|
+
g_displayLevel = parameters.zParams.notificationLevel;
|
660
664
|
/* Checks */
|
661
665
|
if (!COVER_checkParameters(parameters, dictBufferCapacity)) {
|
662
666
|
DISPLAYLEVEL(1, "Cover parameters incorrect\n");
|
@@ -671,8 +675,6 @@ ZDICTLIB_API size_t ZDICT_trainFromBuffer_cover(
|
|
671
675
|
ZDICT_DICTSIZE_MIN);
|
672
676
|
return ERROR(dstSize_tooSmall);
|
673
677
|
}
|
674
|
-
/* Initialize global data */
|
675
|
-
g_displayLevel = parameters.zParams.notificationLevel;
|
676
678
|
/* Initialize context and activeDmers */
|
677
679
|
if (!COVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples,
|
678
680
|
parameters.d)) {
|
@@ -947,6 +949,7 @@ ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover(
|
|
947
949
|
unsigned k;
|
948
950
|
COVER_best_t best;
|
949
951
|
POOL_ctx *pool = NULL;
|
952
|
+
|
950
953
|
/* Checks */
|
951
954
|
if (kMinK < kMaxD || kMaxK < kMinK) {
|
952
955
|
LOCALDISPLAYLEVEL(displayLevel, 1, "Incorrect parameters\n");
|
@@ -207,7 +207,6 @@ static dictItem ZDICT_analyzePos(
|
|
207
207
|
U32 cumulLength[LLIMIT] = {0};
|
208
208
|
U32 savings[LLIMIT] = {0};
|
209
209
|
const BYTE* b = (const BYTE*)buffer;
|
210
|
-
size_t length;
|
211
210
|
size_t maxLength = LLIMIT;
|
212
211
|
size_t pos = suffix[start];
|
213
212
|
U32 end = start;
|
@@ -222,26 +221,30 @@ static dictItem ZDICT_analyzePos(
|
|
222
221
|
||(MEM_read16(b+pos+1) == MEM_read16(b+pos+3))
|
223
222
|
||(MEM_read16(b+pos+2) == MEM_read16(b+pos+4)) ) {
|
224
223
|
/* skip and mark segment */
|
225
|
-
U16
|
226
|
-
U32 u,
|
227
|
-
while (MEM_read16(b+pos+
|
228
|
-
if (b[pos+
|
229
|
-
for (u=1; u<
|
224
|
+
U16 const pattern16 = MEM_read16(b+pos+4);
|
225
|
+
U32 u, patternEnd = 6;
|
226
|
+
while (MEM_read16(b+pos+patternEnd) == pattern16) patternEnd+=2 ;
|
227
|
+
if (b[pos+patternEnd] == b[pos+patternEnd-1]) patternEnd++;
|
228
|
+
for (u=1; u<patternEnd; u++)
|
230
229
|
doneMarks[pos+u] = 1;
|
231
230
|
return solution;
|
232
231
|
}
|
233
232
|
|
234
233
|
/* look forward */
|
235
|
-
|
236
|
-
|
237
|
-
|
238
|
-
|
234
|
+
{ size_t length;
|
235
|
+
do {
|
236
|
+
end++;
|
237
|
+
length = ZDICT_count(b + pos, b + suffix[end]);
|
238
|
+
} while (length >= MINMATCHLENGTH);
|
239
|
+
}
|
239
240
|
|
240
241
|
/* look backward */
|
241
|
-
|
242
|
-
|
243
|
-
|
244
|
-
|
242
|
+
{ size_t length;
|
243
|
+
do {
|
244
|
+
length = ZDICT_count(b + pos, b + *(suffix+start-1));
|
245
|
+
if (length >=MINMATCHLENGTH) start--;
|
246
|
+
} while(length >= MINMATCHLENGTH);
|
247
|
+
}
|
245
248
|
|
246
249
|
/* exit if not found a minimum nb of repetitions */
|
247
250
|
if (end-start < minRatio) {
|
@@ -268,7 +271,7 @@ static dictItem ZDICT_analyzePos(
|
|
268
271
|
U32 selectedCount = 0;
|
269
272
|
U32 selectedID = currentID;
|
270
273
|
for (id =refinedStart; id < refinedEnd; id++) {
|
271
|
-
if (b[
|
274
|
+
if (b[suffix[id] + searchLength] != currentChar) {
|
272
275
|
if (currentCount > selectedCount) {
|
273
276
|
selectedCount = currentCount;
|
274
277
|
selectedID = currentID;
|
@@ -297,20 +300,23 @@ static dictItem ZDICT_analyzePos(
|
|
297
300
|
memset(lengthList, 0, sizeof(lengthList));
|
298
301
|
|
299
302
|
/* look forward */
|
300
|
-
|
301
|
-
|
302
|
-
|
303
|
-
|
304
|
-
|
305
|
-
|
303
|
+
{ size_t length;
|
304
|
+
do {
|
305
|
+
end++;
|
306
|
+
length = ZDICT_count(b + pos, b + suffix[end]);
|
307
|
+
if (length >= LLIMIT) length = LLIMIT-1;
|
308
|
+
lengthList[length]++;
|
309
|
+
} while (length >=MINMATCHLENGTH);
|
310
|
+
}
|
306
311
|
|
307
312
|
/* look backward */
|
308
|
-
length = MINMATCHLENGTH;
|
309
|
-
|
310
|
-
|
311
|
-
|
312
|
-
|
313
|
-
|
313
|
+
{ size_t length = MINMATCHLENGTH;
|
314
|
+
while ((length >= MINMATCHLENGTH) & (start > 0)) {
|
315
|
+
length = ZDICT_count(b + pos, b + suffix[start - 1]);
|
316
|
+
if (length >= LLIMIT) length = LLIMIT - 1;
|
317
|
+
lengthList[length]++;
|
318
|
+
if (length >= MINMATCHLENGTH) start--;
|
319
|
+
}
|
314
320
|
}
|
315
321
|
|
316
322
|
/* largest useful length */
|
@@ -345,12 +351,12 @@ static dictItem ZDICT_analyzePos(
|
|
345
351
|
/* mark positions done */
|
346
352
|
{ U32 id;
|
347
353
|
for (id=start; id<end; id++) {
|
348
|
-
U32 p, pEnd;
|
354
|
+
U32 p, pEnd, length;
|
349
355
|
U32 const testedPos = suffix[id];
|
350
356
|
if (testedPos == pos)
|
351
357
|
length = solution.length;
|
352
358
|
else {
|
353
|
-
length = ZDICT_count(b+pos, b+testedPos);
|
359
|
+
length = (U32)ZDICT_count(b+pos, b+testedPos);
|
354
360
|
if (length > solution.length) length = solution.length;
|
355
361
|
}
|
356
362
|
pEnd = (U32)(testedPos + length);
|
@@ -575,29 +581,30 @@ static void ZDICT_fillNoise(void* buffer, size_t length)
|
|
575
581
|
|
576
582
|
typedef struct
|
577
583
|
{
|
578
|
-
ZSTD_CCtx* ref;
|
579
|
-
ZSTD_CCtx* zc;
|
584
|
+
ZSTD_CCtx* ref; /* contains reference to dictionary */
|
585
|
+
ZSTD_CCtx* zc; /* working context */
|
580
586
|
void* workPlace; /* must be ZSTD_BLOCKSIZE_MAX allocated */
|
581
587
|
} EStats_ress_t;
|
582
588
|
|
583
589
|
#define MAXREPOFFSET 1024
|
584
590
|
|
585
591
|
static void ZDICT_countEStats(EStats_ress_t esr, ZSTD_parameters params,
|
586
|
-
|
587
|
-
|
592
|
+
U32* countLit, U32* offsetcodeCount, U32* matchlengthCount, U32* litlengthCount, U32* repOffsets,
|
593
|
+
const void* src, size_t srcSize,
|
594
|
+
U32 notificationLevel)
|
588
595
|
{
|
589
596
|
size_t const blockSizeMax = MIN (ZSTD_BLOCKSIZE_MAX, 1 << params.cParams.windowLog);
|
590
597
|
size_t cSize;
|
591
598
|
|
592
599
|
if (srcSize > blockSizeMax) srcSize = blockSizeMax; /* protection vs large samples */
|
593
|
-
{
|
594
|
-
|
600
|
+
{ size_t const errorCode = ZSTD_copyCCtx(esr.zc, esr.ref, 0);
|
601
|
+
if (ZSTD_isError(errorCode)) { DISPLAYLEVEL(1, "warning : ZSTD_copyCCtx failed \n"); return; }
|
595
602
|
}
|
596
603
|
cSize = ZSTD_compressBlock(esr.zc, esr.workPlace, ZSTD_BLOCKSIZE_MAX, src, srcSize);
|
597
604
|
if (ZSTD_isError(cSize)) { DISPLAYLEVEL(3, "warning : could not compress sample size %u \n", (U32)srcSize); return; }
|
598
605
|
|
599
606
|
if (cSize) { /* if == 0; block is not compressible */
|
600
|
-
const seqStore_t* seqStorePtr = ZSTD_getSeqStore(esr.zc);
|
607
|
+
const seqStore_t* const seqStorePtr = ZSTD_getSeqStore(esr.zc);
|
601
608
|
|
602
609
|
/* literals stats */
|
603
610
|
{ const BYTE* bytePtr;
|
@@ -659,6 +666,18 @@ static void ZDICT_insertSortCount(offsetCount_t table[ZSTD_REP_NUM+1], U32 val,
|
|
659
666
|
}
|
660
667
|
}
|
661
668
|
|
669
|
+
/* ZDICT_flatLit() :
|
670
|
+
* rewrite `countLit` to contain a mostly flat but still compressible distribution of literals.
|
671
|
+
* necessary to avoid generating a non-compressible distribution that HUF_writeCTable() cannot encode.
|
672
|
+
*/
|
673
|
+
static void ZDICT_flatLit(U32* countLit)
|
674
|
+
{
|
675
|
+
int u;
|
676
|
+
for (u=1; u<256; u++) countLit[u] = 2;
|
677
|
+
countLit[0] = 4;
|
678
|
+
countLit[253] = 1;
|
679
|
+
countLit[254] = 1;
|
680
|
+
}
|
662
681
|
|
663
682
|
#define OFFCODE_MAX 30 /* only applicable to first block */
|
664
683
|
static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
|
@@ -688,6 +707,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
|
|
688
707
|
BYTE* dstPtr = (BYTE*)dstBuffer;
|
689
708
|
|
690
709
|
/* init */
|
710
|
+
DEBUGLOG(4, "ZDICT_analyzeEntropy");
|
691
711
|
esr.ref = ZSTD_createCCtx();
|
692
712
|
esr.zc = ZSTD_createCCtx();
|
693
713
|
esr.workPlace = malloc(ZSTD_BLOCKSIZE_MAX);
|
@@ -713,7 +733,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
|
|
713
733
|
goto _cleanup;
|
714
734
|
} }
|
715
735
|
|
716
|
-
/* collect stats on all
|
736
|
+
/* collect stats on all samples */
|
717
737
|
for (u=0; u<nbFiles; u++) {
|
718
738
|
ZDICT_countEStats(esr, params,
|
719
739
|
countLit, offcodeCount, matchLengthCount, litLengthCount, repOffset,
|
@@ -722,14 +742,21 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
|
|
722
742
|
pos += fileSizes[u];
|
723
743
|
}
|
724
744
|
|
725
|
-
/* analyze */
|
726
|
-
|
727
|
-
|
728
|
-
|
729
|
-
|
730
|
-
|
745
|
+
/* analyze, build stats, starting with literals */
|
746
|
+
{ size_t maxNbBits = HUF_buildCTable (hufTable, countLit, 255, huffLog);
|
747
|
+
if (HUF_isError(maxNbBits)) {
|
748
|
+
eSize = ERROR(GENERIC);
|
749
|
+
DISPLAYLEVEL(1, " HUF_buildCTable error \n");
|
750
|
+
goto _cleanup;
|
751
|
+
}
|
752
|
+
if (maxNbBits==8) { /* not compressible : will fail on HUF_writeCTable() */
|
753
|
+
DISPLAYLEVEL(2, "warning : pathological dataset : literals are not compressible : samples are noisy or too regular \n");
|
754
|
+
ZDICT_flatLit(countLit); /* replace distribution by a fake "mostly flat but still compressible" distribution, that HUF_writeCTable() can encode */
|
755
|
+
maxNbBits = HUF_buildCTable (hufTable, countLit, 255, huffLog);
|
756
|
+
assert(maxNbBits==9);
|
757
|
+
}
|
758
|
+
huffLog = (U32)maxNbBits;
|
731
759
|
}
|
732
|
-
huffLog = (U32)errorCode;
|
733
760
|
|
734
761
|
/* looking for most common first offsets */
|
735
762
|
{ U32 offset;
|
@@ -850,6 +877,7 @@ size_t ZDICT_finalizeDictionary(void* dictBuffer, size_t dictBufferCapacity,
|
|
850
877
|
U32 const notificationLevel = params.notificationLevel;
|
851
878
|
|
852
879
|
/* check conditions */
|
880
|
+
DEBUGLOG(4, "ZDICT_finalizeDictionary");
|
853
881
|
if (dictBufferCapacity < dictContentSize) return ERROR(dstSize_tooSmall);
|
854
882
|
if (dictContentSize < ZDICT_CONTENTSIZE_MIN) return ERROR(srcSize_wrong);
|
855
883
|
if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) return ERROR(dstSize_tooSmall);
|
@@ -1025,8 +1053,9 @@ size_t ZDICT_trainFromBuffer_unsafe_legacy(
|
|
1025
1053
|
}
|
1026
1054
|
|
1027
1055
|
|
1028
|
-
/*
|
1029
|
-
*
|
1056
|
+
/* ZDICT_trainFromBuffer_legacy() :
|
1057
|
+
* issue : samplesBuffer need to be followed by a noisy guard band.
|
1058
|
+
* work around : duplicate the buffer, and add the noise */
|
1030
1059
|
size_t ZDICT_trainFromBuffer_legacy(void* dictBuffer, size_t dictBufferCapacity,
|
1031
1060
|
const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
|
1032
1061
|
ZDICT_legacy_params_t params)
|
@@ -1054,18 +1083,22 @@ size_t ZDICT_trainFromBuffer(void* dictBuffer, size_t dictBufferCapacity,
|
|
1054
1083
|
const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples)
|
1055
1084
|
{
|
1056
1085
|
ZDICT_cover_params_t params;
|
1086
|
+
DEBUGLOG(3, "ZDICT_trainFromBuffer");
|
1057
1087
|
memset(&params, 0, sizeof(params));
|
1058
1088
|
params.d = 8;
|
1059
1089
|
params.steps = 4;
|
1060
|
-
/* Default to level 6 since no compression level information is
|
1090
|
+
/* Default to level 6 since no compression level information is available */
|
1061
1091
|
params.zParams.compressionLevel = 6;
|
1092
|
+
#if defined(ZSTD_DEBUG) && (ZSTD_DEBUG>=1)
|
1093
|
+
params.zParams.notificationLevel = ZSTD_DEBUG;
|
1094
|
+
#endif
|
1062
1095
|
return ZDICT_optimizeTrainFromBuffer_cover(dictBuffer, dictBufferCapacity,
|
1063
|
-
samplesBuffer, samplesSizes,
|
1064
|
-
|
1096
|
+
samplesBuffer, samplesSizes, nbSamples,
|
1097
|
+
&params);
|
1065
1098
|
}
|
1066
1099
|
|
1067
1100
|
size_t ZDICT_addEntropyTablesFromBuffer(void* dictBuffer, size_t dictContentSize, size_t dictBufferCapacity,
|
1068
|
-
|
1101
|
+
const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples)
|
1069
1102
|
{
|
1070
1103
|
ZDICT_params_t params;
|
1071
1104
|
memset(&params, 0, sizeof(params));
|
@@ -38,21 +38,21 @@ extern "C" {
|
|
38
38
|
|
39
39
|
|
40
40
|
/*! ZDICT_trainFromBuffer():
|
41
|
-
*
|
42
|
-
*
|
43
|
-
*
|
44
|
-
*
|
45
|
-
*
|
41
|
+
* Train a dictionary from an array of samples.
|
42
|
+
* Redirect towards ZDICT_optimizeTrainFromBuffer_cover() single-threaded, with d=8 and steps=4.
|
43
|
+
* Samples must be stored concatenated in a single flat buffer `samplesBuffer`,
|
44
|
+
* supplied with an array of sizes `samplesSizes`, providing the size of each sample, in order.
|
45
|
+
* The resulting dictionary will be saved into `dictBuffer`.
|
46
46
|
* @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
|
47
|
-
*
|
48
|
-
*
|
49
|
-
*
|
50
|
-
* It's
|
51
|
-
* In general, it's recommended to provide a few thousands samples,
|
47
|
+
* or an error code, which can be tested with ZDICT_isError().
|
48
|
+
* Note: ZDICT_trainFromBuffer() requires about 9 bytes of memory for each input byte.
|
49
|
+
* Tips: In general, a reasonable dictionary has a size of ~ 100 KB.
|
50
|
+
* It's possible to select smaller or larger size, just by specifying `dictBufferCapacity`.
|
51
|
+
* In general, it's recommended to provide a few thousands samples, though this can vary a lot.
|
52
52
|
* It's recommended that total size of all samples be about ~x100 times the target size of dictionary.
|
53
53
|
*/
|
54
54
|
ZDICTLIB_API size_t ZDICT_trainFromBuffer(void* dictBuffer, size_t dictBufferCapacity,
|
55
|
-
|
55
|
+
const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples);
|
56
56
|
|
57
57
|
|
58
58
|
/*====== Helper functions ======*/
|
@@ -72,14 +72,14 @@ ZDICTLIB_API const char* ZDICT_getErrorName(size_t errorCode);
|
|
72
72
|
* ==================================================================================== */
|
73
73
|
|
74
74
|
typedef struct {
|
75
|
-
int compressionLevel; /*
|
76
|
-
unsigned notificationLevel; /* Write to stderr; 0 = none (default); 1 = errors; 2 = progression; 3 = details; 4 = debug; */
|
77
|
-
unsigned dictID; /* 0 means auto mode (32-bits random value)
|
75
|
+
int compressionLevel; /* optimize for a specific zstd compression level; 0 means default */
|
76
|
+
unsigned notificationLevel; /* Write log to stderr; 0 = none (default); 1 = errors; 2 = progression; 3 = details; 4 = debug; */
|
77
|
+
unsigned dictID; /* force dictID value; 0 means auto mode (32-bits random value) */
|
78
78
|
} ZDICT_params_t;
|
79
79
|
|
80
80
|
/*! ZDICT_cover_params_t:
|
81
|
-
* For all values 0 means default.
|
82
81
|
* k and d are the only required parameters.
|
82
|
+
* For others, value 0 means default.
|
83
83
|
*/
|
84
84
|
typedef struct {
|
85
85
|
unsigned k; /* Segment size : constraint: 0 < k : Reasonable range [16, 2048+] */
|
@@ -91,28 +91,28 @@ typedef struct {
|
|
91
91
|
|
92
92
|
|
93
93
|
/*! ZDICT_trainFromBuffer_cover():
|
94
|
-
*
|
95
|
-
*
|
96
|
-
*
|
97
|
-
*
|
94
|
+
* Train a dictionary from an array of samples using the COVER algorithm.
|
95
|
+
* Samples must be stored concatenated in a single flat buffer `samplesBuffer`,
|
96
|
+
* supplied with an array of sizes `samplesSizes`, providing the size of each sample, in order.
|
97
|
+
* The resulting dictionary will be saved into `dictBuffer`.
|
98
98
|
* @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
|
99
|
-
*
|
100
|
-
*
|
101
|
-
*
|
102
|
-
* It's
|
103
|
-
* In general, it's recommended to provide a few thousands samples,
|
99
|
+
* or an error code, which can be tested with ZDICT_isError().
|
100
|
+
* Note: ZDICT_trainFromBuffer_cover() requires about 9 bytes of memory for each input byte.
|
101
|
+
* Tips: In general, a reasonable dictionary has a size of ~ 100 KB.
|
102
|
+
* It's possible to select smaller or larger size, just by specifying `dictBufferCapacity`.
|
103
|
+
* In general, it's recommended to provide a few thousands samples, though this can vary a lot.
|
104
104
|
* It's recommended that total size of all samples be about ~x100 times the target size of dictionary.
|
105
105
|
*/
|
106
106
|
ZDICTLIB_API size_t ZDICT_trainFromBuffer_cover(
|
107
|
-
|
108
|
-
const size_t *samplesSizes, unsigned nbSamples,
|
109
|
-
|
107
|
+
void *dictBuffer, size_t dictBufferCapacity,
|
108
|
+
const void *samplesBuffer, const size_t *samplesSizes, unsigned nbSamples,
|
109
|
+
ZDICT_cover_params_t parameters);
|
110
110
|
|
111
111
|
/*! ZDICT_optimizeTrainFromBuffer_cover():
|
112
112
|
* The same requirements as above hold for all the parameters except `parameters`.
|
113
113
|
* This function tries many parameter combinations and picks the best parameters.
|
114
|
-
* `*parameters` is filled with the best parameters found,
|
115
|
-
* constructed with those parameters is stored in `dictBuffer`.
|
114
|
+
* `*parameters` is filled with the best parameters found,
|
115
|
+
* dictionary constructed with those parameters is stored in `dictBuffer`.
|
116
116
|
*
|
117
117
|
* All of the parameters d, k, steps are optional.
|
118
118
|
* If d is non-zero then we don't check multiple values of d, otherwise we check d = {6, 8, 10, 12, 14, 16}.
|
@@ -125,9 +125,9 @@ ZDICTLIB_API size_t ZDICT_trainFromBuffer_cover(
|
|
125
125
|
* Note: ZDICT_optimizeTrainFromBuffer_cover() requires about 8 bytes of memory for each input byte and additionally another 5 bytes of memory for each byte of memory for each thread.
|
126
126
|
*/
|
127
127
|
ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover(
|
128
|
-
|
129
|
-
const size_t
|
130
|
-
|
128
|
+
void* dictBuffer, size_t dictBufferCapacity,
|
129
|
+
const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
|
130
|
+
ZDICT_cover_params_t* parameters);
|
131
131
|
|
132
132
|
/*! ZDICT_finalizeDictionary():
|
133
133
|
* Given a custom content as a basis for dictionary, and a set of samples,
|
@@ -157,22 +157,23 @@ typedef struct {
|
|
157
157
|
} ZDICT_legacy_params_t;
|
158
158
|
|
159
159
|
/*! ZDICT_trainFromBuffer_legacy():
|
160
|
-
*
|
161
|
-
*
|
162
|
-
*
|
163
|
-
*
|
160
|
+
* Train a dictionary from an array of samples.
|
161
|
+
* Samples must be stored concatenated in a single flat buffer `samplesBuffer`,
|
162
|
+
* supplied with an array of sizes `samplesSizes`, providing the size of each sample, in order.
|
163
|
+
* The resulting dictionary will be saved into `dictBuffer`.
|
164
164
|
* `parameters` is optional and can be provided with values set to 0 to mean "default".
|
165
165
|
* @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
|
166
|
-
*
|
167
|
-
*
|
168
|
-
* It's
|
169
|
-
* In general, it's recommended to provide a few thousands samples,
|
166
|
+
* or an error code, which can be tested with ZDICT_isError().
|
167
|
+
* Tips: In general, a reasonable dictionary has a size of ~ 100 KB.
|
168
|
+
* It's possible to select smaller or larger size, just by specifying `dictBufferCapacity`.
|
169
|
+
* In general, it's recommended to provide a few thousands samples, though this can vary a lot.
|
170
170
|
* It's recommended that total size of all samples be about ~x100 times the target size of dictionary.
|
171
|
-
*
|
171
|
+
* Note: ZDICT_trainFromBuffer_legacy() will send notifications into stderr if instructed to, using notificationLevel>0.
|
172
172
|
*/
|
173
173
|
ZDICTLIB_API size_t ZDICT_trainFromBuffer_legacy(
|
174
|
-
void *dictBuffer, size_t dictBufferCapacity,
|
175
|
-
const size_t *samplesSizes, unsigned nbSamples,
|
174
|
+
void *dictBuffer, size_t dictBufferCapacity,
|
175
|
+
const void *samplesBuffer, const size_t *samplesSizes, unsigned nbSamples,
|
176
|
+
ZDICT_legacy_params_t parameters);
|
176
177
|
|
177
178
|
/* Deprecation warnings */
|
178
179
|
/* It is generally possible to disable deprecation warnings from compiler,
|
@@ -246,6 +246,7 @@ MEM_STATIC size_t ZSTD_freeLegacyStreamContext(void* legacyContext, U32 version)
|
|
246
246
|
MEM_STATIC size_t ZSTD_initLegacyStream(void** legacyContext, U32 prevVersion, U32 newVersion,
|
247
247
|
const void* dict, size_t dictSize)
|
248
248
|
{
|
249
|
+
DEBUGLOG(5, "ZSTD_initLegacyStream for v0.%u", newVersion);
|
249
250
|
if (prevVersion != newVersion) ZSTD_freeLegacyStreamContext(*legacyContext, prevVersion);
|
250
251
|
switch(newVersion)
|
251
252
|
{
|
@@ -304,6 +305,7 @@ MEM_STATIC size_t ZSTD_initLegacyStream(void** legacyContext, U32 prevVersion, U
|
|
304
305
|
MEM_STATIC size_t ZSTD_decompressLegacyStream(void* legacyContext, U32 version,
|
305
306
|
ZSTD_outBuffer* output, ZSTD_inBuffer* input)
|
306
307
|
{
|
308
|
+
DEBUGLOG(5, "ZSTD_decompressLegacyStream for v0.%u", version);
|
307
309
|
switch(version)
|
308
310
|
{
|
309
311
|
default :
|