extzstd 0.1 → 0.3.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/HISTORY.ja.md +39 -0
- data/README.md +38 -56
- data/contrib/zstd/CHANGELOG +613 -0
- data/contrib/zstd/CODE_OF_CONDUCT.md +5 -0
- data/contrib/zstd/CONTRIBUTING.md +406 -0
- data/contrib/zstd/COPYING +339 -0
- data/contrib/zstd/Makefile +420 -0
- data/contrib/zstd/README.md +179 -41
- data/contrib/zstd/TESTING.md +44 -0
- data/contrib/zstd/appveyor.yml +292 -0
- data/contrib/zstd/lib/BUCK +234 -0
- data/contrib/zstd/lib/Makefile +451 -0
- data/contrib/zstd/lib/README.md +207 -0
- data/contrib/zstd/{common → lib/common}/bitstream.h +187 -138
- data/contrib/zstd/lib/common/compiler.h +288 -0
- data/contrib/zstd/lib/common/cpu.h +213 -0
- data/contrib/zstd/lib/common/debug.c +24 -0
- data/contrib/zstd/lib/common/debug.h +107 -0
- data/contrib/zstd/lib/common/entropy_common.c +362 -0
- data/contrib/zstd/{common → lib/common}/error_private.c +25 -12
- data/contrib/zstd/{common → lib/common}/error_private.h +14 -10
- data/contrib/zstd/{common → lib/common}/fse.h +173 -92
- data/contrib/zstd/{common → lib/common}/fse_decompress.c +149 -85
- data/contrib/zstd/lib/common/huf.h +361 -0
- data/contrib/zstd/{common → lib/common}/mem.h +115 -59
- data/contrib/zstd/lib/common/pool.c +350 -0
- data/contrib/zstd/lib/common/pool.h +84 -0
- data/contrib/zstd/lib/common/threading.c +122 -0
- data/contrib/zstd/lib/common/threading.h +155 -0
- data/contrib/zstd/{common → lib/common}/xxhash.c +55 -96
- data/contrib/zstd/{common → lib/common}/xxhash.h +23 -47
- data/contrib/zstd/lib/common/zstd_common.c +83 -0
- data/contrib/zstd/lib/common/zstd_deps.h +111 -0
- data/contrib/zstd/lib/common/zstd_errors.h +95 -0
- data/contrib/zstd/lib/common/zstd_internal.h +478 -0
- data/contrib/zstd/{compress → lib/compress}/fse_compress.c +214 -319
- data/contrib/zstd/lib/compress/hist.c +181 -0
- data/contrib/zstd/lib/compress/hist.h +75 -0
- data/contrib/zstd/lib/compress/huf_compress.c +913 -0
- data/contrib/zstd/lib/compress/zstd_compress.c +5208 -0
- data/contrib/zstd/lib/compress/zstd_compress_internal.h +1203 -0
- data/contrib/zstd/lib/compress/zstd_compress_literals.c +158 -0
- data/contrib/zstd/lib/compress/zstd_compress_literals.h +29 -0
- data/contrib/zstd/lib/compress/zstd_compress_sequences.c +433 -0
- data/contrib/zstd/lib/compress/zstd_compress_sequences.h +54 -0
- data/contrib/zstd/lib/compress/zstd_compress_superblock.c +849 -0
- data/contrib/zstd/lib/compress/zstd_compress_superblock.h +32 -0
- data/contrib/zstd/lib/compress/zstd_cwksp.h +561 -0
- data/contrib/zstd/lib/compress/zstd_double_fast.c +521 -0
- data/contrib/zstd/lib/compress/zstd_double_fast.h +38 -0
- data/contrib/zstd/lib/compress/zstd_fast.c +496 -0
- data/contrib/zstd/lib/compress/zstd_fast.h +37 -0
- data/contrib/zstd/lib/compress/zstd_lazy.c +1412 -0
- data/contrib/zstd/lib/compress/zstd_lazy.h +87 -0
- data/contrib/zstd/lib/compress/zstd_ldm.c +660 -0
- data/contrib/zstd/lib/compress/zstd_ldm.h +116 -0
- data/contrib/zstd/lib/compress/zstd_opt.c +1345 -0
- data/contrib/zstd/lib/compress/zstd_opt.h +56 -0
- data/contrib/zstd/lib/compress/zstdmt_compress.c +1811 -0
- data/contrib/zstd/lib/compress/zstdmt_compress.h +110 -0
- data/contrib/zstd/lib/decompress/huf_decompress.c +1350 -0
- data/contrib/zstd/lib/decompress/zstd_ddict.c +244 -0
- data/contrib/zstd/lib/decompress/zstd_ddict.h +44 -0
- data/contrib/zstd/lib/decompress/zstd_decompress.c +1930 -0
- data/contrib/zstd/lib/decompress/zstd_decompress_block.c +1540 -0
- data/contrib/zstd/lib/decompress/zstd_decompress_block.h +62 -0
- data/contrib/zstd/lib/decompress/zstd_decompress_internal.h +190 -0
- data/contrib/zstd/{common → lib/deprecated}/zbuff.h +68 -45
- data/contrib/zstd/lib/deprecated/zbuff_common.c +26 -0
- data/contrib/zstd/lib/deprecated/zbuff_compress.c +147 -0
- data/contrib/zstd/lib/deprecated/zbuff_decompress.c +75 -0
- data/contrib/zstd/lib/dictBuilder/cover.c +1245 -0
- data/contrib/zstd/lib/dictBuilder/cover.h +157 -0
- data/contrib/zstd/{dictBuilder → lib/dictBuilder}/divsufsort.c +3 -3
- data/contrib/zstd/{dictBuilder → lib/dictBuilder}/divsufsort.h +0 -0
- data/contrib/zstd/lib/dictBuilder/fastcover.c +758 -0
- data/contrib/zstd/{dictBuilder → lib/dictBuilder}/zdict.c +318 -194
- data/contrib/zstd/lib/dictBuilder/zdict.h +305 -0
- data/contrib/zstd/{legacy → lib/legacy}/zstd_legacy.h +171 -15
- data/contrib/zstd/{legacy → lib/legacy}/zstd_v01.c +191 -124
- data/contrib/zstd/{legacy → lib/legacy}/zstd_v01.h +19 -5
- data/contrib/zstd/{legacy → lib/legacy}/zstd_v02.c +125 -125
- data/contrib/zstd/{legacy → lib/legacy}/zstd_v02.h +19 -5
- data/contrib/zstd/{legacy → lib/legacy}/zstd_v03.c +125 -124
- data/contrib/zstd/{legacy → lib/legacy}/zstd_v03.h +20 -6
- data/contrib/zstd/{legacy → lib/legacy}/zstd_v04.c +151 -299
- data/contrib/zstd/{legacy → lib/legacy}/zstd_v04.h +19 -5
- data/contrib/zstd/{legacy → lib/legacy}/zstd_v05.c +237 -243
- data/contrib/zstd/{legacy → lib/legacy}/zstd_v05.h +19 -6
- data/contrib/zstd/{legacy → lib/legacy}/zstd_v06.c +130 -143
- data/contrib/zstd/{legacy → lib/legacy}/zstd_v06.h +18 -5
- data/contrib/zstd/{legacy → lib/legacy}/zstd_v07.c +158 -157
- data/contrib/zstd/{legacy → lib/legacy}/zstd_v07.h +19 -5
- data/contrib/zstd/lib/libzstd.pc.in +15 -0
- data/contrib/zstd/lib/zstd.h +2391 -0
- data/ext/depend +2 -0
- data/ext/extconf.rb +15 -6
- data/ext/extzstd.c +76 -145
- data/ext/extzstd.h +80 -31
- data/ext/extzstd_stream.c +417 -142
- data/ext/libzstd_conf.h +8 -0
- data/ext/zstd_common.c +10 -7
- data/ext/zstd_compress.c +14 -5
- data/ext/zstd_decompress.c +5 -4
- data/ext/zstd_dictbuilder.c +9 -4
- data/ext/zstd_dictbuilder_fastcover.c +3 -0
- data/ext/zstd_legacy_v01.c +3 -1
- data/ext/zstd_legacy_v02.c +3 -1
- data/ext/zstd_legacy_v03.c +3 -1
- data/ext/zstd_legacy_v04.c +3 -1
- data/ext/zstd_legacy_v05.c +3 -1
- data/ext/zstd_legacy_v06.c +3 -1
- data/ext/zstd_legacy_v07.c +3 -1
- data/gemstub.rb +10 -24
- data/lib/extzstd.rb +64 -179
- data/lib/extzstd/version.rb +6 -1
- data/test/test_basic.rb +9 -6
- metadata +113 -57
- data/HISTORY.ja +0 -5
- data/contrib/zstd/common/entropy_common.c +0 -225
- data/contrib/zstd/common/huf.h +0 -228
- data/contrib/zstd/common/zstd_common.c +0 -83
- data/contrib/zstd/common/zstd_errors.h +0 -60
- data/contrib/zstd/common/zstd_internal.h +0 -267
- data/contrib/zstd/compress/huf_compress.c +0 -533
- data/contrib/zstd/compress/zbuff_compress.c +0 -319
- data/contrib/zstd/compress/zstd_compress.c +0 -3264
- data/contrib/zstd/compress/zstd_opt.h +0 -900
- data/contrib/zstd/decompress/huf_decompress.c +0 -883
- data/contrib/zstd/decompress/zbuff_decompress.c +0 -252
- data/contrib/zstd/decompress/zstd_decompress.c +0 -1842
- data/contrib/zstd/dictBuilder/zdict.h +0 -111
- data/contrib/zstd/zstd.h +0 -640
|
@@ -1,18 +1,20 @@
|
|
|
1
|
-
|
|
2
|
-
* Copyright (c) 2016-
|
|
1
|
+
/*
|
|
2
|
+
* Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
|
|
3
3
|
* All rights reserved.
|
|
4
4
|
*
|
|
5
|
-
* This source code is licensed under the BSD-style license found in the
|
|
6
|
-
* LICENSE file in the root directory of this source tree
|
|
7
|
-
*
|
|
5
|
+
* This source code is licensed under both the BSD-style license (found in the
|
|
6
|
+
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
|
|
7
|
+
* in the COPYING file in the root directory of this source tree).
|
|
8
|
+
* You may select, at your option, one of the above-listed licenses.
|
|
8
9
|
*/
|
|
9
10
|
|
|
10
11
|
|
|
11
12
|
/*-**************************************
|
|
12
13
|
* Tuning parameters
|
|
13
14
|
****************************************/
|
|
15
|
+
#define MINRATIO 4 /* minimum nb of apparition to be selected in dictionary */
|
|
14
16
|
#define ZDICT_MAX_SAMPLES_SIZE (2000U << 20)
|
|
15
|
-
#define ZDICT_MIN_SAMPLES_SIZE
|
|
17
|
+
#define ZDICT_MIN_SAMPLES_SIZE (ZDICT_CONTENTSIZE_MIN * MINRATIO)
|
|
16
18
|
|
|
17
19
|
|
|
18
20
|
/*-**************************************
|
|
@@ -35,18 +37,18 @@
|
|
|
35
37
|
#include <stdio.h> /* fprintf, fopen, ftello64 */
|
|
36
38
|
#include <time.h> /* clock */
|
|
37
39
|
|
|
38
|
-
#include "mem.h" /* read */
|
|
39
|
-
#include "
|
|
40
|
-
#include "fse.h" /* FSE_normalizeCount, FSE_writeNCount */
|
|
40
|
+
#include "../common/mem.h" /* read */
|
|
41
|
+
#include "../common/fse.h" /* FSE_normalizeCount, FSE_writeNCount */
|
|
41
42
|
#define HUF_STATIC_LINKING_ONLY
|
|
42
|
-
#include "huf.h"
|
|
43
|
-
#include "zstd_internal.h" /* includes zstd.h */
|
|
44
|
-
#include "xxhash.h"
|
|
43
|
+
#include "../common/huf.h" /* HUF_buildCTable, HUF_writeCTable */
|
|
44
|
+
#include "../common/zstd_internal.h" /* includes zstd.h */
|
|
45
|
+
#include "../common/xxhash.h" /* XXH64 */
|
|
45
46
|
#include "divsufsort.h"
|
|
46
47
|
#ifndef ZDICT_STATIC_LINKING_ONLY
|
|
47
48
|
# define ZDICT_STATIC_LINKING_ONLY
|
|
48
49
|
#endif
|
|
49
50
|
#include "zdict.h"
|
|
51
|
+
#include "../compress/zstd_compress_internal.h" /* ZSTD_loadCEntropy() */
|
|
50
52
|
|
|
51
53
|
|
|
52
54
|
/*-*************************************
|
|
@@ -60,17 +62,15 @@
|
|
|
60
62
|
|
|
61
63
|
#define NOISELENGTH 32
|
|
62
64
|
|
|
63
|
-
#define MINRATIO 4
|
|
64
|
-
static const int g_compressionLevel_default = 5;
|
|
65
65
|
static const U32 g_selectivity_default = 9;
|
|
66
|
-
static const size_t g_provision_entropySize = 200;
|
|
67
|
-
static const size_t g_min_fast_dictContent = 192;
|
|
68
66
|
|
|
69
67
|
|
|
70
68
|
/*-*************************************
|
|
71
69
|
* Console display
|
|
72
70
|
***************************************/
|
|
71
|
+
#undef DISPLAY
|
|
73
72
|
#define DISPLAY(...) { fprintf(stderr, __VA_ARGS__); fflush( stderr ); }
|
|
73
|
+
#undef DISPLAYLEVEL
|
|
74
74
|
#define DISPLAYLEVEL(l, ...) if (notificationLevel>=l) { DISPLAY(__VA_ARGS__); } /* 0 : no display; 1: errors; 2: default; 3: details; 4: debug */
|
|
75
75
|
|
|
76
76
|
static clock_t ZDICT_clockSpan(clock_t nPrevious) { return clock() - nPrevious; }
|
|
@@ -97,15 +97,35 @@ const char* ZDICT_getErrorName(size_t errorCode) { return ERR_getErrorName(error
|
|
|
97
97
|
unsigned ZDICT_getDictID(const void* dictBuffer, size_t dictSize)
|
|
98
98
|
{
|
|
99
99
|
if (dictSize < 8) return 0;
|
|
100
|
-
if (MEM_readLE32(dictBuffer) !=
|
|
100
|
+
if (MEM_readLE32(dictBuffer) != ZSTD_MAGIC_DICTIONARY) return 0;
|
|
101
101
|
return MEM_readLE32((const char*)dictBuffer + 4);
|
|
102
102
|
}
|
|
103
103
|
|
|
104
|
+
size_t ZDICT_getDictHeaderSize(const void* dictBuffer, size_t dictSize)
|
|
105
|
+
{
|
|
106
|
+
size_t headerSize;
|
|
107
|
+
if (dictSize <= 8 || MEM_readLE32(dictBuffer) != ZSTD_MAGIC_DICTIONARY) return ERROR(dictionary_corrupted);
|
|
108
|
+
|
|
109
|
+
{ ZSTD_compressedBlockState_t* bs = (ZSTD_compressedBlockState_t*)malloc(sizeof(ZSTD_compressedBlockState_t));
|
|
110
|
+
U32* wksp = (U32*)malloc(HUF_WORKSPACE_SIZE);
|
|
111
|
+
if (!bs || !wksp) {
|
|
112
|
+
headerSize = ERROR(memory_allocation);
|
|
113
|
+
} else {
|
|
114
|
+
ZSTD_reset_compressedBlockState(bs);
|
|
115
|
+
headerSize = ZSTD_loadCEntropy(bs, wksp, dictBuffer, dictSize);
|
|
116
|
+
}
|
|
117
|
+
|
|
118
|
+
free(bs);
|
|
119
|
+
free(wksp);
|
|
120
|
+
}
|
|
121
|
+
|
|
122
|
+
return headerSize;
|
|
123
|
+
}
|
|
104
124
|
|
|
105
125
|
/*-********************************************************
|
|
106
126
|
* Dictionary training functions
|
|
107
127
|
**********************************************************/
|
|
108
|
-
static unsigned ZDICT_NbCommonBytes (
|
|
128
|
+
static unsigned ZDICT_NbCommonBytes (size_t val)
|
|
109
129
|
{
|
|
110
130
|
if (MEM_isLittleEndian()) {
|
|
111
131
|
if (MEM_64bits()) {
|
|
@@ -209,7 +229,6 @@ static dictItem ZDICT_analyzePos(
|
|
|
209
229
|
U32 cumulLength[LLIMIT] = {0};
|
|
210
230
|
U32 savings[LLIMIT] = {0};
|
|
211
231
|
const BYTE* b = (const BYTE*)buffer;
|
|
212
|
-
size_t length;
|
|
213
232
|
size_t maxLength = LLIMIT;
|
|
214
233
|
size_t pos = suffix[start];
|
|
215
234
|
U32 end = start;
|
|
@@ -224,26 +243,30 @@ static dictItem ZDICT_analyzePos(
|
|
|
224
243
|
||(MEM_read16(b+pos+1) == MEM_read16(b+pos+3))
|
|
225
244
|
||(MEM_read16(b+pos+2) == MEM_read16(b+pos+4)) ) {
|
|
226
245
|
/* skip and mark segment */
|
|
227
|
-
U16
|
|
228
|
-
U32 u,
|
|
229
|
-
while (MEM_read16(b+pos+
|
|
230
|
-
if (b[pos+
|
|
231
|
-
for (u=1; u<
|
|
246
|
+
U16 const pattern16 = MEM_read16(b+pos+4);
|
|
247
|
+
U32 u, patternEnd = 6;
|
|
248
|
+
while (MEM_read16(b+pos+patternEnd) == pattern16) patternEnd+=2 ;
|
|
249
|
+
if (b[pos+patternEnd] == b[pos+patternEnd-1]) patternEnd++;
|
|
250
|
+
for (u=1; u<patternEnd; u++)
|
|
232
251
|
doneMarks[pos+u] = 1;
|
|
233
252
|
return solution;
|
|
234
253
|
}
|
|
235
254
|
|
|
236
255
|
/* look forward */
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
256
|
+
{ size_t length;
|
|
257
|
+
do {
|
|
258
|
+
end++;
|
|
259
|
+
length = ZDICT_count(b + pos, b + suffix[end]);
|
|
260
|
+
} while (length >= MINMATCHLENGTH);
|
|
261
|
+
}
|
|
241
262
|
|
|
242
263
|
/* look backward */
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
264
|
+
{ size_t length;
|
|
265
|
+
do {
|
|
266
|
+
length = ZDICT_count(b + pos, b + *(suffix+start-1));
|
|
267
|
+
if (length >=MINMATCHLENGTH) start--;
|
|
268
|
+
} while(length >= MINMATCHLENGTH);
|
|
269
|
+
}
|
|
247
270
|
|
|
248
271
|
/* exit if not found a minimum nb of repetitions */
|
|
249
272
|
if (end-start < minRatio) {
|
|
@@ -254,15 +277,15 @@ static dictItem ZDICT_analyzePos(
|
|
|
254
277
|
}
|
|
255
278
|
|
|
256
279
|
{ int i;
|
|
257
|
-
U32
|
|
280
|
+
U32 mml;
|
|
258
281
|
U32 refinedStart = start;
|
|
259
282
|
U32 refinedEnd = end;
|
|
260
283
|
|
|
261
284
|
DISPLAYLEVEL(4, "\n");
|
|
262
|
-
DISPLAYLEVEL(4, "found %3u matches of length >= %i at pos %7u ", (
|
|
285
|
+
DISPLAYLEVEL(4, "found %3u matches of length >= %i at pos %7u ", (unsigned)(end-start), MINMATCHLENGTH, (unsigned)pos);
|
|
263
286
|
DISPLAYLEVEL(4, "\n");
|
|
264
287
|
|
|
265
|
-
for (
|
|
288
|
+
for (mml = MINMATCHLENGTH ; ; mml++) {
|
|
266
289
|
BYTE currentChar = 0;
|
|
267
290
|
U32 currentCount = 0;
|
|
268
291
|
U32 currentID = refinedStart;
|
|
@@ -270,13 +293,13 @@ static dictItem ZDICT_analyzePos(
|
|
|
270
293
|
U32 selectedCount = 0;
|
|
271
294
|
U32 selectedID = currentID;
|
|
272
295
|
for (id =refinedStart; id < refinedEnd; id++) {
|
|
273
|
-
if (b[
|
|
296
|
+
if (b[suffix[id] + mml] != currentChar) {
|
|
274
297
|
if (currentCount > selectedCount) {
|
|
275
298
|
selectedCount = currentCount;
|
|
276
299
|
selectedID = currentID;
|
|
277
300
|
}
|
|
278
301
|
currentID = id;
|
|
279
|
-
currentChar = b[ suffix[id] +
|
|
302
|
+
currentChar = b[ suffix[id] + mml];
|
|
280
303
|
currentCount = 0;
|
|
281
304
|
}
|
|
282
305
|
currentCount ++;
|
|
@@ -292,28 +315,31 @@ static dictItem ZDICT_analyzePos(
|
|
|
292
315
|
refinedEnd = refinedStart + selectedCount;
|
|
293
316
|
}
|
|
294
317
|
|
|
295
|
-
/* evaluate gain based on new
|
|
318
|
+
/* evaluate gain based on new dict */
|
|
296
319
|
start = refinedStart;
|
|
297
320
|
pos = suffix[refinedStart];
|
|
298
321
|
end = start;
|
|
299
322
|
memset(lengthList, 0, sizeof(lengthList));
|
|
300
323
|
|
|
301
324
|
/* look forward */
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
|
|
325
|
+
{ size_t length;
|
|
326
|
+
do {
|
|
327
|
+
end++;
|
|
328
|
+
length = ZDICT_count(b + pos, b + suffix[end]);
|
|
329
|
+
if (length >= LLIMIT) length = LLIMIT-1;
|
|
330
|
+
lengthList[length]++;
|
|
331
|
+
} while (length >=MINMATCHLENGTH);
|
|
332
|
+
}
|
|
308
333
|
|
|
309
334
|
/* look backward */
|
|
310
|
-
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
|
|
316
|
-
|
|
335
|
+
{ size_t length = MINMATCHLENGTH;
|
|
336
|
+
while ((length >= MINMATCHLENGTH) & (start > 0)) {
|
|
337
|
+
length = ZDICT_count(b + pos, b + suffix[start - 1]);
|
|
338
|
+
if (length >= LLIMIT) length = LLIMIT - 1;
|
|
339
|
+
lengthList[length]++;
|
|
340
|
+
if (length >= MINMATCHLENGTH) start--;
|
|
341
|
+
}
|
|
342
|
+
}
|
|
317
343
|
|
|
318
344
|
/* largest useful length */
|
|
319
345
|
memset(cumulLength, 0, sizeof(cumulLength));
|
|
@@ -337,8 +363,8 @@ static dictItem ZDICT_analyzePos(
|
|
|
337
363
|
for (i=MINMATCHLENGTH; i<=(int)maxLength; i++)
|
|
338
364
|
savings[i] = savings[i-1] + (lengthList[i] * (i-3));
|
|
339
365
|
|
|
340
|
-
DISPLAYLEVEL(4, "Selected
|
|
341
|
-
(
|
|
366
|
+
DISPLAYLEVEL(4, "Selected dict at position %u, of length %u : saves %u (ratio: %.2f) \n",
|
|
367
|
+
(unsigned)pos, (unsigned)maxLength, (unsigned)savings[maxLength], (double)savings[maxLength] / maxLength);
|
|
342
368
|
|
|
343
369
|
solution.pos = (U32)pos;
|
|
344
370
|
solution.length = (U32)maxLength;
|
|
@@ -347,12 +373,12 @@ static dictItem ZDICT_analyzePos(
|
|
|
347
373
|
/* mark positions done */
|
|
348
374
|
{ U32 id;
|
|
349
375
|
for (id=start; id<end; id++) {
|
|
350
|
-
U32 p, pEnd;
|
|
376
|
+
U32 p, pEnd, length;
|
|
351
377
|
U32 const testedPos = suffix[id];
|
|
352
378
|
if (testedPos == pos)
|
|
353
379
|
length = solution.length;
|
|
354
380
|
else {
|
|
355
|
-
length = ZDICT_count(b+pos, b+testedPos);
|
|
381
|
+
length = (U32)ZDICT_count(b+pos, b+testedPos);
|
|
356
382
|
if (length > solution.length) length = solution.length;
|
|
357
383
|
}
|
|
358
384
|
pEnd = (U32)(testedPos + length);
|
|
@@ -364,21 +390,35 @@ static dictItem ZDICT_analyzePos(
|
|
|
364
390
|
}
|
|
365
391
|
|
|
366
392
|
|
|
367
|
-
|
|
393
|
+
static int isIncluded(const void* in, const void* container, size_t length)
|
|
394
|
+
{
|
|
395
|
+
const char* const ip = (const char*) in;
|
|
396
|
+
const char* const into = (const char*) container;
|
|
397
|
+
size_t u;
|
|
398
|
+
|
|
399
|
+
for (u=0; u<length; u++) { /* works because end of buffer is a noisy guard band */
|
|
400
|
+
if (ip[u] != into[u]) break;
|
|
401
|
+
}
|
|
402
|
+
|
|
403
|
+
return u==length;
|
|
404
|
+
}
|
|
405
|
+
|
|
406
|
+
/*! ZDICT_tryMerge() :
|
|
368
407
|
check if dictItem can be merged, do it if possible
|
|
369
408
|
@return : id of destination elt, 0 if not merged
|
|
370
409
|
*/
|
|
371
|
-
static U32
|
|
410
|
+
static U32 ZDICT_tryMerge(dictItem* table, dictItem elt, U32 eltNbToSkip, const void* buffer)
|
|
372
411
|
{
|
|
373
412
|
const U32 tableSize = table->pos;
|
|
374
413
|
const U32 eltEnd = elt.pos + elt.length;
|
|
414
|
+
const char* const buf = (const char*) buffer;
|
|
375
415
|
|
|
376
416
|
/* tail overlap */
|
|
377
417
|
U32 u; for (u=1; u<tableSize; u++) {
|
|
378
418
|
if (u==eltNbToSkip) continue;
|
|
379
419
|
if ((table[u].pos > elt.pos) && (table[u].pos <= eltEnd)) { /* overlap, existing > new */
|
|
380
420
|
/* append */
|
|
381
|
-
U32 addedLength = table[u].pos - elt.pos;
|
|
421
|
+
U32 const addedLength = table[u].pos - elt.pos;
|
|
382
422
|
table[u].length += addedLength;
|
|
383
423
|
table[u].pos = elt.pos;
|
|
384
424
|
table[u].savings += elt.savings * addedLength / elt.length; /* rough approx */
|
|
@@ -394,9 +434,10 @@ static U32 ZDICT_checkMerge(dictItem* table, dictItem elt, U32 eltNbToSkip)
|
|
|
394
434
|
/* front overlap */
|
|
395
435
|
for (u=1; u<tableSize; u++) {
|
|
396
436
|
if (u==eltNbToSkip) continue;
|
|
437
|
+
|
|
397
438
|
if ((table[u].pos + table[u].length >= elt.pos) && (table[u].pos < elt.pos)) { /* overlap, existing < new */
|
|
398
439
|
/* append */
|
|
399
|
-
int addedLength = (int)eltEnd - (table[u].pos + table[u].length);
|
|
440
|
+
int const addedLength = (int)eltEnd - (table[u].pos + table[u].length);
|
|
400
441
|
table[u].savings += elt.length / 8; /* rough approx bonus */
|
|
401
442
|
if (addedLength > 0) { /* otherwise, elt fully included into existing */
|
|
402
443
|
table[u].length += addedLength;
|
|
@@ -408,7 +449,18 @@ static U32 ZDICT_checkMerge(dictItem* table, dictItem elt, U32 eltNbToSkip)
|
|
|
408
449
|
table[u] = table[u-1], u--;
|
|
409
450
|
table[u] = elt;
|
|
410
451
|
return u;
|
|
411
|
-
|
|
452
|
+
}
|
|
453
|
+
|
|
454
|
+
if (MEM_read64(buf + table[u].pos) == MEM_read64(buf + elt.pos + 1)) {
|
|
455
|
+
if (isIncluded(buf + table[u].pos, buf + elt.pos + 1, table[u].length)) {
|
|
456
|
+
size_t const addedLength = MAX( (int)elt.length - (int)table[u].length , 1 );
|
|
457
|
+
table[u].pos = elt.pos;
|
|
458
|
+
table[u].savings += (U32)(elt.savings * addedLength / elt.length);
|
|
459
|
+
table[u].length = MIN(elt.length, table[u].length + 1);
|
|
460
|
+
return u;
|
|
461
|
+
}
|
|
462
|
+
}
|
|
463
|
+
}
|
|
412
464
|
|
|
413
465
|
return 0;
|
|
414
466
|
}
|
|
@@ -416,8 +468,8 @@ static U32 ZDICT_checkMerge(dictItem* table, dictItem elt, U32 eltNbToSkip)
|
|
|
416
468
|
|
|
417
469
|
static void ZDICT_removeDictItem(dictItem* table, U32 id)
|
|
418
470
|
{
|
|
419
|
-
/* convention :
|
|
420
|
-
U32 const max = table
|
|
471
|
+
/* convention : table[0].pos stores nb of elts */
|
|
472
|
+
U32 const max = table[0].pos;
|
|
421
473
|
U32 u;
|
|
422
474
|
if (!id) return; /* protection, should never happen */
|
|
423
475
|
for (u=id; u<max-1; u++)
|
|
@@ -426,14 +478,14 @@ static void ZDICT_removeDictItem(dictItem* table, U32 id)
|
|
|
426
478
|
}
|
|
427
479
|
|
|
428
480
|
|
|
429
|
-
static void ZDICT_insertDictItem(dictItem* table, U32 maxSize, dictItem elt)
|
|
481
|
+
static void ZDICT_insertDictItem(dictItem* table, U32 maxSize, dictItem elt, const void* buffer)
|
|
430
482
|
{
|
|
431
483
|
/* merge if possible */
|
|
432
|
-
U32 mergeId =
|
|
484
|
+
U32 mergeId = ZDICT_tryMerge(table, elt, 0, buffer);
|
|
433
485
|
if (mergeId) {
|
|
434
486
|
U32 newMerge = 1;
|
|
435
487
|
while (newMerge) {
|
|
436
|
-
newMerge =
|
|
488
|
+
newMerge = ZDICT_tryMerge(table, table[mergeId], mergeId, buffer);
|
|
437
489
|
if (newMerge) ZDICT_removeDictItem(table, mergeId);
|
|
438
490
|
mergeId = newMerge;
|
|
439
491
|
}
|
|
@@ -464,10 +516,10 @@ static U32 ZDICT_dictSize(const dictItem* dictList)
|
|
|
464
516
|
}
|
|
465
517
|
|
|
466
518
|
|
|
467
|
-
static size_t
|
|
519
|
+
static size_t ZDICT_trainBuffer_legacy(dictItem* dictList, U32 dictListSize,
|
|
468
520
|
const void* const buffer, size_t bufferSize, /* buffer must end with noisy guard band */
|
|
469
521
|
const size_t* fileSizes, unsigned nbFiles,
|
|
470
|
-
|
|
522
|
+
unsigned minRatio, U32 notificationLevel)
|
|
471
523
|
{
|
|
472
524
|
int* const suffix0 = (int*)malloc((bufferSize+2)*sizeof(*suffix0));
|
|
473
525
|
int* const suffix = suffix0+1;
|
|
@@ -478,10 +530,11 @@ static size_t ZDICT_trainBuffer(dictItem* dictList, U32 dictListSize,
|
|
|
478
530
|
clock_t displayClock = 0;
|
|
479
531
|
clock_t const refreshRate = CLOCKS_PER_SEC * 3 / 10;
|
|
480
532
|
|
|
533
|
+
# undef DISPLAYUPDATE
|
|
481
534
|
# define DISPLAYUPDATE(l, ...) if (notificationLevel>=l) { \
|
|
482
535
|
if (ZDICT_clockSpan(displayClock) > refreshRate) \
|
|
483
536
|
{ displayClock = clock(); DISPLAY(__VA_ARGS__); \
|
|
484
|
-
if (notificationLevel>=4) fflush(
|
|
537
|
+
if (notificationLevel>=4) fflush(stderr); } }
|
|
485
538
|
|
|
486
539
|
/* init */
|
|
487
540
|
DISPLAYLEVEL(2, "\r%70s\r", ""); /* clean display line */
|
|
@@ -493,11 +546,11 @@ static size_t ZDICT_trainBuffer(dictItem* dictList, U32 dictListSize,
|
|
|
493
546
|
memset(doneMarks, 0, bufferSize+16);
|
|
494
547
|
|
|
495
548
|
/* limit sample set size (divsufsort limitation)*/
|
|
496
|
-
if (bufferSize > ZDICT_MAX_SAMPLES_SIZE) DISPLAYLEVEL(3, "sample set too large : reduced to %u MB ...\n", (
|
|
549
|
+
if (bufferSize > ZDICT_MAX_SAMPLES_SIZE) DISPLAYLEVEL(3, "sample set too large : reduced to %u MB ...\n", (unsigned)(ZDICT_MAX_SAMPLES_SIZE>>20));
|
|
497
550
|
while (bufferSize > ZDICT_MAX_SAMPLES_SIZE) bufferSize -= fileSizes[--nbFiles];
|
|
498
551
|
|
|
499
552
|
/* sort */
|
|
500
|
-
DISPLAYLEVEL(2, "sorting %u files of total size %u MB ...\n", nbFiles, (
|
|
553
|
+
DISPLAYLEVEL(2, "sorting %u files of total size %u MB ...\n", nbFiles, (unsigned)(bufferSize>>20));
|
|
501
554
|
{ int const divSuftSortResult = divsufsort((const unsigned char*)buffer, suffix, (int)bufferSize, 0);
|
|
502
555
|
if (divSuftSortResult != 0) { result = ERROR(GENERIC); goto _cleanup; }
|
|
503
556
|
}
|
|
@@ -522,7 +575,7 @@ static size_t ZDICT_trainBuffer(dictItem* dictList, U32 dictListSize,
|
|
|
522
575
|
if (doneMarks[cursor]) { cursor++; continue; }
|
|
523
576
|
solution = ZDICT_analyzePos(doneMarks, suffix, reverseSuffix[cursor], buffer, minRatio, notificationLevel);
|
|
524
577
|
if (solution.length==0) { cursor++; continue; }
|
|
525
|
-
ZDICT_insertDictItem(dictList, dictListSize, solution);
|
|
578
|
+
ZDICT_insertDictItem(dictList, dictListSize, solution, buffer);
|
|
526
579
|
cursor += solution.length;
|
|
527
580
|
DISPLAYUPDATE(2, "\r%4.2f %% \r", (double)cursor / bufferSize * 100);
|
|
528
581
|
} }
|
|
@@ -541,7 +594,7 @@ static void ZDICT_fillNoise(void* buffer, size_t length)
|
|
|
541
594
|
unsigned const prime1 = 2654435761U;
|
|
542
595
|
unsigned const prime2 = 2246822519U;
|
|
543
596
|
unsigned acc = prime1;
|
|
544
|
-
size_t p=0
|
|
597
|
+
size_t p=0;
|
|
545
598
|
for (p=0; p<length; p++) {
|
|
546
599
|
acc *= prime2;
|
|
547
600
|
((unsigned char*)buffer)[p] = (unsigned char)(acc >> 21);
|
|
@@ -551,29 +604,31 @@ static void ZDICT_fillNoise(void* buffer, size_t length)
|
|
|
551
604
|
|
|
552
605
|
typedef struct
|
|
553
606
|
{
|
|
554
|
-
|
|
555
|
-
ZSTD_CCtx* zc;
|
|
556
|
-
void* workPlace; /* must be
|
|
607
|
+
ZSTD_CDict* dict; /* dictionary */
|
|
608
|
+
ZSTD_CCtx* zc; /* working context */
|
|
609
|
+
void* workPlace; /* must be ZSTD_BLOCKSIZE_MAX allocated */
|
|
557
610
|
} EStats_ress_t;
|
|
558
611
|
|
|
559
612
|
#define MAXREPOFFSET 1024
|
|
560
613
|
|
|
561
|
-
static void ZDICT_countEStats(EStats_ress_t esr, ZSTD_parameters params,
|
|
562
|
-
|
|
563
|
-
|
|
614
|
+
static void ZDICT_countEStats(EStats_ress_t esr, const ZSTD_parameters* params,
|
|
615
|
+
unsigned* countLit, unsigned* offsetcodeCount, unsigned* matchlengthCount, unsigned* litlengthCount, U32* repOffsets,
|
|
616
|
+
const void* src, size_t srcSize,
|
|
617
|
+
U32 notificationLevel)
|
|
564
618
|
{
|
|
565
|
-
size_t const blockSizeMax = MIN (
|
|
619
|
+
size_t const blockSizeMax = MIN (ZSTD_BLOCKSIZE_MAX, 1 << params->cParams.windowLog);
|
|
566
620
|
size_t cSize;
|
|
567
621
|
|
|
568
622
|
if (srcSize > blockSizeMax) srcSize = blockSizeMax; /* protection vs large samples */
|
|
569
|
-
{
|
|
570
|
-
|
|
623
|
+
{ size_t const errorCode = ZSTD_compressBegin_usingCDict(esr.zc, esr.dict);
|
|
624
|
+
if (ZSTD_isError(errorCode)) { DISPLAYLEVEL(1, "warning : ZSTD_compressBegin_usingCDict failed \n"); return; }
|
|
625
|
+
|
|
571
626
|
}
|
|
572
|
-
cSize = ZSTD_compressBlock(esr.zc, esr.workPlace,
|
|
573
|
-
if (ZSTD_isError(cSize)) { DISPLAYLEVEL(
|
|
627
|
+
cSize = ZSTD_compressBlock(esr.zc, esr.workPlace, ZSTD_BLOCKSIZE_MAX, src, srcSize);
|
|
628
|
+
if (ZSTD_isError(cSize)) { DISPLAYLEVEL(3, "warning : could not compress sample size %u \n", (unsigned)srcSize); return; }
|
|
574
629
|
|
|
575
630
|
if (cSize) { /* if == 0; block is not compressible */
|
|
576
|
-
const seqStore_t* seqStorePtr = ZSTD_getSeqStore(esr.zc);
|
|
631
|
+
const seqStore_t* const seqStorePtr = ZSTD_getSeqStore(esr.zc);
|
|
577
632
|
|
|
578
633
|
/* literals stats */
|
|
579
634
|
{ const BYTE* bytePtr;
|
|
@@ -611,17 +666,6 @@ static void ZDICT_countEStats(EStats_ress_t esr, ZSTD_parameters params,
|
|
|
611
666
|
} } }
|
|
612
667
|
}
|
|
613
668
|
|
|
614
|
-
/*
|
|
615
|
-
static size_t ZDICT_maxSampleSize(const size_t* fileSizes, unsigned nbFiles)
|
|
616
|
-
{
|
|
617
|
-
unsigned u;
|
|
618
|
-
size_t max=0;
|
|
619
|
-
for (u=0; u<nbFiles; u++)
|
|
620
|
-
if (max < fileSizes[u]) max = fileSizes[u];
|
|
621
|
-
return max;
|
|
622
|
-
}
|
|
623
|
-
*/
|
|
624
|
-
|
|
625
669
|
static size_t ZDICT_totalSampleSize(const size_t* fileSizes, unsigned nbFiles)
|
|
626
670
|
{
|
|
627
671
|
size_t total=0;
|
|
@@ -646,26 +690,38 @@ static void ZDICT_insertSortCount(offsetCount_t table[ZSTD_REP_NUM+1], U32 val,
|
|
|
646
690
|
}
|
|
647
691
|
}
|
|
648
692
|
|
|
693
|
+
/* ZDICT_flatLit() :
|
|
694
|
+
* rewrite `countLit` to contain a mostly flat but still compressible distribution of literals.
|
|
695
|
+
* necessary to avoid generating a non-compressible distribution that HUF_writeCTable() cannot encode.
|
|
696
|
+
*/
|
|
697
|
+
static void ZDICT_flatLit(unsigned* countLit)
|
|
698
|
+
{
|
|
699
|
+
int u;
|
|
700
|
+
for (u=1; u<256; u++) countLit[u] = 2;
|
|
701
|
+
countLit[0] = 4;
|
|
702
|
+
countLit[253] = 1;
|
|
703
|
+
countLit[254] = 1;
|
|
704
|
+
}
|
|
649
705
|
|
|
650
706
|
#define OFFCODE_MAX 30 /* only applicable to first block */
|
|
651
707
|
static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
|
|
652
|
-
|
|
708
|
+
int compressionLevel,
|
|
653
709
|
const void* srcBuffer, const size_t* fileSizes, unsigned nbFiles,
|
|
654
710
|
const void* dictBuffer, size_t dictBufferSize,
|
|
655
711
|
unsigned notificationLevel)
|
|
656
712
|
{
|
|
657
|
-
|
|
713
|
+
unsigned countLit[256];
|
|
658
714
|
HUF_CREATE_STATIC_CTABLE(hufTable, 255);
|
|
659
|
-
|
|
715
|
+
unsigned offcodeCount[OFFCODE_MAX+1];
|
|
660
716
|
short offcodeNCount[OFFCODE_MAX+1];
|
|
661
717
|
U32 offcodeMax = ZSTD_highbit32((U32)(dictBufferSize + 128 KB));
|
|
662
|
-
|
|
718
|
+
unsigned matchLengthCount[MaxML+1];
|
|
663
719
|
short matchLengthNCount[MaxML+1];
|
|
664
|
-
|
|
720
|
+
unsigned litLengthCount[MaxLL+1];
|
|
665
721
|
short litLengthNCount[MaxLL+1];
|
|
666
722
|
U32 repOffset[MAXREPOFFSET];
|
|
667
723
|
offsetCount_t bestRepOffset[ZSTD_REP_NUM+1];
|
|
668
|
-
EStats_ress_t esr;
|
|
724
|
+
EStats_ress_t esr = { NULL, NULL, NULL };
|
|
669
725
|
ZSTD_parameters params;
|
|
670
726
|
U32 u, huffLog = 11, Offlog = OffFSELog, mlLog = MLFSELog, llLog = LLFSELog, total;
|
|
671
727
|
size_t pos = 0, errorCode;
|
|
@@ -675,48 +731,51 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
|
|
|
675
731
|
BYTE* dstPtr = (BYTE*)dstBuffer;
|
|
676
732
|
|
|
677
733
|
/* init */
|
|
678
|
-
|
|
734
|
+
DEBUGLOG(4, "ZDICT_analyzeEntropy");
|
|
735
|
+
if (offcodeMax>OFFCODE_MAX) { eSize = ERROR(dictionaryCreation_failed); goto _cleanup; } /* too large dictionary */
|
|
736
|
+
for (u=0; u<256; u++) countLit[u] = 1; /* any character must be described */
|
|
737
|
+
for (u=0; u<=offcodeMax; u++) offcodeCount[u] = 1;
|
|
738
|
+
for (u=0; u<=MaxML; u++) matchLengthCount[u] = 1;
|
|
739
|
+
for (u=0; u<=MaxLL; u++) litLengthCount[u] = 1;
|
|
740
|
+
memset(repOffset, 0, sizeof(repOffset));
|
|
741
|
+
repOffset[1] = repOffset[4] = repOffset[8] = 1;
|
|
742
|
+
memset(bestRepOffset, 0, sizeof(bestRepOffset));
|
|
743
|
+
if (compressionLevel==0) compressionLevel = ZSTD_CLEVEL_DEFAULT;
|
|
744
|
+
params = ZSTD_getParams(compressionLevel, averageSampleSize, dictBufferSize);
|
|
745
|
+
|
|
746
|
+
esr.dict = ZSTD_createCDict_advanced(dictBuffer, dictBufferSize, ZSTD_dlm_byRef, ZSTD_dct_rawContent, params.cParams, ZSTD_defaultCMem);
|
|
679
747
|
esr.zc = ZSTD_createCCtx();
|
|
680
|
-
esr.workPlace = malloc(
|
|
681
|
-
if (!esr.
|
|
748
|
+
esr.workPlace = malloc(ZSTD_BLOCKSIZE_MAX);
|
|
749
|
+
if (!esr.dict || !esr.zc || !esr.workPlace) {
|
|
682
750
|
eSize = ERROR(memory_allocation);
|
|
683
751
|
DISPLAYLEVEL(1, "Not enough memory \n");
|
|
684
752
|
goto _cleanup;
|
|
685
753
|
}
|
|
686
|
-
if (offcodeMax>OFFCODE_MAX) { eSize = ERROR(dictionary_wrong); goto _cleanup; } /* too large dictionary */
|
|
687
|
-
for (u=0; u<256; u++) countLit[u]=1; /* any character must be described */
|
|
688
|
-
for (u=0; u<=offcodeMax; u++) offcodeCount[u]=1;
|
|
689
|
-
for (u=0; u<=MaxML; u++) matchLengthCount[u]=1;
|
|
690
|
-
for (u=0; u<=MaxLL; u++) litLengthCount[u]=1;
|
|
691
|
-
memset(repOffset, 0, sizeof(repOffset));
|
|
692
|
-
repOffset[1] = repOffset[4] = repOffset[8] = 1;
|
|
693
|
-
memset(bestRepOffset, 0, sizeof(bestRepOffset));
|
|
694
|
-
if (compressionLevel==0) compressionLevel=g_compressionLevel_default;
|
|
695
|
-
params = ZSTD_getParams(compressionLevel, averageSampleSize, dictBufferSize);
|
|
696
|
-
{ size_t const beginResult = ZSTD_compressBegin_advanced(esr.ref, dictBuffer, dictBufferSize, params, 0);
|
|
697
|
-
if (ZSTD_isError(beginResult)) {
|
|
698
|
-
eSize = ERROR(GENERIC);
|
|
699
|
-
DISPLAYLEVEL(1, "error : ZSTD_compressBegin_advanced failed \n");
|
|
700
|
-
goto _cleanup;
|
|
701
|
-
} }
|
|
702
754
|
|
|
703
|
-
/* collect stats on all
|
|
755
|
+
/* collect stats on all samples */
|
|
704
756
|
for (u=0; u<nbFiles; u++) {
|
|
705
|
-
ZDICT_countEStats(esr, params,
|
|
757
|
+
ZDICT_countEStats(esr, &params,
|
|
706
758
|
countLit, offcodeCount, matchLengthCount, litLengthCount, repOffset,
|
|
707
759
|
(const char*)srcBuffer + pos, fileSizes[u],
|
|
708
760
|
notificationLevel);
|
|
709
761
|
pos += fileSizes[u];
|
|
710
762
|
}
|
|
711
763
|
|
|
712
|
-
/* analyze */
|
|
713
|
-
|
|
714
|
-
|
|
715
|
-
|
|
716
|
-
|
|
717
|
-
|
|
764
|
+
/* analyze, build stats, starting with literals */
|
|
765
|
+
{ size_t maxNbBits = HUF_buildCTable (hufTable, countLit, 255, huffLog);
|
|
766
|
+
if (HUF_isError(maxNbBits)) {
|
|
767
|
+
eSize = maxNbBits;
|
|
768
|
+
DISPLAYLEVEL(1, " HUF_buildCTable error \n");
|
|
769
|
+
goto _cleanup;
|
|
770
|
+
}
|
|
771
|
+
if (maxNbBits==8) { /* not compressible : will fail on HUF_writeCTable() */
|
|
772
|
+
DISPLAYLEVEL(2, "warning : pathological dataset : literals are not compressible : samples are noisy or too regular \n");
|
|
773
|
+
ZDICT_flatLit(countLit); /* replace distribution by a fake "mostly flat but still compressible" distribution, that HUF_writeCTable() can encode */
|
|
774
|
+
maxNbBits = HUF_buildCTable (hufTable, countLit, 255, huffLog);
|
|
775
|
+
assert(maxNbBits==9);
|
|
776
|
+
}
|
|
777
|
+
huffLog = (U32)maxNbBits;
|
|
718
778
|
}
|
|
719
|
-
huffLog = (U32)errorCode;
|
|
720
779
|
|
|
721
780
|
/* looking for most common first offsets */
|
|
722
781
|
{ U32 offset;
|
|
@@ -726,27 +785,27 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
|
|
|
726
785
|
/* note : the result of this phase should be used to better appreciate the impact on statistics */
|
|
727
786
|
|
|
728
787
|
total=0; for (u=0; u<=offcodeMax; u++) total+=offcodeCount[u];
|
|
729
|
-
errorCode = FSE_normalizeCount(offcodeNCount, Offlog, offcodeCount, total, offcodeMax);
|
|
788
|
+
errorCode = FSE_normalizeCount(offcodeNCount, Offlog, offcodeCount, total, offcodeMax, /* useLowProbCount */ 1);
|
|
730
789
|
if (FSE_isError(errorCode)) {
|
|
731
|
-
eSize =
|
|
790
|
+
eSize = errorCode;
|
|
732
791
|
DISPLAYLEVEL(1, "FSE_normalizeCount error with offcodeCount \n");
|
|
733
792
|
goto _cleanup;
|
|
734
793
|
}
|
|
735
794
|
Offlog = (U32)errorCode;
|
|
736
795
|
|
|
737
796
|
total=0; for (u=0; u<=MaxML; u++) total+=matchLengthCount[u];
|
|
738
|
-
errorCode = FSE_normalizeCount(matchLengthNCount, mlLog, matchLengthCount, total, MaxML);
|
|
797
|
+
errorCode = FSE_normalizeCount(matchLengthNCount, mlLog, matchLengthCount, total, MaxML, /* useLowProbCount */ 1);
|
|
739
798
|
if (FSE_isError(errorCode)) {
|
|
740
|
-
eSize =
|
|
799
|
+
eSize = errorCode;
|
|
741
800
|
DISPLAYLEVEL(1, "FSE_normalizeCount error with matchLengthCount \n");
|
|
742
801
|
goto _cleanup;
|
|
743
802
|
}
|
|
744
803
|
mlLog = (U32)errorCode;
|
|
745
804
|
|
|
746
805
|
total=0; for (u=0; u<=MaxLL; u++) total+=litLengthCount[u];
|
|
747
|
-
errorCode = FSE_normalizeCount(litLengthNCount, llLog, litLengthCount, total, MaxLL);
|
|
806
|
+
errorCode = FSE_normalizeCount(litLengthNCount, llLog, litLengthCount, total, MaxLL, /* useLowProbCount */ 1);
|
|
748
807
|
if (FSE_isError(errorCode)) {
|
|
749
|
-
eSize =
|
|
808
|
+
eSize = errorCode;
|
|
750
809
|
DISPLAYLEVEL(1, "FSE_normalizeCount error with litLengthCount \n");
|
|
751
810
|
goto _cleanup;
|
|
752
811
|
}
|
|
@@ -755,7 +814,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
|
|
|
755
814
|
/* write result to buffer */
|
|
756
815
|
{ size_t const hhSize = HUF_writeCTable(dstPtr, maxDstSize, hufTable, 255, huffLog);
|
|
757
816
|
if (HUF_isError(hhSize)) {
|
|
758
|
-
eSize =
|
|
817
|
+
eSize = hhSize;
|
|
759
818
|
DISPLAYLEVEL(1, "HUF_writeCTable error \n");
|
|
760
819
|
goto _cleanup;
|
|
761
820
|
}
|
|
@@ -766,7 +825,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
|
|
|
766
825
|
|
|
767
826
|
{ size_t const ohSize = FSE_writeNCount(dstPtr, maxDstSize, offcodeNCount, OFFCODE_MAX, Offlog);
|
|
768
827
|
if (FSE_isError(ohSize)) {
|
|
769
|
-
eSize =
|
|
828
|
+
eSize = ohSize;
|
|
770
829
|
DISPLAYLEVEL(1, "FSE_writeNCount error with offcodeNCount \n");
|
|
771
830
|
goto _cleanup;
|
|
772
831
|
}
|
|
@@ -777,7 +836,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
|
|
|
777
836
|
|
|
778
837
|
{ size_t const mhSize = FSE_writeNCount(dstPtr, maxDstSize, matchLengthNCount, MaxML, mlLog);
|
|
779
838
|
if (FSE_isError(mhSize)) {
|
|
780
|
-
eSize =
|
|
839
|
+
eSize = mhSize;
|
|
781
840
|
DISPLAYLEVEL(1, "FSE_writeNCount error with matchLengthNCount \n");
|
|
782
841
|
goto _cleanup;
|
|
783
842
|
}
|
|
@@ -788,7 +847,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
|
|
|
788
847
|
|
|
789
848
|
{ size_t const lhSize = FSE_writeNCount(dstPtr, maxDstSize, litLengthNCount, MaxLL, llLog);
|
|
790
849
|
if (FSE_isError(lhSize)) {
|
|
791
|
-
eSize =
|
|
850
|
+
eSize = lhSize;
|
|
792
851
|
DISPLAYLEVEL(1, "FSE_writeNCount error with litlengthNCount \n");
|
|
793
852
|
goto _cleanup;
|
|
794
853
|
}
|
|
@@ -798,7 +857,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
|
|
|
798
857
|
}
|
|
799
858
|
|
|
800
859
|
if (maxDstSize<12) {
|
|
801
|
-
eSize = ERROR(
|
|
860
|
+
eSize = ERROR(dstSize_tooSmall);
|
|
802
861
|
DISPLAYLEVEL(1, "not enough space to write RepOffsets \n");
|
|
803
862
|
goto _cleanup;
|
|
804
863
|
}
|
|
@@ -813,11 +872,10 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
|
|
|
813
872
|
MEM_writeLE32(dstPtr+4, repStartValue[1]);
|
|
814
873
|
MEM_writeLE32(dstPtr+8, repStartValue[2]);
|
|
815
874
|
#endif
|
|
816
|
-
//dstPtr += 12;
|
|
817
875
|
eSize += 12;
|
|
818
876
|
|
|
819
877
|
_cleanup:
|
|
820
|
-
|
|
878
|
+
ZSTD_freeCDict(esr.dict);
|
|
821
879
|
ZSTD_freeCCtx(esr.zc);
|
|
822
880
|
free(esr.workPlace);
|
|
823
881
|
|
|
@@ -825,26 +883,68 @@ _cleanup:
|
|
|
825
883
|
}
|
|
826
884
|
|
|
827
885
|
|
|
828
|
-
|
|
829
|
-
|
|
830
|
-
|
|
886
|
+
|
|
887
|
+
size_t ZDICT_finalizeDictionary(void* dictBuffer, size_t dictBufferCapacity,
|
|
888
|
+
const void* customDictContent, size_t dictContentSize,
|
|
889
|
+
const void* samplesBuffer, const size_t* samplesSizes,
|
|
890
|
+
unsigned nbSamples, ZDICT_params_t params)
|
|
831
891
|
{
|
|
832
892
|
size_t hSize;
|
|
833
|
-
|
|
893
|
+
#define HBUFFSIZE 256 /* should prove large enough for all entropy headers */
|
|
894
|
+
BYTE header[HBUFFSIZE];
|
|
895
|
+
int const compressionLevel = (params.compressionLevel == 0) ? ZSTD_CLEVEL_DEFAULT : params.compressionLevel;
|
|
834
896
|
U32 const notificationLevel = params.notificationLevel;
|
|
835
897
|
|
|
898
|
+
/* check conditions */
|
|
899
|
+
DEBUGLOG(4, "ZDICT_finalizeDictionary");
|
|
900
|
+
if (dictBufferCapacity < dictContentSize) return ERROR(dstSize_tooSmall);
|
|
901
|
+
if (dictContentSize < ZDICT_CONTENTSIZE_MIN) return ERROR(srcSize_wrong);
|
|
902
|
+
if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) return ERROR(dstSize_tooSmall);
|
|
903
|
+
|
|
836
904
|
/* dictionary header */
|
|
837
|
-
MEM_writeLE32(
|
|
838
|
-
{ U64 const randomID = XXH64(
|
|
905
|
+
MEM_writeLE32(header, ZSTD_MAGIC_DICTIONARY);
|
|
906
|
+
{ U64 const randomID = XXH64(customDictContent, dictContentSize, 0);
|
|
839
907
|
U32 const compliantID = (randomID % ((1U<<31)-32768)) + 32768;
|
|
840
908
|
U32 const dictID = params.dictID ? params.dictID : compliantID;
|
|
841
|
-
MEM_writeLE32(
|
|
909
|
+
MEM_writeLE32(header+4, dictID);
|
|
842
910
|
}
|
|
843
911
|
hSize = 8;
|
|
844
912
|
|
|
845
913
|
/* entropy tables */
|
|
846
914
|
DISPLAYLEVEL(2, "\r%70s\r", ""); /* clean display line */
|
|
847
915
|
DISPLAYLEVEL(2, "statistics ... \n");
|
|
916
|
+
{ size_t const eSize = ZDICT_analyzeEntropy(header+hSize, HBUFFSIZE-hSize,
|
|
917
|
+
compressionLevel,
|
|
918
|
+
samplesBuffer, samplesSizes, nbSamples,
|
|
919
|
+
customDictContent, dictContentSize,
|
|
920
|
+
notificationLevel);
|
|
921
|
+
if (ZDICT_isError(eSize)) return eSize;
|
|
922
|
+
hSize += eSize;
|
|
923
|
+
}
|
|
924
|
+
|
|
925
|
+
/* copy elements in final buffer ; note : src and dst buffer can overlap */
|
|
926
|
+
if (hSize + dictContentSize > dictBufferCapacity) dictContentSize = dictBufferCapacity - hSize;
|
|
927
|
+
{ size_t const dictSize = hSize + dictContentSize;
|
|
928
|
+
char* dictEnd = (char*)dictBuffer + dictSize;
|
|
929
|
+
memmove(dictEnd - dictContentSize, customDictContent, dictContentSize);
|
|
930
|
+
memcpy(dictBuffer, header, hSize);
|
|
931
|
+
return dictSize;
|
|
932
|
+
}
|
|
933
|
+
}
|
|
934
|
+
|
|
935
|
+
|
|
936
|
+
static size_t ZDICT_addEntropyTablesFromBuffer_advanced(
|
|
937
|
+
void* dictBuffer, size_t dictContentSize, size_t dictBufferCapacity,
|
|
938
|
+
const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
|
|
939
|
+
ZDICT_params_t params)
|
|
940
|
+
{
|
|
941
|
+
int const compressionLevel = (params.compressionLevel == 0) ? ZSTD_CLEVEL_DEFAULT : params.compressionLevel;
|
|
942
|
+
U32 const notificationLevel = params.notificationLevel;
|
|
943
|
+
size_t hSize = 8;
|
|
944
|
+
|
|
945
|
+
/* calculate entropy tables */
|
|
946
|
+
DISPLAYLEVEL(2, "\r%70s\r", ""); /* clean display line */
|
|
947
|
+
DISPLAYLEVEL(2, "statistics ... \n");
|
|
848
948
|
{ size_t const eSize = ZDICT_analyzeEntropy((char*)dictBuffer+hSize, dictBufferCapacity-hSize,
|
|
849
949
|
compressionLevel,
|
|
850
950
|
samplesBuffer, samplesSizes, nbSamples,
|
|
@@ -854,21 +954,32 @@ size_t ZDICT_addEntropyTablesFromBuffer_advanced(void* dictBuffer, size_t dictCo
|
|
|
854
954
|
hSize += eSize;
|
|
855
955
|
}
|
|
856
956
|
|
|
957
|
+
/* add dictionary header (after entropy tables) */
|
|
958
|
+
MEM_writeLE32(dictBuffer, ZSTD_MAGIC_DICTIONARY);
|
|
959
|
+
{ U64 const randomID = XXH64((char*)dictBuffer + dictBufferCapacity - dictContentSize, dictContentSize, 0);
|
|
960
|
+
U32 const compliantID = (randomID % ((1U<<31)-32768)) + 32768;
|
|
961
|
+
U32 const dictID = params.dictID ? params.dictID : compliantID;
|
|
962
|
+
MEM_writeLE32((char*)dictBuffer+4, dictID);
|
|
963
|
+
}
|
|
857
964
|
|
|
858
965
|
if (hSize + dictContentSize < dictBufferCapacity)
|
|
859
966
|
memmove((char*)dictBuffer + hSize, (char*)dictBuffer + dictBufferCapacity - dictContentSize, dictContentSize);
|
|
860
967
|
return MIN(dictBufferCapacity, hSize+dictContentSize);
|
|
861
968
|
}
|
|
862
969
|
|
|
863
|
-
|
|
864
|
-
|
|
970
|
+
/* Hidden declaration for dbio.c */
|
|
971
|
+
size_t ZDICT_trainFromBuffer_unsafe_legacy(
|
|
972
|
+
void* dictBuffer, size_t maxDictSize,
|
|
973
|
+
const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
|
|
974
|
+
ZDICT_legacy_params_t params);
|
|
975
|
+
/*! ZDICT_trainFromBuffer_unsafe_legacy() :
|
|
865
976
|
* Warning : `samplesBuffer` must be followed by noisy guard band.
|
|
866
977
|
* @return : size of dictionary, or an error code which can be tested with ZDICT_isError()
|
|
867
978
|
*/
|
|
868
|
-
size_t
|
|
979
|
+
size_t ZDICT_trainFromBuffer_unsafe_legacy(
|
|
869
980
|
void* dictBuffer, size_t maxDictSize,
|
|
870
981
|
const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
|
|
871
|
-
|
|
982
|
+
ZDICT_legacy_params_t params)
|
|
872
983
|
{
|
|
873
984
|
U32 const dictListSize = MAX(MAX(DICTLISTSIZE_DEFAULT, nbSamples), (U32)(maxDictSize/16));
|
|
874
985
|
dictItem* const dictList = (dictItem*)malloc(dictListSize * sizeof(*dictList));
|
|
@@ -877,58 +988,63 @@ size_t ZDICT_trainFromBuffer_unsafe(
|
|
|
877
988
|
size_t const targetDictSize = maxDictSize;
|
|
878
989
|
size_t const samplesBuffSize = ZDICT_totalSampleSize(samplesSizes, nbSamples);
|
|
879
990
|
size_t dictSize = 0;
|
|
880
|
-
U32 const notificationLevel = params.notificationLevel;
|
|
991
|
+
U32 const notificationLevel = params.zParams.notificationLevel;
|
|
881
992
|
|
|
882
993
|
/* checks */
|
|
883
994
|
if (!dictList) return ERROR(memory_allocation);
|
|
884
|
-
if (maxDictSize
|
|
885
|
-
if (samplesBuffSize < ZDICT_MIN_SAMPLES_SIZE) { free(dictList); return
|
|
995
|
+
if (maxDictSize < ZDICT_DICTSIZE_MIN) { free(dictList); return ERROR(dstSize_tooSmall); } /* requested dictionary size is too small */
|
|
996
|
+
if (samplesBuffSize < ZDICT_MIN_SAMPLES_SIZE) { free(dictList); return ERROR(dictionaryCreation_failed); } /* not enough source to create dictionary */
|
|
886
997
|
|
|
887
998
|
/* init */
|
|
888
999
|
ZDICT_initDictItem(dictList);
|
|
889
1000
|
|
|
890
1001
|
/* build dictionary */
|
|
891
|
-
|
|
892
|
-
|
|
893
|
-
|
|
894
|
-
|
|
1002
|
+
ZDICT_trainBuffer_legacy(dictList, dictListSize,
|
|
1003
|
+
samplesBuffer, samplesBuffSize,
|
|
1004
|
+
samplesSizes, nbSamples,
|
|
1005
|
+
minRep, notificationLevel);
|
|
895
1006
|
|
|
896
1007
|
/* display best matches */
|
|
897
|
-
if (params.notificationLevel>= 3) {
|
|
898
|
-
|
|
899
|
-
|
|
900
|
-
|
|
901
|
-
DISPLAYLEVEL(3, "\n %u segments found, of total size %u \n", dictList[0].pos, dictContentSize);
|
|
902
|
-
DISPLAYLEVEL(3, "list %u best segments \n", nb);
|
|
903
|
-
for (u=1; u
|
|
904
|
-
|
|
905
|
-
|
|
906
|
-
U32 printedLength = MIN(40, length);
|
|
1008
|
+
if (params.zParams.notificationLevel>= 3) {
|
|
1009
|
+
unsigned const nb = MIN(25, dictList[0].pos);
|
|
1010
|
+
unsigned const dictContentSize = ZDICT_dictSize(dictList);
|
|
1011
|
+
unsigned u;
|
|
1012
|
+
DISPLAYLEVEL(3, "\n %u segments found, of total size %u \n", (unsigned)dictList[0].pos-1, dictContentSize);
|
|
1013
|
+
DISPLAYLEVEL(3, "list %u best segments \n", nb-1);
|
|
1014
|
+
for (u=1; u<nb; u++) {
|
|
1015
|
+
unsigned const pos = dictList[u].pos;
|
|
1016
|
+
unsigned const length = dictList[u].length;
|
|
1017
|
+
U32 const printedLength = MIN(40, length);
|
|
1018
|
+
if ((pos > samplesBuffSize) || ((pos + length) > samplesBuffSize)) {
|
|
1019
|
+
free(dictList);
|
|
1020
|
+
return ERROR(GENERIC); /* should never happen */
|
|
1021
|
+
}
|
|
907
1022
|
DISPLAYLEVEL(3, "%3u:%3u bytes at pos %8u, savings %7u bytes |",
|
|
908
|
-
u, length, pos, dictList[u].savings);
|
|
1023
|
+
u, length, pos, (unsigned)dictList[u].savings);
|
|
909
1024
|
ZDICT_printHex((const char*)samplesBuffer+pos, printedLength);
|
|
910
1025
|
DISPLAYLEVEL(3, "| \n");
|
|
911
1026
|
} }
|
|
912
1027
|
|
|
913
1028
|
|
|
914
1029
|
/* create dictionary */
|
|
915
|
-
{
|
|
916
|
-
if (dictContentSize <
|
|
917
|
-
|
|
1030
|
+
{ unsigned dictContentSize = ZDICT_dictSize(dictList);
|
|
1031
|
+
if (dictContentSize < ZDICT_CONTENTSIZE_MIN) { free(dictList); return ERROR(dictionaryCreation_failed); } /* dictionary content too small */
|
|
1032
|
+
if (dictContentSize < targetDictSize/4) {
|
|
1033
|
+
DISPLAYLEVEL(2, "! warning : selected content significantly smaller than requested (%u < %u) \n", dictContentSize, (unsigned)maxDictSize);
|
|
1034
|
+
if (samplesBuffSize < 10 * targetDictSize)
|
|
1035
|
+
DISPLAYLEVEL(2, "! consider increasing the number of samples (total size : %u MB)\n", (unsigned)(samplesBuffSize>>20));
|
|
918
1036
|
if (minRep > MINRATIO) {
|
|
919
1037
|
DISPLAYLEVEL(2, "! consider increasing selectivity to produce larger dictionary (-s%u) \n", selectivity+1);
|
|
920
1038
|
DISPLAYLEVEL(2, "! note : larger dictionaries are not necessarily better, test its efficiency on samples \n");
|
|
921
1039
|
}
|
|
922
|
-
if (samplesBuffSize < 10 * targetDictSize)
|
|
923
|
-
DISPLAYLEVEL(2, "! consider increasing the number of samples (total size : %u MB)\n", (U32)(samplesBuffSize>>20));
|
|
924
1040
|
}
|
|
925
1041
|
|
|
926
1042
|
if ((dictContentSize > targetDictSize*3) && (nbSamples > 2*MINRATIO) && (selectivity>1)) {
|
|
927
|
-
|
|
1043
|
+
unsigned proposedSelectivity = selectivity-1;
|
|
928
1044
|
while ((nbSamples >> proposedSelectivity) <= MINRATIO) { proposedSelectivity--; }
|
|
929
|
-
DISPLAYLEVEL(2, "! note : calculated dictionary significantly larger than requested (%u > %u) \n", dictContentSize, (
|
|
1045
|
+
DISPLAYLEVEL(2, "! note : calculated dictionary significantly larger than requested (%u > %u) \n", dictContentSize, (unsigned)maxDictSize);
|
|
930
1046
|
DISPLAYLEVEL(2, "! consider increasing dictionary size, or produce denser dictionary (-s%u) \n", proposedSelectivity);
|
|
931
|
-
DISPLAYLEVEL(2, "! always test dictionary efficiency on samples \n");
|
|
1047
|
+
DISPLAYLEVEL(2, "! always test dictionary efficiency on real samples \n");
|
|
932
1048
|
}
|
|
933
1049
|
|
|
934
1050
|
/* limit dictionary size */
|
|
@@ -954,7 +1070,7 @@ size_t ZDICT_trainFromBuffer_unsafe(
|
|
|
954
1070
|
|
|
955
1071
|
dictSize = ZDICT_addEntropyTablesFromBuffer_advanced(dictBuffer, dictContentSize, maxDictSize,
|
|
956
1072
|
samplesBuffer, samplesSizes, nbSamples,
|
|
957
|
-
params);
|
|
1073
|
+
params.zParams);
|
|
958
1074
|
}
|
|
959
1075
|
|
|
960
1076
|
/* clean up */
|
|
@@ -963,11 +1079,12 @@ size_t ZDICT_trainFromBuffer_unsafe(
|
|
|
963
1079
|
}
|
|
964
1080
|
|
|
965
1081
|
|
|
966
|
-
/*
|
|
967
|
-
*
|
|
968
|
-
|
|
969
|
-
|
|
970
|
-
|
|
1082
|
+
/* ZDICT_trainFromBuffer_legacy() :
|
|
1083
|
+
* issue : samplesBuffer need to be followed by a noisy guard band.
|
|
1084
|
+
* work around : duplicate the buffer, and add the noise */
|
|
1085
|
+
size_t ZDICT_trainFromBuffer_legacy(void* dictBuffer, size_t dictBufferCapacity,
|
|
1086
|
+
const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
|
|
1087
|
+
ZDICT_legacy_params_t params)
|
|
971
1088
|
{
|
|
972
1089
|
size_t result;
|
|
973
1090
|
void* newBuff;
|
|
@@ -980,10 +1097,9 @@ size_t ZDICT_trainFromBuffer_advanced(void* dictBuffer, size_t dictBufferCapacit
|
|
|
980
1097
|
memcpy(newBuff, samplesBuffer, sBuffSize);
|
|
981
1098
|
ZDICT_fillNoise((char*)newBuff + sBuffSize, NOISELENGTH); /* guard band, for end of buffer condition */
|
|
982
1099
|
|
|
983
|
-
result =
|
|
984
|
-
|
|
985
|
-
|
|
986
|
-
params);
|
|
1100
|
+
result =
|
|
1101
|
+
ZDICT_trainFromBuffer_unsafe_legacy(dictBuffer, dictBufferCapacity, newBuff,
|
|
1102
|
+
samplesSizes, nbSamples, params);
|
|
987
1103
|
free(newBuff);
|
|
988
1104
|
return result;
|
|
989
1105
|
}
|
|
@@ -992,15 +1108,23 @@ size_t ZDICT_trainFromBuffer_advanced(void* dictBuffer, size_t dictBufferCapacit
|
|
|
992
1108
|
size_t ZDICT_trainFromBuffer(void* dictBuffer, size_t dictBufferCapacity,
|
|
993
1109
|
const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples)
|
|
994
1110
|
{
|
|
995
|
-
|
|
1111
|
+
ZDICT_fastCover_params_t params;
|
|
1112
|
+
DEBUGLOG(3, "ZDICT_trainFromBuffer");
|
|
996
1113
|
memset(&params, 0, sizeof(params));
|
|
997
|
-
|
|
998
|
-
|
|
999
|
-
|
|
1114
|
+
params.d = 8;
|
|
1115
|
+
params.steps = 4;
|
|
1116
|
+
/* Use default level since no compression level information is available */
|
|
1117
|
+
params.zParams.compressionLevel = ZSTD_CLEVEL_DEFAULT;
|
|
1118
|
+
#if defined(DEBUGLEVEL) && (DEBUGLEVEL>=1)
|
|
1119
|
+
params.zParams.notificationLevel = DEBUGLEVEL;
|
|
1120
|
+
#endif
|
|
1121
|
+
return ZDICT_optimizeTrainFromBuffer_fastCover(dictBuffer, dictBufferCapacity,
|
|
1122
|
+
samplesBuffer, samplesSizes, nbSamples,
|
|
1123
|
+
&params);
|
|
1000
1124
|
}
|
|
1001
1125
|
|
|
1002
1126
|
size_t ZDICT_addEntropyTablesFromBuffer(void* dictBuffer, size_t dictContentSize, size_t dictBufferCapacity,
|
|
1003
|
-
|
|
1127
|
+
const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples)
|
|
1004
1128
|
{
|
|
1005
1129
|
ZDICT_params_t params;
|
|
1006
1130
|
memset(&params, 0, sizeof(params));
|