zstd-ruby 1.4.4.0 → 1.5.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +2 -0
- data/README.md +78 -5
- data/Rakefile +8 -2
- data/ext/zstdruby/common.h +15 -0
- data/ext/zstdruby/extconf.rb +3 -2
- data/ext/zstdruby/libzstd/common/allocations.h +55 -0
- data/ext/zstdruby/libzstd/common/bits.h +200 -0
- data/ext/zstdruby/libzstd/common/bitstream.h +74 -97
- data/ext/zstdruby/libzstd/common/compiler.h +219 -20
- data/ext/zstdruby/libzstd/common/cpu.h +1 -3
- data/ext/zstdruby/libzstd/common/debug.c +11 -31
- data/ext/zstdruby/libzstd/common/debug.h +22 -49
- data/ext/zstdruby/libzstd/common/entropy_common.c +184 -80
- data/ext/zstdruby/libzstd/common/error_private.c +11 -2
- data/ext/zstdruby/libzstd/common/error_private.h +87 -4
- data/ext/zstdruby/libzstd/common/fse.h +47 -116
- data/ext/zstdruby/libzstd/common/fse_decompress.c +127 -127
- data/ext/zstdruby/libzstd/common/huf.h +112 -197
- data/ext/zstdruby/libzstd/common/mem.h +124 -142
- data/ext/zstdruby/libzstd/common/pool.c +54 -27
- data/ext/zstdruby/libzstd/common/pool.h +11 -5
- data/ext/zstdruby/libzstd/common/portability_macros.h +156 -0
- data/ext/zstdruby/libzstd/common/threading.c +78 -22
- data/ext/zstdruby/libzstd/common/threading.h +9 -13
- data/ext/zstdruby/libzstd/common/xxhash.c +15 -873
- data/ext/zstdruby/libzstd/common/xxhash.h +5572 -191
- data/ext/zstdruby/libzstd/common/zstd_common.c +2 -37
- data/ext/zstdruby/libzstd/common/zstd_deps.h +111 -0
- data/ext/zstdruby/libzstd/common/zstd_internal.h +186 -144
- data/ext/zstdruby/libzstd/common/zstd_trace.h +163 -0
- data/ext/zstdruby/libzstd/compress/clevels.h +134 -0
- data/ext/zstdruby/libzstd/compress/fse_compress.c +99 -196
- data/ext/zstdruby/libzstd/compress/hist.c +41 -63
- data/ext/zstdruby/libzstd/compress/hist.h +13 -33
- data/ext/zstdruby/libzstd/compress/huf_compress.c +968 -331
- data/ext/zstdruby/libzstd/compress/zstd_compress.c +4120 -1191
- data/ext/zstdruby/libzstd/compress/zstd_compress_internal.h +688 -159
- data/ext/zstdruby/libzstd/compress/zstd_compress_literals.c +121 -40
- data/ext/zstdruby/libzstd/compress/zstd_compress_literals.h +16 -6
- data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.c +62 -35
- data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.h +10 -3
- data/ext/zstdruby/libzstd/compress/zstd_compress_superblock.c +577 -0
- data/ext/zstdruby/libzstd/compress/zstd_compress_superblock.h +32 -0
- data/ext/zstdruby/libzstd/compress/zstd_cwksp.h +322 -115
- data/ext/zstdruby/libzstd/compress/zstd_double_fast.c +394 -154
- data/ext/zstdruby/libzstd/compress/zstd_double_fast.h +4 -3
- data/ext/zstdruby/libzstd/compress/zstd_fast.c +729 -253
- data/ext/zstdruby/libzstd/compress/zstd_fast.h +4 -3
- data/ext/zstdruby/libzstd/compress/zstd_lazy.c +1289 -247
- data/ext/zstdruby/libzstd/compress/zstd_lazy.h +61 -1
- data/ext/zstdruby/libzstd/compress/zstd_ldm.c +339 -212
- data/ext/zstdruby/libzstd/compress/zstd_ldm.h +15 -3
- data/ext/zstdruby/libzstd/compress/zstd_ldm_geartab.h +106 -0
- data/ext/zstdruby/libzstd/compress/zstd_opt.c +508 -282
- data/ext/zstdruby/libzstd/compress/zstd_opt.h +1 -1
- data/ext/zstdruby/libzstd/compress/zstdmt_compress.c +217 -466
- data/ext/zstdruby/libzstd/compress/zstdmt_compress.h +35 -114
- data/ext/zstdruby/libzstd/decompress/huf_decompress.c +1220 -572
- data/ext/zstdruby/libzstd/decompress/huf_decompress_amd64.S +576 -0
- data/ext/zstdruby/libzstd/decompress/zstd_ddict.c +23 -19
- data/ext/zstdruby/libzstd/decompress/zstd_ddict.h +3 -3
- data/ext/zstdruby/libzstd/decompress/zstd_decompress.c +859 -273
- data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.c +1244 -375
- data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.h +21 -7
- data/ext/zstdruby/libzstd/decompress/zstd_decompress_internal.h +74 -11
- data/ext/zstdruby/libzstd/dictBuilder/cover.c +75 -54
- data/ext/zstdruby/libzstd/dictBuilder/cover.h +20 -9
- data/ext/zstdruby/libzstd/dictBuilder/divsufsort.c +1 -1
- data/ext/zstdruby/libzstd/dictBuilder/fastcover.c +55 -36
- data/ext/zstdruby/libzstd/dictBuilder/zdict.c +126 -110
- data/ext/zstdruby/libzstd/{dictBuilder/zdict.h → zdict.h} +248 -56
- data/ext/zstdruby/libzstd/zstd.h +1277 -306
- data/ext/zstdruby/libzstd/{common/zstd_errors.h → zstd_errors.h} +29 -8
- data/ext/zstdruby/main.c +20 -0
- data/ext/zstdruby/skippable_frame.c +63 -0
- data/ext/zstdruby/streaming_compress.c +177 -0
- data/ext/zstdruby/streaming_compress.h +5 -0
- data/ext/zstdruby/streaming_decompress.c +123 -0
- data/ext/zstdruby/zstdruby.c +114 -32
- data/lib/zstd-ruby/version.rb +1 -1
- data/lib/zstd-ruby.rb +0 -1
- data/zstd-ruby.gemspec +1 -1
- metadata +24 -39
- data/.travis.yml +0 -14
- data/ext/zstdruby/libzstd/.gitignore +0 -3
- data/ext/zstdruby/libzstd/BUCK +0 -234
- data/ext/zstdruby/libzstd/Makefile +0 -289
- data/ext/zstdruby/libzstd/README.md +0 -159
- data/ext/zstdruby/libzstd/deprecated/zbuff.h +0 -214
- data/ext/zstdruby/libzstd/deprecated/zbuff_common.c +0 -26
- data/ext/zstdruby/libzstd/deprecated/zbuff_compress.c +0 -147
- data/ext/zstdruby/libzstd/deprecated/zbuff_decompress.c +0 -75
- data/ext/zstdruby/libzstd/dll/example/Makefile +0 -47
- data/ext/zstdruby/libzstd/dll/example/README.md +0 -69
- data/ext/zstdruby/libzstd/dll/example/build_package.bat +0 -20
- data/ext/zstdruby/libzstd/dll/example/fullbench-dll.sln +0 -25
- data/ext/zstdruby/libzstd/dll/example/fullbench-dll.vcxproj +0 -181
- data/ext/zstdruby/libzstd/legacy/zstd_legacy.h +0 -415
- data/ext/zstdruby/libzstd/legacy/zstd_v01.c +0 -2152
- data/ext/zstdruby/libzstd/legacy/zstd_v01.h +0 -94
- data/ext/zstdruby/libzstd/legacy/zstd_v02.c +0 -3514
- data/ext/zstdruby/libzstd/legacy/zstd_v02.h +0 -93
- data/ext/zstdruby/libzstd/legacy/zstd_v03.c +0 -3156
- data/ext/zstdruby/libzstd/legacy/zstd_v03.h +0 -93
- data/ext/zstdruby/libzstd/legacy/zstd_v04.c +0 -3641
- data/ext/zstdruby/libzstd/legacy/zstd_v04.h +0 -142
- data/ext/zstdruby/libzstd/legacy/zstd_v05.c +0 -4046
- data/ext/zstdruby/libzstd/legacy/zstd_v05.h +0 -162
- data/ext/zstdruby/libzstd/legacy/zstd_v06.c +0 -4150
- data/ext/zstdruby/libzstd/legacy/zstd_v06.h +0 -172
- data/ext/zstdruby/libzstd/legacy/zstd_v07.c +0 -4533
- data/ext/zstdruby/libzstd/legacy/zstd_v07.h +0 -187
- data/ext/zstdruby/libzstd/libzstd.pc.in +0 -15
- data/ext/zstdruby/zstdruby.h +0 -6
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
/*
|
|
2
|
-
* Copyright (c)
|
|
2
|
+
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
3
3
|
* All rights reserved.
|
|
4
4
|
*
|
|
5
5
|
* This source code is licensed under both the BSD-style license (found in the
|
|
@@ -23,9 +23,13 @@
|
|
|
23
23
|
/* Unix Large Files support (>4GB) */
|
|
24
24
|
#define _FILE_OFFSET_BITS 64
|
|
25
25
|
#if (defined(__sun__) && (!defined(__LP64__))) /* Sun Solaris 32-bits requires specific definitions */
|
|
26
|
+
# ifndef _LARGEFILE_SOURCE
|
|
26
27
|
# define _LARGEFILE_SOURCE
|
|
28
|
+
# endif
|
|
27
29
|
#elif ! defined(__LP64__) /* No point defining Large file for 64 bit */
|
|
30
|
+
# ifndef _LARGEFILE64_SOURCE
|
|
28
31
|
# define _LARGEFILE64_SOURCE
|
|
32
|
+
# endif
|
|
29
33
|
#endif
|
|
30
34
|
|
|
31
35
|
|
|
@@ -37,17 +41,19 @@
|
|
|
37
41
|
#include <stdio.h> /* fprintf, fopen, ftello64 */
|
|
38
42
|
#include <time.h> /* clock */
|
|
39
43
|
|
|
40
|
-
#include "mem.h" /* read */
|
|
41
|
-
#include "fse.h" /* FSE_normalizeCount, FSE_writeNCount */
|
|
42
|
-
#define HUF_STATIC_LINKING_ONLY
|
|
43
|
-
#include "huf.h" /* HUF_buildCTable, HUF_writeCTable */
|
|
44
|
-
#include "zstd_internal.h" /* includes zstd.h */
|
|
45
|
-
#include "xxhash.h" /* XXH64 */
|
|
46
|
-
#include "divsufsort.h"
|
|
47
44
|
#ifndef ZDICT_STATIC_LINKING_ONLY
|
|
48
45
|
# define ZDICT_STATIC_LINKING_ONLY
|
|
49
46
|
#endif
|
|
50
|
-
|
|
47
|
+
|
|
48
|
+
#include "../common/mem.h" /* read */
|
|
49
|
+
#include "../common/fse.h" /* FSE_normalizeCount, FSE_writeNCount */
|
|
50
|
+
#include "../common/huf.h" /* HUF_buildCTable, HUF_writeCTable */
|
|
51
|
+
#include "../common/zstd_internal.h" /* includes zstd.h */
|
|
52
|
+
#include "../common/xxhash.h" /* XXH64 */
|
|
53
|
+
#include "../compress/zstd_compress_internal.h" /* ZSTD_loadCEntropy() */
|
|
54
|
+
#include "../zdict.h"
|
|
55
|
+
#include "divsufsort.h"
|
|
56
|
+
#include "../common/bits.h" /* ZSTD_NbCommonBytes */
|
|
51
57
|
|
|
52
58
|
|
|
53
59
|
/*-*************************************
|
|
@@ -61,14 +67,15 @@
|
|
|
61
67
|
|
|
62
68
|
#define NOISELENGTH 32
|
|
63
69
|
|
|
64
|
-
static const int g_compressionLevel_default = 3;
|
|
65
70
|
static const U32 g_selectivity_default = 9;
|
|
66
71
|
|
|
67
72
|
|
|
68
73
|
/*-*************************************
|
|
69
74
|
* Console display
|
|
70
75
|
***************************************/
|
|
76
|
+
#undef DISPLAY
|
|
71
77
|
#define DISPLAY(...) { fprintf(stderr, __VA_ARGS__); fflush( stderr ); }
|
|
78
|
+
#undef DISPLAYLEVEL
|
|
72
79
|
#define DISPLAYLEVEL(l, ...) if (notificationLevel>=l) { DISPLAY(__VA_ARGS__); } /* 0 : no display; 1: errors; 2: default; 3: details; 4: debug */
|
|
73
80
|
|
|
74
81
|
static clock_t ZDICT_clockSpan(clock_t nPrevious) { return clock() - nPrevious; }
|
|
@@ -99,69 +106,30 @@ unsigned ZDICT_getDictID(const void* dictBuffer, size_t dictSize)
|
|
|
99
106
|
return MEM_readLE32((const char*)dictBuffer + 4);
|
|
100
107
|
}
|
|
101
108
|
|
|
102
|
-
|
|
103
|
-
/*-********************************************************
|
|
104
|
-
* Dictionary training functions
|
|
105
|
-
**********************************************************/
|
|
106
|
-
static unsigned ZDICT_NbCommonBytes (size_t val)
|
|
109
|
+
size_t ZDICT_getDictHeaderSize(const void* dictBuffer, size_t dictSize)
|
|
107
110
|
{
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
return DeBruijnBytePos[((U64)((val & -(long long)val) * 0x0218A392CDABBD3FULL)) >> 58];
|
|
119
|
-
# endif
|
|
120
|
-
} else { /* 32 bits */
|
|
121
|
-
# if defined(_MSC_VER)
|
|
122
|
-
unsigned long r=0;
|
|
123
|
-
_BitScanForward( &r, (U32)val );
|
|
124
|
-
return (unsigned)(r>>3);
|
|
125
|
-
# elif defined(__GNUC__) && (__GNUC__ >= 3)
|
|
126
|
-
return (__builtin_ctz((U32)val) >> 3);
|
|
127
|
-
# else
|
|
128
|
-
static const int DeBruijnBytePos[32] = { 0, 0, 3, 0, 3, 1, 3, 0, 3, 2, 2, 1, 3, 2, 0, 1, 3, 3, 1, 2, 2, 2, 2, 0, 3, 1, 2, 0, 1, 0, 1, 1 };
|
|
129
|
-
return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27];
|
|
130
|
-
# endif
|
|
111
|
+
size_t headerSize;
|
|
112
|
+
if (dictSize <= 8 || MEM_readLE32(dictBuffer) != ZSTD_MAGIC_DICTIONARY) return ERROR(dictionary_corrupted);
|
|
113
|
+
|
|
114
|
+
{ ZSTD_compressedBlockState_t* bs = (ZSTD_compressedBlockState_t*)malloc(sizeof(ZSTD_compressedBlockState_t));
|
|
115
|
+
U32* wksp = (U32*)malloc(HUF_WORKSPACE_SIZE);
|
|
116
|
+
if (!bs || !wksp) {
|
|
117
|
+
headerSize = ERROR(memory_allocation);
|
|
118
|
+
} else {
|
|
119
|
+
ZSTD_reset_compressedBlockState(bs);
|
|
120
|
+
headerSize = ZSTD_loadCEntropy(bs, wksp, dictBuffer, dictSize);
|
|
131
121
|
}
|
|
132
|
-
} else { /* Big Endian CPU */
|
|
133
|
-
if (MEM_64bits()) {
|
|
134
|
-
# if defined(_MSC_VER) && defined(_WIN64)
|
|
135
|
-
unsigned long r = 0;
|
|
136
|
-
_BitScanReverse64( &r, val );
|
|
137
|
-
return (unsigned)(r>>3);
|
|
138
|
-
# elif defined(__GNUC__) && (__GNUC__ >= 3)
|
|
139
|
-
return (__builtin_clzll(val) >> 3);
|
|
140
|
-
# else
|
|
141
|
-
unsigned r;
|
|
142
|
-
const unsigned n32 = sizeof(size_t)*4; /* calculate this way due to compiler complaining in 32-bits mode */
|
|
143
|
-
if (!(val>>n32)) { r=4; } else { r=0; val>>=n32; }
|
|
144
|
-
if (!(val>>16)) { r+=2; val>>=8; } else { val>>=24; }
|
|
145
|
-
r += (!val);
|
|
146
|
-
return r;
|
|
147
|
-
# endif
|
|
148
|
-
} else { /* 32 bits */
|
|
149
|
-
# if defined(_MSC_VER)
|
|
150
|
-
unsigned long r = 0;
|
|
151
|
-
_BitScanReverse( &r, (unsigned long)val );
|
|
152
|
-
return (unsigned)(r>>3);
|
|
153
|
-
# elif defined(__GNUC__) && (__GNUC__ >= 3)
|
|
154
|
-
return (__builtin_clz((U32)val) >> 3);
|
|
155
|
-
# else
|
|
156
|
-
unsigned r;
|
|
157
|
-
if (!(val>>16)) { r=2; val>>=8; } else { r=0; val>>=24; }
|
|
158
|
-
r += (!val);
|
|
159
|
-
return r;
|
|
160
|
-
# endif
|
|
161
|
-
} }
|
|
162
|
-
}
|
|
163
122
|
|
|
123
|
+
free(bs);
|
|
124
|
+
free(wksp);
|
|
125
|
+
}
|
|
164
126
|
|
|
127
|
+
return headerSize;
|
|
128
|
+
}
|
|
129
|
+
|
|
130
|
+
/*-********************************************************
|
|
131
|
+
* Dictionary training functions
|
|
132
|
+
**********************************************************/
|
|
165
133
|
/*! ZDICT_count() :
|
|
166
134
|
Count the nb of common bytes between 2 pointers.
|
|
167
135
|
Note : this function presumes end of buffer followed by noisy guard band.
|
|
@@ -176,7 +144,7 @@ static size_t ZDICT_count(const void* pIn, const void* pMatch)
|
|
|
176
144
|
pMatch = (const char*)pMatch+sizeof(size_t);
|
|
177
145
|
continue;
|
|
178
146
|
}
|
|
179
|
-
pIn = (const char*)pIn+
|
|
147
|
+
pIn = (const char*)pIn+ZSTD_NbCommonBytes(diff);
|
|
180
148
|
return (size_t)((const char*)pIn - pStart);
|
|
181
149
|
}
|
|
182
150
|
}
|
|
@@ -208,7 +176,7 @@ static dictItem ZDICT_analyzePos(
|
|
|
208
176
|
U32 savings[LLIMIT] = {0};
|
|
209
177
|
const BYTE* b = (const BYTE*)buffer;
|
|
210
178
|
size_t maxLength = LLIMIT;
|
|
211
|
-
size_t pos = suffix[start];
|
|
179
|
+
size_t pos = (size_t)suffix[start];
|
|
212
180
|
U32 end = start;
|
|
213
181
|
dictItem solution;
|
|
214
182
|
|
|
@@ -342,7 +310,7 @@ static dictItem ZDICT_analyzePos(
|
|
|
342
310
|
savings[i] = savings[i-1] + (lengthList[i] * (i-3));
|
|
343
311
|
|
|
344
312
|
DISPLAYLEVEL(4, "Selected dict at position %u, of length %u : saves %u (ratio: %.2f) \n",
|
|
345
|
-
(unsigned)pos, (unsigned)maxLength, (unsigned)savings[maxLength], (double)savings[maxLength] / maxLength);
|
|
313
|
+
(unsigned)pos, (unsigned)maxLength, (unsigned)savings[maxLength], (double)savings[maxLength] / (double)maxLength);
|
|
346
314
|
|
|
347
315
|
solution.pos = (U32)pos;
|
|
348
316
|
solution.length = (U32)maxLength;
|
|
@@ -352,7 +320,7 @@ static dictItem ZDICT_analyzePos(
|
|
|
352
320
|
{ U32 id;
|
|
353
321
|
for (id=start; id<end; id++) {
|
|
354
322
|
U32 p, pEnd, length;
|
|
355
|
-
U32 const testedPos = suffix[id];
|
|
323
|
+
U32 const testedPos = (U32)suffix[id];
|
|
356
324
|
if (testedPos == pos)
|
|
357
325
|
length = solution.length;
|
|
358
326
|
else {
|
|
@@ -404,7 +372,7 @@ static U32 ZDICT_tryMerge(dictItem* table, dictItem elt, U32 eltNbToSkip, const
|
|
|
404
372
|
elt = table[u];
|
|
405
373
|
/* sort : improve rank */
|
|
406
374
|
while ((u>1) && (table[u-1].savings < elt.savings))
|
|
407
|
-
|
|
375
|
+
table[u] = table[u-1], u--;
|
|
408
376
|
table[u] = elt;
|
|
409
377
|
return u;
|
|
410
378
|
} }
|
|
@@ -415,7 +383,7 @@ static U32 ZDICT_tryMerge(dictItem* table, dictItem elt, U32 eltNbToSkip, const
|
|
|
415
383
|
|
|
416
384
|
if ((table[u].pos + table[u].length >= elt.pos) && (table[u].pos < elt.pos)) { /* overlap, existing < new */
|
|
417
385
|
/* append */
|
|
418
|
-
int const addedLength = (int)eltEnd - (table[u].pos + table[u].length);
|
|
386
|
+
int const addedLength = (int)eltEnd - (int)(table[u].pos + table[u].length);
|
|
419
387
|
table[u].savings += elt.length / 8; /* rough approx bonus */
|
|
420
388
|
if (addedLength > 0) { /* otherwise, elt fully included into existing */
|
|
421
389
|
table[u].length += addedLength;
|
|
@@ -508,6 +476,7 @@ static size_t ZDICT_trainBuffer_legacy(dictItem* dictList, U32 dictListSize,
|
|
|
508
476
|
clock_t displayClock = 0;
|
|
509
477
|
clock_t const refreshRate = CLOCKS_PER_SEC * 3 / 10;
|
|
510
478
|
|
|
479
|
+
# undef DISPLAYUPDATE
|
|
511
480
|
# define DISPLAYUPDATE(l, ...) if (notificationLevel>=l) { \
|
|
512
481
|
if (ZDICT_clockSpan(displayClock) > refreshRate) \
|
|
513
482
|
{ displayClock = clock(); DISPLAY(__VA_ARGS__); \
|
|
@@ -554,7 +523,7 @@ static size_t ZDICT_trainBuffer_legacy(dictItem* dictList, U32 dictListSize,
|
|
|
554
523
|
if (solution.length==0) { cursor++; continue; }
|
|
555
524
|
ZDICT_insertDictItem(dictList, dictListSize, solution, buffer);
|
|
556
525
|
cursor += solution.length;
|
|
557
|
-
DISPLAYUPDATE(2, "\r%4.2f %% \r", (double)cursor / bufferSize * 100);
|
|
526
|
+
DISPLAYUPDATE(2, "\r%4.2f %% \r", (double)cursor / (double)bufferSize * 100.0);
|
|
558
527
|
} }
|
|
559
528
|
|
|
560
529
|
_cleanup:
|
|
@@ -588,20 +557,20 @@ typedef struct
|
|
|
588
557
|
|
|
589
558
|
#define MAXREPOFFSET 1024
|
|
590
559
|
|
|
591
|
-
static void ZDICT_countEStats(EStats_ress_t esr, ZSTD_parameters params,
|
|
560
|
+
static void ZDICT_countEStats(EStats_ress_t esr, const ZSTD_parameters* params,
|
|
592
561
|
unsigned* countLit, unsigned* offsetcodeCount, unsigned* matchlengthCount, unsigned* litlengthCount, U32* repOffsets,
|
|
593
562
|
const void* src, size_t srcSize,
|
|
594
563
|
U32 notificationLevel)
|
|
595
564
|
{
|
|
596
|
-
size_t const blockSizeMax = MIN (ZSTD_BLOCKSIZE_MAX, 1 << params
|
|
565
|
+
size_t const blockSizeMax = MIN (ZSTD_BLOCKSIZE_MAX, 1 << params->cParams.windowLog);
|
|
597
566
|
size_t cSize;
|
|
598
567
|
|
|
599
568
|
if (srcSize > blockSizeMax) srcSize = blockSizeMax; /* protection vs large samples */
|
|
600
|
-
{ size_t const errorCode =
|
|
569
|
+
{ size_t const errorCode = ZSTD_compressBegin_usingCDict_deprecated(esr.zc, esr.dict);
|
|
601
570
|
if (ZSTD_isError(errorCode)) { DISPLAYLEVEL(1, "warning : ZSTD_compressBegin_usingCDict failed \n"); return; }
|
|
602
571
|
|
|
603
572
|
}
|
|
604
|
-
cSize =
|
|
573
|
+
cSize = ZSTD_compressBlock_deprecated(esr.zc, esr.workPlace, ZSTD_BLOCKSIZE_MAX, src, srcSize);
|
|
605
574
|
if (ZSTD_isError(cSize)) { DISPLAYLEVEL(3, "warning : could not compress sample size %u \n", (unsigned)srcSize); return; }
|
|
606
575
|
|
|
607
576
|
if (cSize) { /* if == 0; block is not compressible */
|
|
@@ -634,8 +603,8 @@ static void ZDICT_countEStats(EStats_ress_t esr, ZSTD_parameters params,
|
|
|
634
603
|
|
|
635
604
|
if (nbSeq >= 2) { /* rep offsets */
|
|
636
605
|
const seqDef* const seq = seqStorePtr->sequencesStart;
|
|
637
|
-
U32 offset1 = seq[0].
|
|
638
|
-
U32 offset2 = seq[1].
|
|
606
|
+
U32 offset1 = seq[0].offBase - ZSTD_REP_NUM;
|
|
607
|
+
U32 offset2 = seq[1].offBase - ZSTD_REP_NUM;
|
|
639
608
|
if (offset1 >= MAXREPOFFSET) offset1 = 0;
|
|
640
609
|
if (offset2 >= MAXREPOFFSET) offset2 = 0;
|
|
641
610
|
repOffsets[offset1] += 3;
|
|
@@ -682,7 +651,7 @@ static void ZDICT_flatLit(unsigned* countLit)
|
|
|
682
651
|
|
|
683
652
|
#define OFFCODE_MAX 30 /* only applicable to first block */
|
|
684
653
|
static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
|
|
685
|
-
|
|
654
|
+
int compressionLevel,
|
|
686
655
|
const void* srcBuffer, const size_t* fileSizes, unsigned nbFiles,
|
|
687
656
|
const void* dictBuffer, size_t dictBufferSize,
|
|
688
657
|
unsigned notificationLevel)
|
|
@@ -706,6 +675,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
|
|
|
706
675
|
size_t const totalSrcSize = ZDICT_totalSampleSize(fileSizes, nbFiles);
|
|
707
676
|
size_t const averageSampleSize = totalSrcSize / (nbFiles + !nbFiles);
|
|
708
677
|
BYTE* dstPtr = (BYTE*)dstBuffer;
|
|
678
|
+
U32 wksp[HUF_CTABLE_WORKSPACE_SIZE_U32];
|
|
709
679
|
|
|
710
680
|
/* init */
|
|
711
681
|
DEBUGLOG(4, "ZDICT_analyzeEntropy");
|
|
@@ -717,7 +687,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
|
|
|
717
687
|
memset(repOffset, 0, sizeof(repOffset));
|
|
718
688
|
repOffset[1] = repOffset[4] = repOffset[8] = 1;
|
|
719
689
|
memset(bestRepOffset, 0, sizeof(bestRepOffset));
|
|
720
|
-
if (compressionLevel==0) compressionLevel =
|
|
690
|
+
if (compressionLevel==0) compressionLevel = ZSTD_CLEVEL_DEFAULT;
|
|
721
691
|
params = ZSTD_getParams(compressionLevel, averageSampleSize, dictBufferSize);
|
|
722
692
|
|
|
723
693
|
esr.dict = ZSTD_createCDict_advanced(dictBuffer, dictBufferSize, ZSTD_dlm_byRef, ZSTD_dct_rawContent, params.cParams, ZSTD_defaultCMem);
|
|
@@ -731,15 +701,22 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
|
|
|
731
701
|
|
|
732
702
|
/* collect stats on all samples */
|
|
733
703
|
for (u=0; u<nbFiles; u++) {
|
|
734
|
-
ZDICT_countEStats(esr, params,
|
|
704
|
+
ZDICT_countEStats(esr, &params,
|
|
735
705
|
countLit, offcodeCount, matchLengthCount, litLengthCount, repOffset,
|
|
736
706
|
(const char*)srcBuffer + pos, fileSizes[u],
|
|
737
707
|
notificationLevel);
|
|
738
708
|
pos += fileSizes[u];
|
|
739
709
|
}
|
|
740
710
|
|
|
711
|
+
if (notificationLevel >= 4) {
|
|
712
|
+
/* writeStats */
|
|
713
|
+
DISPLAYLEVEL(4, "Offset Code Frequencies : \n");
|
|
714
|
+
for (u=0; u<=offcodeMax; u++) {
|
|
715
|
+
DISPLAYLEVEL(4, "%2u :%7u \n", u, offcodeCount[u]);
|
|
716
|
+
} }
|
|
717
|
+
|
|
741
718
|
/* analyze, build stats, starting with literals */
|
|
742
|
-
{ size_t maxNbBits =
|
|
719
|
+
{ size_t maxNbBits = HUF_buildCTable_wksp(hufTable, countLit, 255, huffLog, wksp, sizeof(wksp));
|
|
743
720
|
if (HUF_isError(maxNbBits)) {
|
|
744
721
|
eSize = maxNbBits;
|
|
745
722
|
DISPLAYLEVEL(1, " HUF_buildCTable error \n");
|
|
@@ -748,7 +725,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
|
|
|
748
725
|
if (maxNbBits==8) { /* not compressible : will fail on HUF_writeCTable() */
|
|
749
726
|
DISPLAYLEVEL(2, "warning : pathological dataset : literals are not compressible : samples are noisy or too regular \n");
|
|
750
727
|
ZDICT_flatLit(countLit); /* replace distribution by a fake "mostly flat but still compressible" distribution, that HUF_writeCTable() can encode */
|
|
751
|
-
maxNbBits =
|
|
728
|
+
maxNbBits = HUF_buildCTable_wksp(hufTable, countLit, 255, huffLog, wksp, sizeof(wksp));
|
|
752
729
|
assert(maxNbBits==9);
|
|
753
730
|
}
|
|
754
731
|
huffLog = (U32)maxNbBits;
|
|
@@ -762,7 +739,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
|
|
|
762
739
|
/* note : the result of this phase should be used to better appreciate the impact on statistics */
|
|
763
740
|
|
|
764
741
|
total=0; for (u=0; u<=offcodeMax; u++) total+=offcodeCount[u];
|
|
765
|
-
errorCode = FSE_normalizeCount(offcodeNCount, Offlog, offcodeCount, total, offcodeMax);
|
|
742
|
+
errorCode = FSE_normalizeCount(offcodeNCount, Offlog, offcodeCount, total, offcodeMax, /* useLowProbCount */ 1);
|
|
766
743
|
if (FSE_isError(errorCode)) {
|
|
767
744
|
eSize = errorCode;
|
|
768
745
|
DISPLAYLEVEL(1, "FSE_normalizeCount error with offcodeCount \n");
|
|
@@ -771,7 +748,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
|
|
|
771
748
|
Offlog = (U32)errorCode;
|
|
772
749
|
|
|
773
750
|
total=0; for (u=0; u<=MaxML; u++) total+=matchLengthCount[u];
|
|
774
|
-
errorCode = FSE_normalizeCount(matchLengthNCount, mlLog, matchLengthCount, total, MaxML);
|
|
751
|
+
errorCode = FSE_normalizeCount(matchLengthNCount, mlLog, matchLengthCount, total, MaxML, /* useLowProbCount */ 1);
|
|
775
752
|
if (FSE_isError(errorCode)) {
|
|
776
753
|
eSize = errorCode;
|
|
777
754
|
DISPLAYLEVEL(1, "FSE_normalizeCount error with matchLengthCount \n");
|
|
@@ -780,7 +757,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
|
|
|
780
757
|
mlLog = (U32)errorCode;
|
|
781
758
|
|
|
782
759
|
total=0; for (u=0; u<=MaxLL; u++) total+=litLengthCount[u];
|
|
783
|
-
errorCode = FSE_normalizeCount(litLengthNCount, llLog, litLengthCount, total, MaxLL);
|
|
760
|
+
errorCode = FSE_normalizeCount(litLengthNCount, llLog, litLengthCount, total, MaxLL, /* useLowProbCount */ 1);
|
|
784
761
|
if (FSE_isError(errorCode)) {
|
|
785
762
|
eSize = errorCode;
|
|
786
763
|
DISPLAYLEVEL(1, "FSE_normalizeCount error with litLengthCount \n");
|
|
@@ -789,7 +766,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
|
|
|
789
766
|
llLog = (U32)errorCode;
|
|
790
767
|
|
|
791
768
|
/* write result to buffer */
|
|
792
|
-
{ size_t const hhSize =
|
|
769
|
+
{ size_t const hhSize = HUF_writeCTable_wksp(dstPtr, maxDstSize, hufTable, 255, huffLog, wksp, sizeof(wksp));
|
|
793
770
|
if (HUF_isError(hhSize)) {
|
|
794
771
|
eSize = hhSize;
|
|
795
772
|
DISPLAYLEVEL(1, "HUF_writeCTable error \n");
|
|
@@ -844,7 +821,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
|
|
|
844
821
|
MEM_writeLE32(dstPtr+8, bestRepOffset[2].offset);
|
|
845
822
|
#else
|
|
846
823
|
/* at this stage, we don't use the result of "most common first offset",
|
|
847
|
-
|
|
824
|
+
* as the impact of statistics is not properly evaluated */
|
|
848
825
|
MEM_writeLE32(dstPtr+0, repStartValue[0]);
|
|
849
826
|
MEM_writeLE32(dstPtr+4, repStartValue[1]);
|
|
850
827
|
MEM_writeLE32(dstPtr+8, repStartValue[2]);
|
|
@@ -860,6 +837,17 @@ _cleanup:
|
|
|
860
837
|
}
|
|
861
838
|
|
|
862
839
|
|
|
840
|
+
/**
|
|
841
|
+
* @returns the maximum repcode value
|
|
842
|
+
*/
|
|
843
|
+
static U32 ZDICT_maxRep(U32 const reps[ZSTD_REP_NUM])
|
|
844
|
+
{
|
|
845
|
+
U32 maxRep = reps[0];
|
|
846
|
+
int r;
|
|
847
|
+
for (r = 1; r < ZSTD_REP_NUM; ++r)
|
|
848
|
+
maxRep = MAX(maxRep, reps[r]);
|
|
849
|
+
return maxRep;
|
|
850
|
+
}
|
|
863
851
|
|
|
864
852
|
size_t ZDICT_finalizeDictionary(void* dictBuffer, size_t dictBufferCapacity,
|
|
865
853
|
const void* customDictContent, size_t dictContentSize,
|
|
@@ -869,13 +857,15 @@ size_t ZDICT_finalizeDictionary(void* dictBuffer, size_t dictBufferCapacity,
|
|
|
869
857
|
size_t hSize;
|
|
870
858
|
#define HBUFFSIZE 256 /* should prove large enough for all entropy headers */
|
|
871
859
|
BYTE header[HBUFFSIZE];
|
|
872
|
-
int const compressionLevel = (params.compressionLevel == 0) ?
|
|
860
|
+
int const compressionLevel = (params.compressionLevel == 0) ? ZSTD_CLEVEL_DEFAULT : params.compressionLevel;
|
|
873
861
|
U32 const notificationLevel = params.notificationLevel;
|
|
862
|
+
/* The final dictionary content must be at least as large as the largest repcode */
|
|
863
|
+
size_t const minContentSize = (size_t)ZDICT_maxRep(repStartValue);
|
|
864
|
+
size_t paddingSize;
|
|
874
865
|
|
|
875
866
|
/* check conditions */
|
|
876
867
|
DEBUGLOG(4, "ZDICT_finalizeDictionary");
|
|
877
868
|
if (dictBufferCapacity < dictContentSize) return ERROR(dstSize_tooSmall);
|
|
878
|
-
if (dictContentSize < ZDICT_CONTENTSIZE_MIN) return ERROR(srcSize_wrong);
|
|
879
869
|
if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) return ERROR(dstSize_tooSmall);
|
|
880
870
|
|
|
881
871
|
/* dictionary header */
|
|
@@ -899,12 +889,43 @@ size_t ZDICT_finalizeDictionary(void* dictBuffer, size_t dictBufferCapacity,
|
|
|
899
889
|
hSize += eSize;
|
|
900
890
|
}
|
|
901
891
|
|
|
902
|
-
/*
|
|
903
|
-
if (hSize + dictContentSize > dictBufferCapacity)
|
|
904
|
-
|
|
905
|
-
|
|
906
|
-
|
|
907
|
-
|
|
892
|
+
/* Shrink the content size if it doesn't fit in the buffer */
|
|
893
|
+
if (hSize + dictContentSize > dictBufferCapacity) {
|
|
894
|
+
dictContentSize = dictBufferCapacity - hSize;
|
|
895
|
+
}
|
|
896
|
+
|
|
897
|
+
/* Pad the dictionary content with zeros if it is too small */
|
|
898
|
+
if (dictContentSize < minContentSize) {
|
|
899
|
+
RETURN_ERROR_IF(hSize + minContentSize > dictBufferCapacity, dstSize_tooSmall,
|
|
900
|
+
"dictBufferCapacity too small to fit max repcode");
|
|
901
|
+
paddingSize = minContentSize - dictContentSize;
|
|
902
|
+
} else {
|
|
903
|
+
paddingSize = 0;
|
|
904
|
+
}
|
|
905
|
+
|
|
906
|
+
{
|
|
907
|
+
size_t const dictSize = hSize + paddingSize + dictContentSize;
|
|
908
|
+
|
|
909
|
+
/* The dictionary consists of the header, optional padding, and the content.
|
|
910
|
+
* The padding comes before the content because the "best" position in the
|
|
911
|
+
* dictionary is the last byte.
|
|
912
|
+
*/
|
|
913
|
+
BYTE* const outDictHeader = (BYTE*)dictBuffer;
|
|
914
|
+
BYTE* const outDictPadding = outDictHeader + hSize;
|
|
915
|
+
BYTE* const outDictContent = outDictPadding + paddingSize;
|
|
916
|
+
|
|
917
|
+
assert(dictSize <= dictBufferCapacity);
|
|
918
|
+
assert(outDictContent + dictContentSize == (BYTE*)dictBuffer + dictSize);
|
|
919
|
+
|
|
920
|
+
/* First copy the customDictContent into its final location.
|
|
921
|
+
* `customDictContent` and `dictBuffer` may overlap, so we must
|
|
922
|
+
* do this before any other writes into the output buffer.
|
|
923
|
+
* Then copy the header & padding into the output buffer.
|
|
924
|
+
*/
|
|
925
|
+
memmove(outDictContent, customDictContent, dictContentSize);
|
|
926
|
+
memcpy(outDictHeader, header, hSize);
|
|
927
|
+
memset(outDictPadding, 0, paddingSize);
|
|
928
|
+
|
|
908
929
|
return dictSize;
|
|
909
930
|
}
|
|
910
931
|
}
|
|
@@ -915,7 +936,7 @@ static size_t ZDICT_addEntropyTablesFromBuffer_advanced(
|
|
|
915
936
|
const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
|
|
916
937
|
ZDICT_params_t params)
|
|
917
938
|
{
|
|
918
|
-
int const compressionLevel = (params.compressionLevel == 0) ?
|
|
939
|
+
int const compressionLevel = (params.compressionLevel == 0) ? ZSTD_CLEVEL_DEFAULT : params.compressionLevel;
|
|
919
940
|
U32 const notificationLevel = params.notificationLevel;
|
|
920
941
|
size_t hSize = 8;
|
|
921
942
|
|
|
@@ -944,16 +965,11 @@ static size_t ZDICT_addEntropyTablesFromBuffer_advanced(
|
|
|
944
965
|
return MIN(dictBufferCapacity, hSize+dictContentSize);
|
|
945
966
|
}
|
|
946
967
|
|
|
947
|
-
/* Hidden declaration for dbio.c */
|
|
948
|
-
size_t ZDICT_trainFromBuffer_unsafe_legacy(
|
|
949
|
-
void* dictBuffer, size_t maxDictSize,
|
|
950
|
-
const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
|
|
951
|
-
ZDICT_legacy_params_t params);
|
|
952
968
|
/*! ZDICT_trainFromBuffer_unsafe_legacy() :
|
|
953
|
-
* Warning : `samplesBuffer` must be followed by noisy guard band
|
|
969
|
+
* Warning : `samplesBuffer` must be followed by noisy guard band !!!
|
|
954
970
|
* @return : size of dictionary, or an error code which can be tested with ZDICT_isError()
|
|
955
971
|
*/
|
|
956
|
-
size_t ZDICT_trainFromBuffer_unsafe_legacy(
|
|
972
|
+
static size_t ZDICT_trainFromBuffer_unsafe_legacy(
|
|
957
973
|
void* dictBuffer, size_t maxDictSize,
|
|
958
974
|
const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
|
|
959
975
|
ZDICT_legacy_params_t params)
|
|
@@ -1090,8 +1106,8 @@ size_t ZDICT_trainFromBuffer(void* dictBuffer, size_t dictBufferCapacity,
|
|
|
1090
1106
|
memset(&params, 0, sizeof(params));
|
|
1091
1107
|
params.d = 8;
|
|
1092
1108
|
params.steps = 4;
|
|
1093
|
-
/*
|
|
1094
|
-
params.zParams.compressionLevel =
|
|
1109
|
+
/* Use default level since no compression level information is available */
|
|
1110
|
+
params.zParams.compressionLevel = ZSTD_CLEVEL_DEFAULT;
|
|
1095
1111
|
#if defined(DEBUGLEVEL) && (DEBUGLEVEL>=1)
|
|
1096
1112
|
params.zParams.notificationLevel = DEBUGLEVEL;
|
|
1097
1113
|
#endif
|