extzstd 0.3.1 → 0.3.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +28 -14
- data/contrib/zstd/CHANGELOG +301 -56
- data/contrib/zstd/CONTRIBUTING.md +169 -72
- data/contrib/zstd/LICENSE +4 -4
- data/contrib/zstd/Makefile +116 -87
- data/contrib/zstd/Package.swift +36 -0
- data/contrib/zstd/README.md +62 -32
- data/contrib/zstd/TESTING.md +2 -3
- data/contrib/zstd/appveyor.yml +52 -136
- data/contrib/zstd/lib/BUCK +5 -7
- data/contrib/zstd/lib/Makefile +225 -222
- data/contrib/zstd/lib/README.md +51 -6
- data/contrib/zstd/lib/common/allocations.h +55 -0
- data/contrib/zstd/lib/common/bits.h +200 -0
- data/contrib/zstd/lib/common/bitstream.h +45 -62
- data/contrib/zstd/lib/common/compiler.h +205 -22
- data/contrib/zstd/lib/common/cpu.h +1 -3
- data/contrib/zstd/lib/common/debug.c +1 -1
- data/contrib/zstd/lib/common/debug.h +12 -19
- data/contrib/zstd/lib/common/entropy_common.c +172 -48
- data/contrib/zstd/lib/common/error_private.c +10 -2
- data/contrib/zstd/lib/common/error_private.h +82 -3
- data/contrib/zstd/lib/common/fse.h +37 -86
- data/contrib/zstd/lib/common/fse_decompress.c +117 -92
- data/contrib/zstd/lib/common/huf.h +99 -166
- data/contrib/zstd/lib/common/mem.h +124 -142
- data/contrib/zstd/lib/common/pool.c +54 -27
- data/contrib/zstd/lib/common/pool.h +10 -4
- data/contrib/zstd/lib/common/portability_macros.h +156 -0
- data/contrib/zstd/lib/common/threading.c +74 -19
- data/contrib/zstd/lib/common/threading.h +5 -10
- data/contrib/zstd/lib/common/xxhash.c +7 -847
- data/contrib/zstd/lib/common/xxhash.h +5568 -167
- data/contrib/zstd/lib/common/zstd_common.c +2 -37
- data/contrib/zstd/lib/common/zstd_deps.h +111 -0
- data/contrib/zstd/lib/common/zstd_internal.h +132 -187
- data/contrib/zstd/lib/common/zstd_trace.h +163 -0
- data/contrib/zstd/lib/compress/clevels.h +134 -0
- data/contrib/zstd/lib/compress/fse_compress.c +83 -157
- data/contrib/zstd/lib/compress/hist.c +27 -29
- data/contrib/zstd/lib/compress/hist.h +2 -2
- data/contrib/zstd/lib/compress/huf_compress.c +916 -279
- data/contrib/zstd/lib/compress/zstd_compress.c +3773 -1019
- data/contrib/zstd/lib/compress/zstd_compress_internal.h +610 -203
- data/contrib/zstd/lib/compress/zstd_compress_literals.c +119 -42
- data/contrib/zstd/lib/compress/zstd_compress_literals.h +16 -6
- data/contrib/zstd/lib/compress/zstd_compress_sequences.c +42 -19
- data/contrib/zstd/lib/compress/zstd_compress_sequences.h +1 -1
- data/contrib/zstd/lib/compress/zstd_compress_superblock.c +49 -317
- data/contrib/zstd/lib/compress/zstd_compress_superblock.h +1 -1
- data/contrib/zstd/lib/compress/zstd_cwksp.h +320 -103
- data/contrib/zstd/lib/compress/zstd_double_fast.c +388 -151
- data/contrib/zstd/lib/compress/zstd_double_fast.h +3 -2
- data/contrib/zstd/lib/compress/zstd_fast.c +729 -265
- data/contrib/zstd/lib/compress/zstd_fast.h +3 -2
- data/contrib/zstd/lib/compress/zstd_lazy.c +1270 -251
- data/contrib/zstd/lib/compress/zstd_lazy.h +61 -1
- data/contrib/zstd/lib/compress/zstd_ldm.c +324 -219
- data/contrib/zstd/lib/compress/zstd_ldm.h +9 -2
- data/contrib/zstd/lib/compress/zstd_ldm_geartab.h +106 -0
- data/contrib/zstd/lib/compress/zstd_opt.c +481 -209
- data/contrib/zstd/lib/compress/zstd_opt.h +1 -1
- data/contrib/zstd/lib/compress/zstdmt_compress.c +181 -457
- data/contrib/zstd/lib/compress/zstdmt_compress.h +34 -113
- data/contrib/zstd/lib/decompress/huf_decompress.c +1199 -565
- data/contrib/zstd/lib/decompress/huf_decompress_amd64.S +576 -0
- data/contrib/zstd/lib/decompress/zstd_ddict.c +12 -12
- data/contrib/zstd/lib/decompress/zstd_ddict.h +2 -2
- data/contrib/zstd/lib/decompress/zstd_decompress.c +627 -157
- data/contrib/zstd/lib/decompress/zstd_decompress_block.c +1086 -326
- data/contrib/zstd/lib/decompress/zstd_decompress_block.h +19 -5
- data/contrib/zstd/lib/decompress/zstd_decompress_internal.h +62 -13
- data/contrib/zstd/lib/deprecated/zbuff.h +1 -1
- data/contrib/zstd/lib/deprecated/zbuff_common.c +1 -1
- data/contrib/zstd/lib/deprecated/zbuff_compress.c +24 -4
- data/contrib/zstd/lib/deprecated/zbuff_decompress.c +3 -1
- data/contrib/zstd/lib/dictBuilder/cover.c +73 -52
- data/contrib/zstd/lib/dictBuilder/cover.h +7 -6
- data/contrib/zstd/lib/dictBuilder/divsufsort.c +1 -1
- data/contrib/zstd/lib/dictBuilder/fastcover.c +44 -35
- data/contrib/zstd/lib/dictBuilder/zdict.c +103 -111
- data/contrib/zstd/lib/legacy/zstd_legacy.h +8 -1
- data/contrib/zstd/lib/legacy/zstd_v01.c +21 -54
- data/contrib/zstd/lib/legacy/zstd_v01.h +1 -1
- data/contrib/zstd/lib/legacy/zstd_v02.c +29 -70
- data/contrib/zstd/lib/legacy/zstd_v02.h +1 -1
- data/contrib/zstd/lib/legacy/zstd_v03.c +30 -73
- data/contrib/zstd/lib/legacy/zstd_v03.h +1 -1
- data/contrib/zstd/lib/legacy/zstd_v04.c +29 -71
- data/contrib/zstd/lib/legacy/zstd_v04.h +1 -1
- data/contrib/zstd/lib/legacy/zstd_v05.c +40 -86
- data/contrib/zstd/lib/legacy/zstd_v05.h +1 -1
- data/contrib/zstd/lib/legacy/zstd_v06.c +47 -88
- data/contrib/zstd/lib/legacy/zstd_v06.h +1 -1
- data/contrib/zstd/lib/legacy/zstd_v07.c +40 -83
- data/contrib/zstd/lib/legacy/zstd_v07.h +1 -1
- data/contrib/zstd/lib/libzstd.mk +214 -0
- data/contrib/zstd/lib/libzstd.pc.in +7 -6
- data/contrib/zstd/lib/module.modulemap +35 -0
- data/contrib/zstd/lib/{dictBuilder/zdict.h → zdict.h} +203 -34
- data/contrib/zstd/lib/zstd.h +1217 -287
- data/contrib/zstd/lib/{common/zstd_errors.h → zstd_errors.h} +28 -8
- data/ext/extconf.rb +7 -6
- data/ext/extzstd.c +19 -10
- data/ext/extzstd.h +6 -0
- data/ext/libzstd_conf.h +0 -1
- data/ext/zstd_decompress_asm.S +1 -0
- data/gemstub.rb +3 -21
- data/lib/extzstd/version.rb +6 -1
- data/lib/extzstd.rb +0 -2
- data/test/test_basic.rb +0 -5
- metadata +18 -6
@@ -1,5 +1,5 @@
|
|
1
1
|
/*
|
2
|
-
* Copyright (c)
|
2
|
+
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
3
3
|
* All rights reserved.
|
4
4
|
*
|
5
5
|
* This source code is licensed under both the BSD-style license (found in the
|
@@ -23,9 +23,13 @@
|
|
23
23
|
/* Unix Large Files support (>4GB) */
|
24
24
|
#define _FILE_OFFSET_BITS 64
|
25
25
|
#if (defined(__sun__) && (!defined(__LP64__))) /* Sun Solaris 32-bits requires specific definitions */
|
26
|
+
# ifndef _LARGEFILE_SOURCE
|
26
27
|
# define _LARGEFILE_SOURCE
|
28
|
+
# endif
|
27
29
|
#elif ! defined(__LP64__) /* No point defining Large file for 64 bit */
|
30
|
+
# ifndef _LARGEFILE64_SOURCE
|
28
31
|
# define _LARGEFILE64_SOURCE
|
32
|
+
# endif
|
29
33
|
#endif
|
30
34
|
|
31
35
|
|
@@ -37,18 +41,19 @@
|
|
37
41
|
#include <stdio.h> /* fprintf, fopen, ftello64 */
|
38
42
|
#include <time.h> /* clock */
|
39
43
|
|
44
|
+
#ifndef ZDICT_STATIC_LINKING_ONLY
|
45
|
+
# define ZDICT_STATIC_LINKING_ONLY
|
46
|
+
#endif
|
47
|
+
|
40
48
|
#include "../common/mem.h" /* read */
|
41
49
|
#include "../common/fse.h" /* FSE_normalizeCount, FSE_writeNCount */
|
42
|
-
#define HUF_STATIC_LINKING_ONLY
|
43
50
|
#include "../common/huf.h" /* HUF_buildCTable, HUF_writeCTable */
|
44
51
|
#include "../common/zstd_internal.h" /* includes zstd.h */
|
45
52
|
#include "../common/xxhash.h" /* XXH64 */
|
46
|
-
#include "divsufsort.h"
|
47
|
-
#ifndef ZDICT_STATIC_LINKING_ONLY
|
48
|
-
# define ZDICT_STATIC_LINKING_ONLY
|
49
|
-
#endif
|
50
|
-
#include "zdict.h"
|
51
53
|
#include "../compress/zstd_compress_internal.h" /* ZSTD_loadCEntropy() */
|
54
|
+
#include "../zdict.h"
|
55
|
+
#include "divsufsort.h"
|
56
|
+
#include "../common/bits.h" /* ZSTD_NbCommonBytes */
|
52
57
|
|
53
58
|
|
54
59
|
/*-*************************************
|
@@ -62,14 +67,15 @@
|
|
62
67
|
|
63
68
|
#define NOISELENGTH 32
|
64
69
|
|
65
|
-
static const int g_compressionLevel_default = 3;
|
66
70
|
static const U32 g_selectivity_default = 9;
|
67
71
|
|
68
72
|
|
69
73
|
/*-*************************************
|
70
74
|
* Console display
|
71
75
|
***************************************/
|
76
|
+
#undef DISPLAY
|
72
77
|
#define DISPLAY(...) { fprintf(stderr, __VA_ARGS__); fflush( stderr ); }
|
78
|
+
#undef DISPLAYLEVEL
|
73
79
|
#define DISPLAYLEVEL(l, ...) if (notificationLevel>=l) { DISPLAY(__VA_ARGS__); } /* 0 : no display; 1: errors; 2: default; 3: details; 4: debug */
|
74
80
|
|
75
81
|
static clock_t ZDICT_clockSpan(clock_t nPrevious) { return clock() - nPrevious; }
|
@@ -105,20 +111,17 @@ size_t ZDICT_getDictHeaderSize(const void* dictBuffer, size_t dictSize)
|
|
105
111
|
size_t headerSize;
|
106
112
|
if (dictSize <= 8 || MEM_readLE32(dictBuffer) != ZSTD_MAGIC_DICTIONARY) return ERROR(dictionary_corrupted);
|
107
113
|
|
108
|
-
{
|
109
|
-
ZSTD_compressedBlockState_t* bs = (ZSTD_compressedBlockState_t*)malloc(sizeof(ZSTD_compressedBlockState_t));
|
114
|
+
{ ZSTD_compressedBlockState_t* bs = (ZSTD_compressedBlockState_t*)malloc(sizeof(ZSTD_compressedBlockState_t));
|
110
115
|
U32* wksp = (U32*)malloc(HUF_WORKSPACE_SIZE);
|
111
|
-
|
112
|
-
if (!bs || !wksp || !offcodeNCount) {
|
116
|
+
if (!bs || !wksp) {
|
113
117
|
headerSize = ERROR(memory_allocation);
|
114
118
|
} else {
|
115
119
|
ZSTD_reset_compressedBlockState(bs);
|
116
|
-
headerSize = ZSTD_loadCEntropy(bs, wksp,
|
120
|
+
headerSize = ZSTD_loadCEntropy(bs, wksp, dictBuffer, dictSize);
|
117
121
|
}
|
118
122
|
|
119
123
|
free(bs);
|
120
124
|
free(wksp);
|
121
|
-
free(offcodeNCount);
|
122
125
|
}
|
123
126
|
|
124
127
|
return headerSize;
|
@@ -127,65 +130,6 @@ size_t ZDICT_getDictHeaderSize(const void* dictBuffer, size_t dictSize)
|
|
127
130
|
/*-********************************************************
|
128
131
|
* Dictionary training functions
|
129
132
|
**********************************************************/
|
130
|
-
static unsigned ZDICT_NbCommonBytes (size_t val)
|
131
|
-
{
|
132
|
-
if (MEM_isLittleEndian()) {
|
133
|
-
if (MEM_64bits()) {
|
134
|
-
# if defined(_MSC_VER) && defined(_WIN64)
|
135
|
-
unsigned long r = 0;
|
136
|
-
_BitScanForward64( &r, (U64)val );
|
137
|
-
return (unsigned)(r>>3);
|
138
|
-
# elif defined(__GNUC__) && (__GNUC__ >= 3)
|
139
|
-
return (__builtin_ctzll((U64)val) >> 3);
|
140
|
-
# else
|
141
|
-
static const int DeBruijnBytePos[64] = { 0, 0, 0, 0, 0, 1, 1, 2, 0, 3, 1, 3, 1, 4, 2, 7, 0, 2, 3, 6, 1, 5, 3, 5, 1, 3, 4, 4, 2, 5, 6, 7, 7, 0, 1, 2, 3, 3, 4, 6, 2, 6, 5, 5, 3, 4, 5, 6, 7, 1, 2, 4, 6, 4, 4, 5, 7, 2, 6, 5, 7, 6, 7, 7 };
|
142
|
-
return DeBruijnBytePos[((U64)((val & -(long long)val) * 0x0218A392CDABBD3FULL)) >> 58];
|
143
|
-
# endif
|
144
|
-
} else { /* 32 bits */
|
145
|
-
# if defined(_MSC_VER)
|
146
|
-
unsigned long r=0;
|
147
|
-
_BitScanForward( &r, (U32)val );
|
148
|
-
return (unsigned)(r>>3);
|
149
|
-
# elif defined(__GNUC__) && (__GNUC__ >= 3)
|
150
|
-
return (__builtin_ctz((U32)val) >> 3);
|
151
|
-
# else
|
152
|
-
static const int DeBruijnBytePos[32] = { 0, 0, 3, 0, 3, 1, 3, 0, 3, 2, 2, 1, 3, 2, 0, 1, 3, 3, 1, 2, 2, 2, 2, 0, 3, 1, 2, 0, 1, 0, 1, 1 };
|
153
|
-
return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27];
|
154
|
-
# endif
|
155
|
-
}
|
156
|
-
} else { /* Big Endian CPU */
|
157
|
-
if (MEM_64bits()) {
|
158
|
-
# if defined(_MSC_VER) && defined(_WIN64)
|
159
|
-
unsigned long r = 0;
|
160
|
-
_BitScanReverse64( &r, val );
|
161
|
-
return (unsigned)(r>>3);
|
162
|
-
# elif defined(__GNUC__) && (__GNUC__ >= 3)
|
163
|
-
return (__builtin_clzll(val) >> 3);
|
164
|
-
# else
|
165
|
-
unsigned r;
|
166
|
-
const unsigned n32 = sizeof(size_t)*4; /* calculate this way due to compiler complaining in 32-bits mode */
|
167
|
-
if (!(val>>n32)) { r=4; } else { r=0; val>>=n32; }
|
168
|
-
if (!(val>>16)) { r+=2; val>>=8; } else { val>>=24; }
|
169
|
-
r += (!val);
|
170
|
-
return r;
|
171
|
-
# endif
|
172
|
-
} else { /* 32 bits */
|
173
|
-
# if defined(_MSC_VER)
|
174
|
-
unsigned long r = 0;
|
175
|
-
_BitScanReverse( &r, (unsigned long)val );
|
176
|
-
return (unsigned)(r>>3);
|
177
|
-
# elif defined(__GNUC__) && (__GNUC__ >= 3)
|
178
|
-
return (__builtin_clz((U32)val) >> 3);
|
179
|
-
# else
|
180
|
-
unsigned r;
|
181
|
-
if (!(val>>16)) { r=2; val>>=8; } else { r=0; val>>=24; }
|
182
|
-
r += (!val);
|
183
|
-
return r;
|
184
|
-
# endif
|
185
|
-
} }
|
186
|
-
}
|
187
|
-
|
188
|
-
|
189
133
|
/*! ZDICT_count() :
|
190
134
|
Count the nb of common bytes between 2 pointers.
|
191
135
|
Note : this function presumes end of buffer followed by noisy guard band.
|
@@ -200,7 +144,7 @@ static size_t ZDICT_count(const void* pIn, const void* pMatch)
|
|
200
144
|
pMatch = (const char*)pMatch+sizeof(size_t);
|
201
145
|
continue;
|
202
146
|
}
|
203
|
-
pIn = (const char*)pIn+
|
147
|
+
pIn = (const char*)pIn+ZSTD_NbCommonBytes(diff);
|
204
148
|
return (size_t)((const char*)pIn - pStart);
|
205
149
|
}
|
206
150
|
}
|
@@ -232,7 +176,7 @@ static dictItem ZDICT_analyzePos(
|
|
232
176
|
U32 savings[LLIMIT] = {0};
|
233
177
|
const BYTE* b = (const BYTE*)buffer;
|
234
178
|
size_t maxLength = LLIMIT;
|
235
|
-
size_t pos = suffix[start];
|
179
|
+
size_t pos = (size_t)suffix[start];
|
236
180
|
U32 end = start;
|
237
181
|
dictItem solution;
|
238
182
|
|
@@ -366,7 +310,7 @@ static dictItem ZDICT_analyzePos(
|
|
366
310
|
savings[i] = savings[i-1] + (lengthList[i] * (i-3));
|
367
311
|
|
368
312
|
DISPLAYLEVEL(4, "Selected dict at position %u, of length %u : saves %u (ratio: %.2f) \n",
|
369
|
-
(unsigned)pos, (unsigned)maxLength, (unsigned)savings[maxLength], (double)savings[maxLength] / maxLength);
|
313
|
+
(unsigned)pos, (unsigned)maxLength, (unsigned)savings[maxLength], (double)savings[maxLength] / (double)maxLength);
|
370
314
|
|
371
315
|
solution.pos = (U32)pos;
|
372
316
|
solution.length = (U32)maxLength;
|
@@ -376,7 +320,7 @@ static dictItem ZDICT_analyzePos(
|
|
376
320
|
{ U32 id;
|
377
321
|
for (id=start; id<end; id++) {
|
378
322
|
U32 p, pEnd, length;
|
379
|
-
U32 const testedPos = suffix[id];
|
323
|
+
U32 const testedPos = (U32)suffix[id];
|
380
324
|
if (testedPos == pos)
|
381
325
|
length = solution.length;
|
382
326
|
else {
|
@@ -428,7 +372,7 @@ static U32 ZDICT_tryMerge(dictItem* table, dictItem elt, U32 eltNbToSkip, const
|
|
428
372
|
elt = table[u];
|
429
373
|
/* sort : improve rank */
|
430
374
|
while ((u>1) && (table[u-1].savings < elt.savings))
|
431
|
-
|
375
|
+
table[u] = table[u-1], u--;
|
432
376
|
table[u] = elt;
|
433
377
|
return u;
|
434
378
|
} }
|
@@ -439,7 +383,7 @@ static U32 ZDICT_tryMerge(dictItem* table, dictItem elt, U32 eltNbToSkip, const
|
|
439
383
|
|
440
384
|
if ((table[u].pos + table[u].length >= elt.pos) && (table[u].pos < elt.pos)) { /* overlap, existing < new */
|
441
385
|
/* append */
|
442
|
-
int const addedLength = (int)eltEnd - (table[u].pos + table[u].length);
|
386
|
+
int const addedLength = (int)eltEnd - (int)(table[u].pos + table[u].length);
|
443
387
|
table[u].savings += elt.length / 8; /* rough approx bonus */
|
444
388
|
if (addedLength > 0) { /* otherwise, elt fully included into existing */
|
445
389
|
table[u].length += addedLength;
|
@@ -532,6 +476,7 @@ static size_t ZDICT_trainBuffer_legacy(dictItem* dictList, U32 dictListSize,
|
|
532
476
|
clock_t displayClock = 0;
|
533
477
|
clock_t const refreshRate = CLOCKS_PER_SEC * 3 / 10;
|
534
478
|
|
479
|
+
# undef DISPLAYUPDATE
|
535
480
|
# define DISPLAYUPDATE(l, ...) if (notificationLevel>=l) { \
|
536
481
|
if (ZDICT_clockSpan(displayClock) > refreshRate) \
|
537
482
|
{ displayClock = clock(); DISPLAY(__VA_ARGS__); \
|
@@ -578,7 +523,7 @@ static size_t ZDICT_trainBuffer_legacy(dictItem* dictList, U32 dictListSize,
|
|
578
523
|
if (solution.length==0) { cursor++; continue; }
|
579
524
|
ZDICT_insertDictItem(dictList, dictListSize, solution, buffer);
|
580
525
|
cursor += solution.length;
|
581
|
-
DISPLAYUPDATE(2, "\r%4.2f %% \r", (double)cursor / bufferSize * 100);
|
526
|
+
DISPLAYUPDATE(2, "\r%4.2f %% \r", (double)cursor / (double)bufferSize * 100.0);
|
582
527
|
} }
|
583
528
|
|
584
529
|
_cleanup:
|
@@ -621,11 +566,11 @@ static void ZDICT_countEStats(EStats_ress_t esr, const ZSTD_parameters* params,
|
|
621
566
|
size_t cSize;
|
622
567
|
|
623
568
|
if (srcSize > blockSizeMax) srcSize = blockSizeMax; /* protection vs large samples */
|
624
|
-
{ size_t const errorCode =
|
569
|
+
{ size_t const errorCode = ZSTD_compressBegin_usingCDict_deprecated(esr.zc, esr.dict);
|
625
570
|
if (ZSTD_isError(errorCode)) { DISPLAYLEVEL(1, "warning : ZSTD_compressBegin_usingCDict failed \n"); return; }
|
626
571
|
|
627
572
|
}
|
628
|
-
cSize =
|
573
|
+
cSize = ZSTD_compressBlock_deprecated(esr.zc, esr.workPlace, ZSTD_BLOCKSIZE_MAX, src, srcSize);
|
629
574
|
if (ZSTD_isError(cSize)) { DISPLAYLEVEL(3, "warning : could not compress sample size %u \n", (unsigned)srcSize); return; }
|
630
575
|
|
631
576
|
if (cSize) { /* if == 0; block is not compressible */
|
@@ -658,8 +603,8 @@ static void ZDICT_countEStats(EStats_ress_t esr, const ZSTD_parameters* params,
|
|
658
603
|
|
659
604
|
if (nbSeq >= 2) { /* rep offsets */
|
660
605
|
const seqDef* const seq = seqStorePtr->sequencesStart;
|
661
|
-
U32 offset1 = seq[0].
|
662
|
-
U32 offset2 = seq[1].
|
606
|
+
U32 offset1 = seq[0].offBase - ZSTD_REP_NUM;
|
607
|
+
U32 offset2 = seq[1].offBase - ZSTD_REP_NUM;
|
663
608
|
if (offset1 >= MAXREPOFFSET) offset1 = 0;
|
664
609
|
if (offset2 >= MAXREPOFFSET) offset2 = 0;
|
665
610
|
repOffsets[offset1] += 3;
|
@@ -706,7 +651,7 @@ static void ZDICT_flatLit(unsigned* countLit)
|
|
706
651
|
|
707
652
|
#define OFFCODE_MAX 30 /* only applicable to first block */
|
708
653
|
static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
|
709
|
-
|
654
|
+
int compressionLevel,
|
710
655
|
const void* srcBuffer, const size_t* fileSizes, unsigned nbFiles,
|
711
656
|
const void* dictBuffer, size_t dictBufferSize,
|
712
657
|
unsigned notificationLevel)
|
@@ -730,6 +675,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
|
|
730
675
|
size_t const totalSrcSize = ZDICT_totalSampleSize(fileSizes, nbFiles);
|
731
676
|
size_t const averageSampleSize = totalSrcSize / (nbFiles + !nbFiles);
|
732
677
|
BYTE* dstPtr = (BYTE*)dstBuffer;
|
678
|
+
U32 wksp[HUF_CTABLE_WORKSPACE_SIZE_U32];
|
733
679
|
|
734
680
|
/* init */
|
735
681
|
DEBUGLOG(4, "ZDICT_analyzeEntropy");
|
@@ -741,7 +687,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
|
|
741
687
|
memset(repOffset, 0, sizeof(repOffset));
|
742
688
|
repOffset[1] = repOffset[4] = repOffset[8] = 1;
|
743
689
|
memset(bestRepOffset, 0, sizeof(bestRepOffset));
|
744
|
-
if (compressionLevel==0) compressionLevel =
|
690
|
+
if (compressionLevel==0) compressionLevel = ZSTD_CLEVEL_DEFAULT;
|
745
691
|
params = ZSTD_getParams(compressionLevel, averageSampleSize, dictBufferSize);
|
746
692
|
|
747
693
|
esr.dict = ZSTD_createCDict_advanced(dictBuffer, dictBufferSize, ZSTD_dlm_byRef, ZSTD_dct_rawContent, params.cParams, ZSTD_defaultCMem);
|
@@ -762,8 +708,15 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
|
|
762
708
|
pos += fileSizes[u];
|
763
709
|
}
|
764
710
|
|
711
|
+
if (notificationLevel >= 4) {
|
712
|
+
/* writeStats */
|
713
|
+
DISPLAYLEVEL(4, "Offset Code Frequencies : \n");
|
714
|
+
for (u=0; u<=offcodeMax; u++) {
|
715
|
+
DISPLAYLEVEL(4, "%2u :%7u \n", u, offcodeCount[u]);
|
716
|
+
} }
|
717
|
+
|
765
718
|
/* analyze, build stats, starting with literals */
|
766
|
-
{ size_t maxNbBits =
|
719
|
+
{ size_t maxNbBits = HUF_buildCTable_wksp(hufTable, countLit, 255, huffLog, wksp, sizeof(wksp));
|
767
720
|
if (HUF_isError(maxNbBits)) {
|
768
721
|
eSize = maxNbBits;
|
769
722
|
DISPLAYLEVEL(1, " HUF_buildCTable error \n");
|
@@ -772,7 +725,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
|
|
772
725
|
if (maxNbBits==8) { /* not compressible : will fail on HUF_writeCTable() */
|
773
726
|
DISPLAYLEVEL(2, "warning : pathological dataset : literals are not compressible : samples are noisy or too regular \n");
|
774
727
|
ZDICT_flatLit(countLit); /* replace distribution by a fake "mostly flat but still compressible" distribution, that HUF_writeCTable() can encode */
|
775
|
-
maxNbBits =
|
728
|
+
maxNbBits = HUF_buildCTable_wksp(hufTable, countLit, 255, huffLog, wksp, sizeof(wksp));
|
776
729
|
assert(maxNbBits==9);
|
777
730
|
}
|
778
731
|
huffLog = (U32)maxNbBits;
|
@@ -786,7 +739,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
|
|
786
739
|
/* note : the result of this phase should be used to better appreciate the impact on statistics */
|
787
740
|
|
788
741
|
total=0; for (u=0; u<=offcodeMax; u++) total+=offcodeCount[u];
|
789
|
-
errorCode = FSE_normalizeCount(offcodeNCount, Offlog, offcodeCount, total, offcodeMax);
|
742
|
+
errorCode = FSE_normalizeCount(offcodeNCount, Offlog, offcodeCount, total, offcodeMax, /* useLowProbCount */ 1);
|
790
743
|
if (FSE_isError(errorCode)) {
|
791
744
|
eSize = errorCode;
|
792
745
|
DISPLAYLEVEL(1, "FSE_normalizeCount error with offcodeCount \n");
|
@@ -795,7 +748,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
|
|
795
748
|
Offlog = (U32)errorCode;
|
796
749
|
|
797
750
|
total=0; for (u=0; u<=MaxML; u++) total+=matchLengthCount[u];
|
798
|
-
errorCode = FSE_normalizeCount(matchLengthNCount, mlLog, matchLengthCount, total, MaxML);
|
751
|
+
errorCode = FSE_normalizeCount(matchLengthNCount, mlLog, matchLengthCount, total, MaxML, /* useLowProbCount */ 1);
|
799
752
|
if (FSE_isError(errorCode)) {
|
800
753
|
eSize = errorCode;
|
801
754
|
DISPLAYLEVEL(1, "FSE_normalizeCount error with matchLengthCount \n");
|
@@ -804,7 +757,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
|
|
804
757
|
mlLog = (U32)errorCode;
|
805
758
|
|
806
759
|
total=0; for (u=0; u<=MaxLL; u++) total+=litLengthCount[u];
|
807
|
-
errorCode = FSE_normalizeCount(litLengthNCount, llLog, litLengthCount, total, MaxLL);
|
760
|
+
errorCode = FSE_normalizeCount(litLengthNCount, llLog, litLengthCount, total, MaxLL, /* useLowProbCount */ 1);
|
808
761
|
if (FSE_isError(errorCode)) {
|
809
762
|
eSize = errorCode;
|
810
763
|
DISPLAYLEVEL(1, "FSE_normalizeCount error with litLengthCount \n");
|
@@ -813,7 +766,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
|
|
813
766
|
llLog = (U32)errorCode;
|
814
767
|
|
815
768
|
/* write result to buffer */
|
816
|
-
{ size_t const hhSize =
|
769
|
+
{ size_t const hhSize = HUF_writeCTable_wksp(dstPtr, maxDstSize, hufTable, 255, huffLog, wksp, sizeof(wksp));
|
817
770
|
if (HUF_isError(hhSize)) {
|
818
771
|
eSize = hhSize;
|
819
772
|
DISPLAYLEVEL(1, "HUF_writeCTable error \n");
|
@@ -868,7 +821,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
|
|
868
821
|
MEM_writeLE32(dstPtr+8, bestRepOffset[2].offset);
|
869
822
|
#else
|
870
823
|
/* at this stage, we don't use the result of "most common first offset",
|
871
|
-
|
824
|
+
* as the impact of statistics is not properly evaluated */
|
872
825
|
MEM_writeLE32(dstPtr+0, repStartValue[0]);
|
873
826
|
MEM_writeLE32(dstPtr+4, repStartValue[1]);
|
874
827
|
MEM_writeLE32(dstPtr+8, repStartValue[2]);
|
@@ -884,6 +837,17 @@ _cleanup:
|
|
884
837
|
}
|
885
838
|
|
886
839
|
|
840
|
+
/**
|
841
|
+
* @returns the maximum repcode value
|
842
|
+
*/
|
843
|
+
static U32 ZDICT_maxRep(U32 const reps[ZSTD_REP_NUM])
|
844
|
+
{
|
845
|
+
U32 maxRep = reps[0];
|
846
|
+
int r;
|
847
|
+
for (r = 1; r < ZSTD_REP_NUM; ++r)
|
848
|
+
maxRep = MAX(maxRep, reps[r]);
|
849
|
+
return maxRep;
|
850
|
+
}
|
887
851
|
|
888
852
|
size_t ZDICT_finalizeDictionary(void* dictBuffer, size_t dictBufferCapacity,
|
889
853
|
const void* customDictContent, size_t dictContentSize,
|
@@ -893,13 +857,15 @@ size_t ZDICT_finalizeDictionary(void* dictBuffer, size_t dictBufferCapacity,
|
|
893
857
|
size_t hSize;
|
894
858
|
#define HBUFFSIZE 256 /* should prove large enough for all entropy headers */
|
895
859
|
BYTE header[HBUFFSIZE];
|
896
|
-
int const compressionLevel = (params.compressionLevel == 0) ?
|
860
|
+
int const compressionLevel = (params.compressionLevel == 0) ? ZSTD_CLEVEL_DEFAULT : params.compressionLevel;
|
897
861
|
U32 const notificationLevel = params.notificationLevel;
|
862
|
+
/* The final dictionary content must be at least as large as the largest repcode */
|
863
|
+
size_t const minContentSize = (size_t)ZDICT_maxRep(repStartValue);
|
864
|
+
size_t paddingSize;
|
898
865
|
|
899
866
|
/* check conditions */
|
900
867
|
DEBUGLOG(4, "ZDICT_finalizeDictionary");
|
901
868
|
if (dictBufferCapacity < dictContentSize) return ERROR(dstSize_tooSmall);
|
902
|
-
if (dictContentSize < ZDICT_CONTENTSIZE_MIN) return ERROR(srcSize_wrong);
|
903
869
|
if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) return ERROR(dstSize_tooSmall);
|
904
870
|
|
905
871
|
/* dictionary header */
|
@@ -923,12 +889,43 @@ size_t ZDICT_finalizeDictionary(void* dictBuffer, size_t dictBufferCapacity,
|
|
923
889
|
hSize += eSize;
|
924
890
|
}
|
925
891
|
|
926
|
-
/*
|
927
|
-
if (hSize + dictContentSize > dictBufferCapacity)
|
928
|
-
|
929
|
-
|
930
|
-
|
931
|
-
|
892
|
+
/* Shrink the content size if it doesn't fit in the buffer */
|
893
|
+
if (hSize + dictContentSize > dictBufferCapacity) {
|
894
|
+
dictContentSize = dictBufferCapacity - hSize;
|
895
|
+
}
|
896
|
+
|
897
|
+
/* Pad the dictionary content with zeros if it is too small */
|
898
|
+
if (dictContentSize < minContentSize) {
|
899
|
+
RETURN_ERROR_IF(hSize + minContentSize > dictBufferCapacity, dstSize_tooSmall,
|
900
|
+
"dictBufferCapacity too small to fit max repcode");
|
901
|
+
paddingSize = minContentSize - dictContentSize;
|
902
|
+
} else {
|
903
|
+
paddingSize = 0;
|
904
|
+
}
|
905
|
+
|
906
|
+
{
|
907
|
+
size_t const dictSize = hSize + paddingSize + dictContentSize;
|
908
|
+
|
909
|
+
/* The dictionary consists of the header, optional padding, and the content.
|
910
|
+
* The padding comes before the content because the "best" position in the
|
911
|
+
* dictionary is the last byte.
|
912
|
+
*/
|
913
|
+
BYTE* const outDictHeader = (BYTE*)dictBuffer;
|
914
|
+
BYTE* const outDictPadding = outDictHeader + hSize;
|
915
|
+
BYTE* const outDictContent = outDictPadding + paddingSize;
|
916
|
+
|
917
|
+
assert(dictSize <= dictBufferCapacity);
|
918
|
+
assert(outDictContent + dictContentSize == (BYTE*)dictBuffer + dictSize);
|
919
|
+
|
920
|
+
/* First copy the customDictContent into its final location.
|
921
|
+
* `customDictContent` and `dictBuffer` may overlap, so we must
|
922
|
+
* do this before any other writes into the output buffer.
|
923
|
+
* Then copy the header & padding into the output buffer.
|
924
|
+
*/
|
925
|
+
memmove(outDictContent, customDictContent, dictContentSize);
|
926
|
+
memcpy(outDictHeader, header, hSize);
|
927
|
+
memset(outDictPadding, 0, paddingSize);
|
928
|
+
|
932
929
|
return dictSize;
|
933
930
|
}
|
934
931
|
}
|
@@ -939,7 +936,7 @@ static size_t ZDICT_addEntropyTablesFromBuffer_advanced(
|
|
939
936
|
const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
|
940
937
|
ZDICT_params_t params)
|
941
938
|
{
|
942
|
-
int const compressionLevel = (params.compressionLevel == 0) ?
|
939
|
+
int const compressionLevel = (params.compressionLevel == 0) ? ZSTD_CLEVEL_DEFAULT : params.compressionLevel;
|
943
940
|
U32 const notificationLevel = params.notificationLevel;
|
944
941
|
size_t hSize = 8;
|
945
942
|
|
@@ -968,16 +965,11 @@ static size_t ZDICT_addEntropyTablesFromBuffer_advanced(
|
|
968
965
|
return MIN(dictBufferCapacity, hSize+dictContentSize);
|
969
966
|
}
|
970
967
|
|
971
|
-
/* Hidden declaration for dbio.c */
|
972
|
-
size_t ZDICT_trainFromBuffer_unsafe_legacy(
|
973
|
-
void* dictBuffer, size_t maxDictSize,
|
974
|
-
const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
|
975
|
-
ZDICT_legacy_params_t params);
|
976
968
|
/*! ZDICT_trainFromBuffer_unsafe_legacy() :
|
977
|
-
* Warning : `samplesBuffer` must be followed by noisy guard band
|
969
|
+
* Warning : `samplesBuffer` must be followed by noisy guard band !!!
|
978
970
|
* @return : size of dictionary, or an error code which can be tested with ZDICT_isError()
|
979
971
|
*/
|
980
|
-
size_t ZDICT_trainFromBuffer_unsafe_legacy(
|
972
|
+
static size_t ZDICT_trainFromBuffer_unsafe_legacy(
|
981
973
|
void* dictBuffer, size_t maxDictSize,
|
982
974
|
const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
|
983
975
|
ZDICT_legacy_params_t params)
|
@@ -1114,8 +1106,8 @@ size_t ZDICT_trainFromBuffer(void* dictBuffer, size_t dictBufferCapacity,
|
|
1114
1106
|
memset(&params, 0, sizeof(params));
|
1115
1107
|
params.d = 8;
|
1116
1108
|
params.steps = 4;
|
1117
|
-
/*
|
1118
|
-
params.zParams.compressionLevel =
|
1109
|
+
/* Use default level since no compression level information is available */
|
1110
|
+
params.zParams.compressionLevel = ZSTD_CLEVEL_DEFAULT;
|
1119
1111
|
#if defined(DEBUGLEVEL) && (DEBUGLEVEL>=1)
|
1120
1112
|
params.zParams.notificationLevel = DEBUGLEVEL;
|
1121
1113
|
#endif
|
@@ -1,5 +1,5 @@
|
|
1
1
|
/*
|
2
|
-
* Copyright (c)
|
2
|
+
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
3
3
|
* All rights reserved.
|
4
4
|
*
|
5
5
|
* This source code is licensed under both the BSD-style license (found in the
|
@@ -242,6 +242,13 @@ MEM_STATIC ZSTD_frameSizeInfo ZSTD_findFrameSizeInfoLegacy(const void *src, size
|
|
242
242
|
frameSizeInfo.compressedSize = ERROR(srcSize_wrong);
|
243
243
|
frameSizeInfo.decompressedBound = ZSTD_CONTENTSIZE_ERROR;
|
244
244
|
}
|
245
|
+
/* In all cases, decompressedBound == nbBlocks * ZSTD_BLOCKSIZE_MAX.
|
246
|
+
* So we can compute nbBlocks without having to change every function.
|
247
|
+
*/
|
248
|
+
if (frameSizeInfo.decompressedBound != ZSTD_CONTENTSIZE_ERROR) {
|
249
|
+
assert((frameSizeInfo.decompressedBound & (ZSTD_BLOCKSIZE_MAX - 1)) == 0);
|
250
|
+
frameSizeInfo.nbBlocks = (size_t)(frameSizeInfo.decompressedBound / ZSTD_BLOCKSIZE_MAX);
|
251
|
+
}
|
245
252
|
return frameSizeInfo;
|
246
253
|
}
|
247
254
|
|
@@ -1,5 +1,5 @@
|
|
1
1
|
/*
|
2
|
-
* Copyright (c)
|
2
|
+
* Copyright (c) Yann Collet, Meta Platforms, Inc. and affiliates.
|
3
3
|
* All rights reserved.
|
4
4
|
*
|
5
5
|
* This source code is licensed under both the BSD-style license (found in the
|
@@ -190,28 +190,6 @@ typedef signed long long S64;
|
|
190
190
|
/****************************************************************
|
191
191
|
* Memory I/O
|
192
192
|
*****************************************************************/
|
193
|
-
/* FSE_FORCE_MEMORY_ACCESS
|
194
|
-
* By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable.
|
195
|
-
* Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal.
|
196
|
-
* The below switch allow to select different access method for improved performance.
|
197
|
-
* Method 0 (default) : use `memcpy()`. Safe and portable.
|
198
|
-
* Method 1 : `__packed` statement. It depends on compiler extension (ie, not portable).
|
199
|
-
* This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`.
|
200
|
-
* Method 2 : direct access. This method is portable but violate C standard.
|
201
|
-
* It can generate buggy code on targets generating assembly depending on alignment.
|
202
|
-
* But in some circumstances, it's the only known way to get the most performance (ie GCC + ARMv6)
|
203
|
-
* See http://fastcompression.blogspot.fr/2015/08/accessing-unaligned-memory.html for details.
|
204
|
-
* Prefer these methods in priority order (0 > 1 > 2)
|
205
|
-
*/
|
206
|
-
#ifndef FSE_FORCE_MEMORY_ACCESS /* can be defined externally, on command line for example */
|
207
|
-
# if defined(__GNUC__) && ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) )
|
208
|
-
# define FSE_FORCE_MEMORY_ACCESS 2
|
209
|
-
# elif (defined(__INTEL_COMPILER) && !defined(WIN32)) || \
|
210
|
-
(defined(__GNUC__) && ( defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7S__) ))
|
211
|
-
# define FSE_FORCE_MEMORY_ACCESS 1
|
212
|
-
# endif
|
213
|
-
#endif
|
214
|
-
|
215
193
|
|
216
194
|
static unsigned FSE_32bits(void)
|
217
195
|
{
|
@@ -224,24 +202,6 @@ static unsigned FSE_isLittleEndian(void)
|
|
224
202
|
return one.c[0];
|
225
203
|
}
|
226
204
|
|
227
|
-
#if defined(FSE_FORCE_MEMORY_ACCESS) && (FSE_FORCE_MEMORY_ACCESS==2)
|
228
|
-
|
229
|
-
static U16 FSE_read16(const void* memPtr) { return *(const U16*) memPtr; }
|
230
|
-
static U32 FSE_read32(const void* memPtr) { return *(const U32*) memPtr; }
|
231
|
-
static U64 FSE_read64(const void* memPtr) { return *(const U64*) memPtr; }
|
232
|
-
|
233
|
-
#elif defined(FSE_FORCE_MEMORY_ACCESS) && (FSE_FORCE_MEMORY_ACCESS==1)
|
234
|
-
|
235
|
-
/* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */
|
236
|
-
/* currently only defined for gcc and icc */
|
237
|
-
typedef union { U16 u16; U32 u32; U64 u64; } __attribute__((packed)) unalign;
|
238
|
-
|
239
|
-
static U16 FSE_read16(const void* ptr) { return ((const unalign*)ptr)->u16; }
|
240
|
-
static U32 FSE_read32(const void* ptr) { return ((const unalign*)ptr)->u32; }
|
241
|
-
static U64 FSE_read64(const void* ptr) { return ((const unalign*)ptr)->u64; }
|
242
|
-
|
243
|
-
#else
|
244
|
-
|
245
205
|
static U16 FSE_read16(const void* memPtr)
|
246
206
|
{
|
247
207
|
U16 val; memcpy(&val, memPtr, sizeof(val)); return val;
|
@@ -257,8 +217,6 @@ static U64 FSE_read64(const void* memPtr)
|
|
257
217
|
U64 val; memcpy(&val, memPtr, sizeof(val)); return val;
|
258
218
|
}
|
259
219
|
|
260
|
-
#endif /* FSE_FORCE_MEMORY_ACCESS */
|
261
|
-
|
262
220
|
static U16 FSE_readLE16(const void* memPtr)
|
263
221
|
{
|
264
222
|
if (FSE_isLittleEndian())
|
@@ -343,8 +301,7 @@ FORCE_INLINE unsigned FSE_highbit32 (U32 val)
|
|
343
301
|
{
|
344
302
|
# if defined(_MSC_VER) /* Visual */
|
345
303
|
unsigned long r;
|
346
|
-
_BitScanReverse ( &r, val );
|
347
|
-
return (unsigned) r;
|
304
|
+
return _BitScanReverse(&r, val) ? (unsigned)r : 0;
|
348
305
|
# elif defined(__GNUC__) && (GCC_VERSION >= 304) /* GCC Intrinsic */
|
349
306
|
return __builtin_clz (val) ^ 31;
|
350
307
|
# else /* Software version */
|
@@ -1194,7 +1151,7 @@ static size_t HUF_decompress (void* dst, size_t maxDstSize, const void* cSrc, si
|
|
1194
1151
|
zstd - standard compression library
|
1195
1152
|
Copyright (C) 2014-2015, Yann Collet.
|
1196
1153
|
|
1197
|
-
BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
|
1154
|
+
BSD 2-Clause License (https://opensource.org/licenses/bsd-license.php)
|
1198
1155
|
|
1199
1156
|
Redistribution and use in source and binary forms, with or without
|
1200
1157
|
modification, are permitted provided that the following conditions are
|
@@ -1280,7 +1237,11 @@ static size_t HUF_decompress (void* dst, size_t maxDstSize, const void* cSrc, si
|
|
1280
1237
|
* Basic Types
|
1281
1238
|
*********************************************************/
|
1282
1239
|
#if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */
|
1283
|
-
# include <stdint.h>
|
1240
|
+
# if defined(_AIX)
|
1241
|
+
# include <inttypes.h>
|
1242
|
+
# else
|
1243
|
+
# include <stdint.h> /* intptr_t */
|
1244
|
+
# endif
|
1284
1245
|
typedef uint8_t BYTE;
|
1285
1246
|
typedef uint16_t U16;
|
1286
1247
|
typedef int16_t S16;
|
@@ -1759,20 +1720,26 @@ static size_t ZSTD_execSequence(BYTE* op,
|
|
1759
1720
|
static const int dec32table[] = {0, 1, 2, 1, 4, 4, 4, 4}; /* added */
|
1760
1721
|
static const int dec64table[] = {8, 8, 8, 7, 8, 9,10,11}; /* subtracted */
|
1761
1722
|
const BYTE* const ostart = op;
|
1723
|
+
BYTE* const oLitEnd = op + sequence.litLength;
|
1762
1724
|
const size_t litLength = sequence.litLength;
|
1763
1725
|
BYTE* const endMatch = op + litLength + sequence.matchLength; /* risk : address space overflow (32-bits) */
|
1764
1726
|
const BYTE* const litEnd = *litPtr + litLength;
|
1765
1727
|
|
1766
|
-
/* check */
|
1728
|
+
/* checks */
|
1729
|
+
size_t const seqLength = sequence.litLength + sequence.matchLength;
|
1730
|
+
|
1731
|
+
if (seqLength > (size_t)(oend - op)) return ERROR(dstSize_tooSmall);
|
1732
|
+
if (sequence.litLength > (size_t)(litLimit - *litPtr)) return ERROR(corruption_detected);
|
1733
|
+
/* Now we know there are no overflow in literal nor match lengths, can use pointer checks */
|
1734
|
+
if (sequence.offset > (U32)(oLitEnd - base)) return ERROR(corruption_detected);
|
1735
|
+
|
1767
1736
|
if (endMatch > oend) return ERROR(dstSize_tooSmall); /* overwrite beyond dst buffer */
|
1768
|
-
if (litEnd > litLimit) return ERROR(corruption_detected);
|
1769
|
-
if (sequence.matchLength > (size_t)(*litPtr-op)) return ERROR(dstSize_tooSmall); /* overwrite literal segment */
|
1737
|
+
if (litEnd > litLimit) return ERROR(corruption_detected); /* overRead beyond lit buffer */
|
1738
|
+
if (sequence.matchLength > (size_t)(*litPtr-op)) return ERROR(dstSize_tooSmall); /* overwrite literal segment */
|
1770
1739
|
|
1771
1740
|
/* copy Literals */
|
1772
|
-
|
1773
|
-
|
1774
|
-
else
|
1775
|
-
ZSTD_wildcopy(op, *litPtr, litLength);
|
1741
|
+
ZSTD_memmove(op, *litPtr, sequence.litLength); /* note : v0.1 seems to allow scenarios where output or input are close to end of buffer */
|
1742
|
+
|
1776
1743
|
op += litLength;
|
1777
1744
|
*litPtr = litEnd; /* update for next sequence */
|
1778
1745
|
|