extzstd 0.3.2 → 0.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +4 -3
- data/contrib/zstd/CHANGELOG +225 -1
- data/contrib/zstd/CONTRIBUTING.md +158 -75
- data/contrib/zstd/LICENSE +4 -4
- data/contrib/zstd/Makefile +106 -69
- data/contrib/zstd/Package.swift +36 -0
- data/contrib/zstd/README.md +64 -36
- data/contrib/zstd/SECURITY.md +15 -0
- data/contrib/zstd/TESTING.md +2 -3
- data/contrib/zstd/lib/BUCK +5 -7
- data/contrib/zstd/lib/Makefile +117 -199
- data/contrib/zstd/lib/README.md +37 -7
- data/contrib/zstd/lib/common/allocations.h +55 -0
- data/contrib/zstd/lib/common/bits.h +200 -0
- data/contrib/zstd/lib/common/bitstream.h +80 -86
- data/contrib/zstd/lib/common/compiler.h +225 -63
- data/contrib/zstd/lib/common/cpu.h +37 -1
- data/contrib/zstd/lib/common/debug.c +7 -1
- data/contrib/zstd/lib/common/debug.h +21 -12
- data/contrib/zstd/lib/common/entropy_common.c +15 -37
- data/contrib/zstd/lib/common/error_private.c +9 -2
- data/contrib/zstd/lib/common/error_private.h +93 -5
- data/contrib/zstd/lib/common/fse.h +12 -87
- data/contrib/zstd/lib/common/fse_decompress.c +37 -117
- data/contrib/zstd/lib/common/huf.h +97 -172
- data/contrib/zstd/lib/common/mem.h +58 -58
- data/contrib/zstd/lib/common/pool.c +38 -17
- data/contrib/zstd/lib/common/pool.h +10 -4
- data/contrib/zstd/lib/common/portability_macros.h +158 -0
- data/contrib/zstd/lib/common/threading.c +74 -14
- data/contrib/zstd/lib/common/threading.h +5 -10
- data/contrib/zstd/lib/common/xxhash.c +6 -814
- data/contrib/zstd/lib/common/xxhash.h +6930 -195
- data/contrib/zstd/lib/common/zstd_common.c +1 -36
- data/contrib/zstd/lib/common/zstd_deps.h +1 -1
- data/contrib/zstd/lib/common/zstd_internal.h +68 -154
- data/contrib/zstd/lib/common/zstd_trace.h +163 -0
- data/contrib/zstd/lib/compress/clevels.h +134 -0
- data/contrib/zstd/lib/compress/fse_compress.c +75 -155
- data/contrib/zstd/lib/compress/hist.c +1 -1
- data/contrib/zstd/lib/compress/hist.h +1 -1
- data/contrib/zstd/lib/compress/huf_compress.c +810 -259
- data/contrib/zstd/lib/compress/zstd_compress.c +2864 -919
- data/contrib/zstd/lib/compress/zstd_compress_internal.h +523 -192
- data/contrib/zstd/lib/compress/zstd_compress_literals.c +117 -40
- data/contrib/zstd/lib/compress/zstd_compress_literals.h +16 -6
- data/contrib/zstd/lib/compress/zstd_compress_sequences.c +28 -19
- data/contrib/zstd/lib/compress/zstd_compress_sequences.h +1 -1
- data/contrib/zstd/lib/compress/zstd_compress_superblock.c +251 -412
- data/contrib/zstd/lib/compress/zstd_compress_superblock.h +1 -1
- data/contrib/zstd/lib/compress/zstd_cwksp.h +284 -97
- data/contrib/zstd/lib/compress/zstd_double_fast.c +382 -133
- data/contrib/zstd/lib/compress/zstd_double_fast.h +14 -2
- data/contrib/zstd/lib/compress/zstd_fast.c +732 -260
- data/contrib/zstd/lib/compress/zstd_fast.h +3 -2
- data/contrib/zstd/lib/compress/zstd_lazy.c +1177 -390
- data/contrib/zstd/lib/compress/zstd_lazy.h +129 -14
- data/contrib/zstd/lib/compress/zstd_ldm.c +280 -210
- data/contrib/zstd/lib/compress/zstd_ldm.h +3 -2
- data/contrib/zstd/lib/compress/zstd_ldm_geartab.h +106 -0
- data/contrib/zstd/lib/compress/zstd_opt.c +516 -285
- data/contrib/zstd/lib/compress/zstd_opt.h +32 -8
- data/contrib/zstd/lib/compress/zstdmt_compress.c +202 -131
- data/contrib/zstd/lib/compress/zstdmt_compress.h +9 -6
- data/contrib/zstd/lib/decompress/huf_decompress.c +1149 -555
- data/contrib/zstd/lib/decompress/huf_decompress_amd64.S +595 -0
- data/contrib/zstd/lib/decompress/zstd_ddict.c +4 -4
- data/contrib/zstd/lib/decompress/zstd_ddict.h +1 -1
- data/contrib/zstd/lib/decompress/zstd_decompress.c +583 -106
- data/contrib/zstd/lib/decompress/zstd_decompress_block.c +1054 -379
- data/contrib/zstd/lib/decompress/zstd_decompress_block.h +14 -3
- data/contrib/zstd/lib/decompress/zstd_decompress_internal.h +56 -6
- data/contrib/zstd/lib/deprecated/zbuff.h +1 -1
- data/contrib/zstd/lib/deprecated/zbuff_common.c +1 -1
- data/contrib/zstd/lib/deprecated/zbuff_compress.c +24 -4
- data/contrib/zstd/lib/deprecated/zbuff_decompress.c +3 -1
- data/contrib/zstd/lib/dictBuilder/cover.c +60 -44
- data/contrib/zstd/lib/dictBuilder/cover.h +6 -11
- data/contrib/zstd/lib/dictBuilder/divsufsort.c +1 -1
- data/contrib/zstd/lib/dictBuilder/fastcover.c +26 -18
- data/contrib/zstd/lib/dictBuilder/zdict.c +100 -101
- data/contrib/zstd/lib/legacy/zstd_legacy.h +38 -1
- data/contrib/zstd/lib/legacy/zstd_v01.c +18 -53
- data/contrib/zstd/lib/legacy/zstd_v01.h +1 -1
- data/contrib/zstd/lib/legacy/zstd_v02.c +28 -85
- data/contrib/zstd/lib/legacy/zstd_v02.h +1 -1
- data/contrib/zstd/lib/legacy/zstd_v03.c +29 -88
- data/contrib/zstd/lib/legacy/zstd_v03.h +1 -1
- data/contrib/zstd/lib/legacy/zstd_v04.c +27 -80
- data/contrib/zstd/lib/legacy/zstd_v04.h +1 -1
- data/contrib/zstd/lib/legacy/zstd_v05.c +36 -85
- data/contrib/zstd/lib/legacy/zstd_v05.h +1 -1
- data/contrib/zstd/lib/legacy/zstd_v06.c +44 -96
- data/contrib/zstd/lib/legacy/zstd_v06.h +1 -1
- data/contrib/zstd/lib/legacy/zstd_v07.c +37 -92
- data/contrib/zstd/lib/legacy/zstd_v07.h +1 -1
- data/contrib/zstd/lib/libzstd.mk +237 -0
- data/contrib/zstd/lib/libzstd.pc.in +4 -3
- data/contrib/zstd/lib/module.modulemap +35 -0
- data/contrib/zstd/lib/{dictBuilder/zdict.h → zdict.h} +202 -33
- data/contrib/zstd/lib/zstd.h +1030 -332
- data/contrib/zstd/lib/{common/zstd_errors.h → zstd_errors.h} +27 -8
- data/ext/extconf.rb +26 -7
- data/ext/extzstd.c +51 -24
- data/ext/extzstd.h +33 -6
- data/ext/extzstd_stream.c +74 -31
- data/ext/libzstd_conf.h +0 -1
- data/ext/zstd_decompress_asm.S +1 -0
- metadata +17 -7
- data/contrib/zstd/appveyor.yml +0 -292
- data/ext/depend +0 -2
@@ -1,5 +1,5 @@
|
|
1
1
|
/*
|
2
|
-
* Copyright (c)
|
2
|
+
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
3
3
|
* All rights reserved.
|
4
4
|
*
|
5
5
|
* This source code is licensed under both the BSD-style license (found in the
|
@@ -16,21 +16,29 @@
|
|
16
16
|
#include <string.h> /* memset */
|
17
17
|
#include <time.h> /* clock */
|
18
18
|
|
19
|
+
#ifndef ZDICT_STATIC_LINKING_ONLY
|
20
|
+
# define ZDICT_STATIC_LINKING_ONLY
|
21
|
+
#endif
|
22
|
+
|
19
23
|
#include "../common/mem.h" /* read */
|
20
24
|
#include "../common/pool.h"
|
21
25
|
#include "../common/threading.h"
|
22
|
-
#include "cover.h"
|
23
26
|
#include "../common/zstd_internal.h" /* includes zstd.h */
|
24
27
|
#include "../compress/zstd_compress_internal.h" /* ZSTD_hash*() */
|
25
|
-
#
|
26
|
-
#
|
27
|
-
#endif
|
28
|
-
#include "zdict.h"
|
28
|
+
#include "../zdict.h"
|
29
|
+
#include "cover.h"
|
29
30
|
|
30
31
|
|
31
32
|
/*-*************************************
|
32
33
|
* Constants
|
33
34
|
***************************************/
|
35
|
+
/**
|
36
|
+
* There are 32bit indexes used to ref samples, so limit samples size to 4GB
|
37
|
+
* on 64bit builds.
|
38
|
+
* For 32bit builds we choose 1 GB.
|
39
|
+
* Most 32bit platforms have 2GB user-mode addressable space and we allocate a large
|
40
|
+
* contiguous buffer, so 1GB is already a high limit.
|
41
|
+
*/
|
34
42
|
#define FASTCOVER_MAX_SAMPLES_SIZE (sizeof(size_t) == 8 ? ((unsigned)-1) : ((unsigned)1 GB))
|
35
43
|
#define FASTCOVER_MAX_F 31
|
36
44
|
#define FASTCOVER_MAX_ACCEL 10
|
@@ -43,7 +51,7 @@
|
|
43
51
|
* Console display
|
44
52
|
***************************************/
|
45
53
|
#ifndef LOCALDISPLAYLEVEL
|
46
|
-
static int g_displayLevel =
|
54
|
+
static int g_displayLevel = 0;
|
47
55
|
#endif
|
48
56
|
#undef DISPLAY
|
49
57
|
#define DISPLAY(...) \
|
@@ -296,7 +304,7 @@ FASTCOVER_computeFrequency(U32* freqs, const FASTCOVER_ctx_t* ctx)
|
|
296
304
|
|
297
305
|
/**
|
298
306
|
* Prepare a context for dictionary building.
|
299
|
-
* The context is only dependent on the parameter `d` and can used multiple
|
307
|
+
* The context is only dependent on the parameter `d` and can be used multiple
|
300
308
|
* times.
|
301
309
|
* Returns 0 on success or error code on error.
|
302
310
|
* The context must be destroyed with `FASTCOVER_ctx_destroy()`.
|
@@ -462,20 +470,20 @@ typedef struct FASTCOVER_tryParameters_data_s {
|
|
462
470
|
* This function is thread safe if zstd is compiled with multithreaded support.
|
463
471
|
* It takes its parameters as an *OWNING* opaque pointer to support threading.
|
464
472
|
*/
|
465
|
-
static void FASTCOVER_tryParameters(void
|
473
|
+
static void FASTCOVER_tryParameters(void* opaque)
|
466
474
|
{
|
467
475
|
/* Save parameters as local variables */
|
468
|
-
FASTCOVER_tryParameters_data_t *const data = (FASTCOVER_tryParameters_data_t
|
476
|
+
FASTCOVER_tryParameters_data_t *const data = (FASTCOVER_tryParameters_data_t*)opaque;
|
469
477
|
const FASTCOVER_ctx_t *const ctx = data->ctx;
|
470
478
|
const ZDICT_cover_params_t parameters = data->parameters;
|
471
479
|
size_t dictBufferCapacity = data->dictBufferCapacity;
|
472
480
|
size_t totalCompressedSize = ERROR(GENERIC);
|
473
481
|
/* Initialize array to keep track of frequency of dmer within activeSegment */
|
474
|
-
U16* segmentFreqs = (U16
|
482
|
+
U16* segmentFreqs = (U16*)calloc(((U64)1 << ctx->f), sizeof(U16));
|
475
483
|
/* Allocate space for hash table, dict, and freqs */
|
476
|
-
BYTE *const dict = (BYTE
|
484
|
+
BYTE *const dict = (BYTE*)malloc(dictBufferCapacity);
|
477
485
|
COVER_dictSelection_t selection = COVER_dictSelectionError(ERROR(GENERIC));
|
478
|
-
U32
|
486
|
+
U32* freqs = (U32*) malloc(((U64)1 << ctx->f) * sizeof(U32));
|
479
487
|
if (!segmentFreqs || !dict || !freqs) {
|
480
488
|
DISPLAYLEVEL(1, "Failed to allocate buffers: out of memory\n");
|
481
489
|
goto _cleanup;
|
@@ -537,7 +545,7 @@ FASTCOVER_convertToFastCoverParams(ZDICT_cover_params_t coverParams,
|
|
537
545
|
}
|
538
546
|
|
539
547
|
|
540
|
-
|
548
|
+
ZDICTLIB_STATIC_API size_t
|
541
549
|
ZDICT_trainFromBuffer_fastCover(void* dictBuffer, size_t dictBufferCapacity,
|
542
550
|
const void* samplesBuffer,
|
543
551
|
const size_t* samplesSizes, unsigned nbSamples,
|
@@ -548,7 +556,7 @@ ZDICT_trainFromBuffer_fastCover(void* dictBuffer, size_t dictBufferCapacity,
|
|
548
556
|
ZDICT_cover_params_t coverParams;
|
549
557
|
FASTCOVER_accel_t accelParams;
|
550
558
|
/* Initialize global data */
|
551
|
-
g_displayLevel = parameters.zParams.notificationLevel;
|
559
|
+
g_displayLevel = (int)parameters.zParams.notificationLevel;
|
552
560
|
/* Assign splitPoint and f if not provided */
|
553
561
|
parameters.splitPoint = 1.0;
|
554
562
|
parameters.f = parameters.f == 0 ? DEFAULT_F : parameters.f;
|
@@ -606,7 +614,7 @@ ZDICT_trainFromBuffer_fastCover(void* dictBuffer, size_t dictBufferCapacity,
|
|
606
614
|
}
|
607
615
|
|
608
616
|
|
609
|
-
|
617
|
+
ZDICTLIB_STATIC_API size_t
|
610
618
|
ZDICT_optimizeTrainFromBuffer_fastCover(
|
611
619
|
void* dictBuffer, size_t dictBufferCapacity,
|
612
620
|
const void* samplesBuffer,
|
@@ -631,7 +639,7 @@ ZDICT_optimizeTrainFromBuffer_fastCover(
|
|
631
639
|
const unsigned accel = parameters->accel == 0 ? DEFAULT_ACCEL : parameters->accel;
|
632
640
|
const unsigned shrinkDict = 0;
|
633
641
|
/* Local variables */
|
634
|
-
const int displayLevel = parameters->zParams.notificationLevel;
|
642
|
+
const int displayLevel = (int)parameters->zParams.notificationLevel;
|
635
643
|
unsigned iteration = 1;
|
636
644
|
unsigned d;
|
637
645
|
unsigned k;
|
@@ -715,7 +723,7 @@ ZDICT_optimizeTrainFromBuffer_fastCover(
|
|
715
723
|
data->parameters.splitPoint = splitPoint;
|
716
724
|
data->parameters.steps = kSteps;
|
717
725
|
data->parameters.shrinkDict = shrinkDict;
|
718
|
-
data->parameters.zParams.notificationLevel = g_displayLevel;
|
726
|
+
data->parameters.zParams.notificationLevel = (unsigned)g_displayLevel;
|
719
727
|
/* Check the parameters */
|
720
728
|
if (!FASTCOVER_checkParameters(data->parameters, dictBufferCapacity,
|
721
729
|
data->ctx->f, accel)) {
|
@@ -1,5 +1,5 @@
|
|
1
1
|
/*
|
2
|
-
* Copyright (c)
|
2
|
+
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
3
3
|
* All rights reserved.
|
4
4
|
*
|
5
5
|
* This source code is licensed under both the BSD-style license (found in the
|
@@ -23,9 +23,13 @@
|
|
23
23
|
/* Unix Large Files support (>4GB) */
|
24
24
|
#define _FILE_OFFSET_BITS 64
|
25
25
|
#if (defined(__sun__) && (!defined(__LP64__))) /* Sun Solaris 32-bits requires specific definitions */
|
26
|
+
# ifndef _LARGEFILE_SOURCE
|
26
27
|
# define _LARGEFILE_SOURCE
|
28
|
+
# endif
|
27
29
|
#elif ! defined(__LP64__) /* No point defining Large file for 64 bit */
|
30
|
+
# ifndef _LARGEFILE64_SOURCE
|
28
31
|
# define _LARGEFILE64_SOURCE
|
32
|
+
# endif
|
29
33
|
#endif
|
30
34
|
|
31
35
|
|
@@ -37,18 +41,19 @@
|
|
37
41
|
#include <stdio.h> /* fprintf, fopen, ftello64 */
|
38
42
|
#include <time.h> /* clock */
|
39
43
|
|
44
|
+
#ifndef ZDICT_STATIC_LINKING_ONLY
|
45
|
+
# define ZDICT_STATIC_LINKING_ONLY
|
46
|
+
#endif
|
47
|
+
|
40
48
|
#include "../common/mem.h" /* read */
|
41
49
|
#include "../common/fse.h" /* FSE_normalizeCount, FSE_writeNCount */
|
42
|
-
#define HUF_STATIC_LINKING_ONLY
|
43
50
|
#include "../common/huf.h" /* HUF_buildCTable, HUF_writeCTable */
|
44
51
|
#include "../common/zstd_internal.h" /* includes zstd.h */
|
45
52
|
#include "../common/xxhash.h" /* XXH64 */
|
46
|
-
#include "divsufsort.h"
|
47
|
-
#ifndef ZDICT_STATIC_LINKING_ONLY
|
48
|
-
# define ZDICT_STATIC_LINKING_ONLY
|
49
|
-
#endif
|
50
|
-
#include "zdict.h"
|
51
53
|
#include "../compress/zstd_compress_internal.h" /* ZSTD_loadCEntropy() */
|
54
|
+
#include "../zdict.h"
|
55
|
+
#include "divsufsort.h"
|
56
|
+
#include "../common/bits.h" /* ZSTD_NbCommonBytes */
|
52
57
|
|
53
58
|
|
54
59
|
/*-*************************************
|
@@ -69,9 +74,9 @@ static const U32 g_selectivity_default = 9;
|
|
69
74
|
* Console display
|
70
75
|
***************************************/
|
71
76
|
#undef DISPLAY
|
72
|
-
#define DISPLAY(...) { fprintf(stderr, __VA_ARGS__); fflush( stderr ); }
|
77
|
+
#define DISPLAY(...) do { fprintf(stderr, __VA_ARGS__); fflush( stderr ); } while (0)
|
73
78
|
#undef DISPLAYLEVEL
|
74
|
-
#define DISPLAYLEVEL(l, ...) if (notificationLevel>=l) { DISPLAY(__VA_ARGS__); } /* 0 : no display; 1: errors; 2: default; 3: details; 4: debug */
|
79
|
+
#define DISPLAYLEVEL(l, ...) do { if (notificationLevel>=l) { DISPLAY(__VA_ARGS__); } } while (0) /* 0 : no display; 1: errors; 2: default; 3: details; 4: debug */
|
75
80
|
|
76
81
|
static clock_t ZDICT_clockSpan(clock_t nPrevious) { return clock() - nPrevious; }
|
77
82
|
|
@@ -125,65 +130,6 @@ size_t ZDICT_getDictHeaderSize(const void* dictBuffer, size_t dictSize)
|
|
125
130
|
/*-********************************************************
|
126
131
|
* Dictionary training functions
|
127
132
|
**********************************************************/
|
128
|
-
static unsigned ZDICT_NbCommonBytes (size_t val)
|
129
|
-
{
|
130
|
-
if (MEM_isLittleEndian()) {
|
131
|
-
if (MEM_64bits()) {
|
132
|
-
# if defined(_MSC_VER) && defined(_WIN64)
|
133
|
-
unsigned long r = 0;
|
134
|
-
_BitScanForward64( &r, (U64)val );
|
135
|
-
return (unsigned)(r>>3);
|
136
|
-
# elif defined(__GNUC__) && (__GNUC__ >= 3)
|
137
|
-
return (__builtin_ctzll((U64)val) >> 3);
|
138
|
-
# else
|
139
|
-
static const int DeBruijnBytePos[64] = { 0, 0, 0, 0, 0, 1, 1, 2, 0, 3, 1, 3, 1, 4, 2, 7, 0, 2, 3, 6, 1, 5, 3, 5, 1, 3, 4, 4, 2, 5, 6, 7, 7, 0, 1, 2, 3, 3, 4, 6, 2, 6, 5, 5, 3, 4, 5, 6, 7, 1, 2, 4, 6, 4, 4, 5, 7, 2, 6, 5, 7, 6, 7, 7 };
|
140
|
-
return DeBruijnBytePos[((U64)((val & -(long long)val) * 0x0218A392CDABBD3FULL)) >> 58];
|
141
|
-
# endif
|
142
|
-
} else { /* 32 bits */
|
143
|
-
# if defined(_MSC_VER)
|
144
|
-
unsigned long r=0;
|
145
|
-
_BitScanForward( &r, (U32)val );
|
146
|
-
return (unsigned)(r>>3);
|
147
|
-
# elif defined(__GNUC__) && (__GNUC__ >= 3)
|
148
|
-
return (__builtin_ctz((U32)val) >> 3);
|
149
|
-
# else
|
150
|
-
static const int DeBruijnBytePos[32] = { 0, 0, 3, 0, 3, 1, 3, 0, 3, 2, 2, 1, 3, 2, 0, 1, 3, 3, 1, 2, 2, 2, 2, 0, 3, 1, 2, 0, 1, 0, 1, 1 };
|
151
|
-
return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27];
|
152
|
-
# endif
|
153
|
-
}
|
154
|
-
} else { /* Big Endian CPU */
|
155
|
-
if (MEM_64bits()) {
|
156
|
-
# if defined(_MSC_VER) && defined(_WIN64)
|
157
|
-
unsigned long r = 0;
|
158
|
-
_BitScanReverse64( &r, val );
|
159
|
-
return (unsigned)(r>>3);
|
160
|
-
# elif defined(__GNUC__) && (__GNUC__ >= 3)
|
161
|
-
return (__builtin_clzll(val) >> 3);
|
162
|
-
# else
|
163
|
-
unsigned r;
|
164
|
-
const unsigned n32 = sizeof(size_t)*4; /* calculate this way due to compiler complaining in 32-bits mode */
|
165
|
-
if (!(val>>n32)) { r=4; } else { r=0; val>>=n32; }
|
166
|
-
if (!(val>>16)) { r+=2; val>>=8; } else { val>>=24; }
|
167
|
-
r += (!val);
|
168
|
-
return r;
|
169
|
-
# endif
|
170
|
-
} else { /* 32 bits */
|
171
|
-
# if defined(_MSC_VER)
|
172
|
-
unsigned long r = 0;
|
173
|
-
_BitScanReverse( &r, (unsigned long)val );
|
174
|
-
return (unsigned)(r>>3);
|
175
|
-
# elif defined(__GNUC__) && (__GNUC__ >= 3)
|
176
|
-
return (__builtin_clz((U32)val) >> 3);
|
177
|
-
# else
|
178
|
-
unsigned r;
|
179
|
-
if (!(val>>16)) { r=2; val>>=8; } else { r=0; val>>=24; }
|
180
|
-
r += (!val);
|
181
|
-
return r;
|
182
|
-
# endif
|
183
|
-
} }
|
184
|
-
}
|
185
|
-
|
186
|
-
|
187
133
|
/*! ZDICT_count() :
|
188
134
|
Count the nb of common bytes between 2 pointers.
|
189
135
|
Note : this function presumes end of buffer followed by noisy guard band.
|
@@ -198,7 +144,7 @@ static size_t ZDICT_count(const void* pIn, const void* pMatch)
|
|
198
144
|
pMatch = (const char*)pMatch+sizeof(size_t);
|
199
145
|
continue;
|
200
146
|
}
|
201
|
-
pIn = (const char*)pIn+
|
147
|
+
pIn = (const char*)pIn+ZSTD_NbCommonBytes(diff);
|
202
148
|
return (size_t)((const char*)pIn - pStart);
|
203
149
|
}
|
204
150
|
}
|
@@ -230,7 +176,7 @@ static dictItem ZDICT_analyzePos(
|
|
230
176
|
U32 savings[LLIMIT] = {0};
|
231
177
|
const BYTE* b = (const BYTE*)buffer;
|
232
178
|
size_t maxLength = LLIMIT;
|
233
|
-
size_t pos = suffix[start];
|
179
|
+
size_t pos = (size_t)suffix[start];
|
234
180
|
U32 end = start;
|
235
181
|
dictItem solution;
|
236
182
|
|
@@ -364,7 +310,7 @@ static dictItem ZDICT_analyzePos(
|
|
364
310
|
savings[i] = savings[i-1] + (lengthList[i] * (i-3));
|
365
311
|
|
366
312
|
DISPLAYLEVEL(4, "Selected dict at position %u, of length %u : saves %u (ratio: %.2f) \n",
|
367
|
-
(unsigned)pos, (unsigned)maxLength, (unsigned)savings[maxLength], (double)savings[maxLength] / maxLength);
|
313
|
+
(unsigned)pos, (unsigned)maxLength, (unsigned)savings[maxLength], (double)savings[maxLength] / (double)maxLength);
|
368
314
|
|
369
315
|
solution.pos = (U32)pos;
|
370
316
|
solution.length = (U32)maxLength;
|
@@ -374,7 +320,7 @@ static dictItem ZDICT_analyzePos(
|
|
374
320
|
{ U32 id;
|
375
321
|
for (id=start; id<end; id++) {
|
376
322
|
U32 p, pEnd, length;
|
377
|
-
U32 const testedPos = suffix[id];
|
323
|
+
U32 const testedPos = (U32)suffix[id];
|
378
324
|
if (testedPos == pos)
|
379
325
|
length = solution.length;
|
380
326
|
else {
|
@@ -426,7 +372,7 @@ static U32 ZDICT_tryMerge(dictItem* table, dictItem elt, U32 eltNbToSkip, const
|
|
426
372
|
elt = table[u];
|
427
373
|
/* sort : improve rank */
|
428
374
|
while ((u>1) && (table[u-1].savings < elt.savings))
|
429
|
-
|
375
|
+
table[u] = table[u-1], u--;
|
430
376
|
table[u] = elt;
|
431
377
|
return u;
|
432
378
|
} }
|
@@ -437,7 +383,7 @@ static U32 ZDICT_tryMerge(dictItem* table, dictItem elt, U32 eltNbToSkip, const
|
|
437
383
|
|
438
384
|
if ((table[u].pos + table[u].length >= elt.pos) && (table[u].pos < elt.pos)) { /* overlap, existing < new */
|
439
385
|
/* append */
|
440
|
-
int const addedLength = (int)eltEnd - (table[u].pos + table[u].length);
|
386
|
+
int const addedLength = (int)eltEnd - (int)(table[u].pos + table[u].length);
|
441
387
|
table[u].savings += elt.length / 8; /* rough approx bonus */
|
442
388
|
if (addedLength > 0) { /* otherwise, elt fully included into existing */
|
443
389
|
table[u].length += addedLength;
|
@@ -531,10 +477,16 @@ static size_t ZDICT_trainBuffer_legacy(dictItem* dictList, U32 dictListSize,
|
|
531
477
|
clock_t const refreshRate = CLOCKS_PER_SEC * 3 / 10;
|
532
478
|
|
533
479
|
# undef DISPLAYUPDATE
|
534
|
-
# define DISPLAYUPDATE(l, ...)
|
535
|
-
|
536
|
-
|
537
|
-
|
480
|
+
# define DISPLAYUPDATE(l, ...) \
|
481
|
+
do { \
|
482
|
+
if (notificationLevel>=l) { \
|
483
|
+
if (ZDICT_clockSpan(displayClock) > refreshRate) { \
|
484
|
+
displayClock = clock(); \
|
485
|
+
DISPLAY(__VA_ARGS__); \
|
486
|
+
} \
|
487
|
+
if (notificationLevel>=4) fflush(stderr); \
|
488
|
+
} \
|
489
|
+
} while (0)
|
538
490
|
|
539
491
|
/* init */
|
540
492
|
DISPLAYLEVEL(2, "\r%70s\r", ""); /* clean display line */
|
@@ -577,7 +529,7 @@ static size_t ZDICT_trainBuffer_legacy(dictItem* dictList, U32 dictListSize,
|
|
577
529
|
if (solution.length==0) { cursor++; continue; }
|
578
530
|
ZDICT_insertDictItem(dictList, dictListSize, solution, buffer);
|
579
531
|
cursor += solution.length;
|
580
|
-
DISPLAYUPDATE(2, "\r%4.2f %% \r", (double)cursor / bufferSize * 100);
|
532
|
+
DISPLAYUPDATE(2, "\r%4.2f %% \r", (double)cursor / (double)bufferSize * 100.0);
|
581
533
|
} }
|
582
534
|
|
583
535
|
_cleanup:
|
@@ -620,11 +572,11 @@ static void ZDICT_countEStats(EStats_ress_t esr, const ZSTD_parameters* params,
|
|
620
572
|
size_t cSize;
|
621
573
|
|
622
574
|
if (srcSize > blockSizeMax) srcSize = blockSizeMax; /* protection vs large samples */
|
623
|
-
{ size_t const errorCode =
|
575
|
+
{ size_t const errorCode = ZSTD_compressBegin_usingCDict_deprecated(esr.zc, esr.dict);
|
624
576
|
if (ZSTD_isError(errorCode)) { DISPLAYLEVEL(1, "warning : ZSTD_compressBegin_usingCDict failed \n"); return; }
|
625
577
|
|
626
578
|
}
|
627
|
-
cSize =
|
579
|
+
cSize = ZSTD_compressBlock_deprecated(esr.zc, esr.workPlace, ZSTD_BLOCKSIZE_MAX, src, srcSize);
|
628
580
|
if (ZSTD_isError(cSize)) { DISPLAYLEVEL(3, "warning : could not compress sample size %u \n", (unsigned)srcSize); return; }
|
629
581
|
|
630
582
|
if (cSize) { /* if == 0; block is not compressible */
|
@@ -657,8 +609,8 @@ static void ZDICT_countEStats(EStats_ress_t esr, const ZSTD_parameters* params,
|
|
657
609
|
|
658
610
|
if (nbSeq >= 2) { /* rep offsets */
|
659
611
|
const seqDef* const seq = seqStorePtr->sequencesStart;
|
660
|
-
U32 offset1 = seq[0].
|
661
|
-
U32 offset2 = seq[1].
|
612
|
+
U32 offset1 = seq[0].offBase - ZSTD_REP_NUM;
|
613
|
+
U32 offset2 = seq[1].offBase - ZSTD_REP_NUM;
|
662
614
|
if (offset1 >= MAXREPOFFSET) offset1 = 0;
|
663
615
|
if (offset2 >= MAXREPOFFSET) offset2 = 0;
|
664
616
|
repOffsets[offset1] += 3;
|
@@ -729,6 +681,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
|
|
729
681
|
size_t const totalSrcSize = ZDICT_totalSampleSize(fileSizes, nbFiles);
|
730
682
|
size_t const averageSampleSize = totalSrcSize / (nbFiles + !nbFiles);
|
731
683
|
BYTE* dstPtr = (BYTE*)dstBuffer;
|
684
|
+
U32 wksp[HUF_CTABLE_WORKSPACE_SIZE_U32];
|
732
685
|
|
733
686
|
/* init */
|
734
687
|
DEBUGLOG(4, "ZDICT_analyzeEntropy");
|
@@ -761,8 +714,15 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
|
|
761
714
|
pos += fileSizes[u];
|
762
715
|
}
|
763
716
|
|
717
|
+
if (notificationLevel >= 4) {
|
718
|
+
/* writeStats */
|
719
|
+
DISPLAYLEVEL(4, "Offset Code Frequencies : \n");
|
720
|
+
for (u=0; u<=offcodeMax; u++) {
|
721
|
+
DISPLAYLEVEL(4, "%2u :%7u \n", u, offcodeCount[u]);
|
722
|
+
} }
|
723
|
+
|
764
724
|
/* analyze, build stats, starting with literals */
|
765
|
-
{ size_t maxNbBits =
|
725
|
+
{ size_t maxNbBits = HUF_buildCTable_wksp(hufTable, countLit, 255, huffLog, wksp, sizeof(wksp));
|
766
726
|
if (HUF_isError(maxNbBits)) {
|
767
727
|
eSize = maxNbBits;
|
768
728
|
DISPLAYLEVEL(1, " HUF_buildCTable error \n");
|
@@ -771,7 +731,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
|
|
771
731
|
if (maxNbBits==8) { /* not compressible : will fail on HUF_writeCTable() */
|
772
732
|
DISPLAYLEVEL(2, "warning : pathological dataset : literals are not compressible : samples are noisy or too regular \n");
|
773
733
|
ZDICT_flatLit(countLit); /* replace distribution by a fake "mostly flat but still compressible" distribution, that HUF_writeCTable() can encode */
|
774
|
-
maxNbBits =
|
734
|
+
maxNbBits = HUF_buildCTable_wksp(hufTable, countLit, 255, huffLog, wksp, sizeof(wksp));
|
775
735
|
assert(maxNbBits==9);
|
776
736
|
}
|
777
737
|
huffLog = (U32)maxNbBits;
|
@@ -812,7 +772,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
|
|
812
772
|
llLog = (U32)errorCode;
|
813
773
|
|
814
774
|
/* write result to buffer */
|
815
|
-
{ size_t const hhSize =
|
775
|
+
{ size_t const hhSize = HUF_writeCTable_wksp(dstPtr, maxDstSize, hufTable, 255, huffLog, wksp, sizeof(wksp));
|
816
776
|
if (HUF_isError(hhSize)) {
|
817
777
|
eSize = hhSize;
|
818
778
|
DISPLAYLEVEL(1, "HUF_writeCTable error \n");
|
@@ -867,7 +827,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
|
|
867
827
|
MEM_writeLE32(dstPtr+8, bestRepOffset[2].offset);
|
868
828
|
#else
|
869
829
|
/* at this stage, we don't use the result of "most common first offset",
|
870
|
-
|
830
|
+
* as the impact of statistics is not properly evaluated */
|
871
831
|
MEM_writeLE32(dstPtr+0, repStartValue[0]);
|
872
832
|
MEM_writeLE32(dstPtr+4, repStartValue[1]);
|
873
833
|
MEM_writeLE32(dstPtr+8, repStartValue[2]);
|
@@ -883,6 +843,17 @@ _cleanup:
|
|
883
843
|
}
|
884
844
|
|
885
845
|
|
846
|
+
/**
|
847
|
+
* @returns the maximum repcode value
|
848
|
+
*/
|
849
|
+
static U32 ZDICT_maxRep(U32 const reps[ZSTD_REP_NUM])
|
850
|
+
{
|
851
|
+
U32 maxRep = reps[0];
|
852
|
+
int r;
|
853
|
+
for (r = 1; r < ZSTD_REP_NUM; ++r)
|
854
|
+
maxRep = MAX(maxRep, reps[r]);
|
855
|
+
return maxRep;
|
856
|
+
}
|
886
857
|
|
887
858
|
size_t ZDICT_finalizeDictionary(void* dictBuffer, size_t dictBufferCapacity,
|
888
859
|
const void* customDictContent, size_t dictContentSize,
|
@@ -894,11 +865,13 @@ size_t ZDICT_finalizeDictionary(void* dictBuffer, size_t dictBufferCapacity,
|
|
894
865
|
BYTE header[HBUFFSIZE];
|
895
866
|
int const compressionLevel = (params.compressionLevel == 0) ? ZSTD_CLEVEL_DEFAULT : params.compressionLevel;
|
896
867
|
U32 const notificationLevel = params.notificationLevel;
|
868
|
+
/* The final dictionary content must be at least as large as the largest repcode */
|
869
|
+
size_t const minContentSize = (size_t)ZDICT_maxRep(repStartValue);
|
870
|
+
size_t paddingSize;
|
897
871
|
|
898
872
|
/* check conditions */
|
899
873
|
DEBUGLOG(4, "ZDICT_finalizeDictionary");
|
900
874
|
if (dictBufferCapacity < dictContentSize) return ERROR(dstSize_tooSmall);
|
901
|
-
if (dictContentSize < ZDICT_CONTENTSIZE_MIN) return ERROR(srcSize_wrong);
|
902
875
|
if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) return ERROR(dstSize_tooSmall);
|
903
876
|
|
904
877
|
/* dictionary header */
|
@@ -922,12 +895,43 @@ size_t ZDICT_finalizeDictionary(void* dictBuffer, size_t dictBufferCapacity,
|
|
922
895
|
hSize += eSize;
|
923
896
|
}
|
924
897
|
|
925
|
-
/*
|
926
|
-
if (hSize + dictContentSize > dictBufferCapacity)
|
927
|
-
|
928
|
-
|
929
|
-
|
930
|
-
|
898
|
+
/* Shrink the content size if it doesn't fit in the buffer */
|
899
|
+
if (hSize + dictContentSize > dictBufferCapacity) {
|
900
|
+
dictContentSize = dictBufferCapacity - hSize;
|
901
|
+
}
|
902
|
+
|
903
|
+
/* Pad the dictionary content with zeros if it is too small */
|
904
|
+
if (dictContentSize < minContentSize) {
|
905
|
+
RETURN_ERROR_IF(hSize + minContentSize > dictBufferCapacity, dstSize_tooSmall,
|
906
|
+
"dictBufferCapacity too small to fit max repcode");
|
907
|
+
paddingSize = minContentSize - dictContentSize;
|
908
|
+
} else {
|
909
|
+
paddingSize = 0;
|
910
|
+
}
|
911
|
+
|
912
|
+
{
|
913
|
+
size_t const dictSize = hSize + paddingSize + dictContentSize;
|
914
|
+
|
915
|
+
/* The dictionary consists of the header, optional padding, and the content.
|
916
|
+
* The padding comes before the content because the "best" position in the
|
917
|
+
* dictionary is the last byte.
|
918
|
+
*/
|
919
|
+
BYTE* const outDictHeader = (BYTE*)dictBuffer;
|
920
|
+
BYTE* const outDictPadding = outDictHeader + hSize;
|
921
|
+
BYTE* const outDictContent = outDictPadding + paddingSize;
|
922
|
+
|
923
|
+
assert(dictSize <= dictBufferCapacity);
|
924
|
+
assert(outDictContent + dictContentSize == (BYTE*)dictBuffer + dictSize);
|
925
|
+
|
926
|
+
/* First copy the customDictContent into its final location.
|
927
|
+
* `customDictContent` and `dictBuffer` may overlap, so we must
|
928
|
+
* do this before any other writes into the output buffer.
|
929
|
+
* Then copy the header & padding into the output buffer.
|
930
|
+
*/
|
931
|
+
memmove(outDictContent, customDictContent, dictContentSize);
|
932
|
+
memcpy(outDictHeader, header, hSize);
|
933
|
+
memset(outDictPadding, 0, paddingSize);
|
934
|
+
|
931
935
|
return dictSize;
|
932
936
|
}
|
933
937
|
}
|
@@ -967,16 +971,11 @@ static size_t ZDICT_addEntropyTablesFromBuffer_advanced(
|
|
967
971
|
return MIN(dictBufferCapacity, hSize+dictContentSize);
|
968
972
|
}
|
969
973
|
|
970
|
-
/* Hidden declaration for dbio.c */
|
971
|
-
size_t ZDICT_trainFromBuffer_unsafe_legacy(
|
972
|
-
void* dictBuffer, size_t maxDictSize,
|
973
|
-
const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
|
974
|
-
ZDICT_legacy_params_t params);
|
975
974
|
/*! ZDICT_trainFromBuffer_unsafe_legacy() :
|
976
|
-
* Warning : `samplesBuffer` must be followed by noisy guard band
|
975
|
+
* Warning : `samplesBuffer` must be followed by noisy guard band !!!
|
977
976
|
* @return : size of dictionary, or an error code which can be tested with ZDICT_isError()
|
978
977
|
*/
|
979
|
-
size_t ZDICT_trainFromBuffer_unsafe_legacy(
|
978
|
+
static size_t ZDICT_trainFromBuffer_unsafe_legacy(
|
980
979
|
void* dictBuffer, size_t maxDictSize,
|
981
980
|
const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
|
982
981
|
ZDICT_legacy_params_t params)
|
@@ -1,5 +1,5 @@
|
|
1
1
|
/*
|
2
|
-
* Copyright (c)
|
2
|
+
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
3
3
|
* All rights reserved.
|
4
4
|
*
|
5
5
|
* This source code is licensed under both the BSD-style license (found in the
|
@@ -124,6 +124,20 @@ MEM_STATIC size_t ZSTD_decompressLegacy(
|
|
124
124
|
const void* dict,size_t dictSize)
|
125
125
|
{
|
126
126
|
U32 const version = ZSTD_isLegacy(src, compressedSize);
|
127
|
+
char x;
|
128
|
+
/* Avoid passing NULL to legacy decoding. */
|
129
|
+
if (dst == NULL) {
|
130
|
+
assert(dstCapacity == 0);
|
131
|
+
dst = &x;
|
132
|
+
}
|
133
|
+
if (src == NULL) {
|
134
|
+
assert(compressedSize == 0);
|
135
|
+
src = &x;
|
136
|
+
}
|
137
|
+
if (dict == NULL) {
|
138
|
+
assert(dictSize == 0);
|
139
|
+
dict = &x;
|
140
|
+
}
|
127
141
|
(void)dst; (void)dstCapacity; (void)dict; (void)dictSize; /* unused when ZSTD_LEGACY_SUPPORT >= 8 */
|
128
142
|
switch(version)
|
129
143
|
{
|
@@ -242,6 +256,13 @@ MEM_STATIC ZSTD_frameSizeInfo ZSTD_findFrameSizeInfoLegacy(const void *src, size
|
|
242
256
|
frameSizeInfo.compressedSize = ERROR(srcSize_wrong);
|
243
257
|
frameSizeInfo.decompressedBound = ZSTD_CONTENTSIZE_ERROR;
|
244
258
|
}
|
259
|
+
/* In all cases, decompressedBound == nbBlocks * ZSTD_BLOCKSIZE_MAX.
|
260
|
+
* So we can compute nbBlocks without having to change every function.
|
261
|
+
*/
|
262
|
+
if (frameSizeInfo.decompressedBound != ZSTD_CONTENTSIZE_ERROR) {
|
263
|
+
assert((frameSizeInfo.decompressedBound & (ZSTD_BLOCKSIZE_MAX - 1)) == 0);
|
264
|
+
frameSizeInfo.nbBlocks = (size_t)(frameSizeInfo.decompressedBound / ZSTD_BLOCKSIZE_MAX);
|
265
|
+
}
|
245
266
|
return frameSizeInfo;
|
246
267
|
}
|
247
268
|
|
@@ -280,6 +301,12 @@ MEM_STATIC size_t ZSTD_freeLegacyStreamContext(void* legacyContext, U32 version)
|
|
280
301
|
MEM_STATIC size_t ZSTD_initLegacyStream(void** legacyContext, U32 prevVersion, U32 newVersion,
|
281
302
|
const void* dict, size_t dictSize)
|
282
303
|
{
|
304
|
+
char x;
|
305
|
+
/* Avoid passing NULL to legacy decoding. */
|
306
|
+
if (dict == NULL) {
|
307
|
+
assert(dictSize == 0);
|
308
|
+
dict = &x;
|
309
|
+
}
|
283
310
|
DEBUGLOG(5, "ZSTD_initLegacyStream for v0.%u", newVersion);
|
284
311
|
if (prevVersion != newVersion) ZSTD_freeLegacyStreamContext(*legacyContext, prevVersion);
|
285
312
|
switch(newVersion)
|
@@ -339,6 +366,16 @@ MEM_STATIC size_t ZSTD_initLegacyStream(void** legacyContext, U32 prevVersion, U
|
|
339
366
|
MEM_STATIC size_t ZSTD_decompressLegacyStream(void* legacyContext, U32 version,
|
340
367
|
ZSTD_outBuffer* output, ZSTD_inBuffer* input)
|
341
368
|
{
|
369
|
+
static char x;
|
370
|
+
/* Avoid passing NULL to legacy decoding. */
|
371
|
+
if (output->dst == NULL) {
|
372
|
+
assert(output->size == 0);
|
373
|
+
output->dst = &x;
|
374
|
+
}
|
375
|
+
if (input->src == NULL) {
|
376
|
+
assert(input->size == 0);
|
377
|
+
input->src = &x;
|
378
|
+
}
|
342
379
|
DEBUGLOG(5, "ZSTD_decompressLegacyStream for v0.%u", version);
|
343
380
|
switch(version)
|
344
381
|
{
|