extzstd 0.3.2 → 0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +4 -3
- data/contrib/zstd/CHANGELOG +225 -1
- data/contrib/zstd/CONTRIBUTING.md +158 -75
- data/contrib/zstd/LICENSE +4 -4
- data/contrib/zstd/Makefile +106 -69
- data/contrib/zstd/Package.swift +36 -0
- data/contrib/zstd/README.md +64 -36
- data/contrib/zstd/SECURITY.md +15 -0
- data/contrib/zstd/TESTING.md +2 -3
- data/contrib/zstd/lib/BUCK +5 -7
- data/contrib/zstd/lib/Makefile +117 -199
- data/contrib/zstd/lib/README.md +37 -7
- data/contrib/zstd/lib/common/allocations.h +55 -0
- data/contrib/zstd/lib/common/bits.h +200 -0
- data/contrib/zstd/lib/common/bitstream.h +80 -86
- data/contrib/zstd/lib/common/compiler.h +225 -63
- data/contrib/zstd/lib/common/cpu.h +37 -1
- data/contrib/zstd/lib/common/debug.c +7 -1
- data/contrib/zstd/lib/common/debug.h +21 -12
- data/contrib/zstd/lib/common/entropy_common.c +15 -37
- data/contrib/zstd/lib/common/error_private.c +9 -2
- data/contrib/zstd/lib/common/error_private.h +93 -5
- data/contrib/zstd/lib/common/fse.h +12 -87
- data/contrib/zstd/lib/common/fse_decompress.c +37 -117
- data/contrib/zstd/lib/common/huf.h +97 -172
- data/contrib/zstd/lib/common/mem.h +58 -58
- data/contrib/zstd/lib/common/pool.c +38 -17
- data/contrib/zstd/lib/common/pool.h +10 -4
- data/contrib/zstd/lib/common/portability_macros.h +158 -0
- data/contrib/zstd/lib/common/threading.c +74 -14
- data/contrib/zstd/lib/common/threading.h +5 -10
- data/contrib/zstd/lib/common/xxhash.c +6 -814
- data/contrib/zstd/lib/common/xxhash.h +6930 -195
- data/contrib/zstd/lib/common/zstd_common.c +1 -36
- data/contrib/zstd/lib/common/zstd_deps.h +1 -1
- data/contrib/zstd/lib/common/zstd_internal.h +68 -154
- data/contrib/zstd/lib/common/zstd_trace.h +163 -0
- data/contrib/zstd/lib/compress/clevels.h +134 -0
- data/contrib/zstd/lib/compress/fse_compress.c +75 -155
- data/contrib/zstd/lib/compress/hist.c +1 -1
- data/contrib/zstd/lib/compress/hist.h +1 -1
- data/contrib/zstd/lib/compress/huf_compress.c +810 -259
- data/contrib/zstd/lib/compress/zstd_compress.c +2864 -919
- data/contrib/zstd/lib/compress/zstd_compress_internal.h +523 -192
- data/contrib/zstd/lib/compress/zstd_compress_literals.c +117 -40
- data/contrib/zstd/lib/compress/zstd_compress_literals.h +16 -6
- data/contrib/zstd/lib/compress/zstd_compress_sequences.c +28 -19
- data/contrib/zstd/lib/compress/zstd_compress_sequences.h +1 -1
- data/contrib/zstd/lib/compress/zstd_compress_superblock.c +251 -412
- data/contrib/zstd/lib/compress/zstd_compress_superblock.h +1 -1
- data/contrib/zstd/lib/compress/zstd_cwksp.h +284 -97
- data/contrib/zstd/lib/compress/zstd_double_fast.c +382 -133
- data/contrib/zstd/lib/compress/zstd_double_fast.h +14 -2
- data/contrib/zstd/lib/compress/zstd_fast.c +732 -260
- data/contrib/zstd/lib/compress/zstd_fast.h +3 -2
- data/contrib/zstd/lib/compress/zstd_lazy.c +1177 -390
- data/contrib/zstd/lib/compress/zstd_lazy.h +129 -14
- data/contrib/zstd/lib/compress/zstd_ldm.c +280 -210
- data/contrib/zstd/lib/compress/zstd_ldm.h +3 -2
- data/contrib/zstd/lib/compress/zstd_ldm_geartab.h +106 -0
- data/contrib/zstd/lib/compress/zstd_opt.c +516 -285
- data/contrib/zstd/lib/compress/zstd_opt.h +32 -8
- data/contrib/zstd/lib/compress/zstdmt_compress.c +202 -131
- data/contrib/zstd/lib/compress/zstdmt_compress.h +9 -6
- data/contrib/zstd/lib/decompress/huf_decompress.c +1149 -555
- data/contrib/zstd/lib/decompress/huf_decompress_amd64.S +595 -0
- data/contrib/zstd/lib/decompress/zstd_ddict.c +4 -4
- data/contrib/zstd/lib/decompress/zstd_ddict.h +1 -1
- data/contrib/zstd/lib/decompress/zstd_decompress.c +583 -106
- data/contrib/zstd/lib/decompress/zstd_decompress_block.c +1054 -379
- data/contrib/zstd/lib/decompress/zstd_decompress_block.h +14 -3
- data/contrib/zstd/lib/decompress/zstd_decompress_internal.h +56 -6
- data/contrib/zstd/lib/deprecated/zbuff.h +1 -1
- data/contrib/zstd/lib/deprecated/zbuff_common.c +1 -1
- data/contrib/zstd/lib/deprecated/zbuff_compress.c +24 -4
- data/contrib/zstd/lib/deprecated/zbuff_decompress.c +3 -1
- data/contrib/zstd/lib/dictBuilder/cover.c +60 -44
- data/contrib/zstd/lib/dictBuilder/cover.h +6 -11
- data/contrib/zstd/lib/dictBuilder/divsufsort.c +1 -1
- data/contrib/zstd/lib/dictBuilder/fastcover.c +26 -18
- data/contrib/zstd/lib/dictBuilder/zdict.c +100 -101
- data/contrib/zstd/lib/legacy/zstd_legacy.h +38 -1
- data/contrib/zstd/lib/legacy/zstd_v01.c +18 -53
- data/contrib/zstd/lib/legacy/zstd_v01.h +1 -1
- data/contrib/zstd/lib/legacy/zstd_v02.c +28 -85
- data/contrib/zstd/lib/legacy/zstd_v02.h +1 -1
- data/contrib/zstd/lib/legacy/zstd_v03.c +29 -88
- data/contrib/zstd/lib/legacy/zstd_v03.h +1 -1
- data/contrib/zstd/lib/legacy/zstd_v04.c +27 -80
- data/contrib/zstd/lib/legacy/zstd_v04.h +1 -1
- data/contrib/zstd/lib/legacy/zstd_v05.c +36 -85
- data/contrib/zstd/lib/legacy/zstd_v05.h +1 -1
- data/contrib/zstd/lib/legacy/zstd_v06.c +44 -96
- data/contrib/zstd/lib/legacy/zstd_v06.h +1 -1
- data/contrib/zstd/lib/legacy/zstd_v07.c +37 -92
- data/contrib/zstd/lib/legacy/zstd_v07.h +1 -1
- data/contrib/zstd/lib/libzstd.mk +237 -0
- data/contrib/zstd/lib/libzstd.pc.in +4 -3
- data/contrib/zstd/lib/module.modulemap +35 -0
- data/contrib/zstd/lib/{dictBuilder/zdict.h → zdict.h} +202 -33
- data/contrib/zstd/lib/zstd.h +1030 -332
- data/contrib/zstd/lib/{common/zstd_errors.h → zstd_errors.h} +27 -8
- data/ext/extconf.rb +26 -7
- data/ext/extzstd.c +51 -24
- data/ext/extzstd.h +33 -6
- data/ext/extzstd_stream.c +74 -31
- data/ext/libzstd_conf.h +0 -1
- data/ext/zstd_decompress_asm.S +1 -0
- metadata +17 -7
- data/contrib/zstd/appveyor.yml +0 -292
- data/ext/depend +0 -2
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
/*
|
|
2
|
-
* Copyright (c)
|
|
2
|
+
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
3
3
|
* All rights reserved.
|
|
4
4
|
*
|
|
5
5
|
* This source code is licensed under both the BSD-style license (found in the
|
|
@@ -16,21 +16,29 @@
|
|
|
16
16
|
#include <string.h> /* memset */
|
|
17
17
|
#include <time.h> /* clock */
|
|
18
18
|
|
|
19
|
+
#ifndef ZDICT_STATIC_LINKING_ONLY
|
|
20
|
+
# define ZDICT_STATIC_LINKING_ONLY
|
|
21
|
+
#endif
|
|
22
|
+
|
|
19
23
|
#include "../common/mem.h" /* read */
|
|
20
24
|
#include "../common/pool.h"
|
|
21
25
|
#include "../common/threading.h"
|
|
22
|
-
#include "cover.h"
|
|
23
26
|
#include "../common/zstd_internal.h" /* includes zstd.h */
|
|
24
27
|
#include "../compress/zstd_compress_internal.h" /* ZSTD_hash*() */
|
|
25
|
-
#
|
|
26
|
-
#
|
|
27
|
-
#endif
|
|
28
|
-
#include "zdict.h"
|
|
28
|
+
#include "../zdict.h"
|
|
29
|
+
#include "cover.h"
|
|
29
30
|
|
|
30
31
|
|
|
31
32
|
/*-*************************************
|
|
32
33
|
* Constants
|
|
33
34
|
***************************************/
|
|
35
|
+
/**
|
|
36
|
+
* There are 32bit indexes used to ref samples, so limit samples size to 4GB
|
|
37
|
+
* on 64bit builds.
|
|
38
|
+
* For 32bit builds we choose 1 GB.
|
|
39
|
+
* Most 32bit platforms have 2GB user-mode addressable space and we allocate a large
|
|
40
|
+
* contiguous buffer, so 1GB is already a high limit.
|
|
41
|
+
*/
|
|
34
42
|
#define FASTCOVER_MAX_SAMPLES_SIZE (sizeof(size_t) == 8 ? ((unsigned)-1) : ((unsigned)1 GB))
|
|
35
43
|
#define FASTCOVER_MAX_F 31
|
|
36
44
|
#define FASTCOVER_MAX_ACCEL 10
|
|
@@ -43,7 +51,7 @@
|
|
|
43
51
|
* Console display
|
|
44
52
|
***************************************/
|
|
45
53
|
#ifndef LOCALDISPLAYLEVEL
|
|
46
|
-
static int g_displayLevel =
|
|
54
|
+
static int g_displayLevel = 0;
|
|
47
55
|
#endif
|
|
48
56
|
#undef DISPLAY
|
|
49
57
|
#define DISPLAY(...) \
|
|
@@ -296,7 +304,7 @@ FASTCOVER_computeFrequency(U32* freqs, const FASTCOVER_ctx_t* ctx)
|
|
|
296
304
|
|
|
297
305
|
/**
|
|
298
306
|
* Prepare a context for dictionary building.
|
|
299
|
-
* The context is only dependent on the parameter `d` and can used multiple
|
|
307
|
+
* The context is only dependent on the parameter `d` and can be used multiple
|
|
300
308
|
* times.
|
|
301
309
|
* Returns 0 on success or error code on error.
|
|
302
310
|
* The context must be destroyed with `FASTCOVER_ctx_destroy()`.
|
|
@@ -462,20 +470,20 @@ typedef struct FASTCOVER_tryParameters_data_s {
|
|
|
462
470
|
* This function is thread safe if zstd is compiled with multithreaded support.
|
|
463
471
|
* It takes its parameters as an *OWNING* opaque pointer to support threading.
|
|
464
472
|
*/
|
|
465
|
-
static void FASTCOVER_tryParameters(void
|
|
473
|
+
static void FASTCOVER_tryParameters(void* opaque)
|
|
466
474
|
{
|
|
467
475
|
/* Save parameters as local variables */
|
|
468
|
-
FASTCOVER_tryParameters_data_t *const data = (FASTCOVER_tryParameters_data_t
|
|
476
|
+
FASTCOVER_tryParameters_data_t *const data = (FASTCOVER_tryParameters_data_t*)opaque;
|
|
469
477
|
const FASTCOVER_ctx_t *const ctx = data->ctx;
|
|
470
478
|
const ZDICT_cover_params_t parameters = data->parameters;
|
|
471
479
|
size_t dictBufferCapacity = data->dictBufferCapacity;
|
|
472
480
|
size_t totalCompressedSize = ERROR(GENERIC);
|
|
473
481
|
/* Initialize array to keep track of frequency of dmer within activeSegment */
|
|
474
|
-
U16* segmentFreqs = (U16
|
|
482
|
+
U16* segmentFreqs = (U16*)calloc(((U64)1 << ctx->f), sizeof(U16));
|
|
475
483
|
/* Allocate space for hash table, dict, and freqs */
|
|
476
|
-
BYTE *const dict = (BYTE
|
|
484
|
+
BYTE *const dict = (BYTE*)malloc(dictBufferCapacity);
|
|
477
485
|
COVER_dictSelection_t selection = COVER_dictSelectionError(ERROR(GENERIC));
|
|
478
|
-
U32
|
|
486
|
+
U32* freqs = (U32*) malloc(((U64)1 << ctx->f) * sizeof(U32));
|
|
479
487
|
if (!segmentFreqs || !dict || !freqs) {
|
|
480
488
|
DISPLAYLEVEL(1, "Failed to allocate buffers: out of memory\n");
|
|
481
489
|
goto _cleanup;
|
|
@@ -537,7 +545,7 @@ FASTCOVER_convertToFastCoverParams(ZDICT_cover_params_t coverParams,
|
|
|
537
545
|
}
|
|
538
546
|
|
|
539
547
|
|
|
540
|
-
|
|
548
|
+
ZDICTLIB_STATIC_API size_t
|
|
541
549
|
ZDICT_trainFromBuffer_fastCover(void* dictBuffer, size_t dictBufferCapacity,
|
|
542
550
|
const void* samplesBuffer,
|
|
543
551
|
const size_t* samplesSizes, unsigned nbSamples,
|
|
@@ -548,7 +556,7 @@ ZDICT_trainFromBuffer_fastCover(void* dictBuffer, size_t dictBufferCapacity,
|
|
|
548
556
|
ZDICT_cover_params_t coverParams;
|
|
549
557
|
FASTCOVER_accel_t accelParams;
|
|
550
558
|
/* Initialize global data */
|
|
551
|
-
g_displayLevel = parameters.zParams.notificationLevel;
|
|
559
|
+
g_displayLevel = (int)parameters.zParams.notificationLevel;
|
|
552
560
|
/* Assign splitPoint and f if not provided */
|
|
553
561
|
parameters.splitPoint = 1.0;
|
|
554
562
|
parameters.f = parameters.f == 0 ? DEFAULT_F : parameters.f;
|
|
@@ -606,7 +614,7 @@ ZDICT_trainFromBuffer_fastCover(void* dictBuffer, size_t dictBufferCapacity,
|
|
|
606
614
|
}
|
|
607
615
|
|
|
608
616
|
|
|
609
|
-
|
|
617
|
+
ZDICTLIB_STATIC_API size_t
|
|
610
618
|
ZDICT_optimizeTrainFromBuffer_fastCover(
|
|
611
619
|
void* dictBuffer, size_t dictBufferCapacity,
|
|
612
620
|
const void* samplesBuffer,
|
|
@@ -631,7 +639,7 @@ ZDICT_optimizeTrainFromBuffer_fastCover(
|
|
|
631
639
|
const unsigned accel = parameters->accel == 0 ? DEFAULT_ACCEL : parameters->accel;
|
|
632
640
|
const unsigned shrinkDict = 0;
|
|
633
641
|
/* Local variables */
|
|
634
|
-
const int displayLevel = parameters->zParams.notificationLevel;
|
|
642
|
+
const int displayLevel = (int)parameters->zParams.notificationLevel;
|
|
635
643
|
unsigned iteration = 1;
|
|
636
644
|
unsigned d;
|
|
637
645
|
unsigned k;
|
|
@@ -715,7 +723,7 @@ ZDICT_optimizeTrainFromBuffer_fastCover(
|
|
|
715
723
|
data->parameters.splitPoint = splitPoint;
|
|
716
724
|
data->parameters.steps = kSteps;
|
|
717
725
|
data->parameters.shrinkDict = shrinkDict;
|
|
718
|
-
data->parameters.zParams.notificationLevel = g_displayLevel;
|
|
726
|
+
data->parameters.zParams.notificationLevel = (unsigned)g_displayLevel;
|
|
719
727
|
/* Check the parameters */
|
|
720
728
|
if (!FASTCOVER_checkParameters(data->parameters, dictBufferCapacity,
|
|
721
729
|
data->ctx->f, accel)) {
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
/*
|
|
2
|
-
* Copyright (c)
|
|
2
|
+
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
3
3
|
* All rights reserved.
|
|
4
4
|
*
|
|
5
5
|
* This source code is licensed under both the BSD-style license (found in the
|
|
@@ -23,9 +23,13 @@
|
|
|
23
23
|
/* Unix Large Files support (>4GB) */
|
|
24
24
|
#define _FILE_OFFSET_BITS 64
|
|
25
25
|
#if (defined(__sun__) && (!defined(__LP64__))) /* Sun Solaris 32-bits requires specific definitions */
|
|
26
|
+
# ifndef _LARGEFILE_SOURCE
|
|
26
27
|
# define _LARGEFILE_SOURCE
|
|
28
|
+
# endif
|
|
27
29
|
#elif ! defined(__LP64__) /* No point defining Large file for 64 bit */
|
|
30
|
+
# ifndef _LARGEFILE64_SOURCE
|
|
28
31
|
# define _LARGEFILE64_SOURCE
|
|
32
|
+
# endif
|
|
29
33
|
#endif
|
|
30
34
|
|
|
31
35
|
|
|
@@ -37,18 +41,19 @@
|
|
|
37
41
|
#include <stdio.h> /* fprintf, fopen, ftello64 */
|
|
38
42
|
#include <time.h> /* clock */
|
|
39
43
|
|
|
44
|
+
#ifndef ZDICT_STATIC_LINKING_ONLY
|
|
45
|
+
# define ZDICT_STATIC_LINKING_ONLY
|
|
46
|
+
#endif
|
|
47
|
+
|
|
40
48
|
#include "../common/mem.h" /* read */
|
|
41
49
|
#include "../common/fse.h" /* FSE_normalizeCount, FSE_writeNCount */
|
|
42
|
-
#define HUF_STATIC_LINKING_ONLY
|
|
43
50
|
#include "../common/huf.h" /* HUF_buildCTable, HUF_writeCTable */
|
|
44
51
|
#include "../common/zstd_internal.h" /* includes zstd.h */
|
|
45
52
|
#include "../common/xxhash.h" /* XXH64 */
|
|
46
|
-
#include "divsufsort.h"
|
|
47
|
-
#ifndef ZDICT_STATIC_LINKING_ONLY
|
|
48
|
-
# define ZDICT_STATIC_LINKING_ONLY
|
|
49
|
-
#endif
|
|
50
|
-
#include "zdict.h"
|
|
51
53
|
#include "../compress/zstd_compress_internal.h" /* ZSTD_loadCEntropy() */
|
|
54
|
+
#include "../zdict.h"
|
|
55
|
+
#include "divsufsort.h"
|
|
56
|
+
#include "../common/bits.h" /* ZSTD_NbCommonBytes */
|
|
52
57
|
|
|
53
58
|
|
|
54
59
|
/*-*************************************
|
|
@@ -69,9 +74,9 @@ static const U32 g_selectivity_default = 9;
|
|
|
69
74
|
* Console display
|
|
70
75
|
***************************************/
|
|
71
76
|
#undef DISPLAY
|
|
72
|
-
#define DISPLAY(...) { fprintf(stderr, __VA_ARGS__); fflush( stderr ); }
|
|
77
|
+
#define DISPLAY(...) do { fprintf(stderr, __VA_ARGS__); fflush( stderr ); } while (0)
|
|
73
78
|
#undef DISPLAYLEVEL
|
|
74
|
-
#define DISPLAYLEVEL(l, ...) if (notificationLevel>=l) { DISPLAY(__VA_ARGS__); } /* 0 : no display; 1: errors; 2: default; 3: details; 4: debug */
|
|
79
|
+
#define DISPLAYLEVEL(l, ...) do { if (notificationLevel>=l) { DISPLAY(__VA_ARGS__); } } while (0) /* 0 : no display; 1: errors; 2: default; 3: details; 4: debug */
|
|
75
80
|
|
|
76
81
|
static clock_t ZDICT_clockSpan(clock_t nPrevious) { return clock() - nPrevious; }
|
|
77
82
|
|
|
@@ -125,65 +130,6 @@ size_t ZDICT_getDictHeaderSize(const void* dictBuffer, size_t dictSize)
|
|
|
125
130
|
/*-********************************************************
|
|
126
131
|
* Dictionary training functions
|
|
127
132
|
**********************************************************/
|
|
128
|
-
static unsigned ZDICT_NbCommonBytes (size_t val)
|
|
129
|
-
{
|
|
130
|
-
if (MEM_isLittleEndian()) {
|
|
131
|
-
if (MEM_64bits()) {
|
|
132
|
-
# if defined(_MSC_VER) && defined(_WIN64)
|
|
133
|
-
unsigned long r = 0;
|
|
134
|
-
_BitScanForward64( &r, (U64)val );
|
|
135
|
-
return (unsigned)(r>>3);
|
|
136
|
-
# elif defined(__GNUC__) && (__GNUC__ >= 3)
|
|
137
|
-
return (__builtin_ctzll((U64)val) >> 3);
|
|
138
|
-
# else
|
|
139
|
-
static const int DeBruijnBytePos[64] = { 0, 0, 0, 0, 0, 1, 1, 2, 0, 3, 1, 3, 1, 4, 2, 7, 0, 2, 3, 6, 1, 5, 3, 5, 1, 3, 4, 4, 2, 5, 6, 7, 7, 0, 1, 2, 3, 3, 4, 6, 2, 6, 5, 5, 3, 4, 5, 6, 7, 1, 2, 4, 6, 4, 4, 5, 7, 2, 6, 5, 7, 6, 7, 7 };
|
|
140
|
-
return DeBruijnBytePos[((U64)((val & -(long long)val) * 0x0218A392CDABBD3FULL)) >> 58];
|
|
141
|
-
# endif
|
|
142
|
-
} else { /* 32 bits */
|
|
143
|
-
# if defined(_MSC_VER)
|
|
144
|
-
unsigned long r=0;
|
|
145
|
-
_BitScanForward( &r, (U32)val );
|
|
146
|
-
return (unsigned)(r>>3);
|
|
147
|
-
# elif defined(__GNUC__) && (__GNUC__ >= 3)
|
|
148
|
-
return (__builtin_ctz((U32)val) >> 3);
|
|
149
|
-
# else
|
|
150
|
-
static const int DeBruijnBytePos[32] = { 0, 0, 3, 0, 3, 1, 3, 0, 3, 2, 2, 1, 3, 2, 0, 1, 3, 3, 1, 2, 2, 2, 2, 0, 3, 1, 2, 0, 1, 0, 1, 1 };
|
|
151
|
-
return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27];
|
|
152
|
-
# endif
|
|
153
|
-
}
|
|
154
|
-
} else { /* Big Endian CPU */
|
|
155
|
-
if (MEM_64bits()) {
|
|
156
|
-
# if defined(_MSC_VER) && defined(_WIN64)
|
|
157
|
-
unsigned long r = 0;
|
|
158
|
-
_BitScanReverse64( &r, val );
|
|
159
|
-
return (unsigned)(r>>3);
|
|
160
|
-
# elif defined(__GNUC__) && (__GNUC__ >= 3)
|
|
161
|
-
return (__builtin_clzll(val) >> 3);
|
|
162
|
-
# else
|
|
163
|
-
unsigned r;
|
|
164
|
-
const unsigned n32 = sizeof(size_t)*4; /* calculate this way due to compiler complaining in 32-bits mode */
|
|
165
|
-
if (!(val>>n32)) { r=4; } else { r=0; val>>=n32; }
|
|
166
|
-
if (!(val>>16)) { r+=2; val>>=8; } else { val>>=24; }
|
|
167
|
-
r += (!val);
|
|
168
|
-
return r;
|
|
169
|
-
# endif
|
|
170
|
-
} else { /* 32 bits */
|
|
171
|
-
# if defined(_MSC_VER)
|
|
172
|
-
unsigned long r = 0;
|
|
173
|
-
_BitScanReverse( &r, (unsigned long)val );
|
|
174
|
-
return (unsigned)(r>>3);
|
|
175
|
-
# elif defined(__GNUC__) && (__GNUC__ >= 3)
|
|
176
|
-
return (__builtin_clz((U32)val) >> 3);
|
|
177
|
-
# else
|
|
178
|
-
unsigned r;
|
|
179
|
-
if (!(val>>16)) { r=2; val>>=8; } else { r=0; val>>=24; }
|
|
180
|
-
r += (!val);
|
|
181
|
-
return r;
|
|
182
|
-
# endif
|
|
183
|
-
} }
|
|
184
|
-
}
|
|
185
|
-
|
|
186
|
-
|
|
187
133
|
/*! ZDICT_count() :
|
|
188
134
|
Count the nb of common bytes between 2 pointers.
|
|
189
135
|
Note : this function presumes end of buffer followed by noisy guard band.
|
|
@@ -198,7 +144,7 @@ static size_t ZDICT_count(const void* pIn, const void* pMatch)
|
|
|
198
144
|
pMatch = (const char*)pMatch+sizeof(size_t);
|
|
199
145
|
continue;
|
|
200
146
|
}
|
|
201
|
-
pIn = (const char*)pIn+
|
|
147
|
+
pIn = (const char*)pIn+ZSTD_NbCommonBytes(diff);
|
|
202
148
|
return (size_t)((const char*)pIn - pStart);
|
|
203
149
|
}
|
|
204
150
|
}
|
|
@@ -230,7 +176,7 @@ static dictItem ZDICT_analyzePos(
|
|
|
230
176
|
U32 savings[LLIMIT] = {0};
|
|
231
177
|
const BYTE* b = (const BYTE*)buffer;
|
|
232
178
|
size_t maxLength = LLIMIT;
|
|
233
|
-
size_t pos = suffix[start];
|
|
179
|
+
size_t pos = (size_t)suffix[start];
|
|
234
180
|
U32 end = start;
|
|
235
181
|
dictItem solution;
|
|
236
182
|
|
|
@@ -364,7 +310,7 @@ static dictItem ZDICT_analyzePos(
|
|
|
364
310
|
savings[i] = savings[i-1] + (lengthList[i] * (i-3));
|
|
365
311
|
|
|
366
312
|
DISPLAYLEVEL(4, "Selected dict at position %u, of length %u : saves %u (ratio: %.2f) \n",
|
|
367
|
-
(unsigned)pos, (unsigned)maxLength, (unsigned)savings[maxLength], (double)savings[maxLength] / maxLength);
|
|
313
|
+
(unsigned)pos, (unsigned)maxLength, (unsigned)savings[maxLength], (double)savings[maxLength] / (double)maxLength);
|
|
368
314
|
|
|
369
315
|
solution.pos = (U32)pos;
|
|
370
316
|
solution.length = (U32)maxLength;
|
|
@@ -374,7 +320,7 @@ static dictItem ZDICT_analyzePos(
|
|
|
374
320
|
{ U32 id;
|
|
375
321
|
for (id=start; id<end; id++) {
|
|
376
322
|
U32 p, pEnd, length;
|
|
377
|
-
U32 const testedPos = suffix[id];
|
|
323
|
+
U32 const testedPos = (U32)suffix[id];
|
|
378
324
|
if (testedPos == pos)
|
|
379
325
|
length = solution.length;
|
|
380
326
|
else {
|
|
@@ -426,7 +372,7 @@ static U32 ZDICT_tryMerge(dictItem* table, dictItem elt, U32 eltNbToSkip, const
|
|
|
426
372
|
elt = table[u];
|
|
427
373
|
/* sort : improve rank */
|
|
428
374
|
while ((u>1) && (table[u-1].savings < elt.savings))
|
|
429
|
-
|
|
375
|
+
table[u] = table[u-1], u--;
|
|
430
376
|
table[u] = elt;
|
|
431
377
|
return u;
|
|
432
378
|
} }
|
|
@@ -437,7 +383,7 @@ static U32 ZDICT_tryMerge(dictItem* table, dictItem elt, U32 eltNbToSkip, const
|
|
|
437
383
|
|
|
438
384
|
if ((table[u].pos + table[u].length >= elt.pos) && (table[u].pos < elt.pos)) { /* overlap, existing < new */
|
|
439
385
|
/* append */
|
|
440
|
-
int const addedLength = (int)eltEnd - (table[u].pos + table[u].length);
|
|
386
|
+
int const addedLength = (int)eltEnd - (int)(table[u].pos + table[u].length);
|
|
441
387
|
table[u].savings += elt.length / 8; /* rough approx bonus */
|
|
442
388
|
if (addedLength > 0) { /* otherwise, elt fully included into existing */
|
|
443
389
|
table[u].length += addedLength;
|
|
@@ -531,10 +477,16 @@ static size_t ZDICT_trainBuffer_legacy(dictItem* dictList, U32 dictListSize,
|
|
|
531
477
|
clock_t const refreshRate = CLOCKS_PER_SEC * 3 / 10;
|
|
532
478
|
|
|
533
479
|
# undef DISPLAYUPDATE
|
|
534
|
-
# define DISPLAYUPDATE(l, ...)
|
|
535
|
-
|
|
536
|
-
|
|
537
|
-
|
|
480
|
+
# define DISPLAYUPDATE(l, ...) \
|
|
481
|
+
do { \
|
|
482
|
+
if (notificationLevel>=l) { \
|
|
483
|
+
if (ZDICT_clockSpan(displayClock) > refreshRate) { \
|
|
484
|
+
displayClock = clock(); \
|
|
485
|
+
DISPLAY(__VA_ARGS__); \
|
|
486
|
+
} \
|
|
487
|
+
if (notificationLevel>=4) fflush(stderr); \
|
|
488
|
+
} \
|
|
489
|
+
} while (0)
|
|
538
490
|
|
|
539
491
|
/* init */
|
|
540
492
|
DISPLAYLEVEL(2, "\r%70s\r", ""); /* clean display line */
|
|
@@ -577,7 +529,7 @@ static size_t ZDICT_trainBuffer_legacy(dictItem* dictList, U32 dictListSize,
|
|
|
577
529
|
if (solution.length==0) { cursor++; continue; }
|
|
578
530
|
ZDICT_insertDictItem(dictList, dictListSize, solution, buffer);
|
|
579
531
|
cursor += solution.length;
|
|
580
|
-
DISPLAYUPDATE(2, "\r%4.2f %% \r", (double)cursor / bufferSize * 100);
|
|
532
|
+
DISPLAYUPDATE(2, "\r%4.2f %% \r", (double)cursor / (double)bufferSize * 100.0);
|
|
581
533
|
} }
|
|
582
534
|
|
|
583
535
|
_cleanup:
|
|
@@ -620,11 +572,11 @@ static void ZDICT_countEStats(EStats_ress_t esr, const ZSTD_parameters* params,
|
|
|
620
572
|
size_t cSize;
|
|
621
573
|
|
|
622
574
|
if (srcSize > blockSizeMax) srcSize = blockSizeMax; /* protection vs large samples */
|
|
623
|
-
{ size_t const errorCode =
|
|
575
|
+
{ size_t const errorCode = ZSTD_compressBegin_usingCDict_deprecated(esr.zc, esr.dict);
|
|
624
576
|
if (ZSTD_isError(errorCode)) { DISPLAYLEVEL(1, "warning : ZSTD_compressBegin_usingCDict failed \n"); return; }
|
|
625
577
|
|
|
626
578
|
}
|
|
627
|
-
cSize =
|
|
579
|
+
cSize = ZSTD_compressBlock_deprecated(esr.zc, esr.workPlace, ZSTD_BLOCKSIZE_MAX, src, srcSize);
|
|
628
580
|
if (ZSTD_isError(cSize)) { DISPLAYLEVEL(3, "warning : could not compress sample size %u \n", (unsigned)srcSize); return; }
|
|
629
581
|
|
|
630
582
|
if (cSize) { /* if == 0; block is not compressible */
|
|
@@ -657,8 +609,8 @@ static void ZDICT_countEStats(EStats_ress_t esr, const ZSTD_parameters* params,
|
|
|
657
609
|
|
|
658
610
|
if (nbSeq >= 2) { /* rep offsets */
|
|
659
611
|
const seqDef* const seq = seqStorePtr->sequencesStart;
|
|
660
|
-
U32 offset1 = seq[0].
|
|
661
|
-
U32 offset2 = seq[1].
|
|
612
|
+
U32 offset1 = seq[0].offBase - ZSTD_REP_NUM;
|
|
613
|
+
U32 offset2 = seq[1].offBase - ZSTD_REP_NUM;
|
|
662
614
|
if (offset1 >= MAXREPOFFSET) offset1 = 0;
|
|
663
615
|
if (offset2 >= MAXREPOFFSET) offset2 = 0;
|
|
664
616
|
repOffsets[offset1] += 3;
|
|
@@ -729,6 +681,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
|
|
|
729
681
|
size_t const totalSrcSize = ZDICT_totalSampleSize(fileSizes, nbFiles);
|
|
730
682
|
size_t const averageSampleSize = totalSrcSize / (nbFiles + !nbFiles);
|
|
731
683
|
BYTE* dstPtr = (BYTE*)dstBuffer;
|
|
684
|
+
U32 wksp[HUF_CTABLE_WORKSPACE_SIZE_U32];
|
|
732
685
|
|
|
733
686
|
/* init */
|
|
734
687
|
DEBUGLOG(4, "ZDICT_analyzeEntropy");
|
|
@@ -761,8 +714,15 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
|
|
|
761
714
|
pos += fileSizes[u];
|
|
762
715
|
}
|
|
763
716
|
|
|
717
|
+
if (notificationLevel >= 4) {
|
|
718
|
+
/* writeStats */
|
|
719
|
+
DISPLAYLEVEL(4, "Offset Code Frequencies : \n");
|
|
720
|
+
for (u=0; u<=offcodeMax; u++) {
|
|
721
|
+
DISPLAYLEVEL(4, "%2u :%7u \n", u, offcodeCount[u]);
|
|
722
|
+
} }
|
|
723
|
+
|
|
764
724
|
/* analyze, build stats, starting with literals */
|
|
765
|
-
{ size_t maxNbBits =
|
|
725
|
+
{ size_t maxNbBits = HUF_buildCTable_wksp(hufTable, countLit, 255, huffLog, wksp, sizeof(wksp));
|
|
766
726
|
if (HUF_isError(maxNbBits)) {
|
|
767
727
|
eSize = maxNbBits;
|
|
768
728
|
DISPLAYLEVEL(1, " HUF_buildCTable error \n");
|
|
@@ -771,7 +731,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
|
|
|
771
731
|
if (maxNbBits==8) { /* not compressible : will fail on HUF_writeCTable() */
|
|
772
732
|
DISPLAYLEVEL(2, "warning : pathological dataset : literals are not compressible : samples are noisy or too regular \n");
|
|
773
733
|
ZDICT_flatLit(countLit); /* replace distribution by a fake "mostly flat but still compressible" distribution, that HUF_writeCTable() can encode */
|
|
774
|
-
maxNbBits =
|
|
734
|
+
maxNbBits = HUF_buildCTable_wksp(hufTable, countLit, 255, huffLog, wksp, sizeof(wksp));
|
|
775
735
|
assert(maxNbBits==9);
|
|
776
736
|
}
|
|
777
737
|
huffLog = (U32)maxNbBits;
|
|
@@ -812,7 +772,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
|
|
|
812
772
|
llLog = (U32)errorCode;
|
|
813
773
|
|
|
814
774
|
/* write result to buffer */
|
|
815
|
-
{ size_t const hhSize =
|
|
775
|
+
{ size_t const hhSize = HUF_writeCTable_wksp(dstPtr, maxDstSize, hufTable, 255, huffLog, wksp, sizeof(wksp));
|
|
816
776
|
if (HUF_isError(hhSize)) {
|
|
817
777
|
eSize = hhSize;
|
|
818
778
|
DISPLAYLEVEL(1, "HUF_writeCTable error \n");
|
|
@@ -867,7 +827,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
|
|
|
867
827
|
MEM_writeLE32(dstPtr+8, bestRepOffset[2].offset);
|
|
868
828
|
#else
|
|
869
829
|
/* at this stage, we don't use the result of "most common first offset",
|
|
870
|
-
|
|
830
|
+
* as the impact of statistics is not properly evaluated */
|
|
871
831
|
MEM_writeLE32(dstPtr+0, repStartValue[0]);
|
|
872
832
|
MEM_writeLE32(dstPtr+4, repStartValue[1]);
|
|
873
833
|
MEM_writeLE32(dstPtr+8, repStartValue[2]);
|
|
@@ -883,6 +843,17 @@ _cleanup:
|
|
|
883
843
|
}
|
|
884
844
|
|
|
885
845
|
|
|
846
|
+
/**
|
|
847
|
+
* @returns the maximum repcode value
|
|
848
|
+
*/
|
|
849
|
+
static U32 ZDICT_maxRep(U32 const reps[ZSTD_REP_NUM])
|
|
850
|
+
{
|
|
851
|
+
U32 maxRep = reps[0];
|
|
852
|
+
int r;
|
|
853
|
+
for (r = 1; r < ZSTD_REP_NUM; ++r)
|
|
854
|
+
maxRep = MAX(maxRep, reps[r]);
|
|
855
|
+
return maxRep;
|
|
856
|
+
}
|
|
886
857
|
|
|
887
858
|
size_t ZDICT_finalizeDictionary(void* dictBuffer, size_t dictBufferCapacity,
|
|
888
859
|
const void* customDictContent, size_t dictContentSize,
|
|
@@ -894,11 +865,13 @@ size_t ZDICT_finalizeDictionary(void* dictBuffer, size_t dictBufferCapacity,
|
|
|
894
865
|
BYTE header[HBUFFSIZE];
|
|
895
866
|
int const compressionLevel = (params.compressionLevel == 0) ? ZSTD_CLEVEL_DEFAULT : params.compressionLevel;
|
|
896
867
|
U32 const notificationLevel = params.notificationLevel;
|
|
868
|
+
/* The final dictionary content must be at least as large as the largest repcode */
|
|
869
|
+
size_t const minContentSize = (size_t)ZDICT_maxRep(repStartValue);
|
|
870
|
+
size_t paddingSize;
|
|
897
871
|
|
|
898
872
|
/* check conditions */
|
|
899
873
|
DEBUGLOG(4, "ZDICT_finalizeDictionary");
|
|
900
874
|
if (dictBufferCapacity < dictContentSize) return ERROR(dstSize_tooSmall);
|
|
901
|
-
if (dictContentSize < ZDICT_CONTENTSIZE_MIN) return ERROR(srcSize_wrong);
|
|
902
875
|
if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) return ERROR(dstSize_tooSmall);
|
|
903
876
|
|
|
904
877
|
/* dictionary header */
|
|
@@ -922,12 +895,43 @@ size_t ZDICT_finalizeDictionary(void* dictBuffer, size_t dictBufferCapacity,
|
|
|
922
895
|
hSize += eSize;
|
|
923
896
|
}
|
|
924
897
|
|
|
925
|
-
/*
|
|
926
|
-
if (hSize + dictContentSize > dictBufferCapacity)
|
|
927
|
-
|
|
928
|
-
|
|
929
|
-
|
|
930
|
-
|
|
898
|
+
/* Shrink the content size if it doesn't fit in the buffer */
|
|
899
|
+
if (hSize + dictContentSize > dictBufferCapacity) {
|
|
900
|
+
dictContentSize = dictBufferCapacity - hSize;
|
|
901
|
+
}
|
|
902
|
+
|
|
903
|
+
/* Pad the dictionary content with zeros if it is too small */
|
|
904
|
+
if (dictContentSize < minContentSize) {
|
|
905
|
+
RETURN_ERROR_IF(hSize + minContentSize > dictBufferCapacity, dstSize_tooSmall,
|
|
906
|
+
"dictBufferCapacity too small to fit max repcode");
|
|
907
|
+
paddingSize = minContentSize - dictContentSize;
|
|
908
|
+
} else {
|
|
909
|
+
paddingSize = 0;
|
|
910
|
+
}
|
|
911
|
+
|
|
912
|
+
{
|
|
913
|
+
size_t const dictSize = hSize + paddingSize + dictContentSize;
|
|
914
|
+
|
|
915
|
+
/* The dictionary consists of the header, optional padding, and the content.
|
|
916
|
+
* The padding comes before the content because the "best" position in the
|
|
917
|
+
* dictionary is the last byte.
|
|
918
|
+
*/
|
|
919
|
+
BYTE* const outDictHeader = (BYTE*)dictBuffer;
|
|
920
|
+
BYTE* const outDictPadding = outDictHeader + hSize;
|
|
921
|
+
BYTE* const outDictContent = outDictPadding + paddingSize;
|
|
922
|
+
|
|
923
|
+
assert(dictSize <= dictBufferCapacity);
|
|
924
|
+
assert(outDictContent + dictContentSize == (BYTE*)dictBuffer + dictSize);
|
|
925
|
+
|
|
926
|
+
/* First copy the customDictContent into its final location.
|
|
927
|
+
* `customDictContent` and `dictBuffer` may overlap, so we must
|
|
928
|
+
* do this before any other writes into the output buffer.
|
|
929
|
+
* Then copy the header & padding into the output buffer.
|
|
930
|
+
*/
|
|
931
|
+
memmove(outDictContent, customDictContent, dictContentSize);
|
|
932
|
+
memcpy(outDictHeader, header, hSize);
|
|
933
|
+
memset(outDictPadding, 0, paddingSize);
|
|
934
|
+
|
|
931
935
|
return dictSize;
|
|
932
936
|
}
|
|
933
937
|
}
|
|
@@ -967,16 +971,11 @@ static size_t ZDICT_addEntropyTablesFromBuffer_advanced(
|
|
|
967
971
|
return MIN(dictBufferCapacity, hSize+dictContentSize);
|
|
968
972
|
}
|
|
969
973
|
|
|
970
|
-
/* Hidden declaration for dbio.c */
|
|
971
|
-
size_t ZDICT_trainFromBuffer_unsafe_legacy(
|
|
972
|
-
void* dictBuffer, size_t maxDictSize,
|
|
973
|
-
const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
|
|
974
|
-
ZDICT_legacy_params_t params);
|
|
975
974
|
/*! ZDICT_trainFromBuffer_unsafe_legacy() :
|
|
976
|
-
* Warning : `samplesBuffer` must be followed by noisy guard band
|
|
975
|
+
* Warning : `samplesBuffer` must be followed by noisy guard band !!!
|
|
977
976
|
* @return : size of dictionary, or an error code which can be tested with ZDICT_isError()
|
|
978
977
|
*/
|
|
979
|
-
size_t ZDICT_trainFromBuffer_unsafe_legacy(
|
|
978
|
+
static size_t ZDICT_trainFromBuffer_unsafe_legacy(
|
|
980
979
|
void* dictBuffer, size_t maxDictSize,
|
|
981
980
|
const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
|
|
982
981
|
ZDICT_legacy_params_t params)
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
/*
|
|
2
|
-
* Copyright (c)
|
|
2
|
+
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
3
3
|
* All rights reserved.
|
|
4
4
|
*
|
|
5
5
|
* This source code is licensed under both the BSD-style license (found in the
|
|
@@ -124,6 +124,20 @@ MEM_STATIC size_t ZSTD_decompressLegacy(
|
|
|
124
124
|
const void* dict,size_t dictSize)
|
|
125
125
|
{
|
|
126
126
|
U32 const version = ZSTD_isLegacy(src, compressedSize);
|
|
127
|
+
char x;
|
|
128
|
+
/* Avoid passing NULL to legacy decoding. */
|
|
129
|
+
if (dst == NULL) {
|
|
130
|
+
assert(dstCapacity == 0);
|
|
131
|
+
dst = &x;
|
|
132
|
+
}
|
|
133
|
+
if (src == NULL) {
|
|
134
|
+
assert(compressedSize == 0);
|
|
135
|
+
src = &x;
|
|
136
|
+
}
|
|
137
|
+
if (dict == NULL) {
|
|
138
|
+
assert(dictSize == 0);
|
|
139
|
+
dict = &x;
|
|
140
|
+
}
|
|
127
141
|
(void)dst; (void)dstCapacity; (void)dict; (void)dictSize; /* unused when ZSTD_LEGACY_SUPPORT >= 8 */
|
|
128
142
|
switch(version)
|
|
129
143
|
{
|
|
@@ -242,6 +256,13 @@ MEM_STATIC ZSTD_frameSizeInfo ZSTD_findFrameSizeInfoLegacy(const void *src, size
|
|
|
242
256
|
frameSizeInfo.compressedSize = ERROR(srcSize_wrong);
|
|
243
257
|
frameSizeInfo.decompressedBound = ZSTD_CONTENTSIZE_ERROR;
|
|
244
258
|
}
|
|
259
|
+
/* In all cases, decompressedBound == nbBlocks * ZSTD_BLOCKSIZE_MAX.
|
|
260
|
+
* So we can compute nbBlocks without having to change every function.
|
|
261
|
+
*/
|
|
262
|
+
if (frameSizeInfo.decompressedBound != ZSTD_CONTENTSIZE_ERROR) {
|
|
263
|
+
assert((frameSizeInfo.decompressedBound & (ZSTD_BLOCKSIZE_MAX - 1)) == 0);
|
|
264
|
+
frameSizeInfo.nbBlocks = (size_t)(frameSizeInfo.decompressedBound / ZSTD_BLOCKSIZE_MAX);
|
|
265
|
+
}
|
|
245
266
|
return frameSizeInfo;
|
|
246
267
|
}
|
|
247
268
|
|
|
@@ -280,6 +301,12 @@ MEM_STATIC size_t ZSTD_freeLegacyStreamContext(void* legacyContext, U32 version)
|
|
|
280
301
|
MEM_STATIC size_t ZSTD_initLegacyStream(void** legacyContext, U32 prevVersion, U32 newVersion,
|
|
281
302
|
const void* dict, size_t dictSize)
|
|
282
303
|
{
|
|
304
|
+
char x;
|
|
305
|
+
/* Avoid passing NULL to legacy decoding. */
|
|
306
|
+
if (dict == NULL) {
|
|
307
|
+
assert(dictSize == 0);
|
|
308
|
+
dict = &x;
|
|
309
|
+
}
|
|
283
310
|
DEBUGLOG(5, "ZSTD_initLegacyStream for v0.%u", newVersion);
|
|
284
311
|
if (prevVersion != newVersion) ZSTD_freeLegacyStreamContext(*legacyContext, prevVersion);
|
|
285
312
|
switch(newVersion)
|
|
@@ -339,6 +366,16 @@ MEM_STATIC size_t ZSTD_initLegacyStream(void** legacyContext, U32 prevVersion, U
|
|
|
339
366
|
MEM_STATIC size_t ZSTD_decompressLegacyStream(void* legacyContext, U32 version,
|
|
340
367
|
ZSTD_outBuffer* output, ZSTD_inBuffer* input)
|
|
341
368
|
{
|
|
369
|
+
static char x;
|
|
370
|
+
/* Avoid passing NULL to legacy decoding. */
|
|
371
|
+
if (output->dst == NULL) {
|
|
372
|
+
assert(output->size == 0);
|
|
373
|
+
output->dst = &x;
|
|
374
|
+
}
|
|
375
|
+
if (input->src == NULL) {
|
|
376
|
+
assert(input->size == 0);
|
|
377
|
+
input->src = &x;
|
|
378
|
+
}
|
|
342
379
|
DEBUGLOG(5, "ZSTD_decompressLegacyStream for v0.%u", version);
|
|
343
380
|
switch(version)
|
|
344
381
|
{
|