zstd-ruby 1.4.5.0 → 1.5.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +2 -0
- data/README.md +78 -5
- data/Rakefile +8 -2
- data/ext/zstdruby/common.h +15 -0
- data/ext/zstdruby/extconf.rb +3 -2
- data/ext/zstdruby/libzstd/common/allocations.h +55 -0
- data/ext/zstdruby/libzstd/common/bits.h +200 -0
- data/ext/zstdruby/libzstd/common/bitstream.h +45 -62
- data/ext/zstdruby/libzstd/common/compiler.h +205 -22
- data/ext/zstdruby/libzstd/common/cpu.h +1 -3
- data/ext/zstdruby/libzstd/common/debug.c +1 -1
- data/ext/zstdruby/libzstd/common/debug.h +12 -19
- data/ext/zstdruby/libzstd/common/entropy_common.c +172 -48
- data/ext/zstdruby/libzstd/common/error_private.c +10 -2
- data/ext/zstdruby/libzstd/common/error_private.h +82 -3
- data/ext/zstdruby/libzstd/common/fse.h +37 -86
- data/ext/zstdruby/libzstd/common/fse_decompress.c +117 -92
- data/ext/zstdruby/libzstd/common/huf.h +99 -166
- data/ext/zstdruby/libzstd/common/mem.h +124 -142
- data/ext/zstdruby/libzstd/common/pool.c +54 -27
- data/ext/zstdruby/libzstd/common/pool.h +10 -4
- data/ext/zstdruby/libzstd/common/portability_macros.h +156 -0
- data/ext/zstdruby/libzstd/common/threading.c +74 -19
- data/ext/zstdruby/libzstd/common/threading.h +5 -10
- data/ext/zstdruby/libzstd/common/xxhash.c +7 -847
- data/ext/zstdruby/libzstd/common/xxhash.h +5568 -167
- data/ext/zstdruby/libzstd/common/zstd_common.c +2 -37
- data/ext/zstdruby/libzstd/common/zstd_deps.h +111 -0
- data/ext/zstdruby/libzstd/common/zstd_internal.h +132 -187
- data/ext/zstdruby/libzstd/common/zstd_trace.h +163 -0
- data/ext/zstdruby/libzstd/compress/clevels.h +134 -0
- data/ext/zstdruby/libzstd/compress/fse_compress.c +83 -157
- data/ext/zstdruby/libzstd/compress/hist.c +27 -29
- data/ext/zstdruby/libzstd/compress/hist.h +2 -2
- data/ext/zstdruby/libzstd/compress/huf_compress.c +916 -279
- data/ext/zstdruby/libzstd/compress/zstd_compress.c +3773 -1019
- data/ext/zstdruby/libzstd/compress/zstd_compress_internal.h +610 -203
- data/ext/zstdruby/libzstd/compress/zstd_compress_literals.c +119 -42
- data/ext/zstdruby/libzstd/compress/zstd_compress_literals.h +16 -6
- data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.c +42 -19
- data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.h +1 -1
- data/ext/zstdruby/libzstd/compress/zstd_compress_superblock.c +49 -317
- data/ext/zstdruby/libzstd/compress/zstd_compress_superblock.h +1 -1
- data/ext/zstdruby/libzstd/compress/zstd_cwksp.h +320 -103
- data/ext/zstdruby/libzstd/compress/zstd_double_fast.c +388 -151
- data/ext/zstdruby/libzstd/compress/zstd_double_fast.h +3 -2
- data/ext/zstdruby/libzstd/compress/zstd_fast.c +729 -265
- data/ext/zstdruby/libzstd/compress/zstd_fast.h +3 -2
- data/ext/zstdruby/libzstd/compress/zstd_lazy.c +1270 -251
- data/ext/zstdruby/libzstd/compress/zstd_lazy.h +61 -1
- data/ext/zstdruby/libzstd/compress/zstd_ldm.c +324 -219
- data/ext/zstdruby/libzstd/compress/zstd_ldm.h +9 -2
- data/ext/zstdruby/libzstd/compress/zstd_ldm_geartab.h +106 -0
- data/ext/zstdruby/libzstd/compress/zstd_opt.c +481 -209
- data/ext/zstdruby/libzstd/compress/zstd_opt.h +1 -1
- data/ext/zstdruby/libzstd/compress/zstdmt_compress.c +181 -457
- data/ext/zstdruby/libzstd/compress/zstdmt_compress.h +34 -113
- data/ext/zstdruby/libzstd/decompress/huf_decompress.c +1199 -565
- data/ext/zstdruby/libzstd/decompress/huf_decompress_amd64.S +576 -0
- data/ext/zstdruby/libzstd/decompress/zstd_ddict.c +12 -12
- data/ext/zstdruby/libzstd/decompress/zstd_ddict.h +2 -2
- data/ext/zstdruby/libzstd/decompress/zstd_decompress.c +627 -157
- data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.c +1086 -326
- data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.h +19 -5
- data/ext/zstdruby/libzstd/decompress/zstd_decompress_internal.h +62 -13
- data/ext/zstdruby/libzstd/dictBuilder/cover.c +73 -52
- data/ext/zstdruby/libzstd/dictBuilder/cover.h +7 -6
- data/ext/zstdruby/libzstd/dictBuilder/divsufsort.c +1 -1
- data/ext/zstdruby/libzstd/dictBuilder/fastcover.c +44 -35
- data/ext/zstdruby/libzstd/dictBuilder/zdict.c +103 -111
- data/ext/zstdruby/libzstd/{dictBuilder/zdict.h → zdict.h} +203 -34
- data/ext/zstdruby/libzstd/zstd.h +1217 -287
- data/ext/zstdruby/libzstd/{common/zstd_errors.h → zstd_errors.h} +28 -8
- data/ext/zstdruby/main.c +20 -0
- data/ext/zstdruby/skippable_frame.c +63 -0
- data/ext/zstdruby/streaming_compress.c +177 -0
- data/ext/zstdruby/streaming_compress.h +5 -0
- data/ext/zstdruby/streaming_decompress.c +123 -0
- data/ext/zstdruby/zstdruby.c +114 -32
- data/lib/zstd-ruby/version.rb +1 -1
- data/lib/zstd-ruby.rb +0 -1
- data/zstd-ruby.gemspec +1 -1
- metadata +19 -36
- data/.travis.yml +0 -14
- data/ext/zstdruby/libzstd/.gitignore +0 -3
- data/ext/zstdruby/libzstd/BUCK +0 -234
- data/ext/zstdruby/libzstd/Makefile +0 -354
- data/ext/zstdruby/libzstd/README.md +0 -179
- data/ext/zstdruby/libzstd/deprecated/zbuff.h +0 -214
- data/ext/zstdruby/libzstd/deprecated/zbuff_common.c +0 -26
- data/ext/zstdruby/libzstd/deprecated/zbuff_compress.c +0 -147
- data/ext/zstdruby/libzstd/deprecated/zbuff_decompress.c +0 -75
- data/ext/zstdruby/libzstd/dll/example/Makefile +0 -48
- data/ext/zstdruby/libzstd/dll/example/README.md +0 -69
- data/ext/zstdruby/libzstd/dll/example/build_package.bat +0 -20
- data/ext/zstdruby/libzstd/dll/example/fullbench-dll.sln +0 -25
- data/ext/zstdruby/libzstd/dll/example/fullbench-dll.vcxproj +0 -181
- data/ext/zstdruby/libzstd/legacy/zstd_legacy.h +0 -415
- data/ext/zstdruby/libzstd/legacy/zstd_v01.c +0 -2158
- data/ext/zstdruby/libzstd/legacy/zstd_v01.h +0 -94
- data/ext/zstdruby/libzstd/legacy/zstd_v02.c +0 -3518
- data/ext/zstdruby/libzstd/legacy/zstd_v02.h +0 -93
- data/ext/zstdruby/libzstd/legacy/zstd_v03.c +0 -3160
- data/ext/zstdruby/libzstd/legacy/zstd_v03.h +0 -93
- data/ext/zstdruby/libzstd/legacy/zstd_v04.c +0 -3647
- data/ext/zstdruby/libzstd/legacy/zstd_v04.h +0 -142
- data/ext/zstdruby/libzstd/legacy/zstd_v05.c +0 -4050
- data/ext/zstdruby/libzstd/legacy/zstd_v05.h +0 -162
- data/ext/zstdruby/libzstd/legacy/zstd_v06.c +0 -4154
- data/ext/zstdruby/libzstd/legacy/zstd_v06.h +0 -172
- data/ext/zstdruby/libzstd/legacy/zstd_v07.c +0 -4541
- data/ext/zstdruby/libzstd/legacy/zstd_v07.h +0 -187
- data/ext/zstdruby/libzstd/libzstd.pc.in +0 -15
- data/ext/zstdruby/zstdruby.h +0 -6
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
/*
|
|
2
|
-
* Copyright (c)
|
|
2
|
+
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
3
3
|
* All rights reserved.
|
|
4
4
|
*
|
|
5
5
|
* This source code is licensed under both the BSD-style license (found in the
|
|
@@ -16,24 +16,33 @@
|
|
|
16
16
|
#include <string.h> /* memset */
|
|
17
17
|
#include <time.h> /* clock */
|
|
18
18
|
|
|
19
|
+
#ifndef ZDICT_STATIC_LINKING_ONLY
|
|
20
|
+
# define ZDICT_STATIC_LINKING_ONLY
|
|
21
|
+
#endif
|
|
22
|
+
|
|
19
23
|
#include "../common/mem.h" /* read */
|
|
20
24
|
#include "../common/pool.h"
|
|
21
25
|
#include "../common/threading.h"
|
|
22
|
-
#include "cover.h"
|
|
23
26
|
#include "../common/zstd_internal.h" /* includes zstd.h */
|
|
24
|
-
#
|
|
25
|
-
#
|
|
26
|
-
#
|
|
27
|
-
#include "zdict.h"
|
|
27
|
+
#include "../compress/zstd_compress_internal.h" /* ZSTD_hash*() */
|
|
28
|
+
#include "../zdict.h"
|
|
29
|
+
#include "cover.h"
|
|
28
30
|
|
|
29
31
|
|
|
30
32
|
/*-*************************************
|
|
31
33
|
* Constants
|
|
32
34
|
***************************************/
|
|
35
|
+
/**
|
|
36
|
+
* There are 32bit indexes used to ref samples, so limit samples size to 4GB
|
|
37
|
+
* on 64bit builds.
|
|
38
|
+
* For 32bit builds we choose 1 GB.
|
|
39
|
+
* Most 32bit platforms have 2GB user-mode addressable space and we allocate a large
|
|
40
|
+
* contiguous buffer, so 1GB is already a high limit.
|
|
41
|
+
*/
|
|
33
42
|
#define FASTCOVER_MAX_SAMPLES_SIZE (sizeof(size_t) == 8 ? ((unsigned)-1) : ((unsigned)1 GB))
|
|
34
43
|
#define FASTCOVER_MAX_F 31
|
|
35
44
|
#define FASTCOVER_MAX_ACCEL 10
|
|
36
|
-
#define
|
|
45
|
+
#define FASTCOVER_DEFAULT_SPLITPOINT 0.75
|
|
37
46
|
#define DEFAULT_F 20
|
|
38
47
|
#define DEFAULT_ACCEL 1
|
|
39
48
|
|
|
@@ -41,50 +50,50 @@
|
|
|
41
50
|
/*-*************************************
|
|
42
51
|
* Console display
|
|
43
52
|
***************************************/
|
|
44
|
-
|
|
53
|
+
#ifndef LOCALDISPLAYLEVEL
|
|
54
|
+
static int g_displayLevel = 0;
|
|
55
|
+
#endif
|
|
56
|
+
#undef DISPLAY
|
|
45
57
|
#define DISPLAY(...) \
|
|
46
58
|
{ \
|
|
47
59
|
fprintf(stderr, __VA_ARGS__); \
|
|
48
60
|
fflush(stderr); \
|
|
49
61
|
}
|
|
62
|
+
#undef LOCALDISPLAYLEVEL
|
|
50
63
|
#define LOCALDISPLAYLEVEL(displayLevel, l, ...) \
|
|
51
64
|
if (displayLevel >= l) { \
|
|
52
65
|
DISPLAY(__VA_ARGS__); \
|
|
53
66
|
} /* 0 : no display; 1: errors; 2: default; 3: details; 4: debug */
|
|
67
|
+
#undef DISPLAYLEVEL
|
|
54
68
|
#define DISPLAYLEVEL(l, ...) LOCALDISPLAYLEVEL(g_displayLevel, l, __VA_ARGS__)
|
|
55
69
|
|
|
70
|
+
#ifndef LOCALDISPLAYUPDATE
|
|
71
|
+
static const clock_t g_refreshRate = CLOCKS_PER_SEC * 15 / 100;
|
|
72
|
+
static clock_t g_time = 0;
|
|
73
|
+
#endif
|
|
74
|
+
#undef LOCALDISPLAYUPDATE
|
|
56
75
|
#define LOCALDISPLAYUPDATE(displayLevel, l, ...) \
|
|
57
76
|
if (displayLevel >= l) { \
|
|
58
|
-
if ((clock() - g_time >
|
|
77
|
+
if ((clock() - g_time > g_refreshRate) || (displayLevel >= 4)) { \
|
|
59
78
|
g_time = clock(); \
|
|
60
79
|
DISPLAY(__VA_ARGS__); \
|
|
61
80
|
} \
|
|
62
81
|
}
|
|
82
|
+
#undef DISPLAYUPDATE
|
|
63
83
|
#define DISPLAYUPDATE(l, ...) LOCALDISPLAYUPDATE(g_displayLevel, l, __VA_ARGS__)
|
|
64
|
-
static const clock_t refreshRate = CLOCKS_PER_SEC * 15 / 100;
|
|
65
|
-
static clock_t g_time = 0;
|
|
66
84
|
|
|
67
85
|
|
|
68
86
|
/*-*************************************
|
|
69
87
|
* Hash Functions
|
|
70
88
|
***************************************/
|
|
71
|
-
static const U64 prime6bytes = 227718039650203ULL;
|
|
72
|
-
static size_t ZSTD_hash6(U64 u, U32 h) { return (size_t)(((u << (64-48)) * prime6bytes) >> (64-h)) ; }
|
|
73
|
-
static size_t ZSTD_hash6Ptr(const void* p, U32 h) { return ZSTD_hash6(MEM_readLE64(p), h); }
|
|
74
|
-
|
|
75
|
-
static const U64 prime8bytes = 0xCF1BBCDCB7A56463ULL;
|
|
76
|
-
static size_t ZSTD_hash8(U64 u, U32 h) { return (size_t)(((u) * prime8bytes) >> (64-h)) ; }
|
|
77
|
-
static size_t ZSTD_hash8Ptr(const void* p, U32 h) { return ZSTD_hash8(MEM_readLE64(p), h); }
|
|
78
|
-
|
|
79
|
-
|
|
80
89
|
/**
|
|
81
|
-
* Hash the d-byte value pointed to by p and mod 2^f
|
|
90
|
+
* Hash the d-byte value pointed to by p and mod 2^f into the frequency vector
|
|
82
91
|
*/
|
|
83
|
-
static size_t FASTCOVER_hashPtrToIndex(const void* p, U32
|
|
92
|
+
static size_t FASTCOVER_hashPtrToIndex(const void* p, U32 f, unsigned d) {
|
|
84
93
|
if (d == 6) {
|
|
85
|
-
return ZSTD_hash6Ptr(p,
|
|
94
|
+
return ZSTD_hash6Ptr(p, f);
|
|
86
95
|
}
|
|
87
|
-
return ZSTD_hash8Ptr(p,
|
|
96
|
+
return ZSTD_hash8Ptr(p, f);
|
|
88
97
|
}
|
|
89
98
|
|
|
90
99
|
|
|
@@ -295,7 +304,7 @@ FASTCOVER_computeFrequency(U32* freqs, const FASTCOVER_ctx_t* ctx)
|
|
|
295
304
|
|
|
296
305
|
/**
|
|
297
306
|
* Prepare a context for dictionary building.
|
|
298
|
-
* The context is only dependent on the parameter `d` and can used multiple
|
|
307
|
+
* The context is only dependent on the parameter `d` and can be used multiple
|
|
299
308
|
* times.
|
|
300
309
|
* Returns 0 on success or error code on error.
|
|
301
310
|
* The context must be destroyed with `FASTCOVER_ctx_destroy()`.
|
|
@@ -461,20 +470,20 @@ typedef struct FASTCOVER_tryParameters_data_s {
|
|
|
461
470
|
* This function is thread safe if zstd is compiled with multithreaded support.
|
|
462
471
|
* It takes its parameters as an *OWNING* opaque pointer to support threading.
|
|
463
472
|
*/
|
|
464
|
-
static void FASTCOVER_tryParameters(void
|
|
473
|
+
static void FASTCOVER_tryParameters(void* opaque)
|
|
465
474
|
{
|
|
466
475
|
/* Save parameters as local variables */
|
|
467
|
-
FASTCOVER_tryParameters_data_t *const data = (FASTCOVER_tryParameters_data_t
|
|
476
|
+
FASTCOVER_tryParameters_data_t *const data = (FASTCOVER_tryParameters_data_t*)opaque;
|
|
468
477
|
const FASTCOVER_ctx_t *const ctx = data->ctx;
|
|
469
478
|
const ZDICT_cover_params_t parameters = data->parameters;
|
|
470
479
|
size_t dictBufferCapacity = data->dictBufferCapacity;
|
|
471
480
|
size_t totalCompressedSize = ERROR(GENERIC);
|
|
472
481
|
/* Initialize array to keep track of frequency of dmer within activeSegment */
|
|
473
|
-
U16* segmentFreqs = (U16
|
|
482
|
+
U16* segmentFreqs = (U16*)calloc(((U64)1 << ctx->f), sizeof(U16));
|
|
474
483
|
/* Allocate space for hash table, dict, and freqs */
|
|
475
|
-
BYTE *const dict = (BYTE
|
|
484
|
+
BYTE *const dict = (BYTE*)malloc(dictBufferCapacity);
|
|
476
485
|
COVER_dictSelection_t selection = COVER_dictSelectionError(ERROR(GENERIC));
|
|
477
|
-
U32
|
|
486
|
+
U32* freqs = (U32*) malloc(((U64)1 << ctx->f) * sizeof(U32));
|
|
478
487
|
if (!segmentFreqs || !dict || !freqs) {
|
|
479
488
|
DISPLAYLEVEL(1, "Failed to allocate buffers: out of memory\n");
|
|
480
489
|
goto _cleanup;
|
|
@@ -486,7 +495,7 @@ static void FASTCOVER_tryParameters(void *opaque)
|
|
|
486
495
|
parameters, segmentFreqs);
|
|
487
496
|
|
|
488
497
|
const unsigned nbFinalizeSamples = (unsigned)(ctx->nbTrainSamples * ctx->accelParams.finalize / 100);
|
|
489
|
-
selection = COVER_selectDict(dict + tail, dictBufferCapacity - tail,
|
|
498
|
+
selection = COVER_selectDict(dict + tail, dictBufferCapacity, dictBufferCapacity - tail,
|
|
490
499
|
ctx->samples, ctx->samplesSizes, nbFinalizeSamples, ctx->nbTrainSamples, ctx->nbSamples, parameters, ctx->offsets,
|
|
491
500
|
totalCompressedSize);
|
|
492
501
|
|
|
@@ -547,7 +556,7 @@ ZDICT_trainFromBuffer_fastCover(void* dictBuffer, size_t dictBufferCapacity,
|
|
|
547
556
|
ZDICT_cover_params_t coverParams;
|
|
548
557
|
FASTCOVER_accel_t accelParams;
|
|
549
558
|
/* Initialize global data */
|
|
550
|
-
g_displayLevel = parameters.zParams.notificationLevel;
|
|
559
|
+
g_displayLevel = (int)parameters.zParams.notificationLevel;
|
|
551
560
|
/* Assign splitPoint and f if not provided */
|
|
552
561
|
parameters.splitPoint = 1.0;
|
|
553
562
|
parameters.f = parameters.f == 0 ? DEFAULT_F : parameters.f;
|
|
@@ -617,7 +626,7 @@ ZDICT_optimizeTrainFromBuffer_fastCover(
|
|
|
617
626
|
/* constants */
|
|
618
627
|
const unsigned nbThreads = parameters->nbThreads;
|
|
619
628
|
const double splitPoint =
|
|
620
|
-
parameters->splitPoint <= 0.0 ?
|
|
629
|
+
parameters->splitPoint <= 0.0 ? FASTCOVER_DEFAULT_SPLITPOINT : parameters->splitPoint;
|
|
621
630
|
const unsigned kMinD = parameters->d == 0 ? 6 : parameters->d;
|
|
622
631
|
const unsigned kMaxD = parameters->d == 0 ? 8 : parameters->d;
|
|
623
632
|
const unsigned kMinK = parameters->k == 0 ? 50 : parameters->k;
|
|
@@ -630,7 +639,7 @@ ZDICT_optimizeTrainFromBuffer_fastCover(
|
|
|
630
639
|
const unsigned accel = parameters->accel == 0 ? DEFAULT_ACCEL : parameters->accel;
|
|
631
640
|
const unsigned shrinkDict = 0;
|
|
632
641
|
/* Local variables */
|
|
633
|
-
const int displayLevel = parameters->zParams.notificationLevel;
|
|
642
|
+
const int displayLevel = (int)parameters->zParams.notificationLevel;
|
|
634
643
|
unsigned iteration = 1;
|
|
635
644
|
unsigned d;
|
|
636
645
|
unsigned k;
|
|
@@ -714,7 +723,7 @@ ZDICT_optimizeTrainFromBuffer_fastCover(
|
|
|
714
723
|
data->parameters.splitPoint = splitPoint;
|
|
715
724
|
data->parameters.steps = kSteps;
|
|
716
725
|
data->parameters.shrinkDict = shrinkDict;
|
|
717
|
-
data->parameters.zParams.notificationLevel = g_displayLevel;
|
|
726
|
+
data->parameters.zParams.notificationLevel = (unsigned)g_displayLevel;
|
|
718
727
|
/* Check the parameters */
|
|
719
728
|
if (!FASTCOVER_checkParameters(data->parameters, dictBufferCapacity,
|
|
720
729
|
data->ctx->f, accel)) {
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
/*
|
|
2
|
-
* Copyright (c)
|
|
2
|
+
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
3
3
|
* All rights reserved.
|
|
4
4
|
*
|
|
5
5
|
* This source code is licensed under both the BSD-style license (found in the
|
|
@@ -23,9 +23,13 @@
|
|
|
23
23
|
/* Unix Large Files support (>4GB) */
|
|
24
24
|
#define _FILE_OFFSET_BITS 64
|
|
25
25
|
#if (defined(__sun__) && (!defined(__LP64__))) /* Sun Solaris 32-bits requires specific definitions */
|
|
26
|
+
# ifndef _LARGEFILE_SOURCE
|
|
26
27
|
# define _LARGEFILE_SOURCE
|
|
28
|
+
# endif
|
|
27
29
|
#elif ! defined(__LP64__) /* No point defining Large file for 64 bit */
|
|
30
|
+
# ifndef _LARGEFILE64_SOURCE
|
|
28
31
|
# define _LARGEFILE64_SOURCE
|
|
32
|
+
# endif
|
|
29
33
|
#endif
|
|
30
34
|
|
|
31
35
|
|
|
@@ -37,18 +41,19 @@
|
|
|
37
41
|
#include <stdio.h> /* fprintf, fopen, ftello64 */
|
|
38
42
|
#include <time.h> /* clock */
|
|
39
43
|
|
|
44
|
+
#ifndef ZDICT_STATIC_LINKING_ONLY
|
|
45
|
+
# define ZDICT_STATIC_LINKING_ONLY
|
|
46
|
+
#endif
|
|
47
|
+
|
|
40
48
|
#include "../common/mem.h" /* read */
|
|
41
49
|
#include "../common/fse.h" /* FSE_normalizeCount, FSE_writeNCount */
|
|
42
|
-
#define HUF_STATIC_LINKING_ONLY
|
|
43
50
|
#include "../common/huf.h" /* HUF_buildCTable, HUF_writeCTable */
|
|
44
51
|
#include "../common/zstd_internal.h" /* includes zstd.h */
|
|
45
52
|
#include "../common/xxhash.h" /* XXH64 */
|
|
46
|
-
#include "divsufsort.h"
|
|
47
|
-
#ifndef ZDICT_STATIC_LINKING_ONLY
|
|
48
|
-
# define ZDICT_STATIC_LINKING_ONLY
|
|
49
|
-
#endif
|
|
50
|
-
#include "zdict.h"
|
|
51
53
|
#include "../compress/zstd_compress_internal.h" /* ZSTD_loadCEntropy() */
|
|
54
|
+
#include "../zdict.h"
|
|
55
|
+
#include "divsufsort.h"
|
|
56
|
+
#include "../common/bits.h" /* ZSTD_NbCommonBytes */
|
|
52
57
|
|
|
53
58
|
|
|
54
59
|
/*-*************************************
|
|
@@ -62,14 +67,15 @@
|
|
|
62
67
|
|
|
63
68
|
#define NOISELENGTH 32
|
|
64
69
|
|
|
65
|
-
static const int g_compressionLevel_default = 3;
|
|
66
70
|
static const U32 g_selectivity_default = 9;
|
|
67
71
|
|
|
68
72
|
|
|
69
73
|
/*-*************************************
|
|
70
74
|
* Console display
|
|
71
75
|
***************************************/
|
|
76
|
+
#undef DISPLAY
|
|
72
77
|
#define DISPLAY(...) { fprintf(stderr, __VA_ARGS__); fflush( stderr ); }
|
|
78
|
+
#undef DISPLAYLEVEL
|
|
73
79
|
#define DISPLAYLEVEL(l, ...) if (notificationLevel>=l) { DISPLAY(__VA_ARGS__); } /* 0 : no display; 1: errors; 2: default; 3: details; 4: debug */
|
|
74
80
|
|
|
75
81
|
static clock_t ZDICT_clockSpan(clock_t nPrevious) { return clock() - nPrevious; }
|
|
@@ -105,20 +111,17 @@ size_t ZDICT_getDictHeaderSize(const void* dictBuffer, size_t dictSize)
|
|
|
105
111
|
size_t headerSize;
|
|
106
112
|
if (dictSize <= 8 || MEM_readLE32(dictBuffer) != ZSTD_MAGIC_DICTIONARY) return ERROR(dictionary_corrupted);
|
|
107
113
|
|
|
108
|
-
{
|
|
109
|
-
ZSTD_compressedBlockState_t* bs = (ZSTD_compressedBlockState_t*)malloc(sizeof(ZSTD_compressedBlockState_t));
|
|
114
|
+
{ ZSTD_compressedBlockState_t* bs = (ZSTD_compressedBlockState_t*)malloc(sizeof(ZSTD_compressedBlockState_t));
|
|
110
115
|
U32* wksp = (U32*)malloc(HUF_WORKSPACE_SIZE);
|
|
111
|
-
|
|
112
|
-
if (!bs || !wksp || !offcodeNCount) {
|
|
116
|
+
if (!bs || !wksp) {
|
|
113
117
|
headerSize = ERROR(memory_allocation);
|
|
114
118
|
} else {
|
|
115
119
|
ZSTD_reset_compressedBlockState(bs);
|
|
116
|
-
headerSize = ZSTD_loadCEntropy(bs, wksp,
|
|
120
|
+
headerSize = ZSTD_loadCEntropy(bs, wksp, dictBuffer, dictSize);
|
|
117
121
|
}
|
|
118
122
|
|
|
119
123
|
free(bs);
|
|
120
124
|
free(wksp);
|
|
121
|
-
free(offcodeNCount);
|
|
122
125
|
}
|
|
123
126
|
|
|
124
127
|
return headerSize;
|
|
@@ -127,65 +130,6 @@ size_t ZDICT_getDictHeaderSize(const void* dictBuffer, size_t dictSize)
|
|
|
127
130
|
/*-********************************************************
|
|
128
131
|
* Dictionary training functions
|
|
129
132
|
**********************************************************/
|
|
130
|
-
static unsigned ZDICT_NbCommonBytes (size_t val)
|
|
131
|
-
{
|
|
132
|
-
if (MEM_isLittleEndian()) {
|
|
133
|
-
if (MEM_64bits()) {
|
|
134
|
-
# if defined(_MSC_VER) && defined(_WIN64)
|
|
135
|
-
unsigned long r = 0;
|
|
136
|
-
_BitScanForward64( &r, (U64)val );
|
|
137
|
-
return (unsigned)(r>>3);
|
|
138
|
-
# elif defined(__GNUC__) && (__GNUC__ >= 3)
|
|
139
|
-
return (__builtin_ctzll((U64)val) >> 3);
|
|
140
|
-
# else
|
|
141
|
-
static const int DeBruijnBytePos[64] = { 0, 0, 0, 0, 0, 1, 1, 2, 0, 3, 1, 3, 1, 4, 2, 7, 0, 2, 3, 6, 1, 5, 3, 5, 1, 3, 4, 4, 2, 5, 6, 7, 7, 0, 1, 2, 3, 3, 4, 6, 2, 6, 5, 5, 3, 4, 5, 6, 7, 1, 2, 4, 6, 4, 4, 5, 7, 2, 6, 5, 7, 6, 7, 7 };
|
|
142
|
-
return DeBruijnBytePos[((U64)((val & -(long long)val) * 0x0218A392CDABBD3FULL)) >> 58];
|
|
143
|
-
# endif
|
|
144
|
-
} else { /* 32 bits */
|
|
145
|
-
# if defined(_MSC_VER)
|
|
146
|
-
unsigned long r=0;
|
|
147
|
-
_BitScanForward( &r, (U32)val );
|
|
148
|
-
return (unsigned)(r>>3);
|
|
149
|
-
# elif defined(__GNUC__) && (__GNUC__ >= 3)
|
|
150
|
-
return (__builtin_ctz((U32)val) >> 3);
|
|
151
|
-
# else
|
|
152
|
-
static const int DeBruijnBytePos[32] = { 0, 0, 3, 0, 3, 1, 3, 0, 3, 2, 2, 1, 3, 2, 0, 1, 3, 3, 1, 2, 2, 2, 2, 0, 3, 1, 2, 0, 1, 0, 1, 1 };
|
|
153
|
-
return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27];
|
|
154
|
-
# endif
|
|
155
|
-
}
|
|
156
|
-
} else { /* Big Endian CPU */
|
|
157
|
-
if (MEM_64bits()) {
|
|
158
|
-
# if defined(_MSC_VER) && defined(_WIN64)
|
|
159
|
-
unsigned long r = 0;
|
|
160
|
-
_BitScanReverse64( &r, val );
|
|
161
|
-
return (unsigned)(r>>3);
|
|
162
|
-
# elif defined(__GNUC__) && (__GNUC__ >= 3)
|
|
163
|
-
return (__builtin_clzll(val) >> 3);
|
|
164
|
-
# else
|
|
165
|
-
unsigned r;
|
|
166
|
-
const unsigned n32 = sizeof(size_t)*4; /* calculate this way due to compiler complaining in 32-bits mode */
|
|
167
|
-
if (!(val>>n32)) { r=4; } else { r=0; val>>=n32; }
|
|
168
|
-
if (!(val>>16)) { r+=2; val>>=8; } else { val>>=24; }
|
|
169
|
-
r += (!val);
|
|
170
|
-
return r;
|
|
171
|
-
# endif
|
|
172
|
-
} else { /* 32 bits */
|
|
173
|
-
# if defined(_MSC_VER)
|
|
174
|
-
unsigned long r = 0;
|
|
175
|
-
_BitScanReverse( &r, (unsigned long)val );
|
|
176
|
-
return (unsigned)(r>>3);
|
|
177
|
-
# elif defined(__GNUC__) && (__GNUC__ >= 3)
|
|
178
|
-
return (__builtin_clz((U32)val) >> 3);
|
|
179
|
-
# else
|
|
180
|
-
unsigned r;
|
|
181
|
-
if (!(val>>16)) { r=2; val>>=8; } else { r=0; val>>=24; }
|
|
182
|
-
r += (!val);
|
|
183
|
-
return r;
|
|
184
|
-
# endif
|
|
185
|
-
} }
|
|
186
|
-
}
|
|
187
|
-
|
|
188
|
-
|
|
189
133
|
/*! ZDICT_count() :
|
|
190
134
|
Count the nb of common bytes between 2 pointers.
|
|
191
135
|
Note : this function presumes end of buffer followed by noisy guard band.
|
|
@@ -200,7 +144,7 @@ static size_t ZDICT_count(const void* pIn, const void* pMatch)
|
|
|
200
144
|
pMatch = (const char*)pMatch+sizeof(size_t);
|
|
201
145
|
continue;
|
|
202
146
|
}
|
|
203
|
-
pIn = (const char*)pIn+
|
|
147
|
+
pIn = (const char*)pIn+ZSTD_NbCommonBytes(diff);
|
|
204
148
|
return (size_t)((const char*)pIn - pStart);
|
|
205
149
|
}
|
|
206
150
|
}
|
|
@@ -232,7 +176,7 @@ static dictItem ZDICT_analyzePos(
|
|
|
232
176
|
U32 savings[LLIMIT] = {0};
|
|
233
177
|
const BYTE* b = (const BYTE*)buffer;
|
|
234
178
|
size_t maxLength = LLIMIT;
|
|
235
|
-
size_t pos = suffix[start];
|
|
179
|
+
size_t pos = (size_t)suffix[start];
|
|
236
180
|
U32 end = start;
|
|
237
181
|
dictItem solution;
|
|
238
182
|
|
|
@@ -366,7 +310,7 @@ static dictItem ZDICT_analyzePos(
|
|
|
366
310
|
savings[i] = savings[i-1] + (lengthList[i] * (i-3));
|
|
367
311
|
|
|
368
312
|
DISPLAYLEVEL(4, "Selected dict at position %u, of length %u : saves %u (ratio: %.2f) \n",
|
|
369
|
-
(unsigned)pos, (unsigned)maxLength, (unsigned)savings[maxLength], (double)savings[maxLength] / maxLength);
|
|
313
|
+
(unsigned)pos, (unsigned)maxLength, (unsigned)savings[maxLength], (double)savings[maxLength] / (double)maxLength);
|
|
370
314
|
|
|
371
315
|
solution.pos = (U32)pos;
|
|
372
316
|
solution.length = (U32)maxLength;
|
|
@@ -376,7 +320,7 @@ static dictItem ZDICT_analyzePos(
|
|
|
376
320
|
{ U32 id;
|
|
377
321
|
for (id=start; id<end; id++) {
|
|
378
322
|
U32 p, pEnd, length;
|
|
379
|
-
U32 const testedPos = suffix[id];
|
|
323
|
+
U32 const testedPos = (U32)suffix[id];
|
|
380
324
|
if (testedPos == pos)
|
|
381
325
|
length = solution.length;
|
|
382
326
|
else {
|
|
@@ -428,7 +372,7 @@ static U32 ZDICT_tryMerge(dictItem* table, dictItem elt, U32 eltNbToSkip, const
|
|
|
428
372
|
elt = table[u];
|
|
429
373
|
/* sort : improve rank */
|
|
430
374
|
while ((u>1) && (table[u-1].savings < elt.savings))
|
|
431
|
-
|
|
375
|
+
table[u] = table[u-1], u--;
|
|
432
376
|
table[u] = elt;
|
|
433
377
|
return u;
|
|
434
378
|
} }
|
|
@@ -439,7 +383,7 @@ static U32 ZDICT_tryMerge(dictItem* table, dictItem elt, U32 eltNbToSkip, const
|
|
|
439
383
|
|
|
440
384
|
if ((table[u].pos + table[u].length >= elt.pos) && (table[u].pos < elt.pos)) { /* overlap, existing < new */
|
|
441
385
|
/* append */
|
|
442
|
-
int const addedLength = (int)eltEnd - (table[u].pos + table[u].length);
|
|
386
|
+
int const addedLength = (int)eltEnd - (int)(table[u].pos + table[u].length);
|
|
443
387
|
table[u].savings += elt.length / 8; /* rough approx bonus */
|
|
444
388
|
if (addedLength > 0) { /* otherwise, elt fully included into existing */
|
|
445
389
|
table[u].length += addedLength;
|
|
@@ -532,6 +476,7 @@ static size_t ZDICT_trainBuffer_legacy(dictItem* dictList, U32 dictListSize,
|
|
|
532
476
|
clock_t displayClock = 0;
|
|
533
477
|
clock_t const refreshRate = CLOCKS_PER_SEC * 3 / 10;
|
|
534
478
|
|
|
479
|
+
# undef DISPLAYUPDATE
|
|
535
480
|
# define DISPLAYUPDATE(l, ...) if (notificationLevel>=l) { \
|
|
536
481
|
if (ZDICT_clockSpan(displayClock) > refreshRate) \
|
|
537
482
|
{ displayClock = clock(); DISPLAY(__VA_ARGS__); \
|
|
@@ -578,7 +523,7 @@ static size_t ZDICT_trainBuffer_legacy(dictItem* dictList, U32 dictListSize,
|
|
|
578
523
|
if (solution.length==0) { cursor++; continue; }
|
|
579
524
|
ZDICT_insertDictItem(dictList, dictListSize, solution, buffer);
|
|
580
525
|
cursor += solution.length;
|
|
581
|
-
DISPLAYUPDATE(2, "\r%4.2f %% \r", (double)cursor / bufferSize * 100);
|
|
526
|
+
DISPLAYUPDATE(2, "\r%4.2f %% \r", (double)cursor / (double)bufferSize * 100.0);
|
|
582
527
|
} }
|
|
583
528
|
|
|
584
529
|
_cleanup:
|
|
@@ -621,11 +566,11 @@ static void ZDICT_countEStats(EStats_ress_t esr, const ZSTD_parameters* params,
|
|
|
621
566
|
size_t cSize;
|
|
622
567
|
|
|
623
568
|
if (srcSize > blockSizeMax) srcSize = blockSizeMax; /* protection vs large samples */
|
|
624
|
-
{ size_t const errorCode =
|
|
569
|
+
{ size_t const errorCode = ZSTD_compressBegin_usingCDict_deprecated(esr.zc, esr.dict);
|
|
625
570
|
if (ZSTD_isError(errorCode)) { DISPLAYLEVEL(1, "warning : ZSTD_compressBegin_usingCDict failed \n"); return; }
|
|
626
571
|
|
|
627
572
|
}
|
|
628
|
-
cSize =
|
|
573
|
+
cSize = ZSTD_compressBlock_deprecated(esr.zc, esr.workPlace, ZSTD_BLOCKSIZE_MAX, src, srcSize);
|
|
629
574
|
if (ZSTD_isError(cSize)) { DISPLAYLEVEL(3, "warning : could not compress sample size %u \n", (unsigned)srcSize); return; }
|
|
630
575
|
|
|
631
576
|
if (cSize) { /* if == 0; block is not compressible */
|
|
@@ -658,8 +603,8 @@ static void ZDICT_countEStats(EStats_ress_t esr, const ZSTD_parameters* params,
|
|
|
658
603
|
|
|
659
604
|
if (nbSeq >= 2) { /* rep offsets */
|
|
660
605
|
const seqDef* const seq = seqStorePtr->sequencesStart;
|
|
661
|
-
U32 offset1 = seq[0].
|
|
662
|
-
U32 offset2 = seq[1].
|
|
606
|
+
U32 offset1 = seq[0].offBase - ZSTD_REP_NUM;
|
|
607
|
+
U32 offset2 = seq[1].offBase - ZSTD_REP_NUM;
|
|
663
608
|
if (offset1 >= MAXREPOFFSET) offset1 = 0;
|
|
664
609
|
if (offset2 >= MAXREPOFFSET) offset2 = 0;
|
|
665
610
|
repOffsets[offset1] += 3;
|
|
@@ -706,7 +651,7 @@ static void ZDICT_flatLit(unsigned* countLit)
|
|
|
706
651
|
|
|
707
652
|
#define OFFCODE_MAX 30 /* only applicable to first block */
|
|
708
653
|
static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
|
|
709
|
-
|
|
654
|
+
int compressionLevel,
|
|
710
655
|
const void* srcBuffer, const size_t* fileSizes, unsigned nbFiles,
|
|
711
656
|
const void* dictBuffer, size_t dictBufferSize,
|
|
712
657
|
unsigned notificationLevel)
|
|
@@ -730,6 +675,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
|
|
|
730
675
|
size_t const totalSrcSize = ZDICT_totalSampleSize(fileSizes, nbFiles);
|
|
731
676
|
size_t const averageSampleSize = totalSrcSize / (nbFiles + !nbFiles);
|
|
732
677
|
BYTE* dstPtr = (BYTE*)dstBuffer;
|
|
678
|
+
U32 wksp[HUF_CTABLE_WORKSPACE_SIZE_U32];
|
|
733
679
|
|
|
734
680
|
/* init */
|
|
735
681
|
DEBUGLOG(4, "ZDICT_analyzeEntropy");
|
|
@@ -741,7 +687,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
|
|
|
741
687
|
memset(repOffset, 0, sizeof(repOffset));
|
|
742
688
|
repOffset[1] = repOffset[4] = repOffset[8] = 1;
|
|
743
689
|
memset(bestRepOffset, 0, sizeof(bestRepOffset));
|
|
744
|
-
if (compressionLevel==0) compressionLevel =
|
|
690
|
+
if (compressionLevel==0) compressionLevel = ZSTD_CLEVEL_DEFAULT;
|
|
745
691
|
params = ZSTD_getParams(compressionLevel, averageSampleSize, dictBufferSize);
|
|
746
692
|
|
|
747
693
|
esr.dict = ZSTD_createCDict_advanced(dictBuffer, dictBufferSize, ZSTD_dlm_byRef, ZSTD_dct_rawContent, params.cParams, ZSTD_defaultCMem);
|
|
@@ -762,8 +708,15 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
|
|
|
762
708
|
pos += fileSizes[u];
|
|
763
709
|
}
|
|
764
710
|
|
|
711
|
+
if (notificationLevel >= 4) {
|
|
712
|
+
/* writeStats */
|
|
713
|
+
DISPLAYLEVEL(4, "Offset Code Frequencies : \n");
|
|
714
|
+
for (u=0; u<=offcodeMax; u++) {
|
|
715
|
+
DISPLAYLEVEL(4, "%2u :%7u \n", u, offcodeCount[u]);
|
|
716
|
+
} }
|
|
717
|
+
|
|
765
718
|
/* analyze, build stats, starting with literals */
|
|
766
|
-
{ size_t maxNbBits =
|
|
719
|
+
{ size_t maxNbBits = HUF_buildCTable_wksp(hufTable, countLit, 255, huffLog, wksp, sizeof(wksp));
|
|
767
720
|
if (HUF_isError(maxNbBits)) {
|
|
768
721
|
eSize = maxNbBits;
|
|
769
722
|
DISPLAYLEVEL(1, " HUF_buildCTable error \n");
|
|
@@ -772,7 +725,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
|
|
|
772
725
|
if (maxNbBits==8) { /* not compressible : will fail on HUF_writeCTable() */
|
|
773
726
|
DISPLAYLEVEL(2, "warning : pathological dataset : literals are not compressible : samples are noisy or too regular \n");
|
|
774
727
|
ZDICT_flatLit(countLit); /* replace distribution by a fake "mostly flat but still compressible" distribution, that HUF_writeCTable() can encode */
|
|
775
|
-
maxNbBits =
|
|
728
|
+
maxNbBits = HUF_buildCTable_wksp(hufTable, countLit, 255, huffLog, wksp, sizeof(wksp));
|
|
776
729
|
assert(maxNbBits==9);
|
|
777
730
|
}
|
|
778
731
|
huffLog = (U32)maxNbBits;
|
|
@@ -786,7 +739,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
|
|
|
786
739
|
/* note : the result of this phase should be used to better appreciate the impact on statistics */
|
|
787
740
|
|
|
788
741
|
total=0; for (u=0; u<=offcodeMax; u++) total+=offcodeCount[u];
|
|
789
|
-
errorCode = FSE_normalizeCount(offcodeNCount, Offlog, offcodeCount, total, offcodeMax);
|
|
742
|
+
errorCode = FSE_normalizeCount(offcodeNCount, Offlog, offcodeCount, total, offcodeMax, /* useLowProbCount */ 1);
|
|
790
743
|
if (FSE_isError(errorCode)) {
|
|
791
744
|
eSize = errorCode;
|
|
792
745
|
DISPLAYLEVEL(1, "FSE_normalizeCount error with offcodeCount \n");
|
|
@@ -795,7 +748,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
|
|
|
795
748
|
Offlog = (U32)errorCode;
|
|
796
749
|
|
|
797
750
|
total=0; for (u=0; u<=MaxML; u++) total+=matchLengthCount[u];
|
|
798
|
-
errorCode = FSE_normalizeCount(matchLengthNCount, mlLog, matchLengthCount, total, MaxML);
|
|
751
|
+
errorCode = FSE_normalizeCount(matchLengthNCount, mlLog, matchLengthCount, total, MaxML, /* useLowProbCount */ 1);
|
|
799
752
|
if (FSE_isError(errorCode)) {
|
|
800
753
|
eSize = errorCode;
|
|
801
754
|
DISPLAYLEVEL(1, "FSE_normalizeCount error with matchLengthCount \n");
|
|
@@ -804,7 +757,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
|
|
|
804
757
|
mlLog = (U32)errorCode;
|
|
805
758
|
|
|
806
759
|
total=0; for (u=0; u<=MaxLL; u++) total+=litLengthCount[u];
|
|
807
|
-
errorCode = FSE_normalizeCount(litLengthNCount, llLog, litLengthCount, total, MaxLL);
|
|
760
|
+
errorCode = FSE_normalizeCount(litLengthNCount, llLog, litLengthCount, total, MaxLL, /* useLowProbCount */ 1);
|
|
808
761
|
if (FSE_isError(errorCode)) {
|
|
809
762
|
eSize = errorCode;
|
|
810
763
|
DISPLAYLEVEL(1, "FSE_normalizeCount error with litLengthCount \n");
|
|
@@ -813,7 +766,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
|
|
|
813
766
|
llLog = (U32)errorCode;
|
|
814
767
|
|
|
815
768
|
/* write result to buffer */
|
|
816
|
-
{ size_t const hhSize =
|
|
769
|
+
{ size_t const hhSize = HUF_writeCTable_wksp(dstPtr, maxDstSize, hufTable, 255, huffLog, wksp, sizeof(wksp));
|
|
817
770
|
if (HUF_isError(hhSize)) {
|
|
818
771
|
eSize = hhSize;
|
|
819
772
|
DISPLAYLEVEL(1, "HUF_writeCTable error \n");
|
|
@@ -868,7 +821,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
|
|
|
868
821
|
MEM_writeLE32(dstPtr+8, bestRepOffset[2].offset);
|
|
869
822
|
#else
|
|
870
823
|
/* at this stage, we don't use the result of "most common first offset",
|
|
871
|
-
|
|
824
|
+
* as the impact of statistics is not properly evaluated */
|
|
872
825
|
MEM_writeLE32(dstPtr+0, repStartValue[0]);
|
|
873
826
|
MEM_writeLE32(dstPtr+4, repStartValue[1]);
|
|
874
827
|
MEM_writeLE32(dstPtr+8, repStartValue[2]);
|
|
@@ -884,6 +837,17 @@ _cleanup:
|
|
|
884
837
|
}
|
|
885
838
|
|
|
886
839
|
|
|
840
|
+
/**
|
|
841
|
+
* @returns the maximum repcode value
|
|
842
|
+
*/
|
|
843
|
+
static U32 ZDICT_maxRep(U32 const reps[ZSTD_REP_NUM])
|
|
844
|
+
{
|
|
845
|
+
U32 maxRep = reps[0];
|
|
846
|
+
int r;
|
|
847
|
+
for (r = 1; r < ZSTD_REP_NUM; ++r)
|
|
848
|
+
maxRep = MAX(maxRep, reps[r]);
|
|
849
|
+
return maxRep;
|
|
850
|
+
}
|
|
887
851
|
|
|
888
852
|
size_t ZDICT_finalizeDictionary(void* dictBuffer, size_t dictBufferCapacity,
|
|
889
853
|
const void* customDictContent, size_t dictContentSize,
|
|
@@ -893,13 +857,15 @@ size_t ZDICT_finalizeDictionary(void* dictBuffer, size_t dictBufferCapacity,
|
|
|
893
857
|
size_t hSize;
|
|
894
858
|
#define HBUFFSIZE 256 /* should prove large enough for all entropy headers */
|
|
895
859
|
BYTE header[HBUFFSIZE];
|
|
896
|
-
int const compressionLevel = (params.compressionLevel == 0) ?
|
|
860
|
+
int const compressionLevel = (params.compressionLevel == 0) ? ZSTD_CLEVEL_DEFAULT : params.compressionLevel;
|
|
897
861
|
U32 const notificationLevel = params.notificationLevel;
|
|
862
|
+
/* The final dictionary content must be at least as large as the largest repcode */
|
|
863
|
+
size_t const minContentSize = (size_t)ZDICT_maxRep(repStartValue);
|
|
864
|
+
size_t paddingSize;
|
|
898
865
|
|
|
899
866
|
/* check conditions */
|
|
900
867
|
DEBUGLOG(4, "ZDICT_finalizeDictionary");
|
|
901
868
|
if (dictBufferCapacity < dictContentSize) return ERROR(dstSize_tooSmall);
|
|
902
|
-
if (dictContentSize < ZDICT_CONTENTSIZE_MIN) return ERROR(srcSize_wrong);
|
|
903
869
|
if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) return ERROR(dstSize_tooSmall);
|
|
904
870
|
|
|
905
871
|
/* dictionary header */
|
|
@@ -923,12 +889,43 @@ size_t ZDICT_finalizeDictionary(void* dictBuffer, size_t dictBufferCapacity,
|
|
|
923
889
|
hSize += eSize;
|
|
924
890
|
}
|
|
925
891
|
|
|
926
|
-
/*
|
|
927
|
-
if (hSize + dictContentSize > dictBufferCapacity)
|
|
928
|
-
|
|
929
|
-
|
|
930
|
-
|
|
931
|
-
|
|
892
|
+
/* Shrink the content size if it doesn't fit in the buffer */
|
|
893
|
+
if (hSize + dictContentSize > dictBufferCapacity) {
|
|
894
|
+
dictContentSize = dictBufferCapacity - hSize;
|
|
895
|
+
}
|
|
896
|
+
|
|
897
|
+
/* Pad the dictionary content with zeros if it is too small */
|
|
898
|
+
if (dictContentSize < minContentSize) {
|
|
899
|
+
RETURN_ERROR_IF(hSize + minContentSize > dictBufferCapacity, dstSize_tooSmall,
|
|
900
|
+
"dictBufferCapacity too small to fit max repcode");
|
|
901
|
+
paddingSize = minContentSize - dictContentSize;
|
|
902
|
+
} else {
|
|
903
|
+
paddingSize = 0;
|
|
904
|
+
}
|
|
905
|
+
|
|
906
|
+
{
|
|
907
|
+
size_t const dictSize = hSize + paddingSize + dictContentSize;
|
|
908
|
+
|
|
909
|
+
/* The dictionary consists of the header, optional padding, and the content.
|
|
910
|
+
* The padding comes before the content because the "best" position in the
|
|
911
|
+
* dictionary is the last byte.
|
|
912
|
+
*/
|
|
913
|
+
BYTE* const outDictHeader = (BYTE*)dictBuffer;
|
|
914
|
+
BYTE* const outDictPadding = outDictHeader + hSize;
|
|
915
|
+
BYTE* const outDictContent = outDictPadding + paddingSize;
|
|
916
|
+
|
|
917
|
+
assert(dictSize <= dictBufferCapacity);
|
|
918
|
+
assert(outDictContent + dictContentSize == (BYTE*)dictBuffer + dictSize);
|
|
919
|
+
|
|
920
|
+
/* First copy the customDictContent into its final location.
|
|
921
|
+
* `customDictContent` and `dictBuffer` may overlap, so we must
|
|
922
|
+
* do this before any other writes into the output buffer.
|
|
923
|
+
* Then copy the header & padding into the output buffer.
|
|
924
|
+
*/
|
|
925
|
+
memmove(outDictContent, customDictContent, dictContentSize);
|
|
926
|
+
memcpy(outDictHeader, header, hSize);
|
|
927
|
+
memset(outDictPadding, 0, paddingSize);
|
|
928
|
+
|
|
932
929
|
return dictSize;
|
|
933
930
|
}
|
|
934
931
|
}
|
|
@@ -939,7 +936,7 @@ static size_t ZDICT_addEntropyTablesFromBuffer_advanced(
|
|
|
939
936
|
const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
|
|
940
937
|
ZDICT_params_t params)
|
|
941
938
|
{
|
|
942
|
-
int const compressionLevel = (params.compressionLevel == 0) ?
|
|
939
|
+
int const compressionLevel = (params.compressionLevel == 0) ? ZSTD_CLEVEL_DEFAULT : params.compressionLevel;
|
|
943
940
|
U32 const notificationLevel = params.notificationLevel;
|
|
944
941
|
size_t hSize = 8;
|
|
945
942
|
|
|
@@ -968,16 +965,11 @@ static size_t ZDICT_addEntropyTablesFromBuffer_advanced(
|
|
|
968
965
|
return MIN(dictBufferCapacity, hSize+dictContentSize);
|
|
969
966
|
}
|
|
970
967
|
|
|
971
|
-
/* Hidden declaration for dbio.c */
|
|
972
|
-
size_t ZDICT_trainFromBuffer_unsafe_legacy(
|
|
973
|
-
void* dictBuffer, size_t maxDictSize,
|
|
974
|
-
const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
|
|
975
|
-
ZDICT_legacy_params_t params);
|
|
976
968
|
/*! ZDICT_trainFromBuffer_unsafe_legacy() :
|
|
977
|
-
* Warning : `samplesBuffer` must be followed by noisy guard band
|
|
969
|
+
* Warning : `samplesBuffer` must be followed by noisy guard band !!!
|
|
978
970
|
* @return : size of dictionary, or an error code which can be tested with ZDICT_isError()
|
|
979
971
|
*/
|
|
980
|
-
size_t ZDICT_trainFromBuffer_unsafe_legacy(
|
|
972
|
+
static size_t ZDICT_trainFromBuffer_unsafe_legacy(
|
|
981
973
|
void* dictBuffer, size_t maxDictSize,
|
|
982
974
|
const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
|
|
983
975
|
ZDICT_legacy_params_t params)
|
|
@@ -1114,8 +1106,8 @@ size_t ZDICT_trainFromBuffer(void* dictBuffer, size_t dictBufferCapacity,
|
|
|
1114
1106
|
memset(&params, 0, sizeof(params));
|
|
1115
1107
|
params.d = 8;
|
|
1116
1108
|
params.steps = 4;
|
|
1117
|
-
/*
|
|
1118
|
-
params.zParams.compressionLevel =
|
|
1109
|
+
/* Use default level since no compression level information is available */
|
|
1110
|
+
params.zParams.compressionLevel = ZSTD_CLEVEL_DEFAULT;
|
|
1119
1111
|
#if defined(DEBUGLEVEL) && (DEBUGLEVEL>=1)
|
|
1120
1112
|
params.zParams.notificationLevel = DEBUGLEVEL;
|
|
1121
1113
|
#endif
|