zstd-ruby 1.4.5.0 → 1.5.5.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +2 -0
- data/README.md +78 -5
- data/Rakefile +8 -2
- data/ext/zstdruby/common.h +15 -0
- data/ext/zstdruby/extconf.rb +3 -2
- data/ext/zstdruby/libzstd/common/allocations.h +55 -0
- data/ext/zstdruby/libzstd/common/bits.h +200 -0
- data/ext/zstdruby/libzstd/common/bitstream.h +45 -62
- data/ext/zstdruby/libzstd/common/compiler.h +205 -22
- data/ext/zstdruby/libzstd/common/cpu.h +1 -3
- data/ext/zstdruby/libzstd/common/debug.c +1 -1
- data/ext/zstdruby/libzstd/common/debug.h +12 -19
- data/ext/zstdruby/libzstd/common/entropy_common.c +172 -48
- data/ext/zstdruby/libzstd/common/error_private.c +10 -2
- data/ext/zstdruby/libzstd/common/error_private.h +82 -3
- data/ext/zstdruby/libzstd/common/fse.h +37 -86
- data/ext/zstdruby/libzstd/common/fse_decompress.c +117 -92
- data/ext/zstdruby/libzstd/common/huf.h +99 -166
- data/ext/zstdruby/libzstd/common/mem.h +124 -142
- data/ext/zstdruby/libzstd/common/pool.c +54 -27
- data/ext/zstdruby/libzstd/common/pool.h +10 -4
- data/ext/zstdruby/libzstd/common/portability_macros.h +156 -0
- data/ext/zstdruby/libzstd/common/threading.c +74 -19
- data/ext/zstdruby/libzstd/common/threading.h +5 -10
- data/ext/zstdruby/libzstd/common/xxhash.c +7 -847
- data/ext/zstdruby/libzstd/common/xxhash.h +5568 -167
- data/ext/zstdruby/libzstd/common/zstd_common.c +2 -37
- data/ext/zstdruby/libzstd/common/zstd_deps.h +111 -0
- data/ext/zstdruby/libzstd/common/zstd_internal.h +132 -187
- data/ext/zstdruby/libzstd/common/zstd_trace.h +163 -0
- data/ext/zstdruby/libzstd/compress/clevels.h +134 -0
- data/ext/zstdruby/libzstd/compress/fse_compress.c +83 -157
- data/ext/zstdruby/libzstd/compress/hist.c +27 -29
- data/ext/zstdruby/libzstd/compress/hist.h +2 -2
- data/ext/zstdruby/libzstd/compress/huf_compress.c +916 -279
- data/ext/zstdruby/libzstd/compress/zstd_compress.c +3773 -1019
- data/ext/zstdruby/libzstd/compress/zstd_compress_internal.h +610 -203
- data/ext/zstdruby/libzstd/compress/zstd_compress_literals.c +119 -42
- data/ext/zstdruby/libzstd/compress/zstd_compress_literals.h +16 -6
- data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.c +42 -19
- data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.h +1 -1
- data/ext/zstdruby/libzstd/compress/zstd_compress_superblock.c +49 -317
- data/ext/zstdruby/libzstd/compress/zstd_compress_superblock.h +1 -1
- data/ext/zstdruby/libzstd/compress/zstd_cwksp.h +320 -103
- data/ext/zstdruby/libzstd/compress/zstd_double_fast.c +388 -151
- data/ext/zstdruby/libzstd/compress/zstd_double_fast.h +3 -2
- data/ext/zstdruby/libzstd/compress/zstd_fast.c +729 -265
- data/ext/zstdruby/libzstd/compress/zstd_fast.h +3 -2
- data/ext/zstdruby/libzstd/compress/zstd_lazy.c +1270 -251
- data/ext/zstdruby/libzstd/compress/zstd_lazy.h +61 -1
- data/ext/zstdruby/libzstd/compress/zstd_ldm.c +324 -219
- data/ext/zstdruby/libzstd/compress/zstd_ldm.h +9 -2
- data/ext/zstdruby/libzstd/compress/zstd_ldm_geartab.h +106 -0
- data/ext/zstdruby/libzstd/compress/zstd_opt.c +481 -209
- data/ext/zstdruby/libzstd/compress/zstd_opt.h +1 -1
- data/ext/zstdruby/libzstd/compress/zstdmt_compress.c +181 -457
- data/ext/zstdruby/libzstd/compress/zstdmt_compress.h +34 -113
- data/ext/zstdruby/libzstd/decompress/huf_decompress.c +1199 -565
- data/ext/zstdruby/libzstd/decompress/huf_decompress_amd64.S +576 -0
- data/ext/zstdruby/libzstd/decompress/zstd_ddict.c +12 -12
- data/ext/zstdruby/libzstd/decompress/zstd_ddict.h +2 -2
- data/ext/zstdruby/libzstd/decompress/zstd_decompress.c +627 -157
- data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.c +1086 -326
- data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.h +19 -5
- data/ext/zstdruby/libzstd/decompress/zstd_decompress_internal.h +62 -13
- data/ext/zstdruby/libzstd/dictBuilder/cover.c +73 -52
- data/ext/zstdruby/libzstd/dictBuilder/cover.h +7 -6
- data/ext/zstdruby/libzstd/dictBuilder/divsufsort.c +1 -1
- data/ext/zstdruby/libzstd/dictBuilder/fastcover.c +44 -35
- data/ext/zstdruby/libzstd/dictBuilder/zdict.c +103 -111
- data/ext/zstdruby/libzstd/{dictBuilder/zdict.h → zdict.h} +203 -34
- data/ext/zstdruby/libzstd/zstd.h +1217 -287
- data/ext/zstdruby/libzstd/{common/zstd_errors.h → zstd_errors.h} +28 -8
- data/ext/zstdruby/main.c +20 -0
- data/ext/zstdruby/skippable_frame.c +63 -0
- data/ext/zstdruby/streaming_compress.c +177 -0
- data/ext/zstdruby/streaming_compress.h +5 -0
- data/ext/zstdruby/streaming_decompress.c +123 -0
- data/ext/zstdruby/zstdruby.c +114 -32
- data/lib/zstd-ruby/version.rb +1 -1
- data/lib/zstd-ruby.rb +0 -1
- data/zstd-ruby.gemspec +1 -1
- metadata +19 -36
- data/.travis.yml +0 -14
- data/ext/zstdruby/libzstd/.gitignore +0 -3
- data/ext/zstdruby/libzstd/BUCK +0 -234
- data/ext/zstdruby/libzstd/Makefile +0 -354
- data/ext/zstdruby/libzstd/README.md +0 -179
- data/ext/zstdruby/libzstd/deprecated/zbuff.h +0 -214
- data/ext/zstdruby/libzstd/deprecated/zbuff_common.c +0 -26
- data/ext/zstdruby/libzstd/deprecated/zbuff_compress.c +0 -147
- data/ext/zstdruby/libzstd/deprecated/zbuff_decompress.c +0 -75
- data/ext/zstdruby/libzstd/dll/example/Makefile +0 -48
- data/ext/zstdruby/libzstd/dll/example/README.md +0 -69
- data/ext/zstdruby/libzstd/dll/example/build_package.bat +0 -20
- data/ext/zstdruby/libzstd/dll/example/fullbench-dll.sln +0 -25
- data/ext/zstdruby/libzstd/dll/example/fullbench-dll.vcxproj +0 -181
- data/ext/zstdruby/libzstd/legacy/zstd_legacy.h +0 -415
- data/ext/zstdruby/libzstd/legacy/zstd_v01.c +0 -2158
- data/ext/zstdruby/libzstd/legacy/zstd_v01.h +0 -94
- data/ext/zstdruby/libzstd/legacy/zstd_v02.c +0 -3518
- data/ext/zstdruby/libzstd/legacy/zstd_v02.h +0 -93
- data/ext/zstdruby/libzstd/legacy/zstd_v03.c +0 -3160
- data/ext/zstdruby/libzstd/legacy/zstd_v03.h +0 -93
- data/ext/zstdruby/libzstd/legacy/zstd_v04.c +0 -3647
- data/ext/zstdruby/libzstd/legacy/zstd_v04.h +0 -142
- data/ext/zstdruby/libzstd/legacy/zstd_v05.c +0 -4050
- data/ext/zstdruby/libzstd/legacy/zstd_v05.h +0 -162
- data/ext/zstdruby/libzstd/legacy/zstd_v06.c +0 -4154
- data/ext/zstdruby/libzstd/legacy/zstd_v06.h +0 -172
- data/ext/zstdruby/libzstd/legacy/zstd_v07.c +0 -4541
- data/ext/zstdruby/libzstd/legacy/zstd_v07.h +0 -187
- data/ext/zstdruby/libzstd/libzstd.pc.in +0 -15
- data/ext/zstdruby/zstdruby.h +0 -6
@@ -1,5 +1,5 @@
|
|
1
1
|
/*
|
2
|
-
* Copyright (c)
|
2
|
+
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
3
3
|
* All rights reserved.
|
4
4
|
*
|
5
5
|
* This source code is licensed under both the BSD-style license (found in the
|
@@ -16,24 +16,33 @@
|
|
16
16
|
#include <string.h> /* memset */
|
17
17
|
#include <time.h> /* clock */
|
18
18
|
|
19
|
+
#ifndef ZDICT_STATIC_LINKING_ONLY
|
20
|
+
# define ZDICT_STATIC_LINKING_ONLY
|
21
|
+
#endif
|
22
|
+
|
19
23
|
#include "../common/mem.h" /* read */
|
20
24
|
#include "../common/pool.h"
|
21
25
|
#include "../common/threading.h"
|
22
|
-
#include "cover.h"
|
23
26
|
#include "../common/zstd_internal.h" /* includes zstd.h */
|
24
|
-
#
|
25
|
-
#
|
26
|
-
#
|
27
|
-
#include "zdict.h"
|
27
|
+
#include "../compress/zstd_compress_internal.h" /* ZSTD_hash*() */
|
28
|
+
#include "../zdict.h"
|
29
|
+
#include "cover.h"
|
28
30
|
|
29
31
|
|
30
32
|
/*-*************************************
|
31
33
|
* Constants
|
32
34
|
***************************************/
|
35
|
+
/**
|
36
|
+
* There are 32bit indexes used to ref samples, so limit samples size to 4GB
|
37
|
+
* on 64bit builds.
|
38
|
+
* For 32bit builds we choose 1 GB.
|
39
|
+
* Most 32bit platforms have 2GB user-mode addressable space and we allocate a large
|
40
|
+
* contiguous buffer, so 1GB is already a high limit.
|
41
|
+
*/
|
33
42
|
#define FASTCOVER_MAX_SAMPLES_SIZE (sizeof(size_t) == 8 ? ((unsigned)-1) : ((unsigned)1 GB))
|
34
43
|
#define FASTCOVER_MAX_F 31
|
35
44
|
#define FASTCOVER_MAX_ACCEL 10
|
36
|
-
#define
|
45
|
+
#define FASTCOVER_DEFAULT_SPLITPOINT 0.75
|
37
46
|
#define DEFAULT_F 20
|
38
47
|
#define DEFAULT_ACCEL 1
|
39
48
|
|
@@ -41,50 +50,50 @@
|
|
41
50
|
/*-*************************************
|
42
51
|
* Console display
|
43
52
|
***************************************/
|
44
|
-
|
53
|
+
#ifndef LOCALDISPLAYLEVEL
|
54
|
+
static int g_displayLevel = 0;
|
55
|
+
#endif
|
56
|
+
#undef DISPLAY
|
45
57
|
#define DISPLAY(...) \
|
46
58
|
{ \
|
47
59
|
fprintf(stderr, __VA_ARGS__); \
|
48
60
|
fflush(stderr); \
|
49
61
|
}
|
62
|
+
#undef LOCALDISPLAYLEVEL
|
50
63
|
#define LOCALDISPLAYLEVEL(displayLevel, l, ...) \
|
51
64
|
if (displayLevel >= l) { \
|
52
65
|
DISPLAY(__VA_ARGS__); \
|
53
66
|
} /* 0 : no display; 1: errors; 2: default; 3: details; 4: debug */
|
67
|
+
#undef DISPLAYLEVEL
|
54
68
|
#define DISPLAYLEVEL(l, ...) LOCALDISPLAYLEVEL(g_displayLevel, l, __VA_ARGS__)
|
55
69
|
|
70
|
+
#ifndef LOCALDISPLAYUPDATE
|
71
|
+
static const clock_t g_refreshRate = CLOCKS_PER_SEC * 15 / 100;
|
72
|
+
static clock_t g_time = 0;
|
73
|
+
#endif
|
74
|
+
#undef LOCALDISPLAYUPDATE
|
56
75
|
#define LOCALDISPLAYUPDATE(displayLevel, l, ...) \
|
57
76
|
if (displayLevel >= l) { \
|
58
|
-
if ((clock() - g_time >
|
77
|
+
if ((clock() - g_time > g_refreshRate) || (displayLevel >= 4)) { \
|
59
78
|
g_time = clock(); \
|
60
79
|
DISPLAY(__VA_ARGS__); \
|
61
80
|
} \
|
62
81
|
}
|
82
|
+
#undef DISPLAYUPDATE
|
63
83
|
#define DISPLAYUPDATE(l, ...) LOCALDISPLAYUPDATE(g_displayLevel, l, __VA_ARGS__)
|
64
|
-
static const clock_t refreshRate = CLOCKS_PER_SEC * 15 / 100;
|
65
|
-
static clock_t g_time = 0;
|
66
84
|
|
67
85
|
|
68
86
|
/*-*************************************
|
69
87
|
* Hash Functions
|
70
88
|
***************************************/
|
71
|
-
static const U64 prime6bytes = 227718039650203ULL;
|
72
|
-
static size_t ZSTD_hash6(U64 u, U32 h) { return (size_t)(((u << (64-48)) * prime6bytes) >> (64-h)) ; }
|
73
|
-
static size_t ZSTD_hash6Ptr(const void* p, U32 h) { return ZSTD_hash6(MEM_readLE64(p), h); }
|
74
|
-
|
75
|
-
static const U64 prime8bytes = 0xCF1BBCDCB7A56463ULL;
|
76
|
-
static size_t ZSTD_hash8(U64 u, U32 h) { return (size_t)(((u) * prime8bytes) >> (64-h)) ; }
|
77
|
-
static size_t ZSTD_hash8Ptr(const void* p, U32 h) { return ZSTD_hash8(MEM_readLE64(p), h); }
|
78
|
-
|
79
|
-
|
80
89
|
/**
|
81
|
-
* Hash the d-byte value pointed to by p and mod 2^f
|
90
|
+
* Hash the d-byte value pointed to by p and mod 2^f into the frequency vector
|
82
91
|
*/
|
83
|
-
static size_t FASTCOVER_hashPtrToIndex(const void* p, U32
|
92
|
+
static size_t FASTCOVER_hashPtrToIndex(const void* p, U32 f, unsigned d) {
|
84
93
|
if (d == 6) {
|
85
|
-
return ZSTD_hash6Ptr(p,
|
94
|
+
return ZSTD_hash6Ptr(p, f);
|
86
95
|
}
|
87
|
-
return ZSTD_hash8Ptr(p,
|
96
|
+
return ZSTD_hash8Ptr(p, f);
|
88
97
|
}
|
89
98
|
|
90
99
|
|
@@ -295,7 +304,7 @@ FASTCOVER_computeFrequency(U32* freqs, const FASTCOVER_ctx_t* ctx)
|
|
295
304
|
|
296
305
|
/**
|
297
306
|
* Prepare a context for dictionary building.
|
298
|
-
* The context is only dependent on the parameter `d` and can used multiple
|
307
|
+
* The context is only dependent on the parameter `d` and can be used multiple
|
299
308
|
* times.
|
300
309
|
* Returns 0 on success or error code on error.
|
301
310
|
* The context must be destroyed with `FASTCOVER_ctx_destroy()`.
|
@@ -461,20 +470,20 @@ typedef struct FASTCOVER_tryParameters_data_s {
|
|
461
470
|
* This function is thread safe if zstd is compiled with multithreaded support.
|
462
471
|
* It takes its parameters as an *OWNING* opaque pointer to support threading.
|
463
472
|
*/
|
464
|
-
static void FASTCOVER_tryParameters(void
|
473
|
+
static void FASTCOVER_tryParameters(void* opaque)
|
465
474
|
{
|
466
475
|
/* Save parameters as local variables */
|
467
|
-
FASTCOVER_tryParameters_data_t *const data = (FASTCOVER_tryParameters_data_t
|
476
|
+
FASTCOVER_tryParameters_data_t *const data = (FASTCOVER_tryParameters_data_t*)opaque;
|
468
477
|
const FASTCOVER_ctx_t *const ctx = data->ctx;
|
469
478
|
const ZDICT_cover_params_t parameters = data->parameters;
|
470
479
|
size_t dictBufferCapacity = data->dictBufferCapacity;
|
471
480
|
size_t totalCompressedSize = ERROR(GENERIC);
|
472
481
|
/* Initialize array to keep track of frequency of dmer within activeSegment */
|
473
|
-
U16* segmentFreqs = (U16
|
482
|
+
U16* segmentFreqs = (U16*)calloc(((U64)1 << ctx->f), sizeof(U16));
|
474
483
|
/* Allocate space for hash table, dict, and freqs */
|
475
|
-
BYTE *const dict = (BYTE
|
484
|
+
BYTE *const dict = (BYTE*)malloc(dictBufferCapacity);
|
476
485
|
COVER_dictSelection_t selection = COVER_dictSelectionError(ERROR(GENERIC));
|
477
|
-
U32
|
486
|
+
U32* freqs = (U32*) malloc(((U64)1 << ctx->f) * sizeof(U32));
|
478
487
|
if (!segmentFreqs || !dict || !freqs) {
|
479
488
|
DISPLAYLEVEL(1, "Failed to allocate buffers: out of memory\n");
|
480
489
|
goto _cleanup;
|
@@ -486,7 +495,7 @@ static void FASTCOVER_tryParameters(void *opaque)
|
|
486
495
|
parameters, segmentFreqs);
|
487
496
|
|
488
497
|
const unsigned nbFinalizeSamples = (unsigned)(ctx->nbTrainSamples * ctx->accelParams.finalize / 100);
|
489
|
-
selection = COVER_selectDict(dict + tail, dictBufferCapacity - tail,
|
498
|
+
selection = COVER_selectDict(dict + tail, dictBufferCapacity, dictBufferCapacity - tail,
|
490
499
|
ctx->samples, ctx->samplesSizes, nbFinalizeSamples, ctx->nbTrainSamples, ctx->nbSamples, parameters, ctx->offsets,
|
491
500
|
totalCompressedSize);
|
492
501
|
|
@@ -547,7 +556,7 @@ ZDICT_trainFromBuffer_fastCover(void* dictBuffer, size_t dictBufferCapacity,
|
|
547
556
|
ZDICT_cover_params_t coverParams;
|
548
557
|
FASTCOVER_accel_t accelParams;
|
549
558
|
/* Initialize global data */
|
550
|
-
g_displayLevel = parameters.zParams.notificationLevel;
|
559
|
+
g_displayLevel = (int)parameters.zParams.notificationLevel;
|
551
560
|
/* Assign splitPoint and f if not provided */
|
552
561
|
parameters.splitPoint = 1.0;
|
553
562
|
parameters.f = parameters.f == 0 ? DEFAULT_F : parameters.f;
|
@@ -617,7 +626,7 @@ ZDICT_optimizeTrainFromBuffer_fastCover(
|
|
617
626
|
/* constants */
|
618
627
|
const unsigned nbThreads = parameters->nbThreads;
|
619
628
|
const double splitPoint =
|
620
|
-
parameters->splitPoint <= 0.0 ?
|
629
|
+
parameters->splitPoint <= 0.0 ? FASTCOVER_DEFAULT_SPLITPOINT : parameters->splitPoint;
|
621
630
|
const unsigned kMinD = parameters->d == 0 ? 6 : parameters->d;
|
622
631
|
const unsigned kMaxD = parameters->d == 0 ? 8 : parameters->d;
|
623
632
|
const unsigned kMinK = parameters->k == 0 ? 50 : parameters->k;
|
@@ -630,7 +639,7 @@ ZDICT_optimizeTrainFromBuffer_fastCover(
|
|
630
639
|
const unsigned accel = parameters->accel == 0 ? DEFAULT_ACCEL : parameters->accel;
|
631
640
|
const unsigned shrinkDict = 0;
|
632
641
|
/* Local variables */
|
633
|
-
const int displayLevel = parameters->zParams.notificationLevel;
|
642
|
+
const int displayLevel = (int)parameters->zParams.notificationLevel;
|
634
643
|
unsigned iteration = 1;
|
635
644
|
unsigned d;
|
636
645
|
unsigned k;
|
@@ -714,7 +723,7 @@ ZDICT_optimizeTrainFromBuffer_fastCover(
|
|
714
723
|
data->parameters.splitPoint = splitPoint;
|
715
724
|
data->parameters.steps = kSteps;
|
716
725
|
data->parameters.shrinkDict = shrinkDict;
|
717
|
-
data->parameters.zParams.notificationLevel = g_displayLevel;
|
726
|
+
data->parameters.zParams.notificationLevel = (unsigned)g_displayLevel;
|
718
727
|
/* Check the parameters */
|
719
728
|
if (!FASTCOVER_checkParameters(data->parameters, dictBufferCapacity,
|
720
729
|
data->ctx->f, accel)) {
|
@@ -1,5 +1,5 @@
|
|
1
1
|
/*
|
2
|
-
* Copyright (c)
|
2
|
+
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
3
3
|
* All rights reserved.
|
4
4
|
*
|
5
5
|
* This source code is licensed under both the BSD-style license (found in the
|
@@ -23,9 +23,13 @@
|
|
23
23
|
/* Unix Large Files support (>4GB) */
|
24
24
|
#define _FILE_OFFSET_BITS 64
|
25
25
|
#if (defined(__sun__) && (!defined(__LP64__))) /* Sun Solaris 32-bits requires specific definitions */
|
26
|
+
# ifndef _LARGEFILE_SOURCE
|
26
27
|
# define _LARGEFILE_SOURCE
|
28
|
+
# endif
|
27
29
|
#elif ! defined(__LP64__) /* No point defining Large file for 64 bit */
|
30
|
+
# ifndef _LARGEFILE64_SOURCE
|
28
31
|
# define _LARGEFILE64_SOURCE
|
32
|
+
# endif
|
29
33
|
#endif
|
30
34
|
|
31
35
|
|
@@ -37,18 +41,19 @@
|
|
37
41
|
#include <stdio.h> /* fprintf, fopen, ftello64 */
|
38
42
|
#include <time.h> /* clock */
|
39
43
|
|
44
|
+
#ifndef ZDICT_STATIC_LINKING_ONLY
|
45
|
+
# define ZDICT_STATIC_LINKING_ONLY
|
46
|
+
#endif
|
47
|
+
|
40
48
|
#include "../common/mem.h" /* read */
|
41
49
|
#include "../common/fse.h" /* FSE_normalizeCount, FSE_writeNCount */
|
42
|
-
#define HUF_STATIC_LINKING_ONLY
|
43
50
|
#include "../common/huf.h" /* HUF_buildCTable, HUF_writeCTable */
|
44
51
|
#include "../common/zstd_internal.h" /* includes zstd.h */
|
45
52
|
#include "../common/xxhash.h" /* XXH64 */
|
46
|
-
#include "divsufsort.h"
|
47
|
-
#ifndef ZDICT_STATIC_LINKING_ONLY
|
48
|
-
# define ZDICT_STATIC_LINKING_ONLY
|
49
|
-
#endif
|
50
|
-
#include "zdict.h"
|
51
53
|
#include "../compress/zstd_compress_internal.h" /* ZSTD_loadCEntropy() */
|
54
|
+
#include "../zdict.h"
|
55
|
+
#include "divsufsort.h"
|
56
|
+
#include "../common/bits.h" /* ZSTD_NbCommonBytes */
|
52
57
|
|
53
58
|
|
54
59
|
/*-*************************************
|
@@ -62,14 +67,15 @@
|
|
62
67
|
|
63
68
|
#define NOISELENGTH 32
|
64
69
|
|
65
|
-
static const int g_compressionLevel_default = 3;
|
66
70
|
static const U32 g_selectivity_default = 9;
|
67
71
|
|
68
72
|
|
69
73
|
/*-*************************************
|
70
74
|
* Console display
|
71
75
|
***************************************/
|
76
|
+
#undef DISPLAY
|
72
77
|
#define DISPLAY(...) { fprintf(stderr, __VA_ARGS__); fflush( stderr ); }
|
78
|
+
#undef DISPLAYLEVEL
|
73
79
|
#define DISPLAYLEVEL(l, ...) if (notificationLevel>=l) { DISPLAY(__VA_ARGS__); } /* 0 : no display; 1: errors; 2: default; 3: details; 4: debug */
|
74
80
|
|
75
81
|
static clock_t ZDICT_clockSpan(clock_t nPrevious) { return clock() - nPrevious; }
|
@@ -105,20 +111,17 @@ size_t ZDICT_getDictHeaderSize(const void* dictBuffer, size_t dictSize)
|
|
105
111
|
size_t headerSize;
|
106
112
|
if (dictSize <= 8 || MEM_readLE32(dictBuffer) != ZSTD_MAGIC_DICTIONARY) return ERROR(dictionary_corrupted);
|
107
113
|
|
108
|
-
{
|
109
|
-
ZSTD_compressedBlockState_t* bs = (ZSTD_compressedBlockState_t*)malloc(sizeof(ZSTD_compressedBlockState_t));
|
114
|
+
{ ZSTD_compressedBlockState_t* bs = (ZSTD_compressedBlockState_t*)malloc(sizeof(ZSTD_compressedBlockState_t));
|
110
115
|
U32* wksp = (U32*)malloc(HUF_WORKSPACE_SIZE);
|
111
|
-
|
112
|
-
if (!bs || !wksp || !offcodeNCount) {
|
116
|
+
if (!bs || !wksp) {
|
113
117
|
headerSize = ERROR(memory_allocation);
|
114
118
|
} else {
|
115
119
|
ZSTD_reset_compressedBlockState(bs);
|
116
|
-
headerSize = ZSTD_loadCEntropy(bs, wksp,
|
120
|
+
headerSize = ZSTD_loadCEntropy(bs, wksp, dictBuffer, dictSize);
|
117
121
|
}
|
118
122
|
|
119
123
|
free(bs);
|
120
124
|
free(wksp);
|
121
|
-
free(offcodeNCount);
|
122
125
|
}
|
123
126
|
|
124
127
|
return headerSize;
|
@@ -127,65 +130,6 @@ size_t ZDICT_getDictHeaderSize(const void* dictBuffer, size_t dictSize)
|
|
127
130
|
/*-********************************************************
|
128
131
|
* Dictionary training functions
|
129
132
|
**********************************************************/
|
130
|
-
static unsigned ZDICT_NbCommonBytes (size_t val)
|
131
|
-
{
|
132
|
-
if (MEM_isLittleEndian()) {
|
133
|
-
if (MEM_64bits()) {
|
134
|
-
# if defined(_MSC_VER) && defined(_WIN64)
|
135
|
-
unsigned long r = 0;
|
136
|
-
_BitScanForward64( &r, (U64)val );
|
137
|
-
return (unsigned)(r>>3);
|
138
|
-
# elif defined(__GNUC__) && (__GNUC__ >= 3)
|
139
|
-
return (__builtin_ctzll((U64)val) >> 3);
|
140
|
-
# else
|
141
|
-
static const int DeBruijnBytePos[64] = { 0, 0, 0, 0, 0, 1, 1, 2, 0, 3, 1, 3, 1, 4, 2, 7, 0, 2, 3, 6, 1, 5, 3, 5, 1, 3, 4, 4, 2, 5, 6, 7, 7, 0, 1, 2, 3, 3, 4, 6, 2, 6, 5, 5, 3, 4, 5, 6, 7, 1, 2, 4, 6, 4, 4, 5, 7, 2, 6, 5, 7, 6, 7, 7 };
|
142
|
-
return DeBruijnBytePos[((U64)((val & -(long long)val) * 0x0218A392CDABBD3FULL)) >> 58];
|
143
|
-
# endif
|
144
|
-
} else { /* 32 bits */
|
145
|
-
# if defined(_MSC_VER)
|
146
|
-
unsigned long r=0;
|
147
|
-
_BitScanForward( &r, (U32)val );
|
148
|
-
return (unsigned)(r>>3);
|
149
|
-
# elif defined(__GNUC__) && (__GNUC__ >= 3)
|
150
|
-
return (__builtin_ctz((U32)val) >> 3);
|
151
|
-
# else
|
152
|
-
static const int DeBruijnBytePos[32] = { 0, 0, 3, 0, 3, 1, 3, 0, 3, 2, 2, 1, 3, 2, 0, 1, 3, 3, 1, 2, 2, 2, 2, 0, 3, 1, 2, 0, 1, 0, 1, 1 };
|
153
|
-
return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27];
|
154
|
-
# endif
|
155
|
-
}
|
156
|
-
} else { /* Big Endian CPU */
|
157
|
-
if (MEM_64bits()) {
|
158
|
-
# if defined(_MSC_VER) && defined(_WIN64)
|
159
|
-
unsigned long r = 0;
|
160
|
-
_BitScanReverse64( &r, val );
|
161
|
-
return (unsigned)(r>>3);
|
162
|
-
# elif defined(__GNUC__) && (__GNUC__ >= 3)
|
163
|
-
return (__builtin_clzll(val) >> 3);
|
164
|
-
# else
|
165
|
-
unsigned r;
|
166
|
-
const unsigned n32 = sizeof(size_t)*4; /* calculate this way due to compiler complaining in 32-bits mode */
|
167
|
-
if (!(val>>n32)) { r=4; } else { r=0; val>>=n32; }
|
168
|
-
if (!(val>>16)) { r+=2; val>>=8; } else { val>>=24; }
|
169
|
-
r += (!val);
|
170
|
-
return r;
|
171
|
-
# endif
|
172
|
-
} else { /* 32 bits */
|
173
|
-
# if defined(_MSC_VER)
|
174
|
-
unsigned long r = 0;
|
175
|
-
_BitScanReverse( &r, (unsigned long)val );
|
176
|
-
return (unsigned)(r>>3);
|
177
|
-
# elif defined(__GNUC__) && (__GNUC__ >= 3)
|
178
|
-
return (__builtin_clz((U32)val) >> 3);
|
179
|
-
# else
|
180
|
-
unsigned r;
|
181
|
-
if (!(val>>16)) { r=2; val>>=8; } else { r=0; val>>=24; }
|
182
|
-
r += (!val);
|
183
|
-
return r;
|
184
|
-
# endif
|
185
|
-
} }
|
186
|
-
}
|
187
|
-
|
188
|
-
|
189
133
|
/*! ZDICT_count() :
|
190
134
|
Count the nb of common bytes between 2 pointers.
|
191
135
|
Note : this function presumes end of buffer followed by noisy guard band.
|
@@ -200,7 +144,7 @@ static size_t ZDICT_count(const void* pIn, const void* pMatch)
|
|
200
144
|
pMatch = (const char*)pMatch+sizeof(size_t);
|
201
145
|
continue;
|
202
146
|
}
|
203
|
-
pIn = (const char*)pIn+
|
147
|
+
pIn = (const char*)pIn+ZSTD_NbCommonBytes(diff);
|
204
148
|
return (size_t)((const char*)pIn - pStart);
|
205
149
|
}
|
206
150
|
}
|
@@ -232,7 +176,7 @@ static dictItem ZDICT_analyzePos(
|
|
232
176
|
U32 savings[LLIMIT] = {0};
|
233
177
|
const BYTE* b = (const BYTE*)buffer;
|
234
178
|
size_t maxLength = LLIMIT;
|
235
|
-
size_t pos = suffix[start];
|
179
|
+
size_t pos = (size_t)suffix[start];
|
236
180
|
U32 end = start;
|
237
181
|
dictItem solution;
|
238
182
|
|
@@ -366,7 +310,7 @@ static dictItem ZDICT_analyzePos(
|
|
366
310
|
savings[i] = savings[i-1] + (lengthList[i] * (i-3));
|
367
311
|
|
368
312
|
DISPLAYLEVEL(4, "Selected dict at position %u, of length %u : saves %u (ratio: %.2f) \n",
|
369
|
-
(unsigned)pos, (unsigned)maxLength, (unsigned)savings[maxLength], (double)savings[maxLength] / maxLength);
|
313
|
+
(unsigned)pos, (unsigned)maxLength, (unsigned)savings[maxLength], (double)savings[maxLength] / (double)maxLength);
|
370
314
|
|
371
315
|
solution.pos = (U32)pos;
|
372
316
|
solution.length = (U32)maxLength;
|
@@ -376,7 +320,7 @@ static dictItem ZDICT_analyzePos(
|
|
376
320
|
{ U32 id;
|
377
321
|
for (id=start; id<end; id++) {
|
378
322
|
U32 p, pEnd, length;
|
379
|
-
U32 const testedPos = suffix[id];
|
323
|
+
U32 const testedPos = (U32)suffix[id];
|
380
324
|
if (testedPos == pos)
|
381
325
|
length = solution.length;
|
382
326
|
else {
|
@@ -428,7 +372,7 @@ static U32 ZDICT_tryMerge(dictItem* table, dictItem elt, U32 eltNbToSkip, const
|
|
428
372
|
elt = table[u];
|
429
373
|
/* sort : improve rank */
|
430
374
|
while ((u>1) && (table[u-1].savings < elt.savings))
|
431
|
-
|
375
|
+
table[u] = table[u-1], u--;
|
432
376
|
table[u] = elt;
|
433
377
|
return u;
|
434
378
|
} }
|
@@ -439,7 +383,7 @@ static U32 ZDICT_tryMerge(dictItem* table, dictItem elt, U32 eltNbToSkip, const
|
|
439
383
|
|
440
384
|
if ((table[u].pos + table[u].length >= elt.pos) && (table[u].pos < elt.pos)) { /* overlap, existing < new */
|
441
385
|
/* append */
|
442
|
-
int const addedLength = (int)eltEnd - (table[u].pos + table[u].length);
|
386
|
+
int const addedLength = (int)eltEnd - (int)(table[u].pos + table[u].length);
|
443
387
|
table[u].savings += elt.length / 8; /* rough approx bonus */
|
444
388
|
if (addedLength > 0) { /* otherwise, elt fully included into existing */
|
445
389
|
table[u].length += addedLength;
|
@@ -532,6 +476,7 @@ static size_t ZDICT_trainBuffer_legacy(dictItem* dictList, U32 dictListSize,
|
|
532
476
|
clock_t displayClock = 0;
|
533
477
|
clock_t const refreshRate = CLOCKS_PER_SEC * 3 / 10;
|
534
478
|
|
479
|
+
# undef DISPLAYUPDATE
|
535
480
|
# define DISPLAYUPDATE(l, ...) if (notificationLevel>=l) { \
|
536
481
|
if (ZDICT_clockSpan(displayClock) > refreshRate) \
|
537
482
|
{ displayClock = clock(); DISPLAY(__VA_ARGS__); \
|
@@ -578,7 +523,7 @@ static size_t ZDICT_trainBuffer_legacy(dictItem* dictList, U32 dictListSize,
|
|
578
523
|
if (solution.length==0) { cursor++; continue; }
|
579
524
|
ZDICT_insertDictItem(dictList, dictListSize, solution, buffer);
|
580
525
|
cursor += solution.length;
|
581
|
-
DISPLAYUPDATE(2, "\r%4.2f %% \r", (double)cursor / bufferSize * 100);
|
526
|
+
DISPLAYUPDATE(2, "\r%4.2f %% \r", (double)cursor / (double)bufferSize * 100.0);
|
582
527
|
} }
|
583
528
|
|
584
529
|
_cleanup:
|
@@ -621,11 +566,11 @@ static void ZDICT_countEStats(EStats_ress_t esr, const ZSTD_parameters* params,
|
|
621
566
|
size_t cSize;
|
622
567
|
|
623
568
|
if (srcSize > blockSizeMax) srcSize = blockSizeMax; /* protection vs large samples */
|
624
|
-
{ size_t const errorCode =
|
569
|
+
{ size_t const errorCode = ZSTD_compressBegin_usingCDict_deprecated(esr.zc, esr.dict);
|
625
570
|
if (ZSTD_isError(errorCode)) { DISPLAYLEVEL(1, "warning : ZSTD_compressBegin_usingCDict failed \n"); return; }
|
626
571
|
|
627
572
|
}
|
628
|
-
cSize =
|
573
|
+
cSize = ZSTD_compressBlock_deprecated(esr.zc, esr.workPlace, ZSTD_BLOCKSIZE_MAX, src, srcSize);
|
629
574
|
if (ZSTD_isError(cSize)) { DISPLAYLEVEL(3, "warning : could not compress sample size %u \n", (unsigned)srcSize); return; }
|
630
575
|
|
631
576
|
if (cSize) { /* if == 0; block is not compressible */
|
@@ -658,8 +603,8 @@ static void ZDICT_countEStats(EStats_ress_t esr, const ZSTD_parameters* params,
|
|
658
603
|
|
659
604
|
if (nbSeq >= 2) { /* rep offsets */
|
660
605
|
const seqDef* const seq = seqStorePtr->sequencesStart;
|
661
|
-
U32 offset1 = seq[0].
|
662
|
-
U32 offset2 = seq[1].
|
606
|
+
U32 offset1 = seq[0].offBase - ZSTD_REP_NUM;
|
607
|
+
U32 offset2 = seq[1].offBase - ZSTD_REP_NUM;
|
663
608
|
if (offset1 >= MAXREPOFFSET) offset1 = 0;
|
664
609
|
if (offset2 >= MAXREPOFFSET) offset2 = 0;
|
665
610
|
repOffsets[offset1] += 3;
|
@@ -706,7 +651,7 @@ static void ZDICT_flatLit(unsigned* countLit)
|
|
706
651
|
|
707
652
|
#define OFFCODE_MAX 30 /* only applicable to first block */
|
708
653
|
static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
|
709
|
-
|
654
|
+
int compressionLevel,
|
710
655
|
const void* srcBuffer, const size_t* fileSizes, unsigned nbFiles,
|
711
656
|
const void* dictBuffer, size_t dictBufferSize,
|
712
657
|
unsigned notificationLevel)
|
@@ -730,6 +675,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
|
|
730
675
|
size_t const totalSrcSize = ZDICT_totalSampleSize(fileSizes, nbFiles);
|
731
676
|
size_t const averageSampleSize = totalSrcSize / (nbFiles + !nbFiles);
|
732
677
|
BYTE* dstPtr = (BYTE*)dstBuffer;
|
678
|
+
U32 wksp[HUF_CTABLE_WORKSPACE_SIZE_U32];
|
733
679
|
|
734
680
|
/* init */
|
735
681
|
DEBUGLOG(4, "ZDICT_analyzeEntropy");
|
@@ -741,7 +687,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
|
|
741
687
|
memset(repOffset, 0, sizeof(repOffset));
|
742
688
|
repOffset[1] = repOffset[4] = repOffset[8] = 1;
|
743
689
|
memset(bestRepOffset, 0, sizeof(bestRepOffset));
|
744
|
-
if (compressionLevel==0) compressionLevel =
|
690
|
+
if (compressionLevel==0) compressionLevel = ZSTD_CLEVEL_DEFAULT;
|
745
691
|
params = ZSTD_getParams(compressionLevel, averageSampleSize, dictBufferSize);
|
746
692
|
|
747
693
|
esr.dict = ZSTD_createCDict_advanced(dictBuffer, dictBufferSize, ZSTD_dlm_byRef, ZSTD_dct_rawContent, params.cParams, ZSTD_defaultCMem);
|
@@ -762,8 +708,15 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
|
|
762
708
|
pos += fileSizes[u];
|
763
709
|
}
|
764
710
|
|
711
|
+
if (notificationLevel >= 4) {
|
712
|
+
/* writeStats */
|
713
|
+
DISPLAYLEVEL(4, "Offset Code Frequencies : \n");
|
714
|
+
for (u=0; u<=offcodeMax; u++) {
|
715
|
+
DISPLAYLEVEL(4, "%2u :%7u \n", u, offcodeCount[u]);
|
716
|
+
} }
|
717
|
+
|
765
718
|
/* analyze, build stats, starting with literals */
|
766
|
-
{ size_t maxNbBits =
|
719
|
+
{ size_t maxNbBits = HUF_buildCTable_wksp(hufTable, countLit, 255, huffLog, wksp, sizeof(wksp));
|
767
720
|
if (HUF_isError(maxNbBits)) {
|
768
721
|
eSize = maxNbBits;
|
769
722
|
DISPLAYLEVEL(1, " HUF_buildCTable error \n");
|
@@ -772,7 +725,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
|
|
772
725
|
if (maxNbBits==8) { /* not compressible : will fail on HUF_writeCTable() */
|
773
726
|
DISPLAYLEVEL(2, "warning : pathological dataset : literals are not compressible : samples are noisy or too regular \n");
|
774
727
|
ZDICT_flatLit(countLit); /* replace distribution by a fake "mostly flat but still compressible" distribution, that HUF_writeCTable() can encode */
|
775
|
-
maxNbBits =
|
728
|
+
maxNbBits = HUF_buildCTable_wksp(hufTable, countLit, 255, huffLog, wksp, sizeof(wksp));
|
776
729
|
assert(maxNbBits==9);
|
777
730
|
}
|
778
731
|
huffLog = (U32)maxNbBits;
|
@@ -786,7 +739,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
|
|
786
739
|
/* note : the result of this phase should be used to better appreciate the impact on statistics */
|
787
740
|
|
788
741
|
total=0; for (u=0; u<=offcodeMax; u++) total+=offcodeCount[u];
|
789
|
-
errorCode = FSE_normalizeCount(offcodeNCount, Offlog, offcodeCount, total, offcodeMax);
|
742
|
+
errorCode = FSE_normalizeCount(offcodeNCount, Offlog, offcodeCount, total, offcodeMax, /* useLowProbCount */ 1);
|
790
743
|
if (FSE_isError(errorCode)) {
|
791
744
|
eSize = errorCode;
|
792
745
|
DISPLAYLEVEL(1, "FSE_normalizeCount error with offcodeCount \n");
|
@@ -795,7 +748,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
|
|
795
748
|
Offlog = (U32)errorCode;
|
796
749
|
|
797
750
|
total=0; for (u=0; u<=MaxML; u++) total+=matchLengthCount[u];
|
798
|
-
errorCode = FSE_normalizeCount(matchLengthNCount, mlLog, matchLengthCount, total, MaxML);
|
751
|
+
errorCode = FSE_normalizeCount(matchLengthNCount, mlLog, matchLengthCount, total, MaxML, /* useLowProbCount */ 1);
|
799
752
|
if (FSE_isError(errorCode)) {
|
800
753
|
eSize = errorCode;
|
801
754
|
DISPLAYLEVEL(1, "FSE_normalizeCount error with matchLengthCount \n");
|
@@ -804,7 +757,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
|
|
804
757
|
mlLog = (U32)errorCode;
|
805
758
|
|
806
759
|
total=0; for (u=0; u<=MaxLL; u++) total+=litLengthCount[u];
|
807
|
-
errorCode = FSE_normalizeCount(litLengthNCount, llLog, litLengthCount, total, MaxLL);
|
760
|
+
errorCode = FSE_normalizeCount(litLengthNCount, llLog, litLengthCount, total, MaxLL, /* useLowProbCount */ 1);
|
808
761
|
if (FSE_isError(errorCode)) {
|
809
762
|
eSize = errorCode;
|
810
763
|
DISPLAYLEVEL(1, "FSE_normalizeCount error with litLengthCount \n");
|
@@ -813,7 +766,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
|
|
813
766
|
llLog = (U32)errorCode;
|
814
767
|
|
815
768
|
/* write result to buffer */
|
816
|
-
{ size_t const hhSize = HUF_writeCTable(dstPtr, maxDstSize, hufTable, 255, huffLog);
|
769
|
+
{ size_t const hhSize = HUF_writeCTable_wksp(dstPtr, maxDstSize, hufTable, 255, huffLog, wksp, sizeof(wksp));
|
817
770
|
if (HUF_isError(hhSize)) {
|
818
771
|
eSize = hhSize;
|
819
772
|
DISPLAYLEVEL(1, "HUF_writeCTable error \n");
|
@@ -868,7 +821,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
|
|
868
821
|
MEM_writeLE32(dstPtr+8, bestRepOffset[2].offset);
|
869
822
|
#else
|
870
823
|
/* at this stage, we don't use the result of "most common first offset",
|
871
|
-
|
824
|
+
* as the impact of statistics is not properly evaluated */
|
872
825
|
MEM_writeLE32(dstPtr+0, repStartValue[0]);
|
873
826
|
MEM_writeLE32(dstPtr+4, repStartValue[1]);
|
874
827
|
MEM_writeLE32(dstPtr+8, repStartValue[2]);
|
@@ -884,6 +837,17 @@ _cleanup:
|
|
884
837
|
}
|
885
838
|
|
886
839
|
|
840
|
+
/**
|
841
|
+
* @returns the maximum repcode value
|
842
|
+
*/
|
843
|
+
static U32 ZDICT_maxRep(U32 const reps[ZSTD_REP_NUM])
|
844
|
+
{
|
845
|
+
U32 maxRep = reps[0];
|
846
|
+
int r;
|
847
|
+
for (r = 1; r < ZSTD_REP_NUM; ++r)
|
848
|
+
maxRep = MAX(maxRep, reps[r]);
|
849
|
+
return maxRep;
|
850
|
+
}
|
887
851
|
|
888
852
|
size_t ZDICT_finalizeDictionary(void* dictBuffer, size_t dictBufferCapacity,
|
889
853
|
const void* customDictContent, size_t dictContentSize,
|
@@ -893,13 +857,15 @@ size_t ZDICT_finalizeDictionary(void* dictBuffer, size_t dictBufferCapacity,
|
|
893
857
|
size_t hSize;
|
894
858
|
#define HBUFFSIZE 256 /* should prove large enough for all entropy headers */
|
895
859
|
BYTE header[HBUFFSIZE];
|
896
|
-
int const compressionLevel = (params.compressionLevel == 0) ? g_compressionLevel_default : params.compressionLevel;
|
860
|
+
int const compressionLevel = (params.compressionLevel == 0) ? ZSTD_CLEVEL_DEFAULT : params.compressionLevel;
|
897
861
|
U32 const notificationLevel = params.notificationLevel;
|
862
|
+
/* The final dictionary content must be at least as large as the largest repcode */
|
863
|
+
size_t const minContentSize = (size_t)ZDICT_maxRep(repStartValue);
|
864
|
+
size_t paddingSize;
|
898
865
|
|
899
866
|
/* check conditions */
|
900
867
|
DEBUGLOG(4, "ZDICT_finalizeDictionary");
|
901
868
|
if (dictBufferCapacity < dictContentSize) return ERROR(dstSize_tooSmall);
|
902
|
-
if (dictContentSize < ZDICT_CONTENTSIZE_MIN) return ERROR(srcSize_wrong);
|
903
869
|
if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) return ERROR(dstSize_tooSmall);
|
904
870
|
|
905
871
|
/* dictionary header */
|
@@ -923,12 +889,43 @@ size_t ZDICT_finalizeDictionary(void* dictBuffer, size_t dictBufferCapacity,
|
|
923
889
|
hSize += eSize;
|
924
890
|
}
|
925
891
|
|
926
|
-
/* copy elements in final position */
|
927
|
-
if (hSize + dictContentSize > dictBufferCapacity) dictContentSize = dictBufferCapacity - hSize;
|
928
|
-
|
929
|
-
|
930
|
-
|
931
|
-
|
892
|
+
/* Shrink the content size if it doesn't fit in the buffer */
|
893
|
+
if (hSize + dictContentSize > dictBufferCapacity) {
|
894
|
+
dictContentSize = dictBufferCapacity - hSize;
|
895
|
+
}
|
896
|
+
|
897
|
+
/* Pad the dictionary content with zeros if it is too small */
|
898
|
+
if (dictContentSize < minContentSize) {
|
899
|
+
RETURN_ERROR_IF(hSize + minContentSize > dictBufferCapacity, dstSize_tooSmall,
|
900
|
+
"dictBufferCapacity too small to fit max repcode");
|
901
|
+
paddingSize = minContentSize - dictContentSize;
|
902
|
+
} else {
|
903
|
+
paddingSize = 0;
|
904
|
+
}
|
905
|
+
|
906
|
+
{
|
907
|
+
size_t const dictSize = hSize + paddingSize + dictContentSize;
|
908
|
+
|
909
|
+
/* The dictionary consists of the header, optional padding, and the content.
|
910
|
+
* The padding comes before the content because the "best" position in the
|
911
|
+
* dictionary is the last byte.
|
912
|
+
*/
|
913
|
+
BYTE* const outDictHeader = (BYTE*)dictBuffer;
|
914
|
+
BYTE* const outDictPadding = outDictHeader + hSize;
|
915
|
+
BYTE* const outDictContent = outDictPadding + paddingSize;
|
916
|
+
|
917
|
+
assert(dictSize <= dictBufferCapacity);
|
918
|
+
assert(outDictContent + dictContentSize == (BYTE*)dictBuffer + dictSize);
|
919
|
+
|
920
|
+
/* First copy the customDictContent into its final location.
|
921
|
+
* `customDictContent` and `dictBuffer` may overlap, so we must
|
922
|
+
* do this before any other writes into the output buffer.
|
923
|
+
* Then copy the header & padding into the output buffer.
|
924
|
+
*/
|
925
|
+
memmove(outDictContent, customDictContent, dictContentSize);
|
926
|
+
memcpy(outDictHeader, header, hSize);
|
927
|
+
memset(outDictPadding, 0, paddingSize);
|
928
|
+
|
932
929
|
return dictSize;
|
933
930
|
}
|
934
931
|
}
|
@@ -939,7 +936,7 @@ static size_t ZDICT_addEntropyTablesFromBuffer_advanced(
|
|
939
936
|
const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
|
940
937
|
ZDICT_params_t params)
|
941
938
|
{
|
942
|
-
int const compressionLevel = (params.compressionLevel == 0) ? g_compressionLevel_default : params.compressionLevel;
|
939
|
+
int const compressionLevel = (params.compressionLevel == 0) ? ZSTD_CLEVEL_DEFAULT : params.compressionLevel;
|
943
940
|
U32 const notificationLevel = params.notificationLevel;
|
944
941
|
size_t hSize = 8;
|
945
942
|
|
@@ -968,16 +965,11 @@ static size_t ZDICT_addEntropyTablesFromBuffer_advanced(
|
|
968
965
|
return MIN(dictBufferCapacity, hSize+dictContentSize);
|
969
966
|
}
|
970
967
|
|
971
|
-
/* Hidden declaration for dbio.c */
|
972
|
-
size_t ZDICT_trainFromBuffer_unsafe_legacy(
|
973
|
-
void* dictBuffer, size_t maxDictSize,
|
974
|
-
const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
|
975
|
-
ZDICT_legacy_params_t params);
|
976
968
|
/*! ZDICT_trainFromBuffer_unsafe_legacy() :
|
977
|
-
* Warning : `samplesBuffer` must be followed by noisy guard band
|
969
|
+
* Warning : `samplesBuffer` must be followed by noisy guard band !!!
|
978
970
|
* @return : size of dictionary, or an error code which can be tested with ZDICT_isError()
|
979
971
|
*/
|
980
|
-
size_t ZDICT_trainFromBuffer_unsafe_legacy(
|
972
|
+
static size_t ZDICT_trainFromBuffer_unsafe_legacy(
|
981
973
|
void* dictBuffer, size_t maxDictSize,
|
982
974
|
const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
|
983
975
|
ZDICT_legacy_params_t params)
|
@@ -1114,8 +1106,8 @@ size_t ZDICT_trainFromBuffer(void* dictBuffer, size_t dictBufferCapacity,
|
|
1114
1106
|
memset(&params, 0, sizeof(params));
|
1115
1107
|
params.d = 8;
|
1116
1108
|
params.steps = 4;
|
1117
|
-
/*
|
1118
|
-
params.zParams.compressionLevel =
|
1109
|
+
/* Use default level since no compression level information is available */
|
1110
|
+
params.zParams.compressionLevel = ZSTD_CLEVEL_DEFAULT;
|
1119
1111
|
#if defined(DEBUGLEVEL) && (DEBUGLEVEL>=1)
|
1120
1112
|
params.zParams.notificationLevel = DEBUGLEVEL;
|
1121
1113
|
#endif
|