zstd-ruby 1.4.4.0 → 1.5.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/dependabot.yml +8 -0
- data/.github/workflows/ruby.yml +35 -0
- data/README.md +2 -2
- data/ext/zstdruby/extconf.rb +1 -0
- data/ext/zstdruby/libzstd/BUCK +5 -7
- data/ext/zstdruby/libzstd/Makefile +241 -173
- data/ext/zstdruby/libzstd/README.md +76 -18
- data/ext/zstdruby/libzstd/common/bitstream.h +75 -57
- data/ext/zstdruby/libzstd/common/compiler.h +196 -20
- data/ext/zstdruby/libzstd/common/cpu.h +1 -3
- data/ext/zstdruby/libzstd/common/debug.c +11 -31
- data/ext/zstdruby/libzstd/common/debug.h +22 -49
- data/ext/zstdruby/libzstd/common/entropy_common.c +208 -76
- data/ext/zstdruby/libzstd/common/error_private.c +3 -1
- data/ext/zstdruby/libzstd/common/error_private.h +87 -4
- data/ext/zstdruby/libzstd/common/fse.h +51 -42
- data/ext/zstdruby/libzstd/common/fse_decompress.c +149 -57
- data/ext/zstdruby/libzstd/common/huf.h +60 -54
- data/ext/zstdruby/libzstd/common/mem.h +87 -98
- data/ext/zstdruby/libzstd/common/pool.c +23 -17
- data/ext/zstdruby/libzstd/common/pool.h +3 -3
- data/ext/zstdruby/libzstd/common/portability_macros.h +131 -0
- data/ext/zstdruby/libzstd/common/threading.c +10 -8
- data/ext/zstdruby/libzstd/common/threading.h +4 -3
- data/ext/zstdruby/libzstd/common/xxhash.c +15 -873
- data/ext/zstdruby/libzstd/common/xxhash.h +5572 -191
- data/ext/zstdruby/libzstd/common/zstd_common.c +10 -10
- data/ext/zstdruby/libzstd/common/zstd_deps.h +111 -0
- data/ext/zstdruby/libzstd/common/zstd_internal.h +252 -108
- data/ext/zstdruby/libzstd/common/zstd_trace.h +163 -0
- data/ext/zstdruby/libzstd/compress/clevels.h +134 -0
- data/ext/zstdruby/libzstd/compress/fse_compress.c +105 -85
- data/ext/zstdruby/libzstd/compress/hist.c +41 -63
- data/ext/zstdruby/libzstd/compress/hist.h +13 -33
- data/ext/zstdruby/libzstd/compress/huf_compress.c +831 -259
- data/ext/zstdruby/libzstd/compress/zstd_compress.c +3213 -1007
- data/ext/zstdruby/libzstd/compress/zstd_compress_internal.h +493 -71
- data/ext/zstdruby/libzstd/compress/zstd_compress_literals.c +21 -16
- data/ext/zstdruby/libzstd/compress/zstd_compress_literals.h +4 -2
- data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.c +51 -24
- data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.h +10 -3
- data/ext/zstdruby/libzstd/compress/zstd_compress_superblock.c +573 -0
- data/ext/zstdruby/libzstd/compress/zstd_compress_superblock.h +32 -0
- data/ext/zstdruby/libzstd/compress/zstd_cwksp.h +208 -81
- data/ext/zstdruby/libzstd/compress/zstd_double_fast.c +315 -137
- data/ext/zstdruby/libzstd/compress/zstd_double_fast.h +2 -2
- data/ext/zstdruby/libzstd/compress/zstd_fast.c +319 -128
- data/ext/zstdruby/libzstd/compress/zstd_fast.h +2 -2
- data/ext/zstdruby/libzstd/compress/zstd_lazy.c +1156 -171
- data/ext/zstdruby/libzstd/compress/zstd_lazy.h +59 -1
- data/ext/zstdruby/libzstd/compress/zstd_ldm.c +331 -206
- data/ext/zstdruby/libzstd/compress/zstd_ldm.h +15 -3
- data/ext/zstdruby/libzstd/compress/zstd_ldm_geartab.h +106 -0
- data/ext/zstdruby/libzstd/compress/zstd_opt.c +403 -226
- data/ext/zstdruby/libzstd/compress/zstd_opt.h +1 -1
- data/ext/zstdruby/libzstd/compress/zstdmt_compress.c +188 -453
- data/ext/zstdruby/libzstd/compress/zstdmt_compress.h +32 -114
- data/ext/zstdruby/libzstd/decompress/huf_decompress.c +1065 -410
- data/ext/zstdruby/libzstd/decompress/huf_decompress_amd64.S +571 -0
- data/ext/zstdruby/libzstd/decompress/zstd_ddict.c +20 -16
- data/ext/zstdruby/libzstd/decompress/zstd_ddict.h +3 -3
- data/ext/zstdruby/libzstd/decompress/zstd_decompress.c +691 -230
- data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.c +1072 -323
- data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.h +16 -7
- data/ext/zstdruby/libzstd/decompress/zstd_decompress_internal.h +71 -10
- data/ext/zstdruby/libzstd/deprecated/zbuff.h +3 -3
- data/ext/zstdruby/libzstd/deprecated/zbuff_common.c +2 -2
- data/ext/zstdruby/libzstd/deprecated/zbuff_compress.c +24 -4
- data/ext/zstdruby/libzstd/deprecated/zbuff_decompress.c +1 -1
- data/ext/zstdruby/libzstd/dictBuilder/cover.c +57 -40
- data/ext/zstdruby/libzstd/dictBuilder/cover.h +20 -9
- data/ext/zstdruby/libzstd/dictBuilder/divsufsort.c +1 -1
- data/ext/zstdruby/libzstd/dictBuilder/fastcover.c +54 -35
- data/ext/zstdruby/libzstd/dictBuilder/zdict.c +151 -57
- data/ext/zstdruby/libzstd/dll/example/Makefile +2 -1
- data/ext/zstdruby/libzstd/dll/example/README.md +16 -22
- data/ext/zstdruby/libzstd/legacy/zstd_legacy.h +4 -4
- data/ext/zstdruby/libzstd/legacy/zstd_v01.c +25 -19
- data/ext/zstdruby/libzstd/legacy/zstd_v01.h +1 -1
- data/ext/zstdruby/libzstd/legacy/zstd_v02.c +18 -14
- data/ext/zstdruby/libzstd/legacy/zstd_v02.h +1 -1
- data/ext/zstdruby/libzstd/legacy/zstd_v03.c +18 -14
- data/ext/zstdruby/libzstd/legacy/zstd_v03.h +1 -1
- data/ext/zstdruby/libzstd/legacy/zstd_v04.c +22 -16
- data/ext/zstdruby/libzstd/legacy/zstd_v04.h +1 -1
- data/ext/zstdruby/libzstd/legacy/zstd_v05.c +29 -25
- data/ext/zstdruby/libzstd/legacy/zstd_v05.h +2 -2
- data/ext/zstdruby/libzstd/legacy/zstd_v06.c +29 -25
- data/ext/zstdruby/libzstd/legacy/zstd_v06.h +1 -1
- data/ext/zstdruby/libzstd/legacy/zstd_v07.c +34 -26
- data/ext/zstdruby/libzstd/legacy/zstd_v07.h +1 -1
- data/ext/zstdruby/libzstd/libzstd.mk +185 -0
- data/ext/zstdruby/libzstd/libzstd.pc.in +4 -3
- data/ext/zstdruby/libzstd/modulemap/module.modulemap +4 -0
- data/ext/zstdruby/libzstd/{dictBuilder/zdict.h → zdict.h} +201 -31
- data/ext/zstdruby/libzstd/zstd.h +760 -234
- data/ext/zstdruby/libzstd/{common/zstd_errors.h → zstd_errors.h} +3 -1
- data/ext/zstdruby/zstdruby.c +2 -2
- data/lib/zstd-ruby/version.rb +1 -1
- metadata +20 -9
- data/.travis.yml +0 -14
|
@@ -1,3 +1,13 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Copyright (c) Facebook, Inc.
|
|
3
|
+
* All rights reserved.
|
|
4
|
+
*
|
|
5
|
+
* This source code is licensed under both the BSD-style license (found in the
|
|
6
|
+
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
|
|
7
|
+
* in the COPYING file in the root directory of this source tree).
|
|
8
|
+
* You may select, at your option, one of the above-listed licenses.
|
|
9
|
+
*/
|
|
10
|
+
|
|
1
11
|
/*-*************************************
|
|
2
12
|
* Dependencies
|
|
3
13
|
***************************************/
|
|
@@ -6,24 +16,33 @@
|
|
|
6
16
|
#include <string.h> /* memset */
|
|
7
17
|
#include <time.h> /* clock */
|
|
8
18
|
|
|
9
|
-
#include "mem.h" /* read */
|
|
10
|
-
#include "pool.h"
|
|
11
|
-
#include "threading.h"
|
|
12
|
-
#include "cover.h"
|
|
13
|
-
#include "zstd_internal.h" /* includes zstd.h */
|
|
14
19
|
#ifndef ZDICT_STATIC_LINKING_ONLY
|
|
15
|
-
#define ZDICT_STATIC_LINKING_ONLY
|
|
20
|
+
# define ZDICT_STATIC_LINKING_ONLY
|
|
16
21
|
#endif
|
|
17
|
-
|
|
22
|
+
|
|
23
|
+
#include "../common/mem.h" /* read */
|
|
24
|
+
#include "../common/pool.h"
|
|
25
|
+
#include "../common/threading.h"
|
|
26
|
+
#include "../common/zstd_internal.h" /* includes zstd.h */
|
|
27
|
+
#include "../compress/zstd_compress_internal.h" /* ZSTD_hash*() */
|
|
28
|
+
#include "../zdict.h"
|
|
29
|
+
#include "cover.h"
|
|
18
30
|
|
|
19
31
|
|
|
20
32
|
/*-*************************************
|
|
21
33
|
* Constants
|
|
22
34
|
***************************************/
|
|
35
|
+
/**
|
|
36
|
+
* There are 32bit indexes used to ref samples, so limit samples size to 4GB
|
|
37
|
+
* on 64bit builds.
|
|
38
|
+
* For 32bit builds we choose 1 GB.
|
|
39
|
+
* Most 32bit platforms have 2GB user-mode addressable space and we allocate a large
|
|
40
|
+
* contiguous buffer, so 1GB is already a high limit.
|
|
41
|
+
*/
|
|
23
42
|
#define FASTCOVER_MAX_SAMPLES_SIZE (sizeof(size_t) == 8 ? ((unsigned)-1) : ((unsigned)1 GB))
|
|
24
43
|
#define FASTCOVER_MAX_F 31
|
|
25
44
|
#define FASTCOVER_MAX_ACCEL 10
|
|
26
|
-
#define
|
|
45
|
+
#define FASTCOVER_DEFAULT_SPLITPOINT 0.75
|
|
27
46
|
#define DEFAULT_F 20
|
|
28
47
|
#define DEFAULT_ACCEL 1
|
|
29
48
|
|
|
@@ -31,50 +50,50 @@
|
|
|
31
50
|
/*-*************************************
|
|
32
51
|
* Console display
|
|
33
52
|
***************************************/
|
|
34
|
-
|
|
53
|
+
#ifndef LOCALDISPLAYLEVEL
|
|
54
|
+
static int g_displayLevel = 0;
|
|
55
|
+
#endif
|
|
56
|
+
#undef DISPLAY
|
|
35
57
|
#define DISPLAY(...) \
|
|
36
58
|
{ \
|
|
37
59
|
fprintf(stderr, __VA_ARGS__); \
|
|
38
60
|
fflush(stderr); \
|
|
39
61
|
}
|
|
62
|
+
#undef LOCALDISPLAYLEVEL
|
|
40
63
|
#define LOCALDISPLAYLEVEL(displayLevel, l, ...) \
|
|
41
64
|
if (displayLevel >= l) { \
|
|
42
65
|
DISPLAY(__VA_ARGS__); \
|
|
43
66
|
} /* 0 : no display; 1: errors; 2: default; 3: details; 4: debug */
|
|
67
|
+
#undef DISPLAYLEVEL
|
|
44
68
|
#define DISPLAYLEVEL(l, ...) LOCALDISPLAYLEVEL(g_displayLevel, l, __VA_ARGS__)
|
|
45
69
|
|
|
70
|
+
#ifndef LOCALDISPLAYUPDATE
|
|
71
|
+
static const clock_t g_refreshRate = CLOCKS_PER_SEC * 15 / 100;
|
|
72
|
+
static clock_t g_time = 0;
|
|
73
|
+
#endif
|
|
74
|
+
#undef LOCALDISPLAYUPDATE
|
|
46
75
|
#define LOCALDISPLAYUPDATE(displayLevel, l, ...) \
|
|
47
76
|
if (displayLevel >= l) { \
|
|
48
|
-
if ((clock() - g_time > refreshRate) || (displayLevel >= 4)) { \
|
|
77
|
+
if ((clock() - g_time > g_refreshRate) || (displayLevel >= 4)) { \
|
|
49
78
|
g_time = clock(); \
|
|
50
79
|
DISPLAY(__VA_ARGS__); \
|
|
51
80
|
} \
|
|
52
81
|
}
|
|
82
|
+
#undef DISPLAYUPDATE
|
|
53
83
|
#define DISPLAYUPDATE(l, ...) LOCALDISPLAYUPDATE(g_displayLevel, l, __VA_ARGS__)
|
|
54
|
-
static const clock_t refreshRate = CLOCKS_PER_SEC * 15 / 100;
|
|
55
|
-
static clock_t g_time = 0;
|
|
56
84
|
|
|
57
85
|
|
|
58
86
|
/*-*************************************
|
|
59
87
|
* Hash Functions
|
|
60
88
|
***************************************/
|
|
61
|
-
static const U64 prime6bytes = 227718039650203ULL;
|
|
62
|
-
static size_t ZSTD_hash6(U64 u, U32 h) { return (size_t)(((u << (64-48)) * prime6bytes) >> (64-h)) ; }
|
|
63
|
-
static size_t ZSTD_hash6Ptr(const void* p, U32 h) { return ZSTD_hash6(MEM_readLE64(p), h); }
|
|
64
|
-
|
|
65
|
-
static const U64 prime8bytes = 0xCF1BBCDCB7A56463ULL;
|
|
66
|
-
static size_t ZSTD_hash8(U64 u, U32 h) { return (size_t)(((u) * prime8bytes) >> (64-h)) ; }
|
|
67
|
-
static size_t ZSTD_hash8Ptr(const void* p, U32 h) { return ZSTD_hash8(MEM_readLE64(p), h); }
|
|
68
|
-
|
|
69
|
-
|
|
70
89
|
/**
|
|
71
|
-
* Hash the d-byte value pointed to by p and mod 2^f
|
|
90
|
+
* Hash the d-byte value pointed to by p and mod 2^f into the frequency vector
|
|
72
91
|
*/
|
|
73
|
-
static size_t FASTCOVER_hashPtrToIndex(const void* p, U32
|
|
92
|
+
static size_t FASTCOVER_hashPtrToIndex(const void* p, U32 f, unsigned d) {
|
|
74
93
|
if (d == 6) {
|
|
75
|
-
return ZSTD_hash6Ptr(p,
|
|
94
|
+
return ZSTD_hash6Ptr(p, f);
|
|
76
95
|
}
|
|
77
|
-
return ZSTD_hash8Ptr(p,
|
|
96
|
+
return ZSTD_hash8Ptr(p, f);
|
|
78
97
|
}
|
|
79
98
|
|
|
80
99
|
|
|
@@ -451,20 +470,20 @@ typedef struct FASTCOVER_tryParameters_data_s {
|
|
|
451
470
|
* This function is thread safe if zstd is compiled with multithreaded support.
|
|
452
471
|
* It takes its parameters as an *OWNING* opaque pointer to support threading.
|
|
453
472
|
*/
|
|
454
|
-
static void FASTCOVER_tryParameters(void *opaque)
|
|
473
|
+
static void FASTCOVER_tryParameters(void* opaque)
|
|
455
474
|
{
|
|
456
475
|
/* Save parameters as local variables */
|
|
457
|
-
FASTCOVER_tryParameters_data_t *const data = (FASTCOVER_tryParameters_data_t
|
|
476
|
+
FASTCOVER_tryParameters_data_t *const data = (FASTCOVER_tryParameters_data_t*)opaque;
|
|
458
477
|
const FASTCOVER_ctx_t *const ctx = data->ctx;
|
|
459
478
|
const ZDICT_cover_params_t parameters = data->parameters;
|
|
460
479
|
size_t dictBufferCapacity = data->dictBufferCapacity;
|
|
461
480
|
size_t totalCompressedSize = ERROR(GENERIC);
|
|
462
481
|
/* Initialize array to keep track of frequency of dmer within activeSegment */
|
|
463
|
-
U16* segmentFreqs = (U16
|
|
482
|
+
U16* segmentFreqs = (U16*)calloc(((U64)1 << ctx->f), sizeof(U16));
|
|
464
483
|
/* Allocate space for hash table, dict, and freqs */
|
|
465
|
-
BYTE *const dict = (BYTE
|
|
484
|
+
BYTE *const dict = (BYTE*)malloc(dictBufferCapacity);
|
|
466
485
|
COVER_dictSelection_t selection = COVER_dictSelectionError(ERROR(GENERIC));
|
|
467
|
-
U32
|
|
486
|
+
U32* freqs = (U32*) malloc(((U64)1 << ctx->f) * sizeof(U32));
|
|
468
487
|
if (!segmentFreqs || !dict || !freqs) {
|
|
469
488
|
DISPLAYLEVEL(1, "Failed to allocate buffers: out of memory\n");
|
|
470
489
|
goto _cleanup;
|
|
@@ -476,7 +495,7 @@ static void FASTCOVER_tryParameters(void *opaque)
|
|
|
476
495
|
parameters, segmentFreqs);
|
|
477
496
|
|
|
478
497
|
const unsigned nbFinalizeSamples = (unsigned)(ctx->nbTrainSamples * ctx->accelParams.finalize / 100);
|
|
479
|
-
selection = COVER_selectDict(dict + tail, dictBufferCapacity - tail,
|
|
498
|
+
selection = COVER_selectDict(dict + tail, dictBufferCapacity, dictBufferCapacity - tail,
|
|
480
499
|
ctx->samples, ctx->samplesSizes, nbFinalizeSamples, ctx->nbTrainSamples, ctx->nbSamples, parameters, ctx->offsets,
|
|
481
500
|
totalCompressedSize);
|
|
482
501
|
|
|
@@ -537,7 +556,7 @@ ZDICT_trainFromBuffer_fastCover(void* dictBuffer, size_t dictBufferCapacity,
|
|
|
537
556
|
ZDICT_cover_params_t coverParams;
|
|
538
557
|
FASTCOVER_accel_t accelParams;
|
|
539
558
|
/* Initialize global data */
|
|
540
|
-
g_displayLevel = parameters.zParams.notificationLevel;
|
|
559
|
+
g_displayLevel = (int)parameters.zParams.notificationLevel;
|
|
541
560
|
/* Assign splitPoint and f if not provided */
|
|
542
561
|
parameters.splitPoint = 1.0;
|
|
543
562
|
parameters.f = parameters.f == 0 ? DEFAULT_F : parameters.f;
|
|
@@ -607,7 +626,7 @@ ZDICT_optimizeTrainFromBuffer_fastCover(
|
|
|
607
626
|
/* constants */
|
|
608
627
|
const unsigned nbThreads = parameters->nbThreads;
|
|
609
628
|
const double splitPoint =
|
|
610
|
-
parameters->splitPoint <= 0.0 ?
|
|
629
|
+
parameters->splitPoint <= 0.0 ? FASTCOVER_DEFAULT_SPLITPOINT : parameters->splitPoint;
|
|
611
630
|
const unsigned kMinD = parameters->d == 0 ? 6 : parameters->d;
|
|
612
631
|
const unsigned kMaxD = parameters->d == 0 ? 8 : parameters->d;
|
|
613
632
|
const unsigned kMinK = parameters->k == 0 ? 50 : parameters->k;
|
|
@@ -620,7 +639,7 @@ ZDICT_optimizeTrainFromBuffer_fastCover(
|
|
|
620
639
|
const unsigned accel = parameters->accel == 0 ? DEFAULT_ACCEL : parameters->accel;
|
|
621
640
|
const unsigned shrinkDict = 0;
|
|
622
641
|
/* Local variables */
|
|
623
|
-
const int displayLevel = parameters->zParams.notificationLevel;
|
|
642
|
+
const int displayLevel = (int)parameters->zParams.notificationLevel;
|
|
624
643
|
unsigned iteration = 1;
|
|
625
644
|
unsigned d;
|
|
626
645
|
unsigned k;
|
|
@@ -704,7 +723,7 @@ ZDICT_optimizeTrainFromBuffer_fastCover(
|
|
|
704
723
|
data->parameters.splitPoint = splitPoint;
|
|
705
724
|
data->parameters.steps = kSteps;
|
|
706
725
|
data->parameters.shrinkDict = shrinkDict;
|
|
707
|
-
data->parameters.zParams.notificationLevel = g_displayLevel;
|
|
726
|
+
data->parameters.zParams.notificationLevel = (unsigned)g_displayLevel;
|
|
708
727
|
/* Check the parameters */
|
|
709
728
|
if (!FASTCOVER_checkParameters(data->parameters, dictBufferCapacity,
|
|
710
729
|
data->ctx->f, accel)) {
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
/*
|
|
2
|
-
* Copyright (c)
|
|
2
|
+
* Copyright (c) Yann Collet, Facebook, Inc.
|
|
3
3
|
* All rights reserved.
|
|
4
4
|
*
|
|
5
5
|
* This source code is licensed under both the BSD-style license (found in the
|
|
@@ -23,9 +23,13 @@
|
|
|
23
23
|
/* Unix Large Files support (>4GB) */
|
|
24
24
|
#define _FILE_OFFSET_BITS 64
|
|
25
25
|
#if (defined(__sun__) && (!defined(__LP64__))) /* Sun Solaris 32-bits requires specific definitions */
|
|
26
|
+
# ifndef _LARGEFILE_SOURCE
|
|
26
27
|
# define _LARGEFILE_SOURCE
|
|
28
|
+
# endif
|
|
27
29
|
#elif ! defined(__LP64__) /* No point defining Large file for 64 bit */
|
|
30
|
+
# ifndef _LARGEFILE64_SOURCE
|
|
28
31
|
# define _LARGEFILE64_SOURCE
|
|
32
|
+
# endif
|
|
29
33
|
#endif
|
|
30
34
|
|
|
31
35
|
|
|
@@ -37,17 +41,19 @@
|
|
|
37
41
|
#include <stdio.h> /* fprintf, fopen, ftello64 */
|
|
38
42
|
#include <time.h> /* clock */
|
|
39
43
|
|
|
40
|
-
#include "mem.h" /* read */
|
|
41
|
-
#include "fse.h" /* FSE_normalizeCount, FSE_writeNCount */
|
|
42
|
-
#define HUF_STATIC_LINKING_ONLY
|
|
43
|
-
#include "huf.h" /* HUF_buildCTable, HUF_writeCTable */
|
|
44
|
-
#include "zstd_internal.h" /* includes zstd.h */
|
|
45
|
-
#include "xxhash.h" /* XXH64 */
|
|
46
|
-
#include "divsufsort.h"
|
|
47
44
|
#ifndef ZDICT_STATIC_LINKING_ONLY
|
|
48
45
|
# define ZDICT_STATIC_LINKING_ONLY
|
|
49
46
|
#endif
|
|
50
|
-
#
|
|
47
|
+
#define HUF_STATIC_LINKING_ONLY
|
|
48
|
+
|
|
49
|
+
#include "../common/mem.h" /* read */
|
|
50
|
+
#include "../common/fse.h" /* FSE_normalizeCount, FSE_writeNCount */
|
|
51
|
+
#include "../common/huf.h" /* HUF_buildCTable, HUF_writeCTable */
|
|
52
|
+
#include "../common/zstd_internal.h" /* includes zstd.h */
|
|
53
|
+
#include "../common/xxhash.h" /* XXH64 */
|
|
54
|
+
#include "../compress/zstd_compress_internal.h" /* ZSTD_loadCEntropy() */
|
|
55
|
+
#include "../zdict.h"
|
|
56
|
+
#include "divsufsort.h"
|
|
51
57
|
|
|
52
58
|
|
|
53
59
|
/*-*************************************
|
|
@@ -61,14 +67,15 @@
|
|
|
61
67
|
|
|
62
68
|
#define NOISELENGTH 32
|
|
63
69
|
|
|
64
|
-
static const int g_compressionLevel_default = 3;
|
|
65
70
|
static const U32 g_selectivity_default = 9;
|
|
66
71
|
|
|
67
72
|
|
|
68
73
|
/*-*************************************
|
|
69
74
|
* Console display
|
|
70
75
|
***************************************/
|
|
76
|
+
#undef DISPLAY
|
|
71
77
|
#define DISPLAY(...) { fprintf(stderr, __VA_ARGS__); fflush( stderr ); }
|
|
78
|
+
#undef DISPLAYLEVEL
|
|
72
79
|
#define DISPLAYLEVEL(l, ...) if (notificationLevel>=l) { DISPLAY(__VA_ARGS__); } /* 0 : no display; 1: errors; 2: default; 3: details; 4: debug */
|
|
73
80
|
|
|
74
81
|
static clock_t ZDICT_clockSpan(clock_t nPrevious) { return clock() - nPrevious; }
|
|
@@ -99,6 +106,26 @@ unsigned ZDICT_getDictID(const void* dictBuffer, size_t dictSize)
|
|
|
99
106
|
return MEM_readLE32((const char*)dictBuffer + 4);
|
|
100
107
|
}
|
|
101
108
|
|
|
109
|
+
size_t ZDICT_getDictHeaderSize(const void* dictBuffer, size_t dictSize)
|
|
110
|
+
{
|
|
111
|
+
size_t headerSize;
|
|
112
|
+
if (dictSize <= 8 || MEM_readLE32(dictBuffer) != ZSTD_MAGIC_DICTIONARY) return ERROR(dictionary_corrupted);
|
|
113
|
+
|
|
114
|
+
{ ZSTD_compressedBlockState_t* bs = (ZSTD_compressedBlockState_t*)malloc(sizeof(ZSTD_compressedBlockState_t));
|
|
115
|
+
U32* wksp = (U32*)malloc(HUF_WORKSPACE_SIZE);
|
|
116
|
+
if (!bs || !wksp) {
|
|
117
|
+
headerSize = ERROR(memory_allocation);
|
|
118
|
+
} else {
|
|
119
|
+
ZSTD_reset_compressedBlockState(bs);
|
|
120
|
+
headerSize = ZSTD_loadCEntropy(bs, wksp, dictBuffer, dictSize);
|
|
121
|
+
}
|
|
122
|
+
|
|
123
|
+
free(bs);
|
|
124
|
+
free(wksp);
|
|
125
|
+
}
|
|
126
|
+
|
|
127
|
+
return headerSize;
|
|
128
|
+
}
|
|
102
129
|
|
|
103
130
|
/*-********************************************************
|
|
104
131
|
* Dictionary training functions
|
|
@@ -108,22 +135,32 @@ static unsigned ZDICT_NbCommonBytes (size_t val)
|
|
|
108
135
|
if (MEM_isLittleEndian()) {
|
|
109
136
|
if (MEM_64bits()) {
|
|
110
137
|
# if defined(_MSC_VER) && defined(_WIN64)
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
138
|
+
if (val != 0) {
|
|
139
|
+
unsigned long r;
|
|
140
|
+
_BitScanForward64(&r, (U64)val);
|
|
141
|
+
return (unsigned)(r >> 3);
|
|
142
|
+
} else {
|
|
143
|
+
/* Should not reach this code path */
|
|
144
|
+
__assume(0);
|
|
145
|
+
}
|
|
114
146
|
# elif defined(__GNUC__) && (__GNUC__ >= 3)
|
|
115
|
-
return (__builtin_ctzll((U64)val) >> 3);
|
|
147
|
+
return (unsigned)(__builtin_ctzll((U64)val) >> 3);
|
|
116
148
|
# else
|
|
117
149
|
static const int DeBruijnBytePos[64] = { 0, 0, 0, 0, 0, 1, 1, 2, 0, 3, 1, 3, 1, 4, 2, 7, 0, 2, 3, 6, 1, 5, 3, 5, 1, 3, 4, 4, 2, 5, 6, 7, 7, 0, 1, 2, 3, 3, 4, 6, 2, 6, 5, 5, 3, 4, 5, 6, 7, 1, 2, 4, 6, 4, 4, 5, 7, 2, 6, 5, 7, 6, 7, 7 };
|
|
118
150
|
return DeBruijnBytePos[((U64)((val & -(long long)val) * 0x0218A392CDABBD3FULL)) >> 58];
|
|
119
151
|
# endif
|
|
120
152
|
} else { /* 32 bits */
|
|
121
153
|
# if defined(_MSC_VER)
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
154
|
+
if (val != 0) {
|
|
155
|
+
unsigned long r;
|
|
156
|
+
_BitScanForward(&r, (U32)val);
|
|
157
|
+
return (unsigned)(r >> 3);
|
|
158
|
+
} else {
|
|
159
|
+
/* Should not reach this code path */
|
|
160
|
+
__assume(0);
|
|
161
|
+
}
|
|
125
162
|
# elif defined(__GNUC__) && (__GNUC__ >= 3)
|
|
126
|
-
return (__builtin_ctz((U32)val) >> 3);
|
|
163
|
+
return (unsigned)(__builtin_ctz((U32)val) >> 3);
|
|
127
164
|
# else
|
|
128
165
|
static const int DeBruijnBytePos[32] = { 0, 0, 3, 0, 3, 1, 3, 0, 3, 2, 2, 1, 3, 2, 0, 1, 3, 3, 1, 2, 2, 2, 2, 0, 3, 1, 2, 0, 1, 0, 1, 1 };
|
|
129
166
|
return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27];
|
|
@@ -132,11 +169,16 @@ static unsigned ZDICT_NbCommonBytes (size_t val)
|
|
|
132
169
|
} else { /* Big Endian CPU */
|
|
133
170
|
if (MEM_64bits()) {
|
|
134
171
|
# if defined(_MSC_VER) && defined(_WIN64)
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
172
|
+
if (val != 0) {
|
|
173
|
+
unsigned long r;
|
|
174
|
+
_BitScanReverse64(&r, val);
|
|
175
|
+
return (unsigned)(r >> 3);
|
|
176
|
+
} else {
|
|
177
|
+
/* Should not reach this code path */
|
|
178
|
+
__assume(0);
|
|
179
|
+
}
|
|
138
180
|
# elif defined(__GNUC__) && (__GNUC__ >= 3)
|
|
139
|
-
return (__builtin_clzll(val) >> 3);
|
|
181
|
+
return (unsigned)(__builtin_clzll(val) >> 3);
|
|
140
182
|
# else
|
|
141
183
|
unsigned r;
|
|
142
184
|
const unsigned n32 = sizeof(size_t)*4; /* calculate this way due to compiler complaining in 32-bits mode */
|
|
@@ -147,11 +189,16 @@ static unsigned ZDICT_NbCommonBytes (size_t val)
|
|
|
147
189
|
# endif
|
|
148
190
|
} else { /* 32 bits */
|
|
149
191
|
# if defined(_MSC_VER)
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
192
|
+
if (val != 0) {
|
|
193
|
+
unsigned long r;
|
|
194
|
+
_BitScanReverse(&r, (unsigned long)val);
|
|
195
|
+
return (unsigned)(r >> 3);
|
|
196
|
+
} else {
|
|
197
|
+
/* Should not reach this code path */
|
|
198
|
+
__assume(0);
|
|
199
|
+
}
|
|
153
200
|
# elif defined(__GNUC__) && (__GNUC__ >= 3)
|
|
154
|
-
return (__builtin_clz((U32)val) >> 3);
|
|
201
|
+
return (unsigned)(__builtin_clz((U32)val) >> 3);
|
|
155
202
|
# else
|
|
156
203
|
unsigned r;
|
|
157
204
|
if (!(val>>16)) { r=2; val>>=8; } else { r=0; val>>=24; }
|
|
@@ -208,7 +255,7 @@ static dictItem ZDICT_analyzePos(
|
|
|
208
255
|
U32 savings[LLIMIT] = {0};
|
|
209
256
|
const BYTE* b = (const BYTE*)buffer;
|
|
210
257
|
size_t maxLength = LLIMIT;
|
|
211
|
-
size_t pos = suffix[start];
|
|
258
|
+
size_t pos = (size_t)suffix[start];
|
|
212
259
|
U32 end = start;
|
|
213
260
|
dictItem solution;
|
|
214
261
|
|
|
@@ -342,7 +389,7 @@ static dictItem ZDICT_analyzePos(
|
|
|
342
389
|
savings[i] = savings[i-1] + (lengthList[i] * (i-3));
|
|
343
390
|
|
|
344
391
|
DISPLAYLEVEL(4, "Selected dict at position %u, of length %u : saves %u (ratio: %.2f) \n",
|
|
345
|
-
(unsigned)pos, (unsigned)maxLength, (unsigned)savings[maxLength], (double)savings[maxLength] / maxLength);
|
|
392
|
+
(unsigned)pos, (unsigned)maxLength, (unsigned)savings[maxLength], (double)savings[maxLength] / (double)maxLength);
|
|
346
393
|
|
|
347
394
|
solution.pos = (U32)pos;
|
|
348
395
|
solution.length = (U32)maxLength;
|
|
@@ -352,7 +399,7 @@ static dictItem ZDICT_analyzePos(
|
|
|
352
399
|
{ U32 id;
|
|
353
400
|
for (id=start; id<end; id++) {
|
|
354
401
|
U32 p, pEnd, length;
|
|
355
|
-
U32 const testedPos = suffix[id];
|
|
402
|
+
U32 const testedPos = (U32)suffix[id];
|
|
356
403
|
if (testedPos == pos)
|
|
357
404
|
length = solution.length;
|
|
358
405
|
else {
|
|
@@ -415,7 +462,7 @@ static U32 ZDICT_tryMerge(dictItem* table, dictItem elt, U32 eltNbToSkip, const
|
|
|
415
462
|
|
|
416
463
|
if ((table[u].pos + table[u].length >= elt.pos) && (table[u].pos < elt.pos)) { /* overlap, existing < new */
|
|
417
464
|
/* append */
|
|
418
|
-
int const addedLength = (int)eltEnd - (table[u].pos + table[u].length);
|
|
465
|
+
int const addedLength = (int)eltEnd - (int)(table[u].pos + table[u].length);
|
|
419
466
|
table[u].savings += elt.length / 8; /* rough approx bonus */
|
|
420
467
|
if (addedLength > 0) { /* otherwise, elt fully included into existing */
|
|
421
468
|
table[u].length += addedLength;
|
|
@@ -508,6 +555,7 @@ static size_t ZDICT_trainBuffer_legacy(dictItem* dictList, U32 dictListSize,
|
|
|
508
555
|
clock_t displayClock = 0;
|
|
509
556
|
clock_t const refreshRate = CLOCKS_PER_SEC * 3 / 10;
|
|
510
557
|
|
|
558
|
+
# undef DISPLAYUPDATE
|
|
511
559
|
# define DISPLAYUPDATE(l, ...) if (notificationLevel>=l) { \
|
|
512
560
|
if (ZDICT_clockSpan(displayClock) > refreshRate) \
|
|
513
561
|
{ displayClock = clock(); DISPLAY(__VA_ARGS__); \
|
|
@@ -588,12 +636,12 @@ typedef struct
|
|
|
588
636
|
|
|
589
637
|
#define MAXREPOFFSET 1024
|
|
590
638
|
|
|
591
|
-
static void ZDICT_countEStats(EStats_ress_t esr, ZSTD_parameters params,
|
|
639
|
+
static void ZDICT_countEStats(EStats_ress_t esr, const ZSTD_parameters* params,
|
|
592
640
|
unsigned* countLit, unsigned* offsetcodeCount, unsigned* matchlengthCount, unsigned* litlengthCount, U32* repOffsets,
|
|
593
641
|
const void* src, size_t srcSize,
|
|
594
642
|
U32 notificationLevel)
|
|
595
643
|
{
|
|
596
|
-
size_t const blockSizeMax = MIN (ZSTD_BLOCKSIZE_MAX, 1 << params.cParams.windowLog);
|
|
644
|
+
size_t const blockSizeMax = MIN (ZSTD_BLOCKSIZE_MAX, 1 << params->cParams.windowLog);
|
|
597
645
|
size_t cSize;
|
|
598
646
|
|
|
599
647
|
if (srcSize > blockSizeMax) srcSize = blockSizeMax; /* protection vs large samples */
|
|
@@ -682,7 +730,7 @@ static void ZDICT_flatLit(unsigned* countLit)
|
|
|
682
730
|
|
|
683
731
|
#define OFFCODE_MAX 30 /* only applicable to first block */
|
|
684
732
|
static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
|
|
685
|
-
|
|
733
|
+
int compressionLevel,
|
|
686
734
|
const void* srcBuffer, const size_t* fileSizes, unsigned nbFiles,
|
|
687
735
|
const void* dictBuffer, size_t dictBufferSize,
|
|
688
736
|
unsigned notificationLevel)
|
|
@@ -717,7 +765,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
|
|
|
717
765
|
memset(repOffset, 0, sizeof(repOffset));
|
|
718
766
|
repOffset[1] = repOffset[4] = repOffset[8] = 1;
|
|
719
767
|
memset(bestRepOffset, 0, sizeof(bestRepOffset));
|
|
720
|
-
if (compressionLevel==0) compressionLevel = g_compressionLevel_default;
|
|
768
|
+
if (compressionLevel==0) compressionLevel = ZSTD_CLEVEL_DEFAULT;
|
|
721
769
|
params = ZSTD_getParams(compressionLevel, averageSampleSize, dictBufferSize);
|
|
722
770
|
|
|
723
771
|
esr.dict = ZSTD_createCDict_advanced(dictBuffer, dictBufferSize, ZSTD_dlm_byRef, ZSTD_dct_rawContent, params.cParams, ZSTD_defaultCMem);
|
|
@@ -731,13 +779,20 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
|
|
|
731
779
|
|
|
732
780
|
/* collect stats on all samples */
|
|
733
781
|
for (u=0; u<nbFiles; u++) {
|
|
734
|
-
ZDICT_countEStats(esr, params,
|
|
782
|
+
ZDICT_countEStats(esr, &params,
|
|
735
783
|
countLit, offcodeCount, matchLengthCount, litLengthCount, repOffset,
|
|
736
784
|
(const char*)srcBuffer + pos, fileSizes[u],
|
|
737
785
|
notificationLevel);
|
|
738
786
|
pos += fileSizes[u];
|
|
739
787
|
}
|
|
740
788
|
|
|
789
|
+
if (notificationLevel >= 4) {
|
|
790
|
+
/* writeStats */
|
|
791
|
+
DISPLAYLEVEL(4, "Offset Code Frequencies : \n");
|
|
792
|
+
for (u=0; u<=offcodeMax; u++) {
|
|
793
|
+
DISPLAYLEVEL(4, "%2u :%7u \n", u, offcodeCount[u]);
|
|
794
|
+
} }
|
|
795
|
+
|
|
741
796
|
/* analyze, build stats, starting with literals */
|
|
742
797
|
{ size_t maxNbBits = HUF_buildCTable (hufTable, countLit, 255, huffLog);
|
|
743
798
|
if (HUF_isError(maxNbBits)) {
|
|
@@ -762,7 +817,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
|
|
|
762
817
|
/* note : the result of this phase should be used to better appreciate the impact on statistics */
|
|
763
818
|
|
|
764
819
|
total=0; for (u=0; u<=offcodeMax; u++) total+=offcodeCount[u];
|
|
765
|
-
errorCode = FSE_normalizeCount(offcodeNCount, Offlog, offcodeCount, total, offcodeMax);
|
|
820
|
+
errorCode = FSE_normalizeCount(offcodeNCount, Offlog, offcodeCount, total, offcodeMax, /* useLowProbCount */ 1);
|
|
766
821
|
if (FSE_isError(errorCode)) {
|
|
767
822
|
eSize = errorCode;
|
|
768
823
|
DISPLAYLEVEL(1, "FSE_normalizeCount error with offcodeCount \n");
|
|
@@ -771,7 +826,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
|
|
|
771
826
|
Offlog = (U32)errorCode;
|
|
772
827
|
|
|
773
828
|
total=0; for (u=0; u<=MaxML; u++) total+=matchLengthCount[u];
|
|
774
|
-
errorCode = FSE_normalizeCount(matchLengthNCount, mlLog, matchLengthCount, total, MaxML);
|
|
829
|
+
errorCode = FSE_normalizeCount(matchLengthNCount, mlLog, matchLengthCount, total, MaxML, /* useLowProbCount */ 1);
|
|
775
830
|
if (FSE_isError(errorCode)) {
|
|
776
831
|
eSize = errorCode;
|
|
777
832
|
DISPLAYLEVEL(1, "FSE_normalizeCount error with matchLengthCount \n");
|
|
@@ -780,7 +835,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
|
|
|
780
835
|
mlLog = (U32)errorCode;
|
|
781
836
|
|
|
782
837
|
total=0; for (u=0; u<=MaxLL; u++) total+=litLengthCount[u];
|
|
783
|
-
errorCode = FSE_normalizeCount(litLengthNCount, llLog, litLengthCount, total, MaxLL);
|
|
838
|
+
errorCode = FSE_normalizeCount(litLengthNCount, llLog, litLengthCount, total, MaxLL, /* useLowProbCount */ 1);
|
|
784
839
|
if (FSE_isError(errorCode)) {
|
|
785
840
|
eSize = errorCode;
|
|
786
841
|
DISPLAYLEVEL(1, "FSE_normalizeCount error with litLengthCount \n");
|
|
@@ -844,7 +899,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
|
|
|
844
899
|
MEM_writeLE32(dstPtr+8, bestRepOffset[2].offset);
|
|
845
900
|
#else
|
|
846
901
|
/* at this stage, we don't use the result of "most common first offset",
|
|
847
|
-
|
|
902
|
+
* as the impact of statistics is not properly evaluated */
|
|
848
903
|
MEM_writeLE32(dstPtr+0, repStartValue[0]);
|
|
849
904
|
MEM_writeLE32(dstPtr+4, repStartValue[1]);
|
|
850
905
|
MEM_writeLE32(dstPtr+8, repStartValue[2]);
|
|
@@ -860,6 +915,17 @@ _cleanup:
|
|
|
860
915
|
}
|
|
861
916
|
|
|
862
917
|
|
|
918
|
+
/**
|
|
919
|
+
* @returns the maximum repcode value
|
|
920
|
+
*/
|
|
921
|
+
static U32 ZDICT_maxRep(U32 const reps[ZSTD_REP_NUM])
|
|
922
|
+
{
|
|
923
|
+
U32 maxRep = reps[0];
|
|
924
|
+
int r;
|
|
925
|
+
for (r = 1; r < ZSTD_REP_NUM; ++r)
|
|
926
|
+
maxRep = MAX(maxRep, reps[r]);
|
|
927
|
+
return maxRep;
|
|
928
|
+
}
|
|
863
929
|
|
|
864
930
|
size_t ZDICT_finalizeDictionary(void* dictBuffer, size_t dictBufferCapacity,
|
|
865
931
|
const void* customDictContent, size_t dictContentSize,
|
|
@@ -869,13 +935,15 @@ size_t ZDICT_finalizeDictionary(void* dictBuffer, size_t dictBufferCapacity,
|
|
|
869
935
|
size_t hSize;
|
|
870
936
|
#define HBUFFSIZE 256 /* should prove large enough for all entropy headers */
|
|
871
937
|
BYTE header[HBUFFSIZE];
|
|
872
|
-
int const compressionLevel = (params.compressionLevel == 0) ? g_compressionLevel_default : params.compressionLevel;
|
|
938
|
+
int const compressionLevel = (params.compressionLevel == 0) ? ZSTD_CLEVEL_DEFAULT : params.compressionLevel;
|
|
873
939
|
U32 const notificationLevel = params.notificationLevel;
|
|
940
|
+
/* The final dictionary content must be at least as large as the largest repcode */
|
|
941
|
+
size_t const minContentSize = (size_t)ZDICT_maxRep(repStartValue);
|
|
942
|
+
size_t paddingSize;
|
|
874
943
|
|
|
875
944
|
/* check conditions */
|
|
876
945
|
DEBUGLOG(4, "ZDICT_finalizeDictionary");
|
|
877
946
|
if (dictBufferCapacity < dictContentSize) return ERROR(dstSize_tooSmall);
|
|
878
|
-
if (dictContentSize < ZDICT_CONTENTSIZE_MIN) return ERROR(srcSize_wrong);
|
|
879
947
|
if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) return ERROR(dstSize_tooSmall);
|
|
880
948
|
|
|
881
949
|
/* dictionary header */
|
|
@@ -899,12 +967,43 @@ size_t ZDICT_finalizeDictionary(void* dictBuffer, size_t dictBufferCapacity,
|
|
|
899
967
|
hSize += eSize;
|
|
900
968
|
}
|
|
901
969
|
|
|
902
|
-
/*
|
|
903
|
-
if (hSize + dictContentSize > dictBufferCapacity)
|
|
904
|
-
|
|
905
|
-
|
|
906
|
-
|
|
907
|
-
|
|
970
|
+
/* Shrink the content size if it doesn't fit in the buffer */
|
|
971
|
+
if (hSize + dictContentSize > dictBufferCapacity) {
|
|
972
|
+
dictContentSize = dictBufferCapacity - hSize;
|
|
973
|
+
}
|
|
974
|
+
|
|
975
|
+
/* Pad the dictionary content with zeros if it is too small */
|
|
976
|
+
if (dictContentSize < minContentSize) {
|
|
977
|
+
RETURN_ERROR_IF(hSize + minContentSize > dictBufferCapacity, dstSize_tooSmall,
|
|
978
|
+
"dictBufferCapacity too small to fit max repcode");
|
|
979
|
+
paddingSize = minContentSize - dictContentSize;
|
|
980
|
+
} else {
|
|
981
|
+
paddingSize = 0;
|
|
982
|
+
}
|
|
983
|
+
|
|
984
|
+
{
|
|
985
|
+
size_t const dictSize = hSize + paddingSize + dictContentSize;
|
|
986
|
+
|
|
987
|
+
/* The dictionary consists of the header, optional padding, and the content.
|
|
988
|
+
* The padding comes before the content because the "best" position in the
|
|
989
|
+
* dictionary is the last byte.
|
|
990
|
+
*/
|
|
991
|
+
BYTE* const outDictHeader = (BYTE*)dictBuffer;
|
|
992
|
+
BYTE* const outDictPadding = outDictHeader + hSize;
|
|
993
|
+
BYTE* const outDictContent = outDictPadding + paddingSize;
|
|
994
|
+
|
|
995
|
+
assert(dictSize <= dictBufferCapacity);
|
|
996
|
+
assert(outDictContent + dictContentSize == (BYTE*)dictBuffer + dictSize);
|
|
997
|
+
|
|
998
|
+
/* First copy the customDictContent into its final location.
|
|
999
|
+
* `customDictContent` and `dictBuffer` may overlap, so we must
|
|
1000
|
+
* do this before any other writes into the output buffer.
|
|
1001
|
+
* Then copy the header & padding into the output buffer.
|
|
1002
|
+
*/
|
|
1003
|
+
memmove(outDictContent, customDictContent, dictContentSize);
|
|
1004
|
+
memcpy(outDictHeader, header, hSize);
|
|
1005
|
+
memset(outDictPadding, 0, paddingSize);
|
|
1006
|
+
|
|
908
1007
|
return dictSize;
|
|
909
1008
|
}
|
|
910
1009
|
}
|
|
@@ -915,7 +1014,7 @@ static size_t ZDICT_addEntropyTablesFromBuffer_advanced(
|
|
|
915
1014
|
const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
|
|
916
1015
|
ZDICT_params_t params)
|
|
917
1016
|
{
|
|
918
|
-
int const compressionLevel = (params.compressionLevel == 0) ?
|
|
1017
|
+
int const compressionLevel = (params.compressionLevel == 0) ? ZSTD_CLEVEL_DEFAULT : params.compressionLevel;
|
|
919
1018
|
U32 const notificationLevel = params.notificationLevel;
|
|
920
1019
|
size_t hSize = 8;
|
|
921
1020
|
|
|
@@ -944,16 +1043,11 @@ static size_t ZDICT_addEntropyTablesFromBuffer_advanced(
|
|
|
944
1043
|
return MIN(dictBufferCapacity, hSize+dictContentSize);
|
|
945
1044
|
}
|
|
946
1045
|
|
|
947
|
-
/* Hidden declaration for dbio.c */
|
|
948
|
-
size_t ZDICT_trainFromBuffer_unsafe_legacy(
|
|
949
|
-
void* dictBuffer, size_t maxDictSize,
|
|
950
|
-
const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
|
|
951
|
-
ZDICT_legacy_params_t params);
|
|
952
1046
|
/*! ZDICT_trainFromBuffer_unsafe_legacy() :
|
|
953
|
-
* Warning : `samplesBuffer` must be followed by noisy guard band
|
|
1047
|
+
* Warning : `samplesBuffer` must be followed by noisy guard band !!!
|
|
954
1048
|
* @return : size of dictionary, or an error code which can be tested with ZDICT_isError()
|
|
955
1049
|
*/
|
|
956
|
-
size_t ZDICT_trainFromBuffer_unsafe_legacy(
|
|
1050
|
+
static size_t ZDICT_trainFromBuffer_unsafe_legacy(
|
|
957
1051
|
void* dictBuffer, size_t maxDictSize,
|
|
958
1052
|
const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
|
|
959
1053
|
ZDICT_legacy_params_t params)
|
|
@@ -1090,8 +1184,8 @@ size_t ZDICT_trainFromBuffer(void* dictBuffer, size_t dictBufferCapacity,
|
|
|
1090
1184
|
memset(&params, 0, sizeof(params));
|
|
1091
1185
|
params.d = 8;
|
|
1092
1186
|
params.steps = 4;
|
|
1093
|
-
/*
|
|
1094
|
-
params.zParams.compressionLevel =
|
|
1187
|
+
/* Use default level since no compression level information is available */
|
|
1188
|
+
params.zParams.compressionLevel = ZSTD_CLEVEL_DEFAULT;
|
|
1095
1189
|
#if defined(DEBUGLEVEL) && (DEBUGLEVEL>=1)
|
|
1096
1190
|
params.zParams.notificationLevel = DEBUGLEVEL;
|
|
1097
1191
|
#endif
|
|
@@ -1,10 +1,11 @@
|
|
|
1
1
|
# ################################################################
|
|
2
|
-
# Copyright (c)
|
|
2
|
+
# Copyright (c) Yann Collet, Facebook, Inc.
|
|
3
3
|
# All rights reserved.
|
|
4
4
|
#
|
|
5
5
|
# This source code is licensed under both the BSD-style license (found in the
|
|
6
6
|
# LICENSE file in the root directory of this source tree) and the GPLv2 (found
|
|
7
7
|
# in the COPYING file in the root directory of this source tree).
|
|
8
|
+
# You may select, at your option, one of the above-listed licenses.
|
|
8
9
|
# ################################################################
|
|
9
10
|
|
|
10
11
|
VOID := /dev/null
|