extzstd 0.3 → 0.3.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/HISTORY.ja.md +8 -0
- data/README.md +1 -1
- data/contrib/zstd/CHANGELOG +94 -0
- data/contrib/zstd/CONTRIBUTING.md +351 -1
- data/contrib/zstd/Makefile +32 -10
- data/contrib/zstd/README.md +33 -10
- data/contrib/zstd/TESTING.md +2 -2
- data/contrib/zstd/appveyor.yml +42 -4
- data/contrib/zstd/lib/Makefile +128 -60
- data/contrib/zstd/lib/README.md +47 -16
- data/contrib/zstd/lib/common/bitstream.h +38 -39
- data/contrib/zstd/lib/common/compiler.h +40 -5
- data/contrib/zstd/lib/common/cpu.h +1 -1
- data/contrib/zstd/lib/common/debug.c +11 -31
- data/contrib/zstd/lib/common/debug.h +11 -31
- data/contrib/zstd/lib/common/entropy_common.c +13 -33
- data/contrib/zstd/lib/common/error_private.c +2 -1
- data/contrib/zstd/lib/common/error_private.h +6 -2
- data/contrib/zstd/lib/common/fse.h +12 -32
- data/contrib/zstd/lib/common/fse_decompress.c +12 -35
- data/contrib/zstd/lib/common/huf.h +15 -33
- data/contrib/zstd/lib/common/mem.h +75 -2
- data/contrib/zstd/lib/common/pool.c +8 -4
- data/contrib/zstd/lib/common/pool.h +2 -2
- data/contrib/zstd/lib/common/threading.c +50 -4
- data/contrib/zstd/lib/common/threading.h +36 -4
- data/contrib/zstd/lib/common/xxhash.c +23 -35
- data/contrib/zstd/lib/common/xxhash.h +11 -31
- data/contrib/zstd/lib/common/zstd_common.c +1 -1
- data/contrib/zstd/lib/common/zstd_errors.h +2 -1
- data/contrib/zstd/lib/common/zstd_internal.h +154 -26
- data/contrib/zstd/lib/compress/fse_compress.c +17 -40
- data/contrib/zstd/lib/compress/hist.c +15 -35
- data/contrib/zstd/lib/compress/hist.h +12 -32
- data/contrib/zstd/lib/compress/huf_compress.c +92 -92
- data/contrib/zstd/lib/compress/zstd_compress.c +1191 -1330
- data/contrib/zstd/lib/compress/zstd_compress_internal.h +317 -55
- data/contrib/zstd/lib/compress/zstd_compress_literals.c +158 -0
- data/contrib/zstd/lib/compress/zstd_compress_literals.h +29 -0
- data/contrib/zstd/lib/compress/zstd_compress_sequences.c +419 -0
- data/contrib/zstd/lib/compress/zstd_compress_sequences.h +54 -0
- data/contrib/zstd/lib/compress/zstd_compress_superblock.c +845 -0
- data/contrib/zstd/lib/compress/zstd_compress_superblock.h +32 -0
- data/contrib/zstd/lib/compress/zstd_cwksp.h +525 -0
- data/contrib/zstd/lib/compress/zstd_double_fast.c +65 -43
- data/contrib/zstd/lib/compress/zstd_double_fast.h +2 -2
- data/contrib/zstd/lib/compress/zstd_fast.c +92 -66
- data/contrib/zstd/lib/compress/zstd_fast.h +2 -2
- data/contrib/zstd/lib/compress/zstd_lazy.c +74 -42
- data/contrib/zstd/lib/compress/zstd_lazy.h +1 -1
- data/contrib/zstd/lib/compress/zstd_ldm.c +32 -10
- data/contrib/zstd/lib/compress/zstd_ldm.h +7 -2
- data/contrib/zstd/lib/compress/zstd_opt.c +81 -114
- data/contrib/zstd/lib/compress/zstd_opt.h +1 -1
- data/contrib/zstd/lib/compress/zstdmt_compress.c +95 -51
- data/contrib/zstd/lib/compress/zstdmt_compress.h +3 -2
- data/contrib/zstd/lib/decompress/huf_decompress.c +76 -60
- data/contrib/zstd/lib/decompress/zstd_ddict.c +12 -8
- data/contrib/zstd/lib/decompress/zstd_ddict.h +2 -2
- data/contrib/zstd/lib/decompress/zstd_decompress.c +292 -172
- data/contrib/zstd/lib/decompress/zstd_decompress_block.c +459 -338
- data/contrib/zstd/lib/decompress/zstd_decompress_block.h +3 -3
- data/contrib/zstd/lib/decompress/zstd_decompress_internal.h +18 -4
- data/contrib/zstd/lib/deprecated/zbuff.h +9 -8
- data/contrib/zstd/lib/deprecated/zbuff_common.c +2 -2
- data/contrib/zstd/lib/deprecated/zbuff_compress.c +1 -1
- data/contrib/zstd/lib/deprecated/zbuff_decompress.c +1 -1
- data/contrib/zstd/lib/dictBuilder/cover.c +164 -54
- data/contrib/zstd/lib/dictBuilder/cover.h +52 -7
- data/contrib/zstd/lib/dictBuilder/fastcover.c +60 -43
- data/contrib/zstd/lib/dictBuilder/zdict.c +43 -19
- data/contrib/zstd/lib/dictBuilder/zdict.h +56 -28
- data/contrib/zstd/lib/legacy/zstd_legacy.h +8 -4
- data/contrib/zstd/lib/legacy/zstd_v01.c +110 -110
- data/contrib/zstd/lib/legacy/zstd_v01.h +1 -1
- data/contrib/zstd/lib/legacy/zstd_v02.c +23 -13
- data/contrib/zstd/lib/legacy/zstd_v02.h +1 -1
- data/contrib/zstd/lib/legacy/zstd_v03.c +23 -13
- data/contrib/zstd/lib/legacy/zstd_v03.h +1 -1
- data/contrib/zstd/lib/legacy/zstd_v04.c +30 -17
- data/contrib/zstd/lib/legacy/zstd_v04.h +1 -1
- data/contrib/zstd/lib/legacy/zstd_v05.c +113 -102
- data/contrib/zstd/lib/legacy/zstd_v05.h +2 -2
- data/contrib/zstd/lib/legacy/zstd_v06.c +20 -18
- data/contrib/zstd/lib/legacy/zstd_v06.h +1 -1
- data/contrib/zstd/lib/legacy/zstd_v07.c +25 -19
- data/contrib/zstd/lib/legacy/zstd_v07.h +1 -1
- data/contrib/zstd/lib/libzstd.pc.in +3 -2
- data/contrib/zstd/lib/zstd.h +265 -88
- data/ext/extzstd.h +1 -1
- data/ext/libzstd_conf.h +8 -0
- data/ext/zstd_common.c +1 -3
- data/ext/zstd_compress.c +3 -3
- data/ext/zstd_decompress.c +1 -5
- data/ext/zstd_dictbuilder.c +2 -3
- data/ext/zstd_dictbuilder_fastcover.c +1 -3
- data/ext/zstd_legacy_v01.c +2 -0
- data/ext/zstd_legacy_v02.c +2 -0
- data/ext/zstd_legacy_v03.c +2 -0
- data/ext/zstd_legacy_v04.c +2 -0
- data/ext/zstd_legacy_v05.c +2 -0
- data/ext/zstd_legacy_v06.c +2 -0
- data/ext/zstd_legacy_v07.c +2 -0
- data/lib/extzstd.rb +18 -10
- data/lib/extzstd/version.rb +1 -1
- metadata +15 -6
@@ -1,5 +1,5 @@
|
|
1
1
|
/*
|
2
|
-
* Copyright (c) 2016-
|
2
|
+
* Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
|
3
3
|
* All rights reserved.
|
4
4
|
*
|
5
5
|
* This source code is licensed under both the BSD-style license (found in the
|
@@ -16,8 +16,8 @@
|
|
16
16
|
* Dependencies
|
17
17
|
*********************************************************/
|
18
18
|
#include <stddef.h> /* size_t */
|
19
|
-
#include "zstd.h" /* DCtx, and some public functions */
|
20
|
-
#include "zstd_internal.h" /* blockProperties_t, and some public functions */
|
19
|
+
#include "../zstd.h" /* DCtx, and some public functions */
|
20
|
+
#include "../common/zstd_internal.h" /* blockProperties_t, and some public functions */
|
21
21
|
#include "zstd_decompress_internal.h" /* ZSTD_seqSymbol */
|
22
22
|
|
23
23
|
|
@@ -1,5 +1,5 @@
|
|
1
1
|
/*
|
2
|
-
* Copyright (c) 2016-
|
2
|
+
* Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
|
3
3
|
* All rights reserved.
|
4
4
|
*
|
5
5
|
* This source code is licensed under both the BSD-style license (found in the
|
@@ -19,8 +19,8 @@
|
|
19
19
|
/*-*******************************************************
|
20
20
|
* Dependencies
|
21
21
|
*********************************************************/
|
22
|
-
#include "mem.h" /* BYTE, U16, U32 */
|
23
|
-
#include "zstd_internal.h" /* ZSTD_seqSymbol */
|
22
|
+
#include "../common/mem.h" /* BYTE, U16, U32 */
|
23
|
+
#include "../common/zstd_internal.h" /* ZSTD_seqSymbol */
|
24
24
|
|
25
25
|
|
26
26
|
|
@@ -95,6 +95,11 @@ typedef enum {
|
|
95
95
|
ZSTD_use_once = 1 /* Use the dictionary once and set to ZSTD_dont_use */
|
96
96
|
} ZSTD_dictUses_e;
|
97
97
|
|
98
|
+
typedef enum {
|
99
|
+
ZSTD_obm_buffered = 0, /* Buffer the output */
|
100
|
+
ZSTD_obm_stable = 1 /* ZSTD_outBuffer is stable */
|
101
|
+
} ZSTD_outBufferMode_e;
|
102
|
+
|
98
103
|
struct ZSTD_DCtx_s
|
99
104
|
{
|
100
105
|
const ZSTD_seqSymbol* LLTptr;
|
@@ -147,10 +152,19 @@ struct ZSTD_DCtx_s
|
|
147
152
|
U32 legacyVersion;
|
148
153
|
U32 hostageByte;
|
149
154
|
int noForwardProgress;
|
155
|
+
ZSTD_outBufferMode_e outBufferMode;
|
156
|
+
ZSTD_outBuffer expectedOutBuffer;
|
150
157
|
|
151
158
|
/* workspace */
|
152
159
|
BYTE litBuffer[ZSTD_BLOCKSIZE_MAX + WILDCOPY_OVERLENGTH];
|
153
160
|
BYTE headerBuffer[ZSTD_FRAMEHEADERSIZE_MAX];
|
161
|
+
|
162
|
+
size_t oversizedDuration;
|
163
|
+
|
164
|
+
#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
|
165
|
+
void const* dictContentBeginForFuzzing;
|
166
|
+
void const* dictContentEndForFuzzing;
|
167
|
+
#endif
|
154
168
|
}; /* typedef'd to ZSTD_DCtx within "zstd.h" */
|
155
169
|
|
156
170
|
|
@@ -160,7 +174,7 @@ struct ZSTD_DCtx_s
|
|
160
174
|
|
161
175
|
/*! ZSTD_loadDEntropy() :
|
162
176
|
* dict : must point at beginning of a valid zstd dictionary.
|
163
|
-
* @return : size of entropy tables
|
177
|
+
* @return : size of dictionary header (size of magic number + dict ID + entropy tables) */
|
164
178
|
size_t ZSTD_loadDEntropy(ZSTD_entropyDTables_t* entropy,
|
165
179
|
const void* const dict, size_t const dictSize);
|
166
180
|
|
@@ -1,5 +1,5 @@
|
|
1
1
|
/*
|
2
|
-
* Copyright (c) 2016-
|
2
|
+
* Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
|
3
3
|
* All rights reserved.
|
4
4
|
*
|
5
5
|
* This source code is licensed under both the BSD-style license (found in the
|
@@ -28,7 +28,7 @@ extern "C" {
|
|
28
28
|
* Dependencies
|
29
29
|
***************************************/
|
30
30
|
#include <stddef.h> /* size_t */
|
31
|
-
#include "zstd.h" /* ZSTD_CStream, ZSTD_DStream, ZSTDLIB_API */
|
31
|
+
#include "../zstd.h" /* ZSTD_CStream, ZSTD_DStream, ZSTDLIB_API */
|
32
32
|
|
33
33
|
|
34
34
|
/* ***************************************************************
|
@@ -36,16 +36,17 @@ extern "C" {
|
|
36
36
|
*****************************************************************/
|
37
37
|
/* Deprecation warnings */
|
38
38
|
/* Should these warnings be a problem,
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
39
|
+
* it is generally possible to disable them,
|
40
|
+
* typically with -Wno-deprecated-declarations for gcc
|
41
|
+
* or _CRT_SECURE_NO_WARNINGS in Visual.
|
42
|
+
* Otherwise, it's also possible to define ZBUFF_DISABLE_DEPRECATE_WARNINGS
|
43
|
+
*/
|
43
44
|
#ifdef ZBUFF_DISABLE_DEPRECATE_WARNINGS
|
44
45
|
# define ZBUFF_DEPRECATED(message) ZSTDLIB_API /* disable deprecation warnings */
|
45
46
|
#else
|
46
47
|
# if defined (__cplusplus) && (__cplusplus >= 201402) /* C++14 or greater */
|
47
48
|
# define ZBUFF_DEPRECATED(message) [[deprecated(message)]] ZSTDLIB_API
|
48
|
-
# elif (defined(
|
49
|
+
# elif (defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 5))) || defined(__clang__)
|
49
50
|
# define ZBUFF_DEPRECATED(message) ZSTDLIB_API __attribute__((deprecated(message)))
|
50
51
|
# elif defined(__GNUC__) && (__GNUC__ >= 3)
|
51
52
|
# define ZBUFF_DEPRECATED(message) ZSTDLIB_API __attribute__((deprecated))
|
@@ -185,7 +186,7 @@ ZBUFF_DEPRECATED("use ZSTD_DStreamOutSize") size_t ZBUFF_recommendedDOutSize(voi
|
|
185
186
|
|
186
187
|
/*--- Dependency ---*/
|
187
188
|
#define ZSTD_STATIC_LINKING_ONLY /* ZSTD_parameters, ZSTD_customMem */
|
188
|
-
#include "zstd.h"
|
189
|
+
#include "../zstd.h"
|
189
190
|
|
190
191
|
|
191
192
|
/*--- Custom memory allocator ---*/
|
@@ -1,5 +1,5 @@
|
|
1
1
|
/*
|
2
|
-
* Copyright (c) 2016-
|
2
|
+
* Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
|
3
3
|
* All rights reserved.
|
4
4
|
*
|
5
5
|
* This source code is licensed under both the BSD-style license (found in the
|
@@ -11,7 +11,7 @@
|
|
11
11
|
/*-*************************************
|
12
12
|
* Dependencies
|
13
13
|
***************************************/
|
14
|
-
#include "error_private.h"
|
14
|
+
#include "../common/error_private.h"
|
15
15
|
#include "zbuff.h"
|
16
16
|
|
17
17
|
/*-****************************************
|
@@ -1,5 +1,5 @@
|
|
1
1
|
/*
|
2
|
-
* Copyright (c) 2016-
|
2
|
+
* Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
|
3
3
|
* All rights reserved.
|
4
4
|
*
|
5
5
|
* This source code is licensed under both the BSD-style license (found in the
|
@@ -26,11 +26,11 @@
|
|
26
26
|
#include <string.h> /* memset */
|
27
27
|
#include <time.h> /* clock */
|
28
28
|
|
29
|
-
#include "mem.h" /* read */
|
30
|
-
#include "pool.h"
|
31
|
-
#include "threading.h"
|
29
|
+
#include "../common/mem.h" /* read */
|
30
|
+
#include "../common/pool.h"
|
31
|
+
#include "../common/threading.h"
|
32
32
|
#include "cover.h"
|
33
|
-
#include "zstd_internal.h" /* includes zstd.h */
|
33
|
+
#include "../common/zstd_internal.h" /* includes zstd.h */
|
34
34
|
#ifndef ZDICT_STATIC_LINKING_ONLY
|
35
35
|
#define ZDICT_STATIC_LINKING_ONLY
|
36
36
|
#endif
|
@@ -526,10 +526,10 @@ static void COVER_ctx_destroy(COVER_ctx_t *ctx) {
|
|
526
526
|
* Prepare a context for dictionary building.
|
527
527
|
* The context is only dependent on the parameter `d` and can used multiple
|
528
528
|
* times.
|
529
|
-
* Returns
|
529
|
+
* Returns 0 on success or error code on error.
|
530
530
|
* The context must be destroyed with `COVER_ctx_destroy()`.
|
531
531
|
*/
|
532
|
-
static int COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer,
|
532
|
+
static size_t COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer,
|
533
533
|
const size_t *samplesSizes, unsigned nbSamples,
|
534
534
|
unsigned d, double splitPoint) {
|
535
535
|
const BYTE *const samples = (const BYTE *)samplesBuffer;
|
@@ -544,17 +544,17 @@ static int COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer,
|
|
544
544
|
totalSamplesSize >= (size_t)COVER_MAX_SAMPLES_SIZE) {
|
545
545
|
DISPLAYLEVEL(1, "Total samples size is too large (%u MB), maximum size is %u MB\n",
|
546
546
|
(unsigned)(totalSamplesSize>>20), (COVER_MAX_SAMPLES_SIZE >> 20));
|
547
|
-
return
|
547
|
+
return ERROR(srcSize_wrong);
|
548
548
|
}
|
549
549
|
/* Check if there are at least 5 training samples */
|
550
550
|
if (nbTrainSamples < 5) {
|
551
551
|
DISPLAYLEVEL(1, "Total number of training samples is %u and is invalid.", nbTrainSamples);
|
552
|
-
return
|
552
|
+
return ERROR(srcSize_wrong);
|
553
553
|
}
|
554
554
|
/* Check if there's testing sample */
|
555
555
|
if (nbTestSamples < 1) {
|
556
556
|
DISPLAYLEVEL(1, "Total number of testing samples is %u and is invalid.", nbTestSamples);
|
557
|
-
return
|
557
|
+
return ERROR(srcSize_wrong);
|
558
558
|
}
|
559
559
|
/* Zero the context */
|
560
560
|
memset(ctx, 0, sizeof(*ctx));
|
@@ -577,7 +577,7 @@ static int COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer,
|
|
577
577
|
if (!ctx->suffix || !ctx->dmerAt || !ctx->offsets) {
|
578
578
|
DISPLAYLEVEL(1, "Failed to allocate scratch buffers\n");
|
579
579
|
COVER_ctx_destroy(ctx);
|
580
|
-
return
|
580
|
+
return ERROR(memory_allocation);
|
581
581
|
}
|
582
582
|
ctx->freqs = NULL;
|
583
583
|
ctx->d = d;
|
@@ -624,7 +624,7 @@ static int COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer,
|
|
624
624
|
(ctx->d <= 8 ? &COVER_cmp8 : &COVER_cmp), &COVER_group);
|
625
625
|
ctx->freqs = ctx->suffix;
|
626
626
|
ctx->suffix = NULL;
|
627
|
-
return
|
627
|
+
return 0;
|
628
628
|
}
|
629
629
|
|
630
630
|
void COVER_warnOnSmallCorpus(size_t maxDictSize, size_t nbDmers, int displayLevel)
|
@@ -638,8 +638,8 @@ void COVER_warnOnSmallCorpus(size_t maxDictSize, size_t nbDmers, int displayLeve
|
|
638
638
|
"compared to the source size %u! "
|
639
639
|
"size(source)/size(dictionary) = %f, but it should be >= "
|
640
640
|
"10! This may lead to a subpar dictionary! We recommend "
|
641
|
-
"training on sources at least 10x, and
|
642
|
-
"size of the dictionary
|
641
|
+
"training on sources at least 10x, and preferably 100x "
|
642
|
+
"the size of the dictionary! \n", (U32)maxDictSize,
|
643
643
|
(U32)nbDmers, ratio);
|
644
644
|
}
|
645
645
|
|
@@ -729,11 +729,11 @@ ZDICTLIB_API size_t ZDICT_trainFromBuffer_cover(
|
|
729
729
|
/* Checks */
|
730
730
|
if (!COVER_checkParameters(parameters, dictBufferCapacity)) {
|
731
731
|
DISPLAYLEVEL(1, "Cover parameters incorrect\n");
|
732
|
-
return ERROR(
|
732
|
+
return ERROR(parameter_outOfBound);
|
733
733
|
}
|
734
734
|
if (nbSamples == 0) {
|
735
735
|
DISPLAYLEVEL(1, "Cover must have at least one input file\n");
|
736
|
-
return ERROR(
|
736
|
+
return ERROR(srcSize_wrong);
|
737
737
|
}
|
738
738
|
if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) {
|
739
739
|
DISPLAYLEVEL(1, "dictBufferCapacity must be at least %u\n",
|
@@ -741,15 +741,18 @@ ZDICTLIB_API size_t ZDICT_trainFromBuffer_cover(
|
|
741
741
|
return ERROR(dstSize_tooSmall);
|
742
742
|
}
|
743
743
|
/* Initialize context and activeDmers */
|
744
|
-
|
745
|
-
|
746
|
-
|
744
|
+
{
|
745
|
+
size_t const initVal = COVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples,
|
746
|
+
parameters.d, parameters.splitPoint);
|
747
|
+
if (ZSTD_isError(initVal)) {
|
748
|
+
return initVal;
|
749
|
+
}
|
747
750
|
}
|
748
751
|
COVER_warnOnSmallCorpus(dictBufferCapacity, ctx.suffixSize, g_displayLevel);
|
749
752
|
if (!COVER_map_init(&activeDmers, parameters.k - parameters.d + 1)) {
|
750
753
|
DISPLAYLEVEL(1, "Failed to allocate dmer map: out of memory\n");
|
751
754
|
COVER_ctx_destroy(&ctx);
|
752
|
-
return ERROR(
|
755
|
+
return ERROR(memory_allocation);
|
753
756
|
}
|
754
757
|
|
755
758
|
DISPLAYLEVEL(2, "Building dictionary\n");
|
@@ -810,7 +813,7 @@ size_t COVER_checkTotalCompressedSize(const ZDICT_cover_params_t parameters,
|
|
810
813
|
cctx, dst, dstCapacity, samples + offsets[i],
|
811
814
|
samplesSizes[i], cdict);
|
812
815
|
if (ZSTD_isError(size)) {
|
813
|
-
totalCompressedSize =
|
816
|
+
totalCompressedSize = size;
|
814
817
|
goto _compressCleanup;
|
815
818
|
}
|
816
819
|
totalCompressedSize += size;
|
@@ -886,9 +889,11 @@ void COVER_best_start(COVER_best_t *best) {
|
|
886
889
|
* Decrements liveJobs and signals any waiting threads if liveJobs == 0.
|
887
890
|
* If this dictionary is the best so far save it and its parameters.
|
888
891
|
*/
|
889
|
-
void COVER_best_finish(COVER_best_t *best, size_t compressedSize,
|
890
|
-
|
891
|
-
|
892
|
+
void COVER_best_finish(COVER_best_t *best, ZDICT_cover_params_t parameters,
|
893
|
+
COVER_dictSelection_t selection) {
|
894
|
+
void* dict = selection.dictContent;
|
895
|
+
size_t compressedSize = selection.totalCompressedSize;
|
896
|
+
size_t dictSize = selection.dictSize;
|
892
897
|
if (!best) {
|
893
898
|
return;
|
894
899
|
}
|
@@ -914,10 +919,12 @@ void COVER_best_finish(COVER_best_t *best, size_t compressedSize,
|
|
914
919
|
}
|
915
920
|
}
|
916
921
|
/* Save the dictionary, parameters, and size */
|
917
|
-
|
918
|
-
|
919
|
-
|
920
|
-
|
922
|
+
if (dict) {
|
923
|
+
memcpy(best->dict, dict, dictSize);
|
924
|
+
best->dictSize = dictSize;
|
925
|
+
best->parameters = parameters;
|
926
|
+
best->compressedSize = compressedSize;
|
927
|
+
}
|
921
928
|
}
|
922
929
|
if (liveJobs == 0) {
|
923
930
|
ZSTD_pthread_cond_broadcast(&best->cond);
|
@@ -926,6 +933,111 @@ void COVER_best_finish(COVER_best_t *best, size_t compressedSize,
|
|
926
933
|
}
|
927
934
|
}
|
928
935
|
|
936
|
+
COVER_dictSelection_t COVER_dictSelectionError(size_t error) {
|
937
|
+
COVER_dictSelection_t selection = { NULL, 0, error };
|
938
|
+
return selection;
|
939
|
+
}
|
940
|
+
|
941
|
+
unsigned COVER_dictSelectionIsError(COVER_dictSelection_t selection) {
|
942
|
+
return (ZSTD_isError(selection.totalCompressedSize) || !selection.dictContent);
|
943
|
+
}
|
944
|
+
|
945
|
+
void COVER_dictSelectionFree(COVER_dictSelection_t selection){
|
946
|
+
free(selection.dictContent);
|
947
|
+
}
|
948
|
+
|
949
|
+
COVER_dictSelection_t COVER_selectDict(BYTE* customDictContent,
|
950
|
+
size_t dictContentSize, const BYTE* samplesBuffer, const size_t* samplesSizes, unsigned nbFinalizeSamples,
|
951
|
+
size_t nbCheckSamples, size_t nbSamples, ZDICT_cover_params_t params, size_t* offsets, size_t totalCompressedSize) {
|
952
|
+
|
953
|
+
size_t largestDict = 0;
|
954
|
+
size_t largestCompressed = 0;
|
955
|
+
BYTE* customDictContentEnd = customDictContent + dictContentSize;
|
956
|
+
|
957
|
+
BYTE * largestDictbuffer = (BYTE *)malloc(dictContentSize);
|
958
|
+
BYTE * candidateDictBuffer = (BYTE *)malloc(dictContentSize);
|
959
|
+
double regressionTolerance = ((double)params.shrinkDictMaxRegression / 100.0) + 1.00;
|
960
|
+
|
961
|
+
if (!largestDictbuffer || !candidateDictBuffer) {
|
962
|
+
free(largestDictbuffer);
|
963
|
+
free(candidateDictBuffer);
|
964
|
+
return COVER_dictSelectionError(dictContentSize);
|
965
|
+
}
|
966
|
+
|
967
|
+
/* Initial dictionary size and compressed size */
|
968
|
+
memcpy(largestDictbuffer, customDictContent, dictContentSize);
|
969
|
+
dictContentSize = ZDICT_finalizeDictionary(
|
970
|
+
largestDictbuffer, dictContentSize, customDictContent, dictContentSize,
|
971
|
+
samplesBuffer, samplesSizes, nbFinalizeSamples, params.zParams);
|
972
|
+
|
973
|
+
if (ZDICT_isError(dictContentSize)) {
|
974
|
+
free(largestDictbuffer);
|
975
|
+
free(candidateDictBuffer);
|
976
|
+
return COVER_dictSelectionError(dictContentSize);
|
977
|
+
}
|
978
|
+
|
979
|
+
totalCompressedSize = COVER_checkTotalCompressedSize(params, samplesSizes,
|
980
|
+
samplesBuffer, offsets,
|
981
|
+
nbCheckSamples, nbSamples,
|
982
|
+
largestDictbuffer, dictContentSize);
|
983
|
+
|
984
|
+
if (ZSTD_isError(totalCompressedSize)) {
|
985
|
+
free(largestDictbuffer);
|
986
|
+
free(candidateDictBuffer);
|
987
|
+
return COVER_dictSelectionError(totalCompressedSize);
|
988
|
+
}
|
989
|
+
|
990
|
+
if (params.shrinkDict == 0) {
|
991
|
+
COVER_dictSelection_t selection = { largestDictbuffer, dictContentSize, totalCompressedSize };
|
992
|
+
free(candidateDictBuffer);
|
993
|
+
return selection;
|
994
|
+
}
|
995
|
+
|
996
|
+
largestDict = dictContentSize;
|
997
|
+
largestCompressed = totalCompressedSize;
|
998
|
+
dictContentSize = ZDICT_DICTSIZE_MIN;
|
999
|
+
|
1000
|
+
/* Largest dict is initially at least ZDICT_DICTSIZE_MIN */
|
1001
|
+
while (dictContentSize < largestDict) {
|
1002
|
+
memcpy(candidateDictBuffer, largestDictbuffer, largestDict);
|
1003
|
+
dictContentSize = ZDICT_finalizeDictionary(
|
1004
|
+
candidateDictBuffer, dictContentSize, customDictContentEnd - dictContentSize, dictContentSize,
|
1005
|
+
samplesBuffer, samplesSizes, nbFinalizeSamples, params.zParams);
|
1006
|
+
|
1007
|
+
if (ZDICT_isError(dictContentSize)) {
|
1008
|
+
free(largestDictbuffer);
|
1009
|
+
free(candidateDictBuffer);
|
1010
|
+
return COVER_dictSelectionError(dictContentSize);
|
1011
|
+
|
1012
|
+
}
|
1013
|
+
|
1014
|
+
totalCompressedSize = COVER_checkTotalCompressedSize(params, samplesSizes,
|
1015
|
+
samplesBuffer, offsets,
|
1016
|
+
nbCheckSamples, nbSamples,
|
1017
|
+
candidateDictBuffer, dictContentSize);
|
1018
|
+
|
1019
|
+
if (ZSTD_isError(totalCompressedSize)) {
|
1020
|
+
free(largestDictbuffer);
|
1021
|
+
free(candidateDictBuffer);
|
1022
|
+
return COVER_dictSelectionError(totalCompressedSize);
|
1023
|
+
}
|
1024
|
+
|
1025
|
+
if (totalCompressedSize <= largestCompressed * regressionTolerance) {
|
1026
|
+
COVER_dictSelection_t selection = { candidateDictBuffer, dictContentSize, totalCompressedSize };
|
1027
|
+
free(largestDictbuffer);
|
1028
|
+
return selection;
|
1029
|
+
}
|
1030
|
+
dictContentSize *= 2;
|
1031
|
+
}
|
1032
|
+
dictContentSize = largestDict;
|
1033
|
+
totalCompressedSize = largestCompressed;
|
1034
|
+
{
|
1035
|
+
COVER_dictSelection_t selection = { largestDictbuffer, dictContentSize, totalCompressedSize };
|
1036
|
+
free(candidateDictBuffer);
|
1037
|
+
return selection;
|
1038
|
+
}
|
1039
|
+
}
|
1040
|
+
|
929
1041
|
/**
|
930
1042
|
* Parameters for COVER_tryParameters().
|
931
1043
|
*/
|
@@ -951,6 +1063,7 @@ static void COVER_tryParameters(void *opaque) {
|
|
951
1063
|
/* Allocate space for hash table, dict, and freqs */
|
952
1064
|
COVER_map_t activeDmers;
|
953
1065
|
BYTE *const dict = (BYTE * const)malloc(dictBufferCapacity);
|
1066
|
+
COVER_dictSelection_t selection = COVER_dictSelectionError(ERROR(GENERIC));
|
954
1067
|
U32 *freqs = (U32 *)malloc(ctx->suffixSize * sizeof(U32));
|
955
1068
|
if (!COVER_map_init(&activeDmers, parameters.k - parameters.d + 1)) {
|
956
1069
|
DISPLAYLEVEL(1, "Failed to allocate dmer map: out of memory\n");
|
@@ -966,29 +1079,21 @@ static void COVER_tryParameters(void *opaque) {
|
|
966
1079
|
{
|
967
1080
|
const size_t tail = COVER_buildDictionary(ctx, freqs, &activeDmers, dict,
|
968
1081
|
dictBufferCapacity, parameters);
|
969
|
-
|
970
|
-
|
971
|
-
|
972
|
-
|
973
|
-
if (
|
974
|
-
DISPLAYLEVEL(1, "Failed to
|
1082
|
+
selection = COVER_selectDict(dict + tail, dictBufferCapacity - tail,
|
1083
|
+
ctx->samples, ctx->samplesSizes, (unsigned)ctx->nbTrainSamples, ctx->nbTrainSamples, ctx->nbSamples, parameters, ctx->offsets,
|
1084
|
+
totalCompressedSize);
|
1085
|
+
|
1086
|
+
if (COVER_dictSelectionIsError(selection)) {
|
1087
|
+
DISPLAYLEVEL(1, "Failed to select dictionary\n");
|
975
1088
|
goto _cleanup;
|
976
1089
|
}
|
977
1090
|
}
|
978
|
-
/* Check total compressed size */
|
979
|
-
totalCompressedSize = COVER_checkTotalCompressedSize(parameters, ctx->samplesSizes,
|
980
|
-
ctx->samples, ctx->offsets,
|
981
|
-
ctx->nbTrainSamples, ctx->nbSamples,
|
982
|
-
dict, dictBufferCapacity);
|
983
|
-
|
984
1091
|
_cleanup:
|
985
|
-
|
986
|
-
|
1092
|
+
free(dict);
|
1093
|
+
COVER_best_finish(data->best, parameters, selection);
|
987
1094
|
free(data);
|
988
1095
|
COVER_map_destroy(&activeDmers);
|
989
|
-
|
990
|
-
free(dict);
|
991
|
-
}
|
1096
|
+
COVER_dictSelectionFree(selection);
|
992
1097
|
if (freqs) {
|
993
1098
|
free(freqs);
|
994
1099
|
}
|
@@ -1010,6 +1115,7 @@ ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover(
|
|
1010
1115
|
const unsigned kStepSize = MAX((kMaxK - kMinK) / kSteps, 1);
|
1011
1116
|
const unsigned kIterations =
|
1012
1117
|
(1 + (kMaxD - kMinD) / 2) * (1 + (kMaxK - kMinK) / kStepSize);
|
1118
|
+
const unsigned shrinkDict = 0;
|
1013
1119
|
/* Local variables */
|
1014
1120
|
const int displayLevel = parameters->zParams.notificationLevel;
|
1015
1121
|
unsigned iteration = 1;
|
@@ -1022,15 +1128,15 @@ ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover(
|
|
1022
1128
|
/* Checks */
|
1023
1129
|
if (splitPoint <= 0 || splitPoint > 1) {
|
1024
1130
|
LOCALDISPLAYLEVEL(displayLevel, 1, "Incorrect parameters\n");
|
1025
|
-
return ERROR(
|
1131
|
+
return ERROR(parameter_outOfBound);
|
1026
1132
|
}
|
1027
1133
|
if (kMinK < kMaxD || kMaxK < kMinK) {
|
1028
1134
|
LOCALDISPLAYLEVEL(displayLevel, 1, "Incorrect parameters\n");
|
1029
|
-
return ERROR(
|
1135
|
+
return ERROR(parameter_outOfBound);
|
1030
1136
|
}
|
1031
1137
|
if (nbSamples == 0) {
|
1032
1138
|
DISPLAYLEVEL(1, "Cover must have at least one input file\n");
|
1033
|
-
return ERROR(
|
1139
|
+
return ERROR(srcSize_wrong);
|
1034
1140
|
}
|
1035
1141
|
if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) {
|
1036
1142
|
DISPLAYLEVEL(1, "dictBufferCapacity must be at least %u\n",
|
@@ -1054,11 +1160,14 @@ ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover(
|
|
1054
1160
|
/* Initialize the context for this value of d */
|
1055
1161
|
COVER_ctx_t ctx;
|
1056
1162
|
LOCALDISPLAYLEVEL(displayLevel, 3, "d=%u\n", d);
|
1057
|
-
|
1058
|
-
|
1059
|
-
|
1060
|
-
|
1061
|
-
|
1163
|
+
{
|
1164
|
+
const size_t initVal = COVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples, d, splitPoint);
|
1165
|
+
if (ZSTD_isError(initVal)) {
|
1166
|
+
LOCALDISPLAYLEVEL(displayLevel, 1, "Failed to initialize context\n");
|
1167
|
+
COVER_best_destroy(&best);
|
1168
|
+
POOL_free(pool);
|
1169
|
+
return initVal;
|
1170
|
+
}
|
1062
1171
|
}
|
1063
1172
|
if (!warned) {
|
1064
1173
|
COVER_warnOnSmallCorpus(dictBufferCapacity, ctx.suffixSize, displayLevel);
|
@@ -1075,7 +1184,7 @@ ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover(
|
|
1075
1184
|
COVER_best_destroy(&best);
|
1076
1185
|
COVER_ctx_destroy(&ctx);
|
1077
1186
|
POOL_free(pool);
|
1078
|
-
return ERROR(
|
1187
|
+
return ERROR(memory_allocation);
|
1079
1188
|
}
|
1080
1189
|
data->ctx = &ctx;
|
1081
1190
|
data->best = &best;
|
@@ -1085,6 +1194,7 @@ ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover(
|
|
1085
1194
|
data->parameters.d = d;
|
1086
1195
|
data->parameters.splitPoint = splitPoint;
|
1087
1196
|
data->parameters.steps = kSteps;
|
1197
|
+
data->parameters.shrinkDict = shrinkDict;
|
1088
1198
|
data->parameters.zParams.notificationLevel = g_displayLevel;
|
1089
1199
|
/* Check the parameters */
|
1090
1200
|
if (!COVER_checkParameters(data->parameters, dictBufferCapacity)) {
|