extzstd 0.3 → 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/HISTORY.ja.md +8 -0
- data/README.md +1 -1
- data/contrib/zstd/CHANGELOG +94 -0
- data/contrib/zstd/CONTRIBUTING.md +351 -1
- data/contrib/zstd/Makefile +32 -10
- data/contrib/zstd/README.md +33 -10
- data/contrib/zstd/TESTING.md +2 -2
- data/contrib/zstd/appveyor.yml +42 -4
- data/contrib/zstd/lib/Makefile +128 -60
- data/contrib/zstd/lib/README.md +47 -16
- data/contrib/zstd/lib/common/bitstream.h +38 -39
- data/contrib/zstd/lib/common/compiler.h +40 -5
- data/contrib/zstd/lib/common/cpu.h +1 -1
- data/contrib/zstd/lib/common/debug.c +11 -31
- data/contrib/zstd/lib/common/debug.h +11 -31
- data/contrib/zstd/lib/common/entropy_common.c +13 -33
- data/contrib/zstd/lib/common/error_private.c +2 -1
- data/contrib/zstd/lib/common/error_private.h +6 -2
- data/contrib/zstd/lib/common/fse.h +12 -32
- data/contrib/zstd/lib/common/fse_decompress.c +12 -35
- data/contrib/zstd/lib/common/huf.h +15 -33
- data/contrib/zstd/lib/common/mem.h +75 -2
- data/contrib/zstd/lib/common/pool.c +8 -4
- data/contrib/zstd/lib/common/pool.h +2 -2
- data/contrib/zstd/lib/common/threading.c +50 -4
- data/contrib/zstd/lib/common/threading.h +36 -4
- data/contrib/zstd/lib/common/xxhash.c +23 -35
- data/contrib/zstd/lib/common/xxhash.h +11 -31
- data/contrib/zstd/lib/common/zstd_common.c +1 -1
- data/contrib/zstd/lib/common/zstd_errors.h +2 -1
- data/contrib/zstd/lib/common/zstd_internal.h +154 -26
- data/contrib/zstd/lib/compress/fse_compress.c +17 -40
- data/contrib/zstd/lib/compress/hist.c +15 -35
- data/contrib/zstd/lib/compress/hist.h +12 -32
- data/contrib/zstd/lib/compress/huf_compress.c +92 -92
- data/contrib/zstd/lib/compress/zstd_compress.c +1191 -1330
- data/contrib/zstd/lib/compress/zstd_compress_internal.h +317 -55
- data/contrib/zstd/lib/compress/zstd_compress_literals.c +158 -0
- data/contrib/zstd/lib/compress/zstd_compress_literals.h +29 -0
- data/contrib/zstd/lib/compress/zstd_compress_sequences.c +419 -0
- data/contrib/zstd/lib/compress/zstd_compress_sequences.h +54 -0
- data/contrib/zstd/lib/compress/zstd_compress_superblock.c +845 -0
- data/contrib/zstd/lib/compress/zstd_compress_superblock.h +32 -0
- data/contrib/zstd/lib/compress/zstd_cwksp.h +525 -0
- data/contrib/zstd/lib/compress/zstd_double_fast.c +65 -43
- data/contrib/zstd/lib/compress/zstd_double_fast.h +2 -2
- data/contrib/zstd/lib/compress/zstd_fast.c +92 -66
- data/contrib/zstd/lib/compress/zstd_fast.h +2 -2
- data/contrib/zstd/lib/compress/zstd_lazy.c +74 -42
- data/contrib/zstd/lib/compress/zstd_lazy.h +1 -1
- data/contrib/zstd/lib/compress/zstd_ldm.c +32 -10
- data/contrib/zstd/lib/compress/zstd_ldm.h +7 -2
- data/contrib/zstd/lib/compress/zstd_opt.c +81 -114
- data/contrib/zstd/lib/compress/zstd_opt.h +1 -1
- data/contrib/zstd/lib/compress/zstdmt_compress.c +95 -51
- data/contrib/zstd/lib/compress/zstdmt_compress.h +3 -2
- data/contrib/zstd/lib/decompress/huf_decompress.c +76 -60
- data/contrib/zstd/lib/decompress/zstd_ddict.c +12 -8
- data/contrib/zstd/lib/decompress/zstd_ddict.h +2 -2
- data/contrib/zstd/lib/decompress/zstd_decompress.c +292 -172
- data/contrib/zstd/lib/decompress/zstd_decompress_block.c +459 -338
- data/contrib/zstd/lib/decompress/zstd_decompress_block.h +3 -3
- data/contrib/zstd/lib/decompress/zstd_decompress_internal.h +18 -4
- data/contrib/zstd/lib/deprecated/zbuff.h +9 -8
- data/contrib/zstd/lib/deprecated/zbuff_common.c +2 -2
- data/contrib/zstd/lib/deprecated/zbuff_compress.c +1 -1
- data/contrib/zstd/lib/deprecated/zbuff_decompress.c +1 -1
- data/contrib/zstd/lib/dictBuilder/cover.c +164 -54
- data/contrib/zstd/lib/dictBuilder/cover.h +52 -7
- data/contrib/zstd/lib/dictBuilder/fastcover.c +60 -43
- data/contrib/zstd/lib/dictBuilder/zdict.c +43 -19
- data/contrib/zstd/lib/dictBuilder/zdict.h +56 -28
- data/contrib/zstd/lib/legacy/zstd_legacy.h +8 -4
- data/contrib/zstd/lib/legacy/zstd_v01.c +110 -110
- data/contrib/zstd/lib/legacy/zstd_v01.h +1 -1
- data/contrib/zstd/lib/legacy/zstd_v02.c +23 -13
- data/contrib/zstd/lib/legacy/zstd_v02.h +1 -1
- data/contrib/zstd/lib/legacy/zstd_v03.c +23 -13
- data/contrib/zstd/lib/legacy/zstd_v03.h +1 -1
- data/contrib/zstd/lib/legacy/zstd_v04.c +30 -17
- data/contrib/zstd/lib/legacy/zstd_v04.h +1 -1
- data/contrib/zstd/lib/legacy/zstd_v05.c +113 -102
- data/contrib/zstd/lib/legacy/zstd_v05.h +2 -2
- data/contrib/zstd/lib/legacy/zstd_v06.c +20 -18
- data/contrib/zstd/lib/legacy/zstd_v06.h +1 -1
- data/contrib/zstd/lib/legacy/zstd_v07.c +25 -19
- data/contrib/zstd/lib/legacy/zstd_v07.h +1 -1
- data/contrib/zstd/lib/libzstd.pc.in +3 -2
- data/contrib/zstd/lib/zstd.h +265 -88
- data/ext/extzstd.h +1 -1
- data/ext/libzstd_conf.h +8 -0
- data/ext/zstd_common.c +1 -3
- data/ext/zstd_compress.c +3 -3
- data/ext/zstd_decompress.c +1 -5
- data/ext/zstd_dictbuilder.c +2 -3
- data/ext/zstd_dictbuilder_fastcover.c +1 -3
- data/ext/zstd_legacy_v01.c +2 -0
- data/ext/zstd_legacy_v02.c +2 -0
- data/ext/zstd_legacy_v03.c +2 -0
- data/ext/zstd_legacy_v04.c +2 -0
- data/ext/zstd_legacy_v05.c +2 -0
- data/ext/zstd_legacy_v06.c +2 -0
- data/ext/zstd_legacy_v07.c +2 -0
- data/lib/extzstd.rb +18 -10
- data/lib/extzstd/version.rb +1 -1
- metadata +15 -6
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
/*
|
|
2
|
-
* Copyright (c) 2016-
|
|
2
|
+
* Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
|
|
3
3
|
* All rights reserved.
|
|
4
4
|
*
|
|
5
5
|
* This source code is licensed under both the BSD-style license (found in the
|
|
@@ -16,8 +16,8 @@
|
|
|
16
16
|
* Dependencies
|
|
17
17
|
*********************************************************/
|
|
18
18
|
#include <stddef.h> /* size_t */
|
|
19
|
-
#include "zstd.h" /* DCtx, and some public functions */
|
|
20
|
-
#include "zstd_internal.h" /* blockProperties_t, and some public functions */
|
|
19
|
+
#include "../zstd.h" /* DCtx, and some public functions */
|
|
20
|
+
#include "../common/zstd_internal.h" /* blockProperties_t, and some public functions */
|
|
21
21
|
#include "zstd_decompress_internal.h" /* ZSTD_seqSymbol */
|
|
22
22
|
|
|
23
23
|
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
/*
|
|
2
|
-
* Copyright (c) 2016-
|
|
2
|
+
* Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
|
|
3
3
|
* All rights reserved.
|
|
4
4
|
*
|
|
5
5
|
* This source code is licensed under both the BSD-style license (found in the
|
|
@@ -19,8 +19,8 @@
|
|
|
19
19
|
/*-*******************************************************
|
|
20
20
|
* Dependencies
|
|
21
21
|
*********************************************************/
|
|
22
|
-
#include "mem.h" /* BYTE, U16, U32 */
|
|
23
|
-
#include "zstd_internal.h" /* ZSTD_seqSymbol */
|
|
22
|
+
#include "../common/mem.h" /* BYTE, U16, U32 */
|
|
23
|
+
#include "../common/zstd_internal.h" /* ZSTD_seqSymbol */
|
|
24
24
|
|
|
25
25
|
|
|
26
26
|
|
|
@@ -95,6 +95,11 @@ typedef enum {
|
|
|
95
95
|
ZSTD_use_once = 1 /* Use the dictionary once and set to ZSTD_dont_use */
|
|
96
96
|
} ZSTD_dictUses_e;
|
|
97
97
|
|
|
98
|
+
typedef enum {
|
|
99
|
+
ZSTD_obm_buffered = 0, /* Buffer the output */
|
|
100
|
+
ZSTD_obm_stable = 1 /* ZSTD_outBuffer is stable */
|
|
101
|
+
} ZSTD_outBufferMode_e;
|
|
102
|
+
|
|
98
103
|
struct ZSTD_DCtx_s
|
|
99
104
|
{
|
|
100
105
|
const ZSTD_seqSymbol* LLTptr;
|
|
@@ -147,10 +152,19 @@ struct ZSTD_DCtx_s
|
|
|
147
152
|
U32 legacyVersion;
|
|
148
153
|
U32 hostageByte;
|
|
149
154
|
int noForwardProgress;
|
|
155
|
+
ZSTD_outBufferMode_e outBufferMode;
|
|
156
|
+
ZSTD_outBuffer expectedOutBuffer;
|
|
150
157
|
|
|
151
158
|
/* workspace */
|
|
152
159
|
BYTE litBuffer[ZSTD_BLOCKSIZE_MAX + WILDCOPY_OVERLENGTH];
|
|
153
160
|
BYTE headerBuffer[ZSTD_FRAMEHEADERSIZE_MAX];
|
|
161
|
+
|
|
162
|
+
size_t oversizedDuration;
|
|
163
|
+
|
|
164
|
+
#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
|
|
165
|
+
void const* dictContentBeginForFuzzing;
|
|
166
|
+
void const* dictContentEndForFuzzing;
|
|
167
|
+
#endif
|
|
154
168
|
}; /* typedef'd to ZSTD_DCtx within "zstd.h" */
|
|
155
169
|
|
|
156
170
|
|
|
@@ -160,7 +174,7 @@ struct ZSTD_DCtx_s
|
|
|
160
174
|
|
|
161
175
|
/*! ZSTD_loadDEntropy() :
|
|
162
176
|
* dict : must point at beginning of a valid zstd dictionary.
|
|
163
|
-
* @return : size of entropy tables
|
|
177
|
+
* @return : size of dictionary header (size of magic number + dict ID + entropy tables) */
|
|
164
178
|
size_t ZSTD_loadDEntropy(ZSTD_entropyDTables_t* entropy,
|
|
165
179
|
const void* const dict, size_t const dictSize);
|
|
166
180
|
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
/*
|
|
2
|
-
* Copyright (c) 2016-
|
|
2
|
+
* Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
|
|
3
3
|
* All rights reserved.
|
|
4
4
|
*
|
|
5
5
|
* This source code is licensed under both the BSD-style license (found in the
|
|
@@ -28,7 +28,7 @@ extern "C" {
|
|
|
28
28
|
* Dependencies
|
|
29
29
|
***************************************/
|
|
30
30
|
#include <stddef.h> /* size_t */
|
|
31
|
-
#include "zstd.h" /* ZSTD_CStream, ZSTD_DStream, ZSTDLIB_API */
|
|
31
|
+
#include "../zstd.h" /* ZSTD_CStream, ZSTD_DStream, ZSTDLIB_API */
|
|
32
32
|
|
|
33
33
|
|
|
34
34
|
/* ***************************************************************
|
|
@@ -36,16 +36,17 @@ extern "C" {
|
|
|
36
36
|
*****************************************************************/
|
|
37
37
|
/* Deprecation warnings */
|
|
38
38
|
/* Should these warnings be a problem,
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
39
|
+
* it is generally possible to disable them,
|
|
40
|
+
* typically with -Wno-deprecated-declarations for gcc
|
|
41
|
+
* or _CRT_SECURE_NO_WARNINGS in Visual.
|
|
42
|
+
* Otherwise, it's also possible to define ZBUFF_DISABLE_DEPRECATE_WARNINGS
|
|
43
|
+
*/
|
|
43
44
|
#ifdef ZBUFF_DISABLE_DEPRECATE_WARNINGS
|
|
44
45
|
# define ZBUFF_DEPRECATED(message) ZSTDLIB_API /* disable deprecation warnings */
|
|
45
46
|
#else
|
|
46
47
|
# if defined (__cplusplus) && (__cplusplus >= 201402) /* C++14 or greater */
|
|
47
48
|
# define ZBUFF_DEPRECATED(message) [[deprecated(message)]] ZSTDLIB_API
|
|
48
|
-
# elif (defined(
|
|
49
|
+
# elif (defined(GNUC) && (GNUC > 4 || (GNUC == 4 && GNUC_MINOR >= 5))) || defined(__clang__)
|
|
49
50
|
# define ZBUFF_DEPRECATED(message) ZSTDLIB_API __attribute__((deprecated(message)))
|
|
50
51
|
# elif defined(__GNUC__) && (__GNUC__ >= 3)
|
|
51
52
|
# define ZBUFF_DEPRECATED(message) ZSTDLIB_API __attribute__((deprecated))
|
|
@@ -185,7 +186,7 @@ ZBUFF_DEPRECATED("use ZSTD_DStreamOutSize") size_t ZBUFF_recommendedDOutSize(voi
|
|
|
185
186
|
|
|
186
187
|
/*--- Dependency ---*/
|
|
187
188
|
#define ZSTD_STATIC_LINKING_ONLY /* ZSTD_parameters, ZSTD_customMem */
|
|
188
|
-
#include "zstd.h"
|
|
189
|
+
#include "../zstd.h"
|
|
189
190
|
|
|
190
191
|
|
|
191
192
|
/*--- Custom memory allocator ---*/
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
/*
|
|
2
|
-
* Copyright (c) 2016-
|
|
2
|
+
* Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
|
|
3
3
|
* All rights reserved.
|
|
4
4
|
*
|
|
5
5
|
* This source code is licensed under both the BSD-style license (found in the
|
|
@@ -11,7 +11,7 @@
|
|
|
11
11
|
/*-*************************************
|
|
12
12
|
* Dependencies
|
|
13
13
|
***************************************/
|
|
14
|
-
#include "error_private.h"
|
|
14
|
+
#include "../common/error_private.h"
|
|
15
15
|
#include "zbuff.h"
|
|
16
16
|
|
|
17
17
|
/*-****************************************
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
/*
|
|
2
|
-
* Copyright (c) 2016-
|
|
2
|
+
* Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
|
|
3
3
|
* All rights reserved.
|
|
4
4
|
*
|
|
5
5
|
* This source code is licensed under both the BSD-style license (found in the
|
|
@@ -26,11 +26,11 @@
|
|
|
26
26
|
#include <string.h> /* memset */
|
|
27
27
|
#include <time.h> /* clock */
|
|
28
28
|
|
|
29
|
-
#include "mem.h" /* read */
|
|
30
|
-
#include "pool.h"
|
|
31
|
-
#include "threading.h"
|
|
29
|
+
#include "../common/mem.h" /* read */
|
|
30
|
+
#include "../common/pool.h"
|
|
31
|
+
#include "../common/threading.h"
|
|
32
32
|
#include "cover.h"
|
|
33
|
-
#include "zstd_internal.h" /* includes zstd.h */
|
|
33
|
+
#include "../common/zstd_internal.h" /* includes zstd.h */
|
|
34
34
|
#ifndef ZDICT_STATIC_LINKING_ONLY
|
|
35
35
|
#define ZDICT_STATIC_LINKING_ONLY
|
|
36
36
|
#endif
|
|
@@ -526,10 +526,10 @@ static void COVER_ctx_destroy(COVER_ctx_t *ctx) {
|
|
|
526
526
|
* Prepare a context for dictionary building.
|
|
527
527
|
* The context is only dependent on the parameter `d` and can be used multiple
|
|
528
528
|
* times.
|
|
529
|
-
* Returns
|
|
529
|
+
* Returns 0 on success or error code on error.
|
|
530
530
|
* The context must be destroyed with `COVER_ctx_destroy()`.
|
|
531
531
|
*/
|
|
532
|
-
static
|
|
532
|
+
static size_t COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer,
|
|
533
533
|
const size_t *samplesSizes, unsigned nbSamples,
|
|
534
534
|
unsigned d, double splitPoint) {
|
|
535
535
|
const BYTE *const samples = (const BYTE *)samplesBuffer;
|
|
@@ -544,17 +544,17 @@ static int COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer,
|
|
|
544
544
|
totalSamplesSize >= (size_t)COVER_MAX_SAMPLES_SIZE) {
|
|
545
545
|
DISPLAYLEVEL(1, "Total samples size is too large (%u MB), maximum size is %u MB\n",
|
|
546
546
|
(unsigned)(totalSamplesSize>>20), (COVER_MAX_SAMPLES_SIZE >> 20));
|
|
547
|
-
return
|
|
547
|
+
return ERROR(srcSize_wrong);
|
|
548
548
|
}
|
|
549
549
|
/* Check if there are at least 5 training samples */
|
|
550
550
|
if (nbTrainSamples < 5) {
|
|
551
551
|
DISPLAYLEVEL(1, "Total number of training samples is %u and is invalid.", nbTrainSamples);
|
|
552
|
-
return
|
|
552
|
+
return ERROR(srcSize_wrong);
|
|
553
553
|
}
|
|
554
554
|
/* Check if there's testing sample */
|
|
555
555
|
if (nbTestSamples < 1) {
|
|
556
556
|
DISPLAYLEVEL(1, "Total number of testing samples is %u and is invalid.", nbTestSamples);
|
|
557
|
-
return
|
|
557
|
+
return ERROR(srcSize_wrong);
|
|
558
558
|
}
|
|
559
559
|
/* Zero the context */
|
|
560
560
|
memset(ctx, 0, sizeof(*ctx));
|
|
@@ -577,7 +577,7 @@ static int COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer,
|
|
|
577
577
|
if (!ctx->suffix || !ctx->dmerAt || !ctx->offsets) {
|
|
578
578
|
DISPLAYLEVEL(1, "Failed to allocate scratch buffers\n");
|
|
579
579
|
COVER_ctx_destroy(ctx);
|
|
580
|
-
return
|
|
580
|
+
return ERROR(memory_allocation);
|
|
581
581
|
}
|
|
582
582
|
ctx->freqs = NULL;
|
|
583
583
|
ctx->d = d;
|
|
@@ -624,7 +624,7 @@ static int COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer,
|
|
|
624
624
|
(ctx->d <= 8 ? &COVER_cmp8 : &COVER_cmp), &COVER_group);
|
|
625
625
|
ctx->freqs = ctx->suffix;
|
|
626
626
|
ctx->suffix = NULL;
|
|
627
|
-
return
|
|
627
|
+
return 0;
|
|
628
628
|
}
|
|
629
629
|
|
|
630
630
|
void COVER_warnOnSmallCorpus(size_t maxDictSize, size_t nbDmers, int displayLevel)
|
|
@@ -638,8 +638,8 @@ void COVER_warnOnSmallCorpus(size_t maxDictSize, size_t nbDmers, int displayLeve
|
|
|
638
638
|
"compared to the source size %u! "
|
|
639
639
|
"size(source)/size(dictionary) = %f, but it should be >= "
|
|
640
640
|
"10! This may lead to a subpar dictionary! We recommend "
|
|
641
|
-
"training on sources at least 10x, and
|
|
642
|
-
"size of the dictionary
|
|
641
|
+
"training on sources at least 10x, and preferably 100x "
|
|
642
|
+
"the size of the dictionary! \n", (U32)maxDictSize,
|
|
643
643
|
(U32)nbDmers, ratio);
|
|
644
644
|
}
|
|
645
645
|
|
|
@@ -729,11 +729,11 @@ ZDICTLIB_API size_t ZDICT_trainFromBuffer_cover(
|
|
|
729
729
|
/* Checks */
|
|
730
730
|
if (!COVER_checkParameters(parameters, dictBufferCapacity)) {
|
|
731
731
|
DISPLAYLEVEL(1, "Cover parameters incorrect\n");
|
|
732
|
-
return ERROR(
|
|
732
|
+
return ERROR(parameter_outOfBound);
|
|
733
733
|
}
|
|
734
734
|
if (nbSamples == 0) {
|
|
735
735
|
DISPLAYLEVEL(1, "Cover must have at least one input file\n");
|
|
736
|
-
return ERROR(
|
|
736
|
+
return ERROR(srcSize_wrong);
|
|
737
737
|
}
|
|
738
738
|
if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) {
|
|
739
739
|
DISPLAYLEVEL(1, "dictBufferCapacity must be at least %u\n",
|
|
@@ -741,15 +741,18 @@ ZDICTLIB_API size_t ZDICT_trainFromBuffer_cover(
|
|
|
741
741
|
return ERROR(dstSize_tooSmall);
|
|
742
742
|
}
|
|
743
743
|
/* Initialize context and activeDmers */
|
|
744
|
-
|
|
745
|
-
|
|
746
|
-
|
|
744
|
+
{
|
|
745
|
+
size_t const initVal = COVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples,
|
|
746
|
+
parameters.d, parameters.splitPoint);
|
|
747
|
+
if (ZSTD_isError(initVal)) {
|
|
748
|
+
return initVal;
|
|
749
|
+
}
|
|
747
750
|
}
|
|
748
751
|
COVER_warnOnSmallCorpus(dictBufferCapacity, ctx.suffixSize, g_displayLevel);
|
|
749
752
|
if (!COVER_map_init(&activeDmers, parameters.k - parameters.d + 1)) {
|
|
750
753
|
DISPLAYLEVEL(1, "Failed to allocate dmer map: out of memory\n");
|
|
751
754
|
COVER_ctx_destroy(&ctx);
|
|
752
|
-
return ERROR(
|
|
755
|
+
return ERROR(memory_allocation);
|
|
753
756
|
}
|
|
754
757
|
|
|
755
758
|
DISPLAYLEVEL(2, "Building dictionary\n");
|
|
@@ -810,7 +813,7 @@ size_t COVER_checkTotalCompressedSize(const ZDICT_cover_params_t parameters,
|
|
|
810
813
|
cctx, dst, dstCapacity, samples + offsets[i],
|
|
811
814
|
samplesSizes[i], cdict);
|
|
812
815
|
if (ZSTD_isError(size)) {
|
|
813
|
-
totalCompressedSize =
|
|
816
|
+
totalCompressedSize = size;
|
|
814
817
|
goto _compressCleanup;
|
|
815
818
|
}
|
|
816
819
|
totalCompressedSize += size;
|
|
@@ -886,9 +889,11 @@ void COVER_best_start(COVER_best_t *best) {
|
|
|
886
889
|
* Decrements liveJobs and signals any waiting threads if liveJobs == 0.
|
|
887
890
|
* If this dictionary is the best so far save it and its parameters.
|
|
888
891
|
*/
|
|
889
|
-
void COVER_best_finish(COVER_best_t *best,
|
|
890
|
-
|
|
891
|
-
|
|
892
|
+
void COVER_best_finish(COVER_best_t *best, ZDICT_cover_params_t parameters,
|
|
893
|
+
COVER_dictSelection_t selection) {
|
|
894
|
+
void* dict = selection.dictContent;
|
|
895
|
+
size_t compressedSize = selection.totalCompressedSize;
|
|
896
|
+
size_t dictSize = selection.dictSize;
|
|
892
897
|
if (!best) {
|
|
893
898
|
return;
|
|
894
899
|
}
|
|
@@ -914,10 +919,12 @@ void COVER_best_finish(COVER_best_t *best, size_t compressedSize,
|
|
|
914
919
|
}
|
|
915
920
|
}
|
|
916
921
|
/* Save the dictionary, parameters, and size */
|
|
917
|
-
|
|
918
|
-
|
|
919
|
-
|
|
920
|
-
|
|
922
|
+
if (dict) {
|
|
923
|
+
memcpy(best->dict, dict, dictSize);
|
|
924
|
+
best->dictSize = dictSize;
|
|
925
|
+
best->parameters = parameters;
|
|
926
|
+
best->compressedSize = compressedSize;
|
|
927
|
+
}
|
|
921
928
|
}
|
|
922
929
|
if (liveJobs == 0) {
|
|
923
930
|
ZSTD_pthread_cond_broadcast(&best->cond);
|
|
@@ -926,6 +933,111 @@ void COVER_best_finish(COVER_best_t *best, size_t compressedSize,
|
|
|
926
933
|
}
|
|
927
934
|
}
|
|
928
935
|
|
|
936
|
+
COVER_dictSelection_t COVER_dictSelectionError(size_t error) {
|
|
937
|
+
COVER_dictSelection_t selection = { NULL, 0, error };
|
|
938
|
+
return selection;
|
|
939
|
+
}
|
|
940
|
+
|
|
941
|
+
unsigned COVER_dictSelectionIsError(COVER_dictSelection_t selection) {
|
|
942
|
+
return (ZSTD_isError(selection.totalCompressedSize) || !selection.dictContent);
|
|
943
|
+
}
|
|
944
|
+
|
|
945
|
+
void COVER_dictSelectionFree(COVER_dictSelection_t selection){
|
|
946
|
+
free(selection.dictContent);
|
|
947
|
+
}
|
|
948
|
+
|
|
949
|
+
COVER_dictSelection_t COVER_selectDict(BYTE* customDictContent,
|
|
950
|
+
size_t dictContentSize, const BYTE* samplesBuffer, const size_t* samplesSizes, unsigned nbFinalizeSamples,
|
|
951
|
+
size_t nbCheckSamples, size_t nbSamples, ZDICT_cover_params_t params, size_t* offsets, size_t totalCompressedSize) {
|
|
952
|
+
|
|
953
|
+
size_t largestDict = 0;
|
|
954
|
+
size_t largestCompressed = 0;
|
|
955
|
+
BYTE* customDictContentEnd = customDictContent + dictContentSize;
|
|
956
|
+
|
|
957
|
+
BYTE * largestDictbuffer = (BYTE *)malloc(dictContentSize);
|
|
958
|
+
BYTE * candidateDictBuffer = (BYTE *)malloc(dictContentSize);
|
|
959
|
+
double regressionTolerance = ((double)params.shrinkDictMaxRegression / 100.0) + 1.00;
|
|
960
|
+
|
|
961
|
+
if (!largestDictbuffer || !candidateDictBuffer) {
|
|
962
|
+
free(largestDictbuffer);
|
|
963
|
+
free(candidateDictBuffer);
|
|
964
|
+
return COVER_dictSelectionError(dictContentSize);
|
|
965
|
+
}
|
|
966
|
+
|
|
967
|
+
/* Initial dictionary size and compressed size */
|
|
968
|
+
memcpy(largestDictbuffer, customDictContent, dictContentSize);
|
|
969
|
+
dictContentSize = ZDICT_finalizeDictionary(
|
|
970
|
+
largestDictbuffer, dictContentSize, customDictContent, dictContentSize,
|
|
971
|
+
samplesBuffer, samplesSizes, nbFinalizeSamples, params.zParams);
|
|
972
|
+
|
|
973
|
+
if (ZDICT_isError(dictContentSize)) {
|
|
974
|
+
free(largestDictbuffer);
|
|
975
|
+
free(candidateDictBuffer);
|
|
976
|
+
return COVER_dictSelectionError(dictContentSize);
|
|
977
|
+
}
|
|
978
|
+
|
|
979
|
+
totalCompressedSize = COVER_checkTotalCompressedSize(params, samplesSizes,
|
|
980
|
+
samplesBuffer, offsets,
|
|
981
|
+
nbCheckSamples, nbSamples,
|
|
982
|
+
largestDictbuffer, dictContentSize);
|
|
983
|
+
|
|
984
|
+
if (ZSTD_isError(totalCompressedSize)) {
|
|
985
|
+
free(largestDictbuffer);
|
|
986
|
+
free(candidateDictBuffer);
|
|
987
|
+
return COVER_dictSelectionError(totalCompressedSize);
|
|
988
|
+
}
|
|
989
|
+
|
|
990
|
+
if (params.shrinkDict == 0) {
|
|
991
|
+
COVER_dictSelection_t selection = { largestDictbuffer, dictContentSize, totalCompressedSize };
|
|
992
|
+
free(candidateDictBuffer);
|
|
993
|
+
return selection;
|
|
994
|
+
}
|
|
995
|
+
|
|
996
|
+
largestDict = dictContentSize;
|
|
997
|
+
largestCompressed = totalCompressedSize;
|
|
998
|
+
dictContentSize = ZDICT_DICTSIZE_MIN;
|
|
999
|
+
|
|
1000
|
+
/* Largest dict is initially at least ZDICT_DICTSIZE_MIN */
|
|
1001
|
+
while (dictContentSize < largestDict) {
|
|
1002
|
+
memcpy(candidateDictBuffer, largestDictbuffer, largestDict);
|
|
1003
|
+
dictContentSize = ZDICT_finalizeDictionary(
|
|
1004
|
+
candidateDictBuffer, dictContentSize, customDictContentEnd - dictContentSize, dictContentSize,
|
|
1005
|
+
samplesBuffer, samplesSizes, nbFinalizeSamples, params.zParams);
|
|
1006
|
+
|
|
1007
|
+
if (ZDICT_isError(dictContentSize)) {
|
|
1008
|
+
free(largestDictbuffer);
|
|
1009
|
+
free(candidateDictBuffer);
|
|
1010
|
+
return COVER_dictSelectionError(dictContentSize);
|
|
1011
|
+
|
|
1012
|
+
}
|
|
1013
|
+
|
|
1014
|
+
totalCompressedSize = COVER_checkTotalCompressedSize(params, samplesSizes,
|
|
1015
|
+
samplesBuffer, offsets,
|
|
1016
|
+
nbCheckSamples, nbSamples,
|
|
1017
|
+
candidateDictBuffer, dictContentSize);
|
|
1018
|
+
|
|
1019
|
+
if (ZSTD_isError(totalCompressedSize)) {
|
|
1020
|
+
free(largestDictbuffer);
|
|
1021
|
+
free(candidateDictBuffer);
|
|
1022
|
+
return COVER_dictSelectionError(totalCompressedSize);
|
|
1023
|
+
}
|
|
1024
|
+
|
|
1025
|
+
if (totalCompressedSize <= largestCompressed * regressionTolerance) {
|
|
1026
|
+
COVER_dictSelection_t selection = { candidateDictBuffer, dictContentSize, totalCompressedSize };
|
|
1027
|
+
free(largestDictbuffer);
|
|
1028
|
+
return selection;
|
|
1029
|
+
}
|
|
1030
|
+
dictContentSize *= 2;
|
|
1031
|
+
}
|
|
1032
|
+
dictContentSize = largestDict;
|
|
1033
|
+
totalCompressedSize = largestCompressed;
|
|
1034
|
+
{
|
|
1035
|
+
COVER_dictSelection_t selection = { largestDictbuffer, dictContentSize, totalCompressedSize };
|
|
1036
|
+
free(candidateDictBuffer);
|
|
1037
|
+
return selection;
|
|
1038
|
+
}
|
|
1039
|
+
}
|
|
1040
|
+
|
|
929
1041
|
/**
|
|
930
1042
|
* Parameters for COVER_tryParameters().
|
|
931
1043
|
*/
|
|
@@ -951,6 +1063,7 @@ static void COVER_tryParameters(void *opaque) {
|
|
|
951
1063
|
/* Allocate space for hash table, dict, and freqs */
|
|
952
1064
|
COVER_map_t activeDmers;
|
|
953
1065
|
BYTE *const dict = (BYTE * const)malloc(dictBufferCapacity);
|
|
1066
|
+
COVER_dictSelection_t selection = COVER_dictSelectionError(ERROR(GENERIC));
|
|
954
1067
|
U32 *freqs = (U32 *)malloc(ctx->suffixSize * sizeof(U32));
|
|
955
1068
|
if (!COVER_map_init(&activeDmers, parameters.k - parameters.d + 1)) {
|
|
956
1069
|
DISPLAYLEVEL(1, "Failed to allocate dmer map: out of memory\n");
|
|
@@ -966,29 +1079,21 @@ static void COVER_tryParameters(void *opaque) {
|
|
|
966
1079
|
{
|
|
967
1080
|
const size_t tail = COVER_buildDictionary(ctx, freqs, &activeDmers, dict,
|
|
968
1081
|
dictBufferCapacity, parameters);
|
|
969
|
-
|
|
970
|
-
|
|
971
|
-
|
|
972
|
-
|
|
973
|
-
if (
|
|
974
|
-
DISPLAYLEVEL(1, "Failed to
|
|
1082
|
+
selection = COVER_selectDict(dict + tail, dictBufferCapacity - tail,
|
|
1083
|
+
ctx->samples, ctx->samplesSizes, (unsigned)ctx->nbTrainSamples, ctx->nbTrainSamples, ctx->nbSamples, parameters, ctx->offsets,
|
|
1084
|
+
totalCompressedSize);
|
|
1085
|
+
|
|
1086
|
+
if (COVER_dictSelectionIsError(selection)) {
|
|
1087
|
+
DISPLAYLEVEL(1, "Failed to select dictionary\n");
|
|
975
1088
|
goto _cleanup;
|
|
976
1089
|
}
|
|
977
1090
|
}
|
|
978
|
-
/* Check total compressed size */
|
|
979
|
-
totalCompressedSize = COVER_checkTotalCompressedSize(parameters, ctx->samplesSizes,
|
|
980
|
-
ctx->samples, ctx->offsets,
|
|
981
|
-
ctx->nbTrainSamples, ctx->nbSamples,
|
|
982
|
-
dict, dictBufferCapacity);
|
|
983
|
-
|
|
984
1091
|
_cleanup:
|
|
985
|
-
|
|
986
|
-
|
|
1092
|
+
free(dict);
|
|
1093
|
+
COVER_best_finish(data->best, parameters, selection);
|
|
987
1094
|
free(data);
|
|
988
1095
|
COVER_map_destroy(&activeDmers);
|
|
989
|
-
|
|
990
|
-
free(dict);
|
|
991
|
-
}
|
|
1096
|
+
COVER_dictSelectionFree(selection);
|
|
992
1097
|
if (freqs) {
|
|
993
1098
|
free(freqs);
|
|
994
1099
|
}
|
|
@@ -1010,6 +1115,7 @@ ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover(
|
|
|
1010
1115
|
const unsigned kStepSize = MAX((kMaxK - kMinK) / kSteps, 1);
|
|
1011
1116
|
const unsigned kIterations =
|
|
1012
1117
|
(1 + (kMaxD - kMinD) / 2) * (1 + (kMaxK - kMinK) / kStepSize);
|
|
1118
|
+
const unsigned shrinkDict = 0;
|
|
1013
1119
|
/* Local variables */
|
|
1014
1120
|
const int displayLevel = parameters->zParams.notificationLevel;
|
|
1015
1121
|
unsigned iteration = 1;
|
|
@@ -1022,15 +1128,15 @@ ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover(
|
|
|
1022
1128
|
/* Checks */
|
|
1023
1129
|
if (splitPoint <= 0 || splitPoint > 1) {
|
|
1024
1130
|
LOCALDISPLAYLEVEL(displayLevel, 1, "Incorrect parameters\n");
|
|
1025
|
-
return ERROR(
|
|
1131
|
+
return ERROR(parameter_outOfBound);
|
|
1026
1132
|
}
|
|
1027
1133
|
if (kMinK < kMaxD || kMaxK < kMinK) {
|
|
1028
1134
|
LOCALDISPLAYLEVEL(displayLevel, 1, "Incorrect parameters\n");
|
|
1029
|
-
return ERROR(
|
|
1135
|
+
return ERROR(parameter_outOfBound);
|
|
1030
1136
|
}
|
|
1031
1137
|
if (nbSamples == 0) {
|
|
1032
1138
|
DISPLAYLEVEL(1, "Cover must have at least one input file\n");
|
|
1033
|
-
return ERROR(
|
|
1139
|
+
return ERROR(srcSize_wrong);
|
|
1034
1140
|
}
|
|
1035
1141
|
if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) {
|
|
1036
1142
|
DISPLAYLEVEL(1, "dictBufferCapacity must be at least %u\n",
|
|
@@ -1054,11 +1160,14 @@ ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover(
|
|
|
1054
1160
|
/* Initialize the context for this value of d */
|
|
1055
1161
|
COVER_ctx_t ctx;
|
|
1056
1162
|
LOCALDISPLAYLEVEL(displayLevel, 3, "d=%u\n", d);
|
|
1057
|
-
|
|
1058
|
-
|
|
1059
|
-
|
|
1060
|
-
|
|
1061
|
-
|
|
1163
|
+
{
|
|
1164
|
+
const size_t initVal = COVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples, d, splitPoint);
|
|
1165
|
+
if (ZSTD_isError(initVal)) {
|
|
1166
|
+
LOCALDISPLAYLEVEL(displayLevel, 1, "Failed to initialize context\n");
|
|
1167
|
+
COVER_best_destroy(&best);
|
|
1168
|
+
POOL_free(pool);
|
|
1169
|
+
return initVal;
|
|
1170
|
+
}
|
|
1062
1171
|
}
|
|
1063
1172
|
if (!warned) {
|
|
1064
1173
|
COVER_warnOnSmallCorpus(dictBufferCapacity, ctx.suffixSize, displayLevel);
|
|
@@ -1075,7 +1184,7 @@ ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover(
|
|
|
1075
1184
|
COVER_best_destroy(&best);
|
|
1076
1185
|
COVER_ctx_destroy(&ctx);
|
|
1077
1186
|
POOL_free(pool);
|
|
1078
|
-
return ERROR(
|
|
1187
|
+
return ERROR(memory_allocation);
|
|
1079
1188
|
}
|
|
1080
1189
|
data->ctx = &ctx;
|
|
1081
1190
|
data->best = &best;
|
|
@@ -1085,6 +1194,7 @@ ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover(
|
|
|
1085
1194
|
data->parameters.d = d;
|
|
1086
1195
|
data->parameters.splitPoint = splitPoint;
|
|
1087
1196
|
data->parameters.steps = kSteps;
|
|
1197
|
+
data->parameters.shrinkDict = shrinkDict;
|
|
1088
1198
|
data->parameters.zParams.notificationLevel = g_displayLevel;
|
|
1089
1199
|
/* Check the parameters */
|
|
1090
1200
|
if (!COVER_checkParameters(data->parameters, dictBufferCapacity)) {
|