zstd-ruby 1.3.7.0 → 1.3.8.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +1 -1
- data/ext/zstdruby/libzstd/BUCK +15 -2
- data/ext/zstdruby/libzstd/Makefile +37 -2
- data/ext/zstdruby/libzstd/README.md +67 -41
- data/ext/zstdruby/libzstd/common/bitstream.h +2 -2
- data/ext/zstdruby/libzstd/common/compiler.h +19 -12
- data/ext/zstdruby/libzstd/common/cpu.h +1 -1
- data/ext/zstdruby/libzstd/common/debug.h +22 -11
- data/ext/zstdruby/libzstd/common/error_private.c +6 -0
- data/ext/zstdruby/libzstd/common/fse.h +2 -2
- data/ext/zstdruby/libzstd/common/huf.h +25 -1
- data/ext/zstdruby/libzstd/common/pool.c +1 -1
- data/ext/zstdruby/libzstd/common/zstd_common.c +3 -1
- data/ext/zstdruby/libzstd/common/zstd_errors.h +1 -0
- data/ext/zstdruby/libzstd/common/zstd_internal.h +11 -2
- data/ext/zstdruby/libzstd/compress/fse_compress.c +3 -3
- data/ext/zstdruby/libzstd/compress/hist.c +19 -11
- data/ext/zstdruby/libzstd/compress/hist.h +11 -8
- data/ext/zstdruby/libzstd/compress/huf_compress.c +33 -31
- data/ext/zstdruby/libzstd/compress/zstd_compress.c +621 -371
- data/ext/zstdruby/libzstd/compress/zstd_compress_internal.h +90 -28
- data/ext/zstdruby/libzstd/compress/zstd_double_fast.c +4 -4
- data/ext/zstdruby/libzstd/compress/zstd_fast.c +15 -15
- data/ext/zstdruby/libzstd/compress/zstd_lazy.c +25 -18
- data/ext/zstdruby/libzstd/compress/zstd_ldm.c +18 -67
- data/ext/zstdruby/libzstd/compress/zstd_ldm.h +2 -6
- data/ext/zstdruby/libzstd/compress/zstd_opt.c +133 -48
- data/ext/zstdruby/libzstd/compress/zstd_opt.h +8 -0
- data/ext/zstdruby/libzstd/compress/zstdmt_compress.c +229 -73
- data/ext/zstdruby/libzstd/compress/zstdmt_compress.h +18 -10
- data/ext/zstdruby/libzstd/decompress/huf_decompress.c +178 -42
- data/ext/zstdruby/libzstd/decompress/zstd_ddict.c +240 -0
- data/ext/zstdruby/libzstd/decompress/zstd_ddict.h +44 -0
- data/ext/zstdruby/libzstd/decompress/zstd_decompress.c +244 -1680
- data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.c +1307 -0
- data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.h +59 -0
- data/ext/zstdruby/libzstd/decompress/zstd_decompress_internal.h +168 -0
- data/ext/zstdruby/libzstd/dictBuilder/cover.c +13 -11
- data/ext/zstdruby/libzstd/dictBuilder/fastcover.c +15 -15
- data/ext/zstdruby/libzstd/dictBuilder/zdict.c +28 -28
- data/ext/zstdruby/libzstd/dll/libzstd.def +0 -1
- data/ext/zstdruby/libzstd/legacy/zstd_v04.c +0 -10
- data/ext/zstdruby/libzstd/legacy/zstd_v05.c +15 -15
- data/ext/zstdruby/libzstd/zstd.h +1208 -968
- data/lib/zstd-ruby/version.rb +1 -1
- metadata +7 -2
@@ -0,0 +1,59 @@
|
|
1
|
+
/*
|
2
|
+
* Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
|
3
|
+
* All rights reserved.
|
4
|
+
*
|
5
|
+
* This source code is licensed under both the BSD-style license (found in the
|
6
|
+
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
|
7
|
+
* in the COPYING file in the root directory of this source tree).
|
8
|
+
* You may select, at your option, one of the above-listed licenses.
|
9
|
+
*/
|
10
|
+
|
11
|
+
|
12
|
+
#ifndef ZSTD_DEC_BLOCK_H
|
13
|
+
#define ZSTD_DEC_BLOCK_H
|
14
|
+
|
15
|
+
/*-*******************************************************
|
16
|
+
* Dependencies
|
17
|
+
*********************************************************/
|
18
|
+
#include <stddef.h> /* size_t */
|
19
|
+
#include "zstd.h" /* DCtx, and some public functions */
|
20
|
+
#include "zstd_internal.h" /* blockProperties_t, and some public functions */
|
21
|
+
#include "zstd_decompress_internal.h" /* ZSTD_seqSymbol */
|
22
|
+
|
23
|
+
|
24
|
+
/* === Prototypes === */
|
25
|
+
|
26
|
+
/* note: prototypes already published within `zstd.h` :
|
27
|
+
* ZSTD_decompressBlock()
|
28
|
+
*/
|
29
|
+
|
30
|
+
/* note: prototypes already published within `zstd_internal.h` :
|
31
|
+
* ZSTD_getcBlockSize()
|
32
|
+
* ZSTD_decodeSeqHeaders()
|
33
|
+
*/
|
34
|
+
|
35
|
+
|
36
|
+
/* ZSTD_decompressBlock_internal() :
|
37
|
+
* decompress block, starting at `src`,
|
38
|
+
* into destination buffer `dst`.
|
39
|
+
* @return : decompressed block size,
|
40
|
+
* or an error code (which can be tested using ZSTD_isError())
|
41
|
+
*/
|
42
|
+
size_t ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
|
43
|
+
void* dst, size_t dstCapacity,
|
44
|
+
const void* src, size_t srcSize, const int frame);
|
45
|
+
|
46
|
+
/* ZSTD_buildFSETable() :
|
47
|
+
* generate FSE decoding table for one symbol (ll, ml or off)
|
48
|
+
* this function must be called with valid parameters only
|
49
|
+
* (dt is large enough, normalizedCounter distribution total is a power of 2, max is within range, etc.)
|
50
|
+
* in which case it cannot fail.
|
51
|
+
* Internal use only.
|
52
|
+
*/
|
53
|
+
void ZSTD_buildFSETable(ZSTD_seqSymbol* dt,
|
54
|
+
const short* normalizedCounter, unsigned maxSymbolValue,
|
55
|
+
const U32* baseValue, const U32* nbAdditionalBits,
|
56
|
+
unsigned tableLog);
|
57
|
+
|
58
|
+
|
59
|
+
#endif /* ZSTD_DEC_BLOCK_H */
|
@@ -0,0 +1,168 @@
|
|
1
|
+
/*
|
2
|
+
* Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
|
3
|
+
* All rights reserved.
|
4
|
+
*
|
5
|
+
* This source code is licensed under both the BSD-style license (found in the
|
6
|
+
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
|
7
|
+
* in the COPYING file in the root directory of this source tree).
|
8
|
+
* You may select, at your option, one of the above-listed licenses.
|
9
|
+
*/
|
10
|
+
|
11
|
+
|
12
|
+
/* zstd_decompress_internal:
|
13
|
+
* objects and definitions shared within lib/decompress modules */
|
14
|
+
|
15
|
+
#ifndef ZSTD_DECOMPRESS_INTERNAL_H
|
16
|
+
#define ZSTD_DECOMPRESS_INTERNAL_H
|
17
|
+
|
18
|
+
|
19
|
+
/*-*******************************************************
|
20
|
+
* Dependencies
|
21
|
+
*********************************************************/
|
22
|
+
#include "mem.h" /* BYTE, U16, U32 */
|
23
|
+
#include "zstd_internal.h" /* ZSTD_seqSymbol */
|
24
|
+
|
25
|
+
|
26
|
+
|
27
|
+
/*-*******************************************************
|
28
|
+
* Constants
|
29
|
+
*********************************************************/
|
30
|
+
static const U32 LL_base[MaxLL+1] = {
|
31
|
+
0, 1, 2, 3, 4, 5, 6, 7,
|
32
|
+
8, 9, 10, 11, 12, 13, 14, 15,
|
33
|
+
16, 18, 20, 22, 24, 28, 32, 40,
|
34
|
+
48, 64, 0x80, 0x100, 0x200, 0x400, 0x800, 0x1000,
|
35
|
+
0x2000, 0x4000, 0x8000, 0x10000 };
|
36
|
+
|
37
|
+
static const U32 OF_base[MaxOff+1] = {
|
38
|
+
0, 1, 1, 5, 0xD, 0x1D, 0x3D, 0x7D,
|
39
|
+
0xFD, 0x1FD, 0x3FD, 0x7FD, 0xFFD, 0x1FFD, 0x3FFD, 0x7FFD,
|
40
|
+
0xFFFD, 0x1FFFD, 0x3FFFD, 0x7FFFD, 0xFFFFD, 0x1FFFFD, 0x3FFFFD, 0x7FFFFD,
|
41
|
+
0xFFFFFD, 0x1FFFFFD, 0x3FFFFFD, 0x7FFFFFD, 0xFFFFFFD, 0x1FFFFFFD, 0x3FFFFFFD, 0x7FFFFFFD };
|
42
|
+
|
43
|
+
static const U32 OF_bits[MaxOff+1] = {
|
44
|
+
0, 1, 2, 3, 4, 5, 6, 7,
|
45
|
+
8, 9, 10, 11, 12, 13, 14, 15,
|
46
|
+
16, 17, 18, 19, 20, 21, 22, 23,
|
47
|
+
24, 25, 26, 27, 28, 29, 30, 31 };
|
48
|
+
|
49
|
+
static const U32 ML_base[MaxML+1] = {
|
50
|
+
3, 4, 5, 6, 7, 8, 9, 10,
|
51
|
+
11, 12, 13, 14, 15, 16, 17, 18,
|
52
|
+
19, 20, 21, 22, 23, 24, 25, 26,
|
53
|
+
27, 28, 29, 30, 31, 32, 33, 34,
|
54
|
+
35, 37, 39, 41, 43, 47, 51, 59,
|
55
|
+
67, 83, 99, 0x83, 0x103, 0x203, 0x403, 0x803,
|
56
|
+
0x1003, 0x2003, 0x4003, 0x8003, 0x10003 };
|
57
|
+
|
58
|
+
|
59
|
+
/*-*******************************************************
|
60
|
+
* Decompression types
|
61
|
+
*********************************************************/
|
62
|
+
typedef struct {
|
63
|
+
U32 fastMode;
|
64
|
+
U32 tableLog;
|
65
|
+
} ZSTD_seqSymbol_header;
|
66
|
+
|
67
|
+
typedef struct {
|
68
|
+
U16 nextState;
|
69
|
+
BYTE nbAdditionalBits;
|
70
|
+
BYTE nbBits;
|
71
|
+
U32 baseValue;
|
72
|
+
} ZSTD_seqSymbol;
|
73
|
+
|
74
|
+
#define SEQSYMBOL_TABLE_SIZE(log) (1 + (1 << (log)))
|
75
|
+
|
76
|
+
typedef struct {
|
77
|
+
ZSTD_seqSymbol LLTable[SEQSYMBOL_TABLE_SIZE(LLFSELog)]; /* Note : Space reserved for FSE Tables */
|
78
|
+
ZSTD_seqSymbol OFTable[SEQSYMBOL_TABLE_SIZE(OffFSELog)]; /* is also used as temporary workspace while building hufTable during DDict creation */
|
79
|
+
ZSTD_seqSymbol MLTable[SEQSYMBOL_TABLE_SIZE(MLFSELog)]; /* and therefore must be at least HUF_DECOMPRESS_WORKSPACE_SIZE large */
|
80
|
+
HUF_DTable hufTable[HUF_DTABLE_SIZE(HufLog)]; /* can accommodate HUF_decompress4X */
|
81
|
+
U32 rep[ZSTD_REP_NUM];
|
82
|
+
} ZSTD_entropyDTables_t;
|
83
|
+
|
84
|
+
typedef enum { ZSTDds_getFrameHeaderSize, ZSTDds_decodeFrameHeader,
|
85
|
+
ZSTDds_decodeBlockHeader, ZSTDds_decompressBlock,
|
86
|
+
ZSTDds_decompressLastBlock, ZSTDds_checkChecksum,
|
87
|
+
ZSTDds_decodeSkippableHeader, ZSTDds_skipFrame } ZSTD_dStage;
|
88
|
+
|
89
|
+
typedef enum { zdss_init=0, zdss_loadHeader,
|
90
|
+
zdss_read, zdss_load, zdss_flush } ZSTD_dStreamStage;
|
91
|
+
|
92
|
+
struct ZSTD_DCtx_s
|
93
|
+
{
|
94
|
+
const ZSTD_seqSymbol* LLTptr;
|
95
|
+
const ZSTD_seqSymbol* MLTptr;
|
96
|
+
const ZSTD_seqSymbol* OFTptr;
|
97
|
+
const HUF_DTable* HUFptr;
|
98
|
+
ZSTD_entropyDTables_t entropy;
|
99
|
+
U32 workspace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; /* space needed when building huffman tables */
|
100
|
+
const void* previousDstEnd; /* detect continuity */
|
101
|
+
const void* prefixStart; /* start of current segment */
|
102
|
+
const void* virtualStart; /* virtual start of previous segment if it was just before current one */
|
103
|
+
const void* dictEnd; /* end of previous segment */
|
104
|
+
size_t expected;
|
105
|
+
ZSTD_frameHeader fParams;
|
106
|
+
U64 decodedSize;
|
107
|
+
blockType_e bType; /* used in ZSTD_decompressContinue(), store blockType between block header decoding and block decompression stages */
|
108
|
+
ZSTD_dStage stage;
|
109
|
+
U32 litEntropy;
|
110
|
+
U32 fseEntropy;
|
111
|
+
XXH64_state_t xxhState;
|
112
|
+
size_t headerSize;
|
113
|
+
ZSTD_format_e format;
|
114
|
+
const BYTE* litPtr;
|
115
|
+
ZSTD_customMem customMem;
|
116
|
+
size_t litSize;
|
117
|
+
size_t rleSize;
|
118
|
+
size_t staticSize;
|
119
|
+
int bmi2; /* == 1 if the CPU supports BMI2 and 0 otherwise. CPU support is determined dynamically once per context lifetime. */
|
120
|
+
|
121
|
+
/* dictionary */
|
122
|
+
ZSTD_DDict* ddictLocal;
|
123
|
+
const ZSTD_DDict* ddict; /* set by ZSTD_initDStream_usingDDict(), or ZSTD_DCtx_refDDict() */
|
124
|
+
U32 dictID;
|
125
|
+
int ddictIsCold; /* if == 1 : dictionary is "new" for working context, and presumed "cold" (not in cpu cache) */
|
126
|
+
|
127
|
+
/* streaming */
|
128
|
+
ZSTD_dStreamStage streamStage;
|
129
|
+
char* inBuff;
|
130
|
+
size_t inBuffSize;
|
131
|
+
size_t inPos;
|
132
|
+
size_t maxWindowSize;
|
133
|
+
char* outBuff;
|
134
|
+
size_t outBuffSize;
|
135
|
+
size_t outStart;
|
136
|
+
size_t outEnd;
|
137
|
+
size_t lhSize;
|
138
|
+
void* legacyContext;
|
139
|
+
U32 previousLegacyVersion;
|
140
|
+
U32 legacyVersion;
|
141
|
+
U32 hostageByte;
|
142
|
+
int noForwardProgress;
|
143
|
+
|
144
|
+
/* workspace */
|
145
|
+
BYTE litBuffer[ZSTD_BLOCKSIZE_MAX + WILDCOPY_OVERLENGTH];
|
146
|
+
BYTE headerBuffer[ZSTD_FRAMEHEADERSIZE_MAX];
|
147
|
+
}; /* typedef'd to ZSTD_DCtx within "zstd.h" */
|
148
|
+
|
149
|
+
|
150
|
+
/*-*******************************************************
|
151
|
+
* Shared internal functions
|
152
|
+
*********************************************************/
|
153
|
+
|
154
|
+
/*! ZSTD_loadDEntropy() :
|
155
|
+
* dict : must point at beginning of a valid zstd dictionary.
|
156
|
+
* @return : size of entropy tables read */
|
157
|
+
size_t ZSTD_loadDEntropy(ZSTD_entropyDTables_t* entropy,
|
158
|
+
const void* const dict, size_t const dictSize);
|
159
|
+
|
160
|
+
/*! ZSTD_checkContinuity() :
|
161
|
+
* check if next `dst` follows previous position, where decompression ended.
|
162
|
+
* If yes, do nothing (continue on current segment).
|
163
|
+
* If not, classify previous segment as "external dictionary", and start a new segment.
|
164
|
+
* This function cannot fail. */
|
165
|
+
void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst);
|
166
|
+
|
167
|
+
|
168
|
+
#endif /* ZSTD_DECOMPRESS_INTERNAL_H */
|
@@ -39,7 +39,7 @@
|
|
39
39
|
/*-*************************************
|
40
40
|
* Constants
|
41
41
|
***************************************/
|
42
|
-
#define COVER_MAX_SAMPLES_SIZE (sizeof(size_t) == 8 ? ((
|
42
|
+
#define COVER_MAX_SAMPLES_SIZE (sizeof(size_t) == 8 ? ((unsigned)-1) : ((unsigned)1 GB))
|
43
43
|
#define DEFAULT_SPLITPOINT 1.0
|
44
44
|
|
45
45
|
/*-*************************************
|
@@ -543,7 +543,7 @@ static int COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer,
|
|
543
543
|
if (totalSamplesSize < MAX(d, sizeof(U64)) ||
|
544
544
|
totalSamplesSize >= (size_t)COVER_MAX_SAMPLES_SIZE) {
|
545
545
|
DISPLAYLEVEL(1, "Total samples size is too large (%u MB), maximum size is %u MB\n",
|
546
|
-
(
|
546
|
+
(unsigned)(totalSamplesSize>>20), (COVER_MAX_SAMPLES_SIZE >> 20));
|
547
547
|
return 0;
|
548
548
|
}
|
549
549
|
/* Check if there are at least 5 training samples */
|
@@ -559,9 +559,9 @@ static int COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer,
|
|
559
559
|
/* Zero the context */
|
560
560
|
memset(ctx, 0, sizeof(*ctx));
|
561
561
|
DISPLAYLEVEL(2, "Training on %u samples of total size %u\n", nbTrainSamples,
|
562
|
-
(
|
562
|
+
(unsigned)trainingSamplesSize);
|
563
563
|
DISPLAYLEVEL(2, "Testing on %u samples of total size %u\n", nbTestSamples,
|
564
|
-
(
|
564
|
+
(unsigned)testSamplesSize);
|
565
565
|
ctx->samples = samples;
|
566
566
|
ctx->samplesSizes = samplesSizes;
|
567
567
|
ctx->nbSamples = nbSamples;
|
@@ -639,11 +639,11 @@ static size_t COVER_buildDictionary(const COVER_ctx_t *ctx, U32 *freqs,
|
|
639
639
|
/* Divide the data up into epochs of equal size.
|
640
640
|
* We will select at least one segment from each epoch.
|
641
641
|
*/
|
642
|
-
const
|
643
|
-
const
|
642
|
+
const unsigned epochs = MAX(1, (U32)(dictBufferCapacity / parameters.k / 4));
|
643
|
+
const unsigned epochSize = (U32)(ctx->suffixSize / epochs);
|
644
644
|
size_t epoch;
|
645
|
-
DISPLAYLEVEL(2, "Breaking content into %u epochs of size %u\n",
|
646
|
-
|
645
|
+
DISPLAYLEVEL(2, "Breaking content into %u epochs of size %u\n",
|
646
|
+
epochs, epochSize);
|
647
647
|
/* Loop through the epochs until there are no more segments or the dictionary
|
648
648
|
* is full.
|
649
649
|
*/
|
@@ -670,7 +670,7 @@ static size_t COVER_buildDictionary(const COVER_ctx_t *ctx, U32 *freqs,
|
|
670
670
|
memcpy(dict + tail, ctx->samples + segment.begin, segmentSize);
|
671
671
|
DISPLAYUPDATE(
|
672
672
|
2, "\r%u%% ",
|
673
|
-
(
|
673
|
+
(unsigned)(((dictBufferCapacity - tail) * 100) / dictBufferCapacity));
|
674
674
|
}
|
675
675
|
DISPLAYLEVEL(2, "\r%79s\r", "");
|
676
676
|
return tail;
|
@@ -722,7 +722,7 @@ ZDICTLIB_API size_t ZDICT_trainFromBuffer_cover(
|
|
722
722
|
samplesBuffer, samplesSizes, nbSamples, parameters.zParams);
|
723
723
|
if (!ZSTD_isError(dictionarySize)) {
|
724
724
|
DISPLAYLEVEL(2, "Constructed dictionary of size %u\n",
|
725
|
-
(
|
725
|
+
(unsigned)dictionarySize);
|
726
726
|
}
|
727
727
|
COVER_ctx_destroy(&ctx);
|
728
728
|
COVER_map_destroy(&activeDmers);
|
@@ -868,6 +868,8 @@ void COVER_best_finish(COVER_best_t *best, size_t compressedSize,
|
|
868
868
|
if (!best->dict) {
|
869
869
|
best->compressedSize = ERROR(GENERIC);
|
870
870
|
best->dictSize = 0;
|
871
|
+
ZSTD_pthread_cond_signal(&best->cond);
|
872
|
+
ZSTD_pthread_mutex_unlock(&best->mutex);
|
871
873
|
return;
|
872
874
|
}
|
873
875
|
}
|
@@ -1054,7 +1056,7 @@ ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover(
|
|
1054
1056
|
}
|
1055
1057
|
/* Print status */
|
1056
1058
|
LOCALDISPLAYUPDATE(displayLevel, 2, "\r%u%% ",
|
1057
|
-
(
|
1059
|
+
(unsigned)((iteration * 100) / kIterations));
|
1058
1060
|
++iteration;
|
1059
1061
|
}
|
1060
1062
|
COVER_best_wait(&best);
|
@@ -20,7 +20,7 @@
|
|
20
20
|
/*-*************************************
|
21
21
|
* Constants
|
22
22
|
***************************************/
|
23
|
-
#define FASTCOVER_MAX_SAMPLES_SIZE (sizeof(size_t) == 8 ? ((
|
23
|
+
#define FASTCOVER_MAX_SAMPLES_SIZE (sizeof(size_t) == 8 ? ((unsigned)-1) : ((unsigned)1 GB))
|
24
24
|
#define FASTCOVER_MAX_F 31
|
25
25
|
#define FASTCOVER_MAX_ACCEL 10
|
26
26
|
#define DEFAULT_SPLITPOINT 0.75
|
@@ -159,15 +159,15 @@ static COVER_segment_t FASTCOVER_selectSegment(const FASTCOVER_ctx_t *ctx,
|
|
159
159
|
*/
|
160
160
|
while (activeSegment.end < end) {
|
161
161
|
/* Get hash value of current dmer */
|
162
|
-
const size_t
|
162
|
+
const size_t idx = FASTCOVER_hashPtrToIndex(ctx->samples + activeSegment.end, f, d);
|
163
163
|
|
164
164
|
/* Add frequency of this index to score if this is the first occurrence of index in active segment */
|
165
|
-
if (segmentFreqs[
|
166
|
-
activeSegment.score += freqs[
|
165
|
+
if (segmentFreqs[idx] == 0) {
|
166
|
+
activeSegment.score += freqs[idx];
|
167
167
|
}
|
168
168
|
/* Increment end of segment and segmentFreqs*/
|
169
169
|
activeSegment.end += 1;
|
170
|
-
segmentFreqs[
|
170
|
+
segmentFreqs[idx] += 1;
|
171
171
|
/* If the window is now too large, drop the first position */
|
172
172
|
if (activeSegment.end - activeSegment.begin == dmersInK + 1) {
|
173
173
|
/* Get hash value of the dmer to be eliminated from active segment */
|
@@ -309,7 +309,7 @@ FASTCOVER_ctx_init(FASTCOVER_ctx_t* ctx,
|
|
309
309
|
if (totalSamplesSize < MAX(d, sizeof(U64)) ||
|
310
310
|
totalSamplesSize >= (size_t)FASTCOVER_MAX_SAMPLES_SIZE) {
|
311
311
|
DISPLAYLEVEL(1, "Total samples size is too large (%u MB), maximum size is %u MB\n",
|
312
|
-
(
|
312
|
+
(unsigned)(totalSamplesSize >> 20), (FASTCOVER_MAX_SAMPLES_SIZE >> 20));
|
313
313
|
return 0;
|
314
314
|
}
|
315
315
|
|
@@ -328,9 +328,9 @@ FASTCOVER_ctx_init(FASTCOVER_ctx_t* ctx,
|
|
328
328
|
/* Zero the context */
|
329
329
|
memset(ctx, 0, sizeof(*ctx));
|
330
330
|
DISPLAYLEVEL(2, "Training on %u samples of total size %u\n", nbTrainSamples,
|
331
|
-
(
|
331
|
+
(unsigned)trainingSamplesSize);
|
332
332
|
DISPLAYLEVEL(2, "Testing on %u samples of total size %u\n", nbTestSamples,
|
333
|
-
(
|
333
|
+
(unsigned)testSamplesSize);
|
334
334
|
|
335
335
|
ctx->samples = samples;
|
336
336
|
ctx->samplesSizes = samplesSizes;
|
@@ -389,11 +389,11 @@ FASTCOVER_buildDictionary(const FASTCOVER_ctx_t* ctx,
|
|
389
389
|
/* Divide the data up into epochs of equal size.
|
390
390
|
* We will select at least one segment from each epoch.
|
391
391
|
*/
|
392
|
-
const
|
393
|
-
const
|
392
|
+
const unsigned epochs = MAX(1, (U32)(dictBufferCapacity / parameters.k));
|
393
|
+
const unsigned epochSize = (U32)(ctx->nbDmers / epochs);
|
394
394
|
size_t epoch;
|
395
|
-
DISPLAYLEVEL(2, "Breaking content into %u epochs of size %u\n",
|
396
|
-
|
395
|
+
DISPLAYLEVEL(2, "Breaking content into %u epochs of size %u\n",
|
396
|
+
epochs, epochSize);
|
397
397
|
/* Loop through the epochs until there are no more segments or the dictionary
|
398
398
|
* is full.
|
399
399
|
*/
|
@@ -423,7 +423,7 @@ FASTCOVER_buildDictionary(const FASTCOVER_ctx_t* ctx,
|
|
423
423
|
memcpy(dict + tail, ctx->samples + segment.begin, segmentSize);
|
424
424
|
DISPLAYUPDATE(
|
425
425
|
2, "\r%u%% ",
|
426
|
-
(
|
426
|
+
(unsigned)(((dictBufferCapacity - tail) * 100) / dictBufferCapacity));
|
427
427
|
}
|
428
428
|
DISPLAYLEVEL(2, "\r%79s\r", "");
|
429
429
|
return tail;
|
@@ -577,7 +577,7 @@ ZDICT_trainFromBuffer_fastCover(void* dictBuffer, size_t dictBufferCapacity,
|
|
577
577
|
samplesBuffer, samplesSizes, nbFinalizeSamples, coverParams.zParams);
|
578
578
|
if (!ZSTD_isError(dictionarySize)) {
|
579
579
|
DISPLAYLEVEL(2, "Constructed dictionary of size %u\n",
|
580
|
-
(
|
580
|
+
(unsigned)dictionarySize);
|
581
581
|
}
|
582
582
|
FASTCOVER_ctx_destroy(&ctx);
|
583
583
|
free(segmentFreqs);
|
@@ -702,7 +702,7 @@ ZDICT_optimizeTrainFromBuffer_fastCover(
|
|
702
702
|
}
|
703
703
|
/* Print status */
|
704
704
|
LOCALDISPLAYUPDATE(displayLevel, 2, "\r%u%% ",
|
705
|
-
(
|
705
|
+
(unsigned)((iteration * 100) / kIterations));
|
706
706
|
++iteration;
|
707
707
|
}
|
708
708
|
COVER_best_wait(&best);
|
@@ -255,15 +255,15 @@ static dictItem ZDICT_analyzePos(
|
|
255
255
|
}
|
256
256
|
|
257
257
|
{ int i;
|
258
|
-
U32
|
258
|
+
U32 mml;
|
259
259
|
U32 refinedStart = start;
|
260
260
|
U32 refinedEnd = end;
|
261
261
|
|
262
262
|
DISPLAYLEVEL(4, "\n");
|
263
|
-
DISPLAYLEVEL(4, "found %3u matches of length >= %i at pos %7u ", (
|
263
|
+
DISPLAYLEVEL(4, "found %3u matches of length >= %i at pos %7u ", (unsigned)(end-start), MINMATCHLENGTH, (unsigned)pos);
|
264
264
|
DISPLAYLEVEL(4, "\n");
|
265
265
|
|
266
|
-
for (
|
266
|
+
for (mml = MINMATCHLENGTH ; ; mml++) {
|
267
267
|
BYTE currentChar = 0;
|
268
268
|
U32 currentCount = 0;
|
269
269
|
U32 currentID = refinedStart;
|
@@ -271,13 +271,13 @@ static dictItem ZDICT_analyzePos(
|
|
271
271
|
U32 selectedCount = 0;
|
272
272
|
U32 selectedID = currentID;
|
273
273
|
for (id =refinedStart; id < refinedEnd; id++) {
|
274
|
-
if (b[suffix[id] +
|
274
|
+
if (b[suffix[id] + mml] != currentChar) {
|
275
275
|
if (currentCount > selectedCount) {
|
276
276
|
selectedCount = currentCount;
|
277
277
|
selectedID = currentID;
|
278
278
|
}
|
279
279
|
currentID = id;
|
280
|
-
currentChar = b[ suffix[id] +
|
280
|
+
currentChar = b[ suffix[id] + mml];
|
281
281
|
currentCount = 0;
|
282
282
|
}
|
283
283
|
currentCount ++;
|
@@ -342,7 +342,7 @@ static dictItem ZDICT_analyzePos(
|
|
342
342
|
savings[i] = savings[i-1] + (lengthList[i] * (i-3));
|
343
343
|
|
344
344
|
DISPLAYLEVEL(4, "Selected dict at position %u, of length %u : saves %u (ratio: %.2f) \n",
|
345
|
-
(
|
345
|
+
(unsigned)pos, (unsigned)maxLength, (unsigned)savings[maxLength], (double)savings[maxLength] / maxLength);
|
346
346
|
|
347
347
|
solution.pos = (U32)pos;
|
348
348
|
solution.length = (U32)maxLength;
|
@@ -497,7 +497,7 @@ static U32 ZDICT_dictSize(const dictItem* dictList)
|
|
497
497
|
static size_t ZDICT_trainBuffer_legacy(dictItem* dictList, U32 dictListSize,
|
498
498
|
const void* const buffer, size_t bufferSize, /* buffer must end with noisy guard band */
|
499
499
|
const size_t* fileSizes, unsigned nbFiles,
|
500
|
-
|
500
|
+
unsigned minRatio, U32 notificationLevel)
|
501
501
|
{
|
502
502
|
int* const suffix0 = (int*)malloc((bufferSize+2)*sizeof(*suffix0));
|
503
503
|
int* const suffix = suffix0+1;
|
@@ -523,11 +523,11 @@ static size_t ZDICT_trainBuffer_legacy(dictItem* dictList, U32 dictListSize,
|
|
523
523
|
memset(doneMarks, 0, bufferSize+16);
|
524
524
|
|
525
525
|
/* limit sample set size (divsufsort limitation)*/
|
526
|
-
if (bufferSize > ZDICT_MAX_SAMPLES_SIZE) DISPLAYLEVEL(3, "sample set too large : reduced to %u MB ...\n", (
|
526
|
+
if (bufferSize > ZDICT_MAX_SAMPLES_SIZE) DISPLAYLEVEL(3, "sample set too large : reduced to %u MB ...\n", (unsigned)(ZDICT_MAX_SAMPLES_SIZE>>20));
|
527
527
|
while (bufferSize > ZDICT_MAX_SAMPLES_SIZE) bufferSize -= fileSizes[--nbFiles];
|
528
528
|
|
529
529
|
/* sort */
|
530
|
-
DISPLAYLEVEL(2, "sorting %u files of total size %u MB ...\n", nbFiles, (
|
530
|
+
DISPLAYLEVEL(2, "sorting %u files of total size %u MB ...\n", nbFiles, (unsigned)(bufferSize>>20));
|
531
531
|
{ int const divSuftSortResult = divsufsort((const unsigned char*)buffer, suffix, (int)bufferSize, 0);
|
532
532
|
if (divSuftSortResult != 0) { result = ERROR(GENERIC); goto _cleanup; }
|
533
533
|
}
|
@@ -589,7 +589,7 @@ typedef struct
|
|
589
589
|
#define MAXREPOFFSET 1024
|
590
590
|
|
591
591
|
static void ZDICT_countEStats(EStats_ress_t esr, ZSTD_parameters params,
|
592
|
-
|
592
|
+
unsigned* countLit, unsigned* offsetcodeCount, unsigned* matchlengthCount, unsigned* litlengthCount, U32* repOffsets,
|
593
593
|
const void* src, size_t srcSize,
|
594
594
|
U32 notificationLevel)
|
595
595
|
{
|
@@ -602,7 +602,7 @@ static void ZDICT_countEStats(EStats_ress_t esr, ZSTD_parameters params,
|
|
602
602
|
|
603
603
|
}
|
604
604
|
cSize = ZSTD_compressBlock(esr.zc, esr.workPlace, ZSTD_BLOCKSIZE_MAX, src, srcSize);
|
605
|
-
if (ZSTD_isError(cSize)) { DISPLAYLEVEL(3, "warning : could not compress sample size %u \n", (
|
605
|
+
if (ZSTD_isError(cSize)) { DISPLAYLEVEL(3, "warning : could not compress sample size %u \n", (unsigned)srcSize); return; }
|
606
606
|
|
607
607
|
if (cSize) { /* if == 0; block is not compressible */
|
608
608
|
const seqStore_t* const seqStorePtr = ZSTD_getSeqStore(esr.zc);
|
@@ -671,7 +671,7 @@ static void ZDICT_insertSortCount(offsetCount_t table[ZSTD_REP_NUM+1], U32 val,
|
|
671
671
|
* rewrite `countLit` to contain a mostly flat but still compressible distribution of literals.
|
672
672
|
* necessary to avoid generating a non-compressible distribution that HUF_writeCTable() cannot encode.
|
673
673
|
*/
|
674
|
-
static void ZDICT_flatLit(
|
674
|
+
static void ZDICT_flatLit(unsigned* countLit)
|
675
675
|
{
|
676
676
|
int u;
|
677
677
|
for (u=1; u<256; u++) countLit[u] = 2;
|
@@ -687,14 +687,14 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
|
|
687
687
|
const void* dictBuffer, size_t dictBufferSize,
|
688
688
|
unsigned notificationLevel)
|
689
689
|
{
|
690
|
-
|
690
|
+
unsigned countLit[256];
|
691
691
|
HUF_CREATE_STATIC_CTABLE(hufTable, 255);
|
692
|
-
|
692
|
+
unsigned offcodeCount[OFFCODE_MAX+1];
|
693
693
|
short offcodeNCount[OFFCODE_MAX+1];
|
694
694
|
U32 offcodeMax = ZSTD_highbit32((U32)(dictBufferSize + 128 KB));
|
695
|
-
|
695
|
+
unsigned matchLengthCount[MaxML+1];
|
696
696
|
short matchLengthNCount[MaxML+1];
|
697
|
-
|
697
|
+
unsigned litLengthCount[MaxLL+1];
|
698
698
|
short litLengthNCount[MaxLL+1];
|
699
699
|
U32 repOffset[MAXREPOFFSET];
|
700
700
|
offsetCount_t bestRepOffset[ZSTD_REP_NUM+1];
|
@@ -983,33 +983,33 @@ size_t ZDICT_trainFromBuffer_unsafe_legacy(
|
|
983
983
|
|
984
984
|
/* display best matches */
|
985
985
|
if (params.zParams.notificationLevel>= 3) {
|
986
|
-
|
987
|
-
|
988
|
-
|
989
|
-
DISPLAYLEVEL(3, "\n %u segments found, of total size %u \n", dictList[0].pos-1, dictContentSize);
|
986
|
+
unsigned const nb = MIN(25, dictList[0].pos);
|
987
|
+
unsigned const dictContentSize = ZDICT_dictSize(dictList);
|
988
|
+
unsigned u;
|
989
|
+
DISPLAYLEVEL(3, "\n %u segments found, of total size %u \n", (unsigned)dictList[0].pos-1, dictContentSize);
|
990
990
|
DISPLAYLEVEL(3, "list %u best segments \n", nb-1);
|
991
991
|
for (u=1; u<nb; u++) {
|
992
|
-
|
993
|
-
|
992
|
+
unsigned const pos = dictList[u].pos;
|
993
|
+
unsigned const length = dictList[u].length;
|
994
994
|
U32 const printedLength = MIN(40, length);
|
995
995
|
if ((pos > samplesBuffSize) || ((pos + length) > samplesBuffSize)) {
|
996
996
|
free(dictList);
|
997
997
|
return ERROR(GENERIC); /* should never happen */
|
998
998
|
}
|
999
999
|
DISPLAYLEVEL(3, "%3u:%3u bytes at pos %8u, savings %7u bytes |",
|
1000
|
-
u, length, pos, dictList[u].savings);
|
1000
|
+
u, length, pos, (unsigned)dictList[u].savings);
|
1001
1001
|
ZDICT_printHex((const char*)samplesBuffer+pos, printedLength);
|
1002
1002
|
DISPLAYLEVEL(3, "| \n");
|
1003
1003
|
} }
|
1004
1004
|
|
1005
1005
|
|
1006
1006
|
/* create dictionary */
|
1007
|
-
{
|
1007
|
+
{ unsigned dictContentSize = ZDICT_dictSize(dictList);
|
1008
1008
|
if (dictContentSize < ZDICT_CONTENTSIZE_MIN) { free(dictList); return ERROR(dictionaryCreation_failed); } /* dictionary content too small */
|
1009
1009
|
if (dictContentSize < targetDictSize/4) {
|
1010
|
-
DISPLAYLEVEL(2, "! warning : selected content significantly smaller than requested (%u < %u) \n", dictContentSize, (
|
1010
|
+
DISPLAYLEVEL(2, "! warning : selected content significantly smaller than requested (%u < %u) \n", dictContentSize, (unsigned)maxDictSize);
|
1011
1011
|
if (samplesBuffSize < 10 * targetDictSize)
|
1012
|
-
DISPLAYLEVEL(2, "! consider increasing the number of samples (total size : %u MB)\n", (
|
1012
|
+
DISPLAYLEVEL(2, "! consider increasing the number of samples (total size : %u MB)\n", (unsigned)(samplesBuffSize>>20));
|
1013
1013
|
if (minRep > MINRATIO) {
|
1014
1014
|
DISPLAYLEVEL(2, "! consider increasing selectivity to produce larger dictionary (-s%u) \n", selectivity+1);
|
1015
1015
|
DISPLAYLEVEL(2, "! note : larger dictionaries are not necessarily better, test its efficiency on samples \n");
|
@@ -1017,9 +1017,9 @@ size_t ZDICT_trainFromBuffer_unsafe_legacy(
|
|
1017
1017
|
}
|
1018
1018
|
|
1019
1019
|
if ((dictContentSize > targetDictSize*3) && (nbSamples > 2*MINRATIO) && (selectivity>1)) {
|
1020
|
-
|
1020
|
+
unsigned proposedSelectivity = selectivity-1;
|
1021
1021
|
while ((nbSamples >> proposedSelectivity) <= MINRATIO) { proposedSelectivity--; }
|
1022
|
-
DISPLAYLEVEL(2, "! note : calculated dictionary significantly larger than requested (%u > %u) \n", dictContentSize, (
|
1022
|
+
DISPLAYLEVEL(2, "! note : calculated dictionary significantly larger than requested (%u > %u) \n", dictContentSize, (unsigned)maxDictSize);
|
1023
1023
|
DISPLAYLEVEL(2, "! consider increasing dictionary size, or produce denser dictionary (-s%u) \n", proposedSelectivity);
|
1024
1024
|
DISPLAYLEVEL(2, "! always test dictionary efficiency on real samples \n");
|
1025
1025
|
}
|