zstd-ruby 1.3.7.0 → 1.3.8.0
This diff covers publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their public registries.
- checksums.yaml +4 -4
- data/README.md +1 -1
- data/ext/zstdruby/libzstd/BUCK +15 -2
- data/ext/zstdruby/libzstd/Makefile +37 -2
- data/ext/zstdruby/libzstd/README.md +67 -41
- data/ext/zstdruby/libzstd/common/bitstream.h +2 -2
- data/ext/zstdruby/libzstd/common/compiler.h +19 -12
- data/ext/zstdruby/libzstd/common/cpu.h +1 -1
- data/ext/zstdruby/libzstd/common/debug.h +22 -11
- data/ext/zstdruby/libzstd/common/error_private.c +6 -0
- data/ext/zstdruby/libzstd/common/fse.h +2 -2
- data/ext/zstdruby/libzstd/common/huf.h +25 -1
- data/ext/zstdruby/libzstd/common/pool.c +1 -1
- data/ext/zstdruby/libzstd/common/zstd_common.c +3 -1
- data/ext/zstdruby/libzstd/common/zstd_errors.h +1 -0
- data/ext/zstdruby/libzstd/common/zstd_internal.h +11 -2
- data/ext/zstdruby/libzstd/compress/fse_compress.c +3 -3
- data/ext/zstdruby/libzstd/compress/hist.c +19 -11
- data/ext/zstdruby/libzstd/compress/hist.h +11 -8
- data/ext/zstdruby/libzstd/compress/huf_compress.c +33 -31
- data/ext/zstdruby/libzstd/compress/zstd_compress.c +621 -371
- data/ext/zstdruby/libzstd/compress/zstd_compress_internal.h +90 -28
- data/ext/zstdruby/libzstd/compress/zstd_double_fast.c +4 -4
- data/ext/zstdruby/libzstd/compress/zstd_fast.c +15 -15
- data/ext/zstdruby/libzstd/compress/zstd_lazy.c +25 -18
- data/ext/zstdruby/libzstd/compress/zstd_ldm.c +18 -67
- data/ext/zstdruby/libzstd/compress/zstd_ldm.h +2 -6
- data/ext/zstdruby/libzstd/compress/zstd_opt.c +133 -48
- data/ext/zstdruby/libzstd/compress/zstd_opt.h +8 -0
- data/ext/zstdruby/libzstd/compress/zstdmt_compress.c +229 -73
- data/ext/zstdruby/libzstd/compress/zstdmt_compress.h +18 -10
- data/ext/zstdruby/libzstd/decompress/huf_decompress.c +178 -42
- data/ext/zstdruby/libzstd/decompress/zstd_ddict.c +240 -0
- data/ext/zstdruby/libzstd/decompress/zstd_ddict.h +44 -0
- data/ext/zstdruby/libzstd/decompress/zstd_decompress.c +244 -1680
- data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.c +1307 -0
- data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.h +59 -0
- data/ext/zstdruby/libzstd/decompress/zstd_decompress_internal.h +168 -0
- data/ext/zstdruby/libzstd/dictBuilder/cover.c +13 -11
- data/ext/zstdruby/libzstd/dictBuilder/fastcover.c +15 -15
- data/ext/zstdruby/libzstd/dictBuilder/zdict.c +28 -28
- data/ext/zstdruby/libzstd/dll/libzstd.def +0 -1
- data/ext/zstdruby/libzstd/legacy/zstd_v04.c +0 -10
- data/ext/zstdruby/libzstd/legacy/zstd_v05.c +15 -15
- data/ext/zstdruby/libzstd/zstd.h +1208 -968
- data/lib/zstd-ruby/version.rb +1 -1
- metadata +7 -2
data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.h (new file)

```diff
@@ -0,0 +1,59 @@
+/*
+ * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+
+#ifndef ZSTD_DEC_BLOCK_H
+#define ZSTD_DEC_BLOCK_H
+
+/*-*******************************************************
+ *  Dependencies
+ *********************************************************/
+#include <stddef.h>                    /* size_t */
+#include "zstd.h"                      /* DCtx, and some public functions */
+#include "zstd_internal.h"             /* blockProperties_t, and some public functions */
+#include "zstd_decompress_internal.h"  /* ZSTD_seqSymbol */
+
+
+/* === Prototypes === */
+
+/* note: prototypes already published within `zstd.h` :
+ * ZSTD_decompressBlock()
+ */
+
+/* note: prototypes already published within `zstd_internal.h` :
+ * ZSTD_getcBlockSize()
+ * ZSTD_decodeSeqHeaders()
+ */
+
+
+/* ZSTD_decompressBlock_internal() :
+ * decompress block, starting at `src`,
+ * into destination buffer `dst`.
+ * @return : decompressed block size,
+ *           or an error code (which can be tested using ZSTD_isError())
+ */
+size_t ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
+                               void* dst, size_t dstCapacity,
+                         const void* src, size_t srcSize, const int frame);
+
+/* ZSTD_buildFSETable() :
+ * generate FSE decoding table for one symbol (ll, ml or off)
+ * this function must be called with valid parameters only
+ * (dt is large enough, normalizedCounter distribution total is a power of 2, max is within range, etc.)
+ * in which case it cannot fail.
+ * Internal use only.
+ */
+void ZSTD_buildFSETable(ZSTD_seqSymbol* dt,
+            const short* normalizedCounter, unsigned maxSymbolValue,
+            const U32* baseValue, const U32* nbAdditionalBits,
+            unsigned tableLog);
+
+
+#endif /* ZSTD_DEC_BLOCK_H */
```
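For context, this new header backs the block-level entry points that `zstd.h` already publishes. The snippet below is a minimal, illustrative sketch (not taken from the gem) of how that public block API is driven; it assumes linking against libzstd with `ZSTD_STATIC_LINKING_ONLY` defined, since `ZSTD_compressBlock()` / `ZSTD_decompressBlock()` live in the experimental section of `zstd.h`.

```c
/* Illustrative sketch only: exercises the public block API that
 * ZSTD_decompressBlock_internal() implements under the hood. */
#define ZSTD_STATIC_LINKING_ONLY   /* exposes the experimental block-level API */
#include <zstd.h>
#include <string.h>

/* Returns 1 on a successful single-block round trip, 0 otherwise. */
static int roundtrip_one_block(const void* src, size_t srcSize)
{
    char cBuf[ZSTD_BLOCKSIZE_MAX];
    char dBuf[ZSTD_BLOCKSIZE_MAX];
    ZSTD_CCtx* const cctx = ZSTD_createCCtx();
    ZSTD_DCtx* const dctx = ZSTD_createDCtx();
    int ok = 0;

    if (cctx && dctx && srcSize <= ZSTD_BLOCKSIZE_MAX) {
        ZSTD_compressBegin(cctx, 3);   /* block calls require an initialized cctx */
        {   size_t const cSize = ZSTD_compressBlock(cctx, cBuf, sizeof(cBuf), src, srcSize);
            /* Note: cSize == 0 means "not compressible"; a real caller stores that block raw. */
            if (!ZSTD_isError(cSize) && cSize > 0) {
                ZSTD_decompressBegin(dctx);   /* likewise for the dctx */
                {   size_t const dSize = ZSTD_decompressBlock(dctx, dBuf, sizeof(dBuf), cBuf, cSize);
                    ok = !ZSTD_isError(dSize)
                      && dSize == srcSize
                      && memcmp(dBuf, src, srcSize) == 0;
        }   }   }
    }
    ZSTD_freeCCtx(cctx);
    ZSTD_freeDCtx(dctx);
    return ok;
}
```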
data/ext/zstdruby/libzstd/decompress/zstd_decompress_internal.h (new file)

```diff
@@ -0,0 +1,168 @@
+/*
+ * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+
+/* zstd_decompress_internal:
+ * objects and definitions shared within lib/decompress modules */
+
+#ifndef ZSTD_DECOMPRESS_INTERNAL_H
+#define ZSTD_DECOMPRESS_INTERNAL_H
+
+
+/*-*******************************************************
+ *  Dependencies
+ *********************************************************/
+#include "mem.h"             /* BYTE, U16, U32 */
+#include "zstd_internal.h"   /* ZSTD_seqSymbol */
+
+
+
+/*-*******************************************************
+ *  Constants
+ *********************************************************/
+static const U32 LL_base[MaxLL+1] = {
+                 0,    1,    2,     3,     4,     5,     6,      7,
+                 8,    9,   10,    11,    12,    13,    14,     15,
+                16,   18,   20,    22,    24,    28,    32,     40,
+                48,   64, 0x80, 0x100, 0x200, 0x400, 0x800, 0x1000,
+                0x2000, 0x4000, 0x8000, 0x10000 };
+
+static const U32 OF_base[MaxOff+1] = {
+                 0,      1,       1,       5,     0xD,     0x1D,     0x3D,     0x7D,
+                 0xFD, 0x1FD,   0x3FD,   0x7FD,   0xFFD,   0x1FFD,   0x3FFD,   0x7FFD,
+                 0xFFFD, 0x1FFFD, 0x3FFFD, 0x7FFFD, 0xFFFFD, 0x1FFFFD, 0x3FFFFD, 0x7FFFFD,
+                 0xFFFFFD, 0x1FFFFFD, 0x3FFFFFD, 0x7FFFFFD, 0xFFFFFFD, 0x1FFFFFFD, 0x3FFFFFFD, 0x7FFFFFFD };
+
+static const U32 OF_bits[MaxOff+1] = {
+                     0,  1,  2,  3,  4,  5,  6,  7,
+                     8,  9, 10, 11, 12, 13, 14, 15,
+                    16, 17, 18, 19, 20, 21, 22, 23,
+                    24, 25, 26, 27, 28, 29, 30, 31 };
+
+static const U32 ML_base[MaxML+1] = {
+                     3,  4,  5,    6,     7,     8,     9,    10,
+                    11, 12, 13,   14,    15,    16,    17,    18,
+                    19, 20, 21,   22,    23,    24,    25,    26,
+                    27, 28, 29,   30,    31,    32,    33,    34,
+                    35, 37, 39,   41,    43,    47,    51,    59,
+                    67, 83, 99, 0x83, 0x103, 0x203, 0x403, 0x803,
+                    0x1003, 0x2003, 0x4003, 0x8003, 0x10003 };
+
+
+/*-*******************************************************
+ *  Decompression types
+ *********************************************************/
+typedef struct {
+    U32 fastMode;
+    U32 tableLog;
+} ZSTD_seqSymbol_header;
+
+typedef struct {
+    U16  nextState;
+    BYTE nbAdditionalBits;
+    BYTE nbBits;
+    U32  baseValue;
+} ZSTD_seqSymbol;
+
+#define SEQSYMBOL_TABLE_SIZE(log)   (1 + (1 << (log)))
+
+typedef struct {
+    ZSTD_seqSymbol LLTable[SEQSYMBOL_TABLE_SIZE(LLFSELog)];    /* Note : Space reserved for FSE Tables */
+    ZSTD_seqSymbol OFTable[SEQSYMBOL_TABLE_SIZE(OffFSELog)];   /* is also used as temporary workspace while building hufTable during DDict creation */
+    ZSTD_seqSymbol MLTable[SEQSYMBOL_TABLE_SIZE(MLFSELog)];    /* and therefore must be at least HUF_DECOMPRESS_WORKSPACE_SIZE large */
+    HUF_DTable hufTable[HUF_DTABLE_SIZE(HufLog)];              /* can accommodate HUF_decompress4X */
+    U32 rep[ZSTD_REP_NUM];
+} ZSTD_entropyDTables_t;
+
+typedef enum { ZSTDds_getFrameHeaderSize, ZSTDds_decodeFrameHeader,
+               ZSTDds_decodeBlockHeader, ZSTDds_decompressBlock,
+               ZSTDds_decompressLastBlock, ZSTDds_checkChecksum,
+               ZSTDds_decodeSkippableHeader, ZSTDds_skipFrame } ZSTD_dStage;
+
+typedef enum { zdss_init=0, zdss_loadHeader,
+               zdss_read, zdss_load, zdss_flush } ZSTD_dStreamStage;
+
+struct ZSTD_DCtx_s
+{
+    const ZSTD_seqSymbol* LLTptr;
+    const ZSTD_seqSymbol* MLTptr;
+    const ZSTD_seqSymbol* OFTptr;
+    const HUF_DTable* HUFptr;
+    ZSTD_entropyDTables_t entropy;
+    U32 workspace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];   /* space needed when building huffman tables */
+    const void* previousDstEnd;   /* detect continuity */
+    const void* prefixStart;      /* start of current segment */
+    const void* virtualStart;     /* virtual start of previous segment if it was just before current one */
+    const void* dictEnd;          /* end of previous segment */
+    size_t expected;
+    ZSTD_frameHeader fParams;
+    U64 decodedSize;
+    blockType_e bType;            /* used in ZSTD_decompressContinue(), store blockType between block header decoding and block decompression stages */
+    ZSTD_dStage stage;
+    U32 litEntropy;
+    U32 fseEntropy;
+    XXH64_state_t xxhState;
+    size_t headerSize;
+    ZSTD_format_e format;
+    const BYTE* litPtr;
+    ZSTD_customMem customMem;
+    size_t litSize;
+    size_t rleSize;
+    size_t staticSize;
+    int bmi2;                     /* == 1 if the CPU supports BMI2 and 0 otherwise. CPU support is determined dynamically once per context lifetime. */
+
+    /* dictionary */
+    ZSTD_DDict* ddictLocal;
+    const ZSTD_DDict* ddict;      /* set by ZSTD_initDStream_usingDDict(), or ZSTD_DCtx_refDDict() */
+    U32 dictID;
+    int ddictIsCold;              /* if == 1 : dictionary is "new" for working context, and presumed "cold" (not in cpu cache) */
+
+    /* streaming */
+    ZSTD_dStreamStage streamStage;
+    char* inBuff;
+    size_t inBuffSize;
+    size_t inPos;
+    size_t maxWindowSize;
+    char* outBuff;
+    size_t outBuffSize;
+    size_t outStart;
+    size_t outEnd;
+    size_t lhSize;
+    void* legacyContext;
+    U32 previousLegacyVersion;
+    U32 legacyVersion;
+    U32 hostageByte;
+    int noForwardProgress;
+
+    /* workspace */
+    BYTE litBuffer[ZSTD_BLOCKSIZE_MAX + WILDCOPY_OVERLENGTH];
+    BYTE headerBuffer[ZSTD_FRAMEHEADERSIZE_MAX];
+};  /* typedef'd to ZSTD_DCtx within "zstd.h" */
+
+
+/*-*******************************************************
+ *  Shared internal functions
+ *********************************************************/
+
+/*! ZSTD_loadDEntropy() :
+ *  dict : must point at beginning of a valid zstd dictionary.
+ * @return : size of entropy tables read */
+size_t ZSTD_loadDEntropy(ZSTD_entropyDTables_t* entropy,
+                         const void* const dict, size_t const dictSize);
+
+/*! ZSTD_checkContinuity() :
+ *  check if next `dst` follows previous position, where decompression ended.
+ *  If yes, do nothing (continue on current segment).
+ *  If not, classify previous segment as "external dictionary", and start a new segment.
+ *  This function cannot fail. */
+void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst);
+
+
+#endif /* ZSTD_DECOMPRESS_INTERNAL_H */
```
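As a side note on the table sizing above: `SEQSYMBOL_TABLE_SIZE(log)` reserves one header cell plus 2^log decoding cells per FSE table. The sketch below is illustrative only; the log constants and the 8-byte cell size are assumptions matching `zstd_internal.h` in this release, not values shown in this diff.

```c
#include <stdio.h>

/* Same macro as in zstd_decompress_internal.h: one header cell + 2^log states. */
#define SEQSYMBOL_TABLE_SIZE(log) (1 + (1 << (log)))

int main(void)
{
    /* Assumed values: LLFSELog = 9, OffFSELog = 8, MLFSELog = 9 (see zstd_internal.h). */
    unsigned const LLFSELog = 9, OffFSELog = 8, MLFSELog = 9;
    unsigned const cellSize = 8;   /* assumed sizeof(ZSTD_seqSymbol): U16 + 2*BYTE + U32 */

    printf("LLTable: %u cells (~%u bytes)\n",
           SEQSYMBOL_TABLE_SIZE(LLFSELog), SEQSYMBOL_TABLE_SIZE(LLFSELog) * cellSize);
    printf("OFTable: %u cells (~%u bytes)\n",
           SEQSYMBOL_TABLE_SIZE(OffFSELog), SEQSYMBOL_TABLE_SIZE(OffFSELog) * cellSize);
    printf("MLTable: %u cells (~%u bytes)\n",
           SEQSYMBOL_TABLE_SIZE(MLFSELog), SEQSYMBOL_TABLE_SIZE(MLFSELog) * cellSize);
    return 0;
}
```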
data/ext/zstdruby/libzstd/dictBuilder/cover.c

```diff
@@ -39,7 +39,7 @@
 /*-*************************************
 *  Constants
 ***************************************/
-#define COVER_MAX_SAMPLES_SIZE (sizeof(size_t) == 8 ? ((
+#define COVER_MAX_SAMPLES_SIZE (sizeof(size_t) == 8 ? ((unsigned)-1) : ((unsigned)1 GB))
 #define DEFAULT_SPLITPOINT 1.0
 
 /*-*************************************
@@ -543,7 +543,7 @@ static int COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer,
   if (totalSamplesSize < MAX(d, sizeof(U64)) ||
       totalSamplesSize >= (size_t)COVER_MAX_SAMPLES_SIZE) {
     DISPLAYLEVEL(1, "Total samples size is too large (%u MB), maximum size is %u MB\n",
-                 (
+                 (unsigned)(totalSamplesSize>>20), (COVER_MAX_SAMPLES_SIZE >> 20));
     return 0;
   }
   /* Check if there are at least 5 training samples */
@@ -559,9 +559,9 @@ static int COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer,
   /* Zero the context */
   memset(ctx, 0, sizeof(*ctx));
   DISPLAYLEVEL(2, "Training on %u samples of total size %u\n", nbTrainSamples,
-               (
+               (unsigned)trainingSamplesSize);
   DISPLAYLEVEL(2, "Testing on %u samples of total size %u\n", nbTestSamples,
-               (
+               (unsigned)testSamplesSize);
   ctx->samples = samples;
   ctx->samplesSizes = samplesSizes;
   ctx->nbSamples = nbSamples;
@@ -639,11 +639,11 @@ static size_t COVER_buildDictionary(const COVER_ctx_t *ctx, U32 *freqs,
   /* Divide the data up into epochs of equal size.
   * We will select at least one segment from each epoch.
   */
-  const
-  const
+  const unsigned epochs = MAX(1, (U32)(dictBufferCapacity / parameters.k / 4));
+  const unsigned epochSize = (U32)(ctx->suffixSize / epochs);
   size_t epoch;
-  DISPLAYLEVEL(2, "Breaking content into %u epochs of size %u\n",
-
+  DISPLAYLEVEL(2, "Breaking content into %u epochs of size %u\n",
+               epochs, epochSize);
   /* Loop through the epochs until there are no more segments or the dictionary
   * is full.
   */
@@ -670,7 +670,7 @@ static size_t COVER_buildDictionary(const COVER_ctx_t *ctx, U32 *freqs,
     memcpy(dict + tail, ctx->samples + segment.begin, segmentSize);
     DISPLAYUPDATE(
         2, "\r%u%% ",
-        (
+        (unsigned)(((dictBufferCapacity - tail) * 100) / dictBufferCapacity));
   }
   DISPLAYLEVEL(2, "\r%79s\r", "");
   return tail;
@@ -722,7 +722,7 @@ ZDICTLIB_API size_t ZDICT_trainFromBuffer_cover(
       samplesBuffer, samplesSizes, nbSamples, parameters.zParams);
   if (!ZSTD_isError(dictionarySize)) {
       DISPLAYLEVEL(2, "Constructed dictionary of size %u\n",
-                   (
+                   (unsigned)dictionarySize);
   }
   COVER_ctx_destroy(&ctx);
   COVER_map_destroy(&activeDmers);
@@ -868,6 +868,8 @@ void COVER_best_finish(COVER_best_t *best, size_t compressedSize,
       if (!best->dict) {
         best->compressedSize = ERROR(GENERIC);
         best->dictSize = 0;
+        ZSTD_pthread_cond_signal(&best->cond);
+        ZSTD_pthread_mutex_unlock(&best->mutex);
        return;
       }
     }
```
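The two lines added in `COVER_best_finish()` above close an error path that previously returned while still holding `best->mutex` and without waking the thread blocked in `COVER_best_wait()`. Below is a hedged, generic sketch of that pattern (illustrative names, not the library's types) showing why the signal and unlock must happen before the early return.

```c
#include <pthread.h>
#include <stddef.h>

/* Illustrative stand-in for COVER_best_t: a worker publishes its result and a
 * waiter blocks on `cond` until liveJobs reaches zero. */
typedef struct {
    pthread_mutex_t mutex;
    pthread_cond_t  cond;
    size_t liveJobs;
    void*  dict;              /* NULL here means "allocation failed" */
} best_t;

static void best_finish(best_t* best, void* newDict)
{
    pthread_mutex_lock(&best->mutex);
    --best->liveJobs;
    if (newDict == NULL) {                    /* error path */
        best->dict = NULL;
        pthread_cond_signal(&best->cond);     /* wake the waiter even on failure */
        pthread_mutex_unlock(&best->mutex);   /* never return with the lock held */
        return;
    }
    best->dict = newDict;                     /* success path */
    if (best->liveJobs == 0)
        pthread_cond_signal(&best->cond);
    pthread_mutex_unlock(&best->mutex);
}
```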
```diff
@@ -1054,7 +1056,7 @@ ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover(
       }
       /* Print status */
       LOCALDISPLAYUPDATE(displayLevel, 2, "\r%u%% ",
-                         (
+                         (unsigned)((iteration * 100) / kIterations));
       ++iteration;
     }
     COVER_best_wait(&best);
```
data/ext/zstdruby/libzstd/dictBuilder/fastcover.c

```diff
@@ -20,7 +20,7 @@
 /*-*************************************
 *  Constants
 ***************************************/
-#define FASTCOVER_MAX_SAMPLES_SIZE (sizeof(size_t) == 8 ? ((
+#define FASTCOVER_MAX_SAMPLES_SIZE (sizeof(size_t) == 8 ? ((unsigned)-1) : ((unsigned)1 GB))
 #define FASTCOVER_MAX_F 31
 #define FASTCOVER_MAX_ACCEL 10
 #define DEFAULT_SPLITPOINT 0.75
@@ -159,15 +159,15 @@ static COVER_segment_t FASTCOVER_selectSegment(const FASTCOVER_ctx_t *ctx,
   */
   while (activeSegment.end < end) {
     /* Get hash value of current dmer */
-    const size_t
+    const size_t idx = FASTCOVER_hashPtrToIndex(ctx->samples + activeSegment.end, f, d);
 
     /* Add frequency of this index to score if this is the first occurence of index in active segment */
-    if (segmentFreqs[
-      activeSegment.score += freqs[
+    if (segmentFreqs[idx] == 0) {
+      activeSegment.score += freqs[idx];
     }
     /* Increment end of segment and segmentFreqs*/
     activeSegment.end += 1;
-    segmentFreqs[
+    segmentFreqs[idx] += 1;
     /* If the window is now too large, drop the first position */
     if (activeSegment.end - activeSegment.begin == dmersInK + 1) {
       /* Get hash value of the dmer to be eliminated from active segment */
```
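For readers unfamiliar with the scoring loop shown above: `FASTCOVER_selectSegment()` slides a fixed-width window over dmer indices, adding a dmer's frequency to the segment score only the first time that index enters the window and subtracting it once its last copy leaves. The standalone sketch below reproduces just that bookkeeping with made-up data (all names and values are illustrative, not from the library).

```c
#include <stdio.h>

#define NB_INDEXES 16
#define WINDOW     4

int main(void)
{
    /* freqs[i] = how often dmer index i occurs in the whole corpus (made-up numbers). */
    unsigned const freqs[NB_INDEXES]  = {5,1,0,3,2,2,7,1,0,4,6,1,3,2,5,0};
    /* dmerIdx[p] = dmer index hashed at corpus position p (made-up). */
    unsigned const dmerIdx[]          = {1,3,3,6,9,9,9,10,12,14};
    size_t const nbPos = sizeof(dmerIdx) / sizeof(*dmerIdx);

    unsigned segmentFreqs[NB_INDEXES] = {0};   /* occurrences of each index inside the window */
    unsigned score = 0;
    size_t begin = 0, end = 0;

    while (end < nbPos) {
        unsigned const idx = dmerIdx[end];
        if (segmentFreqs[idx] == 0) score += freqs[idx];   /* first occurrence in window */
        segmentFreqs[idx] += 1;
        end += 1;
        if (end - begin == WINDOW + 1) {                   /* window too large: drop the front */
            unsigned const front = dmerIdx[begin];
            segmentFreqs[front] -= 1;
            if (segmentFreqs[front] == 0) score -= freqs[front];
            begin += 1;
        }
        printf("window [%zu,%zu) score=%u\n", begin, end, score);
    }
    return 0;
}
```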
```diff
@@ -309,7 +309,7 @@ FASTCOVER_ctx_init(FASTCOVER_ctx_t* ctx,
     if (totalSamplesSize < MAX(d, sizeof(U64)) ||
         totalSamplesSize >= (size_t)FASTCOVER_MAX_SAMPLES_SIZE) {
       DISPLAYLEVEL(1, "Total samples size is too large (%u MB), maximum size is %u MB\n",
-                   (
+                   (unsigned)(totalSamplesSize >> 20), (FASTCOVER_MAX_SAMPLES_SIZE >> 20));
       return 0;
     }
 
@@ -328,9 +328,9 @@ FASTCOVER_ctx_init(FASTCOVER_ctx_t* ctx,
     /* Zero the context */
     memset(ctx, 0, sizeof(*ctx));
     DISPLAYLEVEL(2, "Training on %u samples of total size %u\n", nbTrainSamples,
-                 (
+                 (unsigned)trainingSamplesSize);
     DISPLAYLEVEL(2, "Testing on %u samples of total size %u\n", nbTestSamples,
-                 (
+                 (unsigned)testSamplesSize);
 
     ctx->samples = samples;
     ctx->samplesSizes = samplesSizes;
@@ -389,11 +389,11 @@ FASTCOVER_buildDictionary(const FASTCOVER_ctx_t* ctx,
   /* Divide the data up into epochs of equal size.
   * We will select at least one segment from each epoch.
   */
-  const
-  const
+  const unsigned epochs = MAX(1, (U32)(dictBufferCapacity / parameters.k));
+  const unsigned epochSize = (U32)(ctx->nbDmers / epochs);
   size_t epoch;
-  DISPLAYLEVEL(2, "Breaking content into %u epochs of size %u\n",
-
+  DISPLAYLEVEL(2, "Breaking content into %u epochs of size %u\n",
+               epochs, epochSize);
   /* Loop through the epochs until there are no more segments or the dictionary
   * is full.
   */
@@ -423,7 +423,7 @@ FASTCOVER_buildDictionary(const FASTCOVER_ctx_t* ctx,
     memcpy(dict + tail, ctx->samples + segment.begin, segmentSize);
     DISPLAYUPDATE(
        2, "\r%u%% ",
-       (
+       (unsigned)(((dictBufferCapacity - tail) * 100) / dictBufferCapacity));
   }
   DISPLAYLEVEL(2, "\r%79s\r", "");
   return tail;
@@ -577,7 +577,7 @@ ZDICT_trainFromBuffer_fastCover(void* dictBuffer, size_t dictBufferCapacity,
                      samplesBuffer, samplesSizes, nbFinalizeSamples, coverParams.zParams);
     if (!ZSTD_isError(dictionarySize)) {
         DISPLAYLEVEL(2, "Constructed dictionary of size %u\n",
-                     (
+                     (unsigned)dictionarySize);
     }
     FASTCOVER_ctx_destroy(&ctx);
     free(segmentFreqs);
@@ -702,7 +702,7 @@ ZDICT_optimizeTrainFromBuffer_fastCover(
       }
       /* Print status */
       LOCALDISPLAYUPDATE(displayLevel, 2, "\r%u%% ",
-                         (
+                         (unsigned)((iteration * 100) / kIterations));
       ++iteration;
     }
     COVER_best_wait(&best);
```
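In both `COVER_buildDictionary()` and `FASTCOVER_buildDictionary()` above, the epoch count is now computed through a `MAX(1, ...)` clamp, which guards the following `epochSize` division against a zero epoch count when the requested dictionary capacity is smaller than the segment size `k`. A minimal sketch of the arithmetic, with made-up numbers:

```c
#include <stdio.h>

#define MAX(a, b) ((a) > (b) ? (a) : (b))

int main(void)
{
    size_t const dictBufferCapacity = 512;      /* deliberately tiny capacity */
    unsigned const k = 1024;                    /* cover/fastcover segment size parameter */
    size_t const nbDmers = 1000000;             /* stand-in for ctx->nbDmers / ctx->suffixSize */

    /* Without MAX(1, ...), 512 / 1024 == 0 and the next line would divide by zero. */
    unsigned const epochs    = MAX(1, (unsigned)(dictBufferCapacity / k));
    unsigned const epochSize = (unsigned)(nbDmers / epochs);

    printf("epochs=%u epochSize=%u\n", epochs, epochSize);
    return 0;
}
```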
data/ext/zstdruby/libzstd/dictBuilder/zdict.c

```diff
@@ -255,15 +255,15 @@ static dictItem ZDICT_analyzePos(
     }
 
     {   int i;
-        U32
+        U32 mml;
         U32 refinedStart = start;
         U32 refinedEnd = end;
 
         DISPLAYLEVEL(4, "\n");
-        DISPLAYLEVEL(4, "found %3u matches of length >= %i at pos %7u ", (
+        DISPLAYLEVEL(4, "found %3u matches of length >= %i at pos %7u ", (unsigned)(end-start), MINMATCHLENGTH, (unsigned)pos);
         DISPLAYLEVEL(4, "\n");
 
-        for (
+        for (mml = MINMATCHLENGTH ; ; mml++) {
             BYTE currentChar = 0;
             U32 currentCount = 0;
             U32 currentID = refinedStart;
@@ -271,13 +271,13 @@ static dictItem ZDICT_analyzePos(
             U32 selectedCount = 0;
             U32 selectedID = currentID;
             for (id =refinedStart; id < refinedEnd; id++) {
-                if (b[suffix[id] +
+                if (b[suffix[id] + mml] != currentChar) {
                     if (currentCount > selectedCount) {
                         selectedCount = currentCount;
                         selectedID = currentID;
                     }
                     currentID = id;
-                    currentChar = b[ suffix[id] +
+                    currentChar = b[ suffix[id] + mml];
                     currentCount = 0;
                 }
                 currentCount ++;
@@ -342,7 +342,7 @@ static dictItem ZDICT_analyzePos(
         savings[i] = savings[i-1] + (lengthList[i] * (i-3));
 
     DISPLAYLEVEL(4, "Selected dict at position %u, of length %u : saves %u (ratio: %.2f) \n",
-                 (
+                 (unsigned)pos, (unsigned)maxLength, (unsigned)savings[maxLength], (double)savings[maxLength] / maxLength);
 
     solution.pos = (U32)pos;
     solution.length = (U32)maxLength;
@@ -497,7 +497,7 @@ static U32 ZDICT_dictSize(const dictItem* dictList)
 static size_t ZDICT_trainBuffer_legacy(dictItem* dictList, U32 dictListSize,
                             const void* const buffer, size_t bufferSize, /* buffer must end with noisy guard band */
                             const size_t* fileSizes, unsigned nbFiles,
-
+                            unsigned minRatio, U32 notificationLevel)
 {
     int* const suffix0 = (int*)malloc((bufferSize+2)*sizeof(*suffix0));
     int* const suffix = suffix0+1;
@@ -523,11 +523,11 @@ static size_t ZDICT_trainBuffer_legacy(dictItem* dictList, U32 dictListSize,
     memset(doneMarks, 0, bufferSize+16);
 
     /* limit sample set size (divsufsort limitation)*/
-    if (bufferSize > ZDICT_MAX_SAMPLES_SIZE) DISPLAYLEVEL(3, "sample set too large : reduced to %u MB ...\n", (
+    if (bufferSize > ZDICT_MAX_SAMPLES_SIZE) DISPLAYLEVEL(3, "sample set too large : reduced to %u MB ...\n", (unsigned)(ZDICT_MAX_SAMPLES_SIZE>>20));
     while (bufferSize > ZDICT_MAX_SAMPLES_SIZE) bufferSize -= fileSizes[--nbFiles];
 
     /* sort */
-    DISPLAYLEVEL(2, "sorting %u files of total size %u MB ...\n", nbFiles, (
+    DISPLAYLEVEL(2, "sorting %u files of total size %u MB ...\n", nbFiles, (unsigned)(bufferSize>>20));
     {   int const divSuftSortResult = divsufsort((const unsigned char*)buffer, suffix, (int)bufferSize, 0);
         if (divSuftSortResult != 0) { result = ERROR(GENERIC); goto _cleanup; }
     }
@@ -589,7 +589,7 @@ typedef struct
 #define MAXREPOFFSET 1024
 
 static void ZDICT_countEStats(EStats_ress_t esr, ZSTD_parameters params,
-
+                              unsigned* countLit, unsigned* offsetcodeCount, unsigned* matchlengthCount, unsigned* litlengthCount, U32* repOffsets,
                               const void* src, size_t srcSize,
                               U32 notificationLevel)
 {
@@ -602,7 +602,7 @@ static void ZDICT_countEStats(EStats_ress_t esr, ZSTD_parameters params,
 
     }
     cSize = ZSTD_compressBlock(esr.zc, esr.workPlace, ZSTD_BLOCKSIZE_MAX, src, srcSize);
-    if (ZSTD_isError(cSize)) { DISPLAYLEVEL(3, "warning : could not compress sample size %u \n", (
+    if (ZSTD_isError(cSize)) { DISPLAYLEVEL(3, "warning : could not compress sample size %u \n", (unsigned)srcSize); return; }
 
     if (cSize) {  /* if == 0; block is not compressible */
         const seqStore_t* const seqStorePtr = ZSTD_getSeqStore(esr.zc);
@@ -671,7 +671,7 @@ static void ZDICT_insertSortCount(offsetCount_t table[ZSTD_REP_NUM+1], U32 val,
 * rewrite `countLit` to contain a mostly flat but still compressible distribution of literals.
 * necessary to avoid generating a non-compressible distribution that HUF_writeCTable() cannot encode.
 */
-static void ZDICT_flatLit(
+static void ZDICT_flatLit(unsigned* countLit)
 {
     int u;
     for (u=1; u<256; u++) countLit[u] = 2;
@@ -687,14 +687,14 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
                             const void* dictBuffer, size_t dictBufferSize,
                             unsigned notificationLevel)
 {
-
+    unsigned countLit[256];
     HUF_CREATE_STATIC_CTABLE(hufTable, 255);
-
+    unsigned offcodeCount[OFFCODE_MAX+1];
     short offcodeNCount[OFFCODE_MAX+1];
     U32 offcodeMax = ZSTD_highbit32((U32)(dictBufferSize + 128 KB));
-
+    unsigned matchLengthCount[MaxML+1];
     short matchLengthNCount[MaxML+1];
-
+    unsigned litLengthCount[MaxLL+1];
     short litLengthNCount[MaxLL+1];
     U32 repOffset[MAXREPOFFSET];
     offsetCount_t bestRepOffset[ZSTD_REP_NUM+1];
@@ -983,33 +983,33 @@ size_t ZDICT_trainFromBuffer_unsafe_legacy(
 
     /* display best matches */
     if (params.zParams.notificationLevel>= 3) {
-
-
-
-        DISPLAYLEVEL(3, "\n %u segments found, of total size %u \n", dictList[0].pos-1, dictContentSize);
+        unsigned const nb = MIN(25, dictList[0].pos);
+        unsigned const dictContentSize = ZDICT_dictSize(dictList);
+        unsigned u;
+        DISPLAYLEVEL(3, "\n %u segments found, of total size %u \n", (unsigned)dictList[0].pos-1, dictContentSize);
         DISPLAYLEVEL(3, "list %u best segments \n", nb-1);
         for (u=1; u<nb; u++) {
-
-
+            unsigned const pos = dictList[u].pos;
+            unsigned const length = dictList[u].length;
             U32 const printedLength = MIN(40, length);
             if ((pos > samplesBuffSize) || ((pos + length) > samplesBuffSize)) {
                 free(dictList);
                 return ERROR(GENERIC); /* should never happen */
             }
             DISPLAYLEVEL(3, "%3u:%3u bytes at pos %8u, savings %7u bytes |",
-                         u, length, pos, dictList[u].savings);
+                         u, length, pos, (unsigned)dictList[u].savings);
             ZDICT_printHex((const char*)samplesBuffer+pos, printedLength);
             DISPLAYLEVEL(3, "| \n");
     }   }
 
 
     /* create dictionary */
-    {
+    {   unsigned dictContentSize = ZDICT_dictSize(dictList);
         if (dictContentSize < ZDICT_CONTENTSIZE_MIN) { free(dictList); return ERROR(dictionaryCreation_failed); }   /* dictionary content too small */
         if (dictContentSize < targetDictSize/4) {
-            DISPLAYLEVEL(2, "! warning : selected content significantly smaller than requested (%u < %u) \n", dictContentSize, (
+            DISPLAYLEVEL(2, "! warning : selected content significantly smaller than requested (%u < %u) \n", dictContentSize, (unsigned)maxDictSize);
             if (samplesBuffSize < 10 * targetDictSize)
-                DISPLAYLEVEL(2, "! consider increasing the number of samples (total size : %u MB)\n", (
+                DISPLAYLEVEL(2, "! consider increasing the number of samples (total size : %u MB)\n", (unsigned)(samplesBuffSize>>20));
             if (minRep > MINRATIO) {
                 DISPLAYLEVEL(2, "! consider increasing selectivity to produce larger dictionary (-s%u) \n", selectivity+1);
                 DISPLAYLEVEL(2, "! note : larger dictionaries are not necessarily better, test its efficiency on samples \n");
@@ -1017,9 +1017,9 @@ size_t ZDICT_trainFromBuffer_unsafe_legacy(
         }
 
         if ((dictContentSize > targetDictSize*3) && (nbSamples > 2*MINRATIO) && (selectivity>1)) {
-
+            unsigned proposedSelectivity = selectivity-1;
            while ((nbSamples >> proposedSelectivity) <= MINRATIO) { proposedSelectivity--; }
-            DISPLAYLEVEL(2, "! note : calculated dictionary significantly larger than requested (%u > %u) \n", dictContentSize, (
+            DISPLAYLEVEL(2, "! note : calculated dictionary significantly larger than requested (%u > %u) \n", dictContentSize, (unsigned)maxDictSize);
             DISPLAYLEVEL(2, "! consider increasing dictionary size, or produce denser dictionary (-s%u) \n", proposedSelectivity);
             DISPLAYLEVEL(2, "! always test dictionary efficiency on real samples \n");
         }
```
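Most of the zdict.c changes above are display-side type cleanups (printing through `unsigned` casts) in the dictionary trainer. For orientation, here is a hedged, minimal sketch of how the public entry point these internals serve, `ZDICT_trainFromBuffer()`, is typically called from C; the buffer sizes and sample layout are illustrative, not taken from this gem.

```c
#include <zdict.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

int main(void)
{
    /* Toy corpus: 100 small "samples" laid out back-to-back in one buffer. */
    enum { NB_SAMPLES = 100, SAMPLE_SIZE = 1000, DICT_CAPACITY = 16 * 1024 };
    size_t samplesSizes[NB_SAMPLES];
    char*  samples = malloc((size_t)NB_SAMPLES * SAMPLE_SIZE);
    char*  dictBuf = malloc(DICT_CAPACITY);
    size_t i;

    if (!samples || !dictBuf) return 1;
    for (i = 0; i < NB_SAMPLES; i++) {
        memset(samples + i * SAMPLE_SIZE, 'a' + (int)(i % 26), SAMPLE_SIZE);
        samplesSizes[i] = SAMPLE_SIZE;
    }

    {   size_t const dictSize = ZDICT_trainFromBuffer(dictBuf, DICT_CAPACITY,
                                                      samples, samplesSizes, NB_SAMPLES);
        if (ZDICT_isError(dictSize)) {
            printf("training failed: %s\n", ZDICT_getErrorName(dictSize));
        } else {
            printf("trained dictionary of %u bytes\n", (unsigned)dictSize);
        }
    }
    free(samples);
    free(dictBuf);
    return 0;
}
```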