extzstd 0.3.1 → 0.3.3

Sign up to get free protection for your applications and to get access to all the features.
Files changed (113) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +28 -14
  3. data/contrib/zstd/CHANGELOG +301 -56
  4. data/contrib/zstd/CONTRIBUTING.md +169 -72
  5. data/contrib/zstd/LICENSE +4 -4
  6. data/contrib/zstd/Makefile +116 -87
  7. data/contrib/zstd/Package.swift +36 -0
  8. data/contrib/zstd/README.md +62 -32
  9. data/contrib/zstd/TESTING.md +2 -3
  10. data/contrib/zstd/appveyor.yml +52 -136
  11. data/contrib/zstd/lib/BUCK +5 -7
  12. data/contrib/zstd/lib/Makefile +225 -222
  13. data/contrib/zstd/lib/README.md +51 -6
  14. data/contrib/zstd/lib/common/allocations.h +55 -0
  15. data/contrib/zstd/lib/common/bits.h +200 -0
  16. data/contrib/zstd/lib/common/bitstream.h +45 -62
  17. data/contrib/zstd/lib/common/compiler.h +205 -22
  18. data/contrib/zstd/lib/common/cpu.h +1 -3
  19. data/contrib/zstd/lib/common/debug.c +1 -1
  20. data/contrib/zstd/lib/common/debug.h +12 -19
  21. data/contrib/zstd/lib/common/entropy_common.c +172 -48
  22. data/contrib/zstd/lib/common/error_private.c +10 -2
  23. data/contrib/zstd/lib/common/error_private.h +82 -3
  24. data/contrib/zstd/lib/common/fse.h +37 -86
  25. data/contrib/zstd/lib/common/fse_decompress.c +117 -92
  26. data/contrib/zstd/lib/common/huf.h +99 -166
  27. data/contrib/zstd/lib/common/mem.h +124 -142
  28. data/contrib/zstd/lib/common/pool.c +54 -27
  29. data/contrib/zstd/lib/common/pool.h +10 -4
  30. data/contrib/zstd/lib/common/portability_macros.h +156 -0
  31. data/contrib/zstd/lib/common/threading.c +74 -19
  32. data/contrib/zstd/lib/common/threading.h +5 -10
  33. data/contrib/zstd/lib/common/xxhash.c +7 -847
  34. data/contrib/zstd/lib/common/xxhash.h +5568 -167
  35. data/contrib/zstd/lib/common/zstd_common.c +2 -37
  36. data/contrib/zstd/lib/common/zstd_deps.h +111 -0
  37. data/contrib/zstd/lib/common/zstd_internal.h +132 -187
  38. data/contrib/zstd/lib/common/zstd_trace.h +163 -0
  39. data/contrib/zstd/lib/compress/clevels.h +134 -0
  40. data/contrib/zstd/lib/compress/fse_compress.c +83 -157
  41. data/contrib/zstd/lib/compress/hist.c +27 -29
  42. data/contrib/zstd/lib/compress/hist.h +2 -2
  43. data/contrib/zstd/lib/compress/huf_compress.c +916 -279
  44. data/contrib/zstd/lib/compress/zstd_compress.c +3773 -1019
  45. data/contrib/zstd/lib/compress/zstd_compress_internal.h +610 -203
  46. data/contrib/zstd/lib/compress/zstd_compress_literals.c +119 -42
  47. data/contrib/zstd/lib/compress/zstd_compress_literals.h +16 -6
  48. data/contrib/zstd/lib/compress/zstd_compress_sequences.c +42 -19
  49. data/contrib/zstd/lib/compress/zstd_compress_sequences.h +1 -1
  50. data/contrib/zstd/lib/compress/zstd_compress_superblock.c +49 -317
  51. data/contrib/zstd/lib/compress/zstd_compress_superblock.h +1 -1
  52. data/contrib/zstd/lib/compress/zstd_cwksp.h +320 -103
  53. data/contrib/zstd/lib/compress/zstd_double_fast.c +388 -151
  54. data/contrib/zstd/lib/compress/zstd_double_fast.h +3 -2
  55. data/contrib/zstd/lib/compress/zstd_fast.c +729 -265
  56. data/contrib/zstd/lib/compress/zstd_fast.h +3 -2
  57. data/contrib/zstd/lib/compress/zstd_lazy.c +1270 -251
  58. data/contrib/zstd/lib/compress/zstd_lazy.h +61 -1
  59. data/contrib/zstd/lib/compress/zstd_ldm.c +324 -219
  60. data/contrib/zstd/lib/compress/zstd_ldm.h +9 -2
  61. data/contrib/zstd/lib/compress/zstd_ldm_geartab.h +106 -0
  62. data/contrib/zstd/lib/compress/zstd_opt.c +481 -209
  63. data/contrib/zstd/lib/compress/zstd_opt.h +1 -1
  64. data/contrib/zstd/lib/compress/zstdmt_compress.c +181 -457
  65. data/contrib/zstd/lib/compress/zstdmt_compress.h +34 -113
  66. data/contrib/zstd/lib/decompress/huf_decompress.c +1199 -565
  67. data/contrib/zstd/lib/decompress/huf_decompress_amd64.S +576 -0
  68. data/contrib/zstd/lib/decompress/zstd_ddict.c +12 -12
  69. data/contrib/zstd/lib/decompress/zstd_ddict.h +2 -2
  70. data/contrib/zstd/lib/decompress/zstd_decompress.c +627 -157
  71. data/contrib/zstd/lib/decompress/zstd_decompress_block.c +1086 -326
  72. data/contrib/zstd/lib/decompress/zstd_decompress_block.h +19 -5
  73. data/contrib/zstd/lib/decompress/zstd_decompress_internal.h +62 -13
  74. data/contrib/zstd/lib/deprecated/zbuff.h +1 -1
  75. data/contrib/zstd/lib/deprecated/zbuff_common.c +1 -1
  76. data/contrib/zstd/lib/deprecated/zbuff_compress.c +24 -4
  77. data/contrib/zstd/lib/deprecated/zbuff_decompress.c +3 -1
  78. data/contrib/zstd/lib/dictBuilder/cover.c +73 -52
  79. data/contrib/zstd/lib/dictBuilder/cover.h +7 -6
  80. data/contrib/zstd/lib/dictBuilder/divsufsort.c +1 -1
  81. data/contrib/zstd/lib/dictBuilder/fastcover.c +44 -35
  82. data/contrib/zstd/lib/dictBuilder/zdict.c +103 -111
  83. data/contrib/zstd/lib/legacy/zstd_legacy.h +8 -1
  84. data/contrib/zstd/lib/legacy/zstd_v01.c +21 -54
  85. data/contrib/zstd/lib/legacy/zstd_v01.h +1 -1
  86. data/contrib/zstd/lib/legacy/zstd_v02.c +29 -70
  87. data/contrib/zstd/lib/legacy/zstd_v02.h +1 -1
  88. data/contrib/zstd/lib/legacy/zstd_v03.c +30 -73
  89. data/contrib/zstd/lib/legacy/zstd_v03.h +1 -1
  90. data/contrib/zstd/lib/legacy/zstd_v04.c +29 -71
  91. data/contrib/zstd/lib/legacy/zstd_v04.h +1 -1
  92. data/contrib/zstd/lib/legacy/zstd_v05.c +40 -86
  93. data/contrib/zstd/lib/legacy/zstd_v05.h +1 -1
  94. data/contrib/zstd/lib/legacy/zstd_v06.c +47 -88
  95. data/contrib/zstd/lib/legacy/zstd_v06.h +1 -1
  96. data/contrib/zstd/lib/legacy/zstd_v07.c +40 -83
  97. data/contrib/zstd/lib/legacy/zstd_v07.h +1 -1
  98. data/contrib/zstd/lib/libzstd.mk +214 -0
  99. data/contrib/zstd/lib/libzstd.pc.in +7 -6
  100. data/contrib/zstd/lib/module.modulemap +35 -0
  101. data/contrib/zstd/lib/{dictBuilder/zdict.h → zdict.h} +203 -34
  102. data/contrib/zstd/lib/zstd.h +1217 -287
  103. data/contrib/zstd/lib/{common/zstd_errors.h → zstd_errors.h} +28 -8
  104. data/ext/extconf.rb +7 -6
  105. data/ext/extzstd.c +19 -10
  106. data/ext/extzstd.h +6 -0
  107. data/ext/libzstd_conf.h +0 -1
  108. data/ext/zstd_decompress_asm.S +1 -0
  109. data/gemstub.rb +3 -21
  110. data/lib/extzstd/version.rb +6 -1
  111. data/lib/extzstd.rb +0 -2
  112. data/test/test_basic.rb +0 -5
  113. metadata +18 -6
@@ -1,5 +1,5 @@
1
1
  /*
2
- * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
2
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
3
3
  * All rights reserved.
4
4
  *
5
5
  * This source code is licensed under both the BSD-style license (found in the
@@ -15,288 +15,10 @@
15
15
 
16
16
  #include "../common/zstd_internal.h" /* ZSTD_getSequenceLength */
17
17
  #include "hist.h" /* HIST_countFast_wksp */
18
- #include "zstd_compress_internal.h"
18
+ #include "zstd_compress_internal.h" /* ZSTD_[huf|fse|entropy]CTablesMetadata_t */
19
19
  #include "zstd_compress_sequences.h"
20
20
  #include "zstd_compress_literals.h"
21
21
 
22
- /*-*************************************
23
- * Superblock entropy buffer structs
24
- ***************************************/
25
- /** ZSTD_hufCTablesMetadata_t :
26
- * Stores Literals Block Type for a super-block in hType, and
27
- * huffman tree description in hufDesBuffer.
28
- * hufDesSize refers to the size of huffman tree description in bytes.
29
- * This metadata is populated in ZSTD_buildSuperBlockEntropy_literal() */
30
- typedef struct {
31
- symbolEncodingType_e hType;
32
- BYTE hufDesBuffer[500]; /* TODO give name to this value */
33
- size_t hufDesSize;
34
- } ZSTD_hufCTablesMetadata_t;
35
-
36
- /** ZSTD_fseCTablesMetadata_t :
37
- * Stores symbol compression modes for a super-block in {ll, ol, ml}Type, and
38
- * fse tables in fseTablesBuffer.
39
- * fseTablesSize refers to the size of fse tables in bytes.
40
- * This metadata is populated in ZSTD_buildSuperBlockEntropy_sequences() */
41
- typedef struct {
42
- symbolEncodingType_e llType;
43
- symbolEncodingType_e ofType;
44
- symbolEncodingType_e mlType;
45
- BYTE fseTablesBuffer[500]; /* TODO give name to this value */
46
- size_t fseTablesSize;
47
- size_t lastCountSize; /* This is to account for bug in 1.3.4. More detail in ZSTD_compressSubBlock_sequences() */
48
- } ZSTD_fseCTablesMetadata_t;
49
-
50
- typedef struct {
51
- ZSTD_hufCTablesMetadata_t hufMetadata;
52
- ZSTD_fseCTablesMetadata_t fseMetadata;
53
- } ZSTD_entropyCTablesMetadata_t;
54
-
55
-
56
- /** ZSTD_buildSuperBlockEntropy_literal() :
57
- * Builds entropy for the super-block literals.
58
- * Stores literals block type (raw, rle, compressed, repeat) and
59
- * huffman description table to hufMetadata.
60
- * @return : size of huffman description table or error code */
61
- static size_t ZSTD_buildSuperBlockEntropy_literal(void* const src, size_t srcSize,
62
- const ZSTD_hufCTables_t* prevHuf,
63
- ZSTD_hufCTables_t* nextHuf,
64
- ZSTD_hufCTablesMetadata_t* hufMetadata,
65
- const int disableLiteralsCompression,
66
- void* workspace, size_t wkspSize)
67
- {
68
- BYTE* const wkspStart = (BYTE*)workspace;
69
- BYTE* const wkspEnd = wkspStart + wkspSize;
70
- BYTE* const countWkspStart = wkspStart;
71
- unsigned* const countWksp = (unsigned*)workspace;
72
- const size_t countWkspSize = (HUF_SYMBOLVALUE_MAX + 1) * sizeof(unsigned);
73
- BYTE* const nodeWksp = countWkspStart + countWkspSize;
74
- const size_t nodeWkspSize = wkspEnd-nodeWksp;
75
- unsigned maxSymbolValue = 255;
76
- unsigned huffLog = HUF_TABLELOG_DEFAULT;
77
- HUF_repeat repeat = prevHuf->repeatMode;
78
-
79
- DEBUGLOG(5, "ZSTD_buildSuperBlockEntropy_literal (srcSize=%zu)", srcSize);
80
-
81
- /* Prepare nextEntropy assuming reusing the existing table */
82
- memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
83
-
84
- if (disableLiteralsCompression) {
85
- DEBUGLOG(5, "set_basic - disabled");
86
- hufMetadata->hType = set_basic;
87
- return 0;
88
- }
89
-
90
- /* small ? don't even attempt compression (speed opt) */
91
- # define COMPRESS_LITERALS_SIZE_MIN 63
92
- { size_t const minLitSize = (prevHuf->repeatMode == HUF_repeat_valid) ? 6 : COMPRESS_LITERALS_SIZE_MIN;
93
- if (srcSize <= minLitSize) {
94
- DEBUGLOG(5, "set_basic - too small");
95
- hufMetadata->hType = set_basic;
96
- return 0;
97
- }
98
- }
99
-
100
- /* Scan input and build symbol stats */
101
- { size_t const largest = HIST_count_wksp (countWksp, &maxSymbolValue, (const BYTE*)src, srcSize, workspace, wkspSize);
102
- FORWARD_IF_ERROR(largest, "HIST_count_wksp failed");
103
- if (largest == srcSize) {
104
- DEBUGLOG(5, "set_rle");
105
- hufMetadata->hType = set_rle;
106
- return 0;
107
- }
108
- if (largest <= (srcSize >> 7)+4) {
109
- DEBUGLOG(5, "set_basic - no gain");
110
- hufMetadata->hType = set_basic;
111
- return 0;
112
- }
113
- }
114
-
115
- /* Validate the previous Huffman table */
116
- if (repeat == HUF_repeat_check && !HUF_validateCTable((HUF_CElt const*)prevHuf->CTable, countWksp, maxSymbolValue)) {
117
- repeat = HUF_repeat_none;
118
- }
119
-
120
- /* Build Huffman Tree */
121
- memset(nextHuf->CTable, 0, sizeof(nextHuf->CTable));
122
- huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue);
123
- { size_t const maxBits = HUF_buildCTable_wksp((HUF_CElt*)nextHuf->CTable, countWksp,
124
- maxSymbolValue, huffLog,
125
- nodeWksp, nodeWkspSize);
126
- FORWARD_IF_ERROR(maxBits, "HUF_buildCTable_wksp");
127
- huffLog = (U32)maxBits;
128
- { /* Build and write the CTable */
129
- size_t const newCSize = HUF_estimateCompressedSize(
130
- (HUF_CElt*)nextHuf->CTable, countWksp, maxSymbolValue);
131
- size_t const hSize = HUF_writeCTable(
132
- hufMetadata->hufDesBuffer, sizeof(hufMetadata->hufDesBuffer),
133
- (HUF_CElt*)nextHuf->CTable, maxSymbolValue, huffLog);
134
- /* Check against repeating the previous CTable */
135
- if (repeat != HUF_repeat_none) {
136
- size_t const oldCSize = HUF_estimateCompressedSize(
137
- (HUF_CElt const*)prevHuf->CTable, countWksp, maxSymbolValue);
138
- if (oldCSize < srcSize && (oldCSize <= hSize + newCSize || hSize + 12 >= srcSize)) {
139
- DEBUGLOG(5, "set_repeat - smaller");
140
- memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
141
- hufMetadata->hType = set_repeat;
142
- return 0;
143
- }
144
- }
145
- if (newCSize + hSize >= srcSize) {
146
- DEBUGLOG(5, "set_basic - no gains");
147
- memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
148
- hufMetadata->hType = set_basic;
149
- return 0;
150
- }
151
- DEBUGLOG(5, "set_compressed (hSize=%u)", (U32)hSize);
152
- hufMetadata->hType = set_compressed;
153
- nextHuf->repeatMode = HUF_repeat_check;
154
- return hSize;
155
- }
156
- }
157
- }
158
-
159
- /** ZSTD_buildSuperBlockEntropy_sequences() :
160
- * Builds entropy for the super-block sequences.
161
- * Stores symbol compression modes and fse table to fseMetadata.
162
- * @return : size of fse tables or error code */
163
- static size_t ZSTD_buildSuperBlockEntropy_sequences(seqStore_t* seqStorePtr,
164
- const ZSTD_fseCTables_t* prevEntropy,
165
- ZSTD_fseCTables_t* nextEntropy,
166
- const ZSTD_CCtx_params* cctxParams,
167
- ZSTD_fseCTablesMetadata_t* fseMetadata,
168
- void* workspace, size_t wkspSize)
169
- {
170
- BYTE* const wkspStart = (BYTE*)workspace;
171
- BYTE* const wkspEnd = wkspStart + wkspSize;
172
- BYTE* const countWkspStart = wkspStart;
173
- unsigned* const countWksp = (unsigned*)workspace;
174
- const size_t countWkspSize = (MaxSeq + 1) * sizeof(unsigned);
175
- BYTE* const cTableWksp = countWkspStart + countWkspSize;
176
- const size_t cTableWkspSize = wkspEnd-cTableWksp;
177
- ZSTD_strategy const strategy = cctxParams->cParams.strategy;
178
- FSE_CTable* CTable_LitLength = nextEntropy->litlengthCTable;
179
- FSE_CTable* CTable_OffsetBits = nextEntropy->offcodeCTable;
180
- FSE_CTable* CTable_MatchLength = nextEntropy->matchlengthCTable;
181
- const BYTE* const ofCodeTable = seqStorePtr->ofCode;
182
- const BYTE* const llCodeTable = seqStorePtr->llCode;
183
- const BYTE* const mlCodeTable = seqStorePtr->mlCode;
184
- size_t const nbSeq = seqStorePtr->sequences - seqStorePtr->sequencesStart;
185
- BYTE* const ostart = fseMetadata->fseTablesBuffer;
186
- BYTE* const oend = ostart + sizeof(fseMetadata->fseTablesBuffer);
187
- BYTE* op = ostart;
188
-
189
- assert(cTableWkspSize >= (1 << MaxFSELog) * sizeof(FSE_FUNCTION_TYPE));
190
- DEBUGLOG(5, "ZSTD_buildSuperBlockEntropy_sequences (nbSeq=%zu)", nbSeq);
191
- memset(workspace, 0, wkspSize);
192
-
193
- fseMetadata->lastCountSize = 0;
194
- /* convert length/distances into codes */
195
- ZSTD_seqToCodes(seqStorePtr);
196
- /* build CTable for Literal Lengths */
197
- { U32 LLtype;
198
- unsigned max = MaxLL;
199
- size_t const mostFrequent = HIST_countFast_wksp(countWksp, &max, llCodeTable, nbSeq, workspace, wkspSize); /* can't fail */
200
- DEBUGLOG(5, "Building LL table");
201
- nextEntropy->litlength_repeatMode = prevEntropy->litlength_repeatMode;
202
- LLtype = ZSTD_selectEncodingType(&nextEntropy->litlength_repeatMode,
203
- countWksp, max, mostFrequent, nbSeq,
204
- LLFSELog, prevEntropy->litlengthCTable,
205
- LL_defaultNorm, LL_defaultNormLog,
206
- ZSTD_defaultAllowed, strategy);
207
- assert(set_basic < set_compressed && set_rle < set_compressed);
208
- assert(!(LLtype < set_compressed && nextEntropy->litlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */
209
- { size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_LitLength, LLFSELog, (symbolEncodingType_e)LLtype,
210
- countWksp, max, llCodeTable, nbSeq, LL_defaultNorm, LL_defaultNormLog, MaxLL,
211
- prevEntropy->litlengthCTable, sizeof(prevEntropy->litlengthCTable),
212
- cTableWksp, cTableWkspSize);
213
- FORWARD_IF_ERROR(countSize, "ZSTD_buildCTable for LitLens failed");
214
- if (LLtype == set_compressed)
215
- fseMetadata->lastCountSize = countSize;
216
- op += countSize;
217
- fseMetadata->llType = (symbolEncodingType_e) LLtype;
218
- } }
219
- /* build CTable for Offsets */
220
- { U32 Offtype;
221
- unsigned max = MaxOff;
222
- size_t const mostFrequent = HIST_countFast_wksp(countWksp, &max, ofCodeTable, nbSeq, workspace, wkspSize); /* can't fail */
223
- /* We can only use the basic table if max <= DefaultMaxOff, otherwise the offsets are too large */
224
- ZSTD_defaultPolicy_e const defaultPolicy = (max <= DefaultMaxOff) ? ZSTD_defaultAllowed : ZSTD_defaultDisallowed;
225
- DEBUGLOG(5, "Building OF table");
226
- nextEntropy->offcode_repeatMode = prevEntropy->offcode_repeatMode;
227
- Offtype = ZSTD_selectEncodingType(&nextEntropy->offcode_repeatMode,
228
- countWksp, max, mostFrequent, nbSeq,
229
- OffFSELog, prevEntropy->offcodeCTable,
230
- OF_defaultNorm, OF_defaultNormLog,
231
- defaultPolicy, strategy);
232
- assert(!(Offtype < set_compressed && nextEntropy->offcode_repeatMode != FSE_repeat_none)); /* We don't copy tables */
233
- { size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_OffsetBits, OffFSELog, (symbolEncodingType_e)Offtype,
234
- countWksp, max, ofCodeTable, nbSeq, OF_defaultNorm, OF_defaultNormLog, DefaultMaxOff,
235
- prevEntropy->offcodeCTable, sizeof(prevEntropy->offcodeCTable),
236
- cTableWksp, cTableWkspSize);
237
- FORWARD_IF_ERROR(countSize, "ZSTD_buildCTable for Offsets failed");
238
- if (Offtype == set_compressed)
239
- fseMetadata->lastCountSize = countSize;
240
- op += countSize;
241
- fseMetadata->ofType = (symbolEncodingType_e) Offtype;
242
- } }
243
- /* build CTable for MatchLengths */
244
- { U32 MLtype;
245
- unsigned max = MaxML;
246
- size_t const mostFrequent = HIST_countFast_wksp(countWksp, &max, mlCodeTable, nbSeq, workspace, wkspSize); /* can't fail */
247
- DEBUGLOG(5, "Building ML table (remaining space : %i)", (int)(oend-op));
248
- nextEntropy->matchlength_repeatMode = prevEntropy->matchlength_repeatMode;
249
- MLtype = ZSTD_selectEncodingType(&nextEntropy->matchlength_repeatMode,
250
- countWksp, max, mostFrequent, nbSeq,
251
- MLFSELog, prevEntropy->matchlengthCTable,
252
- ML_defaultNorm, ML_defaultNormLog,
253
- ZSTD_defaultAllowed, strategy);
254
- assert(!(MLtype < set_compressed && nextEntropy->matchlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */
255
- { size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_MatchLength, MLFSELog, (symbolEncodingType_e)MLtype,
256
- countWksp, max, mlCodeTable, nbSeq, ML_defaultNorm, ML_defaultNormLog, MaxML,
257
- prevEntropy->matchlengthCTable, sizeof(prevEntropy->matchlengthCTable),
258
- cTableWksp, cTableWkspSize);
259
- FORWARD_IF_ERROR(countSize, "ZSTD_buildCTable for MatchLengths failed");
260
- if (MLtype == set_compressed)
261
- fseMetadata->lastCountSize = countSize;
262
- op += countSize;
263
- fseMetadata->mlType = (symbolEncodingType_e) MLtype;
264
- } }
265
- assert((size_t) (op-ostart) <= sizeof(fseMetadata->fseTablesBuffer));
266
- return op-ostart;
267
- }
268
-
269
-
270
- /** ZSTD_buildSuperBlockEntropy() :
271
- * Builds entropy for the super-block.
272
- * @return : 0 on success or error code */
273
- static size_t
274
- ZSTD_buildSuperBlockEntropy(seqStore_t* seqStorePtr,
275
- const ZSTD_entropyCTables_t* prevEntropy,
276
- ZSTD_entropyCTables_t* nextEntropy,
277
- const ZSTD_CCtx_params* cctxParams,
278
- ZSTD_entropyCTablesMetadata_t* entropyMetadata,
279
- void* workspace, size_t wkspSize)
280
- {
281
- size_t const litSize = seqStorePtr->lit - seqStorePtr->litStart;
282
- DEBUGLOG(5, "ZSTD_buildSuperBlockEntropy");
283
- entropyMetadata->hufMetadata.hufDesSize =
284
- ZSTD_buildSuperBlockEntropy_literal(seqStorePtr->litStart, litSize,
285
- &prevEntropy->huf, &nextEntropy->huf,
286
- &entropyMetadata->hufMetadata,
287
- ZSTD_disableLiteralsCompression(cctxParams),
288
- workspace, wkspSize);
289
- FORWARD_IF_ERROR(entropyMetadata->hufMetadata.hufDesSize, "ZSTD_buildSuperBlockEntropy_literal failed");
290
- entropyMetadata->fseMetadata.fseTablesSize =
291
- ZSTD_buildSuperBlockEntropy_sequences(seqStorePtr,
292
- &prevEntropy->fse, &nextEntropy->fse,
293
- cctxParams,
294
- &entropyMetadata->fseMetadata,
295
- workspace, wkspSize);
296
- FORWARD_IF_ERROR(entropyMetadata->fseMetadata.fseTablesSize, "ZSTD_buildSuperBlockEntropy_sequences failed");
297
- return 0;
298
- }
299
-
300
22
  /** ZSTD_compressSubBlock_literal() :
301
23
  * Compresses literals section for a sub-block.
302
24
  * When we have to write the Huffman table we will sometimes choose a header
@@ -304,7 +26,7 @@ ZSTD_buildSuperBlockEntropy(seqStore_t* seqStorePtr,
304
26
  * before we know the table size + compressed size, so we have a bound on the
305
27
  * table size. If we guessed incorrectly, we fall back to uncompressed literals.
306
28
  *
307
- * We write the header when writeEntropy=1 and set entropyWrriten=1 when we succeeded
29
+ * We write the header when writeEntropy=1 and set entropyWritten=1 when we succeeded
308
30
  * in writing the header, otherwise it is set to 0.
309
31
  *
310
32
  * hufMetadata->hType has literals block type info.
@@ -314,13 +36,14 @@ ZSTD_buildSuperBlockEntropy(seqStore_t* seqStorePtr,
314
36
  * If it is set_compressed, first sub-block's literals section will be Treeless_Literals_Block
315
37
  * and the following sub-blocks' literals sections will be Treeless_Literals_Block.
316
38
  * @return : compressed size of literals section of a sub-block
317
- * Or 0 if it unable to compress.
39
+ * Or 0 if unable to compress.
318
40
  * Or error code */
319
- static size_t ZSTD_compressSubBlock_literal(const HUF_CElt* hufTable,
320
- const ZSTD_hufCTablesMetadata_t* hufMetadata,
321
- const BYTE* literals, size_t litSize,
322
- void* dst, size_t dstSize,
323
- const int bmi2, int writeEntropy, int* entropyWritten)
41
+ static size_t
42
+ ZSTD_compressSubBlock_literal(const HUF_CElt* hufTable,
43
+ const ZSTD_hufCTablesMetadata_t* hufMetadata,
44
+ const BYTE* literals, size_t litSize,
45
+ void* dst, size_t dstSize,
46
+ const int bmi2, int writeEntropy, int* entropyWritten)
324
47
  {
325
48
  size_t const header = writeEntropy ? 200 : 0;
326
49
  size_t const lhSize = 3 + (litSize >= (1 KB - header)) + (litSize >= (16 KB - header));
@@ -331,8 +54,6 @@ static size_t ZSTD_compressSubBlock_literal(const HUF_CElt* hufTable,
331
54
  symbolEncodingType_e hType = writeEntropy ? hufMetadata->hType : set_repeat;
332
55
  size_t cLitSize = 0;
333
56
 
334
- (void)bmi2; /* TODO bmi2... */
335
-
336
57
  DEBUGLOG(5, "ZSTD_compressSubBlock_literal (litSize=%zu, lhSize=%zu, writeEntropy=%d)", litSize, lhSize, writeEntropy);
337
58
 
338
59
  *entropyWritten = 0;
@@ -348,15 +69,15 @@ static size_t ZSTD_compressSubBlock_literal(const HUF_CElt* hufTable,
348
69
  assert(hufMetadata->hType == set_compressed || hufMetadata->hType == set_repeat);
349
70
 
350
71
  if (writeEntropy && hufMetadata->hType == set_compressed) {
351
- memcpy(op, hufMetadata->hufDesBuffer, hufMetadata->hufDesSize);
72
+ ZSTD_memcpy(op, hufMetadata->hufDesBuffer, hufMetadata->hufDesSize);
352
73
  op += hufMetadata->hufDesSize;
353
74
  cLitSize += hufMetadata->hufDesSize;
354
75
  DEBUGLOG(5, "ZSTD_compressSubBlock_literal (hSize=%zu)", hufMetadata->hufDesSize);
355
76
  }
356
77
 
357
- /* TODO bmi2 */
358
- { const size_t cSize = singleStream ? HUF_compress1X_usingCTable(op, oend-op, literals, litSize, hufTable)
359
- : HUF_compress4X_usingCTable(op, oend-op, literals, litSize, hufTable);
78
+ { int const flags = bmi2 ? HUF_flags_bmi2 : 0;
79
+ const size_t cSize = singleStream ? HUF_compress1X_usingCTable(op, oend-op, literals, litSize, hufTable, flags)
80
+ : HUF_compress4X_usingCTable(op, oend-op, literals, litSize, hufTable, flags);
360
81
  op += cSize;
361
82
  cLitSize += cSize;
362
83
  if (cSize == 0 || ERR_isError(cSize)) {
@@ -404,12 +125,17 @@ static size_t ZSTD_compressSubBlock_literal(const HUF_CElt* hufTable,
404
125
  return op-ostart;
405
126
  }
406
127
 
407
- static size_t ZSTD_seqDecompressedSize(seqStore_t const* seqStore, const seqDef* sequences, size_t nbSeq, size_t litSize, int lastSequence) {
128
+ static size_t
129
+ ZSTD_seqDecompressedSize(seqStore_t const* seqStore,
130
+ const seqDef* sequences, size_t nbSeq,
131
+ size_t litSize, int lastSequence)
132
+ {
408
133
  const seqDef* const sstart = sequences;
409
134
  const seqDef* const send = sequences + nbSeq;
410
135
  const seqDef* sp = sstart;
411
136
  size_t matchLengthSum = 0;
412
137
  size_t litLengthSum = 0;
138
+ (void)(litLengthSum); /* suppress unused variable warning on some environments */
413
139
  while (send-sp > 0) {
414
140
  ZSTD_sequenceLength const seqLen = ZSTD_getSequenceLength(seqStore, sp);
415
141
  litLengthSum += seqLen.litLength;
@@ -433,13 +159,14 @@ static size_t ZSTD_seqDecompressedSize(seqStore_t const* seqStore, const seqDef*
433
159
  * @return : compressed size of sequences section of a sub-block
434
160
  * Or 0 if it is unable to compress
435
161
  * Or error code. */
436
- static size_t ZSTD_compressSubBlock_sequences(const ZSTD_fseCTables_t* fseTables,
437
- const ZSTD_fseCTablesMetadata_t* fseMetadata,
438
- const seqDef* sequences, size_t nbSeq,
439
- const BYTE* llCode, const BYTE* mlCode, const BYTE* ofCode,
440
- const ZSTD_CCtx_params* cctxParams,
441
- void* dst, size_t dstCapacity,
442
- const int bmi2, int writeEntropy, int* entropyWritten)
162
+ static size_t
163
+ ZSTD_compressSubBlock_sequences(const ZSTD_fseCTables_t* fseTables,
164
+ const ZSTD_fseCTablesMetadata_t* fseMetadata,
165
+ const seqDef* sequences, size_t nbSeq,
166
+ const BYTE* llCode, const BYTE* mlCode, const BYTE* ofCode,
167
+ const ZSTD_CCtx_params* cctxParams,
168
+ void* dst, size_t dstCapacity,
169
+ const int bmi2, int writeEntropy, int* entropyWritten)
443
170
  {
444
171
  const int longOffsets = cctxParams->cParams.windowLog > STREAM_ACCUMULATOR_MIN;
445
172
  BYTE* const ostart = (BYTE*)dst;
@@ -474,7 +201,7 @@ static size_t ZSTD_compressSubBlock_sequences(const ZSTD_fseCTables_t* fseTables
474
201
  const U32 MLtype = fseMetadata->mlType;
475
202
  DEBUGLOG(5, "ZSTD_compressSubBlock_sequences (fseTablesSize=%zu)", fseMetadata->fseTablesSize);
476
203
  *seqHead = (BYTE)((LLtype<<6) + (Offtype<<4) + (MLtype<<2));
477
- memcpy(op, fseMetadata->fseTablesBuffer, fseMetadata->fseTablesSize);
204
+ ZSTD_memcpy(op, fseMetadata->fseTablesBuffer, fseMetadata->fseTablesSize);
478
205
  op += fseMetadata->fseTablesSize;
479
206
  } else {
480
207
  const U32 repeat = set_repeat;
@@ -602,8 +329,8 @@ static size_t ZSTD_estimateSubBlockSize_literal(const BYTE* literals, size_t lit
602
329
  static size_t ZSTD_estimateSubBlockSize_symbolType(symbolEncodingType_e type,
603
330
  const BYTE* codeTable, unsigned maxCode,
604
331
  size_t nbSeq, const FSE_CTable* fseCTable,
605
- const U32* additionalBits,
606
- short const* defaultNorm, U32 defaultNormLog,
332
+ const U8* additionalBits,
333
+ short const* defaultNorm, U32 defaultNormLog, U32 defaultMax,
607
334
  void* workspace, size_t wkspSize)
608
335
  {
609
336
  unsigned* const countWksp = (unsigned*)workspace;
@@ -615,7 +342,11 @@ static size_t ZSTD_estimateSubBlockSize_symbolType(symbolEncodingType_e type,
615
342
 
616
343
  HIST_countFast_wksp(countWksp, &max, codeTable, nbSeq, workspace, wkspSize); /* can't fail */
617
344
  if (type == set_basic) {
618
- cSymbolTypeSizeEstimateInBits = ZSTD_crossEntropyCost(defaultNorm, defaultNormLog, countWksp, max);
345
+ /* We selected this encoding type, so it must be valid. */
346
+ assert(max <= defaultMax);
347
+ cSymbolTypeSizeEstimateInBits = max <= defaultMax
348
+ ? ZSTD_crossEntropyCost(defaultNorm, defaultNormLog, countWksp, max)
349
+ : ERROR(GENERIC);
619
350
  } else if (type == set_rle) {
620
351
  cSymbolTypeSizeEstimateInBits = 0;
621
352
  } else if (type == set_compressed || type == set_repeat) {
@@ -639,19 +370,20 @@ static size_t ZSTD_estimateSubBlockSize_sequences(const BYTE* ofCodeTable,
639
370
  void* workspace, size_t wkspSize,
640
371
  int writeEntropy)
641
372
  {
642
- size_t sequencesSectionHeaderSize = 3; /* Use hard coded size of 3 bytes */
373
+ size_t const sequencesSectionHeaderSize = 3; /* Use hard coded size of 3 bytes */
643
374
  size_t cSeqSizeEstimate = 0;
375
+ if (nbSeq == 0) return sequencesSectionHeaderSize;
644
376
  cSeqSizeEstimate += ZSTD_estimateSubBlockSize_symbolType(fseMetadata->ofType, ofCodeTable, MaxOff,
645
377
  nbSeq, fseTables->offcodeCTable, NULL,
646
- OF_defaultNorm, OF_defaultNormLog,
378
+ OF_defaultNorm, OF_defaultNormLog, DefaultMaxOff,
647
379
  workspace, wkspSize);
648
380
  cSeqSizeEstimate += ZSTD_estimateSubBlockSize_symbolType(fseMetadata->llType, llCodeTable, MaxLL,
649
381
  nbSeq, fseTables->litlengthCTable, LL_bits,
650
- LL_defaultNorm, LL_defaultNormLog,
382
+ LL_defaultNorm, LL_defaultNormLog, MaxLL,
651
383
  workspace, wkspSize);
652
384
  cSeqSizeEstimate += ZSTD_estimateSubBlockSize_symbolType(fseMetadata->mlType, mlCodeTable, MaxML,
653
385
  nbSeq, fseTables->matchlengthCTable, ML_bits,
654
- ML_defaultNorm, ML_defaultNormLog,
386
+ ML_defaultNorm, ML_defaultNormLog, MaxML,
655
387
  workspace, wkspSize);
656
388
  if (writeEntropy) cSeqSizeEstimate += fseMetadata->fseTablesSize;
657
389
  return cSeqSizeEstimate + sequencesSectionHeaderSize;
@@ -747,7 +479,7 @@ static size_t ZSTD_compressSubBlock_multi(const seqStore_t* seqStorePtr,
747
479
  /* I think there is an optimization opportunity here.
748
480
  * Calling ZSTD_estimateSubBlockSize for every sequence can be wasteful
749
481
  * since it recalculates estimate from scratch.
750
- * For example, it would recount literal distribution and symbol codes everytime.
482
+ * For example, it would recount literal distribution and symbol codes every time.
751
483
  */
752
484
  cBlockSizeEstimate = ZSTD_estimateSubBlockSize(lp, litSize, ofCodePtr, llCodePtr, mlCodePtr, seqCount,
753
485
  &nextCBlock->entropy, entropyMetadata,
@@ -790,7 +522,7 @@ static size_t ZSTD_compressSubBlock_multi(const seqStore_t* seqStorePtr,
790
522
  } while (!lastSequence);
791
523
  if (writeLitEntropy) {
792
524
  DEBUGLOG(5, "ZSTD_compressSubBlock_multi has literal entropy tables unwritten");
793
- memcpy(&nextCBlock->entropy.huf, &prevCBlock->entropy.huf, sizeof(prevCBlock->entropy.huf));
525
+ ZSTD_memcpy(&nextCBlock->entropy.huf, &prevCBlock->entropy.huf, sizeof(prevCBlock->entropy.huf));
794
526
  }
795
527
  if (writeSeqEntropy && ZSTD_needSequenceEntropyTables(&entropyMetadata->fseMetadata)) {
796
528
  /* If we haven't written our entropy tables, then we've violated our contract and
@@ -809,11 +541,11 @@ static size_t ZSTD_compressSubBlock_multi(const seqStore_t* seqStorePtr,
809
541
  if (sp < send) {
810
542
  seqDef const* seq;
811
543
  repcodes_t rep;
812
- memcpy(&rep, prevCBlock->rep, sizeof(rep));
544
+ ZSTD_memcpy(&rep, prevCBlock->rep, sizeof(rep));
813
545
  for (seq = sstart; seq < sp; ++seq) {
814
- rep = ZSTD_updateRep(rep.rep, seq->offset - 1, ZSTD_getSequenceLength(seqStorePtr, seq).litLength == 0);
546
+ ZSTD_updateRep(rep.rep, seq->offBase, ZSTD_getSequenceLength(seqStorePtr, seq).litLength == 0);
815
547
  }
816
- memcpy(nextCBlock->rep, &rep, sizeof(rep));
548
+ ZSTD_memcpy(nextCBlock->rep, &rep, sizeof(rep));
817
549
  }
818
550
  }
819
551
  DEBUGLOG(5, "ZSTD_compressSubBlock_multi compressed");
@@ -826,12 +558,12 @@ size_t ZSTD_compressSuperBlock(ZSTD_CCtx* zc,
826
558
  unsigned lastBlock) {
827
559
  ZSTD_entropyCTablesMetadata_t entropyMetadata;
828
560
 
829
- FORWARD_IF_ERROR(ZSTD_buildSuperBlockEntropy(&zc->seqStore,
561
+ FORWARD_IF_ERROR(ZSTD_buildBlockEntropyStats(&zc->seqStore,
830
562
  &zc->blockState.prevCBlock->entropy,
831
563
  &zc->blockState.nextCBlock->entropy,
832
564
  &zc->appliedParams,
833
565
  &entropyMetadata,
834
- zc->entropyWorkspace, HUF_WORKSPACE_SIZE /* statically allocated in resetCCtx */), "");
566
+ zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE /* statically allocated in resetCCtx */), "");
835
567
 
836
568
  return ZSTD_compressSubBlock_multi(&zc->seqStore,
837
569
  zc->blockState.prevCBlock,
@@ -841,5 +573,5 @@ size_t ZSTD_compressSuperBlock(ZSTD_CCtx* zc,
841
573
  dst, dstCapacity,
842
574
  src, srcSize,
843
575
  zc->bmi2, lastBlock,
844
- zc->entropyWorkspace, HUF_WORKSPACE_SIZE /* statically allocated in resetCCtx */);
576
+ zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE /* statically allocated in resetCCtx */);
845
577
  }
@@ -1,5 +1,5 @@
1
1
  /*
2
- * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
2
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
3
3
  * All rights reserved.
4
4
  *
5
5
  * This source code is licensed under both the BSD-style license (found in the