zstd-ruby 1.4.5.0 → 1.5.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (115) hide show
  1. checksums.yaml +4 -4
  2. data/.gitignore +2 -0
  3. data/README.md +78 -5
  4. data/Rakefile +8 -2
  5. data/ext/zstdruby/common.h +15 -0
  6. data/ext/zstdruby/extconf.rb +3 -2
  7. data/ext/zstdruby/libzstd/common/allocations.h +55 -0
  8. data/ext/zstdruby/libzstd/common/bits.h +200 -0
  9. data/ext/zstdruby/libzstd/common/bitstream.h +45 -62
  10. data/ext/zstdruby/libzstd/common/compiler.h +205 -22
  11. data/ext/zstdruby/libzstd/common/cpu.h +1 -3
  12. data/ext/zstdruby/libzstd/common/debug.c +1 -1
  13. data/ext/zstdruby/libzstd/common/debug.h +12 -19
  14. data/ext/zstdruby/libzstd/common/entropy_common.c +172 -48
  15. data/ext/zstdruby/libzstd/common/error_private.c +10 -2
  16. data/ext/zstdruby/libzstd/common/error_private.h +82 -3
  17. data/ext/zstdruby/libzstd/common/fse.h +37 -86
  18. data/ext/zstdruby/libzstd/common/fse_decompress.c +117 -92
  19. data/ext/zstdruby/libzstd/common/huf.h +99 -166
  20. data/ext/zstdruby/libzstd/common/mem.h +124 -142
  21. data/ext/zstdruby/libzstd/common/pool.c +54 -27
  22. data/ext/zstdruby/libzstd/common/pool.h +10 -4
  23. data/ext/zstdruby/libzstd/common/portability_macros.h +156 -0
  24. data/ext/zstdruby/libzstd/common/threading.c +74 -19
  25. data/ext/zstdruby/libzstd/common/threading.h +5 -10
  26. data/ext/zstdruby/libzstd/common/xxhash.c +7 -847
  27. data/ext/zstdruby/libzstd/common/xxhash.h +5568 -167
  28. data/ext/zstdruby/libzstd/common/zstd_common.c +2 -37
  29. data/ext/zstdruby/libzstd/common/zstd_deps.h +111 -0
  30. data/ext/zstdruby/libzstd/common/zstd_internal.h +132 -187
  31. data/ext/zstdruby/libzstd/common/zstd_trace.h +163 -0
  32. data/ext/zstdruby/libzstd/compress/clevels.h +134 -0
  33. data/ext/zstdruby/libzstd/compress/fse_compress.c +83 -157
  34. data/ext/zstdruby/libzstd/compress/hist.c +27 -29
  35. data/ext/zstdruby/libzstd/compress/hist.h +2 -2
  36. data/ext/zstdruby/libzstd/compress/huf_compress.c +916 -279
  37. data/ext/zstdruby/libzstd/compress/zstd_compress.c +3773 -1019
  38. data/ext/zstdruby/libzstd/compress/zstd_compress_internal.h +610 -203
  39. data/ext/zstdruby/libzstd/compress/zstd_compress_literals.c +119 -42
  40. data/ext/zstdruby/libzstd/compress/zstd_compress_literals.h +16 -6
  41. data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.c +42 -19
  42. data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.h +1 -1
  43. data/ext/zstdruby/libzstd/compress/zstd_compress_superblock.c +49 -317
  44. data/ext/zstdruby/libzstd/compress/zstd_compress_superblock.h +1 -1
  45. data/ext/zstdruby/libzstd/compress/zstd_cwksp.h +320 -103
  46. data/ext/zstdruby/libzstd/compress/zstd_double_fast.c +388 -151
  47. data/ext/zstdruby/libzstd/compress/zstd_double_fast.h +3 -2
  48. data/ext/zstdruby/libzstd/compress/zstd_fast.c +729 -265
  49. data/ext/zstdruby/libzstd/compress/zstd_fast.h +3 -2
  50. data/ext/zstdruby/libzstd/compress/zstd_lazy.c +1270 -251
  51. data/ext/zstdruby/libzstd/compress/zstd_lazy.h +61 -1
  52. data/ext/zstdruby/libzstd/compress/zstd_ldm.c +324 -219
  53. data/ext/zstdruby/libzstd/compress/zstd_ldm.h +9 -2
  54. data/ext/zstdruby/libzstd/compress/zstd_ldm_geartab.h +106 -0
  55. data/ext/zstdruby/libzstd/compress/zstd_opt.c +481 -209
  56. data/ext/zstdruby/libzstd/compress/zstd_opt.h +1 -1
  57. data/ext/zstdruby/libzstd/compress/zstdmt_compress.c +181 -457
  58. data/ext/zstdruby/libzstd/compress/zstdmt_compress.h +34 -113
  59. data/ext/zstdruby/libzstd/decompress/huf_decompress.c +1199 -565
  60. data/ext/zstdruby/libzstd/decompress/huf_decompress_amd64.S +576 -0
  61. data/ext/zstdruby/libzstd/decompress/zstd_ddict.c +12 -12
  62. data/ext/zstdruby/libzstd/decompress/zstd_ddict.h +2 -2
  63. data/ext/zstdruby/libzstd/decompress/zstd_decompress.c +627 -157
  64. data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.c +1086 -326
  65. data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.h +19 -5
  66. data/ext/zstdruby/libzstd/decompress/zstd_decompress_internal.h +62 -13
  67. data/ext/zstdruby/libzstd/dictBuilder/cover.c +73 -52
  68. data/ext/zstdruby/libzstd/dictBuilder/cover.h +7 -6
  69. data/ext/zstdruby/libzstd/dictBuilder/divsufsort.c +1 -1
  70. data/ext/zstdruby/libzstd/dictBuilder/fastcover.c +44 -35
  71. data/ext/zstdruby/libzstd/dictBuilder/zdict.c +103 -111
  72. data/ext/zstdruby/libzstd/{dictBuilder/zdict.h → zdict.h} +203 -34
  73. data/ext/zstdruby/libzstd/zstd.h +1217 -287
  74. data/ext/zstdruby/libzstd/{common/zstd_errors.h → zstd_errors.h} +28 -8
  75. data/ext/zstdruby/main.c +20 -0
  76. data/ext/zstdruby/skippable_frame.c +63 -0
  77. data/ext/zstdruby/streaming_compress.c +177 -0
  78. data/ext/zstdruby/streaming_compress.h +5 -0
  79. data/ext/zstdruby/streaming_decompress.c +123 -0
  80. data/ext/zstdruby/zstdruby.c +114 -32
  81. data/lib/zstd-ruby/version.rb +1 -1
  82. data/lib/zstd-ruby.rb +0 -1
  83. data/zstd-ruby.gemspec +1 -1
  84. metadata +19 -36
  85. data/.travis.yml +0 -14
  86. data/ext/zstdruby/libzstd/.gitignore +0 -3
  87. data/ext/zstdruby/libzstd/BUCK +0 -234
  88. data/ext/zstdruby/libzstd/Makefile +0 -354
  89. data/ext/zstdruby/libzstd/README.md +0 -179
  90. data/ext/zstdruby/libzstd/deprecated/zbuff.h +0 -214
  91. data/ext/zstdruby/libzstd/deprecated/zbuff_common.c +0 -26
  92. data/ext/zstdruby/libzstd/deprecated/zbuff_compress.c +0 -147
  93. data/ext/zstdruby/libzstd/deprecated/zbuff_decompress.c +0 -75
  94. data/ext/zstdruby/libzstd/dll/example/Makefile +0 -48
  95. data/ext/zstdruby/libzstd/dll/example/README.md +0 -69
  96. data/ext/zstdruby/libzstd/dll/example/build_package.bat +0 -20
  97. data/ext/zstdruby/libzstd/dll/example/fullbench-dll.sln +0 -25
  98. data/ext/zstdruby/libzstd/dll/example/fullbench-dll.vcxproj +0 -181
  99. data/ext/zstdruby/libzstd/legacy/zstd_legacy.h +0 -415
  100. data/ext/zstdruby/libzstd/legacy/zstd_v01.c +0 -2158
  101. data/ext/zstdruby/libzstd/legacy/zstd_v01.h +0 -94
  102. data/ext/zstdruby/libzstd/legacy/zstd_v02.c +0 -3518
  103. data/ext/zstdruby/libzstd/legacy/zstd_v02.h +0 -93
  104. data/ext/zstdruby/libzstd/legacy/zstd_v03.c +0 -3160
  105. data/ext/zstdruby/libzstd/legacy/zstd_v03.h +0 -93
  106. data/ext/zstdruby/libzstd/legacy/zstd_v04.c +0 -3647
  107. data/ext/zstdruby/libzstd/legacy/zstd_v04.h +0 -142
  108. data/ext/zstdruby/libzstd/legacy/zstd_v05.c +0 -4050
  109. data/ext/zstdruby/libzstd/legacy/zstd_v05.h +0 -162
  110. data/ext/zstdruby/libzstd/legacy/zstd_v06.c +0 -4154
  111. data/ext/zstdruby/libzstd/legacy/zstd_v06.h +0 -172
  112. data/ext/zstdruby/libzstd/legacy/zstd_v07.c +0 -4541
  113. data/ext/zstdruby/libzstd/legacy/zstd_v07.h +0 -187
  114. data/ext/zstdruby/libzstd/libzstd.pc.in +0 -15
  115. data/ext/zstdruby/zstdruby.h +0 -6
@@ -1,5 +1,5 @@
1
1
  /*
2
- * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
2
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
3
3
  * All rights reserved.
4
4
  *
5
5
  * This source code is licensed under both the BSD-style license (found in the
@@ -15,288 +15,10 @@
15
15
 
16
16
  #include "../common/zstd_internal.h" /* ZSTD_getSequenceLength */
17
17
  #include "hist.h" /* HIST_countFast_wksp */
18
- #include "zstd_compress_internal.h"
18
+ #include "zstd_compress_internal.h" /* ZSTD_[huf|fse|entropy]CTablesMetadata_t */
19
19
  #include "zstd_compress_sequences.h"
20
20
  #include "zstd_compress_literals.h"
21
21
 
22
- /*-*************************************
23
- * Superblock entropy buffer structs
24
- ***************************************/
25
- /** ZSTD_hufCTablesMetadata_t :
26
- * Stores Literals Block Type for a super-block in hType, and
27
- * huffman tree description in hufDesBuffer.
28
- * hufDesSize refers to the size of huffman tree description in bytes.
29
- * This metadata is populated in ZSTD_buildSuperBlockEntropy_literal() */
30
- typedef struct {
31
- symbolEncodingType_e hType;
32
- BYTE hufDesBuffer[500]; /* TODO give name to this value */
33
- size_t hufDesSize;
34
- } ZSTD_hufCTablesMetadata_t;
35
-
36
- /** ZSTD_fseCTablesMetadata_t :
37
- * Stores symbol compression modes for a super-block in {ll, ol, ml}Type, and
38
- * fse tables in fseTablesBuffer.
39
- * fseTablesSize refers to the size of fse tables in bytes.
40
- * This metadata is populated in ZSTD_buildSuperBlockEntropy_sequences() */
41
- typedef struct {
42
- symbolEncodingType_e llType;
43
- symbolEncodingType_e ofType;
44
- symbolEncodingType_e mlType;
45
- BYTE fseTablesBuffer[500]; /* TODO give name to this value */
46
- size_t fseTablesSize;
47
- size_t lastCountSize; /* This is to account for bug in 1.3.4. More detail in ZSTD_compressSubBlock_sequences() */
48
- } ZSTD_fseCTablesMetadata_t;
49
-
50
- typedef struct {
51
- ZSTD_hufCTablesMetadata_t hufMetadata;
52
- ZSTD_fseCTablesMetadata_t fseMetadata;
53
- } ZSTD_entropyCTablesMetadata_t;
54
-
55
-
56
- /** ZSTD_buildSuperBlockEntropy_literal() :
57
- * Builds entropy for the super-block literals.
58
- * Stores literals block type (raw, rle, compressed, repeat) and
59
- * huffman description table to hufMetadata.
60
- * @return : size of huffman description table or error code */
61
- static size_t ZSTD_buildSuperBlockEntropy_literal(void* const src, size_t srcSize,
62
- const ZSTD_hufCTables_t* prevHuf,
63
- ZSTD_hufCTables_t* nextHuf,
64
- ZSTD_hufCTablesMetadata_t* hufMetadata,
65
- const int disableLiteralsCompression,
66
- void* workspace, size_t wkspSize)
67
- {
68
- BYTE* const wkspStart = (BYTE*)workspace;
69
- BYTE* const wkspEnd = wkspStart + wkspSize;
70
- BYTE* const countWkspStart = wkspStart;
71
- unsigned* const countWksp = (unsigned*)workspace;
72
- const size_t countWkspSize = (HUF_SYMBOLVALUE_MAX + 1) * sizeof(unsigned);
73
- BYTE* const nodeWksp = countWkspStart + countWkspSize;
74
- const size_t nodeWkspSize = wkspEnd-nodeWksp;
75
- unsigned maxSymbolValue = 255;
76
- unsigned huffLog = HUF_TABLELOG_DEFAULT;
77
- HUF_repeat repeat = prevHuf->repeatMode;
78
-
79
- DEBUGLOG(5, "ZSTD_buildSuperBlockEntropy_literal (srcSize=%zu)", srcSize);
80
-
81
- /* Prepare nextEntropy assuming reusing the existing table */
82
- memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
83
-
84
- if (disableLiteralsCompression) {
85
- DEBUGLOG(5, "set_basic - disabled");
86
- hufMetadata->hType = set_basic;
87
- return 0;
88
- }
89
-
90
- /* small ? don't even attempt compression (speed opt) */
91
- # define COMPRESS_LITERALS_SIZE_MIN 63
92
- { size_t const minLitSize = (prevHuf->repeatMode == HUF_repeat_valid) ? 6 : COMPRESS_LITERALS_SIZE_MIN;
93
- if (srcSize <= minLitSize) {
94
- DEBUGLOG(5, "set_basic - too small");
95
- hufMetadata->hType = set_basic;
96
- return 0;
97
- }
98
- }
99
-
100
- /* Scan input and build symbol stats */
101
- { size_t const largest = HIST_count_wksp (countWksp, &maxSymbolValue, (const BYTE*)src, srcSize, workspace, wkspSize);
102
- FORWARD_IF_ERROR(largest, "HIST_count_wksp failed");
103
- if (largest == srcSize) {
104
- DEBUGLOG(5, "set_rle");
105
- hufMetadata->hType = set_rle;
106
- return 0;
107
- }
108
- if (largest <= (srcSize >> 7)+4) {
109
- DEBUGLOG(5, "set_basic - no gain");
110
- hufMetadata->hType = set_basic;
111
- return 0;
112
- }
113
- }
114
-
115
- /* Validate the previous Huffman table */
116
- if (repeat == HUF_repeat_check && !HUF_validateCTable((HUF_CElt const*)prevHuf->CTable, countWksp, maxSymbolValue)) {
117
- repeat = HUF_repeat_none;
118
- }
119
-
120
- /* Build Huffman Tree */
121
- memset(nextHuf->CTable, 0, sizeof(nextHuf->CTable));
122
- huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue);
123
- { size_t const maxBits = HUF_buildCTable_wksp((HUF_CElt*)nextHuf->CTable, countWksp,
124
- maxSymbolValue, huffLog,
125
- nodeWksp, nodeWkspSize);
126
- FORWARD_IF_ERROR(maxBits, "HUF_buildCTable_wksp");
127
- huffLog = (U32)maxBits;
128
- { /* Build and write the CTable */
129
- size_t const newCSize = HUF_estimateCompressedSize(
130
- (HUF_CElt*)nextHuf->CTable, countWksp, maxSymbolValue);
131
- size_t const hSize = HUF_writeCTable(
132
- hufMetadata->hufDesBuffer, sizeof(hufMetadata->hufDesBuffer),
133
- (HUF_CElt*)nextHuf->CTable, maxSymbolValue, huffLog);
134
- /* Check against repeating the previous CTable */
135
- if (repeat != HUF_repeat_none) {
136
- size_t const oldCSize = HUF_estimateCompressedSize(
137
- (HUF_CElt const*)prevHuf->CTable, countWksp, maxSymbolValue);
138
- if (oldCSize < srcSize && (oldCSize <= hSize + newCSize || hSize + 12 >= srcSize)) {
139
- DEBUGLOG(5, "set_repeat - smaller");
140
- memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
141
- hufMetadata->hType = set_repeat;
142
- return 0;
143
- }
144
- }
145
- if (newCSize + hSize >= srcSize) {
146
- DEBUGLOG(5, "set_basic - no gains");
147
- memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
148
- hufMetadata->hType = set_basic;
149
- return 0;
150
- }
151
- DEBUGLOG(5, "set_compressed (hSize=%u)", (U32)hSize);
152
- hufMetadata->hType = set_compressed;
153
- nextHuf->repeatMode = HUF_repeat_check;
154
- return hSize;
155
- }
156
- }
157
- }
158
-
159
- /** ZSTD_buildSuperBlockEntropy_sequences() :
160
- * Builds entropy for the super-block sequences.
161
- * Stores symbol compression modes and fse table to fseMetadata.
162
- * @return : size of fse tables or error code */
163
- static size_t ZSTD_buildSuperBlockEntropy_sequences(seqStore_t* seqStorePtr,
164
- const ZSTD_fseCTables_t* prevEntropy,
165
- ZSTD_fseCTables_t* nextEntropy,
166
- const ZSTD_CCtx_params* cctxParams,
167
- ZSTD_fseCTablesMetadata_t* fseMetadata,
168
- void* workspace, size_t wkspSize)
169
- {
170
- BYTE* const wkspStart = (BYTE*)workspace;
171
- BYTE* const wkspEnd = wkspStart + wkspSize;
172
- BYTE* const countWkspStart = wkspStart;
173
- unsigned* const countWksp = (unsigned*)workspace;
174
- const size_t countWkspSize = (MaxSeq + 1) * sizeof(unsigned);
175
- BYTE* const cTableWksp = countWkspStart + countWkspSize;
176
- const size_t cTableWkspSize = wkspEnd-cTableWksp;
177
- ZSTD_strategy const strategy = cctxParams->cParams.strategy;
178
- FSE_CTable* CTable_LitLength = nextEntropy->litlengthCTable;
179
- FSE_CTable* CTable_OffsetBits = nextEntropy->offcodeCTable;
180
- FSE_CTable* CTable_MatchLength = nextEntropy->matchlengthCTable;
181
- const BYTE* const ofCodeTable = seqStorePtr->ofCode;
182
- const BYTE* const llCodeTable = seqStorePtr->llCode;
183
- const BYTE* const mlCodeTable = seqStorePtr->mlCode;
184
- size_t const nbSeq = seqStorePtr->sequences - seqStorePtr->sequencesStart;
185
- BYTE* const ostart = fseMetadata->fseTablesBuffer;
186
- BYTE* const oend = ostart + sizeof(fseMetadata->fseTablesBuffer);
187
- BYTE* op = ostart;
188
-
189
- assert(cTableWkspSize >= (1 << MaxFSELog) * sizeof(FSE_FUNCTION_TYPE));
190
- DEBUGLOG(5, "ZSTD_buildSuperBlockEntropy_sequences (nbSeq=%zu)", nbSeq);
191
- memset(workspace, 0, wkspSize);
192
-
193
- fseMetadata->lastCountSize = 0;
194
- /* convert length/distances into codes */
195
- ZSTD_seqToCodes(seqStorePtr);
196
- /* build CTable for Literal Lengths */
197
- { U32 LLtype;
198
- unsigned max = MaxLL;
199
- size_t const mostFrequent = HIST_countFast_wksp(countWksp, &max, llCodeTable, nbSeq, workspace, wkspSize); /* can't fail */
200
- DEBUGLOG(5, "Building LL table");
201
- nextEntropy->litlength_repeatMode = prevEntropy->litlength_repeatMode;
202
- LLtype = ZSTD_selectEncodingType(&nextEntropy->litlength_repeatMode,
203
- countWksp, max, mostFrequent, nbSeq,
204
- LLFSELog, prevEntropy->litlengthCTable,
205
- LL_defaultNorm, LL_defaultNormLog,
206
- ZSTD_defaultAllowed, strategy);
207
- assert(set_basic < set_compressed && set_rle < set_compressed);
208
- assert(!(LLtype < set_compressed && nextEntropy->litlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */
209
- { size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_LitLength, LLFSELog, (symbolEncodingType_e)LLtype,
210
- countWksp, max, llCodeTable, nbSeq, LL_defaultNorm, LL_defaultNormLog, MaxLL,
211
- prevEntropy->litlengthCTable, sizeof(prevEntropy->litlengthCTable),
212
- cTableWksp, cTableWkspSize);
213
- FORWARD_IF_ERROR(countSize, "ZSTD_buildCTable for LitLens failed");
214
- if (LLtype == set_compressed)
215
- fseMetadata->lastCountSize = countSize;
216
- op += countSize;
217
- fseMetadata->llType = (symbolEncodingType_e) LLtype;
218
- } }
219
- /* build CTable for Offsets */
220
- { U32 Offtype;
221
- unsigned max = MaxOff;
222
- size_t const mostFrequent = HIST_countFast_wksp(countWksp, &max, ofCodeTable, nbSeq, workspace, wkspSize); /* can't fail */
223
- /* We can only use the basic table if max <= DefaultMaxOff, otherwise the offsets are too large */
224
- ZSTD_defaultPolicy_e const defaultPolicy = (max <= DefaultMaxOff) ? ZSTD_defaultAllowed : ZSTD_defaultDisallowed;
225
- DEBUGLOG(5, "Building OF table");
226
- nextEntropy->offcode_repeatMode = prevEntropy->offcode_repeatMode;
227
- Offtype = ZSTD_selectEncodingType(&nextEntropy->offcode_repeatMode,
228
- countWksp, max, mostFrequent, nbSeq,
229
- OffFSELog, prevEntropy->offcodeCTable,
230
- OF_defaultNorm, OF_defaultNormLog,
231
- defaultPolicy, strategy);
232
- assert(!(Offtype < set_compressed && nextEntropy->offcode_repeatMode != FSE_repeat_none)); /* We don't copy tables */
233
- { size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_OffsetBits, OffFSELog, (symbolEncodingType_e)Offtype,
234
- countWksp, max, ofCodeTable, nbSeq, OF_defaultNorm, OF_defaultNormLog, DefaultMaxOff,
235
- prevEntropy->offcodeCTable, sizeof(prevEntropy->offcodeCTable),
236
- cTableWksp, cTableWkspSize);
237
- FORWARD_IF_ERROR(countSize, "ZSTD_buildCTable for Offsets failed");
238
- if (Offtype == set_compressed)
239
- fseMetadata->lastCountSize = countSize;
240
- op += countSize;
241
- fseMetadata->ofType = (symbolEncodingType_e) Offtype;
242
- } }
243
- /* build CTable for MatchLengths */
244
- { U32 MLtype;
245
- unsigned max = MaxML;
246
- size_t const mostFrequent = HIST_countFast_wksp(countWksp, &max, mlCodeTable, nbSeq, workspace, wkspSize); /* can't fail */
247
- DEBUGLOG(5, "Building ML table (remaining space : %i)", (int)(oend-op));
248
- nextEntropy->matchlength_repeatMode = prevEntropy->matchlength_repeatMode;
249
- MLtype = ZSTD_selectEncodingType(&nextEntropy->matchlength_repeatMode,
250
- countWksp, max, mostFrequent, nbSeq,
251
- MLFSELog, prevEntropy->matchlengthCTable,
252
- ML_defaultNorm, ML_defaultNormLog,
253
- ZSTD_defaultAllowed, strategy);
254
- assert(!(MLtype < set_compressed && nextEntropy->matchlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */
255
- { size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_MatchLength, MLFSELog, (symbolEncodingType_e)MLtype,
256
- countWksp, max, mlCodeTable, nbSeq, ML_defaultNorm, ML_defaultNormLog, MaxML,
257
- prevEntropy->matchlengthCTable, sizeof(prevEntropy->matchlengthCTable),
258
- cTableWksp, cTableWkspSize);
259
- FORWARD_IF_ERROR(countSize, "ZSTD_buildCTable for MatchLengths failed");
260
- if (MLtype == set_compressed)
261
- fseMetadata->lastCountSize = countSize;
262
- op += countSize;
263
- fseMetadata->mlType = (symbolEncodingType_e) MLtype;
264
- } }
265
- assert((size_t) (op-ostart) <= sizeof(fseMetadata->fseTablesBuffer));
266
- return op-ostart;
267
- }
268
-
269
-
270
- /** ZSTD_buildSuperBlockEntropy() :
271
- * Builds entropy for the super-block.
272
- * @return : 0 on success or error code */
273
- static size_t
274
- ZSTD_buildSuperBlockEntropy(seqStore_t* seqStorePtr,
275
- const ZSTD_entropyCTables_t* prevEntropy,
276
- ZSTD_entropyCTables_t* nextEntropy,
277
- const ZSTD_CCtx_params* cctxParams,
278
- ZSTD_entropyCTablesMetadata_t* entropyMetadata,
279
- void* workspace, size_t wkspSize)
280
- {
281
- size_t const litSize = seqStorePtr->lit - seqStorePtr->litStart;
282
- DEBUGLOG(5, "ZSTD_buildSuperBlockEntropy");
283
- entropyMetadata->hufMetadata.hufDesSize =
284
- ZSTD_buildSuperBlockEntropy_literal(seqStorePtr->litStart, litSize,
285
- &prevEntropy->huf, &nextEntropy->huf,
286
- &entropyMetadata->hufMetadata,
287
- ZSTD_disableLiteralsCompression(cctxParams),
288
- workspace, wkspSize);
289
- FORWARD_IF_ERROR(entropyMetadata->hufMetadata.hufDesSize, "ZSTD_buildSuperBlockEntropy_literal failed");
290
- entropyMetadata->fseMetadata.fseTablesSize =
291
- ZSTD_buildSuperBlockEntropy_sequences(seqStorePtr,
292
- &prevEntropy->fse, &nextEntropy->fse,
293
- cctxParams,
294
- &entropyMetadata->fseMetadata,
295
- workspace, wkspSize);
296
- FORWARD_IF_ERROR(entropyMetadata->fseMetadata.fseTablesSize, "ZSTD_buildSuperBlockEntropy_sequences failed");
297
- return 0;
298
- }
299
-
300
22
  /** ZSTD_compressSubBlock_literal() :
301
23
  * Compresses literals section for a sub-block.
302
24
  * When we have to write the Huffman table we will sometimes choose a header
@@ -304,7 +26,7 @@ ZSTD_buildSuperBlockEntropy(seqStore_t* seqStorePtr,
304
26
  * before we know the table size + compressed size, so we have a bound on the
305
27
  * table size. If we guessed incorrectly, we fall back to uncompressed literals.
306
28
  *
307
- * We write the header when writeEntropy=1 and set entropyWrriten=1 when we succeeded
29
+ * We write the header when writeEntropy=1 and set entropyWritten=1 when we succeeded
308
30
  * in writing the header, otherwise it is set to 0.
309
31
  *
310
32
  * hufMetadata->hType has literals block type info.
@@ -314,13 +36,14 @@ ZSTD_buildSuperBlockEntropy(seqStore_t* seqStorePtr,
314
36
  * If it is set_compressed, first sub-block's literals section will be Treeless_Literals_Block
315
37
  * and the following sub-blocks' literals sections will be Treeless_Literals_Block.
316
38
  * @return : compressed size of literals section of a sub-block
317
- * Or 0 if it unable to compress.
39
+ * Or 0 if unable to compress.
318
40
  * Or error code */
319
- static size_t ZSTD_compressSubBlock_literal(const HUF_CElt* hufTable,
320
- const ZSTD_hufCTablesMetadata_t* hufMetadata,
321
- const BYTE* literals, size_t litSize,
322
- void* dst, size_t dstSize,
323
- const int bmi2, int writeEntropy, int* entropyWritten)
41
+ static size_t
42
+ ZSTD_compressSubBlock_literal(const HUF_CElt* hufTable,
43
+ const ZSTD_hufCTablesMetadata_t* hufMetadata,
44
+ const BYTE* literals, size_t litSize,
45
+ void* dst, size_t dstSize,
46
+ const int bmi2, int writeEntropy, int* entropyWritten)
324
47
  {
325
48
  size_t const header = writeEntropy ? 200 : 0;
326
49
  size_t const lhSize = 3 + (litSize >= (1 KB - header)) + (litSize >= (16 KB - header));
@@ -331,8 +54,6 @@ static size_t ZSTD_compressSubBlock_literal(const HUF_CElt* hufTable,
331
54
  symbolEncodingType_e hType = writeEntropy ? hufMetadata->hType : set_repeat;
332
55
  size_t cLitSize = 0;
333
56
 
334
- (void)bmi2; /* TODO bmi2... */
335
-
336
57
  DEBUGLOG(5, "ZSTD_compressSubBlock_literal (litSize=%zu, lhSize=%zu, writeEntropy=%d)", litSize, lhSize, writeEntropy);
337
58
 
338
59
  *entropyWritten = 0;
@@ -348,15 +69,15 @@ static size_t ZSTD_compressSubBlock_literal(const HUF_CElt* hufTable,
348
69
  assert(hufMetadata->hType == set_compressed || hufMetadata->hType == set_repeat);
349
70
 
350
71
  if (writeEntropy && hufMetadata->hType == set_compressed) {
351
- memcpy(op, hufMetadata->hufDesBuffer, hufMetadata->hufDesSize);
72
+ ZSTD_memcpy(op, hufMetadata->hufDesBuffer, hufMetadata->hufDesSize);
352
73
  op += hufMetadata->hufDesSize;
353
74
  cLitSize += hufMetadata->hufDesSize;
354
75
  DEBUGLOG(5, "ZSTD_compressSubBlock_literal (hSize=%zu)", hufMetadata->hufDesSize);
355
76
  }
356
77
 
357
- /* TODO bmi2 */
358
- { const size_t cSize = singleStream ? HUF_compress1X_usingCTable(op, oend-op, literals, litSize, hufTable)
359
- : HUF_compress4X_usingCTable(op, oend-op, literals, litSize, hufTable);
78
+ { int const flags = bmi2 ? HUF_flags_bmi2 : 0;
79
+ const size_t cSize = singleStream ? HUF_compress1X_usingCTable(op, oend-op, literals, litSize, hufTable, flags)
80
+ : HUF_compress4X_usingCTable(op, oend-op, literals, litSize, hufTable, flags);
360
81
  op += cSize;
361
82
  cLitSize += cSize;
362
83
  if (cSize == 0 || ERR_isError(cSize)) {
@@ -404,12 +125,17 @@ static size_t ZSTD_compressSubBlock_literal(const HUF_CElt* hufTable,
404
125
  return op-ostart;
405
126
  }
406
127
 
407
- static size_t ZSTD_seqDecompressedSize(seqStore_t const* seqStore, const seqDef* sequences, size_t nbSeq, size_t litSize, int lastSequence) {
128
+ static size_t
129
+ ZSTD_seqDecompressedSize(seqStore_t const* seqStore,
130
+ const seqDef* sequences, size_t nbSeq,
131
+ size_t litSize, int lastSequence)
132
+ {
408
133
  const seqDef* const sstart = sequences;
409
134
  const seqDef* const send = sequences + nbSeq;
410
135
  const seqDef* sp = sstart;
411
136
  size_t matchLengthSum = 0;
412
137
  size_t litLengthSum = 0;
138
+ (void)(litLengthSum); /* suppress unused variable warning on some environments */
413
139
  while (send-sp > 0) {
414
140
  ZSTD_sequenceLength const seqLen = ZSTD_getSequenceLength(seqStore, sp);
415
141
  litLengthSum += seqLen.litLength;
@@ -433,13 +159,14 @@ static size_t ZSTD_seqDecompressedSize(seqStore_t const* seqStore, const seqDef*
433
159
  * @return : compressed size of sequences section of a sub-block
434
160
  * Or 0 if it is unable to compress
435
161
  * Or error code. */
436
- static size_t ZSTD_compressSubBlock_sequences(const ZSTD_fseCTables_t* fseTables,
437
- const ZSTD_fseCTablesMetadata_t* fseMetadata,
438
- const seqDef* sequences, size_t nbSeq,
439
- const BYTE* llCode, const BYTE* mlCode, const BYTE* ofCode,
440
- const ZSTD_CCtx_params* cctxParams,
441
- void* dst, size_t dstCapacity,
442
- const int bmi2, int writeEntropy, int* entropyWritten)
162
+ static size_t
163
+ ZSTD_compressSubBlock_sequences(const ZSTD_fseCTables_t* fseTables,
164
+ const ZSTD_fseCTablesMetadata_t* fseMetadata,
165
+ const seqDef* sequences, size_t nbSeq,
166
+ const BYTE* llCode, const BYTE* mlCode, const BYTE* ofCode,
167
+ const ZSTD_CCtx_params* cctxParams,
168
+ void* dst, size_t dstCapacity,
169
+ const int bmi2, int writeEntropy, int* entropyWritten)
443
170
  {
444
171
  const int longOffsets = cctxParams->cParams.windowLog > STREAM_ACCUMULATOR_MIN;
445
172
  BYTE* const ostart = (BYTE*)dst;
@@ -474,7 +201,7 @@ static size_t ZSTD_compressSubBlock_sequences(const ZSTD_fseCTables_t* fseTables
474
201
  const U32 MLtype = fseMetadata->mlType;
475
202
  DEBUGLOG(5, "ZSTD_compressSubBlock_sequences (fseTablesSize=%zu)", fseMetadata->fseTablesSize);
476
203
  *seqHead = (BYTE)((LLtype<<6) + (Offtype<<4) + (MLtype<<2));
477
- memcpy(op, fseMetadata->fseTablesBuffer, fseMetadata->fseTablesSize);
204
+ ZSTD_memcpy(op, fseMetadata->fseTablesBuffer, fseMetadata->fseTablesSize);
478
205
  op += fseMetadata->fseTablesSize;
479
206
  } else {
480
207
  const U32 repeat = set_repeat;
@@ -602,8 +329,8 @@ static size_t ZSTD_estimateSubBlockSize_literal(const BYTE* literals, size_t lit
602
329
  static size_t ZSTD_estimateSubBlockSize_symbolType(symbolEncodingType_e type,
603
330
  const BYTE* codeTable, unsigned maxCode,
604
331
  size_t nbSeq, const FSE_CTable* fseCTable,
605
- const U32* additionalBits,
606
- short const* defaultNorm, U32 defaultNormLog,
332
+ const U8* additionalBits,
333
+ short const* defaultNorm, U32 defaultNormLog, U32 defaultMax,
607
334
  void* workspace, size_t wkspSize)
608
335
  {
609
336
  unsigned* const countWksp = (unsigned*)workspace;
@@ -615,7 +342,11 @@ static size_t ZSTD_estimateSubBlockSize_symbolType(symbolEncodingType_e type,
615
342
 
616
343
  HIST_countFast_wksp(countWksp, &max, codeTable, nbSeq, workspace, wkspSize); /* can't fail */
617
344
  if (type == set_basic) {
618
- cSymbolTypeSizeEstimateInBits = ZSTD_crossEntropyCost(defaultNorm, defaultNormLog, countWksp, max);
345
+ /* We selected this encoding type, so it must be valid. */
346
+ assert(max <= defaultMax);
347
+ cSymbolTypeSizeEstimateInBits = max <= defaultMax
348
+ ? ZSTD_crossEntropyCost(defaultNorm, defaultNormLog, countWksp, max)
349
+ : ERROR(GENERIC);
619
350
  } else if (type == set_rle) {
620
351
  cSymbolTypeSizeEstimateInBits = 0;
621
352
  } else if (type == set_compressed || type == set_repeat) {
@@ -639,19 +370,20 @@ static size_t ZSTD_estimateSubBlockSize_sequences(const BYTE* ofCodeTable,
639
370
  void* workspace, size_t wkspSize,
640
371
  int writeEntropy)
641
372
  {
642
- size_t sequencesSectionHeaderSize = 3; /* Use hard coded size of 3 bytes */
373
+ size_t const sequencesSectionHeaderSize = 3; /* Use hard coded size of 3 bytes */
643
374
  size_t cSeqSizeEstimate = 0;
375
+ if (nbSeq == 0) return sequencesSectionHeaderSize;
644
376
  cSeqSizeEstimate += ZSTD_estimateSubBlockSize_symbolType(fseMetadata->ofType, ofCodeTable, MaxOff,
645
377
  nbSeq, fseTables->offcodeCTable, NULL,
646
- OF_defaultNorm, OF_defaultNormLog,
378
+ OF_defaultNorm, OF_defaultNormLog, DefaultMaxOff,
647
379
  workspace, wkspSize);
648
380
  cSeqSizeEstimate += ZSTD_estimateSubBlockSize_symbolType(fseMetadata->llType, llCodeTable, MaxLL,
649
381
  nbSeq, fseTables->litlengthCTable, LL_bits,
650
- LL_defaultNorm, LL_defaultNormLog,
382
+ LL_defaultNorm, LL_defaultNormLog, MaxLL,
651
383
  workspace, wkspSize);
652
384
  cSeqSizeEstimate += ZSTD_estimateSubBlockSize_symbolType(fseMetadata->mlType, mlCodeTable, MaxML,
653
385
  nbSeq, fseTables->matchlengthCTable, ML_bits,
654
- ML_defaultNorm, ML_defaultNormLog,
386
+ ML_defaultNorm, ML_defaultNormLog, MaxML,
655
387
  workspace, wkspSize);
656
388
  if (writeEntropy) cSeqSizeEstimate += fseMetadata->fseTablesSize;
657
389
  return cSeqSizeEstimate + sequencesSectionHeaderSize;
@@ -747,7 +479,7 @@ static size_t ZSTD_compressSubBlock_multi(const seqStore_t* seqStorePtr,
747
479
  /* I think there is an optimization opportunity here.
748
480
  * Calling ZSTD_estimateSubBlockSize for every sequence can be wasteful
749
481
  * since it recalculates estimate from scratch.
750
- * For example, it would recount literal distribution and symbol codes everytime.
482
+ * For example, it would recount literal distribution and symbol codes every time.
751
483
  */
752
484
  cBlockSizeEstimate = ZSTD_estimateSubBlockSize(lp, litSize, ofCodePtr, llCodePtr, mlCodePtr, seqCount,
753
485
  &nextCBlock->entropy, entropyMetadata,
@@ -790,7 +522,7 @@ static size_t ZSTD_compressSubBlock_multi(const seqStore_t* seqStorePtr,
790
522
  } while (!lastSequence);
791
523
  if (writeLitEntropy) {
792
524
  DEBUGLOG(5, "ZSTD_compressSubBlock_multi has literal entropy tables unwritten");
793
- memcpy(&nextCBlock->entropy.huf, &prevCBlock->entropy.huf, sizeof(prevCBlock->entropy.huf));
525
+ ZSTD_memcpy(&nextCBlock->entropy.huf, &prevCBlock->entropy.huf, sizeof(prevCBlock->entropy.huf));
794
526
  }
795
527
  if (writeSeqEntropy && ZSTD_needSequenceEntropyTables(&entropyMetadata->fseMetadata)) {
796
528
  /* If we haven't written our entropy tables, then we've violated our contract and
@@ -809,11 +541,11 @@ static size_t ZSTD_compressSubBlock_multi(const seqStore_t* seqStorePtr,
809
541
  if (sp < send) {
810
542
  seqDef const* seq;
811
543
  repcodes_t rep;
812
- memcpy(&rep, prevCBlock->rep, sizeof(rep));
544
+ ZSTD_memcpy(&rep, prevCBlock->rep, sizeof(rep));
813
545
  for (seq = sstart; seq < sp; ++seq) {
814
- rep = ZSTD_updateRep(rep.rep, seq->offset - 1, ZSTD_getSequenceLength(seqStorePtr, seq).litLength == 0);
546
+ ZSTD_updateRep(rep.rep, seq->offBase, ZSTD_getSequenceLength(seqStorePtr, seq).litLength == 0);
815
547
  }
816
- memcpy(nextCBlock->rep, &rep, sizeof(rep));
548
+ ZSTD_memcpy(nextCBlock->rep, &rep, sizeof(rep));
817
549
  }
818
550
  }
819
551
  DEBUGLOG(5, "ZSTD_compressSubBlock_multi compressed");
@@ -826,12 +558,12 @@ size_t ZSTD_compressSuperBlock(ZSTD_CCtx* zc,
826
558
  unsigned lastBlock) {
827
559
  ZSTD_entropyCTablesMetadata_t entropyMetadata;
828
560
 
829
- FORWARD_IF_ERROR(ZSTD_buildSuperBlockEntropy(&zc->seqStore,
561
+ FORWARD_IF_ERROR(ZSTD_buildBlockEntropyStats(&zc->seqStore,
830
562
  &zc->blockState.prevCBlock->entropy,
831
563
  &zc->blockState.nextCBlock->entropy,
832
564
  &zc->appliedParams,
833
565
  &entropyMetadata,
834
- zc->entropyWorkspace, HUF_WORKSPACE_SIZE /* statically allocated in resetCCtx */), "");
566
+ zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE /* statically allocated in resetCCtx */), "");
835
567
 
836
568
  return ZSTD_compressSubBlock_multi(&zc->seqStore,
837
569
  zc->blockState.prevCBlock,
@@ -841,5 +573,5 @@ size_t ZSTD_compressSuperBlock(ZSTD_CCtx* zc,
841
573
  dst, dstCapacity,
842
574
  src, srcSize,
843
575
  zc->bmi2, lastBlock,
844
- zc->entropyWorkspace, HUF_WORKSPACE_SIZE /* statically allocated in resetCCtx */);
576
+ zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE /* statically allocated in resetCCtx */);
845
577
  }
@@ -1,5 +1,5 @@
1
1
  /*
2
- * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
2
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
3
3
  * All rights reserved.
4
4
  *
5
5
  * This source code is licensed under both the BSD-style license (found in the