zstd-ruby 1.3.3.0 → 1.3.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/README.md +1 -1
- data/ext/zstdruby/libzstd/BUCK +13 -0
- data/ext/zstdruby/libzstd/README.md +32 -25
- data/ext/zstdruby/libzstd/common/bitstream.h +1 -1
- data/ext/zstdruby/libzstd/common/compiler.h +25 -0
- data/ext/zstdruby/libzstd/common/cpu.h +216 -0
- data/ext/zstdruby/libzstd/common/error_private.c +1 -0
- data/ext/zstdruby/libzstd/common/fse.h +1 -1
- data/ext/zstdruby/libzstd/common/fse_decompress.c +2 -2
- data/ext/zstdruby/libzstd/common/huf.h +114 -89
- data/ext/zstdruby/libzstd/common/pool.c +46 -17
- data/ext/zstdruby/libzstd/common/pool.h +18 -9
- data/ext/zstdruby/libzstd/common/threading.h +12 -12
- data/ext/zstdruby/libzstd/common/zstd_errors.h +16 -7
- data/ext/zstdruby/libzstd/common/zstd_internal.h +4 -5
- data/ext/zstdruby/libzstd/compress/fse_compress.c +19 -11
- data/ext/zstdruby/libzstd/compress/huf_compress.c +160 -62
- data/ext/zstdruby/libzstd/compress/zstd_compress.c +973 -644
- data/ext/zstdruby/libzstd/compress/zstd_compress_internal.h +281 -34
- data/ext/zstdruby/libzstd/compress/zstd_double_fast.c +80 -62
- data/ext/zstdruby/libzstd/compress/zstd_double_fast.h +11 -4
- data/ext/zstdruby/libzstd/compress/zstd_fast.c +87 -71
- data/ext/zstdruby/libzstd/compress/zstd_fast.h +10 -6
- data/ext/zstdruby/libzstd/compress/zstd_lazy.c +333 -274
- data/ext/zstdruby/libzstd/compress/zstd_lazy.h +33 -16
- data/ext/zstdruby/libzstd/compress/zstd_ldm.c +305 -359
- data/ext/zstdruby/libzstd/compress/zstd_ldm.h +64 -21
- data/ext/zstdruby/libzstd/compress/zstd_opt.c +194 -56
- data/ext/zstdruby/libzstd/compress/zstd_opt.h +17 -5
- data/ext/zstdruby/libzstd/compress/zstdmt_compress.c +1131 -449
- data/ext/zstdruby/libzstd/compress/zstdmt_compress.h +32 -16
- data/ext/zstdruby/libzstd/decompress/huf_decompress.c +390 -290
- data/ext/zstdruby/libzstd/decompress/zstd_decompress.c +777 -439
- data/ext/zstdruby/libzstd/dictBuilder/cover.c +11 -8
- data/ext/zstdruby/libzstd/dictBuilder/zdict.c +83 -50
- data/ext/zstdruby/libzstd/dictBuilder/zdict.h +44 -43
- data/ext/zstdruby/libzstd/legacy/zstd_legacy.h +2 -0
- data/ext/zstdruby/libzstd/legacy/zstd_v04.c +42 -118
- data/ext/zstdruby/libzstd/legacy/zstd_v06.c +2 -2
- data/ext/zstdruby/libzstd/legacy/zstd_v07.c +2 -2
- data/ext/zstdruby/libzstd/zstd.h +254 -254
- data/lib/zstd-ruby/version.rb +1 -1
- metadata +4 -3
@@ -30,15 +30,15 @@
|
|
30
30
|
|
31
31
|
/* === Memory management === */
|
32
32
|
typedef struct ZSTDMT_CCtx_s ZSTDMT_CCtx;
|
33
|
-
ZSTDLIB_API ZSTDMT_CCtx* ZSTDMT_createCCtx(unsigned
|
34
|
-
ZSTDLIB_API ZSTDMT_CCtx* ZSTDMT_createCCtx_advanced(unsigned
|
33
|
+
ZSTDLIB_API ZSTDMT_CCtx* ZSTDMT_createCCtx(unsigned nbWorkers);
|
34
|
+
ZSTDLIB_API ZSTDMT_CCtx* ZSTDMT_createCCtx_advanced(unsigned nbWorkers,
|
35
35
|
ZSTD_customMem cMem);
|
36
36
|
ZSTDLIB_API size_t ZSTDMT_freeCCtx(ZSTDMT_CCtx* mtctx);
|
37
37
|
|
38
38
|
ZSTDLIB_API size_t ZSTDMT_sizeof_CCtx(ZSTDMT_CCtx* mtctx);
|
39
39
|
|
40
40
|
|
41
|
-
/* === Simple
|
41
|
+
/* === Simple one-pass compression function === */
|
42
42
|
|
43
43
|
ZSTDLIB_API size_t ZSTDMT_compressCCtx(ZSTDMT_CCtx* mtctx,
|
44
44
|
void* dst, size_t dstCapacity,
|
@@ -50,7 +50,7 @@ ZSTDLIB_API size_t ZSTDMT_compressCCtx(ZSTDMT_CCtx* mtctx,
|
|
50
50
|
/* === Streaming functions === */
|
51
51
|
|
52
52
|
ZSTDLIB_API size_t ZSTDMT_initCStream(ZSTDMT_CCtx* mtctx, int compressionLevel);
|
53
|
-
ZSTDLIB_API size_t ZSTDMT_resetCStream(ZSTDMT_CCtx* mtctx, unsigned long long pledgedSrcSize); /**< if srcSize is not known at reset time, use ZSTD_CONTENTSIZE_UNKNOWN. Note: for compatibility with older programs, 0 means the same as ZSTD_CONTENTSIZE_UNKNOWN, but it
|
53
|
+
ZSTDLIB_API size_t ZSTDMT_resetCStream(ZSTDMT_CCtx* mtctx, unsigned long long pledgedSrcSize); /**< if srcSize is not known at reset time, use ZSTD_CONTENTSIZE_UNKNOWN. Note: for compatibility with older programs, 0 means the same as ZSTD_CONTENTSIZE_UNKNOWN, but it will change in the future to mean "empty" */
|
54
54
|
|
55
55
|
ZSTDLIB_API size_t ZSTDMT_compressStream(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output, ZSTD_inBuffer* input);
|
56
56
|
|
@@ -68,7 +68,7 @@ ZSTDLIB_API size_t ZSTDMT_compress_advanced(ZSTDMT_CCtx* mtctx,
|
|
68
68
|
void* dst, size_t dstCapacity,
|
69
69
|
const void* src, size_t srcSize,
|
70
70
|
const ZSTD_CDict* cdict,
|
71
|
-
ZSTD_parameters
|
71
|
+
ZSTD_parameters params,
|
72
72
|
unsigned overlapLog);
|
73
73
|
|
74
74
|
ZSTDLIB_API size_t ZSTDMT_initCStream_advanced(ZSTDMT_CCtx* mtctx,
|
@@ -85,7 +85,7 @@ ZSTDLIB_API size_t ZSTDMT_initCStream_usingCDict(ZSTDMT_CCtx* mtctx,
|
|
85
85
|
* List of parameters that can be set using ZSTDMT_setMTCtxParameter() */
|
86
86
|
typedef enum {
|
87
87
|
ZSTDMT_p_jobSize, /* Each job is compressed in parallel. By default, this value is dynamically determined depending on compression parameters. Can be set explicitly here. */
|
88
|
-
ZSTDMT_p_overlapSectionLog /* Each job may reload a part of previous job to enhance compressionr ratio; 0 == no overlap, 6(default) == use 1/8th of window, >=9 == use full window */
|
88
|
+
ZSTDMT_p_overlapSectionLog /* Each job may reload a part of previous job to enhance compression ratio; 0 == no overlap, 6(default) == use 1/8th of window, >=9 == use full window. This is a "sticky" parameter : its value will be re-used on next compression job */
|
89
89
|
} ZSTDMT_parameter;
|
90
90
|
|
91
91
|
/* ZSTDMT_setMTCtxParameter() :
|
@@ -97,30 +97,46 @@ ZSTDLIB_API size_t ZSTDMT_setMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSTDMT_parameter
|
|
97
97
|
|
98
98
|
|
99
99
|
/*! ZSTDMT_compressStream_generic() :
|
100
|
-
* Combines ZSTDMT_compressStream() with ZSTDMT_flushStream() or ZSTDMT_endStream()
|
100
|
+
* Combines ZSTDMT_compressStream() with optional ZSTDMT_flushStream() or ZSTDMT_endStream()
|
101
101
|
* depending on flush directive.
|
102
102
|
* @return : minimum amount of data still to be flushed
|
103
103
|
* 0 if fully flushed
|
104
|
-
* or an error code
|
104
|
+
* or an error code
|
105
|
+
* note : needs to be initialized using any ZSTD_initCStream*() variant */
|
105
106
|
ZSTDLIB_API size_t ZSTDMT_compressStream_generic(ZSTDMT_CCtx* mtctx,
|
106
107
|
ZSTD_outBuffer* output,
|
107
108
|
ZSTD_inBuffer* input,
|
108
109
|
ZSTD_EndDirective endOp);
|
109
110
|
|
110
111
|
|
111
|
-
/*
|
112
|
+
/* ========================================================
|
113
|
+
* === Private interface, for use by ZSTD_compress.c ===
|
114
|
+
* === Not exposed in libzstd. Never invoke directly ===
|
115
|
+
* ======================================================== */
|
112
116
|
|
113
117
|
size_t ZSTDMT_CCtxParam_setMTCtxParameter(ZSTD_CCtx_params* params, ZSTDMT_parameter parameter, unsigned value);
|
114
118
|
|
115
|
-
/*
|
116
|
-
* Set
|
117
|
-
*
|
118
|
-
size_t
|
119
|
+
/* ZSTDMT_CCtxParam_setNbWorkers()
|
120
|
+
* Set nbWorkers, and clamp it.
|
121
|
+
* Also reset jobSize and overlapLog */
|
122
|
+
size_t ZSTDMT_CCtxParam_setNbWorkers(ZSTD_CCtx_params* params, unsigned nbWorkers);
|
119
123
|
|
120
|
-
|
124
|
+
/*! ZSTDMT_updateCParams_whileCompressing() :
|
125
|
+
* Updates only a selected set of compression parameters, to remain compatible with current frame.
|
126
|
+
* New parameters will be applied to next compression job. */
|
127
|
+
void ZSTDMT_updateCParams_whileCompressing(ZSTDMT_CCtx* mtctx, const ZSTD_CCtx_params* cctxParams);
|
128
|
+
|
129
|
+
/* ZSTDMT_getNbWorkers():
|
121
130
|
* @return nb threads currently active in mtctx.
|
122
131
|
* mtctx must be valid */
|
123
|
-
|
132
|
+
unsigned ZSTDMT_getNbWorkers(const ZSTDMT_CCtx* mtctx);
|
133
|
+
|
134
|
+
/* ZSTDMT_getFrameProgression():
|
135
|
+
* tells how much data has been consumed (input) and produced (output) for current frame.
|
136
|
+
* able to count progression inside worker threads.
|
137
|
+
*/
|
138
|
+
ZSTD_frameProgression ZSTDMT_getFrameProgression(ZSTDMT_CCtx* mtctx);
|
139
|
+
|
124
140
|
|
125
141
|
/*! ZSTDMT_initCStream_internal() :
|
126
142
|
* Private use only. Init streaming operation.
|
@@ -128,7 +144,7 @@ size_t ZSTDMT_getNbThreads(const ZSTDMT_CCtx* mtctx);
|
|
128
144
|
* must receive dict, or cdict, or none, but not both.
|
129
145
|
* @return : 0, or an error code */
|
130
146
|
size_t ZSTDMT_initCStream_internal(ZSTDMT_CCtx* zcs,
|
131
|
-
const void* dict, size_t dictSize,
|
147
|
+
const void* dict, size_t dictSize, ZSTD_dictContentType_e dictContentType,
|
132
148
|
const ZSTD_CDict* cdict,
|
133
149
|
ZSTD_CCtx_params params, unsigned long long pledgedSrcSize);
|
134
150
|
|
@@ -49,18 +49,19 @@
|
|
49
49
|
****************************************************************/
|
50
50
|
#define HUF_isError ERR_isError
|
51
51
|
#define HUF_STATIC_ASSERT(c) { enum { HUF_static_assert = 1/(int)(!!(c)) }; } /* use only *after* variable declarations */
|
52
|
+
#define CHECK_F(f) { size_t const err_ = (f); if (HUF_isError(err_)) return err_; }
|
52
53
|
|
53
54
|
|
54
55
|
/* **************************************************************
|
55
56
|
* Byte alignment for workSpace management
|
56
57
|
****************************************************************/
|
57
|
-
#define HUF_ALIGN(x, a)
|
58
|
+
#define HUF_ALIGN(x, a) HUF_ALIGN_MASK((x), (a) - 1)
|
58
59
|
#define HUF_ALIGN_MASK(x, mask) (((x) + (mask)) & ~(mask))
|
59
60
|
|
61
|
+
|
60
62
|
/*-***************************/
|
61
63
|
/* generic DTableDesc */
|
62
64
|
/*-***************************/
|
63
|
-
|
64
65
|
typedef struct { BYTE maxTableLog; BYTE tableType; BYTE tableLog; BYTE reserved; } DTableDesc;
|
65
66
|
|
66
67
|
static DTableDesc HUF_getDTableDesc(const HUF_DTable* table)
|
@@ -74,7 +75,6 @@ static DTableDesc HUF_getDTableDesc(const HUF_DTable* table)
|
|
74
75
|
/*-***************************/
|
75
76
|
/* single-symbol decoding */
|
76
77
|
/*-***************************/
|
77
|
-
|
78
78
|
typedef struct { BYTE byte; BYTE nbBits; } HUF_DEltX2; /* single-symbol decoding */
|
79
79
|
|
80
80
|
size_t HUF_readDTableX2_wksp(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize)
|
@@ -94,10 +94,7 @@ size_t HUF_readDTableX2_wksp(HUF_DTable* DTable, const void* src, size_t srcSize
|
|
94
94
|
huffWeight = (BYTE *)((U32 *)workSpace + spaceUsed32);
|
95
95
|
spaceUsed32 += HUF_ALIGN(HUF_SYMBOLVALUE_MAX + 1, sizeof(U32)) >> 2;
|
96
96
|
|
97
|
-
if ((spaceUsed32 << 2) > wkspSize)
|
98
|
-
return ERROR(tableLog_tooLarge);
|
99
|
-
workSpace = (U32 *)workSpace + spaceUsed32;
|
100
|
-
wkspSize -= (spaceUsed32 << 2);
|
97
|
+
if ((spaceUsed32 << 2) > wkspSize) return ERROR(tableLog_tooLarge);
|
101
98
|
|
102
99
|
HUF_STATIC_ASSERT(sizeof(DTableDesc) == sizeof(HUF_DTable));
|
103
100
|
/* memset(huffWeight, 0, sizeof(huffWeight)); */ /* is not necessary, even though some analyzers complain ... */
|
@@ -144,8 +141,10 @@ size_t HUF_readDTableX2(HUF_DTable* DTable, const void* src, size_t srcSize)
|
|
144
141
|
workSpace, sizeof(workSpace));
|
145
142
|
}
|
146
143
|
|
144
|
+
typedef struct { U16 sequence; BYTE nbBits; BYTE length; } HUF_DEltX4; /* double-symbols decoding */
|
147
145
|
|
148
|
-
|
146
|
+
FORCE_INLINE_TEMPLATE BYTE
|
147
|
+
HUF_decodeSymbolX2(BIT_DStream_t* Dstream, const HUF_DEltX2* dt, const U32 dtLog)
|
149
148
|
{
|
150
149
|
size_t const val = BIT_lookBitsFast(Dstream, dtLog); /* note : dtLog >= 1 */
|
151
150
|
BYTE const c = dt[val].byte;
|
@@ -156,7 +155,7 @@ static BYTE HUF_decodeSymbolX2(BIT_DStream_t* Dstream, const HUF_DEltX2* dt, con
|
|
156
155
|
#define HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr) \
|
157
156
|
*ptr++ = HUF_decodeSymbolX2(DStreamPtr, dt, dtLog)
|
158
157
|
|
159
|
-
#define HUF_DECODE_SYMBOLX2_1(ptr, DStreamPtr)
|
158
|
+
#define HUF_DECODE_SYMBOLX2_1(ptr, DStreamPtr) \
|
160
159
|
if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \
|
161
160
|
HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr)
|
162
161
|
|
@@ -164,30 +163,33 @@ static BYTE HUF_decodeSymbolX2(BIT_DStream_t* Dstream, const HUF_DEltX2* dt, con
|
|
164
163
|
if (MEM_64bits()) \
|
165
164
|
HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr)
|
166
165
|
|
167
|
-
HINT_INLINE size_t
|
166
|
+
HINT_INLINE size_t
|
167
|
+
HUF_decodeStreamX2(BYTE* p, BIT_DStream_t* const bitDPtr, BYTE* const pEnd, const HUF_DEltX2* const dt, const U32 dtLog)
|
168
168
|
{
|
169
169
|
BYTE* const pStart = p;
|
170
170
|
|
171
171
|
/* up to 4 symbols at a time */
|
172
|
-
while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished)
|
172
|
+
while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd-3)) {
|
173
173
|
HUF_DECODE_SYMBOLX2_2(p, bitDPtr);
|
174
174
|
HUF_DECODE_SYMBOLX2_1(p, bitDPtr);
|
175
175
|
HUF_DECODE_SYMBOLX2_2(p, bitDPtr);
|
176
176
|
HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
|
177
177
|
}
|
178
178
|
|
179
|
-
/*
|
180
|
-
|
181
|
-
|
179
|
+
/* [0-3] symbols remaining */
|
180
|
+
if (MEM_32bits())
|
181
|
+
while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd))
|
182
|
+
HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
|
182
183
|
|
183
|
-
/* no more data to retrieve from bitstream,
|
184
|
+
/* no more data to retrieve from bitstream, no need to reload */
|
184
185
|
while (p < pEnd)
|
185
186
|
HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
|
186
187
|
|
187
188
|
return pEnd-pStart;
|
188
189
|
}
|
189
190
|
|
190
|
-
|
191
|
+
FORCE_INLINE_TEMPLATE size_t
|
192
|
+
HUF_decompress1X2_usingDTable_internal_body(
|
191
193
|
void* dst, size_t dstSize,
|
192
194
|
const void* cSrc, size_t cSrcSize,
|
193
195
|
const HUF_DTable* DTable)
|
@@ -200,58 +202,17 @@ static size_t HUF_decompress1X2_usingDTable_internal(
|
|
200
202
|
DTableDesc const dtd = HUF_getDTableDesc(DTable);
|
201
203
|
U32 const dtLog = dtd.tableLog;
|
202
204
|
|
203
|
-
|
204
|
-
if (HUF_isError(errorCode)) return errorCode; }
|
205
|
+
CHECK_F( BIT_initDStream(&bitD, cSrc, cSrcSize) );
|
205
206
|
|
206
207
|
HUF_decodeStreamX2(op, &bitD, oend, dt, dtLog);
|
207
208
|
|
208
|
-
/* check */
|
209
209
|
if (!BIT_endOfDStream(&bitD)) return ERROR(corruption_detected);
|
210
210
|
|
211
211
|
return dstSize;
|
212
212
|
}
|
213
213
|
|
214
|
-
size_t
|
215
|
-
|
216
|
-
const void* cSrc, size_t cSrcSize,
|
217
|
-
const HUF_DTable* DTable)
|
218
|
-
{
|
219
|
-
DTableDesc dtd = HUF_getDTableDesc(DTable);
|
220
|
-
if (dtd.tableType != 0) return ERROR(GENERIC);
|
221
|
-
return HUF_decompress1X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable);
|
222
|
-
}
|
223
|
-
|
224
|
-
size_t HUF_decompress1X2_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize,
|
225
|
-
const void* cSrc, size_t cSrcSize,
|
226
|
-
void* workSpace, size_t wkspSize)
|
227
|
-
{
|
228
|
-
const BYTE* ip = (const BYTE*) cSrc;
|
229
|
-
|
230
|
-
size_t const hSize = HUF_readDTableX2_wksp(DCtx, cSrc, cSrcSize, workSpace, wkspSize);
|
231
|
-
if (HUF_isError(hSize)) return hSize;
|
232
|
-
if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
|
233
|
-
ip += hSize; cSrcSize -= hSize;
|
234
|
-
|
235
|
-
return HUF_decompress1X2_usingDTable_internal (dst, dstSize, ip, cSrcSize, DCtx);
|
236
|
-
}
|
237
|
-
|
238
|
-
|
239
|
-
size_t HUF_decompress1X2_DCtx(HUF_DTable* DCtx, void* dst, size_t dstSize,
|
240
|
-
const void* cSrc, size_t cSrcSize)
|
241
|
-
{
|
242
|
-
U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
|
243
|
-
return HUF_decompress1X2_DCtx_wksp(DCtx, dst, dstSize, cSrc, cSrcSize,
|
244
|
-
workSpace, sizeof(workSpace));
|
245
|
-
}
|
246
|
-
|
247
|
-
size_t HUF_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
|
248
|
-
{
|
249
|
-
HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_TABLELOG_MAX);
|
250
|
-
return HUF_decompress1X2_DCtx (DTable, dst, dstSize, cSrc, cSrcSize);
|
251
|
-
}
|
252
|
-
|
253
|
-
|
254
|
-
static size_t HUF_decompress4X2_usingDTable_internal(
|
214
|
+
FORCE_INLINE_TEMPLATE size_t
|
215
|
+
HUF_decompress4X2_usingDTable_internal_body(
|
255
216
|
void* dst, size_t dstSize,
|
256
217
|
const void* cSrc, size_t cSrcSize,
|
257
218
|
const HUF_DTable* DTable)
|
@@ -286,23 +247,19 @@ static size_t HUF_decompress4X2_usingDTable_internal(
|
|
286
247
|
BYTE* op2 = opStart2;
|
287
248
|
BYTE* op3 = opStart3;
|
288
249
|
BYTE* op4 = opStart4;
|
289
|
-
U32 endSignal;
|
250
|
+
U32 endSignal = BIT_DStream_unfinished;
|
290
251
|
DTableDesc const dtd = HUF_getDTableDesc(DTable);
|
291
252
|
U32 const dtLog = dtd.tableLog;
|
292
253
|
|
293
254
|
if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */
|
294
|
-
|
295
|
-
|
296
|
-
|
297
|
-
|
298
|
-
{ size_t const errorCode = BIT_initDStream(&bitD3, istart3, length3);
|
299
|
-
if (HUF_isError(errorCode)) return errorCode; }
|
300
|
-
{ size_t const errorCode = BIT_initDStream(&bitD4, istart4, length4);
|
301
|
-
if (HUF_isError(errorCode)) return errorCode; }
|
255
|
+
CHECK_F( BIT_initDStream(&bitD1, istart1, length1) );
|
256
|
+
CHECK_F( BIT_initDStream(&bitD2, istart2, length2) );
|
257
|
+
CHECK_F( BIT_initDStream(&bitD3, istart3, length3) );
|
258
|
+
CHECK_F( BIT_initDStream(&bitD4, istart4, length4) );
|
302
259
|
|
303
|
-
/* 16
|
260
|
+
/* up to 16 symbols per loop (4 symbols per stream) in 64-bit mode */
|
304
261
|
endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
|
305
|
-
|
262
|
+
while ( (endSignal==BIT_DStream_unfinished) && (op4<(oend-3)) ) {
|
306
263
|
HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
|
307
264
|
HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
|
308
265
|
HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
|
@@ -319,10 +276,15 @@ static size_t HUF_decompress4X2_usingDTable_internal(
|
|
319
276
|
HUF_DECODE_SYMBOLX2_0(op2, &bitD2);
|
320
277
|
HUF_DECODE_SYMBOLX2_0(op3, &bitD3);
|
321
278
|
HUF_DECODE_SYMBOLX2_0(op4, &bitD4);
|
322
|
-
|
279
|
+
BIT_reloadDStream(&bitD1);
|
280
|
+
BIT_reloadDStream(&bitD2);
|
281
|
+
BIT_reloadDStream(&bitD3);
|
282
|
+
BIT_reloadDStream(&bitD4);
|
323
283
|
}
|
324
284
|
|
325
285
|
/* check corruption */
|
286
|
+
/* note : should not be necessary : op# advance in lock step, and we control op4.
|
287
|
+
* but curiously, binary generated by gcc 7.2 & 7.3 with -mbmi2 runs faster when >=1 test is present */
|
326
288
|
if (op1 > opStart2) return ERROR(corruption_detected);
|
327
289
|
if (op2 > opStart3) return ERROR(corruption_detected);
|
328
290
|
if (op3 > opStart4) return ERROR(corruption_detected);
|
@@ -335,8 +297,8 @@ static size_t HUF_decompress4X2_usingDTable_internal(
|
|
335
297
|
HUF_decodeStreamX2(op4, &bitD4, oend, dt, dtLog);
|
336
298
|
|
337
299
|
/* check */
|
338
|
-
|
339
|
-
|
300
|
+
{ U32 const endCheck = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4);
|
301
|
+
if (!endCheck) return ERROR(corruption_detected); }
|
340
302
|
|
341
303
|
/* decoded size */
|
342
304
|
return dstSize;
|
@@ -344,30 +306,309 @@ static size_t HUF_decompress4X2_usingDTable_internal(
|
|
344
306
|
}
|
345
307
|
|
346
308
|
|
347
|
-
|
309
|
+
FORCE_INLINE_TEMPLATE U32
|
310
|
+
HUF_decodeSymbolX4(void* op, BIT_DStream_t* DStream, const HUF_DEltX4* dt, const U32 dtLog)
|
311
|
+
{
|
312
|
+
size_t const val = BIT_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */
|
313
|
+
memcpy(op, dt+val, 2);
|
314
|
+
BIT_skipBits(DStream, dt[val].nbBits);
|
315
|
+
return dt[val].length;
|
316
|
+
}
|
317
|
+
|
318
|
+
FORCE_INLINE_TEMPLATE U32
|
319
|
+
HUF_decodeLastSymbolX4(void* op, BIT_DStream_t* DStream, const HUF_DEltX4* dt, const U32 dtLog)
|
320
|
+
{
|
321
|
+
size_t const val = BIT_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */
|
322
|
+
memcpy(op, dt+val, 1);
|
323
|
+
if (dt[val].length==1) BIT_skipBits(DStream, dt[val].nbBits);
|
324
|
+
else {
|
325
|
+
if (DStream->bitsConsumed < (sizeof(DStream->bitContainer)*8)) {
|
326
|
+
BIT_skipBits(DStream, dt[val].nbBits);
|
327
|
+
if (DStream->bitsConsumed > (sizeof(DStream->bitContainer)*8))
|
328
|
+
/* ugly hack; works only because it's the last symbol. Note : can't easily extract nbBits from just this symbol */
|
329
|
+
DStream->bitsConsumed = (sizeof(DStream->bitContainer)*8);
|
330
|
+
} }
|
331
|
+
return 1;
|
332
|
+
}
|
333
|
+
|
334
|
+
#define HUF_DECODE_SYMBOLX4_0(ptr, DStreamPtr) \
|
335
|
+
ptr += HUF_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog)
|
336
|
+
|
337
|
+
#define HUF_DECODE_SYMBOLX4_1(ptr, DStreamPtr) \
|
338
|
+
if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \
|
339
|
+
ptr += HUF_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog)
|
340
|
+
|
341
|
+
#define HUF_DECODE_SYMBOLX4_2(ptr, DStreamPtr) \
|
342
|
+
if (MEM_64bits()) \
|
343
|
+
ptr += HUF_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog)
|
344
|
+
|
345
|
+
HINT_INLINE size_t
|
346
|
+
HUF_decodeStreamX4(BYTE* p, BIT_DStream_t* bitDPtr, BYTE* const pEnd,
|
347
|
+
const HUF_DEltX4* const dt, const U32 dtLog)
|
348
|
+
{
|
349
|
+
BYTE* const pStart = p;
|
350
|
+
|
351
|
+
/* up to 8 symbols at a time */
|
352
|
+
while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd-(sizeof(bitDPtr->bitContainer)-1))) {
|
353
|
+
HUF_DECODE_SYMBOLX4_2(p, bitDPtr);
|
354
|
+
HUF_DECODE_SYMBOLX4_1(p, bitDPtr);
|
355
|
+
HUF_DECODE_SYMBOLX4_2(p, bitDPtr);
|
356
|
+
HUF_DECODE_SYMBOLX4_0(p, bitDPtr);
|
357
|
+
}
|
358
|
+
|
359
|
+
/* closer to end : up to 2 symbols at a time */
|
360
|
+
while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p <= pEnd-2))
|
361
|
+
HUF_DECODE_SYMBOLX4_0(p, bitDPtr);
|
362
|
+
|
363
|
+
while (p <= pEnd-2)
|
364
|
+
HUF_DECODE_SYMBOLX4_0(p, bitDPtr); /* no need to reload : reached the end of DStream */
|
365
|
+
|
366
|
+
if (p < pEnd)
|
367
|
+
p += HUF_decodeLastSymbolX4(p, bitDPtr, dt, dtLog);
|
368
|
+
|
369
|
+
return p-pStart;
|
370
|
+
}
|
371
|
+
|
372
|
+
FORCE_INLINE_TEMPLATE size_t
|
373
|
+
HUF_decompress1X4_usingDTable_internal_body(
|
374
|
+
void* dst, size_t dstSize,
|
375
|
+
const void* cSrc, size_t cSrcSize,
|
376
|
+
const HUF_DTable* DTable)
|
377
|
+
{
|
378
|
+
BIT_DStream_t bitD;
|
379
|
+
|
380
|
+
/* Init */
|
381
|
+
CHECK_F( BIT_initDStream(&bitD, cSrc, cSrcSize) );
|
382
|
+
|
383
|
+
/* decode */
|
384
|
+
{ BYTE* const ostart = (BYTE*) dst;
|
385
|
+
BYTE* const oend = ostart + dstSize;
|
386
|
+
const void* const dtPtr = DTable+1; /* force compiler to not use strict-aliasing */
|
387
|
+
const HUF_DEltX4* const dt = (const HUF_DEltX4*)dtPtr;
|
388
|
+
DTableDesc const dtd = HUF_getDTableDesc(DTable);
|
389
|
+
HUF_decodeStreamX4(ostart, &bitD, oend, dt, dtd.tableLog);
|
390
|
+
}
|
391
|
+
|
392
|
+
/* check */
|
393
|
+
if (!BIT_endOfDStream(&bitD)) return ERROR(corruption_detected);
|
394
|
+
|
395
|
+
/* decoded size */
|
396
|
+
return dstSize;
|
397
|
+
}
|
398
|
+
|
399
|
+
|
400
|
+
FORCE_INLINE_TEMPLATE size_t
|
401
|
+
HUF_decompress4X4_usingDTable_internal_body(
|
402
|
+
void* dst, size_t dstSize,
|
403
|
+
const void* cSrc, size_t cSrcSize,
|
404
|
+
const HUF_DTable* DTable)
|
405
|
+
{
|
406
|
+
if (cSrcSize < 10) return ERROR(corruption_detected); /* strict minimum : jump table + 1 byte per stream */
|
407
|
+
|
408
|
+
{ const BYTE* const istart = (const BYTE*) cSrc;
|
409
|
+
BYTE* const ostart = (BYTE*) dst;
|
410
|
+
BYTE* const oend = ostart + dstSize;
|
411
|
+
const void* const dtPtr = DTable+1;
|
412
|
+
const HUF_DEltX4* const dt = (const HUF_DEltX4*)dtPtr;
|
413
|
+
|
414
|
+
/* Init */
|
415
|
+
BIT_DStream_t bitD1;
|
416
|
+
BIT_DStream_t bitD2;
|
417
|
+
BIT_DStream_t bitD3;
|
418
|
+
BIT_DStream_t bitD4;
|
419
|
+
size_t const length1 = MEM_readLE16(istart);
|
420
|
+
size_t const length2 = MEM_readLE16(istart+2);
|
421
|
+
size_t const length3 = MEM_readLE16(istart+4);
|
422
|
+
size_t const length4 = cSrcSize - (length1 + length2 + length3 + 6);
|
423
|
+
const BYTE* const istart1 = istart + 6; /* jumpTable */
|
424
|
+
const BYTE* const istart2 = istart1 + length1;
|
425
|
+
const BYTE* const istart3 = istart2 + length2;
|
426
|
+
const BYTE* const istart4 = istart3 + length3;
|
427
|
+
size_t const segmentSize = (dstSize+3) / 4;
|
428
|
+
BYTE* const opStart2 = ostart + segmentSize;
|
429
|
+
BYTE* const opStart3 = opStart2 + segmentSize;
|
430
|
+
BYTE* const opStart4 = opStart3 + segmentSize;
|
431
|
+
BYTE* op1 = ostart;
|
432
|
+
BYTE* op2 = opStart2;
|
433
|
+
BYTE* op3 = opStart3;
|
434
|
+
BYTE* op4 = opStart4;
|
435
|
+
U32 endSignal;
|
436
|
+
DTableDesc const dtd = HUF_getDTableDesc(DTable);
|
437
|
+
U32 const dtLog = dtd.tableLog;
|
438
|
+
|
439
|
+
if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */
|
440
|
+
CHECK_F( BIT_initDStream(&bitD1, istart1, length1) );
|
441
|
+
CHECK_F( BIT_initDStream(&bitD2, istart2, length2) );
|
442
|
+
CHECK_F( BIT_initDStream(&bitD3, istart3, length3) );
|
443
|
+
CHECK_F( BIT_initDStream(&bitD4, istart4, length4) );
|
444
|
+
|
445
|
+
/* 16-32 symbols per loop (4-8 symbols per stream) */
|
446
|
+
endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
|
447
|
+
for ( ; (endSignal==BIT_DStream_unfinished) & (op4<(oend-(sizeof(bitD4.bitContainer)-1))) ; ) {
|
448
|
+
HUF_DECODE_SYMBOLX4_2(op1, &bitD1);
|
449
|
+
HUF_DECODE_SYMBOLX4_2(op2, &bitD2);
|
450
|
+
HUF_DECODE_SYMBOLX4_2(op3, &bitD3);
|
451
|
+
HUF_DECODE_SYMBOLX4_2(op4, &bitD4);
|
452
|
+
HUF_DECODE_SYMBOLX4_1(op1, &bitD1);
|
453
|
+
HUF_DECODE_SYMBOLX4_1(op2, &bitD2);
|
454
|
+
HUF_DECODE_SYMBOLX4_1(op3, &bitD3);
|
455
|
+
HUF_DECODE_SYMBOLX4_1(op4, &bitD4);
|
456
|
+
HUF_DECODE_SYMBOLX4_2(op1, &bitD1);
|
457
|
+
HUF_DECODE_SYMBOLX4_2(op2, &bitD2);
|
458
|
+
HUF_DECODE_SYMBOLX4_2(op3, &bitD3);
|
459
|
+
HUF_DECODE_SYMBOLX4_2(op4, &bitD4);
|
460
|
+
HUF_DECODE_SYMBOLX4_0(op1, &bitD1);
|
461
|
+
HUF_DECODE_SYMBOLX4_0(op2, &bitD2);
|
462
|
+
HUF_DECODE_SYMBOLX4_0(op3, &bitD3);
|
463
|
+
HUF_DECODE_SYMBOLX4_0(op4, &bitD4);
|
464
|
+
|
465
|
+
endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
|
466
|
+
}
|
467
|
+
|
468
|
+
/* check corruption */
|
469
|
+
if (op1 > opStart2) return ERROR(corruption_detected);
|
470
|
+
if (op2 > opStart3) return ERROR(corruption_detected);
|
471
|
+
if (op3 > opStart4) return ERROR(corruption_detected);
|
472
|
+
/* note : op4 already verified within main loop */
|
473
|
+
|
474
|
+
/* finish bitStreams one by one */
|
475
|
+
HUF_decodeStreamX4(op1, &bitD1, opStart2, dt, dtLog);
|
476
|
+
HUF_decodeStreamX4(op2, &bitD2, opStart3, dt, dtLog);
|
477
|
+
HUF_decodeStreamX4(op3, &bitD3, opStart4, dt, dtLog);
|
478
|
+
HUF_decodeStreamX4(op4, &bitD4, oend, dt, dtLog);
|
479
|
+
|
480
|
+
/* check */
|
481
|
+
{ U32 const endCheck = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4);
|
482
|
+
if (!endCheck) return ERROR(corruption_detected); }
|
483
|
+
|
484
|
+
/* decoded size */
|
485
|
+
return dstSize;
|
486
|
+
}
|
487
|
+
}
|
488
|
+
|
489
|
+
|
490
|
+
typedef size_t (*HUF_decompress_usingDTable_t)(void *dst, size_t dstSize,
|
491
|
+
const void *cSrc,
|
492
|
+
size_t cSrcSize,
|
493
|
+
const HUF_DTable *DTable);
|
494
|
+
#if DYNAMIC_BMI2
|
495
|
+
|
496
|
+
#define X(fn) \
|
497
|
+
\
|
498
|
+
static size_t fn##_default( \
|
499
|
+
void* dst, size_t dstSize, \
|
500
|
+
const void* cSrc, size_t cSrcSize, \
|
501
|
+
const HUF_DTable* DTable) \
|
502
|
+
{ \
|
503
|
+
return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable); \
|
504
|
+
} \
|
505
|
+
\
|
506
|
+
static TARGET_ATTRIBUTE("bmi2") size_t fn##_bmi2( \
|
507
|
+
void* dst, size_t dstSize, \
|
508
|
+
const void* cSrc, size_t cSrcSize, \
|
509
|
+
const HUF_DTable* DTable) \
|
510
|
+
{ \
|
511
|
+
return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable); \
|
512
|
+
} \
|
513
|
+
\
|
514
|
+
static size_t fn(void* dst, size_t dstSize, void const* cSrc, \
|
515
|
+
size_t cSrcSize, HUF_DTable const* DTable, int bmi2) \
|
516
|
+
{ \
|
517
|
+
if (bmi2) { \
|
518
|
+
return fn##_bmi2(dst, dstSize, cSrc, cSrcSize, DTable); \
|
519
|
+
} \
|
520
|
+
return fn##_default(dst, dstSize, cSrc, cSrcSize, DTable); \
|
521
|
+
}
|
522
|
+
|
523
|
+
#else
|
524
|
+
|
525
|
+
#define X(fn) \
|
526
|
+
static size_t fn(void* dst, size_t dstSize, void const* cSrc, \
|
527
|
+
size_t cSrcSize, HUF_DTable const* DTable, int bmi2) \
|
528
|
+
{ \
|
529
|
+
(void)bmi2; \
|
530
|
+
return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable); \
|
531
|
+
}
|
532
|
+
|
533
|
+
#endif
|
534
|
+
|
535
|
+
X(HUF_decompress1X2_usingDTable_internal)
|
536
|
+
X(HUF_decompress4X2_usingDTable_internal)
|
537
|
+
X(HUF_decompress1X4_usingDTable_internal)
|
538
|
+
X(HUF_decompress4X4_usingDTable_internal)
|
539
|
+
|
540
|
+
#undef X
|
541
|
+
|
542
|
+
|
543
|
+
size_t HUF_decompress1X2_usingDTable(
|
348
544
|
void* dst, size_t dstSize,
|
349
545
|
const void* cSrc, size_t cSrcSize,
|
350
546
|
const HUF_DTable* DTable)
|
351
547
|
{
|
352
548
|
DTableDesc dtd = HUF_getDTableDesc(DTable);
|
353
549
|
if (dtd.tableType != 0) return ERROR(GENERIC);
|
354
|
-
return
|
550
|
+
return HUF_decompress1X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
|
355
551
|
}
|
356
552
|
|
357
|
-
|
358
|
-
size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
|
553
|
+
size_t HUF_decompress1X2_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize,
|
359
554
|
const void* cSrc, size_t cSrcSize,
|
360
555
|
void* workSpace, size_t wkspSize)
|
361
556
|
{
|
362
557
|
const BYTE* ip = (const BYTE*) cSrc;
|
363
558
|
|
559
|
+
size_t const hSize = HUF_readDTableX2_wksp(DCtx, cSrc, cSrcSize, workSpace, wkspSize);
|
560
|
+
if (HUF_isError(hSize)) return hSize;
|
561
|
+
if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
|
562
|
+
ip += hSize; cSrcSize -= hSize;
|
563
|
+
|
564
|
+
return HUF_decompress1X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, DCtx, /* bmi2 */ 0);
|
565
|
+
}
|
566
|
+
|
567
|
+
|
568
|
+
size_t HUF_decompress1X2_DCtx(HUF_DTable* DCtx, void* dst, size_t dstSize,
|
569
|
+
const void* cSrc, size_t cSrcSize)
|
570
|
+
{
|
571
|
+
U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
|
572
|
+
return HUF_decompress1X2_DCtx_wksp(DCtx, dst, dstSize, cSrc, cSrcSize,
|
573
|
+
workSpace, sizeof(workSpace));
|
574
|
+
}
|
575
|
+
|
576
|
+
size_t HUF_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
|
577
|
+
{
|
578
|
+
HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_TABLELOG_MAX);
|
579
|
+
return HUF_decompress1X2_DCtx (DTable, dst, dstSize, cSrc, cSrcSize);
|
580
|
+
}
|
581
|
+
|
582
|
+
size_t HUF_decompress4X2_usingDTable(
|
583
|
+
void* dst, size_t dstSize,
|
584
|
+
const void* cSrc, size_t cSrcSize,
|
585
|
+
const HUF_DTable* DTable)
|
586
|
+
{
|
587
|
+
DTableDesc dtd = HUF_getDTableDesc(DTable);
|
588
|
+
if (dtd.tableType != 0) return ERROR(GENERIC);
|
589
|
+
return HUF_decompress4X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
|
590
|
+
}
|
591
|
+
|
592
|
+
static size_t HUF_decompress4X2_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize,
|
593
|
+
const void* cSrc, size_t cSrcSize,
|
594
|
+
void* workSpace, size_t wkspSize, int bmi2)
|
595
|
+
{
|
596
|
+
const BYTE* ip = (const BYTE*) cSrc;
|
597
|
+
|
364
598
|
size_t const hSize = HUF_readDTableX2_wksp (dctx, cSrc, cSrcSize,
|
365
599
|
workSpace, wkspSize);
|
366
600
|
if (HUF_isError(hSize)) return hSize;
|
367
601
|
if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
|
368
602
|
ip += hSize; cSrcSize -= hSize;
|
369
603
|
|
370
|
-
return HUF_decompress4X2_usingDTable_internal
|
604
|
+
return HUF_decompress4X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2);
|
605
|
+
}
|
606
|
+
|
607
|
+
size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
|
608
|
+
const void* cSrc, size_t cSrcSize,
|
609
|
+
void* workSpace, size_t wkspSize)
|
610
|
+
{
|
611
|
+
return HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, 0);
|
371
612
|
}
|
372
613
|
|
373
614
|
|
@@ -387,8 +628,6 @@ size_t HUF_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cS
|
|
387
628
|
/* *************************/
|
388
629
|
/* double-symbols decoding */
|
389
630
|
/* *************************/
|
390
|
-
typedef struct { U16 sequence; BYTE nbBits; BYTE length; } HUF_DEltX4; /* double-symbols decoding */
|
391
|
-
|
392
631
|
typedef struct { BYTE symbol; BYTE weight; } sortedSymbol_t;
|
393
632
|
|
394
633
|
/* HUF_fillDTableX4Level2() :
|
@@ -508,10 +747,7 @@ size_t HUF_readDTableX4_wksp(HUF_DTable* DTable, const void* src,
|
|
508
747
|
weightList = (BYTE *)((U32 *)workSpace + spaceUsed32);
|
509
748
|
spaceUsed32 += HUF_ALIGN(HUF_SYMBOLVALUE_MAX + 1, sizeof(U32)) >> 2;
|
510
749
|
|
511
|
-
if ((spaceUsed32 << 2) > wkspSize)
|
512
|
-
return ERROR(tableLog_tooLarge);
|
513
|
-
workSpace = (U32 *)workSpace + spaceUsed32;
|
514
|
-
wkspSize -= (spaceUsed32 << 2);
|
750
|
+
if ((spaceUsed32 << 2) > wkspSize) return ERROR(tableLog_tooLarge);
|
515
751
|
|
516
752
|
rankStart = rankStart0 + 1;
|
517
753
|
memset(rankStats, 0, sizeof(U32) * (2 * HUF_TABLELOG_MAX + 2 + 1));
|
@@ -588,95 +824,6 @@ size_t HUF_readDTableX4(HUF_DTable* DTable, const void* src, size_t srcSize)
|
|
588
824
|
workSpace, sizeof(workSpace));
|
589
825
|
}
|
590
826
|
|
591
|
-
static U32 HUF_decodeSymbolX4(void* op, BIT_DStream_t* DStream, const HUF_DEltX4* dt, const U32 dtLog)
|
592
|
-
{
|
593
|
-
size_t const val = BIT_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */
|
594
|
-
memcpy(op, dt+val, 2);
|
595
|
-
BIT_skipBits(DStream, dt[val].nbBits);
|
596
|
-
return dt[val].length;
|
597
|
-
}
|
598
|
-
|
599
|
-
static U32 HUF_decodeLastSymbolX4(void* op, BIT_DStream_t* DStream, const HUF_DEltX4* dt, const U32 dtLog)
|
600
|
-
{
|
601
|
-
size_t const val = BIT_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */
|
602
|
-
memcpy(op, dt+val, 1);
|
603
|
-
if (dt[val].length==1) BIT_skipBits(DStream, dt[val].nbBits);
|
604
|
-
else {
|
605
|
-
if (DStream->bitsConsumed < (sizeof(DStream->bitContainer)*8)) {
|
606
|
-
BIT_skipBits(DStream, dt[val].nbBits);
|
607
|
-
if (DStream->bitsConsumed > (sizeof(DStream->bitContainer)*8))
|
608
|
-
/* ugly hack; works only because it's the last symbol. Note : can't easily extract nbBits from just this symbol */
|
609
|
-
DStream->bitsConsumed = (sizeof(DStream->bitContainer)*8);
|
610
|
-
} }
|
611
|
-
return 1;
|
612
|
-
}
|
613
|
-
|
614
|
-
|
615
|
-
#define HUF_DECODE_SYMBOLX4_0(ptr, DStreamPtr) \
|
616
|
-
ptr += HUF_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog)
|
617
|
-
|
618
|
-
#define HUF_DECODE_SYMBOLX4_1(ptr, DStreamPtr) \
|
619
|
-
if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \
|
620
|
-
ptr += HUF_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog)
|
621
|
-
|
622
|
-
#define HUF_DECODE_SYMBOLX4_2(ptr, DStreamPtr) \
|
623
|
-
if (MEM_64bits()) \
|
624
|
-
ptr += HUF_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog)
|
625
|
-
|
626
|
-
HINT_INLINE size_t HUF_decodeStreamX4(BYTE* p, BIT_DStream_t* bitDPtr, BYTE* const pEnd, const HUF_DEltX4* const dt, const U32 dtLog)
|
627
|
-
{
|
628
|
-
BYTE* const pStart = p;
|
629
|
-
|
630
|
-
/* up to 8 symbols at a time */
|
631
|
-
while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd-(sizeof(bitDPtr->bitContainer)-1))) {
|
632
|
-
HUF_DECODE_SYMBOLX4_2(p, bitDPtr);
|
633
|
-
HUF_DECODE_SYMBOLX4_1(p, bitDPtr);
|
634
|
-
HUF_DECODE_SYMBOLX4_2(p, bitDPtr);
|
635
|
-
HUF_DECODE_SYMBOLX4_0(p, bitDPtr);
|
636
|
-
}
|
637
|
-
|
638
|
-
/* closer to end : up to 2 symbols at a time */
|
639
|
-
while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p <= pEnd-2))
|
640
|
-
HUF_DECODE_SYMBOLX4_0(p, bitDPtr);
|
641
|
-
|
642
|
-
while (p <= pEnd-2)
|
643
|
-
HUF_DECODE_SYMBOLX4_0(p, bitDPtr); /* no need to reload : reached the end of DStream */
|
644
|
-
|
645
|
-
if (p < pEnd)
|
646
|
-
p += HUF_decodeLastSymbolX4(p, bitDPtr, dt, dtLog);
|
647
|
-
|
648
|
-
return p-pStart;
|
649
|
-
}
|
650
|
-
|
651
|
-
|
652
|
-
static size_t HUF_decompress1X4_usingDTable_internal(
|
653
|
-
void* dst, size_t dstSize,
|
654
|
-
const void* cSrc, size_t cSrcSize,
|
655
|
-
const HUF_DTable* DTable)
|
656
|
-
{
|
657
|
-
BIT_DStream_t bitD;
|
658
|
-
|
659
|
-
/* Init */
|
660
|
-
{ size_t const errorCode = BIT_initDStream(&bitD, cSrc, cSrcSize);
|
661
|
-
if (HUF_isError(errorCode)) return errorCode;
|
662
|
-
}
|
663
|
-
|
664
|
-
/* decode */
|
665
|
-
{ BYTE* const ostart = (BYTE*) dst;
|
666
|
-
BYTE* const oend = ostart + dstSize;
|
667
|
-
const void* const dtPtr = DTable+1; /* force compiler to not use strict-aliasing */
|
668
|
-
const HUF_DEltX4* const dt = (const HUF_DEltX4*)dtPtr;
|
669
|
-
DTableDesc const dtd = HUF_getDTableDesc(DTable);
|
670
|
-
HUF_decodeStreamX4(ostart, &bitD, oend, dt, dtd.tableLog);
|
671
|
-
}
|
672
|
-
|
673
|
-
/* check */
|
674
|
-
if (!BIT_endOfDStream(&bitD)) return ERROR(corruption_detected);
|
675
|
-
|
676
|
-
/* decoded size */
|
677
|
-
return dstSize;
|
678
|
-
}
|
679
|
-
|
680
827
|
size_t HUF_decompress1X4_usingDTable(
|
681
828
|
void* dst, size_t dstSize,
|
682
829
|
const void* cSrc, size_t cSrcSize,
|
@@ -684,7 +831,7 @@ size_t HUF_decompress1X4_usingDTable(
|
|
684
831
|
{
|
685
832
|
DTableDesc dtd = HUF_getDTableDesc(DTable);
|
686
833
|
if (dtd.tableType != 1) return ERROR(GENERIC);
|
687
|
-
return HUF_decompress1X4_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable);
|
834
|
+
return HUF_decompress1X4_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
|
688
835
|
}
|
689
836
|
|
690
837
|
size_t HUF_decompress1X4_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize,
|
@@ -699,7 +846,7 @@ size_t HUF_decompress1X4_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize,
|
|
699
846
|
if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
|
700
847
|
ip += hSize; cSrcSize -= hSize;
|
701
848
|
|
702
|
-
return HUF_decompress1X4_usingDTable_internal
|
849
|
+
return HUF_decompress1X4_usingDTable_internal(dst, dstSize, ip, cSrcSize, DCtx, /* bmi2 */ 0);
|
703
850
|
}
|
704
851
|
|
705
852
|
|
@@ -717,99 +864,6 @@ size_t HUF_decompress1X4 (void* dst, size_t dstSize, const void* cSrc, size_t cS
|
|
717
864
|
return HUF_decompress1X4_DCtx(DTable, dst, dstSize, cSrc, cSrcSize);
|
718
865
|
}
|
719
866
|
|
720
|
-
static size_t HUF_decompress4X4_usingDTable_internal(
|
721
|
-
void* dst, size_t dstSize,
|
722
|
-
const void* cSrc, size_t cSrcSize,
|
723
|
-
const HUF_DTable* DTable)
|
724
|
-
{
|
725
|
-
if (cSrcSize < 10) return ERROR(corruption_detected); /* strict minimum : jump table + 1 byte per stream */
|
726
|
-
|
727
|
-
{ const BYTE* const istart = (const BYTE*) cSrc;
|
728
|
-
BYTE* const ostart = (BYTE*) dst;
|
729
|
-
BYTE* const oend = ostart + dstSize;
|
730
|
-
const void* const dtPtr = DTable+1;
|
731
|
-
const HUF_DEltX4* const dt = (const HUF_DEltX4*)dtPtr;
|
732
|
-
|
733
|
-
/* Init */
|
734
|
-
BIT_DStream_t bitD1;
|
735
|
-
BIT_DStream_t bitD2;
|
736
|
-
BIT_DStream_t bitD3;
|
737
|
-
BIT_DStream_t bitD4;
|
738
|
-
size_t const length1 = MEM_readLE16(istart);
|
739
|
-
size_t const length2 = MEM_readLE16(istart+2);
|
740
|
-
size_t const length3 = MEM_readLE16(istart+4);
|
741
|
-
size_t const length4 = cSrcSize - (length1 + length2 + length3 + 6);
|
742
|
-
const BYTE* const istart1 = istart + 6; /* jumpTable */
|
743
|
-
const BYTE* const istart2 = istart1 + length1;
|
744
|
-
const BYTE* const istart3 = istart2 + length2;
|
745
|
-
const BYTE* const istart4 = istart3 + length3;
|
746
|
-
size_t const segmentSize = (dstSize+3) / 4;
|
747
|
-
BYTE* const opStart2 = ostart + segmentSize;
|
748
|
-
BYTE* const opStart3 = opStart2 + segmentSize;
|
749
|
-
BYTE* const opStart4 = opStart3 + segmentSize;
|
750
|
-
BYTE* op1 = ostart;
|
751
|
-
BYTE* op2 = opStart2;
|
752
|
-
BYTE* op3 = opStart3;
|
753
|
-
BYTE* op4 = opStart4;
|
754
|
-
U32 endSignal;
|
755
|
-
DTableDesc const dtd = HUF_getDTableDesc(DTable);
|
756
|
-
U32 const dtLog = dtd.tableLog;
|
757
|
-
|
758
|
-
if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */
|
759
|
-
{ size_t const errorCode = BIT_initDStream(&bitD1, istart1, length1);
|
760
|
-
if (HUF_isError(errorCode)) return errorCode; }
|
761
|
-
{ size_t const errorCode = BIT_initDStream(&bitD2, istart2, length2);
|
762
|
-
if (HUF_isError(errorCode)) return errorCode; }
|
763
|
-
{ size_t const errorCode = BIT_initDStream(&bitD3, istart3, length3);
|
764
|
-
if (HUF_isError(errorCode)) return errorCode; }
|
765
|
-
{ size_t const errorCode = BIT_initDStream(&bitD4, istart4, length4);
|
766
|
-
if (HUF_isError(errorCode)) return errorCode; }
|
767
|
-
|
768
|
-
/* 16-32 symbols per loop (4-8 symbols per stream) */
|
769
|
-
endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
|
770
|
-
for ( ; (endSignal==BIT_DStream_unfinished) & (op4<(oend-(sizeof(bitD4.bitContainer)-1))) ; ) {
|
771
|
-
HUF_DECODE_SYMBOLX4_2(op1, &bitD1);
|
772
|
-
HUF_DECODE_SYMBOLX4_2(op2, &bitD2);
|
773
|
-
HUF_DECODE_SYMBOLX4_2(op3, &bitD3);
|
774
|
-
HUF_DECODE_SYMBOLX4_2(op4, &bitD4);
|
775
|
-
HUF_DECODE_SYMBOLX4_1(op1, &bitD1);
|
776
|
-
HUF_DECODE_SYMBOLX4_1(op2, &bitD2);
|
777
|
-
HUF_DECODE_SYMBOLX4_1(op3, &bitD3);
|
778
|
-
HUF_DECODE_SYMBOLX4_1(op4, &bitD4);
|
779
|
-
HUF_DECODE_SYMBOLX4_2(op1, &bitD1);
|
780
|
-
HUF_DECODE_SYMBOLX4_2(op2, &bitD2);
|
781
|
-
HUF_DECODE_SYMBOLX4_2(op3, &bitD3);
|
782
|
-
HUF_DECODE_SYMBOLX4_2(op4, &bitD4);
|
783
|
-
HUF_DECODE_SYMBOLX4_0(op1, &bitD1);
|
784
|
-
HUF_DECODE_SYMBOLX4_0(op2, &bitD2);
|
785
|
-
HUF_DECODE_SYMBOLX4_0(op3, &bitD3);
|
786
|
-
HUF_DECODE_SYMBOLX4_0(op4, &bitD4);
|
787
|
-
|
788
|
-
endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
|
789
|
-
}
|
790
|
-
|
791
|
-
/* check corruption */
|
792
|
-
if (op1 > opStart2) return ERROR(corruption_detected);
|
793
|
-
if (op2 > opStart3) return ERROR(corruption_detected);
|
794
|
-
if (op3 > opStart4) return ERROR(corruption_detected);
|
795
|
-
/* note : op4 already verified within main loop */
|
796
|
-
|
797
|
-
/* finish bitStreams one by one */
|
798
|
-
HUF_decodeStreamX4(op1, &bitD1, opStart2, dt, dtLog);
|
799
|
-
HUF_decodeStreamX4(op2, &bitD2, opStart3, dt, dtLog);
|
800
|
-
HUF_decodeStreamX4(op3, &bitD3, opStart4, dt, dtLog);
|
801
|
-
HUF_decodeStreamX4(op4, &bitD4, oend, dt, dtLog);
|
802
|
-
|
803
|
-
/* check */
|
804
|
-
{ U32 const endCheck = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4);
|
805
|
-
if (!endCheck) return ERROR(corruption_detected); }
|
806
|
-
|
807
|
-
/* decoded size */
|
808
|
-
return dstSize;
|
809
|
-
}
|
810
|
-
}
|
811
|
-
|
812
|
-
|
813
867
|
size_t HUF_decompress4X4_usingDTable(
|
814
868
|
void* dst, size_t dstSize,
|
815
869
|
const void* cSrc, size_t cSrcSize,
|
@@ -817,13 +871,12 @@ size_t HUF_decompress4X4_usingDTable(
|
|
817
871
|
{
|
818
872
|
DTableDesc dtd = HUF_getDTableDesc(DTable);
|
819
873
|
if (dtd.tableType != 1) return ERROR(GENERIC);
|
820
|
-
return HUF_decompress4X4_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable);
|
874
|
+
return HUF_decompress4X4_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
|
821
875
|
}
|
822
876
|
|
823
|
-
|
824
|
-
size_t HUF_decompress4X4_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
|
877
|
+
static size_t HUF_decompress4X4_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize,
|
825
878
|
const void* cSrc, size_t cSrcSize,
|
826
|
-
void* workSpace, size_t wkspSize)
|
879
|
+
void* workSpace, size_t wkspSize, int bmi2)
|
827
880
|
{
|
828
881
|
const BYTE* ip = (const BYTE*) cSrc;
|
829
882
|
|
@@ -833,7 +886,14 @@ size_t HUF_decompress4X4_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
|
|
833
886
|
if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
|
834
887
|
ip += hSize; cSrcSize -= hSize;
|
835
888
|
|
836
|
-
return HUF_decompress4X4_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx);
|
889
|
+
return HUF_decompress4X4_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2);
|
890
|
+
}
|
891
|
+
|
892
|
+
size_t HUF_decompress4X4_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
|
893
|
+
const void* cSrc, size_t cSrcSize,
|
894
|
+
void* workSpace, size_t wkspSize)
|
895
|
+
{
|
896
|
+
return HUF_decompress4X4_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, /* bmi2 */ 0);
|
837
897
|
}
|
838
898
|
|
839
899
|
|
@@ -861,8 +921,8 @@ size_t HUF_decompress1X_usingDTable(void* dst, size_t maxDstSize,
|
|
861
921
|
const HUF_DTable* DTable)
|
862
922
|
{
|
863
923
|
DTableDesc const dtd = HUF_getDTableDesc(DTable);
|
864
|
-
return dtd.tableType ? HUF_decompress1X4_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable) :
|
865
|
-
HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable);
|
924
|
+
return dtd.tableType ? HUF_decompress1X4_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0) :
|
925
|
+
HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
|
866
926
|
}
|
867
927
|
|
868
928
|
size_t HUF_decompress4X_usingDTable(void* dst, size_t maxDstSize,
|
@@ -870,8 +930,8 @@ size_t HUF_decompress4X_usingDTable(void* dst, size_t maxDstSize,
|
|
870
930
|
const HUF_DTable* DTable)
|
871
931
|
{
|
872
932
|
DTableDesc const dtd = HUF_getDTableDesc(DTable);
|
873
|
-
return dtd.tableType ? HUF_decompress4X4_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable) :
|
874
|
-
HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable);
|
933
|
+
return dtd.tableType ? HUF_decompress4X4_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0) :
|
934
|
+
HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
|
875
935
|
}
|
876
936
|
|
877
937
|
|
@@ -898,21 +958,22 @@ static const algo_time_t algoTime[16 /* Quantization */][3 /* single, double, qu
|
|
898
958
|
};
|
899
959
|
|
900
960
|
/** HUF_selectDecoder() :
|
901
|
-
*
|
902
|
-
*
|
903
|
-
*
|
904
|
-
*
|
961
|
+
* Tells which decoder is likely to decode faster,
|
962
|
+
* based on a set of pre-computed metrics.
|
963
|
+
* @return : 0==HUF_decompress4X2, 1==HUF_decompress4X4 .
|
964
|
+
* Assumption : 0 < dstSize <= 128 KB */
|
905
965
|
U32 HUF_selectDecoder (size_t dstSize, size_t cSrcSize)
|
906
966
|
{
|
967
|
+
assert(dstSize > 0);
|
968
|
+
assert(dstSize <= 128 KB);
|
907
969
|
/* decoder timing evaluation */
|
908
|
-
U32 const Q = cSrcSize >= dstSize ? 15 : (U32)(cSrcSize * 16 / dstSize); /* Q < 16 */
|
909
|
-
|
910
|
-
|
911
|
-
|
912
|
-
|
913
|
-
|
914
|
-
|
915
|
-
}
|
970
|
+
{ U32 const Q = (cSrcSize >= dstSize) ? 15 : (U32)(cSrcSize * 16 / dstSize); /* Q < 16 */
|
971
|
+
U32 const D256 = (U32)(dstSize >> 8);
|
972
|
+
U32 const DTime0 = algoTime[Q][0].tableTime + (algoTime[Q][0].decode256Time * D256);
|
973
|
+
U32 DTime1 = algoTime[Q][1].tableTime + (algoTime[Q][1].decode256Time * D256);
|
974
|
+
DTime1 += DTime1 >> 3; /* advantage to algorithm using less memory, to reduce cache eviction */
|
975
|
+
return DTime1 < DTime0;
|
976
|
+
} }
|
916
977
|
|
917
978
|
|
918
979
|
typedef size_t (*decompressionAlgo)(void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);
|
@@ -994,3 +1055,42 @@ size_t HUF_decompress1X_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize,
|
|
994
1055
|
return HUF_decompress1X_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
|
995
1056
|
workSpace, sizeof(workSpace));
|
996
1057
|
}
|
1058
|
+
|
1059
|
+
|
1060
|
+
size_t HUF_decompress1X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2)
|
1061
|
+
{
|
1062
|
+
DTableDesc const dtd = HUF_getDTableDesc(DTable);
|
1063
|
+
return dtd.tableType ? HUF_decompress1X4_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2) :
|
1064
|
+
HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
|
1065
|
+
}
|
1066
|
+
|
1067
|
+
size_t HUF_decompress1X2_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2)
|
1068
|
+
{
|
1069
|
+
const BYTE* ip = (const BYTE*) cSrc;
|
1070
|
+
|
1071
|
+
size_t const hSize = HUF_readDTableX2_wksp(dctx, cSrc, cSrcSize, workSpace, wkspSize);
|
1072
|
+
if (HUF_isError(hSize)) return hSize;
|
1073
|
+
if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
|
1074
|
+
ip += hSize; cSrcSize -= hSize;
|
1075
|
+
|
1076
|
+
return HUF_decompress1X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2);
|
1077
|
+
}
|
1078
|
+
|
1079
|
+
size_t HUF_decompress4X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2)
|
1080
|
+
{
|
1081
|
+
DTableDesc const dtd = HUF_getDTableDesc(DTable);
|
1082
|
+
return dtd.tableType ? HUF_decompress4X4_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2) :
|
1083
|
+
HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
|
1084
|
+
}
|
1085
|
+
|
1086
|
+
size_t HUF_decompress4X_hufOnly_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2)
|
1087
|
+
{
|
1088
|
+
/* validation checks */
|
1089
|
+
if (dstSize == 0) return ERROR(dstSize_tooSmall);
|
1090
|
+
if (cSrcSize == 0) return ERROR(corruption_detected);
|
1091
|
+
|
1092
|
+
{ U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
|
1093
|
+
return algoNb ? HUF_decompress4X4_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2) :
|
1094
|
+
HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2);
|
1095
|
+
}
|
1096
|
+
}
|