zstd-ruby 1.3.3.0 → 1.3.4.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/README.md +1 -1
- data/ext/zstdruby/libzstd/BUCK +13 -0
- data/ext/zstdruby/libzstd/README.md +32 -25
- data/ext/zstdruby/libzstd/common/bitstream.h +1 -1
- data/ext/zstdruby/libzstd/common/compiler.h +25 -0
- data/ext/zstdruby/libzstd/common/cpu.h +216 -0
- data/ext/zstdruby/libzstd/common/error_private.c +1 -0
- data/ext/zstdruby/libzstd/common/fse.h +1 -1
- data/ext/zstdruby/libzstd/common/fse_decompress.c +2 -2
- data/ext/zstdruby/libzstd/common/huf.h +114 -89
- data/ext/zstdruby/libzstd/common/pool.c +46 -17
- data/ext/zstdruby/libzstd/common/pool.h +18 -9
- data/ext/zstdruby/libzstd/common/threading.h +12 -12
- data/ext/zstdruby/libzstd/common/zstd_errors.h +16 -7
- data/ext/zstdruby/libzstd/common/zstd_internal.h +4 -5
- data/ext/zstdruby/libzstd/compress/fse_compress.c +19 -11
- data/ext/zstdruby/libzstd/compress/huf_compress.c +160 -62
- data/ext/zstdruby/libzstd/compress/zstd_compress.c +973 -644
- data/ext/zstdruby/libzstd/compress/zstd_compress_internal.h +281 -34
- data/ext/zstdruby/libzstd/compress/zstd_double_fast.c +80 -62
- data/ext/zstdruby/libzstd/compress/zstd_double_fast.h +11 -4
- data/ext/zstdruby/libzstd/compress/zstd_fast.c +87 -71
- data/ext/zstdruby/libzstd/compress/zstd_fast.h +10 -6
- data/ext/zstdruby/libzstd/compress/zstd_lazy.c +333 -274
- data/ext/zstdruby/libzstd/compress/zstd_lazy.h +33 -16
- data/ext/zstdruby/libzstd/compress/zstd_ldm.c +305 -359
- data/ext/zstdruby/libzstd/compress/zstd_ldm.h +64 -21
- data/ext/zstdruby/libzstd/compress/zstd_opt.c +194 -56
- data/ext/zstdruby/libzstd/compress/zstd_opt.h +17 -5
- data/ext/zstdruby/libzstd/compress/zstdmt_compress.c +1131 -449
- data/ext/zstdruby/libzstd/compress/zstdmt_compress.h +32 -16
- data/ext/zstdruby/libzstd/decompress/huf_decompress.c +390 -290
- data/ext/zstdruby/libzstd/decompress/zstd_decompress.c +777 -439
- data/ext/zstdruby/libzstd/dictBuilder/cover.c +11 -8
- data/ext/zstdruby/libzstd/dictBuilder/zdict.c +83 -50
- data/ext/zstdruby/libzstd/dictBuilder/zdict.h +44 -43
- data/ext/zstdruby/libzstd/legacy/zstd_legacy.h +2 -0
- data/ext/zstdruby/libzstd/legacy/zstd_v04.c +42 -118
- data/ext/zstdruby/libzstd/legacy/zstd_v06.c +2 -2
- data/ext/zstdruby/libzstd/legacy/zstd_v07.c +2 -2
- data/ext/zstdruby/libzstd/zstd.h +254 -254
- data/lib/zstd-ruby/version.rb +1 -1
- metadata +4 -3
@@ -30,15 +30,15 @@
|
|
30
30
|
|
31
31
|
/* === Memory management === */
|
32
32
|
typedef struct ZSTDMT_CCtx_s ZSTDMT_CCtx;
|
33
|
-
ZSTDLIB_API ZSTDMT_CCtx* ZSTDMT_createCCtx(unsigned
|
34
|
-
ZSTDLIB_API ZSTDMT_CCtx* ZSTDMT_createCCtx_advanced(unsigned
|
33
|
+
ZSTDLIB_API ZSTDMT_CCtx* ZSTDMT_createCCtx(unsigned nbWorkers);
|
34
|
+
ZSTDLIB_API ZSTDMT_CCtx* ZSTDMT_createCCtx_advanced(unsigned nbWorkers,
|
35
35
|
ZSTD_customMem cMem);
|
36
36
|
ZSTDLIB_API size_t ZSTDMT_freeCCtx(ZSTDMT_CCtx* mtctx);
|
37
37
|
|
38
38
|
ZSTDLIB_API size_t ZSTDMT_sizeof_CCtx(ZSTDMT_CCtx* mtctx);
|
39
39
|
|
40
40
|
|
41
|
-
/* === Simple
|
41
|
+
/* === Simple one-pass compression function === */
|
42
42
|
|
43
43
|
ZSTDLIB_API size_t ZSTDMT_compressCCtx(ZSTDMT_CCtx* mtctx,
|
44
44
|
void* dst, size_t dstCapacity,
|
@@ -50,7 +50,7 @@ ZSTDLIB_API size_t ZSTDMT_compressCCtx(ZSTDMT_CCtx* mtctx,
|
|
50
50
|
/* === Streaming functions === */
|
51
51
|
|
52
52
|
ZSTDLIB_API size_t ZSTDMT_initCStream(ZSTDMT_CCtx* mtctx, int compressionLevel);
|
53
|
-
ZSTDLIB_API size_t ZSTDMT_resetCStream(ZSTDMT_CCtx* mtctx, unsigned long long pledgedSrcSize); /**< if srcSize is not known at reset time, use ZSTD_CONTENTSIZE_UNKNOWN. Note: for compatibility with older programs, 0 means the same as ZSTD_CONTENTSIZE_UNKNOWN, but it
|
53
|
+
ZSTDLIB_API size_t ZSTDMT_resetCStream(ZSTDMT_CCtx* mtctx, unsigned long long pledgedSrcSize); /**< if srcSize is not known at reset time, use ZSTD_CONTENTSIZE_UNKNOWN. Note: for compatibility with older programs, 0 means the same as ZSTD_CONTENTSIZE_UNKNOWN, but it will change in the future to mean "empty" */
|
54
54
|
|
55
55
|
ZSTDLIB_API size_t ZSTDMT_compressStream(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output, ZSTD_inBuffer* input);
|
56
56
|
|
@@ -68,7 +68,7 @@ ZSTDLIB_API size_t ZSTDMT_compress_advanced(ZSTDMT_CCtx* mtctx,
|
|
68
68
|
void* dst, size_t dstCapacity,
|
69
69
|
const void* src, size_t srcSize,
|
70
70
|
const ZSTD_CDict* cdict,
|
71
|
-
ZSTD_parameters
|
71
|
+
ZSTD_parameters params,
|
72
72
|
unsigned overlapLog);
|
73
73
|
|
74
74
|
ZSTDLIB_API size_t ZSTDMT_initCStream_advanced(ZSTDMT_CCtx* mtctx,
|
@@ -85,7 +85,7 @@ ZSTDLIB_API size_t ZSTDMT_initCStream_usingCDict(ZSTDMT_CCtx* mtctx,
|
|
85
85
|
* List of parameters that can be set using ZSTDMT_setMTCtxParameter() */
|
86
86
|
typedef enum {
|
87
87
|
ZSTDMT_p_jobSize, /* Each job is compressed in parallel. By default, this value is dynamically determined depending on compression parameters. Can be set explicitly here. */
|
88
|
-
ZSTDMT_p_overlapSectionLog /* Each job may reload a part of previous job to enhance compression ratio; 0 == no overlap, 6(default) == use 1/8th of window, >=9 == use full window */
|
88
|
+
ZSTDMT_p_overlapSectionLog /* Each job may reload a part of previous job to enhance compression ratio; 0 == no overlap, 6(default) == use 1/8th of window, >=9 == use full window. This is a "sticky" parameter : its value will be re-used on next compression job */
|
89
89
|
} ZSTDMT_parameter;
|
90
90
|
|
91
91
|
/* ZSTDMT_setMTCtxParameter() :
|
@@ -97,30 +97,46 @@ ZSTDLIB_API size_t ZSTDMT_setMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSTDMT_parameter
|
|
97
97
|
|
98
98
|
|
99
99
|
/*! ZSTDMT_compressStream_generic() :
|
100
|
-
* Combines ZSTDMT_compressStream() with ZSTDMT_flushStream() or ZSTDMT_endStream()
|
100
|
+
* Combines ZSTDMT_compressStream() with optional ZSTDMT_flushStream() or ZSTDMT_endStream()
|
101
101
|
* depending on flush directive.
|
102
102
|
* @return : minimum amount of data still to be flushed
|
103
103
|
* 0 if fully flushed
|
104
|
-
* or an error code
|
104
|
+
* or an error code
|
105
|
+
* note : needs to be initialized using any ZSTD_initCStream*() variant */
|
105
106
|
ZSTDLIB_API size_t ZSTDMT_compressStream_generic(ZSTDMT_CCtx* mtctx,
|
106
107
|
ZSTD_outBuffer* output,
|
107
108
|
ZSTD_inBuffer* input,
|
108
109
|
ZSTD_EndDirective endOp);
|
109
110
|
|
110
111
|
|
111
|
-
/*
|
112
|
+
/* ========================================================
|
113
|
+
* === Private interface, for use by ZSTD_compress.c ===
|
114
|
+
* === Not exposed in libzstd. Never invoke directly ===
|
115
|
+
* ======================================================== */
|
112
116
|
|
113
117
|
size_t ZSTDMT_CCtxParam_setMTCtxParameter(ZSTD_CCtx_params* params, ZSTDMT_parameter parameter, unsigned value);
|
114
118
|
|
115
|
-
/*
|
116
|
-
* Set
|
117
|
-
*
|
118
|
-
size_t
|
119
|
+
/* ZSTDMT_CCtxParam_setNbWorkers()
|
120
|
+
* Set nbWorkers, and clamp it.
|
121
|
+
* Also reset jobSize and overlapLog */
|
122
|
+
size_t ZSTDMT_CCtxParam_setNbWorkers(ZSTD_CCtx_params* params, unsigned nbWorkers);
|
119
123
|
|
120
|
-
|
124
|
+
/*! ZSTDMT_updateCParams_whileCompressing() :
|
125
|
+
* Updates only a selected set of compression parameters, to remain compatible with current frame.
|
126
|
+
* New parameters will be applied to next compression job. */
|
127
|
+
void ZSTDMT_updateCParams_whileCompressing(ZSTDMT_CCtx* mtctx, const ZSTD_CCtx_params* cctxParams);
|
128
|
+
|
129
|
+
/* ZSTDMT_getNbWorkers():
|
121
130
|
* @return nb threads currently active in mtctx.
|
122
131
|
* mtctx must be valid */
|
123
|
-
|
132
|
+
unsigned ZSTDMT_getNbWorkers(const ZSTDMT_CCtx* mtctx);
|
133
|
+
|
134
|
+
/* ZSTDMT_getFrameProgression():
|
135
|
+
* tells how much data has been consumed (input) and produced (output) for current frame.
|
136
|
+
* able to count progression inside worker threads.
|
137
|
+
*/
|
138
|
+
ZSTD_frameProgression ZSTDMT_getFrameProgression(ZSTDMT_CCtx* mtctx);
|
139
|
+
|
124
140
|
|
125
141
|
/*! ZSTDMT_initCStream_internal() :
|
126
142
|
* Private use only. Init streaming operation.
|
@@ -128,7 +144,7 @@ size_t ZSTDMT_getNbThreads(const ZSTDMT_CCtx* mtctx);
|
|
128
144
|
* must receive dict, or cdict, or none, but not both.
|
129
145
|
* @return : 0, or an error code */
|
130
146
|
size_t ZSTDMT_initCStream_internal(ZSTDMT_CCtx* zcs,
|
131
|
-
const void* dict, size_t dictSize,
|
147
|
+
const void* dict, size_t dictSize, ZSTD_dictContentType_e dictContentType,
|
132
148
|
const ZSTD_CDict* cdict,
|
133
149
|
ZSTD_CCtx_params params, unsigned long long pledgedSrcSize);
|
134
150
|
|
@@ -49,18 +49,19 @@
|
|
49
49
|
****************************************************************/
|
50
50
|
#define HUF_isError ERR_isError
|
51
51
|
#define HUF_STATIC_ASSERT(c) { enum { HUF_static_assert = 1/(int)(!!(c)) }; } /* use only *after* variable declarations */
|
52
|
+
#define CHECK_F(f) { size_t const err_ = (f); if (HUF_isError(err_)) return err_; }
|
52
53
|
|
53
54
|
|
54
55
|
/* **************************************************************
|
55
56
|
* Byte alignment for workSpace management
|
56
57
|
****************************************************************/
|
57
|
-
#define HUF_ALIGN(x, a)
|
58
|
+
#define HUF_ALIGN(x, a) HUF_ALIGN_MASK((x), (a) - 1)
|
58
59
|
#define HUF_ALIGN_MASK(x, mask) (((x) + (mask)) & ~(mask))
|
59
60
|
|
61
|
+
|
60
62
|
/*-***************************/
|
61
63
|
/* generic DTableDesc */
|
62
64
|
/*-***************************/
|
63
|
-
|
64
65
|
typedef struct { BYTE maxTableLog; BYTE tableType; BYTE tableLog; BYTE reserved; } DTableDesc;
|
65
66
|
|
66
67
|
static DTableDesc HUF_getDTableDesc(const HUF_DTable* table)
|
@@ -74,7 +75,6 @@ static DTableDesc HUF_getDTableDesc(const HUF_DTable* table)
|
|
74
75
|
/*-***************************/
|
75
76
|
/* single-symbol decoding */
|
76
77
|
/*-***************************/
|
77
|
-
|
78
78
|
typedef struct { BYTE byte; BYTE nbBits; } HUF_DEltX2; /* single-symbol decoding */
|
79
79
|
|
80
80
|
size_t HUF_readDTableX2_wksp(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize)
|
@@ -94,10 +94,7 @@ size_t HUF_readDTableX2_wksp(HUF_DTable* DTable, const void* src, size_t srcSize
|
|
94
94
|
huffWeight = (BYTE *)((U32 *)workSpace + spaceUsed32);
|
95
95
|
spaceUsed32 += HUF_ALIGN(HUF_SYMBOLVALUE_MAX + 1, sizeof(U32)) >> 2;
|
96
96
|
|
97
|
-
if ((spaceUsed32 << 2) > wkspSize)
|
98
|
-
return ERROR(tableLog_tooLarge);
|
99
|
-
workSpace = (U32 *)workSpace + spaceUsed32;
|
100
|
-
wkspSize -= (spaceUsed32 << 2);
|
97
|
+
if ((spaceUsed32 << 2) > wkspSize) return ERROR(tableLog_tooLarge);
|
101
98
|
|
102
99
|
HUF_STATIC_ASSERT(sizeof(DTableDesc) == sizeof(HUF_DTable));
|
103
100
|
/* memset(huffWeight, 0, sizeof(huffWeight)); */ /* is not necessary, even though some analyzers complain ... */
|
@@ -144,8 +141,10 @@ size_t HUF_readDTableX2(HUF_DTable* DTable, const void* src, size_t srcSize)
|
|
144
141
|
workSpace, sizeof(workSpace));
|
145
142
|
}
|
146
143
|
|
144
|
+
typedef struct { U16 sequence; BYTE nbBits; BYTE length; } HUF_DEltX4; /* double-symbols decoding */
|
147
145
|
|
148
|
-
|
146
|
+
FORCE_INLINE_TEMPLATE BYTE
|
147
|
+
HUF_decodeSymbolX2(BIT_DStream_t* Dstream, const HUF_DEltX2* dt, const U32 dtLog)
|
149
148
|
{
|
150
149
|
size_t const val = BIT_lookBitsFast(Dstream, dtLog); /* note : dtLog >= 1 */
|
151
150
|
BYTE const c = dt[val].byte;
|
@@ -156,7 +155,7 @@ static BYTE HUF_decodeSymbolX2(BIT_DStream_t* Dstream, const HUF_DEltX2* dt, con
|
|
156
155
|
#define HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr) \
|
157
156
|
*ptr++ = HUF_decodeSymbolX2(DStreamPtr, dt, dtLog)
|
158
157
|
|
159
|
-
#define HUF_DECODE_SYMBOLX2_1(ptr, DStreamPtr)
|
158
|
+
#define HUF_DECODE_SYMBOLX2_1(ptr, DStreamPtr) \
|
160
159
|
if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \
|
161
160
|
HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr)
|
162
161
|
|
@@ -164,30 +163,33 @@ static BYTE HUF_decodeSymbolX2(BIT_DStream_t* Dstream, const HUF_DEltX2* dt, con
|
|
164
163
|
if (MEM_64bits()) \
|
165
164
|
HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr)
|
166
165
|
|
167
|
-
HINT_INLINE size_t
|
166
|
+
HINT_INLINE size_t
|
167
|
+
HUF_decodeStreamX2(BYTE* p, BIT_DStream_t* const bitDPtr, BYTE* const pEnd, const HUF_DEltX2* const dt, const U32 dtLog)
|
168
168
|
{
|
169
169
|
BYTE* const pStart = p;
|
170
170
|
|
171
171
|
/* up to 4 symbols at a time */
|
172
|
-
while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished)
|
172
|
+
while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd-3)) {
|
173
173
|
HUF_DECODE_SYMBOLX2_2(p, bitDPtr);
|
174
174
|
HUF_DECODE_SYMBOLX2_1(p, bitDPtr);
|
175
175
|
HUF_DECODE_SYMBOLX2_2(p, bitDPtr);
|
176
176
|
HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
|
177
177
|
}
|
178
178
|
|
179
|
-
/*
|
180
|
-
|
181
|
-
|
179
|
+
/* [0-3] symbols remaining */
|
180
|
+
if (MEM_32bits())
|
181
|
+
while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd))
|
182
|
+
HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
|
182
183
|
|
183
|
-
/* no more data to retrieve from bitstream,
|
184
|
+
/* no more data to retrieve from bitstream, no need to reload */
|
184
185
|
while (p < pEnd)
|
185
186
|
HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
|
186
187
|
|
187
188
|
return pEnd-pStart;
|
188
189
|
}
|
189
190
|
|
190
|
-
|
191
|
+
FORCE_INLINE_TEMPLATE size_t
|
192
|
+
HUF_decompress1X2_usingDTable_internal_body(
|
191
193
|
void* dst, size_t dstSize,
|
192
194
|
const void* cSrc, size_t cSrcSize,
|
193
195
|
const HUF_DTable* DTable)
|
@@ -200,58 +202,17 @@ static size_t HUF_decompress1X2_usingDTable_internal(
|
|
200
202
|
DTableDesc const dtd = HUF_getDTableDesc(DTable);
|
201
203
|
U32 const dtLog = dtd.tableLog;
|
202
204
|
|
203
|
-
|
204
|
-
if (HUF_isError(errorCode)) return errorCode; }
|
205
|
+
CHECK_F( BIT_initDStream(&bitD, cSrc, cSrcSize) );
|
205
206
|
|
206
207
|
HUF_decodeStreamX2(op, &bitD, oend, dt, dtLog);
|
207
208
|
|
208
|
-
/* check */
|
209
209
|
if (!BIT_endOfDStream(&bitD)) return ERROR(corruption_detected);
|
210
210
|
|
211
211
|
return dstSize;
|
212
212
|
}
|
213
213
|
|
214
|
-
size_t
|
215
|
-
|
216
|
-
const void* cSrc, size_t cSrcSize,
|
217
|
-
const HUF_DTable* DTable)
|
218
|
-
{
|
219
|
-
DTableDesc dtd = HUF_getDTableDesc(DTable);
|
220
|
-
if (dtd.tableType != 0) return ERROR(GENERIC);
|
221
|
-
return HUF_decompress1X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable);
|
222
|
-
}
|
223
|
-
|
224
|
-
size_t HUF_decompress1X2_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize,
|
225
|
-
const void* cSrc, size_t cSrcSize,
|
226
|
-
void* workSpace, size_t wkspSize)
|
227
|
-
{
|
228
|
-
const BYTE* ip = (const BYTE*) cSrc;
|
229
|
-
|
230
|
-
size_t const hSize = HUF_readDTableX2_wksp(DCtx, cSrc, cSrcSize, workSpace, wkspSize);
|
231
|
-
if (HUF_isError(hSize)) return hSize;
|
232
|
-
if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
|
233
|
-
ip += hSize; cSrcSize -= hSize;
|
234
|
-
|
235
|
-
return HUF_decompress1X2_usingDTable_internal (dst, dstSize, ip, cSrcSize, DCtx);
|
236
|
-
}
|
237
|
-
|
238
|
-
|
239
|
-
size_t HUF_decompress1X2_DCtx(HUF_DTable* DCtx, void* dst, size_t dstSize,
|
240
|
-
const void* cSrc, size_t cSrcSize)
|
241
|
-
{
|
242
|
-
U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
|
243
|
-
return HUF_decompress1X2_DCtx_wksp(DCtx, dst, dstSize, cSrc, cSrcSize,
|
244
|
-
workSpace, sizeof(workSpace));
|
245
|
-
}
|
246
|
-
|
247
|
-
size_t HUF_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
|
248
|
-
{
|
249
|
-
HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_TABLELOG_MAX);
|
250
|
-
return HUF_decompress1X2_DCtx (DTable, dst, dstSize, cSrc, cSrcSize);
|
251
|
-
}
|
252
|
-
|
253
|
-
|
254
|
-
static size_t HUF_decompress4X2_usingDTable_internal(
|
214
|
+
FORCE_INLINE_TEMPLATE size_t
|
215
|
+
HUF_decompress4X2_usingDTable_internal_body(
|
255
216
|
void* dst, size_t dstSize,
|
256
217
|
const void* cSrc, size_t cSrcSize,
|
257
218
|
const HUF_DTable* DTable)
|
@@ -286,23 +247,19 @@ static size_t HUF_decompress4X2_usingDTable_internal(
|
|
286
247
|
BYTE* op2 = opStart2;
|
287
248
|
BYTE* op3 = opStart3;
|
288
249
|
BYTE* op4 = opStart4;
|
289
|
-
U32 endSignal;
|
250
|
+
U32 endSignal = BIT_DStream_unfinished;
|
290
251
|
DTableDesc const dtd = HUF_getDTableDesc(DTable);
|
291
252
|
U32 const dtLog = dtd.tableLog;
|
292
253
|
|
293
254
|
if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */
|
294
|
-
|
295
|
-
|
296
|
-
|
297
|
-
|
298
|
-
{ size_t const errorCode = BIT_initDStream(&bitD3, istart3, length3);
|
299
|
-
if (HUF_isError(errorCode)) return errorCode; }
|
300
|
-
{ size_t const errorCode = BIT_initDStream(&bitD4, istart4, length4);
|
301
|
-
if (HUF_isError(errorCode)) return errorCode; }
|
255
|
+
CHECK_F( BIT_initDStream(&bitD1, istart1, length1) );
|
256
|
+
CHECK_F( BIT_initDStream(&bitD2, istart2, length2) );
|
257
|
+
CHECK_F( BIT_initDStream(&bitD3, istart3, length3) );
|
258
|
+
CHECK_F( BIT_initDStream(&bitD4, istart4, length4) );
|
302
259
|
|
303
|
-
/* 16
|
260
|
+
/* up to 16 symbols per loop (4 symbols per stream) in 64-bit mode */
|
304
261
|
endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
|
305
|
-
|
262
|
+
while ( (endSignal==BIT_DStream_unfinished) && (op4<(oend-3)) ) {
|
306
263
|
HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
|
307
264
|
HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
|
308
265
|
HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
|
@@ -319,10 +276,15 @@ static size_t HUF_decompress4X2_usingDTable_internal(
|
|
319
276
|
HUF_DECODE_SYMBOLX2_0(op2, &bitD2);
|
320
277
|
HUF_DECODE_SYMBOLX2_0(op3, &bitD3);
|
321
278
|
HUF_DECODE_SYMBOLX2_0(op4, &bitD4);
|
322
|
-
|
279
|
+
BIT_reloadDStream(&bitD1);
|
280
|
+
BIT_reloadDStream(&bitD2);
|
281
|
+
BIT_reloadDStream(&bitD3);
|
282
|
+
BIT_reloadDStream(&bitD4);
|
323
283
|
}
|
324
284
|
|
325
285
|
/* check corruption */
|
286
|
+
/* note : should not be necessary : op# advance in lock step, and we control op4.
|
287
|
+
* but curiously, binary generated by gcc 7.2 & 7.3 with -mbmi2 runs faster when >=1 test is present */
|
326
288
|
if (op1 > opStart2) return ERROR(corruption_detected);
|
327
289
|
if (op2 > opStart3) return ERROR(corruption_detected);
|
328
290
|
if (op3 > opStart4) return ERROR(corruption_detected);
|
@@ -335,8 +297,8 @@ static size_t HUF_decompress4X2_usingDTable_internal(
|
|
335
297
|
HUF_decodeStreamX2(op4, &bitD4, oend, dt, dtLog);
|
336
298
|
|
337
299
|
/* check */
|
338
|
-
|
339
|
-
|
300
|
+
{ U32 const endCheck = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4);
|
301
|
+
if (!endCheck) return ERROR(corruption_detected); }
|
340
302
|
|
341
303
|
/* decoded size */
|
342
304
|
return dstSize;
|
@@ -344,30 +306,309 @@ static size_t HUF_decompress4X2_usingDTable_internal(
|
|
344
306
|
}
|
345
307
|
|
346
308
|
|
347
|
-
|
309
|
+
FORCE_INLINE_TEMPLATE U32
|
310
|
+
HUF_decodeSymbolX4(void* op, BIT_DStream_t* DStream, const HUF_DEltX4* dt, const U32 dtLog)
|
311
|
+
{
|
312
|
+
size_t const val = BIT_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */
|
313
|
+
memcpy(op, dt+val, 2);
|
314
|
+
BIT_skipBits(DStream, dt[val].nbBits);
|
315
|
+
return dt[val].length;
|
316
|
+
}
|
317
|
+
|
318
|
+
FORCE_INLINE_TEMPLATE U32
|
319
|
+
HUF_decodeLastSymbolX4(void* op, BIT_DStream_t* DStream, const HUF_DEltX4* dt, const U32 dtLog)
|
320
|
+
{
|
321
|
+
size_t const val = BIT_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */
|
322
|
+
memcpy(op, dt+val, 1);
|
323
|
+
if (dt[val].length==1) BIT_skipBits(DStream, dt[val].nbBits);
|
324
|
+
else {
|
325
|
+
if (DStream->bitsConsumed < (sizeof(DStream->bitContainer)*8)) {
|
326
|
+
BIT_skipBits(DStream, dt[val].nbBits);
|
327
|
+
if (DStream->bitsConsumed > (sizeof(DStream->bitContainer)*8))
|
328
|
+
/* ugly hack; works only because it's the last symbol. Note : can't easily extract nbBits from just this symbol */
|
329
|
+
DStream->bitsConsumed = (sizeof(DStream->bitContainer)*8);
|
330
|
+
} }
|
331
|
+
return 1;
|
332
|
+
}
|
333
|
+
|
334
|
+
#define HUF_DECODE_SYMBOLX4_0(ptr, DStreamPtr) \
|
335
|
+
ptr += HUF_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog)
|
336
|
+
|
337
|
+
#define HUF_DECODE_SYMBOLX4_1(ptr, DStreamPtr) \
|
338
|
+
if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \
|
339
|
+
ptr += HUF_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog)
|
340
|
+
|
341
|
+
#define HUF_DECODE_SYMBOLX4_2(ptr, DStreamPtr) \
|
342
|
+
if (MEM_64bits()) \
|
343
|
+
ptr += HUF_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog)
|
344
|
+
|
345
|
+
HINT_INLINE size_t
|
346
|
+
HUF_decodeStreamX4(BYTE* p, BIT_DStream_t* bitDPtr, BYTE* const pEnd,
|
347
|
+
const HUF_DEltX4* const dt, const U32 dtLog)
|
348
|
+
{
|
349
|
+
BYTE* const pStart = p;
|
350
|
+
|
351
|
+
/* up to 8 symbols at a time */
|
352
|
+
while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd-(sizeof(bitDPtr->bitContainer)-1))) {
|
353
|
+
HUF_DECODE_SYMBOLX4_2(p, bitDPtr);
|
354
|
+
HUF_DECODE_SYMBOLX4_1(p, bitDPtr);
|
355
|
+
HUF_DECODE_SYMBOLX4_2(p, bitDPtr);
|
356
|
+
HUF_DECODE_SYMBOLX4_0(p, bitDPtr);
|
357
|
+
}
|
358
|
+
|
359
|
+
/* closer to end : up to 2 symbols at a time */
|
360
|
+
while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p <= pEnd-2))
|
361
|
+
HUF_DECODE_SYMBOLX4_0(p, bitDPtr);
|
362
|
+
|
363
|
+
while (p <= pEnd-2)
|
364
|
+
HUF_DECODE_SYMBOLX4_0(p, bitDPtr); /* no need to reload : reached the end of DStream */
|
365
|
+
|
366
|
+
if (p < pEnd)
|
367
|
+
p += HUF_decodeLastSymbolX4(p, bitDPtr, dt, dtLog);
|
368
|
+
|
369
|
+
return p-pStart;
|
370
|
+
}
|
371
|
+
|
372
|
+
FORCE_INLINE_TEMPLATE size_t
|
373
|
+
HUF_decompress1X4_usingDTable_internal_body(
|
374
|
+
void* dst, size_t dstSize,
|
375
|
+
const void* cSrc, size_t cSrcSize,
|
376
|
+
const HUF_DTable* DTable)
|
377
|
+
{
|
378
|
+
BIT_DStream_t bitD;
|
379
|
+
|
380
|
+
/* Init */
|
381
|
+
CHECK_F( BIT_initDStream(&bitD, cSrc, cSrcSize) );
|
382
|
+
|
383
|
+
/* decode */
|
384
|
+
{ BYTE* const ostart = (BYTE*) dst;
|
385
|
+
BYTE* const oend = ostart + dstSize;
|
386
|
+
const void* const dtPtr = DTable+1; /* force compiler to not use strict-aliasing */
|
387
|
+
const HUF_DEltX4* const dt = (const HUF_DEltX4*)dtPtr;
|
388
|
+
DTableDesc const dtd = HUF_getDTableDesc(DTable);
|
389
|
+
HUF_decodeStreamX4(ostart, &bitD, oend, dt, dtd.tableLog);
|
390
|
+
}
|
391
|
+
|
392
|
+
/* check */
|
393
|
+
if (!BIT_endOfDStream(&bitD)) return ERROR(corruption_detected);
|
394
|
+
|
395
|
+
/* decoded size */
|
396
|
+
return dstSize;
|
397
|
+
}
|
398
|
+
|
399
|
+
|
400
|
+
FORCE_INLINE_TEMPLATE size_t
|
401
|
+
HUF_decompress4X4_usingDTable_internal_body(
|
402
|
+
void* dst, size_t dstSize,
|
403
|
+
const void* cSrc, size_t cSrcSize,
|
404
|
+
const HUF_DTable* DTable)
|
405
|
+
{
|
406
|
+
if (cSrcSize < 10) return ERROR(corruption_detected); /* strict minimum : jump table + 1 byte per stream */
|
407
|
+
|
408
|
+
{ const BYTE* const istart = (const BYTE*) cSrc;
|
409
|
+
BYTE* const ostart = (BYTE*) dst;
|
410
|
+
BYTE* const oend = ostart + dstSize;
|
411
|
+
const void* const dtPtr = DTable+1;
|
412
|
+
const HUF_DEltX4* const dt = (const HUF_DEltX4*)dtPtr;
|
413
|
+
|
414
|
+
/* Init */
|
415
|
+
BIT_DStream_t bitD1;
|
416
|
+
BIT_DStream_t bitD2;
|
417
|
+
BIT_DStream_t bitD3;
|
418
|
+
BIT_DStream_t bitD4;
|
419
|
+
size_t const length1 = MEM_readLE16(istart);
|
420
|
+
size_t const length2 = MEM_readLE16(istart+2);
|
421
|
+
size_t const length3 = MEM_readLE16(istart+4);
|
422
|
+
size_t const length4 = cSrcSize - (length1 + length2 + length3 + 6);
|
423
|
+
const BYTE* const istart1 = istart + 6; /* jumpTable */
|
424
|
+
const BYTE* const istart2 = istart1 + length1;
|
425
|
+
const BYTE* const istart3 = istart2 + length2;
|
426
|
+
const BYTE* const istart4 = istart3 + length3;
|
427
|
+
size_t const segmentSize = (dstSize+3) / 4;
|
428
|
+
BYTE* const opStart2 = ostart + segmentSize;
|
429
|
+
BYTE* const opStart3 = opStart2 + segmentSize;
|
430
|
+
BYTE* const opStart4 = opStart3 + segmentSize;
|
431
|
+
BYTE* op1 = ostart;
|
432
|
+
BYTE* op2 = opStart2;
|
433
|
+
BYTE* op3 = opStart3;
|
434
|
+
BYTE* op4 = opStart4;
|
435
|
+
U32 endSignal;
|
436
|
+
DTableDesc const dtd = HUF_getDTableDesc(DTable);
|
437
|
+
U32 const dtLog = dtd.tableLog;
|
438
|
+
|
439
|
+
if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */
|
440
|
+
CHECK_F( BIT_initDStream(&bitD1, istart1, length1) );
|
441
|
+
CHECK_F( BIT_initDStream(&bitD2, istart2, length2) );
|
442
|
+
CHECK_F( BIT_initDStream(&bitD3, istart3, length3) );
|
443
|
+
CHECK_F( BIT_initDStream(&bitD4, istart4, length4) );
|
444
|
+
|
445
|
+
/* 16-32 symbols per loop (4-8 symbols per stream) */
|
446
|
+
endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
|
447
|
+
for ( ; (endSignal==BIT_DStream_unfinished) & (op4<(oend-(sizeof(bitD4.bitContainer)-1))) ; ) {
|
448
|
+
HUF_DECODE_SYMBOLX4_2(op1, &bitD1);
|
449
|
+
HUF_DECODE_SYMBOLX4_2(op2, &bitD2);
|
450
|
+
HUF_DECODE_SYMBOLX4_2(op3, &bitD3);
|
451
|
+
HUF_DECODE_SYMBOLX4_2(op4, &bitD4);
|
452
|
+
HUF_DECODE_SYMBOLX4_1(op1, &bitD1);
|
453
|
+
HUF_DECODE_SYMBOLX4_1(op2, &bitD2);
|
454
|
+
HUF_DECODE_SYMBOLX4_1(op3, &bitD3);
|
455
|
+
HUF_DECODE_SYMBOLX4_1(op4, &bitD4);
|
456
|
+
HUF_DECODE_SYMBOLX4_2(op1, &bitD1);
|
457
|
+
HUF_DECODE_SYMBOLX4_2(op2, &bitD2);
|
458
|
+
HUF_DECODE_SYMBOLX4_2(op3, &bitD3);
|
459
|
+
HUF_DECODE_SYMBOLX4_2(op4, &bitD4);
|
460
|
+
HUF_DECODE_SYMBOLX4_0(op1, &bitD1);
|
461
|
+
HUF_DECODE_SYMBOLX4_0(op2, &bitD2);
|
462
|
+
HUF_DECODE_SYMBOLX4_0(op3, &bitD3);
|
463
|
+
HUF_DECODE_SYMBOLX4_0(op4, &bitD4);
|
464
|
+
|
465
|
+
endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
|
466
|
+
}
|
467
|
+
|
468
|
+
/* check corruption */
|
469
|
+
if (op1 > opStart2) return ERROR(corruption_detected);
|
470
|
+
if (op2 > opStart3) return ERROR(corruption_detected);
|
471
|
+
if (op3 > opStart4) return ERROR(corruption_detected);
|
472
|
+
/* note : op4 already verified within main loop */
|
473
|
+
|
474
|
+
/* finish bitStreams one by one */
|
475
|
+
HUF_decodeStreamX4(op1, &bitD1, opStart2, dt, dtLog);
|
476
|
+
HUF_decodeStreamX4(op2, &bitD2, opStart3, dt, dtLog);
|
477
|
+
HUF_decodeStreamX4(op3, &bitD3, opStart4, dt, dtLog);
|
478
|
+
HUF_decodeStreamX4(op4, &bitD4, oend, dt, dtLog);
|
479
|
+
|
480
|
+
/* check */
|
481
|
+
{ U32 const endCheck = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4);
|
482
|
+
if (!endCheck) return ERROR(corruption_detected); }
|
483
|
+
|
484
|
+
/* decoded size */
|
485
|
+
return dstSize;
|
486
|
+
}
|
487
|
+
}
|
488
|
+
|
489
|
+
|
490
|
+
typedef size_t (*HUF_decompress_usingDTable_t)(void *dst, size_t dstSize,
|
491
|
+
const void *cSrc,
|
492
|
+
size_t cSrcSize,
|
493
|
+
const HUF_DTable *DTable);
|
494
|
+
#if DYNAMIC_BMI2
|
495
|
+
|
496
|
+
#define X(fn) \
|
497
|
+
\
|
498
|
+
static size_t fn##_default( \
|
499
|
+
void* dst, size_t dstSize, \
|
500
|
+
const void* cSrc, size_t cSrcSize, \
|
501
|
+
const HUF_DTable* DTable) \
|
502
|
+
{ \
|
503
|
+
return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable); \
|
504
|
+
} \
|
505
|
+
\
|
506
|
+
static TARGET_ATTRIBUTE("bmi2") size_t fn##_bmi2( \
|
507
|
+
void* dst, size_t dstSize, \
|
508
|
+
const void* cSrc, size_t cSrcSize, \
|
509
|
+
const HUF_DTable* DTable) \
|
510
|
+
{ \
|
511
|
+
return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable); \
|
512
|
+
} \
|
513
|
+
\
|
514
|
+
static size_t fn(void* dst, size_t dstSize, void const* cSrc, \
|
515
|
+
size_t cSrcSize, HUF_DTable const* DTable, int bmi2) \
|
516
|
+
{ \
|
517
|
+
if (bmi2) { \
|
518
|
+
return fn##_bmi2(dst, dstSize, cSrc, cSrcSize, DTable); \
|
519
|
+
} \
|
520
|
+
return fn##_default(dst, dstSize, cSrc, cSrcSize, DTable); \
|
521
|
+
}
|
522
|
+
|
523
|
+
#else
|
524
|
+
|
525
|
+
#define X(fn) \
|
526
|
+
static size_t fn(void* dst, size_t dstSize, void const* cSrc, \
|
527
|
+
size_t cSrcSize, HUF_DTable const* DTable, int bmi2) \
|
528
|
+
{ \
|
529
|
+
(void)bmi2; \
|
530
|
+
return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable); \
|
531
|
+
}
|
532
|
+
|
533
|
+
#endif
|
534
|
+
|
535
|
+
X(HUF_decompress1X2_usingDTable_internal)
|
536
|
+
X(HUF_decompress4X2_usingDTable_internal)
|
537
|
+
X(HUF_decompress1X4_usingDTable_internal)
|
538
|
+
X(HUF_decompress4X4_usingDTable_internal)
|
539
|
+
|
540
|
+
#undef X
|
541
|
+
|
542
|
+
|
543
|
+
size_t HUF_decompress1X2_usingDTable(
|
348
544
|
void* dst, size_t dstSize,
|
349
545
|
const void* cSrc, size_t cSrcSize,
|
350
546
|
const HUF_DTable* DTable)
|
351
547
|
{
|
352
548
|
DTableDesc dtd = HUF_getDTableDesc(DTable);
|
353
549
|
if (dtd.tableType != 0) return ERROR(GENERIC);
|
354
|
-
return
|
550
|
+
return HUF_decompress1X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
|
355
551
|
}
|
356
552
|
|
357
|
-
|
358
|
-
size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
|
553
|
+
size_t HUF_decompress1X2_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize,
|
359
554
|
const void* cSrc, size_t cSrcSize,
|
360
555
|
void* workSpace, size_t wkspSize)
|
361
556
|
{
|
362
557
|
const BYTE* ip = (const BYTE*) cSrc;
|
363
558
|
|
559
|
+
size_t const hSize = HUF_readDTableX2_wksp(DCtx, cSrc, cSrcSize, workSpace, wkspSize);
|
560
|
+
if (HUF_isError(hSize)) return hSize;
|
561
|
+
if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
|
562
|
+
ip += hSize; cSrcSize -= hSize;
|
563
|
+
|
564
|
+
return HUF_decompress1X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, DCtx, /* bmi2 */ 0);
|
565
|
+
}
|
566
|
+
|
567
|
+
|
568
|
+
size_t HUF_decompress1X2_DCtx(HUF_DTable* DCtx, void* dst, size_t dstSize,
|
569
|
+
const void* cSrc, size_t cSrcSize)
|
570
|
+
{
|
571
|
+
U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
|
572
|
+
return HUF_decompress1X2_DCtx_wksp(DCtx, dst, dstSize, cSrc, cSrcSize,
|
573
|
+
workSpace, sizeof(workSpace));
|
574
|
+
}
|
575
|
+
|
576
|
+
size_t HUF_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
|
577
|
+
{
|
578
|
+
HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_TABLELOG_MAX);
|
579
|
+
return HUF_decompress1X2_DCtx (DTable, dst, dstSize, cSrc, cSrcSize);
|
580
|
+
}
|
581
|
+
|
582
|
+
size_t HUF_decompress4X2_usingDTable(
|
583
|
+
void* dst, size_t dstSize,
|
584
|
+
const void* cSrc, size_t cSrcSize,
|
585
|
+
const HUF_DTable* DTable)
|
586
|
+
{
|
587
|
+
DTableDesc dtd = HUF_getDTableDesc(DTable);
|
588
|
+
if (dtd.tableType != 0) return ERROR(GENERIC);
|
589
|
+
return HUF_decompress4X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
|
590
|
+
}
|
591
|
+
|
592
|
+
static size_t HUF_decompress4X2_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize,
|
593
|
+
const void* cSrc, size_t cSrcSize,
|
594
|
+
void* workSpace, size_t wkspSize, int bmi2)
|
595
|
+
{
|
596
|
+
const BYTE* ip = (const BYTE*) cSrc;
|
597
|
+
|
364
598
|
size_t const hSize = HUF_readDTableX2_wksp (dctx, cSrc, cSrcSize,
|
365
599
|
workSpace, wkspSize);
|
366
600
|
if (HUF_isError(hSize)) return hSize;
|
367
601
|
if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
|
368
602
|
ip += hSize; cSrcSize -= hSize;
|
369
603
|
|
370
|
-
return HUF_decompress4X2_usingDTable_internal
|
604
|
+
return HUF_decompress4X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2);
|
605
|
+
}
|
606
|
+
|
607
|
+
size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
|
608
|
+
const void* cSrc, size_t cSrcSize,
|
609
|
+
void* workSpace, size_t wkspSize)
|
610
|
+
{
|
611
|
+
return HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, 0);
|
371
612
|
}
|
372
613
|
|
373
614
|
|
@@ -387,8 +628,6 @@ size_t HUF_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cS
|
|
387
628
|
/* *************************/
|
388
629
|
/* double-symbols decoding */
|
389
630
|
/* *************************/
|
390
|
-
typedef struct { U16 sequence; BYTE nbBits; BYTE length; } HUF_DEltX4; /* double-symbols decoding */
|
391
|
-
|
392
631
|
typedef struct { BYTE symbol; BYTE weight; } sortedSymbol_t;
|
393
632
|
|
394
633
|
/* HUF_fillDTableX4Level2() :
|
@@ -508,10 +747,7 @@ size_t HUF_readDTableX4_wksp(HUF_DTable* DTable, const void* src,
|
|
508
747
|
weightList = (BYTE *)((U32 *)workSpace + spaceUsed32);
|
509
748
|
spaceUsed32 += HUF_ALIGN(HUF_SYMBOLVALUE_MAX + 1, sizeof(U32)) >> 2;
|
510
749
|
|
511
|
-
if ((spaceUsed32 << 2) > wkspSize)
|
512
|
-
return ERROR(tableLog_tooLarge);
|
513
|
-
workSpace = (U32 *)workSpace + spaceUsed32;
|
514
|
-
wkspSize -= (spaceUsed32 << 2);
|
750
|
+
if ((spaceUsed32 << 2) > wkspSize) return ERROR(tableLog_tooLarge);
|
515
751
|
|
516
752
|
rankStart = rankStart0 + 1;
|
517
753
|
memset(rankStats, 0, sizeof(U32) * (2 * HUF_TABLELOG_MAX + 2 + 1));
|
@@ -588,95 +824,6 @@ size_t HUF_readDTableX4(HUF_DTable* DTable, const void* src, size_t srcSize)
|
|
588
824
|
workSpace, sizeof(workSpace));
|
589
825
|
}
|
590
826
|
|
591
|
-
static U32 HUF_decodeSymbolX4(void* op, BIT_DStream_t* DStream, const HUF_DEltX4* dt, const U32 dtLog)
|
592
|
-
{
|
593
|
-
size_t const val = BIT_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */
|
594
|
-
memcpy(op, dt+val, 2);
|
595
|
-
BIT_skipBits(DStream, dt[val].nbBits);
|
596
|
-
return dt[val].length;
|
597
|
-
}
|
598
|
-
|
599
|
-
static U32 HUF_decodeLastSymbolX4(void* op, BIT_DStream_t* DStream, const HUF_DEltX4* dt, const U32 dtLog)
|
600
|
-
{
|
601
|
-
size_t const val = BIT_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */
|
602
|
-
memcpy(op, dt+val, 1);
|
603
|
-
if (dt[val].length==1) BIT_skipBits(DStream, dt[val].nbBits);
|
604
|
-
else {
|
605
|
-
if (DStream->bitsConsumed < (sizeof(DStream->bitContainer)*8)) {
|
606
|
-
BIT_skipBits(DStream, dt[val].nbBits);
|
607
|
-
if (DStream->bitsConsumed > (sizeof(DStream->bitContainer)*8))
|
608
|
-
/* ugly hack; works only because it's the last symbol. Note : can't easily extract nbBits from just this symbol */
|
609
|
-
DStream->bitsConsumed = (sizeof(DStream->bitContainer)*8);
|
610
|
-
} }
|
611
|
-
return 1;
|
612
|
-
}
|
613
|
-
|
614
|
-
|
615
|
-
#define HUF_DECODE_SYMBOLX4_0(ptr, DStreamPtr) \
|
616
|
-
ptr += HUF_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog)
|
617
|
-
|
618
|
-
#define HUF_DECODE_SYMBOLX4_1(ptr, DStreamPtr) \
|
619
|
-
if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \
|
620
|
-
ptr += HUF_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog)
|
621
|
-
|
622
|
-
#define HUF_DECODE_SYMBOLX4_2(ptr, DStreamPtr) \
|
623
|
-
if (MEM_64bits()) \
|
624
|
-
ptr += HUF_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog)
|
625
|
-
|
626
|
-
HINT_INLINE size_t HUF_decodeStreamX4(BYTE* p, BIT_DStream_t* bitDPtr, BYTE* const pEnd, const HUF_DEltX4* const dt, const U32 dtLog)
|
627
|
-
{
|
628
|
-
BYTE* const pStart = p;
|
629
|
-
|
630
|
-
/* up to 8 symbols at a time */
|
631
|
-
while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd-(sizeof(bitDPtr->bitContainer)-1))) {
|
632
|
-
HUF_DECODE_SYMBOLX4_2(p, bitDPtr);
|
633
|
-
HUF_DECODE_SYMBOLX4_1(p, bitDPtr);
|
634
|
-
HUF_DECODE_SYMBOLX4_2(p, bitDPtr);
|
635
|
-
HUF_DECODE_SYMBOLX4_0(p, bitDPtr);
|
636
|
-
}
|
637
|
-
|
638
|
-
/* closer to end : up to 2 symbols at a time */
|
639
|
-
while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p <= pEnd-2))
|
640
|
-
HUF_DECODE_SYMBOLX4_0(p, bitDPtr);
|
641
|
-
|
642
|
-
while (p <= pEnd-2)
|
643
|
-
HUF_DECODE_SYMBOLX4_0(p, bitDPtr); /* no need to reload : reached the end of DStream */
|
644
|
-
|
645
|
-
if (p < pEnd)
|
646
|
-
p += HUF_decodeLastSymbolX4(p, bitDPtr, dt, dtLog);
|
647
|
-
|
648
|
-
return p-pStart;
|
649
|
-
}
|
650
|
-
|
651
|
-
|
652
|
-
static size_t HUF_decompress1X4_usingDTable_internal(
|
653
|
-
void* dst, size_t dstSize,
|
654
|
-
const void* cSrc, size_t cSrcSize,
|
655
|
-
const HUF_DTable* DTable)
|
656
|
-
{
|
657
|
-
BIT_DStream_t bitD;
|
658
|
-
|
659
|
-
/* Init */
|
660
|
-
{ size_t const errorCode = BIT_initDStream(&bitD, cSrc, cSrcSize);
|
661
|
-
if (HUF_isError(errorCode)) return errorCode;
|
662
|
-
}
|
663
|
-
|
664
|
-
/* decode */
|
665
|
-
{ BYTE* const ostart = (BYTE*) dst;
|
666
|
-
BYTE* const oend = ostart + dstSize;
|
667
|
-
const void* const dtPtr = DTable+1; /* force compiler to not use strict-aliasing */
|
668
|
-
const HUF_DEltX4* const dt = (const HUF_DEltX4*)dtPtr;
|
669
|
-
DTableDesc const dtd = HUF_getDTableDesc(DTable);
|
670
|
-
HUF_decodeStreamX4(ostart, &bitD, oend, dt, dtd.tableLog);
|
671
|
-
}
|
672
|
-
|
673
|
-
/* check */
|
674
|
-
if (!BIT_endOfDStream(&bitD)) return ERROR(corruption_detected);
|
675
|
-
|
676
|
-
/* decoded size */
|
677
|
-
return dstSize;
|
678
|
-
}
|
679
|
-
|
680
827
|
size_t HUF_decompress1X4_usingDTable(
|
681
828
|
void* dst, size_t dstSize,
|
682
829
|
const void* cSrc, size_t cSrcSize,
|
@@ -684,7 +831,7 @@ size_t HUF_decompress1X4_usingDTable(
|
|
684
831
|
{
|
685
832
|
DTableDesc dtd = HUF_getDTableDesc(DTable);
|
686
833
|
if (dtd.tableType != 1) return ERROR(GENERIC);
|
687
|
-
return HUF_decompress1X4_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable);
|
834
|
+
return HUF_decompress1X4_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
|
688
835
|
}
|
689
836
|
|
690
837
|
size_t HUF_decompress1X4_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize,
|
@@ -699,7 +846,7 @@ size_t HUF_decompress1X4_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize,
|
|
699
846
|
if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
|
700
847
|
ip += hSize; cSrcSize -= hSize;
|
701
848
|
|
702
|
-
return HUF_decompress1X4_usingDTable_internal
|
849
|
+
return HUF_decompress1X4_usingDTable_internal(dst, dstSize, ip, cSrcSize, DCtx, /* bmi2 */ 0);
|
703
850
|
}
|
704
851
|
|
705
852
|
|
@@ -717,99 +864,6 @@ size_t HUF_decompress1X4 (void* dst, size_t dstSize, const void* cSrc, size_t cS
|
|
717
864
|
return HUF_decompress1X4_DCtx(DTable, dst, dstSize, cSrc, cSrcSize);
|
718
865
|
}
|
719
866
|
|
720
|
-
static size_t HUF_decompress4X4_usingDTable_internal(
|
721
|
-
void* dst, size_t dstSize,
|
722
|
-
const void* cSrc, size_t cSrcSize,
|
723
|
-
const HUF_DTable* DTable)
|
724
|
-
{
|
725
|
-
if (cSrcSize < 10) return ERROR(corruption_detected); /* strict minimum : jump table + 1 byte per stream */
|
726
|
-
|
727
|
-
{ const BYTE* const istart = (const BYTE*) cSrc;
|
728
|
-
BYTE* const ostart = (BYTE*) dst;
|
729
|
-
BYTE* const oend = ostart + dstSize;
|
730
|
-
const void* const dtPtr = DTable+1;
|
731
|
-
const HUF_DEltX4* const dt = (const HUF_DEltX4*)dtPtr;
|
732
|
-
|
733
|
-
/* Init */
|
734
|
-
BIT_DStream_t bitD1;
|
735
|
-
BIT_DStream_t bitD2;
|
736
|
-
BIT_DStream_t bitD3;
|
737
|
-
BIT_DStream_t bitD4;
|
738
|
-
size_t const length1 = MEM_readLE16(istart);
|
739
|
-
size_t const length2 = MEM_readLE16(istart+2);
|
740
|
-
size_t const length3 = MEM_readLE16(istart+4);
|
741
|
-
size_t const length4 = cSrcSize - (length1 + length2 + length3 + 6);
|
742
|
-
const BYTE* const istart1 = istart + 6; /* jumpTable */
|
743
|
-
const BYTE* const istart2 = istart1 + length1;
|
744
|
-
const BYTE* const istart3 = istart2 + length2;
|
745
|
-
const BYTE* const istart4 = istart3 + length3;
|
746
|
-
size_t const segmentSize = (dstSize+3) / 4;
|
747
|
-
BYTE* const opStart2 = ostart + segmentSize;
|
748
|
-
BYTE* const opStart3 = opStart2 + segmentSize;
|
749
|
-
BYTE* const opStart4 = opStart3 + segmentSize;
|
750
|
-
BYTE* op1 = ostart;
|
751
|
-
BYTE* op2 = opStart2;
|
752
|
-
BYTE* op3 = opStart3;
|
753
|
-
BYTE* op4 = opStart4;
|
754
|
-
U32 endSignal;
|
755
|
-
DTableDesc const dtd = HUF_getDTableDesc(DTable);
|
756
|
-
U32 const dtLog = dtd.tableLog;
|
757
|
-
|
758
|
-
if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */
|
759
|
-
{ size_t const errorCode = BIT_initDStream(&bitD1, istart1, length1);
|
760
|
-
if (HUF_isError(errorCode)) return errorCode; }
|
761
|
-
{ size_t const errorCode = BIT_initDStream(&bitD2, istart2, length2);
|
762
|
-
if (HUF_isError(errorCode)) return errorCode; }
|
763
|
-
{ size_t const errorCode = BIT_initDStream(&bitD3, istart3, length3);
|
764
|
-
if (HUF_isError(errorCode)) return errorCode; }
|
765
|
-
{ size_t const errorCode = BIT_initDStream(&bitD4, istart4, length4);
|
766
|
-
if (HUF_isError(errorCode)) return errorCode; }
|
767
|
-
|
768
|
-
/* 16-32 symbols per loop (4-8 symbols per stream) */
|
769
|
-
endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
|
770
|
-
for ( ; (endSignal==BIT_DStream_unfinished) & (op4<(oend-(sizeof(bitD4.bitContainer)-1))) ; ) {
|
771
|
-
HUF_DECODE_SYMBOLX4_2(op1, &bitD1);
|
772
|
-
HUF_DECODE_SYMBOLX4_2(op2, &bitD2);
|
773
|
-
HUF_DECODE_SYMBOLX4_2(op3, &bitD3);
|
774
|
-
HUF_DECODE_SYMBOLX4_2(op4, &bitD4);
|
775
|
-
HUF_DECODE_SYMBOLX4_1(op1, &bitD1);
|
776
|
-
HUF_DECODE_SYMBOLX4_1(op2, &bitD2);
|
777
|
-
HUF_DECODE_SYMBOLX4_1(op3, &bitD3);
|
778
|
-
HUF_DECODE_SYMBOLX4_1(op4, &bitD4);
|
779
|
-
HUF_DECODE_SYMBOLX4_2(op1, &bitD1);
|
780
|
-
HUF_DECODE_SYMBOLX4_2(op2, &bitD2);
|
781
|
-
HUF_DECODE_SYMBOLX4_2(op3, &bitD3);
|
782
|
-
HUF_DECODE_SYMBOLX4_2(op4, &bitD4);
|
783
|
-
HUF_DECODE_SYMBOLX4_0(op1, &bitD1);
|
784
|
-
HUF_DECODE_SYMBOLX4_0(op2, &bitD2);
|
785
|
-
HUF_DECODE_SYMBOLX4_0(op3, &bitD3);
|
786
|
-
HUF_DECODE_SYMBOLX4_0(op4, &bitD4);
|
787
|
-
|
788
|
-
endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
|
789
|
-
}
|
790
|
-
|
791
|
-
/* check corruption */
|
792
|
-
if (op1 > opStart2) return ERROR(corruption_detected);
|
793
|
-
if (op2 > opStart3) return ERROR(corruption_detected);
|
794
|
-
if (op3 > opStart4) return ERROR(corruption_detected);
|
795
|
-
/* note : op4 already verified within main loop */
|
796
|
-
|
797
|
-
/* finish bitStreams one by one */
|
798
|
-
HUF_decodeStreamX4(op1, &bitD1, opStart2, dt, dtLog);
|
799
|
-
HUF_decodeStreamX4(op2, &bitD2, opStart3, dt, dtLog);
|
800
|
-
HUF_decodeStreamX4(op3, &bitD3, opStart4, dt, dtLog);
|
801
|
-
HUF_decodeStreamX4(op4, &bitD4, oend, dt, dtLog);
|
802
|
-
|
803
|
-
/* check */
|
804
|
-
{ U32 const endCheck = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4);
|
805
|
-
if (!endCheck) return ERROR(corruption_detected); }
|
806
|
-
|
807
|
-
/* decoded size */
|
808
|
-
return dstSize;
|
809
|
-
}
|
810
|
-
}
|
811
|
-
|
812
|
-
|
813
867
|
size_t HUF_decompress4X4_usingDTable(
|
814
868
|
void* dst, size_t dstSize,
|
815
869
|
const void* cSrc, size_t cSrcSize,
|
@@ -817,13 +871,12 @@ size_t HUF_decompress4X4_usingDTable(
|
|
817
871
|
{
|
818
872
|
DTableDesc dtd = HUF_getDTableDesc(DTable);
|
819
873
|
if (dtd.tableType != 1) return ERROR(GENERIC);
|
820
|
-
return HUF_decompress4X4_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable);
|
874
|
+
return HUF_decompress4X4_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
|
821
875
|
}
|
822
876
|
|
823
|
-
|
824
|
-
size_t HUF_decompress4X4_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
|
877
|
+
static size_t HUF_decompress4X4_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize,
|
825
878
|
const void* cSrc, size_t cSrcSize,
|
826
|
-
void* workSpace, size_t wkspSize)
|
879
|
+
void* workSpace, size_t wkspSize, int bmi2)
|
827
880
|
{
|
828
881
|
const BYTE* ip = (const BYTE*) cSrc;
|
829
882
|
|
@@ -833,7 +886,14 @@ size_t HUF_decompress4X4_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
|
|
833
886
|
if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
|
834
887
|
ip += hSize; cSrcSize -= hSize;
|
835
888
|
|
836
|
-
return HUF_decompress4X4_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx);
|
889
|
+
return HUF_decompress4X4_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2);
|
890
|
+
}
|
891
|
+
|
892
|
+
size_t HUF_decompress4X4_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
|
893
|
+
const void* cSrc, size_t cSrcSize,
|
894
|
+
void* workSpace, size_t wkspSize)
|
895
|
+
{
|
896
|
+
return HUF_decompress4X4_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, /* bmi2 */ 0);
|
837
897
|
}
|
838
898
|
|
839
899
|
|
@@ -861,8 +921,8 @@ size_t HUF_decompress1X_usingDTable(void* dst, size_t maxDstSize,
|
|
861
921
|
const HUF_DTable* DTable)
|
862
922
|
{
|
863
923
|
DTableDesc const dtd = HUF_getDTableDesc(DTable);
|
864
|
-
return dtd.tableType ? HUF_decompress1X4_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable) :
|
865
|
-
HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable);
|
924
|
+
return dtd.tableType ? HUF_decompress1X4_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0) :
|
925
|
+
HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
|
866
926
|
}
|
867
927
|
|
868
928
|
size_t HUF_decompress4X_usingDTable(void* dst, size_t maxDstSize,
|
@@ -870,8 +930,8 @@ size_t HUF_decompress4X_usingDTable(void* dst, size_t maxDstSize,
|
|
870
930
|
const HUF_DTable* DTable)
|
871
931
|
{
|
872
932
|
DTableDesc const dtd = HUF_getDTableDesc(DTable);
|
873
|
-
return dtd.tableType ? HUF_decompress4X4_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable) :
|
874
|
-
HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable);
|
933
|
+
return dtd.tableType ? HUF_decompress4X4_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0) :
|
934
|
+
HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
|
875
935
|
}
|
876
936
|
|
877
937
|
|
@@ -898,21 +958,22 @@ static const algo_time_t algoTime[16 /* Quantization */][3 /* single, double, qu
|
|
898
958
|
};
|
899
959
|
|
900
960
|
/** HUF_selectDecoder() :
|
901
|
-
*
|
902
|
-
*
|
903
|
-
*
|
904
|
-
*
|
961
|
+
* Tells which decoder is likely to decode faster,
|
962
|
+
* based on a set of pre-computed metrics.
|
963
|
+
* @return : 0==HUF_decompress4X2, 1==HUF_decompress4X4 .
|
964
|
+
* Assumption : 0 < dstSize <= 128 KB */
|
905
965
|
U32 HUF_selectDecoder (size_t dstSize, size_t cSrcSize)
|
906
966
|
{
|
967
|
+
assert(dstSize > 0);
|
968
|
+
assert(dstSize <= 128 KB);
|
907
969
|
/* decoder timing evaluation */
|
908
|
-
U32 const Q = cSrcSize >= dstSize ? 15 : (U32)(cSrcSize * 16 / dstSize); /* Q < 16 */
|
909
|
-
|
910
|
-
|
911
|
-
|
912
|
-
|
913
|
-
|
914
|
-
|
915
|
-
}
|
970
|
+
{ U32 const Q = (cSrcSize >= dstSize) ? 15 : (U32)(cSrcSize * 16 / dstSize); /* Q < 16 */
|
971
|
+
U32 const D256 = (U32)(dstSize >> 8);
|
972
|
+
U32 const DTime0 = algoTime[Q][0].tableTime + (algoTime[Q][0].decode256Time * D256);
|
973
|
+
U32 DTime1 = algoTime[Q][1].tableTime + (algoTime[Q][1].decode256Time * D256);
|
974
|
+
DTime1 += DTime1 >> 3; /* advantage to algorithm using less memory, to reduce cache eviction */
|
975
|
+
return DTime1 < DTime0;
|
976
|
+
} }
|
916
977
|
|
917
978
|
|
918
979
|
typedef size_t (*decompressionAlgo)(void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);
|
@@ -994,3 +1055,42 @@ size_t HUF_decompress1X_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize,
|
|
994
1055
|
return HUF_decompress1X_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
|
995
1056
|
workSpace, sizeof(workSpace));
|
996
1057
|
}
|
1058
|
+
|
1059
|
+
|
1060
|
+
size_t HUF_decompress1X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2)
|
1061
|
+
{
|
1062
|
+
DTableDesc const dtd = HUF_getDTableDesc(DTable);
|
1063
|
+
return dtd.tableType ? HUF_decompress1X4_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2) :
|
1064
|
+
HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
|
1065
|
+
}
|
1066
|
+
|
1067
|
+
size_t HUF_decompress1X2_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2)
|
1068
|
+
{
|
1069
|
+
const BYTE* ip = (const BYTE*) cSrc;
|
1070
|
+
|
1071
|
+
size_t const hSize = HUF_readDTableX2_wksp(dctx, cSrc, cSrcSize, workSpace, wkspSize);
|
1072
|
+
if (HUF_isError(hSize)) return hSize;
|
1073
|
+
if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
|
1074
|
+
ip += hSize; cSrcSize -= hSize;
|
1075
|
+
|
1076
|
+
return HUF_decompress1X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2);
|
1077
|
+
}
|
1078
|
+
|
1079
|
+
size_t HUF_decompress4X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2)
|
1080
|
+
{
|
1081
|
+
DTableDesc const dtd = HUF_getDTableDesc(DTable);
|
1082
|
+
return dtd.tableType ? HUF_decompress4X4_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2) :
|
1083
|
+
HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
|
1084
|
+
}
|
1085
|
+
|
1086
|
+
size_t HUF_decompress4X_hufOnly_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2)
|
1087
|
+
{
|
1088
|
+
/* validation checks */
|
1089
|
+
if (dstSize == 0) return ERROR(dstSize_tooSmall);
|
1090
|
+
if (cSrcSize == 0) return ERROR(corruption_detected);
|
1091
|
+
|
1092
|
+
{ U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
|
1093
|
+
return algoNb ? HUF_decompress4X4_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2) :
|
1094
|
+
HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2);
|
1095
|
+
}
|
1096
|
+
}
|