extzstd 0.2 → 0.3

Sign up to get free protection for your applications and to get access to all the features.
Files changed (88) hide show
  1. checksums.yaml +4 -4
  2. data/HISTORY.ja.md +13 -0
  3. data/README.md +17 -14
  4. data/contrib/zstd/{NEWS → CHANGELOG} +115 -2
  5. data/contrib/zstd/CODE_OF_CONDUCT.md +5 -0
  6. data/contrib/zstd/Makefile +99 -53
  7. data/contrib/zstd/README.md +59 -39
  8. data/contrib/zstd/TESTING.md +1 -1
  9. data/contrib/zstd/appveyor.yml +17 -6
  10. data/contrib/zstd/lib/BUCK +29 -2
  11. data/contrib/zstd/lib/Makefile +118 -21
  12. data/contrib/zstd/lib/README.md +84 -44
  13. data/contrib/zstd/lib/common/bitstream.h +17 -33
  14. data/contrib/zstd/lib/common/compiler.h +62 -8
  15. data/contrib/zstd/lib/common/cpu.h +215 -0
  16. data/contrib/zstd/lib/common/debug.c +44 -0
  17. data/contrib/zstd/lib/common/debug.h +134 -0
  18. data/contrib/zstd/lib/common/entropy_common.c +16 -1
  19. data/contrib/zstd/lib/common/error_private.c +7 -0
  20. data/contrib/zstd/lib/common/fse.h +48 -44
  21. data/contrib/zstd/lib/common/fse_decompress.c +3 -3
  22. data/contrib/zstd/lib/common/huf.h +169 -113
  23. data/contrib/zstd/lib/common/mem.h +20 -2
  24. data/contrib/zstd/lib/common/pool.c +135 -49
  25. data/contrib/zstd/lib/common/pool.h +40 -21
  26. data/contrib/zstd/lib/common/threading.c +2 -2
  27. data/contrib/zstd/lib/common/threading.h +12 -12
  28. data/contrib/zstd/lib/common/xxhash.c +3 -2
  29. data/contrib/zstd/lib/common/zstd_common.c +3 -6
  30. data/contrib/zstd/lib/common/zstd_errors.h +17 -7
  31. data/contrib/zstd/lib/common/zstd_internal.h +76 -48
  32. data/contrib/zstd/lib/compress/fse_compress.c +89 -209
  33. data/contrib/zstd/lib/compress/hist.c +203 -0
  34. data/contrib/zstd/lib/compress/hist.h +95 -0
  35. data/contrib/zstd/lib/compress/huf_compress.c +188 -80
  36. data/contrib/zstd/lib/compress/zstd_compress.c +2500 -1203
  37. data/contrib/zstd/lib/compress/zstd_compress_internal.h +463 -62
  38. data/contrib/zstd/lib/compress/zstd_double_fast.c +321 -131
  39. data/contrib/zstd/lib/compress/zstd_double_fast.h +13 -4
  40. data/contrib/zstd/lib/compress/zstd_fast.c +335 -108
  41. data/contrib/zstd/lib/compress/zstd_fast.h +12 -6
  42. data/contrib/zstd/lib/compress/zstd_lazy.c +654 -313
  43. data/contrib/zstd/lib/compress/zstd_lazy.h +44 -16
  44. data/contrib/zstd/lib/compress/zstd_ldm.c +310 -420
  45. data/contrib/zstd/lib/compress/zstd_ldm.h +63 -26
  46. data/contrib/zstd/lib/compress/zstd_opt.c +773 -325
  47. data/contrib/zstd/lib/compress/zstd_opt.h +31 -5
  48. data/contrib/zstd/lib/compress/zstdmt_compress.c +1468 -518
  49. data/contrib/zstd/lib/compress/zstdmt_compress.h +96 -45
  50. data/contrib/zstd/lib/decompress/huf_decompress.c +518 -282
  51. data/contrib/zstd/lib/decompress/zstd_ddict.c +240 -0
  52. data/contrib/zstd/lib/decompress/zstd_ddict.h +44 -0
  53. data/contrib/zstd/lib/decompress/zstd_decompress.c +613 -1513
  54. data/contrib/zstd/lib/decompress/zstd_decompress_block.c +1311 -0
  55. data/contrib/zstd/lib/decompress/zstd_decompress_block.h +59 -0
  56. data/contrib/zstd/lib/decompress/zstd_decompress_internal.h +175 -0
  57. data/contrib/zstd/lib/dictBuilder/cover.c +194 -113
  58. data/contrib/zstd/lib/dictBuilder/cover.h +112 -0
  59. data/contrib/zstd/lib/dictBuilder/divsufsort.c +3 -3
  60. data/contrib/zstd/lib/dictBuilder/fastcover.c +740 -0
  61. data/contrib/zstd/lib/dictBuilder/zdict.c +142 -106
  62. data/contrib/zstd/lib/dictBuilder/zdict.h +115 -49
  63. data/contrib/zstd/lib/legacy/zstd_legacy.h +44 -12
  64. data/contrib/zstd/lib/legacy/zstd_v01.c +41 -10
  65. data/contrib/zstd/lib/legacy/zstd_v01.h +12 -7
  66. data/contrib/zstd/lib/legacy/zstd_v02.c +37 -12
  67. data/contrib/zstd/lib/legacy/zstd_v02.h +12 -7
  68. data/contrib/zstd/lib/legacy/zstd_v03.c +38 -12
  69. data/contrib/zstd/lib/legacy/zstd_v03.h +12 -7
  70. data/contrib/zstd/lib/legacy/zstd_v04.c +55 -174
  71. data/contrib/zstd/lib/legacy/zstd_v04.h +12 -7
  72. data/contrib/zstd/lib/legacy/zstd_v05.c +59 -31
  73. data/contrib/zstd/lib/legacy/zstd_v05.h +12 -7
  74. data/contrib/zstd/lib/legacy/zstd_v06.c +48 -20
  75. data/contrib/zstd/lib/legacy/zstd_v06.h +10 -5
  76. data/contrib/zstd/lib/legacy/zstd_v07.c +62 -29
  77. data/contrib/zstd/lib/legacy/zstd_v07.h +10 -5
  78. data/contrib/zstd/lib/zstd.h +1346 -832
  79. data/ext/extzstd.c +27 -19
  80. data/ext/extzstd_stream.c +20 -4
  81. data/ext/zstd_compress.c +1 -0
  82. data/ext/zstd_decompress.c +4 -0
  83. data/ext/zstd_dictbuilder.c +4 -0
  84. data/ext/zstd_dictbuilder_fastcover.c +5 -0
  85. data/lib/extzstd.rb +52 -220
  86. data/lib/extzstd/version.rb +1 -1
  87. metadata +21 -7
  88. data/contrib/zstd/circle.yml +0 -63
@@ -17,10 +17,25 @@
17
17
 
18
18
 
19
19
  /* Note : This is an internal API.
20
- * Some methods are still exposed (ZSTDLIB_API),
20
+ * These APIs used to be exposed with ZSTDLIB_API,
21
21
  * because it used to be the only way to invoke MT compression.
22
- * Now, it's recommended to use ZSTD_compress_generic() instead.
23
- * These methods will stop being exposed in a future version */
22
+ * Now, it's recommended to use ZSTD_compress2() and ZSTD_compressStream2()
23
+ * instead.
24
+ *
25
+ * If you depend on these APIs and can't switch, then define
26
+ * ZSTD_LEGACY_MULTITHREADED_API when making the dynamic library.
27
+ * However, we may completely remove these functions in a future
28
+ * release, so please switch soon.
29
+ *
30
+ * This API requires ZSTD_MULTITHREAD to be defined during compilation,
31
+ * otherwise ZSTDMT_createCCtx*() will fail.
32
+ */
33
+
34
+ #ifdef ZSTD_LEGACY_MULTITHREADED_API
35
+ # define ZSTDMT_API ZSTDLIB_API
36
+ #else
37
+ # define ZSTDMT_API
38
+ #endif
24
39
 
25
40
  /* === Dependencies === */
26
41
  #include <stddef.h> /* size_t */
@@ -28,19 +43,31 @@
28
43
  #include "zstd.h" /* ZSTD_inBuffer, ZSTD_outBuffer, ZSTDLIB_API */
29
44
 
30
45
 
46
+ /* === Constants === */
47
+ #ifndef ZSTDMT_NBWORKERS_MAX
48
+ # define ZSTDMT_NBWORKERS_MAX 200
49
+ #endif
50
+ #ifndef ZSTDMT_JOBSIZE_MIN
51
+ # define ZSTDMT_JOBSIZE_MIN (1 MB)
52
+ #endif
53
+ #define ZSTDMT_JOBSIZE_MAX (MEM_32bits() ? (512 MB) : (1024 MB))
54
+
55
+
31
56
  /* === Memory management === */
32
57
  typedef struct ZSTDMT_CCtx_s ZSTDMT_CCtx;
33
- ZSTDLIB_API ZSTDMT_CCtx* ZSTDMT_createCCtx(unsigned nbThreads);
34
- ZSTDLIB_API ZSTDMT_CCtx* ZSTDMT_createCCtx_advanced(unsigned nbThreads,
58
+ /* Requires ZSTD_MULTITHREAD to be defined during compilation, otherwise it will return NULL. */
59
+ ZSTDMT_API ZSTDMT_CCtx* ZSTDMT_createCCtx(unsigned nbWorkers);
60
+ /* Requires ZSTD_MULTITHREAD to be defined during compilation, otherwise it will return NULL. */
61
+ ZSTDMT_API ZSTDMT_CCtx* ZSTDMT_createCCtx_advanced(unsigned nbWorkers,
35
62
  ZSTD_customMem cMem);
36
- ZSTDLIB_API size_t ZSTDMT_freeCCtx(ZSTDMT_CCtx* mtctx);
63
+ ZSTDMT_API size_t ZSTDMT_freeCCtx(ZSTDMT_CCtx* mtctx);
37
64
 
38
- ZSTDLIB_API size_t ZSTDMT_sizeof_CCtx(ZSTDMT_CCtx* mtctx);
65
+ ZSTDMT_API size_t ZSTDMT_sizeof_CCtx(ZSTDMT_CCtx* mtctx);
39
66
 
40
67
 
41
- /* === Simple buffer-to-butter one-pass function === */
68
+ /* === Simple one-pass compression function === */
42
69
 
43
- ZSTDLIB_API size_t ZSTDMT_compressCCtx(ZSTDMT_CCtx* mtctx,
70
+ ZSTDMT_API size_t ZSTDMT_compressCCtx(ZSTDMT_CCtx* mtctx,
44
71
  void* dst, size_t dstCapacity,
45
72
  const void* src, size_t srcSize,
46
73
  int compressionLevel);
@@ -49,34 +76,31 @@ ZSTDLIB_API size_t ZSTDMT_compressCCtx(ZSTDMT_CCtx* mtctx,
49
76
 
50
77
  /* === Streaming functions === */
51
78
 
52
- ZSTDLIB_API size_t ZSTDMT_initCStream(ZSTDMT_CCtx* mtctx, int compressionLevel);
53
- ZSTDLIB_API size_t ZSTDMT_resetCStream(ZSTDMT_CCtx* mtctx, unsigned long long pledgedSrcSize); /**< if srcSize is not known at reset time, use ZSTD_CONTENTSIZE_UNKNOWN. Note: for compatibility with older programs, 0 means the same as ZSTD_CONTENTSIZE_UNKNOWN, but it may change in the future, to mean "empty" */
79
+ ZSTDMT_API size_t ZSTDMT_initCStream(ZSTDMT_CCtx* mtctx, int compressionLevel);
80
+ ZSTDMT_API size_t ZSTDMT_resetCStream(ZSTDMT_CCtx* mtctx, unsigned long long pledgedSrcSize); /**< if srcSize is not known at reset time, use ZSTD_CONTENTSIZE_UNKNOWN. Note: for compatibility with older programs, 0 means the same as ZSTD_CONTENTSIZE_UNKNOWN, but it will change in the future to mean "empty" */
54
81
 
55
- ZSTDLIB_API size_t ZSTDMT_compressStream(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output, ZSTD_inBuffer* input);
82
+ ZSTDMT_API size_t ZSTDMT_nextInputSizeHint(const ZSTDMT_CCtx* mtctx);
83
+ ZSTDMT_API size_t ZSTDMT_compressStream(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output, ZSTD_inBuffer* input);
56
84
 
57
- ZSTDLIB_API size_t ZSTDMT_flushStream(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output); /**< @return : 0 == all flushed; >0 : still some data to be flushed; or an error code (ZSTD_isError()) */
58
- ZSTDLIB_API size_t ZSTDMT_endStream(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output); /**< @return : 0 == all flushed; >0 : still some data to be flushed; or an error code (ZSTD_isError()) */
85
+ ZSTDMT_API size_t ZSTDMT_flushStream(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output); /**< @return : 0 == all flushed; >0 : still some data to be flushed; or an error code (ZSTD_isError()) */
86
+ ZSTDMT_API size_t ZSTDMT_endStream(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output); /**< @return : 0 == all flushed; >0 : still some data to be flushed; or an error code (ZSTD_isError()) */
59
87
 
60
88
 
61
89
  /* === Advanced functions and parameters === */
62
90
 
63
- #ifndef ZSTDMT_JOBSIZE_MIN
64
- # define ZSTDMT_JOBSIZE_MIN (1U << 20) /* 1 MB - Minimum size of each compression job */
65
- #endif
91
+ ZSTDMT_API size_t ZSTDMT_compress_advanced(ZSTDMT_CCtx* mtctx,
92
+ void* dst, size_t dstCapacity,
93
+ const void* src, size_t srcSize,
94
+ const ZSTD_CDict* cdict,
95
+ ZSTD_parameters params,
96
+ int overlapLog);
66
97
 
67
- ZSTDLIB_API size_t ZSTDMT_compress_advanced(ZSTDMT_CCtx* mtctx,
68
- void* dst, size_t dstCapacity,
69
- const void* src, size_t srcSize,
70
- const ZSTD_CDict* cdict,
71
- ZSTD_parameters const params,
72
- unsigned overlapLog);
73
-
74
- ZSTDLIB_API size_t ZSTDMT_initCStream_advanced(ZSTDMT_CCtx* mtctx,
98
+ ZSTDMT_API size_t ZSTDMT_initCStream_advanced(ZSTDMT_CCtx* mtctx,
75
99
  const void* dict, size_t dictSize, /* dict can be released after init, a local copy is preserved within zcs */
76
100
  ZSTD_parameters params,
77
101
  unsigned long long pledgedSrcSize); /* pledgedSrcSize is optional and can be zero == unknown */
78
102
 
79
- ZSTDLIB_API size_t ZSTDMT_initCStream_usingCDict(ZSTDMT_CCtx* mtctx,
103
+ ZSTDMT_API size_t ZSTDMT_initCStream_usingCDict(ZSTDMT_CCtx* mtctx,
80
104
  const ZSTD_CDict* cdict,
81
105
  ZSTD_frameParameters fparams,
82
106
  unsigned long long pledgedSrcSize); /* note : zero means empty */
@@ -84,8 +108,9 @@ ZSTDLIB_API size_t ZSTDMT_initCStream_usingCDict(ZSTDMT_CCtx* mtctx,
84
108
  /* ZSTDMT_parameter :
85
109
  * List of parameters that can be set using ZSTDMT_setMTCtxParameter() */
86
110
  typedef enum {
87
- ZSTDMT_p_jobSize, /* Each job is compressed in parallel. By default, this value is dynamically determined depending on compression parameters. Can be set explicitly here. */
88
- ZSTDMT_p_overlapSectionLog /* Each job may reload a part of previous job to enhance compressionr ratio; 0 == no overlap, 6(default) == use 1/8th of window, >=9 == use full window */
111
+ ZSTDMT_p_jobSize, /* Each job is compressed in parallel. By default, this value is dynamically determined depending on compression parameters. Can be set explicitly here. */
112
+ ZSTDMT_p_overlapLog, /* Each job may reload a part of previous job to enhance compression ratio; 0 == no overlap, 6(default) == use 1/8th of window, >=9 == use full window. This is a "sticky" parameter : its value will be re-used on next compression job */
113
+ ZSTDMT_p_rsyncable /* Enables rsyncable mode. */
89
114
  } ZSTDMT_parameter;
90
115
 
91
116
  /* ZSTDMT_setMTCtxParameter() :
@@ -93,34 +118,60 @@ typedef enum {
93
118
  * The function must be called typically after ZSTDMT_createCCtx() but __before ZSTDMT_init*() !__
94
119
  * Parameters not explicitly reset by ZSTDMT_init*() remain the same in consecutive compression sessions.
95
120
  * @return : 0, or an error code (which can be tested using ZSTD_isError()) */
96
- ZSTDLIB_API size_t ZSTDMT_setMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSTDMT_parameter parameter, unsigned value);
121
+ ZSTDMT_API size_t ZSTDMT_setMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSTDMT_parameter parameter, int value);
122
+
123
+ /* ZSTDMT_getMTCtxParameter() :
124
+ * Query the ZSTDMT_CCtx for a parameter value.
125
+ * @return : 0, or an error code (which can be tested using ZSTD_isError()) */
126
+ ZSTDMT_API size_t ZSTDMT_getMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSTDMT_parameter parameter, int* value);
97
127
 
98
128
 
99
129
  /*! ZSTDMT_compressStream_generic() :
100
- * Combines ZSTDMT_compressStream() with ZSTDMT_flushStream() or ZSTDMT_endStream()
130
+ * Combines ZSTDMT_compressStream() with optional ZSTDMT_flushStream() or ZSTDMT_endStream()
101
131
  * depending on flush directive.
102
132
  * @return : minimum amount of data still to be flushed
103
133
  * 0 if fully flushed
104
- * or an error code */
105
- ZSTDLIB_API size_t ZSTDMT_compressStream_generic(ZSTDMT_CCtx* mtctx,
134
+ * or an error code
135
+ * note : must be initialized using any ZSTDMT_initCStream*() variant */
136
+ ZSTDMT_API size_t ZSTDMT_compressStream_generic(ZSTDMT_CCtx* mtctx,
106
137
  ZSTD_outBuffer* output,
107
138
  ZSTD_inBuffer* input,
108
139
  ZSTD_EndDirective endOp);
109
140
 
110
141
 
111
- /* === Private definitions; never ever use directly === */
112
-
113
- size_t ZSTDMT_CCtxParam_setMTCtxParameter(ZSTD_CCtx_params* params, ZSTDMT_parameter parameter, unsigned value);
114
-
115
- /* ZSTDMT_CCtxParam_setNbThreads()
116
- * Set nbThreads, and clamp it correctly,
117
- * also reset jobSize and overlapLog */
118
- size_t ZSTDMT_CCtxParam_setNbThreads(ZSTD_CCtx_params* params, unsigned nbThreads);
142
+ /* ========================================================
143
+ * === Private interface, for use by zstd_compress.c ===
144
+ * === Not exposed in libzstd. Never invoke directly ===
145
+ * ======================================================== */
146
+
147
+ /*! ZSTDMT_toFlushNow()
148
+ * Tell how many bytes are ready to be flushed immediately.
149
+ * Probe the oldest active job (not yet entirely flushed) and check its output buffer.
150
+ * If it returns 0, it means there is no active job,
151
+ * or, it means oldest job is still active, but everything produced has been flushed so far,
152
+ * therefore flushing is limited by speed of oldest job. */
153
+ size_t ZSTDMT_toFlushNow(ZSTDMT_CCtx* mtctx);
154
+
155
+ /*! ZSTDMT_CCtxParam_setMTCtxParameter()
156
+ * like ZSTDMT_setMTCtxParameter(), but into a ZSTD_CCtx_params */
157
+ size_t ZSTDMT_CCtxParam_setMTCtxParameter(ZSTD_CCtx_params* params, ZSTDMT_parameter parameter, int value);
158
+
159
+ /*! ZSTDMT_CCtxParam_setNbWorkers()
160
+ * Set nbWorkers, and clamp it.
161
+ * Also reset jobSize and overlapLog */
162
+ size_t ZSTDMT_CCtxParam_setNbWorkers(ZSTD_CCtx_params* params, unsigned nbWorkers);
163
+
164
+ /*! ZSTDMT_updateCParams_whileCompressing() :
165
+ * Updates only a selected set of compression parameters, to remain compatible with current frame.
166
+ * New parameters will be applied to next compression job. */
167
+ void ZSTDMT_updateCParams_whileCompressing(ZSTDMT_CCtx* mtctx, const ZSTD_CCtx_params* cctxParams);
168
+
169
+ /*! ZSTDMT_getFrameProgression():
170
+ * tells how much data has been consumed (input) and produced (output) for current frame.
171
+ * able to count progression inside worker threads.
172
+ */
173
+ ZSTD_frameProgression ZSTDMT_getFrameProgression(ZSTDMT_CCtx* mtctx);
119
174
 
120
- /* ZSTDMT_getNbThreads():
121
- * @return nb threads currently active in mtctx.
122
- * mtctx must be valid */
123
- size_t ZSTDMT_getNbThreads(const ZSTDMT_CCtx* mtctx);
124
175
 
125
176
  /*! ZSTDMT_initCStream_internal() :
126
177
  * Private use only. Init streaming operation.
@@ -128,7 +179,7 @@ size_t ZSTDMT_getNbThreads(const ZSTDMT_CCtx* mtctx);
128
179
  * must receive dict, or cdict, or none, but not both.
129
180
  * @return : 0, or an error code */
130
181
  size_t ZSTDMT_initCStream_internal(ZSTDMT_CCtx* zcs,
131
- const void* dict, size_t dictSize, ZSTD_dictMode_e dictMode,
182
+ const void* dict, size_t dictSize, ZSTD_dictContentType_e dictContentType,
132
183
  const ZSTD_CDict* cdict,
133
184
  ZSTD_CCtx_params params, unsigned long long pledgedSrcSize);
134
185
 
@@ -1,6 +1,7 @@
1
1
  /* ******************************************************************
2
- Huffman decoder, part of New Generation Entropy library
3
- Copyright (C) 2013-2016, Yann Collet.
2
+ huff0 huffman decoder,
3
+ part of Finite State Entropy library
4
+ Copyright (C) 2013-present, Yann Collet.
4
5
 
5
6
  BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
6
7
 
@@ -29,38 +30,95 @@
29
30
 
30
31
  You can contact the author at :
31
32
  - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy
32
- - Public forum : https://groups.google.com/forum/#!forum/lz4c
33
33
  ****************************************************************** */
34
34
 
35
35
  /* **************************************************************
36
36
  * Dependencies
37
37
  ****************************************************************/
38
38
  #include <string.h> /* memcpy, memset */
39
- #include "bitstream.h" /* BIT_* */
40
39
  #include "compiler.h"
41
- #include "fse.h" /* header compression */
40
+ #include "bitstream.h" /* BIT_* */
41
+ #include "fse.h" /* to compress headers */
42
42
  #define HUF_STATIC_LINKING_ONLY
43
43
  #include "huf.h"
44
44
  #include "error_private.h"
45
45
 
46
+ /* **************************************************************
47
+ * Macros
48
+ ****************************************************************/
49
+
50
+ /* These two optional macros force the use of one or the other of the two
51
+ * Huffman decompression implementations. You can't force in both directions
52
+ * at the same time.
53
+ */
54
+ #if defined(HUF_FORCE_DECOMPRESS_X1) && \
55
+ defined(HUF_FORCE_DECOMPRESS_X2)
56
+ #error "Cannot force the use of the X1 and X2 decoders at the same time!"
57
+ #endif
58
+
46
59
 
47
60
  /* **************************************************************
48
61
  * Error Management
49
62
  ****************************************************************/
50
63
  #define HUF_isError ERR_isError
51
- #define HUF_STATIC_ASSERT(c) { enum { HUF_static_assert = 1/(int)(!!(c)) }; } /* use only *after* variable declarations */
64
+ #define CHECK_F(f) { size_t const err_ = (f); if (HUF_isError(err_)) return err_; }
52
65
 
53
66
 
54
67
  /* **************************************************************
55
68
  * Byte alignment for workSpace management
56
69
  ****************************************************************/
57
- #define HUF_ALIGN(x, a) HUF_ALIGN_MASK((x), (a) - 1)
70
+ #define HUF_ALIGN(x, a) HUF_ALIGN_MASK((x), (a) - 1)
58
71
  #define HUF_ALIGN_MASK(x, mask) (((x) + (mask)) & ~(mask))
59
72
 
73
+
74
+ /* **************************************************************
75
+ * BMI2 Variant Wrappers
76
+ ****************************************************************/
77
+ #if DYNAMIC_BMI2
78
+
79
+ #define HUF_DGEN(fn) \
80
+ \
81
+ static size_t fn##_default( \
82
+ void* dst, size_t dstSize, \
83
+ const void* cSrc, size_t cSrcSize, \
84
+ const HUF_DTable* DTable) \
85
+ { \
86
+ return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable); \
87
+ } \
88
+ \
89
+ static TARGET_ATTRIBUTE("bmi2") size_t fn##_bmi2( \
90
+ void* dst, size_t dstSize, \
91
+ const void* cSrc, size_t cSrcSize, \
92
+ const HUF_DTable* DTable) \
93
+ { \
94
+ return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable); \
95
+ } \
96
+ \
97
+ static size_t fn(void* dst, size_t dstSize, void const* cSrc, \
98
+ size_t cSrcSize, HUF_DTable const* DTable, int bmi2) \
99
+ { \
100
+ if (bmi2) { \
101
+ return fn##_bmi2(dst, dstSize, cSrc, cSrcSize, DTable); \
102
+ } \
103
+ return fn##_default(dst, dstSize, cSrc, cSrcSize, DTable); \
104
+ }
105
+
106
+ #else
107
+
108
+ #define HUF_DGEN(fn) \
109
+ static size_t fn(void* dst, size_t dstSize, void const* cSrc, \
110
+ size_t cSrcSize, HUF_DTable const* DTable, int bmi2) \
111
+ { \
112
+ (void)bmi2; \
113
+ return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable); \
114
+ }
115
+
116
+ #endif
117
+
118
+
60
119
  /*-***************************/
61
120
  /* generic DTableDesc */
62
121
  /*-***************************/
63
-
64
122
  typedef struct { BYTE maxTableLog; BYTE tableType; BYTE tableLog; BYTE reserved; } DTableDesc;
65
123
 
66
124
  static DTableDesc HUF_getDTableDesc(const HUF_DTable* table)
@@ -71,19 +129,20 @@ static DTableDesc HUF_getDTableDesc(const HUF_DTable* table)
71
129
  }
72
130
 
73
131
 
132
+ #ifndef HUF_FORCE_DECOMPRESS_X2
133
+
74
134
  /*-***************************/
75
135
  /* single-symbol decoding */
76
136
  /*-***************************/
137
+ typedef struct { BYTE byte; BYTE nbBits; } HUF_DEltX1; /* single-symbol decoding */
77
138
 
78
- typedef struct { BYTE byte; BYTE nbBits; } HUF_DEltX2; /* single-symbol decoding */
79
-
80
- size_t HUF_readDTableX2_wksp(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize)
139
+ size_t HUF_readDTableX1_wksp(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize)
81
140
  {
82
141
  U32 tableLog = 0;
83
142
  U32 nbSymbols = 0;
84
143
  size_t iSize;
85
144
  void* const dtPtr = DTable + 1;
86
- HUF_DEltX2* const dt = (HUF_DEltX2*)dtPtr;
145
+ HUF_DEltX1* const dt = (HUF_DEltX1*)dtPtr;
87
146
 
88
147
  U32* rankVal;
89
148
  BYTE* huffWeight;
@@ -94,12 +153,9 @@ size_t HUF_readDTableX2_wksp(HUF_DTable* DTable, const void* src, size_t srcSize
94
153
  huffWeight = (BYTE *)((U32 *)workSpace + spaceUsed32);
95
154
  spaceUsed32 += HUF_ALIGN(HUF_SYMBOLVALUE_MAX + 1, sizeof(U32)) >> 2;
96
155
 
97
- if ((spaceUsed32 << 2) > wkspSize)
98
- return ERROR(tableLog_tooLarge);
99
- workSpace = (U32 *)workSpace + spaceUsed32;
100
- wkspSize -= (spaceUsed32 << 2);
156
+ if ((spaceUsed32 << 2) > wkspSize) return ERROR(tableLog_tooLarge);
101
157
 
102
- HUF_STATIC_ASSERT(sizeof(DTableDesc) == sizeof(HUF_DTable));
158
+ DEBUG_STATIC_ASSERT(sizeof(DTableDesc) == sizeof(HUF_DTable));
103
159
  /* memset(huffWeight, 0, sizeof(huffWeight)); */ /* is not necessary, even though some analyzers complain ... */
104
160
 
105
161
  iSize = HUF_readStats(huffWeight, HUF_SYMBOLVALUE_MAX + 1, rankVal, &nbSymbols, &tableLog, src, srcSize);
@@ -127,7 +183,7 @@ size_t HUF_readDTableX2_wksp(HUF_DTable* DTable, const void* src, size_t srcSize
127
183
  U32 const w = huffWeight[n];
128
184
  U32 const length = (1 << w) >> 1;
129
185
  U32 u;
130
- HUF_DEltX2 D;
186
+ HUF_DEltX1 D;
131
187
  D.byte = (BYTE)n; D.nbBits = (BYTE)(tableLog + 1 - w);
132
188
  for (u = rankVal[w]; u < rankVal[w] + length; u++)
133
189
  dt[u] = D;
@@ -137,15 +193,15 @@ size_t HUF_readDTableX2_wksp(HUF_DTable* DTable, const void* src, size_t srcSize
137
193
  return iSize;
138
194
  }
139
195
 
140
- size_t HUF_readDTableX2(HUF_DTable* DTable, const void* src, size_t srcSize)
196
+ size_t HUF_readDTableX1(HUF_DTable* DTable, const void* src, size_t srcSize)
141
197
  {
142
198
  U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
143
- return HUF_readDTableX2_wksp(DTable, src, srcSize,
199
+ return HUF_readDTableX1_wksp(DTable, src, srcSize,
144
200
  workSpace, sizeof(workSpace));
145
201
  }
146
202
 
147
-
148
- static BYTE HUF_decodeSymbolX2(BIT_DStream_t* Dstream, const HUF_DEltX2* dt, const U32 dtLog)
203
+ FORCE_INLINE_TEMPLATE BYTE
204
+ HUF_decodeSymbolX1(BIT_DStream_t* Dstream, const HUF_DEltX1* dt, const U32 dtLog)
149
205
  {
150
206
  size_t const val = BIT_lookBitsFast(Dstream, dtLog); /* note : dtLog >= 1 */
151
207
  BYTE const c = dt[val].byte;
@@ -153,41 +209,44 @@ static BYTE HUF_decodeSymbolX2(BIT_DStream_t* Dstream, const HUF_DEltX2* dt, con
153
209
  return c;
154
210
  }
155
211
 
156
- #define HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr) \
157
- *ptr++ = HUF_decodeSymbolX2(DStreamPtr, dt, dtLog)
212
+ #define HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr) \
213
+ *ptr++ = HUF_decodeSymbolX1(DStreamPtr, dt, dtLog)
158
214
 
159
- #define HUF_DECODE_SYMBOLX2_1(ptr, DStreamPtr) \
215
+ #define HUF_DECODE_SYMBOLX1_1(ptr, DStreamPtr) \
160
216
  if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \
161
- HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr)
217
+ HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr)
162
218
 
163
- #define HUF_DECODE_SYMBOLX2_2(ptr, DStreamPtr) \
219
+ #define HUF_DECODE_SYMBOLX1_2(ptr, DStreamPtr) \
164
220
  if (MEM_64bits()) \
165
- HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr)
221
+ HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr)
166
222
 
167
- HINT_INLINE size_t HUF_decodeStreamX2(BYTE* p, BIT_DStream_t* const bitDPtr, BYTE* const pEnd, const HUF_DEltX2* const dt, const U32 dtLog)
223
+ HINT_INLINE size_t
224
+ HUF_decodeStreamX1(BYTE* p, BIT_DStream_t* const bitDPtr, BYTE* const pEnd, const HUF_DEltX1* const dt, const U32 dtLog)
168
225
  {
169
226
  BYTE* const pStart = p;
170
227
 
171
228
  /* up to 4 symbols at a time */
172
- while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) && (p <= pEnd-4)) {
173
- HUF_DECODE_SYMBOLX2_2(p, bitDPtr);
174
- HUF_DECODE_SYMBOLX2_1(p, bitDPtr);
175
- HUF_DECODE_SYMBOLX2_2(p, bitDPtr);
176
- HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
229
+ while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd-3)) {
230
+ HUF_DECODE_SYMBOLX1_2(p, bitDPtr);
231
+ HUF_DECODE_SYMBOLX1_1(p, bitDPtr);
232
+ HUF_DECODE_SYMBOLX1_2(p, bitDPtr);
233
+ HUF_DECODE_SYMBOLX1_0(p, bitDPtr);
177
234
  }
178
235
 
179
- /* closer to the end */
180
- while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) && (p < pEnd))
181
- HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
236
+ /* [0-3] symbols remaining */
237
+ if (MEM_32bits())
238
+ while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd))
239
+ HUF_DECODE_SYMBOLX1_0(p, bitDPtr);
182
240
 
183
- /* no more data to retrieve from bitstream, hence no need to reload */
241
+ /* no more data to retrieve from bitstream, no need to reload */
184
242
  while (p < pEnd)
185
- HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
243
+ HUF_DECODE_SYMBOLX1_0(p, bitDPtr);
186
244
 
187
245
  return pEnd-pStart;
188
246
  }
189
247
 
190
- static size_t HUF_decompress1X2_usingDTable_internal(
248
+ FORCE_INLINE_TEMPLATE size_t
249
+ HUF_decompress1X1_usingDTable_internal_body(
191
250
  void* dst, size_t dstSize,
192
251
  const void* cSrc, size_t cSrcSize,
193
252
  const HUF_DTable* DTable)
@@ -195,63 +254,22 @@ static size_t HUF_decompress1X2_usingDTable_internal(
195
254
  BYTE* op = (BYTE*)dst;
196
255
  BYTE* const oend = op + dstSize;
197
256
  const void* dtPtr = DTable + 1;
198
- const HUF_DEltX2* const dt = (const HUF_DEltX2*)dtPtr;
257
+ const HUF_DEltX1* const dt = (const HUF_DEltX1*)dtPtr;
199
258
  BIT_DStream_t bitD;
200
259
  DTableDesc const dtd = HUF_getDTableDesc(DTable);
201
260
  U32 const dtLog = dtd.tableLog;
202
261
 
203
- { size_t const errorCode = BIT_initDStream(&bitD, cSrc, cSrcSize);
204
- if (HUF_isError(errorCode)) return errorCode; }
262
+ CHECK_F( BIT_initDStream(&bitD, cSrc, cSrcSize) );
205
263
 
206
- HUF_decodeStreamX2(op, &bitD, oend, dt, dtLog);
264
+ HUF_decodeStreamX1(op, &bitD, oend, dt, dtLog);
207
265
 
208
- /* check */
209
266
  if (!BIT_endOfDStream(&bitD)) return ERROR(corruption_detected);
210
267
 
211
268
  return dstSize;
212
269
  }
213
270
 
214
- size_t HUF_decompress1X2_usingDTable(
215
- void* dst, size_t dstSize,
216
- const void* cSrc, size_t cSrcSize,
217
- const HUF_DTable* DTable)
218
- {
219
- DTableDesc dtd = HUF_getDTableDesc(DTable);
220
- if (dtd.tableType != 0) return ERROR(GENERIC);
221
- return HUF_decompress1X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable);
222
- }
223
-
224
- size_t HUF_decompress1X2_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize,
225
- const void* cSrc, size_t cSrcSize,
226
- void* workSpace, size_t wkspSize)
227
- {
228
- const BYTE* ip = (const BYTE*) cSrc;
229
-
230
- size_t const hSize = HUF_readDTableX2_wksp(DCtx, cSrc, cSrcSize, workSpace, wkspSize);
231
- if (HUF_isError(hSize)) return hSize;
232
- if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
233
- ip += hSize; cSrcSize -= hSize;
234
-
235
- return HUF_decompress1X2_usingDTable_internal (dst, dstSize, ip, cSrcSize, DCtx);
236
- }
237
-
238
-
239
- size_t HUF_decompress1X2_DCtx(HUF_DTable* DCtx, void* dst, size_t dstSize,
240
- const void* cSrc, size_t cSrcSize)
241
- {
242
- U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
243
- return HUF_decompress1X2_DCtx_wksp(DCtx, dst, dstSize, cSrc, cSrcSize,
244
- workSpace, sizeof(workSpace));
245
- }
246
-
247
- size_t HUF_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
248
- {
249
- HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_TABLELOG_MAX);
250
- return HUF_decompress1X2_DCtx (DTable, dst, dstSize, cSrc, cSrcSize);
251
- }
252
-
253
-
254
- static size_t HUF_decompress4X2_usingDTable_internal(
271
+ FORCE_INLINE_TEMPLATE size_t
272
+ HUF_decompress4X1_usingDTable_internal_body(
255
273
  void* dst, size_t dstSize,
256
274
  const void* cSrc, size_t cSrcSize,
257
275
  const HUF_DTable* DTable)
@@ -263,7 +281,7 @@ static size_t HUF_decompress4X2_usingDTable_internal(
263
281
  BYTE* const ostart = (BYTE*) dst;
264
282
  BYTE* const oend = ostart + dstSize;
265
283
  const void* const dtPtr = DTable + 1;
266
- const HUF_DEltX2* const dt = (const HUF_DEltX2*)dtPtr;
284
+ const HUF_DEltX1* const dt = (const HUF_DEltX1*)dtPtr;
267
285
 
268
286
  /* Init */
269
287
  BIT_DStream_t bitD1;
@@ -286,57 +304,58 @@ static size_t HUF_decompress4X2_usingDTable_internal(
286
304
  BYTE* op2 = opStart2;
287
305
  BYTE* op3 = opStart3;
288
306
  BYTE* op4 = opStart4;
289
- U32 endSignal;
307
+ U32 endSignal = BIT_DStream_unfinished;
290
308
  DTableDesc const dtd = HUF_getDTableDesc(DTable);
291
309
  U32 const dtLog = dtd.tableLog;
292
310
 
293
311
  if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */
294
- { size_t const errorCode = BIT_initDStream(&bitD1, istart1, length1);
295
- if (HUF_isError(errorCode)) return errorCode; }
296
- { size_t const errorCode = BIT_initDStream(&bitD2, istart2, length2);
297
- if (HUF_isError(errorCode)) return errorCode; }
298
- { size_t const errorCode = BIT_initDStream(&bitD3, istart3, length3);
299
- if (HUF_isError(errorCode)) return errorCode; }
300
- { size_t const errorCode = BIT_initDStream(&bitD4, istart4, length4);
301
- if (HUF_isError(errorCode)) return errorCode; }
312
+ CHECK_F( BIT_initDStream(&bitD1, istart1, length1) );
313
+ CHECK_F( BIT_initDStream(&bitD2, istart2, length2) );
314
+ CHECK_F( BIT_initDStream(&bitD3, istart3, length3) );
315
+ CHECK_F( BIT_initDStream(&bitD4, istart4, length4) );
302
316
 
303
- /* 16-32 symbols per loop (4-8 symbols per stream) */
317
+ /* up to 16 symbols per loop (4 symbols per stream) in 64-bit mode */
304
318
  endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
305
- for ( ; (endSignal==BIT_DStream_unfinished) && (op4<(oend-7)) ; ) {
306
- HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
307
- HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
308
- HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
309
- HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
310
- HUF_DECODE_SYMBOLX2_1(op1, &bitD1);
311
- HUF_DECODE_SYMBOLX2_1(op2, &bitD2);
312
- HUF_DECODE_SYMBOLX2_1(op3, &bitD3);
313
- HUF_DECODE_SYMBOLX2_1(op4, &bitD4);
314
- HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
315
- HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
316
- HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
317
- HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
318
- HUF_DECODE_SYMBOLX2_0(op1, &bitD1);
319
- HUF_DECODE_SYMBOLX2_0(op2, &bitD2);
320
- HUF_DECODE_SYMBOLX2_0(op3, &bitD3);
321
- HUF_DECODE_SYMBOLX2_0(op4, &bitD4);
322
- endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
319
+ while ( (endSignal==BIT_DStream_unfinished) && (op4<(oend-3)) ) {
320
+ HUF_DECODE_SYMBOLX1_2(op1, &bitD1);
321
+ HUF_DECODE_SYMBOLX1_2(op2, &bitD2);
322
+ HUF_DECODE_SYMBOLX1_2(op3, &bitD3);
323
+ HUF_DECODE_SYMBOLX1_2(op4, &bitD4);
324
+ HUF_DECODE_SYMBOLX1_1(op1, &bitD1);
325
+ HUF_DECODE_SYMBOLX1_1(op2, &bitD2);
326
+ HUF_DECODE_SYMBOLX1_1(op3, &bitD3);
327
+ HUF_DECODE_SYMBOLX1_1(op4, &bitD4);
328
+ HUF_DECODE_SYMBOLX1_2(op1, &bitD1);
329
+ HUF_DECODE_SYMBOLX1_2(op2, &bitD2);
330
+ HUF_DECODE_SYMBOLX1_2(op3, &bitD3);
331
+ HUF_DECODE_SYMBOLX1_2(op4, &bitD4);
332
+ HUF_DECODE_SYMBOLX1_0(op1, &bitD1);
333
+ HUF_DECODE_SYMBOLX1_0(op2, &bitD2);
334
+ HUF_DECODE_SYMBOLX1_0(op3, &bitD3);
335
+ HUF_DECODE_SYMBOLX1_0(op4, &bitD4);
336
+ BIT_reloadDStream(&bitD1);
337
+ BIT_reloadDStream(&bitD2);
338
+ BIT_reloadDStream(&bitD3);
339
+ BIT_reloadDStream(&bitD4);
323
340
  }
324
341
 
325
342
  /* check corruption */
343
+ /* note : should not be necessary : op# advance in lock step, and we control op4.
344
+ * but curiously, binary generated by gcc 7.2 & 7.3 with -mbmi2 runs faster when >=1 test is present */
326
345
  if (op1 > opStart2) return ERROR(corruption_detected);
327
346
  if (op2 > opStart3) return ERROR(corruption_detected);
328
347
  if (op3 > opStart4) return ERROR(corruption_detected);
329
348
  /* note : op4 supposed already verified within main loop */
330
349
 
331
350
  /* finish bitStreams one by one */
332
- HUF_decodeStreamX2(op1, &bitD1, opStart2, dt, dtLog);
333
- HUF_decodeStreamX2(op2, &bitD2, opStart3, dt, dtLog);
334
- HUF_decodeStreamX2(op3, &bitD3, opStart4, dt, dtLog);
335
- HUF_decodeStreamX2(op4, &bitD4, oend, dt, dtLog);
351
+ HUF_decodeStreamX1(op1, &bitD1, opStart2, dt, dtLog);
352
+ HUF_decodeStreamX1(op2, &bitD2, opStart3, dt, dtLog);
353
+ HUF_decodeStreamX1(op3, &bitD3, opStart4, dt, dtLog);
354
+ HUF_decodeStreamX1(op4, &bitD4, oend, dt, dtLog);
336
355
 
337
356
  /* check */
338
- endSignal = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4);
339
- if (!endSignal) return ERROR(corruption_detected);
357
+ { U32 const endCheck = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4);
358
+ if (!endCheck) return ERROR(corruption_detected); }
340
359
 
341
360
  /* decoded size */
342
361
  return dstSize;
@@ -344,61 +363,123 @@ static size_t HUF_decompress4X2_usingDTable_internal(
344
363
  }
345
364
 
346
365
 
347
- size_t HUF_decompress4X2_usingDTable(
366
+ typedef size_t (*HUF_decompress_usingDTable_t)(void *dst, size_t dstSize,
367
+ const void *cSrc,
368
+ size_t cSrcSize,
369
+ const HUF_DTable *DTable);
370
+
371
+ HUF_DGEN(HUF_decompress1X1_usingDTable_internal)
372
+ HUF_DGEN(HUF_decompress4X1_usingDTable_internal)
373
+
374
+
375
+
376
+ size_t HUF_decompress1X1_usingDTable(
348
377
  void* dst, size_t dstSize,
349
378
  const void* cSrc, size_t cSrcSize,
350
379
  const HUF_DTable* DTable)
351
380
  {
352
381
  DTableDesc dtd = HUF_getDTableDesc(DTable);
353
382
  if (dtd.tableType != 0) return ERROR(GENERIC);
354
- return HUF_decompress4X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable);
383
+ return HUF_decompress1X1_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
355
384
  }
356
385
 
357
-
358
- size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
386
+ size_t HUF_decompress1X1_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize,
359
387
  const void* cSrc, size_t cSrcSize,
360
388
  void* workSpace, size_t wkspSize)
361
389
  {
362
390
  const BYTE* ip = (const BYTE*) cSrc;
363
391
 
364
- size_t const hSize = HUF_readDTableX2_wksp (dctx, cSrc, cSrcSize,
392
+ size_t const hSize = HUF_readDTableX1_wksp(DCtx, cSrc, cSrcSize, workSpace, wkspSize);
393
+ if (HUF_isError(hSize)) return hSize;
394
+ if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
395
+ ip += hSize; cSrcSize -= hSize;
396
+
397
+ return HUF_decompress1X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, DCtx, /* bmi2 */ 0);
398
+ }
399
+
400
+
401
+ size_t HUF_decompress1X1_DCtx(HUF_DTable* DCtx, void* dst, size_t dstSize,
402
+ const void* cSrc, size_t cSrcSize)
403
+ {
404
+ U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
405
+ return HUF_decompress1X1_DCtx_wksp(DCtx, dst, dstSize, cSrc, cSrcSize,
406
+ workSpace, sizeof(workSpace));
407
+ }
408
+
409
+ size_t HUF_decompress1X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
410
+ {
411
+ HUF_CREATE_STATIC_DTABLEX1(DTable, HUF_TABLELOG_MAX);
412
+ return HUF_decompress1X1_DCtx (DTable, dst, dstSize, cSrc, cSrcSize);
413
+ }
414
+
415
+ size_t HUF_decompress4X1_usingDTable(
416
+ void* dst, size_t dstSize,
417
+ const void* cSrc, size_t cSrcSize,
418
+ const HUF_DTable* DTable)
419
+ {
420
+ DTableDesc dtd = HUF_getDTableDesc(DTable);
421
+ if (dtd.tableType != 0) return ERROR(GENERIC);
422
+ return HUF_decompress4X1_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
423
+ }
424
+
425
+ static size_t HUF_decompress4X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize,
426
+ const void* cSrc, size_t cSrcSize,
427
+ void* workSpace, size_t wkspSize, int bmi2)
428
+ {
429
+ const BYTE* ip = (const BYTE*) cSrc;
430
+
431
+ size_t const hSize = HUF_readDTableX1_wksp (dctx, cSrc, cSrcSize,
365
432
  workSpace, wkspSize);
366
433
  if (HUF_isError(hSize)) return hSize;
367
434
  if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
368
435
  ip += hSize; cSrcSize -= hSize;
369
436
 
370
- return HUF_decompress4X2_usingDTable_internal (dst, dstSize, ip, cSrcSize, dctx);
437
+ return HUF_decompress4X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2);
438
+ }
439
+
440
+ size_t HUF_decompress4X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
441
+ const void* cSrc, size_t cSrcSize,
442
+ void* workSpace, size_t wkspSize)
443
+ {
444
+ return HUF_decompress4X1_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, 0);
371
445
  }
372
446
 
373
447
 
374
- size_t HUF_decompress4X2_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
448
+ size_t HUF_decompress4X1_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
375
449
  {
376
450
  U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
377
- return HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
451
+ return HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
378
452
  workSpace, sizeof(workSpace));
379
453
  }
380
- size_t HUF_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
454
+ size_t HUF_decompress4X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
381
455
  {
382
- HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_TABLELOG_MAX);
383
- return HUF_decompress4X2_DCtx(DTable, dst, dstSize, cSrc, cSrcSize);
456
+ HUF_CREATE_STATIC_DTABLEX1(DTable, HUF_TABLELOG_MAX);
457
+ return HUF_decompress4X1_DCtx(DTable, dst, dstSize, cSrc, cSrcSize);
384
458
  }
385
459
 
460
+ #endif /* HUF_FORCE_DECOMPRESS_X2 */
461
+
462
+
463
+ #ifndef HUF_FORCE_DECOMPRESS_X1
386
464
 
387
465
  /* *************************/
388
466
  /* double-symbols decoding */
389
467
  /* *************************/
390
- typedef struct { U16 sequence; BYTE nbBits; BYTE length; } HUF_DEltX4; /* double-symbols decoding */
391
468
 
469
+ typedef struct { U16 sequence; BYTE nbBits; BYTE length; } HUF_DEltX2; /* double-symbols decoding */
392
470
  typedef struct { BYTE symbol; BYTE weight; } sortedSymbol_t;
471
+ typedef U32 rankValCol_t[HUF_TABLELOG_MAX + 1];
472
+ typedef rankValCol_t rankVal_t[HUF_TABLELOG_MAX];
473
+
393
474
 
394
- /* HUF_fillDTableX4Level2() :
475
+ /* HUF_fillDTableX2Level2() :
395
476
  * `rankValOrigin` must be a table of at least (HUF_TABLELOG_MAX + 1) U32 */
396
- static void HUF_fillDTableX4Level2(HUF_DEltX4* DTable, U32 sizeLog, const U32 consumed,
477
+ static void HUF_fillDTableX2Level2(HUF_DEltX2* DTable, U32 sizeLog, const U32 consumed,
397
478
  const U32* rankValOrigin, const int minWeight,
398
479
  const sortedSymbol_t* sortedSymbols, const U32 sortedListSize,
399
480
  U32 nbBitsBaseline, U16 baseSeq)
400
481
  {
401
- HUF_DEltX4 DElt;
482
+ HUF_DEltX2 DElt;
402
483
  U32 rankVal[HUF_TABLELOG_MAX + 1];
403
484
 
404
485
  /* get pre-calculated rankVal */
@@ -433,10 +514,8 @@ static void HUF_fillDTableX4Level2(HUF_DEltX4* DTable, U32 sizeLog, const U32 co
433
514
  } }
434
515
  }
435
516
 
436
- typedef U32 rankValCol_t[HUF_TABLELOG_MAX + 1];
437
- typedef rankValCol_t rankVal_t[HUF_TABLELOG_MAX];
438
517
 
439
- static void HUF_fillDTableX4(HUF_DEltX4* DTable, const U32 targetLog,
518
+ static void HUF_fillDTableX2(HUF_DEltX2* DTable, const U32 targetLog,
440
519
  const sortedSymbol_t* sortedList, const U32 sortedListSize,
441
520
  const U32* rankStart, rankVal_t rankValOrigin, const U32 maxWeight,
442
521
  const U32 nbBitsBaseline)
@@ -461,12 +540,12 @@ static void HUF_fillDTableX4(HUF_DEltX4* DTable, const U32 targetLog,
461
540
  int minWeight = nbBits + scaleLog;
462
541
  if (minWeight < 1) minWeight = 1;
463
542
  sortedRank = rankStart[minWeight];
464
- HUF_fillDTableX4Level2(DTable+start, targetLog-nbBits, nbBits,
543
+ HUF_fillDTableX2Level2(DTable+start, targetLog-nbBits, nbBits,
465
544
  rankValOrigin[nbBits], minWeight,
466
545
  sortedList+sortedRank, sortedListSize-sortedRank,
467
546
  nbBitsBaseline, symbol);
468
547
  } else {
469
- HUF_DEltX4 DElt;
548
+ HUF_DEltX2 DElt;
470
549
  MEM_writeLE16(&(DElt.sequence), symbol);
471
550
  DElt.nbBits = (BYTE)(nbBits);
472
551
  DElt.length = 1;
@@ -478,16 +557,16 @@ static void HUF_fillDTableX4(HUF_DEltX4* DTable, const U32 targetLog,
478
557
  }
479
558
  }
480
559
 
481
- size_t HUF_readDTableX4_wksp(HUF_DTable* DTable, const void* src,
482
- size_t srcSize, void* workSpace,
483
- size_t wkspSize)
560
+ size_t HUF_readDTableX2_wksp(HUF_DTable* DTable,
561
+ const void* src, size_t srcSize,
562
+ void* workSpace, size_t wkspSize)
484
563
  {
485
564
  U32 tableLog, maxW, sizeOfSort, nbSymbols;
486
565
  DTableDesc dtd = HUF_getDTableDesc(DTable);
487
566
  U32 const maxTableLog = dtd.maxTableLog;
488
567
  size_t iSize;
489
568
  void* dtPtr = DTable+1; /* force compiler to avoid strict-aliasing */
490
- HUF_DEltX4* const dt = (HUF_DEltX4*)dtPtr;
569
+ HUF_DEltX2* const dt = (HUF_DEltX2*)dtPtr;
491
570
  U32 *rankStart;
492
571
 
493
572
  rankValCol_t* rankVal;
@@ -508,15 +587,12 @@ size_t HUF_readDTableX4_wksp(HUF_DTable* DTable, const void* src,
508
587
  weightList = (BYTE *)((U32 *)workSpace + spaceUsed32);
509
588
  spaceUsed32 += HUF_ALIGN(HUF_SYMBOLVALUE_MAX + 1, sizeof(U32)) >> 2;
510
589
 
511
- if ((spaceUsed32 << 2) > wkspSize)
512
- return ERROR(tableLog_tooLarge);
513
- workSpace = (U32 *)workSpace + spaceUsed32;
514
- wkspSize -= (spaceUsed32 << 2);
590
+ if ((spaceUsed32 << 2) > wkspSize) return ERROR(tableLog_tooLarge);
515
591
 
516
592
  rankStart = rankStart0 + 1;
517
593
  memset(rankStats, 0, sizeof(U32) * (2 * HUF_TABLELOG_MAX + 2 + 1));
518
594
 
519
- HUF_STATIC_ASSERT(sizeof(HUF_DEltX4) == sizeof(HUF_DTable)); /* if compiler fails here, assertion is wrong */
595
+ DEBUG_STATIC_ASSERT(sizeof(HUF_DEltX2) == sizeof(HUF_DTable)); /* if compiler fails here, assertion is wrong */
520
596
  if (maxTableLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge);
521
597
  /* memset(weightList, 0, sizeof(weightList)); */ /* is not necessary, even though some analyzer complain ... */
522
598
 
@@ -570,7 +646,7 @@ size_t HUF_readDTableX4_wksp(HUF_DTable* DTable, const void* src,
570
646
  rankValPtr[w] = rankVal0[w] >> consumed;
571
647
  } } } }
572
648
 
573
- HUF_fillDTableX4(dt, maxTableLog,
649
+ HUF_fillDTableX2(dt, maxTableLog,
574
650
  sortedSymbol, sizeOfSort,
575
651
  rankStart0, rankVal, maxW,
576
652
  tableLog+1);
@@ -581,14 +657,16 @@ size_t HUF_readDTableX4_wksp(HUF_DTable* DTable, const void* src,
581
657
  return iSize;
582
658
  }
583
659
 
584
- size_t HUF_readDTableX4(HUF_DTable* DTable, const void* src, size_t srcSize)
660
+ size_t HUF_readDTableX2(HUF_DTable* DTable, const void* src, size_t srcSize)
585
661
  {
586
662
  U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
587
- return HUF_readDTableX4_wksp(DTable, src, srcSize,
663
+ return HUF_readDTableX2_wksp(DTable, src, srcSize,
588
664
  workSpace, sizeof(workSpace));
589
665
  }
590
666
 
591
- static U32 HUF_decodeSymbolX4(void* op, BIT_DStream_t* DStream, const HUF_DEltX4* dt, const U32 dtLog)
667
+
668
+ FORCE_INLINE_TEMPLATE U32
669
+ HUF_decodeSymbolX2(void* op, BIT_DStream_t* DStream, const HUF_DEltX2* dt, const U32 dtLog)
592
670
  {
593
671
  size_t const val = BIT_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */
594
672
  memcpy(op, dt+val, 2);
@@ -596,7 +674,8 @@ static U32 HUF_decodeSymbolX4(void* op, BIT_DStream_t* DStream, const HUF_DEltX4
596
674
  return dt[val].length;
597
675
  }
598
676
 
599
- static U32 HUF_decodeLastSymbolX4(void* op, BIT_DStream_t* DStream, const HUF_DEltX4* dt, const U32 dtLog)
677
+ FORCE_INLINE_TEMPLATE U32
678
+ HUF_decodeLastSymbolX2(void* op, BIT_DStream_t* DStream, const HUF_DEltX2* dt, const U32 dtLog)
600
679
  {
601
680
  size_t const val = BIT_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */
602
681
  memcpy(op, dt+val, 1);
@@ -611,45 +690,46 @@ static U32 HUF_decodeLastSymbolX4(void* op, BIT_DStream_t* DStream, const HUF_DE
611
690
  return 1;
612
691
  }
613
692
 
693
+ #define HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr) \
694
+ ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog)
614
695
 
615
- #define HUF_DECODE_SYMBOLX4_0(ptr, DStreamPtr) \
616
- ptr += HUF_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog)
617
-
618
- #define HUF_DECODE_SYMBOLX4_1(ptr, DStreamPtr) \
696
+ #define HUF_DECODE_SYMBOLX2_1(ptr, DStreamPtr) \
619
697
  if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \
620
- ptr += HUF_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog)
698
+ ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog)
621
699
 
622
- #define HUF_DECODE_SYMBOLX4_2(ptr, DStreamPtr) \
700
+ #define HUF_DECODE_SYMBOLX2_2(ptr, DStreamPtr) \
623
701
  if (MEM_64bits()) \
624
- ptr += HUF_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog)
702
+ ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog)
625
703
 
626
- HINT_INLINE size_t HUF_decodeStreamX4(BYTE* p, BIT_DStream_t* bitDPtr, BYTE* const pEnd, const HUF_DEltX4* const dt, const U32 dtLog)
704
+ HINT_INLINE size_t
705
+ HUF_decodeStreamX2(BYTE* p, BIT_DStream_t* bitDPtr, BYTE* const pEnd,
706
+ const HUF_DEltX2* const dt, const U32 dtLog)
627
707
  {
628
708
  BYTE* const pStart = p;
629
709
 
630
710
  /* up to 8 symbols at a time */
631
711
  while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd-(sizeof(bitDPtr->bitContainer)-1))) {
632
- HUF_DECODE_SYMBOLX4_2(p, bitDPtr);
633
- HUF_DECODE_SYMBOLX4_1(p, bitDPtr);
634
- HUF_DECODE_SYMBOLX4_2(p, bitDPtr);
635
- HUF_DECODE_SYMBOLX4_0(p, bitDPtr);
712
+ HUF_DECODE_SYMBOLX2_2(p, bitDPtr);
713
+ HUF_DECODE_SYMBOLX2_1(p, bitDPtr);
714
+ HUF_DECODE_SYMBOLX2_2(p, bitDPtr);
715
+ HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
636
716
  }
637
717
 
638
718
  /* closer to end : up to 2 symbols at a time */
639
719
  while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p <= pEnd-2))
640
- HUF_DECODE_SYMBOLX4_0(p, bitDPtr);
720
+ HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
641
721
 
642
722
  while (p <= pEnd-2)
643
- HUF_DECODE_SYMBOLX4_0(p, bitDPtr); /* no need to reload : reached the end of DStream */
723
+ HUF_DECODE_SYMBOLX2_0(p, bitDPtr); /* no need to reload : reached the end of DStream */
644
724
 
645
725
  if (p < pEnd)
646
- p += HUF_decodeLastSymbolX4(p, bitDPtr, dt, dtLog);
726
+ p += HUF_decodeLastSymbolX2(p, bitDPtr, dt, dtLog);
647
727
 
648
728
  return p-pStart;
649
729
  }
650
730
 
651
-
652
- static size_t HUF_decompress1X4_usingDTable_internal(
731
+ FORCE_INLINE_TEMPLATE size_t
732
+ HUF_decompress1X2_usingDTable_internal_body(
653
733
  void* dst, size_t dstSize,
654
734
  const void* cSrc, size_t cSrcSize,
655
735
  const HUF_DTable* DTable)
@@ -657,17 +737,15 @@ static size_t HUF_decompress1X4_usingDTable_internal(
657
737
  BIT_DStream_t bitD;
658
738
 
659
739
  /* Init */
660
- { size_t const errorCode = BIT_initDStream(&bitD, cSrc, cSrcSize);
661
- if (HUF_isError(errorCode)) return errorCode;
662
- }
740
+ CHECK_F( BIT_initDStream(&bitD, cSrc, cSrcSize) );
663
741
 
664
742
  /* decode */
665
743
  { BYTE* const ostart = (BYTE*) dst;
666
744
  BYTE* const oend = ostart + dstSize;
667
745
  const void* const dtPtr = DTable+1; /* force compiler to not use strict-aliasing */
668
- const HUF_DEltX4* const dt = (const HUF_DEltX4*)dtPtr;
746
+ const HUF_DEltX2* const dt = (const HUF_DEltX2*)dtPtr;
669
747
  DTableDesc const dtd = HUF_getDTableDesc(DTable);
670
- HUF_decodeStreamX4(ostart, &bitD, oend, dt, dtd.tableLog);
748
+ HUF_decodeStreamX2(ostart, &bitD, oend, dt, dtd.tableLog);
671
749
  }
672
750
 
673
751
  /* check */
@@ -677,47 +755,9 @@ static size_t HUF_decompress1X4_usingDTable_internal(
677
755
  return dstSize;
678
756
  }
679
757
 
680
- size_t HUF_decompress1X4_usingDTable(
681
- void* dst, size_t dstSize,
682
- const void* cSrc, size_t cSrcSize,
683
- const HUF_DTable* DTable)
684
- {
685
- DTableDesc dtd = HUF_getDTableDesc(DTable);
686
- if (dtd.tableType != 1) return ERROR(GENERIC);
687
- return HUF_decompress1X4_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable);
688
- }
689
-
690
- size_t HUF_decompress1X4_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize,
691
- const void* cSrc, size_t cSrcSize,
692
- void* workSpace, size_t wkspSize)
693
- {
694
- const BYTE* ip = (const BYTE*) cSrc;
695
-
696
- size_t const hSize = HUF_readDTableX4_wksp(DCtx, cSrc, cSrcSize,
697
- workSpace, wkspSize);
698
- if (HUF_isError(hSize)) return hSize;
699
- if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
700
- ip += hSize; cSrcSize -= hSize;
701
758
 
702
- return HUF_decompress1X4_usingDTable_internal (dst, dstSize, ip, cSrcSize, DCtx);
703
- }
704
-
705
-
706
- size_t HUF_decompress1X4_DCtx(HUF_DTable* DCtx, void* dst, size_t dstSize,
707
- const void* cSrc, size_t cSrcSize)
708
- {
709
- U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
710
- return HUF_decompress1X4_DCtx_wksp(DCtx, dst, dstSize, cSrc, cSrcSize,
711
- workSpace, sizeof(workSpace));
712
- }
713
-
714
- size_t HUF_decompress1X4 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
715
- {
716
- HUF_CREATE_STATIC_DTABLEX4(DTable, HUF_TABLELOG_MAX);
717
- return HUF_decompress1X4_DCtx(DTable, dst, dstSize, cSrc, cSrcSize);
718
- }
719
-
720
- static size_t HUF_decompress4X4_usingDTable_internal(
759
+ FORCE_INLINE_TEMPLATE size_t
760
+ HUF_decompress4X2_usingDTable_internal_body(
721
761
  void* dst, size_t dstSize,
722
762
  const void* cSrc, size_t cSrcSize,
723
763
  const HUF_DTable* DTable)
@@ -728,7 +768,7 @@ static size_t HUF_decompress4X4_usingDTable_internal(
728
768
  BYTE* const ostart = (BYTE*) dst;
729
769
  BYTE* const oend = ostart + dstSize;
730
770
  const void* const dtPtr = DTable+1;
731
- const HUF_DEltX4* const dt = (const HUF_DEltX4*)dtPtr;
771
+ const HUF_DEltX2* const dt = (const HUF_DEltX2*)dtPtr;
732
772
 
733
773
  /* Init */
734
774
  BIT_DStream_t bitD1;
@@ -756,34 +796,30 @@ static size_t HUF_decompress4X4_usingDTable_internal(
756
796
  U32 const dtLog = dtd.tableLog;
757
797
 
758
798
  if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */
759
- { size_t const errorCode = BIT_initDStream(&bitD1, istart1, length1);
760
- if (HUF_isError(errorCode)) return errorCode; }
761
- { size_t const errorCode = BIT_initDStream(&bitD2, istart2, length2);
762
- if (HUF_isError(errorCode)) return errorCode; }
763
- { size_t const errorCode = BIT_initDStream(&bitD3, istart3, length3);
764
- if (HUF_isError(errorCode)) return errorCode; }
765
- { size_t const errorCode = BIT_initDStream(&bitD4, istart4, length4);
766
- if (HUF_isError(errorCode)) return errorCode; }
799
+ CHECK_F( BIT_initDStream(&bitD1, istart1, length1) );
800
+ CHECK_F( BIT_initDStream(&bitD2, istart2, length2) );
801
+ CHECK_F( BIT_initDStream(&bitD3, istart3, length3) );
802
+ CHECK_F( BIT_initDStream(&bitD4, istart4, length4) );
767
803
 
768
804
  /* 16-32 symbols per loop (4-8 symbols per stream) */
769
805
  endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
770
806
  for ( ; (endSignal==BIT_DStream_unfinished) & (op4<(oend-(sizeof(bitD4.bitContainer)-1))) ; ) {
771
- HUF_DECODE_SYMBOLX4_2(op1, &bitD1);
772
- HUF_DECODE_SYMBOLX4_2(op2, &bitD2);
773
- HUF_DECODE_SYMBOLX4_2(op3, &bitD3);
774
- HUF_DECODE_SYMBOLX4_2(op4, &bitD4);
775
- HUF_DECODE_SYMBOLX4_1(op1, &bitD1);
776
- HUF_DECODE_SYMBOLX4_1(op2, &bitD2);
777
- HUF_DECODE_SYMBOLX4_1(op3, &bitD3);
778
- HUF_DECODE_SYMBOLX4_1(op4, &bitD4);
779
- HUF_DECODE_SYMBOLX4_2(op1, &bitD1);
780
- HUF_DECODE_SYMBOLX4_2(op2, &bitD2);
781
- HUF_DECODE_SYMBOLX4_2(op3, &bitD3);
782
- HUF_DECODE_SYMBOLX4_2(op4, &bitD4);
783
- HUF_DECODE_SYMBOLX4_0(op1, &bitD1);
784
- HUF_DECODE_SYMBOLX4_0(op2, &bitD2);
785
- HUF_DECODE_SYMBOLX4_0(op3, &bitD3);
786
- HUF_DECODE_SYMBOLX4_0(op4, &bitD4);
807
+ HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
808
+ HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
809
+ HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
810
+ HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
811
+ HUF_DECODE_SYMBOLX2_1(op1, &bitD1);
812
+ HUF_DECODE_SYMBOLX2_1(op2, &bitD2);
813
+ HUF_DECODE_SYMBOLX2_1(op3, &bitD3);
814
+ HUF_DECODE_SYMBOLX2_1(op4, &bitD4);
815
+ HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
816
+ HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
817
+ HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
818
+ HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
819
+ HUF_DECODE_SYMBOLX2_0(op1, &bitD1);
820
+ HUF_DECODE_SYMBOLX2_0(op2, &bitD2);
821
+ HUF_DECODE_SYMBOLX2_0(op3, &bitD3);
822
+ HUF_DECODE_SYMBOLX2_0(op4, &bitD4);
787
823
 
788
824
  endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
789
825
  }
@@ -795,10 +831,10 @@ static size_t HUF_decompress4X4_usingDTable_internal(
795
831
  /* note : op4 already verified within main loop */
796
832
 
797
833
  /* finish bitStreams one by one */
798
- HUF_decodeStreamX4(op1, &bitD1, opStart2, dt, dtLog);
799
- HUF_decodeStreamX4(op2, &bitD2, opStart3, dt, dtLog);
800
- HUF_decodeStreamX4(op3, &bitD3, opStart4, dt, dtLog);
801
- HUF_decodeStreamX4(op4, &bitD4, oend, dt, dtLog);
834
+ HUF_decodeStreamX2(op1, &bitD1, opStart2, dt, dtLog);
835
+ HUF_decodeStreamX2(op2, &bitD2, opStart3, dt, dtLog);
836
+ HUF_decodeStreamX2(op3, &bitD3, opStart4, dt, dtLog);
837
+ HUF_decodeStreamX2(op4, &bitD4, oend, dt, dtLog);
802
838
 
803
839
  /* check */
804
840
  { U32 const endCheck = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4);
@@ -809,60 +845,120 @@ static size_t HUF_decompress4X4_usingDTable_internal(
809
845
  }
810
846
  }
811
847
 
848
+ HUF_DGEN(HUF_decompress1X2_usingDTable_internal)
849
+ HUF_DGEN(HUF_decompress4X2_usingDTable_internal)
812
850
 
813
- size_t HUF_decompress4X4_usingDTable(
851
+ size_t HUF_decompress1X2_usingDTable(
814
852
  void* dst, size_t dstSize,
815
853
  const void* cSrc, size_t cSrcSize,
816
854
  const HUF_DTable* DTable)
817
855
  {
818
856
  DTableDesc dtd = HUF_getDTableDesc(DTable);
819
857
  if (dtd.tableType != 1) return ERROR(GENERIC);
820
- return HUF_decompress4X4_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable);
858
+ return HUF_decompress1X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
821
859
  }
822
860
 
823
-
824
- size_t HUF_decompress4X4_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
861
+ size_t HUF_decompress1X2_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize,
825
862
  const void* cSrc, size_t cSrcSize,
826
863
  void* workSpace, size_t wkspSize)
827
864
  {
828
865
  const BYTE* ip = (const BYTE*) cSrc;
829
866
 
830
- size_t hSize = HUF_readDTableX4_wksp(dctx, cSrc, cSrcSize,
867
+ size_t const hSize = HUF_readDTableX2_wksp(DCtx, cSrc, cSrcSize,
868
+ workSpace, wkspSize);
869
+ if (HUF_isError(hSize)) return hSize;
870
+ if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
871
+ ip += hSize; cSrcSize -= hSize;
872
+
873
+ return HUF_decompress1X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, DCtx, /* bmi2 */ 0);
874
+ }
875
+
876
+
877
+ size_t HUF_decompress1X2_DCtx(HUF_DTable* DCtx, void* dst, size_t dstSize,
878
+ const void* cSrc, size_t cSrcSize)
879
+ {
880
+ U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
881
+ return HUF_decompress1X2_DCtx_wksp(DCtx, dst, dstSize, cSrc, cSrcSize,
882
+ workSpace, sizeof(workSpace));
883
+ }
884
+
885
+ size_t HUF_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
886
+ {
887
+ HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_TABLELOG_MAX);
888
+ return HUF_decompress1X2_DCtx(DTable, dst, dstSize, cSrc, cSrcSize);
889
+ }
890
+
891
+ size_t HUF_decompress4X2_usingDTable(
892
+ void* dst, size_t dstSize,
893
+ const void* cSrc, size_t cSrcSize,
894
+ const HUF_DTable* DTable)
895
+ {
896
+ DTableDesc dtd = HUF_getDTableDesc(DTable);
897
+ if (dtd.tableType != 1) return ERROR(GENERIC);
898
+ return HUF_decompress4X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
899
+ }
900
+
901
+ static size_t HUF_decompress4X2_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize,
902
+ const void* cSrc, size_t cSrcSize,
903
+ void* workSpace, size_t wkspSize, int bmi2)
904
+ {
905
+ const BYTE* ip = (const BYTE*) cSrc;
906
+
907
+ size_t hSize = HUF_readDTableX2_wksp(dctx, cSrc, cSrcSize,
831
908
  workSpace, wkspSize);
832
909
  if (HUF_isError(hSize)) return hSize;
833
910
  if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
834
911
  ip += hSize; cSrcSize -= hSize;
835
912
 
836
- return HUF_decompress4X4_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx);
913
+ return HUF_decompress4X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2);
837
914
  }
838
915
 
916
+ size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
917
+ const void* cSrc, size_t cSrcSize,
918
+ void* workSpace, size_t wkspSize)
919
+ {
920
+ return HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, /* bmi2 */ 0);
921
+ }
839
922
 
840
- size_t HUF_decompress4X4_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize,
923
+
924
+ size_t HUF_decompress4X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize,
841
925
  const void* cSrc, size_t cSrcSize)
842
926
  {
843
927
  U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
844
- return HUF_decompress4X4_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
928
+ return HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
845
929
  workSpace, sizeof(workSpace));
846
930
  }
847
931
 
848
- size_t HUF_decompress4X4 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
932
+ size_t HUF_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
849
933
  {
850
- HUF_CREATE_STATIC_DTABLEX4(DTable, HUF_TABLELOG_MAX);
851
- return HUF_decompress4X4_DCtx(DTable, dst, dstSize, cSrc, cSrcSize);
934
+ HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_TABLELOG_MAX);
935
+ return HUF_decompress4X2_DCtx(DTable, dst, dstSize, cSrc, cSrcSize);
852
936
  }
853
937
 
938
+ #endif /* HUF_FORCE_DECOMPRESS_X1 */
854
939
 
855
- /* ********************************/
856
- /* Generic decompression selector */
857
- /* ********************************/
940
+
941
+ /* ***********************************/
942
+ /* Universal decompression selectors */
943
+ /* ***********************************/
858
944
 
859
945
  size_t HUF_decompress1X_usingDTable(void* dst, size_t maxDstSize,
860
946
  const void* cSrc, size_t cSrcSize,
861
947
  const HUF_DTable* DTable)
862
948
  {
863
949
  DTableDesc const dtd = HUF_getDTableDesc(DTable);
864
- return dtd.tableType ? HUF_decompress1X4_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable) :
865
- HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable);
950
+ #if defined(HUF_FORCE_DECOMPRESS_X1)
951
+ (void)dtd;
952
+ assert(dtd.tableType == 0);
953
+ return HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
954
+ #elif defined(HUF_FORCE_DECOMPRESS_X2)
955
+ (void)dtd;
956
+ assert(dtd.tableType == 1);
957
+ return HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
958
+ #else
959
+ return dtd.tableType ? HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0) :
960
+ HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
961
+ #endif
866
962
  }
867
963
 
868
964
  size_t HUF_decompress4X_usingDTable(void* dst, size_t maxDstSize,
@@ -870,11 +966,22 @@ size_t HUF_decompress4X_usingDTable(void* dst, size_t maxDstSize,
870
966
  const HUF_DTable* DTable)
871
967
  {
872
968
  DTableDesc const dtd = HUF_getDTableDesc(DTable);
873
- return dtd.tableType ? HUF_decompress4X4_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable) :
874
- HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable);
969
+ #if defined(HUF_FORCE_DECOMPRESS_X1)
970
+ (void)dtd;
971
+ assert(dtd.tableType == 0);
972
+ return HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
973
+ #elif defined(HUF_FORCE_DECOMPRESS_X2)
974
+ (void)dtd;
975
+ assert(dtd.tableType == 1);
976
+ return HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
977
+ #else
978
+ return dtd.tableType ? HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0) :
979
+ HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
980
+ #endif
875
981
  }
876
982
 
877
983
 
984
+ #if !defined(HUF_FORCE_DECOMPRESS_X1) && !defined(HUF_FORCE_DECOMPRESS_X2)
878
985
  typedef struct { U32 tableTime; U32 decode256Time; } algo_time_t;
879
986
  static const algo_time_t algoTime[16 /* Quantization */][3 /* single, double, quad */] =
880
987
  {
@@ -896,22 +1003,35 @@ static const algo_time_t algoTime[16 /* Quantization */][3 /* single, double, qu
896
1003
  {{1455,128}, {2422,124}, {4174,124}}, /* Q ==14 : 87-93% */
897
1004
  {{ 722,128}, {1891,145}, {1936,146}}, /* Q ==15 : 93-99% */
898
1005
  };
1006
+ #endif
899
1007
 
900
1008
  /** HUF_selectDecoder() :
901
- * Tells which decoder is likely to decode faster,
902
- * based on a set of pre-determined metrics.
903
- * @return : 0==HUF_decompress4X2, 1==HUF_decompress4X4 .
904
- * Assumption : 0 < cSrcSize, dstSize <= 128 KB */
1009
+ * Tells which decoder is likely to decode faster,
1010
+ * based on a set of pre-computed metrics.
1011
+ * @return : 0==HUF_decompress4X1, 1==HUF_decompress4X2 .
1012
+ * Assumption : 0 < dstSize <= 128 KB */
905
1013
  U32 HUF_selectDecoder (size_t dstSize, size_t cSrcSize)
906
1014
  {
1015
+ assert(dstSize > 0);
1016
+ assert(dstSize <= 128*1024);
1017
+ #if defined(HUF_FORCE_DECOMPRESS_X1)
1018
+ (void)dstSize;
1019
+ (void)cSrcSize;
1020
+ return 0;
1021
+ #elif defined(HUF_FORCE_DECOMPRESS_X2)
1022
+ (void)dstSize;
1023
+ (void)cSrcSize;
1024
+ return 1;
1025
+ #else
907
1026
  /* decoder timing evaluation */
908
- U32 const Q = cSrcSize >= dstSize ? 15 : (U32)(cSrcSize * 16 / dstSize); /* Q < 16 */
909
- U32 const D256 = (U32)(dstSize >> 8);
910
- U32 const DTime0 = algoTime[Q][0].tableTime + (algoTime[Q][0].decode256Time * D256);
911
- U32 DTime1 = algoTime[Q][1].tableTime + (algoTime[Q][1].decode256Time * D256);
912
- DTime1 += DTime1 >> 3; /* advantage to algorithm using less memory, for cache eviction */
913
-
914
- return DTime1 < DTime0;
1027
+ { U32 const Q = (cSrcSize >= dstSize) ? 15 : (U32)(cSrcSize * 16 / dstSize); /* Q < 16 */
1028
+ U32 const D256 = (U32)(dstSize >> 8);
1029
+ U32 const DTime0 = algoTime[Q][0].tableTime + (algoTime[Q][0].decode256Time * D256);
1030
+ U32 DTime1 = algoTime[Q][1].tableTime + (algoTime[Q][1].decode256Time * D256);
1031
+ DTime1 += DTime1 >> 3; /* advantage to algorithm using less memory, to reduce cache eviction */
1032
+ return DTime1 < DTime0;
1033
+ }
1034
+ #endif
915
1035
  }
916
1036
 
917
1037
 
@@ -919,7 +1039,9 @@ typedef size_t (*decompressionAlgo)(void* dst, size_t dstSize, const void* cSrc,
919
1039
 
920
1040
  size_t HUF_decompress (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
921
1041
  {
922
- static const decompressionAlgo decompress[2] = { HUF_decompress4X2, HUF_decompress4X4 };
1042
+ #if !defined(HUF_FORCE_DECOMPRESS_X1) && !defined(HUF_FORCE_DECOMPRESS_X2)
1043
+ static const decompressionAlgo decompress[2] = { HUF_decompress4X1, HUF_decompress4X2 };
1044
+ #endif
923
1045
 
924
1046
  /* validation checks */
925
1047
  if (dstSize == 0) return ERROR(dstSize_tooSmall);
@@ -928,7 +1050,17 @@ size_t HUF_decompress (void* dst, size_t dstSize, const void* cSrc, size_t cSrcS
928
1050
  if (cSrcSize == 1) { memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */
929
1051
 
930
1052
  { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
1053
+ #if defined(HUF_FORCE_DECOMPRESS_X1)
1054
+ (void)algoNb;
1055
+ assert(algoNb == 0);
1056
+ return HUF_decompress4X1(dst, dstSize, cSrc, cSrcSize);
1057
+ #elif defined(HUF_FORCE_DECOMPRESS_X2)
1058
+ (void)algoNb;
1059
+ assert(algoNb == 1);
1060
+ return HUF_decompress4X2(dst, dstSize, cSrc, cSrcSize);
1061
+ #else
931
1062
  return decompress[algoNb](dst, dstSize, cSrc, cSrcSize);
1063
+ #endif
932
1064
  }
933
1065
  }
934
1066
 
@@ -941,8 +1073,18 @@ size_t HUF_decompress4X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const
941
1073
  if (cSrcSize == 1) { memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */
942
1074
 
943
1075
  { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
944
- return algoNb ? HUF_decompress4X4_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) :
945
- HUF_decompress4X2_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) ;
1076
+ #if defined(HUF_FORCE_DECOMPRESS_X1)
1077
+ (void)algoNb;
1078
+ assert(algoNb == 0);
1079
+ return HUF_decompress4X1_DCtx(dctx, dst, dstSize, cSrc, cSrcSize);
1080
+ #elif defined(HUF_FORCE_DECOMPRESS_X2)
1081
+ (void)algoNb;
1082
+ assert(algoNb == 1);
1083
+ return HUF_decompress4X2_DCtx(dctx, dst, dstSize, cSrc, cSrcSize);
1084
+ #else
1085
+ return algoNb ? HUF_decompress4X2_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) :
1086
+ HUF_decompress4X1_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) ;
1087
+ #endif
946
1088
  }
947
1089
  }
948
1090
 
@@ -964,8 +1106,19 @@ size_t HUF_decompress4X_hufOnly_wksp(HUF_DTable* dctx, void* dst,
964
1106
  if (cSrcSize == 0) return ERROR(corruption_detected);
965
1107
 
966
1108
  { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
967
- return algoNb ? HUF_decompress4X4_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize):
968
- HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize);
1109
+ #if defined(HUF_FORCE_DECOMPRESS_X1)
1110
+ (void)algoNb;
1111
+ assert(algoNb == 0);
1112
+ return HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize);
1113
+ #elif defined(HUF_FORCE_DECOMPRESS_X2)
1114
+ (void)algoNb;
1115
+ assert(algoNb == 1);
1116
+ return HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize);
1117
+ #else
1118
+ return algoNb ? HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc,
1119
+ cSrcSize, workSpace, wkspSize):
1120
+ HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize);
1121
+ #endif
969
1122
  }
970
1123
  }
971
1124
 
@@ -980,10 +1133,22 @@ size_t HUF_decompress1X_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
980
1133
  if (cSrcSize == 1) { memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */
981
1134
 
982
1135
  { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
983
- return algoNb ? HUF_decompress1X4_DCtx_wksp(dctx, dst, dstSize, cSrc,
1136
+ #if defined(HUF_FORCE_DECOMPRESS_X1)
1137
+ (void)algoNb;
1138
+ assert(algoNb == 0);
1139
+ return HUF_decompress1X1_DCtx_wksp(dctx, dst, dstSize, cSrc,
1140
+ cSrcSize, workSpace, wkspSize);
1141
+ #elif defined(HUF_FORCE_DECOMPRESS_X2)
1142
+ (void)algoNb;
1143
+ assert(algoNb == 1);
1144
+ return HUF_decompress1X2_DCtx_wksp(dctx, dst, dstSize, cSrc,
1145
+ cSrcSize, workSpace, wkspSize);
1146
+ #else
1147
+ return algoNb ? HUF_decompress1X2_DCtx_wksp(dctx, dst, dstSize, cSrc,
984
1148
  cSrcSize, workSpace, wkspSize):
985
- HUF_decompress1X2_DCtx_wksp(dctx, dst, dstSize, cSrc,
1149
+ HUF_decompress1X1_DCtx_wksp(dctx, dst, dstSize, cSrc,
986
1150
  cSrcSize, workSpace, wkspSize);
1151
+ #endif
987
1152
  }
988
1153
  }
989
1154
 
@@ -994,3 +1159,74 @@ size_t HUF_decompress1X_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize,
994
1159
  return HUF_decompress1X_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
995
1160
  workSpace, sizeof(workSpace));
996
1161
  }
1162
+
1163
+
1164
+ size_t HUF_decompress1X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2)
1165
+ {
1166
+ DTableDesc const dtd = HUF_getDTableDesc(DTable);
1167
+ #if defined(HUF_FORCE_DECOMPRESS_X1)
1168
+ (void)dtd;
1169
+ assert(dtd.tableType == 0);
1170
+ return HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
1171
+ #elif defined(HUF_FORCE_DECOMPRESS_X2)
1172
+ (void)dtd;
1173
+ assert(dtd.tableType == 1);
1174
+ return HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
1175
+ #else
1176
+ return dtd.tableType ? HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2) :
1177
+ HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
1178
+ #endif
1179
+ }
1180
+
1181
+ #ifndef HUF_FORCE_DECOMPRESS_X2
1182
+ size_t HUF_decompress1X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2)
1183
+ {
1184
+ const BYTE* ip = (const BYTE*) cSrc;
1185
+
1186
+ size_t const hSize = HUF_readDTableX1_wksp(dctx, cSrc, cSrcSize, workSpace, wkspSize);
1187
+ if (HUF_isError(hSize)) return hSize;
1188
+ if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
1189
+ ip += hSize; cSrcSize -= hSize;
1190
+
1191
+ return HUF_decompress1X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2);
1192
+ }
1193
+ #endif
1194
+
1195
+ size_t HUF_decompress4X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2)
1196
+ {
1197
+ DTableDesc const dtd = HUF_getDTableDesc(DTable);
1198
+ #if defined(HUF_FORCE_DECOMPRESS_X1)
1199
+ (void)dtd;
1200
+ assert(dtd.tableType == 0);
1201
+ return HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
1202
+ #elif defined(HUF_FORCE_DECOMPRESS_X2)
1203
+ (void)dtd;
1204
+ assert(dtd.tableType == 1);
1205
+ return HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
1206
+ #else
1207
+ return dtd.tableType ? HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2) :
1208
+ HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
1209
+ #endif
1210
+ }
1211
+
1212
+ size_t HUF_decompress4X_hufOnly_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2)
1213
+ {
1214
+ /* validation checks */
1215
+ if (dstSize == 0) return ERROR(dstSize_tooSmall);
1216
+ if (cSrcSize == 0) return ERROR(corruption_detected);
1217
+
1218
+ { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
1219
+ #if defined(HUF_FORCE_DECOMPRESS_X1)
1220
+ (void)algoNb;
1221
+ assert(algoNb == 0);
1222
+ return HUF_decompress4X1_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2);
1223
+ #elif defined(HUF_FORCE_DECOMPRESS_X2)
1224
+ (void)algoNb;
1225
+ assert(algoNb == 1);
1226
+ return HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2);
1227
+ #else
1228
+ return algoNb ? HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2) :
1229
+ HUF_decompress4X1_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2);
1230
+ #endif
1231
+ }
1232
+ }