extzstd 0.2 → 0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (88)
  1. checksums.yaml +4 -4
  2. data/HISTORY.ja.md +13 -0
  3. data/README.md +17 -14
  4. data/contrib/zstd/{NEWS → CHANGELOG} +115 -2
  5. data/contrib/zstd/CODE_OF_CONDUCT.md +5 -0
  6. data/contrib/zstd/Makefile +99 -53
  7. data/contrib/zstd/README.md +59 -39
  8. data/contrib/zstd/TESTING.md +1 -1
  9. data/contrib/zstd/appveyor.yml +17 -6
  10. data/contrib/zstd/lib/BUCK +29 -2
  11. data/contrib/zstd/lib/Makefile +118 -21
  12. data/contrib/zstd/lib/README.md +84 -44
  13. data/contrib/zstd/lib/common/bitstream.h +17 -33
  14. data/contrib/zstd/lib/common/compiler.h +62 -8
  15. data/contrib/zstd/lib/common/cpu.h +215 -0
  16. data/contrib/zstd/lib/common/debug.c +44 -0
  17. data/contrib/zstd/lib/common/debug.h +134 -0
  18. data/contrib/zstd/lib/common/entropy_common.c +16 -1
  19. data/contrib/zstd/lib/common/error_private.c +7 -0
  20. data/contrib/zstd/lib/common/fse.h +48 -44
  21. data/contrib/zstd/lib/common/fse_decompress.c +3 -3
  22. data/contrib/zstd/lib/common/huf.h +169 -113
  23. data/contrib/zstd/lib/common/mem.h +20 -2
  24. data/contrib/zstd/lib/common/pool.c +135 -49
  25. data/contrib/zstd/lib/common/pool.h +40 -21
  26. data/contrib/zstd/lib/common/threading.c +2 -2
  27. data/contrib/zstd/lib/common/threading.h +12 -12
  28. data/contrib/zstd/lib/common/xxhash.c +3 -2
  29. data/contrib/zstd/lib/common/zstd_common.c +3 -6
  30. data/contrib/zstd/lib/common/zstd_errors.h +17 -7
  31. data/contrib/zstd/lib/common/zstd_internal.h +76 -48
  32. data/contrib/zstd/lib/compress/fse_compress.c +89 -209
  33. data/contrib/zstd/lib/compress/hist.c +203 -0
  34. data/contrib/zstd/lib/compress/hist.h +95 -0
  35. data/contrib/zstd/lib/compress/huf_compress.c +188 -80
  36. data/contrib/zstd/lib/compress/zstd_compress.c +2500 -1203
  37. data/contrib/zstd/lib/compress/zstd_compress_internal.h +463 -62
  38. data/contrib/zstd/lib/compress/zstd_double_fast.c +321 -131
  39. data/contrib/zstd/lib/compress/zstd_double_fast.h +13 -4
  40. data/contrib/zstd/lib/compress/zstd_fast.c +335 -108
  41. data/contrib/zstd/lib/compress/zstd_fast.h +12 -6
  42. data/contrib/zstd/lib/compress/zstd_lazy.c +654 -313
  43. data/contrib/zstd/lib/compress/zstd_lazy.h +44 -16
  44. data/contrib/zstd/lib/compress/zstd_ldm.c +310 -420
  45. data/contrib/zstd/lib/compress/zstd_ldm.h +63 -26
  46. data/contrib/zstd/lib/compress/zstd_opt.c +773 -325
  47. data/contrib/zstd/lib/compress/zstd_opt.h +31 -5
  48. data/contrib/zstd/lib/compress/zstdmt_compress.c +1468 -518
  49. data/contrib/zstd/lib/compress/zstdmt_compress.h +96 -45
  50. data/contrib/zstd/lib/decompress/huf_decompress.c +518 -282
  51. data/contrib/zstd/lib/decompress/zstd_ddict.c +240 -0
  52. data/contrib/zstd/lib/decompress/zstd_ddict.h +44 -0
  53. data/contrib/zstd/lib/decompress/zstd_decompress.c +613 -1513
  54. data/contrib/zstd/lib/decompress/zstd_decompress_block.c +1311 -0
  55. data/contrib/zstd/lib/decompress/zstd_decompress_block.h +59 -0
  56. data/contrib/zstd/lib/decompress/zstd_decompress_internal.h +175 -0
  57. data/contrib/zstd/lib/dictBuilder/cover.c +194 -113
  58. data/contrib/zstd/lib/dictBuilder/cover.h +112 -0
  59. data/contrib/zstd/lib/dictBuilder/divsufsort.c +3 -3
  60. data/contrib/zstd/lib/dictBuilder/fastcover.c +740 -0
  61. data/contrib/zstd/lib/dictBuilder/zdict.c +142 -106
  62. data/contrib/zstd/lib/dictBuilder/zdict.h +115 -49
  63. data/contrib/zstd/lib/legacy/zstd_legacy.h +44 -12
  64. data/contrib/zstd/lib/legacy/zstd_v01.c +41 -10
  65. data/contrib/zstd/lib/legacy/zstd_v01.h +12 -7
  66. data/contrib/zstd/lib/legacy/zstd_v02.c +37 -12
  67. data/contrib/zstd/lib/legacy/zstd_v02.h +12 -7
  68. data/contrib/zstd/lib/legacy/zstd_v03.c +38 -12
  69. data/contrib/zstd/lib/legacy/zstd_v03.h +12 -7
  70. data/contrib/zstd/lib/legacy/zstd_v04.c +55 -174
  71. data/contrib/zstd/lib/legacy/zstd_v04.h +12 -7
  72. data/contrib/zstd/lib/legacy/zstd_v05.c +59 -31
  73. data/contrib/zstd/lib/legacy/zstd_v05.h +12 -7
  74. data/contrib/zstd/lib/legacy/zstd_v06.c +48 -20
  75. data/contrib/zstd/lib/legacy/zstd_v06.h +10 -5
  76. data/contrib/zstd/lib/legacy/zstd_v07.c +62 -29
  77. data/contrib/zstd/lib/legacy/zstd_v07.h +10 -5
  78. data/contrib/zstd/lib/zstd.h +1346 -832
  79. data/ext/extzstd.c +27 -19
  80. data/ext/extzstd_stream.c +20 -4
  81. data/ext/zstd_compress.c +1 -0
  82. data/ext/zstd_decompress.c +4 -0
  83. data/ext/zstd_dictbuilder.c +4 -0
  84. data/ext/zstd_dictbuilder_fastcover.c +5 -0
  85. data/lib/extzstd.rb +52 -220
  86. data/lib/extzstd/version.rb +1 -1
  87. metadata +21 -7
  88. data/contrib/zstd/circle.yml +0 -63
@@ -17,10 +17,25 @@
17
17
 
18
18
 
19
19
  /* Note : This is an internal API.
20
- * Some methods are still exposed (ZSTDLIB_API),
20
+ * These APIs used to be exposed with ZSTDLIB_API,
21
21
  * because it used to be the only way to invoke MT compression.
22
- * Now, it's recommended to use ZSTD_compress_generic() instead.
23
- * These methods will stop being exposed in a future version */
22
+ * Now, it's recommended to use ZSTD_compress2() and ZSTD_compressStream2()
23
+ * instead.
24
+ *
25
+ * If you depend on these APIs and can't switch, then define
26
+ * ZSTD_LEGACY_MULTITHREADED_API when making the dynamic library.
27
+ * However, we may completely remove these functions in a future
28
+ * release, so please switch soon.
29
+ *
30
+ * This API requires ZSTD_MULTITHREAD to be defined during compilation,
31
+ * otherwise ZSTDMT_createCCtx*() will fail.
32
+ */
33
+
34
+ #ifdef ZSTD_LEGACY_MULTITHREADED_API
35
+ # define ZSTDMT_API ZSTDLIB_API
36
+ #else
37
+ # define ZSTDMT_API
38
+ #endif
24
39
 
25
40
  /* === Dependencies === */
26
41
  #include <stddef.h> /* size_t */
@@ -28,19 +43,31 @@
28
43
  #include "zstd.h" /* ZSTD_inBuffer, ZSTD_outBuffer, ZSTDLIB_API */
29
44
 
30
45
 
46
+ /* === Constants === */
47
+ #ifndef ZSTDMT_NBWORKERS_MAX
48
+ # define ZSTDMT_NBWORKERS_MAX 200
49
+ #endif
50
+ #ifndef ZSTDMT_JOBSIZE_MIN
51
+ # define ZSTDMT_JOBSIZE_MIN (1 MB)
52
+ #endif
53
+ #define ZSTDMT_JOBSIZE_MAX (MEM_32bits() ? (512 MB) : (1024 MB))
54
+
55
+
31
56
  /* === Memory management === */
32
57
  typedef struct ZSTDMT_CCtx_s ZSTDMT_CCtx;
33
- ZSTDLIB_API ZSTDMT_CCtx* ZSTDMT_createCCtx(unsigned nbThreads);
34
- ZSTDLIB_API ZSTDMT_CCtx* ZSTDMT_createCCtx_advanced(unsigned nbThreads,
58
+ /* Requires ZSTD_MULTITHREAD to be defined during compilation, otherwise it will return NULL. */
59
+ ZSTDMT_API ZSTDMT_CCtx* ZSTDMT_createCCtx(unsigned nbWorkers);
60
+ /* Requires ZSTD_MULTITHREAD to be defined during compilation, otherwise it will return NULL. */
61
+ ZSTDMT_API ZSTDMT_CCtx* ZSTDMT_createCCtx_advanced(unsigned nbWorkers,
35
62
  ZSTD_customMem cMem);
36
- ZSTDLIB_API size_t ZSTDMT_freeCCtx(ZSTDMT_CCtx* mtctx);
63
+ ZSTDMT_API size_t ZSTDMT_freeCCtx(ZSTDMT_CCtx* mtctx);
37
64
 
38
- ZSTDLIB_API size_t ZSTDMT_sizeof_CCtx(ZSTDMT_CCtx* mtctx);
65
+ ZSTDMT_API size_t ZSTDMT_sizeof_CCtx(ZSTDMT_CCtx* mtctx);
39
66
 
40
67
 
41
- /* === Simple buffer-to-butter one-pass function === */
68
+ /* === Simple one-pass compression function === */
42
69
 
43
- ZSTDLIB_API size_t ZSTDMT_compressCCtx(ZSTDMT_CCtx* mtctx,
70
+ ZSTDMT_API size_t ZSTDMT_compressCCtx(ZSTDMT_CCtx* mtctx,
44
71
  void* dst, size_t dstCapacity,
45
72
  const void* src, size_t srcSize,
46
73
  int compressionLevel);
@@ -49,34 +76,31 @@ ZSTDLIB_API size_t ZSTDMT_compressCCtx(ZSTDMT_CCtx* mtctx,
49
76
 
50
77
  /* === Streaming functions === */
51
78
 
52
- ZSTDLIB_API size_t ZSTDMT_initCStream(ZSTDMT_CCtx* mtctx, int compressionLevel);
53
- ZSTDLIB_API size_t ZSTDMT_resetCStream(ZSTDMT_CCtx* mtctx, unsigned long long pledgedSrcSize); /**< if srcSize is not known at reset time, use ZSTD_CONTENTSIZE_UNKNOWN. Note: for compatibility with older programs, 0 means the same as ZSTD_CONTENTSIZE_UNKNOWN, but it may change in the future, to mean "empty" */
79
+ ZSTDMT_API size_t ZSTDMT_initCStream(ZSTDMT_CCtx* mtctx, int compressionLevel);
80
+ ZSTDMT_API size_t ZSTDMT_resetCStream(ZSTDMT_CCtx* mtctx, unsigned long long pledgedSrcSize); /**< if srcSize is not known at reset time, use ZSTD_CONTENTSIZE_UNKNOWN. Note: for compatibility with older programs, 0 means the same as ZSTD_CONTENTSIZE_UNKNOWN, but it will change in the future to mean "empty" */
54
81
 
55
- ZSTDLIB_API size_t ZSTDMT_compressStream(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output, ZSTD_inBuffer* input);
82
+ ZSTDMT_API size_t ZSTDMT_nextInputSizeHint(const ZSTDMT_CCtx* mtctx);
83
+ ZSTDMT_API size_t ZSTDMT_compressStream(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output, ZSTD_inBuffer* input);
56
84
 
57
- ZSTDLIB_API size_t ZSTDMT_flushStream(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output); /**< @return : 0 == all flushed; >0 : still some data to be flushed; or an error code (ZSTD_isError()) */
58
- ZSTDLIB_API size_t ZSTDMT_endStream(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output); /**< @return : 0 == all flushed; >0 : still some data to be flushed; or an error code (ZSTD_isError()) */
85
+ ZSTDMT_API size_t ZSTDMT_flushStream(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output); /**< @return : 0 == all flushed; >0 : still some data to be flushed; or an error code (ZSTD_isError()) */
86
+ ZSTDMT_API size_t ZSTDMT_endStream(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output); /**< @return : 0 == all flushed; >0 : still some data to be flushed; or an error code (ZSTD_isError()) */
59
87
 
60
88
 
61
89
  /* === Advanced functions and parameters === */
62
90
 
63
- #ifndef ZSTDMT_JOBSIZE_MIN
64
- # define ZSTDMT_JOBSIZE_MIN (1U << 20) /* 1 MB - Minimum size of each compression job */
65
- #endif
91
+ ZSTDMT_API size_t ZSTDMT_compress_advanced(ZSTDMT_CCtx* mtctx,
92
+ void* dst, size_t dstCapacity,
93
+ const void* src, size_t srcSize,
94
+ const ZSTD_CDict* cdict,
95
+ ZSTD_parameters params,
96
+ int overlapLog);
66
97
 
67
- ZSTDLIB_API size_t ZSTDMT_compress_advanced(ZSTDMT_CCtx* mtctx,
68
- void* dst, size_t dstCapacity,
69
- const void* src, size_t srcSize,
70
- const ZSTD_CDict* cdict,
71
- ZSTD_parameters const params,
72
- unsigned overlapLog);
73
-
74
- ZSTDLIB_API size_t ZSTDMT_initCStream_advanced(ZSTDMT_CCtx* mtctx,
98
+ ZSTDMT_API size_t ZSTDMT_initCStream_advanced(ZSTDMT_CCtx* mtctx,
75
99
  const void* dict, size_t dictSize, /* dict can be released after init, a local copy is preserved within zcs */
76
100
  ZSTD_parameters params,
77
101
  unsigned long long pledgedSrcSize); /* pledgedSrcSize is optional and can be zero == unknown */
78
102
 
79
- ZSTDLIB_API size_t ZSTDMT_initCStream_usingCDict(ZSTDMT_CCtx* mtctx,
103
+ ZSTDMT_API size_t ZSTDMT_initCStream_usingCDict(ZSTDMT_CCtx* mtctx,
80
104
  const ZSTD_CDict* cdict,
81
105
  ZSTD_frameParameters fparams,
82
106
  unsigned long long pledgedSrcSize); /* note : zero means empty */
@@ -84,8 +108,9 @@ ZSTDLIB_API size_t ZSTDMT_initCStream_usingCDict(ZSTDMT_CCtx* mtctx,
84
108
  /* ZSTDMT_parameter :
85
109
  * List of parameters that can be set using ZSTDMT_setMTCtxParameter() */
86
110
  typedef enum {
87
- ZSTDMT_p_jobSize, /* Each job is compressed in parallel. By default, this value is dynamically determined depending on compression parameters. Can be set explicitly here. */
88
- ZSTDMT_p_overlapSectionLog /* Each job may reload a part of previous job to enhance compressionr ratio; 0 == no overlap, 6(default) == use 1/8th of window, >=9 == use full window */
111
+ ZSTDMT_p_jobSize, /* Each job is compressed in parallel. By default, this value is dynamically determined depending on compression parameters. Can be set explicitly here. */
112
+ ZSTDMT_p_overlapLog, /* Each job may reload a part of previous job to enhance compression ratio; 0 == no overlap, 6(default) == use 1/8th of window, >=9 == use full window. This is a "sticky" parameter : its value will be re-used on next compression job */
113
+ ZSTDMT_p_rsyncable /* Enables rsyncable mode. */
89
114
  } ZSTDMT_parameter;
90
115
 
91
116
  /* ZSTDMT_setMTCtxParameter() :
@@ -93,34 +118,60 @@ typedef enum {
93
118
  * The function must typically be called after ZSTDMT_createCCtx() but __before ZSTDMT_init*() !__
94
119
  * Parameters not explicitly reset by ZSTDMT_init*() remain the same in consecutive compression sessions.
95
120
  * @return : 0, or an error code (which can be tested using ZSTD_isError()) */
96
- ZSTDLIB_API size_t ZSTDMT_setMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSTDMT_parameter parameter, unsigned value);
121
+ ZSTDMT_API size_t ZSTDMT_setMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSTDMT_parameter parameter, int value);
122
+
123
+ /* ZSTDMT_getMTCtxParameter() :
124
+ * Query the ZSTDMT_CCtx for a parameter value.
125
+ * @return : 0, or an error code (which can be tested using ZSTD_isError()) */
126
+ ZSTDMT_API size_t ZSTDMT_getMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSTDMT_parameter parameter, int* value);
97
127
 
98
128
 
99
129
  /*! ZSTDMT_compressStream_generic() :
100
- * Combines ZSTDMT_compressStream() with ZSTDMT_flushStream() or ZSTDMT_endStream()
130
+ * Combines ZSTDMT_compressStream() with optional ZSTDMT_flushStream() or ZSTDMT_endStream()
101
131
  * depending on flush directive.
102
132
  * @return : minimum amount of data still to be flushed
103
133
  * 0 if fully flushed
104
- * or an error code */
105
- ZSTDLIB_API size_t ZSTDMT_compressStream_generic(ZSTDMT_CCtx* mtctx,
134
+ * or an error code
135
+ * note : mtctx must first be initialized using any ZSTDMT_initCStream*() variant */
136
+ ZSTDMT_API size_t ZSTDMT_compressStream_generic(ZSTDMT_CCtx* mtctx,
106
137
  ZSTD_outBuffer* output,
107
138
  ZSTD_inBuffer* input,
108
139
  ZSTD_EndDirective endOp);
109
140
 
110
141
 
111
- /* === Private definitions; never ever use directly === */
112
-
113
- size_t ZSTDMT_CCtxParam_setMTCtxParameter(ZSTD_CCtx_params* params, ZSTDMT_parameter parameter, unsigned value);
114
-
115
- /* ZSTDMT_CCtxParam_setNbThreads()
116
- * Set nbThreads, and clamp it correctly,
117
- * also reset jobSize and overlapLog */
118
- size_t ZSTDMT_CCtxParam_setNbThreads(ZSTD_CCtx_params* params, unsigned nbThreads);
142
+ /* ========================================================
143
+ * === Private interface, for use by ZSTD_compress.c ===
144
+ * === Not exposed in libzstd. Never invoke directly ===
145
+ * ======================================================== */
146
+
147
+ /*! ZSTDMT_toFlushNow()
148
+ * Tell how many bytes are ready to be flushed immediately.
149
+ * Probe the oldest active job (not yet entirely flushed) and check its output buffer.
150
+ * A return value of 0 means either that there is no active job,
151
+ * or that the oldest job is still active but everything it has produced so far has been flushed,
152
+ * in which case flushing is limited by the speed of the oldest job. */
153
+ size_t ZSTDMT_toFlushNow(ZSTDMT_CCtx* mtctx);
154
+
155
+ /*! ZSTDMT_CCtxParam_setMTCtxParameter()
156
+ * like ZSTDMT_setMTCtxParameter(), but into a ZSTD_CCtx_params */
157
+ size_t ZSTDMT_CCtxParam_setMTCtxParameter(ZSTD_CCtx_params* params, ZSTDMT_parameter parameter, int value);
158
+
159
+ /*! ZSTDMT_CCtxParam_setNbWorkers()
160
+ * Set nbWorkers, and clamp it.
161
+ * Also reset jobSize and overlapLog */
162
+ size_t ZSTDMT_CCtxParam_setNbWorkers(ZSTD_CCtx_params* params, unsigned nbWorkers);
163
+
164
+ /*! ZSTDMT_updateCParams_whileCompressing() :
165
+ * Updates only a selected set of compression parameters, to remain compatible with current frame.
166
+ * New parameters will be applied to next compression job. */
167
+ void ZSTDMT_updateCParams_whileCompressing(ZSTDMT_CCtx* mtctx, const ZSTD_CCtx_params* cctxParams);
168
+
169
+ /*! ZSTDMT_getFrameProgression():
170
+ * tells how much data has been consumed (input) and produced (output) for current frame.
171
+ * able to count progression inside worker threads.
172
+ */
173
+ ZSTD_frameProgression ZSTDMT_getFrameProgression(ZSTDMT_CCtx* mtctx);
119
174
 
120
- /* ZSTDMT_getNbThreads():
121
- * @return nb threads currently active in mtctx.
122
- * mtctx must be valid */
123
- size_t ZSTDMT_getNbThreads(const ZSTDMT_CCtx* mtctx);
124
175
 
125
176
  /*! ZSTDMT_initCStream_internal() :
126
177
  * Private use only. Init streaming operation.
@@ -128,7 +179,7 @@ size_t ZSTDMT_getNbThreads(const ZSTDMT_CCtx* mtctx);
128
179
  * must receive dict, or cdict, or none, but not both.
129
180
  * @return : 0, or an error code */
130
181
  size_t ZSTDMT_initCStream_internal(ZSTDMT_CCtx* zcs,
131
- const void* dict, size_t dictSize, ZSTD_dictMode_e dictMode,
182
+ const void* dict, size_t dictSize, ZSTD_dictContentType_e dictContentType,
132
183
  const ZSTD_CDict* cdict,
133
184
  ZSTD_CCtx_params params, unsigned long long pledgedSrcSize);
134
185
 
@@ -1,6 +1,7 @@
1
1
  /* ******************************************************************
2
- Huffman decoder, part of New Generation Entropy library
3
- Copyright (C) 2013-2016, Yann Collet.
2
+ huff0 huffman decoder,
3
+ part of Finite State Entropy library
4
+ Copyright (C) 2013-present, Yann Collet.
4
5
 
5
6
  BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
6
7
 
@@ -29,38 +30,95 @@
29
30
 
30
31
  You can contact the author at :
31
32
  - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy
32
- - Public forum : https://groups.google.com/forum/#!forum/lz4c
33
33
  ****************************************************************** */
34
34
 
35
35
  /* **************************************************************
36
36
  * Dependencies
37
37
  ****************************************************************/
38
38
  #include <string.h> /* memcpy, memset */
39
- #include "bitstream.h" /* BIT_* */
40
39
  #include "compiler.h"
41
- #include "fse.h" /* header compression */
40
+ #include "bitstream.h" /* BIT_* */
41
+ #include "fse.h" /* to compress headers */
42
42
  #define HUF_STATIC_LINKING_ONLY
43
43
  #include "huf.h"
44
44
  #include "error_private.h"
45
45
 
46
+ /* **************************************************************
47
+ * Macros
48
+ ****************************************************************/
49
+
50
+ /* Each of these two optional macros forces the use of one of the two
51
+ * Huffman decompression implementations. You can't force both
52
+ * at the same time.
53
+ */
54
+ #if defined(HUF_FORCE_DECOMPRESS_X1) && \
55
+ defined(HUF_FORCE_DECOMPRESS_X2)
56
+ #error "Cannot force the use of the X1 and X2 decoders at the same time!"
57
+ #endif
58
+
46
59
 
47
60
  /* **************************************************************
48
61
  * Error Management
49
62
  ****************************************************************/
50
63
  #define HUF_isError ERR_isError
51
- #define HUF_STATIC_ASSERT(c) { enum { HUF_static_assert = 1/(int)(!!(c)) }; } /* use only *after* variable declarations */
64
+ #define CHECK_F(f) { size_t const err_ = (f); if (HUF_isError(err_)) return err_; }
52
65
 
53
66
 
54
67
  /* **************************************************************
55
68
  * Byte alignment for workSpace management
56
69
  ****************************************************************/
57
- #define HUF_ALIGN(x, a) HUF_ALIGN_MASK((x), (a) - 1)
70
+ #define HUF_ALIGN(x, a) HUF_ALIGN_MASK((x), (a) - 1)
58
71
  #define HUF_ALIGN_MASK(x, mask) (((x) + (mask)) & ~(mask))
59
72
 
73
+
74
+ /* **************************************************************
75
+ * BMI2 Variant Wrappers
76
+ ****************************************************************/
77
+ #if DYNAMIC_BMI2
78
+
79
+ #define HUF_DGEN(fn) \
80
+ \
81
+ static size_t fn##_default( \
82
+ void* dst, size_t dstSize, \
83
+ const void* cSrc, size_t cSrcSize, \
84
+ const HUF_DTable* DTable) \
85
+ { \
86
+ return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable); \
87
+ } \
88
+ \
89
+ static TARGET_ATTRIBUTE("bmi2") size_t fn##_bmi2( \
90
+ void* dst, size_t dstSize, \
91
+ const void* cSrc, size_t cSrcSize, \
92
+ const HUF_DTable* DTable) \
93
+ { \
94
+ return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable); \
95
+ } \
96
+ \
97
+ static size_t fn(void* dst, size_t dstSize, void const* cSrc, \
98
+ size_t cSrcSize, HUF_DTable const* DTable, int bmi2) \
99
+ { \
100
+ if (bmi2) { \
101
+ return fn##_bmi2(dst, dstSize, cSrc, cSrcSize, DTable); \
102
+ } \
103
+ return fn##_default(dst, dstSize, cSrc, cSrcSize, DTable); \
104
+ }
105
+
106
+ #else
107
+
108
+ #define HUF_DGEN(fn) \
109
+ static size_t fn(void* dst, size_t dstSize, void const* cSrc, \
110
+ size_t cSrcSize, HUF_DTable const* DTable, int bmi2) \
111
+ { \
112
+ (void)bmi2; \
113
+ return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable); \
114
+ }
115
+
116
+ #endif
117
+
118
+
60
119
  /*-***************************/
61
120
  /* generic DTableDesc */
62
121
  /*-***************************/
63
-
64
122
  typedef struct { BYTE maxTableLog; BYTE tableType; BYTE tableLog; BYTE reserved; } DTableDesc;
65
123
 
66
124
  static DTableDesc HUF_getDTableDesc(const HUF_DTable* table)
@@ -71,19 +129,20 @@ static DTableDesc HUF_getDTableDesc(const HUF_DTable* table)
71
129
  }
72
130
 
73
131
 
132
+ #ifndef HUF_FORCE_DECOMPRESS_X2
133
+
74
134
  /*-***************************/
75
135
  /* single-symbol decoding */
76
136
  /*-***************************/
137
+ typedef struct { BYTE byte; BYTE nbBits; } HUF_DEltX1; /* single-symbol decoding */
77
138
 
78
- typedef struct { BYTE byte; BYTE nbBits; } HUF_DEltX2; /* single-symbol decoding */
79
-
80
- size_t HUF_readDTableX2_wksp(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize)
139
+ size_t HUF_readDTableX1_wksp(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize)
81
140
  {
82
141
  U32 tableLog = 0;
83
142
  U32 nbSymbols = 0;
84
143
  size_t iSize;
85
144
  void* const dtPtr = DTable + 1;
86
- HUF_DEltX2* const dt = (HUF_DEltX2*)dtPtr;
145
+ HUF_DEltX1* const dt = (HUF_DEltX1*)dtPtr;
87
146
 
88
147
  U32* rankVal;
89
148
  BYTE* huffWeight;
@@ -94,12 +153,9 @@ size_t HUF_readDTableX2_wksp(HUF_DTable* DTable, const void* src, size_t srcSize
94
153
  huffWeight = (BYTE *)((U32 *)workSpace + spaceUsed32);
95
154
  spaceUsed32 += HUF_ALIGN(HUF_SYMBOLVALUE_MAX + 1, sizeof(U32)) >> 2;
96
155
 
97
- if ((spaceUsed32 << 2) > wkspSize)
98
- return ERROR(tableLog_tooLarge);
99
- workSpace = (U32 *)workSpace + spaceUsed32;
100
- wkspSize -= (spaceUsed32 << 2);
156
+ if ((spaceUsed32 << 2) > wkspSize) return ERROR(tableLog_tooLarge);
101
157
 
102
- HUF_STATIC_ASSERT(sizeof(DTableDesc) == sizeof(HUF_DTable));
158
+ DEBUG_STATIC_ASSERT(sizeof(DTableDesc) == sizeof(HUF_DTable));
103
159
  /* memset(huffWeight, 0, sizeof(huffWeight)); */ /* is not necessary, even though some analyzers complain ... */
104
160
 
105
161
  iSize = HUF_readStats(huffWeight, HUF_SYMBOLVALUE_MAX + 1, rankVal, &nbSymbols, &tableLog, src, srcSize);
@@ -127,7 +183,7 @@ size_t HUF_readDTableX2_wksp(HUF_DTable* DTable, const void* src, size_t srcSize
127
183
  U32 const w = huffWeight[n];
128
184
  U32 const length = (1 << w) >> 1;
129
185
  U32 u;
130
- HUF_DEltX2 D;
186
+ HUF_DEltX1 D;
131
187
  D.byte = (BYTE)n; D.nbBits = (BYTE)(tableLog + 1 - w);
132
188
  for (u = rankVal[w]; u < rankVal[w] + length; u++)
133
189
  dt[u] = D;
@@ -137,15 +193,15 @@ size_t HUF_readDTableX2_wksp(HUF_DTable* DTable, const void* src, size_t srcSize
137
193
  return iSize;
138
194
  }
139
195
 
140
- size_t HUF_readDTableX2(HUF_DTable* DTable, const void* src, size_t srcSize)
196
+ size_t HUF_readDTableX1(HUF_DTable* DTable, const void* src, size_t srcSize)
141
197
  {
142
198
  U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
143
- return HUF_readDTableX2_wksp(DTable, src, srcSize,
199
+ return HUF_readDTableX1_wksp(DTable, src, srcSize,
144
200
  workSpace, sizeof(workSpace));
145
201
  }
146
202
 
147
-
148
- static BYTE HUF_decodeSymbolX2(BIT_DStream_t* Dstream, const HUF_DEltX2* dt, const U32 dtLog)
203
+ FORCE_INLINE_TEMPLATE BYTE
204
+ HUF_decodeSymbolX1(BIT_DStream_t* Dstream, const HUF_DEltX1* dt, const U32 dtLog)
149
205
  {
150
206
  size_t const val = BIT_lookBitsFast(Dstream, dtLog); /* note : dtLog >= 1 */
151
207
  BYTE const c = dt[val].byte;
@@ -153,41 +209,44 @@ static BYTE HUF_decodeSymbolX2(BIT_DStream_t* Dstream, const HUF_DEltX2* dt, con
153
209
  return c;
154
210
  }
155
211
 
156
- #define HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr) \
157
- *ptr++ = HUF_decodeSymbolX2(DStreamPtr, dt, dtLog)
212
+ #define HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr) \
213
+ *ptr++ = HUF_decodeSymbolX1(DStreamPtr, dt, dtLog)
158
214
 
159
- #define HUF_DECODE_SYMBOLX2_1(ptr, DStreamPtr) \
215
+ #define HUF_DECODE_SYMBOLX1_1(ptr, DStreamPtr) \
160
216
  if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \
161
- HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr)
217
+ HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr)
162
218
 
163
- #define HUF_DECODE_SYMBOLX2_2(ptr, DStreamPtr) \
219
+ #define HUF_DECODE_SYMBOLX1_2(ptr, DStreamPtr) \
164
220
  if (MEM_64bits()) \
165
- HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr)
221
+ HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr)
166
222
 
167
- HINT_INLINE size_t HUF_decodeStreamX2(BYTE* p, BIT_DStream_t* const bitDPtr, BYTE* const pEnd, const HUF_DEltX2* const dt, const U32 dtLog)
223
+ HINT_INLINE size_t
224
+ HUF_decodeStreamX1(BYTE* p, BIT_DStream_t* const bitDPtr, BYTE* const pEnd, const HUF_DEltX1* const dt, const U32 dtLog)
168
225
  {
169
226
  BYTE* const pStart = p;
170
227
 
171
228
  /* up to 4 symbols at a time */
172
- while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) && (p <= pEnd-4)) {
173
- HUF_DECODE_SYMBOLX2_2(p, bitDPtr);
174
- HUF_DECODE_SYMBOLX2_1(p, bitDPtr);
175
- HUF_DECODE_SYMBOLX2_2(p, bitDPtr);
176
- HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
229
+ while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd-3)) {
230
+ HUF_DECODE_SYMBOLX1_2(p, bitDPtr);
231
+ HUF_DECODE_SYMBOLX1_1(p, bitDPtr);
232
+ HUF_DECODE_SYMBOLX1_2(p, bitDPtr);
233
+ HUF_DECODE_SYMBOLX1_0(p, bitDPtr);
177
234
  }
178
235
 
179
- /* closer to the end */
180
- while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) && (p < pEnd))
181
- HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
236
+ /* [0-3] symbols remaining */
237
+ if (MEM_32bits())
238
+ while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd))
239
+ HUF_DECODE_SYMBOLX1_0(p, bitDPtr);
182
240
 
183
- /* no more data to retrieve from bitstream, hence no need to reload */
241
+ /* no more data to retrieve from bitstream, no need to reload */
184
242
  while (p < pEnd)
185
- HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
243
+ HUF_DECODE_SYMBOLX1_0(p, bitDPtr);
186
244
 
187
245
  return pEnd-pStart;
188
246
  }
189
247
 
190
- static size_t HUF_decompress1X2_usingDTable_internal(
248
+ FORCE_INLINE_TEMPLATE size_t
249
+ HUF_decompress1X1_usingDTable_internal_body(
191
250
  void* dst, size_t dstSize,
192
251
  const void* cSrc, size_t cSrcSize,
193
252
  const HUF_DTable* DTable)
@@ -195,63 +254,22 @@ static size_t HUF_decompress1X2_usingDTable_internal(
195
254
  BYTE* op = (BYTE*)dst;
196
255
  BYTE* const oend = op + dstSize;
197
256
  const void* dtPtr = DTable + 1;
198
- const HUF_DEltX2* const dt = (const HUF_DEltX2*)dtPtr;
257
+ const HUF_DEltX1* const dt = (const HUF_DEltX1*)dtPtr;
199
258
  BIT_DStream_t bitD;
200
259
  DTableDesc const dtd = HUF_getDTableDesc(DTable);
201
260
  U32 const dtLog = dtd.tableLog;
202
261
 
203
- { size_t const errorCode = BIT_initDStream(&bitD, cSrc, cSrcSize);
204
- if (HUF_isError(errorCode)) return errorCode; }
262
+ CHECK_F( BIT_initDStream(&bitD, cSrc, cSrcSize) );
205
263
 
206
- HUF_decodeStreamX2(op, &bitD, oend, dt, dtLog);
264
+ HUF_decodeStreamX1(op, &bitD, oend, dt, dtLog);
207
265
 
208
- /* check */
209
266
  if (!BIT_endOfDStream(&bitD)) return ERROR(corruption_detected);
210
267
 
211
268
  return dstSize;
212
269
  }
213
270
 
214
- size_t HUF_decompress1X2_usingDTable(
215
- void* dst, size_t dstSize,
216
- const void* cSrc, size_t cSrcSize,
217
- const HUF_DTable* DTable)
218
- {
219
- DTableDesc dtd = HUF_getDTableDesc(DTable);
220
- if (dtd.tableType != 0) return ERROR(GENERIC);
221
- return HUF_decompress1X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable);
222
- }
223
-
224
- size_t HUF_decompress1X2_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize,
225
- const void* cSrc, size_t cSrcSize,
226
- void* workSpace, size_t wkspSize)
227
- {
228
- const BYTE* ip = (const BYTE*) cSrc;
229
-
230
- size_t const hSize = HUF_readDTableX2_wksp(DCtx, cSrc, cSrcSize, workSpace, wkspSize);
231
- if (HUF_isError(hSize)) return hSize;
232
- if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
233
- ip += hSize; cSrcSize -= hSize;
234
-
235
- return HUF_decompress1X2_usingDTable_internal (dst, dstSize, ip, cSrcSize, DCtx);
236
- }
237
-
238
-
239
- size_t HUF_decompress1X2_DCtx(HUF_DTable* DCtx, void* dst, size_t dstSize,
240
- const void* cSrc, size_t cSrcSize)
241
- {
242
- U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
243
- return HUF_decompress1X2_DCtx_wksp(DCtx, dst, dstSize, cSrc, cSrcSize,
244
- workSpace, sizeof(workSpace));
245
- }
246
-
247
- size_t HUF_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
248
- {
249
- HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_TABLELOG_MAX);
250
- return HUF_decompress1X2_DCtx (DTable, dst, dstSize, cSrc, cSrcSize);
251
- }
252
-
253
-
254
- static size_t HUF_decompress4X2_usingDTable_internal(
271
+ FORCE_INLINE_TEMPLATE size_t
272
+ HUF_decompress4X1_usingDTable_internal_body(
255
273
  void* dst, size_t dstSize,
256
274
  const void* cSrc, size_t cSrcSize,
257
275
  const HUF_DTable* DTable)
@@ -263,7 +281,7 @@ static size_t HUF_decompress4X2_usingDTable_internal(
263
281
  BYTE* const ostart = (BYTE*) dst;
264
282
  BYTE* const oend = ostart + dstSize;
265
283
  const void* const dtPtr = DTable + 1;
266
- const HUF_DEltX2* const dt = (const HUF_DEltX2*)dtPtr;
284
+ const HUF_DEltX1* const dt = (const HUF_DEltX1*)dtPtr;
267
285
 
268
286
  /* Init */
269
287
  BIT_DStream_t bitD1;
@@ -286,57 +304,58 @@ static size_t HUF_decompress4X2_usingDTable_internal(
286
304
  BYTE* op2 = opStart2;
287
305
  BYTE* op3 = opStart3;
288
306
  BYTE* op4 = opStart4;
289
- U32 endSignal;
307
+ U32 endSignal = BIT_DStream_unfinished;
290
308
  DTableDesc const dtd = HUF_getDTableDesc(DTable);
291
309
  U32 const dtLog = dtd.tableLog;
292
310
 
293
311
  if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */
294
- { size_t const errorCode = BIT_initDStream(&bitD1, istart1, length1);
295
- if (HUF_isError(errorCode)) return errorCode; }
296
- { size_t const errorCode = BIT_initDStream(&bitD2, istart2, length2);
297
- if (HUF_isError(errorCode)) return errorCode; }
298
- { size_t const errorCode = BIT_initDStream(&bitD3, istart3, length3);
299
- if (HUF_isError(errorCode)) return errorCode; }
300
- { size_t const errorCode = BIT_initDStream(&bitD4, istart4, length4);
301
- if (HUF_isError(errorCode)) return errorCode; }
312
+ CHECK_F( BIT_initDStream(&bitD1, istart1, length1) );
313
+ CHECK_F( BIT_initDStream(&bitD2, istart2, length2) );
314
+ CHECK_F( BIT_initDStream(&bitD3, istart3, length3) );
315
+ CHECK_F( BIT_initDStream(&bitD4, istart4, length4) );
302
316
 
303
- /* 16-32 symbols per loop (4-8 symbols per stream) */
317
+ /* up to 16 symbols per loop (4 symbols per stream) in 64-bit mode */
304
318
  endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
305
- for ( ; (endSignal==BIT_DStream_unfinished) && (op4<(oend-7)) ; ) {
306
- HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
307
- HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
308
- HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
309
- HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
310
- HUF_DECODE_SYMBOLX2_1(op1, &bitD1);
311
- HUF_DECODE_SYMBOLX2_1(op2, &bitD2);
312
- HUF_DECODE_SYMBOLX2_1(op3, &bitD3);
313
- HUF_DECODE_SYMBOLX2_1(op4, &bitD4);
314
- HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
315
- HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
316
- HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
317
- HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
318
- HUF_DECODE_SYMBOLX2_0(op1, &bitD1);
319
- HUF_DECODE_SYMBOLX2_0(op2, &bitD2);
320
- HUF_DECODE_SYMBOLX2_0(op3, &bitD3);
321
- HUF_DECODE_SYMBOLX2_0(op4, &bitD4);
322
- endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
319
+ while ( (endSignal==BIT_DStream_unfinished) && (op4<(oend-3)) ) {
320
+ HUF_DECODE_SYMBOLX1_2(op1, &bitD1);
321
+ HUF_DECODE_SYMBOLX1_2(op2, &bitD2);
322
+ HUF_DECODE_SYMBOLX1_2(op3, &bitD3);
323
+ HUF_DECODE_SYMBOLX1_2(op4, &bitD4);
324
+ HUF_DECODE_SYMBOLX1_1(op1, &bitD1);
325
+ HUF_DECODE_SYMBOLX1_1(op2, &bitD2);
326
+ HUF_DECODE_SYMBOLX1_1(op3, &bitD3);
327
+ HUF_DECODE_SYMBOLX1_1(op4, &bitD4);
328
+ HUF_DECODE_SYMBOLX1_2(op1, &bitD1);
329
+ HUF_DECODE_SYMBOLX1_2(op2, &bitD2);
330
+ HUF_DECODE_SYMBOLX1_2(op3, &bitD3);
331
+ HUF_DECODE_SYMBOLX1_2(op4, &bitD4);
332
+ HUF_DECODE_SYMBOLX1_0(op1, &bitD1);
333
+ HUF_DECODE_SYMBOLX1_0(op2, &bitD2);
334
+ HUF_DECODE_SYMBOLX1_0(op3, &bitD3);
335
+ HUF_DECODE_SYMBOLX1_0(op4, &bitD4);
336
+ BIT_reloadDStream(&bitD1);
337
+ BIT_reloadDStream(&bitD2);
338
+ BIT_reloadDStream(&bitD3);
339
+ BIT_reloadDStream(&bitD4);
323
340
  }
324
341
 
325
342
  /* check corruption */
343
+ /* note : should not be necessary : op# advance in lock step, and we control op4.
344
+ * but curiously, binary generated by gcc 7.2 & 7.3 with -mbmi2 runs faster when >=1 test is present */
326
345
  if (op1 > opStart2) return ERROR(corruption_detected);
327
346
  if (op2 > opStart3) return ERROR(corruption_detected);
328
347
  if (op3 > opStart4) return ERROR(corruption_detected);
329
348
  /* note : op4 supposed already verified within main loop */
330
349
 
331
350
  /* finish bitStreams one by one */
332
- HUF_decodeStreamX2(op1, &bitD1, opStart2, dt, dtLog);
333
- HUF_decodeStreamX2(op2, &bitD2, opStart3, dt, dtLog);
334
- HUF_decodeStreamX2(op3, &bitD3, opStart4, dt, dtLog);
335
- HUF_decodeStreamX2(op4, &bitD4, oend, dt, dtLog);
351
+ HUF_decodeStreamX1(op1, &bitD1, opStart2, dt, dtLog);
352
+ HUF_decodeStreamX1(op2, &bitD2, opStart3, dt, dtLog);
353
+ HUF_decodeStreamX1(op3, &bitD3, opStart4, dt, dtLog);
354
+ HUF_decodeStreamX1(op4, &bitD4, oend, dt, dtLog);
336
355
 
337
356
  /* check */
338
- endSignal = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4);
339
- if (!endSignal) return ERROR(corruption_detected);
357
+ { U32 const endCheck = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4);
358
+ if (!endCheck) return ERROR(corruption_detected); }
340
359
 
341
360
  /* decoded size */
342
361
  return dstSize;
@@ -344,61 +363,123 @@ static size_t HUF_decompress4X2_usingDTable_internal(
344
363
  }
345
364
 
346
365
 
347
- size_t HUF_decompress4X2_usingDTable(
366
+ typedef size_t (*HUF_decompress_usingDTable_t)(void *dst, size_t dstSize,
367
+ const void *cSrc,
368
+ size_t cSrcSize,
369
+ const HUF_DTable *DTable);
370
+
371
+ HUF_DGEN(HUF_decompress1X1_usingDTable_internal)
372
+ HUF_DGEN(HUF_decompress4X1_usingDTable_internal)
373
+
374
+
375
+
376
+ size_t HUF_decompress1X1_usingDTable(
348
377
  void* dst, size_t dstSize,
349
378
  const void* cSrc, size_t cSrcSize,
350
379
  const HUF_DTable* DTable)
351
380
  {
352
381
  DTableDesc dtd = HUF_getDTableDesc(DTable);
353
382
  if (dtd.tableType != 0) return ERROR(GENERIC);
354
- return HUF_decompress4X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable);
383
+ return HUF_decompress1X1_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
355
384
  }
356
385
 
357
-
358
- size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
386
+ size_t HUF_decompress1X1_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize,
359
387
  const void* cSrc, size_t cSrcSize,
360
388
  void* workSpace, size_t wkspSize)
361
389
  {
362
390
  const BYTE* ip = (const BYTE*) cSrc;
363
391
 
364
- size_t const hSize = HUF_readDTableX2_wksp (dctx, cSrc, cSrcSize,
392
+ size_t const hSize = HUF_readDTableX1_wksp(DCtx, cSrc, cSrcSize, workSpace, wkspSize);
393
+ if (HUF_isError(hSize)) return hSize;
394
+ if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
395
+ ip += hSize; cSrcSize -= hSize;
396
+
397
+ return HUF_decompress1X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, DCtx, /* bmi2 */ 0);
398
+ }
399
+
400
+
401
+ size_t HUF_decompress1X1_DCtx(HUF_DTable* DCtx, void* dst, size_t dstSize,
402
+ const void* cSrc, size_t cSrcSize)
403
+ {
404
+ U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
405
+ return HUF_decompress1X1_DCtx_wksp(DCtx, dst, dstSize, cSrc, cSrcSize,
406
+ workSpace, sizeof(workSpace));
407
+ }
408
+
409
+ size_t HUF_decompress1X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
410
+ {
411
+ HUF_CREATE_STATIC_DTABLEX1(DTable, HUF_TABLELOG_MAX);
412
+ return HUF_decompress1X1_DCtx (DTable, dst, dstSize, cSrc, cSrcSize);
413
+ }
414
+
415
+ size_t HUF_decompress4X1_usingDTable(
416
+ void* dst, size_t dstSize,
417
+ const void* cSrc, size_t cSrcSize,
418
+ const HUF_DTable* DTable)
419
+ {
420
+ DTableDesc dtd = HUF_getDTableDesc(DTable);
421
+ if (dtd.tableType != 0) return ERROR(GENERIC);
422
+ return HUF_decompress4X1_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
423
+ }
424
+
425
+ static size_t HUF_decompress4X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize,
426
+ const void* cSrc, size_t cSrcSize,
427
+ void* workSpace, size_t wkspSize, int bmi2)
428
+ {
429
+ const BYTE* ip = (const BYTE*) cSrc;
430
+
431
+ size_t const hSize = HUF_readDTableX1_wksp (dctx, cSrc, cSrcSize,
365
432
  workSpace, wkspSize);
366
433
  if (HUF_isError(hSize)) return hSize;
367
434
  if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
368
435
  ip += hSize; cSrcSize -= hSize;
369
436
 
370
- return HUF_decompress4X2_usingDTable_internal (dst, dstSize, ip, cSrcSize, dctx);
437
+ return HUF_decompress4X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2);
438
+ }
439
+
440
+ size_t HUF_decompress4X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
441
+ const void* cSrc, size_t cSrcSize,
442
+ void* workSpace, size_t wkspSize)
443
+ {
444
+ return HUF_decompress4X1_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, 0);
371
445
  }
372
446
 
373
447
 
374
- size_t HUF_decompress4X2_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
448
+ size_t HUF_decompress4X1_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
375
449
  {
376
450
  U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
377
- return HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
451
+ return HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
378
452
  workSpace, sizeof(workSpace));
379
453
  }
380
- size_t HUF_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
454
+ size_t HUF_decompress4X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
381
455
  {
382
- HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_TABLELOG_MAX);
383
- return HUF_decompress4X2_DCtx(DTable, dst, dstSize, cSrc, cSrcSize);
456
+ HUF_CREATE_STATIC_DTABLEX1(DTable, HUF_TABLELOG_MAX);
457
+ return HUF_decompress4X1_DCtx(DTable, dst, dstSize, cSrc, cSrcSize);
384
458
  }
385
459
 
460
+ #endif /* HUF_FORCE_DECOMPRESS_X2 */
461
+
462
+
463
+ #ifndef HUF_FORCE_DECOMPRESS_X1
386
464
 
387
465
  /* *************************/
388
466
  /* double-symbols decoding */
389
467
  /* *************************/
390
- typedef struct { U16 sequence; BYTE nbBits; BYTE length; } HUF_DEltX4; /* double-symbols decoding */
391
468
 
469
+ typedef struct { U16 sequence; BYTE nbBits; BYTE length; } HUF_DEltX2; /* double-symbols decoding */
392
470
  typedef struct { BYTE symbol; BYTE weight; } sortedSymbol_t;
471
+ typedef U32 rankValCol_t[HUF_TABLELOG_MAX + 1];
472
+ typedef rankValCol_t rankVal_t[HUF_TABLELOG_MAX];
473
+
393
474
 
394
- /* HUF_fillDTableX4Level2() :
475
+ /* HUF_fillDTableX2Level2() :
395
476
  * `rankValOrigin` must be a table of at least (HUF_TABLELOG_MAX + 1) U32 */
396
- static void HUF_fillDTableX4Level2(HUF_DEltX4* DTable, U32 sizeLog, const U32 consumed,
477
+ static void HUF_fillDTableX2Level2(HUF_DEltX2* DTable, U32 sizeLog, const U32 consumed,
397
478
  const U32* rankValOrigin, const int minWeight,
398
479
  const sortedSymbol_t* sortedSymbols, const U32 sortedListSize,
399
480
  U32 nbBitsBaseline, U16 baseSeq)
400
481
  {
401
- HUF_DEltX4 DElt;
482
+ HUF_DEltX2 DElt;
402
483
  U32 rankVal[HUF_TABLELOG_MAX + 1];
403
484
 
404
485
  /* get pre-calculated rankVal */
@@ -433,10 +514,8 @@ static void HUF_fillDTableX4Level2(HUF_DEltX4* DTable, U32 sizeLog, const U32 co
433
514
  } }
434
515
  }
435
516
 
436
- typedef U32 rankValCol_t[HUF_TABLELOG_MAX + 1];
437
- typedef rankValCol_t rankVal_t[HUF_TABLELOG_MAX];
438
517
 
439
- static void HUF_fillDTableX4(HUF_DEltX4* DTable, const U32 targetLog,
518
+ static void HUF_fillDTableX2(HUF_DEltX2* DTable, const U32 targetLog,
440
519
  const sortedSymbol_t* sortedList, const U32 sortedListSize,
441
520
  const U32* rankStart, rankVal_t rankValOrigin, const U32 maxWeight,
442
521
  const U32 nbBitsBaseline)
@@ -461,12 +540,12 @@ static void HUF_fillDTableX4(HUF_DEltX4* DTable, const U32 targetLog,
461
540
  int minWeight = nbBits + scaleLog;
462
541
  if (minWeight < 1) minWeight = 1;
463
542
  sortedRank = rankStart[minWeight];
464
- HUF_fillDTableX4Level2(DTable+start, targetLog-nbBits, nbBits,
543
+ HUF_fillDTableX2Level2(DTable+start, targetLog-nbBits, nbBits,
465
544
  rankValOrigin[nbBits], minWeight,
466
545
  sortedList+sortedRank, sortedListSize-sortedRank,
467
546
  nbBitsBaseline, symbol);
468
547
  } else {
469
- HUF_DEltX4 DElt;
548
+ HUF_DEltX2 DElt;
470
549
  MEM_writeLE16(&(DElt.sequence), symbol);
471
550
  DElt.nbBits = (BYTE)(nbBits);
472
551
  DElt.length = 1;
@@ -478,16 +557,16 @@ static void HUF_fillDTableX4(HUF_DEltX4* DTable, const U32 targetLog,
478
557
  }
479
558
  }
480
559
 
481
- size_t HUF_readDTableX4_wksp(HUF_DTable* DTable, const void* src,
482
- size_t srcSize, void* workSpace,
483
- size_t wkspSize)
560
+ size_t HUF_readDTableX2_wksp(HUF_DTable* DTable,
561
+ const void* src, size_t srcSize,
562
+ void* workSpace, size_t wkspSize)
484
563
  {
485
564
  U32 tableLog, maxW, sizeOfSort, nbSymbols;
486
565
  DTableDesc dtd = HUF_getDTableDesc(DTable);
487
566
  U32 const maxTableLog = dtd.maxTableLog;
488
567
  size_t iSize;
489
568
  void* dtPtr = DTable+1; /* force compiler to avoid strict-aliasing */
490
- HUF_DEltX4* const dt = (HUF_DEltX4*)dtPtr;
569
+ HUF_DEltX2* const dt = (HUF_DEltX2*)dtPtr;
491
570
  U32 *rankStart;
492
571
 
493
572
  rankValCol_t* rankVal;
@@ -508,15 +587,12 @@ size_t HUF_readDTableX4_wksp(HUF_DTable* DTable, const void* src,
508
587
  weightList = (BYTE *)((U32 *)workSpace + spaceUsed32);
509
588
  spaceUsed32 += HUF_ALIGN(HUF_SYMBOLVALUE_MAX + 1, sizeof(U32)) >> 2;
510
589
 
511
- if ((spaceUsed32 << 2) > wkspSize)
512
- return ERROR(tableLog_tooLarge);
513
- workSpace = (U32 *)workSpace + spaceUsed32;
514
- wkspSize -= (spaceUsed32 << 2);
590
+ if ((spaceUsed32 << 2) > wkspSize) return ERROR(tableLog_tooLarge);
515
591
 
516
592
  rankStart = rankStart0 + 1;
517
593
  memset(rankStats, 0, sizeof(U32) * (2 * HUF_TABLELOG_MAX + 2 + 1));
518
594
 
519
- HUF_STATIC_ASSERT(sizeof(HUF_DEltX4) == sizeof(HUF_DTable)); /* if compiler fails here, assertion is wrong */
595
+ DEBUG_STATIC_ASSERT(sizeof(HUF_DEltX2) == sizeof(HUF_DTable)); /* if compiler fails here, assertion is wrong */
520
596
  if (maxTableLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge);
521
597
  /* memset(weightList, 0, sizeof(weightList)); */ /* is not necessary, even though some analyzer complain ... */
522
598
 
@@ -570,7 +646,7 @@ size_t HUF_readDTableX4_wksp(HUF_DTable* DTable, const void* src,
570
646
  rankValPtr[w] = rankVal0[w] >> consumed;
571
647
  } } } }
572
648
 
573
- HUF_fillDTableX4(dt, maxTableLog,
649
+ HUF_fillDTableX2(dt, maxTableLog,
574
650
  sortedSymbol, sizeOfSort,
575
651
  rankStart0, rankVal, maxW,
576
652
  tableLog+1);
@@ -581,14 +657,16 @@ size_t HUF_readDTableX4_wksp(HUF_DTable* DTable, const void* src,
581
657
  return iSize;
582
658
  }
583
659
 
584
- size_t HUF_readDTableX4(HUF_DTable* DTable, const void* src, size_t srcSize)
660
+ size_t HUF_readDTableX2(HUF_DTable* DTable, const void* src, size_t srcSize)
585
661
  {
586
662
  U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
587
- return HUF_readDTableX4_wksp(DTable, src, srcSize,
663
+ return HUF_readDTableX2_wksp(DTable, src, srcSize,
588
664
  workSpace, sizeof(workSpace));
589
665
  }
590
666
 
591
- static U32 HUF_decodeSymbolX4(void* op, BIT_DStream_t* DStream, const HUF_DEltX4* dt, const U32 dtLog)
667
+
668
+ FORCE_INLINE_TEMPLATE U32
669
+ HUF_decodeSymbolX2(void* op, BIT_DStream_t* DStream, const HUF_DEltX2* dt, const U32 dtLog)
592
670
  {
593
671
  size_t const val = BIT_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */
594
672
  memcpy(op, dt+val, 2);
@@ -596,7 +674,8 @@ static U32 HUF_decodeSymbolX4(void* op, BIT_DStream_t* DStream, const HUF_DEltX4
596
674
  return dt[val].length;
597
675
  }
598
676
 
599
- static U32 HUF_decodeLastSymbolX4(void* op, BIT_DStream_t* DStream, const HUF_DEltX4* dt, const U32 dtLog)
677
+ FORCE_INLINE_TEMPLATE U32
678
+ HUF_decodeLastSymbolX2(void* op, BIT_DStream_t* DStream, const HUF_DEltX2* dt, const U32 dtLog)
600
679
  {
601
680
  size_t const val = BIT_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */
602
681
  memcpy(op, dt+val, 1);
@@ -611,45 +690,46 @@ static U32 HUF_decodeLastSymbolX4(void* op, BIT_DStream_t* DStream, const HUF_DE
611
690
  return 1;
612
691
  }
613
692
 
693
+ #define HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr) \
694
+ ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog)
614
695
 
615
- #define HUF_DECODE_SYMBOLX4_0(ptr, DStreamPtr) \
616
- ptr += HUF_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog)
617
-
618
- #define HUF_DECODE_SYMBOLX4_1(ptr, DStreamPtr) \
696
+ #define HUF_DECODE_SYMBOLX2_1(ptr, DStreamPtr) \
619
697
  if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \
620
- ptr += HUF_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog)
698
+ ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog)
621
699
 
622
- #define HUF_DECODE_SYMBOLX4_2(ptr, DStreamPtr) \
700
+ #define HUF_DECODE_SYMBOLX2_2(ptr, DStreamPtr) \
623
701
  if (MEM_64bits()) \
624
- ptr += HUF_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog)
702
+ ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog)
625
703
 
626
- HINT_INLINE size_t HUF_decodeStreamX4(BYTE* p, BIT_DStream_t* bitDPtr, BYTE* const pEnd, const HUF_DEltX4* const dt, const U32 dtLog)
704
+ HINT_INLINE size_t
705
+ HUF_decodeStreamX2(BYTE* p, BIT_DStream_t* bitDPtr, BYTE* const pEnd,
706
+ const HUF_DEltX2* const dt, const U32 dtLog)
627
707
  {
628
708
  BYTE* const pStart = p;
629
709
 
630
710
  /* up to 8 symbols at a time */
631
711
  while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd-(sizeof(bitDPtr->bitContainer)-1))) {
632
- HUF_DECODE_SYMBOLX4_2(p, bitDPtr);
633
- HUF_DECODE_SYMBOLX4_1(p, bitDPtr);
634
- HUF_DECODE_SYMBOLX4_2(p, bitDPtr);
635
- HUF_DECODE_SYMBOLX4_0(p, bitDPtr);
712
+ HUF_DECODE_SYMBOLX2_2(p, bitDPtr);
713
+ HUF_DECODE_SYMBOLX2_1(p, bitDPtr);
714
+ HUF_DECODE_SYMBOLX2_2(p, bitDPtr);
715
+ HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
636
716
  }
637
717
 
638
718
  /* closer to end : up to 2 symbols at a time */
639
719
  while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p <= pEnd-2))
640
- HUF_DECODE_SYMBOLX4_0(p, bitDPtr);
720
+ HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
641
721
 
642
722
  while (p <= pEnd-2)
643
- HUF_DECODE_SYMBOLX4_0(p, bitDPtr); /* no need to reload : reached the end of DStream */
723
+ HUF_DECODE_SYMBOLX2_0(p, bitDPtr); /* no need to reload : reached the end of DStream */
644
724
 
645
725
  if (p < pEnd)
646
- p += HUF_decodeLastSymbolX4(p, bitDPtr, dt, dtLog);
726
+ p += HUF_decodeLastSymbolX2(p, bitDPtr, dt, dtLog);
647
727
 
648
728
  return p-pStart;
649
729
  }
650
730
 
651
-
652
- static size_t HUF_decompress1X4_usingDTable_internal(
731
+ FORCE_INLINE_TEMPLATE size_t
732
+ HUF_decompress1X2_usingDTable_internal_body(
653
733
  void* dst, size_t dstSize,
654
734
  const void* cSrc, size_t cSrcSize,
655
735
  const HUF_DTable* DTable)
@@ -657,17 +737,15 @@ static size_t HUF_decompress1X4_usingDTable_internal(
657
737
  BIT_DStream_t bitD;
658
738
 
659
739
  /* Init */
660
- { size_t const errorCode = BIT_initDStream(&bitD, cSrc, cSrcSize);
661
- if (HUF_isError(errorCode)) return errorCode;
662
- }
740
+ CHECK_F( BIT_initDStream(&bitD, cSrc, cSrcSize) );
663
741
 
664
742
  /* decode */
665
743
  { BYTE* const ostart = (BYTE*) dst;
666
744
  BYTE* const oend = ostart + dstSize;
667
745
  const void* const dtPtr = DTable+1; /* force compiler to not use strict-aliasing */
668
- const HUF_DEltX4* const dt = (const HUF_DEltX4*)dtPtr;
746
+ const HUF_DEltX2* const dt = (const HUF_DEltX2*)dtPtr;
669
747
  DTableDesc const dtd = HUF_getDTableDesc(DTable);
670
- HUF_decodeStreamX4(ostart, &bitD, oend, dt, dtd.tableLog);
748
+ HUF_decodeStreamX2(ostart, &bitD, oend, dt, dtd.tableLog);
671
749
  }
672
750
 
673
751
  /* check */
@@ -677,47 +755,9 @@ static size_t HUF_decompress1X4_usingDTable_internal(
677
755
  return dstSize;
678
756
  }
679
757
 
680
- size_t HUF_decompress1X4_usingDTable(
681
- void* dst, size_t dstSize,
682
- const void* cSrc, size_t cSrcSize,
683
- const HUF_DTable* DTable)
684
- {
685
- DTableDesc dtd = HUF_getDTableDesc(DTable);
686
- if (dtd.tableType != 1) return ERROR(GENERIC);
687
- return HUF_decompress1X4_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable);
688
- }
689
-
690
- size_t HUF_decompress1X4_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize,
691
- const void* cSrc, size_t cSrcSize,
692
- void* workSpace, size_t wkspSize)
693
- {
694
- const BYTE* ip = (const BYTE*) cSrc;
695
-
696
- size_t const hSize = HUF_readDTableX4_wksp(DCtx, cSrc, cSrcSize,
697
- workSpace, wkspSize);
698
- if (HUF_isError(hSize)) return hSize;
699
- if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
700
- ip += hSize; cSrcSize -= hSize;
701
758
 
702
- return HUF_decompress1X4_usingDTable_internal (dst, dstSize, ip, cSrcSize, DCtx);
703
- }
704
-
705
-
706
- size_t HUF_decompress1X4_DCtx(HUF_DTable* DCtx, void* dst, size_t dstSize,
707
- const void* cSrc, size_t cSrcSize)
708
- {
709
- U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
710
- return HUF_decompress1X4_DCtx_wksp(DCtx, dst, dstSize, cSrc, cSrcSize,
711
- workSpace, sizeof(workSpace));
712
- }
713
-
714
- size_t HUF_decompress1X4 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
715
- {
716
- HUF_CREATE_STATIC_DTABLEX4(DTable, HUF_TABLELOG_MAX);
717
- return HUF_decompress1X4_DCtx(DTable, dst, dstSize, cSrc, cSrcSize);
718
- }
719
-
720
- static size_t HUF_decompress4X4_usingDTable_internal(
759
+ FORCE_INLINE_TEMPLATE size_t
760
+ HUF_decompress4X2_usingDTable_internal_body(
721
761
  void* dst, size_t dstSize,
722
762
  const void* cSrc, size_t cSrcSize,
723
763
  const HUF_DTable* DTable)
@@ -728,7 +768,7 @@ static size_t HUF_decompress4X4_usingDTable_internal(
728
768
  BYTE* const ostart = (BYTE*) dst;
729
769
  BYTE* const oend = ostart + dstSize;
730
770
  const void* const dtPtr = DTable+1;
731
- const HUF_DEltX4* const dt = (const HUF_DEltX4*)dtPtr;
771
+ const HUF_DEltX2* const dt = (const HUF_DEltX2*)dtPtr;
732
772
 
733
773
  /* Init */
734
774
  BIT_DStream_t bitD1;
@@ -756,34 +796,30 @@ static size_t HUF_decompress4X4_usingDTable_internal(
756
796
  U32 const dtLog = dtd.tableLog;
757
797
 
758
798
  if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */
759
- { size_t const errorCode = BIT_initDStream(&bitD1, istart1, length1);
760
- if (HUF_isError(errorCode)) return errorCode; }
761
- { size_t const errorCode = BIT_initDStream(&bitD2, istart2, length2);
762
- if (HUF_isError(errorCode)) return errorCode; }
763
- { size_t const errorCode = BIT_initDStream(&bitD3, istart3, length3);
764
- if (HUF_isError(errorCode)) return errorCode; }
765
- { size_t const errorCode = BIT_initDStream(&bitD4, istart4, length4);
766
- if (HUF_isError(errorCode)) return errorCode; }
799
+ CHECK_F( BIT_initDStream(&bitD1, istart1, length1) );
800
+ CHECK_F( BIT_initDStream(&bitD2, istart2, length2) );
801
+ CHECK_F( BIT_initDStream(&bitD3, istart3, length3) );
802
+ CHECK_F( BIT_initDStream(&bitD4, istart4, length4) );
767
803
 
768
804
  /* 16-32 symbols per loop (4-8 symbols per stream) */
769
805
  endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
770
806
  for ( ; (endSignal==BIT_DStream_unfinished) & (op4<(oend-(sizeof(bitD4.bitContainer)-1))) ; ) {
771
- HUF_DECODE_SYMBOLX4_2(op1, &bitD1);
772
- HUF_DECODE_SYMBOLX4_2(op2, &bitD2);
773
- HUF_DECODE_SYMBOLX4_2(op3, &bitD3);
774
- HUF_DECODE_SYMBOLX4_2(op4, &bitD4);
775
- HUF_DECODE_SYMBOLX4_1(op1, &bitD1);
776
- HUF_DECODE_SYMBOLX4_1(op2, &bitD2);
777
- HUF_DECODE_SYMBOLX4_1(op3, &bitD3);
778
- HUF_DECODE_SYMBOLX4_1(op4, &bitD4);
779
- HUF_DECODE_SYMBOLX4_2(op1, &bitD1);
780
- HUF_DECODE_SYMBOLX4_2(op2, &bitD2);
781
- HUF_DECODE_SYMBOLX4_2(op3, &bitD3);
782
- HUF_DECODE_SYMBOLX4_2(op4, &bitD4);
783
- HUF_DECODE_SYMBOLX4_0(op1, &bitD1);
784
- HUF_DECODE_SYMBOLX4_0(op2, &bitD2);
785
- HUF_DECODE_SYMBOLX4_0(op3, &bitD3);
786
- HUF_DECODE_SYMBOLX4_0(op4, &bitD4);
807
+ HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
808
+ HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
809
+ HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
810
+ HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
811
+ HUF_DECODE_SYMBOLX2_1(op1, &bitD1);
812
+ HUF_DECODE_SYMBOLX2_1(op2, &bitD2);
813
+ HUF_DECODE_SYMBOLX2_1(op3, &bitD3);
814
+ HUF_DECODE_SYMBOLX2_1(op4, &bitD4);
815
+ HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
816
+ HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
817
+ HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
818
+ HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
819
+ HUF_DECODE_SYMBOLX2_0(op1, &bitD1);
820
+ HUF_DECODE_SYMBOLX2_0(op2, &bitD2);
821
+ HUF_DECODE_SYMBOLX2_0(op3, &bitD3);
822
+ HUF_DECODE_SYMBOLX2_0(op4, &bitD4);
787
823
 
788
824
  endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
789
825
  }
@@ -795,10 +831,10 @@ static size_t HUF_decompress4X4_usingDTable_internal(
795
831
  /* note : op4 already verified within main loop */
796
832
 
797
833
  /* finish bitStreams one by one */
798
- HUF_decodeStreamX4(op1, &bitD1, opStart2, dt, dtLog);
799
- HUF_decodeStreamX4(op2, &bitD2, opStart3, dt, dtLog);
800
- HUF_decodeStreamX4(op3, &bitD3, opStart4, dt, dtLog);
801
- HUF_decodeStreamX4(op4, &bitD4, oend, dt, dtLog);
834
+ HUF_decodeStreamX2(op1, &bitD1, opStart2, dt, dtLog);
835
+ HUF_decodeStreamX2(op2, &bitD2, opStart3, dt, dtLog);
836
+ HUF_decodeStreamX2(op3, &bitD3, opStart4, dt, dtLog);
837
+ HUF_decodeStreamX2(op4, &bitD4, oend, dt, dtLog);
802
838
 
803
839
  /* check */
804
840
  { U32 const endCheck = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4);
@@ -809,60 +845,120 @@ static size_t HUF_decompress4X4_usingDTable_internal(
809
845
  }
810
846
  }
811
847
 
848
+ HUF_DGEN(HUF_decompress1X2_usingDTable_internal)
849
+ HUF_DGEN(HUF_decompress4X2_usingDTable_internal)
812
850
 
813
- size_t HUF_decompress4X4_usingDTable(
851
+ size_t HUF_decompress1X2_usingDTable(
814
852
  void* dst, size_t dstSize,
815
853
  const void* cSrc, size_t cSrcSize,
816
854
  const HUF_DTable* DTable)
817
855
  {
818
856
  DTableDesc dtd = HUF_getDTableDesc(DTable);
819
857
  if (dtd.tableType != 1) return ERROR(GENERIC);
820
- return HUF_decompress4X4_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable);
858
+ return HUF_decompress1X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
821
859
  }
822
860
 
823
-
824
- size_t HUF_decompress4X4_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
861
+ size_t HUF_decompress1X2_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize,
825
862
  const void* cSrc, size_t cSrcSize,
826
863
  void* workSpace, size_t wkspSize)
827
864
  {
828
865
  const BYTE* ip = (const BYTE*) cSrc;
829
866
 
830
- size_t hSize = HUF_readDTableX4_wksp(dctx, cSrc, cSrcSize,
867
+ size_t const hSize = HUF_readDTableX2_wksp(DCtx, cSrc, cSrcSize,
868
+ workSpace, wkspSize);
869
+ if (HUF_isError(hSize)) return hSize;
870
+ if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
871
+ ip += hSize; cSrcSize -= hSize;
872
+
873
+ return HUF_decompress1X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, DCtx, /* bmi2 */ 0);
874
+ }
875
+
876
+
877
+ size_t HUF_decompress1X2_DCtx(HUF_DTable* DCtx, void* dst, size_t dstSize,
878
+ const void* cSrc, size_t cSrcSize)
879
+ {
880
+ U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
881
+ return HUF_decompress1X2_DCtx_wksp(DCtx, dst, dstSize, cSrc, cSrcSize,
882
+ workSpace, sizeof(workSpace));
883
+ }
884
+
885
+ size_t HUF_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
886
+ {
887
+ HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_TABLELOG_MAX);
888
+ return HUF_decompress1X2_DCtx(DTable, dst, dstSize, cSrc, cSrcSize);
889
+ }
890
+
891
+ size_t HUF_decompress4X2_usingDTable(
892
+ void* dst, size_t dstSize,
893
+ const void* cSrc, size_t cSrcSize,
894
+ const HUF_DTable* DTable)
895
+ {
896
+ DTableDesc dtd = HUF_getDTableDesc(DTable);
897
+ if (dtd.tableType != 1) return ERROR(GENERIC);
898
+ return HUF_decompress4X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
899
+ }
900
+
901
+ static size_t HUF_decompress4X2_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize,
902
+ const void* cSrc, size_t cSrcSize,
903
+ void* workSpace, size_t wkspSize, int bmi2)
904
+ {
905
+ const BYTE* ip = (const BYTE*) cSrc;
906
+
907
+ size_t hSize = HUF_readDTableX2_wksp(dctx, cSrc, cSrcSize,
831
908
  workSpace, wkspSize);
832
909
  if (HUF_isError(hSize)) return hSize;
833
910
  if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
834
911
  ip += hSize; cSrcSize -= hSize;
835
912
 
836
- return HUF_decompress4X4_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx);
913
+ return HUF_decompress4X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2);
837
914
  }
838
915
 
916
+ size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
917
+ const void* cSrc, size_t cSrcSize,
918
+ void* workSpace, size_t wkspSize)
919
+ {
920
+ return HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, /* bmi2 */ 0);
921
+ }
839
922
 
840
- size_t HUF_decompress4X4_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize,
923
+
924
+ size_t HUF_decompress4X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize,
841
925
  const void* cSrc, size_t cSrcSize)
842
926
  {
843
927
  U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
844
- return HUF_decompress4X4_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
928
+ return HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
845
929
  workSpace, sizeof(workSpace));
846
930
  }
847
931
 
848
- size_t HUF_decompress4X4 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
932
+ size_t HUF_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
849
933
  {
850
- HUF_CREATE_STATIC_DTABLEX4(DTable, HUF_TABLELOG_MAX);
851
- return HUF_decompress4X4_DCtx(DTable, dst, dstSize, cSrc, cSrcSize);
934
+ HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_TABLELOG_MAX);
935
+ return HUF_decompress4X2_DCtx(DTable, dst, dstSize, cSrc, cSrcSize);
852
936
  }
853
937
 
938
+ #endif /* HUF_FORCE_DECOMPRESS_X1 */
854
939
 
855
- /* ********************************/
856
- /* Generic decompression selector */
857
- /* ********************************/
940
+
941
+ /* ***********************************/
942
+ /* Universal decompression selectors */
943
+ /* ***********************************/
858
944
 
859
945
  size_t HUF_decompress1X_usingDTable(void* dst, size_t maxDstSize,
860
946
  const void* cSrc, size_t cSrcSize,
861
947
  const HUF_DTable* DTable)
862
948
  {
863
949
  DTableDesc const dtd = HUF_getDTableDesc(DTable);
864
- return dtd.tableType ? HUF_decompress1X4_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable) :
865
- HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable);
950
+ #if defined(HUF_FORCE_DECOMPRESS_X1)
951
+ (void)dtd;
952
+ assert(dtd.tableType == 0);
953
+ return HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
954
+ #elif defined(HUF_FORCE_DECOMPRESS_X2)
955
+ (void)dtd;
956
+ assert(dtd.tableType == 1);
957
+ return HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
958
+ #else
959
+ return dtd.tableType ? HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0) :
960
+ HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
961
+ #endif
866
962
  }
867
963
 
868
964
  size_t HUF_decompress4X_usingDTable(void* dst, size_t maxDstSize,
@@ -870,11 +966,22 @@ size_t HUF_decompress4X_usingDTable(void* dst, size_t maxDstSize,
870
966
  const HUF_DTable* DTable)
871
967
  {
872
968
  DTableDesc const dtd = HUF_getDTableDesc(DTable);
873
- return dtd.tableType ? HUF_decompress4X4_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable) :
874
- HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable);
969
+ #if defined(HUF_FORCE_DECOMPRESS_X1)
970
+ (void)dtd;
971
+ assert(dtd.tableType == 0);
972
+ return HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
973
+ #elif defined(HUF_FORCE_DECOMPRESS_X2)
974
+ (void)dtd;
975
+ assert(dtd.tableType == 1);
976
+ return HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
977
+ #else
978
+ return dtd.tableType ? HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0) :
979
+ HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
980
+ #endif
875
981
  }
876
982
 
877
983
 
984
+ #if !defined(HUF_FORCE_DECOMPRESS_X1) && !defined(HUF_FORCE_DECOMPRESS_X2)
878
985
  typedef struct { U32 tableTime; U32 decode256Time; } algo_time_t;
879
986
  static const algo_time_t algoTime[16 /* Quantization */][3 /* single, double, quad */] =
880
987
  {
@@ -896,22 +1003,35 @@ static const algo_time_t algoTime[16 /* Quantization */][3 /* single, double, qu
896
1003
  {{1455,128}, {2422,124}, {4174,124}}, /* Q ==14 : 87-93% */
897
1004
  {{ 722,128}, {1891,145}, {1936,146}}, /* Q ==15 : 93-99% */
898
1005
  };
1006
+ #endif
899
1007
 
900
1008
  /** HUF_selectDecoder() :
901
- * Tells which decoder is likely to decode faster,
902
- * based on a set of pre-determined metrics.
903
- * @return : 0==HUF_decompress4X2, 1==HUF_decompress4X4 .
904
- * Assumption : 0 < cSrcSize, dstSize <= 128 KB */
1009
+ * Tells which decoder is likely to decode faster,
1010
+ * based on a set of pre-computed metrics.
1011
+ * @return : 0==HUF_decompress4X1, 1==HUF_decompress4X2 .
1012
+ * Assumption : 0 < dstSize <= 128 KB */
905
1013
  U32 HUF_selectDecoder (size_t dstSize, size_t cSrcSize)
906
1014
  {
1015
+ assert(dstSize > 0);
1016
+ assert(dstSize <= 128*1024);
1017
+ #if defined(HUF_FORCE_DECOMPRESS_X1)
1018
+ (void)dstSize;
1019
+ (void)cSrcSize;
1020
+ return 0;
1021
+ #elif defined(HUF_FORCE_DECOMPRESS_X2)
1022
+ (void)dstSize;
1023
+ (void)cSrcSize;
1024
+ return 1;
1025
+ #else
907
1026
  /* decoder timing evaluation */
908
- U32 const Q = cSrcSize >= dstSize ? 15 : (U32)(cSrcSize * 16 / dstSize); /* Q < 16 */
909
- U32 const D256 = (U32)(dstSize >> 8);
910
- U32 const DTime0 = algoTime[Q][0].tableTime + (algoTime[Q][0].decode256Time * D256);
911
- U32 DTime1 = algoTime[Q][1].tableTime + (algoTime[Q][1].decode256Time * D256);
912
- DTime1 += DTime1 >> 3; /* advantage to algorithm using less memory, for cache eviction */
913
-
914
- return DTime1 < DTime0;
1027
+ { U32 const Q = (cSrcSize >= dstSize) ? 15 : (U32)(cSrcSize * 16 / dstSize); /* Q < 16 */
1028
+ U32 const D256 = (U32)(dstSize >> 8);
1029
+ U32 const DTime0 = algoTime[Q][0].tableTime + (algoTime[Q][0].decode256Time * D256);
1030
+ U32 DTime1 = algoTime[Q][1].tableTime + (algoTime[Q][1].decode256Time * D256);
1031
+ DTime1 += DTime1 >> 3; /* advantage to algorithm using less memory, to reduce cache eviction */
1032
+ return DTime1 < DTime0;
1033
+ }
1034
+ #endif
915
1035
  }
916
1036
 
917
1037
 
@@ -919,7 +1039,9 @@ typedef size_t (*decompressionAlgo)(void* dst, size_t dstSize, const void* cSrc,
919
1039
 
920
1040
  size_t HUF_decompress (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
921
1041
  {
922
- static const decompressionAlgo decompress[2] = { HUF_decompress4X2, HUF_decompress4X4 };
1042
+ #if !defined(HUF_FORCE_DECOMPRESS_X1) && !defined(HUF_FORCE_DECOMPRESS_X2)
1043
+ static const decompressionAlgo decompress[2] = { HUF_decompress4X1, HUF_decompress4X2 };
1044
+ #endif
923
1045
 
924
1046
  /* validation checks */
925
1047
  if (dstSize == 0) return ERROR(dstSize_tooSmall);
@@ -928,7 +1050,17 @@ size_t HUF_decompress (void* dst, size_t dstSize, const void* cSrc, size_t cSrcS
928
1050
  if (cSrcSize == 1) { memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */
929
1051
 
930
1052
  { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
1053
+ #if defined(HUF_FORCE_DECOMPRESS_X1)
1054
+ (void)algoNb;
1055
+ assert(algoNb == 0);
1056
+ return HUF_decompress4X1(dst, dstSize, cSrc, cSrcSize);
1057
+ #elif defined(HUF_FORCE_DECOMPRESS_X2)
1058
+ (void)algoNb;
1059
+ assert(algoNb == 1);
1060
+ return HUF_decompress4X2(dst, dstSize, cSrc, cSrcSize);
1061
+ #else
931
1062
  return decompress[algoNb](dst, dstSize, cSrc, cSrcSize);
1063
+ #endif
932
1064
  }
933
1065
  }
934
1066
 
@@ -941,8 +1073,18 @@ size_t HUF_decompress4X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const
941
1073
  if (cSrcSize == 1) { memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */
942
1074
 
943
1075
  { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
944
- return algoNb ? HUF_decompress4X4_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) :
945
- HUF_decompress4X2_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) ;
1076
+ #if defined(HUF_FORCE_DECOMPRESS_X1)
1077
+ (void)algoNb;
1078
+ assert(algoNb == 0);
1079
+ return HUF_decompress4X1_DCtx(dctx, dst, dstSize, cSrc, cSrcSize);
1080
+ #elif defined(HUF_FORCE_DECOMPRESS_X2)
1081
+ (void)algoNb;
1082
+ assert(algoNb == 1);
1083
+ return HUF_decompress4X2_DCtx(dctx, dst, dstSize, cSrc, cSrcSize);
1084
+ #else
1085
+ return algoNb ? HUF_decompress4X2_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) :
1086
+ HUF_decompress4X1_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) ;
1087
+ #endif
946
1088
  }
947
1089
  }
948
1090
 
@@ -964,8 +1106,19 @@ size_t HUF_decompress4X_hufOnly_wksp(HUF_DTable* dctx, void* dst,
964
1106
  if (cSrcSize == 0) return ERROR(corruption_detected);
965
1107
 
966
1108
  { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
967
- return algoNb ? HUF_decompress4X4_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize):
968
- HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize);
1109
+ #if defined(HUF_FORCE_DECOMPRESS_X1)
1110
+ (void)algoNb;
1111
+ assert(algoNb == 0);
1112
+ return HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize);
1113
+ #elif defined(HUF_FORCE_DECOMPRESS_X2)
1114
+ (void)algoNb;
1115
+ assert(algoNb == 1);
1116
+ return HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize);
1117
+ #else
1118
+ return algoNb ? HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc,
1119
+ cSrcSize, workSpace, wkspSize):
1120
+ HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize);
1121
+ #endif
969
1122
  }
970
1123
  }
971
1124
 
@@ -980,10 +1133,22 @@ size_t HUF_decompress1X_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
980
1133
  if (cSrcSize == 1) { memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */
981
1134
 
982
1135
  { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
983
- return algoNb ? HUF_decompress1X4_DCtx_wksp(dctx, dst, dstSize, cSrc,
1136
+ #if defined(HUF_FORCE_DECOMPRESS_X1)
1137
+ (void)algoNb;
1138
+ assert(algoNb == 0);
1139
+ return HUF_decompress1X1_DCtx_wksp(dctx, dst, dstSize, cSrc,
1140
+ cSrcSize, workSpace, wkspSize);
1141
+ #elif defined(HUF_FORCE_DECOMPRESS_X2)
1142
+ (void)algoNb;
1143
+ assert(algoNb == 1);
1144
+ return HUF_decompress1X2_DCtx_wksp(dctx, dst, dstSize, cSrc,
1145
+ cSrcSize, workSpace, wkspSize);
1146
+ #else
1147
+ return algoNb ? HUF_decompress1X2_DCtx_wksp(dctx, dst, dstSize, cSrc,
984
1148
  cSrcSize, workSpace, wkspSize):
985
- HUF_decompress1X2_DCtx_wksp(dctx, dst, dstSize, cSrc,
1149
+ HUF_decompress1X1_DCtx_wksp(dctx, dst, dstSize, cSrc,
986
1150
  cSrcSize, workSpace, wkspSize);
1151
+ #endif
987
1152
  }
988
1153
  }
989
1154
 
@@ -994,3 +1159,74 @@ size_t HUF_decompress1X_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize,
994
1159
  return HUF_decompress1X_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
995
1160
  workSpace, sizeof(workSpace));
996
1161
  }
1162
+
1163
+
1164
+ size_t HUF_decompress1X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2)
1165
+ {
1166
+ DTableDesc const dtd = HUF_getDTableDesc(DTable);
1167
+ #if defined(HUF_FORCE_DECOMPRESS_X1)
1168
+ (void)dtd;
1169
+ assert(dtd.tableType == 0);
1170
+ return HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
1171
+ #elif defined(HUF_FORCE_DECOMPRESS_X2)
1172
+ (void)dtd;
1173
+ assert(dtd.tableType == 1);
1174
+ return HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
1175
+ #else
1176
+ return dtd.tableType ? HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2) :
1177
+ HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
1178
+ #endif
1179
+ }
1180
+
1181
+ #ifndef HUF_FORCE_DECOMPRESS_X2
1182
+ size_t HUF_decompress1X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2)
1183
+ {
1184
+ const BYTE* ip = (const BYTE*) cSrc;
1185
+
1186
+ size_t const hSize = HUF_readDTableX1_wksp(dctx, cSrc, cSrcSize, workSpace, wkspSize);
1187
+ if (HUF_isError(hSize)) return hSize;
1188
+ if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
1189
+ ip += hSize; cSrcSize -= hSize;
1190
+
1191
+ return HUF_decompress1X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2);
1192
+ }
1193
+ #endif
1194
+
1195
+ size_t HUF_decompress4X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2)
1196
+ {
1197
+ DTableDesc const dtd = HUF_getDTableDesc(DTable);
1198
+ #if defined(HUF_FORCE_DECOMPRESS_X1)
1199
+ (void)dtd;
1200
+ assert(dtd.tableType == 0);
1201
+ return HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
1202
+ #elif defined(HUF_FORCE_DECOMPRESS_X2)
1203
+ (void)dtd;
1204
+ assert(dtd.tableType == 1);
1205
+ return HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
1206
+ #else
1207
+ return dtd.tableType ? HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2) :
1208
+ HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
1209
+ #endif
1210
+ }
1211
+
1212
+ size_t HUF_decompress4X_hufOnly_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2)
1213
+ {
1214
+ /* validation checks */
1215
+ if (dstSize == 0) return ERROR(dstSize_tooSmall);
1216
+ if (cSrcSize == 0) return ERROR(corruption_detected);
1217
+
1218
+ { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
1219
+ #if defined(HUF_FORCE_DECOMPRESS_X1)
1220
+ (void)algoNb;
1221
+ assert(algoNb == 0);
1222
+ return HUF_decompress4X1_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2);
1223
+ #elif defined(HUF_FORCE_DECOMPRESS_X2)
1224
+ (void)algoNb;
1225
+ assert(algoNb == 1);
1226
+ return HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2);
1227
+ #else
1228
+ return algoNb ? HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2) :
1229
+ HUF_decompress4X1_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2);
1230
+ #endif
1231
+ }
1232
+ }