extzstd 0.0.3.CONCEPT → 0.3.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (138) hide show
  1. checksums.yaml +5 -5
  2. data/HISTORY.ja.md +39 -0
  3. data/LICENSE +6 -6
  4. data/README.md +26 -45
  5. data/contrib/zstd/CHANGELOG +555 -0
  6. data/contrib/zstd/CODE_OF_CONDUCT.md +5 -0
  7. data/contrib/zstd/CONTRIBUTING.md +392 -0
  8. data/contrib/zstd/COPYING +339 -0
  9. data/contrib/zstd/LICENSE +13 -9
  10. data/contrib/zstd/Makefile +414 -0
  11. data/contrib/zstd/README.md +170 -45
  12. data/contrib/zstd/TESTING.md +44 -0
  13. data/contrib/zstd/appveyor.yml +289 -0
  14. data/contrib/zstd/lib/BUCK +234 -0
  15. data/contrib/zstd/lib/Makefile +354 -0
  16. data/contrib/zstd/lib/README.md +179 -0
  17. data/contrib/zstd/{common → lib/common}/bitstream.h +170 -130
  18. data/contrib/zstd/lib/common/compiler.h +175 -0
  19. data/contrib/zstd/lib/common/cpu.h +215 -0
  20. data/contrib/zstd/lib/common/debug.c +24 -0
  21. data/contrib/zstd/lib/common/debug.h +114 -0
  22. data/contrib/zstd/{common → lib/common}/entropy_common.c +79 -94
  23. data/contrib/zstd/lib/common/error_private.c +55 -0
  24. data/contrib/zstd/lib/common/error_private.h +80 -0
  25. data/contrib/zstd/{common → lib/common}/fse.h +153 -93
  26. data/contrib/zstd/{common → lib/common}/fse_decompress.c +37 -82
  27. data/contrib/zstd/lib/common/huf.h +340 -0
  28. data/contrib/zstd/{common → lib/common}/mem.h +154 -78
  29. data/contrib/zstd/lib/common/pool.c +344 -0
  30. data/contrib/zstd/lib/common/pool.h +84 -0
  31. data/contrib/zstd/lib/common/threading.c +121 -0
  32. data/contrib/zstd/lib/common/threading.h +155 -0
  33. data/contrib/zstd/{common → lib/common}/xxhash.c +85 -75
  34. data/contrib/zstd/{common → lib/common}/xxhash.h +85 -73
  35. data/contrib/zstd/lib/common/zstd_common.c +83 -0
  36. data/contrib/zstd/lib/common/zstd_errors.h +94 -0
  37. data/contrib/zstd/lib/common/zstd_internal.h +447 -0
  38. data/contrib/zstd/{compress → lib/compress}/fse_compress.c +194 -303
  39. data/contrib/zstd/lib/compress/hist.c +183 -0
  40. data/contrib/zstd/lib/compress/hist.h +75 -0
  41. data/contrib/zstd/lib/compress/huf_compress.c +798 -0
  42. data/contrib/zstd/lib/compress/zstd_compress.c +4278 -0
  43. data/contrib/zstd/lib/compress/zstd_compress_internal.h +1125 -0
  44. data/contrib/zstd/lib/compress/zstd_compress_literals.c +158 -0
  45. data/contrib/zstd/lib/compress/zstd_compress_literals.h +29 -0
  46. data/contrib/zstd/lib/compress/zstd_compress_sequences.c +419 -0
  47. data/contrib/zstd/lib/compress/zstd_compress_sequences.h +54 -0
  48. data/contrib/zstd/lib/compress/zstd_compress_superblock.c +845 -0
  49. data/contrib/zstd/lib/compress/zstd_compress_superblock.h +32 -0
  50. data/contrib/zstd/lib/compress/zstd_cwksp.h +525 -0
  51. data/contrib/zstd/lib/compress/zstd_double_fast.c +521 -0
  52. data/contrib/zstd/lib/compress/zstd_double_fast.h +38 -0
  53. data/contrib/zstd/lib/compress/zstd_fast.c +496 -0
  54. data/contrib/zstd/lib/compress/zstd_fast.h +37 -0
  55. data/contrib/zstd/lib/compress/zstd_lazy.c +1138 -0
  56. data/contrib/zstd/lib/compress/zstd_lazy.h +67 -0
  57. data/contrib/zstd/lib/compress/zstd_ldm.c +619 -0
  58. data/contrib/zstd/lib/compress/zstd_ldm.h +110 -0
  59. data/contrib/zstd/lib/compress/zstd_opt.c +1200 -0
  60. data/contrib/zstd/lib/compress/zstd_opt.h +56 -0
  61. data/contrib/zstd/lib/compress/zstdmt_compress.c +2143 -0
  62. data/contrib/zstd/lib/compress/zstdmt_compress.h +192 -0
  63. data/contrib/zstd/lib/decompress/huf_decompress.c +1248 -0
  64. data/contrib/zstd/lib/decompress/zstd_ddict.c +244 -0
  65. data/contrib/zstd/lib/decompress/zstd_ddict.h +44 -0
  66. data/contrib/zstd/lib/decompress/zstd_decompress.c +1885 -0
  67. data/contrib/zstd/lib/decompress/zstd_decompress_block.c +1432 -0
  68. data/contrib/zstd/lib/decompress/zstd_decompress_block.h +59 -0
  69. data/contrib/zstd/lib/decompress/zstd_decompress_internal.h +189 -0
  70. data/contrib/zstd/{common → lib/deprecated}/zbuff.h +86 -69
  71. data/contrib/zstd/lib/deprecated/zbuff_common.c +26 -0
  72. data/contrib/zstd/lib/deprecated/zbuff_compress.c +147 -0
  73. data/contrib/zstd/lib/deprecated/zbuff_decompress.c +75 -0
  74. data/contrib/zstd/lib/dictBuilder/cover.c +1236 -0
  75. data/contrib/zstd/lib/dictBuilder/cover.h +157 -0
  76. data/contrib/zstd/{dictBuilder → lib/dictBuilder}/divsufsort.c +3 -3
  77. data/contrib/zstd/{dictBuilder → lib/dictBuilder}/divsufsort.h +5 -5
  78. data/contrib/zstd/lib/dictBuilder/fastcover.c +757 -0
  79. data/contrib/zstd/{dictBuilder → lib/dictBuilder}/zdict.c +437 -347
  80. data/contrib/zstd/lib/dictBuilder/zdict.h +305 -0
  81. data/contrib/zstd/lib/legacy/zstd_legacy.h +415 -0
  82. data/contrib/zstd/{legacy → lib/legacy}/zstd_v01.c +272 -292
  83. data/contrib/zstd/{legacy → lib/legacy}/zstd_v01.h +26 -32
  84. data/contrib/zstd/{legacy → lib/legacy}/zstd_v02.c +162 -392
  85. data/contrib/zstd/{legacy → lib/legacy}/zstd_v02.h +26 -32
  86. data/contrib/zstd/{legacy → lib/legacy}/zstd_v03.c +162 -391
  87. data/contrib/zstd/{legacy → lib/legacy}/zstd_v03.h +27 -33
  88. data/contrib/zstd/{legacy → lib/legacy}/zstd_v04.c +195 -604
  89. data/contrib/zstd/{legacy → lib/legacy}/zstd_v04.h +26 -32
  90. data/contrib/zstd/{legacy → lib/legacy}/zstd_v05.c +300 -575
  91. data/contrib/zstd/{legacy → lib/legacy}/zstd_v05.h +22 -31
  92. data/contrib/zstd/{legacy → lib/legacy}/zstd_v06.c +165 -592
  93. data/contrib/zstd/{legacy → lib/legacy}/zstd_v06.h +54 -67
  94. data/contrib/zstd/lib/legacy/zstd_v07.c +4541 -0
  95. data/contrib/zstd/lib/legacy/zstd_v07.h +187 -0
  96. data/contrib/zstd/lib/libzstd.pc.in +15 -0
  97. data/contrib/zstd/lib/zstd.h +2090 -0
  98. data/ext/depend +2 -0
  99. data/ext/extconf.rb +18 -5
  100. data/ext/extzstd.c +296 -214
  101. data/ext/extzstd.h +81 -36
  102. data/ext/extzstd_nogvls.h +0 -117
  103. data/ext/extzstd_stream.c +622 -0
  104. data/ext/libzstd_conf.h +8 -0
  105. data/ext/zstd_common.c +11 -0
  106. data/ext/zstd_compress.c +15 -0
  107. data/ext/zstd_decompress.c +6 -0
  108. data/ext/zstd_dictbuilder.c +10 -0
  109. data/ext/zstd_dictbuilder_fastcover.c +3 -0
  110. data/ext/zstd_legacy_v01.c +3 -1
  111. data/ext/zstd_legacy_v02.c +3 -1
  112. data/ext/zstd_legacy_v03.c +3 -1
  113. data/ext/zstd_legacy_v04.c +3 -1
  114. data/ext/zstd_legacy_v05.c +3 -1
  115. data/ext/zstd_legacy_v06.c +3 -1
  116. data/ext/zstd_legacy_v07.c +3 -0
  117. data/gemstub.rb +27 -21
  118. data/lib/extzstd.rb +82 -161
  119. data/lib/extzstd/version.rb +1 -1
  120. data/test/test_basic.rb +19 -6
  121. metadata +127 -59
  122. data/contrib/zstd/common/error_private.h +0 -125
  123. data/contrib/zstd/common/error_public.h +0 -77
  124. data/contrib/zstd/common/huf.h +0 -228
  125. data/contrib/zstd/common/zstd.h +0 -475
  126. data/contrib/zstd/common/zstd_common.c +0 -91
  127. data/contrib/zstd/common/zstd_internal.h +0 -238
  128. data/contrib/zstd/compress/huf_compress.c +0 -577
  129. data/contrib/zstd/compress/zbuff_compress.c +0 -327
  130. data/contrib/zstd/compress/zstd_compress.c +0 -3074
  131. data/contrib/zstd/compress/zstd_opt.h +0 -1046
  132. data/contrib/zstd/decompress/huf_decompress.c +0 -894
  133. data/contrib/zstd/decompress/zbuff_decompress.c +0 -294
  134. data/contrib/zstd/decompress/zstd_decompress.c +0 -1362
  135. data/contrib/zstd/dictBuilder/zdict.h +0 -113
  136. data/contrib/zstd/legacy/zstd_legacy.h +0 -140
  137. data/ext/extzstd_buffered.c +0 -265
  138. data/ext/zstd_amalgam.c +0 -18
@@ -0,0 +1,192 @@
1
+ /*
2
+ * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
3
+ * All rights reserved.
4
+ *
5
+ * This source code is licensed under both the BSD-style license (found in the
6
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
7
+ * in the COPYING file in the root directory of this source tree).
8
+ * You may select, at your option, one of the above-listed licenses.
9
+ */
10
+
11
+ #ifndef ZSTDMT_COMPRESS_H
12
+ #define ZSTDMT_COMPRESS_H
13
+
14
+ #if defined (__cplusplus)
15
+ extern "C" {
16
+ #endif
17
+
18
+
19
+ /* Note : This is an internal API.
20
+ * These APIs used to be exposed with ZSTDLIB_API,
21
+ * because it used to be the only way to invoke MT compression.
22
+ * Now, it's recommended to use ZSTD_compress2 and ZSTD_compressStream2()
23
+ * instead.
24
+ *
25
+ * If you depend on these APIs and can't switch, then define
26
+ * ZSTD_LEGACY_MULTITHREADED_API when making the dynamic library.
27
+ * However, we may completely remove these functions in a future
28
+ * release, so please switch soon.
29
+ *
30
+ * This API requires ZSTD_MULTITHREAD to be defined during compilation,
31
+ * otherwise ZSTDMT_createCCtx*() will fail.
32
+ */
33
+
34
+ #ifdef ZSTD_LEGACY_MULTITHREADED_API
35
+ # define ZSTDMT_API ZSTDLIB_API
36
+ #else
37
+ # define ZSTDMT_API
38
+ #endif
39
+
40
+ /* === Dependencies === */
41
+ #include <stddef.h> /* size_t */
42
+ #define ZSTD_STATIC_LINKING_ONLY /* ZSTD_parameters */
43
+ #include "../zstd.h" /* ZSTD_inBuffer, ZSTD_outBuffer, ZSTDLIB_API */
44
+
45
+
46
+ /* === Constants === */
47
+ #ifndef ZSTDMT_NBWORKERS_MAX
48
+ # define ZSTDMT_NBWORKERS_MAX 200
49
+ #endif
50
+ #ifndef ZSTDMT_JOBSIZE_MIN
51
+ # define ZSTDMT_JOBSIZE_MIN (1 MB)
52
+ #endif
53
+ #define ZSTDMT_JOBLOG_MAX (MEM_32bits() ? 29 : 30)
54
+ #define ZSTDMT_JOBSIZE_MAX (MEM_32bits() ? (512 MB) : (1024 MB))
55
+
56
+
57
+ /* === Memory management === */
58
+ typedef struct ZSTDMT_CCtx_s ZSTDMT_CCtx;
59
+ /* Requires ZSTD_MULTITHREAD to be defined during compilation, otherwise it will return NULL. */
60
+ ZSTDMT_API ZSTDMT_CCtx* ZSTDMT_createCCtx(unsigned nbWorkers);
61
+ /* Requires ZSTD_MULTITHREAD to be defined during compilation, otherwise it will return NULL. */
62
+ ZSTDMT_API ZSTDMT_CCtx* ZSTDMT_createCCtx_advanced(unsigned nbWorkers,
63
+ ZSTD_customMem cMem);
64
+ ZSTDMT_API size_t ZSTDMT_freeCCtx(ZSTDMT_CCtx* mtctx);
65
+
66
+ ZSTDMT_API size_t ZSTDMT_sizeof_CCtx(ZSTDMT_CCtx* mtctx);
67
+
68
+
69
+ /* === Simple one-pass compression function === */
70
+
71
+ ZSTDMT_API size_t ZSTDMT_compressCCtx(ZSTDMT_CCtx* mtctx,
72
+ void* dst, size_t dstCapacity,
73
+ const void* src, size_t srcSize,
74
+ int compressionLevel);
75
+
76
+
77
+
78
+ /* === Streaming functions === */
79
+
80
+ ZSTDMT_API size_t ZSTDMT_initCStream(ZSTDMT_CCtx* mtctx, int compressionLevel);
81
+ ZSTDMT_API size_t ZSTDMT_resetCStream(ZSTDMT_CCtx* mtctx, unsigned long long pledgedSrcSize); /**< if srcSize is not known at reset time, use ZSTD_CONTENTSIZE_UNKNOWN. Note: for compatibility with older programs, 0 means the same as ZSTD_CONTENTSIZE_UNKNOWN, but it will change in the future to mean "empty" */
82
+
83
+ ZSTDMT_API size_t ZSTDMT_nextInputSizeHint(const ZSTDMT_CCtx* mtctx);
84
+ ZSTDMT_API size_t ZSTDMT_compressStream(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output, ZSTD_inBuffer* input);
85
+
86
+ ZSTDMT_API size_t ZSTDMT_flushStream(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output); /**< @return : 0 == all flushed; >0 : still some data to be flushed; or an error code (ZSTD_isError()) */
87
+ ZSTDMT_API size_t ZSTDMT_endStream(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output); /**< @return : 0 == all flushed; >0 : still some data to be flushed; or an error code (ZSTD_isError()) */
88
+
89
+
90
+ /* === Advanced functions and parameters === */
91
+
92
+ ZSTDMT_API size_t ZSTDMT_compress_advanced(ZSTDMT_CCtx* mtctx,
93
+ void* dst, size_t dstCapacity,
94
+ const void* src, size_t srcSize,
95
+ const ZSTD_CDict* cdict,
96
+ ZSTD_parameters params,
97
+ int overlapLog);
98
+
99
+ ZSTDMT_API size_t ZSTDMT_initCStream_advanced(ZSTDMT_CCtx* mtctx,
100
+ const void* dict, size_t dictSize, /* dict can be released after init, a local copy is preserved within zcs */
101
+ ZSTD_parameters params,
102
+ unsigned long long pledgedSrcSize); /* pledgedSrcSize is optional and can be zero == unknown */
103
+
104
+ ZSTDMT_API size_t ZSTDMT_initCStream_usingCDict(ZSTDMT_CCtx* mtctx,
105
+ const ZSTD_CDict* cdict,
106
+ ZSTD_frameParameters fparams,
107
+ unsigned long long pledgedSrcSize); /* note : zero means empty */
108
+
109
+ /* ZSTDMT_parameter :
110
+ * List of parameters that can be set using ZSTDMT_setMTCtxParameter() */
111
+ typedef enum {
112
+ ZSTDMT_p_jobSize, /* Each job is compressed in parallel. By default, this value is dynamically determined depending on compression parameters. Can be set explicitly here. */
113
+ ZSTDMT_p_overlapLog, /* Each job may reload a part of previous job to enhance compression ratio; 0 == no overlap, 6(default) == use 1/8th of window, >=9 == use full window. This is a "sticky" parameter : its value will be re-used on next compression job */
114
+ ZSTDMT_p_rsyncable /* Enables rsyncable mode. */
115
+ } ZSTDMT_parameter;
116
+
117
+ /* ZSTDMT_setMTCtxParameter() :
118
+ * allow setting individual parameters, one at a time, among a list of enums defined in ZSTDMT_parameter.
119
+ * The function must be called typically after ZSTD_createCCtx() but __before ZSTDMT_init*() !__
120
+ * Parameters not explicitly reset by ZSTDMT_init*() remain the same in consecutive compression sessions.
121
+ * @return : 0, or an error code (which can be tested using ZSTD_isError()) */
122
+ ZSTDMT_API size_t ZSTDMT_setMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSTDMT_parameter parameter, int value);
123
+
124
+ /* ZSTDMT_getMTCtxParameter() :
125
+ * Query the ZSTDMT_CCtx for a parameter value.
126
+ * @return : 0, or an error code (which can be tested using ZSTD_isError()) */
127
+ ZSTDMT_API size_t ZSTDMT_getMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSTDMT_parameter parameter, int* value);
128
+
129
+
130
+ /*! ZSTDMT_compressStream_generic() :
131
+ * Combines ZSTDMT_compressStream() with optional ZSTDMT_flushStream() or ZSTDMT_endStream()
132
+ * depending on flush directive.
133
+ * @return : minimum amount of data still to be flushed
134
+ * 0 if fully flushed
135
+ * or an error code
136
+ * note : needs to be init using any ZSTD_initCStream*() variant */
137
+ ZSTDMT_API size_t ZSTDMT_compressStream_generic(ZSTDMT_CCtx* mtctx,
138
+ ZSTD_outBuffer* output,
139
+ ZSTD_inBuffer* input,
140
+ ZSTD_EndDirective endOp);
141
+
142
+
143
+ /* ========================================================
144
+ * === Private interface, for use by ZSTD_compress.c ===
145
+ * === Not exposed in libzstd. Never invoke directly ===
146
+ * ======================================================== */
147
+
148
+ /*! ZSTDMT_toFlushNow()
149
+ * Tell how many bytes are ready to be flushed immediately.
150
+ * Probe the oldest active job (not yet entirely flushed) and check its output buffer.
151
+ * If return 0, it means there is no active job,
152
+ * or, it means oldest job is still active, but everything produced has been flushed so far,
153
+ * therefore flushing is limited by speed of oldest job. */
154
+ size_t ZSTDMT_toFlushNow(ZSTDMT_CCtx* mtctx);
155
+
156
+ /*! ZSTDMT_CCtxParam_setMTCtxParameter()
157
+ * like ZSTDMT_setMTCtxParameter(), but into a ZSTD_CCtx_Params */
158
+ size_t ZSTDMT_CCtxParam_setMTCtxParameter(ZSTD_CCtx_params* params, ZSTDMT_parameter parameter, int value);
159
+
160
+ /*! ZSTDMT_CCtxParam_setNbWorkers()
161
+ * Set nbWorkers, and clamp it.
162
+ * Also reset jobSize and overlapLog */
163
+ size_t ZSTDMT_CCtxParam_setNbWorkers(ZSTD_CCtx_params* params, unsigned nbWorkers);
164
+
165
+ /*! ZSTDMT_updateCParams_whileCompressing() :
166
+ * Updates only a selected set of compression parameters, to remain compatible with current frame.
167
+ * New parameters will be applied to next compression job. */
168
+ void ZSTDMT_updateCParams_whileCompressing(ZSTDMT_CCtx* mtctx, const ZSTD_CCtx_params* cctxParams);
169
+
170
+ /*! ZSTDMT_getFrameProgression():
171
+ * tells how much data has been consumed (input) and produced (output) for current frame.
172
+ * able to count progression inside worker threads.
173
+ */
174
+ ZSTD_frameProgression ZSTDMT_getFrameProgression(ZSTDMT_CCtx* mtctx);
175
+
176
+
177
+ /*! ZSTDMT_initCStream_internal() :
178
+ * Private use only. Init streaming operation.
179
+ * expects params to be valid.
180
+ * must receive dict, or cdict, or none, but not both.
181
+ * @return : 0, or an error code */
182
+ size_t ZSTDMT_initCStream_internal(ZSTDMT_CCtx* zcs,
183
+ const void* dict, size_t dictSize, ZSTD_dictContentType_e dictContentType,
184
+ const ZSTD_CDict* cdict,
185
+ ZSTD_CCtx_params params, unsigned long long pledgedSrcSize);
186
+
187
+
188
+ #if defined (__cplusplus)
189
+ }
190
+ #endif
191
+
192
+ #endif /* ZSTDMT_COMPRESS_H */
@@ -0,0 +1,1248 @@
1
+ /* ******************************************************************
2
+ * huff0 huffman decoder,
3
+ * part of Finite State Entropy library
4
+ * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc.
5
+ *
6
+ * You can contact the author at :
7
+ * - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy
8
+ *
9
+ * This source code is licensed under both the BSD-style license (found in the
10
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
11
+ * in the COPYING file in the root directory of this source tree).
12
+ * You may select, at your option, one of the above-listed licenses.
13
+ ****************************************************************** */
14
+
15
+ /* **************************************************************
16
+ * Dependencies
17
+ ****************************************************************/
18
+ #include <string.h> /* memcpy, memset */
19
+ #include "../common/compiler.h"
20
+ #include "../common/bitstream.h" /* BIT_* */
21
+ #include "../common/fse.h" /* to compress headers */
22
+ #define HUF_STATIC_LINKING_ONLY
23
+ #include "../common/huf.h"
24
+ #include "../common/error_private.h"
25
+
26
+ /* **************************************************************
27
+ * Macros
28
+ ****************************************************************/
29
+
30
+ /* These two optional macros force the use one way or another of the two
31
+ * Huffman decompression implementations. You can't force in both directions
32
+ * at the same time.
33
+ */
34
+ #if defined(HUF_FORCE_DECOMPRESS_X1) && \
35
+ defined(HUF_FORCE_DECOMPRESS_X2)
36
+ #error "Cannot force the use of the X1 and X2 decoders at the same time!"
37
+ #endif
38
+
39
+
40
+ /* **************************************************************
41
+ * Error Management
42
+ ****************************************************************/
43
+ #define HUF_isError ERR_isError
44
+
45
+
46
+ /* **************************************************************
47
+ * Byte alignment for workSpace management
48
+ ****************************************************************/
49
+ #define HUF_ALIGN(x, a) HUF_ALIGN_MASK((x), (a) - 1)
50
+ #define HUF_ALIGN_MASK(x, mask) (((x) + (mask)) & ~(mask))
51
+
52
+
53
+ /* **************************************************************
54
+ * BMI2 Variant Wrappers
55
+ ****************************************************************/
56
+ #if DYNAMIC_BMI2
57
+
58
+ #define HUF_DGEN(fn) \
59
+ \
60
+ static size_t fn##_default( \
61
+ void* dst, size_t dstSize, \
62
+ const void* cSrc, size_t cSrcSize, \
63
+ const HUF_DTable* DTable) \
64
+ { \
65
+ return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable); \
66
+ } \
67
+ \
68
+ static TARGET_ATTRIBUTE("bmi2") size_t fn##_bmi2( \
69
+ void* dst, size_t dstSize, \
70
+ const void* cSrc, size_t cSrcSize, \
71
+ const HUF_DTable* DTable) \
72
+ { \
73
+ return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable); \
74
+ } \
75
+ \
76
+ static size_t fn(void* dst, size_t dstSize, void const* cSrc, \
77
+ size_t cSrcSize, HUF_DTable const* DTable, int bmi2) \
78
+ { \
79
+ if (bmi2) { \
80
+ return fn##_bmi2(dst, dstSize, cSrc, cSrcSize, DTable); \
81
+ } \
82
+ return fn##_default(dst, dstSize, cSrc, cSrcSize, DTable); \
83
+ }
84
+
85
+ #else
86
+
87
+ #define HUF_DGEN(fn) \
88
+ static size_t fn(void* dst, size_t dstSize, void const* cSrc, \
89
+ size_t cSrcSize, HUF_DTable const* DTable, int bmi2) \
90
+ { \
91
+ (void)bmi2; \
92
+ return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable); \
93
+ }
94
+
95
+ #endif
96
+
97
+
98
+ /*-***************************/
99
+ /* generic DTableDesc */
100
+ /*-***************************/
101
+ typedef struct { BYTE maxTableLog; BYTE tableType; BYTE tableLog; BYTE reserved; } DTableDesc;
102
+
103
+ static DTableDesc HUF_getDTableDesc(const HUF_DTable* table)
104
+ {
105
+ DTableDesc dtd;
106
+ memcpy(&dtd, table, sizeof(dtd));
107
+ return dtd;
108
+ }
109
+
110
+
111
+ #ifndef HUF_FORCE_DECOMPRESS_X2
112
+
113
+ /*-***************************/
114
+ /* single-symbol decoding */
115
+ /*-***************************/
116
+ typedef struct { BYTE byte; BYTE nbBits; } HUF_DEltX1; /* single-symbol decoding */
117
+
118
+ size_t HUF_readDTableX1_wksp(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize)
119
+ {
120
+ U32 tableLog = 0;
121
+ U32 nbSymbols = 0;
122
+ size_t iSize;
123
+ void* const dtPtr = DTable + 1;
124
+ HUF_DEltX1* const dt = (HUF_DEltX1*)dtPtr;
125
+
126
+ U32* rankVal;
127
+ BYTE* huffWeight;
128
+ size_t spaceUsed32 = 0;
129
+
130
+ rankVal = (U32 *)workSpace + spaceUsed32;
131
+ spaceUsed32 += HUF_TABLELOG_ABSOLUTEMAX + 1;
132
+ huffWeight = (BYTE *)((U32 *)workSpace + spaceUsed32);
133
+ spaceUsed32 += HUF_ALIGN(HUF_SYMBOLVALUE_MAX + 1, sizeof(U32)) >> 2;
134
+
135
+ if ((spaceUsed32 << 2) > wkspSize) return ERROR(tableLog_tooLarge);
136
+
137
+ DEBUG_STATIC_ASSERT(sizeof(DTableDesc) == sizeof(HUF_DTable));
138
+ /* memset(huffWeight, 0, sizeof(huffWeight)); */ /* is not necessary, even though some analyzer complain ... */
139
+
140
+ iSize = HUF_readStats(huffWeight, HUF_SYMBOLVALUE_MAX + 1, rankVal, &nbSymbols, &tableLog, src, srcSize);
141
+ if (HUF_isError(iSize)) return iSize;
142
+
143
+ /* Table header */
144
+ { DTableDesc dtd = HUF_getDTableDesc(DTable);
145
+ if (tableLog > (U32)(dtd.maxTableLog+1)) return ERROR(tableLog_tooLarge); /* DTable too small, Huffman tree cannot fit in */
146
+ dtd.tableType = 0;
147
+ dtd.tableLog = (BYTE)tableLog;
148
+ memcpy(DTable, &dtd, sizeof(dtd));
149
+ }
150
+
151
+ /* Calculate starting value for each rank */
152
+ { U32 n, nextRankStart = 0;
153
+ for (n=1; n<tableLog+1; n++) {
154
+ U32 const current = nextRankStart;
155
+ nextRankStart += (rankVal[n] << (n-1));
156
+ rankVal[n] = current;
157
+ } }
158
+
159
+ /* fill DTable */
160
+ { U32 n;
161
+ size_t const nEnd = nbSymbols;
162
+ for (n=0; n<nEnd; n++) {
163
+ size_t const w = huffWeight[n];
164
+ size_t const length = (1 << w) >> 1;
165
+ size_t const uStart = rankVal[w];
166
+ size_t const uEnd = uStart + length;
167
+ size_t u;
168
+ HUF_DEltX1 D;
169
+ D.byte = (BYTE)n;
170
+ D.nbBits = (BYTE)(tableLog + 1 - w);
171
+ rankVal[w] = (U32)uEnd;
172
+ if (length < 4) {
173
+ /* Use length in the loop bound so the compiler knows it is short. */
174
+ for (u = 0; u < length; ++u)
175
+ dt[uStart + u] = D;
176
+ } else {
177
+ /* Unroll the loop 4 times, we know it is a power of 2. */
178
+ for (u = uStart; u < uEnd; u += 4) {
179
+ dt[u + 0] = D;
180
+ dt[u + 1] = D;
181
+ dt[u + 2] = D;
182
+ dt[u + 3] = D;
183
+ } } } }
184
+ return iSize;
185
+ }
186
+
187
+ size_t HUF_readDTableX1(HUF_DTable* DTable, const void* src, size_t srcSize)
188
+ {
189
+ U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
190
+ return HUF_readDTableX1_wksp(DTable, src, srcSize,
191
+ workSpace, sizeof(workSpace));
192
+ }
193
+
194
+ FORCE_INLINE_TEMPLATE BYTE
195
+ HUF_decodeSymbolX1(BIT_DStream_t* Dstream, const HUF_DEltX1* dt, const U32 dtLog)
196
+ {
197
+ size_t const val = BIT_lookBitsFast(Dstream, dtLog); /* note : dtLog >= 1 */
198
+ BYTE const c = dt[val].byte;
199
+ BIT_skipBits(Dstream, dt[val].nbBits);
200
+ return c;
201
+ }
202
+
203
+ #define HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr) \
204
+ *ptr++ = HUF_decodeSymbolX1(DStreamPtr, dt, dtLog)
205
+
206
+ #define HUF_DECODE_SYMBOLX1_1(ptr, DStreamPtr) \
207
+ if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \
208
+ HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr)
209
+
210
+ #define HUF_DECODE_SYMBOLX1_2(ptr, DStreamPtr) \
211
+ if (MEM_64bits()) \
212
+ HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr)
213
+
214
+ HINT_INLINE size_t
215
+ HUF_decodeStreamX1(BYTE* p, BIT_DStream_t* const bitDPtr, BYTE* const pEnd, const HUF_DEltX1* const dt, const U32 dtLog)
216
+ {
217
+ BYTE* const pStart = p;
218
+
219
+ /* up to 4 symbols at a time */
220
+ while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd-3)) {
221
+ HUF_DECODE_SYMBOLX1_2(p, bitDPtr);
222
+ HUF_DECODE_SYMBOLX1_1(p, bitDPtr);
223
+ HUF_DECODE_SYMBOLX1_2(p, bitDPtr);
224
+ HUF_DECODE_SYMBOLX1_0(p, bitDPtr);
225
+ }
226
+
227
+ /* [0-3] symbols remaining */
228
+ if (MEM_32bits())
229
+ while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd))
230
+ HUF_DECODE_SYMBOLX1_0(p, bitDPtr);
231
+
232
+ /* no more data to retrieve from bitstream, no need to reload */
233
+ while (p < pEnd)
234
+ HUF_DECODE_SYMBOLX1_0(p, bitDPtr);
235
+
236
+ return pEnd-pStart;
237
+ }
238
+
239
+ FORCE_INLINE_TEMPLATE size_t
240
+ HUF_decompress1X1_usingDTable_internal_body(
241
+ void* dst, size_t dstSize,
242
+ const void* cSrc, size_t cSrcSize,
243
+ const HUF_DTable* DTable)
244
+ {
245
+ BYTE* op = (BYTE*)dst;
246
+ BYTE* const oend = op + dstSize;
247
+ const void* dtPtr = DTable + 1;
248
+ const HUF_DEltX1* const dt = (const HUF_DEltX1*)dtPtr;
249
+ BIT_DStream_t bitD;
250
+ DTableDesc const dtd = HUF_getDTableDesc(DTable);
251
+ U32 const dtLog = dtd.tableLog;
252
+
253
+ CHECK_F( BIT_initDStream(&bitD, cSrc, cSrcSize) );
254
+
255
+ HUF_decodeStreamX1(op, &bitD, oend, dt, dtLog);
256
+
257
+ if (!BIT_endOfDStream(&bitD)) return ERROR(corruption_detected);
258
+
259
+ return dstSize;
260
+ }
261
+
262
+ FORCE_INLINE_TEMPLATE size_t
263
+ HUF_decompress4X1_usingDTable_internal_body(
264
+ void* dst, size_t dstSize,
265
+ const void* cSrc, size_t cSrcSize,
266
+ const HUF_DTable* DTable)
267
+ {
268
+ /* Check */
269
+ if (cSrcSize < 10) return ERROR(corruption_detected); /* strict minimum : jump table + 1 byte per stream */
270
+
271
+ { const BYTE* const istart = (const BYTE*) cSrc;
272
+ BYTE* const ostart = (BYTE*) dst;
273
+ BYTE* const oend = ostart + dstSize;
274
+ BYTE* const olimit = oend - 3;
275
+ const void* const dtPtr = DTable + 1;
276
+ const HUF_DEltX1* const dt = (const HUF_DEltX1*)dtPtr;
277
+
278
+ /* Init */
279
+ BIT_DStream_t bitD1;
280
+ BIT_DStream_t bitD2;
281
+ BIT_DStream_t bitD3;
282
+ BIT_DStream_t bitD4;
283
+ size_t const length1 = MEM_readLE16(istart);
284
+ size_t const length2 = MEM_readLE16(istart+2);
285
+ size_t const length3 = MEM_readLE16(istart+4);
286
+ size_t const length4 = cSrcSize - (length1 + length2 + length3 + 6);
287
+ const BYTE* const istart1 = istart + 6; /* jumpTable */
288
+ const BYTE* const istart2 = istart1 + length1;
289
+ const BYTE* const istart3 = istart2 + length2;
290
+ const BYTE* const istart4 = istart3 + length3;
291
+ const size_t segmentSize = (dstSize+3) / 4;
292
+ BYTE* const opStart2 = ostart + segmentSize;
293
+ BYTE* const opStart3 = opStart2 + segmentSize;
294
+ BYTE* const opStart4 = opStart3 + segmentSize;
295
+ BYTE* op1 = ostart;
296
+ BYTE* op2 = opStart2;
297
+ BYTE* op3 = opStart3;
298
+ BYTE* op4 = opStart4;
299
+ DTableDesc const dtd = HUF_getDTableDesc(DTable);
300
+ U32 const dtLog = dtd.tableLog;
301
+ U32 endSignal = 1;
302
+
303
+ if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */
304
+ CHECK_F( BIT_initDStream(&bitD1, istart1, length1) );
305
+ CHECK_F( BIT_initDStream(&bitD2, istart2, length2) );
306
+ CHECK_F( BIT_initDStream(&bitD3, istart3, length3) );
307
+ CHECK_F( BIT_initDStream(&bitD4, istart4, length4) );
308
+
309
+ /* up to 16 symbols per loop (4 symbols per stream) in 64-bit mode */
310
+ for ( ; (endSignal) & (op4 < olimit) ; ) {
311
+ HUF_DECODE_SYMBOLX1_2(op1, &bitD1);
312
+ HUF_DECODE_SYMBOLX1_2(op2, &bitD2);
313
+ HUF_DECODE_SYMBOLX1_2(op3, &bitD3);
314
+ HUF_DECODE_SYMBOLX1_2(op4, &bitD4);
315
+ HUF_DECODE_SYMBOLX1_1(op1, &bitD1);
316
+ HUF_DECODE_SYMBOLX1_1(op2, &bitD2);
317
+ HUF_DECODE_SYMBOLX1_1(op3, &bitD3);
318
+ HUF_DECODE_SYMBOLX1_1(op4, &bitD4);
319
+ HUF_DECODE_SYMBOLX1_2(op1, &bitD1);
320
+ HUF_DECODE_SYMBOLX1_2(op2, &bitD2);
321
+ HUF_DECODE_SYMBOLX1_2(op3, &bitD3);
322
+ HUF_DECODE_SYMBOLX1_2(op4, &bitD4);
323
+ HUF_DECODE_SYMBOLX1_0(op1, &bitD1);
324
+ HUF_DECODE_SYMBOLX1_0(op2, &bitD2);
325
+ HUF_DECODE_SYMBOLX1_0(op3, &bitD3);
326
+ HUF_DECODE_SYMBOLX1_0(op4, &bitD4);
327
+ endSignal &= BIT_reloadDStreamFast(&bitD1) == BIT_DStream_unfinished;
328
+ endSignal &= BIT_reloadDStreamFast(&bitD2) == BIT_DStream_unfinished;
329
+ endSignal &= BIT_reloadDStreamFast(&bitD3) == BIT_DStream_unfinished;
330
+ endSignal &= BIT_reloadDStreamFast(&bitD4) == BIT_DStream_unfinished;
331
+ }
332
+
333
+ /* check corruption */
334
+ /* note : should not be necessary : op# advance in lock step, and we control op4.
335
+ * but curiously, binary generated by gcc 7.2 & 7.3 with -mbmi2 runs faster when >=1 test is present */
336
+ if (op1 > opStart2) return ERROR(corruption_detected);
337
+ if (op2 > opStart3) return ERROR(corruption_detected);
338
+ if (op3 > opStart4) return ERROR(corruption_detected);
339
+ /* note : op4 supposed already verified within main loop */
340
+
341
+ /* finish bitStreams one by one */
342
+ HUF_decodeStreamX1(op1, &bitD1, opStart2, dt, dtLog);
343
+ HUF_decodeStreamX1(op2, &bitD2, opStart3, dt, dtLog);
344
+ HUF_decodeStreamX1(op3, &bitD3, opStart4, dt, dtLog);
345
+ HUF_decodeStreamX1(op4, &bitD4, oend, dt, dtLog);
346
+
347
+ /* check */
348
+ { U32 const endCheck = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4);
349
+ if (!endCheck) return ERROR(corruption_detected); }
350
+
351
+ /* decoded size */
352
+ return dstSize;
353
+ }
354
+ }
355
+
356
+
357
+ typedef size_t (*HUF_decompress_usingDTable_t)(void *dst, size_t dstSize,
358
+ const void *cSrc,
359
+ size_t cSrcSize,
360
+ const HUF_DTable *DTable);
361
+
362
+ HUF_DGEN(HUF_decompress1X1_usingDTable_internal)
363
+ HUF_DGEN(HUF_decompress4X1_usingDTable_internal)
364
+
365
+
366
+
367
+ size_t HUF_decompress1X1_usingDTable(
368
+ void* dst, size_t dstSize,
369
+ const void* cSrc, size_t cSrcSize,
370
+ const HUF_DTable* DTable)
371
+ {
372
+ DTableDesc dtd = HUF_getDTableDesc(DTable);
373
+ if (dtd.tableType != 0) return ERROR(GENERIC);
374
+ return HUF_decompress1X1_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
375
+ }
376
+
377
+ size_t HUF_decompress1X1_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize,
378
+ const void* cSrc, size_t cSrcSize,
379
+ void* workSpace, size_t wkspSize)
380
+ {
381
+ const BYTE* ip = (const BYTE*) cSrc;
382
+
383
+ size_t const hSize = HUF_readDTableX1_wksp(DCtx, cSrc, cSrcSize, workSpace, wkspSize);
384
+ if (HUF_isError(hSize)) return hSize;
385
+ if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
386
+ ip += hSize; cSrcSize -= hSize;
387
+
388
+ return HUF_decompress1X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, DCtx, /* bmi2 */ 0);
389
+ }
390
+
391
+
392
+ size_t HUF_decompress1X1_DCtx(HUF_DTable* DCtx, void* dst, size_t dstSize,
393
+ const void* cSrc, size_t cSrcSize)
394
+ {
395
+ U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
396
+ return HUF_decompress1X1_DCtx_wksp(DCtx, dst, dstSize, cSrc, cSrcSize,
397
+ workSpace, sizeof(workSpace));
398
+ }
399
+
400
+ size_t HUF_decompress1X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
401
+ {
402
+ HUF_CREATE_STATIC_DTABLEX1(DTable, HUF_TABLELOG_MAX);
403
+ return HUF_decompress1X1_DCtx (DTable, dst, dstSize, cSrc, cSrcSize);
404
+ }
405
+
406
+ size_t HUF_decompress4X1_usingDTable(
407
+ void* dst, size_t dstSize,
408
+ const void* cSrc, size_t cSrcSize,
409
+ const HUF_DTable* DTable)
410
+ {
411
+ DTableDesc dtd = HUF_getDTableDesc(DTable);
412
+ if (dtd.tableType != 0) return ERROR(GENERIC);
413
+ return HUF_decompress4X1_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
414
+ }
415
+
416
+ static size_t HUF_decompress4X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize,
417
+ const void* cSrc, size_t cSrcSize,
418
+ void* workSpace, size_t wkspSize, int bmi2)
419
+ {
420
+ const BYTE* ip = (const BYTE*) cSrc;
421
+
422
+ size_t const hSize = HUF_readDTableX1_wksp (dctx, cSrc, cSrcSize,
423
+ workSpace, wkspSize);
424
+ if (HUF_isError(hSize)) return hSize;
425
+ if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
426
+ ip += hSize; cSrcSize -= hSize;
427
+
428
+ return HUF_decompress4X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2);
429
+ }
430
+
431
+ size_t HUF_decompress4X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
432
+ const void* cSrc, size_t cSrcSize,
433
+ void* workSpace, size_t wkspSize)
434
+ {
435
+ return HUF_decompress4X1_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, 0);
436
+ }
437
+
438
+
439
+ size_t HUF_decompress4X1_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
440
+ {
441
+ U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
442
+ return HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
443
+ workSpace, sizeof(workSpace));
444
+ }
445
+ size_t HUF_decompress4X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
446
+ {
447
+ HUF_CREATE_STATIC_DTABLEX1(DTable, HUF_TABLELOG_MAX);
448
+ return HUF_decompress4X1_DCtx(DTable, dst, dstSize, cSrc, cSrcSize);
449
+ }
450
+
451
+ #endif /* HUF_FORCE_DECOMPRESS_X2 */
452
+
453
+
454
+ #ifndef HUF_FORCE_DECOMPRESS_X1
455
+
456
+ /* *************************/
457
+ /* double-symbols decoding */
458
+ /* *************************/
459
+
460
+ typedef struct { U16 sequence; BYTE nbBits; BYTE length; } HUF_DEltX2; /* double-symbols decoding */
461
+ typedef struct { BYTE symbol; BYTE weight; } sortedSymbol_t;
462
+ typedef U32 rankValCol_t[HUF_TABLELOG_MAX + 1];
463
+ typedef rankValCol_t rankVal_t[HUF_TABLELOG_MAX];
464
+
465
+
466
+ /* HUF_fillDTableX2Level2() :
467
+ * `rankValOrigin` must be a table of at least (HUF_TABLELOG_MAX + 1) U32 */
468
+ static void HUF_fillDTableX2Level2(HUF_DEltX2* DTable, U32 sizeLog, const U32 consumed,
469
+ const U32* rankValOrigin, const int minWeight,
470
+ const sortedSymbol_t* sortedSymbols, const U32 sortedListSize,
471
+ U32 nbBitsBaseline, U16 baseSeq)
472
+ {
473
+ HUF_DEltX2 DElt;
474
+ U32 rankVal[HUF_TABLELOG_MAX + 1];
475
+
476
+ /* get pre-calculated rankVal */
477
+ memcpy(rankVal, rankValOrigin, sizeof(rankVal));
478
+
479
+ /* fill skipped values */
480
+ if (minWeight>1) {
481
+ U32 i, skipSize = rankVal[minWeight];
482
+ MEM_writeLE16(&(DElt.sequence), baseSeq);
483
+ DElt.nbBits = (BYTE)(consumed);
484
+ DElt.length = 1;
485
+ for (i = 0; i < skipSize; i++)
486
+ DTable[i] = DElt;
487
+ }
488
+
489
+ /* fill DTable */
490
+ { U32 s; for (s=0; s<sortedListSize; s++) { /* note : sortedSymbols already skipped */
491
+ const U32 symbol = sortedSymbols[s].symbol;
492
+ const U32 weight = sortedSymbols[s].weight;
493
+ const U32 nbBits = nbBitsBaseline - weight;
494
+ const U32 length = 1 << (sizeLog-nbBits);
495
+ const U32 start = rankVal[weight];
496
+ U32 i = start;
497
+ const U32 end = start + length;
498
+
499
+ MEM_writeLE16(&(DElt.sequence), (U16)(baseSeq + (symbol << 8)));
500
+ DElt.nbBits = (BYTE)(nbBits + consumed);
501
+ DElt.length = 2;
502
+ do { DTable[i++] = DElt; } while (i<end); /* since length >= 1 */
503
+
504
+ rankVal[weight] += length;
505
+ } }
506
+ }
507
+
508
+
509
+ static void HUF_fillDTableX2(HUF_DEltX2* DTable, const U32 targetLog,
510
+ const sortedSymbol_t* sortedList, const U32 sortedListSize,
511
+ const U32* rankStart, rankVal_t rankValOrigin, const U32 maxWeight,
512
+ const U32 nbBitsBaseline)
513
+ {
514
+ U32 rankVal[HUF_TABLELOG_MAX + 1];
515
+ const int scaleLog = nbBitsBaseline - targetLog; /* note : targetLog >= srcLog, hence scaleLog <= 1 */
516
+ const U32 minBits = nbBitsBaseline - maxWeight;
517
+ U32 s;
518
+
519
+ memcpy(rankVal, rankValOrigin, sizeof(rankVal));
520
+
521
+ /* fill DTable */
522
+ for (s=0; s<sortedListSize; s++) {
523
+ const U16 symbol = sortedList[s].symbol;
524
+ const U32 weight = sortedList[s].weight;
525
+ const U32 nbBits = nbBitsBaseline - weight;
526
+ const U32 start = rankVal[weight];
527
+ const U32 length = 1 << (targetLog-nbBits);
528
+
529
+ if (targetLog-nbBits >= minBits) { /* enough room for a second symbol */
530
+ U32 sortedRank;
531
+ int minWeight = nbBits + scaleLog;
532
+ if (minWeight < 1) minWeight = 1;
533
+ sortedRank = rankStart[minWeight];
534
+ HUF_fillDTableX2Level2(DTable+start, targetLog-nbBits, nbBits,
535
+ rankValOrigin[nbBits], minWeight,
536
+ sortedList+sortedRank, sortedListSize-sortedRank,
537
+ nbBitsBaseline, symbol);
538
+ } else {
539
+ HUF_DEltX2 DElt;
540
+ MEM_writeLE16(&(DElt.sequence), symbol);
541
+ DElt.nbBits = (BYTE)(nbBits);
542
+ DElt.length = 1;
543
+ { U32 const end = start + length;
544
+ U32 u;
545
+ for (u = start; u < end; u++) DTable[u] = DElt;
546
+ } }
547
+ rankVal[weight] += length;
548
+ }
549
+ }
550
+
551
+ size_t HUF_readDTableX2_wksp(HUF_DTable* DTable,
552
+ const void* src, size_t srcSize,
553
+ void* workSpace, size_t wkspSize)
554
+ {
555
+ U32 tableLog, maxW, sizeOfSort, nbSymbols;
556
+ DTableDesc dtd = HUF_getDTableDesc(DTable);
557
+ U32 const maxTableLog = dtd.maxTableLog;
558
+ size_t iSize;
559
+ void* dtPtr = DTable+1; /* force compiler to avoid strict-aliasing */
560
+ HUF_DEltX2* const dt = (HUF_DEltX2*)dtPtr;
561
+ U32 *rankStart;
562
+
563
+ rankValCol_t* rankVal;
564
+ U32* rankStats;
565
+ U32* rankStart0;
566
+ sortedSymbol_t* sortedSymbol;
567
+ BYTE* weightList;
568
+ size_t spaceUsed32 = 0;
569
+
570
+ rankVal = (rankValCol_t *)((U32 *)workSpace + spaceUsed32);
571
+ spaceUsed32 += (sizeof(rankValCol_t) * HUF_TABLELOG_MAX) >> 2;
572
+ rankStats = (U32 *)workSpace + spaceUsed32;
573
+ spaceUsed32 += HUF_TABLELOG_MAX + 1;
574
+ rankStart0 = (U32 *)workSpace + spaceUsed32;
575
+ spaceUsed32 += HUF_TABLELOG_MAX + 2;
576
+ sortedSymbol = (sortedSymbol_t *)workSpace + (spaceUsed32 * sizeof(U32)) / sizeof(sortedSymbol_t);
577
+ spaceUsed32 += HUF_ALIGN(sizeof(sortedSymbol_t) * (HUF_SYMBOLVALUE_MAX + 1), sizeof(U32)) >> 2;
578
+ weightList = (BYTE *)((U32 *)workSpace + spaceUsed32);
579
+ spaceUsed32 += HUF_ALIGN(HUF_SYMBOLVALUE_MAX + 1, sizeof(U32)) >> 2;
580
+
581
+ if ((spaceUsed32 << 2) > wkspSize) return ERROR(tableLog_tooLarge);
582
+
583
+ rankStart = rankStart0 + 1;
584
+ memset(rankStats, 0, sizeof(U32) * (2 * HUF_TABLELOG_MAX + 2 + 1));
585
+
586
+ DEBUG_STATIC_ASSERT(sizeof(HUF_DEltX2) == sizeof(HUF_DTable)); /* if compiler fails here, assertion is wrong */
587
+ if (maxTableLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge);
588
+ /* memset(weightList, 0, sizeof(weightList)); */ /* is not necessary, even though some analyzer complain ... */
589
+
590
+ iSize = HUF_readStats(weightList, HUF_SYMBOLVALUE_MAX + 1, rankStats, &nbSymbols, &tableLog, src, srcSize);
591
+ if (HUF_isError(iSize)) return iSize;
592
+
593
+ /* check result */
594
+ if (tableLog > maxTableLog) return ERROR(tableLog_tooLarge); /* DTable can't fit code depth */
595
+
596
+ /* find maxWeight */
597
+ for (maxW = tableLog; rankStats[maxW]==0; maxW--) {} /* necessarily finds a solution before 0 */
598
+
599
+ /* Get start index of each weight */
600
+ { U32 w, nextRankStart = 0;
601
+ for (w=1; w<maxW+1; w++) {
602
+ U32 current = nextRankStart;
603
+ nextRankStart += rankStats[w];
604
+ rankStart[w] = current;
605
+ }
606
+ rankStart[0] = nextRankStart; /* put all 0w symbols at the end of sorted list*/
607
+ sizeOfSort = nextRankStart;
608
+ }
609
+
610
+ /* sort symbols by weight */
611
+ { U32 s;
612
+ for (s=0; s<nbSymbols; s++) {
613
+ U32 const w = weightList[s];
614
+ U32 const r = rankStart[w]++;
615
+ sortedSymbol[r].symbol = (BYTE)s;
616
+ sortedSymbol[r].weight = (BYTE)w;
617
+ }
618
+ rankStart[0] = 0; /* forget 0w symbols; this is beginning of weight(1) */
619
+ }
620
+
621
+ /* Build rankVal */
622
+ { U32* const rankVal0 = rankVal[0];
623
+ { int const rescale = (maxTableLog-tableLog) - 1; /* tableLog <= maxTableLog */
624
+ U32 nextRankVal = 0;
625
+ U32 w;
626
+ for (w=1; w<maxW+1; w++) {
627
+ U32 current = nextRankVal;
628
+ nextRankVal += rankStats[w] << (w+rescale);
629
+ rankVal0[w] = current;
630
+ } }
631
+ { U32 const minBits = tableLog+1 - maxW;
632
+ U32 consumed;
633
+ for (consumed = minBits; consumed < maxTableLog - minBits + 1; consumed++) {
634
+ U32* const rankValPtr = rankVal[consumed];
635
+ U32 w;
636
+ for (w = 1; w < maxW+1; w++) {
637
+ rankValPtr[w] = rankVal0[w] >> consumed;
638
+ } } } }
639
+
640
+ HUF_fillDTableX2(dt, maxTableLog,
641
+ sortedSymbol, sizeOfSort,
642
+ rankStart0, rankVal, maxW,
643
+ tableLog+1);
644
+
645
+ dtd.tableLog = (BYTE)maxTableLog;
646
+ dtd.tableType = 1;
647
+ memcpy(DTable, &dtd, sizeof(dtd));
648
+ return iSize;
649
+ }
650
+
651
+ size_t HUF_readDTableX2(HUF_DTable* DTable, const void* src, size_t srcSize)
652
+ {
653
+ U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
654
+ return HUF_readDTableX2_wksp(DTable, src, srcSize,
655
+ workSpace, sizeof(workSpace));
656
+ }
657
+
658
+
659
+ FORCE_INLINE_TEMPLATE U32
660
+ HUF_decodeSymbolX2(void* op, BIT_DStream_t* DStream, const HUF_DEltX2* dt, const U32 dtLog)
661
+ {
662
+ size_t const val = BIT_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */
663
+ memcpy(op, dt+val, 2);
664
+ BIT_skipBits(DStream, dt[val].nbBits);
665
+ return dt[val].length;
666
+ }
667
+
668
+ FORCE_INLINE_TEMPLATE U32
669
+ HUF_decodeLastSymbolX2(void* op, BIT_DStream_t* DStream, const HUF_DEltX2* dt, const U32 dtLog)
670
+ {
671
+ size_t const val = BIT_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */
672
+ memcpy(op, dt+val, 1);
673
+ if (dt[val].length==1) BIT_skipBits(DStream, dt[val].nbBits);
674
+ else {
675
+ if (DStream->bitsConsumed < (sizeof(DStream->bitContainer)*8)) {
676
+ BIT_skipBits(DStream, dt[val].nbBits);
677
+ if (DStream->bitsConsumed > (sizeof(DStream->bitContainer)*8))
678
+ /* ugly hack; works only because it's the last symbol. Note : can't easily extract nbBits from just this symbol */
679
+ DStream->bitsConsumed = (sizeof(DStream->bitContainer)*8);
680
+ } }
681
+ return 1;
682
+ }
683
+
684
+ #define HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr) \
685
+ ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog)
686
+
687
+ #define HUF_DECODE_SYMBOLX2_1(ptr, DStreamPtr) \
688
+ if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \
689
+ ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog)
690
+
691
+ #define HUF_DECODE_SYMBOLX2_2(ptr, DStreamPtr) \
692
+ if (MEM_64bits()) \
693
+ ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog)
694
+
695
+ HINT_INLINE size_t
696
+ HUF_decodeStreamX2(BYTE* p, BIT_DStream_t* bitDPtr, BYTE* const pEnd,
697
+ const HUF_DEltX2* const dt, const U32 dtLog)
698
+ {
699
+ BYTE* const pStart = p;
700
+
701
+ /* up to 8 symbols at a time */
702
+ while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd-(sizeof(bitDPtr->bitContainer)-1))) {
703
+ HUF_DECODE_SYMBOLX2_2(p, bitDPtr);
704
+ HUF_DECODE_SYMBOLX2_1(p, bitDPtr);
705
+ HUF_DECODE_SYMBOLX2_2(p, bitDPtr);
706
+ HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
707
+ }
708
+
709
+ /* closer to end : up to 2 symbols at a time */
710
+ while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p <= pEnd-2))
711
+ HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
712
+
713
+ while (p <= pEnd-2)
714
+ HUF_DECODE_SYMBOLX2_0(p, bitDPtr); /* no need to reload : reached the end of DStream */
715
+
716
+ if (p < pEnd)
717
+ p += HUF_decodeLastSymbolX2(p, bitDPtr, dt, dtLog);
718
+
719
+ return p-pStart;
720
+ }
721
+
722
+ FORCE_INLINE_TEMPLATE size_t
723
+ HUF_decompress1X2_usingDTable_internal_body(
724
+ void* dst, size_t dstSize,
725
+ const void* cSrc, size_t cSrcSize,
726
+ const HUF_DTable* DTable)
727
+ {
728
+ BIT_DStream_t bitD;
729
+
730
+ /* Init */
731
+ CHECK_F( BIT_initDStream(&bitD, cSrc, cSrcSize) );
732
+
733
+ /* decode */
734
+ { BYTE* const ostart = (BYTE*) dst;
735
+ BYTE* const oend = ostart + dstSize;
736
+ const void* const dtPtr = DTable+1; /* force compiler to not use strict-aliasing */
737
+ const HUF_DEltX2* const dt = (const HUF_DEltX2*)dtPtr;
738
+ DTableDesc const dtd = HUF_getDTableDesc(DTable);
739
+ HUF_decodeStreamX2(ostart, &bitD, oend, dt, dtd.tableLog);
740
+ }
741
+
742
+ /* check */
743
+ if (!BIT_endOfDStream(&bitD)) return ERROR(corruption_detected);
744
+
745
+ /* decoded size */
746
+ return dstSize;
747
+ }
748
+
749
+ FORCE_INLINE_TEMPLATE size_t
750
+ HUF_decompress4X2_usingDTable_internal_body(
751
+ void* dst, size_t dstSize,
752
+ const void* cSrc, size_t cSrcSize,
753
+ const HUF_DTable* DTable)
754
+ {
755
+ if (cSrcSize < 10) return ERROR(corruption_detected); /* strict minimum : jump table + 1 byte per stream */
756
+
757
+ { const BYTE* const istart = (const BYTE*) cSrc;
758
+ BYTE* const ostart = (BYTE*) dst;
759
+ BYTE* const oend = ostart + dstSize;
760
+ BYTE* const olimit = oend - (sizeof(size_t)-1);
761
+ const void* const dtPtr = DTable+1;
762
+ const HUF_DEltX2* const dt = (const HUF_DEltX2*)dtPtr;
763
+
764
+ /* Init */
765
+ BIT_DStream_t bitD1;
766
+ BIT_DStream_t bitD2;
767
+ BIT_DStream_t bitD3;
768
+ BIT_DStream_t bitD4;
769
+ size_t const length1 = MEM_readLE16(istart);
770
+ size_t const length2 = MEM_readLE16(istart+2);
771
+ size_t const length3 = MEM_readLE16(istart+4);
772
+ size_t const length4 = cSrcSize - (length1 + length2 + length3 + 6);
773
+ const BYTE* const istart1 = istart + 6; /* jumpTable */
774
+ const BYTE* const istart2 = istart1 + length1;
775
+ const BYTE* const istart3 = istart2 + length2;
776
+ const BYTE* const istart4 = istart3 + length3;
777
+ size_t const segmentSize = (dstSize+3) / 4;
778
+ BYTE* const opStart2 = ostart + segmentSize;
779
+ BYTE* const opStart3 = opStart2 + segmentSize;
780
+ BYTE* const opStart4 = opStart3 + segmentSize;
781
+ BYTE* op1 = ostart;
782
+ BYTE* op2 = opStart2;
783
+ BYTE* op3 = opStart3;
784
+ BYTE* op4 = opStart4;
785
+ U32 endSignal = 1;
786
+ DTableDesc const dtd = HUF_getDTableDesc(DTable);
787
+ U32 const dtLog = dtd.tableLog;
788
+
789
+ if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */
790
+ CHECK_F( BIT_initDStream(&bitD1, istart1, length1) );
791
+ CHECK_F( BIT_initDStream(&bitD2, istart2, length2) );
792
+ CHECK_F( BIT_initDStream(&bitD3, istart3, length3) );
793
+ CHECK_F( BIT_initDStream(&bitD4, istart4, length4) );
794
+
795
+ /* 16-32 symbols per loop (4-8 symbols per stream) */
796
+ for ( ; (endSignal) & (op4 < olimit); ) {
797
+ #if defined(__clang__) && (defined(__x86_64__) || defined(__i386__))
798
+ HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
799
+ HUF_DECODE_SYMBOLX2_1(op1, &bitD1);
800
+ HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
801
+ HUF_DECODE_SYMBOLX2_0(op1, &bitD1);
802
+ HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
803
+ HUF_DECODE_SYMBOLX2_1(op2, &bitD2);
804
+ HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
805
+ HUF_DECODE_SYMBOLX2_0(op2, &bitD2);
806
+ endSignal &= BIT_reloadDStreamFast(&bitD1) == BIT_DStream_unfinished;
807
+ endSignal &= BIT_reloadDStreamFast(&bitD2) == BIT_DStream_unfinished;
808
+ HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
809
+ HUF_DECODE_SYMBOLX2_1(op3, &bitD3);
810
+ HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
811
+ HUF_DECODE_SYMBOLX2_0(op3, &bitD3);
812
+ HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
813
+ HUF_DECODE_SYMBOLX2_1(op4, &bitD4);
814
+ HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
815
+ HUF_DECODE_SYMBOLX2_0(op4, &bitD4);
816
+ endSignal &= BIT_reloadDStreamFast(&bitD3) == BIT_DStream_unfinished;
817
+ endSignal &= BIT_reloadDStreamFast(&bitD4) == BIT_DStream_unfinished;
818
+ #else
819
+ HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
820
+ HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
821
+ HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
822
+ HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
823
+ HUF_DECODE_SYMBOLX2_1(op1, &bitD1);
824
+ HUF_DECODE_SYMBOLX2_1(op2, &bitD2);
825
+ HUF_DECODE_SYMBOLX2_1(op3, &bitD3);
826
+ HUF_DECODE_SYMBOLX2_1(op4, &bitD4);
827
+ HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
828
+ HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
829
+ HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
830
+ HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
831
+ HUF_DECODE_SYMBOLX2_0(op1, &bitD1);
832
+ HUF_DECODE_SYMBOLX2_0(op2, &bitD2);
833
+ HUF_DECODE_SYMBOLX2_0(op3, &bitD3);
834
+ HUF_DECODE_SYMBOLX2_0(op4, &bitD4);
835
+ endSignal = (U32)LIKELY(
836
+ (BIT_reloadDStreamFast(&bitD1) == BIT_DStream_unfinished)
837
+ & (BIT_reloadDStreamFast(&bitD2) == BIT_DStream_unfinished)
838
+ & (BIT_reloadDStreamFast(&bitD3) == BIT_DStream_unfinished)
839
+ & (BIT_reloadDStreamFast(&bitD4) == BIT_DStream_unfinished));
840
+ #endif
841
+ }
842
+
843
+ /* check corruption */
844
+ if (op1 > opStart2) return ERROR(corruption_detected);
845
+ if (op2 > opStart3) return ERROR(corruption_detected);
846
+ if (op3 > opStart4) return ERROR(corruption_detected);
847
+ /* note : op4 already verified within main loop */
848
+
849
+ /* finish bitStreams one by one */
850
+ HUF_decodeStreamX2(op1, &bitD1, opStart2, dt, dtLog);
851
+ HUF_decodeStreamX2(op2, &bitD2, opStart3, dt, dtLog);
852
+ HUF_decodeStreamX2(op3, &bitD3, opStart4, dt, dtLog);
853
+ HUF_decodeStreamX2(op4, &bitD4, oend, dt, dtLog);
854
+
855
+ /* check */
856
+ { U32 const endCheck = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4);
857
+ if (!endCheck) return ERROR(corruption_detected); }
858
+
859
+ /* decoded size */
860
+ return dstSize;
861
+ }
862
+ }
863
+
864
+ HUF_DGEN(HUF_decompress1X2_usingDTable_internal)
865
+ HUF_DGEN(HUF_decompress4X2_usingDTable_internal)
866
+
867
+ size_t HUF_decompress1X2_usingDTable(
868
+ void* dst, size_t dstSize,
869
+ const void* cSrc, size_t cSrcSize,
870
+ const HUF_DTable* DTable)
871
+ {
872
+ DTableDesc dtd = HUF_getDTableDesc(DTable);
873
+ if (dtd.tableType != 1) return ERROR(GENERIC);
874
+ return HUF_decompress1X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
875
+ }
876
+
877
+ size_t HUF_decompress1X2_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize,
878
+ const void* cSrc, size_t cSrcSize,
879
+ void* workSpace, size_t wkspSize)
880
+ {
881
+ const BYTE* ip = (const BYTE*) cSrc;
882
+
883
+ size_t const hSize = HUF_readDTableX2_wksp(DCtx, cSrc, cSrcSize,
884
+ workSpace, wkspSize);
885
+ if (HUF_isError(hSize)) return hSize;
886
+ if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
887
+ ip += hSize; cSrcSize -= hSize;
888
+
889
+ return HUF_decompress1X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, DCtx, /* bmi2 */ 0);
890
+ }
891
+
892
+
893
+ size_t HUF_decompress1X2_DCtx(HUF_DTable* DCtx, void* dst, size_t dstSize,
894
+ const void* cSrc, size_t cSrcSize)
895
+ {
896
+ U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
897
+ return HUF_decompress1X2_DCtx_wksp(DCtx, dst, dstSize, cSrc, cSrcSize,
898
+ workSpace, sizeof(workSpace));
899
+ }
900
+
901
+ size_t HUF_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
902
+ {
903
+ HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_TABLELOG_MAX);
904
+ return HUF_decompress1X2_DCtx(DTable, dst, dstSize, cSrc, cSrcSize);
905
+ }
906
+
907
+ size_t HUF_decompress4X2_usingDTable(
908
+ void* dst, size_t dstSize,
909
+ const void* cSrc, size_t cSrcSize,
910
+ const HUF_DTable* DTable)
911
+ {
912
+ DTableDesc dtd = HUF_getDTableDesc(DTable);
913
+ if (dtd.tableType != 1) return ERROR(GENERIC);
914
+ return HUF_decompress4X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
915
+ }
916
+
917
+ static size_t HUF_decompress4X2_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize,
918
+ const void* cSrc, size_t cSrcSize,
919
+ void* workSpace, size_t wkspSize, int bmi2)
920
+ {
921
+ const BYTE* ip = (const BYTE*) cSrc;
922
+
923
+ size_t hSize = HUF_readDTableX2_wksp(dctx, cSrc, cSrcSize,
924
+ workSpace, wkspSize);
925
+ if (HUF_isError(hSize)) return hSize;
926
+ if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
927
+ ip += hSize; cSrcSize -= hSize;
928
+
929
+ return HUF_decompress4X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2);
930
+ }
931
+
932
+ size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
933
+ const void* cSrc, size_t cSrcSize,
934
+ void* workSpace, size_t wkspSize)
935
+ {
936
+ return HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, /* bmi2 */ 0);
937
+ }
938
+
939
+
940
+ size_t HUF_decompress4X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize,
941
+ const void* cSrc, size_t cSrcSize)
942
+ {
943
+ U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
944
+ return HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
945
+ workSpace, sizeof(workSpace));
946
+ }
947
+
948
+ size_t HUF_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
949
+ {
950
+ HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_TABLELOG_MAX);
951
+ return HUF_decompress4X2_DCtx(DTable, dst, dstSize, cSrc, cSrcSize);
952
+ }
953
+
954
+ #endif /* HUF_FORCE_DECOMPRESS_X1 */
955
+
956
+
957
+ /* ***********************************/
958
+ /* Universal decompression selectors */
959
+ /* ***********************************/
960
+
961
+ size_t HUF_decompress1X_usingDTable(void* dst, size_t maxDstSize,
962
+ const void* cSrc, size_t cSrcSize,
963
+ const HUF_DTable* DTable)
964
+ {
965
+ DTableDesc const dtd = HUF_getDTableDesc(DTable);
966
+ #if defined(HUF_FORCE_DECOMPRESS_X1)
967
+ (void)dtd;
968
+ assert(dtd.tableType == 0);
969
+ return HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
970
+ #elif defined(HUF_FORCE_DECOMPRESS_X2)
971
+ (void)dtd;
972
+ assert(dtd.tableType == 1);
973
+ return HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
974
+ #else
975
+ return dtd.tableType ? HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0) :
976
+ HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
977
+ #endif
978
+ }
979
+
980
+ size_t HUF_decompress4X_usingDTable(void* dst, size_t maxDstSize,
981
+ const void* cSrc, size_t cSrcSize,
982
+ const HUF_DTable* DTable)
983
+ {
984
+ DTableDesc const dtd = HUF_getDTableDesc(DTable);
985
+ #if defined(HUF_FORCE_DECOMPRESS_X1)
986
+ (void)dtd;
987
+ assert(dtd.tableType == 0);
988
+ return HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
989
+ #elif defined(HUF_FORCE_DECOMPRESS_X2)
990
+ (void)dtd;
991
+ assert(dtd.tableType == 1);
992
+ return HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
993
+ #else
994
+ return dtd.tableType ? HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0) :
995
+ HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
996
+ #endif
997
+ }
998
+
999
+
1000
+ #if !defined(HUF_FORCE_DECOMPRESS_X1) && !defined(HUF_FORCE_DECOMPRESS_X2)
1001
+ typedef struct { U32 tableTime; U32 decode256Time; } algo_time_t;
1002
+ static const algo_time_t algoTime[16 /* Quantization */][3 /* single, double, quad */] =
1003
+ {
1004
+ /* single, double, quad */
1005
+ {{0,0}, {1,1}, {2,2}}, /* Q==0 : impossible */
1006
+ {{0,0}, {1,1}, {2,2}}, /* Q==1 : impossible */
1007
+ {{ 38,130}, {1313, 74}, {2151, 38}}, /* Q == 2 : 12-18% */
1008
+ {{ 448,128}, {1353, 74}, {2238, 41}}, /* Q == 3 : 18-25% */
1009
+ {{ 556,128}, {1353, 74}, {2238, 47}}, /* Q == 4 : 25-32% */
1010
+ {{ 714,128}, {1418, 74}, {2436, 53}}, /* Q == 5 : 32-38% */
1011
+ {{ 883,128}, {1437, 74}, {2464, 61}}, /* Q == 6 : 38-44% */
1012
+ {{ 897,128}, {1515, 75}, {2622, 68}}, /* Q == 7 : 44-50% */
1013
+ {{ 926,128}, {1613, 75}, {2730, 75}}, /* Q == 8 : 50-56% */
1014
+ {{ 947,128}, {1729, 77}, {3359, 77}}, /* Q == 9 : 56-62% */
1015
+ {{1107,128}, {2083, 81}, {4006, 84}}, /* Q ==10 : 62-69% */
1016
+ {{1177,128}, {2379, 87}, {4785, 88}}, /* Q ==11 : 69-75% */
1017
+ {{1242,128}, {2415, 93}, {5155, 84}}, /* Q ==12 : 75-81% */
1018
+ {{1349,128}, {2644,106}, {5260,106}}, /* Q ==13 : 81-87% */
1019
+ {{1455,128}, {2422,124}, {4174,124}}, /* Q ==14 : 87-93% */
1020
+ {{ 722,128}, {1891,145}, {1936,146}}, /* Q ==15 : 93-99% */
1021
+ };
1022
+ #endif
1023
+
1024
+ /** HUF_selectDecoder() :
1025
+ * Tells which decoder is likely to decode faster,
1026
+ * based on a set of pre-computed metrics.
1027
+ * @return : 0==HUF_decompress4X1, 1==HUF_decompress4X2 .
1028
+ * Assumption : 0 < dstSize <= 128 KB */
1029
+ U32 HUF_selectDecoder (size_t dstSize, size_t cSrcSize)
1030
+ {
1031
+ assert(dstSize > 0);
1032
+ assert(dstSize <= 128*1024);
1033
+ #if defined(HUF_FORCE_DECOMPRESS_X1)
1034
+ (void)dstSize;
1035
+ (void)cSrcSize;
1036
+ return 0;
1037
+ #elif defined(HUF_FORCE_DECOMPRESS_X2)
1038
+ (void)dstSize;
1039
+ (void)cSrcSize;
1040
+ return 1;
1041
+ #else
1042
+ /* decoder timing evaluation */
1043
+ { U32 const Q = (cSrcSize >= dstSize) ? 15 : (U32)(cSrcSize * 16 / dstSize); /* Q < 16 */
1044
+ U32 const D256 = (U32)(dstSize >> 8);
1045
+ U32 const DTime0 = algoTime[Q][0].tableTime + (algoTime[Q][0].decode256Time * D256);
1046
+ U32 DTime1 = algoTime[Q][1].tableTime + (algoTime[Q][1].decode256Time * D256);
1047
+ DTime1 += DTime1 >> 3; /* advantage to algorithm using less memory, to reduce cache eviction */
1048
+ return DTime1 < DTime0;
1049
+ }
1050
+ #endif
1051
+ }
1052
+
1053
+
1054
+ typedef size_t (*decompressionAlgo)(void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);
1055
+
1056
+ size_t HUF_decompress (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
1057
+ {
1058
+ #if !defined(HUF_FORCE_DECOMPRESS_X1) && !defined(HUF_FORCE_DECOMPRESS_X2)
1059
+ static const decompressionAlgo decompress[2] = { HUF_decompress4X1, HUF_decompress4X2 };
1060
+ #endif
1061
+
1062
+ /* validation checks */
1063
+ if (dstSize == 0) return ERROR(dstSize_tooSmall);
1064
+ if (cSrcSize > dstSize) return ERROR(corruption_detected); /* invalid */
1065
+ if (cSrcSize == dstSize) { memcpy(dst, cSrc, dstSize); return dstSize; } /* not compressed */
1066
+ if (cSrcSize == 1) { memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */
1067
+
1068
+ { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
1069
+ #if defined(HUF_FORCE_DECOMPRESS_X1)
1070
+ (void)algoNb;
1071
+ assert(algoNb == 0);
1072
+ return HUF_decompress4X1(dst, dstSize, cSrc, cSrcSize);
1073
+ #elif defined(HUF_FORCE_DECOMPRESS_X2)
1074
+ (void)algoNb;
1075
+ assert(algoNb == 1);
1076
+ return HUF_decompress4X2(dst, dstSize, cSrc, cSrcSize);
1077
+ #else
1078
+ return decompress[algoNb](dst, dstSize, cSrc, cSrcSize);
1079
+ #endif
1080
+ }
1081
+ }
1082
+
1083
+ size_t HUF_decompress4X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
1084
+ {
1085
+ /* validation checks */
1086
+ if (dstSize == 0) return ERROR(dstSize_tooSmall);
1087
+ if (cSrcSize > dstSize) return ERROR(corruption_detected); /* invalid */
1088
+ if (cSrcSize == dstSize) { memcpy(dst, cSrc, dstSize); return dstSize; } /* not compressed */
1089
+ if (cSrcSize == 1) { memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */
1090
+
1091
+ { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
1092
+ #if defined(HUF_FORCE_DECOMPRESS_X1)
1093
+ (void)algoNb;
1094
+ assert(algoNb == 0);
1095
+ return HUF_decompress4X1_DCtx(dctx, dst, dstSize, cSrc, cSrcSize);
1096
+ #elif defined(HUF_FORCE_DECOMPRESS_X2)
1097
+ (void)algoNb;
1098
+ assert(algoNb == 1);
1099
+ return HUF_decompress4X2_DCtx(dctx, dst, dstSize, cSrc, cSrcSize);
1100
+ #else
1101
+ return algoNb ? HUF_decompress4X2_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) :
1102
+ HUF_decompress4X1_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) ;
1103
+ #endif
1104
+ }
1105
+ }
1106
+
1107
+ size_t HUF_decompress4X_hufOnly(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
1108
+ {
1109
+ U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
1110
+ return HUF_decompress4X_hufOnly_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
1111
+ workSpace, sizeof(workSpace));
1112
+ }
1113
+
1114
+
1115
+ size_t HUF_decompress4X_hufOnly_wksp(HUF_DTable* dctx, void* dst,
1116
+ size_t dstSize, const void* cSrc,
1117
+ size_t cSrcSize, void* workSpace,
1118
+ size_t wkspSize)
1119
+ {
1120
+ /* validation checks */
1121
+ if (dstSize == 0) return ERROR(dstSize_tooSmall);
1122
+ if (cSrcSize == 0) return ERROR(corruption_detected);
1123
+
1124
+ { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
1125
+ #if defined(HUF_FORCE_DECOMPRESS_X1)
1126
+ (void)algoNb;
1127
+ assert(algoNb == 0);
1128
+ return HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize);
1129
+ #elif defined(HUF_FORCE_DECOMPRESS_X2)
1130
+ (void)algoNb;
1131
+ assert(algoNb == 1);
1132
+ return HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize);
1133
+ #else
1134
+ return algoNb ? HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc,
1135
+ cSrcSize, workSpace, wkspSize):
1136
+ HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize);
1137
+ #endif
1138
+ }
1139
+ }
1140
+
1141
+ size_t HUF_decompress1X_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
1142
+ const void* cSrc, size_t cSrcSize,
1143
+ void* workSpace, size_t wkspSize)
1144
+ {
1145
+ /* validation checks */
1146
+ if (dstSize == 0) return ERROR(dstSize_tooSmall);
1147
+ if (cSrcSize > dstSize) return ERROR(corruption_detected); /* invalid */
1148
+ if (cSrcSize == dstSize) { memcpy(dst, cSrc, dstSize); return dstSize; } /* not compressed */
1149
+ if (cSrcSize == 1) { memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */
1150
+
1151
+ { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
1152
+ #if defined(HUF_FORCE_DECOMPRESS_X1)
1153
+ (void)algoNb;
1154
+ assert(algoNb == 0);
1155
+ return HUF_decompress1X1_DCtx_wksp(dctx, dst, dstSize, cSrc,
1156
+ cSrcSize, workSpace, wkspSize);
1157
+ #elif defined(HUF_FORCE_DECOMPRESS_X2)
1158
+ (void)algoNb;
1159
+ assert(algoNb == 1);
1160
+ return HUF_decompress1X2_DCtx_wksp(dctx, dst, dstSize, cSrc,
1161
+ cSrcSize, workSpace, wkspSize);
1162
+ #else
1163
+ return algoNb ? HUF_decompress1X2_DCtx_wksp(dctx, dst, dstSize, cSrc,
1164
+ cSrcSize, workSpace, wkspSize):
1165
+ HUF_decompress1X1_DCtx_wksp(dctx, dst, dstSize, cSrc,
1166
+ cSrcSize, workSpace, wkspSize);
1167
+ #endif
1168
+ }
1169
+ }
1170
+
1171
+ size_t HUF_decompress1X_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize,
1172
+ const void* cSrc, size_t cSrcSize)
1173
+ {
1174
+ U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
1175
+ return HUF_decompress1X_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
1176
+ workSpace, sizeof(workSpace));
1177
+ }
1178
+
1179
+
1180
+ size_t HUF_decompress1X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2)
1181
+ {
1182
+ DTableDesc const dtd = HUF_getDTableDesc(DTable);
1183
+ #if defined(HUF_FORCE_DECOMPRESS_X1)
1184
+ (void)dtd;
1185
+ assert(dtd.tableType == 0);
1186
+ return HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
1187
+ #elif defined(HUF_FORCE_DECOMPRESS_X2)
1188
+ (void)dtd;
1189
+ assert(dtd.tableType == 1);
1190
+ return HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
1191
+ #else
1192
+ return dtd.tableType ? HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2) :
1193
+ HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
1194
+ #endif
1195
+ }
1196
+
1197
+ #ifndef HUF_FORCE_DECOMPRESS_X2
1198
+ size_t HUF_decompress1X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2)
1199
+ {
1200
+ const BYTE* ip = (const BYTE*) cSrc;
1201
+
1202
+ size_t const hSize = HUF_readDTableX1_wksp(dctx, cSrc, cSrcSize, workSpace, wkspSize);
1203
+ if (HUF_isError(hSize)) return hSize;
1204
+ if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
1205
+ ip += hSize; cSrcSize -= hSize;
1206
+
1207
+ return HUF_decompress1X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2);
1208
+ }
1209
+ #endif
1210
+
1211
+ size_t HUF_decompress4X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2)
1212
+ {
1213
+ DTableDesc const dtd = HUF_getDTableDesc(DTable);
1214
+ #if defined(HUF_FORCE_DECOMPRESS_X1)
1215
+ (void)dtd;
1216
+ assert(dtd.tableType == 0);
1217
+ return HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
1218
+ #elif defined(HUF_FORCE_DECOMPRESS_X2)
1219
+ (void)dtd;
1220
+ assert(dtd.tableType == 1);
1221
+ return HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
1222
+ #else
1223
+ return dtd.tableType ? HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2) :
1224
+ HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
1225
+ #endif
1226
+ }
1227
+
1228
+ size_t HUF_decompress4X_hufOnly_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2)
1229
+ {
1230
+ /* validation checks */
1231
+ if (dstSize == 0) return ERROR(dstSize_tooSmall);
1232
+ if (cSrcSize == 0) return ERROR(corruption_detected);
1233
+
1234
+ { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
1235
+ #if defined(HUF_FORCE_DECOMPRESS_X1)
1236
+ (void)algoNb;
1237
+ assert(algoNb == 0);
1238
+ return HUF_decompress4X1_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2);
1239
+ #elif defined(HUF_FORCE_DECOMPRESS_X2)
1240
+ (void)algoNb;
1241
+ assert(algoNb == 1);
1242
+ return HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2);
1243
+ #else
1244
+ return algoNb ? HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2) :
1245
+ HUF_decompress4X1_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2);
1246
+ #endif
1247
+ }
1248
+ }