zstdlib 0.1.0-x64-mingw32

Files changed (104)
  1. checksums.yaml +7 -0
  2. data/CHANGES.md +9 -0
  3. data/Gemfile +3 -0
  4. data/README.md +96 -0
  5. data/Rakefile +29 -0
  6. data/ext/zstdlib/extconf.rb +53 -0
  7. data/ext/zstdlib/ruby/zlib-2.2/zlib.c +4659 -0
  8. data/ext/zstdlib/ruby/zlib-2.3/zlib.c +4686 -0
  9. data/ext/zstdlib/ruby/zlib-2.4/zlib.c +4843 -0
  10. data/ext/zstdlib/ruby/zlib-2.5/zlib.c +4848 -0
  11. data/ext/zstdlib/ruby/zlib-2.6/zlib.c +4890 -0
  12. data/ext/zstdlib/zlib-1.2.11/adler32.c +186 -0
  13. data/ext/zstdlib/zlib-1.2.11/compress.c +86 -0
  14. data/ext/zstdlib/zlib-1.2.11/crc32.c +442 -0
  15. data/ext/zstdlib/zlib-1.2.11/crc32.h +441 -0
  16. data/ext/zstdlib/zlib-1.2.11/deflate.c +2163 -0
  17. data/ext/zstdlib/zlib-1.2.11/deflate.h +349 -0
  18. data/ext/zstdlib/zlib-1.2.11/gzclose.c +25 -0
  19. data/ext/zstdlib/zlib-1.2.11/gzguts.h +218 -0
  20. data/ext/zstdlib/zlib-1.2.11/gzlib.c +637 -0
  21. data/ext/zstdlib/zlib-1.2.11/gzread.c +654 -0
  22. data/ext/zstdlib/zlib-1.2.11/gzwrite.c +665 -0
  23. data/ext/zstdlib/zlib-1.2.11/infback.c +640 -0
  24. data/ext/zstdlib/zlib-1.2.11/inffast.c +323 -0
  25. data/ext/zstdlib/zlib-1.2.11/inffast.h +11 -0
  26. data/ext/zstdlib/zlib-1.2.11/inffixed.h +94 -0
  27. data/ext/zstdlib/zlib-1.2.11/inflate.c +1561 -0
  28. data/ext/zstdlib/zlib-1.2.11/inflate.h +125 -0
  29. data/ext/zstdlib/zlib-1.2.11/inftrees.c +304 -0
  30. data/ext/zstdlib/zlib-1.2.11/inftrees.h +62 -0
  31. data/ext/zstdlib/zlib-1.2.11/trees.c +1203 -0
  32. data/ext/zstdlib/zlib-1.2.11/trees.h +128 -0
  33. data/ext/zstdlib/zlib-1.2.11/uncompr.c +93 -0
  34. data/ext/zstdlib/zlib-1.2.11/zconf.h +534 -0
  35. data/ext/zstdlib/zlib-1.2.11/zlib.h +1912 -0
  36. data/ext/zstdlib/zlib-1.2.11/zutil.c +325 -0
  37. data/ext/zstdlib/zlib-1.2.11/zutil.h +271 -0
  38. data/ext/zstdlib/zlib.mk +14 -0
  39. data/ext/zstdlib/zlibwrapper.mk +14 -0
  40. data/ext/zstdlib/zlibwrapper/zlibwrapper.c +14 -0
  41. data/ext/zstdlib/zstd-1.3.8/lib/common/bitstream.h +455 -0
  42. data/ext/zstdlib/zstd-1.3.8/lib/common/compiler.h +140 -0
  43. data/ext/zstdlib/zstd-1.3.8/lib/common/cpu.h +215 -0
  44. data/ext/zstdlib/zstd-1.3.8/lib/common/debug.c +44 -0
  45. data/ext/zstdlib/zstd-1.3.8/lib/common/debug.h +134 -0
  46. data/ext/zstdlib/zstd-1.3.8/lib/common/entropy_common.c +236 -0
  47. data/ext/zstdlib/zstd-1.3.8/lib/common/error_private.c +54 -0
  48. data/ext/zstdlib/zstd-1.3.8/lib/common/error_private.h +76 -0
  49. data/ext/zstdlib/zstd-1.3.8/lib/common/fse.h +708 -0
  50. data/ext/zstdlib/zstd-1.3.8/lib/common/fse_decompress.c +309 -0
  51. data/ext/zstdlib/zstd-1.3.8/lib/common/huf.h +358 -0
  52. data/ext/zstdlib/zstd-1.3.8/lib/common/mem.h +380 -0
  53. data/ext/zstdlib/zstd-1.3.8/lib/common/pool.c +340 -0
  54. data/ext/zstdlib/zstd-1.3.8/lib/common/pool.h +84 -0
  55. data/ext/zstdlib/zstd-1.3.8/lib/common/threading.c +75 -0
  56. data/ext/zstdlib/zstd-1.3.8/lib/common/threading.h +123 -0
  57. data/ext/zstdlib/zstd-1.3.8/lib/common/xxhash.c +876 -0
  58. data/ext/zstdlib/zstd-1.3.8/lib/common/xxhash.h +305 -0
  59. data/ext/zstdlib/zstd-1.3.8/lib/common/zstd_common.c +83 -0
  60. data/ext/zstdlib/zstd-1.3.8/lib/common/zstd_errors.h +93 -0
  61. data/ext/zstdlib/zstd-1.3.8/lib/common/zstd_internal.h +266 -0
  62. data/ext/zstdlib/zstd-1.3.8/lib/compress/fse_compress.c +721 -0
  63. data/ext/zstdlib/zstd-1.3.8/lib/compress/hist.c +203 -0
  64. data/ext/zstdlib/zstd-1.3.8/lib/compress/hist.h +95 -0
  65. data/ext/zstdlib/zstd-1.3.8/lib/compress/huf_compress.c +798 -0
  66. data/ext/zstdlib/zstd-1.3.8/lib/compress/zstd_compress.c +4290 -0
  67. data/ext/zstdlib/zstd-1.3.8/lib/compress/zstd_compress_internal.h +860 -0
  68. data/ext/zstdlib/zstd-1.3.8/lib/compress/zstd_double_fast.c +499 -0
  69. data/ext/zstdlib/zstd-1.3.8/lib/compress/zstd_double_fast.h +38 -0
  70. data/ext/zstdlib/zstd-1.3.8/lib/compress/zstd_fast.c +391 -0
  71. data/ext/zstdlib/zstd-1.3.8/lib/compress/zstd_fast.h +37 -0
  72. data/ext/zstdlib/zstd-1.3.8/lib/compress/zstd_lazy.c +1106 -0
  73. data/ext/zstdlib/zstd-1.3.8/lib/compress/zstd_lazy.h +67 -0
  74. data/ext/zstdlib/zstd-1.3.8/lib/compress/zstd_ldm.c +597 -0
  75. data/ext/zstdlib/zstd-1.3.8/lib/compress/zstd_ldm.h +105 -0
  76. data/ext/zstdlib/zstd-1.3.8/lib/compress/zstd_opt.c +1217 -0
  77. data/ext/zstdlib/zstd-1.3.8/lib/compress/zstd_opt.h +56 -0
  78. data/ext/zstdlib/zstd-1.3.8/lib/compress/zstdmt_compress.c +2107 -0
  79. data/ext/zstdlib/zstd-1.3.8/lib/compress/zstdmt_compress.h +174 -0
  80. data/ext/zstdlib/zstd-1.3.8/lib/decompress/huf_decompress.c +1232 -0
  81. data/ext/zstdlib/zstd-1.3.8/lib/decompress/zstd_ddict.c +240 -0
  82. data/ext/zstdlib/zstd-1.3.8/lib/decompress/zstd_ddict.h +44 -0
  83. data/ext/zstdlib/zstd-1.3.8/lib/decompress/zstd_decompress.c +1672 -0
  84. data/ext/zstdlib/zstd-1.3.8/lib/decompress/zstd_decompress_block.c +1307 -0
  85. data/ext/zstdlib/zstd-1.3.8/lib/decompress/zstd_decompress_block.h +59 -0
  86. data/ext/zstdlib/zstd-1.3.8/lib/decompress/zstd_decompress_internal.h +168 -0
  87. data/ext/zstdlib/zstd-1.3.8/lib/zstd.h +1766 -0
  88. data/ext/zstdlib/zstd-1.3.8/zlibWrapper/gzclose.c +28 -0
  89. data/ext/zstdlib/zstd-1.3.8/zlibWrapper/gzcompatibility.h +68 -0
  90. data/ext/zstdlib/zstd-1.3.8/zlibWrapper/gzguts.h +227 -0
  91. data/ext/zstdlib/zstd-1.3.8/zlibWrapper/gzlib.c +640 -0
  92. data/ext/zstdlib/zstd-1.3.8/zlibWrapper/gzread.c +670 -0
  93. data/ext/zstdlib/zstd-1.3.8/zlibWrapper/gzwrite.c +671 -0
  94. data/ext/zstdlib/zstd-1.3.8/zlibWrapper/zstd_zlibwrapper.c +1105 -0
  95. data/ext/zstdlib/zstd-1.3.8/zlibWrapper/zstd_zlibwrapper.h +88 -0
  96. data/ext/zstdlib/zstd.mk +14 -0
  97. data/lib/2.2/zstdlib.so +0 -0
  98. data/lib/2.3/zstdlib.so +0 -0
  99. data/lib/2.4/zstdlib.so +0 -0
  100. data/lib/2.5/zstdlib.so +0 -0
  101. data/lib/2.6/zstdlib.so +0 -0
  102. data/lib/zstdlib.rb +6 -0
  103. data/test/zstdlib_test.rb +21 -0
  104. metadata +205 -0
data/ext/zstdlib/zstd-1.3.8/lib/compress/zstdmt_compress.h
@@ -0,0 +1,174 @@
+ /*
+ * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+ #ifndef ZSTDMT_COMPRESS_H
+ #define ZSTDMT_COMPRESS_H
+
+ #if defined (__cplusplus)
+ extern "C" {
+ #endif
+
+
+ /* Note : This is an internal API.
+ * Some methods are still exposed (ZSTDLIB_API),
+ * because it used to be the only way to invoke MT compression.
+ * Now, it's recommended to use ZSTD_compress_generic() instead.
+ * These methods will stop being exposed in a future version */
+
+ /* === Dependencies === */
+ #include <stddef.h> /* size_t */
+ #define ZSTD_STATIC_LINKING_ONLY /* ZSTD_parameters */
+ #include "zstd.h" /* ZSTD_inBuffer, ZSTD_outBuffer, ZSTDLIB_API */
+
+
+ /* === Constants === */
+ #ifndef ZSTDMT_NBWORKERS_MAX
+ # define ZSTDMT_NBWORKERS_MAX 200
+ #endif
+ #ifndef ZSTDMT_JOBSIZE_MIN
+ # define ZSTDMT_JOBSIZE_MIN (1 MB)
+ #endif
+ #define ZSTDMT_JOBSIZE_MAX (MEM_32bits() ? (512 MB) : (1024 MB))
+
+
+ /* === Memory management === */
+ typedef struct ZSTDMT_CCtx_s ZSTDMT_CCtx;
+ ZSTDLIB_API ZSTDMT_CCtx* ZSTDMT_createCCtx(unsigned nbWorkers);
+ ZSTDLIB_API ZSTDMT_CCtx* ZSTDMT_createCCtx_advanced(unsigned nbWorkers,
+ ZSTD_customMem cMem);
+ ZSTDLIB_API size_t ZSTDMT_freeCCtx(ZSTDMT_CCtx* mtctx);
+
+ ZSTDLIB_API size_t ZSTDMT_sizeof_CCtx(ZSTDMT_CCtx* mtctx);
+
+
+ /* === Simple one-pass compression function === */
+
+ ZSTDLIB_API size_t ZSTDMT_compressCCtx(ZSTDMT_CCtx* mtctx,
+ void* dst, size_t dstCapacity,
+ const void* src, size_t srcSize,
+ int compressionLevel);
+
+
+
+ /* === Streaming functions === */
+
+ ZSTDLIB_API size_t ZSTDMT_initCStream(ZSTDMT_CCtx* mtctx, int compressionLevel);
+ ZSTDLIB_API size_t ZSTDMT_resetCStream(ZSTDMT_CCtx* mtctx, unsigned long long pledgedSrcSize); /**< if srcSize is not known at reset time, use ZSTD_CONTENTSIZE_UNKNOWN. Note: for compatibility with older programs, 0 means the same as ZSTD_CONTENTSIZE_UNKNOWN, but it will change in the future to mean "empty" */
+
+ ZSTDLIB_API size_t ZSTDMT_nextInputSizeHint(const ZSTDMT_CCtx* mtctx);
+ ZSTDLIB_API size_t ZSTDMT_compressStream(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output, ZSTD_inBuffer* input);
+
+ ZSTDLIB_API size_t ZSTDMT_flushStream(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output); /**< @return : 0 == all flushed; >0 : still some data to be flushed; or an error code (ZSTD_isError()) */
+ ZSTDLIB_API size_t ZSTDMT_endStream(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output); /**< @return : 0 == all flushed; >0 : still some data to be flushed; or an error code (ZSTD_isError()) */
+
+
+ /* === Advanced functions and parameters === */
+
+ ZSTDLIB_API size_t ZSTDMT_compress_advanced(ZSTDMT_CCtx* mtctx,
+ void* dst, size_t dstCapacity,
+ const void* src, size_t srcSize,
+ const ZSTD_CDict* cdict,
+ ZSTD_parameters params,
+ int overlapLog);
+
+ ZSTDLIB_API size_t ZSTDMT_initCStream_advanced(ZSTDMT_CCtx* mtctx,
+ const void* dict, size_t dictSize, /* dict can be released after init, a local copy is preserved within zcs */
+ ZSTD_parameters params,
+ unsigned long long pledgedSrcSize); /* pledgedSrcSize is optional and can be zero == unknown */
+
+ ZSTDLIB_API size_t ZSTDMT_initCStream_usingCDict(ZSTDMT_CCtx* mtctx,
+ const ZSTD_CDict* cdict,
+ ZSTD_frameParameters fparams,
+ unsigned long long pledgedSrcSize); /* note : zero means empty */
+
+ /* ZSTDMT_parameter :
+ * List of parameters that can be set using ZSTDMT_setMTCtxParameter() */
+ typedef enum {
+ ZSTDMT_p_jobSize, /* Each job is compressed in parallel. By default, this value is dynamically determined depending on compression parameters. Can be set explicitly here. */
+ ZSTDMT_p_overlapLog, /* Each job may reload a part of previous job to enhance compression ratio; 0 == no overlap, 6 (default) == use 1/8th of window, >=9 == use full window. This is a "sticky" parameter : its value will be re-used on next compression job */
+ ZSTDMT_p_rsyncable /* Enables rsyncable mode. */
+ } ZSTDMT_parameter;
+
+ /* ZSTDMT_setMTCtxParameter() :
+ * allow setting individual parameters, one at a time, among a list of enums defined in ZSTDMT_parameter.
+ * The function must typically be called after ZSTD_createCCtx() but __before ZSTDMT_init*() !__
+ * Parameters not explicitly reset by ZSTDMT_init*() remain the same in consecutive compression sessions.
+ * @return : 0, or an error code (which can be tested using ZSTD_isError()) */
+ ZSTDLIB_API size_t ZSTDMT_setMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSTDMT_parameter parameter, int value);
+
+ /* ZSTDMT_getMTCtxParameter() :
+ * Query the ZSTDMT_CCtx for a parameter value.
+ * @return : 0, or an error code (which can be tested using ZSTD_isError()) */
+ ZSTDLIB_API size_t ZSTDMT_getMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSTDMT_parameter parameter, int* value);
+
+
+ /*! ZSTDMT_compressStream_generic() :
+ * Combines ZSTDMT_compressStream() with optional ZSTDMT_flushStream() or ZSTDMT_endStream()
+ * depending on flush directive.
+ * @return : minimum amount of data still to be flushed
+ * 0 if fully flushed
+ * or an error code
+ * note : needs to be init using any ZSTD_initCStream*() variant */
+ ZSTDLIB_API size_t ZSTDMT_compressStream_generic(ZSTDMT_CCtx* mtctx,
+ ZSTD_outBuffer* output,
+ ZSTD_inBuffer* input,
+ ZSTD_EndDirective endOp);
+
+
+ /* ========================================================
+ * === Private interface, for use by ZSTD_compress.c ===
+ * === Not exposed in libzstd. Never invoke directly ===
+ * ======================================================== */
+
+ /*! ZSTDMT_toFlushNow()
+ * Tell how many bytes are ready to be flushed immediately.
+ * Probe the oldest active job (not yet entirely flushed) and check its output buffer.
+ * If return 0, it means there is no active job,
+ * or, it means oldest job is still active, but everything produced has been flushed so far,
+ * therefore flushing is limited by speed of oldest job. */
+ size_t ZSTDMT_toFlushNow(ZSTDMT_CCtx* mtctx);
+
+ /*! ZSTDMT_CCtxParam_setMTCtxParameter()
+ * like ZSTDMT_setMTCtxParameter(), but into a ZSTD_CCtx_Params */
+ size_t ZSTDMT_CCtxParam_setMTCtxParameter(ZSTD_CCtx_params* params, ZSTDMT_parameter parameter, int value);
+
+ /*! ZSTDMT_CCtxParam_setNbWorkers()
+ * Set nbWorkers, and clamp it.
+ * Also reset jobSize and overlapLog */
+ size_t ZSTDMT_CCtxParam_setNbWorkers(ZSTD_CCtx_params* params, unsigned nbWorkers);
+
+ /*! ZSTDMT_updateCParams_whileCompressing() :
+ * Updates only a selected set of compression parameters, to remain compatible with current frame.
+ * New parameters will be applied to next compression job. */
+ void ZSTDMT_updateCParams_whileCompressing(ZSTDMT_CCtx* mtctx, const ZSTD_CCtx_params* cctxParams);
+
+ /*! ZSTDMT_getFrameProgression():
+ * tells how much data has been consumed (input) and produced (output) for current frame.
+ * able to count progression inside worker threads.
+ */
+ ZSTD_frameProgression ZSTDMT_getFrameProgression(ZSTDMT_CCtx* mtctx);
+
+
+ /*! ZSTDMT_initCStream_internal() :
+ * Private use only. Init streaming operation.
+ * expects params to be valid.
+ * must receive dict, or cdict, or none, but not both.
+ * @return : 0, or an error code */
+ size_t ZSTDMT_initCStream_internal(ZSTDMT_CCtx* zcs,
+ const void* dict, size_t dictSize, ZSTD_dictContentType_e dictContentType,
+ const ZSTD_CDict* cdict,
+ ZSTD_CCtx_params params, unsigned long long pledgedSrcSize);
+
+
+ #if defined (__cplusplus)
+ }
+ #endif
+
+ #endif /* ZSTDMT_COMPRESS_H */
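
For orientation, here is a minimal usage sketch of the streaming API declared above (illustrative only, not part of the gem's sources): it compresses stdin to stdout through a small worker pool. The worker count and compression level are arbitrary assumptions; as the header itself notes, new code should prefer ZSTD_compress_generic().

    /* sketch: stdin -> zstd frame on stdout, via the legacy ZSTDMT entry points */
    #include <stdio.h>
    #include <stdlib.h>
    #define ZSTD_STATIC_LINKING_ONLY
    #include "zstd.h"
    #include "zstdmt_compress.h"

    int main(void)
    {
        size_t const inSize  = ZSTD_CStreamInSize();   /* recommended input chunk size */
        size_t const outSize = ZSTD_CStreamOutSize();  /* output chunk guaranteeing progress */
        char* const inBuf  = malloc(inSize);
        char* const outBuf = malloc(outSize);
        ZSTDMT_CCtx* const mtctx = ZSTDMT_createCCtx(4);  /* 4 workers: arbitrary choice */
        if (!inBuf || !outBuf || !mtctx) return 1;
        if (ZSTD_isError(ZSTDMT_initCStream(mtctx, 3))) return 1;  /* level 3: arbitrary */

        for (;;) {
            size_t const readSize = fread(inBuf, 1, inSize, stdin);
            ZSTD_inBuffer input = { inBuf, readSize, 0 };
            if (readSize == 0) break;
            while (input.pos < input.size) {   /* feed jobs to the workers */
                ZSTD_outBuffer output = { outBuf, outSize, 0 };
                if (ZSTD_isError(ZSTDMT_compressStream(mtctx, &output, &input))) return 1;
                fwrite(outBuf, 1, output.pos, stdout);
            }
        }
        for (;;) {   /* ZSTDMT_endStream() returns >0 while data remains to flush */
            ZSTD_outBuffer output = { outBuf, outSize, 0 };
            size_t const remaining = ZSTDMT_endStream(mtctx, &output);
            if (ZSTD_isError(remaining)) return 1;
            fwrite(outBuf, 1, output.pos, stdout);
            if (remaining == 0) break;
        }
        ZSTDMT_freeCCtx(mtctx);
        free(inBuf); free(outBuf);
        return 0;
    }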
data/ext/zstdlib/zstd-1.3.8/lib/decompress/huf_decompress.c
@@ -0,0 +1,1232 @@
+ /* ******************************************************************
+ huff0 huffman decoder,
+ part of Finite State Entropy library
+ Copyright (C) 2013-present, Yann Collet.
+
+ BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are
+ met:
+
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above
+ copyright notice, this list of conditions and the following disclaimer
+ in the documentation and/or other materials provided with the
+ distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ You can contact the author at :
+ - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy
+ ****************************************************************** */
+
+ /* **************************************************************
+ * Dependencies
+ ****************************************************************/
+ #include <string.h> /* memcpy, memset */
+ #include "compiler.h"
+ #include "bitstream.h" /* BIT_* */
+ #include "fse.h" /* to compress headers */
+ #define HUF_STATIC_LINKING_ONLY
+ #include "huf.h"
+ #include "error_private.h"
+
+ /* **************************************************************
+ * Macros
+ ****************************************************************/
+
+ /* These two optional macros force the use one way or another of the two
+ * Huffman decompression implementations. You can't force in both directions
+ * at the same time.
+ */
+ #if defined(HUF_FORCE_DECOMPRESS_X1) && \
+ defined(HUF_FORCE_DECOMPRESS_X2)
+ #error "Cannot force the use of the X1 and X2 decoders at the same time!"
+ #endif
+
+
+ /* **************************************************************
+ * Error Management
+ ****************************************************************/
+ #define HUF_isError ERR_isError
+ #define CHECK_F(f) { size_t const err_ = (f); if (HUF_isError(err_)) return err_; }
+
+
+ /* **************************************************************
+ * Byte alignment for workSpace management
+ ****************************************************************/
+ #define HUF_ALIGN(x, a) HUF_ALIGN_MASK((x), (a) - 1)
+ #define HUF_ALIGN_MASK(x, mask) (((x) + (mask)) & ~(mask))
+
+
+ /* **************************************************************
+ * BMI2 Variant Wrappers
+ ****************************************************************/
+ #if DYNAMIC_BMI2
+
+ #define HUF_DGEN(fn) \
+ \
+ static size_t fn##_default( \
+ void* dst, size_t dstSize, \
+ const void* cSrc, size_t cSrcSize, \
+ const HUF_DTable* DTable) \
+ { \
+ return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable); \
+ } \
+ \
+ static TARGET_ATTRIBUTE("bmi2") size_t fn##_bmi2( \
+ void* dst, size_t dstSize, \
+ const void* cSrc, size_t cSrcSize, \
+ const HUF_DTable* DTable) \
+ { \
+ return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable); \
+ } \
+ \
+ static size_t fn(void* dst, size_t dstSize, void const* cSrc, \
+ size_t cSrcSize, HUF_DTable const* DTable, int bmi2) \
+ { \
+ if (bmi2) { \
+ return fn##_bmi2(dst, dstSize, cSrc, cSrcSize, DTable); \
+ } \
+ return fn##_default(dst, dstSize, cSrc, cSrcSize, DTable); \
+ }
+
+ #else
+
+ #define HUF_DGEN(fn) \
+ static size_t fn(void* dst, size_t dstSize, void const* cSrc, \
+ size_t cSrcSize, HUF_DTable const* DTable, int bmi2) \
+ { \
+ (void)bmi2; \
+ return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable); \
+ }
+
+ #endif
+
+
+ /*-***************************/
+ /* generic DTableDesc */
+ /*-***************************/
+ typedef struct { BYTE maxTableLog; BYTE tableType; BYTE tableLog; BYTE reserved; } DTableDesc;
+
+ static DTableDesc HUF_getDTableDesc(const HUF_DTable* table)
+ {
+ DTableDesc dtd;
+ memcpy(&dtd, table, sizeof(dtd));
+ return dtd;
+ }
+
+
+ #ifndef HUF_FORCE_DECOMPRESS_X2
+
+ /*-***************************/
+ /* single-symbol decoding */
+ /*-***************************/
+ typedef struct { BYTE byte; BYTE nbBits; } HUF_DEltX1; /* single-symbol decoding */
+
+ size_t HUF_readDTableX1_wksp(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize)
+ {
+ U32 tableLog = 0;
+ U32 nbSymbols = 0;
+ size_t iSize;
+ void* const dtPtr = DTable + 1;
+ HUF_DEltX1* const dt = (HUF_DEltX1*)dtPtr;
+
+ U32* rankVal;
+ BYTE* huffWeight;
+ size_t spaceUsed32 = 0;
+
+ rankVal = (U32 *)workSpace + spaceUsed32;
+ spaceUsed32 += HUF_TABLELOG_ABSOLUTEMAX + 1;
+ huffWeight = (BYTE *)((U32 *)workSpace + spaceUsed32);
+ spaceUsed32 += HUF_ALIGN(HUF_SYMBOLVALUE_MAX + 1, sizeof(U32)) >> 2;
+
+ if ((spaceUsed32 << 2) > wkspSize) return ERROR(tableLog_tooLarge);
+
+ DEBUG_STATIC_ASSERT(sizeof(DTableDesc) == sizeof(HUF_DTable));
+ /* memset(huffWeight, 0, sizeof(huffWeight)); */ /* is not necessary, even though some analyzer complain ... */
+
+ iSize = HUF_readStats(huffWeight, HUF_SYMBOLVALUE_MAX + 1, rankVal, &nbSymbols, &tableLog, src, srcSize);
+ if (HUF_isError(iSize)) return iSize;
+
+ /* Table header */
+ { DTableDesc dtd = HUF_getDTableDesc(DTable);
+ if (tableLog > (U32)(dtd.maxTableLog+1)) return ERROR(tableLog_tooLarge); /* DTable too small, Huffman tree cannot fit in */
+ dtd.tableType = 0;
+ dtd.tableLog = (BYTE)tableLog;
+ memcpy(DTable, &dtd, sizeof(dtd));
+ }
+
+ /* Calculate starting value for each rank */
+ { U32 n, nextRankStart = 0;
+ for (n=1; n<tableLog+1; n++) {
+ U32 const current = nextRankStart;
+ nextRankStart += (rankVal[n] << (n-1));
+ rankVal[n] = current;
+ } }
+
+ /* fill DTable */
+ { U32 n;
+ for (n=0; n<nbSymbols; n++) {
+ U32 const w = huffWeight[n];
+ U32 const length = (1 << w) >> 1;
+ U32 u;
+ HUF_DEltX1 D;
+ D.byte = (BYTE)n; D.nbBits = (BYTE)(tableLog + 1 - w);
+ for (u = rankVal[w]; u < rankVal[w] + length; u++)
+ dt[u] = D;
+ rankVal[w] += length;
+ } }
+
+ return iSize;
+ }
+
+ size_t HUF_readDTableX1(HUF_DTable* DTable, const void* src, size_t srcSize)
+ {
+ U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
+ return HUF_readDTableX1_wksp(DTable, src, srcSize,
+ workSpace, sizeof(workSpace));
+ }
+
+ FORCE_INLINE_TEMPLATE BYTE
+ HUF_decodeSymbolX1(BIT_DStream_t* Dstream, const HUF_DEltX1* dt, const U32 dtLog)
+ {
+ size_t const val = BIT_lookBitsFast(Dstream, dtLog); /* note : dtLog >= 1 */
+ BYTE const c = dt[val].byte;
+ BIT_skipBits(Dstream, dt[val].nbBits);
+ return c;
+ }
+
+ #define HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr) \
+ *ptr++ = HUF_decodeSymbolX1(DStreamPtr, dt, dtLog)
+
+ #define HUF_DECODE_SYMBOLX1_1(ptr, DStreamPtr) \
+ if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \
+ HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr)
+
+ #define HUF_DECODE_SYMBOLX1_2(ptr, DStreamPtr) \
+ if (MEM_64bits()) \
+ HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr)
+
+ HINT_INLINE size_t
+ HUF_decodeStreamX1(BYTE* p, BIT_DStream_t* const bitDPtr, BYTE* const pEnd, const HUF_DEltX1* const dt, const U32 dtLog)
+ {
+ BYTE* const pStart = p;
+
+ /* up to 4 symbols at a time */
+ while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd-3)) {
+ HUF_DECODE_SYMBOLX1_2(p, bitDPtr);
+ HUF_DECODE_SYMBOLX1_1(p, bitDPtr);
+ HUF_DECODE_SYMBOLX1_2(p, bitDPtr);
+ HUF_DECODE_SYMBOLX1_0(p, bitDPtr);
+ }
+
+ /* [0-3] symbols remaining */
+ if (MEM_32bits())
+ while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd))
+ HUF_DECODE_SYMBOLX1_0(p, bitDPtr);
+
+ /* no more data to retrieve from bitstream, no need to reload */
+ while (p < pEnd)
+ HUF_DECODE_SYMBOLX1_0(p, bitDPtr);
+
+ return pEnd-pStart;
+ }
+
+ FORCE_INLINE_TEMPLATE size_t
+ HUF_decompress1X1_usingDTable_internal_body(
+ void* dst, size_t dstSize,
+ const void* cSrc, size_t cSrcSize,
+ const HUF_DTable* DTable)
+ {
+ BYTE* op = (BYTE*)dst;
+ BYTE* const oend = op + dstSize;
+ const void* dtPtr = DTable + 1;
+ const HUF_DEltX1* const dt = (const HUF_DEltX1*)dtPtr;
+ BIT_DStream_t bitD;
+ DTableDesc const dtd = HUF_getDTableDesc(DTable);
+ U32 const dtLog = dtd.tableLog;
+
+ CHECK_F( BIT_initDStream(&bitD, cSrc, cSrcSize) );
+
+ HUF_decodeStreamX1(op, &bitD, oend, dt, dtLog);
+
+ if (!BIT_endOfDStream(&bitD)) return ERROR(corruption_detected);
+
+ return dstSize;
+ }
+
+ FORCE_INLINE_TEMPLATE size_t
+ HUF_decompress4X1_usingDTable_internal_body(
+ void* dst, size_t dstSize,
+ const void* cSrc, size_t cSrcSize,
+ const HUF_DTable* DTable)
+ {
+ /* Check */
+ if (cSrcSize < 10) return ERROR(corruption_detected); /* strict minimum : jump table + 1 byte per stream */
+
+ { const BYTE* const istart = (const BYTE*) cSrc;
+ BYTE* const ostart = (BYTE*) dst;
+ BYTE* const oend = ostart + dstSize;
+ const void* const dtPtr = DTable + 1;
+ const HUF_DEltX1* const dt = (const HUF_DEltX1*)dtPtr;
+
+ /* Init */
+ BIT_DStream_t bitD1;
+ BIT_DStream_t bitD2;
+ BIT_DStream_t bitD3;
+ BIT_DStream_t bitD4;
+ size_t const length1 = MEM_readLE16(istart);
+ size_t const length2 = MEM_readLE16(istart+2);
+ size_t const length3 = MEM_readLE16(istart+4);
+ size_t const length4 = cSrcSize - (length1 + length2 + length3 + 6);
+ const BYTE* const istart1 = istart + 6; /* jumpTable */
+ const BYTE* const istart2 = istart1 + length1;
+ const BYTE* const istart3 = istart2 + length2;
+ const BYTE* const istart4 = istart3 + length3;
+ const size_t segmentSize = (dstSize+3) / 4;
+ BYTE* const opStart2 = ostart + segmentSize;
+ BYTE* const opStart3 = opStart2 + segmentSize;
+ BYTE* const opStart4 = opStart3 + segmentSize;
+ BYTE* op1 = ostart;
+ BYTE* op2 = opStart2;
+ BYTE* op3 = opStart3;
+ BYTE* op4 = opStart4;
+ U32 endSignal = BIT_DStream_unfinished;
+ DTableDesc const dtd = HUF_getDTableDesc(DTable);
+ U32 const dtLog = dtd.tableLog;
+
+ if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */
+ CHECK_F( BIT_initDStream(&bitD1, istart1, length1) );
+ CHECK_F( BIT_initDStream(&bitD2, istart2, length2) );
+ CHECK_F( BIT_initDStream(&bitD3, istart3, length3) );
+ CHECK_F( BIT_initDStream(&bitD4, istart4, length4) );
+
+ /* up to 16 symbols per loop (4 symbols per stream) in 64-bit mode */
+ endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
+ while ( (endSignal==BIT_DStream_unfinished) && (op4<(oend-3)) ) {
+ HUF_DECODE_SYMBOLX1_2(op1, &bitD1);
+ HUF_DECODE_SYMBOLX1_2(op2, &bitD2);
+ HUF_DECODE_SYMBOLX1_2(op3, &bitD3);
+ HUF_DECODE_SYMBOLX1_2(op4, &bitD4);
+ HUF_DECODE_SYMBOLX1_1(op1, &bitD1);
+ HUF_DECODE_SYMBOLX1_1(op2, &bitD2);
+ HUF_DECODE_SYMBOLX1_1(op3, &bitD3);
+ HUF_DECODE_SYMBOLX1_1(op4, &bitD4);
+ HUF_DECODE_SYMBOLX1_2(op1, &bitD1);
+ HUF_DECODE_SYMBOLX1_2(op2, &bitD2);
+ HUF_DECODE_SYMBOLX1_2(op3, &bitD3);
+ HUF_DECODE_SYMBOLX1_2(op4, &bitD4);
+ HUF_DECODE_SYMBOLX1_0(op1, &bitD1);
+ HUF_DECODE_SYMBOLX1_0(op2, &bitD2);
+ HUF_DECODE_SYMBOLX1_0(op3, &bitD3);
+ HUF_DECODE_SYMBOLX1_0(op4, &bitD4);
+ BIT_reloadDStream(&bitD1);
+ BIT_reloadDStream(&bitD2);
+ BIT_reloadDStream(&bitD3);
+ BIT_reloadDStream(&bitD4);
+ }
+
+ /* check corruption */
+ /* note : should not be necessary : op# advance in lock step, and we control op4.
+ * but curiously, binary generated by gcc 7.2 & 7.3 with -mbmi2 runs faster when >=1 test is present */
+ if (op1 > opStart2) return ERROR(corruption_detected);
+ if (op2 > opStart3) return ERROR(corruption_detected);
+ if (op3 > opStart4) return ERROR(corruption_detected);
+ /* note : op4 supposed already verified within main loop */
+
+ /* finish bitStreams one by one */
+ HUF_decodeStreamX1(op1, &bitD1, opStart2, dt, dtLog);
+ HUF_decodeStreamX1(op2, &bitD2, opStart3, dt, dtLog);
+ HUF_decodeStreamX1(op3, &bitD3, opStart4, dt, dtLog);
+ HUF_decodeStreamX1(op4, &bitD4, oend, dt, dtLog);
+
+ /* check */
+ { U32 const endCheck = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4);
+ if (!endCheck) return ERROR(corruption_detected); }
+
+ /* decoded size */
+ return dstSize;
+ }
+ }
+
+
+ typedef size_t (*HUF_decompress_usingDTable_t)(void *dst, size_t dstSize,
+ const void *cSrc,
+ size_t cSrcSize,
+ const HUF_DTable *DTable);
+
+ HUF_DGEN(HUF_decompress1X1_usingDTable_internal)
+ HUF_DGEN(HUF_decompress4X1_usingDTable_internal)
+
+
+
+ size_t HUF_decompress1X1_usingDTable(
+ void* dst, size_t dstSize,
+ const void* cSrc, size_t cSrcSize,
+ const HUF_DTable* DTable)
+ {
+ DTableDesc dtd = HUF_getDTableDesc(DTable);
+ if (dtd.tableType != 0) return ERROR(GENERIC);
+ return HUF_decompress1X1_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
+ }
+
+ size_t HUF_decompress1X1_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize,
+ const void* cSrc, size_t cSrcSize,
+ void* workSpace, size_t wkspSize)
+ {
+ const BYTE* ip = (const BYTE*) cSrc;
+
+ size_t const hSize = HUF_readDTableX1_wksp(DCtx, cSrc, cSrcSize, workSpace, wkspSize);
+ if (HUF_isError(hSize)) return hSize;
+ if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
+ ip += hSize; cSrcSize -= hSize;
+
+ return HUF_decompress1X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, DCtx, /* bmi2 */ 0);
+ }
+
+
+ size_t HUF_decompress1X1_DCtx(HUF_DTable* DCtx, void* dst, size_t dstSize,
+ const void* cSrc, size_t cSrcSize)
+ {
+ U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
+ return HUF_decompress1X1_DCtx_wksp(DCtx, dst, dstSize, cSrc, cSrcSize,
+ workSpace, sizeof(workSpace));
+ }
+
+ size_t HUF_decompress1X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
+ {
+ HUF_CREATE_STATIC_DTABLEX1(DTable, HUF_TABLELOG_MAX);
+ return HUF_decompress1X1_DCtx (DTable, dst, dstSize, cSrc, cSrcSize);
+ }
+
+ size_t HUF_decompress4X1_usingDTable(
+ void* dst, size_t dstSize,
+ const void* cSrc, size_t cSrcSize,
+ const HUF_DTable* DTable)
+ {
+ DTableDesc dtd = HUF_getDTableDesc(DTable);
+ if (dtd.tableType != 0) return ERROR(GENERIC);
+ return HUF_decompress4X1_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
+ }
+
+ static size_t HUF_decompress4X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize,
+ const void* cSrc, size_t cSrcSize,
+ void* workSpace, size_t wkspSize, int bmi2)
+ {
+ const BYTE* ip = (const BYTE*) cSrc;
+
+ size_t const hSize = HUF_readDTableX1_wksp (dctx, cSrc, cSrcSize,
+ workSpace, wkspSize);
+ if (HUF_isError(hSize)) return hSize;
+ if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
+ ip += hSize; cSrcSize -= hSize;
+
+ return HUF_decompress4X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2);
+ }
+
+ size_t HUF_decompress4X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
+ const void* cSrc, size_t cSrcSize,
+ void* workSpace, size_t wkspSize)
+ {
+ return HUF_decompress4X1_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, 0);
+ }
+
+
+ size_t HUF_decompress4X1_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
+ {
+ U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
+ return HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
+ workSpace, sizeof(workSpace));
+ }
+ size_t HUF_decompress4X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
+ {
+ HUF_CREATE_STATIC_DTABLEX1(DTable, HUF_TABLELOG_MAX);
+ return HUF_decompress4X1_DCtx(DTable, dst, dstSize, cSrc, cSrcSize);
+ }
+
+ #endif /* HUF_FORCE_DECOMPRESS_X2 */
+
+
+ #ifndef HUF_FORCE_DECOMPRESS_X1
+
+ /* *************************/
+ /* double-symbols decoding */
+ /* *************************/
+
+ typedef struct { U16 sequence; BYTE nbBits; BYTE length; } HUF_DEltX2; /* double-symbols decoding */
+ typedef struct { BYTE symbol; BYTE weight; } sortedSymbol_t;
+ typedef U32 rankValCol_t[HUF_TABLELOG_MAX + 1];
+ typedef rankValCol_t rankVal_t[HUF_TABLELOG_MAX];
+
+
+ /* HUF_fillDTableX2Level2() :
+ * `rankValOrigin` must be a table of at least (HUF_TABLELOG_MAX + 1) U32 */
+ static void HUF_fillDTableX2Level2(HUF_DEltX2* DTable, U32 sizeLog, const U32 consumed,
+ const U32* rankValOrigin, const int minWeight,
+ const sortedSymbol_t* sortedSymbols, const U32 sortedListSize,
+ U32 nbBitsBaseline, U16 baseSeq)
+ {
+ HUF_DEltX2 DElt;
+ U32 rankVal[HUF_TABLELOG_MAX + 1];
+
+ /* get pre-calculated rankVal */
+ memcpy(rankVal, rankValOrigin, sizeof(rankVal));
+
+ /* fill skipped values */
+ if (minWeight>1) {
+ U32 i, skipSize = rankVal[minWeight];
+ MEM_writeLE16(&(DElt.sequence), baseSeq);
+ DElt.nbBits = (BYTE)(consumed);
+ DElt.length = 1;
+ for (i = 0; i < skipSize; i++)
+ DTable[i] = DElt;
+ }
+
+ /* fill DTable */
+ { U32 s; for (s=0; s<sortedListSize; s++) { /* note : sortedSymbols already skipped */
+ const U32 symbol = sortedSymbols[s].symbol;
+ const U32 weight = sortedSymbols[s].weight;
+ const U32 nbBits = nbBitsBaseline - weight;
+ const U32 length = 1 << (sizeLog-nbBits);
+ const U32 start = rankVal[weight];
+ U32 i = start;
+ const U32 end = start + length;
+
+ MEM_writeLE16(&(DElt.sequence), (U16)(baseSeq + (symbol << 8)));
+ DElt.nbBits = (BYTE)(nbBits + consumed);
+ DElt.length = 2;
+ do { DTable[i++] = DElt; } while (i<end); /* since length >= 1 */
+
+ rankVal[weight] += length;
+ } }
+ }
+
+
+ static void HUF_fillDTableX2(HUF_DEltX2* DTable, const U32 targetLog,
+ const sortedSymbol_t* sortedList, const U32 sortedListSize,
+ const U32* rankStart, rankVal_t rankValOrigin, const U32 maxWeight,
+ const U32 nbBitsBaseline)
+ {
+ U32 rankVal[HUF_TABLELOG_MAX + 1];
+ const int scaleLog = nbBitsBaseline - targetLog; /* note : targetLog >= srcLog, hence scaleLog <= 1 */
+ const U32 minBits = nbBitsBaseline - maxWeight;
+ U32 s;
+
+ memcpy(rankVal, rankValOrigin, sizeof(rankVal));
+
+ /* fill DTable */
+ for (s=0; s<sortedListSize; s++) {
+ const U16 symbol = sortedList[s].symbol;
+ const U32 weight = sortedList[s].weight;
+ const U32 nbBits = nbBitsBaseline - weight;
+ const U32 start = rankVal[weight];
+ const U32 length = 1 << (targetLog-nbBits);
+
+ if (targetLog-nbBits >= minBits) { /* enough room for a second symbol */
+ U32 sortedRank;
+ int minWeight = nbBits + scaleLog;
+ if (minWeight < 1) minWeight = 1;
+ sortedRank = rankStart[minWeight];
+ HUF_fillDTableX2Level2(DTable+start, targetLog-nbBits, nbBits,
+ rankValOrigin[nbBits], minWeight,
+ sortedList+sortedRank, sortedListSize-sortedRank,
+ nbBitsBaseline, symbol);
+ } else {
+ HUF_DEltX2 DElt;
+ MEM_writeLE16(&(DElt.sequence), symbol);
+ DElt.nbBits = (BYTE)(nbBits);
+ DElt.length = 1;
+ { U32 const end = start + length;
+ U32 u;
+ for (u = start; u < end; u++) DTable[u] = DElt;
+ } }
+ rankVal[weight] += length;
+ }
+ }
+
+ size_t HUF_readDTableX2_wksp(HUF_DTable* DTable,
+ const void* src, size_t srcSize,
+ void* workSpace, size_t wkspSize)
+ {
+ U32 tableLog, maxW, sizeOfSort, nbSymbols;
+ DTableDesc dtd = HUF_getDTableDesc(DTable);
+ U32 const maxTableLog = dtd.maxTableLog;
+ size_t iSize;
+ void* dtPtr = DTable+1; /* force compiler to avoid strict-aliasing */
+ HUF_DEltX2* const dt = (HUF_DEltX2*)dtPtr;
+ U32 *rankStart;
+
+ rankValCol_t* rankVal;
+ U32* rankStats;
+ U32* rankStart0;
+ sortedSymbol_t* sortedSymbol;
+ BYTE* weightList;
+ size_t spaceUsed32 = 0;
+
+ rankVal = (rankValCol_t *)((U32 *)workSpace + spaceUsed32);
+ spaceUsed32 += (sizeof(rankValCol_t) * HUF_TABLELOG_MAX) >> 2;
+ rankStats = (U32 *)workSpace + spaceUsed32;
+ spaceUsed32 += HUF_TABLELOG_MAX + 1;
+ rankStart0 = (U32 *)workSpace + spaceUsed32;
+ spaceUsed32 += HUF_TABLELOG_MAX + 2;
+ sortedSymbol = (sortedSymbol_t *)workSpace + (spaceUsed32 * sizeof(U32)) / sizeof(sortedSymbol_t);
+ spaceUsed32 += HUF_ALIGN(sizeof(sortedSymbol_t) * (HUF_SYMBOLVALUE_MAX + 1), sizeof(U32)) >> 2;
+ weightList = (BYTE *)((U32 *)workSpace + spaceUsed32);
+ spaceUsed32 += HUF_ALIGN(HUF_SYMBOLVALUE_MAX + 1, sizeof(U32)) >> 2;
+
+ if ((spaceUsed32 << 2) > wkspSize) return ERROR(tableLog_tooLarge);
+
+ rankStart = rankStart0 + 1;
+ memset(rankStats, 0, sizeof(U32) * (2 * HUF_TABLELOG_MAX + 2 + 1));
+
+ DEBUG_STATIC_ASSERT(sizeof(HUF_DEltX2) == sizeof(HUF_DTable)); /* if compiler fails here, assertion is wrong */
+ if (maxTableLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge);
+ /* memset(weightList, 0, sizeof(weightList)); */ /* is not necessary, even though some analyzer complain ... */
+
+ iSize = HUF_readStats(weightList, HUF_SYMBOLVALUE_MAX + 1, rankStats, &nbSymbols, &tableLog, src, srcSize);
+ if (HUF_isError(iSize)) return iSize;
+
+ /* check result */
+ if (tableLog > maxTableLog) return ERROR(tableLog_tooLarge); /* DTable can't fit code depth */
+
+ /* find maxWeight */
+ for (maxW = tableLog; rankStats[maxW]==0; maxW--) {} /* necessarily finds a solution before 0 */
+
+ /* Get start index of each weight */
+ { U32 w, nextRankStart = 0;
+ for (w=1; w<maxW+1; w++) {
+ U32 current = nextRankStart;
+ nextRankStart += rankStats[w];
+ rankStart[w] = current;
+ }
+ rankStart[0] = nextRankStart; /* put all 0w symbols at the end of sorted list*/
+ sizeOfSort = nextRankStart;
+ }
+
+ /* sort symbols by weight */
+ { U32 s;
+ for (s=0; s<nbSymbols; s++) {
+ U32 const w = weightList[s];
+ U32 const r = rankStart[w]++;
+ sortedSymbol[r].symbol = (BYTE)s;
+ sortedSymbol[r].weight = (BYTE)w;
+ }
+ rankStart[0] = 0; /* forget 0w symbols; this is beginning of weight(1) */
+ }
+
+ /* Build rankVal */
+ { U32* const rankVal0 = rankVal[0];
+ { int const rescale = (maxTableLog-tableLog) - 1; /* tableLog <= maxTableLog */
+ U32 nextRankVal = 0;
+ U32 w;
+ for (w=1; w<maxW+1; w++) {
+ U32 current = nextRankVal;
+ nextRankVal += rankStats[w] << (w+rescale);
+ rankVal0[w] = current;
+ } }
+ { U32 const minBits = tableLog+1 - maxW;
+ U32 consumed;
+ for (consumed = minBits; consumed < maxTableLog - minBits + 1; consumed++) {
+ U32* const rankValPtr = rankVal[consumed];
+ U32 w;
+ for (w = 1; w < maxW+1; w++) {
+ rankValPtr[w] = rankVal0[w] >> consumed;
+ } } } }
+
+ HUF_fillDTableX2(dt, maxTableLog,
+ sortedSymbol, sizeOfSort,
+ rankStart0, rankVal, maxW,
+ tableLog+1);
+
+ dtd.tableLog = (BYTE)maxTableLog;
+ dtd.tableType = 1;
+ memcpy(DTable, &dtd, sizeof(dtd));
+ return iSize;
+ }
+
+ size_t HUF_readDTableX2(HUF_DTable* DTable, const void* src, size_t srcSize)
+ {
+ U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
+ return HUF_readDTableX2_wksp(DTable, src, srcSize,
+ workSpace, sizeof(workSpace));
+ }
+
+
+ FORCE_INLINE_TEMPLATE U32
+ HUF_decodeSymbolX2(void* op, BIT_DStream_t* DStream, const HUF_DEltX2* dt, const U32 dtLog)
+ {
+ size_t const val = BIT_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */
+ memcpy(op, dt+val, 2);
+ BIT_skipBits(DStream, dt[val].nbBits);
+ return dt[val].length;
+ }
+
+ FORCE_INLINE_TEMPLATE U32
+ HUF_decodeLastSymbolX2(void* op, BIT_DStream_t* DStream, const HUF_DEltX2* dt, const U32 dtLog)
+ {
+ size_t const val = BIT_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */
+ memcpy(op, dt+val, 1);
+ if (dt[val].length==1) BIT_skipBits(DStream, dt[val].nbBits);
+ else {
+ if (DStream->bitsConsumed < (sizeof(DStream->bitContainer)*8)) {
+ BIT_skipBits(DStream, dt[val].nbBits);
+ if (DStream->bitsConsumed > (sizeof(DStream->bitContainer)*8))
+ /* ugly hack; works only because it's the last symbol. Note : can't easily extract nbBits from just this symbol */
+ DStream->bitsConsumed = (sizeof(DStream->bitContainer)*8);
+ } }
+ return 1;
+ }
+
+ #define HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr) \
+ ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog)
+
+ #define HUF_DECODE_SYMBOLX2_1(ptr, DStreamPtr) \
+ if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \
+ ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog)
+
+ #define HUF_DECODE_SYMBOLX2_2(ptr, DStreamPtr) \
+ if (MEM_64bits()) \
+ ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog)
+
+ HINT_INLINE size_t
+ HUF_decodeStreamX2(BYTE* p, BIT_DStream_t* bitDPtr, BYTE* const pEnd,
+ const HUF_DEltX2* const dt, const U32 dtLog)
+ {
+ BYTE* const pStart = p;
+
+ /* up to 8 symbols at a time */
+ while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd-(sizeof(bitDPtr->bitContainer)-1))) {
+ HUF_DECODE_SYMBOLX2_2(p, bitDPtr);
+ HUF_DECODE_SYMBOLX2_1(p, bitDPtr);
+ HUF_DECODE_SYMBOLX2_2(p, bitDPtr);
+ HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
+ }
+
+ /* closer to end : up to 2 symbols at a time */
+ while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p <= pEnd-2))
+ HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
+
+ while (p <= pEnd-2)
+ HUF_DECODE_SYMBOLX2_0(p, bitDPtr); /* no need to reload : reached the end of DStream */
+
+ if (p < pEnd)
+ p += HUF_decodeLastSymbolX2(p, bitDPtr, dt, dtLog);
+
+ return p-pStart;
+ }
+
+ FORCE_INLINE_TEMPLATE size_t
+ HUF_decompress1X2_usingDTable_internal_body(
+ void* dst, size_t dstSize,
+ const void* cSrc, size_t cSrcSize,
+ const HUF_DTable* DTable)
+ {
+ BIT_DStream_t bitD;
+
+ /* Init */
+ CHECK_F( BIT_initDStream(&bitD, cSrc, cSrcSize) );
+
+ /* decode */
+ { BYTE* const ostart = (BYTE*) dst;
+ BYTE* const oend = ostart + dstSize;
+ const void* const dtPtr = DTable+1; /* force compiler to not use strict-aliasing */
+ const HUF_DEltX2* const dt = (const HUF_DEltX2*)dtPtr;
+ DTableDesc const dtd = HUF_getDTableDesc(DTable);
+ HUF_decodeStreamX2(ostart, &bitD, oend, dt, dtd.tableLog);
+ }
+
+ /* check */
+ if (!BIT_endOfDStream(&bitD)) return ERROR(corruption_detected);
+
+ /* decoded size */
+ return dstSize;
+ }
+
+
+ FORCE_INLINE_TEMPLATE size_t
+ HUF_decompress4X2_usingDTable_internal_body(
+ void* dst, size_t dstSize,
+ const void* cSrc, size_t cSrcSize,
+ const HUF_DTable* DTable)
+ {
+ if (cSrcSize < 10) return ERROR(corruption_detected); /* strict minimum : jump table + 1 byte per stream */
+
+ { const BYTE* const istart = (const BYTE*) cSrc;
+ BYTE* const ostart = (BYTE*) dst;
+ BYTE* const oend = ostart + dstSize;
+ const void* const dtPtr = DTable+1;
+ const HUF_DEltX2* const dt = (const HUF_DEltX2*)dtPtr;
+
+ /* Init */
+ BIT_DStream_t bitD1;
+ BIT_DStream_t bitD2;
+ BIT_DStream_t bitD3;
+ BIT_DStream_t bitD4;
+ size_t const length1 = MEM_readLE16(istart);
+ size_t const length2 = MEM_readLE16(istart+2);
+ size_t const length3 = MEM_readLE16(istart+4);
+ size_t const length4 = cSrcSize - (length1 + length2 + length3 + 6);
+ const BYTE* const istart1 = istart + 6; /* jumpTable */
+ const BYTE* const istart2 = istart1 + length1;
+ const BYTE* const istart3 = istart2 + length2;
+ const BYTE* const istart4 = istart3 + length3;
+ size_t const segmentSize = (dstSize+3) / 4;
+ BYTE* const opStart2 = ostart + segmentSize;
+ BYTE* const opStart3 = opStart2 + segmentSize;
+ BYTE* const opStart4 = opStart3 + segmentSize;
+ BYTE* op1 = ostart;
+ BYTE* op2 = opStart2;
+ BYTE* op3 = opStart3;
+ BYTE* op4 = opStart4;
+ U32 endSignal;
+ DTableDesc const dtd = HUF_getDTableDesc(DTable);
+ U32 const dtLog = dtd.tableLog;
+
+ if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */
+ CHECK_F( BIT_initDStream(&bitD1, istart1, length1) );
+ CHECK_F( BIT_initDStream(&bitD2, istart2, length2) );
+ CHECK_F( BIT_initDStream(&bitD3, istart3, length3) );
+ CHECK_F( BIT_initDStream(&bitD4, istart4, length4) );
+
+ /* 16-32 symbols per loop (4-8 symbols per stream) */
+ endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
+ for ( ; (endSignal==BIT_DStream_unfinished) & (op4<(oend-(sizeof(bitD4.bitContainer)-1))) ; ) {
+ HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
+ HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
+ HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
+ HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
+ HUF_DECODE_SYMBOLX2_1(op1, &bitD1);
+ HUF_DECODE_SYMBOLX2_1(op2, &bitD2);
+ HUF_DECODE_SYMBOLX2_1(op3, &bitD3);
+ HUF_DECODE_SYMBOLX2_1(op4, &bitD4);
+ HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
+ HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
+ HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
+ HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
+ HUF_DECODE_SYMBOLX2_0(op1, &bitD1);
+ HUF_DECODE_SYMBOLX2_0(op2, &bitD2);
+ HUF_DECODE_SYMBOLX2_0(op3, &bitD3);
+ HUF_DECODE_SYMBOLX2_0(op4, &bitD4);
+
+ endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
+ }
+
+ /* check corruption */
+ if (op1 > opStart2) return ERROR(corruption_detected);
+ if (op2 > opStart3) return ERROR(corruption_detected);
+ if (op3 > opStart4) return ERROR(corruption_detected);
+ /* note : op4 already verified within main loop */
+
+ /* finish bitStreams one by one */
+ HUF_decodeStreamX2(op1, &bitD1, opStart2, dt, dtLog);
+ HUF_decodeStreamX2(op2, &bitD2, opStart3, dt, dtLog);
+ HUF_decodeStreamX2(op3, &bitD3, opStart4, dt, dtLog);
+ HUF_decodeStreamX2(op4, &bitD4, oend, dt, dtLog);
+
+ /* check */
+ { U32 const endCheck = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4);
+ if (!endCheck) return ERROR(corruption_detected); }
+
+ /* decoded size */
+ return dstSize;
+ }
+ }
+
+ HUF_DGEN(HUF_decompress1X2_usingDTable_internal)
+ HUF_DGEN(HUF_decompress4X2_usingDTable_internal)
+
+ size_t HUF_decompress1X2_usingDTable(
+ void* dst, size_t dstSize,
+ const void* cSrc, size_t cSrcSize,
+ const HUF_DTable* DTable)
+ {
+ DTableDesc dtd = HUF_getDTableDesc(DTable);
+ if (dtd.tableType != 1) return ERROR(GENERIC);
+ return HUF_decompress1X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
+ }
+
+ size_t HUF_decompress1X2_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize,
+ const void* cSrc, size_t cSrcSize,
+ void* workSpace, size_t wkspSize)
+ {
+ const BYTE* ip = (const BYTE*) cSrc;
+
+ size_t const hSize = HUF_readDTableX2_wksp(DCtx, cSrc, cSrcSize,
+ workSpace, wkspSize);
+ if (HUF_isError(hSize)) return hSize;
+ if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
+ ip += hSize; cSrcSize -= hSize;
+
+ return HUF_decompress1X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, DCtx, /* bmi2 */ 0);
+ }
+
+
+ size_t HUF_decompress1X2_DCtx(HUF_DTable* DCtx, void* dst, size_t dstSize,
+ const void* cSrc, size_t cSrcSize)
+ {
+ U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
+ return HUF_decompress1X2_DCtx_wksp(DCtx, dst, dstSize, cSrc, cSrcSize,
+ workSpace, sizeof(workSpace));
+ }
+
+ size_t HUF_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
+ {
+ HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_TABLELOG_MAX);
+ return HUF_decompress1X2_DCtx(DTable, dst, dstSize, cSrc, cSrcSize);
+ }
+
+ size_t HUF_decompress4X2_usingDTable(
+ void* dst, size_t dstSize,
+ const void* cSrc, size_t cSrcSize,
+ const HUF_DTable* DTable)
+ {
+ DTableDesc dtd = HUF_getDTableDesc(DTable);
+ if (dtd.tableType != 1) return ERROR(GENERIC);
+ return HUF_decompress4X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
+ }
+
+ static size_t HUF_decompress4X2_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize,
+ const void* cSrc, size_t cSrcSize,
+ void* workSpace, size_t wkspSize, int bmi2)
+ {
+ const BYTE* ip = (const BYTE*) cSrc;
+
+ size_t hSize = HUF_readDTableX2_wksp(dctx, cSrc, cSrcSize,
+ workSpace, wkspSize);
+ if (HUF_isError(hSize)) return hSize;
+ if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
+ ip += hSize; cSrcSize -= hSize;
+
+ return HUF_decompress4X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2);
+ }
+
+ size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
+ const void* cSrc, size_t cSrcSize,
+ void* workSpace, size_t wkspSize)
+ {
+ return HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, /* bmi2 */ 0);
+ }
+
+
+ size_t HUF_decompress4X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize,
+ const void* cSrc, size_t cSrcSize)
+ {
+ U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
+ return HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
+ workSpace, sizeof(workSpace));
+ }
+
+ size_t HUF_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
+ {
+ HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_TABLELOG_MAX);
+ return HUF_decompress4X2_DCtx(DTable, dst, dstSize, cSrc, cSrcSize);
+ }
+
+ #endif /* HUF_FORCE_DECOMPRESS_X1 */
+
+
+ /* ***********************************/
+ /* Universal decompression selectors */
+ /* ***********************************/
+
+ size_t HUF_decompress1X_usingDTable(void* dst, size_t maxDstSize,
+ const void* cSrc, size_t cSrcSize,
+ const HUF_DTable* DTable)
+ {
+ DTableDesc const dtd = HUF_getDTableDesc(DTable);
+ #if defined(HUF_FORCE_DECOMPRESS_X1)
+ (void)dtd;
+ assert(dtd.tableType == 0);
+ return HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
+ #elif defined(HUF_FORCE_DECOMPRESS_X2)
+ (void)dtd;
+ assert(dtd.tableType == 1);
+ return HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
+ #else
+ return dtd.tableType ? HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0) :
+ HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
+ #endif
+ }
+
+ size_t HUF_decompress4X_usingDTable(void* dst, size_t maxDstSize,
+ const void* cSrc, size_t cSrcSize,
+ const HUF_DTable* DTable)
+ {
+ DTableDesc const dtd = HUF_getDTableDesc(DTable);
+ #if defined(HUF_FORCE_DECOMPRESS_X1)
+ (void)dtd;
+ assert(dtd.tableType == 0);
+ return HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
+ #elif defined(HUF_FORCE_DECOMPRESS_X2)
+ (void)dtd;
+ assert(dtd.tableType == 1);
+ return HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
+ #else
+ return dtd.tableType ? HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0) :
+ HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
+ #endif
+ }
+
+
+ #if !defined(HUF_FORCE_DECOMPRESS_X1) && !defined(HUF_FORCE_DECOMPRESS_X2)
+ typedef struct { U32 tableTime; U32 decode256Time; } algo_time_t;
+ static const algo_time_t algoTime[16 /* Quantization */][3 /* single, double, quad */] =
+ {
+ /* single, double, quad */
+ {{0,0}, {1,1}, {2,2}}, /* Q==0 : impossible */
+ {{0,0}, {1,1}, {2,2}}, /* Q==1 : impossible */
+ {{ 38,130}, {1313, 74}, {2151, 38}}, /* Q == 2 : 12-18% */
+ {{ 448,128}, {1353, 74}, {2238, 41}}, /* Q == 3 : 18-25% */
+ {{ 556,128}, {1353, 74}, {2238, 47}}, /* Q == 4 : 25-32% */
+ {{ 714,128}, {1418, 74}, {2436, 53}}, /* Q == 5 : 32-38% */
+ {{ 883,128}, {1437, 74}, {2464, 61}}, /* Q == 6 : 38-44% */
+ {{ 897,128}, {1515, 75}, {2622, 68}}, /* Q == 7 : 44-50% */
+ {{ 926,128}, {1613, 75}, {2730, 75}}, /* Q == 8 : 50-56% */
+ {{ 947,128}, {1729, 77}, {3359, 77}}, /* Q == 9 : 56-62% */
+ {{1107,128}, {2083, 81}, {4006, 84}}, /* Q ==10 : 62-69% */
+ {{1177,128}, {2379, 87}, {4785, 88}}, /* Q ==11 : 69-75% */
+ {{1242,128}, {2415, 93}, {5155, 84}}, /* Q ==12 : 75-81% */
+ {{1349,128}, {2644,106}, {5260,106}}, /* Q ==13 : 81-87% */
+ {{1455,128}, {2422,124}, {4174,124}}, /* Q ==14 : 87-93% */
+ {{ 722,128}, {1891,145}, {1936,146}}, /* Q ==15 : 93-99% */
+ };
+ #endif
+
+ /** HUF_selectDecoder() :
+ * Tells which decoder is likely to decode faster,
+ * based on a set of pre-computed metrics.
+ * @return : 0==HUF_decompress4X1, 1==HUF_decompress4X2 .
+ * Assumption : 0 < dstSize <= 128 KB */
+ U32 HUF_selectDecoder (size_t dstSize, size_t cSrcSize)
+ {
+ assert(dstSize > 0);
+ assert(dstSize <= 128*1024);
+ #if defined(HUF_FORCE_DECOMPRESS_X1)
+ (void)dstSize;
+ (void)cSrcSize;
+ return 0;
+ #elif defined(HUF_FORCE_DECOMPRESS_X2)
+ (void)dstSize;
+ (void)cSrcSize;
+ return 1;
+ #else
+ /* decoder timing evaluation */
+ { U32 const Q = (cSrcSize >= dstSize) ? 15 : (U32)(cSrcSize * 16 / dstSize); /* Q < 16 */
+ U32 const D256 = (U32)(dstSize >> 8);
+ U32 const DTime0 = algoTime[Q][0].tableTime + (algoTime[Q][0].decode256Time * D256);
+ U32 DTime1 = algoTime[Q][1].tableTime + (algoTime[Q][1].decode256Time * D256);
+ DTime1 += DTime1 >> 3; /* advantage to algorithm using less memory, to reduce cache eviction */
+ return DTime1 < DTime0;
+ }
+ #endif
+ }
+
+
+ typedef size_t (*decompressionAlgo)(void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);
+
+ size_t HUF_decompress (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
+ {
+ #if !defined(HUF_FORCE_DECOMPRESS_X1) && !defined(HUF_FORCE_DECOMPRESS_X2)
+ static const decompressionAlgo decompress[2] = { HUF_decompress4X1, HUF_decompress4X2 };
+ #endif
+
+ /* validation checks */
+ if (dstSize == 0) return ERROR(dstSize_tooSmall);
+ if (cSrcSize > dstSize) return ERROR(corruption_detected); /* invalid */
+ if (cSrcSize == dstSize) { memcpy(dst, cSrc, dstSize); return dstSize; } /* not compressed */
+ if (cSrcSize == 1) { memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */
+
+ { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
+ #if defined(HUF_FORCE_DECOMPRESS_X1)
+ (void)algoNb;
+ assert(algoNb == 0);
+ return HUF_decompress4X1(dst, dstSize, cSrc, cSrcSize);
+ #elif defined(HUF_FORCE_DECOMPRESS_X2)
+ (void)algoNb;
+ assert(algoNb == 1);
+ return HUF_decompress4X2(dst, dstSize, cSrc, cSrcSize);
+ #else
+ return decompress[algoNb](dst, dstSize, cSrc, cSrcSize);
+ #endif
+ }
+ }
+
+ size_t HUF_decompress4X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
+ {
+ /* validation checks */
+ if (dstSize == 0) return ERROR(dstSize_tooSmall);
+ if (cSrcSize > dstSize) return ERROR(corruption_detected); /* invalid */
+ if (cSrcSize == dstSize) { memcpy(dst, cSrc, dstSize); return dstSize; } /* not compressed */
+ if (cSrcSize == 1) { memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */
+
+ { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
+ #if defined(HUF_FORCE_DECOMPRESS_X1)
+ (void)algoNb;
+ assert(algoNb == 0);
+ return HUF_decompress4X1_DCtx(dctx, dst, dstSize, cSrc, cSrcSize);
+ #elif defined(HUF_FORCE_DECOMPRESS_X2)
+ (void)algoNb;
+ assert(algoNb == 1);
+ return HUF_decompress4X2_DCtx(dctx, dst, dstSize, cSrc, cSrcSize);
+ #else
+ return algoNb ? HUF_decompress4X2_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) :
+ HUF_decompress4X1_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) ;
+ #endif
+ }
+ }
+
+ size_t HUF_decompress4X_hufOnly(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
+ {
+ U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
+ return HUF_decompress4X_hufOnly_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
+ workSpace, sizeof(workSpace));
+ }
+
+
+ size_t HUF_decompress4X_hufOnly_wksp(HUF_DTable* dctx, void* dst,
+ size_t dstSize, const void* cSrc,
+ size_t cSrcSize, void* workSpace,
+ size_t wkspSize)
+ {
+ /* validation checks */
+ if (dstSize == 0) return ERROR(dstSize_tooSmall);
+ if (cSrcSize == 0) return ERROR(corruption_detected);
+
+ { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
+ #if defined(HUF_FORCE_DECOMPRESS_X1)
+ (void)algoNb;
+ assert(algoNb == 0);
+ return HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize);
+ #elif defined(HUF_FORCE_DECOMPRESS_X2)
+ (void)algoNb;
+ assert(algoNb == 1);
+ return HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize);
+ #else
+ return algoNb ? HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc,
+ cSrcSize, workSpace, wkspSize):
+ HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize);
+ #endif
+ }
+ }
+
+ size_t HUF_decompress1X_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
+ const void* cSrc, size_t cSrcSize,
+ void* workSpace, size_t wkspSize)
+ {
+ /* validation checks */
+ if (dstSize == 0) return ERROR(dstSize_tooSmall);
+ if (cSrcSize > dstSize) return ERROR(corruption_detected); /* invalid */
+ if (cSrcSize == dstSize) { memcpy(dst, cSrc, dstSize); return dstSize; } /* not compressed */
+ if (cSrcSize == 1) { memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */
+
+ { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
+ #if defined(HUF_FORCE_DECOMPRESS_X1)
+ (void)algoNb;
+ assert(algoNb == 0);
+ return HUF_decompress1X1_DCtx_wksp(dctx, dst, dstSize, cSrc,
+ cSrcSize, workSpace, wkspSize);
+ #elif defined(HUF_FORCE_DECOMPRESS_X2)
+ (void)algoNb;
+ assert(algoNb == 1);
+ return HUF_decompress1X2_DCtx_wksp(dctx, dst, dstSize, cSrc,
+ cSrcSize, workSpace, wkspSize);
+ #else
+ return algoNb ? HUF_decompress1X2_DCtx_wksp(dctx, dst, dstSize, cSrc,
+ cSrcSize, workSpace, wkspSize):
+ HUF_decompress1X1_DCtx_wksp(dctx, dst, dstSize, cSrc,
+ cSrcSize, workSpace, wkspSize);
+ #endif
+ }
+ }
+
+ size_t HUF_decompress1X_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize,
+ const void* cSrc, size_t cSrcSize)
+ {
+ U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
+ return HUF_decompress1X_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
+ workSpace, sizeof(workSpace));
+ }
+
+
+ size_t HUF_decompress1X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2)
+ {
+ DTableDesc const dtd = HUF_getDTableDesc(DTable);
+ #if defined(HUF_FORCE_DECOMPRESS_X1)
+ (void)dtd;
+ assert(dtd.tableType == 0);
+ return HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
+ #elif defined(HUF_FORCE_DECOMPRESS_X2)
+ (void)dtd;
+ assert(dtd.tableType == 1);
+ return HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
+ #else
+ return dtd.tableType ? HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2) :
+ HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
+ #endif
+ }
+
+ #ifndef HUF_FORCE_DECOMPRESS_X2
+ size_t HUF_decompress1X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2)
+ {
+ const BYTE* ip = (const BYTE*) cSrc;
+
+ size_t const hSize = HUF_readDTableX1_wksp(dctx, cSrc, cSrcSize, workSpace, wkspSize);
+ if (HUF_isError(hSize)) return hSize;
+ if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
+ ip += hSize; cSrcSize -= hSize;
+
+ return HUF_decompress1X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2);
+ }
+ #endif
+
+ size_t HUF_decompress4X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2)
+ {
+ DTableDesc const dtd = HUF_getDTableDesc(DTable);
+ #if defined(HUF_FORCE_DECOMPRESS_X1)
+ (void)dtd;
+ assert(dtd.tableType == 0);
+ return HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
+ #elif defined(HUF_FORCE_DECOMPRESS_X2)
+ (void)dtd;
+ assert(dtd.tableType == 1);
+ return HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
+ #else
+ return dtd.tableType ? HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2) :
+ HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
+ #endif
+ }
+
+ size_t HUF_decompress4X_hufOnly_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2)
+ {
+ /* validation checks */
+ if (dstSize == 0) return ERROR(dstSize_tooSmall);
+ if (cSrcSize == 0) return ERROR(corruption_detected);
+
+ { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
+ #if defined(HUF_FORCE_DECOMPRESS_X1)
+ (void)algoNb;
+ assert(algoNb == 0);
+ return HUF_decompress4X1_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2);
+ #elif defined(HUF_FORCE_DECOMPRESS_X2)
+ (void)algoNb;
+ assert(algoNb == 1);
+ return HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2);
+ #else
+ return algoNb ? HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2) :
+ HUF_decompress4X1_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2);
+ #endif
+ }
+ }
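
Two points worth making concrete. First, the HUF_selectDecoder() heuristic above: with dstSize = 64 KB and cSrcSize = 32 KB, Q = 32768*16/65536 = 8 and D256 = 65536>>8 = 256, so the Q==8 row gives DTime0 = 926 + 128*256 = 33694 and DTime1 = 1613 + 75*256 = 20813, which the 1/8th cache-eviction penalty raises to 23414; DTime1 < DTime0, so the double-symbol (X2) decoder is chosen. Second, a round-trip sketch of the one-shot entry points (illustrative only, not part of the gem): HUF_decompress() regenerates exactly dstSize bytes, so callers must transmit the original size out of band, and HUF_compress() signals "not compressible" and "RLE" with the special return values 0 and 1.

    /* sketch: compress a block (< 128 KB) with HUF_compress(), decode it back */
    #include <stdlib.h>
    #include <string.h>
    #define HUF_STATIC_LINKING_ONLY
    #include "huf.h"

    int huf_roundtrip(const void* src, size_t srcSize)   /* returns 0 on success */
    {
        size_t const cBound = HUF_compressBound(srcSize);
        void* const cBuf = malloc(cBound);
        void* const rBuf = malloc(srcSize);
        int ret = -1;
        if (cBuf && rBuf) {
            size_t const cSize = HUF_compress(cBuf, cBound, src, srcSize);
            if (!HUF_isError(cSize) && cSize > 1) {   /* 0==not compressible, 1==RLE */
                /* internally runs HUF_selectDecoder() to pick the X1 or X2 path */
                size_t const rSize = HUF_decompress(rBuf, srcSize, cBuf, cSize);
                if (!HUF_isError(rSize) && rSize == srcSize
                    && !memcmp(src, rBuf, srcSize)) ret = 0;
            }
        }
        free(cBuf); free(rBuf);
        return ret;
    }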