zstdlib 0.6.0-x64-mingw32 → 0.9.0-x64-mingw32

Sign up to get free protection for your applications and to get access to all the features.
Files changed (140) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGES.md +20 -0
  3. data/README.md +7 -1
  4. data/Rakefile +38 -8
  5. data/ext/{zstdlib → zstdlib_c}/extconf.rb +10 -5
  6. data/ext/{zstdlib → zstdlib_c}/ruby/zlib-2.2/zstdlib.c +2 -2
  7. data/ext/{zstdlib → zstdlib_c}/ruby/zlib-2.3/zstdlib.c +2 -2
  8. data/ext/{zstdlib → zstdlib_c}/ruby/zlib-2.4/zstdlib.c +2 -2
  9. data/ext/{zstdlib → zstdlib_c}/ruby/zlib-2.5/zstdlib.c +2 -2
  10. data/ext/{zstdlib → zstdlib_c}/ruby/zlib-2.6/zstdlib.c +2 -2
  11. data/ext/{zstdlib → zstdlib_c}/ruby/zlib-2.7/zstdlib.c +2 -2
  12. data/ext/zstdlib_c/ruby/zlib-3.0/zstdlib.c +4994 -0
  13. data/ext/zstdlib_c/ruby/zlib-3.1/zstdlib.c +5076 -0
  14. data/ext/{zstdlib → zstdlib_c}/zlib-1.2.11/adler32.c +0 -0
  15. data/ext/{zstdlib → zstdlib_c}/zlib-1.2.11/compress.c +0 -0
  16. data/ext/{zstdlib → zstdlib_c}/zlib-1.2.11/crc32.c +0 -0
  17. data/ext/{zstdlib → zstdlib_c}/zlib-1.2.11/crc32.h +0 -0
  18. data/ext/{zstdlib → zstdlib_c}/zlib-1.2.11/deflate.c +0 -0
  19. data/ext/{zstdlib → zstdlib_c}/zlib-1.2.11/deflate.h +0 -0
  20. data/ext/{zstdlib → zstdlib_c}/zlib-1.2.11/gzclose.c +0 -0
  21. data/ext/{zstdlib → zstdlib_c}/zlib-1.2.11/gzguts.h +0 -0
  22. data/ext/{zstdlib → zstdlib_c}/zlib-1.2.11/gzlib.c +0 -0
  23. data/ext/{zstdlib → zstdlib_c}/zlib-1.2.11/gzread.c +0 -0
  24. data/ext/{zstdlib → zstdlib_c}/zlib-1.2.11/gzwrite.c +0 -0
  25. data/ext/{zstdlib → zstdlib_c}/zlib-1.2.11/infback.c +0 -0
  26. data/ext/{zstdlib → zstdlib_c}/zlib-1.2.11/inffast.c +0 -0
  27. data/ext/{zstdlib → zstdlib_c}/zlib-1.2.11/inffast.h +0 -0
  28. data/ext/{zstdlib → zstdlib_c}/zlib-1.2.11/inffixed.h +0 -0
  29. data/ext/{zstdlib → zstdlib_c}/zlib-1.2.11/inflate.c +0 -0
  30. data/ext/{zstdlib → zstdlib_c}/zlib-1.2.11/inflate.h +0 -0
  31. data/ext/{zstdlib → zstdlib_c}/zlib-1.2.11/inftrees.c +0 -0
  32. data/ext/{zstdlib → zstdlib_c}/zlib-1.2.11/inftrees.h +0 -0
  33. data/ext/{zstdlib → zstdlib_c}/zlib-1.2.11/trees.c +0 -0
  34. data/ext/{zstdlib → zstdlib_c}/zlib-1.2.11/trees.h +0 -0
  35. data/ext/{zstdlib → zstdlib_c}/zlib-1.2.11/uncompr.c +0 -0
  36. data/ext/{zstdlib → zstdlib_c}/zlib-1.2.11/zconf.h +0 -0
  37. data/ext/{zstdlib → zstdlib_c}/zlib-1.2.11/zlib.h +0 -0
  38. data/ext/{zstdlib → zstdlib_c}/zlib-1.2.11/zutil.c +0 -0
  39. data/ext/{zstdlib → zstdlib_c}/zlib-1.2.11/zutil.h +0 -0
  40. data/ext/{zstdlib → zstdlib_c}/zlib.mk +0 -0
  41. data/ext/{zstdlib → zstdlib_c}/zlibwrapper/zlibwrapper.c +1 -5
  42. data/ext/{zstdlib → zstdlib_c}/zlibwrapper.mk +0 -0
  43. data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/lib/common/bitstream.h +75 -57
  44. data/ext/zstdlib_c/zstd-1.5.2/lib/common/compiler.h +335 -0
  45. data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/lib/common/cpu.h +1 -3
  46. data/ext/zstdlib_c/zstd-1.5.2/lib/common/debug.c +24 -0
  47. data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/lib/common/debug.h +22 -49
  48. data/ext/zstdlib_c/zstd-1.5.2/lib/common/entropy_common.c +368 -0
  49. data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/lib/common/error_private.c +3 -1
  50. data/ext/zstdlib_c/zstd-1.5.2/lib/common/error_private.h +159 -0
  51. data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/lib/common/fse.h +51 -42
  52. data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/lib/common/fse_decompress.c +149 -57
  53. data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/lib/common/huf.h +60 -54
  54. data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/lib/common/mem.h +87 -98
  55. data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/lib/common/pool.c +34 -23
  56. data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/lib/common/pool.h +5 -5
  57. data/ext/zstdlib_c/zstd-1.5.2/lib/common/portability_macros.h +137 -0
  58. data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/lib/common/threading.c +10 -8
  59. data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/lib/common/threading.h +4 -3
  60. data/ext/zstdlib_c/zstd-1.5.2/lib/common/xxhash.c +24 -0
  61. data/ext/zstdlib_c/zstd-1.5.2/lib/common/xxhash.h +5686 -0
  62. data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/lib/common/zstd_common.c +10 -10
  63. data/ext/zstdlib_c/zstd-1.5.2/lib/common/zstd_deps.h +111 -0
  64. data/ext/zstdlib_c/zstd-1.5.2/lib/common/zstd_internal.h +493 -0
  65. data/ext/zstdlib_c/zstd-1.5.2/lib/common/zstd_trace.h +163 -0
  66. data/ext/zstdlib_c/zstd-1.5.2/lib/compress/clevels.h +134 -0
  67. data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/lib/compress/fse_compress.c +105 -85
  68. data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/lib/compress/hist.c +41 -63
  69. data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/lib/compress/hist.h +13 -33
  70. data/ext/zstdlib_c/zstd-1.5.2/lib/compress/huf_compress.c +1370 -0
  71. data/ext/zstdlib_c/zstd-1.5.2/lib/compress/zstd_compress.c +6327 -0
  72. data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/lib/compress/zstd_compress_internal.h +537 -82
  73. data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/lib/compress/zstd_compress_literals.c +21 -16
  74. data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/lib/compress/zstd_compress_literals.h +4 -2
  75. data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/lib/compress/zstd_compress_sequences.c +61 -34
  76. data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/lib/compress/zstd_compress_sequences.h +10 -3
  77. data/ext/zstdlib_c/zstd-1.5.2/lib/compress/zstd_compress_superblock.c +573 -0
  78. data/ext/zstdlib_c/zstd-1.5.2/lib/compress/zstd_compress_superblock.h +32 -0
  79. data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/lib/compress/zstd_cwksp.h +236 -95
  80. data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/lib/compress/zstd_double_fast.c +321 -143
  81. data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/lib/compress/zstd_double_fast.h +2 -2
  82. data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/lib/compress/zstd_fast.c +328 -137
  83. data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/lib/compress/zstd_fast.h +2 -2
  84. data/ext/zstdlib_c/zstd-1.5.2/lib/compress/zstd_lazy.c +2104 -0
  85. data/ext/zstdlib_c/zstd-1.5.2/lib/compress/zstd_lazy.h +125 -0
  86. data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/lib/compress/zstd_ldm.c +336 -209
  87. data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/lib/compress/zstd_ldm.h +15 -3
  88. data/ext/zstdlib_c/zstd-1.5.2/lib/compress/zstd_ldm_geartab.h +106 -0
  89. data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/lib/compress/zstd_opt.c +439 -239
  90. data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/lib/compress/zstd_opt.h +1 -1
  91. data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/lib/compress/zstdmt_compress.c +205 -462
  92. data/ext/zstdlib_c/zstd-1.5.2/lib/compress/zstdmt_compress.h +113 -0
  93. data/ext/zstdlib_c/zstd-1.5.2/lib/decompress/huf_decompress.c +1889 -0
  94. data/ext/zstdlib_c/zstd-1.5.2/lib/decompress/huf_decompress_amd64.S +585 -0
  95. data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/lib/decompress/zstd_ddict.c +20 -16
  96. data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/lib/decompress/zstd_ddict.h +3 -3
  97. data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/lib/decompress/zstd_decompress.c +691 -230
  98. data/ext/zstdlib_c/zstd-1.5.2/lib/decompress/zstd_decompress_block.c +2072 -0
  99. data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/lib/decompress/zstd_decompress_block.h +16 -7
  100. data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/lib/decompress/zstd_decompress_internal.h +71 -10
  101. data/ext/zstdlib_c/zstd-1.5.2/lib/zdict.h +452 -0
  102. data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/lib/zstd.h +760 -234
  103. data/ext/{zstdlib/zstd-1.4.4/lib/common → zstdlib_c/zstd-1.5.2/lib}/zstd_errors.h +3 -1
  104. data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/zlibWrapper/gzclose.c +0 -0
  105. data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/zlibWrapper/gzcompatibility.h +1 -1
  106. data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/zlibWrapper/gzguts.h +0 -0
  107. data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/zlibWrapper/gzlib.c +0 -0
  108. data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/zlibWrapper/gzread.c +0 -0
  109. data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/zlibWrapper/gzwrite.c +0 -0
  110. data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/zlibWrapper/zstd_zlibwrapper.c +133 -44
  111. data/ext/{zstdlib/zstd-1.4.4 → zstdlib_c/zstd-1.5.2}/zlibWrapper/zstd_zlibwrapper.h +1 -1
  112. data/ext/zstdlib_c/zstd.mk +15 -0
  113. data/lib/2.4/zstdlib_c.so +0 -0
  114. data/lib/2.5/zstdlib_c.so +0 -0
  115. data/lib/2.6/zstdlib_c.so +0 -0
  116. data/lib/2.7/zstdlib_c.so +0 -0
  117. data/lib/3.0/zstdlib_c.so +0 -0
  118. data/lib/zstdlib.rb +2 -2
  119. metadata +124 -114
  120. data/ext/zstdlib/zstd-1.4.4/lib/common/compiler.h +0 -159
  121. data/ext/zstdlib/zstd-1.4.4/lib/common/debug.c +0 -44
  122. data/ext/zstdlib/zstd-1.4.4/lib/common/entropy_common.c +0 -236
  123. data/ext/zstdlib/zstd-1.4.4/lib/common/error_private.h +0 -76
  124. data/ext/zstdlib/zstd-1.4.4/lib/common/xxhash.c +0 -882
  125. data/ext/zstdlib/zstd-1.4.4/lib/common/xxhash.h +0 -305
  126. data/ext/zstdlib/zstd-1.4.4/lib/common/zstd_internal.h +0 -350
  127. data/ext/zstdlib/zstd-1.4.4/lib/compress/huf_compress.c +0 -798
  128. data/ext/zstdlib/zstd-1.4.4/lib/compress/zstd_compress.c +0 -4103
  129. data/ext/zstdlib/zstd-1.4.4/lib/compress/zstd_lazy.c +0 -1115
  130. data/ext/zstdlib/zstd-1.4.4/lib/compress/zstd_lazy.h +0 -67
  131. data/ext/zstdlib/zstd-1.4.4/lib/compress/zstdmt_compress.h +0 -192
  132. data/ext/zstdlib/zstd-1.4.4/lib/decompress/huf_decompress.c +0 -1234
  133. data/ext/zstdlib/zstd-1.4.4/lib/decompress/zstd_decompress_block.c +0 -1323
  134. data/ext/zstdlib/zstd.mk +0 -14
  135. data/lib/2.2/zstdlib.so +0 -0
  136. data/lib/2.3/zstdlib.so +0 -0
  137. data/lib/2.4/zstdlib.so +0 -0
  138. data/lib/2.5/zstdlib.so +0 -0
  139. data/lib/2.6/zstdlib.so +0 -0
  140. data/lib/2.7/zstdlib.so +0 -0
@@ -0,0 +1,1889 @@
1
+ /* ******************************************************************
2
+ * huff0 huffman decoder,
3
+ * part of Finite State Entropy library
4
+ * Copyright (c) Yann Collet, Facebook, Inc.
5
+ *
6
+ * You can contact the author at :
7
+ * - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy
8
+ *
9
+ * This source code is licensed under both the BSD-style license (found in the
10
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
11
+ * in the COPYING file in the root directory of this source tree).
12
+ * You may select, at your option, one of the above-listed licenses.
13
+ ****************************************************************** */
14
+
15
+ /* **************************************************************
16
+ * Dependencies
17
+ ****************************************************************/
18
+ #include "../common/zstd_deps.h" /* ZSTD_memcpy, ZSTD_memset */
19
+ #include "../common/compiler.h"
20
+ #include "../common/bitstream.h" /* BIT_* */
21
+ #include "../common/fse.h" /* to compress headers */
22
+ #define HUF_STATIC_LINKING_ONLY
23
+ #include "../common/huf.h"
24
+ #include "../common/error_private.h"
25
+ #include "../common/zstd_internal.h"
26
+
27
+ /* **************************************************************
28
+ * Constants
29
+ ****************************************************************/
30
+
31
+ #define HUF_DECODER_FAST_TABLELOG 11
32
+
33
+ /* **************************************************************
34
+ * Macros
35
+ ****************************************************************/
36
+
37
+ /* These two optional macros force the use one way or another of the two
38
+ * Huffman decompression implementations. You can't force in both directions
39
+ * at the same time.
40
+ */
41
+ #if defined(HUF_FORCE_DECOMPRESS_X1) && \
42
+ defined(HUF_FORCE_DECOMPRESS_X2)
43
+ #error "Cannot force the use of the X1 and X2 decoders at the same time!"
44
+ #endif
45
+
46
+ #if ZSTD_ENABLE_ASM_X86_64_BMI2 && DYNAMIC_BMI2
47
+ # define HUF_ASM_X86_64_BMI2_ATTRS BMI2_TARGET_ATTRIBUTE
48
+ #else
49
+ # define HUF_ASM_X86_64_BMI2_ATTRS
50
+ #endif
51
+
52
+ #ifdef __cplusplus
53
+ # define HUF_EXTERN_C extern "C"
54
+ #else
55
+ # define HUF_EXTERN_C
56
+ #endif
57
+ #define HUF_ASM_DECL HUF_EXTERN_C
58
+
59
+ #if DYNAMIC_BMI2 || (ZSTD_ENABLE_ASM_X86_64_BMI2 && defined(__BMI2__))
60
+ # define HUF_NEED_BMI2_FUNCTION 1
61
+ #else
62
+ # define HUF_NEED_BMI2_FUNCTION 0
63
+ #endif
64
+
65
+ #if !(ZSTD_ENABLE_ASM_X86_64_BMI2 && defined(__BMI2__))
66
+ # define HUF_NEED_DEFAULT_FUNCTION 1
67
+ #else
68
+ # define HUF_NEED_DEFAULT_FUNCTION 0
69
+ #endif
70
+
71
+ /* **************************************************************
72
+ * Error Management
73
+ ****************************************************************/
74
+ #define HUF_isError ERR_isError
75
+
76
+
77
+ /* **************************************************************
78
+ * Byte alignment for workSpace management
79
+ ****************************************************************/
80
+ #define HUF_ALIGN(x, a) HUF_ALIGN_MASK((x), (a) - 1)
81
+ #define HUF_ALIGN_MASK(x, mask) (((x) + (mask)) & ~(mask))
82
+
83
+
84
+ /* **************************************************************
85
+ * BMI2 Variant Wrappers
86
+ ****************************************************************/
87
+ #if DYNAMIC_BMI2
88
+
89
+ #define HUF_DGEN(fn) \
90
+ \
91
+ static size_t fn##_default( \
92
+ void* dst, size_t dstSize, \
93
+ const void* cSrc, size_t cSrcSize, \
94
+ const HUF_DTable* DTable) \
95
+ { \
96
+ return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable); \
97
+ } \
98
+ \
99
+ static BMI2_TARGET_ATTRIBUTE size_t fn##_bmi2( \
100
+ void* dst, size_t dstSize, \
101
+ const void* cSrc, size_t cSrcSize, \
102
+ const HUF_DTable* DTable) \
103
+ { \
104
+ return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable); \
105
+ } \
106
+ \
107
+ static size_t fn(void* dst, size_t dstSize, void const* cSrc, \
108
+ size_t cSrcSize, HUF_DTable const* DTable, int bmi2) \
109
+ { \
110
+ if (bmi2) { \
111
+ return fn##_bmi2(dst, dstSize, cSrc, cSrcSize, DTable); \
112
+ } \
113
+ return fn##_default(dst, dstSize, cSrc, cSrcSize, DTable); \
114
+ }
115
+
116
+ #else
117
+
118
+ #define HUF_DGEN(fn) \
119
+ static size_t fn(void* dst, size_t dstSize, void const* cSrc, \
120
+ size_t cSrcSize, HUF_DTable const* DTable, int bmi2) \
121
+ { \
122
+ (void)bmi2; \
123
+ return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable); \
124
+ }
125
+
126
+ #endif
127
+
128
+
129
+ /*-***************************/
130
+ /* generic DTableDesc */
131
+ /*-***************************/
132
+ typedef struct { BYTE maxTableLog; BYTE tableType; BYTE tableLog; BYTE reserved; } DTableDesc;
133
+
134
+ static DTableDesc HUF_getDTableDesc(const HUF_DTable* table)
135
+ {
136
+ DTableDesc dtd;
137
+ ZSTD_memcpy(&dtd, table, sizeof(dtd));
138
+ return dtd;
139
+ }
140
+
141
+ #if ZSTD_ENABLE_ASM_X86_64_BMI2
142
+
143
+ static size_t HUF_initDStream(BYTE const* ip) {
144
+ BYTE const lastByte = ip[7];
145
+ size_t const bitsConsumed = lastByte ? 8 - BIT_highbit32(lastByte) : 0;
146
+ size_t const value = MEM_readLEST(ip) | 1;
147
+ assert(bitsConsumed <= 8);
148
+ return value << bitsConsumed;
149
+ }
150
+ typedef struct {
151
+ BYTE const* ip[4];
152
+ BYTE* op[4];
153
+ U64 bits[4];
154
+ void const* dt;
155
+ BYTE const* ilimit;
156
+ BYTE* oend;
157
+ BYTE const* iend[4];
158
+ } HUF_DecompressAsmArgs;
159
+
160
+ /**
161
+ * Initializes args for the asm decoding loop.
162
+ * @returns 0 on success
163
+ * 1 if the fallback implementation should be used.
164
+ * Or an error code on failure.
165
+ */
166
+ static size_t HUF_DecompressAsmArgs_init(HUF_DecompressAsmArgs* args, void* dst, size_t dstSize, void const* src, size_t srcSize, const HUF_DTable* DTable)
167
+ {
168
+ void const* dt = DTable + 1;
169
+ U32 const dtLog = HUF_getDTableDesc(DTable).tableLog;
170
+
171
+ const BYTE* const ilimit = (const BYTE*)src + 6 + 8;
172
+
173
+ BYTE* const oend = (BYTE*)dst + dstSize;
174
+
175
+ /* The following condition is false on x32 platform,
176
+ * but HUF_asm is not compatible with this ABI */
177
+ if (!(MEM_isLittleEndian() && !MEM_32bits())) return 1;
178
+
179
+ /* strict minimum : jump table + 1 byte per stream */
180
+ if (srcSize < 10)
181
+ return ERROR(corruption_detected);
182
+
183
+ /* Must have at least 8 bytes per stream because we don't handle initializing smaller bit containers.
184
+ * If table log is not correct at this point, fallback to the old decoder.
185
+ * On small inputs we don't have enough data to trigger the fast loop, so use the old decoder.
186
+ */
187
+ if (dtLog != HUF_DECODER_FAST_TABLELOG)
188
+ return 1;
189
+
190
+ /* Read the jump table. */
191
+ {
192
+ const BYTE* const istart = (const BYTE*)src;
193
+ size_t const length1 = MEM_readLE16(istart);
194
+ size_t const length2 = MEM_readLE16(istart+2);
195
+ size_t const length3 = MEM_readLE16(istart+4);
196
+ size_t const length4 = srcSize - (length1 + length2 + length3 + 6);
197
+ args->iend[0] = istart + 6; /* jumpTable */
198
+ args->iend[1] = args->iend[0] + length1;
199
+ args->iend[2] = args->iend[1] + length2;
200
+ args->iend[3] = args->iend[2] + length3;
201
+
202
+ /* HUF_initDStream() requires this, and this small of an input
203
+ * won't benefit from the ASM loop anyways.
204
+ * length1 must be >= 16 so that ip[0] >= ilimit before the loop
205
+ * starts.
206
+ */
207
+ if (length1 < 16 || length2 < 8 || length3 < 8 || length4 < 8)
208
+ return 1;
209
+ if (length4 > srcSize) return ERROR(corruption_detected); /* overflow */
210
+ }
211
+ /* ip[] contains the position that is currently loaded into bits[]. */
212
+ args->ip[0] = args->iend[1] - sizeof(U64);
213
+ args->ip[1] = args->iend[2] - sizeof(U64);
214
+ args->ip[2] = args->iend[3] - sizeof(U64);
215
+ args->ip[3] = (BYTE const*)src + srcSize - sizeof(U64);
216
+
217
+ /* op[] contains the output pointers. */
218
+ args->op[0] = (BYTE*)dst;
219
+ args->op[1] = args->op[0] + (dstSize+3)/4;
220
+ args->op[2] = args->op[1] + (dstSize+3)/4;
221
+ args->op[3] = args->op[2] + (dstSize+3)/4;
222
+
223
+ /* No point to call the ASM loop for tiny outputs. */
224
+ if (args->op[3] >= oend)
225
+ return 1;
226
+
227
+ /* bits[] is the bit container.
228
+ * It is read from the MSB down to the LSB.
229
+ * It is shifted left as it is read, and zeros are
230
+ * shifted in. After the lowest valid bit a 1 is
231
+ * set, so that CountTrailingZeros(bits[]) can be used
232
+ * to count how many bits we've consumed.
233
+ */
234
+ args->bits[0] = HUF_initDStream(args->ip[0]);
235
+ args->bits[1] = HUF_initDStream(args->ip[1]);
236
+ args->bits[2] = HUF_initDStream(args->ip[2]);
237
+ args->bits[3] = HUF_initDStream(args->ip[3]);
238
+
239
+ /* If ip[] >= ilimit, it is guaranteed to be safe to
240
+ * reload bits[]. It may be beyond its section, but is
241
+ * guaranteed to be valid (>= istart).
242
+ */
243
+ args->ilimit = ilimit;
244
+
245
+ args->oend = oend;
246
+ args->dt = dt;
247
+
248
+ return 0;
249
+ }
250
+
251
+ static size_t HUF_initRemainingDStream(BIT_DStream_t* bit, HUF_DecompressAsmArgs const* args, int stream, BYTE* segmentEnd)
252
+ {
253
+ /* Validate that we haven't overwritten. */
254
+ if (args->op[stream] > segmentEnd)
255
+ return ERROR(corruption_detected);
256
+ /* Validate that we haven't read beyond iend[].
257
+ * Note that ip[] may be < iend[] because the MSB is
258
+ * the next bit to read, and we may have consumed 100%
259
+ * of the stream, so down to iend[i] - 8 is valid.
260
+ */
261
+ if (args->ip[stream] < args->iend[stream] - 8)
262
+ return ERROR(corruption_detected);
263
+
264
+ /* Construct the BIT_DStream_t. */
265
+ bit->bitContainer = MEM_readLE64(args->ip[stream]);
266
+ bit->bitsConsumed = ZSTD_countTrailingZeros((size_t)args->bits[stream]);
267
+ bit->start = (const char*)args->iend[0];
268
+ bit->limitPtr = bit->start + sizeof(size_t);
269
+ bit->ptr = (const char*)args->ip[stream];
270
+
271
+ return 0;
272
+ }
273
+ #endif
274
+
275
+
276
+ #ifndef HUF_FORCE_DECOMPRESS_X2
277
+
278
+ /*-***************************/
279
+ /* single-symbol decoding */
280
+ /*-***************************/
281
+ typedef struct { BYTE nbBits; BYTE byte; } HUF_DEltX1; /* single-symbol decoding */
282
+
283
+ /**
284
+ * Packs 4 HUF_DEltX1 structs into a U64. This is used to lay down 4 entries at
285
+ * a time.
286
+ */
287
+ static U64 HUF_DEltX1_set4(BYTE symbol, BYTE nbBits) {
288
+ U64 D4;
289
+ if (MEM_isLittleEndian()) {
290
+ D4 = (symbol << 8) + nbBits;
291
+ } else {
292
+ D4 = symbol + (nbBits << 8);
293
+ }
294
+ D4 *= 0x0001000100010001ULL;
295
+ return D4;
296
+ }
297
+
298
+ /**
299
+ * Increase the tableLog to targetTableLog and rescales the stats.
300
+ * If tableLog > targetTableLog this is a no-op.
301
+ * @returns New tableLog
302
+ */
303
+ static U32 HUF_rescaleStats(BYTE* huffWeight, U32* rankVal, U32 nbSymbols, U32 tableLog, U32 targetTableLog)
304
+ {
305
+ if (tableLog > targetTableLog)
306
+ return tableLog;
307
+ if (tableLog < targetTableLog) {
308
+ U32 const scale = targetTableLog - tableLog;
309
+ U32 s;
310
+ /* Increase the weight for all non-zero probability symbols by scale. */
311
+ for (s = 0; s < nbSymbols; ++s) {
312
+ huffWeight[s] += (BYTE)((huffWeight[s] == 0) ? 0 : scale);
313
+ }
314
+ /* Update rankVal to reflect the new weights.
315
+ * All weights except 0 get moved to weight + scale.
316
+ * Weights [1, scale] are empty.
317
+ */
318
+ for (s = targetTableLog; s > scale; --s) {
319
+ rankVal[s] = rankVal[s - scale];
320
+ }
321
+ for (s = scale; s > 0; --s) {
322
+ rankVal[s] = 0;
323
+ }
324
+ }
325
+ return targetTableLog;
326
+ }
327
+
328
+ typedef struct {
329
+ U32 rankVal[HUF_TABLELOG_ABSOLUTEMAX + 1];
330
+ U32 rankStart[HUF_TABLELOG_ABSOLUTEMAX + 1];
331
+ U32 statsWksp[HUF_READ_STATS_WORKSPACE_SIZE_U32];
332
+ BYTE symbols[HUF_SYMBOLVALUE_MAX + 1];
333
+ BYTE huffWeight[HUF_SYMBOLVALUE_MAX + 1];
334
+ } HUF_ReadDTableX1_Workspace;
335
+
336
+
337
+ size_t HUF_readDTableX1_wksp(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize)
338
+ {
339
+ return HUF_readDTableX1_wksp_bmi2(DTable, src, srcSize, workSpace, wkspSize, /* bmi2 */ 0);
340
+ }
341
+
342
+ size_t HUF_readDTableX1_wksp_bmi2(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize, int bmi2)
343
+ {
344
+ U32 tableLog = 0;
345
+ U32 nbSymbols = 0;
346
+ size_t iSize;
347
+ void* const dtPtr = DTable + 1;
348
+ HUF_DEltX1* const dt = (HUF_DEltX1*)dtPtr;
349
+ HUF_ReadDTableX1_Workspace* wksp = (HUF_ReadDTableX1_Workspace*)workSpace;
350
+
351
+ DEBUG_STATIC_ASSERT(HUF_DECOMPRESS_WORKSPACE_SIZE >= sizeof(*wksp));
352
+ if (sizeof(*wksp) > wkspSize) return ERROR(tableLog_tooLarge);
353
+
354
+ DEBUG_STATIC_ASSERT(sizeof(DTableDesc) == sizeof(HUF_DTable));
355
+ /* ZSTD_memset(huffWeight, 0, sizeof(huffWeight)); */ /* is not necessary, even though some analyzer complain ... */
356
+
357
+ iSize = HUF_readStats_wksp(wksp->huffWeight, HUF_SYMBOLVALUE_MAX + 1, wksp->rankVal, &nbSymbols, &tableLog, src, srcSize, wksp->statsWksp, sizeof(wksp->statsWksp), bmi2);
358
+ if (HUF_isError(iSize)) return iSize;
359
+
360
+
361
+ /* Table header */
362
+ { DTableDesc dtd = HUF_getDTableDesc(DTable);
363
+ U32 const maxTableLog = dtd.maxTableLog + 1;
364
+ U32 const targetTableLog = MIN(maxTableLog, HUF_DECODER_FAST_TABLELOG);
365
+ tableLog = HUF_rescaleStats(wksp->huffWeight, wksp->rankVal, nbSymbols, tableLog, targetTableLog);
366
+ if (tableLog > (U32)(dtd.maxTableLog+1)) return ERROR(tableLog_tooLarge); /* DTable too small, Huffman tree cannot fit in */
367
+ dtd.tableType = 0;
368
+ dtd.tableLog = (BYTE)tableLog;
369
+ ZSTD_memcpy(DTable, &dtd, sizeof(dtd));
370
+ }
371
+
372
+ /* Compute symbols and rankStart given rankVal:
373
+ *
374
+ * rankVal already contains the number of values of each weight.
375
+ *
376
+ * symbols contains the symbols ordered by weight. First are the rankVal[0]
377
+ * weight 0 symbols, followed by the rankVal[1] weight 1 symbols, and so on.
378
+ * symbols[0] is filled (but unused) to avoid a branch.
379
+ *
380
+ * rankStart contains the offset where each rank belongs in the DTable.
381
+ * rankStart[0] is not filled because there are no entries in the table for
382
+ * weight 0.
383
+ */
384
+ {
385
+ int n;
386
+ int nextRankStart = 0;
387
+ int const unroll = 4;
388
+ int const nLimit = (int)nbSymbols - unroll + 1;
389
+ for (n=0; n<(int)tableLog+1; n++) {
390
+ U32 const curr = nextRankStart;
391
+ nextRankStart += wksp->rankVal[n];
392
+ wksp->rankStart[n] = curr;
393
+ }
394
+ for (n=0; n < nLimit; n += unroll) {
395
+ int u;
396
+ for (u=0; u < unroll; ++u) {
397
+ size_t const w = wksp->huffWeight[n+u];
398
+ wksp->symbols[wksp->rankStart[w]++] = (BYTE)(n+u);
399
+ }
400
+ }
401
+ for (; n < (int)nbSymbols; ++n) {
402
+ size_t const w = wksp->huffWeight[n];
403
+ wksp->symbols[wksp->rankStart[w]++] = (BYTE)n;
404
+ }
405
+ }
406
+
407
+ /* fill DTable
408
+ * We fill all entries of each weight in order.
409
+ * That way length is a constant for each iteration of the outer loop.
410
+ * We can switch based on the length to a different inner loop which is
411
+ * optimized for that particular case.
412
+ */
413
+ {
414
+ U32 w;
415
+ int symbol=wksp->rankVal[0];
416
+ int rankStart=0;
417
+ for (w=1; w<tableLog+1; ++w) {
418
+ int const symbolCount = wksp->rankVal[w];
419
+ int const length = (1 << w) >> 1;
420
+ int uStart = rankStart;
421
+ BYTE const nbBits = (BYTE)(tableLog + 1 - w);
422
+ int s;
423
+ int u;
424
+ switch (length) {
425
+ case 1:
426
+ for (s=0; s<symbolCount; ++s) {
427
+ HUF_DEltX1 D;
428
+ D.byte = wksp->symbols[symbol + s];
429
+ D.nbBits = nbBits;
430
+ dt[uStart] = D;
431
+ uStart += 1;
432
+ }
433
+ break;
434
+ case 2:
435
+ for (s=0; s<symbolCount; ++s) {
436
+ HUF_DEltX1 D;
437
+ D.byte = wksp->symbols[symbol + s];
438
+ D.nbBits = nbBits;
439
+ dt[uStart+0] = D;
440
+ dt[uStart+1] = D;
441
+ uStart += 2;
442
+ }
443
+ break;
444
+ case 4:
445
+ for (s=0; s<symbolCount; ++s) {
446
+ U64 const D4 = HUF_DEltX1_set4(wksp->symbols[symbol + s], nbBits);
447
+ MEM_write64(dt + uStart, D4);
448
+ uStart += 4;
449
+ }
450
+ break;
451
+ case 8:
452
+ for (s=0; s<symbolCount; ++s) {
453
+ U64 const D4 = HUF_DEltX1_set4(wksp->symbols[symbol + s], nbBits);
454
+ MEM_write64(dt + uStart, D4);
455
+ MEM_write64(dt + uStart + 4, D4);
456
+ uStart += 8;
457
+ }
458
+ break;
459
+ default:
460
+ for (s=0; s<symbolCount; ++s) {
461
+ U64 const D4 = HUF_DEltX1_set4(wksp->symbols[symbol + s], nbBits);
462
+ for (u=0; u < length; u += 16) {
463
+ MEM_write64(dt + uStart + u + 0, D4);
464
+ MEM_write64(dt + uStart + u + 4, D4);
465
+ MEM_write64(dt + uStart + u + 8, D4);
466
+ MEM_write64(dt + uStart + u + 12, D4);
467
+ }
468
+ assert(u == length);
469
+ uStart += length;
470
+ }
471
+ break;
472
+ }
473
+ symbol += symbolCount;
474
+ rankStart += symbolCount * length;
475
+ }
476
+ }
477
+ return iSize;
478
+ }
479
+
480
+ FORCE_INLINE_TEMPLATE BYTE
481
+ HUF_decodeSymbolX1(BIT_DStream_t* Dstream, const HUF_DEltX1* dt, const U32 dtLog)
482
+ {
483
+ size_t const val = BIT_lookBitsFast(Dstream, dtLog); /* note : dtLog >= 1 */
484
+ BYTE const c = dt[val].byte;
485
+ BIT_skipBits(Dstream, dt[val].nbBits);
486
+ return c;
487
+ }
488
+
489
+ #define HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr) \
490
+ *ptr++ = HUF_decodeSymbolX1(DStreamPtr, dt, dtLog)
491
+
492
+ #define HUF_DECODE_SYMBOLX1_1(ptr, DStreamPtr) \
493
+ if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \
494
+ HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr)
495
+
496
+ #define HUF_DECODE_SYMBOLX1_2(ptr, DStreamPtr) \
497
+ if (MEM_64bits()) \
498
+ HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr)
499
+
500
+ HINT_INLINE size_t
501
+ HUF_decodeStreamX1(BYTE* p, BIT_DStream_t* const bitDPtr, BYTE* const pEnd, const HUF_DEltX1* const dt, const U32 dtLog)
502
+ {
503
+ BYTE* const pStart = p;
504
+
505
+ /* up to 4 symbols at a time */
506
+ if ((pEnd - p) > 3) {
507
+ while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd-3)) {
508
+ HUF_DECODE_SYMBOLX1_2(p, bitDPtr);
509
+ HUF_DECODE_SYMBOLX1_1(p, bitDPtr);
510
+ HUF_DECODE_SYMBOLX1_2(p, bitDPtr);
511
+ HUF_DECODE_SYMBOLX1_0(p, bitDPtr);
512
+ }
513
+ } else {
514
+ BIT_reloadDStream(bitDPtr);
515
+ }
516
+
517
+ /* [0-3] symbols remaining */
518
+ if (MEM_32bits())
519
+ while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd))
520
+ HUF_DECODE_SYMBOLX1_0(p, bitDPtr);
521
+
522
+ /* no more data to retrieve from bitstream, no need to reload */
523
+ while (p < pEnd)
524
+ HUF_DECODE_SYMBOLX1_0(p, bitDPtr);
525
+
526
+ return pEnd-pStart;
527
+ }
528
+
529
+ FORCE_INLINE_TEMPLATE size_t
530
+ HUF_decompress1X1_usingDTable_internal_body(
531
+ void* dst, size_t dstSize,
532
+ const void* cSrc, size_t cSrcSize,
533
+ const HUF_DTable* DTable)
534
+ {
535
+ BYTE* op = (BYTE*)dst;
536
+ BYTE* const oend = op + dstSize;
537
+ const void* dtPtr = DTable + 1;
538
+ const HUF_DEltX1* const dt = (const HUF_DEltX1*)dtPtr;
539
+ BIT_DStream_t bitD;
540
+ DTableDesc const dtd = HUF_getDTableDesc(DTable);
541
+ U32 const dtLog = dtd.tableLog;
542
+
543
+ CHECK_F( BIT_initDStream(&bitD, cSrc, cSrcSize) );
544
+
545
+ HUF_decodeStreamX1(op, &bitD, oend, dt, dtLog);
546
+
547
+ if (!BIT_endOfDStream(&bitD)) return ERROR(corruption_detected);
548
+
549
+ return dstSize;
550
+ }
551
+
552
+ FORCE_INLINE_TEMPLATE size_t
553
+ HUF_decompress4X1_usingDTable_internal_body(
554
+ void* dst, size_t dstSize,
555
+ const void* cSrc, size_t cSrcSize,
556
+ const HUF_DTable* DTable)
557
+ {
558
+ /* Check */
559
+ if (cSrcSize < 10) return ERROR(corruption_detected); /* strict minimum : jump table + 1 byte per stream */
560
+
561
+ { const BYTE* const istart = (const BYTE*) cSrc;
562
+ BYTE* const ostart = (BYTE*) dst;
563
+ BYTE* const oend = ostart + dstSize;
564
+ BYTE* const olimit = oend - 3;
565
+ const void* const dtPtr = DTable + 1;
566
+ const HUF_DEltX1* const dt = (const HUF_DEltX1*)dtPtr;
567
+
568
+ /* Init */
569
+ BIT_DStream_t bitD1;
570
+ BIT_DStream_t bitD2;
571
+ BIT_DStream_t bitD3;
572
+ BIT_DStream_t bitD4;
573
+ size_t const length1 = MEM_readLE16(istart);
574
+ size_t const length2 = MEM_readLE16(istart+2);
575
+ size_t const length3 = MEM_readLE16(istart+4);
576
+ size_t const length4 = cSrcSize - (length1 + length2 + length3 + 6);
577
+ const BYTE* const istart1 = istart + 6; /* jumpTable */
578
+ const BYTE* const istart2 = istart1 + length1;
579
+ const BYTE* const istart3 = istart2 + length2;
580
+ const BYTE* const istart4 = istart3 + length3;
581
+ const size_t segmentSize = (dstSize+3) / 4;
582
+ BYTE* const opStart2 = ostart + segmentSize;
583
+ BYTE* const opStart3 = opStart2 + segmentSize;
584
+ BYTE* const opStart4 = opStart3 + segmentSize;
585
+ BYTE* op1 = ostart;
586
+ BYTE* op2 = opStart2;
587
+ BYTE* op3 = opStart3;
588
+ BYTE* op4 = opStart4;
589
+ DTableDesc const dtd = HUF_getDTableDesc(DTable);
590
+ U32 const dtLog = dtd.tableLog;
591
+ U32 endSignal = 1;
592
+
593
+ if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */
594
+ if (opStart4 > oend) return ERROR(corruption_detected); /* overflow */
595
+ CHECK_F( BIT_initDStream(&bitD1, istart1, length1) );
596
+ CHECK_F( BIT_initDStream(&bitD2, istart2, length2) );
597
+ CHECK_F( BIT_initDStream(&bitD3, istart3, length3) );
598
+ CHECK_F( BIT_initDStream(&bitD4, istart4, length4) );
599
+
600
+ /* up to 16 symbols per loop (4 symbols per stream) in 64-bit mode */
601
+ if ((size_t)(oend - op4) >= sizeof(size_t)) {
602
+ for ( ; (endSignal) & (op4 < olimit) ; ) {
603
+ HUF_DECODE_SYMBOLX1_2(op1, &bitD1);
604
+ HUF_DECODE_SYMBOLX1_2(op2, &bitD2);
605
+ HUF_DECODE_SYMBOLX1_2(op3, &bitD3);
606
+ HUF_DECODE_SYMBOLX1_2(op4, &bitD4);
607
+ HUF_DECODE_SYMBOLX1_1(op1, &bitD1);
608
+ HUF_DECODE_SYMBOLX1_1(op2, &bitD2);
609
+ HUF_DECODE_SYMBOLX1_1(op3, &bitD3);
610
+ HUF_DECODE_SYMBOLX1_1(op4, &bitD4);
611
+ HUF_DECODE_SYMBOLX1_2(op1, &bitD1);
612
+ HUF_DECODE_SYMBOLX1_2(op2, &bitD2);
613
+ HUF_DECODE_SYMBOLX1_2(op3, &bitD3);
614
+ HUF_DECODE_SYMBOLX1_2(op4, &bitD4);
615
+ HUF_DECODE_SYMBOLX1_0(op1, &bitD1);
616
+ HUF_DECODE_SYMBOLX1_0(op2, &bitD2);
617
+ HUF_DECODE_SYMBOLX1_0(op3, &bitD3);
618
+ HUF_DECODE_SYMBOLX1_0(op4, &bitD4);
619
+ endSignal &= BIT_reloadDStreamFast(&bitD1) == BIT_DStream_unfinished;
620
+ endSignal &= BIT_reloadDStreamFast(&bitD2) == BIT_DStream_unfinished;
621
+ endSignal &= BIT_reloadDStreamFast(&bitD3) == BIT_DStream_unfinished;
622
+ endSignal &= BIT_reloadDStreamFast(&bitD4) == BIT_DStream_unfinished;
623
+ }
624
+ }
625
+
626
+ /* check corruption */
627
+ /* note : should not be necessary : op# advance in lock step, and we control op4.
628
+ * but curiously, binary generated by gcc 7.2 & 7.3 with -mbmi2 runs faster when >=1 test is present */
629
+ if (op1 > opStart2) return ERROR(corruption_detected);
630
+ if (op2 > opStart3) return ERROR(corruption_detected);
631
+ if (op3 > opStart4) return ERROR(corruption_detected);
632
+ /* note : op4 supposed already verified within main loop */
633
+
634
+ /* finish bitStreams one by one */
635
+ HUF_decodeStreamX1(op1, &bitD1, opStart2, dt, dtLog);
636
+ HUF_decodeStreamX1(op2, &bitD2, opStart3, dt, dtLog);
637
+ HUF_decodeStreamX1(op3, &bitD3, opStart4, dt, dtLog);
638
+ HUF_decodeStreamX1(op4, &bitD4, oend, dt, dtLog);
639
+
640
+ /* check */
641
+ { U32 const endCheck = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4);
642
+ if (!endCheck) return ERROR(corruption_detected); }
643
+
644
+ /* decoded size */
645
+ return dstSize;
646
+ }
647
+ }
648
+
649
+ #if HUF_NEED_BMI2_FUNCTION
650
+ static BMI2_TARGET_ATTRIBUTE
651
+ size_t HUF_decompress4X1_usingDTable_internal_bmi2(void* dst, size_t dstSize, void const* cSrc,
652
+ size_t cSrcSize, HUF_DTable const* DTable) {
653
+ return HUF_decompress4X1_usingDTable_internal_body(dst, dstSize, cSrc, cSrcSize, DTable);
654
+ }
655
+ #endif
656
+
657
+ #if HUF_NEED_DEFAULT_FUNCTION
658
+ static
659
+ size_t HUF_decompress4X1_usingDTable_internal_default(void* dst, size_t dstSize, void const* cSrc,
660
+ size_t cSrcSize, HUF_DTable const* DTable) {
661
+ return HUF_decompress4X1_usingDTable_internal_body(dst, dstSize, cSrc, cSrcSize, DTable);
662
+ }
663
+ #endif
664
+
665
+ #if ZSTD_ENABLE_ASM_X86_64_BMI2
666
+
667
+ HUF_ASM_DECL void HUF_decompress4X1_usingDTable_internal_bmi2_asm_loop(HUF_DecompressAsmArgs* args) ZSTDLIB_HIDDEN;
668
+
669
+ static HUF_ASM_X86_64_BMI2_ATTRS
670
+ size_t
671
+ HUF_decompress4X1_usingDTable_internal_bmi2_asm(
672
+ void* dst, size_t dstSize,
673
+ const void* cSrc, size_t cSrcSize,
674
+ const HUF_DTable* DTable)
675
+ {
676
+ void const* dt = DTable + 1;
677
+ const BYTE* const iend = (const BYTE*)cSrc + 6;
678
+ BYTE* const oend = (BYTE*)dst + dstSize;
679
+ HUF_DecompressAsmArgs args;
680
+ {
681
+ size_t const ret = HUF_DecompressAsmArgs_init(&args, dst, dstSize, cSrc, cSrcSize, DTable);
682
+ FORWARD_IF_ERROR(ret, "Failed to init asm args");
683
+ if (ret != 0)
684
+ return HUF_decompress4X1_usingDTable_internal_bmi2(dst, dstSize, cSrc, cSrcSize, DTable);
685
+ }
686
+
687
+ assert(args.ip[0] >= args.ilimit);
688
+ HUF_decompress4X1_usingDTable_internal_bmi2_asm_loop(&args);
689
+
690
+ /* Our loop guarantees that ip[] >= ilimit and that we haven't
691
+ * overwritten any op[].
692
+ */
693
+ assert(args.ip[0] >= iend);
694
+ assert(args.ip[1] >= iend);
695
+ assert(args.ip[2] >= iend);
696
+ assert(args.ip[3] >= iend);
697
+ assert(args.op[3] <= oend);
698
+ (void)iend;
699
+
700
+ /* finish bit streams one by one. */
701
+ {
702
+ size_t const segmentSize = (dstSize+3) / 4;
703
+ BYTE* segmentEnd = (BYTE*)dst;
704
+ int i;
705
+ for (i = 0; i < 4; ++i) {
706
+ BIT_DStream_t bit;
707
+ if (segmentSize <= (size_t)(oend - segmentEnd))
708
+ segmentEnd += segmentSize;
709
+ else
710
+ segmentEnd = oend;
711
+ FORWARD_IF_ERROR(HUF_initRemainingDStream(&bit, &args, i, segmentEnd), "corruption");
712
+ /* Decompress and validate that we've produced exactly the expected length. */
713
+ args.op[i] += HUF_decodeStreamX1(args.op[i], &bit, segmentEnd, (HUF_DEltX1 const*)dt, HUF_DECODER_FAST_TABLELOG);
714
+ if (args.op[i] != segmentEnd) return ERROR(corruption_detected);
715
+ }
716
+ }
717
+
718
+ /* decoded size */
719
+ return dstSize;
720
+ }
721
+ #endif /* ZSTD_ENABLE_ASM_X86_64_BMI2 */
722
+
723
+ typedef size_t (*HUF_decompress_usingDTable_t)(void *dst, size_t dstSize,
724
+ const void *cSrc,
725
+ size_t cSrcSize,
726
+ const HUF_DTable *DTable);
727
+
728
+ HUF_DGEN(HUF_decompress1X1_usingDTable_internal)
729
+
730
+ static size_t HUF_decompress4X1_usingDTable_internal(void* dst, size_t dstSize, void const* cSrc,
731
+ size_t cSrcSize, HUF_DTable const* DTable, int bmi2)
732
+ {
733
+ #if DYNAMIC_BMI2
734
+ if (bmi2) {
735
+ # if ZSTD_ENABLE_ASM_X86_64_BMI2
736
+ return HUF_decompress4X1_usingDTable_internal_bmi2_asm(dst, dstSize, cSrc, cSrcSize, DTable);
737
+ # else
738
+ return HUF_decompress4X1_usingDTable_internal_bmi2(dst, dstSize, cSrc, cSrcSize, DTable);
739
+ # endif
740
+ }
741
+ #else
742
+ (void)bmi2;
743
+ #endif
744
+
745
+ #if ZSTD_ENABLE_ASM_X86_64_BMI2 && defined(__BMI2__)
746
+ return HUF_decompress4X1_usingDTable_internal_bmi2_asm(dst, dstSize, cSrc, cSrcSize, DTable);
747
+ #else
748
+ return HUF_decompress4X1_usingDTable_internal_default(dst, dstSize, cSrc, cSrcSize, DTable);
749
+ #endif
750
+ }
751
+
752
+
753
+ size_t HUF_decompress1X1_usingDTable(
754
+ void* dst, size_t dstSize,
755
+ const void* cSrc, size_t cSrcSize,
756
+ const HUF_DTable* DTable)
757
+ {
758
+ DTableDesc dtd = HUF_getDTableDesc(DTable);
759
+ if (dtd.tableType != 0) return ERROR(GENERIC);
760
+ return HUF_decompress1X1_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
761
+ }
762
+
763
+ size_t HUF_decompress1X1_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize,
764
+ const void* cSrc, size_t cSrcSize,
765
+ void* workSpace, size_t wkspSize)
766
+ {
767
+ const BYTE* ip = (const BYTE*) cSrc;
768
+
769
+ size_t const hSize = HUF_readDTableX1_wksp(DCtx, cSrc, cSrcSize, workSpace, wkspSize);
770
+ if (HUF_isError(hSize)) return hSize;
771
+ if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
772
+ ip += hSize; cSrcSize -= hSize;
773
+
774
+ return HUF_decompress1X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, DCtx, /* bmi2 */ 0);
775
+ }
776
+
777
+
778
+ size_t HUF_decompress4X1_usingDTable(
779
+ void* dst, size_t dstSize,
780
+ const void* cSrc, size_t cSrcSize,
781
+ const HUF_DTable* DTable)
782
+ {
783
+ DTableDesc dtd = HUF_getDTableDesc(DTable);
784
+ if (dtd.tableType != 0) return ERROR(GENERIC);
785
+ return HUF_decompress4X1_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
786
+ }
787
+
788
+ static size_t HUF_decompress4X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize,
789
+ const void* cSrc, size_t cSrcSize,
790
+ void* workSpace, size_t wkspSize, int bmi2)
791
+ {
792
+ const BYTE* ip = (const BYTE*) cSrc;
793
+
794
+ size_t const hSize = HUF_readDTableX1_wksp_bmi2(dctx, cSrc, cSrcSize, workSpace, wkspSize, bmi2);
795
+ if (HUF_isError(hSize)) return hSize;
796
+ if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
797
+ ip += hSize; cSrcSize -= hSize;
798
+
799
+ return HUF_decompress4X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2);
800
+ }
801
+
802
+ size_t HUF_decompress4X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
803
+ const void* cSrc, size_t cSrcSize,
804
+ void* workSpace, size_t wkspSize)
805
+ {
806
+ return HUF_decompress4X1_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, 0);
807
+ }
808
+
809
+
810
+ #endif /* HUF_FORCE_DECOMPRESS_X2 */
811
+
812
+
813
+ #ifndef HUF_FORCE_DECOMPRESS_X1
814
+
815
+ /* *************************/
816
+ /* double-symbols decoding */
817
+ /* *************************/
818
+
819
+ typedef struct { U16 sequence; BYTE nbBits; BYTE length; } HUF_DEltX2; /* double-symbols decoding */
820
+ typedef struct { BYTE symbol; } sortedSymbol_t;
821
+ typedef U32 rankValCol_t[HUF_TABLELOG_MAX + 1];
822
+ typedef rankValCol_t rankVal_t[HUF_TABLELOG_MAX];
823
+
824
+ /**
825
+ * Constructs a HUF_DEltX2 in a U32.
826
+ */
827
+ static U32 HUF_buildDEltX2U32(U32 symbol, U32 nbBits, U32 baseSeq, int level)
828
+ {
829
+ U32 seq;
830
+ DEBUG_STATIC_ASSERT(offsetof(HUF_DEltX2, sequence) == 0);
831
+ DEBUG_STATIC_ASSERT(offsetof(HUF_DEltX2, nbBits) == 2);
832
+ DEBUG_STATIC_ASSERT(offsetof(HUF_DEltX2, length) == 3);
833
+ DEBUG_STATIC_ASSERT(sizeof(HUF_DEltX2) == sizeof(U32));
834
+ if (MEM_isLittleEndian()) {
835
+ seq = level == 1 ? symbol : (baseSeq + (symbol << 8));
836
+ return seq + (nbBits << 16) + ((U32)level << 24);
837
+ } else {
838
+ seq = level == 1 ? (symbol << 8) : ((baseSeq << 8) + symbol);
839
+ return (seq << 16) + (nbBits << 8) + (U32)level;
840
+ }
841
+ }
842
+
843
+ /**
844
+ * Constructs a HUF_DEltX2.
845
+ */
846
+ static HUF_DEltX2 HUF_buildDEltX2(U32 symbol, U32 nbBits, U32 baseSeq, int level)
847
+ {
848
+ HUF_DEltX2 DElt;
849
+ U32 const val = HUF_buildDEltX2U32(symbol, nbBits, baseSeq, level);
850
+ DEBUG_STATIC_ASSERT(sizeof(DElt) == sizeof(val));
851
+ ZSTD_memcpy(&DElt, &val, sizeof(val));
852
+ return DElt;
853
+ }
854
+
855
+ /**
856
+ * Constructs 2 HUF_DEltX2s and packs them into a U64.
857
+ */
858
+ static U64 HUF_buildDEltX2U64(U32 symbol, U32 nbBits, U16 baseSeq, int level)
859
+ {
860
+ U32 DElt = HUF_buildDEltX2U32(symbol, nbBits, baseSeq, level);
861
+ return (U64)DElt + ((U64)DElt << 32);
862
+ }
863
+
864
+ /**
865
+ * Fills the DTable rank with all the symbols from [begin, end) that are each
866
+ * nbBits long.
867
+ *
868
+ * @param DTableRank The start of the rank in the DTable.
869
+ * @param begin The first symbol to fill (inclusive).
870
+ * @param end The last symbol to fill (exclusive).
871
+ * @param nbBits Each symbol is nbBits long.
872
+ * @param tableLog The table log.
873
+ * @param baseSeq If level == 1 { 0 } else { the first level symbol }
874
+ * @param level The level in the table. Must be 1 or 2.
875
+ */
876
+ static void HUF_fillDTableX2ForWeight(
877
+ HUF_DEltX2* DTableRank,
878
+ sortedSymbol_t const* begin, sortedSymbol_t const* end,
879
+ U32 nbBits, U32 tableLog,
880
+ U16 baseSeq, int const level)
881
+ {
882
+ U32 const length = 1U << ((tableLog - nbBits) & 0x1F /* quiet static-analyzer */);
883
+ const sortedSymbol_t* ptr;
884
+ assert(level >= 1 && level <= 2);
885
+ switch (length) {
886
+ case 1:
887
+ for (ptr = begin; ptr != end; ++ptr) {
888
+ HUF_DEltX2 const DElt = HUF_buildDEltX2(ptr->symbol, nbBits, baseSeq, level);
889
+ *DTableRank++ = DElt;
890
+ }
891
+ break;
892
+ case 2:
893
+ for (ptr = begin; ptr != end; ++ptr) {
894
+ HUF_DEltX2 const DElt = HUF_buildDEltX2(ptr->symbol, nbBits, baseSeq, level);
895
+ DTableRank[0] = DElt;
896
+ DTableRank[1] = DElt;
897
+ DTableRank += 2;
898
+ }
899
+ break;
900
+ case 4:
901
+ for (ptr = begin; ptr != end; ++ptr) {
902
+ U64 const DEltX2 = HUF_buildDEltX2U64(ptr->symbol, nbBits, baseSeq, level);
903
+ ZSTD_memcpy(DTableRank + 0, &DEltX2, sizeof(DEltX2));
904
+ ZSTD_memcpy(DTableRank + 2, &DEltX2, sizeof(DEltX2));
905
+ DTableRank += 4;
906
+ }
907
+ break;
908
+ case 8:
909
+ for (ptr = begin; ptr != end; ++ptr) {
910
+ U64 const DEltX2 = HUF_buildDEltX2U64(ptr->symbol, nbBits, baseSeq, level);
911
+ ZSTD_memcpy(DTableRank + 0, &DEltX2, sizeof(DEltX2));
912
+ ZSTD_memcpy(DTableRank + 2, &DEltX2, sizeof(DEltX2));
913
+ ZSTD_memcpy(DTableRank + 4, &DEltX2, sizeof(DEltX2));
914
+ ZSTD_memcpy(DTableRank + 6, &DEltX2, sizeof(DEltX2));
915
+ DTableRank += 8;
916
+ }
917
+ break;
918
+ default:
919
+ for (ptr = begin; ptr != end; ++ptr) {
920
+ U64 const DEltX2 = HUF_buildDEltX2U64(ptr->symbol, nbBits, baseSeq, level);
921
+ HUF_DEltX2* const DTableRankEnd = DTableRank + length;
922
+ for (; DTableRank != DTableRankEnd; DTableRank += 8) {
923
+ ZSTD_memcpy(DTableRank + 0, &DEltX2, sizeof(DEltX2));
924
+ ZSTD_memcpy(DTableRank + 2, &DEltX2, sizeof(DEltX2));
925
+ ZSTD_memcpy(DTableRank + 4, &DEltX2, sizeof(DEltX2));
926
+ ZSTD_memcpy(DTableRank + 6, &DEltX2, sizeof(DEltX2));
927
+ }
928
+ }
929
+ break;
930
+ }
931
+ }
932
+
933
+ /* HUF_fillDTableX2Level2() :
934
+ * `rankValOrigin` must be a table of at least (HUF_TABLELOG_MAX + 1) U32 */
935
+ static void HUF_fillDTableX2Level2(HUF_DEltX2* DTable, U32 targetLog, const U32 consumedBits,
936
+ const U32* rankVal, const int minWeight, const int maxWeight1,
937
+ const sortedSymbol_t* sortedSymbols, U32 const* rankStart,
938
+ U32 nbBitsBaseline, U16 baseSeq)
939
+ {
940
+ /* Fill skipped values (all positions up to rankVal[minWeight]).
941
+ * These are positions only get a single symbol because the combined weight
942
+ * is too large.
943
+ */
944
+ if (minWeight>1) {
945
+ U32 const length = 1U << ((targetLog - consumedBits) & 0x1F /* quiet static-analyzer */);
946
+ U64 const DEltX2 = HUF_buildDEltX2U64(baseSeq, consumedBits, /* baseSeq */ 0, /* level */ 1);
947
+ int const skipSize = rankVal[minWeight];
948
+ assert(length > 1);
949
+ assert((U32)skipSize < length);
950
+ switch (length) {
951
+ case 2:
952
+ assert(skipSize == 1);
953
+ ZSTD_memcpy(DTable, &DEltX2, sizeof(DEltX2));
954
+ break;
955
+ case 4:
956
+ assert(skipSize <= 4);
957
+ ZSTD_memcpy(DTable + 0, &DEltX2, sizeof(DEltX2));
958
+ ZSTD_memcpy(DTable + 2, &DEltX2, sizeof(DEltX2));
959
+ break;
960
+ default:
961
+ {
962
+ int i;
963
+ for (i = 0; i < skipSize; i += 8) {
964
+ ZSTD_memcpy(DTable + i + 0, &DEltX2, sizeof(DEltX2));
965
+ ZSTD_memcpy(DTable + i + 2, &DEltX2, sizeof(DEltX2));
966
+ ZSTD_memcpy(DTable + i + 4, &DEltX2, sizeof(DEltX2));
967
+ ZSTD_memcpy(DTable + i + 6, &DEltX2, sizeof(DEltX2));
968
+ }
969
+ }
970
+ }
971
+ }
972
+
973
+ /* Fill each of the second level symbols by weight. */
974
+ {
975
+ int w;
976
+ for (w = minWeight; w < maxWeight1; ++w) {
977
+ int const begin = rankStart[w];
978
+ int const end = rankStart[w+1];
979
+ U32 const nbBits = nbBitsBaseline - w;
980
+ U32 const totalBits = nbBits + consumedBits;
981
+ HUF_fillDTableX2ForWeight(
982
+ DTable + rankVal[w],
983
+ sortedSymbols + begin, sortedSymbols + end,
984
+ totalBits, targetLog,
985
+ baseSeq, /* level */ 2);
986
+ }
987
+ }
988
+ }
989
+
990
+ static void HUF_fillDTableX2(HUF_DEltX2* DTable, const U32 targetLog,
991
+ const sortedSymbol_t* sortedList,
992
+ const U32* rankStart, rankVal_t rankValOrigin, const U32 maxWeight,
993
+ const U32 nbBitsBaseline)
994
+ {
995
+ U32* const rankVal = rankValOrigin[0];
996
+ const int scaleLog = nbBitsBaseline - targetLog; /* note : targetLog >= srcLog, hence scaleLog <= 1 */
997
+ const U32 minBits = nbBitsBaseline - maxWeight;
998
+ int w;
999
+ int const wEnd = (int)maxWeight + 1;
1000
+
1001
+ /* Fill DTable in order of weight. */
1002
+ for (w = 1; w < wEnd; ++w) {
1003
+ int const begin = (int)rankStart[w];
1004
+ int const end = (int)rankStart[w+1];
1005
+ U32 const nbBits = nbBitsBaseline - w;
1006
+
1007
+ if (targetLog-nbBits >= minBits) {
1008
+ /* Enough room for a second symbol. */
1009
+ int start = rankVal[w];
1010
+ U32 const length = 1U << ((targetLog - nbBits) & 0x1F /* quiet static-analyzer */);
1011
+ int minWeight = nbBits + scaleLog;
1012
+ int s;
1013
+ if (minWeight < 1) minWeight = 1;
1014
+ /* Fill the DTable for every symbol of weight w.
1015
+ * These symbols get at least 1 second symbol.
1016
+ */
1017
+ for (s = begin; s != end; ++s) {
1018
+ HUF_fillDTableX2Level2(
1019
+ DTable + start, targetLog, nbBits,
1020
+ rankValOrigin[nbBits], minWeight, wEnd,
1021
+ sortedList, rankStart,
1022
+ nbBitsBaseline, sortedList[s].symbol);
1023
+ start += length;
1024
+ }
1025
+ } else {
1026
+ /* Only a single symbol. */
1027
+ HUF_fillDTableX2ForWeight(
1028
+ DTable + rankVal[w],
1029
+ sortedList + begin, sortedList + end,
1030
+ nbBits, targetLog,
1031
+ /* baseSeq */ 0, /* level */ 1);
1032
+ }
1033
+ }
1034
+ }
1035
+
1036
+ typedef struct {
1037
+ rankValCol_t rankVal[HUF_TABLELOG_MAX];
1038
+ U32 rankStats[HUF_TABLELOG_MAX + 1];
1039
+ U32 rankStart0[HUF_TABLELOG_MAX + 3];
1040
+ sortedSymbol_t sortedSymbol[HUF_SYMBOLVALUE_MAX + 1];
1041
+ BYTE weightList[HUF_SYMBOLVALUE_MAX + 1];
1042
+ U32 calleeWksp[HUF_READ_STATS_WORKSPACE_SIZE_U32];
1043
+ } HUF_ReadDTableX2_Workspace;
1044
+
1045
+ size_t HUF_readDTableX2_wksp(HUF_DTable* DTable,
1046
+ const void* src, size_t srcSize,
1047
+ void* workSpace, size_t wkspSize)
1048
+ {
1049
+ return HUF_readDTableX2_wksp_bmi2(DTable, src, srcSize, workSpace, wkspSize, /* bmi2 */ 0);
1050
+ }
1051
+
1052
+ size_t HUF_readDTableX2_wksp_bmi2(HUF_DTable* DTable,
1053
+ const void* src, size_t srcSize,
1054
+ void* workSpace, size_t wkspSize, int bmi2)
1055
+ {
1056
+ U32 tableLog, maxW, nbSymbols;
1057
+ DTableDesc dtd = HUF_getDTableDesc(DTable);
1058
+ U32 maxTableLog = dtd.maxTableLog;
1059
+ size_t iSize;
1060
+ void* dtPtr = DTable+1; /* force compiler to avoid strict-aliasing */
1061
+ HUF_DEltX2* const dt = (HUF_DEltX2*)dtPtr;
1062
+ U32 *rankStart;
1063
+
1064
+ HUF_ReadDTableX2_Workspace* const wksp = (HUF_ReadDTableX2_Workspace*)workSpace;
1065
+
1066
+ if (sizeof(*wksp) > wkspSize) return ERROR(GENERIC);
1067
+
1068
+ rankStart = wksp->rankStart0 + 1;
1069
+ ZSTD_memset(wksp->rankStats, 0, sizeof(wksp->rankStats));
1070
+ ZSTD_memset(wksp->rankStart0, 0, sizeof(wksp->rankStart0));
1071
+
1072
+ DEBUG_STATIC_ASSERT(sizeof(HUF_DEltX2) == sizeof(HUF_DTable)); /* if compiler fails here, assertion is wrong */
1073
+ if (maxTableLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge);
1074
+ /* ZSTD_memset(weightList, 0, sizeof(weightList)); */ /* is not necessary, even though some analyzer complain ... */
1075
+
1076
+ iSize = HUF_readStats_wksp(wksp->weightList, HUF_SYMBOLVALUE_MAX + 1, wksp->rankStats, &nbSymbols, &tableLog, src, srcSize, wksp->calleeWksp, sizeof(wksp->calleeWksp), bmi2);
1077
+ if (HUF_isError(iSize)) return iSize;
1078
+
1079
+ /* check result */
1080
+ if (tableLog > maxTableLog) return ERROR(tableLog_tooLarge); /* DTable can't fit code depth */
1081
+ if (tableLog <= HUF_DECODER_FAST_TABLELOG && maxTableLog > HUF_DECODER_FAST_TABLELOG) maxTableLog = HUF_DECODER_FAST_TABLELOG;
1082
+
1083
+ /* find maxWeight */
1084
+ for (maxW = tableLog; wksp->rankStats[maxW]==0; maxW--) {} /* necessarily finds a solution before 0 */
1085
+
1086
+ /* Get start index of each weight */
1087
+ { U32 w, nextRankStart = 0;
1088
+ for (w=1; w<maxW+1; w++) {
1089
+ U32 curr = nextRankStart;
1090
+ nextRankStart += wksp->rankStats[w];
1091
+ rankStart[w] = curr;
1092
+ }
1093
+ rankStart[0] = nextRankStart; /* put all 0w symbols at the end of sorted list*/
1094
+ rankStart[maxW+1] = nextRankStart;
1095
+ }
1096
+
1097
+ /* sort symbols by weight */
1098
+ { U32 s;
1099
+ for (s=0; s<nbSymbols; s++) {
1100
+ U32 const w = wksp->weightList[s];
1101
+ U32 const r = rankStart[w]++;
1102
+ wksp->sortedSymbol[r].symbol = (BYTE)s;
1103
+ }
1104
+ rankStart[0] = 0; /* forget 0w symbols; this is beginning of weight(1) */
1105
+ }
1106
+
1107
+ /* Build rankVal */
1108
+ { U32* const rankVal0 = wksp->rankVal[0];
1109
+ { int const rescale = (maxTableLog-tableLog) - 1; /* tableLog <= maxTableLog */
1110
+ U32 nextRankVal = 0;
1111
+ U32 w;
1112
+ for (w=1; w<maxW+1; w++) {
1113
+ U32 curr = nextRankVal;
1114
+ nextRankVal += wksp->rankStats[w] << (w+rescale);
1115
+ rankVal0[w] = curr;
1116
+ } }
1117
+ { U32 const minBits = tableLog+1 - maxW;
1118
+ U32 consumed;
1119
+ for (consumed = minBits; consumed < maxTableLog - minBits + 1; consumed++) {
1120
+ U32* const rankValPtr = wksp->rankVal[consumed];
1121
+ U32 w;
1122
+ for (w = 1; w < maxW+1; w++) {
1123
+ rankValPtr[w] = rankVal0[w] >> consumed;
1124
+ } } } }
1125
+
1126
+ HUF_fillDTableX2(dt, maxTableLog,
1127
+ wksp->sortedSymbol,
1128
+ wksp->rankStart0, wksp->rankVal, maxW,
1129
+ tableLog+1);
1130
+
1131
+ dtd.tableLog = (BYTE)maxTableLog;
1132
+ dtd.tableType = 1;
1133
+ ZSTD_memcpy(DTable, &dtd, sizeof(dtd));
1134
+ return iSize;
1135
+ }
1136
+
1137
+
1138
+ FORCE_INLINE_TEMPLATE U32
1139
+ HUF_decodeSymbolX2(void* op, BIT_DStream_t* DStream, const HUF_DEltX2* dt, const U32 dtLog)
1140
+ {
1141
+ size_t const val = BIT_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */
1142
+ ZSTD_memcpy(op, &dt[val].sequence, 2);
1143
+ BIT_skipBits(DStream, dt[val].nbBits);
1144
+ return dt[val].length;
1145
+ }
1146
+
1147
+ FORCE_INLINE_TEMPLATE U32
1148
+ HUF_decodeLastSymbolX2(void* op, BIT_DStream_t* DStream, const HUF_DEltX2* dt, const U32 dtLog)
1149
+ {
1150
+ size_t const val = BIT_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */
1151
+ ZSTD_memcpy(op, &dt[val].sequence, 1);
1152
+ if (dt[val].length==1) {
1153
+ BIT_skipBits(DStream, dt[val].nbBits);
1154
+ } else {
1155
+ if (DStream->bitsConsumed < (sizeof(DStream->bitContainer)*8)) {
1156
+ BIT_skipBits(DStream, dt[val].nbBits);
1157
+ if (DStream->bitsConsumed > (sizeof(DStream->bitContainer)*8))
1158
+ /* ugly hack; works only because it's the last symbol. Note : can't easily extract nbBits from just this symbol */
1159
+ DStream->bitsConsumed = (sizeof(DStream->bitContainer)*8);
1160
+ }
1161
+ }
1162
+ return 1;
1163
+ }
1164
+
1165
+ #define HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr) \
1166
+ ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog)
1167
+
1168
+ #define HUF_DECODE_SYMBOLX2_1(ptr, DStreamPtr) \
1169
+ if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \
1170
+ ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog)
1171
+
1172
+ #define HUF_DECODE_SYMBOLX2_2(ptr, DStreamPtr) \
1173
+ if (MEM_64bits()) \
1174
+ ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog)
1175
+
1176
+ HINT_INLINE size_t
1177
+ HUF_decodeStreamX2(BYTE* p, BIT_DStream_t* bitDPtr, BYTE* const pEnd,
1178
+ const HUF_DEltX2* const dt, const U32 dtLog)
1179
+ {
1180
+ BYTE* const pStart = p;
1181
+
1182
+ /* up to 8 symbols at a time */
1183
+ if ((size_t)(pEnd - p) >= sizeof(bitDPtr->bitContainer)) {
1184
+ if (dtLog <= 11 && MEM_64bits()) {
1185
+ /* up to 10 symbols at a time */
1186
+ while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd-9)) {
1187
+ HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
1188
+ HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
1189
+ HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
1190
+ HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
1191
+ HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
1192
+ }
1193
+ } else {
1194
+ /* up to 8 symbols at a time */
1195
+ while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd-(sizeof(bitDPtr->bitContainer)-1))) {
1196
+ HUF_DECODE_SYMBOLX2_2(p, bitDPtr);
1197
+ HUF_DECODE_SYMBOLX2_1(p, bitDPtr);
1198
+ HUF_DECODE_SYMBOLX2_2(p, bitDPtr);
1199
+ HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
1200
+ }
1201
+ }
1202
+ } else {
1203
+ BIT_reloadDStream(bitDPtr);
1204
+ }
1205
+
1206
+ /* closer to end : up to 2 symbols at a time */
1207
+ if ((size_t)(pEnd - p) >= 2) {
1208
+ while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p <= pEnd-2))
1209
+ HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
1210
+
1211
+ while (p <= pEnd-2)
1212
+ HUF_DECODE_SYMBOLX2_0(p, bitDPtr); /* no need to reload : reached the end of DStream */
1213
+ }
1214
+
1215
+ if (p < pEnd)
1216
+ p += HUF_decodeLastSymbolX2(p, bitDPtr, dt, dtLog);
1217
+
1218
+ return p-pStart;
1219
+ }
1220
+
1221
+ FORCE_INLINE_TEMPLATE size_t
1222
+ HUF_decompress1X2_usingDTable_internal_body(
1223
+ void* dst, size_t dstSize,
1224
+ const void* cSrc, size_t cSrcSize,
1225
+ const HUF_DTable* DTable)
1226
+ {
1227
+ BIT_DStream_t bitD;
1228
+
1229
+ /* Init */
1230
+ CHECK_F( BIT_initDStream(&bitD, cSrc, cSrcSize) );
1231
+
1232
+ /* decode */
1233
+ { BYTE* const ostart = (BYTE*) dst;
1234
+ BYTE* const oend = ostart + dstSize;
1235
+ const void* const dtPtr = DTable+1; /* force compiler to not use strict-aliasing */
1236
+ const HUF_DEltX2* const dt = (const HUF_DEltX2*)dtPtr;
1237
+ DTableDesc const dtd = HUF_getDTableDesc(DTable);
1238
+ HUF_decodeStreamX2(ostart, &bitD, oend, dt, dtd.tableLog);
1239
+ }
1240
+
1241
+ /* check */
1242
+ if (!BIT_endOfDStream(&bitD)) return ERROR(corruption_detected);
1243
+
1244
+ /* decoded size */
1245
+ return dstSize;
1246
+ }
1247
+ FORCE_INLINE_TEMPLATE size_t
1248
+ HUF_decompress4X2_usingDTable_internal_body(
1249
+ void* dst, size_t dstSize,
1250
+ const void* cSrc, size_t cSrcSize,
1251
+ const HUF_DTable* DTable)
1252
+ {
1253
+ if (cSrcSize < 10) return ERROR(corruption_detected); /* strict minimum : jump table + 1 byte per stream */
1254
+
1255
+ { const BYTE* const istart = (const BYTE*) cSrc;
1256
+ BYTE* const ostart = (BYTE*) dst;
1257
+ BYTE* const oend = ostart + dstSize;
1258
+ BYTE* const olimit = oend - (sizeof(size_t)-1);
1259
+ const void* const dtPtr = DTable+1;
1260
+ const HUF_DEltX2* const dt = (const HUF_DEltX2*)dtPtr;
1261
+
1262
+ /* Init */
1263
+ BIT_DStream_t bitD1;
1264
+ BIT_DStream_t bitD2;
1265
+ BIT_DStream_t bitD3;
1266
+ BIT_DStream_t bitD4;
1267
+ size_t const length1 = MEM_readLE16(istart);
1268
+ size_t const length2 = MEM_readLE16(istart+2);
1269
+ size_t const length3 = MEM_readLE16(istart+4);
1270
+ size_t const length4 = cSrcSize - (length1 + length2 + length3 + 6);
1271
+ const BYTE* const istart1 = istart + 6; /* jumpTable */
1272
+ const BYTE* const istart2 = istart1 + length1;
1273
+ const BYTE* const istart3 = istart2 + length2;
1274
+ const BYTE* const istart4 = istart3 + length3;
1275
+ size_t const segmentSize = (dstSize+3) / 4;
1276
+ BYTE* const opStart2 = ostart + segmentSize;
1277
+ BYTE* const opStart3 = opStart2 + segmentSize;
1278
+ BYTE* const opStart4 = opStart3 + segmentSize;
1279
+ BYTE* op1 = ostart;
1280
+ BYTE* op2 = opStart2;
1281
+ BYTE* op3 = opStart3;
1282
+ BYTE* op4 = opStart4;
1283
+ U32 endSignal = 1;
1284
+ DTableDesc const dtd = HUF_getDTableDesc(DTable);
1285
+ U32 const dtLog = dtd.tableLog;
1286
+
1287
+ if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */
1288
+ if (opStart4 > oend) return ERROR(corruption_detected); /* overflow */
1289
+ CHECK_F( BIT_initDStream(&bitD1, istart1, length1) );
1290
+ CHECK_F( BIT_initDStream(&bitD2, istart2, length2) );
1291
+ CHECK_F( BIT_initDStream(&bitD3, istart3, length3) );
1292
+ CHECK_F( BIT_initDStream(&bitD4, istart4, length4) );
1293
+
1294
+ /* 16-32 symbols per loop (4-8 symbols per stream) */
1295
+ if ((size_t)(oend - op4) >= sizeof(size_t)) {
1296
+ for ( ; (endSignal) & (op4 < olimit); ) {
1297
+ #if defined(__clang__) && (defined(__x86_64__) || defined(__i386__))
1298
+ HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
1299
+ HUF_DECODE_SYMBOLX2_1(op1, &bitD1);
1300
+ HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
1301
+ HUF_DECODE_SYMBOLX2_0(op1, &bitD1);
1302
+ HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
1303
+ HUF_DECODE_SYMBOLX2_1(op2, &bitD2);
1304
+ HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
1305
+ HUF_DECODE_SYMBOLX2_0(op2, &bitD2);
1306
+ endSignal &= BIT_reloadDStreamFast(&bitD1) == BIT_DStream_unfinished;
1307
+ endSignal &= BIT_reloadDStreamFast(&bitD2) == BIT_DStream_unfinished;
1308
+ HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
1309
+ HUF_DECODE_SYMBOLX2_1(op3, &bitD3);
1310
+ HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
1311
+ HUF_DECODE_SYMBOLX2_0(op3, &bitD3);
1312
+ HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
1313
+ HUF_DECODE_SYMBOLX2_1(op4, &bitD4);
1314
+ HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
1315
+ HUF_DECODE_SYMBOLX2_0(op4, &bitD4);
1316
+ endSignal &= BIT_reloadDStreamFast(&bitD3) == BIT_DStream_unfinished;
1317
+ endSignal &= BIT_reloadDStreamFast(&bitD4) == BIT_DStream_unfinished;
1318
+ #else
1319
+ HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
1320
+ HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
1321
+ HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
1322
+ HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
1323
+ HUF_DECODE_SYMBOLX2_1(op1, &bitD1);
1324
+ HUF_DECODE_SYMBOLX2_1(op2, &bitD2);
1325
+ HUF_DECODE_SYMBOLX2_1(op3, &bitD3);
1326
+ HUF_DECODE_SYMBOLX2_1(op4, &bitD4);
1327
+ HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
1328
+ HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
1329
+ HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
1330
+ HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
1331
+ HUF_DECODE_SYMBOLX2_0(op1, &bitD1);
1332
+ HUF_DECODE_SYMBOLX2_0(op2, &bitD2);
1333
+ HUF_DECODE_SYMBOLX2_0(op3, &bitD3);
1334
+ HUF_DECODE_SYMBOLX2_0(op4, &bitD4);
1335
+ endSignal = (U32)LIKELY((U32)
1336
+ (BIT_reloadDStreamFast(&bitD1) == BIT_DStream_unfinished)
1337
+ & (BIT_reloadDStreamFast(&bitD2) == BIT_DStream_unfinished)
1338
+ & (BIT_reloadDStreamFast(&bitD3) == BIT_DStream_unfinished)
1339
+ & (BIT_reloadDStreamFast(&bitD4) == BIT_DStream_unfinished));
1340
+ #endif
1341
+ }
1342
+ }
1343
+
1344
+ /* check corruption */
1345
+ if (op1 > opStart2) return ERROR(corruption_detected);
1346
+ if (op2 > opStart3) return ERROR(corruption_detected);
1347
+ if (op3 > opStart4) return ERROR(corruption_detected);
1348
+ /* note : op4 already verified within main loop */
1349
+
1350
+ /* finish bitStreams one by one */
1351
+ HUF_decodeStreamX2(op1, &bitD1, opStart2, dt, dtLog);
1352
+ HUF_decodeStreamX2(op2, &bitD2, opStart3, dt, dtLog);
1353
+ HUF_decodeStreamX2(op3, &bitD3, opStart4, dt, dtLog);
1354
+ HUF_decodeStreamX2(op4, &bitD4, oend, dt, dtLog);
1355
+
1356
+ /* check */
1357
+ { U32 const endCheck = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4);
1358
+ if (!endCheck) return ERROR(corruption_detected); }
1359
+
1360
+ /* decoded size */
1361
+ return dstSize;
1362
+ }
1363
+ }
1364
+
1365
+ #if HUF_NEED_BMI2_FUNCTION
1366
+ static BMI2_TARGET_ATTRIBUTE
1367
+ size_t HUF_decompress4X2_usingDTable_internal_bmi2(void* dst, size_t dstSize, void const* cSrc,
1368
+ size_t cSrcSize, HUF_DTable const* DTable) {
1369
+ return HUF_decompress4X2_usingDTable_internal_body(dst, dstSize, cSrc, cSrcSize, DTable);
1370
+ }
1371
+ #endif
1372
+
1373
+ #if HUF_NEED_DEFAULT_FUNCTION
1374
+ static
1375
+ size_t HUF_decompress4X2_usingDTable_internal_default(void* dst, size_t dstSize, void const* cSrc,
1376
+ size_t cSrcSize, HUF_DTable const* DTable) {
1377
+ return HUF_decompress4X2_usingDTable_internal_body(dst, dstSize, cSrc, cSrcSize, DTable);
1378
+ }
1379
+ #endif
1380
+
1381
+ #if ZSTD_ENABLE_ASM_X86_64_BMI2
1382
+
1383
+ HUF_ASM_DECL void HUF_decompress4X2_usingDTable_internal_bmi2_asm_loop(HUF_DecompressAsmArgs* args) ZSTDLIB_HIDDEN;
1384
+
1385
+ static HUF_ASM_X86_64_BMI2_ATTRS size_t
1386
+ HUF_decompress4X2_usingDTable_internal_bmi2_asm(
1387
+ void* dst, size_t dstSize,
1388
+ const void* cSrc, size_t cSrcSize,
1389
+ const HUF_DTable* DTable) {
1390
+ void const* dt = DTable + 1;
1391
+ const BYTE* const iend = (const BYTE*)cSrc + 6;
1392
+ BYTE* const oend = (BYTE*)dst + dstSize;
1393
+ HUF_DecompressAsmArgs args;
1394
+ {
1395
+ size_t const ret = HUF_DecompressAsmArgs_init(&args, dst, dstSize, cSrc, cSrcSize, DTable);
1396
+ FORWARD_IF_ERROR(ret, "Failed to init asm args");
1397
+ if (ret != 0)
1398
+ return HUF_decompress4X2_usingDTable_internal_bmi2(dst, dstSize, cSrc, cSrcSize, DTable);
1399
+ }
1400
+
1401
+ assert(args.ip[0] >= args.ilimit);
1402
+ HUF_decompress4X2_usingDTable_internal_bmi2_asm_loop(&args);
1403
+
1404
+ /* note : op4 already verified within main loop */
1405
+ assert(args.ip[0] >= iend);
1406
+ assert(args.ip[1] >= iend);
1407
+ assert(args.ip[2] >= iend);
1408
+ assert(args.ip[3] >= iend);
1409
+ assert(args.op[3] <= oend);
1410
+ (void)iend;
1411
+
1412
+ /* finish bitStreams one by one */
1413
+ {
1414
+ size_t const segmentSize = (dstSize+3) / 4;
1415
+ BYTE* segmentEnd = (BYTE*)dst;
1416
+ int i;
1417
+ for (i = 0; i < 4; ++i) {
1418
+ BIT_DStream_t bit;
1419
+ if (segmentSize <= (size_t)(oend - segmentEnd))
1420
+ segmentEnd += segmentSize;
1421
+ else
1422
+ segmentEnd = oend;
1423
+ FORWARD_IF_ERROR(HUF_initRemainingDStream(&bit, &args, i, segmentEnd), "corruption");
1424
+ args.op[i] += HUF_decodeStreamX2(args.op[i], &bit, segmentEnd, (HUF_DEltX2 const*)dt, HUF_DECODER_FAST_TABLELOG);
1425
+ if (args.op[i] != segmentEnd)
1426
+ return ERROR(corruption_detected);
1427
+ }
1428
+ }
1429
+
1430
+ /* decoded size */
1431
+ return dstSize;
1432
+ }
1433
+ #endif /* ZSTD_ENABLE_ASM_X86_64_BMI2 */
1434
+
1435
+ static size_t HUF_decompress4X2_usingDTable_internal(void* dst, size_t dstSize, void const* cSrc,
1436
+ size_t cSrcSize, HUF_DTable const* DTable, int bmi2)
1437
+ {
1438
+ #if DYNAMIC_BMI2
1439
+ if (bmi2) {
1440
+ # if ZSTD_ENABLE_ASM_X86_64_BMI2
1441
+ return HUF_decompress4X2_usingDTable_internal_bmi2_asm(dst, dstSize, cSrc, cSrcSize, DTable);
1442
+ # else
1443
+ return HUF_decompress4X2_usingDTable_internal_bmi2(dst, dstSize, cSrc, cSrcSize, DTable);
1444
+ # endif
1445
+ }
1446
+ #else
1447
+ (void)bmi2;
1448
+ #endif
1449
+
1450
+ #if ZSTD_ENABLE_ASM_X86_64_BMI2 && defined(__BMI2__)
1451
+ return HUF_decompress4X2_usingDTable_internal_bmi2_asm(dst, dstSize, cSrc, cSrcSize, DTable);
1452
+ #else
1453
+ return HUF_decompress4X2_usingDTable_internal_default(dst, dstSize, cSrc, cSrcSize, DTable);
1454
+ #endif
1455
+ }
1456
+
1457
+ HUF_DGEN(HUF_decompress1X2_usingDTable_internal)
1458
+
1459
+ size_t HUF_decompress1X2_usingDTable(
1460
+ void* dst, size_t dstSize,
1461
+ const void* cSrc, size_t cSrcSize,
1462
+ const HUF_DTable* DTable)
1463
+ {
1464
+ DTableDesc dtd = HUF_getDTableDesc(DTable);
1465
+ if (dtd.tableType != 1) return ERROR(GENERIC);
1466
+ return HUF_decompress1X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
1467
+ }
1468
+
1469
+ size_t HUF_decompress1X2_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize,
1470
+ const void* cSrc, size_t cSrcSize,
1471
+ void* workSpace, size_t wkspSize)
1472
+ {
1473
+ const BYTE* ip = (const BYTE*) cSrc;
1474
+
1475
+ size_t const hSize = HUF_readDTableX2_wksp(DCtx, cSrc, cSrcSize,
1476
+ workSpace, wkspSize);
1477
+ if (HUF_isError(hSize)) return hSize;
1478
+ if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
1479
+ ip += hSize; cSrcSize -= hSize;
1480
+
1481
+ return HUF_decompress1X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, DCtx, /* bmi2 */ 0);
1482
+ }
1483
+
1484
+
1485
+ size_t HUF_decompress4X2_usingDTable(
1486
+ void* dst, size_t dstSize,
1487
+ const void* cSrc, size_t cSrcSize,
1488
+ const HUF_DTable* DTable)
1489
+ {
1490
+ DTableDesc dtd = HUF_getDTableDesc(DTable);
1491
+ if (dtd.tableType != 1) return ERROR(GENERIC);
1492
+ return HUF_decompress4X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
1493
+ }
1494
+
1495
+ static size_t HUF_decompress4X2_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize,
1496
+ const void* cSrc, size_t cSrcSize,
1497
+ void* workSpace, size_t wkspSize, int bmi2)
1498
+ {
1499
+ const BYTE* ip = (const BYTE*) cSrc;
1500
+
1501
+ size_t hSize = HUF_readDTableX2_wksp(dctx, cSrc, cSrcSize,
1502
+ workSpace, wkspSize);
1503
+ if (HUF_isError(hSize)) return hSize;
1504
+ if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
1505
+ ip += hSize; cSrcSize -= hSize;
1506
+
1507
+ return HUF_decompress4X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2);
1508
+ }
1509
+
1510
+ size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
1511
+ const void* cSrc, size_t cSrcSize,
1512
+ void* workSpace, size_t wkspSize)
1513
+ {
1514
+ return HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, /* bmi2 */ 0);
1515
+ }
1516
+
1517
+
1518
+ #endif /* HUF_FORCE_DECOMPRESS_X1 */
1519
+
1520
+
1521
+ /* ***********************************/
1522
+ /* Universal decompression selectors */
1523
+ /* ***********************************/
1524
+
1525
+ size_t HUF_decompress1X_usingDTable(void* dst, size_t maxDstSize,
1526
+ const void* cSrc, size_t cSrcSize,
1527
+ const HUF_DTable* DTable)
1528
+ {
1529
+ DTableDesc const dtd = HUF_getDTableDesc(DTable);
1530
+ #if defined(HUF_FORCE_DECOMPRESS_X1)
1531
+ (void)dtd;
1532
+ assert(dtd.tableType == 0);
1533
+ return HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
1534
+ #elif defined(HUF_FORCE_DECOMPRESS_X2)
1535
+ (void)dtd;
1536
+ assert(dtd.tableType == 1);
1537
+ return HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
1538
+ #else
1539
+ return dtd.tableType ? HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0) :
1540
+ HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
1541
+ #endif
1542
+ }
1543
+
1544
+ size_t HUF_decompress4X_usingDTable(void* dst, size_t maxDstSize,
1545
+ const void* cSrc, size_t cSrcSize,
1546
+ const HUF_DTable* DTable)
1547
+ {
1548
+ DTableDesc const dtd = HUF_getDTableDesc(DTable);
1549
+ #if defined(HUF_FORCE_DECOMPRESS_X1)
1550
+ (void)dtd;
1551
+ assert(dtd.tableType == 0);
1552
+ return HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
1553
+ #elif defined(HUF_FORCE_DECOMPRESS_X2)
1554
+ (void)dtd;
1555
+ assert(dtd.tableType == 1);
1556
+ return HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
1557
+ #else
1558
+ return dtd.tableType ? HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0) :
1559
+ HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
1560
+ #endif
1561
+ }
1562
+
1563
+
1564
+ #if !defined(HUF_FORCE_DECOMPRESS_X1) && !defined(HUF_FORCE_DECOMPRESS_X2)
1565
+ typedef struct { U32 tableTime; U32 decode256Time; } algo_time_t;
1566
+ static const algo_time_t algoTime[16 /* Quantization */][2 /* single, double */] =
1567
+ {
1568
+ /* single, double, quad */
1569
+ {{0,0}, {1,1}}, /* Q==0 : impossible */
1570
+ {{0,0}, {1,1}}, /* Q==1 : impossible */
1571
+ {{ 150,216}, { 381,119}}, /* Q == 2 : 12-18% */
1572
+ {{ 170,205}, { 514,112}}, /* Q == 3 : 18-25% */
1573
+ {{ 177,199}, { 539,110}}, /* Q == 4 : 25-32% */
1574
+ {{ 197,194}, { 644,107}}, /* Q == 5 : 32-38% */
1575
+ {{ 221,192}, { 735,107}}, /* Q == 6 : 38-44% */
1576
+ {{ 256,189}, { 881,106}}, /* Q == 7 : 44-50% */
1577
+ {{ 359,188}, {1167,109}}, /* Q == 8 : 50-56% */
1578
+ {{ 582,187}, {1570,114}}, /* Q == 9 : 56-62% */
1579
+ {{ 688,187}, {1712,122}}, /* Q ==10 : 62-69% */
1580
+ {{ 825,186}, {1965,136}}, /* Q ==11 : 69-75% */
1581
+ {{ 976,185}, {2131,150}}, /* Q ==12 : 75-81% */
1582
+ {{1180,186}, {2070,175}}, /* Q ==13 : 81-87% */
1583
+ {{1377,185}, {1731,202}}, /* Q ==14 : 87-93% */
1584
+ {{1412,185}, {1695,202}}, /* Q ==15 : 93-99% */
1585
+ };
1586
+ #endif
1587
+
1588
+ /** HUF_selectDecoder() :
1589
+ * Tells which decoder is likely to decode faster,
1590
+ * based on a set of pre-computed metrics.
1591
+ * @return : 0==HUF_decompress4X1, 1==HUF_decompress4X2 .
1592
+ * Assumption : 0 < dstSize <= 128 KB */
1593
+ U32 HUF_selectDecoder (size_t dstSize, size_t cSrcSize)
1594
+ {
1595
+ assert(dstSize > 0);
1596
+ assert(dstSize <= 128*1024);
1597
+ #if defined(HUF_FORCE_DECOMPRESS_X1)
1598
+ (void)dstSize;
1599
+ (void)cSrcSize;
1600
+ return 0;
1601
+ #elif defined(HUF_FORCE_DECOMPRESS_X2)
1602
+ (void)dstSize;
1603
+ (void)cSrcSize;
1604
+ return 1;
1605
+ #else
1606
+ /* decoder timing evaluation */
1607
+ { U32 const Q = (cSrcSize >= dstSize) ? 15 : (U32)(cSrcSize * 16 / dstSize); /* Q < 16 */
1608
+ U32 const D256 = (U32)(dstSize >> 8);
1609
+ U32 const DTime0 = algoTime[Q][0].tableTime + (algoTime[Q][0].decode256Time * D256);
1610
+ U32 DTime1 = algoTime[Q][1].tableTime + (algoTime[Q][1].decode256Time * D256);
1611
+ DTime1 += DTime1 >> 5; /* small advantage to algorithm using less memory, to reduce cache eviction */
1612
+ return DTime1 < DTime0;
1613
+ }
1614
+ #endif
1615
+ }
1616
+
1617
+
1618
+ size_t HUF_decompress4X_hufOnly_wksp(HUF_DTable* dctx, void* dst,
1619
+ size_t dstSize, const void* cSrc,
1620
+ size_t cSrcSize, void* workSpace,
1621
+ size_t wkspSize)
1622
+ {
1623
+ /* validation checks */
1624
+ if (dstSize == 0) return ERROR(dstSize_tooSmall);
1625
+ if (cSrcSize == 0) return ERROR(corruption_detected);
1626
+
1627
+ { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
1628
+ #if defined(HUF_FORCE_DECOMPRESS_X1)
1629
+ (void)algoNb;
1630
+ assert(algoNb == 0);
1631
+ return HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize);
1632
+ #elif defined(HUF_FORCE_DECOMPRESS_X2)
1633
+ (void)algoNb;
1634
+ assert(algoNb == 1);
1635
+ return HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize);
1636
+ #else
1637
+ return algoNb ? HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc,
1638
+ cSrcSize, workSpace, wkspSize):
1639
+ HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize);
1640
+ #endif
1641
+ }
1642
+ }
1643
+
1644
+ size_t HUF_decompress1X_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
1645
+ const void* cSrc, size_t cSrcSize,
1646
+ void* workSpace, size_t wkspSize)
1647
+ {
1648
+ /* validation checks */
1649
+ if (dstSize == 0) return ERROR(dstSize_tooSmall);
1650
+ if (cSrcSize > dstSize) return ERROR(corruption_detected); /* invalid */
1651
+ if (cSrcSize == dstSize) { ZSTD_memcpy(dst, cSrc, dstSize); return dstSize; } /* not compressed */
1652
+ if (cSrcSize == 1) { ZSTD_memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */
1653
+
1654
+ { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
1655
+ #if defined(HUF_FORCE_DECOMPRESS_X1)
1656
+ (void)algoNb;
1657
+ assert(algoNb == 0);
1658
+ return HUF_decompress1X1_DCtx_wksp(dctx, dst, dstSize, cSrc,
1659
+ cSrcSize, workSpace, wkspSize);
1660
+ #elif defined(HUF_FORCE_DECOMPRESS_X2)
1661
+ (void)algoNb;
1662
+ assert(algoNb == 1);
1663
+ return HUF_decompress1X2_DCtx_wksp(dctx, dst, dstSize, cSrc,
1664
+ cSrcSize, workSpace, wkspSize);
1665
+ #else
1666
+ return algoNb ? HUF_decompress1X2_DCtx_wksp(dctx, dst, dstSize, cSrc,
1667
+ cSrcSize, workSpace, wkspSize):
1668
+ HUF_decompress1X1_DCtx_wksp(dctx, dst, dstSize, cSrc,
1669
+ cSrcSize, workSpace, wkspSize);
1670
+ #endif
1671
+ }
1672
+ }
1673
+
1674
+
1675
+ size_t HUF_decompress1X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2)
1676
+ {
1677
+ DTableDesc const dtd = HUF_getDTableDesc(DTable);
1678
+ #if defined(HUF_FORCE_DECOMPRESS_X1)
1679
+ (void)dtd;
1680
+ assert(dtd.tableType == 0);
1681
+ return HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
1682
+ #elif defined(HUF_FORCE_DECOMPRESS_X2)
1683
+ (void)dtd;
1684
+ assert(dtd.tableType == 1);
1685
+ return HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
1686
+ #else
1687
+ return dtd.tableType ? HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2) :
1688
+ HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
1689
+ #endif
1690
+ }
1691
+
1692
+ #ifndef HUF_FORCE_DECOMPRESS_X2
1693
+ size_t HUF_decompress1X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2)
1694
+ {
1695
+ const BYTE* ip = (const BYTE*) cSrc;
1696
+
1697
+ size_t const hSize = HUF_readDTableX1_wksp_bmi2(dctx, cSrc, cSrcSize, workSpace, wkspSize, bmi2);
1698
+ if (HUF_isError(hSize)) return hSize;
1699
+ if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
1700
+ ip += hSize; cSrcSize -= hSize;
1701
+
1702
+ return HUF_decompress1X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2);
1703
+ }
1704
+ #endif
1705
+
1706
+ size_t HUF_decompress4X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2)
1707
+ {
1708
+ DTableDesc const dtd = HUF_getDTableDesc(DTable);
1709
+ #if defined(HUF_FORCE_DECOMPRESS_X1)
1710
+ (void)dtd;
1711
+ assert(dtd.tableType == 0);
1712
+ return HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
1713
+ #elif defined(HUF_FORCE_DECOMPRESS_X2)
1714
+ (void)dtd;
1715
+ assert(dtd.tableType == 1);
1716
+ return HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
1717
+ #else
1718
+ return dtd.tableType ? HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2) :
1719
+ HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
1720
+ #endif
1721
+ }
1722
+
1723
+ size_t HUF_decompress4X_hufOnly_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2)
1724
+ {
1725
+ /* validation checks */
1726
+ if (dstSize == 0) return ERROR(dstSize_tooSmall);
1727
+ if (cSrcSize == 0) return ERROR(corruption_detected);
1728
+
1729
+ { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
1730
+ #if defined(HUF_FORCE_DECOMPRESS_X1)
1731
+ (void)algoNb;
1732
+ assert(algoNb == 0);
1733
+ return HUF_decompress4X1_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2);
1734
+ #elif defined(HUF_FORCE_DECOMPRESS_X2)
1735
+ (void)algoNb;
1736
+ assert(algoNb == 1);
1737
+ return HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2);
1738
+ #else
1739
+ return algoNb ? HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2) :
1740
+ HUF_decompress4X1_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2);
1741
+ #endif
1742
+ }
1743
+ }
1744
+
1745
+ #ifndef ZSTD_NO_UNUSED_FUNCTIONS
1746
+ #ifndef HUF_FORCE_DECOMPRESS_X2
1747
+ size_t HUF_readDTableX1(HUF_DTable* DTable, const void* src, size_t srcSize)
1748
+ {
1749
+ U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
1750
+ return HUF_readDTableX1_wksp(DTable, src, srcSize,
1751
+ workSpace, sizeof(workSpace));
1752
+ }
1753
+
1754
+ size_t HUF_decompress1X1_DCtx(HUF_DTable* DCtx, void* dst, size_t dstSize,
1755
+ const void* cSrc, size_t cSrcSize)
1756
+ {
1757
+ U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
1758
+ return HUF_decompress1X1_DCtx_wksp(DCtx, dst, dstSize, cSrc, cSrcSize,
1759
+ workSpace, sizeof(workSpace));
1760
+ }
1761
+
1762
+ size_t HUF_decompress1X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
1763
+ {
1764
+ HUF_CREATE_STATIC_DTABLEX1(DTable, HUF_TABLELOG_MAX);
1765
+ return HUF_decompress1X1_DCtx (DTable, dst, dstSize, cSrc, cSrcSize);
1766
+ }
1767
+ #endif
1768
+
1769
+ #ifndef HUF_FORCE_DECOMPRESS_X1
1770
+ size_t HUF_readDTableX2(HUF_DTable* DTable, const void* src, size_t srcSize)
1771
+ {
1772
+ U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
1773
+ return HUF_readDTableX2_wksp(DTable, src, srcSize,
1774
+ workSpace, sizeof(workSpace));
1775
+ }
1776
+
1777
+ size_t HUF_decompress1X2_DCtx(HUF_DTable* DCtx, void* dst, size_t dstSize,
1778
+ const void* cSrc, size_t cSrcSize)
1779
+ {
1780
+ U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
1781
+ return HUF_decompress1X2_DCtx_wksp(DCtx, dst, dstSize, cSrc, cSrcSize,
1782
+ workSpace, sizeof(workSpace));
1783
+ }
1784
+
1785
+ size_t HUF_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
1786
+ {
1787
+ HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_TABLELOG_MAX);
1788
+ return HUF_decompress1X2_DCtx(DTable, dst, dstSize, cSrc, cSrcSize);
1789
+ }
1790
+ #endif
1791
+
1792
+ #ifndef HUF_FORCE_DECOMPRESS_X2
1793
+ size_t HUF_decompress4X1_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
1794
+ {
1795
+ U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
1796
+ return HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
1797
+ workSpace, sizeof(workSpace));
1798
+ }
1799
+ size_t HUF_decompress4X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
1800
+ {
1801
+ HUF_CREATE_STATIC_DTABLEX1(DTable, HUF_TABLELOG_MAX);
1802
+ return HUF_decompress4X1_DCtx(DTable, dst, dstSize, cSrc, cSrcSize);
1803
+ }
1804
+ #endif
1805
+
1806
+ #ifndef HUF_FORCE_DECOMPRESS_X1
1807
+ size_t HUF_decompress4X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize,
1808
+ const void* cSrc, size_t cSrcSize)
1809
+ {
1810
+ U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
1811
+ return HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
1812
+ workSpace, sizeof(workSpace));
1813
+ }
1814
+
1815
+ size_t HUF_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
1816
+ {
1817
+ HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_TABLELOG_MAX);
1818
+ return HUF_decompress4X2_DCtx(DTable, dst, dstSize, cSrc, cSrcSize);
1819
+ }
1820
+ #endif
1821
+
1822
+ typedef size_t (*decompressionAlgo)(void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);
1823
+
1824
+ size_t HUF_decompress (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
1825
+ {
1826
+ #if !defined(HUF_FORCE_DECOMPRESS_X1) && !defined(HUF_FORCE_DECOMPRESS_X2)
1827
+ static const decompressionAlgo decompress[2] = { HUF_decompress4X1, HUF_decompress4X2 };
1828
+ #endif
1829
+
1830
+ /* validation checks */
1831
+ if (dstSize == 0) return ERROR(dstSize_tooSmall);
1832
+ if (cSrcSize > dstSize) return ERROR(corruption_detected); /* invalid */
1833
+ if (cSrcSize == dstSize) { ZSTD_memcpy(dst, cSrc, dstSize); return dstSize; } /* not compressed */
1834
+ if (cSrcSize == 1) { ZSTD_memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */
1835
+
1836
+ { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
1837
+ #if defined(HUF_FORCE_DECOMPRESS_X1)
1838
+ (void)algoNb;
1839
+ assert(algoNb == 0);
1840
+ return HUF_decompress4X1(dst, dstSize, cSrc, cSrcSize);
1841
+ #elif defined(HUF_FORCE_DECOMPRESS_X2)
1842
+ (void)algoNb;
1843
+ assert(algoNb == 1);
1844
+ return HUF_decompress4X2(dst, dstSize, cSrc, cSrcSize);
1845
+ #else
1846
+ return decompress[algoNb](dst, dstSize, cSrc, cSrcSize);
1847
+ #endif
1848
+ }
1849
+ }
1850
+
1851
+ size_t HUF_decompress4X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
1852
+ {
1853
+ /* validation checks */
1854
+ if (dstSize == 0) return ERROR(dstSize_tooSmall);
1855
+ if (cSrcSize > dstSize) return ERROR(corruption_detected); /* invalid */
1856
+ if (cSrcSize == dstSize) { ZSTD_memcpy(dst, cSrc, dstSize); return dstSize; } /* not compressed */
1857
+ if (cSrcSize == 1) { ZSTD_memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */
1858
+
1859
+ { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
1860
+ #if defined(HUF_FORCE_DECOMPRESS_X1)
1861
+ (void)algoNb;
1862
+ assert(algoNb == 0);
1863
+ return HUF_decompress4X1_DCtx(dctx, dst, dstSize, cSrc, cSrcSize);
1864
+ #elif defined(HUF_FORCE_DECOMPRESS_X2)
1865
+ (void)algoNb;
1866
+ assert(algoNb == 1);
1867
+ return HUF_decompress4X2_DCtx(dctx, dst, dstSize, cSrc, cSrcSize);
1868
+ #else
1869
+ return algoNb ? HUF_decompress4X2_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) :
1870
+ HUF_decompress4X1_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) ;
1871
+ #endif
1872
+ }
1873
+ }
1874
+
1875
+ size_t HUF_decompress4X_hufOnly(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
1876
+ {
1877
+ U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
1878
+ return HUF_decompress4X_hufOnly_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
1879
+ workSpace, sizeof(workSpace));
1880
+ }
1881
+
1882
+ size_t HUF_decompress1X_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize,
1883
+ const void* cSrc, size_t cSrcSize)
1884
+ {
1885
+ U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
1886
+ return HUF_decompress1X_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
1887
+ workSpace, sizeof(workSpace));
1888
+ }
1889
+ #endif