zstd-ruby 1.4.0.0 → 1.4.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (95)
  1. checksums.yaml +4 -4
  2. data/.github/workflows/ruby.yml +35 -0
  3. data/README.md +2 -2
  4. data/ext/zstdruby/libzstd/Makefile +274 -107
  5. data/ext/zstdruby/libzstd/README.md +75 -16
  6. data/ext/zstdruby/libzstd/common/bitstream.h +59 -51
  7. data/ext/zstdruby/libzstd/common/compiler.h +154 -5
  8. data/ext/zstdruby/libzstd/common/cpu.h +1 -3
  9. data/ext/zstdruby/libzstd/common/debug.c +11 -31
  10. data/ext/zstdruby/libzstd/common/debug.h +22 -49
  11. data/ext/zstdruby/libzstd/common/entropy_common.c +201 -75
  12. data/ext/zstdruby/libzstd/common/error_private.c +3 -1
  13. data/ext/zstdruby/libzstd/common/error_private.h +7 -3
  14. data/ext/zstdruby/libzstd/common/fse.h +50 -42
  15. data/ext/zstdruby/libzstd/common/fse_decompress.c +134 -50
  16. data/ext/zstdruby/libzstd/common/huf.h +41 -38
  17. data/ext/zstdruby/libzstd/common/mem.h +68 -22
  18. data/ext/zstdruby/libzstd/common/pool.c +30 -20
  19. data/ext/zstdruby/libzstd/common/pool.h +3 -3
  20. data/ext/zstdruby/libzstd/common/threading.c +51 -4
  21. data/ext/zstdruby/libzstd/common/threading.h +36 -4
  22. data/ext/zstdruby/libzstd/common/xxhash.c +39 -89
  23. data/ext/zstdruby/libzstd/common/xxhash.h +12 -32
  24. data/ext/zstdruby/libzstd/common/zstd_common.c +10 -10
  25. data/ext/zstdruby/libzstd/common/zstd_deps.h +111 -0
  26. data/ext/zstdruby/libzstd/common/zstd_errors.h +3 -1
  27. data/ext/zstdruby/libzstd/common/zstd_internal.h +231 -72
  28. data/ext/zstdruby/libzstd/common/zstd_trace.c +42 -0
  29. data/ext/zstdruby/libzstd/common/zstd_trace.h +152 -0
  30. data/ext/zstdruby/libzstd/compress/fse_compress.c +47 -63
  31. data/ext/zstdruby/libzstd/compress/hist.c +41 -63
  32. data/ext/zstdruby/libzstd/compress/hist.h +13 -33
  33. data/ext/zstdruby/libzstd/compress/huf_compress.c +288 -172
  34. data/ext/zstdruby/libzstd/compress/zstd_compress.c +2504 -1626
  35. data/ext/zstdruby/libzstd/compress/zstd_compress_internal.h +446 -85
  36. data/ext/zstdruby/libzstd/compress/zstd_compress_literals.c +158 -0
  37. data/ext/zstdruby/libzstd/compress/zstd_compress_literals.h +29 -0
  38. data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.c +433 -0
  39. data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.h +54 -0
  40. data/ext/zstdruby/libzstd/compress/zstd_compress_superblock.c +849 -0
  41. data/ext/zstdruby/libzstd/compress/zstd_compress_superblock.h +32 -0
  42. data/ext/zstdruby/libzstd/compress/zstd_cwksp.h +561 -0
  43. data/ext/zstdruby/libzstd/compress/zstd_double_fast.c +82 -60
  44. data/ext/zstdruby/libzstd/compress/zstd_double_fast.h +2 -2
  45. data/ext/zstdruby/libzstd/compress/zstd_fast.c +106 -80
  46. data/ext/zstdruby/libzstd/compress/zstd_fast.h +2 -2
  47. data/ext/zstdruby/libzstd/compress/zstd_lazy.c +411 -105
  48. data/ext/zstdruby/libzstd/compress/zstd_lazy.h +21 -1
  49. data/ext/zstdruby/libzstd/compress/zstd_ldm.c +296 -207
  50. data/ext/zstdruby/libzstd/compress/zstd_ldm.h +14 -3
  51. data/ext/zstdruby/libzstd/compress/zstd_ldm_geartab.h +103 -0
  52. data/ext/zstdruby/libzstd/compress/zstd_opt.c +260 -148
  53. data/ext/zstdruby/libzstd/compress/zstd_opt.h +1 -1
  54. data/ext/zstdruby/libzstd/compress/zstdmt_compress.c +153 -440
  55. data/ext/zstdruby/libzstd/compress/zstdmt_compress.h +29 -110
  56. data/ext/zstdruby/libzstd/decompress/huf_decompress.c +356 -238
  57. data/ext/zstdruby/libzstd/decompress/zstd_ddict.c +20 -16
  58. data/ext/zstdruby/libzstd/decompress/zstd_ddict.h +3 -3
  59. data/ext/zstdruby/libzstd/decompress/zstd_decompress.c +641 -238
  60. data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.c +600 -371
  61. data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.h +8 -5
  62. data/ext/zstdruby/libzstd/decompress/zstd_decompress_internal.h +40 -9
  63. data/ext/zstdruby/libzstd/deprecated/zbuff.h +9 -8
  64. data/ext/zstdruby/libzstd/deprecated/zbuff_common.c +2 -2
  65. data/ext/zstdruby/libzstd/deprecated/zbuff_compress.c +1 -1
  66. data/ext/zstdruby/libzstd/deprecated/zbuff_decompress.c +1 -1
  67. data/ext/zstdruby/libzstd/dictBuilder/cover.c +197 -78
  68. data/ext/zstdruby/libzstd/dictBuilder/cover.h +52 -7
  69. data/ext/zstdruby/libzstd/dictBuilder/divsufsort.c +1 -1
  70. data/ext/zstdruby/libzstd/dictBuilder/fastcover.c +84 -66
  71. data/ext/zstdruby/libzstd/dictBuilder/zdict.c +58 -36
  72. data/ext/zstdruby/libzstd/dictBuilder/zdict.h +60 -31
  73. data/ext/zstdruby/libzstd/dll/example/Makefile +2 -1
  74. data/ext/zstdruby/libzstd/dll/example/README.md +16 -22
  75. data/ext/zstdruby/libzstd/legacy/zstd_legacy.h +8 -4
  76. data/ext/zstdruby/libzstd/legacy/zstd_v01.c +115 -111
  77. data/ext/zstdruby/libzstd/legacy/zstd_v01.h +1 -1
  78. data/ext/zstdruby/libzstd/legacy/zstd_v02.c +28 -14
  79. data/ext/zstdruby/libzstd/legacy/zstd_v02.h +1 -1
  80. data/ext/zstdruby/libzstd/legacy/zstd_v03.c +28 -14
  81. data/ext/zstdruby/libzstd/legacy/zstd_v03.h +1 -1
  82. data/ext/zstdruby/libzstd/legacy/zstd_v04.c +36 -19
  83. data/ext/zstdruby/libzstd/legacy/zstd_v04.h +1 -1
  84. data/ext/zstdruby/libzstd/legacy/zstd_v05.c +122 -107
  85. data/ext/zstdruby/libzstd/legacy/zstd_v05.h +2 -2
  86. data/ext/zstdruby/libzstd/legacy/zstd_v06.c +29 -23
  87. data/ext/zstdruby/libzstd/legacy/zstd_v06.h +1 -1
  88. data/ext/zstdruby/libzstd/legacy/zstd_v07.c +34 -24
  89. data/ext/zstdruby/libzstd/legacy/zstd_v07.h +1 -1
  90. data/ext/zstdruby/libzstd/libzstd.pc.in +2 -1
  91. data/ext/zstdruby/libzstd/zstd.h +655 -118
  92. data/lib/zstd-ruby/version.rb +1 -1
  93. data/zstd-ruby.gemspec +1 -1
  94. metadata +20 -10
  95. data/.travis.yml +0 -14
@@ -1,5 +1,5 @@
1
1
  /*
2
- * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
2
+ * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc.
3
3
  * All rights reserved.
4
4
  *
5
5
  * This source code is licensed under both the BSD-style license (found in the
@@ -19,28 +19,16 @@
19
19
  /* Note : This is an internal API.
20
20
  * These APIs used to be exposed with ZSTDLIB_API,
21
21
  * because it used to be the only way to invoke MT compression.
22
- * Now, it's recommended to use ZSTD_compress2 and ZSTD_compressStream2()
23
- * instead.
24
- *
25
- * If you depend on these APIs and can't switch, then define
26
- * ZSTD_LEGACY_MULTITHREADED_API when making the dynamic library.
27
- * However, we may completely remove these functions in a future
28
- * release, so please switch soon.
22
+ * Now, you must use ZSTD_compress2 and ZSTD_compressStream2() instead.
29
23
  *
30
24
  * This API requires ZSTD_MULTITHREAD to be defined during compilation,
31
25
  * otherwise ZSTDMT_createCCtx*() will fail.
32
26
  */
33
27
 
34
- #ifdef ZSTD_LEGACY_MULTITHREADED_API
35
- # define ZSTDMT_API ZSTDLIB_API
36
- #else
37
- # define ZSTDMT_API
38
- #endif
39
-
40
28
  /* === Dependencies === */
41
- #include <stddef.h> /* size_t */
29
+ #include "../common/zstd_deps.h" /* size_t */
42
30
  #define ZSTD_STATIC_LINKING_ONLY /* ZSTD_parameters */
43
- #include "zstd.h" /* ZSTD_inBuffer, ZSTD_outBuffer, ZSTDLIB_API */
31
+ #include "../zstd.h" /* ZSTD_inBuffer, ZSTD_outBuffer, ZSTDLIB_API */
44
32
 
45
33
 
46
34
  /* === Constants === */
@@ -50,81 +38,38 @@
50
38
  #ifndef ZSTDMT_JOBSIZE_MIN
51
39
  # define ZSTDMT_JOBSIZE_MIN (1 MB)
52
40
  #endif
41
+ #define ZSTDMT_JOBLOG_MAX (MEM_32bits() ? 29 : 30)
53
42
  #define ZSTDMT_JOBSIZE_MAX (MEM_32bits() ? (512 MB) : (1024 MB))
54
43
 
55
44
 
45
+ /* ========================================================
46
+ * === Private interface, for use by ZSTD_compress.c ===
47
+ * === Not exposed in libzstd. Never invoke directly ===
48
+ * ======================================================== */
49
+
56
50
  /* === Memory management === */
57
51
  typedef struct ZSTDMT_CCtx_s ZSTDMT_CCtx;
58
52
  /* Requires ZSTD_MULTITHREAD to be defined during compilation, otherwise it will return NULL. */
59
- ZSTDMT_API ZSTDMT_CCtx* ZSTDMT_createCCtx(unsigned nbWorkers);
60
- /* Requires ZSTD_MULTITHREAD to be defined during compilation, otherwise it will return NULL. */
61
- ZSTDMT_API ZSTDMT_CCtx* ZSTDMT_createCCtx_advanced(unsigned nbWorkers,
62
- ZSTD_customMem cMem);
63
- ZSTDMT_API size_t ZSTDMT_freeCCtx(ZSTDMT_CCtx* mtctx);
64
-
65
- ZSTDMT_API size_t ZSTDMT_sizeof_CCtx(ZSTDMT_CCtx* mtctx);
66
-
67
-
68
- /* === Simple one-pass compression function === */
69
-
70
- ZSTDMT_API size_t ZSTDMT_compressCCtx(ZSTDMT_CCtx* mtctx,
71
- void* dst, size_t dstCapacity,
72
- const void* src, size_t srcSize,
73
- int compressionLevel);
74
-
53
+ ZSTDMT_CCtx* ZSTDMT_createCCtx_advanced(unsigned nbWorkers,
54
+ ZSTD_customMem cMem,
55
+ ZSTD_threadPool *pool);
56
+ size_t ZSTDMT_freeCCtx(ZSTDMT_CCtx* mtctx);
75
57
 
58
+ size_t ZSTDMT_sizeof_CCtx(ZSTDMT_CCtx* mtctx);
76
59
 
77
60
  /* === Streaming functions === */
78
61
 
79
- ZSTDMT_API size_t ZSTDMT_initCStream(ZSTDMT_CCtx* mtctx, int compressionLevel);
80
- ZSTDMT_API size_t ZSTDMT_resetCStream(ZSTDMT_CCtx* mtctx, unsigned long long pledgedSrcSize); /**< if srcSize is not known at reset time, use ZSTD_CONTENTSIZE_UNKNOWN. Note: for compatibility with older programs, 0 means the same as ZSTD_CONTENTSIZE_UNKNOWN, but it will change in the future to mean "empty" */
81
-
82
- ZSTDMT_API size_t ZSTDMT_nextInputSizeHint(const ZSTDMT_CCtx* mtctx);
83
- ZSTDMT_API size_t ZSTDMT_compressStream(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output, ZSTD_inBuffer* input);
84
-
85
- ZSTDMT_API size_t ZSTDMT_flushStream(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output); /**< @return : 0 == all flushed; >0 : still some data to be flushed; or an error code (ZSTD_isError()) */
86
- ZSTDMT_API size_t ZSTDMT_endStream(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output); /**< @return : 0 == all flushed; >0 : still some data to be flushed; or an error code (ZSTD_isError()) */
87
-
88
-
89
- /* === Advanced functions and parameters === */
90
-
91
- ZSTDMT_API size_t ZSTDMT_compress_advanced(ZSTDMT_CCtx* mtctx,
92
- void* dst, size_t dstCapacity,
93
- const void* src, size_t srcSize,
94
- const ZSTD_CDict* cdict,
95
- ZSTD_parameters params,
96
- int overlapLog);
97
-
98
- ZSTDMT_API size_t ZSTDMT_initCStream_advanced(ZSTDMT_CCtx* mtctx,
99
- const void* dict, size_t dictSize, /* dict can be released after init, a local copy is preserved within zcs */
100
- ZSTD_parameters params,
101
- unsigned long long pledgedSrcSize); /* pledgedSrcSize is optional and can be zero == unknown */
102
-
103
- ZSTDMT_API size_t ZSTDMT_initCStream_usingCDict(ZSTDMT_CCtx* mtctx,
104
- const ZSTD_CDict* cdict,
105
- ZSTD_frameParameters fparams,
106
- unsigned long long pledgedSrcSize); /* note : zero means empty */
107
-
108
- /* ZSTDMT_parameter :
109
- * List of parameters that can be set using ZSTDMT_setMTCtxParameter() */
110
- typedef enum {
111
- ZSTDMT_p_jobSize, /* Each job is compressed in parallel. By default, this value is dynamically determined depending on compression parameters. Can be set explicitly here. */
112
- ZSTDMT_p_overlapLog, /* Each job may reload a part of previous job to enhance compression ratio; 0 == no overlap, 6(default) == use 1/8th of window, >=9 == use full window. This is a "sticky" parameter : its value will be re-used on next compression job */
113
- ZSTDMT_p_rsyncable /* Enables rsyncable mode. */
114
- } ZSTDMT_parameter;
115
-
116
- /* ZSTDMT_setMTCtxParameter() :
117
- * allow setting individual parameters, one at a time, among a list of enums defined in ZSTDMT_parameter.
118
- * The function must be called typically after ZSTD_createCCtx() but __before ZSTDMT_init*() !__
119
- * Parameters not explicitly reset by ZSTDMT_init*() remain the same in consecutive compression sessions.
120
- * @return : 0, or an error code (which can be tested using ZSTD_isError()) */
121
- ZSTDMT_API size_t ZSTDMT_setMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSTDMT_parameter parameter, int value);
122
-
123
- /* ZSTDMT_getMTCtxParameter() :
124
- * Query the ZSTDMT_CCtx for a parameter value.
125
- * @return : 0, or an error code (which can be tested using ZSTD_isError()) */
126
- ZSTDMT_API size_t ZSTDMT_getMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSTDMT_parameter parameter, int* value);
62
+ size_t ZSTDMT_nextInputSizeHint(const ZSTDMT_CCtx* mtctx);
127
63
 
64
+ /*! ZSTDMT_initCStream_internal() :
65
+ * Private use only. Init streaming operation.
66
+ * expects params to be valid.
67
+ * must receive dict, or cdict, or none, but not both.
68
+ * @return : 0, or an error code */
69
+ size_t ZSTDMT_initCStream_internal(ZSTDMT_CCtx* zcs,
70
+ const void* dict, size_t dictSize, ZSTD_dictContentType_e dictContentType,
71
+ const ZSTD_CDict* cdict,
72
+ ZSTD_CCtx_params params, unsigned long long pledgedSrcSize);
128
73
 
129
74
  /*! ZSTDMT_compressStream_generic() :
130
75
  * Combines ZSTDMT_compressStream() with optional ZSTDMT_flushStream() or ZSTDMT_endStream()
@@ -133,16 +78,10 @@ ZSTDMT_API size_t ZSTDMT_getMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSTDMT_parameter
133
78
  * 0 if fully flushed
134
79
  * or an error code
135
80
  * note : needs to be init using any ZSTD_initCStream*() variant */
136
- ZSTDMT_API size_t ZSTDMT_compressStream_generic(ZSTDMT_CCtx* mtctx,
137
- ZSTD_outBuffer* output,
138
- ZSTD_inBuffer* input,
139
- ZSTD_EndDirective endOp);
140
-
141
-
142
- /* ========================================================
143
- * === Private interface, for use by ZSTD_compress.c ===
144
- * === Not exposed in libzstd. Never invoke directly ===
145
- * ======================================================== */
81
+ size_t ZSTDMT_compressStream_generic(ZSTDMT_CCtx* mtctx,
82
+ ZSTD_outBuffer* output,
83
+ ZSTD_inBuffer* input,
84
+ ZSTD_EndDirective endOp);
146
85
 
147
86
  /*! ZSTDMT_toFlushNow()
148
87
  * Tell how many bytes are ready to be flushed immediately.
@@ -152,15 +91,6 @@ ZSTDMT_API size_t ZSTDMT_compressStream_generic(ZSTDMT_CCtx* mtctx,
152
91
  * therefore flushing is limited by speed of oldest job. */
153
92
  size_t ZSTDMT_toFlushNow(ZSTDMT_CCtx* mtctx);
154
93
 
155
- /*! ZSTDMT_CCtxParam_setMTCtxParameter()
156
- * like ZSTDMT_setMTCtxParameter(), but into a ZSTD_CCtx_Params */
157
- size_t ZSTDMT_CCtxParam_setMTCtxParameter(ZSTD_CCtx_params* params, ZSTDMT_parameter parameter, int value);
158
-
159
- /*! ZSTDMT_CCtxParam_setNbWorkers()
160
- * Set nbWorkers, and clamp it.
161
- * Also reset jobSize and overlapLog */
162
- size_t ZSTDMT_CCtxParam_setNbWorkers(ZSTD_CCtx_params* params, unsigned nbWorkers);
163
-
164
94
  /*! ZSTDMT_updateCParams_whileCompressing() :
165
95
  * Updates only a selected set of compression parameters, to remain compatible with current frame.
166
96
  * New parameters will be applied to next compression job. */
@@ -173,17 +103,6 @@ void ZSTDMT_updateCParams_whileCompressing(ZSTDMT_CCtx* mtctx, const ZSTD_CCtx_p
173
103
  ZSTD_frameProgression ZSTDMT_getFrameProgression(ZSTDMT_CCtx* mtctx);
174
104
 
175
105
 
176
- /*! ZSTDMT_initCStream_internal() :
177
- * Private use only. Init streaming operation.
178
- * expects params to be valid.
179
- * must receive dict, or cdict, or none, but not both.
180
- * @return : 0, or an error code */
181
- size_t ZSTDMT_initCStream_internal(ZSTDMT_CCtx* zcs,
182
- const void* dict, size_t dictSize, ZSTD_dictContentType_e dictContentType,
183
- const ZSTD_CDict* cdict,
184
- ZSTD_CCtx_params params, unsigned long long pledgedSrcSize);
185
-
186
-
187
106
  #if defined (__cplusplus)
188
107
  }
189
108
  #endif
@@ -1,47 +1,27 @@
1
1
  /* ******************************************************************
2
- huff0 huffman decoder,
3
- part of Finite State Entropy library
4
- Copyright (C) 2013-present, Yann Collet.
5
-
6
- BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
7
-
8
- Redistribution and use in source and binary forms, with or without
9
- modification, are permitted provided that the following conditions are
10
- met:
11
-
12
- * Redistributions of source code must retain the above copyright
13
- notice, this list of conditions and the following disclaimer.
14
- * Redistributions in binary form must reproduce the above
15
- copyright notice, this list of conditions and the following disclaimer
16
- in the documentation and/or other materials provided with the
17
- distribution.
18
-
19
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20
- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21
- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22
- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23
- OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24
- SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25
- LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26
- DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27
- THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28
- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29
- OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30
-
31
- You can contact the author at :
32
- - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy
2
+ * huff0 huffman decoder,
3
+ * part of Finite State Entropy library
4
+ * Copyright (c) 2013-2021, Yann Collet, Facebook, Inc.
5
+ *
6
+ * You can contact the author at :
7
+ * - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy
8
+ *
9
+ * This source code is licensed under both the BSD-style license (found in the
10
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
11
+ * in the COPYING file in the root directory of this source tree).
12
+ * You may select, at your option, one of the above-listed licenses.
33
13
  ****************************************************************** */
34
14
 
35
15
  /* **************************************************************
36
16
  * Dependencies
37
17
  ****************************************************************/
38
- #include <string.h> /* memcpy, memset */
39
- #include "compiler.h"
40
- #include "bitstream.h" /* BIT_* */
41
- #include "fse.h" /* to compress headers */
18
+ #include "../common/zstd_deps.h" /* ZSTD_memcpy, ZSTD_memset */
19
+ #include "../common/compiler.h"
20
+ #include "../common/bitstream.h" /* BIT_* */
21
+ #include "../common/fse.h" /* to compress headers */
42
22
  #define HUF_STATIC_LINKING_ONLY
43
- #include "huf.h"
44
- #include "error_private.h"
23
+ #include "../common/huf.h"
24
+ #include "../common/error_private.h"
45
25
 
46
26
  /* **************************************************************
47
27
  * Macros
@@ -61,7 +41,6 @@
61
41
  * Error Management
62
42
  ****************************************************************/
63
43
  #define HUF_isError ERR_isError
64
- #define CHECK_F(f) { size_t const err_ = (f); if (HUF_isError(err_)) return err_; }
65
44
 
66
45
 
67
46
  /* **************************************************************
@@ -124,7 +103,7 @@ typedef struct { BYTE maxTableLog; BYTE tableType; BYTE tableLog; BYTE reserved;
124
103
  static DTableDesc HUF_getDTableDesc(const HUF_DTable* table)
125
104
  {
126
105
  DTableDesc dtd;
127
- memcpy(&dtd, table, sizeof(dtd));
106
+ ZSTD_memcpy(&dtd, table, sizeof(dtd));
128
107
  return dtd;
129
108
  }
130
109
 
@@ -136,29 +115,51 @@ static DTableDesc HUF_getDTableDesc(const HUF_DTable* table)
136
115
  /*-***************************/
137
116
  typedef struct { BYTE byte; BYTE nbBits; } HUF_DEltX1; /* single-symbol decoding */
138
117
 
118
+ /**
119
+ * Packs 4 HUF_DEltX1 structs into a U64. This is used to lay down 4 entries at
120
+ * a time.
121
+ */
122
+ static U64 HUF_DEltX1_set4(BYTE symbol, BYTE nbBits) {
123
+ U64 D4;
124
+ if (MEM_isLittleEndian()) {
125
+ D4 = symbol + (nbBits << 8);
126
+ } else {
127
+ D4 = (symbol << 8) + nbBits;
128
+ }
129
+ D4 *= 0x0001000100010001ULL;
130
+ return D4;
131
+ }
132
+
133
+ typedef struct {
134
+ U32 rankVal[HUF_TABLELOG_ABSOLUTEMAX + 1];
135
+ U32 rankStart[HUF_TABLELOG_ABSOLUTEMAX + 1];
136
+ U32 statsWksp[HUF_READ_STATS_WORKSPACE_SIZE_U32];
137
+ BYTE symbols[HUF_SYMBOLVALUE_MAX + 1];
138
+ BYTE huffWeight[HUF_SYMBOLVALUE_MAX + 1];
139
+ } HUF_ReadDTableX1_Workspace;
140
+
141
+
139
142
  size_t HUF_readDTableX1_wksp(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize)
143
+ {
144
+ return HUF_readDTableX1_wksp_bmi2(DTable, src, srcSize, workSpace, wkspSize, /* bmi2 */ 0);
145
+ }
146
+
147
+ size_t HUF_readDTableX1_wksp_bmi2(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize, int bmi2)
140
148
  {
141
149
  U32 tableLog = 0;
142
150
  U32 nbSymbols = 0;
143
151
  size_t iSize;
144
152
  void* const dtPtr = DTable + 1;
145
153
  HUF_DEltX1* const dt = (HUF_DEltX1*)dtPtr;
154
+ HUF_ReadDTableX1_Workspace* wksp = (HUF_ReadDTableX1_Workspace*)workSpace;
146
155
 
147
- U32* rankVal;
148
- BYTE* huffWeight;
149
- size_t spaceUsed32 = 0;
150
-
151
- rankVal = (U32 *)workSpace + spaceUsed32;
152
- spaceUsed32 += HUF_TABLELOG_ABSOLUTEMAX + 1;
153
- huffWeight = (BYTE *)((U32 *)workSpace + spaceUsed32);
154
- spaceUsed32 += HUF_ALIGN(HUF_SYMBOLVALUE_MAX + 1, sizeof(U32)) >> 2;
155
-
156
- if ((spaceUsed32 << 2) > wkspSize) return ERROR(tableLog_tooLarge);
156
+ DEBUG_STATIC_ASSERT(HUF_DECOMPRESS_WORKSPACE_SIZE >= sizeof(*wksp));
157
+ if (sizeof(*wksp) > wkspSize) return ERROR(tableLog_tooLarge);
157
158
 
158
159
  DEBUG_STATIC_ASSERT(sizeof(DTableDesc) == sizeof(HUF_DTable));
159
- /* memset(huffWeight, 0, sizeof(huffWeight)); */ /* is not necessary, even though some analyzer complain ... */
160
+ /* ZSTD_memset(huffWeight, 0, sizeof(huffWeight)); */ /* is not necessary, even though some analyzer complain ... */
160
161
 
161
- iSize = HUF_readStats(huffWeight, HUF_SYMBOLVALUE_MAX + 1, rankVal, &nbSymbols, &tableLog, src, srcSize);
162
+ iSize = HUF_readStats_wksp(wksp->huffWeight, HUF_SYMBOLVALUE_MAX + 1, wksp->rankVal, &nbSymbols, &tableLog, src, srcSize, wksp->statsWksp, sizeof(wksp->statsWksp), bmi2);
162
163
  if (HUF_isError(iSize)) return iSize;
163
164
 
164
165
  /* Table header */
@@ -166,40 +167,117 @@ size_t HUF_readDTableX1_wksp(HUF_DTable* DTable, const void* src, size_t srcSize
166
167
  if (tableLog > (U32)(dtd.maxTableLog+1)) return ERROR(tableLog_tooLarge); /* DTable too small, Huffman tree cannot fit in */
167
168
  dtd.tableType = 0;
168
169
  dtd.tableLog = (BYTE)tableLog;
169
- memcpy(DTable, &dtd, sizeof(dtd));
170
+ ZSTD_memcpy(DTable, &dtd, sizeof(dtd));
170
171
  }
171
172
 
172
- /* Calculate starting value for each rank */
173
- { U32 n, nextRankStart = 0;
174
- for (n=1; n<tableLog+1; n++) {
175
- U32 const current = nextRankStart;
176
- nextRankStart += (rankVal[n] << (n-1));
177
- rankVal[n] = current;
178
- } }
179
-
180
- /* fill DTable */
181
- { U32 n;
182
- for (n=0; n<nbSymbols; n++) {
183
- U32 const w = huffWeight[n];
184
- U32 const length = (1 << w) >> 1;
185
- U32 u;
186
- HUF_DEltX1 D;
187
- D.byte = (BYTE)n; D.nbBits = (BYTE)(tableLog + 1 - w);
188
- for (u = rankVal[w]; u < rankVal[w] + length; u++)
189
- dt[u] = D;
190
- rankVal[w] += length;
191
- } }
173
+ /* Compute symbols and rankStart given rankVal:
174
+ *
175
+ * rankVal already contains the number of values of each weight.
176
+ *
177
+ * symbols contains the symbols ordered by weight. First are the rankVal[0]
178
+ * weight 0 symbols, followed by the rankVal[1] weight 1 symbols, and so on.
179
+ * symbols[0] is filled (but unused) to avoid a branch.
180
+ *
181
+ * rankStart contains the offset where each rank belongs in the DTable.
182
+ * rankStart[0] is not filled because there are no entries in the table for
183
+ * weight 0.
184
+ */
185
+ {
186
+ int n;
187
+ int nextRankStart = 0;
188
+ int const unroll = 4;
189
+ int const nLimit = (int)nbSymbols - unroll + 1;
190
+ for (n=0; n<(int)tableLog+1; n++) {
191
+ U32 const curr = nextRankStart;
192
+ nextRankStart += wksp->rankVal[n];
193
+ wksp->rankStart[n] = curr;
194
+ }
195
+ for (n=0; n < nLimit; n += unroll) {
196
+ int u;
197
+ for (u=0; u < unroll; ++u) {
198
+ size_t const w = wksp->huffWeight[n+u];
199
+ wksp->symbols[wksp->rankStart[w]++] = (BYTE)(n+u);
200
+ }
201
+ }
202
+ for (; n < (int)nbSymbols; ++n) {
203
+ size_t const w = wksp->huffWeight[n];
204
+ wksp->symbols[wksp->rankStart[w]++] = (BYTE)n;
205
+ }
206
+ }
192
207
 
208
+ /* fill DTable
209
+ * We fill all entries of each weight in order.
210
+ * That way length is a constant for each iteration of the outer loop.
211
+ * We can switch based on the length to a different inner loop which is
212
+ * optimized for that particular case.
213
+ */
214
+ {
215
+ U32 w;
216
+ int symbol=wksp->rankVal[0];
217
+ int rankStart=0;
218
+ for (w=1; w<tableLog+1; ++w) {
219
+ int const symbolCount = wksp->rankVal[w];
220
+ int const length = (1 << w) >> 1;
221
+ int uStart = rankStart;
222
+ BYTE const nbBits = (BYTE)(tableLog + 1 - w);
223
+ int s;
224
+ int u;
225
+ switch (length) {
226
+ case 1:
227
+ for (s=0; s<symbolCount; ++s) {
228
+ HUF_DEltX1 D;
229
+ D.byte = wksp->symbols[symbol + s];
230
+ D.nbBits = nbBits;
231
+ dt[uStart] = D;
232
+ uStart += 1;
233
+ }
234
+ break;
235
+ case 2:
236
+ for (s=0; s<symbolCount; ++s) {
237
+ HUF_DEltX1 D;
238
+ D.byte = wksp->symbols[symbol + s];
239
+ D.nbBits = nbBits;
240
+ dt[uStart+0] = D;
241
+ dt[uStart+1] = D;
242
+ uStart += 2;
243
+ }
244
+ break;
245
+ case 4:
246
+ for (s=0; s<symbolCount; ++s) {
247
+ U64 const D4 = HUF_DEltX1_set4(wksp->symbols[symbol + s], nbBits);
248
+ MEM_write64(dt + uStart, D4);
249
+ uStart += 4;
250
+ }
251
+ break;
252
+ case 8:
253
+ for (s=0; s<symbolCount; ++s) {
254
+ U64 const D4 = HUF_DEltX1_set4(wksp->symbols[symbol + s], nbBits);
255
+ MEM_write64(dt + uStart, D4);
256
+ MEM_write64(dt + uStart + 4, D4);
257
+ uStart += 8;
258
+ }
259
+ break;
260
+ default:
261
+ for (s=0; s<symbolCount; ++s) {
262
+ U64 const D4 = HUF_DEltX1_set4(wksp->symbols[symbol + s], nbBits);
263
+ for (u=0; u < length; u += 16) {
264
+ MEM_write64(dt + uStart + u + 0, D4);
265
+ MEM_write64(dt + uStart + u + 4, D4);
266
+ MEM_write64(dt + uStart + u + 8, D4);
267
+ MEM_write64(dt + uStart + u + 12, D4);
268
+ }
269
+ assert(u == length);
270
+ uStart += length;
271
+ }
272
+ break;
273
+ }
274
+ symbol += symbolCount;
275
+ rankStart += symbolCount * length;
276
+ }
277
+ }
193
278
  return iSize;
194
279
  }
195
280
 
196
- size_t HUF_readDTableX1(HUF_DTable* DTable, const void* src, size_t srcSize)
197
- {
198
- U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
199
- return HUF_readDTableX1_wksp(DTable, src, srcSize,
200
- workSpace, sizeof(workSpace));
201
- }
202
-
203
281
  FORCE_INLINE_TEMPLATE BYTE
204
282
  HUF_decodeSymbolX1(BIT_DStream_t* Dstream, const HUF_DEltX1* dt, const U32 dtLog)
205
283
  {
@@ -280,6 +358,7 @@ HUF_decompress4X1_usingDTable_internal_body(
280
358
  { const BYTE* const istart = (const BYTE*) cSrc;
281
359
  BYTE* const ostart = (BYTE*) dst;
282
360
  BYTE* const oend = ostart + dstSize;
361
+ BYTE* const olimit = oend - 3;
283
362
  const void* const dtPtr = DTable + 1;
284
363
  const HUF_DEltX1* const dt = (const HUF_DEltX1*)dtPtr;
285
364
 
@@ -304,9 +383,9 @@ HUF_decompress4X1_usingDTable_internal_body(
304
383
  BYTE* op2 = opStart2;
305
384
  BYTE* op3 = opStart3;
306
385
  BYTE* op4 = opStart4;
307
- U32 endSignal = BIT_DStream_unfinished;
308
386
  DTableDesc const dtd = HUF_getDTableDesc(DTable);
309
387
  U32 const dtLog = dtd.tableLog;
388
+ U32 endSignal = 1;
310
389
 
311
390
  if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */
312
391
  CHECK_F( BIT_initDStream(&bitD1, istart1, length1) );
@@ -315,8 +394,7 @@ HUF_decompress4X1_usingDTable_internal_body(
315
394
  CHECK_F( BIT_initDStream(&bitD4, istart4, length4) );
316
395
 
317
396
  /* up to 16 symbols per loop (4 symbols per stream) in 64-bit mode */
318
- endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
319
- while ( (endSignal==BIT_DStream_unfinished) && (op4<(oend-3)) ) {
397
+ for ( ; (endSignal) & (op4 < olimit) ; ) {
320
398
  HUF_DECODE_SYMBOLX1_2(op1, &bitD1);
321
399
  HUF_DECODE_SYMBOLX1_2(op2, &bitD2);
322
400
  HUF_DECODE_SYMBOLX1_2(op3, &bitD3);
@@ -333,10 +411,10 @@ HUF_decompress4X1_usingDTable_internal_body(
333
411
  HUF_DECODE_SYMBOLX1_0(op2, &bitD2);
334
412
  HUF_DECODE_SYMBOLX1_0(op3, &bitD3);
335
413
  HUF_DECODE_SYMBOLX1_0(op4, &bitD4);
336
- BIT_reloadDStream(&bitD1);
337
- BIT_reloadDStream(&bitD2);
338
- BIT_reloadDStream(&bitD3);
339
- BIT_reloadDStream(&bitD4);
414
+ endSignal &= BIT_reloadDStreamFast(&bitD1) == BIT_DStream_unfinished;
415
+ endSignal &= BIT_reloadDStreamFast(&bitD2) == BIT_DStream_unfinished;
416
+ endSignal &= BIT_reloadDStreamFast(&bitD3) == BIT_DStream_unfinished;
417
+ endSignal &= BIT_reloadDStreamFast(&bitD4) == BIT_DStream_unfinished;
340
418
  }
341
419
 
342
420
  /* check corruption */
@@ -398,20 +476,6 @@ size_t HUF_decompress1X1_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize,
398
476
  }
399
477
 
400
478
 
401
- size_t HUF_decompress1X1_DCtx(HUF_DTable* DCtx, void* dst, size_t dstSize,
402
- const void* cSrc, size_t cSrcSize)
403
- {
404
- U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
405
- return HUF_decompress1X1_DCtx_wksp(DCtx, dst, dstSize, cSrc, cSrcSize,
406
- workSpace, sizeof(workSpace));
407
- }
408
-
409
- size_t HUF_decompress1X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
410
- {
411
- HUF_CREATE_STATIC_DTABLEX1(DTable, HUF_TABLELOG_MAX);
412
- return HUF_decompress1X1_DCtx (DTable, dst, dstSize, cSrc, cSrcSize);
413
- }
414
-
415
479
  size_t HUF_decompress4X1_usingDTable(
416
480
  void* dst, size_t dstSize,
417
481
  const void* cSrc, size_t cSrcSize,
@@ -428,8 +492,7 @@ static size_t HUF_decompress4X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size
428
492
  {
429
493
  const BYTE* ip = (const BYTE*) cSrc;
430
494
 
431
- size_t const hSize = HUF_readDTableX1_wksp (dctx, cSrc, cSrcSize,
432
- workSpace, wkspSize);
495
+ size_t const hSize = HUF_readDTableX1_wksp_bmi2(dctx, cSrc, cSrcSize, workSpace, wkspSize, bmi2);
433
496
  if (HUF_isError(hSize)) return hSize;
434
497
  if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
435
498
  ip += hSize; cSrcSize -= hSize;
@@ -445,18 +508,6 @@ size_t HUF_decompress4X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
445
508
  }
446
509
 
447
510
 
448
- size_t HUF_decompress4X1_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
449
- {
450
- U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
451
- return HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
452
- workSpace, sizeof(workSpace));
453
- }
454
- size_t HUF_decompress4X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
455
- {
456
- HUF_CREATE_STATIC_DTABLEX1(DTable, HUF_TABLELOG_MAX);
457
- return HUF_decompress4X1_DCtx(DTable, dst, dstSize, cSrc, cSrcSize);
458
- }
459
-
460
511
  #endif /* HUF_FORCE_DECOMPRESS_X2 */
461
512
 
462
513
 
@@ -483,7 +534,7 @@ static void HUF_fillDTableX2Level2(HUF_DEltX2* DTable, U32 sizeLog, const U32 co
483
534
  U32 rankVal[HUF_TABLELOG_MAX + 1];
484
535
 
485
536
  /* get pre-calculated rankVal */
486
- memcpy(rankVal, rankValOrigin, sizeof(rankVal));
537
+ ZSTD_memcpy(rankVal, rankValOrigin, sizeof(rankVal));
487
538
 
488
539
  /* fill skipped values */
489
540
  if (minWeight>1) {
@@ -525,7 +576,7 @@ static void HUF_fillDTableX2(HUF_DEltX2* DTable, const U32 targetLog,
525
576
  const U32 minBits = nbBitsBaseline - maxWeight;
526
577
  U32 s;
527
578
 
528
- memcpy(rankVal, rankValOrigin, sizeof(rankVal));
579
+ ZSTD_memcpy(rankVal, rankValOrigin, sizeof(rankVal));
529
580
 
530
581
  /* fill DTable */
531
582
  for (s=0; s<sortedListSize; s++) {
@@ -590,11 +641,11 @@ size_t HUF_readDTableX2_wksp(HUF_DTable* DTable,
590
641
  if ((spaceUsed32 << 2) > wkspSize) return ERROR(tableLog_tooLarge);
591
642
 
592
643
  rankStart = rankStart0 + 1;
593
- memset(rankStats, 0, sizeof(U32) * (2 * HUF_TABLELOG_MAX + 2 + 1));
644
+ ZSTD_memset(rankStats, 0, sizeof(U32) * (2 * HUF_TABLELOG_MAX + 2 + 1));
594
645
 
595
646
  DEBUG_STATIC_ASSERT(sizeof(HUF_DEltX2) == sizeof(HUF_DTable)); /* if compiler fails here, assertion is wrong */
596
647
  if (maxTableLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge);
597
- /* memset(weightList, 0, sizeof(weightList)); */ /* is not necessary, even though some analyzer complain ... */
648
+ /* ZSTD_memset(weightList, 0, sizeof(weightList)); */ /* is not necessary, even though some analyzer complain ... */
598
649
 
599
650
  iSize = HUF_readStats(weightList, HUF_SYMBOLVALUE_MAX + 1, rankStats, &nbSymbols, &tableLog, src, srcSize);
600
651
  if (HUF_isError(iSize)) return iSize;
@@ -608,9 +659,9 @@ size_t HUF_readDTableX2_wksp(HUF_DTable* DTable,
608
659
  /* Get start index of each weight */
609
660
  { U32 w, nextRankStart = 0;
610
661
  for (w=1; w<maxW+1; w++) {
611
- U32 current = nextRankStart;
662
+ U32 curr = nextRankStart;
612
663
  nextRankStart += rankStats[w];
613
- rankStart[w] = current;
664
+ rankStart[w] = curr;
614
665
  }
615
666
  rankStart[0] = nextRankStart; /* put all 0w symbols at the end of sorted list*/
616
667
  sizeOfSort = nextRankStart;
@@ -633,9 +684,9 @@ size_t HUF_readDTableX2_wksp(HUF_DTable* DTable,
633
684
  U32 nextRankVal = 0;
634
685
  U32 w;
635
686
  for (w=1; w<maxW+1; w++) {
636
- U32 current = nextRankVal;
687
+ U32 curr = nextRankVal;
637
688
  nextRankVal += rankStats[w] << (w+rescale);
638
- rankVal0[w] = current;
689
+ rankVal0[w] = curr;
639
690
  } }
640
691
  { U32 const minBits = tableLog+1 - maxW;
641
692
  U32 consumed;
@@ -653,23 +704,16 @@ size_t HUF_readDTableX2_wksp(HUF_DTable* DTable,
653
704
 
654
705
  dtd.tableLog = (BYTE)maxTableLog;
655
706
  dtd.tableType = 1;
656
- memcpy(DTable, &dtd, sizeof(dtd));
707
+ ZSTD_memcpy(DTable, &dtd, sizeof(dtd));
657
708
  return iSize;
658
709
  }
659
710
 
660
- size_t HUF_readDTableX2(HUF_DTable* DTable, const void* src, size_t srcSize)
661
- {
662
- U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
663
- return HUF_readDTableX2_wksp(DTable, src, srcSize,
664
- workSpace, sizeof(workSpace));
665
- }
666
-
667
711
 
668
712
  FORCE_INLINE_TEMPLATE U32
669
713
  HUF_decodeSymbolX2(void* op, BIT_DStream_t* DStream, const HUF_DEltX2* dt, const U32 dtLog)
670
714
  {
671
715
  size_t const val = BIT_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */
672
- memcpy(op, dt+val, 2);
716
+ ZSTD_memcpy(op, dt+val, 2);
673
717
  BIT_skipBits(DStream, dt[val].nbBits);
674
718
  return dt[val].length;
675
719
  }
@@ -678,7 +722,7 @@ FORCE_INLINE_TEMPLATE U32
678
722
  HUF_decodeLastSymbolX2(void* op, BIT_DStream_t* DStream, const HUF_DEltX2* dt, const U32 dtLog)
679
723
  {
680
724
  size_t const val = BIT_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */
681
- memcpy(op, dt+val, 1);
725
+ ZSTD_memcpy(op, dt+val, 1);
682
726
  if (dt[val].length==1) BIT_skipBits(DStream, dt[val].nbBits);
683
727
  else {
684
728
  if (DStream->bitsConsumed < (sizeof(DStream->bitContainer)*8)) {
@@ -755,7 +799,6 @@ HUF_decompress1X2_usingDTable_internal_body(
755
799
  return dstSize;
756
800
  }
757
801
 
758
-
759
802
  FORCE_INLINE_TEMPLATE size_t
760
803
  HUF_decompress4X2_usingDTable_internal_body(
761
804
  void* dst, size_t dstSize,
@@ -767,6 +810,7 @@ HUF_decompress4X2_usingDTable_internal_body(
767
810
  { const BYTE* const istart = (const BYTE*) cSrc;
768
811
  BYTE* const ostart = (BYTE*) dst;
769
812
  BYTE* const oend = ostart + dstSize;
813
+ BYTE* const olimit = oend - (sizeof(size_t)-1);
770
814
  const void* const dtPtr = DTable+1;
771
815
  const HUF_DEltX2* const dt = (const HUF_DEltX2*)dtPtr;
772
816
 
@@ -791,7 +835,7 @@ HUF_decompress4X2_usingDTable_internal_body(
791
835
  BYTE* op2 = opStart2;
792
836
  BYTE* op3 = opStart3;
793
837
  BYTE* op4 = opStart4;
794
- U32 endSignal;
838
+ U32 endSignal = 1;
795
839
  DTableDesc const dtd = HUF_getDTableDesc(DTable);
796
840
  U32 const dtLog = dtd.tableLog;
797
841
 
@@ -802,8 +846,29 @@ HUF_decompress4X2_usingDTable_internal_body(
802
846
  CHECK_F( BIT_initDStream(&bitD4, istart4, length4) );
803
847
 
804
848
  /* 16-32 symbols per loop (4-8 symbols per stream) */
805
- endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
806
- for ( ; (endSignal==BIT_DStream_unfinished) & (op4<(oend-(sizeof(bitD4.bitContainer)-1))) ; ) {
849
+ for ( ; (endSignal) & (op4 < olimit); ) {
850
+ #if defined(__clang__) && (defined(__x86_64__) || defined(__i386__))
851
+ HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
852
+ HUF_DECODE_SYMBOLX2_1(op1, &bitD1);
853
+ HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
854
+ HUF_DECODE_SYMBOLX2_0(op1, &bitD1);
855
+ HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
856
+ HUF_DECODE_SYMBOLX2_1(op2, &bitD2);
857
+ HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
858
+ HUF_DECODE_SYMBOLX2_0(op2, &bitD2);
859
+ endSignal &= BIT_reloadDStreamFast(&bitD1) == BIT_DStream_unfinished;
860
+ endSignal &= BIT_reloadDStreamFast(&bitD2) == BIT_DStream_unfinished;
861
+ HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
862
+ HUF_DECODE_SYMBOLX2_1(op3, &bitD3);
863
+ HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
864
+ HUF_DECODE_SYMBOLX2_0(op3, &bitD3);
865
+ HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
866
+ HUF_DECODE_SYMBOLX2_1(op4, &bitD4);
867
+ HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
868
+ HUF_DECODE_SYMBOLX2_0(op4, &bitD4);
869
+ endSignal &= BIT_reloadDStreamFast(&bitD3) == BIT_DStream_unfinished;
870
+ endSignal &= BIT_reloadDStreamFast(&bitD4) == BIT_DStream_unfinished;
871
+ #else
807
872
  HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
808
873
  HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
809
874
  HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
@@ -820,8 +885,12 @@ HUF_decompress4X2_usingDTable_internal_body(
820
885
  HUF_DECODE_SYMBOLX2_0(op2, &bitD2);
821
886
  HUF_DECODE_SYMBOLX2_0(op3, &bitD3);
822
887
  HUF_DECODE_SYMBOLX2_0(op4, &bitD4);
823
-
824
- endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
888
+ endSignal = (U32)LIKELY(
889
+ (BIT_reloadDStreamFast(&bitD1) == BIT_DStream_unfinished)
890
+ & (BIT_reloadDStreamFast(&bitD2) == BIT_DStream_unfinished)
891
+ & (BIT_reloadDStreamFast(&bitD3) == BIT_DStream_unfinished)
892
+ & (BIT_reloadDStreamFast(&bitD4) == BIT_DStream_unfinished));
893
+ #endif
825
894
  }
826
895
 
827
896
  /* check corruption */
@@ -874,20 +943,6 @@ size_t HUF_decompress1X2_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize,
874
943
  }
875
944
 
876
945
 
877
- size_t HUF_decompress1X2_DCtx(HUF_DTable* DCtx, void* dst, size_t dstSize,
878
- const void* cSrc, size_t cSrcSize)
879
- {
880
- U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
881
- return HUF_decompress1X2_DCtx_wksp(DCtx, dst, dstSize, cSrc, cSrcSize,
882
- workSpace, sizeof(workSpace));
883
- }
884
-
885
- size_t HUF_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
886
- {
887
- HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_TABLELOG_MAX);
888
- return HUF_decompress1X2_DCtx(DTable, dst, dstSize, cSrc, cSrcSize);
889
- }
890
-
891
946
  size_t HUF_decompress4X2_usingDTable(
892
947
  void* dst, size_t dstSize,
893
948
  const void* cSrc, size_t cSrcSize,
@@ -921,20 +976,6 @@ size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
921
976
  }
922
977
 
923
978
 
924
- size_t HUF_decompress4X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize,
925
- const void* cSrc, size_t cSrcSize)
926
- {
927
- U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
928
- return HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
929
- workSpace, sizeof(workSpace));
930
- }
931
-
932
- size_t HUF_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
933
- {
934
- HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_TABLELOG_MAX);
935
- return HUF_decompress4X2_DCtx(DTable, dst, dstSize, cSrc, cSrcSize);
936
- }
937
-
938
979
  #endif /* HUF_FORCE_DECOMPRESS_X1 */
939
980
 
940
981
 
@@ -1035,67 +1076,6 @@ U32 HUF_selectDecoder (size_t dstSize, size_t cSrcSize)
1035
1076
  }
1036
1077
 
1037
1078
 
1038
- typedef size_t (*decompressionAlgo)(void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);
1039
-
1040
- size_t HUF_decompress (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
1041
- {
1042
- #if !defined(HUF_FORCE_DECOMPRESS_X1) && !defined(HUF_FORCE_DECOMPRESS_X2)
1043
- static const decompressionAlgo decompress[2] = { HUF_decompress4X1, HUF_decompress4X2 };
1044
- #endif
1045
-
1046
- /* validation checks */
1047
- if (dstSize == 0) return ERROR(dstSize_tooSmall);
1048
- if (cSrcSize > dstSize) return ERROR(corruption_detected); /* invalid */
1049
- if (cSrcSize == dstSize) { memcpy(dst, cSrc, dstSize); return dstSize; } /* not compressed */
1050
- if (cSrcSize == 1) { memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */
1051
-
1052
- { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
1053
- #if defined(HUF_FORCE_DECOMPRESS_X1)
1054
- (void)algoNb;
1055
- assert(algoNb == 0);
1056
- return HUF_decompress4X1(dst, dstSize, cSrc, cSrcSize);
1057
- #elif defined(HUF_FORCE_DECOMPRESS_X2)
1058
- (void)algoNb;
1059
- assert(algoNb == 1);
1060
- return HUF_decompress4X2(dst, dstSize, cSrc, cSrcSize);
1061
- #else
1062
- return decompress[algoNb](dst, dstSize, cSrc, cSrcSize);
1063
- #endif
1064
- }
1065
- }
1066
-
1067
- size_t HUF_decompress4X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
1068
- {
1069
- /* validation checks */
1070
- if (dstSize == 0) return ERROR(dstSize_tooSmall);
1071
- if (cSrcSize > dstSize) return ERROR(corruption_detected); /* invalid */
1072
- if (cSrcSize == dstSize) { memcpy(dst, cSrc, dstSize); return dstSize; } /* not compressed */
1073
- if (cSrcSize == 1) { memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */
1074
-
1075
- { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
1076
- #if defined(HUF_FORCE_DECOMPRESS_X1)
1077
- (void)algoNb;
1078
- assert(algoNb == 0);
1079
- return HUF_decompress4X1_DCtx(dctx, dst, dstSize, cSrc, cSrcSize);
1080
- #elif defined(HUF_FORCE_DECOMPRESS_X2)
1081
- (void)algoNb;
1082
- assert(algoNb == 1);
1083
- return HUF_decompress4X2_DCtx(dctx, dst, dstSize, cSrc, cSrcSize);
1084
- #else
1085
- return algoNb ? HUF_decompress4X2_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) :
1086
- HUF_decompress4X1_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) ;
1087
- #endif
1088
- }
1089
- }
1090
-
1091
- size_t HUF_decompress4X_hufOnly(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
1092
- {
1093
- U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
1094
- return HUF_decompress4X_hufOnly_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
1095
- workSpace, sizeof(workSpace));
1096
- }
1097
-
1098
-
1099
1079
  size_t HUF_decompress4X_hufOnly_wksp(HUF_DTable* dctx, void* dst,
1100
1080
  size_t dstSize, const void* cSrc,
1101
1081
  size_t cSrcSize, void* workSpace,
@@ -1129,8 +1109,8 @@ size_t HUF_decompress1X_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
1129
1109
  /* validation checks */
1130
1110
  if (dstSize == 0) return ERROR(dstSize_tooSmall);
1131
1111
  if (cSrcSize > dstSize) return ERROR(corruption_detected); /* invalid */
1132
- if (cSrcSize == dstSize) { memcpy(dst, cSrc, dstSize); return dstSize; } /* not compressed */
1133
- if (cSrcSize == 1) { memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */
1112
+ if (cSrcSize == dstSize) { ZSTD_memcpy(dst, cSrc, dstSize); return dstSize; } /* not compressed */
1113
+ if (cSrcSize == 1) { ZSTD_memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */
1134
1114
 
1135
1115
  { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
1136
1116
  #if defined(HUF_FORCE_DECOMPRESS_X1)
@@ -1152,14 +1132,6 @@ size_t HUF_decompress1X_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
1152
1132
  }
1153
1133
  }
1154
1134
 
1155
- size_t HUF_decompress1X_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize,
1156
- const void* cSrc, size_t cSrcSize)
1157
- {
1158
- U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
1159
- return HUF_decompress1X_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
1160
- workSpace, sizeof(workSpace));
1161
- }
1162
-
1163
1135
 
1164
1136
  size_t HUF_decompress1X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2)
1165
1137
  {
@@ -1183,7 +1155,7 @@ size_t HUF_decompress1X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstS
1183
1155
  {
1184
1156
  const BYTE* ip = (const BYTE*) cSrc;
1185
1157
 
1186
- size_t const hSize = HUF_readDTableX1_wksp(dctx, cSrc, cSrcSize, workSpace, wkspSize);
1158
+ size_t const hSize = HUF_readDTableX1_wksp_bmi2(dctx, cSrc, cSrcSize, workSpace, wkspSize, bmi2);
1187
1159
  if (HUF_isError(hSize)) return hSize;
1188
1160
  if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
1189
1161
  ip += hSize; cSrcSize -= hSize;
@@ -1230,3 +1202,149 @@ size_t HUF_decompress4X_hufOnly_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t ds
1230
1202
  #endif
1231
1203
  }
1232
1204
  }
1205
+
1206
+ #ifndef ZSTD_NO_UNUSED_FUNCTIONS
1207
+ #ifndef HUF_FORCE_DECOMPRESS_X2
1208
+ size_t HUF_readDTableX1(HUF_DTable* DTable, const void* src, size_t srcSize)
1209
+ {
1210
+ U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
1211
+ return HUF_readDTableX1_wksp(DTable, src, srcSize,
1212
+ workSpace, sizeof(workSpace));
1213
+ }
1214
+
1215
+ size_t HUF_decompress1X1_DCtx(HUF_DTable* DCtx, void* dst, size_t dstSize,
1216
+ const void* cSrc, size_t cSrcSize)
1217
+ {
1218
+ U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
1219
+ return HUF_decompress1X1_DCtx_wksp(DCtx, dst, dstSize, cSrc, cSrcSize,
1220
+ workSpace, sizeof(workSpace));
1221
+ }
1222
+
1223
+ size_t HUF_decompress1X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
1224
+ {
1225
+ HUF_CREATE_STATIC_DTABLEX1(DTable, HUF_TABLELOG_MAX);
1226
+ return HUF_decompress1X1_DCtx (DTable, dst, dstSize, cSrc, cSrcSize);
1227
+ }
1228
+ #endif
1229
+
1230
+ #ifndef HUF_FORCE_DECOMPRESS_X1
1231
+ size_t HUF_readDTableX2(HUF_DTable* DTable, const void* src, size_t srcSize)
1232
+ {
1233
+ U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
1234
+ return HUF_readDTableX2_wksp(DTable, src, srcSize,
1235
+ workSpace, sizeof(workSpace));
1236
+ }
1237
+
1238
+ size_t HUF_decompress1X2_DCtx(HUF_DTable* DCtx, void* dst, size_t dstSize,
1239
+ const void* cSrc, size_t cSrcSize)
1240
+ {
1241
+ U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
1242
+ return HUF_decompress1X2_DCtx_wksp(DCtx, dst, dstSize, cSrc, cSrcSize,
1243
+ workSpace, sizeof(workSpace));
1244
+ }
1245
+
1246
+ size_t HUF_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
1247
+ {
1248
+ HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_TABLELOG_MAX);
1249
+ return HUF_decompress1X2_DCtx(DTable, dst, dstSize, cSrc, cSrcSize);
1250
+ }
1251
+ #endif
1252
+
1253
+ #ifndef HUF_FORCE_DECOMPRESS_X2
1254
+ size_t HUF_decompress4X1_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
1255
+ {
1256
+ U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
1257
+ return HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
1258
+ workSpace, sizeof(workSpace));
1259
+ }
1260
+ size_t HUF_decompress4X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
1261
+ {
1262
+ HUF_CREATE_STATIC_DTABLEX1(DTable, HUF_TABLELOG_MAX);
1263
+ return HUF_decompress4X1_DCtx(DTable, dst, dstSize, cSrc, cSrcSize);
1264
+ }
1265
+ #endif
1266
+
1267
+ #ifndef HUF_FORCE_DECOMPRESS_X1
1268
+ size_t HUF_decompress4X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize,
1269
+ const void* cSrc, size_t cSrcSize)
1270
+ {
1271
+ U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
1272
+ return HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
1273
+ workSpace, sizeof(workSpace));
1274
+ }
1275
+
1276
+ size_t HUF_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
1277
+ {
1278
+ HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_TABLELOG_MAX);
1279
+ return HUF_decompress4X2_DCtx(DTable, dst, dstSize, cSrc, cSrcSize);
1280
+ }
1281
+ #endif
1282
+
1283
+ typedef size_t (*decompressionAlgo)(void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);
1284
+
1285
+ size_t HUF_decompress (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
1286
+ {
1287
+ #if !defined(HUF_FORCE_DECOMPRESS_X1) && !defined(HUF_FORCE_DECOMPRESS_X2)
1288
+ static const decompressionAlgo decompress[2] = { HUF_decompress4X1, HUF_decompress4X2 };
1289
+ #endif
1290
+
1291
+ /* validation checks */
1292
+ if (dstSize == 0) return ERROR(dstSize_tooSmall);
1293
+ if (cSrcSize > dstSize) return ERROR(corruption_detected); /* invalid */
1294
+ if (cSrcSize == dstSize) { ZSTD_memcpy(dst, cSrc, dstSize); return dstSize; } /* not compressed */
1295
+ if (cSrcSize == 1) { ZSTD_memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */
1296
+
1297
+ { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
1298
+ #if defined(HUF_FORCE_DECOMPRESS_X1)
1299
+ (void)algoNb;
1300
+ assert(algoNb == 0);
1301
+ return HUF_decompress4X1(dst, dstSize, cSrc, cSrcSize);
1302
+ #elif defined(HUF_FORCE_DECOMPRESS_X2)
1303
+ (void)algoNb;
1304
+ assert(algoNb == 1);
1305
+ return HUF_decompress4X2(dst, dstSize, cSrc, cSrcSize);
1306
+ #else
1307
+ return decompress[algoNb](dst, dstSize, cSrc, cSrcSize);
1308
+ #endif
1309
+ }
1310
+ }
1311
+
1312
+ size_t HUF_decompress4X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
1313
+ {
1314
+ /* validation checks */
1315
+ if (dstSize == 0) return ERROR(dstSize_tooSmall);
1316
+ if (cSrcSize > dstSize) return ERROR(corruption_detected); /* invalid */
1317
+ if (cSrcSize == dstSize) { ZSTD_memcpy(dst, cSrc, dstSize); return dstSize; } /* not compressed */
1318
+ if (cSrcSize == 1) { ZSTD_memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */
1319
+
1320
+ { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
1321
+ #if defined(HUF_FORCE_DECOMPRESS_X1)
1322
+ (void)algoNb;
1323
+ assert(algoNb == 0);
1324
+ return HUF_decompress4X1_DCtx(dctx, dst, dstSize, cSrc, cSrcSize);
1325
+ #elif defined(HUF_FORCE_DECOMPRESS_X2)
1326
+ (void)algoNb;
1327
+ assert(algoNb == 1);
1328
+ return HUF_decompress4X2_DCtx(dctx, dst, dstSize, cSrc, cSrcSize);
1329
+ #else
1330
+ return algoNb ? HUF_decompress4X2_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) :
1331
+ HUF_decompress4X1_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) ;
1332
+ #endif
1333
+ }
1334
+ }
1335
+
1336
+ size_t HUF_decompress4X_hufOnly(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
1337
+ {
1338
+ U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
1339
+ return HUF_decompress4X_hufOnly_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
1340
+ workSpace, sizeof(workSpace));
1341
+ }
1342
+
1343
+ size_t HUF_decompress1X_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize,
1344
+ const void* cSrc, size_t cSrcSize)
1345
+ {
1346
+ U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
1347
+ return HUF_decompress1X_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
1348
+ workSpace, sizeof(workSpace));
1349
+ }
1350
+ #endif