zstd-ruby 1.4.1.0 → 1.5.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (96) hide show
  1. checksums.yaml +4 -4
  2. data/.github/dependabot.yml +8 -0
  3. data/.github/workflows/ruby.yml +35 -0
  4. data/README.md +2 -2
  5. data/ext/zstdruby/libzstd/BUCK +5 -7
  6. data/ext/zstdruby/libzstd/Makefile +304 -113
  7. data/ext/zstdruby/libzstd/README.md +83 -20
  8. data/ext/zstdruby/libzstd/common/bitstream.h +59 -51
  9. data/ext/zstdruby/libzstd/common/compiler.h +150 -8
  10. data/ext/zstdruby/libzstd/common/cpu.h +1 -3
  11. data/ext/zstdruby/libzstd/common/debug.c +11 -31
  12. data/ext/zstdruby/libzstd/common/debug.h +22 -49
  13. data/ext/zstdruby/libzstd/common/entropy_common.c +201 -75
  14. data/ext/zstdruby/libzstd/common/error_private.c +3 -1
  15. data/ext/zstdruby/libzstd/common/error_private.h +8 -4
  16. data/ext/zstdruby/libzstd/common/fse.h +50 -42
  17. data/ext/zstdruby/libzstd/common/fse_decompress.c +149 -55
  18. data/ext/zstdruby/libzstd/common/huf.h +43 -39
  19. data/ext/zstdruby/libzstd/common/mem.h +69 -25
  20. data/ext/zstdruby/libzstd/common/pool.c +30 -20
  21. data/ext/zstdruby/libzstd/common/pool.h +3 -3
  22. data/ext/zstdruby/libzstd/common/threading.c +51 -4
  23. data/ext/zstdruby/libzstd/common/threading.h +36 -4
  24. data/ext/zstdruby/libzstd/common/xxhash.c +40 -92
  25. data/ext/zstdruby/libzstd/common/xxhash.h +12 -32
  26. data/ext/zstdruby/libzstd/common/zstd_common.c +10 -10
  27. data/ext/zstdruby/libzstd/common/zstd_deps.h +111 -0
  28. data/ext/zstdruby/libzstd/common/zstd_internal.h +230 -111
  29. data/ext/zstdruby/libzstd/common/zstd_trace.h +154 -0
  30. data/ext/zstdruby/libzstd/compress/fse_compress.c +47 -63
  31. data/ext/zstdruby/libzstd/compress/hist.c +41 -63
  32. data/ext/zstdruby/libzstd/compress/hist.h +13 -33
  33. data/ext/zstdruby/libzstd/compress/huf_compress.c +332 -193
  34. data/ext/zstdruby/libzstd/compress/zstd_compress.c +3614 -1696
  35. data/ext/zstdruby/libzstd/compress/zstd_compress_internal.h +546 -86
  36. data/ext/zstdruby/libzstd/compress/zstd_compress_literals.c +158 -0
  37. data/ext/zstdruby/libzstd/compress/zstd_compress_literals.h +29 -0
  38. data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.c +441 -0
  39. data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.h +54 -0
  40. data/ext/zstdruby/libzstd/compress/zstd_compress_superblock.c +572 -0
  41. data/ext/zstdruby/libzstd/compress/zstd_compress_superblock.h +32 -0
  42. data/ext/zstdruby/libzstd/compress/zstd_cwksp.h +662 -0
  43. data/ext/zstdruby/libzstd/compress/zstd_double_fast.c +43 -41
  44. data/ext/zstdruby/libzstd/compress/zstd_double_fast.h +2 -2
  45. data/ext/zstdruby/libzstd/compress/zstd_fast.c +85 -80
  46. data/ext/zstdruby/libzstd/compress/zstd_fast.h +2 -2
  47. data/ext/zstdruby/libzstd/compress/zstd_lazy.c +1184 -111
  48. data/ext/zstdruby/libzstd/compress/zstd_lazy.h +59 -1
  49. data/ext/zstdruby/libzstd/compress/zstd_ldm.c +333 -208
  50. data/ext/zstdruby/libzstd/compress/zstd_ldm.h +15 -3
  51. data/ext/zstdruby/libzstd/compress/zstd_ldm_geartab.h +103 -0
  52. data/ext/zstdruby/libzstd/compress/zstd_opt.c +228 -129
  53. data/ext/zstdruby/libzstd/compress/zstd_opt.h +1 -1
  54. data/ext/zstdruby/libzstd/compress/zstdmt_compress.c +151 -440
  55. data/ext/zstdruby/libzstd/compress/zstdmt_compress.h +32 -114
  56. data/ext/zstdruby/libzstd/decompress/huf_decompress.c +395 -276
  57. data/ext/zstdruby/libzstd/decompress/zstd_ddict.c +20 -16
  58. data/ext/zstdruby/libzstd/decompress/zstd_ddict.h +3 -3
  59. data/ext/zstdruby/libzstd/decompress/zstd_decompress.c +630 -231
  60. data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.c +606 -380
  61. data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.h +8 -5
  62. data/ext/zstdruby/libzstd/decompress/zstd_decompress_internal.h +39 -9
  63. data/ext/zstdruby/libzstd/deprecated/zbuff.h +9 -8
  64. data/ext/zstdruby/libzstd/deprecated/zbuff_common.c +2 -2
  65. data/ext/zstdruby/libzstd/deprecated/zbuff_compress.c +1 -1
  66. data/ext/zstdruby/libzstd/deprecated/zbuff_decompress.c +1 -1
  67. data/ext/zstdruby/libzstd/dictBuilder/cover.c +55 -46
  68. data/ext/zstdruby/libzstd/dictBuilder/cover.h +20 -9
  69. data/ext/zstdruby/libzstd/dictBuilder/divsufsort.c +1 -1
  70. data/ext/zstdruby/libzstd/dictBuilder/fastcover.c +43 -31
  71. data/ext/zstdruby/libzstd/dictBuilder/zdict.c +53 -30
  72. data/ext/zstdruby/libzstd/dll/example/Makefile +2 -1
  73. data/ext/zstdruby/libzstd/dll/example/README.md +16 -22
  74. data/ext/zstdruby/libzstd/legacy/zstd_legacy.h +4 -4
  75. data/ext/zstdruby/libzstd/legacy/zstd_v01.c +24 -14
  76. data/ext/zstdruby/libzstd/legacy/zstd_v01.h +1 -1
  77. data/ext/zstdruby/libzstd/legacy/zstd_v02.c +17 -8
  78. data/ext/zstdruby/libzstd/legacy/zstd_v02.h +1 -1
  79. data/ext/zstdruby/libzstd/legacy/zstd_v03.c +17 -8
  80. data/ext/zstdruby/libzstd/legacy/zstd_v03.h +1 -1
  81. data/ext/zstdruby/libzstd/legacy/zstd_v04.c +25 -11
  82. data/ext/zstdruby/libzstd/legacy/zstd_v04.h +1 -1
  83. data/ext/zstdruby/libzstd/legacy/zstd_v05.c +43 -32
  84. data/ext/zstdruby/libzstd/legacy/zstd_v05.h +2 -2
  85. data/ext/zstdruby/libzstd/legacy/zstd_v06.c +27 -19
  86. data/ext/zstdruby/libzstd/legacy/zstd_v06.h +1 -1
  87. data/ext/zstdruby/libzstd/legacy/zstd_v07.c +32 -20
  88. data/ext/zstdruby/libzstd/legacy/zstd_v07.h +1 -1
  89. data/ext/zstdruby/libzstd/libzstd.pc.in +2 -1
  90. data/ext/zstdruby/libzstd/{dictBuilder/zdict.h → zdict.h} +201 -31
  91. data/ext/zstdruby/libzstd/zstd.h +740 -153
  92. data/ext/zstdruby/libzstd/{common/zstd_errors.h → zstd_errors.h} +3 -1
  93. data/lib/zstd-ruby/version.rb +1 -1
  94. data/zstd-ruby.gemspec +1 -1
  95. metadata +21 -10
  96. data/.travis.yml +0 -14
@@ -1,5 +1,5 @@
1
1
  /*
2
- * Copyright (c) 2018-present, Facebook, Inc.
2
+ * Copyright (c) Facebook, Inc.
3
3
  * All rights reserved.
4
4
  *
5
5
  * This source code is licensed under both the BSD-style license (found in the
@@ -16,8 +16,6 @@
16
16
  * https://github.com/facebook/folly/blob/master/folly/CpuId.h
17
17
  */
18
18
 
19
- #include <string.h>
20
-
21
19
  #include "mem.h"
22
20
 
23
21
  #ifdef _MSC_VER
@@ -1,35 +1,15 @@
1
1
  /* ******************************************************************
2
- debug
3
- Part of FSE library
4
- Copyright (C) 2013-present, Yann Collet.
5
-
6
- BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
7
-
8
- Redistribution and use in source and binary forms, with or without
9
- modification, are permitted provided that the following conditions are
10
- met:
11
-
12
- * Redistributions of source code must retain the above copyright
13
- notice, this list of conditions and the following disclaimer.
14
- * Redistributions in binary form must reproduce the above
15
- copyright notice, this list of conditions and the following disclaimer
16
- in the documentation and/or other materials provided with the
17
- distribution.
18
-
19
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20
- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21
- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22
- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23
- OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24
- SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25
- LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26
- DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27
- THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28
- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29
- OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30
-
31
- You can contact the author at :
32
- - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
2
+ * debug
3
+ * Part of FSE library
4
+ * Copyright (c) Yann Collet, Facebook, Inc.
5
+ *
6
+ * You can contact the author at :
7
+ * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
8
+ *
9
+ * This source code is licensed under both the BSD-style license (found in the
10
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
11
+ * in the COPYING file in the root directory of this source tree).
12
+ * You may select, at your option, one of the above-listed licenses.
33
13
  ****************************************************************** */
34
14
 
35
15
 
@@ -1,35 +1,15 @@
1
1
  /* ******************************************************************
2
- debug
3
- Part of FSE library
4
- Copyright (C) 2013-present, Yann Collet.
5
-
6
- BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
7
-
8
- Redistribution and use in source and binary forms, with or without
9
- modification, are permitted provided that the following conditions are
10
- met:
11
-
12
- * Redistributions of source code must retain the above copyright
13
- notice, this list of conditions and the following disclaimer.
14
- * Redistributions in binary form must reproduce the above
15
- copyright notice, this list of conditions and the following disclaimer
16
- in the documentation and/or other materials provided with the
17
- distribution.
18
-
19
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20
- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21
- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22
- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23
- OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24
- SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25
- LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26
- DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27
- THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28
- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29
- OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30
-
31
- You can contact the author at :
32
- - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
2
+ * debug
3
+ * Part of FSE library
4
+ * Copyright (c) Yann Collet, Facebook, Inc.
5
+ *
6
+ * You can contact the author at :
7
+ * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
8
+ *
9
+ * This source code is licensed under both the BSD-style license (found in the
10
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
11
+ * in the COPYING file in the root directory of this source tree).
12
+ * You may select, at your option, one of the above-listed licenses.
33
13
  ****************************************************************** */
34
14
 
35
15
 
@@ -71,15 +51,6 @@ extern "C" {
71
51
  #endif
72
52
 
73
53
 
74
- /* DEBUGFILE can be defined externally,
75
- * typically through compiler command line.
76
- * note : currently useless.
77
- * Value must be stderr or stdout */
78
- #ifndef DEBUGFILE
79
- # define DEBUGFILE stderr
80
- #endif
81
-
82
-
83
54
  /* recommended values for DEBUGLEVEL :
84
55
  * 0 : release mode, no debug, all run-time checks disabled
85
56
  * 1 : enables assert() only, no display
@@ -96,7 +67,8 @@ extern "C" {
96
67
  */
97
68
 
98
69
  #if (DEBUGLEVEL>=1)
99
- # include <assert.h>
70
+ # define ZSTD_DEPS_NEED_ASSERT
71
+ # include "zstd_deps.h"
100
72
  #else
101
73
  # ifndef assert /* assert may be already defined, due to prior #include <assert.h> */
102
74
  # define assert(condition) ((void)0) /* disable assert (default) */
@@ -104,7 +76,8 @@ extern "C" {
104
76
  #endif
105
77
 
106
78
  #if (DEBUGLEVEL>=2)
107
- # include <stdio.h>
79
+ # define ZSTD_DEPS_NEED_IO
80
+ # include "zstd_deps.h"
108
81
  extern int g_debuglevel; /* the variable is only declared,
109
82
  it actually lives in debug.c,
110
83
  and is shared by the whole process.
@@ -112,14 +85,14 @@ extern int g_debuglevel; /* the variable is only declared,
112
85
  It's useful when enabling very verbose levels
113
86
  on selective conditions (such as position in src) */
114
87
 
115
- # define RAWLOG(l, ...) { \
116
- if (l<=g_debuglevel) { \
117
- fprintf(stderr, __VA_ARGS__); \
88
+ # define RAWLOG(l, ...) { \
89
+ if (l<=g_debuglevel) { \
90
+ ZSTD_DEBUG_PRINT(__VA_ARGS__); \
118
91
  } }
119
- # define DEBUGLOG(l, ...) { \
120
- if (l<=g_debuglevel) { \
121
- fprintf(stderr, __FILE__ ": " __VA_ARGS__); \
122
- fprintf(stderr, " \n"); \
92
+ # define DEBUGLOG(l, ...) { \
93
+ if (l<=g_debuglevel) { \
94
+ ZSTD_DEBUG_PRINT(__FILE__ ": " __VA_ARGS__); \
95
+ ZSTD_DEBUG_PRINT(" \n"); \
123
96
  } }
124
97
  #else
125
98
  # define RAWLOG(l, ...) {} /* disabled */
@@ -1,36 +1,16 @@
1
- /*
2
- Common functions of New Generation Entropy library
3
- Copyright (C) 2016, Yann Collet.
4
-
5
- BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
6
-
7
- Redistribution and use in source and binary forms, with or without
8
- modification, are permitted provided that the following conditions are
9
- met:
10
-
11
- * Redistributions of source code must retain the above copyright
12
- notice, this list of conditions and the following disclaimer.
13
- * Redistributions in binary form must reproduce the above
14
- copyright notice, this list of conditions and the following disclaimer
15
- in the documentation and/or other materials provided with the
16
- distribution.
17
-
18
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19
- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20
- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21
- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22
- OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23
- SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24
- LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25
- DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26
- THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27
- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28
- OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
-
30
- You can contact the author at :
31
- - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy
32
- - Public forum : https://groups.google.com/forum/#!forum/lz4c
33
- *************************************************************************** */
1
+ /* ******************************************************************
2
+ * Common functions of New Generation Entropy library
3
+ * Copyright (c) Yann Collet, Facebook, Inc.
4
+ *
5
+ * You can contact the author at :
6
+ * - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy
7
+ * - Public forum : https://groups.google.com/forum/#!forum/lz4c
8
+ *
9
+ * This source code is licensed under both the BSD-style license (found in the
10
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
11
+ * in the COPYING file in the root directory of this source tree).
12
+ * You may select, at your option, one of the above-listed licenses.
13
+ ****************************************************************** */
34
14
 
35
15
  /* *************************************
36
16
  * Dependencies
@@ -58,8 +38,31 @@ const char* HUF_getErrorName(size_t code) { return ERR_getErrorName(code); }
58
38
  /*-**************************************************************
59
39
  * FSE NCount encoding-decoding
60
40
  ****************************************************************/
61
- size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr,
62
- const void* headerBuffer, size_t hbSize)
41
+ static U32 FSE_ctz(U32 val)
42
+ {
43
+ assert(val != 0);
44
+ {
45
+ # if defined(_MSC_VER) /* Visual */
46
+ unsigned long r=0;
47
+ return _BitScanForward(&r, val) ? (unsigned)r : 0;
48
+ # elif defined(__GNUC__) && (__GNUC__ >= 3) /* GCC Intrinsic */
49
+ return __builtin_ctz(val);
50
+ # elif defined(__ICCARM__) /* IAR Intrinsic */
51
+ return __CTZ(val);
52
+ # else /* Software version */
53
+ U32 count = 0;
54
+ while ((val & 1) == 0) {
55
+ val >>= 1;
56
+ ++count;
57
+ }
58
+ return count;
59
+ # endif
60
+ }
61
+ }
62
+
63
+ FORCE_INLINE_TEMPLATE
64
+ size_t FSE_readNCount_body(short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr,
65
+ const void* headerBuffer, size_t hbSize)
63
66
  {
64
67
  const BYTE* const istart = (const BYTE*) headerBuffer;
65
68
  const BYTE* const iend = istart + hbSize;
@@ -70,23 +73,23 @@ size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* t
70
73
  U32 bitStream;
71
74
  int bitCount;
72
75
  unsigned charnum = 0;
76
+ unsigned const maxSV1 = *maxSVPtr + 1;
73
77
  int previous0 = 0;
74
78
 
75
- if (hbSize < 4) {
76
- /* This function only works when hbSize >= 4 */
77
- char buffer[4];
78
- memset(buffer, 0, sizeof(buffer));
79
- memcpy(buffer, headerBuffer, hbSize);
79
+ if (hbSize < 8) {
80
+ /* This function only works when hbSize >= 8 */
81
+ char buffer[8] = {0};
82
+ ZSTD_memcpy(buffer, headerBuffer, hbSize);
80
83
  { size_t const countSize = FSE_readNCount(normalizedCounter, maxSVPtr, tableLogPtr,
81
84
  buffer, sizeof(buffer));
82
85
  if (FSE_isError(countSize)) return countSize;
83
86
  if (countSize > hbSize) return ERROR(corruption_detected);
84
87
  return countSize;
85
88
  } }
86
- assert(hbSize >= 4);
89
+ assert(hbSize >= 8);
87
90
 
88
91
  /* init */
89
- memset(normalizedCounter, 0, (*maxSVPtr+1) * sizeof(normalizedCounter[0])); /* all symbols not present in NCount have a frequency of 0 */
92
+ ZSTD_memset(normalizedCounter, 0, (*maxSVPtr+1) * sizeof(normalizedCounter[0])); /* all symbols not present in NCount have a frequency of 0 */
90
93
  bitStream = MEM_readLE32(ip);
91
94
  nbBits = (bitStream & 0xF) + FSE_MIN_TABLELOG; /* extract tableLog */
92
95
  if (nbBits > FSE_TABLELOG_ABSOLUTE_MAX) return ERROR(tableLog_tooLarge);
@@ -97,36 +100,58 @@ size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* t
97
100
  threshold = 1<<nbBits;
98
101
  nbBits++;
99
102
 
100
- while ((remaining>1) & (charnum<=*maxSVPtr)) {
103
+ for (;;) {
101
104
  if (previous0) {
102
- unsigned n0 = charnum;
103
- while ((bitStream & 0xFFFF) == 0xFFFF) {
104
- n0 += 24;
105
- if (ip < iend-5) {
106
- ip += 2;
107
- bitStream = MEM_readLE32(ip) >> bitCount;
105
+ /* Count the number of repeats. Each time the
106
+ * 2-bit repeat code is 0b11 there is another
107
+ * repeat.
108
+ * Avoid UB by setting the high bit to 1.
109
+ */
110
+ int repeats = FSE_ctz(~bitStream | 0x80000000) >> 1;
111
+ while (repeats >= 12) {
112
+ charnum += 3 * 12;
113
+ if (LIKELY(ip <= iend-7)) {
114
+ ip += 3;
108
115
  } else {
109
- bitStream >>= 16;
110
- bitCount += 16;
111
- } }
112
- while ((bitStream & 3) == 3) {
113
- n0 += 3;
114
- bitStream >>= 2;
115
- bitCount += 2;
116
+ bitCount -= (int)(8 * (iend - 7 - ip));
117
+ bitCount &= 31;
118
+ ip = iend - 4;
119
+ }
120
+ bitStream = MEM_readLE32(ip) >> bitCount;
121
+ repeats = FSE_ctz(~bitStream | 0x80000000) >> 1;
116
122
  }
117
- n0 += bitStream & 3;
123
+ charnum += 3 * repeats;
124
+ bitStream >>= 2 * repeats;
125
+ bitCount += 2 * repeats;
126
+
127
+ /* Add the final repeat which isn't 0b11. */
128
+ assert((bitStream & 3) < 3);
129
+ charnum += bitStream & 3;
118
130
  bitCount += 2;
119
- if (n0 > *maxSVPtr) return ERROR(maxSymbolValue_tooSmall);
120
- while (charnum < n0) normalizedCounter[charnum++] = 0;
121
- if ((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4)) {
131
+
132
+ /* This is an error, but break and return an error
133
+ * at the end, because returning out of a loop makes
134
+ * it harder for the compiler to optimize.
135
+ */
136
+ if (charnum >= maxSV1) break;
137
+
138
+ /* We don't need to set the normalized count to 0
139
+ * because we already memset the whole buffer to 0.
140
+ */
141
+
142
+ if (LIKELY(ip <= iend-7) || (ip + (bitCount>>3) <= iend-4)) {
122
143
  assert((bitCount >> 3) <= 3); /* For first condition to work */
123
144
  ip += bitCount>>3;
124
145
  bitCount &= 7;
125
- bitStream = MEM_readLE32(ip) >> bitCount;
126
146
  } else {
127
- bitStream >>= 2;
128
- } }
129
- { int const max = (2*threshold-1) - remaining;
147
+ bitCount -= (int)(8 * (iend - 4 - ip));
148
+ bitCount &= 31;
149
+ ip = iend - 4;
150
+ }
151
+ bitStream = MEM_readLE32(ip) >> bitCount;
152
+ }
153
+ {
154
+ int const max = (2*threshold-1) - remaining;
130
155
  int count;
131
156
 
132
157
  if ((bitStream & (threshold-1)) < (U32)max) {
@@ -139,24 +164,43 @@ size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* t
139
164
  }
140
165
 
141
166
  count--; /* extra accuracy */
142
- remaining -= count < 0 ? -count : count; /* -1 means +1 */
167
+ /* When it matters (small blocks), this is a
168
+ * predictable branch, because we don't use -1.
169
+ */
170
+ if (count >= 0) {
171
+ remaining -= count;
172
+ } else {
173
+ assert(count == -1);
174
+ remaining += count;
175
+ }
143
176
  normalizedCounter[charnum++] = (short)count;
144
177
  previous0 = !count;
145
- while (remaining < threshold) {
146
- nbBits--;
147
- threshold >>= 1;
178
+
179
+ assert(threshold > 1);
180
+ if (remaining < threshold) {
181
+ /* This branch can be folded into the
182
+ * threshold update condition because we
183
+ * know that threshold > 1.
184
+ */
185
+ if (remaining <= 1) break;
186
+ nbBits = BIT_highbit32(remaining) + 1;
187
+ threshold = 1 << (nbBits - 1);
148
188
  }
189
+ if (charnum >= maxSV1) break;
149
190
 
150
- if ((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4)) {
191
+ if (LIKELY(ip <= iend-7) || (ip + (bitCount>>3) <= iend-4)) {
151
192
  ip += bitCount>>3;
152
193
  bitCount &= 7;
153
194
  } else {
154
195
  bitCount -= (int)(8 * (iend - 4 - ip));
196
+ bitCount &= 31;
155
197
  ip = iend - 4;
156
198
  }
157
- bitStream = MEM_readLE32(ip) >> (bitCount & 31);
158
- } } /* while ((remaining>1) & (charnum<=*maxSVPtr)) */
199
+ bitStream = MEM_readLE32(ip) >> bitCount;
200
+ } }
159
201
  if (remaining != 1) return ERROR(corruption_detected);
202
+ /* Only possible when there are too many zeros. */
203
+ if (charnum > maxSV1) return ERROR(maxSymbolValue_tooSmall);
160
204
  if (bitCount > 32) return ERROR(corruption_detected);
161
205
  *maxSVPtr = charnum-1;
162
206
 
@@ -164,6 +208,43 @@ size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* t
164
208
  return ip-istart;
165
209
  }
166
210
 
211
+ /* Avoids the FORCE_INLINE of the _body() function. */
212
+ static size_t FSE_readNCount_body_default(
213
+ short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr,
214
+ const void* headerBuffer, size_t hbSize)
215
+ {
216
+ return FSE_readNCount_body(normalizedCounter, maxSVPtr, tableLogPtr, headerBuffer, hbSize);
217
+ }
218
+
219
+ #if DYNAMIC_BMI2
220
+ TARGET_ATTRIBUTE("bmi2") static size_t FSE_readNCount_body_bmi2(
221
+ short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr,
222
+ const void* headerBuffer, size_t hbSize)
223
+ {
224
+ return FSE_readNCount_body(normalizedCounter, maxSVPtr, tableLogPtr, headerBuffer, hbSize);
225
+ }
226
+ #endif
227
+
228
+ size_t FSE_readNCount_bmi2(
229
+ short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr,
230
+ const void* headerBuffer, size_t hbSize, int bmi2)
231
+ {
232
+ #if DYNAMIC_BMI2
233
+ if (bmi2) {
234
+ return FSE_readNCount_body_bmi2(normalizedCounter, maxSVPtr, tableLogPtr, headerBuffer, hbSize);
235
+ }
236
+ #endif
237
+ (void)bmi2;
238
+ return FSE_readNCount_body_default(normalizedCounter, maxSVPtr, tableLogPtr, headerBuffer, hbSize);
239
+ }
240
+
241
+ size_t FSE_readNCount(
242
+ short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr,
243
+ const void* headerBuffer, size_t hbSize)
244
+ {
245
+ return FSE_readNCount_bmi2(normalizedCounter, maxSVPtr, tableLogPtr, headerBuffer, hbSize, /* bmi2 */ 0);
246
+ }
247
+
167
248
 
168
249
  /*! HUF_readStats() :
169
250
  Read compact Huffman tree, saved by HUF_writeCTable().
@@ -175,6 +256,17 @@ size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* t
175
256
  size_t HUF_readStats(BYTE* huffWeight, size_t hwSize, U32* rankStats,
176
257
  U32* nbSymbolsPtr, U32* tableLogPtr,
177
258
  const void* src, size_t srcSize)
259
+ {
260
+ U32 wksp[HUF_READ_STATS_WORKSPACE_SIZE_U32];
261
+ return HUF_readStats_wksp(huffWeight, hwSize, rankStats, nbSymbolsPtr, tableLogPtr, src, srcSize, wksp, sizeof(wksp), /* bmi2 */ 0);
262
+ }
263
+
264
+ FORCE_INLINE_TEMPLATE size_t
265
+ HUF_readStats_body(BYTE* huffWeight, size_t hwSize, U32* rankStats,
266
+ U32* nbSymbolsPtr, U32* tableLogPtr,
267
+ const void* src, size_t srcSize,
268
+ void* workSpace, size_t wkspSize,
269
+ int bmi2)
178
270
  {
179
271
  U32 weightTotal;
180
272
  const BYTE* ip = (const BYTE*) src;
@@ -183,7 +275,7 @@ size_t HUF_readStats(BYTE* huffWeight, size_t hwSize, U32* rankStats,
183
275
 
184
276
  if (!srcSize) return ERROR(srcSize_wrong);
185
277
  iSize = ip[0];
186
- /* memset(huffWeight, 0, hwSize); *//* is not necessary, even though some analyzer complain ... */
278
+ /* ZSTD_memset(huffWeight, 0, hwSize); *//* is not necessary, even though some analyzer complain ... */
187
279
 
188
280
  if (iSize >= 128) { /* special header */
189
281
  oSize = iSize - 127;
@@ -197,14 +289,14 @@ size_t HUF_readStats(BYTE* huffWeight, size_t hwSize, U32* rankStats,
197
289
  huffWeight[n+1] = ip[n/2] & 15;
198
290
  } } }
199
291
  else { /* header compressed with FSE (normal case) */
200
- FSE_DTable fseWorkspace[FSE_DTABLE_SIZE_U32(6)]; /* 6 is max possible tableLog for HUF header (maybe even 5, to be tested) */
201
292
  if (iSize+1 > srcSize) return ERROR(srcSize_wrong);
202
- oSize = FSE_decompress_wksp(huffWeight, hwSize-1, ip+1, iSize, fseWorkspace, 6); /* max (hwSize-1) values decoded, as last one is implied */
293
+ /* max (hwSize-1) values decoded, as last one is implied */
294
+ oSize = FSE_decompress_wksp_bmi2(huffWeight, hwSize-1, ip+1, iSize, 6, workSpace, wkspSize, bmi2);
203
295
  if (FSE_isError(oSize)) return oSize;
204
296
  }
205
297
 
206
298
  /* collect weight stats */
207
- memset(rankStats, 0, (HUF_TABLELOG_MAX + 1) * sizeof(U32));
299
+ ZSTD_memset(rankStats, 0, (HUF_TABLELOG_MAX + 1) * sizeof(U32));
208
300
  weightTotal = 0;
209
301
  { U32 n; for (n=0; n<oSize; n++) {
210
302
  if (huffWeight[n] >= HUF_TABLELOG_MAX) return ERROR(corruption_detected);
@@ -234,3 +326,37 @@ size_t HUF_readStats(BYTE* huffWeight, size_t hwSize, U32* rankStats,
234
326
  *nbSymbolsPtr = (U32)(oSize+1);
235
327
  return iSize+1;
236
328
  }
329
+
330
+ /* Avoids the FORCE_INLINE of the _body() function. */
331
+ static size_t HUF_readStats_body_default(BYTE* huffWeight, size_t hwSize, U32* rankStats,
332
+ U32* nbSymbolsPtr, U32* tableLogPtr,
333
+ const void* src, size_t srcSize,
334
+ void* workSpace, size_t wkspSize)
335
+ {
336
+ return HUF_readStats_body(huffWeight, hwSize, rankStats, nbSymbolsPtr, tableLogPtr, src, srcSize, workSpace, wkspSize, 0);
337
+ }
338
+
339
+ #if DYNAMIC_BMI2
340
+ static TARGET_ATTRIBUTE("bmi2") size_t HUF_readStats_body_bmi2(BYTE* huffWeight, size_t hwSize, U32* rankStats,
341
+ U32* nbSymbolsPtr, U32* tableLogPtr,
342
+ const void* src, size_t srcSize,
343
+ void* workSpace, size_t wkspSize)
344
+ {
345
+ return HUF_readStats_body(huffWeight, hwSize, rankStats, nbSymbolsPtr, tableLogPtr, src, srcSize, workSpace, wkspSize, 1);
346
+ }
347
+ #endif
348
+
349
+ size_t HUF_readStats_wksp(BYTE* huffWeight, size_t hwSize, U32* rankStats,
350
+ U32* nbSymbolsPtr, U32* tableLogPtr,
351
+ const void* src, size_t srcSize,
352
+ void* workSpace, size_t wkspSize,
353
+ int bmi2)
354
+ {
355
+ #if DYNAMIC_BMI2
356
+ if (bmi2) {
357
+ return HUF_readStats_body_bmi2(huffWeight, hwSize, rankStats, nbSymbolsPtr, tableLogPtr, src, srcSize, workSpace, wkspSize);
358
+ }
359
+ #endif
360
+ (void)bmi2;
361
+ return HUF_readStats_body_default(huffWeight, hwSize, rankStats, nbSymbolsPtr, tableLogPtr, src, srcSize, workSpace, wkspSize);
362
+ }