extzstd 0.3.2 → 0.3.3

Sign up to get free protection for your applications and to get access to all the features.
Files changed (108) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +3 -3
  3. data/contrib/zstd/CHANGELOG +188 -1
  4. data/contrib/zstd/CONTRIBUTING.md +157 -74
  5. data/contrib/zstd/LICENSE +4 -4
  6. data/contrib/zstd/Makefile +81 -58
  7. data/contrib/zstd/Package.swift +36 -0
  8. data/contrib/zstd/README.md +59 -35
  9. data/contrib/zstd/TESTING.md +2 -3
  10. data/contrib/zstd/appveyor.yml +49 -136
  11. data/contrib/zstd/lib/BUCK +5 -7
  12. data/contrib/zstd/lib/Makefile +87 -181
  13. data/contrib/zstd/lib/README.md +23 -6
  14. data/contrib/zstd/lib/common/allocations.h +55 -0
  15. data/contrib/zstd/lib/common/bits.h +200 -0
  16. data/contrib/zstd/lib/common/bitstream.h +33 -59
  17. data/contrib/zstd/lib/common/compiler.h +115 -45
  18. data/contrib/zstd/lib/common/cpu.h +1 -1
  19. data/contrib/zstd/lib/common/debug.c +1 -1
  20. data/contrib/zstd/lib/common/debug.h +1 -1
  21. data/contrib/zstd/lib/common/entropy_common.c +15 -37
  22. data/contrib/zstd/lib/common/error_private.c +9 -2
  23. data/contrib/zstd/lib/common/error_private.h +82 -3
  24. data/contrib/zstd/lib/common/fse.h +9 -85
  25. data/contrib/zstd/lib/common/fse_decompress.c +29 -111
  26. data/contrib/zstd/lib/common/huf.h +84 -172
  27. data/contrib/zstd/lib/common/mem.h +58 -49
  28. data/contrib/zstd/lib/common/pool.c +37 -16
  29. data/contrib/zstd/lib/common/pool.h +9 -3
  30. data/contrib/zstd/lib/common/portability_macros.h +156 -0
  31. data/contrib/zstd/lib/common/threading.c +68 -14
  32. data/contrib/zstd/lib/common/threading.h +5 -10
  33. data/contrib/zstd/lib/common/xxhash.c +7 -809
  34. data/contrib/zstd/lib/common/xxhash.h +5568 -167
  35. data/contrib/zstd/lib/common/zstd_common.c +1 -36
  36. data/contrib/zstd/lib/common/zstd_deps.h +1 -1
  37. data/contrib/zstd/lib/common/zstd_internal.h +64 -150
  38. data/contrib/zstd/lib/common/zstd_trace.h +163 -0
  39. data/contrib/zstd/lib/compress/clevels.h +134 -0
  40. data/contrib/zstd/lib/compress/fse_compress.c +69 -150
  41. data/contrib/zstd/lib/compress/hist.c +1 -1
  42. data/contrib/zstd/lib/compress/hist.h +1 -1
  43. data/contrib/zstd/lib/compress/huf_compress.c +773 -251
  44. data/contrib/zstd/lib/compress/zstd_compress.c +2650 -826
  45. data/contrib/zstd/lib/compress/zstd_compress_internal.h +509 -180
  46. data/contrib/zstd/lib/compress/zstd_compress_literals.c +117 -40
  47. data/contrib/zstd/lib/compress/zstd_compress_literals.h +16 -6
  48. data/contrib/zstd/lib/compress/zstd_compress_sequences.c +28 -19
  49. data/contrib/zstd/lib/compress/zstd_compress_sequences.h +1 -1
  50. data/contrib/zstd/lib/compress/zstd_compress_superblock.c +33 -305
  51. data/contrib/zstd/lib/compress/zstd_compress_superblock.h +1 -1
  52. data/contrib/zstd/lib/compress/zstd_cwksp.h +266 -85
  53. data/contrib/zstd/lib/compress/zstd_double_fast.c +369 -132
  54. data/contrib/zstd/lib/compress/zstd_double_fast.h +3 -2
  55. data/contrib/zstd/lib/compress/zstd_fast.c +722 -258
  56. data/contrib/zstd/lib/compress/zstd_fast.h +3 -2
  57. data/contrib/zstd/lib/compress/zstd_lazy.c +1105 -360
  58. data/contrib/zstd/lib/compress/zstd_lazy.h +41 -1
  59. data/contrib/zstd/lib/compress/zstd_ldm.c +272 -208
  60. data/contrib/zstd/lib/compress/zstd_ldm.h +3 -2
  61. data/contrib/zstd/lib/compress/zstd_ldm_geartab.h +106 -0
  62. data/contrib/zstd/lib/compress/zstd_opt.c +324 -197
  63. data/contrib/zstd/lib/compress/zstd_opt.h +1 -1
  64. data/contrib/zstd/lib/compress/zstdmt_compress.c +109 -53
  65. data/contrib/zstd/lib/compress/zstdmt_compress.h +9 -6
  66. data/contrib/zstd/lib/decompress/huf_decompress.c +1071 -539
  67. data/contrib/zstd/lib/decompress/huf_decompress_amd64.S +576 -0
  68. data/contrib/zstd/lib/decompress/zstd_ddict.c +4 -4
  69. data/contrib/zstd/lib/decompress/zstd_ddict.h +1 -1
  70. data/contrib/zstd/lib/decompress/zstd_decompress.c +507 -82
  71. data/contrib/zstd/lib/decompress/zstd_decompress_block.c +962 -310
  72. data/contrib/zstd/lib/decompress/zstd_decompress_block.h +14 -3
  73. data/contrib/zstd/lib/decompress/zstd_decompress_internal.h +54 -6
  74. data/contrib/zstd/lib/deprecated/zbuff.h +1 -1
  75. data/contrib/zstd/lib/deprecated/zbuff_common.c +1 -1
  76. data/contrib/zstd/lib/deprecated/zbuff_compress.c +24 -4
  77. data/contrib/zstd/lib/deprecated/zbuff_decompress.c +3 -1
  78. data/contrib/zstd/lib/dictBuilder/cover.c +44 -32
  79. data/contrib/zstd/lib/dictBuilder/cover.h +6 -5
  80. data/contrib/zstd/lib/dictBuilder/divsufsort.c +1 -1
  81. data/contrib/zstd/lib/dictBuilder/fastcover.c +24 -16
  82. data/contrib/zstd/lib/dictBuilder/zdict.c +88 -95
  83. data/contrib/zstd/lib/legacy/zstd_legacy.h +8 -1
  84. data/contrib/zstd/lib/legacy/zstd_v01.c +16 -53
  85. data/contrib/zstd/lib/legacy/zstd_v01.h +1 -1
  86. data/contrib/zstd/lib/legacy/zstd_v02.c +24 -69
  87. data/contrib/zstd/lib/legacy/zstd_v02.h +1 -1
  88. data/contrib/zstd/lib/legacy/zstd_v03.c +25 -72
  89. data/contrib/zstd/lib/legacy/zstd_v03.h +1 -1
  90. data/contrib/zstd/lib/legacy/zstd_v04.c +23 -69
  91. data/contrib/zstd/lib/legacy/zstd_v04.h +1 -1
  92. data/contrib/zstd/lib/legacy/zstd_v05.c +35 -85
  93. data/contrib/zstd/lib/legacy/zstd_v05.h +1 -1
  94. data/contrib/zstd/lib/legacy/zstd_v06.c +42 -87
  95. data/contrib/zstd/lib/legacy/zstd_v06.h +1 -1
  96. data/contrib/zstd/lib/legacy/zstd_v07.c +35 -82
  97. data/contrib/zstd/lib/legacy/zstd_v07.h +1 -1
  98. data/contrib/zstd/lib/libzstd.mk +214 -0
  99. data/contrib/zstd/lib/libzstd.pc.in +4 -3
  100. data/contrib/zstd/lib/module.modulemap +35 -0
  101. data/contrib/zstd/lib/{dictBuilder/zdict.h → zdict.h} +202 -33
  102. data/contrib/zstd/lib/zstd.h +922 -293
  103. data/contrib/zstd/lib/{common/zstd_errors.h → zstd_errors.h} +27 -8
  104. data/ext/extconf.rb +7 -6
  105. data/ext/extzstd.c +13 -10
  106. data/ext/libzstd_conf.h +0 -1
  107. data/ext/zstd_decompress_asm.S +1 -0
  108. metadata +16 -5
@@ -0,0 +1,134 @@
1
+ /*
2
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
3
+ * All rights reserved.
4
+ *
5
+ * This source code is licensed under both the BSD-style license (found in the
6
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
7
+ * in the COPYING file in the root directory of this source tree).
8
+ * You may select, at your option, one of the above-listed licenses.
9
+ */
10
+
11
+ #ifndef ZSTD_CLEVELS_H
12
+ #define ZSTD_CLEVELS_H
13
+
14
+ #define ZSTD_STATIC_LINKING_ONLY /* ZSTD_compressionParameters */
15
+ #include "../zstd.h"
16
+
17
+ /*-===== Pre-defined compression levels =====-*/
18
+
19
+ #define ZSTD_MAX_CLEVEL 22
20
+
21
+ #ifdef __GNUC__
22
+ __attribute__((__unused__))
23
+ #endif
24
+
25
+ static const ZSTD_compressionParameters ZSTD_defaultCParameters[4][ZSTD_MAX_CLEVEL+1] = {
26
+ { /* "default" - for any srcSize > 256 KB */
27
+ /* W, C, H, S, L, TL, strat */
28
+ { 19, 12, 13, 1, 6, 1, ZSTD_fast }, /* base for negative levels */
29
+ { 19, 13, 14, 1, 7, 0, ZSTD_fast }, /* level 1 */
30
+ { 20, 15, 16, 1, 6, 0, ZSTD_fast }, /* level 2 */
31
+ { 21, 16, 17, 1, 5, 0, ZSTD_dfast }, /* level 3 */
32
+ { 21, 18, 18, 1, 5, 0, ZSTD_dfast }, /* level 4 */
33
+ { 21, 18, 19, 3, 5, 2, ZSTD_greedy }, /* level 5 */
34
+ { 21, 18, 19, 3, 5, 4, ZSTD_lazy }, /* level 6 */
35
+ { 21, 19, 20, 4, 5, 8, ZSTD_lazy }, /* level 7 */
36
+ { 21, 19, 20, 4, 5, 16, ZSTD_lazy2 }, /* level 8 */
37
+ { 22, 20, 21, 4, 5, 16, ZSTD_lazy2 }, /* level 9 */
38
+ { 22, 21, 22, 5, 5, 16, ZSTD_lazy2 }, /* level 10 */
39
+ { 22, 21, 22, 6, 5, 16, ZSTD_lazy2 }, /* level 11 */
40
+ { 22, 22, 23, 6, 5, 32, ZSTD_lazy2 }, /* level 12 */
41
+ { 22, 22, 22, 4, 5, 32, ZSTD_btlazy2 }, /* level 13 */
42
+ { 22, 22, 23, 5, 5, 32, ZSTD_btlazy2 }, /* level 14 */
43
+ { 22, 23, 23, 6, 5, 32, ZSTD_btlazy2 }, /* level 15 */
44
+ { 22, 22, 22, 5, 5, 48, ZSTD_btopt }, /* level 16 */
45
+ { 23, 23, 22, 5, 4, 64, ZSTD_btopt }, /* level 17 */
46
+ { 23, 23, 22, 6, 3, 64, ZSTD_btultra }, /* level 18 */
47
+ { 23, 24, 22, 7, 3,256, ZSTD_btultra2}, /* level 19 */
48
+ { 25, 25, 23, 7, 3,256, ZSTD_btultra2}, /* level 20 */
49
+ { 26, 26, 24, 7, 3,512, ZSTD_btultra2}, /* level 21 */
50
+ { 27, 27, 25, 9, 3,999, ZSTD_btultra2}, /* level 22 */
51
+ },
52
+ { /* for srcSize <= 256 KB */
53
+ /* W, C, H, S, L, TL, strat */
54
+ { 18, 12, 13, 1, 5, 1, ZSTD_fast }, /* base for negative levels */
55
+ { 18, 13, 14, 1, 6, 0, ZSTD_fast }, /* level 1 */
56
+ { 18, 14, 14, 1, 5, 0, ZSTD_dfast }, /* level 2 */
57
+ { 18, 16, 16, 1, 4, 0, ZSTD_dfast }, /* level 3 */
58
+ { 18, 16, 17, 3, 5, 2, ZSTD_greedy }, /* level 4.*/
59
+ { 18, 17, 18, 5, 5, 2, ZSTD_greedy }, /* level 5.*/
60
+ { 18, 18, 19, 3, 5, 4, ZSTD_lazy }, /* level 6.*/
61
+ { 18, 18, 19, 4, 4, 4, ZSTD_lazy }, /* level 7 */
62
+ { 18, 18, 19, 4, 4, 8, ZSTD_lazy2 }, /* level 8 */
63
+ { 18, 18, 19, 5, 4, 8, ZSTD_lazy2 }, /* level 9 */
64
+ { 18, 18, 19, 6, 4, 8, ZSTD_lazy2 }, /* level 10 */
65
+ { 18, 18, 19, 5, 4, 12, ZSTD_btlazy2 }, /* level 11.*/
66
+ { 18, 19, 19, 7, 4, 12, ZSTD_btlazy2 }, /* level 12.*/
67
+ { 18, 18, 19, 4, 4, 16, ZSTD_btopt }, /* level 13 */
68
+ { 18, 18, 19, 4, 3, 32, ZSTD_btopt }, /* level 14.*/
69
+ { 18, 18, 19, 6, 3,128, ZSTD_btopt }, /* level 15.*/
70
+ { 18, 19, 19, 6, 3,128, ZSTD_btultra }, /* level 16.*/
71
+ { 18, 19, 19, 8, 3,256, ZSTD_btultra }, /* level 17.*/
72
+ { 18, 19, 19, 6, 3,128, ZSTD_btultra2}, /* level 18.*/
73
+ { 18, 19, 19, 8, 3,256, ZSTD_btultra2}, /* level 19.*/
74
+ { 18, 19, 19, 10, 3,512, ZSTD_btultra2}, /* level 20.*/
75
+ { 18, 19, 19, 12, 3,512, ZSTD_btultra2}, /* level 21.*/
76
+ { 18, 19, 19, 13, 3,999, ZSTD_btultra2}, /* level 22.*/
77
+ },
78
+ { /* for srcSize <= 128 KB */
79
+ /* W, C, H, S, L, TL, strat */
80
+ { 17, 12, 12, 1, 5, 1, ZSTD_fast }, /* base for negative levels */
81
+ { 17, 12, 13, 1, 6, 0, ZSTD_fast }, /* level 1 */
82
+ { 17, 13, 15, 1, 5, 0, ZSTD_fast }, /* level 2 */
83
+ { 17, 15, 16, 2, 5, 0, ZSTD_dfast }, /* level 3 */
84
+ { 17, 17, 17, 2, 4, 0, ZSTD_dfast }, /* level 4 */
85
+ { 17, 16, 17, 3, 4, 2, ZSTD_greedy }, /* level 5 */
86
+ { 17, 16, 17, 3, 4, 4, ZSTD_lazy }, /* level 6 */
87
+ { 17, 16, 17, 3, 4, 8, ZSTD_lazy2 }, /* level 7 */
88
+ { 17, 16, 17, 4, 4, 8, ZSTD_lazy2 }, /* level 8 */
89
+ { 17, 16, 17, 5, 4, 8, ZSTD_lazy2 }, /* level 9 */
90
+ { 17, 16, 17, 6, 4, 8, ZSTD_lazy2 }, /* level 10 */
91
+ { 17, 17, 17, 5, 4, 8, ZSTD_btlazy2 }, /* level 11 */
92
+ { 17, 18, 17, 7, 4, 12, ZSTD_btlazy2 }, /* level 12 */
93
+ { 17, 18, 17, 3, 4, 12, ZSTD_btopt }, /* level 13.*/
94
+ { 17, 18, 17, 4, 3, 32, ZSTD_btopt }, /* level 14.*/
95
+ { 17, 18, 17, 6, 3,256, ZSTD_btopt }, /* level 15.*/
96
+ { 17, 18, 17, 6, 3,128, ZSTD_btultra }, /* level 16.*/
97
+ { 17, 18, 17, 8, 3,256, ZSTD_btultra }, /* level 17.*/
98
+ { 17, 18, 17, 10, 3,512, ZSTD_btultra }, /* level 18.*/
99
+ { 17, 18, 17, 5, 3,256, ZSTD_btultra2}, /* level 19.*/
100
+ { 17, 18, 17, 7, 3,512, ZSTD_btultra2}, /* level 20.*/
101
+ { 17, 18, 17, 9, 3,512, ZSTD_btultra2}, /* level 21.*/
102
+ { 17, 18, 17, 11, 3,999, ZSTD_btultra2}, /* level 22.*/
103
+ },
104
+ { /* for srcSize <= 16 KB */
105
+ /* W, C, H, S, L, TL, strat */
106
+ { 14, 12, 13, 1, 5, 1, ZSTD_fast }, /* base for negative levels */
107
+ { 14, 14, 15, 1, 5, 0, ZSTD_fast }, /* level 1 */
108
+ { 14, 14, 15, 1, 4, 0, ZSTD_fast }, /* level 2 */
109
+ { 14, 14, 15, 2, 4, 0, ZSTD_dfast }, /* level 3 */
110
+ { 14, 14, 14, 4, 4, 2, ZSTD_greedy }, /* level 4 */
111
+ { 14, 14, 14, 3, 4, 4, ZSTD_lazy }, /* level 5.*/
112
+ { 14, 14, 14, 4, 4, 8, ZSTD_lazy2 }, /* level 6 */
113
+ { 14, 14, 14, 6, 4, 8, ZSTD_lazy2 }, /* level 7 */
114
+ { 14, 14, 14, 8, 4, 8, ZSTD_lazy2 }, /* level 8.*/
115
+ { 14, 15, 14, 5, 4, 8, ZSTD_btlazy2 }, /* level 9.*/
116
+ { 14, 15, 14, 9, 4, 8, ZSTD_btlazy2 }, /* level 10.*/
117
+ { 14, 15, 14, 3, 4, 12, ZSTD_btopt }, /* level 11.*/
118
+ { 14, 15, 14, 4, 3, 24, ZSTD_btopt }, /* level 12.*/
119
+ { 14, 15, 14, 5, 3, 32, ZSTD_btultra }, /* level 13.*/
120
+ { 14, 15, 15, 6, 3, 64, ZSTD_btultra }, /* level 14.*/
121
+ { 14, 15, 15, 7, 3,256, ZSTD_btultra }, /* level 15.*/
122
+ { 14, 15, 15, 5, 3, 48, ZSTD_btultra2}, /* level 16.*/
123
+ { 14, 15, 15, 6, 3,128, ZSTD_btultra2}, /* level 17.*/
124
+ { 14, 15, 15, 7, 3,256, ZSTD_btultra2}, /* level 18.*/
125
+ { 14, 15, 15, 8, 3,256, ZSTD_btultra2}, /* level 19.*/
126
+ { 14, 15, 15, 8, 3,512, ZSTD_btultra2}, /* level 20.*/
127
+ { 14, 15, 15, 9, 3,512, ZSTD_btultra2}, /* level 21.*/
128
+ { 14, 15, 15, 10, 3,999, ZSTD_btultra2}, /* level 22.*/
129
+ },
130
+ };
131
+
132
+
133
+
134
+ #endif /* ZSTD_CLEVELS_H */
@@ -1,6 +1,6 @@
1
1
  /* ******************************************************************
2
2
  * FSE : Finite State Entropy encoder
3
- * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc.
3
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
4
4
  *
5
5
  * You can contact the author at :
6
6
  * - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
@@ -26,6 +26,7 @@
26
26
  #define ZSTD_DEPS_NEED_MALLOC
27
27
  #define ZSTD_DEPS_NEED_MATH64
28
28
  #include "../common/zstd_deps.h" /* ZSTD_malloc, ZSTD_free, ZSTD_memcpy, ZSTD_memset */
29
+ #include "../common/bits.h" /* ZSTD_highbit32 */
29
30
 
30
31
 
31
32
  /* **************************************************************
@@ -75,13 +76,14 @@ size_t FSE_buildCTable_wksp(FSE_CTable* ct,
75
76
  void* const FSCT = ((U32*)ptr) + 1 /* header */ + (tableLog ? tableSize>>1 : 1) ;
76
77
  FSE_symbolCompressionTransform* const symbolTT = (FSE_symbolCompressionTransform*) (FSCT);
77
78
  U32 const step = FSE_TABLESTEP(tableSize);
79
+ U32 const maxSV1 = maxSymbolValue+1;
78
80
 
79
- U32* cumul = (U32*)workSpace;
80
- FSE_FUNCTION_TYPE* tableSymbol = (FSE_FUNCTION_TYPE*)(cumul + (maxSymbolValue + 2));
81
+ U16* cumul = (U16*)workSpace; /* size = maxSV1+1 (indices 0..maxSV1 are written) */
82
+ FSE_FUNCTION_TYPE* const tableSymbol = (FSE_FUNCTION_TYPE*)(cumul + (maxSV1+1)); /* size = tableSize */
81
83
 
82
84
  U32 highThreshold = tableSize-1;
83
85
 
84
- if ((size_t)workSpace & 3) return ERROR(GENERIC); /* Must be 4 byte aligned */
86
+ assert(((size_t)workSpace & 1) == 0); /* Must be 2-byte aligned */
85
87
  if (FSE_BUILD_CTABLE_WORKSPACE_SIZE(maxSymbolValue, tableLog) > wkspSize) return ERROR(tableLog_tooLarge);
86
88
  /* CTable header */
87
89
  tableU16[-2] = (U16) tableLog;
@@ -89,7 +91,7 @@ size_t FSE_buildCTable_wksp(FSE_CTable* ct,
89
91
  assert(tableLog < 16); /* required for threshold strategy to work */
90
92
 
91
93
  /* For explanations on how to distribute symbol values over the table :
92
- * http://fastcompression.blogspot.fr/2014/02/fse-distributing-symbol-values.html */
94
+ * https://fastcompression.blogspot.fr/2014/02/fse-distributing-symbol-values.html */
93
95
 
94
96
  #ifdef __clang_analyzer__
95
97
  ZSTD_memset(tableSymbol, 0, sizeof(*tableSymbol) * tableSize); /* useless initialization, just to keep scan-build happy */
@@ -98,20 +100,61 @@ size_t FSE_buildCTable_wksp(FSE_CTable* ct,
98
100
  /* symbol start positions */
99
101
  { U32 u;
100
102
  cumul[0] = 0;
101
- for (u=1; u <= maxSymbolValue+1; u++) {
103
+ for (u=1; u <= maxSV1; u++) {
102
104
  if (normalizedCounter[u-1]==-1) { /* Low proba symbol */
103
105
  cumul[u] = cumul[u-1] + 1;
104
106
  tableSymbol[highThreshold--] = (FSE_FUNCTION_TYPE)(u-1);
105
107
  } else {
106
- cumul[u] = cumul[u-1] + normalizedCounter[u-1];
108
+ assert(normalizedCounter[u-1] >= 0);
109
+ cumul[u] = cumul[u-1] + (U16)normalizedCounter[u-1];
110
+ assert(cumul[u] >= cumul[u-1]); /* no overflow */
107
111
  } }
108
- cumul[maxSymbolValue+1] = tableSize+1;
112
+ cumul[maxSV1] = (U16)(tableSize+1);
109
113
  }
110
114
 
111
115
  /* Spread symbols */
112
- { U32 position = 0;
116
+ if (highThreshold == tableSize - 1) {
117
+ /* Case for no low prob count symbols. Lay down 8 bytes at a time
118
+ * to reduce branch misses since we are operating on a small block
119
+ */
120
+ BYTE* const spread = tableSymbol + tableSize; /* size = tableSize + 8 (may write beyond tableSize) */
121
+ { U64 const add = 0x0101010101010101ull;
122
+ size_t pos = 0;
123
+ U64 sv = 0;
124
+ U32 s;
125
+ for (s=0; s<maxSV1; ++s, sv += add) {
126
+ int i;
127
+ int const n = normalizedCounter[s];
128
+ MEM_write64(spread + pos, sv);
129
+ for (i = 8; i < n; i += 8) {
130
+ MEM_write64(spread + pos + i, sv);
131
+ }
132
+ assert(n>=0);
133
+ pos += (size_t)n;
134
+ }
135
+ }
136
+ /* Spread symbols across the table. Lack of lowprob symbols means that
137
+ * we don't need variable sized inner loop, so we can unroll the loop and
138
+ * reduce branch misses.
139
+ */
140
+ { size_t position = 0;
141
+ size_t s;
142
+ size_t const unroll = 2; /* Experimentally determined optimal unroll */
143
+ assert(tableSize % unroll == 0); /* FSE_MIN_TABLELOG is 5 */
144
+ for (s = 0; s < (size_t)tableSize; s += unroll) {
145
+ size_t u;
146
+ for (u = 0; u < unroll; ++u) {
147
+ size_t const uPosition = (position + (u * step)) & tableMask;
148
+ tableSymbol[uPosition] = spread[s + u];
149
+ }
150
+ position = (position + (unroll * step)) & tableMask;
151
+ }
152
+ assert(position == 0); /* Must have initialized all positions */
153
+ }
154
+ } else {
155
+ U32 position = 0;
113
156
  U32 symbol;
114
- for (symbol=0; symbol<=maxSymbolValue; symbol++) {
157
+ for (symbol=0; symbol<maxSV1; symbol++) {
115
158
  int nbOccurrences;
116
159
  int const freq = normalizedCounter[symbol];
117
160
  for (nbOccurrences=0; nbOccurrences<freq; nbOccurrences++) {
@@ -120,7 +163,6 @@ size_t FSE_buildCTable_wksp(FSE_CTable* ct,
120
163
  while (position > highThreshold)
121
164
  position = (position + step) & tableMask; /* Low proba area */
122
165
  } }
123
-
124
166
  assert(position==0); /* Must have initialized all positions */
125
167
  }
126
168
 
@@ -144,16 +186,17 @@ size_t FSE_buildCTable_wksp(FSE_CTable* ct,
144
186
  case -1:
145
187
  case 1:
146
188
  symbolTT[s].deltaNbBits = (tableLog << 16) - (1<<tableLog);
147
- symbolTT[s].deltaFindState = total - 1;
189
+ assert(total <= INT_MAX);
190
+ symbolTT[s].deltaFindState = (int)(total - 1);
148
191
  total ++;
149
192
  break;
150
193
  default :
151
- {
152
- U32 const maxBitsOut = tableLog - BIT_highbit32 (normalizedCounter[s]-1);
153
- U32 const minStatePlus = normalizedCounter[s] << maxBitsOut;
194
+ assert(normalizedCounter[s] > 1);
195
+ { U32 const maxBitsOut = tableLog - ZSTD_highbit32 ((U32)normalizedCounter[s]-1);
196
+ U32 const minStatePlus = (U32)normalizedCounter[s] << maxBitsOut;
154
197
  symbolTT[s].deltaNbBits = (maxBitsOut << 16) - minStatePlus;
155
- symbolTT[s].deltaFindState = total - normalizedCounter[s];
156
- total += normalizedCounter[s];
198
+ symbolTT[s].deltaFindState = (int)(total - (unsigned)normalizedCounter[s]);
199
+ total += (unsigned)normalizedCounter[s];
157
200
  } } } }
158
201
 
159
202
  #if 0 /* debug : symbol costs */
@@ -164,32 +207,26 @@ size_t FSE_buildCTable_wksp(FSE_CTable* ct,
164
207
  symbol, normalizedCounter[symbol],
165
208
  FSE_getMaxNbBits(symbolTT, symbol),
166
209
  (double)FSE_bitCost(symbolTT, tableLog, symbol, 8) / 256);
167
- }
168
- }
210
+ } }
169
211
  #endif
170
212
 
171
213
  return 0;
172
214
  }
173
215
 
174
- #ifndef ZSTD_NO_UNUSED_FUNCTIONS
175
- size_t FSE_buildCTable(FSE_CTable* ct, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog)
176
- {
177
- FSE_FUNCTION_TYPE tableSymbol[FSE_MAX_TABLESIZE]; /* memset() is not necessary, even if static analyzer complain about it */
178
- return FSE_buildCTable_wksp(ct, normalizedCounter, maxSymbolValue, tableLog, tableSymbol, sizeof(tableSymbol));
179
- }
180
- #endif
181
-
182
216
 
183
217
 
184
218
  #ifndef FSE_COMMONDEFS_ONLY
185
219
 
186
-
187
220
  /*-**************************************************************
188
221
  * FSE NCount encoding
189
222
  ****************************************************************/
190
223
  size_t FSE_NCountWriteBound(unsigned maxSymbolValue, unsigned tableLog)
191
224
  {
192
- size_t const maxHeaderSize = (((maxSymbolValue+1) * tableLog) >> 3) + 3;
225
+ size_t const maxHeaderSize = (((maxSymbolValue+1) * tableLog
226
+ + 4 /* bitCount initialized at 4 */
227
+ + 2 /* first two symbols may use one additional bit each */) / 8)
228
+ + 1 /* round up to whole nb bytes */
229
+ + 2 /* additional two bytes for bitstream flush */;
193
230
  return maxSymbolValue ? maxHeaderSize : FSE_NCOUNTBOUND; /* maxSymbolValue==0 ? use default */
194
231
  }
195
232
 
@@ -306,21 +343,11 @@ size_t FSE_writeNCount (void* buffer, size_t bufferSize,
306
343
  * FSE Compression Code
307
344
  ****************************************************************/
308
345
 
309
- FSE_CTable* FSE_createCTable (unsigned maxSymbolValue, unsigned tableLog)
310
- {
311
- size_t size;
312
- if (tableLog > FSE_TABLELOG_ABSOLUTE_MAX) tableLog = FSE_TABLELOG_ABSOLUTE_MAX;
313
- size = FSE_CTABLE_SIZE_U32 (tableLog, maxSymbolValue) * sizeof(U32);
314
- return (FSE_CTable*)ZSTD_malloc(size);
315
- }
316
-
317
- void FSE_freeCTable (FSE_CTable* ct) { ZSTD_free(ct); }
318
-
319
346
  /* provides the minimum logSize to safely represent a distribution */
320
347
  static unsigned FSE_minTableLog(size_t srcSize, unsigned maxSymbolValue)
321
348
  {
322
- U32 minBitsSrc = BIT_highbit32((U32)(srcSize)) + 1;
323
- U32 minBitsSymbols = BIT_highbit32(maxSymbolValue) + 2;
349
+ U32 minBitsSrc = ZSTD_highbit32((U32)(srcSize)) + 1;
350
+ U32 minBitsSymbols = ZSTD_highbit32(maxSymbolValue) + 2;
324
351
  U32 minBits = minBitsSrc < minBitsSymbols ? minBitsSrc : minBitsSymbols;
325
352
  assert(srcSize > 1); /* Not supported, RLE should be used instead */
326
353
  return minBits;
@@ -328,7 +355,7 @@ static unsigned FSE_minTableLog(size_t srcSize, unsigned maxSymbolValue)
328
355
 
329
356
  unsigned FSE_optimalTableLog_internal(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue, unsigned minus)
330
357
  {
331
- U32 maxBitsSrc = BIT_highbit32((U32)(srcSize - 1)) - minus;
358
+ U32 maxBitsSrc = ZSTD_highbit32((U32)(srcSize - 1)) - minus;
332
359
  U32 tableLog = maxTableLog;
333
360
  U32 minBits = FSE_minTableLog(srcSize, maxSymbolValue);
334
361
  assert(srcSize > 1); /* Not supported, RLE should be used instead */
@@ -496,40 +523,6 @@ size_t FSE_normalizeCount (short* normalizedCounter, unsigned tableLog,
496
523
  return tableLog;
497
524
  }
498
525
 
499
-
500
- /* fake FSE_CTable, for raw (uncompressed) input */
501
- size_t FSE_buildCTable_raw (FSE_CTable* ct, unsigned nbBits)
502
- {
503
- const unsigned tableSize = 1 << nbBits;
504
- const unsigned tableMask = tableSize - 1;
505
- const unsigned maxSymbolValue = tableMask;
506
- void* const ptr = ct;
507
- U16* const tableU16 = ( (U16*) ptr) + 2;
508
- void* const FSCT = ((U32*)ptr) + 1 /* header */ + (tableSize>>1); /* assumption : tableLog >= 1 */
509
- FSE_symbolCompressionTransform* const symbolTT = (FSE_symbolCompressionTransform*) (FSCT);
510
- unsigned s;
511
-
512
- /* Sanity checks */
513
- if (nbBits < 1) return ERROR(GENERIC); /* min size */
514
-
515
- /* header */
516
- tableU16[-2] = (U16) nbBits;
517
- tableU16[-1] = (U16) maxSymbolValue;
518
-
519
- /* Build table */
520
- for (s=0; s<tableSize; s++)
521
- tableU16[s] = (U16)(tableSize + s);
522
-
523
- /* Build Symbol Transformation Table */
524
- { const U32 deltaNbBits = (nbBits << 16) - (1 << nbBits);
525
- for (s=0; s<=maxSymbolValue; s++) {
526
- symbolTT[s].deltaNbBits = deltaNbBits;
527
- symbolTT[s].deltaFindState = s-1;
528
- } }
529
-
530
- return 0;
531
- }
532
-
533
526
  /* fake FSE_CTable, for rle input (always same symbol) */
534
527
  size_t FSE_buildCTable_rle (FSE_CTable* ct, BYTE symbolValue)
535
528
  {
@@ -628,78 +621,4 @@ size_t FSE_compress_usingCTable (void* dst, size_t dstSize,
628
621
 
629
622
  size_t FSE_compressBound(size_t size) { return FSE_COMPRESSBOUND(size); }
630
623
 
631
- #ifndef ZSTD_NO_UNUSED_FUNCTIONS
632
- /* FSE_compress_wksp() :
633
- * Same as FSE_compress2(), but using an externally allocated scratch buffer (`workSpace`).
634
- * `wkspSize` size must be `(1<<tableLog)`.
635
- */
636
- size_t FSE_compress_wksp (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize)
637
- {
638
- BYTE* const ostart = (BYTE*) dst;
639
- BYTE* op = ostart;
640
- BYTE* const oend = ostart + dstSize;
641
-
642
- unsigned count[FSE_MAX_SYMBOL_VALUE+1];
643
- S16 norm[FSE_MAX_SYMBOL_VALUE+1];
644
- FSE_CTable* CTable = (FSE_CTable*)workSpace;
645
- size_t const CTableSize = FSE_CTABLE_SIZE_U32(tableLog, maxSymbolValue);
646
- void* scratchBuffer = (void*)(CTable + CTableSize);
647
- size_t const scratchBufferSize = wkspSize - (CTableSize * sizeof(FSE_CTable));
648
-
649
- /* init conditions */
650
- if (wkspSize < FSE_COMPRESS_WKSP_SIZE_U32(tableLog, maxSymbolValue)) return ERROR(tableLog_tooLarge);
651
- if (srcSize <= 1) return 0; /* Not compressible */
652
- if (!maxSymbolValue) maxSymbolValue = FSE_MAX_SYMBOL_VALUE;
653
- if (!tableLog) tableLog = FSE_DEFAULT_TABLELOG;
654
-
655
- /* Scan input and build symbol stats */
656
- { CHECK_V_F(maxCount, HIST_count_wksp(count, &maxSymbolValue, src, srcSize, scratchBuffer, scratchBufferSize) );
657
- if (maxCount == srcSize) return 1; /* only a single symbol in src : rle */
658
- if (maxCount == 1) return 0; /* each symbol present maximum once => not compressible */
659
- if (maxCount < (srcSize >> 7)) return 0; /* Heuristic : not compressible enough */
660
- }
661
-
662
- tableLog = FSE_optimalTableLog(tableLog, srcSize, maxSymbolValue);
663
- CHECK_F( FSE_normalizeCount(norm, tableLog, count, srcSize, maxSymbolValue, /* useLowProbCount */ srcSize >= 2048) );
664
-
665
- /* Write table description header */
666
- { CHECK_V_F(nc_err, FSE_writeNCount(op, oend-op, norm, maxSymbolValue, tableLog) );
667
- op += nc_err;
668
- }
669
-
670
- /* Compress */
671
- CHECK_F( FSE_buildCTable_wksp(CTable, norm, maxSymbolValue, tableLog, scratchBuffer, scratchBufferSize) );
672
- { CHECK_V_F(cSize, FSE_compress_usingCTable(op, oend - op, src, srcSize, CTable) );
673
- if (cSize == 0) return 0; /* not enough space for compressed data */
674
- op += cSize;
675
- }
676
-
677
- /* check compressibility */
678
- if ( (size_t)(op-ostart) >= srcSize-1 ) return 0;
679
-
680
- return op-ostart;
681
- }
682
-
683
- typedef struct {
684
- FSE_CTable CTable_max[FSE_CTABLE_SIZE_U32(FSE_MAX_TABLELOG, FSE_MAX_SYMBOL_VALUE)];
685
- union {
686
- U32 hist_wksp[HIST_WKSP_SIZE_U32];
687
- BYTE scratchBuffer[1 << FSE_MAX_TABLELOG];
688
- } workspace;
689
- } fseWkspMax_t;
690
-
691
- size_t FSE_compress2 (void* dst, size_t dstCapacity, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog)
692
- {
693
- fseWkspMax_t scratchBuffer;
694
- DEBUG_STATIC_ASSERT(sizeof(scratchBuffer) >= FSE_COMPRESS_WKSP_SIZE_U32(FSE_MAX_TABLELOG, FSE_MAX_SYMBOL_VALUE)); /* compilation failures here means scratchBuffer is not large enough */
695
- if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge);
696
- return FSE_compress_wksp(dst, dstCapacity, src, srcSize, maxSymbolValue, tableLog, &scratchBuffer, sizeof(scratchBuffer));
697
- }
698
-
699
- size_t FSE_compress (void* dst, size_t dstCapacity, const void* src, size_t srcSize)
700
- {
701
- return FSE_compress2(dst, dstCapacity, src, srcSize, FSE_MAX_SYMBOL_VALUE, FSE_DEFAULT_TABLELOG);
702
- }
703
- #endif
704
-
705
624
  #endif /* FSE_COMMONDEFS_ONLY */
@@ -1,7 +1,7 @@
1
1
  /* ******************************************************************
2
2
  * hist : Histogram functions
3
3
  * part of Finite State Entropy project
4
- * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc.
4
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
5
5
  *
6
6
  * You can contact the author at :
7
7
  * - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
@@ -1,7 +1,7 @@
1
1
  /* ******************************************************************
2
2
  * hist : Histogram functions
3
3
  * part of Finite State Entropy project
4
- * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc.
4
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
5
5
  *
6
6
  * You can contact the author at :
7
7
  * - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy