zstd-ruby 1.4.0.0 → 1.4.9.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (95) hide show
  1. checksums.yaml +4 -4
  2. data/.github/workflows/ruby.yml +35 -0
  3. data/README.md +2 -2
  4. data/ext/zstdruby/libzstd/Makefile +274 -107
  5. data/ext/zstdruby/libzstd/README.md +75 -16
  6. data/ext/zstdruby/libzstd/common/bitstream.h +59 -51
  7. data/ext/zstdruby/libzstd/common/compiler.h +154 -5
  8. data/ext/zstdruby/libzstd/common/cpu.h +1 -3
  9. data/ext/zstdruby/libzstd/common/debug.c +11 -31
  10. data/ext/zstdruby/libzstd/common/debug.h +22 -49
  11. data/ext/zstdruby/libzstd/common/entropy_common.c +201 -75
  12. data/ext/zstdruby/libzstd/common/error_private.c +3 -1
  13. data/ext/zstdruby/libzstd/common/error_private.h +7 -3
  14. data/ext/zstdruby/libzstd/common/fse.h +50 -42
  15. data/ext/zstdruby/libzstd/common/fse_decompress.c +134 -50
  16. data/ext/zstdruby/libzstd/common/huf.h +41 -38
  17. data/ext/zstdruby/libzstd/common/mem.h +68 -22
  18. data/ext/zstdruby/libzstd/common/pool.c +30 -20
  19. data/ext/zstdruby/libzstd/common/pool.h +3 -3
  20. data/ext/zstdruby/libzstd/common/threading.c +51 -4
  21. data/ext/zstdruby/libzstd/common/threading.h +36 -4
  22. data/ext/zstdruby/libzstd/common/xxhash.c +39 -89
  23. data/ext/zstdruby/libzstd/common/xxhash.h +12 -32
  24. data/ext/zstdruby/libzstd/common/zstd_common.c +10 -10
  25. data/ext/zstdruby/libzstd/common/zstd_deps.h +111 -0
  26. data/ext/zstdruby/libzstd/common/zstd_errors.h +3 -1
  27. data/ext/zstdruby/libzstd/common/zstd_internal.h +231 -72
  28. data/ext/zstdruby/libzstd/common/zstd_trace.c +42 -0
  29. data/ext/zstdruby/libzstd/common/zstd_trace.h +152 -0
  30. data/ext/zstdruby/libzstd/compress/fse_compress.c +47 -63
  31. data/ext/zstdruby/libzstd/compress/hist.c +41 -63
  32. data/ext/zstdruby/libzstd/compress/hist.h +13 -33
  33. data/ext/zstdruby/libzstd/compress/huf_compress.c +288 -172
  34. data/ext/zstdruby/libzstd/compress/zstd_compress.c +2504 -1626
  35. data/ext/zstdruby/libzstd/compress/zstd_compress_internal.h +446 -85
  36. data/ext/zstdruby/libzstd/compress/zstd_compress_literals.c +158 -0
  37. data/ext/zstdruby/libzstd/compress/zstd_compress_literals.h +29 -0
  38. data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.c +433 -0
  39. data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.h +54 -0
  40. data/ext/zstdruby/libzstd/compress/zstd_compress_superblock.c +849 -0
  41. data/ext/zstdruby/libzstd/compress/zstd_compress_superblock.h +32 -0
  42. data/ext/zstdruby/libzstd/compress/zstd_cwksp.h +561 -0
  43. data/ext/zstdruby/libzstd/compress/zstd_double_fast.c +82 -60
  44. data/ext/zstdruby/libzstd/compress/zstd_double_fast.h +2 -2
  45. data/ext/zstdruby/libzstd/compress/zstd_fast.c +106 -80
  46. data/ext/zstdruby/libzstd/compress/zstd_fast.h +2 -2
  47. data/ext/zstdruby/libzstd/compress/zstd_lazy.c +411 -105
  48. data/ext/zstdruby/libzstd/compress/zstd_lazy.h +21 -1
  49. data/ext/zstdruby/libzstd/compress/zstd_ldm.c +296 -207
  50. data/ext/zstdruby/libzstd/compress/zstd_ldm.h +14 -3
  51. data/ext/zstdruby/libzstd/compress/zstd_ldm_geartab.h +103 -0
  52. data/ext/zstdruby/libzstd/compress/zstd_opt.c +260 -148
  53. data/ext/zstdruby/libzstd/compress/zstd_opt.h +1 -1
  54. data/ext/zstdruby/libzstd/compress/zstdmt_compress.c +153 -440
  55. data/ext/zstdruby/libzstd/compress/zstdmt_compress.h +29 -110
  56. data/ext/zstdruby/libzstd/decompress/huf_decompress.c +356 -238
  57. data/ext/zstdruby/libzstd/decompress/zstd_ddict.c +20 -16
  58. data/ext/zstdruby/libzstd/decompress/zstd_ddict.h +3 -3
  59. data/ext/zstdruby/libzstd/decompress/zstd_decompress.c +641 -238
  60. data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.c +600 -371
  61. data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.h +8 -5
  62. data/ext/zstdruby/libzstd/decompress/zstd_decompress_internal.h +40 -9
  63. data/ext/zstdruby/libzstd/deprecated/zbuff.h +9 -8
  64. data/ext/zstdruby/libzstd/deprecated/zbuff_common.c +2 -2
  65. data/ext/zstdruby/libzstd/deprecated/zbuff_compress.c +1 -1
  66. data/ext/zstdruby/libzstd/deprecated/zbuff_decompress.c +1 -1
  67. data/ext/zstdruby/libzstd/dictBuilder/cover.c +197 -78
  68. data/ext/zstdruby/libzstd/dictBuilder/cover.h +52 -7
  69. data/ext/zstdruby/libzstd/dictBuilder/divsufsort.c +1 -1
  70. data/ext/zstdruby/libzstd/dictBuilder/fastcover.c +84 -66
  71. data/ext/zstdruby/libzstd/dictBuilder/zdict.c +58 -36
  72. data/ext/zstdruby/libzstd/dictBuilder/zdict.h +60 -31
  73. data/ext/zstdruby/libzstd/dll/example/Makefile +2 -1
  74. data/ext/zstdruby/libzstd/dll/example/README.md +16 -22
  75. data/ext/zstdruby/libzstd/legacy/zstd_legacy.h +8 -4
  76. data/ext/zstdruby/libzstd/legacy/zstd_v01.c +115 -111
  77. data/ext/zstdruby/libzstd/legacy/zstd_v01.h +1 -1
  78. data/ext/zstdruby/libzstd/legacy/zstd_v02.c +28 -14
  79. data/ext/zstdruby/libzstd/legacy/zstd_v02.h +1 -1
  80. data/ext/zstdruby/libzstd/legacy/zstd_v03.c +28 -14
  81. data/ext/zstdruby/libzstd/legacy/zstd_v03.h +1 -1
  82. data/ext/zstdruby/libzstd/legacy/zstd_v04.c +36 -19
  83. data/ext/zstdruby/libzstd/legacy/zstd_v04.h +1 -1
  84. data/ext/zstdruby/libzstd/legacy/zstd_v05.c +122 -107
  85. data/ext/zstdruby/libzstd/legacy/zstd_v05.h +2 -2
  86. data/ext/zstdruby/libzstd/legacy/zstd_v06.c +29 -23
  87. data/ext/zstdruby/libzstd/legacy/zstd_v06.h +1 -1
  88. data/ext/zstdruby/libzstd/legacy/zstd_v07.c +34 -24
  89. data/ext/zstdruby/libzstd/legacy/zstd_v07.h +1 -1
  90. data/ext/zstdruby/libzstd/libzstd.pc.in +2 -1
  91. data/ext/zstdruby/libzstd/zstd.h +655 -118
  92. data/lib/zstd-ruby/version.rb +1 -1
  93. data/zstd-ruby.gemspec +1 -1
  94. metadata +20 -10
  95. data/.travis.yml +0 -14
@@ -0,0 +1,152 @@
1
+ /*
2
+ * Copyright (c) 2016-2021, Facebook, Inc.
3
+ * All rights reserved.
4
+ *
5
+ * This source code is licensed under both the BSD-style license (found in the
6
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
7
+ * in the COPYING file in the root directory of this source tree).
8
+ * You may select, at your option, one of the above-listed licenses.
9
+ */
10
+
11
+ #ifndef ZSTD_TRACE_H
12
+ #define ZSTD_TRACE_H
13
+
14
+ #if defined (__cplusplus)
15
+ extern "C" {
16
+ #endif
17
+
18
+ #include <stddef.h>
19
+
20
+ /* weak symbol support */
21
+ #if !defined(ZSTD_HAVE_WEAK_SYMBOLS) && defined(__GNUC__) && \
22
+ !defined(__APPLE__) && !defined(_WIN32) && !defined(__MINGW32__) && \
23
+ !defined(__CYGWIN__)
24
+ # define ZSTD_HAVE_WEAK_SYMBOLS 1
25
+ #else
26
+ # define ZSTD_HAVE_WEAK_SYMBOLS 0
27
+ #endif
28
+ #if ZSTD_HAVE_WEAK_SYMBOLS
29
+ # define ZSTD_WEAK_ATTR __attribute__((__weak__))
30
+ #else
31
+ # define ZSTD_WEAK_ATTR
32
+ #endif
33
+
34
+ /* Only enable tracing when weak symbols are available. */
35
+ #ifndef ZSTD_TRACE
36
+ # define ZSTD_TRACE ZSTD_HAVE_WEAK_SYMBOLS
37
+ #endif
38
+
39
+ #if ZSTD_TRACE
40
+
41
+ struct ZSTD_CCtx_s;
42
+ struct ZSTD_DCtx_s;
43
+ struct ZSTD_CCtx_params_s;
44
+
45
+ typedef struct {
46
+ /**
47
+ * ZSTD_VERSION_NUMBER
48
+ *
49
+ * This is guaranteed to be the first member of ZSTD_trace.
50
+ * Otherwise, this struct is not stable between versions. If
51
+ * the version number does not match your expectation, you
52
+ * should not interpret the rest of the struct.
53
+ */
54
+ unsigned version;
55
+ /**
56
+ * Non-zero if streaming (de)compression is used.
57
+ */
58
+ unsigned streaming;
59
+ /**
60
+ * The dictionary ID.
61
+ */
62
+ unsigned dictionaryID;
63
+ /**
64
+ * Is the dictionary cold?
65
+ * Only set on decompression.
66
+ */
67
+ unsigned dictionaryIsCold;
68
+ /**
69
+ * The dictionary size or zero if no dictionary.
70
+ */
71
+ size_t dictionarySize;
72
+ /**
73
+ * The uncompressed size of the data.
74
+ */
75
+ size_t uncompressedSize;
76
+ /**
77
+ * The compressed size of the data.
78
+ */
79
+ size_t compressedSize;
80
+ /**
81
+ * The fully resolved CCtx parameters (NULL on decompression).
82
+ */
83
+ struct ZSTD_CCtx_params_s const* params;
84
+ /**
85
+ * The ZSTD_CCtx pointer (NULL on decompression).
86
+ */
87
+ struct ZSTD_CCtx_s const* cctx;
88
+ /**
89
+ * The ZSTD_DCtx pointer (NULL on compression).
90
+ */
91
+ struct ZSTD_DCtx_s const* dctx;
92
+ } ZSTD_Trace;
93
+
94
+ /**
95
+ * A tracing context. It must be 0 when tracing is disabled.
96
+ * Otherwise, any non-zero value returned by a tracing begin()
97
+ * function is presented to any subsequent calls to end().
98
+ *
99
+ * Any non-zero value means that tracing is enabled; the value itself
100
+ * is not interpreted by the library.
101
+ *
102
+ * Two possible uses are:
103
+ * * A timestamp for when the begin() function was called.
104
+ * * A unique key identifying the (de)compression, like the
105
+ * address of the [dc]ctx pointer if you need to track
106
+ * more information than just a timestamp.
107
+ */
108
+ typedef unsigned long long ZSTD_TraceCtx;
109
+
110
+ /**
111
+ * Trace the beginning of a compression call.
112
+ * @param cctx The cctx pointer for the compression.
113
+ * It can be used as a key to map begin() to end().
114
+ * @returns Non-zero if tracing is enabled. The return value is
115
+ * passed to ZSTD_trace_compress_end().
116
+ */
117
+ ZSTD_TraceCtx ZSTD_trace_compress_begin(struct ZSTD_CCtx_s const* cctx);
118
+
119
+ /**
120
+ * Trace the end of a compression call.
121
+ * @param ctx The return value of ZSTD_trace_compress_begin().
122
+ * @param trace The zstd tracing info.
123
+ */
124
+ void ZSTD_trace_compress_end(
125
+ ZSTD_TraceCtx ctx,
126
+ ZSTD_Trace const* trace);
127
+
128
+ /**
129
+ * Trace the beginning of a decompression call.
130
+ * @param dctx The dctx pointer for the decompression.
131
+ * It can be used as a key to map begin() to end().
132
+ * @returns Non-zero if tracing is enabled. The return value is
133
+ * passed to ZSTD_trace_decompress_end().
134
+ */
135
+ ZSTD_TraceCtx ZSTD_trace_decompress_begin(struct ZSTD_DCtx_s const* dctx);
136
+
137
+ /**
138
+ * Trace the end of a decompression call.
139
+ * @param ctx The return value of ZSTD_trace_decompress_begin().
140
+ * @param trace The zstd tracing info.
141
+ */
142
+ void ZSTD_trace_decompress_end(
143
+ ZSTD_TraceCtx ctx,
144
+ ZSTD_Trace const* trace);
145
+
146
+ #endif /* ZSTD_TRACE */
147
+
148
+ #if defined (__cplusplus)
149
+ }
150
+ #endif
151
+
152
+ #endif /* ZSTD_TRACE_H */
@@ -1,50 +1,31 @@
1
1
  /* ******************************************************************
2
- FSE : Finite State Entropy encoder
3
- Copyright (C) 2013-present, Yann Collet.
4
-
5
- BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
6
-
7
- Redistribution and use in source and binary forms, with or without
8
- modification, are permitted provided that the following conditions are
9
- met:
10
-
11
- * Redistributions of source code must retain the above copyright
12
- notice, this list of conditions and the following disclaimer.
13
- * Redistributions in binary form must reproduce the above
14
- copyright notice, this list of conditions and the following disclaimer
15
- in the documentation and/or other materials provided with the
16
- distribution.
17
-
18
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19
- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20
- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21
- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22
- OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23
- SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24
- LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25
- DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26
- THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27
- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28
- OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
-
30
- You can contact the author at :
31
- - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
32
- - Public forum : https://groups.google.com/forum/#!forum/lz4c
2
+ * FSE : Finite State Entropy encoder
3
+ * Copyright (c) 2013-2021, Yann Collet, Facebook, Inc.
4
+ *
5
+ * You can contact the author at :
6
+ * - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
7
+ * - Public forum : https://groups.google.com/forum/#!forum/lz4c
8
+ *
9
+ * This source code is licensed under both the BSD-style license (found in the
10
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
11
+ * in the COPYING file in the root directory of this source tree).
12
+ * You may select, at your option, one of the above-listed licenses.
33
13
  ****************************************************************** */
34
14
 
35
15
  /* **************************************************************
36
16
  * Includes
37
17
  ****************************************************************/
38
- #include <stdlib.h> /* malloc, free, qsort */
39
- #include <string.h> /* memcpy, memset */
40
- #include "compiler.h"
41
- #include "mem.h" /* U32, U16, etc. */
42
- #include "debug.h" /* assert, DEBUGLOG */
18
+ #include "../common/compiler.h"
19
+ #include "../common/mem.h" /* U32, U16, etc. */
20
+ #include "../common/debug.h" /* assert, DEBUGLOG */
43
21
  #include "hist.h" /* HIST_count_wksp */
44
- #include "bitstream.h"
22
+ #include "../common/bitstream.h"
45
23
  #define FSE_STATIC_LINKING_ONLY
46
- #include "fse.h"
47
- #include "error_private.h"
24
+ #include "../common/fse.h"
25
+ #include "../common/error_private.h"
26
+ #define ZSTD_DEPS_NEED_MALLOC
27
+ #define ZSTD_DEPS_NEED_MATH64
28
+ #include "../common/zstd_deps.h" /* ZSTD_malloc, ZSTD_free, ZSTD_memcpy, ZSTD_memset */
48
29
 
49
30
 
50
31
  /* **************************************************************
@@ -94,13 +75,15 @@ size_t FSE_buildCTable_wksp(FSE_CTable* ct,
94
75
  void* const FSCT = ((U32*)ptr) + 1 /* header */ + (tableLog ? tableSize>>1 : 1) ;
95
76
  FSE_symbolCompressionTransform* const symbolTT = (FSE_symbolCompressionTransform*) (FSCT);
96
77
  U32 const step = FSE_TABLESTEP(tableSize);
97
- U32 cumul[FSE_MAX_SYMBOL_VALUE+2];
98
78
 
99
- FSE_FUNCTION_TYPE* const tableSymbol = (FSE_FUNCTION_TYPE*)workSpace;
79
+ U32* cumul = (U32*)workSpace;
80
+ FSE_FUNCTION_TYPE* tableSymbol = (FSE_FUNCTION_TYPE*)(cumul + (maxSymbolValue + 2));
81
+
100
82
  U32 highThreshold = tableSize-1;
101
83
 
84
+ if ((size_t)workSpace & 3) return ERROR(GENERIC); /* Must be 4 byte aligned */
85
+ if (FSE_BUILD_CTABLE_WORKSPACE_SIZE(maxSymbolValue, tableLog) > wkspSize) return ERROR(tableLog_tooLarge);
102
86
  /* CTable header */
103
- if (((size_t)1 << tableLog) * sizeof(FSE_FUNCTION_TYPE) > wkspSize) return ERROR(tableLog_tooLarge);
104
87
  tableU16[-2] = (U16) tableLog;
105
88
  tableU16[-1] = (U16) maxSymbolValue;
106
89
  assert(tableLog < 16); /* required for threshold strategy to work */
@@ -109,7 +92,7 @@ size_t FSE_buildCTable_wksp(FSE_CTable* ct,
109
92
  * http://fastcompression.blogspot.fr/2014/02/fse-distributing-symbol-values.html */
110
93
 
111
94
  #ifdef __clang_analyzer__
112
- memset(tableSymbol, 0, sizeof(*tableSymbol) * tableSize); /* useless initialization, just to keep scan-build happy */
95
+ ZSTD_memset(tableSymbol, 0, sizeof(*tableSymbol) * tableSize); /* useless initialization, just to keep scan-build happy */
113
96
  #endif
114
97
 
115
98
  /* symbol start positions */
@@ -188,12 +171,13 @@ size_t FSE_buildCTable_wksp(FSE_CTable* ct,
188
171
  return 0;
189
172
  }
190
173
 
191
-
174
+ #ifndef ZSTD_NO_UNUSED_FUNCTIONS
192
175
  size_t FSE_buildCTable(FSE_CTable* ct, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog)
193
176
  {
194
177
  FSE_FUNCTION_TYPE tableSymbol[FSE_MAX_TABLESIZE]; /* memset() is not necessary, even if static analyzer complain about it */
195
178
  return FSE_buildCTable_wksp(ct, normalizedCounter, maxSymbolValue, tableLog, tableSymbol, sizeof(tableSymbol));
196
179
  }
180
+ #endif
197
181
 
198
182
 
199
183
 
@@ -327,10 +311,10 @@ FSE_CTable* FSE_createCTable (unsigned maxSymbolValue, unsigned tableLog)
327
311
  size_t size;
328
312
  if (tableLog > FSE_TABLELOG_ABSOLUTE_MAX) tableLog = FSE_TABLELOG_ABSOLUTE_MAX;
329
313
  size = FSE_CTABLE_SIZE_U32 (tableLog, maxSymbolValue) * sizeof(U32);
330
- return (FSE_CTable*)malloc(size);
314
+ return (FSE_CTable*)ZSTD_malloc(size);
331
315
  }
332
316
 
333
- void FSE_freeCTable (FSE_CTable* ct) { free(ct); }
317
+ void FSE_freeCTable (FSE_CTable* ct) { ZSTD_free(ct); }
334
318
 
335
319
  /* provides the minimum logSize to safely represent a distribution */
336
320
  static unsigned FSE_minTableLog(size_t srcSize, unsigned maxSymbolValue)
@@ -361,11 +345,10 @@ unsigned FSE_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxS
361
345
  return FSE_optimalTableLog_internal(maxTableLog, srcSize, maxSymbolValue, 2);
362
346
  }
363
347
 
364
-
365
348
  /* Secondary normalization method.
366
349
  To be used when primary method fails. */
367
350
 
368
- static size_t FSE_normalizeM2(short* norm, U32 tableLog, const unsigned* count, size_t total, U32 maxSymbolValue)
351
+ static size_t FSE_normalizeM2(short* norm, U32 tableLog, const unsigned* count, size_t total, U32 maxSymbolValue, short lowProbCount)
369
352
  {
370
353
  short const NOT_YET_ASSIGNED = -2;
371
354
  U32 s;
@@ -382,7 +365,7 @@ static size_t FSE_normalizeM2(short* norm, U32 tableLog, const unsigned* count,
382
365
  continue;
383
366
  }
384
367
  if (count[s] <= lowThreshold) {
385
- norm[s] = -1;
368
+ norm[s] = lowProbCount;
386
369
  distributed++;
387
370
  total -= count[s];
388
371
  continue;
@@ -434,7 +417,7 @@ static size_t FSE_normalizeM2(short* norm, U32 tableLog, const unsigned* count,
434
417
 
435
418
  { U64 const vStepLog = 62 - tableLog;
436
419
  U64 const mid = (1ULL << (vStepLog-1)) - 1;
437
- U64 const rStep = ((((U64)1<<vStepLog) * ToDistribute) + mid) / total; /* scale on remaining */
420
+ U64 const rStep = ZSTD_div64((((U64)1<<vStepLog) * ToDistribute) + mid, (U32)total); /* scale on remaining */
438
421
  U64 tmpTotal = mid;
439
422
  for (s=0; s<=maxSymbolValue; s++) {
440
423
  if (norm[s]==NOT_YET_ASSIGNED) {
@@ -451,10 +434,9 @@ static size_t FSE_normalizeM2(short* norm, U32 tableLog, const unsigned* count,
451
434
  return 0;
452
435
  }
453
436
 
454
-
455
437
  size_t FSE_normalizeCount (short* normalizedCounter, unsigned tableLog,
456
438
  const unsigned* count, size_t total,
457
- unsigned maxSymbolValue)
439
+ unsigned maxSymbolValue, unsigned useLowProbCount)
458
440
  {
459
441
  /* Sanity checks */
460
442
  if (tableLog==0) tableLog = FSE_DEFAULT_TABLELOG;
@@ -463,8 +445,9 @@ size_t FSE_normalizeCount (short* normalizedCounter, unsigned tableLog,
463
445
  if (tableLog < FSE_minTableLog(total, maxSymbolValue)) return ERROR(GENERIC); /* Too small tableLog, compression potentially impossible */
464
446
 
465
447
  { static U32 const rtbTable[] = { 0, 473195, 504333, 520860, 550000, 700000, 750000, 830000 };
448
+ short const lowProbCount = useLowProbCount ? -1 : 1;
466
449
  U64 const scale = 62 - tableLog;
467
- U64 const step = ((U64)1<<62) / total; /* <== here, one division ! */
450
+ U64 const step = ZSTD_div64((U64)1<<62, (U32)total); /* <== here, one division ! */
468
451
  U64 const vStep = 1ULL<<(scale-20);
469
452
  int stillToDistribute = 1<<tableLog;
470
453
  unsigned s;
@@ -476,7 +459,7 @@ size_t FSE_normalizeCount (short* normalizedCounter, unsigned tableLog,
476
459
  if (count[s] == total) return 0; /* rle special case */
477
460
  if (count[s] == 0) { normalizedCounter[s]=0; continue; }
478
461
  if (count[s] <= lowThreshold) {
479
- normalizedCounter[s] = -1;
462
+ normalizedCounter[s] = lowProbCount;
480
463
  stillToDistribute--;
481
464
  } else {
482
465
  short proba = (short)((count[s]*step) >> scale);
@@ -490,7 +473,7 @@ size_t FSE_normalizeCount (short* normalizedCounter, unsigned tableLog,
490
473
  } }
491
474
  if (-stillToDistribute >= (normalizedCounter[largest] >> 1)) {
492
475
  /* corner case, need another normalization method */
493
- size_t const errorCode = FSE_normalizeM2(normalizedCounter, tableLog, count, total, maxSymbolValue);
476
+ size_t const errorCode = FSE_normalizeM2(normalizedCounter, tableLog, count, total, maxSymbolValue, lowProbCount);
494
477
  if (FSE_isError(errorCode)) return errorCode;
495
478
  }
496
479
  else normalizedCounter[largest] += (short)stillToDistribute;
@@ -645,9 +628,7 @@ size_t FSE_compress_usingCTable (void* dst, size_t dstSize,
645
628
 
646
629
  size_t FSE_compressBound(size_t size) { return FSE_COMPRESSBOUND(size); }
647
630
 
648
- #define CHECK_V_F(e, f) size_t const e = f; if (ERR_isError(e)) return e
649
- #define CHECK_F(f) { CHECK_V_F(_var_err__, f); }
650
-
631
+ #ifndef ZSTD_NO_UNUSED_FUNCTIONS
651
632
  /* FSE_compress_wksp() :
652
633
  * Same as FSE_compress2(), but using an externally allocated scratch buffer (`workSpace`).
653
634
  * `wkspSize` size must be `(1<<tableLog)`.
@@ -666,7 +647,7 @@ size_t FSE_compress_wksp (void* dst, size_t dstSize, const void* src, size_t src
666
647
  size_t const scratchBufferSize = wkspSize - (CTableSize * sizeof(FSE_CTable));
667
648
 
668
649
  /* init conditions */
669
- if (wkspSize < FSE_WKSP_SIZE_U32(tableLog, maxSymbolValue)) return ERROR(tableLog_tooLarge);
650
+ if (wkspSize < FSE_COMPRESS_WKSP_SIZE_U32(tableLog, maxSymbolValue)) return ERROR(tableLog_tooLarge);
670
651
  if (srcSize <= 1) return 0; /* Not compressible */
671
652
  if (!maxSymbolValue) maxSymbolValue = FSE_MAX_SYMBOL_VALUE;
672
653
  if (!tableLog) tableLog = FSE_DEFAULT_TABLELOG;
@@ -679,7 +660,7 @@ size_t FSE_compress_wksp (void* dst, size_t dstSize, const void* src, size_t src
679
660
  }
680
661
 
681
662
  tableLog = FSE_optimalTableLog(tableLog, srcSize, maxSymbolValue);
682
- CHECK_F( FSE_normalizeCount(norm, tableLog, count, srcSize, maxSymbolValue) );
663
+ CHECK_F( FSE_normalizeCount(norm, tableLog, count, srcSize, maxSymbolValue, /* useLowProbCount */ srcSize >= 2048) );
683
664
 
684
665
  /* Write table description header */
685
666
  { CHECK_V_F(nc_err, FSE_writeNCount(op, oend-op, norm, maxSymbolValue, tableLog) );
@@ -701,13 +682,16 @@ size_t FSE_compress_wksp (void* dst, size_t dstSize, const void* src, size_t src
701
682
 
702
683
  typedef struct {
703
684
  FSE_CTable CTable_max[FSE_CTABLE_SIZE_U32(FSE_MAX_TABLELOG, FSE_MAX_SYMBOL_VALUE)];
704
- BYTE scratchBuffer[1 << FSE_MAX_TABLELOG];
685
+ union {
686
+ U32 hist_wksp[HIST_WKSP_SIZE_U32];
687
+ BYTE scratchBuffer[1 << FSE_MAX_TABLELOG];
688
+ } workspace;
705
689
  } fseWkspMax_t;
706
690
 
707
691
  size_t FSE_compress2 (void* dst, size_t dstCapacity, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog)
708
692
  {
709
693
  fseWkspMax_t scratchBuffer;
710
- DEBUG_STATIC_ASSERT(sizeof(scratchBuffer) >= FSE_WKSP_SIZE_U32(FSE_MAX_TABLELOG, FSE_MAX_SYMBOL_VALUE)); /* compilation failures here means scratchBuffer is not large enough */
694
+ DEBUG_STATIC_ASSERT(sizeof(scratchBuffer) >= FSE_COMPRESS_WKSP_SIZE_U32(FSE_MAX_TABLELOG, FSE_MAX_SYMBOL_VALUE)); /* compilation failures here means scratchBuffer is not large enough */
711
695
  if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge);
712
696
  return FSE_compress_wksp(dst, dstCapacity, src, srcSize, maxSymbolValue, tableLog, &scratchBuffer, sizeof(scratchBuffer));
713
697
  }
@@ -716,6 +700,6 @@ size_t FSE_compress (void* dst, size_t dstCapacity, const void* src, size_t srcS
716
700
  {
717
701
  return FSE_compress2(dst, dstCapacity, src, srcSize, FSE_MAX_SYMBOL_VALUE, FSE_DEFAULT_TABLELOG);
718
702
  }
719
-
703
+ #endif
720
704
 
721
705
  #endif /* FSE_COMMONDEFS_ONLY */
@@ -1,42 +1,22 @@
1
1
  /* ******************************************************************
2
- hist : Histogram functions
3
- part of Finite State Entropy project
4
- Copyright (C) 2013-present, Yann Collet.
5
-
6
- BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
7
-
8
- Redistribution and use in source and binary forms, with or without
9
- modification, are permitted provided that the following conditions are
10
- met:
11
-
12
- * Redistributions of source code must retain the above copyright
13
- notice, this list of conditions and the following disclaimer.
14
- * Redistributions in binary form must reproduce the above
15
- copyright notice, this list of conditions and the following disclaimer
16
- in the documentation and/or other materials provided with the
17
- distribution.
18
-
19
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20
- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21
- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22
- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23
- OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24
- SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25
- LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26
- DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27
- THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28
- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29
- OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30
-
31
- You can contact the author at :
32
- - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
33
- - Public forum : https://groups.google.com/forum/#!forum/lz4c
2
+ * hist : Histogram functions
3
+ * part of Finite State Entropy project
4
+ * Copyright (c) 2013-2021, Yann Collet, Facebook, Inc.
5
+ *
6
+ * You can contact the author at :
7
+ * - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
8
+ * - Public forum : https://groups.google.com/forum/#!forum/lz4c
9
+ *
10
+ * This source code is licensed under both the BSD-style license (found in the
11
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
12
+ * in the COPYING file in the root directory of this source tree).
13
+ * You may select, at your option, one of the above-listed licenses.
34
14
  ****************************************************************** */
35
15
 
36
16
  /* --- dependencies --- */
37
- #include "mem.h" /* U32, BYTE, etc. */
38
- #include "debug.h" /* assert, DEBUGLOG */
39
- #include "error_private.h" /* ERROR */
17
+ #include "../common/mem.h" /* U32, BYTE, etc. */
18
+ #include "../common/debug.h" /* assert, DEBUGLOG */
19
+ #include "../common/error_private.h" /* ERROR */
40
20
  #include "hist.h"
41
21
 
42
22
 
@@ -54,7 +34,7 @@ unsigned HIST_count_simple(unsigned* count, unsigned* maxSymbolValuePtr,
54
34
  unsigned maxSymbolValue = *maxSymbolValuePtr;
55
35
  unsigned largestCount=0;
56
36
 
57
- memset(count, 0, (maxSymbolValue+1) * sizeof(*count));
37
+ ZSTD_memset(count, 0, (maxSymbolValue+1) * sizeof(*count));
58
38
  if (srcSize==0) { *maxSymbolValuePtr = 0; return 0; }
59
39
 
60
40
  while (ip<end) {
@@ -80,9 +60,9 @@ typedef enum { trustInput, checkMaxSymbolValue } HIST_checkInput_e;
80
60
  * this design makes better use of OoO cpus,
81
61
  * and is noticeably faster when some values are heavily repeated.
82
62
  * But it needs some additional workspace for intermediate tables.
83
- * `workSpace` size must be a table of size >= HIST_WKSP_SIZE_U32.
63
+ * `workSpace` must be a U32 table of size >= HIST_WKSP_SIZE_U32.
84
64
  * @return : largest histogram frequency,
85
- * or an error code (notably when histogram would be larger than *maxSymbolValuePtr). */
65
+ * or an error code (notably when histogram's alphabet is larger than *maxSymbolValuePtr) */
86
66
  static size_t HIST_count_parallel_wksp(
87
67
  unsigned* count, unsigned* maxSymbolValuePtr,
88
68
  const void* source, size_t sourceSize,
@@ -91,22 +71,21 @@ static size_t HIST_count_parallel_wksp(
91
71
  {
92
72
  const BYTE* ip = (const BYTE*)source;
93
73
  const BYTE* const iend = ip+sourceSize;
94
- unsigned maxSymbolValue = *maxSymbolValuePtr;
74
+ size_t const countSize = (*maxSymbolValuePtr + 1) * sizeof(*count);
95
75
  unsigned max=0;
96
76
  U32* const Counting1 = workSpace;
97
77
  U32* const Counting2 = Counting1 + 256;
98
78
  U32* const Counting3 = Counting2 + 256;
99
79
  U32* const Counting4 = Counting3 + 256;
100
80
 
101
- memset(workSpace, 0, 4*256*sizeof(unsigned));
102
-
103
81
  /* safety checks */
82
+ assert(*maxSymbolValuePtr <= 255);
104
83
  if (!sourceSize) {
105
- memset(count, 0, maxSymbolValue + 1);
84
+ ZSTD_memset(count, 0, countSize);
106
85
  *maxSymbolValuePtr = 0;
107
86
  return 0;
108
87
  }
109
- if (!maxSymbolValue) maxSymbolValue = 255; /* 0 == default */
88
+ ZSTD_memset(workSpace, 0, 4*256*sizeof(unsigned));
110
89
 
111
90
  /* by stripes of 16 bytes */
112
91
  { U32 cached = MEM_read32(ip); ip += 4;
@@ -138,21 +117,18 @@ static size_t HIST_count_parallel_wksp(
138
117
  /* finish last symbols */
139
118
  while (ip<iend) Counting1[*ip++]++;
140
119
 
141
- if (check) { /* verify stats will fit into destination table */
142
- U32 s; for (s=255; s>maxSymbolValue; s--) {
143
- Counting1[s] += Counting2[s] + Counting3[s] + Counting4[s];
144
- if (Counting1[s]) return ERROR(maxSymbolValue_tooSmall);
145
- } }
146
-
147
120
  { U32 s;
148
- if (maxSymbolValue > 255) maxSymbolValue = 255;
149
- for (s=0; s<=maxSymbolValue; s++) {
150
- count[s] = Counting1[s] + Counting2[s] + Counting3[s] + Counting4[s];
151
- if (count[s] > max) max = count[s];
121
+ for (s=0; s<256; s++) {
122
+ Counting1[s] += Counting2[s] + Counting3[s] + Counting4[s];
123
+ if (Counting1[s] > max) max = Counting1[s];
152
124
  } }
153
125
 
154
- while (!count[maxSymbolValue]) maxSymbolValue--;
155
- *maxSymbolValuePtr = maxSymbolValue;
126
+ { unsigned maxSymbolValue = 255;
127
+ while (!Counting1[maxSymbolValue]) maxSymbolValue--;
128
+ if (check && maxSymbolValue > *maxSymbolValuePtr) return ERROR(maxSymbolValue_tooSmall);
129
+ *maxSymbolValuePtr = maxSymbolValue;
130
+ ZSTD_memmove(count, Counting1, countSize); /* in case count & Counting1 are overlapping */
131
+ }
156
132
  return (size_t)max;
157
133
  }
158
134
 
@@ -172,14 +148,6 @@ size_t HIST_countFast_wksp(unsigned* count, unsigned* maxSymbolValuePtr,
172
148
  return HIST_count_parallel_wksp(count, maxSymbolValuePtr, source, sourceSize, trustInput, (U32*)workSpace);
173
149
  }
174
150
 
175
- /* fast variant (unsafe : won't check if src contains values beyond count[] limit) */
176
- size_t HIST_countFast(unsigned* count, unsigned* maxSymbolValuePtr,
177
- const void* source, size_t sourceSize)
178
- {
179
- unsigned tmpCounters[HIST_WKSP_SIZE_U32];
180
- return HIST_countFast_wksp(count, maxSymbolValuePtr, source, sourceSize, tmpCounters, sizeof(tmpCounters));
181
- }
182
-
183
151
  /* HIST_count_wksp() :
184
152
  * Same as HIST_count(), but using an externally provided scratch buffer.
185
153
  * `workSpace` must be a table of >= HIST_WKSP_SIZE_U32 unsigned */
@@ -195,9 +163,19 @@ size_t HIST_count_wksp(unsigned* count, unsigned* maxSymbolValuePtr,
195
163
  return HIST_countFast_wksp(count, maxSymbolValuePtr, source, sourceSize, workSpace, workSpaceSize);
196
164
  }
197
165
 
166
+ #ifndef ZSTD_NO_UNUSED_FUNCTIONS
167
+ /* fast variant (unsafe : won't check if src contains values beyond count[] limit) */
168
+ size_t HIST_countFast(unsigned* count, unsigned* maxSymbolValuePtr,
169
+ const void* source, size_t sourceSize)
170
+ {
171
+ unsigned tmpCounters[HIST_WKSP_SIZE_U32];
172
+ return HIST_countFast_wksp(count, maxSymbolValuePtr, source, sourceSize, tmpCounters, sizeof(tmpCounters));
173
+ }
174
+
198
175
  size_t HIST_count(unsigned* count, unsigned* maxSymbolValuePtr,
199
176
  const void* src, size_t srcSize)
200
177
  {
201
178
  unsigned tmpCounters[HIST_WKSP_SIZE_U32];
202
179
  return HIST_count_wksp(count, maxSymbolValuePtr, src, srcSize, tmpCounters, sizeof(tmpCounters));
203
180
  }
181
+ #endif