zstd-ruby 1.4.5.0 → 1.4.9.0

Files changed (93)
  1. checksums.yaml +4 -4
  2. data/.github/workflows/ruby.yml +35 -0
  3. data/README.md +2 -2
  4. data/ext/zstdruby/libzstd/Makefile +237 -138
  5. data/ext/zstdruby/libzstd/README.md +28 -0
  6. data/ext/zstdruby/libzstd/common/bitstream.h +25 -16
  7. data/ext/zstdruby/libzstd/common/compiler.h +118 -4
  8. data/ext/zstdruby/libzstd/common/cpu.h +1 -3
  9. data/ext/zstdruby/libzstd/common/debug.c +1 -1
  10. data/ext/zstdruby/libzstd/common/debug.h +12 -19
  11. data/ext/zstdruby/libzstd/common/entropy_common.c +189 -43
  12. data/ext/zstdruby/libzstd/common/error_private.c +2 -1
  13. data/ext/zstdruby/libzstd/common/error_private.h +2 -2
  14. data/ext/zstdruby/libzstd/common/fse.h +40 -12
  15. data/ext/zstdruby/libzstd/common/fse_decompress.c +124 -17
  16. data/ext/zstdruby/libzstd/common/huf.h +27 -6
  17. data/ext/zstdruby/libzstd/common/mem.h +67 -94
  18. data/ext/zstdruby/libzstd/common/pool.c +23 -17
  19. data/ext/zstdruby/libzstd/common/pool.h +2 -2
  20. data/ext/zstdruby/libzstd/common/threading.c +6 -5
  21. data/ext/zstdruby/libzstd/common/xxhash.c +19 -57
  22. data/ext/zstdruby/libzstd/common/xxhash.h +2 -2
  23. data/ext/zstdruby/libzstd/common/zstd_common.c +10 -10
  24. data/ext/zstdruby/libzstd/common/zstd_deps.h +111 -0
  25. data/ext/zstdruby/libzstd/common/zstd_errors.h +2 -1
  26. data/ext/zstdruby/libzstd/common/zstd_internal.h +90 -59
  27. data/ext/zstdruby/libzstd/common/zstd_trace.c +42 -0
  28. data/ext/zstdruby/libzstd/common/zstd_trace.h +152 -0
  29. data/ext/zstdruby/libzstd/compress/fse_compress.c +31 -24
  30. data/ext/zstdruby/libzstd/compress/hist.c +27 -29
  31. data/ext/zstdruby/libzstd/compress/hist.h +2 -2
  32. data/ext/zstdruby/libzstd/compress/huf_compress.c +217 -101
  33. data/ext/zstdruby/libzstd/compress/zstd_compress.c +1495 -478
  34. data/ext/zstdruby/libzstd/compress/zstd_compress_internal.h +143 -44
  35. data/ext/zstdruby/libzstd/compress/zstd_compress_literals.c +7 -7
  36. data/ext/zstdruby/libzstd/compress/zstd_compress_literals.h +1 -1
  37. data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.c +18 -4
  38. data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.h +1 -1
  39. data/ext/zstdruby/libzstd/compress/zstd_compress_superblock.c +25 -21
  40. data/ext/zstdruby/libzstd/compress/zstd_compress_superblock.h +1 -1
  41. data/ext/zstdruby/libzstd/compress/zstd_cwksp.h +62 -26
  42. data/ext/zstdruby/libzstd/compress/zstd_double_fast.c +23 -23
  43. data/ext/zstdruby/libzstd/compress/zstd_double_fast.h +1 -1
  44. data/ext/zstdruby/libzstd/compress/zstd_fast.c +21 -21
  45. data/ext/zstdruby/libzstd/compress/zstd_fast.h +1 -1
  46. data/ext/zstdruby/libzstd/compress/zstd_lazy.c +352 -78
  47. data/ext/zstdruby/libzstd/compress/zstd_lazy.h +21 -1
  48. data/ext/zstdruby/libzstd/compress/zstd_ldm.c +276 -209
  49. data/ext/zstdruby/libzstd/compress/zstd_ldm.h +8 -2
  50. data/ext/zstdruby/libzstd/compress/zstd_ldm_geartab.h +103 -0
  51. data/ext/zstdruby/libzstd/compress/zstd_opt.c +191 -46
  52. data/ext/zstdruby/libzstd/compress/zstd_opt.h +1 -1
  53. data/ext/zstdruby/libzstd/compress/zstdmt_compress.c +79 -410
  54. data/ext/zstdruby/libzstd/compress/zstdmt_compress.h +27 -109
  55. data/ext/zstdruby/libzstd/decompress/huf_decompress.c +303 -201
  56. data/ext/zstdruby/libzstd/decompress/zstd_ddict.c +9 -9
  57. data/ext/zstdruby/libzstd/decompress/zstd_ddict.h +2 -2
  58. data/ext/zstdruby/libzstd/decompress/zstd_decompress.c +370 -87
  59. data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.c +153 -45
  60. data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.h +6 -3
  61. data/ext/zstdruby/libzstd/decompress/zstd_decompress_internal.h +28 -11
  62. data/ext/zstdruby/libzstd/deprecated/zbuff.h +1 -1
  63. data/ext/zstdruby/libzstd/deprecated/zbuff_common.c +1 -1
  64. data/ext/zstdruby/libzstd/deprecated/zbuff_compress.c +1 -1
  65. data/ext/zstdruby/libzstd/deprecated/zbuff_decompress.c +1 -1
  66. data/ext/zstdruby/libzstd/dictBuilder/cover.c +40 -31
  67. data/ext/zstdruby/libzstd/dictBuilder/cover.h +2 -2
  68. data/ext/zstdruby/libzstd/dictBuilder/divsufsort.c +1 -1
  69. data/ext/zstdruby/libzstd/dictBuilder/fastcover.c +26 -25
  70. data/ext/zstdruby/libzstd/dictBuilder/zdict.c +22 -24
  71. data/ext/zstdruby/libzstd/dictBuilder/zdict.h +5 -4
  72. data/ext/zstdruby/libzstd/dll/example/Makefile +1 -1
  73. data/ext/zstdruby/libzstd/dll/example/README.md +16 -22
  74. data/ext/zstdruby/libzstd/legacy/zstd_legacy.h +1 -1
  75. data/ext/zstdruby/libzstd/legacy/zstd_v01.c +6 -2
  76. data/ext/zstdruby/libzstd/legacy/zstd_v01.h +1 -1
  77. data/ext/zstdruby/libzstd/legacy/zstd_v02.c +6 -2
  78. data/ext/zstdruby/libzstd/legacy/zstd_v02.h +1 -1
  79. data/ext/zstdruby/libzstd/legacy/zstd_v03.c +6 -2
  80. data/ext/zstdruby/libzstd/legacy/zstd_v03.h +1 -1
  81. data/ext/zstdruby/libzstd/legacy/zstd_v04.c +7 -3
  82. data/ext/zstdruby/libzstd/legacy/zstd_v04.h +1 -1
  83. data/ext/zstdruby/libzstd/legacy/zstd_v05.c +10 -6
  84. data/ext/zstdruby/libzstd/legacy/zstd_v05.h +1 -1
  85. data/ext/zstdruby/libzstd/legacy/zstd_v06.c +10 -6
  86. data/ext/zstdruby/libzstd/legacy/zstd_v06.h +1 -1
  87. data/ext/zstdruby/libzstd/legacy/zstd_v07.c +10 -6
  88. data/ext/zstdruby/libzstd/legacy/zstd_v07.h +1 -1
  89. data/ext/zstdruby/libzstd/libzstd.pc.in +3 -3
  90. data/ext/zstdruby/libzstd/zstd.h +414 -54
  91. data/lib/zstd-ruby/version.rb +1 -1
  92. metadata +7 -3
  93. data/.travis.yml +0 -14
data/ext/zstdruby/libzstd/common/zstd_trace.c (new file)
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2016-2021, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+#include "zstd_trace.h"
+#include "../zstd.h"
+
+#include "compiler.h"
+
+#if ZSTD_TRACE && ZSTD_HAVE_WEAK_SYMBOLS
+
+ZSTD_WEAK_ATTR ZSTD_TraceCtx ZSTD_trace_compress_begin(ZSTD_CCtx const* cctx)
+{
+    (void)cctx;
+    return 0;
+}
+
+ZSTD_WEAK_ATTR void ZSTD_trace_compress_end(ZSTD_TraceCtx ctx, ZSTD_Trace const* trace)
+{
+    (void)ctx;
+    (void)trace;
+}
+
+ZSTD_WEAK_ATTR ZSTD_TraceCtx ZSTD_trace_decompress_begin(ZSTD_DCtx const* dctx)
+{
+    (void)dctx;
+    return 0;
+}
+
+ZSTD_WEAK_ATTR void ZSTD_trace_decompress_end(ZSTD_TraceCtx ctx, ZSTD_Trace const* trace)
+{
+    (void)ctx;
+    (void)trace;
+}
+
+#endif
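
The functions above are weak, do-nothing defaults: they return 0 (tracing disabled) and ignore their arguments, so the library behaves as before unless stronger definitions are linked in. Below is a minimal sketch of an application-side override, assuming the build can reach the bundled zstd_trace.h header; the clock()-based timing and the log format are purely illustrative and not part of this release.

    /* Hypothetical application-side override (not part of this diff).
     * A strong definition replaces the weak no-op above at link time. */
    #include <stdio.h>
    #include <time.h>
    #include <zstd.h>          /* ZSTD_VERSION_NUMBER */
    #include "zstd_trace.h"    /* path depends on how libzstd is vendored */

    #if ZSTD_TRACE
    ZSTD_TraceCtx ZSTD_trace_compress_begin(struct ZSTD_CCtx_s const* cctx)
    {
        ZSTD_TraceCtx const begin = (ZSTD_TraceCtx)clock();
        (void)cctx;
        return begin ? begin : 1;   /* must stay non-zero, or tracing is treated as disabled */
    }

    void ZSTD_trace_compress_end(ZSTD_TraceCtx ctx, ZSTD_Trace const* trace)
    {
        if (trace->version != ZSTD_VERSION_NUMBER) return;  /* struct layout only stable per version */
        fprintf(stderr, "zstd compress: %zu -> %zu bytes, ~%ld clock ticks\n",
                trace->uncompressedSize, trace->compressedSize,
                (long)((ZSTD_TraceCtx)clock() - ctx));
    }
    #endif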
data/ext/zstdruby/libzstd/common/zstd_trace.h (new file)
@@ -0,0 +1,152 @@
+/*
+ * Copyright (c) 2016-2021, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+#ifndef ZSTD_TRACE_H
+#define ZSTD_TRACE_H
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+#include <stddef.h>
+
+/* weak symbol support */
+#if !defined(ZSTD_HAVE_WEAK_SYMBOLS) && defined(__GNUC__) && \
+    !defined(__APPLE__) && !defined(_WIN32) && !defined(__MINGW32__) && \
+    !defined(__CYGWIN__)
+#  define ZSTD_HAVE_WEAK_SYMBOLS 1
+#else
+#  define ZSTD_HAVE_WEAK_SYMBOLS 0
+#endif
+#if ZSTD_HAVE_WEAK_SYMBOLS
+#  define ZSTD_WEAK_ATTR __attribute__((__weak__))
+#else
+#  define ZSTD_WEAK_ATTR
+#endif
+
+/* Only enable tracing when weak symbols are available. */
+#ifndef ZSTD_TRACE
+#  define ZSTD_TRACE ZSTD_HAVE_WEAK_SYMBOLS
+#endif
+
+#if ZSTD_TRACE
+
+struct ZSTD_CCtx_s;
+struct ZSTD_DCtx_s;
+struct ZSTD_CCtx_params_s;
+
+typedef struct {
+    /**
+     * ZSTD_VERSION_NUMBER
+     *
+     * This is guaranteed to be the first member of ZSTD_trace.
+     * Otherwise, this struct is not stable between versions. If
+     * the version number does not match your expectation, you
+     * should not interpret the rest of the struct.
+     */
+    unsigned version;
+    /**
+     * Non-zero if streaming (de)compression is used.
+     */
+    unsigned streaming;
+    /**
+     * The dictionary ID.
+     */
+    unsigned dictionaryID;
+    /**
+     * Is the dictionary cold?
+     * Only set on decompression.
+     */
+    unsigned dictionaryIsCold;
+    /**
+     * The dictionary size or zero if no dictionary.
+     */
+    size_t dictionarySize;
+    /**
+     * The uncompressed size of the data.
+     */
+    size_t uncompressedSize;
+    /**
+     * The compressed size of the data.
+     */
+    size_t compressedSize;
+    /**
+     * The fully resolved CCtx parameters (NULL on decompression).
+     */
+    struct ZSTD_CCtx_params_s const* params;
+    /**
+     * The ZSTD_CCtx pointer (NULL on decompression).
+     */
+    struct ZSTD_CCtx_s const* cctx;
+    /**
+     * The ZSTD_DCtx pointer (NULL on compression).
+     */
+    struct ZSTD_DCtx_s const* dctx;
+} ZSTD_Trace;
+
+/**
+ * A tracing context. It must be 0 when tracing is disabled.
+ * Otherwise, any non-zero value returned by a tracing begin()
+ * function is presented to any subsequent calls to end().
+ *
+ * Any non-zero value is treated as tracing is enabled and not
+ * interpreted by the library.
+ *
+ * Two possible uses are:
+ * * A timestamp for when the begin() function was called.
+ * * A unique key identifying the (de)compression, like the
+ *   address of the [dc]ctx pointer if you need to track
+ *   more information than just a timestamp.
+ */
+typedef unsigned long long ZSTD_TraceCtx;
+
+/**
+ * Trace the beginning of a compression call.
+ * @param cctx The dctx pointer for the compression.
+ *             It can be used as a key to map begin() to end().
+ * @returns Non-zero if tracing is enabled. The return value is
+ *          passed to ZSTD_trace_compress_end().
+ */
+ZSTD_TraceCtx ZSTD_trace_compress_begin(struct ZSTD_CCtx_s const* cctx);
+
+/**
+ * Trace the end of a compression call.
+ * @param ctx The return value of ZSTD_trace_compress_begin().
+ * @param trace The zstd tracing info.
+ */
+void ZSTD_trace_compress_end(
+    ZSTD_TraceCtx ctx,
+    ZSTD_Trace const* trace);
+
+/**
+ * Trace the beginning of a decompression call.
+ * @param dctx The dctx pointer for the decompression.
+ *             It can be used as a key to map begin() to end().
+ * @returns Non-zero if tracing is enabled. The return value is
+ *          passed to ZSTD_trace_compress_end().
+ */
+ZSTD_TraceCtx ZSTD_trace_decompress_begin(struct ZSTD_DCtx_s const* dctx);
+
+/**
+ * Trace the end of a decompression call.
+ * @param ctx The return value of ZSTD_trace_decompress_begin().
+ * @param trace The zstd tracing info.
+ */
+void ZSTD_trace_decompress_end(
+    ZSTD_TraceCtx ctx,
+    ZSTD_Trace const* trace);

+#endif /* ZSTD_TRACE */
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif /* ZSTD_TRACE_H */
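
The ZSTD_TraceCtx comment above suggests two uses for the context value: a timestamp or a unique key such as the context address. A hedged sketch of the key-based pattern on the decompression side follows; it again assumes access to the bundled header, and only uses fields declared in the ZSTD_Trace struct above.

    /* Illustrative decompression-side hooks, keyed by the dctx address
     * (one of the two uses suggested by the ZSTD_TraceCtx comment). */
    #include <stdio.h>
    #include "zstd_trace.h"    /* path depends on how libzstd is vendored */

    #if ZSTD_TRACE
    ZSTD_TraceCtx ZSTD_trace_decompress_begin(struct ZSTD_DCtx_s const* dctx)
    {
        /* Any non-zero value keeps tracing enabled; the library never interprets it. */
        return (ZSTD_TraceCtx)(size_t)dctx;
    }

    void ZSTD_trace_decompress_end(ZSTD_TraceCtx ctx, ZSTD_Trace const* trace)
    {
        fprintf(stderr, "dctx %#llx: streaming=%u dictID=%u dictSize=%zu %zu -> %zu bytes\n",
                (unsigned long long)ctx, trace->streaming, trace->dictionaryID,
                trace->dictionarySize, trace->compressedSize, trace->uncompressedSize);
    }
    #endif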
data/ext/zstdruby/libzstd/compress/fse_compress.c
@@ -1,6 +1,6 @@
 /* ******************************************************************
  * FSE : Finite State Entropy encoder
- * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc.
+ * Copyright (c) 2013-2021, Yann Collet, Facebook, Inc.
  *
  * You can contact the author at :
  * - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
@@ -15,8 +15,6 @@
 /* **************************************************************
 *  Includes
 ****************************************************************/
-#include <stdlib.h>     /* malloc, free, qsort */
-#include <string.h>     /* memcpy, memset */
 #include "../common/compiler.h"
 #include "../common/mem.h"        /* U32, U16, etc. */
 #include "../common/debug.h"      /* assert, DEBUGLOG */
@@ -25,6 +23,9 @@
 #define FSE_STATIC_LINKING_ONLY
 #include "../common/fse.h"
 #include "../common/error_private.h"
+#define ZSTD_DEPS_NEED_MALLOC
+#define ZSTD_DEPS_NEED_MATH64
+#include "../common/zstd_deps.h"  /* ZSTD_malloc, ZSTD_free, ZSTD_memcpy, ZSTD_memset */


 /* **************************************************************
@@ -74,13 +75,15 @@ size_t FSE_buildCTable_wksp(FSE_CTable* ct,
     void* const FSCT = ((U32*)ptr) + 1 /* header */ + (tableLog ? tableSize>>1 : 1) ;
     FSE_symbolCompressionTransform* const symbolTT = (FSE_symbolCompressionTransform*) (FSCT);
     U32 const step = FSE_TABLESTEP(tableSize);
-    U32 cumul[FSE_MAX_SYMBOL_VALUE+2];

-    FSE_FUNCTION_TYPE* const tableSymbol = (FSE_FUNCTION_TYPE*)workSpace;
+    U32* cumul = (U32*)workSpace;
+    FSE_FUNCTION_TYPE* tableSymbol = (FSE_FUNCTION_TYPE*)(cumul + (maxSymbolValue + 2));
+
     U32 highThreshold = tableSize-1;

+    if ((size_t)workSpace & 3) return ERROR(GENERIC); /* Must be 4 byte aligned */
+    if (FSE_BUILD_CTABLE_WORKSPACE_SIZE(maxSymbolValue, tableLog) > wkspSize) return ERROR(tableLog_tooLarge);
     /* CTable header */
-    if (((size_t)1 << tableLog) * sizeof(FSE_FUNCTION_TYPE) > wkspSize) return ERROR(tableLog_tooLarge);
     tableU16[-2] = (U16) tableLog;
     tableU16[-1] = (U16) maxSymbolValue;
     assert(tableLog < 16);   /* required for threshold strategy to work */
@@ -89,7 +92,7 @@ size_t FSE_buildCTable_wksp(FSE_CTable* ct,
      * http://fastcompression.blogspot.fr/2014/02/fse-distributing-symbol-values.html */

     #ifdef __clang_analyzer__
-    memset(tableSymbol, 0, sizeof(*tableSymbol) * tableSize);   /* useless initialization, just to keep scan-build happy */
+    ZSTD_memset(tableSymbol, 0, sizeof(*tableSymbol) * tableSize);   /* useless initialization, just to keep scan-build happy */
     #endif

     /* symbol start positions */
@@ -168,12 +171,13 @@ size_t FSE_buildCTable_wksp(FSE_CTable* ct,
     return 0;
 }

-
+#ifndef ZSTD_NO_UNUSED_FUNCTIONS
 size_t FSE_buildCTable(FSE_CTable* ct, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog)
 {
     FSE_FUNCTION_TYPE tableSymbol[FSE_MAX_TABLESIZE];   /* memset() is not necessary, even if static analyzer complain about it */
     return FSE_buildCTable_wksp(ct, normalizedCounter, maxSymbolValue, tableLog, tableSymbol, sizeof(tableSymbol));
 }
+#endif


@@ -307,10 +311,10 @@ FSE_CTable* FSE_createCTable (unsigned maxSymbolValue, unsigned tableLog)
     size_t size;
     if (tableLog > FSE_TABLELOG_ABSOLUTE_MAX) tableLog = FSE_TABLELOG_ABSOLUTE_MAX;
     size = FSE_CTABLE_SIZE_U32 (tableLog, maxSymbolValue) * sizeof(U32);
-    return (FSE_CTable*)malloc(size);
+    return (FSE_CTable*)ZSTD_malloc(size);
 }

-void FSE_freeCTable (FSE_CTable* ct) { free(ct); }
+void FSE_freeCTable (FSE_CTable* ct) { ZSTD_free(ct); }

 /* provides the minimum logSize to safely represent a distribution */
 static unsigned FSE_minTableLog(size_t srcSize, unsigned maxSymbolValue)
@@ -341,11 +345,10 @@ unsigned FSE_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxS
     return FSE_optimalTableLog_internal(maxTableLog, srcSize, maxSymbolValue, 2);
 }

-
 /* Secondary normalization method.
    To be used when primary method fails. */

-static size_t FSE_normalizeM2(short* norm, U32 tableLog, const unsigned* count, size_t total, U32 maxSymbolValue)
+static size_t FSE_normalizeM2(short* norm, U32 tableLog, const unsigned* count, size_t total, U32 maxSymbolValue, short lowProbCount)
 {
     short const NOT_YET_ASSIGNED = -2;
     U32 s;
@@ -362,7 +365,7 @@ static size_t FSE_normalizeM2(short* norm, U32 tableLog, const unsigned* count,
             continue;
         }
         if (count[s] <= lowThreshold) {
-            norm[s] = -1;
+            norm[s] = lowProbCount;
             distributed++;
             total -= count[s];
             continue;
@@ -414,7 +417,7 @@ static size_t FSE_normalizeM2(short* norm, U32 tableLog, const unsigned* count,

     {   U64 const vStepLog = 62 - tableLog;
         U64 const mid = (1ULL << (vStepLog-1)) - 1;
-        U64 const rStep = ((((U64)1<<vStepLog) * ToDistribute) + mid) / total;   /* scale on remaining */
+        U64 const rStep = ZSTD_div64((((U64)1<<vStepLog) * ToDistribute) + mid, (U32)total);   /* scale on remaining */
         U64 tmpTotal = mid;
         for (s=0; s<=maxSymbolValue; s++) {
             if (norm[s]==NOT_YET_ASSIGNED) {
@@ -431,10 +434,9 @@ static size_t FSE_normalizeM2(short* norm, U32 tableLog, const unsigned* count,
     return 0;
 }

-
 size_t FSE_normalizeCount (short* normalizedCounter, unsigned tableLog,
                            const unsigned* count, size_t total,
-                           unsigned maxSymbolValue)
+                           unsigned maxSymbolValue, unsigned useLowProbCount)
 {
     /* Sanity checks */
     if (tableLog==0) tableLog = FSE_DEFAULT_TABLELOG;
@@ -443,8 +445,9 @@ size_t FSE_normalizeCount (short* normalizedCounter, unsigned tableLog,
     if (tableLog < FSE_minTableLog(total, maxSymbolValue)) return ERROR(GENERIC);   /* Too small tableLog, compression potentially impossible */

     {   static U32 const rtbTable[] = { 0, 473195, 504333, 520860, 550000, 700000, 750000, 830000 };
+        short const lowProbCount = useLowProbCount ? -1 : 1;
         U64 const scale = 62 - tableLog;
-        U64 const step = ((U64)1<<62) / total;   /* <== here, one division ! */
+        U64 const step = ZSTD_div64((U64)1<<62, (U32)total);   /* <== here, one division ! */
         U64 const vStep = 1ULL<<(scale-20);
         int stillToDistribute = 1<<tableLog;
         unsigned s;
@@ -456,7 +459,7 @@ size_t FSE_normalizeCount (short* normalizedCounter, unsigned tableLog,
         if (count[s] == total) return 0;   /* rle special case */
         if (count[s] == 0) { normalizedCounter[s]=0; continue; }
         if (count[s] <= lowThreshold) {
-            normalizedCounter[s] = -1;
+            normalizedCounter[s] = lowProbCount;
             stillToDistribute--;
         } else {
             short proba = (short)((count[s]*step) >> scale);
@@ -470,7 +473,7 @@ size_t FSE_normalizeCount (short* normalizedCounter, unsigned tableLog,
     }   }
     if (-stillToDistribute >= (normalizedCounter[largest] >> 1)) {
         /* corner case, need another normalization method */
-        size_t const errorCode = FSE_normalizeM2(normalizedCounter, tableLog, count, total, maxSymbolValue);
+        size_t const errorCode = FSE_normalizeM2(normalizedCounter, tableLog, count, total, maxSymbolValue, lowProbCount);
         if (FSE_isError(errorCode)) return errorCode;
     }
     else normalizedCounter[largest] += (short)stillToDistribute;
@@ -625,6 +628,7 @@ size_t FSE_compress_usingCTable (void* dst, size_t dstSize,

 size_t FSE_compressBound(size_t size) { return FSE_COMPRESSBOUND(size); }

+#ifndef ZSTD_NO_UNUSED_FUNCTIONS
 /* FSE_compress_wksp() :
  * Same as FSE_compress2(), but using an externally allocated scratch buffer (`workSpace`).
  * `wkspSize` size must be `(1<<tableLog)`.
@@ -643,7 +647,7 @@ size_t FSE_compress_wksp (void* dst, size_t dstSize, const void* src, size_t src
     size_t const scratchBufferSize = wkspSize - (CTableSize * sizeof(FSE_CTable));

     /* init conditions */
-    if (wkspSize < FSE_WKSP_SIZE_U32(tableLog, maxSymbolValue)) return ERROR(tableLog_tooLarge);
+    if (wkspSize < FSE_COMPRESS_WKSP_SIZE_U32(tableLog, maxSymbolValue)) return ERROR(tableLog_tooLarge);
     if (srcSize <= 1) return 0;  /* Not compressible */
     if (!maxSymbolValue) maxSymbolValue = FSE_MAX_SYMBOL_VALUE;
     if (!tableLog) tableLog = FSE_DEFAULT_TABLELOG;
@@ -656,7 +660,7 @@ size_t FSE_compress_wksp (void* dst, size_t dstSize, const void* src, size_t src
     }

     tableLog = FSE_optimalTableLog(tableLog, srcSize, maxSymbolValue);
-    CHECK_F( FSE_normalizeCount(norm, tableLog, count, srcSize, maxSymbolValue) );
+    CHECK_F( FSE_normalizeCount(norm, tableLog, count, srcSize, maxSymbolValue, /* useLowProbCount */ srcSize >= 2048) );

     /* Write table description header */
     {   CHECK_V_F(nc_err, FSE_writeNCount(op, oend-op, norm, maxSymbolValue, tableLog) );
@@ -678,13 +682,16 @@ size_t FSE_compress_wksp (void* dst, size_t dstSize, const void* src, size_t src

 typedef struct {
     FSE_CTable CTable_max[FSE_CTABLE_SIZE_U32(FSE_MAX_TABLELOG, FSE_MAX_SYMBOL_VALUE)];
-    BYTE scratchBuffer[1 << FSE_MAX_TABLELOG];
+    union {
+        U32 hist_wksp[HIST_WKSP_SIZE_U32];
+        BYTE scratchBuffer[1 << FSE_MAX_TABLELOG];
+    } workspace;
 } fseWkspMax_t;

 size_t FSE_compress2 (void* dst, size_t dstCapacity, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog)
 {
     fseWkspMax_t scratchBuffer;
-    DEBUG_STATIC_ASSERT(sizeof(scratchBuffer) >= FSE_WKSP_SIZE_U32(FSE_MAX_TABLELOG, FSE_MAX_SYMBOL_VALUE));   /* compilation failures here means scratchBuffer is not large enough */
+    DEBUG_STATIC_ASSERT(sizeof(scratchBuffer) >= FSE_COMPRESS_WKSP_SIZE_U32(FSE_MAX_TABLELOG, FSE_MAX_SYMBOL_VALUE));   /* compilation failures here means scratchBuffer is not large enough */
     if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge);
     return FSE_compress_wksp(dst, dstCapacity, src, srcSize, maxSymbolValue, tableLog, &scratchBuffer, sizeof(scratchBuffer));
 }
@@ -693,6 +700,6 @@ size_t FSE_compress (void* dst, size_t dstCapacity, const void* src, size_t srcS
 {
     return FSE_compress2(dst, dstCapacity, src, srcSize, FSE_MAX_SYMBOL_VALUE, FSE_DEFAULT_TABLELOG);
 }
-
+#endif

 #endif   /* FSE_COMMONDEFS_ONLY */
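
Two behavioural points in these hunks: FSE_buildCTable_wksp() now requires a 4-byte-aligned workspace sized by FSE_BUILD_CTABLE_WORKSPACE_SIZE(), and FSE_normalizeCount() takes a new useLowProbCount argument (FSE_compress_wksp() passes srcSize >= 2048). A hedged sketch of calling the updated entry points through the FSE_STATIC_LINKING_ONLY API follows; the wrapper function and include path are illustrative only, the macros and functions appear in this diff.

    /* Sketch against the FSE_STATIC_LINKING_ONLY API; identifiers come from this diff. */
    #define FSE_STATIC_LINKING_ONLY
    #include "fse.h"   /* path depends on the build setup */

    static size_t build_ctable_sketch(FSE_CTable* ct,
                                      const unsigned* count, size_t srcSize,
                                      unsigned maxSymbolValue, unsigned tableLog)
    {
        short norm[FSE_MAX_SYMBOL_VALUE + 1];
        /* Workspace must be 4-byte aligned and at least
         * FSE_BUILD_CTABLE_WORKSPACE_SIZE(maxSymbolValue, tableLog) bytes, per the new checks. */
        unsigned wksp[FSE_BUILD_CTABLE_WORKSPACE_SIZE(FSE_MAX_SYMBOL_VALUE, FSE_MAX_TABLELOG) / sizeof(unsigned)];

        tableLog = FSE_optimalTableLog(tableLog, srcSize, maxSymbolValue);
        /* New final argument: low-probability (-1) counts are only used on larger inputs. */
        {   size_t const err = FSE_normalizeCount(norm, tableLog, count, srcSize,
                                                  maxSymbolValue, /* useLowProbCount */ srcSize >= 2048);
            if (FSE_isError(err)) return err;
        }
        return FSE_buildCTable_wksp(ct, norm, maxSymbolValue, tableLog, wksp, sizeof(wksp));
    }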
data/ext/zstdruby/libzstd/compress/hist.c
@@ -1,7 +1,7 @@
 /* ******************************************************************
  * hist : Histogram functions
  * part of Finite State Entropy project
- * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc.
+ * Copyright (c) 2013-2021, Yann Collet, Facebook, Inc.
  *
  * You can contact the author at :
  * - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
@@ -34,7 +34,7 @@ unsigned HIST_count_simple(unsigned* count, unsigned* maxSymbolValuePtr,
     unsigned maxSymbolValue = *maxSymbolValuePtr;
     unsigned largestCount=0;

-    memset(count, 0, (maxSymbolValue+1) * sizeof(*count));
+    ZSTD_memset(count, 0, (maxSymbolValue+1) * sizeof(*count));
     if (srcSize==0) { *maxSymbolValuePtr = 0; return 0; }

     while (ip<end) {
@@ -60,9 +60,9 @@ typedef enum { trustInput, checkMaxSymbolValue } HIST_checkInput_e;
 *  this design makes better use of OoO cpus,
 *  and is noticeably faster when some values are heavily repeated.
 *  But it needs some additional workspace for intermediate tables.
- * `workSpace` size must be a table of size >= HIST_WKSP_SIZE_U32.
+ * `workSpace` must be a U32 table of size >= HIST_WKSP_SIZE_U32.
 * @return : largest histogram frequency,
- *           or an error code (notably when histogram would be larger than *maxSymbolValuePtr). */
+ *           or an error code (notably when histogram's alphabet is larger than *maxSymbolValuePtr) */
 static size_t HIST_count_parallel_wksp(
                                 unsigned* count, unsigned* maxSymbolValuePtr,
                                 const void* source, size_t sourceSize,
@@ -71,22 +71,21 @@ static size_t HIST_count_parallel_wksp(
 {
     const BYTE* ip = (const BYTE*)source;
     const BYTE* const iend = ip+sourceSize;
-    unsigned maxSymbolValue = *maxSymbolValuePtr;
+    size_t const countSize = (*maxSymbolValuePtr + 1) * sizeof(*count);
     unsigned max=0;
     U32* const Counting1 = workSpace;
     U32* const Counting2 = Counting1 + 256;
     U32* const Counting3 = Counting2 + 256;
     U32* const Counting4 = Counting3 + 256;

-    memset(workSpace, 0, 4*256*sizeof(unsigned));
-
     /* safety checks */
+    assert(*maxSymbolValuePtr <= 255);
     if (!sourceSize) {
-        memset(count, 0, maxSymbolValue + 1);
+        ZSTD_memset(count, 0, countSize);
         *maxSymbolValuePtr = 0;
         return 0;
     }
-    if (!maxSymbolValue) maxSymbolValue = 255;            /* 0 == default */
+    ZSTD_memset(workSpace, 0, 4*256*sizeof(unsigned));

     /* by stripes of 16 bytes */
     {   U32 cached = MEM_read32(ip); ip += 4;
@@ -118,21 +117,18 @@ static size_t HIST_count_parallel_wksp(
     /* finish last symbols */
     while (ip<iend) Counting1[*ip++]++;

-    if (check) {   /* verify stats will fit into destination table */
-        U32 s; for (s=255; s>maxSymbolValue; s--) {
-            Counting1[s] += Counting2[s] + Counting3[s] + Counting4[s];
-            if (Counting1[s]) return ERROR(maxSymbolValue_tooSmall);
-    }   }
-
     {   U32 s;
-        if (maxSymbolValue > 255) maxSymbolValue = 255;
-        for (s=0; s<=maxSymbolValue; s++) {
-            count[s] = Counting1[s] + Counting2[s] + Counting3[s] + Counting4[s];
-            if (count[s] > max) max = count[s];
+        for (s=0; s<256; s++) {
+            Counting1[s] += Counting2[s] + Counting3[s] + Counting4[s];
+            if (Counting1[s] > max) max = Counting1[s];
     }   }

-    while (!count[maxSymbolValue]) maxSymbolValue--;
-    *maxSymbolValuePtr = maxSymbolValue;
+    {   unsigned maxSymbolValue = 255;
+        while (!Counting1[maxSymbolValue]) maxSymbolValue--;
+        if (check && maxSymbolValue > *maxSymbolValuePtr) return ERROR(maxSymbolValue_tooSmall);
+        *maxSymbolValuePtr = maxSymbolValue;
+        ZSTD_memmove(count, Counting1, countSize);   /* in case count & Counting1 are overlapping */
+    }
     return (size_t)max;
 }

@@ -152,14 +148,6 @@ size_t HIST_countFast_wksp(unsigned* count, unsigned* maxSymbolValuePtr,
     return HIST_count_parallel_wksp(count, maxSymbolValuePtr, source, sourceSize, trustInput, (U32*)workSpace);
 }

-/* fast variant (unsafe : won't check if src contains values beyond count[] limit) */
-size_t HIST_countFast(unsigned* count, unsigned* maxSymbolValuePtr,
-                      const void* source, size_t sourceSize)
-{
-    unsigned tmpCounters[HIST_WKSP_SIZE_U32];
-    return HIST_countFast_wksp(count, maxSymbolValuePtr, source, sourceSize, tmpCounters, sizeof(tmpCounters));
-}
-
 /* HIST_count_wksp() :
  * Same as HIST_count(), but using an externally provided scratch buffer.
  * `workSpace` size must be table of >= HIST_WKSP_SIZE_U32 unsigned */
@@ -175,9 +163,19 @@ size_t HIST_count_wksp(unsigned* count, unsigned* maxSymbolValuePtr,
     return HIST_countFast_wksp(count, maxSymbolValuePtr, source, sourceSize, workSpace, workSpaceSize);
 }

+#ifndef ZSTD_NO_UNUSED_FUNCTIONS
+/* fast variant (unsafe : won't check if src contains values beyond count[] limit) */
+size_t HIST_countFast(unsigned* count, unsigned* maxSymbolValuePtr,
+                      const void* source, size_t sourceSize)
+{
+    unsigned tmpCounters[HIST_WKSP_SIZE_U32];
+    return HIST_countFast_wksp(count, maxSymbolValuePtr, source, sourceSize, tmpCounters, sizeof(tmpCounters));
+}
+
 size_t HIST_count(unsigned* count, unsigned* maxSymbolValuePtr,
                   const void* src, size_t srcSize)
 {
     unsigned tmpCounters[HIST_WKSP_SIZE_U32];
     return HIST_count_wksp(count, maxSymbolValuePtr, src, srcSize, tmpCounters, sizeof(tmpCounters));
 }
+#endif
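
With this change the caller contract of the parallel counter is stricter: *maxSymbolValuePtr must be at most 255 on entry (enforced by assert), count[] must hold *maxSymbolValuePtr + 1 cells, and the results are accumulated in the workspace and then moved into count[], so the two buffers may overlap. A hedged sketch of the workspace-based entry point under those assumptions; the wrapper function and include path are illustrative.

    /* Sketch of the workspace-based histogram call under the stricter contract. */
    #include "hist.h"   /* internal header, path depends on the build setup */

    static size_t histogram_sketch(const void* src, size_t srcSize)
    {
        unsigned count[256];                /* room for *maxSymbolValuePtr + 1 entries */
        unsigned maxSymbolValue = 255;      /* must be <= 255 on entry */
        unsigned wksp[HIST_WKSP_SIZE_U32];  /* scratch required by the _wksp variants */

        size_t const largest = HIST_count_wksp(count, &maxSymbolValue,
                                               src, srcSize, wksp, sizeof(wksp));
        if (HIST_isError(largest)) return largest;   /* e.g. maxSymbolValue_tooSmall */
        /* On success, maxSymbolValue is the largest symbol actually seen and
         * largest is the highest frequency in count[]. */
        return largest;
    }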