zstd-ruby 1.3.4.0 → 1.3.5.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (43) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +1 -1
  3. data/ext/zstdruby/libzstd/Makefile +56 -10
  4. data/ext/zstdruby/libzstd/README.md +4 -0
  5. data/ext/zstdruby/libzstd/common/bitstream.h +6 -19
  6. data/ext/zstdruby/libzstd/common/compiler.h +3 -3
  7. data/ext/zstdruby/libzstd/common/cpu.h +1 -2
  8. data/ext/zstdruby/libzstd/common/debug.c +44 -0
  9. data/ext/zstdruby/libzstd/common/debug.h +123 -0
  10. data/ext/zstdruby/libzstd/common/entropy_common.c +16 -1
  11. data/ext/zstdruby/libzstd/common/fse.h +45 -41
  12. data/ext/zstdruby/libzstd/common/fse_decompress.c +1 -1
  13. data/ext/zstdruby/libzstd/common/huf.h +34 -27
  14. data/ext/zstdruby/libzstd/common/pool.c +89 -32
  15. data/ext/zstdruby/libzstd/common/pool.h +29 -19
  16. data/ext/zstdruby/libzstd/common/zstd_common.c +0 -5
  17. data/ext/zstdruby/libzstd/common/zstd_internal.h +3 -37
  18. data/ext/zstdruby/libzstd/compress/fse_compress.c +28 -163
  19. data/ext/zstdruby/libzstd/compress/hist.c +195 -0
  20. data/ext/zstdruby/libzstd/compress/hist.h +92 -0
  21. data/ext/zstdruby/libzstd/compress/huf_compress.c +14 -6
  22. data/ext/zstdruby/libzstd/compress/zstd_compress.c +798 -350
  23. data/ext/zstdruby/libzstd/compress/zstd_compress_internal.h +120 -34
  24. data/ext/zstdruby/libzstd/compress/zstd_double_fast.c +247 -87
  25. data/ext/zstdruby/libzstd/compress/zstd_double_fast.h +4 -1
  26. data/ext/zstdruby/libzstd/compress/zstd_fast.c +177 -56
  27. data/ext/zstdruby/libzstd/compress/zstd_fast.h +4 -1
  28. data/ext/zstdruby/libzstd/compress/zstd_lazy.c +331 -65
  29. data/ext/zstdruby/libzstd/compress/zstd_lazy.h +13 -0
  30. data/ext/zstdruby/libzstd/compress/zstd_ldm.c +15 -20
  31. data/ext/zstdruby/libzstd/compress/zstd_ldm.h +1 -2
  32. data/ext/zstdruby/libzstd/compress/zstd_opt.c +503 -300
  33. data/ext/zstdruby/libzstd/compress/zstd_opt.h +7 -0
  34. data/ext/zstdruby/libzstd/compress/zstdmt_compress.c +122 -47
  35. data/ext/zstdruby/libzstd/compress/zstdmt_compress.h +5 -5
  36. data/ext/zstdruby/libzstd/decompress/huf_decompress.c +325 -325
  37. data/ext/zstdruby/libzstd/decompress/zstd_decompress.c +80 -43
  38. data/ext/zstdruby/libzstd/dictBuilder/cover.c +9 -2
  39. data/ext/zstdruby/libzstd/dictBuilder/zdict.c +5 -5
  40. data/ext/zstdruby/libzstd/legacy/zstd_v04.c +12 -61
  41. data/ext/zstdruby/libzstd/zstd.h +137 -69
  42. data/lib/zstd-ruby/version.rb +1 -1
  43. metadata +7 -3
@@ -21,6 +21,7 @@
21
21
  ***************************************/
22
22
  #include "compiler.h"
23
23
  #include "mem.h"
24
+ #include "debug.h" /* assert, DEBUGLOG, RAWLOG, g_debuglevel */
24
25
  #include "error_private.h"
25
26
  #define ZSTD_STATIC_LINKING_ONLY
26
27
  #include "zstd.h"
@@ -38,43 +39,8 @@
38
39
  extern "C" {
39
40
  #endif
40
41
 
41
-
42
- /*-*************************************
43
- * Debug
44
- ***************************************/
45
- #if defined(ZSTD_DEBUG) && (ZSTD_DEBUG>=1)
46
- # include <assert.h>
47
- #else
48
- # ifndef assert
49
- # define assert(condition) ((void)0)
50
- # endif
51
- #endif
52
-
53
- #define ZSTD_STATIC_ASSERT(c) { enum { ZSTD_static_assert = 1/(int)(!!(c)) }; }
54
-
55
- #if defined(ZSTD_DEBUG) && (ZSTD_DEBUG>=2)
56
- # include <stdio.h>
57
- extern int g_debuglog_enable;
58
- /* recommended values for ZSTD_DEBUG display levels :
59
- * 1 : no display, enables assert() only
60
- * 2 : reserved for currently active debug path
61
- * 3 : events once per object lifetime (CCtx, CDict, etc.)
62
- * 4 : events once per frame
63
- * 5 : events once per block
64
- * 6 : events once per sequence (*very* verbose) */
65
- # define RAWLOG(l, ...) { \
66
- if ((g_debuglog_enable) & (l<=ZSTD_DEBUG)) { \
67
- fprintf(stderr, __VA_ARGS__); \
68
- } }
69
- # define DEBUGLOG(l, ...) { \
70
- if ((g_debuglog_enable) & (l<=ZSTD_DEBUG)) { \
71
- fprintf(stderr, __FILE__ ": " __VA_ARGS__); \
72
- fprintf(stderr, " \n"); \
73
- } }
74
- #else
75
- # define RAWLOG(l, ...) {} /* disabled */
76
- # define DEBUGLOG(l, ...) {} /* disabled */
77
- #endif
42
+ /* ---- static assert (debug) --- */
43
+ #define ZSTD_STATIC_ASSERT(c) DEBUG_STATIC_ASSERT(c)
78
44
 
79
45
 
80
46
  /*-*************************************
@@ -1,6 +1,6 @@
1
1
  /* ******************************************************************
2
2
  FSE : Finite State Entropy encoder
3
- Copyright (C) 2013-2015, Yann Collet.
3
+ Copyright (C) 2013-present, Yann Collet.
4
4
 
5
5
  BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
6
6
 
@@ -37,9 +37,11 @@
37
37
  ****************************************************************/
38
38
  #include <stdlib.h> /* malloc, free, qsort */
39
39
  #include <string.h> /* memcpy, memset */
40
- #include <stdio.h> /* printf (debug) */
41
- #include "bitstream.h"
42
40
  #include "compiler.h"
41
+ #include "mem.h" /* U32, U16, etc. */
42
+ #include "debug.h" /* assert, DEBUGLOG */
43
+ #include "hist.h" /* HIST_count_wksp */
44
+ #include "bitstream.h"
43
45
  #define FSE_STATIC_LINKING_ONLY
44
46
  #include "fse.h"
45
47
  #include "error_private.h"
@@ -49,7 +51,6 @@
49
51
  * Error Management
50
52
  ****************************************************************/
51
53
  #define FSE_isError ERR_isError
52
- #define FSE_STATIC_ASSERT(c) { enum { FSE_static_assert = 1/(int)(!!(c)) }; } /* use only *after* variable declarations */
53
54
 
54
55
 
55
56
  /* **************************************************************
@@ -100,6 +101,7 @@ size_t FSE_buildCTable_wksp(FSE_CTable* ct, const short* normalizedCounter, unsi
100
101
  if (((size_t)1 << tableLog) * sizeof(FSE_FUNCTION_TYPE) > wkspSize) return ERROR(tableLog_tooLarge);
101
102
  tableU16[-2] = (U16) tableLog;
102
103
  tableU16[-1] = (U16) maxSymbolValue;
104
+ assert(tableLog < 16); /* required for the threshold strategy to work */
103
105
 
104
106
  /* For explanations on how to distribute symbol values over the table :
105
107
  * http://fastcompression.blogspot.fr/2014/02/fse-distributing-symbol-values.html */
@@ -143,7 +145,10 @@ size_t FSE_buildCTable_wksp(FSE_CTable* ct, const short* normalizedCounter, unsi
143
145
  for (s=0; s<=maxSymbolValue; s++) {
144
146
  switch (normalizedCounter[s])
145
147
  {
146
- case 0: break;
148
+ case 0:
149
+ /* filling nonetheless, for compatibility with FSE_getMaxNbBits() */
150
+ symbolTT[s].deltaNbBits = ((tableLog+1) << 16) - (1<<tableLog);
151
+ break;
147
152
 
148
153
  case -1:
149
154
  case 1:
@@ -160,6 +165,18 @@ size_t FSE_buildCTable_wksp(FSE_CTable* ct, const short* normalizedCounter, unsi
160
165
  total += normalizedCounter[s];
161
166
  } } } }
162
167
 
168
+ #if 0 /* debug : symbol costs */
169
+ DEBUGLOG(5, "\n --- table statistics : ");
170
+ { U32 symbol;
171
+ for (symbol=0; symbol<=maxSymbolValue; symbol++) {
172
+ DEBUGLOG(5, "%3u: w=%3i, maxBits=%u, fracBits=%.2f",
173
+ symbol, normalizedCounter[symbol],
174
+ FSE_getMaxNbBits(symbolTT, symbol),
175
+ (double)FSE_bitCost(symbolTT, tableLog, symbol, 8) / 256);
176
+ }
177
+ }
178
+ #endif
179
+
163
180
  return 0;
164
181
  }
165
182
 
@@ -174,8 +191,9 @@ size_t FSE_buildCTable(FSE_CTable* ct, const short* normalizedCounter, unsigned
174
191
 
175
192
  #ifndef FSE_COMMONDEFS_ONLY
176
193
 
194
+
177
195
  /*-**************************************************************
178
- * FSE NCount encoding-decoding
196
+ * FSE NCount encoding
179
197
  ****************************************************************/
180
198
  size_t FSE_NCountWriteBound(unsigned maxSymbolValue, unsigned tableLog)
181
199
  {
@@ -283,159 +301,6 @@ size_t FSE_writeNCount (void* buffer, size_t bufferSize, const short* normalized
283
301
  }
284
302
 
285
303
 
286
-
287
- /*-**************************************************************
288
- * Counting histogram
289
- ****************************************************************/
290
- /*! FSE_count_simple
291
- This function counts byte values within `src`, and store the histogram into table `count`.
292
- It doesn't use any additional memory.
293
- But this function is unsafe : it doesn't check that all values within `src` can fit into `count`.
294
- For this reason, prefer using a table `count` with 256 elements.
295
- @return : count of most numerous element.
296
- */
297
- size_t FSE_count_simple(unsigned* count, unsigned* maxSymbolValuePtr,
298
- const void* src, size_t srcSize)
299
- {
300
- const BYTE* ip = (const BYTE*)src;
301
- const BYTE* const end = ip + srcSize;
302
- unsigned maxSymbolValue = *maxSymbolValuePtr;
303
- unsigned max=0;
304
-
305
- memset(count, 0, (maxSymbolValue+1)*sizeof(*count));
306
- if (srcSize==0) { *maxSymbolValuePtr = 0; return 0; }
307
-
308
- while (ip<end) {
309
- assert(*ip <= maxSymbolValue);
310
- count[*ip++]++;
311
- }
312
-
313
- while (!count[maxSymbolValue]) maxSymbolValue--;
314
- *maxSymbolValuePtr = maxSymbolValue;
315
-
316
- { U32 s; for (s=0; s<=maxSymbolValue; s++) if (count[s] > max) max = count[s]; }
317
-
318
- return (size_t)max;
319
- }
320
-
321
-
322
- /* FSE_count_parallel_wksp() :
323
- * Same as FSE_count_parallel(), but using an externally provided scratch buffer.
324
- * `workSpace` size must be a minimum of `1024 * sizeof(unsigned)`.
325
- * @return : largest histogram frequency, or an error code (notably when histogram would be larger than *maxSymbolValuePtr). */
326
- static size_t FSE_count_parallel_wksp(
327
- unsigned* count, unsigned* maxSymbolValuePtr,
328
- const void* source, size_t sourceSize,
329
- unsigned checkMax, unsigned* const workSpace)
330
- {
331
- const BYTE* ip = (const BYTE*)source;
332
- const BYTE* const iend = ip+sourceSize;
333
- unsigned maxSymbolValue = *maxSymbolValuePtr;
334
- unsigned max=0;
335
- U32* const Counting1 = workSpace;
336
- U32* const Counting2 = Counting1 + 256;
337
- U32* const Counting3 = Counting2 + 256;
338
- U32* const Counting4 = Counting3 + 256;
339
-
340
- memset(workSpace, 0, 4*256*sizeof(unsigned));
341
-
342
- /* safety checks */
343
- if (!sourceSize) {
344
- memset(count, 0, maxSymbolValue + 1);
345
- *maxSymbolValuePtr = 0;
346
- return 0;
347
- }
348
- if (!maxSymbolValue) maxSymbolValue = 255; /* 0 == default */
349
-
350
- /* by stripes of 16 bytes */
351
- { U32 cached = MEM_read32(ip); ip += 4;
352
- while (ip < iend-15) {
353
- U32 c = cached; cached = MEM_read32(ip); ip += 4;
354
- Counting1[(BYTE) c ]++;
355
- Counting2[(BYTE)(c>>8) ]++;
356
- Counting3[(BYTE)(c>>16)]++;
357
- Counting4[ c>>24 ]++;
358
- c = cached; cached = MEM_read32(ip); ip += 4;
359
- Counting1[(BYTE) c ]++;
360
- Counting2[(BYTE)(c>>8) ]++;
361
- Counting3[(BYTE)(c>>16)]++;
362
- Counting4[ c>>24 ]++;
363
- c = cached; cached = MEM_read32(ip); ip += 4;
364
- Counting1[(BYTE) c ]++;
365
- Counting2[(BYTE)(c>>8) ]++;
366
- Counting3[(BYTE)(c>>16)]++;
367
- Counting4[ c>>24 ]++;
368
- c = cached; cached = MEM_read32(ip); ip += 4;
369
- Counting1[(BYTE) c ]++;
370
- Counting2[(BYTE)(c>>8) ]++;
371
- Counting3[(BYTE)(c>>16)]++;
372
- Counting4[ c>>24 ]++;
373
- }
374
- ip-=4;
375
- }
376
-
377
- /* finish last symbols */
378
- while (ip<iend) Counting1[*ip++]++;
379
-
380
- if (checkMax) { /* verify stats will fit into destination table */
381
- U32 s; for (s=255; s>maxSymbolValue; s--) {
382
- Counting1[s] += Counting2[s] + Counting3[s] + Counting4[s];
383
- if (Counting1[s]) return ERROR(maxSymbolValue_tooSmall);
384
- } }
385
-
386
- { U32 s;
387
- if (maxSymbolValue > 255) maxSymbolValue = 255;
388
- for (s=0; s<=maxSymbolValue; s++) {
389
- count[s] = Counting1[s] + Counting2[s] + Counting3[s] + Counting4[s];
390
- if (count[s] > max) max = count[s];
391
- } }
392
-
393
- while (!count[maxSymbolValue]) maxSymbolValue--;
394
- *maxSymbolValuePtr = maxSymbolValue;
395
- return (size_t)max;
396
- }
397
-
398
- /* FSE_countFast_wksp() :
399
- * Same as FSE_countFast(), but using an externally provided scratch buffer.
400
- * `workSpace` size must be table of >= `1024` unsigned */
401
- size_t FSE_countFast_wksp(unsigned* count, unsigned* maxSymbolValuePtr,
402
- const void* source, size_t sourceSize,
403
- unsigned* workSpace)
404
- {
405
- if (sourceSize < 1500) /* heuristic threshold */
406
- return FSE_count_simple(count, maxSymbolValuePtr, source, sourceSize);
407
- return FSE_count_parallel_wksp(count, maxSymbolValuePtr, source, sourceSize, 0, workSpace);
408
- }
409
-
410
- /* fast variant (unsafe : won't check if src contains values beyond count[] limit) */
411
- size_t FSE_countFast(unsigned* count, unsigned* maxSymbolValuePtr,
412
- const void* source, size_t sourceSize)
413
- {
414
- unsigned tmpCounters[1024];
415
- return FSE_countFast_wksp(count, maxSymbolValuePtr, source, sourceSize, tmpCounters);
416
- }
417
-
418
- /* FSE_count_wksp() :
419
- * Same as FSE_count(), but using an externally provided scratch buffer.
420
- * `workSpace` size must be table of >= `1024` unsigned */
421
- size_t FSE_count_wksp(unsigned* count, unsigned* maxSymbolValuePtr,
422
- const void* source, size_t sourceSize, unsigned* workSpace)
423
- {
424
- if (*maxSymbolValuePtr < 255)
425
- return FSE_count_parallel_wksp(count, maxSymbolValuePtr, source, sourceSize, 1, workSpace);
426
- *maxSymbolValuePtr = 255;
427
- return FSE_countFast_wksp(count, maxSymbolValuePtr, source, sourceSize, workSpace);
428
- }
429
-
430
- size_t FSE_count(unsigned* count, unsigned* maxSymbolValuePtr,
431
- const void* src, size_t srcSize)
432
- {
433
- unsigned tmpCounters[1024];
434
- return FSE_count_wksp(count, maxSymbolValuePtr, src, srcSize, tmpCounters);
435
- }
436
-
437
-
438
-
439
304
  /*-**************************************************************
440
305
  * FSE Compression Code
441
306
  ****************************************************************/
@@ -629,11 +494,11 @@ size_t FSE_normalizeCount (short* normalizedCounter, unsigned tableLog,
629
494
  U32 s;
630
495
  U32 nTotal = 0;
631
496
  for (s=0; s<=maxSymbolValue; s++)
632
- printf("%3i: %4i \n", s, normalizedCounter[s]);
497
+ RAWLOG(2, "%3i: %4i \n", s, normalizedCounter[s]);
633
498
  for (s=0; s<=maxSymbolValue; s++)
634
499
  nTotal += abs(normalizedCounter[s]);
635
500
  if (nTotal != (1U<<tableLog))
636
- printf("Warning !!! Total == %u != %u !!!", nTotal, 1U<<tableLog);
501
+ RAWLOG(2, "Warning !!! Total == %u != %u !!!", nTotal, 1U<<tableLog);
637
502
  getchar();
638
503
  }
639
504
  #endif
@@ -800,7 +665,7 @@ size_t FSE_compress_wksp (void* dst, size_t dstSize, const void* src, size_t src
800
665
  if (!tableLog) tableLog = FSE_DEFAULT_TABLELOG;
801
666
 
802
667
  /* Scan input and build symbol stats */
803
- { CHECK_V_F(maxCount, FSE_count_wksp(count, &maxSymbolValue, src, srcSize, (unsigned*)scratchBuffer) );
668
+ { CHECK_V_F(maxCount, HIST_count_wksp(count, &maxSymbolValue, src, srcSize, (unsigned*)scratchBuffer) );
804
669
  if (maxCount == srcSize) return 1; /* only a single symbol in src : rle */
805
670
  if (maxCount == 1) return 0; /* each symbol present maximum once => not compressible */
806
671
  if (maxCount < (srcSize >> 7)) return 0; /* Heuristic : not compressible enough */
@@ -835,7 +700,7 @@ typedef struct {
835
700
  size_t FSE_compress2 (void* dst, size_t dstCapacity, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog)
836
701
  {
837
702
  fseWkspMax_t scratchBuffer;
838
- FSE_STATIC_ASSERT(sizeof(scratchBuffer) >= FSE_WKSP_SIZE_U32(FSE_MAX_TABLELOG, FSE_MAX_SYMBOL_VALUE)); /* compilation failures here means scratchBuffer is not large enough */
703
+ DEBUG_STATIC_ASSERT(sizeof(scratchBuffer) >= FSE_WKSP_SIZE_U32(FSE_MAX_TABLELOG, FSE_MAX_SYMBOL_VALUE)); /* compilation failures here means scratchBuffer is not large enough */
839
704
  if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge);
840
705
  return FSE_compress_wksp(dst, dstCapacity, src, srcSize, maxSymbolValue, tableLog, &scratchBuffer, sizeof(scratchBuffer));
841
706
  }
@@ -0,0 +1,195 @@
1
+ /* ******************************************************************
2
+ hist : Histogram functions
3
+ part of Finite State Entropy project
4
+ Copyright (C) 2013-present, Yann Collet.
5
+
6
+ BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
7
+
8
+ Redistribution and use in source and binary forms, with or without
9
+ modification, are permitted provided that the following conditions are
10
+ met:
11
+
12
+ * Redistributions of source code must retain the above copyright
13
+ notice, this list of conditions and the following disclaimer.
14
+ * Redistributions in binary form must reproduce the above
15
+ copyright notice, this list of conditions and the following disclaimer
16
+ in the documentation and/or other materials provided with the
17
+ distribution.
18
+
19
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30
+
31
+ You can contact the author at :
32
+ - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
33
+ - Public forum : https://groups.google.com/forum/#!forum/lz4c
34
+ ****************************************************************** */
35
+
36
+ /* --- dependencies --- */
37
+ #include "mem.h" /* U32, BYTE, etc. */
38
+ #include "debug.h" /* assert, DEBUGLOG */
39
+ #include "error_private.h" /* ERROR */
40
+ #include "hist.h"
41
+
42
+
43
+ /* --- Error management --- */
44
+ unsigned HIST_isError(size_t code) { return ERR_isError(code); } /* forwards `code` to ERR_isError() and returns its result unchanged */
45
+
46
+ /*-**************************************************************
47
+ * Histogram functions
48
+ ****************************************************************/
49
+ unsigned HIST_count_simple(unsigned* count, unsigned* maxSymbolValuePtr,
50
+ const void* src, size_t srcSize)
51
+ { /* Byte histogram of src[0..srcSize) into count[]; returns the largest single count and lowers *maxSymbolValuePtr to the highest byte value seen. */
52
+ const BYTE* ip = (const BYTE*)src;
53
+ const BYTE* const end = ip + srcSize;
54
+ unsigned maxSymbolValue = *maxSymbolValuePtr;
55
+ unsigned largestCount=0;
56
+
57
+ memset(count, 0, (maxSymbolValue+1) * sizeof(*count)); /* clears entries count[0..maxSymbolValue] */
58
+ if (srcSize==0) { *maxSymbolValuePtr = 0; return 0; } /* empty input : all-zero histogram, max symbol reported as 0 */
59
+
60
+ while (ip<end) {
61
+ assert(*ip <= maxSymbolValue); /* the only range check in this function : count[] must be large enough for every byte value in src */
62
+ count[*ip++]++;
63
+ }
64
+
65
+ while (!count[maxSymbolValue]) maxSymbolValue--; /* trim down to the highest symbol with a non-zero count (srcSize>0 here, so one exists) */
66
+ *maxSymbolValuePtr = maxSymbolValue;
67
+
68
+ { U32 s;
69
+ for (s=0; s<=maxSymbolValue; s++) /* scan the trimmed range for the most frequent symbol */
70
+ if (count[s] > largestCount) largestCount = count[s];
71
+ }
72
+
73
+ return largestCount;
74
+ }
75
+
76
+
77
+ /* HIST_count_parallel_wksp() :
78
+ * Stores the histogram into 4 intermediate tables, recombined at the end.
79
+ * This design makes better use of OoO cpus,
80
+ * and is noticeably faster when some values are heavily repeated.
81
+ * But it needs some additional workspace for intermediate tables :
82
+ * `workSpace` must be a table of at least HIST_WKSP_SIZE_U32 unsigned (this function clears and uses 4*256 entries).
83
+ * `checkMax` != 0 : fail if any byte value above *maxSymbolValuePtr is present. On success, *maxSymbolValuePtr is lowered to the largest symbol actually present.
84
+ * @return : largest histogram frequency, or an error code (maxSymbolValue_tooSmall, when checkMax is set and the histogram would be larger than *maxSymbolValuePtr). */
85
+ static size_t HIST_count_parallel_wksp(
86
+ unsigned* count, unsigned* maxSymbolValuePtr,
87
+ const void* source, size_t sourceSize,
88
+ unsigned checkMax,
89
+ unsigned* const workSpace)
90
+ {
91
+ const BYTE* ip = (const BYTE*)source;
92
+ const BYTE* const iend = ip+sourceSize;
93
+ unsigned maxSymbolValue = *maxSymbolValuePtr;
94
+ unsigned max=0;
95
+ U32* const Counting1 = workSpace;
96
+ U32* const Counting2 = Counting1 + 256;
97
+ U32* const Counting3 = Counting2 + 256;
98
+ U32* const Counting4 = Counting3 + 256;
99
+
100
+ memset(workSpace, 0, 4*256*sizeof(unsigned));
101
+
102
+ /* safety checks */
103
+ if (!sourceSize) {
104
+ memset(count, 0, (maxSymbolValue + 1) * sizeof(*count)); /* memset takes a byte count : clear whole entries count[0..maxSymbolValue] */
105
+ *maxSymbolValuePtr = 0;
106
+ return 0;
107
+ }
108
+ if (!maxSymbolValue) maxSymbolValue = 255; /* 0 == default */
109
+
110
+ /* by stripes of 16 bytes */
111
+ if (sourceSize >= 20) { U32 cached = MEM_read32(ip); ip += 4; /* the loop below needs ip+4 < iend-15, i.e. >= 20 bytes; shorter inputs skip the 4-byte pre-read and the iend-15 pointer entirely */
112
+ while (ip < iend-15) {
113
+ U32 c = cached; cached = MEM_read32(ip); ip += 4;
114
+ Counting1[(BYTE) c ]++;
115
+ Counting2[(BYTE)(c>>8) ]++;
116
+ Counting3[(BYTE)(c>>16)]++;
117
+ Counting4[ c>>24 ]++;
118
+ c = cached; cached = MEM_read32(ip); ip += 4;
119
+ Counting1[(BYTE) c ]++;
120
+ Counting2[(BYTE)(c>>8) ]++;
121
+ Counting3[(BYTE)(c>>16)]++;
122
+ Counting4[ c>>24 ]++;
123
+ c = cached; cached = MEM_read32(ip); ip += 4;
124
+ Counting1[(BYTE) c ]++;
125
+ Counting2[(BYTE)(c>>8) ]++;
126
+ Counting3[(BYTE)(c>>16)]++;
127
+ Counting4[ c>>24 ]++;
128
+ c = cached; cached = MEM_read32(ip); ip += 4;
129
+ Counting1[(BYTE) c ]++;
130
+ Counting2[(BYTE)(c>>8) ]++;
131
+ Counting3[(BYTE)(c>>16)]++;
132
+ Counting4[ c>>24 ]++;
133
+ }
134
+ ip-=4; /* the last word read into `cached` was never counted : rewind so the tail loop picks it up */
135
+ }
136
+
137
+ /* finish last symbols */
138
+ while (ip<iend) Counting1[*ip++]++;
139
+
140
+ if (checkMax) { /* verify stats will fit into destination table */
141
+ U32 s; for (s=255; s>maxSymbolValue; s--) {
142
+ Counting1[s] += Counting2[s] + Counting3[s] + Counting4[s];
143
+ if (Counting1[s]) return ERROR(maxSymbolValue_tooSmall);
144
+ } }
145
+
146
+ { U32 s;
147
+ if (maxSymbolValue > 255) maxSymbolValue = 255;
148
+ for (s=0; s<=maxSymbolValue; s++) { /* merge the 4 partial tables into count[] and track the largest frequency */
149
+ count[s] = Counting1[s] + Counting2[s] + Counting3[s] + Counting4[s];
150
+ if (count[s] > max) max = count[s];
151
+ } }
152
+
153
+ while (!count[maxSymbolValue]) maxSymbolValue--; /* trim down to the highest symbol with a non-zero count */
154
+ *maxSymbolValuePtr = maxSymbolValue;
155
+ return (size_t)max;
156
+ }
157
+
158
+ /* HIST_countFast_wksp() :
159
+ * Same as HIST_countFast(), but using an externally provided scratch buffer.
160
+ * `workSpace` must be a table of at least HIST_WKSP_SIZE_U32 unsigned; it is only passed on for inputs of 1500 bytes or more. */
161
+ size_t HIST_countFast_wksp(unsigned* count, unsigned* maxSymbolValuePtr,
162
+ const void* source, size_t sourceSize,
163
+ unsigned* workSpace)
164
+ {
165
+ if (sourceSize < 1500) /* heuristic threshold */
166
+ return HIST_count_simple(count, maxSymbolValuePtr, source, sourceSize); /* small input : single-table counter, workSpace unused */
167
+ return HIST_count_parallel_wksp(count, maxSymbolValuePtr, source, sourceSize, 0, workSpace); /* checkMax=0 : no verification against *maxSymbolValuePtr */
168
+ }
169
+
170
+ /* HIST_countFast() : fast variant (unsafe : won't check if src contains values beyond count[] limit); forwards to HIST_countFast_wksp() with a local scratch table */
171
+ size_t HIST_countFast(unsigned* count, unsigned* maxSymbolValuePtr,
172
+ const void* source, size_t sourceSize)
173
+ {
174
+ unsigned tmpCounters[HIST_WKSP_SIZE_U32]; /* scratch table with automatic storage, HIST_WKSP_SIZE_U32 entries */
175
+ return HIST_countFast_wksp(count, maxSymbolValuePtr, source, sourceSize, tmpCounters);
176
+ }
177
+
178
+ /* HIST_count_wksp() :
179
+ * Same as HIST_count(), but using an externally provided scratch buffer.
180
+ * `workSpace` must be a table of at least HIST_WKSP_SIZE_U32 unsigned. */
181
+ size_t HIST_count_wksp(unsigned* count, unsigned* maxSymbolValuePtr,
182
+ const void* source, size_t sourceSize, unsigned* workSpace)
183
+ {
184
+ if (*maxSymbolValuePtr < 255) /* limit below 255 : take the checked path */
185
+ return HIST_count_parallel_wksp(count, maxSymbolValuePtr, source, sourceSize, 1, workSpace); /* checkMax=1 : returns maxSymbolValue_tooSmall if a byte value above the limit is present */
186
+ *maxSymbolValuePtr = 255; /* limits of 255 or more are clamped to 255 before the unchecked fast path */
187
+ return HIST_countFast_wksp(count, maxSymbolValuePtr, source, sourceSize, workSpace);
188
+ }
189
+
190
+ size_t HIST_count(unsigned* count, unsigned* maxSymbolValuePtr,
191
+ const void* src, size_t srcSize)
192
+ { /* Convenience entry point : same as HIST_count_wksp(), with the scratch table declared locally instead of supplied by the caller. */
193
+ unsigned tmpCounters[HIST_WKSP_SIZE_U32]; /* scratch table with automatic storage, HIST_WKSP_SIZE_U32 entries */
194
+ return HIST_count_wksp(count, maxSymbolValuePtr, src, srcSize, tmpCounters);
195
+ }