zstd-ruby 1.3.3.0 → 1.3.4.0

This diff shows the changes between two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.
Files changed (44)
  1. checksums.yaml +5 -5
  2. data/README.md +1 -1
  3. data/ext/zstdruby/libzstd/BUCK +13 -0
  4. data/ext/zstdruby/libzstd/README.md +32 -25
  5. data/ext/zstdruby/libzstd/common/bitstream.h +1 -1
  6. data/ext/zstdruby/libzstd/common/compiler.h +25 -0
  7. data/ext/zstdruby/libzstd/common/cpu.h +216 -0
  8. data/ext/zstdruby/libzstd/common/error_private.c +1 -0
  9. data/ext/zstdruby/libzstd/common/fse.h +1 -1
  10. data/ext/zstdruby/libzstd/common/fse_decompress.c +2 -2
  11. data/ext/zstdruby/libzstd/common/huf.h +114 -89
  12. data/ext/zstdruby/libzstd/common/pool.c +46 -17
  13. data/ext/zstdruby/libzstd/common/pool.h +18 -9
  14. data/ext/zstdruby/libzstd/common/threading.h +12 -12
  15. data/ext/zstdruby/libzstd/common/zstd_errors.h +16 -7
  16. data/ext/zstdruby/libzstd/common/zstd_internal.h +4 -5
  17. data/ext/zstdruby/libzstd/compress/fse_compress.c +19 -11
  18. data/ext/zstdruby/libzstd/compress/huf_compress.c +160 -62
  19. data/ext/zstdruby/libzstd/compress/zstd_compress.c +973 -644
  20. data/ext/zstdruby/libzstd/compress/zstd_compress_internal.h +281 -34
  21. data/ext/zstdruby/libzstd/compress/zstd_double_fast.c +80 -62
  22. data/ext/zstdruby/libzstd/compress/zstd_double_fast.h +11 -4
  23. data/ext/zstdruby/libzstd/compress/zstd_fast.c +87 -71
  24. data/ext/zstdruby/libzstd/compress/zstd_fast.h +10 -6
  25. data/ext/zstdruby/libzstd/compress/zstd_lazy.c +333 -274
  26. data/ext/zstdruby/libzstd/compress/zstd_lazy.h +33 -16
  27. data/ext/zstdruby/libzstd/compress/zstd_ldm.c +305 -359
  28. data/ext/zstdruby/libzstd/compress/zstd_ldm.h +64 -21
  29. data/ext/zstdruby/libzstd/compress/zstd_opt.c +194 -56
  30. data/ext/zstdruby/libzstd/compress/zstd_opt.h +17 -5
  31. data/ext/zstdruby/libzstd/compress/zstdmt_compress.c +1131 -449
  32. data/ext/zstdruby/libzstd/compress/zstdmt_compress.h +32 -16
  33. data/ext/zstdruby/libzstd/decompress/huf_decompress.c +390 -290
  34. data/ext/zstdruby/libzstd/decompress/zstd_decompress.c +777 -439
  35. data/ext/zstdruby/libzstd/dictBuilder/cover.c +11 -8
  36. data/ext/zstdruby/libzstd/dictBuilder/zdict.c +83 -50
  37. data/ext/zstdruby/libzstd/dictBuilder/zdict.h +44 -43
  38. data/ext/zstdruby/libzstd/legacy/zstd_legacy.h +2 -0
  39. data/ext/zstdruby/libzstd/legacy/zstd_v04.c +42 -118
  40. data/ext/zstdruby/libzstd/legacy/zstd_v06.c +2 -2
  41. data/ext/zstdruby/libzstd/legacy/zstd_v07.c +2 -2
  42. data/ext/zstdruby/libzstd/zstd.h +254 -254
  43. data/lib/zstd-ruby/version.rb +1 -1
  44. metadata +4 -3

--- a/data/ext/zstdruby/libzstd/common/zstd_errors.h
+++ b/data/ext/zstdruby/libzstd/common/zstd_errors.h
@@ -35,12 +35,20 @@ extern "C" {
 #  define ZSTDERRORLIB_API ZSTDERRORLIB_VISIBILITY
 #endif
 
-/*-****************************************
-*  error codes list
-*  note : this API is still considered unstable
-*         and shall not be used with a dynamic library.
-*         only static linking is allowed
-******************************************/
+/*-*********************************************
+ *  Error codes list
+ *-*********************************************
+ *  Error codes _values_ are pinned down since v1.3.1 only.
+ *  Therefore, don't rely on values if you may link to any version < v1.3.1.
+ *
+ *  Only values < 100 are considered stable.
+ *
+ *  note 1 : this API shall be used with static linking only.
+ *           dynamic linking is not yet officially supported.
+ *  note 2 : Prefer relying on the enum than on its value whenever possible
+ *           This is the only supported way to use the error list < v1.3.1
+ *  note 3 : ZSTD_isError() is always correct, whatever the library version.
+ **********************************************/
 typedef enum {
   ZSTD_error_no_error = 0,
   ZSTD_error_GENERIC  = 1,
@@ -61,9 +69,10 @@ typedef enum {
   ZSTD_error_stage_wrong = 60,
   ZSTD_error_init_missing = 62,
   ZSTD_error_memory_allocation = 64,
+  ZSTD_error_workSpace_tooSmall= 66,
   ZSTD_error_dstSize_tooSmall = 70,
   ZSTD_error_srcSize_wrong = 72,
-  /* following error codes are not stable and may be removed or changed in a future version */
+  /* following error codes are __NOT STABLE__, they can be removed or changed in future versions */
   ZSTD_error_frameIndex_tooLarge = 100,
   ZSTD_error_seekableIO = 102,
   ZSTD_error_maxCode = 120  /* never EVER use this value directly, it can change in future versions! Use ZSTD_isError() instead */
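
A minimal sketch of the usage these notes recommend (the buffers and the compression call are illustrative, not part of this diff): test every size_t result with ZSTD_isError(), and when a specific failure matters, compare ZSTD_getErrorCode() against the enum instead of hard-coding a numeric value:

    #include <stdio.h>
    #include "zstd.h"
    #include "zstd_errors.h"   /* ZSTD_getErrorCode(), ZSTD_error_* enum */

    static int compress_checked(void* dst, size_t dstCapacity,
                                const void* src, size_t srcSize)
    {
        size_t const r = ZSTD_compress(dst, dstCapacity, src, srcSize, 1);
        if (ZSTD_isError(r)) {   /* note 3 : valid whatever the library version */
            if (ZSTD_getErrorCode(r) == ZSTD_error_dstSize_tooSmall)
                fprintf(stderr, "destination buffer too small\n");
            else
                fprintf(stderr, "zstd: %s\n", ZSTD_getErrorName(r));
            return -1;
        }
        return 0;   /* success : r is the compressed size */
    }
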

--- a/data/ext/zstdruby/libzstd/common/zstd_internal.h
+++ b/data/ext/zstdruby/libzstd/common/zstd_internal.h
@@ -132,14 +132,15 @@ typedef enum { set_basic, set_rle, set_compressed, set_repeat } symbolEncodingTy
 
 #define Litbits  8
 #define MaxLit ((1<<Litbits) - 1)
-#define MaxML  52
-#define MaxLL  35
+#define MaxML   52
+#define MaxLL   35
 #define DefaultMaxOff 28
-#define MaxOff 31
+#define MaxOff  31
 #define MaxSeq MAX(MaxLL, MaxML)   /* Assumption : MaxOff < MaxLL,MaxML */
 #define MLFSELog    9
 #define LLFSELog    9
 #define OffFSELog   8
+#define MaxFSELog  MAX(MAX(MLFSELog, LLFSELog), OffFSELog)
 
 static const U32 LL_bits[MaxLL+1] = { 0, 0, 0, 0, 0, 0, 0, 0,
                                       0, 0, 0, 0, 0, 0, 0, 0,
@@ -228,8 +229,6 @@ typedef struct {
     BYTE* ofCode;
     U32   longLengthID;   /* 0 == no longLength; 1 == Lit.longLength; 2 == Match.longLength; */
     U32   longLengthPos;
-    U32   rep[ZSTD_REP_NUM];
-    U32   repToConfirm[ZSTD_REP_NUM];
 } seqStore_t;
 
 const seqStore_t* ZSTD_getSeqStore(const ZSTD_CCtx* ctx);   /* compress & dictBuilder */

--- a/data/ext/zstdruby/libzstd/compress/fse_compress.c
+++ b/data/ext/zstdruby/libzstd/compress/fse_compress.c
@@ -248,7 +248,7 @@ static size_t FSE_writeNCount_generic (void* header, size_t headerBufferSize,
             bitCount  -= (count<max);
             previous0  = (count==1);
             if (remaining<1) return ERROR(GENERIC);
-            while (remaining<threshold) nbBits--, threshold>>=1;
+            while (remaining<threshold) { nbBits--; threshold>>=1; }
         }
         if (bitCount>16) {
             if ((!writeIsSafe) && (out > oend - 2)) return ERROR(dstSize_tooSmall);   /* Buffer overflow */
@@ -292,7 +292,7 @@ size_t FSE_writeNCount (void* buffer, size_t bufferSize, const short* normalized
     It doesn't use any additional memory.
     But this function is unsafe : it doesn't check that all values within `src` can fit into `count`.
     For this reason, prefer using a table `count` with 256 elements.
-    @return : count of most numerous element
+    @return : count of most numerous element.
 */
 size_t FSE_count_simple(unsigned* count, unsigned* maxSymbolValuePtr,
                         const void* src, size_t srcSize)
@@ -305,7 +305,10 @@ size_t FSE_count_simple(unsigned* count, unsigned* maxSymbolValuePtr,
     memset(count, 0, (maxSymbolValue+1)*sizeof(*count));
     if (srcSize==0) { *maxSymbolValuePtr = 0; return 0; }
 
-    while (ip<end) count[*ip++]++;
+    while (ip<end) {
+        assert(*ip <= maxSymbolValue);
+        count[*ip++]++;
+    }
 
     while (!count[maxSymbolValue]) maxSymbolValue--;
     *maxSymbolValuePtr = maxSymbolValue;
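
The new assert makes the caller contract of this "unsafe" counter explicit: every input byte must already be <= *maxSymbolValuePtr on entry, and on return the pointee is lowered to the largest symbol actually present. A sketch of that in/out behaviour (FSE_count_simple is an internal symbol, normally reached via FSE_STATIC_LINKING_ONLY; the wrapper below is illustrative):

    #define FSE_STATIC_LINKING_ONLY
    #include "fse.h"

    static void histogram_demo(const unsigned char* buf, size_t len)
    {
        unsigned count[256];
        unsigned maxSymbolValue = 255;   /* in  : capacity of count[] minus 1 */
        size_t const largest = FSE_count_simple(count, &maxSymbolValue, buf, len);
        /* out : maxSymbolValue == highest byte value occurring in buf,
         *       largest        == frequency of the most common byte */
        (void)largest;
    }
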
@@ -318,7 +321,8 @@ size_t FSE_count_simple(unsigned* count, unsigned* maxSymbolValuePtr,
 
 /* FSE_count_parallel_wksp() :
  * Same as FSE_count_parallel(), but using an externally provided scratch buffer.
- * `workSpace` size must be a minimum of `1024 * sizeof(unsigned)`` */
+ * `workSpace` size must be a minimum of `1024 * sizeof(unsigned)`.
+ * @return : largest histogram frequency, or an error code (notably when histogram would be larger than *maxSymbolValuePtr). */
 static size_t FSE_count_parallel_wksp(
                                 unsigned* count, unsigned* maxSymbolValuePtr,
                                 const void* source, size_t sourceSize,
@@ -333,7 +337,7 @@ static size_t FSE_count_parallel_wksp(
     U32* const Counting3 = Counting2 + 256;
     U32* const Counting4 = Counting3 + 256;
 
-    memset(Counting1, 0, 4*256*sizeof(unsigned));
+    memset(workSpace, 0, 4*256*sizeof(unsigned));
 
     /* safety checks */
     if (!sourceSize) {
@@ -379,7 +383,9 @@ static size_t FSE_count_parallel_wksp(
         if (Counting1[s]) return ERROR(maxSymbolValue_tooSmall);
     }   }
 
-    { U32 s; for (s=0; s<=maxSymbolValue; s++) {
+    {   U32 s;
+        if (maxSymbolValue > 255) maxSymbolValue = 255;
+        for (s=0; s<=maxSymbolValue; s++) {
             count[s] = Counting1[s] + Counting2[s] + Counting3[s] + Counting4[s];
             if (count[s] > max) max = count[s];
     }   }
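
For context, Counting1..Counting4 are four partial histograms: consecutive bytes update different tables, which breaks the store-to-load dependency chain a single count[] would create, and the loop above merges them (now clamping maxSymbolValue to 255 before indexing). The technique in isolation, as a self-contained sketch rather than zstd's exact code:

    #include <stddef.h>

    /* Build one byte-histogram from four interleaved partial tables. */
    static void count4(unsigned count[256], const unsigned char* ip, size_t n)
    {
        unsigned c1[256] = {0}, c2[256] = {0}, c3[256] = {0}, c4[256] = {0};
        size_t i;
        for (i = 0; i + 4 <= n; i += 4) {   /* 4 independent update streams */
            c1[ip[i+0]]++;
            c2[ip[i+1]]++;
            c3[ip[i+2]]++;
            c4[ip[i+3]]++;
        }
        for (; i < n; i++) c1[ip[i]]++;     /* tail */
        for (i = 0; i < 256; i++)
            count[i] = c1[i] + c2[i] + c3[i] + c4[i];
    }
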
@@ -393,9 +399,11 @@ static size_t FSE_count_parallel_wksp(
  * Same as FSE_countFast(), but using an externally provided scratch buffer.
  * `workSpace` size must be table of >= `1024` unsigned */
 size_t FSE_countFast_wksp(unsigned* count, unsigned* maxSymbolValuePtr,
-                          const void* source, size_t sourceSize, unsigned* workSpace)
+                          const void* source, size_t sourceSize,
+                          unsigned* workSpace)
 {
-    if (sourceSize < 1500) return FSE_count_simple(count, maxSymbolValuePtr, source, sourceSize);
+    if (sourceSize < 1500) /* heuristic threshold */
+        return FSE_count_simple(count, maxSymbolValuePtr, source, sourceSize);
     return FSE_count_parallel_wksp(count, maxSymbolValuePtr, source, sourceSize, 0, workSpace);
 }
 
@@ -540,7 +548,7 @@ static size_t FSE_normalizeM2(short* norm, U32 tableLog, const unsigned* count,
            find max, then give all remaining points to max */
         U32 maxV = 0, maxC = 0;
         for (s=0; s<=maxSymbolValue; s++)
-            if (count[s] > maxC) maxV=s, maxC=count[s];
+            if (count[s] > maxC) { maxV=s; maxC=count[s]; }
         norm[maxV] += (short)ToDistribute;
         return 0;
     }
@@ -548,7 +556,7 @@ static size_t FSE_normalizeM2(short* norm, U32 tableLog, const unsigned* count,
     if (total == 0) {
         /* all of the symbols were low enough for the lowOne or lowThreshold */
         for (s=0; ToDistribute > 0; s = (s+1)%(maxSymbolValue+1))
-            if (norm[s] > 0) ToDistribute--, norm[s]++;
+            if (norm[s] > 0) { ToDistribute--; norm[s]++; }
         return 0;
     }
 
@@ -604,7 +612,7 @@ size_t FSE_normalizeCount (short* normalizedCounter, unsigned tableLog,
                 U64 restToBeat = vStep * rtbTable[proba];
                 proba += (count[s]*step) - ((U64)proba<<scale) > restToBeat;
             }
-            if (proba > largestP) largestP=proba, largest=s;
+            if (proba > largestP) { largestP=proba; largest=s; }
             normalizedCounter[s] = proba;
             stillToDistribute -= proba;
     }   }

--- a/data/ext/zstdruby/libzstd/compress/huf_compress.c
+++ b/data/ext/zstdruby/libzstd/compress/huf_compress.c
@@ -46,6 +46,7 @@
 #include <string.h>     /* memcpy, memset */
 #include <stdio.h>      /* printf (debug) */
 #include "bitstream.h"
+#include "compiler.h"
 #define FSE_STATIC_LINKING_ONLY   /* FSE_optimalTableLog_internal */
 #include "fse.h"        /* header compression */
 #define HUF_STATIC_LINKING_ONLY
@@ -322,7 +323,10 @@ static void HUF_sort(nodeElt* huffNode, const U32* count, U32 maxSymbolValue)
         U32 const c = count[n];
         U32 const r = BIT_highbit32(c+1) + 1;
         U32 pos = rank[r].current++;
-        while ((pos > rank[r].base) && (c > huffNode[pos-1].count)) huffNode[pos]=huffNode[pos-1], pos--;
+        while ((pos > rank[r].base) && (c > huffNode[pos-1].count)) {
+            huffNode[pos] = huffNode[pos-1];
+            pos--;
+        }
         huffNode[pos].count = c;
         huffNode[pos].byte  = (BYTE)n;
     }
@@ -331,10 +335,10 @@ static void HUF_sort(nodeElt* huffNode, const U32* count, U32 maxSymbolValue)
 
 /** HUF_buildCTable_wksp() :
  *  Same as HUF_buildCTable(), but using externally allocated scratch buffer.
- *  `workSpace` must be aligned on 4-bytes boundaries, and be at least as large as a table of 1024 unsigned.
+ *  `workSpace` must be aligned on 4-bytes boundaries, and be at least as large as a table of HUF_CTABLE_WORKSPACE_SIZE_U32 unsigned.
 */
 #define STARTNODE (HUF_SYMBOLVALUE_MAX+1)
-typedef nodeElt huffNodeTable[2*HUF_SYMBOLVALUE_MAX+1 +1];
+typedef nodeElt huffNodeTable[HUF_CTABLE_WORKSPACE_SIZE_U32];
 size_t HUF_buildCTable_wksp (HUF_CElt* tree, const U32* count, U32 maxSymbolValue, U32 maxNbBits, void* workSpace, size_t wkspSize)
 {
     nodeElt* const huffNode0 = (nodeElt*)workSpace;
@@ -345,9 +349,10 @@ size_t HUF_buildCTable_wksp (HUF_CElt* tree, const U32* count, U32 maxSymbolValu
     U32 nodeRoot;
 
     /* safety checks */
-    if (wkspSize < sizeof(huffNodeTable)) return ERROR(GENERIC);   /* workSpace is not large enough */
+    if (((size_t)workSpace & 3) != 0) return ERROR(GENERIC);  /* must be aligned on 4-bytes boundaries */
+    if (wkspSize < sizeof(huffNodeTable)) return ERROR(workSpace_tooSmall);
     if (maxNbBits == 0) maxNbBits = HUF_TABLELOG_DEFAULT;
-    if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) return ERROR(GENERIC);
+    if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) return ERROR(maxSymbolValue_tooLarge);
     memset(huffNode0, 0, sizeof(huffNodeTable));
 
     /* sort, decreasing order */
@@ -405,6 +410,7 @@ size_t HUF_buildCTable_wksp (HUF_CElt* tree, const U32* count, U32 maxSymbolValu
 }
 
 /** HUF_buildCTable() :
+ * @return : maxNbBits
  *  Note : count is used before tree is written, so they can safely overlap
  */
 size_t HUF_buildCTable (HUF_CElt* tree, const U32* count, U32 maxSymbolValue, U32 maxNbBits)
@@ -432,13 +438,14 @@ static int HUF_validateCTable(const HUF_CElt* CTable, const unsigned* count, uns
     return !bad;
 }
 
-static void HUF_encodeSymbol(BIT_CStream_t* bitCPtr, U32 symbol, const HUF_CElt* CTable)
+size_t HUF_compressBound(size_t size) { return HUF_COMPRESSBOUND(size); }
+
+FORCE_INLINE_TEMPLATE void
+HUF_encodeSymbol(BIT_CStream_t* bitCPtr, U32 symbol, const HUF_CElt* CTable)
 {
     BIT_addBitsFast(bitCPtr, CTable[symbol].val, CTable[symbol].nbBits);
 }
 
-size_t HUF_compressBound(size_t size) { return HUF_COMPRESSBOUND(size); }
-
 #define HUF_FLUSHBITS(s)  BIT_flushBits(s)
 
 #define HUF_FLUSHBITS_1(stream) \
@@ -447,7 +454,10 @@ size_t HUF_compressBound(size_t size) { return HUF_COMPRESSBOUND(size); }
 #define HUF_FLUSHBITS_2(stream) \
     if (sizeof((stream)->bitContainer)*8 < HUF_TABLELOG_MAX*4+7) HUF_FLUSHBITS(stream)
 
-size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable)
+FORCE_INLINE_TEMPLATE size_t
+HUF_compress1X_usingCTable_internal_body(void* dst, size_t dstSize,
+                                         const void* src, size_t srcSize,
+                                         const HUF_CElt* CTable)
 {
     const BYTE* ip = (const BYTE*) src;
     BYTE* const ostart = (BYTE*)dst;
@@ -491,8 +501,58 @@ size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, si
     return BIT_closeCStream(&bitC);
 }
 
+#if DYNAMIC_BMI2
 
-size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable)
+static TARGET_ATTRIBUTE("bmi2") size_t
+HUF_compress1X_usingCTable_internal_bmi2(void* dst, size_t dstSize,
+                                         const void* src, size_t srcSize,
+                                         const HUF_CElt* CTable)
+{
+    return HUF_compress1X_usingCTable_internal_body(dst, dstSize, src, srcSize, CTable);
+}
+
+static size_t
+HUF_compress1X_usingCTable_internal_default(void* dst, size_t dstSize,
+                                            const void* src, size_t srcSize,
+                                            const HUF_CElt* CTable)
+{
+    return HUF_compress1X_usingCTable_internal_body(dst, dstSize, src, srcSize, CTable);
+}
+
+static size_t
+HUF_compress1X_usingCTable_internal(void* dst, size_t dstSize,
+                                    const void* src, size_t srcSize,
+                                    const HUF_CElt* CTable, const int bmi2)
+{
+    if (bmi2) {
+        return HUF_compress1X_usingCTable_internal_bmi2(dst, dstSize, src, srcSize, CTable);
+    }
+    return HUF_compress1X_usingCTable_internal_default(dst, dstSize, src, srcSize, CTable);
+}
+
+#else
+
+static size_t
+HUF_compress1X_usingCTable_internal(void* dst, size_t dstSize,
+                                    const void* src, size_t srcSize,
+                                    const HUF_CElt* CTable, const int bmi2)
+{
+    (void)bmi2;
+    return HUF_compress1X_usingCTable_internal_body(dst, dstSize, src, srcSize, CTable);
+}
+
+#endif
+
+size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable)
+{
+    return HUF_compress1X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, /* bmi2 */ 0);
+}
+
+
+static size_t
+HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize,
+                                    const void* src, size_t srcSize,
+                                    const HUF_CElt* CTable, int bmi2)
 {
     size_t const segmentSize = (srcSize+3)/4;   /* first 3 segments */
     const BYTE* ip = (const BYTE*) src;
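
This is the new DYNAMIC_BMI2 dispatch pattern: the body is a force-inlined template, instantiated once under TARGET_ATTRIBUTE("bmi2") and once plain, with a runtime flag (fed by the CPUID helpers in the new common/cpu.h) choosing between them. A reduced sketch of the same pattern for GCC/Clang on x86-64, with illustrative names and a stand-in body:

    #include <stddef.h>

    /* always_inline is the key : each wrapper below recompiles the body
     * under its own target settings, which is what FORCE_INLINE_TEMPLATE
     * arranges in zstd's compiler.h. */
    static inline __attribute__((always_inline))
    size_t work_body(const unsigned char* p, size_t n)
    {
        size_t s = 0;
        while (n--) s += *p++;   /* stand-in for the real encoding loop */
        return s;
    }

    __attribute__((target("bmi2")))   /* this copy may use BMI2 codegen */
    static size_t work_bmi2(const unsigned char* p, size_t n)
    { return work_body(p, n); }

    static size_t work_default(const unsigned char* p, size_t n)
    { return work_body(p, n); }

    /* hasBmi2 would come from a one-time CPUID probe at context creation */
    static size_t work(const unsigned char* p, size_t n, int hasBmi2)
    { return hasBmi2 ? work_bmi2(p, n) : work_default(p, n); }
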
@@ -505,28 +565,31 @@ size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, si
     if (srcSize < 12) return 0;   /* no saving possible : too small input */
     op += 6;   /* jumpTable */
 
-    { CHECK_V_F(cSize, HUF_compress1X_usingCTable(op, oend-op, ip, segmentSize, CTable) );
+    { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, oend-op, ip, segmentSize, CTable, bmi2) );
       if (cSize==0) return 0;
+      assert(cSize <= 65535);
       MEM_writeLE16(ostart, (U16)cSize);
       op += cSize;
     }
 
     ip += segmentSize;
-    { CHECK_V_F(cSize, HUF_compress1X_usingCTable(op, oend-op, ip, segmentSize, CTable) );
+    { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, oend-op, ip, segmentSize, CTable, bmi2) );
      if (cSize==0) return 0;
+     assert(cSize <= 65535);
      MEM_writeLE16(ostart+2, (U16)cSize);
      op += cSize;
    }
 
    ip += segmentSize;
-    { CHECK_V_F(cSize, HUF_compress1X_usingCTable(op, oend-op, ip, segmentSize, CTable) );
+    { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, oend-op, ip, segmentSize, CTable, bmi2) );
      if (cSize==0) return 0;
+     assert(cSize <= 65535);
      MEM_writeLE16(ostart+4, (U16)cSize);
      op += cSize;
    }
 
    ip += segmentSize;
-    { CHECK_V_F(cSize, HUF_compress1X_usingCTable(op, oend-op, ip, iend-ip, CTable) );
+    { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, oend-op, ip, iend-ip, CTable, bmi2) );
      if (cSize==0) return 0;
      op += cSize;
    }
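
The three MEM_writeLE16() calls fill the 6-byte jump table of the 4-stream format: the compressed sizes of streams 1-3 as little-endian 16-bit values (hence the new asserts that each cSize fits a U16), with the fourth stream's size implied by the remainder. A decoder-side sketch of that layout (illustrative, not zstd's decoder):

    #include <stdint.h>
    #include <stddef.h>

    /* Locate the four Huffman streams behind the 6-byte jump table.
     * Returns 0 on success, -1 if the stated sizes overflow the buffer. */
    static int split_streams(const uint8_t* src, size_t srcSize,
                             const uint8_t* stream[4])
    {
        size_t s1, s2, s3;
        if (srcSize < 6) return -1;
        s1 = src[0] | ((size_t)src[1] << 8);   /* little-endian U16 */
        s2 = src[2] | ((size_t)src[3] << 8);
        s3 = src[4] | ((size_t)src[5] << 8);
        if (6 + s1 + s2 + s3 > srcSize) return -1;
        stream[0] = src + 6;
        stream[1] = stream[0] + s1;
        stream[2] = stream[1] + s2;
        stream[3] = stream[2] + s3;            /* runs to src + srcSize */
        return 0;
    }
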
@@ -534,15 +597,20 @@ size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, si
     return op-ostart;
 }
 
+size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable)
+{
+    return HUF_compress4X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, /* bmi2 */ 0);
+}
+
 
 static size_t HUF_compressCTable_internal(
                 BYTE* const ostart, BYTE* op, BYTE* const oend,
                 const void* src, size_t srcSize,
-                unsigned singleStream, const HUF_CElt* CTable)
+                unsigned singleStream, const HUF_CElt* CTable, const int bmi2)
 {
     size_t const cSize = singleStream ?
-        HUF_compress1X_usingCTable(op, oend - op, src, srcSize, CTable) :
-        HUF_compress4X_usingCTable(op, oend - op, src, srcSize, CTable);
+        HUF_compress1X_usingCTable_internal(op, oend - op, src, srcSize, CTable, bmi2) :
+        HUF_compress4X_usingCTable_internal(op, oend - op, src, srcSize, CTable, bmi2);
     if (HUF_isError(cSize)) { return cSize; }
     if (cSize==0) { return 0; }   /* uncompressible */
     op += cSize;
@@ -551,86 +619,98 @@ static size_t HUF_compressCTable_internal(
     return op-ostart;
 }
 
+typedef struct {
+    U32 count[HUF_SYMBOLVALUE_MAX + 1];
+    HUF_CElt CTable[HUF_SYMBOLVALUE_MAX + 1];
+    huffNodeTable nodeTable;
+} HUF_compress_tables_t;
 
-/* `workSpace` must a table of at least 1024 unsigned */
+/* HUF_compress_internal() :
+ * `workSpace` must a table of at least HUF_WORKSPACE_SIZE_U32 unsigned */
 static size_t HUF_compress_internal (
                 void* dst, size_t dstSize,
                 const void* src, size_t srcSize,
                 unsigned maxSymbolValue, unsigned huffLog,
                 unsigned singleStream,
                 void* workSpace, size_t wkspSize,
-                HUF_CElt* oldHufTable, HUF_repeat* repeat, int preferRepeat)
+                HUF_CElt* oldHufTable, HUF_repeat* repeat, int preferRepeat,
+                const int bmi2)
 {
+    HUF_compress_tables_t* const table = (HUF_compress_tables_t*)workSpace;
     BYTE* const ostart = (BYTE*)dst;
     BYTE* const oend = ostart + dstSize;
     BYTE* op = ostart;
 
-    U32* count;
-    size_t const countSize = sizeof(U32) * (HUF_SYMBOLVALUE_MAX + 1);
-    HUF_CElt* CTable;
-    size_t const CTableSize = sizeof(HUF_CElt) * (HUF_SYMBOLVALUE_MAX + 1);
-
     /* checks & inits */
-    if (wkspSize < sizeof(huffNodeTable) + countSize + CTableSize) return ERROR(GENERIC);
-    if (!srcSize) return 0;  /* Uncompressed (note : 1 means rle, so first byte must be correct) */
-    if (!dstSize) return 0;  /* cannot fit within dst budget */
+    if (((size_t)workSpace & 3) != 0) return ERROR(GENERIC);  /* must be aligned on 4-bytes boundaries */
+    if (wkspSize < sizeof(*table)) return ERROR(workSpace_tooSmall);
+    if (!srcSize) return 0;  /* Uncompressed */
+    if (!dstSize) return 0;  /* cannot fit anything within dst budget */
     if (srcSize > HUF_BLOCKSIZE_MAX) return ERROR(srcSize_wrong);   /* current block size limit */
     if (huffLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge);
+    if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) return ERROR(maxSymbolValue_tooLarge);
    if (!maxSymbolValue) maxSymbolValue = HUF_SYMBOLVALUE_MAX;
    if (!huffLog) huffLog = HUF_TABLELOG_DEFAULT;
 
-    count = (U32*)workSpace;
-    workSpace = (BYTE*)workSpace + countSize;
-    wkspSize -= countSize;
-    CTable = (HUF_CElt*)workSpace;
-    workSpace = (BYTE*)workSpace + CTableSize;
-    wkspSize -= CTableSize;
-
-    /* Heuristic : If we don't need to check the validity of the old table use the old table for small inputs */
+    /* Heuristic : If old table is valid, use it for small inputs */
    if (preferRepeat && repeat && *repeat == HUF_repeat_valid) {
-        return HUF_compressCTable_internal(ostart, op, oend, src, srcSize, singleStream, oldHufTable);
+        return HUF_compressCTable_internal(ostart, op, oend,
+                                           src, srcSize,
+                                           singleStream, oldHufTable, bmi2);
    }
 
    /* Scan input and build symbol stats */
-    { CHECK_V_F(largest, FSE_count_wksp (count, &maxSymbolValue, (const BYTE*)src, srcSize, (U32*)workSpace) );
+    { CHECK_V_F(largest, FSE_count_wksp (table->count, &maxSymbolValue, (const BYTE*)src, srcSize, table->count) );
      if (largest == srcSize) { *ostart = ((const BYTE*)src)[0]; return 1; }   /* single symbol, rle */
-     if (largest <= (srcSize >> 7)+1) return 0;   /* Fast heuristic : not compressible enough */
+     if (largest <= (srcSize >> 7)+1) return 0;   /* heuristic : probably not compressible enough */
    }
 
    /* Check validity of previous table */
-    if (repeat && *repeat == HUF_repeat_check && !HUF_validateCTable(oldHufTable, count, maxSymbolValue)) {
+    if ( repeat
+      && *repeat == HUF_repeat_check
+      && !HUF_validateCTable(oldHufTable, table->count, maxSymbolValue)) {
        *repeat = HUF_repeat_none;
    }
    /* Heuristic : use existing table for small inputs */
    if (preferRepeat && repeat && *repeat != HUF_repeat_none) {
-        return HUF_compressCTable_internal(ostart, op, oend, src, srcSize, singleStream, oldHufTable);
+        return HUF_compressCTable_internal(ostart, op, oend,
+                                           src, srcSize,
+                                           singleStream, oldHufTable, bmi2);
    }
 
    /* Build Huffman Tree */
    huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue);
-    { CHECK_V_F(maxBits, HUF_buildCTable_wksp (CTable, count, maxSymbolValue, huffLog, workSpace, wkspSize) );
+    { CHECK_V_F(maxBits, HUF_buildCTable_wksp(table->CTable, table->count,
+                                              maxSymbolValue, huffLog,
+                                              table->nodeTable, sizeof(table->nodeTable)) );
      huffLog = (U32)maxBits;
-     /* Zero the unused symbols so we can check it for validity */
-     memset(CTable + maxSymbolValue + 1, 0, CTableSize - (maxSymbolValue + 1) * sizeof(HUF_CElt));
+     /* Zero unused symbols in CTable, so we can check it for validity */
+     memset(table->CTable + (maxSymbolValue + 1), 0,
+            sizeof(table->CTable) - ((maxSymbolValue + 1) * sizeof(HUF_CElt)));
    }
 
    /* Write table description header */
-    { CHECK_V_F(hSize, HUF_writeCTable (op, dstSize, CTable, maxSymbolValue, huffLog) );
-     /* Check if using the previous table will be beneficial */
+    { CHECK_V_F(hSize, HUF_writeCTable (op, dstSize, table->CTable, maxSymbolValue, huffLog) );
+     /* Check if using previous huffman table is beneficial */
     if (repeat && *repeat != HUF_repeat_none) {
-        size_t const oldSize = HUF_estimateCompressedSize(oldHufTable, count, maxSymbolValue);
-        size_t const newSize = HUF_estimateCompressedSize(CTable, count, maxSymbolValue);
+        size_t const oldSize = HUF_estimateCompressedSize(oldHufTable, table->count, maxSymbolValue);
+        size_t const newSize = HUF_estimateCompressedSize(table->CTable, table->count, maxSymbolValue);
        if (oldSize <= hSize + newSize || hSize + 12 >= srcSize) {
-            return HUF_compressCTable_internal(ostart, op, oend, src, srcSize, singleStream, oldHufTable);
-        }
-    }
-    /* Use the new table */
+            return HUF_compressCTable_internal(ostart, op, oend,
+                                               src, srcSize,
+                                               singleStream, oldHufTable, bmi2);
+    }   }
+
+    /* Use the new huffman table */
    if (hSize + 12ul >= srcSize) { return 0; }
    op += hSize;
    if (repeat) { *repeat = HUF_repeat_none; }
-    if (oldHufTable) { memcpy(oldHufTable, CTable, CTableSize); }  /* Save the new table */
+    if (oldHufTable)
+        memcpy(oldHufTable, table->CTable, sizeof(table->CTable));  /* Save new table */
   }
-  return HUF_compressCTable_internal(ostart, op, oend, src, srcSize, singleStream, CTable);
+  return HUF_compressCTable_internal(ostart, op, oend,
+                                     src, srcSize,
+                                     singleStream, table->CTable, bmi2);
 }
 
 
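HUF_compress_tables_t is the structural change behind most of this hunk: instead of carving count, CTable and the node table out of workSpace by hand (pointer bumps plus running wkspSize arithmetic), the workspace is cast once to a struct and the compiler derives every offset; a single alignment check and one sizeof(*table) comparison then validate the whole layout. The pattern reduced to its essentials, with illustrative names:

    #include <stddef.h>

    typedef struct {              /* all scratch tables declared once */
        unsigned count[256];
        unsigned nodes[512];
    } tables_t;

    static int with_workspace(void* workSpace, size_t wkspSize)
    {
        tables_t* const table = (tables_t*)workSpace;
        if (((size_t)workSpace & 3) != 0) return -1;   /* 4-byte alignment */
        if (wkspSize < sizeof(*table)) return -1;      /* one check covers all */
        table->count[0] = 0;   /* ... use table->count, table->nodes ... */
        return 0;
    }
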
@@ -639,52 +719,70 @@ size_t HUF_compress1X_wksp (void* dst, size_t dstSize,
                       unsigned maxSymbolValue, unsigned huffLog,
                       void* workSpace, size_t wkspSize)
 {
-    return HUF_compress_internal(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, 1 /* single stream */, workSpace, wkspSize, NULL, NULL, 0);
+    return HUF_compress_internal(dst, dstSize, src, srcSize,
+                                 maxSymbolValue, huffLog, 1 /*single stream*/,
+                                 workSpace, wkspSize,
+                                 NULL, NULL, 0, 0 /*bmi2*/);
 }
 
 size_t HUF_compress1X_repeat (void* dst, size_t dstSize,
                       const void* src, size_t srcSize,
                       unsigned maxSymbolValue, unsigned huffLog,
                       void* workSpace, size_t wkspSize,
-                      HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat)
+                      HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2)
 {
-    return HUF_compress_internal(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, 1 /* single stream */, workSpace, wkspSize, hufTable, repeat, preferRepeat);
+    return HUF_compress_internal(dst, dstSize, src, srcSize,
+                                 maxSymbolValue, huffLog, 1 /*single stream*/,
+                                 workSpace, wkspSize, hufTable,
+                                 repeat, preferRepeat, bmi2);
 }
 
 size_t HUF_compress1X (void* dst, size_t dstSize,
                 const void* src, size_t srcSize,
                 unsigned maxSymbolValue, unsigned huffLog)
 {
-    unsigned workSpace[1024];
+    unsigned workSpace[HUF_WORKSPACE_SIZE_U32];
     return HUF_compress1X_wksp(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, workSpace, sizeof(workSpace));
 }
 
+/* HUF_compress4X_repeat():
+ * compress input using 4 streams.
+ * provide workspace to generate compression tables */
 size_t HUF_compress4X_wksp (void* dst, size_t dstSize,
                       const void* src, size_t srcSize,
                       unsigned maxSymbolValue, unsigned huffLog,
                       void* workSpace, size_t wkspSize)
 {
-    return HUF_compress_internal(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, 0 /* 4 streams */, workSpace, wkspSize, NULL, NULL, 0);
+    return HUF_compress_internal(dst, dstSize, src, srcSize,
+                                 maxSymbolValue, huffLog, 0 /*4 streams*/,
+                                 workSpace, wkspSize,
+                                 NULL, NULL, 0, 0 /*bmi2*/);
 }
 
+/* HUF_compress4X_repeat():
+ * compress input using 4 streams.
+ * re-use an existing huffman compression table */
 size_t HUF_compress4X_repeat (void* dst, size_t dstSize,
                       const void* src, size_t srcSize,
                       unsigned maxSymbolValue, unsigned huffLog,
                       void* workSpace, size_t wkspSize,
-                      HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat)
+                      HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2)
 {
-    return HUF_compress_internal(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, 0 /* 4 streams */, workSpace, wkspSize, hufTable, repeat, preferRepeat);
+    return HUF_compress_internal(dst, dstSize, src, srcSize,
+                                 maxSymbolValue, huffLog, 0 /* 4 streams */,
+                                 workSpace, wkspSize,
+                                 hufTable, repeat, preferRepeat, bmi2);
 }
 
 size_t HUF_compress2 (void* dst, size_t dstSize,
                const void* src, size_t srcSize,
                unsigned maxSymbolValue, unsigned huffLog)
 {
-    unsigned workSpace[1024];
+    unsigned workSpace[HUF_WORKSPACE_SIZE_U32];
     return HUF_compress4X_wksp(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, workSpace, sizeof(workSpace));
 }
 
 size_t HUF_compress (void* dst, size_t maxDstSize, const void* src, size_t srcSize)
 {
-    return HUF_compress2(dst, maxDstSize, src, (U32)srcSize, 255, HUF_TABLELOG_DEFAULT);
+    return HUF_compress2(dst, maxDstSize, src, srcSize, 255, HUF_TABLELOG_DEFAULT);
 }