zstd-ruby 1.3.3.0 → 1.3.4.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (44) hide show
  1. checksums.yaml +5 -5
  2. data/README.md +1 -1
  3. data/ext/zstdruby/libzstd/BUCK +13 -0
  4. data/ext/zstdruby/libzstd/README.md +32 -25
  5. data/ext/zstdruby/libzstd/common/bitstream.h +1 -1
  6. data/ext/zstdruby/libzstd/common/compiler.h +25 -0
  7. data/ext/zstdruby/libzstd/common/cpu.h +216 -0
  8. data/ext/zstdruby/libzstd/common/error_private.c +1 -0
  9. data/ext/zstdruby/libzstd/common/fse.h +1 -1
  10. data/ext/zstdruby/libzstd/common/fse_decompress.c +2 -2
  11. data/ext/zstdruby/libzstd/common/huf.h +114 -89
  12. data/ext/zstdruby/libzstd/common/pool.c +46 -17
  13. data/ext/zstdruby/libzstd/common/pool.h +18 -9
  14. data/ext/zstdruby/libzstd/common/threading.h +12 -12
  15. data/ext/zstdruby/libzstd/common/zstd_errors.h +16 -7
  16. data/ext/zstdruby/libzstd/common/zstd_internal.h +4 -5
  17. data/ext/zstdruby/libzstd/compress/fse_compress.c +19 -11
  18. data/ext/zstdruby/libzstd/compress/huf_compress.c +160 -62
  19. data/ext/zstdruby/libzstd/compress/zstd_compress.c +973 -644
  20. data/ext/zstdruby/libzstd/compress/zstd_compress_internal.h +281 -34
  21. data/ext/zstdruby/libzstd/compress/zstd_double_fast.c +80 -62
  22. data/ext/zstdruby/libzstd/compress/zstd_double_fast.h +11 -4
  23. data/ext/zstdruby/libzstd/compress/zstd_fast.c +87 -71
  24. data/ext/zstdruby/libzstd/compress/zstd_fast.h +10 -6
  25. data/ext/zstdruby/libzstd/compress/zstd_lazy.c +333 -274
  26. data/ext/zstdruby/libzstd/compress/zstd_lazy.h +33 -16
  27. data/ext/zstdruby/libzstd/compress/zstd_ldm.c +305 -359
  28. data/ext/zstdruby/libzstd/compress/zstd_ldm.h +64 -21
  29. data/ext/zstdruby/libzstd/compress/zstd_opt.c +194 -56
  30. data/ext/zstdruby/libzstd/compress/zstd_opt.h +17 -5
  31. data/ext/zstdruby/libzstd/compress/zstdmt_compress.c +1131 -449
  32. data/ext/zstdruby/libzstd/compress/zstdmt_compress.h +32 -16
  33. data/ext/zstdruby/libzstd/decompress/huf_decompress.c +390 -290
  34. data/ext/zstdruby/libzstd/decompress/zstd_decompress.c +777 -439
  35. data/ext/zstdruby/libzstd/dictBuilder/cover.c +11 -8
  36. data/ext/zstdruby/libzstd/dictBuilder/zdict.c +83 -50
  37. data/ext/zstdruby/libzstd/dictBuilder/zdict.h +44 -43
  38. data/ext/zstdruby/libzstd/legacy/zstd_legacy.h +2 -0
  39. data/ext/zstdruby/libzstd/legacy/zstd_v04.c +42 -118
  40. data/ext/zstdruby/libzstd/legacy/zstd_v06.c +2 -2
  41. data/ext/zstdruby/libzstd/legacy/zstd_v07.c +2 -2
  42. data/ext/zstdruby/libzstd/zstd.h +254 -254
  43. data/lib/zstd-ruby/version.rb +1 -1
  44. metadata +4 -3
@@ -35,12 +35,20 @@ extern "C" {
35
35
  # define ZSTDERRORLIB_API ZSTDERRORLIB_VISIBILITY
36
36
  #endif
37
37
 
38
- /*-****************************************
39
- * error codes list
40
- * note : this API is still considered unstable
41
- * and shall not be used with a dynamic library.
42
- * only static linking is allowed
43
- ******************************************/
38
+ /*-*********************************************
39
+ * Error codes list
40
+ *-*********************************************
41
+ * Error codes _values_ are pinned down since v1.3.1 only.
42
+ * Therefore, don't rely on values if you may link to any version < v1.3.1.
43
+ *
44
+ * Only values < 100 are considered stable.
45
+ *
46
+ * note 1 : this API shall be used with static linking only.
47
+ * dynamic linking is not yet officially supported.
48
+ * note 2 : Prefer relying on the enum than on its value whenever possible
49
+ * This is the only supported way to use the error list < v1.3.1
50
+ * note 3 : ZSTD_isError() is always correct, whatever the library version.
51
+ **********************************************/
44
52
  typedef enum {
45
53
  ZSTD_error_no_error = 0,
46
54
  ZSTD_error_GENERIC = 1,
@@ -61,9 +69,10 @@ typedef enum {
61
69
  ZSTD_error_stage_wrong = 60,
62
70
  ZSTD_error_init_missing = 62,
63
71
  ZSTD_error_memory_allocation = 64,
72
+ ZSTD_error_workSpace_tooSmall= 66,
64
73
  ZSTD_error_dstSize_tooSmall = 70,
65
74
  ZSTD_error_srcSize_wrong = 72,
66
- /* following error codes are not stable and may be removed or changed in a future version */
75
+ /* following error codes are __NOT STABLE__, they can be removed or changed in future versions */
67
76
  ZSTD_error_frameIndex_tooLarge = 100,
68
77
  ZSTD_error_seekableIO = 102,
69
78
  ZSTD_error_maxCode = 120 /* never EVER use this value directly, it can change in future versions! Use ZSTD_isError() instead */
@@ -132,14 +132,15 @@ typedef enum { set_basic, set_rle, set_compressed, set_repeat } symbolEncodingTy
132
132
 
133
133
  #define Litbits 8
134
134
  #define MaxLit ((1<<Litbits) - 1)
135
- #define MaxML 52
136
- #define MaxLL 35
135
+ #define MaxML 52
136
+ #define MaxLL 35
137
137
  #define DefaultMaxOff 28
138
- #define MaxOff 31
138
+ #define MaxOff 31
139
139
  #define MaxSeq MAX(MaxLL, MaxML) /* Assumption : MaxOff < MaxLL,MaxML */
140
140
  #define MLFSELog 9
141
141
  #define LLFSELog 9
142
142
  #define OffFSELog 8
143
+ #define MaxFSELog MAX(MAX(MLFSELog, LLFSELog), OffFSELog)
143
144
 
144
145
  static const U32 LL_bits[MaxLL+1] = { 0, 0, 0, 0, 0, 0, 0, 0,
145
146
  0, 0, 0, 0, 0, 0, 0, 0,
@@ -228,8 +229,6 @@ typedef struct {
228
229
  BYTE* ofCode;
229
230
  U32 longLengthID; /* 0 == no longLength; 1 == Lit.longLength; 2 == Match.longLength; */
230
231
  U32 longLengthPos;
231
- U32 rep[ZSTD_REP_NUM];
232
- U32 repToConfirm[ZSTD_REP_NUM];
233
232
  } seqStore_t;
234
233
 
235
234
  const seqStore_t* ZSTD_getSeqStore(const ZSTD_CCtx* ctx); /* compress & dictBuilder */
@@ -248,7 +248,7 @@ static size_t FSE_writeNCount_generic (void* header, size_t headerBufferSize,
248
248
  bitCount -= (count<max);
249
249
  previous0 = (count==1);
250
250
  if (remaining<1) return ERROR(GENERIC);
251
- while (remaining<threshold) nbBits--, threshold>>=1;
251
+ while (remaining<threshold) { nbBits--; threshold>>=1; }
252
252
  }
253
253
  if (bitCount>16) {
254
254
  if ((!writeIsSafe) && (out > oend - 2)) return ERROR(dstSize_tooSmall); /* Buffer overflow */
@@ -292,7 +292,7 @@ size_t FSE_writeNCount (void* buffer, size_t bufferSize, const short* normalized
292
292
  It doesn't use any additional memory.
293
293
  But this function is unsafe : it doesn't check that all values within `src` can fit into `count`.
294
294
  For this reason, prefer using a table `count` with 256 elements.
295
- @return : count of most numerous element
295
+ @return : count of most numerous element.
296
296
  */
297
297
  size_t FSE_count_simple(unsigned* count, unsigned* maxSymbolValuePtr,
298
298
  const void* src, size_t srcSize)
@@ -305,7 +305,10 @@ size_t FSE_count_simple(unsigned* count, unsigned* maxSymbolValuePtr,
305
305
  memset(count, 0, (maxSymbolValue+1)*sizeof(*count));
306
306
  if (srcSize==0) { *maxSymbolValuePtr = 0; return 0; }
307
307
 
308
- while (ip<end) count[*ip++]++;
308
+ while (ip<end) {
309
+ assert(*ip <= maxSymbolValue);
310
+ count[*ip++]++;
311
+ }
309
312
 
310
313
  while (!count[maxSymbolValue]) maxSymbolValue--;
311
314
  *maxSymbolValuePtr = maxSymbolValue;
@@ -318,7 +321,8 @@ size_t FSE_count_simple(unsigned* count, unsigned* maxSymbolValuePtr,
318
321
 
319
322
  /* FSE_count_parallel_wksp() :
320
323
  * Same as FSE_count_parallel(), but using an externally provided scratch buffer.
321
- * `workSpace` size must be a minimum of `1024 * sizeof(unsigned)`` */
324
+ * `workSpace` size must be a minimum of `1024 * sizeof(unsigned)`.
325
+ * @return : largest histogram frequency, or an error code (notably when histogram would be larger than *maxSymbolValuePtr). */
322
326
  static size_t FSE_count_parallel_wksp(
323
327
  unsigned* count, unsigned* maxSymbolValuePtr,
324
328
  const void* source, size_t sourceSize,
@@ -333,7 +337,7 @@ static size_t FSE_count_parallel_wksp(
333
337
  U32* const Counting3 = Counting2 + 256;
334
338
  U32* const Counting4 = Counting3 + 256;
335
339
 
336
- memset(Counting1, 0, 4*256*sizeof(unsigned));
340
+ memset(workSpace, 0, 4*256*sizeof(unsigned));
337
341
 
338
342
  /* safety checks */
339
343
  if (!sourceSize) {
@@ -379,7 +383,9 @@ static size_t FSE_count_parallel_wksp(
379
383
  if (Counting1[s]) return ERROR(maxSymbolValue_tooSmall);
380
384
  } }
381
385
 
382
- { U32 s; for (s=0; s<=maxSymbolValue; s++) {
386
+ { U32 s;
387
+ if (maxSymbolValue > 255) maxSymbolValue = 255;
388
+ for (s=0; s<=maxSymbolValue; s++) {
383
389
  count[s] = Counting1[s] + Counting2[s] + Counting3[s] + Counting4[s];
384
390
  if (count[s] > max) max = count[s];
385
391
  } }
@@ -393,9 +399,11 @@ static size_t FSE_count_parallel_wksp(
393
399
  * Same as FSE_countFast(), but using an externally provided scratch buffer.
394
400
  * `workSpace` size must be a table of >= `1024` unsigned */
395
401
  size_t FSE_countFast_wksp(unsigned* count, unsigned* maxSymbolValuePtr,
396
- const void* source, size_t sourceSize, unsigned* workSpace)
402
+ const void* source, size_t sourceSize,
403
+ unsigned* workSpace)
397
404
  {
398
- if (sourceSize < 1500) return FSE_count_simple(count, maxSymbolValuePtr, source, sourceSize);
405
+ if (sourceSize < 1500) /* heuristic threshold */
406
+ return FSE_count_simple(count, maxSymbolValuePtr, source, sourceSize);
399
407
  return FSE_count_parallel_wksp(count, maxSymbolValuePtr, source, sourceSize, 0, workSpace);
400
408
  }
401
409
 
@@ -540,7 +548,7 @@ static size_t FSE_normalizeM2(short* norm, U32 tableLog, const unsigned* count,
540
548
  find max, then give all remaining points to max */
541
549
  U32 maxV = 0, maxC = 0;
542
550
  for (s=0; s<=maxSymbolValue; s++)
543
- if (count[s] > maxC) maxV=s, maxC=count[s];
551
+ if (count[s] > maxC) { maxV=s; maxC=count[s]; }
544
552
  norm[maxV] += (short)ToDistribute;
545
553
  return 0;
546
554
  }
@@ -548,7 +556,7 @@ static size_t FSE_normalizeM2(short* norm, U32 tableLog, const unsigned* count,
548
556
  if (total == 0) {
549
557
  /* all of the symbols were low enough for the lowOne or lowThreshold */
550
558
  for (s=0; ToDistribute > 0; s = (s+1)%(maxSymbolValue+1))
551
- if (norm[s] > 0) ToDistribute--, norm[s]++;
559
+ if (norm[s] > 0) { ToDistribute--; norm[s]++; }
552
560
  return 0;
553
561
  }
554
562
 
@@ -604,7 +612,7 @@ size_t FSE_normalizeCount (short* normalizedCounter, unsigned tableLog,
604
612
  U64 restToBeat = vStep * rtbTable[proba];
605
613
  proba += (count[s]*step) - ((U64)proba<<scale) > restToBeat;
606
614
  }
607
- if (proba > largestP) largestP=proba, largest=s;
615
+ if (proba > largestP) { largestP=proba; largest=s; }
608
616
  normalizedCounter[s] = proba;
609
617
  stillToDistribute -= proba;
610
618
  } }
@@ -46,6 +46,7 @@
46
46
  #include <string.h> /* memcpy, memset */
47
47
  #include <stdio.h> /* printf (debug) */
48
48
  #include "bitstream.h"
49
+ #include "compiler.h"
49
50
  #define FSE_STATIC_LINKING_ONLY /* FSE_optimalTableLog_internal */
50
51
  #include "fse.h" /* header compression */
51
52
  #define HUF_STATIC_LINKING_ONLY
@@ -322,7 +323,10 @@ static void HUF_sort(nodeElt* huffNode, const U32* count, U32 maxSymbolValue)
322
323
  U32 const c = count[n];
323
324
  U32 const r = BIT_highbit32(c+1) + 1;
324
325
  U32 pos = rank[r].current++;
325
- while ((pos > rank[r].base) && (c > huffNode[pos-1].count)) huffNode[pos]=huffNode[pos-1], pos--;
326
+ while ((pos > rank[r].base) && (c > huffNode[pos-1].count)) {
327
+ huffNode[pos] = huffNode[pos-1];
328
+ pos--;
329
+ }
326
330
  huffNode[pos].count = c;
327
331
  huffNode[pos].byte = (BYTE)n;
328
332
  }
@@ -331,10 +335,10 @@ static void HUF_sort(nodeElt* huffNode, const U32* count, U32 maxSymbolValue)
331
335
 
332
336
  /** HUF_buildCTable_wksp() :
333
337
  * Same as HUF_buildCTable(), but using externally allocated scratch buffer.
334
- * `workSpace` must be aligned on 4-bytes boundaries, and be at least as large as a table of 1024 unsigned.
338
+ * `workSpace` must be aligned on 4-bytes boundaries, and be at least as large as a table of HUF_CTABLE_WORKSPACE_SIZE_U32 unsigned.
335
339
  */
336
340
  #define STARTNODE (HUF_SYMBOLVALUE_MAX+1)
337
- typedef nodeElt huffNodeTable[2*HUF_SYMBOLVALUE_MAX+1 +1];
341
+ typedef nodeElt huffNodeTable[HUF_CTABLE_WORKSPACE_SIZE_U32];
338
342
  size_t HUF_buildCTable_wksp (HUF_CElt* tree, const U32* count, U32 maxSymbolValue, U32 maxNbBits, void* workSpace, size_t wkspSize)
339
343
  {
340
344
  nodeElt* const huffNode0 = (nodeElt*)workSpace;
@@ -345,9 +349,10 @@ size_t HUF_buildCTable_wksp (HUF_CElt* tree, const U32* count, U32 maxSymbolValu
345
349
  U32 nodeRoot;
346
350
 
347
351
  /* safety checks */
348
- if (wkspSize < sizeof(huffNodeTable)) return ERROR(GENERIC); /* workSpace is not large enough */
352
+ if (((size_t)workSpace & 3) != 0) return ERROR(GENERIC); /* must be aligned on 4-bytes boundaries */
353
+ if (wkspSize < sizeof(huffNodeTable)) return ERROR(workSpace_tooSmall);
349
354
  if (maxNbBits == 0) maxNbBits = HUF_TABLELOG_DEFAULT;
350
- if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) return ERROR(GENERIC);
355
+ if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) return ERROR(maxSymbolValue_tooLarge);
351
356
  memset(huffNode0, 0, sizeof(huffNodeTable));
352
357
 
353
358
  /* sort, decreasing order */
@@ -405,6 +410,7 @@ size_t HUF_buildCTable_wksp (HUF_CElt* tree, const U32* count, U32 maxSymbolValu
405
410
  }
406
411
 
407
412
  /** HUF_buildCTable() :
413
+ * @return : maxNbBits
408
414
  * Note : count is used before tree is written, so they can safely overlap
409
415
  */
410
416
  size_t HUF_buildCTable (HUF_CElt* tree, const U32* count, U32 maxSymbolValue, U32 maxNbBits)
@@ -432,13 +438,14 @@ static int HUF_validateCTable(const HUF_CElt* CTable, const unsigned* count, uns
432
438
  return !bad;
433
439
  }
434
440
 
435
- static void HUF_encodeSymbol(BIT_CStream_t* bitCPtr, U32 symbol, const HUF_CElt* CTable)
441
+ size_t HUF_compressBound(size_t size) { return HUF_COMPRESSBOUND(size); }
442
+
443
+ FORCE_INLINE_TEMPLATE void
444
+ HUF_encodeSymbol(BIT_CStream_t* bitCPtr, U32 symbol, const HUF_CElt* CTable)
436
445
  {
437
446
  BIT_addBitsFast(bitCPtr, CTable[symbol].val, CTable[symbol].nbBits);
438
447
  }
439
448
 
440
- size_t HUF_compressBound(size_t size) { return HUF_COMPRESSBOUND(size); }
441
-
442
449
  #define HUF_FLUSHBITS(s) BIT_flushBits(s)
443
450
 
444
451
  #define HUF_FLUSHBITS_1(stream) \
@@ -447,7 +454,10 @@ size_t HUF_compressBound(size_t size) { return HUF_COMPRESSBOUND(size); }
447
454
  #define HUF_FLUSHBITS_2(stream) \
448
455
  if (sizeof((stream)->bitContainer)*8 < HUF_TABLELOG_MAX*4+7) HUF_FLUSHBITS(stream)
449
456
 
450
- size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable)
457
+ FORCE_INLINE_TEMPLATE size_t
458
+ HUF_compress1X_usingCTable_internal_body(void* dst, size_t dstSize,
459
+ const void* src, size_t srcSize,
460
+ const HUF_CElt* CTable)
451
461
  {
452
462
  const BYTE* ip = (const BYTE*) src;
453
463
  BYTE* const ostart = (BYTE*)dst;
@@ -491,8 +501,58 @@ size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, si
491
501
  return BIT_closeCStream(&bitC);
492
502
  }
493
503
 
504
+ #if DYNAMIC_BMI2
494
505
 
495
- size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable)
506
+ static TARGET_ATTRIBUTE("bmi2") size_t
507
+ HUF_compress1X_usingCTable_internal_bmi2(void* dst, size_t dstSize,
508
+ const void* src, size_t srcSize,
509
+ const HUF_CElt* CTable)
510
+ {
511
+ return HUF_compress1X_usingCTable_internal_body(dst, dstSize, src, srcSize, CTable);
512
+ }
513
+
514
+ static size_t
515
+ HUF_compress1X_usingCTable_internal_default(void* dst, size_t dstSize,
516
+ const void* src, size_t srcSize,
517
+ const HUF_CElt* CTable)
518
+ {
519
+ return HUF_compress1X_usingCTable_internal_body(dst, dstSize, src, srcSize, CTable);
520
+ }
521
+
522
+ static size_t
523
+ HUF_compress1X_usingCTable_internal(void* dst, size_t dstSize,
524
+ const void* src, size_t srcSize,
525
+ const HUF_CElt* CTable, const int bmi2)
526
+ {
527
+ if (bmi2) {
528
+ return HUF_compress1X_usingCTable_internal_bmi2(dst, dstSize, src, srcSize, CTable);
529
+ }
530
+ return HUF_compress1X_usingCTable_internal_default(dst, dstSize, src, srcSize, CTable);
531
+ }
532
+
533
+ #else
534
+
535
+ static size_t
536
+ HUF_compress1X_usingCTable_internal(void* dst, size_t dstSize,
537
+ const void* src, size_t srcSize,
538
+ const HUF_CElt* CTable, const int bmi2)
539
+ {
540
+ (void)bmi2;
541
+ return HUF_compress1X_usingCTable_internal_body(dst, dstSize, src, srcSize, CTable);
542
+ }
543
+
544
+ #endif
545
+
546
+ size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable)
547
+ {
548
+ return HUF_compress1X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, /* bmi2 */ 0);
549
+ }
550
+
551
+
552
+ static size_t
553
+ HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize,
554
+ const void* src, size_t srcSize,
555
+ const HUF_CElt* CTable, int bmi2)
496
556
  {
497
557
  size_t const segmentSize = (srcSize+3)/4; /* first 3 segments */
498
558
  const BYTE* ip = (const BYTE*) src;
@@ -505,28 +565,31 @@ size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, si
505
565
  if (srcSize < 12) return 0; /* no saving possible : too small input */
506
566
  op += 6; /* jumpTable */
507
567
 
508
- { CHECK_V_F(cSize, HUF_compress1X_usingCTable(op, oend-op, ip, segmentSize, CTable) );
568
+ { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, oend-op, ip, segmentSize, CTable, bmi2) );
509
569
  if (cSize==0) return 0;
570
+ assert(cSize <= 65535);
510
571
  MEM_writeLE16(ostart, (U16)cSize);
511
572
  op += cSize;
512
573
  }
513
574
 
514
575
  ip += segmentSize;
515
- { CHECK_V_F(cSize, HUF_compress1X_usingCTable(op, oend-op, ip, segmentSize, CTable) );
576
+ { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, oend-op, ip, segmentSize, CTable, bmi2) );
516
577
  if (cSize==0) return 0;
578
+ assert(cSize <= 65535);
517
579
  MEM_writeLE16(ostart+2, (U16)cSize);
518
580
  op += cSize;
519
581
  }
520
582
 
521
583
  ip += segmentSize;
522
- { CHECK_V_F(cSize, HUF_compress1X_usingCTable(op, oend-op, ip, segmentSize, CTable) );
584
+ { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, oend-op, ip, segmentSize, CTable, bmi2) );
523
585
  if (cSize==0) return 0;
586
+ assert(cSize <= 65535);
524
587
  MEM_writeLE16(ostart+4, (U16)cSize);
525
588
  op += cSize;
526
589
  }
527
590
 
528
591
  ip += segmentSize;
529
- { CHECK_V_F(cSize, HUF_compress1X_usingCTable(op, oend-op, ip, iend-ip, CTable) );
592
+ { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, oend-op, ip, iend-ip, CTable, bmi2) );
530
593
  if (cSize==0) return 0;
531
594
  op += cSize;
532
595
  }
@@ -534,15 +597,20 @@ size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, si
534
597
  return op-ostart;
535
598
  }
536
599
 
600
+ size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable)
601
+ {
602
+ return HUF_compress4X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, /* bmi2 */ 0);
603
+ }
604
+
537
605
 
538
606
  static size_t HUF_compressCTable_internal(
539
607
  BYTE* const ostart, BYTE* op, BYTE* const oend,
540
608
  const void* src, size_t srcSize,
541
- unsigned singleStream, const HUF_CElt* CTable)
609
+ unsigned singleStream, const HUF_CElt* CTable, const int bmi2)
542
610
  {
543
611
  size_t const cSize = singleStream ?
544
- HUF_compress1X_usingCTable(op, oend - op, src, srcSize, CTable) :
545
- HUF_compress4X_usingCTable(op, oend - op, src, srcSize, CTable);
612
+ HUF_compress1X_usingCTable_internal(op, oend - op, src, srcSize, CTable, bmi2) :
613
+ HUF_compress4X_usingCTable_internal(op, oend - op, src, srcSize, CTable, bmi2);
546
614
  if (HUF_isError(cSize)) { return cSize; }
547
615
  if (cSize==0) { return 0; } /* uncompressible */
548
616
  op += cSize;
@@ -551,86 +619,98 @@ static size_t HUF_compressCTable_internal(
551
619
  return op-ostart;
552
620
  }
553
621
 
622
+ typedef struct {
623
+ U32 count[HUF_SYMBOLVALUE_MAX + 1];
624
+ HUF_CElt CTable[HUF_SYMBOLVALUE_MAX + 1];
625
+ huffNodeTable nodeTable;
626
+ } HUF_compress_tables_t;
554
627
 
555
- /* `workSpace` must a table of at least 1024 unsigned */
628
+ /* HUF_compress_internal() :
629
+ * `workSpace` must be a table of at least HUF_WORKSPACE_SIZE_U32 unsigned */
556
630
  static size_t HUF_compress_internal (
557
631
  void* dst, size_t dstSize,
558
632
  const void* src, size_t srcSize,
559
633
  unsigned maxSymbolValue, unsigned huffLog,
560
634
  unsigned singleStream,
561
635
  void* workSpace, size_t wkspSize,
562
- HUF_CElt* oldHufTable, HUF_repeat* repeat, int preferRepeat)
636
+ HUF_CElt* oldHufTable, HUF_repeat* repeat, int preferRepeat,
637
+ const int bmi2)
563
638
  {
639
+ HUF_compress_tables_t* const table = (HUF_compress_tables_t*)workSpace;
564
640
  BYTE* const ostart = (BYTE*)dst;
565
641
  BYTE* const oend = ostart + dstSize;
566
642
  BYTE* op = ostart;
567
643
 
568
- U32* count;
569
- size_t const countSize = sizeof(U32) * (HUF_SYMBOLVALUE_MAX + 1);
570
- HUF_CElt* CTable;
571
- size_t const CTableSize = sizeof(HUF_CElt) * (HUF_SYMBOLVALUE_MAX + 1);
572
-
573
644
  /* checks & inits */
574
- if (wkspSize < sizeof(huffNodeTable) + countSize + CTableSize) return ERROR(GENERIC);
575
- if (!srcSize) return 0; /* Uncompressed (note : 1 means rle, so first byte must be correct) */
576
- if (!dstSize) return 0; /* cannot fit within dst budget */
645
+ if (((size_t)workSpace & 3) != 0) return ERROR(GENERIC); /* must be aligned on 4-bytes boundaries */
646
+ if (wkspSize < sizeof(*table)) return ERROR(workSpace_tooSmall);
647
+ if (!srcSize) return 0; /* Uncompressed */
648
+ if (!dstSize) return 0; /* cannot fit anything within dst budget */
577
649
  if (srcSize > HUF_BLOCKSIZE_MAX) return ERROR(srcSize_wrong); /* current block size limit */
578
650
  if (huffLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge);
651
+ if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) return ERROR(maxSymbolValue_tooLarge);
579
652
  if (!maxSymbolValue) maxSymbolValue = HUF_SYMBOLVALUE_MAX;
580
653
  if (!huffLog) huffLog = HUF_TABLELOG_DEFAULT;
581
654
 
582
- count = (U32*)workSpace;
583
- workSpace = (BYTE*)workSpace + countSize;
584
- wkspSize -= countSize;
585
- CTable = (HUF_CElt*)workSpace;
586
- workSpace = (BYTE*)workSpace + CTableSize;
587
- wkspSize -= CTableSize;
588
-
589
- /* Heuristic : If we don't need to check the validity of the old table use the old table for small inputs */
655
+ /* Heuristic : If old table is valid, use it for small inputs */
590
656
  if (preferRepeat && repeat && *repeat == HUF_repeat_valid) {
591
- return HUF_compressCTable_internal(ostart, op, oend, src, srcSize, singleStream, oldHufTable);
657
+ return HUF_compressCTable_internal(ostart, op, oend,
658
+ src, srcSize,
659
+ singleStream, oldHufTable, bmi2);
592
660
  }
593
661
 
594
662
  /* Scan input and build symbol stats */
595
- { CHECK_V_F(largest, FSE_count_wksp (count, &maxSymbolValue, (const BYTE*)src, srcSize, (U32*)workSpace) );
663
+ { CHECK_V_F(largest, FSE_count_wksp (table->count, &maxSymbolValue, (const BYTE*)src, srcSize, table->count) );
596
664
  if (largest == srcSize) { *ostart = ((const BYTE*)src)[0]; return 1; } /* single symbol, rle */
597
- if (largest <= (srcSize >> 7)+1) return 0; /* Fast heuristic : not compressible enough */
665
+ if (largest <= (srcSize >> 7)+1) return 0; /* heuristic : probably not compressible enough */
598
666
  }
599
667
 
600
668
  /* Check validity of previous table */
601
- if (repeat && *repeat == HUF_repeat_check && !HUF_validateCTable(oldHufTable, count, maxSymbolValue)) {
669
+ if ( repeat
670
+ && *repeat == HUF_repeat_check
671
+ && !HUF_validateCTable(oldHufTable, table->count, maxSymbolValue)) {
602
672
  *repeat = HUF_repeat_none;
603
673
  }
604
674
  /* Heuristic : use existing table for small inputs */
605
675
  if (preferRepeat && repeat && *repeat != HUF_repeat_none) {
606
- return HUF_compressCTable_internal(ostart, op, oend, src, srcSize, singleStream, oldHufTable);
676
+ return HUF_compressCTable_internal(ostart, op, oend,
677
+ src, srcSize,
678
+ singleStream, oldHufTable, bmi2);
607
679
  }
608
680
 
609
681
  /* Build Huffman Tree */
610
682
  huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue);
611
- { CHECK_V_F(maxBits, HUF_buildCTable_wksp (CTable, count, maxSymbolValue, huffLog, workSpace, wkspSize) );
683
+ { CHECK_V_F(maxBits, HUF_buildCTable_wksp(table->CTable, table->count,
684
+ maxSymbolValue, huffLog,
685
+ table->nodeTable, sizeof(table->nodeTable)) );
612
686
  huffLog = (U32)maxBits;
613
- /* Zero the unused symbols so we can check it for validity */
614
- memset(CTable + maxSymbolValue + 1, 0, CTableSize - (maxSymbolValue + 1) * sizeof(HUF_CElt));
687
+ /* Zero unused symbols in CTable, so we can check it for validity */
688
+ memset(table->CTable + (maxSymbolValue + 1), 0,
689
+ sizeof(table->CTable) - ((maxSymbolValue + 1) * sizeof(HUF_CElt)));
615
690
  }
616
691
 
617
692
  /* Write table description header */
618
- { CHECK_V_F(hSize, HUF_writeCTable (op, dstSize, CTable, maxSymbolValue, huffLog) );
619
- /* Check if using the previous table will be beneficial */
693
+ { CHECK_V_F(hSize, HUF_writeCTable (op, dstSize, table->CTable, maxSymbolValue, huffLog) );
694
+ /* Check if using previous huffman table is beneficial */
620
695
  if (repeat && *repeat != HUF_repeat_none) {
621
- size_t const oldSize = HUF_estimateCompressedSize(oldHufTable, count, maxSymbolValue);
622
- size_t const newSize = HUF_estimateCompressedSize(CTable, count, maxSymbolValue);
696
+ size_t const oldSize = HUF_estimateCompressedSize(oldHufTable, table->count, maxSymbolValue);
697
+ size_t const newSize = HUF_estimateCompressedSize(table->CTable, table->count, maxSymbolValue);
623
698
  if (oldSize <= hSize + newSize || hSize + 12 >= srcSize) {
624
- return HUF_compressCTable_internal(ostart, op, oend, src, srcSize, singleStream, oldHufTable);
625
- }
626
- }
627
- /* Use the new table */
699
+ return HUF_compressCTable_internal(ostart, op, oend,
700
+ src, srcSize,
701
+ singleStream, oldHufTable, bmi2);
702
+ } }
703
+
704
+ /* Use the new huffman table */
628
705
  if (hSize + 12ul >= srcSize) { return 0; }
629
706
  op += hSize;
630
707
  if (repeat) { *repeat = HUF_repeat_none; }
631
- if (oldHufTable) { memcpy(oldHufTable, CTable, CTableSize); } /* Save the new table */
708
+ if (oldHufTable)
709
+ memcpy(oldHufTable, table->CTable, sizeof(table->CTable)); /* Save new table */
632
710
  }
633
- return HUF_compressCTable_internal(ostart, op, oend, src, srcSize, singleStream, CTable);
711
+ return HUF_compressCTable_internal(ostart, op, oend,
712
+ src, srcSize,
713
+ singleStream, table->CTable, bmi2);
634
714
  }
635
715
 
636
716
 
@@ -639,52 +719,70 @@ size_t HUF_compress1X_wksp (void* dst, size_t dstSize,
639
719
  unsigned maxSymbolValue, unsigned huffLog,
640
720
  void* workSpace, size_t wkspSize)
641
721
  {
642
- return HUF_compress_internal(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, 1 /* single stream */, workSpace, wkspSize, NULL, NULL, 0);
722
+ return HUF_compress_internal(dst, dstSize, src, srcSize,
723
+ maxSymbolValue, huffLog, 1 /*single stream*/,
724
+ workSpace, wkspSize,
725
+ NULL, NULL, 0, 0 /*bmi2*/);
643
726
  }
644
727
 
645
728
  size_t HUF_compress1X_repeat (void* dst, size_t dstSize,
646
729
  const void* src, size_t srcSize,
647
730
  unsigned maxSymbolValue, unsigned huffLog,
648
731
  void* workSpace, size_t wkspSize,
649
- HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat)
732
+ HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2)
650
733
  {
651
- return HUF_compress_internal(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, 1 /* single stream */, workSpace, wkspSize, hufTable, repeat, preferRepeat);
734
+ return HUF_compress_internal(dst, dstSize, src, srcSize,
735
+ maxSymbolValue, huffLog, 1 /*single stream*/,
736
+ workSpace, wkspSize, hufTable,
737
+ repeat, preferRepeat, bmi2);
652
738
  }
653
739
 
654
740
  size_t HUF_compress1X (void* dst, size_t dstSize,
655
741
  const void* src, size_t srcSize,
656
742
  unsigned maxSymbolValue, unsigned huffLog)
657
743
  {
658
- unsigned workSpace[1024];
744
+ unsigned workSpace[HUF_WORKSPACE_SIZE_U32];
659
745
  return HUF_compress1X_wksp(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, workSpace, sizeof(workSpace));
660
746
  }
661
747
 
748
+ /* HUF_compress4X_wksp():
749
+ * compress input using 4 streams.
750
+ * provide workspace to generate compression tables */
662
751
  size_t HUF_compress4X_wksp (void* dst, size_t dstSize,
663
752
  const void* src, size_t srcSize,
664
753
  unsigned maxSymbolValue, unsigned huffLog,
665
754
  void* workSpace, size_t wkspSize)
666
755
  {
667
- return HUF_compress_internal(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, 0 /* 4 streams */, workSpace, wkspSize, NULL, NULL, 0);
756
+ return HUF_compress_internal(dst, dstSize, src, srcSize,
757
+ maxSymbolValue, huffLog, 0 /*4 streams*/,
758
+ workSpace, wkspSize,
759
+ NULL, NULL, 0, 0 /*bmi2*/);
668
760
  }
669
761
 
762
+ /* HUF_compress4X_repeat():
763
+ * compress input using 4 streams.
764
+ * re-use an existing huffman compression table */
670
765
  size_t HUF_compress4X_repeat (void* dst, size_t dstSize,
671
766
  const void* src, size_t srcSize,
672
767
  unsigned maxSymbolValue, unsigned huffLog,
673
768
  void* workSpace, size_t wkspSize,
674
- HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat)
769
+ HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2)
675
770
  {
676
- return HUF_compress_internal(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, 0 /* 4 streams */, workSpace, wkspSize, hufTable, repeat, preferRepeat);
771
+ return HUF_compress_internal(dst, dstSize, src, srcSize,
772
+ maxSymbolValue, huffLog, 0 /* 4 streams */,
773
+ workSpace, wkspSize,
774
+ hufTable, repeat, preferRepeat, bmi2);
677
775
  }
678
776
 
679
777
  size_t HUF_compress2 (void* dst, size_t dstSize,
680
778
  const void* src, size_t srcSize,
681
779
  unsigned maxSymbolValue, unsigned huffLog)
682
780
  {
683
- unsigned workSpace[1024];
781
+ unsigned workSpace[HUF_WORKSPACE_SIZE_U32];
684
782
  return HUF_compress4X_wksp(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, workSpace, sizeof(workSpace));
685
783
  }
686
784
 
687
785
  size_t HUF_compress (void* dst, size_t maxDstSize, const void* src, size_t srcSize)
688
786
  {
689
- return HUF_compress2(dst, maxDstSize, src, (U32)srcSize, 255, HUF_TABLELOG_DEFAULT);
787
+ return HUF_compress2(dst, maxDstSize, src, srcSize, 255, HUF_TABLELOG_DEFAULT);
690
788
  }