extzstd 0.1 → 0.1.1

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
Files changed (86):
  1. checksums.yaml +4 -4
  2. data/HISTORY.ja +5 -0
  3. data/README.md +5 -5
  4. data/contrib/zstd/CONTRIBUTING.md +42 -0
  5. data/contrib/zstd/LICENSE-examples +11 -0
  6. data/contrib/zstd/Makefile +315 -0
  7. data/contrib/zstd/NEWS +261 -0
  8. data/contrib/zstd/PATENTS +33 -0
  9. data/contrib/zstd/README.md +121 -41
  10. data/contrib/zstd/TESTING.md +44 -0
  11. data/contrib/zstd/appveyor.yml +178 -0
  12. data/contrib/zstd/circle.yml +75 -0
  13. data/contrib/zstd/lib/BUCK +186 -0
  14. data/contrib/zstd/lib/Makefile +163 -0
  15. data/contrib/zstd/lib/README.md +77 -0
  16. data/contrib/zstd/{common → lib/common}/bitstream.h +7 -4
  17. data/contrib/zstd/{common → lib/common}/entropy_common.c +19 -23
  18. data/contrib/zstd/{common → lib/common}/error_private.c +0 -0
  19. data/contrib/zstd/{common → lib/common}/error_private.h +0 -0
  20. data/contrib/zstd/{common → lib/common}/fse.h +94 -34
  21. data/contrib/zstd/{common → lib/common}/fse_decompress.c +18 -19
  22. data/contrib/zstd/{common → lib/common}/huf.h +52 -20
  23. data/contrib/zstd/{common → lib/common}/mem.h +17 -13
  24. data/contrib/zstd/lib/common/pool.c +194 -0
  25. data/contrib/zstd/lib/common/pool.h +56 -0
  26. data/contrib/zstd/lib/common/threading.c +80 -0
  27. data/contrib/zstd/lib/common/threading.h +104 -0
  28. data/contrib/zstd/{common → lib/common}/xxhash.c +3 -1
  29. data/contrib/zstd/{common → lib/common}/xxhash.h +11 -15
  30. data/contrib/zstd/{common → lib/common}/zstd_common.c +1 -11
  31. data/contrib/zstd/{common → lib/common}/zstd_errors.h +16 -2
  32. data/contrib/zstd/{common → lib/common}/zstd_internal.h +17 -1
  33. data/contrib/zstd/{compress → lib/compress}/fse_compress.c +138 -91
  34. data/contrib/zstd/{compress → lib/compress}/huf_compress.c +218 -67
  35. data/contrib/zstd/{compress → lib/compress}/zstd_compress.c +231 -108
  36. data/contrib/zstd/{compress → lib/compress}/zstd_opt.h +44 -25
  37. data/contrib/zstd/lib/compress/zstdmt_compress.c +739 -0
  38. data/contrib/zstd/lib/compress/zstdmt_compress.h +78 -0
  39. data/contrib/zstd/{decompress → lib/decompress}/huf_decompress.c +28 -23
  40. data/contrib/zstd/{decompress → lib/decompress}/zstd_decompress.c +814 -176
  41. data/contrib/zstd/{common → lib/deprecated}/zbuff.h +60 -39
  42. data/contrib/zstd/lib/deprecated/zbuff_common.c +26 -0
  43. data/contrib/zstd/lib/deprecated/zbuff_compress.c +145 -0
  44. data/contrib/zstd/lib/deprecated/zbuff_decompress.c +74 -0
  45. data/contrib/zstd/lib/dictBuilder/cover.c +1029 -0
  46. data/contrib/zstd/{dictBuilder → lib/dictBuilder}/divsufsort.c +0 -0
  47. data/contrib/zstd/{dictBuilder → lib/dictBuilder}/divsufsort.h +0 -0
  48. data/contrib/zstd/{dictBuilder → lib/dictBuilder}/zdict.c +68 -18
  49. data/contrib/zstd/lib/dictBuilder/zdict.h +201 -0
  50. data/contrib/zstd/{legacy → lib/legacy}/zstd_legacy.h +122 -7
  51. data/contrib/zstd/{legacy → lib/legacy}/zstd_v01.c +34 -3
  52. data/contrib/zstd/{legacy → lib/legacy}/zstd_v01.h +8 -0
  53. data/contrib/zstd/{legacy → lib/legacy}/zstd_v02.c +45 -12
  54. data/contrib/zstd/{legacy → lib/legacy}/zstd_v02.h +8 -0
  55. data/contrib/zstd/{legacy → lib/legacy}/zstd_v03.c +45 -12
  56. data/contrib/zstd/{legacy → lib/legacy}/zstd_v03.h +8 -0
  57. data/contrib/zstd/{legacy → lib/legacy}/zstd_v04.c +56 -33
  58. data/contrib/zstd/{legacy → lib/legacy}/zstd_v04.h +8 -0
  59. data/contrib/zstd/{legacy → lib/legacy}/zstd_v05.c +45 -18
  60. data/contrib/zstd/{legacy → lib/legacy}/zstd_v05.h +7 -0
  61. data/contrib/zstd/{legacy → lib/legacy}/zstd_v06.c +43 -16
  62. data/contrib/zstd/{legacy → lib/legacy}/zstd_v06.h +7 -0
  63. data/contrib/zstd/{legacy → lib/legacy}/zstd_v07.c +57 -23
  64. data/contrib/zstd/{legacy → lib/legacy}/zstd_v07.h +8 -0
  65. data/contrib/zstd/lib/libzstd.pc.in +14 -0
  66. data/contrib/zstd/{zstd.h → lib/zstd.h} +206 -71
  67. data/ext/depend +2 -0
  68. data/ext/extconf.rb +4 -4
  69. data/ext/extzstd.c +1 -1
  70. data/ext/zstd_common.c +5 -5
  71. data/ext/zstd_compress.c +3 -3
  72. data/ext/zstd_decompress.c +2 -2
  73. data/ext/zstd_dictbuilder.c +2 -2
  74. data/ext/zstd_legacy_v01.c +1 -1
  75. data/ext/zstd_legacy_v02.c +1 -1
  76. data/ext/zstd_legacy_v03.c +1 -1
  77. data/ext/zstd_legacy_v04.c +1 -1
  78. data/ext/zstd_legacy_v05.c +1 -1
  79. data/ext/zstd_legacy_v06.c +1 -1
  80. data/ext/zstd_legacy_v07.c +1 -1
  81. data/gemstub.rb +9 -5
  82. data/lib/extzstd/version.rb +1 -1
  83. metadata +73 -51
  84. data/contrib/zstd/compress/zbuff_compress.c +0 -319
  85. data/contrib/zstd/decompress/zbuff_decompress.c +0 -252
  86. data/contrib/zstd/dictBuilder/zdict.h +0 -111
@@ -15,8 +15,9 @@
15
15
  #define ZSTD_OPT_H_91842398743
16
16
 
17
17
 
18
- #define ZSTD_FREQ_DIV 5
19
- #define ZSTD_MAX_PRICE (1<<30)
18
+ #define ZSTD_LITFREQ_ADD 2
19
+ #define ZSTD_FREQ_DIV 4
20
+ #define ZSTD_MAX_PRICE (1<<30)
20
21
 
21
22
  /*-*************************************
22
23
  * Price functions for optimal parser
@@ -31,22 +32,32 @@ FORCE_INLINE void ZSTD_setLog2Prices(seqStore_t* ssPtr)
31
32
  }
32
33
 
33
34
 
34
- MEM_STATIC void ZSTD_rescaleFreqs(seqStore_t* ssPtr)
35
+ MEM_STATIC void ZSTD_rescaleFreqs(seqStore_t* ssPtr, const BYTE* src, size_t srcSize)
35
36
  {
36
37
  unsigned u;
37
38
 
38
39
  ssPtr->cachedLiterals = NULL;
39
40
  ssPtr->cachedPrice = ssPtr->cachedLitLength = 0;
41
+ ssPtr->staticPrices = 0;
40
42
 
41
43
  if (ssPtr->litLengthSum == 0) {
42
- ssPtr->litSum = (2<<Litbits);
44
+ if (srcSize <= 1024) ssPtr->staticPrices = 1;
45
+
46
+ for (u=0; u<=MaxLit; u++)
47
+ ssPtr->litFreq[u] = 0;
48
+ for (u=0; u<srcSize; u++)
49
+ ssPtr->litFreq[src[u]]++;
50
+
51
+ ssPtr->litSum = 0;
43
52
  ssPtr->litLengthSum = MaxLL+1;
44
53
  ssPtr->matchLengthSum = MaxML+1;
45
54
  ssPtr->offCodeSum = (MaxOff+1);
46
- ssPtr->matchSum = (2<<Litbits);
55
+ ssPtr->matchSum = (ZSTD_LITFREQ_ADD<<Litbits);
47
56
 
48
- for (u=0; u<=MaxLit; u++)
49
- ssPtr->litFreq[u] = 2;
57
+ for (u=0; u<=MaxLit; u++) {
58
+ ssPtr->litFreq[u] = 1 + (ssPtr->litFreq[u]>>ZSTD_FREQ_DIV);
59
+ ssPtr->litSum += ssPtr->litFreq[u];
60
+ }
50
61
  for (u=0; u<=MaxLL; u++)
51
62
  ssPtr->litLengthFreq[u] = 1;
52
63
  for (u=0; u<=MaxML; u++)
@@ -61,11 +72,11 @@ MEM_STATIC void ZSTD_rescaleFreqs(seqStore_t* ssPtr)
61
72
  ssPtr->litSum = 0;
62
73
 
63
74
  for (u=0; u<=MaxLit; u++) {
64
- ssPtr->litFreq[u] = 1 + (ssPtr->litFreq[u]>>ZSTD_FREQ_DIV);
75
+ ssPtr->litFreq[u] = 1 + (ssPtr->litFreq[u]>>(ZSTD_FREQ_DIV+1));
65
76
  ssPtr->litSum += ssPtr->litFreq[u];
66
77
  }
67
78
  for (u=0; u<=MaxLL; u++) {
68
- ssPtr->litLengthFreq[u] = 1 + (ssPtr->litLengthFreq[u]>>ZSTD_FREQ_DIV);
79
+ ssPtr->litLengthFreq[u] = 1 + (ssPtr->litLengthFreq[u]>>(ZSTD_FREQ_DIV+1));
69
80
  ssPtr->litLengthSum += ssPtr->litLengthFreq[u];
70
81
  }
71
82
  for (u=0; u<=MaxML; u++) {
@@ -73,6 +84,7 @@ MEM_STATIC void ZSTD_rescaleFreqs(seqStore_t* ssPtr)
73
84
  ssPtr->matchLengthSum += ssPtr->matchLengthFreq[u];
74
85
  ssPtr->matchSum += ssPtr->matchLengthFreq[u] * (u + 3);
75
86
  }
87
+ ssPtr->matchSum *= ZSTD_LITFREQ_ADD;
76
88
  for (u=0; u<=MaxOff; u++) {
77
89
  ssPtr->offCodeFreq[u] = 1 + (ssPtr->offCodeFreq[u]>>ZSTD_FREQ_DIV);
78
90
  ssPtr->offCodeSum += ssPtr->offCodeFreq[u];
@@ -87,6 +99,9 @@ FORCE_INLINE U32 ZSTD_getLiteralPrice(seqStore_t* ssPtr, U32 litLength, const BY
87
99
  {
88
100
  U32 price, u;
89
101
 
102
+ if (ssPtr->staticPrices)
103
+ return ZSTD_highbit32((U32)litLength+1) + (litLength*6);
104
+
90
105
  if (litLength == 0)
91
106
  return ssPtr->log2litLengthSum - ZSTD_highbit32(ssPtr->litLengthFreq[0]+1);
92
107
 
@@ -124,9 +139,13 @@ FORCE_INLINE U32 ZSTD_getLiteralPrice(seqStore_t* ssPtr, U32 litLength, const BY
124
139
  FORCE_INLINE U32 ZSTD_getPrice(seqStore_t* seqStorePtr, U32 litLength, const BYTE* literals, U32 offset, U32 matchLength, const int ultra)
125
140
  {
126
141
  /* offset */
142
+ U32 price;
127
143
  BYTE const offCode = (BYTE)ZSTD_highbit32(offset+1);
128
- U32 price = offCode + seqStorePtr->log2offCodeSum - ZSTD_highbit32(seqStorePtr->offCodeFreq[offCode]+1);
129
144
 
145
+ if (seqStorePtr->staticPrices)
146
+ return ZSTD_getLiteralPrice(seqStorePtr, litLength, literals) + ZSTD_highbit32((U32)matchLength+1) + 16 + offCode;
147
+
148
+ price = offCode + seqStorePtr->log2offCodeSum - ZSTD_highbit32(seqStorePtr->offCodeFreq[offCode]+1);
130
149
  if (!ultra && offCode >= 20) price += (offCode-19)*2;
131
150
 
132
151
  /* match Length */
@@ -144,9 +163,9 @@ MEM_STATIC void ZSTD_updatePrice(seqStore_t* seqStorePtr, U32 litLength, const B
144
163
  U32 u;
145
164
 
146
165
  /* literals */
147
- seqStorePtr->litSum += litLength;
166
+ seqStorePtr->litSum += litLength*ZSTD_LITFREQ_ADD;
148
167
  for (u=0; u < litLength; u++)
149
- seqStorePtr->litFreq[literals[u]]++;
168
+ seqStorePtr->litFreq[literals[u]] += ZSTD_LITFREQ_ADD;
150
169
 
151
170
  /* literal Length */
152
171
  { const BYTE LL_deltaCode = 19;
@@ -184,7 +203,7 @@ MEM_STATIC void ZSTD_updatePrice(seqStore_t* seqStorePtr, U32 litLength, const B
184
203
 
185
204
 
186
205
  /* Update hashTable3 up to ip (excluded)
187
- Assumption : always within prefix (ie. not within extDict) */
206
+ Assumption : always within prefix (i.e. not within extDict) */
188
207
  FORCE_INLINE
189
208
  U32 ZSTD_insertAndFindFirstIndexHash3 (ZSTD_CCtx* zc, const BYTE* ip)
190
209
  {
@@ -401,7 +420,7 @@ void ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx,
401
420
 
402
421
  /* init */
403
422
  ctx->nextToUpdate3 = ctx->nextToUpdate;
404
- ZSTD_rescaleFreqs(seqStorePtr);
423
+ ZSTD_rescaleFreqs(seqStorePtr, (const BYTE*)src, srcSize);
405
424
  ip += (ip==prefixStart);
406
425
  { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) rep[i]=ctx->rep[i]; }
407
426
 
@@ -416,7 +435,7 @@ void ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx,
416
435
  /* check repCode */
417
436
  { U32 i, last_i = ZSTD_REP_CHECK + (ip==anchor);
418
437
  for (i=(ip == anchor); i<last_i; i++) {
419
- const S32 repCur = ((i==ZSTD_REP_MOVE_OPT) && (ip==anchor)) ? (rep[0] - 1) : rep[i];
438
+ const S32 repCur = (i==ZSTD_REP_MOVE_OPT) ? (rep[0] - 1) : rep[i];
420
439
  if ( (repCur > 0) && (repCur < (S32)(ip-prefixStart))
421
440
  && (MEM_readMINMATCH(ip, minMatch) == MEM_readMINMATCH(ip - repCur, minMatch))) {
422
441
  mlen = (U32)ZSTD_count(ip+minMatch, ip+minMatch-repCur, iend) + minMatch;
@@ -501,7 +520,7 @@ void ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx,
501
520
  best_mlen = minMatch;
502
521
  { U32 i, last_i = ZSTD_REP_CHECK + (mlen != 1);
503
522
  for (i=(opt[cur].mlen != 1); i<last_i; i++) { /* check rep */
504
- const S32 repCur = ((i==ZSTD_REP_MOVE_OPT) && (opt[cur].mlen != 1)) ? (opt[cur].rep[0] - 1) : opt[cur].rep[i];
523
+ const S32 repCur = (i==ZSTD_REP_MOVE_OPT) ? (opt[cur].rep[0] - 1) : opt[cur].rep[i];
505
524
  if ( (repCur > 0) && (repCur < (S32)(inr-prefixStart))
506
525
  && (MEM_readMINMATCH(inr, minMatch) == MEM_readMINMATCH(inr - repCur, minMatch))) {
507
526
  mlen = (U32)ZSTD_count(inr+minMatch, inr+minMatch - repCur, iend) + minMatch;
@@ -601,7 +620,7 @@ _storeSequence: /* cur, last_pos, best_mlen, best_off have to be set */
601
620
  offset--;
602
621
  } else {
603
622
  if (offset != 0) {
604
- best_off = ((offset==ZSTD_REP_MOVE_OPT) && (litLength==0)) ? (rep[0] - 1) : (rep[offset]);
623
+ best_off = (offset==ZSTD_REP_MOVE_OPT) ? (rep[0] - 1) : (rep[offset]);
605
624
  if (offset != 1) rep[2] = rep[1];
606
625
  rep[1] = rep[0];
607
626
  rep[0] = best_off;
@@ -615,7 +634,7 @@ _storeSequence: /* cur, last_pos, best_mlen, best_off have to be set */
615
634
  } } /* for (cur=0; cur < last_pos; ) */
616
635
 
617
636
  /* Save reps for next block */
618
- { int i; for (i=0; i<ZSTD_REP_NUM; i++) ctx->savedRep[i] = rep[i]; }
637
+ { int i; for (i=0; i<ZSTD_REP_NUM; i++) ctx->repToConfirm[i] = rep[i]; }
619
638
 
620
639
  /* Last Literals */
621
640
  { size_t const lastLLSize = iend - anchor;
@@ -656,7 +675,7 @@ void ZSTD_compressBlock_opt_extDict_generic(ZSTD_CCtx* ctx,
656
675
  { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) rep[i]=ctx->rep[i]; }
657
676
 
658
677
  ctx->nextToUpdate3 = ctx->nextToUpdate;
659
- ZSTD_rescaleFreqs(seqStorePtr);
678
+ ZSTD_rescaleFreqs(seqStorePtr, (const BYTE*)src, srcSize);
660
679
  ip += (ip==prefixStart);
661
680
 
662
681
  /* Match Loop */
@@ -671,7 +690,7 @@ void ZSTD_compressBlock_opt_extDict_generic(ZSTD_CCtx* ctx,
671
690
  /* check repCode */
672
691
  { U32 i, last_i = ZSTD_REP_CHECK + (ip==anchor);
673
692
  for (i = (ip==anchor); i<last_i; i++) {
674
- const S32 repCur = ((i==ZSTD_REP_MOVE_OPT) && (ip==anchor)) ? (rep[0] - 1) : rep[i];
693
+ const S32 repCur = (i==ZSTD_REP_MOVE_OPT) ? (rep[0] - 1) : rep[i];
675
694
  const U32 repIndex = (U32)(current - repCur);
676
695
  const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
677
696
  const BYTE* const repMatch = repBase + repIndex;
@@ -767,7 +786,7 @@ void ZSTD_compressBlock_opt_extDict_generic(ZSTD_CCtx* ctx,
767
786
  best_mlen = minMatch;
768
787
  { U32 i, last_i = ZSTD_REP_CHECK + (mlen != 1);
769
788
  for (i = (mlen != 1); i<last_i; i++) {
770
- const S32 repCur = ((i==ZSTD_REP_MOVE_OPT) && (opt[cur].mlen != 1)) ? (opt[cur].rep[0] - 1) : opt[cur].rep[i];
789
+ const S32 repCur = (i==ZSTD_REP_MOVE_OPT) ? (opt[cur].rep[0] - 1) : opt[cur].rep[i];
771
790
  const U32 repIndex = (U32)(current+cur - repCur);
772
791
  const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
773
792
  const BYTE* const repMatch = repBase + repIndex;
@@ -806,7 +825,7 @@ void ZSTD_compressBlock_opt_extDict_generic(ZSTD_CCtx* ctx,
806
825
 
807
826
  match_num = ZSTD_BtGetAllMatches_selectMLS_extDict(ctx, inr, iend, maxSearches, mls, matches, minMatch);
808
827
 
809
- if (match_num > 0 && matches[match_num-1].len > sufficient_len) {
828
+ if (match_num > 0 && (matches[match_num-1].len > sufficient_len || cur + matches[match_num-1].len >= ZSTD_OPT_NUM)) {
810
829
  best_mlen = matches[match_num-1].len;
811
830
  best_off = matches[match_num-1].off;
812
831
  last_pos = cur + 1;
@@ -816,7 +835,7 @@ void ZSTD_compressBlock_opt_extDict_generic(ZSTD_CCtx* ctx,
816
835
  /* set prices using matches at position = cur */
817
836
  for (u = 0; u < match_num; u++) {
818
837
  mlen = (u>0) ? matches[u-1].len+1 : best_mlen;
819
- best_mlen = (cur + matches[u].len < ZSTD_OPT_NUM) ? matches[u].len : ZSTD_OPT_NUM - cur;
838
+ best_mlen = matches[u].len;
820
839
 
821
840
  while (mlen <= best_mlen) {
822
841
  if (opt[cur].mlen == 1) {
@@ -873,7 +892,7 @@ _storeSequence: /* cur, last_pos, best_mlen, best_off have to be set */
873
892
  offset--;
874
893
  } else {
875
894
  if (offset != 0) {
876
- best_off = ((offset==ZSTD_REP_MOVE_OPT) && (litLength==0)) ? (rep[0] - 1) : (rep[offset]);
895
+ best_off = (offset==ZSTD_REP_MOVE_OPT) ? (rep[0] - 1) : (rep[offset]);
877
896
  if (offset != 1) rep[2] = rep[1];
878
897
  rep[1] = rep[0];
879
898
  rep[0] = best_off;
@@ -888,7 +907,7 @@ _storeSequence: /* cur, last_pos, best_mlen, best_off have to be set */
888
907
  } } /* for (cur=0; cur < last_pos; ) */
889
908
 
890
909
  /* Save reps for next block */
891
- { int i; for (i=0; i<ZSTD_REP_NUM; i++) ctx->savedRep[i] = rep[i]; }
910
+ { int i; for (i=0; i<ZSTD_REP_NUM; i++) ctx->repToConfirm[i] = rep[i]; }
892
911
 
893
912
  /* Last Literals */
894
913
  { size_t lastLLSize = iend - anchor;