extzstd 0.1 → 0.1.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (86) hide show
  1. checksums.yaml +4 -4
  2. data/HISTORY.ja +5 -0
  3. data/README.md +5 -5
  4. data/contrib/zstd/CONTRIBUTING.md +42 -0
  5. data/contrib/zstd/LICENSE-examples +11 -0
  6. data/contrib/zstd/Makefile +315 -0
  7. data/contrib/zstd/NEWS +261 -0
  8. data/contrib/zstd/PATENTS +33 -0
  9. data/contrib/zstd/README.md +121 -41
  10. data/contrib/zstd/TESTING.md +44 -0
  11. data/contrib/zstd/appveyor.yml +178 -0
  12. data/contrib/zstd/circle.yml +75 -0
  13. data/contrib/zstd/lib/BUCK +186 -0
  14. data/contrib/zstd/lib/Makefile +163 -0
  15. data/contrib/zstd/lib/README.md +77 -0
  16. data/contrib/zstd/{common → lib/common}/bitstream.h +7 -4
  17. data/contrib/zstd/{common → lib/common}/entropy_common.c +19 -23
  18. data/contrib/zstd/{common → lib/common}/error_private.c +0 -0
  19. data/contrib/zstd/{common → lib/common}/error_private.h +0 -0
  20. data/contrib/zstd/{common → lib/common}/fse.h +94 -34
  21. data/contrib/zstd/{common → lib/common}/fse_decompress.c +18 -19
  22. data/contrib/zstd/{common → lib/common}/huf.h +52 -20
  23. data/contrib/zstd/{common → lib/common}/mem.h +17 -13
  24. data/contrib/zstd/lib/common/pool.c +194 -0
  25. data/contrib/zstd/lib/common/pool.h +56 -0
  26. data/contrib/zstd/lib/common/threading.c +80 -0
  27. data/contrib/zstd/lib/common/threading.h +104 -0
  28. data/contrib/zstd/{common → lib/common}/xxhash.c +3 -1
  29. data/contrib/zstd/{common → lib/common}/xxhash.h +11 -15
  30. data/contrib/zstd/{common → lib/common}/zstd_common.c +1 -11
  31. data/contrib/zstd/{common → lib/common}/zstd_errors.h +16 -2
  32. data/contrib/zstd/{common → lib/common}/zstd_internal.h +17 -1
  33. data/contrib/zstd/{compress → lib/compress}/fse_compress.c +138 -91
  34. data/contrib/zstd/{compress → lib/compress}/huf_compress.c +218 -67
  35. data/contrib/zstd/{compress → lib/compress}/zstd_compress.c +231 -108
  36. data/contrib/zstd/{compress → lib/compress}/zstd_opt.h +44 -25
  37. data/contrib/zstd/lib/compress/zstdmt_compress.c +739 -0
  38. data/contrib/zstd/lib/compress/zstdmt_compress.h +78 -0
  39. data/contrib/zstd/{decompress → lib/decompress}/huf_decompress.c +28 -23
  40. data/contrib/zstd/{decompress → lib/decompress}/zstd_decompress.c +814 -176
  41. data/contrib/zstd/{common → lib/deprecated}/zbuff.h +60 -39
  42. data/contrib/zstd/lib/deprecated/zbuff_common.c +26 -0
  43. data/contrib/zstd/lib/deprecated/zbuff_compress.c +145 -0
  44. data/contrib/zstd/lib/deprecated/zbuff_decompress.c +74 -0
  45. data/contrib/zstd/lib/dictBuilder/cover.c +1029 -0
  46. data/contrib/zstd/{dictBuilder → lib/dictBuilder}/divsufsort.c +0 -0
  47. data/contrib/zstd/{dictBuilder → lib/dictBuilder}/divsufsort.h +0 -0
  48. data/contrib/zstd/{dictBuilder → lib/dictBuilder}/zdict.c +68 -18
  49. data/contrib/zstd/lib/dictBuilder/zdict.h +201 -0
  50. data/contrib/zstd/{legacy → lib/legacy}/zstd_legacy.h +122 -7
  51. data/contrib/zstd/{legacy → lib/legacy}/zstd_v01.c +34 -3
  52. data/contrib/zstd/{legacy → lib/legacy}/zstd_v01.h +8 -0
  53. data/contrib/zstd/{legacy → lib/legacy}/zstd_v02.c +45 -12
  54. data/contrib/zstd/{legacy → lib/legacy}/zstd_v02.h +8 -0
  55. data/contrib/zstd/{legacy → lib/legacy}/zstd_v03.c +45 -12
  56. data/contrib/zstd/{legacy → lib/legacy}/zstd_v03.h +8 -0
  57. data/contrib/zstd/{legacy → lib/legacy}/zstd_v04.c +56 -33
  58. data/contrib/zstd/{legacy → lib/legacy}/zstd_v04.h +8 -0
  59. data/contrib/zstd/{legacy → lib/legacy}/zstd_v05.c +45 -18
  60. data/contrib/zstd/{legacy → lib/legacy}/zstd_v05.h +7 -0
  61. data/contrib/zstd/{legacy → lib/legacy}/zstd_v06.c +43 -16
  62. data/contrib/zstd/{legacy → lib/legacy}/zstd_v06.h +7 -0
  63. data/contrib/zstd/{legacy → lib/legacy}/zstd_v07.c +57 -23
  64. data/contrib/zstd/{legacy → lib/legacy}/zstd_v07.h +8 -0
  65. data/contrib/zstd/lib/libzstd.pc.in +14 -0
  66. data/contrib/zstd/{zstd.h → lib/zstd.h} +206 -71
  67. data/ext/depend +2 -0
  68. data/ext/extconf.rb +4 -4
  69. data/ext/extzstd.c +1 -1
  70. data/ext/zstd_common.c +5 -5
  71. data/ext/zstd_compress.c +3 -3
  72. data/ext/zstd_decompress.c +2 -2
  73. data/ext/zstd_dictbuilder.c +2 -2
  74. data/ext/zstd_legacy_v01.c +1 -1
  75. data/ext/zstd_legacy_v02.c +1 -1
  76. data/ext/zstd_legacy_v03.c +1 -1
  77. data/ext/zstd_legacy_v04.c +1 -1
  78. data/ext/zstd_legacy_v05.c +1 -1
  79. data/ext/zstd_legacy_v06.c +1 -1
  80. data/ext/zstd_legacy_v07.c +1 -1
  81. data/gemstub.rb +9 -5
  82. data/lib/extzstd/version.rb +1 -1
  83. metadata +73 -51
  84. data/contrib/zstd/compress/zbuff_compress.c +0 -319
  85. data/contrib/zstd/decompress/zbuff_decompress.c +0 -252
  86. data/contrib/zstd/dictBuilder/zdict.h +0 -111
@@ -15,8 +15,9 @@
15
15
  #define ZSTD_OPT_H_91842398743
16
16
 
17
17
 
18
- #define ZSTD_FREQ_DIV 5
19
- #define ZSTD_MAX_PRICE (1<<30)
18
+ #define ZSTD_LITFREQ_ADD 2
19
+ #define ZSTD_FREQ_DIV 4
20
+ #define ZSTD_MAX_PRICE (1<<30)
20
21
 
21
22
  /*-*************************************
22
23
  * Price functions for optimal parser
@@ -31,22 +32,32 @@ FORCE_INLINE void ZSTD_setLog2Prices(seqStore_t* ssPtr)
31
32
  }
32
33
 
33
34
 
34
- MEM_STATIC void ZSTD_rescaleFreqs(seqStore_t* ssPtr)
35
+ MEM_STATIC void ZSTD_rescaleFreqs(seqStore_t* ssPtr, const BYTE* src, size_t srcSize)
35
36
  {
36
37
  unsigned u;
37
38
 
38
39
  ssPtr->cachedLiterals = NULL;
39
40
  ssPtr->cachedPrice = ssPtr->cachedLitLength = 0;
41
+ ssPtr->staticPrices = 0;
40
42
 
41
43
  if (ssPtr->litLengthSum == 0) {
42
- ssPtr->litSum = (2<<Litbits);
44
+ if (srcSize <= 1024) ssPtr->staticPrices = 1;
45
+
46
+ for (u=0; u<=MaxLit; u++)
47
+ ssPtr->litFreq[u] = 0;
48
+ for (u=0; u<srcSize; u++)
49
+ ssPtr->litFreq[src[u]]++;
50
+
51
+ ssPtr->litSum = 0;
43
52
  ssPtr->litLengthSum = MaxLL+1;
44
53
  ssPtr->matchLengthSum = MaxML+1;
45
54
  ssPtr->offCodeSum = (MaxOff+1);
46
- ssPtr->matchSum = (2<<Litbits);
55
+ ssPtr->matchSum = (ZSTD_LITFREQ_ADD<<Litbits);
47
56
 
48
- for (u=0; u<=MaxLit; u++)
49
- ssPtr->litFreq[u] = 2;
57
+ for (u=0; u<=MaxLit; u++) {
58
+ ssPtr->litFreq[u] = 1 + (ssPtr->litFreq[u]>>ZSTD_FREQ_DIV);
59
+ ssPtr->litSum += ssPtr->litFreq[u];
60
+ }
50
61
  for (u=0; u<=MaxLL; u++)
51
62
  ssPtr->litLengthFreq[u] = 1;
52
63
  for (u=0; u<=MaxML; u++)
@@ -61,11 +72,11 @@ MEM_STATIC void ZSTD_rescaleFreqs(seqStore_t* ssPtr)
61
72
  ssPtr->litSum = 0;
62
73
 
63
74
  for (u=0; u<=MaxLit; u++) {
64
- ssPtr->litFreq[u] = 1 + (ssPtr->litFreq[u]>>ZSTD_FREQ_DIV);
75
+ ssPtr->litFreq[u] = 1 + (ssPtr->litFreq[u]>>(ZSTD_FREQ_DIV+1));
65
76
  ssPtr->litSum += ssPtr->litFreq[u];
66
77
  }
67
78
  for (u=0; u<=MaxLL; u++) {
68
- ssPtr->litLengthFreq[u] = 1 + (ssPtr->litLengthFreq[u]>>ZSTD_FREQ_DIV);
79
+ ssPtr->litLengthFreq[u] = 1 + (ssPtr->litLengthFreq[u]>>(ZSTD_FREQ_DIV+1));
69
80
  ssPtr->litLengthSum += ssPtr->litLengthFreq[u];
70
81
  }
71
82
  for (u=0; u<=MaxML; u++) {
@@ -73,6 +84,7 @@ MEM_STATIC void ZSTD_rescaleFreqs(seqStore_t* ssPtr)
73
84
  ssPtr->matchLengthSum += ssPtr->matchLengthFreq[u];
74
85
  ssPtr->matchSum += ssPtr->matchLengthFreq[u] * (u + 3);
75
86
  }
87
+ ssPtr->matchSum *= ZSTD_LITFREQ_ADD;
76
88
  for (u=0; u<=MaxOff; u++) {
77
89
  ssPtr->offCodeFreq[u] = 1 + (ssPtr->offCodeFreq[u]>>ZSTD_FREQ_DIV);
78
90
  ssPtr->offCodeSum += ssPtr->offCodeFreq[u];
@@ -87,6 +99,9 @@ FORCE_INLINE U32 ZSTD_getLiteralPrice(seqStore_t* ssPtr, U32 litLength, const BY
87
99
  {
88
100
  U32 price, u;
89
101
 
102
+ if (ssPtr->staticPrices)
103
+ return ZSTD_highbit32((U32)litLength+1) + (litLength*6);
104
+
90
105
  if (litLength == 0)
91
106
  return ssPtr->log2litLengthSum - ZSTD_highbit32(ssPtr->litLengthFreq[0]+1);
92
107
 
@@ -124,9 +139,13 @@ FORCE_INLINE U32 ZSTD_getLiteralPrice(seqStore_t* ssPtr, U32 litLength, const BY
124
139
  FORCE_INLINE U32 ZSTD_getPrice(seqStore_t* seqStorePtr, U32 litLength, const BYTE* literals, U32 offset, U32 matchLength, const int ultra)
125
140
  {
126
141
  /* offset */
142
+ U32 price;
127
143
  BYTE const offCode = (BYTE)ZSTD_highbit32(offset+1);
128
- U32 price = offCode + seqStorePtr->log2offCodeSum - ZSTD_highbit32(seqStorePtr->offCodeFreq[offCode]+1);
129
144
 
145
+ if (seqStorePtr->staticPrices)
146
+ return ZSTD_getLiteralPrice(seqStorePtr, litLength, literals) + ZSTD_highbit32((U32)matchLength+1) + 16 + offCode;
147
+
148
+ price = offCode + seqStorePtr->log2offCodeSum - ZSTD_highbit32(seqStorePtr->offCodeFreq[offCode]+1);
130
149
  if (!ultra && offCode >= 20) price += (offCode-19)*2;
131
150
 
132
151
  /* match Length */
@@ -144,9 +163,9 @@ MEM_STATIC void ZSTD_updatePrice(seqStore_t* seqStorePtr, U32 litLength, const B
144
163
  U32 u;
145
164
 
146
165
  /* literals */
147
- seqStorePtr->litSum += litLength;
166
+ seqStorePtr->litSum += litLength*ZSTD_LITFREQ_ADD;
148
167
  for (u=0; u < litLength; u++)
149
- seqStorePtr->litFreq[literals[u]]++;
168
+ seqStorePtr->litFreq[literals[u]] += ZSTD_LITFREQ_ADD;
150
169
 
151
170
  /* literal Length */
152
171
  { const BYTE LL_deltaCode = 19;
@@ -184,7 +203,7 @@ MEM_STATIC void ZSTD_updatePrice(seqStore_t* seqStorePtr, U32 litLength, const B
184
203
 
185
204
 
186
205
  /* Update hashTable3 up to ip (excluded)
187
- Assumption : always within prefix (ie. not within extDict) */
206
+ Assumption : always within prefix (i.e. not within extDict) */
188
207
  FORCE_INLINE
189
208
  U32 ZSTD_insertAndFindFirstIndexHash3 (ZSTD_CCtx* zc, const BYTE* ip)
190
209
  {
@@ -401,7 +420,7 @@ void ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx,
401
420
 
402
421
  /* init */
403
422
  ctx->nextToUpdate3 = ctx->nextToUpdate;
404
- ZSTD_rescaleFreqs(seqStorePtr);
423
+ ZSTD_rescaleFreqs(seqStorePtr, (const BYTE*)src, srcSize);
405
424
  ip += (ip==prefixStart);
406
425
  { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) rep[i]=ctx->rep[i]; }
407
426
 
@@ -416,7 +435,7 @@ void ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx,
416
435
  /* check repCode */
417
436
  { U32 i, last_i = ZSTD_REP_CHECK + (ip==anchor);
418
437
  for (i=(ip == anchor); i<last_i; i++) {
419
- const S32 repCur = ((i==ZSTD_REP_MOVE_OPT) && (ip==anchor)) ? (rep[0] - 1) : rep[i];
438
+ const S32 repCur = (i==ZSTD_REP_MOVE_OPT) ? (rep[0] - 1) : rep[i];
420
439
  if ( (repCur > 0) && (repCur < (S32)(ip-prefixStart))
421
440
  && (MEM_readMINMATCH(ip, minMatch) == MEM_readMINMATCH(ip - repCur, minMatch))) {
422
441
  mlen = (U32)ZSTD_count(ip+minMatch, ip+minMatch-repCur, iend) + minMatch;
@@ -501,7 +520,7 @@ void ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx,
501
520
  best_mlen = minMatch;
502
521
  { U32 i, last_i = ZSTD_REP_CHECK + (mlen != 1);
503
522
  for (i=(opt[cur].mlen != 1); i<last_i; i++) { /* check rep */
504
- const S32 repCur = ((i==ZSTD_REP_MOVE_OPT) && (opt[cur].mlen != 1)) ? (opt[cur].rep[0] - 1) : opt[cur].rep[i];
523
+ const S32 repCur = (i==ZSTD_REP_MOVE_OPT) ? (opt[cur].rep[0] - 1) : opt[cur].rep[i];
505
524
  if ( (repCur > 0) && (repCur < (S32)(inr-prefixStart))
506
525
  && (MEM_readMINMATCH(inr, minMatch) == MEM_readMINMATCH(inr - repCur, minMatch))) {
507
526
  mlen = (U32)ZSTD_count(inr+minMatch, inr+minMatch - repCur, iend) + minMatch;
@@ -601,7 +620,7 @@ _storeSequence: /* cur, last_pos, best_mlen, best_off have to be set */
601
620
  offset--;
602
621
  } else {
603
622
  if (offset != 0) {
604
- best_off = ((offset==ZSTD_REP_MOVE_OPT) && (litLength==0)) ? (rep[0] - 1) : (rep[offset]);
623
+ best_off = (offset==ZSTD_REP_MOVE_OPT) ? (rep[0] - 1) : (rep[offset]);
605
624
  if (offset != 1) rep[2] = rep[1];
606
625
  rep[1] = rep[0];
607
626
  rep[0] = best_off;
@@ -615,7 +634,7 @@ _storeSequence: /* cur, last_pos, best_mlen, best_off have to be set */
615
634
  } } /* for (cur=0; cur < last_pos; ) */
616
635
 
617
636
  /* Save reps for next block */
618
- { int i; for (i=0; i<ZSTD_REP_NUM; i++) ctx->savedRep[i] = rep[i]; }
637
+ { int i; for (i=0; i<ZSTD_REP_NUM; i++) ctx->repToConfirm[i] = rep[i]; }
619
638
 
620
639
  /* Last Literals */
621
640
  { size_t const lastLLSize = iend - anchor;
@@ -656,7 +675,7 @@ void ZSTD_compressBlock_opt_extDict_generic(ZSTD_CCtx* ctx,
656
675
  { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) rep[i]=ctx->rep[i]; }
657
676
 
658
677
  ctx->nextToUpdate3 = ctx->nextToUpdate;
659
- ZSTD_rescaleFreqs(seqStorePtr);
678
+ ZSTD_rescaleFreqs(seqStorePtr, (const BYTE*)src, srcSize);
660
679
  ip += (ip==prefixStart);
661
680
 
662
681
  /* Match Loop */
@@ -671,7 +690,7 @@ void ZSTD_compressBlock_opt_extDict_generic(ZSTD_CCtx* ctx,
671
690
  /* check repCode */
672
691
  { U32 i, last_i = ZSTD_REP_CHECK + (ip==anchor);
673
692
  for (i = (ip==anchor); i<last_i; i++) {
674
- const S32 repCur = ((i==ZSTD_REP_MOVE_OPT) && (ip==anchor)) ? (rep[0] - 1) : rep[i];
693
+ const S32 repCur = (i==ZSTD_REP_MOVE_OPT) ? (rep[0] - 1) : rep[i];
675
694
  const U32 repIndex = (U32)(current - repCur);
676
695
  const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
677
696
  const BYTE* const repMatch = repBase + repIndex;
@@ -767,7 +786,7 @@ void ZSTD_compressBlock_opt_extDict_generic(ZSTD_CCtx* ctx,
767
786
  best_mlen = minMatch;
768
787
  { U32 i, last_i = ZSTD_REP_CHECK + (mlen != 1);
769
788
  for (i = (mlen != 1); i<last_i; i++) {
770
- const S32 repCur = ((i==ZSTD_REP_MOVE_OPT) && (opt[cur].mlen != 1)) ? (opt[cur].rep[0] - 1) : opt[cur].rep[i];
789
+ const S32 repCur = (i==ZSTD_REP_MOVE_OPT) ? (opt[cur].rep[0] - 1) : opt[cur].rep[i];
771
790
  const U32 repIndex = (U32)(current+cur - repCur);
772
791
  const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
773
792
  const BYTE* const repMatch = repBase + repIndex;
@@ -806,7 +825,7 @@ void ZSTD_compressBlock_opt_extDict_generic(ZSTD_CCtx* ctx,
806
825
 
807
826
  match_num = ZSTD_BtGetAllMatches_selectMLS_extDict(ctx, inr, iend, maxSearches, mls, matches, minMatch);
808
827
 
809
- if (match_num > 0 && matches[match_num-1].len > sufficient_len) {
828
+ if (match_num > 0 && (matches[match_num-1].len > sufficient_len || cur + matches[match_num-1].len >= ZSTD_OPT_NUM)) {
810
829
  best_mlen = matches[match_num-1].len;
811
830
  best_off = matches[match_num-1].off;
812
831
  last_pos = cur + 1;
@@ -816,7 +835,7 @@ void ZSTD_compressBlock_opt_extDict_generic(ZSTD_CCtx* ctx,
816
835
  /* set prices using matches at position = cur */
817
836
  for (u = 0; u < match_num; u++) {
818
837
  mlen = (u>0) ? matches[u-1].len+1 : best_mlen;
819
- best_mlen = (cur + matches[u].len < ZSTD_OPT_NUM) ? matches[u].len : ZSTD_OPT_NUM - cur;
838
+ best_mlen = matches[u].len;
820
839
 
821
840
  while (mlen <= best_mlen) {
822
841
  if (opt[cur].mlen == 1) {
@@ -873,7 +892,7 @@ _storeSequence: /* cur, last_pos, best_mlen, best_off have to be set */
873
892
  offset--;
874
893
  } else {
875
894
  if (offset != 0) {
876
- best_off = ((offset==ZSTD_REP_MOVE_OPT) && (litLength==0)) ? (rep[0] - 1) : (rep[offset]);
895
+ best_off = (offset==ZSTD_REP_MOVE_OPT) ? (rep[0] - 1) : (rep[offset]);
877
896
  if (offset != 1) rep[2] = rep[1];
878
897
  rep[1] = rep[0];
879
898
  rep[0] = best_off;
@@ -888,7 +907,7 @@ _storeSequence: /* cur, last_pos, best_mlen, best_off have to be set */
888
907
  } } /* for (cur=0; cur < last_pos; ) */
889
908
 
890
909
  /* Save reps for next block */
891
- { int i; for (i=0; i<ZSTD_REP_NUM; i++) ctx->savedRep[i] = rep[i]; }
910
+ { int i; for (i=0; i<ZSTD_REP_NUM; i++) ctx->repToConfirm[i] = rep[i]; }
892
911
 
893
912
  /* Last Literals */
894
913
  { size_t lastLLSize = iend - anchor;