extzstd 0.2 → 0.3

Sign up to get free protection for your applications and to get access to all the features.
Files changed (88) hide show
  1. checksums.yaml +4 -4
  2. data/HISTORY.ja.md +13 -0
  3. data/README.md +17 -14
  4. data/contrib/zstd/{NEWS → CHANGELOG} +115 -2
  5. data/contrib/zstd/CODE_OF_CONDUCT.md +5 -0
  6. data/contrib/zstd/Makefile +99 -53
  7. data/contrib/zstd/README.md +59 -39
  8. data/contrib/zstd/TESTING.md +1 -1
  9. data/contrib/zstd/appveyor.yml +17 -6
  10. data/contrib/zstd/lib/BUCK +29 -2
  11. data/contrib/zstd/lib/Makefile +118 -21
  12. data/contrib/zstd/lib/README.md +84 -44
  13. data/contrib/zstd/lib/common/bitstream.h +17 -33
  14. data/contrib/zstd/lib/common/compiler.h +62 -8
  15. data/contrib/zstd/lib/common/cpu.h +215 -0
  16. data/contrib/zstd/lib/common/debug.c +44 -0
  17. data/contrib/zstd/lib/common/debug.h +134 -0
  18. data/contrib/zstd/lib/common/entropy_common.c +16 -1
  19. data/contrib/zstd/lib/common/error_private.c +7 -0
  20. data/contrib/zstd/lib/common/fse.h +48 -44
  21. data/contrib/zstd/lib/common/fse_decompress.c +3 -3
  22. data/contrib/zstd/lib/common/huf.h +169 -113
  23. data/contrib/zstd/lib/common/mem.h +20 -2
  24. data/contrib/zstd/lib/common/pool.c +135 -49
  25. data/contrib/zstd/lib/common/pool.h +40 -21
  26. data/contrib/zstd/lib/common/threading.c +2 -2
  27. data/contrib/zstd/lib/common/threading.h +12 -12
  28. data/contrib/zstd/lib/common/xxhash.c +3 -2
  29. data/contrib/zstd/lib/common/zstd_common.c +3 -6
  30. data/contrib/zstd/lib/common/zstd_errors.h +17 -7
  31. data/contrib/zstd/lib/common/zstd_internal.h +76 -48
  32. data/contrib/zstd/lib/compress/fse_compress.c +89 -209
  33. data/contrib/zstd/lib/compress/hist.c +203 -0
  34. data/contrib/zstd/lib/compress/hist.h +95 -0
  35. data/contrib/zstd/lib/compress/huf_compress.c +188 -80
  36. data/contrib/zstd/lib/compress/zstd_compress.c +2500 -1203
  37. data/contrib/zstd/lib/compress/zstd_compress_internal.h +463 -62
  38. data/contrib/zstd/lib/compress/zstd_double_fast.c +321 -131
  39. data/contrib/zstd/lib/compress/zstd_double_fast.h +13 -4
  40. data/contrib/zstd/lib/compress/zstd_fast.c +335 -108
  41. data/contrib/zstd/lib/compress/zstd_fast.h +12 -6
  42. data/contrib/zstd/lib/compress/zstd_lazy.c +654 -313
  43. data/contrib/zstd/lib/compress/zstd_lazy.h +44 -16
  44. data/contrib/zstd/lib/compress/zstd_ldm.c +310 -420
  45. data/contrib/zstd/lib/compress/zstd_ldm.h +63 -26
  46. data/contrib/zstd/lib/compress/zstd_opt.c +773 -325
  47. data/contrib/zstd/lib/compress/zstd_opt.h +31 -5
  48. data/contrib/zstd/lib/compress/zstdmt_compress.c +1468 -518
  49. data/contrib/zstd/lib/compress/zstdmt_compress.h +96 -45
  50. data/contrib/zstd/lib/decompress/huf_decompress.c +518 -282
  51. data/contrib/zstd/lib/decompress/zstd_ddict.c +240 -0
  52. data/contrib/zstd/lib/decompress/zstd_ddict.h +44 -0
  53. data/contrib/zstd/lib/decompress/zstd_decompress.c +613 -1513
  54. data/contrib/zstd/lib/decompress/zstd_decompress_block.c +1311 -0
  55. data/contrib/zstd/lib/decompress/zstd_decompress_block.h +59 -0
  56. data/contrib/zstd/lib/decompress/zstd_decompress_internal.h +175 -0
  57. data/contrib/zstd/lib/dictBuilder/cover.c +194 -113
  58. data/contrib/zstd/lib/dictBuilder/cover.h +112 -0
  59. data/contrib/zstd/lib/dictBuilder/divsufsort.c +3 -3
  60. data/contrib/zstd/lib/dictBuilder/fastcover.c +740 -0
  61. data/contrib/zstd/lib/dictBuilder/zdict.c +142 -106
  62. data/contrib/zstd/lib/dictBuilder/zdict.h +115 -49
  63. data/contrib/zstd/lib/legacy/zstd_legacy.h +44 -12
  64. data/contrib/zstd/lib/legacy/zstd_v01.c +41 -10
  65. data/contrib/zstd/lib/legacy/zstd_v01.h +12 -7
  66. data/contrib/zstd/lib/legacy/zstd_v02.c +37 -12
  67. data/contrib/zstd/lib/legacy/zstd_v02.h +12 -7
  68. data/contrib/zstd/lib/legacy/zstd_v03.c +38 -12
  69. data/contrib/zstd/lib/legacy/zstd_v03.h +12 -7
  70. data/contrib/zstd/lib/legacy/zstd_v04.c +55 -174
  71. data/contrib/zstd/lib/legacy/zstd_v04.h +12 -7
  72. data/contrib/zstd/lib/legacy/zstd_v05.c +59 -31
  73. data/contrib/zstd/lib/legacy/zstd_v05.h +12 -7
  74. data/contrib/zstd/lib/legacy/zstd_v06.c +48 -20
  75. data/contrib/zstd/lib/legacy/zstd_v06.h +10 -5
  76. data/contrib/zstd/lib/legacy/zstd_v07.c +62 -29
  77. data/contrib/zstd/lib/legacy/zstd_v07.h +10 -5
  78. data/contrib/zstd/lib/zstd.h +1346 -832
  79. data/ext/extzstd.c +27 -19
  80. data/ext/extzstd_stream.c +20 -4
  81. data/ext/zstd_compress.c +1 -0
  82. data/ext/zstd_decompress.c +4 -0
  83. data/ext/zstd_dictbuilder.c +4 -0
  84. data/ext/zstd_dictbuilder_fastcover.c +5 -0
  85. data/lib/extzstd.rb +52 -220
  86. data/lib/extzstd/version.rb +1 -1
  87. metadata +21 -7
  88. data/contrib/zstd/circle.yml +0 -63
@@ -27,11 +27,19 @@
27
27
  extern "C" {
28
28
  #endif
29
29
 
30
+
30
31
  /*-*************************************
31
32
  * Constants
32
33
  ***************************************/
33
- static const U32 g_searchStrength = 8;
34
- #define HASH_READ_SIZE 8
34
+ #define kSearchStrength 8
35
+ #define HASH_READ_SIZE 8
36
+ #define ZSTD_DUBT_UNSORTED_MARK 1 /* For btlazy2 strategy, index 1 now means "unsorted".
37
+ It could be confused for a real successor at index "1", if sorted as larger than its predecessor.
38
+ It's not a big deal though : candidate will just be sorted again.
39
+ Additionally, candidate position 1 will be lost.
40
+ But candidate 1 cannot hide a large tree of candidates, so it's a minimal loss.
41
+ The benefit is that ZSTD_DUBT_UNSORTED_MARK cannot be mishandled after table re-use with a different strategy
42
+ Constant required by ZSTD_compressBlock_btlazy2() and ZSTD_reduceTable_internal() */
35
43
 
36
44
 
37
45
  /*-*************************************
@@ -43,19 +51,34 @@ typedef enum { zcss_init=0, zcss_load, zcss_flush } ZSTD_cStreamStage;
43
51
  typedef struct ZSTD_prefixDict_s {
44
52
  const void* dict;
45
53
  size_t dictSize;
46
- ZSTD_dictMode_e dictMode;
54
+ ZSTD_dictContentType_e dictContentType;
47
55
  } ZSTD_prefixDict;
48
56
 
49
57
  typedef struct {
50
- U32 hufCTable[HUF_CTABLE_SIZE_U32(255)];
58
+ void* dictBuffer;
59
+ void const* dict;
60
+ size_t dictSize;
61
+ ZSTD_dictContentType_e dictContentType;
62
+ ZSTD_CDict* cdict;
63
+ } ZSTD_localDict;
64
+
65
+ typedef struct {
66
+ U32 CTable[HUF_CTABLE_SIZE_U32(255)];
67
+ HUF_repeat repeatMode;
68
+ } ZSTD_hufCTables_t;
69
+
70
+ typedef struct {
51
71
  FSE_CTable offcodeCTable[FSE_CTABLE_SIZE_U32(OffFSELog, MaxOff)];
52
72
  FSE_CTable matchlengthCTable[FSE_CTABLE_SIZE_U32(MLFSELog, MaxML)];
53
73
  FSE_CTable litlengthCTable[FSE_CTABLE_SIZE_U32(LLFSELog, MaxLL)];
54
- U32 workspace[HUF_WORKSPACE_SIZE_U32];
55
- HUF_repeat hufCTable_repeatMode;
56
74
  FSE_repeat offcode_repeatMode;
57
75
  FSE_repeat matchlength_repeatMode;
58
76
  FSE_repeat litlength_repeatMode;
77
+ } ZSTD_fseCTables_t;
78
+
79
+ typedef struct {
80
+ ZSTD_hufCTables_t huf;
81
+ ZSTD_fseCTables_t fse;
59
82
  } ZSTD_entropyCTables_t;
60
83
 
61
84
  typedef struct {
@@ -71,34 +94,71 @@ typedef struct {
71
94
  U32 rep[ZSTD_REP_NUM];
72
95
  } ZSTD_optimal_t;
73
96
 
97
+ typedef enum { zop_dynamic=0, zop_predef } ZSTD_OptPrice_e;
98
+
74
99
  typedef struct {
75
100
  /* All tables are allocated inside cctx->workspace by ZSTD_resetCCtx_internal() */
76
- U32* litFreq; /* table of literals statistics, of size 256 */
77
- U32* litLengthFreq; /* table of litLength statistics, of size (MaxLL+1) */
78
- U32* matchLengthFreq; /* table of matchLength statistics, of size (MaxML+1) */
79
- U32* offCodeFreq; /* table of offCode statistics, of size (MaxOff+1) */
80
- ZSTD_match_t* matchTable; /* list of found matches, of size ZSTD_OPT_NUM+1 */
81
- ZSTD_optimal_t* priceTable; /* All positions tracked by optimal parser, of size ZSTD_OPT_NUM+1 */
101
+ unsigned* litFreq; /* table of literals statistics, of size 256 */
102
+ unsigned* litLengthFreq; /* table of litLength statistics, of size (MaxLL+1) */
103
+ unsigned* matchLengthFreq; /* table of matchLength statistics, of size (MaxML+1) */
104
+ unsigned* offCodeFreq; /* table of offCode statistics, of size (MaxOff+1) */
105
+ ZSTD_match_t* matchTable; /* list of found matches, of size ZSTD_OPT_NUM+1 */
106
+ ZSTD_optimal_t* priceTable; /* All positions tracked by optimal parser, of size ZSTD_OPT_NUM+1 */
82
107
 
83
108
  U32 litSum; /* nb of literals */
84
109
  U32 litLengthSum; /* nb of litLength codes */
85
110
  U32 matchLengthSum; /* nb of matchLength codes */
86
111
  U32 offCodeSum; /* nb of offset codes */
87
- /* begin updated by ZSTD_setLog2Prices */
88
- U32 log2litSum; /* pow2 to compare log2(litfreq) to */
89
- U32 log2litLengthSum; /* pow2 to compare log2(llfreq) to */
90
- U32 log2matchLengthSum; /* pow2 to compare log2(mlfreq) to */
91
- U32 log2offCodeSum; /* pow2 to compare log2(offreq) to */
92
- /* end : updated by ZSTD_setLog2Prices */
93
- U32 staticPrices; /* prices follow a pre-defined cost structure, statistics are irrelevant */
112
+ U32 litSumBasePrice; /* to compare to log2(litfreq) */
113
+ U32 litLengthSumBasePrice; /* to compare to log2(llfreq) */
114
+ U32 matchLengthSumBasePrice;/* to compare to log2(mlfreq) */
115
+ U32 offCodeSumBasePrice; /* to compare to log2(offreq) */
116
+ ZSTD_OptPrice_e priceType; /* prices can be determined dynamically, or follow a pre-defined cost structure */
117
+ const ZSTD_entropyCTables_t* symbolCosts; /* pre-calculated dictionary statistics */
118
+ ZSTD_literalCompressionMode_e literalCompressionMode;
94
119
  } optState_t;
95
120
 
121
+ typedef struct {
122
+ ZSTD_entropyCTables_t entropy;
123
+ U32 rep[ZSTD_REP_NUM];
124
+ } ZSTD_compressedBlockState_t;
125
+
126
+ typedef struct {
127
+ BYTE const* nextSrc; /* next block here to continue on current prefix */
128
+ BYTE const* base; /* All regular indexes relative to this position */
129
+ BYTE const* dictBase; /* extDict indexes relative to this position */
130
+ U32 dictLimit; /* below that point, need extDict */
131
+ U32 lowLimit; /* below that point, no more data */
132
+ } ZSTD_window_t;
133
+
134
+ typedef struct ZSTD_matchState_t ZSTD_matchState_t;
135
+ struct ZSTD_matchState_t {
136
+ ZSTD_window_t window; /* State for window round buffer management */
137
+ U32 loadedDictEnd; /* index of end of dictionary */
138
+ U32 nextToUpdate; /* index from which to continue table update */
139
+ U32 nextToUpdate3; /* index from which to continue table update */
140
+ U32 hashLog3; /* dispatch table : larger == faster, more memory */
141
+ U32* hashTable;
142
+ U32* hashTable3;
143
+ U32* chainTable;
144
+ optState_t opt; /* optimal parser state */
145
+ const ZSTD_matchState_t * dictMatchState;
146
+ ZSTD_compressionParameters cParams;
147
+ };
148
+
149
+ typedef struct {
150
+ ZSTD_compressedBlockState_t* prevCBlock;
151
+ ZSTD_compressedBlockState_t* nextCBlock;
152
+ ZSTD_matchState_t matchState;
153
+ } ZSTD_blockState_t;
154
+
96
155
  typedef struct {
97
156
  U32 offset;
98
157
  U32 checksum;
99
158
  } ldmEntry_t;
100
159
 
101
160
  typedef struct {
161
+ ZSTD_window_t window; /* State for the window round buffer management */
102
162
  ldmEntry_t* hashTable;
103
163
  BYTE* bucketOffsets; /* Next position in bucket to insert entry */
104
164
  U64 hashPower; /* Used to compute the rolling hash.
@@ -110,61 +170,74 @@ typedef struct {
110
170
  U32 hashLog; /* Log size of hashTable */
111
171
  U32 bucketSizeLog; /* Log bucket size for collision resolution, at most 8 */
112
172
  U32 minMatchLength; /* Minimum match length */
113
- U32 hashEveryLog; /* Log number of entries to skip */
173
+ U32 hashRateLog; /* Log number of entries to skip */
174
+ U32 windowLog; /* Window log for the LDM */
114
175
  } ldmParams_t;
115
176
 
177
+ typedef struct {
178
+ U32 offset;
179
+ U32 litLength;
180
+ U32 matchLength;
181
+ } rawSeq;
182
+
183
+ typedef struct {
184
+ rawSeq* seq; /* The start of the sequences */
185
+ size_t pos; /* The position where reading stopped. <= size. */
186
+ size_t size; /* The number of sequences. <= capacity. */
187
+ size_t capacity; /* The capacity starting from `seq` pointer */
188
+ } rawSeqStore_t;
189
+
116
190
  struct ZSTD_CCtx_params_s {
117
191
  ZSTD_format_e format;
118
192
  ZSTD_compressionParameters cParams;
119
193
  ZSTD_frameParameters fParams;
120
194
 
121
195
  int compressionLevel;
122
- U32 forceWindow; /* force back-references to respect limit of
196
+ int forceWindow; /* force back-references to respect limit of
123
197
  * 1<<wLog, even for dictionary */
124
198
 
199
+ ZSTD_dictAttachPref_e attachDictPref;
200
+ ZSTD_literalCompressionMode_e literalCompressionMode;
201
+
125
202
  /* Multithreading: used to pass parameters to mtctx */
126
- U32 nbThreads;
127
- unsigned jobSize;
128
- unsigned overlapSizeLog;
203
+ int nbWorkers;
204
+ size_t jobSize;
205
+ int overlapLog;
206
+ int rsyncable;
129
207
 
130
208
  /* Long distance matching parameters */
131
209
  ldmParams_t ldmParams;
132
210
 
133
- /* For use with createCCtxParams() and freeCCtxParams() only */
211
+ /* Internal use, for createCCtxParams() and freeCCtxParams() only */
134
212
  ZSTD_customMem customMem;
135
-
136
213
  }; /* typedef'd to ZSTD_CCtx_params within "zstd.h" */
137
214
 
138
215
  struct ZSTD_CCtx_s {
139
- const BYTE* nextSrc; /* next block here to continue on current prefix */
140
- const BYTE* base; /* All regular indexes relative to this position */
141
- const BYTE* dictBase; /* extDict indexes relative to this position */
142
- U32 dictLimit; /* below that point, need extDict */
143
- U32 lowLimit; /* below that point, no more data */
144
- U32 nextToUpdate; /* index from which to continue dictionary update */
145
- U32 nextToUpdate3; /* index from which to continue dictionary update */
146
- U32 hashLog3; /* dispatch table : larger == faster, more memory */
147
- U32 loadedDictEnd; /* index of end of dictionary */
148
216
  ZSTD_compressionStage_e stage;
149
- U32 dictID;
217
+ int cParamsChanged; /* == 1 if cParams(except wlog) or compression level are changed in requestedParams. Triggers transmission of new params to ZSTDMT (if available) then reset to 0. */
218
+ int bmi2; /* == 1 if the CPU supports BMI2 and 0 otherwise. CPU support is determined dynamically once per context lifetime. */
150
219
  ZSTD_CCtx_params requestedParams;
151
220
  ZSTD_CCtx_params appliedParams;
221
+ U32 dictID;
222
+
223
+ int workSpaceOversizedDuration;
152
224
  void* workSpace;
153
225
  size_t workSpaceSize;
154
226
  size_t blockSize;
155
- U64 pledgedSrcSizePlusOne; /* this way, 0 (default) == unknown */
156
- U64 consumedSrcSize;
227
+ unsigned long long pledgedSrcSizePlusOne; /* this way, 0 (default) == unknown */
228
+ unsigned long long consumedSrcSize;
229
+ unsigned long long producedCSize;
157
230
  XXH64_state_t xxhState;
158
231
  ZSTD_customMem customMem;
159
232
  size_t staticSize;
160
233
 
161
- seqStore_t seqStore; /* sequences storage ptrs */
162
- optState_t optState;
163
- ldmState_t ldmState; /* long distance matching state */
164
- U32* hashTable;
165
- U32* hashTable3;
166
- U32* chainTable;
167
- ZSTD_entropyCTables_t* entropy;
234
+ seqStore_t seqStore; /* sequences storage ptrs */
235
+ ldmState_t ldmState; /* long distance matching state */
236
+ rawSeq* ldmSequences; /* Storage for the ldm output sequences */
237
+ size_t maxNbLdmSequences;
238
+ rawSeqStore_t externSeqStore; /* Mutable reference to external sequences */
239
+ ZSTD_blockState_t blockState;
240
+ U32* entropyWorkspace; /* entropy workspace of HUF_WORKSPACE_SIZE bytes */
168
241
 
169
242
  /* streaming */
170
243
  char* inBuff;
@@ -180,7 +253,7 @@ struct ZSTD_CCtx_s {
180
253
  U32 frameEnded;
181
254
 
182
255
  /* Dictionary */
183
- ZSTD_CDict* cdictLocal;
256
+ ZSTD_localDict localDict;
184
257
  const ZSTD_CDict* cdict;
185
258
  ZSTD_prefixDict prefixDict; /* single-usage dictionary */
186
259
 
@@ -190,6 +263,16 @@ struct ZSTD_CCtx_s {
190
263
  #endif
191
264
  };
192
265
 
266
+ typedef enum { ZSTD_dtlm_fast, ZSTD_dtlm_full } ZSTD_dictTableLoadMethod_e;
267
+
268
+ typedef enum { ZSTD_noDict = 0, ZSTD_extDict = 1, ZSTD_dictMatchState = 2 } ZSTD_dictMode_e;
269
+
270
+
271
+ typedef size_t (*ZSTD_blockCompressor) (
272
+ ZSTD_matchState_t* bs, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
273
+ void const* src, size_t srcSize);
274
+ ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, ZSTD_dictMode_e dictMode);
275
+
193
276
 
194
277
  MEM_STATIC U32 ZSTD_LLcode(U32 litLength)
195
278
  {
@@ -229,16 +312,18 @@ MEM_STATIC U32 ZSTD_MLcode(U32 mlBase)
229
312
  */
230
313
  MEM_STATIC void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const void* literals, U32 offsetCode, size_t mlBase)
231
314
  {
232
- #if defined(ZSTD_DEBUG) && (ZSTD_DEBUG >= 6)
315
+ #if defined(DEBUGLEVEL) && (DEBUGLEVEL >= 6)
233
316
  static const BYTE* g_start = NULL;
234
317
  if (g_start==NULL) g_start = (const BYTE*)literals; /* note : index only works for compression within a single segment */
235
318
  { U32 const pos = (U32)((const BYTE*)literals - g_start);
236
- DEBUGLOG(6, "Cpos%7u :%3u literals, match%3u bytes at dist.code%7u",
319
+ DEBUGLOG(6, "Cpos%7u :%3u literals, match%4u bytes at offCode%7u",
237
320
  pos, (U32)litLength, (U32)mlBase+MINMATCH, (U32)offsetCode);
238
321
  }
239
322
  #endif
323
+ assert((size_t)(seqStorePtr->sequences - seqStorePtr->sequencesStart) < seqStorePtr->maxNbSeq);
240
324
  /* copy Literals */
241
- assert(seqStorePtr->lit + litLength <= seqStorePtr->litStart + 128 KB);
325
+ assert(seqStorePtr->maxNbLit <= 128 KB);
326
+ assert(seqStorePtr->lit + litLength <= seqStorePtr->litStart + seqStorePtr->maxNbLit);
242
327
  ZSTD_wildcopy(seqStorePtr->lit, literals, litLength);
243
328
  seqStorePtr->lit += litLength;
244
329
 
@@ -359,21 +444,28 @@ MEM_STATIC size_t ZSTD_count(const BYTE* pIn, const BYTE* pMatch, const BYTE* co
359
444
  }
360
445
 
361
446
  /** ZSTD_count_2segments() :
362
- * can count match length with `ip` & `match` in 2 different segments.
363
- * convention : on reaching mEnd, match count continue starting from iStart
364
- */
365
- MEM_STATIC size_t ZSTD_count_2segments(const BYTE* ip, const BYTE* match, const BYTE* iEnd, const BYTE* mEnd, const BYTE* iStart)
447
+ * can count match length with `ip` & `match` in 2 different segments.
448
+ * convention : on reaching mEnd, match count continues, starting from iStart
449
+ */
450
+ MEM_STATIC size_t
451
+ ZSTD_count_2segments(const BYTE* ip, const BYTE* match,
452
+ const BYTE* iEnd, const BYTE* mEnd, const BYTE* iStart)
366
453
  {
367
454
  const BYTE* const vEnd = MIN( ip + (mEnd - match), iEnd);
368
455
  size_t const matchLength = ZSTD_count(ip, match, vEnd);
369
456
  if (match + matchLength != mEnd) return matchLength;
457
+ DEBUGLOG(7, "ZSTD_count_2segments: found a 2-parts match (current length==%zu)", matchLength);
458
+ DEBUGLOG(7, "distance from match beginning to end dictionary = %zi", mEnd - match);
459
+ DEBUGLOG(7, "distance from current pos to end buffer = %zi", iEnd - ip);
460
+ DEBUGLOG(7, "next byte : ip==%02X, istart==%02X", ip[matchLength], *iStart);
461
+ DEBUGLOG(7, "final match length = %zu", matchLength + ZSTD_count(ip+matchLength, iStart, iEnd));
370
462
  return matchLength + ZSTD_count(ip+matchLength, iStart, iEnd);
371
463
  }
372
464
 
373
465
 
374
466
  /*-*************************************
375
- * Hashes
376
- ***************************************/
467
+ * Hashes
468
+ ***************************************/
377
469
  static const U32 prime3bytes = 506832829U;
378
470
  static U32 ZSTD_hash3(U32 u, U32 h) { return ((u << (32-24)) * prime3bytes) >> (32-h) ; }
379
471
  MEM_STATIC size_t ZSTD_hash3Ptr(const void* ptr, U32 h) { return ZSTD_hash3(MEM_readLE32(ptr), h); } /* only in zstd_opt.h */
@@ -411,6 +503,290 @@ MEM_STATIC size_t ZSTD_hashPtr(const void* p, U32 hBits, U32 mls)
411
503
  }
412
504
  }
413
505
 
506
+ /** ZSTD_ipow() :
507
+ * Return base^exponent.
508
+ */
509
+ static U64 ZSTD_ipow(U64 base, U64 exponent)
510
+ {
511
+ U64 power = 1;
512
+ while (exponent) {
513
+ if (exponent & 1) power *= base;
514
+ exponent >>= 1;
515
+ base *= base;
516
+ }
517
+ return power;
518
+ }
519
+
520
+ #define ZSTD_ROLL_HASH_CHAR_OFFSET 10
521
+
522
+ /** ZSTD_rollingHash_append() :
523
+ * Add the buffer to the hash value.
524
+ */
525
+ static U64 ZSTD_rollingHash_append(U64 hash, void const* buf, size_t size)
526
+ {
527
+ BYTE const* istart = (BYTE const*)buf;
528
+ size_t pos;
529
+ for (pos = 0; pos < size; ++pos) {
530
+ hash *= prime8bytes;
531
+ hash += istart[pos] + ZSTD_ROLL_HASH_CHAR_OFFSET;
532
+ }
533
+ return hash;
534
+ }
535
+
536
+ /** ZSTD_rollingHash_compute() :
537
+ * Compute the rolling hash value of the buffer.
538
+ */
539
+ MEM_STATIC U64 ZSTD_rollingHash_compute(void const* buf, size_t size)
540
+ {
541
+ return ZSTD_rollingHash_append(0, buf, size);
542
+ }
543
+
544
+ /** ZSTD_rollingHash_primePower() :
545
+ * Compute the primePower to be passed to ZSTD_rollingHash_rotate() for a hash
546
+ * over a window of `length` bytes.
547
+ */
548
+ MEM_STATIC U64 ZSTD_rollingHash_primePower(U32 length)
549
+ {
550
+ return ZSTD_ipow(prime8bytes, length - 1);
551
+ }
552
+
553
+ /** ZSTD_rollingHash_rotate() :
554
+ * Rotate the rolling hash by one byte.
555
+ */
556
+ MEM_STATIC U64 ZSTD_rollingHash_rotate(U64 hash, BYTE toRemove, BYTE toAdd, U64 primePower)
557
+ {
558
+ hash -= (toRemove + ZSTD_ROLL_HASH_CHAR_OFFSET) * primePower;
559
+ hash *= prime8bytes;
560
+ hash += toAdd + ZSTD_ROLL_HASH_CHAR_OFFSET;
561
+ return hash;
562
+ }
563
+
564
+ /*-*************************************
565
+ * Round buffer management
566
+ ***************************************/
567
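+ /* Maximum allowed value of the current index; beyond it, ZSTD_window_needOverflowCorrection() reports that correction is needed */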
568
+ #define ZSTD_CURRENT_MAX ((3U << 29) + (1U << ZSTD_WINDOWLOG_MAX))
569
+ /* Maximum chunk size before overflow correction needs to be called again */
570
+ #define ZSTD_CHUNKSIZE_MAX \
571
+ ( ((U32)-1) /* Maximum ending current index */ \
572
+ - ZSTD_CURRENT_MAX) /* Maximum beginning lowLimit */
573
+
574
+ /**
575
+ * ZSTD_window_clear():
576
+ * Clears the window containing the history by simply setting it to empty.
577
+ */
578
+ MEM_STATIC void ZSTD_window_clear(ZSTD_window_t* window)
579
+ {
580
+ size_t const endT = (size_t)(window->nextSrc - window->base);
581
+ U32 const end = (U32)endT;
582
+
583
+ window->lowLimit = end;
584
+ window->dictLimit = end;
585
+ }
586
+
587
+ /**
588
+ * ZSTD_window_hasExtDict():
589
+ * Returns non-zero if the window has a non-empty extDict.
590
+ */
591
+ MEM_STATIC U32 ZSTD_window_hasExtDict(ZSTD_window_t const window)
592
+ {
593
+ return window.lowLimit < window.dictLimit;
594
+ }
595
+
596
+ /**
597
+ * ZSTD_matchState_dictMode():
598
+ * Inspects the provided matchState and figures out what dictMode should be
599
+ * passed to the compressor.
600
+ */
601
+ MEM_STATIC ZSTD_dictMode_e ZSTD_matchState_dictMode(const ZSTD_matchState_t *ms)
602
+ {
603
+ return ZSTD_window_hasExtDict(ms->window) ?
604
+ ZSTD_extDict :
605
+ ms->dictMatchState != NULL ?
606
+ ZSTD_dictMatchState :
607
+ ZSTD_noDict;
608
+ }
609
+
610
+ /**
611
+ * ZSTD_window_needOverflowCorrection():
612
+ * Returns non-zero if the indices are getting too large and need overflow
613
+ * protection.
614
+ */
615
+ MEM_STATIC U32 ZSTD_window_needOverflowCorrection(ZSTD_window_t const window,
616
+ void const* srcEnd)
617
+ {
618
+ U32 const current = (U32)((BYTE const*)srcEnd - window.base);
619
+ return current > ZSTD_CURRENT_MAX;
620
+ }
621
+
622
+ /**
623
+ * ZSTD_window_correctOverflow():
624
+ * Reduces the indices to protect from index overflow.
625
+ * Returns the correction made to the indices, which must be applied to every
626
+ * stored index.
627
+ *
628
+ * The least significant cycleLog bits of the indices must remain the same,
629
+ * which may be 0. Every index up to maxDist in the past must be valid.
630
+ * NOTE: (maxDist & cycleMask) must be zero.
631
+ */
632
+ MEM_STATIC U32 ZSTD_window_correctOverflow(ZSTD_window_t* window, U32 cycleLog,
633
+ U32 maxDist, void const* src)
634
+ {
635
+ /* preemptive overflow correction:
636
+ * 1. correction is large enough:
637
+ * lowLimit > (3<<29) ==> current > 3<<29 + 1<<windowLog
638
+ * 1<<windowLog <= newCurrent < 1<<chainLog + 1<<windowLog
639
+ *
640
+ * current - newCurrent
641
+ * > (3<<29 + 1<<windowLog) - (1<<windowLog + 1<<chainLog)
642
+ * > (3<<29) - (1<<chainLog)
643
+ * > (3<<29) - (1<<30) (NOTE: chainLog <= 30)
644
+ * > 1<<29
645
+ *
646
+ * 2. (ip+ZSTD_CHUNKSIZE_MAX - cctx->base) doesn't overflow:
647
+ * After correction, current is less than (1<<chainLog + 1<<windowLog).
648
+ * In 64-bit mode we are safe, because we have 64-bit ptrdiff_t.
649
+ * In 32-bit mode we are safe, because (chainLog <= 29), so
650
+ * ip+ZSTD_CHUNKSIZE_MAX - cctx->base < 1<<32.
651
+ * 3. (cctx->lowLimit + 1<<windowLog) < 1<<32:
652
+ * windowLog <= 31 ==> 3<<29 + 1<<windowLog < 7<<29 < 1<<32.
653
+ */
654
+ U32 const cycleMask = (1U << cycleLog) - 1;
655
+ U32 const current = (U32)((BYTE const*)src - window->base);
656
+ U32 const newCurrent = (current & cycleMask) + maxDist;
657
+ U32 const correction = current - newCurrent;
658
+ assert((maxDist & cycleMask) == 0);
659
+ assert(current > newCurrent);
660
+ /* Loose bound, should be around 1<<29 (see above) */
661
+ assert(correction > 1<<28);
662
+
663
+ window->base += correction;
664
+ window->dictBase += correction;
665
+ window->lowLimit -= correction;
666
+ window->dictLimit -= correction;
667
+
668
+ DEBUGLOG(4, "Correction of 0x%x bytes to lowLimit=0x%x", correction,
669
+ window->lowLimit);
670
+ return correction;
671
+ }
672
+
673
+ /**
674
+ * ZSTD_window_enforceMaxDist():
675
+ * Updates lowLimit so that:
676
+ * (srcEnd - base) - lowLimit == maxDist + loadedDictEnd
677
+ *
678
+ * This allows a simple check that index >= lowLimit to see if index is valid.
679
+ * This must be called before a block compression call, with srcEnd as the block
680
+ * source end.
681
+ *
682
+ * If loadedDictEndPtr is not NULL, we set it to zero once we update lowLimit.
683
+ * This is because dictionaries are allowed to be referenced as long as the last
684
+ * byte of the dictionary is in the window, but once they are out of range,
685
+ * they cannot be referenced. If loadedDictEndPtr is NULL, we use
686
+ * loadedDictEnd == 0. Likewise, if dictMatchStatePtr is not NULL, *dictMatchStatePtr is set to NULL at the same time.
687
+ *
688
+ * In normal dict mode, the dict is between lowLimit and dictLimit. In
689
+ * dictMatchState mode, lowLimit and dictLimit are the same, and the dictionary
690
+ * is below them. forceWindow and dictMatchState are therefore incompatible.
691
+ */
692
+ MEM_STATIC void
693
+ ZSTD_window_enforceMaxDist(ZSTD_window_t* window,
694
+ void const* srcEnd,
695
+ U32 maxDist,
696
+ U32* loadedDictEndPtr,
697
+ const ZSTD_matchState_t** dictMatchStatePtr)
698
+ {
699
+ U32 const blockEndIdx = (U32)((BYTE const*)srcEnd - window->base);
700
+ U32 loadedDictEnd = (loadedDictEndPtr != NULL) ? *loadedDictEndPtr : 0;
701
+ DEBUGLOG(5, "ZSTD_window_enforceMaxDist: blockEndIdx=%u, maxDist=%u",
702
+ (unsigned)blockEndIdx, (unsigned)maxDist);
703
+ if (blockEndIdx > maxDist + loadedDictEnd) {
704
+ U32 const newLowLimit = blockEndIdx - maxDist;
705
+ if (window->lowLimit < newLowLimit) window->lowLimit = newLowLimit;
706
+ if (window->dictLimit < window->lowLimit) {
707
+ DEBUGLOG(5, "Update dictLimit to match lowLimit, from %u to %u",
708
+ (unsigned)window->dictLimit, (unsigned)window->lowLimit);
709
+ window->dictLimit = window->lowLimit;
710
+ }
711
+ if (loadedDictEndPtr)
712
+ *loadedDictEndPtr = 0;
713
+ if (dictMatchStatePtr)
714
+ *dictMatchStatePtr = NULL;
715
+ }
716
+ }
717
+
718
+ /**
719
+ * ZSTD_window_update():
720
+ * Updates the window by appending [src, src + srcSize) to the window.
721
+ * If it is not contiguous, the current prefix becomes the extDict, and we
722
+ * forget about the previous extDict. Handles overlap of the prefix and extDict.
723
+ * Returns non-zero if the segment is contiguous.
724
+ */
725
+ MEM_STATIC U32 ZSTD_window_update(ZSTD_window_t* window,
726
+ void const* src, size_t srcSize)
727
+ {
728
+ BYTE const* const ip = (BYTE const*)src;
729
+ U32 contiguous = 1;
730
+ DEBUGLOG(5, "ZSTD_window_update");
731
+ /* Check if blocks follow each other */
732
+ if (src != window->nextSrc) {
733
+ /* not contiguous */
734
+ size_t const distanceFromBase = (size_t)(window->nextSrc - window->base);
735
+ DEBUGLOG(5, "Non contiguous blocks, new segment starts at %u", window->dictLimit);
736
+ window->lowLimit = window->dictLimit;
737
+ assert(distanceFromBase == (size_t)(U32)distanceFromBase); /* should never overflow */
738
+ window->dictLimit = (U32)distanceFromBase;
739
+ window->dictBase = window->base;
740
+ window->base = ip - distanceFromBase;
741
+ // ms->nextToUpdate = window->dictLimit;
742
+ if (window->dictLimit - window->lowLimit < HASH_READ_SIZE) window->lowLimit = window->dictLimit; /* too small extDict */
743
+ contiguous = 0;
744
+ }
745
+ window->nextSrc = ip + srcSize;
746
+ /* if input and dictionary overlap : reduce dictionary (area presumed modified by input) */
747
+ if ( (ip+srcSize > window->dictBase + window->lowLimit)
748
+ & (ip < window->dictBase + window->dictLimit)) {
749
+ ptrdiff_t const highInputIdx = (ip + srcSize) - window->dictBase;
750
+ U32 const lowLimitMax = (highInputIdx > (ptrdiff_t)window->dictLimit) ? window->dictLimit : (U32)highInputIdx;
751
+ window->lowLimit = lowLimitMax;
752
+ DEBUGLOG(5, "Overlapping extDict and input : new lowLimit = %u", window->lowLimit);
753
+ }
754
+ return contiguous;
755
+ }
756
+
757
+
758
+ /* debug functions */
759
+ #if (DEBUGLEVEL>=2)
760
+
761
+ MEM_STATIC double ZSTD_fWeight(U32 rawStat)
762
+ {
763
+ U32 const fp_accuracy = 8;
764
+ U32 const fp_multiplier = (1 << fp_accuracy);
765
+ U32 const newStat = rawStat + 1;
766
+ U32 const hb = ZSTD_highbit32(newStat);
767
+ U32 const BWeight = hb * fp_multiplier;
768
+ U32 const FWeight = (newStat << fp_accuracy) >> hb;
769
+ U32 const weight = BWeight + FWeight;
770
+ assert(hb + fp_accuracy < 31);
771
+ return (double)weight / fp_multiplier;
772
+ }
773
+
774
+ /* display a table content,
775
+ * listing each element, its frequency, and its predicted bit cost */
776
+ MEM_STATIC void ZSTD_debugTable(const U32* table, U32 max)
777
+ {
778
+ unsigned u, sum;
779
+ for (u=0, sum=0; u<=max; u++) sum += table[u];
780
+ DEBUGLOG(2, "total nb elts: %u", sum);
781
+ for (u=0; u<=max; u++) {
782
+ DEBUGLOG(2, "%2u: %5u (%.2f)",
783
+ u, table[u], ZSTD_fWeight(sum) - ZSTD_fWeight(table[u]) );
784
+ }
785
+ }
786
+
787
+ #endif
788
+
789
+
414
790
  #if defined (__cplusplus)
415
791
  }
416
792
  #endif
@@ -421,6 +797,13 @@ MEM_STATIC size_t ZSTD_hashPtr(const void* p, U32 hBits, U32 mls)
421
797
  * These prototypes shall only be called from within lib/compress
422
798
  * ============================================================== */
423
799
 
800
+ /* ZSTD_getCParamsFromCCtxParams() :
801
+ * cParams are built depending on compressionLevel, src size hints,
802
+ * LDM and manually set compression parameters.
803
+ */
804
+ ZSTD_compressionParameters ZSTD_getCParamsFromCCtxParams(
805
+ const ZSTD_CCtx_params* CCtxParams, U64 srcSizeHint, size_t dictSize);
806
+
424
807
  /*! ZSTD_initCStream_internal() :
425
808
  * Private use only. Init streaming operation.
426
809
  * expects params to be valid.
@@ -431,12 +814,7 @@ size_t ZSTD_initCStream_internal(ZSTD_CStream* zcs,
431
814
  const ZSTD_CDict* cdict,
432
815
  ZSTD_CCtx_params params, unsigned long long pledgedSrcSize);
433
816
 
434
- /*! ZSTD_compressStream_generic() :
435
- * Private use only. To be called from zstdmt_compress.c in single-thread mode. */
436
- size_t ZSTD_compressStream_generic(ZSTD_CStream* zcs,
437
- ZSTD_outBuffer* output,
438
- ZSTD_inBuffer* input,
439
- ZSTD_EndDirective const flushMode);
817
+ void ZSTD_resetSeqStore(seqStore_t* ssPtr);
440
818
 
441
819
  /*! ZSTD_getCParamsFromCDict() :
442
820
  * as the name implies */
@@ -446,7 +824,8 @@ ZSTD_compressionParameters ZSTD_getCParamsFromCDict(const ZSTD_CDict* cdict);
446
824
  * Private use only. To be called from zstdmt_compress.c. */
447
825
  size_t ZSTD_compressBegin_advanced_internal(ZSTD_CCtx* cctx,
448
826
  const void* dict, size_t dictSize,
449
- ZSTD_dictMode_e dictMode,
827
+ ZSTD_dictContentType_e dictContentType,
828
+ ZSTD_dictTableLoadMethod_e dtlm,
450
829
  const ZSTD_CDict* cdict,
451
830
  ZSTD_CCtx_params params,
452
831
  unsigned long long pledgedSrcSize);
@@ -459,4 +838,26 @@ size_t ZSTD_compress_advanced_internal(ZSTD_CCtx* cctx,
459
838
  const void* dict,size_t dictSize,
460
839
  ZSTD_CCtx_params params);
461
840
 
841
+
842
+ /* ZSTD_writeLastEmptyBlock() :
843
+ * output an empty Block with end-of-frame mark to complete a frame
844
+ * @return : size of data written into `dst` (== ZSTD_blockHeaderSize (defined in zstd_internal.h))
845
+ * or an error code if `dstCapacity` is too small (<ZSTD_blockHeaderSize)
846
+ */
847
+ size_t ZSTD_writeLastEmptyBlock(void* dst, size_t dstCapacity);
848
+
849
+
850
+ /* ZSTD_referenceExternalSequences() :
851
+ * Must be called before starting a compression operation.
852
+ * seqs must parse a prefix of the source.
853
+ * This cannot be used when long range matching is enabled.
854
+ * Zstd will use these sequences, and pass the literals to a secondary block
855
+ * compressor.
856
+ * @return : An error code on failure.
857
+ * NOTE: seqs are not verified! Invalid sequences can cause out-of-bounds memory
858
+ * access and data corruption.
859
+ */
860
+ size_t ZSTD_referenceExternalSequences(ZSTD_CCtx* cctx, rawSeq* seq, size_t nbSeq);
861
+
862
+
462
863
  #endif /* ZSTD_COMPRESS_H */