extzstd 0.2 → 0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (88)
  1. checksums.yaml +4 -4
  2. data/HISTORY.ja.md +13 -0
  3. data/README.md +17 -14
  4. data/contrib/zstd/{NEWS → CHANGELOG} +115 -2
  5. data/contrib/zstd/CODE_OF_CONDUCT.md +5 -0
  6. data/contrib/zstd/Makefile +99 -53
  7. data/contrib/zstd/README.md +59 -39
  8. data/contrib/zstd/TESTING.md +1 -1
  9. data/contrib/zstd/appveyor.yml +17 -6
  10. data/contrib/zstd/lib/BUCK +29 -2
  11. data/contrib/zstd/lib/Makefile +118 -21
  12. data/contrib/zstd/lib/README.md +84 -44
  13. data/contrib/zstd/lib/common/bitstream.h +17 -33
  14. data/contrib/zstd/lib/common/compiler.h +62 -8
  15. data/contrib/zstd/lib/common/cpu.h +215 -0
  16. data/contrib/zstd/lib/common/debug.c +44 -0
  17. data/contrib/zstd/lib/common/debug.h +134 -0
  18. data/contrib/zstd/lib/common/entropy_common.c +16 -1
  19. data/contrib/zstd/lib/common/error_private.c +7 -0
  20. data/contrib/zstd/lib/common/fse.h +48 -44
  21. data/contrib/zstd/lib/common/fse_decompress.c +3 -3
  22. data/contrib/zstd/lib/common/huf.h +169 -113
  23. data/contrib/zstd/lib/common/mem.h +20 -2
  24. data/contrib/zstd/lib/common/pool.c +135 -49
  25. data/contrib/zstd/lib/common/pool.h +40 -21
  26. data/contrib/zstd/lib/common/threading.c +2 -2
  27. data/contrib/zstd/lib/common/threading.h +12 -12
  28. data/contrib/zstd/lib/common/xxhash.c +3 -2
  29. data/contrib/zstd/lib/common/zstd_common.c +3 -6
  30. data/contrib/zstd/lib/common/zstd_errors.h +17 -7
  31. data/contrib/zstd/lib/common/zstd_internal.h +76 -48
  32. data/contrib/zstd/lib/compress/fse_compress.c +89 -209
  33. data/contrib/zstd/lib/compress/hist.c +203 -0
  34. data/contrib/zstd/lib/compress/hist.h +95 -0
  35. data/contrib/zstd/lib/compress/huf_compress.c +188 -80
  36. data/contrib/zstd/lib/compress/zstd_compress.c +2500 -1203
  37. data/contrib/zstd/lib/compress/zstd_compress_internal.h +463 -62
  38. data/contrib/zstd/lib/compress/zstd_double_fast.c +321 -131
  39. data/contrib/zstd/lib/compress/zstd_double_fast.h +13 -4
  40. data/contrib/zstd/lib/compress/zstd_fast.c +335 -108
  41. data/contrib/zstd/lib/compress/zstd_fast.h +12 -6
  42. data/contrib/zstd/lib/compress/zstd_lazy.c +654 -313
  43. data/contrib/zstd/lib/compress/zstd_lazy.h +44 -16
  44. data/contrib/zstd/lib/compress/zstd_ldm.c +310 -420
  45. data/contrib/zstd/lib/compress/zstd_ldm.h +63 -26
  46. data/contrib/zstd/lib/compress/zstd_opt.c +773 -325
  47. data/contrib/zstd/lib/compress/zstd_opt.h +31 -5
  48. data/contrib/zstd/lib/compress/zstdmt_compress.c +1468 -518
  49. data/contrib/zstd/lib/compress/zstdmt_compress.h +96 -45
  50. data/contrib/zstd/lib/decompress/huf_decompress.c +518 -282
  51. data/contrib/zstd/lib/decompress/zstd_ddict.c +240 -0
  52. data/contrib/zstd/lib/decompress/zstd_ddict.h +44 -0
  53. data/contrib/zstd/lib/decompress/zstd_decompress.c +613 -1513
  54. data/contrib/zstd/lib/decompress/zstd_decompress_block.c +1311 -0
  55. data/contrib/zstd/lib/decompress/zstd_decompress_block.h +59 -0
  56. data/contrib/zstd/lib/decompress/zstd_decompress_internal.h +175 -0
  57. data/contrib/zstd/lib/dictBuilder/cover.c +194 -113
  58. data/contrib/zstd/lib/dictBuilder/cover.h +112 -0
  59. data/contrib/zstd/lib/dictBuilder/divsufsort.c +3 -3
  60. data/contrib/zstd/lib/dictBuilder/fastcover.c +740 -0
  61. data/contrib/zstd/lib/dictBuilder/zdict.c +142 -106
  62. data/contrib/zstd/lib/dictBuilder/zdict.h +115 -49
  63. data/contrib/zstd/lib/legacy/zstd_legacy.h +44 -12
  64. data/contrib/zstd/lib/legacy/zstd_v01.c +41 -10
  65. data/contrib/zstd/lib/legacy/zstd_v01.h +12 -7
  66. data/contrib/zstd/lib/legacy/zstd_v02.c +37 -12
  67. data/contrib/zstd/lib/legacy/zstd_v02.h +12 -7
  68. data/contrib/zstd/lib/legacy/zstd_v03.c +38 -12
  69. data/contrib/zstd/lib/legacy/zstd_v03.h +12 -7
  70. data/contrib/zstd/lib/legacy/zstd_v04.c +55 -174
  71. data/contrib/zstd/lib/legacy/zstd_v04.h +12 -7
  72. data/contrib/zstd/lib/legacy/zstd_v05.c +59 -31
  73. data/contrib/zstd/lib/legacy/zstd_v05.h +12 -7
  74. data/contrib/zstd/lib/legacy/zstd_v06.c +48 -20
  75. data/contrib/zstd/lib/legacy/zstd_v06.h +10 -5
  76. data/contrib/zstd/lib/legacy/zstd_v07.c +62 -29
  77. data/contrib/zstd/lib/legacy/zstd_v07.h +10 -5
  78. data/contrib/zstd/lib/zstd.h +1346 -832
  79. data/ext/extzstd.c +27 -19
  80. data/ext/extzstd_stream.c +20 -4
  81. data/ext/zstd_compress.c +1 -0
  82. data/ext/zstd_decompress.c +4 -0
  83. data/ext/zstd_dictbuilder.c +4 -0
  84. data/ext/zstd_dictbuilder_fastcover.c +5 -0
  85. data/lib/extzstd.rb +52 -220
  86. data/lib/extzstd/version.rb +1 -1
  87. metadata +21 -7
  88. data/contrib/zstd/circle.yml +0 -63
@@ -27,11 +27,19 @@
27
27
  extern "C" {
28
28
  #endif
29
29
 
30
+
30
31
  /*-*************************************
31
32
  * Constants
32
33
  ***************************************/
33
- static const U32 g_searchStrength = 8;
34
- #define HASH_READ_SIZE 8
34
+ #define kSearchStrength 8
35
+ #define HASH_READ_SIZE 8
36
+ #define ZSTD_DUBT_UNSORTED_MARK 1 /* For btlazy2 strategy, index 1 now means "unsorted".
37
+ It could be confused for a real successor at index "1", if sorted as larger than its predecessor.
38
+ It's not a big deal though : candidate will just be sorted again.
39
+ Additionally, candidate position 1 will be lost.
40
+ But candidate 1 cannot hide a large tree of candidates, so it's a minimal loss.
41
+ The benefit is that ZSTD_DUBT_UNSORTED_MARK cannot be mishandled after table re-use with a different strategy
42
+ Constant required by ZSTD_compressBlock_btlazy2() and ZSTD_reduceTable_internal() */
35
43
 
36
44
 
37
45
  /*-*************************************
@@ -43,19 +51,34 @@ typedef enum { zcss_init=0, zcss_load, zcss_flush } ZSTD_cStreamStage;
43
51
  typedef struct ZSTD_prefixDict_s {
44
52
  const void* dict;
45
53
  size_t dictSize;
46
- ZSTD_dictMode_e dictMode;
54
+ ZSTD_dictContentType_e dictContentType;
47
55
  } ZSTD_prefixDict;
48
56
 
49
57
  typedef struct {
50
- U32 hufCTable[HUF_CTABLE_SIZE_U32(255)];
58
+ void* dictBuffer;
59
+ void const* dict;
60
+ size_t dictSize;
61
+ ZSTD_dictContentType_e dictContentType;
62
+ ZSTD_CDict* cdict;
63
+ } ZSTD_localDict;
64
+
65
+ typedef struct {
66
+ U32 CTable[HUF_CTABLE_SIZE_U32(255)];
67
+ HUF_repeat repeatMode;
68
+ } ZSTD_hufCTables_t;
69
+
70
+ typedef struct {
51
71
  FSE_CTable offcodeCTable[FSE_CTABLE_SIZE_U32(OffFSELog, MaxOff)];
52
72
  FSE_CTable matchlengthCTable[FSE_CTABLE_SIZE_U32(MLFSELog, MaxML)];
53
73
  FSE_CTable litlengthCTable[FSE_CTABLE_SIZE_U32(LLFSELog, MaxLL)];
54
- U32 workspace[HUF_WORKSPACE_SIZE_U32];
55
- HUF_repeat hufCTable_repeatMode;
56
74
  FSE_repeat offcode_repeatMode;
57
75
  FSE_repeat matchlength_repeatMode;
58
76
  FSE_repeat litlength_repeatMode;
77
+ } ZSTD_fseCTables_t;
78
+
79
+ typedef struct {
80
+ ZSTD_hufCTables_t huf;
81
+ ZSTD_fseCTables_t fse;
59
82
  } ZSTD_entropyCTables_t;
60
83
 
61
84
  typedef struct {
@@ -71,34 +94,71 @@ typedef struct {
71
94
  U32 rep[ZSTD_REP_NUM];
72
95
  } ZSTD_optimal_t;
73
96
 
97
+ typedef enum { zop_dynamic=0, zop_predef } ZSTD_OptPrice_e;
98
+
74
99
  typedef struct {
75
100
  /* All tables are allocated inside cctx->workspace by ZSTD_resetCCtx_internal() */
76
- U32* litFreq; /* table of literals statistics, of size 256 */
77
- U32* litLengthFreq; /* table of litLength statistics, of size (MaxLL+1) */
78
- U32* matchLengthFreq; /* table of matchLength statistics, of size (MaxML+1) */
79
- U32* offCodeFreq; /* table of offCode statistics, of size (MaxOff+1) */
80
- ZSTD_match_t* matchTable; /* list of found matches, of size ZSTD_OPT_NUM+1 */
81
- ZSTD_optimal_t* priceTable; /* All positions tracked by optimal parser, of size ZSTD_OPT_NUM+1 */
101
+ unsigned* litFreq; /* table of literals statistics, of size 256 */
102
+ unsigned* litLengthFreq; /* table of litLength statistics, of size (MaxLL+1) */
103
+ unsigned* matchLengthFreq; /* table of matchLength statistics, of size (MaxML+1) */
104
+ unsigned* offCodeFreq; /* table of offCode statistics, of size (MaxOff+1) */
105
+ ZSTD_match_t* matchTable; /* list of found matches, of size ZSTD_OPT_NUM+1 */
106
+ ZSTD_optimal_t* priceTable; /* All positions tracked by optimal parser, of size ZSTD_OPT_NUM+1 */
82
107
 
83
108
  U32 litSum; /* nb of literals */
84
109
  U32 litLengthSum; /* nb of litLength codes */
85
110
  U32 matchLengthSum; /* nb of matchLength codes */
86
111
  U32 offCodeSum; /* nb of offset codes */
87
- /* begin updated by ZSTD_setLog2Prices */
88
- U32 log2litSum; /* pow2 to compare log2(litfreq) to */
89
- U32 log2litLengthSum; /* pow2 to compare log2(llfreq) to */
90
- U32 log2matchLengthSum; /* pow2 to compare log2(mlfreq) to */
91
- U32 log2offCodeSum; /* pow2 to compare log2(offreq) to */
92
- /* end : updated by ZSTD_setLog2Prices */
93
- U32 staticPrices; /* prices follow a pre-defined cost structure, statistics are irrelevant */
112
+ U32 litSumBasePrice; /* to compare to log2(litfreq) */
113
+ U32 litLengthSumBasePrice; /* to compare to log2(llfreq) */
114
+ U32 matchLengthSumBasePrice;/* to compare to log2(mlfreq) */
115
+ U32 offCodeSumBasePrice; /* to compare to log2(offreq) */
116
+ ZSTD_OptPrice_e priceType; /* prices can be determined dynamically, or follow a pre-defined cost structure */
117
+ const ZSTD_entropyCTables_t* symbolCosts; /* pre-calculated dictionary statistics */
118
+ ZSTD_literalCompressionMode_e literalCompressionMode;
94
119
  } optState_t;
95
120
 
121
+ typedef struct {
122
+ ZSTD_entropyCTables_t entropy;
123
+ U32 rep[ZSTD_REP_NUM];
124
+ } ZSTD_compressedBlockState_t;
125
+
126
+ typedef struct {
127
+ BYTE const* nextSrc; /* next block here to continue on current prefix */
128
+ BYTE const* base; /* All regular indexes relative to this position */
129
+ BYTE const* dictBase; /* extDict indexes relative to this position */
130
+ U32 dictLimit; /* below that point, need extDict */
131
+ U32 lowLimit; /* below that point, no more data */
132
+ } ZSTD_window_t;
133
+
134
+ typedef struct ZSTD_matchState_t ZSTD_matchState_t;
135
+ struct ZSTD_matchState_t {
136
+ ZSTD_window_t window; /* State for window round buffer management */
137
+ U32 loadedDictEnd; /* index of end of dictionary */
138
+ U32 nextToUpdate; /* index from which to continue table update */
139
+ U32 nextToUpdate3; /* index from which to continue table update */
140
+ U32 hashLog3; /* dispatch table : larger == faster, more memory */
141
+ U32* hashTable;
142
+ U32* hashTable3;
143
+ U32* chainTable;
144
+ optState_t opt; /* optimal parser state */
145
+ const ZSTD_matchState_t * dictMatchState;
146
+ ZSTD_compressionParameters cParams;
147
+ };
148
+
149
+ typedef struct {
150
+ ZSTD_compressedBlockState_t* prevCBlock;
151
+ ZSTD_compressedBlockState_t* nextCBlock;
152
+ ZSTD_matchState_t matchState;
153
+ } ZSTD_blockState_t;
154
+
96
155
  typedef struct {
97
156
  U32 offset;
98
157
  U32 checksum;
99
158
  } ldmEntry_t;
100
159
 
101
160
  typedef struct {
161
+ ZSTD_window_t window; /* State for the window round buffer management */
102
162
  ldmEntry_t* hashTable;
103
163
  BYTE* bucketOffsets; /* Next position in bucket to insert entry */
104
164
  U64 hashPower; /* Used to compute the rolling hash.
@@ -110,61 +170,74 @@ typedef struct {
110
170
  U32 hashLog; /* Log size of hashTable */
111
171
  U32 bucketSizeLog; /* Log bucket size for collision resolution, at most 8 */
112
172
  U32 minMatchLength; /* Minimum match length */
113
- U32 hashEveryLog; /* Log number of entries to skip */
173
+ U32 hashRateLog; /* Log number of entries to skip */
174
+ U32 windowLog; /* Window log for the LDM */
114
175
  } ldmParams_t;
115
176
 
177
+ typedef struct {
178
+ U32 offset;
179
+ U32 litLength;
180
+ U32 matchLength;
181
+ } rawSeq;
182
+
183
+ typedef struct {
184
+ rawSeq* seq; /* The start of the sequences */
185
+ size_t pos; /* The position where reading stopped. <= size. */
186
+ size_t size; /* The number of sequences. <= capacity. */
187
+ size_t capacity; /* The capacity starting from `seq` pointer */
188
+ } rawSeqStore_t;
189
+
116
190
  struct ZSTD_CCtx_params_s {
117
191
  ZSTD_format_e format;
118
192
  ZSTD_compressionParameters cParams;
119
193
  ZSTD_frameParameters fParams;
120
194
 
121
195
  int compressionLevel;
122
- U32 forceWindow; /* force back-references to respect limit of
196
+ int forceWindow; /* force back-references to respect limit of
123
197
  * 1<<wLog, even for dictionary */
124
198
 
199
+ ZSTD_dictAttachPref_e attachDictPref;
200
+ ZSTD_literalCompressionMode_e literalCompressionMode;
201
+
125
202
  /* Multithreading: used to pass parameters to mtctx */
126
- U32 nbThreads;
127
- unsigned jobSize;
128
- unsigned overlapSizeLog;
203
+ int nbWorkers;
204
+ size_t jobSize;
205
+ int overlapLog;
206
+ int rsyncable;
129
207
 
130
208
  /* Long distance matching parameters */
131
209
  ldmParams_t ldmParams;
132
210
 
133
- /* For use with createCCtxParams() and freeCCtxParams() only */
211
+ /* Internal use, for createCCtxParams() and freeCCtxParams() only */
134
212
  ZSTD_customMem customMem;
135
-
136
213
  }; /* typedef'd to ZSTD_CCtx_params within "zstd.h" */
137
214
 
138
215
  struct ZSTD_CCtx_s {
139
- const BYTE* nextSrc; /* next block here to continue on current prefix */
140
- const BYTE* base; /* All regular indexes relative to this position */
141
- const BYTE* dictBase; /* extDict indexes relative to this position */
142
- U32 dictLimit; /* below that point, need extDict */
143
- U32 lowLimit; /* below that point, no more data */
144
- U32 nextToUpdate; /* index from which to continue dictionary update */
145
- U32 nextToUpdate3; /* index from which to continue dictionary update */
146
- U32 hashLog3; /* dispatch table : larger == faster, more memory */
147
- U32 loadedDictEnd; /* index of end of dictionary */
148
216
  ZSTD_compressionStage_e stage;
149
- U32 dictID;
217
+ int cParamsChanged; /* == 1 if cParams(except wlog) or compression level are changed in requestedParams. Triggers transmission of new params to ZSTDMT (if available) then reset to 0. */
218
+ int bmi2; /* == 1 if the CPU supports BMI2 and 0 otherwise. CPU support is determined dynamically once per context lifetime. */
150
219
  ZSTD_CCtx_params requestedParams;
151
220
  ZSTD_CCtx_params appliedParams;
221
+ U32 dictID;
222
+
223
+ int workSpaceOversizedDuration;
152
224
  void* workSpace;
153
225
  size_t workSpaceSize;
154
226
  size_t blockSize;
155
- U64 pledgedSrcSizePlusOne; /* this way, 0 (default) == unknown */
156
- U64 consumedSrcSize;
227
+ unsigned long long pledgedSrcSizePlusOne; /* this way, 0 (default) == unknown */
228
+ unsigned long long consumedSrcSize;
229
+ unsigned long long producedCSize;
157
230
  XXH64_state_t xxhState;
158
231
  ZSTD_customMem customMem;
159
232
  size_t staticSize;
160
233
 
161
- seqStore_t seqStore; /* sequences storage ptrs */
162
- optState_t optState;
163
- ldmState_t ldmState; /* long distance matching state */
164
- U32* hashTable;
165
- U32* hashTable3;
166
- U32* chainTable;
167
- ZSTD_entropyCTables_t* entropy;
234
+ seqStore_t seqStore; /* sequences storage ptrs */
235
+ ldmState_t ldmState; /* long distance matching state */
236
+ rawSeq* ldmSequences; /* Storage for the ldm output sequences */
237
+ size_t maxNbLdmSequences;
238
+ rawSeqStore_t externSeqStore; /* Mutable reference to external sequences */
239
+ ZSTD_blockState_t blockState;
240
+ U32* entropyWorkspace; /* entropy workspace of HUF_WORKSPACE_SIZE bytes */
168
241
 
169
242
  /* streaming */
170
243
  char* inBuff;
@@ -180,7 +253,7 @@ struct ZSTD_CCtx_s {
180
253
  U32 frameEnded;
181
254
 
182
255
  /* Dictionary */
183
- ZSTD_CDict* cdictLocal;
256
+ ZSTD_localDict localDict;
184
257
  const ZSTD_CDict* cdict;
185
258
  ZSTD_prefixDict prefixDict; /* single-usage dictionary */
186
259
 
@@ -190,6 +263,16 @@ struct ZSTD_CCtx_s {
190
263
  #endif
191
264
  };
192
265
 
266
+ typedef enum { ZSTD_dtlm_fast, ZSTD_dtlm_full } ZSTD_dictTableLoadMethod_e;
267
+
268
+ typedef enum { ZSTD_noDict = 0, ZSTD_extDict = 1, ZSTD_dictMatchState = 2 } ZSTD_dictMode_e;
269
+
270
+
271
+ typedef size_t (*ZSTD_blockCompressor) (
272
+ ZSTD_matchState_t* bs, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
273
+ void const* src, size_t srcSize);
274
+ ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, ZSTD_dictMode_e dictMode);
275
+
193
276
 
194
277
  MEM_STATIC U32 ZSTD_LLcode(U32 litLength)
195
278
  {
@@ -229,16 +312,18 @@ MEM_STATIC U32 ZSTD_MLcode(U32 mlBase)
229
312
  */
230
313
  MEM_STATIC void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const void* literals, U32 offsetCode, size_t mlBase)
231
314
  {
232
- #if defined(ZSTD_DEBUG) && (ZSTD_DEBUG >= 6)
315
+ #if defined(DEBUGLEVEL) && (DEBUGLEVEL >= 6)
233
316
  static const BYTE* g_start = NULL;
234
317
  if (g_start==NULL) g_start = (const BYTE*)literals; /* note : index only works for compression within a single segment */
235
318
  { U32 const pos = (U32)((const BYTE*)literals - g_start);
236
- DEBUGLOG(6, "Cpos%7u :%3u literals, match%3u bytes at dist.code%7u",
319
+ DEBUGLOG(6, "Cpos%7u :%3u literals, match%4u bytes at offCode%7u",
237
320
  pos, (U32)litLength, (U32)mlBase+MINMATCH, (U32)offsetCode);
238
321
  }
239
322
  #endif
323
+ assert((size_t)(seqStorePtr->sequences - seqStorePtr->sequencesStart) < seqStorePtr->maxNbSeq);
240
324
  /* copy Literals */
241
- assert(seqStorePtr->lit + litLength <= seqStorePtr->litStart + 128 KB);
325
+ assert(seqStorePtr->maxNbLit <= 128 KB);
326
+ assert(seqStorePtr->lit + litLength <= seqStorePtr->litStart + seqStorePtr->maxNbLit);
242
327
  ZSTD_wildcopy(seqStorePtr->lit, literals, litLength);
243
328
  seqStorePtr->lit += litLength;
244
329
 
@@ -359,21 +444,28 @@ MEM_STATIC size_t ZSTD_count(const BYTE* pIn, const BYTE* pMatch, const BYTE* co
359
444
  }
360
445
 
361
446
  /** ZSTD_count_2segments() :
362
- * can count match length with `ip` & `match` in 2 different segments.
363
- * convention : on reaching mEnd, match count continue starting from iStart
364
- */
365
- MEM_STATIC size_t ZSTD_count_2segments(const BYTE* ip, const BYTE* match, const BYTE* iEnd, const BYTE* mEnd, const BYTE* iStart)
447
+ * can count match length with `ip` & `match` in 2 different segments.
448
+ * convention : on reaching mEnd, match count continue starting from iStart
449
+ */
450
+ MEM_STATIC size_t
451
+ ZSTD_count_2segments(const BYTE* ip, const BYTE* match,
452
+ const BYTE* iEnd, const BYTE* mEnd, const BYTE* iStart)
366
453
  {
367
454
  const BYTE* const vEnd = MIN( ip + (mEnd - match), iEnd);
368
455
  size_t const matchLength = ZSTD_count(ip, match, vEnd);
369
456
  if (match + matchLength != mEnd) return matchLength;
457
+ DEBUGLOG(7, "ZSTD_count_2segments: found a 2-parts match (current length==%zu)", matchLength);
458
+ DEBUGLOG(7, "distance from match beginning to end dictionary = %zi", mEnd - match);
459
+ DEBUGLOG(7, "distance from current pos to end buffer = %zi", iEnd - ip);
460
+ DEBUGLOG(7, "next byte : ip==%02X, istart==%02X", ip[matchLength], *iStart);
461
+ DEBUGLOG(7, "final match length = %zu", matchLength + ZSTD_count(ip+matchLength, iStart, iEnd));
370
462
  return matchLength + ZSTD_count(ip+matchLength, iStart, iEnd);
371
463
  }
372
464
 
373
465
 
374
466
  /*-*************************************
375
- * Hashes
376
- ***************************************/
467
+ * Hashes
468
+ ***************************************/
377
469
  static const U32 prime3bytes = 506832829U;
378
470
  static U32 ZSTD_hash3(U32 u, U32 h) { return ((u << (32-24)) * prime3bytes) >> (32-h) ; }
379
471
  MEM_STATIC size_t ZSTD_hash3Ptr(const void* ptr, U32 h) { return ZSTD_hash3(MEM_readLE32(ptr), h); } /* only in zstd_opt.h */
@@ -411,6 +503,290 @@ MEM_STATIC size_t ZSTD_hashPtr(const void* p, U32 hBits, U32 mls)
411
503
  }
412
504
  }
413
505
 
506
+ /** ZSTD_ipow() :
507
+ * Return base^exponent.
508
+ */
509
+ static U64 ZSTD_ipow(U64 base, U64 exponent)
510
+ {
511
+ U64 power = 1;
512
+ while (exponent) {
513
+ if (exponent & 1) power *= base;
514
+ exponent >>= 1;
515
+ base *= base;
516
+ }
517
+ return power;
518
+ }
519
+
520
+ #define ZSTD_ROLL_HASH_CHAR_OFFSET 10
521
+
522
+ /** ZSTD_rollingHash_append() :
523
+ * Add the buffer to the hash value.
524
+ */
525
+ static U64 ZSTD_rollingHash_append(U64 hash, void const* buf, size_t size)
526
+ {
527
+ BYTE const* istart = (BYTE const*)buf;
528
+ size_t pos;
529
+ for (pos = 0; pos < size; ++pos) {
530
+ hash *= prime8bytes;
531
+ hash += istart[pos] + ZSTD_ROLL_HASH_CHAR_OFFSET;
532
+ }
533
+ return hash;
534
+ }
535
+
536
+ /** ZSTD_rollingHash_compute() :
537
+ * Compute the rolling hash value of the buffer.
538
+ */
539
+ MEM_STATIC U64 ZSTD_rollingHash_compute(void const* buf, size_t size)
540
+ {
541
+ return ZSTD_rollingHash_append(0, buf, size);
542
+ }
543
+
544
+ /** ZSTD_rollingHash_primePower() :
545
+ * Compute the primePower to be passed to ZSTD_rollingHash_rotate() for a hash
546
+ * over a window of length bytes.
547
+ */
548
+ MEM_STATIC U64 ZSTD_rollingHash_primePower(U32 length)
549
+ {
550
+ return ZSTD_ipow(prime8bytes, length - 1);
551
+ }
552
+
553
+ /** ZSTD_rollingHash_rotate() :
554
+ * Rotate the rolling hash by one byte.
555
+ */
556
+ MEM_STATIC U64 ZSTD_rollingHash_rotate(U64 hash, BYTE toRemove, BYTE toAdd, U64 primePower)
557
+ {
558
+ hash -= (toRemove + ZSTD_ROLL_HASH_CHAR_OFFSET) * primePower;
559
+ hash *= prime8bytes;
560
+ hash += toAdd + ZSTD_ROLL_HASH_CHAR_OFFSET;
561
+ return hash;
562
+ }
563
+
564
+ /*-*************************************
565
+ * Round buffer management
566
+ ***************************************/
567
+ /* Max current allowed */
568
+ #define ZSTD_CURRENT_MAX ((3U << 29) + (1U << ZSTD_WINDOWLOG_MAX))
569
+ /* Maximum chunk size before overflow correction needs to be called again */
570
+ #define ZSTD_CHUNKSIZE_MAX \
571
+ ( ((U32)-1) /* Maximum ending current index */ \
572
+ - ZSTD_CURRENT_MAX) /* Maximum beginning lowLimit */
573
+
574
+ /**
575
+ * ZSTD_window_clear():
576
+ * Clears the window containing the history by simply setting it to empty.
577
+ */
578
+ MEM_STATIC void ZSTD_window_clear(ZSTD_window_t* window)
579
+ {
580
+ size_t const endT = (size_t)(window->nextSrc - window->base);
581
+ U32 const end = (U32)endT;
582
+
583
+ window->lowLimit = end;
584
+ window->dictLimit = end;
585
+ }
586
+
587
+ /**
588
+ * ZSTD_window_hasExtDict():
589
+ * Returns non-zero if the window has a non-empty extDict.
590
+ */
591
+ MEM_STATIC U32 ZSTD_window_hasExtDict(ZSTD_window_t const window)
592
+ {
593
+ return window.lowLimit < window.dictLimit;
594
+ }
595
+
596
+ /**
597
+ * ZSTD_matchState_dictMode():
598
+ * Inspects the provided matchState and figures out what dictMode should be
599
+ * passed to the compressor.
600
+ */
601
+ MEM_STATIC ZSTD_dictMode_e ZSTD_matchState_dictMode(const ZSTD_matchState_t *ms)
602
+ {
603
+ return ZSTD_window_hasExtDict(ms->window) ?
604
+ ZSTD_extDict :
605
+ ms->dictMatchState != NULL ?
606
+ ZSTD_dictMatchState :
607
+ ZSTD_noDict;
608
+ }
609
+
610
+ /**
611
+ * ZSTD_window_needOverflowCorrection():
612
+ * Returns non-zero if the indices are getting too large and need overflow
613
+ * protection.
614
+ */
615
+ MEM_STATIC U32 ZSTD_window_needOverflowCorrection(ZSTD_window_t const window,
616
+ void const* srcEnd)
617
+ {
618
+ U32 const current = (U32)((BYTE const*)srcEnd - window.base);
619
+ return current > ZSTD_CURRENT_MAX;
620
+ }
621
+
622
+ /**
623
+ * ZSTD_window_correctOverflow():
624
+ * Reduces the indices to protect from index overflow.
625
+ * Returns the correction made to the indices, which must be applied to every
626
+ * stored index.
627
+ *
628
+ * The least significant cycleLog bits of the indices must remain the same,
629
+ * which may be 0. Every index up to maxDist in the past must be valid.
630
+ * NOTE: (maxDist & cycleMask) must be zero.
631
+ */
632
+ MEM_STATIC U32 ZSTD_window_correctOverflow(ZSTD_window_t* window, U32 cycleLog,
633
+ U32 maxDist, void const* src)
634
+ {
635
+ /* preemptive overflow correction:
636
+ * 1. correction is large enough:
637
+ * lowLimit > (3<<29) ==> current > 3<<29 + 1<<windowLog
638
+ * 1<<windowLog <= newCurrent < 1<<chainLog + 1<<windowLog
639
+ *
640
+ * current - newCurrent
641
+ * > (3<<29 + 1<<windowLog) - (1<<windowLog + 1<<chainLog)
642
+ * > (3<<29) - (1<<chainLog)
643
+ * > (3<<29) - (1<<30) (NOTE: chainLog <= 30)
644
+ * > 1<<29
645
+ *
646
+ * 2. (ip+ZSTD_CHUNKSIZE_MAX - cctx->base) doesn't overflow:
647
+ * After correction, current is less than (1<<chainLog + 1<<windowLog).
648
+ * In 64-bit mode we are safe, because we have 64-bit ptrdiff_t.
649
+ * In 32-bit mode we are safe, because (chainLog <= 29), so
650
+ * ip+ZSTD_CHUNKSIZE_MAX - cctx->base < 1<<32.
651
+ * 3. (cctx->lowLimit + 1<<windowLog) < 1<<32:
652
+ * windowLog <= 31 ==> 3<<29 + 1<<windowLog < 7<<29 < 1<<32.
653
+ */
654
+ U32 const cycleMask = (1U << cycleLog) - 1;
655
+ U32 const current = (U32)((BYTE const*)src - window->base);
656
+ U32 const newCurrent = (current & cycleMask) + maxDist;
657
+ U32 const correction = current - newCurrent;
658
+ assert((maxDist & cycleMask) == 0);
659
+ assert(current > newCurrent);
660
+ /* Loose bound, should be around 1<<29 (see above) */
661
+ assert(correction > 1<<28);
662
+
663
+ window->base += correction;
664
+ window->dictBase += correction;
665
+ window->lowLimit -= correction;
666
+ window->dictLimit -= correction;
667
+
668
+ DEBUGLOG(4, "Correction of 0x%x bytes to lowLimit=0x%x", correction,
669
+ window->lowLimit);
670
+ return correction;
671
+ }
672
+
673
+ /**
674
+ * ZSTD_window_enforceMaxDist():
675
+ * Updates lowLimit so that:
676
+ * (srcEnd - base) - lowLimit == maxDist + loadedDictEnd
677
+ *
678
+ * This allows a simple check that index >= lowLimit to see if index is valid.
679
+ * This must be called before a block compression call, with srcEnd as the block
680
+ * source end.
681
+ *
682
+ * If loadedDictEndPtr is not NULL, we set it to zero once we update lowLimit.
683
+ * This is because dictionaries are allowed to be referenced as long as the last
684
+ * byte of the dictionary is in the window, but once they are out of range,
685
+ * they cannot be referenced. If loadedDictEndPtr is NULL, we use
686
+ * loadedDictEnd == 0.
687
+ *
688
+ * In normal dict mode, the dict is between lowLimit and dictLimit. In
689
+ * dictMatchState mode, lowLimit and dictLimit are the same, and the dictionary
690
+ * is below them. forceWindow and dictMatchState are therefore incompatible.
691
+ */
692
+ MEM_STATIC void
693
+ ZSTD_window_enforceMaxDist(ZSTD_window_t* window,
694
+ void const* srcEnd,
695
+ U32 maxDist,
696
+ U32* loadedDictEndPtr,
697
+ const ZSTD_matchState_t** dictMatchStatePtr)
698
+ {
699
+ U32 const blockEndIdx = (U32)((BYTE const*)srcEnd - window->base);
700
+ U32 loadedDictEnd = (loadedDictEndPtr != NULL) ? *loadedDictEndPtr : 0;
701
+ DEBUGLOG(5, "ZSTD_window_enforceMaxDist: blockEndIdx=%u, maxDist=%u",
702
+ (unsigned)blockEndIdx, (unsigned)maxDist);
703
+ if (blockEndIdx > maxDist + loadedDictEnd) {
704
+ U32 const newLowLimit = blockEndIdx - maxDist;
705
+ if (window->lowLimit < newLowLimit) window->lowLimit = newLowLimit;
706
+ if (window->dictLimit < window->lowLimit) {
707
+ DEBUGLOG(5, "Update dictLimit to match lowLimit, from %u to %u",
708
+ (unsigned)window->dictLimit, (unsigned)window->lowLimit);
709
+ window->dictLimit = window->lowLimit;
710
+ }
711
+ if (loadedDictEndPtr)
712
+ *loadedDictEndPtr = 0;
713
+ if (dictMatchStatePtr)
714
+ *dictMatchStatePtr = NULL;
715
+ }
716
+ }
717
+
718
+ /**
719
+ * ZSTD_window_update():
720
+ * Updates the window by appending [src, src + srcSize) to the window.
721
+ * If it is not contiguous, the current prefix becomes the extDict, and we
722
+ * forget about the extDict. Handles overlap of the prefix and extDict.
723
+ * Returns non-zero if the segment is contiguous.
724
+ */
725
+ MEM_STATIC U32 ZSTD_window_update(ZSTD_window_t* window,
726
+ void const* src, size_t srcSize)
727
+ {
728
+ BYTE const* const ip = (BYTE const*)src;
729
+ U32 contiguous = 1;
730
+ DEBUGLOG(5, "ZSTD_window_update");
731
+ /* Check if blocks follow each other */
732
+ if (src != window->nextSrc) {
733
+ /* not contiguous */
734
+ size_t const distanceFromBase = (size_t)(window->nextSrc - window->base);
735
+ DEBUGLOG(5, "Non contiguous blocks, new segment starts at %u", window->dictLimit);
736
+ window->lowLimit = window->dictLimit;
737
+ assert(distanceFromBase == (size_t)(U32)distanceFromBase); /* should never overflow */
738
+ window->dictLimit = (U32)distanceFromBase;
739
+ window->dictBase = window->base;
740
+ window->base = ip - distanceFromBase;
741
+ // ms->nextToUpdate = window->dictLimit;
742
+ if (window->dictLimit - window->lowLimit < HASH_READ_SIZE) window->lowLimit = window->dictLimit; /* too small extDict */
743
+ contiguous = 0;
744
+ }
745
+ window->nextSrc = ip + srcSize;
746
+ /* if input and dictionary overlap : reduce dictionary (area presumed modified by input) */
747
+ if ( (ip+srcSize > window->dictBase + window->lowLimit)
748
+ & (ip < window->dictBase + window->dictLimit)) {
749
+ ptrdiff_t const highInputIdx = (ip + srcSize) - window->dictBase;
750
+ U32 const lowLimitMax = (highInputIdx > (ptrdiff_t)window->dictLimit) ? window->dictLimit : (U32)highInputIdx;
751
+ window->lowLimit = lowLimitMax;
752
+ DEBUGLOG(5, "Overlapping extDict and input : new lowLimit = %u", window->lowLimit);
753
+ }
754
+ return contiguous;
755
+ }
756
+
757
+
758
+ /* debug functions */
759
+ #if (DEBUGLEVEL>=2)
760
+
761
+ MEM_STATIC double ZSTD_fWeight(U32 rawStat)
762
+ {
763
+ U32 const fp_accuracy = 8;
764
+ U32 const fp_multiplier = (1 << fp_accuracy);
765
+ U32 const newStat = rawStat + 1;
766
+ U32 const hb = ZSTD_highbit32(newStat);
767
+ U32 const BWeight = hb * fp_multiplier;
768
+ U32 const FWeight = (newStat << fp_accuracy) >> hb;
769
+ U32 const weight = BWeight + FWeight;
770
+ assert(hb + fp_accuracy < 31);
771
+ return (double)weight / fp_multiplier;
772
+ }
773
+
774
+ /* display a table content,
775
+ * listing each element, its frequency, and its predicted bit cost */
776
+ MEM_STATIC void ZSTD_debugTable(const U32* table, U32 max)
777
+ {
778
+ unsigned u, sum;
779
+ for (u=0, sum=0; u<=max; u++) sum += table[u];
780
+ DEBUGLOG(2, "total nb elts: %u", sum);
781
+ for (u=0; u<=max; u++) {
782
+ DEBUGLOG(2, "%2u: %5u (%.2f)",
783
+ u, table[u], ZSTD_fWeight(sum) - ZSTD_fWeight(table[u]) );
784
+ }
785
+ }
786
+
787
+ #endif
788
+
789
+
414
790
  #if defined (__cplusplus)
415
791
  }
416
792
  #endif
@@ -421,6 +797,13 @@ MEM_STATIC size_t ZSTD_hashPtr(const void* p, U32 hBits, U32 mls)
421
797
  * These prototypes shall only be called from within lib/compress
422
798
  * ============================================================== */
423
799
 
800
+ /* ZSTD_getCParamsFromCCtxParams() :
801
+ * cParams are built depending on compressionLevel, src size hints,
802
+ * LDM and manually set compression parameters.
803
+ */
804
+ ZSTD_compressionParameters ZSTD_getCParamsFromCCtxParams(
805
+ const ZSTD_CCtx_params* CCtxParams, U64 srcSizeHint, size_t dictSize);
806
+
424
807
  /*! ZSTD_initCStream_internal() :
425
808
  * Private use only. Init streaming operation.
426
809
  * expects params to be valid.
@@ -431,12 +814,7 @@ size_t ZSTD_initCStream_internal(ZSTD_CStream* zcs,
431
814
  const ZSTD_CDict* cdict,
432
815
  ZSTD_CCtx_params params, unsigned long long pledgedSrcSize);
433
816
 
434
- /*! ZSTD_compressStream_generic() :
435
- * Private use only. To be called from zstdmt_compress.c in single-thread mode. */
436
- size_t ZSTD_compressStream_generic(ZSTD_CStream* zcs,
437
- ZSTD_outBuffer* output,
438
- ZSTD_inBuffer* input,
439
- ZSTD_EndDirective const flushMode);
817
+ void ZSTD_resetSeqStore(seqStore_t* ssPtr);
440
818
 
441
819
  /*! ZSTD_getCParamsFromCDict() :
442
820
  * as the name implies */
@@ -446,7 +824,8 @@ ZSTD_compressionParameters ZSTD_getCParamsFromCDict(const ZSTD_CDict* cdict);
446
824
  * Private use only. To be called from zstdmt_compress.c. */
447
825
  size_t ZSTD_compressBegin_advanced_internal(ZSTD_CCtx* cctx,
448
826
  const void* dict, size_t dictSize,
449
- ZSTD_dictMode_e dictMode,
827
+ ZSTD_dictContentType_e dictContentType,
828
+ ZSTD_dictTableLoadMethod_e dtlm,
450
829
  const ZSTD_CDict* cdict,
451
830
  ZSTD_CCtx_params params,
452
831
  unsigned long long pledgedSrcSize);
@@ -459,4 +838,26 @@ size_t ZSTD_compress_advanced_internal(ZSTD_CCtx* cctx,
459
838
  const void* dict,size_t dictSize,
460
839
  ZSTD_CCtx_params params);
461
840
 
841
+
842
+ /* ZSTD_writeLastEmptyBlock() :
843
+ * output an empty Block with end-of-frame mark to complete a frame
844
+ * @return : size of data written into `dst` (== ZSTD_blockHeaderSize (defined in zstd_internal.h))
845
+ * or an error code if `dstCapacity` is too small (<ZSTD_blockHeaderSize)
846
+ */
847
+ size_t ZSTD_writeLastEmptyBlock(void* dst, size_t dstCapacity);
848
+
849
+
850
+ /* ZSTD_referenceExternalSequences() :
851
+ * Must be called before starting a compression operation.
852
+ * seqs must parse a prefix of the source.
853
+ * This cannot be used when long range matching is enabled.
854
+ * Zstd will use these sequences, and pass the literals to a secondary block
855
+ * compressor.
856
+ * @return : An error code on failure.
857
+ * NOTE: seqs are not verified! Invalid sequences can cause out-of-bounds memory
858
+ * access and data corruption.
859
+ */
860
+ size_t ZSTD_referenceExternalSequences(ZSTD_CCtx* cctx, rawSeq* seq, size_t nbSeq);
861
+
862
+
462
863
  #endif /* ZSTD_COMPRESS_H */