zstdlib 0.2.0 → 0.3.0

Files changed (63)
  1. checksums.yaml +4 -4
  2. data/CHANGES.md +5 -0
  3. data/ext/zstdlib/extconf.rb +1 -1
  4. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.2}/lib/common/bitstream.h +0 -0
  5. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.2}/lib/common/compiler.h +7 -0
  6. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.2}/lib/common/cpu.h +0 -0
  7. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.2}/lib/common/debug.c +0 -0
  8. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.2}/lib/common/debug.h +0 -0
  9. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.2}/lib/common/entropy_common.c +0 -0
  10. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.2}/lib/common/error_private.c +0 -0
  11. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.2}/lib/common/error_private.h +0 -0
  12. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.2}/lib/common/fse.h +0 -0
  13. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.2}/lib/common/fse_decompress.c +0 -0
  14. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.2}/lib/common/huf.h +0 -0
  15. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.2}/lib/common/mem.h +0 -0
  16. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.2}/lib/common/pool.c +0 -0
  17. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.2}/lib/common/pool.h +0 -0
  18. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.2}/lib/common/threading.c +0 -0
  19. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.2}/lib/common/threading.h +0 -0
  20. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.2}/lib/common/xxhash.c +0 -0
  21. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.2}/lib/common/xxhash.h +0 -0
  22. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.2}/lib/common/zstd_common.c +0 -0
  23. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.2}/lib/common/zstd_errors.h +0 -0
  24. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.2}/lib/common/zstd_internal.h +58 -6
  25. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.2}/lib/compress/fse_compress.c +0 -0
  26. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.2}/lib/compress/hist.c +0 -0
  27. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.2}/lib/compress/hist.h +0 -0
  28. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.2}/lib/compress/huf_compress.c +0 -0
  29. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.2}/lib/compress/zstd_compress.c +178 -691
  30. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.2}/lib/compress/zstd_compress_internal.h +98 -30
  31. data/ext/zstdlib/zstd-1.4.2/lib/compress/zstd_compress_literals.c +149 -0
  32. data/ext/zstdlib/zstd-1.4.2/lib/compress/zstd_compress_literals.h +29 -0
  33. data/ext/zstdlib/zstd-1.4.2/lib/compress/zstd_compress_sequences.c +415 -0
  34. data/ext/zstdlib/zstd-1.4.2/lib/compress/zstd_compress_sequences.h +47 -0
  35. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.2}/lib/compress/zstd_double_fast.c +56 -36
  36. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.2}/lib/compress/zstd_double_fast.h +0 -0
  37. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.2}/lib/compress/zstd_fast.c +35 -14
  38. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.2}/lib/compress/zstd_fast.h +0 -0
  39. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.2}/lib/compress/zstd_lazy.c +10 -5
  40. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.2}/lib/compress/zstd_lazy.h +0 -0
  41. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.2}/lib/compress/zstd_ldm.c +1 -1
  42. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.2}/lib/compress/zstd_ldm.h +0 -0
  43. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.2}/lib/compress/zstd_opt.c +45 -32
  44. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.2}/lib/compress/zstd_opt.h +0 -0
  45. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.2}/lib/compress/zstdmt_compress.c +18 -7
  46. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.2}/lib/compress/zstdmt_compress.h +1 -0
  47. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.2}/lib/decompress/huf_decompress.c +0 -0
  48. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.2}/lib/decompress/zstd_ddict.c +0 -0
  49. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.2}/lib/decompress/zstd_ddict.h +0 -0
  50. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.2}/lib/decompress/zstd_decompress.c +14 -9
  51. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.2}/lib/decompress/zstd_decompress_block.c +20 -9
  52. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.2}/lib/decompress/zstd_decompress_block.h +0 -0
  53. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.2}/lib/decompress/zstd_decompress_internal.h +0 -0
  54. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.2}/lib/zstd.h +53 -21
  55. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.2}/zlibWrapper/gzclose.c +0 -0
  56. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.2}/zlibWrapper/gzcompatibility.h +0 -0
  57. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.2}/zlibWrapper/gzguts.h +0 -0
  58. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.2}/zlibWrapper/gzlib.c +0 -0
  59. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.2}/zlibWrapper/gzread.c +0 -0
  60. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.2}/zlibWrapper/gzwrite.c +0 -0
  61. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.2}/zlibWrapper/zstd_zlibwrapper.c +0 -0
  62. data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.2}/zlibWrapper/zstd_zlibwrapper.h +0 -0
  63. metadata +62 -59
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: d1ae0ce76a7d5ef6a298471266269bdfd190923109bf9a4fcab5e6d8d3f9c0b4
-  data.tar.gz: 6b6d70cf881e5b8815fece6d37395273dc79d5f8539452a28a430699f9c0866b
+  metadata.gz: d1228cdf4a42f79889510be54f47660a0b9a51c78d19d3e68da0a84a827ca09a
+  data.tar.gz: 29be5ad1fc9312c34b326e13c20ffb26eba594c587306e23593acd0f845fc346
 SHA512:
-  metadata.gz: 93490af691f38ed3429f6d22c3ee9efa5ddbb51d8413ca432b2988cfb2ace2d3f4faa344d56345ffd1a67e6c250f3dde0c376d3eebb783194fc311096d90379c
-  data.tar.gz: 943827c4d0aad344234b1ec1337cd2554d9e798b74cd0f6d27412f4448b9c291f9a61da3bb52dad07c4a81ca8dabfa01d5120d7b671cf4ae5d462c8387b62c89
+  metadata.gz: fcbbd01b939d15cb07706749b99954e04c7d782a3ed3f433371977d0070a7a061f2daa98b2f25545672e1b0ff961903c7712c2ec7f747c90db809f187c0b7ba2
+  data.tar.gz: f5dca7f3c0f237257923b44020faf1093ee14f85f419972cdcc9ff6e3a1c37fae9549335ea5f24fb18c416c9f2a042feee2dd7ffddcc554604c7a4a14ce54d42
data/CHANGES.md CHANGED
@@ -1,3 +1,8 @@
+# 0.3.0
+
+Zstd version update to `1.4.2`
+
+
 # 0.2.0
 
 Zstd version update to `1.4.0`
data/ext/zstdlib/extconf.rb CHANGED
@@ -6,7 +6,7 @@ require 'fileutils'
 include RbConfig
 include FileUtils
 
-ZSTD_VERSION = '1.4.0'
+ZSTD_VERSION = '1.4.2'
 ZLIB_VERSION = '1.2.11'
 RB_VERSION   = CONFIG['MAJOR']+'.'+CONFIG['MINOR']
 ZMOD_VERSION = RB_VERSION >= '2.3' ? '2.6' : RB_VERSION # Review requirements with every new zlib module release!
data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.2}/lib/common/compiler.h CHANGED
@@ -127,6 +127,13 @@
   }  \
 }
 
+/* vectorization */
+#if !defined(__clang__) && defined(__GNUC__)
+#  define DONT_VECTORIZE __attribute__((optimize("no-tree-vectorize")))
+#else
+#  define DONT_VECTORIZE
+#endif
+
 /* disable warnings */
 #ifdef _MSC_VER    /* Visual Studio */
 #  include <intrin.h>                    /* For Visual 2005 */
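
Editor's note: the `DONT_VECTORIZE` macro added above expands to GCC's function-level `optimize("no-tree-vectorize")` attribute (and to nothing on clang and other compilers). A minimal standalone sketch of how such an attribute is applied — the function name and loop here are hypothetical, not part of this diff:

```c
/* Demo of a GCC-only "don't auto-vectorize this function" attribute,
 * guarded exactly like the macro added in compiler.h above. */
#include <stddef.h>

#if !defined(__clang__) && defined(__GNUC__)
#  define DONT_VECTORIZE __attribute__((optimize("no-tree-vectorize")))
#else
#  define DONT_VECTORIZE
#endif

DONT_VECTORIZE
static void add_arrays(int* dst, const int* src, size_t n)
{
    size_t i;
    for (i = 0; i < n; i++)
        dst[i] += src[i];   /* a loop GCC would normally auto-vectorize */
}

int main(void)
{
    int a[8] = {0}, b[8] = {1, 2, 3, 4, 5, 6, 7, 8};
    add_arrays(a, b, 8);
    return a[7] == 8 ? 0 : 1;
}
```

zstd uses this on its wildcopy loops (next section), where the hand-tuned 8/16-byte copy pattern is faster than what the tree vectorizer generates.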
data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.2}/lib/common/zstd_internal.h CHANGED
@@ -34,7 +34,6 @@
 #endif
 #include "xxhash.h"                /* XXH_reset, update, digest */
 
-
 #if defined (__cplusplus)
 extern "C" {
 #endif
@@ -193,19 +192,72 @@ static const U32 OF_defaultNormLog = OF_DEFAULTNORMLOG;
 *  Shared functions to include for inlining
 *********************************************/
 static void ZSTD_copy8(void* dst, const void* src) { memcpy(dst, src, 8); }
+
 #define COPY8(d,s) { ZSTD_copy8(d,s); d+=8; s+=8; }
+static void ZSTD_copy16(void* dst, const void* src) { memcpy(dst, src, 16); }
+#define COPY16(d,s) { ZSTD_copy16(d,s); d+=16; s+=16; }
+
+#define WILDCOPY_OVERLENGTH 8
+#define VECLEN 16
+
+typedef enum {
+    ZSTD_no_overlap,
+    ZSTD_overlap_src_before_dst,
+    /*  ZSTD_overlap_dst_before_src, */
+} ZSTD_overlap_e;
 
 /*! ZSTD_wildcopy() :
  *  custom version of memcpy(), can overwrite up to WILDCOPY_OVERLENGTH bytes (if length==0) */
-#define WILDCOPY_OVERLENGTH 8
-MEM_STATIC void ZSTD_wildcopy(void* dst, const void* src, ptrdiff_t length)
+MEM_STATIC FORCE_INLINE_ATTR DONT_VECTORIZE
+void ZSTD_wildcopy(void* dst, const void* src, ptrdiff_t length, ZSTD_overlap_e ovtype)
 {
+    ptrdiff_t diff = (BYTE*)dst - (const BYTE*)src;
     const BYTE* ip = (const BYTE*)src;
     BYTE* op = (BYTE*)dst;
     BYTE* const oend = op + length;
-    do
-        COPY8(op, ip)
-    while (op < oend);
+
+    assert(diff >= 8 || (ovtype == ZSTD_no_overlap && diff < -8));
+    if (length < VECLEN || (ovtype == ZSTD_overlap_src_before_dst && diff < VECLEN)) {
+        do
+            COPY8(op, ip)
+        while (op < oend);
+    }
+    else {
+        if ((length & 8) == 0)
+            COPY8(op, ip);
+        do {
+            COPY16(op, ip);
+        }
+        while (op < oend);
+    }
+}
+
+/*! ZSTD_wildcopy_16min() :
+ *  same semantics as ZSTD_wildcopy() except guaranteed to be able to copy 16 bytes at the start */
+MEM_STATIC FORCE_INLINE_ATTR DONT_VECTORIZE
+void ZSTD_wildcopy_16min(void* dst, const void* src, ptrdiff_t length, ZSTD_overlap_e ovtype)
+{
+    ptrdiff_t diff = (BYTE*)dst - (const BYTE*)src;
+    const BYTE* ip = (const BYTE*)src;
+    BYTE* op = (BYTE*)dst;
+    BYTE* const oend = op + length;
+
+    assert(length >= 8);
+    assert(diff >= 8 || (ovtype == ZSTD_no_overlap && diff < -8));
+
+    if (ovtype == ZSTD_overlap_src_before_dst && diff < VECLEN) {
+        do
+            COPY8(op, ip)
+        while (op < oend);
+    }
+    else {
+        if ((length & 8) == 0)
+            COPY8(op, ip);
+        do {
+            COPY16(op, ip);
+        }
+        while (op < oend);
+    }
 }
 
 MEM_STATIC void ZSTD_wildcopy_e(void* dst, const void* src, void* dstEnd)   /* should be faster for decoding, but strangely, not verified on all platform */
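
Editor's note: the new `ZSTD_overlap_e` parameter exists because wildcopy is also used to expand LZ back-references, where source and destination overlap by the match offset. Copying 8 (or 16) bytes at a time is only safe when that offset is at least the copy granularity — which is what the `diff >= 8` assertion and the `diff < VECLEN` fallback enforce. A minimal sketch of the safe case, using plain `memcpy` and hypothetical names (not zstd API):

```c
/* Expand an LZ77 back-reference where the source precedes the destination
 * by `offset` bytes. 8-byte-at-a-time copying is safe only for offset >= 8,
 * mirroring ZSTD_wildcopy's ZSTD_overlap_src_before_dst precondition.
 * Like wildcopy, this may write a few bytes past op+length. */
#include <stdio.h>
#include <string.h>

static void lz_copy8(unsigned char* op, size_t offset, size_t length)
{
    const unsigned char* ip = op - offset;
    unsigned char* const oend = op + length;
    do { memcpy(op, ip, 8); op += 8; ip += 8; } while (op < oend);
}

int main(void)
{
    unsigned char buf[64] = "abcdefgh";  /* 8 bytes already decoded */
    lz_copy8(buf + 8, 8, 24);            /* offset 8 >= 8 : safe */
    printf("%.32s\n", buf);              /* prints abcdefgh four times */
    return 0;
}
```

With offset < 8 the same loop would re-read bytes it just wrote mid-`memcpy`, which is why zstd drops to the byte-wise `COPY8` path (and asserts) in that case.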
data/ext/zstdlib/{zstd-1.4.0 → zstd-1.4.2}/lib/compress/zstd_compress.c CHANGED
@@ -21,6 +21,8 @@
 #define HUF_STATIC_LINKING_ONLY
 #include "huf.h"
 #include "zstd_compress_internal.h"
+#include "zstd_compress_sequences.h"
+#include "zstd_compress_literals.h"
 #include "zstd_fast.h"
 #include "zstd_double_fast.h"
 #include "zstd_lazy.h"
@@ -385,6 +387,11 @@ ZSTD_bounds ZSTD_cParam_getBounds(ZSTD_cParameter param)
         bounds.upperBound = ZSTD_lcm_uncompressed;
         return bounds;
 
+    case ZSTD_c_targetCBlockSize:
+        bounds.lowerBound = ZSTD_TARGETCBLOCKSIZE_MIN;
+        bounds.upperBound = ZSTD_TARGETCBLOCKSIZE_MAX;
+        return bounds;
+
     default:
         {   ZSTD_bounds const boundError = { ERROR(parameter_unsupported), 0, 0 };
             return boundError;
@@ -392,18 +399,6 @@ ZSTD_bounds ZSTD_cParam_getBounds(ZSTD_cParameter param)
     }
 }
 
-/* ZSTD_cParam_withinBounds:
- * @return 1 if value is within cParam bounds,
- * 0 otherwise */
-static int ZSTD_cParam_withinBounds(ZSTD_cParameter cParam, int value)
-{
-    ZSTD_bounds const bounds = ZSTD_cParam_getBounds(cParam);
-    if (ZSTD_isError(bounds.error)) return 0;
-    if (value < bounds.lowerBound) return 0;
-    if (value > bounds.upperBound) return 0;
-    return 1;
-}
-
 /* ZSTD_cParam_clampBounds:
  * Clamps the value into the bounded range.
  */
@@ -452,6 +447,7 @@ static int ZSTD_isUpdateAuthorized(ZSTD_cParameter param)
     case ZSTD_c_ldmHashRateLog:
     case ZSTD_c_forceAttachDict:
     case ZSTD_c_literalCompressionMode:
+    case ZSTD_c_targetCBlockSize:
     default:
         return 0;
     }
@@ -497,6 +493,7 @@ size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, int value)
     case ZSTD_c_ldmHashLog:
    case ZSTD_c_ldmMinMatch:
     case ZSTD_c_ldmBucketSizeLog:
+    case ZSTD_c_targetCBlockSize:
         break;
 
     default: RETURN_ERROR(parameter_unsupported);
@@ -671,6 +668,12 @@ size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* CCtxParams,
         CCtxParams->ldmParams.hashRateLog = value;
         return CCtxParams->ldmParams.hashRateLog;
 
+    case ZSTD_c_targetCBlockSize :
+        if (value!=0)   /* 0 ==> default */
+            BOUNDCHECK(ZSTD_c_targetCBlockSize, value);
+        CCtxParams->targetCBlockSize = value;
+        return CCtxParams->targetCBlockSize;
+
     default: RETURN_ERROR(parameter_unsupported, "unknown parameter");
     }
 }
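
Editor's note: the main user-visible feature threaded through these hunks is the new `ZSTD_c_targetCBlockSize` compression parameter (a hint for the maximum compressed block size). A sketch of driving it through the public advanced API — in this release the parameter lives in zstd.h's experimental section, so `ZSTD_STATIC_LINKING_ONLY` is assumed; error handling is trimmed for brevity:

```c
/* Sketch: compress with a target compressed-block size of ~1 KB. */
#define ZSTD_STATIC_LINKING_ONLY   /* ZSTD_c_targetCBlockSize is experimental here */
#include <zstd.h>

size_t compress_with_target_block(void* dst, size_t dstCap,
                                  const void* src, size_t srcSize)
{
    ZSTD_CCtx* const cctx = ZSTD_createCCtx();
    size_t ret;
    ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, 3);
    /* 0 means "default / no constraint", any other value is bound-checked
     * against ZSTD_TARGETCBLOCKSIZE_MIN/MAX as in the hunk above */
    ZSTD_CCtx_setParameter(cctx, ZSTD_c_targetCBlockSize, 1024);
    ret = ZSTD_compress2(cctx, dst, dstCap, src, srcSize);
    ZSTD_freeCCtx(cctx);
    return ret;   /* compressed size, or an error code (check ZSTD_isError) */
}
```

Note also that `ZSTD_isUpdateAuthorized()` returns 0 for this parameter, so it cannot be changed mid-stream.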
@@ -692,13 +695,13 @@ size_t ZSTD_CCtxParams_getParameter(
         *value = CCtxParams->compressionLevel;
         break;
     case ZSTD_c_windowLog :
-        *value = CCtxParams->cParams.windowLog;
+        *value = (int)CCtxParams->cParams.windowLog;
         break;
     case ZSTD_c_hashLog :
-        *value = CCtxParams->cParams.hashLog;
+        *value = (int)CCtxParams->cParams.hashLog;
         break;
     case ZSTD_c_chainLog :
-        *value = CCtxParams->cParams.chainLog;
+        *value = (int)CCtxParams->cParams.chainLog;
         break;
     case ZSTD_c_searchLog :
         *value = CCtxParams->cParams.searchLog;
@@ -773,6 +776,9 @@ size_t ZSTD_CCtxParams_getParameter(
     case ZSTD_c_ldmHashRateLog :
         *value = CCtxParams->ldmParams.hashRateLog;
         break;
+    case ZSTD_c_targetCBlockSize :
+        *value = (int)CCtxParams->targetCBlockSize;
+        break;
     default: RETURN_ERROR(parameter_unsupported, "unknown parameter");
     }
     return 0;
@@ -930,12 +936,12 @@ size_t ZSTD_CCtx_reset(ZSTD_CCtx* cctx, ZSTD_ResetDirective reset)
   @return : 0, or an error code if one value is beyond authorized range */
 size_t ZSTD_checkCParams(ZSTD_compressionParameters cParams)
 {
-    BOUNDCHECK(ZSTD_c_windowLog, cParams.windowLog);
-    BOUNDCHECK(ZSTD_c_chainLog,  cParams.chainLog);
-    BOUNDCHECK(ZSTD_c_hashLog,   cParams.hashLog);
-    BOUNDCHECK(ZSTD_c_searchLog, cParams.searchLog);
-    BOUNDCHECK(ZSTD_c_minMatch,  cParams.minMatch);
-    BOUNDCHECK(ZSTD_c_targetLength,cParams.targetLength);
+    BOUNDCHECK(ZSTD_c_windowLog, (int)cParams.windowLog);
+    BOUNDCHECK(ZSTD_c_chainLog,  (int)cParams.chainLog);
+    BOUNDCHECK(ZSTD_c_hashLog,   (int)cParams.hashLog);
+    BOUNDCHECK(ZSTD_c_searchLog, (int)cParams.searchLog);
+    BOUNDCHECK(ZSTD_c_minMatch,  (int)cParams.minMatch);
+    BOUNDCHECK(ZSTD_c_targetLength,(int)cParams.targetLength);
     BOUNDCHECK(ZSTD_c_strategy,  cParams.strategy);
     return 0;
 }
@@ -951,7 +957,7 @@ ZSTD_clampCParams(ZSTD_compressionParameters cParams)
         if ((int)val<bounds.lowerBound) val=(type)bounds.lowerBound;      \
         else if ((int)val>bounds.upperBound) val=(type)bounds.upperBound; \
     }
-#   define CLAMP(cParam, val) CLAMP_TYPE(cParam, val, int)
+#   define CLAMP(cParam, val) CLAMP_TYPE(cParam, val, unsigned)
     CLAMP(ZSTD_c_windowLog, cParams.windowLog);
     CLAMP(ZSTD_c_chainLog,  cParams.chainLog);
     CLAMP(ZSTD_c_hashLog,   cParams.hashLog);
@@ -1282,15 +1288,14 @@ static void ZSTD_reset_compressedBlockState(ZSTD_compressedBlockState_t* bs)
 }
 
 /*! ZSTD_invalidateMatchState()
- * Invalidate all the matches in the match finder tables.
- * Requires nextSrc and base to be set (can be NULL).
+ *  Invalidate all the matches in the match finder tables.
+ *  Requires nextSrc and base to be set (can be NULL).
  */
 static void ZSTD_invalidateMatchState(ZSTD_matchState_t* ms)
 {
     ZSTD_window_clear(&ms->window);
 
     ms->nextToUpdate = ms->window.dictLimit;
-    ms->nextToUpdate3 = ms->window.dictLimit;
     ms->loadedDictEnd = 0;
     ms->opt.litLengthSum = 0;  /* force reset of btopt stats */
     ms->dictMatchState = NULL;
@@ -1327,15 +1332,17 @@ static size_t ZSTD_continueCCtx(ZSTD_CCtx* cctx, ZSTD_CCtx_params params, U64 pl
 
 typedef enum { ZSTDcrp_continue, ZSTDcrp_noMemset } ZSTD_compResetPolicy_e;
 
+typedef enum { ZSTD_resetTarget_CDict, ZSTD_resetTarget_CCtx } ZSTD_resetTarget_e;
+
 static void*
 ZSTD_reset_matchState(ZSTD_matchState_t* ms,
                       void* ptr,
                 const ZSTD_compressionParameters* cParams,
-                      ZSTD_compResetPolicy_e const crp, U32 const forCCtx)
+                      ZSTD_compResetPolicy_e const crp, ZSTD_resetTarget_e const forWho)
 {
     size_t const chainSize = (cParams->strategy == ZSTD_fast) ? 0 : ((size_t)1 << cParams->chainLog);
     size_t const hSize = ((size_t)1) << cParams->hashLog;
-    U32    const hashLog3 = (forCCtx && cParams->minMatch==3) ? MIN(ZSTD_HASHLOG3_MAX, cParams->windowLog) : 0;
+    U32    const hashLog3 = ((forWho == ZSTD_resetTarget_CCtx) && cParams->minMatch==3) ? MIN(ZSTD_HASHLOG3_MAX, cParams->windowLog) : 0;
     size_t const h3Size = ((size_t)1) << hashLog3;
     size_t const tableSpace = (chainSize + hSize + h3Size) * sizeof(U32);
 
@@ -1349,7 +1356,7 @@ ZSTD_reset_matchState(ZSTD_matchState_t* ms,
     ZSTD_invalidateMatchState(ms);
 
     /* opt parser space */
-    if (forCCtx && (cParams->strategy >= ZSTD_btopt)) {
+    if ((forWho == ZSTD_resetTarget_CCtx) && (cParams->strategy >= ZSTD_btopt)) {
         DEBUGLOG(4, "reserving optimal parser space");
         ms->opt.litFreq = (unsigned*)ptr;
         ms->opt.litLengthFreq = ms->opt.litFreq + (1<<Litbits);
@@ -1377,6 +1384,19 @@ ZSTD_reset_matchState(ZSTD_matchState_t* ms,
     return ptr;
 }
 
+/* ZSTD_indexTooCloseToMax() :
+ * minor optimization : prefer memset() rather than reduceIndex()
+ * which is measurably slow in some circumstances (reported for Visual Studio).
+ * Works when re-using a context for a lot of smallish inputs :
+ * if all inputs are smaller than ZSTD_INDEXOVERFLOW_MARGIN,
+ * memset() will be triggered before reduceIndex().
+ */
+#define ZSTD_INDEXOVERFLOW_MARGIN (16 MB)
+static int ZSTD_indexTooCloseToMax(ZSTD_window_t w)
+{
+    return (size_t)(w.nextSrc - w.base) > (ZSTD_CURRENT_MAX - ZSTD_INDEXOVERFLOW_MARGIN);
+}
+
 #define ZSTD_WORKSPACETOOLARGE_FACTOR 3 /* define "workspace is too large" as this number of times larger than needed */
 #define ZSTD_WORKSPACETOOLARGE_MAXDURATION 128  /* when workspace is continuously too large
                                                  * during at least this number of times,
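
Editor's note: `ZSTD_indexTooCloseToMax()` encodes a simple heuristic — once the window's running index gets within 16 MB of the overflow ceiling, a full table reset (a `memset`) is cheaper than rescaling every index. A toy model with made-up numbers (the real ceiling is zstd's `ZSTD_CURRENT_MAX`, not the value below):

```c
/* Toy illustration of the "reset instead of rescale" heuristic. */
#include <stdio.h>

#define MB              (1u << 20)
#define CURRENT_MAX     0xE0000000u        /* hypothetical ceiling, NOT zstd's value */
#define OVERFLOW_MARGIN (16u * MB)

static int index_too_close_to_max(unsigned currentIndex)
{
    return currentIndex > CURRENT_MAX - OVERFLOW_MARGIN;
}

int main(void)
{
    printf("%d\n", index_too_close_to_max(1000u * MB));          /* 0: plenty of headroom */
    printf("%d\n", index_too_close_to_max(CURRENT_MAX - MB));    /* 1: reset instead of rescale */
    return 0;
}
```

If every input is smaller than the margin, the reset always fires before a rescale would be needed, which is exactly the "lots of smallish inputs" case the comment describes.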
@@ -1388,7 +1408,7 @@ ZSTD_reset_matchState(ZSTD_matchState_t* ms,
     note : `params` are assumed fully validated at this stage */
 static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
                                       ZSTD_CCtx_params params,
-                                      U64 pledgedSrcSize,
+                                      U64 const pledgedSrcSize,
                                       ZSTD_compResetPolicy_e const crp,
                                       ZSTD_buffered_policy_e const zbuff)
 {
@@ -1400,13 +1420,21 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
         if (ZSTD_equivalentParams(zc->appliedParams, params,
                                   zc->inBuffSize,
                                   zc->seqStore.maxNbSeq, zc->seqStore.maxNbLit,
-                                  zbuff, pledgedSrcSize)) {
-            DEBUGLOG(4, "ZSTD_equivalentParams()==1 -> continue mode (wLog1=%u, blockSize1=%zu)",
-                        zc->appliedParams.cParams.windowLog, zc->blockSize);
+                                  zbuff, pledgedSrcSize) ) {
+            DEBUGLOG(4, "ZSTD_equivalentParams()==1 -> consider continue mode");
             zc->workSpaceOversizedDuration += (zc->workSpaceOversizedDuration > 0);   /* if it was too large, it still is */
-            if (zc->workSpaceOversizedDuration <= ZSTD_WORKSPACETOOLARGE_MAXDURATION)
+            if (zc->workSpaceOversizedDuration <= ZSTD_WORKSPACETOOLARGE_MAXDURATION) {
+                DEBUGLOG(4, "continue mode confirmed (wLog1=%u, blockSize1=%zu)",
+                            zc->appliedParams.cParams.windowLog, zc->blockSize);
+                if (ZSTD_indexTooCloseToMax(zc->blockState.matchState.window)) {
+                    /* prefer a reset, faster than a rescale */
+                    ZSTD_reset_matchState(&zc->blockState.matchState,
+                                           zc->entropyWorkspace + HUF_WORKSPACE_SIZE_U32,
+                                          &params.cParams,
+                                           crp, ZSTD_resetTarget_CCtx);
+                }
                 return ZSTD_continueCCtx(zc, params, pledgedSrcSize);
-    }   }
+    }   }   }
     DEBUGLOG(4, "ZSTD_equivalentParams()==0 -> reset CCtx");
 
     if (params.ldmParams.enableLdm) {
@@ -1449,7 +1477,7 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
         DEBUGLOG(4, "windowSize: %zu - blockSize: %zu", windowSize, blockSize);
 
         if (workSpaceTooSmall || workSpaceWasteful) {
-            DEBUGLOG(4, "Need to resize workSpaceSize from %zuKB to %zuKB",
+            DEBUGLOG(4, "Resize workSpaceSize from %zuKB to %zuKB",
                         zc->workSpaceSize >> 10,
                         neededSpace >> 10);
 
@@ -1491,7 +1519,10 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
 
         ZSTD_reset_compressedBlockState(zc->blockState.prevCBlock);
 
-        ptr = zc->entropyWorkspace + HUF_WORKSPACE_SIZE_U32;
+        ptr = ZSTD_reset_matchState(&zc->blockState.matchState,
+                                     zc->entropyWorkspace + HUF_WORKSPACE_SIZE_U32,
+                                    &params.cParams,
+                                     crp, ZSTD_resetTarget_CCtx);
 
         /* ldm hash table */
         /* initialize bucketOffsets table later for pointer alignment */
@@ -1509,8 +1540,6 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
         }
         assert(((size_t)ptr & 3) == 0); /* ensure ptr is properly aligned */
 
-        ptr = ZSTD_reset_matchState(&zc->blockState.matchState, ptr, &params.cParams, crp, /* forCCtx */ 1);
-
         /* sequences storage */
         zc->seqStore.maxNbSeq = maxNbSeq;
         zc->seqStore.sequencesStart = (seqDef*)ptr;
@@ -1587,15 +1616,14 @@ static int ZSTD_shouldAttachDict(const ZSTD_CDict* cdict,
            * handled in _enforceMaxDist */
 }
 
-static size_t ZSTD_resetCCtx_byAttachingCDict(
-    ZSTD_CCtx* cctx,
-    const ZSTD_CDict* cdict,
-    ZSTD_CCtx_params params,
-    U64 pledgedSrcSize,
-    ZSTD_buffered_policy_e zbuff)
+static size_t
+ZSTD_resetCCtx_byAttachingCDict(ZSTD_CCtx* cctx,
+                        const ZSTD_CDict* cdict,
+                        ZSTD_CCtx_params params,
+                        U64 pledgedSrcSize,
+                        ZSTD_buffered_policy_e zbuff)
 {
-    {
-        const ZSTD_compressionParameters *cdict_cParams = &cdict->matchState.cParams;
+    {   const ZSTD_compressionParameters* const cdict_cParams = &cdict->matchState.cParams;
         unsigned const windowLog = params.cParams.windowLog;
         assert(windowLog != 0);
         /* Resize working context table params for input only, since the dict
@@ -1607,8 +1635,7 @@ static size_t ZSTD_resetCCtx_byAttachingCDict(
         assert(cctx->appliedParams.cParams.strategy == cdict_cParams->strategy);
     }
 
-    {
-        const U32 cdictEnd = (U32)( cdict->matchState.window.nextSrc
+    {   const U32 cdictEnd = (U32)( cdict->matchState.window.nextSrc
                                   - cdict->matchState.window.base);
         const U32 cdictLen = cdictEnd - cdict->matchState.window.dictLimit;
         if (cdictLen == 0) {
@@ -1625,9 +1652,9 @@ static size_t ZSTD_resetCCtx_byAttachingCDict(
                 cctx->blockState.matchState.window.base + cdictEnd;
             ZSTD_window_clear(&cctx->blockState.matchState.window);
         }
+        /* loadedDictEnd is expressed within the referential of the active context */
         cctx->blockState.matchState.loadedDictEnd = cctx->blockState.matchState.window.dictLimit;
-    }
-    }
+    }   }
 
     cctx->dictID = cdict->dictID;
 
@@ -1681,7 +1708,6 @@ static size_t ZSTD_resetCCtx_byCopyingCDict(ZSTD_CCtx* cctx,
         ZSTD_matchState_t* dstMatchState = &cctx->blockState.matchState;
         dstMatchState->window       = srcMatchState->window;
         dstMatchState->nextToUpdate = srcMatchState->nextToUpdate;
-        dstMatchState->nextToUpdate3= srcMatchState->nextToUpdate3;
         dstMatchState->loadedDictEnd= srcMatchState->loadedDictEnd;
     }
 
@@ -1761,7 +1787,6 @@ static size_t ZSTD_copyCCtx_internal(ZSTD_CCtx* dstCCtx,
         ZSTD_matchState_t* dstMatchState = &dstCCtx->blockState.matchState;
         dstMatchState->window       = srcMatchState->window;
         dstMatchState->nextToUpdate = srcMatchState->nextToUpdate;
-        dstMatchState->nextToUpdate3= srcMatchState->nextToUpdate3;
        dstMatchState->loadedDictEnd= srcMatchState->loadedDictEnd;
     }
     dstCCtx->dictID = srcCCtx->dictID;
@@ -1831,16 +1856,15 @@ static void ZSTD_reduceTable_btlazy2(U32* const table, U32 const size, U32 const
 
 /*! ZSTD_reduceIndex() :
 *   rescale all indexes to avoid future overflow (indexes are U32) */
-static void ZSTD_reduceIndex (ZSTD_CCtx* zc, const U32 reducerValue)
+static void ZSTD_reduceIndex (ZSTD_matchState_t* ms, ZSTD_CCtx_params const* params, const U32 reducerValue)
 {
-    ZSTD_matchState_t* const ms = &zc->blockState.matchState;
-    {   U32 const hSize = (U32)1 << zc->appliedParams.cParams.hashLog;
+    {   U32 const hSize = (U32)1 << params->cParams.hashLog;
         ZSTD_reduceTable(ms->hashTable, hSize, reducerValue);
     }
 
-    if (zc->appliedParams.cParams.strategy != ZSTD_fast) {
-        U32 const chainSize = (U32)1 << zc->appliedParams.cParams.chainLog;
-        if (zc->appliedParams.cParams.strategy == ZSTD_btlazy2)
+    if (params->cParams.strategy != ZSTD_fast) {
+        U32 const chainSize = (U32)1 << params->cParams.chainLog;
+        if (params->cParams.strategy == ZSTD_btlazy2)
             ZSTD_reduceTable_btlazy2(ms->chainTable, chainSize, reducerValue);
         else
             ZSTD_reduceTable(ms->chainTable, chainSize, reducerValue);
@@ -1869,155 +1893,6 @@ static size_t ZSTD_noCompressBlock (void* dst, size_t dstCapacity, const void* s
     return ZSTD_blockHeaderSize + srcSize;
 }
 
-static size_t ZSTD_noCompressLiterals (void* dst, size_t dstCapacity, const void* src, size_t srcSize)
-{
-    BYTE* const ostart = (BYTE* const)dst;
-    U32   const flSize = 1 + (srcSize>31) + (srcSize>4095);
-
-    RETURN_ERROR_IF(srcSize + flSize > dstCapacity, dstSize_tooSmall);
-
-    switch(flSize)
-    {
-        case 1: /* 2 - 1 - 5 */
-            ostart[0] = (BYTE)((U32)set_basic + (srcSize<<3));
-            break;
-        case 2: /* 2 - 2 - 12 */
-            MEM_writeLE16(ostart, (U16)((U32)set_basic + (1<<2) + (srcSize<<4)));
-            break;
-        case 3: /* 2 - 2 - 20 */
-            MEM_writeLE32(ostart, (U32)((U32)set_basic + (3<<2) + (srcSize<<4)));
-            break;
-        default:   /* not necessary : flSize is {1,2,3} */
-            assert(0);
-    }
-
-    memcpy(ostart + flSize, src, srcSize);
-    return srcSize + flSize;
-}
-
-static size_t ZSTD_compressRleLiteralsBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize)
-{
-    BYTE* const ostart = (BYTE* const)dst;
-    U32   const flSize = 1 + (srcSize>31) + (srcSize>4095);
-
-    (void)dstCapacity;  /* dstCapacity already guaranteed to be >=4, hence large enough */
-
-    switch(flSize)
-    {
-        case 1: /* 2 - 1 - 5 */
-            ostart[0] = (BYTE)((U32)set_rle + (srcSize<<3));
-            break;
-        case 2: /* 2 - 2 - 12 */
-            MEM_writeLE16(ostart, (U16)((U32)set_rle + (1<<2) + (srcSize<<4)));
-            break;
-        case 3: /* 2 - 2 - 20 */
-            MEM_writeLE32(ostart, (U32)((U32)set_rle + (3<<2) + (srcSize<<4)));
-            break;
-        default:   /* not necessary : flSize is {1,2,3} */
-            assert(0);
-    }
-
-    ostart[flSize] = *(const BYTE*)src;
-    return flSize+1;
-}
-
-
-/* ZSTD_minGain() :
- * minimum compression required
- * to generate a compress block or a compressed literals section.
- * note : use same formula for both situations */
-static size_t ZSTD_minGain(size_t srcSize, ZSTD_strategy strat)
-{
-    U32 const minlog = (strat>=ZSTD_btultra) ? (U32)(strat) - 1 : 6;
-    ZSTD_STATIC_ASSERT(ZSTD_btultra == 8);
-    assert(ZSTD_cParam_withinBounds(ZSTD_c_strategy, strat));
-    return (srcSize >> minlog) + 2;
-}
-
-static size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf,
-                                     ZSTD_hufCTables_t* nextHuf,
-                                     ZSTD_strategy strategy, int disableLiteralCompression,
-                                     void* dst, size_t dstCapacity,
-                               const void* src, size_t srcSize,
-                                     void* workspace, size_t wkspSize,
-                               const int bmi2)
-{
-    size_t const minGain = ZSTD_minGain(srcSize, strategy);
-    size_t const lhSize = 3 + (srcSize >= 1 KB) + (srcSize >= 16 KB);
-    BYTE*  const ostart = (BYTE*)dst;
-    U32 singleStream = srcSize < 256;
-    symbolEncodingType_e hType = set_compressed;
-    size_t cLitSize;
-
-    DEBUGLOG(5,"ZSTD_compressLiterals (disableLiteralCompression=%i)",
-                disableLiteralCompression);
-
-    /* Prepare nextEntropy assuming reusing the existing table */
-    memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
-
-    if (disableLiteralCompression)
-        return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
-
-    /* small ? don't even attempt compression (speed opt) */
-#   define COMPRESS_LITERALS_SIZE_MIN 63
-    {   size_t const minLitSize = (prevHuf->repeatMode == HUF_repeat_valid) ? 6 : COMPRESS_LITERALS_SIZE_MIN;
-        if (srcSize <= minLitSize) return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
-    }
-
-    RETURN_ERROR_IF(dstCapacity < lhSize+1, dstSize_tooSmall, "not enough space for compression");
-    {   HUF_repeat repeat = prevHuf->repeatMode;
-        int const preferRepeat = strategy < ZSTD_lazy ? srcSize <= 1024 : 0;
-        if (repeat == HUF_repeat_valid && lhSize == 3) singleStream = 1;
-        cLitSize = singleStream ? HUF_compress1X_repeat(ostart+lhSize, dstCapacity-lhSize, src, srcSize, 255, 11,
-                                      workspace, wkspSize, (HUF_CElt*)nextHuf->CTable, &repeat, preferRepeat, bmi2)
-                                : HUF_compress4X_repeat(ostart+lhSize, dstCapacity-lhSize, src, srcSize, 255, 11,
-                                      workspace, wkspSize, (HUF_CElt*)nextHuf->CTable, &repeat, preferRepeat, bmi2);
-        if (repeat != HUF_repeat_none) {
-            /* reused the existing table */
-            hType = set_repeat;
-        }
-    }
-
-    if ((cLitSize==0) | (cLitSize >= srcSize - minGain) | ERR_isError(cLitSize)) {
-        memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
-        return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
-    }
-    if (cLitSize==1) {
-        memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
-        return ZSTD_compressRleLiteralsBlock(dst, dstCapacity, src, srcSize);
-    }
-
-    if (hType == set_compressed) {
-        /* using a newly constructed table */
-        nextHuf->repeatMode = HUF_repeat_check;
-    }
-
-    /* Build header */
-    switch(lhSize)
-    {
-    case 3: /* 2 - 2 - 10 - 10 */
-        {   U32 const lhc = hType + ((!singleStream) << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<14);
-            MEM_writeLE24(ostart, lhc);
-            break;
-        }
-    case 4: /* 2 - 2 - 14 - 14 */
-        {   U32 const lhc = hType + (2 << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<18);
-            MEM_writeLE32(ostart, lhc);
-            break;
-        }
-    case 5: /* 2 - 2 - 18 - 18 */
-        {   U32 const lhc = hType + (3 << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<22);
-            MEM_writeLE32(ostart, lhc);
-            ostart[4] = (BYTE)(cLitSize >> 10);
-            break;
-        }
-    default:  /* not possible : lhSize is {3,4,5} */
-        assert(0);
-    }
-    return lhSize+cLitSize;
-}
-
-
 void ZSTD_seqToCodes(const seqStore_t* seqStorePtr)
 {
     const seqDef* const sequences = seqStorePtr->sequencesStart;
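
Editor's note: the literals encoders deleted here were not dropped — per the file list they moved to the new `zstd_compress_literals.c`. The header they write packs the block type, a size-format flag, and the literals size into 1-3 bytes (the `2 - 1 - 5` / `2 - 2 - 12` / `2 - 2 - 20` bit layouts in the comments). A worked sketch of the `flSize == 2` case, assuming `set_basic == 0` as in zstd's `symbolEncodingType_e`; the helper name is hypothetical:

```c
/* Write a raw (uncompressed) literals-section header, 2-byte form:
 * valid for 32 <= srcSize <= 4095, per flSize = 1 + (srcSize>31) + (srcSize>4095). */
#include <stdint.h>
#include <stdio.h>

static size_t write_raw_literals_header_2B(uint8_t* dst, size_t srcSize)
{
    /* set_basic(0) + sizeFormat(1<<2) + (srcSize<<4), little-endian like MEM_writeLE16 */
    uint16_t const h = (uint16_t)(0 + (1 << 2) + (srcSize << 4));
    dst[0] = (uint8_t)h;
    dst[1] = (uint8_t)(h >> 8);
    return 2;
}

int main(void)
{
    uint8_t hdr[2];
    write_raw_literals_header_2B(hdr, 100);   /* 4 + 1600 = 0x0644 */
    printf("%02x %02x\n", hdr[0], hdr[1]);    /* prints: 44 06 */
    return 0;
}
```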
@@ -2040,418 +1915,6 @@ void ZSTD_seqToCodes(const seqStore_t* seqStorePtr)
     mlCodeTable[seqStorePtr->longLengthPos] = MaxML;
 }
 
-
-/**
- * -log2(x / 256) lookup table for x in [0, 256).
- * If x == 0: Return 0
- * Else: Return floor(-log2(x / 256) * 256)
- */
-static unsigned const kInverseProbabilityLog256[256] = {
-    0,    2048, 1792, 1642, 1536, 1453, 1386, 1329, 1280, 1236, 1197, 1162,
-    1130, 1100, 1073, 1047, 1024, 1001, 980,  960,  941,  923,  906,  889,
-    874,  859,  844,  830,  817,  804,  791,  779,  768,  756,  745,  734,
-    724,  714,  704,  694,  685,  676,  667,  658,  650,  642,  633,  626,
-    618,  610,  603,  595,  588,  581,  574,  567,  561,  554,  548,  542,
-    535,  529,  523,  517,  512,  506,  500,  495,  489,  484,  478,  473,
-    468,  463,  458,  453,  448,  443,  438,  434,  429,  424,  420,  415,
-    411,  407,  402,  398,  394,  390,  386,  382,  377,  373,  370,  366,
-    362,  358,  354,  350,  347,  343,  339,  336,  332,  329,  325,  322,
-    318,  315,  311,  308,  305,  302,  298,  295,  292,  289,  286,  282,
-    279,  276,  273,  270,  267,  264,  261,  258,  256,  253,  250,  247,
-    244,  241,  239,  236,  233,  230,  228,  225,  222,  220,  217,  215,
-    212,  209,  207,  204,  202,  199,  197,  194,  192,  190,  187,  185,
-    182,  180,  178,  175,  173,  171,  168,  166,  164,  162,  159,  157,
-    155,  153,  151,  149,  146,  144,  142,  140,  138,  136,  134,  132,
-    130,  128,  126,  123,  121,  119,  117,  115,  114,  112,  110,  108,
-    106,  104,  102,  100,  98,   96,   94,   93,   91,   89,   87,   85,
-    83,   82,   80,   78,   76,   74,   73,   71,   69,   67,   66,   64,
-    62,   61,   59,   57,   55,   54,   52,   50,   49,   47,   46,   44,
-    42,   41,   39,   37,   36,   34,   33,   31,   30,   28,   26,   25,
-    23,   22,   20,   19,   17,   16,   14,   13,   11,   10,   8,    7,
-    5,    4,    2,    1,
-};
-
-
-/**
- * Returns the cost in bits of encoding the distribution described by count
- * using the entropy bound.
- */
-static size_t ZSTD_entropyCost(unsigned const* count, unsigned const max, size_t const total)
-{
-    unsigned cost = 0;
-    unsigned s;
-    for (s = 0; s <= max; ++s) {
-        unsigned norm = (unsigned)((256 * count[s]) / total);
-        if (count[s] != 0 && norm == 0)
-            norm = 1;
-        assert(count[s] < total);
-        cost += count[s] * kInverseProbabilityLog256[norm];
-    }
-    return cost >> 8;
-}
-
-
-/**
- * Returns the cost in bits of encoding the distribution in count using the
- * table described by norm. The max symbol support by norm is assumed >= max.
- * norm must be valid for every symbol with non-zero probability in count.
- */
-static size_t ZSTD_crossEntropyCost(short const* norm, unsigned accuracyLog,
-                                    unsigned const* count, unsigned const max)
-{
-    unsigned const shift = 8 - accuracyLog;
-    size_t cost = 0;
-    unsigned s;
-    assert(accuracyLog <= 8);
-    for (s = 0; s <= max; ++s) {
-        unsigned const normAcc = norm[s] != -1 ? norm[s] : 1;
-        unsigned const norm256 = normAcc << shift;
-        assert(norm256 > 0);
-        assert(norm256 < 256);
-        cost += count[s] * kInverseProbabilityLog256[norm256];
-    }
-    return cost >> 8;
-}
-
-
-static unsigned ZSTD_getFSEMaxSymbolValue(FSE_CTable const* ctable) {
-    void const* ptr = ctable;
-    U16 const* u16ptr = (U16 const*)ptr;
-    U32 const maxSymbolValue = MEM_read16(u16ptr + 1);
-    return maxSymbolValue;
-}
-
-
-/**
- * Returns the cost in bits of encoding the distribution in count using ctable.
- * Returns an error if ctable cannot represent all the symbols in count.
- */
-static size_t ZSTD_fseBitCost(
-    FSE_CTable const* ctable,
-    unsigned const* count,
-    unsigned const max)
-{
-    unsigned const kAccuracyLog = 8;
-    size_t cost = 0;
-    unsigned s;
-    FSE_CState_t cstate;
-    FSE_initCState(&cstate, ctable);
-    RETURN_ERROR_IF(ZSTD_getFSEMaxSymbolValue(ctable) < max, GENERIC,
-                    "Repeat FSE_CTable has maxSymbolValue %u < %u",
-                    ZSTD_getFSEMaxSymbolValue(ctable), max);
-    for (s = 0; s <= max; ++s) {
-        unsigned const tableLog = cstate.stateLog;
-        unsigned const badCost = (tableLog + 1) << kAccuracyLog;
-        unsigned const bitCost = FSE_bitCost(cstate.symbolTT, tableLog, s, kAccuracyLog);
-        if (count[s] == 0)
-            continue;
-        RETURN_ERROR_IF(bitCost >= badCost, GENERIC,
-                        "Repeat FSE_CTable has Prob[%u] == 0", s);
-        cost += count[s] * bitCost;
-    }
-    return cost >> kAccuracyLog;
-}
-
-/**
- * Returns the cost in bytes of encoding the normalized count header.
- * Returns an error if any of the helper functions return an error.
- */
-static size_t ZSTD_NCountCost(unsigned const* count, unsigned const max,
-                              size_t const nbSeq, unsigned const FSELog)
-{
-    BYTE wksp[FSE_NCOUNTBOUND];
-    S16 norm[MaxSeq + 1];
-    const U32 tableLog = FSE_optimalTableLog(FSELog, nbSeq, max);
-    FORWARD_IF_ERROR(FSE_normalizeCount(norm, tableLog, count, nbSeq, max));
-    return FSE_writeNCount(wksp, sizeof(wksp), norm, max, tableLog);
-}
-
-
-typedef enum {
-    ZSTD_defaultDisallowed = 0,
-    ZSTD_defaultAllowed = 1
-} ZSTD_defaultPolicy_e;
-
-MEM_STATIC symbolEncodingType_e
-ZSTD_selectEncodingType(
-        FSE_repeat* repeatMode, unsigned const* count, unsigned const max,
-        size_t const mostFrequent, size_t nbSeq, unsigned const FSELog,
-        FSE_CTable const* prevCTable,
-        short const* defaultNorm, U32 defaultNormLog,
-        ZSTD_defaultPolicy_e const isDefaultAllowed,
-        ZSTD_strategy const strategy)
-{
-    ZSTD_STATIC_ASSERT(ZSTD_defaultDisallowed == 0 && ZSTD_defaultAllowed != 0);
-    if (mostFrequent == nbSeq) {
-        *repeatMode = FSE_repeat_none;
-        if (isDefaultAllowed && nbSeq <= 2) {
-            /* Prefer set_basic over set_rle when there are 2 or less symbols,
-             * since RLE uses 1 byte, but set_basic uses 5-6 bits per symbol.
-             * If basic encoding isn't possible, always choose RLE.
-             */
-            DEBUGLOG(5, "Selected set_basic");
-            return set_basic;
-        }
-        DEBUGLOG(5, "Selected set_rle");
-        return set_rle;
-    }
-    if (strategy < ZSTD_lazy) {
-        if (isDefaultAllowed) {
-            size_t const staticFse_nbSeq_max = 1000;
-            size_t const mult = 10 - strategy;
-            size_t const baseLog = 3;
-            size_t const dynamicFse_nbSeq_min = (((size_t)1 << defaultNormLog) * mult) >> baseLog;  /* 28-36 for offset, 56-72 for lengths */
-            assert(defaultNormLog >= 5 && defaultNormLog <= 6);  /* xx_DEFAULTNORMLOG */
-            assert(mult <= 9 && mult >= 7);
-            if ( (*repeatMode == FSE_repeat_valid)
-              && (nbSeq < staticFse_nbSeq_max) ) {
-                DEBUGLOG(5, "Selected set_repeat");
-                return set_repeat;
-            }
-            if ( (nbSeq < dynamicFse_nbSeq_min)
-              || (mostFrequent < (nbSeq >> (defaultNormLog-1))) ) {
-                DEBUGLOG(5, "Selected set_basic");
-                /* The format allows default tables to be repeated, but it isn't useful.
-                 * When using simple heuristics to select encoding type, we don't want
-                 * to confuse these tables with dictionaries. When running more careful
-                 * analysis, we don't need to waste time checking both repeating tables
-                 * and default tables.
-                 */
-                *repeatMode = FSE_repeat_none;
-                return set_basic;
-            }
-        }
-    } else {
-        size_t const basicCost = isDefaultAllowed ? ZSTD_crossEntropyCost(defaultNorm, defaultNormLog, count, max) : ERROR(GENERIC);
-        size_t const repeatCost = *repeatMode != FSE_repeat_none ? ZSTD_fseBitCost(prevCTable, count, max) : ERROR(GENERIC);
-        size_t const NCountCost = ZSTD_NCountCost(count, max, nbSeq, FSELog);
-        size_t const compressedCost = (NCountCost << 3) + ZSTD_entropyCost(count, max, nbSeq);
-
-        if (isDefaultAllowed) {
-            assert(!ZSTD_isError(basicCost));
-            assert(!(*repeatMode == FSE_repeat_valid && ZSTD_isError(repeatCost)));
-        }
-        assert(!ZSTD_isError(NCountCost));
-        assert(compressedCost < ERROR(maxCode));
-        DEBUGLOG(5, "Estimated bit costs: basic=%u\trepeat=%u\tcompressed=%u",
-                    (unsigned)basicCost, (unsigned)repeatCost, (unsigned)compressedCost);
-        if (basicCost <= repeatCost && basicCost <= compressedCost) {
-            DEBUGLOG(5, "Selected set_basic");
-            assert(isDefaultAllowed);
-            *repeatMode = FSE_repeat_none;
-            return set_basic;
-        }
-        if (repeatCost <= compressedCost) {
-            DEBUGLOG(5, "Selected set_repeat");
-            assert(!ZSTD_isError(repeatCost));
-            return set_repeat;
-        }
-        assert(compressedCost < basicCost && compressedCost < repeatCost);
-    }
-    DEBUGLOG(5, "Selected set_compressed");
-    *repeatMode = FSE_repeat_check;
-    return set_compressed;
-}
-
-MEM_STATIC size_t
-ZSTD_buildCTable(void* dst, size_t dstCapacity,
-                FSE_CTable* nextCTable, U32 FSELog, symbolEncodingType_e type,
-                unsigned* count, U32 max,
-                const BYTE* codeTable, size_t nbSeq,
-                const S16* defaultNorm, U32 defaultNormLog, U32 defaultMax,
-                const FSE_CTable* prevCTable, size_t prevCTableSize,
-                void* workspace, size_t workspaceSize)
-{
-    BYTE* op = (BYTE*)dst;
-    const BYTE* const oend = op + dstCapacity;
-    DEBUGLOG(6, "ZSTD_buildCTable (dstCapacity=%u)", (unsigned)dstCapacity);
-
-    switch (type) {
-    case set_rle:
-        FORWARD_IF_ERROR(FSE_buildCTable_rle(nextCTable, (BYTE)max));
-        RETURN_ERROR_IF(dstCapacity==0, dstSize_tooSmall);
-        *op = codeTable[0];
-        return 1;
-    case set_repeat:
-        memcpy(nextCTable, prevCTable, prevCTableSize);
-        return 0;
-    case set_basic:
-        FORWARD_IF_ERROR(FSE_buildCTable_wksp(nextCTable, defaultNorm, defaultMax, defaultNormLog, workspace, workspaceSize));  /* note : could be pre-calculated */
-        return 0;
-    case set_compressed: {
-        S16 norm[MaxSeq + 1];
-        size_t nbSeq_1 = nbSeq;
-        const U32 tableLog = FSE_optimalTableLog(FSELog, nbSeq, max);
-        if (count[codeTable[nbSeq-1]] > 1) {
-            count[codeTable[nbSeq-1]]--;
-            nbSeq_1--;
-        }
-        assert(nbSeq_1 > 1);
-        FORWARD_IF_ERROR(FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max));
-        {   size_t const NCountSize = FSE_writeNCount(op, oend - op, norm, max, tableLog);   /* overflow protected */
-            FORWARD_IF_ERROR(NCountSize);
-            FORWARD_IF_ERROR(FSE_buildCTable_wksp(nextCTable, norm, max, tableLog, workspace, workspaceSize));
-            return NCountSize;
-        }
-    }
-    default: assert(0); RETURN_ERROR(GENERIC);
-    }
-}
-
-FORCE_INLINE_TEMPLATE size_t
-ZSTD_encodeSequences_body(
-            void* dst, size_t dstCapacity,
-            FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable,
-            FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable,
-            FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable,
-            seqDef const* sequences, size_t nbSeq, int longOffsets)
-{
-    BIT_CStream_t blockStream;
-    FSE_CState_t  stateMatchLength;
-    FSE_CState_t  stateOffsetBits;
-    FSE_CState_t  stateLitLength;
-
-    RETURN_ERROR_IF(
-        ERR_isError(BIT_initCStream(&blockStream, dst, dstCapacity)),
-        dstSize_tooSmall, "not enough space remaining");
-    DEBUGLOG(6, "available space for bitstream : %i  (dstCapacity=%u)",
-                (int)(blockStream.endPtr - blockStream.startPtr),
-                (unsigned)dstCapacity);
-
-    /* first symbols */
-    FSE_initCState2(&stateMatchLength, CTable_MatchLength, mlCodeTable[nbSeq-1]);
-    FSE_initCState2(&stateOffsetBits,  CTable_OffsetBits,  ofCodeTable[nbSeq-1]);
-    FSE_initCState2(&stateLitLength,   CTable_LitLength,   llCodeTable[nbSeq-1]);
-    BIT_addBits(&blockStream, sequences[nbSeq-1].litLength, LL_bits[llCodeTable[nbSeq-1]]);
-    if (MEM_32bits()) BIT_flushBits(&blockStream);
-    BIT_addBits(&blockStream, sequences[nbSeq-1].matchLength, ML_bits[mlCodeTable[nbSeq-1]]);
-    if (MEM_32bits()) BIT_flushBits(&blockStream);
-    if (longOffsets) {
-        U32 const ofBits = ofCodeTable[nbSeq-1];
-        int const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN-1);
-        if (extraBits) {
-            BIT_addBits(&blockStream, sequences[nbSeq-1].offset, extraBits);
-            BIT_flushBits(&blockStream);
-        }
-        BIT_addBits(&blockStream, sequences[nbSeq-1].offset >> extraBits,
-                    ofBits - extraBits);
-    } else {
-        BIT_addBits(&blockStream, sequences[nbSeq-1].offset, ofCodeTable[nbSeq-1]);
-    }
-    BIT_flushBits(&blockStream);
-
-    {   size_t n;
-        for (n=nbSeq-2 ; n<nbSeq ; n--) {      /* intentional underflow */
-            BYTE const llCode = llCodeTable[n];
-            BYTE const ofCode = ofCodeTable[n];
-            BYTE const mlCode = mlCodeTable[n];
-            U32  const llBits = LL_bits[llCode];
-            U32  const ofBits = ofCode;
-            U32  const mlBits = ML_bits[mlCode];
-            DEBUGLOG(6, "encoding: litlen:%2u - matchlen:%2u - offCode:%7u",
-                        (unsigned)sequences[n].litLength,
-                        (unsigned)sequences[n].matchLength + MINMATCH,
-                        (unsigned)sequences[n].offset);
-                                                                            /* 32b*/  /* 64b*/
-                                                                            /* (7)*/  /* (7)*/
-            FSE_encodeSymbol(&blockStream, &stateOffsetBits, ofCode);       /* 15 */  /* 15 */
-            FSE_encodeSymbol(&blockStream, &stateMatchLength, mlCode);      /* 24 */  /* 24 */
-            if (MEM_32bits()) BIT_flushBits(&blockStream);                  /* (7)*/
-            FSE_encodeSymbol(&blockStream, &stateLitLength, llCode);        /* 16 */  /* 33 */
-            if (MEM_32bits() || (ofBits+mlBits+llBits >= 64-7-(LLFSELog+MLFSELog+OffFSELog)))
-                BIT_flushBits(&blockStream);                                /* (7)*/
-            BIT_addBits(&blockStream, sequences[n].litLength, llBits);
-            if (MEM_32bits() && ((llBits+mlBits)>24)) BIT_flushBits(&blockStream);
-            BIT_addBits(&blockStream, sequences[n].matchLength, mlBits);
-            if (MEM_32bits() || (ofBits+mlBits+llBits > 56)) BIT_flushBits(&blockStream);
-            if (longOffsets) {
-                int const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN-1);
-                if (extraBits) {
-                    BIT_addBits(&blockStream, sequences[n].offset, extraBits);
-                    BIT_flushBits(&blockStream);                            /* (7)*/
-                }
-                BIT_addBits(&blockStream, sequences[n].offset >> extraBits,
-                            ofBits - extraBits);                            /* 31 */
-            } else {
-                BIT_addBits(&blockStream, sequences[n].offset, ofBits);     /* 31 */
-            }
-            BIT_flushBits(&blockStream);                                    /* (7)*/
-            DEBUGLOG(7, "remaining space : %i", (int)(blockStream.endPtr - blockStream.ptr));
-    }   }
-
-    DEBUGLOG(6, "ZSTD_encodeSequences: flushing ML state with %u bits", stateMatchLength.stateLog);
-    FSE_flushCState(&blockStream, &stateMatchLength);
-    DEBUGLOG(6, "ZSTD_encodeSequences: flushing Off state with %u bits", stateOffsetBits.stateLog);
-    FSE_flushCState(&blockStream, &stateOffsetBits);
-    DEBUGLOG(6, "ZSTD_encodeSequences: flushing LL state with %u bits", stateLitLength.stateLog);
-    FSE_flushCState(&blockStream, &stateLitLength);
-
-    {   size_t const streamSize = BIT_closeCStream(&blockStream);
-        RETURN_ERROR_IF(streamSize==0, dstSize_tooSmall, "not enough space");
-        return streamSize;
-    }
-}
-
-static size_t
-ZSTD_encodeSequences_default(
-            void* dst, size_t dstCapacity,
-            FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable,
-            FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable,
-            FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable,
-            seqDef const* sequences, size_t nbSeq, int longOffsets)
-{
-    return ZSTD_encodeSequences_body(dst, dstCapacity,
-                                    CTable_MatchLength, mlCodeTable,
-                                    CTable_OffsetBits, ofCodeTable,
-                                    CTable_LitLength, llCodeTable,
-                                    sequences, nbSeq, longOffsets);
-}
-
-
-#if DYNAMIC_BMI2
-
-static TARGET_ATTRIBUTE("bmi2") size_t
-ZSTD_encodeSequences_bmi2(
-            void* dst, size_t dstCapacity,
-            FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable,
-            FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable,
-            FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable,
-            seqDef const* sequences, size_t nbSeq, int longOffsets)
-{
-    return ZSTD_encodeSequences_body(dst, dstCapacity,
-                                    CTable_MatchLength, mlCodeTable,
-                                    CTable_OffsetBits, ofCodeTable,
-                                    CTable_LitLength, llCodeTable,
-                                    sequences, nbSeq, longOffsets);
-}
-
-#endif
-
-static size_t ZSTD_encodeSequences(
-            void* dst, size_t dstCapacity,
-            FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable,
-            FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable,
-            FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable,
-            seqDef const* sequences, size_t nbSeq, int longOffsets, int bmi2)
-{
-    DEBUGLOG(5, "ZSTD_encodeSequences: dstCapacity = %u", (unsigned)dstCapacity);
-#if DYNAMIC_BMI2
-    if (bmi2) {
-        return ZSTD_encodeSequences_bmi2(dst, dstCapacity,
-                                         CTable_MatchLength, mlCodeTable,
-                                         CTable_OffsetBits, ofCodeTable,
-                                         CTable_LitLength, llCodeTable,
-                                         sequences, nbSeq, longOffsets);
-    }
-#endif
-    (void)bmi2;
-    return ZSTD_encodeSequences_default(dst, dstCapacity,
-                                        CTable_MatchLength, mlCodeTable,
-                                        CTable_OffsetBits, ofCodeTable,
-                                        CTable_LitLength, llCodeTable,
-                                        sequences, nbSeq, longOffsets);
-}
-
 static int ZSTD_disableLiteralsCompression(const ZSTD_CCtx_params* cctxParams)
 {
     switch (cctxParams->literalCompressionMode) {
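
Editor's note: the entropy-cost machinery deleted above (including `kInverseProbabilityLog256`) moved to the new `zstd_compress_sequences.c`. Each table entry `x` approximates `floor(-log2(x/256) * 256)`, i.e. the bit cost (in 1/256-bit units) of a symbol with probability `x/256`. A small self-check against the table's spot values (compile with `-lm`):

```c
/* Verify a few kInverseProbabilityLog256 entries: probability 1/2 (x=128)
 * costs exactly 1 bit -> 256; 1/4 (x=64) -> 512; 1/16 (x=16) -> 1024;
 * 1/256 (x=1) -> 2048. All match the relocated table. */
#include <math.h>
#include <stdio.h>

int main(void)
{
    unsigned const xs[] = { 1, 16, 64, 128 };
    size_t i;
    for (i = 0; i < sizeof(xs)/sizeof(xs[0]); i++) {
        unsigned const x = xs[i];
        unsigned const entry = (unsigned)floor(-log2((double)x / 256.0) * 256.0);
        printf("x=%3u -> %u\n", x, entry);   /* 2048, 1024, 512, 256 */
    }
    return 0;
}
```

`ZSTD_entropyCost` then sums `count[s] * table[norm]` over all symbols and shifts right by 8, yielding the entropy bound in whole bits for a candidate distribution.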
@@ -2496,8 +1959,8 @@ ZSTD_compressSequences_internal(seqStore_t* seqStorePtr,
     BYTE* seqHead;
     BYTE* lastNCount = NULL;
 
+    DEBUGLOG(5, "ZSTD_compressSequences_internal (nbSeq=%zu)", nbSeq);
     ZSTD_STATIC_ASSERT(HUF_WORKSPACE_SIZE >= (1<<MAX(MLFSELog,LLFSELog)));
-    DEBUGLOG(5, "ZSTD_compressSequences_internal");
 
     /* Compress literals */
     {   const BYTE* const literals = seqStorePtr->litStart;
@@ -2524,6 +1987,7 @@ ZSTD_compressSequences_internal(seqStore_t* seqStorePtr,
         op[0] = (BYTE)((nbSeq>>8) + 0x80), op[1] = (BYTE)nbSeq, op+=2;
     else
         op[0]=0xFF, MEM_writeLE16(op+1, (U16)(nbSeq - LONGNBSEQ)), op+=3;
+    assert(op <= oend);
     if (nbSeq==0) {
         /* Copy the old tables over as if we repeated them */
         memcpy(&nextEntropy->fse, &prevEntropy->fse, sizeof(prevEntropy->fse));
@@ -2532,6 +1996,7 @@ ZSTD_compressSequences_internal(seqStore_t* seqStorePtr,
 
     /* seqHead : flags for FSE encoding type */
     seqHead = op++;
+    assert(op <= oend);
 
     /* convert length/distances into codes */
     ZSTD_seqToCodes(seqStorePtr);
@@ -2555,6 +2020,7 @@ ZSTD_compressSequences_internal(seqStore_t* seqStorePtr,
         if (LLtype == set_compressed)
             lastNCount = op;
         op += countSize;
+        assert(op <= oend);
    }   }
     /* build CTable for Offsets */
     {   unsigned max = MaxOff;
@@ -2577,6 +2043,7 @@ ZSTD_compressSequences_internal(seqStore_t* seqStorePtr,
         if (Offtype == set_compressed)
             lastNCount = op;
         op += countSize;
+        assert(op <= oend);
    }   }
     /* build CTable for MatchLengths */
     {   unsigned max = MaxML;
@@ -2597,6 +2064,7 @@ ZSTD_compressSequences_internal(seqStore_t* seqStorePtr,
         if (MLtype == set_compressed)
             lastNCount = op;
         op += countSize;
+        assert(op <= oend);
    }   }
 
     *seqHead = (BYTE)((LLtype<<6) + (Offtype<<4) + (MLtype<<2));
@@ -2610,6 +2078,7 @@ ZSTD_compressSequences_internal(seqStore_t* seqStorePtr,
                                         longOffsets, bmi2);
         FORWARD_IF_ERROR(bitstreamSize);
         op += bitstreamSize;
+        assert(op <= oend);
         /* zstd versions <= 1.3.4 mistakenly report corruption when
          * FSE_readNCount() receives a buffer < 4 bytes.
          * Fixed by https://github.com/facebook/zstd/pull/1146.
@@ -2721,30 +2190,24 @@ void ZSTD_resetSeqStore(seqStore_t* ssPtr)
     ssPtr->longLengthID = 0;
 }
 
-static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc,
-                                        void* dst, size_t dstCapacity,
-                                        const void* src, size_t srcSize)
+typedef enum { ZSTDbss_compress, ZSTDbss_noCompress } ZSTD_buildSeqStore_e;
+
+static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize)
 {
     ZSTD_matchState_t* const ms = &zc->blockState.matchState;
-    size_t cSize;
-    DEBUGLOG(5, "ZSTD_compressBlock_internal (dstCapacity=%u, dictLimit=%u, nextToUpdate=%u)",
-                (unsigned)dstCapacity, (unsigned)ms->window.dictLimit, (unsigned)ms->nextToUpdate);
+    DEBUGLOG(5, "ZSTD_buildSeqStore (srcSize=%zu)", srcSize);
     assert(srcSize <= ZSTD_BLOCKSIZE_MAX);
-
     /* Assert that we have correctly flushed the ctx params into the ms's copy */
     ZSTD_assertEqualCParams(zc->appliedParams.cParams, ms->cParams);
-
     if (srcSize < MIN_CBLOCK_SIZE+ZSTD_blockHeaderSize+1) {
         ZSTD_ldm_skipSequences(&zc->externSeqStore, srcSize, zc->appliedParams.cParams.minMatch);
-        cSize = 0;
-        goto out;  /* don't even attempt compression below a certain srcSize */
+        return ZSTDbss_noCompress; /* don't even attempt compression below a certain srcSize */
     }
     ZSTD_resetSeqStore(&(zc->seqStore));
     /* required for optimal parser to read stats from dictionary */
     ms->opt.symbolCosts = &zc->blockState.prevCBlock->entropy;
     /* tell the optimal parser how we expect to compress literals */
     ms->opt.literalCompressionMode = zc->appliedParams.literalCompressionMode;
-
     /* a gap between an attached dict and the current window is not safe,
      * they must remain adjacent,
      * and when that stops being the case, the dict must be unset */
@@ -2798,6 +2261,21 @@ static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc,
     {   const BYTE* const lastLiterals = (const BYTE*)src + srcSize - lastLLSize;
         ZSTD_storeLastLiterals(&zc->seqStore, lastLiterals, lastLLSize);
     }   }
+    return ZSTDbss_compress;
+}
+
+static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc,
+                                        void* dst, size_t dstCapacity,
+                                        const void* src, size_t srcSize)
+{
+    size_t cSize;
+    DEBUGLOG(5, "ZSTD_compressBlock_internal (dstCapacity=%u, dictLimit=%u, nextToUpdate=%u)",
+                (unsigned)dstCapacity, (unsigned)zc->blockState.matchState.window.dictLimit, (unsigned)zc->blockState.matchState.nextToUpdate);
+
+    {   const size_t bss = ZSTD_buildSeqStore(zc, src, srcSize);
+        FORWARD_IF_ERROR(bss);
+        if (bss == ZSTDbss_noCompress) { cSize = 0; goto out; }
+    }
 
     /* encode sequences and literals */
     cSize = ZSTD_compressSequences(&zc->seqStore,
@@ -2826,6 +2304,25 @@ out:
 }
 
 
+static void ZSTD_overflowCorrectIfNeeded(ZSTD_matchState_t* ms, ZSTD_CCtx_params const* params, void const* ip, void const* iend)
+{
+    if (ZSTD_window_needOverflowCorrection(ms->window, iend)) {
+        U32 const maxDist = (U32)1 << params->cParams.windowLog;
+        U32 const cycleLog = ZSTD_cycleLog(params->cParams.chainLog, params->cParams.strategy);
+        U32 const correction = ZSTD_window_correctOverflow(&ms->window, cycleLog, maxDist, ip);
+        ZSTD_STATIC_ASSERT(ZSTD_CHAINLOG_MAX <= 30);
+        ZSTD_STATIC_ASSERT(ZSTD_WINDOWLOG_MAX_32 <= 30);
+        ZSTD_STATIC_ASSERT(ZSTD_WINDOWLOG_MAX <= 31);
+        ZSTD_reduceIndex(ms, params, correction);
+        if (ms->nextToUpdate < correction) ms->nextToUpdate = 0;
+        else ms->nextToUpdate -= correction;
+        /* invalidate dictionaries on overflow correction */
+        ms->loadedDictEnd = 0;
+        ms->dictMatchState = NULL;
+    }
+}
+
+
 /*! ZSTD_compress_frameChunk() :
 *   Compress a chunk of data into one or multiple blocks.
 *   All blocks will be terminated, all input will be consumed.
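
Editor's note: `ZSTD_overflowCorrectIfNeeded()` factors out the overflow-correction blocks that were previously duplicated in `ZSTD_compress_frameChunk` and `ZSTD_compressContinue_internal` (removed in the hunks below). The idea: match-finder tables store absolute U32 indexes into an ever-growing window, so before they can overflow, every entry is shifted down by a correction value. A toy model of the rescale, not zstd's actual `ZSTD_reduceTable` (which has extra subtleties around reserved values):

```c
/* Toy index rescale: shift all table entries down by `correction`,
 * clamping entries that would underflow (they pointed before the
 * surviving window and are no longer reachable anyway). */
#include <stdint.h>
#include <stddef.h>

static void reduce_table(uint32_t* table, size_t size, uint32_t correction)
{
    size_t i;
    for (i = 0; i < size; i++)
        table[i] = (table[i] < correction) ? 0 : table[i] - correction;
}
```

The helper also zeroes `loadedDictEnd` and `dictMatchState`, since dictionary indexes expressed in the old referential become meaningless after the shift.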
@@ -2844,7 +2341,7 @@ static size_t ZSTD_compress_frameChunk (ZSTD_CCtx* cctx,
      BYTE* const ostart = (BYTE*)dst;
      BYTE* op = ostart;
      U32 const maxDist = (U32)1 << cctx->appliedParams.cParams.windowLog;
-     assert(cctx->appliedParams.cParams.windowLog <= 31);
+     assert(cctx->appliedParams.cParams.windowLog <= ZSTD_WINDOWLOG_MAX);

      DEBUGLOG(5, "ZSTD_compress_frameChunk (blockSize=%u)", (unsigned)blockSize);
      if (cctx->appliedParams.fParams.checksumFlag && srcSize)
@@ -2859,19 +2356,10 @@ static size_t ZSTD_compress_frameChunk (ZSTD_CCtx* cctx,
                      "not enough space to store compressed block");
          if (remaining < blockSize) blockSize = remaining;

-         if (ZSTD_window_needOverflowCorrection(ms->window, ip + blockSize)) {
-             U32 const cycleLog = ZSTD_cycleLog(cctx->appliedParams.cParams.chainLog, cctx->appliedParams.cParams.strategy);
-             U32 const correction = ZSTD_window_correctOverflow(&ms->window, cycleLog, maxDist, ip);
-             ZSTD_STATIC_ASSERT(ZSTD_CHAINLOG_MAX <= 30);
-             ZSTD_STATIC_ASSERT(ZSTD_WINDOWLOG_MAX_32 <= 30);
-             ZSTD_STATIC_ASSERT(ZSTD_WINDOWLOG_MAX <= 31);
-             ZSTD_reduceIndex(cctx, correction);
-             if (ms->nextToUpdate < correction) ms->nextToUpdate = 0;
-             else ms->nextToUpdate -= correction;
-             ms->loadedDictEnd = 0;
-             ms->dictMatchState = NULL;
-         }
-         ZSTD_window_enforceMaxDist(&ms->window, ip + blockSize, maxDist, &ms->loadedDictEnd, &ms->dictMatchState);
+         ZSTD_overflowCorrectIfNeeded(ms, &cctx->appliedParams, ip, ip + blockSize);
+         ZSTD_checkDictValidity(&ms->window, ip + blockSize, maxDist, &ms->loadedDictEnd, &ms->dictMatchState);
+
+         /* Ensure hash/chain table insertion resumes no sooner than lowlimit */
          if (ms->nextToUpdate < ms->window.lowLimit) ms->nextToUpdate = ms->window.lowLimit;

          {   size_t cSize = ZSTD_compressBlock_internal(cctx,
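
Note: with the helper in place, the per-block prologue shrinks to two calls, and ZSTD_checkDictValidity takes over the dictionary-invalidation role of the old ZSTD_window_enforceMaxDist call at this site. The surrounding loop consumes the frame one blockSize piece at a time, shrinking the last piece to whatever remains. A skeletal version of that loop shape (process_block and frame_chunk are illustrative names, not zstd API):

    #include <stddef.h>
    #include <stdio.h>

    static void process_block(const char* p, size_t n)
    {
        (void)p;
        printf("block of %zu bytes\n", n);  /* real code: overflow check, dict check, compress */
    }

    static void frame_chunk(const char* src, size_t srcSize, size_t blockSize)
    {
        const char* ip = src;
        size_t remaining = srcSize;
        while (remaining) {
            size_t const thisBlock = (remaining < blockSize) ? remaining : blockSize;
            process_block(ip, thisBlock);
            ip += thisBlock;
            remaining -= thisBlock;
        }
    }

    int main(void)
    {
        frame_chunk("0123456789abcdef", 16, 5);  /* 5 + 5 + 5 + 1 */
        return 0;
    }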
@@ -2899,7 +2387,7 @@ static size_t ZSTD_compress_frameChunk (ZSTD_CCtx* cctx,
      }   }

      if (lastFrameChunk && (op>ostart)) cctx->stage = ZSTDcs_ending;
-     return op-ostart;
+     return (size_t)(op-ostart);
  }

@@ -2991,6 +2479,7 @@ static size_t ZSTD_compressContinue_internal (ZSTD_CCtx* cctx,
          fhSize = ZSTD_writeFrameHeader(dst, dstCapacity, cctx->appliedParams,
                                         cctx->pledgedSrcSizePlusOne-1, cctx->dictID);
          FORWARD_IF_ERROR(fhSize);
+         assert(fhSize <= dstCapacity);
          dstCapacity -= fhSize;
          dst = (char*)dst + fhSize;
          cctx->stage = ZSTDcs_ongoing;
@@ -3007,18 +2496,7 @@ static size_t ZSTD_compressContinue_internal (ZSTD_CCtx* cctx,

      if (!frame) {
          /* overflow check and correction for block mode */
-         if (ZSTD_window_needOverflowCorrection(ms->window, (const char*)src + srcSize)) {
-             U32 const cycleLog = ZSTD_cycleLog(cctx->appliedParams.cParams.chainLog, cctx->appliedParams.cParams.strategy);
-             U32 const correction = ZSTD_window_correctOverflow(&ms->window, cycleLog, 1 << cctx->appliedParams.cParams.windowLog, src);
-             ZSTD_STATIC_ASSERT(ZSTD_CHAINLOG_MAX <= 30);
-             ZSTD_STATIC_ASSERT(ZSTD_WINDOWLOG_MAX_32 <= 30);
-             ZSTD_STATIC_ASSERT(ZSTD_WINDOWLOG_MAX <= 31);
-             ZSTD_reduceIndex(cctx, correction);
-             if (ms->nextToUpdate < correction) ms->nextToUpdate = 0;
-             else ms->nextToUpdate -= correction;
-             ms->loadedDictEnd = 0;
-             ms->dictMatchState = NULL;
-         }
+         ZSTD_overflowCorrectIfNeeded(ms, &cctx->appliedParams, src, (BYTE const*)src + srcSize);
      }

      DEBUGLOG(5, "ZSTD_compressContinue_internal (blockSize=%u)", (unsigned)cctx->blockSize);
@@ -3074,7 +2552,7 @@ static size_t ZSTD_loadDictionaryContent(ZSTD_matchState_t* ms,
                                           const void* src, size_t srcSize,
                                           ZSTD_dictTableLoadMethod_e dtlm)
  {
-     const BYTE* const ip = (const BYTE*) src;
+     const BYTE* ip = (const BYTE*) src;
      const BYTE* const iend = ip + srcSize;

      ZSTD_window_update(&ms->window, src, srcSize);
@@ -3085,32 +2563,42 @@ static size_t ZSTD_loadDictionaryContent(ZSTD_matchState_t* ms,

      if (srcSize <= HASH_READ_SIZE) return 0;

-     switch(params->cParams.strategy)
-     {
-     case ZSTD_fast:
-         ZSTD_fillHashTable(ms, iend, dtlm);
-         break;
-     case ZSTD_dfast:
-         ZSTD_fillDoubleHashTable(ms, iend, dtlm);
-         break;
+     while (iend - ip > HASH_READ_SIZE) {
+         size_t const remaining = iend - ip;
+         size_t const chunk = MIN(remaining, ZSTD_CHUNKSIZE_MAX);
+         const BYTE* const ichunk = ip + chunk;

-     case ZSTD_greedy:
-     case ZSTD_lazy:
-     case ZSTD_lazy2:
-         if (srcSize >= HASH_READ_SIZE)
-             ZSTD_insertAndFindFirstIndex(ms, iend-HASH_READ_SIZE);
-         break;
+         ZSTD_overflowCorrectIfNeeded(ms, params, ip, ichunk);

-     case ZSTD_btlazy2:   /* we want the dictionary table fully sorted */
-     case ZSTD_btopt:
-     case ZSTD_btultra:
-     case ZSTD_btultra2:
-         if (srcSize >= HASH_READ_SIZE)
-             ZSTD_updateTree(ms, iend-HASH_READ_SIZE, iend);
-         break;
+         switch(params->cParams.strategy)
+         {
+         case ZSTD_fast:
+             ZSTD_fillHashTable(ms, ichunk, dtlm);
+             break;
+         case ZSTD_dfast:
+             ZSTD_fillDoubleHashTable(ms, ichunk, dtlm);
+             break;

-     default:
-         assert(0);  /* not possible : not a valid strategy id */
+         case ZSTD_greedy:
+         case ZSTD_lazy:
+         case ZSTD_lazy2:
+             if (chunk >= HASH_READ_SIZE)
+                 ZSTD_insertAndFindFirstIndex(ms, ichunk-HASH_READ_SIZE);
+             break;
+
+         case ZSTD_btlazy2:   /* we want the dictionary table fully sorted */
+         case ZSTD_btopt:
+         case ZSTD_btultra:
+         case ZSTD_btultra2:
+             if (chunk >= HASH_READ_SIZE)
+                 ZSTD_updateTree(ms, ichunk-HASH_READ_SIZE, ichunk);
+             break;
+
+         default:
+             assert(0);  /* not possible : not a valid strategy id */
+         }
+
+         ip = ichunk;
      }

      ms->nextToUpdate = (U32)(iend - ms->window.base);
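
Note: this is the functional heart of the hunk. Dictionary content is no longer inserted in one pass over [ip, iend) but in chunks of at most ZSTD_CHUNKSIZE_MAX, with an overflow correction before each chunk, so very large dictionaries can no longer push the 32-bit match indices past their range. A compact model of the loop, with a deliberately tiny CHUNK so the splitting is visible (sizes and names here are illustrative, not zstd's):

    #include <stddef.h>
    #include <stdio.h>
    #include <string.h>

    #define CHUNK 8
    #define MIN(a,b) ((a) < (b) ? (a) : (b))

    static void load_range(const char* from, const char* to)
    {
        /* real code would fill hash/chain/tree tables over [from, to) */
        printf("load %zu bytes\n", (size_t)(to - from));
    }

    static void load_content(const char* src, size_t srcSize)
    {
        const char* ip = src;
        const char* const iend = src + srcSize;
        while (iend - ip > 0) {
            size_t const remaining = (size_t)(iend - ip);
            size_t const chunk = MIN(remaining, CHUNK);
            /* real code: ZSTD_overflowCorrectIfNeeded(ms, params, ip, ip + chunk); */
            load_range(ip, ip + chunk);
            ip += chunk;
        }
    }

    int main(void)
    {
        const char* dict = "a dictionary long enough to split";
        load_content(dict, strlen(dict));  /* 8 + 8 + 8 + 8 + 1 */
        return 0;
    }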
@@ -3297,12 +2785,11 @@ static size_t ZSTD_compressBegin_internal(ZSTD_CCtx* cctx,

      FORWARD_IF_ERROR( ZSTD_resetCCtx_internal(cctx, params, pledgedSrcSize,
                                                ZSTDcrp_continue, zbuff) );
-     {
-         size_t const dictID = ZSTD_compress_insertDictionary(
+     {   size_t const dictID = ZSTD_compress_insertDictionary(
              cctx->blockState.prevCBlock, &cctx->blockState.matchState,
              &params, dict, dictSize, dictContentType, dtlm, cctx->entropyWorkspace);
          FORWARD_IF_ERROR(dictID);
-         assert(dictID <= (size_t)(U32)-1);
+         assert(dictID <= UINT_MAX);
          cctx->dictID = (U32)dictID;
      }
      return 0;
@@ -3555,10 +3042,10 @@ static size_t ZSTD_initCDict_internal(

      /* Reset the state to no dictionary */
      ZSTD_reset_compressedBlockState(&cdict->cBlockState);
-     {   void* const end = ZSTD_reset_matchState(
-             &cdict->matchState,
-             (U32*)cdict->workspace + HUF_WORKSPACE_SIZE_U32,
-             &cParams, ZSTDcrp_continue, /* forCCtx */ 0);
+     {   void* const end = ZSTD_reset_matchState(&cdict->matchState,
+             (U32*)cdict->workspace + HUF_WORKSPACE_SIZE_U32,
+             &cParams,
+             ZSTDcrp_continue, ZSTD_resetTarget_CDict);
          assert(end == (char*)cdict->workspace + cdict->workspaceSize);
          (void)end;
      }
@@ -4068,7 +3555,7 @@ static size_t ZSTD_compressStream_generic(ZSTD_CStream* zcs,
          case zcss_flush:
              DEBUGLOG(5, "flush stage");
              {   size_t const toFlush = zcs->outBuffContentSize - zcs->outBuffFlushedSize;
-                 size_t const flushed = ZSTD_limitCopy(op, oend-op,
+                 size_t const flushed = ZSTD_limitCopy(op, (size_t)(oend-op),
                              zcs->outBuff + zcs->outBuffFlushedSize, toFlush);
                  DEBUGLOG(5, "toFlush: %u into %u ==> flushed: %u",
                              (unsigned)toFlush, (unsigned)(oend-op), (unsigned)flushed);
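
Note: the change here only adds a (size_t) cast, since oend-op is a ptrdiff_t while the capacity parameter of ZSTD_limitCopy is a size_t; it belongs to the same sign-conversion cleanup as the return (size_t)(op-ostart) above. Judging from this call site, ZSTD_limitCopy copies as much as fits and reports the copied length; a sketch of a helper with that contract (an assumption, not zstd's actual implementation):

    #include <stdio.h>
    #include <string.h>

    /* copy min(dstCapacity, srcSize) bytes; return how many were copied */
    static size_t limit_copy(void* dst, size_t dstCapacity, const void* src, size_t srcSize)
    {
        size_t const length = (dstCapacity < srcSize) ? dstCapacity : srcSize;
        memcpy(dst, src, length);
        return length;
    }

    int main(void)
    {
        char out[4];
        size_t const flushed = limit_copy(out, sizeof(out), "too long", 8);
        printf("flushed %zu of 8 bytes\n", flushed);  /* the caller loops until fully flushed */
        return 0;
    }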
@@ -4262,7 +3749,7 @@ size_t ZSTD_endStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output)
      if (zcs->appliedParams.nbWorkers > 0) return remainingToFlush;   /* minimal estimation */
      /* single thread mode : attempt to calculate remaining to flush more precisely */
      {   size_t const lastBlockSize = zcs->frameEnded ? 0 : ZSTD_BLOCKHEADERSIZE;
-         size_t const checksumSize = zcs->frameEnded ? 0 : zcs->appliedParams.fParams.checksumFlag * 4;
+         size_t const checksumSize = (size_t)(zcs->frameEnded ? 0 : zcs->appliedParams.fParams.checksumFlag * 4);
          size_t const toFlush = remainingToFlush + lastBlockSize + checksumSize;
          DEBUGLOG(4, "ZSTD_endStream : remaining to flush : %u", (unsigned)toFlush);
          return toFlush;
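
Note: the cast again only fixes an implicit int-to-size_t conversion; the estimate itself is unchanged: whatever is still buffered, plus, if the frame has not ended yet, one last block header (ZSTD_BLOCKHEADERSIZE, 3 bytes in the zstd format) and 4 checksum bytes when checksumFlag is set. Worked out as a standalone function, with the constant mirrored here purely for illustration:

    #include <stdio.h>

    #define BLOCK_HEADER_SIZE 3  /* cf. ZSTD_BLOCKHEADERSIZE */

    static unsigned remaining_to_flush(unsigned buffered, int frameEnded, int checksumFlag)
    {
        unsigned const lastBlockSize = frameEnded ? 0u : BLOCK_HEADER_SIZE;
        unsigned const checksumSize  = frameEnded ? 0u : (checksumFlag ? 4u : 0u);
        return buffered + lastBlockSize + checksumSize;
    }

    int main(void)
    {
        /* 10 buffered bytes, frame still open, checksums enabled: 10 + 3 + 4 */
        printf("toFlush = %u\n", remaining_to_flush(10, 0, 1));
        return 0;
    }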