zstd-ruby 1.4.9.0 → 1.5.0.0

Files changed (89)
  1. checksums.yaml +4 -4
  2. data/.github/dependabot.yml +8 -0
  3. data/README.md +1 -1
  4. data/ext/zstdruby/libzstd/BUCK +5 -7
  5. data/ext/zstdruby/libzstd/Makefile +42 -13
  6. data/ext/zstdruby/libzstd/README.md +8 -4
  7. data/ext/zstdruby/libzstd/common/bitstream.h +1 -1
  8. data/ext/zstdruby/libzstd/common/compiler.h +1 -1
  9. data/ext/zstdruby/libzstd/common/cpu.h +1 -1
  10. data/ext/zstdruby/libzstd/common/debug.c +1 -1
  11. data/ext/zstdruby/libzstd/common/debug.h +1 -1
  12. data/ext/zstdruby/libzstd/common/entropy_common.c +1 -1
  13. data/ext/zstdruby/libzstd/common/error_private.c +1 -1
  14. data/ext/zstdruby/libzstd/common/error_private.h +3 -3
  15. data/ext/zstdruby/libzstd/common/fse.h +2 -2
  16. data/ext/zstdruby/libzstd/common/fse_decompress.c +25 -15
  17. data/ext/zstdruby/libzstd/common/huf.h +3 -2
  18. data/ext/zstdruby/libzstd/common/mem.h +3 -5
  19. data/ext/zstdruby/libzstd/common/pool.c +1 -1
  20. data/ext/zstdruby/libzstd/common/pool.h +1 -1
  21. data/ext/zstdruby/libzstd/common/xxhash.c +2 -4
  22. data/ext/zstdruby/libzstd/common/xxhash.h +1 -1
  23. data/ext/zstdruby/libzstd/common/zstd_common.c +1 -1
  24. data/ext/zstdruby/libzstd/common/zstd_deps.h +1 -1
  25. data/ext/zstdruby/libzstd/common/zstd_internal.h +21 -9
  26. data/ext/zstdruby/libzstd/common/zstd_trace.h +7 -5
  27. data/ext/zstdruby/libzstd/compress/fse_compress.c +1 -1
  28. data/ext/zstdruby/libzstd/compress/hist.c +1 -1
  29. data/ext/zstdruby/libzstd/compress/hist.h +1 -1
  30. data/ext/zstdruby/libzstd/compress/huf_compress.c +51 -28
  31. data/ext/zstdruby/libzstd/compress/zstd_compress.c +1373 -275
  32. data/ext/zstdruby/libzstd/compress/zstd_compress_internal.h +164 -21
  33. data/ext/zstdruby/libzstd/compress/zstd_compress_literals.c +2 -2
  34. data/ext/zstdruby/libzstd/compress/zstd_compress_literals.h +1 -1
  35. data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.c +14 -6
  36. data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.h +1 -1
  37. data/ext/zstdruby/libzstd/compress/zstd_compress_superblock.c +5 -282
  38. data/ext/zstdruby/libzstd/compress/zstd_compress_superblock.h +1 -1
  39. data/ext/zstdruby/libzstd/compress/zstd_cwksp.h +147 -46
  40. data/ext/zstdruby/libzstd/compress/zstd_double_fast.c +3 -3
  41. data/ext/zstdruby/libzstd/compress/zstd_double_fast.h +1 -1
  42. data/ext/zstdruby/libzstd/compress/zstd_fast.c +4 -4
  43. data/ext/zstdruby/libzstd/compress/zstd_fast.h +1 -1
  44. data/ext/zstdruby/libzstd/compress/zstd_lazy.c +914 -142
  45. data/ext/zstdruby/libzstd/compress/zstd_lazy.h +39 -1
  46. data/ext/zstdruby/libzstd/compress/zstd_ldm.c +51 -15
  47. data/ext/zstdruby/libzstd/compress/zstd_ldm.h +2 -1
  48. data/ext/zstdruby/libzstd/compress/zstd_ldm_geartab.h +1 -1
  49. data/ext/zstdruby/libzstd/compress/zstd_opt.c +1 -1
  50. data/ext/zstdruby/libzstd/compress/zstd_opt.h +1 -1
  51. data/ext/zstdruby/libzstd/compress/zstdmt_compress.c +15 -6
  52. data/ext/zstdruby/libzstd/compress/zstdmt_compress.h +5 -5
  53. data/ext/zstdruby/libzstd/decompress/huf_decompress.c +44 -43
  54. data/ext/zstdruby/libzstd/decompress/zstd_ddict.c +1 -1
  55. data/ext/zstdruby/libzstd/decompress/zstd_ddict.h +1 -1
  56. data/ext/zstdruby/libzstd/decompress/zstd_decompress.c +3 -4
  57. data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.c +44 -36
  58. data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.h +1 -1
  59. data/ext/zstdruby/libzstd/decompress/zstd_decompress_internal.h +1 -2
  60. data/ext/zstdruby/libzstd/deprecated/zbuff.h +1 -1
  61. data/ext/zstdruby/libzstd/deprecated/zbuff_common.c +1 -1
  62. data/ext/zstdruby/libzstd/deprecated/zbuff_compress.c +1 -1
  63. data/ext/zstdruby/libzstd/deprecated/zbuff_decompress.c +1 -1
  64. data/ext/zstdruby/libzstd/dictBuilder/cover.c +7 -6
  65. data/ext/zstdruby/libzstd/dictBuilder/cover.h +6 -5
  66. data/ext/zstdruby/libzstd/dictBuilder/fastcover.c +7 -6
  67. data/ext/zstdruby/libzstd/dictBuilder/zdict.c +8 -7
  68. data/ext/zstdruby/libzstd/dll/example/Makefile +1 -1
  69. data/ext/zstdruby/libzstd/legacy/zstd_legacy.h +1 -1
  70. data/ext/zstdruby/libzstd/legacy/zstd_v01.c +1 -1
  71. data/ext/zstdruby/libzstd/legacy/zstd_v01.h +1 -1
  72. data/ext/zstdruby/libzstd/legacy/zstd_v02.c +1 -1
  73. data/ext/zstdruby/libzstd/legacy/zstd_v02.h +1 -1
  74. data/ext/zstdruby/libzstd/legacy/zstd_v03.c +1 -1
  75. data/ext/zstdruby/libzstd/legacy/zstd_v03.h +1 -1
  76. data/ext/zstdruby/libzstd/legacy/zstd_v04.c +1 -1
  77. data/ext/zstdruby/libzstd/legacy/zstd_v04.h +1 -1
  78. data/ext/zstdruby/libzstd/legacy/zstd_v05.c +1 -1
  79. data/ext/zstdruby/libzstd/legacy/zstd_v05.h +1 -1
  80. data/ext/zstdruby/libzstd/legacy/zstd_v06.c +1 -1
  81. data/ext/zstdruby/libzstd/legacy/zstd_v06.h +1 -1
  82. data/ext/zstdruby/libzstd/legacy/zstd_v07.c +1 -1
  83. data/ext/zstdruby/libzstd/legacy/zstd_v07.h +1 -1
  84. data/ext/zstdruby/libzstd/{dictBuilder/zdict.h → zdict.h} +148 -2
  85. data/ext/zstdruby/libzstd/zstd.h +165 -83
  86. data/ext/zstdruby/libzstd/{common/zstd_errors.h → zstd_errors.h} +1 -1
  87. data/lib/zstd-ruby/version.rb +1 -1
  88. metadata +5 -5
  89. data/ext/zstdruby/libzstd/common/zstd_trace.c +0 -42
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
@@ -1,6 +1,6 @@
 /*
  * xxHash - Fast Hash algorithm
- * Copyright (c) 2012-2021, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  *
  * You can contact the author at :
  * - xxHash homepage: http://www.xxhash.com
@@ -30,9 +30,7 @@
  * Prefer these methods in priority order (0 > 1 > 2)
  */
 #ifndef XXH_FORCE_MEMORY_ACCESS   /* can be defined externally, on command line for example */
-#  if defined(__GNUC__) && ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) )
-#    define XXH_FORCE_MEMORY_ACCESS 2
-#  elif (defined(__INTEL_COMPILER) && !defined(WIN32)) || \
+#  if (defined(__INTEL_COMPILER) && !defined(WIN32)) || \
   (defined(__GNUC__) && ( defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7S__) )) || \
   defined(__ICCARM__)
 #  define XXH_FORCE_MEMORY_ACCESS 1
@@ -1,7 +1,7 @@
 /*
  * xxHash - Extremely Fast Hash algorithm
  * Header File
- * Copyright (c) 2012-2021, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  *
  * You can contact the author at :
  * - xxHash source repository : https://github.com/Cyan4973/xxHash
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2021, Facebook, Inc.
+ * Copyright (c) Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
@@ -36,6 +36,11 @@
 #  define XXH_STATIC_LINKING_ONLY  /* XXH64_state_t */
 #endif
 #include "xxhash.h"                /* XXH_reset, update, digest */
+#ifndef ZSTD_NO_TRACE
+#  include "zstd_trace.h"
+#else
+#  define ZSTD_TRACE 0
+#endif
 
 #if defined (__cplusplus)
 extern "C" {
@@ -347,11 +352,18 @@ typedef enum {
  * Private declarations
  *********************************************/
 typedef struct seqDef_s {
-    U32 offset;         /* Offset code of the sequence */
+    U32 offset;         /* offset == rawOffset + ZSTD_REP_NUM, or equivalently, offCode + 1 */
     U16 litLength;
     U16 matchLength;
 } seqDef;
 
+/* Controls whether seqStore has a single "long" litLength or matchLength. See seqStore_t. */
+typedef enum {
+    ZSTD_llt_none = 0,             /* no longLengthType */
+    ZSTD_llt_literalLength = 1,    /* represents a long literal */
+    ZSTD_llt_matchLength = 2       /* represents a long match */
+} ZSTD_longLengthType_e;
+
 typedef struct {
     seqDef* sequencesStart;
     seqDef* sequences;      /* ptr to end of sequences */
@@ -363,12 +375,12 @@ typedef struct {
     size_t maxNbSeq;
     size_t maxNbLit;
 
-    /* longLengthPos and longLengthID to allow us to represent either a single litLength or matchLength
+    /* longLengthPos and longLengthType to allow us to represent either a single litLength or matchLength
      * in the seqStore that has a value larger than U16 (if it exists). To do so, we increment
-     * the existing value of the litLength or matchLength by 0x10000. 
+     * the existing value of the litLength or matchLength by 0x10000.
      */
-    U32 longLengthID;   /* 0 == no longLength; 1 == Represent the long literal; 2 == Represent the long match; */
-    U32 longLengthPos;  /* Index of the sequence to apply long length modification to */
+    ZSTD_longLengthType_e longLengthType;
+    U32                   longLengthPos;  /* Index of the sequence to apply long length modification to */
 } seqStore_t;
 
 typedef struct {
@@ -378,7 +390,7 @@ typedef struct {
 
 /**
  * Returns the ZSTD_sequenceLength for the given sequences. It handles the decoding of long sequences
- * indicated by longLengthPos and longLengthID, and adds MINMATCH back to matchLength.
+ * indicated by longLengthPos and longLengthType, and adds MINMATCH back to matchLength.
  */
 MEM_STATIC ZSTD_sequenceLength ZSTD_getSequenceLength(seqStore_t const* seqStore, seqDef const* seq)
 {
@@ -386,10 +398,10 @@ MEM_STATIC ZSTD_sequenceLength ZSTD_getSequenceLength(seqStore_t const* seqStore
     seqLen.litLength = seq->litLength;
     seqLen.matchLength = seq->matchLength + MINMATCH;
     if (seqStore->longLengthPos == (U32)(seq - seqStore->sequencesStart)) {
-        if (seqStore->longLengthID == 1) {
+        if (seqStore->longLengthType == ZSTD_llt_literalLength) {
            seqLen.litLength += 0xFFFF;
        }
-        if (seqStore->longLengthID == 2) {
+        if (seqStore->longLengthType == ZSTD_llt_matchLength) {
            seqLen.matchLength += 0xFFFF;
        }
    }
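Two readability changes meet here: the magic values 0/1/2 for the "long length" flag become the named ZSTD_longLengthType_e enum, and the new offset comment pins down the stored convention: seqDef.offset is not the raw match distance but rawOffset + ZSTD_REP_NUM (equivalently offCode + 1), reserving the low values for repeat-offset codes. A hypothetical helper spelling that convention out (not part of the library, which inlines this arithmetic at its match-emission sites):

    /* Hypothetical illustration of the documented convention: a real match
     * at distance rawOffset is stored as rawOffset + ZSTD_REP_NUM, leaving
     * values 1..ZSTD_REP_NUM free to encode repeat-offset codes. */
    static U32 seqDef_storedOffset(U32 rawOffset) { return rawOffset + ZSTD_REP_NUM; }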
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2021, Facebook, Inc.
+ * Copyright (c) Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
@@ -114,14 +114,15 @@ typedef unsigned long long ZSTD_TraceCtx;
  * @returns Non-zero if tracing is enabled. The return value is
  *          passed to ZSTD_trace_compress_end().
  */
-ZSTD_TraceCtx ZSTD_trace_compress_begin(struct ZSTD_CCtx_s const* cctx);
+ZSTD_WEAK_ATTR ZSTD_TraceCtx ZSTD_trace_compress_begin(
+    struct ZSTD_CCtx_s const* cctx);
 
 /**
  * Trace the end of a compression call.
  * @param ctx The return value of ZSTD_trace_compress_begin().
  * @param trace The zstd tracing info.
  */
-void ZSTD_trace_compress_end(
+ZSTD_WEAK_ATTR void ZSTD_trace_compress_end(
     ZSTD_TraceCtx ctx,
     ZSTD_Trace const* trace);
 
@@ -132,14 +133,15 @@ void ZSTD_trace_compress_end(
  * @returns Non-zero if tracing is enabled. The return value is
  *          passed to ZSTD_trace_compress_end().
  */
-ZSTD_TraceCtx ZSTD_trace_decompress_begin(struct ZSTD_DCtx_s const* dctx);
+ZSTD_WEAK_ATTR ZSTD_TraceCtx ZSTD_trace_decompress_begin(
+    struct ZSTD_DCtx_s const* dctx);
 
 /**
  * Trace the end of a decompression call.
  * @param ctx The return value of ZSTD_trace_decompress_begin().
  * @param trace The zstd tracing info.
  */
-void ZSTD_trace_decompress_end(
+ZSTD_WEAK_ATTR void ZSTD_trace_decompress_end(
     ZSTD_TraceCtx ctx,
     ZSTD_Trace const* trace);
 
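Marking the four trace hooks ZSTD_WEAK_ATTR turns them into weak symbols (on toolchains that support the attribute), so the stub zstd_trace.c is no longer needed — note file 89 in the list above deletes it — and an application can override the hooks by supplying strong definitions. A sketch of such an override, assuming a static libzstd built with tracing enabled and access to the private zstd_trace.h header:

    /* User-supplied strong definitions shadow the library's weak symbols.
     * Returning non-zero from *_begin() enables tracing for that call. */
    #include "zstd_trace.h"
    #include <stdio.h>

    ZSTD_TraceCtx ZSTD_trace_compress_begin(struct ZSTD_CCtx_s const* cctx)
    {
        (void)cctx;
        return 1;
    }

    void ZSTD_trace_compress_end(ZSTD_TraceCtx ctx, ZSTD_Trace const* trace)
    {
        (void)ctx; (void)trace;
        fprintf(stderr, "one compression call traced\n");
    }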
@@ -1,6 +1,6 @@
 /* ******************************************************************
  * FSE : Finite State Entropy encoder
- * Copyright (c) 2013-2021, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  *
  * You can contact the author at :
  * - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
@@ -1,7 +1,7 @@
 /* ******************************************************************
  * hist : Histogram functions
  * part of Finite State Entropy project
- * Copyright (c) 2013-2021, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  *
  * You can contact the author at :
  * - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
@@ -1,7 +1,7 @@
 /* ******************************************************************
  * hist : Histogram functions
  * part of Finite State Entropy project
- * Copyright (c) 2013-2021, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  *
  * You can contact the author at :
  * - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
@@ -1,6 +1,6 @@
 /* ******************************************************************
  * Huffman encoder, part of New Generation Entropy library
- * Copyright (c) 2013-2021, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  *
  * You can contact the author at :
  * - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy
@@ -59,7 +59,15 @@ unsigned HUF_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxS
  * Note : all elements within weightTable are supposed to be <= HUF_TABLELOG_MAX.
  */
 #define MAX_FSE_TABLELOG_FOR_HUFF_HEADER 6
-static size_t HUF_compressWeights (void* dst, size_t dstSize, const void* weightTable, size_t wtSize)
+
+typedef struct {
+    FSE_CTable CTable[FSE_CTABLE_SIZE_U32(MAX_FSE_TABLELOG_FOR_HUFF_HEADER, HUF_TABLELOG_MAX)];
+    U32 scratchBuffer[FSE_BUILD_CTABLE_WORKSPACE_SIZE_U32(HUF_TABLELOG_MAX, MAX_FSE_TABLELOG_FOR_HUFF_HEADER)];
+    unsigned count[HUF_TABLELOG_MAX+1];
+    S16 norm[HUF_TABLELOG_MAX+1];
+} HUF_CompressWeightsWksp;
+
+static size_t HUF_compressWeights(void* dst, size_t dstSize, const void* weightTable, size_t wtSize, void* workspace, size_t workspaceSize)
 {
     BYTE* const ostart = (BYTE*) dst;
     BYTE* op = ostart;
@@ -67,33 +75,30 @@ static size_t HUF_compressWeights (void* dst, size_t dstSize, const void* weight
 
     unsigned maxSymbolValue = HUF_TABLELOG_MAX;
     U32 tableLog = MAX_FSE_TABLELOG_FOR_HUFF_HEADER;
+    HUF_CompressWeightsWksp* wksp = (HUF_CompressWeightsWksp*)workspace;
 
-    FSE_CTable CTable[FSE_CTABLE_SIZE_U32(MAX_FSE_TABLELOG_FOR_HUFF_HEADER, HUF_TABLELOG_MAX)];
-    U32 scratchBuffer[FSE_BUILD_CTABLE_WORKSPACE_SIZE_U32(HUF_TABLELOG_MAX, MAX_FSE_TABLELOG_FOR_HUFF_HEADER)];
-
-    unsigned count[HUF_TABLELOG_MAX+1];
-    S16 norm[HUF_TABLELOG_MAX+1];
+    if (workspaceSize < sizeof(HUF_CompressWeightsWksp)) return ERROR(GENERIC);
 
     /* init conditions */
     if (wtSize <= 1) return 0;  /* Not compressible */
 
     /* Scan input and build symbol stats */
-    {   unsigned const maxCount = HIST_count_simple(count, &maxSymbolValue, weightTable, wtSize);   /* never fails */
+    {   unsigned const maxCount = HIST_count_simple(wksp->count, &maxSymbolValue, weightTable, wtSize);   /* never fails */
        if (maxCount == wtSize) return 1;   /* only a single symbol in src : rle */
        if (maxCount == 1) return 0;        /* each symbol present maximum once => not compressible */
    }
 
    tableLog = FSE_optimalTableLog(tableLog, wtSize, maxSymbolValue);
-    CHECK_F( FSE_normalizeCount(norm, tableLog, count, wtSize, maxSymbolValue, /* useLowProbCount */ 0) );
+    CHECK_F( FSE_normalizeCount(wksp->norm, tableLog, wksp->count, wtSize, maxSymbolValue, /* useLowProbCount */ 0) );
 
    /* Write table description header */
-    {   CHECK_V_F(hSize, FSE_writeNCount(op, (size_t)(oend-op), norm, maxSymbolValue, tableLog) );
+    {   CHECK_V_F(hSize, FSE_writeNCount(op, (size_t)(oend-op), wksp->norm, maxSymbolValue, tableLog) );
        op += hSize;
    }
 
    /* Compress */
-    CHECK_F( FSE_buildCTable_wksp(CTable, norm, maxSymbolValue, tableLog, scratchBuffer, sizeof(scratchBuffer)) );
-    {   CHECK_V_F(cSize, FSE_compress_usingCTable(op, (size_t)(oend - op), weightTable, wtSize, CTable) );
+    CHECK_F( FSE_buildCTable_wksp(wksp->CTable, wksp->norm, maxSymbolValue, tableLog, wksp->scratchBuffer, sizeof(wksp->scratchBuffer)) );
+    {   CHECK_V_F(cSize, FSE_compress_usingCTable(op, (size_t)(oend - op), weightTable, wtSize, wksp->CTable) );
        if (cSize == 0) return 0;   /* not enough space for compressed data */
        op += cSize;
    }
@@ -102,29 +107,33 @@ static size_t HUF_compressWeights (void* dst, size_t dstSize, const void* weight
 }
 
 
-/*! HUF_writeCTable() :
-    `CTable` : Huffman tree to save, using huf representation.
-    @return : size of saved CTable */
-size_t HUF_writeCTable (void* dst, size_t maxDstSize,
-                        const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog)
-{
+typedef struct {
+    HUF_CompressWeightsWksp wksp;
     BYTE bitsToWeight[HUF_TABLELOG_MAX + 1];   /* precomputed conversion table */
     BYTE huffWeight[HUF_SYMBOLVALUE_MAX];
+} HUF_WriteCTableWksp;
+
+size_t HUF_writeCTable_wksp(void* dst, size_t maxDstSize,
+                            const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog,
+                            void* workspace, size_t workspaceSize)
+{
     BYTE* op = (BYTE*)dst;
     U32 n;
+    HUF_WriteCTableWksp* wksp = (HUF_WriteCTableWksp*)workspace;
 
-    /* check conditions */
+    /* check conditions */
+    if (workspaceSize < sizeof(HUF_WriteCTableWksp)) return ERROR(GENERIC);
     if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) return ERROR(maxSymbolValue_tooLarge);
 
     /* convert to weight */
-    bitsToWeight[0] = 0;
+    wksp->bitsToWeight[0] = 0;
     for (n=1; n<huffLog+1; n++)
-        bitsToWeight[n] = (BYTE)(huffLog + 1 - n);
+        wksp->bitsToWeight[n] = (BYTE)(huffLog + 1 - n);
     for (n=0; n<maxSymbolValue; n++)
-        huffWeight[n] = bitsToWeight[CTable[n].nbBits];
+        wksp->huffWeight[n] = wksp->bitsToWeight[CTable[n].nbBits];
 
     /* attempt weights compression by FSE */
-    { CHECK_V_F(hSize, HUF_compressWeights(op+1, maxDstSize-1, huffWeight, maxSymbolValue) );
+    { CHECK_V_F(hSize, HUF_compressWeights(op+1, maxDstSize-1, wksp->huffWeight, maxSymbolValue, &wksp->wksp, sizeof(wksp->wksp)) );
     if ((hSize>1) & (hSize < maxSymbolValue/2)) {   /* FSE compressed */
         op[0] = (BYTE)hSize;
         return hSize+1;
@@ -134,12 +143,22 @@ size_t HUF_writeCTable (void* dst, size_t maxDstSize,
     if (maxSymbolValue > (256-128)) return ERROR(GENERIC);   /* should not happen : likely means source cannot be compressed */
     if (((maxSymbolValue+1)/2) + 1 > maxDstSize) return ERROR(dstSize_tooSmall);   /* not enough space within dst buffer */
     op[0] = (BYTE)(128 /*special case*/ + (maxSymbolValue-1));
-    huffWeight[maxSymbolValue] = 0;   /* to be sure it doesn't cause msan issue in final combination */
+    wksp->huffWeight[maxSymbolValue] = 0;   /* to be sure it doesn't cause msan issue in final combination */
     for (n=0; n<maxSymbolValue; n+=2)
-        op[(n/2)+1] = (BYTE)((huffWeight[n] << 4) + huffWeight[n+1]);
+        op[(n/2)+1] = (BYTE)((wksp->huffWeight[n] << 4) + wksp->huffWeight[n+1]);
     return ((maxSymbolValue+1)/2) + 1;
 }
 
+/*! HUF_writeCTable() :
+    `CTable` : Huffman tree to save, using huf representation.
+    @return : size of saved CTable */
+size_t HUF_writeCTable (void* dst, size_t maxDstSize,
+                        const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog)
+{
+    HUF_WriteCTableWksp wksp;
+    return HUF_writeCTable_wksp(dst, maxDstSize, CTable, maxSymbolValue, huffLog, &wksp, sizeof(wksp));
+}
+
 
 size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize, unsigned* hasZeroWeights)
 {
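The _wksp refactor moves what used to be sizeable stack arrays (the FSE CTable, scratch buffer, count and norm tables, plus the weight conversion tables) into a caller-supplied buffer; the old HUF_writeCTable() survives as a thin wrapper that still uses the stack, while internal callers hand in a slice of the cctx workspace. A sketch of the calling pattern for this internal API, where dst/dstCapacity/CTable/maxSymbolValue/huffLog stand for the caller's own values:

    /* Sketch: the caller owns the scratch memory; any buffer at least
     * sizeof(HUF_WriteCTableWksp) bytes large works. */
    HUF_WriteCTableWksp wksp;   /* could equally live inside a larger workspace */
    size_t const hSize = HUF_writeCTable_wksp(dst, dstCapacity,
                                              CTable, maxSymbolValue, huffLog,
                                              &wksp, sizeof(wksp));
    if (HUF_isError(hSize)) { /* handle error */ }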
@@ -732,7 +751,10 @@ static size_t HUF_compressCTable_internal(
 typedef struct {
     unsigned count[HUF_SYMBOLVALUE_MAX + 1];
     HUF_CElt CTable[HUF_SYMBOLVALUE_MAX + 1];
-    HUF_buildCTable_wksp_tables buildCTable_wksp;
+    union {
+        HUF_buildCTable_wksp_tables buildCTable_wksp;
+        HUF_WriteCTableWksp writeCTable_wksp;
+    } wksps;
 } HUF_compress_tables_t;
 
 /* HUF_compress_internal() :
@@ -795,7 +817,7 @@ HUF_compress_internal (void* dst, size_t dstSize,
     huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue);
     {   size_t const maxBits = HUF_buildCTable_wksp(table->CTable, table->count,
                                                     maxSymbolValue, huffLog,
-                                                    &table->buildCTable_wksp, sizeof(table->buildCTable_wksp));
+                                                    &table->wksps.buildCTable_wksp, sizeof(table->wksps.buildCTable_wksp));
         CHECK_F(maxBits);
         huffLog = (U32)maxBits;
         /* Zero unused symbols in CTable, so we can check it for validity */
@@ -804,7 +826,8 @@ HUF_compress_internal (void* dst, size_t dstSize,
     }
 
     /* Write table description header */
-    { CHECK_V_F(hSize, HUF_writeCTable (op, dstSize, table->CTable, maxSymbolValue, huffLog) );
+    { CHECK_V_F(hSize, HUF_writeCTable_wksp(op, dstSize, table->CTable, maxSymbolValue, huffLog,
+                                            &table->wksps.writeCTable_wksp, sizeof(table->wksps.writeCTable_wksp)) );
         /* Check if using previous huffman table is beneficial */
         if (repeat && *repeat != HUF_repeat_none) {
             size_t const oldSize = HUF_estimateCompressedSize(oldHufTable, table->count, maxSymbolValue);
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
@@ -14,7 +14,6 @@
 #include "../common/zstd_deps.h"  /* INT_MAX, ZSTD_memset, ZSTD_memcpy */
 #include "../common/cpu.h"
 #include "../common/mem.h"
-#include "../common/zstd_trace.h"
 #include "hist.h"           /* HIST_countFast_wksp */
 #define FSE_STATIC_LINKING_ONLY   /* FSE_encodeSymbol */
 #include "../common/fse.h"
@@ -73,6 +72,10 @@ struct ZSTD_CDict_s {
     ZSTD_customMem customMem;
     U32 dictID;
     int compressionLevel; /* 0 indicates that advanced API was used to select CDict params */
+    ZSTD_useRowMatchFinderMode_e useRowMatchFinder; /* Indicates whether the CDict was created with params that would use
+                                                     * row-based matchfinder. Unless the cdict is reloaded, we will use
+                                                     * the same greedy/lazy matchfinder at compression time.
+                                                     */
 };  /* typedef'd to ZSTD_CDict within "zstd.h" */
 
 ZSTD_CCtx* ZSTD_createCCtx(void)
@@ -203,6 +206,49 @@ size_t ZSTD_sizeof_CStream(const ZSTD_CStream* zcs)
 /* private API call, for dictBuilder only */
 const seqStore_t* ZSTD_getSeqStore(const ZSTD_CCtx* ctx) { return &(ctx->seqStore); }
 
+/* Returns true if the strategy supports using a row based matchfinder */
+static int ZSTD_rowMatchFinderSupported(const ZSTD_strategy strategy) {
+    return (strategy >= ZSTD_greedy && strategy <= ZSTD_lazy2);
+}
+
+/* Returns true if the strategy and useRowMatchFinder mode indicate that we will use the row based matchfinder
+ * for this compression.
+ */
+static int ZSTD_rowMatchFinderUsed(const ZSTD_strategy strategy, const ZSTD_useRowMatchFinderMode_e mode) {
+    assert(mode != ZSTD_urm_auto);
+    return ZSTD_rowMatchFinderSupported(strategy) && (mode == ZSTD_urm_enableRowMatchFinder);
+}
+
+/* Returns row matchfinder usage enum given an initial mode and cParams */
+static ZSTD_useRowMatchFinderMode_e ZSTD_resolveRowMatchFinderMode(ZSTD_useRowMatchFinderMode_e mode,
+                                                                   const ZSTD_compressionParameters* const cParams) {
+#if !defined(ZSTD_NO_INTRINSICS) && (defined(__SSE2__) || defined(__ARM_NEON))
+    int const kHasSIMD128 = 1;
+#else
+    int const kHasSIMD128 = 0;
+#endif
+    if (mode != ZSTD_urm_auto) return mode; /* if requested enabled, but no SIMD, we still will use row matchfinder */
+    mode = ZSTD_urm_disableRowMatchFinder;
+    if (!ZSTD_rowMatchFinderSupported(cParams->strategy)) return mode;
+    if (kHasSIMD128) {
+        if (cParams->windowLog > 14) mode = ZSTD_urm_enableRowMatchFinder;
+    } else {
+        if (cParams->windowLog > 17) mode = ZSTD_urm_enableRowMatchFinder;
+    }
+    return mode;
+}
+
+/* Returns 1 if the arguments indicate that we should allocate a chainTable, 0 otherwise */
+static int ZSTD_allocateChainTable(const ZSTD_strategy strategy,
+                                   const ZSTD_useRowMatchFinderMode_e useRowMatchFinder,
+                                   const U32 forDDSDict) {
+    assert(useRowMatchFinder != ZSTD_urm_auto);
+    /* We always should allocate a chaintable if we are allocating a matchstate for a DDS dictionary matchstate.
+     * We do not allocate a chaintable if we are using ZSTD_fast, or are using the row-based matchfinder.
+     */
+    return forDDSDict || ((strategy != ZSTD_fast) && !ZSTD_rowMatchFinderUsed(strategy, useRowMatchFinder));
+}
+
 /* Returns 1 if compression parameters are such that we should
  * enable long distance matching (wlog >= 27, strategy >= btopt).
  * Returns 0 otherwise.
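In ZSTD_urm_auto mode the row matchfinder is therefore selected only for the greedy/lazy/lazy2 strategies, and only above a window-size threshold that depends on whether a 128-bit SIMD path (SSE2 or NEON) was compiled in. Illustrative evaluations of the heuristic, with the values read directly off the code above:

    /* ZSTD_resolveRowMatchFinderMode(ZSTD_urm_auto, &cParams), illustrative cases: */
    /*   strategy = ZSTD_lazy2, windowLog = 16, SIMD128 available -> enabled  (16 > 14)  */
    /*   strategy = ZSTD_lazy2, windowLog = 16, scalar-only build -> disabled (16 <= 17) */
    /*   strategy = ZSTD_lazy2, windowLog = 14, SIMD128 available -> disabled (14 <= 14) */
    /*   strategy = ZSTD_btopt, any windowLog                     -> disabled (outside greedy..lazy2) */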
@@ -211,6 +257,14 @@ static U32 ZSTD_CParams_shouldEnableLdm(const ZSTD_compressionParameters* const
     return cParams->strategy >= ZSTD_btopt && cParams->windowLog >= 27;
 }
 
+/* Returns 1 if compression parameters are such that we should
+ * enable blockSplitter (wlog >= 17, strategy >= btopt).
+ * Returns 0 otherwise.
+ */
+static U32 ZSTD_CParams_useBlockSplitter(const ZSTD_compressionParameters* const cParams) {
+    return cParams->strategy >= ZSTD_btopt && cParams->windowLog >= 17;
+}
+
 static ZSTD_CCtx_params ZSTD_makeCCtxParamsFromCParams(
         ZSTD_compressionParameters cParams)
 {
@@ -219,6 +273,7 @@ static ZSTD_CCtx_params ZSTD_makeCCtxParamsFromCParams(
     ZSTD_CCtxParams_init(&cctxParams, ZSTD_CLEVEL_DEFAULT);
     cctxParams.cParams = cParams;
 
+    /* Adjust advanced params according to cParams */
     if (ZSTD_CParams_shouldEnableLdm(&cParams)) {
         DEBUGLOG(4, "ZSTD_makeCCtxParamsFromCParams(): Including LDM into cctx params");
         cctxParams.ldmParams.enableLdm = 1;
@@ -228,6 +283,12 @@ static ZSTD_CCtx_params ZSTD_makeCCtxParamsFromCParams(
         assert(cctxParams.ldmParams.hashRateLog < 32);
     }
 
+    if (ZSTD_CParams_useBlockSplitter(&cParams)) {
+        DEBUGLOG(4, "ZSTD_makeCCtxParamsFromCParams(): Including block splitting into cctx params");
+        cctxParams.splitBlocks = 1;
+    }
+
+    cctxParams.useRowMatchFinder = ZSTD_resolveRowMatchFinderMode(cctxParams.useRowMatchFinder, &cParams);
     assert(!ZSTD_checkCParams(cParams));
     return cctxParams;
 }
@@ -286,6 +347,8 @@ static void ZSTD_CCtxParams_init_internal(ZSTD_CCtx_params* cctxParams, ZSTD_par
      * But, set it for tracing anyway.
      */
     cctxParams->compressionLevel = compressionLevel;
+    cctxParams->useRowMatchFinder = ZSTD_resolveRowMatchFinderMode(cctxParams->useRowMatchFinder, &params->cParams);
+    DEBUGLOG(4, "ZSTD_CCtxParams_init_internal: useRowMatchFinder=%d", cctxParams->useRowMatchFinder);
 }
 
 size_t ZSTD_CCtxParams_init_advanced(ZSTD_CCtx_params* cctxParams, ZSTD_parameters params)
@@ -486,6 +549,21 @@ ZSTD_bounds ZSTD_cParam_getBounds(ZSTD_cParameter param)
         bounds.upperBound = 1;
         return bounds;
 
+    case ZSTD_c_splitBlocks:
+        bounds.lowerBound = 0;
+        bounds.upperBound = 1;
+        return bounds;
+
+    case ZSTD_c_useRowMatchFinder:
+        bounds.lowerBound = (int)ZSTD_urm_auto;
+        bounds.upperBound = (int)ZSTD_urm_enableRowMatchFinder;
+        return bounds;
+
+    case ZSTD_c_deterministicRefPrefix:
+        bounds.lowerBound = 0;
+        bounds.upperBound = 1;
+        return bounds;
+
     default:
         bounds.error = ERROR(parameter_unsupported);
         return bounds;
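The three new settings are reached through the regular advanced API; in this release they are experimental parameters, so visibility of the enum names requires defining ZSTD_STATIC_LINKING_ONLY before including zstd.h. A minimal usage sketch (error handling omitted; parameter and enum names as they appear in this release's zstd.h):

    #define ZSTD_STATIC_LINKING_ONLY   /* exposes the experimental parameters */
    #include <zstd.h>

    ZSTD_CCtx* const cctx = ZSTD_createCCtx();
    ZSTD_CCtx_setParameter(cctx, ZSTD_c_splitBlocks, 1);             /* 0/1 flag */
    ZSTD_CCtx_setParameter(cctx, ZSTD_c_useRowMatchFinder,
                           (int)ZSTD_urm_enableRowMatchFinder);      /* or ZSTD_urm_auto */
    ZSTD_CCtx_setParameter(cctx, ZSTD_c_deterministicRefPrefix, 1);  /* 0/1 flag */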
@@ -547,6 +625,9 @@ static int ZSTD_isUpdateAuthorized(ZSTD_cParameter param)
     case ZSTD_c_stableOutBuffer:
     case ZSTD_c_blockDelimiters:
     case ZSTD_c_validateSequences:
+    case ZSTD_c_splitBlocks:
+    case ZSTD_c_useRowMatchFinder:
+    case ZSTD_c_deterministicRefPrefix:
     default:
         return 0;
     }
@@ -599,6 +680,9 @@ size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, int value)
     case ZSTD_c_stableOutBuffer:
     case ZSTD_c_blockDelimiters:
     case ZSTD_c_validateSequences:
+    case ZSTD_c_splitBlocks:
+    case ZSTD_c_useRowMatchFinder:
+    case ZSTD_c_deterministicRefPrefix:
         break;
 
     default: RETURN_ERROR(parameter_unsupported, "unknown parameter");
@@ -810,6 +894,21 @@ size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* CCtxParams,
         CCtxParams->validateSequences = value;
         return CCtxParams->validateSequences;
 
+    case ZSTD_c_splitBlocks:
+        BOUNDCHECK(ZSTD_c_splitBlocks, value);
+        CCtxParams->splitBlocks = value;
+        return CCtxParams->splitBlocks;
+
+    case ZSTD_c_useRowMatchFinder:
+        BOUNDCHECK(ZSTD_c_useRowMatchFinder, value);
+        CCtxParams->useRowMatchFinder = (ZSTD_useRowMatchFinderMode_e)value;
+        return CCtxParams->useRowMatchFinder;
+
+    case ZSTD_c_deterministicRefPrefix:
+        BOUNDCHECK(ZSTD_c_deterministicRefPrefix, value);
+        CCtxParams->deterministicRefPrefix = !!value;
+        return CCtxParams->deterministicRefPrefix;
+
     default: RETURN_ERROR(parameter_unsupported, "unknown parameter");
     }
 }
@@ -933,6 +1032,15 @@ size_t ZSTD_CCtxParams_getParameter(
     case ZSTD_c_validateSequences :
         *value = (int)CCtxParams->validateSequences;
         break;
+    case ZSTD_c_splitBlocks :
+        *value = (int)CCtxParams->splitBlocks;
+        break;
+    case ZSTD_c_useRowMatchFinder :
+        *value = (int)CCtxParams->useRowMatchFinder;
+        break;
+    case ZSTD_c_deterministicRefPrefix:
+        *value = (int)CCtxParams->deterministicRefPrefix;
+        break;
     default: RETURN_ERROR(parameter_unsupported, "unknown parameter");
     }
     return 0;
@@ -1299,9 +1407,14 @@ ZSTD_compressionParameters ZSTD_getCParamsFromCCtxParams(
 
 static size_t
 ZSTD_sizeof_matchState(const ZSTD_compressionParameters* const cParams,
+                       const ZSTD_useRowMatchFinderMode_e useRowMatchFinder,
+                       const U32 enableDedicatedDictSearch,
                        const U32 forCCtx)
 {
-    size_t const chainSize = (cParams->strategy == ZSTD_fast) ? 0 : ((size_t)1 << cParams->chainLog);
+    /* chain table size should be 0 for fast or row-hash strategies */
+    size_t const chainSize = ZSTD_allocateChainTable(cParams->strategy, useRowMatchFinder, enableDedicatedDictSearch && !forCCtx)
+                                 ? ((size_t)1 << cParams->chainLog)
+                                 : 0;
     size_t const hSize = ((size_t)1) << cParams->hashLog;
     U32    const hashLog3 = (forCCtx && cParams->minMatch==3) ? MIN(ZSTD_HASHLOG3_MAX, cParams->windowLog) : 0;
     size_t const h3Size = hashLog3 ? ((size_t)1) << hashLog3 : 0;
@@ -1311,24 +1424,34 @@ ZSTD_sizeof_matchState(const ZSTD_compressionParameters* const cParams,
                           + hSize * sizeof(U32)
                           + h3Size * sizeof(U32);
     size_t const optPotentialSpace =
-        ZSTD_cwksp_alloc_size((MaxML+1) * sizeof(U32))
-      + ZSTD_cwksp_alloc_size((MaxLL+1) * sizeof(U32))
-      + ZSTD_cwksp_alloc_size((MaxOff+1) * sizeof(U32))
-      + ZSTD_cwksp_alloc_size((1<<Litbits) * sizeof(U32))
-      + ZSTD_cwksp_alloc_size((ZSTD_OPT_NUM+1) * sizeof(ZSTD_match_t))
-      + ZSTD_cwksp_alloc_size((ZSTD_OPT_NUM+1) * sizeof(ZSTD_optimal_t));
+        ZSTD_cwksp_aligned_alloc_size((MaxML+1) * sizeof(U32))
+      + ZSTD_cwksp_aligned_alloc_size((MaxLL+1) * sizeof(U32))
+      + ZSTD_cwksp_aligned_alloc_size((MaxOff+1) * sizeof(U32))
+      + ZSTD_cwksp_aligned_alloc_size((1<<Litbits) * sizeof(U32))
+      + ZSTD_cwksp_aligned_alloc_size((ZSTD_OPT_NUM+1) * sizeof(ZSTD_match_t))
+      + ZSTD_cwksp_aligned_alloc_size((ZSTD_OPT_NUM+1) * sizeof(ZSTD_optimal_t));
+    size_t const lazyAdditionalSpace = ZSTD_rowMatchFinderUsed(cParams->strategy, useRowMatchFinder)
+                                            ? ZSTD_cwksp_aligned_alloc_size(hSize*sizeof(U16))
+                                            : 0;
     size_t const optSpace = (forCCtx && (cParams->strategy >= ZSTD_btopt))
                                 ? optPotentialSpace
                                 : 0;
+    size_t const slackSpace = ZSTD_cwksp_slack_space_required();
+
+    /* tables are guaranteed to be sized in multiples of 64 bytes (or 16 uint32_t) */
+    ZSTD_STATIC_ASSERT(ZSTD_HASHLOG_MIN >= 4 && ZSTD_WINDOWLOG_MIN >= 4 && ZSTD_CHAINLOG_MIN >= 4);
+    assert(useRowMatchFinder != ZSTD_urm_auto);
+
     DEBUGLOG(4, "chainSize: %u - hSize: %u - h3Size: %u",
                 (U32)chainSize, (U32)hSize, (U32)h3Size);
-    return tableSpace + optSpace;
+    return tableSpace + optSpace + slackSpace + lazyAdditionalSpace;
 }
 
 static size_t ZSTD_estimateCCtxSize_usingCCtxParams_internal(
         const ZSTD_compressionParameters* cParams,
         const ldmParams_t* ldmParams,
         const int isStatic,
+        const ZSTD_useRowMatchFinderMode_e useRowMatchFinder,
         const size_t buffInSize,
         const size_t buffOutSize,
         const U64 pledgedSrcSize)
@@ -1338,16 +1461,16 @@ static size_t ZSTD_estimateCCtxSize_usingCCtxParams_internal(
     U32    const divider = (cParams->minMatch==3) ? 3 : 4;
     size_t const maxNbSeq = blockSize / divider;
     size_t const tokenSpace = ZSTD_cwksp_alloc_size(WILDCOPY_OVERLENGTH + blockSize)
-                            + ZSTD_cwksp_alloc_size(maxNbSeq * sizeof(seqDef))
+                            + ZSTD_cwksp_aligned_alloc_size(maxNbSeq * sizeof(seqDef))
                             + 3 * ZSTD_cwksp_alloc_size(maxNbSeq * sizeof(BYTE));
     size_t const entropySpace = ZSTD_cwksp_alloc_size(ENTROPY_WORKSPACE_SIZE);
     size_t const blockStateSpace = 2 * ZSTD_cwksp_alloc_size(sizeof(ZSTD_compressedBlockState_t));
-    size_t const matchStateSize = ZSTD_sizeof_matchState(cParams, /* forCCtx */ 1);
+    size_t const matchStateSize = ZSTD_sizeof_matchState(cParams, useRowMatchFinder, /* enableDedicatedDictSearch */ 0, /* forCCtx */ 1);
 
     size_t const ldmSpace = ZSTD_ldm_getTableSize(*ldmParams);
     size_t const maxNbLdmSeq = ZSTD_ldm_getMaxNbSeq(*ldmParams, blockSize);
     size_t const ldmSeqSpace = ldmParams->enableLdm ?
-        ZSTD_cwksp_alloc_size(maxNbLdmSeq * sizeof(rawSeq)) : 0;
+        ZSTD_cwksp_aligned_alloc_size(maxNbLdmSeq * sizeof(rawSeq)) : 0;
 
 
     size_t const bufferSpace = ZSTD_cwksp_alloc_size(buffInSize)
@@ -1373,25 +1496,45 @@ size_t ZSTD_estimateCCtxSize_usingCCtxParams(const ZSTD_CCtx_params* params)
 {
     ZSTD_compressionParameters const cParams =
             ZSTD_getCParamsFromCCtxParams(params, ZSTD_CONTENTSIZE_UNKNOWN, 0, ZSTD_cpm_noAttachDict);
+    ZSTD_useRowMatchFinderMode_e const useRowMatchFinder = ZSTD_resolveRowMatchFinderMode(params->useRowMatchFinder,
+                                                                                          &cParams);
 
     RETURN_ERROR_IF(params->nbWorkers > 0, GENERIC, "Estimate CCtx size is supported for single-threaded compression only.");
     /* estimateCCtxSize is for one-shot compression. So no buffers should
      * be needed. However, we still allocate two 0-sized buffers, which can
      * take space under ASAN. */
     return ZSTD_estimateCCtxSize_usingCCtxParams_internal(
-        &cParams, &params->ldmParams, 1, 0, 0, ZSTD_CONTENTSIZE_UNKNOWN);
+        &cParams, &params->ldmParams, 1, useRowMatchFinder, 0, 0, ZSTD_CONTENTSIZE_UNKNOWN);
 }
 
 size_t ZSTD_estimateCCtxSize_usingCParams(ZSTD_compressionParameters cParams)
 {
-    ZSTD_CCtx_params const params = ZSTD_makeCCtxParamsFromCParams(cParams);
-    return ZSTD_estimateCCtxSize_usingCCtxParams(&params);
+    ZSTD_CCtx_params initialParams = ZSTD_makeCCtxParamsFromCParams(cParams);
+    if (ZSTD_rowMatchFinderSupported(cParams.strategy)) {
+        /* Pick bigger of not using and using row-based matchfinder for greedy and lazy strategies */
+        size_t noRowCCtxSize;
+        size_t rowCCtxSize;
+        initialParams.useRowMatchFinder = ZSTD_urm_disableRowMatchFinder;
+        noRowCCtxSize = ZSTD_estimateCCtxSize_usingCCtxParams(&initialParams);
+        initialParams.useRowMatchFinder = ZSTD_urm_enableRowMatchFinder;
+        rowCCtxSize = ZSTD_estimateCCtxSize_usingCCtxParams(&initialParams);
+        return MAX(noRowCCtxSize, rowCCtxSize);
+    } else {
+        return ZSTD_estimateCCtxSize_usingCCtxParams(&initialParams);
+    }
 }
 
 static size_t ZSTD_estimateCCtxSize_internal(int compressionLevel)
 {
-    ZSTD_compressionParameters const cParams = ZSTD_getCParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, 0, ZSTD_cpm_noAttachDict);
-    return ZSTD_estimateCCtxSize_usingCParams(cParams);
+    int tier = 0;
+    size_t largestSize = 0;
+    static const unsigned long long srcSizeTiers[4] = {16 KB, 128 KB, 256 KB, ZSTD_CONTENTSIZE_UNKNOWN};
+    for (; tier < 4; ++tier) {
+        /* Choose the set of cParams for a given level across all srcSizes that give the largest cctxSize */
+        ZSTD_compressionParameters const cParams = ZSTD_getCParams_internal(compressionLevel, srcSizeTiers[tier], 0, ZSTD_cpm_noAttachDict);
+        largestSize = MAX(ZSTD_estimateCCtxSize_usingCParams(cParams), largestSize);
+    }
+    return largestSize;
 }
 
 size_t ZSTD_estimateCCtxSize(int compressionLevel)
@@ -1399,6 +1542,7 @@ size_t ZSTD_estimateCCtxSize(int compressionLevel)
     int level;
     size_t memBudget = 0;
     for (level=MIN(compressionLevel, 1); level<=compressionLevel; level++) {
+        /* Ensure monotonically increasing memory usage as compression level increases */
         size_t const newMB = ZSTD_estimateCCtxSize_internal(level);
         if (newMB > memBudget) memBudget = newMB;
     }
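The net effect of these estimation changes: because the selected cParams vary with source size, and because the row matchfinder changes the allocation layout, a level's estimate is now the maximum across the four srcSize tiers and across row/no-row variants, so it remains a safe upper bound for any context that level can create. A sketch of the static-allocation pattern that relies on this guarantee (the helper name is hypothetical):

    #define ZSTD_STATIC_LINKING_ONLY   /* exposes ZSTD_initStaticCCtx() */
    #include <zstd.h>
    #include <stdlib.h>

    static ZSTD_CCtx* makeFixedBudgetCCtx(int level)
    {
        /* The estimate upper-bounds every context this level may create,
         * so the static cctx below can never run out of workspace. */
        size_t const budget = ZSTD_estimateCCtxSize(level);
        void* const mem = malloc(budget);
        return mem ? ZSTD_initStaticCCtx(mem, budget) : NULL;
    }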
@@ -1417,17 +1561,29 @@ size_t ZSTD_estimateCStreamSize_usingCCtxParams(const ZSTD_CCtx_params* params)
     size_t const outBuffSize = (params->outBufferMode == ZSTD_bm_buffered)
             ? ZSTD_compressBound(blockSize) + 1
             : 0;
+    ZSTD_useRowMatchFinderMode_e const useRowMatchFinder = ZSTD_resolveRowMatchFinderMode(params->useRowMatchFinder, &params->cParams);
 
     return ZSTD_estimateCCtxSize_usingCCtxParams_internal(
-        &cParams, &params->ldmParams, 1, inBuffSize, outBuffSize,
+        &cParams, &params->ldmParams, 1, useRowMatchFinder, inBuffSize, outBuffSize,
         ZSTD_CONTENTSIZE_UNKNOWN);
     }
 }
 
 size_t ZSTD_estimateCStreamSize_usingCParams(ZSTD_compressionParameters cParams)
 {
-    ZSTD_CCtx_params const params = ZSTD_makeCCtxParamsFromCParams(cParams);
-    return ZSTD_estimateCStreamSize_usingCCtxParams(&params);
+    ZSTD_CCtx_params initialParams = ZSTD_makeCCtxParamsFromCParams(cParams);
+    if (ZSTD_rowMatchFinderSupported(cParams.strategy)) {
+        /* Pick bigger of not using and using row-based matchfinder for greedy and lazy strategies */
+        size_t noRowCCtxSize;
+        size_t rowCCtxSize;
+        initialParams.useRowMatchFinder = ZSTD_urm_disableRowMatchFinder;
+        noRowCCtxSize = ZSTD_estimateCStreamSize_usingCCtxParams(&initialParams);
+        initialParams.useRowMatchFinder = ZSTD_urm_enableRowMatchFinder;
+        rowCCtxSize = ZSTD_estimateCStreamSize_usingCCtxParams(&initialParams);
+        return MAX(noRowCCtxSize, rowCCtxSize);
+    } else {
+        return ZSTD_estimateCStreamSize_usingCCtxParams(&initialParams);
+    }
 }
 
 static size_t ZSTD_estimateCStreamSize_internal(int compressionLevel)
@@ -1552,20 +1708,27 @@ typedef enum {
     ZSTD_resetTarget_CCtx
 } ZSTD_resetTarget_e;
 
+
 static size_t
 ZSTD_reset_matchState(ZSTD_matchState_t* ms,
                       ZSTD_cwksp* ws,
                 const ZSTD_compressionParameters* cParams,
+                const ZSTD_useRowMatchFinderMode_e useRowMatchFinder,
                 const ZSTD_compResetPolicy_e crp,
                 const ZSTD_indexResetPolicy_e forceResetIndex,
                 const ZSTD_resetTarget_e forWho)
 {
-    size_t const chainSize = (cParams->strategy == ZSTD_fast) ? 0 : ((size_t)1 << cParams->chainLog);
+    /* disable chain table allocation for fast or row-based strategies */
+    size_t const chainSize = ZSTD_allocateChainTable(cParams->strategy, useRowMatchFinder,
+                                                     ms->dedicatedDictSearch && (forWho == ZSTD_resetTarget_CDict))
+                                 ? ((size_t)1 << cParams->chainLog)
+                                 : 0;
     size_t const hSize = ((size_t)1) << cParams->hashLog;
     U32    const hashLog3 = ((forWho == ZSTD_resetTarget_CCtx) && cParams->minMatch==3) ? MIN(ZSTD_HASHLOG3_MAX, cParams->windowLog) : 0;
     size_t const h3Size = hashLog3 ? ((size_t)1) << hashLog3 : 0;
 
     DEBUGLOG(4, "reset indices : %u", forceResetIndex == ZSTDirp_reset);
+    assert(useRowMatchFinder != ZSTD_urm_auto);
     if (forceResetIndex == ZSTDirp_reset) {
         ZSTD_window_init(&ms->window);
         ZSTD_cwksp_mark_tables_dirty(ws);
@@ -1604,11 +1767,23 @@ ZSTD_reset_matchState(ZSTD_matchState_t* ms,
         ms->opt.priceTable = (ZSTD_optimal_t*)ZSTD_cwksp_reserve_aligned(ws, (ZSTD_OPT_NUM+1) * sizeof(ZSTD_optimal_t));
     }
 
+    if (ZSTD_rowMatchFinderUsed(cParams->strategy, useRowMatchFinder)) {
+        {   /* Row match finder needs an additional table of hashes ("tags") */
+            size_t const tagTableSize = hSize*sizeof(U16);
+            ms->tagTable = (U16*)ZSTD_cwksp_reserve_aligned(ws, tagTableSize);
+            if (ms->tagTable) ZSTD_memset(ms->tagTable, 0, tagTableSize);
+        }
+        {   /* Switch to 32-entry rows if searchLog is 5 (or more) */
+            U32 const rowLog = cParams->searchLog < 5 ? 4 : 5;
+            assert(cParams->hashLog > rowLog);
+            ms->rowHashLog = cParams->hashLog - rowLog;
+        }
+    }
+
     ms->cParams = *cParams;
 
     RETURN_ERROR_IF(ZSTD_cwksp_reserve_failed(ws), memory_allocation,
                     "failed a workspace allocation in ZSTD_reset_matchState");
-
     return 0;
 }
 
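For scale, the extra allocation amounts to two tag bytes per hash-table slot. A worked example with illustrative parameter values, following the arithmetic above:

    /* hashLog = 18, searchLog = 4  =>  rowLog = 4 (16-entry rows)
     * rowHashLog   = 18 - 4 = 14   =>  2^14 hash rows
     * tagTableSize = (1<<18) * sizeof(U16) = 512 KiB of tag storage
     * With searchLog >= 5, rowLog becomes 5 and rows hold 32 entries. */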
@@ -1625,61 +1800,85 @@ static int ZSTD_indexTooCloseToMax(ZSTD_window_t w)
     return (size_t)(w.nextSrc - w.base) > (ZSTD_CURRENT_MAX - ZSTD_INDEXOVERFLOW_MARGIN);
 }
 
+/** ZSTD_dictTooBig():
+ * When dictionaries are larger than ZSTD_CHUNKSIZE_MAX they can't be loaded in
+ * one go generically. So we ensure that in that case we reset the tables to zero,
+ * so that we can load as much of the dictionary as possible.
+ */
+static int ZSTD_dictTooBig(size_t const loadedDictSize)
+{
+    return loadedDictSize > ZSTD_CHUNKSIZE_MAX;
+}
+
 /*! ZSTD_resetCCtx_internal() :
- * note : `params` are assumed fully validated at this stage */
+ * @param loadedDictSize The size of the dictionary to be loaded
+ * into the context, if any. If no dictionary is used, or the
+ * dictionary is being attached / copied, then pass 0.
+ * note : `params` are assumed fully validated at this stage.
+ */
 static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
-                                      ZSTD_CCtx_params params,
+                                      ZSTD_CCtx_params const* params,
                                       U64 const pledgedSrcSize,
+                                      size_t const loadedDictSize,
                                       ZSTD_compResetPolicy_e const crp,
                                       ZSTD_buffered_policy_e const zbuff)
 {
     ZSTD_cwksp* const ws = &zc->workspace;
-    DEBUGLOG(4, "ZSTD_resetCCtx_internal: pledgedSrcSize=%u, wlog=%u",
-                (U32)pledgedSrcSize, params.cParams.windowLog);
-    assert(!ZSTD_isError(ZSTD_checkCParams(params.cParams)));
+    DEBUGLOG(4, "ZSTD_resetCCtx_internal: pledgedSrcSize=%u, wlog=%u, useRowMatchFinder=%d",
+                (U32)pledgedSrcSize, params->cParams.windowLog, (int)params->useRowMatchFinder);
+    assert(!ZSTD_isError(ZSTD_checkCParams(params->cParams)));
 
     zc->isFirstBlock = 1;
 
-    if (params.ldmParams.enableLdm) {
+    /* Set applied params early so we can modify them for LDM,
+     * and point params at the applied params.
+     */
+    zc->appliedParams = *params;
+    params = &zc->appliedParams;
+
+    assert(params->useRowMatchFinder != ZSTD_urm_auto);
+    if (params->ldmParams.enableLdm) {
         /* Adjust long distance matching parameters */
-        ZSTD_ldm_adjustParameters(&params.ldmParams, &params.cParams);
-        assert(params.ldmParams.hashLog >= params.ldmParams.bucketSizeLog);
-        assert(params.ldmParams.hashRateLog < 32);
+        ZSTD_ldm_adjustParameters(&zc->appliedParams.ldmParams, &params->cParams);
+        assert(params->ldmParams.hashLog >= params->ldmParams.bucketSizeLog);
+        assert(params->ldmParams.hashRateLog < 32);
     }
 
-    {   size_t const windowSize = MAX(1, (size_t)MIN(((U64)1 << params.cParams.windowLog), pledgedSrcSize));
+    {   size_t const windowSize = MAX(1, (size_t)MIN(((U64)1 << params->cParams.windowLog), pledgedSrcSize));
         size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, windowSize);
-        U32    const divider = (params.cParams.minMatch==3) ? 3 : 4;
+        U32    const divider = (params->cParams.minMatch==3) ? 3 : 4;
         size_t const maxNbSeq = blockSize / divider;
-        size_t const buffOutSize = (zbuff == ZSTDb_buffered && params.outBufferMode == ZSTD_bm_buffered)
+        size_t const buffOutSize = (zbuff == ZSTDb_buffered && params->outBufferMode == ZSTD_bm_buffered)
                 ? ZSTD_compressBound(blockSize) + 1
                 : 0;
-        size_t const buffInSize = (zbuff == ZSTDb_buffered && params.inBufferMode == ZSTD_bm_buffered)
+        size_t const buffInSize = (zbuff == ZSTDb_buffered && params->inBufferMode == ZSTD_bm_buffered)
                 ? windowSize + blockSize
                 : 0;
-        size_t const maxNbLdmSeq = ZSTD_ldm_getMaxNbSeq(params.ldmParams, blockSize);
+        size_t const maxNbLdmSeq = ZSTD_ldm_getMaxNbSeq(params->ldmParams, blockSize);
 
         int const indexTooClose = ZSTD_indexTooCloseToMax(zc->blockState.matchState.window);
+        int const dictTooBig = ZSTD_dictTooBig(loadedDictSize);
         ZSTD_indexResetPolicy_e needsIndexReset =
-            (!indexTooClose && zc->initialized) ? ZSTDirp_continue : ZSTDirp_reset;
+            (indexTooClose || dictTooBig || !zc->initialized) ? ZSTDirp_reset : ZSTDirp_continue;
 
         size_t const neededSpace =
             ZSTD_estimateCCtxSize_usingCCtxParams_internal(
-                &params.cParams, &params.ldmParams, zc->staticSize != 0,
+                &params->cParams, &params->ldmParams, zc->staticSize != 0, params->useRowMatchFinder,
                 buffInSize, buffOutSize, pledgedSrcSize);
+        int resizeWorkspace;
+
         FORWARD_IF_ERROR(neededSpace, "cctx size estimate failed!");
 
         if (!zc->staticSize) ZSTD_cwksp_bump_oversized_duration(ws, 0);
 
-        /* Check if workspace is large enough, alloc a new one if needed */
-        {
+        {   /* Check if workspace is large enough, alloc a new one if needed */
            int const workspaceTooSmall = ZSTD_cwksp_sizeof(ws) < neededSpace;
            int const workspaceWasteful = ZSTD_cwksp_check_wasteful(ws, neededSpace);
-
+            resizeWorkspace = workspaceTooSmall || workspaceWasteful;
            DEBUGLOG(4, "Need %zu B workspace", neededSpace);
            DEBUGLOG(4, "windowSize: %zu - blockSize: %zu", windowSize, blockSize);
 
-            if (workspaceTooSmall || workspaceWasteful) {
+            if (resizeWorkspace) {
                DEBUGLOG(4, "Resize workspaceSize from %zuKB to %zuKB",
                            ZSTD_cwksp_sizeof(ws) >> 10,
                            neededSpace >> 10);
@@ -1707,8 +1906,7 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
         ZSTD_cwksp_clear(ws);
 
         /* init params */
-        zc->appliedParams = params;
-        zc->blockState.matchState.cParams = params.cParams;
+        zc->blockState.matchState.cParams = params->cParams;
         zc->pledgedSrcSizePlusOne = pledgedSrcSize+1;
         zc->consumedSrcSize = 0;
         zc->producedCSize = 0;
@@ -1739,11 +1937,11 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
         zc->outBuff = (char*)ZSTD_cwksp_reserve_buffer(ws, buffOutSize);
 
         /* ldm bucketOffsets table */
-        if (params.ldmParams.enableLdm) {
+        if (params->ldmParams.enableLdm) {
             /* TODO: avoid memset? */
             size_t const numBuckets =
-                  ((size_t)1) << (params.ldmParams.hashLog -
-                                  params.ldmParams.bucketSizeLog);
+                  ((size_t)1) << (params->ldmParams.hashLog -
+                                  params->ldmParams.bucketSizeLog);
             zc->ldmState.bucketOffsets = ZSTD_cwksp_reserve_buffer(ws, numBuckets);
             ZSTD_memset(zc->ldmState.bucketOffsets, 0, numBuckets);
         }
@@ -1759,32 +1957,28 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
         FORWARD_IF_ERROR(ZSTD_reset_matchState(
             &zc->blockState.matchState,
             ws,
-            &params.cParams,
+            &params->cParams,
+            params->useRowMatchFinder,
             crp,
             needsIndexReset,
             ZSTD_resetTarget_CCtx), "");
 
         /* ldm hash table */
-        if (params.ldmParams.enableLdm) {
+        if (params->ldmParams.enableLdm) {
             /* TODO: avoid memset? */
-            size_t const ldmHSize = ((size_t)1) << params.ldmParams.hashLog;
+            size_t const ldmHSize = ((size_t)1) << params->ldmParams.hashLog;
             zc->ldmState.hashTable = (ldmEntry_t*)ZSTD_cwksp_reserve_aligned(ws, ldmHSize * sizeof(ldmEntry_t));
             ZSTD_memset(zc->ldmState.hashTable, 0, ldmHSize * sizeof(ldmEntry_t));
             zc->ldmSequences = (rawSeq*)ZSTD_cwksp_reserve_aligned(ws, maxNbLdmSeq * sizeof(rawSeq));
             zc->maxNbLdmSequences = maxNbLdmSeq;
 
             ZSTD_window_init(&zc->ldmState.window);
-            ZSTD_window_clear(&zc->ldmState.window);
             zc->ldmState.loadedDictEnd = 0;
         }
 
-        /* Due to alignment, when reusing a workspace, we can actually consume
-         * up to 3 extra bytes for alignment. See the comments in zstd_cwksp.h
-         */
-        assert(ZSTD_cwksp_used(ws) >= neededSpace &&
-               ZSTD_cwksp_used(ws) <= neededSpace + 3);
-
+        assert(ZSTD_cwksp_estimated_space_within_bounds(ws, neededSpace, resizeWorkspace));
         DEBUGLOG(3, "wksp: finished allocating, %zd bytes remain available", ZSTD_cwksp_available_space(ws));
+
         zc->initialized = 1;
 
         return 0;
@@ -1840,6 +2034,8 @@ ZSTD_resetCCtx_byAttachingCDict(ZSTD_CCtx* cctx,
                                 U64 pledgedSrcSize,
                                 ZSTD_buffered_policy_e zbuff)
 {
+    DEBUGLOG(4, "ZSTD_resetCCtx_byAttachingCDict() pledgedSrcSize=%llu",
+                (unsigned long long)pledgedSrcSize);
     {
         ZSTD_compressionParameters adjusted_cdict_cParams = cdict->matchState.cParams;
         unsigned const windowLog = params.cParams.windowLog;
@@ -1855,7 +2051,9 @@ ZSTD_resetCCtx_byAttachingCDict(ZSTD_CCtx* cctx,
         params.cParams = ZSTD_adjustCParams_internal(adjusted_cdict_cParams, pledgedSrcSize,
                                                      cdict->dictContentSize, ZSTD_cpm_attachDict);
         params.cParams.windowLog = windowLog;
-        FORWARD_IF_ERROR(ZSTD_resetCCtx_internal(cctx, params, pledgedSrcSize,
+        params.useRowMatchFinder = cdict->useRowMatchFinder; /* cdict overrides */
+        FORWARD_IF_ERROR(ZSTD_resetCCtx_internal(cctx, &params, pledgedSrcSize,
+                                                 /* loadedDictSize */ 0,
                                                  ZSTDcrp_makeClean, zbuff), "");
         assert(cctx->appliedParams.cParams.strategy == adjusted_cdict_cParams.strategy);
     }
@@ -1899,15 +2097,17 @@ static size_t ZSTD_resetCCtx_byCopyingCDict(ZSTD_CCtx* cctx,
     const ZSTD_compressionParameters *cdict_cParams = &cdict->matchState.cParams;
 
     assert(!cdict->matchState.dedicatedDictSearch);
-
-    DEBUGLOG(4, "copying dictionary into context");
+    DEBUGLOG(4, "ZSTD_resetCCtx_byCopyingCDict() pledgedSrcSize=%llu",
+                (unsigned long long)pledgedSrcSize);
 
     {   unsigned const windowLog = params.cParams.windowLog;
         assert(windowLog != 0);
         /* Copy only compression parameters related to tables. */
         params.cParams = *cdict_cParams;
         params.cParams.windowLog = windowLog;
-        FORWARD_IF_ERROR(ZSTD_resetCCtx_internal(cctx, params, pledgedSrcSize,
+        params.useRowMatchFinder = cdict->useRowMatchFinder;
+        FORWARD_IF_ERROR(ZSTD_resetCCtx_internal(cctx, &params, pledgedSrcSize,
+                                                 /* loadedDictSize */ 0,
                                                  ZSTDcrp_leaveDirty, zbuff), "");
         assert(cctx->appliedParams.cParams.strategy == cdict_cParams->strategy);
         assert(cctx->appliedParams.cParams.hashLog == cdict_cParams->hashLog);
@@ -1915,17 +2115,30 @@ static size_t ZSTD_resetCCtx_byCopyingCDict(ZSTD_CCtx* cctx,
     }
 
     ZSTD_cwksp_mark_tables_dirty(&cctx->workspace);
+    assert(params.useRowMatchFinder != ZSTD_urm_auto);
 
     /* copy tables */
-    {   size_t const chainSize = (cdict_cParams->strategy == ZSTD_fast) ? 0 : ((size_t)1 << cdict_cParams->chainLog);
+    {   size_t const chainSize = ZSTD_allocateChainTable(cdict_cParams->strategy, cdict->useRowMatchFinder, 0 /* DDS guaranteed disabled */)
+                                     ? ((size_t)1 << cdict_cParams->chainLog)
+                                     : 0;
         size_t const hSize =  (size_t)1 << cdict_cParams->hashLog;
 
         ZSTD_memcpy(cctx->blockState.matchState.hashTable,
                     cdict->matchState.hashTable,
                     hSize * sizeof(U32));
-        ZSTD_memcpy(cctx->blockState.matchState.chainTable,
+        /* Do not copy cdict's chainTable if cctx has parameters such that it would not use chainTable */
+        if (ZSTD_allocateChainTable(cctx->appliedParams.cParams.strategy, cctx->appliedParams.useRowMatchFinder, 0 /* forDDSDict */)) {
+            ZSTD_memcpy(cctx->blockState.matchState.chainTable,
                     cdict->matchState.chainTable,
                     chainSize * sizeof(U32));
+        }
+        /* copy tag table */
+        if (ZSTD_rowMatchFinderUsed(cdict_cParams->strategy, cdict->useRowMatchFinder)) {
+            size_t const tagTableSize = hSize*sizeof(U16);
+            ZSTD_memcpy(cctx->blockState.matchState.tagTable,
+                        cdict->matchState.tagTable,
+                        tagTableSize);
+        }
     }
 
     /* Zero the hashTable3, since the cdict never fills it */
@@ -1989,16 +2202,18 @@ static size_t ZSTD_copyCCtx_internal(ZSTD_CCtx* dstCCtx,
  U64 pledgedSrcSize,
  ZSTD_buffered_policy_e zbuff)
  {
- DEBUGLOG(5, "ZSTD_copyCCtx_internal");
  RETURN_ERROR_IF(srcCCtx->stage!=ZSTDcs_init, stage_wrong,
  "Can't copy a ctx that's not in init stage.");
-
+ DEBUGLOG(5, "ZSTD_copyCCtx_internal");
  ZSTD_memcpy(&dstCCtx->customMem, &srcCCtx->customMem, sizeof(ZSTD_customMem));
  { ZSTD_CCtx_params params = dstCCtx->requestedParams;
  /* Copy only compression parameters related to tables. */
  params.cParams = srcCCtx->appliedParams.cParams;
+ assert(srcCCtx->appliedParams.useRowMatchFinder != ZSTD_urm_auto);
+ params.useRowMatchFinder = srcCCtx->appliedParams.useRowMatchFinder;
  params.fParams = fParams;
- ZSTD_resetCCtx_internal(dstCCtx, params, pledgedSrcSize,
+ ZSTD_resetCCtx_internal(dstCCtx, &params, pledgedSrcSize,
+ /* loadedDictSize */ 0,
  ZSTDcrp_leaveDirty, zbuff);
  assert(dstCCtx->appliedParams.cParams.windowLog == srcCCtx->appliedParams.cParams.windowLog);
  assert(dstCCtx->appliedParams.cParams.strategy == srcCCtx->appliedParams.cParams.strategy);
@@ -2010,7 +2225,11 @@ static size_t ZSTD_copyCCtx_internal(ZSTD_CCtx* dstCCtx,
  ZSTD_cwksp_mark_tables_dirty(&dstCCtx->workspace);

  /* copy tables */
- { size_t const chainSize = (srcCCtx->appliedParams.cParams.strategy == ZSTD_fast) ? 0 : ((size_t)1 << srcCCtx->appliedParams.cParams.chainLog);
+ { size_t const chainSize = ZSTD_allocateChainTable(srcCCtx->appliedParams.cParams.strategy,
+ srcCCtx->appliedParams.useRowMatchFinder,
+ 0 /* forDDSDict */)
+ ? ((size_t)1 << srcCCtx->appliedParams.cParams.chainLog)
+ : 0;
  size_t const hSize = (size_t)1 << srcCCtx->appliedParams.cParams.hashLog;
  int const h3log = srcCCtx->blockState.matchState.hashLog3;
  size_t const h3Size = h3log ? ((size_t)1 << h3log) : 0;
@@ -2124,7 +2343,7 @@ static void ZSTD_reduceIndex (ZSTD_matchState_t* ms, ZSTD_CCtx_params const* par
  ZSTD_reduceTable(ms->hashTable, hSize, reducerValue);
  }

- if (params->cParams.strategy != ZSTD_fast) {
+ if (ZSTD_allocateChainTable(params->cParams.strategy, params->useRowMatchFinder, (U32)ms->dedicatedDictSearch)) {
  U32 const chainSize = (U32)1 << params->cParams.chainLog;
  if (params->cParams.strategy == ZSTD_btlazy2)
  ZSTD_reduceTable_btlazy2(ms->chainTable, chainSize, reducerValue);
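ZSTD_allocateChainTable() itself is outside this diff, but its three call sites pin down the intent: keep the old strategy != ZSTD_fast rule, always allocate for a dedicated-dict-search matchstate, and never allocate when the row matchfinder replaces the chain table. A plausible reconstruction, hedged as an inference from those call sites rather than the actual zstd_compress.c body:

    /* Sketch inferred from the call sites above; not copied from zstd_compress.c. */
    static int ZSTD_allocateChainTable_sketch(ZSTD_strategy strategy,
                                              ZSTD_useRowMatchFinderMode_e useRowMatchFinder,
                                              U32 forDDSDict)
    {
        if (forDDSDict) return 1;  /* a DDS dictionary matchstate always keeps its chain table */
        /* ZSTD_fast never had one; row mode swaps it for the tag table */
        return (strategy != ZSTD_fast) && !ZSTD_rowMatchFinderUsed(strategy, useRowMatchFinder);
    }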
@@ -2161,9 +2380,9 @@ void ZSTD_seqToCodes(const seqStore_t* seqStorePtr)
  ofCodeTable[u] = (BYTE)ZSTD_highbit32(sequences[u].offset);
  mlCodeTable[u] = (BYTE)ZSTD_MLcode(mlv);
  }
- if (seqStorePtr->longLengthID==1)
+ if (seqStorePtr->longLengthType==ZSTD_llt_literalLength)
  llCodeTable[seqStorePtr->longLengthPos] = MaxLL;
- if (seqStorePtr->longLengthID==2)
+ if (seqStorePtr->longLengthType==ZSTD_llt_matchLength)
  mlCodeTable[seqStorePtr->longLengthPos] = MaxML;
  }
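The bare longLengthID values 1 and 2 give way to a named enum here and throughout the file. Assuming the enumerators simply mirror the old integers (the definition itself lives in zstd_internal.h, which this release also updates), it reads roughly:

    /* Sketch; assumed to mirror the old 0/1/2 longLengthID values. */
    typedef enum {
        ZSTD_llt_none = 0,           /* no long-length escape in this seqStore */
        ZSTD_llt_literalLength = 1,  /* was longLengthID == 1 */
        ZSTD_llt_matchLength = 2     /* was longLengthID == 2 */
    } ZSTD_longLengthType_e;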

@@ -2177,10 +2396,158 @@ static int ZSTD_useTargetCBlockSize(const ZSTD_CCtx_params* cctxParams)
  return (cctxParams->targetCBlockSize != 0);
  }

- /* ZSTD_entropyCompressSequences_internal():
- * actually compresses both literals and sequences */
+ /* ZSTD_blockSplitterEnabled():
+ * Returns if block splitting param is being used
+ * If used, compression will do best effort to split a block in order to improve compression ratio.
+ * Returns 1 if true, 0 otherwise. */
+ static int ZSTD_blockSplitterEnabled(ZSTD_CCtx_params* cctxParams)
+ {
+ DEBUGLOG(5, "ZSTD_blockSplitterEnabled(splitBlocks=%d)", cctxParams->splitBlocks);
+ return (cctxParams->splitBlocks != 0);
+ }
+
+ /* Type returned by ZSTD_buildSequencesStatistics containing finalized symbol encoding types
+ * and size of the sequences statistics
+ */
+ typedef struct {
+ U32 LLtype;
+ U32 Offtype;
+ U32 MLtype;
+ size_t size;
+ size_t lastCountSize; /* Accounts for bug in 1.3.4. More detail in ZSTD_entropyCompressSeqStore_internal() */
+ } ZSTD_symbolEncodingTypeStats_t;
+
+ /* ZSTD_buildSequencesStatistics():
+ * Returns a ZSTD_symbolEncodingTypeStats_t, or a zstd error code in the `size` field.
+ * Modifies `nextEntropy` to have the appropriate values as a side effect.
+ * nbSeq must be greater than 0.
+ *
+ * entropyWkspSize must be of size at least ENTROPY_WORKSPACE_SIZE - (MaxSeq + 1)*sizeof(U32)
+ */
+ static ZSTD_symbolEncodingTypeStats_t
+ ZSTD_buildSequencesStatistics(seqStore_t* seqStorePtr, size_t nbSeq,
+ const ZSTD_fseCTables_t* prevEntropy, ZSTD_fseCTables_t* nextEntropy,
+ BYTE* dst, const BYTE* const dstEnd,
+ ZSTD_strategy strategy, unsigned* countWorkspace,
+ void* entropyWorkspace, size_t entropyWkspSize) {
+ BYTE* const ostart = dst;
+ const BYTE* const oend = dstEnd;
+ BYTE* op = ostart;
+ FSE_CTable* CTable_LitLength = nextEntropy->litlengthCTable;
+ FSE_CTable* CTable_OffsetBits = nextEntropy->offcodeCTable;
+ FSE_CTable* CTable_MatchLength = nextEntropy->matchlengthCTable;
+ const BYTE* const ofCodeTable = seqStorePtr->ofCode;
+ const BYTE* const llCodeTable = seqStorePtr->llCode;
+ const BYTE* const mlCodeTable = seqStorePtr->mlCode;
+ ZSTD_symbolEncodingTypeStats_t stats;
+
+ stats.lastCountSize = 0;
+ /* convert length/distances into codes */
+ ZSTD_seqToCodes(seqStorePtr);
+ assert(op <= oend);
+ assert(nbSeq != 0); /* ZSTD_selectEncodingType() divides by nbSeq */
+ /* build CTable for Literal Lengths */
+ { unsigned max = MaxLL;
+ size_t const mostFrequent = HIST_countFast_wksp(countWorkspace, &max, llCodeTable, nbSeq, entropyWorkspace, entropyWkspSize); /* can't fail */
+ DEBUGLOG(5, "Building LL table");
+ nextEntropy->litlength_repeatMode = prevEntropy->litlength_repeatMode;
+ stats.LLtype = ZSTD_selectEncodingType(&nextEntropy->litlength_repeatMode,
+ countWorkspace, max, mostFrequent, nbSeq,
+ LLFSELog, prevEntropy->litlengthCTable,
+ LL_defaultNorm, LL_defaultNormLog,
+ ZSTD_defaultAllowed, strategy);
+ assert(set_basic < set_compressed && set_rle < set_compressed);
+ assert(!(stats.LLtype < set_compressed && nextEntropy->litlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */
+ { size_t const countSize = ZSTD_buildCTable(
+ op, (size_t)(oend - op),
+ CTable_LitLength, LLFSELog, (symbolEncodingType_e)stats.LLtype,
+ countWorkspace, max, llCodeTable, nbSeq,
+ LL_defaultNorm, LL_defaultNormLog, MaxLL,
+ prevEntropy->litlengthCTable,
+ sizeof(prevEntropy->litlengthCTable),
+ entropyWorkspace, entropyWkspSize);
+ if (ZSTD_isError(countSize)) {
+ DEBUGLOG(3, "ZSTD_buildCTable for LitLens failed");
+ stats.size = countSize;
+ return stats;
+ }
+ if (stats.LLtype == set_compressed)
+ stats.lastCountSize = countSize;
+ op += countSize;
+ assert(op <= oend);
+ } }
+ /* build CTable for Offsets */
+ { unsigned max = MaxOff;
+ size_t const mostFrequent = HIST_countFast_wksp(
+ countWorkspace, &max, ofCodeTable, nbSeq, entropyWorkspace, entropyWkspSize); /* can't fail */
+ /* We can only use the basic table if max <= DefaultMaxOff, otherwise the offsets are too large */
+ ZSTD_defaultPolicy_e const defaultPolicy = (max <= DefaultMaxOff) ? ZSTD_defaultAllowed : ZSTD_defaultDisallowed;
+ DEBUGLOG(5, "Building OF table");
+ nextEntropy->offcode_repeatMode = prevEntropy->offcode_repeatMode;
+ stats.Offtype = ZSTD_selectEncodingType(&nextEntropy->offcode_repeatMode,
+ countWorkspace, max, mostFrequent, nbSeq,
+ OffFSELog, prevEntropy->offcodeCTable,
+ OF_defaultNorm, OF_defaultNormLog,
+ defaultPolicy, strategy);
+ assert(!(stats.Offtype < set_compressed && nextEntropy->offcode_repeatMode != FSE_repeat_none)); /* We don't copy tables */
+ { size_t const countSize = ZSTD_buildCTable(
+ op, (size_t)(oend - op),
+ CTable_OffsetBits, OffFSELog, (symbolEncodingType_e)stats.Offtype,
+ countWorkspace, max, ofCodeTable, nbSeq,
+ OF_defaultNorm, OF_defaultNormLog, DefaultMaxOff,
+ prevEntropy->offcodeCTable,
+ sizeof(prevEntropy->offcodeCTable),
+ entropyWorkspace, entropyWkspSize);
+ if (ZSTD_isError(countSize)) {
+ DEBUGLOG(3, "ZSTD_buildCTable for Offsets failed");
+ stats.size = countSize;
+ return stats;
+ }
+ if (stats.Offtype == set_compressed)
+ stats.lastCountSize = countSize;
+ op += countSize;
+ assert(op <= oend);
+ } }
+ /* build CTable for MatchLengths */
+ { unsigned max = MaxML;
+ size_t const mostFrequent = HIST_countFast_wksp(
+ countWorkspace, &max, mlCodeTable, nbSeq, entropyWorkspace, entropyWkspSize); /* can't fail */
+ DEBUGLOG(5, "Building ML table (remaining space : %i)", (int)(oend-op));
+ nextEntropy->matchlength_repeatMode = prevEntropy->matchlength_repeatMode;
+ stats.MLtype = ZSTD_selectEncodingType(&nextEntropy->matchlength_repeatMode,
+ countWorkspace, max, mostFrequent, nbSeq,
+ MLFSELog, prevEntropy->matchlengthCTable,
+ ML_defaultNorm, ML_defaultNormLog,
+ ZSTD_defaultAllowed, strategy);
+ assert(!(stats.MLtype < set_compressed && nextEntropy->matchlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */
+ { size_t const countSize = ZSTD_buildCTable(
+ op, (size_t)(oend - op),
+ CTable_MatchLength, MLFSELog, (symbolEncodingType_e)stats.MLtype,
+ countWorkspace, max, mlCodeTable, nbSeq,
+ ML_defaultNorm, ML_defaultNormLog, MaxML,
+ prevEntropy->matchlengthCTable,
+ sizeof(prevEntropy->matchlengthCTable),
+ entropyWorkspace, entropyWkspSize);
+ if (ZSTD_isError(countSize)) {
+ DEBUGLOG(3, "ZSTD_buildCTable for MatchLengths failed");
+ stats.size = countSize;
+ return stats;
+ }
+ if (stats.MLtype == set_compressed)
+ stats.lastCountSize = countSize;
+ op += countSize;
+ assert(op <= oend);
+ } }
+ stats.size = (size_t)(op-ostart);
+ return stats;
+ }
+
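Since ZSTD_buildSequencesStatistics() returns a struct, it cannot return a bare error code; instead an error is smuggled through the size field, which every caller must vet before trusting the rest of the struct. A minimal caller sketch (the real call sites appear further down in this diff):

    ZSTD_symbolEncodingTypeStats_t const stats =
        ZSTD_buildSequencesStatistics(seqStorePtr, nbSeq,
                                      &prevEntropy->fse, &nextEntropy->fse,
                                      op, oend, strategy, count,
                                      entropyWorkspace, entropyWkspSize);
    FORWARD_IF_ERROR(stats.size, "ZSTD_buildSequencesStatistics failed!");
    op += stats.size;  /* only meaningful once stats.size is known not to be an error */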
+ /* ZSTD_entropyCompressSeqStore_internal():
+ * compresses both literals and sequences
+ * Returns compressed size of block, or a zstd error.
+ */
  MEM_STATIC size_t
- ZSTD_entropyCompressSequences_internal(seqStore_t* seqStorePtr,
+ ZSTD_entropyCompressSeqStore_internal(seqStore_t* seqStorePtr,
  const ZSTD_entropyCTables_t* prevEntropy,
  ZSTD_entropyCTables_t* nextEntropy,
  const ZSTD_CCtx_params* cctxParams,
@@ -2194,22 +2561,20 @@ ZSTD_entropyCompressSequences_internal(seqStore_t* seqStorePtr,
  FSE_CTable* CTable_LitLength = nextEntropy->fse.litlengthCTable;
  FSE_CTable* CTable_OffsetBits = nextEntropy->fse.offcodeCTable;
  FSE_CTable* CTable_MatchLength = nextEntropy->fse.matchlengthCTable;
- U32 LLtype, Offtype, MLtype; /* compressed, raw or rle */
  const seqDef* const sequences = seqStorePtr->sequencesStart;
+ const size_t nbSeq = seqStorePtr->sequences - seqStorePtr->sequencesStart;
  const BYTE* const ofCodeTable = seqStorePtr->ofCode;
  const BYTE* const llCodeTable = seqStorePtr->llCode;
  const BYTE* const mlCodeTable = seqStorePtr->mlCode;
  BYTE* const ostart = (BYTE*)dst;
  BYTE* const oend = ostart + dstCapacity;
  BYTE* op = ostart;
- size_t const nbSeq = (size_t)(seqStorePtr->sequences - seqStorePtr->sequencesStart);
- BYTE* seqHead;
- BYTE* lastNCount = NULL;
+ size_t lastCountSize;

  entropyWorkspace = count + (MaxSeq + 1);
  entropyWkspSize -= (MaxSeq + 1) * sizeof(*count);

- DEBUGLOG(4, "ZSTD_entropyCompressSequences_internal (nbSeq=%zu)", nbSeq);
+ DEBUGLOG(4, "ZSTD_entropyCompressSeqStore_internal (nbSeq=%zu)", nbSeq);
  ZSTD_STATIC_ASSERT(HUF_WORKSPACE_SIZE >= (1<<MAX(MLFSELog,LLFSELog)));
  assert(entropyWkspSize >= HUF_WORKSPACE_SIZE);

@@ -2249,95 +2614,20 @@ ZSTD_entropyCompressSequences_internal(seqStore_t* seqStorePtr,
  ZSTD_memcpy(&nextEntropy->fse, &prevEntropy->fse, sizeof(prevEntropy->fse));
  return (size_t)(op - ostart);
  }
-
- /* seqHead : flags for FSE encoding type */
- seqHead = op++;
- assert(op <= oend);
-
- /* convert length/distances into codes */
- ZSTD_seqToCodes(seqStorePtr);
- /* build CTable for Literal Lengths */
- { unsigned max = MaxLL;
- size_t const mostFrequent = HIST_countFast_wksp(count, &max, llCodeTable, nbSeq, entropyWorkspace, entropyWkspSize); /* can't fail */
- DEBUGLOG(5, "Building LL table");
- nextEntropy->fse.litlength_repeatMode = prevEntropy->fse.litlength_repeatMode;
- LLtype = ZSTD_selectEncodingType(&nextEntropy->fse.litlength_repeatMode,
- count, max, mostFrequent, nbSeq,
- LLFSELog, prevEntropy->fse.litlengthCTable,
- LL_defaultNorm, LL_defaultNormLog,
- ZSTD_defaultAllowed, strategy);
- assert(set_basic < set_compressed && set_rle < set_compressed);
- assert(!(LLtype < set_compressed && nextEntropy->fse.litlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */
- { size_t const countSize = ZSTD_buildCTable(
- op, (size_t)(oend - op),
- CTable_LitLength, LLFSELog, (symbolEncodingType_e)LLtype,
- count, max, llCodeTable, nbSeq,
- LL_defaultNorm, LL_defaultNormLog, MaxLL,
- prevEntropy->fse.litlengthCTable,
- sizeof(prevEntropy->fse.litlengthCTable),
- entropyWorkspace, entropyWkspSize);
- FORWARD_IF_ERROR(countSize, "ZSTD_buildCTable for LitLens failed");
- if (LLtype == set_compressed)
- lastNCount = op;
- op += countSize;
- assert(op <= oend);
- } }
- /* build CTable for Offsets */
- { unsigned max = MaxOff;
- size_t const mostFrequent = HIST_countFast_wksp(
- count, &max, ofCodeTable, nbSeq, entropyWorkspace, entropyWkspSize); /* can't fail */
- /* We can only use the basic table if max <= DefaultMaxOff, otherwise the offsets are too large */
- ZSTD_defaultPolicy_e const defaultPolicy = (max <= DefaultMaxOff) ? ZSTD_defaultAllowed : ZSTD_defaultDisallowed;
- DEBUGLOG(5, "Building OF table");
- nextEntropy->fse.offcode_repeatMode = prevEntropy->fse.offcode_repeatMode;
- Offtype = ZSTD_selectEncodingType(&nextEntropy->fse.offcode_repeatMode,
- count, max, mostFrequent, nbSeq,
- OffFSELog, prevEntropy->fse.offcodeCTable,
- OF_defaultNorm, OF_defaultNormLog,
- defaultPolicy, strategy);
- assert(!(Offtype < set_compressed && nextEntropy->fse.offcode_repeatMode != FSE_repeat_none)); /* We don't copy tables */
- { size_t const countSize = ZSTD_buildCTable(
- op, (size_t)(oend - op),
- CTable_OffsetBits, OffFSELog, (symbolEncodingType_e)Offtype,
- count, max, ofCodeTable, nbSeq,
- OF_defaultNorm, OF_defaultNormLog, DefaultMaxOff,
- prevEntropy->fse.offcodeCTable,
- sizeof(prevEntropy->fse.offcodeCTable),
- entropyWorkspace, entropyWkspSize);
- FORWARD_IF_ERROR(countSize, "ZSTD_buildCTable for Offsets failed");
- if (Offtype == set_compressed)
- lastNCount = op;
- op += countSize;
- assert(op <= oend);
- } }
- /* build CTable for MatchLengths */
- { unsigned max = MaxML;
- size_t const mostFrequent = HIST_countFast_wksp(
- count, &max, mlCodeTable, nbSeq, entropyWorkspace, entropyWkspSize); /* can't fail */
- DEBUGLOG(5, "Building ML table (remaining space : %i)", (int)(oend-op));
- nextEntropy->fse.matchlength_repeatMode = prevEntropy->fse.matchlength_repeatMode;
- MLtype = ZSTD_selectEncodingType(&nextEntropy->fse.matchlength_repeatMode,
- count, max, mostFrequent, nbSeq,
- MLFSELog, prevEntropy->fse.matchlengthCTable,
- ML_defaultNorm, ML_defaultNormLog,
- ZSTD_defaultAllowed, strategy);
- assert(!(MLtype < set_compressed && nextEntropy->fse.matchlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */
- { size_t const countSize = ZSTD_buildCTable(
- op, (size_t)(oend - op),
- CTable_MatchLength, MLFSELog, (symbolEncodingType_e)MLtype,
- count, max, mlCodeTable, nbSeq,
- ML_defaultNorm, ML_defaultNormLog, MaxML,
- prevEntropy->fse.matchlengthCTable,
- sizeof(prevEntropy->fse.matchlengthCTable),
- entropyWorkspace, entropyWkspSize);
- FORWARD_IF_ERROR(countSize, "ZSTD_buildCTable for MatchLengths failed");
- if (MLtype == set_compressed)
- lastNCount = op;
- op += countSize;
- assert(op <= oend);
- } }
-
- *seqHead = (BYTE)((LLtype<<6) + (Offtype<<4) + (MLtype<<2));
+ {
+ ZSTD_symbolEncodingTypeStats_t stats;
+ BYTE* seqHead = op++;
+ /* build stats for sequences */
+ stats = ZSTD_buildSequencesStatistics(seqStorePtr, nbSeq,
+ &prevEntropy->fse, &nextEntropy->fse,
+ op, oend,
+ strategy, count,
+ entropyWorkspace, entropyWkspSize);
+ FORWARD_IF_ERROR(stats.size, "ZSTD_buildSequencesStatistics failed!");
+ *seqHead = (BYTE)((stats.LLtype<<6) + (stats.Offtype<<4) + (stats.MLtype<<2));
+ lastCountSize = stats.lastCountSize;
+ op += stats.size;
+ }

  { size_t const bitstreamSize = ZSTD_encodeSequences(
  op, (size_t)(oend - op),
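The seqHead byte written in this hunk packs the three encoding choices, two bits apiece, into the Sequences section header. A worked example, using the symbolEncodingType_e values set_basic=0, set_rle=1, set_compressed=2, set_repeat=3:

    /* LLtype = set_compressed (2), Offtype = set_basic (0), MLtype = set_rle (1)  */
    /* seqHead = (2<<6) + (0<<4) + (1<<2) = 128 + 0 + 4 = 132 = 0x84               */
    /* the low two bits remain 0 (reserved by the format)                          */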
@@ -2357,9 +2647,9 @@ ZSTD_entropyCompressSequences_internal(seqStore_t* seqStorePtr,
  * In this exceedingly rare case, we will simply emit an uncompressed
  * block, since it isn't worth optimizing.
  */
- if (lastNCount && (op - lastNCount) < 4) {
- /* NCountSize >= 2 && bitstreamSize > 0 ==> lastCountSize == 3 */
- assert(op - lastNCount == 3);
+ if (lastCountSize && (lastCountSize + bitstreamSize) < 4) {
+ /* lastCountSize >= 2 && bitstreamSize > 0 ==> lastCountSize == 3 */
+ assert(lastCountSize + bitstreamSize == 3);
  DEBUGLOG(5, "Avoiding bug in zstd decoder in versions <= 1.3.4 by "
  "emitting an uncompressed block.");
  return 0;
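The rewritten guard is arithmetically equivalent to the old pointer distance: a serialized FSE NCount header occupies at least 2 bytes and a non-empty bitstream at least 1, so lastCountSize + bitstreamSize < 4 can only be realized as 2 + 1 = 3, which is exactly the layout that trips decoders up to v1.3.4.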
@@ -2371,7 +2661,7 @@ ZSTD_entropyCompressSequences_internal(seqStore_t* seqStorePtr,
  }

  MEM_STATIC size_t
- ZSTD_entropyCompressSequences(seqStore_t* seqStorePtr,
+ ZSTD_entropyCompressSeqStore(seqStore_t* seqStorePtr,
  const ZSTD_entropyCTables_t* prevEntropy,
  ZSTD_entropyCTables_t* nextEntropy,
  const ZSTD_CCtx_params* cctxParams,
@@ -2380,7 +2670,7 @@ ZSTD_entropyCompressSequences(seqStore_t* seqStorePtr,
  void* entropyWorkspace, size_t entropyWkspSize,
  int bmi2)
  {
- size_t const cSize = ZSTD_entropyCompressSequences_internal(
+ size_t const cSize = ZSTD_entropyCompressSeqStore_internal(
  seqStorePtr, prevEntropy, nextEntropy, cctxParams,
  dst, dstCapacity,
  entropyWorkspace, entropyWkspSize, bmi2);
@@ -2390,20 +2680,20 @@ ZSTD_entropyCompressSequences(seqStore_t* seqStorePtr,
  */
  if ((cSize == ERROR(dstSize_tooSmall)) & (srcSize <= dstCapacity))
  return 0; /* block not compressed */
- FORWARD_IF_ERROR(cSize, "ZSTD_entropyCompressSequences_internal failed");
+ FORWARD_IF_ERROR(cSize, "ZSTD_entropyCompressSeqStore_internal failed");

  /* Check compressibility */
  { size_t const maxCSize = srcSize - ZSTD_minGain(srcSize, cctxParams->cParams.strategy);
  if (cSize >= maxCSize) return 0; /* block not compressed */
  }
- DEBUGLOG(4, "ZSTD_entropyCompressSequences() cSize: %zu\n", cSize);
+ DEBUGLOG(4, "ZSTD_entropyCompressSeqStore() cSize: %zu", cSize);
  return cSize;
  }

  /* ZSTD_selectBlockCompressor() :
  * Not static, but internal use only (used by long distance matcher)
  * assumption : strat is a valid strategy */
- ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, ZSTD_dictMode_e dictMode)
+ ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, ZSTD_useRowMatchFinderMode_e useRowMatchFinder, ZSTD_dictMode_e dictMode)
  {
  static const ZSTD_blockCompressor blockCompressor[4][ZSTD_STRATEGY_MAX+1] = {
  { ZSTD_compressBlock_fast /* default for 0 */,
@@ -2451,7 +2741,28 @@ ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, ZSTD_dictMo
  ZSTD_STATIC_ASSERT((unsigned)ZSTD_fast == 1);

  assert(ZSTD_cParam_withinBounds(ZSTD_c_strategy, strat));
- selectedCompressor = blockCompressor[(int)dictMode][(int)strat];
+ DEBUGLOG(4, "Selected block compressor: dictMode=%d strat=%d rowMatchfinder=%d", (int)dictMode, (int)strat, (int)useRowMatchFinder);
+ if (ZSTD_rowMatchFinderUsed(strat, useRowMatchFinder)) {
+ static const ZSTD_blockCompressor rowBasedBlockCompressors[4][3] = {
+ { ZSTD_compressBlock_greedy_row,
+ ZSTD_compressBlock_lazy_row,
+ ZSTD_compressBlock_lazy2_row },
+ { ZSTD_compressBlock_greedy_extDict_row,
+ ZSTD_compressBlock_lazy_extDict_row,
+ ZSTD_compressBlock_lazy2_extDict_row },
+ { ZSTD_compressBlock_greedy_dictMatchState_row,
+ ZSTD_compressBlock_lazy_dictMatchState_row,
+ ZSTD_compressBlock_lazy2_dictMatchState_row },
+ { ZSTD_compressBlock_greedy_dedicatedDictSearch_row,
+ ZSTD_compressBlock_lazy_dedicatedDictSearch_row,
+ ZSTD_compressBlock_lazy2_dedicatedDictSearch_row }
+ };
+ DEBUGLOG(4, "Selecting a row-based matchfinder");
+ assert(useRowMatchFinder != ZSTD_urm_auto);
+ selectedCompressor = rowBasedBlockCompressors[(int)dictMode][(int)strat - (int)ZSTD_greedy];
+ } else {
+ selectedCompressor = blockCompressor[(int)dictMode][(int)strat];
+ }
  assert(selectedCompressor != NULL);
  return selectedCompressor;
  }
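Row variants exist only for the greedy, lazy, and lazy2 strategies, so the new table is indexed by (int)strat - (int)ZSTD_greedy. A sanity sketch of that column math, assuming the contiguous strategy values ZSTD_greedy=3, ZSTD_lazy=4, ZSTD_lazy2=5 from zstd.h:

    /* greedy -> column 0, lazy -> column 1, lazy2 -> column 2 */
    assert(strat >= ZSTD_greedy && strat <= ZSTD_lazy2); /* only these have _row variants */
    int const column = (int)strat - (int)ZSTD_greedy;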
@@ -2467,7 +2778,7 @@ void ZSTD_resetSeqStore(seqStore_t* ssPtr)
  {
  ssPtr->lit = ssPtr->litStart;
  ssPtr->sequences = ssPtr->sequencesStart;
- ssPtr->longLengthID = 0;
+ ssPtr->longLengthType = ZSTD_llt_none;
  }

  typedef enum { ZSTDbss_compress, ZSTDbss_noCompress } ZSTD_buildSeqStore_e;
@@ -2520,6 +2831,7 @@ static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize)
  ZSTD_ldm_blockCompress(&zc->externSeqStore,
  ms, &zc->seqStore,
  zc->blockState.nextCBlock->rep,
+ zc->appliedParams.useRowMatchFinder,
  src, srcSize);
  assert(zc->externSeqStore.pos <= zc->externSeqStore.size);
  } else if (zc->appliedParams.ldmParams.enableLdm) {
@@ -2536,10 +2848,13 @@ static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize)
  ZSTD_ldm_blockCompress(&ldmSeqStore,
  ms, &zc->seqStore,
  zc->blockState.nextCBlock->rep,
+ zc->appliedParams.useRowMatchFinder,
  src, srcSize);
  assert(ldmSeqStore.pos == ldmSeqStore.size);
  } else { /* not long range mode */
- ZSTD_blockCompressor const blockCompressor = ZSTD_selectBlockCompressor(zc->appliedParams.cParams.strategy, dictMode);
+ ZSTD_blockCompressor const blockCompressor = ZSTD_selectBlockCompressor(zc->appliedParams.cParams.strategy,
+ zc->appliedParams.useRowMatchFinder,
+ dictMode);
  ms->ldmSeqStore = NULL;
  lastLLSize = blockCompressor(ms, &zc->seqStore, zc->blockState.nextCBlock->rep, src, srcSize);
  }
@@ -2573,9 +2888,9 @@ static void ZSTD_copyBlockSequences(ZSTD_CCtx* zc)
  outSeqs[i].rep = 0;

  if (i == seqStore->longLengthPos) {
- if (seqStore->longLengthID == 1) {
+ if (seqStore->longLengthType == ZSTD_llt_literalLength) {
  outSeqs[i].litLength += 0x10000;
- } else if (seqStore->longLengthID == 2) {
+ } else if (seqStore->longLengthType == ZSTD_llt_matchLength) {
  outSeqs[i].matchLength += 0x10000;
  }
  }
@@ -2686,11 +3001,713 @@ static int ZSTD_maybeRLE(seqStore_t const* seqStore)
  return nbSeqs < 4 && nbLits < 10;
  }

- static void ZSTD_confirmRepcodesAndEntropyTables(ZSTD_CCtx* zc)
+ static void ZSTD_blockState_confirmRepcodesAndEntropyTables(ZSTD_blockState_t* const bs)
+ {
+ ZSTD_compressedBlockState_t* const tmp = bs->prevCBlock;
+ bs->prevCBlock = bs->nextCBlock;
+ bs->nextCBlock = tmp;
+ }
+
+ /* Writes the block header */
+ static void writeBlockHeader(void* op, size_t cSize, size_t blockSize, U32 lastBlock) {
+ U32 const cBlockHeader = cSize == 1 ?
+ lastBlock + (((U32)bt_rle)<<1) + (U32)(blockSize << 3) :
+ lastBlock + (((U32)bt_compressed)<<1) + (U32)(cSize << 3);
+ MEM_writeLE24(op, cBlockHeader);
+ DEBUGLOG(3, "writeBlockHeader: cSize: %zu blockSize: %zu lastBlock: %u", cSize, blockSize, lastBlock);
+ }
+
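The 3-byte block header built above follows the zstd frame format: bit 0 carries lastBlock, bits 1-2 the block type, and the upper 21 bits the size. A worked example for a non-final compressed block (bt_compressed has value 2 in the format) of 1000 compressed bytes:

    /* header = lastBlock + (bt_compressed << 1) + (cSize << 3)         */
    /*        = 0 + (2 << 1) + (1000 << 3) = 4 + 8000 = 8004 = 0x001F44 */
    /* MEM_writeLE24 stores it little-endian: 44 1F 00                  */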
+ /** ZSTD_buildBlockEntropyStats_literals() :
+ * Builds entropy for the literals.
+ * Stores literals block type (raw, rle, compressed, repeat) and
+ * huffman description table to hufMetadata.
+ * Requires ENTROPY_WORKSPACE_SIZE workspace
+ * @return : size of huffman description table or error code */
+ static size_t ZSTD_buildBlockEntropyStats_literals(void* const src, size_t srcSize,
+ const ZSTD_hufCTables_t* prevHuf,
+ ZSTD_hufCTables_t* nextHuf,
+ ZSTD_hufCTablesMetadata_t* hufMetadata,
+ const int disableLiteralsCompression,
+ void* workspace, size_t wkspSize)
+ {
+ BYTE* const wkspStart = (BYTE*)workspace;
+ BYTE* const wkspEnd = wkspStart + wkspSize;
+ BYTE* const countWkspStart = wkspStart;
+ unsigned* const countWksp = (unsigned*)workspace;
+ const size_t countWkspSize = (HUF_SYMBOLVALUE_MAX + 1) * sizeof(unsigned);
+ BYTE* const nodeWksp = countWkspStart + countWkspSize;
+ const size_t nodeWkspSize = wkspEnd-nodeWksp;
+ unsigned maxSymbolValue = HUF_SYMBOLVALUE_MAX;
+ unsigned huffLog = HUF_TABLELOG_DEFAULT;
+ HUF_repeat repeat = prevHuf->repeatMode;
+ DEBUGLOG(5, "ZSTD_buildBlockEntropyStats_literals (srcSize=%zu)", srcSize);
+
+ /* Prepare nextEntropy assuming reusing the existing table */
+ ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
+
+ if (disableLiteralsCompression) {
+ DEBUGLOG(5, "set_basic - disabled");
+ hufMetadata->hType = set_basic;
+ return 0;
+ }
+
+ /* small ? don't even attempt compression (speed opt) */
+ #ifndef COMPRESS_LITERALS_SIZE_MIN
+ #define COMPRESS_LITERALS_SIZE_MIN 63
+ #endif
+ { size_t const minLitSize = (prevHuf->repeatMode == HUF_repeat_valid) ? 6 : COMPRESS_LITERALS_SIZE_MIN;
+ if (srcSize <= minLitSize) {
+ DEBUGLOG(5, "set_basic - too small");
+ hufMetadata->hType = set_basic;
+ return 0;
+ }
+ }
+
+ /* Scan input and build symbol stats */
+ { size_t const largest = HIST_count_wksp (countWksp, &maxSymbolValue, (const BYTE*)src, srcSize, workspace, wkspSize);
+ FORWARD_IF_ERROR(largest, "HIST_count_wksp failed");
+ if (largest == srcSize) {
+ DEBUGLOG(5, "set_rle");
+ hufMetadata->hType = set_rle;
+ return 0;
+ }
+ if (largest <= (srcSize >> 7)+4) {
+ DEBUGLOG(5, "set_basic - no gain");
+ hufMetadata->hType = set_basic;
+ return 0;
+ }
+ }
+
+ /* Validate the previous Huffman table */
+ if (repeat == HUF_repeat_check && !HUF_validateCTable((HUF_CElt const*)prevHuf->CTable, countWksp, maxSymbolValue)) {
+ repeat = HUF_repeat_none;
+ }
+
+ /* Build Huffman Tree */
+ ZSTD_memset(nextHuf->CTable, 0, sizeof(nextHuf->CTable));
+ huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue);
+ { size_t const maxBits = HUF_buildCTable_wksp((HUF_CElt*)nextHuf->CTable, countWksp,
+ maxSymbolValue, huffLog,
+ nodeWksp, nodeWkspSize);
+ FORWARD_IF_ERROR(maxBits, "HUF_buildCTable_wksp");
+ huffLog = (U32)maxBits;
+ { /* Build and write the CTable */
+ size_t const newCSize = HUF_estimateCompressedSize(
+ (HUF_CElt*)nextHuf->CTable, countWksp, maxSymbolValue);
+ size_t const hSize = HUF_writeCTable_wksp(
+ hufMetadata->hufDesBuffer, sizeof(hufMetadata->hufDesBuffer),
+ (HUF_CElt*)nextHuf->CTable, maxSymbolValue, huffLog,
+ nodeWksp, nodeWkspSize);
+ /* Check against repeating the previous CTable */
+ if (repeat != HUF_repeat_none) {
+ size_t const oldCSize = HUF_estimateCompressedSize(
+ (HUF_CElt const*)prevHuf->CTable, countWksp, maxSymbolValue);
+ if (oldCSize < srcSize && (oldCSize <= hSize + newCSize || hSize + 12 >= srcSize)) {
+ DEBUGLOG(5, "set_repeat - smaller");
+ ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
+ hufMetadata->hType = set_repeat;
+ return 0;
+ }
+ }
+ if (newCSize + hSize >= srcSize) {
+ DEBUGLOG(5, "set_basic - no gains");
+ ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
+ hufMetadata->hType = set_basic;
+ return 0;
+ }
+ DEBUGLOG(5, "set_compressed (hSize=%u)", (U32)hSize);
+ hufMetadata->hType = set_compressed;
+ nextHuf->repeatMode = HUF_repeat_check;
+ return hSize;
+ }
+ }
+ }
+
+
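The set_repeat branch above is a pure size race: reusing the previous table costs oldCSize, while a fresh tree costs its description (hSize) plus the new payload (newCSize). With made-up sizes:

    /* srcSize = 4096, oldCSize = 2900, newCSize = 2800, hSize = 150    */
    /* oldCSize (2900) <= hSize + newCSize (2950) -> set_repeat wins:   */
    /* the 100-byte payload saving does not pay for the 150-byte table. */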
+ /* ZSTD_buildDummySequencesStatistics():
+ * Returns a ZSTD_symbolEncodingTypeStats_t with all encoding types as set_basic,
+ * and updates nextEntropy to the appropriate repeatMode.
+ */
+ static ZSTD_symbolEncodingTypeStats_t
+ ZSTD_buildDummySequencesStatistics(ZSTD_fseCTables_t* nextEntropy) {
+ ZSTD_symbolEncodingTypeStats_t stats = {set_basic, set_basic, set_basic, 0, 0};
+ nextEntropy->litlength_repeatMode = FSE_repeat_none;
+ nextEntropy->offcode_repeatMode = FSE_repeat_none;
+ nextEntropy->matchlength_repeatMode = FSE_repeat_none;
+ return stats;
+ }
+
+ /** ZSTD_buildBlockEntropyStats_sequences() :
+ * Builds entropy for the sequences.
+ * Stores symbol compression modes and fse table to fseMetadata.
+ * Requires ENTROPY_WORKSPACE_SIZE wksp.
+ * @return : size of fse tables or error code */
+ static size_t ZSTD_buildBlockEntropyStats_sequences(seqStore_t* seqStorePtr,
+ const ZSTD_fseCTables_t* prevEntropy,
+ ZSTD_fseCTables_t* nextEntropy,
+ const ZSTD_CCtx_params* cctxParams,
+ ZSTD_fseCTablesMetadata_t* fseMetadata,
+ void* workspace, size_t wkspSize)
  {
- ZSTD_compressedBlockState_t* const tmp = zc->blockState.prevCBlock;
- zc->blockState.prevCBlock = zc->blockState.nextCBlock;
- zc->blockState.nextCBlock = tmp;
+ ZSTD_strategy const strategy = cctxParams->cParams.strategy;
+ size_t const nbSeq = seqStorePtr->sequences - seqStorePtr->sequencesStart;
+ BYTE* const ostart = fseMetadata->fseTablesBuffer;
+ BYTE* const oend = ostart + sizeof(fseMetadata->fseTablesBuffer);
+ BYTE* op = ostart;
+ unsigned* countWorkspace = (unsigned*)workspace;
+ unsigned* entropyWorkspace = countWorkspace + (MaxSeq + 1);
+ size_t entropyWorkspaceSize = wkspSize - (MaxSeq + 1) * sizeof(*countWorkspace);
+ ZSTD_symbolEncodingTypeStats_t stats;
+
+ DEBUGLOG(5, "ZSTD_buildBlockEntropyStats_sequences (nbSeq=%zu)", nbSeq);
+ stats = nbSeq != 0 ? ZSTD_buildSequencesStatistics(seqStorePtr, nbSeq,
+ prevEntropy, nextEntropy, op, oend,
+ strategy, countWorkspace,
+ entropyWorkspace, entropyWorkspaceSize)
+ : ZSTD_buildDummySequencesStatistics(nextEntropy);
+ FORWARD_IF_ERROR(stats.size, "ZSTD_buildSequencesStatistics failed!");
+ fseMetadata->llType = (symbolEncodingType_e) stats.LLtype;
+ fseMetadata->ofType = (symbolEncodingType_e) stats.Offtype;
+ fseMetadata->mlType = (symbolEncodingType_e) stats.MLtype;
+ fseMetadata->lastCountSize = stats.lastCountSize;
+ return stats.size;
+ }
+
+
+ /** ZSTD_buildBlockEntropyStats() :
+ * Builds entropy for the block.
+ * Requires workspace size ENTROPY_WORKSPACE_SIZE
+ *
+ * @return : 0 on success or error code
+ */
+ size_t ZSTD_buildBlockEntropyStats(seqStore_t* seqStorePtr,
+ const ZSTD_entropyCTables_t* prevEntropy,
+ ZSTD_entropyCTables_t* nextEntropy,
+ const ZSTD_CCtx_params* cctxParams,
+ ZSTD_entropyCTablesMetadata_t* entropyMetadata,
+ void* workspace, size_t wkspSize)
+ {
+ size_t const litSize = seqStorePtr->lit - seqStorePtr->litStart;
+ entropyMetadata->hufMetadata.hufDesSize =
+ ZSTD_buildBlockEntropyStats_literals(seqStorePtr->litStart, litSize,
+ &prevEntropy->huf, &nextEntropy->huf,
+ &entropyMetadata->hufMetadata,
+ ZSTD_disableLiteralsCompression(cctxParams),
+ workspace, wkspSize);
+ FORWARD_IF_ERROR(entropyMetadata->hufMetadata.hufDesSize, "ZSTD_buildBlockEntropyStats_literals failed");
+ entropyMetadata->fseMetadata.fseTablesSize =
+ ZSTD_buildBlockEntropyStats_sequences(seqStorePtr,
+ &prevEntropy->fse, &nextEntropy->fse,
+ cctxParams,
+ &entropyMetadata->fseMetadata,
+ workspace, wkspSize);
+ FORWARD_IF_ERROR(entropyMetadata->fseMetadata.fseTablesSize, "ZSTD_buildBlockEntropyStats_sequences failed");
+ return 0;
+ }
+
+ /* Returns the size estimate for the literals section (header + content) of a block */
+ static size_t ZSTD_estimateBlockSize_literal(const BYTE* literals, size_t litSize,
+ const ZSTD_hufCTables_t* huf,
+ const ZSTD_hufCTablesMetadata_t* hufMetadata,
+ void* workspace, size_t wkspSize,
+ int writeEntropy)
+ {
+ unsigned* const countWksp = (unsigned*)workspace;
+ unsigned maxSymbolValue = HUF_SYMBOLVALUE_MAX;
+ size_t literalSectionHeaderSize = 3 + (litSize >= 1 KB) + (litSize >= 16 KB);
+ U32 singleStream = litSize < 256;
+
+ if (hufMetadata->hType == set_basic) return litSize;
+ else if (hufMetadata->hType == set_rle) return 1;
+ else if (hufMetadata->hType == set_compressed || hufMetadata->hType == set_repeat) {
+ size_t const largest = HIST_count_wksp (countWksp, &maxSymbolValue, (const BYTE*)literals, litSize, workspace, wkspSize);
+ if (ZSTD_isError(largest)) return litSize;
+ { size_t cLitSizeEstimate = HUF_estimateCompressedSize((const HUF_CElt*)huf->CTable, countWksp, maxSymbolValue);
+ if (writeEntropy) cLitSizeEstimate += hufMetadata->hufDesSize;
+ if (!singleStream) cLitSizeEstimate += 6; /* multi-stream huffman uses 6-byte jump table */
+ return cLitSizeEstimate + literalSectionHeaderSize;
+ } }
+ assert(0); /* impossible */
+ return 0;
+ }
+
+ /* Returns the size estimate for the FSE-compressed symbols (of, ml, ll) of a block */
+ static size_t ZSTD_estimateBlockSize_symbolType(symbolEncodingType_e type,
+ const BYTE* codeTable, size_t nbSeq, unsigned maxCode,
+ const FSE_CTable* fseCTable,
+ const U32* additionalBits,
+ short const* defaultNorm, U32 defaultNormLog, U32 defaultMax,
+ void* workspace, size_t wkspSize)
+ {
+ unsigned* const countWksp = (unsigned*)workspace;
+ const BYTE* ctp = codeTable;
+ const BYTE* const ctStart = ctp;
+ const BYTE* const ctEnd = ctStart + nbSeq;
+ size_t cSymbolTypeSizeEstimateInBits = 0;
+ unsigned max = maxCode;
+
+ HIST_countFast_wksp(countWksp, &max, codeTable, nbSeq, workspace, wkspSize); /* can't fail */
+ if (type == set_basic) {
+ /* We selected this encoding type, so it must be valid. */
+ assert(max <= defaultMax);
+ (void)defaultMax;
+ cSymbolTypeSizeEstimateInBits = ZSTD_crossEntropyCost(defaultNorm, defaultNormLog, countWksp, max);
+ } else if (type == set_rle) {
+ cSymbolTypeSizeEstimateInBits = 0;
+ } else if (type == set_compressed || type == set_repeat) {
+ cSymbolTypeSizeEstimateInBits = ZSTD_fseBitCost(fseCTable, countWksp, max);
+ }
+ if (ZSTD_isError(cSymbolTypeSizeEstimateInBits)) {
+ return nbSeq * 10;
+ }
+ while (ctp < ctEnd) {
+ if (additionalBits) cSymbolTypeSizeEstimateInBits += additionalBits[*ctp];
+ else cSymbolTypeSizeEstimateInBits += *ctp; /* for offset, offset code is also the number of additional bits */
+ ctp++;
+ }
+ return cSymbolTypeSizeEstimateInBits >> 3;
+ }
+
+ /* Returns the size estimate for the sequences section (header + content) of a block */
+ static size_t ZSTD_estimateBlockSize_sequences(const BYTE* ofCodeTable,
+ const BYTE* llCodeTable,
+ const BYTE* mlCodeTable,
+ size_t nbSeq,
+ const ZSTD_fseCTables_t* fseTables,
+ const ZSTD_fseCTablesMetadata_t* fseMetadata,
+ void* workspace, size_t wkspSize,
+ int writeEntropy)
+ {
+ size_t sequencesSectionHeaderSize = 1 /* seqHead */ + 1 /* min seqSize size */ + (nbSeq >= 128) + (nbSeq >= LONGNBSEQ);
+ size_t cSeqSizeEstimate = 0;
+ cSeqSizeEstimate += ZSTD_estimateBlockSize_symbolType(fseMetadata->ofType, ofCodeTable, nbSeq, MaxOff,
+ fseTables->offcodeCTable, NULL,
+ OF_defaultNorm, OF_defaultNormLog, DefaultMaxOff,
+ workspace, wkspSize);
+ cSeqSizeEstimate += ZSTD_estimateBlockSize_symbolType(fseMetadata->llType, llCodeTable, nbSeq, MaxLL,
+ fseTables->litlengthCTable, LL_bits,
+ LL_defaultNorm, LL_defaultNormLog, MaxLL,
+ workspace, wkspSize);
+ cSeqSizeEstimate += ZSTD_estimateBlockSize_symbolType(fseMetadata->mlType, mlCodeTable, nbSeq, MaxML,
+ fseTables->matchlengthCTable, ML_bits,
+ ML_defaultNorm, ML_defaultNormLog, MaxML,
+ workspace, wkspSize);
+ if (writeEntropy) cSeqSizeEstimate += fseMetadata->fseTablesSize;
+ return cSeqSizeEstimate + sequencesSectionHeaderSize;
+ }
+
+ /* Returns the size estimate for a given stream of literals, of, ll, ml */
+ static size_t ZSTD_estimateBlockSize(const BYTE* literals, size_t litSize,
+ const BYTE* ofCodeTable,
+ const BYTE* llCodeTable,
+ const BYTE* mlCodeTable,
+ size_t nbSeq,
+ const ZSTD_entropyCTables_t* entropy,
+ const ZSTD_entropyCTablesMetadata_t* entropyMetadata,
+ void* workspace, size_t wkspSize,
+ int writeLitEntropy, int writeSeqEntropy) {
+ size_t const literalsSize = ZSTD_estimateBlockSize_literal(literals, litSize,
+ &entropy->huf, &entropyMetadata->hufMetadata,
+ workspace, wkspSize, writeLitEntropy);
+ size_t const seqSize = ZSTD_estimateBlockSize_sequences(ofCodeTable, llCodeTable, mlCodeTable,
+ nbSeq, &entropy->fse, &entropyMetadata->fseMetadata,
+ workspace, wkspSize, writeSeqEntropy);
+ return seqSize + literalsSize + ZSTD_blockHeaderSize;
+ }
+
+ /* Builds entropy statistics and uses them for blocksize estimation.
+ *
+ * Returns the estimated compressed size of the seqStore, or a zstd error.
+ */
+ static size_t ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(seqStore_t* seqStore, const ZSTD_CCtx* zc) {
+ ZSTD_entropyCTablesMetadata_t entropyMetadata;
+ FORWARD_IF_ERROR(ZSTD_buildBlockEntropyStats(seqStore,
+ &zc->blockState.prevCBlock->entropy,
+ &zc->blockState.nextCBlock->entropy,
+ &zc->appliedParams,
+ &entropyMetadata,
+ zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE /* statically allocated in resetCCtx */), "");
+ return ZSTD_estimateBlockSize(seqStore->litStart, (size_t)(seqStore->lit - seqStore->litStart),
+ seqStore->ofCode, seqStore->llCode, seqStore->mlCode,
+ (size_t)(seqStore->sequences - seqStore->sequencesStart),
+ &zc->blockState.nextCBlock->entropy, &entropyMetadata, zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE,
+ (int)(entropyMetadata.hufMetadata.hType == set_compressed), 1);
+ }
+
+ /* Returns literals bytes represented in a seqStore */
+ static size_t ZSTD_countSeqStoreLiteralsBytes(const seqStore_t* const seqStore) {
+ size_t literalsBytes = 0;
+ size_t const nbSeqs = seqStore->sequences - seqStore->sequencesStart;
+ size_t i;
+ for (i = 0; i < nbSeqs; ++i) {
+ seqDef seq = seqStore->sequencesStart[i];
+ literalsBytes += seq.litLength;
+ if (i == seqStore->longLengthPos && seqStore->longLengthType == ZSTD_llt_literalLength) {
+ literalsBytes += 0x10000;
+ }
+ }
+ return literalsBytes;
+ }
+
+ /* Returns match bytes represented in a seqStore */
+ static size_t ZSTD_countSeqStoreMatchBytes(const seqStore_t* const seqStore) {
+ size_t matchBytes = 0;
+ size_t const nbSeqs = seqStore->sequences - seqStore->sequencesStart;
+ size_t i;
+ for (i = 0; i < nbSeqs; ++i) {
+ seqDef seq = seqStore->sequencesStart[i];
+ matchBytes += seq.matchLength + MINMATCH;
+ if (i == seqStore->longLengthPos && seqStore->longLengthType == ZSTD_llt_matchLength) {
+ matchBytes += 0x10000;
+ }
+ }
+ return matchBytes;
+ }
+
+ /* Derives the seqStore that is a chunk of the originalSeqStore from [startIdx, endIdx).
+ * Stores the result in resultSeqStore.
+ */
+ static void ZSTD_deriveSeqStoreChunk(seqStore_t* resultSeqStore,
+ const seqStore_t* originalSeqStore,
+ size_t startIdx, size_t endIdx) {
+ BYTE* const litEnd = originalSeqStore->lit;
+ size_t literalsBytes;
+ size_t literalsBytesPreceding = 0;
+
+ *resultSeqStore = *originalSeqStore;
+ if (startIdx > 0) {
+ resultSeqStore->sequences = originalSeqStore->sequencesStart + startIdx;
+ literalsBytesPreceding = ZSTD_countSeqStoreLiteralsBytes(resultSeqStore);
+ }
+
+ /* Move longLengthPos into the correct position if necessary */
+ if (originalSeqStore->longLengthType != ZSTD_llt_none) {
+ if (originalSeqStore->longLengthPos < startIdx || originalSeqStore->longLengthPos > endIdx) {
+ resultSeqStore->longLengthType = ZSTD_llt_none;
+ } else {
+ resultSeqStore->longLengthPos -= (U32)startIdx;
+ }
+ }
+ resultSeqStore->sequencesStart = originalSeqStore->sequencesStart + startIdx;
+ resultSeqStore->sequences = originalSeqStore->sequencesStart + endIdx;
+ literalsBytes = ZSTD_countSeqStoreLiteralsBytes(resultSeqStore);
+ resultSeqStore->litStart += literalsBytesPreceding;
+ if (endIdx == (size_t)(originalSeqStore->sequences - originalSeqStore->sequencesStart)) {
+ /* This accounts for possible last literals if the derived chunk reaches the end of the block */
+ resultSeqStore->lit = litEnd;
+ } else {
+ resultSeqStore->lit = resultSeqStore->litStart+literalsBytes;
+ }
+ resultSeqStore->llCode += startIdx;
+ resultSeqStore->mlCode += startIdx;
+ resultSeqStore->ofCode += startIdx;
+ }
+
+ /**
+ * Returns the raw offset represented by the combination of offCode, ll0, and repcode history.
+ * offCode must be an offCode representing a repcode, therefore in the range of [0, 2].
+ */
+ static U32 ZSTD_resolveRepcodeToRawOffset(const U32 rep[ZSTD_REP_NUM], const U32 offCode, const U32 ll0) {
+ U32 const adjustedOffCode = offCode + ll0;
+ assert(offCode < ZSTD_REP_NUM);
+ if (adjustedOffCode == ZSTD_REP_NUM) {
+ /* litlength == 0 and offCode == 2 implies selection of first repcode - 1 */
+ assert(rep[0] > 0);
+ return rep[0] - 1;
+ }
+ return rep[adjustedOffCode];
+ }
+
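A concrete trace of ZSTD_resolveRepcodeToRawOffset() with history rep = {100, 200, 300} and ZSTD_REP_NUM == 3:

    /* offCode=1, ll0=0 -> adjusted 1 -> rep[1] == 200                          */
    /* offCode=0, ll0=1 -> adjusted 1 -> rep[1] == 200 (ll0 shifts the window)  */
    /* offCode=2, ll0=1 -> adjusted 3 == ZSTD_REP_NUM -> rep[0] - 1 == 99       */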
+ /**
+ * ZSTD_seqStore_resolveOffCodes() reconciles any possible divergences in offset history that may arise
+ * due to emission of RLE/raw blocks that disturb the offset history, and replaces any repcodes within
+ * the seqStore that may be invalid.
+ *
+ * dRepcodes are updated as would be on the decompression side. cRepcodes are updated exactly in
+ * accordance with the seqStore.
+ */
+ static void ZSTD_seqStore_resolveOffCodes(repcodes_t* const dRepcodes, repcodes_t* const cRepcodes,
+ seqStore_t* const seqStore, U32 const nbSeq) {
+ U32 idx = 0;
+ for (; idx < nbSeq; ++idx) {
+ seqDef* const seq = seqStore->sequencesStart + idx;
+ U32 const ll0 = (seq->litLength == 0);
+ U32 offCode = seq->offset - 1;
+ assert(seq->offset > 0);
+ if (offCode <= ZSTD_REP_MOVE) {
+ U32 const dRawOffset = ZSTD_resolveRepcodeToRawOffset(dRepcodes->rep, offCode, ll0);
+ U32 const cRawOffset = ZSTD_resolveRepcodeToRawOffset(cRepcodes->rep, offCode, ll0);
+ /* Adjust simulated decompression repcode history if we come across a mismatch. Replace
+ * the repcode with the offset it actually references, determined by the compression
+ * repcode history.
+ */
+ if (dRawOffset != cRawOffset) {
+ seq->offset = cRawOffset + ZSTD_REP_NUM;
+ }
+ }
+ /* Compression repcode history is always updated with values directly from the unmodified seqStore.
+ * Decompression repcode history may use modified seq->offset value taken from compression repcode history.
+ */
+ *dRepcodes = ZSTD_updateRep(dRepcodes->rep, seq->offset - 1, ll0);
+ *cRepcodes = ZSTD_updateRep(cRepcodes->rep, offCode, ll0);
+ }
+ }
+
+ /* ZSTD_compressSeqStore_singleBlock():
+ * Compresses a seqStore into a block with a block header, into the buffer dst.
+ *
+ * Returns the total size of that block (including header) or a ZSTD error code.
+ */
+ static size_t ZSTD_compressSeqStore_singleBlock(ZSTD_CCtx* zc, seqStore_t* const seqStore,
+ repcodes_t* const dRep, repcodes_t* const cRep,
+ void* dst, size_t dstCapacity,
+ const void* src, size_t srcSize,
+ U32 lastBlock, U32 isPartition) {
+ const U32 rleMaxLength = 25;
+ BYTE* op = (BYTE*)dst;
+ const BYTE* ip = (const BYTE*)src;
+ size_t cSize;
+ size_t cSeqsSize;
+
+ /* In case of an RLE or raw block, the simulated decompression repcode history must be reset */
+ repcodes_t const dRepOriginal = *dRep;
+ if (isPartition)
+ ZSTD_seqStore_resolveOffCodes(dRep, cRep, seqStore, (U32)(seqStore->sequences - seqStore->sequencesStart));
+
+ cSeqsSize = ZSTD_entropyCompressSeqStore(seqStore,
+ &zc->blockState.prevCBlock->entropy, &zc->blockState.nextCBlock->entropy,
+ &zc->appliedParams,
+ op + ZSTD_blockHeaderSize, dstCapacity - ZSTD_blockHeaderSize,
+ srcSize,
+ zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE /* statically allocated in resetCCtx */,
+ zc->bmi2);
+ FORWARD_IF_ERROR(cSeqsSize, "ZSTD_entropyCompressSeqStore failed!");
+
+ if (!zc->isFirstBlock &&
+ cSeqsSize < rleMaxLength &&
+ ZSTD_isRLE((BYTE const*)src, srcSize)) {
+ /* We don't want to emit our first block as a RLE even if it qualifies because
+ * doing so will cause the decoder (cli only) to throw a "should consume all input error."
+ * This is only an issue for zstd <= v1.4.3
+ */
+ cSeqsSize = 1;
+ }
+
+ if (zc->seqCollector.collectSequences) {
+ ZSTD_copyBlockSequences(zc);
+ ZSTD_blockState_confirmRepcodesAndEntropyTables(&zc->blockState);
+ return 0;
+ }
+
+ if (zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode == FSE_repeat_valid)
+ zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode = FSE_repeat_check;
+
+ if (cSeqsSize == 0) {
+ cSize = ZSTD_noCompressBlock(op, dstCapacity, ip, srcSize, lastBlock);
+ FORWARD_IF_ERROR(cSize, "Nocompress block failed");
+ DEBUGLOG(4, "Writing out nocompress block, size: %zu", cSize);
+ *dRep = dRepOriginal; /* reset simulated decompression repcode history */
+ } else if (cSeqsSize == 1) {
+ cSize = ZSTD_rleCompressBlock(op, dstCapacity, *ip, srcSize, lastBlock);
+ FORWARD_IF_ERROR(cSize, "RLE compress block failed");
+ DEBUGLOG(4, "Writing out RLE block, size: %zu", cSize);
+ *dRep = dRepOriginal; /* reset simulated decompression repcode history */
+ } else {
+ ZSTD_blockState_confirmRepcodesAndEntropyTables(&zc->blockState);
+ writeBlockHeader(op, cSeqsSize, srcSize, lastBlock);
+ cSize = ZSTD_blockHeaderSize + cSeqsSize;
+ DEBUGLOG(4, "Writing out compressed block, size: %zu", cSize);
+ }
+ return cSize;
+ }
+
+ /* Struct to keep track of where we are in our recursive calls. */
+ typedef struct {
+ U32* splitLocations; /* Array of split indices */
+ size_t idx; /* The current index within splitLocations being worked on */
+ } seqStoreSplits;
+
+ #define MIN_SEQUENCES_BLOCK_SPLITTING 300
+ #define MAX_NB_SPLITS 196
+
+ /* Helper function to perform the recursive search for block splits.
+ * Estimates the cost of seqStore prior to split, and estimates the cost of splitting the sequences in half.
+ * If advantageous to split, then we recurse down the two sub-blocks. If not, or if an error occurred in estimation, then
+ * we do not recurse.
+ *
+ * Note: The recursion depth is capped by a heuristic minimum number of sequences, defined by MIN_SEQUENCES_BLOCK_SPLITTING.
+ * In theory, this means the absolute largest recursion depth is 10 == log2(maxNbSeqInBlock/MIN_SEQUENCES_BLOCK_SPLITTING).
+ * In practice, recursion depth usually doesn't go beyond 4.
+ *
+ * Furthermore, the number of splits is capped by MAX_NB_SPLITS. At MAX_NB_SPLITS == 196 with the current existing blockSize
+ * maximum of 128 KB, this value is actually impossible to reach.
+ */
+ static void ZSTD_deriveBlockSplitsHelper(seqStoreSplits* splits, size_t startIdx, size_t endIdx,
+ const ZSTD_CCtx* zc, const seqStore_t* origSeqStore) {
+ seqStore_t fullSeqStoreChunk;
+ seqStore_t firstHalfSeqStore;
+ seqStore_t secondHalfSeqStore;
+ size_t estimatedOriginalSize;
+ size_t estimatedFirstHalfSize;
+ size_t estimatedSecondHalfSize;
+ size_t midIdx = (startIdx + endIdx)/2;
+
+ if (endIdx - startIdx < MIN_SEQUENCES_BLOCK_SPLITTING || splits->idx >= MAX_NB_SPLITS) {
+ return;
+ }
+ ZSTD_deriveSeqStoreChunk(&fullSeqStoreChunk, origSeqStore, startIdx, endIdx);
+ ZSTD_deriveSeqStoreChunk(&firstHalfSeqStore, origSeqStore, startIdx, midIdx);
+ ZSTD_deriveSeqStoreChunk(&secondHalfSeqStore, origSeqStore, midIdx, endIdx);
+ estimatedOriginalSize = ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(&fullSeqStoreChunk, zc);
+ estimatedFirstHalfSize = ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(&firstHalfSeqStore, zc);
+ estimatedSecondHalfSize = ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(&secondHalfSeqStore, zc);
+ DEBUGLOG(5, "Estimated original block size: %zu -- First half split: %zu -- Second half split: %zu",
+ estimatedOriginalSize, estimatedFirstHalfSize, estimatedSecondHalfSize);
+ if (ZSTD_isError(estimatedOriginalSize) || ZSTD_isError(estimatedFirstHalfSize) || ZSTD_isError(estimatedSecondHalfSize)) {
+ return;
+ }
+ if (estimatedFirstHalfSize + estimatedSecondHalfSize < estimatedOriginalSize) {
+ ZSTD_deriveBlockSplitsHelper(splits, startIdx, midIdx, zc, origSeqStore);
+ splits->splitLocations[splits->idx] = (U32)midIdx;
+ splits->idx++;
+ ZSTD_deriveBlockSplitsHelper(splits, midIdx, endIdx, zc, origSeqStore);
+ }
+ }
+
+ /* Base recursive function. Populates a table with intra-block partition indices that can improve compression ratio.
+ *
+ * Returns the number of splits made (which equals the size of the partition table - 1).
+ */
+ static size_t ZSTD_deriveBlockSplits(ZSTD_CCtx* zc, U32 partitions[], U32 nbSeq) {
+ seqStoreSplits splits = {partitions, 0};
+ if (nbSeq <= 4) {
+ DEBUGLOG(4, "ZSTD_deriveBlockSplits: Too few sequences to split");
+ /* Refuse to try and split anything with less than 4 sequences */
+ return 0;
+ }
+ ZSTD_deriveBlockSplitsHelper(&splits, 0, nbSeq, zc, &zc->seqStore);
+ splits.splitLocations[splits.idx] = nbSeq;
+ DEBUGLOG(5, "ZSTD_deriveBlockSplits: final nb partitions: %zu", splits.idx+1);
+ return splits.idx;
+ }
+
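For example, with nbSeq = 1200 and profitable splits found at sequences 300 and 600, partitions ends up holding {300, 600, 1200} (the final entry is always the nbSeq sentinel written above) and the function returns 2, so the caller walks three chunks: [0,300), [300,600), [600,1200).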
3594
+ /* ZSTD_compressBlock_splitBlock():
3595
+ * Attempts to split a given block into multiple blocks to improve compression ratio.
3596
+ *
3597
+ * Returns combined size of all blocks (which includes headers), or a ZSTD error code.
3598
+ */
3599
+ static size_t ZSTD_compressBlock_splitBlock_internal(ZSTD_CCtx* zc, void* dst, size_t dstCapacity,
3600
+ const void* src, size_t blockSize, U32 lastBlock, U32 nbSeq) {
3601
+ size_t cSize = 0;
3602
+ const BYTE* ip = (const BYTE*)src;
3603
+ BYTE* op = (BYTE*)dst;
3604
+ U32 partitions[MAX_NB_SPLITS];
3605
+ size_t i = 0;
3606
+ size_t srcBytesTotal = 0;
3607
+ size_t numSplits = ZSTD_deriveBlockSplits(zc, partitions, nbSeq);
3608
+ seqStore_t nextSeqStore;
3609
+ seqStore_t currSeqStore;
3610
+
3611
+ /* If a block is split and some partitions are emitted as RLE/uncompressed, then repcode history
3612
+ * may become invalid. In order to reconcile potentially invalid repcodes, we keep track of two
3613
+ * separate repcode histories that simulate repcode history on compression and decompression side,
3614
+ * and use the histories to determine whether we must replace a particular repcode with its raw offset.
3615
+ *
3616
+ * 1) cRep gets updated for each partition, regardless of whether the block was emitted as uncompressed
3617
+ * or RLE. This allows us to retrieve the offset value that an invalid repcode references within
3618
+ * a nocompress/RLE block.
3619
+ * 2) dRep gets updated only for compressed partitions, and when a repcode gets replaced, will use
3620
+ * the replacement offset value rather than the original repcode to update the repcode history.
3621
+ * dRep also will be the final repcode history sent to the next block.
3622
+ *
3623
+ * See ZSTD_seqStore_resolveOffCodes() for more details.
3624
+ */
3625
+ repcodes_t dRep;
3626
+ repcodes_t cRep;
3627
+ ZSTD_memcpy(dRep.rep, zc->blockState.prevCBlock->rep, sizeof(repcodes_t));
3628
+ ZSTD_memcpy(cRep.rep, zc->blockState.prevCBlock->rep, sizeof(repcodes_t));
3629
+
3630
+ DEBUGLOG(4, "ZSTD_compressBlock_splitBlock_internal (dstCapacity=%u, dictLimit=%u, nextToUpdate=%u)",
3631
+ (unsigned)dstCapacity, (unsigned)zc->blockState.matchState.window.dictLimit,
3632
+ (unsigned)zc->blockState.matchState.nextToUpdate);
3633
+
3634
+ if (numSplits == 0) {
3635
+ size_t cSizeSingleBlock = ZSTD_compressSeqStore_singleBlock(zc, &zc->seqStore,
3636
+ &dRep, &cRep,
3637
+ op, dstCapacity,
3638
+ ip, blockSize,
3639
+ lastBlock, 0 /* isPartition */);
3640
+ FORWARD_IF_ERROR(cSizeSingleBlock, "Compressing single block from splitBlock_internal() failed!");
3641
+ DEBUGLOG(5, "ZSTD_compressBlock_splitBlock_internal: No splits");
3642
+ assert(cSizeSingleBlock <= ZSTD_BLOCKSIZE_MAX + ZSTD_blockHeaderSize);
3643
+ return cSizeSingleBlock;
3644
+ }
3645
+
3646
+ ZSTD_deriveSeqStoreChunk(&currSeqStore, &zc->seqStore, 0, partitions[0]);
3647
+ for (i = 0; i <= numSplits; ++i) {
3648
+ size_t srcBytes;
3649
+ size_t cSizeChunk;
3650
+ U32 const lastPartition = (i == numSplits);
3651
+ U32 lastBlockEntireSrc = 0;
3652
+
3653
+ srcBytes = ZSTD_countSeqStoreLiteralsBytes(&currSeqStore) + ZSTD_countSeqStoreMatchBytes(&currSeqStore);
3654
+ srcBytesTotal += srcBytes;
3655
+ if (lastPartition) {
3656
+ /* This is the final partition, need to account for possible last literals */
3657
+ srcBytes += blockSize - srcBytesTotal;
3658
+ lastBlockEntireSrc = lastBlock;
3659
+ } else {
3660
+ ZSTD_deriveSeqStoreChunk(&nextSeqStore, &zc->seqStore, partitions[i], partitions[i+1]);
3661
+ }
3662
+
3663
+ cSizeChunk = ZSTD_compressSeqStore_singleBlock(zc, &currSeqStore,
3664
+ &dRep, &cRep,
3665
+ op, dstCapacity,
3666
+ ip, srcBytes,
3667
+ lastBlockEntireSrc, 1 /* isPartition */);
3668
+ DEBUGLOG(5, "Estimated size: %zu actual size: %zu", ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(&currSeqStore, zc), cSizeChunk);
3669
+ FORWARD_IF_ERROR(cSizeChunk, "Compressing chunk failed!");
3670
+
3671
+ ip += srcBytes;
3672
+ op += cSizeChunk;
3673
+ dstCapacity -= cSizeChunk;
3674
+ cSize += cSizeChunk;
3675
+ currSeqStore = nextSeqStore;
3676
+ assert(cSizeChunk <= ZSTD_BLOCKSIZE_MAX + ZSTD_blockHeaderSize);
3677
+ }
3678
+ /* cRep and dRep may have diverged during the compression. If so, we use the dRep repcodes
3679
+ * for the next block.
3680
+ */
3681
+ ZSTD_memcpy(zc->blockState.prevCBlock->rep, dRep.rep, sizeof(repcodes_t));
3682
+ return cSize;
3683
+ }
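The dual repcode-history bookkeeping above is the subtle part of block splitting: when a partition is emitted raw or as RLE, the decoder never replays its sequences, so only the compression-side history advances. The simplified stand-in below (hypothetical names, not the actual ZSTD_seqStore_resolveOffCodes() implementation) illustrates the resolution rule:

    /* Illustrative sketch only: a 3-entry history mirroring zstd's repcodes_t. */
    typedef struct { unsigned rep[3]; } demo_repcodes_t;

    /* A sequence refers to repcode slot `repIdx`. cRep knows the offset the
     * repcode really means; dRep knows what the decoder will believe it means.
     * Returns 0 if the repcode may be kept, otherwise the raw offset that must
     * be emitted in its place (and fed into dRep's update). */
    static unsigned demo_resolveOffset(unsigned repIdx,
                                       const demo_repcodes_t* cRep,
                                       const demo_repcodes_t* dRep)
    {
        unsigned const rawOffset = cRep->rep[repIdx];
        return (dRep->rep[repIdx] == rawOffset) ? 0 : rawOffset;
    }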
+
+static size_t ZSTD_compressBlock_splitBlock(ZSTD_CCtx* zc,
+                                            void* dst, size_t dstCapacity,
+                                            const void* src, size_t srcSize, U32 lastBlock) {
+    const BYTE* ip = (const BYTE*)src;
+    BYTE* op = (BYTE*)dst;
+    U32 nbSeq;
+    size_t cSize;
+    DEBUGLOG(4, "ZSTD_compressBlock_splitBlock");
+
+    {   const size_t bss = ZSTD_buildSeqStore(zc, src, srcSize);
+        FORWARD_IF_ERROR(bss, "ZSTD_buildSeqStore failed");
+        if (bss == ZSTDbss_noCompress) {
+            if (zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode == FSE_repeat_valid)
+                zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode = FSE_repeat_check;
+            cSize = ZSTD_noCompressBlock(op, dstCapacity, ip, srcSize, lastBlock);
+            FORWARD_IF_ERROR(cSize, "ZSTD_noCompressBlock failed");
+            DEBUGLOG(4, "ZSTD_compressBlock_splitBlock: Nocompress block");
+            return cSize;
+        }
+        nbSeq = (U32)(zc->seqStore.sequences - zc->seqStore.sequencesStart);
+    }
+
+    assert(zc->appliedParams.splitBlocks == 1);
+    cSize = ZSTD_compressBlock_splitBlock_internal(zc, dst, dstCapacity, src, srcSize, lastBlock, nbSeq);
+    FORWARD_IF_ERROR(cSize, "Splitting blocks failed!");
+    return cSize;
 }
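For reference, the splitBlocks flag consumed by the assert above can be set from user code. A minimal sketch, assuming the v1.5.0 experimental parameter name ZSTD_c_splitBlocks (renamed in later releases) and a translation unit built against the static-linking-only API:

    #define ZSTD_STATIC_LINKING_ONLY   /* experimental parameters */
    #include <zstd.h>

    /* Compress with the block splitter forced on (sketch, not canonical usage). */
    static size_t compress_with_split(void* dst, size_t dstCap,
                                      const void* src, size_t srcSize)
    {
        ZSTD_CCtx* const cctx = ZSTD_createCCtx();
        size_t cSize;
        if (cctx == NULL) return 0;
        ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, 19);
        ZSTD_CCtx_setParameter(cctx, ZSTD_c_splitBlocks, 1);  /* assumption: v1.5.0 name */
        cSize = ZSTD_compress2(cctx, dst, dstCap, src, srcSize);
        ZSTD_freeCCtx(cctx);
        return ZSTD_isError(cSize) ? 0 : cSize;
    }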
 
 static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc,
@@ -2716,12 +3733,12 @@ static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc,
 
     if (zc->seqCollector.collectSequences) {
         ZSTD_copyBlockSequences(zc);
-        ZSTD_confirmRepcodesAndEntropyTables(zc);
+        ZSTD_blockState_confirmRepcodesAndEntropyTables(&zc->blockState);
         return 0;
     }
 
     /* encode sequences and literals */
-    cSize = ZSTD_entropyCompressSequences(&zc->seqStore,
+    cSize = ZSTD_entropyCompressSeqStore(&zc->seqStore,
             &zc->blockState.prevCBlock->entropy, &zc->blockState.nextCBlock->entropy,
             &zc->appliedParams,
             dst, dstCapacity,
@@ -2750,7 +3767,7 @@ static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc,
 
 out:
     if (!ZSTD_isError(cSize) && cSize > 1) {
-        ZSTD_confirmRepcodesAndEntropyTables(zc);
+        ZSTD_blockState_confirmRepcodesAndEntropyTables(&zc->blockState);
     }
     /* We check that dictionaries have offset codes available for the first
      * block. After the first block, the offcode table might not have large
@@ -2803,7 +3820,7 @@ static size_t ZSTD_compressBlock_targetCBlockSize_body(ZSTD_CCtx* zc,
         size_t const maxCSize = srcSize - ZSTD_minGain(srcSize, zc->appliedParams.cParams.strategy);
         FORWARD_IF_ERROR(cSize, "ZSTD_compressSuperBlock failed");
         if (cSize != 0 && cSize < maxCSize + ZSTD_blockHeaderSize) {
-            ZSTD_confirmRepcodesAndEntropyTables(zc);
+            ZSTD_blockState_confirmRepcodesAndEntropyTables(&zc->blockState);
             return cSize;
         }
     }
@@ -2843,9 +3860,9 @@ static void ZSTD_overflowCorrectIfNeeded(ZSTD_matchState_t* ms,
                                          void const* ip,
                                          void const* iend)
 {
-    if (ZSTD_window_needOverflowCorrection(ms->window, iend)) {
-        U32 const maxDist = (U32)1 << params->cParams.windowLog;
-        U32 const cycleLog = ZSTD_cycleLog(params->cParams.chainLog, params->cParams.strategy);
+    U32 const cycleLog = ZSTD_cycleLog(params->cParams.chainLog, params->cParams.strategy);
+    U32 const maxDist = (U32)1 << params->cParams.windowLog;
+    if (ZSTD_window_needOverflowCorrection(ms->window, cycleLog, maxDist, ms->loadedDictEnd, ip, iend)) {
         U32 const correction = ZSTD_window_correctOverflow(&ms->window, cycleLog, maxDist, ip);
         ZSTD_STATIC_ASSERT(ZSTD_CHAINLOG_MAX <= 30);
         ZSTD_STATIC_ASSERT(ZSTD_WINDOWLOG_MAX_32 <= 30);
@@ -2868,7 +3885,7 @@ static void ZSTD_overflowCorrectIfNeeded(ZSTD_matchState_t* ms,
  * Frame is supposed already started (header already produced)
  * @return : compressed size, or an error code
  */
-static size_t ZSTD_compress_frameChunk (ZSTD_CCtx* cctx,
+static size_t ZSTD_compress_frameChunk(ZSTD_CCtx* cctx,
                                         void* dst, size_t dstCapacity,
                                         const void* src, size_t srcSize,
                                         U32 lastFrameChunk)
@@ -2908,6 +3925,10 @@ static size_t ZSTD_compress_frameChunk (ZSTD_CCtx* cctx,
             FORWARD_IF_ERROR(cSize, "ZSTD_compressBlock_targetCBlockSize failed");
             assert(cSize > 0);
             assert(cSize <= blockSize + ZSTD_blockHeaderSize);
+        } else if (ZSTD_blockSplitterEnabled(&cctx->appliedParams)) {
+            cSize = ZSTD_compressBlock_splitBlock(cctx, op, dstCapacity, ip, blockSize, lastBlock);
+            FORWARD_IF_ERROR(cSize, "ZSTD_compressBlock_splitBlock failed");
+            assert(cSize > 0 || cctx->seqCollector.collectSequences == 1);
         } else {
             cSize = ZSTD_compressBlock_internal(cctx,
                             op+ZSTD_blockHeaderSize, dstCapacity-ZSTD_blockHeaderSize,
@@ -3063,11 +4084,12 @@ static size_t ZSTD_compressContinue_internal (ZSTD_CCtx* cctx,
 
     if (!srcSize) return fhSize;  /* do not generate an empty block if no input */
 
-    if (!ZSTD_window_update(&ms->window, src, srcSize)) {
+    if (!ZSTD_window_update(&ms->window, src, srcSize, ms->forceNonContiguous)) {
+        ms->forceNonContiguous = 0;
         ms->nextToUpdate = ms->window.dictLimit;
     }
     if (cctx->appliedParams.ldmParams.enableLdm) {
-        ZSTD_window_update(&cctx->ldmState.window, src, srcSize);
+        ZSTD_window_update(&cctx->ldmState.window, src, srcSize, /* forceNonContiguous */ 0);
     }
 
     if (!frame) {
@@ -3135,63 +4157,86 @@ static size_t ZSTD_loadDictionaryContent(ZSTD_matchState_t* ms,
 {
     const BYTE* ip = (const BYTE*) src;
     const BYTE* const iend = ip + srcSize;
+    int const loadLdmDict = params->ldmParams.enableLdm && ls != NULL;
+
+    /* Assert that the ms params match the params we're being given */
+    ZSTD_assertEqualCParams(params->cParams, ms->cParams);
 
-    ZSTD_window_update(&ms->window, src, srcSize);
+    if (srcSize > ZSTD_CHUNKSIZE_MAX) {
+        /* Allow the dictionary to set indices up to exactly ZSTD_CURRENT_MAX.
+         * Dictionaries right at the edge will immediately trigger overflow
+         * correction, but I don't want to insert extra constraints here.
+         */
+        U32 const maxDictSize = ZSTD_CURRENT_MAX - 1;
+        /* We must have cleared our windows when our source is this large. */
+        assert(ZSTD_window_isEmpty(ms->window));
+        if (loadLdmDict)
+            assert(ZSTD_window_isEmpty(ls->window));
+        /* If the dictionary is too large, only load the suffix of the dictionary. */
+        if (srcSize > maxDictSize) {
+            ip = iend - maxDictSize;
+            src = ip;
+            srcSize = maxDictSize;
+        }
+    }
+
+    DEBUGLOG(4, "ZSTD_loadDictionaryContent(): useRowMatchFinder=%d", (int)params->useRowMatchFinder);
+    ZSTD_window_update(&ms->window, src, srcSize, /* forceNonContiguous */ 0);
     ms->loadedDictEnd = params->forceWindow ? 0 : (U32)(iend - ms->window.base);
+    ms->forceNonContiguous = params->deterministicRefPrefix;
 
-    if (params->ldmParams.enableLdm && ls != NULL) {
-        ZSTD_window_update(&ls->window, src, srcSize);
+    if (loadLdmDict) {
+        ZSTD_window_update(&ls->window, src, srcSize, /* forceNonContiguous */ 0);
         ls->loadedDictEnd = params->forceWindow ? 0 : (U32)(iend - ls->window.base);
     }
 
-    /* Assert that we the ms params match the params we're being given */
-    ZSTD_assertEqualCParams(params->cParams, ms->cParams);
-
     if (srcSize <= HASH_READ_SIZE) return 0;
 
-    while (iend - ip > HASH_READ_SIZE) {
-        size_t const remaining = (size_t)(iend - ip);
-        size_t const chunk = MIN(remaining, ZSTD_CHUNKSIZE_MAX);
-        const BYTE* const ichunk = ip + chunk;
-
-        ZSTD_overflowCorrectIfNeeded(ms, ws, params, ip, ichunk);
+    ZSTD_overflowCorrectIfNeeded(ms, ws, params, ip, iend);
 
-        if (params->ldmParams.enableLdm && ls != NULL)
-            ZSTD_ldm_fillHashTable(ls, (const BYTE*)src, (const BYTE*)src + srcSize, &params->ldmParams);
+    if (loadLdmDict)
+        ZSTD_ldm_fillHashTable(ls, ip, iend, &params->ldmParams);
 
-        switch(params->cParams.strategy)
-        {
-        case ZSTD_fast:
-            ZSTD_fillHashTable(ms, ichunk, dtlm);
-            break;
-        case ZSTD_dfast:
-            ZSTD_fillDoubleHashTable(ms, ichunk, dtlm);
-            break;
+    switch(params->cParams.strategy)
+    {
+    case ZSTD_fast:
+        ZSTD_fillHashTable(ms, iend, dtlm);
+        break;
+    case ZSTD_dfast:
+        ZSTD_fillDoubleHashTable(ms, iend, dtlm);
+        break;
 
-        case ZSTD_greedy:
-        case ZSTD_lazy:
-        case ZSTD_lazy2:
-            if (chunk >= HASH_READ_SIZE && ms->dedicatedDictSearch) {
-                assert(chunk == remaining); /* must load everything in one go */
-                ZSTD_dedicatedDictSearch_lazy_loadDictionary(ms, ichunk-HASH_READ_SIZE);
-            } else if (chunk >= HASH_READ_SIZE) {
-                ZSTD_insertAndFindFirstIndex(ms, ichunk-HASH_READ_SIZE);
+    case ZSTD_greedy:
+    case ZSTD_lazy:
+    case ZSTD_lazy2:
+        assert(srcSize >= HASH_READ_SIZE);
+        if (ms->dedicatedDictSearch) {
+            assert(ms->chainTable != NULL);
+            ZSTD_dedicatedDictSearch_lazy_loadDictionary(ms, iend-HASH_READ_SIZE);
+        } else {
+            assert(params->useRowMatchFinder != ZSTD_urm_auto);
+            if (params->useRowMatchFinder == ZSTD_urm_enableRowMatchFinder) {
+                size_t const tagTableSize = ((size_t)1 << params->cParams.hashLog) * sizeof(U16);
+                ZSTD_memset(ms->tagTable, 0, tagTableSize);
+                ZSTD_row_update(ms, iend-HASH_READ_SIZE);
+                DEBUGLOG(4, "Using row-based hash table for lazy dict");
+            } else {
+                ZSTD_insertAndFindFirstIndex(ms, iend-HASH_READ_SIZE);
+                DEBUGLOG(4, "Using chain-based hash table for lazy dict");
             }
-            break;
-
-        case ZSTD_btlazy2:   /* we want the dictionary table fully sorted */
-        case ZSTD_btopt:
-        case ZSTD_btultra:
-        case ZSTD_btultra2:
-            if (chunk >= HASH_READ_SIZE)
-                ZSTD_updateTree(ms, ichunk-HASH_READ_SIZE, ichunk);
-            break;
-
-        default:
-            assert(0);  /* not possible : not a valid strategy id */
         }
+        break;
+
+    case ZSTD_btlazy2:   /* we want the dictionary table fully sorted */
+    case ZSTD_btopt:
+    case ZSTD_btultra:
+    case ZSTD_btultra2:
+        assert(srcSize >= HASH_READ_SIZE);
+        ZSTD_updateTree(ms, iend-HASH_READ_SIZE, iend);
+        break;
 
-        ip = ichunk;
+    default:
+        assert(0);  /* not possible : not a valid strategy id */
     }
 
     ms->nextToUpdate = (U32)(iend - ms->window.base);
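The lazy-strategy branch above now fills either a row-based tag table or the classic chain table when loading dictionary content. The row-based match finder can also be requested explicitly; a sketch assuming the v1.5.0 experimental names (ZSTD_c_useRowMatchFinder and the ZSTD_useRowMatchFinderMode_e enum), where leaving the parameter at auto defers to ZSTD_resolveRowMatchFinderMode():

    #define ZSTD_STATIC_LINKING_ONLY
    #include <zstd.h>

    /* Force the row-based match finder for a lazy-strategy level (sketch). */
    static void enable_row_matchfinder(ZSTD_CCtx* cctx)
    {
        ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, 12);  /* lazy-range level */
        ZSTD_CCtx_setParameter(cctx, ZSTD_c_useRowMatchFinder,
                               ZSTD_urm_enableRowMatchFinder);      /* assumption: v1.5.0 enum */
    }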
@@ -3330,7 +4375,6 @@ static size_t ZSTD_loadZstdDictionary(ZSTD_compressedBlockState_t* bs,
     const BYTE* const dictEnd = dictPtr + dictSize;
     size_t dictID;
     size_t eSize;
-
     ZSTD_STATIC_ASSERT(HUF_WORKSPACE_SIZE >= (1<<MAX(MLFSELog,LLFSELog)));
     assert(dictSize >= 8);
     assert(MEM_readLE32(dictPtr) == ZSTD_MAGIC_DICTIONARY);
@@ -3401,8 +4445,9 @@ static size_t ZSTD_compressBegin_internal(ZSTD_CCtx* cctx,
                                     const ZSTD_CCtx_params* params, U64 pledgedSrcSize,
                                     ZSTD_buffered_policy_e zbuff)
 {
+    size_t const dictContentSize = cdict ? cdict->dictContentSize : dictSize;
 #if ZSTD_TRACE
-    cctx->traceCtx = ZSTD_trace_compress_begin(cctx);
+    cctx->traceCtx = (ZSTD_trace_compress_begin != NULL) ? ZSTD_trace_compress_begin(cctx) : 0;
 #endif
     DEBUGLOG(4, "ZSTD_compressBegin_internal: wlog=%u", params->cParams.windowLog);
     /* params are supposed to be fully validated at this point */
@@ -3418,7 +4463,8 @@ static size_t ZSTD_compressBegin_internal(ZSTD_CCtx* cctx,
         return ZSTD_resetCCtx_usingCDict(cctx, cdict, params, pledgedSrcSize, zbuff);
     }
 
-    FORWARD_IF_ERROR( ZSTD_resetCCtx_internal(cctx, *params, pledgedSrcSize,
+    FORWARD_IF_ERROR( ZSTD_resetCCtx_internal(cctx, params, pledgedSrcSize,
+                                              dictContentSize,
                                               ZSTDcrp_makeClean, zbuff) , "");
     {   size_t const dictID = cdict ?
                 ZSTD_compress_insertDictionary(
@@ -3433,7 +4479,7 @@ static size_t ZSTD_compressBegin_internal(ZSTD_CCtx* cctx,
         FORWARD_IF_ERROR(dictID, "ZSTD_compress_insertDictionary failed");
         assert(dictID <= UINT_MAX);
         cctx->dictID = (U32)dictID;
-        cctx->dictContentSize = cdict ? cdict->dictContentSize : dictSize;
+        cctx->dictContentSize = dictContentSize;
     }
     return 0;
 }
@@ -3533,7 +4579,7 @@ static size_t ZSTD_writeEpilogue(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity)
 void ZSTD_CCtx_trace(ZSTD_CCtx* cctx, size_t extraCSize)
 {
 #if ZSTD_TRACE
-    if (cctx->traceCtx) {
+    if (cctx->traceCtx && ZSTD_trace_compress_end != NULL) {
         int const streaming = cctx->inBuffSize > 0 || cctx->outBuffSize > 0 || cctx->appliedParams.nbWorkers > 0;
         ZSTD_Trace trace;
         ZSTD_memset(&trace, 0, sizeof(trace));
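The NULL checks added around ZSTD_trace_compress_begin and ZSTD_trace_compress_end exist because the tracing hooks can be declared as weak symbols, in which case their address is NULL until a tracer is actually linked in. A generic sketch of that pattern with a hypothetical hook (not zstd's declarations):

    /* GCC/Clang weak-symbol idiom: an undefined weak function compares equal
     * to NULL, so the call can be skipped when no implementation is linked. */
    #include <stddef.h>

    __attribute__((weak)) void my_trace_hook(int event);   /* may stay undefined */

    static void fire_trace(int event)
    {
        if (my_trace_hook != NULL)
            my_trace_hook(event);
    }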
@@ -3586,15 +4632,14 @@ size_t ZSTD_compress_advanced (ZSTD_CCtx* cctx,
                                const void* dict,size_t dictSize,
                                ZSTD_parameters params)
 {
-    ZSTD_CCtx_params cctxParams;
     DEBUGLOG(4, "ZSTD_compress_advanced");
     FORWARD_IF_ERROR(ZSTD_checkCParams(params.cParams), "");
-    ZSTD_CCtxParams_init_internal(&cctxParams, &params, ZSTD_NO_CLEVEL);
+    ZSTD_CCtxParams_init_internal(&cctx->simpleApiParams, &params, ZSTD_NO_CLEVEL);
     return ZSTD_compress_advanced_internal(cctx,
                                            dst, dstCapacity,
                                            src, srcSize,
                                            dict, dictSize,
-                                           &cctxParams);
+                                           &cctx->simpleApiParams);
 }
 
 /* Internal */
@@ -3618,14 +4663,13 @@ size_t ZSTD_compress_usingDict(ZSTD_CCtx* cctx,
                                const void* dict, size_t dictSize,
                                int compressionLevel)
 {
-    ZSTD_CCtx_params cctxParams;
     {
         ZSTD_parameters const params = ZSTD_getParams_internal(compressionLevel, srcSize, dict ? dictSize : 0, ZSTD_cpm_noAttachDict);
         assert(params.fParams.contentSizeFlag == 1);
-        ZSTD_CCtxParams_init_internal(&cctxParams, &params, (compressionLevel == 0) ? ZSTD_CLEVEL_DEFAULT: compressionLevel);
+        ZSTD_CCtxParams_init_internal(&cctx->simpleApiParams, &params, (compressionLevel == 0) ? ZSTD_CLEVEL_DEFAULT: compressionLevel);
     }
     DEBUGLOG(4, "ZSTD_compress_usingDict (srcSize=%u)", (unsigned)srcSize);
-    return ZSTD_compress_advanced_internal(cctx, dst, dstCapacity, src, srcSize, dict, dictSize, &cctxParams);
+    return ZSTD_compress_advanced_internal(cctx, dst, dstCapacity, src, srcSize, dict, dictSize, &cctx->simpleApiParams);
 }
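Both single-shot entry points above now reuse cctx->simpleApiParams instead of a stack-local ZSTD_CCtx_params, presumably to keep that fairly large struct out of the call's stack frame. A generic illustration of the refactor with hypothetical stand-in types (not zstd API):

    /* Before: `big_params_t local;` in every call. After: storage owned by the
     * long-lived context, so the hot path adds nothing to the stack. */
    typedef struct { unsigned fields[256]; } big_params_t;  /* stand-in for ZSTD_CCtx_params */
    typedef struct { big_params_t simpleApiParams; } ctx_t; /* stand-in for ZSTD_CCtx */

    static big_params_t* params_for_call(ctx_t* ctx)
    {
        return &ctx->simpleApiParams;
    }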
 
 size_t ZSTD_compressCCtx(ZSTD_CCtx* cctx,
@@ -3669,7 +4713,10 @@ size_t ZSTD_estimateCDictSize_advanced(
     DEBUGLOG(5, "sizeof(ZSTD_CDict) : %u", (unsigned)sizeof(ZSTD_CDict));
     return ZSTD_cwksp_alloc_size(sizeof(ZSTD_CDict))
          + ZSTD_cwksp_alloc_size(HUF_WORKSPACE_SIZE)
-         + ZSTD_sizeof_matchState(&cParams, /* forCCtx */ 0)
+         /* enableDedicatedDictSearch == 1 ensures that CDict estimation will not be too small
+          * in case we are using DDS with row-hash. */
+         + ZSTD_sizeof_matchState(&cParams, ZSTD_resolveRowMatchFinderMode(ZSTD_urm_auto, &cParams),
+                                  /* enableDedicatedDictSearch */ 1, /* forCCtx */ 0)
          + (dictLoadMethod == ZSTD_dlm_byRef ? 0
             : ZSTD_cwksp_alloc_size(ZSTD_cwksp_align(dictSize, sizeof(void *))));
 }
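The estimate above pairs naturally with ZSTD_initStaticCDict(), which appears later in this diff. A usage sketch under the static-linking-only API (error handling trimmed; the workspace must outlive the CDict):

    #define ZSTD_STATIC_LINKING_ONLY
    #include <zstd.h>
    #include <stdlib.h>

    static const ZSTD_CDict* make_static_cdict(const void* dict, size_t dictSize, int level)
    {
        ZSTD_compressionParameters const cParams = ZSTD_getCParams(level, 0, dictSize);
        size_t const wkspSize = ZSTD_estimateCDictSize_advanced(dictSize, cParams, ZSTD_dlm_byCopy);
        void* const wksp = malloc(wkspSize);    /* intentionally not freed in this sketch */
        if (wksp == NULL) return NULL;
        return ZSTD_initStaticCDict(wksp, wkspSize, dict, dictSize,
                                    ZSTD_dlm_byCopy, ZSTD_dct_auto, cParams);
    }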
@@ -3700,9 +4747,6 @@ static size_t ZSTD_initCDict_internal(
     assert(!ZSTD_checkCParams(params.cParams));
     cdict->matchState.cParams = params.cParams;
     cdict->matchState.dedicatedDictSearch = params.enableDedicatedDictSearch;
-    if (cdict->matchState.dedicatedDictSearch && dictSize > ZSTD_CHUNKSIZE_MAX) {
-        cdict->matchState.dedicatedDictSearch = 0;
-    }
     if ((dictLoadMethod == ZSTD_dlm_byRef) || (!dictBuffer) || (!dictSize)) {
         cdict->dictContent = dictBuffer;
     } else {
@@ -3723,6 +4767,7 @@ static size_t ZSTD_initCDict_internal(
                                               &cdict->matchState,
                                               &cdict->workspace,
                                               &params.cParams,
+                                              params.useRowMatchFinder,
                                               ZSTDcrp_makeClean,
                                               ZSTDirp_reset,
                                               ZSTD_resetTarget_CDict), "");
@@ -3746,14 +4791,17 @@ static size_t ZSTD_initCDict_internal(
 
 static ZSTD_CDict* ZSTD_createCDict_advanced_internal(size_t dictSize,
                                 ZSTD_dictLoadMethod_e dictLoadMethod,
-                                ZSTD_compressionParameters cParams, ZSTD_customMem customMem)
+                                ZSTD_compressionParameters cParams,
+                                ZSTD_useRowMatchFinderMode_e useRowMatchFinder,
+                                U32 enableDedicatedDictSearch,
+                                ZSTD_customMem customMem)
 {
     if ((!customMem.customAlloc) ^ (!customMem.customFree)) return NULL;
 
     {   size_t const workspaceSize =
             ZSTD_cwksp_alloc_size(sizeof(ZSTD_CDict)) +
             ZSTD_cwksp_alloc_size(HUF_WORKSPACE_SIZE) +
-            ZSTD_sizeof_matchState(&cParams, /* forCCtx */ 0) +
+            ZSTD_sizeof_matchState(&cParams, useRowMatchFinder, enableDedicatedDictSearch, /* forCCtx */ 0) +
             (dictLoadMethod == ZSTD_dlm_byRef ? 0
              : ZSTD_cwksp_alloc_size(ZSTD_cwksp_align(dictSize, sizeof(void*))));
         void* const workspace = ZSTD_customMalloc(workspaceSize, customMem);
@@ -3772,7 +4820,7 @@ static ZSTD_CDict* ZSTD_createCDict_advanced_internal(size_t dictSize,
         ZSTD_cwksp_move(&cdict->workspace, &ws);
         cdict->customMem = customMem;
         cdict->compressionLevel = ZSTD_NO_CLEVEL; /* signals advanced API usage */
-
+        cdict->useRowMatchFinder = useRowMatchFinder;
         return cdict;
     }
 }
@@ -3824,10 +4872,13 @@ ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_advanced2(
             &cctxParams, ZSTD_CONTENTSIZE_UNKNOWN, dictSize, ZSTD_cpm_createCDict);
     }
 
+    DEBUGLOG(3, "ZSTD_createCDict_advanced2: DDS: %u", cctxParams.enableDedicatedDictSearch);
     cctxParams.cParams = cParams;
+    cctxParams.useRowMatchFinder = ZSTD_resolveRowMatchFinderMode(cctxParams.useRowMatchFinder, &cParams);
 
     cdict = ZSTD_createCDict_advanced_internal(dictSize,
                         dictLoadMethod, cctxParams.cParams,
+                        cctxParams.useRowMatchFinder, cctxParams.enableDedicatedDictSearch,
                         customMem);
 
     if (ZSTD_isError( ZSTD_initCDict_internal(cdict,
@@ -3896,7 +4947,9 @@ const ZSTD_CDict* ZSTD_initStaticCDict(
                                  ZSTD_dictContentType_e dictContentType,
                                  ZSTD_compressionParameters cParams)
 {
-    size_t const matchStateSize = ZSTD_sizeof_matchState(&cParams, /* forCCtx */ 0);
+    ZSTD_useRowMatchFinderMode_e const useRowMatchFinder = ZSTD_resolveRowMatchFinderMode(ZSTD_urm_auto, &cParams);
+    /* enableDedicatedDictSearch == 1 ensures matchstate is not too small in case this CDict will be used for DDS + row hash */
+    size_t const matchStateSize = ZSTD_sizeof_matchState(&cParams, useRowMatchFinder, /* enableDedicatedDictSearch */ 1, /* forCCtx */ 0);
     size_t const neededSize = ZSTD_cwksp_alloc_size(sizeof(ZSTD_CDict))
                             + (dictLoadMethod == ZSTD_dlm_byRef ? 0
                                : ZSTD_cwksp_alloc_size(ZSTD_cwksp_align(dictSize, sizeof(void*))))
@@ -3921,6 +4974,8 @@ const ZSTD_CDict* ZSTD_initStaticCDict(
 
     ZSTD_CCtxParams_init(&params, 0);
     params.cParams = cParams;
+    params.useRowMatchFinder = useRowMatchFinder;
+    cdict->useRowMatchFinder = useRowMatchFinder;
 
     if (ZSTD_isError( ZSTD_initCDict_internal(cdict,
                                     dict, dictSize,
@@ -3947,15 +5002,15 @@ unsigned ZSTD_getDictID_fromCDict(const ZSTD_CDict* cdict)
     return cdict->dictID;
 }
 
-
-/* ZSTD_compressBegin_usingCDict_advanced() :
- * cdict must be != NULL */
-size_t ZSTD_compressBegin_usingCDict_advanced(
+/* ZSTD_compressBegin_usingCDict_internal() :
+ * Implementation of various ZSTD_compressBegin_usingCDict* functions.
+ */
+static size_t ZSTD_compressBegin_usingCDict_internal(
     ZSTD_CCtx* const cctx, const ZSTD_CDict* const cdict,
     ZSTD_frameParameters const fParams, unsigned long long const pledgedSrcSize)
 {
     ZSTD_CCtx_params cctxParams;
-    DEBUGLOG(4, "ZSTD_compressBegin_usingCDict_advanced");
+    DEBUGLOG(4, "ZSTD_compressBegin_usingCDict_internal");
     RETURN_ERROR_IF(cdict==NULL, dictionary_wrong, "NULL pointer!");
     /* Initialize the cctxParams from the cdict */
     {
@@ -3987,25 +5042,48 @@ size_t ZSTD_compressBegin_usingCDict_advanced(
                                    ZSTDb_not_buffered);
 }
 
+
+/* ZSTD_compressBegin_usingCDict_advanced() :
+ * This function is DEPRECATED.
+ * cdict must be != NULL */
+size_t ZSTD_compressBegin_usingCDict_advanced(
+    ZSTD_CCtx* const cctx, const ZSTD_CDict* const cdict,
+    ZSTD_frameParameters const fParams, unsigned long long const pledgedSrcSize)
+{
+    return ZSTD_compressBegin_usingCDict_internal(cctx, cdict, fParams, pledgedSrcSize);
+}
+
 /* ZSTD_compressBegin_usingCDict() :
- * pledgedSrcSize=0 means "unknown"
- * if pledgedSrcSize>0, it will enable contentSizeFlag */
+ * cdict must be != NULL */
 size_t ZSTD_compressBegin_usingCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict)
 {
     ZSTD_frameParameters const fParams = { 0 /*content*/, 0 /*checksum*/, 0 /*noDictID*/ };
-    DEBUGLOG(4, "ZSTD_compressBegin_usingCDict : dictIDFlag == %u", !fParams.noDictIDFlag);
-    return ZSTD_compressBegin_usingCDict_advanced(cctx, cdict, fParams, ZSTD_CONTENTSIZE_UNKNOWN);
+    return ZSTD_compressBegin_usingCDict_internal(cctx, cdict, fParams, ZSTD_CONTENTSIZE_UNKNOWN);
 }
 
-size_t ZSTD_compress_usingCDict_advanced(ZSTD_CCtx* cctx,
+/*! ZSTD_compress_usingCDict_internal():
+ * Implementation of various ZSTD_compress_usingCDict* functions.
+ */
+static size_t ZSTD_compress_usingCDict_internal(ZSTD_CCtx* cctx,
                                 void* dst, size_t dstCapacity,
                                 const void* src, size_t srcSize,
                                 const ZSTD_CDict* cdict, ZSTD_frameParameters fParams)
 {
-    FORWARD_IF_ERROR(ZSTD_compressBegin_usingCDict_advanced(cctx, cdict, fParams, srcSize), ""); /* will check if cdict != NULL */
+    FORWARD_IF_ERROR(ZSTD_compressBegin_usingCDict_internal(cctx, cdict, fParams, srcSize), ""); /* will check if cdict != NULL */
     return ZSTD_compressEnd(cctx, dst, dstCapacity, src, srcSize);
 }
 
+/*! ZSTD_compress_usingCDict_advanced():
+ * This function is DEPRECATED.
+ */
+size_t ZSTD_compress_usingCDict_advanced(ZSTD_CCtx* cctx,
+                                void* dst, size_t dstCapacity,
+                                const void* src, size_t srcSize,
+                                const ZSTD_CDict* cdict, ZSTD_frameParameters fParams)
+{
+    return ZSTD_compress_usingCDict_internal(cctx, dst, dstCapacity, src, srcSize, cdict, fParams);
+}
+
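With the *_advanced variants above now deprecated wrappers, the natural route for new code is the parameter-based API. A minimal sketch using only stable functions:

    #include <zstd.h>

    static size_t compress_with_cdict(ZSTD_CCtx* cctx,
                                      void* dst, size_t dstCap,
                                      const void* src, size_t srcSize,
                                      const ZSTD_CDict* cdict)
    {
        size_t const r = ZSTD_CCtx_refCDict(cctx, cdict);  /* attach digested dictionary */
        if (ZSTD_isError(r)) return r;
        return ZSTD_compress2(cctx, dst, dstCap, src, srcSize);
    }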
 /*! ZSTD_compress_usingCDict() :
  *  Compression using a digested Dictionary.
  *  Faster startup than ZSTD_compress_usingDict(), recommended when same dictionary is used multiple times.
@@ -4017,7 +5095,7 @@ size_t ZSTD_compress_usingCDict(ZSTD_CCtx* cctx,
                                 const ZSTD_CDict* cdict)
 {
     ZSTD_frameParameters const fParams = { 1 /*content*/, 0 /*checksum*/, 0 /*noDictID*/ };
-    return ZSTD_compress_usingCDict_advanced(cctx, dst, dstCapacity, src, srcSize, cdict, fParams);
+    return ZSTD_compress_usingCDict_internal(cctx, dst, dstCapacity, src, srcSize, cdict, fParams);
 }
 
 
@@ -4427,8 +5505,13 @@ static size_t ZSTD_CCtx_init_compressStream2(ZSTD_CCtx* cctx,
     FORWARD_IF_ERROR( ZSTD_initLocalDict(cctx) , ""); /* Init the local dict if present. */
     ZSTD_memset(&cctx->prefixDict, 0, sizeof(cctx->prefixDict));   /* single usage */
     assert(prefixDict.dict==NULL || cctx->cdict==NULL);    /* only one can be set */
-    if (cctx->cdict)
-        params.compressionLevel = cctx->cdict->compressionLevel; /* let cdict take priority in terms of compression level */
+    if (cctx->cdict && !cctx->localDict.cdict) {
+        /* Let the cdict's compression level take priority over the requested params.
+         * But do not take the cdict's compression level if the "cdict" is actually a localDict
+         * generated from ZSTD_initLocalDict().
+         */
+        params.compressionLevel = cctx->cdict->compressionLevel;
+    }
     DEBUGLOG(4, "ZSTD_compressStream2 : transparent init stage");
     if (endOp == ZSTD_e_end) cctx->pledgedSrcSizePlusOne = inSize + 1;  /* auto-fix pledgedSrcSize */
     {
@@ -4447,13 +5530,20 @@ static size_t ZSTD_CCtx_init_compressStream2(ZSTD_CCtx* cctx,
             params.ldmParams.enableLdm = 1;
         }
 
+        if (ZSTD_CParams_useBlockSplitter(&params.cParams)) {
+            DEBUGLOG(4, "Block splitter enabled by default (window size >= 128K, strategy >= btopt)");
+            params.splitBlocks = 1;
+        }
+
+        params.useRowMatchFinder = ZSTD_resolveRowMatchFinderMode(params.useRowMatchFinder, &params.cParams);
+
 #ifdef ZSTD_MULTITHREAD
         if ((cctx->pledgedSrcSizePlusOne-1) <= ZSTDMT_JOBSIZE_MIN) {
             params.nbWorkers = 0; /* do not invoke multi-threading when src size is too small */
         }
         if (params.nbWorkers > 0) {
 #if ZSTD_TRACE
-            cctx->traceCtx = ZSTD_trace_compress_begin(cctx);
+            cctx->traceCtx = (ZSTD_trace_compress_begin != NULL) ? ZSTD_trace_compress_begin(cctx) : 0;
 #endif
             /* mt context creation */
             if (cctx->mtctx == NULL) {
@@ -4921,7 +6011,7 @@ static size_t ZSTD_compressSequences_internal(ZSTD_CCtx* cctx,
             continue;
         }
 
-        compressedSeqsSize = ZSTD_entropyCompressSequences(&cctx->seqStore,
+        compressedSeqsSize = ZSTD_entropyCompressSeqStore(&cctx->seqStore,
                                &cctx->blockState.prevCBlock->entropy, &cctx->blockState.nextCBlock->entropy,
                                &cctx->appliedParams,
                                op + ZSTD_blockHeaderSize /* Leave space for block header */, dstCapacity - ZSTD_blockHeaderSize,
@@ -4953,7 +6043,7 @@ static size_t ZSTD_compressSequences_internal(ZSTD_CCtx* cctx,
         } else {
             U32 cBlockHeader;
             /* Error checking and repcodes update */
-            ZSTD_confirmRepcodesAndEntropyTables(cctx);
+            ZSTD_blockState_confirmRepcodesAndEntropyTables(&cctx->blockState);
             if (cctx->blockState.prevCBlock->entropy.fse.offcode_repeatMode == FSE_repeat_valid)
                 cctx->blockState.prevCBlock->entropy.fse.offcode_repeatMode = FSE_repeat_check;
 
@@ -5054,6 +6144,7 @@ size_t ZSTD_endStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output)
 #define ZSTD_MAX_CLEVEL     22
 int ZSTD_maxCLevel(void) { return ZSTD_MAX_CLEVEL; }
 int ZSTD_minCLevel(void) { return (int)-ZSTD_TARGETLENGTH_MAX; }
+int ZSTD_defaultCLevel(void) { return ZSTD_CLEVEL_DEFAULT; }
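ZSTD_defaultCLevel() is new public API in this release, completing the existing min/max pair. Minimal usage sketch:

    #include <stdio.h>
    #include <zstd.h>

    int main(void)
    {
        printf("zstd levels: min=%d default=%d max=%d\n",
               ZSTD_minCLevel(), ZSTD_defaultCLevel(), ZSTD_maxCLevel());
        return 0;
    }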
 
 static const ZSTD_compressionParameters ZSTD_defaultCParameters[4][ZSTD_MAX_CLEVEL+1] = {
 {   /* "default" - for any srcSize > 256 KB */
@@ -5186,7 +6277,10 @@ static ZSTD_compressionParameters ZSTD_dedicatedDictSearch_getCParams(int const
 static int ZSTD_dedicatedDictSearch_isSupported(
         ZSTD_compressionParameters const* cParams)
 {
-    return (cParams->strategy >= ZSTD_greedy) && (cParams->strategy <= ZSTD_lazy2);
+    return (cParams->strategy >= ZSTD_greedy)
+        && (cParams->strategy <= ZSTD_lazy2)
+        && (cParams->hashLog > cParams->chainLog)
+        && (cParams->chainLog <= 24);
 }
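Dedicated dictionary search (DDS) is now additionally gated on hashLog > chainLog and chainLog <= 24. Opting a CDict into DDS goes through the experimental parameter API; a sketch assuming ZSTD_c_enableDedicatedDictSearch and ZSTD_createCDict_advanced2() as exposed in this version:

    #define ZSTD_STATIC_LINKING_ONLY
    #include <zstd.h>

    static ZSTD_CDict* make_dds_cdict(const void* dict, size_t dictSize, int level)
    {
        ZSTD_CCtx_params* const p = ZSTD_createCCtxParams();
        ZSTD_CDict* cdict = NULL;
        if (p == NULL) return NULL;
        ZSTD_CCtxParams_setParameter(p, ZSTD_c_compressionLevel, level);
        ZSTD_CCtxParams_setParameter(p, ZSTD_c_enableDedicatedDictSearch, 1);
        cdict = ZSTD_createCDict_advanced2(dict, dictSize, ZSTD_dlm_byCopy,
                                           ZSTD_dct_auto, p, ZSTD_defaultCMem);
        ZSTD_freeCCtxParams(p);
        return cdict;   /* NULL if DDS/cParams were unsupported or allocation failed */
    }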
 
 /**
@@ -5204,6 +6298,9 @@ static void ZSTD_dedicatedDictSearch_revertCParams(
     case ZSTD_lazy:
     case ZSTD_lazy2:
         cParams->hashLog -= ZSTD_LAZY_DDSS_BUCKET_LOG;
+        if (cParams->hashLog < ZSTD_HASHLOG_MIN) {
+            cParams->hashLog = ZSTD_HASHLOG_MIN;
+        }
         break;
     case ZSTD_btlazy2:
     case ZSTD_btopt:
@@ -5252,6 +6349,7 @@ static ZSTD_compressionParameters ZSTD_getCParams_internal(int compressionLevel,
     else row = compressionLevel;
 
     {   ZSTD_compressionParameters cp = ZSTD_defaultCParameters[tableID][row];
+        DEBUGLOG(5, "ZSTD_getCParams_internal selected tableID: %u row: %u strat: %u", tableID, row, (U32)cp.strategy);
         /* acceleration factor */
         if (compressionLevel < 0) {
             int const clampedCompressionLevel = MAX(ZSTD_minCLevel(), compressionLevel);