zstd-ruby 1.4.9.0 → 1.5.0.0

This diff shows the changes between publicly released versions of the package, as published to their respective public registries. It is provided for informational purposes only.
Files changed (89)
  1. checksums.yaml +4 -4
  2. data/.github/dependabot.yml +8 -0
  3. data/README.md +1 -1
  4. data/ext/zstdruby/libzstd/BUCK +5 -7
  5. data/ext/zstdruby/libzstd/Makefile +42 -13
  6. data/ext/zstdruby/libzstd/README.md +8 -4
  7. data/ext/zstdruby/libzstd/common/bitstream.h +1 -1
  8. data/ext/zstdruby/libzstd/common/compiler.h +1 -1
  9. data/ext/zstdruby/libzstd/common/cpu.h +1 -1
  10. data/ext/zstdruby/libzstd/common/debug.c +1 -1
  11. data/ext/zstdruby/libzstd/common/debug.h +1 -1
  12. data/ext/zstdruby/libzstd/common/entropy_common.c +1 -1
  13. data/ext/zstdruby/libzstd/common/error_private.c +1 -1
  14. data/ext/zstdruby/libzstd/common/error_private.h +3 -3
  15. data/ext/zstdruby/libzstd/common/fse.h +2 -2
  16. data/ext/zstdruby/libzstd/common/fse_decompress.c +25 -15
  17. data/ext/zstdruby/libzstd/common/huf.h +3 -2
  18. data/ext/zstdruby/libzstd/common/mem.h +3 -5
  19. data/ext/zstdruby/libzstd/common/pool.c +1 -1
  20. data/ext/zstdruby/libzstd/common/pool.h +1 -1
  21. data/ext/zstdruby/libzstd/common/xxhash.c +2 -4
  22. data/ext/zstdruby/libzstd/common/xxhash.h +1 -1
  23. data/ext/zstdruby/libzstd/common/zstd_common.c +1 -1
  24. data/ext/zstdruby/libzstd/common/zstd_deps.h +1 -1
  25. data/ext/zstdruby/libzstd/common/zstd_internal.h +21 -9
  26. data/ext/zstdruby/libzstd/common/zstd_trace.h +7 -5
  27. data/ext/zstdruby/libzstd/compress/fse_compress.c +1 -1
  28. data/ext/zstdruby/libzstd/compress/hist.c +1 -1
  29. data/ext/zstdruby/libzstd/compress/hist.h +1 -1
  30. data/ext/zstdruby/libzstd/compress/huf_compress.c +51 -28
  31. data/ext/zstdruby/libzstd/compress/zstd_compress.c +1373 -275
  32. data/ext/zstdruby/libzstd/compress/zstd_compress_internal.h +164 -21
  33. data/ext/zstdruby/libzstd/compress/zstd_compress_literals.c +2 -2
  34. data/ext/zstdruby/libzstd/compress/zstd_compress_literals.h +1 -1
  35. data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.c +14 -6
  36. data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.h +1 -1
  37. data/ext/zstdruby/libzstd/compress/zstd_compress_superblock.c +5 -282
  38. data/ext/zstdruby/libzstd/compress/zstd_compress_superblock.h +1 -1
  39. data/ext/zstdruby/libzstd/compress/zstd_cwksp.h +147 -46
  40. data/ext/zstdruby/libzstd/compress/zstd_double_fast.c +3 -3
  41. data/ext/zstdruby/libzstd/compress/zstd_double_fast.h +1 -1
  42. data/ext/zstdruby/libzstd/compress/zstd_fast.c +4 -4
  43. data/ext/zstdruby/libzstd/compress/zstd_fast.h +1 -1
  44. data/ext/zstdruby/libzstd/compress/zstd_lazy.c +914 -142
  45. data/ext/zstdruby/libzstd/compress/zstd_lazy.h +39 -1
  46. data/ext/zstdruby/libzstd/compress/zstd_ldm.c +51 -15
  47. data/ext/zstdruby/libzstd/compress/zstd_ldm.h +2 -1
  48. data/ext/zstdruby/libzstd/compress/zstd_ldm_geartab.h +1 -1
  49. data/ext/zstdruby/libzstd/compress/zstd_opt.c +1 -1
  50. data/ext/zstdruby/libzstd/compress/zstd_opt.h +1 -1
  51. data/ext/zstdruby/libzstd/compress/zstdmt_compress.c +15 -6
  52. data/ext/zstdruby/libzstd/compress/zstdmt_compress.h +5 -5
  53. data/ext/zstdruby/libzstd/decompress/huf_decompress.c +44 -43
  54. data/ext/zstdruby/libzstd/decompress/zstd_ddict.c +1 -1
  55. data/ext/zstdruby/libzstd/decompress/zstd_ddict.h +1 -1
  56. data/ext/zstdruby/libzstd/decompress/zstd_decompress.c +3 -4
  57. data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.c +44 -36
  58. data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.h +1 -1
  59. data/ext/zstdruby/libzstd/decompress/zstd_decompress_internal.h +1 -2
  60. data/ext/zstdruby/libzstd/deprecated/zbuff.h +1 -1
  61. data/ext/zstdruby/libzstd/deprecated/zbuff_common.c +1 -1
  62. data/ext/zstdruby/libzstd/deprecated/zbuff_compress.c +1 -1
  63. data/ext/zstdruby/libzstd/deprecated/zbuff_decompress.c +1 -1
  64. data/ext/zstdruby/libzstd/dictBuilder/cover.c +7 -6
  65. data/ext/zstdruby/libzstd/dictBuilder/cover.h +6 -5
  66. data/ext/zstdruby/libzstd/dictBuilder/fastcover.c +7 -6
  67. data/ext/zstdruby/libzstd/dictBuilder/zdict.c +8 -7
  68. data/ext/zstdruby/libzstd/dll/example/Makefile +1 -1
  69. data/ext/zstdruby/libzstd/legacy/zstd_legacy.h +1 -1
  70. data/ext/zstdruby/libzstd/legacy/zstd_v01.c +1 -1
  71. data/ext/zstdruby/libzstd/legacy/zstd_v01.h +1 -1
  72. data/ext/zstdruby/libzstd/legacy/zstd_v02.c +1 -1
  73. data/ext/zstdruby/libzstd/legacy/zstd_v02.h +1 -1
  74. data/ext/zstdruby/libzstd/legacy/zstd_v03.c +1 -1
  75. data/ext/zstdruby/libzstd/legacy/zstd_v03.h +1 -1
  76. data/ext/zstdruby/libzstd/legacy/zstd_v04.c +1 -1
  77. data/ext/zstdruby/libzstd/legacy/zstd_v04.h +1 -1
  78. data/ext/zstdruby/libzstd/legacy/zstd_v05.c +1 -1
  79. data/ext/zstdruby/libzstd/legacy/zstd_v05.h +1 -1
  80. data/ext/zstdruby/libzstd/legacy/zstd_v06.c +1 -1
  81. data/ext/zstdruby/libzstd/legacy/zstd_v06.h +1 -1
  82. data/ext/zstdruby/libzstd/legacy/zstd_v07.c +1 -1
  83. data/ext/zstdruby/libzstd/legacy/zstd_v07.h +1 -1
  84. data/ext/zstdruby/libzstd/{dictBuilder/zdict.h → zdict.h} +148 -2
  85. data/ext/zstdruby/libzstd/zstd.h +165 -83
  86. data/ext/zstdruby/libzstd/{common/zstd_errors.h → zstd_errors.h} +1 -1
  87. data/lib/zstd-ruby/version.rb +1 -1
  88. metadata +5 -5
  89. data/ext/zstdruby/libzstd/common/zstd_trace.c +0 -42
@@ -1,5 +1,5 @@
  /*
- * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
@@ -1,5 +1,5 @@
  /*
- * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
@@ -1,6 +1,6 @@
  /*
  * xxHash - Fast Hash algorithm
- * Copyright (c) 2012-2021, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  *
  * You can contact the author at :
  * - xxHash homepage: http://www.xxhash.com
@@ -30,9 +30,7 @@
  * Prefer these methods in priority order (0 > 1 > 2)
  */
  #ifndef XXH_FORCE_MEMORY_ACCESS /* can be defined externally, on command line for example */
- # if defined(__GNUC__) && ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) )
- # define XXH_FORCE_MEMORY_ACCESS 2
- # elif (defined(__INTEL_COMPILER) && !defined(WIN32)) || \
+ # if (defined(__INTEL_COMPILER) && !defined(WIN32)) || \
  (defined(__GNUC__) && ( defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7S__) )) || \
  defined(__ICCARM__)
  # define XXH_FORCE_MEMORY_ACCESS 1
@@ -1,7 +1,7 @@
  /*
  * xxHash - Extremely Fast Hash algorithm
  * Header File
- * Copyright (c) 2012-2021, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  *
  * You can contact the author at :
  * - xxHash source repository : https://github.com/Cyan4973/xxHash
@@ -1,5 +1,5 @@
  /*
- * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
@@ -1,5 +1,5 @@
  /*
- * Copyright (c) 2016-2021, Facebook, Inc.
+ * Copyright (c) Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
@@ -1,5 +1,5 @@
  /*
- * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
@@ -36,6 +36,11 @@
  # define XXH_STATIC_LINKING_ONLY /* XXH64_state_t */
  #endif
  #include "xxhash.h" /* XXH_reset, update, digest */
+ #ifndef ZSTD_NO_TRACE
+ # include "zstd_trace.h"
+ #else
+ # define ZSTD_TRACE 0
+ #endif

  #if defined (__cplusplus)
  extern "C" {
@@ -347,11 +352,18 @@ typedef enum {
  * Private declarations
  *********************************************/
  typedef struct seqDef_s {
- U32 offset; /* Offset code of the sequence */
+ U32 offset; /* offset == rawOffset + ZSTD_REP_NUM, or equivalently, offCode + 1 */
  U16 litLength;
  U16 matchLength;
  } seqDef;

+ /* Controls whether seqStore has a single "long" litLength or matchLength. See seqStore_t. */
+ typedef enum {
+ ZSTD_llt_none = 0, /* no longLengthType */
+ ZSTD_llt_literalLength = 1, /* represents a long literal */
+ ZSTD_llt_matchLength = 2 /* represents a long match */
+ } ZSTD_longLengthType_e;
+
  typedef struct {
  seqDef* sequencesStart;
  seqDef* sequences; /* ptr to end of sequences */
@@ -363,12 +375,12 @@ typedef struct {
  size_t maxNbSeq;
  size_t maxNbLit;

- /* longLengthPos and longLengthID to allow us to represent either a single litLength or matchLength
+ /* longLengthPos and longLengthType to allow us to represent either a single litLength or matchLength
  * in the seqStore that has a value larger than U16 (if it exists). To do so, we increment
- * the existing value of the litLength or matchLength by 0x10000.
+ * the existing value of the litLength or matchLength by 0x10000.
  */
- U32 longLengthID; /* 0 == no longLength; 1 == Represent the long literal; 2 == Represent the long match; */
- U32 longLengthPos; /* Index of the sequence to apply long length modification to */
+ ZSTD_longLengthType_e longLengthType;
+ U32 longLengthPos; /* Index of the sequence to apply long length modification to */
  } seqStore_t;

  typedef struct {
@@ -378,7 +390,7 @@ typedef struct {

  /**
  * Returns the ZSTD_sequenceLength for the given sequences. It handles the decoding of long sequences
- * indicated by longLengthPos and longLengthID, and adds MINMATCH back to matchLength.
+ * indicated by longLengthPos and longLengthType, and adds MINMATCH back to matchLength.
  */
  MEM_STATIC ZSTD_sequenceLength ZSTD_getSequenceLength(seqStore_t const* seqStore, seqDef const* seq)
  {
@@ -386,10 +398,10 @@ MEM_STATIC ZSTD_sequenceLength ZSTD_getSequenceLength(seqStore_t const* seqStore
  seqLen.litLength = seq->litLength;
  seqLen.matchLength = seq->matchLength + MINMATCH;
  if (seqStore->longLengthPos == (U32)(seq - seqStore->sequencesStart)) {
- if (seqStore->longLengthID == 1) {
+ if (seqStore->longLengthType == ZSTD_llt_literalLength) {
  seqLen.litLength += 0xFFFF;
  }
- if (seqStore->longLengthID == 2) {
+ if (seqStore->longLengthType == ZSTD_llt_matchLength) {
  seqLen.matchLength += 0xFFFF;
  }
  }
@@ -1,5 +1,5 @@
  /*
- * Copyright (c) 2016-2021, Facebook, Inc.
+ * Copyright (c) Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
@@ -114,14 +114,15 @@ typedef unsigned long long ZSTD_TraceCtx;
  * @returns Non-zero if tracing is enabled. The return value is
  * passed to ZSTD_trace_compress_end().
  */
- ZSTD_TraceCtx ZSTD_trace_compress_begin(struct ZSTD_CCtx_s const* cctx);
+ ZSTD_WEAK_ATTR ZSTD_TraceCtx ZSTD_trace_compress_begin(
+ struct ZSTD_CCtx_s const* cctx);

  /**
  * Trace the end of a compression call.
  * @param ctx The return value of ZSTD_trace_compress_begin().
  * @param trace The zstd tracing info.
  */
- void ZSTD_trace_compress_end(
+ ZSTD_WEAK_ATTR void ZSTD_trace_compress_end(
  ZSTD_TraceCtx ctx,
  ZSTD_Trace const* trace);

@@ -132,14 +133,15 @@ void ZSTD_trace_compress_end(
  * @returns Non-zero if tracing is enabled. The return value is
  * passed to ZSTD_trace_compress_end().
  */
- ZSTD_TraceCtx ZSTD_trace_decompress_begin(struct ZSTD_DCtx_s const* dctx);
+ ZSTD_WEAK_ATTR ZSTD_TraceCtx ZSTD_trace_decompress_begin(
+ struct ZSTD_DCtx_s const* dctx);

  /**
  * Trace the end of a decompression call.
  * @param ctx The return value of ZSTD_trace_decompress_begin().
  * @param trace The zstd tracing info.
  */
- void ZSTD_trace_decompress_end(
+ ZSTD_WEAK_ATTR void ZSTD_trace_decompress_end(
  ZSTD_TraceCtx ctx,
  ZSTD_Trace const* trace);

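Note: ZSTD_WEAK_ATTR turns these tracing hooks into weak symbols, so libzstd links even when no tracer is present, and an application can supply strong definitions to receive trace events (building with ZSTD_NO_TRACE, as wired up in zstd_internal.h above, compiles tracing out entirely). A minimal sketch of such an override, assuming a platform where zstd_trace.h enables weak symbols (ZSTD_HAVE_WEAK_SYMBOLS) and the ZSTD_Trace fields declared earlier in that header:

    #include <stdio.h>
    #include "zstd_trace.h"   /* path within the libzstd source tree */

    /* Our strong definitions take precedence over libzstd's weak ones. */
    ZSTD_TraceCtx ZSTD_trace_compress_begin(struct ZSTD_CCtx_s const* cctx)
    {
        (void)cctx;
        return 1;   /* non-zero enables tracing for this compression call */
    }

    void ZSTD_trace_compress_end(ZSTD_TraceCtx ctx, ZSTD_Trace const* trace)
    {
        (void)ctx;
        fprintf(stderr, "zstd: %llu -> %llu bytes\n",
                (unsigned long long)trace->uncompressedSize,
                (unsigned long long)trace->compressedSize);
    }

The decompression hooks can be overridden independently; any hook left undefined falls back to the weak default.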
@@ -1,6 +1,6 @@
  /* ******************************************************************
  * FSE : Finite State Entropy encoder
- * Copyright (c) 2013-2021, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  *
  * You can contact the author at :
  * - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
@@ -1,7 +1,7 @@
  /* ******************************************************************
  * hist : Histogram functions
  * part of Finite State Entropy project
- * Copyright (c) 2013-2021, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  *
  * You can contact the author at :
  * - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
@@ -1,7 +1,7 @@
  /* ******************************************************************
  * hist : Histogram functions
  * part of Finite State Entropy project
- * Copyright (c) 2013-2021, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  *
  * You can contact the author at :
  * - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
@@ -1,6 +1,6 @@
  /* ******************************************************************
  * Huffman encoder, part of New Generation Entropy library
- * Copyright (c) 2013-2021, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  *
  * You can contact the author at :
  * - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy
@@ -59,7 +59,15 @@ unsigned HUF_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxS
  * Note : all elements within weightTable are supposed to be <= HUF_TABLELOG_MAX.
  */
  #define MAX_FSE_TABLELOG_FOR_HUFF_HEADER 6
- static size_t HUF_compressWeights (void* dst, size_t dstSize, const void* weightTable, size_t wtSize)
+
+ typedef struct {
+ FSE_CTable CTable[FSE_CTABLE_SIZE_U32(MAX_FSE_TABLELOG_FOR_HUFF_HEADER, HUF_TABLELOG_MAX)];
+ U32 scratchBuffer[FSE_BUILD_CTABLE_WORKSPACE_SIZE_U32(HUF_TABLELOG_MAX, MAX_FSE_TABLELOG_FOR_HUFF_HEADER)];
+ unsigned count[HUF_TABLELOG_MAX+1];
+ S16 norm[HUF_TABLELOG_MAX+1];
+ } HUF_CompressWeightsWksp;
+
+ static size_t HUF_compressWeights(void* dst, size_t dstSize, const void* weightTable, size_t wtSize, void* workspace, size_t workspaceSize)
  {
  BYTE* const ostart = (BYTE*) dst;
  BYTE* op = ostart;
@@ -67,33 +75,30 @@ static size_t HUF_compressWeights (void* dst, size_t dstSize, const void* weight

  unsigned maxSymbolValue = HUF_TABLELOG_MAX;
  U32 tableLog = MAX_FSE_TABLELOG_FOR_HUFF_HEADER;
+ HUF_CompressWeightsWksp* wksp = (HUF_CompressWeightsWksp*)workspace;

- FSE_CTable CTable[FSE_CTABLE_SIZE_U32(MAX_FSE_TABLELOG_FOR_HUFF_HEADER, HUF_TABLELOG_MAX)];
- U32 scratchBuffer[FSE_BUILD_CTABLE_WORKSPACE_SIZE_U32(HUF_TABLELOG_MAX, MAX_FSE_TABLELOG_FOR_HUFF_HEADER)];
-
- unsigned count[HUF_TABLELOG_MAX+1];
- S16 norm[HUF_TABLELOG_MAX+1];
+ if (workspaceSize < sizeof(HUF_CompressWeightsWksp)) return ERROR(GENERIC);

  /* init conditions */
  if (wtSize <= 1) return 0; /* Not compressible */

  /* Scan input and build symbol stats */
- { unsigned const maxCount = HIST_count_simple(count, &maxSymbolValue, weightTable, wtSize); /* never fails */
+ { unsigned const maxCount = HIST_count_simple(wksp->count, &maxSymbolValue, weightTable, wtSize); /* never fails */
  if (maxCount == wtSize) return 1; /* only a single symbol in src : rle */
  if (maxCount == 1) return 0; /* each symbol present maximum once => not compressible */
  }

  tableLog = FSE_optimalTableLog(tableLog, wtSize, maxSymbolValue);
- CHECK_F( FSE_normalizeCount(norm, tableLog, count, wtSize, maxSymbolValue, /* useLowProbCount */ 0) );
+ CHECK_F( FSE_normalizeCount(wksp->norm, tableLog, wksp->count, wtSize, maxSymbolValue, /* useLowProbCount */ 0) );

  /* Write table description header */
- { CHECK_V_F(hSize, FSE_writeNCount(op, (size_t)(oend-op), norm, maxSymbolValue, tableLog) );
+ { CHECK_V_F(hSize, FSE_writeNCount(op, (size_t)(oend-op), wksp->norm, maxSymbolValue, tableLog) );
  op += hSize;
  }

  /* Compress */
- CHECK_F( FSE_buildCTable_wksp(CTable, norm, maxSymbolValue, tableLog, scratchBuffer, sizeof(scratchBuffer)) );
- { CHECK_V_F(cSize, FSE_compress_usingCTable(op, (size_t)(oend - op), weightTable, wtSize, CTable) );
+ CHECK_F( FSE_buildCTable_wksp(wksp->CTable, wksp->norm, maxSymbolValue, tableLog, wksp->scratchBuffer, sizeof(wksp->scratchBuffer)) );
+ { CHECK_V_F(cSize, FSE_compress_usingCTable(op, (size_t)(oend - op), weightTable, wtSize, wksp->CTable) );
  if (cSize == 0) return 0; /* not enough space for compressed data */
  op += cSize;
  }
@@ -102,29 +107,33 @@ static size_t HUF_compressWeights (void* dst, size_t dstSize, const void* weight
  }


- /*! HUF_writeCTable() :
- `CTable` : Huffman tree to save, using huf representation.
- @return : size of saved CTable */
- size_t HUF_writeCTable (void* dst, size_t maxDstSize,
- const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog)
- {
+ typedef struct {
+ HUF_CompressWeightsWksp wksp;
  BYTE bitsToWeight[HUF_TABLELOG_MAX + 1]; /* precomputed conversion table */
  BYTE huffWeight[HUF_SYMBOLVALUE_MAX];
+ } HUF_WriteCTableWksp;
+
+ size_t HUF_writeCTable_wksp(void* dst, size_t maxDstSize,
+ const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog,
+ void* workspace, size_t workspaceSize)
+ {
  BYTE* op = (BYTE*)dst;
  U32 n;
+ HUF_WriteCTableWksp* wksp = (HUF_WriteCTableWksp*)workspace;

- /* check conditions */
+ /* check conditions */
+ if (workspaceSize < sizeof(HUF_WriteCTableWksp)) return ERROR(GENERIC);
  if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) return ERROR(maxSymbolValue_tooLarge);

  /* convert to weight */
- bitsToWeight[0] = 0;
+ wksp->bitsToWeight[0] = 0;
  for (n=1; n<huffLog+1; n++)
- bitsToWeight[n] = (BYTE)(huffLog + 1 - n);
+ wksp->bitsToWeight[n] = (BYTE)(huffLog + 1 - n);
  for (n=0; n<maxSymbolValue; n++)
- huffWeight[n] = bitsToWeight[CTable[n].nbBits];
+ wksp->huffWeight[n] = wksp->bitsToWeight[CTable[n].nbBits];

  /* attempt weights compression by FSE */
- { CHECK_V_F(hSize, HUF_compressWeights(op+1, maxDstSize-1, huffWeight, maxSymbolValue) );
+ { CHECK_V_F(hSize, HUF_compressWeights(op+1, maxDstSize-1, wksp->huffWeight, maxSymbolValue, &wksp->wksp, sizeof(wksp->wksp)) );
  if ((hSize>1) & (hSize < maxSymbolValue/2)) { /* FSE compressed */
  op[0] = (BYTE)hSize;
  return hSize+1;
@@ -134,12 +143,22 @@ size_t HUF_writeCTable (void* dst, size_t maxDstSize,
  if (maxSymbolValue > (256-128)) return ERROR(GENERIC); /* should not happen : likely means source cannot be compressed */
  if (((maxSymbolValue+1)/2) + 1 > maxDstSize) return ERROR(dstSize_tooSmall); /* not enough space within dst buffer */
  op[0] = (BYTE)(128 /*special case*/ + (maxSymbolValue-1));
- huffWeight[maxSymbolValue] = 0; /* to be sure it doesn't cause msan issue in final combination */
+ wksp->huffWeight[maxSymbolValue] = 0; /* to be sure it doesn't cause msan issue in final combination */
  for (n=0; n<maxSymbolValue; n+=2)
- op[(n/2)+1] = (BYTE)((huffWeight[n] << 4) + huffWeight[n+1]);
+ op[(n/2)+1] = (BYTE)((wksp->huffWeight[n] << 4) + wksp->huffWeight[n+1]);
  return ((maxSymbolValue+1)/2) + 1;
  }

+ /*! HUF_writeCTable() :
+ `CTable` : Huffman tree to save, using huf representation.
+ @return : size of saved CTable */
+ size_t HUF_writeCTable (void* dst, size_t maxDstSize,
+ const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog)
+ {
+ HUF_WriteCTableWksp wksp;
+ return HUF_writeCTable_wksp(dst, maxDstSize, CTable, maxSymbolValue, huffLog, &wksp, sizeof(wksp));
+ }
+

  size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize, unsigned* hasZeroWeights)
  {
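Note: the pattern above recurs throughout this release: functions that used to keep large arrays on the stack now take an explicit caller-provided workspace, and the old entry point survives as a thin wrapper that forwards stack memory. A generic sketch of the idiom, using hypothetical names (this is not a zstd API):

    #include <stddef.h>

    /* Illustrative only: large locals become a struct overlaid on caller memory. */
    typedef struct { unsigned count[256]; short norm[256]; } ExampleWksp;

    static size_t example_with_wksp(void* workspace, size_t workspaceSize)
    {
        ExampleWksp* const w = (ExampleWksp*)workspace;
        if (workspaceSize < sizeof(ExampleWksp)) return (size_t)-1;  /* caller buffer too small */
        w->count[0] = 0;  /* ... operate on w-> members where stack arrays used to be ... */
        return 0;
    }

The up-front size check is what lets HUF_writeCTable_wksp fail cleanly with ERROR(GENERIC) instead of overrunning a too-small caller buffer.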
@@ -732,7 +751,10 @@ static size_t HUF_compressCTable_internal(
  typedef struct {
  unsigned count[HUF_SYMBOLVALUE_MAX + 1];
  HUF_CElt CTable[HUF_SYMBOLVALUE_MAX + 1];
- HUF_buildCTable_wksp_tables buildCTable_wksp;
+ union {
+ HUF_buildCTable_wksp_tables buildCTable_wksp;
+ HUF_WriteCTableWksp writeCTable_wksp;
+ } wksps;
  } HUF_compress_tables_t;

  /* HUF_compress_internal() :
@@ -795,7 +817,7 @@ HUF_compress_internal (void* dst, size_t dstSize,
  huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue);
  { size_t const maxBits = HUF_buildCTable_wksp(table->CTable, table->count,
  maxSymbolValue, huffLog,
- &table->buildCTable_wksp, sizeof(table->buildCTable_wksp));
+ &table->wksps.buildCTable_wksp, sizeof(table->wksps.buildCTable_wksp));
  CHECK_F(maxBits);
  huffLog = (U32)maxBits;
  /* Zero unused symbols in CTable, so we can check it for validity */
@@ -804,7 +826,8 @@
  }

  /* Write table description header */
- { CHECK_V_F(hSize, HUF_writeCTable (op, dstSize, table->CTable, maxSymbolValue, huffLog) );
+ { CHECK_V_F(hSize, HUF_writeCTable_wksp(op, dstSize, table->CTable, maxSymbolValue, huffLog,
+ &table->wksps.writeCTable_wksp, sizeof(table->wksps.writeCTable_wksp)) );
  /* Check if using previous huffman table is beneficial */
  if (repeat && *repeat != HUF_repeat_none) {
  size_t const oldSize = HUF_estimateCompressedSize(oldHufTable, table->count, maxSymbolValue);
@@ -1,5 +1,5 @@
  /*
- * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
@@ -14,7 +14,6 @@
  #include "../common/zstd_deps.h" /* INT_MAX, ZSTD_memset, ZSTD_memcpy */
  #include "../common/cpu.h"
  #include "../common/mem.h"
- #include "../common/zstd_trace.h"
  #include "hist.h" /* HIST_countFast_wksp */
  #define FSE_STATIC_LINKING_ONLY /* FSE_encodeSymbol */
  #include "../common/fse.h"
@@ -73,6 +72,10 @@ struct ZSTD_CDict_s {
  ZSTD_customMem customMem;
  U32 dictID;
  int compressionLevel; /* 0 indicates that advanced API was used to select CDict params */
+ ZSTD_useRowMatchFinderMode_e useRowMatchFinder; /* Indicates whether the CDict was created with params that would use
+ * row-based matchfinder. Unless the cdict is reloaded, we will use
+ * the same greedy/lazy matchfinder at compression time.
+ */
  }; /* typedef'd to ZSTD_CDict within "zstd.h" */

  ZSTD_CCtx* ZSTD_createCCtx(void)
@@ -203,6 +206,49 @@ size_t ZSTD_sizeof_CStream(const ZSTD_CStream* zcs)
  /* private API call, for dictBuilder only */
  const seqStore_t* ZSTD_getSeqStore(const ZSTD_CCtx* ctx) { return &(ctx->seqStore); }

+ /* Returns true if the strategy supports using a row based matchfinder */
+ static int ZSTD_rowMatchFinderSupported(const ZSTD_strategy strategy) {
+ return (strategy >= ZSTD_greedy && strategy <= ZSTD_lazy2);
+ }
+
+ /* Returns true if the strategy and useRowMatchFinder mode indicate that we will use the row based matchfinder
+ * for this compression.
+ */
+ static int ZSTD_rowMatchFinderUsed(const ZSTD_strategy strategy, const ZSTD_useRowMatchFinderMode_e mode) {
+ assert(mode != ZSTD_urm_auto);
+ return ZSTD_rowMatchFinderSupported(strategy) && (mode == ZSTD_urm_enableRowMatchFinder);
+ }
+
+ /* Returns row matchfinder usage enum given an initial mode and cParams */
+ static ZSTD_useRowMatchFinderMode_e ZSTD_resolveRowMatchFinderMode(ZSTD_useRowMatchFinderMode_e mode,
+ const ZSTD_compressionParameters* const cParams) {
+ #if !defined(ZSTD_NO_INTRINSICS) && (defined(__SSE2__) || defined(__ARM_NEON))
+ int const kHasSIMD128 = 1;
+ #else
+ int const kHasSIMD128 = 0;
+ #endif
+ if (mode != ZSTD_urm_auto) return mode; /* if requested enabled, but no SIMD, we still will use row matchfinder */
+ mode = ZSTD_urm_disableRowMatchFinder;
+ if (!ZSTD_rowMatchFinderSupported(cParams->strategy)) return mode;
+ if (kHasSIMD128) {
+ if (cParams->windowLog > 14) mode = ZSTD_urm_enableRowMatchFinder;
+ } else {
+ if (cParams->windowLog > 17) mode = ZSTD_urm_enableRowMatchFinder;
+ }
+ return mode;
+ }
+
+ /* Returns 1 if the arguments indicate that we should allocate a chainTable, 0 otherwise */
+ static int ZSTD_allocateChainTable(const ZSTD_strategy strategy,
+ const ZSTD_useRowMatchFinderMode_e useRowMatchFinder,
+ const U32 forDDSDict) {
+ assert(useRowMatchFinder != ZSTD_urm_auto);
+ /* We always should allocate a chaintable if we are allocating a matchstate for a DDS dictionary matchstate.
+ * We do not allocate a chaintable if we are using ZSTD_fast, or are using the row-based matchfinder.
+ */
+ return forDDSDict || ((strategy != ZSTD_fast) && !ZSTD_rowMatchFinderUsed(strategy, useRowMatchFinder));
+ }
+
  /* Returns 1 if compression parameters are such that we should
  * enable long distance matching (wlog >= 27, strategy >= btopt).
  * Returns 0 otherwise.
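Note: concretely, with ZSTD_urm_auto and a greedy/lazy strategy, the resolution above enables the row-based matchfinder on SIMD-capable targets (SSE2 or NEON) once windowLog exceeds 14 (windows larger than 16 KB), and on scalar targets only once windowLog exceeds 17 (larger than 128 KB); for ZSTD_fast and for btlazy2 or stronger strategies it always resolves to disabled.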
@@ -211,6 +257,14 @@ static U32 ZSTD_CParams_shouldEnableLdm(const ZSTD_compressionParameters* const
  return cParams->strategy >= ZSTD_btopt && cParams->windowLog >= 27;
  }

+ /* Returns 1 if compression parameters are such that we should
+ * enable blockSplitter (wlog >= 17, strategy >= btopt).
+ * Returns 0 otherwise.
+ */
+ static U32 ZSTD_CParams_useBlockSplitter(const ZSTD_compressionParameters* const cParams) {
+ return cParams->strategy >= ZSTD_btopt && cParams->windowLog >= 17;
+ }
+
  static ZSTD_CCtx_params ZSTD_makeCCtxParamsFromCParams(
  ZSTD_compressionParameters cParams)
  {
@@ -219,6 +273,7 @@ static ZSTD_CCtx_params ZSTD_makeCCtxParamsFromCParams(
  ZSTD_CCtxParams_init(&cctxParams, ZSTD_CLEVEL_DEFAULT);
  cctxParams.cParams = cParams;

+ /* Adjust advanced params according to cParams */
  if (ZSTD_CParams_shouldEnableLdm(&cParams)) {
  DEBUGLOG(4, "ZSTD_makeCCtxParamsFromCParams(): Including LDM into cctx params");
  cctxParams.ldmParams.enableLdm = 1;
@@ -228,6 +283,12 @@ static ZSTD_CCtx_params ZSTD_makeCCtxParamsFromCParams(
  assert(cctxParams.ldmParams.hashRateLog < 32);
  }

+ if (ZSTD_CParams_useBlockSplitter(&cParams)) {
+ DEBUGLOG(4, "ZSTD_makeCCtxParamsFromCParams(): Including block splitting into cctx params");
+ cctxParams.splitBlocks = 1;
+ }
+
+ cctxParams.useRowMatchFinder = ZSTD_resolveRowMatchFinderMode(cctxParams.useRowMatchFinder, &cParams);
  assert(!ZSTD_checkCParams(cParams));
  return cctxParams;
  }
@@ -286,6 +347,8 @@ static void ZSTD_CCtxParams_init_internal(ZSTD_CCtx_params* cctxParams, ZSTD_par
  * But, set it for tracing anyway.
  */
  cctxParams->compressionLevel = compressionLevel;
+ cctxParams->useRowMatchFinder = ZSTD_resolveRowMatchFinderMode(cctxParams->useRowMatchFinder, &params->cParams);
+ DEBUGLOG(4, "ZSTD_CCtxParams_init_internal: useRowMatchFinder=%d", cctxParams->useRowMatchFinder);
  }

  size_t ZSTD_CCtxParams_init_advanced(ZSTD_CCtx_params* cctxParams, ZSTD_parameters params)
@@ -486,6 +549,21 @@ ZSTD_bounds ZSTD_cParam_getBounds(ZSTD_cParameter param)
  bounds.upperBound = 1;
  return bounds;

+ case ZSTD_c_splitBlocks:
+ bounds.lowerBound = 0;
+ bounds.upperBound = 1;
+ return bounds;
+
+ case ZSTD_c_useRowMatchFinder:
+ bounds.lowerBound = (int)ZSTD_urm_auto;
+ bounds.upperBound = (int)ZSTD_urm_enableRowMatchFinder;
+ return bounds;
+
+ case ZSTD_c_deterministicRefPrefix:
+ bounds.lowerBound = 0;
+ bounds.upperBound = 1;
+ return bounds;
+
  default:
  bounds.error = ERROR(parameter_unsupported);
  return bounds;
@@ -547,6 +625,9 @@ static int ZSTD_isUpdateAuthorized(ZSTD_cParameter param)
  case ZSTD_c_stableOutBuffer:
  case ZSTD_c_blockDelimiters:
  case ZSTD_c_validateSequences:
+ case ZSTD_c_splitBlocks:
+ case ZSTD_c_useRowMatchFinder:
+ case ZSTD_c_deterministicRefPrefix:
  default:
  return 0;
  }
@@ -599,6 +680,9 @@ size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, int value)
  case ZSTD_c_stableOutBuffer:
  case ZSTD_c_blockDelimiters:
  case ZSTD_c_validateSequences:
+ case ZSTD_c_splitBlocks:
+ case ZSTD_c_useRowMatchFinder:
+ case ZSTD_c_deterministicRefPrefix:
  break;

  default: RETURN_ERROR(parameter_unsupported, "unknown parameter");
@@ -810,6 +894,21 @@ size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* CCtxParams,
  CCtxParams->validateSequences = value;
  return CCtxParams->validateSequences;

+ case ZSTD_c_splitBlocks:
+ BOUNDCHECK(ZSTD_c_splitBlocks, value);
+ CCtxParams->splitBlocks = value;
+ return CCtxParams->splitBlocks;
+
+ case ZSTD_c_useRowMatchFinder:
+ BOUNDCHECK(ZSTD_c_useRowMatchFinder, value);
+ CCtxParams->useRowMatchFinder = (ZSTD_useRowMatchFinderMode_e)value;
+ return CCtxParams->useRowMatchFinder;
+
+ case ZSTD_c_deterministicRefPrefix:
+ BOUNDCHECK(ZSTD_c_deterministicRefPrefix, value);
+ CCtxParams->deterministicRefPrefix = !!value;
+ return CCtxParams->deterministicRefPrefix;
+
  default: RETURN_ERROR(parameter_unsupported, "unknown parameter");
  }
  }
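Note: all three new parameters are reachable through the regular advanced API. A minimal sketch, assuming (as is usual for newly added parameters) that ZSTD_c_splitBlocks, ZSTD_c_useRowMatchFinder and ZSTD_c_deterministicRefPrefix live in the experimental section of zstd.h, hence ZSTD_STATIC_LINKING_ONLY:

    #define ZSTD_STATIC_LINKING_ONLY
    #include <zstd.h>

    static ZSTD_CCtx* make_cctx_with_new_params(void)
    {
        ZSTD_CCtx* const cctx = ZSTD_createCCtx();
        /* 0 = auto (default), 1 = disable, 2 = enable, per ZSTD_useRowMatchFinderMode_e */
        ZSTD_CCtx_setParameter(cctx, ZSTD_c_useRowMatchFinder, 2);
        ZSTD_CCtx_setParameter(cctx, ZSTD_c_splitBlocks, 1);
        ZSTD_CCtx_setParameter(cctx, ZSTD_c_deterministicRefPrefix, 1);
        return cctx;
    }

Each call is bounds-checked against ZSTD_cParam_getBounds() above, so out-of-range values fail rather than being clamped.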
@@ -933,6 +1032,15 @@ size_t ZSTD_CCtxParams_getParameter(
  case ZSTD_c_validateSequences :
  *value = (int)CCtxParams->validateSequences;
  break;
+ case ZSTD_c_splitBlocks :
+ *value = (int)CCtxParams->splitBlocks;
+ break;
+ case ZSTD_c_useRowMatchFinder :
+ *value = (int)CCtxParams->useRowMatchFinder;
+ break;
+ case ZSTD_c_deterministicRefPrefix:
+ *value = (int)CCtxParams->deterministicRefPrefix;
+ break;
  default: RETURN_ERROR(parameter_unsupported, "unknown parameter");
  }
  return 0;
@@ -1299,9 +1407,14 @@ ZSTD_compressionParameters ZSTD_getCParamsFromCCtxParams(

  static size_t
  ZSTD_sizeof_matchState(const ZSTD_compressionParameters* const cParams,
+ const ZSTD_useRowMatchFinderMode_e useRowMatchFinder,
+ const U32 enableDedicatedDictSearch,
  const U32 forCCtx)
  {
- size_t const chainSize = (cParams->strategy == ZSTD_fast) ? 0 : ((size_t)1 << cParams->chainLog);
+ /* chain table size should be 0 for fast or row-hash strategies */
+ size_t const chainSize = ZSTD_allocateChainTable(cParams->strategy, useRowMatchFinder, enableDedicatedDictSearch && !forCCtx)
+ ? ((size_t)1 << cParams->chainLog)
+ : 0;
  size_t const hSize = ((size_t)1) << cParams->hashLog;
  U32 const hashLog3 = (forCCtx && cParams->minMatch==3) ? MIN(ZSTD_HASHLOG3_MAX, cParams->windowLog) : 0;
  size_t const h3Size = hashLog3 ? ((size_t)1) << hashLog3 : 0;
@@ -1311,24 +1424,34 @@ ZSTD_sizeof_matchState(const ZSTD_compressionParameters* const cParams,
  + hSize * sizeof(U32)
  + h3Size * sizeof(U32);
  size_t const optPotentialSpace =
- ZSTD_cwksp_alloc_size((MaxML+1) * sizeof(U32))
- + ZSTD_cwksp_alloc_size((MaxLL+1) * sizeof(U32))
- + ZSTD_cwksp_alloc_size((MaxOff+1) * sizeof(U32))
- + ZSTD_cwksp_alloc_size((1<<Litbits) * sizeof(U32))
- + ZSTD_cwksp_alloc_size((ZSTD_OPT_NUM+1) * sizeof(ZSTD_match_t))
- + ZSTD_cwksp_alloc_size((ZSTD_OPT_NUM+1) * sizeof(ZSTD_optimal_t));
+ ZSTD_cwksp_aligned_alloc_size((MaxML+1) * sizeof(U32))
+ + ZSTD_cwksp_aligned_alloc_size((MaxLL+1) * sizeof(U32))
+ + ZSTD_cwksp_aligned_alloc_size((MaxOff+1) * sizeof(U32))
+ + ZSTD_cwksp_aligned_alloc_size((1<<Litbits) * sizeof(U32))
+ + ZSTD_cwksp_aligned_alloc_size((ZSTD_OPT_NUM+1) * sizeof(ZSTD_match_t))
+ + ZSTD_cwksp_aligned_alloc_size((ZSTD_OPT_NUM+1) * sizeof(ZSTD_optimal_t));
+ size_t const lazyAdditionalSpace = ZSTD_rowMatchFinderUsed(cParams->strategy, useRowMatchFinder)
+ ? ZSTD_cwksp_aligned_alloc_size(hSize*sizeof(U16))
+ : 0;
  size_t const optSpace = (forCCtx && (cParams->strategy >= ZSTD_btopt))
  ? optPotentialSpace
  : 0;
+ size_t const slackSpace = ZSTD_cwksp_slack_space_required();
+
+ /* tables are guaranteed to be sized in multiples of 64 bytes (or 16 uint32_t) */
+ ZSTD_STATIC_ASSERT(ZSTD_HASHLOG_MIN >= 4 && ZSTD_WINDOWLOG_MIN >= 4 && ZSTD_CHAINLOG_MIN >= 4);
+ assert(useRowMatchFinder != ZSTD_urm_auto);
+
  DEBUGLOG(4, "chainSize: %u - hSize: %u - h3Size: %u",
  (U32)chainSize, (U32)hSize, (U32)h3Size);
- return tableSpace + optSpace;
+ return tableSpace + optSpace + slackSpace + lazyAdditionalSpace;
  }

  static size_t ZSTD_estimateCCtxSize_usingCCtxParams_internal(
  const ZSTD_compressionParameters* cParams,
  const ldmParams_t* ldmParams,
  const int isStatic,
+ const ZSTD_useRowMatchFinderMode_e useRowMatchFinder,
  const size_t buffInSize,
  const size_t buffOutSize,
  const U64 pledgedSrcSize)
@@ -1338,16 +1461,16 @@ static size_t ZSTD_estimateCCtxSize_usingCCtxParams_internal(
  U32 const divider = (cParams->minMatch==3) ? 3 : 4;
  size_t const maxNbSeq = blockSize / divider;
  size_t const tokenSpace = ZSTD_cwksp_alloc_size(WILDCOPY_OVERLENGTH + blockSize)
- + ZSTD_cwksp_alloc_size(maxNbSeq * sizeof(seqDef))
+ + ZSTD_cwksp_aligned_alloc_size(maxNbSeq * sizeof(seqDef))
  + 3 * ZSTD_cwksp_alloc_size(maxNbSeq * sizeof(BYTE));
  size_t const entropySpace = ZSTD_cwksp_alloc_size(ENTROPY_WORKSPACE_SIZE);
  size_t const blockStateSpace = 2 * ZSTD_cwksp_alloc_size(sizeof(ZSTD_compressedBlockState_t));
- size_t const matchStateSize = ZSTD_sizeof_matchState(cParams, /* forCCtx */ 1);
+ size_t const matchStateSize = ZSTD_sizeof_matchState(cParams, useRowMatchFinder, /* enableDedicatedDictSearch */ 0, /* forCCtx */ 1);

  size_t const ldmSpace = ZSTD_ldm_getTableSize(*ldmParams);
  size_t const maxNbLdmSeq = ZSTD_ldm_getMaxNbSeq(*ldmParams, blockSize);
  size_t const ldmSeqSpace = ldmParams->enableLdm ?
- ZSTD_cwksp_alloc_size(maxNbLdmSeq * sizeof(rawSeq)) : 0;
+ ZSTD_cwksp_aligned_alloc_size(maxNbLdmSeq * sizeof(rawSeq)) : 0;


  size_t const bufferSpace = ZSTD_cwksp_alloc_size(buffInSize)
@@ -1373,25 +1496,45 @@ size_t ZSTD_estimateCCtxSize_usingCCtxParams(const ZSTD_CCtx_params* params)
  {
  ZSTD_compressionParameters const cParams =
  ZSTD_getCParamsFromCCtxParams(params, ZSTD_CONTENTSIZE_UNKNOWN, 0, ZSTD_cpm_noAttachDict);
+ ZSTD_useRowMatchFinderMode_e const useRowMatchFinder = ZSTD_resolveRowMatchFinderMode(params->useRowMatchFinder,
+ &cParams);

  RETURN_ERROR_IF(params->nbWorkers > 0, GENERIC, "Estimate CCtx size is supported for single-threaded compression only.");
  /* estimateCCtxSize is for one-shot compression. So no buffers should
  * be needed. However, we still allocate two 0-sized buffers, which can
  * take space under ASAN. */
  return ZSTD_estimateCCtxSize_usingCCtxParams_internal(
- &cParams, &params->ldmParams, 1, 0, 0, ZSTD_CONTENTSIZE_UNKNOWN);
+ &cParams, &params->ldmParams, 1, useRowMatchFinder, 0, 0, ZSTD_CONTENTSIZE_UNKNOWN);
  }

  size_t ZSTD_estimateCCtxSize_usingCParams(ZSTD_compressionParameters cParams)
  {
- ZSTD_CCtx_params const params = ZSTD_makeCCtxParamsFromCParams(cParams);
- return ZSTD_estimateCCtxSize_usingCCtxParams(&params);
+ ZSTD_CCtx_params initialParams = ZSTD_makeCCtxParamsFromCParams(cParams);
+ if (ZSTD_rowMatchFinderSupported(cParams.strategy)) {
+ /* Pick bigger of not using and using row-based matchfinder for greedy and lazy strategies */
+ size_t noRowCCtxSize;
+ size_t rowCCtxSize;
+ initialParams.useRowMatchFinder = ZSTD_urm_disableRowMatchFinder;
+ noRowCCtxSize = ZSTD_estimateCCtxSize_usingCCtxParams(&initialParams);
+ initialParams.useRowMatchFinder = ZSTD_urm_enableRowMatchFinder;
+ rowCCtxSize = ZSTD_estimateCCtxSize_usingCCtxParams(&initialParams);
+ return MAX(noRowCCtxSize, rowCCtxSize);
+ } else {
+ return ZSTD_estimateCCtxSize_usingCCtxParams(&initialParams);
+ }
  }

  static size_t ZSTD_estimateCCtxSize_internal(int compressionLevel)
  {
- ZSTD_compressionParameters const cParams = ZSTD_getCParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, 0, ZSTD_cpm_noAttachDict);
- return ZSTD_estimateCCtxSize_usingCParams(cParams);
+ int tier = 0;
+ size_t largestSize = 0;
+ static const unsigned long long srcSizeTiers[4] = {16 KB, 128 KB, 256 KB, ZSTD_CONTENTSIZE_UNKNOWN};
+ for (; tier < 4; ++tier) {
+ /* Choose the set of cParams for a given level across all srcSizes that give the largest cctxSize */
+ ZSTD_compressionParameters const cParams = ZSTD_getCParams_internal(compressionLevel, srcSizeTiers[tier], 0, ZSTD_cpm_noAttachDict);
+ largestSize = MAX(ZSTD_estimateCCtxSize_usingCParams(cParams), largestSize);
+ }
+ return largestSize;
  }

  size_t ZSTD_estimateCCtxSize(int compressionLevel)
@@ -1399,6 +1542,7 @@ size_t ZSTD_estimateCCtxSize(int compressionLevel)
  int level;
  size_t memBudget = 0;
  for (level=MIN(compressionLevel, 1); level<=compressionLevel; level++) {
+ /* Ensure monotonically increasing memory usage as compression level increases */
  size_t const newMB = ZSTD_estimateCCtxSize_internal(level);
  if (newMB > memBudget) memBudget = newMB;
  }
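Note: these estimation changes matter because callers size static contexts from these bounds: the estimate must cover whichever configuration ZSTD_resetCCtx_internal later picks, so it now takes the max over the row/no-row matchfinder variants and over several srcSize tiers. A typical consumer, sketched with the public static-allocation API (experimental, so ZSTD_STATIC_LINKING_ONLY):

    #define ZSTD_STATIC_LINKING_ONLY   /* for ZSTD_initStaticCCtx */
    #include <zstd.h>
    #include <stdlib.h>

    static ZSTD_CCtx* make_static_cctx(void)
    {
        size_t const budget = ZSTD_estimateCCtxSize(19);   /* worst-case bound for level 19 */
        void* const mem = malloc(budget);
        return mem ? ZSTD_initStaticCCtx(mem, budget) : NULL;   /* never allocates internally */
    }

If the estimate could come in under the real worst case, ZSTD_initStaticCCtx users would hit allocation failures at reset time, which is what the MAX() logic above guards against.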
@@ -1417,17 +1561,29 @@ size_t ZSTD_estimateCStreamSize_usingCCtxParams(const ZSTD_CCtx_params* params)
  size_t const outBuffSize = (params->outBufferMode == ZSTD_bm_buffered)
  ? ZSTD_compressBound(blockSize) + 1
  : 0;
+ ZSTD_useRowMatchFinderMode_e const useRowMatchFinder = ZSTD_resolveRowMatchFinderMode(params->useRowMatchFinder, &params->cParams);

  return ZSTD_estimateCCtxSize_usingCCtxParams_internal(
- &cParams, &params->ldmParams, 1, inBuffSize, outBuffSize,
+ &cParams, &params->ldmParams, 1, useRowMatchFinder, inBuffSize, outBuffSize,
  ZSTD_CONTENTSIZE_UNKNOWN);
  }
  }

  size_t ZSTD_estimateCStreamSize_usingCParams(ZSTD_compressionParameters cParams)
  {
- ZSTD_CCtx_params const params = ZSTD_makeCCtxParamsFromCParams(cParams);
- return ZSTD_estimateCStreamSize_usingCCtxParams(&params);
+ ZSTD_CCtx_params initialParams = ZSTD_makeCCtxParamsFromCParams(cParams);
+ if (ZSTD_rowMatchFinderSupported(cParams.strategy)) {
+ /* Pick bigger of not using and using row-based matchfinder for greedy and lazy strategies */
+ size_t noRowCCtxSize;
+ size_t rowCCtxSize;
+ initialParams.useRowMatchFinder = ZSTD_urm_disableRowMatchFinder;
+ noRowCCtxSize = ZSTD_estimateCStreamSize_usingCCtxParams(&initialParams);
+ initialParams.useRowMatchFinder = ZSTD_urm_enableRowMatchFinder;
+ rowCCtxSize = ZSTD_estimateCStreamSize_usingCCtxParams(&initialParams);
+ return MAX(noRowCCtxSize, rowCCtxSize);
+ } else {
+ return ZSTD_estimateCStreamSize_usingCCtxParams(&initialParams);
+ }
  }

  static size_t ZSTD_estimateCStreamSize_internal(int compressionLevel)
@@ -1552,20 +1708,27 @@ typedef enum {
  ZSTD_resetTarget_CCtx
  } ZSTD_resetTarget_e;

+
  static size_t
  ZSTD_reset_matchState(ZSTD_matchState_t* ms,
  ZSTD_cwksp* ws,
  const ZSTD_compressionParameters* cParams,
+ const ZSTD_useRowMatchFinderMode_e useRowMatchFinder,
  const ZSTD_compResetPolicy_e crp,
  const ZSTD_indexResetPolicy_e forceResetIndex,
  const ZSTD_resetTarget_e forWho)
  {
- size_t const chainSize = (cParams->strategy == ZSTD_fast) ? 0 : ((size_t)1 << cParams->chainLog);
+ /* disable chain table allocation for fast or row-based strategies */
+ size_t const chainSize = ZSTD_allocateChainTable(cParams->strategy, useRowMatchFinder,
+ ms->dedicatedDictSearch && (forWho == ZSTD_resetTarget_CDict))
+ ? ((size_t)1 << cParams->chainLog)
+ : 0;
  size_t const hSize = ((size_t)1) << cParams->hashLog;
  U32 const hashLog3 = ((forWho == ZSTD_resetTarget_CCtx) && cParams->minMatch==3) ? MIN(ZSTD_HASHLOG3_MAX, cParams->windowLog) : 0;
  size_t const h3Size = hashLog3 ? ((size_t)1) << hashLog3 : 0;

  DEBUGLOG(4, "reset indices : %u", forceResetIndex == ZSTDirp_reset);
+ assert(useRowMatchFinder != ZSTD_urm_auto);
  if (forceResetIndex == ZSTDirp_reset) {
  ZSTD_window_init(&ms->window);
  ZSTD_cwksp_mark_tables_dirty(ws);
@@ -1604,11 +1767,23 @@ ZSTD_reset_matchState(ZSTD_matchState_t* ms,
  ms->opt.priceTable = (ZSTD_optimal_t*)ZSTD_cwksp_reserve_aligned(ws, (ZSTD_OPT_NUM+1) * sizeof(ZSTD_optimal_t));
  }

+ if (ZSTD_rowMatchFinderUsed(cParams->strategy, useRowMatchFinder)) {
+ { /* Row match finder needs an additional table of hashes ("tags") */
+ size_t const tagTableSize = hSize*sizeof(U16);
+ ms->tagTable = (U16*)ZSTD_cwksp_reserve_aligned(ws, tagTableSize);
+ if (ms->tagTable) ZSTD_memset(ms->tagTable, 0, tagTableSize);
+ }
+ { /* Switch to 32-entry rows if searchLog is 5 (or more) */
+ U32 const rowLog = cParams->searchLog < 5 ? 4 : 5;
+ assert(cParams->hashLog > rowLog);
+ ms->rowHashLog = cParams->hashLog - rowLog;
+ }
+ }
+
  ms->cParams = *cParams;

  RETURN_ERROR_IF(ZSTD_cwksp_reserve_failed(ws), memory_allocation,
  "failed a workspace allocation in ZSTD_reset_matchState");
  return 0;
  }

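Note: as a worked example of the sizing above: with hashLog = 18 and searchLog >= 5, rowLog = 5 and rowHashLog = 13, so the hash space is organized as 2^13 rows of 32 entries, and the tag table adds hSize * sizeof(U16) = 2^18 * 2 bytes = 512 KB on top of the hash table proper.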
@@ -1625,61 +1800,85 @@ static int ZSTD_indexTooCloseToMax(ZSTD_window_t w)
  return (size_t)(w.nextSrc - w.base) > (ZSTD_CURRENT_MAX - ZSTD_INDEXOVERFLOW_MARGIN);
  }

+ /** ZSTD_dictTooBig():
+ * When dictionaries are larger than ZSTD_CHUNKSIZE_MAX they can't be loaded in
+ * one go generically. So we ensure that in that case we reset the tables to zero,
+ * so that we can load as much of the dictionary as possible.
+ */
+ static int ZSTD_dictTooBig(size_t const loadedDictSize)
+ {
+ return loadedDictSize > ZSTD_CHUNKSIZE_MAX;
+ }
+
  /*! ZSTD_resetCCtx_internal() :
- note : `params` are assumed fully validated at this stage */
+ * @param loadedDictSize The size of the dictionary to be loaded
+ * into the context, if any. If no dictionary is used, or the
+ * dictionary is being attached / copied, then pass 0.
+ * note : `params` are assumed fully validated at this stage.
+ */
  static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
- ZSTD_CCtx_params params,
+ ZSTD_CCtx_params const* params,
  U64 const pledgedSrcSize,
+ size_t const loadedDictSize,
  ZSTD_compResetPolicy_e const crp,
  ZSTD_buffered_policy_e const zbuff)
  {
  ZSTD_cwksp* const ws = &zc->workspace;
- DEBUGLOG(4, "ZSTD_resetCCtx_internal: pledgedSrcSize=%u, wlog=%u",
- (U32)pledgedSrcSize, params.cParams.windowLog);
- assert(!ZSTD_isError(ZSTD_checkCParams(params.cParams)));
+ DEBUGLOG(4, "ZSTD_resetCCtx_internal: pledgedSrcSize=%u, wlog=%u, useRowMatchFinder=%d",
+ (U32)pledgedSrcSize, params->cParams.windowLog, (int)params->useRowMatchFinder);
+ assert(!ZSTD_isError(ZSTD_checkCParams(params->cParams)));

  zc->isFirstBlock = 1;

- if (params.ldmParams.enableLdm) {
+ /* Set applied params early so we can modify them for LDM,
+ * and point params at the applied params.
+ */
+ zc->appliedParams = *params;
+ params = &zc->appliedParams;
+
+ assert(params->useRowMatchFinder != ZSTD_urm_auto);
+ if (params->ldmParams.enableLdm) {
  /* Adjust long distance matching parameters */
- ZSTD_ldm_adjustParameters(&params.ldmParams, &params.cParams);
- assert(params.ldmParams.hashLog >= params.ldmParams.bucketSizeLog);
- assert(params.ldmParams.hashRateLog < 32);
+ ZSTD_ldm_adjustParameters(&zc->appliedParams.ldmParams, &params->cParams);
+ assert(params->ldmParams.hashLog >= params->ldmParams.bucketSizeLog);
+ assert(params->ldmParams.hashRateLog < 32);
  }

- { size_t const windowSize = MAX(1, (size_t)MIN(((U64)1 << params.cParams.windowLog), pledgedSrcSize));
+ { size_t const windowSize = MAX(1, (size_t)MIN(((U64)1 << params->cParams.windowLog), pledgedSrcSize));
  size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, windowSize);
- U32 const divider = (params.cParams.minMatch==3) ? 3 : 4;
+ U32 const divider = (params->cParams.minMatch==3) ? 3 : 4;
  size_t const maxNbSeq = blockSize / divider;
- size_t const buffOutSize = (zbuff == ZSTDb_buffered && params.outBufferMode == ZSTD_bm_buffered)
+ size_t const buffOutSize = (zbuff == ZSTDb_buffered && params->outBufferMode == ZSTD_bm_buffered)
  ? ZSTD_compressBound(blockSize) + 1
  : 0;
- size_t const buffInSize = (zbuff == ZSTDb_buffered && params.inBufferMode == ZSTD_bm_buffered)
+ size_t const buffInSize = (zbuff == ZSTDb_buffered && params->inBufferMode == ZSTD_bm_buffered)
  ? windowSize + blockSize
  : 0;
- size_t const maxNbLdmSeq = ZSTD_ldm_getMaxNbSeq(params.ldmParams, blockSize);
+ size_t const maxNbLdmSeq = ZSTD_ldm_getMaxNbSeq(params->ldmParams, blockSize);

  int const indexTooClose = ZSTD_indexTooCloseToMax(zc->blockState.matchState.window);
+ int const dictTooBig = ZSTD_dictTooBig(loadedDictSize);
  ZSTD_indexResetPolicy_e needsIndexReset =
- (!indexTooClose && zc->initialized) ? ZSTDirp_continue : ZSTDirp_reset;
+ (indexTooClose || dictTooBig || !zc->initialized) ? ZSTDirp_reset : ZSTDirp_continue;

  size_t const neededSpace =
  ZSTD_estimateCCtxSize_usingCCtxParams_internal(
- &params.cParams, &params.ldmParams, zc->staticSize != 0,
+ &params->cParams, &params->ldmParams, zc->staticSize != 0, params->useRowMatchFinder,
  buffInSize, buffOutSize, pledgedSrcSize);
+ int resizeWorkspace;
+
  FORWARD_IF_ERROR(neededSpace, "cctx size estimate failed!");

  if (!zc->staticSize) ZSTD_cwksp_bump_oversized_duration(ws, 0);

- /* Check if workspace is large enough, alloc a new one if needed */
- {
+ { /* Check if workspace is large enough, alloc a new one if needed */
  int const workspaceTooSmall = ZSTD_cwksp_sizeof(ws) < neededSpace;
  int const workspaceWasteful = ZSTD_cwksp_check_wasteful(ws, neededSpace);
-
+ resizeWorkspace = workspaceTooSmall || workspaceWasteful;
  DEBUGLOG(4, "Need %zu B workspace", neededSpace);
  DEBUGLOG(4, "windowSize: %zu - blockSize: %zu", windowSize, blockSize);

- if (workspaceTooSmall || workspaceWasteful) {
+ if (resizeWorkspace) {
  DEBUGLOG(4, "Resize workspaceSize from %zuKB to %zuKB",
  ZSTD_cwksp_sizeof(ws) >> 10,
  neededSpace >> 10);
@@ -1707,8 +1906,7 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
  ZSTD_cwksp_clear(ws);

  /* init params */
- zc->appliedParams = params;
- zc->blockState.matchState.cParams = params.cParams;
+ zc->blockState.matchState.cParams = params->cParams;
  zc->pledgedSrcSizePlusOne = pledgedSrcSize+1;
  zc->consumedSrcSize = 0;
  zc->producedCSize = 0;
@@ -1739,11 +1937,11 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
  zc->outBuff = (char*)ZSTD_cwksp_reserve_buffer(ws, buffOutSize);

  /* ldm bucketOffsets table */
- if (params.ldmParams.enableLdm) {
+ if (params->ldmParams.enableLdm) {
  /* TODO: avoid memset? */
  size_t const numBuckets =
- ((size_t)1) << (params.ldmParams.hashLog -
- params.ldmParams.bucketSizeLog);
+ ((size_t)1) << (params->ldmParams.hashLog -
+ params->ldmParams.bucketSizeLog);
  zc->ldmState.bucketOffsets = ZSTD_cwksp_reserve_buffer(ws, numBuckets);
  ZSTD_memset(zc->ldmState.bucketOffsets, 0, numBuckets);
  }
@@ -1759,32 +1957,28 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
  FORWARD_IF_ERROR(ZSTD_reset_matchState(
  &zc->blockState.matchState,
  ws,
- &params.cParams,
+ &params->cParams,
+ params->useRowMatchFinder,
  crp,
  needsIndexReset,
  ZSTD_resetTarget_CCtx), "");

  /* ldm hash table */
- if (params.ldmParams.enableLdm) {
+ if (params->ldmParams.enableLdm) {
  /* TODO: avoid memset? */
- size_t const ldmHSize = ((size_t)1) << params.ldmParams.hashLog;
+ size_t const ldmHSize = ((size_t)1) << params->ldmParams.hashLog;
  zc->ldmState.hashTable = (ldmEntry_t*)ZSTD_cwksp_reserve_aligned(ws, ldmHSize * sizeof(ldmEntry_t));
  ZSTD_memset(zc->ldmState.hashTable, 0, ldmHSize * sizeof(ldmEntry_t));
  zc->ldmSequences = (rawSeq*)ZSTD_cwksp_reserve_aligned(ws, maxNbLdmSeq * sizeof(rawSeq));
  zc->maxNbLdmSequences = maxNbLdmSeq;

  ZSTD_window_init(&zc->ldmState.window);
- ZSTD_window_clear(&zc->ldmState.window);
  zc->ldmState.loadedDictEnd = 0;
  }

- /* Due to alignment, when reusing a workspace, we can actually consume
- * up to 3 extra bytes for alignment. See the comments in zstd_cwksp.h
- */
- assert(ZSTD_cwksp_used(ws) >= neededSpace &&
- ZSTD_cwksp_used(ws) <= neededSpace + 3);
-
+ assert(ZSTD_cwksp_estimated_space_within_bounds(ws, neededSpace, resizeWorkspace));
  DEBUGLOG(3, "wksp: finished allocating, %zd bytes remain available", ZSTD_cwksp_available_space(ws));
+
  zc->initialized = 1;

  return 0;
@@ -1840,6 +2034,8 @@ ZSTD_resetCCtx_byAttachingCDict(ZSTD_CCtx* cctx,
  U64 pledgedSrcSize,
  ZSTD_buffered_policy_e zbuff)
  {
+ DEBUGLOG(4, "ZSTD_resetCCtx_byAttachingCDict() pledgedSrcSize=%llu",
+ (unsigned long long)pledgedSrcSize);
  {
  ZSTD_compressionParameters adjusted_cdict_cParams = cdict->matchState.cParams;
  unsigned const windowLog = params.cParams.windowLog;
@@ -1855,7 +2051,9 @@ ZSTD_resetCCtx_byAttachingCDict(ZSTD_CCtx* cctx,
  params.cParams = ZSTD_adjustCParams_internal(adjusted_cdict_cParams, pledgedSrcSize,
  cdict->dictContentSize, ZSTD_cpm_attachDict);
  params.cParams.windowLog = windowLog;
- FORWARD_IF_ERROR(ZSTD_resetCCtx_internal(cctx, params, pledgedSrcSize,
+ params.useRowMatchFinder = cdict->useRowMatchFinder; /* cdict overrides */
+ FORWARD_IF_ERROR(ZSTD_resetCCtx_internal(cctx, &params, pledgedSrcSize,
+ /* loadedDictSize */ 0,
  ZSTDcrp_makeClean, zbuff), "");
  assert(cctx->appliedParams.cParams.strategy == adjusted_cdict_cParams.strategy);
  }
@@ -1899,15 +2097,17 @@ static size_t ZSTD_resetCCtx_byCopyingCDict(ZSTD_CCtx* cctx,
  const ZSTD_compressionParameters *cdict_cParams = &cdict->matchState.cParams;

  assert(!cdict->matchState.dedicatedDictSearch);
-
- DEBUGLOG(4, "copying dictionary into context");
+ DEBUGLOG(4, "ZSTD_resetCCtx_byCopyingCDict() pledgedSrcSize=%llu",
+ (unsigned long long)pledgedSrcSize);

  { unsigned const windowLog = params.cParams.windowLog;
  assert(windowLog != 0);
  /* Copy only compression parameters related to tables. */
  params.cParams = *cdict_cParams;
  params.cParams.windowLog = windowLog;
- FORWARD_IF_ERROR(ZSTD_resetCCtx_internal(cctx, params, pledgedSrcSize,
+ params.useRowMatchFinder = cdict->useRowMatchFinder;
+ FORWARD_IF_ERROR(ZSTD_resetCCtx_internal(cctx, &params, pledgedSrcSize,
+ /* loadedDictSize */ 0,
  ZSTDcrp_leaveDirty, zbuff), "");
  assert(cctx->appliedParams.cParams.strategy == cdict_cParams->strategy);
  assert(cctx->appliedParams.cParams.hashLog == cdict_cParams->hashLog);
@@ -1915,17 +2115,30 @@ static size_t ZSTD_resetCCtx_byCopyingCDict(ZSTD_CCtx* cctx,
1915
2115
  }
1916
2116
 
1917
2117
  ZSTD_cwksp_mark_tables_dirty(&cctx->workspace);
2118
+ assert(params.useRowMatchFinder != ZSTD_urm_auto);
1918
2119
 
1919
2120
  /* copy tables */
1920
- { size_t const chainSize = (cdict_cParams->strategy == ZSTD_fast) ? 0 : ((size_t)1 << cdict_cParams->chainLog);
2121
+ { size_t const chainSize = ZSTD_allocateChainTable(cdict_cParams->strategy, cdict->useRowMatchFinder, 0 /* DDS guaranteed disabled */)
2122
+ ? ((size_t)1 << cdict_cParams->chainLog)
2123
+ : 0;
1921
2124
  size_t const hSize = (size_t)1 << cdict_cParams->hashLog;
1922
2125
 
1923
2126
  ZSTD_memcpy(cctx->blockState.matchState.hashTable,
1924
2127
  cdict->matchState.hashTable,
1925
2128
  hSize * sizeof(U32));
1926
- ZSTD_memcpy(cctx->blockState.matchState.chainTable,
2129
+ /* Do not copy cdict's chainTable if cctx has parameters such that it would not use chainTable */
2130
+ if (ZSTD_allocateChainTable(cctx->appliedParams.cParams.strategy, cctx->appliedParams.useRowMatchFinder, 0 /* forDDSDict */)) {
2131
+ ZSTD_memcpy(cctx->blockState.matchState.chainTable,
1927
2132
  cdict->matchState.chainTable,
1928
2133
  chainSize * sizeof(U32));
2134
+ }
2135
+ /* copy tag table */
2136
+ if (ZSTD_rowMatchFinderUsed(cdict_cParams->strategy, cdict->useRowMatchFinder)) {
2137
+ size_t const tagTableSize = hSize*sizeof(U16);
2138
+ ZSTD_memcpy(cctx->blockState.matchState.tagTable,
2139
+ cdict->matchState.tagTable,
2140
+ tagTableSize);
2141
+ }
1929
2142
  }
1930
2143
 
1931
2144
  /* Zero the hashTable3, since the cdict never fills it */
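The table-copy logic above now depends on the row-based match finder introduced in v1.5.0: the chain table may be skipped entirely, and a 16-bit tag table is copied instead when the row match finder is active. A standalone sketch of how the three table sizes fall out of the parameters (plain C; the chainLog/hashLog values are hypothetical, and ZSTD_allocateChainTable itself is internal to libzstd):

    #include <stdint.h>
    #include <stdio.h>

    /* Sketch: derive the copied table sizes from (hypothetical) cParams. */
    int main(void) {
        unsigned const chainLog = 16, hashLog = 17;
        int const useRowMatchFinder = 1;  /* row-based search replaces the chain table */
        size_t const chainSize    = useRowMatchFinder ? 0 : ((size_t)1 << chainLog);
        size_t const hSize        = (size_t)1 << hashLog;
        size_t const tagTableSize = useRowMatchFinder ? hSize * sizeof(uint16_t) : 0;
        printf("chain: %zu B, hash: %zu B, tags: %zu B\n",
               chainSize * sizeof(uint32_t), hSize * sizeof(uint32_t), tagTableSize);
        return 0;
    }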
@@ -1989,16 +2202,18 @@ static size_t ZSTD_copyCCtx_internal(ZSTD_CCtx* dstCCtx,
  U64 pledgedSrcSize,
  ZSTD_buffered_policy_e zbuff)
  {
- DEBUGLOG(5, "ZSTD_copyCCtx_internal");
  RETURN_ERROR_IF(srcCCtx->stage!=ZSTDcs_init, stage_wrong,
  "Can't copy a ctx that's not in init stage.");
-
+ DEBUGLOG(5, "ZSTD_copyCCtx_internal");
  ZSTD_memcpy(&dstCCtx->customMem, &srcCCtx->customMem, sizeof(ZSTD_customMem));
  { ZSTD_CCtx_params params = dstCCtx->requestedParams;
  /* Copy only compression parameters related to tables. */
  params.cParams = srcCCtx->appliedParams.cParams;
+ assert(srcCCtx->appliedParams.useRowMatchFinder != ZSTD_urm_auto);
+ params.useRowMatchFinder = srcCCtx->appliedParams.useRowMatchFinder;
  params.fParams = fParams;
- ZSTD_resetCCtx_internal(dstCCtx, params, pledgedSrcSize,
+ ZSTD_resetCCtx_internal(dstCCtx, &params, pledgedSrcSize,
+ /* loadedDictSize */ 0,
  ZSTDcrp_leaveDirty, zbuff);
  assert(dstCCtx->appliedParams.cParams.windowLog == srcCCtx->appliedParams.cParams.windowLog);
  assert(dstCCtx->appliedParams.cParams.strategy == srcCCtx->appliedParams.cParams.strategy);
@@ -2010,7 +2225,11 @@ static size_t ZSTD_copyCCtx_internal(ZSTD_CCtx* dstCCtx,
  ZSTD_cwksp_mark_tables_dirty(&dstCCtx->workspace);

  /* copy tables */
- { size_t const chainSize = (srcCCtx->appliedParams.cParams.strategy == ZSTD_fast) ? 0 : ((size_t)1 << srcCCtx->appliedParams.cParams.chainLog);
+ { size_t const chainSize = ZSTD_allocateChainTable(srcCCtx->appliedParams.cParams.strategy,
+ srcCCtx->appliedParams.useRowMatchFinder,
+ 0 /* forDDSDict */)
+ ? ((size_t)1 << srcCCtx->appliedParams.cParams.chainLog)
+ : 0;
  size_t const hSize = (size_t)1 << srcCCtx->appliedParams.cParams.hashLog;
  int const h3log = srcCCtx->blockState.matchState.hashLog3;
  size_t const h3Size = h3log ? ((size_t)1 << h3log) : 0;
@@ -2124,7 +2343,7 @@ static void ZSTD_reduceIndex (ZSTD_matchState_t* ms, ZSTD_CCtx_params const* par
  ZSTD_reduceTable(ms->hashTable, hSize, reducerValue);
  }

- if (params->cParams.strategy != ZSTD_fast) {
+ if (ZSTD_allocateChainTable(params->cParams.strategy, params->useRowMatchFinder, (U32)ms->dedicatedDictSearch)) {
  U32 const chainSize = (U32)1 << params->cParams.chainLog;
  if (params->cParams.strategy == ZSTD_btlazy2)
  ZSTD_reduceTable_btlazy2(ms->chainTable, chainSize, reducerValue);
@@ -2161,9 +2380,9 @@ void ZSTD_seqToCodes(const seqStore_t* seqStorePtr)
  ofCodeTable[u] = (BYTE)ZSTD_highbit32(sequences[u].offset);
  mlCodeTable[u] = (BYTE)ZSTD_MLcode(mlv);
  }
- if (seqStorePtr->longLengthID==1)
+ if (seqStorePtr->longLengthType==ZSTD_llt_literalLength)
  llCodeTable[seqStorePtr->longLengthPos] = MaxLL;
- if (seqStorePtr->longLengthID==2)
+ if (seqStorePtr->longLengthType==ZSTD_llt_matchLength)
  mlCodeTable[seqStorePtr->longLengthPos] = MaxML;
  }

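The longLengthID magic numbers (1 and 2) are replaced by the ZSTD_llt_* enum throughout; the mechanism itself is unchanged: lengths live in 16-bit seqStore fields, and the one sequence whose length overflowed is flagged so 0x10000 can be added back. A minimal sketch of the reconstruction (the enum mirrors the diff; everything else is hypothetical):

    #include <stdio.h>

    typedef enum { ZSTD_llt_none, ZSTD_llt_literalLength, ZSTD_llt_matchLength } ZSTD_longLengthType_e;

    /* Recover the true literal length of sequence i from its 16-bit stored field. */
    static unsigned fullLitLength(unsigned storedLL, unsigned i,
                                  unsigned longLengthPos, ZSTD_longLengthType_e t) {
        if (i == longLengthPos && t == ZSTD_llt_literalLength)
            return storedLL + 0x10000;  /* this sequence overflowed 16 bits */
        return storedLL;
    }

    int main(void) {
        /* sequence 3 is flagged as carrying the long literal length */
        printf("%u\n", fullLitLength(564, 3, 3, ZSTD_llt_literalLength)); /* 66100 */
        return 0;
    }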
@@ -2177,10 +2396,158 @@ static int ZSTD_useTargetCBlockSize(const ZSTD_CCtx_params* cctxParams)
  return (cctxParams->targetCBlockSize != 0);
  }

- /* ZSTD_entropyCompressSequences_internal():
- * actually compresses both literals and sequences */
+ /* ZSTD_blockSplitterEnabled():
+ * Returns if block splitting param is being used
+ * If used, compression will do best effort to split a block in order to improve compression ratio.
+ * Returns 1 if true, 0 otherwise. */
+ static int ZSTD_blockSplitterEnabled(ZSTD_CCtx_params* cctxParams)
+ {
+ DEBUGLOG(5, "ZSTD_blockSplitterEnabled(splitBlocks=%d)", cctxParams->splitBlocks);
+ return (cctxParams->splitBlocks != 0);
+ }
+
+ /* Type returned by ZSTD_buildSequencesStatistics containing finalized symbol encoding types
+ * and size of the sequences statistics
+ */
+ typedef struct {
+ U32 LLtype;
+ U32 Offtype;
+ U32 MLtype;
+ size_t size;
+ size_t lastCountSize; /* Accounts for bug in 1.3.4. More detail in ZSTD_entropyCompressSeqStore_internal() */
+ } ZSTD_symbolEncodingTypeStats_t;
+
+ /* ZSTD_buildSequencesStatistics():
+ * Returns a ZSTD_symbolEncodingTypeStats_t, or a zstd error code in the `size` field.
+ * Modifies `nextEntropy` to have the appropriate values as a side effect.
+ * nbSeq must be greater than 0.
+ *
+ * entropyWkspSize must be of size at least ENTROPY_WORKSPACE_SIZE - (MaxSeq + 1)*sizeof(U32)
+ */
+ static ZSTD_symbolEncodingTypeStats_t
+ ZSTD_buildSequencesStatistics(seqStore_t* seqStorePtr, size_t nbSeq,
+ const ZSTD_fseCTables_t* prevEntropy, ZSTD_fseCTables_t* nextEntropy,
+ BYTE* dst, const BYTE* const dstEnd,
+ ZSTD_strategy strategy, unsigned* countWorkspace,
+ void* entropyWorkspace, size_t entropyWkspSize) {
+ BYTE* const ostart = dst;
+ const BYTE* const oend = dstEnd;
+ BYTE* op = ostart;
+ FSE_CTable* CTable_LitLength = nextEntropy->litlengthCTable;
+ FSE_CTable* CTable_OffsetBits = nextEntropy->offcodeCTable;
+ FSE_CTable* CTable_MatchLength = nextEntropy->matchlengthCTable;
+ const BYTE* const ofCodeTable = seqStorePtr->ofCode;
+ const BYTE* const llCodeTable = seqStorePtr->llCode;
+ const BYTE* const mlCodeTable = seqStorePtr->mlCode;
+ ZSTD_symbolEncodingTypeStats_t stats;
+
+ stats.lastCountSize = 0;
+ /* convert length/distances into codes */
+ ZSTD_seqToCodes(seqStorePtr);
+ assert(op <= oend);
+ assert(nbSeq != 0); /* ZSTD_selectEncodingType() divides by nbSeq */
+ /* build CTable for Literal Lengths */
+ { unsigned max = MaxLL;
+ size_t const mostFrequent = HIST_countFast_wksp(countWorkspace, &max, llCodeTable, nbSeq, entropyWorkspace, entropyWkspSize); /* can't fail */
+ DEBUGLOG(5, "Building LL table");
+ nextEntropy->litlength_repeatMode = prevEntropy->litlength_repeatMode;
+ stats.LLtype = ZSTD_selectEncodingType(&nextEntropy->litlength_repeatMode,
+ countWorkspace, max, mostFrequent, nbSeq,
+ LLFSELog, prevEntropy->litlengthCTable,
+ LL_defaultNorm, LL_defaultNormLog,
+ ZSTD_defaultAllowed, strategy);
+ assert(set_basic < set_compressed && set_rle < set_compressed);
+ assert(!(stats.LLtype < set_compressed && nextEntropy->litlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */
+ { size_t const countSize = ZSTD_buildCTable(
+ op, (size_t)(oend - op),
+ CTable_LitLength, LLFSELog, (symbolEncodingType_e)stats.LLtype,
+ countWorkspace, max, llCodeTable, nbSeq,
+ LL_defaultNorm, LL_defaultNormLog, MaxLL,
+ prevEntropy->litlengthCTable,
+ sizeof(prevEntropy->litlengthCTable),
+ entropyWorkspace, entropyWkspSize);
+ if (ZSTD_isError(countSize)) {
+ DEBUGLOG(3, "ZSTD_buildCTable for LitLens failed");
+ stats.size = countSize;
+ return stats;
+ }
+ if (stats.LLtype == set_compressed)
+ stats.lastCountSize = countSize;
+ op += countSize;
+ assert(op <= oend);
+ } }
+ /* build CTable for Offsets */
+ { unsigned max = MaxOff;
+ size_t const mostFrequent = HIST_countFast_wksp(
+ countWorkspace, &max, ofCodeTable, nbSeq, entropyWorkspace, entropyWkspSize); /* can't fail */
+ /* We can only use the basic table if max <= DefaultMaxOff, otherwise the offsets are too large */
+ ZSTD_defaultPolicy_e const defaultPolicy = (max <= DefaultMaxOff) ? ZSTD_defaultAllowed : ZSTD_defaultDisallowed;
+ DEBUGLOG(5, "Building OF table");
+ nextEntropy->offcode_repeatMode = prevEntropy->offcode_repeatMode;
+ stats.Offtype = ZSTD_selectEncodingType(&nextEntropy->offcode_repeatMode,
+ countWorkspace, max, mostFrequent, nbSeq,
+ OffFSELog, prevEntropy->offcodeCTable,
+ OF_defaultNorm, OF_defaultNormLog,
+ defaultPolicy, strategy);
+ assert(!(stats.Offtype < set_compressed && nextEntropy->offcode_repeatMode != FSE_repeat_none)); /* We don't copy tables */
+ { size_t const countSize = ZSTD_buildCTable(
+ op, (size_t)(oend - op),
+ CTable_OffsetBits, OffFSELog, (symbolEncodingType_e)stats.Offtype,
+ countWorkspace, max, ofCodeTable, nbSeq,
+ OF_defaultNorm, OF_defaultNormLog, DefaultMaxOff,
+ prevEntropy->offcodeCTable,
+ sizeof(prevEntropy->offcodeCTable),
+ entropyWorkspace, entropyWkspSize);
+ if (ZSTD_isError(countSize)) {
+ DEBUGLOG(3, "ZSTD_buildCTable for Offsets failed");
+ stats.size = countSize;
+ return stats;
+ }
+ if (stats.Offtype == set_compressed)
+ stats.lastCountSize = countSize;
+ op += countSize;
+ assert(op <= oend);
+ } }
+ /* build CTable for MatchLengths */
+ { unsigned max = MaxML;
+ size_t const mostFrequent = HIST_countFast_wksp(
+ countWorkspace, &max, mlCodeTable, nbSeq, entropyWorkspace, entropyWkspSize); /* can't fail */
+ DEBUGLOG(5, "Building ML table (remaining space : %i)", (int)(oend-op));
+ nextEntropy->matchlength_repeatMode = prevEntropy->matchlength_repeatMode;
+ stats.MLtype = ZSTD_selectEncodingType(&nextEntropy->matchlength_repeatMode,
+ countWorkspace, max, mostFrequent, nbSeq,
+ MLFSELog, prevEntropy->matchlengthCTable,
+ ML_defaultNorm, ML_defaultNormLog,
+ ZSTD_defaultAllowed, strategy);
+ assert(!(stats.MLtype < set_compressed && nextEntropy->matchlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */
+ { size_t const countSize = ZSTD_buildCTable(
+ op, (size_t)(oend - op),
+ CTable_MatchLength, MLFSELog, (symbolEncodingType_e)stats.MLtype,
+ countWorkspace, max, mlCodeTable, nbSeq,
+ ML_defaultNorm, ML_defaultNormLog, MaxML,
+ prevEntropy->matchlengthCTable,
+ sizeof(prevEntropy->matchlengthCTable),
+ entropyWorkspace, entropyWkspSize);
+ if (ZSTD_isError(countSize)) {
+ DEBUGLOG(3, "ZSTD_buildCTable for MatchLengths failed");
+ stats.size = countSize;
+ return stats;
+ }
+ if (stats.MLtype == set_compressed)
+ stats.lastCountSize = countSize;
+ op += countSize;
+ assert(op <= oend);
+ } }
+ stats.size = (size_t)(op-ostart);
+ return stats;
+ }
+
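ZSTD_buildSequencesStatistics factors out a pattern that was previously inlined three times (LL, OF, ML): histogram the code bytes, select an encoding type, build the CTable, and remember lastCountSize for the 1.3.4 workaround. The selection step, reduced to its core idea (a simplification; the real ZSTD_selectEncodingType also weighs table-repeat opportunities and the compression strategy):

    #include <stddef.h>

    typedef enum { set_basic, set_rle, set_compressed, set_repeat } symbolEncodingType_e;

    /* Simplified sketch of the per-stream encoding-type decision.
     * mostFrequent is the highest histogram count, nbSeq the number of sequences. */
    static symbolEncodingType_e
    pickEncodingType(size_t mostFrequent, size_t nbSeq) {
        if (mostFrequent == nbSeq) return set_rle;   /* one symbol only: encode as RLE */
        if (nbSeq < 64)            return set_basic; /* too few to amortize a custom table */
        return set_compressed;                       /* emit an FSE table + bitstream */
    }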
+ /* ZSTD_entropyCompressSeqStore_internal():
+ * compresses both literals and sequences
+ * Returns compressed size of block, or a zstd error.
+ */
  MEM_STATIC size_t
- ZSTD_entropyCompressSequences_internal(seqStore_t* seqStorePtr,
+ ZSTD_entropyCompressSeqStore_internal(seqStore_t* seqStorePtr,
  const ZSTD_entropyCTables_t* prevEntropy,
  ZSTD_entropyCTables_t* nextEntropy,
  const ZSTD_CCtx_params* cctxParams,
@@ -2194,22 +2561,20 @@ ZSTD_entropyCompressSequences_internal(seqStore_t* seqStorePtr,
  FSE_CTable* CTable_LitLength = nextEntropy->fse.litlengthCTable;
  FSE_CTable* CTable_OffsetBits = nextEntropy->fse.offcodeCTable;
  FSE_CTable* CTable_MatchLength = nextEntropy->fse.matchlengthCTable;
- U32 LLtype, Offtype, MLtype; /* compressed, raw or rle */
  const seqDef* const sequences = seqStorePtr->sequencesStart;
+ const size_t nbSeq = seqStorePtr->sequences - seqStorePtr->sequencesStart;
  const BYTE* const ofCodeTable = seqStorePtr->ofCode;
  const BYTE* const llCodeTable = seqStorePtr->llCode;
  const BYTE* const mlCodeTable = seqStorePtr->mlCode;
  BYTE* const ostart = (BYTE*)dst;
  BYTE* const oend = ostart + dstCapacity;
  BYTE* op = ostart;
- size_t const nbSeq = (size_t)(seqStorePtr->sequences - seqStorePtr->sequencesStart);
- BYTE* seqHead;
- BYTE* lastNCount = NULL;
+ size_t lastCountSize;

  entropyWorkspace = count + (MaxSeq + 1);
  entropyWkspSize -= (MaxSeq + 1) * sizeof(*count);

- DEBUGLOG(4, "ZSTD_entropyCompressSequences_internal (nbSeq=%zu)", nbSeq);
+ DEBUGLOG(4, "ZSTD_entropyCompressSeqStore_internal (nbSeq=%zu)", nbSeq);
  ZSTD_STATIC_ASSERT(HUF_WORKSPACE_SIZE >= (1<<MAX(MLFSELog,LLFSELog)));
  assert(entropyWkspSize >= HUF_WORKSPACE_SIZE);

@@ -2249,95 +2614,20 @@ ZSTD_entropyCompressSequences_internal(seqStore_t* seqStorePtr,
  ZSTD_memcpy(&nextEntropy->fse, &prevEntropy->fse, sizeof(prevEntropy->fse));
  return (size_t)(op - ostart);
  }
-
- /* seqHead : flags for FSE encoding type */
- seqHead = op++;
- assert(op <= oend);
-
- /* convert length/distances into codes */
- ZSTD_seqToCodes(seqStorePtr);
- /* build CTable for Literal Lengths */
- { unsigned max = MaxLL;
- size_t const mostFrequent = HIST_countFast_wksp(count, &max, llCodeTable, nbSeq, entropyWorkspace, entropyWkspSize); /* can't fail */
- DEBUGLOG(5, "Building LL table");
- nextEntropy->fse.litlength_repeatMode = prevEntropy->fse.litlength_repeatMode;
- LLtype = ZSTD_selectEncodingType(&nextEntropy->fse.litlength_repeatMode,
- count, max, mostFrequent, nbSeq,
- LLFSELog, prevEntropy->fse.litlengthCTable,
- LL_defaultNorm, LL_defaultNormLog,
- ZSTD_defaultAllowed, strategy);
- assert(set_basic < set_compressed && set_rle < set_compressed);
- assert(!(LLtype < set_compressed && nextEntropy->fse.litlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */
- { size_t const countSize = ZSTD_buildCTable(
- op, (size_t)(oend - op),
- CTable_LitLength, LLFSELog, (symbolEncodingType_e)LLtype,
- count, max, llCodeTable, nbSeq,
- LL_defaultNorm, LL_defaultNormLog, MaxLL,
- prevEntropy->fse.litlengthCTable,
- sizeof(prevEntropy->fse.litlengthCTable),
- entropyWorkspace, entropyWkspSize);
- FORWARD_IF_ERROR(countSize, "ZSTD_buildCTable for LitLens failed");
- if (LLtype == set_compressed)
- lastNCount = op;
- op += countSize;
- assert(op <= oend);
- } }
- /* build CTable for Offsets */
- { unsigned max = MaxOff;
- size_t const mostFrequent = HIST_countFast_wksp(
- count, &max, ofCodeTable, nbSeq, entropyWorkspace, entropyWkspSize); /* can't fail */
- /* We can only use the basic table if max <= DefaultMaxOff, otherwise the offsets are too large */
- ZSTD_defaultPolicy_e const defaultPolicy = (max <= DefaultMaxOff) ? ZSTD_defaultAllowed : ZSTD_defaultDisallowed;
- DEBUGLOG(5, "Building OF table");
- nextEntropy->fse.offcode_repeatMode = prevEntropy->fse.offcode_repeatMode;
- Offtype = ZSTD_selectEncodingType(&nextEntropy->fse.offcode_repeatMode,
- count, max, mostFrequent, nbSeq,
- OffFSELog, prevEntropy->fse.offcodeCTable,
- OF_defaultNorm, OF_defaultNormLog,
- defaultPolicy, strategy);
- assert(!(Offtype < set_compressed && nextEntropy->fse.offcode_repeatMode != FSE_repeat_none)); /* We don't copy tables */
- { size_t const countSize = ZSTD_buildCTable(
- op, (size_t)(oend - op),
- CTable_OffsetBits, OffFSELog, (symbolEncodingType_e)Offtype,
- count, max, ofCodeTable, nbSeq,
- OF_defaultNorm, OF_defaultNormLog, DefaultMaxOff,
- prevEntropy->fse.offcodeCTable,
- sizeof(prevEntropy->fse.offcodeCTable),
- entropyWorkspace, entropyWkspSize);
- FORWARD_IF_ERROR(countSize, "ZSTD_buildCTable for Offsets failed");
- if (Offtype == set_compressed)
- lastNCount = op;
- op += countSize;
- assert(op <= oend);
- } }
- /* build CTable for MatchLengths */
- { unsigned max = MaxML;
- size_t const mostFrequent = HIST_countFast_wksp(
- count, &max, mlCodeTable, nbSeq, entropyWorkspace, entropyWkspSize); /* can't fail */
- DEBUGLOG(5, "Building ML table (remaining space : %i)", (int)(oend-op));
- nextEntropy->fse.matchlength_repeatMode = prevEntropy->fse.matchlength_repeatMode;
- MLtype = ZSTD_selectEncodingType(&nextEntropy->fse.matchlength_repeatMode,
- count, max, mostFrequent, nbSeq,
- MLFSELog, prevEntropy->fse.matchlengthCTable,
- ML_defaultNorm, ML_defaultNormLog,
- ZSTD_defaultAllowed, strategy);
- assert(!(MLtype < set_compressed && nextEntropy->fse.matchlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */
- { size_t const countSize = ZSTD_buildCTable(
- op, (size_t)(oend - op),
- CTable_MatchLength, MLFSELog, (symbolEncodingType_e)MLtype,
- count, max, mlCodeTable, nbSeq,
- ML_defaultNorm, ML_defaultNormLog, MaxML,
- prevEntropy->fse.matchlengthCTable,
- sizeof(prevEntropy->fse.matchlengthCTable),
- entropyWorkspace, entropyWkspSize);
- FORWARD_IF_ERROR(countSize, "ZSTD_buildCTable for MatchLengths failed");
- if (MLtype == set_compressed)
- lastNCount = op;
- op += countSize;
- assert(op <= oend);
- } }
-
- *seqHead = (BYTE)((LLtype<<6) + (Offtype<<4) + (MLtype<<2));
+ {
+ ZSTD_symbolEncodingTypeStats_t stats;
+ BYTE* seqHead = op++;
+ /* build stats for sequences */
+ stats = ZSTD_buildSequencesStatistics(seqStorePtr, nbSeq,
+ &prevEntropy->fse, &nextEntropy->fse,
+ op, oend,
+ strategy, count,
+ entropyWorkspace, entropyWkspSize);
+ FORWARD_IF_ERROR(stats.size, "ZSTD_buildSequencesStatistics failed!");
+ *seqHead = (BYTE)((stats.LLtype<<6) + (stats.Offtype<<4) + (stats.MLtype<<2));
+ lastCountSize = stats.lastCountSize;
+ op += stats.size;
+ }

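The rewrite replaces the three inlined CTable-building passes with a single call plus the seqHead byte, which packs the three chosen encoding types two bits each. A self-contained illustration of that layout:

    #include <assert.h>

    /* seqHead layout: [LLtype:2][Offtype:2][MLtype:2][reserved:2] */
    static unsigned char packSeqHead(unsigned LLtype, unsigned Offtype, unsigned MLtype) {
        return (unsigned char)((LLtype << 6) + (Offtype << 4) + (MLtype << 2));
    }

    int main(void) {
        unsigned char const h = packSeqHead(2 /*compressed*/, 1 /*rle*/, 0 /*basic*/);
        assert((h >> 6)       == 2);  /* literal-length mode */
        assert(((h >> 4) & 3) == 1);  /* offset mode         */
        assert(((h >> 2) & 3) == 0);  /* match-length mode   */
        return 0;
    }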
  { size_t const bitstreamSize = ZSTD_encodeSequences(
  op, (size_t)(oend - op),
@@ -2357,9 +2647,9 @@ ZSTD_entropyCompressSequences_internal(seqStore_t* seqStorePtr,
  * In this exceedingly rare case, we will simply emit an uncompressed
  * block, since it isn't worth optimizing.
  */
- if (lastNCount && (op - lastNCount) < 4) {
- /* NCountSize >= 2 && bitstreamSize > 0 ==> lastCountSize == 3 */
- assert(op - lastNCount == 3);
+ if (lastCountSize && (lastCountSize + bitstreamSize) < 4) {
+ /* lastCountSize >= 2 && bitstreamSize > 0 ==> lastCountSize == 3 */
+ assert(lastCountSize + bitstreamSize == 3);
  DEBUGLOG(5, "Avoiding bug in zstd decoder in versions <= 1.3.4 by "
  "emitting an uncompressed block.");
  return 0;
@@ -2371,7 +2661,7 @@ ZSTD_entropyCompressSequences_internal(seqStore_t* seqStorePtr,
  }

  MEM_STATIC size_t
- ZSTD_entropyCompressSequences(seqStore_t* seqStorePtr,
+ ZSTD_entropyCompressSeqStore(seqStore_t* seqStorePtr,
  const ZSTD_entropyCTables_t* prevEntropy,
  ZSTD_entropyCTables_t* nextEntropy,
  const ZSTD_CCtx_params* cctxParams,
@@ -2380,7 +2670,7 @@ ZSTD_entropyCompressSequences(seqStore_t* seqStorePtr,
  void* entropyWorkspace, size_t entropyWkspSize,
  int bmi2)
  {
- size_t const cSize = ZSTD_entropyCompressSequences_internal(
+ size_t const cSize = ZSTD_entropyCompressSeqStore_internal(
  seqStorePtr, prevEntropy, nextEntropy, cctxParams,
  dst, dstCapacity,
  entropyWorkspace, entropyWkspSize, bmi2);
@@ -2390,20 +2680,20 @@ ZSTD_entropyCompressSequences(seqStore_t* seqStorePtr,
  */
  if ((cSize == ERROR(dstSize_tooSmall)) & (srcSize <= dstCapacity))
  return 0; /* block not compressed */
- FORWARD_IF_ERROR(cSize, "ZSTD_entropyCompressSequences_internal failed");
+ FORWARD_IF_ERROR(cSize, "ZSTD_entropyCompressSeqStore_internal failed");

  /* Check compressibility */
  { size_t const maxCSize = srcSize - ZSTD_minGain(srcSize, cctxParams->cParams.strategy);
  if (cSize >= maxCSize) return 0; /* block not compressed */
  }
- DEBUGLOG(4, "ZSTD_entropyCompressSequences() cSize: %zu\n", cSize);
+ DEBUGLOG(4, "ZSTD_entropyCompressSeqStore() cSize: %zu", cSize);
  return cSize;
  }

  /* ZSTD_selectBlockCompressor() :
  * Not static, but internal use only (used by long distance matcher)
  * assumption : strat is a valid strategy */
- ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, ZSTD_dictMode_e dictMode)
+ ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, ZSTD_useRowMatchFinderMode_e useRowMatchFinder, ZSTD_dictMode_e dictMode)
  {
  static const ZSTD_blockCompressor blockCompressor[4][ZSTD_STRATEGY_MAX+1] = {
  { ZSTD_compressBlock_fast /* default for 0 */,
@@ -2451,7 +2741,28 @@ ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, ZSTD_dictMo
  ZSTD_STATIC_ASSERT((unsigned)ZSTD_fast == 1);

  assert(ZSTD_cParam_withinBounds(ZSTD_c_strategy, strat));
- selectedCompressor = blockCompressor[(int)dictMode][(int)strat];
+ DEBUGLOG(4, "Selected block compressor: dictMode=%d strat=%d rowMatchfinder=%d", (int)dictMode, (int)strat, (int)useRowMatchFinder);
+ if (ZSTD_rowMatchFinderUsed(strat, useRowMatchFinder)) {
+ static const ZSTD_blockCompressor rowBasedBlockCompressors[4][3] = {
+ { ZSTD_compressBlock_greedy_row,
+ ZSTD_compressBlock_lazy_row,
+ ZSTD_compressBlock_lazy2_row },
+ { ZSTD_compressBlock_greedy_extDict_row,
+ ZSTD_compressBlock_lazy_extDict_row,
+ ZSTD_compressBlock_lazy2_extDict_row },
+ { ZSTD_compressBlock_greedy_dictMatchState_row,
+ ZSTD_compressBlock_lazy_dictMatchState_row,
+ ZSTD_compressBlock_lazy2_dictMatchState_row },
+ { ZSTD_compressBlock_greedy_dedicatedDictSearch_row,
+ ZSTD_compressBlock_lazy_dedicatedDictSearch_row,
+ ZSTD_compressBlock_lazy2_dedicatedDictSearch_row }
+ };
+ DEBUGLOG(4, "Selecting a row-based matchfinder");
+ assert(useRowMatchFinder != ZSTD_urm_auto);
+ selectedCompressor = rowBasedBlockCompressors[(int)dictMode][(int)strat - (int)ZSTD_greedy];
+ } else {
+ selectedCompressor = blockCompressor[(int)dictMode][(int)strat];
+ }
  assert(selectedCompressor != NULL);
  return selectedCompressor;
  }
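The row-based variants reuse the existing dispatch idea: a static 2-D table of function pointers indexed by dictMode and strategy, with the column offset by ZSTD_greedy because only greedy/lazy/lazy2 have row versions. A toy version of that dispatch shape (names and enum values are illustrative, not libzstd's):

    #include <stdio.h>

    typedef int (*blockFn)(void);
    static int greedy_row(void) { return 1; }
    static int lazy_row(void)   { return 2; }
    static int lazy2_row(void)  { return 3; }

    /* illustrative stand-ins for ZSTD_greedy..ZSTD_lazy2 */
    enum { STRAT_GREEDY = 3, STRAT_LAZY = 4, STRAT_LAZY2 = 5 };

    int main(void) {
        /* one row per dictMode; columns are (strategy - STRAT_GREEDY) */
        static const blockFn rowCompressors[1][3] = { { greedy_row, lazy_row, lazy2_row } };
        int const dictMode = 0, strat = STRAT_LAZY2;
        printf("%d\n", rowCompressors[dictMode][strat - STRAT_GREEDY]()); /* prints 3 */
        return 0;
    }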
@@ -2467,7 +2778,7 @@ void ZSTD_resetSeqStore(seqStore_t* ssPtr)
  {
  ssPtr->lit = ssPtr->litStart;
  ssPtr->sequences = ssPtr->sequencesStart;
- ssPtr->longLengthID = 0;
+ ssPtr->longLengthType = ZSTD_llt_none;
  }

  typedef enum { ZSTDbss_compress, ZSTDbss_noCompress } ZSTD_buildSeqStore_e;
@@ -2520,6 +2831,7 @@ static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize)
  ZSTD_ldm_blockCompress(&zc->externSeqStore,
  ms, &zc->seqStore,
  zc->blockState.nextCBlock->rep,
+ zc->appliedParams.useRowMatchFinder,
  src, srcSize);
  assert(zc->externSeqStore.pos <= zc->externSeqStore.size);
  } else if (zc->appliedParams.ldmParams.enableLdm) {
@@ -2536,10 +2848,13 @@ static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize)
  ZSTD_ldm_blockCompress(&ldmSeqStore,
  ms, &zc->seqStore,
  zc->blockState.nextCBlock->rep,
+ zc->appliedParams.useRowMatchFinder,
  src, srcSize);
  assert(ldmSeqStore.pos == ldmSeqStore.size);
  } else { /* not long range mode */
- ZSTD_blockCompressor const blockCompressor = ZSTD_selectBlockCompressor(zc->appliedParams.cParams.strategy, dictMode);
+ ZSTD_blockCompressor const blockCompressor = ZSTD_selectBlockCompressor(zc->appliedParams.cParams.strategy,
+ zc->appliedParams.useRowMatchFinder,
+ dictMode);
  ms->ldmSeqStore = NULL;
  lastLLSize = blockCompressor(ms, &zc->seqStore, zc->blockState.nextCBlock->rep, src, srcSize);
  }
@@ -2573,9 +2888,9 @@ static void ZSTD_copyBlockSequences(ZSTD_CCtx* zc)
  outSeqs[i].rep = 0;

  if (i == seqStore->longLengthPos) {
- if (seqStore->longLengthID == 1) {
+ if (seqStore->longLengthType == ZSTD_llt_literalLength) {
  outSeqs[i].litLength += 0x10000;
- } else if (seqStore->longLengthID == 2) {
+ } else if (seqStore->longLengthType == ZSTD_llt_matchLength) {
  outSeqs[i].matchLength += 0x10000;
  }
  }
@@ -2686,11 +3001,713 @@ static int ZSTD_maybeRLE(seqStore_t const* seqStore)
  return nbSeqs < 4 && nbLits < 10;
  }

- static void ZSTD_confirmRepcodesAndEntropyTables(ZSTD_CCtx* zc)
+ static void ZSTD_blockState_confirmRepcodesAndEntropyTables(ZSTD_blockState_t* const bs)
+ {
+ ZSTD_compressedBlockState_t* const tmp = bs->prevCBlock;
+ bs->prevCBlock = bs->nextCBlock;
+ bs->nextCBlock = tmp;
+ }
+
+ /* Writes the block header */
+ static void writeBlockHeader(void* op, size_t cSize, size_t blockSize, U32 lastBlock) {
+ U32 const cBlockHeader = cSize == 1 ?
+ lastBlock + (((U32)bt_rle)<<1) + (U32)(blockSize << 3) :
+ lastBlock + (((U32)bt_compressed)<<1) + (U32)(cSize << 3);
+ MEM_writeLE24(op, cBlockHeader);
+ DEBUGLOG(3, "writeBlockHeader: cSize: %zu blockSize: %zu lastBlock: %u", cSize, blockSize, lastBlock);
+ }
+
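writeBlockHeader emits the standard 3-byte zstd block header: bit 0 is the last-block flag, bits 1-2 the block type, bits 3-23 the size (the content size for an RLE block, the compressed size otherwise). A round-trip sketch of the packing:

    #include <assert.h>
    #include <stdint.h>

    enum { bt_raw = 0, bt_rle = 1, bt_compressed = 2 };  /* block types, per RFC 8878 */

    static uint32_t makeBlockHeader(uint32_t lastBlock, uint32_t blockType, uint32_t size) {
        return lastBlock + (blockType << 1) + (size << 3);  /* 24 bits, stored little-endian */
    }

    int main(void) {
        uint32_t const h = makeBlockHeader(1, bt_compressed, 4242);
        assert((h & 1) == 1);                     /* last block */
        assert(((h >> 1) & 3) == bt_compressed);  /* block type */
        assert((h >> 3) == 4242);                 /* size field */
        return 0;
    }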
+ /** ZSTD_buildBlockEntropyStats_literals() :
+ * Builds entropy for the literals.
+ * Stores literals block type (raw, rle, compressed, repeat) and
+ * huffman description table to hufMetadata.
+ * Requires ENTROPY_WORKSPACE_SIZE workspace
+ * @return : size of huffman description table or error code */
+ static size_t ZSTD_buildBlockEntropyStats_literals(void* const src, size_t srcSize,
+ const ZSTD_hufCTables_t* prevHuf,
+ ZSTD_hufCTables_t* nextHuf,
+ ZSTD_hufCTablesMetadata_t* hufMetadata,
+ const int disableLiteralsCompression,
+ void* workspace, size_t wkspSize)
+ {
+ BYTE* const wkspStart = (BYTE*)workspace;
+ BYTE* const wkspEnd = wkspStart + wkspSize;
+ BYTE* const countWkspStart = wkspStart;
+ unsigned* const countWksp = (unsigned*)workspace;
+ const size_t countWkspSize = (HUF_SYMBOLVALUE_MAX + 1) * sizeof(unsigned);
+ BYTE* const nodeWksp = countWkspStart + countWkspSize;
+ const size_t nodeWkspSize = wkspEnd-nodeWksp;
+ unsigned maxSymbolValue = HUF_SYMBOLVALUE_MAX;
+ unsigned huffLog = HUF_TABLELOG_DEFAULT;
+ HUF_repeat repeat = prevHuf->repeatMode;
+ DEBUGLOG(5, "ZSTD_buildBlockEntropyStats_literals (srcSize=%zu)", srcSize);
+
+ /* Prepare nextEntropy assuming reusing the existing table */
+ ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
+
+ if (disableLiteralsCompression) {
+ DEBUGLOG(5, "set_basic - disabled");
+ hufMetadata->hType = set_basic;
+ return 0;
+ }
+
+ /* small ? don't even attempt compression (speed opt) */
+ #ifndef COMPRESS_LITERALS_SIZE_MIN
+ #define COMPRESS_LITERALS_SIZE_MIN 63
+ #endif
+ { size_t const minLitSize = (prevHuf->repeatMode == HUF_repeat_valid) ? 6 : COMPRESS_LITERALS_SIZE_MIN;
+ if (srcSize <= minLitSize) {
+ DEBUGLOG(5, "set_basic - too small");
+ hufMetadata->hType = set_basic;
+ return 0;
+ }
+ }
+
+ /* Scan input and build symbol stats */
+ { size_t const largest = HIST_count_wksp (countWksp, &maxSymbolValue, (const BYTE*)src, srcSize, workspace, wkspSize);
+ FORWARD_IF_ERROR(largest, "HIST_count_wksp failed");
+ if (largest == srcSize) {
+ DEBUGLOG(5, "set_rle");
+ hufMetadata->hType = set_rle;
+ return 0;
+ }
+ if (largest <= (srcSize >> 7)+4) {
+ DEBUGLOG(5, "set_basic - no gain");
+ hufMetadata->hType = set_basic;
+ return 0;
+ }
+ }
+
+ /* Validate the previous Huffman table */
+ if (repeat == HUF_repeat_check && !HUF_validateCTable((HUF_CElt const*)prevHuf->CTable, countWksp, maxSymbolValue)) {
+ repeat = HUF_repeat_none;
+ }
+
+ /* Build Huffman Tree */
+ ZSTD_memset(nextHuf->CTable, 0, sizeof(nextHuf->CTable));
+ huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue);
+ { size_t const maxBits = HUF_buildCTable_wksp((HUF_CElt*)nextHuf->CTable, countWksp,
+ maxSymbolValue, huffLog,
+ nodeWksp, nodeWkspSize);
+ FORWARD_IF_ERROR(maxBits, "HUF_buildCTable_wksp");
+ huffLog = (U32)maxBits;
+ { /* Build and write the CTable */
+ size_t const newCSize = HUF_estimateCompressedSize(
+ (HUF_CElt*)nextHuf->CTable, countWksp, maxSymbolValue);
+ size_t const hSize = HUF_writeCTable_wksp(
+ hufMetadata->hufDesBuffer, sizeof(hufMetadata->hufDesBuffer),
+ (HUF_CElt*)nextHuf->CTable, maxSymbolValue, huffLog,
+ nodeWksp, nodeWkspSize);
+ /* Check against repeating the previous CTable */
+ if (repeat != HUF_repeat_none) {
+ size_t const oldCSize = HUF_estimateCompressedSize(
+ (HUF_CElt const*)prevHuf->CTable, countWksp, maxSymbolValue);
+ if (oldCSize < srcSize && (oldCSize <= hSize + newCSize || hSize + 12 >= srcSize)) {
+ DEBUGLOG(5, "set_repeat - smaller");
+ ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
+ hufMetadata->hType = set_repeat;
+ return 0;
+ }
+ }
+ if (newCSize + hSize >= srcSize) {
+ DEBUGLOG(5, "set_basic - no gains");
+ ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
+ hufMetadata->hType = set_basic;
+ return 0;
+ }
+ DEBUGLOG(5, "set_compressed (hSize=%u)", (U32)hSize);
+ hufMetadata->hType = set_compressed;
+ nextHuf->repeatMode = HUF_repeat_check;
+ return hSize;
+ }
+ }
+ }
+
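Stripped of the Huffman plumbing, the function above is a fixed decision ladder for the literals mode; the checks fire in this order (a condensed sketch, thresholds copied from the code above):

    #include <stddef.h>

    typedef enum { set_basic, set_rle, set_compressed, set_repeat } litMode_e;

    /* srcSize = literal bytes in the block; largest = count of the most frequent byte. */
    static litMode_e pickLitMode(size_t srcSize, size_t largest, size_t minLitSize,
                                 int disabled, int prevTableStillSmaller) {
        if (disabled)                      return set_basic;   /* literals compression off         */
        if (srcSize <= minLitSize)         return set_basic;   /* 63, or 6 with a valid prev table */
        if (largest == srcSize)            return set_rle;     /* single repeated byte             */
        if (largest <= (srcSize >> 7) + 4) return set_basic;   /* near-flat: no gain expected      */
        if (prevTableStillSmaller)         return set_repeat;  /* reuse the previous CTable        */
        return set_compressed;                                 /* build and emit a new table       */
    }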
+
+ /* ZSTD_buildDummySequencesStatistics():
+ * Returns a ZSTD_symbolEncodingTypeStats_t with all encoding types as set_basic,
+ * and updates nextEntropy to the appropriate repeatMode.
+ */
+ static ZSTD_symbolEncodingTypeStats_t
+ ZSTD_buildDummySequencesStatistics(ZSTD_fseCTables_t* nextEntropy) {
+ ZSTD_symbolEncodingTypeStats_t stats = {set_basic, set_basic, set_basic, 0, 0};
+ nextEntropy->litlength_repeatMode = FSE_repeat_none;
+ nextEntropy->offcode_repeatMode = FSE_repeat_none;
+ nextEntropy->matchlength_repeatMode = FSE_repeat_none;
+ return stats;
+ }
+
+ /** ZSTD_buildBlockEntropyStats_sequences() :
+ * Builds entropy for the sequences.
+ * Stores symbol compression modes and fse table to fseMetadata.
+ * Requires ENTROPY_WORKSPACE_SIZE wksp.
+ * @return : size of fse tables or error code */
+ static size_t ZSTD_buildBlockEntropyStats_sequences(seqStore_t* seqStorePtr,
+ const ZSTD_fseCTables_t* prevEntropy,
+ ZSTD_fseCTables_t* nextEntropy,
+ const ZSTD_CCtx_params* cctxParams,
+ ZSTD_fseCTablesMetadata_t* fseMetadata,
+ void* workspace, size_t wkspSize)
  {
- ZSTD_compressedBlockState_t* const tmp = zc->blockState.prevCBlock;
- zc->blockState.prevCBlock = zc->blockState.nextCBlock;
- zc->blockState.nextCBlock = tmp;
+ ZSTD_strategy const strategy = cctxParams->cParams.strategy;
+ size_t const nbSeq = seqStorePtr->sequences - seqStorePtr->sequencesStart;
+ BYTE* const ostart = fseMetadata->fseTablesBuffer;
+ BYTE* const oend = ostart + sizeof(fseMetadata->fseTablesBuffer);
+ BYTE* op = ostart;
+ unsigned* countWorkspace = (unsigned*)workspace;
+ unsigned* entropyWorkspace = countWorkspace + (MaxSeq + 1);
+ size_t entropyWorkspaceSize = wkspSize - (MaxSeq + 1) * sizeof(*countWorkspace);
+ ZSTD_symbolEncodingTypeStats_t stats;
+
+ DEBUGLOG(5, "ZSTD_buildBlockEntropyStats_sequences (nbSeq=%zu)", nbSeq);
+ stats = nbSeq != 0 ? ZSTD_buildSequencesStatistics(seqStorePtr, nbSeq,
+ prevEntropy, nextEntropy, op, oend,
+ strategy, countWorkspace,
+ entropyWorkspace, entropyWorkspaceSize)
+ : ZSTD_buildDummySequencesStatistics(nextEntropy);
+ FORWARD_IF_ERROR(stats.size, "ZSTD_buildSequencesStatistics failed!");
+ fseMetadata->llType = (symbolEncodingType_e) stats.LLtype;
+ fseMetadata->ofType = (symbolEncodingType_e) stats.Offtype;
+ fseMetadata->mlType = (symbolEncodingType_e) stats.MLtype;
+ fseMetadata->lastCountSize = stats.lastCountSize;
+ return stats.size;
+ }
+
+
+ /** ZSTD_buildBlockEntropyStats() :
+ * Builds entropy for the block.
+ * Requires workspace size ENTROPY_WORKSPACE_SIZE
+ *
+ * @return : 0 on success or error code
+ */
+ size_t ZSTD_buildBlockEntropyStats(seqStore_t* seqStorePtr,
+ const ZSTD_entropyCTables_t* prevEntropy,
+ ZSTD_entropyCTables_t* nextEntropy,
+ const ZSTD_CCtx_params* cctxParams,
+ ZSTD_entropyCTablesMetadata_t* entropyMetadata,
+ void* workspace, size_t wkspSize)
+ {
+ size_t const litSize = seqStorePtr->lit - seqStorePtr->litStart;
+ entropyMetadata->hufMetadata.hufDesSize =
+ ZSTD_buildBlockEntropyStats_literals(seqStorePtr->litStart, litSize,
+ &prevEntropy->huf, &nextEntropy->huf,
+ &entropyMetadata->hufMetadata,
+ ZSTD_disableLiteralsCompression(cctxParams),
+ workspace, wkspSize);
+ FORWARD_IF_ERROR(entropyMetadata->hufMetadata.hufDesSize, "ZSTD_buildBlockEntropyStats_literals failed");
+ entropyMetadata->fseMetadata.fseTablesSize =
+ ZSTD_buildBlockEntropyStats_sequences(seqStorePtr,
+ &prevEntropy->fse, &nextEntropy->fse,
+ cctxParams,
+ &entropyMetadata->fseMetadata,
+ workspace, wkspSize);
+ FORWARD_IF_ERROR(entropyMetadata->fseMetadata.fseTablesSize, "ZSTD_buildBlockEntropyStats_sequences failed");
+ return 0;
+ }
+
+ /* Returns the size estimate for the literals section (header + content) of a block */
+ static size_t ZSTD_estimateBlockSize_literal(const BYTE* literals, size_t litSize,
+ const ZSTD_hufCTables_t* huf,
+ const ZSTD_hufCTablesMetadata_t* hufMetadata,
+ void* workspace, size_t wkspSize,
+ int writeEntropy)
+ {
+ unsigned* const countWksp = (unsigned*)workspace;
+ unsigned maxSymbolValue = HUF_SYMBOLVALUE_MAX;
+ size_t literalSectionHeaderSize = 3 + (litSize >= 1 KB) + (litSize >= 16 KB);
+ U32 singleStream = litSize < 256;
+
+ if (hufMetadata->hType == set_basic) return litSize;
+ else if (hufMetadata->hType == set_rle) return 1;
+ else if (hufMetadata->hType == set_compressed || hufMetadata->hType == set_repeat) {
+ size_t const largest = HIST_count_wksp (countWksp, &maxSymbolValue, (const BYTE*)literals, litSize, workspace, wkspSize);
+ if (ZSTD_isError(largest)) return litSize;
+ { size_t cLitSizeEstimate = HUF_estimateCompressedSize((const HUF_CElt*)huf->CTable, countWksp, maxSymbolValue);
+ if (writeEntropy) cLitSizeEstimate += hufMetadata->hufDesSize;
+ if (!singleStream) cLitSizeEstimate += 6; /* multi-stream huffman uses 6-byte jump table */
+ return cLitSizeEstimate + literalSectionHeaderSize;
+ } }
+ assert(0); /* impossible */
+ return 0;
+ }
+
+ /* Returns the size estimate for the FSE-compressed symbols (of, ml, ll) of a block */
+ static size_t ZSTD_estimateBlockSize_symbolType(symbolEncodingType_e type,
+ const BYTE* codeTable, size_t nbSeq, unsigned maxCode,
+ const FSE_CTable* fseCTable,
+ const U32* additionalBits,
+ short const* defaultNorm, U32 defaultNormLog, U32 defaultMax,
+ void* workspace, size_t wkspSize)
+ {
+ unsigned* const countWksp = (unsigned*)workspace;
+ const BYTE* ctp = codeTable;
+ const BYTE* const ctStart = ctp;
+ const BYTE* const ctEnd = ctStart + nbSeq;
+ size_t cSymbolTypeSizeEstimateInBits = 0;
+ unsigned max = maxCode;
+
+ HIST_countFast_wksp(countWksp, &max, codeTable, nbSeq, workspace, wkspSize); /* can't fail */
+ if (type == set_basic) {
+ /* We selected this encoding type, so it must be valid. */
+ assert(max <= defaultMax);
+ (void)defaultMax;
+ cSymbolTypeSizeEstimateInBits = ZSTD_crossEntropyCost(defaultNorm, defaultNormLog, countWksp, max);
+ } else if (type == set_rle) {
+ cSymbolTypeSizeEstimateInBits = 0;
+ } else if (type == set_compressed || type == set_repeat) {
+ cSymbolTypeSizeEstimateInBits = ZSTD_fseBitCost(fseCTable, countWksp, max);
+ }
+ if (ZSTD_isError(cSymbolTypeSizeEstimateInBits)) {
+ return nbSeq * 10;
+ }
+ while (ctp < ctEnd) {
+ if (additionalBits) cSymbolTypeSizeEstimateInBits += additionalBits[*ctp];
+ else cSymbolTypeSizeEstimateInBits += *ctp; /* for offset, offset code is also the number of additional bits */
+ ctp++;
+ }
+ return cSymbolTypeSizeEstimateInBits >> 3;
+ }
+
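The estimator prices a stream as the entropy-coded bits for each code byte plus the raw "additional bits" the code implies (for offsets, the code itself is the number of extra bits). Written out for a single stream (a sketch; symbolBits stands in for ZSTD_fseBitCost / ZSTD_crossEntropyCost):

    #include <stddef.h>

    static size_t estimateStreamBytes(const unsigned char* codes, size_t nbSeq,
                                      const unsigned* symbolBits,     /* est. FSE bits per code  */
                                      const unsigned* additionalBits) /* extra raw bits per code */
    {
        size_t bits = 0, i;
        for (i = 0; i < nbSeq; ++i) {
            bits += symbolBits[codes[i]];
            bits += additionalBits ? additionalBits[codes[i]]
                                   : codes[i]; /* offsets: code == nb of extra bits */
        }
        return bits >> 3;  /* bits -> bytes, rounded down as above */
    }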
+ /* Returns the size estimate for the sequences section (header + content) of a block */
+ static size_t ZSTD_estimateBlockSize_sequences(const BYTE* ofCodeTable,
+ const BYTE* llCodeTable,
+ const BYTE* mlCodeTable,
+ size_t nbSeq,
+ const ZSTD_fseCTables_t* fseTables,
+ const ZSTD_fseCTablesMetadata_t* fseMetadata,
+ void* workspace, size_t wkspSize,
+ int writeEntropy)
+ {
+ size_t sequencesSectionHeaderSize = 1 /* seqHead */ + 1 /* min seqSize size */ + (nbSeq >= 128) + (nbSeq >= LONGNBSEQ);
+ size_t cSeqSizeEstimate = 0;
+ cSeqSizeEstimate += ZSTD_estimateBlockSize_symbolType(fseMetadata->ofType, ofCodeTable, nbSeq, MaxOff,
+ fseTables->offcodeCTable, NULL,
+ OF_defaultNorm, OF_defaultNormLog, DefaultMaxOff,
+ workspace, wkspSize);
+ cSeqSizeEstimate += ZSTD_estimateBlockSize_symbolType(fseMetadata->llType, llCodeTable, nbSeq, MaxLL,
+ fseTables->litlengthCTable, LL_bits,
+ LL_defaultNorm, LL_defaultNormLog, MaxLL,
+ workspace, wkspSize);
+ cSeqSizeEstimate += ZSTD_estimateBlockSize_symbolType(fseMetadata->mlType, mlCodeTable, nbSeq, MaxML,
+ fseTables->matchlengthCTable, ML_bits,
+ ML_defaultNorm, ML_defaultNormLog, MaxML,
+ workspace, wkspSize);
+ if (writeEntropy) cSeqSizeEstimate += fseMetadata->fseTablesSize;
+ return cSeqSizeEstimate + sequencesSectionHeaderSize;
+ }
+
+ /* Returns the size estimate for a given stream of literals, of, ll, ml */
+ static size_t ZSTD_estimateBlockSize(const BYTE* literals, size_t litSize,
+ const BYTE* ofCodeTable,
+ const BYTE* llCodeTable,
+ const BYTE* mlCodeTable,
+ size_t nbSeq,
+ const ZSTD_entropyCTables_t* entropy,
+ const ZSTD_entropyCTablesMetadata_t* entropyMetadata,
+ void* workspace, size_t wkspSize,
+ int writeLitEntropy, int writeSeqEntropy) {
+ size_t const literalsSize = ZSTD_estimateBlockSize_literal(literals, litSize,
+ &entropy->huf, &entropyMetadata->hufMetadata,
+ workspace, wkspSize, writeLitEntropy);
+ size_t const seqSize = ZSTD_estimateBlockSize_sequences(ofCodeTable, llCodeTable, mlCodeTable,
+ nbSeq, &entropy->fse, &entropyMetadata->fseMetadata,
+ workspace, wkspSize, writeSeqEntropy);
+ return seqSize + literalsSize + ZSTD_blockHeaderSize;
+ }
+
+ /* Builds entropy statistics and uses them for blocksize estimation.
+ *
+ * Returns the estimated compressed size of the seqStore, or a zstd error.
+ */
+ static size_t ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(seqStore_t* seqStore, const ZSTD_CCtx* zc) {
+ ZSTD_entropyCTablesMetadata_t entropyMetadata;
+ FORWARD_IF_ERROR(ZSTD_buildBlockEntropyStats(seqStore,
+ &zc->blockState.prevCBlock->entropy,
+ &zc->blockState.nextCBlock->entropy,
+ &zc->appliedParams,
+ &entropyMetadata,
+ zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE /* statically allocated in resetCCtx */), "");
+ return ZSTD_estimateBlockSize(seqStore->litStart, (size_t)(seqStore->lit - seqStore->litStart),
+ seqStore->ofCode, seqStore->llCode, seqStore->mlCode,
+ (size_t)(seqStore->sequences - seqStore->sequencesStart),
+ &zc->blockState.nextCBlock->entropy, &entropyMetadata, zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE,
+ (int)(entropyMetadata.hufMetadata.hType == set_compressed), 1);
+ }
+
+ /* Returns literals bytes represented in a seqStore */
+ static size_t ZSTD_countSeqStoreLiteralsBytes(const seqStore_t* const seqStore) {
+ size_t literalsBytes = 0;
+ size_t const nbSeqs = seqStore->sequences - seqStore->sequencesStart;
+ size_t i;
+ for (i = 0; i < nbSeqs; ++i) {
+ seqDef seq = seqStore->sequencesStart[i];
+ literalsBytes += seq.litLength;
+ if (i == seqStore->longLengthPos && seqStore->longLengthType == ZSTD_llt_literalLength) {
+ literalsBytes += 0x10000;
+ }
+ }
+ return literalsBytes;
+ }
+
+ /* Returns match bytes represented in a seqStore */
+ static size_t ZSTD_countSeqStoreMatchBytes(const seqStore_t* const seqStore) {
+ size_t matchBytes = 0;
+ size_t const nbSeqs = seqStore->sequences - seqStore->sequencesStart;
+ size_t i;
+ for (i = 0; i < nbSeqs; ++i) {
+ seqDef seq = seqStore->sequencesStart[i];
+ matchBytes += seq.matchLength + MINMATCH;
+ if (i == seqStore->longLengthPos && seqStore->longLengthType == ZSTD_llt_matchLength) {
+ matchBytes += 0x10000;
+ }
+ }
+ return matchBytes;
+ }
+
+ /* Derives the seqStore that is a chunk of the originalSeqStore from [startIdx, endIdx).
+ * Stores the result in resultSeqStore.
+ */
+ static void ZSTD_deriveSeqStoreChunk(seqStore_t* resultSeqStore,
+ const seqStore_t* originalSeqStore,
+ size_t startIdx, size_t endIdx) {
+ BYTE* const litEnd = originalSeqStore->lit;
+ size_t literalsBytes;
+ size_t literalsBytesPreceding = 0;
+
+ *resultSeqStore = *originalSeqStore;
+ if (startIdx > 0) {
+ resultSeqStore->sequences = originalSeqStore->sequencesStart + startIdx;
+ literalsBytesPreceding = ZSTD_countSeqStoreLiteralsBytes(resultSeqStore);
+ }
+
+ /* Move longLengthPos into the correct position if necessary */
+ if (originalSeqStore->longLengthType != ZSTD_llt_none) {
+ if (originalSeqStore->longLengthPos < startIdx || originalSeqStore->longLengthPos > endIdx) {
+ resultSeqStore->longLengthType = ZSTD_llt_none;
+ } else {
+ resultSeqStore->longLengthPos -= (U32)startIdx;
+ }
+ }
+ resultSeqStore->sequencesStart = originalSeqStore->sequencesStart + startIdx;
+ resultSeqStore->sequences = originalSeqStore->sequencesStart + endIdx;
+ literalsBytes = ZSTD_countSeqStoreLiteralsBytes(resultSeqStore);
+ resultSeqStore->litStart += literalsBytesPreceding;
+ if (endIdx == (size_t)(originalSeqStore->sequences - originalSeqStore->sequencesStart)) {
+ /* This accounts for possible last literals if the derived chunk reaches the end of the block */
+ resultSeqStore->lit = litEnd;
+ } else {
+ resultSeqStore->lit = resultSeqStore->litStart+literalsBytes;
+ }
+ resultSeqStore->llCode += startIdx;
+ resultSeqStore->mlCode += startIdx;
+ resultSeqStore->ofCode += startIdx;
+ }
+
+ /**
+ * Returns the raw offset represented by the combination of offCode, ll0, and repcode history.
+ * offCode must be an offCode representing a repcode, therefore in the range of [0, 2].
+ */
+ static U32 ZSTD_resolveRepcodeToRawOffset(const U32 rep[ZSTD_REP_NUM], const U32 offCode, const U32 ll0) {
+ U32 const adjustedOffCode = offCode + ll0;
+ assert(offCode < ZSTD_REP_NUM);
+ if (adjustedOffCode == ZSTD_REP_NUM) {
+ /* litlength == 0 and offCode == 2 implies selection of first repcode - 1 */
+ assert(rep[0] > 0);
+ return rep[0] - 1;
+ }
+ return rep[adjustedOffCode];
+ }
+
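The offCode + ll0 adjustment reproduces zstd's repcode numbering: a literal length of zero shifts the repcode indices by one, and the index that falls off the end means "rep[0] - 1". A standalone check of the mapping (mirroring the function above, with ZSTD_REP_NUM == 3):

    #include <assert.h>

    #define REP_NUM 3  /* mirrors ZSTD_REP_NUM */

    static unsigned resolveRep(const unsigned rep[REP_NUM], unsigned offCode, unsigned ll0) {
        unsigned const adjusted = offCode + ll0;     /* ll0 shifts the repcode indices */
        if (adjusted == REP_NUM) return rep[0] - 1;  /* overflow slot: rep[0] - 1 */
        return rep[adjusted];
    }

    int main(void) {
        unsigned const rep[REP_NUM] = { 100, 200, 300 };
        assert(resolveRep(rep, 0, 0) == 100);  /* plain repcode 0         */
        assert(resolveRep(rep, 0, 1) == 200);  /* litLength == 0: shift   */
        assert(resolveRep(rep, 2, 1) ==  99);  /* rep[0] - 1 special case */
        return 0;
    }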
+ /**
+ * ZSTD_seqStore_resolveOffCodes() reconciles any possible divergences in offset history that may arise
+ * due to emission of RLE/raw blocks that disturb the offset history, and replaces any repcodes within
+ * the seqStore that may be invalid.
+ *
+ * dRepcodes are updated as would be on the decompression side. cRepcodes are updated exactly in
+ * accordance with the seqStore.
+ */
+ static void ZSTD_seqStore_resolveOffCodes(repcodes_t* const dRepcodes, repcodes_t* const cRepcodes,
+ seqStore_t* const seqStore, U32 const nbSeq) {
+ U32 idx = 0;
+ for (; idx < nbSeq; ++idx) {
+ seqDef* const seq = seqStore->sequencesStart + idx;
+ U32 const ll0 = (seq->litLength == 0);
+ U32 offCode = seq->offset - 1;
+ assert(seq->offset > 0);
+ if (offCode <= ZSTD_REP_MOVE) {
+ U32 const dRawOffset = ZSTD_resolveRepcodeToRawOffset(dRepcodes->rep, offCode, ll0);
+ U32 const cRawOffset = ZSTD_resolveRepcodeToRawOffset(cRepcodes->rep, offCode, ll0);
+ /* Adjust simulated decompression repcode history if we come across a mismatch. Replace
+ * the repcode with the offset it actually references, determined by the compression
+ * repcode history.
+ */
+ if (dRawOffset != cRawOffset) {
+ seq->offset = cRawOffset + ZSTD_REP_NUM;
+ }
+ }
+ /* Compression repcode history is always updated with values directly from the unmodified seqStore.
+ * Decompression repcode history may use modified seq->offset value taken from compression repcode history.
+ */
+ *dRepcodes = ZSTD_updateRep(dRepcodes->rep, seq->offset - 1, ll0);
+ *cRepcodes = ZSTD_updateRep(cRepcodes->rep, offCode, ll0);
+ }
+ }
+
+ /* ZSTD_compressSeqStore_singleBlock():
+ * Compresses a seqStore into a block with a block header, into the buffer dst.
+ *
+ * Returns the total size of that block (including header) or a ZSTD error code.
+ */
+ static size_t ZSTD_compressSeqStore_singleBlock(ZSTD_CCtx* zc, seqStore_t* const seqStore,
+ repcodes_t* const dRep, repcodes_t* const cRep,
+ void* dst, size_t dstCapacity,
+ const void* src, size_t srcSize,
+ U32 lastBlock, U32 isPartition) {
+ const U32 rleMaxLength = 25;
+ BYTE* op = (BYTE*)dst;
+ const BYTE* ip = (const BYTE*)src;
+ size_t cSize;
+ size_t cSeqsSize;
+
+ /* In case of an RLE or raw block, the simulated decompression repcode history must be reset */
+ repcodes_t const dRepOriginal = *dRep;
+ if (isPartition)
+ ZSTD_seqStore_resolveOffCodes(dRep, cRep, seqStore, (U32)(seqStore->sequences - seqStore->sequencesStart));
+
+ cSeqsSize = ZSTD_entropyCompressSeqStore(seqStore,
+ &zc->blockState.prevCBlock->entropy, &zc->blockState.nextCBlock->entropy,
+ &zc->appliedParams,
+ op + ZSTD_blockHeaderSize, dstCapacity - ZSTD_blockHeaderSize,
+ srcSize,
+ zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE /* statically allocated in resetCCtx */,
+ zc->bmi2);
+ FORWARD_IF_ERROR(cSeqsSize, "ZSTD_entropyCompressSeqStore failed!");
+
+ if (!zc->isFirstBlock &&
+ cSeqsSize < rleMaxLength &&
+ ZSTD_isRLE((BYTE const*)src, srcSize)) {
+ /* We don't want to emit our first block as a RLE even if it qualifies because
+ * doing so will cause the decoder (cli only) to throw a "should consume all input error."
+ * This is only an issue for zstd <= v1.4.3
+ */
+ cSeqsSize = 1;
+ }
+
+ if (zc->seqCollector.collectSequences) {
+ ZSTD_copyBlockSequences(zc);
+ ZSTD_blockState_confirmRepcodesAndEntropyTables(&zc->blockState);
+ return 0;
+ }
+
+ if (zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode == FSE_repeat_valid)
+ zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode = FSE_repeat_check;
+
+ if (cSeqsSize == 0) {
+ cSize = ZSTD_noCompressBlock(op, dstCapacity, ip, srcSize, lastBlock);
+ FORWARD_IF_ERROR(cSize, "Nocompress block failed");
+ DEBUGLOG(4, "Writing out nocompress block, size: %zu", cSize);
+ *dRep = dRepOriginal; /* reset simulated decompression repcode history */
+ } else if (cSeqsSize == 1) {
+ cSize = ZSTD_rleCompressBlock(op, dstCapacity, *ip, srcSize, lastBlock);
+ FORWARD_IF_ERROR(cSize, "RLE compress block failed");
+ DEBUGLOG(4, "Writing out RLE block, size: %zu", cSize);
+ *dRep = dRepOriginal; /* reset simulated decompression repcode history */
+ } else {
+ ZSTD_blockState_confirmRepcodesAndEntropyTables(&zc->blockState);
+ writeBlockHeader(op, cSeqsSize, srcSize, lastBlock);
+ cSize = ZSTD_blockHeaderSize + cSeqsSize;
+ DEBUGLOG(4, "Writing out compressed block, size: %zu", cSize);
+ }
+ return cSize;
+ }
+
+ /* Struct to keep track of where we are in our recursive calls. */
+ typedef struct {
+ U32* splitLocations; /* Array of split indices */
+ size_t idx; /* The current index within splitLocations being worked on */
+ } seqStoreSplits;
+
+ #define MIN_SEQUENCES_BLOCK_SPLITTING 300
+ #define MAX_NB_SPLITS 196
+
+ /* Helper function to perform the recursive search for block splits.
+ * Estimates the cost of seqStore prior to split, and estimates the cost of splitting the sequences in half.
+ * If advantageous to split, then we recurse down the two sub-blocks. If not, or if an error occurred in estimation, then
+ * we do not recurse.
+ *
+ * Note: The recursion depth is capped by a heuristic minimum number of sequences, defined by MIN_SEQUENCES_BLOCK_SPLITTING.
+ * In theory, this means the absolute largest recursion depth is 10 == log2(maxNbSeqInBlock/MIN_SEQUENCES_BLOCK_SPLITTING).
+ * In practice, recursion depth usually doesn't go beyond 4.
+ *
+ * Furthermore, the number of splits is capped by MAX_NB_SPLITS. At MAX_NB_SPLITS == 196 with the current existing blockSize
+ * maximum of 128 KB, this value is actually impossible to reach.
+ */
+ static void ZSTD_deriveBlockSplitsHelper(seqStoreSplits* splits, size_t startIdx, size_t endIdx,
+ const ZSTD_CCtx* zc, const seqStore_t* origSeqStore) {
+ seqStore_t fullSeqStoreChunk;
+ seqStore_t firstHalfSeqStore;
+ seqStore_t secondHalfSeqStore;
+ size_t estimatedOriginalSize;
+ size_t estimatedFirstHalfSize;
+ size_t estimatedSecondHalfSize;
+ size_t midIdx = (startIdx + endIdx)/2;
+
+ if (endIdx - startIdx < MIN_SEQUENCES_BLOCK_SPLITTING || splits->idx >= MAX_NB_SPLITS) {
+ return;
+ }
+ ZSTD_deriveSeqStoreChunk(&fullSeqStoreChunk, origSeqStore, startIdx, endIdx);
+ ZSTD_deriveSeqStoreChunk(&firstHalfSeqStore, origSeqStore, startIdx, midIdx);
+ ZSTD_deriveSeqStoreChunk(&secondHalfSeqStore, origSeqStore, midIdx, endIdx);
+ estimatedOriginalSize = ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(&fullSeqStoreChunk, zc);
+ estimatedFirstHalfSize = ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(&firstHalfSeqStore, zc);
+ estimatedSecondHalfSize = ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(&secondHalfSeqStore, zc);
+ DEBUGLOG(5, "Estimated original block size: %zu -- First half split: %zu -- Second half split: %zu",
+ estimatedOriginalSize, estimatedFirstHalfSize, estimatedSecondHalfSize);
+ if (ZSTD_isError(estimatedOriginalSize) || ZSTD_isError(estimatedFirstHalfSize) || ZSTD_isError(estimatedSecondHalfSize)) {
+ return;
+ }
+ if (estimatedFirstHalfSize + estimatedSecondHalfSize < estimatedOriginalSize) {
+ ZSTD_deriveBlockSplitsHelper(splits, startIdx, midIdx, zc, origSeqStore);
+ splits->splitLocations[splits->idx] = (U32)midIdx;
+ splits->idx++;
+ ZSTD_deriveBlockSplitsHelper(splits, midIdx, endIdx, zc, origSeqStore);
+ }
+ }
+
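The splitter is a plain divide-and-conquer over the sequence array: recurse only while the two halves together are estimated cheaper than the unsplit whole, so split indices come out in sorted order. The shape of the recursion, detached from zstd's estimators (costOf is a caller-supplied stand-in for ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize):

    #include <stddef.h>

    #define MIN_SEQS   300  /* mirrors MIN_SEQUENCES_BLOCK_SPLITTING */
    #define MAX_SPLITS 196  /* mirrors MAX_NB_SPLITS */

    typedef size_t (*costFn)(size_t lo, size_t hi);  /* estimated cSize of seqs [lo, hi) */

    static void deriveSplits(size_t* out, size_t* nbOut,
                             size_t lo, size_t hi, costFn costOf) {
        size_t const mid = (lo + hi) / 2;
        if (hi - lo < MIN_SEQS || *nbOut >= MAX_SPLITS) return;
        if (costOf(lo, mid) + costOf(mid, hi) < costOf(lo, hi)) {
            deriveSplits(out, nbOut, lo, mid, costOf);  /* in-order recursion keeps */
            out[(*nbOut)++] = mid;                      /* split indices sorted     */
            deriveSplits(out, nbOut, mid, hi, costOf);
        }
    }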
3577
+ /* Base recursive function. Populates a table with intra-block partition indices that can improve compression ratio.
3578
+ *
3579
+ * Returns the number of splits made (which equals the size of the partition table - 1).
3580
+ */
3581
+ static size_t ZSTD_deriveBlockSplits(ZSTD_CCtx* zc, U32 partitions[], U32 nbSeq) {
3582
+ seqStoreSplits splits = {partitions, 0};
3583
+ if (nbSeq <= 4) {
3584
+ DEBUGLOG(4, "ZSTD_deriveBlockSplits: Too few sequences to split");
3585
+ /* Refuse to try and split anything with less than 4 sequences */
3586
+ return 0;
3587
+ }
3588
+     ZSTD_deriveBlockSplitsHelper(&splits, 0, nbSeq, zc, &zc->seqStore);
+     splits.splitLocations[splits.idx] = nbSeq;
+     DEBUGLOG(5, "ZSTD_deriveBlockSplits: final nb partitions: %zu", splits.idx+1);
+     return splits.idx;
+ }
+
+ /* ZSTD_compressBlock_splitBlock():
+  * Attempts to split a given block into multiple blocks to improve compression ratio.
+  *
+  * Returns combined size of all blocks (which includes headers), or a ZSTD error code.
+  */
+ static size_t ZSTD_compressBlock_splitBlock_internal(ZSTD_CCtx* zc, void* dst, size_t dstCapacity,
+                                                      const void* src, size_t blockSize, U32 lastBlock, U32 nbSeq) {
+     size_t cSize = 0;
+     const BYTE* ip = (const BYTE*)src;
+     BYTE* op = (BYTE*)dst;
+     U32 partitions[MAX_NB_SPLITS];
+     size_t i = 0;
+     size_t srcBytesTotal = 0;
+     size_t numSplits = ZSTD_deriveBlockSplits(zc, partitions, nbSeq);
+     seqStore_t nextSeqStore;
+     seqStore_t currSeqStore;
+
+     /* If a block is split and some partitions are emitted as RLE/uncompressed, then repcode history
+      * may become invalid. In order to reconcile potentially invalid repcodes, we keep track of two
+      * separate repcode histories that simulate repcode history on compression and decompression side,
+      * and use the histories to determine whether we must replace a particular repcode with its raw offset.
+      *
+      * 1) cRep gets updated for each partition, regardless of whether the block was emitted as uncompressed
+      *    or RLE. This allows us to retrieve the offset value that an invalid repcode references within
+      *    a nocompress/RLE block.
+      * 2) dRep gets updated only for compressed partitions, and when a repcode gets replaced, will use
+      *    the replacement offset value rather than the original repcode to update the repcode history.
+      *    dRep also will be the final repcode history sent to the next block.
+      *
+      * See ZSTD_seqStore_resolveOffCodes() for more details.
+      */
+     repcodes_t dRep;
+     repcodes_t cRep;
+     ZSTD_memcpy(dRep.rep, zc->blockState.prevCBlock->rep, sizeof(repcodes_t));
+     ZSTD_memcpy(cRep.rep, zc->blockState.prevCBlock->rep, sizeof(repcodes_t));
+
+     DEBUGLOG(4, "ZSTD_compressBlock_splitBlock_internal (dstCapacity=%u, dictLimit=%u, nextToUpdate=%u)",
+              (unsigned)dstCapacity, (unsigned)zc->blockState.matchState.window.dictLimit,
+              (unsigned)zc->blockState.matchState.nextToUpdate);
+
+     if (numSplits == 0) {
+         size_t cSizeSingleBlock = ZSTD_compressSeqStore_singleBlock(zc, &zc->seqStore,
+                                                                     &dRep, &cRep,
+                                                                     op, dstCapacity,
+                                                                     ip, blockSize,
+                                                                     lastBlock, 0 /* isPartition */);
+         FORWARD_IF_ERROR(cSizeSingleBlock, "Compressing single block from splitBlock_internal() failed!");
+         DEBUGLOG(5, "ZSTD_compressBlock_splitBlock_internal: No splits");
+         assert(cSizeSingleBlock <= ZSTD_BLOCKSIZE_MAX + ZSTD_blockHeaderSize);
+         return cSizeSingleBlock;
+     }
+
+     ZSTD_deriveSeqStoreChunk(&currSeqStore, &zc->seqStore, 0, partitions[0]);
+     for (i = 0; i <= numSplits; ++i) {
+         size_t srcBytes;
+         size_t cSizeChunk;
+         U32 const lastPartition = (i == numSplits);
+         U32 lastBlockEntireSrc = 0;
+
+         srcBytes = ZSTD_countSeqStoreLiteralsBytes(&currSeqStore) + ZSTD_countSeqStoreMatchBytes(&currSeqStore);
+         srcBytesTotal += srcBytes;
+         if (lastPartition) {
+             /* This is the final partition, need to account for possible last literals */
+             srcBytes += blockSize - srcBytesTotal;
+             lastBlockEntireSrc = lastBlock;
+         } else {
+             ZSTD_deriveSeqStoreChunk(&nextSeqStore, &zc->seqStore, partitions[i], partitions[i+1]);
+         }
+
+         cSizeChunk = ZSTD_compressSeqStore_singleBlock(zc, &currSeqStore,
+                                                        &dRep, &cRep,
+                                                        op, dstCapacity,
+                                                        ip, srcBytes,
+                                                        lastBlockEntireSrc, 1 /* isPartition */);
+         DEBUGLOG(5, "Estimated size: %zu actual size: %zu", ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(&currSeqStore, zc), cSizeChunk);
+         FORWARD_IF_ERROR(cSizeChunk, "Compressing chunk failed!");
+
+         ip += srcBytes;
+         op += cSizeChunk;
+         dstCapacity -= cSizeChunk;
+         cSize += cSizeChunk;
+         currSeqStore = nextSeqStore;
+         assert(cSizeChunk <= ZSTD_BLOCKSIZE_MAX + ZSTD_blockHeaderSize);
+     }
+     /* cRep and dRep may have diverged during the compression. If so, we use the dRep repcodes
+      * for the next block.
+      */
+     ZSTD_memcpy(zc->blockState.prevCBlock->rep, dRep.rep, sizeof(repcodes_t));
+     return cSize;
+ }
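The cRep/dRep bookkeeping above is the subtle part of the splitter. A minimal standalone sketch of the idea, using hypothetical names rather than zstd's internal types:

    /* Hypothetical stand-in for zstd's repcodes_t: the three most recent offsets. */
    typedef struct { unsigned rep[3]; } rep_hist_t;

    /* Push a new offset to the front of a history (rep[0] is most recent). */
    static void rep_push(rep_hist_t* h, unsigned offset)
    {
        h->rep[2] = h->rep[1];
        h->rep[1] = h->rep[0];
        h->rep[0] = offset;
    }

    /* After emitting one partition: cRep always advances, while dRep advances
     * only when the partition was emitted as a compressed block, because the
     * decoder never replays sequences from raw/RLE partitions. */
    static void on_partition_emitted(rep_hist_t* cRep, rep_hist_t* dRep,
                                     unsigned offset, int wasCompressed)
    {
        rep_push(cRep, offset);
        if (wasCompressed) rep_push(dRep, offset);
    }

    /* A repcode index stays usable only while both histories agree on what it
     * refers to; otherwise the encoder must emit the raw offset instead. */
    static int repcode_is_valid(const rep_hist_t* cRep, const rep_hist_t* dRep, int idx)
    {
        return cRep->rep[idx] == dRep->rep[idx];
    }

Once any partition goes out raw, the two histories drift apart, which is exactly the situation ZSTD_seqStore_resolveOffCodes() handles by rewriting affected repcodes as literal offsets.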
+
+ static size_t ZSTD_compressBlock_splitBlock(ZSTD_CCtx* zc,
+                                             void* dst, size_t dstCapacity,
+                                             const void* src, size_t srcSize, U32 lastBlock) {
+     const BYTE* ip = (const BYTE*)src;
+     BYTE* op = (BYTE*)dst;
+     U32 nbSeq;
+     size_t cSize;
+     DEBUGLOG(4, "ZSTD_compressBlock_splitBlock");
+
+     { const size_t bss = ZSTD_buildSeqStore(zc, src, srcSize);
+       FORWARD_IF_ERROR(bss, "ZSTD_buildSeqStore failed");
+       if (bss == ZSTDbss_noCompress) {
+           if (zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode == FSE_repeat_valid)
+               zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode = FSE_repeat_check;
+           cSize = ZSTD_noCompressBlock(op, dstCapacity, ip, srcSize, lastBlock);
+           FORWARD_IF_ERROR(cSize, "ZSTD_noCompressBlock failed");
+           DEBUGLOG(4, "ZSTD_compressBlock_splitBlock: Nocompress block");
+           return cSize;
+       }
+       nbSeq = (U32)(zc->seqStore.sequences - zc->seqStore.sequencesStart);
+     }
+
+     assert(zc->appliedParams.splitBlocks == 1);
+     cSize = ZSTD_compressBlock_splitBlock_internal(zc, dst, dstCapacity, src, srcSize, lastBlock, nbSeq);
+     FORWARD_IF_ERROR(cSize, "Splitting blocks failed!");
+     return cSize;
  }
 
  static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc,
@@ -2716,12 +3733,12 @@ static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc,
 
      if (zc->seqCollector.collectSequences) {
          ZSTD_copyBlockSequences(zc);
-         ZSTD_confirmRepcodesAndEntropyTables(zc);
+         ZSTD_blockState_confirmRepcodesAndEntropyTables(&zc->blockState);
          return 0;
      }
 
      /* encode sequences and literals */
-     cSize = ZSTD_entropyCompressSequences(&zc->seqStore,
+     cSize = ZSTD_entropyCompressSeqStore(&zc->seqStore,
              &zc->blockState.prevCBlock->entropy, &zc->blockState.nextCBlock->entropy,
              &zc->appliedParams,
              dst, dstCapacity,
@@ -2750,7 +3767,7 @@ static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc,
 
  out:
      if (!ZSTD_isError(cSize) && cSize > 1) {
-         ZSTD_confirmRepcodesAndEntropyTables(zc);
+         ZSTD_blockState_confirmRepcodesAndEntropyTables(&zc->blockState);
      }
      /* We check that dictionaries have offset codes available for the first
       * block. After the first block, the offcode table might not have large
@@ -2803,7 +3820,7 @@ static size_t ZSTD_compressBlock_targetCBlockSize_body(ZSTD_CCtx* zc,
          size_t const maxCSize = srcSize - ZSTD_minGain(srcSize, zc->appliedParams.cParams.strategy);
          FORWARD_IF_ERROR(cSize, "ZSTD_compressSuperBlock failed");
          if (cSize != 0 && cSize < maxCSize + ZSTD_blockHeaderSize) {
-             ZSTD_confirmRepcodesAndEntropyTables(zc);
+             ZSTD_blockState_confirmRepcodesAndEntropyTables(&zc->blockState);
              return cSize;
          }
      }
@@ -2843,9 +3860,9 @@ static void ZSTD_overflowCorrectIfNeeded(ZSTD_matchState_t* ms,
                                           void const* ip,
                                           void const* iend)
  {
-     if (ZSTD_window_needOverflowCorrection(ms->window, iend)) {
-         U32 const maxDist = (U32)1 << params->cParams.windowLog;
-         U32 const cycleLog = ZSTD_cycleLog(params->cParams.chainLog, params->cParams.strategy);
+     U32 const cycleLog = ZSTD_cycleLog(params->cParams.chainLog, params->cParams.strategy);
+     U32 const maxDist = (U32)1 << params->cParams.windowLog;
+     if (ZSTD_window_needOverflowCorrection(ms->window, cycleLog, maxDist, ms->loadedDictEnd, ip, iend)) {
          U32 const correction = ZSTD_window_correctOverflow(&ms->window, cycleLog, maxDist, ip);
          ZSTD_STATIC_ASSERT(ZSTD_CHAINLOG_MAX <= 30);
          ZSTD_STATIC_ASSERT(ZSTD_WINDOWLOG_MAX_32 <= 30);
@@ -2868,7 +3885,7 @@ static void ZSTD_overflowCorrectIfNeeded(ZSTD_matchState_t* ms,
   * Frame is supposed already started (header already produced)
   * @return : compressed size, or an error code
   */
- static size_t ZSTD_compress_frameChunk (ZSTD_CCtx* cctx,
+ static size_t ZSTD_compress_frameChunk(ZSTD_CCtx* cctx,
                                          void* dst, size_t dstCapacity,
                                          const void* src, size_t srcSize,
                                          U32 lastFrameChunk)
@@ -2908,6 +3925,10 @@ static size_t ZSTD_compress_frameChunk (ZSTD_CCtx* cctx,
              FORWARD_IF_ERROR(cSize, "ZSTD_compressBlock_targetCBlockSize failed");
              assert(cSize > 0);
              assert(cSize <= blockSize + ZSTD_blockHeaderSize);
+         } else if (ZSTD_blockSplitterEnabled(&cctx->appliedParams)) {
+             cSize = ZSTD_compressBlock_splitBlock(cctx, op, dstCapacity, ip, blockSize, lastBlock);
+             FORWARD_IF_ERROR(cSize, "ZSTD_compressBlock_splitBlock failed");
+             assert(cSize > 0 || cctx->seqCollector.collectSequences == 1);
          } else {
              cSize = ZSTD_compressBlock_internal(cctx,
                              op+ZSTD_blockHeaderSize, dstCapacity-ZSTD_blockHeaderSize,
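With this hunk, the frame loop picks one of three block emitters per block: superblock mode (targetCBlockSize) takes precedence, then the new block splitter, then the regular path. A compact, compilable sketch of that dispatch order, where the function pointers stand in for the three ZSTD_compressBlock_* emitters (all names here are placeholders, not zstd's API):

    #include <stddef.h>

    typedef size_t (*block_emitter)(void* dst, size_t cap, const void* src, size_t n);

    /* Mirror of the if / else-if / else ordering in the hunk above. */
    static size_t emit_one_block(int targetCBlockSizeSet, int splitterEnabled,
                                 block_emitter superblock, block_emitter split,
                                 block_emitter regular,
                                 void* dst, size_t cap, const void* src, size_t n)
    {
        if (targetCBlockSizeSet) return superblock(dst, cap, src, n);
        if (splitterEnabled)     return split(dst, cap, src, n);
        return regular(dst, cap, src, n);
    }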
@@ -3063,11 +4084,12 @@ static size_t ZSTD_compressContinue_internal (ZSTD_CCtx* cctx,
 
      if (!srcSize) return fhSize;  /* do not generate an empty block if no input */
 
-     if (!ZSTD_window_update(&ms->window, src, srcSize)) {
+     if (!ZSTD_window_update(&ms->window, src, srcSize, ms->forceNonContiguous)) {
+         ms->forceNonContiguous = 0;
          ms->nextToUpdate = ms->window.dictLimit;
      }
      if (cctx->appliedParams.ldmParams.enableLdm) {
-         ZSTD_window_update(&cctx->ldmState.window, src, srcSize);
+         ZSTD_window_update(&cctx->ldmState.window, src, srcSize, /* forceNonContiguous */ 0);
      }
 
      if (!frame) {
@@ -3135,63 +4157,86 @@ static size_t ZSTD_loadDictionaryContent(ZSTD_matchState_t* ms,
  {
      const BYTE* ip = (const BYTE*) src;
      const BYTE* const iend = ip + srcSize;
+     int const loadLdmDict = params->ldmParams.enableLdm && ls != NULL;
+
+     /* Assert that the ms params match the params we're being given */
+     ZSTD_assertEqualCParams(params->cParams, ms->cParams);
 
-     ZSTD_window_update(&ms->window, src, srcSize);
+     if (srcSize > ZSTD_CHUNKSIZE_MAX) {
+         /* Allow the dictionary to set indices up to exactly ZSTD_CURRENT_MAX.
+          * Dictionaries right at the edge will immediately trigger overflow
+          * correction, but I don't want to insert extra constraints here.
+          */
+         U32 const maxDictSize = ZSTD_CURRENT_MAX - 1;
+         /* We must have cleared our windows when our source is this large. */
+         assert(ZSTD_window_isEmpty(ms->window));
+         if (loadLdmDict)
+             assert(ZSTD_window_isEmpty(ls->window));
+         /* If the dictionary is too large, only load the suffix of the dictionary. */
+         if (srcSize > maxDictSize) {
+             ip = iend - maxDictSize;
+             src = ip;
+             srcSize = maxDictSize;
+         }
+     }
+
+     DEBUGLOG(4, "ZSTD_loadDictionaryContent(): useRowMatchFinder=%d", (int)params->useRowMatchFinder);
+     ZSTD_window_update(&ms->window, src, srcSize, /* forceNonContiguous */ 0);
      ms->loadedDictEnd = params->forceWindow ? 0 : (U32)(iend - ms->window.base);
+     ms->forceNonContiguous = params->deterministicRefPrefix;
 
-     if (params->ldmParams.enableLdm && ls != NULL) {
-         ZSTD_window_update(&ls->window, src, srcSize);
+     if (loadLdmDict) {
+         ZSTD_window_update(&ls->window, src, srcSize, /* forceNonContiguous */ 0);
          ls->loadedDictEnd = params->forceWindow ? 0 : (U32)(iend - ls->window.base);
      }
 
-     /* Assert that we the ms params match the params we're being given */
-     ZSTD_assertEqualCParams(params->cParams, ms->cParams);
-
      if (srcSize <= HASH_READ_SIZE) return 0;
 
-     while (iend - ip > HASH_READ_SIZE) {
-         size_t const remaining = (size_t)(iend - ip);
-         size_t const chunk = MIN(remaining, ZSTD_CHUNKSIZE_MAX);
-         const BYTE* const ichunk = ip + chunk;
-
-         ZSTD_overflowCorrectIfNeeded(ms, ws, params, ip, ichunk);
+     ZSTD_overflowCorrectIfNeeded(ms, ws, params, ip, iend);
 
-         if (params->ldmParams.enableLdm && ls != NULL)
-             ZSTD_ldm_fillHashTable(ls, (const BYTE*)src, (const BYTE*)src + srcSize, &params->ldmParams);
+     if (loadLdmDict)
+         ZSTD_ldm_fillHashTable(ls, ip, iend, &params->ldmParams);
 
-         switch(params->cParams.strategy)
-         {
-         case ZSTD_fast:
-             ZSTD_fillHashTable(ms, ichunk, dtlm);
-             break;
-         case ZSTD_dfast:
-             ZSTD_fillDoubleHashTable(ms, ichunk, dtlm);
-             break;
+     switch(params->cParams.strategy)
+     {
+     case ZSTD_fast:
+         ZSTD_fillHashTable(ms, iend, dtlm);
+         break;
+     case ZSTD_dfast:
+         ZSTD_fillDoubleHashTable(ms, iend, dtlm);
+         break;
 
-         case ZSTD_greedy:
-         case ZSTD_lazy:
-         case ZSTD_lazy2:
-             if (chunk >= HASH_READ_SIZE && ms->dedicatedDictSearch) {
-                 assert(chunk == remaining); /* must load everything in one go */
-                 ZSTD_dedicatedDictSearch_lazy_loadDictionary(ms, ichunk-HASH_READ_SIZE);
-             } else if (chunk >= HASH_READ_SIZE) {
-                 ZSTD_insertAndFindFirstIndex(ms, ichunk-HASH_READ_SIZE);
+     case ZSTD_greedy:
+     case ZSTD_lazy:
+     case ZSTD_lazy2:
+         assert(srcSize >= HASH_READ_SIZE);
+         if (ms->dedicatedDictSearch) {
+             assert(ms->chainTable != NULL);
+             ZSTD_dedicatedDictSearch_lazy_loadDictionary(ms, iend-HASH_READ_SIZE);
+         } else {
+             assert(params->useRowMatchFinder != ZSTD_urm_auto);
+             if (params->useRowMatchFinder == ZSTD_urm_enableRowMatchFinder) {
+                 size_t const tagTableSize = ((size_t)1 << params->cParams.hashLog) * sizeof(U16);
+                 ZSTD_memset(ms->tagTable, 0, tagTableSize);
+                 ZSTD_row_update(ms, iend-HASH_READ_SIZE);
+                 DEBUGLOG(4, "Using row-based hash table for lazy dict");
+             } else {
+                 ZSTD_insertAndFindFirstIndex(ms, iend-HASH_READ_SIZE);
+                 DEBUGLOG(4, "Using chain-based hash table for lazy dict");
              }
-             break;
-
-         case ZSTD_btlazy2:   /* we want the dictionary table fully sorted */
-         case ZSTD_btopt:
-         case ZSTD_btultra:
-         case ZSTD_btultra2:
-             if (chunk >= HASH_READ_SIZE)
-                 ZSTD_updateTree(ms, ichunk-HASH_READ_SIZE, ichunk);
-             break;
-
-         default:
-             assert(0);  /* not possible : not a valid strategy id */
          }
+         break;
+
+     case ZSTD_btlazy2:   /* we want the dictionary table fully sorted */
+     case ZSTD_btopt:
+     case ZSTD_btultra:
+     case ZSTD_btultra2:
+         assert(srcSize >= HASH_READ_SIZE);
+         ZSTD_updateTree(ms, iend-HASH_READ_SIZE, iend);
+         break;
 
-         ip = ichunk;
+     default:
+         assert(0);  /* not possible : not a valid strategy id */
      }
 
      ms->nextToUpdate = (U32)(iend - ms->window.base);
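The chunked while-loop is gone: the dictionary is now loaded in a single pass, with oversized dictionaries clamped to their suffix up front. The clamp itself is simple; a standalone sketch, where the cap is a placeholder for ZSTD_CURRENT_MAX - 1:

    #include <stddef.h>

    /* Placeholder cap; the real code uses ZSTD_CURRENT_MAX - 1. */
    #define MAX_LOADABLE_DICT ((size_t)1 << 20)

    /* Return the portion of the dictionary to load: the whole buffer if it
     * fits, otherwise only its trailing suffix. The most recent bytes matter
     * most, since match offsets reach backwards from the input. */
    static const unsigned char* dict_load_window(const unsigned char* dict,
                                                 size_t dictSize, size_t* loadSize)
    {
        if (dictSize > MAX_LOADABLE_DICT) {
            *loadSize = MAX_LOADABLE_DICT;
            return dict + (dictSize - MAX_LOADABLE_DICT);   /* suffix only */
        }
        *loadSize = dictSize;
        return dict;
    }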
@@ -3330,7 +4375,6 @@ static size_t ZSTD_loadZstdDictionary(ZSTD_compressedBlockState_t* bs,
      const BYTE* const dictEnd = dictPtr + dictSize;
      size_t dictID;
      size_t eSize;
-
      ZSTD_STATIC_ASSERT(HUF_WORKSPACE_SIZE >= (1<<MAX(MLFSELog,LLFSELog)));
      assert(dictSize >= 8);
      assert(MEM_readLE32(dictPtr) == ZSTD_MAGIC_DICTIONARY);
@@ -3401,8 +4445,9 @@ static size_t ZSTD_compressBegin_internal(ZSTD_CCtx* cctx,
                                            const ZSTD_CCtx_params* params, U64 pledgedSrcSize,
                                            ZSTD_buffered_policy_e zbuff)
  {
+     size_t const dictContentSize = cdict ? cdict->dictContentSize : dictSize;
  #if ZSTD_TRACE
-     cctx->traceCtx = ZSTD_trace_compress_begin(cctx);
+     cctx->traceCtx = (ZSTD_trace_compress_begin != NULL) ? ZSTD_trace_compress_begin(cctx) : 0;
  #endif
      DEBUGLOG(4, "ZSTD_compressBegin_internal: wlog=%u", params->cParams.windowLog);
      /* params are supposed to be fully validated at this point */
@@ -3418,7 +4463,8 @@ static size_t ZSTD_compressBegin_internal(ZSTD_CCtx* cctx,
          return ZSTD_resetCCtx_usingCDict(cctx, cdict, params, pledgedSrcSize, zbuff);
      }
 
-     FORWARD_IF_ERROR( ZSTD_resetCCtx_internal(cctx, *params, pledgedSrcSize,
+     FORWARD_IF_ERROR( ZSTD_resetCCtx_internal(cctx, params, pledgedSrcSize,
+                                               dictContentSize,
                                                ZSTDcrp_makeClean, zbuff) , "");
      { size_t const dictID = cdict ?
                  ZSTD_compress_insertDictionary(
@@ -3433,7 +4479,7 @@ static size_t ZSTD_compressBegin_internal(ZSTD_CCtx* cctx,
          FORWARD_IF_ERROR(dictID, "ZSTD_compress_insertDictionary failed");
          assert(dictID <= UINT_MAX);
          cctx->dictID = (U32)dictID;
-         cctx->dictContentSize = cdict ? cdict->dictContentSize : dictSize;
+         cctx->dictContentSize = dictContentSize;
      }
      return 0;
  }
@@ -3533,7 +4579,7 @@ static size_t ZSTD_writeEpilogue(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity)
  void ZSTD_CCtx_trace(ZSTD_CCtx* cctx, size_t extraCSize)
  {
  #if ZSTD_TRACE
-     if (cctx->traceCtx) {
+     if (cctx->traceCtx && ZSTD_trace_compress_end != NULL) {
          int const streaming = cctx->inBuffSize > 0 || cctx->outBuffSize > 0 || cctx->appliedParams.nbWorkers > 0;
          ZSTD_Trace trace;
          ZSTD_memset(&trace, 0, sizeof(trace));
@@ -3586,15 +4632,14 @@ size_t ZSTD_compress_advanced (ZSTD_CCtx* cctx,
                                 const void* dict,size_t dictSize,
                                 ZSTD_parameters params)
  {
-     ZSTD_CCtx_params cctxParams;
      DEBUGLOG(4, "ZSTD_compress_advanced");
      FORWARD_IF_ERROR(ZSTD_checkCParams(params.cParams), "");
-     ZSTD_CCtxParams_init_internal(&cctxParams, &params, ZSTD_NO_CLEVEL);
+     ZSTD_CCtxParams_init_internal(&cctx->simpleApiParams, &params, ZSTD_NO_CLEVEL);
      return ZSTD_compress_advanced_internal(cctx,
                                             dst, dstCapacity,
                                             src, srcSize,
                                             dict, dictSize,
-                                            &cctxParams);
+                                            &cctx->simpleApiParams);
  }
 
  /* Internal */
@@ -3618,14 +4663,13 @@ size_t ZSTD_compress_usingDict(ZSTD_CCtx* cctx,
                                 const void* dict, size_t dictSize,
                                 int compressionLevel)
  {
-     ZSTD_CCtx_params cctxParams;
      {
          ZSTD_parameters const params = ZSTD_getParams_internal(compressionLevel, srcSize, dict ? dictSize : 0, ZSTD_cpm_noAttachDict);
          assert(params.fParams.contentSizeFlag == 1);
-         ZSTD_CCtxParams_init_internal(&cctxParams, &params, (compressionLevel == 0) ? ZSTD_CLEVEL_DEFAULT: compressionLevel);
+         ZSTD_CCtxParams_init_internal(&cctx->simpleApiParams, &params, (compressionLevel == 0) ? ZSTD_CLEVEL_DEFAULT: compressionLevel);
      }
      DEBUGLOG(4, "ZSTD_compress_usingDict (srcSize=%u)", (unsigned)srcSize);
-     return ZSTD_compress_advanced_internal(cctx, dst, dstCapacity, src, srcSize, dict, dictSize, &cctxParams);
+     return ZSTD_compress_advanced_internal(cctx, dst, dstCapacity, src, srcSize, dict, dictSize, &cctx->simpleApiParams);
  }
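These two simple-API entry points now stash their parameters in cctx->simpleApiParams instead of a stack-local struct; their outward behavior is unchanged. For reference, a typical call through this path, using only the stable public API (buffer sizing via ZSTD_compressBound() is the caller's job):

    #include <stdio.h>
    #include <zstd.h>

    /* One-shot compression with a raw content dictionary via the simple API. */
    static size_t compress_with_dict(void* dst, size_t dstCap,
                                     const void* src, size_t srcSize,
                                     const void* dict, size_t dictSize)
    {
        ZSTD_CCtx* const cctx = ZSTD_createCCtx();
        size_t const cSize = ZSTD_compress_usingDict(cctx, dst, dstCap,
                                                     src, srcSize,
                                                     dict, dictSize,
                                                     3 /* compression level */);
        ZSTD_freeCCtx(cctx);
        if (ZSTD_isError(cSize)) {
            fprintf(stderr, "compression failed: %s\n", ZSTD_getErrorName(cSize));
            return 0;
        }
        return cSize;
    }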
 
  size_t ZSTD_compressCCtx(ZSTD_CCtx* cctx,
@@ -3669,7 +4713,10 @@ size_t ZSTD_estimateCDictSize_advanced(
      DEBUGLOG(5, "sizeof(ZSTD_CDict) : %u", (unsigned)sizeof(ZSTD_CDict));
      return ZSTD_cwksp_alloc_size(sizeof(ZSTD_CDict))
           + ZSTD_cwksp_alloc_size(HUF_WORKSPACE_SIZE)
-          + ZSTD_sizeof_matchState(&cParams, /* forCCtx */ 0)
+          /* enableDedicatedDictSearch == 1 ensures that CDict estimation will not be too small
+           * in case we are using DDS with row-hash. */
+          + ZSTD_sizeof_matchState(&cParams, ZSTD_resolveRowMatchFinderMode(ZSTD_urm_auto, &cParams),
+                                   /* enableDedicatedDictSearch */ 1, /* forCCtx */ 0)
           + (dictLoadMethod == ZSTD_dlm_byRef ? 0
              : ZSTD_cwksp_alloc_size(ZSTD_cwksp_align(dictSize, sizeof(void *))));
  }
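The estimate now has to cover the row-based match finder and dedicated dict search, so it deliberately sizes for the larger configuration. Callers query it the same way as before; a minimal usage sketch (ZSTD_estimateCDictSize sits behind ZSTD_STATIC_LINKING_ONLY, and the dictionary size here is made up):

    #define ZSTD_STATIC_LINKING_ONLY   /* ZSTD_estimateCDictSize is an advanced API */
    #include <stdio.h>
    #include <zstd.h>

    int main(void)
    {
        size_t const dictSize = 112 * 1024;   /* e.g. a 112 KB trained dictionary */
        int const level = 3;
        size_t const est = ZSTD_estimateCDictSize(dictSize, level);
        printf("a CDict for a %zu-byte dict at level %d needs about %zu bytes\n",
               dictSize, level, est);
        return 0;
    }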
@@ -3700,9 +4747,6 @@ static size_t ZSTD_initCDict_internal(
      assert(!ZSTD_checkCParams(params.cParams));
      cdict->matchState.cParams = params.cParams;
      cdict->matchState.dedicatedDictSearch = params.enableDedicatedDictSearch;
-     if (cdict->matchState.dedicatedDictSearch && dictSize > ZSTD_CHUNKSIZE_MAX) {
-         cdict->matchState.dedicatedDictSearch = 0;
-     }
      if ((dictLoadMethod == ZSTD_dlm_byRef) || (!dictBuffer) || (!dictSize)) {
          cdict->dictContent = dictBuffer;
      } else {
@@ -3723,6 +4767,7 @@ static size_t ZSTD_initCDict_internal(
                                      &cdict->matchState,
                                      &cdict->workspace,
                                      &params.cParams,
+                                     params.useRowMatchFinder,
                                      ZSTDcrp_makeClean,
                                      ZSTDirp_reset,
                                      ZSTD_resetTarget_CDict), "");
@@ -3746,14 +4791,17 @@ static size_t ZSTD_initCDict_internal(
 
  static ZSTD_CDict* ZSTD_createCDict_advanced_internal(size_t dictSize,
                                  ZSTD_dictLoadMethod_e dictLoadMethod,
-                                 ZSTD_compressionParameters cParams, ZSTD_customMem customMem)
+                                 ZSTD_compressionParameters cParams,
+                                 ZSTD_useRowMatchFinderMode_e useRowMatchFinder,
+                                 U32 enableDedicatedDictSearch,
+                                 ZSTD_customMem customMem)
  {
      if ((!customMem.customAlloc) ^ (!customMem.customFree)) return NULL;
 
      { size_t const workspaceSize =
              ZSTD_cwksp_alloc_size(sizeof(ZSTD_CDict)) +
              ZSTD_cwksp_alloc_size(HUF_WORKSPACE_SIZE) +
-             ZSTD_sizeof_matchState(&cParams, /* forCCtx */ 0) +
+             ZSTD_sizeof_matchState(&cParams, useRowMatchFinder, enableDedicatedDictSearch, /* forCCtx */ 0) +
              (dictLoadMethod == ZSTD_dlm_byRef ? 0
               : ZSTD_cwksp_alloc_size(ZSTD_cwksp_align(dictSize, sizeof(void*))));
          void* const workspace = ZSTD_customMalloc(workspaceSize, customMem);
@@ -3772,7 +4820,7 @@ static ZSTD_CDict* ZSTD_createCDict_advanced_internal(size_t dictSize,
          ZSTD_cwksp_move(&cdict->workspace, &ws);
          cdict->customMem = customMem;
          cdict->compressionLevel = ZSTD_NO_CLEVEL; /* signals advanced API usage */
-
+         cdict->useRowMatchFinder = useRowMatchFinder;
          return cdict;
      }
  }
@@ -3824,10 +4872,13 @@ ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_advanced2(
              &cctxParams, ZSTD_CONTENTSIZE_UNKNOWN, dictSize, ZSTD_cpm_createCDict);
      }
 
+     DEBUGLOG(3, "ZSTD_createCDict_advanced2: DDS: %u", cctxParams.enableDedicatedDictSearch);
      cctxParams.cParams = cParams;
+     cctxParams.useRowMatchFinder = ZSTD_resolveRowMatchFinderMode(cctxParams.useRowMatchFinder, &cParams);
 
      cdict = ZSTD_createCDict_advanced_internal(dictSize,
                          dictLoadMethod, cctxParams.cParams,
+                         cctxParams.useRowMatchFinder, cctxParams.enableDedicatedDictSearch,
                          customMem);
 
      if (ZSTD_isError( ZSTD_initCDict_internal(cdict,
@@ -3896,7 +4947,9 @@ const ZSTD_CDict* ZSTD_initStaticCDict(
                                   ZSTD_dictContentType_e dictContentType,
                                   ZSTD_compressionParameters cParams)
  {
-     size_t const matchStateSize = ZSTD_sizeof_matchState(&cParams, /* forCCtx */ 0);
+     ZSTD_useRowMatchFinderMode_e const useRowMatchFinder = ZSTD_resolveRowMatchFinderMode(ZSTD_urm_auto, &cParams);
+     /* enableDedicatedDictSearch == 1 ensures matchstate is not too small in case this CDict will be used for DDS + row hash */
+     size_t const matchStateSize = ZSTD_sizeof_matchState(&cParams, useRowMatchFinder, /* enableDedicatedDictSearch */ 1, /* forCCtx */ 0);
      size_t const neededSize = ZSTD_cwksp_alloc_size(sizeof(ZSTD_CDict))
                              + (dictLoadMethod == ZSTD_dlm_byRef ? 0
                                 : ZSTD_cwksp_alloc_size(ZSTD_cwksp_align(dictSize, sizeof(void*))))
@@ -3921,6 +4974,8 @@ const ZSTD_CDict* ZSTD_initStaticCDict(
 
      ZSTD_CCtxParams_init(&params, 0);
      params.cParams = cParams;
+     params.useRowMatchFinder = useRowMatchFinder;
+     cdict->useRowMatchFinder = useRowMatchFinder;
 
      if (ZSTD_isError( ZSTD_initCDict_internal(cdict,
                                      dict, dictSize,
@@ -3947,15 +5002,15 @@ unsigned ZSTD_getDictID_fromCDict(const ZSTD_CDict* cdict)
      return cdict->dictID;
  }
 
-
- /* ZSTD_compressBegin_usingCDict_advanced() :
-  * cdict must be != NULL */
- size_t ZSTD_compressBegin_usingCDict_advanced(
+ /* ZSTD_compressBegin_usingCDict_internal() :
+  * Implementation of various ZSTD_compressBegin_usingCDict* functions.
+  */
+ static size_t ZSTD_compressBegin_usingCDict_internal(
      ZSTD_CCtx* const cctx, const ZSTD_CDict* const cdict,
      ZSTD_frameParameters const fParams, unsigned long long const pledgedSrcSize)
  {
      ZSTD_CCtx_params cctxParams;
-     DEBUGLOG(4, "ZSTD_compressBegin_usingCDict_advanced");
+     DEBUGLOG(4, "ZSTD_compressBegin_usingCDict_internal");
      RETURN_ERROR_IF(cdict==NULL, dictionary_wrong, "NULL pointer!");
      /* Initialize the cctxParams from the cdict */
      {
@@ -3987,25 +5042,48 @@ size_t ZSTD_compressBegin_usingCDict_advanced(
                          ZSTDb_not_buffered);
  }
 
+
+ /* ZSTD_compressBegin_usingCDict_advanced() :
+  * This function is DEPRECATED.
+  * cdict must be != NULL */
+ size_t ZSTD_compressBegin_usingCDict_advanced(
+     ZSTD_CCtx* const cctx, const ZSTD_CDict* const cdict,
+     ZSTD_frameParameters const fParams, unsigned long long const pledgedSrcSize)
+ {
+     return ZSTD_compressBegin_usingCDict_internal(cctx, cdict, fParams, pledgedSrcSize);
+ }
+
  /* ZSTD_compressBegin_usingCDict() :
-  * pledgedSrcSize=0 means "unknown"
-  * if pledgedSrcSize>0, it will enable contentSizeFlag */
+  * cdict must be != NULL */
  size_t ZSTD_compressBegin_usingCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict)
  {
      ZSTD_frameParameters const fParams = { 0 /*content*/, 0 /*checksum*/, 0 /*noDictID*/ };
-     DEBUGLOG(4, "ZSTD_compressBegin_usingCDict : dictIDFlag == %u", !fParams.noDictIDFlag);
-     return ZSTD_compressBegin_usingCDict_advanced(cctx, cdict, fParams, ZSTD_CONTENTSIZE_UNKNOWN);
+     return ZSTD_compressBegin_usingCDict_internal(cctx, cdict, fParams, ZSTD_CONTENTSIZE_UNKNOWN);
  }
 
- size_t ZSTD_compress_usingCDict_advanced(ZSTD_CCtx* cctx,
+ /*! ZSTD_compress_usingCDict_internal():
+  * Implementation of various ZSTD_compress_usingCDict* functions.
+  */
+ static size_t ZSTD_compress_usingCDict_internal(ZSTD_CCtx* cctx,
                                  void* dst, size_t dstCapacity,
                                  const void* src, size_t srcSize,
                                  const ZSTD_CDict* cdict, ZSTD_frameParameters fParams)
  {
-     FORWARD_IF_ERROR(ZSTD_compressBegin_usingCDict_advanced(cctx, cdict, fParams, srcSize), ""); /* will check if cdict != NULL */
+     FORWARD_IF_ERROR(ZSTD_compressBegin_usingCDict_internal(cctx, cdict, fParams, srcSize), ""); /* will check if cdict != NULL */
      return ZSTD_compressEnd(cctx, dst, dstCapacity, src, srcSize);
  }
 
+ /*! ZSTD_compress_usingCDict_advanced():
+  * This function is DEPRECATED.
+  */
+ size_t ZSTD_compress_usingCDict_advanced(ZSTD_CCtx* cctx,
+                                 void* dst, size_t dstCapacity,
+                                 const void* src, size_t srcSize,
+                                 const ZSTD_CDict* cdict, ZSTD_frameParameters fParams)
+ {
+     return ZSTD_compress_usingCDict_internal(cctx, dst, dstCapacity, src, srcSize, cdict, fParams);
+ }
+
  /*! ZSTD_compress_usingCDict() :
   * Compression using a digested Dictionary.
   * Faster startup than ZSTD_compress_usingDict(), recommended when same dictionary is used multiple times.
@@ -4017,7 +5095,7 @@ size_t ZSTD_compress_usingCDict(ZSTD_CCtx* cctx,
                                  const ZSTD_CDict* cdict)
  {
      ZSTD_frameParameters const fParams = { 1 /*content*/, 0 /*checksum*/, 0 /*noDictID*/ };
-     return ZSTD_compress_usingCDict_advanced(cctx, dst, dstCapacity, src, srcSize, cdict, fParams);
+     return ZSTD_compress_usingCDict_internal(cctx, dst, dstCapacity, src, srcSize, cdict, fParams);
  }
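With the *_usingCDict_advanced() entry points marked deprecated here, the parameter API is the forward-looking way to combine a CDict with custom settings. A minimal equivalent using only stable public functions:

    #include <zstd.h>

    /* Compress with a prebuilt CDict through the parameter API, which
     * supersedes the deprecated ZSTD_compress_usingCDict_advanced(). */
    static size_t compress_with_cdict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict,
                                      void* dst, size_t dstCap,
                                      const void* src, size_t srcSize)
    {
        size_t const err = ZSTD_CCtx_refCDict(cctx, cdict);  /* borrow the digested dict */
        if (ZSTD_isError(err)) return err;
        return ZSTD_compress2(cctx, dst, dstCap, src, srcSize);
    }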
 
 
@@ -4427,8 +5505,13 @@ static size_t ZSTD_CCtx_init_compressStream2(ZSTD_CCtx* cctx,
      FORWARD_IF_ERROR( ZSTD_initLocalDict(cctx) , ""); /* Init the local dict if present. */
      ZSTD_memset(&cctx->prefixDict, 0, sizeof(cctx->prefixDict));   /* single usage */
      assert(prefixDict.dict==NULL || cctx->cdict==NULL);    /* only one can be set */
-     if (cctx->cdict)
-         params.compressionLevel = cctx->cdict->compressionLevel; /* let cdict take priority in terms of compression level */
+     if (cctx->cdict && !cctx->localDict.cdict) {
+         /* Let the cdict's compression level take priority over the requested params.
+          * But do not take the cdict's compression level if the "cdict" is actually a localDict
+          * generated from ZSTD_initLocalDict().
+          */
+         params.compressionLevel = cctx->cdict->compressionLevel;
+     }
      DEBUGLOG(4, "ZSTD_compressStream2 : transparent init stage");
      if (endOp == ZSTD_e_end) cctx->pledgedSrcSizePlusOne = inSize + 1;  /* auto-fix pledgedSrcSize */
      {
@@ -4447,13 +5530,20 @@ static size_t ZSTD_CCtx_init_compressStream2(ZSTD_CCtx* cctx,
          params.ldmParams.enableLdm = 1;
      }
 
+     if (ZSTD_CParams_useBlockSplitter(&params.cParams)) {
+         DEBUGLOG(4, "Block splitter enabled by default (window size >= 128K, strategy >= btopt)");
+         params.splitBlocks = 1;
+     }
+
+     params.useRowMatchFinder = ZSTD_resolveRowMatchFinderMode(params.useRowMatchFinder, &params.cParams);
+
  #ifdef ZSTD_MULTITHREAD
      if ((cctx->pledgedSrcSizePlusOne-1) <= ZSTDMT_JOBSIZE_MIN) {
          params.nbWorkers = 0; /* do not invoke multi-threading when src size is too small */
      }
      if (params.nbWorkers > 0) {
  #if ZSTD_TRACE
-         cctx->traceCtx = ZSTD_trace_compress_begin(cctx);
+         cctx->traceCtx = (ZSTD_trace_compress_begin != NULL) ? ZSTD_trace_compress_begin(cctx) : 0;
  #endif
          /* mt context creation */
          if (cctx->mtctx == NULL) {
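Per the DEBUGLOG message above, ZSTD_CParams_useBlockSplitter() turns the splitter on when the window is at least 128 KB and the strategy is btopt or stronger. A standalone restatement of that heuristic (the enum value and names are stand-ins, not zstd's API):

    /* Stand-in for ZSTD_strategy; in zstd's enum, btopt is the first of the
     * optimal-parsing strategies. */
    typedef enum { MY_BTOPT = 7 } my_strategy_e;

    static int use_block_splitter(unsigned windowLog, int strategy)
    {
        return (windowLog >= 17)        /* window size >= 128 KB (1 << 17) */
            && (strategy >= MY_BTOPT);  /* btopt, btultra, btultra2 */
    }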
@@ -4921,7 +6011,7 @@ static size_t ZSTD_compressSequences_internal(ZSTD_CCtx* cctx,
              continue;
          }
 
-         compressedSeqsSize = ZSTD_entropyCompressSequences(&cctx->seqStore,
+         compressedSeqsSize = ZSTD_entropyCompressSeqStore(&cctx->seqStore,
                                  &cctx->blockState.prevCBlock->entropy, &cctx->blockState.nextCBlock->entropy,
                                  &cctx->appliedParams,
                                  op + ZSTD_blockHeaderSize /* Leave space for block header */, dstCapacity - ZSTD_blockHeaderSize,
@@ -4953,7 +6043,7 @@ static size_t ZSTD_compressSequences_internal(ZSTD_CCtx* cctx,
          } else {
              U32 cBlockHeader;
              /* Error checking and repcodes update */
-             ZSTD_confirmRepcodesAndEntropyTables(cctx);
+             ZSTD_blockState_confirmRepcodesAndEntropyTables(&cctx->blockState);
              if (cctx->blockState.prevCBlock->entropy.fse.offcode_repeatMode == FSE_repeat_valid)
                  cctx->blockState.prevCBlock->entropy.fse.offcode_repeatMode = FSE_repeat_check;
 
@@ -5054,6 +6144,7 @@ size_t ZSTD_endStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output)
  #define ZSTD_MAX_CLEVEL     22
  int ZSTD_maxCLevel(void) { return ZSTD_MAX_CLEVEL; }
  int ZSTD_minCLevel(void) { return (int)-ZSTD_TARGETLENGTH_MAX; }
+ int ZSTD_defaultCLevel(void) { return ZSTD_CLEVEL_DEFAULT; }
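ZSTD_defaultCLevel() is new in this release, rounding out the existing ZSTD_minCLevel()/ZSTD_maxCLevel() pair. A trivial usage sketch (the define is a precaution: the symbol may still sit in the advanced section of zstd.h in this version):

    #define ZSTD_STATIC_LINKING_ONLY
    #include <stdio.h>
    #include <zstd.h>

    int main(void)
    {
        printf("zstd levels: min=%d default=%d max=%d\n",
               ZSTD_minCLevel(), ZSTD_defaultCLevel(), ZSTD_maxCLevel());
        return 0;
    }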
 
  static const ZSTD_compressionParameters ZSTD_defaultCParameters[4][ZSTD_MAX_CLEVEL+1] = {
  {   /* "default" - for any srcSize > 256 KB */
@@ -5186,7 +6277,10 @@ static ZSTD_compressionParameters ZSTD_dedicatedDictSearch_getCParams(int const
  static int ZSTD_dedicatedDictSearch_isSupported(
          ZSTD_compressionParameters const* cParams)
  {
-     return (cParams->strategy >= ZSTD_greedy) && (cParams->strategy <= ZSTD_lazy2);
+     return (cParams->strategy >= ZSTD_greedy)
+         && (cParams->strategy <= ZSTD_lazy2)
+         && (cParams->hashLog > cParams->chainLog)
+         && (cParams->chainLog <= 24);
  }
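Dedicated dict search (DDS) support is tightened here: on top of the greedy-to-lazy2 strategy range, the hash table must now out-size the chain table and the chain log is capped at 24. Restated standalone (the struct and strategy codes mirror the predicate above; values follow zstd's strategy enum):

    typedef struct { int strategy; unsigned hashLog, chainLog; } cparams_view;

    static int dds_is_supported(const cparams_view* cp)
    {
        return cp->strategy >= 3        /* ZSTD_greedy */
            && cp->strategy <= 5        /* ZSTD_lazy2  */
            && cp->hashLog > cp->chainLog
            && cp->chainLog <= 24;
    }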
 
  /**
@@ -5204,6 +6298,9 @@ static void ZSTD_dedicatedDictSearch_revertCParams(
      case ZSTD_lazy:
      case ZSTD_lazy2:
          cParams->hashLog -= ZSTD_LAZY_DDSS_BUCKET_LOG;
+         if (cParams->hashLog < ZSTD_HASHLOG_MIN) {
+             cParams->hashLog = ZSTD_HASHLOG_MIN;
+         }
          break;
      case ZSTD_btlazy2:
      case ZSTD_btopt:
@@ -5252,6 +6349,7 @@ static ZSTD_compressionParameters ZSTD_getCParams_internal(int compressionLevel,
      else row = compressionLevel;
 
      { ZSTD_compressionParameters cp = ZSTD_defaultCParameters[tableID][row];
+         DEBUGLOG(5, "ZSTD_getCParams_internal selected tableID: %u row: %u strat: %u", tableID, row, (U32)cp.strategy);
          /* acceleration factor */
          if (compressionLevel < 0) {
              int const clampedCompressionLevel = MAX(ZSTD_minCLevel(), compressionLevel);