extzstd 0.3 → 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (107)
  1. checksums.yaml +4 -4
  2. data/HISTORY.ja.md +8 -0
  3. data/README.md +1 -1
  4. data/contrib/zstd/CHANGELOG +94 -0
  5. data/contrib/zstd/CONTRIBUTING.md +351 -1
  6. data/contrib/zstd/Makefile +32 -10
  7. data/contrib/zstd/README.md +33 -10
  8. data/contrib/zstd/TESTING.md +2 -2
  9. data/contrib/zstd/appveyor.yml +42 -4
  10. data/contrib/zstd/lib/Makefile +128 -60
  11. data/contrib/zstd/lib/README.md +47 -16
  12. data/contrib/zstd/lib/common/bitstream.h +38 -39
  13. data/contrib/zstd/lib/common/compiler.h +40 -5
  14. data/contrib/zstd/lib/common/cpu.h +1 -1
  15. data/contrib/zstd/lib/common/debug.c +11 -31
  16. data/contrib/zstd/lib/common/debug.h +11 -31
  17. data/contrib/zstd/lib/common/entropy_common.c +13 -33
  18. data/contrib/zstd/lib/common/error_private.c +2 -1
  19. data/contrib/zstd/lib/common/error_private.h +6 -2
  20. data/contrib/zstd/lib/common/fse.h +12 -32
  21. data/contrib/zstd/lib/common/fse_decompress.c +12 -35
  22. data/contrib/zstd/lib/common/huf.h +15 -33
  23. data/contrib/zstd/lib/common/mem.h +75 -2
  24. data/contrib/zstd/lib/common/pool.c +8 -4
  25. data/contrib/zstd/lib/common/pool.h +2 -2
  26. data/contrib/zstd/lib/common/threading.c +50 -4
  27. data/contrib/zstd/lib/common/threading.h +36 -4
  28. data/contrib/zstd/lib/common/xxhash.c +23 -35
  29. data/contrib/zstd/lib/common/xxhash.h +11 -31
  30. data/contrib/zstd/lib/common/zstd_common.c +1 -1
  31. data/contrib/zstd/lib/common/zstd_errors.h +2 -1
  32. data/contrib/zstd/lib/common/zstd_internal.h +154 -26
  33. data/contrib/zstd/lib/compress/fse_compress.c +17 -40
  34. data/contrib/zstd/lib/compress/hist.c +15 -35
  35. data/contrib/zstd/lib/compress/hist.h +12 -32
  36. data/contrib/zstd/lib/compress/huf_compress.c +92 -92
  37. data/contrib/zstd/lib/compress/zstd_compress.c +1191 -1330
  38. data/contrib/zstd/lib/compress/zstd_compress_internal.h +317 -55
  39. data/contrib/zstd/lib/compress/zstd_compress_literals.c +158 -0
  40. data/contrib/zstd/lib/compress/zstd_compress_literals.h +29 -0
  41. data/contrib/zstd/lib/compress/zstd_compress_sequences.c +419 -0
  42. data/contrib/zstd/lib/compress/zstd_compress_sequences.h +54 -0
  43. data/contrib/zstd/lib/compress/zstd_compress_superblock.c +845 -0
  44. data/contrib/zstd/lib/compress/zstd_compress_superblock.h +32 -0
  45. data/contrib/zstd/lib/compress/zstd_cwksp.h +525 -0
  46. data/contrib/zstd/lib/compress/zstd_double_fast.c +65 -43
  47. data/contrib/zstd/lib/compress/zstd_double_fast.h +2 -2
  48. data/contrib/zstd/lib/compress/zstd_fast.c +92 -66
  49. data/contrib/zstd/lib/compress/zstd_fast.h +2 -2
  50. data/contrib/zstd/lib/compress/zstd_lazy.c +74 -42
  51. data/contrib/zstd/lib/compress/zstd_lazy.h +1 -1
  52. data/contrib/zstd/lib/compress/zstd_ldm.c +32 -10
  53. data/contrib/zstd/lib/compress/zstd_ldm.h +7 -2
  54. data/contrib/zstd/lib/compress/zstd_opt.c +81 -114
  55. data/contrib/zstd/lib/compress/zstd_opt.h +1 -1
  56. data/contrib/zstd/lib/compress/zstdmt_compress.c +95 -51
  57. data/contrib/zstd/lib/compress/zstdmt_compress.h +3 -2
  58. data/contrib/zstd/lib/decompress/huf_decompress.c +76 -60
  59. data/contrib/zstd/lib/decompress/zstd_ddict.c +12 -8
  60. data/contrib/zstd/lib/decompress/zstd_ddict.h +2 -2
  61. data/contrib/zstd/lib/decompress/zstd_decompress.c +292 -172
  62. data/contrib/zstd/lib/decompress/zstd_decompress_block.c +459 -338
  63. data/contrib/zstd/lib/decompress/zstd_decompress_block.h +3 -3
  64. data/contrib/zstd/lib/decompress/zstd_decompress_internal.h +18 -4
  65. data/contrib/zstd/lib/deprecated/zbuff.h +9 -8
  66. data/contrib/zstd/lib/deprecated/zbuff_common.c +2 -2
  67. data/contrib/zstd/lib/deprecated/zbuff_compress.c +1 -1
  68. data/contrib/zstd/lib/deprecated/zbuff_decompress.c +1 -1
  69. data/contrib/zstd/lib/dictBuilder/cover.c +164 -54
  70. data/contrib/zstd/lib/dictBuilder/cover.h +52 -7
  71. data/contrib/zstd/lib/dictBuilder/fastcover.c +60 -43
  72. data/contrib/zstd/lib/dictBuilder/zdict.c +43 -19
  73. data/contrib/zstd/lib/dictBuilder/zdict.h +56 -28
  74. data/contrib/zstd/lib/legacy/zstd_legacy.h +8 -4
  75. data/contrib/zstd/lib/legacy/zstd_v01.c +110 -110
  76. data/contrib/zstd/lib/legacy/zstd_v01.h +1 -1
  77. data/contrib/zstd/lib/legacy/zstd_v02.c +23 -13
  78. data/contrib/zstd/lib/legacy/zstd_v02.h +1 -1
  79. data/contrib/zstd/lib/legacy/zstd_v03.c +23 -13
  80. data/contrib/zstd/lib/legacy/zstd_v03.h +1 -1
  81. data/contrib/zstd/lib/legacy/zstd_v04.c +30 -17
  82. data/contrib/zstd/lib/legacy/zstd_v04.h +1 -1
  83. data/contrib/zstd/lib/legacy/zstd_v05.c +113 -102
  84. data/contrib/zstd/lib/legacy/zstd_v05.h +2 -2
  85. data/contrib/zstd/lib/legacy/zstd_v06.c +20 -18
  86. data/contrib/zstd/lib/legacy/zstd_v06.h +1 -1
  87. data/contrib/zstd/lib/legacy/zstd_v07.c +25 -19
  88. data/contrib/zstd/lib/legacy/zstd_v07.h +1 -1
  89. data/contrib/zstd/lib/libzstd.pc.in +3 -2
  90. data/contrib/zstd/lib/zstd.h +265 -88
  91. data/ext/extzstd.h +1 -1
  92. data/ext/libzstd_conf.h +8 -0
  93. data/ext/zstd_common.c +1 -3
  94. data/ext/zstd_compress.c +3 -3
  95. data/ext/zstd_decompress.c +1 -5
  96. data/ext/zstd_dictbuilder.c +2 -3
  97. data/ext/zstd_dictbuilder_fastcover.c +1 -3
  98. data/ext/zstd_legacy_v01.c +2 -0
  99. data/ext/zstd_legacy_v02.c +2 -0
  100. data/ext/zstd_legacy_v03.c +2 -0
  101. data/ext/zstd_legacy_v04.c +2 -0
  102. data/ext/zstd_legacy_v05.c +2 -0
  103. data/ext/zstd_legacy_v06.c +2 -0
  104. data/ext/zstd_legacy_v07.c +2 -0
  105. data/lib/extzstd.rb +18 -10
  106. data/lib/extzstd/version.rb +1 -1
  107. metadata +15 -6
@@ -1,5 +1,5 @@
1
1
  /*
2
- * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
2
+ * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
3
3
  * All rights reserved.
4
4
  *
5
5
  * This source code is licensed under both the BSD-style license (found in the
@@ -16,8 +16,8 @@
16
16
  * Dependencies
17
17
  *********************************************************/
18
18
  #include <stddef.h> /* size_t */
19
- #include "zstd.h" /* DCtx, and some public functions */
20
- #include "zstd_internal.h" /* blockProperties_t, and some public functions */
19
+ #include "../zstd.h" /* DCtx, and some public functions */
20
+ #include "../common/zstd_internal.h" /* blockProperties_t, and some public functions */
21
21
  #include "zstd_decompress_internal.h" /* ZSTD_seqSymbol */
22
22
 
23
23
 
@@ -1,5 +1,5 @@
1
1
  /*
2
- * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
2
+ * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
3
3
  * All rights reserved.
4
4
  *
5
5
  * This source code is licensed under both the BSD-style license (found in the
@@ -19,8 +19,8 @@
19
19
  /*-*******************************************************
20
20
  * Dependencies
21
21
  *********************************************************/
22
- #include "mem.h" /* BYTE, U16, U32 */
23
- #include "zstd_internal.h" /* ZSTD_seqSymbol */
22
+ #include "../common/mem.h" /* BYTE, U16, U32 */
23
+ #include "../common/zstd_internal.h" /* ZSTD_seqSymbol */
24
24
 
25
25
 
26
26
 
@@ -95,6 +95,11 @@ typedef enum {
95
95
  ZSTD_use_once = 1 /* Use the dictionary once and set to ZSTD_dont_use */
96
96
  } ZSTD_dictUses_e;
97
97
 
98
+ typedef enum {
99
+ ZSTD_obm_buffered = 0, /* Buffer the output */
100
+ ZSTD_obm_stable = 1 /* ZSTD_outBuffer is stable */
101
+ } ZSTD_outBufferMode_e;
102
+
98
103
  struct ZSTD_DCtx_s
99
104
  {
100
105
  const ZSTD_seqSymbol* LLTptr;
@@ -147,10 +152,19 @@ struct ZSTD_DCtx_s
147
152
  U32 legacyVersion;
148
153
  U32 hostageByte;
149
154
  int noForwardProgress;
155
+ ZSTD_outBufferMode_e outBufferMode;
156
+ ZSTD_outBuffer expectedOutBuffer;
150
157
 
151
158
  /* workspace */
152
159
  BYTE litBuffer[ZSTD_BLOCKSIZE_MAX + WILDCOPY_OVERLENGTH];
153
160
  BYTE headerBuffer[ZSTD_FRAMEHEADERSIZE_MAX];
161
+
162
+ size_t oversizedDuration;
163
+
164
+ #ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
165
+ void const* dictContentBeginForFuzzing;
166
+ void const* dictContentEndForFuzzing;
167
+ #endif
154
168
  }; /* typedef'd to ZSTD_DCtx within "zstd.h" */
155
169
 
156
170
 
@@ -160,7 +174,7 @@ struct ZSTD_DCtx_s
160
174
 
161
175
  /*! ZSTD_loadDEntropy() :
162
176
  * dict : must point at beginning of a valid zstd dictionary.
163
- * @return : size of entropy tables read */
177
+ * @return : size of dictionary header (size of magic number + dict ID + entropy tables) */
164
178
  size_t ZSTD_loadDEntropy(ZSTD_entropyDTables_t* entropy,
165
179
  const void* const dict, size_t const dictSize);
166
180
 
@@ -1,5 +1,5 @@
1
1
  /*
2
- * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
2
+ * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
3
3
  * All rights reserved.
4
4
  *
5
5
  * This source code is licensed under both the BSD-style license (found in the
@@ -28,7 +28,7 @@ extern "C" {
28
28
  * Dependencies
29
29
  ***************************************/
30
30
  #include <stddef.h> /* size_t */
31
- #include "zstd.h" /* ZSTD_CStream, ZSTD_DStream, ZSTDLIB_API */
31
+ #include "../zstd.h" /* ZSTD_CStream, ZSTD_DStream, ZSTDLIB_API */
32
32
 
33
33
 
34
34
  /* ***************************************************************
@@ -36,16 +36,17 @@ extern "C" {
36
36
  *****************************************************************/
37
37
  /* Deprecation warnings */
38
38
  /* Should these warnings be a problem,
39
- it is generally possible to disable them,
40
- typically with -Wno-deprecated-declarations for gcc
41
- or _CRT_SECURE_NO_WARNINGS in Visual.
42
- Otherwise, it's also possible to define ZBUFF_DISABLE_DEPRECATE_WARNINGS */
39
+ * it is generally possible to disable them,
40
+ * typically with -Wno-deprecated-declarations for gcc
41
+ * or _CRT_SECURE_NO_WARNINGS in Visual.
42
+ * Otherwise, it's also possible to define ZBUFF_DISABLE_DEPRECATE_WARNINGS
43
+ */
43
44
  #ifdef ZBUFF_DISABLE_DEPRECATE_WARNINGS
44
45
  # define ZBUFF_DEPRECATED(message) ZSTDLIB_API /* disable deprecation warnings */
45
46
  #else
46
47
  # if defined (__cplusplus) && (__cplusplus >= 201402) /* C++14 or greater */
47
48
  # define ZBUFF_DEPRECATED(message) [[deprecated(message)]] ZSTDLIB_API
48
- # elif (defined(__GNUC__) && (__GNUC__ >= 5)) || defined(__clang__)
49
+ # elif (defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 5))) || defined(__clang__)
49
50
  # define ZBUFF_DEPRECATED(message) ZSTDLIB_API __attribute__((deprecated(message)))
50
51
  # elif defined(__GNUC__) && (__GNUC__ >= 3)
51
52
  # define ZBUFF_DEPRECATED(message) ZSTDLIB_API __attribute__((deprecated))
@@ -185,7 +186,7 @@ ZBUFF_DEPRECATED("use ZSTD_DStreamOutSize") size_t ZBUFF_recommendedDOutSize(voi
185
186
 
186
187
  /*--- Dependency ---*/
187
188
  #define ZSTD_STATIC_LINKING_ONLY /* ZSTD_parameters, ZSTD_customMem */
188
- #include "zstd.h"
189
+ #include "../zstd.h"
189
190
 
190
191
 
191
192
  /*--- Custom memory allocator ---*/
@@ -1,5 +1,5 @@
1
1
  /*
2
- * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
2
+ * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
3
3
  * All rights reserved.
4
4
  *
5
5
  * This source code is licensed under both the BSD-style license (found in the
@@ -11,7 +11,7 @@
11
11
  /*-*************************************
12
12
  * Dependencies
13
13
  ***************************************/
14
- #include "error_private.h"
14
+ #include "../common/error_private.h"
15
15
  #include "zbuff.h"
16
16
 
17
17
  /*-****************************************
@@ -1,5 +1,5 @@
1
1
  /*
2
- * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
2
+ * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
3
3
  * All rights reserved.
4
4
  *
5
5
  * This source code is licensed under both the BSD-style license (found in the
@@ -1,5 +1,5 @@
1
1
  /*
2
- * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
2
+ * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
3
3
  * All rights reserved.
4
4
  *
5
5
  * This source code is licensed under both the BSD-style license (found in the
@@ -1,5 +1,5 @@
1
1
  /*
2
- * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
2
+ * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
3
3
  * All rights reserved.
4
4
  *
5
5
  * This source code is licensed under both the BSD-style license (found in the
@@ -26,11 +26,11 @@
26
26
  #include <string.h> /* memset */
27
27
  #include <time.h> /* clock */
28
28
 
29
- #include "mem.h" /* read */
30
- #include "pool.h"
31
- #include "threading.h"
29
+ #include "../common/mem.h" /* read */
30
+ #include "../common/pool.h"
31
+ #include "../common/threading.h"
32
32
  #include "cover.h"
33
- #include "zstd_internal.h" /* includes zstd.h */
33
+ #include "../common/zstd_internal.h" /* includes zstd.h */
34
34
  #ifndef ZDICT_STATIC_LINKING_ONLY
35
35
  #define ZDICT_STATIC_LINKING_ONLY
36
36
  #endif
@@ -526,10 +526,10 @@ static void COVER_ctx_destroy(COVER_ctx_t *ctx) {
526
526
  * Prepare a context for dictionary building.
527
527
  * The context is only dependent on the parameter `d` and can used multiple
528
528
  * times.
529
- * Returns 1 on success or zero on error.
529
+ * Returns 0 on success or error code on error.
530
530
  * The context must be destroyed with `COVER_ctx_destroy()`.
531
531
  */
532
- static int COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer,
532
+ static size_t COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer,
533
533
  const size_t *samplesSizes, unsigned nbSamples,
534
534
  unsigned d, double splitPoint) {
535
535
  const BYTE *const samples = (const BYTE *)samplesBuffer;
@@ -544,17 +544,17 @@ static int COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer,
544
544
  totalSamplesSize >= (size_t)COVER_MAX_SAMPLES_SIZE) {
545
545
  DISPLAYLEVEL(1, "Total samples size is too large (%u MB), maximum size is %u MB\n",
546
546
  (unsigned)(totalSamplesSize>>20), (COVER_MAX_SAMPLES_SIZE >> 20));
547
- return 0;
547
+ return ERROR(srcSize_wrong);
548
548
  }
549
549
  /* Check if there are at least 5 training samples */
550
550
  if (nbTrainSamples < 5) {
551
551
  DISPLAYLEVEL(1, "Total number of training samples is %u and is invalid.", nbTrainSamples);
552
- return 0;
552
+ return ERROR(srcSize_wrong);
553
553
  }
554
554
  /* Check if there's testing sample */
555
555
  if (nbTestSamples < 1) {
556
556
  DISPLAYLEVEL(1, "Total number of testing samples is %u and is invalid.", nbTestSamples);
557
- return 0;
557
+ return ERROR(srcSize_wrong);
558
558
  }
559
559
  /* Zero the context */
560
560
  memset(ctx, 0, sizeof(*ctx));
@@ -577,7 +577,7 @@ static int COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer,
577
577
  if (!ctx->suffix || !ctx->dmerAt || !ctx->offsets) {
578
578
  DISPLAYLEVEL(1, "Failed to allocate scratch buffers\n");
579
579
  COVER_ctx_destroy(ctx);
580
- return 0;
580
+ return ERROR(memory_allocation);
581
581
  }
582
582
  ctx->freqs = NULL;
583
583
  ctx->d = d;
@@ -624,7 +624,7 @@ static int COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer,
624
624
  (ctx->d <= 8 ? &COVER_cmp8 : &COVER_cmp), &COVER_group);
625
625
  ctx->freqs = ctx->suffix;
626
626
  ctx->suffix = NULL;
627
- return 1;
627
+ return 0;
628
628
  }
629
629
 
630
630
  void COVER_warnOnSmallCorpus(size_t maxDictSize, size_t nbDmers, int displayLevel)
@@ -638,8 +638,8 @@ void COVER_warnOnSmallCorpus(size_t maxDictSize, size_t nbDmers, int displayLeve
638
638
  "compared to the source size %u! "
639
639
  "size(source)/size(dictionary) = %f, but it should be >= "
640
640
  "10! This may lead to a subpar dictionary! We recommend "
641
- "training on sources at least 10x, and up to 100x the "
642
- "size of the dictionary!\n", (U32)maxDictSize,
641
+ "training on sources at least 10x, and preferably 100x "
642
+ "the size of the dictionary! \n", (U32)maxDictSize,
643
643
  (U32)nbDmers, ratio);
644
644
  }
645
645
 
@@ -729,11 +729,11 @@ ZDICTLIB_API size_t ZDICT_trainFromBuffer_cover(
729
729
  /* Checks */
730
730
  if (!COVER_checkParameters(parameters, dictBufferCapacity)) {
731
731
  DISPLAYLEVEL(1, "Cover parameters incorrect\n");
732
- return ERROR(GENERIC);
732
+ return ERROR(parameter_outOfBound);
733
733
  }
734
734
  if (nbSamples == 0) {
735
735
  DISPLAYLEVEL(1, "Cover must have at least one input file\n");
736
- return ERROR(GENERIC);
736
+ return ERROR(srcSize_wrong);
737
737
  }
738
738
  if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) {
739
739
  DISPLAYLEVEL(1, "dictBufferCapacity must be at least %u\n",
@@ -741,15 +741,18 @@ ZDICTLIB_API size_t ZDICT_trainFromBuffer_cover(
741
741
  return ERROR(dstSize_tooSmall);
742
742
  }
743
743
  /* Initialize context and activeDmers */
744
- if (!COVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples,
745
- parameters.d, parameters.splitPoint)) {
746
- return ERROR(GENERIC);
744
+ {
745
+ size_t const initVal = COVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples,
746
+ parameters.d, parameters.splitPoint);
747
+ if (ZSTD_isError(initVal)) {
748
+ return initVal;
749
+ }
747
750
  }
748
751
  COVER_warnOnSmallCorpus(dictBufferCapacity, ctx.suffixSize, g_displayLevel);
749
752
  if (!COVER_map_init(&activeDmers, parameters.k - parameters.d + 1)) {
750
753
  DISPLAYLEVEL(1, "Failed to allocate dmer map: out of memory\n");
751
754
  COVER_ctx_destroy(&ctx);
752
- return ERROR(GENERIC);
755
+ return ERROR(memory_allocation);
753
756
  }
754
757
 
755
758
  DISPLAYLEVEL(2, "Building dictionary\n");
@@ -810,7 +813,7 @@ size_t COVER_checkTotalCompressedSize(const ZDICT_cover_params_t parameters,
810
813
  cctx, dst, dstCapacity, samples + offsets[i],
811
814
  samplesSizes[i], cdict);
812
815
  if (ZSTD_isError(size)) {
813
- totalCompressedSize = ERROR(GENERIC);
816
+ totalCompressedSize = size;
814
817
  goto _compressCleanup;
815
818
  }
816
819
  totalCompressedSize += size;
@@ -886,9 +889,11 @@ void COVER_best_start(COVER_best_t *best) {
886
889
  * Decrements liveJobs and signals any waiting threads if liveJobs == 0.
887
890
  * If this dictionary is the best so far save it and its parameters.
888
891
  */
889
- void COVER_best_finish(COVER_best_t *best, size_t compressedSize,
890
- ZDICT_cover_params_t parameters, void *dict,
891
- size_t dictSize) {
892
+ void COVER_best_finish(COVER_best_t *best, ZDICT_cover_params_t parameters,
893
+ COVER_dictSelection_t selection) {
894
+ void* dict = selection.dictContent;
895
+ size_t compressedSize = selection.totalCompressedSize;
896
+ size_t dictSize = selection.dictSize;
892
897
  if (!best) {
893
898
  return;
894
899
  }
@@ -914,10 +919,12 @@ void COVER_best_finish(COVER_best_t *best, size_t compressedSize,
914
919
  }
915
920
  }
916
921
  /* Save the dictionary, parameters, and size */
917
- memcpy(best->dict, dict, dictSize);
918
- best->dictSize = dictSize;
919
- best->parameters = parameters;
920
- best->compressedSize = compressedSize;
922
+ if (dict) {
923
+ memcpy(best->dict, dict, dictSize);
924
+ best->dictSize = dictSize;
925
+ best->parameters = parameters;
926
+ best->compressedSize = compressedSize;
927
+ }
921
928
  }
922
929
  if (liveJobs == 0) {
923
930
  ZSTD_pthread_cond_broadcast(&best->cond);
@@ -926,6 +933,111 @@ void COVER_best_finish(COVER_best_t *best, size_t compressedSize,
926
933
  }
927
934
  }
928
935
 
936
+ COVER_dictSelection_t COVER_dictSelectionError(size_t error) {
937
+ COVER_dictSelection_t selection = { NULL, 0, error };
938
+ return selection;
939
+ }
940
+
941
+ unsigned COVER_dictSelectionIsError(COVER_dictSelection_t selection) {
942
+ return (ZSTD_isError(selection.totalCompressedSize) || !selection.dictContent);
943
+ }
944
+
945
+ void COVER_dictSelectionFree(COVER_dictSelection_t selection){
946
+ free(selection.dictContent);
947
+ }
948
+
949
+ COVER_dictSelection_t COVER_selectDict(BYTE* customDictContent,
950
+ size_t dictContentSize, const BYTE* samplesBuffer, const size_t* samplesSizes, unsigned nbFinalizeSamples,
951
+ size_t nbCheckSamples, size_t nbSamples, ZDICT_cover_params_t params, size_t* offsets, size_t totalCompressedSize) {
952
+
953
+ size_t largestDict = 0;
954
+ size_t largestCompressed = 0;
955
+ BYTE* customDictContentEnd = customDictContent + dictContentSize;
956
+
957
+ BYTE * largestDictbuffer = (BYTE *)malloc(dictContentSize);
958
+ BYTE * candidateDictBuffer = (BYTE *)malloc(dictContentSize);
959
+ double regressionTolerance = ((double)params.shrinkDictMaxRegression / 100.0) + 1.00;
960
+
961
+ if (!largestDictbuffer || !candidateDictBuffer) {
962
+ free(largestDictbuffer);
963
+ free(candidateDictBuffer);
964
+ return COVER_dictSelectionError(dictContentSize);
965
+ }
966
+
967
+ /* Initial dictionary size and compressed size */
968
+ memcpy(largestDictbuffer, customDictContent, dictContentSize);
969
+ dictContentSize = ZDICT_finalizeDictionary(
970
+ largestDictbuffer, dictContentSize, customDictContent, dictContentSize,
971
+ samplesBuffer, samplesSizes, nbFinalizeSamples, params.zParams);
972
+
973
+ if (ZDICT_isError(dictContentSize)) {
974
+ free(largestDictbuffer);
975
+ free(candidateDictBuffer);
976
+ return COVER_dictSelectionError(dictContentSize);
977
+ }
978
+
979
+ totalCompressedSize = COVER_checkTotalCompressedSize(params, samplesSizes,
980
+ samplesBuffer, offsets,
981
+ nbCheckSamples, nbSamples,
982
+ largestDictbuffer, dictContentSize);
983
+
984
+ if (ZSTD_isError(totalCompressedSize)) {
985
+ free(largestDictbuffer);
986
+ free(candidateDictBuffer);
987
+ return COVER_dictSelectionError(totalCompressedSize);
988
+ }
989
+
990
+ if (params.shrinkDict == 0) {
991
+ COVER_dictSelection_t selection = { largestDictbuffer, dictContentSize, totalCompressedSize };
992
+ free(candidateDictBuffer);
993
+ return selection;
994
+ }
995
+
996
+ largestDict = dictContentSize;
997
+ largestCompressed = totalCompressedSize;
998
+ dictContentSize = ZDICT_DICTSIZE_MIN;
999
+
1000
+ /* Largest dict is initially at least ZDICT_DICTSIZE_MIN */
1001
+ while (dictContentSize < largestDict) {
1002
+ memcpy(candidateDictBuffer, largestDictbuffer, largestDict);
1003
+ dictContentSize = ZDICT_finalizeDictionary(
1004
+ candidateDictBuffer, dictContentSize, customDictContentEnd - dictContentSize, dictContentSize,
1005
+ samplesBuffer, samplesSizes, nbFinalizeSamples, params.zParams);
1006
+
1007
+ if (ZDICT_isError(dictContentSize)) {
1008
+ free(largestDictbuffer);
1009
+ free(candidateDictBuffer);
1010
+ return COVER_dictSelectionError(dictContentSize);
1011
+
1012
+ }
1013
+
1014
+ totalCompressedSize = COVER_checkTotalCompressedSize(params, samplesSizes,
1015
+ samplesBuffer, offsets,
1016
+ nbCheckSamples, nbSamples,
1017
+ candidateDictBuffer, dictContentSize);
1018
+
1019
+ if (ZSTD_isError(totalCompressedSize)) {
1020
+ free(largestDictbuffer);
1021
+ free(candidateDictBuffer);
1022
+ return COVER_dictSelectionError(totalCompressedSize);
1023
+ }
1024
+
1025
+ if (totalCompressedSize <= largestCompressed * regressionTolerance) {
1026
+ COVER_dictSelection_t selection = { candidateDictBuffer, dictContentSize, totalCompressedSize };
1027
+ free(largestDictbuffer);
1028
+ return selection;
1029
+ }
1030
+ dictContentSize *= 2;
1031
+ }
1032
+ dictContentSize = largestDict;
1033
+ totalCompressedSize = largestCompressed;
1034
+ {
1035
+ COVER_dictSelection_t selection = { largestDictbuffer, dictContentSize, totalCompressedSize };
1036
+ free(candidateDictBuffer);
1037
+ return selection;
1038
+ }
1039
+ }
1040
+
929
1041
  /**
930
1042
  * Parameters for COVER_tryParameters().
931
1043
  */
@@ -951,6 +1063,7 @@ static void COVER_tryParameters(void *opaque) {
951
1063
  /* Allocate space for hash table, dict, and freqs */
952
1064
  COVER_map_t activeDmers;
953
1065
  BYTE *const dict = (BYTE * const)malloc(dictBufferCapacity);
1066
+ COVER_dictSelection_t selection = COVER_dictSelectionError(ERROR(GENERIC));
954
1067
  U32 *freqs = (U32 *)malloc(ctx->suffixSize * sizeof(U32));
955
1068
  if (!COVER_map_init(&activeDmers, parameters.k - parameters.d + 1)) {
956
1069
  DISPLAYLEVEL(1, "Failed to allocate dmer map: out of memory\n");
@@ -966,29 +1079,21 @@ static void COVER_tryParameters(void *opaque) {
966
1079
  {
967
1080
  const size_t tail = COVER_buildDictionary(ctx, freqs, &activeDmers, dict,
968
1081
  dictBufferCapacity, parameters);
969
- dictBufferCapacity = ZDICT_finalizeDictionary(
970
- dict, dictBufferCapacity, dict + tail, dictBufferCapacity - tail,
971
- ctx->samples, ctx->samplesSizes, (unsigned)ctx->nbTrainSamples,
972
- parameters.zParams);
973
- if (ZDICT_isError(dictBufferCapacity)) {
974
- DISPLAYLEVEL(1, "Failed to finalize dictionary\n");
1082
+ selection = COVER_selectDict(dict + tail, dictBufferCapacity - tail,
1083
+ ctx->samples, ctx->samplesSizes, (unsigned)ctx->nbTrainSamples, ctx->nbTrainSamples, ctx->nbSamples, parameters, ctx->offsets,
1084
+ totalCompressedSize);
1085
+
1086
+ if (COVER_dictSelectionIsError(selection)) {
1087
+ DISPLAYLEVEL(1, "Failed to select dictionary\n");
975
1088
  goto _cleanup;
976
1089
  }
977
1090
  }
978
- /* Check total compressed size */
979
- totalCompressedSize = COVER_checkTotalCompressedSize(parameters, ctx->samplesSizes,
980
- ctx->samples, ctx->offsets,
981
- ctx->nbTrainSamples, ctx->nbSamples,
982
- dict, dictBufferCapacity);
983
-
984
1091
  _cleanup:
985
- COVER_best_finish(data->best, totalCompressedSize, parameters, dict,
986
- dictBufferCapacity);
1092
+ free(dict);
1093
+ COVER_best_finish(data->best, parameters, selection);
987
1094
  free(data);
988
1095
  COVER_map_destroy(&activeDmers);
989
- if (dict) {
990
- free(dict);
991
- }
1096
+ COVER_dictSelectionFree(selection);
992
1097
  if (freqs) {
993
1098
  free(freqs);
994
1099
  }
@@ -1010,6 +1115,7 @@ ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover(
1010
1115
  const unsigned kStepSize = MAX((kMaxK - kMinK) / kSteps, 1);
1011
1116
  const unsigned kIterations =
1012
1117
  (1 + (kMaxD - kMinD) / 2) * (1 + (kMaxK - kMinK) / kStepSize);
1118
+ const unsigned shrinkDict = 0;
1013
1119
  /* Local variables */
1014
1120
  const int displayLevel = parameters->zParams.notificationLevel;
1015
1121
  unsigned iteration = 1;
@@ -1022,15 +1128,15 @@ ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover(
1022
1128
  /* Checks */
1023
1129
  if (splitPoint <= 0 || splitPoint > 1) {
1024
1130
  LOCALDISPLAYLEVEL(displayLevel, 1, "Incorrect parameters\n");
1025
- return ERROR(GENERIC);
1131
+ return ERROR(parameter_outOfBound);
1026
1132
  }
1027
1133
  if (kMinK < kMaxD || kMaxK < kMinK) {
1028
1134
  LOCALDISPLAYLEVEL(displayLevel, 1, "Incorrect parameters\n");
1029
- return ERROR(GENERIC);
1135
+ return ERROR(parameter_outOfBound);
1030
1136
  }
1031
1137
  if (nbSamples == 0) {
1032
1138
  DISPLAYLEVEL(1, "Cover must have at least one input file\n");
1033
- return ERROR(GENERIC);
1139
+ return ERROR(srcSize_wrong);
1034
1140
  }
1035
1141
  if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) {
1036
1142
  DISPLAYLEVEL(1, "dictBufferCapacity must be at least %u\n",
@@ -1054,11 +1160,14 @@ ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover(
1054
1160
  /* Initialize the context for this value of d */
1055
1161
  COVER_ctx_t ctx;
1056
1162
  LOCALDISPLAYLEVEL(displayLevel, 3, "d=%u\n", d);
1057
- if (!COVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples, d, splitPoint)) {
1058
- LOCALDISPLAYLEVEL(displayLevel, 1, "Failed to initialize context\n");
1059
- COVER_best_destroy(&best);
1060
- POOL_free(pool);
1061
- return ERROR(GENERIC);
1163
+ {
1164
+ const size_t initVal = COVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples, d, splitPoint);
1165
+ if (ZSTD_isError(initVal)) {
1166
+ LOCALDISPLAYLEVEL(displayLevel, 1, "Failed to initialize context\n");
1167
+ COVER_best_destroy(&best);
1168
+ POOL_free(pool);
1169
+ return initVal;
1170
+ }
1062
1171
  }
1063
1172
  if (!warned) {
1064
1173
  COVER_warnOnSmallCorpus(dictBufferCapacity, ctx.suffixSize, displayLevel);
@@ -1075,7 +1184,7 @@ ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover(
1075
1184
  COVER_best_destroy(&best);
1076
1185
  COVER_ctx_destroy(&ctx);
1077
1186
  POOL_free(pool);
1078
- return ERROR(GENERIC);
1187
+ return ERROR(memory_allocation);
1079
1188
  }
1080
1189
  data->ctx = &ctx;
1081
1190
  data->best = &best;
@@ -1085,6 +1194,7 @@ ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover(
1085
1194
  data->parameters.d = d;
1086
1195
  data->parameters.splitPoint = splitPoint;
1087
1196
  data->parameters.steps = kSteps;
1197
+ data->parameters.shrinkDict = shrinkDict;
1088
1198
  data->parameters.zParams.notificationLevel = g_displayLevel;
1089
1199
  /* Check the parameters */
1090
1200
  if (!COVER_checkParameters(data->parameters, dictBufferCapacity)) {