extzstd 0.3 → 0.3.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (107) hide show
  1. checksums.yaml +4 -4
  2. data/HISTORY.ja.md +8 -0
  3. data/README.md +1 -1
  4. data/contrib/zstd/CHANGELOG +94 -0
  5. data/contrib/zstd/CONTRIBUTING.md +351 -1
  6. data/contrib/zstd/Makefile +32 -10
  7. data/contrib/zstd/README.md +33 -10
  8. data/contrib/zstd/TESTING.md +2 -2
  9. data/contrib/zstd/appveyor.yml +42 -4
  10. data/contrib/zstd/lib/Makefile +128 -60
  11. data/contrib/zstd/lib/README.md +47 -16
  12. data/contrib/zstd/lib/common/bitstream.h +38 -39
  13. data/contrib/zstd/lib/common/compiler.h +40 -5
  14. data/contrib/zstd/lib/common/cpu.h +1 -1
  15. data/contrib/zstd/lib/common/debug.c +11 -31
  16. data/contrib/zstd/lib/common/debug.h +11 -31
  17. data/contrib/zstd/lib/common/entropy_common.c +13 -33
  18. data/contrib/zstd/lib/common/error_private.c +2 -1
  19. data/contrib/zstd/lib/common/error_private.h +6 -2
  20. data/contrib/zstd/lib/common/fse.h +12 -32
  21. data/contrib/zstd/lib/common/fse_decompress.c +12 -35
  22. data/contrib/zstd/lib/common/huf.h +15 -33
  23. data/contrib/zstd/lib/common/mem.h +75 -2
  24. data/contrib/zstd/lib/common/pool.c +8 -4
  25. data/contrib/zstd/lib/common/pool.h +2 -2
  26. data/contrib/zstd/lib/common/threading.c +50 -4
  27. data/contrib/zstd/lib/common/threading.h +36 -4
  28. data/contrib/zstd/lib/common/xxhash.c +23 -35
  29. data/contrib/zstd/lib/common/xxhash.h +11 -31
  30. data/contrib/zstd/lib/common/zstd_common.c +1 -1
  31. data/contrib/zstd/lib/common/zstd_errors.h +2 -1
  32. data/contrib/zstd/lib/common/zstd_internal.h +154 -26
  33. data/contrib/zstd/lib/compress/fse_compress.c +17 -40
  34. data/contrib/zstd/lib/compress/hist.c +15 -35
  35. data/contrib/zstd/lib/compress/hist.h +12 -32
  36. data/contrib/zstd/lib/compress/huf_compress.c +92 -92
  37. data/contrib/zstd/lib/compress/zstd_compress.c +1191 -1330
  38. data/contrib/zstd/lib/compress/zstd_compress_internal.h +317 -55
  39. data/contrib/zstd/lib/compress/zstd_compress_literals.c +158 -0
  40. data/contrib/zstd/lib/compress/zstd_compress_literals.h +29 -0
  41. data/contrib/zstd/lib/compress/zstd_compress_sequences.c +419 -0
  42. data/contrib/zstd/lib/compress/zstd_compress_sequences.h +54 -0
  43. data/contrib/zstd/lib/compress/zstd_compress_superblock.c +845 -0
  44. data/contrib/zstd/lib/compress/zstd_compress_superblock.h +32 -0
  45. data/contrib/zstd/lib/compress/zstd_cwksp.h +525 -0
  46. data/contrib/zstd/lib/compress/zstd_double_fast.c +65 -43
  47. data/contrib/zstd/lib/compress/zstd_double_fast.h +2 -2
  48. data/contrib/zstd/lib/compress/zstd_fast.c +92 -66
  49. data/contrib/zstd/lib/compress/zstd_fast.h +2 -2
  50. data/contrib/zstd/lib/compress/zstd_lazy.c +74 -42
  51. data/contrib/zstd/lib/compress/zstd_lazy.h +1 -1
  52. data/contrib/zstd/lib/compress/zstd_ldm.c +32 -10
  53. data/contrib/zstd/lib/compress/zstd_ldm.h +7 -2
  54. data/contrib/zstd/lib/compress/zstd_opt.c +81 -114
  55. data/contrib/zstd/lib/compress/zstd_opt.h +1 -1
  56. data/contrib/zstd/lib/compress/zstdmt_compress.c +95 -51
  57. data/contrib/zstd/lib/compress/zstdmt_compress.h +3 -2
  58. data/contrib/zstd/lib/decompress/huf_decompress.c +76 -60
  59. data/contrib/zstd/lib/decompress/zstd_ddict.c +12 -8
  60. data/contrib/zstd/lib/decompress/zstd_ddict.h +2 -2
  61. data/contrib/zstd/lib/decompress/zstd_decompress.c +292 -172
  62. data/contrib/zstd/lib/decompress/zstd_decompress_block.c +459 -338
  63. data/contrib/zstd/lib/decompress/zstd_decompress_block.h +3 -3
  64. data/contrib/zstd/lib/decompress/zstd_decompress_internal.h +18 -4
  65. data/contrib/zstd/lib/deprecated/zbuff.h +9 -8
  66. data/contrib/zstd/lib/deprecated/zbuff_common.c +2 -2
  67. data/contrib/zstd/lib/deprecated/zbuff_compress.c +1 -1
  68. data/contrib/zstd/lib/deprecated/zbuff_decompress.c +1 -1
  69. data/contrib/zstd/lib/dictBuilder/cover.c +164 -54
  70. data/contrib/zstd/lib/dictBuilder/cover.h +52 -7
  71. data/contrib/zstd/lib/dictBuilder/fastcover.c +60 -43
  72. data/contrib/zstd/lib/dictBuilder/zdict.c +43 -19
  73. data/contrib/zstd/lib/dictBuilder/zdict.h +56 -28
  74. data/contrib/zstd/lib/legacy/zstd_legacy.h +8 -4
  75. data/contrib/zstd/lib/legacy/zstd_v01.c +110 -110
  76. data/contrib/zstd/lib/legacy/zstd_v01.h +1 -1
  77. data/contrib/zstd/lib/legacy/zstd_v02.c +23 -13
  78. data/contrib/zstd/lib/legacy/zstd_v02.h +1 -1
  79. data/contrib/zstd/lib/legacy/zstd_v03.c +23 -13
  80. data/contrib/zstd/lib/legacy/zstd_v03.h +1 -1
  81. data/contrib/zstd/lib/legacy/zstd_v04.c +30 -17
  82. data/contrib/zstd/lib/legacy/zstd_v04.h +1 -1
  83. data/contrib/zstd/lib/legacy/zstd_v05.c +113 -102
  84. data/contrib/zstd/lib/legacy/zstd_v05.h +2 -2
  85. data/contrib/zstd/lib/legacy/zstd_v06.c +20 -18
  86. data/contrib/zstd/lib/legacy/zstd_v06.h +1 -1
  87. data/contrib/zstd/lib/legacy/zstd_v07.c +25 -19
  88. data/contrib/zstd/lib/legacy/zstd_v07.h +1 -1
  89. data/contrib/zstd/lib/libzstd.pc.in +3 -2
  90. data/contrib/zstd/lib/zstd.h +265 -88
  91. data/ext/extzstd.h +1 -1
  92. data/ext/libzstd_conf.h +8 -0
  93. data/ext/zstd_common.c +1 -3
  94. data/ext/zstd_compress.c +3 -3
  95. data/ext/zstd_decompress.c +1 -5
  96. data/ext/zstd_dictbuilder.c +2 -3
  97. data/ext/zstd_dictbuilder_fastcover.c +1 -3
  98. data/ext/zstd_legacy_v01.c +2 -0
  99. data/ext/zstd_legacy_v02.c +2 -0
  100. data/ext/zstd_legacy_v03.c +2 -0
  101. data/ext/zstd_legacy_v04.c +2 -0
  102. data/ext/zstd_legacy_v05.c +2 -0
  103. data/ext/zstd_legacy_v06.c +2 -0
  104. data/ext/zstd_legacy_v07.c +2 -0
  105. data/lib/extzstd.rb +18 -10
  106. data/lib/extzstd/version.rb +1 -1
  107. metadata +15 -6
@@ -1,5 +1,5 @@
1
1
  /*
2
- * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
2
+ * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
3
3
  * All rights reserved.
4
4
  *
5
5
  * This source code is licensed under both the BSD-style license (found in the
@@ -16,8 +16,8 @@
16
16
  * Dependencies
17
17
  *********************************************************/
18
18
  #include <stddef.h> /* size_t */
19
- #include "zstd.h" /* DCtx, and some public functions */
20
- #include "zstd_internal.h" /* blockProperties_t, and some public functions */
19
+ #include "../zstd.h" /* DCtx, and some public functions */
20
+ #include "../common/zstd_internal.h" /* blockProperties_t, and some public functions */
21
21
  #include "zstd_decompress_internal.h" /* ZSTD_seqSymbol */
22
22
 
23
23
 
@@ -1,5 +1,5 @@
1
1
  /*
2
- * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
2
+ * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
3
3
  * All rights reserved.
4
4
  *
5
5
  * This source code is licensed under both the BSD-style license (found in the
@@ -19,8 +19,8 @@
19
19
  /*-*******************************************************
20
20
  * Dependencies
21
21
  *********************************************************/
22
- #include "mem.h" /* BYTE, U16, U32 */
23
- #include "zstd_internal.h" /* ZSTD_seqSymbol */
22
+ #include "../common/mem.h" /* BYTE, U16, U32 */
23
+ #include "../common/zstd_internal.h" /* ZSTD_seqSymbol */
24
24
 
25
25
 
26
26
 
@@ -95,6 +95,11 @@ typedef enum {
95
95
  ZSTD_use_once = 1 /* Use the dictionary once and set to ZSTD_dont_use */
96
96
  } ZSTD_dictUses_e;
97
97
 
98
+ typedef enum {
99
+ ZSTD_obm_buffered = 0, /* Buffer the output */
100
+ ZSTD_obm_stable = 1 /* ZSTD_outBuffer is stable */
101
+ } ZSTD_outBufferMode_e;
102
+
98
103
  struct ZSTD_DCtx_s
99
104
  {
100
105
  const ZSTD_seqSymbol* LLTptr;
@@ -147,10 +152,19 @@ struct ZSTD_DCtx_s
147
152
  U32 legacyVersion;
148
153
  U32 hostageByte;
149
154
  int noForwardProgress;
155
+ ZSTD_outBufferMode_e outBufferMode;
156
+ ZSTD_outBuffer expectedOutBuffer;
150
157
 
151
158
  /* workspace */
152
159
  BYTE litBuffer[ZSTD_BLOCKSIZE_MAX + WILDCOPY_OVERLENGTH];
153
160
  BYTE headerBuffer[ZSTD_FRAMEHEADERSIZE_MAX];
161
+
162
+ size_t oversizedDuration;
163
+
164
+ #ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
165
+ void const* dictContentBeginForFuzzing;
166
+ void const* dictContentEndForFuzzing;
167
+ #endif
154
168
  }; /* typedef'd to ZSTD_DCtx within "zstd.h" */
155
169
 
156
170
 
@@ -160,7 +174,7 @@ struct ZSTD_DCtx_s
160
174
 
161
175
  /*! ZSTD_loadDEntropy() :
162
176
  * dict : must point at beginning of a valid zstd dictionary.
163
- * @return : size of entropy tables read */
177
+ * @return : size of dictionary header (size of magic number + dict ID + entropy tables) */
164
178
  size_t ZSTD_loadDEntropy(ZSTD_entropyDTables_t* entropy,
165
179
  const void* const dict, size_t const dictSize);
166
180
 
@@ -1,5 +1,5 @@
1
1
  /*
2
- * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
2
+ * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
3
3
  * All rights reserved.
4
4
  *
5
5
  * This source code is licensed under both the BSD-style license (found in the
@@ -28,7 +28,7 @@ extern "C" {
28
28
  * Dependencies
29
29
  ***************************************/
30
30
  #include <stddef.h> /* size_t */
31
- #include "zstd.h" /* ZSTD_CStream, ZSTD_DStream, ZSTDLIB_API */
31
+ #include "../zstd.h" /* ZSTD_CStream, ZSTD_DStream, ZSTDLIB_API */
32
32
 
33
33
 
34
34
  /* ***************************************************************
@@ -36,16 +36,17 @@ extern "C" {
36
36
  *****************************************************************/
37
37
  /* Deprecation warnings */
38
38
  /* Should these warnings be a problem,
39
- it is generally possible to disable them,
40
- typically with -Wno-deprecated-declarations for gcc
41
- or _CRT_SECURE_NO_WARNINGS in Visual.
42
- Otherwise, it's also possible to define ZBUFF_DISABLE_DEPRECATE_WARNINGS */
39
+ * it is generally possible to disable them,
40
+ * typically with -Wno-deprecated-declarations for gcc
41
+ * or _CRT_SECURE_NO_WARNINGS in Visual.
42
+ * Otherwise, it's also possible to define ZBUFF_DISABLE_DEPRECATE_WARNINGS
43
+ */
43
44
  #ifdef ZBUFF_DISABLE_DEPRECATE_WARNINGS
44
45
  # define ZBUFF_DEPRECATED(message) ZSTDLIB_API /* disable deprecation warnings */
45
46
  #else
46
47
  # if defined (__cplusplus) && (__cplusplus >= 201402) /* C++14 or greater */
47
48
  # define ZBUFF_DEPRECATED(message) [[deprecated(message)]] ZSTDLIB_API
48
- # elif (defined(__GNUC__) && (__GNUC__ >= 5)) || defined(__clang__)
49
+ # elif (defined(GNUC) && (GNUC > 4 || (GNUC == 4 && GNUC_MINOR >= 5))) || defined(__clang__)
49
50
  # define ZBUFF_DEPRECATED(message) ZSTDLIB_API __attribute__((deprecated(message)))
50
51
  # elif defined(__GNUC__) && (__GNUC__ >= 3)
51
52
  # define ZBUFF_DEPRECATED(message) ZSTDLIB_API __attribute__((deprecated))
@@ -185,7 +186,7 @@ ZBUFF_DEPRECATED("use ZSTD_DStreamOutSize") size_t ZBUFF_recommendedDOutSize(voi
185
186
 
186
187
  /*--- Dependency ---*/
187
188
  #define ZSTD_STATIC_LINKING_ONLY /* ZSTD_parameters, ZSTD_customMem */
188
- #include "zstd.h"
189
+ #include "../zstd.h"
189
190
 
190
191
 
191
192
  /*--- Custom memory allocator ---*/
@@ -1,5 +1,5 @@
1
1
  /*
2
- * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
2
+ * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
3
3
  * All rights reserved.
4
4
  *
5
5
  * This source code is licensed under both the BSD-style license (found in the
@@ -11,7 +11,7 @@
11
11
  /*-*************************************
12
12
  * Dependencies
13
13
  ***************************************/
14
- #include "error_private.h"
14
+ #include "../common/error_private.h"
15
15
  #include "zbuff.h"
16
16
 
17
17
  /*-****************************************
@@ -1,5 +1,5 @@
1
1
  /*
2
- * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
2
+ * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
3
3
  * All rights reserved.
4
4
  *
5
5
  * This source code is licensed under both the BSD-style license (found in the
@@ -1,5 +1,5 @@
1
1
  /*
2
- * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
2
+ * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
3
3
  * All rights reserved.
4
4
  *
5
5
  * This source code is licensed under both the BSD-style license (found in the
@@ -1,5 +1,5 @@
1
1
  /*
2
- * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
2
+ * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
3
3
  * All rights reserved.
4
4
  *
5
5
  * This source code is licensed under both the BSD-style license (found in the
@@ -26,11 +26,11 @@
26
26
  #include <string.h> /* memset */
27
27
  #include <time.h> /* clock */
28
28
 
29
- #include "mem.h" /* read */
30
- #include "pool.h"
31
- #include "threading.h"
29
+ #include "../common/mem.h" /* read */
30
+ #include "../common/pool.h"
31
+ #include "../common/threading.h"
32
32
  #include "cover.h"
33
- #include "zstd_internal.h" /* includes zstd.h */
33
+ #include "../common/zstd_internal.h" /* includes zstd.h */
34
34
  #ifndef ZDICT_STATIC_LINKING_ONLY
35
35
  #define ZDICT_STATIC_LINKING_ONLY
36
36
  #endif
@@ -526,10 +526,10 @@ static void COVER_ctx_destroy(COVER_ctx_t *ctx) {
526
526
  * Prepare a context for dictionary building.
527
527
  * The context is only dependent on the parameter `d` and can used multiple
528
528
  * times.
529
- * Returns 1 on success or zero on error.
529
+ * Returns 0 on success or error code on error.
530
530
  * The context must be destroyed with `COVER_ctx_destroy()`.
531
531
  */
532
- static int COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer,
532
+ static size_t COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer,
533
533
  const size_t *samplesSizes, unsigned nbSamples,
534
534
  unsigned d, double splitPoint) {
535
535
  const BYTE *const samples = (const BYTE *)samplesBuffer;
@@ -544,17 +544,17 @@ static int COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer,
544
544
  totalSamplesSize >= (size_t)COVER_MAX_SAMPLES_SIZE) {
545
545
  DISPLAYLEVEL(1, "Total samples size is too large (%u MB), maximum size is %u MB\n",
546
546
  (unsigned)(totalSamplesSize>>20), (COVER_MAX_SAMPLES_SIZE >> 20));
547
- return 0;
547
+ return ERROR(srcSize_wrong);
548
548
  }
549
549
  /* Check if there are at least 5 training samples */
550
550
  if (nbTrainSamples < 5) {
551
551
  DISPLAYLEVEL(1, "Total number of training samples is %u and is invalid.", nbTrainSamples);
552
- return 0;
552
+ return ERROR(srcSize_wrong);
553
553
  }
554
554
  /* Check if there's testing sample */
555
555
  if (nbTestSamples < 1) {
556
556
  DISPLAYLEVEL(1, "Total number of testing samples is %u and is invalid.", nbTestSamples);
557
- return 0;
557
+ return ERROR(srcSize_wrong);
558
558
  }
559
559
  /* Zero the context */
560
560
  memset(ctx, 0, sizeof(*ctx));
@@ -577,7 +577,7 @@ static int COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer,
577
577
  if (!ctx->suffix || !ctx->dmerAt || !ctx->offsets) {
578
578
  DISPLAYLEVEL(1, "Failed to allocate scratch buffers\n");
579
579
  COVER_ctx_destroy(ctx);
580
- return 0;
580
+ return ERROR(memory_allocation);
581
581
  }
582
582
  ctx->freqs = NULL;
583
583
  ctx->d = d;
@@ -624,7 +624,7 @@ static int COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer,
624
624
  (ctx->d <= 8 ? &COVER_cmp8 : &COVER_cmp), &COVER_group);
625
625
  ctx->freqs = ctx->suffix;
626
626
  ctx->suffix = NULL;
627
- return 1;
627
+ return 0;
628
628
  }
629
629
 
630
630
  void COVER_warnOnSmallCorpus(size_t maxDictSize, size_t nbDmers, int displayLevel)
@@ -638,8 +638,8 @@ void COVER_warnOnSmallCorpus(size_t maxDictSize, size_t nbDmers, int displayLeve
638
638
  "compared to the source size %u! "
639
639
  "size(source)/size(dictionary) = %f, but it should be >= "
640
640
  "10! This may lead to a subpar dictionary! We recommend "
641
- "training on sources at least 10x, and up to 100x the "
642
- "size of the dictionary!\n", (U32)maxDictSize,
641
+ "training on sources at least 10x, and preferably 100x "
642
+ "the size of the dictionary! \n", (U32)maxDictSize,
643
643
  (U32)nbDmers, ratio);
644
644
  }
645
645
 
@@ -729,11 +729,11 @@ ZDICTLIB_API size_t ZDICT_trainFromBuffer_cover(
729
729
  /* Checks */
730
730
  if (!COVER_checkParameters(parameters, dictBufferCapacity)) {
731
731
  DISPLAYLEVEL(1, "Cover parameters incorrect\n");
732
- return ERROR(GENERIC);
732
+ return ERROR(parameter_outOfBound);
733
733
  }
734
734
  if (nbSamples == 0) {
735
735
  DISPLAYLEVEL(1, "Cover must have at least one input file\n");
736
- return ERROR(GENERIC);
736
+ return ERROR(srcSize_wrong);
737
737
  }
738
738
  if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) {
739
739
  DISPLAYLEVEL(1, "dictBufferCapacity must be at least %u\n",
@@ -741,15 +741,18 @@ ZDICTLIB_API size_t ZDICT_trainFromBuffer_cover(
741
741
  return ERROR(dstSize_tooSmall);
742
742
  }
743
743
  /* Initialize context and activeDmers */
744
- if (!COVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples,
745
- parameters.d, parameters.splitPoint)) {
746
- return ERROR(GENERIC);
744
+ {
745
+ size_t const initVal = COVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples,
746
+ parameters.d, parameters.splitPoint);
747
+ if (ZSTD_isError(initVal)) {
748
+ return initVal;
749
+ }
747
750
  }
748
751
  COVER_warnOnSmallCorpus(dictBufferCapacity, ctx.suffixSize, g_displayLevel);
749
752
  if (!COVER_map_init(&activeDmers, parameters.k - parameters.d + 1)) {
750
753
  DISPLAYLEVEL(1, "Failed to allocate dmer map: out of memory\n");
751
754
  COVER_ctx_destroy(&ctx);
752
- return ERROR(GENERIC);
755
+ return ERROR(memory_allocation);
753
756
  }
754
757
 
755
758
  DISPLAYLEVEL(2, "Building dictionary\n");
@@ -810,7 +813,7 @@ size_t COVER_checkTotalCompressedSize(const ZDICT_cover_params_t parameters,
810
813
  cctx, dst, dstCapacity, samples + offsets[i],
811
814
  samplesSizes[i], cdict);
812
815
  if (ZSTD_isError(size)) {
813
- totalCompressedSize = ERROR(GENERIC);
816
+ totalCompressedSize = size;
814
817
  goto _compressCleanup;
815
818
  }
816
819
  totalCompressedSize += size;
@@ -886,9 +889,11 @@ void COVER_best_start(COVER_best_t *best) {
886
889
  * Decrements liveJobs and signals any waiting threads if liveJobs == 0.
887
890
  * If this dictionary is the best so far save it and its parameters.
888
891
  */
889
- void COVER_best_finish(COVER_best_t *best, size_t compressedSize,
890
- ZDICT_cover_params_t parameters, void *dict,
891
- size_t dictSize) {
892
+ void COVER_best_finish(COVER_best_t *best, ZDICT_cover_params_t parameters,
893
+ COVER_dictSelection_t selection) {
894
+ void* dict = selection.dictContent;
895
+ size_t compressedSize = selection.totalCompressedSize;
896
+ size_t dictSize = selection.dictSize;
892
897
  if (!best) {
893
898
  return;
894
899
  }
@@ -914,10 +919,12 @@ void COVER_best_finish(COVER_best_t *best, size_t compressedSize,
914
919
  }
915
920
  }
916
921
  /* Save the dictionary, parameters, and size */
917
- memcpy(best->dict, dict, dictSize);
918
- best->dictSize = dictSize;
919
- best->parameters = parameters;
920
- best->compressedSize = compressedSize;
922
+ if (dict) {
923
+ memcpy(best->dict, dict, dictSize);
924
+ best->dictSize = dictSize;
925
+ best->parameters = parameters;
926
+ best->compressedSize = compressedSize;
927
+ }
921
928
  }
922
929
  if (liveJobs == 0) {
923
930
  ZSTD_pthread_cond_broadcast(&best->cond);
@@ -926,6 +933,111 @@ void COVER_best_finish(COVER_best_t *best, size_t compressedSize,
926
933
  }
927
934
  }
928
935
 
936
+ COVER_dictSelection_t COVER_dictSelectionError(size_t error) {
937
+ COVER_dictSelection_t selection = { NULL, 0, error };
938
+ return selection;
939
+ }
940
+
941
+ unsigned COVER_dictSelectionIsError(COVER_dictSelection_t selection) {
942
+ return (ZSTD_isError(selection.totalCompressedSize) || !selection.dictContent);
943
+ }
944
+
945
+ void COVER_dictSelectionFree(COVER_dictSelection_t selection){
946
+ free(selection.dictContent);
947
+ }
948
+
949
+ COVER_dictSelection_t COVER_selectDict(BYTE* customDictContent,
950
+ size_t dictContentSize, const BYTE* samplesBuffer, const size_t* samplesSizes, unsigned nbFinalizeSamples,
951
+ size_t nbCheckSamples, size_t nbSamples, ZDICT_cover_params_t params, size_t* offsets, size_t totalCompressedSize) {
952
+
953
+ size_t largestDict = 0;
954
+ size_t largestCompressed = 0;
955
+ BYTE* customDictContentEnd = customDictContent + dictContentSize;
956
+
957
+ BYTE * largestDictbuffer = (BYTE *)malloc(dictContentSize);
958
+ BYTE * candidateDictBuffer = (BYTE *)malloc(dictContentSize);
959
+ double regressionTolerance = ((double)params.shrinkDictMaxRegression / 100.0) + 1.00;
960
+
961
+ if (!largestDictbuffer || !candidateDictBuffer) {
962
+ free(largestDictbuffer);
963
+ free(candidateDictBuffer);
964
+ return COVER_dictSelectionError(dictContentSize);
965
+ }
966
+
967
+ /* Initial dictionary size and compressed size */
968
+ memcpy(largestDictbuffer, customDictContent, dictContentSize);
969
+ dictContentSize = ZDICT_finalizeDictionary(
970
+ largestDictbuffer, dictContentSize, customDictContent, dictContentSize,
971
+ samplesBuffer, samplesSizes, nbFinalizeSamples, params.zParams);
972
+
973
+ if (ZDICT_isError(dictContentSize)) {
974
+ free(largestDictbuffer);
975
+ free(candidateDictBuffer);
976
+ return COVER_dictSelectionError(dictContentSize);
977
+ }
978
+
979
+ totalCompressedSize = COVER_checkTotalCompressedSize(params, samplesSizes,
980
+ samplesBuffer, offsets,
981
+ nbCheckSamples, nbSamples,
982
+ largestDictbuffer, dictContentSize);
983
+
984
+ if (ZSTD_isError(totalCompressedSize)) {
985
+ free(largestDictbuffer);
986
+ free(candidateDictBuffer);
987
+ return COVER_dictSelectionError(totalCompressedSize);
988
+ }
989
+
990
+ if (params.shrinkDict == 0) {
991
+ COVER_dictSelection_t selection = { largestDictbuffer, dictContentSize, totalCompressedSize };
992
+ free(candidateDictBuffer);
993
+ return selection;
994
+ }
995
+
996
+ largestDict = dictContentSize;
997
+ largestCompressed = totalCompressedSize;
998
+ dictContentSize = ZDICT_DICTSIZE_MIN;
999
+
1000
+ /* Largest dict is initially at least ZDICT_DICTSIZE_MIN */
1001
+ while (dictContentSize < largestDict) {
1002
+ memcpy(candidateDictBuffer, largestDictbuffer, largestDict);
1003
+ dictContentSize = ZDICT_finalizeDictionary(
1004
+ candidateDictBuffer, dictContentSize, customDictContentEnd - dictContentSize, dictContentSize,
1005
+ samplesBuffer, samplesSizes, nbFinalizeSamples, params.zParams);
1006
+
1007
+ if (ZDICT_isError(dictContentSize)) {
1008
+ free(largestDictbuffer);
1009
+ free(candidateDictBuffer);
1010
+ return COVER_dictSelectionError(dictContentSize);
1011
+
1012
+ }
1013
+
1014
+ totalCompressedSize = COVER_checkTotalCompressedSize(params, samplesSizes,
1015
+ samplesBuffer, offsets,
1016
+ nbCheckSamples, nbSamples,
1017
+ candidateDictBuffer, dictContentSize);
1018
+
1019
+ if (ZSTD_isError(totalCompressedSize)) {
1020
+ free(largestDictbuffer);
1021
+ free(candidateDictBuffer);
1022
+ return COVER_dictSelectionError(totalCompressedSize);
1023
+ }
1024
+
1025
+ if (totalCompressedSize <= largestCompressed * regressionTolerance) {
1026
+ COVER_dictSelection_t selection = { candidateDictBuffer, dictContentSize, totalCompressedSize };
1027
+ free(largestDictbuffer);
1028
+ return selection;
1029
+ }
1030
+ dictContentSize *= 2;
1031
+ }
1032
+ dictContentSize = largestDict;
1033
+ totalCompressedSize = largestCompressed;
1034
+ {
1035
+ COVER_dictSelection_t selection = { largestDictbuffer, dictContentSize, totalCompressedSize };
1036
+ free(candidateDictBuffer);
1037
+ return selection;
1038
+ }
1039
+ }
1040
+
929
1041
  /**
930
1042
  * Parameters for COVER_tryParameters().
931
1043
  */
@@ -951,6 +1063,7 @@ static void COVER_tryParameters(void *opaque) {
951
1063
  /* Allocate space for hash table, dict, and freqs */
952
1064
  COVER_map_t activeDmers;
953
1065
  BYTE *const dict = (BYTE * const)malloc(dictBufferCapacity);
1066
+ COVER_dictSelection_t selection = COVER_dictSelectionError(ERROR(GENERIC));
954
1067
  U32 *freqs = (U32 *)malloc(ctx->suffixSize * sizeof(U32));
955
1068
  if (!COVER_map_init(&activeDmers, parameters.k - parameters.d + 1)) {
956
1069
  DISPLAYLEVEL(1, "Failed to allocate dmer map: out of memory\n");
@@ -966,29 +1079,21 @@ static void COVER_tryParameters(void *opaque) {
966
1079
  {
967
1080
  const size_t tail = COVER_buildDictionary(ctx, freqs, &activeDmers, dict,
968
1081
  dictBufferCapacity, parameters);
969
- dictBufferCapacity = ZDICT_finalizeDictionary(
970
- dict, dictBufferCapacity, dict + tail, dictBufferCapacity - tail,
971
- ctx->samples, ctx->samplesSizes, (unsigned)ctx->nbTrainSamples,
972
- parameters.zParams);
973
- if (ZDICT_isError(dictBufferCapacity)) {
974
- DISPLAYLEVEL(1, "Failed to finalize dictionary\n");
1082
+ selection = COVER_selectDict(dict + tail, dictBufferCapacity - tail,
1083
+ ctx->samples, ctx->samplesSizes, (unsigned)ctx->nbTrainSamples, ctx->nbTrainSamples, ctx->nbSamples, parameters, ctx->offsets,
1084
+ totalCompressedSize);
1085
+
1086
+ if (COVER_dictSelectionIsError(selection)) {
1087
+ DISPLAYLEVEL(1, "Failed to select dictionary\n");
975
1088
  goto _cleanup;
976
1089
  }
977
1090
  }
978
- /* Check total compressed size */
979
- totalCompressedSize = COVER_checkTotalCompressedSize(parameters, ctx->samplesSizes,
980
- ctx->samples, ctx->offsets,
981
- ctx->nbTrainSamples, ctx->nbSamples,
982
- dict, dictBufferCapacity);
983
-
984
1091
  _cleanup:
985
- COVER_best_finish(data->best, totalCompressedSize, parameters, dict,
986
- dictBufferCapacity);
1092
+ free(dict);
1093
+ COVER_best_finish(data->best, parameters, selection);
987
1094
  free(data);
988
1095
  COVER_map_destroy(&activeDmers);
989
- if (dict) {
990
- free(dict);
991
- }
1096
+ COVER_dictSelectionFree(selection);
992
1097
  if (freqs) {
993
1098
  free(freqs);
994
1099
  }
@@ -1010,6 +1115,7 @@ ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover(
1010
1115
  const unsigned kStepSize = MAX((kMaxK - kMinK) / kSteps, 1);
1011
1116
  const unsigned kIterations =
1012
1117
  (1 + (kMaxD - kMinD) / 2) * (1 + (kMaxK - kMinK) / kStepSize);
1118
+ const unsigned shrinkDict = 0;
1013
1119
  /* Local variables */
1014
1120
  const int displayLevel = parameters->zParams.notificationLevel;
1015
1121
  unsigned iteration = 1;
@@ -1022,15 +1128,15 @@ ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover(
1022
1128
  /* Checks */
1023
1129
  if (splitPoint <= 0 || splitPoint > 1) {
1024
1130
  LOCALDISPLAYLEVEL(displayLevel, 1, "Incorrect parameters\n");
1025
- return ERROR(GENERIC);
1131
+ return ERROR(parameter_outOfBound);
1026
1132
  }
1027
1133
  if (kMinK < kMaxD || kMaxK < kMinK) {
1028
1134
  LOCALDISPLAYLEVEL(displayLevel, 1, "Incorrect parameters\n");
1029
- return ERROR(GENERIC);
1135
+ return ERROR(parameter_outOfBound);
1030
1136
  }
1031
1137
  if (nbSamples == 0) {
1032
1138
  DISPLAYLEVEL(1, "Cover must have at least one input file\n");
1033
- return ERROR(GENERIC);
1139
+ return ERROR(srcSize_wrong);
1034
1140
  }
1035
1141
  if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) {
1036
1142
  DISPLAYLEVEL(1, "dictBufferCapacity must be at least %u\n",
@@ -1054,11 +1160,14 @@ ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover(
1054
1160
  /* Initialize the context for this value of d */
1055
1161
  COVER_ctx_t ctx;
1056
1162
  LOCALDISPLAYLEVEL(displayLevel, 3, "d=%u\n", d);
1057
- if (!COVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples, d, splitPoint)) {
1058
- LOCALDISPLAYLEVEL(displayLevel, 1, "Failed to initialize context\n");
1059
- COVER_best_destroy(&best);
1060
- POOL_free(pool);
1061
- return ERROR(GENERIC);
1163
+ {
1164
+ const size_t initVal = COVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples, d, splitPoint);
1165
+ if (ZSTD_isError(initVal)) {
1166
+ LOCALDISPLAYLEVEL(displayLevel, 1, "Failed to initialize context\n");
1167
+ COVER_best_destroy(&best);
1168
+ POOL_free(pool);
1169
+ return initVal;
1170
+ }
1062
1171
  }
1063
1172
  if (!warned) {
1064
1173
  COVER_warnOnSmallCorpus(dictBufferCapacity, ctx.suffixSize, displayLevel);
@@ -1075,7 +1184,7 @@ ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover(
1075
1184
  COVER_best_destroy(&best);
1076
1185
  COVER_ctx_destroy(&ctx);
1077
1186
  POOL_free(pool);
1078
- return ERROR(GENERIC);
1187
+ return ERROR(memory_allocation);
1079
1188
  }
1080
1189
  data->ctx = &ctx;
1081
1190
  data->best = &best;
@@ -1085,6 +1194,7 @@ ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover(
1085
1194
  data->parameters.d = d;
1086
1195
  data->parameters.splitPoint = splitPoint;
1087
1196
  data->parameters.steps = kSteps;
1197
+ data->parameters.shrinkDict = shrinkDict;
1088
1198
  data->parameters.zParams.notificationLevel = g_displayLevel;
1089
1199
  /* Check the parameters */
1090
1200
  if (!COVER_checkParameters(data->parameters, dictBufferCapacity)) {