zstd-ruby 1.4.0.0 → 1.4.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (32)
  1. checksums.yaml +4 -4
  2. data/README.md +1 -1
  3. data/ext/zstdruby/libzstd/Makefile +5 -0
  4. data/ext/zstdruby/libzstd/common/compiler.h +7 -0
  5. data/ext/zstdruby/libzstd/common/zstd_internal.h +58 -6
  6. data/ext/zstdruby/libzstd/compress/zstd_compress.c +175 -117
  7. data/ext/zstdruby/libzstd/compress/zstd_compress_internal.h +74 -30
  8. data/ext/zstdruby/libzstd/compress/zstd_double_fast.c +56 -36
  9. data/ext/zstdruby/libzstd/compress/zstd_fast.c +35 -14
  10. data/ext/zstdruby/libzstd/compress/zstd_lazy.c +10 -5
  11. data/ext/zstdruby/libzstd/compress/zstd_ldm.c +1 -1
  12. data/ext/zstdruby/libzstd/compress/zstd_opt.c +45 -32
  13. data/ext/zstdruby/libzstd/compress/zstdmt_compress.c +18 -7
  14. data/ext/zstdruby/libzstd/compress/zstdmt_compress.h +1 -0
  15. data/ext/zstdruby/libzstd/decompress/zstd_decompress.c +12 -9
  16. data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.c +20 -9
  17. data/ext/zstdruby/libzstd/dictBuilder/cover.c +154 -43
  18. data/ext/zstdruby/libzstd/dictBuilder/cover.h +38 -3
  19. data/ext/zstdruby/libzstd/dictBuilder/fastcover.c +46 -39
  20. data/ext/zstdruby/libzstd/dictBuilder/zdict.c +9 -9
  21. data/ext/zstdruby/libzstd/dictBuilder/zdict.h +5 -0
  22. data/ext/zstdruby/libzstd/legacy/zstd_legacy.h +4 -0
  23. data/ext/zstdruby/libzstd/legacy/zstd_v01.c +95 -101
  24. data/ext/zstdruby/libzstd/legacy/zstd_v02.c +11 -6
  25. data/ext/zstdruby/libzstd/legacy/zstd_v03.c +11 -6
  26. data/ext/zstdruby/libzstd/legacy/zstd_v04.c +11 -8
  27. data/ext/zstdruby/libzstd/legacy/zstd_v05.c +88 -84
  28. data/ext/zstdruby/libzstd/legacy/zstd_v06.c +2 -4
  29. data/ext/zstdruby/libzstd/legacy/zstd_v07.c +2 -4
  30. data/ext/zstdruby/libzstd/zstd.h +53 -21
  31. data/lib/zstd-ruby/version.rb +1 -1
  32. metadata +3 -4
@@ -526,10 +526,10 @@ static void COVER_ctx_destroy(COVER_ctx_t *ctx) {
526
526
  * Prepare a context for dictionary building.
527
527
  * The context is only dependent on the parameter `d` and can be used multiple
528
528
  * times.
529
- * Returns 1 on success or zero on error.
529
+ * Returns 0 on success or error code on error.
530
530
  * The context must be destroyed with `COVER_ctx_destroy()`.
531
531
  */
532
- static int COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer,
532
+ static size_t COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer,
533
533
  const size_t *samplesSizes, unsigned nbSamples,
534
534
  unsigned d, double splitPoint) {
535
535
  const BYTE *const samples = (const BYTE *)samplesBuffer;
@@ -544,17 +544,17 @@ static int COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer,
544
544
  totalSamplesSize >= (size_t)COVER_MAX_SAMPLES_SIZE) {
545
545
  DISPLAYLEVEL(1, "Total samples size is too large (%u MB), maximum size is %u MB\n",
546
546
  (unsigned)(totalSamplesSize>>20), (COVER_MAX_SAMPLES_SIZE >> 20));
547
- return 0;
547
+ return ERROR(srcSize_wrong);
548
548
  }
549
549
  /* Check if there are at least 5 training samples */
550
550
  if (nbTrainSamples < 5) {
551
551
  DISPLAYLEVEL(1, "Total number of training samples is %u and is invalid.", nbTrainSamples);
552
- return 0;
552
+ return ERROR(srcSize_wrong);
553
553
  }
554
554
  /* Check if there is at least one testing sample */
555
555
  if (nbTestSamples < 1) {
556
556
  DISPLAYLEVEL(1, "Total number of testing samples is %u and is invalid.", nbTestSamples);
557
- return 0;
557
+ return ERROR(srcSize_wrong);
558
558
  }
559
559
  /* Zero the context */
560
560
  memset(ctx, 0, sizeof(*ctx));
@@ -577,7 +577,7 @@ static int COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer,
577
577
  if (!ctx->suffix || !ctx->dmerAt || !ctx->offsets) {
578
578
  DISPLAYLEVEL(1, "Failed to allocate scratch buffers\n");
579
579
  COVER_ctx_destroy(ctx);
580
- return 0;
580
+ return ERROR(memory_allocation);
581
581
  }
582
582
  ctx->freqs = NULL;
583
583
  ctx->d = d;
@@ -624,7 +624,7 @@ static int COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer,
624
624
  (ctx->d <= 8 ? &COVER_cmp8 : &COVER_cmp), &COVER_group);
625
625
  ctx->freqs = ctx->suffix;
626
626
  ctx->suffix = NULL;
627
- return 1;
627
+ return 0;
628
628
  }
629
629
 
630
630
  void COVER_warnOnSmallCorpus(size_t maxDictSize, size_t nbDmers, int displayLevel)
@@ -729,11 +729,11 @@ ZDICTLIB_API size_t ZDICT_trainFromBuffer_cover(
729
729
  /* Checks */
730
730
  if (!COVER_checkParameters(parameters, dictBufferCapacity)) {
731
731
  DISPLAYLEVEL(1, "Cover parameters incorrect\n");
732
- return ERROR(GENERIC);
732
+ return ERROR(parameter_outOfBound);
733
733
  }
734
734
  if (nbSamples == 0) {
735
735
  DISPLAYLEVEL(1, "Cover must have at least one input file\n");
736
- return ERROR(GENERIC);
736
+ return ERROR(srcSize_wrong);
737
737
  }
738
738
  if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) {
739
739
  DISPLAYLEVEL(1, "dictBufferCapacity must be at least %u\n",
@@ -741,15 +741,18 @@ ZDICTLIB_API size_t ZDICT_trainFromBuffer_cover(
741
741
  return ERROR(dstSize_tooSmall);
742
742
  }
743
743
  /* Initialize context and activeDmers */
744
- if (!COVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples,
745
- parameters.d, parameters.splitPoint)) {
746
- return ERROR(GENERIC);
744
+ {
745
+ size_t const initVal = COVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples,
746
+ parameters.d, parameters.splitPoint);
747
+ if (ZSTD_isError(initVal)) {
748
+ return initVal;
749
+ }
747
750
  }
748
751
  COVER_warnOnSmallCorpus(dictBufferCapacity, ctx.suffixSize, g_displayLevel);
749
752
  if (!COVER_map_init(&activeDmers, parameters.k - parameters.d + 1)) {
750
753
  DISPLAYLEVEL(1, "Failed to allocate dmer map: out of memory\n");
751
754
  COVER_ctx_destroy(&ctx);
752
- return ERROR(GENERIC);
755
+ return ERROR(memory_allocation);
753
756
  }
754
757
 
755
758
  DISPLAYLEVEL(2, "Building dictionary\n");
@@ -810,7 +813,7 @@ size_t COVER_checkTotalCompressedSize(const ZDICT_cover_params_t parameters,
810
813
  cctx, dst, dstCapacity, samples + offsets[i],
811
814
  samplesSizes[i], cdict);
812
815
  if (ZSTD_isError(size)) {
813
- totalCompressedSize = ERROR(GENERIC);
816
+ totalCompressedSize = size;
814
817
  goto _compressCleanup;
815
818
  }
816
819
  totalCompressedSize += size;
@@ -886,9 +889,11 @@ void COVER_best_start(COVER_best_t *best) {
886
889
  * Decrements liveJobs and signals any waiting threads if liveJobs == 0.
887
890
  * If this dictionary is the best so far save it and its parameters.
888
891
  */
889
- void COVER_best_finish(COVER_best_t *best, size_t compressedSize,
890
- ZDICT_cover_params_t parameters, void *dict,
891
- size_t dictSize) {
892
+ void COVER_best_finish(COVER_best_t *best, ZDICT_cover_params_t parameters,
893
+ COVER_dictSelection_t selection) {
894
+ void* dict = selection.dictContent;
895
+ size_t compressedSize = selection.totalCompressedSize;
896
+ size_t dictSize = selection.dictSize;
892
897
  if (!best) {
893
898
  return;
894
899
  }
@@ -914,6 +919,9 @@ void COVER_best_finish(COVER_best_t *best, size_t compressedSize,
914
919
  }
915
920
  }
916
921
  /* Save the dictionary, parameters, and size */
922
+ if (!dict) {
923
+ return;
924
+ }
917
925
  memcpy(best->dict, dict, dictSize);
918
926
  best->dictSize = dictSize;
919
927
  best->parameters = parameters;
@@ -926,6 +934,111 @@ void COVER_best_finish(COVER_best_t *best, size_t compressedSize,
926
934
  }
927
935
  }
928
936
 
937
+ COVER_dictSelection_t COVER_dictSelectionError(size_t error) {
938
+ COVER_dictSelection_t selection = { NULL, 0, error };
939
+ return selection;
940
+ }
941
+
942
+ unsigned COVER_dictSelectionIsError(COVER_dictSelection_t selection) {
943
+ return (ZSTD_isError(selection.totalCompressedSize) || !selection.dictContent);
944
+ }
945
+
946
+ void COVER_dictSelectionFree(COVER_dictSelection_t selection){
947
+ free(selection.dictContent);
948
+ }
949
+
950
+ COVER_dictSelection_t COVER_selectDict(BYTE* customDictContent,
951
+ size_t dictContentSize, const BYTE* samplesBuffer, const size_t* samplesSizes, unsigned nbFinalizeSamples,
952
+ size_t nbCheckSamples, size_t nbSamples, ZDICT_cover_params_t params, size_t* offsets, size_t totalCompressedSize) {
953
+
954
+ size_t largestDict = 0;
955
+ size_t largestCompressed = 0;
956
+ BYTE* customDictContentEnd = customDictContent + dictContentSize;
957
+
958
+ BYTE * largestDictbuffer = (BYTE *)malloc(dictContentSize);
959
+ BYTE * candidateDictBuffer = (BYTE *)malloc(dictContentSize);
960
+ double regressionTolerance = ((double)params.shrinkDictMaxRegression / 100.0) + 1.00;
961
+
962
+ if (!largestDictbuffer || !candidateDictBuffer) {
963
+ free(largestDictbuffer);
964
+ free(candidateDictBuffer);
965
+ return COVER_dictSelectionError(dictContentSize);
966
+ }
967
+
968
+ /* Initial dictionary size and compressed size */
969
+ memcpy(largestDictbuffer, customDictContent, dictContentSize);
970
+ dictContentSize = ZDICT_finalizeDictionary(
971
+ largestDictbuffer, dictContentSize, customDictContent, dictContentSize,
972
+ samplesBuffer, samplesSizes, nbFinalizeSamples, params.zParams);
973
+
974
+ if (ZDICT_isError(dictContentSize)) {
975
+ free(largestDictbuffer);
976
+ free(candidateDictBuffer);
977
+ return COVER_dictSelectionError(dictContentSize);
978
+ }
979
+
980
+ totalCompressedSize = COVER_checkTotalCompressedSize(params, samplesSizes,
981
+ samplesBuffer, offsets,
982
+ nbCheckSamples, nbSamples,
983
+ largestDictbuffer, dictContentSize);
984
+
985
+ if (ZSTD_isError(totalCompressedSize)) {
986
+ free(largestDictbuffer);
987
+ free(candidateDictBuffer);
988
+ return COVER_dictSelectionError(totalCompressedSize);
989
+ }
990
+
991
+ if (params.shrinkDict == 0) {
992
+ COVER_dictSelection_t selection = { largestDictbuffer, dictContentSize, totalCompressedSize };
993
+ free(candidateDictBuffer);
994
+ return selection;
995
+ }
996
+
997
+ largestDict = dictContentSize;
998
+ largestCompressed = totalCompressedSize;
999
+ dictContentSize = ZDICT_DICTSIZE_MIN;
1000
+
1001
+ /* Largest dict is initially at least ZDICT_DICTSIZE_MIN */
1002
+ while (dictContentSize < largestDict) {
1003
+ memcpy(candidateDictBuffer, largestDictbuffer, largestDict);
1004
+ dictContentSize = ZDICT_finalizeDictionary(
1005
+ candidateDictBuffer, dictContentSize, customDictContentEnd - dictContentSize, dictContentSize,
1006
+ samplesBuffer, samplesSizes, nbFinalizeSamples, params.zParams);
1007
+
1008
+ if (ZDICT_isError(dictContentSize)) {
1009
+ free(largestDictbuffer);
1010
+ free(candidateDictBuffer);
1011
+ return COVER_dictSelectionError(dictContentSize);
1012
+
1013
+ }
1014
+
1015
+ totalCompressedSize = COVER_checkTotalCompressedSize(params, samplesSizes,
1016
+ samplesBuffer, offsets,
1017
+ nbCheckSamples, nbSamples,
1018
+ candidateDictBuffer, dictContentSize);
1019
+
1020
+ if (ZSTD_isError(totalCompressedSize)) {
1021
+ free(largestDictbuffer);
1022
+ free(candidateDictBuffer);
1023
+ return COVER_dictSelectionError(totalCompressedSize);
1024
+ }
1025
+
1026
+ if (totalCompressedSize <= largestCompressed * regressionTolerance) {
1027
+ COVER_dictSelection_t selection = { candidateDictBuffer, dictContentSize, totalCompressedSize };
1028
+ free(largestDictbuffer);
1029
+ return selection;
1030
+ }
1031
+ dictContentSize *= 2;
1032
+ }
1033
+ dictContentSize = largestDict;
1034
+ totalCompressedSize = largestCompressed;
1035
+ {
1036
+ COVER_dictSelection_t selection = { largestDictbuffer, dictContentSize, totalCompressedSize };
1037
+ free(candidateDictBuffer);
1038
+ return selection;
1039
+ }
1040
+ }
1041
+
929
1042
  /**
930
1043
  * Parameters for COVER_tryParameters().
931
1044
  */
@@ -951,6 +1064,7 @@ static void COVER_tryParameters(void *opaque) {
951
1064
  /* Allocate space for hash table, dict, and freqs */
952
1065
  COVER_map_t activeDmers;
953
1066
  BYTE *const dict = (BYTE * const)malloc(dictBufferCapacity);
1067
+ COVER_dictSelection_t selection = COVER_dictSelectionError(ERROR(GENERIC));
954
1068
  U32 *freqs = (U32 *)malloc(ctx->suffixSize * sizeof(U32));
955
1069
  if (!COVER_map_init(&activeDmers, parameters.k - parameters.d + 1)) {
956
1070
  DISPLAYLEVEL(1, "Failed to allocate dmer map: out of memory\n");
@@ -966,29 +1080,21 @@ static void COVER_tryParameters(void *opaque) {
966
1080
  {
967
1081
  const size_t tail = COVER_buildDictionary(ctx, freqs, &activeDmers, dict,
968
1082
  dictBufferCapacity, parameters);
969
- dictBufferCapacity = ZDICT_finalizeDictionary(
970
- dict, dictBufferCapacity, dict + tail, dictBufferCapacity - tail,
971
- ctx->samples, ctx->samplesSizes, (unsigned)ctx->nbTrainSamples,
972
- parameters.zParams);
973
- if (ZDICT_isError(dictBufferCapacity)) {
974
- DISPLAYLEVEL(1, "Failed to finalize dictionary\n");
1083
+ selection = COVER_selectDict(dict + tail, dictBufferCapacity - tail,
1084
+ ctx->samples, ctx->samplesSizes, (unsigned)ctx->nbTrainSamples, ctx->nbTrainSamples, ctx->nbSamples, parameters, ctx->offsets,
1085
+ totalCompressedSize);
1086
+
1087
+ if (COVER_dictSelectionIsError(selection)) {
1088
+ DISPLAYLEVEL(1, "Failed to select dictionary\n");
975
1089
  goto _cleanup;
976
1090
  }
977
1091
  }
978
- /* Check total compressed size */
979
- totalCompressedSize = COVER_checkTotalCompressedSize(parameters, ctx->samplesSizes,
980
- ctx->samples, ctx->offsets,
981
- ctx->nbTrainSamples, ctx->nbSamples,
982
- dict, dictBufferCapacity);
983
-
984
1092
  _cleanup:
985
- COVER_best_finish(data->best, totalCompressedSize, parameters, dict,
986
- dictBufferCapacity);
1093
+ free(dict);
1094
+ COVER_best_finish(data->best, parameters, selection);
987
1095
  free(data);
988
1096
  COVER_map_destroy(&activeDmers);
989
- if (dict) {
990
- free(dict);
991
- }
1097
+ COVER_dictSelectionFree(selection);
992
1098
  if (freqs) {
993
1099
  free(freqs);
994
1100
  }
@@ -1010,6 +1116,7 @@ ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover(
1010
1116
  const unsigned kStepSize = MAX((kMaxK - kMinK) / kSteps, 1);
1011
1117
  const unsigned kIterations =
1012
1118
  (1 + (kMaxD - kMinD) / 2) * (1 + (kMaxK - kMinK) / kStepSize);
1119
+ const unsigned shrinkDict = 0;
1013
1120
  /* Local variables */
1014
1121
  const int displayLevel = parameters->zParams.notificationLevel;
1015
1122
  unsigned iteration = 1;
@@ -1022,15 +1129,15 @@ ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover(
1022
1129
  /* Checks */
1023
1130
  if (splitPoint <= 0 || splitPoint > 1) {
1024
1131
  LOCALDISPLAYLEVEL(displayLevel, 1, "Incorrect parameters\n");
1025
- return ERROR(GENERIC);
1132
+ return ERROR(parameter_outOfBound);
1026
1133
  }
1027
1134
  if (kMinK < kMaxD || kMaxK < kMinK) {
1028
1135
  LOCALDISPLAYLEVEL(displayLevel, 1, "Incorrect parameters\n");
1029
- return ERROR(GENERIC);
1136
+ return ERROR(parameter_outOfBound);
1030
1137
  }
1031
1138
  if (nbSamples == 0) {
1032
1139
  DISPLAYLEVEL(1, "Cover must have at least one input file\n");
1033
- return ERROR(GENERIC);
1140
+ return ERROR(srcSize_wrong);
1034
1141
  }
1035
1142
  if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) {
1036
1143
  DISPLAYLEVEL(1, "dictBufferCapacity must be at least %u\n",
@@ -1054,11 +1161,14 @@ ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover(
1054
1161
  /* Initialize the context for this value of d */
1055
1162
  COVER_ctx_t ctx;
1056
1163
  LOCALDISPLAYLEVEL(displayLevel, 3, "d=%u\n", d);
1057
- if (!COVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples, d, splitPoint)) {
1058
- LOCALDISPLAYLEVEL(displayLevel, 1, "Failed to initialize context\n");
1059
- COVER_best_destroy(&best);
1060
- POOL_free(pool);
1061
- return ERROR(GENERIC);
1164
+ {
1165
+ const size_t initVal = COVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples, d, splitPoint);
1166
+ if (ZSTD_isError(initVal)) {
1167
+ LOCALDISPLAYLEVEL(displayLevel, 1, "Failed to initialize context\n");
1168
+ COVER_best_destroy(&best);
1169
+ POOL_free(pool);
1170
+ return initVal;
1171
+ }
1062
1172
  }
1063
1173
  if (!warned) {
1064
1174
  COVER_warnOnSmallCorpus(dictBufferCapacity, ctx.suffixSize, displayLevel);
@@ -1075,7 +1185,7 @@ ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover(
1075
1185
  COVER_best_destroy(&best);
1076
1186
  COVER_ctx_destroy(&ctx);
1077
1187
  POOL_free(pool);
1078
- return ERROR(GENERIC);
1188
+ return ERROR(memory_allocation);
1079
1189
  }
1080
1190
  data->ctx = &ctx;
1081
1191
  data->best = &best;
@@ -1085,6 +1195,7 @@ ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover(
1085
1195
  data->parameters.d = d;
1086
1196
  data->parameters.splitPoint = splitPoint;
1087
1197
  data->parameters.steps = kSteps;
1198
+ data->parameters.shrinkDict = shrinkDict;
1088
1199
  data->parameters.zParams.notificationLevel = g_displayLevel;
1089
1200
  /* Check the parameters */
1090
1201
  if (!COVER_checkParameters(data->parameters, dictBufferCapacity)) {
@@ -46,6 +46,15 @@ typedef struct {
46
46
  U32 size;
47
47
  } COVER_epoch_info_t;
48
48
 
49
+ /**
50
+ * Struct used for the dictionary selection function.
51
+ */
52
+ typedef struct COVER_dictSelection {
53
+ BYTE* dictContent;
54
+ size_t dictSize;
55
+ size_t totalCompressedSize;
56
+ } COVER_dictSelection_t;
57
+
49
58
  /**
50
59
  * Computes the number of epochs and the size of each epoch.
51
60
  * We will make sure that each epoch gets at least 10 * k bytes.
@@ -107,6 +116,32 @@ void COVER_best_start(COVER_best_t *best);
107
116
  * Decrements liveJobs and signals any waiting threads if liveJobs == 0.
108
117
  * If this dictionary is the best so far save it and its parameters.
109
118
  */
110
- void COVER_best_finish(COVER_best_t *best, size_t compressedSize,
111
- ZDICT_cover_params_t parameters, void *dict,
112
- size_t dictSize);
119
+ void COVER_best_finish(COVER_best_t *best, ZDICT_cover_params_t parameters,
120
+ COVER_dictSelection_t selection);
121
+ /**
122
+ * Error function for COVER_selectDict function. Checks if the return
123
+ * value is an error.
124
+ */
125
+ unsigned COVER_dictSelectionIsError(COVER_dictSelection_t selection);
126
+
127
+ /**
128
+ * Error function for COVER_selectDict function. Returns a struct where
129
+ * return.totalCompressedSize is a ZSTD error.
130
+ */
131
+ COVER_dictSelection_t COVER_dictSelectionError(size_t error);
132
+
133
+ /**
134
+ * Always call after selectDict is called to free up used memory from
135
+ * newly created dictionary.
136
+ */
137
+ void COVER_dictSelectionFree(COVER_dictSelection_t selection);
138
+
139
+ /**
140
+ * Called to finalize the dictionary and select one based on whether or not
141
+ * the shrink-dict flag was enabled. If enabled the dictionary used is the
142
+ * smallest dictionary within a specified regression of the compressed size
143
+ * from the largest dictionary.
144
+ */
145
+ COVER_dictSelection_t COVER_selectDict(BYTE* customDictContent,
146
+ size_t dictContentSize, const BYTE* samplesBuffer, const size_t* samplesSizes, unsigned nbFinalizeSamples,
147
+ size_t nbCheckSamples, size_t nbSamples, ZDICT_cover_params_t params, size_t* offsets, size_t totalCompressedSize);
@@ -287,10 +287,10 @@ FASTCOVER_computeFrequency(U32* freqs, const FASTCOVER_ctx_t* ctx)
287
287
  * Prepare a context for dictionary building.
288
288
  * The context is only dependent on the parameter `d` and can be used multiple
289
289
  * times.
290
- * Returns 1 on success or zero on error.
290
+ * Returns 0 on success or error code on error.
291
291
  * The context must be destroyed with `FASTCOVER_ctx_destroy()`.
292
292
  */
293
- static int
293
+ static size_t
294
294
  FASTCOVER_ctx_init(FASTCOVER_ctx_t* ctx,
295
295
  const void* samplesBuffer,
296
296
  const size_t* samplesSizes, unsigned nbSamples,
@@ -310,19 +310,19 @@ FASTCOVER_ctx_init(FASTCOVER_ctx_t* ctx,
310
310
  totalSamplesSize >= (size_t)FASTCOVER_MAX_SAMPLES_SIZE) {
311
311
  DISPLAYLEVEL(1, "Total samples size is too large (%u MB), maximum size is %u MB\n",
312
312
  (unsigned)(totalSamplesSize >> 20), (FASTCOVER_MAX_SAMPLES_SIZE >> 20));
313
- return 0;
313
+ return ERROR(srcSize_wrong);
314
314
  }
315
315
 
316
316
  /* Check if there are at least 5 training samples */
317
317
  if (nbTrainSamples < 5) {
318
318
  DISPLAYLEVEL(1, "Total number of training samples is %u and is invalid\n", nbTrainSamples);
319
- return 0;
319
+ return ERROR(srcSize_wrong);
320
320
  }
321
321
 
322
322
  /* Check if there is at least one testing sample */
323
323
  if (nbTestSamples < 1) {
324
324
  DISPLAYLEVEL(1, "Total number of testing samples is %u and is invalid.\n", nbTestSamples);
325
- return 0;
325
+ return ERROR(srcSize_wrong);
326
326
  }
327
327
 
328
328
  /* Zero the context */
@@ -347,7 +347,7 @@ FASTCOVER_ctx_init(FASTCOVER_ctx_t* ctx,
347
347
  if (ctx->offsets == NULL) {
348
348
  DISPLAYLEVEL(1, "Failed to allocate scratch buffers \n");
349
349
  FASTCOVER_ctx_destroy(ctx);
350
- return 0;
350
+ return ERROR(memory_allocation);
351
351
  }
352
352
 
353
353
  /* Fill offsets from the samplesSizes */
@@ -364,13 +364,13 @@ FASTCOVER_ctx_init(FASTCOVER_ctx_t* ctx,
364
364
  if (ctx->freqs == NULL) {
365
365
  DISPLAYLEVEL(1, "Failed to allocate frequency table \n");
366
366
  FASTCOVER_ctx_destroy(ctx);
367
- return 0;
367
+ return ERROR(memory_allocation);
368
368
  }
369
369
 
370
370
  DISPLAYLEVEL(2, "Computing frequencies\n");
371
371
  FASTCOVER_computeFrequency(ctx->freqs, ctx);
372
372
 
373
- return 1;
373
+ return 0;
374
374
  }
375
375
 
376
376
 
@@ -435,7 +435,6 @@ FASTCOVER_buildDictionary(const FASTCOVER_ctx_t* ctx,
435
435
  return tail;
436
436
  }
437
437
 
438
-
439
438
  /**
440
439
  * Parameters for FASTCOVER_tryParameters().
441
440
  */
@@ -464,6 +463,7 @@ static void FASTCOVER_tryParameters(void *opaque)
464
463
  U16* segmentFreqs = (U16 *)calloc(((U64)1 << ctx->f), sizeof(U16));
465
464
  /* Allocate space for hash table, dict, and freqs */
466
465
  BYTE *const dict = (BYTE * const)malloc(dictBufferCapacity);
466
+ COVER_dictSelection_t selection = COVER_dictSelectionError(ERROR(GENERIC));
467
467
  U32 *freqs = (U32*) malloc(((U64)1 << ctx->f) * sizeof(U32));
468
468
  if (!segmentFreqs || !dict || !freqs) {
469
469
  DISPLAYLEVEL(1, "Failed to allocate buffers: out of memory\n");
@@ -473,27 +473,24 @@ static void FASTCOVER_tryParameters(void *opaque)
473
473
  memcpy(freqs, ctx->freqs, ((U64)1 << ctx->f) * sizeof(U32));
474
474
  /* Build the dictionary */
475
475
  { const size_t tail = FASTCOVER_buildDictionary(ctx, freqs, dict, dictBufferCapacity,
476
- parameters, segmentFreqs);
476
+ parameters, segmentFreqs);
477
+
477
478
  const unsigned nbFinalizeSamples = (unsigned)(ctx->nbTrainSamples * ctx->accelParams.finalize / 100);
478
- dictBufferCapacity = ZDICT_finalizeDictionary(
479
- dict, dictBufferCapacity, dict + tail, dictBufferCapacity - tail,
480
- ctx->samples, ctx->samplesSizes, nbFinalizeSamples, parameters.zParams);
481
- if (ZDICT_isError(dictBufferCapacity)) {
482
- DISPLAYLEVEL(1, "Failed to finalize dictionary\n");
479
+ selection = COVER_selectDict(dict + tail, dictBufferCapacity - tail,
480
+ ctx->samples, ctx->samplesSizes, nbFinalizeSamples, ctx->nbTrainSamples, ctx->nbSamples, parameters, ctx->offsets,
481
+ totalCompressedSize);
482
+
483
+ if (COVER_dictSelectionIsError(selection)) {
484
+ DISPLAYLEVEL(1, "Failed to select dictionary\n");
483
485
  goto _cleanup;
484
486
  }
485
487
  }
486
- /* Check total compressed size */
487
- totalCompressedSize = COVER_checkTotalCompressedSize(parameters, ctx->samplesSizes,
488
- ctx->samples, ctx->offsets,
489
- ctx->nbTrainSamples, ctx->nbSamples,
490
- dict, dictBufferCapacity);
491
488
  _cleanup:
492
- COVER_best_finish(data->best, totalCompressedSize, parameters, dict,
493
- dictBufferCapacity);
489
+ free(dict);
490
+ COVER_best_finish(data->best, parameters, selection);
494
491
  free(data);
495
492
  free(segmentFreqs);
496
- free(dict);
493
+ COVER_dictSelectionFree(selection);
497
494
  free(freqs);
498
495
  }
499
496
 
@@ -508,6 +505,7 @@ FASTCOVER_convertToCoverParams(ZDICT_fastCover_params_t fastCoverParams,
508
505
  coverParams->nbThreads = fastCoverParams.nbThreads;
509
506
  coverParams->splitPoint = fastCoverParams.splitPoint;
510
507
  coverParams->zParams = fastCoverParams.zParams;
508
+ coverParams->shrinkDict = fastCoverParams.shrinkDict;
511
509
  }
512
510
 
513
511
 
@@ -524,6 +522,7 @@ FASTCOVER_convertToFastCoverParams(ZDICT_cover_params_t coverParams,
524
522
  fastCoverParams->f = f;
525
523
  fastCoverParams->accel = accel;
526
524
  fastCoverParams->zParams = coverParams.zParams;
525
+ fastCoverParams->shrinkDict = coverParams.shrinkDict;
527
526
  }
528
527
 
529
528
 
@@ -550,11 +549,11 @@ ZDICT_trainFromBuffer_fastCover(void* dictBuffer, size_t dictBufferCapacity,
550
549
  if (!FASTCOVER_checkParameters(coverParams, dictBufferCapacity, parameters.f,
551
550
  parameters.accel)) {
552
551
  DISPLAYLEVEL(1, "FASTCOVER parameters incorrect\n");
553
- return ERROR(GENERIC);
552
+ return ERROR(parameter_outOfBound);
554
553
  }
555
554
  if (nbSamples == 0) {
556
555
  DISPLAYLEVEL(1, "FASTCOVER must have at least one input file\n");
557
- return ERROR(GENERIC);
556
+ return ERROR(srcSize_wrong);
558
557
  }
559
558
  if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) {
560
559
  DISPLAYLEVEL(1, "dictBufferCapacity must be at least %u\n",
@@ -564,11 +563,14 @@ ZDICT_trainFromBuffer_fastCover(void* dictBuffer, size_t dictBufferCapacity,
564
563
  /* Assign corresponding FASTCOVER_accel_t to accelParams*/
565
564
  accelParams = FASTCOVER_defaultAccelParameters[parameters.accel];
566
565
  /* Initialize context */
567
- if (!FASTCOVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples,
566
+ {
567
+ size_t const initVal = FASTCOVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples,
568
568
  coverParams.d, parameters.splitPoint, parameters.f,
569
- accelParams)) {
570
- DISPLAYLEVEL(1, "Failed to initialize context\n");
571
- return ERROR(GENERIC);
569
+ accelParams);
570
+ if (ZSTD_isError(initVal)) {
571
+ DISPLAYLEVEL(1, "Failed to initialize context\n");
572
+ return initVal;
573
+ }
572
574
  }
573
575
  COVER_warnOnSmallCorpus(dictBufferCapacity, ctx.nbDmers, g_displayLevel);
574
576
  /* Build the dictionary */
@@ -616,6 +618,7 @@ ZDICT_optimizeTrainFromBuffer_fastCover(
616
618
  (1 + (kMaxD - kMinD) / 2) * (1 + (kMaxK - kMinK) / kStepSize);
617
619
  const unsigned f = parameters->f == 0 ? DEFAULT_F : parameters->f;
618
620
  const unsigned accel = parameters->accel == 0 ? DEFAULT_ACCEL : parameters->accel;
621
+ const unsigned shrinkDict = 0;
619
622
  /* Local variables */
620
623
  const int displayLevel = parameters->zParams.notificationLevel;
621
624
  unsigned iteration = 1;
@@ -627,19 +630,19 @@ ZDICT_optimizeTrainFromBuffer_fastCover(
627
630
  /* Checks */
628
631
  if (splitPoint <= 0 || splitPoint > 1) {
629
632
  LOCALDISPLAYLEVEL(displayLevel, 1, "Incorrect splitPoint\n");
630
- return ERROR(GENERIC);
633
+ return ERROR(parameter_outOfBound);
631
634
  }
632
635
  if (accel == 0 || accel > FASTCOVER_MAX_ACCEL) {
633
636
  LOCALDISPLAYLEVEL(displayLevel, 1, "Incorrect accel\n");
634
- return ERROR(GENERIC);
637
+ return ERROR(parameter_outOfBound);
635
638
  }
636
639
  if (kMinK < kMaxD || kMaxK < kMinK) {
637
640
  LOCALDISPLAYLEVEL(displayLevel, 1, "Incorrect k\n");
638
- return ERROR(GENERIC);
641
+ return ERROR(parameter_outOfBound);
639
642
  }
640
643
  if (nbSamples == 0) {
641
644
  LOCALDISPLAYLEVEL(displayLevel, 1, "FASTCOVER must have at least one input file\n");
642
- return ERROR(GENERIC);
645
+ return ERROR(srcSize_wrong);
643
646
  }
644
647
  if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) {
645
648
  LOCALDISPLAYLEVEL(displayLevel, 1, "dictBufferCapacity must be at least %u\n",
@@ -666,11 +669,14 @@ ZDICT_optimizeTrainFromBuffer_fastCover(
666
669
  /* Initialize the context for this value of d */
667
670
  FASTCOVER_ctx_t ctx;
668
671
  LOCALDISPLAYLEVEL(displayLevel, 3, "d=%u\n", d);
669
- if (!FASTCOVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples, d, splitPoint, f, accelParams)) {
670
- LOCALDISPLAYLEVEL(displayLevel, 1, "Failed to initialize context\n");
671
- COVER_best_destroy(&best);
672
- POOL_free(pool);
673
- return ERROR(GENERIC);
672
+ {
673
+ size_t const initVal = FASTCOVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples, d, splitPoint, f, accelParams);
674
+ if (ZSTD_isError(initVal)) {
675
+ LOCALDISPLAYLEVEL(displayLevel, 1, "Failed to initialize context\n");
676
+ COVER_best_destroy(&best);
677
+ POOL_free(pool);
678
+ return initVal;
679
+ }
674
680
  }
675
681
  if (!warned) {
676
682
  COVER_warnOnSmallCorpus(dictBufferCapacity, ctx.nbDmers, displayLevel);
@@ -687,7 +693,7 @@ ZDICT_optimizeTrainFromBuffer_fastCover(
687
693
  COVER_best_destroy(&best);
688
694
  FASTCOVER_ctx_destroy(&ctx);
689
695
  POOL_free(pool);
690
- return ERROR(GENERIC);
696
+ return ERROR(memory_allocation);
691
697
  }
692
698
  data->ctx = &ctx;
693
699
  data->best = &best;
@@ -697,6 +703,7 @@ ZDICT_optimizeTrainFromBuffer_fastCover(
697
703
  data->parameters.d = d;
698
704
  data->parameters.splitPoint = splitPoint;
699
705
  data->parameters.steps = kSteps;
706
+ data->parameters.shrinkDict = shrinkDict;
700
707
  data->parameters.zParams.notificationLevel = g_displayLevel;
701
708
  /* Check the parameters */
702
709
  if (!FASTCOVER_checkParameters(data->parameters, dictBufferCapacity,