zstd-ruby 1.4.0.0 → 1.4.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (32)
  1. checksums.yaml +4 -4
  2. data/README.md +1 -1
  3. data/ext/zstdruby/libzstd/Makefile +5 -0
  4. data/ext/zstdruby/libzstd/common/compiler.h +7 -0
  5. data/ext/zstdruby/libzstd/common/zstd_internal.h +58 -6
  6. data/ext/zstdruby/libzstd/compress/zstd_compress.c +175 -117
  7. data/ext/zstdruby/libzstd/compress/zstd_compress_internal.h +74 -30
  8. data/ext/zstdruby/libzstd/compress/zstd_double_fast.c +56 -36
  9. data/ext/zstdruby/libzstd/compress/zstd_fast.c +35 -14
  10. data/ext/zstdruby/libzstd/compress/zstd_lazy.c +10 -5
  11. data/ext/zstdruby/libzstd/compress/zstd_ldm.c +1 -1
  12. data/ext/zstdruby/libzstd/compress/zstd_opt.c +45 -32
  13. data/ext/zstdruby/libzstd/compress/zstdmt_compress.c +18 -7
  14. data/ext/zstdruby/libzstd/compress/zstdmt_compress.h +1 -0
  15. data/ext/zstdruby/libzstd/decompress/zstd_decompress.c +12 -9
  16. data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.c +20 -9
  17. data/ext/zstdruby/libzstd/dictBuilder/cover.c +154 -43
  18. data/ext/zstdruby/libzstd/dictBuilder/cover.h +38 -3
  19. data/ext/zstdruby/libzstd/dictBuilder/fastcover.c +46 -39
  20. data/ext/zstdruby/libzstd/dictBuilder/zdict.c +9 -9
  21. data/ext/zstdruby/libzstd/dictBuilder/zdict.h +5 -0
  22. data/ext/zstdruby/libzstd/legacy/zstd_legacy.h +4 -0
  23. data/ext/zstdruby/libzstd/legacy/zstd_v01.c +95 -101
  24. data/ext/zstdruby/libzstd/legacy/zstd_v02.c +11 -6
  25. data/ext/zstdruby/libzstd/legacy/zstd_v03.c +11 -6
  26. data/ext/zstdruby/libzstd/legacy/zstd_v04.c +11 -8
  27. data/ext/zstdruby/libzstd/legacy/zstd_v05.c +88 -84
  28. data/ext/zstdruby/libzstd/legacy/zstd_v06.c +2 -4
  29. data/ext/zstdruby/libzstd/legacy/zstd_v07.c +2 -4
  30. data/ext/zstdruby/libzstd/zstd.h +53 -21
  31. data/lib/zstd-ruby/version.rb +1 -1
  32. metadata +3 -4
@@ -526,10 +526,10 @@ static void COVER_ctx_destroy(COVER_ctx_t *ctx) {
526
526
  * Prepare a context for dictionary building.
527
527
  * The context is only dependent on the parameter `d` and can used multiple
528
528
  * times.
529
- * Returns 1 on success or zero on error.
529
+ * Returns 0 on success or error code on error.
530
530
  * The context must be destroyed with `COVER_ctx_destroy()`.
531
531
  */
532
- static int COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer,
532
+ static size_t COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer,
533
533
  const size_t *samplesSizes, unsigned nbSamples,
534
534
  unsigned d, double splitPoint) {
535
535
  const BYTE *const samples = (const BYTE *)samplesBuffer;
@@ -544,17 +544,17 @@ static int COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer,
544
544
  totalSamplesSize >= (size_t)COVER_MAX_SAMPLES_SIZE) {
545
545
  DISPLAYLEVEL(1, "Total samples size is too large (%u MB), maximum size is %u MB\n",
546
546
  (unsigned)(totalSamplesSize>>20), (COVER_MAX_SAMPLES_SIZE >> 20));
547
- return 0;
547
+ return ERROR(srcSize_wrong);
548
548
  }
549
549
  /* Check if there are at least 5 training samples */
550
550
  if (nbTrainSamples < 5) {
551
551
  DISPLAYLEVEL(1, "Total number of training samples is %u and is invalid.", nbTrainSamples);
552
- return 0;
552
+ return ERROR(srcSize_wrong);
553
553
  }
554
554
  /* Check if there's testing sample */
555
555
  if (nbTestSamples < 1) {
556
556
  DISPLAYLEVEL(1, "Total number of testing samples is %u and is invalid.", nbTestSamples);
557
- return 0;
557
+ return ERROR(srcSize_wrong);
558
558
  }
559
559
  /* Zero the context */
560
560
  memset(ctx, 0, sizeof(*ctx));
@@ -577,7 +577,7 @@ static int COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer,
577
577
  if (!ctx->suffix || !ctx->dmerAt || !ctx->offsets) {
578
578
  DISPLAYLEVEL(1, "Failed to allocate scratch buffers\n");
579
579
  COVER_ctx_destroy(ctx);
580
- return 0;
580
+ return ERROR(memory_allocation);
581
581
  }
582
582
  ctx->freqs = NULL;
583
583
  ctx->d = d;
@@ -624,7 +624,7 @@ static int COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer,
624
624
  (ctx->d <= 8 ? &COVER_cmp8 : &COVER_cmp), &COVER_group);
625
625
  ctx->freqs = ctx->suffix;
626
626
  ctx->suffix = NULL;
627
- return 1;
627
+ return 0;
628
628
  }
629
629
 
630
630
  void COVER_warnOnSmallCorpus(size_t maxDictSize, size_t nbDmers, int displayLevel)
@@ -729,11 +729,11 @@ ZDICTLIB_API size_t ZDICT_trainFromBuffer_cover(
729
729
  /* Checks */
730
730
  if (!COVER_checkParameters(parameters, dictBufferCapacity)) {
731
731
  DISPLAYLEVEL(1, "Cover parameters incorrect\n");
732
- return ERROR(GENERIC);
732
+ return ERROR(parameter_outOfBound);
733
733
  }
734
734
  if (nbSamples == 0) {
735
735
  DISPLAYLEVEL(1, "Cover must have at least one input file\n");
736
- return ERROR(GENERIC);
736
+ return ERROR(srcSize_wrong);
737
737
  }
738
738
  if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) {
739
739
  DISPLAYLEVEL(1, "dictBufferCapacity must be at least %u\n",
@@ -741,15 +741,18 @@ ZDICTLIB_API size_t ZDICT_trainFromBuffer_cover(
741
741
  return ERROR(dstSize_tooSmall);
742
742
  }
743
743
  /* Initialize context and activeDmers */
744
- if (!COVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples,
745
- parameters.d, parameters.splitPoint)) {
746
- return ERROR(GENERIC);
744
+ {
745
+ size_t const initVal = COVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples,
746
+ parameters.d, parameters.splitPoint);
747
+ if (ZSTD_isError(initVal)) {
748
+ return initVal;
749
+ }
747
750
  }
748
751
  COVER_warnOnSmallCorpus(dictBufferCapacity, ctx.suffixSize, g_displayLevel);
749
752
  if (!COVER_map_init(&activeDmers, parameters.k - parameters.d + 1)) {
750
753
  DISPLAYLEVEL(1, "Failed to allocate dmer map: out of memory\n");
751
754
  COVER_ctx_destroy(&ctx);
752
- return ERROR(GENERIC);
755
+ return ERROR(memory_allocation);
753
756
  }
754
757
 
755
758
  DISPLAYLEVEL(2, "Building dictionary\n");
@@ -810,7 +813,7 @@ size_t COVER_checkTotalCompressedSize(const ZDICT_cover_params_t parameters,
810
813
  cctx, dst, dstCapacity, samples + offsets[i],
811
814
  samplesSizes[i], cdict);
812
815
  if (ZSTD_isError(size)) {
813
- totalCompressedSize = ERROR(GENERIC);
816
+ totalCompressedSize = size;
814
817
  goto _compressCleanup;
815
818
  }
816
819
  totalCompressedSize += size;
@@ -886,9 +889,11 @@ void COVER_best_start(COVER_best_t *best) {
886
889
  * Decrements liveJobs and signals any waiting threads if liveJobs == 0.
887
890
  * If this dictionary is the best so far save it and its parameters.
888
891
  */
889
- void COVER_best_finish(COVER_best_t *best, size_t compressedSize,
890
- ZDICT_cover_params_t parameters, void *dict,
891
- size_t dictSize) {
892
+ void COVER_best_finish(COVER_best_t *best, ZDICT_cover_params_t parameters,
893
+ COVER_dictSelection_t selection) {
894
+ void* dict = selection.dictContent;
895
+ size_t compressedSize = selection.totalCompressedSize;
896
+ size_t dictSize = selection.dictSize;
892
897
  if (!best) {
893
898
  return;
894
899
  }
@@ -914,6 +919,9 @@ void COVER_best_finish(COVER_best_t *best, size_t compressedSize,
914
919
  }
915
920
  }
916
921
  /* Save the dictionary, parameters, and size */
922
+ if (!dict) {
923
+ return;
924
+ }
917
925
  memcpy(best->dict, dict, dictSize);
918
926
  best->dictSize = dictSize;
919
927
  best->parameters = parameters;
@@ -926,6 +934,111 @@ void COVER_best_finish(COVER_best_t *best, size_t compressedSize,
926
934
  }
927
935
  }
928
936
 
937
+ COVER_dictSelection_t COVER_dictSelectionError(size_t error) {
938
+ COVER_dictSelection_t selection = { NULL, 0, error };
939
+ return selection;
940
+ }
941
+
942
+ unsigned COVER_dictSelectionIsError(COVER_dictSelection_t selection) {
943
+ return (ZSTD_isError(selection.totalCompressedSize) || !selection.dictContent);
944
+ }
945
+
946
+ void COVER_dictSelectionFree(COVER_dictSelection_t selection){
947
+ free(selection.dictContent);
948
+ }
949
+
950
+ COVER_dictSelection_t COVER_selectDict(BYTE* customDictContent,
951
+ size_t dictContentSize, const BYTE* samplesBuffer, const size_t* samplesSizes, unsigned nbFinalizeSamples,
952
+ size_t nbCheckSamples, size_t nbSamples, ZDICT_cover_params_t params, size_t* offsets, size_t totalCompressedSize) {
953
+
954
+ size_t largestDict = 0;
955
+ size_t largestCompressed = 0;
956
+ BYTE* customDictContentEnd = customDictContent + dictContentSize;
957
+
958
+ BYTE * largestDictbuffer = (BYTE *)malloc(dictContentSize);
959
+ BYTE * candidateDictBuffer = (BYTE *)malloc(dictContentSize);
960
+ double regressionTolerance = ((double)params.shrinkDictMaxRegression / 100.0) + 1.00;
961
+
962
+ if (!largestDictbuffer || !candidateDictBuffer) {
963
+ free(largestDictbuffer);
964
+ free(candidateDictBuffer);
965
+ return COVER_dictSelectionError(dictContentSize);
966
+ }
967
+
968
+ /* Initial dictionary size and compressed size */
969
+ memcpy(largestDictbuffer, customDictContent, dictContentSize);
970
+ dictContentSize = ZDICT_finalizeDictionary(
971
+ largestDictbuffer, dictContentSize, customDictContent, dictContentSize,
972
+ samplesBuffer, samplesSizes, nbFinalizeSamples, params.zParams);
973
+
974
+ if (ZDICT_isError(dictContentSize)) {
975
+ free(largestDictbuffer);
976
+ free(candidateDictBuffer);
977
+ return COVER_dictSelectionError(dictContentSize);
978
+ }
979
+
980
+ totalCompressedSize = COVER_checkTotalCompressedSize(params, samplesSizes,
981
+ samplesBuffer, offsets,
982
+ nbCheckSamples, nbSamples,
983
+ largestDictbuffer, dictContentSize);
984
+
985
+ if (ZSTD_isError(totalCompressedSize)) {
986
+ free(largestDictbuffer);
987
+ free(candidateDictBuffer);
988
+ return COVER_dictSelectionError(totalCompressedSize);
989
+ }
990
+
991
+ if (params.shrinkDict == 0) {
992
+ COVER_dictSelection_t selection = { largestDictbuffer, dictContentSize, totalCompressedSize };
993
+ free(candidateDictBuffer);
994
+ return selection;
995
+ }
996
+
997
+ largestDict = dictContentSize;
998
+ largestCompressed = totalCompressedSize;
999
+ dictContentSize = ZDICT_DICTSIZE_MIN;
1000
+
1001
+ /* Largest dict is initially at least ZDICT_DICTSIZE_MIN */
1002
+ while (dictContentSize < largestDict) {
1003
+ memcpy(candidateDictBuffer, largestDictbuffer, largestDict);
1004
+ dictContentSize = ZDICT_finalizeDictionary(
1005
+ candidateDictBuffer, dictContentSize, customDictContentEnd - dictContentSize, dictContentSize,
1006
+ samplesBuffer, samplesSizes, nbFinalizeSamples, params.zParams);
1007
+
1008
+ if (ZDICT_isError(dictContentSize)) {
1009
+ free(largestDictbuffer);
1010
+ free(candidateDictBuffer);
1011
+ return COVER_dictSelectionError(dictContentSize);
1012
+
1013
+ }
1014
+
1015
+ totalCompressedSize = COVER_checkTotalCompressedSize(params, samplesSizes,
1016
+ samplesBuffer, offsets,
1017
+ nbCheckSamples, nbSamples,
1018
+ candidateDictBuffer, dictContentSize);
1019
+
1020
+ if (ZSTD_isError(totalCompressedSize)) {
1021
+ free(largestDictbuffer);
1022
+ free(candidateDictBuffer);
1023
+ return COVER_dictSelectionError(totalCompressedSize);
1024
+ }
1025
+
1026
+ if (totalCompressedSize <= largestCompressed * regressionTolerance) {
1027
+ COVER_dictSelection_t selection = { candidateDictBuffer, dictContentSize, totalCompressedSize };
1028
+ free(largestDictbuffer);
1029
+ return selection;
1030
+ }
1031
+ dictContentSize *= 2;
1032
+ }
1033
+ dictContentSize = largestDict;
1034
+ totalCompressedSize = largestCompressed;
1035
+ {
1036
+ COVER_dictSelection_t selection = { largestDictbuffer, dictContentSize, totalCompressedSize };
1037
+ free(candidateDictBuffer);
1038
+ return selection;
1039
+ }
1040
+ }
1041
+
929
1042
  /**
930
1043
  * Parameters for COVER_tryParameters().
931
1044
  */
@@ -951,6 +1064,7 @@ static void COVER_tryParameters(void *opaque) {
951
1064
  /* Allocate space for hash table, dict, and freqs */
952
1065
  COVER_map_t activeDmers;
953
1066
  BYTE *const dict = (BYTE * const)malloc(dictBufferCapacity);
1067
+ COVER_dictSelection_t selection = COVER_dictSelectionError(ERROR(GENERIC));
954
1068
  U32 *freqs = (U32 *)malloc(ctx->suffixSize * sizeof(U32));
955
1069
  if (!COVER_map_init(&activeDmers, parameters.k - parameters.d + 1)) {
956
1070
  DISPLAYLEVEL(1, "Failed to allocate dmer map: out of memory\n");
@@ -966,29 +1080,21 @@ static void COVER_tryParameters(void *opaque) {
966
1080
  {
967
1081
  const size_t tail = COVER_buildDictionary(ctx, freqs, &activeDmers, dict,
968
1082
  dictBufferCapacity, parameters);
969
- dictBufferCapacity = ZDICT_finalizeDictionary(
970
- dict, dictBufferCapacity, dict + tail, dictBufferCapacity - tail,
971
- ctx->samples, ctx->samplesSizes, (unsigned)ctx->nbTrainSamples,
972
- parameters.zParams);
973
- if (ZDICT_isError(dictBufferCapacity)) {
974
- DISPLAYLEVEL(1, "Failed to finalize dictionary\n");
1083
+ selection = COVER_selectDict(dict + tail, dictBufferCapacity - tail,
1084
+ ctx->samples, ctx->samplesSizes, (unsigned)ctx->nbTrainSamples, ctx->nbTrainSamples, ctx->nbSamples, parameters, ctx->offsets,
1085
+ totalCompressedSize);
1086
+
1087
+ if (COVER_dictSelectionIsError(selection)) {
1088
+ DISPLAYLEVEL(1, "Failed to select dictionary\n");
975
1089
  goto _cleanup;
976
1090
  }
977
1091
  }
978
- /* Check total compressed size */
979
- totalCompressedSize = COVER_checkTotalCompressedSize(parameters, ctx->samplesSizes,
980
- ctx->samples, ctx->offsets,
981
- ctx->nbTrainSamples, ctx->nbSamples,
982
- dict, dictBufferCapacity);
983
-
984
1092
  _cleanup:
985
- COVER_best_finish(data->best, totalCompressedSize, parameters, dict,
986
- dictBufferCapacity);
1093
+ free(dict);
1094
+ COVER_best_finish(data->best, parameters, selection);
987
1095
  free(data);
988
1096
  COVER_map_destroy(&activeDmers);
989
- if (dict) {
990
- free(dict);
991
- }
1097
+ COVER_dictSelectionFree(selection);
992
1098
  if (freqs) {
993
1099
  free(freqs);
994
1100
  }
@@ -1010,6 +1116,7 @@ ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover(
1010
1116
  const unsigned kStepSize = MAX((kMaxK - kMinK) / kSteps, 1);
1011
1117
  const unsigned kIterations =
1012
1118
  (1 + (kMaxD - kMinD) / 2) * (1 + (kMaxK - kMinK) / kStepSize);
1119
+ const unsigned shrinkDict = 0;
1013
1120
  /* Local variables */
1014
1121
  const int displayLevel = parameters->zParams.notificationLevel;
1015
1122
  unsigned iteration = 1;
@@ -1022,15 +1129,15 @@ ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover(
1022
1129
  /* Checks */
1023
1130
  if (splitPoint <= 0 || splitPoint > 1) {
1024
1131
  LOCALDISPLAYLEVEL(displayLevel, 1, "Incorrect parameters\n");
1025
- return ERROR(GENERIC);
1132
+ return ERROR(parameter_outOfBound);
1026
1133
  }
1027
1134
  if (kMinK < kMaxD || kMaxK < kMinK) {
1028
1135
  LOCALDISPLAYLEVEL(displayLevel, 1, "Incorrect parameters\n");
1029
- return ERROR(GENERIC);
1136
+ return ERROR(parameter_outOfBound);
1030
1137
  }
1031
1138
  if (nbSamples == 0) {
1032
1139
  DISPLAYLEVEL(1, "Cover must have at least one input file\n");
1033
- return ERROR(GENERIC);
1140
+ return ERROR(srcSize_wrong);
1034
1141
  }
1035
1142
  if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) {
1036
1143
  DISPLAYLEVEL(1, "dictBufferCapacity must be at least %u\n",
@@ -1054,11 +1161,14 @@ ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover(
1054
1161
  /* Initialize the context for this value of d */
1055
1162
  COVER_ctx_t ctx;
1056
1163
  LOCALDISPLAYLEVEL(displayLevel, 3, "d=%u\n", d);
1057
- if (!COVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples, d, splitPoint)) {
1058
- LOCALDISPLAYLEVEL(displayLevel, 1, "Failed to initialize context\n");
1059
- COVER_best_destroy(&best);
1060
- POOL_free(pool);
1061
- return ERROR(GENERIC);
1164
+ {
1165
+ const size_t initVal = COVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples, d, splitPoint);
1166
+ if (ZSTD_isError(initVal)) {
1167
+ LOCALDISPLAYLEVEL(displayLevel, 1, "Failed to initialize context\n");
1168
+ COVER_best_destroy(&best);
1169
+ POOL_free(pool);
1170
+ return initVal;
1171
+ }
1062
1172
  }
1063
1173
  if (!warned) {
1064
1174
  COVER_warnOnSmallCorpus(dictBufferCapacity, ctx.suffixSize, displayLevel);
@@ -1075,7 +1185,7 @@ ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover(
1075
1185
  COVER_best_destroy(&best);
1076
1186
  COVER_ctx_destroy(&ctx);
1077
1187
  POOL_free(pool);
1078
- return ERROR(GENERIC);
1188
+ return ERROR(memory_allocation);
1079
1189
  }
1080
1190
  data->ctx = &ctx;
1081
1191
  data->best = &best;
@@ -1085,6 +1195,7 @@ ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover(
1085
1195
  data->parameters.d = d;
1086
1196
  data->parameters.splitPoint = splitPoint;
1087
1197
  data->parameters.steps = kSteps;
1198
+ data->parameters.shrinkDict = shrinkDict;
1088
1199
  data->parameters.zParams.notificationLevel = g_displayLevel;
1089
1200
  /* Check the parameters */
1090
1201
  if (!COVER_checkParameters(data->parameters, dictBufferCapacity)) {
@@ -46,6 +46,15 @@ typedef struct {
46
46
  U32 size;
47
47
  } COVER_epoch_info_t;
48
48
 
49
+ /**
50
+ * Struct used for the dictionary selection function.
51
+ */
52
+ typedef struct COVER_dictSelection {
53
+ BYTE* dictContent;
54
+ size_t dictSize;
55
+ size_t totalCompressedSize;
56
+ } COVER_dictSelection_t;
57
+
49
58
  /**
50
59
  * Computes the number of epochs and the size of each epoch.
51
60
  * We will make sure that each epoch gets at least 10 * k bytes.
@@ -107,6 +116,32 @@ void COVER_best_start(COVER_best_t *best);
107
116
  * Decrements liveJobs and signals any waiting threads if liveJobs == 0.
108
117
  * If this dictionary is the best so far save it and its parameters.
109
118
  */
110
- void COVER_best_finish(COVER_best_t *best, size_t compressedSize,
111
- ZDICT_cover_params_t parameters, void *dict,
112
- size_t dictSize);
119
+ void COVER_best_finish(COVER_best_t *best, ZDICT_cover_params_t parameters,
120
+ COVER_dictSelection_t selection);
121
+ /**
122
+ * Error function for COVER_selectDict function. Checks if the return
123
+ * value is an error.
124
+ */
125
+ unsigned COVER_dictSelectionIsError(COVER_dictSelection_t selection);
126
+
127
+ /**
128
+ * Error function for COVER_selectDict function. Returns a struct where
129
+ * return.totalCompressedSize is a ZSTD error.
130
+ */
131
+ COVER_dictSelection_t COVER_dictSelectionError(size_t error);
132
+
133
+ /**
134
+ * Always call after selectDict is called to free up used memory from
135
+ * newly created dictionary.
136
+ */
137
+ void COVER_dictSelectionFree(COVER_dictSelection_t selection);
138
+
139
+ /**
140
+ * Called to finalize the dictionary and select one based on whether or not
141
+ * the shrink-dict flag was enabled. If enabled the dictionary used is the
142
+ * smallest dictionary within a specified regression of the compressed size
143
+ * from the largest dictionary.
144
+ */
145
+ COVER_dictSelection_t COVER_selectDict(BYTE* customDictContent,
146
+ size_t dictContentSize, const BYTE* samplesBuffer, const size_t* samplesSizes, unsigned nbFinalizeSamples,
147
+ size_t nbCheckSamples, size_t nbSamples, ZDICT_cover_params_t params, size_t* offsets, size_t totalCompressedSize);
@@ -287,10 +287,10 @@ FASTCOVER_computeFrequency(U32* freqs, const FASTCOVER_ctx_t* ctx)
287
287
  * Prepare a context for dictionary building.
288
288
  * The context is only dependent on the parameter `d` and can used multiple
289
289
  * times.
290
- * Returns 1 on success or zero on error.
290
+ * Returns 0 on success or error code on error.
291
291
  * The context must be destroyed with `FASTCOVER_ctx_destroy()`.
292
292
  */
293
- static int
293
+ static size_t
294
294
  FASTCOVER_ctx_init(FASTCOVER_ctx_t* ctx,
295
295
  const void* samplesBuffer,
296
296
  const size_t* samplesSizes, unsigned nbSamples,
@@ -310,19 +310,19 @@ FASTCOVER_ctx_init(FASTCOVER_ctx_t* ctx,
310
310
  totalSamplesSize >= (size_t)FASTCOVER_MAX_SAMPLES_SIZE) {
311
311
  DISPLAYLEVEL(1, "Total samples size is too large (%u MB), maximum size is %u MB\n",
312
312
  (unsigned)(totalSamplesSize >> 20), (FASTCOVER_MAX_SAMPLES_SIZE >> 20));
313
- return 0;
313
+ return ERROR(srcSize_wrong);
314
314
  }
315
315
 
316
316
  /* Check if there are at least 5 training samples */
317
317
  if (nbTrainSamples < 5) {
318
318
  DISPLAYLEVEL(1, "Total number of training samples is %u and is invalid\n", nbTrainSamples);
319
- return 0;
319
+ return ERROR(srcSize_wrong);
320
320
  }
321
321
 
322
322
  /* Check if there's testing sample */
323
323
  if (nbTestSamples < 1) {
324
324
  DISPLAYLEVEL(1, "Total number of testing samples is %u and is invalid.\n", nbTestSamples);
325
- return 0;
325
+ return ERROR(srcSize_wrong);
326
326
  }
327
327
 
328
328
  /* Zero the context */
@@ -347,7 +347,7 @@ FASTCOVER_ctx_init(FASTCOVER_ctx_t* ctx,
347
347
  if (ctx->offsets == NULL) {
348
348
  DISPLAYLEVEL(1, "Failed to allocate scratch buffers \n");
349
349
  FASTCOVER_ctx_destroy(ctx);
350
- return 0;
350
+ return ERROR(memory_allocation);
351
351
  }
352
352
 
353
353
  /* Fill offsets from the samplesSizes */
@@ -364,13 +364,13 @@ FASTCOVER_ctx_init(FASTCOVER_ctx_t* ctx,
364
364
  if (ctx->freqs == NULL) {
365
365
  DISPLAYLEVEL(1, "Failed to allocate frequency table \n");
366
366
  FASTCOVER_ctx_destroy(ctx);
367
- return 0;
367
+ return ERROR(memory_allocation);
368
368
  }
369
369
 
370
370
  DISPLAYLEVEL(2, "Computing frequencies\n");
371
371
  FASTCOVER_computeFrequency(ctx->freqs, ctx);
372
372
 
373
- return 1;
373
+ return 0;
374
374
  }
375
375
 
376
376
 
@@ -435,7 +435,6 @@ FASTCOVER_buildDictionary(const FASTCOVER_ctx_t* ctx,
435
435
  return tail;
436
436
  }
437
437
 
438
-
439
438
  /**
440
439
  * Parameters for FASTCOVER_tryParameters().
441
440
  */
@@ -464,6 +463,7 @@ static void FASTCOVER_tryParameters(void *opaque)
464
463
  U16* segmentFreqs = (U16 *)calloc(((U64)1 << ctx->f), sizeof(U16));
465
464
  /* Allocate space for hash table, dict, and freqs */
466
465
  BYTE *const dict = (BYTE * const)malloc(dictBufferCapacity);
466
+ COVER_dictSelection_t selection = COVER_dictSelectionError(ERROR(GENERIC));
467
467
  U32 *freqs = (U32*) malloc(((U64)1 << ctx->f) * sizeof(U32));
468
468
  if (!segmentFreqs || !dict || !freqs) {
469
469
  DISPLAYLEVEL(1, "Failed to allocate buffers: out of memory\n");
@@ -473,27 +473,24 @@ static void FASTCOVER_tryParameters(void *opaque)
473
473
  memcpy(freqs, ctx->freqs, ((U64)1 << ctx->f) * sizeof(U32));
474
474
  /* Build the dictionary */
475
475
  { const size_t tail = FASTCOVER_buildDictionary(ctx, freqs, dict, dictBufferCapacity,
476
- parameters, segmentFreqs);
476
+ parameters, segmentFreqs);
477
+
477
478
  const unsigned nbFinalizeSamples = (unsigned)(ctx->nbTrainSamples * ctx->accelParams.finalize / 100);
478
- dictBufferCapacity = ZDICT_finalizeDictionary(
479
- dict, dictBufferCapacity, dict + tail, dictBufferCapacity - tail,
480
- ctx->samples, ctx->samplesSizes, nbFinalizeSamples, parameters.zParams);
481
- if (ZDICT_isError(dictBufferCapacity)) {
482
- DISPLAYLEVEL(1, "Failed to finalize dictionary\n");
479
+ selection = COVER_selectDict(dict + tail, dictBufferCapacity - tail,
480
+ ctx->samples, ctx->samplesSizes, nbFinalizeSamples, ctx->nbTrainSamples, ctx->nbSamples, parameters, ctx->offsets,
481
+ totalCompressedSize);
482
+
483
+ if (COVER_dictSelectionIsError(selection)) {
484
+ DISPLAYLEVEL(1, "Failed to select dictionary\n");
483
485
  goto _cleanup;
484
486
  }
485
487
  }
486
- /* Check total compressed size */
487
- totalCompressedSize = COVER_checkTotalCompressedSize(parameters, ctx->samplesSizes,
488
- ctx->samples, ctx->offsets,
489
- ctx->nbTrainSamples, ctx->nbSamples,
490
- dict, dictBufferCapacity);
491
488
  _cleanup:
492
- COVER_best_finish(data->best, totalCompressedSize, parameters, dict,
493
- dictBufferCapacity);
489
+ free(dict);
490
+ COVER_best_finish(data->best, parameters, selection);
494
491
  free(data);
495
492
  free(segmentFreqs);
496
- free(dict);
493
+ COVER_dictSelectionFree(selection);
497
494
  free(freqs);
498
495
  }
499
496
 
@@ -508,6 +505,7 @@ FASTCOVER_convertToCoverParams(ZDICT_fastCover_params_t fastCoverParams,
508
505
  coverParams->nbThreads = fastCoverParams.nbThreads;
509
506
  coverParams->splitPoint = fastCoverParams.splitPoint;
510
507
  coverParams->zParams = fastCoverParams.zParams;
508
+ coverParams->shrinkDict = fastCoverParams.shrinkDict;
511
509
  }
512
510
 
513
511
 
@@ -524,6 +522,7 @@ FASTCOVER_convertToFastCoverParams(ZDICT_cover_params_t coverParams,
524
522
  fastCoverParams->f = f;
525
523
  fastCoverParams->accel = accel;
526
524
  fastCoverParams->zParams = coverParams.zParams;
525
+ fastCoverParams->shrinkDict = coverParams.shrinkDict;
527
526
  }
528
527
 
529
528
 
@@ -550,11 +549,11 @@ ZDICT_trainFromBuffer_fastCover(void* dictBuffer, size_t dictBufferCapacity,
550
549
  if (!FASTCOVER_checkParameters(coverParams, dictBufferCapacity, parameters.f,
551
550
  parameters.accel)) {
552
551
  DISPLAYLEVEL(1, "FASTCOVER parameters incorrect\n");
553
- return ERROR(GENERIC);
552
+ return ERROR(parameter_outOfBound);
554
553
  }
555
554
  if (nbSamples == 0) {
556
555
  DISPLAYLEVEL(1, "FASTCOVER must have at least one input file\n");
557
- return ERROR(GENERIC);
556
+ return ERROR(srcSize_wrong);
558
557
  }
559
558
  if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) {
560
559
  DISPLAYLEVEL(1, "dictBufferCapacity must be at least %u\n",
@@ -564,11 +563,14 @@ ZDICT_trainFromBuffer_fastCover(void* dictBuffer, size_t dictBufferCapacity,
564
563
  /* Assign corresponding FASTCOVER_accel_t to accelParams*/
565
564
  accelParams = FASTCOVER_defaultAccelParameters[parameters.accel];
566
565
  /* Initialize context */
567
- if (!FASTCOVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples,
566
+ {
567
+ size_t const initVal = FASTCOVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples,
568
568
  coverParams.d, parameters.splitPoint, parameters.f,
569
- accelParams)) {
570
- DISPLAYLEVEL(1, "Failed to initialize context\n");
571
- return ERROR(GENERIC);
569
+ accelParams);
570
+ if (ZSTD_isError(initVal)) {
571
+ DISPLAYLEVEL(1, "Failed to initialize context\n");
572
+ return initVal;
573
+ }
572
574
  }
573
575
  COVER_warnOnSmallCorpus(dictBufferCapacity, ctx.nbDmers, g_displayLevel);
574
576
  /* Build the dictionary */
@@ -616,6 +618,7 @@ ZDICT_optimizeTrainFromBuffer_fastCover(
616
618
  (1 + (kMaxD - kMinD) / 2) * (1 + (kMaxK - kMinK) / kStepSize);
617
619
  const unsigned f = parameters->f == 0 ? DEFAULT_F : parameters->f;
618
620
  const unsigned accel = parameters->accel == 0 ? DEFAULT_ACCEL : parameters->accel;
621
+ const unsigned shrinkDict = 0;
619
622
  /* Local variables */
620
623
  const int displayLevel = parameters->zParams.notificationLevel;
621
624
  unsigned iteration = 1;
@@ -627,19 +630,19 @@ ZDICT_optimizeTrainFromBuffer_fastCover(
627
630
  /* Checks */
628
631
  if (splitPoint <= 0 || splitPoint > 1) {
629
632
  LOCALDISPLAYLEVEL(displayLevel, 1, "Incorrect splitPoint\n");
630
- return ERROR(GENERIC);
633
+ return ERROR(parameter_outOfBound);
631
634
  }
632
635
  if (accel == 0 || accel > FASTCOVER_MAX_ACCEL) {
633
636
  LOCALDISPLAYLEVEL(displayLevel, 1, "Incorrect accel\n");
634
- return ERROR(GENERIC);
637
+ return ERROR(parameter_outOfBound);
635
638
  }
636
639
  if (kMinK < kMaxD || kMaxK < kMinK) {
637
640
  LOCALDISPLAYLEVEL(displayLevel, 1, "Incorrect k\n");
638
- return ERROR(GENERIC);
641
+ return ERROR(parameter_outOfBound);
639
642
  }
640
643
  if (nbSamples == 0) {
641
644
  LOCALDISPLAYLEVEL(displayLevel, 1, "FASTCOVER must have at least one input file\n");
642
- return ERROR(GENERIC);
645
+ return ERROR(srcSize_wrong);
643
646
  }
644
647
  if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) {
645
648
  LOCALDISPLAYLEVEL(displayLevel, 1, "dictBufferCapacity must be at least %u\n",
@@ -666,11 +669,14 @@ ZDICT_optimizeTrainFromBuffer_fastCover(
666
669
  /* Initialize the context for this value of d */
667
670
  FASTCOVER_ctx_t ctx;
668
671
  LOCALDISPLAYLEVEL(displayLevel, 3, "d=%u\n", d);
669
- if (!FASTCOVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples, d, splitPoint, f, accelParams)) {
670
- LOCALDISPLAYLEVEL(displayLevel, 1, "Failed to initialize context\n");
671
- COVER_best_destroy(&best);
672
- POOL_free(pool);
673
- return ERROR(GENERIC);
672
+ {
673
+ size_t const initVal = FASTCOVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples, d, splitPoint, f, accelParams);
674
+ if (ZSTD_isError(initVal)) {
675
+ LOCALDISPLAYLEVEL(displayLevel, 1, "Failed to initialize context\n");
676
+ COVER_best_destroy(&best);
677
+ POOL_free(pool);
678
+ return initVal;
679
+ }
674
680
  }
675
681
  if (!warned) {
676
682
  COVER_warnOnSmallCorpus(dictBufferCapacity, ctx.nbDmers, displayLevel);
@@ -687,7 +693,7 @@ ZDICT_optimizeTrainFromBuffer_fastCover(
687
693
  COVER_best_destroy(&best);
688
694
  FASTCOVER_ctx_destroy(&ctx);
689
695
  POOL_free(pool);
690
- return ERROR(GENERIC);
696
+ return ERROR(memory_allocation);
691
697
  }
692
698
  data->ctx = &ctx;
693
699
  data->best = &best;
@@ -697,6 +703,7 @@ ZDICT_optimizeTrainFromBuffer_fastCover(
697
703
  data->parameters.d = d;
698
704
  data->parameters.splitPoint = splitPoint;
699
705
  data->parameters.steps = kSteps;
706
+ data->parameters.shrinkDict = shrinkDict;
700
707
  data->parameters.zParams.notificationLevel = g_displayLevel;
701
708
  /* Check the parameters */
702
709
  if (!FASTCOVER_checkParameters(data->parameters, dictBufferCapacity,