extzstd 0.3 → 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (107)
  1. checksums.yaml +4 -4
  2. data/HISTORY.ja.md +8 -0
  3. data/README.md +1 -1
  4. data/contrib/zstd/CHANGELOG +94 -0
  5. data/contrib/zstd/CONTRIBUTING.md +351 -1
  6. data/contrib/zstd/Makefile +32 -10
  7. data/contrib/zstd/README.md +33 -10
  8. data/contrib/zstd/TESTING.md +2 -2
  9. data/contrib/zstd/appveyor.yml +42 -4
  10. data/contrib/zstd/lib/Makefile +128 -60
  11. data/contrib/zstd/lib/README.md +47 -16
  12. data/contrib/zstd/lib/common/bitstream.h +38 -39
  13. data/contrib/zstd/lib/common/compiler.h +40 -5
  14. data/contrib/zstd/lib/common/cpu.h +1 -1
  15. data/contrib/zstd/lib/common/debug.c +11 -31
  16. data/contrib/zstd/lib/common/debug.h +11 -31
  17. data/contrib/zstd/lib/common/entropy_common.c +13 -33
  18. data/contrib/zstd/lib/common/error_private.c +2 -1
  19. data/contrib/zstd/lib/common/error_private.h +6 -2
  20. data/contrib/zstd/lib/common/fse.h +12 -32
  21. data/contrib/zstd/lib/common/fse_decompress.c +12 -35
  22. data/contrib/zstd/lib/common/huf.h +15 -33
  23. data/contrib/zstd/lib/common/mem.h +75 -2
  24. data/contrib/zstd/lib/common/pool.c +8 -4
  25. data/contrib/zstd/lib/common/pool.h +2 -2
  26. data/contrib/zstd/lib/common/threading.c +50 -4
  27. data/contrib/zstd/lib/common/threading.h +36 -4
  28. data/contrib/zstd/lib/common/xxhash.c +23 -35
  29. data/contrib/zstd/lib/common/xxhash.h +11 -31
  30. data/contrib/zstd/lib/common/zstd_common.c +1 -1
  31. data/contrib/zstd/lib/common/zstd_errors.h +2 -1
  32. data/contrib/zstd/lib/common/zstd_internal.h +154 -26
  33. data/contrib/zstd/lib/compress/fse_compress.c +17 -40
  34. data/contrib/zstd/lib/compress/hist.c +15 -35
  35. data/contrib/zstd/lib/compress/hist.h +12 -32
  36. data/contrib/zstd/lib/compress/huf_compress.c +92 -92
  37. data/contrib/zstd/lib/compress/zstd_compress.c +1191 -1330
  38. data/contrib/zstd/lib/compress/zstd_compress_internal.h +317 -55
  39. data/contrib/zstd/lib/compress/zstd_compress_literals.c +158 -0
  40. data/contrib/zstd/lib/compress/zstd_compress_literals.h +29 -0
  41. data/contrib/zstd/lib/compress/zstd_compress_sequences.c +419 -0
  42. data/contrib/zstd/lib/compress/zstd_compress_sequences.h +54 -0
  43. data/contrib/zstd/lib/compress/zstd_compress_superblock.c +845 -0
  44. data/contrib/zstd/lib/compress/zstd_compress_superblock.h +32 -0
  45. data/contrib/zstd/lib/compress/zstd_cwksp.h +525 -0
  46. data/contrib/zstd/lib/compress/zstd_double_fast.c +65 -43
  47. data/contrib/zstd/lib/compress/zstd_double_fast.h +2 -2
  48. data/contrib/zstd/lib/compress/zstd_fast.c +92 -66
  49. data/contrib/zstd/lib/compress/zstd_fast.h +2 -2
  50. data/contrib/zstd/lib/compress/zstd_lazy.c +74 -42
  51. data/contrib/zstd/lib/compress/zstd_lazy.h +1 -1
  52. data/contrib/zstd/lib/compress/zstd_ldm.c +32 -10
  53. data/contrib/zstd/lib/compress/zstd_ldm.h +7 -2
  54. data/contrib/zstd/lib/compress/zstd_opt.c +81 -114
  55. data/contrib/zstd/lib/compress/zstd_opt.h +1 -1
  56. data/contrib/zstd/lib/compress/zstdmt_compress.c +95 -51
  57. data/contrib/zstd/lib/compress/zstdmt_compress.h +3 -2
  58. data/contrib/zstd/lib/decompress/huf_decompress.c +76 -60
  59. data/contrib/zstd/lib/decompress/zstd_ddict.c +12 -8
  60. data/contrib/zstd/lib/decompress/zstd_ddict.h +2 -2
  61. data/contrib/zstd/lib/decompress/zstd_decompress.c +292 -172
  62. data/contrib/zstd/lib/decompress/zstd_decompress_block.c +459 -338
  63. data/contrib/zstd/lib/decompress/zstd_decompress_block.h +3 -3
  64. data/contrib/zstd/lib/decompress/zstd_decompress_internal.h +18 -4
  65. data/contrib/zstd/lib/deprecated/zbuff.h +9 -8
  66. data/contrib/zstd/lib/deprecated/zbuff_common.c +2 -2
  67. data/contrib/zstd/lib/deprecated/zbuff_compress.c +1 -1
  68. data/contrib/zstd/lib/deprecated/zbuff_decompress.c +1 -1
  69. data/contrib/zstd/lib/dictBuilder/cover.c +164 -54
  70. data/contrib/zstd/lib/dictBuilder/cover.h +52 -7
  71. data/contrib/zstd/lib/dictBuilder/fastcover.c +60 -43
  72. data/contrib/zstd/lib/dictBuilder/zdict.c +43 -19
  73. data/contrib/zstd/lib/dictBuilder/zdict.h +56 -28
  74. data/contrib/zstd/lib/legacy/zstd_legacy.h +8 -4
  75. data/contrib/zstd/lib/legacy/zstd_v01.c +110 -110
  76. data/contrib/zstd/lib/legacy/zstd_v01.h +1 -1
  77. data/contrib/zstd/lib/legacy/zstd_v02.c +23 -13
  78. data/contrib/zstd/lib/legacy/zstd_v02.h +1 -1
  79. data/contrib/zstd/lib/legacy/zstd_v03.c +23 -13
  80. data/contrib/zstd/lib/legacy/zstd_v03.h +1 -1
  81. data/contrib/zstd/lib/legacy/zstd_v04.c +30 -17
  82. data/contrib/zstd/lib/legacy/zstd_v04.h +1 -1
  83. data/contrib/zstd/lib/legacy/zstd_v05.c +113 -102
  84. data/contrib/zstd/lib/legacy/zstd_v05.h +2 -2
  85. data/contrib/zstd/lib/legacy/zstd_v06.c +20 -18
  86. data/contrib/zstd/lib/legacy/zstd_v06.h +1 -1
  87. data/contrib/zstd/lib/legacy/zstd_v07.c +25 -19
  88. data/contrib/zstd/lib/legacy/zstd_v07.h +1 -1
  89. data/contrib/zstd/lib/libzstd.pc.in +3 -2
  90. data/contrib/zstd/lib/zstd.h +265 -88
  91. data/ext/extzstd.h +1 -1
  92. data/ext/libzstd_conf.h +8 -0
  93. data/ext/zstd_common.c +1 -3
  94. data/ext/zstd_compress.c +3 -3
  95. data/ext/zstd_decompress.c +1 -5
  96. data/ext/zstd_dictbuilder.c +2 -3
  97. data/ext/zstd_dictbuilder_fastcover.c +1 -3
  98. data/ext/zstd_legacy_v01.c +2 -0
  99. data/ext/zstd_legacy_v02.c +2 -0
  100. data/ext/zstd_legacy_v03.c +2 -0
  101. data/ext/zstd_legacy_v04.c +2 -0
  102. data/ext/zstd_legacy_v05.c +2 -0
  103. data/ext/zstd_legacy_v06.c +2 -0
  104. data/ext/zstd_legacy_v07.c +2 -0
  105. data/lib/extzstd.rb +18 -10
  106. data/lib/extzstd/version.rb +1 -1
  107. metadata +15 -6
@@ -1,11 +1,21 @@
1
+ /*
2
+ * Copyright (c) 2017-2020, Facebook, Inc.
3
+ * All rights reserved.
4
+ *
5
+ * This source code is licensed under both the BSD-style license (found in the
6
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
7
+ * in the COPYING file in the root directory of this source tree).
8
+ * You may select, at your option, one of the above-listed licenses.
9
+ */
10
+
1
11
  #include <stdio.h> /* fprintf */
2
12
  #include <stdlib.h> /* malloc, free, qsort */
3
13
  #include <string.h> /* memset */
4
14
  #include <time.h> /* clock */
5
- #include "mem.h" /* read */
6
- #include "pool.h"
7
- #include "threading.h"
8
- #include "zstd_internal.h" /* includes zstd.h */
15
+ #include "../common/mem.h" /* read */
16
+ #include "../common/pool.h"
17
+ #include "../common/threading.h"
18
+ #include "../common/zstd_internal.h" /* includes zstd.h */
9
19
  #ifndef ZDICT_STATIC_LINKING_ONLY
10
20
  #define ZDICT_STATIC_LINKING_ONLY
11
21
  #endif
@@ -46,6 +56,15 @@ typedef struct {
46
56
  U32 size;
47
57
  } COVER_epoch_info_t;
48
58
 
59
+ /**
60
+ * Struct used for the dictionary selection function.
61
+ */
62
+ typedef struct COVER_dictSelection {
63
+ BYTE* dictContent;
64
+ size_t dictSize;
65
+ size_t totalCompressedSize;
66
+ } COVER_dictSelection_t;
67
+
49
68
  /**
50
69
  * Computes the number of epochs and the size of each epoch.
51
70
  * We will make sure that each epoch gets at least 10 * k bytes.
@@ -107,6 +126,32 @@ void COVER_best_start(COVER_best_t *best);
107
126
  * Decrements liveJobs and signals any waiting threads if liveJobs == 0.
108
127
  * If this dictionary is the best so far save it and its parameters.
109
128
  */
110
- void COVER_best_finish(COVER_best_t *best, size_t compressedSize,
111
- ZDICT_cover_params_t parameters, void *dict,
112
- size_t dictSize);
129
+ void COVER_best_finish(COVER_best_t *best, ZDICT_cover_params_t parameters,
130
+ COVER_dictSelection_t selection);
131
+ /**
132
+ * Error function for COVER_selectDict function. Checks if the return
133
+ * value is an error.
134
+ */
135
+ unsigned COVER_dictSelectionIsError(COVER_dictSelection_t selection);
136
+
137
+ /**
138
+ * Error function for COVER_selectDict function. Returns a struct where
139
+ * return.totalCompressedSize is a ZSTD error.
140
+ */
141
+ COVER_dictSelection_t COVER_dictSelectionError(size_t error);
142
+
143
+ /**
144
+ * Always call after selectDict is called to free up used memory from
145
+ * newly created dictionary.
146
+ */
147
+ void COVER_dictSelectionFree(COVER_dictSelection_t selection);
148
+
149
+ /**
150
+ * Called to finalize the dictionary and select one based on whether or not
151
+ * the shrink-dict flag was enabled. If enabled the dictionary used is the
152
+ * smallest dictionary within a specified regression of the compressed size
153
+ * from the largest dictionary.
154
+ */
155
+ COVER_dictSelection_t COVER_selectDict(BYTE* customDictContent,
156
+ size_t dictContentSize, const BYTE* samplesBuffer, const size_t* samplesSizes, unsigned nbFinalizeSamples,
157
+ size_t nbCheckSamples, size_t nbSamples, ZDICT_cover_params_t params, size_t* offsets, size_t totalCompressedSize);
@@ -1,3 +1,13 @@
1
+ /*
2
+ * Copyright (c) 2018-2020, Facebook, Inc.
3
+ * All rights reserved.
4
+ *
5
+ * This source code is licensed under both the BSD-style license (found in the
6
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
7
+ * in the COPYING file in the root directory of this source tree).
8
+ * You may select, at your option, one of the above-listed licenses.
9
+ */
10
+
1
11
  /*-*************************************
2
12
  * Dependencies
3
13
  ***************************************/
@@ -6,11 +16,11 @@
6
16
  #include <string.h> /* memset */
7
17
  #include <time.h> /* clock */
8
18
 
9
- #include "mem.h" /* read */
10
- #include "pool.h"
11
- #include "threading.h"
19
+ #include "../common/mem.h" /* read */
20
+ #include "../common/pool.h"
21
+ #include "../common/threading.h"
12
22
  #include "cover.h"
13
- #include "zstd_internal.h" /* includes zstd.h */
23
+ #include "../common/zstd_internal.h" /* includes zstd.h */
14
24
  #ifndef ZDICT_STATIC_LINKING_ONLY
15
25
  #define ZDICT_STATIC_LINKING_ONLY
16
26
  #endif
@@ -287,10 +297,10 @@ FASTCOVER_computeFrequency(U32* freqs, const FASTCOVER_ctx_t* ctx)
287
297
  * Prepare a context for dictionary building.
288
298
  * The context is only dependent on the parameter `d` and can be used multiple
289
299
  * times.
290
- * Returns 1 on success or zero on error.
300
+ * Returns 0 on success or error code on error.
291
301
  * The context must be destroyed with `FASTCOVER_ctx_destroy()`.
292
302
  */
293
- static int
303
+ static size_t
294
304
  FASTCOVER_ctx_init(FASTCOVER_ctx_t* ctx,
295
305
  const void* samplesBuffer,
296
306
  const size_t* samplesSizes, unsigned nbSamples,
@@ -310,19 +320,19 @@ FASTCOVER_ctx_init(FASTCOVER_ctx_t* ctx,
310
320
  totalSamplesSize >= (size_t)FASTCOVER_MAX_SAMPLES_SIZE) {
311
321
  DISPLAYLEVEL(1, "Total samples size is too large (%u MB), maximum size is %u MB\n",
312
322
  (unsigned)(totalSamplesSize >> 20), (FASTCOVER_MAX_SAMPLES_SIZE >> 20));
313
- return 0;
323
+ return ERROR(srcSize_wrong);
314
324
  }
315
325
 
316
326
  /* Check if there are at least 5 training samples */
317
327
  if (nbTrainSamples < 5) {
318
328
  DISPLAYLEVEL(1, "Total number of training samples is %u and is invalid\n", nbTrainSamples);
319
- return 0;
329
+ return ERROR(srcSize_wrong);
320
330
  }
321
331
 
322
332
  /* Check if there is at least one testing sample */
323
333
  if (nbTestSamples < 1) {
324
334
  DISPLAYLEVEL(1, "Total number of testing samples is %u and is invalid.\n", nbTestSamples);
325
- return 0;
335
+ return ERROR(srcSize_wrong);
326
336
  }
327
337
 
328
338
  /* Zero the context */
@@ -347,7 +357,7 @@ FASTCOVER_ctx_init(FASTCOVER_ctx_t* ctx,
347
357
  if (ctx->offsets == NULL) {
348
358
  DISPLAYLEVEL(1, "Failed to allocate scratch buffers \n");
349
359
  FASTCOVER_ctx_destroy(ctx);
350
- return 0;
360
+ return ERROR(memory_allocation);
351
361
  }
352
362
 
353
363
  /* Fill offsets from the samplesSizes */
@@ -364,13 +374,13 @@ FASTCOVER_ctx_init(FASTCOVER_ctx_t* ctx,
364
374
  if (ctx->freqs == NULL) {
365
375
  DISPLAYLEVEL(1, "Failed to allocate frequency table \n");
366
376
  FASTCOVER_ctx_destroy(ctx);
367
- return 0;
377
+ return ERROR(memory_allocation);
368
378
  }
369
379
 
370
380
  DISPLAYLEVEL(2, "Computing frequencies\n");
371
381
  FASTCOVER_computeFrequency(ctx->freqs, ctx);
372
382
 
373
- return 1;
383
+ return 0;
374
384
  }
375
385
 
376
386
 
@@ -435,7 +445,6 @@ FASTCOVER_buildDictionary(const FASTCOVER_ctx_t* ctx,
435
445
  return tail;
436
446
  }
437
447
 
438
-
439
448
  /**
440
449
  * Parameters for FASTCOVER_tryParameters().
441
450
  */
@@ -464,6 +473,7 @@ static void FASTCOVER_tryParameters(void *opaque)
464
473
  U16* segmentFreqs = (U16 *)calloc(((U64)1 << ctx->f), sizeof(U16));
465
474
  /* Allocate space for hash table, dict, and freqs */
466
475
  BYTE *const dict = (BYTE * const)malloc(dictBufferCapacity);
476
+ COVER_dictSelection_t selection = COVER_dictSelectionError(ERROR(GENERIC));
467
477
  U32 *freqs = (U32*) malloc(((U64)1 << ctx->f) * sizeof(U32));
468
478
  if (!segmentFreqs || !dict || !freqs) {
469
479
  DISPLAYLEVEL(1, "Failed to allocate buffers: out of memory\n");
@@ -473,27 +483,24 @@ static void FASTCOVER_tryParameters(void *opaque)
473
483
  memcpy(freqs, ctx->freqs, ((U64)1 << ctx->f) * sizeof(U32));
474
484
  /* Build the dictionary */
475
485
  { const size_t tail = FASTCOVER_buildDictionary(ctx, freqs, dict, dictBufferCapacity,
476
- parameters, segmentFreqs);
486
+ parameters, segmentFreqs);
487
+
477
488
  const unsigned nbFinalizeSamples = (unsigned)(ctx->nbTrainSamples * ctx->accelParams.finalize / 100);
478
- dictBufferCapacity = ZDICT_finalizeDictionary(
479
- dict, dictBufferCapacity, dict + tail, dictBufferCapacity - tail,
480
- ctx->samples, ctx->samplesSizes, nbFinalizeSamples, parameters.zParams);
481
- if (ZDICT_isError(dictBufferCapacity)) {
482
- DISPLAYLEVEL(1, "Failed to finalize dictionary\n");
489
+ selection = COVER_selectDict(dict + tail, dictBufferCapacity - tail,
490
+ ctx->samples, ctx->samplesSizes, nbFinalizeSamples, ctx->nbTrainSamples, ctx->nbSamples, parameters, ctx->offsets,
491
+ totalCompressedSize);
492
+
493
+ if (COVER_dictSelectionIsError(selection)) {
494
+ DISPLAYLEVEL(1, "Failed to select dictionary\n");
483
495
  goto _cleanup;
484
496
  }
485
497
  }
486
- /* Check total compressed size */
487
- totalCompressedSize = COVER_checkTotalCompressedSize(parameters, ctx->samplesSizes,
488
- ctx->samples, ctx->offsets,
489
- ctx->nbTrainSamples, ctx->nbSamples,
490
- dict, dictBufferCapacity);
491
498
  _cleanup:
492
- COVER_best_finish(data->best, totalCompressedSize, parameters, dict,
493
- dictBufferCapacity);
499
+ free(dict);
500
+ COVER_best_finish(data->best, parameters, selection);
494
501
  free(data);
495
502
  free(segmentFreqs);
496
- free(dict);
503
+ COVER_dictSelectionFree(selection);
497
504
  free(freqs);
498
505
  }
499
506
 
@@ -508,6 +515,7 @@ FASTCOVER_convertToCoverParams(ZDICT_fastCover_params_t fastCoverParams,
508
515
  coverParams->nbThreads = fastCoverParams.nbThreads;
509
516
  coverParams->splitPoint = fastCoverParams.splitPoint;
510
517
  coverParams->zParams = fastCoverParams.zParams;
518
+ coverParams->shrinkDict = fastCoverParams.shrinkDict;
511
519
  }
512
520
 
513
521
 
@@ -524,6 +532,7 @@ FASTCOVER_convertToFastCoverParams(ZDICT_cover_params_t coverParams,
524
532
  fastCoverParams->f = f;
525
533
  fastCoverParams->accel = accel;
526
534
  fastCoverParams->zParams = coverParams.zParams;
535
+ fastCoverParams->shrinkDict = coverParams.shrinkDict;
527
536
  }
528
537
 
529
538
 
@@ -550,11 +559,11 @@ ZDICT_trainFromBuffer_fastCover(void* dictBuffer, size_t dictBufferCapacity,
550
559
  if (!FASTCOVER_checkParameters(coverParams, dictBufferCapacity, parameters.f,
551
560
  parameters.accel)) {
552
561
  DISPLAYLEVEL(1, "FASTCOVER parameters incorrect\n");
553
- return ERROR(GENERIC);
562
+ return ERROR(parameter_outOfBound);
554
563
  }
555
564
  if (nbSamples == 0) {
556
565
  DISPLAYLEVEL(1, "FASTCOVER must have at least one input file\n");
557
- return ERROR(GENERIC);
566
+ return ERROR(srcSize_wrong);
558
567
  }
559
568
  if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) {
560
569
  DISPLAYLEVEL(1, "dictBufferCapacity must be at least %u\n",
@@ -564,11 +573,14 @@ ZDICT_trainFromBuffer_fastCover(void* dictBuffer, size_t dictBufferCapacity,
564
573
  /* Assign corresponding FASTCOVER_accel_t to accelParams*/
565
574
  accelParams = FASTCOVER_defaultAccelParameters[parameters.accel];
566
575
  /* Initialize context */
567
- if (!FASTCOVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples,
576
+ {
577
+ size_t const initVal = FASTCOVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples,
568
578
  coverParams.d, parameters.splitPoint, parameters.f,
569
- accelParams)) {
570
- DISPLAYLEVEL(1, "Failed to initialize context\n");
571
- return ERROR(GENERIC);
579
+ accelParams);
580
+ if (ZSTD_isError(initVal)) {
581
+ DISPLAYLEVEL(1, "Failed to initialize context\n");
582
+ return initVal;
583
+ }
572
584
  }
573
585
  COVER_warnOnSmallCorpus(dictBufferCapacity, ctx.nbDmers, g_displayLevel);
574
586
  /* Build the dictionary */
@@ -616,6 +628,7 @@ ZDICT_optimizeTrainFromBuffer_fastCover(
616
628
  (1 + (kMaxD - kMinD) / 2) * (1 + (kMaxK - kMinK) / kStepSize);
617
629
  const unsigned f = parameters->f == 0 ? DEFAULT_F : parameters->f;
618
630
  const unsigned accel = parameters->accel == 0 ? DEFAULT_ACCEL : parameters->accel;
631
+ const unsigned shrinkDict = 0;
619
632
  /* Local variables */
620
633
  const int displayLevel = parameters->zParams.notificationLevel;
621
634
  unsigned iteration = 1;
@@ -627,19 +640,19 @@ ZDICT_optimizeTrainFromBuffer_fastCover(
627
640
  /* Checks */
628
641
  if (splitPoint <= 0 || splitPoint > 1) {
629
642
  LOCALDISPLAYLEVEL(displayLevel, 1, "Incorrect splitPoint\n");
630
- return ERROR(GENERIC);
643
+ return ERROR(parameter_outOfBound);
631
644
  }
632
645
  if (accel == 0 || accel > FASTCOVER_MAX_ACCEL) {
633
646
  LOCALDISPLAYLEVEL(displayLevel, 1, "Incorrect accel\n");
634
- return ERROR(GENERIC);
647
+ return ERROR(parameter_outOfBound);
635
648
  }
636
649
  if (kMinK < kMaxD || kMaxK < kMinK) {
637
650
  LOCALDISPLAYLEVEL(displayLevel, 1, "Incorrect k\n");
638
- return ERROR(GENERIC);
651
+ return ERROR(parameter_outOfBound);
639
652
  }
640
653
  if (nbSamples == 0) {
641
654
  LOCALDISPLAYLEVEL(displayLevel, 1, "FASTCOVER must have at least one input file\n");
642
- return ERROR(GENERIC);
655
+ return ERROR(srcSize_wrong);
643
656
  }
644
657
  if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) {
645
658
  LOCALDISPLAYLEVEL(displayLevel, 1, "dictBufferCapacity must be at least %u\n",
@@ -666,11 +679,14 @@ ZDICT_optimizeTrainFromBuffer_fastCover(
666
679
  /* Initialize the context for this value of d */
667
680
  FASTCOVER_ctx_t ctx;
668
681
  LOCALDISPLAYLEVEL(displayLevel, 3, "d=%u\n", d);
669
- if (!FASTCOVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples, d, splitPoint, f, accelParams)) {
670
- LOCALDISPLAYLEVEL(displayLevel, 1, "Failed to initialize context\n");
671
- COVER_best_destroy(&best);
672
- POOL_free(pool);
673
- return ERROR(GENERIC);
682
+ {
683
+ size_t const initVal = FASTCOVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples, d, splitPoint, f, accelParams);
684
+ if (ZSTD_isError(initVal)) {
685
+ LOCALDISPLAYLEVEL(displayLevel, 1, "Failed to initialize context\n");
686
+ COVER_best_destroy(&best);
687
+ POOL_free(pool);
688
+ return initVal;
689
+ }
674
690
  }
675
691
  if (!warned) {
676
692
  COVER_warnOnSmallCorpus(dictBufferCapacity, ctx.nbDmers, displayLevel);
@@ -687,7 +703,7 @@ ZDICT_optimizeTrainFromBuffer_fastCover(
687
703
  COVER_best_destroy(&best);
688
704
  FASTCOVER_ctx_destroy(&ctx);
689
705
  POOL_free(pool);
690
- return ERROR(GENERIC);
706
+ return ERROR(memory_allocation);
691
707
  }
692
708
  data->ctx = &ctx;
693
709
  data->best = &best;
@@ -697,6 +713,7 @@ ZDICT_optimizeTrainFromBuffer_fastCover(
697
713
  data->parameters.d = d;
698
714
  data->parameters.splitPoint = splitPoint;
699
715
  data->parameters.steps = kSteps;
716
+ data->parameters.shrinkDict = shrinkDict;
700
717
  data->parameters.zParams.notificationLevel = g_displayLevel;
701
718
  /* Check the parameters */
702
719
  if (!FASTCOVER_checkParameters(data->parameters, dictBufferCapacity,
@@ -1,5 +1,5 @@
1
1
  /*
2
- * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
2
+ * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
3
3
  * All rights reserved.
4
4
  *
5
5
  * This source code is licensed under both the BSD-style license (found in the
@@ -37,17 +37,18 @@
37
37
  #include <stdio.h> /* fprintf, fopen, ftello64 */
38
38
  #include <time.h> /* clock */
39
39
 
40
- #include "mem.h" /* read */
41
- #include "fse.h" /* FSE_normalizeCount, FSE_writeNCount */
40
+ #include "../common/mem.h" /* read */
41
+ #include "../common/fse.h" /* FSE_normalizeCount, FSE_writeNCount */
42
42
  #define HUF_STATIC_LINKING_ONLY
43
- #include "huf.h" /* HUF_buildCTable, HUF_writeCTable */
44
- #include "zstd_internal.h" /* includes zstd.h */
45
- #include "xxhash.h" /* XXH64 */
43
+ #include "../common/huf.h" /* HUF_buildCTable, HUF_writeCTable */
44
+ #include "../common/zstd_internal.h" /* includes zstd.h */
45
+ #include "../common/xxhash.h" /* XXH64 */
46
46
  #include "divsufsort.h"
47
47
  #ifndef ZDICT_STATIC_LINKING_ONLY
48
48
  # define ZDICT_STATIC_LINKING_ONLY
49
49
  #endif
50
50
  #include "zdict.h"
51
+ #include "../compress/zstd_compress_internal.h" /* ZSTD_loadCEntropy() */
51
52
 
52
53
 
53
54
  /*-*************************************
@@ -99,6 +100,29 @@ unsigned ZDICT_getDictID(const void* dictBuffer, size_t dictSize)
99
100
  return MEM_readLE32((const char*)dictBuffer + 4);
100
101
  }
101
102
 
103
+ size_t ZDICT_getDictHeaderSize(const void* dictBuffer, size_t dictSize)
104
+ {
105
+ size_t headerSize;
106
+ if (dictSize <= 8 || MEM_readLE32(dictBuffer) != ZSTD_MAGIC_DICTIONARY) return ERROR(dictionary_corrupted);
107
+
108
+ { unsigned offcodeMaxValue = MaxOff;
109
+ ZSTD_compressedBlockState_t* bs = (ZSTD_compressedBlockState_t*)malloc(sizeof(ZSTD_compressedBlockState_t));
110
+ U32* wksp = (U32*)malloc(HUF_WORKSPACE_SIZE);
111
+ short* offcodeNCount = (short*)malloc((MaxOff+1)*sizeof(short));
112
+ if (!bs || !wksp || !offcodeNCount) {
113
+ headerSize = ERROR(memory_allocation);
114
+ } else {
115
+ ZSTD_reset_compressedBlockState(bs);
116
+ headerSize = ZSTD_loadCEntropy(bs, wksp, offcodeNCount, &offcodeMaxValue, dictBuffer, dictSize);
117
+ }
118
+
119
+ free(bs);
120
+ free(wksp);
121
+ free(offcodeNCount);
122
+ }
123
+
124
+ return headerSize;
125
+ }
102
126
 
103
127
  /*-********************************************************
104
128
  * Dictionary training functions
@@ -571,7 +595,7 @@ static void ZDICT_fillNoise(void* buffer, size_t length)
571
595
  unsigned const prime1 = 2654435761U;
572
596
  unsigned const prime2 = 2246822519U;
573
597
  unsigned acc = prime1;
574
- size_t p=0;;
598
+ size_t p=0;
575
599
  for (p=0; p<length; p++) {
576
600
  acc *= prime2;
577
601
  ((unsigned char*)buffer)[p] = (unsigned char)(acc >> 21);
@@ -588,12 +612,12 @@ typedef struct
588
612
 
589
613
  #define MAXREPOFFSET 1024
590
614
 
591
- static void ZDICT_countEStats(EStats_ress_t esr, ZSTD_parameters params,
615
+ static void ZDICT_countEStats(EStats_ress_t esr, const ZSTD_parameters* params,
592
616
  unsigned* countLit, unsigned* offsetcodeCount, unsigned* matchlengthCount, unsigned* litlengthCount, U32* repOffsets,
593
617
  const void* src, size_t srcSize,
594
618
  U32 notificationLevel)
595
619
  {
596
- size_t const blockSizeMax = MIN (ZSTD_BLOCKSIZE_MAX, 1 << params.cParams.windowLog);
620
+ size_t const blockSizeMax = MIN (ZSTD_BLOCKSIZE_MAX, 1 << params->cParams.windowLog);
597
621
  size_t cSize;
598
622
 
599
623
  if (srcSize > blockSizeMax) srcSize = blockSizeMax; /* protection vs large samples */
@@ -731,7 +755,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
731
755
 
732
756
  /* collect stats on all samples */
733
757
  for (u=0; u<nbFiles; u++) {
734
- ZDICT_countEStats(esr, params,
758
+ ZDICT_countEStats(esr, &params,
735
759
  countLit, offcodeCount, matchLengthCount, litLengthCount, repOffset,
736
760
  (const char*)srcBuffer + pos, fileSizes[u],
737
761
  notificationLevel);
@@ -741,7 +765,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
741
765
  /* analyze, build stats, starting with literals */
742
766
  { size_t maxNbBits = HUF_buildCTable (hufTable, countLit, 255, huffLog);
743
767
  if (HUF_isError(maxNbBits)) {
744
- eSize = ERROR(GENERIC);
768
+ eSize = maxNbBits;
745
769
  DISPLAYLEVEL(1, " HUF_buildCTable error \n");
746
770
  goto _cleanup;
747
771
  }
@@ -764,7 +788,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
764
788
  total=0; for (u=0; u<=offcodeMax; u++) total+=offcodeCount[u];
765
789
  errorCode = FSE_normalizeCount(offcodeNCount, Offlog, offcodeCount, total, offcodeMax);
766
790
  if (FSE_isError(errorCode)) {
767
- eSize = ERROR(GENERIC);
791
+ eSize = errorCode;
768
792
  DISPLAYLEVEL(1, "FSE_normalizeCount error with offcodeCount \n");
769
793
  goto _cleanup;
770
794
  }
@@ -773,7 +797,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
773
797
  total=0; for (u=0; u<=MaxML; u++) total+=matchLengthCount[u];
774
798
  errorCode = FSE_normalizeCount(matchLengthNCount, mlLog, matchLengthCount, total, MaxML);
775
799
  if (FSE_isError(errorCode)) {
776
- eSize = ERROR(GENERIC);
800
+ eSize = errorCode;
777
801
  DISPLAYLEVEL(1, "FSE_normalizeCount error with matchLengthCount \n");
778
802
  goto _cleanup;
779
803
  }
@@ -782,7 +806,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
782
806
  total=0; for (u=0; u<=MaxLL; u++) total+=litLengthCount[u];
783
807
  errorCode = FSE_normalizeCount(litLengthNCount, llLog, litLengthCount, total, MaxLL);
784
808
  if (FSE_isError(errorCode)) {
785
- eSize = ERROR(GENERIC);
809
+ eSize = errorCode;
786
810
  DISPLAYLEVEL(1, "FSE_normalizeCount error with litLengthCount \n");
787
811
  goto _cleanup;
788
812
  }
@@ -791,7 +815,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
791
815
  /* write result to buffer */
792
816
  { size_t const hhSize = HUF_writeCTable(dstPtr, maxDstSize, hufTable, 255, huffLog);
793
817
  if (HUF_isError(hhSize)) {
794
- eSize = ERROR(GENERIC);
818
+ eSize = hhSize;
795
819
  DISPLAYLEVEL(1, "HUF_writeCTable error \n");
796
820
  goto _cleanup;
797
821
  }
@@ -802,7 +826,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
802
826
 
803
827
  { size_t const ohSize = FSE_writeNCount(dstPtr, maxDstSize, offcodeNCount, OFFCODE_MAX, Offlog);
804
828
  if (FSE_isError(ohSize)) {
805
- eSize = ERROR(GENERIC);
829
+ eSize = ohSize;
806
830
  DISPLAYLEVEL(1, "FSE_writeNCount error with offcodeNCount \n");
807
831
  goto _cleanup;
808
832
  }
@@ -813,7 +837,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
813
837
 
814
838
  { size_t const mhSize = FSE_writeNCount(dstPtr, maxDstSize, matchLengthNCount, MaxML, mlLog);
815
839
  if (FSE_isError(mhSize)) {
816
- eSize = ERROR(GENERIC);
840
+ eSize = mhSize;
817
841
  DISPLAYLEVEL(1, "FSE_writeNCount error with matchLengthNCount \n");
818
842
  goto _cleanup;
819
843
  }
@@ -824,7 +848,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
824
848
 
825
849
  { size_t const lhSize = FSE_writeNCount(dstPtr, maxDstSize, litLengthNCount, MaxLL, llLog);
826
850
  if (FSE_isError(lhSize)) {
827
- eSize = ERROR(GENERIC);
851
+ eSize = lhSize;
828
852
  DISPLAYLEVEL(1, "FSE_writeNCount error with litlengthNCount \n");
829
853
  goto _cleanup;
830
854
  }
@@ -834,7 +858,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
834
858
  }
835
859
 
836
860
  if (maxDstSize<12) {
837
- eSize = ERROR(GENERIC);
861
+ eSize = ERROR(dstSize_tooSmall);
838
862
  DISPLAYLEVEL(1, "not enough space to write RepOffsets \n");
839
863
  goto _cleanup;
840
864
  }