extzstd 0.1.1 → 0.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (85) hide show
  1. checksums.yaml +5 -5
  2. data/HISTORY.ja.md +18 -0
  3. data/README.md +15 -50
  4. data/contrib/zstd/CONTRIBUTING.md +1 -1
  5. data/contrib/zstd/COPYING +339 -0
  6. data/contrib/zstd/Makefile +82 -51
  7. data/contrib/zstd/NEWS +92 -5
  8. data/contrib/zstd/README.md +50 -41
  9. data/contrib/zstd/appveyor.yml +164 -102
  10. data/contrib/zstd/circle.yml +10 -22
  11. data/contrib/zstd/lib/BUCK +31 -10
  12. data/contrib/zstd/lib/Makefile +57 -31
  13. data/contrib/zstd/lib/README.md +68 -37
  14. data/contrib/zstd/lib/common/bitstream.h +130 -76
  15. data/contrib/zstd/lib/common/compiler.h +86 -0
  16. data/contrib/zstd/lib/common/error_private.c +15 -11
  17. data/contrib/zstd/lib/common/error_private.h +8 -8
  18. data/contrib/zstd/lib/common/fse.h +19 -9
  19. data/contrib/zstd/lib/common/fse_decompress.c +3 -22
  20. data/contrib/zstd/lib/common/huf.h +68 -26
  21. data/contrib/zstd/lib/common/mem.h +23 -35
  22. data/contrib/zstd/lib/common/pool.c +123 -63
  23. data/contrib/zstd/lib/common/pool.h +19 -10
  24. data/contrib/zstd/lib/common/threading.c +11 -16
  25. data/contrib/zstd/lib/common/threading.h +52 -33
  26. data/contrib/zstd/lib/common/xxhash.c +28 -22
  27. data/contrib/zstd/lib/common/zstd_common.c +40 -27
  28. data/contrib/zstd/lib/common/zstd_errors.h +43 -34
  29. data/contrib/zstd/lib/common/zstd_internal.h +131 -123
  30. data/contrib/zstd/lib/compress/fse_compress.c +17 -33
  31. data/contrib/zstd/lib/compress/huf_compress.c +15 -9
  32. data/contrib/zstd/lib/compress/zstd_compress.c +2096 -2363
  33. data/contrib/zstd/lib/compress/zstd_compress_internal.h +462 -0
  34. data/contrib/zstd/lib/compress/zstd_double_fast.c +309 -0
  35. data/contrib/zstd/lib/compress/zstd_double_fast.h +29 -0
  36. data/contrib/zstd/lib/compress/zstd_fast.c +243 -0
  37. data/contrib/zstd/lib/compress/zstd_fast.h +31 -0
  38. data/contrib/zstd/lib/compress/zstd_lazy.c +765 -0
  39. data/contrib/zstd/lib/compress/zstd_lazy.h +39 -0
  40. data/contrib/zstd/lib/compress/zstd_ldm.c +707 -0
  41. data/contrib/zstd/lib/compress/zstd_ldm.h +68 -0
  42. data/contrib/zstd/lib/compress/zstd_opt.c +785 -0
  43. data/contrib/zstd/lib/compress/zstd_opt.h +19 -908
  44. data/contrib/zstd/lib/compress/zstdmt_compress.c +737 -327
  45. data/contrib/zstd/lib/compress/zstdmt_compress.h +88 -26
  46. data/contrib/zstd/lib/decompress/huf_decompress.c +158 -50
  47. data/contrib/zstd/lib/decompress/zstd_decompress.c +884 -699
  48. data/contrib/zstd/lib/deprecated/zbuff.h +5 -4
  49. data/contrib/zstd/lib/deprecated/zbuff_common.c +5 -5
  50. data/contrib/zstd/lib/deprecated/zbuff_compress.c +6 -4
  51. data/contrib/zstd/lib/deprecated/zbuff_decompress.c +5 -4
  52. data/contrib/zstd/lib/dictBuilder/cover.c +93 -77
  53. data/contrib/zstd/lib/dictBuilder/zdict.c +107 -92
  54. data/contrib/zstd/lib/dictBuilder/zdict.h +112 -102
  55. data/contrib/zstd/lib/legacy/zstd_legacy.h +9 -4
  56. data/contrib/zstd/lib/legacy/zstd_v01.c +7 -6
  57. data/contrib/zstd/lib/legacy/zstd_v01.h +5 -4
  58. data/contrib/zstd/lib/legacy/zstd_v02.c +27 -99
  59. data/contrib/zstd/lib/legacy/zstd_v02.h +5 -4
  60. data/contrib/zstd/lib/legacy/zstd_v03.c +26 -98
  61. data/contrib/zstd/lib/legacy/zstd_v03.h +5 -4
  62. data/contrib/zstd/lib/legacy/zstd_v04.c +22 -91
  63. data/contrib/zstd/lib/legacy/zstd_v04.h +5 -4
  64. data/contrib/zstd/lib/legacy/zstd_v05.c +23 -99
  65. data/contrib/zstd/lib/legacy/zstd_v05.h +5 -4
  66. data/contrib/zstd/lib/legacy/zstd_v06.c +22 -96
  67. data/contrib/zstd/lib/legacy/zstd_v06.h +5 -4
  68. data/contrib/zstd/lib/legacy/zstd_v07.c +19 -95
  69. data/contrib/zstd/lib/legacy/zstd_v07.h +5 -4
  70. data/contrib/zstd/lib/zstd.h +895 -271
  71. data/ext/extconf.rb +11 -2
  72. data/ext/extzstd.c +45 -128
  73. data/ext/extzstd.h +74 -31
  74. data/ext/extzstd_stream.c +401 -142
  75. data/ext/zstd_common.c +5 -0
  76. data/ext/zstd_compress.c +8 -0
  77. data/ext/zstd_decompress.c +1 -0
  78. data/ext/zstd_dictbuilder.c +2 -0
  79. data/lib/extzstd/version.rb +1 -1
  80. data/lib/extzstd.rb +48 -1
  81. data/test/test_basic.rb +9 -1
  82. metadata +17 -7
  83. data/HISTORY.ja +0 -10
  84. data/contrib/zstd/LICENSE-examples +0 -11
  85. data/contrib/zstd/PATENTS +0 -33
@@ -1,18 +1,20 @@
1
- /**
1
+ /*
2
2
  * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
3
3
  * All rights reserved.
4
4
  *
5
- * This source code is licensed under the BSD-style license found in the
6
- * LICENSE file in the root directory of this source tree. An additional grant
7
- * of patent rights can be found in the PATENTS file in the same directory.
5
+ * This source code is licensed under both the BSD-style license (found in the
6
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
7
+ * in the COPYING file in the root directory of this source tree).
8
+ * You may select, at your option, one of the above-listed licenses.
8
9
  */
9
10
 
10
11
 
11
12
  /*-**************************************
12
13
  * Tuning parameters
13
14
  ****************************************/
15
+ #define MINRATIO 4 /* minimum nb of apparition to be selected in dictionary */
14
16
  #define ZDICT_MAX_SAMPLES_SIZE (2000U << 20)
15
- #define ZDICT_MIN_SAMPLES_SIZE 512
17
+ #define ZDICT_MIN_SAMPLES_SIZE (ZDICT_CONTENTSIZE_MIN * MINRATIO)
16
18
 
17
19
 
18
20
  /*-**************************************
@@ -59,11 +61,8 @@
59
61
 
60
62
  #define NOISELENGTH 32
61
63
 
62
- #define MINRATIO 4
63
- static const int g_compressionLevel_default = 6;
64
+ static const int g_compressionLevel_default = 3;
64
65
  static const U32 g_selectivity_default = 9;
65
- static const size_t g_provision_entropySize = 200;
66
- static const size_t g_min_fast_dictContent = 192;
67
66
 
68
67
 
69
68
  /*-*************************************
@@ -96,7 +95,7 @@ const char* ZDICT_getErrorName(size_t errorCode) { return ERR_getErrorName(error
96
95
  unsigned ZDICT_getDictID(const void* dictBuffer, size_t dictSize)
97
96
  {
98
97
  if (dictSize < 8) return 0;
99
- if (MEM_readLE32(dictBuffer) != ZSTD_DICT_MAGIC) return 0;
98
+ if (MEM_readLE32(dictBuffer) != ZSTD_MAGIC_DICTIONARY) return 0;
100
99
  return MEM_readLE32((const char*)dictBuffer + 4);
101
100
  }
102
101
 
@@ -104,7 +103,7 @@ unsigned ZDICT_getDictID(const void* dictBuffer, size_t dictSize)
104
103
  /*-********************************************************
105
104
  * Dictionary training functions
106
105
  **********************************************************/
107
- static unsigned ZDICT_NbCommonBytes (register size_t val)
106
+ static unsigned ZDICT_NbCommonBytes (size_t val)
108
107
  {
109
108
  if (MEM_isLittleEndian()) {
110
109
  if (MEM_64bits()) {
@@ -308,10 +307,10 @@ static dictItem ZDICT_analyzePos(
308
307
  /* look backward */
309
308
  length = MINMATCHLENGTH;
310
309
  while ((length >= MINMATCHLENGTH) & (start > 0)) {
311
- length = ZDICT_count(b + pos, b + suffix[start - 1]);
312
- if (length >= LLIMIT) length = LLIMIT - 1;
313
- lengthList[length]++;
314
- if (length >= MINMATCHLENGTH) start--;
310
+ length = ZDICT_count(b + pos, b + suffix[start - 1]);
311
+ if (length >= LLIMIT) length = LLIMIT - 1;
312
+ lengthList[length]++;
313
+ if (length >= MINMATCHLENGTH) start--;
315
314
  }
316
315
 
317
316
  /* largest useful length */
@@ -363,21 +362,35 @@ static dictItem ZDICT_analyzePos(
363
362
  }
364
363
 
365
364
 
366
- /*! ZDICT_checkMerge
365
+ static int isIncluded(const void* in, const void* container, size_t length)
366
+ {
367
+ const char* const ip = (const char*) in;
368
+ const char* const into = (const char*) container;
369
+ size_t u;
370
+
371
+ for (u=0; u<length; u++) { /* works because end of buffer is a noisy guard band */
372
+ if (ip[u] != into[u]) break;
373
+ }
374
+
375
+ return u==length;
376
+ }
377
+
378
+ /*! ZDICT_tryMerge() :
367
379
  check if dictItem can be merged, do it if possible
368
380
  @return : id of destination elt, 0 if not merged
369
381
  */
370
- static U32 ZDICT_checkMerge(dictItem* table, dictItem elt, U32 eltNbToSkip)
382
+ static U32 ZDICT_tryMerge(dictItem* table, dictItem elt, U32 eltNbToSkip, const void* buffer)
371
383
  {
372
384
  const U32 tableSize = table->pos;
373
385
  const U32 eltEnd = elt.pos + elt.length;
386
+ const char* const buf = (const char*) buffer;
374
387
 
375
388
  /* tail overlap */
376
389
  U32 u; for (u=1; u<tableSize; u++) {
377
390
  if (u==eltNbToSkip) continue;
378
391
  if ((table[u].pos > elt.pos) && (table[u].pos <= eltEnd)) { /* overlap, existing > new */
379
392
  /* append */
380
- U32 addedLength = table[u].pos - elt.pos;
393
+ U32 const addedLength = table[u].pos - elt.pos;
381
394
  table[u].length += addedLength;
382
395
  table[u].pos = elt.pos;
383
396
  table[u].savings += elt.savings * addedLength / elt.length; /* rough approx */
@@ -393,9 +406,10 @@ static U32 ZDICT_checkMerge(dictItem* table, dictItem elt, U32 eltNbToSkip)
393
406
  /* front overlap */
394
407
  for (u=1; u<tableSize; u++) {
395
408
  if (u==eltNbToSkip) continue;
409
+
396
410
  if ((table[u].pos + table[u].length >= elt.pos) && (table[u].pos < elt.pos)) { /* overlap, existing < new */
397
411
  /* append */
398
- int addedLength = (int)eltEnd - (table[u].pos + table[u].length);
412
+ int const addedLength = (int)eltEnd - (table[u].pos + table[u].length);
399
413
  table[u].savings += elt.length / 8; /* rough approx bonus */
400
414
  if (addedLength > 0) { /* otherwise, elt fully included into existing */
401
415
  table[u].length += addedLength;
@@ -407,7 +421,18 @@ static U32 ZDICT_checkMerge(dictItem* table, dictItem elt, U32 eltNbToSkip)
407
421
  table[u] = table[u-1], u--;
408
422
  table[u] = elt;
409
423
  return u;
410
- } }
424
+ }
425
+
426
+ if (MEM_read64(buf + table[u].pos) == MEM_read64(buf + elt.pos + 1)) {
427
+ if (isIncluded(buf + table[u].pos, buf + elt.pos + 1, table[u].length)) {
428
+ size_t const addedLength = MAX( (int)elt.length - (int)table[u].length , 1 );
429
+ table[u].pos = elt.pos;
430
+ table[u].savings += (U32)(elt.savings * addedLength / elt.length);
431
+ table[u].length = MIN(elt.length, table[u].length + 1);
432
+ return u;
433
+ }
434
+ }
435
+ }
411
436
 
412
437
  return 0;
413
438
  }
@@ -415,8 +440,8 @@ static U32 ZDICT_checkMerge(dictItem* table, dictItem elt, U32 eltNbToSkip)
415
440
 
416
441
  static void ZDICT_removeDictItem(dictItem* table, U32 id)
417
442
  {
418
- /* convention : first element is nb of elts */
419
- U32 const max = table->pos;
443
+ /* convention : table[0].pos stores nb of elts */
444
+ U32 const max = table[0].pos;
420
445
  U32 u;
421
446
  if (!id) return; /* protection, should never happen */
422
447
  for (u=id; u<max-1; u++)
@@ -425,14 +450,14 @@ static void ZDICT_removeDictItem(dictItem* table, U32 id)
425
450
  }
426
451
 
427
452
 
428
- static void ZDICT_insertDictItem(dictItem* table, U32 maxSize, dictItem elt)
453
+ static void ZDICT_insertDictItem(dictItem* table, U32 maxSize, dictItem elt, const void* buffer)
429
454
  {
430
455
  /* merge if possible */
431
- U32 mergeId = ZDICT_checkMerge(table, elt, 0);
456
+ U32 mergeId = ZDICT_tryMerge(table, elt, 0, buffer);
432
457
  if (mergeId) {
433
458
  U32 newMerge = 1;
434
459
  while (newMerge) {
435
- newMerge = ZDICT_checkMerge(table, table[mergeId], mergeId);
460
+ newMerge = ZDICT_tryMerge(table, table[mergeId], mergeId, buffer);
436
461
  if (newMerge) ZDICT_removeDictItem(table, mergeId);
437
462
  mergeId = newMerge;
438
463
  }
@@ -463,7 +488,7 @@ static U32 ZDICT_dictSize(const dictItem* dictList)
463
488
  }
464
489
 
465
490
 
466
- static size_t ZDICT_trainBuffer(dictItem* dictList, U32 dictListSize,
491
+ static size_t ZDICT_trainBuffer_legacy(dictItem* dictList, U32 dictListSize,
467
492
  const void* const buffer, size_t bufferSize, /* buffer must end with noisy guard band */
468
493
  const size_t* fileSizes, unsigned nbFiles,
469
494
  U32 minRatio, U32 notificationLevel)
@@ -480,7 +505,7 @@ static size_t ZDICT_trainBuffer(dictItem* dictList, U32 dictListSize,
480
505
  # define DISPLAYUPDATE(l, ...) if (notificationLevel>=l) { \
481
506
  if (ZDICT_clockSpan(displayClock) > refreshRate) \
482
507
  { displayClock = clock(); DISPLAY(__VA_ARGS__); \
483
- if (notificationLevel>=4) fflush(stdout); } }
508
+ if (notificationLevel>=4) fflush(stderr); } }
484
509
 
485
510
  /* init */
486
511
  DISPLAYLEVEL(2, "\r%70s\r", ""); /* clean display line */
@@ -521,7 +546,7 @@ static size_t ZDICT_trainBuffer(dictItem* dictList, U32 dictListSize,
521
546
  if (doneMarks[cursor]) { cursor++; continue; }
522
547
  solution = ZDICT_analyzePos(doneMarks, suffix, reverseSuffix[cursor], buffer, minRatio, notificationLevel);
523
548
  if (solution.length==0) { cursor++; continue; }
524
- ZDICT_insertDictItem(dictList, dictListSize, solution);
549
+ ZDICT_insertDictItem(dictList, dictListSize, solution, buffer);
525
550
  cursor += solution.length;
526
551
  DISPLAYUPDATE(2, "\r%4.2f %% \r", (double)cursor / bufferSize * 100);
527
552
  } }
@@ -552,7 +577,7 @@ typedef struct
552
577
  {
553
578
  ZSTD_CCtx* ref;
554
579
  ZSTD_CCtx* zc;
555
- void* workPlace; /* must be ZSTD_BLOCKSIZE_ABSOLUTEMAX allocated */
580
+ void* workPlace; /* must be ZSTD_BLOCKSIZE_MAX allocated */
556
581
  } EStats_ress_t;
557
582
 
558
583
  #define MAXREPOFFSET 1024
@@ -561,14 +586,14 @@ static void ZDICT_countEStats(EStats_ress_t esr, ZSTD_parameters params,
561
586
  U32* countLit, U32* offsetcodeCount, U32* matchlengthCount, U32* litlengthCount, U32* repOffsets,
562
587
  const void* src, size_t srcSize, U32 notificationLevel)
563
588
  {
564
- size_t const blockSizeMax = MIN (ZSTD_BLOCKSIZE_ABSOLUTEMAX, 1 << params.cParams.windowLog);
589
+ size_t const blockSizeMax = MIN (ZSTD_BLOCKSIZE_MAX, 1 << params.cParams.windowLog);
565
590
  size_t cSize;
566
591
 
567
592
  if (srcSize > blockSizeMax) srcSize = blockSizeMax; /* protection vs large samples */
568
593
  { size_t const errorCode = ZSTD_copyCCtx(esr.zc, esr.ref, 0);
569
594
  if (ZSTD_isError(errorCode)) { DISPLAYLEVEL(1, "warning : ZSTD_copyCCtx failed \n"); return; }
570
595
  }
571
- cSize = ZSTD_compressBlock(esr.zc, esr.workPlace, ZSTD_BLOCKSIZE_ABSOLUTEMAX, src, srcSize);
596
+ cSize = ZSTD_compressBlock(esr.zc, esr.workPlace, ZSTD_BLOCKSIZE_MAX, src, srcSize);
572
597
  if (ZSTD_isError(cSize)) { DISPLAYLEVEL(3, "warning : could not compress sample size %u \n", (U32)srcSize); return; }
573
598
 
574
599
  if (cSize) { /* if == 0; block is not compressible */
@@ -610,17 +635,6 @@ static void ZDICT_countEStats(EStats_ress_t esr, ZSTD_parameters params,
610
635
  } } }
611
636
  }
612
637
 
613
- /*
614
- static size_t ZDICT_maxSampleSize(const size_t* fileSizes, unsigned nbFiles)
615
- {
616
- unsigned u;
617
- size_t max=0;
618
- for (u=0; u<nbFiles; u++)
619
- if (max < fileSizes[u]) max = fileSizes[u];
620
- return max;
621
- }
622
- */
623
-
624
638
  static size_t ZDICT_totalSampleSize(const size_t* fileSizes, unsigned nbFiles)
625
639
  {
626
640
  size_t total=0;
@@ -676,26 +690,26 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
676
690
  /* init */
677
691
  esr.ref = ZSTD_createCCtx();
678
692
  esr.zc = ZSTD_createCCtx();
679
- esr.workPlace = malloc(ZSTD_BLOCKSIZE_ABSOLUTEMAX);
693
+ esr.workPlace = malloc(ZSTD_BLOCKSIZE_MAX);
680
694
  if (!esr.ref || !esr.zc || !esr.workPlace) {
681
695
  eSize = ERROR(memory_allocation);
682
696
  DISPLAYLEVEL(1, "Not enough memory \n");
683
697
  goto _cleanup;
684
698
  }
685
- if (offcodeMax>OFFCODE_MAX) { eSize = ERROR(dictionary_wrong); goto _cleanup; } /* too large dictionary */
686
- for (u=0; u<256; u++) countLit[u]=1; /* any character must be described */
687
- for (u=0; u<=offcodeMax; u++) offcodeCount[u]=1;
688
- for (u=0; u<=MaxML; u++) matchLengthCount[u]=1;
689
- for (u=0; u<=MaxLL; u++) litLengthCount[u]=1;
699
+ if (offcodeMax>OFFCODE_MAX) { eSize = ERROR(dictionaryCreation_failed); goto _cleanup; } /* too large dictionary */
700
+ for (u=0; u<256; u++) countLit[u] = 1; /* any character must be described */
701
+ for (u=0; u<=offcodeMax; u++) offcodeCount[u] = 1;
702
+ for (u=0; u<=MaxML; u++) matchLengthCount[u] = 1;
703
+ for (u=0; u<=MaxLL; u++) litLengthCount[u] = 1;
690
704
  memset(repOffset, 0, sizeof(repOffset));
691
705
  repOffset[1] = repOffset[4] = repOffset[8] = 1;
692
706
  memset(bestRepOffset, 0, sizeof(bestRepOffset));
693
- if (compressionLevel==0) compressionLevel=g_compressionLevel_default;
707
+ if (compressionLevel<=0) compressionLevel = g_compressionLevel_default;
694
708
  params = ZSTD_getParams(compressionLevel, averageSampleSize, dictBufferSize);
695
709
  { size_t const beginResult = ZSTD_compressBegin_advanced(esr.ref, dictBuffer, dictBufferSize, params, 0);
696
- if (ZSTD_isError(beginResult)) {
710
+ if (ZSTD_isError(beginResult)) {
711
+ DISPLAYLEVEL(1, "error : ZSTD_compressBegin_advanced() failed : %s \n", ZSTD_getErrorName(beginResult));
697
712
  eSize = ERROR(GENERIC);
698
- DISPLAYLEVEL(1, "error : ZSTD_compressBegin_advanced failed \n");
699
713
  goto _cleanup;
700
714
  } }
701
715
 
@@ -812,7 +826,6 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
812
826
  MEM_writeLE32(dstPtr+4, repStartValue[1]);
813
827
  MEM_writeLE32(dstPtr+8, repStartValue[2]);
814
828
  #endif
815
- //dstPtr += 12;
816
829
  eSize += 12;
817
830
 
818
831
  _cleanup:
@@ -831,7 +844,7 @@ size_t ZDICT_finalizeDictionary(void* dictBuffer, size_t dictBufferCapacity,
831
844
  ZDICT_params_t params)
832
845
  {
833
846
  size_t hSize;
834
- #define HBUFFSIZE 256
847
+ #define HBUFFSIZE 256 /* should prove large enough for all entropy headers */
835
848
  BYTE header[HBUFFSIZE];
836
849
  int const compressionLevel = (params.compressionLevel <= 0) ? g_compressionLevel_default : params.compressionLevel;
837
850
  U32 const notificationLevel = params.notificationLevel;
@@ -842,7 +855,7 @@ size_t ZDICT_finalizeDictionary(void* dictBuffer, size_t dictBufferCapacity,
842
855
  if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) return ERROR(dstSize_tooSmall);
843
856
 
844
857
  /* dictionary header */
845
- MEM_writeLE32(header, ZSTD_DICT_MAGIC);
858
+ MEM_writeLE32(header, ZSTD_MAGIC_DICTIONARY);
846
859
  { U64 const randomID = XXH64(customDictContent, dictContentSize, 0);
847
860
  U32 const compliantID = (randomID % ((1U<<31)-32768)) + 32768;
848
861
  U32 const dictID = params.dictID ? params.dictID : compliantID;
@@ -877,20 +890,11 @@ size_t ZDICT_addEntropyTablesFromBuffer_advanced(void* dictBuffer, size_t dictCo
877
890
  const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
878
891
  ZDICT_params_t params)
879
892
  {
880
- size_t hSize;
881
893
  int const compressionLevel = (params.compressionLevel <= 0) ? g_compressionLevel_default : params.compressionLevel;
882
894
  U32 const notificationLevel = params.notificationLevel;
895
+ size_t hSize = 8;
883
896
 
884
- /* dictionary header */
885
- MEM_writeLE32(dictBuffer, ZSTD_DICT_MAGIC);
886
- { U64 const randomID = XXH64((char*)dictBuffer + dictBufferCapacity - dictContentSize, dictContentSize, 0);
887
- U32 const compliantID = (randomID % ((1U<<31)-32768)) + 32768;
888
- U32 const dictID = params.dictID ? params.dictID : compliantID;
889
- MEM_writeLE32((char*)dictBuffer+4, dictID);
890
- }
891
- hSize = 8;
892
-
893
- /* entropy tables */
897
+ /* calculate entropy tables */
894
898
  DISPLAYLEVEL(2, "\r%70s\r", ""); /* clean display line */
895
899
  DISPLAYLEVEL(2, "statistics ... \n");
896
900
  { size_t const eSize = ZDICT_analyzeEntropy((char*)dictBuffer+hSize, dictBufferCapacity-hSize,
@@ -902,6 +906,13 @@ size_t ZDICT_addEntropyTablesFromBuffer_advanced(void* dictBuffer, size_t dictCo
902
906
  hSize += eSize;
903
907
  }
904
908
 
909
+ /* add dictionary header (after entropy tables) */
910
+ MEM_writeLE32(dictBuffer, ZSTD_MAGIC_DICTIONARY);
911
+ { U64 const randomID = XXH64((char*)dictBuffer + dictBufferCapacity - dictContentSize, dictContentSize, 0);
912
+ U32 const compliantID = (randomID % ((1U<<31)-32768)) + 32768;
913
+ U32 const dictID = params.dictID ? params.dictID : compliantID;
914
+ MEM_writeLE32((char*)dictBuffer+4, dictID);
915
+ }
905
916
 
906
917
  if (hSize + dictContentSize < dictBufferCapacity)
907
918
  memmove((char*)dictBuffer + hSize, (char*)dictBuffer + dictBufferCapacity - dictContentSize, dictContentSize);
@@ -909,14 +920,14 @@ size_t ZDICT_addEntropyTablesFromBuffer_advanced(void* dictBuffer, size_t dictCo
909
920
  }
910
921
 
911
922
 
912
- /*! ZDICT_trainFromBuffer_unsafe() :
923
+ /*! ZDICT_trainFromBuffer_unsafe_legacy() :
913
924
  * Warning : `samplesBuffer` must be followed by noisy guard band.
914
925
  * @return : size of dictionary, or an error code which can be tested with ZDICT_isError()
915
926
  */
916
- size_t ZDICT_trainFromBuffer_unsafe(
927
+ size_t ZDICT_trainFromBuffer_unsafe_legacy(
917
928
  void* dictBuffer, size_t maxDictSize,
918
929
  const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
919
- ZDICT_params_t params)
930
+ ZDICT_legacy_params_t params)
920
931
  {
921
932
  U32 const dictListSize = MAX(MAX(DICTLISTSIZE_DEFAULT, nbSamples), (U32)(maxDictSize/16));
922
933
  dictItem* const dictList = (dictItem*)malloc(dictListSize * sizeof(*dictList));
@@ -925,24 +936,24 @@ size_t ZDICT_trainFromBuffer_unsafe(
925
936
  size_t const targetDictSize = maxDictSize;
926
937
  size_t const samplesBuffSize = ZDICT_totalSampleSize(samplesSizes, nbSamples);
927
938
  size_t dictSize = 0;
928
- U32 const notificationLevel = params.notificationLevel;
939
+ U32 const notificationLevel = params.zParams.notificationLevel;
929
940
 
930
941
  /* checks */
931
942
  if (!dictList) return ERROR(memory_allocation);
932
- if (maxDictSize <= g_provision_entropySize + g_min_fast_dictContent) { free(dictList); return ERROR(dstSize_tooSmall); }
933
- if (samplesBuffSize < ZDICT_MIN_SAMPLES_SIZE) { free(dictList); return 0; } /* not enough source to create dictionary */
943
+ if (maxDictSize < ZDICT_DICTSIZE_MIN) { free(dictList); return ERROR(dstSize_tooSmall); } /* requested dictionary size is too small */
944
+ if (samplesBuffSize < ZDICT_MIN_SAMPLES_SIZE) { free(dictList); return ERROR(dictionaryCreation_failed); } /* not enough source to create dictionary */
934
945
 
935
946
  /* init */
936
947
  ZDICT_initDictItem(dictList);
937
948
 
938
949
  /* build dictionary */
939
- ZDICT_trainBuffer(dictList, dictListSize,
940
- samplesBuffer, samplesBuffSize,
941
- samplesSizes, nbSamples,
942
- minRep, notificationLevel);
950
+ ZDICT_trainBuffer_legacy(dictList, dictListSize,
951
+ samplesBuffer, samplesBuffSize,
952
+ samplesSizes, nbSamples,
953
+ minRep, notificationLevel);
943
954
 
944
955
  /* display best matches */
945
- if (params.notificationLevel>= 3) {
956
+ if (params.zParams.notificationLevel>= 3) {
946
957
  U32 const nb = MIN(25, dictList[0].pos);
947
958
  U32 const dictContentSize = ZDICT_dictSize(dictList);
948
959
  U32 u;
@@ -963,14 +974,15 @@ size_t ZDICT_trainFromBuffer_unsafe(
963
974
 
964
975
  /* create dictionary */
965
976
  { U32 dictContentSize = ZDICT_dictSize(dictList);
966
- if (dictContentSize < targetDictSize/3) {
977
+ if (dictContentSize < ZDICT_CONTENTSIZE_MIN) { free(dictList); return ERROR(dictionaryCreation_failed); } /* dictionary content too small */
978
+ if (dictContentSize < targetDictSize/4) {
967
979
  DISPLAYLEVEL(2, "! warning : selected content significantly smaller than requested (%u < %u) \n", dictContentSize, (U32)maxDictSize);
980
+ if (samplesBuffSize < 10 * targetDictSize)
981
+ DISPLAYLEVEL(2, "! consider increasing the number of samples (total size : %u MB)\n", (U32)(samplesBuffSize>>20));
968
982
  if (minRep > MINRATIO) {
969
983
  DISPLAYLEVEL(2, "! consider increasing selectivity to produce larger dictionary (-s%u) \n", selectivity+1);
970
984
  DISPLAYLEVEL(2, "! note : larger dictionaries are not necessarily better, test its efficiency on samples \n");
971
985
  }
972
- if (samplesBuffSize < 10 * targetDictSize)
973
- DISPLAYLEVEL(2, "! consider increasing the number of samples (total size : %u MB)\n", (U32)(samplesBuffSize>>20));
974
986
  }
975
987
 
976
988
  if ((dictContentSize > targetDictSize*3) && (nbSamples > 2*MINRATIO) && (selectivity>1)) {
@@ -978,7 +990,7 @@ size_t ZDICT_trainFromBuffer_unsafe(
978
990
  while ((nbSamples >> proposedSelectivity) <= MINRATIO) { proposedSelectivity--; }
979
991
  DISPLAYLEVEL(2, "! note : calculated dictionary significantly larger than requested (%u > %u) \n", dictContentSize, (U32)maxDictSize);
980
992
  DISPLAYLEVEL(2, "! consider increasing dictionary size, or produce denser dictionary (-s%u) \n", proposedSelectivity);
981
- DISPLAYLEVEL(2, "! always test dictionary efficiency on samples \n");
993
+ DISPLAYLEVEL(2, "! always test dictionary efficiency on real samples \n");
982
994
  }
983
995
 
984
996
  /* limit dictionary size */
@@ -1004,7 +1016,7 @@ size_t ZDICT_trainFromBuffer_unsafe(
1004
1016
 
1005
1017
  dictSize = ZDICT_addEntropyTablesFromBuffer_advanced(dictBuffer, dictContentSize, maxDictSize,
1006
1018
  samplesBuffer, samplesSizes, nbSamples,
1007
- params);
1019
+ params.zParams);
1008
1020
  }
1009
1021
 
1010
1022
  /* clean up */
@@ -1015,9 +1027,9 @@ size_t ZDICT_trainFromBuffer_unsafe(
1015
1027
 
1016
1028
  /* issue : samplesBuffer needs to be followed by a noisy guard band.
1017
1029
  * work around : duplicate the buffer, and add the noise */
1018
- size_t ZDICT_trainFromBuffer_advanced(void* dictBuffer, size_t dictBufferCapacity,
1019
- const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
1020
- ZDICT_params_t params)
1030
+ size_t ZDICT_trainFromBuffer_legacy(void* dictBuffer, size_t dictBufferCapacity,
1031
+ const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
1032
+ ZDICT_legacy_params_t params)
1021
1033
  {
1022
1034
  size_t result;
1023
1035
  void* newBuff;
@@ -1030,10 +1042,9 @@ size_t ZDICT_trainFromBuffer_advanced(void* dictBuffer, size_t dictBufferCapacit
1030
1042
  memcpy(newBuff, samplesBuffer, sBuffSize);
1031
1043
  ZDICT_fillNoise((char*)newBuff + sBuffSize, NOISELENGTH); /* guard band, for end of buffer condition */
1032
1044
 
1033
- result = ZDICT_trainFromBuffer_unsafe(
1034
- dictBuffer, dictBufferCapacity,
1035
- newBuff, samplesSizes, nbSamples,
1036
- params);
1045
+ result =
1046
+ ZDICT_trainFromBuffer_unsafe_legacy(dictBuffer, dictBufferCapacity, newBuff,
1047
+ samplesSizes, nbSamples, params);
1037
1048
  free(newBuff);
1038
1049
  return result;
1039
1050
  }
@@ -1042,11 +1053,15 @@ size_t ZDICT_trainFromBuffer_advanced(void* dictBuffer, size_t dictBufferCapacit
1042
1053
  size_t ZDICT_trainFromBuffer(void* dictBuffer, size_t dictBufferCapacity,
1043
1054
  const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples)
1044
1055
  {
1045
- ZDICT_params_t params;
1056
+ ZDICT_cover_params_t params;
1046
1057
  memset(&params, 0, sizeof(params));
1047
- return ZDICT_trainFromBuffer_advanced(dictBuffer, dictBufferCapacity,
1048
- samplesBuffer, samplesSizes, nbSamples,
1049
- params);
1058
+ params.d = 8;
1059
+ params.steps = 4;
1060
+ /* Default to level 6 since no compression level information is available */
1061
+ params.zParams.compressionLevel = 6;
1062
+ return ZDICT_optimizeTrainFromBuffer_cover(dictBuffer, dictBufferCapacity,
1063
+ samplesBuffer, samplesSizes,
1064
+ nbSamples, &params);
1050
1065
  }
1051
1066
 
1052
1067
  size_t ZDICT_addEntropyTablesFromBuffer(void* dictBuffer, size_t dictContentSize, size_t dictBufferCapacity,