zstd-ruby 1.1.4.0 → 1.2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (31)
  1. checksums.yaml +4 -4
  2. data/.travis.yml +1 -1
  3. data/README.md +1 -1
  4. data/ext/zstdruby/libzstd/Makefile +11 -1
  5. data/ext/zstdruby/libzstd/README.md +8 -0
  6. data/ext/zstdruby/libzstd/common/bitstream.h +56 -27
  7. data/ext/zstdruby/libzstd/common/error_private.c +2 -1
  8. data/ext/zstdruby/libzstd/common/fse.h +7 -3
  9. data/ext/zstdruby/libzstd/common/huf.h +42 -19
  10. data/ext/zstdruby/libzstd/common/mem.h +2 -3
  11. data/ext/zstdruby/libzstd/common/zstd_errors.h +1 -0
  12. data/ext/zstdruby/libzstd/common/zstd_internal.h +3 -2
  13. data/ext/zstdruby/libzstd/compress/fse_compress.c +10 -10
  14. data/ext/zstdruby/libzstd/compress/zstd_compress.c +455 -244
  15. data/ext/zstdruby/libzstd/compress/zstd_opt.h +6 -4
  16. data/ext/zstdruby/libzstd/compress/zstdmt_compress.c +40 -28
  17. data/ext/zstdruby/libzstd/decompress/zstd_decompress.c +115 -219
  18. data/ext/zstdruby/libzstd/dictBuilder/cover.c +34 -13
  19. data/ext/zstdruby/libzstd/dictBuilder/zdict.c +65 -43
  20. data/ext/zstdruby/libzstd/dictBuilder/zdict.h +7 -7
  21. data/ext/zstdruby/libzstd/dll/example/README.md +5 -5
  22. data/ext/zstdruby/libzstd/dll/example/build_package.bat +1 -0
  23. data/ext/zstdruby/libzstd/legacy/zstd_v01.c +1 -1
  24. data/ext/zstdruby/libzstd/legacy/zstd_v02.c +21 -21
  25. data/ext/zstdruby/libzstd/legacy/zstd_v03.c +20 -20
  26. data/ext/zstdruby/libzstd/legacy/zstd_v04.c +4 -4
  27. data/ext/zstdruby/libzstd/legacy/zstd_v05.c +2 -2
  28. data/ext/zstdruby/libzstd/legacy/zstd_v06.c +2 -2
  29. data/ext/zstdruby/libzstd/zstd.h +88 -68
  30. data/lib/zstd-ruby/version.rb +1 -1
  31. metadata +3 -3
@@ -59,8 +59,6 @@ static int g_displayLevel = 2;
59
59
  if ((clock() - g_time > refreshRate) || (displayLevel >= 4)) { \
60
60
  g_time = clock(); \
61
61
  DISPLAY(__VA_ARGS__); \
62
- if (displayLevel >= 4) \
63
- fflush(stdout); \
64
62
  } \
65
63
  }
66
64
  #define DISPLAYUPDATE(l, ...) LOCALDISPLAYUPDATE(g_displayLevel, l, __VA_ARGS__)
@@ -236,10 +234,22 @@ static size_t COVER_sum(const size_t *samplesSizes, unsigned nbSamples) {
236
234
  * Returns 1 if the dmer at lp is greater than the dmer at rp.
237
235
  */
238
236
  static int COVER_cmp(COVER_ctx_t *ctx, const void *lp, const void *rp) {
239
- const U32 lhs = *(const U32 *)lp;
240
- const U32 rhs = *(const U32 *)rp;
237
+ U32 const lhs = *(U32 const *)lp;
238
+ U32 const rhs = *(U32 const *)rp;
241
239
  return memcmp(ctx->samples + lhs, ctx->samples + rhs, ctx->d);
242
240
  }
241
+ /**
242
+ * Faster version for d <= 8.
243
+ */
244
+ static int COVER_cmp8(COVER_ctx_t *ctx, const void *lp, const void *rp) {
245
+ U64 const mask = (ctx->d == 8) ? (U64)-1 : (((U64)1 << (8 * ctx->d)) - 1);
246
+ U64 const lhs = MEM_readLE64(ctx->samples + *(U32 const *)lp) & mask;
247
+ U64 const rhs = MEM_readLE64(ctx->samples + *(U32 const *)rp) & mask;
248
+ if (lhs < rhs) {
249
+ return -1;
250
+ }
251
+ return (lhs > rhs);
252
+ }
243
253
 
244
254
  /**
245
255
  * Same as COVER_cmp() except ties are broken by pointer value
@@ -253,6 +263,16 @@ static int COVER_strict_cmp(const void *lp, const void *rp) {
253
263
  }
254
264
  return result;
255
265
  }
266
+ /**
267
+ * Faster version for d <= 8.
268
+ */
269
+ static int COVER_strict_cmp8(const void *lp, const void *rp) {
270
+ int result = COVER_cmp8(g_ctx, lp, rp);
271
+ if (result == 0) {
272
+ result = lp < rp ? -1 : 1;
273
+ }
274
+ return result;
275
+ }
256
276
 
257
277
  /**
258
278
  * Returns the first pointer in [first, last) whose element does not compare
@@ -508,7 +528,7 @@ static int COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer,
508
528
  const BYTE *const samples = (const BYTE *)samplesBuffer;
509
529
  const size_t totalSamplesSize = COVER_sum(samplesSizes, nbSamples);
510
530
  /* Checks */
511
- if (totalSamplesSize < d ||
531
+ if (totalSamplesSize < MAX(d, sizeof(U64)) ||
512
532
  totalSamplesSize >= (size_t)COVER_MAX_SAMPLES_SIZE) {
513
533
  DISPLAYLEVEL(1, "Total samples size is too large, maximum size is %u MB\n",
514
534
  (COVER_MAX_SAMPLES_SIZE >> 20));
@@ -522,7 +542,7 @@ static int COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer,
522
542
  ctx->samplesSizes = samplesSizes;
523
543
  ctx->nbSamples = nbSamples;
524
544
  /* Partial suffix array */
525
- ctx->suffixSize = totalSamplesSize - d + 1;
545
+ ctx->suffixSize = totalSamplesSize - MAX(d, sizeof(U64)) + 1;
526
546
  ctx->suffix = (U32 *)malloc(ctx->suffixSize * sizeof(U32));
527
547
  /* Maps index to the dmerID */
528
548
  ctx->dmerAt = (U32 *)malloc(ctx->suffixSize * sizeof(U32));
@@ -556,7 +576,8 @@ static int COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer,
556
576
  }
557
577
  /* qsort doesn't take an opaque pointer, so pass as a global */
558
578
  g_ctx = ctx;
559
- qsort(ctx->suffix, ctx->suffixSize, sizeof(U32), &COVER_strict_cmp);
579
+ qsort(ctx->suffix, ctx->suffixSize, sizeof(U32),
580
+ (ctx->d <= 8 ? &COVER_strict_cmp8 : &COVER_strict_cmp));
560
581
  }
561
582
  DISPLAYLEVEL(2, "Computing frequencies\n");
562
583
  /* For each dmer group (group of positions with the same first d bytes):
@@ -566,8 +587,8 @@ static int COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer,
566
587
  * 2. We calculate how many samples the dmer occurs in and save it in
567
588
  * freqs[dmerId].
568
589
  */
569
- COVER_groupBy(ctx->suffix, ctx->suffixSize, sizeof(U32), ctx, &COVER_cmp,
570
- &COVER_group);
590
+ COVER_groupBy(ctx->suffix, ctx->suffixSize, sizeof(U32), ctx,
591
+ (ctx->d <= 8 ? &COVER_cmp8 : &COVER_cmp), &COVER_group);
571
592
  ctx->freqs = ctx->suffix;
572
593
  ctx->suffix = NULL;
573
594
  return 1;
@@ -918,10 +939,10 @@ ZDICTLIB_API size_t COVER_optimizeTrainFromBuffer(void *dictBuffer,
918
939
  /* constants */
919
940
  const unsigned nbThreads = parameters->nbThreads;
920
941
  const unsigned kMinD = parameters->d == 0 ? 6 : parameters->d;
921
- const unsigned kMaxD = parameters->d == 0 ? 16 : parameters->d;
922
- const unsigned kMinK = parameters->k == 0 ? kMaxD : parameters->k;
923
- const unsigned kMaxK = parameters->k == 0 ? 2048 : parameters->k;
924
- const unsigned kSteps = parameters->steps == 0 ? 32 : parameters->steps;
942
+ const unsigned kMaxD = parameters->d == 0 ? 8 : parameters->d;
943
+ const unsigned kMinK = parameters->k == 0 ? 50 : parameters->k;
944
+ const unsigned kMaxK = parameters->k == 0 ? 2000 : parameters->k;
945
+ const unsigned kSteps = parameters->steps == 0 ? 40 : parameters->steps;
925
946
  const unsigned kStepSize = MAX((kMaxK - kMinK) / kSteps, 1);
926
947
  const unsigned kIterations =
927
948
  (1 + (kMaxD - kMinD) / 2) * (1 + (kMaxK - kMinK) / kStepSize);
@@ -11,8 +11,9 @@
11
11
  /*-**************************************
12
12
  * Tuning parameters
13
13
  ****************************************/
14
+ #define MINRATIO 4 /* minimum nb of apparition to be selected in dictionary */
14
15
  #define ZDICT_MAX_SAMPLES_SIZE (2000U << 20)
15
- #define ZDICT_MIN_SAMPLES_SIZE 512
16
+ #define ZDICT_MIN_SAMPLES_SIZE (ZDICT_CONTENTSIZE_MIN * MINRATIO)
16
17
 
17
18
 
18
19
  /*-**************************************
@@ -59,11 +60,8 @@
59
60
 
60
61
  #define NOISELENGTH 32
61
62
 
62
- #define MINRATIO 4
63
63
  static const int g_compressionLevel_default = 6;
64
64
  static const U32 g_selectivity_default = 9;
65
- static const size_t g_provision_entropySize = 200;
66
- static const size_t g_min_fast_dictContent = 192;
67
65
 
68
66
 
69
67
  /*-*************************************
@@ -308,10 +306,10 @@ static dictItem ZDICT_analyzePos(
308
306
  /* look backward */
309
307
  length = MINMATCHLENGTH;
310
308
  while ((length >= MINMATCHLENGTH) & (start > 0)) {
311
- length = ZDICT_count(b + pos, b + suffix[start - 1]);
312
- if (length >= LLIMIT) length = LLIMIT - 1;
313
- lengthList[length]++;
314
- if (length >= MINMATCHLENGTH) start--;
309
+ length = ZDICT_count(b + pos, b + suffix[start - 1]);
310
+ if (length >= LLIMIT) length = LLIMIT - 1;
311
+ lengthList[length]++;
312
+ if (length >= MINMATCHLENGTH) start--;
315
313
  }
316
314
 
317
315
  /* largest useful length */
@@ -363,21 +361,35 @@ static dictItem ZDICT_analyzePos(
363
361
  }
364
362
 
365
363
 
364
+ static int isIncluded(const void* in, const void* container, size_t length)
365
+ {
366
+ const char* const ip = (const char*) in;
367
+ const char* const into = (const char*) container;
368
+ size_t u;
369
+
370
+ for (u=0; u<length; u++) { /* works because end of buffer is a noisy guard band */
371
+ if (ip[u] != into[u]) break;
372
+ }
373
+
374
+ return u==length;
375
+ }
376
+
366
377
  /*! ZDICT_checkMerge
367
378
  check if dictItem can be merged, do it if possible
368
379
  @return : id of destination elt, 0 if not merged
369
380
  */
370
- static U32 ZDICT_checkMerge(dictItem* table, dictItem elt, U32 eltNbToSkip)
381
+ static U32 ZDICT_tryMerge(dictItem* table, dictItem elt, U32 eltNbToSkip, const void* buffer)
371
382
  {
372
383
  const U32 tableSize = table->pos;
373
384
  const U32 eltEnd = elt.pos + elt.length;
385
+ const char* const buf = (const char*) buffer;
374
386
 
375
387
  /* tail overlap */
376
388
  U32 u; for (u=1; u<tableSize; u++) {
377
389
  if (u==eltNbToSkip) continue;
378
390
  if ((table[u].pos > elt.pos) && (table[u].pos <= eltEnd)) { /* overlap, existing > new */
379
391
  /* append */
380
- U32 addedLength = table[u].pos - elt.pos;
392
+ U32 const addedLength = table[u].pos - elt.pos;
381
393
  table[u].length += addedLength;
382
394
  table[u].pos = elt.pos;
383
395
  table[u].savings += elt.savings * addedLength / elt.length; /* rough approx */
@@ -393,9 +405,10 @@ static U32 ZDICT_checkMerge(dictItem* table, dictItem elt, U32 eltNbToSkip)
393
405
  /* front overlap */
394
406
  for (u=1; u<tableSize; u++) {
395
407
  if (u==eltNbToSkip) continue;
408
+
396
409
  if ((table[u].pos + table[u].length >= elt.pos) && (table[u].pos < elt.pos)) { /* overlap, existing < new */
397
410
  /* append */
398
- int addedLength = (int)eltEnd - (table[u].pos + table[u].length);
411
+ int const addedLength = (int)eltEnd - (table[u].pos + table[u].length);
399
412
  table[u].savings += elt.length / 8; /* rough approx bonus */
400
413
  if (addedLength > 0) { /* otherwise, elt fully included into existing */
401
414
  table[u].length += addedLength;
@@ -407,7 +420,18 @@ static U32 ZDICT_checkMerge(dictItem* table, dictItem elt, U32 eltNbToSkip)
407
420
  table[u] = table[u-1], u--;
408
421
  table[u] = elt;
409
422
  return u;
410
- } }
423
+ }
424
+
425
+ if (MEM_read64(buf + table[u].pos) == MEM_read64(buf + elt.pos + 1)) {
426
+ if (isIncluded(buf + table[u].pos, buf + elt.pos + 1, table[u].length)) {
427
+ size_t const addedLength = MAX( (int)elt.length - (int)table[u].length , 1 );
428
+ table[u].pos = elt.pos;
429
+ table[u].savings += (U32)(elt.savings * addedLength / elt.length);
430
+ table[u].length = MIN(elt.length, table[u].length + 1);
431
+ return u;
432
+ }
433
+ }
434
+ }
411
435
 
412
436
  return 0;
413
437
  }
@@ -425,14 +449,14 @@ static void ZDICT_removeDictItem(dictItem* table, U32 id)
425
449
  }
426
450
 
427
451
 
428
- static void ZDICT_insertDictItem(dictItem* table, U32 maxSize, dictItem elt)
452
+ static void ZDICT_insertDictItem(dictItem* table, U32 maxSize, dictItem elt, const void* buffer)
429
453
  {
430
454
  /* merge if possible */
431
- U32 mergeId = ZDICT_checkMerge(table, elt, 0);
455
+ U32 mergeId = ZDICT_tryMerge(table, elt, 0, buffer);
432
456
  if (mergeId) {
433
457
  U32 newMerge = 1;
434
458
  while (newMerge) {
435
- newMerge = ZDICT_checkMerge(table, table[mergeId], mergeId);
459
+ newMerge = ZDICT_tryMerge(table, table[mergeId], mergeId, buffer);
436
460
  if (newMerge) ZDICT_removeDictItem(table, mergeId);
437
461
  mergeId = newMerge;
438
462
  }
@@ -480,7 +504,7 @@ static size_t ZDICT_trainBuffer(dictItem* dictList, U32 dictListSize,
480
504
  # define DISPLAYUPDATE(l, ...) if (notificationLevel>=l) { \
481
505
  if (ZDICT_clockSpan(displayClock) > refreshRate) \
482
506
  { displayClock = clock(); DISPLAY(__VA_ARGS__); \
483
- if (notificationLevel>=4) fflush(stdout); } }
507
+ if (notificationLevel>=4) fflush(stderr); } }
484
508
 
485
509
  /* init */
486
510
  DISPLAYLEVEL(2, "\r%70s\r", ""); /* clean display line */
@@ -521,7 +545,7 @@ static size_t ZDICT_trainBuffer(dictItem* dictList, U32 dictListSize,
521
545
  if (doneMarks[cursor]) { cursor++; continue; }
522
546
  solution = ZDICT_analyzePos(doneMarks, suffix, reverseSuffix[cursor], buffer, minRatio, notificationLevel);
523
547
  if (solution.length==0) { cursor++; continue; }
524
- ZDICT_insertDictItem(dictList, dictListSize, solution);
548
+ ZDICT_insertDictItem(dictList, dictListSize, solution, buffer);
525
549
  cursor += solution.length;
526
550
  DISPLAYUPDATE(2, "\r%4.2f %% \r", (double)cursor / bufferSize * 100);
527
551
  } }
@@ -683,19 +707,19 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
683
707
  goto _cleanup;
684
708
  }
685
709
  if (offcodeMax>OFFCODE_MAX) { eSize = ERROR(dictionary_wrong); goto _cleanup; } /* too large dictionary */
686
- for (u=0; u<256; u++) countLit[u]=1; /* any character must be described */
687
- for (u=0; u<=offcodeMax; u++) offcodeCount[u]=1;
688
- for (u=0; u<=MaxML; u++) matchLengthCount[u]=1;
689
- for (u=0; u<=MaxLL; u++) litLengthCount[u]=1;
710
+ for (u=0; u<256; u++) countLit[u] = 1; /* any character must be described */
711
+ for (u=0; u<=offcodeMax; u++) offcodeCount[u] = 1;
712
+ for (u=0; u<=MaxML; u++) matchLengthCount[u] = 1;
713
+ for (u=0; u<=MaxLL; u++) litLengthCount[u] = 1;
690
714
  memset(repOffset, 0, sizeof(repOffset));
691
715
  repOffset[1] = repOffset[4] = repOffset[8] = 1;
692
716
  memset(bestRepOffset, 0, sizeof(bestRepOffset));
693
- if (compressionLevel==0) compressionLevel=g_compressionLevel_default;
717
+ if (compressionLevel==0) compressionLevel = g_compressionLevel_default;
694
718
  params = ZSTD_getParams(compressionLevel, averageSampleSize, dictBufferSize);
695
719
  { size_t const beginResult = ZSTD_compressBegin_advanced(esr.ref, dictBuffer, dictBufferSize, params, 0);
696
- if (ZSTD_isError(beginResult)) {
720
+ if (ZSTD_isError(beginResult)) {
721
+ DISPLAYLEVEL(1, "error : ZSTD_compressBegin_advanced() failed : %s \n", ZSTD_getErrorName(beginResult));
697
722
  eSize = ERROR(GENERIC);
698
- DISPLAYLEVEL(1, "error : ZSTD_compressBegin_advanced failed \n");
699
723
  goto _cleanup;
700
724
  } }
701
725
 
@@ -812,7 +836,6 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
812
836
  MEM_writeLE32(dstPtr+4, repStartValue[1]);
813
837
  MEM_writeLE32(dstPtr+8, repStartValue[2]);
814
838
  #endif
815
- //dstPtr += 12;
816
839
  eSize += 12;
817
840
 
818
841
  _cleanup:
@@ -831,7 +854,7 @@ size_t ZDICT_finalizeDictionary(void* dictBuffer, size_t dictBufferCapacity,
831
854
  ZDICT_params_t params)
832
855
  {
833
856
  size_t hSize;
834
- #define HBUFFSIZE 256
857
+ #define HBUFFSIZE 256 /* should prove large enough for all entropy headers */
835
858
  BYTE header[HBUFFSIZE];
836
859
  int const compressionLevel = (params.compressionLevel <= 0) ? g_compressionLevel_default : params.compressionLevel;
837
860
  U32 const notificationLevel = params.notificationLevel;
@@ -877,20 +900,11 @@ size_t ZDICT_addEntropyTablesFromBuffer_advanced(void* dictBuffer, size_t dictCo
877
900
  const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
878
901
  ZDICT_params_t params)
879
902
  {
880
- size_t hSize;
881
903
  int const compressionLevel = (params.compressionLevel <= 0) ? g_compressionLevel_default : params.compressionLevel;
882
904
  U32 const notificationLevel = params.notificationLevel;
905
+ size_t hSize = 8;
883
906
 
884
- /* dictionary header */
885
- MEM_writeLE32(dictBuffer, ZSTD_DICT_MAGIC);
886
- { U64 const randomID = XXH64((char*)dictBuffer + dictBufferCapacity - dictContentSize, dictContentSize, 0);
887
- U32 const compliantID = (randomID % ((1U<<31)-32768)) + 32768;
888
- U32 const dictID = params.dictID ? params.dictID : compliantID;
889
- MEM_writeLE32((char*)dictBuffer+4, dictID);
890
- }
891
- hSize = 8;
892
-
893
- /* entropy tables */
907
+ /* calculate entropy tables */
894
908
  DISPLAYLEVEL(2, "\r%70s\r", ""); /* clean display line */
895
909
  DISPLAYLEVEL(2, "statistics ... \n");
896
910
  { size_t const eSize = ZDICT_analyzeEntropy((char*)dictBuffer+hSize, dictBufferCapacity-hSize,
@@ -902,6 +916,13 @@ size_t ZDICT_addEntropyTablesFromBuffer_advanced(void* dictBuffer, size_t dictCo
902
916
  hSize += eSize;
903
917
  }
904
918
 
919
+ /* add dictionary header (after entropy tables) */
920
+ MEM_writeLE32(dictBuffer, ZSTD_DICT_MAGIC);
921
+ { U64 const randomID = XXH64((char*)dictBuffer + dictBufferCapacity - dictContentSize, dictContentSize, 0);
922
+ U32 const compliantID = (randomID % ((1U<<31)-32768)) + 32768;
923
+ U32 const dictID = params.dictID ? params.dictID : compliantID;
924
+ MEM_writeLE32((char*)dictBuffer+4, dictID);
925
+ }
905
926
 
906
927
  if (hSize + dictContentSize < dictBufferCapacity)
907
928
  memmove((char*)dictBuffer + hSize, (char*)dictBuffer + dictBufferCapacity - dictContentSize, dictContentSize);
@@ -929,8 +950,8 @@ size_t ZDICT_trainFromBuffer_unsafe(
929
950
 
930
951
  /* checks */
931
952
  if (!dictList) return ERROR(memory_allocation);
932
- if (maxDictSize <= g_provision_entropySize + g_min_fast_dictContent) { free(dictList); return ERROR(dstSize_tooSmall); }
933
- if (samplesBuffSize < ZDICT_MIN_SAMPLES_SIZE) { free(dictList); return 0; } /* not enough source to create dictionary */
953
+ if (maxDictSize < ZDICT_DICTSIZE_MIN) { free(dictList); return ERROR(dstSize_tooSmall); } /* requested dictionary size is too small */
954
+ if (samplesBuffSize < ZDICT_MIN_SAMPLES_SIZE) { free(dictList); return ERROR(dictionaryCreation_failed); } /* not enough source to create dictionary */
934
955
 
935
956
  /* init */
936
957
  ZDICT_initDictItem(dictList);
@@ -963,14 +984,15 @@ size_t ZDICT_trainFromBuffer_unsafe(
963
984
 
964
985
  /* create dictionary */
965
986
  { U32 dictContentSize = ZDICT_dictSize(dictList);
966
- if (dictContentSize < targetDictSize/3) {
987
+ if (dictContentSize < ZDICT_CONTENTSIZE_MIN) { free(dictList); return ERROR(dictionaryCreation_failed); } /* dictionary content too small */
988
+ if (dictContentSize < targetDictSize/4) {
967
989
  DISPLAYLEVEL(2, "! warning : selected content significantly smaller than requested (%u < %u) \n", dictContentSize, (U32)maxDictSize);
990
+ if (samplesBuffSize < 10 * targetDictSize)
991
+ DISPLAYLEVEL(2, "! consider increasing the number of samples (total size : %u MB)\n", (U32)(samplesBuffSize>>20));
968
992
  if (minRep > MINRATIO) {
969
993
  DISPLAYLEVEL(2, "! consider increasing selectivity to produce larger dictionary (-s%u) \n", selectivity+1);
970
994
  DISPLAYLEVEL(2, "! note : larger dictionaries are not necessarily better, test its efficiency on samples \n");
971
995
  }
972
- if (samplesBuffSize < 10 * targetDictSize)
973
- DISPLAYLEVEL(2, "! consider increasing the number of samples (total size : %u MB)\n", (U32)(samplesBuffSize>>20));
974
996
  }
975
997
 
976
998
  if ((dictContentSize > targetDictSize*3) && (nbSamples > 2*MINRATIO) && (selectivity>1)) {
@@ -978,7 +1000,7 @@ size_t ZDICT_trainFromBuffer_unsafe(
978
1000
  while ((nbSamples >> proposedSelectivity) <= MINRATIO) { proposedSelectivity--; }
979
1001
  DISPLAYLEVEL(2, "! note : calculated dictionary significantly larger than requested (%u > %u) \n", dictContentSize, (U32)maxDictSize);
980
1002
  DISPLAYLEVEL(2, "! consider increasing dictionary size, or produce denser dictionary (-s%u) \n", proposedSelectivity);
981
- DISPLAYLEVEL(2, "! always test dictionary efficiency on samples \n");
1003
+ DISPLAYLEVEL(2, "! always test dictionary efficiency on real samples \n");
982
1004
  }
983
1005
 
984
1006
  /* limit dictionary size */
@@ -88,7 +88,7 @@ ZDICTLIB_API size_t ZDICT_trainFromBuffer_advanced(void* dictBuffer, size_t dict
88
88
 
89
89
  /*! COVER_params_t :
90
90
  For all values 0 means default.
91
- kMin and d are the only required parameters.
91
+ k and d are the only required parameters.
92
92
  */
93
93
  typedef struct {
94
94
  unsigned k; /* Segment size : constraint: 0 < k : Reasonable range [16, 2048+] */
@@ -147,18 +147,18 @@ ZDICTLIB_API size_t COVER_optimizeTrainFromBuffer(void* dictBuffer, size_t dictB
147
147
  Samples must be stored concatenated in a flat buffer `samplesBuffer`,
148
148
  supplied with an array of sizes `samplesSizes`, providing the size of each sample in order.
149
149
 
150
- dictContentSize must be > ZDICT_CONTENTSIZE_MIN bytes.
151
- maxDictSize must be >= dictContentSize, and must be > ZDICT_DICTSIZE_MIN bytes.
150
+ dictContentSize must be >= ZDICT_CONTENTSIZE_MIN bytes.
151
+ maxDictSize must be >= dictContentSize, and must be >= ZDICT_DICTSIZE_MIN bytes.
152
152
 
153
153
  @return : size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`),
154
154
  or an error code, which can be tested by ZDICT_isError().
155
155
  note : ZDICT_finalizeDictionary() will push notifications into stderr if instructed to, using notificationLevel>0.
156
- note 2 : dictBuffer and customDictContent can overlap
156
+ note 2 : dictBuffer and dictContent can overlap
157
157
  */
158
- #define ZDICT_CONTENTSIZE_MIN 256
159
- #define ZDICT_DICTSIZE_MIN 512
158
+ #define ZDICT_CONTENTSIZE_MIN 128
159
+ #define ZDICT_DICTSIZE_MIN 256
160
160
  ZDICTLIB_API size_t ZDICT_finalizeDictionary(void* dictBuffer, size_t dictBufferCapacity,
161
- const void* customDictContent, size_t dictContentSize,
161
+ const void* dictContent, size_t dictContentSize,
162
162
  const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
163
163
  ZDICT_params_t parameters);
164
164
 
@@ -4,11 +4,11 @@ ZSTD Windows binary package
4
4
  #### The package contents
5
5
 
6
6
  - `zstd.exe` : Command Line Utility, supporting gzip-like arguments
7
- - `dll\libzstd.dll` : The DLL of ZSTD library
8
- - `dll\libzstd.lib` : The import library of ZSTD library for Visual C++
9
- - `example\` : The example of usage of ZSTD library
10
- - `include\` : Header files required with ZSTD library
11
- - `static\libzstd_static.lib` : The static ZSTD library
7
+ - `dll\libzstd.dll` : The ZSTD dynamic library (DLL)
8
+ - `dll\libzstd.lib` : The import library of the ZSTD dynamic library (DLL) for Visual C++
9
+ - `example\` : The example of usage of the ZSTD library
10
+ - `include\` : Header files required by the ZSTD library
11
+ - `static\libzstd_static.lib` : The static ZSTD library (LIB)
12
12
 
13
13
 
14
14
  #### Usage of Command Line Interface
@@ -9,6 +9,7 @@ COPY lib\common\mem.h bin\example\
9
9
  COPY lib\common\zstd_errors.h bin\example\
10
10
  COPY lib\common\zstd_internal.h bin\example\
11
11
  COPY lib\common\error_private.h bin\example\
12
+ COPY lib\common\xxhash.h bin\example\
12
13
  COPY lib\zstd.h bin\include\
13
14
  COPY lib\libzstd.a bin\static\libzstd_static.lib
14
15
  COPY lib\dll\libzstd.* bin\dll\
@@ -1432,7 +1432,7 @@ typedef struct ZSTD_Cctx_s
1432
1432
  #else
1433
1433
  U32 hashTable[HASH_TABLESIZE];
1434
1434
  #endif
1435
- BYTE buffer[WORKPLACESIZE];
1435
+ BYTE buffer[WORKPLACESIZE];
1436
1436
  } cctxi_t;
1437
1437
 
1438
1438
 
@@ -475,8 +475,8 @@ MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, U32 nbBits)
475
475
 
476
476
  MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD)
477
477
  {
478
- if (bitD->bitsConsumed > (sizeof(bitD->bitContainer)*8)) /* should never happen */
479
- return BIT_DStream_overflow;
478
+ if (bitD->bitsConsumed > (sizeof(bitD->bitContainer)*8)) /* should never happen */
479
+ return BIT_DStream_overflow;
480
480
 
481
481
  if (bitD->ptr >= bitD->start + sizeof(bitD->bitContainer))
482
482
  {
@@ -1334,8 +1334,8 @@ static size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsi
1334
1334
  else
1335
1335
  {
1336
1336
  bitCount -= (int)(8 * (iend - 4 - ip));
1337
- ip = iend - 4;
1338
- }
1337
+ ip = iend - 4;
1338
+ }
1339
1339
  bitStream = MEM_readLE32(ip) >> (bitCount & 31);
1340
1340
  }
1341
1341
  }
@@ -2040,7 +2040,7 @@ static size_t HUF_readDTableX4 (U32* DTable, const void* src, size_t srcSize)
2040
2040
  rankStart[0] = 0; /* forget 0w symbols; this is beginning of weight(1) */
2041
2041
  }
2042
2042
 
2043
- /* Build rankVal */
2043
+ /* Build rankVal */
2044
2044
  {
2045
2045
  const U32 minBits = tableLog+1 - maxW;
2046
2046
  U32 nextRankVal = 0;
@@ -2374,7 +2374,7 @@ static size_t HUF_readDTableX6 (U32* DTable, const void* src, size_t srcSize)
2374
2374
  rankStart[0] = 0; /* forget 0w symbols; this is beginning of weight(1) */
2375
2375
  }
2376
2376
 
2377
- /* Build rankVal */
2377
+ /* Build rankVal */
2378
2378
  {
2379
2379
  const U32 minBits = tableLog+1 - maxW;
2380
2380
  U32 nextRankVal = 0;
@@ -2948,14 +2948,14 @@ static size_t ZSTD_decodeLiteralsBlock(void* ctx,
2948
2948
  const size_t litSize = (MEM_readLE32(istart) & 0xFFFFFF) >> 2; /* no buffer issue : srcSize >= MIN_CBLOCK_SIZE */
2949
2949
  if (litSize > srcSize-11) /* risk of reading too far with wildcopy */
2950
2950
  {
2951
- if (litSize > srcSize-3) return ERROR(corruption_detected);
2952
- memcpy(dctx->litBuffer, istart, litSize);
2953
- dctx->litPtr = dctx->litBuffer;
2954
- dctx->litSize = litSize;
2955
- memset(dctx->litBuffer + dctx->litSize, 0, 8);
2956
- return litSize+3;
2957
- }
2958
- /* direct reference into compressed stream */
2951
+ if (litSize > srcSize-3) return ERROR(corruption_detected);
2952
+ memcpy(dctx->litBuffer, istart, litSize);
2953
+ dctx->litPtr = dctx->litBuffer;
2954
+ dctx->litSize = litSize;
2955
+ memset(dctx->litBuffer + dctx->litSize, 0, 8);
2956
+ return litSize+3;
2957
+ }
2958
+ /* direct reference into compressed stream */
2959
2959
  dctx->litPtr = istart+3;
2960
2960
  dctx->litSize = litSize;
2961
2961
  return litSize+3;
@@ -3515,13 +3515,13 @@ static size_t ZSTD_decompressContinue(ZSTD_DCtx* ctx, void* dst, size_t maxDstSi
3515
3515
 
3516
3516
  unsigned ZSTDv02_isError(size_t code)
3517
3517
  {
3518
- return ZSTD_isError(code);
3518
+ return ZSTD_isError(code);
3519
3519
  }
3520
3520
 
3521
3521
  size_t ZSTDv02_decompress( void* dst, size_t maxOriginalSize,
3522
3522
  const void* src, size_t compressedSize)
3523
3523
  {
3524
- return ZSTD_decompress(dst, maxOriginalSize, src, compressedSize);
3524
+ return ZSTD_decompress(dst, maxOriginalSize, src, compressedSize);
3525
3525
  }
3526
3526
 
3527
3527
  size_t ZSTDv02_findFrameCompressedSize(const void *src, size_t compressedSize)
@@ -3531,25 +3531,25 @@ size_t ZSTDv02_findFrameCompressedSize(const void *src, size_t compressedSize)
3531
3531
 
3532
3532
  ZSTDv02_Dctx* ZSTDv02_createDCtx(void)
3533
3533
  {
3534
- return (ZSTDv02_Dctx*)ZSTD_createDCtx();
3534
+ return (ZSTDv02_Dctx*)ZSTD_createDCtx();
3535
3535
  }
3536
3536
 
3537
3537
  size_t ZSTDv02_freeDCtx(ZSTDv02_Dctx* dctx)
3538
3538
  {
3539
- return ZSTD_freeDCtx((ZSTD_DCtx*)dctx);
3539
+ return ZSTD_freeDCtx((ZSTD_DCtx*)dctx);
3540
3540
  }
3541
3541
 
3542
3542
  size_t ZSTDv02_resetDCtx(ZSTDv02_Dctx* dctx)
3543
3543
  {
3544
- return ZSTD_resetDCtx((ZSTD_DCtx*)dctx);
3544
+ return ZSTD_resetDCtx((ZSTD_DCtx*)dctx);
3545
3545
  }
3546
3546
 
3547
3547
  size_t ZSTDv02_nextSrcSizeToDecompress(ZSTDv02_Dctx* dctx)
3548
3548
  {
3549
- return ZSTD_nextSrcSizeToDecompress((ZSTD_DCtx*)dctx);
3549
+ return ZSTD_nextSrcSizeToDecompress((ZSTD_DCtx*)dctx);
3550
3550
  }
3551
3551
 
3552
3552
  size_t ZSTDv02_decompressContinue(ZSTDv02_Dctx* dctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize)
3553
3553
  {
3554
- return ZSTD_decompressContinue((ZSTD_DCtx*)dctx, dst, maxDstSize, src, srcSize);
3554
+ return ZSTD_decompressContinue((ZSTD_DCtx*)dctx, dst, maxDstSize, src, srcSize);
3555
3555
  }