zstd-ruby 1.1.4.0 → 1.2.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (31) hide show
  1. checksums.yaml +4 -4
  2. data/.travis.yml +1 -1
  3. data/README.md +1 -1
  4. data/ext/zstdruby/libzstd/Makefile +11 -1
  5. data/ext/zstdruby/libzstd/README.md +8 -0
  6. data/ext/zstdruby/libzstd/common/bitstream.h +56 -27
  7. data/ext/zstdruby/libzstd/common/error_private.c +2 -1
  8. data/ext/zstdruby/libzstd/common/fse.h +7 -3
  9. data/ext/zstdruby/libzstd/common/huf.h +42 -19
  10. data/ext/zstdruby/libzstd/common/mem.h +2 -3
  11. data/ext/zstdruby/libzstd/common/zstd_errors.h +1 -0
  12. data/ext/zstdruby/libzstd/common/zstd_internal.h +3 -2
  13. data/ext/zstdruby/libzstd/compress/fse_compress.c +10 -10
  14. data/ext/zstdruby/libzstd/compress/zstd_compress.c +455 -244
  15. data/ext/zstdruby/libzstd/compress/zstd_opt.h +6 -4
  16. data/ext/zstdruby/libzstd/compress/zstdmt_compress.c +40 -28
  17. data/ext/zstdruby/libzstd/decompress/zstd_decompress.c +115 -219
  18. data/ext/zstdruby/libzstd/dictBuilder/cover.c +34 -13
  19. data/ext/zstdruby/libzstd/dictBuilder/zdict.c +65 -43
  20. data/ext/zstdruby/libzstd/dictBuilder/zdict.h +7 -7
  21. data/ext/zstdruby/libzstd/dll/example/README.md +5 -5
  22. data/ext/zstdruby/libzstd/dll/example/build_package.bat +1 -0
  23. data/ext/zstdruby/libzstd/legacy/zstd_v01.c +1 -1
  24. data/ext/zstdruby/libzstd/legacy/zstd_v02.c +21 -21
  25. data/ext/zstdruby/libzstd/legacy/zstd_v03.c +20 -20
  26. data/ext/zstdruby/libzstd/legacy/zstd_v04.c +4 -4
  27. data/ext/zstdruby/libzstd/legacy/zstd_v05.c +2 -2
  28. data/ext/zstdruby/libzstd/legacy/zstd_v06.c +2 -2
  29. data/ext/zstdruby/libzstd/zstd.h +88 -68
  30. data/lib/zstd-ruby/version.rb +1 -1
  31. metadata +3 -3
@@ -59,8 +59,6 @@ static int g_displayLevel = 2;
59
59
  if ((clock() - g_time > refreshRate) || (displayLevel >= 4)) { \
60
60
  g_time = clock(); \
61
61
  DISPLAY(__VA_ARGS__); \
62
- if (displayLevel >= 4) \
63
- fflush(stdout); \
64
62
  } \
65
63
  }
66
64
  #define DISPLAYUPDATE(l, ...) LOCALDISPLAYUPDATE(g_displayLevel, l, __VA_ARGS__)
@@ -236,10 +234,22 @@ static size_t COVER_sum(const size_t *samplesSizes, unsigned nbSamples) {
236
234
  * Returns 1 if the dmer at lp is greater than the dmer at rp.
237
235
  */
238
236
  static int COVER_cmp(COVER_ctx_t *ctx, const void *lp, const void *rp) {
239
- const U32 lhs = *(const U32 *)lp;
240
- const U32 rhs = *(const U32 *)rp;
237
+ U32 const lhs = *(U32 const *)lp;
238
+ U32 const rhs = *(U32 const *)rp;
241
239
  return memcmp(ctx->samples + lhs, ctx->samples + rhs, ctx->d);
242
240
  }
241
+ /**
242
+ * Faster version for d <= 8.
243
+ */
244
+ static int COVER_cmp8(COVER_ctx_t *ctx, const void *lp, const void *rp) {
245
+ U64 const mask = (ctx->d == 8) ? (U64)-1 : (((U64)1 << (8 * ctx->d)) - 1);
246
+ U64 const lhs = MEM_readLE64(ctx->samples + *(U32 const *)lp) & mask;
247
+ U64 const rhs = MEM_readLE64(ctx->samples + *(U32 const *)rp) & mask;
248
+ if (lhs < rhs) {
249
+ return -1;
250
+ }
251
+ return (lhs > rhs);
252
+ }
243
253
 
244
254
  /**
245
255
  * Same as COVER_cmp() except ties are broken by pointer value
@@ -253,6 +263,16 @@ static int COVER_strict_cmp(const void *lp, const void *rp) {
253
263
  }
254
264
  return result;
255
265
  }
266
+ /**
267
+ * Faster version for d <= 8.
268
+ */
269
+ static int COVER_strict_cmp8(const void *lp, const void *rp) {
270
+ int result = COVER_cmp8(g_ctx, lp, rp);
271
+ if (result == 0) {
272
+ result = lp < rp ? -1 : 1;
273
+ }
274
+ return result;
275
+ }
256
276
 
257
277
  /**
258
278
  * Returns the first pointer in [first, last) whose element does not compare
@@ -508,7 +528,7 @@ static int COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer,
508
528
  const BYTE *const samples = (const BYTE *)samplesBuffer;
509
529
  const size_t totalSamplesSize = COVER_sum(samplesSizes, nbSamples);
510
530
  /* Checks */
511
- if (totalSamplesSize < d ||
531
+ if (totalSamplesSize < MAX(d, sizeof(U64)) ||
512
532
  totalSamplesSize >= (size_t)COVER_MAX_SAMPLES_SIZE) {
513
533
  DISPLAYLEVEL(1, "Total samples size is too large, maximum size is %u MB\n",
514
534
  (COVER_MAX_SAMPLES_SIZE >> 20));
@@ -522,7 +542,7 @@ static int COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer,
522
542
  ctx->samplesSizes = samplesSizes;
523
543
  ctx->nbSamples = nbSamples;
524
544
  /* Partial suffix array */
525
- ctx->suffixSize = totalSamplesSize - d + 1;
545
+ ctx->suffixSize = totalSamplesSize - MAX(d, sizeof(U64)) + 1;
526
546
  ctx->suffix = (U32 *)malloc(ctx->suffixSize * sizeof(U32));
527
547
  /* Maps index to the dmerID */
528
548
  ctx->dmerAt = (U32 *)malloc(ctx->suffixSize * sizeof(U32));
@@ -556,7 +576,8 @@ static int COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer,
556
576
  }
557
577
  /* qsort doesn't take an opaque pointer, so pass as a global */
558
578
  g_ctx = ctx;
559
- qsort(ctx->suffix, ctx->suffixSize, sizeof(U32), &COVER_strict_cmp);
579
+ qsort(ctx->suffix, ctx->suffixSize, sizeof(U32),
580
+ (ctx->d <= 8 ? &COVER_strict_cmp8 : &COVER_strict_cmp));
560
581
  }
561
582
  DISPLAYLEVEL(2, "Computing frequencies\n");
562
583
  /* For each dmer group (group of positions with the same first d bytes):
@@ -566,8 +587,8 @@ static int COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer,
566
587
  * 2. We calculate how many samples the dmer occurs in and save it in
567
588
  * freqs[dmerId].
568
589
  */
569
- COVER_groupBy(ctx->suffix, ctx->suffixSize, sizeof(U32), ctx, &COVER_cmp,
570
- &COVER_group);
590
+ COVER_groupBy(ctx->suffix, ctx->suffixSize, sizeof(U32), ctx,
591
+ (ctx->d <= 8 ? &COVER_cmp8 : &COVER_cmp), &COVER_group);
571
592
  ctx->freqs = ctx->suffix;
572
593
  ctx->suffix = NULL;
573
594
  return 1;
@@ -918,10 +939,10 @@ ZDICTLIB_API size_t COVER_optimizeTrainFromBuffer(void *dictBuffer,
918
939
  /* constants */
919
940
  const unsigned nbThreads = parameters->nbThreads;
920
941
  const unsigned kMinD = parameters->d == 0 ? 6 : parameters->d;
921
- const unsigned kMaxD = parameters->d == 0 ? 16 : parameters->d;
922
- const unsigned kMinK = parameters->k == 0 ? kMaxD : parameters->k;
923
- const unsigned kMaxK = parameters->k == 0 ? 2048 : parameters->k;
924
- const unsigned kSteps = parameters->steps == 0 ? 32 : parameters->steps;
942
+ const unsigned kMaxD = parameters->d == 0 ? 8 : parameters->d;
943
+ const unsigned kMinK = parameters->k == 0 ? 50 : parameters->k;
944
+ const unsigned kMaxK = parameters->k == 0 ? 2000 : parameters->k;
945
+ const unsigned kSteps = parameters->steps == 0 ? 40 : parameters->steps;
925
946
  const unsigned kStepSize = MAX((kMaxK - kMinK) / kSteps, 1);
926
947
  const unsigned kIterations =
927
948
  (1 + (kMaxD - kMinD) / 2) * (1 + (kMaxK - kMinK) / kStepSize);
@@ -11,8 +11,9 @@
11
11
  /*-**************************************
12
12
  * Tuning parameters
13
13
  ****************************************/
14
+ #define MINRATIO 4 /* minimum nb of apparition to be selected in dictionary */
14
15
  #define ZDICT_MAX_SAMPLES_SIZE (2000U << 20)
15
- #define ZDICT_MIN_SAMPLES_SIZE 512
16
+ #define ZDICT_MIN_SAMPLES_SIZE (ZDICT_CONTENTSIZE_MIN * MINRATIO)
16
17
 
17
18
 
18
19
  /*-**************************************
@@ -59,11 +60,8 @@
59
60
 
60
61
  #define NOISELENGTH 32
61
62
 
62
- #define MINRATIO 4
63
63
  static const int g_compressionLevel_default = 6;
64
64
  static const U32 g_selectivity_default = 9;
65
- static const size_t g_provision_entropySize = 200;
66
- static const size_t g_min_fast_dictContent = 192;
67
65
 
68
66
 
69
67
  /*-*************************************
@@ -308,10 +306,10 @@ static dictItem ZDICT_analyzePos(
308
306
  /* look backward */
309
307
  length = MINMATCHLENGTH;
310
308
  while ((length >= MINMATCHLENGTH) & (start > 0)) {
311
- length = ZDICT_count(b + pos, b + suffix[start - 1]);
312
- if (length >= LLIMIT) length = LLIMIT - 1;
313
- lengthList[length]++;
314
- if (length >= MINMATCHLENGTH) start--;
309
+ length = ZDICT_count(b + pos, b + suffix[start - 1]);
310
+ if (length >= LLIMIT) length = LLIMIT - 1;
311
+ lengthList[length]++;
312
+ if (length >= MINMATCHLENGTH) start--;
315
313
  }
316
314
 
317
315
  /* largest useful length */
@@ -363,21 +361,35 @@ static dictItem ZDICT_analyzePos(
363
361
  }
364
362
 
365
363
 
364
+ static int isIncluded(const void* in, const void* container, size_t length)
365
+ {
366
+ const char* const ip = (const char*) in;
367
+ const char* const into = (const char*) container;
368
+ size_t u;
369
+
370
+ for (u=0; u<length; u++) { /* works because end of buffer is a noisy guard band */
371
+ if (ip[u] != into[u]) break;
372
+ }
373
+
374
+ return u==length;
375
+ }
376
+
366
377
  /*! ZDICT_checkMerge
367
378
  check if dictItem can be merged, do it if possible
368
379
  @return : id of destination elt, 0 if not merged
369
380
  */
370
- static U32 ZDICT_checkMerge(dictItem* table, dictItem elt, U32 eltNbToSkip)
381
+ static U32 ZDICT_tryMerge(dictItem* table, dictItem elt, U32 eltNbToSkip, const void* buffer)
371
382
  {
372
383
  const U32 tableSize = table->pos;
373
384
  const U32 eltEnd = elt.pos + elt.length;
385
+ const char* const buf = (const char*) buffer;
374
386
 
375
387
  /* tail overlap */
376
388
  U32 u; for (u=1; u<tableSize; u++) {
377
389
  if (u==eltNbToSkip) continue;
378
390
  if ((table[u].pos > elt.pos) && (table[u].pos <= eltEnd)) { /* overlap, existing > new */
379
391
  /* append */
380
- U32 addedLength = table[u].pos - elt.pos;
392
+ U32 const addedLength = table[u].pos - elt.pos;
381
393
  table[u].length += addedLength;
382
394
  table[u].pos = elt.pos;
383
395
  table[u].savings += elt.savings * addedLength / elt.length; /* rough approx */
@@ -393,9 +405,10 @@ static U32 ZDICT_checkMerge(dictItem* table, dictItem elt, U32 eltNbToSkip)
393
405
  /* front overlap */
394
406
  for (u=1; u<tableSize; u++) {
395
407
  if (u==eltNbToSkip) continue;
408
+
396
409
  if ((table[u].pos + table[u].length >= elt.pos) && (table[u].pos < elt.pos)) { /* overlap, existing < new */
397
410
  /* append */
398
- int addedLength = (int)eltEnd - (table[u].pos + table[u].length);
411
+ int const addedLength = (int)eltEnd - (table[u].pos + table[u].length);
399
412
  table[u].savings += elt.length / 8; /* rough approx bonus */
400
413
  if (addedLength > 0) { /* otherwise, elt fully included into existing */
401
414
  table[u].length += addedLength;
@@ -407,7 +420,18 @@ static U32 ZDICT_checkMerge(dictItem* table, dictItem elt, U32 eltNbToSkip)
407
420
  table[u] = table[u-1], u--;
408
421
  table[u] = elt;
409
422
  return u;
410
- } }
423
+ }
424
+
425
+ if (MEM_read64(buf + table[u].pos) == MEM_read64(buf + elt.pos + 1)) {
426
+ if (isIncluded(buf + table[u].pos, buf + elt.pos + 1, table[u].length)) {
427
+ size_t const addedLength = MAX( (int)elt.length - (int)table[u].length , 1 );
428
+ table[u].pos = elt.pos;
429
+ table[u].savings += (U32)(elt.savings * addedLength / elt.length);
430
+ table[u].length = MIN(elt.length, table[u].length + 1);
431
+ return u;
432
+ }
433
+ }
434
+ }
411
435
 
412
436
  return 0;
413
437
  }
@@ -425,14 +449,14 @@ static void ZDICT_removeDictItem(dictItem* table, U32 id)
425
449
  }
426
450
 
427
451
 
428
- static void ZDICT_insertDictItem(dictItem* table, U32 maxSize, dictItem elt)
452
+ static void ZDICT_insertDictItem(dictItem* table, U32 maxSize, dictItem elt, const void* buffer)
429
453
  {
430
454
  /* merge if possible */
431
- U32 mergeId = ZDICT_checkMerge(table, elt, 0);
455
+ U32 mergeId = ZDICT_tryMerge(table, elt, 0, buffer);
432
456
  if (mergeId) {
433
457
  U32 newMerge = 1;
434
458
  while (newMerge) {
435
- newMerge = ZDICT_checkMerge(table, table[mergeId], mergeId);
459
+ newMerge = ZDICT_tryMerge(table, table[mergeId], mergeId, buffer);
436
460
  if (newMerge) ZDICT_removeDictItem(table, mergeId);
437
461
  mergeId = newMerge;
438
462
  }
@@ -480,7 +504,7 @@ static size_t ZDICT_trainBuffer(dictItem* dictList, U32 dictListSize,
480
504
  # define DISPLAYUPDATE(l, ...) if (notificationLevel>=l) { \
481
505
  if (ZDICT_clockSpan(displayClock) > refreshRate) \
482
506
  { displayClock = clock(); DISPLAY(__VA_ARGS__); \
483
- if (notificationLevel>=4) fflush(stdout); } }
507
+ if (notificationLevel>=4) fflush(stderr); } }
484
508
 
485
509
  /* init */
486
510
  DISPLAYLEVEL(2, "\r%70s\r", ""); /* clean display line */
@@ -521,7 +545,7 @@ static size_t ZDICT_trainBuffer(dictItem* dictList, U32 dictListSize,
521
545
  if (doneMarks[cursor]) { cursor++; continue; }
522
546
  solution = ZDICT_analyzePos(doneMarks, suffix, reverseSuffix[cursor], buffer, minRatio, notificationLevel);
523
547
  if (solution.length==0) { cursor++; continue; }
524
- ZDICT_insertDictItem(dictList, dictListSize, solution);
548
+ ZDICT_insertDictItem(dictList, dictListSize, solution, buffer);
525
549
  cursor += solution.length;
526
550
  DISPLAYUPDATE(2, "\r%4.2f %% \r", (double)cursor / bufferSize * 100);
527
551
  } }
@@ -683,19 +707,19 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
683
707
  goto _cleanup;
684
708
  }
685
709
  if (offcodeMax>OFFCODE_MAX) { eSize = ERROR(dictionary_wrong); goto _cleanup; } /* too large dictionary */
686
- for (u=0; u<256; u++) countLit[u]=1; /* any character must be described */
687
- for (u=0; u<=offcodeMax; u++) offcodeCount[u]=1;
688
- for (u=0; u<=MaxML; u++) matchLengthCount[u]=1;
689
- for (u=0; u<=MaxLL; u++) litLengthCount[u]=1;
710
+ for (u=0; u<256; u++) countLit[u] = 1; /* any character must be described */
711
+ for (u=0; u<=offcodeMax; u++) offcodeCount[u] = 1;
712
+ for (u=0; u<=MaxML; u++) matchLengthCount[u] = 1;
713
+ for (u=0; u<=MaxLL; u++) litLengthCount[u] = 1;
690
714
  memset(repOffset, 0, sizeof(repOffset));
691
715
  repOffset[1] = repOffset[4] = repOffset[8] = 1;
692
716
  memset(bestRepOffset, 0, sizeof(bestRepOffset));
693
- if (compressionLevel==0) compressionLevel=g_compressionLevel_default;
717
+ if (compressionLevel==0) compressionLevel = g_compressionLevel_default;
694
718
  params = ZSTD_getParams(compressionLevel, averageSampleSize, dictBufferSize);
695
719
  { size_t const beginResult = ZSTD_compressBegin_advanced(esr.ref, dictBuffer, dictBufferSize, params, 0);
696
- if (ZSTD_isError(beginResult)) {
720
+ if (ZSTD_isError(beginResult)) {
721
+ DISPLAYLEVEL(1, "error : ZSTD_compressBegin_advanced() failed : %s \n", ZSTD_getErrorName(beginResult));
697
722
  eSize = ERROR(GENERIC);
698
- DISPLAYLEVEL(1, "error : ZSTD_compressBegin_advanced failed \n");
699
723
  goto _cleanup;
700
724
  } }
701
725
 
@@ -812,7 +836,6 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
812
836
  MEM_writeLE32(dstPtr+4, repStartValue[1]);
813
837
  MEM_writeLE32(dstPtr+8, repStartValue[2]);
814
838
  #endif
815
- //dstPtr += 12;
816
839
  eSize += 12;
817
840
 
818
841
  _cleanup:
@@ -831,7 +854,7 @@ size_t ZDICT_finalizeDictionary(void* dictBuffer, size_t dictBufferCapacity,
831
854
  ZDICT_params_t params)
832
855
  {
833
856
  size_t hSize;
834
- #define HBUFFSIZE 256
857
+ #define HBUFFSIZE 256 /* should prove large enough for all entropy headers */
835
858
  BYTE header[HBUFFSIZE];
836
859
  int const compressionLevel = (params.compressionLevel <= 0) ? g_compressionLevel_default : params.compressionLevel;
837
860
  U32 const notificationLevel = params.notificationLevel;
@@ -877,20 +900,11 @@ size_t ZDICT_addEntropyTablesFromBuffer_advanced(void* dictBuffer, size_t dictCo
877
900
  const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
878
901
  ZDICT_params_t params)
879
902
  {
880
- size_t hSize;
881
903
  int const compressionLevel = (params.compressionLevel <= 0) ? g_compressionLevel_default : params.compressionLevel;
882
904
  U32 const notificationLevel = params.notificationLevel;
905
+ size_t hSize = 8;
883
906
 
884
- /* dictionary header */
885
- MEM_writeLE32(dictBuffer, ZSTD_DICT_MAGIC);
886
- { U64 const randomID = XXH64((char*)dictBuffer + dictBufferCapacity - dictContentSize, dictContentSize, 0);
887
- U32 const compliantID = (randomID % ((1U<<31)-32768)) + 32768;
888
- U32 const dictID = params.dictID ? params.dictID : compliantID;
889
- MEM_writeLE32((char*)dictBuffer+4, dictID);
890
- }
891
- hSize = 8;
892
-
893
- /* entropy tables */
907
+ /* calculate entropy tables */
894
908
  DISPLAYLEVEL(2, "\r%70s\r", ""); /* clean display line */
895
909
  DISPLAYLEVEL(2, "statistics ... \n");
896
910
  { size_t const eSize = ZDICT_analyzeEntropy((char*)dictBuffer+hSize, dictBufferCapacity-hSize,
@@ -902,6 +916,13 @@ size_t ZDICT_addEntropyTablesFromBuffer_advanced(void* dictBuffer, size_t dictCo
902
916
  hSize += eSize;
903
917
  }
904
918
 
919
+ /* add dictionary header (after entropy tables) */
920
+ MEM_writeLE32(dictBuffer, ZSTD_DICT_MAGIC);
921
+ { U64 const randomID = XXH64((char*)dictBuffer + dictBufferCapacity - dictContentSize, dictContentSize, 0);
922
+ U32 const compliantID = (randomID % ((1U<<31)-32768)) + 32768;
923
+ U32 const dictID = params.dictID ? params.dictID : compliantID;
924
+ MEM_writeLE32((char*)dictBuffer+4, dictID);
925
+ }
905
926
 
906
927
  if (hSize + dictContentSize < dictBufferCapacity)
907
928
  memmove((char*)dictBuffer + hSize, (char*)dictBuffer + dictBufferCapacity - dictContentSize, dictContentSize);
@@ -929,8 +950,8 @@ size_t ZDICT_trainFromBuffer_unsafe(
929
950
 
930
951
  /* checks */
931
952
  if (!dictList) return ERROR(memory_allocation);
932
- if (maxDictSize <= g_provision_entropySize + g_min_fast_dictContent) { free(dictList); return ERROR(dstSize_tooSmall); }
933
- if (samplesBuffSize < ZDICT_MIN_SAMPLES_SIZE) { free(dictList); return 0; } /* not enough source to create dictionary */
953
+ if (maxDictSize < ZDICT_DICTSIZE_MIN) { free(dictList); return ERROR(dstSize_tooSmall); } /* requested dictionary size is too small */
954
+ if (samplesBuffSize < ZDICT_MIN_SAMPLES_SIZE) { free(dictList); return ERROR(dictionaryCreation_failed); } /* not enough source to create dictionary */
934
955
 
935
956
  /* init */
936
957
  ZDICT_initDictItem(dictList);
@@ -963,14 +984,15 @@ size_t ZDICT_trainFromBuffer_unsafe(
963
984
 
964
985
  /* create dictionary */
965
986
  { U32 dictContentSize = ZDICT_dictSize(dictList);
966
- if (dictContentSize < targetDictSize/3) {
987
+ if (dictContentSize < ZDICT_CONTENTSIZE_MIN) { free(dictList); return ERROR(dictionaryCreation_failed); } /* dictionary content too small */
988
+ if (dictContentSize < targetDictSize/4) {
967
989
  DISPLAYLEVEL(2, "! warning : selected content significantly smaller than requested (%u < %u) \n", dictContentSize, (U32)maxDictSize);
990
+ if (samplesBuffSize < 10 * targetDictSize)
991
+ DISPLAYLEVEL(2, "! consider increasing the number of samples (total size : %u MB)\n", (U32)(samplesBuffSize>>20));
968
992
  if (minRep > MINRATIO) {
969
993
  DISPLAYLEVEL(2, "! consider increasing selectivity to produce larger dictionary (-s%u) \n", selectivity+1);
970
994
  DISPLAYLEVEL(2, "! note : larger dictionaries are not necessarily better, test its efficiency on samples \n");
971
995
  }
972
- if (samplesBuffSize < 10 * targetDictSize)
973
- DISPLAYLEVEL(2, "! consider increasing the number of samples (total size : %u MB)\n", (U32)(samplesBuffSize>>20));
974
996
  }
975
997
 
976
998
  if ((dictContentSize > targetDictSize*3) && (nbSamples > 2*MINRATIO) && (selectivity>1)) {
@@ -978,7 +1000,7 @@ size_t ZDICT_trainFromBuffer_unsafe(
978
1000
  while ((nbSamples >> proposedSelectivity) <= MINRATIO) { proposedSelectivity--; }
979
1001
  DISPLAYLEVEL(2, "! note : calculated dictionary significantly larger than requested (%u > %u) \n", dictContentSize, (U32)maxDictSize);
980
1002
  DISPLAYLEVEL(2, "! consider increasing dictionary size, or produce denser dictionary (-s%u) \n", proposedSelectivity);
981
- DISPLAYLEVEL(2, "! always test dictionary efficiency on samples \n");
1003
+ DISPLAYLEVEL(2, "! always test dictionary efficiency on real samples \n");
982
1004
  }
983
1005
 
984
1006
  /* limit dictionary size */
@@ -88,7 +88,7 @@ ZDICTLIB_API size_t ZDICT_trainFromBuffer_advanced(void* dictBuffer, size_t dict
88
88
 
89
89
  /*! COVER_params_t :
90
90
  For all values 0 means default.
91
- kMin and d are the only required parameters.
91
+ k and d are the only required parameters.
92
92
  */
93
93
  typedef struct {
94
94
  unsigned k; /* Segment size : constraint: 0 < k : Reasonable range [16, 2048+] */
@@ -147,18 +147,18 @@ ZDICTLIB_API size_t COVER_optimizeTrainFromBuffer(void* dictBuffer, size_t dictB
147
147
  Samples must be stored concatenated in a flat buffer `samplesBuffer`,
148
148
  supplied with an array of sizes `samplesSizes`, providing the size of each sample in order.
149
149
 
150
- dictContentSize must be > ZDICT_CONTENTSIZE_MIN bytes.
151
- maxDictSize must be >= dictContentSize, and must be > ZDICT_DICTSIZE_MIN bytes.
150
+ dictContentSize must be >= ZDICT_CONTENTSIZE_MIN bytes.
151
+ maxDictSize must be >= dictContentSize, and must be >= ZDICT_DICTSIZE_MIN bytes.
152
152
 
153
153
  @return : size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`),
154
154
  or an error code, which can be tested by ZDICT_isError().
155
155
  note : ZDICT_finalizeDictionary() will push notifications into stderr if instructed to, using notificationLevel>0.
156
- note 2 : dictBuffer and customDictContent can overlap
156
+ note 2 : dictBuffer and dictContent can overlap
157
157
  */
158
- #define ZDICT_CONTENTSIZE_MIN 256
159
- #define ZDICT_DICTSIZE_MIN 512
158
+ #define ZDICT_CONTENTSIZE_MIN 128
159
+ #define ZDICT_DICTSIZE_MIN 256
160
160
  ZDICTLIB_API size_t ZDICT_finalizeDictionary(void* dictBuffer, size_t dictBufferCapacity,
161
- const void* customDictContent, size_t dictContentSize,
161
+ const void* dictContent, size_t dictContentSize,
162
162
  const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
163
163
  ZDICT_params_t parameters);
164
164
 
@@ -4,11 +4,11 @@ ZSTD Windows binary package
4
4
  #### The package contents
5
5
 
6
6
  - `zstd.exe` : Command Line Utility, supporting gzip-like arguments
7
- - `dll\libzstd.dll` : The DLL of ZSTD library
8
- - `dll\libzstd.lib` : The import library of ZSTD library for Visual C++
9
- - `example\` : The example of usage of ZSTD library
10
- - `include\` : Header files required with ZSTD library
11
- - `static\libzstd_static.lib` : The static ZSTD library
7
+ - `dll\libzstd.dll` : The ZSTD dynamic library (DLL)
8
+ - `dll\libzstd.lib` : The import library of the ZSTD dynamic library (DLL) for Visual C++
9
+ - `example\` : The example of usage of the ZSTD library
10
+ - `include\` : Header files required by the ZSTD library
11
+ - `static\libzstd_static.lib` : The static ZSTD library (LIB)
12
12
 
13
13
 
14
14
  #### Usage of Command Line Interface
@@ -9,6 +9,7 @@ COPY lib\common\mem.h bin\example\
9
9
  COPY lib\common\zstd_errors.h bin\example\
10
10
  COPY lib\common\zstd_internal.h bin\example\
11
11
  COPY lib\common\error_private.h bin\example\
12
+ COPY lib\common\xxhash.h bin\example\
12
13
  COPY lib\zstd.h bin\include\
13
14
  COPY lib\libzstd.a bin\static\libzstd_static.lib
14
15
  COPY lib\dll\libzstd.* bin\dll\
@@ -1432,7 +1432,7 @@ typedef struct ZSTD_Cctx_s
1432
1432
  #else
1433
1433
  U32 hashTable[HASH_TABLESIZE];
1434
1434
  #endif
1435
- BYTE buffer[WORKPLACESIZE];
1435
+ BYTE buffer[WORKPLACESIZE];
1436
1436
  } cctxi_t;
1437
1437
 
1438
1438
 
@@ -475,8 +475,8 @@ MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, U32 nbBits)
475
475
 
476
476
  MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD)
477
477
  {
478
- if (bitD->bitsConsumed > (sizeof(bitD->bitContainer)*8)) /* should never happen */
479
- return BIT_DStream_overflow;
478
+ if (bitD->bitsConsumed > (sizeof(bitD->bitContainer)*8)) /* should never happen */
479
+ return BIT_DStream_overflow;
480
480
 
481
481
  if (bitD->ptr >= bitD->start + sizeof(bitD->bitContainer))
482
482
  {
@@ -1334,8 +1334,8 @@ static size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsi
1334
1334
  else
1335
1335
  {
1336
1336
  bitCount -= (int)(8 * (iend - 4 - ip));
1337
- ip = iend - 4;
1338
- }
1337
+ ip = iend - 4;
1338
+ }
1339
1339
  bitStream = MEM_readLE32(ip) >> (bitCount & 31);
1340
1340
  }
1341
1341
  }
@@ -2040,7 +2040,7 @@ static size_t HUF_readDTableX4 (U32* DTable, const void* src, size_t srcSize)
2040
2040
  rankStart[0] = 0; /* forget 0w symbols; this is beginning of weight(1) */
2041
2041
  }
2042
2042
 
2043
- /* Build rankVal */
2043
+ /* Build rankVal */
2044
2044
  {
2045
2045
  const U32 minBits = tableLog+1 - maxW;
2046
2046
  U32 nextRankVal = 0;
@@ -2374,7 +2374,7 @@ static size_t HUF_readDTableX6 (U32* DTable, const void* src, size_t srcSize)
2374
2374
  rankStart[0] = 0; /* forget 0w symbols; this is beginning of weight(1) */
2375
2375
  }
2376
2376
 
2377
- /* Build rankVal */
2377
+ /* Build rankVal */
2378
2378
  {
2379
2379
  const U32 minBits = tableLog+1 - maxW;
2380
2380
  U32 nextRankVal = 0;
@@ -2948,14 +2948,14 @@ static size_t ZSTD_decodeLiteralsBlock(void* ctx,
2948
2948
  const size_t litSize = (MEM_readLE32(istart) & 0xFFFFFF) >> 2; /* no buffer issue : srcSize >= MIN_CBLOCK_SIZE */
2949
2949
  if (litSize > srcSize-11) /* risk of reading too far with wildcopy */
2950
2950
  {
2951
- if (litSize > srcSize-3) return ERROR(corruption_detected);
2952
- memcpy(dctx->litBuffer, istart, litSize);
2953
- dctx->litPtr = dctx->litBuffer;
2954
- dctx->litSize = litSize;
2955
- memset(dctx->litBuffer + dctx->litSize, 0, 8);
2956
- return litSize+3;
2957
- }
2958
- /* direct reference into compressed stream */
2951
+ if (litSize > srcSize-3) return ERROR(corruption_detected);
2952
+ memcpy(dctx->litBuffer, istart, litSize);
2953
+ dctx->litPtr = dctx->litBuffer;
2954
+ dctx->litSize = litSize;
2955
+ memset(dctx->litBuffer + dctx->litSize, 0, 8);
2956
+ return litSize+3;
2957
+ }
2958
+ /* direct reference into compressed stream */
2959
2959
  dctx->litPtr = istart+3;
2960
2960
  dctx->litSize = litSize;
2961
2961
  return litSize+3;
@@ -3515,13 +3515,13 @@ static size_t ZSTD_decompressContinue(ZSTD_DCtx* ctx, void* dst, size_t maxDstSi
3515
3515
 
3516
3516
  unsigned ZSTDv02_isError(size_t code)
3517
3517
  {
3518
- return ZSTD_isError(code);
3518
+ return ZSTD_isError(code);
3519
3519
  }
3520
3520
 
3521
3521
  size_t ZSTDv02_decompress( void* dst, size_t maxOriginalSize,
3522
3522
  const void* src, size_t compressedSize)
3523
3523
  {
3524
- return ZSTD_decompress(dst, maxOriginalSize, src, compressedSize);
3524
+ return ZSTD_decompress(dst, maxOriginalSize, src, compressedSize);
3525
3525
  }
3526
3526
 
3527
3527
  size_t ZSTDv02_findFrameCompressedSize(const void *src, size_t compressedSize)
@@ -3531,25 +3531,25 @@ size_t ZSTDv02_findFrameCompressedSize(const void *src, size_t compressedSize)
3531
3531
 
3532
3532
  ZSTDv02_Dctx* ZSTDv02_createDCtx(void)
3533
3533
  {
3534
- return (ZSTDv02_Dctx*)ZSTD_createDCtx();
3534
+ return (ZSTDv02_Dctx*)ZSTD_createDCtx();
3535
3535
  }
3536
3536
 
3537
3537
  size_t ZSTDv02_freeDCtx(ZSTDv02_Dctx* dctx)
3538
3538
  {
3539
- return ZSTD_freeDCtx((ZSTD_DCtx*)dctx);
3539
+ return ZSTD_freeDCtx((ZSTD_DCtx*)dctx);
3540
3540
  }
3541
3541
 
3542
3542
  size_t ZSTDv02_resetDCtx(ZSTDv02_Dctx* dctx)
3543
3543
  {
3544
- return ZSTD_resetDCtx((ZSTD_DCtx*)dctx);
3544
+ return ZSTD_resetDCtx((ZSTD_DCtx*)dctx);
3545
3545
  }
3546
3546
 
3547
3547
  size_t ZSTDv02_nextSrcSizeToDecompress(ZSTDv02_Dctx* dctx)
3548
3548
  {
3549
- return ZSTD_nextSrcSizeToDecompress((ZSTD_DCtx*)dctx);
3549
+ return ZSTD_nextSrcSizeToDecompress((ZSTD_DCtx*)dctx);
3550
3550
  }
3551
3551
 
3552
3552
  size_t ZSTDv02_decompressContinue(ZSTDv02_Dctx* dctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize)
3553
3553
  {
3554
- return ZSTD_decompressContinue((ZSTD_DCtx*)dctx, dst, maxDstSize, src, srcSize);
3554
+ return ZSTD_decompressContinue((ZSTD_DCtx*)dctx, dst, maxDstSize, src, srcSize);
3555
3555
  }