extzstd 0.0.3.CONCEPT-x86-mingw32 → 0.1-x86-mingw32

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (69) hide show
  1. checksums.yaml +4 -4
  2. data/HISTORY.ja +5 -0
  3. data/LICENSE +6 -6
  4. data/README.md +35 -22
  5. data/contrib/zstd/LICENSE +13 -9
  6. data/contrib/zstd/README.md +37 -44
  7. data/contrib/zstd/common/entropy_common.c +33 -39
  8. data/contrib/zstd/common/error_private.c +43 -0
  9. data/contrib/zstd/common/error_private.h +11 -60
  10. data/contrib/zstd/common/fse.h +11 -5
  11. data/contrib/zstd/common/fse_decompress.c +14 -16
  12. data/contrib/zstd/common/huf.h +1 -1
  13. data/contrib/zstd/common/mem.h +36 -43
  14. data/contrib/zstd/common/xxhash.c +31 -18
  15. data/contrib/zstd/common/xxhash.h +71 -35
  16. data/contrib/zstd/common/zbuff.h +29 -35
  17. data/contrib/zstd/common/zstd_common.c +24 -32
  18. data/contrib/zstd/common/zstd_errors.h +60 -0
  19. data/contrib/zstd/common/zstd_internal.h +109 -80
  20. data/contrib/zstd/compress/fse_compress.c +9 -6
  21. data/contrib/zstd/compress/huf_compress.c +30 -74
  22. data/contrib/zstd/compress/zbuff_compress.c +43 -51
  23. data/contrib/zstd/compress/zstd_compress.c +953 -763
  24. data/contrib/zstd/compress/zstd_opt.h +115 -261
  25. data/contrib/zstd/decompress/huf_decompress.c +29 -40
  26. data/contrib/zstd/decompress/zbuff_decompress.c +36 -78
  27. data/contrib/zstd/decompress/zstd_decompress.c +976 -496
  28. data/contrib/zstd/dictBuilder/divsufsort.h +5 -5
  29. data/contrib/zstd/dictBuilder/zdict.c +194 -229
  30. data/contrib/zstd/dictBuilder/zdict.h +66 -68
  31. data/contrib/zstd/legacy/zstd_legacy.h +168 -49
  32. data/contrib/zstd/legacy/zstd_v01.c +95 -178
  33. data/contrib/zstd/legacy/zstd_v01.h +12 -32
  34. data/contrib/zstd/legacy/zstd_v02.c +48 -274
  35. data/contrib/zstd/legacy/zstd_v02.h +12 -32
  36. data/contrib/zstd/legacy/zstd_v03.c +48 -274
  37. data/contrib/zstd/legacy/zstd_v03.h +12 -32
  38. data/contrib/zstd/legacy/zstd_v04.c +63 -320
  39. data/contrib/zstd/legacy/zstd_v04.h +13 -33
  40. data/contrib/zstd/legacy/zstd_v05.c +80 -345
  41. data/contrib/zstd/legacy/zstd_v05.h +9 -31
  42. data/contrib/zstd/legacy/zstd_v06.c +48 -458
  43. data/contrib/zstd/legacy/zstd_v06.h +41 -67
  44. data/contrib/zstd/legacy/zstd_v07.c +4544 -0
  45. data/contrib/zstd/legacy/zstd_v07.h +173 -0
  46. data/contrib/zstd/zstd.h +640 -0
  47. data/ext/extconf.rb +7 -3
  48. data/ext/extzstd.c +263 -106
  49. data/ext/extzstd.h +8 -6
  50. data/ext/extzstd_nogvls.h +0 -117
  51. data/ext/extzstd_stream.c +347 -0
  52. data/ext/zstd_common.c +8 -0
  53. data/ext/zstd_compress.c +6 -0
  54. data/ext/zstd_decompress.c +5 -0
  55. data/ext/zstd_dictbuilder.c +5 -0
  56. data/ext/zstd_legacy_v07.c +1 -0
  57. data/gemstub.rb +18 -16
  58. data/lib/2.1/extzstd.so +0 -0
  59. data/lib/2.2/extzstd.so +0 -0
  60. data/lib/2.3/extzstd.so +0 -0
  61. data/lib/extzstd/version.rb +1 -1
  62. data/lib/extzstd.rb +77 -43
  63. data/test/test_basic.rb +11 -6
  64. metadata +23 -11
  65. data/contrib/zstd/common/error_public.h +0 -77
  66. data/contrib/zstd/common/zstd.h +0 -475
  67. data/ext/extzstd_buffered.c +0 -265
  68. data/ext/zstd_amalgam.c +0 -18
  69. data/lib/2.0/extzstd.so +0 -0
@@ -1,40 +1,18 @@
1
- /*
2
- dictBuilder - dictionary builder for zstd
3
- Copyright (C) Yann Collet 2016
4
-
5
- BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
6
-
7
- Redistribution and use in source and binary forms, with or without
8
- modification, are permitted provided that the following conditions are
9
- met:
10
-
11
- * Redistributions of source code must retain the above copyright
12
- notice, this list of conditions and the following disclaimer.
13
- * Redistributions in binary form must reproduce the above
14
- copyright notice, this list of conditions and the following disclaimer
15
- in the documentation and/or other materials provided with the
16
- distribution.
17
-
18
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19
- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20
- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21
- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22
- OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23
- SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24
- LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25
- DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26
- THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27
- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28
- OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
-
30
- You can contact the author at :
31
- - Zstd homepage : https://www.zstd.net
32
- */
1
+ /**
2
+ * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
3
+ * All rights reserved.
4
+ *
5
+ * This source code is licensed under the BSD-style license found in the
6
+ * LICENSE file in the root directory of this source tree. An additional grant
7
+ * of patent rights can be found in the PATENTS file in the same directory.
8
+ */
9
+
33
10
 
34
11
  /*-**************************************
35
12
  * Tuning parameters
36
13
  ****************************************/
37
14
  #define ZDICT_MAX_SAMPLES_SIZE (2000U << 20)
15
+ #define ZDICT_MIN_SAMPLES_SIZE 512
38
16
 
39
17
 
40
18
  /*-**************************************
@@ -78,14 +56,12 @@
78
56
  #define MB *(1 <<20)
79
57
  #define GB *(1U<<30)
80
58
 
81
- #define DICTLISTSIZE 10000
59
+ #define DICTLISTSIZE_DEFAULT 10000
82
60
 
83
61
  #define NOISELENGTH 32
84
- #define PRIME1 2654435761U
85
- #define PRIME2 2246822519U
86
62
 
87
63
  #define MINRATIO 4
88
- static const U32 g_compressionLevel_default = 5;
64
+ static const int g_compressionLevel_default = 5;
89
65
  static const U32 g_selectivity_default = 9;
90
66
  static const size_t g_provision_entropySize = 200;
91
67
  static const size_t g_min_fast_dictContent = 192;
@@ -95,26 +71,18 @@ static const size_t g_min_fast_dictContent = 192;
95
71
  * Console display
96
72
  ***************************************/
97
73
  #define DISPLAY(...) { fprintf(stderr, __VA_ARGS__); fflush( stderr ); }
98
- #define DISPLAYLEVEL(l, ...) if (g_displayLevel>=l) { DISPLAY(__VA_ARGS__); }
99
- static unsigned g_displayLevel = 0; /* 0 : no display; 1: errors; 2: default; 4: full information */
100
-
101
- #define DISPLAYUPDATE(l, ...) if (g_displayLevel>=l) { \
102
- if (ZDICT_clockSpan(g_time) > refreshRate) \
103
- { g_time = clock(); DISPLAY(__VA_ARGS__); \
104
- if (g_displayLevel>=4) fflush(stdout); } }
105
- static const clock_t refreshRate = CLOCKS_PER_SEC * 3 / 10;
106
- static clock_t g_time = 0;
74
+ #define DISPLAYLEVEL(l, ...) if (notificationLevel>=l) { DISPLAY(__VA_ARGS__); } /* 0 : no display; 1: errors; 2: default; 3: details; 4: debug */
107
75
 
108
76
  static clock_t ZDICT_clockSpan(clock_t nPrevious) { return clock() - nPrevious; }
109
77
 
110
- static void ZDICT_printHex(U32 dlevel, const void* ptr, size_t length)
78
+ static void ZDICT_printHex(const void* ptr, size_t length)
111
79
  {
112
80
  const BYTE* const b = (const BYTE*)ptr;
113
81
  size_t u;
114
82
  for (u=0; u<length; u++) {
115
83
  BYTE c = b[u];
116
84
  if (c<32 || c>126) c = '.'; /* non-printable char */
117
- DISPLAYLEVEL(dlevel, "%c", c);
85
+ DISPLAY("%c", c);
118
86
  }
119
87
  }
120
88
 
@@ -126,6 +94,13 @@ unsigned ZDICT_isError(size_t errorCode) { return ERR_isError(errorCode); }
126
94
 
127
95
  const char* ZDICT_getErrorName(size_t errorCode) { return ERR_getErrorName(errorCode); }
128
96
 
97
+ unsigned ZDICT_getDictID(const void* dictBuffer, size_t dictSize)
98
+ {
99
+ if (dictSize < 8) return 0;
100
+ if (MEM_readLE32(dictBuffer) != ZSTD_DICT_MAGIC) return 0;
101
+ return MEM_readLE32((const char*)dictBuffer + 4);
102
+ }
103
+
129
104
 
130
105
  /*-********************************************************
131
106
  * Dictionary training functions
@@ -228,7 +203,7 @@ static void ZDICT_initDictItem(dictItem* d)
228
203
  static dictItem ZDICT_analyzePos(
229
204
  BYTE* doneMarks,
230
205
  const int* suffix, U32 start,
231
- const void* buffer, U32 minRatio)
206
+ const void* buffer, U32 minRatio, U32 notificationLevel)
232
207
  {
233
208
  U32 lengthList[LLIMIT] = {0};
234
209
  U32 cumulLength[LLIMIT] = {0};
@@ -332,12 +307,13 @@ static dictItem ZDICT_analyzePos(
332
307
  } while (length >=MINMATCHLENGTH);
333
308
 
334
309
  /* look backward */
335
- do {
336
- length = ZDICT_count(b + pos, b + suffix[start-1]);
337
- if (length >= LLIMIT) length = LLIMIT-1;
338
- lengthList[length]++;
339
- if (length >=MINMATCHLENGTH) start--;
340
- } while(length >= MINMATCHLENGTH);
310
+ length = MINMATCHLENGTH;
311
+ while ((length >= MINMATCHLENGTH) & (start > 0)) {
312
+ length = ZDICT_count(b + pos, b + suffix[start - 1]);
313
+ if (length >= LLIMIT) length = LLIMIT - 1;
314
+ lengthList[length]++;
315
+ if (length >= MINMATCHLENGTH) start--;
316
+ }
341
317
 
342
318
  /* largest useful length */
343
319
  memset(cumulLength, 0, sizeof(cumulLength));
@@ -395,21 +371,22 @@ static dictItem ZDICT_analyzePos(
395
371
  static U32 ZDICT_checkMerge(dictItem* table, dictItem elt, U32 eltNbToSkip)
396
372
  {
397
373
  const U32 tableSize = table->pos;
398
- const U32 max = elt.pos + (elt.length-1);
374
+ const U32 eltEnd = elt.pos + elt.length;
399
375
 
400
376
  /* tail overlap */
401
377
  U32 u; for (u=1; u<tableSize; u++) {
402
378
  if (u==eltNbToSkip) continue;
403
- if ((table[u].pos > elt.pos) && (table[u].pos < max)) { /* overlap */
379
+ if ((table[u].pos > elt.pos) && (table[u].pos <= eltEnd)) { /* overlap, existing > new */
404
380
  /* append */
405
381
  U32 addedLength = table[u].pos - elt.pos;
406
382
  table[u].length += addedLength;
407
383
  table[u].pos = elt.pos;
408
384
  table[u].savings += elt.savings * addedLength / elt.length; /* rough approx */
409
- table[u].savings += elt.length / 8; /* rough approx */
385
+ table[u].savings += elt.length / 8; /* rough approx bonus */
410
386
  elt = table[u];
387
+ /* sort : improve rank */
411
388
  while ((u>1) && (table[u-1].savings < elt.savings))
412
- table[u] = table[u-1], u--;
389
+ table[u] = table[u-1], u--;
413
390
  table[u] = elt;
414
391
  return u;
415
392
  } }
@@ -417,14 +394,15 @@ static U32 ZDICT_checkMerge(dictItem* table, dictItem elt, U32 eltNbToSkip)
417
394
  /* front overlap */
418
395
  for (u=1; u<tableSize; u++) {
419
396
  if (u==eltNbToSkip) continue;
420
- if ((table[u].pos + table[u].length > elt.pos) && (table[u].pos < elt.pos)) { /* overlap */
397
+ if ((table[u].pos + table[u].length >= elt.pos) && (table[u].pos < elt.pos)) { /* overlap, existing < new */
421
398
  /* append */
422
- int addedLength = (elt.pos + elt.length) - (table[u].pos + table[u].length);
423
- table[u].savings += elt.length / 8; /* rough approx */
424
- if (addedLength > 0) { /* otherwise, already included */
399
+ int addedLength = (int)eltEnd - (table[u].pos + table[u].length);
400
+ table[u].savings += elt.length / 8; /* rough approx bonus */
401
+ if (addedLength > 0) { /* otherwise, elt fully included into existing */
425
402
  table[u].length += addedLength;
426
403
  table[u].savings += elt.savings * addedLength / elt.length; /* rough approx */
427
404
  }
405
+ /* sort : improve rank */
428
406
  elt = table[u];
429
407
  while ((u>1) && (table[u-1].savings < elt.savings))
430
408
  table[u] = table[u-1], u--;
@@ -489,15 +467,21 @@ static U32 ZDICT_dictSize(const dictItem* dictList)
489
467
  static size_t ZDICT_trainBuffer(dictItem* dictList, U32 dictListSize,
490
468
  const void* const buffer, size_t bufferSize, /* buffer must end with noisy guard band */
491
469
  const size_t* fileSizes, unsigned nbFiles,
492
- U32 shiftRatio, unsigned maxDictSize)
470
+ U32 minRatio, U32 notificationLevel)
493
471
  {
494
472
  int* const suffix0 = (int*)malloc((bufferSize+2)*sizeof(*suffix0));
495
473
  int* const suffix = suffix0+1;
496
474
  U32* reverseSuffix = (U32*)malloc((bufferSize)*sizeof(*reverseSuffix));
497
475
  BYTE* doneMarks = (BYTE*)malloc((bufferSize+16)*sizeof(*doneMarks)); /* +16 for overflow security */
498
476
  U32* filePos = (U32*)malloc(nbFiles * sizeof(*filePos));
499
- U32 minRatio = nbFiles >> shiftRatio;
500
477
  size_t result = 0;
478
+ clock_t displayClock = 0;
479
+ clock_t const refreshRate = CLOCKS_PER_SEC * 3 / 10;
480
+
481
+ # define DISPLAYUPDATE(l, ...) if (notificationLevel>=l) { \
482
+ if (ZDICT_clockSpan(displayClock) > refreshRate) \
483
+ { displayClock = clock(); DISPLAY(__VA_ARGS__); \
484
+ if (notificationLevel>=4) fflush(stdout); } }
501
485
 
502
486
  /* init */
503
487
  DISPLAYLEVEL(2, "\r%70s\r", ""); /* clean display line */
@@ -523,7 +507,8 @@ static size_t ZDICT_trainBuffer(dictItem* dictList, U32 dictListSize,
523
507
  { size_t pos;
524
508
  for (pos=0; pos < bufferSize; pos++)
525
509
  reverseSuffix[suffix[pos]] = (U32)pos;
526
- /* build file pos */
510
+ /* note filePos tracks borders between samples.
511
+ It's not used at this stage, but planned to become useful in a later update */
527
512
  filePos[0] = 0;
528
513
  for (pos=1; pos<nbFiles; pos++)
529
514
  filePos[pos] = (U32)(filePos[pos-1] + fileSizes[pos-1]);
@@ -535,23 +520,13 @@ static size_t ZDICT_trainBuffer(dictItem* dictList, U32 dictListSize,
535
520
  { U32 cursor; for (cursor=0; cursor < bufferSize; ) {
536
521
  dictItem solution;
537
522
  if (doneMarks[cursor]) { cursor++; continue; }
538
- solution = ZDICT_analyzePos(doneMarks, suffix, reverseSuffix[cursor], buffer, minRatio);
523
+ solution = ZDICT_analyzePos(doneMarks, suffix, reverseSuffix[cursor], buffer, minRatio, notificationLevel);
539
524
  if (solution.length==0) { cursor++; continue; }
540
525
  ZDICT_insertDictItem(dictList, dictListSize, solution);
541
526
  cursor += solution.length;
542
527
  DISPLAYUPDATE(2, "\r%4.2f %% \r", (double)cursor / bufferSize * 100);
543
528
  } }
544
529
 
545
- /* limit dictionary size */
546
- { U32 const max = dictList->pos; /* convention : nb of useful elts within dictList */
547
- U32 currentSize = 0;
548
- U32 n; for (n=1; n<max; n++) {
549
- currentSize += dictList[n].length;
550
- if (currentSize > maxDictSize) break;
551
- }
552
- dictList->pos = n;
553
- }
554
-
555
530
  _cleanup:
556
531
  free(suffix0);
557
532
  free(reverseSuffix);
@@ -563,10 +538,12 @@ _cleanup:
563
538
 
564
539
  static void ZDICT_fillNoise(void* buffer, size_t length)
565
540
  {
566
- unsigned acc = PRIME1;
541
+ unsigned const prime1 = 2654435761U;
542
+ unsigned const prime2 = 2246822519U;
543
+ unsigned acc = prime1;
567
544
  size_t p=0;;
568
545
  for (p=0; p<length; p++) {
569
- acc *= PRIME2;
546
+ acc *= prime2;
570
547
  ((unsigned char*)buffer)[p] = (unsigned char)(acc >> 21);
571
548
  }
572
549
  }
@@ -576,23 +553,23 @@ typedef struct
576
553
  {
577
554
  ZSTD_CCtx* ref;
578
555
  ZSTD_CCtx* zc;
579
- void* workPlace; /* must be ZSTD_BLOCKSIZE_MAX allocated */
556
+ void* workPlace; /* must be ZSTD_BLOCKSIZE_ABSOLUTEMAX allocated */
580
557
  } EStats_ress_t;
581
558
 
582
559
  #define MAXREPOFFSET 1024
583
560
 
584
561
  static void ZDICT_countEStats(EStats_ress_t esr, ZSTD_parameters params,
585
562
  U32* countLit, U32* offsetcodeCount, U32* matchlengthCount, U32* litlengthCount, U32* repOffsets,
586
- const void* src, size_t srcSize)
563
+ const void* src, size_t srcSize, U32 notificationLevel)
587
564
  {
588
- size_t const blockSizeMax = MIN (ZSTD_BLOCKSIZE_MAX, 1 << params.cParams.windowLog);
565
+ size_t const blockSizeMax = MIN (ZSTD_BLOCKSIZE_ABSOLUTEMAX, 1 << params.cParams.windowLog);
589
566
  size_t cSize;
590
567
 
591
568
  if (srcSize > blockSizeMax) srcSize = blockSizeMax; /* protection vs large samples */
592
- { size_t const errorCode = ZSTD_copyCCtx(esr.zc, esr.ref);
593
- if (ZSTD_isError(errorCode)) { DISPLAYLEVEL(1, "warning : ZSTD_copyCCtx failed \n"); return; }
594
- }
595
- cSize = ZSTD_compressBlock(esr.zc, esr.workPlace, ZSTD_BLOCKSIZE_MAX, src, srcSize);
569
+ { size_t const errorCode = ZSTD_copyCCtx(esr.zc, esr.ref, 0);
570
+ if (ZSTD_isError(errorCode)) { DISPLAYLEVEL(1, "warning : ZSTD_copyCCtx failed \n"); return; }
571
+ }
572
+ cSize = ZSTD_compressBlock(esr.zc, esr.workPlace, ZSTD_BLOCKSIZE_ABSOLUTEMAX, src, srcSize);
596
573
  if (ZSTD_isError(cSize)) { DISPLAYLEVEL(1, "warning : could not compress sample size %u \n", (U32)srcSize); return; }
597
574
 
598
575
  if (cSize) { /* if == 0; block is not compressible */
@@ -605,34 +582,33 @@ static void ZDICT_countEStats(EStats_ress_t esr, ZSTD_parameters params,
605
582
  }
606
583
 
607
584
  /* seqStats */
608
- { size_t const nbSeq = (size_t)(seqStorePtr->offset - seqStorePtr->offsetStart);
609
- ZSTD_seqToCodes(seqStorePtr, nbSeq);
585
+ { U32 const nbSeq = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart);
586
+ ZSTD_seqToCodes(seqStorePtr);
610
587
 
611
- { const BYTE* codePtr = seqStorePtr->offCodeStart;
612
- size_t u;
588
+ { const BYTE* codePtr = seqStorePtr->ofCode;
589
+ U32 u;
613
590
  for (u=0; u<nbSeq; u++) offsetcodeCount[codePtr[u]]++;
614
591
  }
615
592
 
616
- { const BYTE* codePtr = seqStorePtr->mlCodeStart;
617
- size_t u;
593
+ { const BYTE* codePtr = seqStorePtr->mlCode;
594
+ U32 u;
618
595
  for (u=0; u<nbSeq; u++) matchlengthCount[codePtr[u]]++;
619
596
  }
620
597
 
621
- { const BYTE* codePtr = seqStorePtr->llCodeStart;
622
- size_t u;
598
+ { const BYTE* codePtr = seqStorePtr->llCode;
599
+ U32 u;
623
600
  for (u=0; u<nbSeq; u++) litlengthCount[codePtr[u]]++;
624
- } }
601
+ }
625
602
 
626
- /* rep offsets */
627
- { const U32* const offsetPtr = seqStorePtr->offsetStart;
628
- U32 offset1 = offsetPtr[0] - 3;
629
- U32 offset2 = offsetPtr[1] - 3;
630
- if (offset1 >= MAXREPOFFSET) offset1 = 0;
631
- if (offset2 >= MAXREPOFFSET) offset2 = 0;
632
- repOffsets[offset1] += 3;
633
- repOffsets[offset2] += 1;
634
- }
635
- }
603
+ if (nbSeq >= 2) { /* rep offsets */
604
+ const seqDef* const seq = seqStorePtr->sequencesStart;
605
+ U32 offset1 = seq[0].offset - 3;
606
+ U32 offset2 = seq[1].offset - 3;
607
+ if (offset1 >= MAXREPOFFSET) offset1 = 0;
608
+ if (offset2 >= MAXREPOFFSET) offset2 = 0;
609
+ repOffsets[offset1] += 3;
610
+ repOffsets[offset2] += 1;
611
+ } } }
636
612
  }
637
613
 
638
614
  /*
@@ -671,60 +647,65 @@ static void ZDICT_insertSortCount(offsetCount_t table[ZSTD_REP_NUM+1], U32 val,
671
647
  }
672
648
 
673
649
 
674
- #define OFFCODE_MAX 18 /* only applicable to first block */
650
+ #define OFFCODE_MAX 30 /* only applicable to first block */
675
651
  static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
676
- unsigned compressionLevel,
677
- const void* srcBuffer, const size_t* fileSizes, unsigned nbFiles,
678
- const void* dictBuffer, size_t dictBufferSize)
652
+ unsigned compressionLevel,
653
+ const void* srcBuffer, const size_t* fileSizes, unsigned nbFiles,
654
+ const void* dictBuffer, size_t dictBufferSize,
655
+ unsigned notificationLevel)
679
656
  {
680
657
  U32 countLit[256];
681
658
  HUF_CREATE_STATIC_CTABLE(hufTable, 255);
682
659
  U32 offcodeCount[OFFCODE_MAX+1];
683
660
  short offcodeNCount[OFFCODE_MAX+1];
661
+ U32 offcodeMax = ZSTD_highbit32((U32)(dictBufferSize + 128 KB));
684
662
  U32 matchLengthCount[MaxML+1];
685
663
  short matchLengthNCount[MaxML+1];
686
664
  U32 litLengthCount[MaxLL+1];
687
665
  short litLengthNCount[MaxLL+1];
688
- U32 repOffset[MAXREPOFFSET] = { 0 };
666
+ U32 repOffset[MAXREPOFFSET];
689
667
  offsetCount_t bestRepOffset[ZSTD_REP_NUM+1];
690
668
  EStats_ress_t esr;
691
669
  ZSTD_parameters params;
692
- U32 u, huffLog = 12, Offlog = OffFSELog, mlLog = MLFSELog, llLog = LLFSELog, total;
670
+ U32 u, huffLog = 11, Offlog = OffFSELog, mlLog = MLFSELog, llLog = LLFSELog, total;
693
671
  size_t pos = 0, errorCode;
694
672
  size_t eSize = 0;
695
673
  size_t const totalSrcSize = ZDICT_totalSampleSize(fileSizes, nbFiles);
696
- size_t const averageSampleSize = totalSrcSize / nbFiles;
674
+ size_t const averageSampleSize = totalSrcSize / (nbFiles + !nbFiles);
697
675
  BYTE* dstPtr = (BYTE*)dstBuffer;
698
676
 
699
677
  /* init */
678
+ esr.ref = ZSTD_createCCtx();
679
+ esr.zc = ZSTD_createCCtx();
680
+ esr.workPlace = malloc(ZSTD_BLOCKSIZE_ABSOLUTEMAX);
681
+ if (!esr.ref || !esr.zc || !esr.workPlace) {
682
+ eSize = ERROR(memory_allocation);
683
+ DISPLAYLEVEL(1, "Not enough memory \n");
684
+ goto _cleanup;
685
+ }
686
+ if (offcodeMax>OFFCODE_MAX) { eSize = ERROR(dictionary_wrong); goto _cleanup; } /* too large dictionary */
700
687
  for (u=0; u<256; u++) countLit[u]=1; /* any character must be described */
701
- for (u=0; u<=OFFCODE_MAX; u++) offcodeCount[u]=1;
688
+ for (u=0; u<=offcodeMax; u++) offcodeCount[u]=1;
702
689
  for (u=0; u<=MaxML; u++) matchLengthCount[u]=1;
703
690
  for (u=0; u<=MaxLL; u++) litLengthCount[u]=1;
691
+ memset(repOffset, 0, sizeof(repOffset));
704
692
  repOffset[1] = repOffset[4] = repOffset[8] = 1;
705
693
  memset(bestRepOffset, 0, sizeof(bestRepOffset));
706
- esr.ref = ZSTD_createCCtx();
707
- esr.zc = ZSTD_createCCtx();
708
- esr.workPlace = malloc(ZSTD_BLOCKSIZE_MAX);
709
- if (!esr.ref || !esr.zc || !esr.workPlace) {
710
- eSize = ERROR(memory_allocation);
711
- DISPLAYLEVEL(1, "Not enough memory");
712
- goto _cleanup;
713
- }
714
694
  if (compressionLevel==0) compressionLevel=g_compressionLevel_default;
715
695
  params = ZSTD_getParams(compressionLevel, averageSampleSize, dictBufferSize);
716
- { size_t const beginResult = ZSTD_compressBegin_advanced(esr.ref, dictBuffer, dictBufferSize, params, 0);
717
- if (ZSTD_isError(beginResult)) {
718
- eSize = ERROR(GENERIC);
719
- DISPLAYLEVEL(1, "error : ZSTD_compressBegin_advanced failed ");
720
- goto _cleanup;
721
- } }
696
+ { size_t const beginResult = ZSTD_compressBegin_advanced(esr.ref, dictBuffer, dictBufferSize, params, 0);
697
+ if (ZSTD_isError(beginResult)) {
698
+ eSize = ERROR(GENERIC);
699
+ DISPLAYLEVEL(1, "error : ZSTD_compressBegin_advanced failed \n");
700
+ goto _cleanup;
701
+ } }
722
702
 
723
703
  /* collect stats on all files */
724
704
  for (u=0; u<nbFiles; u++) {
725
705
  ZDICT_countEStats(esr, params,
726
- countLit, offcodeCount, matchLengthCount, litLengthCount, repOffset,
727
- (const char*)srcBuffer + pos, fileSizes[u]);
706
+ countLit, offcodeCount, matchLengthCount, litLengthCount, repOffset,
707
+ (const char*)srcBuffer + pos, fileSizes[u],
708
+ notificationLevel);
728
709
  pos += fileSizes[u];
729
710
  }
730
711
 
@@ -732,7 +713,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
732
713
  errorCode = HUF_buildCTable (hufTable, countLit, 255, huffLog);
733
714
  if (HUF_isError(errorCode)) {
734
715
  eSize = ERROR(GENERIC);
735
- DISPLAYLEVEL(1, "HUF_buildCTable error");
716
+ DISPLAYLEVEL(1, "HUF_buildCTable error \n");
736
717
  goto _cleanup;
737
718
  }
738
719
  huffLog = (U32)errorCode;
@@ -744,11 +725,11 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
744
725
  }
745
726
  /* note : the result of this phase should be used to better appreciate the impact on statistics */
746
727
 
747
- total=0; for (u=0; u<=OFFCODE_MAX; u++) total+=offcodeCount[u];
748
- errorCode = FSE_normalizeCount(offcodeNCount, Offlog, offcodeCount, total, OFFCODE_MAX);
728
+ total=0; for (u=0; u<=offcodeMax; u++) total+=offcodeCount[u];
729
+ errorCode = FSE_normalizeCount(offcodeNCount, Offlog, offcodeCount, total, offcodeMax);
749
730
  if (FSE_isError(errorCode)) {
750
731
  eSize = ERROR(GENERIC);
751
- DISPLAYLEVEL(1, "FSE_normalizeCount error with offcodeCount");
732
+ DISPLAYLEVEL(1, "FSE_normalizeCount error with offcodeCount \n");
752
733
  goto _cleanup;
753
734
  }
754
735
  Offlog = (U32)errorCode;
@@ -757,7 +738,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
757
738
  errorCode = FSE_normalizeCount(matchLengthNCount, mlLog, matchLengthCount, total, MaxML);
758
739
  if (FSE_isError(errorCode)) {
759
740
  eSize = ERROR(GENERIC);
760
- DISPLAYLEVEL(1, "FSE_normalizeCount error with matchLengthCount");
741
+ DISPLAYLEVEL(1, "FSE_normalizeCount error with matchLengthCount \n");
761
742
  goto _cleanup;
762
743
  }
763
744
  mlLog = (U32)errorCode;
@@ -766,17 +747,16 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
766
747
  errorCode = FSE_normalizeCount(litLengthNCount, llLog, litLengthCount, total, MaxLL);
767
748
  if (FSE_isError(errorCode)) {
768
749
  eSize = ERROR(GENERIC);
769
- DISPLAYLEVEL(1, "FSE_normalizeCount error with litLengthCount");
750
+ DISPLAYLEVEL(1, "FSE_normalizeCount error with litLengthCount \n");
770
751
  goto _cleanup;
771
752
  }
772
753
  llLog = (U32)errorCode;
773
754
 
774
-
775
755
  /* write result to buffer */
776
756
  { size_t const hhSize = HUF_writeCTable(dstPtr, maxDstSize, hufTable, 255, huffLog);
777
757
  if (HUF_isError(hhSize)) {
778
758
  eSize = ERROR(GENERIC);
779
- DISPLAYLEVEL(1, "HUF_writeCTable error");
759
+ DISPLAYLEVEL(1, "HUF_writeCTable error \n");
780
760
  goto _cleanup;
781
761
  }
782
762
  dstPtr += hhSize;
@@ -787,7 +767,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
787
767
  { size_t const ohSize = FSE_writeNCount(dstPtr, maxDstSize, offcodeNCount, OFFCODE_MAX, Offlog);
788
768
  if (FSE_isError(ohSize)) {
789
769
  eSize = ERROR(GENERIC);
790
- DISPLAYLEVEL(1, "FSE_writeNCount error with offcodeNCount");
770
+ DISPLAYLEVEL(1, "FSE_writeNCount error with offcodeNCount \n");
791
771
  goto _cleanup;
792
772
  }
793
773
  dstPtr += ohSize;
@@ -798,7 +778,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
798
778
  { size_t const mhSize = FSE_writeNCount(dstPtr, maxDstSize, matchLengthNCount, MaxML, mlLog);
799
779
  if (FSE_isError(mhSize)) {
800
780
  eSize = ERROR(GENERIC);
801
- DISPLAYLEVEL(1, "FSE_writeNCount error with matchLengthNCount");
781
+ DISPLAYLEVEL(1, "FSE_writeNCount error with matchLengthNCount \n");
802
782
  goto _cleanup;
803
783
  }
804
784
  dstPtr += mhSize;
@@ -809,7 +789,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
809
789
  { size_t const lhSize = FSE_writeNCount(dstPtr, maxDstSize, litLengthNCount, MaxLL, llLog);
810
790
  if (FSE_isError(lhSize)) {
811
791
  eSize = ERROR(GENERIC);
812
- DISPLAYLEVEL(1, "FSE_writeNCount error with litlengthNCount");
792
+ DISPLAYLEVEL(1, "FSE_writeNCount error with litlengthNCount \n");
813
793
  goto _cleanup;
814
794
  }
815
795
  dstPtr += lhSize;
@@ -819,7 +799,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
819
799
 
820
800
  if (maxDstSize<12) {
821
801
  eSize = ERROR(GENERIC);
822
- DISPLAYLEVEL(1, "not enough space to write RepOffsets");
802
+ DISPLAYLEVEL(1, "not enough space to write RepOffsets \n");
823
803
  goto _cleanup;
824
804
  }
825
805
  # if 0
@@ -833,7 +813,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
833
813
  MEM_writeLE32(dstPtr+4, repStartValue[1]);
834
814
  MEM_writeLE32(dstPtr+8, repStartValue[2]);
835
815
  #endif
836
- dstPtr += 12;
816
+ //dstPtr += 12;
837
817
  eSize += 12;
838
818
 
839
819
  _cleanup:
@@ -845,51 +825,13 @@ _cleanup:
845
825
  }
846
826
 
847
827
 
848
- #define DIB_FASTSEGMENTSIZE 64
849
- /*! ZDICT_fastSampling() (based on an idea proposed by Giuseppe Ottaviano) :
850
- Fill `dictBuffer` with stripes of size DIB_FASTSEGMENTSIZE from `samplesBuffer`,
851
- up to `dictSize`.
852
- Filling starts from the end of `dictBuffer`, down to maximum possible.
853
- if `dictSize` is not a multiply of DIB_FASTSEGMENTSIZE, some bytes at beginning of `dictBuffer` won't be used.
854
- @return : amount of data written into `dictBuffer`,
855
- or an error code
856
- */
857
- static size_t ZDICT_fastSampling(void* dictBuffer, size_t dictSize,
858
- const void* samplesBuffer, size_t samplesSize)
859
- {
860
- char* dstPtr = (char*)dictBuffer + dictSize;
861
- const char* srcPtr = (const char*)samplesBuffer;
862
- size_t const nbSegments = dictSize / DIB_FASTSEGMENTSIZE;
863
- size_t segNb, interSize;
864
-
865
- if (nbSegments <= 2) return ERROR(srcSize_wrong);
866
- if (samplesSize < dictSize) return ERROR(srcSize_wrong);
867
-
868
- /* first and last segments are part of dictionary, in case they contain interesting header/footer */
869
- dstPtr -= DIB_FASTSEGMENTSIZE;
870
- memcpy(dstPtr, srcPtr, DIB_FASTSEGMENTSIZE);
871
- dstPtr -= DIB_FASTSEGMENTSIZE;
872
- memcpy(dstPtr, srcPtr+samplesSize-DIB_FASTSEGMENTSIZE, DIB_FASTSEGMENTSIZE);
873
-
874
- /* regularly copy a segment */
875
- interSize = (samplesSize - nbSegments*DIB_FASTSEGMENTSIZE) / (nbSegments-1);
876
- srcPtr += DIB_FASTSEGMENTSIZE;
877
- for (segNb=2; segNb < nbSegments; segNb++) {
878
- srcPtr += interSize;
879
- dstPtr -= DIB_FASTSEGMENTSIZE;
880
- memcpy(dstPtr, srcPtr, DIB_FASTSEGMENTSIZE);
881
- srcPtr += DIB_FASTSEGMENTSIZE;
882
- }
883
-
884
- return nbSegments * DIB_FASTSEGMENTSIZE;
885
- }
886
-
887
828
  size_t ZDICT_addEntropyTablesFromBuffer_advanced(void* dictBuffer, size_t dictContentSize, size_t dictBufferCapacity,
888
829
  const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
889
830
  ZDICT_params_t params)
890
831
  {
891
832
  size_t hSize;
892
- unsigned const compressionLevel = (params.compressionLevel == 0) ? g_compressionLevel_default : params.compressionLevel;
833
+ int const compressionLevel = (params.compressionLevel <= 0) ? g_compressionLevel_default : params.compressionLevel;
834
+ U32 const notificationLevel = params.notificationLevel;
893
835
 
894
836
  /* dictionary header */
895
837
  MEM_writeLE32(dictBuffer, ZSTD_DICT_MAGIC);
@@ -903,10 +845,15 @@ size_t ZDICT_addEntropyTablesFromBuffer_advanced(void* dictBuffer, size_t dictCo
903
845
  /* entropy tables */
904
846
  DISPLAYLEVEL(2, "\r%70s\r", ""); /* clean display line */
905
847
  DISPLAYLEVEL(2, "statistics ... \n");
906
- hSize += ZDICT_analyzeEntropy((char*)dictBuffer+hSize, dictBufferCapacity-hSize,
848
+ { size_t const eSize = ZDICT_analyzeEntropy((char*)dictBuffer+hSize, dictBufferCapacity-hSize,
907
849
  compressionLevel,
908
850
  samplesBuffer, samplesSizes, nbSamples,
909
- (char*)dictBuffer + dictBufferCapacity - dictContentSize, dictContentSize);
851
+ (char*)dictBuffer + dictBufferCapacity - dictContentSize, dictContentSize,
852
+ notificationLevel);
853
+ if (ZDICT_isError(eSize)) return eSize;
854
+ hSize += eSize;
855
+ }
856
+
910
857
 
911
858
  if (hSize + dictContentSize < dictBufferCapacity)
912
859
  memmove((char*)dictBuffer + hSize, (char*)dictBuffer + dictBufferCapacity - dictContentSize, dictContentSize);
@@ -914,60 +861,86 @@ size_t ZDICT_addEntropyTablesFromBuffer_advanced(void* dictBuffer, size_t dictCo
914
861
  }
915
862
 
916
863
 
917
- #define DIB_MINSAMPLESSIZE (DIB_FASTSEGMENTSIZE*3)
918
864
  /*! ZDICT_trainFromBuffer_unsafe() :
919
- * `samplesBuffer` must be followed by noisy guard band.
920
- * @return : size of dictionary.
865
+ * Warning : `samplesBuffer` must be followed by noisy guard band.
866
+ * @return : size of dictionary, or an error code which can be tested with ZDICT_isError()
921
867
  */
922
868
  size_t ZDICT_trainFromBuffer_unsafe(
923
869
  void* dictBuffer, size_t maxDictSize,
924
870
  const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
925
871
  ZDICT_params_t params)
926
872
  {
927
- U32 const dictListSize = MAX( MAX(DICTLISTSIZE, nbSamples), (U32)(maxDictSize/16));
873
+ U32 const dictListSize = MAX(MAX(DICTLISTSIZE_DEFAULT, nbSamples), (U32)(maxDictSize/16));
928
874
  dictItem* const dictList = (dictItem*)malloc(dictListSize * sizeof(*dictList));
929
- unsigned selectivity = params.selectivityLevel;
875
+ unsigned const selectivity = params.selectivityLevel == 0 ? g_selectivity_default : params.selectivityLevel;
876
+ unsigned const minRep = (selectivity > 30) ? MINRATIO : nbSamples >> selectivity;
930
877
  size_t const targetDictSize = maxDictSize;
931
- size_t sBuffSize;
878
+ size_t const samplesBuffSize = ZDICT_totalSampleSize(samplesSizes, nbSamples);
932
879
  size_t dictSize = 0;
880
+ U32 const notificationLevel = params.notificationLevel;
933
881
 
934
882
  /* checks */
935
883
  if (!dictList) return ERROR(memory_allocation);
936
884
  if (maxDictSize <= g_provision_entropySize + g_min_fast_dictContent) { free(dictList); return ERROR(dstSize_tooSmall); }
885
+ if (samplesBuffSize < ZDICT_MIN_SAMPLES_SIZE) { free(dictList); return 0; } /* not enough source to create dictionary */
937
886
 
938
887
  /* init */
939
- { unsigned u; for (u=0, sBuffSize=0; u<nbSamples; u++) sBuffSize += samplesSizes[u]; }
940
- if (sBuffSize < DIB_MINSAMPLESSIZE) { free(dictList); return 0; } /* not enough source to create dictionary */
941
888
  ZDICT_initDictItem(dictList);
942
- g_displayLevel = params.notificationLevel;
943
- if (selectivity==0) selectivity = g_selectivity_default;
944
889
 
945
890
  /* build dictionary */
946
- if (selectivity>1) { /* selectivity == 1 => fast mode */
947
- ZDICT_trainBuffer(dictList, dictListSize,
948
- samplesBuffer, sBuffSize,
949
- samplesSizes, nbSamples,
950
- selectivity, (U32)targetDictSize);
951
-
952
- /* display best matches */
953
- if (g_displayLevel>= 3) {
954
- U32 const nb = 25;
955
- U32 const dictContentSize = ZDICT_dictSize(dictList);
956
- U32 u;
957
- DISPLAYLEVEL(3, "\n %u segments found, of total size %u \n", dictList[0].pos, dictContentSize);
958
- DISPLAYLEVEL(3, "list %u best segments \n", nb);
959
- for (u=1; u<=nb; u++) {
960
- U32 p = dictList[u].pos;
961
- U32 l = dictList[u].length;
962
- U32 d = MIN(40, l);
963
- DISPLAYLEVEL(3, "%3u:%3u bytes at pos %8u, savings %7u bytes |",
964
- u, l, p, dictList[u].savings);
965
- ZDICT_printHex(3, (const char*)samplesBuffer+p, d);
966
- DISPLAYLEVEL(3, "| \n");
967
- } } }
891
+ ZDICT_trainBuffer(dictList, dictListSize,
892
+ samplesBuffer, samplesBuffSize,
893
+ samplesSizes, nbSamples,
894
+ minRep, notificationLevel);
895
+
896
+ /* display best matches */
897
+ if (params.notificationLevel>= 3) {
898
+ U32 const nb = MIN(25, dictList[0].pos);
899
+ U32 const dictContentSize = ZDICT_dictSize(dictList);
900
+ U32 u;
901
+ DISPLAYLEVEL(3, "\n %u segments found, of total size %u \n", dictList[0].pos, dictContentSize);
902
+ DISPLAYLEVEL(3, "list %u best segments \n", nb);
903
+ for (u=1; u<=nb; u++) {
904
+ U32 pos = dictList[u].pos;
905
+ U32 length = dictList[u].length;
906
+ U32 printedLength = MIN(40, length);
907
+ DISPLAYLEVEL(3, "%3u:%3u bytes at pos %8u, savings %7u bytes |",
908
+ u, length, pos, dictList[u].savings);
909
+ ZDICT_printHex((const char*)samplesBuffer+pos, printedLength);
910
+ DISPLAYLEVEL(3, "| \n");
911
+ } }
912
+
968
913
 
969
914
  /* create dictionary */
970
915
  { U32 dictContentSize = ZDICT_dictSize(dictList);
916
+ if (dictContentSize < targetDictSize/3) {
917
+ DISPLAYLEVEL(2, "! warning : selected content significantly smaller than requested (%u < %u) \n", dictContentSize, (U32)maxDictSize);
918
+ if (minRep > MINRATIO) {
919
+ DISPLAYLEVEL(2, "! consider increasing selectivity to produce larger dictionary (-s%u) \n", selectivity+1);
920
+ DISPLAYLEVEL(2, "! note : larger dictionaries are not necessarily better, test its efficiency on samples \n");
921
+ }
922
+ if (samplesBuffSize < 10 * targetDictSize)
923
+ DISPLAYLEVEL(2, "! consider increasing the number of samples (total size : %u MB)\n", (U32)(samplesBuffSize>>20));
924
+ }
925
+
926
+ if ((dictContentSize > targetDictSize*3) && (nbSamples > 2*MINRATIO) && (selectivity>1)) {
927
+ U32 proposedSelectivity = selectivity-1;
928
+ while ((nbSamples >> proposedSelectivity) <= MINRATIO) { proposedSelectivity--; }
929
+ DISPLAYLEVEL(2, "! note : calculated dictionary significantly larger than requested (%u > %u) \n", dictContentSize, (U32)maxDictSize);
930
+ DISPLAYLEVEL(2, "! consider increasing dictionary size, or produce denser dictionary (-s%u) \n", proposedSelectivity);
931
+ DISPLAYLEVEL(2, "! always test dictionary efficiency on samples \n");
932
+ }
933
+
934
+ /* limit dictionary size */
935
+ { U32 const max = dictList->pos; /* convention : nb of useful elts within dictList */
936
+ U32 currentSize = 0;
937
+ U32 n; for (n=1; n<max; n++) {
938
+ currentSize += dictList[n].length;
939
+ if (currentSize > targetDictSize) { currentSize -= dictList[n].length; break; }
940
+ }
941
+ dictList->pos = n;
942
+ dictContentSize = currentSize;
943
+ }
971
944
 
972
945
  /* build dict content */
973
946
  { U32 u;
@@ -979,14 +952,6 @@ size_t ZDICT_trainFromBuffer_unsafe(
979
952
  memcpy(ptr, (const char*)samplesBuffer+dictList[u].pos, l);
980
953
  } }
981
954
 
982
- /* fast mode dict content */
983
- if (selectivity==1) { /* note could also be used to complete a dictionary, but not necessarily better */
984
- DISPLAYLEVEL(3, "\r%70s\r", ""); /* clean display line */
985
- DISPLAYLEVEL(3, "Adding %u KB with fast sampling \n", (U32)(targetDictSize>>10));
986
- dictContentSize = (U32)ZDICT_fastSampling(dictBuffer, targetDictSize,
987
- samplesBuffer, sBuffSize);
988
- }
989
-
990
955
  dictSize = ZDICT_addEntropyTablesFromBuffer_advanced(dictBuffer, dictContentSize, maxDictSize,
991
956
  samplesBuffer, samplesSizes, nbSamples,
992
957
  params);
@@ -1004,23 +969,23 @@ size_t ZDICT_trainFromBuffer_advanced(void* dictBuffer, size_t dictBufferCapacit
1004
969
  const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
1005
970
  ZDICT_params_t params)
1006
971
  {
972
+ size_t result;
1007
973
  void* newBuff;
1008
- size_t sBuffSize;
974
+ size_t const sBuffSize = ZDICT_totalSampleSize(samplesSizes, nbSamples);
975
+ if (sBuffSize < ZDICT_MIN_SAMPLES_SIZE) return 0; /* not enough content => no dictionary */
1009
976
 
1010
- { unsigned u; for (u=0, sBuffSize=0; u<nbSamples; u++) sBuffSize += samplesSizes[u]; }
1011
- if (sBuffSize==0) return 0; /* empty content => no dictionary */
1012
977
  newBuff = malloc(sBuffSize + NOISELENGTH);
1013
978
  if (!newBuff) return ERROR(memory_allocation);
1014
979
 
1015
980
  memcpy(newBuff, samplesBuffer, sBuffSize);
1016
981
  ZDICT_fillNoise((char*)newBuff + sBuffSize, NOISELENGTH); /* guard band, for end of buffer condition */
1017
982
 
1018
- { size_t const result = ZDICT_trainFromBuffer_unsafe(
983
+ result = ZDICT_trainFromBuffer_unsafe(
1019
984
  dictBuffer, dictBufferCapacity,
1020
985
  newBuff, samplesSizes, nbSamples,
1021
986
  params);
1022
- free(newBuff);
1023
- return result; }
987
+ free(newBuff);
988
+ return result;
1024
989
  }
1025
990
 
1026
991