extzstd 0.0.3.CONCEPT-x86-mingw32 → 0.1-x86-mingw32

Sign up to get free protection for your applications and to get access to all the features.
Files changed (69) hide show
  1. checksums.yaml +4 -4
  2. data/HISTORY.ja +5 -0
  3. data/LICENSE +6 -6
  4. data/README.md +35 -22
  5. data/contrib/zstd/LICENSE +13 -9
  6. data/contrib/zstd/README.md +37 -44
  7. data/contrib/zstd/common/entropy_common.c +33 -39
  8. data/contrib/zstd/common/error_private.c +43 -0
  9. data/contrib/zstd/common/error_private.h +11 -60
  10. data/contrib/zstd/common/fse.h +11 -5
  11. data/contrib/zstd/common/fse_decompress.c +14 -16
  12. data/contrib/zstd/common/huf.h +1 -1
  13. data/contrib/zstd/common/mem.h +36 -43
  14. data/contrib/zstd/common/xxhash.c +31 -18
  15. data/contrib/zstd/common/xxhash.h +71 -35
  16. data/contrib/zstd/common/zbuff.h +29 -35
  17. data/contrib/zstd/common/zstd_common.c +24 -32
  18. data/contrib/zstd/common/zstd_errors.h +60 -0
  19. data/contrib/zstd/common/zstd_internal.h +109 -80
  20. data/contrib/zstd/compress/fse_compress.c +9 -6
  21. data/contrib/zstd/compress/huf_compress.c +30 -74
  22. data/contrib/zstd/compress/zbuff_compress.c +43 -51
  23. data/contrib/zstd/compress/zstd_compress.c +953 -763
  24. data/contrib/zstd/compress/zstd_opt.h +115 -261
  25. data/contrib/zstd/decompress/huf_decompress.c +29 -40
  26. data/contrib/zstd/decompress/zbuff_decompress.c +36 -78
  27. data/contrib/zstd/decompress/zstd_decompress.c +976 -496
  28. data/contrib/zstd/dictBuilder/divsufsort.h +5 -5
  29. data/contrib/zstd/dictBuilder/zdict.c +194 -229
  30. data/contrib/zstd/dictBuilder/zdict.h +66 -68
  31. data/contrib/zstd/legacy/zstd_legacy.h +168 -49
  32. data/contrib/zstd/legacy/zstd_v01.c +95 -178
  33. data/contrib/zstd/legacy/zstd_v01.h +12 -32
  34. data/contrib/zstd/legacy/zstd_v02.c +48 -274
  35. data/contrib/zstd/legacy/zstd_v02.h +12 -32
  36. data/contrib/zstd/legacy/zstd_v03.c +48 -274
  37. data/contrib/zstd/legacy/zstd_v03.h +12 -32
  38. data/contrib/zstd/legacy/zstd_v04.c +63 -320
  39. data/contrib/zstd/legacy/zstd_v04.h +13 -33
  40. data/contrib/zstd/legacy/zstd_v05.c +80 -345
  41. data/contrib/zstd/legacy/zstd_v05.h +9 -31
  42. data/contrib/zstd/legacy/zstd_v06.c +48 -458
  43. data/contrib/zstd/legacy/zstd_v06.h +41 -67
  44. data/contrib/zstd/legacy/zstd_v07.c +4544 -0
  45. data/contrib/zstd/legacy/zstd_v07.h +173 -0
  46. data/contrib/zstd/zstd.h +640 -0
  47. data/ext/extconf.rb +7 -3
  48. data/ext/extzstd.c +263 -106
  49. data/ext/extzstd.h +8 -6
  50. data/ext/extzstd_nogvls.h +0 -117
  51. data/ext/extzstd_stream.c +347 -0
  52. data/ext/zstd_common.c +8 -0
  53. data/ext/zstd_compress.c +6 -0
  54. data/ext/zstd_decompress.c +5 -0
  55. data/ext/zstd_dictbuilder.c +5 -0
  56. data/ext/zstd_legacy_v07.c +1 -0
  57. data/gemstub.rb +18 -16
  58. data/lib/2.1/extzstd.so +0 -0
  59. data/lib/2.2/extzstd.so +0 -0
  60. data/lib/2.3/extzstd.so +0 -0
  61. data/lib/extzstd/version.rb +1 -1
  62. data/lib/extzstd.rb +77 -43
  63. data/test/test_basic.rb +11 -6
  64. metadata +23 -11
  65. data/contrib/zstd/common/error_public.h +0 -77
  66. data/contrib/zstd/common/zstd.h +0 -475
  67. data/ext/extzstd_buffered.c +0 -265
  68. data/ext/zstd_amalgam.c +0 -18
  69. data/lib/2.0/extzstd.so +0 -0
@@ -1,40 +1,18 @@
1
- /*
2
- dictBuilder - dictionary builder for zstd
3
- Copyright (C) Yann Collet 2016
4
-
5
- BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
6
-
7
- Redistribution and use in source and binary forms, with or without
8
- modification, are permitted provided that the following conditions are
9
- met:
10
-
11
- * Redistributions of source code must retain the above copyright
12
- notice, this list of conditions and the following disclaimer.
13
- * Redistributions in binary form must reproduce the above
14
- copyright notice, this list of conditions and the following disclaimer
15
- in the documentation and/or other materials provided with the
16
- distribution.
17
-
18
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19
- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20
- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21
- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22
- OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23
- SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24
- LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25
- DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26
- THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27
- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28
- OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
-
30
- You can contact the author at :
31
- - Zstd homepage : https://www.zstd.net
32
- */
1
+ /**
2
+ * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
3
+ * All rights reserved.
4
+ *
5
+ * This source code is licensed under the BSD-style license found in the
6
+ * LICENSE file in the root directory of this source tree. An additional grant
7
+ * of patent rights can be found in the PATENTS file in the same directory.
8
+ */
9
+
33
10
 
34
11
  /*-**************************************
35
12
  * Tuning parameters
36
13
  ****************************************/
37
14
  #define ZDICT_MAX_SAMPLES_SIZE (2000U << 20)
15
+ #define ZDICT_MIN_SAMPLES_SIZE 512
38
16
 
39
17
 
40
18
  /*-**************************************
@@ -78,14 +56,12 @@
78
56
  #define MB *(1 <<20)
79
57
  #define GB *(1U<<30)
80
58
 
81
- #define DICTLISTSIZE 10000
59
+ #define DICTLISTSIZE_DEFAULT 10000
82
60
 
83
61
  #define NOISELENGTH 32
84
- #define PRIME1 2654435761U
85
- #define PRIME2 2246822519U
86
62
 
87
63
  #define MINRATIO 4
88
- static const U32 g_compressionLevel_default = 5;
64
+ static const int g_compressionLevel_default = 5;
89
65
  static const U32 g_selectivity_default = 9;
90
66
  static const size_t g_provision_entropySize = 200;
91
67
  static const size_t g_min_fast_dictContent = 192;
@@ -95,26 +71,18 @@ static const size_t g_min_fast_dictContent = 192;
95
71
  * Console display
96
72
  ***************************************/
97
73
  #define DISPLAY(...) { fprintf(stderr, __VA_ARGS__); fflush( stderr ); }
98
- #define DISPLAYLEVEL(l, ...) if (g_displayLevel>=l) { DISPLAY(__VA_ARGS__); }
99
- static unsigned g_displayLevel = 0; /* 0 : no display; 1: errors; 2: default; 4: full information */
100
-
101
- #define DISPLAYUPDATE(l, ...) if (g_displayLevel>=l) { \
102
- if (ZDICT_clockSpan(g_time) > refreshRate) \
103
- { g_time = clock(); DISPLAY(__VA_ARGS__); \
104
- if (g_displayLevel>=4) fflush(stdout); } }
105
- static const clock_t refreshRate = CLOCKS_PER_SEC * 3 / 10;
106
- static clock_t g_time = 0;
74
+ #define DISPLAYLEVEL(l, ...) if (notificationLevel>=l) { DISPLAY(__VA_ARGS__); } /* 0 : no display; 1: errors; 2: default; 3: details; 4: debug */
107
75
 
108
76
  static clock_t ZDICT_clockSpan(clock_t nPrevious) { return clock() - nPrevious; }
109
77
 
110
- static void ZDICT_printHex(U32 dlevel, const void* ptr, size_t length)
78
+ static void ZDICT_printHex(const void* ptr, size_t length)
111
79
  {
112
80
  const BYTE* const b = (const BYTE*)ptr;
113
81
  size_t u;
114
82
  for (u=0; u<length; u++) {
115
83
  BYTE c = b[u];
116
84
  if (c<32 || c>126) c = '.'; /* non-printable char */
117
- DISPLAYLEVEL(dlevel, "%c", c);
85
+ DISPLAY("%c", c);
118
86
  }
119
87
  }
120
88
 
@@ -126,6 +94,13 @@ unsigned ZDICT_isError(size_t errorCode) { return ERR_isError(errorCode); }
126
94
 
127
95
  const char* ZDICT_getErrorName(size_t errorCode) { return ERR_getErrorName(errorCode); }
128
96
 
97
+ unsigned ZDICT_getDictID(const void* dictBuffer, size_t dictSize)
98
+ {
99
+ if (dictSize < 8) return 0;
100
+ if (MEM_readLE32(dictBuffer) != ZSTD_DICT_MAGIC) return 0;
101
+ return MEM_readLE32((const char*)dictBuffer + 4);
102
+ }
103
+
129
104
 
130
105
  /*-********************************************************
131
106
  * Dictionary training functions
@@ -228,7 +203,7 @@ static void ZDICT_initDictItem(dictItem* d)
228
203
  static dictItem ZDICT_analyzePos(
229
204
  BYTE* doneMarks,
230
205
  const int* suffix, U32 start,
231
- const void* buffer, U32 minRatio)
206
+ const void* buffer, U32 minRatio, U32 notificationLevel)
232
207
  {
233
208
  U32 lengthList[LLIMIT] = {0};
234
209
  U32 cumulLength[LLIMIT] = {0};
@@ -332,12 +307,13 @@ static dictItem ZDICT_analyzePos(
332
307
  } while (length >=MINMATCHLENGTH);
333
308
 
334
309
  /* look backward */
335
- do {
336
- length = ZDICT_count(b + pos, b + suffix[start-1]);
337
- if (length >= LLIMIT) length = LLIMIT-1;
338
- lengthList[length]++;
339
- if (length >=MINMATCHLENGTH) start--;
340
- } while(length >= MINMATCHLENGTH);
310
+ length = MINMATCHLENGTH;
311
+ while ((length >= MINMATCHLENGTH) & (start > 0)) {
312
+ length = ZDICT_count(b + pos, b + suffix[start - 1]);
313
+ if (length >= LLIMIT) length = LLIMIT - 1;
314
+ lengthList[length]++;
315
+ if (length >= MINMATCHLENGTH) start--;
316
+ }
341
317
 
342
318
  /* largest useful length */
343
319
  memset(cumulLength, 0, sizeof(cumulLength));
@@ -395,21 +371,22 @@ static dictItem ZDICT_analyzePos(
395
371
  static U32 ZDICT_checkMerge(dictItem* table, dictItem elt, U32 eltNbToSkip)
396
372
  {
397
373
  const U32 tableSize = table->pos;
398
- const U32 max = elt.pos + (elt.length-1);
374
+ const U32 eltEnd = elt.pos + elt.length;
399
375
 
400
376
  /* tail overlap */
401
377
  U32 u; for (u=1; u<tableSize; u++) {
402
378
  if (u==eltNbToSkip) continue;
403
- if ((table[u].pos > elt.pos) && (table[u].pos < max)) { /* overlap */
379
+ if ((table[u].pos > elt.pos) && (table[u].pos <= eltEnd)) { /* overlap, existing > new */
404
380
  /* append */
405
381
  U32 addedLength = table[u].pos - elt.pos;
406
382
  table[u].length += addedLength;
407
383
  table[u].pos = elt.pos;
408
384
  table[u].savings += elt.savings * addedLength / elt.length; /* rough approx */
409
- table[u].savings += elt.length / 8; /* rough approx */
385
+ table[u].savings += elt.length / 8; /* rough approx bonus */
410
386
  elt = table[u];
387
+ /* sort : improve rank */
411
388
  while ((u>1) && (table[u-1].savings < elt.savings))
412
- table[u] = table[u-1], u--;
389
+ table[u] = table[u-1], u--;
413
390
  table[u] = elt;
414
391
  return u;
415
392
  } }
@@ -417,14 +394,15 @@ static U32 ZDICT_checkMerge(dictItem* table, dictItem elt, U32 eltNbToSkip)
417
394
  /* front overlap */
418
395
  for (u=1; u<tableSize; u++) {
419
396
  if (u==eltNbToSkip) continue;
420
- if ((table[u].pos + table[u].length > elt.pos) && (table[u].pos < elt.pos)) { /* overlap */
397
+ if ((table[u].pos + table[u].length >= elt.pos) && (table[u].pos < elt.pos)) { /* overlap, existing < new */
421
398
  /* append */
422
- int addedLength = (elt.pos + elt.length) - (table[u].pos + table[u].length);
423
- table[u].savings += elt.length / 8; /* rough approx */
424
- if (addedLength > 0) { /* otherwise, already included */
399
+ int addedLength = (int)eltEnd - (table[u].pos + table[u].length);
400
+ table[u].savings += elt.length / 8; /* rough approx bonus */
401
+ if (addedLength > 0) { /* otherwise, elt fully included into existing */
425
402
  table[u].length += addedLength;
426
403
  table[u].savings += elt.savings * addedLength / elt.length; /* rough approx */
427
404
  }
405
+ /* sort : improve rank */
428
406
  elt = table[u];
429
407
  while ((u>1) && (table[u-1].savings < elt.savings))
430
408
  table[u] = table[u-1], u--;
@@ -489,15 +467,21 @@ static U32 ZDICT_dictSize(const dictItem* dictList)
489
467
  static size_t ZDICT_trainBuffer(dictItem* dictList, U32 dictListSize,
490
468
  const void* const buffer, size_t bufferSize, /* buffer must end with noisy guard band */
491
469
  const size_t* fileSizes, unsigned nbFiles,
492
- U32 shiftRatio, unsigned maxDictSize)
470
+ U32 minRatio, U32 notificationLevel)
493
471
  {
494
472
  int* const suffix0 = (int*)malloc((bufferSize+2)*sizeof(*suffix0));
495
473
  int* const suffix = suffix0+1;
496
474
  U32* reverseSuffix = (U32*)malloc((bufferSize)*sizeof(*reverseSuffix));
497
475
  BYTE* doneMarks = (BYTE*)malloc((bufferSize+16)*sizeof(*doneMarks)); /* +16 for overflow security */
498
476
  U32* filePos = (U32*)malloc(nbFiles * sizeof(*filePos));
499
- U32 minRatio = nbFiles >> shiftRatio;
500
477
  size_t result = 0;
478
+ clock_t displayClock = 0;
479
+ clock_t const refreshRate = CLOCKS_PER_SEC * 3 / 10;
480
+
481
+ # define DISPLAYUPDATE(l, ...) if (notificationLevel>=l) { \
482
+ if (ZDICT_clockSpan(displayClock) > refreshRate) \
483
+ { displayClock = clock(); DISPLAY(__VA_ARGS__); \
484
+ if (notificationLevel>=4) fflush(stdout); } }
501
485
 
502
486
  /* init */
503
487
  DISPLAYLEVEL(2, "\r%70s\r", ""); /* clean display line */
@@ -523,7 +507,8 @@ static size_t ZDICT_trainBuffer(dictItem* dictList, U32 dictListSize,
523
507
  { size_t pos;
524
508
  for (pos=0; pos < bufferSize; pos++)
525
509
  reverseSuffix[suffix[pos]] = (U32)pos;
526
- /* build file pos */
510
+ /* note filePos tracks borders between samples.
511
+ It's not used at this stage, but planned to become useful in a later update */
527
512
  filePos[0] = 0;
528
513
  for (pos=1; pos<nbFiles; pos++)
529
514
  filePos[pos] = (U32)(filePos[pos-1] + fileSizes[pos-1]);
@@ -535,23 +520,13 @@ static size_t ZDICT_trainBuffer(dictItem* dictList, U32 dictListSize,
535
520
  { U32 cursor; for (cursor=0; cursor < bufferSize; ) {
536
521
  dictItem solution;
537
522
  if (doneMarks[cursor]) { cursor++; continue; }
538
- solution = ZDICT_analyzePos(doneMarks, suffix, reverseSuffix[cursor], buffer, minRatio);
523
+ solution = ZDICT_analyzePos(doneMarks, suffix, reverseSuffix[cursor], buffer, minRatio, notificationLevel);
539
524
  if (solution.length==0) { cursor++; continue; }
540
525
  ZDICT_insertDictItem(dictList, dictListSize, solution);
541
526
  cursor += solution.length;
542
527
  DISPLAYUPDATE(2, "\r%4.2f %% \r", (double)cursor / bufferSize * 100);
543
528
  } }
544
529
 
545
- /* limit dictionary size */
546
- { U32 const max = dictList->pos; /* convention : nb of useful elts within dictList */
547
- U32 currentSize = 0;
548
- U32 n; for (n=1; n<max; n++) {
549
- currentSize += dictList[n].length;
550
- if (currentSize > maxDictSize) break;
551
- }
552
- dictList->pos = n;
553
- }
554
-
555
530
  _cleanup:
556
531
  free(suffix0);
557
532
  free(reverseSuffix);
@@ -563,10 +538,12 @@ _cleanup:
563
538
 
564
539
  static void ZDICT_fillNoise(void* buffer, size_t length)
565
540
  {
566
- unsigned acc = PRIME1;
541
+ unsigned const prime1 = 2654435761U;
542
+ unsigned const prime2 = 2246822519U;
543
+ unsigned acc = prime1;
567
544
  size_t p=0;;
568
545
  for (p=0; p<length; p++) {
569
- acc *= PRIME2;
546
+ acc *= prime2;
570
547
  ((unsigned char*)buffer)[p] = (unsigned char)(acc >> 21);
571
548
  }
572
549
  }
@@ -576,23 +553,23 @@ typedef struct
576
553
  {
577
554
  ZSTD_CCtx* ref;
578
555
  ZSTD_CCtx* zc;
579
- void* workPlace; /* must be ZSTD_BLOCKSIZE_MAX allocated */
556
+ void* workPlace; /* must be ZSTD_BLOCKSIZE_ABSOLUTEMAX allocated */
580
557
  } EStats_ress_t;
581
558
 
582
559
  #define MAXREPOFFSET 1024
583
560
 
584
561
  static void ZDICT_countEStats(EStats_ress_t esr, ZSTD_parameters params,
585
562
  U32* countLit, U32* offsetcodeCount, U32* matchlengthCount, U32* litlengthCount, U32* repOffsets,
586
- const void* src, size_t srcSize)
563
+ const void* src, size_t srcSize, U32 notificationLevel)
587
564
  {
588
- size_t const blockSizeMax = MIN (ZSTD_BLOCKSIZE_MAX, 1 << params.cParams.windowLog);
565
+ size_t const blockSizeMax = MIN (ZSTD_BLOCKSIZE_ABSOLUTEMAX, 1 << params.cParams.windowLog);
589
566
  size_t cSize;
590
567
 
591
568
  if (srcSize > blockSizeMax) srcSize = blockSizeMax; /* protection vs large samples */
592
- { size_t const errorCode = ZSTD_copyCCtx(esr.zc, esr.ref);
593
- if (ZSTD_isError(errorCode)) { DISPLAYLEVEL(1, "warning : ZSTD_copyCCtx failed \n"); return; }
594
- }
595
- cSize = ZSTD_compressBlock(esr.zc, esr.workPlace, ZSTD_BLOCKSIZE_MAX, src, srcSize);
569
+ { size_t const errorCode = ZSTD_copyCCtx(esr.zc, esr.ref, 0);
570
+ if (ZSTD_isError(errorCode)) { DISPLAYLEVEL(1, "warning : ZSTD_copyCCtx failed \n"); return; }
571
+ }
572
+ cSize = ZSTD_compressBlock(esr.zc, esr.workPlace, ZSTD_BLOCKSIZE_ABSOLUTEMAX, src, srcSize);
596
573
  if (ZSTD_isError(cSize)) { DISPLAYLEVEL(1, "warning : could not compress sample size %u \n", (U32)srcSize); return; }
597
574
 
598
575
  if (cSize) { /* if == 0; block is not compressible */
@@ -605,34 +582,33 @@ static void ZDICT_countEStats(EStats_ress_t esr, ZSTD_parameters params,
605
582
  }
606
583
 
607
584
  /* seqStats */
608
- { size_t const nbSeq = (size_t)(seqStorePtr->offset - seqStorePtr->offsetStart);
609
- ZSTD_seqToCodes(seqStorePtr, nbSeq);
585
+ { U32 const nbSeq = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart);
586
+ ZSTD_seqToCodes(seqStorePtr);
610
587
 
611
- { const BYTE* codePtr = seqStorePtr->offCodeStart;
612
- size_t u;
588
+ { const BYTE* codePtr = seqStorePtr->ofCode;
589
+ U32 u;
613
590
  for (u=0; u<nbSeq; u++) offsetcodeCount[codePtr[u]]++;
614
591
  }
615
592
 
616
- { const BYTE* codePtr = seqStorePtr->mlCodeStart;
617
- size_t u;
593
+ { const BYTE* codePtr = seqStorePtr->mlCode;
594
+ U32 u;
618
595
  for (u=0; u<nbSeq; u++) matchlengthCount[codePtr[u]]++;
619
596
  }
620
597
 
621
- { const BYTE* codePtr = seqStorePtr->llCodeStart;
622
- size_t u;
598
+ { const BYTE* codePtr = seqStorePtr->llCode;
599
+ U32 u;
623
600
  for (u=0; u<nbSeq; u++) litlengthCount[codePtr[u]]++;
624
- } }
601
+ }
625
602
 
626
- /* rep offsets */
627
- { const U32* const offsetPtr = seqStorePtr->offsetStart;
628
- U32 offset1 = offsetPtr[0] - 3;
629
- U32 offset2 = offsetPtr[1] - 3;
630
- if (offset1 >= MAXREPOFFSET) offset1 = 0;
631
- if (offset2 >= MAXREPOFFSET) offset2 = 0;
632
- repOffsets[offset1] += 3;
633
- repOffsets[offset2] += 1;
634
- }
635
- }
603
+ if (nbSeq >= 2) { /* rep offsets */
604
+ const seqDef* const seq = seqStorePtr->sequencesStart;
605
+ U32 offset1 = seq[0].offset - 3;
606
+ U32 offset2 = seq[1].offset - 3;
607
+ if (offset1 >= MAXREPOFFSET) offset1 = 0;
608
+ if (offset2 >= MAXREPOFFSET) offset2 = 0;
609
+ repOffsets[offset1] += 3;
610
+ repOffsets[offset2] += 1;
611
+ } } }
636
612
  }
637
613
 
638
614
  /*
@@ -671,60 +647,65 @@ static void ZDICT_insertSortCount(offsetCount_t table[ZSTD_REP_NUM+1], U32 val,
671
647
  }
672
648
 
673
649
 
674
- #define OFFCODE_MAX 18 /* only applicable to first block */
650
+ #define OFFCODE_MAX 30 /* only applicable to first block */
675
651
  static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
676
- unsigned compressionLevel,
677
- const void* srcBuffer, const size_t* fileSizes, unsigned nbFiles,
678
- const void* dictBuffer, size_t dictBufferSize)
652
+ unsigned compressionLevel,
653
+ const void* srcBuffer, const size_t* fileSizes, unsigned nbFiles,
654
+ const void* dictBuffer, size_t dictBufferSize,
655
+ unsigned notificationLevel)
679
656
  {
680
657
  U32 countLit[256];
681
658
  HUF_CREATE_STATIC_CTABLE(hufTable, 255);
682
659
  U32 offcodeCount[OFFCODE_MAX+1];
683
660
  short offcodeNCount[OFFCODE_MAX+1];
661
+ U32 offcodeMax = ZSTD_highbit32((U32)(dictBufferSize + 128 KB));
684
662
  U32 matchLengthCount[MaxML+1];
685
663
  short matchLengthNCount[MaxML+1];
686
664
  U32 litLengthCount[MaxLL+1];
687
665
  short litLengthNCount[MaxLL+1];
688
- U32 repOffset[MAXREPOFFSET] = { 0 };
666
+ U32 repOffset[MAXREPOFFSET];
689
667
  offsetCount_t bestRepOffset[ZSTD_REP_NUM+1];
690
668
  EStats_ress_t esr;
691
669
  ZSTD_parameters params;
692
- U32 u, huffLog = 12, Offlog = OffFSELog, mlLog = MLFSELog, llLog = LLFSELog, total;
670
+ U32 u, huffLog = 11, Offlog = OffFSELog, mlLog = MLFSELog, llLog = LLFSELog, total;
693
671
  size_t pos = 0, errorCode;
694
672
  size_t eSize = 0;
695
673
  size_t const totalSrcSize = ZDICT_totalSampleSize(fileSizes, nbFiles);
696
- size_t const averageSampleSize = totalSrcSize / nbFiles;
674
+ size_t const averageSampleSize = totalSrcSize / (nbFiles + !nbFiles);
697
675
  BYTE* dstPtr = (BYTE*)dstBuffer;
698
676
 
699
677
  /* init */
678
+ esr.ref = ZSTD_createCCtx();
679
+ esr.zc = ZSTD_createCCtx();
680
+ esr.workPlace = malloc(ZSTD_BLOCKSIZE_ABSOLUTEMAX);
681
+ if (!esr.ref || !esr.zc || !esr.workPlace) {
682
+ eSize = ERROR(memory_allocation);
683
+ DISPLAYLEVEL(1, "Not enough memory \n");
684
+ goto _cleanup;
685
+ }
686
+ if (offcodeMax>OFFCODE_MAX) { eSize = ERROR(dictionary_wrong); goto _cleanup; } /* too large dictionary */
700
687
  for (u=0; u<256; u++) countLit[u]=1; /* any character must be described */
701
- for (u=0; u<=OFFCODE_MAX; u++) offcodeCount[u]=1;
688
+ for (u=0; u<=offcodeMax; u++) offcodeCount[u]=1;
702
689
  for (u=0; u<=MaxML; u++) matchLengthCount[u]=1;
703
690
  for (u=0; u<=MaxLL; u++) litLengthCount[u]=1;
691
+ memset(repOffset, 0, sizeof(repOffset));
704
692
  repOffset[1] = repOffset[4] = repOffset[8] = 1;
705
693
  memset(bestRepOffset, 0, sizeof(bestRepOffset));
706
- esr.ref = ZSTD_createCCtx();
707
- esr.zc = ZSTD_createCCtx();
708
- esr.workPlace = malloc(ZSTD_BLOCKSIZE_MAX);
709
- if (!esr.ref || !esr.zc || !esr.workPlace) {
710
- eSize = ERROR(memory_allocation);
711
- DISPLAYLEVEL(1, "Not enough memory");
712
- goto _cleanup;
713
- }
714
694
  if (compressionLevel==0) compressionLevel=g_compressionLevel_default;
715
695
  params = ZSTD_getParams(compressionLevel, averageSampleSize, dictBufferSize);
716
- { size_t const beginResult = ZSTD_compressBegin_advanced(esr.ref, dictBuffer, dictBufferSize, params, 0);
717
- if (ZSTD_isError(beginResult)) {
718
- eSize = ERROR(GENERIC);
719
- DISPLAYLEVEL(1, "error : ZSTD_compressBegin_advanced failed ");
720
- goto _cleanup;
721
- } }
696
+ { size_t const beginResult = ZSTD_compressBegin_advanced(esr.ref, dictBuffer, dictBufferSize, params, 0);
697
+ if (ZSTD_isError(beginResult)) {
698
+ eSize = ERROR(GENERIC);
699
+ DISPLAYLEVEL(1, "error : ZSTD_compressBegin_advanced failed \n");
700
+ goto _cleanup;
701
+ } }
722
702
 
723
703
  /* collect stats on all files */
724
704
  for (u=0; u<nbFiles; u++) {
725
705
  ZDICT_countEStats(esr, params,
726
- countLit, offcodeCount, matchLengthCount, litLengthCount, repOffset,
727
- (const char*)srcBuffer + pos, fileSizes[u]);
706
+ countLit, offcodeCount, matchLengthCount, litLengthCount, repOffset,
707
+ (const char*)srcBuffer + pos, fileSizes[u],
708
+ notificationLevel);
728
709
  pos += fileSizes[u];
729
710
  }
730
711
 
@@ -732,7 +713,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
732
713
  errorCode = HUF_buildCTable (hufTable, countLit, 255, huffLog);
733
714
  if (HUF_isError(errorCode)) {
734
715
  eSize = ERROR(GENERIC);
735
- DISPLAYLEVEL(1, "HUF_buildCTable error");
716
+ DISPLAYLEVEL(1, "HUF_buildCTable error \n");
736
717
  goto _cleanup;
737
718
  }
738
719
  huffLog = (U32)errorCode;
@@ -744,11 +725,11 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
744
725
  }
745
726
  /* note : the result of this phase should be used to better appreciate the impact on statistics */
746
727
 
747
- total=0; for (u=0; u<=OFFCODE_MAX; u++) total+=offcodeCount[u];
748
- errorCode = FSE_normalizeCount(offcodeNCount, Offlog, offcodeCount, total, OFFCODE_MAX);
728
+ total=0; for (u=0; u<=offcodeMax; u++) total+=offcodeCount[u];
729
+ errorCode = FSE_normalizeCount(offcodeNCount, Offlog, offcodeCount, total, offcodeMax);
749
730
  if (FSE_isError(errorCode)) {
750
731
  eSize = ERROR(GENERIC);
751
- DISPLAYLEVEL(1, "FSE_normalizeCount error with offcodeCount");
732
+ DISPLAYLEVEL(1, "FSE_normalizeCount error with offcodeCount \n");
752
733
  goto _cleanup;
753
734
  }
754
735
  Offlog = (U32)errorCode;
@@ -757,7 +738,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
757
738
  errorCode = FSE_normalizeCount(matchLengthNCount, mlLog, matchLengthCount, total, MaxML);
758
739
  if (FSE_isError(errorCode)) {
759
740
  eSize = ERROR(GENERIC);
760
- DISPLAYLEVEL(1, "FSE_normalizeCount error with matchLengthCount");
741
+ DISPLAYLEVEL(1, "FSE_normalizeCount error with matchLengthCount \n");
761
742
  goto _cleanup;
762
743
  }
763
744
  mlLog = (U32)errorCode;
@@ -766,17 +747,16 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
766
747
  errorCode = FSE_normalizeCount(litLengthNCount, llLog, litLengthCount, total, MaxLL);
767
748
  if (FSE_isError(errorCode)) {
768
749
  eSize = ERROR(GENERIC);
769
- DISPLAYLEVEL(1, "FSE_normalizeCount error with litLengthCount");
750
+ DISPLAYLEVEL(1, "FSE_normalizeCount error with litLengthCount \n");
770
751
  goto _cleanup;
771
752
  }
772
753
  llLog = (U32)errorCode;
773
754
 
774
-
775
755
  /* write result to buffer */
776
756
  { size_t const hhSize = HUF_writeCTable(dstPtr, maxDstSize, hufTable, 255, huffLog);
777
757
  if (HUF_isError(hhSize)) {
778
758
  eSize = ERROR(GENERIC);
779
- DISPLAYLEVEL(1, "HUF_writeCTable error");
759
+ DISPLAYLEVEL(1, "HUF_writeCTable error \n");
780
760
  goto _cleanup;
781
761
  }
782
762
  dstPtr += hhSize;
@@ -787,7 +767,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
787
767
  { size_t const ohSize = FSE_writeNCount(dstPtr, maxDstSize, offcodeNCount, OFFCODE_MAX, Offlog);
788
768
  if (FSE_isError(ohSize)) {
789
769
  eSize = ERROR(GENERIC);
790
- DISPLAYLEVEL(1, "FSE_writeNCount error with offcodeNCount");
770
+ DISPLAYLEVEL(1, "FSE_writeNCount error with offcodeNCount \n");
791
771
  goto _cleanup;
792
772
  }
793
773
  dstPtr += ohSize;
@@ -798,7 +778,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
798
778
  { size_t const mhSize = FSE_writeNCount(dstPtr, maxDstSize, matchLengthNCount, MaxML, mlLog);
799
779
  if (FSE_isError(mhSize)) {
800
780
  eSize = ERROR(GENERIC);
801
- DISPLAYLEVEL(1, "FSE_writeNCount error with matchLengthNCount");
781
+ DISPLAYLEVEL(1, "FSE_writeNCount error with matchLengthNCount \n");
802
782
  goto _cleanup;
803
783
  }
804
784
  dstPtr += mhSize;
@@ -809,7 +789,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
809
789
  { size_t const lhSize = FSE_writeNCount(dstPtr, maxDstSize, litLengthNCount, MaxLL, llLog);
810
790
  if (FSE_isError(lhSize)) {
811
791
  eSize = ERROR(GENERIC);
812
- DISPLAYLEVEL(1, "FSE_writeNCount error with litlengthNCount");
792
+ DISPLAYLEVEL(1, "FSE_writeNCount error with litlengthNCount \n");
813
793
  goto _cleanup;
814
794
  }
815
795
  dstPtr += lhSize;
@@ -819,7 +799,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
819
799
 
820
800
  if (maxDstSize<12) {
821
801
  eSize = ERROR(GENERIC);
822
- DISPLAYLEVEL(1, "not enough space to write RepOffsets");
802
+ DISPLAYLEVEL(1, "not enough space to write RepOffsets \n");
823
803
  goto _cleanup;
824
804
  }
825
805
  # if 0
@@ -833,7 +813,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
833
813
  MEM_writeLE32(dstPtr+4, repStartValue[1]);
834
814
  MEM_writeLE32(dstPtr+8, repStartValue[2]);
835
815
  #endif
836
- dstPtr += 12;
816
+ //dstPtr += 12;
837
817
  eSize += 12;
838
818
 
839
819
  _cleanup:
@@ -845,51 +825,13 @@ _cleanup:
845
825
  }
846
826
 
847
827
 
848
- #define DIB_FASTSEGMENTSIZE 64
849
- /*! ZDICT_fastSampling() (based on an idea proposed by Giuseppe Ottaviano) :
850
- Fill `dictBuffer` with stripes of size DIB_FASTSEGMENTSIZE from `samplesBuffer`,
851
- up to `dictSize`.
852
- Filling starts from the end of `dictBuffer`, down to maximum possible.
853
- if `dictSize` is not a multiply of DIB_FASTSEGMENTSIZE, some bytes at beginning of `dictBuffer` won't be used.
854
- @return : amount of data written into `dictBuffer`,
855
- or an error code
856
- */
857
- static size_t ZDICT_fastSampling(void* dictBuffer, size_t dictSize,
858
- const void* samplesBuffer, size_t samplesSize)
859
- {
860
- char* dstPtr = (char*)dictBuffer + dictSize;
861
- const char* srcPtr = (const char*)samplesBuffer;
862
- size_t const nbSegments = dictSize / DIB_FASTSEGMENTSIZE;
863
- size_t segNb, interSize;
864
-
865
- if (nbSegments <= 2) return ERROR(srcSize_wrong);
866
- if (samplesSize < dictSize) return ERROR(srcSize_wrong);
867
-
868
- /* first and last segments are part of dictionary, in case they contain interesting header/footer */
869
- dstPtr -= DIB_FASTSEGMENTSIZE;
870
- memcpy(dstPtr, srcPtr, DIB_FASTSEGMENTSIZE);
871
- dstPtr -= DIB_FASTSEGMENTSIZE;
872
- memcpy(dstPtr, srcPtr+samplesSize-DIB_FASTSEGMENTSIZE, DIB_FASTSEGMENTSIZE);
873
-
874
- /* regularly copy a segment */
875
- interSize = (samplesSize - nbSegments*DIB_FASTSEGMENTSIZE) / (nbSegments-1);
876
- srcPtr += DIB_FASTSEGMENTSIZE;
877
- for (segNb=2; segNb < nbSegments; segNb++) {
878
- srcPtr += interSize;
879
- dstPtr -= DIB_FASTSEGMENTSIZE;
880
- memcpy(dstPtr, srcPtr, DIB_FASTSEGMENTSIZE);
881
- srcPtr += DIB_FASTSEGMENTSIZE;
882
- }
883
-
884
- return nbSegments * DIB_FASTSEGMENTSIZE;
885
- }
886
-
887
828
  size_t ZDICT_addEntropyTablesFromBuffer_advanced(void* dictBuffer, size_t dictContentSize, size_t dictBufferCapacity,
888
829
  const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
889
830
  ZDICT_params_t params)
890
831
  {
891
832
  size_t hSize;
892
- unsigned const compressionLevel = (params.compressionLevel == 0) ? g_compressionLevel_default : params.compressionLevel;
833
+ int const compressionLevel = (params.compressionLevel <= 0) ? g_compressionLevel_default : params.compressionLevel;
834
+ U32 const notificationLevel = params.notificationLevel;
893
835
 
894
836
  /* dictionary header */
895
837
  MEM_writeLE32(dictBuffer, ZSTD_DICT_MAGIC);
@@ -903,10 +845,15 @@ size_t ZDICT_addEntropyTablesFromBuffer_advanced(void* dictBuffer, size_t dictCo
903
845
  /* entropy tables */
904
846
  DISPLAYLEVEL(2, "\r%70s\r", ""); /* clean display line */
905
847
  DISPLAYLEVEL(2, "statistics ... \n");
906
- hSize += ZDICT_analyzeEntropy((char*)dictBuffer+hSize, dictBufferCapacity-hSize,
848
+ { size_t const eSize = ZDICT_analyzeEntropy((char*)dictBuffer+hSize, dictBufferCapacity-hSize,
907
849
  compressionLevel,
908
850
  samplesBuffer, samplesSizes, nbSamples,
909
- (char*)dictBuffer + dictBufferCapacity - dictContentSize, dictContentSize);
851
+ (char*)dictBuffer + dictBufferCapacity - dictContentSize, dictContentSize,
852
+ notificationLevel);
853
+ if (ZDICT_isError(eSize)) return eSize;
854
+ hSize += eSize;
855
+ }
856
+
910
857
 
911
858
  if (hSize + dictContentSize < dictBufferCapacity)
912
859
  memmove((char*)dictBuffer + hSize, (char*)dictBuffer + dictBufferCapacity - dictContentSize, dictContentSize);
@@ -914,60 +861,86 @@ size_t ZDICT_addEntropyTablesFromBuffer_advanced(void* dictBuffer, size_t dictCo
914
861
  }
915
862
 
916
863
 
917
- #define DIB_MINSAMPLESSIZE (DIB_FASTSEGMENTSIZE*3)
918
864
  /*! ZDICT_trainFromBuffer_unsafe() :
919
- * `samplesBuffer` must be followed by noisy guard band.
920
- * @return : size of dictionary.
865
+ * Warning : `samplesBuffer` must be followed by noisy guard band.
866
+ * @return : size of dictionary, or an error code which can be tested with ZDICT_isError()
921
867
  */
922
868
  size_t ZDICT_trainFromBuffer_unsafe(
923
869
  void* dictBuffer, size_t maxDictSize,
924
870
  const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
925
871
  ZDICT_params_t params)
926
872
  {
927
- U32 const dictListSize = MAX( MAX(DICTLISTSIZE, nbSamples), (U32)(maxDictSize/16));
873
+ U32 const dictListSize = MAX(MAX(DICTLISTSIZE_DEFAULT, nbSamples), (U32)(maxDictSize/16));
928
874
  dictItem* const dictList = (dictItem*)malloc(dictListSize * sizeof(*dictList));
929
- unsigned selectivity = params.selectivityLevel;
875
+ unsigned const selectivity = params.selectivityLevel == 0 ? g_selectivity_default : params.selectivityLevel;
876
+ unsigned const minRep = (selectivity > 30) ? MINRATIO : nbSamples >> selectivity;
930
877
  size_t const targetDictSize = maxDictSize;
931
- size_t sBuffSize;
878
+ size_t const samplesBuffSize = ZDICT_totalSampleSize(samplesSizes, nbSamples);
932
879
  size_t dictSize = 0;
880
+ U32 const notificationLevel = params.notificationLevel;
933
881
 
934
882
  /* checks */
935
883
  if (!dictList) return ERROR(memory_allocation);
936
884
  if (maxDictSize <= g_provision_entropySize + g_min_fast_dictContent) { free(dictList); return ERROR(dstSize_tooSmall); }
885
+ if (samplesBuffSize < ZDICT_MIN_SAMPLES_SIZE) { free(dictList); return 0; } /* not enough source to create dictionary */
937
886
 
938
887
  /* init */
939
- { unsigned u; for (u=0, sBuffSize=0; u<nbSamples; u++) sBuffSize += samplesSizes[u]; }
940
- if (sBuffSize < DIB_MINSAMPLESSIZE) { free(dictList); return 0; } /* not enough source to create dictionary */
941
888
  ZDICT_initDictItem(dictList);
942
- g_displayLevel = params.notificationLevel;
943
- if (selectivity==0) selectivity = g_selectivity_default;
944
889
 
945
890
  /* build dictionary */
946
- if (selectivity>1) { /* selectivity == 1 => fast mode */
947
- ZDICT_trainBuffer(dictList, dictListSize,
948
- samplesBuffer, sBuffSize,
949
- samplesSizes, nbSamples,
950
- selectivity, (U32)targetDictSize);
951
-
952
- /* display best matches */
953
- if (g_displayLevel>= 3) {
954
- U32 const nb = 25;
955
- U32 const dictContentSize = ZDICT_dictSize(dictList);
956
- U32 u;
957
- DISPLAYLEVEL(3, "\n %u segments found, of total size %u \n", dictList[0].pos, dictContentSize);
958
- DISPLAYLEVEL(3, "list %u best segments \n", nb);
959
- for (u=1; u<=nb; u++) {
960
- U32 p = dictList[u].pos;
961
- U32 l = dictList[u].length;
962
- U32 d = MIN(40, l);
963
- DISPLAYLEVEL(3, "%3u:%3u bytes at pos %8u, savings %7u bytes |",
964
- u, l, p, dictList[u].savings);
965
- ZDICT_printHex(3, (const char*)samplesBuffer+p, d);
966
- DISPLAYLEVEL(3, "| \n");
967
- } } }
891
+ ZDICT_trainBuffer(dictList, dictListSize,
892
+ samplesBuffer, samplesBuffSize,
893
+ samplesSizes, nbSamples,
894
+ minRep, notificationLevel);
895
+
896
+ /* display best matches */
897
+ if (params.notificationLevel>= 3) {
898
+ U32 const nb = MIN(25, dictList[0].pos);
899
+ U32 const dictContentSize = ZDICT_dictSize(dictList);
900
+ U32 u;
901
+ DISPLAYLEVEL(3, "\n %u segments found, of total size %u \n", dictList[0].pos, dictContentSize);
902
+ DISPLAYLEVEL(3, "list %u best segments \n", nb);
903
+ for (u=1; u<=nb; u++) {
904
+ U32 pos = dictList[u].pos;
905
+ U32 length = dictList[u].length;
906
+ U32 printedLength = MIN(40, length);
907
+ DISPLAYLEVEL(3, "%3u:%3u bytes at pos %8u, savings %7u bytes |",
908
+ u, length, pos, dictList[u].savings);
909
+ ZDICT_printHex((const char*)samplesBuffer+pos, printedLength);
910
+ DISPLAYLEVEL(3, "| \n");
911
+ } }
912
+
968
913
 
969
914
  /* create dictionary */
970
915
  { U32 dictContentSize = ZDICT_dictSize(dictList);
916
+ if (dictContentSize < targetDictSize/3) {
917
+ DISPLAYLEVEL(2, "! warning : selected content significantly smaller than requested (%u < %u) \n", dictContentSize, (U32)maxDictSize);
918
+ if (minRep > MINRATIO) {
919
+ DISPLAYLEVEL(2, "! consider increasing selectivity to produce larger dictionary (-s%u) \n", selectivity+1);
920
+ DISPLAYLEVEL(2, "! note : larger dictionaries are not necessarily better, test its efficiency on samples \n");
921
+ }
922
+ if (samplesBuffSize < 10 * targetDictSize)
923
+ DISPLAYLEVEL(2, "! consider increasing the number of samples (total size : %u MB)\n", (U32)(samplesBuffSize>>20));
924
+ }
925
+
926
+ if ((dictContentSize > targetDictSize*3) && (nbSamples > 2*MINRATIO) && (selectivity>1)) {
927
+ U32 proposedSelectivity = selectivity-1;
928
+ while ((nbSamples >> proposedSelectivity) <= MINRATIO) { proposedSelectivity--; }
929
+ DISPLAYLEVEL(2, "! note : calculated dictionary significantly larger than requested (%u > %u) \n", dictContentSize, (U32)maxDictSize);
930
+ DISPLAYLEVEL(2, "! consider increasing dictionary size, or produce denser dictionary (-s%u) \n", proposedSelectivity);
931
+ DISPLAYLEVEL(2, "! always test dictionary efficiency on samples \n");
932
+ }
933
+
934
+ /* limit dictionary size */
935
+ { U32 const max = dictList->pos; /* convention : nb of useful elts within dictList */
936
+ U32 currentSize = 0;
937
+ U32 n; for (n=1; n<max; n++) {
938
+ currentSize += dictList[n].length;
939
+ if (currentSize > targetDictSize) { currentSize -= dictList[n].length; break; }
940
+ }
941
+ dictList->pos = n;
942
+ dictContentSize = currentSize;
943
+ }
971
944
 
972
945
  /* build dict content */
973
946
  { U32 u;
@@ -979,14 +952,6 @@ size_t ZDICT_trainFromBuffer_unsafe(
979
952
  memcpy(ptr, (const char*)samplesBuffer+dictList[u].pos, l);
980
953
  } }
981
954
 
982
- /* fast mode dict content */
983
- if (selectivity==1) { /* note could also be used to complete a dictionary, but not necessarily better */
984
- DISPLAYLEVEL(3, "\r%70s\r", ""); /* clean display line */
985
- DISPLAYLEVEL(3, "Adding %u KB with fast sampling \n", (U32)(targetDictSize>>10));
986
- dictContentSize = (U32)ZDICT_fastSampling(dictBuffer, targetDictSize,
987
- samplesBuffer, sBuffSize);
988
- }
989
-
990
955
  dictSize = ZDICT_addEntropyTablesFromBuffer_advanced(dictBuffer, dictContentSize, maxDictSize,
991
956
  samplesBuffer, samplesSizes, nbSamples,
992
957
  params);
@@ -1004,23 +969,23 @@ size_t ZDICT_trainFromBuffer_advanced(void* dictBuffer, size_t dictBufferCapacit
1004
969
  const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
1005
970
  ZDICT_params_t params)
1006
971
  {
972
+ size_t result;
1007
973
  void* newBuff;
1008
- size_t sBuffSize;
974
+ size_t const sBuffSize = ZDICT_totalSampleSize(samplesSizes, nbSamples);
975
+ if (sBuffSize < ZDICT_MIN_SAMPLES_SIZE) return 0; /* not enough content => no dictionary */
1009
976
 
1010
- { unsigned u; for (u=0, sBuffSize=0; u<nbSamples; u++) sBuffSize += samplesSizes[u]; }
1011
- if (sBuffSize==0) return 0; /* empty content => no dictionary */
1012
977
  newBuff = malloc(sBuffSize + NOISELENGTH);
1013
978
  if (!newBuff) return ERROR(memory_allocation);
1014
979
 
1015
980
  memcpy(newBuff, samplesBuffer, sBuffSize);
1016
981
  ZDICT_fillNoise((char*)newBuff + sBuffSize, NOISELENGTH); /* guard band, for end of buffer condition */
1017
982
 
1018
- { size_t const result = ZDICT_trainFromBuffer_unsafe(
983
+ result = ZDICT_trainFromBuffer_unsafe(
1019
984
  dictBuffer, dictBufferCapacity,
1020
985
  newBuff, samplesSizes, nbSamples,
1021
986
  params);
1022
- free(newBuff);
1023
- return result; }
987
+ free(newBuff);
988
+ return result;
1024
989
  }
1025
990
 
1026
991