RubyGems - extzstd - Versions diffs - 0.0.3.CONCEPT → 0.1 - Mend

extzstd 0.0.3.CONCEPT → 0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (65) hide show

checksums.yaml +4 -4
data/HISTORY.ja +5 -0
data/LICENSE +6 -6
data/README.md +35 -22
data/contrib/zstd/LICENSE +13 -9
data/contrib/zstd/README.md +37 -44
data/contrib/zstd/common/entropy_common.c +33 -39
data/contrib/zstd/common/error_private.c +43 -0
data/contrib/zstd/common/error_private.h +11 -60
data/contrib/zstd/common/fse.h +11 -5
data/contrib/zstd/common/fse_decompress.c +14 -16
data/contrib/zstd/common/huf.h +1 -1
data/contrib/zstd/common/mem.h +36 -43
data/contrib/zstd/common/xxhash.c +31 -18
data/contrib/zstd/common/xxhash.h +71 -35
data/contrib/zstd/common/zbuff.h +29 -35
data/contrib/zstd/common/zstd_common.c +24 -32
data/contrib/zstd/common/zstd_errors.h +60 -0
data/contrib/zstd/common/zstd_internal.h +109 -80
data/contrib/zstd/compress/fse_compress.c +9 -6
data/contrib/zstd/compress/huf_compress.c +30 -74
data/contrib/zstd/compress/zbuff_compress.c +43 -51
data/contrib/zstd/compress/zstd_compress.c +953 -763
data/contrib/zstd/compress/zstd_opt.h +115 -261
data/contrib/zstd/decompress/huf_decompress.c +29 -40
data/contrib/zstd/decompress/zbuff_decompress.c +36 -78
data/contrib/zstd/decompress/zstd_decompress.c +976 -496
data/contrib/zstd/dictBuilder/divsufsort.h +5 -5
data/contrib/zstd/dictBuilder/zdict.c +194 -229
data/contrib/zstd/dictBuilder/zdict.h +66 -68
data/contrib/zstd/legacy/zstd_legacy.h +168 -49
data/contrib/zstd/legacy/zstd_v01.c +95 -178
data/contrib/zstd/legacy/zstd_v01.h +12 -32
data/contrib/zstd/legacy/zstd_v02.c +48 -274
data/contrib/zstd/legacy/zstd_v02.h +12 -32
data/contrib/zstd/legacy/zstd_v03.c +48 -274
data/contrib/zstd/legacy/zstd_v03.h +12 -32
data/contrib/zstd/legacy/zstd_v04.c +63 -320
data/contrib/zstd/legacy/zstd_v04.h +13 -33
data/contrib/zstd/legacy/zstd_v05.c +80 -345
data/contrib/zstd/legacy/zstd_v05.h +9 -31
data/contrib/zstd/legacy/zstd_v06.c +48 -458
data/contrib/zstd/legacy/zstd_v06.h +41 -67
data/contrib/zstd/legacy/zstd_v07.c +4544 -0
data/contrib/zstd/legacy/zstd_v07.h +173 -0
data/contrib/zstd/zstd.h +640 -0
data/ext/extconf.rb +7 -3
data/ext/extzstd.c +263 -106
data/ext/extzstd.h +8 -6
data/ext/extzstd_nogvls.h +0 -117
data/ext/extzstd_stream.c +347 -0
data/ext/zstd_common.c +8 -0
data/ext/zstd_compress.c +6 -0
data/ext/zstd_decompress.c +5 -0
data/ext/zstd_dictbuilder.c +5 -0
data/ext/zstd_legacy_v07.c +1 -0
data/gemstub.rb +18 -16
data/lib/extzstd/version.rb +1 -1
data/lib/extzstd.rb +77 -43
data/test/test_basic.rb +11 -6
metadata +23 -10
data/contrib/zstd/common/error_public.h +0 -77
data/contrib/zstd/common/zstd.h +0 -475
data/ext/extzstd_buffered.c +0 -265
data/ext/zstd_amalgam.c +0 -18

data/contrib/zstd/dictBuilder/zdict.c CHANGED Viewed

@@ -1,40 +1,18 @@
-/*
-    dictBuilder - dictionary builder for zstd
-    Copyright (C) Yann Collet 2016
-    BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
-    Redistribution and use in source and binary forms, with or without
-    modification, are permitted provided that the following conditions are
-    met:
-    * Redistributions of source code must retain the above copyright
-    notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above
-    copyright notice, this list of conditions and the following disclaimer
-    in the documentation and/or other materials provided with the
-    distribution.
-    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-    OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-    You can contact the author at :
-    - Zstd homepage : https://www.zstd.net
-*/
+/**
+ * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree. An additional grant
+ * of patent rights can be found in the PATENTS file in the same directory.
+ */
 /*-**************************************
 *  Tuning parameters
 ****************************************/
 #define ZDICT_MAX_SAMPLES_SIZE (2000U << 20)
+#define ZDICT_MIN_SAMPLES_SIZE 512
 /*-**************************************
@@ -78,14 +56,12 @@
 #define MB *(1 <<20)
 #define GB *(1U<<30)
-#define DICTLISTSIZE 10000
+#define DICTLISTSIZE_DEFAULT 10000
 #define NOISELENGTH 32
-#define PRIME1   2654435761U
-#define PRIME2   2246822519U
 #define MINRATIO 4
-static const U32 g_compressionLevel_default = 5;
+static const int g_compressionLevel_default = 5;
 static const U32 g_selectivity_default = 9;
 static const size_t g_provision_entropySize = 200;
 static const size_t g_min_fast_dictContent = 192;
@@ -95,26 +71,18 @@ static const size_t g_min_fast_dictContent = 192;
 *  Console display
 ***************************************/
 #define DISPLAY(...)         { fprintf(stderr, __VA_ARGS__); fflush( stderr ); }
-#define DISPLAYLEVEL(l, ...) if (g_displayLevel>=l) { DISPLAY(__VA_ARGS__); }
-static unsigned g_displayLevel = 0;   /* 0 : no display;   1: errors;   2: default;  4: full information */
-#define DISPLAYUPDATE(l, ...) if (g_displayLevel>=l) { \
-            if (ZDICT_clockSpan(g_time) > refreshRate)  \
-            { g_time = clock(); DISPLAY(__VA_ARGS__); \
-            if (g_displayLevel>=4) fflush(stdout); } }
-static const clock_t refreshRate = CLOCKS_PER_SEC * 3 / 10;
-static clock_t g_time = 0;
+#define DISPLAYLEVEL(l, ...) if (notificationLevel>=l) { DISPLAY(__VA_ARGS__); }    /* 0 : no display;   1: errors;   2: default;  3: details;  4: debug */
 static clock_t ZDICT_clockSpan(clock_t nPrevious) { return clock() - nPrevious; }
-static void ZDICT_printHex(U32 dlevel, const void* ptr, size_t length)
+static void ZDICT_printHex(const void* ptr, size_t length)
 {
     const BYTE* const b = (const BYTE*)ptr;
     size_t u;
     for (u=0; u<length; u++) {
         BYTE c = b[u];
         if (c<32 || c>126) c = '.';   /* non-printable char */
-        DISPLAYLEVEL(dlevel, "%c", c);
+        DISPLAY("%c", c);
     }
 }
@@ -126,6 +94,13 @@ unsigned ZDICT_isError(size_t errorCode) { return ERR_isError(errorCode); }
 const char* ZDICT_getErrorName(size_t errorCode) { return ERR_getErrorName(errorCode); }
+unsigned ZDICT_getDictID(const void* dictBuffer, size_t dictSize)
+{
+    if (dictSize < 8) return 0;
+    if (MEM_readLE32(dictBuffer) != ZSTD_DICT_MAGIC) return 0;
+    return MEM_readLE32((const char*)dictBuffer + 4);
+}
 /*-********************************************************
 *  Dictionary training functions
@@ -228,7 +203,7 @@ static void ZDICT_initDictItem(dictItem* d)
 static dictItem ZDICT_analyzePos(
                        BYTE* doneMarks,
                        const int* suffix, U32 start,
-                       const void* buffer, U32 minRatio)
+                       const void* buffer, U32 minRatio, U32 notificationLevel)
 {
     U32 lengthList[LLIMIT] = {0};
     U32 cumulLength[LLIMIT] = {0};
@@ -332,12 +307,13 @@ static dictItem ZDICT_analyzePos(
         } while (length >=MINMATCHLENGTH);
         /* look backward */
-        do {
-            length = ZDICT_count(b + pos, b + suffix[start-1]);
-            if (length >= LLIMIT) length = LLIMIT-1;
-            lengthList[length]++;
-            if (length >=MINMATCHLENGTH) start--;
-        } while(length >= MINMATCHLENGTH);
+		length = MINMATCHLENGTH;
+		while ((length >= MINMATCHLENGTH) & (start > 0)) {
+			length = ZDICT_count(b + pos, b + suffix[start - 1]);
+			if (length >= LLIMIT) length = LLIMIT - 1;
+			lengthList[length]++;
+			if (length >= MINMATCHLENGTH) start--;
+		}
         /* largest useful length */
         memset(cumulLength, 0, sizeof(cumulLength));
@@ -395,21 +371,22 @@ static dictItem ZDICT_analyzePos(
 static U32 ZDICT_checkMerge(dictItem* table, dictItem elt, U32 eltNbToSkip)
 {
     const U32 tableSize = table->pos;
-    const U32 max = elt.pos + (elt.length-1);
+    const U32 eltEnd = elt.pos + elt.length;
     /* tail overlap */
     U32 u; for (u=1; u<tableSize; u++) {
         if (u==eltNbToSkip) continue;
-        if ((table[u].pos > elt.pos) && (table[u].pos < max)) {  /* overlap */
+        if ((table[u].pos > elt.pos) && (table[u].pos <= eltEnd)) {  /* overlap, existing > new */
             /* append */
             U32 addedLength = table[u].pos - elt.pos;
             table[u].length += addedLength;
             table[u].pos = elt.pos;
             table[u].savings += elt.savings * addedLength / elt.length;   /* rough approx */
-            table[u].savings += elt.length / 8;    /* rough approx */
+            table[u].savings += elt.length / 8;    /* rough approx bonus */
             elt = table[u];
+            /* sort : improve rank */
             while ((u>1) && (table[u-1].savings < elt.savings))
-                table[u] = table[u-1], u--;
+            table[u] = table[u-1], u--;
             table[u] = elt;
             return u;
     }   }
@@ -417,14 +394,15 @@ static U32 ZDICT_checkMerge(dictItem* table, dictItem elt, U32 eltNbToSkip)
     /* front overlap */
     for (u=1; u<tableSize; u++) {
         if (u==eltNbToSkip) continue;
-        if ((table[u].pos + table[u].length > elt.pos) && (table[u].pos < elt.pos)) {  /* overlap */
+        if ((table[u].pos + table[u].length >= elt.pos) && (table[u].pos < elt.pos)) {  /* overlap, existing < new */
             /* append */
-            int addedLength = (elt.pos + elt.length) - (table[u].pos + table[u].length);
-            table[u].savings += elt.length / 8;    /* rough approx */
-            if (addedLength > 0) {   /* otherwise, already included */
+            int addedLength = (int)eltEnd - (table[u].pos + table[u].length);
+            table[u].savings += elt.length / 8;    /* rough approx bonus */
+            if (addedLength > 0) {   /* otherwise, elt fully included into existing */
                 table[u].length += addedLength;
                 table[u].savings += elt.savings * addedLength / elt.length;   /* rough approx */
             }
+            /* sort : improve rank */
             elt = table[u];
             while ((u>1) && (table[u-1].savings < elt.savings))
                 table[u] = table[u-1], u--;
@@ -489,15 +467,21 @@ static U32 ZDICT_dictSize(const dictItem* dictList)
 static size_t ZDICT_trainBuffer(dictItem* dictList, U32 dictListSize,
                             const void* const buffer, size_t bufferSize,   /* buffer must end with noisy guard band */
                             const size_t* fileSizes, unsigned nbFiles,
-                            U32 shiftRatio, unsigned maxDictSize)
+                            U32 minRatio, U32 notificationLevel)
 {
     int* const suffix0 = (int*)malloc((bufferSize+2)*sizeof(*suffix0));
     int* const suffix = suffix0+1;
     U32* reverseSuffix = (U32*)malloc((bufferSize)*sizeof(*reverseSuffix));
     BYTE* doneMarks = (BYTE*)malloc((bufferSize+16)*sizeof(*doneMarks));   /* +16 for overflow security */
     U32* filePos = (U32*)malloc(nbFiles * sizeof(*filePos));
-    U32 minRatio = nbFiles >> shiftRatio;
     size_t result = 0;
+    clock_t displayClock = 0;
+    clock_t const refreshRate = CLOCKS_PER_SEC * 3 / 10;
+#   define DISPLAYUPDATE(l, ...) if (notificationLevel>=l) { \
+            if (ZDICT_clockSpan(displayClock) > refreshRate)  \
+            { displayClock = clock(); DISPLAY(__VA_ARGS__); \
+            if (notificationLevel>=4) fflush(stdout); } }
     /* init */
     DISPLAYLEVEL(2, "\r%70s\r", "");   /* clean display line */
@@ -523,7 +507,8 @@ static size_t ZDICT_trainBuffer(dictItem* dictList, U32 dictListSize,
     {   size_t pos;
         for (pos=0; pos < bufferSize; pos++)
             reverseSuffix[suffix[pos]] = (U32)pos;
-        /* build file pos */
+        /* note filePos tracks borders between samples.
+           It's not used at this stage, but planned to become useful in a later update */
         filePos[0] = 0;
         for (pos=1; pos<nbFiles; pos++)
             filePos[pos] = (U32)(filePos[pos-1] + fileSizes[pos-1]);
@@ -535,23 +520,13 @@ static size_t ZDICT_trainBuffer(dictItem* dictList, U32 dictListSize,
     {   U32 cursor; for (cursor=0; cursor < bufferSize; ) {
             dictItem solution;
             if (doneMarks[cursor]) { cursor++; continue; }
-            solution = ZDICT_analyzePos(doneMarks, suffix, reverseSuffix[cursor], buffer, minRatio);
+            solution = ZDICT_analyzePos(doneMarks, suffix, reverseSuffix[cursor], buffer, minRatio, notificationLevel);
             if (solution.length==0) { cursor++; continue; }
             ZDICT_insertDictItem(dictList, dictListSize, solution);
             cursor += solution.length;
             DISPLAYUPDATE(2, "\r%4.2f %% \r", (double)cursor / bufferSize * 100);
     }   }
-    /* limit dictionary size */
-    {   U32 const max = dictList->pos;   /* convention : nb of useful elts within dictList */
-        U32 currentSize = 0;
-        U32 n; for (n=1; n<max; n++) {
-            currentSize += dictList[n].length;
-            if (currentSize > maxDictSize) break;
-        }
-        dictList->pos = n;
-    }
 _cleanup:
     free(suffix0);
     free(reverseSuffix);
@@ -563,10 +538,12 @@ _cleanup:
 static void ZDICT_fillNoise(void* buffer, size_t length)
 {
-    unsigned acc = PRIME1;
+    unsigned const prime1 = 2654435761U;
+    unsigned const prime2 = 2246822519U;
+    unsigned acc = prime1;
     size_t p=0;;
     for (p=0; p<length; p++) {
-        acc *= PRIME2;
+        acc *= prime2;
         ((unsigned char*)buffer)[p] = (unsigned char)(acc >> 21);
     }
 }
@@ -576,23 +553,23 @@ typedef struct
 {
     ZSTD_CCtx* ref;
     ZSTD_CCtx* zc;
-    void* workPlace;   /* must be ZSTD_BLOCKSIZE_MAX allocated */
+    void* workPlace;   /* must be ZSTD_BLOCKSIZE_ABSOLUTEMAX allocated */
 } EStats_ress_t;
 #define MAXREPOFFSET 1024
 static void ZDICT_countEStats(EStats_ress_t esr, ZSTD_parameters params,
                             U32* countLit, U32* offsetcodeCount, U32* matchlengthCount, U32* litlengthCount, U32* repOffsets,
-                            const void* src, size_t srcSize)
+                            const void* src, size_t srcSize, U32 notificationLevel)
 {
-    size_t const blockSizeMax = MIN (ZSTD_BLOCKSIZE_MAX, 1 << params.cParams.windowLog);
+    size_t const blockSizeMax = MIN (ZSTD_BLOCKSIZE_ABSOLUTEMAX, 1 << params.cParams.windowLog);
     size_t cSize;
     if (srcSize > blockSizeMax) srcSize = blockSizeMax;   /* protection vs large samples */
-	{	size_t const errorCode = ZSTD_copyCCtx(esr.zc, esr.ref);
-		if (ZSTD_isError(errorCode)) { DISPLAYLEVEL(1, "warning : ZSTD_copyCCtx failed \n"); return; }
-	}
-    cSize = ZSTD_compressBlock(esr.zc, esr.workPlace, ZSTD_BLOCKSIZE_MAX, src, srcSize);
+    {  size_t const errorCode = ZSTD_copyCCtx(esr.zc, esr.ref, 0);
+            if (ZSTD_isError(errorCode)) { DISPLAYLEVEL(1, "warning : ZSTD_copyCCtx failed \n"); return; }
+    }
+    cSize = ZSTD_compressBlock(esr.zc, esr.workPlace, ZSTD_BLOCKSIZE_ABSOLUTEMAX, src, srcSize);
     if (ZSTD_isError(cSize)) { DISPLAYLEVEL(1, "warning : could not compress sample size %u \n", (U32)srcSize); return; }
     if (cSize) {  /* if == 0; block is not compressible */
@@ -605,34 +582,33 @@ static void ZDICT_countEStats(EStats_ress_t esr, ZSTD_parameters params,
         }
         /* seqStats */
-        {   size_t const nbSeq = (size_t)(seqStorePtr->offset - seqStorePtr->offsetStart);
-            ZSTD_seqToCodes(seqStorePtr, nbSeq);
+        {   U32 const nbSeq = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart);
+            ZSTD_seqToCodes(seqStorePtr);
-            {   const BYTE* codePtr = seqStorePtr->offCodeStart;
-                size_t u;
+            {   const BYTE* codePtr = seqStorePtr->ofCode;
+                U32 u;
                 for (u=0; u<nbSeq; u++) offsetcodeCount[codePtr[u]]++;
             }
-            {   const BYTE* codePtr = seqStorePtr->mlCodeStart;
-                size_t u;
+            {   const BYTE* codePtr = seqStorePtr->mlCode;
+                U32 u;
                 for (u=0; u<nbSeq; u++) matchlengthCount[codePtr[u]]++;
             }
-            {   const BYTE* codePtr = seqStorePtr->llCodeStart;
-                size_t u;
+            {   const BYTE* codePtr = seqStorePtr->llCode;
+                U32 u;
                 for (u=0; u<nbSeq; u++) litlengthCount[codePtr[u]]++;
-        }   }
+            }
-        /* rep offsets */
-        {   const U32* const offsetPtr = seqStorePtr->offsetStart;
-            U32 offset1 = offsetPtr[0] - 3;
-            U32 offset2 = offsetPtr[1] - 3;
-            if (offset1 >= MAXREPOFFSET) offset1 = 0;
-            if (offset2 >= MAXREPOFFSET) offset2 = 0;
-            repOffsets[offset1] += 3;
-            repOffsets[offset2] += 1;
-        }
-    }
+            if (nbSeq >= 2) { /* rep offsets */
+                const seqDef* const seq = seqStorePtr->sequencesStart;
+                U32 offset1 = seq[0].offset - 3;
+                U32 offset2 = seq[1].offset - 3;
+                if (offset1 >= MAXREPOFFSET) offset1 = 0;
+                if (offset2 >= MAXREPOFFSET) offset2 = 0;
+                repOffsets[offset1] += 3;
+                repOffsets[offset2] += 1;
+    }   }   }
 }
 /*
@@ -671,60 +647,65 @@ static void ZDICT_insertSortCount(offsetCount_t table[ZSTD_REP_NUM+1], U32 val,
 }
-#define OFFCODE_MAX 18  /* only applicable to first block */
+#define OFFCODE_MAX 30  /* only applicable to first block */
 static size_t ZDICT_analyzeEntropy(void*  dstBuffer, size_t maxDstSize,
-                                 unsigned compressionLevel,
-                           const void*  srcBuffer, const size_t* fileSizes, unsigned nbFiles,
-                           const void* dictBuffer, size_t  dictBufferSize)
+                                   unsigned compressionLevel,
+                             const void*  srcBuffer, const size_t* fileSizes, unsigned nbFiles,
+                             const void* dictBuffer, size_t  dictBufferSize,
+                                   unsigned notificationLevel)
 {
     U32 countLit[256];
     HUF_CREATE_STATIC_CTABLE(hufTable, 255);
     U32 offcodeCount[OFFCODE_MAX+1];
     short offcodeNCount[OFFCODE_MAX+1];
+    U32 offcodeMax = ZSTD_highbit32((U32)(dictBufferSize + 128 KB));
     U32 matchLengthCount[MaxML+1];
     short matchLengthNCount[MaxML+1];
     U32 litLengthCount[MaxLL+1];
     short litLengthNCount[MaxLL+1];
-    U32 repOffset[MAXREPOFFSET] = { 0 };
+    U32 repOffset[MAXREPOFFSET];
     offsetCount_t bestRepOffset[ZSTD_REP_NUM+1];
     EStats_ress_t esr;
     ZSTD_parameters params;
-    U32 u, huffLog = 12, Offlog = OffFSELog, mlLog = MLFSELog, llLog = LLFSELog, total;
+    U32 u, huffLog = 11, Offlog = OffFSELog, mlLog = MLFSELog, llLog = LLFSELog, total;
     size_t pos = 0, errorCode;
     size_t eSize = 0;
     size_t const totalSrcSize = ZDICT_totalSampleSize(fileSizes, nbFiles);
-    size_t const averageSampleSize = totalSrcSize / nbFiles;
+    size_t const averageSampleSize = totalSrcSize / (nbFiles + !nbFiles);
     BYTE* dstPtr = (BYTE*)dstBuffer;
     /* init */
+    esr.ref = ZSTD_createCCtx();
+    esr.zc = ZSTD_createCCtx();
+    esr.workPlace = malloc(ZSTD_BLOCKSIZE_ABSOLUTEMAX);
+    if (!esr.ref || !esr.zc || !esr.workPlace) {
+        eSize = ERROR(memory_allocation);
+        DISPLAYLEVEL(1, "Not enough memory \n");
+        goto _cleanup;
+    }
+    if (offcodeMax>OFFCODE_MAX) { eSize = ERROR(dictionary_wrong); goto _cleanup; }   /* too large dictionary */
     for (u=0; u<256; u++) countLit[u]=1;   /* any character must be described */
-    for (u=0; u<=OFFCODE_MAX; u++) offcodeCount[u]=1;
+    for (u=0; u<=offcodeMax; u++) offcodeCount[u]=1;
     for (u=0; u<=MaxML; u++) matchLengthCount[u]=1;
     for (u=0; u<=MaxLL; u++) litLengthCount[u]=1;
+    memset(repOffset, 0, sizeof(repOffset));
     repOffset[1] = repOffset[4] = repOffset[8] = 1;
     memset(bestRepOffset, 0, sizeof(bestRepOffset));
-    esr.ref = ZSTD_createCCtx();
-    esr.zc = ZSTD_createCCtx();
-    esr.workPlace = malloc(ZSTD_BLOCKSIZE_MAX);
-    if (!esr.ref || !esr.zc || !esr.workPlace) {
-            eSize = ERROR(memory_allocation);
-            DISPLAYLEVEL(1, "Not enough memory");
-            goto _cleanup;
-    }
     if (compressionLevel==0) compressionLevel=g_compressionLevel_default;
     params = ZSTD_getParams(compressionLevel, averageSampleSize, dictBufferSize);
-	{	size_t const beginResult = ZSTD_compressBegin_advanced(esr.ref, dictBuffer, dictBufferSize, params, 0);
-		if (ZSTD_isError(beginResult)) {
-			eSize = ERROR(GENERIC);
-			DISPLAYLEVEL(1, "error : ZSTD_compressBegin_advanced failed ");
-			goto _cleanup;
-	}	}
+    {   size_t const beginResult = ZSTD_compressBegin_advanced(esr.ref, dictBuffer, dictBufferSize, params, 0);
+            if (ZSTD_isError(beginResult)) {
+            eSize = ERROR(GENERIC);
+            DISPLAYLEVEL(1, "error : ZSTD_compressBegin_advanced failed \n");
+            goto _cleanup;
+    }   }
     /* collect stats on all files */
     for (u=0; u<nbFiles; u++) {
         ZDICT_countEStats(esr, params,
-                        countLit, offcodeCount, matchLengthCount, litLengthCount, repOffset,
-           (const char*)srcBuffer + pos, fileSizes[u]);
+                          countLit, offcodeCount, matchLengthCount, litLengthCount, repOffset,
+                         (const char*)srcBuffer + pos, fileSizes[u],
+                          notificationLevel);
         pos += fileSizes[u];
     }
@@ -732,7 +713,7 @@ static size_t ZDICT_analyzeEntropy(void*  dstBuffer, size_t maxDstSize,
     errorCode = HUF_buildCTable (hufTable, countLit, 255, huffLog);
     if (HUF_isError(errorCode)) {
         eSize = ERROR(GENERIC);
-        DISPLAYLEVEL(1, "HUF_buildCTable error");
+        DISPLAYLEVEL(1, "HUF_buildCTable error \n");
         goto _cleanup;
     }
     huffLog = (U32)errorCode;
@@ -744,11 +725,11 @@ static size_t ZDICT_analyzeEntropy(void*  dstBuffer, size_t maxDstSize,
     }
     /* note : the result of this phase should be used to better appreciate the impact on statistics */
-    total=0; for (u=0; u<=OFFCODE_MAX; u++) total+=offcodeCount[u];
-    errorCode = FSE_normalizeCount(offcodeNCount, Offlog, offcodeCount, total, OFFCODE_MAX);
+    total=0; for (u=0; u<=offcodeMax; u++) total+=offcodeCount[u];
+    errorCode = FSE_normalizeCount(offcodeNCount, Offlog, offcodeCount, total, offcodeMax);
     if (FSE_isError(errorCode)) {
         eSize = ERROR(GENERIC);
-        DISPLAYLEVEL(1, "FSE_normalizeCount error with offcodeCount");
+        DISPLAYLEVEL(1, "FSE_normalizeCount error with offcodeCount \n");
         goto _cleanup;
     }
     Offlog = (U32)errorCode;
@@ -757,7 +738,7 @@ static size_t ZDICT_analyzeEntropy(void*  dstBuffer, size_t maxDstSize,
     errorCode = FSE_normalizeCount(matchLengthNCount, mlLog, matchLengthCount, total, MaxML);
     if (FSE_isError(errorCode)) {
         eSize = ERROR(GENERIC);
-        DISPLAYLEVEL(1, "FSE_normalizeCount error with matchLengthCount");
+        DISPLAYLEVEL(1, "FSE_normalizeCount error with matchLengthCount \n");
         goto _cleanup;
     }
     mlLog = (U32)errorCode;
@@ -766,17 +747,16 @@ static size_t ZDICT_analyzeEntropy(void*  dstBuffer, size_t maxDstSize,
     errorCode = FSE_normalizeCount(litLengthNCount, llLog, litLengthCount, total, MaxLL);
     if (FSE_isError(errorCode)) {
         eSize = ERROR(GENERIC);
-        DISPLAYLEVEL(1, "FSE_normalizeCount error with litLengthCount");
+        DISPLAYLEVEL(1, "FSE_normalizeCount error with litLengthCount \n");
         goto _cleanup;
     }
     llLog = (U32)errorCode;
     /* write result to buffer */
     {   size_t const hhSize = HUF_writeCTable(dstPtr, maxDstSize, hufTable, 255, huffLog);
         if (HUF_isError(hhSize)) {
             eSize = ERROR(GENERIC);
-            DISPLAYLEVEL(1, "HUF_writeCTable error");
+            DISPLAYLEVEL(1, "HUF_writeCTable error \n");
             goto _cleanup;
         }
         dstPtr += hhSize;
@@ -787,7 +767,7 @@ static size_t ZDICT_analyzeEntropy(void*  dstBuffer, size_t maxDstSize,
     {   size_t const ohSize = FSE_writeNCount(dstPtr, maxDstSize, offcodeNCount, OFFCODE_MAX, Offlog);
         if (FSE_isError(ohSize)) {
             eSize = ERROR(GENERIC);
-            DISPLAYLEVEL(1, "FSE_writeNCount error with offcodeNCount");
+            DISPLAYLEVEL(1, "FSE_writeNCount error with offcodeNCount \n");
             goto _cleanup;
         }
         dstPtr += ohSize;
@@ -798,7 +778,7 @@ static size_t ZDICT_analyzeEntropy(void*  dstBuffer, size_t maxDstSize,
     {   size_t const mhSize = FSE_writeNCount(dstPtr, maxDstSize, matchLengthNCount, MaxML, mlLog);
         if (FSE_isError(mhSize)) {
             eSize = ERROR(GENERIC);
-            DISPLAYLEVEL(1, "FSE_writeNCount error with matchLengthNCount");
+            DISPLAYLEVEL(1, "FSE_writeNCount error with matchLengthNCount \n");
             goto _cleanup;
         }
         dstPtr += mhSize;
@@ -809,7 +789,7 @@ static size_t ZDICT_analyzeEntropy(void*  dstBuffer, size_t maxDstSize,
     {   size_t const lhSize = FSE_writeNCount(dstPtr, maxDstSize, litLengthNCount, MaxLL, llLog);
         if (FSE_isError(lhSize)) {
             eSize = ERROR(GENERIC);
-            DISPLAYLEVEL(1, "FSE_writeNCount error with litlengthNCount");
+            DISPLAYLEVEL(1, "FSE_writeNCount error with litlengthNCount \n");
             goto _cleanup;
         }
         dstPtr += lhSize;
@@ -819,7 +799,7 @@ static size_t ZDICT_analyzeEntropy(void*  dstBuffer, size_t maxDstSize,
     if (maxDstSize<12) {
         eSize = ERROR(GENERIC);
-        DISPLAYLEVEL(1, "not enough space to write RepOffsets");
+        DISPLAYLEVEL(1, "not enough space to write RepOffsets \n");
         goto _cleanup;
     }
 # if 0
@@ -833,7 +813,7 @@ static size_t ZDICT_analyzeEntropy(void*  dstBuffer, size_t maxDstSize,
     MEM_writeLE32(dstPtr+4, repStartValue[1]);
     MEM_writeLE32(dstPtr+8, repStartValue[2]);
 #endif
-    dstPtr += 12;
+    //dstPtr += 12;
     eSize += 12;
 _cleanup:
@@ -845,51 +825,13 @@ _cleanup:
 }
-#define DIB_FASTSEGMENTSIZE 64
-/*! ZDICT_fastSampling()  (based on an idea proposed by Giuseppe Ottaviano) :
-    Fill `dictBuffer` with stripes of size DIB_FASTSEGMENTSIZE from `samplesBuffer`,
-    up to `dictSize`.
-    Filling starts from the end of `dictBuffer`, down to maximum possible.
-    if `dictSize` is not a multiply of DIB_FASTSEGMENTSIZE, some bytes at beginning of `dictBuffer` won't be used.
-    @return : amount of data written into `dictBuffer`,
-              or an error code
-*/
-static size_t ZDICT_fastSampling(void* dictBuffer, size_t dictSize,
-                         const void* samplesBuffer, size_t samplesSize)
-{
-    char* dstPtr = (char*)dictBuffer + dictSize;
-    const char* srcPtr = (const char*)samplesBuffer;
-    size_t const nbSegments = dictSize / DIB_FASTSEGMENTSIZE;
-    size_t segNb, interSize;
-    if (nbSegments <= 2) return ERROR(srcSize_wrong);
-    if (samplesSize < dictSize) return ERROR(srcSize_wrong);
-    /* first and last segments are part of dictionary, in case they contain interesting header/footer */
-    dstPtr -= DIB_FASTSEGMENTSIZE;
-    memcpy(dstPtr, srcPtr, DIB_FASTSEGMENTSIZE);
-    dstPtr -= DIB_FASTSEGMENTSIZE;
-    memcpy(dstPtr, srcPtr+samplesSize-DIB_FASTSEGMENTSIZE, DIB_FASTSEGMENTSIZE);
-    /* regularly copy a segment */
-    interSize = (samplesSize - nbSegments*DIB_FASTSEGMENTSIZE) / (nbSegments-1);
-    srcPtr += DIB_FASTSEGMENTSIZE;
-    for (segNb=2; segNb < nbSegments; segNb++) {
-        srcPtr += interSize;
-        dstPtr -= DIB_FASTSEGMENTSIZE;
-        memcpy(dstPtr, srcPtr, DIB_FASTSEGMENTSIZE);
-        srcPtr += DIB_FASTSEGMENTSIZE;
-    }
-    return nbSegments * DIB_FASTSEGMENTSIZE;
-}
 size_t ZDICT_addEntropyTablesFromBuffer_advanced(void* dictBuffer, size_t dictContentSize, size_t dictBufferCapacity,
                                                  const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
                                                  ZDICT_params_t params)
 {
     size_t hSize;
-    unsigned const compressionLevel = (params.compressionLevel == 0) ? g_compressionLevel_default : params.compressionLevel;
+    int const compressionLevel = (params.compressionLevel <= 0) ? g_compressionLevel_default : params.compressionLevel;
+    U32 const notificationLevel = params.notificationLevel;
     /* dictionary header */
     MEM_writeLE32(dictBuffer, ZSTD_DICT_MAGIC);
@@ -903,10 +845,15 @@ size_t ZDICT_addEntropyTablesFromBuffer_advanced(void* dictBuffer, size_t dictCo
     /* entropy tables */
     DISPLAYLEVEL(2, "\r%70s\r", "");   /* clean display line */
     DISPLAYLEVEL(2, "statistics ... \n");
-    hSize += ZDICT_analyzeEntropy((char*)dictBuffer+hSize, dictBufferCapacity-hSize,
+    {   size_t const eSize = ZDICT_analyzeEntropy((char*)dictBuffer+hSize, dictBufferCapacity-hSize,
                                   compressionLevel,
                                   samplesBuffer, samplesSizes, nbSamples,
-                                  (char*)dictBuffer + dictBufferCapacity - dictContentSize, dictContentSize);
+                                  (char*)dictBuffer + dictBufferCapacity - dictContentSize, dictContentSize,
+                                  notificationLevel);
+        if (ZDICT_isError(eSize)) return eSize;
+        hSize += eSize;
+    }
     if (hSize + dictContentSize < dictBufferCapacity)
         memmove((char*)dictBuffer + hSize, (char*)dictBuffer + dictBufferCapacity - dictContentSize, dictContentSize);
@@ -914,60 +861,86 @@ size_t ZDICT_addEntropyTablesFromBuffer_advanced(void* dictBuffer, size_t dictCo
 }
-#define DIB_MINSAMPLESSIZE (DIB_FASTSEGMENTSIZE*3)
 /*! ZDICT_trainFromBuffer_unsafe() :
-*   `samplesBuffer` must be followed by noisy guard band.
-*   @return : size of dictionary.
+*   Warning : `samplesBuffer` must be followed by noisy guard band.
+*   @return : size of dictionary, or an error code which can be tested with ZDICT_isError()
 */
 size_t ZDICT_trainFromBuffer_unsafe(
                             void* dictBuffer, size_t maxDictSize,
                             const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
                             ZDICT_params_t params)
 {
-    U32 const dictListSize = MAX( MAX(DICTLISTSIZE, nbSamples), (U32)(maxDictSize/16));
+    U32 const dictListSize = MAX(MAX(DICTLISTSIZE_DEFAULT, nbSamples), (U32)(maxDictSize/16));
     dictItem* const dictList = (dictItem*)malloc(dictListSize * sizeof(*dictList));
-    unsigned selectivity = params.selectivityLevel;
+    unsigned const selectivity = params.selectivityLevel == 0 ? g_selectivity_default : params.selectivityLevel;
+    unsigned const minRep = (selectivity > 30) ? MINRATIO : nbSamples >> selectivity;
     size_t const targetDictSize = maxDictSize;
-    size_t sBuffSize;
+    size_t const samplesBuffSize = ZDICT_totalSampleSize(samplesSizes, nbSamples);
     size_t dictSize = 0;
+    U32 const notificationLevel = params.notificationLevel;
     /* checks */
     if (!dictList) return ERROR(memory_allocation);
     if (maxDictSize <= g_provision_entropySize + g_min_fast_dictContent) { free(dictList); return ERROR(dstSize_tooSmall); }
+    if (samplesBuffSize < ZDICT_MIN_SAMPLES_SIZE) { free(dictList); return 0; }   /* not enough source to create dictionary */
     /* init */
-    { unsigned u; for (u=0, sBuffSize=0; u<nbSamples; u++) sBuffSize += samplesSizes[u]; }
-    if (sBuffSize < DIB_MINSAMPLESSIZE) { free(dictList); return 0; }   /* not enough source to create dictionary */
     ZDICT_initDictItem(dictList);
-    g_displayLevel = params.notificationLevel;
-    if (selectivity==0) selectivity = g_selectivity_default;
     /* build dictionary */
-    if (selectivity>1) {  /* selectivity == 1 => fast mode */
-        ZDICT_trainBuffer(dictList, dictListSize,
-                        samplesBuffer, sBuffSize,
-                        samplesSizes, nbSamples,
-                        selectivity, (U32)targetDictSize);
-        /* display best matches */
-        if (g_displayLevel>= 3) {
-            U32 const nb = 25;
-            U32 const dictContentSize = ZDICT_dictSize(dictList);
-            U32 u;
-            DISPLAYLEVEL(3, "\n %u segments found, of total size %u \n", dictList[0].pos, dictContentSize);
-            DISPLAYLEVEL(3, "list %u best segments \n", nb);
-            for (u=1; u<=nb; u++) {
-                U32 p = dictList[u].pos;
-                U32 l = dictList[u].length;
-                U32 d = MIN(40, l);
-                DISPLAYLEVEL(3, "%3u:%3u bytes at pos %8u, savings %7u bytes |",
-                             u, l, p, dictList[u].savings);
-                ZDICT_printHex(3, (const char*)samplesBuffer+p, d);
-                DISPLAYLEVEL(3, "| \n");
-    }   }   }
+    ZDICT_trainBuffer(dictList, dictListSize,
+                    samplesBuffer, samplesBuffSize,
+                    samplesSizes, nbSamples,
+                    minRep, notificationLevel);
+    /* display best matches */
+    if (params.notificationLevel>= 3) {
+        U32 const nb = MIN(25, dictList[0].pos);
+        U32 const dictContentSize = ZDICT_dictSize(dictList);
+        U32 u;
+        DISPLAYLEVEL(3, "\n %u segments found, of total size %u \n", dictList[0].pos, dictContentSize);
+        DISPLAYLEVEL(3, "list %u best segments \n", nb);
+        for (u=1; u<=nb; u++) {
+            U32 pos = dictList[u].pos;
+            U32 length = dictList[u].length;
+            U32 printedLength = MIN(40, length);
+            DISPLAYLEVEL(3, "%3u:%3u bytes at pos %8u, savings %7u bytes |",
+                         u, length, pos, dictList[u].savings);
+            ZDICT_printHex((const char*)samplesBuffer+pos, printedLength);
+            DISPLAYLEVEL(3, "| \n");
+    }   }
     /* create dictionary */
     {   U32 dictContentSize = ZDICT_dictSize(dictList);
+        if (dictContentSize < targetDictSize/3) {
+            DISPLAYLEVEL(2, "!  warning : selected content significantly smaller than requested (%u < %u) \n", dictContentSize, (U32)maxDictSize);
+            if (minRep > MINRATIO) {
+                DISPLAYLEVEL(2, "!  consider increasing selectivity to produce larger dictionary (-s%u) \n", selectivity+1);
+                DISPLAYLEVEL(2, "!  note : larger dictionaries are not necessarily better, test its efficiency on samples \n");
+            }
+            if (samplesBuffSize < 10 * targetDictSize)
+                DISPLAYLEVEL(2, "!  consider increasing the number of samples (total size : %u MB)\n", (U32)(samplesBuffSize>>20));
+        }
+        if ((dictContentSize > targetDictSize*3) && (nbSamples > 2*MINRATIO) && (selectivity>1)) {
+            U32 proposedSelectivity = selectivity-1;
+            while ((nbSamples >> proposedSelectivity) <= MINRATIO) { proposedSelectivity--; }
+            DISPLAYLEVEL(2, "!  note : calculated dictionary significantly larger than requested (%u > %u) \n", dictContentSize, (U32)maxDictSize);
+            DISPLAYLEVEL(2, "!  consider increasing dictionary size, or produce denser dictionary (-s%u) \n", proposedSelectivity);
+            DISPLAYLEVEL(2, "!  always test dictionary efficiency on samples \n");
+        }
+        /* limit dictionary size */
+        {   U32 const max = dictList->pos;   /* convention : nb of useful elts within dictList */
+            U32 currentSize = 0;
+            U32 n; for (n=1; n<max; n++) {
+                currentSize += dictList[n].length;
+                if (currentSize > targetDictSize) { currentSize -= dictList[n].length; break; }
+            }
+            dictList->pos = n;
+            dictContentSize = currentSize;
+        }
         /* build dict content */
         {   U32 u;
@@ -979,14 +952,6 @@ size_t ZDICT_trainFromBuffer_unsafe(
                 memcpy(ptr, (const char*)samplesBuffer+dictList[u].pos, l);
         }   }
-        /* fast mode dict content */
-        if (selectivity==1) {  /* note could also be used to complete a dictionary, but not necessarily better */
-            DISPLAYLEVEL(3, "\r%70s\r", "");   /* clean display line */
-            DISPLAYLEVEL(3, "Adding %u KB with fast sampling \n", (U32)(targetDictSize>>10));
-            dictContentSize = (U32)ZDICT_fastSampling(dictBuffer, targetDictSize,
-                                                      samplesBuffer, sBuffSize);
-        }
         dictSize = ZDICT_addEntropyTablesFromBuffer_advanced(dictBuffer, dictContentSize, maxDictSize,
                                                              samplesBuffer, samplesSizes, nbSamples,
                                                              params);
@@ -1004,23 +969,23 @@ size_t ZDICT_trainFromBuffer_advanced(void* dictBuffer, size_t dictBufferCapacit
                                       const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
                                       ZDICT_params_t params)
 {
+    size_t result;
     void* newBuff;
-    size_t sBuffSize;
+    size_t const sBuffSize = ZDICT_totalSampleSize(samplesSizes, nbSamples);
+    if (sBuffSize < ZDICT_MIN_SAMPLES_SIZE) return 0;   /* not enough content => no dictionary */
-    { unsigned u; for (u=0, sBuffSize=0; u<nbSamples; u++) sBuffSize += samplesSizes[u]; }
-    if (sBuffSize==0) return 0;   /* empty content => no dictionary */
     newBuff = malloc(sBuffSize + NOISELENGTH);
     if (!newBuff) return ERROR(memory_allocation);
     memcpy(newBuff, samplesBuffer, sBuffSize);
     ZDICT_fillNoise((char*)newBuff + sBuffSize, NOISELENGTH);   /* guard band, for end of buffer condition */
-    { size_t const result = ZDICT_trainFromBuffer_unsafe(
+    result = ZDICT_trainFromBuffer_unsafe(
                                         dictBuffer, dictBufferCapacity,
                                         newBuff, samplesSizes, nbSamples,
                                         params);
-      free(newBuff);
-      return result; }
+    free(newBuff);
+    return result;
 }