zstd-ruby 0.1.2 → 1.1.3.0
- checksums.yaml +4 -4
- data/README.md +1 -1
- data/ext/zstdruby/libzstd/BUCK +186 -0
- data/ext/zstdruby/libzstd/Makefile +58 -31
- data/ext/zstdruby/libzstd/common/mem.h +1 -1
- data/ext/zstdruby/libzstd/common/pool.c +194 -0
- data/ext/zstdruby/libzstd/common/pool.h +56 -0
- data/ext/zstdruby/libzstd/common/threading.c +79 -0
- data/ext/zstdruby/libzstd/common/threading.h +104 -0
- data/ext/zstdruby/libzstd/common/zstd_common.c +0 -4
- data/ext/zstdruby/libzstd/common/zstd_errors.h +16 -2
- data/ext/zstdruby/libzstd/common/zstd_internal.h +9 -0
- data/ext/zstdruby/libzstd/compress/zstd_compress.c +94 -51
- data/ext/zstdruby/libzstd/compress/zstd_opt.h +6 -6
- data/ext/zstdruby/libzstd/compress/zstdmt_compress.c +740 -0
- data/ext/zstdruby/libzstd/compress/zstdmt_compress.h +78 -0
- data/ext/zstdruby/libzstd/decompress/zstd_decompress.c +39 -22
- data/ext/zstdruby/libzstd/deprecated/zbuff_common.c +26 -0
- data/ext/zstdruby/libzstd/dictBuilder/cover.c +1021 -0
- data/ext/zstdruby/libzstd/dictBuilder/zdict.c +60 -12
- data/ext/zstdruby/libzstd/dictBuilder/zdict.h +109 -19
- data/ext/zstdruby/libzstd/dll/example/fullbench-dll.vcxproj +2 -0
- data/ext/zstdruby/libzstd/legacy/zstd_v04.c +13 -19
- data/ext/zstdruby/libzstd/legacy/zstd_v05.c +4 -2
- data/ext/zstdruby/libzstd/legacy/zstd_v07.c +3 -3
- data/ext/zstdruby/libzstd/zstd.h +53 -25
- data/lib/zstd-ruby/version.rb +1 -1
- metadata +11 -2
**data/ext/zstdruby/libzstd/compress/zstdmt_compress.h** (new file, +78 lines)

```diff
@@ -0,0 +1,78 @@
+/**
+ * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree. An additional grant
+ * of patent rights can be found in the PATENTS file in the same directory.
+ */
+
+#ifndef ZSTDMT_COMPRESS_H
+#define ZSTDMT_COMPRESS_H
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+
+/* Note : All prototypes defined in this file shall be considered experimental.
+ *        There is no guarantee of API continuity (yet) on any of these prototypes */
+
+/* ===   Dependencies   === */
+#include <stddef.h>   /* size_t */
+#define ZSTD_STATIC_LINKING_ONLY   /* ZSTD_parameters */
+#include "zstd.h"     /* ZSTD_inBuffer, ZSTD_outBuffer, ZSTDLIB_API */
+
+
+/* ===   Simple one-pass functions   === */
+
+typedef struct ZSTDMT_CCtx_s ZSTDMT_CCtx;
+ZSTDLIB_API ZSTDMT_CCtx* ZSTDMT_createCCtx(unsigned nbThreads);
+ZSTDLIB_API size_t ZSTDMT_freeCCtx(ZSTDMT_CCtx* cctx);
+
+ZSTDLIB_API size_t ZSTDMT_compressCCtx(ZSTDMT_CCtx* cctx,
+                                       void* dst, size_t dstCapacity,
+                                 const void* src, size_t srcSize,
+                                       int compressionLevel);
+
+
+/* ===   Streaming functions   === */
+
+ZSTDLIB_API size_t ZSTDMT_initCStream(ZSTDMT_CCtx* mtctx, int compressionLevel);
+ZSTDLIB_API size_t ZSTDMT_resetCStream(ZSTDMT_CCtx* mtctx, unsigned long long pledgedSrcSize);  /**< pledgedSrcSize is optional and can be zero == unknown */
+
+ZSTDLIB_API size_t ZSTDMT_compressStream(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output, ZSTD_inBuffer* input);
+
+ZSTDLIB_API size_t ZSTDMT_flushStream(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output);   /**< @return : 0 == all flushed; >0 : still some data to be flushed; or an error code (ZSTD_isError()) */
+ZSTDLIB_API size_t ZSTDMT_endStream(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output);     /**< @return : 0 == all flushed; >0 : still some data to be flushed; or an error code (ZSTD_isError()) */
+
+
+/* ===   Advanced functions and parameters  === */
+
+#ifndef ZSTDMT_SECTION_SIZE_MIN
+#  define ZSTDMT_SECTION_SIZE_MIN (1U << 20)   /* 1 MB - Minimum size of each compression job */
+#endif
+
+ZSTDLIB_API size_t ZSTDMT_initCStream_advanced(ZSTDMT_CCtx* mtctx, const void* dict, size_t dictSize,  /**< dict can be released after init, a local copy is preserved within zcs */
+                                          ZSTD_parameters params, unsigned long long pledgedSrcSize);  /**< pledgedSrcSize is optional and can be zero == unknown */
+
+/* ZSDTMT_parameter :
+ * List of parameters that can be set using ZSTDMT_setMTCtxParameter() */
+typedef enum {
+    ZSTDMT_p_sectionSize,        /* size of input "section". Each section is compressed in parallel. 0 means default, which is dynamically determined within compression functions */
+    ZSTDMT_p_overlapSectionLog   /* Log of overlapped section; 0 == no overlap, 6(default) == use 1/8th of window, >=9 == use full window */
+} ZSDTMT_parameter;
+
+/* ZSTDMT_setMTCtxParameter() :
+ * allow setting individual parameters, one at a time, among a list of enums defined in ZSTDMT_parameter.
+ * The function must be called typically after ZSTD_createCCtx().
+ * Parameters not explicitly reset by ZSTDMT_init*() remain the same in consecutive compression sessions.
+ * @return : 0, or an error code (which can be tested using ZSTD_isError()) */
+ZSTDLIB_API size_t ZSTDMT_setMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSDTMT_parameter parameter, unsigned value);
+
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif   /* ZSTDMT_COMPRESS_H */
```
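The one-pass entry point above is the simplest way to drive the new API. A minimal sketch (not from the gem; hypothetical buffers, and it assumes the bundled private header is on the include path and that zstd was built with multithreading support so the workers actually run):

```c
#include "zstd.h"            /* ZSTD_isError */
#include "zstdmt_compress.h" /* experimental ZSTDMT_* API shown above */

/* Compress src with 4 worker threads at level 3.
 * Returns the compressed size, or 0 on error. */
static size_t compress_mt(void* dst, size_t dstCapacity,
                          const void* src, size_t srcSize)
{
    ZSTDMT_CCtx* const mtctx = ZSTDMT_createCCtx(4);   /* 4 threads */
    size_t cSize;
    if (mtctx == NULL) return 0;
    cSize = ZSTDMT_compressCCtx(mtctx, dst, dstCapacity, src, srcSize, 3);
    ZSTDMT_freeCCtx(mtctx);
    return ZSTD_isError(cSize) ? 0 : cSize;
}
```

Each job compresses one "section" of the input (at least `ZSTDMT_SECTION_SIZE_MIN` bytes), which is why small inputs gain little from extra threads.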
**data/ext/zstdruby/libzstd/decompress/zstd_decompress.c** (+39 -22)

```diff
@@ -1444,7 +1444,7 @@ size_t ZSTD_decompress_usingDict(ZSTD_DCtx* dctx,
 #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT==1)
     if (ZSTD_isLegacy(src, srcSize)) return ZSTD_decompressLegacy(dst, dstCapacity, src, srcSize, dict, dictSize);
 #endif
-    ZSTD_decompressBegin_usingDict(dctx, dict, dictSize);
+    CHECK_F(ZSTD_decompressBegin_usingDict(dctx, dict, dictSize));
     ZSTD_checkContinuity(dctx, dst);
     return ZSTD_decompressFrame(dctx, dst, dstCapacity, src, srcSize);
 }
```
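The functional change here: the return value of `ZSTD_decompressBegin_usingDict()` was previously discarded, so a failed dictionary load could go unnoticed; wrapping it in `CHECK_F` makes the failure abort decompression. `CHECK_F` is zstd's usual early-return helper, defined in `zstd_internal.h` at this version essentially as:

```c
/* Sketch of the zstd_internal.h definition: evaluate f, and if it
 * produced an error code, return it from the enclosing function. */
#define CHECK_F(f) { size_t const errcod = f; if (ERR_isError(errcod)) return errcod; }
```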
```diff
@@ -1671,9 +1671,9 @@ static size_t ZSTD_loadEntropy(ZSTD_DCtx* dctx, const void* const dict, size_t c
     }
 
     if (dictPtr+12 > dictEnd) return ERROR(dictionary_corrupted);
-    dctx->rep[0] = MEM_readLE32(dictPtr+0); if (dctx->rep[0] >= dictSize) return ERROR(dictionary_corrupted);
-    dctx->rep[1] = MEM_readLE32(dictPtr+4); if (dctx->rep[1] >= dictSize) return ERROR(dictionary_corrupted);
-    dctx->rep[2] = MEM_readLE32(dictPtr+8); if (dctx->rep[2] >= dictSize) return ERROR(dictionary_corrupted);
+    dctx->rep[0] = MEM_readLE32(dictPtr+0); if (dctx->rep[0] == 0 || dctx->rep[0] >= dictSize) return ERROR(dictionary_corrupted);
+    dctx->rep[1] = MEM_readLE32(dictPtr+4); if (dctx->rep[1] == 0 || dctx->rep[1] >= dictSize) return ERROR(dictionary_corrupted);
+    dctx->rep[2] = MEM_readLE32(dictPtr+8); if (dctx->rep[2] == 0 || dctx->rep[2] >= dictSize) return ERROR(dictionary_corrupted);
     dictPtr += 12;
 
     dctx->litEntropy = dctx->fseEntropy = 1;
@@ -1713,39 +1713,44 @@ size_t ZSTD_decompressBegin_usingDict(ZSTD_DCtx* dctx, const void* dict, size_t
 /* ====== ZSTD_DDict ====== */
 
 struct ZSTD_DDict_s {
-    void* dict;
+    void* dictBuffer;
+    const void* dictContent;
     size_t dictSize;
     ZSTD_DCtx* refContext;
 };  /* typedef'd to ZSTD_DDict within "zstd.h" */
 
-ZSTD_DDict* ZSTD_createDDict_advanced(const void* dict, size_t dictSize, ZSTD_customMem customMem)
+ZSTD_DDict* ZSTD_createDDict_advanced(const void* dict, size_t dictSize, unsigned byReference, ZSTD_customMem customMem)
 {
     if (!customMem.customAlloc && !customMem.customFree) customMem = defaultCustomMem;
     if (!customMem.customAlloc || !customMem.customFree) return NULL;
 
     {   ZSTD_DDict* const ddict = (ZSTD_DDict*) ZSTD_malloc(sizeof(ZSTD_DDict), customMem);
-        void* const dictContent = ZSTD_malloc(dictSize, customMem);
         ZSTD_DCtx* const dctx = ZSTD_createDCtx_advanced(customMem);
 
-        if (!dictContent || !ddict || !dctx) {
-            ZSTD_free(dictContent, customMem);
+        if (!ddict || !dctx) {
             ZSTD_free(ddict, customMem);
             ZSTD_free(dctx, customMem);
             return NULL;
         }
 
-        if (dictSize) {
-            memcpy(dictContent, dict, dictSize);
+        if ((byReference) || (!dict) || (!dictSize)) {
+            ddict->dictBuffer = NULL;
+            ddict->dictContent = dict;
+        } else {
+            void* const internalBuffer = ZSTD_malloc(dictSize, customMem);
+            if (!internalBuffer) { ZSTD_free(dctx, customMem); ZSTD_free(ddict, customMem); return NULL; }
+            memcpy(internalBuffer, dict, dictSize);
+            ddict->dictBuffer = internalBuffer;
+            ddict->dictContent = internalBuffer;
         }
-        {   size_t const errorCode = ZSTD_decompressBegin_usingDict(dctx, dictContent, dictSize);
+        {   size_t const errorCode = ZSTD_decompressBegin_usingDict(dctx, ddict->dictContent, dictSize);
             if (ZSTD_isError(errorCode)) {
-                ZSTD_free(dictContent, customMem);
+                ZSTD_free(ddict->dictBuffer, customMem);
                 ZSTD_free(ddict, customMem);
                 ZSTD_free(dctx, customMem);
                 return NULL;
         }   }
 
-        ddict->dict = dictContent;
         ddict->dictSize = dictSize;
         ddict->refContext = dctx;
         return ddict;
@@ -1758,15 +1763,27 @@ ZSTD_DDict* ZSTD_createDDict_advanced(const void* dict, size_t dictSize, ZSTD_cu
 ZSTD_DDict* ZSTD_createDDict(const void* dict, size_t dictSize)
 {
     ZSTD_customMem const allocator = { NULL, NULL, NULL };
-    return ZSTD_createDDict_advanced(dict, dictSize, allocator);
+    return ZSTD_createDDict_advanced(dict, dictSize, 0, allocator);
 }
 
+
+/*! ZSTD_createDDict_byReference() :
+ *  Create a digested dictionary, ready to start decompression operation without startup delay.
+ *  Dictionary content is simply referenced, and therefore stays in dictBuffer.
+ *  It is important that dictBuffer outlives DDict, it must remain read accessible throughout the lifetime of DDict */
+ZSTD_DDict* ZSTD_createDDict_byReference(const void* dictBuffer, size_t dictSize)
+{
+    ZSTD_customMem const allocator = { NULL, NULL, NULL };
+    return ZSTD_createDDict_advanced(dictBuffer, dictSize, 1, allocator);
+}
+
+
 size_t ZSTD_freeDDict(ZSTD_DDict* ddict)
 {
     if (ddict==NULL) return 0;   /* support free on NULL */
     {   ZSTD_customMem const cMem = ddict->refContext->customMem;
         ZSTD_freeDCtx(ddict->refContext);
-        ZSTD_free(ddict->dict, cMem);
+        ZSTD_free(ddict->dictBuffer, cMem);
         ZSTD_free(ddict, cMem);
         return 0;
     }
```
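The new `ZSTD_createDDict_byReference()` is the user-visible payoff of the `dictBuffer`/`dictContent` split: the copying constructor behaves as before, while the by-reference one skips the allocation and `memcpy`, at the price of a lifetime obligation on the caller. A sketch of the trade-off (hypothetical `dictBuf`/`dictLen`; this assumes the new constructor sits in the `ZSTD_STATIC_LINKING_ONLY` section of `zstd.h` at this version):

```c
#define ZSTD_STATIC_LINKING_ONLY  /* ZSTD_createDDict_byReference was experimental */
#include "zstd.h"

/* Decompress one frame against a referenced (not copied) dictionary.
 * dictBuf must stay valid and unmodified until after ZSTD_freeDDict(). */
static size_t decompressWithRefDict(ZSTD_DCtx* dctx,
                                    void* dst, size_t dstCapacity,
                                    const void* src, size_t srcSize,
                                    const void* dictBuf, size_t dictLen)
{
    ZSTD_DDict* const ddict = ZSTD_createDDict_byReference(dictBuf, dictLen);
    size_t dSize;
    if (ddict == NULL) return (size_t)-1;   /* allocation failure */
    dSize = ZSTD_decompress_usingDDict(dctx, dst, dstCapacity, src, srcSize, ddict);
    ZSTD_freeDDict(ddict);   /* frees the DDict only; dictBuf is untouched */
    return dSize;
}
```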
```diff
@@ -1775,7 +1792,7 @@ size_t ZSTD_freeDDict(ZSTD_DDict* ddict)
 size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict)
 {
     if (ddict==NULL) return 0;   /* support sizeof on NULL */
-    return sizeof(*ddict) + ZSTD_sizeof_DCtx(ddict->refContext) + ddict->dictSize;
+    return sizeof(*ddict) + ZSTD_sizeof_DCtx(ddict->refContext) + (ddict->dictBuffer ? ddict->dictSize : 0) ;
 }
 
 /*! ZSTD_getDictID_fromDict() :
@@ -1796,7 +1813,7 @@ unsigned ZSTD_getDictID_fromDict(const void* dict, size_t dictSize)
 unsigned ZSTD_getDictID_fromDDict(const ZSTD_DDict* ddict)
 {
     if (ddict==NULL) return 0;
-    return ZSTD_getDictID_fromDict(ddict->dict, ddict->dictSize);
+    return ZSTD_getDictID_fromDict(ddict->dictContent, ddict->dictSize);
 }
 
 /*! ZSTD_getDictID_fromFrame() :
@@ -1827,7 +1844,7 @@ size_t ZSTD_decompress_usingDDict(ZSTD_DCtx* dctx,
                                   const ZSTD_DDict* ddict)
 {
 #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT==1)
-    if (ZSTD_isLegacy(src, srcSize)) return ZSTD_decompressLegacy(dst, dstCapacity, src, srcSize, ddict->dict, ddict->dictSize);
+    if (ZSTD_isLegacy(src, srcSize)) return ZSTD_decompressLegacy(dst, dstCapacity, src, srcSize, ddict->dictContent, ddict->dictSize);
 #endif
     ZSTD_refDCtx(dctx, ddict->refContext);
     ZSTD_checkContinuity(dctx, dst);
@@ -1919,7 +1936,7 @@ size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dict, size_t di
     zds->stage = zdss_loadHeader;
     zds->lhSize = zds->inPos = zds->outStart = zds->outEnd = 0;
     ZSTD_freeDDict(zds->ddictLocal);
-    if (dict) {
+    if (dict && dictSize >= 8) {
         zds->ddictLocal = ZSTD_createDDict(dict, dictSize);
         if (zds->ddictLocal == NULL) return ERROR(memory_allocation);
     } else zds->ddictLocal = NULL;
@@ -1956,7 +1973,7 @@ size_t ZSTD_setDStreamParameter(ZSTD_DStream* zds,
     switch(paramType)
     {
         default : return ERROR(parameter_unknown);
-        case ZSTDdsp_maxWindowSize : zds->maxWindowSize = paramValue ? paramValue : (U32)(-1); break;
+        case DStream_p_maxWindowSize : zds->maxWindowSize = paramValue ? paramValue : (U32)(-1); break;
     }
     return 0;
 }
@@ -2007,7 +2024,7 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB
 #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>=1)
     {   U32 const legacyVersion = ZSTD_isLegacy(istart, iend-istart);
         if (legacyVersion) {
-            const void* const dict = zds->ddict ? zds->ddict->dict : NULL;
+            const void* const dict = zds->ddict ? zds->ddict->dictContent : NULL;
             size_t const dictSize = zds->ddict ? zds->ddict->dictSize : 0;
             CHECK_F(ZSTD_initLegacyStream(&zds->legacyContext, zds->previousLegacyVersion, legacyVersion,
                                           dict, dictSize));
```
**data/ext/zstdruby/libzstd/deprecated/zbuff_common.c** (new file, +26 lines)

```diff
@@ -0,0 +1,26 @@
+/**
+ * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree. An additional grant
+ * of patent rights can be found in the PATENTS file in the same directory.
+ */
+
+/*-*************************************
+*  Dependencies
+***************************************/
+#include "error_private.h"
+#include "zbuff.h"
+
+/*-****************************************
+*  ZBUFF Error Management  (deprecated)
+******************************************/
+
+/*! ZBUFF_isError() :
+*   tells if a return value is an error code */
+unsigned ZBUFF_isError(size_t errorCode) { return ERR_isError(errorCode); }
+/*! ZBUFF_getErrorName() :
+*   provides error code string from function result (useful for debugging) */
+const char* ZBUFF_getErrorName(size_t errorCode) { return ERR_getErrorName(errorCode); }
+
```
**data/ext/zstdruby/libzstd/dictBuilder/cover.c** (new file, +1021 lines)

```diff
@@ -0,0 +1,1021 @@
+/**
+ * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree. An additional grant
+ * of patent rights can be found in the PATENTS file in the same directory.
+ */
+
+/*-*************************************
+*  Dependencies
+***************************************/
+#include <stdio.h>  /* fprintf */
+#include <stdlib.h> /* malloc, free, qsort */
+#include <string.h> /* memset */
+#include <time.h>   /* clock */
+
+#include "mem.h" /* read */
+#include "pool.h"
+#include "threading.h"
+#include "zstd_internal.h" /* includes zstd.h */
+#ifndef ZDICT_STATIC_LINKING_ONLY
+#define ZDICT_STATIC_LINKING_ONLY
+#endif
+#include "zdict.h"
+
+/*-*************************************
+*  Constants
+***************************************/
+#define COVER_MAX_SAMPLES_SIZE (sizeof(size_t) == 8 ? ((U32)-1) : ((U32)1 GB))
+
+/*-*************************************
+*  Console display
+***************************************/
+static int g_displayLevel = 2;
+#define DISPLAY(...) \
+  { \
+    fprintf(stderr, __VA_ARGS__); \
+    fflush(stderr); \
+  }
+#define LOCALDISPLAYLEVEL(displayLevel, l, ...) \
+  if (displayLevel >= l) { \
+    DISPLAY(__VA_ARGS__); \
+  } /* 0 : no display;   1: errors;   2: default;  3: details;  4: debug */
+#define DISPLAYLEVEL(l, ...) LOCALDISPLAYLEVEL(g_displayLevel, l, __VA_ARGS__)
+
+#define LOCALDISPLAYUPDATE(displayLevel, l, ...) \
+  if (displayLevel >= l) { \
+    if ((clock() - g_time > refreshRate) || (displayLevel >= 4)) { \
+      g_time = clock(); \
+      DISPLAY(__VA_ARGS__); \
+      if (displayLevel >= 4) \
+        fflush(stdout); \
+    } \
+  }
+#define DISPLAYUPDATE(l, ...) LOCALDISPLAYUPDATE(g_displayLevel, l, __VA_ARGS__)
+static const clock_t refreshRate = CLOCKS_PER_SEC * 15 / 100;
+static clock_t g_time = 0;
+
+/*-*************************************
+* Hash table
+***************************************
+* A small specialized hash map for storing activeDmers.
+* The map does not resize, so if it becomes full it will loop forever.
+* Thus, the map must be large enough to store every value.
+* The map implements linear probing and keeps its load less than 0.5.
+*/
+
+#define MAP_EMPTY_VALUE ((U32)-1)
+typedef struct COVER_map_pair_t_s {
+  U32 key;
+  U32 value;
+} COVER_map_pair_t;
+
+typedef struct COVER_map_s {
+  COVER_map_pair_t *data;
+  U32 sizeLog;
+  U32 size;
+  U32 sizeMask;
+} COVER_map_t;
+
+/**
+ * Clear the map.
+ */
+static void COVER_map_clear(COVER_map_t *map) {
+  memset(map->data, MAP_EMPTY_VALUE, map->size * sizeof(COVER_map_pair_t));
+}
+
+/**
+ * Initializes a map of the given size.
+ * Returns 1 on success and 0 on failure.
+ * The map must be destroyed with COVER_map_destroy().
+ * The map is only guaranteed to be large enough to hold size elements.
+ */
+static int COVER_map_init(COVER_map_t *map, U32 size) {
+  map->sizeLog = ZSTD_highbit32(size) + 2;
+  map->size = (U32)1 << map->sizeLog;
+  map->sizeMask = map->size - 1;
+  map->data = (COVER_map_pair_t *)malloc(map->size * sizeof(COVER_map_pair_t));
+  if (!map->data) {
+    map->sizeLog = 0;
+    map->size = 0;
+    return 0;
+  }
+  COVER_map_clear(map);
+  return 1;
+}
+
+/**
+ * Internal hash function
+ */
+static const U32 prime4bytes = 2654435761U;
+static U32 COVER_map_hash(COVER_map_t *map, U32 key) {
+  return (key * prime4bytes) >> (32 - map->sizeLog);
+}
+
+/**
+ * Helper function that returns the index that a key should be placed into.
+ */
+static U32 COVER_map_index(COVER_map_t *map, U32 key) {
+  const U32 hash = COVER_map_hash(map, key);
+  U32 i;
+  for (i = hash;; i = (i + 1) & map->sizeMask) {
+    COVER_map_pair_t *pos = &map->data[i];
+    if (pos->value == MAP_EMPTY_VALUE) {
+      return i;
+    }
+    if (pos->key == key) {
+      return i;
+    }
+  }
+}
+
+/**
+ * Returns the pointer to the value for key.
+ * If key is not in the map, it is inserted and the value is set to 0.
+ * The map must not be full.
+ */
+static U32 *COVER_map_at(COVER_map_t *map, U32 key) {
+  COVER_map_pair_t *pos = &map->data[COVER_map_index(map, key)];
+  if (pos->value == MAP_EMPTY_VALUE) {
+    pos->key = key;
+    pos->value = 0;
+  }
+  return &pos->value;
+}
+
+/**
+ * Deletes key from the map if present.
+ */
+static void COVER_map_remove(COVER_map_t *map, U32 key) {
+  U32 i = COVER_map_index(map, key);
+  COVER_map_pair_t *del = &map->data[i];
+  U32 shift = 1;
+  if (del->value == MAP_EMPTY_VALUE) {
+    return;
+  }
+  for (i = (i + 1) & map->sizeMask;; i = (i + 1) & map->sizeMask) {
+    COVER_map_pair_t *const pos = &map->data[i];
+    /* If the position is empty we are done */
+    if (pos->value == MAP_EMPTY_VALUE) {
+      del->value = MAP_EMPTY_VALUE;
+      return;
+    }
+    /* If pos can be moved to del do so */
+    if (((i - COVER_map_hash(map, pos->key)) & map->sizeMask) >= shift) {
+      del->key = pos->key;
+      del->value = pos->value;
+      del = pos;
+      shift = 1;
+    } else {
+      ++shift;
+    }
+  }
+}
+
+/**
+ * Destroyes a map that is inited with COVER_map_init().
+ */
+static void COVER_map_destroy(COVER_map_t *map) {
+  if (map->data) {
+    free(map->data);
+  }
+  map->data = NULL;
+  map->size = 0;
+}
+
+/*-*************************************
+* Context
+***************************************/
+
+typedef struct {
+  const BYTE *samples;
+  size_t *offsets;
+  const size_t *samplesSizes;
+  size_t nbSamples;
+  U32 *suffix;
+  size_t suffixSize;
+  U32 *freqs;
+  U32 *dmerAt;
+  unsigned d;
+} COVER_ctx_t;
+
+/* We need a global context for qsort... */
+static COVER_ctx_t *g_ctx = NULL;
+
+/*-*************************************
+*  Helper functions
+***************************************/
+
+/**
+ * Returns the sum of the sample sizes.
+ */
+static size_t COVER_sum(const size_t *samplesSizes, unsigned nbSamples) {
+  size_t sum = 0;
+  size_t i;
+  for (i = 0; i < nbSamples; ++i) {
+    sum += samplesSizes[i];
+  }
+  return sum;
+}
+
+/**
+ * Returns -1 if the dmer at lp is less than the dmer at rp.
+ * Return 0 if the dmers at lp and rp are equal.
+ * Returns 1 if the dmer at lp is greater than the dmer at rp.
+ */
+static int COVER_cmp(COVER_ctx_t *ctx, const void *lp, const void *rp) {
+  const U32 lhs = *(const U32 *)lp;
+  const U32 rhs = *(const U32 *)rp;
+  return memcmp(ctx->samples + lhs, ctx->samples + rhs, ctx->d);
+}
+
+/**
+ * Same as COVER_cmp() except ties are broken by pointer value
+ * NOTE: g_ctx must be set to call this function.  A global is required because
+ * qsort doesn't take an opaque pointer.
+ */
+static int COVER_strict_cmp(const void *lp, const void *rp) {
+  int result = COVER_cmp(g_ctx, lp, rp);
+  if (result == 0) {
+    result = lp < rp ? -1 : 1;
+  }
+  return result;
+}
+
+/**
+ * Returns the first pointer in [first, last) whose element does not compare
+ * less than value.  If no such element exists it returns last.
+ */
+static const size_t *COVER_lower_bound(const size_t *first, const size_t *last,
+                                       size_t value) {
+  size_t count = last - first;
+  while (count != 0) {
+    size_t step = count / 2;
+    const size_t *ptr = first;
+    ptr += step;
+    if (*ptr < value) {
+      first = ++ptr;
+      count -= step + 1;
+    } else {
+      count = step;
+    }
+  }
+  return first;
+}
+
+/**
+ * Generic groupBy function.
+ * Groups an array sorted by cmp into groups with equivalent values.
+ * Calls grp for each group.
+ */
+static void
+COVER_groupBy(const void *data, size_t count, size_t size, COVER_ctx_t *ctx,
+              int (*cmp)(COVER_ctx_t *, const void *, const void *),
+              void (*grp)(COVER_ctx_t *, const void *, const void *)) {
+  const BYTE *ptr = (const BYTE *)data;
+  size_t num = 0;
+  while (num < count) {
+    const BYTE *grpEnd = ptr + size;
+    ++num;
+    while (num < count && cmp(ctx, ptr, grpEnd) == 0) {
+      grpEnd += size;
+      ++num;
+    }
+    grp(ctx, ptr, grpEnd);
+    ptr = grpEnd;
+  }
+}
```
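Before the cover functions themselves, the contract of `COVER_groupBy()` is easiest to see on a toy input: the array must already be sorted under `cmp`, and `grp` is then invoked once per maximal run of equal elements with the half-open range of that run. A standalone analogue over plain `int`s (illustrative only, not part of the gem):

```c
#include <stdio.h>
#include <stddef.h>

/* Analogue of COVER_groupBy over a sorted int array: calls the callback
 * once per run of equal values with the half-open range of that run. */
static void groupBy(const int *data, size_t count,
                    void (*grp)(const int *begin, const int *end)) {
  size_t num = 0;
  const int *ptr = data;
  while (num < count) {
    const int *grpEnd = ptr + 1;
    ++num;
    while (num < count && *grpEnd == *ptr) {  /* same "dmer" */
      ++grpEnd;
      ++num;
    }
    grp(ptr, grpEnd);  /* COVER_group() records the run's frequency here */
    ptr = grpEnd;
  }
}

static void printRun(const int *begin, const int *end) {
  printf("value %d occurs %zu times\n", *begin, (size_t)(end - begin));
}

int main(void) {
  const int sorted[] = {1, 1, 2, 5, 5, 5};
  groupBy(sorted, sizeof(sorted) / sizeof(*sorted), &printRun);
  return 0;  /* prints: 1 x2, 2 x1, 5 x3 */
}
```

In cover.c the "values" are d-byte prefixes of suffixes, so each run is exactly one dmer, and `COVER_group()` turns each run into a frequency.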
```diff
+
+/*-*************************************
+*  Cover functions
+***************************************/
+
+/**
+ * Called on each group of positions with the same dmer.
+ * Counts the frequency of each dmer and saves it in the suffix array.
+ * Fills `ctx->dmerAt`.
+ */
+static void COVER_group(COVER_ctx_t *ctx, const void *group,
+                        const void *groupEnd) {
+  /* The group consists of all the positions with the same first d bytes. */
+  const U32 *grpPtr = (const U32 *)group;
+  const U32 *grpEnd = (const U32 *)groupEnd;
+  /* The dmerId is how we will reference this dmer.
+   * This allows us to map the whole dmer space to a much smaller space, the
+   * size of the suffix array.
+   */
+  const U32 dmerId = (U32)(grpPtr - ctx->suffix);
+  /* Count the number of samples this dmer shows up in */
+  U32 freq = 0;
+  /* Details */
+  const size_t *curOffsetPtr = ctx->offsets;
+  const size_t *offsetsEnd = ctx->offsets + ctx->nbSamples;
+  /* Once *grpPtr >= curSampleEnd this occurrence of the dmer is in a
+   * different sample than the last.
+   */
+  size_t curSampleEnd = ctx->offsets[0];
+  for (; grpPtr != grpEnd; ++grpPtr) {
+    /* Save the dmerId for this position so we can get back to it. */
+    ctx->dmerAt[*grpPtr] = dmerId;
+    /* Dictionaries only help for the first reference to the dmer.
+     * After that zstd can reference the match from the previous reference.
+     * So only count each dmer once for each sample it is in.
+     */
+    if (*grpPtr < curSampleEnd) {
+      continue;
+    }
+    freq += 1;
+    /* Binary search to find the end of the sample *grpPtr is in.
+     * In the common case that grpPtr + 1 == grpEnd we can skip the binary
+     * search because the loop is over.
+     */
+    if (grpPtr + 1 != grpEnd) {
+      const size_t *sampleEndPtr =
+          COVER_lower_bound(curOffsetPtr, offsetsEnd, *grpPtr);
+      curSampleEnd = *sampleEndPtr;
+      curOffsetPtr = sampleEndPtr + 1;
+    }
+  }
+  /* At this point we are never going to look at this segment of the suffix
+   * array again.  We take advantage of this fact to save memory.
+   * We store the frequency of the dmer in the first position of the group,
+   * which is dmerId.
+   */
+  ctx->suffix[dmerId] = freq;
+}
+
+/**
+ * A segment is a range in the source as well as the score of the segment.
+ */
+typedef struct {
+  U32 begin;
+  U32 end;
+  double score;
+} COVER_segment_t;
+
+/**
+ * Selects the best segment in an epoch.
+ * Segments of are scored according to the function:
+ *
+ * Let F(d) be the frequency of dmer d.
+ * Let S_i be the dmer at position i of segment S which has length k.
+ *
+ *     Score(S) = F(S_1) + F(S_2) + ... + F(S_{k-d+1})
+ *
+ * Once the dmer d is in the dictionay we set F(d) = 0.
+ */
+static COVER_segment_t COVER_selectSegment(const COVER_ctx_t *ctx, U32 *freqs,
+                                           COVER_map_t *activeDmers, U32 begin,
+                                           U32 end, COVER_params_t parameters) {
+  /* Constants */
+  const U32 k = parameters.k;
+  const U32 d = parameters.d;
+  const U32 dmersInK = k - d + 1;
+  /* Try each segment (activeSegment) and save the best (bestSegment) */
+  COVER_segment_t bestSegment = {0, 0, 0};
+  COVER_segment_t activeSegment;
+  /* Reset the activeDmers in the segment */
+  COVER_map_clear(activeDmers);
+  /* The activeSegment starts at the beginning of the epoch. */
+  activeSegment.begin = begin;
+  activeSegment.end = begin;
+  activeSegment.score = 0;
+  /* Slide the activeSegment through the whole epoch.
+   * Save the best segment in bestSegment.
+   */
+  while (activeSegment.end < end) {
+    /* The dmerId for the dmer at the next position */
+    U32 newDmer = ctx->dmerAt[activeSegment.end];
+    /* The entry in activeDmers for this dmerId */
+    U32 *newDmerOcc = COVER_map_at(activeDmers, newDmer);
+    /* If the dmer isn't already present in the segment add its score. */
+    if (*newDmerOcc == 0) {
+      /* The paper suggest using the L-0.5 norm, but experiments show that it
+       * doesn't help.
+       */
+      activeSegment.score += freqs[newDmer];
+    }
+    /* Add the dmer to the segment */
+    activeSegment.end += 1;
+    *newDmerOcc += 1;
+
+    /* If the window is now too large, drop the first position */
+    if (activeSegment.end - activeSegment.begin == dmersInK + 1) {
+      U32 delDmer = ctx->dmerAt[activeSegment.begin];
+      U32 *delDmerOcc = COVER_map_at(activeDmers, delDmer);
+      activeSegment.begin += 1;
+      *delDmerOcc -= 1;
+      /* If this is the last occurence of the dmer, subtract its score */
+      if (*delDmerOcc == 0) {
+        COVER_map_remove(activeDmers, delDmer);
+        activeSegment.score -= freqs[delDmer];
+      }
+    }
+
+    /* If this segment is the best so far save it */
+    if (activeSegment.score > bestSegment.score) {
+      bestSegment = activeSegment;
+    }
+  }
+  {
+    /* Trim off the zero frequency head and tail from the segment. */
+    U32 newBegin = bestSegment.end;
+    U32 newEnd = bestSegment.begin;
+    U32 pos;
+    for (pos = bestSegment.begin; pos != bestSegment.end; ++pos) {
+      U32 freq = freqs[ctx->dmerAt[pos]];
+      if (freq != 0) {
+        newBegin = MIN(newBegin, pos);
+        newEnd = pos + 1;
+      }
+    }
+    bestSegment.begin = newBegin;
+    bestSegment.end = newEnd;
+  }
+  {
+    /* Zero out the frequency of each dmer covered by the chosen segment. */
+    U32 pos;
+    for (pos = bestSegment.begin; pos != bestSegment.end; ++pos) {
+      freqs[ctx->dmerAt[pos]] = 0;
+    }
+  }
+  return bestSegment;
+}
```
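`COVER_selectSegment()` maintains Score(S), the sum of F over the *distinct* dmers in the window, in O(1) per step: a dmer's frequency is added only when its occurrence count goes 0 to 1 and subtracted only when it returns to 0, with the occurrence counts held in the `activeDmers` map. A toy trace of that bookkeeping (illustrative only; three dmer ids with frequencies 5, 7 and 2, and a window of `dmersInK = 3` positions):

```c
#include <stdio.h>

int main(void) {
  const unsigned dmerAt[] = {0, 1, 0, 2};  /* dmer id at each position */
  const unsigned freqs[]  = {5, 7, 2};     /* frequency per dmer id */
  const unsigned nbPositions = 4;
  const unsigned dmersInK = 3;             /* window holds k-d+1 = 3 dmers */
  unsigned occ[3] = {0, 0, 0};             /* stand-in for the COVER_map */
  unsigned begin = 0, end = 0, score = 0, bestScore = 0;
  while (end < nbPositions) {
    /* new dmer enters the window: count it once */
    if (occ[dmerAt[end]]++ == 0) score += freqs[dmerAt[end]];
    ++end;
    /* window too large: drop the head position */
    if (end - begin == dmersInK + 1) {
      if (--occ[dmerAt[begin]] == 0) score -= freqs[dmerAt[begin]];
      ++begin;
    }
    if (score > bestScore) bestScore = score;
    printf("window [%u,%u) score %u\n", begin, end, score);
  }
  /* windows: [0,1)=5  [0,2)=12  [0,3)=12 (dmer 0 repeated, not re-counted)
   *          [1,4)=14 (dmer 0 still present once after the head is dropped) */
  return 0;
}
```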
```diff
+
+/**
+ * Check the validity of the parameters.
+ * Returns non-zero if the parameters are valid and 0 otherwise.
+ */
+static int COVER_checkParameters(COVER_params_t parameters) {
+  /* k and d are required parameters */
+  if (parameters.d == 0 || parameters.k == 0) {
+    return 0;
+  }
+  /* d <= k */
+  if (parameters.d > parameters.k) {
+    return 0;
+  }
+  return 1;
+}
+
+/**
+ * Clean up a context initialized with `COVER_ctx_init()`.
+ */
+static void COVER_ctx_destroy(COVER_ctx_t *ctx) {
+  if (!ctx) {
+    return;
+  }
+  if (ctx->suffix) {
+    free(ctx->suffix);
+    ctx->suffix = NULL;
+  }
+  if (ctx->freqs) {
+    free(ctx->freqs);
+    ctx->freqs = NULL;
+  }
+  if (ctx->dmerAt) {
+    free(ctx->dmerAt);
+    ctx->dmerAt = NULL;
+  }
+  if (ctx->offsets) {
+    free(ctx->offsets);
+    ctx->offsets = NULL;
+  }
+}
+
+/**
+ * Prepare a context for dictionary building.
+ * The context is only dependent on the parameter `d` and can used multiple
+ * times.
+ * Returns 1 on success or zero on error.
+ * The context must be destroyed with `COVER_ctx_destroy()`.
+ */
+static int COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer,
+                          const size_t *samplesSizes, unsigned nbSamples,
+                          unsigned d) {
+  const BYTE *const samples = (const BYTE *)samplesBuffer;
+  const size_t totalSamplesSize = COVER_sum(samplesSizes, nbSamples);
+  /* Checks */
+  if (totalSamplesSize < d ||
+      totalSamplesSize >= (size_t)COVER_MAX_SAMPLES_SIZE) {
+    DISPLAYLEVEL(1, "Total samples size is too large, maximum size is %u MB\n",
+                 (COVER_MAX_SAMPLES_SIZE >> 20));
+    return 0;
+  }
+  /* Zero the context */
+  memset(ctx, 0, sizeof(*ctx));
+  DISPLAYLEVEL(2, "Training on %u samples of total size %u\n", nbSamples,
+               (U32)totalSamplesSize);
+  ctx->samples = samples;
+  ctx->samplesSizes = samplesSizes;
+  ctx->nbSamples = nbSamples;
+  /* Partial suffix array */
+  ctx->suffixSize = totalSamplesSize - d + 1;
+  ctx->suffix = (U32 *)malloc(ctx->suffixSize * sizeof(U32));
+  /* Maps index to the dmerID */
+  ctx->dmerAt = (U32 *)malloc(ctx->suffixSize * sizeof(U32));
+  /* The offsets of each file */
+  ctx->offsets = (size_t *)malloc((nbSamples + 1) * sizeof(size_t));
+  if (!ctx->suffix || !ctx->dmerAt || !ctx->offsets) {
+    DISPLAYLEVEL(1, "Failed to allocate scratch buffers\n");
+    COVER_ctx_destroy(ctx);
+    return 0;
+  }
+  ctx->freqs = NULL;
+  ctx->d = d;
+
+  /* Fill offsets from the samlesSizes */
+  {
+    U32 i;
+    ctx->offsets[0] = 0;
+    for (i = 1; i <= nbSamples; ++i) {
+      ctx->offsets[i] = ctx->offsets[i - 1] + samplesSizes[i - 1];
+    }
+  }
+  DISPLAYLEVEL(2, "Constructing partial suffix array\n");
+  {
+    /* suffix is a partial suffix array.
+     * It only sorts suffixes by their first parameters.d bytes.
+     * The sort is stable, so each dmer group is sorted by position in input.
+     */
+    U32 i;
+    for (i = 0; i < ctx->suffixSize; ++i) {
+      ctx->suffix[i] = i;
+    }
+    /* qsort doesn't take an opaque pointer, so pass as a global */
+    g_ctx = ctx;
+    qsort(ctx->suffix, ctx->suffixSize, sizeof(U32), &COVER_strict_cmp);
+  }
+  DISPLAYLEVEL(2, "Computing frequencies\n");
+  /* For each dmer group (group of positions with the same first d bytes):
+   * 1. For each position we set dmerAt[position] = dmerID.  The dmerID is
+   *    (groupBeginPtr - suffix).  This allows us to go from position to
+   *    dmerID so we can look up values in freq.
+   * 2. We calculate how many samples the dmer occurs in and save it in
+   *    freqs[dmerId].
+   */
+  COVER_groupBy(ctx->suffix, ctx->suffixSize, sizeof(U32), ctx, &COVER_cmp,
+                &COVER_group);
+  ctx->freqs = ctx->suffix;
+  ctx->suffix = NULL;
+  return 1;
+}
+
+/**
+ * Given the prepared context build the dictionary.
+ */
+static size_t COVER_buildDictionary(const COVER_ctx_t *ctx, U32 *freqs,
+                                    COVER_map_t *activeDmers, void *dictBuffer,
+                                    size_t dictBufferCapacity,
+                                    COVER_params_t parameters) {
+  BYTE *const dict = (BYTE *)dictBuffer;
+  size_t tail = dictBufferCapacity;
+  /* Divide the data up into epochs of equal size.
+   * We will select at least one segment from each epoch.
+   */
+  const U32 epochs = (U32)(dictBufferCapacity / parameters.k);
+  const U32 epochSize = (U32)(ctx->suffixSize / epochs);
+  size_t epoch;
+  DISPLAYLEVEL(2, "Breaking content into %u epochs of size %u\n", epochs,
+               epochSize);
+  /* Loop through the epochs until there are no more segments or the dictionary
+   * is full.
+   */
+  for (epoch = 0; tail > 0; epoch = (epoch + 1) % epochs) {
+    const U32 epochBegin = (U32)(epoch * epochSize);
+    const U32 epochEnd = epochBegin + epochSize;
+    size_t segmentSize;
+    /* Select a segment */
+    COVER_segment_t segment = COVER_selectSegment(
+        ctx, freqs, activeDmers, epochBegin, epochEnd, parameters);
+    /* Trim the segment if necessary and if it is empty then we are done */
+    segmentSize = MIN(segment.end - segment.begin + parameters.d - 1, tail);
+    if (segmentSize == 0) {
+      break;
+    }
+    /* We fill the dictionary from the back to allow the best segments to be
+     * referenced with the smallest offsets.
+     */
+    tail -= segmentSize;
+    memcpy(dict + tail, ctx->samples + segment.begin, segmentSize);
+    DISPLAYUPDATE(
+        2, "\r%u%%       ",
+        (U32)(((dictBufferCapacity - tail) * 100) / dictBufferCapacity));
+  }
+  DISPLAYLEVEL(2, "\r%79s\r", "");
+  return tail;
+}
+
+/**
+ * Translate from COVER_params_t to ZDICT_params_t required for finalizing the
+ * dictionary.
+ */
+static ZDICT_params_t COVER_translateParams(COVER_params_t parameters) {
+  ZDICT_params_t zdictParams;
+  memset(&zdictParams, 0, sizeof(zdictParams));
+  zdictParams.notificationLevel = 1;
+  zdictParams.dictID = parameters.dictID;
+  zdictParams.compressionLevel = parameters.compressionLevel;
+  return zdictParams;
+}
+
+/**
+ * Constructs a dictionary using a heuristic based on the following paper:
+ *
+ * Liao, Petri, Moffat, Wirth
+ * Effective Construction of Relative Lempel-Ziv Dictionaries
+ * Published in WWW 2016.
+ */
+ZDICTLIB_API size_t COVER_trainFromBuffer(
+    void *dictBuffer, size_t dictBufferCapacity, const void *samplesBuffer,
+    const size_t *samplesSizes, unsigned nbSamples, COVER_params_t parameters) {
+  BYTE *const dict = (BYTE *)dictBuffer;
+  COVER_ctx_t ctx;
+  COVER_map_t activeDmers;
+  /* Checks */
+  if (!COVER_checkParameters(parameters)) {
+    DISPLAYLEVEL(1, "Cover parameters incorrect\n");
+    return ERROR(GENERIC);
+  }
+  if (nbSamples == 0) {
+    DISPLAYLEVEL(1, "Cover must have at least one input file\n");
+    return ERROR(GENERIC);
+  }
+  if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) {
+    DISPLAYLEVEL(1, "dictBufferCapacity must be at least %u\n",
+                 ZDICT_DICTSIZE_MIN);
+    return ERROR(dstSize_tooSmall);
+  }
+  /* Initialize global data */
+  g_displayLevel = parameters.notificationLevel;
+  /* Initialize context and activeDmers */
+  if (!COVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples,
+                      parameters.d)) {
+    return ERROR(GENERIC);
+  }
+  if (!COVER_map_init(&activeDmers, parameters.k - parameters.d + 1)) {
+    DISPLAYLEVEL(1, "Failed to allocate dmer map: out of memory\n");
+    COVER_ctx_destroy(&ctx);
+    return ERROR(GENERIC);
+  }
+
+  DISPLAYLEVEL(2, "Building dictionary\n");
+  {
+    const size_t tail =
+        COVER_buildDictionary(&ctx, ctx.freqs, &activeDmers, dictBuffer,
+                              dictBufferCapacity, parameters);
+    ZDICT_params_t zdictParams = COVER_translateParams(parameters);
+    const size_t dictionarySize = ZDICT_finalizeDictionary(
+        dict, dictBufferCapacity, dict + tail, dictBufferCapacity - tail,
+        samplesBuffer, samplesSizes, nbSamples, zdictParams);
+    if (!ZSTD_isError(dictionarySize)) {
+      DISPLAYLEVEL(2, "Constructed dictionary of size %u\n",
+                   (U32)dictionarySize);
+    }
+    COVER_ctx_destroy(&ctx);
+    COVER_map_destroy(&activeDmers);
+    return dictionarySize;
+  }
+}
```
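Putting the pieces together, this is roughly how a caller drives the single-shot trainer. `COVER_trainFromBuffer()` and `COVER_params_t` are exposed through the static-linking section of `zdict.h` (assumed below, consistent with the `ZDICT_STATIC_LINKING_ONLY` include at the top of cover.c); the sample buffers are hypothetical concatenated training data:

```c
#define ZDICT_STATIC_LINKING_ONLY  /* COVER_* lives in the experimental section */
#include <string.h>
#include "zdict.h"

/* Sketch: train a dictionary with fixed k/d (no parameter search).
 * samples is the concatenation of nbSamples inputs; sampleSizes[i] is
 * the length of the i-th input within that buffer. */
static size_t trainCoverDict(void *dictBuf, size_t dictCap,
                             const void *samples, const size_t *sampleSizes,
                             unsigned nbSamples)
{
    COVER_params_t params;
    memset(&params, 0, sizeof(params));
    params.k = 1024;              /* segment size */
    params.d = 8;                 /* dmer size, d <= k */
    params.notificationLevel = 2; /* progress on stderr */
    return COVER_trainFromBuffer(dictBuf, dictCap, samples, sampleSizes,
                                 nbSamples, params);
    /* returns the dictionary size, or an error code
     * testable with ZDICT_isError() */
}
```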
```diff
+
+/**
+ * COVER_best_t is used for two purposes:
+ * 1. Synchronizing threads.
+ * 2. Saving the best parameters and dictionary.
+ *
+ * All of the methods except COVER_best_init() are thread safe if zstd is
+ * compiled with multithreaded support.
+ */
+typedef struct COVER_best_s {
+  pthread_mutex_t mutex;
+  pthread_cond_t cond;
+  size_t liveJobs;
+  void *dict;
+  size_t dictSize;
+  COVER_params_t parameters;
+  size_t compressedSize;
+} COVER_best_t;
+
+/**
+ * Initialize the `COVER_best_t`.
+ */
+static void COVER_best_init(COVER_best_t *best) {
+  if (!best) {
+    return;
+  }
+  pthread_mutex_init(&best->mutex, NULL);
+  pthread_cond_init(&best->cond, NULL);
+  best->liveJobs = 0;
+  best->dict = NULL;
+  best->dictSize = 0;
+  best->compressedSize = (size_t)-1;
+  memset(&best->parameters, 0, sizeof(best->parameters));
+}
+
+/**
+ * Wait until liveJobs == 0.
+ */
+static void COVER_best_wait(COVER_best_t *best) {
+  if (!best) {
+    return;
+  }
+  pthread_mutex_lock(&best->mutex);
+  while (best->liveJobs != 0) {
+    pthread_cond_wait(&best->cond, &best->mutex);
+  }
+  pthread_mutex_unlock(&best->mutex);
+}
+
+/**
+ * Call COVER_best_wait() and then destroy the COVER_best_t.
+ */
+static void COVER_best_destroy(COVER_best_t *best) {
+  if (!best) {
+    return;
+  }
+  COVER_best_wait(best);
+  if (best->dict) {
+    free(best->dict);
+  }
+  pthread_mutex_destroy(&best->mutex);
+  pthread_cond_destroy(&best->cond);
+}
+
+/**
+ * Called when a thread is about to be launched.
+ * Increments liveJobs.
+ */
+static void COVER_best_start(COVER_best_t *best) {
+  if (!best) {
+    return;
+  }
+  pthread_mutex_lock(&best->mutex);
+  ++best->liveJobs;
+  pthread_mutex_unlock(&best->mutex);
+}
+
+/**
+ * Called when a thread finishes executing, both on error or success.
+ * Decrements liveJobs and signals any waiting threads if liveJobs == 0.
+ * If this dictionary is the best so far save it and its parameters.
+ */
+static void COVER_best_finish(COVER_best_t *best, size_t compressedSize,
+                              COVER_params_t parameters, void *dict,
+                              size_t dictSize) {
+  if (!best) {
+    return;
+  }
+  {
+    size_t liveJobs;
+    pthread_mutex_lock(&best->mutex);
+    --best->liveJobs;
+    liveJobs = best->liveJobs;
+    /* If the new dictionary is better */
+    if (compressedSize < best->compressedSize) {
+      /* Allocate space if necessary */
+      if (!best->dict || best->dictSize < dictSize) {
+        if (best->dict) {
+          free(best->dict);
+        }
+        best->dict = malloc(dictSize);
+        if (!best->dict) {
+          best->compressedSize = ERROR(GENERIC);
+          best->dictSize = 0;
+          return;
+        }
+      }
+      /* Save the dictionary, parameters, and size */
+      memcpy(best->dict, dict, dictSize);
+      best->dictSize = dictSize;
+      best->parameters = parameters;
+      best->compressedSize = compressedSize;
+    }
+    pthread_mutex_unlock(&best->mutex);
+    if (liveJobs == 0) {
+      pthread_cond_broadcast(&best->cond);
+    }
+  }
+}
+
+/**
+ * Parameters for COVER_tryParameters().
+ */
+typedef struct COVER_tryParameters_data_s {
+  const COVER_ctx_t *ctx;
+  COVER_best_t *best;
+  size_t dictBufferCapacity;
+  COVER_params_t parameters;
+} COVER_tryParameters_data_t;
+
+/**
+ * Tries a set of parameters and upates the COVER_best_t with the results.
+ * This function is thread safe if zstd is compiled with multithreaded support.
+ * It takes its parameters as an *OWNING* opaque pointer to support threading.
+ */
+static void COVER_tryParameters(void *opaque) {
+  /* Save parameters as local variables */
+  COVER_tryParameters_data_t *const data = (COVER_tryParameters_data_t *)opaque;
+  const COVER_ctx_t *const ctx = data->ctx;
+  const COVER_params_t parameters = data->parameters;
+  size_t dictBufferCapacity = data->dictBufferCapacity;
+  size_t totalCompressedSize = ERROR(GENERIC);
+  /* Allocate space for hash table, dict, and freqs */
+  COVER_map_t activeDmers;
+  BYTE *const dict = (BYTE * const)malloc(dictBufferCapacity);
+  U32 *freqs = (U32 *)malloc(ctx->suffixSize * sizeof(U32));
+  if (!COVER_map_init(&activeDmers, parameters.k - parameters.d + 1)) {
+    DISPLAYLEVEL(1, "Failed to allocate dmer map: out of memory\n");
+    goto _cleanup;
+  }
+  if (!dict || !freqs) {
+    DISPLAYLEVEL(1, "Failed to allocate buffers: out of memory\n");
+    goto _cleanup;
+  }
+  /* Copy the frequencies because we need to modify them */
+  memcpy(freqs, ctx->freqs, ctx->suffixSize * sizeof(U32));
+  /* Build the dictionary */
+  {
+    const size_t tail = COVER_buildDictionary(ctx, freqs, &activeDmers, dict,
+                                              dictBufferCapacity, parameters);
+    const ZDICT_params_t zdictParams = COVER_translateParams(parameters);
+    dictBufferCapacity = ZDICT_finalizeDictionary(
+        dict, dictBufferCapacity, dict + tail, dictBufferCapacity - tail,
+        ctx->samples, ctx->samplesSizes, (unsigned)ctx->nbSamples, zdictParams);
+    if (ZDICT_isError(dictBufferCapacity)) {
+      DISPLAYLEVEL(1, "Failed to finalize dictionary\n");
+      goto _cleanup;
+    }
+  }
+  /* Check total compressed size */
+  {
+    /* Pointers */
+    ZSTD_CCtx *cctx;
+    ZSTD_CDict *cdict;
+    void *dst;
+    /* Local variables */
+    size_t dstCapacity;
+    size_t i;
+    /* Allocate dst with enough space to compress the maximum sized sample */
+    {
+      size_t maxSampleSize = 0;
+      for (i = 0; i < ctx->nbSamples; ++i) {
+        maxSampleSize = MAX(ctx->samplesSizes[i], maxSampleSize);
+      }
+      dstCapacity = ZSTD_compressBound(maxSampleSize);
+      dst = malloc(dstCapacity);
+    }
+    /* Create the cctx and cdict */
+    cctx = ZSTD_createCCtx();
+    cdict =
+        ZSTD_createCDict(dict, dictBufferCapacity, parameters.compressionLevel);
+    if (!dst || !cctx || !cdict) {
+      goto _compressCleanup;
+    }
+    /* Compress each sample and sum their sizes (or error) */
+    totalCompressedSize = 0;
+    for (i = 0; i < ctx->nbSamples; ++i) {
+      const size_t size = ZSTD_compress_usingCDict(
+          cctx, dst, dstCapacity, ctx->samples + ctx->offsets[i],
+          ctx->samplesSizes[i], cdict);
+      if (ZSTD_isError(size)) {
+        totalCompressedSize = ERROR(GENERIC);
+        goto _compressCleanup;
+      }
+      totalCompressedSize += size;
+    }
+  _compressCleanup:
+    ZSTD_freeCCtx(cctx);
+    ZSTD_freeCDict(cdict);
+    if (dst) {
+      free(dst);
+    }
+  }
+
+_cleanup:
+  COVER_best_finish(data->best, totalCompressedSize, parameters, dict,
+                    dictBufferCapacity);
+  free(data);
+  COVER_map_destroy(&activeDmers);
+  if (dict) {
+    free(dict);
+  }
+  if (freqs) {
+    free(freqs);
+  }
+}
```
```diff
+
+ZDICTLIB_API size_t COVER_optimizeTrainFromBuffer(void *dictBuffer,
+                                                  size_t dictBufferCapacity,
+                                                  const void *samplesBuffer,
+                                                  const size_t *samplesSizes,
+                                                  unsigned nbSamples,
+                                                  COVER_params_t *parameters) {
+  /* constants */
+  const unsigned nbThreads = parameters->nbThreads;
+  const unsigned kMinD = parameters->d == 0 ? 6 : parameters->d;
+  const unsigned kMaxD = parameters->d == 0 ? 16 : parameters->d;
+  const unsigned kMinK = parameters->k == 0 ? kMaxD : parameters->k;
+  const unsigned kMaxK = parameters->k == 0 ? 2048 : parameters->k;
+  const unsigned kSteps = parameters->steps == 0 ? 32 : parameters->steps;
+  const unsigned kStepSize = MAX((kMaxK - kMinK) / kSteps, 1);
+  const unsigned kIterations =
+      (1 + (kMaxD - kMinD) / 2) * (1 + (kMaxK - kMinK) / kStepSize);
+  /* Local variables */
+  const int displayLevel = parameters->notificationLevel;
+  unsigned iteration = 1;
+  unsigned d;
+  unsigned k;
+  COVER_best_t best;
+  POOL_ctx *pool = NULL;
+  /* Checks */
+  if (kMinK < kMaxD || kMaxK < kMinK) {
+    LOCALDISPLAYLEVEL(displayLevel, 1, "Incorrect parameters\n");
+    return ERROR(GENERIC);
+  }
+  if (nbSamples == 0) {
+    DISPLAYLEVEL(1, "Cover must have at least one input file\n");
+    return ERROR(GENERIC);
+  }
+  if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) {
+    DISPLAYLEVEL(1, "dictBufferCapacity must be at least %u\n",
+                 ZDICT_DICTSIZE_MIN);
+    return ERROR(dstSize_tooSmall);
+  }
+  if (nbThreads > 1) {
+    pool = POOL_create(nbThreads, 1);
+    if (!pool) {
+      return ERROR(memory_allocation);
+    }
+  }
+  /* Initialization */
+  COVER_best_init(&best);
+  /* Turn down global display level to clean up display at level 2 and below */
+  g_displayLevel = parameters->notificationLevel - 1;
+  /* Loop through d first because each new value needs a new context */
+  LOCALDISPLAYLEVEL(displayLevel, 2, "Trying %u different sets of parameters\n",
+                    kIterations);
+  for (d = kMinD; d <= kMaxD; d += 2) {
+    /* Initialize the context for this value of d */
+    COVER_ctx_t ctx;
+    LOCALDISPLAYLEVEL(displayLevel, 3, "d=%u\n", d);
+    if (!COVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples, d)) {
+      LOCALDISPLAYLEVEL(displayLevel, 1, "Failed to initialize context\n");
+      COVER_best_destroy(&best);
+      return ERROR(GENERIC);
+    }
+    /* Loop through k reusing the same context */
+    for (k = kMinK; k <= kMaxK; k += kStepSize) {
+      /* Prepare the arguments */
+      COVER_tryParameters_data_t *data = (COVER_tryParameters_data_t *)malloc(
+          sizeof(COVER_tryParameters_data_t));
+      LOCALDISPLAYLEVEL(displayLevel, 3, "k=%u\n", k);
+      if (!data) {
+        LOCALDISPLAYLEVEL(displayLevel, 1, "Failed to allocate parameters\n");
+        COVER_best_destroy(&best);
+        COVER_ctx_destroy(&ctx);
+        return ERROR(GENERIC);
+      }
+      data->ctx = &ctx;
+      data->best = &best;
+      data->dictBufferCapacity = dictBufferCapacity;
+      data->parameters = *parameters;
+      data->parameters.k = k;
+      data->parameters.d = d;
+      data->parameters.steps = kSteps;
+      /* Check the parameters */
+      if (!COVER_checkParameters(data->parameters)) {
+        DISPLAYLEVEL(1, "Cover parameters incorrect\n");
+        continue;
+      }
+      /* Call the function and pass ownership of data to it */
+      COVER_best_start(&best);
+      if (pool) {
+        POOL_add(pool, &COVER_tryParameters, data);
+      } else {
+        COVER_tryParameters(data);
+      }
+      /* Print status */
+      LOCALDISPLAYUPDATE(displayLevel, 2, "\r%u%%       ",
+                         (U32)((iteration * 100) / kIterations));
+      ++iteration;
+    }
+    COVER_best_wait(&best);
+    COVER_ctx_destroy(&ctx);
+  }
+  LOCALDISPLAYLEVEL(displayLevel, 2, "\r%79s\r", "");
+  /* Fill the output buffer and parameters with output of the best parameters */
+  {
+    const size_t dictSize = best.dictSize;
+    if (ZSTD_isError(best.compressedSize)) {
+      COVER_best_destroy(&best);
+      return best.compressedSize;
+    }
+    *parameters = best.parameters;
+    memcpy(dictBuffer, best.dict, dictSize);
+    COVER_best_destroy(&best);
+    POOL_free(pool);
+    return dictSize;
+  }
+}
```
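And the matching sketch for the optimizing variant: leaving `k` and `d` zeroed makes it sweep d over {6, 8, ..., 16} and k from `kMaxD` up to 2048 in `steps` increments (the defaults computed above), running each `COVER_tryParameters()` job on the `POOL` when `nbThreads > 1`, and writing the winning parameters back through the pointer. Buffers are hypothetical, as in the earlier sketch:

```c
#define ZDICT_STATIC_LINKING_ONLY
#include <string.h>
#include "zdict.h"

/* Sketch: let COVER search for good (k, d), using 4 pool threads. */
static size_t optimizeCoverDict(void *dictBuf, size_t dictCap,
                                const void *samples, const size_t *sampleSizes,
                                unsigned nbSamples)
{
    COVER_params_t params;
    size_t dictSize;
    memset(&params, 0, sizeof(params));  /* k = d = 0 => search both */
    params.steps = 32;                   /* 0 would also default to 32 */
    params.nbThreads = 4;                /* jobs run on a POOL_ctx */
    dictSize = COVER_optimizeTrainFromBuffer(dictBuf, dictCap, samples,
                                             sampleSizes, nbSamples, &params);
    if (!ZDICT_isError(dictSize)) {
        /* params.k and params.d now hold the best parameters found */
    }
    return dictSize;
}
```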