zstd-ruby 1.4.0.0 → 1.4.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/ruby.yml +35 -0
- data/README.md +2 -2
- data/ext/zstdruby/libzstd/Makefile +274 -107
- data/ext/zstdruby/libzstd/README.md +75 -16
- data/ext/zstdruby/libzstd/common/bitstream.h +59 -51
- data/ext/zstdruby/libzstd/common/compiler.h +154 -5
- data/ext/zstdruby/libzstd/common/cpu.h +1 -3
- data/ext/zstdruby/libzstd/common/debug.c +11 -31
- data/ext/zstdruby/libzstd/common/debug.h +22 -49
- data/ext/zstdruby/libzstd/common/entropy_common.c +201 -75
- data/ext/zstdruby/libzstd/common/error_private.c +3 -1
- data/ext/zstdruby/libzstd/common/error_private.h +7 -3
- data/ext/zstdruby/libzstd/common/fse.h +50 -42
- data/ext/zstdruby/libzstd/common/fse_decompress.c +134 -50
- data/ext/zstdruby/libzstd/common/huf.h +41 -38
- data/ext/zstdruby/libzstd/common/mem.h +68 -22
- data/ext/zstdruby/libzstd/common/pool.c +30 -20
- data/ext/zstdruby/libzstd/common/pool.h +3 -3
- data/ext/zstdruby/libzstd/common/threading.c +51 -4
- data/ext/zstdruby/libzstd/common/threading.h +36 -4
- data/ext/zstdruby/libzstd/common/xxhash.c +39 -89
- data/ext/zstdruby/libzstd/common/xxhash.h +12 -32
- data/ext/zstdruby/libzstd/common/zstd_common.c +10 -10
- data/ext/zstdruby/libzstd/common/zstd_deps.h +111 -0
- data/ext/zstdruby/libzstd/common/zstd_errors.h +3 -1
- data/ext/zstdruby/libzstd/common/zstd_internal.h +231 -72
- data/ext/zstdruby/libzstd/common/zstd_trace.c +42 -0
- data/ext/zstdruby/libzstd/common/zstd_trace.h +152 -0
- data/ext/zstdruby/libzstd/compress/fse_compress.c +47 -63
- data/ext/zstdruby/libzstd/compress/hist.c +41 -63
- data/ext/zstdruby/libzstd/compress/hist.h +13 -33
- data/ext/zstdruby/libzstd/compress/huf_compress.c +288 -172
- data/ext/zstdruby/libzstd/compress/zstd_compress.c +2504 -1626
- data/ext/zstdruby/libzstd/compress/zstd_compress_internal.h +446 -85
- data/ext/zstdruby/libzstd/compress/zstd_compress_literals.c +158 -0
- data/ext/zstdruby/libzstd/compress/zstd_compress_literals.h +29 -0
- data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.c +433 -0
- data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.h +54 -0
- data/ext/zstdruby/libzstd/compress/zstd_compress_superblock.c +849 -0
- data/ext/zstdruby/libzstd/compress/zstd_compress_superblock.h +32 -0
- data/ext/zstdruby/libzstd/compress/zstd_cwksp.h +561 -0
- data/ext/zstdruby/libzstd/compress/zstd_double_fast.c +82 -60
- data/ext/zstdruby/libzstd/compress/zstd_double_fast.h +2 -2
- data/ext/zstdruby/libzstd/compress/zstd_fast.c +106 -80
- data/ext/zstdruby/libzstd/compress/zstd_fast.h +2 -2
- data/ext/zstdruby/libzstd/compress/zstd_lazy.c +411 -105
- data/ext/zstdruby/libzstd/compress/zstd_lazy.h +21 -1
- data/ext/zstdruby/libzstd/compress/zstd_ldm.c +296 -207
- data/ext/zstdruby/libzstd/compress/zstd_ldm.h +14 -3
- data/ext/zstdruby/libzstd/compress/zstd_ldm_geartab.h +103 -0
- data/ext/zstdruby/libzstd/compress/zstd_opt.c +260 -148
- data/ext/zstdruby/libzstd/compress/zstd_opt.h +1 -1
- data/ext/zstdruby/libzstd/compress/zstdmt_compress.c +153 -440
- data/ext/zstdruby/libzstd/compress/zstdmt_compress.h +29 -110
- data/ext/zstdruby/libzstd/decompress/huf_decompress.c +356 -238
- data/ext/zstdruby/libzstd/decompress/zstd_ddict.c +20 -16
- data/ext/zstdruby/libzstd/decompress/zstd_ddict.h +3 -3
- data/ext/zstdruby/libzstd/decompress/zstd_decompress.c +641 -238
- data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.c +600 -371
- data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.h +8 -5
- data/ext/zstdruby/libzstd/decompress/zstd_decompress_internal.h +40 -9
- data/ext/zstdruby/libzstd/deprecated/zbuff.h +9 -8
- data/ext/zstdruby/libzstd/deprecated/zbuff_common.c +2 -2
- data/ext/zstdruby/libzstd/deprecated/zbuff_compress.c +1 -1
- data/ext/zstdruby/libzstd/deprecated/zbuff_decompress.c +1 -1
- data/ext/zstdruby/libzstd/dictBuilder/cover.c +197 -78
- data/ext/zstdruby/libzstd/dictBuilder/cover.h +52 -7
- data/ext/zstdruby/libzstd/dictBuilder/divsufsort.c +1 -1
- data/ext/zstdruby/libzstd/dictBuilder/fastcover.c +84 -66
- data/ext/zstdruby/libzstd/dictBuilder/zdict.c +58 -36
- data/ext/zstdruby/libzstd/dictBuilder/zdict.h +60 -31
- data/ext/zstdruby/libzstd/dll/example/Makefile +2 -1
- data/ext/zstdruby/libzstd/dll/example/README.md +16 -22
- data/ext/zstdruby/libzstd/legacy/zstd_legacy.h +8 -4
- data/ext/zstdruby/libzstd/legacy/zstd_v01.c +115 -111
- data/ext/zstdruby/libzstd/legacy/zstd_v01.h +1 -1
- data/ext/zstdruby/libzstd/legacy/zstd_v02.c +28 -14
- data/ext/zstdruby/libzstd/legacy/zstd_v02.h +1 -1
- data/ext/zstdruby/libzstd/legacy/zstd_v03.c +28 -14
- data/ext/zstdruby/libzstd/legacy/zstd_v03.h +1 -1
- data/ext/zstdruby/libzstd/legacy/zstd_v04.c +36 -19
- data/ext/zstdruby/libzstd/legacy/zstd_v04.h +1 -1
- data/ext/zstdruby/libzstd/legacy/zstd_v05.c +122 -107
- data/ext/zstdruby/libzstd/legacy/zstd_v05.h +2 -2
- data/ext/zstdruby/libzstd/legacy/zstd_v06.c +29 -23
- data/ext/zstdruby/libzstd/legacy/zstd_v06.h +1 -1
- data/ext/zstdruby/libzstd/legacy/zstd_v07.c +34 -24
- data/ext/zstdruby/libzstd/legacy/zstd_v07.h +1 -1
- data/ext/zstdruby/libzstd/libzstd.pc.in +2 -1
- data/ext/zstdruby/libzstd/zstd.h +655 -118
- data/lib/zstd-ruby/version.rb +1 -1
- data/zstd-ruby.gemspec +1 -1
- metadata +20 -10
- data/.travis.yml +0 -14
|
@@ -1,11 +1,21 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Copyright (c) 2017-2021, Facebook, Inc.
|
|
3
|
+
* All rights reserved.
|
|
4
|
+
*
|
|
5
|
+
* This source code is licensed under both the BSD-style license (found in the
|
|
6
|
+
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
|
|
7
|
+
* in the COPYING file in the root directory of this source tree).
|
|
8
|
+
* You may select, at your option, one of the above-listed licenses.
|
|
9
|
+
*/
|
|
10
|
+
|
|
1
11
|
#include <stdio.h> /* fprintf */
|
|
2
12
|
#include <stdlib.h> /* malloc, free, qsort */
|
|
3
13
|
#include <string.h> /* memset */
|
|
4
14
|
#include <time.h> /* clock */
|
|
5
|
-
#include "mem.h" /* read */
|
|
6
|
-
#include "pool.h"
|
|
7
|
-
#include "threading.h"
|
|
8
|
-
#include "zstd_internal.h" /* includes zstd.h */
|
|
15
|
+
#include "../common/mem.h" /* read */
|
|
16
|
+
#include "../common/pool.h"
|
|
17
|
+
#include "../common/threading.h"
|
|
18
|
+
#include "../common/zstd_internal.h" /* includes zstd.h */
|
|
9
19
|
#ifndef ZDICT_STATIC_LINKING_ONLY
|
|
10
20
|
#define ZDICT_STATIC_LINKING_ONLY
|
|
11
21
|
#endif
|
|
@@ -46,6 +56,15 @@ typedef struct {
|
|
|
46
56
|
U32 size;
|
|
47
57
|
} COVER_epoch_info_t;
|
|
48
58
|
|
|
59
|
+
/**
|
|
60
|
+
* Struct used for the dictionary selection function.
|
|
61
|
+
*/
|
|
62
|
+
typedef struct COVER_dictSelection {
|
|
63
|
+
BYTE* dictContent;
|
|
64
|
+
size_t dictSize;
|
|
65
|
+
size_t totalCompressedSize;
|
|
66
|
+
} COVER_dictSelection_t;
|
|
67
|
+
|
|
49
68
|
/**
|
|
50
69
|
* Computes the number of epochs and the size of each epoch.
|
|
51
70
|
* We will make sure that each epoch gets at least 10 * k bytes.
|
|
@@ -107,6 +126,32 @@ void COVER_best_start(COVER_best_t *best);
|
|
|
107
126
|
* Decrements liveJobs and signals any waiting threads if liveJobs == 0.
|
|
108
127
|
* If this dictionary is the best so far save it and its parameters.
|
|
109
128
|
*/
|
|
110
|
-
void COVER_best_finish(COVER_best_t *best,
|
|
111
|
-
|
|
112
|
-
|
|
129
|
+
void COVER_best_finish(COVER_best_t *best, ZDICT_cover_params_t parameters,
|
|
130
|
+
COVER_dictSelection_t selection);
|
|
131
|
+
/**
|
|
132
|
+
* Error function for COVER_selectDict function. Checks if the return
|
|
133
|
+
* value is an error.
|
|
134
|
+
*/
|
|
135
|
+
unsigned COVER_dictSelectionIsError(COVER_dictSelection_t selection);
|
|
136
|
+
|
|
137
|
+
/**
|
|
138
|
+
* Error function for COVER_selectDict function. Returns a struct where
|
|
139
|
+
* return.totalCompressedSize is a ZSTD error.
|
|
140
|
+
*/
|
|
141
|
+
COVER_dictSelection_t COVER_dictSelectionError(size_t error);
|
|
142
|
+
|
|
143
|
+
/**
|
|
144
|
+
* Always call after selectDict is called to free up used memory from
|
|
145
|
+
* newly created dictionary.
|
|
146
|
+
*/
|
|
147
|
+
void COVER_dictSelectionFree(COVER_dictSelection_t selection);
|
|
148
|
+
|
|
149
|
+
/**
|
|
150
|
+
* Called to finalize the dictionary and select one based on whether or not
|
|
151
|
+
* the shrink-dict flag was enabled. If enabled the dictionary used is the
|
|
152
|
+
* smallest dictionary within a specified regression of the compressed size
|
|
153
|
+
* from the largest dictionary.
|
|
154
|
+
*/
|
|
155
|
+
COVER_dictSelection_t COVER_selectDict(BYTE* customDictContent, size_t dictBufferCapacity,
|
|
156
|
+
size_t dictContentSize, const BYTE* samplesBuffer, const size_t* samplesSizes, unsigned nbFinalizeSamples,
|
|
157
|
+
size_t nbCheckSamples, size_t nbSamples, ZDICT_cover_params_t params, size_t* offsets, size_t totalCompressedSize);
|
|
@@ -1576,7 +1576,7 @@ note:
|
|
|
1576
1576
|
/* Construct the inverse suffix array of type B* suffixes using trsort. */
|
|
1577
1577
|
trsort(ISAb, SA, m, 1);
|
|
1578
1578
|
|
|
1579
|
-
/* Set the sorted order of
|
|
1579
|
+
/* Set the sorted order of type B* suffixes. */
|
|
1580
1580
|
for(i = n - 1, j = m, c0 = T[n - 1]; 0 <= i;) {
|
|
1581
1581
|
for(--i, c1 = c0; (0 <= i) && ((c0 = T[i]) >= c1); --i, c1 = c0) { }
|
|
1582
1582
|
if(0 <= i) {
|
|
@@ -1,3 +1,13 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Copyright (c) 2018-2021, Facebook, Inc.
|
|
3
|
+
* All rights reserved.
|
|
4
|
+
*
|
|
5
|
+
* This source code is licensed under both the BSD-style license (found in the
|
|
6
|
+
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
|
|
7
|
+
* in the COPYING file in the root directory of this source tree).
|
|
8
|
+
* You may select, at your option, one of the above-listed licenses.
|
|
9
|
+
*/
|
|
10
|
+
|
|
1
11
|
/*-*************************************
|
|
2
12
|
* Dependencies
|
|
3
13
|
***************************************/
|
|
@@ -6,11 +16,12 @@
|
|
|
6
16
|
#include <string.h> /* memset */
|
|
7
17
|
#include <time.h> /* clock */
|
|
8
18
|
|
|
9
|
-
#include "mem.h" /* read */
|
|
10
|
-
#include "pool.h"
|
|
11
|
-
#include "threading.h"
|
|
19
|
+
#include "../common/mem.h" /* read */
|
|
20
|
+
#include "../common/pool.h"
|
|
21
|
+
#include "../common/threading.h"
|
|
12
22
|
#include "cover.h"
|
|
13
|
-
#include "zstd_internal.h" /* includes zstd.h */
|
|
23
|
+
#include "../common/zstd_internal.h" /* includes zstd.h */
|
|
24
|
+
#include "../compress/zstd_compress_internal.h" /* ZSTD_hash*() */
|
|
14
25
|
#ifndef ZDICT_STATIC_LINKING_ONLY
|
|
15
26
|
#define ZDICT_STATIC_LINKING_ONLY
|
|
16
27
|
#endif
|
|
@@ -23,7 +34,7 @@
|
|
|
23
34
|
#define FASTCOVER_MAX_SAMPLES_SIZE (sizeof(size_t) == 8 ? ((unsigned)-1) : ((unsigned)1 GB))
|
|
24
35
|
#define FASTCOVER_MAX_F 31
|
|
25
36
|
#define FASTCOVER_MAX_ACCEL 10
|
|
26
|
-
#define
|
|
37
|
+
#define FASTCOVER_DEFAULT_SPLITPOINT 0.75
|
|
27
38
|
#define DEFAULT_F 20
|
|
28
39
|
#define DEFAULT_ACCEL 1
|
|
29
40
|
|
|
@@ -31,50 +42,50 @@
|
|
|
31
42
|
/*-*************************************
|
|
32
43
|
* Console display
|
|
33
44
|
***************************************/
|
|
45
|
+
#ifndef LOCALDISPLAYLEVEL
|
|
34
46
|
static int g_displayLevel = 2;
|
|
47
|
+
#endif
|
|
48
|
+
#undef DISPLAY
|
|
35
49
|
#define DISPLAY(...) \
|
|
36
50
|
{ \
|
|
37
51
|
fprintf(stderr, __VA_ARGS__); \
|
|
38
52
|
fflush(stderr); \
|
|
39
53
|
}
|
|
54
|
+
#undef LOCALDISPLAYLEVEL
|
|
40
55
|
#define LOCALDISPLAYLEVEL(displayLevel, l, ...) \
|
|
41
56
|
if (displayLevel >= l) { \
|
|
42
57
|
DISPLAY(__VA_ARGS__); \
|
|
43
58
|
} /* 0 : no display; 1: errors; 2: default; 3: details; 4: debug */
|
|
59
|
+
#undef DISPLAYLEVEL
|
|
44
60
|
#define DISPLAYLEVEL(l, ...) LOCALDISPLAYLEVEL(g_displayLevel, l, __VA_ARGS__)
|
|
45
61
|
|
|
62
|
+
#ifndef LOCALDISPLAYUPDATE
|
|
63
|
+
static const clock_t g_refreshRate = CLOCKS_PER_SEC * 15 / 100;
|
|
64
|
+
static clock_t g_time = 0;
|
|
65
|
+
#endif
|
|
66
|
+
#undef LOCALDISPLAYUPDATE
|
|
46
67
|
#define LOCALDISPLAYUPDATE(displayLevel, l, ...) \
|
|
47
68
|
if (displayLevel >= l) { \
|
|
48
|
-
if ((clock() - g_time >
|
|
69
|
+
if ((clock() - g_time > g_refreshRate) || (displayLevel >= 4)) { \
|
|
49
70
|
g_time = clock(); \
|
|
50
71
|
DISPLAY(__VA_ARGS__); \
|
|
51
72
|
} \
|
|
52
73
|
}
|
|
74
|
+
#undef DISPLAYUPDATE
|
|
53
75
|
#define DISPLAYUPDATE(l, ...) LOCALDISPLAYUPDATE(g_displayLevel, l, __VA_ARGS__)
|
|
54
|
-
static const clock_t refreshRate = CLOCKS_PER_SEC * 15 / 100;
|
|
55
|
-
static clock_t g_time = 0;
|
|
56
76
|
|
|
57
77
|
|
|
58
78
|
/*-*************************************
|
|
59
79
|
* Hash Functions
|
|
60
80
|
***************************************/
|
|
61
|
-
static const U64 prime6bytes = 227718039650203ULL;
|
|
62
|
-
static size_t ZSTD_hash6(U64 u, U32 h) { return (size_t)(((u << (64-48)) * prime6bytes) >> (64-h)) ; }
|
|
63
|
-
static size_t ZSTD_hash6Ptr(const void* p, U32 h) { return ZSTD_hash6(MEM_readLE64(p), h); }
|
|
64
|
-
|
|
65
|
-
static const U64 prime8bytes = 0xCF1BBCDCB7A56463ULL;
|
|
66
|
-
static size_t ZSTD_hash8(U64 u, U32 h) { return (size_t)(((u) * prime8bytes) >> (64-h)) ; }
|
|
67
|
-
static size_t ZSTD_hash8Ptr(const void* p, U32 h) { return ZSTD_hash8(MEM_readLE64(p), h); }
|
|
68
|
-
|
|
69
|
-
|
|
70
81
|
/**
|
|
71
|
-
* Hash the d-byte value pointed to by p and mod 2^f
|
|
82
|
+
* Hash the d-byte value pointed to by p and mod 2^f into the frequency vector
|
|
72
83
|
*/
|
|
73
|
-
static size_t FASTCOVER_hashPtrToIndex(const void* p, U32
|
|
84
|
+
static size_t FASTCOVER_hashPtrToIndex(const void* p, U32 f, unsigned d) {
|
|
74
85
|
if (d == 6) {
|
|
75
|
-
return ZSTD_hash6Ptr(p,
|
|
86
|
+
return ZSTD_hash6Ptr(p, f);
|
|
76
87
|
}
|
|
77
|
-
return ZSTD_hash8Ptr(p,
|
|
88
|
+
return ZSTD_hash8Ptr(p, f);
|
|
78
89
|
}
|
|
79
90
|
|
|
80
91
|
|
|
@@ -287,10 +298,10 @@ FASTCOVER_computeFrequency(U32* freqs, const FASTCOVER_ctx_t* ctx)
|
|
|
287
298
|
* Prepare a context for dictionary building.
|
|
288
299
|
* The context is only dependent on the parameter `d` and can used multiple
|
|
289
300
|
* times.
|
|
290
|
-
* Returns
|
|
301
|
+
* Returns 0 on success or error code on error.
|
|
291
302
|
* The context must be destroyed with `FASTCOVER_ctx_destroy()`.
|
|
292
303
|
*/
|
|
293
|
-
static
|
|
304
|
+
static size_t
|
|
294
305
|
FASTCOVER_ctx_init(FASTCOVER_ctx_t* ctx,
|
|
295
306
|
const void* samplesBuffer,
|
|
296
307
|
const size_t* samplesSizes, unsigned nbSamples,
|
|
@@ -310,19 +321,19 @@ FASTCOVER_ctx_init(FASTCOVER_ctx_t* ctx,
|
|
|
310
321
|
totalSamplesSize >= (size_t)FASTCOVER_MAX_SAMPLES_SIZE) {
|
|
311
322
|
DISPLAYLEVEL(1, "Total samples size is too large (%u MB), maximum size is %u MB\n",
|
|
312
323
|
(unsigned)(totalSamplesSize >> 20), (FASTCOVER_MAX_SAMPLES_SIZE >> 20));
|
|
313
|
-
return
|
|
324
|
+
return ERROR(srcSize_wrong);
|
|
314
325
|
}
|
|
315
326
|
|
|
316
327
|
/* Check if there are at least 5 training samples */
|
|
317
328
|
if (nbTrainSamples < 5) {
|
|
318
329
|
DISPLAYLEVEL(1, "Total number of training samples is %u and is invalid\n", nbTrainSamples);
|
|
319
|
-
return
|
|
330
|
+
return ERROR(srcSize_wrong);
|
|
320
331
|
}
|
|
321
332
|
|
|
322
333
|
/* Check if there's testing sample */
|
|
323
334
|
if (nbTestSamples < 1) {
|
|
324
335
|
DISPLAYLEVEL(1, "Total number of testing samples is %u and is invalid.\n", nbTestSamples);
|
|
325
|
-
return
|
|
336
|
+
return ERROR(srcSize_wrong);
|
|
326
337
|
}
|
|
327
338
|
|
|
328
339
|
/* Zero the context */
|
|
@@ -347,7 +358,7 @@ FASTCOVER_ctx_init(FASTCOVER_ctx_t* ctx,
|
|
|
347
358
|
if (ctx->offsets == NULL) {
|
|
348
359
|
DISPLAYLEVEL(1, "Failed to allocate scratch buffers \n");
|
|
349
360
|
FASTCOVER_ctx_destroy(ctx);
|
|
350
|
-
return
|
|
361
|
+
return ERROR(memory_allocation);
|
|
351
362
|
}
|
|
352
363
|
|
|
353
364
|
/* Fill offsets from the samplesSizes */
|
|
@@ -364,13 +375,13 @@ FASTCOVER_ctx_init(FASTCOVER_ctx_t* ctx,
|
|
|
364
375
|
if (ctx->freqs == NULL) {
|
|
365
376
|
DISPLAYLEVEL(1, "Failed to allocate frequency table \n");
|
|
366
377
|
FASTCOVER_ctx_destroy(ctx);
|
|
367
|
-
return
|
|
378
|
+
return ERROR(memory_allocation);
|
|
368
379
|
}
|
|
369
380
|
|
|
370
381
|
DISPLAYLEVEL(2, "Computing frequencies\n");
|
|
371
382
|
FASTCOVER_computeFrequency(ctx->freqs, ctx);
|
|
372
383
|
|
|
373
|
-
return
|
|
384
|
+
return 0;
|
|
374
385
|
}
|
|
375
386
|
|
|
376
387
|
|
|
@@ -435,7 +446,6 @@ FASTCOVER_buildDictionary(const FASTCOVER_ctx_t* ctx,
|
|
|
435
446
|
return tail;
|
|
436
447
|
}
|
|
437
448
|
|
|
438
|
-
|
|
439
449
|
/**
|
|
440
450
|
* Parameters for FASTCOVER_tryParameters().
|
|
441
451
|
*/
|
|
@@ -452,19 +462,20 @@ typedef struct FASTCOVER_tryParameters_data_s {
|
|
|
452
462
|
* This function is thread safe if zstd is compiled with multithreaded support.
|
|
453
463
|
* It takes its parameters as an *OWNING* opaque pointer to support threading.
|
|
454
464
|
*/
|
|
455
|
-
static void FASTCOVER_tryParameters(void
|
|
465
|
+
static void FASTCOVER_tryParameters(void* opaque)
|
|
456
466
|
{
|
|
457
467
|
/* Save parameters as local variables */
|
|
458
|
-
FASTCOVER_tryParameters_data_t *const data = (FASTCOVER_tryParameters_data_t
|
|
468
|
+
FASTCOVER_tryParameters_data_t *const data = (FASTCOVER_tryParameters_data_t*)opaque;
|
|
459
469
|
const FASTCOVER_ctx_t *const ctx = data->ctx;
|
|
460
470
|
const ZDICT_cover_params_t parameters = data->parameters;
|
|
461
471
|
size_t dictBufferCapacity = data->dictBufferCapacity;
|
|
462
472
|
size_t totalCompressedSize = ERROR(GENERIC);
|
|
463
473
|
/* Initialize array to keep track of frequency of dmer within activeSegment */
|
|
464
|
-
U16* segmentFreqs = (U16
|
|
474
|
+
U16* segmentFreqs = (U16*)calloc(((U64)1 << ctx->f), sizeof(U16));
|
|
465
475
|
/* Allocate space for hash table, dict, and freqs */
|
|
466
|
-
BYTE *const dict = (BYTE
|
|
467
|
-
|
|
476
|
+
BYTE *const dict = (BYTE*)malloc(dictBufferCapacity);
|
|
477
|
+
COVER_dictSelection_t selection = COVER_dictSelectionError(ERROR(GENERIC));
|
|
478
|
+
U32* freqs = (U32*) malloc(((U64)1 << ctx->f) * sizeof(U32));
|
|
468
479
|
if (!segmentFreqs || !dict || !freqs) {
|
|
469
480
|
DISPLAYLEVEL(1, "Failed to allocate buffers: out of memory\n");
|
|
470
481
|
goto _cleanup;
|
|
@@ -473,27 +484,24 @@ static void FASTCOVER_tryParameters(void *opaque)
|
|
|
473
484
|
memcpy(freqs, ctx->freqs, ((U64)1 << ctx->f) * sizeof(U32));
|
|
474
485
|
/* Build the dictionary */
|
|
475
486
|
{ const size_t tail = FASTCOVER_buildDictionary(ctx, freqs, dict, dictBufferCapacity,
|
|
476
|
-
|
|
487
|
+
parameters, segmentFreqs);
|
|
488
|
+
|
|
477
489
|
const unsigned nbFinalizeSamples = (unsigned)(ctx->nbTrainSamples * ctx->accelParams.finalize / 100);
|
|
478
|
-
|
|
479
|
-
|
|
480
|
-
|
|
481
|
-
|
|
482
|
-
|
|
490
|
+
selection = COVER_selectDict(dict + tail, dictBufferCapacity, dictBufferCapacity - tail,
|
|
491
|
+
ctx->samples, ctx->samplesSizes, nbFinalizeSamples, ctx->nbTrainSamples, ctx->nbSamples, parameters, ctx->offsets,
|
|
492
|
+
totalCompressedSize);
|
|
493
|
+
|
|
494
|
+
if (COVER_dictSelectionIsError(selection)) {
|
|
495
|
+
DISPLAYLEVEL(1, "Failed to select dictionary\n");
|
|
483
496
|
goto _cleanup;
|
|
484
497
|
}
|
|
485
498
|
}
|
|
486
|
-
/* Check total compressed size */
|
|
487
|
-
totalCompressedSize = COVER_checkTotalCompressedSize(parameters, ctx->samplesSizes,
|
|
488
|
-
ctx->samples, ctx->offsets,
|
|
489
|
-
ctx->nbTrainSamples, ctx->nbSamples,
|
|
490
|
-
dict, dictBufferCapacity);
|
|
491
499
|
_cleanup:
|
|
492
|
-
|
|
493
|
-
|
|
500
|
+
free(dict);
|
|
501
|
+
COVER_best_finish(data->best, parameters, selection);
|
|
494
502
|
free(data);
|
|
495
503
|
free(segmentFreqs);
|
|
496
|
-
|
|
504
|
+
COVER_dictSelectionFree(selection);
|
|
497
505
|
free(freqs);
|
|
498
506
|
}
|
|
499
507
|
|
|
@@ -508,6 +516,7 @@ FASTCOVER_convertToCoverParams(ZDICT_fastCover_params_t fastCoverParams,
|
|
|
508
516
|
coverParams->nbThreads = fastCoverParams.nbThreads;
|
|
509
517
|
coverParams->splitPoint = fastCoverParams.splitPoint;
|
|
510
518
|
coverParams->zParams = fastCoverParams.zParams;
|
|
519
|
+
coverParams->shrinkDict = fastCoverParams.shrinkDict;
|
|
511
520
|
}
|
|
512
521
|
|
|
513
522
|
|
|
@@ -524,6 +533,7 @@ FASTCOVER_convertToFastCoverParams(ZDICT_cover_params_t coverParams,
|
|
|
524
533
|
fastCoverParams->f = f;
|
|
525
534
|
fastCoverParams->accel = accel;
|
|
526
535
|
fastCoverParams->zParams = coverParams.zParams;
|
|
536
|
+
fastCoverParams->shrinkDict = coverParams.shrinkDict;
|
|
527
537
|
}
|
|
528
538
|
|
|
529
539
|
|
|
@@ -550,11 +560,11 @@ ZDICT_trainFromBuffer_fastCover(void* dictBuffer, size_t dictBufferCapacity,
|
|
|
550
560
|
if (!FASTCOVER_checkParameters(coverParams, dictBufferCapacity, parameters.f,
|
|
551
561
|
parameters.accel)) {
|
|
552
562
|
DISPLAYLEVEL(1, "FASTCOVER parameters incorrect\n");
|
|
553
|
-
return ERROR(
|
|
563
|
+
return ERROR(parameter_outOfBound);
|
|
554
564
|
}
|
|
555
565
|
if (nbSamples == 0) {
|
|
556
566
|
DISPLAYLEVEL(1, "FASTCOVER must have at least one input file\n");
|
|
557
|
-
return ERROR(
|
|
567
|
+
return ERROR(srcSize_wrong);
|
|
558
568
|
}
|
|
559
569
|
if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) {
|
|
560
570
|
DISPLAYLEVEL(1, "dictBufferCapacity must be at least %u\n",
|
|
@@ -564,11 +574,14 @@ ZDICT_trainFromBuffer_fastCover(void* dictBuffer, size_t dictBufferCapacity,
|
|
|
564
574
|
/* Assign corresponding FASTCOVER_accel_t to accelParams*/
|
|
565
575
|
accelParams = FASTCOVER_defaultAccelParameters[parameters.accel];
|
|
566
576
|
/* Initialize context */
|
|
567
|
-
|
|
577
|
+
{
|
|
578
|
+
size_t const initVal = FASTCOVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples,
|
|
568
579
|
coverParams.d, parameters.splitPoint, parameters.f,
|
|
569
|
-
accelParams)
|
|
570
|
-
|
|
571
|
-
|
|
580
|
+
accelParams);
|
|
581
|
+
if (ZSTD_isError(initVal)) {
|
|
582
|
+
DISPLAYLEVEL(1, "Failed to initialize context\n");
|
|
583
|
+
return initVal;
|
|
584
|
+
}
|
|
572
585
|
}
|
|
573
586
|
COVER_warnOnSmallCorpus(dictBufferCapacity, ctx.nbDmers, g_displayLevel);
|
|
574
587
|
/* Build the dictionary */
|
|
@@ -605,7 +618,7 @@ ZDICT_optimizeTrainFromBuffer_fastCover(
|
|
|
605
618
|
/* constants */
|
|
606
619
|
const unsigned nbThreads = parameters->nbThreads;
|
|
607
620
|
const double splitPoint =
|
|
608
|
-
parameters->splitPoint <= 0.0 ?
|
|
621
|
+
parameters->splitPoint <= 0.0 ? FASTCOVER_DEFAULT_SPLITPOINT : parameters->splitPoint;
|
|
609
622
|
const unsigned kMinD = parameters->d == 0 ? 6 : parameters->d;
|
|
610
623
|
const unsigned kMaxD = parameters->d == 0 ? 8 : parameters->d;
|
|
611
624
|
const unsigned kMinK = parameters->k == 0 ? 50 : parameters->k;
|
|
@@ -616,6 +629,7 @@ ZDICT_optimizeTrainFromBuffer_fastCover(
|
|
|
616
629
|
(1 + (kMaxD - kMinD) / 2) * (1 + (kMaxK - kMinK) / kStepSize);
|
|
617
630
|
const unsigned f = parameters->f == 0 ? DEFAULT_F : parameters->f;
|
|
618
631
|
const unsigned accel = parameters->accel == 0 ? DEFAULT_ACCEL : parameters->accel;
|
|
632
|
+
const unsigned shrinkDict = 0;
|
|
619
633
|
/* Local variables */
|
|
620
634
|
const int displayLevel = parameters->zParams.notificationLevel;
|
|
621
635
|
unsigned iteration = 1;
|
|
@@ -627,19 +641,19 @@ ZDICT_optimizeTrainFromBuffer_fastCover(
|
|
|
627
641
|
/* Checks */
|
|
628
642
|
if (splitPoint <= 0 || splitPoint > 1) {
|
|
629
643
|
LOCALDISPLAYLEVEL(displayLevel, 1, "Incorrect splitPoint\n");
|
|
630
|
-
return ERROR(
|
|
644
|
+
return ERROR(parameter_outOfBound);
|
|
631
645
|
}
|
|
632
646
|
if (accel == 0 || accel > FASTCOVER_MAX_ACCEL) {
|
|
633
647
|
LOCALDISPLAYLEVEL(displayLevel, 1, "Incorrect accel\n");
|
|
634
|
-
return ERROR(
|
|
648
|
+
return ERROR(parameter_outOfBound);
|
|
635
649
|
}
|
|
636
650
|
if (kMinK < kMaxD || kMaxK < kMinK) {
|
|
637
651
|
LOCALDISPLAYLEVEL(displayLevel, 1, "Incorrect k\n");
|
|
638
|
-
return ERROR(
|
|
652
|
+
return ERROR(parameter_outOfBound);
|
|
639
653
|
}
|
|
640
654
|
if (nbSamples == 0) {
|
|
641
655
|
LOCALDISPLAYLEVEL(displayLevel, 1, "FASTCOVER must have at least one input file\n");
|
|
642
|
-
return ERROR(
|
|
656
|
+
return ERROR(srcSize_wrong);
|
|
643
657
|
}
|
|
644
658
|
if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) {
|
|
645
659
|
LOCALDISPLAYLEVEL(displayLevel, 1, "dictBufferCapacity must be at least %u\n",
|
|
@@ -666,11 +680,14 @@ ZDICT_optimizeTrainFromBuffer_fastCover(
|
|
|
666
680
|
/* Initialize the context for this value of d */
|
|
667
681
|
FASTCOVER_ctx_t ctx;
|
|
668
682
|
LOCALDISPLAYLEVEL(displayLevel, 3, "d=%u\n", d);
|
|
669
|
-
|
|
670
|
-
|
|
671
|
-
|
|
672
|
-
|
|
673
|
-
|
|
683
|
+
{
|
|
684
|
+
size_t const initVal = FASTCOVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples, d, splitPoint, f, accelParams);
|
|
685
|
+
if (ZSTD_isError(initVal)) {
|
|
686
|
+
LOCALDISPLAYLEVEL(displayLevel, 1, "Failed to initialize context\n");
|
|
687
|
+
COVER_best_destroy(&best);
|
|
688
|
+
POOL_free(pool);
|
|
689
|
+
return initVal;
|
|
690
|
+
}
|
|
674
691
|
}
|
|
675
692
|
if (!warned) {
|
|
676
693
|
COVER_warnOnSmallCorpus(dictBufferCapacity, ctx.nbDmers, displayLevel);
|
|
@@ -687,7 +704,7 @@ ZDICT_optimizeTrainFromBuffer_fastCover(
|
|
|
687
704
|
COVER_best_destroy(&best);
|
|
688
705
|
FASTCOVER_ctx_destroy(&ctx);
|
|
689
706
|
POOL_free(pool);
|
|
690
|
-
return ERROR(
|
|
707
|
+
return ERROR(memory_allocation);
|
|
691
708
|
}
|
|
692
709
|
data->ctx = &ctx;
|
|
693
710
|
data->best = &best;
|
|
@@ -697,6 +714,7 @@ ZDICT_optimizeTrainFromBuffer_fastCover(
|
|
|
697
714
|
data->parameters.d = d;
|
|
698
715
|
data->parameters.splitPoint = splitPoint;
|
|
699
716
|
data->parameters.steps = kSteps;
|
|
717
|
+
data->parameters.shrinkDict = shrinkDict;
|
|
700
718
|
data->parameters.zParams.notificationLevel = g_displayLevel;
|
|
701
719
|
/* Check the parameters */
|
|
702
720
|
if (!FASTCOVER_checkParameters(data->parameters, dictBufferCapacity,
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
/*
|
|
2
|
-
* Copyright (c) 2016-
|
|
2
|
+
* Copyright (c) 2016-2021, Yann Collet, Facebook, Inc.
|
|
3
3
|
* All rights reserved.
|
|
4
4
|
*
|
|
5
5
|
* This source code is licensed under both the BSD-style license (found in the
|
|
@@ -23,9 +23,13 @@
|
|
|
23
23
|
/* Unix Large Files support (>4GB) */
|
|
24
24
|
#define _FILE_OFFSET_BITS 64
|
|
25
25
|
#if (defined(__sun__) && (!defined(__LP64__))) /* Sun Solaris 32-bits requires specific definitions */
|
|
26
|
+
# ifndef _LARGEFILE_SOURCE
|
|
26
27
|
# define _LARGEFILE_SOURCE
|
|
28
|
+
# endif
|
|
27
29
|
#elif ! defined(__LP64__) /* No point defining Large file for 64 bit */
|
|
30
|
+
# ifndef _LARGEFILE64_SOURCE
|
|
28
31
|
# define _LARGEFILE64_SOURCE
|
|
32
|
+
# endif
|
|
29
33
|
#endif
|
|
30
34
|
|
|
31
35
|
|
|
@@ -37,17 +41,18 @@
|
|
|
37
41
|
#include <stdio.h> /* fprintf, fopen, ftello64 */
|
|
38
42
|
#include <time.h> /* clock */
|
|
39
43
|
|
|
40
|
-
#include "mem.h" /* read */
|
|
41
|
-
#include "fse.h" /* FSE_normalizeCount, FSE_writeNCount */
|
|
44
|
+
#include "../common/mem.h" /* read */
|
|
45
|
+
#include "../common/fse.h" /* FSE_normalizeCount, FSE_writeNCount */
|
|
42
46
|
#define HUF_STATIC_LINKING_ONLY
|
|
43
|
-
#include "huf.h" /* HUF_buildCTable, HUF_writeCTable */
|
|
44
|
-
#include "zstd_internal.h" /* includes zstd.h */
|
|
45
|
-
#include "xxhash.h" /* XXH64 */
|
|
47
|
+
#include "../common/huf.h" /* HUF_buildCTable, HUF_writeCTable */
|
|
48
|
+
#include "../common/zstd_internal.h" /* includes zstd.h */
|
|
49
|
+
#include "../common/xxhash.h" /* XXH64 */
|
|
46
50
|
#include "divsufsort.h"
|
|
47
51
|
#ifndef ZDICT_STATIC_LINKING_ONLY
|
|
48
52
|
# define ZDICT_STATIC_LINKING_ONLY
|
|
49
53
|
#endif
|
|
50
54
|
#include "zdict.h"
|
|
55
|
+
#include "../compress/zstd_compress_internal.h" /* ZSTD_loadCEntropy() */
|
|
51
56
|
|
|
52
57
|
|
|
53
58
|
/*-*************************************
|
|
@@ -61,14 +66,15 @@
|
|
|
61
66
|
|
|
62
67
|
#define NOISELENGTH 32
|
|
63
68
|
|
|
64
|
-
static const int g_compressionLevel_default = 3;
|
|
65
69
|
static const U32 g_selectivity_default = 9;
|
|
66
70
|
|
|
67
71
|
|
|
68
72
|
/*-*************************************
|
|
69
73
|
* Console display
|
|
70
74
|
***************************************/
|
|
75
|
+
#undef DISPLAY
|
|
71
76
|
#define DISPLAY(...) { fprintf(stderr, __VA_ARGS__); fflush( stderr ); }
|
|
77
|
+
#undef DISPLAYLEVEL
|
|
72
78
|
#define DISPLAYLEVEL(l, ...) if (notificationLevel>=l) { DISPLAY(__VA_ARGS__); } /* 0 : no display; 1: errors; 2: default; 3: details; 4: debug */
|
|
73
79
|
|
|
74
80
|
static clock_t ZDICT_clockSpan(clock_t nPrevious) { return clock() - nPrevious; }
|
|
@@ -99,6 +105,26 @@ unsigned ZDICT_getDictID(const void* dictBuffer, size_t dictSize)
|
|
|
99
105
|
return MEM_readLE32((const char*)dictBuffer + 4);
|
|
100
106
|
}
|
|
101
107
|
|
|
108
|
+
size_t ZDICT_getDictHeaderSize(const void* dictBuffer, size_t dictSize)
|
|
109
|
+
{
|
|
110
|
+
size_t headerSize;
|
|
111
|
+
if (dictSize <= 8 || MEM_readLE32(dictBuffer) != ZSTD_MAGIC_DICTIONARY) return ERROR(dictionary_corrupted);
|
|
112
|
+
|
|
113
|
+
{ ZSTD_compressedBlockState_t* bs = (ZSTD_compressedBlockState_t*)malloc(sizeof(ZSTD_compressedBlockState_t));
|
|
114
|
+
U32* wksp = (U32*)malloc(HUF_WORKSPACE_SIZE);
|
|
115
|
+
if (!bs || !wksp) {
|
|
116
|
+
headerSize = ERROR(memory_allocation);
|
|
117
|
+
} else {
|
|
118
|
+
ZSTD_reset_compressedBlockState(bs);
|
|
119
|
+
headerSize = ZSTD_loadCEntropy(bs, wksp, dictBuffer, dictSize);
|
|
120
|
+
}
|
|
121
|
+
|
|
122
|
+
free(bs);
|
|
123
|
+
free(wksp);
|
|
124
|
+
}
|
|
125
|
+
|
|
126
|
+
return headerSize;
|
|
127
|
+
}
|
|
102
128
|
|
|
103
129
|
/*-********************************************************
|
|
104
130
|
* Dictionary training functions
|
|
@@ -508,6 +534,7 @@ static size_t ZDICT_trainBuffer_legacy(dictItem* dictList, U32 dictListSize,
|
|
|
508
534
|
clock_t displayClock = 0;
|
|
509
535
|
clock_t const refreshRate = CLOCKS_PER_SEC * 3 / 10;
|
|
510
536
|
|
|
537
|
+
# undef DISPLAYUPDATE
|
|
511
538
|
# define DISPLAYUPDATE(l, ...) if (notificationLevel>=l) { \
|
|
512
539
|
if (ZDICT_clockSpan(displayClock) > refreshRate) \
|
|
513
540
|
{ displayClock = clock(); DISPLAY(__VA_ARGS__); \
|
|
@@ -571,7 +598,7 @@ static void ZDICT_fillNoise(void* buffer, size_t length)
|
|
|
571
598
|
unsigned const prime1 = 2654435761U;
|
|
572
599
|
unsigned const prime2 = 2246822519U;
|
|
573
600
|
unsigned acc = prime1;
|
|
574
|
-
size_t p=0
|
|
601
|
+
size_t p=0;
|
|
575
602
|
for (p=0; p<length; p++) {
|
|
576
603
|
acc *= prime2;
|
|
577
604
|
((unsigned char*)buffer)[p] = (unsigned char)(acc >> 21);
|
|
@@ -588,12 +615,12 @@ typedef struct
|
|
|
588
615
|
|
|
589
616
|
#define MAXREPOFFSET 1024
|
|
590
617
|
|
|
591
|
-
static void ZDICT_countEStats(EStats_ress_t esr, ZSTD_parameters params,
|
|
618
|
+
static void ZDICT_countEStats(EStats_ress_t esr, const ZSTD_parameters* params,
|
|
592
619
|
unsigned* countLit, unsigned* offsetcodeCount, unsigned* matchlengthCount, unsigned* litlengthCount, U32* repOffsets,
|
|
593
620
|
const void* src, size_t srcSize,
|
|
594
621
|
U32 notificationLevel)
|
|
595
622
|
{
|
|
596
|
-
size_t const blockSizeMax = MIN (ZSTD_BLOCKSIZE_MAX, 1 << params
|
|
623
|
+
size_t const blockSizeMax = MIN (ZSTD_BLOCKSIZE_MAX, 1 << params->cParams.windowLog);
|
|
597
624
|
size_t cSize;
|
|
598
625
|
|
|
599
626
|
if (srcSize > blockSizeMax) srcSize = blockSizeMax; /* protection vs large samples */
|
|
@@ -682,7 +709,7 @@ static void ZDICT_flatLit(unsigned* countLit)
|
|
|
682
709
|
|
|
683
710
|
#define OFFCODE_MAX 30 /* only applicable to first block */
|
|
684
711
|
static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
|
|
685
|
-
|
|
712
|
+
int compressionLevel,
|
|
686
713
|
const void* srcBuffer, const size_t* fileSizes, unsigned nbFiles,
|
|
687
714
|
const void* dictBuffer, size_t dictBufferSize,
|
|
688
715
|
unsigned notificationLevel)
|
|
@@ -717,7 +744,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
|
|
|
717
744
|
memset(repOffset, 0, sizeof(repOffset));
|
|
718
745
|
repOffset[1] = repOffset[4] = repOffset[8] = 1;
|
|
719
746
|
memset(bestRepOffset, 0, sizeof(bestRepOffset));
|
|
720
|
-
if (compressionLevel==0) compressionLevel =
|
|
747
|
+
if (compressionLevel==0) compressionLevel = ZSTD_CLEVEL_DEFAULT;
|
|
721
748
|
params = ZSTD_getParams(compressionLevel, averageSampleSize, dictBufferSize);
|
|
722
749
|
|
|
723
750
|
esr.dict = ZSTD_createCDict_advanced(dictBuffer, dictBufferSize, ZSTD_dlm_byRef, ZSTD_dct_rawContent, params.cParams, ZSTD_defaultCMem);
|
|
@@ -731,7 +758,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
|
|
|
731
758
|
|
|
732
759
|
/* collect stats on all samples */
|
|
733
760
|
for (u=0; u<nbFiles; u++) {
|
|
734
|
-
ZDICT_countEStats(esr, params,
|
|
761
|
+
ZDICT_countEStats(esr, &params,
|
|
735
762
|
countLit, offcodeCount, matchLengthCount, litLengthCount, repOffset,
|
|
736
763
|
(const char*)srcBuffer + pos, fileSizes[u],
|
|
737
764
|
notificationLevel);
|
|
@@ -741,7 +768,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
|
|
|
741
768
|
/* analyze, build stats, starting with literals */
|
|
742
769
|
{ size_t maxNbBits = HUF_buildCTable (hufTable, countLit, 255, huffLog);
|
|
743
770
|
if (HUF_isError(maxNbBits)) {
|
|
744
|
-
eSize =
|
|
771
|
+
eSize = maxNbBits;
|
|
745
772
|
DISPLAYLEVEL(1, " HUF_buildCTable error \n");
|
|
746
773
|
goto _cleanup;
|
|
747
774
|
}
|
|
@@ -762,27 +789,27 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
|
|
|
762
789
|
/* note : the result of this phase should be used to better appreciate the impact on statistics */
|
|
763
790
|
|
|
764
791
|
total=0; for (u=0; u<=offcodeMax; u++) total+=offcodeCount[u];
|
|
765
|
-
errorCode = FSE_normalizeCount(offcodeNCount, Offlog, offcodeCount, total, offcodeMax);
|
|
792
|
+
errorCode = FSE_normalizeCount(offcodeNCount, Offlog, offcodeCount, total, offcodeMax, /* useLowProbCount */ 1);
|
|
766
793
|
if (FSE_isError(errorCode)) {
|
|
767
|
-
eSize =
|
|
794
|
+
eSize = errorCode;
|
|
768
795
|
DISPLAYLEVEL(1, "FSE_normalizeCount error with offcodeCount \n");
|
|
769
796
|
goto _cleanup;
|
|
770
797
|
}
|
|
771
798
|
Offlog = (U32)errorCode;
|
|
772
799
|
|
|
773
800
|
total=0; for (u=0; u<=MaxML; u++) total+=matchLengthCount[u];
|
|
774
|
-
errorCode = FSE_normalizeCount(matchLengthNCount, mlLog, matchLengthCount, total, MaxML);
|
|
801
|
+
errorCode = FSE_normalizeCount(matchLengthNCount, mlLog, matchLengthCount, total, MaxML, /* useLowProbCount */ 1);
|
|
775
802
|
if (FSE_isError(errorCode)) {
|
|
776
|
-
eSize =
|
|
803
|
+
eSize = errorCode;
|
|
777
804
|
DISPLAYLEVEL(1, "FSE_normalizeCount error with matchLengthCount \n");
|
|
778
805
|
goto _cleanup;
|
|
779
806
|
}
|
|
780
807
|
mlLog = (U32)errorCode;
|
|
781
808
|
|
|
782
809
|
total=0; for (u=0; u<=MaxLL; u++) total+=litLengthCount[u];
|
|
783
|
-
errorCode = FSE_normalizeCount(litLengthNCount, llLog, litLengthCount, total, MaxLL);
|
|
810
|
+
errorCode = FSE_normalizeCount(litLengthNCount, llLog, litLengthCount, total, MaxLL, /* useLowProbCount */ 1);
|
|
784
811
|
if (FSE_isError(errorCode)) {
|
|
785
|
-
eSize =
|
|
812
|
+
eSize = errorCode;
|
|
786
813
|
DISPLAYLEVEL(1, "FSE_normalizeCount error with litLengthCount \n");
|
|
787
814
|
goto _cleanup;
|
|
788
815
|
}
|
|
@@ -791,7 +818,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
|
|
|
791
818
|
/* write result to buffer */
|
|
792
819
|
{ size_t const hhSize = HUF_writeCTable(dstPtr, maxDstSize, hufTable, 255, huffLog);
|
|
793
820
|
if (HUF_isError(hhSize)) {
|
|
794
|
-
eSize =
|
|
821
|
+
eSize = hhSize;
|
|
795
822
|
DISPLAYLEVEL(1, "HUF_writeCTable error \n");
|
|
796
823
|
goto _cleanup;
|
|
797
824
|
}
|
|
@@ -802,7 +829,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
|
|
|
802
829
|
|
|
803
830
|
{ size_t const ohSize = FSE_writeNCount(dstPtr, maxDstSize, offcodeNCount, OFFCODE_MAX, Offlog);
|
|
804
831
|
if (FSE_isError(ohSize)) {
|
|
805
|
-
eSize =
|
|
832
|
+
eSize = ohSize;
|
|
806
833
|
DISPLAYLEVEL(1, "FSE_writeNCount error with offcodeNCount \n");
|
|
807
834
|
goto _cleanup;
|
|
808
835
|
}
|
|
@@ -813,7 +840,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
|
|
|
813
840
|
|
|
814
841
|
{ size_t const mhSize = FSE_writeNCount(dstPtr, maxDstSize, matchLengthNCount, MaxML, mlLog);
|
|
815
842
|
if (FSE_isError(mhSize)) {
|
|
816
|
-
eSize =
|
|
843
|
+
eSize = mhSize;
|
|
817
844
|
DISPLAYLEVEL(1, "FSE_writeNCount error with matchLengthNCount \n");
|
|
818
845
|
goto _cleanup;
|
|
819
846
|
}
|
|
@@ -824,7 +851,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
|
|
|
824
851
|
|
|
825
852
|
{ size_t const lhSize = FSE_writeNCount(dstPtr, maxDstSize, litLengthNCount, MaxLL, llLog);
|
|
826
853
|
if (FSE_isError(lhSize)) {
|
|
827
|
-
eSize =
|
|
854
|
+
eSize = lhSize;
|
|
828
855
|
DISPLAYLEVEL(1, "FSE_writeNCount error with litlengthNCount \n");
|
|
829
856
|
goto _cleanup;
|
|
830
857
|
}
|
|
@@ -834,7 +861,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
|
|
|
834
861
|
}
|
|
835
862
|
|
|
836
863
|
if (maxDstSize<12) {
|
|
837
|
-
eSize = ERROR(
|
|
864
|
+
eSize = ERROR(dstSize_tooSmall);
|
|
838
865
|
DISPLAYLEVEL(1, "not enough space to write RepOffsets \n");
|
|
839
866
|
goto _cleanup;
|
|
840
867
|
}
|
|
@@ -869,7 +896,7 @@ size_t ZDICT_finalizeDictionary(void* dictBuffer, size_t dictBufferCapacity,
|
|
|
869
896
|
size_t hSize;
|
|
870
897
|
#define HBUFFSIZE 256 /* should prove large enough for all entropy headers */
|
|
871
898
|
BYTE header[HBUFFSIZE];
|
|
872
|
-
int const compressionLevel = (params.compressionLevel == 0) ?
|
|
899
|
+
int const compressionLevel = (params.compressionLevel == 0) ? ZSTD_CLEVEL_DEFAULT : params.compressionLevel;
|
|
873
900
|
U32 const notificationLevel = params.notificationLevel;
|
|
874
901
|
|
|
875
902
|
/* check conditions */
|
|
@@ -915,7 +942,7 @@ static size_t ZDICT_addEntropyTablesFromBuffer_advanced(
|
|
|
915
942
|
const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
|
|
916
943
|
ZDICT_params_t params)
|
|
917
944
|
{
|
|
918
|
-
int const compressionLevel = (params.compressionLevel == 0) ?
|
|
945
|
+
int const compressionLevel = (params.compressionLevel == 0) ? ZSTD_CLEVEL_DEFAULT : params.compressionLevel;
|
|
919
946
|
U32 const notificationLevel = params.notificationLevel;
|
|
920
947
|
size_t hSize = 8;
|
|
921
948
|
|
|
@@ -944,16 +971,11 @@ static size_t ZDICT_addEntropyTablesFromBuffer_advanced(
|
|
|
944
971
|
return MIN(dictBufferCapacity, hSize+dictContentSize);
|
|
945
972
|
}
|
|
946
973
|
|
|
947
|
-
/* Hidden declaration for dbio.c */
|
|
948
|
-
size_t ZDICT_trainFromBuffer_unsafe_legacy(
|
|
949
|
-
void* dictBuffer, size_t maxDictSize,
|
|
950
|
-
const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
|
|
951
|
-
ZDICT_legacy_params_t params);
|
|
952
974
|
/*! ZDICT_trainFromBuffer_unsafe_legacy() :
|
|
953
|
-
* Warning : `samplesBuffer` must be followed by noisy guard band
|
|
975
|
+
* Warning : `samplesBuffer` must be followed by noisy guard band !!!
|
|
954
976
|
* @return : size of dictionary, or an error code which can be tested with ZDICT_isError()
|
|
955
977
|
*/
|
|
956
|
-
size_t ZDICT_trainFromBuffer_unsafe_legacy(
|
|
978
|
+
static size_t ZDICT_trainFromBuffer_unsafe_legacy(
|
|
957
979
|
void* dictBuffer, size_t maxDictSize,
|
|
958
980
|
const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
|
|
959
981
|
ZDICT_legacy_params_t params)
|
|
@@ -1090,8 +1112,8 @@ size_t ZDICT_trainFromBuffer(void* dictBuffer, size_t dictBufferCapacity,
|
|
|
1090
1112
|
memset(&params, 0, sizeof(params));
|
|
1091
1113
|
params.d = 8;
|
|
1092
1114
|
params.steps = 4;
|
|
1093
|
-
/*
|
|
1094
|
-
params.zParams.compressionLevel =
|
|
1115
|
+
/* Use default level since no compression level information is available */
|
|
1116
|
+
params.zParams.compressionLevel = ZSTD_CLEVEL_DEFAULT;
|
|
1095
1117
|
#if defined(DEBUGLEVEL) && (DEBUGLEVEL>=1)
|
|
1096
1118
|
params.zParams.notificationLevel = DEBUGLEVEL;
|
|
1097
1119
|
#endif
|