extzstd 0.3 → 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/HISTORY.ja.md +8 -0
- data/README.md +1 -1
- data/contrib/zstd/CHANGELOG +94 -0
- data/contrib/zstd/CONTRIBUTING.md +351 -1
- data/contrib/zstd/Makefile +32 -10
- data/contrib/zstd/README.md +33 -10
- data/contrib/zstd/TESTING.md +2 -2
- data/contrib/zstd/appveyor.yml +42 -4
- data/contrib/zstd/lib/Makefile +128 -60
- data/contrib/zstd/lib/README.md +47 -16
- data/contrib/zstd/lib/common/bitstream.h +38 -39
- data/contrib/zstd/lib/common/compiler.h +40 -5
- data/contrib/zstd/lib/common/cpu.h +1 -1
- data/contrib/zstd/lib/common/debug.c +11 -31
- data/contrib/zstd/lib/common/debug.h +11 -31
- data/contrib/zstd/lib/common/entropy_common.c +13 -33
- data/contrib/zstd/lib/common/error_private.c +2 -1
- data/contrib/zstd/lib/common/error_private.h +6 -2
- data/contrib/zstd/lib/common/fse.h +12 -32
- data/contrib/zstd/lib/common/fse_decompress.c +12 -35
- data/contrib/zstd/lib/common/huf.h +15 -33
- data/contrib/zstd/lib/common/mem.h +75 -2
- data/contrib/zstd/lib/common/pool.c +8 -4
- data/contrib/zstd/lib/common/pool.h +2 -2
- data/contrib/zstd/lib/common/threading.c +50 -4
- data/contrib/zstd/lib/common/threading.h +36 -4
- data/contrib/zstd/lib/common/xxhash.c +23 -35
- data/contrib/zstd/lib/common/xxhash.h +11 -31
- data/contrib/zstd/lib/common/zstd_common.c +1 -1
- data/contrib/zstd/lib/common/zstd_errors.h +2 -1
- data/contrib/zstd/lib/common/zstd_internal.h +154 -26
- data/contrib/zstd/lib/compress/fse_compress.c +17 -40
- data/contrib/zstd/lib/compress/hist.c +15 -35
- data/contrib/zstd/lib/compress/hist.h +12 -32
- data/contrib/zstd/lib/compress/huf_compress.c +92 -92
- data/contrib/zstd/lib/compress/zstd_compress.c +1191 -1330
- data/contrib/zstd/lib/compress/zstd_compress_internal.h +317 -55
- data/contrib/zstd/lib/compress/zstd_compress_literals.c +158 -0
- data/contrib/zstd/lib/compress/zstd_compress_literals.h +29 -0
- data/contrib/zstd/lib/compress/zstd_compress_sequences.c +419 -0
- data/contrib/zstd/lib/compress/zstd_compress_sequences.h +54 -0
- data/contrib/zstd/lib/compress/zstd_compress_superblock.c +845 -0
- data/contrib/zstd/lib/compress/zstd_compress_superblock.h +32 -0
- data/contrib/zstd/lib/compress/zstd_cwksp.h +525 -0
- data/contrib/zstd/lib/compress/zstd_double_fast.c +65 -43
- data/contrib/zstd/lib/compress/zstd_double_fast.h +2 -2
- data/contrib/zstd/lib/compress/zstd_fast.c +92 -66
- data/contrib/zstd/lib/compress/zstd_fast.h +2 -2
- data/contrib/zstd/lib/compress/zstd_lazy.c +74 -42
- data/contrib/zstd/lib/compress/zstd_lazy.h +1 -1
- data/contrib/zstd/lib/compress/zstd_ldm.c +32 -10
- data/contrib/zstd/lib/compress/zstd_ldm.h +7 -2
- data/contrib/zstd/lib/compress/zstd_opt.c +81 -114
- data/contrib/zstd/lib/compress/zstd_opt.h +1 -1
- data/contrib/zstd/lib/compress/zstdmt_compress.c +95 -51
- data/contrib/zstd/lib/compress/zstdmt_compress.h +3 -2
- data/contrib/zstd/lib/decompress/huf_decompress.c +76 -60
- data/contrib/zstd/lib/decompress/zstd_ddict.c +12 -8
- data/contrib/zstd/lib/decompress/zstd_ddict.h +2 -2
- data/contrib/zstd/lib/decompress/zstd_decompress.c +292 -172
- data/contrib/zstd/lib/decompress/zstd_decompress_block.c +459 -338
- data/contrib/zstd/lib/decompress/zstd_decompress_block.h +3 -3
- data/contrib/zstd/lib/decompress/zstd_decompress_internal.h +18 -4
- data/contrib/zstd/lib/deprecated/zbuff.h +9 -8
- data/contrib/zstd/lib/deprecated/zbuff_common.c +2 -2
- data/contrib/zstd/lib/deprecated/zbuff_compress.c +1 -1
- data/contrib/zstd/lib/deprecated/zbuff_decompress.c +1 -1
- data/contrib/zstd/lib/dictBuilder/cover.c +164 -54
- data/contrib/zstd/lib/dictBuilder/cover.h +52 -7
- data/contrib/zstd/lib/dictBuilder/fastcover.c +60 -43
- data/contrib/zstd/lib/dictBuilder/zdict.c +43 -19
- data/contrib/zstd/lib/dictBuilder/zdict.h +56 -28
- data/contrib/zstd/lib/legacy/zstd_legacy.h +8 -4
- data/contrib/zstd/lib/legacy/zstd_v01.c +110 -110
- data/contrib/zstd/lib/legacy/zstd_v01.h +1 -1
- data/contrib/zstd/lib/legacy/zstd_v02.c +23 -13
- data/contrib/zstd/lib/legacy/zstd_v02.h +1 -1
- data/contrib/zstd/lib/legacy/zstd_v03.c +23 -13
- data/contrib/zstd/lib/legacy/zstd_v03.h +1 -1
- data/contrib/zstd/lib/legacy/zstd_v04.c +30 -17
- data/contrib/zstd/lib/legacy/zstd_v04.h +1 -1
- data/contrib/zstd/lib/legacy/zstd_v05.c +113 -102
- data/contrib/zstd/lib/legacy/zstd_v05.h +2 -2
- data/contrib/zstd/lib/legacy/zstd_v06.c +20 -18
- data/contrib/zstd/lib/legacy/zstd_v06.h +1 -1
- data/contrib/zstd/lib/legacy/zstd_v07.c +25 -19
- data/contrib/zstd/lib/legacy/zstd_v07.h +1 -1
- data/contrib/zstd/lib/libzstd.pc.in +3 -2
- data/contrib/zstd/lib/zstd.h +265 -88
- data/ext/extzstd.h +1 -1
- data/ext/libzstd_conf.h +8 -0
- data/ext/zstd_common.c +1 -3
- data/ext/zstd_compress.c +3 -3
- data/ext/zstd_decompress.c +1 -5
- data/ext/zstd_dictbuilder.c +2 -3
- data/ext/zstd_dictbuilder_fastcover.c +1 -3
- data/ext/zstd_legacy_v01.c +2 -0
- data/ext/zstd_legacy_v02.c +2 -0
- data/ext/zstd_legacy_v03.c +2 -0
- data/ext/zstd_legacy_v04.c +2 -0
- data/ext/zstd_legacy_v05.c +2 -0
- data/ext/zstd_legacy_v06.c +2 -0
- data/ext/zstd_legacy_v07.c +2 -0
- data/lib/extzstd.rb +18 -10
- data/lib/extzstd/version.rb +1 -1
- metadata +15 -6
|
@@ -1,11 +1,21 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Copyright (c) 2017-2020, Facebook, Inc.
|
|
3
|
+
* All rights reserved.
|
|
4
|
+
*
|
|
5
|
+
* This source code is licensed under both the BSD-style license (found in the
|
|
6
|
+
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
|
|
7
|
+
* in the COPYING file in the root directory of this source tree).
|
|
8
|
+
* You may select, at your option, one of the above-listed licenses.
|
|
9
|
+
*/
|
|
10
|
+
|
|
1
11
|
#include <stdio.h> /* fprintf */
|
|
2
12
|
#include <stdlib.h> /* malloc, free, qsort */
|
|
3
13
|
#include <string.h> /* memset */
|
|
4
14
|
#include <time.h> /* clock */
|
|
5
|
-
#include "mem.h" /* read */
|
|
6
|
-
#include "pool.h"
|
|
7
|
-
#include "threading.h"
|
|
8
|
-
#include "zstd_internal.h" /* includes zstd.h */
|
|
15
|
+
#include "../common/mem.h" /* read */
|
|
16
|
+
#include "../common/pool.h"
|
|
17
|
+
#include "../common/threading.h"
|
|
18
|
+
#include "../common/zstd_internal.h" /* includes zstd.h */
|
|
9
19
|
#ifndef ZDICT_STATIC_LINKING_ONLY
|
|
10
20
|
#define ZDICT_STATIC_LINKING_ONLY
|
|
11
21
|
#endif
|
|
@@ -46,6 +56,15 @@ typedef struct {
|
|
|
46
56
|
U32 size;
|
|
47
57
|
} COVER_epoch_info_t;
|
|
48
58
|
|
|
59
|
+
/**
|
|
60
|
+
* Struct used for the dictionary selection function.
|
|
61
|
+
*/
|
|
62
|
+
typedef struct COVER_dictSelection {
|
|
63
|
+
BYTE* dictContent;
|
|
64
|
+
size_t dictSize;
|
|
65
|
+
size_t totalCompressedSize;
|
|
66
|
+
} COVER_dictSelection_t;
|
|
67
|
+
|
|
49
68
|
/**
|
|
50
69
|
* Computes the number of epochs and the size of each epoch.
|
|
51
70
|
* We will make sure that each epoch gets at least 10 * k bytes.
|
|
@@ -107,6 +126,32 @@ void COVER_best_start(COVER_best_t *best);
|
|
|
107
126
|
* Decrements liveJobs and signals any waiting threads if liveJobs == 0.
|
|
108
127
|
* If this dictionary is the best so far save it and its parameters.
|
|
109
128
|
*/
|
|
110
|
-
void COVER_best_finish(COVER_best_t *best,
|
|
111
|
-
|
|
112
|
-
|
|
129
|
+
void COVER_best_finish(COVER_best_t *best, ZDICT_cover_params_t parameters,
|
|
130
|
+
COVER_dictSelection_t selection);
|
|
131
|
+
/**
|
|
132
|
+
* Error function for COVER_selectDict function. Checks if the return
|
|
133
|
+
* value is an error.
|
|
134
|
+
*/
|
|
135
|
+
unsigned COVER_dictSelectionIsError(COVER_dictSelection_t selection);
|
|
136
|
+
|
|
137
|
+
/**
|
|
138
|
+
* Error function for COVER_selectDict function. Returns a struct where
|
|
139
|
+
* return.totalCompressedSize is a ZSTD error.
|
|
140
|
+
*/
|
|
141
|
+
COVER_dictSelection_t COVER_dictSelectionError(size_t error);
|
|
142
|
+
|
|
143
|
+
/**
|
|
144
|
+
* Always call after selectDict is called to free up used memory from
|
|
145
|
+
* newly created dictionary.
|
|
146
|
+
*/
|
|
147
|
+
void COVER_dictSelectionFree(COVER_dictSelection_t selection);
|
|
148
|
+
|
|
149
|
+
/**
|
|
150
|
+
* Called to finalize the dictionary and select one based on whether or not
|
|
151
|
+
* the shrink-dict flag was enabled. If enabled the dictionary used is the
|
|
152
|
+
* smallest dictionary within a specified regression of the compressed size
|
|
153
|
+
* from the largest dictionary.
|
|
154
|
+
*/
|
|
155
|
+
COVER_dictSelection_t COVER_selectDict(BYTE* customDictContent,
|
|
156
|
+
size_t dictContentSize, const BYTE* samplesBuffer, const size_t* samplesSizes, unsigned nbFinalizeSamples,
|
|
157
|
+
size_t nbCheckSamples, size_t nbSamples, ZDICT_cover_params_t params, size_t* offsets, size_t totalCompressedSize);
|
|
@@ -1,3 +1,13 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Copyright (c) 2018-2020, Facebook, Inc.
|
|
3
|
+
* All rights reserved.
|
|
4
|
+
*
|
|
5
|
+
* This source code is licensed under both the BSD-style license (found in the
|
|
6
|
+
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
|
|
7
|
+
* in the COPYING file in the root directory of this source tree).
|
|
8
|
+
* You may select, at your option, one of the above-listed licenses.
|
|
9
|
+
*/
|
|
10
|
+
|
|
1
11
|
/*-*************************************
|
|
2
12
|
* Dependencies
|
|
3
13
|
***************************************/
|
|
@@ -6,11 +16,11 @@
|
|
|
6
16
|
#include <string.h> /* memset */
|
|
7
17
|
#include <time.h> /* clock */
|
|
8
18
|
|
|
9
|
-
#include "mem.h" /* read */
|
|
10
|
-
#include "pool.h"
|
|
11
|
-
#include "threading.h"
|
|
19
|
+
#include "../common/mem.h" /* read */
|
|
20
|
+
#include "../common/pool.h"
|
|
21
|
+
#include "../common/threading.h"
|
|
12
22
|
#include "cover.h"
|
|
13
|
-
#include "zstd_internal.h" /* includes zstd.h */
|
|
23
|
+
#include "../common/zstd_internal.h" /* includes zstd.h */
|
|
14
24
|
#ifndef ZDICT_STATIC_LINKING_ONLY
|
|
15
25
|
#define ZDICT_STATIC_LINKING_ONLY
|
|
16
26
|
#endif
|
|
@@ -287,10 +297,10 @@ FASTCOVER_computeFrequency(U32* freqs, const FASTCOVER_ctx_t* ctx)
|
|
|
287
297
|
* Prepare a context for dictionary building.
|
|
288
298
|
* The context is only dependent on the parameter `d` and can used multiple
|
|
289
299
|
* times.
|
|
290
|
-
* Returns
|
|
300
|
+
* Returns 0 on success or error code on error.
|
|
291
301
|
* The context must be destroyed with `FASTCOVER_ctx_destroy()`.
|
|
292
302
|
*/
|
|
293
|
-
static
|
|
303
|
+
static size_t
|
|
294
304
|
FASTCOVER_ctx_init(FASTCOVER_ctx_t* ctx,
|
|
295
305
|
const void* samplesBuffer,
|
|
296
306
|
const size_t* samplesSizes, unsigned nbSamples,
|
|
@@ -310,19 +320,19 @@ FASTCOVER_ctx_init(FASTCOVER_ctx_t* ctx,
|
|
|
310
320
|
totalSamplesSize >= (size_t)FASTCOVER_MAX_SAMPLES_SIZE) {
|
|
311
321
|
DISPLAYLEVEL(1, "Total samples size is too large (%u MB), maximum size is %u MB\n",
|
|
312
322
|
(unsigned)(totalSamplesSize >> 20), (FASTCOVER_MAX_SAMPLES_SIZE >> 20));
|
|
313
|
-
return
|
|
323
|
+
return ERROR(srcSize_wrong);
|
|
314
324
|
}
|
|
315
325
|
|
|
316
326
|
/* Check if there are at least 5 training samples */
|
|
317
327
|
if (nbTrainSamples < 5) {
|
|
318
328
|
DISPLAYLEVEL(1, "Total number of training samples is %u and is invalid\n", nbTrainSamples);
|
|
319
|
-
return
|
|
329
|
+
return ERROR(srcSize_wrong);
|
|
320
330
|
}
|
|
321
331
|
|
|
322
332
|
/* Check if there's testing sample */
|
|
323
333
|
if (nbTestSamples < 1) {
|
|
324
334
|
DISPLAYLEVEL(1, "Total number of testing samples is %u and is invalid.\n", nbTestSamples);
|
|
325
|
-
return
|
|
335
|
+
return ERROR(srcSize_wrong);
|
|
326
336
|
}
|
|
327
337
|
|
|
328
338
|
/* Zero the context */
|
|
@@ -347,7 +357,7 @@ FASTCOVER_ctx_init(FASTCOVER_ctx_t* ctx,
|
|
|
347
357
|
if (ctx->offsets == NULL) {
|
|
348
358
|
DISPLAYLEVEL(1, "Failed to allocate scratch buffers \n");
|
|
349
359
|
FASTCOVER_ctx_destroy(ctx);
|
|
350
|
-
return
|
|
360
|
+
return ERROR(memory_allocation);
|
|
351
361
|
}
|
|
352
362
|
|
|
353
363
|
/* Fill offsets from the samplesSizes */
|
|
@@ -364,13 +374,13 @@ FASTCOVER_ctx_init(FASTCOVER_ctx_t* ctx,
|
|
|
364
374
|
if (ctx->freqs == NULL) {
|
|
365
375
|
DISPLAYLEVEL(1, "Failed to allocate frequency table \n");
|
|
366
376
|
FASTCOVER_ctx_destroy(ctx);
|
|
367
|
-
return
|
|
377
|
+
return ERROR(memory_allocation);
|
|
368
378
|
}
|
|
369
379
|
|
|
370
380
|
DISPLAYLEVEL(2, "Computing frequencies\n");
|
|
371
381
|
FASTCOVER_computeFrequency(ctx->freqs, ctx);
|
|
372
382
|
|
|
373
|
-
return
|
|
383
|
+
return 0;
|
|
374
384
|
}
|
|
375
385
|
|
|
376
386
|
|
|
@@ -435,7 +445,6 @@ FASTCOVER_buildDictionary(const FASTCOVER_ctx_t* ctx,
|
|
|
435
445
|
return tail;
|
|
436
446
|
}
|
|
437
447
|
|
|
438
|
-
|
|
439
448
|
/**
|
|
440
449
|
* Parameters for FASTCOVER_tryParameters().
|
|
441
450
|
*/
|
|
@@ -464,6 +473,7 @@ static void FASTCOVER_tryParameters(void *opaque)
|
|
|
464
473
|
U16* segmentFreqs = (U16 *)calloc(((U64)1 << ctx->f), sizeof(U16));
|
|
465
474
|
/* Allocate space for hash table, dict, and freqs */
|
|
466
475
|
BYTE *const dict = (BYTE * const)malloc(dictBufferCapacity);
|
|
476
|
+
COVER_dictSelection_t selection = COVER_dictSelectionError(ERROR(GENERIC));
|
|
467
477
|
U32 *freqs = (U32*) malloc(((U64)1 << ctx->f) * sizeof(U32));
|
|
468
478
|
if (!segmentFreqs || !dict || !freqs) {
|
|
469
479
|
DISPLAYLEVEL(1, "Failed to allocate buffers: out of memory\n");
|
|
@@ -473,27 +483,24 @@ static void FASTCOVER_tryParameters(void *opaque)
|
|
|
473
483
|
memcpy(freqs, ctx->freqs, ((U64)1 << ctx->f) * sizeof(U32));
|
|
474
484
|
/* Build the dictionary */
|
|
475
485
|
{ const size_t tail = FASTCOVER_buildDictionary(ctx, freqs, dict, dictBufferCapacity,
|
|
476
|
-
|
|
486
|
+
parameters, segmentFreqs);
|
|
487
|
+
|
|
477
488
|
const unsigned nbFinalizeSamples = (unsigned)(ctx->nbTrainSamples * ctx->accelParams.finalize / 100);
|
|
478
|
-
|
|
479
|
-
|
|
480
|
-
|
|
481
|
-
|
|
482
|
-
|
|
489
|
+
selection = COVER_selectDict(dict + tail, dictBufferCapacity - tail,
|
|
490
|
+
ctx->samples, ctx->samplesSizes, nbFinalizeSamples, ctx->nbTrainSamples, ctx->nbSamples, parameters, ctx->offsets,
|
|
491
|
+
totalCompressedSize);
|
|
492
|
+
|
|
493
|
+
if (COVER_dictSelectionIsError(selection)) {
|
|
494
|
+
DISPLAYLEVEL(1, "Failed to select dictionary\n");
|
|
483
495
|
goto _cleanup;
|
|
484
496
|
}
|
|
485
497
|
}
|
|
486
|
-
/* Check total compressed size */
|
|
487
|
-
totalCompressedSize = COVER_checkTotalCompressedSize(parameters, ctx->samplesSizes,
|
|
488
|
-
ctx->samples, ctx->offsets,
|
|
489
|
-
ctx->nbTrainSamples, ctx->nbSamples,
|
|
490
|
-
dict, dictBufferCapacity);
|
|
491
498
|
_cleanup:
|
|
492
|
-
|
|
493
|
-
|
|
499
|
+
free(dict);
|
|
500
|
+
COVER_best_finish(data->best, parameters, selection);
|
|
494
501
|
free(data);
|
|
495
502
|
free(segmentFreqs);
|
|
496
|
-
|
|
503
|
+
COVER_dictSelectionFree(selection);
|
|
497
504
|
free(freqs);
|
|
498
505
|
}
|
|
499
506
|
|
|
@@ -508,6 +515,7 @@ FASTCOVER_convertToCoverParams(ZDICT_fastCover_params_t fastCoverParams,
|
|
|
508
515
|
coverParams->nbThreads = fastCoverParams.nbThreads;
|
|
509
516
|
coverParams->splitPoint = fastCoverParams.splitPoint;
|
|
510
517
|
coverParams->zParams = fastCoverParams.zParams;
|
|
518
|
+
coverParams->shrinkDict = fastCoverParams.shrinkDict;
|
|
511
519
|
}
|
|
512
520
|
|
|
513
521
|
|
|
@@ -524,6 +532,7 @@ FASTCOVER_convertToFastCoverParams(ZDICT_cover_params_t coverParams,
|
|
|
524
532
|
fastCoverParams->f = f;
|
|
525
533
|
fastCoverParams->accel = accel;
|
|
526
534
|
fastCoverParams->zParams = coverParams.zParams;
|
|
535
|
+
fastCoverParams->shrinkDict = coverParams.shrinkDict;
|
|
527
536
|
}
|
|
528
537
|
|
|
529
538
|
|
|
@@ -550,11 +559,11 @@ ZDICT_trainFromBuffer_fastCover(void* dictBuffer, size_t dictBufferCapacity,
|
|
|
550
559
|
if (!FASTCOVER_checkParameters(coverParams, dictBufferCapacity, parameters.f,
|
|
551
560
|
parameters.accel)) {
|
|
552
561
|
DISPLAYLEVEL(1, "FASTCOVER parameters incorrect\n");
|
|
553
|
-
return ERROR(
|
|
562
|
+
return ERROR(parameter_outOfBound);
|
|
554
563
|
}
|
|
555
564
|
if (nbSamples == 0) {
|
|
556
565
|
DISPLAYLEVEL(1, "FASTCOVER must have at least one input file\n");
|
|
557
|
-
return ERROR(
|
|
566
|
+
return ERROR(srcSize_wrong);
|
|
558
567
|
}
|
|
559
568
|
if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) {
|
|
560
569
|
DISPLAYLEVEL(1, "dictBufferCapacity must be at least %u\n",
|
|
@@ -564,11 +573,14 @@ ZDICT_trainFromBuffer_fastCover(void* dictBuffer, size_t dictBufferCapacity,
|
|
|
564
573
|
/* Assign corresponding FASTCOVER_accel_t to accelParams*/
|
|
565
574
|
accelParams = FASTCOVER_defaultAccelParameters[parameters.accel];
|
|
566
575
|
/* Initialize context */
|
|
567
|
-
|
|
576
|
+
{
|
|
577
|
+
size_t const initVal = FASTCOVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples,
|
|
568
578
|
coverParams.d, parameters.splitPoint, parameters.f,
|
|
569
|
-
accelParams)
|
|
570
|
-
|
|
571
|
-
|
|
579
|
+
accelParams);
|
|
580
|
+
if (ZSTD_isError(initVal)) {
|
|
581
|
+
DISPLAYLEVEL(1, "Failed to initialize context\n");
|
|
582
|
+
return initVal;
|
|
583
|
+
}
|
|
572
584
|
}
|
|
573
585
|
COVER_warnOnSmallCorpus(dictBufferCapacity, ctx.nbDmers, g_displayLevel);
|
|
574
586
|
/* Build the dictionary */
|
|
@@ -616,6 +628,7 @@ ZDICT_optimizeTrainFromBuffer_fastCover(
|
|
|
616
628
|
(1 + (kMaxD - kMinD) / 2) * (1 + (kMaxK - kMinK) / kStepSize);
|
|
617
629
|
const unsigned f = parameters->f == 0 ? DEFAULT_F : parameters->f;
|
|
618
630
|
const unsigned accel = parameters->accel == 0 ? DEFAULT_ACCEL : parameters->accel;
|
|
631
|
+
const unsigned shrinkDict = 0;
|
|
619
632
|
/* Local variables */
|
|
620
633
|
const int displayLevel = parameters->zParams.notificationLevel;
|
|
621
634
|
unsigned iteration = 1;
|
|
@@ -627,19 +640,19 @@ ZDICT_optimizeTrainFromBuffer_fastCover(
|
|
|
627
640
|
/* Checks */
|
|
628
641
|
if (splitPoint <= 0 || splitPoint > 1) {
|
|
629
642
|
LOCALDISPLAYLEVEL(displayLevel, 1, "Incorrect splitPoint\n");
|
|
630
|
-
return ERROR(
|
|
643
|
+
return ERROR(parameter_outOfBound);
|
|
631
644
|
}
|
|
632
645
|
if (accel == 0 || accel > FASTCOVER_MAX_ACCEL) {
|
|
633
646
|
LOCALDISPLAYLEVEL(displayLevel, 1, "Incorrect accel\n");
|
|
634
|
-
return ERROR(
|
|
647
|
+
return ERROR(parameter_outOfBound);
|
|
635
648
|
}
|
|
636
649
|
if (kMinK < kMaxD || kMaxK < kMinK) {
|
|
637
650
|
LOCALDISPLAYLEVEL(displayLevel, 1, "Incorrect k\n");
|
|
638
|
-
return ERROR(
|
|
651
|
+
return ERROR(parameter_outOfBound);
|
|
639
652
|
}
|
|
640
653
|
if (nbSamples == 0) {
|
|
641
654
|
LOCALDISPLAYLEVEL(displayLevel, 1, "FASTCOVER must have at least one input file\n");
|
|
642
|
-
return ERROR(
|
|
655
|
+
return ERROR(srcSize_wrong);
|
|
643
656
|
}
|
|
644
657
|
if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) {
|
|
645
658
|
LOCALDISPLAYLEVEL(displayLevel, 1, "dictBufferCapacity must be at least %u\n",
|
|
@@ -666,11 +679,14 @@ ZDICT_optimizeTrainFromBuffer_fastCover(
|
|
|
666
679
|
/* Initialize the context for this value of d */
|
|
667
680
|
FASTCOVER_ctx_t ctx;
|
|
668
681
|
LOCALDISPLAYLEVEL(displayLevel, 3, "d=%u\n", d);
|
|
669
|
-
|
|
670
|
-
|
|
671
|
-
|
|
672
|
-
|
|
673
|
-
|
|
682
|
+
{
|
|
683
|
+
size_t const initVal = FASTCOVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples, d, splitPoint, f, accelParams);
|
|
684
|
+
if (ZSTD_isError(initVal)) {
|
|
685
|
+
LOCALDISPLAYLEVEL(displayLevel, 1, "Failed to initialize context\n");
|
|
686
|
+
COVER_best_destroy(&best);
|
|
687
|
+
POOL_free(pool);
|
|
688
|
+
return initVal;
|
|
689
|
+
}
|
|
674
690
|
}
|
|
675
691
|
if (!warned) {
|
|
676
692
|
COVER_warnOnSmallCorpus(dictBufferCapacity, ctx.nbDmers, displayLevel);
|
|
@@ -687,7 +703,7 @@ ZDICT_optimizeTrainFromBuffer_fastCover(
|
|
|
687
703
|
COVER_best_destroy(&best);
|
|
688
704
|
FASTCOVER_ctx_destroy(&ctx);
|
|
689
705
|
POOL_free(pool);
|
|
690
|
-
return ERROR(
|
|
706
|
+
return ERROR(memory_allocation);
|
|
691
707
|
}
|
|
692
708
|
data->ctx = &ctx;
|
|
693
709
|
data->best = &best;
|
|
@@ -697,6 +713,7 @@ ZDICT_optimizeTrainFromBuffer_fastCover(
|
|
|
697
713
|
data->parameters.d = d;
|
|
698
714
|
data->parameters.splitPoint = splitPoint;
|
|
699
715
|
data->parameters.steps = kSteps;
|
|
716
|
+
data->parameters.shrinkDict = shrinkDict;
|
|
700
717
|
data->parameters.zParams.notificationLevel = g_displayLevel;
|
|
701
718
|
/* Check the parameters */
|
|
702
719
|
if (!FASTCOVER_checkParameters(data->parameters, dictBufferCapacity,
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
/*
|
|
2
|
-
* Copyright (c) 2016-
|
|
2
|
+
* Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
|
|
3
3
|
* All rights reserved.
|
|
4
4
|
*
|
|
5
5
|
* This source code is licensed under both the BSD-style license (found in the
|
|
@@ -37,17 +37,18 @@
|
|
|
37
37
|
#include <stdio.h> /* fprintf, fopen, ftello64 */
|
|
38
38
|
#include <time.h> /* clock */
|
|
39
39
|
|
|
40
|
-
#include "mem.h" /* read */
|
|
41
|
-
#include "fse.h" /* FSE_normalizeCount, FSE_writeNCount */
|
|
40
|
+
#include "../common/mem.h" /* read */
|
|
41
|
+
#include "../common/fse.h" /* FSE_normalizeCount, FSE_writeNCount */
|
|
42
42
|
#define HUF_STATIC_LINKING_ONLY
|
|
43
|
-
#include "huf.h" /* HUF_buildCTable, HUF_writeCTable */
|
|
44
|
-
#include "zstd_internal.h" /* includes zstd.h */
|
|
45
|
-
#include "xxhash.h" /* XXH64 */
|
|
43
|
+
#include "../common/huf.h" /* HUF_buildCTable, HUF_writeCTable */
|
|
44
|
+
#include "../common/zstd_internal.h" /* includes zstd.h */
|
|
45
|
+
#include "../common/xxhash.h" /* XXH64 */
|
|
46
46
|
#include "divsufsort.h"
|
|
47
47
|
#ifndef ZDICT_STATIC_LINKING_ONLY
|
|
48
48
|
# define ZDICT_STATIC_LINKING_ONLY
|
|
49
49
|
#endif
|
|
50
50
|
#include "zdict.h"
|
|
51
|
+
#include "../compress/zstd_compress_internal.h" /* ZSTD_loadCEntropy() */
|
|
51
52
|
|
|
52
53
|
|
|
53
54
|
/*-*************************************
|
|
@@ -99,6 +100,29 @@ unsigned ZDICT_getDictID(const void* dictBuffer, size_t dictSize)
|
|
|
99
100
|
return MEM_readLE32((const char*)dictBuffer + 4);
|
|
100
101
|
}
|
|
101
102
|
|
|
103
|
+
size_t ZDICT_getDictHeaderSize(const void* dictBuffer, size_t dictSize)
|
|
104
|
+
{
|
|
105
|
+
size_t headerSize;
|
|
106
|
+
if (dictSize <= 8 || MEM_readLE32(dictBuffer) != ZSTD_MAGIC_DICTIONARY) return ERROR(dictionary_corrupted);
|
|
107
|
+
|
|
108
|
+
{ unsigned offcodeMaxValue = MaxOff;
|
|
109
|
+
ZSTD_compressedBlockState_t* bs = (ZSTD_compressedBlockState_t*)malloc(sizeof(ZSTD_compressedBlockState_t));
|
|
110
|
+
U32* wksp = (U32*)malloc(HUF_WORKSPACE_SIZE);
|
|
111
|
+
short* offcodeNCount = (short*)malloc((MaxOff+1)*sizeof(short));
|
|
112
|
+
if (!bs || !wksp || !offcodeNCount) {
|
|
113
|
+
headerSize = ERROR(memory_allocation);
|
|
114
|
+
} else {
|
|
115
|
+
ZSTD_reset_compressedBlockState(bs);
|
|
116
|
+
headerSize = ZSTD_loadCEntropy(bs, wksp, offcodeNCount, &offcodeMaxValue, dictBuffer, dictSize);
|
|
117
|
+
}
|
|
118
|
+
|
|
119
|
+
free(bs);
|
|
120
|
+
free(wksp);
|
|
121
|
+
free(offcodeNCount);
|
|
122
|
+
}
|
|
123
|
+
|
|
124
|
+
return headerSize;
|
|
125
|
+
}
|
|
102
126
|
|
|
103
127
|
/*-********************************************************
|
|
104
128
|
* Dictionary training functions
|
|
@@ -571,7 +595,7 @@ static void ZDICT_fillNoise(void* buffer, size_t length)
|
|
|
571
595
|
unsigned const prime1 = 2654435761U;
|
|
572
596
|
unsigned const prime2 = 2246822519U;
|
|
573
597
|
unsigned acc = prime1;
|
|
574
|
-
size_t p=0
|
|
598
|
+
size_t p=0;
|
|
575
599
|
for (p=0; p<length; p++) {
|
|
576
600
|
acc *= prime2;
|
|
577
601
|
((unsigned char*)buffer)[p] = (unsigned char)(acc >> 21);
|
|
@@ -588,12 +612,12 @@ typedef struct
|
|
|
588
612
|
|
|
589
613
|
#define MAXREPOFFSET 1024
|
|
590
614
|
|
|
591
|
-
static void ZDICT_countEStats(EStats_ress_t esr, ZSTD_parameters params,
|
|
615
|
+
static void ZDICT_countEStats(EStats_ress_t esr, const ZSTD_parameters* params,
|
|
592
616
|
unsigned* countLit, unsigned* offsetcodeCount, unsigned* matchlengthCount, unsigned* litlengthCount, U32* repOffsets,
|
|
593
617
|
const void* src, size_t srcSize,
|
|
594
618
|
U32 notificationLevel)
|
|
595
619
|
{
|
|
596
|
-
size_t const blockSizeMax = MIN (ZSTD_BLOCKSIZE_MAX, 1 << params
|
|
620
|
+
size_t const blockSizeMax = MIN (ZSTD_BLOCKSIZE_MAX, 1 << params->cParams.windowLog);
|
|
597
621
|
size_t cSize;
|
|
598
622
|
|
|
599
623
|
if (srcSize > blockSizeMax) srcSize = blockSizeMax; /* protection vs large samples */
|
|
@@ -731,7 +755,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
|
|
|
731
755
|
|
|
732
756
|
/* collect stats on all samples */
|
|
733
757
|
for (u=0; u<nbFiles; u++) {
|
|
734
|
-
ZDICT_countEStats(esr, params,
|
|
758
|
+
ZDICT_countEStats(esr, &params,
|
|
735
759
|
countLit, offcodeCount, matchLengthCount, litLengthCount, repOffset,
|
|
736
760
|
(const char*)srcBuffer + pos, fileSizes[u],
|
|
737
761
|
notificationLevel);
|
|
@@ -741,7 +765,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
|
|
|
741
765
|
/* analyze, build stats, starting with literals */
|
|
742
766
|
{ size_t maxNbBits = HUF_buildCTable (hufTable, countLit, 255, huffLog);
|
|
743
767
|
if (HUF_isError(maxNbBits)) {
|
|
744
|
-
eSize =
|
|
768
|
+
eSize = maxNbBits;
|
|
745
769
|
DISPLAYLEVEL(1, " HUF_buildCTable error \n");
|
|
746
770
|
goto _cleanup;
|
|
747
771
|
}
|
|
@@ -764,7 +788,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
|
|
|
764
788
|
total=0; for (u=0; u<=offcodeMax; u++) total+=offcodeCount[u];
|
|
765
789
|
errorCode = FSE_normalizeCount(offcodeNCount, Offlog, offcodeCount, total, offcodeMax);
|
|
766
790
|
if (FSE_isError(errorCode)) {
|
|
767
|
-
eSize =
|
|
791
|
+
eSize = errorCode;
|
|
768
792
|
DISPLAYLEVEL(1, "FSE_normalizeCount error with offcodeCount \n");
|
|
769
793
|
goto _cleanup;
|
|
770
794
|
}
|
|
@@ -773,7 +797,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
|
|
|
773
797
|
total=0; for (u=0; u<=MaxML; u++) total+=matchLengthCount[u];
|
|
774
798
|
errorCode = FSE_normalizeCount(matchLengthNCount, mlLog, matchLengthCount, total, MaxML);
|
|
775
799
|
if (FSE_isError(errorCode)) {
|
|
776
|
-
eSize =
|
|
800
|
+
eSize = errorCode;
|
|
777
801
|
DISPLAYLEVEL(1, "FSE_normalizeCount error with matchLengthCount \n");
|
|
778
802
|
goto _cleanup;
|
|
779
803
|
}
|
|
@@ -782,7 +806,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
|
|
|
782
806
|
total=0; for (u=0; u<=MaxLL; u++) total+=litLengthCount[u];
|
|
783
807
|
errorCode = FSE_normalizeCount(litLengthNCount, llLog, litLengthCount, total, MaxLL);
|
|
784
808
|
if (FSE_isError(errorCode)) {
|
|
785
|
-
eSize =
|
|
809
|
+
eSize = errorCode;
|
|
786
810
|
DISPLAYLEVEL(1, "FSE_normalizeCount error with litLengthCount \n");
|
|
787
811
|
goto _cleanup;
|
|
788
812
|
}
|
|
@@ -791,7 +815,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
|
|
|
791
815
|
/* write result to buffer */
|
|
792
816
|
{ size_t const hhSize = HUF_writeCTable(dstPtr, maxDstSize, hufTable, 255, huffLog);
|
|
793
817
|
if (HUF_isError(hhSize)) {
|
|
794
|
-
eSize =
|
|
818
|
+
eSize = hhSize;
|
|
795
819
|
DISPLAYLEVEL(1, "HUF_writeCTable error \n");
|
|
796
820
|
goto _cleanup;
|
|
797
821
|
}
|
|
@@ -802,7 +826,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
|
|
|
802
826
|
|
|
803
827
|
{ size_t const ohSize = FSE_writeNCount(dstPtr, maxDstSize, offcodeNCount, OFFCODE_MAX, Offlog);
|
|
804
828
|
if (FSE_isError(ohSize)) {
|
|
805
|
-
eSize =
|
|
829
|
+
eSize = ohSize;
|
|
806
830
|
DISPLAYLEVEL(1, "FSE_writeNCount error with offcodeNCount \n");
|
|
807
831
|
goto _cleanup;
|
|
808
832
|
}
|
|
@@ -813,7 +837,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
|
|
|
813
837
|
|
|
814
838
|
{ size_t const mhSize = FSE_writeNCount(dstPtr, maxDstSize, matchLengthNCount, MaxML, mlLog);
|
|
815
839
|
if (FSE_isError(mhSize)) {
|
|
816
|
-
eSize =
|
|
840
|
+
eSize = mhSize;
|
|
817
841
|
DISPLAYLEVEL(1, "FSE_writeNCount error with matchLengthNCount \n");
|
|
818
842
|
goto _cleanup;
|
|
819
843
|
}
|
|
@@ -824,7 +848,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
|
|
|
824
848
|
|
|
825
849
|
{ size_t const lhSize = FSE_writeNCount(dstPtr, maxDstSize, litLengthNCount, MaxLL, llLog);
|
|
826
850
|
if (FSE_isError(lhSize)) {
|
|
827
|
-
eSize =
|
|
851
|
+
eSize = lhSize;
|
|
828
852
|
DISPLAYLEVEL(1, "FSE_writeNCount error with litlengthNCount \n");
|
|
829
853
|
goto _cleanup;
|
|
830
854
|
}
|
|
@@ -834,7 +858,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
|
|
|
834
858
|
}
|
|
835
859
|
|
|
836
860
|
if (maxDstSize<12) {
|
|
837
|
-
eSize = ERROR(
|
|
861
|
+
eSize = ERROR(dstSize_tooSmall);
|
|
838
862
|
DISPLAYLEVEL(1, "not enough space to write RepOffsets \n");
|
|
839
863
|
goto _cleanup;
|
|
840
864
|
}
|