extzstd 0.3 → 0.3.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/HISTORY.ja.md +8 -0
- data/README.md +1 -1
- data/contrib/zstd/CHANGELOG +94 -0
- data/contrib/zstd/CONTRIBUTING.md +351 -1
- data/contrib/zstd/Makefile +32 -10
- data/contrib/zstd/README.md +33 -10
- data/contrib/zstd/TESTING.md +2 -2
- data/contrib/zstd/appveyor.yml +42 -4
- data/contrib/zstd/lib/Makefile +128 -60
- data/contrib/zstd/lib/README.md +47 -16
- data/contrib/zstd/lib/common/bitstream.h +38 -39
- data/contrib/zstd/lib/common/compiler.h +40 -5
- data/contrib/zstd/lib/common/cpu.h +1 -1
- data/contrib/zstd/lib/common/debug.c +11 -31
- data/contrib/zstd/lib/common/debug.h +11 -31
- data/contrib/zstd/lib/common/entropy_common.c +13 -33
- data/contrib/zstd/lib/common/error_private.c +2 -1
- data/contrib/zstd/lib/common/error_private.h +6 -2
- data/contrib/zstd/lib/common/fse.h +12 -32
- data/contrib/zstd/lib/common/fse_decompress.c +12 -35
- data/contrib/zstd/lib/common/huf.h +15 -33
- data/contrib/zstd/lib/common/mem.h +75 -2
- data/contrib/zstd/lib/common/pool.c +8 -4
- data/contrib/zstd/lib/common/pool.h +2 -2
- data/contrib/zstd/lib/common/threading.c +50 -4
- data/contrib/zstd/lib/common/threading.h +36 -4
- data/contrib/zstd/lib/common/xxhash.c +23 -35
- data/contrib/zstd/lib/common/xxhash.h +11 -31
- data/contrib/zstd/lib/common/zstd_common.c +1 -1
- data/contrib/zstd/lib/common/zstd_errors.h +2 -1
- data/contrib/zstd/lib/common/zstd_internal.h +154 -26
- data/contrib/zstd/lib/compress/fse_compress.c +17 -40
- data/contrib/zstd/lib/compress/hist.c +15 -35
- data/contrib/zstd/lib/compress/hist.h +12 -32
- data/contrib/zstd/lib/compress/huf_compress.c +92 -92
- data/contrib/zstd/lib/compress/zstd_compress.c +1191 -1330
- data/contrib/zstd/lib/compress/zstd_compress_internal.h +317 -55
- data/contrib/zstd/lib/compress/zstd_compress_literals.c +158 -0
- data/contrib/zstd/lib/compress/zstd_compress_literals.h +29 -0
- data/contrib/zstd/lib/compress/zstd_compress_sequences.c +419 -0
- data/contrib/zstd/lib/compress/zstd_compress_sequences.h +54 -0
- data/contrib/zstd/lib/compress/zstd_compress_superblock.c +845 -0
- data/contrib/zstd/lib/compress/zstd_compress_superblock.h +32 -0
- data/contrib/zstd/lib/compress/zstd_cwksp.h +525 -0
- data/contrib/zstd/lib/compress/zstd_double_fast.c +65 -43
- data/contrib/zstd/lib/compress/zstd_double_fast.h +2 -2
- data/contrib/zstd/lib/compress/zstd_fast.c +92 -66
- data/contrib/zstd/lib/compress/zstd_fast.h +2 -2
- data/contrib/zstd/lib/compress/zstd_lazy.c +74 -42
- data/contrib/zstd/lib/compress/zstd_lazy.h +1 -1
- data/contrib/zstd/lib/compress/zstd_ldm.c +32 -10
- data/contrib/zstd/lib/compress/zstd_ldm.h +7 -2
- data/contrib/zstd/lib/compress/zstd_opt.c +81 -114
- data/contrib/zstd/lib/compress/zstd_opt.h +1 -1
- data/contrib/zstd/lib/compress/zstdmt_compress.c +95 -51
- data/contrib/zstd/lib/compress/zstdmt_compress.h +3 -2
- data/contrib/zstd/lib/decompress/huf_decompress.c +76 -60
- data/contrib/zstd/lib/decompress/zstd_ddict.c +12 -8
- data/contrib/zstd/lib/decompress/zstd_ddict.h +2 -2
- data/contrib/zstd/lib/decompress/zstd_decompress.c +292 -172
- data/contrib/zstd/lib/decompress/zstd_decompress_block.c +459 -338
- data/contrib/zstd/lib/decompress/zstd_decompress_block.h +3 -3
- data/contrib/zstd/lib/decompress/zstd_decompress_internal.h +18 -4
- data/contrib/zstd/lib/deprecated/zbuff.h +9 -8
- data/contrib/zstd/lib/deprecated/zbuff_common.c +2 -2
- data/contrib/zstd/lib/deprecated/zbuff_compress.c +1 -1
- data/contrib/zstd/lib/deprecated/zbuff_decompress.c +1 -1
- data/contrib/zstd/lib/dictBuilder/cover.c +164 -54
- data/contrib/zstd/lib/dictBuilder/cover.h +52 -7
- data/contrib/zstd/lib/dictBuilder/fastcover.c +60 -43
- data/contrib/zstd/lib/dictBuilder/zdict.c +43 -19
- data/contrib/zstd/lib/dictBuilder/zdict.h +56 -28
- data/contrib/zstd/lib/legacy/zstd_legacy.h +8 -4
- data/contrib/zstd/lib/legacy/zstd_v01.c +110 -110
- data/contrib/zstd/lib/legacy/zstd_v01.h +1 -1
- data/contrib/zstd/lib/legacy/zstd_v02.c +23 -13
- data/contrib/zstd/lib/legacy/zstd_v02.h +1 -1
- data/contrib/zstd/lib/legacy/zstd_v03.c +23 -13
- data/contrib/zstd/lib/legacy/zstd_v03.h +1 -1
- data/contrib/zstd/lib/legacy/zstd_v04.c +30 -17
- data/contrib/zstd/lib/legacy/zstd_v04.h +1 -1
- data/contrib/zstd/lib/legacy/zstd_v05.c +113 -102
- data/contrib/zstd/lib/legacy/zstd_v05.h +2 -2
- data/contrib/zstd/lib/legacy/zstd_v06.c +20 -18
- data/contrib/zstd/lib/legacy/zstd_v06.h +1 -1
- data/contrib/zstd/lib/legacy/zstd_v07.c +25 -19
- data/contrib/zstd/lib/legacy/zstd_v07.h +1 -1
- data/contrib/zstd/lib/libzstd.pc.in +3 -2
- data/contrib/zstd/lib/zstd.h +265 -88
- data/ext/extzstd.h +1 -1
- data/ext/libzstd_conf.h +8 -0
- data/ext/zstd_common.c +1 -3
- data/ext/zstd_compress.c +3 -3
- data/ext/zstd_decompress.c +1 -5
- data/ext/zstd_dictbuilder.c +2 -3
- data/ext/zstd_dictbuilder_fastcover.c +1 -3
- data/ext/zstd_legacy_v01.c +2 -0
- data/ext/zstd_legacy_v02.c +2 -0
- data/ext/zstd_legacy_v03.c +2 -0
- data/ext/zstd_legacy_v04.c +2 -0
- data/ext/zstd_legacy_v05.c +2 -0
- data/ext/zstd_legacy_v06.c +2 -0
- data/ext/zstd_legacy_v07.c +2 -0
- data/lib/extzstd.rb +18 -10
- data/lib/extzstd/version.rb +1 -1
- metadata +15 -6
@@ -1,11 +1,21 @@
|
|
1
|
+
/*
|
2
|
+
* Copyright (c) 2017-2020, Facebook, Inc.
|
3
|
+
* All rights reserved.
|
4
|
+
*
|
5
|
+
* This source code is licensed under both the BSD-style license (found in the
|
6
|
+
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
|
7
|
+
* in the COPYING file in the root directory of this source tree).
|
8
|
+
* You may select, at your option, one of the above-listed licenses.
|
9
|
+
*/
|
10
|
+
|
1
11
|
#include <stdio.h> /* fprintf */
|
2
12
|
#include <stdlib.h> /* malloc, free, qsort */
|
3
13
|
#include <string.h> /* memset */
|
4
14
|
#include <time.h> /* clock */
|
5
|
-
#include "mem.h" /* read */
|
6
|
-
#include "pool.h"
|
7
|
-
#include "threading.h"
|
8
|
-
#include "zstd_internal.h" /* includes zstd.h */
|
15
|
+
#include "../common/mem.h" /* read */
|
16
|
+
#include "../common/pool.h"
|
17
|
+
#include "../common/threading.h"
|
18
|
+
#include "../common/zstd_internal.h" /* includes zstd.h */
|
9
19
|
#ifndef ZDICT_STATIC_LINKING_ONLY
|
10
20
|
#define ZDICT_STATIC_LINKING_ONLY
|
11
21
|
#endif
|
@@ -46,6 +56,15 @@ typedef struct {
|
|
46
56
|
U32 size;
|
47
57
|
} COVER_epoch_info_t;
|
48
58
|
|
59
|
+
/**
|
60
|
+
* Struct used for the dictionary selection function.
|
61
|
+
*/
|
62
|
+
typedef struct COVER_dictSelection {
|
63
|
+
BYTE* dictContent;
|
64
|
+
size_t dictSize;
|
65
|
+
size_t totalCompressedSize;
|
66
|
+
} COVER_dictSelection_t;
|
67
|
+
|
49
68
|
/**
|
50
69
|
* Computes the number of epochs and the size of each epoch.
|
51
70
|
* We will make sure that each epoch gets at least 10 * k bytes.
|
@@ -107,6 +126,32 @@ void COVER_best_start(COVER_best_t *best);
|
|
107
126
|
* Decrements liveJobs and signals any waiting threads if liveJobs == 0.
|
108
127
|
* If this dictionary is the best so far save it and its parameters.
|
109
128
|
*/
|
110
|
-
void COVER_best_finish(COVER_best_t *best,
|
111
|
-
|
112
|
-
|
129
|
+
void COVER_best_finish(COVER_best_t *best, ZDICT_cover_params_t parameters,
|
130
|
+
COVER_dictSelection_t selection);
|
131
|
+
/**
|
132
|
+
* Error function for COVER_selectDict function. Checks if the return
|
133
|
+
* value is an error.
|
134
|
+
*/
|
135
|
+
unsigned COVER_dictSelectionIsError(COVER_dictSelection_t selection);
|
136
|
+
|
137
|
+
/**
|
138
|
+
* Error function for COVER_selectDict function. Returns a struct where
|
139
|
+
* return.totalCompressedSize is a ZSTD error.
|
140
|
+
*/
|
141
|
+
COVER_dictSelection_t COVER_dictSelectionError(size_t error);
|
142
|
+
|
143
|
+
/**
|
144
|
+
* Always call after selectDict is called to free up used memory from
|
145
|
+
* newly created dictionary.
|
146
|
+
*/
|
147
|
+
void COVER_dictSelectionFree(COVER_dictSelection_t selection);
|
148
|
+
|
149
|
+
/**
|
150
|
+
* Called to finalize the dictionary and select one based on whether or not
|
151
|
+
* the shrink-dict flag was enabled. If enabled the dictionary used is the
|
152
|
+
* smallest dictionary within a specified regression of the compressed size
|
153
|
+
* from the largest dictionary.
|
154
|
+
*/
|
155
|
+
COVER_dictSelection_t COVER_selectDict(BYTE* customDictContent,
|
156
|
+
size_t dictContentSize, const BYTE* samplesBuffer, const size_t* samplesSizes, unsigned nbFinalizeSamples,
|
157
|
+
size_t nbCheckSamples, size_t nbSamples, ZDICT_cover_params_t params, size_t* offsets, size_t totalCompressedSize);
|
@@ -1,3 +1,13 @@
|
|
1
|
+
/*
|
2
|
+
* Copyright (c) 2018-2020, Facebook, Inc.
|
3
|
+
* All rights reserved.
|
4
|
+
*
|
5
|
+
* This source code is licensed under both the BSD-style license (found in the
|
6
|
+
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
|
7
|
+
* in the COPYING file in the root directory of this source tree).
|
8
|
+
* You may select, at your option, one of the above-listed licenses.
|
9
|
+
*/
|
10
|
+
|
1
11
|
/*-*************************************
|
2
12
|
* Dependencies
|
3
13
|
***************************************/
|
@@ -6,11 +16,11 @@
|
|
6
16
|
#include <string.h> /* memset */
|
7
17
|
#include <time.h> /* clock */
|
8
18
|
|
9
|
-
#include "mem.h" /* read */
|
10
|
-
#include "pool.h"
|
11
|
-
#include "threading.h"
|
19
|
+
#include "../common/mem.h" /* read */
|
20
|
+
#include "../common/pool.h"
|
21
|
+
#include "../common/threading.h"
|
12
22
|
#include "cover.h"
|
13
|
-
#include "zstd_internal.h" /* includes zstd.h */
|
23
|
+
#include "../common/zstd_internal.h" /* includes zstd.h */
|
14
24
|
#ifndef ZDICT_STATIC_LINKING_ONLY
|
15
25
|
#define ZDICT_STATIC_LINKING_ONLY
|
16
26
|
#endif
|
@@ -287,10 +297,10 @@ FASTCOVER_computeFrequency(U32* freqs, const FASTCOVER_ctx_t* ctx)
|
|
287
297
|
* Prepare a context for dictionary building.
|
288
298
|
* The context is only dependent on the parameter `d` and can be used multiple
|
289
299
|
* times.
|
290
|
-
* Returns
|
300
|
+
* Returns 0 on success or error code on error.
|
291
301
|
* The context must be destroyed with `FASTCOVER_ctx_destroy()`.
|
292
302
|
*/
|
293
|
-
static
|
303
|
+
static size_t
|
294
304
|
FASTCOVER_ctx_init(FASTCOVER_ctx_t* ctx,
|
295
305
|
const void* samplesBuffer,
|
296
306
|
const size_t* samplesSizes, unsigned nbSamples,
|
@@ -310,19 +320,19 @@ FASTCOVER_ctx_init(FASTCOVER_ctx_t* ctx,
|
|
310
320
|
totalSamplesSize >= (size_t)FASTCOVER_MAX_SAMPLES_SIZE) {
|
311
321
|
DISPLAYLEVEL(1, "Total samples size is too large (%u MB), maximum size is %u MB\n",
|
312
322
|
(unsigned)(totalSamplesSize >> 20), (FASTCOVER_MAX_SAMPLES_SIZE >> 20));
|
313
|
-
return
|
323
|
+
return ERROR(srcSize_wrong);
|
314
324
|
}
|
315
325
|
|
316
326
|
/* Check if there are at least 5 training samples */
|
317
327
|
if (nbTrainSamples < 5) {
|
318
328
|
DISPLAYLEVEL(1, "Total number of training samples is %u and is invalid\n", nbTrainSamples);
|
319
|
-
return
|
329
|
+
return ERROR(srcSize_wrong);
|
320
330
|
}
|
321
331
|
|
322
332
|
/* Check if there's testing sample */
|
323
333
|
if (nbTestSamples < 1) {
|
324
334
|
DISPLAYLEVEL(1, "Total number of testing samples is %u and is invalid.\n", nbTestSamples);
|
325
|
-
return
|
335
|
+
return ERROR(srcSize_wrong);
|
326
336
|
}
|
327
337
|
|
328
338
|
/* Zero the context */
|
@@ -347,7 +357,7 @@ FASTCOVER_ctx_init(FASTCOVER_ctx_t* ctx,
|
|
347
357
|
if (ctx->offsets == NULL) {
|
348
358
|
DISPLAYLEVEL(1, "Failed to allocate scratch buffers \n");
|
349
359
|
FASTCOVER_ctx_destroy(ctx);
|
350
|
-
return
|
360
|
+
return ERROR(memory_allocation);
|
351
361
|
}
|
352
362
|
|
353
363
|
/* Fill offsets from the samplesSizes */
|
@@ -364,13 +374,13 @@ FASTCOVER_ctx_init(FASTCOVER_ctx_t* ctx,
|
|
364
374
|
if (ctx->freqs == NULL) {
|
365
375
|
DISPLAYLEVEL(1, "Failed to allocate frequency table \n");
|
366
376
|
FASTCOVER_ctx_destroy(ctx);
|
367
|
-
return
|
377
|
+
return ERROR(memory_allocation);
|
368
378
|
}
|
369
379
|
|
370
380
|
DISPLAYLEVEL(2, "Computing frequencies\n");
|
371
381
|
FASTCOVER_computeFrequency(ctx->freqs, ctx);
|
372
382
|
|
373
|
-
return
|
383
|
+
return 0;
|
374
384
|
}
|
375
385
|
|
376
386
|
|
@@ -435,7 +445,6 @@ FASTCOVER_buildDictionary(const FASTCOVER_ctx_t* ctx,
|
|
435
445
|
return tail;
|
436
446
|
}
|
437
447
|
|
438
|
-
|
439
448
|
/**
|
440
449
|
* Parameters for FASTCOVER_tryParameters().
|
441
450
|
*/
|
@@ -464,6 +473,7 @@ static void FASTCOVER_tryParameters(void *opaque)
|
|
464
473
|
U16* segmentFreqs = (U16 *)calloc(((U64)1 << ctx->f), sizeof(U16));
|
465
474
|
/* Allocate space for hash table, dict, and freqs */
|
466
475
|
BYTE *const dict = (BYTE * const)malloc(dictBufferCapacity);
|
476
|
+
COVER_dictSelection_t selection = COVER_dictSelectionError(ERROR(GENERIC));
|
467
477
|
U32 *freqs = (U32*) malloc(((U64)1 << ctx->f) * sizeof(U32));
|
468
478
|
if (!segmentFreqs || !dict || !freqs) {
|
469
479
|
DISPLAYLEVEL(1, "Failed to allocate buffers: out of memory\n");
|
@@ -473,27 +483,24 @@ static void FASTCOVER_tryParameters(void *opaque)
|
|
473
483
|
memcpy(freqs, ctx->freqs, ((U64)1 << ctx->f) * sizeof(U32));
|
474
484
|
/* Build the dictionary */
|
475
485
|
{ const size_t tail = FASTCOVER_buildDictionary(ctx, freqs, dict, dictBufferCapacity,
|
476
|
-
|
486
|
+
parameters, segmentFreqs);
|
487
|
+
|
477
488
|
const unsigned nbFinalizeSamples = (unsigned)(ctx->nbTrainSamples * ctx->accelParams.finalize / 100);
|
478
|
-
|
479
|
-
|
480
|
-
|
481
|
-
|
482
|
-
|
489
|
+
selection = COVER_selectDict(dict + tail, dictBufferCapacity - tail,
|
490
|
+
ctx->samples, ctx->samplesSizes, nbFinalizeSamples, ctx->nbTrainSamples, ctx->nbSamples, parameters, ctx->offsets,
|
491
|
+
totalCompressedSize);
|
492
|
+
|
493
|
+
if (COVER_dictSelectionIsError(selection)) {
|
494
|
+
DISPLAYLEVEL(1, "Failed to select dictionary\n");
|
483
495
|
goto _cleanup;
|
484
496
|
}
|
485
497
|
}
|
486
|
-
/* Check total compressed size */
|
487
|
-
totalCompressedSize = COVER_checkTotalCompressedSize(parameters, ctx->samplesSizes,
|
488
|
-
ctx->samples, ctx->offsets,
|
489
|
-
ctx->nbTrainSamples, ctx->nbSamples,
|
490
|
-
dict, dictBufferCapacity);
|
491
498
|
_cleanup:
|
492
|
-
|
493
|
-
|
499
|
+
free(dict);
|
500
|
+
COVER_best_finish(data->best, parameters, selection);
|
494
501
|
free(data);
|
495
502
|
free(segmentFreqs);
|
496
|
-
|
503
|
+
COVER_dictSelectionFree(selection);
|
497
504
|
free(freqs);
|
498
505
|
}
|
499
506
|
|
@@ -508,6 +515,7 @@ FASTCOVER_convertToCoverParams(ZDICT_fastCover_params_t fastCoverParams,
|
|
508
515
|
coverParams->nbThreads = fastCoverParams.nbThreads;
|
509
516
|
coverParams->splitPoint = fastCoverParams.splitPoint;
|
510
517
|
coverParams->zParams = fastCoverParams.zParams;
|
518
|
+
coverParams->shrinkDict = fastCoverParams.shrinkDict;
|
511
519
|
}
|
512
520
|
|
513
521
|
|
@@ -524,6 +532,7 @@ FASTCOVER_convertToFastCoverParams(ZDICT_cover_params_t coverParams,
|
|
524
532
|
fastCoverParams->f = f;
|
525
533
|
fastCoverParams->accel = accel;
|
526
534
|
fastCoverParams->zParams = coverParams.zParams;
|
535
|
+
fastCoverParams->shrinkDict = coverParams.shrinkDict;
|
527
536
|
}
|
528
537
|
|
529
538
|
|
@@ -550,11 +559,11 @@ ZDICT_trainFromBuffer_fastCover(void* dictBuffer, size_t dictBufferCapacity,
|
|
550
559
|
if (!FASTCOVER_checkParameters(coverParams, dictBufferCapacity, parameters.f,
|
551
560
|
parameters.accel)) {
|
552
561
|
DISPLAYLEVEL(1, "FASTCOVER parameters incorrect\n");
|
553
|
-
return ERROR(
|
562
|
+
return ERROR(parameter_outOfBound);
|
554
563
|
}
|
555
564
|
if (nbSamples == 0) {
|
556
565
|
DISPLAYLEVEL(1, "FASTCOVER must have at least one input file\n");
|
557
|
-
return ERROR(
|
566
|
+
return ERROR(srcSize_wrong);
|
558
567
|
}
|
559
568
|
if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) {
|
560
569
|
DISPLAYLEVEL(1, "dictBufferCapacity must be at least %u\n",
|
@@ -564,11 +573,14 @@ ZDICT_trainFromBuffer_fastCover(void* dictBuffer, size_t dictBufferCapacity,
|
|
564
573
|
/* Assign corresponding FASTCOVER_accel_t to accelParams*/
|
565
574
|
accelParams = FASTCOVER_defaultAccelParameters[parameters.accel];
|
566
575
|
/* Initialize context */
|
567
|
-
|
576
|
+
{
|
577
|
+
size_t const initVal = FASTCOVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples,
|
568
578
|
coverParams.d, parameters.splitPoint, parameters.f,
|
569
|
-
accelParams)
|
570
|
-
|
571
|
-
|
579
|
+
accelParams);
|
580
|
+
if (ZSTD_isError(initVal)) {
|
581
|
+
DISPLAYLEVEL(1, "Failed to initialize context\n");
|
582
|
+
return initVal;
|
583
|
+
}
|
572
584
|
}
|
573
585
|
COVER_warnOnSmallCorpus(dictBufferCapacity, ctx.nbDmers, g_displayLevel);
|
574
586
|
/* Build the dictionary */
|
@@ -616,6 +628,7 @@ ZDICT_optimizeTrainFromBuffer_fastCover(
|
|
616
628
|
(1 + (kMaxD - kMinD) / 2) * (1 + (kMaxK - kMinK) / kStepSize);
|
617
629
|
const unsigned f = parameters->f == 0 ? DEFAULT_F : parameters->f;
|
618
630
|
const unsigned accel = parameters->accel == 0 ? DEFAULT_ACCEL : parameters->accel;
|
631
|
+
const unsigned shrinkDict = 0;
|
619
632
|
/* Local variables */
|
620
633
|
const int displayLevel = parameters->zParams.notificationLevel;
|
621
634
|
unsigned iteration = 1;
|
@@ -627,19 +640,19 @@ ZDICT_optimizeTrainFromBuffer_fastCover(
|
|
627
640
|
/* Checks */
|
628
641
|
if (splitPoint <= 0 || splitPoint > 1) {
|
629
642
|
LOCALDISPLAYLEVEL(displayLevel, 1, "Incorrect splitPoint\n");
|
630
|
-
return ERROR(
|
643
|
+
return ERROR(parameter_outOfBound);
|
631
644
|
}
|
632
645
|
if (accel == 0 || accel > FASTCOVER_MAX_ACCEL) {
|
633
646
|
LOCALDISPLAYLEVEL(displayLevel, 1, "Incorrect accel\n");
|
634
|
-
return ERROR(
|
647
|
+
return ERROR(parameter_outOfBound);
|
635
648
|
}
|
636
649
|
if (kMinK < kMaxD || kMaxK < kMinK) {
|
637
650
|
LOCALDISPLAYLEVEL(displayLevel, 1, "Incorrect k\n");
|
638
|
-
return ERROR(
|
651
|
+
return ERROR(parameter_outOfBound);
|
639
652
|
}
|
640
653
|
if (nbSamples == 0) {
|
641
654
|
LOCALDISPLAYLEVEL(displayLevel, 1, "FASTCOVER must have at least one input file\n");
|
642
|
-
return ERROR(
|
655
|
+
return ERROR(srcSize_wrong);
|
643
656
|
}
|
644
657
|
if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) {
|
645
658
|
LOCALDISPLAYLEVEL(displayLevel, 1, "dictBufferCapacity must be at least %u\n",
|
@@ -666,11 +679,14 @@ ZDICT_optimizeTrainFromBuffer_fastCover(
|
|
666
679
|
/* Initialize the context for this value of d */
|
667
680
|
FASTCOVER_ctx_t ctx;
|
668
681
|
LOCALDISPLAYLEVEL(displayLevel, 3, "d=%u\n", d);
|
669
|
-
|
670
|
-
|
671
|
-
|
672
|
-
|
673
|
-
|
682
|
+
{
|
683
|
+
size_t const initVal = FASTCOVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples, d, splitPoint, f, accelParams);
|
684
|
+
if (ZSTD_isError(initVal)) {
|
685
|
+
LOCALDISPLAYLEVEL(displayLevel, 1, "Failed to initialize context\n");
|
686
|
+
COVER_best_destroy(&best);
|
687
|
+
POOL_free(pool);
|
688
|
+
return initVal;
|
689
|
+
}
|
674
690
|
}
|
675
691
|
if (!warned) {
|
676
692
|
COVER_warnOnSmallCorpus(dictBufferCapacity, ctx.nbDmers, displayLevel);
|
@@ -687,7 +703,7 @@ ZDICT_optimizeTrainFromBuffer_fastCover(
|
|
687
703
|
COVER_best_destroy(&best);
|
688
704
|
FASTCOVER_ctx_destroy(&ctx);
|
689
705
|
POOL_free(pool);
|
690
|
-
return ERROR(
|
706
|
+
return ERROR(memory_allocation);
|
691
707
|
}
|
692
708
|
data->ctx = &ctx;
|
693
709
|
data->best = &best;
|
@@ -697,6 +713,7 @@ ZDICT_optimizeTrainFromBuffer_fastCover(
|
|
697
713
|
data->parameters.d = d;
|
698
714
|
data->parameters.splitPoint = splitPoint;
|
699
715
|
data->parameters.steps = kSteps;
|
716
|
+
data->parameters.shrinkDict = shrinkDict;
|
700
717
|
data->parameters.zParams.notificationLevel = g_displayLevel;
|
701
718
|
/* Check the parameters */
|
702
719
|
if (!FASTCOVER_checkParameters(data->parameters, dictBufferCapacity,
|
@@ -1,5 +1,5 @@
|
|
1
1
|
/*
|
2
|
-
* Copyright (c) 2016-
|
2
|
+
* Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
|
3
3
|
* All rights reserved.
|
4
4
|
*
|
5
5
|
* This source code is licensed under both the BSD-style license (found in the
|
@@ -37,17 +37,18 @@
|
|
37
37
|
#include <stdio.h> /* fprintf, fopen, ftello64 */
|
38
38
|
#include <time.h> /* clock */
|
39
39
|
|
40
|
-
#include "mem.h" /* read */
|
41
|
-
#include "fse.h" /* FSE_normalizeCount, FSE_writeNCount */
|
40
|
+
#include "../common/mem.h" /* read */
|
41
|
+
#include "../common/fse.h" /* FSE_normalizeCount, FSE_writeNCount */
|
42
42
|
#define HUF_STATIC_LINKING_ONLY
|
43
|
-
#include "huf.h" /* HUF_buildCTable, HUF_writeCTable */
|
44
|
-
#include "zstd_internal.h" /* includes zstd.h */
|
45
|
-
#include "xxhash.h" /* XXH64 */
|
43
|
+
#include "../common/huf.h" /* HUF_buildCTable, HUF_writeCTable */
|
44
|
+
#include "../common/zstd_internal.h" /* includes zstd.h */
|
45
|
+
#include "../common/xxhash.h" /* XXH64 */
|
46
46
|
#include "divsufsort.h"
|
47
47
|
#ifndef ZDICT_STATIC_LINKING_ONLY
|
48
48
|
# define ZDICT_STATIC_LINKING_ONLY
|
49
49
|
#endif
|
50
50
|
#include "zdict.h"
|
51
|
+
#include "../compress/zstd_compress_internal.h" /* ZSTD_loadCEntropy() */
|
51
52
|
|
52
53
|
|
53
54
|
/*-*************************************
|
@@ -99,6 +100,29 @@ unsigned ZDICT_getDictID(const void* dictBuffer, size_t dictSize)
|
|
99
100
|
return MEM_readLE32((const char*)dictBuffer + 4);
|
100
101
|
}
|
101
102
|
|
103
|
+
size_t ZDICT_getDictHeaderSize(const void* dictBuffer, size_t dictSize)
|
104
|
+
{
|
105
|
+
size_t headerSize;
|
106
|
+
if (dictSize <= 8 || MEM_readLE32(dictBuffer) != ZSTD_MAGIC_DICTIONARY) return ERROR(dictionary_corrupted);
|
107
|
+
|
108
|
+
{ unsigned offcodeMaxValue = MaxOff;
|
109
|
+
ZSTD_compressedBlockState_t* bs = (ZSTD_compressedBlockState_t*)malloc(sizeof(ZSTD_compressedBlockState_t));
|
110
|
+
U32* wksp = (U32*)malloc(HUF_WORKSPACE_SIZE);
|
111
|
+
short* offcodeNCount = (short*)malloc((MaxOff+1)*sizeof(short));
|
112
|
+
if (!bs || !wksp || !offcodeNCount) {
|
113
|
+
headerSize = ERROR(memory_allocation);
|
114
|
+
} else {
|
115
|
+
ZSTD_reset_compressedBlockState(bs);
|
116
|
+
headerSize = ZSTD_loadCEntropy(bs, wksp, offcodeNCount, &offcodeMaxValue, dictBuffer, dictSize);
|
117
|
+
}
|
118
|
+
|
119
|
+
free(bs);
|
120
|
+
free(wksp);
|
121
|
+
free(offcodeNCount);
|
122
|
+
}
|
123
|
+
|
124
|
+
return headerSize;
|
125
|
+
}
|
102
126
|
|
103
127
|
/*-********************************************************
|
104
128
|
* Dictionary training functions
|
@@ -571,7 +595,7 @@ static void ZDICT_fillNoise(void* buffer, size_t length)
|
|
571
595
|
unsigned const prime1 = 2654435761U;
|
572
596
|
unsigned const prime2 = 2246822519U;
|
573
597
|
unsigned acc = prime1;
|
574
|
-
size_t p=0
|
598
|
+
size_t p=0;
|
575
599
|
for (p=0; p<length; p++) {
|
576
600
|
acc *= prime2;
|
577
601
|
((unsigned char*)buffer)[p] = (unsigned char)(acc >> 21);
|
@@ -588,12 +612,12 @@ typedef struct
|
|
588
612
|
|
589
613
|
#define MAXREPOFFSET 1024
|
590
614
|
|
591
|
-
static void ZDICT_countEStats(EStats_ress_t esr, ZSTD_parameters params,
|
615
|
+
static void ZDICT_countEStats(EStats_ress_t esr, const ZSTD_parameters* params,
|
592
616
|
unsigned* countLit, unsigned* offsetcodeCount, unsigned* matchlengthCount, unsigned* litlengthCount, U32* repOffsets,
|
593
617
|
const void* src, size_t srcSize,
|
594
618
|
U32 notificationLevel)
|
595
619
|
{
|
596
|
-
size_t const blockSizeMax = MIN (ZSTD_BLOCKSIZE_MAX, 1 << params
|
620
|
+
size_t const blockSizeMax = MIN (ZSTD_BLOCKSIZE_MAX, 1 << params->cParams.windowLog);
|
597
621
|
size_t cSize;
|
598
622
|
|
599
623
|
if (srcSize > blockSizeMax) srcSize = blockSizeMax; /* protection vs large samples */
|
@@ -731,7 +755,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
|
|
731
755
|
|
732
756
|
/* collect stats on all samples */
|
733
757
|
for (u=0; u<nbFiles; u++) {
|
734
|
-
ZDICT_countEStats(esr, params,
|
758
|
+
ZDICT_countEStats(esr, &params,
|
735
759
|
countLit, offcodeCount, matchLengthCount, litLengthCount, repOffset,
|
736
760
|
(const char*)srcBuffer + pos, fileSizes[u],
|
737
761
|
notificationLevel);
|
@@ -741,7 +765,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
|
|
741
765
|
/* analyze, build stats, starting with literals */
|
742
766
|
{ size_t maxNbBits = HUF_buildCTable (hufTable, countLit, 255, huffLog);
|
743
767
|
if (HUF_isError(maxNbBits)) {
|
744
|
-
eSize =
|
768
|
+
eSize = maxNbBits;
|
745
769
|
DISPLAYLEVEL(1, " HUF_buildCTable error \n");
|
746
770
|
goto _cleanup;
|
747
771
|
}
|
@@ -764,7 +788,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
|
|
764
788
|
total=0; for (u=0; u<=offcodeMax; u++) total+=offcodeCount[u];
|
765
789
|
errorCode = FSE_normalizeCount(offcodeNCount, Offlog, offcodeCount, total, offcodeMax);
|
766
790
|
if (FSE_isError(errorCode)) {
|
767
|
-
eSize =
|
791
|
+
eSize = errorCode;
|
768
792
|
DISPLAYLEVEL(1, "FSE_normalizeCount error with offcodeCount \n");
|
769
793
|
goto _cleanup;
|
770
794
|
}
|
@@ -773,7 +797,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
|
|
773
797
|
total=0; for (u=0; u<=MaxML; u++) total+=matchLengthCount[u];
|
774
798
|
errorCode = FSE_normalizeCount(matchLengthNCount, mlLog, matchLengthCount, total, MaxML);
|
775
799
|
if (FSE_isError(errorCode)) {
|
776
|
-
eSize =
|
800
|
+
eSize = errorCode;
|
777
801
|
DISPLAYLEVEL(1, "FSE_normalizeCount error with matchLengthCount \n");
|
778
802
|
goto _cleanup;
|
779
803
|
}
|
@@ -782,7 +806,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
|
|
782
806
|
total=0; for (u=0; u<=MaxLL; u++) total+=litLengthCount[u];
|
783
807
|
errorCode = FSE_normalizeCount(litLengthNCount, llLog, litLengthCount, total, MaxLL);
|
784
808
|
if (FSE_isError(errorCode)) {
|
785
|
-
eSize =
|
809
|
+
eSize = errorCode;
|
786
810
|
DISPLAYLEVEL(1, "FSE_normalizeCount error with litLengthCount \n");
|
787
811
|
goto _cleanup;
|
788
812
|
}
|
@@ -791,7 +815,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
|
|
791
815
|
/* write result to buffer */
|
792
816
|
{ size_t const hhSize = HUF_writeCTable(dstPtr, maxDstSize, hufTable, 255, huffLog);
|
793
817
|
if (HUF_isError(hhSize)) {
|
794
|
-
eSize =
|
818
|
+
eSize = hhSize;
|
795
819
|
DISPLAYLEVEL(1, "HUF_writeCTable error \n");
|
796
820
|
goto _cleanup;
|
797
821
|
}
|
@@ -802,7 +826,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
|
|
802
826
|
|
803
827
|
{ size_t const ohSize = FSE_writeNCount(dstPtr, maxDstSize, offcodeNCount, OFFCODE_MAX, Offlog);
|
804
828
|
if (FSE_isError(ohSize)) {
|
805
|
-
eSize =
|
829
|
+
eSize = ohSize;
|
806
830
|
DISPLAYLEVEL(1, "FSE_writeNCount error with offcodeNCount \n");
|
807
831
|
goto _cleanup;
|
808
832
|
}
|
@@ -813,7 +837,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
|
|
813
837
|
|
814
838
|
{ size_t const mhSize = FSE_writeNCount(dstPtr, maxDstSize, matchLengthNCount, MaxML, mlLog);
|
815
839
|
if (FSE_isError(mhSize)) {
|
816
|
-
eSize =
|
840
|
+
eSize = mhSize;
|
817
841
|
DISPLAYLEVEL(1, "FSE_writeNCount error with matchLengthNCount \n");
|
818
842
|
goto _cleanup;
|
819
843
|
}
|
@@ -824,7 +848,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
|
|
824
848
|
|
825
849
|
{ size_t const lhSize = FSE_writeNCount(dstPtr, maxDstSize, litLengthNCount, MaxLL, llLog);
|
826
850
|
if (FSE_isError(lhSize)) {
|
827
|
-
eSize =
|
851
|
+
eSize = lhSize;
|
828
852
|
DISPLAYLEVEL(1, "FSE_writeNCount error with litlengthNCount \n");
|
829
853
|
goto _cleanup;
|
830
854
|
}
|
@@ -834,7 +858,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
|
|
834
858
|
}
|
835
859
|
|
836
860
|
if (maxDstSize<12) {
|
837
|
-
eSize = ERROR(
|
861
|
+
eSize = ERROR(dstSize_tooSmall);
|
838
862
|
DISPLAYLEVEL(1, "not enough space to write RepOffsets \n");
|
839
863
|
goto _cleanup;
|
840
864
|
}
|