extzstd 0.1.1 → 0.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/HISTORY.ja.md +18 -0
- data/README.md +15 -50
- data/contrib/zstd/CONTRIBUTING.md +1 -1
- data/contrib/zstd/COPYING +339 -0
- data/contrib/zstd/Makefile +82 -51
- data/contrib/zstd/NEWS +92 -5
- data/contrib/zstd/README.md +50 -41
- data/contrib/zstd/appveyor.yml +164 -102
- data/contrib/zstd/circle.yml +10 -22
- data/contrib/zstd/lib/BUCK +31 -10
- data/contrib/zstd/lib/Makefile +57 -31
- data/contrib/zstd/lib/README.md +68 -37
- data/contrib/zstd/lib/common/bitstream.h +130 -76
- data/contrib/zstd/lib/common/compiler.h +86 -0
- data/contrib/zstd/lib/common/error_private.c +15 -11
- data/contrib/zstd/lib/common/error_private.h +8 -8
- data/contrib/zstd/lib/common/fse.h +19 -9
- data/contrib/zstd/lib/common/fse_decompress.c +3 -22
- data/contrib/zstd/lib/common/huf.h +68 -26
- data/contrib/zstd/lib/common/mem.h +23 -35
- data/contrib/zstd/lib/common/pool.c +123 -63
- data/contrib/zstd/lib/common/pool.h +19 -10
- data/contrib/zstd/lib/common/threading.c +11 -16
- data/contrib/zstd/lib/common/threading.h +52 -33
- data/contrib/zstd/lib/common/xxhash.c +28 -22
- data/contrib/zstd/lib/common/zstd_common.c +40 -27
- data/contrib/zstd/lib/common/zstd_errors.h +43 -34
- data/contrib/zstd/lib/common/zstd_internal.h +131 -123
- data/contrib/zstd/lib/compress/fse_compress.c +17 -33
- data/contrib/zstd/lib/compress/huf_compress.c +15 -9
- data/contrib/zstd/lib/compress/zstd_compress.c +2096 -2363
- data/contrib/zstd/lib/compress/zstd_compress_internal.h +462 -0
- data/contrib/zstd/lib/compress/zstd_double_fast.c +309 -0
- data/contrib/zstd/lib/compress/zstd_double_fast.h +29 -0
- data/contrib/zstd/lib/compress/zstd_fast.c +243 -0
- data/contrib/zstd/lib/compress/zstd_fast.h +31 -0
- data/contrib/zstd/lib/compress/zstd_lazy.c +765 -0
- data/contrib/zstd/lib/compress/zstd_lazy.h +39 -0
- data/contrib/zstd/lib/compress/zstd_ldm.c +707 -0
- data/contrib/zstd/lib/compress/zstd_ldm.h +68 -0
- data/contrib/zstd/lib/compress/zstd_opt.c +785 -0
- data/contrib/zstd/lib/compress/zstd_opt.h +19 -908
- data/contrib/zstd/lib/compress/zstdmt_compress.c +737 -327
- data/contrib/zstd/lib/compress/zstdmt_compress.h +88 -26
- data/contrib/zstd/lib/decompress/huf_decompress.c +158 -50
- data/contrib/zstd/lib/decompress/zstd_decompress.c +884 -699
- data/contrib/zstd/lib/deprecated/zbuff.h +5 -4
- data/contrib/zstd/lib/deprecated/zbuff_common.c +5 -5
- data/contrib/zstd/lib/deprecated/zbuff_compress.c +6 -4
- data/contrib/zstd/lib/deprecated/zbuff_decompress.c +5 -4
- data/contrib/zstd/lib/dictBuilder/cover.c +93 -77
- data/contrib/zstd/lib/dictBuilder/zdict.c +107 -92
- data/contrib/zstd/lib/dictBuilder/zdict.h +112 -102
- data/contrib/zstd/lib/legacy/zstd_legacy.h +9 -4
- data/contrib/zstd/lib/legacy/zstd_v01.c +7 -6
- data/contrib/zstd/lib/legacy/zstd_v01.h +5 -4
- data/contrib/zstd/lib/legacy/zstd_v02.c +27 -99
- data/contrib/zstd/lib/legacy/zstd_v02.h +5 -4
- data/contrib/zstd/lib/legacy/zstd_v03.c +26 -98
- data/contrib/zstd/lib/legacy/zstd_v03.h +5 -4
- data/contrib/zstd/lib/legacy/zstd_v04.c +22 -91
- data/contrib/zstd/lib/legacy/zstd_v04.h +5 -4
- data/contrib/zstd/lib/legacy/zstd_v05.c +23 -99
- data/contrib/zstd/lib/legacy/zstd_v05.h +5 -4
- data/contrib/zstd/lib/legacy/zstd_v06.c +22 -96
- data/contrib/zstd/lib/legacy/zstd_v06.h +5 -4
- data/contrib/zstd/lib/legacy/zstd_v07.c +19 -95
- data/contrib/zstd/lib/legacy/zstd_v07.h +5 -4
- data/contrib/zstd/lib/zstd.h +895 -271
- data/ext/extconf.rb +11 -2
- data/ext/extzstd.c +45 -128
- data/ext/extzstd.h +74 -31
- data/ext/extzstd_stream.c +401 -142
- data/ext/zstd_common.c +5 -0
- data/ext/zstd_compress.c +8 -0
- data/ext/zstd_decompress.c +1 -0
- data/ext/zstd_dictbuilder.c +2 -0
- data/lib/extzstd/version.rb +1 -1
- data/lib/extzstd.rb +48 -1
- data/test/test_basic.rb +9 -1
- metadata +17 -7
- data/HISTORY.ja +0 -10
- data/contrib/zstd/LICENSE-examples +0 -11
- data/contrib/zstd/PATENTS +0 -33
@@ -1,10 +1,11 @@
|
|
1
|
-
|
1
|
+
/*
|
2
2
|
* Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
|
3
3
|
* All rights reserved.
|
4
4
|
*
|
5
|
-
* This source code is licensed under the BSD-style license found in the
|
6
|
-
* LICENSE file in the root directory of this source tree
|
7
|
-
*
|
5
|
+
* This source code is licensed under both the BSD-style license (found in the
|
6
|
+
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
|
7
|
+
* in the COPYING file in the root directory of this source tree).
|
8
|
+
* You may select, at your option, one of the above-listed licenses.
|
8
9
|
*/
|
9
10
|
|
10
11
|
/* ***************************************************************
|
@@ -1,10 +1,11 @@
|
|
1
|
-
|
1
|
+
/*
|
2
2
|
* Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
|
3
3
|
* All rights reserved.
|
4
4
|
*
|
5
|
-
* This source code is licensed under the BSD-style license found in the
|
6
|
-
* LICENSE file in the root directory of this source tree
|
7
|
-
*
|
5
|
+
* This source code is licensed under both the BSD-style license (found in the
|
6
|
+
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
|
7
|
+
* in the COPYING file in the root directory of this source tree).
|
8
|
+
* You may select, at your option, one of the above-listed licenses.
|
8
9
|
*/
|
9
10
|
|
10
11
|
/*-*************************************
|
@@ -23,4 +24,3 @@ unsigned ZBUFF_isError(size_t errorCode) { return ERR_isError(errorCode); }
|
|
23
24
|
/*! ZBUFF_getErrorName() :
|
24
25
|
* provides error code string from function result (useful for debugging) */
|
25
26
|
const char* ZBUFF_getErrorName(size_t errorCode) { return ERR_getErrorName(errorCode); }
|
26
|
-
|
@@ -1,10 +1,11 @@
|
|
1
|
-
|
1
|
+
/*
|
2
2
|
* Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
|
3
3
|
* All rights reserved.
|
4
4
|
*
|
5
|
-
* This source code is licensed under the BSD-style license found in the
|
6
|
-
* LICENSE file in the root directory of this source tree
|
7
|
-
*
|
5
|
+
* This source code is licensed under both the BSD-style license (found in the
|
6
|
+
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
|
7
|
+
* in the COPYING file in the root directory of this source tree).
|
8
|
+
* You may select, at your option, one of the above-listed licenses.
|
8
9
|
*/
|
9
10
|
|
10
11
|
|
@@ -71,6 +72,7 @@ size_t ZBUFF_compressInit_advanced(ZBUFF_CCtx* zbc,
|
|
71
72
|
const void* dict, size_t dictSize,
|
72
73
|
ZSTD_parameters params, unsigned long long pledgedSrcSize)
|
73
74
|
{
|
75
|
+
if (pledgedSrcSize==0) pledgedSrcSize = ZSTD_CONTENTSIZE_UNKNOWN; /* preserve "0 == unknown" behavior */
|
74
76
|
return ZSTD_initCStream_advanced(zbc, dict, dictSize, params, pledgedSrcSize);
|
75
77
|
}
|
76
78
|
|
@@ -1,10 +1,11 @@
|
|
1
|
-
|
1
|
+
/*
|
2
2
|
* Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
|
3
3
|
* All rights reserved.
|
4
4
|
*
|
5
|
-
* This source code is licensed under the BSD-style license found in the
|
6
|
-
* LICENSE file in the root directory of this source tree
|
7
|
-
*
|
5
|
+
* This source code is licensed under both the BSD-style license (found in the
|
6
|
+
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
|
7
|
+
* in the COPYING file in the root directory of this source tree).
|
8
|
+
* You may select, at your option, one of the above-listed licenses.
|
8
9
|
*/
|
9
10
|
|
10
11
|
|
@@ -1,10 +1,11 @@
|
|
1
|
-
|
1
|
+
/*
|
2
2
|
* Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
|
3
3
|
* All rights reserved.
|
4
4
|
*
|
5
|
-
* This source code is licensed under the BSD-style license found in the
|
6
|
-
* LICENSE file in the root directory of this source tree
|
7
|
-
*
|
5
|
+
* This source code is licensed under both the BSD-style license (found in the
|
6
|
+
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
|
7
|
+
* in the COPYING file in the root directory of this source tree).
|
8
|
+
* You may select, at your option, one of the above-listed licenses.
|
8
9
|
*/
|
9
10
|
|
10
11
|
/* *****************************************************************************
|
@@ -59,8 +60,6 @@ static int g_displayLevel = 2;
|
|
59
60
|
if ((clock() - g_time > refreshRate) || (displayLevel >= 4)) { \
|
60
61
|
g_time = clock(); \
|
61
62
|
DISPLAY(__VA_ARGS__); \
|
62
|
-
if (displayLevel >= 4) \
|
63
|
-
fflush(stdout); \
|
64
63
|
} \
|
65
64
|
}
|
66
65
|
#define DISPLAYUPDATE(l, ...) LOCALDISPLAYUPDATE(g_displayLevel, l, __VA_ARGS__)
|
@@ -236,10 +235,22 @@ static size_t COVER_sum(const size_t *samplesSizes, unsigned nbSamples) {
|
|
236
235
|
* Returns 1 if the dmer at lp is greater than the dmer at rp.
|
237
236
|
*/
|
238
237
|
static int COVER_cmp(COVER_ctx_t *ctx, const void *lp, const void *rp) {
|
239
|
-
const
|
240
|
-
const
|
238
|
+
U32 const lhs = *(U32 const *)lp;
|
239
|
+
U32 const rhs = *(U32 const *)rp;
|
241
240
|
return memcmp(ctx->samples + lhs, ctx->samples + rhs, ctx->d);
|
242
241
|
}
|
242
|
+
/**
|
243
|
+
* Faster version for d <= 8.
|
244
|
+
*/
|
245
|
+
static int COVER_cmp8(COVER_ctx_t *ctx, const void *lp, const void *rp) {
|
246
|
+
U64 const mask = (ctx->d == 8) ? (U64)-1 : (((U64)1 << (8 * ctx->d)) - 1);
|
247
|
+
U64 const lhs = MEM_readLE64(ctx->samples + *(U32 const *)lp) & mask;
|
248
|
+
U64 const rhs = MEM_readLE64(ctx->samples + *(U32 const *)rp) & mask;
|
249
|
+
if (lhs < rhs) {
|
250
|
+
return -1;
|
251
|
+
}
|
252
|
+
return (lhs > rhs);
|
253
|
+
}
|
243
254
|
|
244
255
|
/**
|
245
256
|
* Same as COVER_cmp() except ties are broken by pointer value
|
@@ -253,6 +264,16 @@ static int COVER_strict_cmp(const void *lp, const void *rp) {
|
|
253
264
|
}
|
254
265
|
return result;
|
255
266
|
}
|
267
|
+
/**
|
268
|
+
* Faster version for d <= 8.
|
269
|
+
*/
|
270
|
+
static int COVER_strict_cmp8(const void *lp, const void *rp) {
|
271
|
+
int result = COVER_cmp8(g_ctx, lp, rp);
|
272
|
+
if (result == 0) {
|
273
|
+
result = lp < rp ? -1 : 1;
|
274
|
+
}
|
275
|
+
return result;
|
276
|
+
}
|
256
277
|
|
257
278
|
/**
|
258
279
|
* Returns the first pointer in [first, last) whose element does not compare
|
@@ -362,7 +383,7 @@ static void COVER_group(COVER_ctx_t *ctx, const void *group,
|
|
362
383
|
typedef struct {
|
363
384
|
U32 begin;
|
364
385
|
U32 end;
|
365
|
-
|
386
|
+
U32 score;
|
366
387
|
} COVER_segment_t;
|
367
388
|
|
368
389
|
/**
|
@@ -378,7 +399,8 @@ typedef struct {
|
|
378
399
|
*/
|
379
400
|
static COVER_segment_t COVER_selectSegment(const COVER_ctx_t *ctx, U32 *freqs,
|
380
401
|
COVER_map_t *activeDmers, U32 begin,
|
381
|
-
U32 end,
|
402
|
+
U32 end,
|
403
|
+
ZDICT_cover_params_t parameters) {
|
382
404
|
/* Constants */
|
383
405
|
const U32 k = parameters.k;
|
384
406
|
const U32 d = parameters.d;
|
@@ -458,11 +480,16 @@ static COVER_segment_t COVER_selectSegment(const COVER_ctx_t *ctx, U32 *freqs,
|
|
458
480
|
* Check the validity of the parameters.
|
459
481
|
* Returns non-zero if the parameters are valid and 0 otherwise.
|
460
482
|
*/
|
461
|
-
static int COVER_checkParameters(
|
483
|
+
static int COVER_checkParameters(ZDICT_cover_params_t parameters,
|
484
|
+
size_t maxDictSize) {
|
462
485
|
/* k and d are required parameters */
|
463
486
|
if (parameters.d == 0 || parameters.k == 0) {
|
464
487
|
return 0;
|
465
488
|
}
|
489
|
+
/* k <= maxDictSize */
|
490
|
+
if (parameters.k > maxDictSize) {
|
491
|
+
return 0;
|
492
|
+
}
|
466
493
|
/* d <= k */
|
467
494
|
if (parameters.d > parameters.k) {
|
468
495
|
return 0;
|
@@ -508,7 +535,7 @@ static int COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer,
|
|
508
535
|
const BYTE *const samples = (const BYTE *)samplesBuffer;
|
509
536
|
const size_t totalSamplesSize = COVER_sum(samplesSizes, nbSamples);
|
510
537
|
/* Checks */
|
511
|
-
if (totalSamplesSize < d ||
|
538
|
+
if (totalSamplesSize < MAX(d, sizeof(U64)) ||
|
512
539
|
totalSamplesSize >= (size_t)COVER_MAX_SAMPLES_SIZE) {
|
513
540
|
DISPLAYLEVEL(1, "Total samples size is too large, maximum size is %u MB\n",
|
514
541
|
(COVER_MAX_SAMPLES_SIZE >> 20));
|
@@ -522,7 +549,7 @@ static int COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer,
|
|
522
549
|
ctx->samplesSizes = samplesSizes;
|
523
550
|
ctx->nbSamples = nbSamples;
|
524
551
|
/* Partial suffix array */
|
525
|
-
ctx->suffixSize = totalSamplesSize - d + 1;
|
552
|
+
ctx->suffixSize = totalSamplesSize - MAX(d, sizeof(U64)) + 1;
|
526
553
|
ctx->suffix = (U32 *)malloc(ctx->suffixSize * sizeof(U32));
|
527
554
|
/* Maps index to the dmerID */
|
528
555
|
ctx->dmerAt = (U32 *)malloc(ctx->suffixSize * sizeof(U32));
|
@@ -556,7 +583,8 @@ static int COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer,
|
|
556
583
|
}
|
557
584
|
/* qsort doesn't take an opaque pointer, so pass as a global */
|
558
585
|
g_ctx = ctx;
|
559
|
-
qsort(ctx->suffix, ctx->suffixSize, sizeof(U32),
|
586
|
+
qsort(ctx->suffix, ctx->suffixSize, sizeof(U32),
|
587
|
+
(ctx->d <= 8 ? &COVER_strict_cmp8 : &COVER_strict_cmp));
|
560
588
|
}
|
561
589
|
DISPLAYLEVEL(2, "Computing frequencies\n");
|
562
590
|
/* For each dmer group (group of positions with the same first d bytes):
|
@@ -566,8 +594,8 @@ static int COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer,
|
|
566
594
|
* 2. We calculate how many samples the dmer occurs in and save it in
|
567
595
|
* freqs[dmerId].
|
568
596
|
*/
|
569
|
-
COVER_groupBy(ctx->suffix, ctx->suffixSize, sizeof(U32), ctx,
|
570
|
-
&COVER_group);
|
597
|
+
COVER_groupBy(ctx->suffix, ctx->suffixSize, sizeof(U32), ctx,
|
598
|
+
(ctx->d <= 8 ? &COVER_cmp8 : &COVER_cmp), &COVER_group);
|
571
599
|
ctx->freqs = ctx->suffix;
|
572
600
|
ctx->suffix = NULL;
|
573
601
|
return 1;
|
@@ -579,7 +607,7 @@ static int COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer,
|
|
579
607
|
static size_t COVER_buildDictionary(const COVER_ctx_t *ctx, U32 *freqs,
|
580
608
|
COVER_map_t *activeDmers, void *dictBuffer,
|
581
609
|
size_t dictBufferCapacity,
|
582
|
-
|
610
|
+
ZDICT_cover_params_t parameters) {
|
583
611
|
BYTE *const dict = (BYTE *)dictBuffer;
|
584
612
|
size_t tail = dictBufferCapacity;
|
585
613
|
/* Divide the data up into epochs of equal size.
|
@@ -600,9 +628,13 @@ static size_t COVER_buildDictionary(const COVER_ctx_t *ctx, U32 *freqs,
|
|
600
628
|
/* Select a segment */
|
601
629
|
COVER_segment_t segment = COVER_selectSegment(
|
602
630
|
ctx, freqs, activeDmers, epochBegin, epochEnd, parameters);
|
603
|
-
/*
|
631
|
+
/* If the segment covers no dmers, then we are out of content */
|
632
|
+
if (segment.score == 0) {
|
633
|
+
break;
|
634
|
+
}
|
635
|
+
/* Trim the segment if necessary and if it is too small then we are done */
|
604
636
|
segmentSize = MIN(segment.end - segment.begin + parameters.d - 1, tail);
|
605
|
-
if (segmentSize
|
637
|
+
if (segmentSize < parameters.d) {
|
606
638
|
break;
|
607
639
|
}
|
608
640
|
/* We fill the dictionary from the back to allow the best segments to be
|
@@ -618,27 +650,15 @@ static size_t COVER_buildDictionary(const COVER_ctx_t *ctx, U32 *freqs,
|
|
618
650
|
return tail;
|
619
651
|
}
|
620
652
|
|
621
|
-
|
622
|
-
* Translate from COVER_params_t to ZDICT_params_t required for finalizing the
|
623
|
-
* dictionary.
|
624
|
-
*/
|
625
|
-
static ZDICT_params_t COVER_translateParams(COVER_params_t parameters) {
|
626
|
-
ZDICT_params_t zdictParams;
|
627
|
-
memset(&zdictParams, 0, sizeof(zdictParams));
|
628
|
-
zdictParams.notificationLevel = 1;
|
629
|
-
zdictParams.dictID = parameters.dictID;
|
630
|
-
zdictParams.compressionLevel = parameters.compressionLevel;
|
631
|
-
return zdictParams;
|
632
|
-
}
|
633
|
-
|
634
|
-
ZDICTLIB_API size_t COVER_trainFromBuffer(
|
653
|
+
ZDICTLIB_API size_t ZDICT_trainFromBuffer_cover(
|
635
654
|
void *dictBuffer, size_t dictBufferCapacity, const void *samplesBuffer,
|
636
|
-
const size_t *samplesSizes, unsigned nbSamples,
|
655
|
+
const size_t *samplesSizes, unsigned nbSamples,
|
656
|
+
ZDICT_cover_params_t parameters) {
|
637
657
|
BYTE *const dict = (BYTE *)dictBuffer;
|
638
658
|
COVER_ctx_t ctx;
|
639
659
|
COVER_map_t activeDmers;
|
640
660
|
/* Checks */
|
641
|
-
if (!COVER_checkParameters(parameters)) {
|
661
|
+
if (!COVER_checkParameters(parameters, dictBufferCapacity)) {
|
642
662
|
DISPLAYLEVEL(1, "Cover parameters incorrect\n");
|
643
663
|
return ERROR(GENERIC);
|
644
664
|
}
|
@@ -652,7 +672,7 @@ ZDICTLIB_API size_t COVER_trainFromBuffer(
|
|
652
672
|
return ERROR(dstSize_tooSmall);
|
653
673
|
}
|
654
674
|
/* Initialize global data */
|
655
|
-
g_displayLevel = parameters.notificationLevel;
|
675
|
+
g_displayLevel = parameters.zParams.notificationLevel;
|
656
676
|
/* Initialize context and activeDmers */
|
657
677
|
if (!COVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples,
|
658
678
|
parameters.d)) {
|
@@ -669,10 +689,9 @@ ZDICTLIB_API size_t COVER_trainFromBuffer(
|
|
669
689
|
const size_t tail =
|
670
690
|
COVER_buildDictionary(&ctx, ctx.freqs, &activeDmers, dictBuffer,
|
671
691
|
dictBufferCapacity, parameters);
|
672
|
-
ZDICT_params_t zdictParams = COVER_translateParams(parameters);
|
673
692
|
const size_t dictionarySize = ZDICT_finalizeDictionary(
|
674
693
|
dict, dictBufferCapacity, dict + tail, dictBufferCapacity - tail,
|
675
|
-
samplesBuffer, samplesSizes, nbSamples,
|
694
|
+
samplesBuffer, samplesSizes, nbSamples, parameters.zParams);
|
676
695
|
if (!ZSTD_isError(dictionarySize)) {
|
677
696
|
DISPLAYLEVEL(2, "Constructed dictionary of size %u\n",
|
678
697
|
(U32)dictionarySize);
|
@@ -692,12 +711,12 @@ ZDICTLIB_API size_t COVER_trainFromBuffer(
|
|
692
711
|
* compiled with multithreaded support.
|
693
712
|
*/
|
694
713
|
typedef struct COVER_best_s {
|
695
|
-
|
696
|
-
|
714
|
+
ZSTD_pthread_mutex_t mutex;
|
715
|
+
ZSTD_pthread_cond_t cond;
|
697
716
|
size_t liveJobs;
|
698
717
|
void *dict;
|
699
718
|
size_t dictSize;
|
700
|
-
|
719
|
+
ZDICT_cover_params_t parameters;
|
701
720
|
size_t compressedSize;
|
702
721
|
} COVER_best_t;
|
703
722
|
|
@@ -705,11 +724,9 @@ typedef struct COVER_best_s {
|
|
705
724
|
* Initialize the `COVER_best_t`.
|
706
725
|
*/
|
707
726
|
static void COVER_best_init(COVER_best_t *best) {
|
708
|
-
if (
|
709
|
-
|
710
|
-
|
711
|
-
pthread_mutex_init(&best->mutex, NULL);
|
712
|
-
pthread_cond_init(&best->cond, NULL);
|
727
|
+
if (best==NULL) return; /* compatible with init on NULL */
|
728
|
+
(void)ZSTD_pthread_mutex_init(&best->mutex, NULL);
|
729
|
+
(void)ZSTD_pthread_cond_init(&best->cond, NULL);
|
713
730
|
best->liveJobs = 0;
|
714
731
|
best->dict = NULL;
|
715
732
|
best->dictSize = 0;
|
@@ -724,11 +741,11 @@ static void COVER_best_wait(COVER_best_t *best) {
|
|
724
741
|
if (!best) {
|
725
742
|
return;
|
726
743
|
}
|
727
|
-
|
744
|
+
ZSTD_pthread_mutex_lock(&best->mutex);
|
728
745
|
while (best->liveJobs != 0) {
|
729
|
-
|
746
|
+
ZSTD_pthread_cond_wait(&best->cond, &best->mutex);
|
730
747
|
}
|
731
|
-
|
748
|
+
ZSTD_pthread_mutex_unlock(&best->mutex);
|
732
749
|
}
|
733
750
|
|
734
751
|
/**
|
@@ -742,8 +759,8 @@ static void COVER_best_destroy(COVER_best_t *best) {
|
|
742
759
|
if (best->dict) {
|
743
760
|
free(best->dict);
|
744
761
|
}
|
745
|
-
|
746
|
-
|
762
|
+
ZSTD_pthread_mutex_destroy(&best->mutex);
|
763
|
+
ZSTD_pthread_cond_destroy(&best->cond);
|
747
764
|
}
|
748
765
|
|
749
766
|
/**
|
@@ -754,9 +771,9 @@ static void COVER_best_start(COVER_best_t *best) {
|
|
754
771
|
if (!best) {
|
755
772
|
return;
|
756
773
|
}
|
757
|
-
|
774
|
+
ZSTD_pthread_mutex_lock(&best->mutex);
|
758
775
|
++best->liveJobs;
|
759
|
-
|
776
|
+
ZSTD_pthread_mutex_unlock(&best->mutex);
|
760
777
|
}
|
761
778
|
|
762
779
|
/**
|
@@ -765,14 +782,14 @@ static void COVER_best_start(COVER_best_t *best) {
|
|
765
782
|
* If this dictionary is the best so far save it and its parameters.
|
766
783
|
*/
|
767
784
|
static void COVER_best_finish(COVER_best_t *best, size_t compressedSize,
|
768
|
-
|
785
|
+
ZDICT_cover_params_t parameters, void *dict,
|
769
786
|
size_t dictSize) {
|
770
787
|
if (!best) {
|
771
788
|
return;
|
772
789
|
}
|
773
790
|
{
|
774
791
|
size_t liveJobs;
|
775
|
-
|
792
|
+
ZSTD_pthread_mutex_lock(&best->mutex);
|
776
793
|
--best->liveJobs;
|
777
794
|
liveJobs = best->liveJobs;
|
778
795
|
/* If the new dictionary is better */
|
@@ -795,9 +812,9 @@ static void COVER_best_finish(COVER_best_t *best, size_t compressedSize,
|
|
795
812
|
best->parameters = parameters;
|
796
813
|
best->compressedSize = compressedSize;
|
797
814
|
}
|
798
|
-
|
815
|
+
ZSTD_pthread_mutex_unlock(&best->mutex);
|
799
816
|
if (liveJobs == 0) {
|
800
|
-
|
817
|
+
ZSTD_pthread_cond_broadcast(&best->cond);
|
801
818
|
}
|
802
819
|
}
|
803
820
|
}
|
@@ -809,7 +826,7 @@ typedef struct COVER_tryParameters_data_s {
|
|
809
826
|
const COVER_ctx_t *ctx;
|
810
827
|
COVER_best_t *best;
|
811
828
|
size_t dictBufferCapacity;
|
812
|
-
|
829
|
+
ZDICT_cover_params_t parameters;
|
813
830
|
} COVER_tryParameters_data_t;
|
814
831
|
|
815
832
|
/**
|
@@ -821,7 +838,7 @@ static void COVER_tryParameters(void *opaque) {
|
|
821
838
|
/* Save parameters as local variables */
|
822
839
|
COVER_tryParameters_data_t *const data = (COVER_tryParameters_data_t *)opaque;
|
823
840
|
const COVER_ctx_t *const ctx = data->ctx;
|
824
|
-
const
|
841
|
+
const ZDICT_cover_params_t parameters = data->parameters;
|
825
842
|
size_t dictBufferCapacity = data->dictBufferCapacity;
|
826
843
|
size_t totalCompressedSize = ERROR(GENERIC);
|
827
844
|
/* Allocate space for hash table, dict, and freqs */
|
@@ -842,10 +859,10 @@ static void COVER_tryParameters(void *opaque) {
|
|
842
859
|
{
|
843
860
|
const size_t tail = COVER_buildDictionary(ctx, freqs, &activeDmers, dict,
|
844
861
|
dictBufferCapacity, parameters);
|
845
|
-
const ZDICT_params_t zdictParams = COVER_translateParams(parameters);
|
846
862
|
dictBufferCapacity = ZDICT_finalizeDictionary(
|
847
863
|
dict, dictBufferCapacity, dict + tail, dictBufferCapacity - tail,
|
848
|
-
ctx->samples, ctx->samplesSizes, (unsigned)ctx->nbSamples,
|
864
|
+
ctx->samples, ctx->samplesSizes, (unsigned)ctx->nbSamples,
|
865
|
+
parameters.zParams);
|
849
866
|
if (ZDICT_isError(dictBufferCapacity)) {
|
850
867
|
DISPLAYLEVEL(1, "Failed to finalize dictionary\n");
|
851
868
|
goto _cleanup;
|
@@ -871,13 +888,13 @@ static void COVER_tryParameters(void *opaque) {
|
|
871
888
|
}
|
872
889
|
/* Create the cctx and cdict */
|
873
890
|
cctx = ZSTD_createCCtx();
|
874
|
-
cdict =
|
875
|
-
|
891
|
+
cdict = ZSTD_createCDict(dict, dictBufferCapacity,
|
892
|
+
parameters.zParams.compressionLevel);
|
876
893
|
if (!dst || !cctx || !cdict) {
|
877
894
|
goto _compressCleanup;
|
878
895
|
}
|
879
896
|
/* Compress each sample and sum their sizes (or error) */
|
880
|
-
totalCompressedSize =
|
897
|
+
totalCompressedSize = dictBufferCapacity;
|
881
898
|
for (i = 0; i < ctx->nbSamples; ++i) {
|
882
899
|
const size_t size = ZSTD_compress_usingCDict(
|
883
900
|
cctx, dst, dstCapacity, ctx->samples + ctx->offsets[i],
|
@@ -909,24 +926,22 @@ _cleanup:
|
|
909
926
|
}
|
910
927
|
}
|
911
928
|
|
912
|
-
ZDICTLIB_API size_t
|
913
|
-
|
914
|
-
|
915
|
-
|
916
|
-
unsigned nbSamples,
|
917
|
-
COVER_params_t *parameters) {
|
929
|
+
ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover(
|
930
|
+
void *dictBuffer, size_t dictBufferCapacity, const void *samplesBuffer,
|
931
|
+
const size_t *samplesSizes, unsigned nbSamples,
|
932
|
+
ZDICT_cover_params_t *parameters) {
|
918
933
|
/* constants */
|
919
934
|
const unsigned nbThreads = parameters->nbThreads;
|
920
935
|
const unsigned kMinD = parameters->d == 0 ? 6 : parameters->d;
|
921
|
-
const unsigned kMaxD = parameters->d == 0 ?
|
922
|
-
const unsigned kMinK = parameters->k == 0 ?
|
923
|
-
const unsigned kMaxK = parameters->k == 0 ?
|
924
|
-
const unsigned kSteps = parameters->steps == 0 ?
|
936
|
+
const unsigned kMaxD = parameters->d == 0 ? 8 : parameters->d;
|
937
|
+
const unsigned kMinK = parameters->k == 0 ? 50 : parameters->k;
|
938
|
+
const unsigned kMaxK = parameters->k == 0 ? 2000 : parameters->k;
|
939
|
+
const unsigned kSteps = parameters->steps == 0 ? 40 : parameters->steps;
|
925
940
|
const unsigned kStepSize = MAX((kMaxK - kMinK) / kSteps, 1);
|
926
941
|
const unsigned kIterations =
|
927
942
|
(1 + (kMaxD - kMinD) / 2) * (1 + (kMaxK - kMinK) / kStepSize);
|
928
943
|
/* Local variables */
|
929
|
-
const int displayLevel = parameters->notificationLevel;
|
944
|
+
const int displayLevel = parameters->zParams.notificationLevel;
|
930
945
|
unsigned iteration = 1;
|
931
946
|
unsigned d;
|
932
947
|
unsigned k;
|
@@ -955,7 +970,7 @@ ZDICTLIB_API size_t COVER_optimizeTrainFromBuffer(void *dictBuffer,
|
|
955
970
|
/* Initialization */
|
956
971
|
COVER_best_init(&best);
|
957
972
|
/* Turn down global display level to clean up display at level 2 and below */
|
958
|
-
g_displayLevel =
|
973
|
+
g_displayLevel = displayLevel == 0 ? 0 : displayLevel - 1;
|
959
974
|
/* Loop through d first because each new value needs a new context */
|
960
975
|
LOCALDISPLAYLEVEL(displayLevel, 2, "Trying %u different sets of parameters\n",
|
961
976
|
kIterations);
|
@@ -989,8 +1004,9 @@ ZDICTLIB_API size_t COVER_optimizeTrainFromBuffer(void *dictBuffer,
|
|
989
1004
|
data->parameters.k = k;
|
990
1005
|
data->parameters.d = d;
|
991
1006
|
data->parameters.steps = kSteps;
|
1007
|
+
data->parameters.zParams.notificationLevel = g_displayLevel;
|
992
1008
|
/* Check the parameters */
|
993
|
-
if (!COVER_checkParameters(data->parameters)) {
|
1009
|
+
if (!COVER_checkParameters(data->parameters, dictBufferCapacity)) {
|
994
1010
|
DISPLAYLEVEL(1, "Cover parameters incorrect\n");
|
995
1011
|
free(data);
|
996
1012
|
continue;
|