extzstd 0.1.1 → 0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/HISTORY.ja.md +18 -0
- data/README.md +15 -50
- data/contrib/zstd/CONTRIBUTING.md +1 -1
- data/contrib/zstd/COPYING +339 -0
- data/contrib/zstd/Makefile +82 -51
- data/contrib/zstd/NEWS +92 -5
- data/contrib/zstd/README.md +50 -41
- data/contrib/zstd/appveyor.yml +164 -102
- data/contrib/zstd/circle.yml +10 -22
- data/contrib/zstd/lib/BUCK +31 -10
- data/contrib/zstd/lib/Makefile +57 -31
- data/contrib/zstd/lib/README.md +68 -37
- data/contrib/zstd/lib/common/bitstream.h +130 -76
- data/contrib/zstd/lib/common/compiler.h +86 -0
- data/contrib/zstd/lib/common/error_private.c +15 -11
- data/contrib/zstd/lib/common/error_private.h +8 -8
- data/contrib/zstd/lib/common/fse.h +19 -9
- data/contrib/zstd/lib/common/fse_decompress.c +3 -22
- data/contrib/zstd/lib/common/huf.h +68 -26
- data/contrib/zstd/lib/common/mem.h +23 -35
- data/contrib/zstd/lib/common/pool.c +123 -63
- data/contrib/zstd/lib/common/pool.h +19 -10
- data/contrib/zstd/lib/common/threading.c +11 -16
- data/contrib/zstd/lib/common/threading.h +52 -33
- data/contrib/zstd/lib/common/xxhash.c +28 -22
- data/contrib/zstd/lib/common/zstd_common.c +40 -27
- data/contrib/zstd/lib/common/zstd_errors.h +43 -34
- data/contrib/zstd/lib/common/zstd_internal.h +131 -123
- data/contrib/zstd/lib/compress/fse_compress.c +17 -33
- data/contrib/zstd/lib/compress/huf_compress.c +15 -9
- data/contrib/zstd/lib/compress/zstd_compress.c +2096 -2363
- data/contrib/zstd/lib/compress/zstd_compress_internal.h +462 -0
- data/contrib/zstd/lib/compress/zstd_double_fast.c +309 -0
- data/contrib/zstd/lib/compress/zstd_double_fast.h +29 -0
- data/contrib/zstd/lib/compress/zstd_fast.c +243 -0
- data/contrib/zstd/lib/compress/zstd_fast.h +31 -0
- data/contrib/zstd/lib/compress/zstd_lazy.c +765 -0
- data/contrib/zstd/lib/compress/zstd_lazy.h +39 -0
- data/contrib/zstd/lib/compress/zstd_ldm.c +707 -0
- data/contrib/zstd/lib/compress/zstd_ldm.h +68 -0
- data/contrib/zstd/lib/compress/zstd_opt.c +785 -0
- data/contrib/zstd/lib/compress/zstd_opt.h +19 -908
- data/contrib/zstd/lib/compress/zstdmt_compress.c +737 -327
- data/contrib/zstd/lib/compress/zstdmt_compress.h +88 -26
- data/contrib/zstd/lib/decompress/huf_decompress.c +158 -50
- data/contrib/zstd/lib/decompress/zstd_decompress.c +884 -699
- data/contrib/zstd/lib/deprecated/zbuff.h +5 -4
- data/contrib/zstd/lib/deprecated/zbuff_common.c +5 -5
- data/contrib/zstd/lib/deprecated/zbuff_compress.c +6 -4
- data/contrib/zstd/lib/deprecated/zbuff_decompress.c +5 -4
- data/contrib/zstd/lib/dictBuilder/cover.c +93 -77
- data/contrib/zstd/lib/dictBuilder/zdict.c +107 -92
- data/contrib/zstd/lib/dictBuilder/zdict.h +112 -102
- data/contrib/zstd/lib/legacy/zstd_legacy.h +9 -4
- data/contrib/zstd/lib/legacy/zstd_v01.c +7 -6
- data/contrib/zstd/lib/legacy/zstd_v01.h +5 -4
- data/contrib/zstd/lib/legacy/zstd_v02.c +27 -99
- data/contrib/zstd/lib/legacy/zstd_v02.h +5 -4
- data/contrib/zstd/lib/legacy/zstd_v03.c +26 -98
- data/contrib/zstd/lib/legacy/zstd_v03.h +5 -4
- data/contrib/zstd/lib/legacy/zstd_v04.c +22 -91
- data/contrib/zstd/lib/legacy/zstd_v04.h +5 -4
- data/contrib/zstd/lib/legacy/zstd_v05.c +23 -99
- data/contrib/zstd/lib/legacy/zstd_v05.h +5 -4
- data/contrib/zstd/lib/legacy/zstd_v06.c +22 -96
- data/contrib/zstd/lib/legacy/zstd_v06.h +5 -4
- data/contrib/zstd/lib/legacy/zstd_v07.c +19 -95
- data/contrib/zstd/lib/legacy/zstd_v07.h +5 -4
- data/contrib/zstd/lib/zstd.h +895 -271
- data/ext/extconf.rb +11 -2
- data/ext/extzstd.c +45 -128
- data/ext/extzstd.h +74 -31
- data/ext/extzstd_stream.c +401 -142
- data/ext/zstd_common.c +5 -0
- data/ext/zstd_compress.c +8 -0
- data/ext/zstd_decompress.c +1 -0
- data/ext/zstd_dictbuilder.c +2 -0
- data/lib/extzstd/version.rb +1 -1
- data/lib/extzstd.rb +48 -1
- data/test/test_basic.rb +9 -1
- metadata +17 -7
- data/HISTORY.ja +0 -10
- data/contrib/zstd/LICENSE-examples +0 -11
- data/contrib/zstd/PATENTS +0 -33
|
@@ -1,10 +1,11 @@
|
|
|
1
|
-
|
|
1
|
+
/*
|
|
2
2
|
* Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
|
|
3
3
|
* All rights reserved.
|
|
4
4
|
*
|
|
5
|
-
* This source code is licensed under the BSD-style license found in the
|
|
6
|
-
* LICENSE file in the root directory of this source tree
|
|
7
|
-
*
|
|
5
|
+
* This source code is licensed under both the BSD-style license (found in the
|
|
6
|
+
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
|
|
7
|
+
* in the COPYING file in the root directory of this source tree).
|
|
8
|
+
* You may select, at your option, one of the above-listed licenses.
|
|
8
9
|
*/
|
|
9
10
|
|
|
10
11
|
/* ***************************************************************
|
|
@@ -1,10 +1,11 @@
|
|
|
1
|
-
|
|
1
|
+
/*
|
|
2
2
|
* Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
|
|
3
3
|
* All rights reserved.
|
|
4
4
|
*
|
|
5
|
-
* This source code is licensed under the BSD-style license found in the
|
|
6
|
-
* LICENSE file in the root directory of this source tree
|
|
7
|
-
*
|
|
5
|
+
* This source code is licensed under both the BSD-style license (found in the
|
|
6
|
+
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
|
|
7
|
+
* in the COPYING file in the root directory of this source tree).
|
|
8
|
+
* You may select, at your option, one of the above-listed licenses.
|
|
8
9
|
*/
|
|
9
10
|
|
|
10
11
|
/*-*************************************
|
|
@@ -23,4 +24,3 @@ unsigned ZBUFF_isError(size_t errorCode) { return ERR_isError(errorCode); }
|
|
|
23
24
|
/*! ZBUFF_getErrorName() :
|
|
24
25
|
* provides error code string from function result (useful for debugging) */
|
|
25
26
|
const char* ZBUFF_getErrorName(size_t errorCode) { return ERR_getErrorName(errorCode); }
|
|
26
|
-
|
|
@@ -1,10 +1,11 @@
|
|
|
1
|
-
|
|
1
|
+
/*
|
|
2
2
|
* Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
|
|
3
3
|
* All rights reserved.
|
|
4
4
|
*
|
|
5
|
-
* This source code is licensed under the BSD-style license found in the
|
|
6
|
-
* LICENSE file in the root directory of this source tree
|
|
7
|
-
*
|
|
5
|
+
* This source code is licensed under both the BSD-style license (found in the
|
|
6
|
+
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
|
|
7
|
+
* in the COPYING file in the root directory of this source tree).
|
|
8
|
+
* You may select, at your option, one of the above-listed licenses.
|
|
8
9
|
*/
|
|
9
10
|
|
|
10
11
|
|
|
@@ -71,6 +72,7 @@ size_t ZBUFF_compressInit_advanced(ZBUFF_CCtx* zbc,
|
|
|
71
72
|
const void* dict, size_t dictSize,
|
|
72
73
|
ZSTD_parameters params, unsigned long long pledgedSrcSize)
|
|
73
74
|
{
|
|
75
|
+
if (pledgedSrcSize==0) pledgedSrcSize = ZSTD_CONTENTSIZE_UNKNOWN; /* preserve "0 == unknown" behavior */
|
|
74
76
|
return ZSTD_initCStream_advanced(zbc, dict, dictSize, params, pledgedSrcSize);
|
|
75
77
|
}
|
|
76
78
|
|
|
@@ -1,10 +1,11 @@
|
|
|
1
|
-
|
|
1
|
+
/*
|
|
2
2
|
* Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
|
|
3
3
|
* All rights reserved.
|
|
4
4
|
*
|
|
5
|
-
* This source code is licensed under the BSD-style license found in the
|
|
6
|
-
* LICENSE file in the root directory of this source tree
|
|
7
|
-
*
|
|
5
|
+
* This source code is licensed under both the BSD-style license (found in the
|
|
6
|
+
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
|
|
7
|
+
* in the COPYING file in the root directory of this source tree).
|
|
8
|
+
* You may select, at your option, one of the above-listed licenses.
|
|
8
9
|
*/
|
|
9
10
|
|
|
10
11
|
|
|
@@ -1,10 +1,11 @@
|
|
|
1
|
-
|
|
1
|
+
/*
|
|
2
2
|
* Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
|
|
3
3
|
* All rights reserved.
|
|
4
4
|
*
|
|
5
|
-
* This source code is licensed under the BSD-style license found in the
|
|
6
|
-
* LICENSE file in the root directory of this source tree
|
|
7
|
-
*
|
|
5
|
+
* This source code is licensed under both the BSD-style license (found in the
|
|
6
|
+
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
|
|
7
|
+
* in the COPYING file in the root directory of this source tree).
|
|
8
|
+
* You may select, at your option, one of the above-listed licenses.
|
|
8
9
|
*/
|
|
9
10
|
|
|
10
11
|
/* *****************************************************************************
|
|
@@ -59,8 +60,6 @@ static int g_displayLevel = 2;
|
|
|
59
60
|
if ((clock() - g_time > refreshRate) || (displayLevel >= 4)) { \
|
|
60
61
|
g_time = clock(); \
|
|
61
62
|
DISPLAY(__VA_ARGS__); \
|
|
62
|
-
if (displayLevel >= 4) \
|
|
63
|
-
fflush(stdout); \
|
|
64
63
|
} \
|
|
65
64
|
}
|
|
66
65
|
#define DISPLAYUPDATE(l, ...) LOCALDISPLAYUPDATE(g_displayLevel, l, __VA_ARGS__)
|
|
@@ -236,10 +235,22 @@ static size_t COVER_sum(const size_t *samplesSizes, unsigned nbSamples) {
|
|
|
236
235
|
* Returns 1 if the dmer at lp is greater than the dmer at rp.
|
|
237
236
|
*/
|
|
238
237
|
static int COVER_cmp(COVER_ctx_t *ctx, const void *lp, const void *rp) {
|
|
239
|
-
const
|
|
240
|
-
const
|
|
238
|
+
U32 const lhs = *(U32 const *)lp;
|
|
239
|
+
U32 const rhs = *(U32 const *)rp;
|
|
241
240
|
return memcmp(ctx->samples + lhs, ctx->samples + rhs, ctx->d);
|
|
242
241
|
}
|
|
242
|
+
/**
|
|
243
|
+
* Faster version for d <= 8.
|
|
244
|
+
*/
|
|
245
|
+
static int COVER_cmp8(COVER_ctx_t *ctx, const void *lp, const void *rp) {
|
|
246
|
+
U64 const mask = (ctx->d == 8) ? (U64)-1 : (((U64)1 << (8 * ctx->d)) - 1);
|
|
247
|
+
U64 const lhs = MEM_readLE64(ctx->samples + *(U32 const *)lp) & mask;
|
|
248
|
+
U64 const rhs = MEM_readLE64(ctx->samples + *(U32 const *)rp) & mask;
|
|
249
|
+
if (lhs < rhs) {
|
|
250
|
+
return -1;
|
|
251
|
+
}
|
|
252
|
+
return (lhs > rhs);
|
|
253
|
+
}
|
|
243
254
|
|
|
244
255
|
/**
|
|
245
256
|
* Same as COVER_cmp() except ties are broken by pointer value
|
|
@@ -253,6 +264,16 @@ static int COVER_strict_cmp(const void *lp, const void *rp) {
|
|
|
253
264
|
}
|
|
254
265
|
return result;
|
|
255
266
|
}
|
|
267
|
+
/**
|
|
268
|
+
* Faster version for d <= 8.
|
|
269
|
+
*/
|
|
270
|
+
static int COVER_strict_cmp8(const void *lp, const void *rp) {
|
|
271
|
+
int result = COVER_cmp8(g_ctx, lp, rp);
|
|
272
|
+
if (result == 0) {
|
|
273
|
+
result = lp < rp ? -1 : 1;
|
|
274
|
+
}
|
|
275
|
+
return result;
|
|
276
|
+
}
|
|
256
277
|
|
|
257
278
|
/**
|
|
258
279
|
* Returns the first pointer in [first, last) whose element does not compare
|
|
@@ -362,7 +383,7 @@ static void COVER_group(COVER_ctx_t *ctx, const void *group,
|
|
|
362
383
|
typedef struct {
|
|
363
384
|
U32 begin;
|
|
364
385
|
U32 end;
|
|
365
|
-
|
|
386
|
+
U32 score;
|
|
366
387
|
} COVER_segment_t;
|
|
367
388
|
|
|
368
389
|
/**
|
|
@@ -378,7 +399,8 @@ typedef struct {
|
|
|
378
399
|
*/
|
|
379
400
|
static COVER_segment_t COVER_selectSegment(const COVER_ctx_t *ctx, U32 *freqs,
|
|
380
401
|
COVER_map_t *activeDmers, U32 begin,
|
|
381
|
-
U32 end,
|
|
402
|
+
U32 end,
|
|
403
|
+
ZDICT_cover_params_t parameters) {
|
|
382
404
|
/* Constants */
|
|
383
405
|
const U32 k = parameters.k;
|
|
384
406
|
const U32 d = parameters.d;
|
|
@@ -458,11 +480,16 @@ static COVER_segment_t COVER_selectSegment(const COVER_ctx_t *ctx, U32 *freqs,
|
|
|
458
480
|
* Check the validity of the parameters.
|
|
459
481
|
* Returns non-zero if the parameters are valid and 0 otherwise.
|
|
460
482
|
*/
|
|
461
|
-
static int COVER_checkParameters(
|
|
483
|
+
static int COVER_checkParameters(ZDICT_cover_params_t parameters,
|
|
484
|
+
size_t maxDictSize) {
|
|
462
485
|
/* k and d are required parameters */
|
|
463
486
|
if (parameters.d == 0 || parameters.k == 0) {
|
|
464
487
|
return 0;
|
|
465
488
|
}
|
|
489
|
+
/* k <= maxDictSize */
|
|
490
|
+
if (parameters.k > maxDictSize) {
|
|
491
|
+
return 0;
|
|
492
|
+
}
|
|
466
493
|
/* d <= k */
|
|
467
494
|
if (parameters.d > parameters.k) {
|
|
468
495
|
return 0;
|
|
@@ -508,7 +535,7 @@ static int COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer,
|
|
|
508
535
|
const BYTE *const samples = (const BYTE *)samplesBuffer;
|
|
509
536
|
const size_t totalSamplesSize = COVER_sum(samplesSizes, nbSamples);
|
|
510
537
|
/* Checks */
|
|
511
|
-
if (totalSamplesSize < d ||
|
|
538
|
+
if (totalSamplesSize < MAX(d, sizeof(U64)) ||
|
|
512
539
|
totalSamplesSize >= (size_t)COVER_MAX_SAMPLES_SIZE) {
|
|
513
540
|
DISPLAYLEVEL(1, "Total samples size is too large, maximum size is %u MB\n",
|
|
514
541
|
(COVER_MAX_SAMPLES_SIZE >> 20));
|
|
@@ -522,7 +549,7 @@ static int COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer,
|
|
|
522
549
|
ctx->samplesSizes = samplesSizes;
|
|
523
550
|
ctx->nbSamples = nbSamples;
|
|
524
551
|
/* Partial suffix array */
|
|
525
|
-
ctx->suffixSize = totalSamplesSize - d + 1;
|
|
552
|
+
ctx->suffixSize = totalSamplesSize - MAX(d, sizeof(U64)) + 1;
|
|
526
553
|
ctx->suffix = (U32 *)malloc(ctx->suffixSize * sizeof(U32));
|
|
527
554
|
/* Maps index to the dmerID */
|
|
528
555
|
ctx->dmerAt = (U32 *)malloc(ctx->suffixSize * sizeof(U32));
|
|
@@ -556,7 +583,8 @@ static int COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer,
|
|
|
556
583
|
}
|
|
557
584
|
/* qsort doesn't take an opaque pointer, so pass as a global */
|
|
558
585
|
g_ctx = ctx;
|
|
559
|
-
qsort(ctx->suffix, ctx->suffixSize, sizeof(U32),
|
|
586
|
+
qsort(ctx->suffix, ctx->suffixSize, sizeof(U32),
|
|
587
|
+
(ctx->d <= 8 ? &COVER_strict_cmp8 : &COVER_strict_cmp));
|
|
560
588
|
}
|
|
561
589
|
DISPLAYLEVEL(2, "Computing frequencies\n");
|
|
562
590
|
/* For each dmer group (group of positions with the same first d bytes):
|
|
@@ -566,8 +594,8 @@ static int COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer,
|
|
|
566
594
|
* 2. We calculate how many samples the dmer occurs in and save it in
|
|
567
595
|
* freqs[dmerId].
|
|
568
596
|
*/
|
|
569
|
-
COVER_groupBy(ctx->suffix, ctx->suffixSize, sizeof(U32), ctx,
|
|
570
|
-
&COVER_group);
|
|
597
|
+
COVER_groupBy(ctx->suffix, ctx->suffixSize, sizeof(U32), ctx,
|
|
598
|
+
(ctx->d <= 8 ? &COVER_cmp8 : &COVER_cmp), &COVER_group);
|
|
571
599
|
ctx->freqs = ctx->suffix;
|
|
572
600
|
ctx->suffix = NULL;
|
|
573
601
|
return 1;
|
|
@@ -579,7 +607,7 @@ static int COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer,
|
|
|
579
607
|
static size_t COVER_buildDictionary(const COVER_ctx_t *ctx, U32 *freqs,
|
|
580
608
|
COVER_map_t *activeDmers, void *dictBuffer,
|
|
581
609
|
size_t dictBufferCapacity,
|
|
582
|
-
|
|
610
|
+
ZDICT_cover_params_t parameters) {
|
|
583
611
|
BYTE *const dict = (BYTE *)dictBuffer;
|
|
584
612
|
size_t tail = dictBufferCapacity;
|
|
585
613
|
/* Divide the data up into epochs of equal size.
|
|
@@ -600,9 +628,13 @@ static size_t COVER_buildDictionary(const COVER_ctx_t *ctx, U32 *freqs,
|
|
|
600
628
|
/* Select a segment */
|
|
601
629
|
COVER_segment_t segment = COVER_selectSegment(
|
|
602
630
|
ctx, freqs, activeDmers, epochBegin, epochEnd, parameters);
|
|
603
|
-
/*
|
|
631
|
+
/* If the segment covers no dmers, then we are out of content */
|
|
632
|
+
if (segment.score == 0) {
|
|
633
|
+
break;
|
|
634
|
+
}
|
|
635
|
+
/* Trim the segment if necessary and if it is too small then we are done */
|
|
604
636
|
segmentSize = MIN(segment.end - segment.begin + parameters.d - 1, tail);
|
|
605
|
-
if (segmentSize
|
|
637
|
+
if (segmentSize < parameters.d) {
|
|
606
638
|
break;
|
|
607
639
|
}
|
|
608
640
|
/* We fill the dictionary from the back to allow the best segments to be
|
|
@@ -618,27 +650,15 @@ static size_t COVER_buildDictionary(const COVER_ctx_t *ctx, U32 *freqs,
|
|
|
618
650
|
return tail;
|
|
619
651
|
}
|
|
620
652
|
|
|
621
|
-
|
|
622
|
-
* Translate from COVER_params_t to ZDICT_params_t required for finalizing the
|
|
623
|
-
* dictionary.
|
|
624
|
-
*/
|
|
625
|
-
static ZDICT_params_t COVER_translateParams(COVER_params_t parameters) {
|
|
626
|
-
ZDICT_params_t zdictParams;
|
|
627
|
-
memset(&zdictParams, 0, sizeof(zdictParams));
|
|
628
|
-
zdictParams.notificationLevel = 1;
|
|
629
|
-
zdictParams.dictID = parameters.dictID;
|
|
630
|
-
zdictParams.compressionLevel = parameters.compressionLevel;
|
|
631
|
-
return zdictParams;
|
|
632
|
-
}
|
|
633
|
-
|
|
634
|
-
ZDICTLIB_API size_t COVER_trainFromBuffer(
|
|
653
|
+
ZDICTLIB_API size_t ZDICT_trainFromBuffer_cover(
|
|
635
654
|
void *dictBuffer, size_t dictBufferCapacity, const void *samplesBuffer,
|
|
636
|
-
const size_t *samplesSizes, unsigned nbSamples,
|
|
655
|
+
const size_t *samplesSizes, unsigned nbSamples,
|
|
656
|
+
ZDICT_cover_params_t parameters) {
|
|
637
657
|
BYTE *const dict = (BYTE *)dictBuffer;
|
|
638
658
|
COVER_ctx_t ctx;
|
|
639
659
|
COVER_map_t activeDmers;
|
|
640
660
|
/* Checks */
|
|
641
|
-
if (!COVER_checkParameters(parameters)) {
|
|
661
|
+
if (!COVER_checkParameters(parameters, dictBufferCapacity)) {
|
|
642
662
|
DISPLAYLEVEL(1, "Cover parameters incorrect\n");
|
|
643
663
|
return ERROR(GENERIC);
|
|
644
664
|
}
|
|
@@ -652,7 +672,7 @@ ZDICTLIB_API size_t COVER_trainFromBuffer(
|
|
|
652
672
|
return ERROR(dstSize_tooSmall);
|
|
653
673
|
}
|
|
654
674
|
/* Initialize global data */
|
|
655
|
-
g_displayLevel = parameters.notificationLevel;
|
|
675
|
+
g_displayLevel = parameters.zParams.notificationLevel;
|
|
656
676
|
/* Initialize context and activeDmers */
|
|
657
677
|
if (!COVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples,
|
|
658
678
|
parameters.d)) {
|
|
@@ -669,10 +689,9 @@ ZDICTLIB_API size_t COVER_trainFromBuffer(
|
|
|
669
689
|
const size_t tail =
|
|
670
690
|
COVER_buildDictionary(&ctx, ctx.freqs, &activeDmers, dictBuffer,
|
|
671
691
|
dictBufferCapacity, parameters);
|
|
672
|
-
ZDICT_params_t zdictParams = COVER_translateParams(parameters);
|
|
673
692
|
const size_t dictionarySize = ZDICT_finalizeDictionary(
|
|
674
693
|
dict, dictBufferCapacity, dict + tail, dictBufferCapacity - tail,
|
|
675
|
-
samplesBuffer, samplesSizes, nbSamples,
|
|
694
|
+
samplesBuffer, samplesSizes, nbSamples, parameters.zParams);
|
|
676
695
|
if (!ZSTD_isError(dictionarySize)) {
|
|
677
696
|
DISPLAYLEVEL(2, "Constructed dictionary of size %u\n",
|
|
678
697
|
(U32)dictionarySize);
|
|
@@ -692,12 +711,12 @@ ZDICTLIB_API size_t COVER_trainFromBuffer(
|
|
|
692
711
|
* compiled with multithreaded support.
|
|
693
712
|
*/
|
|
694
713
|
typedef struct COVER_best_s {
|
|
695
|
-
|
|
696
|
-
|
|
714
|
+
ZSTD_pthread_mutex_t mutex;
|
|
715
|
+
ZSTD_pthread_cond_t cond;
|
|
697
716
|
size_t liveJobs;
|
|
698
717
|
void *dict;
|
|
699
718
|
size_t dictSize;
|
|
700
|
-
|
|
719
|
+
ZDICT_cover_params_t parameters;
|
|
701
720
|
size_t compressedSize;
|
|
702
721
|
} COVER_best_t;
|
|
703
722
|
|
|
@@ -705,11 +724,9 @@ typedef struct COVER_best_s {
|
|
|
705
724
|
* Initialize the `COVER_best_t`.
|
|
706
725
|
*/
|
|
707
726
|
static void COVER_best_init(COVER_best_t *best) {
|
|
708
|
-
if (
|
|
709
|
-
|
|
710
|
-
|
|
711
|
-
pthread_mutex_init(&best->mutex, NULL);
|
|
712
|
-
pthread_cond_init(&best->cond, NULL);
|
|
727
|
+
if (best==NULL) return; /* compatible with init on NULL */
|
|
728
|
+
(void)ZSTD_pthread_mutex_init(&best->mutex, NULL);
|
|
729
|
+
(void)ZSTD_pthread_cond_init(&best->cond, NULL);
|
|
713
730
|
best->liveJobs = 0;
|
|
714
731
|
best->dict = NULL;
|
|
715
732
|
best->dictSize = 0;
|
|
@@ -724,11 +741,11 @@ static void COVER_best_wait(COVER_best_t *best) {
|
|
|
724
741
|
if (!best) {
|
|
725
742
|
return;
|
|
726
743
|
}
|
|
727
|
-
|
|
744
|
+
ZSTD_pthread_mutex_lock(&best->mutex);
|
|
728
745
|
while (best->liveJobs != 0) {
|
|
729
|
-
|
|
746
|
+
ZSTD_pthread_cond_wait(&best->cond, &best->mutex);
|
|
730
747
|
}
|
|
731
|
-
|
|
748
|
+
ZSTD_pthread_mutex_unlock(&best->mutex);
|
|
732
749
|
}
|
|
733
750
|
|
|
734
751
|
/**
|
|
@@ -742,8 +759,8 @@ static void COVER_best_destroy(COVER_best_t *best) {
|
|
|
742
759
|
if (best->dict) {
|
|
743
760
|
free(best->dict);
|
|
744
761
|
}
|
|
745
|
-
|
|
746
|
-
|
|
762
|
+
ZSTD_pthread_mutex_destroy(&best->mutex);
|
|
763
|
+
ZSTD_pthread_cond_destroy(&best->cond);
|
|
747
764
|
}
|
|
748
765
|
|
|
749
766
|
/**
|
|
@@ -754,9 +771,9 @@ static void COVER_best_start(COVER_best_t *best) {
|
|
|
754
771
|
if (!best) {
|
|
755
772
|
return;
|
|
756
773
|
}
|
|
757
|
-
|
|
774
|
+
ZSTD_pthread_mutex_lock(&best->mutex);
|
|
758
775
|
++best->liveJobs;
|
|
759
|
-
|
|
776
|
+
ZSTD_pthread_mutex_unlock(&best->mutex);
|
|
760
777
|
}
|
|
761
778
|
|
|
762
779
|
/**
|
|
@@ -765,14 +782,14 @@ static void COVER_best_start(COVER_best_t *best) {
|
|
|
765
782
|
* If this dictionary is the best so far save it and its parameters.
|
|
766
783
|
*/
|
|
767
784
|
static void COVER_best_finish(COVER_best_t *best, size_t compressedSize,
|
|
768
|
-
|
|
785
|
+
ZDICT_cover_params_t parameters, void *dict,
|
|
769
786
|
size_t dictSize) {
|
|
770
787
|
if (!best) {
|
|
771
788
|
return;
|
|
772
789
|
}
|
|
773
790
|
{
|
|
774
791
|
size_t liveJobs;
|
|
775
|
-
|
|
792
|
+
ZSTD_pthread_mutex_lock(&best->mutex);
|
|
776
793
|
--best->liveJobs;
|
|
777
794
|
liveJobs = best->liveJobs;
|
|
778
795
|
/* If the new dictionary is better */
|
|
@@ -795,9 +812,9 @@ static void COVER_best_finish(COVER_best_t *best, size_t compressedSize,
|
|
|
795
812
|
best->parameters = parameters;
|
|
796
813
|
best->compressedSize = compressedSize;
|
|
797
814
|
}
|
|
798
|
-
|
|
815
|
+
ZSTD_pthread_mutex_unlock(&best->mutex);
|
|
799
816
|
if (liveJobs == 0) {
|
|
800
|
-
|
|
817
|
+
ZSTD_pthread_cond_broadcast(&best->cond);
|
|
801
818
|
}
|
|
802
819
|
}
|
|
803
820
|
}
|
|
@@ -809,7 +826,7 @@ typedef struct COVER_tryParameters_data_s {
|
|
|
809
826
|
const COVER_ctx_t *ctx;
|
|
810
827
|
COVER_best_t *best;
|
|
811
828
|
size_t dictBufferCapacity;
|
|
812
|
-
|
|
829
|
+
ZDICT_cover_params_t parameters;
|
|
813
830
|
} COVER_tryParameters_data_t;
|
|
814
831
|
|
|
815
832
|
/**
|
|
@@ -821,7 +838,7 @@ static void COVER_tryParameters(void *opaque) {
|
|
|
821
838
|
/* Save parameters as local variables */
|
|
822
839
|
COVER_tryParameters_data_t *const data = (COVER_tryParameters_data_t *)opaque;
|
|
823
840
|
const COVER_ctx_t *const ctx = data->ctx;
|
|
824
|
-
const
|
|
841
|
+
const ZDICT_cover_params_t parameters = data->parameters;
|
|
825
842
|
size_t dictBufferCapacity = data->dictBufferCapacity;
|
|
826
843
|
size_t totalCompressedSize = ERROR(GENERIC);
|
|
827
844
|
/* Allocate space for hash table, dict, and freqs */
|
|
@@ -842,10 +859,10 @@ static void COVER_tryParameters(void *opaque) {
|
|
|
842
859
|
{
|
|
843
860
|
const size_t tail = COVER_buildDictionary(ctx, freqs, &activeDmers, dict,
|
|
844
861
|
dictBufferCapacity, parameters);
|
|
845
|
-
const ZDICT_params_t zdictParams = COVER_translateParams(parameters);
|
|
846
862
|
dictBufferCapacity = ZDICT_finalizeDictionary(
|
|
847
863
|
dict, dictBufferCapacity, dict + tail, dictBufferCapacity - tail,
|
|
848
|
-
ctx->samples, ctx->samplesSizes, (unsigned)ctx->nbSamples,
|
|
864
|
+
ctx->samples, ctx->samplesSizes, (unsigned)ctx->nbSamples,
|
|
865
|
+
parameters.zParams);
|
|
849
866
|
if (ZDICT_isError(dictBufferCapacity)) {
|
|
850
867
|
DISPLAYLEVEL(1, "Failed to finalize dictionary\n");
|
|
851
868
|
goto _cleanup;
|
|
@@ -871,13 +888,13 @@ static void COVER_tryParameters(void *opaque) {
|
|
|
871
888
|
}
|
|
872
889
|
/* Create the cctx and cdict */
|
|
873
890
|
cctx = ZSTD_createCCtx();
|
|
874
|
-
cdict =
|
|
875
|
-
|
|
891
|
+
cdict = ZSTD_createCDict(dict, dictBufferCapacity,
|
|
892
|
+
parameters.zParams.compressionLevel);
|
|
876
893
|
if (!dst || !cctx || !cdict) {
|
|
877
894
|
goto _compressCleanup;
|
|
878
895
|
}
|
|
879
896
|
/* Compress each sample and sum their sizes (or error) */
|
|
880
|
-
totalCompressedSize =
|
|
897
|
+
totalCompressedSize = dictBufferCapacity;
|
|
881
898
|
for (i = 0; i < ctx->nbSamples; ++i) {
|
|
882
899
|
const size_t size = ZSTD_compress_usingCDict(
|
|
883
900
|
cctx, dst, dstCapacity, ctx->samples + ctx->offsets[i],
|
|
@@ -909,24 +926,22 @@ _cleanup:
|
|
|
909
926
|
}
|
|
910
927
|
}
|
|
911
928
|
|
|
912
|
-
ZDICTLIB_API size_t
|
|
913
|
-
|
|
914
|
-
|
|
915
|
-
|
|
916
|
-
unsigned nbSamples,
|
|
917
|
-
COVER_params_t *parameters) {
|
|
929
|
+
ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover(
|
|
930
|
+
void *dictBuffer, size_t dictBufferCapacity, const void *samplesBuffer,
|
|
931
|
+
const size_t *samplesSizes, unsigned nbSamples,
|
|
932
|
+
ZDICT_cover_params_t *parameters) {
|
|
918
933
|
/* constants */
|
|
919
934
|
const unsigned nbThreads = parameters->nbThreads;
|
|
920
935
|
const unsigned kMinD = parameters->d == 0 ? 6 : parameters->d;
|
|
921
|
-
const unsigned kMaxD = parameters->d == 0 ?
|
|
922
|
-
const unsigned kMinK = parameters->k == 0 ?
|
|
923
|
-
const unsigned kMaxK = parameters->k == 0 ?
|
|
924
|
-
const unsigned kSteps = parameters->steps == 0 ?
|
|
936
|
+
const unsigned kMaxD = parameters->d == 0 ? 8 : parameters->d;
|
|
937
|
+
const unsigned kMinK = parameters->k == 0 ? 50 : parameters->k;
|
|
938
|
+
const unsigned kMaxK = parameters->k == 0 ? 2000 : parameters->k;
|
|
939
|
+
const unsigned kSteps = parameters->steps == 0 ? 40 : parameters->steps;
|
|
925
940
|
const unsigned kStepSize = MAX((kMaxK - kMinK) / kSteps, 1);
|
|
926
941
|
const unsigned kIterations =
|
|
927
942
|
(1 + (kMaxD - kMinD) / 2) * (1 + (kMaxK - kMinK) / kStepSize);
|
|
928
943
|
/* Local variables */
|
|
929
|
-
const int displayLevel = parameters->notificationLevel;
|
|
944
|
+
const int displayLevel = parameters->zParams.notificationLevel;
|
|
930
945
|
unsigned iteration = 1;
|
|
931
946
|
unsigned d;
|
|
932
947
|
unsigned k;
|
|
@@ -955,7 +970,7 @@ ZDICTLIB_API size_t COVER_optimizeTrainFromBuffer(void *dictBuffer,
|
|
|
955
970
|
/* Initialization */
|
|
956
971
|
COVER_best_init(&best);
|
|
957
972
|
/* Turn down global display level to clean up display at level 2 and below */
|
|
958
|
-
g_displayLevel =
|
|
973
|
+
g_displayLevel = displayLevel == 0 ? 0 : displayLevel - 1;
|
|
959
974
|
/* Loop through d first because each new value needs a new context */
|
|
960
975
|
LOCALDISPLAYLEVEL(displayLevel, 2, "Trying %u different sets of parameters\n",
|
|
961
976
|
kIterations);
|
|
@@ -989,8 +1004,9 @@ ZDICTLIB_API size_t COVER_optimizeTrainFromBuffer(void *dictBuffer,
|
|
|
989
1004
|
data->parameters.k = k;
|
|
990
1005
|
data->parameters.d = d;
|
|
991
1006
|
data->parameters.steps = kSteps;
|
|
1007
|
+
data->parameters.zParams.notificationLevel = g_displayLevel;
|
|
992
1008
|
/* Check the parameters */
|
|
993
|
-
if (!COVER_checkParameters(data->parameters)) {
|
|
1009
|
+
if (!COVER_checkParameters(data->parameters, dictBufferCapacity)) {
|
|
994
1010
|
DISPLAYLEVEL(1, "Cover parameters incorrect\n");
|
|
995
1011
|
free(data);
|
|
996
1012
|
continue;
|