extzstd 0.3.1 → 0.3.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +28 -14
- data/contrib/zstd/CHANGELOG +301 -56
- data/contrib/zstd/CONTRIBUTING.md +169 -72
- data/contrib/zstd/LICENSE +4 -4
- data/contrib/zstd/Makefile +116 -87
- data/contrib/zstd/Package.swift +36 -0
- data/contrib/zstd/README.md +62 -32
- data/contrib/zstd/TESTING.md +2 -3
- data/contrib/zstd/appveyor.yml +52 -136
- data/contrib/zstd/lib/BUCK +5 -7
- data/contrib/zstd/lib/Makefile +225 -222
- data/contrib/zstd/lib/README.md +51 -6
- data/contrib/zstd/lib/common/allocations.h +55 -0
- data/contrib/zstd/lib/common/bits.h +200 -0
- data/contrib/zstd/lib/common/bitstream.h +45 -62
- data/contrib/zstd/lib/common/compiler.h +205 -22
- data/contrib/zstd/lib/common/cpu.h +1 -3
- data/contrib/zstd/lib/common/debug.c +1 -1
- data/contrib/zstd/lib/common/debug.h +12 -19
- data/contrib/zstd/lib/common/entropy_common.c +172 -48
- data/contrib/zstd/lib/common/error_private.c +10 -2
- data/contrib/zstd/lib/common/error_private.h +82 -3
- data/contrib/zstd/lib/common/fse.h +37 -86
- data/contrib/zstd/lib/common/fse_decompress.c +117 -92
- data/contrib/zstd/lib/common/huf.h +99 -166
- data/contrib/zstd/lib/common/mem.h +124 -142
- data/contrib/zstd/lib/common/pool.c +54 -27
- data/contrib/zstd/lib/common/pool.h +10 -4
- data/contrib/zstd/lib/common/portability_macros.h +156 -0
- data/contrib/zstd/lib/common/threading.c +74 -19
- data/contrib/zstd/lib/common/threading.h +5 -10
- data/contrib/zstd/lib/common/xxhash.c +7 -847
- data/contrib/zstd/lib/common/xxhash.h +5568 -167
- data/contrib/zstd/lib/common/zstd_common.c +2 -37
- data/contrib/zstd/lib/common/zstd_deps.h +111 -0
- data/contrib/zstd/lib/common/zstd_internal.h +132 -187
- data/contrib/zstd/lib/common/zstd_trace.h +163 -0
- data/contrib/zstd/lib/compress/clevels.h +134 -0
- data/contrib/zstd/lib/compress/fse_compress.c +83 -157
- data/contrib/zstd/lib/compress/hist.c +27 -29
- data/contrib/zstd/lib/compress/hist.h +2 -2
- data/contrib/zstd/lib/compress/huf_compress.c +916 -279
- data/contrib/zstd/lib/compress/zstd_compress.c +3773 -1019
- data/contrib/zstd/lib/compress/zstd_compress_internal.h +610 -203
- data/contrib/zstd/lib/compress/zstd_compress_literals.c +119 -42
- data/contrib/zstd/lib/compress/zstd_compress_literals.h +16 -6
- data/contrib/zstd/lib/compress/zstd_compress_sequences.c +42 -19
- data/contrib/zstd/lib/compress/zstd_compress_sequences.h +1 -1
- data/contrib/zstd/lib/compress/zstd_compress_superblock.c +49 -317
- data/contrib/zstd/lib/compress/zstd_compress_superblock.h +1 -1
- data/contrib/zstd/lib/compress/zstd_cwksp.h +320 -103
- data/contrib/zstd/lib/compress/zstd_double_fast.c +388 -151
- data/contrib/zstd/lib/compress/zstd_double_fast.h +3 -2
- data/contrib/zstd/lib/compress/zstd_fast.c +729 -265
- data/contrib/zstd/lib/compress/zstd_fast.h +3 -2
- data/contrib/zstd/lib/compress/zstd_lazy.c +1270 -251
- data/contrib/zstd/lib/compress/zstd_lazy.h +61 -1
- data/contrib/zstd/lib/compress/zstd_ldm.c +324 -219
- data/contrib/zstd/lib/compress/zstd_ldm.h +9 -2
- data/contrib/zstd/lib/compress/zstd_ldm_geartab.h +106 -0
- data/contrib/zstd/lib/compress/zstd_opt.c +481 -209
- data/contrib/zstd/lib/compress/zstd_opt.h +1 -1
- data/contrib/zstd/lib/compress/zstdmt_compress.c +181 -457
- data/contrib/zstd/lib/compress/zstdmt_compress.h +34 -113
- data/contrib/zstd/lib/decompress/huf_decompress.c +1199 -565
- data/contrib/zstd/lib/decompress/huf_decompress_amd64.S +576 -0
- data/contrib/zstd/lib/decompress/zstd_ddict.c +12 -12
- data/contrib/zstd/lib/decompress/zstd_ddict.h +2 -2
- data/contrib/zstd/lib/decompress/zstd_decompress.c +627 -157
- data/contrib/zstd/lib/decompress/zstd_decompress_block.c +1086 -326
- data/contrib/zstd/lib/decompress/zstd_decompress_block.h +19 -5
- data/contrib/zstd/lib/decompress/zstd_decompress_internal.h +62 -13
- data/contrib/zstd/lib/deprecated/zbuff.h +1 -1
- data/contrib/zstd/lib/deprecated/zbuff_common.c +1 -1
- data/contrib/zstd/lib/deprecated/zbuff_compress.c +24 -4
- data/contrib/zstd/lib/deprecated/zbuff_decompress.c +3 -1
- data/contrib/zstd/lib/dictBuilder/cover.c +73 -52
- data/contrib/zstd/lib/dictBuilder/cover.h +7 -6
- data/contrib/zstd/lib/dictBuilder/divsufsort.c +1 -1
- data/contrib/zstd/lib/dictBuilder/fastcover.c +44 -35
- data/contrib/zstd/lib/dictBuilder/zdict.c +103 -111
- data/contrib/zstd/lib/legacy/zstd_legacy.h +8 -1
- data/contrib/zstd/lib/legacy/zstd_v01.c +21 -54
- data/contrib/zstd/lib/legacy/zstd_v01.h +1 -1
- data/contrib/zstd/lib/legacy/zstd_v02.c +29 -70
- data/contrib/zstd/lib/legacy/zstd_v02.h +1 -1
- data/contrib/zstd/lib/legacy/zstd_v03.c +30 -73
- data/contrib/zstd/lib/legacy/zstd_v03.h +1 -1
- data/contrib/zstd/lib/legacy/zstd_v04.c +29 -71
- data/contrib/zstd/lib/legacy/zstd_v04.h +1 -1
- data/contrib/zstd/lib/legacy/zstd_v05.c +40 -86
- data/contrib/zstd/lib/legacy/zstd_v05.h +1 -1
- data/contrib/zstd/lib/legacy/zstd_v06.c +47 -88
- data/contrib/zstd/lib/legacy/zstd_v06.h +1 -1
- data/contrib/zstd/lib/legacy/zstd_v07.c +40 -83
- data/contrib/zstd/lib/legacy/zstd_v07.h +1 -1
- data/contrib/zstd/lib/libzstd.mk +214 -0
- data/contrib/zstd/lib/libzstd.pc.in +7 -6
- data/contrib/zstd/lib/module.modulemap +35 -0
- data/contrib/zstd/lib/{dictBuilder/zdict.h → zdict.h} +203 -34
- data/contrib/zstd/lib/zstd.h +1217 -287
- data/contrib/zstd/lib/{common/zstd_errors.h → zstd_errors.h} +28 -8
- data/ext/extconf.rb +7 -6
- data/ext/extzstd.c +19 -10
- data/ext/extzstd.h +6 -0
- data/ext/libzstd_conf.h +0 -1
- data/ext/zstd_decompress_asm.S +1 -0
- data/gemstub.rb +3 -21
- data/lib/extzstd/version.rb +6 -1
- data/lib/extzstd.rb +0 -2
- data/test/test_basic.rb +0 -5
- metadata +18 -6
@@ -1,5 +1,5 @@
|
|
1
1
|
/*
|
2
|
-
* Copyright (c)
|
2
|
+
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
3
3
|
* All rights reserved.
|
4
4
|
*
|
5
5
|
* This source code is licensed under both the BSD-style license (found in the
|
@@ -8,6 +8,10 @@
|
|
8
8
|
* You may select, at your option, one of the above-listed licenses.
|
9
9
|
*/
|
10
10
|
|
11
|
+
#ifndef ZDICT_STATIC_LINKING_ONLY
|
12
|
+
# define ZDICT_STATIC_LINKING_ONLY
|
13
|
+
#endif
|
14
|
+
|
11
15
|
#include <stdio.h> /* fprintf */
|
12
16
|
#include <stdlib.h> /* malloc, free, qsort */
|
13
17
|
#include <string.h> /* memset */
|
@@ -16,10 +20,7 @@
|
|
16
20
|
#include "../common/pool.h"
|
17
21
|
#include "../common/threading.h"
|
18
22
|
#include "../common/zstd_internal.h" /* includes zstd.h */
|
19
|
-
#ifndef ZDICT_STATIC_LINKING_ONLY
|
20
|
-
#define ZDICT_STATIC_LINKING_ONLY
|
21
|
-
#endif
|
22
|
-
#include "zdict.h"
|
23
|
+
#include "../zdict.h"
|
23
24
|
|
24
25
|
/**
|
25
26
|
* COVER_best_t is used for two purposes:
|
@@ -152,6 +153,6 @@ void COVER_dictSelectionFree(COVER_dictSelection_t selection);
|
|
152
153
|
* smallest dictionary within a specified regression of the compressed size
|
153
154
|
* from the largest dictionary.
|
154
155
|
*/
|
155
|
-
COVER_dictSelection_t COVER_selectDict(BYTE* customDictContent,
|
156
|
+
COVER_dictSelection_t COVER_selectDict(BYTE* customDictContent, size_t dictBufferCapacity,
|
156
157
|
size_t dictContentSize, const BYTE* samplesBuffer, const size_t* samplesSizes, unsigned nbFinalizeSamples,
|
157
158
|
size_t nbCheckSamples, size_t nbSamples, ZDICT_cover_params_t params, size_t* offsets, size_t totalCompressedSize);
|
@@ -1576,7 +1576,7 @@ note:
|
|
1576
1576
|
/* Construct the inverse suffix array of type B* suffixes using trsort. */
|
1577
1577
|
trsort(ISAb, SA, m, 1);
|
1578
1578
|
|
1579
|
-
/* Set the sorted order of
|
1579
|
+
/* Set the sorted order of type B* suffixes. */
|
1580
1580
|
for(i = n - 1, j = m, c0 = T[n - 1]; 0 <= i;) {
|
1581
1581
|
for(--i, c1 = c0; (0 <= i) && ((c0 = T[i]) >= c1); --i, c1 = c0) { }
|
1582
1582
|
if(0 <= i) {
|
@@ -1,5 +1,5 @@
|
|
1
1
|
/*
|
2
|
-
* Copyright (c)
|
2
|
+
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
3
3
|
* All rights reserved.
|
4
4
|
*
|
5
5
|
* This source code is licensed under both the BSD-style license (found in the
|
@@ -16,24 +16,33 @@
|
|
16
16
|
#include <string.h> /* memset */
|
17
17
|
#include <time.h> /* clock */
|
18
18
|
|
19
|
+
#ifndef ZDICT_STATIC_LINKING_ONLY
|
20
|
+
# define ZDICT_STATIC_LINKING_ONLY
|
21
|
+
#endif
|
22
|
+
|
19
23
|
#include "../common/mem.h" /* read */
|
20
24
|
#include "../common/pool.h"
|
21
25
|
#include "../common/threading.h"
|
22
|
-
#include "cover.h"
|
23
26
|
#include "../common/zstd_internal.h" /* includes zstd.h */
|
24
|
-
#ifndef ZDICT_STATIC_LINKING_ONLY
|
25
|
-
#define ZDICT_STATIC_LINKING_ONLY
|
26
|
-
#endif
|
27
|
-
#include "zdict.h"
|
27
|
+
#include "../compress/zstd_compress_internal.h" /* ZSTD_hash*() */
|
28
|
+
#include "../zdict.h"
|
29
|
+
#include "cover.h"
|
28
30
|
|
29
31
|
|
30
32
|
/*-*************************************
|
31
33
|
* Constants
|
32
34
|
***************************************/
|
35
|
+
/**
|
36
|
+
* There are 32bit indexes used to ref samples, so limit samples size to 4GB
|
37
|
+
* on 64bit builds.
|
38
|
+
* For 32bit builds we choose 1 GB.
|
39
|
+
* Most 32bit platforms have 2GB user-mode addressable space and we allocate a large
|
40
|
+
* contiguous buffer, so 1GB is already a high limit.
|
41
|
+
*/
|
33
42
|
#define FASTCOVER_MAX_SAMPLES_SIZE (sizeof(size_t) == 8 ? ((unsigned)-1) : ((unsigned)1 GB))
|
34
43
|
#define FASTCOVER_MAX_F 31
|
35
44
|
#define FASTCOVER_MAX_ACCEL 10
|
36
|
-
#define
|
45
|
+
#define FASTCOVER_DEFAULT_SPLITPOINT 0.75
|
37
46
|
#define DEFAULT_F 20
|
38
47
|
#define DEFAULT_ACCEL 1
|
39
48
|
|
@@ -41,50 +50,50 @@
|
|
41
50
|
/*-*************************************
|
42
51
|
* Console display
|
43
52
|
***************************************/
|
44
|
-
|
53
|
+
#ifndef LOCALDISPLAYLEVEL
|
54
|
+
static int g_displayLevel = 0;
|
55
|
+
#endif
|
56
|
+
#undef DISPLAY
|
45
57
|
#define DISPLAY(...) \
|
46
58
|
{ \
|
47
59
|
fprintf(stderr, __VA_ARGS__); \
|
48
60
|
fflush(stderr); \
|
49
61
|
}
|
62
|
+
#undef LOCALDISPLAYLEVEL
|
50
63
|
#define LOCALDISPLAYLEVEL(displayLevel, l, ...) \
|
51
64
|
if (displayLevel >= l) { \
|
52
65
|
DISPLAY(__VA_ARGS__); \
|
53
66
|
} /* 0 : no display; 1: errors; 2: default; 3: details; 4: debug */
|
67
|
+
#undef DISPLAYLEVEL
|
54
68
|
#define DISPLAYLEVEL(l, ...) LOCALDISPLAYLEVEL(g_displayLevel, l, __VA_ARGS__)
|
55
69
|
|
70
|
+
#ifndef LOCALDISPLAYUPDATE
|
71
|
+
static const clock_t g_refreshRate = CLOCKS_PER_SEC * 15 / 100;
|
72
|
+
static clock_t g_time = 0;
|
73
|
+
#endif
|
74
|
+
#undef LOCALDISPLAYUPDATE
|
56
75
|
#define LOCALDISPLAYUPDATE(displayLevel, l, ...) \
|
57
76
|
if (displayLevel >= l) { \
|
58
|
-
if ((clock() - g_time > refreshRate) || (displayLevel >= 4)) { \
|
77
|
+
if ((clock() - g_time > g_refreshRate) || (displayLevel >= 4)) { \
|
59
78
|
g_time = clock(); \
|
60
79
|
DISPLAY(__VA_ARGS__); \
|
61
80
|
} \
|
62
81
|
}
|
82
|
+
#undef DISPLAYUPDATE
|
63
83
|
#define DISPLAYUPDATE(l, ...) LOCALDISPLAYUPDATE(g_displayLevel, l, __VA_ARGS__)
|
64
|
-
static const clock_t refreshRate = CLOCKS_PER_SEC * 15 / 100;
|
65
|
-
static clock_t g_time = 0;
|
66
84
|
|
67
85
|
|
68
86
|
/*-*************************************
|
69
87
|
* Hash Functions
|
70
88
|
***************************************/
|
71
|
-
static const U64 prime6bytes = 227718039650203ULL;
|
72
|
-
static size_t ZSTD_hash6(U64 u, U32 h) { return (size_t)(((u << (64-48)) * prime6bytes) >> (64-h)) ; }
|
73
|
-
static size_t ZSTD_hash6Ptr(const void* p, U32 h) { return ZSTD_hash6(MEM_readLE64(p), h); }
|
74
|
-
|
75
|
-
static const U64 prime8bytes = 0xCF1BBCDCB7A56463ULL;
|
76
|
-
static size_t ZSTD_hash8(U64 u, U32 h) { return (size_t)(((u) * prime8bytes) >> (64-h)) ; }
|
77
|
-
static size_t ZSTD_hash8Ptr(const void* p, U32 h) { return ZSTD_hash8(MEM_readLE64(p), h); }
|
78
|
-
|
79
|
-
|
80
89
|
/**
|
81
|
-
* Hash the d-byte value pointed to by p and mod 2^f
|
90
|
+
* Hash the d-byte value pointed to by p and mod 2^f into the frequency vector
|
82
91
|
*/
|
83
|
-
static size_t FASTCOVER_hashPtrToIndex(const void* p, U32
|
92
|
+
static size_t FASTCOVER_hashPtrToIndex(const void* p, U32 f, unsigned d) {
|
84
93
|
if (d == 6) {
|
85
|
-
return ZSTD_hash6Ptr(p,
|
94
|
+
return ZSTD_hash6Ptr(p, f);
|
86
95
|
}
|
87
|
-
return ZSTD_hash8Ptr(p,
|
96
|
+
return ZSTD_hash8Ptr(p, f);
|
88
97
|
}
|
89
98
|
|
90
99
|
|
@@ -295,7 +304,7 @@ FASTCOVER_computeFrequency(U32* freqs, const FASTCOVER_ctx_t* ctx)
|
|
295
304
|
|
296
305
|
/**
|
297
306
|
* Prepare a context for dictionary building.
|
298
|
-
* The context is only dependent on the parameter `d` and can used multiple
|
307
|
+
* The context is only dependent on the parameter `d` and can be used multiple
|
299
308
|
* times.
|
300
309
|
* Returns 0 on success or error code on error.
|
301
310
|
* The context must be destroyed with `FASTCOVER_ctx_destroy()`.
|
@@ -461,20 +470,20 @@ typedef struct FASTCOVER_tryParameters_data_s {
|
|
461
470
|
* This function is thread safe if zstd is compiled with multithreaded support.
|
462
471
|
* It takes its parameters as an *OWNING* opaque pointer to support threading.
|
463
472
|
*/
|
464
|
-
static void FASTCOVER_tryParameters(void *opaque)
|
473
|
+
static void FASTCOVER_tryParameters(void* opaque)
|
465
474
|
{
|
466
475
|
/* Save parameters as local variables */
|
467
|
-
FASTCOVER_tryParameters_data_t *const data = (FASTCOVER_tryParameters_data_t *)opaque;
|
476
|
+
FASTCOVER_tryParameters_data_t *const data = (FASTCOVER_tryParameters_data_t*)opaque;
|
468
477
|
const FASTCOVER_ctx_t *const ctx = data->ctx;
|
469
478
|
const ZDICT_cover_params_t parameters = data->parameters;
|
470
479
|
size_t dictBufferCapacity = data->dictBufferCapacity;
|
471
480
|
size_t totalCompressedSize = ERROR(GENERIC);
|
472
481
|
/* Initialize array to keep track of frequency of dmer within activeSegment */
|
473
|
-
U16* segmentFreqs = (U16
|
482
|
+
U16* segmentFreqs = (U16*)calloc(((U64)1 << ctx->f), sizeof(U16));
|
474
483
|
/* Allocate space for hash table, dict, and freqs */
|
475
|
-
BYTE *const dict = (BYTE
|
484
|
+
BYTE *const dict = (BYTE*)malloc(dictBufferCapacity);
|
476
485
|
COVER_dictSelection_t selection = COVER_dictSelectionError(ERROR(GENERIC));
|
477
|
-
U32
|
486
|
+
U32* freqs = (U32*) malloc(((U64)1 << ctx->f) * sizeof(U32));
|
478
487
|
if (!segmentFreqs || !dict || !freqs) {
|
479
488
|
DISPLAYLEVEL(1, "Failed to allocate buffers: out of memory\n");
|
480
489
|
goto _cleanup;
|
@@ -486,7 +495,7 @@ static void FASTCOVER_tryParameters(void *opaque)
|
|
486
495
|
parameters, segmentFreqs);
|
487
496
|
|
488
497
|
const unsigned nbFinalizeSamples = (unsigned)(ctx->nbTrainSamples * ctx->accelParams.finalize / 100);
|
489
|
-
selection = COVER_selectDict(dict + tail, dictBufferCapacity - tail,
|
498
|
+
selection = COVER_selectDict(dict + tail, dictBufferCapacity, dictBufferCapacity - tail,
|
490
499
|
ctx->samples, ctx->samplesSizes, nbFinalizeSamples, ctx->nbTrainSamples, ctx->nbSamples, parameters, ctx->offsets,
|
491
500
|
totalCompressedSize);
|
492
501
|
|
@@ -547,7 +556,7 @@ ZDICT_trainFromBuffer_fastCover(void* dictBuffer, size_t dictBufferCapacity,
|
|
547
556
|
ZDICT_cover_params_t coverParams;
|
548
557
|
FASTCOVER_accel_t accelParams;
|
549
558
|
/* Initialize global data */
|
550
|
-
g_displayLevel = parameters.zParams.notificationLevel;
|
559
|
+
g_displayLevel = (int)parameters.zParams.notificationLevel;
|
551
560
|
/* Assign splitPoint and f if not provided */
|
552
561
|
parameters.splitPoint = 1.0;
|
553
562
|
parameters.f = parameters.f == 0 ? DEFAULT_F : parameters.f;
|
@@ -617,7 +626,7 @@ ZDICT_optimizeTrainFromBuffer_fastCover(
|
|
617
626
|
/* constants */
|
618
627
|
const unsigned nbThreads = parameters->nbThreads;
|
619
628
|
const double splitPoint =
|
620
|
-
parameters->splitPoint <= 0.0 ?
|
629
|
+
parameters->splitPoint <= 0.0 ? FASTCOVER_DEFAULT_SPLITPOINT : parameters->splitPoint;
|
621
630
|
const unsigned kMinD = parameters->d == 0 ? 6 : parameters->d;
|
622
631
|
const unsigned kMaxD = parameters->d == 0 ? 8 : parameters->d;
|
623
632
|
const unsigned kMinK = parameters->k == 0 ? 50 : parameters->k;
|
@@ -630,7 +639,7 @@ ZDICT_optimizeTrainFromBuffer_fastCover(
|
|
630
639
|
const unsigned accel = parameters->accel == 0 ? DEFAULT_ACCEL : parameters->accel;
|
631
640
|
const unsigned shrinkDict = 0;
|
632
641
|
/* Local variables */
|
633
|
-
const int displayLevel = parameters->zParams.notificationLevel;
|
642
|
+
const int displayLevel = (int)parameters->zParams.notificationLevel;
|
634
643
|
unsigned iteration = 1;
|
635
644
|
unsigned d;
|
636
645
|
unsigned k;
|
@@ -714,7 +723,7 @@ ZDICT_optimizeTrainFromBuffer_fastCover(
|
|
714
723
|
data->parameters.splitPoint = splitPoint;
|
715
724
|
data->parameters.steps = kSteps;
|
716
725
|
data->parameters.shrinkDict = shrinkDict;
|
717
|
-
data->parameters.zParams.notificationLevel = g_displayLevel;
|
726
|
+
data->parameters.zParams.notificationLevel = (unsigned)g_displayLevel;
|
718
727
|
/* Check the parameters */
|
719
728
|
if (!FASTCOVER_checkParameters(data->parameters, dictBufferCapacity,
|
720
729
|
data->ctx->f, accel)) {
|