extlz4 0.2.4.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/HISTORY.ja.md +116 -0
- data/LICENSE +24 -0
- data/README.md +203 -0
- data/Rakefile +212 -0
- data/bin/extlz4 +220 -0
- data/contrib/lz4/INSTALL +15 -0
- data/contrib/lz4/LICENSE +11 -0
- data/contrib/lz4/NEWS +231 -0
- data/contrib/lz4/README.md +114 -0
- data/contrib/lz4/circle.yml +39 -0
- data/contrib/lz4/lib/LICENSE +24 -0
- data/contrib/lz4/lib/README.md +73 -0
- data/contrib/lz4/lib/liblz4.pc.in +14 -0
- data/contrib/lz4/lib/lz4.c +1478 -0
- data/contrib/lz4/lib/lz4.h +463 -0
- data/contrib/lz4/lib/lz4frame.c +1669 -0
- data/contrib/lz4/lib/lz4frame.h +391 -0
- data/contrib/lz4/lib/lz4frame_static.h +143 -0
- data/contrib/lz4/lib/lz4hc.c +807 -0
- data/contrib/lz4/lib/lz4hc.h +278 -0
- data/contrib/lz4/lib/lz4opt.h +366 -0
- data/contrib/lz4/lib/xxhash.c +894 -0
- data/contrib/lz4/lib/xxhash.h +293 -0
- data/examples/frameapi.rb +43 -0
- data/ext/blockapi.c +1046 -0
- data/ext/depend +4 -0
- data/ext/extconf.rb +60 -0
- data/ext/extlz4.c +69 -0
- data/ext/extlz4.h +109 -0
- data/ext/frameapi.c +780 -0
- data/ext/hashargs.c +151 -0
- data/ext/hashargs.h +110 -0
- data/ext/lz4_amalgam.c +31 -0
- data/gemstub.rb +40 -0
- data/lib/extlz4.rb +327 -0
- data/lib/extlz4/compat.rb +12 -0
- data/lib/extlz4/fix-0.1bug.rb +96 -0
- data/lib/extlz4/oldstream.rb +529 -0
- data/lib/extlz4/version.rb +3 -0
- data/test/common.rb +18 -0
- data/test/test_blockapi.rb +105 -0
- data/test/test_frameapi.rb +59 -0
- metadata +126 -0
@@ -0,0 +1,278 @@
|
|
1
|
+
/*
|
2
|
+
LZ4 HC - High Compression Mode of LZ4
|
3
|
+
Header File
|
4
|
+
Copyright (C) 2011-2017, Yann Collet.
|
5
|
+
BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
|
6
|
+
|
7
|
+
Redistribution and use in source and binary forms, with or without
|
8
|
+
modification, are permitted provided that the following conditions are
|
9
|
+
met:
|
10
|
+
|
11
|
+
* Redistributions of source code must retain the above copyright
|
12
|
+
notice, this list of conditions and the following disclaimer.
|
13
|
+
* Redistributions in binary form must reproduce the above
|
14
|
+
copyright notice, this list of conditions and the following disclaimer
|
15
|
+
in the documentation and/or other materials provided with the
|
16
|
+
distribution.
|
17
|
+
|
18
|
+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
19
|
+
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
20
|
+
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
21
|
+
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
22
|
+
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
23
|
+
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
24
|
+
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
25
|
+
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
26
|
+
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
27
|
+
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
28
|
+
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
29
|
+
|
30
|
+
You can contact the author at :
|
31
|
+
- LZ4 source repository : https://github.com/lz4/lz4
|
32
|
+
- LZ4 public forum : https://groups.google.com/forum/#!forum/lz4c
|
33
|
+
*/
|
34
|
+
#ifndef LZ4_HC_H_19834876238432
|
35
|
+
#define LZ4_HC_H_19834876238432
|
36
|
+
|
37
|
+
#if defined (__cplusplus)
|
38
|
+
extern "C" {
|
39
|
+
#endif
|
40
|
+
|
41
|
+
/* --- Dependency --- */
|
42
|
+
/* note : lz4hc is not an independent module, it requires lz4.h/lz4.c for proper compilation */
|
43
|
+
#include "lz4.h" /* stddef, LZ4LIB_API, LZ4_DEPRECATED */
|
44
|
+
|
45
|
+
|
46
|
+
/* --- Useful constants --- */
|
47
|
+
#define LZ4HC_CLEVEL_MIN 3
|
48
|
+
#define LZ4HC_CLEVEL_DEFAULT 9
|
49
|
+
#define LZ4HC_CLEVEL_OPT_MIN 11
|
50
|
+
#define LZ4HC_CLEVEL_MAX 12
|
51
|
+
|
52
|
+
|
53
|
+
/*-************************************
|
54
|
+
* Block Compression
|
55
|
+
**************************************/
|
56
|
+
/*! LZ4_compress_HC() :
|
57
|
+
* Compress data from `src` into `dst`, using the more powerful but slower "HC" algorithm.
|
58
|
+
* `dst` must be already allocated.
|
59
|
+
* Compression is guaranteed to succeed if `dstCapacity >= LZ4_compressBound(srcSize)` (see "lz4.h")
|
60
|
+
* Max supported `srcSize` value is LZ4_MAX_INPUT_SIZE (see "lz4.h")
|
61
|
+
* `compressionLevel` : Recommended values are between 4 and 9, although any value between 1 and LZ4HC_CLEVEL_MAX will work.
|
62
|
+
* Values >LZ4HC_CLEVEL_MAX behave the same as LZ4HC_CLEVEL_MAX.
|
63
|
+
* @return : the number of bytes written into 'dst'
|
64
|
+
* or 0 if compression fails.
|
65
|
+
*/
|
66
|
+
LZ4LIB_API int LZ4_compress_HC (const char* src, char* dst, int srcSize, int dstCapacity, int compressionLevel);
|
67
|
+
|
68
|
+
|
69
|
+
/* Note :
|
70
|
+
* Decompression functions are provided within "lz4.h" (BSD license)
|
71
|
+
*/
|
72
|
+
|
73
|
+
|
74
|
+
/*! LZ4_compress_HC_extStateHC() :
|
75
|
+
* Same as LZ4_compress_HC(), but using an externally allocated memory segment for `state`.
|
76
|
+
* `state` size is provided by LZ4_sizeofStateHC().
|
77
|
+
* Memory segment must be aligned on 8-bytes boundaries (which a normal malloc() will do properly).
|
78
|
+
*/
|
79
|
+
LZ4LIB_API int LZ4_compress_HC_extStateHC(void* state, const char* src, char* dst, int srcSize, int maxDstSize, int compressionLevel);
|
80
|
+
LZ4LIB_API int LZ4_sizeofStateHC(void);
|
81
|
+
|
82
|
+
|
83
|
+
/*-************************************
|
84
|
+
* Streaming Compression
|
85
|
+
* Bufferless synchronous API
|
86
|
+
**************************************/
|
87
|
+
typedef union LZ4_streamHC_u LZ4_streamHC_t; /* incomplete type (defined later) */
|
88
|
+
|
89
|
+
/*! LZ4_createStreamHC() and LZ4_freeStreamHC() :
|
90
|
+
* These functions create and release memory for LZ4 HC streaming state.
|
91
|
+
* Newly created states are automatically initialized.
|
92
|
+
* Existing states can be re-used several times, using LZ4_resetStreamHC().
|
93
|
+
* These methods are API and ABI stable, they can be used in combination with a DLL.
|
94
|
+
*/
|
95
|
+
LZ4LIB_API LZ4_streamHC_t* LZ4_createStreamHC(void);
|
96
|
+
LZ4LIB_API int LZ4_freeStreamHC (LZ4_streamHC_t* streamHCPtr);
|
97
|
+
|
98
|
+
LZ4LIB_API void LZ4_resetStreamHC (LZ4_streamHC_t* streamHCPtr, int compressionLevel);
|
99
|
+
LZ4LIB_API int LZ4_loadDictHC (LZ4_streamHC_t* streamHCPtr, const char* dictionary, int dictSize);
|
100
|
+
|
101
|
+
LZ4LIB_API int LZ4_compress_HC_continue (LZ4_streamHC_t* streamHCPtr, const char* src, char* dst, int srcSize, int maxDstSize);
|
102
|
+
|
103
|
+
LZ4LIB_API int LZ4_saveDictHC (LZ4_streamHC_t* streamHCPtr, char* safeBuffer, int maxDictSize);
|
104
|
+
|
105
|
+
/*
|
106
|
+
These functions compress data in successive blocks of any size, using previous blocks as dictionary.
|
107
|
+
One key assumption is that previous blocks (up to 64 KB) remain read-accessible while compressing next blocks.
|
108
|
+
There is an exception for ring buffers, which can be smaller than 64 KB.
|
109
|
+
Ring buffers scenario is automatically detected and handled by LZ4_compress_HC_continue().
|
110
|
+
|
111
|
+
Before starting compression, state must be properly initialized, using LZ4_resetStreamHC().
|
112
|
+
A first "fictional block" can then be designated as initial dictionary, using LZ4_loadDictHC() (Optional).
|
113
|
+
|
114
|
+
Then, use LZ4_compress_HC_continue() to compress each successive block.
|
115
|
+
Previous memory blocks (including initial dictionary when present) must remain accessible and unmodified during compression.
|
116
|
+
'dst' buffer should be sized to handle worst case scenarios (see LZ4_compressBound()), to ensure operation success.
|
117
|
+
In case of failure, the API does not guarantee context recovery, and the context will have to be reset.
|
118
|
+
If `dst` buffer budget cannot be >= LZ4_compressBound(), consider using LZ4_compress_HC_continue_destSize() instead.
|
119
|
+
|
120
|
+
If, for any reason, previous data block can't be preserved unmodified in memory for next compression block,
|
121
|
+
you can save it to a more stable memory space, using LZ4_saveDictHC().
|
122
|
+
Return value of LZ4_saveDictHC() is the size of dictionary effectively saved into 'safeBuffer'.
|
123
|
+
*/
|
124
|
+
|
125
|
+
|
126
|
+
/*-*************************************
|
127
|
+
* PRIVATE DEFINITIONS :
|
128
|
+
* Do not use these definitions.
|
129
|
+
* They are exposed to allow static allocation of `LZ4_streamHC_t`.
|
130
|
+
* Using these definitions makes the code vulnerable to potential API break when upgrading LZ4
|
131
|
+
**************************************/
|
132
|
+
#define LZ4HC_DICTIONARY_LOGSIZE 17 /* because of btopt, hc would only need 16 */
|
133
|
+
#define LZ4HC_MAXD (1<<LZ4HC_DICTIONARY_LOGSIZE)
|
134
|
+
#define LZ4HC_MAXD_MASK (LZ4HC_MAXD - 1)
|
135
|
+
|
136
|
+
#define LZ4HC_HASH_LOG 15
|
137
|
+
#define LZ4HC_HASHTABLESIZE (1 << LZ4HC_HASH_LOG)
|
138
|
+
#define LZ4HC_HASH_MASK (LZ4HC_HASHTABLESIZE - 1)
|
139
|
+
|
140
|
+
|
141
|
+
#if defined(__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
|
142
|
+
#include <stdint.h>
|
143
|
+
|
144
|
+
typedef struct
|
145
|
+
{
|
146
|
+
uint32_t hashTable[LZ4HC_HASHTABLESIZE];
|
147
|
+
uint16_t chainTable[LZ4HC_MAXD];
|
148
|
+
const uint8_t* end; /* next block here to continue on current prefix */
|
149
|
+
const uint8_t* base; /* All index relative to this position */
|
150
|
+
const uint8_t* dictBase; /* alternate base for extDict */
|
151
|
+
uint8_t* inputBuffer; /* deprecated */
|
152
|
+
uint32_t dictLimit; /* below that point, need extDict */
|
153
|
+
uint32_t lowLimit; /* below that point, no more dict */
|
154
|
+
uint32_t nextToUpdate; /* index from which to continue dictionary update */
|
155
|
+
uint32_t searchNum; /* only for optimal parser */
|
156
|
+
uint32_t compressionLevel;
|
157
|
+
} LZ4HC_CCtx_internal;
|
158
|
+
|
159
|
+
#else
|
160
|
+
|
161
|
+
typedef struct
|
162
|
+
{
|
163
|
+
unsigned int hashTable[LZ4HC_HASHTABLESIZE];
|
164
|
+
unsigned short chainTable[LZ4HC_MAXD];
|
165
|
+
const unsigned char* end; /* next block here to continue on current prefix */
|
166
|
+
const unsigned char* base; /* All index relative to this position */
|
167
|
+
const unsigned char* dictBase; /* alternate base for extDict */
|
168
|
+
unsigned char* inputBuffer; /* deprecated */
|
169
|
+
unsigned int dictLimit; /* below that point, need extDict */
|
170
|
+
unsigned int lowLimit; /* below that point, no more dict */
|
171
|
+
unsigned int nextToUpdate; /* index from which to continue dictionary update */
|
172
|
+
unsigned int searchNum; /* only for optimal parser */
|
173
|
+
int compressionLevel;
|
174
|
+
} LZ4HC_CCtx_internal;
|
175
|
+
|
176
|
+
#endif
|
177
|
+
|
178
|
+
#define LZ4_STREAMHCSIZE (4*LZ4HC_HASHTABLESIZE + 2*LZ4HC_MAXD + 56) /* 393272 = 131072 (hashTable) + 262144 (chainTable) + 56 */
|
179
|
+
#define LZ4_STREAMHCSIZE_SIZET (LZ4_STREAMHCSIZE / sizeof(size_t))
|
180
|
+
union LZ4_streamHC_u {
|
181
|
+
size_t table[LZ4_STREAMHCSIZE_SIZET];
|
182
|
+
LZ4HC_CCtx_internal internal_donotuse;
|
183
|
+
}; /* previously typedef'd to LZ4_streamHC_t */
|
184
|
+
/*
|
185
|
+
LZ4_streamHC_t :
|
186
|
+
This structure allows static allocation of LZ4 HC streaming state.
|
187
|
+
State must be initialized using LZ4_resetStreamHC() before first use.
|
188
|
+
|
189
|
+
Static allocation shall only be used in combination with static linking.
|
190
|
+
When invoking LZ4 from a DLL, use create/free functions instead, which are API and ABI stable.
|
191
|
+
*/
|
192
|
+
|
193
|
+
|
194
|
+
/*-************************************
|
195
|
+
* Deprecated Functions
|
196
|
+
**************************************/
|
197
|
+
/* see lz4.h LZ4_DISABLE_DEPRECATE_WARNINGS to turn off deprecation warnings */
|
198
|
+
|
199
|
+
/* deprecated compression functions */
|
200
|
+
/* these functions will trigger warning messages in future releases */
|
201
|
+
LZ4LIB_API LZ4_DEPRECATED("use LZ4_compress_HC() instead") int LZ4_compressHC (const char* source, char* dest, int inputSize);
|
202
|
+
LZ4LIB_API LZ4_DEPRECATED("use LZ4_compress_HC() instead") int LZ4_compressHC_limitedOutput (const char* source, char* dest, int inputSize, int maxOutputSize);
|
203
|
+
LZ4LIB_API LZ4_DEPRECATED("use LZ4_compress_HC() instead") int LZ4_compressHC2 (const char* source, char* dest, int inputSize, int compressionLevel);
|
204
|
+
LZ4LIB_API LZ4_DEPRECATED("use LZ4_compress_HC() instead") int LZ4_compressHC2_limitedOutput (const char* source, char* dest, int inputSize, int maxOutputSize, int compressionLevel);
|
205
|
+
LZ4LIB_API LZ4_DEPRECATED("use LZ4_compress_HC_extStateHC() instead") int LZ4_compressHC_withStateHC (void* state, const char* source, char* dest, int inputSize);
|
206
|
+
LZ4LIB_API LZ4_DEPRECATED("use LZ4_compress_HC_extStateHC() instead") int LZ4_compressHC_limitedOutput_withStateHC (void* state, const char* source, char* dest, int inputSize, int maxOutputSize);
|
207
|
+
LZ4LIB_API LZ4_DEPRECATED("use LZ4_compress_HC_extStateHC() instead") int LZ4_compressHC2_withStateHC (void* state, const char* source, char* dest, int inputSize, int compressionLevel);
|
208
|
+
LZ4LIB_API LZ4_DEPRECATED("use LZ4_compress_HC_extStateHC() instead") int LZ4_compressHC2_limitedOutput_withStateHC(void* state, const char* source, char* dest, int inputSize, int maxOutputSize, int compressionLevel);
|
209
|
+
LZ4LIB_API LZ4_DEPRECATED("use LZ4_compress_HC_continue() instead") int LZ4_compressHC_continue (LZ4_streamHC_t* LZ4_streamHCPtr, const char* source, char* dest, int inputSize);
|
210
|
+
LZ4LIB_API LZ4_DEPRECATED("use LZ4_compress_HC_continue() instead") int LZ4_compressHC_limitedOutput_continue (LZ4_streamHC_t* LZ4_streamHCPtr, const char* source, char* dest, int inputSize, int maxOutputSize);
|
211
|
+
|
212
|
+
/* Deprecated Streaming functions using older model; should no longer be used */
|
213
|
+
LZ4LIB_API LZ4_DEPRECATED("use LZ4_createStreamHC() instead") void* LZ4_createHC (char* inputBuffer);
|
214
|
+
LZ4LIB_API LZ4_DEPRECATED("use LZ4_saveDictHC() instead") char* LZ4_slideInputBufferHC (void* LZ4HC_Data);
|
215
|
+
LZ4LIB_API LZ4_DEPRECATED("use LZ4_freeStreamHC() instead") int LZ4_freeHC (void* LZ4HC_Data);
|
216
|
+
LZ4LIB_API LZ4_DEPRECATED("use LZ4_compress_HC_continue() instead") int LZ4_compressHC2_continue (void* LZ4HC_Data, const char* source, char* dest, int inputSize, int compressionLevel);
|
217
|
+
LZ4LIB_API LZ4_DEPRECATED("use LZ4_compress_HC_continue() instead") int LZ4_compressHC2_limitedOutput_continue (void* LZ4HC_Data, const char* source, char* dest, int inputSize, int maxOutputSize, int compressionLevel);
|
218
|
+
LZ4LIB_API LZ4_DEPRECATED("use LZ4_createStreamHC() instead") int LZ4_sizeofStreamStateHC(void);
|
219
|
+
LZ4LIB_API LZ4_DEPRECATED("use LZ4_resetStreamHC() instead") int LZ4_resetStreamStateHC(void* state, char* inputBuffer);
|
220
|
+
|
221
|
+
|
222
|
+
#if defined (__cplusplus)
|
223
|
+
}
|
224
|
+
#endif
|
225
|
+
|
226
|
+
#endif /* LZ4_HC_H_19834876238432 */
|
227
|
+
|
228
|
+
/*-************************************************
|
229
|
+
* !!!!! STATIC LINKING ONLY !!!!!
|
230
|
+
* Following definitions are considered experimental.
|
231
|
+
* They should not be linked from DLL,
|
232
|
+
* as there is no guarantee of API stability yet.
|
233
|
+
* Prototypes will be promoted to "stable" status
|
234
|
+
* after successful usage in real-life scenarios.
|
235
|
+
*************************************************/
|
236
|
+
#ifdef LZ4_HC_STATIC_LINKING_ONLY /* protection macro */
|
237
|
+
#ifndef LZ4_HC_SLO_098092834
|
238
|
+
#define LZ4_HC_SLO_098092834
|
239
|
+
|
240
|
+
/*! LZ4_compress_HC_destSize() : v1.8.0 (experimental)
|
241
|
+
* Will try to compress as much data from `src` as possible
|
242
|
+
* that can fit into `targetDstSize` budget.
|
243
|
+
* Result is provided in 2 parts :
|
244
|
+
* @return : the number of bytes written into 'dst'
|
245
|
+
* or 0 if compression fails.
|
246
|
+
* `srcSizePtr` : value will be updated to indicate how many bytes were read from `src`
|
247
|
+
*/
|
248
|
+
int LZ4_compress_HC_destSize(void* LZ4HC_Data,
|
249
|
+
const char* src, char* dst,
|
250
|
+
int* srcSizePtr, int targetDstSize,
|
251
|
+
int compressionLevel);
|
252
|
+
|
253
|
+
/*! LZ4_compress_HC_continue_destSize() : v1.8.0 (experimental)
|
254
|
+
* Similar to LZ4_compress_HC_continue(),
|
255
|
+
* but will read a variable nb of bytes from `src`
|
256
|
+
* to fit into `targetDstSize` budget.
|
257
|
+
* Result is provided in 2 parts :
|
258
|
+
* @return : the number of bytes written into 'dst'
|
259
|
+
* or 0 if compression fails.
|
260
|
+
* `srcSizePtr` : value will be updated to indicate how many bytes were read from `src`.
|
261
|
+
* Important : due to limitations, this prototype only works well up to cLevel < LZ4HC_CLEVEL_OPT_MIN
|
262
|
+
* beyond that level, compression performance will be much reduced due to internal incompatibilities
|
263
|
+
*/
|
264
|
+
int LZ4_compress_HC_continue_destSize(LZ4_streamHC_t* LZ4_streamHCPtr,
|
265
|
+
const char* src, char* dst,
|
266
|
+
int* srcSizePtr, int targetDstSize);
|
267
|
+
|
268
|
+
/*! LZ4_setCompressionLevel() : v1.8.0 (experimental)
|
269
|
+
* It's possible to change compression level after LZ4_resetStreamHC(), between 2 invocations of LZ4_compress_HC_continue*(),
|
270
|
+
* but that requires staying in the same mode (aka 1-10 or 11-12).
|
271
|
+
* This function ensures this condition.
|
272
|
+
*/
|
273
|
+
void LZ4_setCompressionLevel(LZ4_streamHC_t* LZ4_streamHCPtr, int compressionLevel);
|
274
|
+
|
275
|
+
|
276
|
+
|
277
|
+
#endif /* LZ4_HC_SLO_098092834 */
|
278
|
+
#endif /* LZ4_HC_STATIC_LINKING_ONLY */
|
@@ -0,0 +1,366 @@
|
|
1
|
+
/*
|
2
|
+
lz4opt.h - Optimal Mode of LZ4
|
3
|
+
Copyright (C) 2015-2017, Przemyslaw Skibinski <inikep@gmail.com>
|
4
|
+
Note : this file is intended to be included within lz4hc.c
|
5
|
+
|
6
|
+
BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
|
7
|
+
|
8
|
+
Redistribution and use in source and binary forms, with or without
|
9
|
+
modification, are permitted provided that the following conditions are
|
10
|
+
met:
|
11
|
+
|
12
|
+
* Redistributions of source code must retain the above copyright
|
13
|
+
notice, this list of conditions and the following disclaimer.
|
14
|
+
* Redistributions in binary form must reproduce the above
|
15
|
+
copyright notice, this list of conditions and the following disclaimer
|
16
|
+
in the documentation and/or other materials provided with the
|
17
|
+
distribution.
|
18
|
+
|
19
|
+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
20
|
+
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
21
|
+
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
22
|
+
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
23
|
+
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
24
|
+
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
25
|
+
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
26
|
+
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
27
|
+
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
28
|
+
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
29
|
+
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
30
|
+
|
31
|
+
You can contact the author at :
|
32
|
+
- LZ4 source repository : https://github.com/lz4/lz4
|
33
|
+
- LZ4 public forum : https://groups.google.com/forum/#!forum/lz4c
|
34
|
+
*/
|
35
|
+
|
36
|
+
#define LZ4_OPT_NUM (1<<12)
|
37
|
+
|
38
|
+
|
39
|
+
/* One match candidate, as filled in by the binary-tree match finder. */
typedef struct {
    int off;    /* distance from the searched position back to the match */
    int len;    /* length of the match, in bytes */
} LZ4HC_match_t;
|
43
|
+
|
44
|
+
/* Per-position record used by the optimal parser (one entry per byte
 * of the window currently being priced). */
typedef struct {
    int price;     /* cheapest known cost, in bytes, to reach this position */
    int off;       /* offset of the match recorded here (0 for a literal) */
    int mlen;      /* length of the match recorded here ; 1 denotes a literal */
    int litlen;    /* literal run length recorded together with this entry */
} LZ4HC_optimal_t;
|
50
|
+
|
51
|
+
|
52
|
+
/* price in bytes */
|
53
|
+
FORCE_INLINE size_t LZ4HC_literalsPrice(size_t litlen)
|
54
|
+
{
|
55
|
+
size_t price = litlen;
|
56
|
+
if (litlen >= (size_t)RUN_MASK)
|
57
|
+
price += 1 + (litlen-RUN_MASK)/255;
|
58
|
+
return price;
|
59
|
+
}
|
60
|
+
|
61
|
+
|
62
|
+
/* requires mlen >= MINMATCH */
|
63
|
+
FORCE_INLINE size_t LZ4HC_sequencePrice(size_t litlen, size_t mlen)
|
64
|
+
{
|
65
|
+
size_t price = 2 + 1; /* 16-bit offset + token */
|
66
|
+
|
67
|
+
price += LZ4HC_literalsPrice(litlen);
|
68
|
+
|
69
|
+
if (mlen >= (size_t)(ML_MASK+MINMATCH))
|
70
|
+
price+= 1 + (mlen-(ML_MASK+MINMATCH))/255;
|
71
|
+
|
72
|
+
return price;
|
73
|
+
}
|
74
|
+
|
75
|
+
|
76
|
+
/*-*************************************
|
77
|
+
* Binary Tree search
|
78
|
+
***************************************/
|
79
|
+
FORCE_INLINE int LZ4HC_BinTree_InsertAndGetAllMatches (
|
80
|
+
LZ4HC_CCtx_internal* ctx,
|
81
|
+
const BYTE* const ip,
|
82
|
+
const BYTE* const iHighLimit,
|
83
|
+
size_t best_mlen,
|
84
|
+
LZ4HC_match_t* matches,
|
85
|
+
int* matchNum)
|
86
|
+
{
|
87
|
+
U16* const chainTable = ctx->chainTable;
|
88
|
+
U32* const HashTable = ctx->hashTable;
|
89
|
+
const BYTE* const base = ctx->base;
|
90
|
+
const U32 dictLimit = ctx->dictLimit;
|
91
|
+
const U32 current = (U32)(ip - base);
|
92
|
+
const U32 lowLimit = (ctx->lowLimit + MAX_DISTANCE > current) ? ctx->lowLimit : current - (MAX_DISTANCE - 1);
|
93
|
+
const BYTE* const dictBase = ctx->dictBase;
|
94
|
+
const BYTE* match;
|
95
|
+
int nbAttempts = ctx->searchNum;
|
96
|
+
int mnum = 0;
|
97
|
+
U16 *ptr0, *ptr1, delta0, delta1;
|
98
|
+
U32 matchIndex;
|
99
|
+
size_t matchLength = 0;
|
100
|
+
U32* HashPos;
|
101
|
+
|
102
|
+
if (ip + MINMATCH > iHighLimit) return 1;
|
103
|
+
|
104
|
+
/* HC4 match finder */
|
105
|
+
HashPos = &HashTable[LZ4HC_hashPtr(ip)];
|
106
|
+
matchIndex = *HashPos;
|
107
|
+
*HashPos = current;
|
108
|
+
|
109
|
+
ptr0 = &DELTANEXTMAXD(current*2+1);
|
110
|
+
ptr1 = &DELTANEXTMAXD(current*2);
|
111
|
+
delta0 = delta1 = (U16)(current - matchIndex);
|
112
|
+
|
113
|
+
while ((matchIndex < current) && (matchIndex>=lowLimit) && (nbAttempts)) {
|
114
|
+
nbAttempts--;
|
115
|
+
if (matchIndex >= dictLimit) {
|
116
|
+
match = base + matchIndex;
|
117
|
+
matchLength = LZ4_count(ip, match, iHighLimit);
|
118
|
+
} else {
|
119
|
+
const BYTE* vLimit = ip + (dictLimit - matchIndex);
|
120
|
+
match = dictBase + matchIndex;
|
121
|
+
if (vLimit > iHighLimit) vLimit = iHighLimit;
|
122
|
+
matchLength = LZ4_count(ip, match, vLimit);
|
123
|
+
if ((ip+matchLength == vLimit) && (vLimit < iHighLimit))
|
124
|
+
matchLength += LZ4_count(ip+matchLength, base+dictLimit, iHighLimit);
|
125
|
+
if (matchIndex+matchLength >= dictLimit)
|
126
|
+
match = base + matchIndex; /* to prepare for next usage of match[matchLength] */
|
127
|
+
}
|
128
|
+
|
129
|
+
if (matchLength > best_mlen) {
|
130
|
+
best_mlen = matchLength;
|
131
|
+
if (matches) {
|
132
|
+
if (matchIndex >= dictLimit)
|
133
|
+
matches[mnum].off = (int)(ip - match);
|
134
|
+
else
|
135
|
+
matches[mnum].off = (int)(ip - (base + matchIndex)); /* virtual matchpos */
|
136
|
+
matches[mnum].len = (int)matchLength;
|
137
|
+
mnum++;
|
138
|
+
}
|
139
|
+
if (best_mlen > LZ4_OPT_NUM) break;
|
140
|
+
}
|
141
|
+
|
142
|
+
if (ip+matchLength >= iHighLimit) /* equal : no way to know if inf or sup */
|
143
|
+
break; /* drop , to guarantee consistency ; miss a bit of compression, but other solutions can corrupt the tree */
|
144
|
+
|
145
|
+
DEBUGLOG(6, "ip :%016llX", (U64)ip);
|
146
|
+
DEBUGLOG(6, "match:%016llX", (U64)match);
|
147
|
+
if (*(ip+matchLength) < *(match+matchLength)) {
|
148
|
+
*ptr0 = delta0;
|
149
|
+
ptr0 = &DELTANEXTMAXD(matchIndex*2);
|
150
|
+
if (*ptr0 == (U16)-1) break;
|
151
|
+
delta0 = *ptr0;
|
152
|
+
delta1 += delta0;
|
153
|
+
matchIndex -= delta0;
|
154
|
+
} else {
|
155
|
+
*ptr1 = delta1;
|
156
|
+
ptr1 = &DELTANEXTMAXD(matchIndex*2+1);
|
157
|
+
if (*ptr1 == (U16)-1) break;
|
158
|
+
delta1 = *ptr1;
|
159
|
+
delta0 += delta1;
|
160
|
+
matchIndex -= delta1;
|
161
|
+
}
|
162
|
+
}
|
163
|
+
|
164
|
+
*ptr0 = (U16)-1;
|
165
|
+
*ptr1 = (U16)-1;
|
166
|
+
if (matchNum) *matchNum = mnum;
|
167
|
+
/* if (best_mlen > 8) return best_mlen-8; */
|
168
|
+
if (!matchNum) return 1;
|
169
|
+
return 1;
|
170
|
+
}
|
171
|
+
|
172
|
+
|
173
|
+
/* LZ4HC_updateBinTree() :
 * Inserts into the binary tree every position from ctx->nextToUpdate
 * up to (but excluding) `ip`. No match is collected during this catch-up.
 * Note : ctx->nextToUpdate itself is not modified here. */
FORCE_INLINE void LZ4HC_updateBinTree(LZ4HC_CCtx_internal* ctx, const BYTE* const ip, const BYTE* const iHighLimit)
{
    const BYTE* const start = ctx->base;
    U32 const stop = (U32)(ip - start);
    U32 pos;

    for (pos = ctx->nextToUpdate; pos < stop; ) {
        /* the insertion routine reports how far to advance */
        pos += LZ4HC_BinTree_InsertAndGetAllMatches(ctx, start + pos, iHighLimit, 8, NULL, NULL);
    }
}
|
181
|
+
|
182
|
+
|
183
|
+
/** Tree updater, providing best match */
|
184
|
+
FORCE_INLINE int LZ4HC_BinTree_GetAllMatches (
|
185
|
+
LZ4HC_CCtx_internal* ctx,
|
186
|
+
const BYTE* const ip, const BYTE* const iHighLimit,
|
187
|
+
size_t best_mlen, LZ4HC_match_t* matches, const int fullUpdate)
|
188
|
+
{
|
189
|
+
int mnum = 0;
|
190
|
+
if (ip < ctx->base + ctx->nextToUpdate) return 0; /* skipped area */
|
191
|
+
if (fullUpdate) LZ4HC_updateBinTree(ctx, ip, iHighLimit);
|
192
|
+
best_mlen = LZ4HC_BinTree_InsertAndGetAllMatches(ctx, ip, iHighLimit, best_mlen, matches, &mnum);
|
193
|
+
ctx->nextToUpdate = (U32)(ip - ctx->base + best_mlen);
|
194
|
+
return mnum;
|
195
|
+
}
|
196
|
+
|
197
|
+
|
198
|
+
/* SET_PRICE() :
 * Records at opt[pos] a way to reach position `pos` :
 * match length `ml`, offset `offset`, literal run `ll`, for a total of `cost` bytes.
 * Relies on two variables of the calling scope : the array `opt`
 * and the index `last_pos`. When `pos` lies beyond `last_pos`,
 * every newly covered entry first gets a price of 1<<30 ("not reached yet"),
 * and `last_pos` is moved up to `pos`.
 * Wrapped in do { } while (0) so that it behaves as a single statement
 * (safe in an unbraced if/else) ; must be followed by a semicolon.
 * Note : `pos` is evaluated several times, pass an expression without side effects. */
#define SET_PRICE(pos, ml, offset, ll, cost)       \
    do {                                           \
        while (last_pos < (pos)) {                 \
            opt[last_pos+1].price = 1<<30;         \
            last_pos++;                            \
        }                                          \
        opt[pos].mlen   = (int)(ml);               \
        opt[pos].off    = (int)(offset);           \
        opt[pos].litlen = (int)(ll);               \
        opt[pos].price  = (int)(cost);             \
    } while (0)
|
206
|
+
|
207
|
+
|
208
|
+
static int LZ4HC_compress_optimal (
|
209
|
+
LZ4HC_CCtx_internal* ctx,
|
210
|
+
const char* const source,
|
211
|
+
char* dest,
|
212
|
+
int inputSize,
|
213
|
+
int maxOutputSize,
|
214
|
+
limitedOutput_directive limit,
|
215
|
+
size_t sufficient_len,
|
216
|
+
const int fullUpdate
|
217
|
+
)
|
218
|
+
{
|
219
|
+
LZ4HC_optimal_t opt[LZ4_OPT_NUM + 1]; /* this uses a bit too much stack memory to my taste ... */
|
220
|
+
LZ4HC_match_t matches[LZ4_OPT_NUM + 1];
|
221
|
+
|
222
|
+
const BYTE* ip = (const BYTE*) source;
|
223
|
+
const BYTE* anchor = ip;
|
224
|
+
const BYTE* const iend = ip + inputSize;
|
225
|
+
const BYTE* const mflimit = iend - MFLIMIT;
|
226
|
+
const BYTE* const matchlimit = (iend - LASTLITERALS);
|
227
|
+
BYTE* op = (BYTE*) dest;
|
228
|
+
BYTE* const oend = op + maxOutputSize;
|
229
|
+
|
230
|
+
/* init */
|
231
|
+
DEBUGLOG(5, "LZ4HC_compress_optimal");
|
232
|
+
if (sufficient_len >= LZ4_OPT_NUM) sufficient_len = LZ4_OPT_NUM-1;
|
233
|
+
ctx->end += inputSize;
|
234
|
+
ip++;
|
235
|
+
|
236
|
+
/* Main Loop */
|
237
|
+
while (ip < mflimit) {
|
238
|
+
size_t const llen = ip - anchor;
|
239
|
+
size_t last_pos = 0;
|
240
|
+
size_t match_num, cur, best_mlen, best_off;
|
241
|
+
memset(opt, 0, sizeof(LZ4HC_optimal_t)); /* memset only the first one */
|
242
|
+
|
243
|
+
match_num = LZ4HC_BinTree_GetAllMatches(ctx, ip, matchlimit, MINMATCH-1, matches, fullUpdate);
|
244
|
+
if (!match_num) { ip++; continue; }
|
245
|
+
|
246
|
+
if ((size_t)matches[match_num-1].len > sufficient_len) {
|
247
|
+
/* good enough solution : immediate encoding */
|
248
|
+
best_mlen = matches[match_num-1].len;
|
249
|
+
best_off = matches[match_num-1].off;
|
250
|
+
cur = 0;
|
251
|
+
last_pos = 1;
|
252
|
+
goto encode;
|
253
|
+
}
|
254
|
+
|
255
|
+
/* set prices using matches at position = 0 */
|
256
|
+
{ size_t matchNb;
|
257
|
+
for (matchNb = 0; matchNb < match_num; matchNb++) {
|
258
|
+
size_t mlen = (matchNb>0) ? (size_t)matches[matchNb-1].len+1 : MINMATCH;
|
259
|
+
best_mlen = matches[matchNb].len; /* necessarily < sufficient_len < LZ4_OPT_NUM */
|
260
|
+
for ( ; mlen <= best_mlen ; mlen++) {
|
261
|
+
size_t const cost = LZ4HC_sequencePrice(llen, mlen) - LZ4HC_literalsPrice(llen);
|
262
|
+
SET_PRICE(mlen, mlen, matches[matchNb].off, 0, cost); /* updates last_pos and opt[pos] */
|
263
|
+
} } }
|
264
|
+
|
265
|
+
if (last_pos < MINMATCH) { ip++; continue; } /* note : on clang at least, this test improves performance */
|
266
|
+
|
267
|
+
/* check further positions */
|
268
|
+
opt[0].mlen = opt[1].mlen = 1;
|
269
|
+
for (cur = 1; cur <= last_pos; cur++) {
|
270
|
+
const BYTE* const curPtr = ip + cur;
|
271
|
+
|
272
|
+
/* establish baseline price if cur is literal */
|
273
|
+
{ size_t price, litlen;
|
274
|
+
if (opt[cur-1].mlen == 1) {
|
275
|
+
/* no match at previous position */
|
276
|
+
litlen = opt[cur-1].litlen + 1;
|
277
|
+
if (cur > litlen) {
|
278
|
+
price = opt[cur - litlen].price + LZ4HC_literalsPrice(litlen);
|
279
|
+
} else {
|
280
|
+
price = LZ4HC_literalsPrice(llen + litlen) - LZ4HC_literalsPrice(llen);
|
281
|
+
}
|
282
|
+
} else {
|
283
|
+
litlen = 1;
|
284
|
+
price = opt[cur - 1].price + LZ4HC_literalsPrice(1);
|
285
|
+
}
|
286
|
+
|
287
|
+
if (price < (size_t)opt[cur].price)
|
288
|
+
SET_PRICE(cur, 1 /*mlen*/, 0 /*off*/, litlen, price); /* note : increases last_pos */
|
289
|
+
}
|
290
|
+
|
291
|
+
if (cur == last_pos || curPtr >= mflimit) break;
|
292
|
+
|
293
|
+
match_num = LZ4HC_BinTree_GetAllMatches(ctx, curPtr, matchlimit, MINMATCH-1, matches, fullUpdate);
|
294
|
+
if ((match_num > 0) && (size_t)matches[match_num-1].len > sufficient_len) {
|
295
|
+
/* immediate encoding */
|
296
|
+
best_mlen = matches[match_num-1].len;
|
297
|
+
best_off = matches[match_num-1].off;
|
298
|
+
last_pos = cur + 1;
|
299
|
+
goto encode;
|
300
|
+
}
|
301
|
+
|
302
|
+
/* set prices using matches at position = cur */
|
303
|
+
{ size_t matchNb;
|
304
|
+
for (matchNb = 0; matchNb < match_num; matchNb++) {
|
305
|
+
size_t ml = (matchNb>0) ? (size_t)matches[matchNb-1].len+1 : MINMATCH;
|
306
|
+
best_mlen = (cur + matches[matchNb].len < LZ4_OPT_NUM) ?
|
307
|
+
(size_t)matches[matchNb].len : LZ4_OPT_NUM - cur;
|
308
|
+
|
309
|
+
for ( ; ml <= best_mlen ; ml++) {
|
310
|
+
size_t ll, price;
|
311
|
+
if (opt[cur].mlen == 1) {
|
312
|
+
ll = opt[cur].litlen;
|
313
|
+
if (cur > ll)
|
314
|
+
price = opt[cur - ll].price + LZ4HC_sequencePrice(ll, ml);
|
315
|
+
else
|
316
|
+
price = LZ4HC_sequencePrice(llen + ll, ml) - LZ4HC_literalsPrice(llen);
|
317
|
+
} else {
|
318
|
+
ll = 0;
|
319
|
+
price = opt[cur].price + LZ4HC_sequencePrice(0, ml);
|
320
|
+
}
|
321
|
+
|
322
|
+
if (cur + ml > last_pos || price < (size_t)opt[cur + ml].price) {
|
323
|
+
SET_PRICE(cur + ml, ml, matches[matchNb].off, ll, price);
|
324
|
+
} } } }
|
325
|
+
} /* for (cur = 1; cur <= last_pos; cur++) */
|
326
|
+
|
327
|
+
best_mlen = opt[last_pos].mlen;
|
328
|
+
best_off = opt[last_pos].off;
|
329
|
+
cur = last_pos - best_mlen;
|
330
|
+
|
331
|
+
encode: /* cur, last_pos, best_mlen, best_off must be set */
|
332
|
+
opt[0].mlen = 1;
|
333
|
+
while (1) { /* from end to beginning */
|
334
|
+
size_t const ml = opt[cur].mlen;
|
335
|
+
int const offset = opt[cur].off;
|
336
|
+
opt[cur].mlen = (int)best_mlen;
|
337
|
+
opt[cur].off = (int)best_off;
|
338
|
+
best_mlen = ml;
|
339
|
+
best_off = offset;
|
340
|
+
if (ml > cur) break; /* can this happen ? */
|
341
|
+
cur -= ml;
|
342
|
+
}
|
343
|
+
|
344
|
+
/* encode all recorded sequences */
|
345
|
+
cur = 0;
|
346
|
+
while (cur < last_pos) {
|
347
|
+
int const ml = opt[cur].mlen;
|
348
|
+
int const offset = opt[cur].off;
|
349
|
+
if (ml == 1) { ip++; cur++; continue; }
|
350
|
+
cur += ml;
|
351
|
+
if ( LZ4HC_encodeSequence(&ip, &op, &anchor, ml, ip - offset, limit, oend) ) return 0;
|
352
|
+
}
|
353
|
+
} /* while (ip < mflimit) */
|
354
|
+
|
355
|
+
/* Encode Last Literals */
|
356
|
+
{ int lastRun = (int)(iend - anchor);
|
357
|
+
if ((limit) && (((char*)op - dest) + lastRun + 1 + ((lastRun+255-RUN_MASK)/255) > (U32)maxOutputSize)) return 0; /* Check output limit */
|
358
|
+
if (lastRun>=(int)RUN_MASK) { *op++=(RUN_MASK<<ML_BITS); lastRun-=RUN_MASK; for(; lastRun > 254 ; lastRun-=255) *op++ = 255; *op++ = (BYTE) lastRun; }
|
359
|
+
else *op++ = (BYTE)(lastRun<<ML_BITS);
|
360
|
+
memcpy(op, anchor, iend - anchor);
|
361
|
+
op += iend-anchor;
|
362
|
+
}
|
363
|
+
|
364
|
+
/* End */
|
365
|
+
return (int) ((char*)op-dest);
|
366
|
+
}
|