extlz4 0.2.4.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/HISTORY.ja.md +116 -0
- data/LICENSE +24 -0
- data/README.md +203 -0
- data/Rakefile +212 -0
- data/bin/extlz4 +220 -0
- data/contrib/lz4/INSTALL +15 -0
- data/contrib/lz4/LICENSE +11 -0
- data/contrib/lz4/NEWS +231 -0
- data/contrib/lz4/README.md +114 -0
- data/contrib/lz4/circle.yml +39 -0
- data/contrib/lz4/lib/LICENSE +24 -0
- data/contrib/lz4/lib/README.md +73 -0
- data/contrib/lz4/lib/liblz4.pc.in +14 -0
- data/contrib/lz4/lib/lz4.c +1478 -0
- data/contrib/lz4/lib/lz4.h +463 -0
- data/contrib/lz4/lib/lz4frame.c +1669 -0
- data/contrib/lz4/lib/lz4frame.h +391 -0
- data/contrib/lz4/lib/lz4frame_static.h +143 -0
- data/contrib/lz4/lib/lz4hc.c +807 -0
- data/contrib/lz4/lib/lz4hc.h +278 -0
- data/contrib/lz4/lib/lz4opt.h +366 -0
- data/contrib/lz4/lib/xxhash.c +894 -0
- data/contrib/lz4/lib/xxhash.h +293 -0
- data/examples/frameapi.rb +43 -0
- data/ext/blockapi.c +1046 -0
- data/ext/depend +4 -0
- data/ext/extconf.rb +60 -0
- data/ext/extlz4.c +69 -0
- data/ext/extlz4.h +109 -0
- data/ext/frameapi.c +780 -0
- data/ext/hashargs.c +151 -0
- data/ext/hashargs.h +110 -0
- data/ext/lz4_amalgam.c +31 -0
- data/gemstub.rb +40 -0
- data/lib/extlz4.rb +327 -0
- data/lib/extlz4/compat.rb +12 -0
- data/lib/extlz4/fix-0.1bug.rb +96 -0
- data/lib/extlz4/oldstream.rb +529 -0
- data/lib/extlz4/version.rb +3 -0
- data/test/common.rb +18 -0
- data/test/test_blockapi.rb +105 -0
- data/test/test_frameapi.rb +59 -0
- metadata +126 -0
@@ -0,0 +1,278 @@
|
|
1
|
+
/*
|
2
|
+
LZ4 HC - High Compression Mode of LZ4
|
3
|
+
Header File
|
4
|
+
Copyright (C) 2011-2017, Yann Collet.
|
5
|
+
BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
|
6
|
+
|
7
|
+
Redistribution and use in source and binary forms, with or without
|
8
|
+
modification, are permitted provided that the following conditions are
|
9
|
+
met:
|
10
|
+
|
11
|
+
* Redistributions of source code must retain the above copyright
|
12
|
+
notice, this list of conditions and the following disclaimer.
|
13
|
+
* Redistributions in binary form must reproduce the above
|
14
|
+
copyright notice, this list of conditions and the following disclaimer
|
15
|
+
in the documentation and/or other materials provided with the
|
16
|
+
distribution.
|
17
|
+
|
18
|
+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
19
|
+
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
20
|
+
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
21
|
+
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
22
|
+
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
23
|
+
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
24
|
+
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
25
|
+
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
26
|
+
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
27
|
+
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
28
|
+
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
29
|
+
|
30
|
+
You can contact the author at :
|
31
|
+
- LZ4 source repository : https://github.com/lz4/lz4
|
32
|
+
- LZ4 public forum : https://groups.google.com/forum/#!forum/lz4c
|
33
|
+
*/
|
34
|
+
#ifndef LZ4_HC_H_19834876238432
|
35
|
+
#define LZ4_HC_H_19834876238432
|
36
|
+
|
37
|
+
#if defined (__cplusplus)
|
38
|
+
extern "C" {
|
39
|
+
#endif
|
40
|
+
|
41
|
+
/* --- Dependency --- */
|
42
|
+
/* note : lz4hc is not an independent module, it requires lz4.h/lz4.c for proper compilation */
|
43
|
+
#include "lz4.h" /* stddef, LZ4LIB_API, LZ4_DEPRECATED */
|
44
|
+
|
45
|
+
|
46
|
+
/* --- Useful constants --- */
|
47
|
+
#define LZ4HC_CLEVEL_MIN 3
|
48
|
+
#define LZ4HC_CLEVEL_DEFAULT 9
|
49
|
+
#define LZ4HC_CLEVEL_OPT_MIN 11
|
50
|
+
#define LZ4HC_CLEVEL_MAX 12
|
51
|
+
|
52
|
+
|
53
|
+
/*-************************************
|
54
|
+
* Block Compression
|
55
|
+
**************************************/
|
56
|
+
/*! LZ4_compress_HC() :
|
57
|
+
* Compress data from `src` into `dst`, using the more powerful but slower "HC" algorithm.
|
58
|
+
* `dst` must be already allocated.
|
59
|
+
* Compression is guaranteed to succeed if `dstCapacity >= LZ4_compressBound(srcSize)` (see "lz4.h")
|
60
|
+
* Max supported `srcSize` value is LZ4_MAX_INPUT_SIZE (see "lz4.h")
|
61
|
+
* `compressionLevel` : Recommended values are between 4 and 9, although any value between 1 and LZ4HC_CLEVEL_MAX will work.
|
62
|
+
* Values >LZ4HC_CLEVEL_MAX behave the same as LZ4HC_CLEVEL_MAX.
|
63
|
+
* @return : the number of bytes written into 'dst'
|
64
|
+
* or 0 if compression fails.
|
65
|
+
*/
|
66
|
+
LZ4LIB_API int LZ4_compress_HC (const char* src, char* dst, int srcSize, int dstCapacity, int compressionLevel);
|
67
|
+
|
68
|
+
|
69
|
+
/* Note :
|
70
|
+
* Decompression functions are provided within "lz4.h" (BSD license)
|
71
|
+
*/
|
72
|
+
|
73
|
+
|
74
|
+
/*! LZ4_compress_HC_extStateHC() :
|
75
|
+
* Same as LZ4_compress_HC(), but using an externally allocated memory segment for `state`.
|
76
|
+
* `state` size is provided by LZ4_sizeofStateHC().
|
77
|
+
* Memory segment must be aligned on 8-bytes boundaries (which a normal malloc() will do properly).
|
78
|
+
*/
|
79
|
+
LZ4LIB_API int LZ4_compress_HC_extStateHC(void* state, const char* src, char* dst, int srcSize, int maxDstSize, int compressionLevel);
|
80
|
+
LZ4LIB_API int LZ4_sizeofStateHC(void);
|
81
|
+
|
82
|
+
|
83
|
+
/*-************************************
|
84
|
+
* Streaming Compression
|
85
|
+
* Bufferless synchronous API
|
86
|
+
**************************************/
|
87
|
+
typedef union LZ4_streamHC_u LZ4_streamHC_t; /* incomplete type (defined later) */
|
88
|
+
|
89
|
+
/*! LZ4_createStreamHC() and LZ4_freeStreamHC() :
|
90
|
+
* These functions create and release memory for LZ4 HC streaming state.
|
91
|
+
* Newly created states are automatically initialized.
|
92
|
+
* Existing states can be re-used several times, using LZ4_resetStreamHC().
|
93
|
+
* These methods are API and ABI stable, they can be used in combination with a DLL.
|
94
|
+
*/
|
95
|
+
LZ4LIB_API LZ4_streamHC_t* LZ4_createStreamHC(void);
|
96
|
+
LZ4LIB_API int LZ4_freeStreamHC (LZ4_streamHC_t* streamHCPtr);
|
97
|
+
|
98
|
+
LZ4LIB_API void LZ4_resetStreamHC (LZ4_streamHC_t* streamHCPtr, int compressionLevel);
|
99
|
+
LZ4LIB_API int LZ4_loadDictHC (LZ4_streamHC_t* streamHCPtr, const char* dictionary, int dictSize);
|
100
|
+
|
101
|
+
LZ4LIB_API int LZ4_compress_HC_continue (LZ4_streamHC_t* streamHCPtr, const char* src, char* dst, int srcSize, int maxDstSize);
|
102
|
+
|
103
|
+
LZ4LIB_API int LZ4_saveDictHC (LZ4_streamHC_t* streamHCPtr, char* safeBuffer, int maxDictSize);
|
104
|
+
|
105
|
+
/*
|
106
|
+
These functions compress data in successive blocks of any size, using previous blocks as dictionary.
|
107
|
+
One key assumption is that previous blocks (up to 64 KB) remain read-accessible while compressing next blocks.
|
108
|
+
There is an exception for ring buffers, which can be smaller than 64 KB.
|
109
|
+
Ring buffers scenario is automatically detected and handled by LZ4_compress_HC_continue().
|
110
|
+
|
111
|
+
Before starting compression, state must be properly initialized, using LZ4_resetStreamHC().
|
112
|
+
A first "fictional block" can then be designated as initial dictionary, using LZ4_loadDictHC() (Optional).
|
113
|
+
|
114
|
+
Then, use LZ4_compress_HC_continue() to compress each successive block.
|
115
|
+
Previous memory blocks (including initial dictionary when present) must remain accessible and unmodified during compression.
|
116
|
+
'dst' buffer should be sized to handle worst case scenarios (see LZ4_compressBound()), to ensure operation success.
|
117
|
+
In case of failure, the API does not guarantee context recovery, and the context will have to be reset.
|
118
|
+
If `dst` buffer budget cannot be >= LZ4_compressBound(), consider using LZ4_compress_HC_continue_destSize() instead.
|
119
|
+
|
120
|
+
If, for any reason, previous data block can't be preserved unmodified in memory for next compression block,
|
121
|
+
you can save it to a more stable memory space, using LZ4_saveDictHC().
|
122
|
+
Return value of LZ4_saveDictHC() is the size of dictionary effectively saved into 'safeBuffer'.
|
123
|
+
*/
|
124
|
+
|
125
|
+
|
126
|
+
/*-*************************************
|
127
|
+
* PRIVATE DEFINITIONS :
|
128
|
+
* Do not use these definitions.
|
129
|
+
* They are exposed to allow static allocation of `LZ4_streamHC_t`.
|
130
|
+
* Using these definitions makes the code vulnerable to potential API break when upgrading LZ4
|
131
|
+
**************************************/
|
132
|
+
#define LZ4HC_DICTIONARY_LOGSIZE 17 /* because of btopt, hc would only need 16 */
|
133
|
+
#define LZ4HC_MAXD (1<<LZ4HC_DICTIONARY_LOGSIZE)
|
134
|
+
#define LZ4HC_MAXD_MASK (LZ4HC_MAXD - 1)
|
135
|
+
|
136
|
+
#define LZ4HC_HASH_LOG 15
|
137
|
+
#define LZ4HC_HASHTABLESIZE (1 << LZ4HC_HASH_LOG)
|
138
|
+
#define LZ4HC_HASH_MASK (LZ4HC_HASHTABLESIZE - 1)
|
139
|
+
|
140
|
+
|
141
|
+
#if defined(__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
|
142
|
+
#include <stdint.h>
|
143
|
+
|
144
|
+
typedef struct
|
145
|
+
{
|
146
|
+
uint32_t hashTable[LZ4HC_HASHTABLESIZE];
|
147
|
+
uint16_t chainTable[LZ4HC_MAXD];
|
148
|
+
const uint8_t* end; /* next block here to continue on current prefix */
|
149
|
+
const uint8_t* base; /* All index relative to this position */
|
150
|
+
const uint8_t* dictBase; /* alternate base for extDict */
|
151
|
+
uint8_t* inputBuffer; /* deprecated */
|
152
|
+
uint32_t dictLimit; /* below that point, need extDict */
|
153
|
+
uint32_t lowLimit; /* below that point, no more dict */
|
154
|
+
uint32_t nextToUpdate; /* index from which to continue dictionary update */
|
155
|
+
uint32_t searchNum; /* only for optimal parser */
|
156
|
+
uint32_t compressionLevel;
|
157
|
+
} LZ4HC_CCtx_internal;
|
158
|
+
|
159
|
+
#else
|
160
|
+
|
161
|
+
typedef struct
|
162
|
+
{
|
163
|
+
unsigned int hashTable[LZ4HC_HASHTABLESIZE];
|
164
|
+
unsigned short chainTable[LZ4HC_MAXD];
|
165
|
+
const unsigned char* end; /* next block here to continue on current prefix */
|
166
|
+
const unsigned char* base; /* All index relative to this position */
|
167
|
+
const unsigned char* dictBase; /* alternate base for extDict */
|
168
|
+
unsigned char* inputBuffer; /* deprecated */
|
169
|
+
unsigned int dictLimit; /* below that point, need extDict */
|
170
|
+
unsigned int lowLimit; /* below that point, no more dict */
|
171
|
+
unsigned int nextToUpdate; /* index from which to continue dictionary update */
|
172
|
+
unsigned int searchNum; /* only for optimal parser */
|
173
|
+
int compressionLevel;
|
174
|
+
} LZ4HC_CCtx_internal;
|
175
|
+
|
176
|
+
#endif
|
177
|
+
|
178
|
+
#define LZ4_STREAMHCSIZE (4*LZ4HC_HASHTABLESIZE + 2*LZ4HC_MAXD + 56) /* 393272 bytes : 131072 (hashTable) + 262144 (chainTable) + 56 */
|
179
|
+
#define LZ4_STREAMHCSIZE_SIZET (LZ4_STREAMHCSIZE / sizeof(size_t))
|
180
|
+
union LZ4_streamHC_u {
|
181
|
+
size_t table[LZ4_STREAMHCSIZE_SIZET];
|
182
|
+
LZ4HC_CCtx_internal internal_donotuse;
|
183
|
+
}; /* previously typedef'd to LZ4_streamHC_t */
|
184
|
+
/*
|
185
|
+
LZ4_streamHC_t :
|
186
|
+
This structure allows static allocation of LZ4 HC streaming state.
|
187
|
+
State must be initialized using LZ4_resetStreamHC() before first use.
|
188
|
+
|
189
|
+
Static allocation shall only be used in combination with static linking.
|
190
|
+
When invoking LZ4 from a DLL, use create/free functions instead, which are API and ABI stable.
|
191
|
+
*/
|
192
|
+
|
193
|
+
|
194
|
+
/*-************************************
|
195
|
+
* Deprecated Functions
|
196
|
+
**************************************/
|
197
|
+
/* see lz4.h LZ4_DISABLE_DEPRECATE_WARNINGS to turn off deprecation warnings */
|
198
|
+
|
199
|
+
/* deprecated compression functions */
|
200
|
+
/* these functions will trigger warning messages in future releases */
|
201
|
+
LZ4LIB_API LZ4_DEPRECATED("use LZ4_compress_HC() instead") int LZ4_compressHC (const char* source, char* dest, int inputSize);
|
202
|
+
LZ4LIB_API LZ4_DEPRECATED("use LZ4_compress_HC() instead") int LZ4_compressHC_limitedOutput (const char* source, char* dest, int inputSize, int maxOutputSize);
|
203
|
+
LZ4LIB_API LZ4_DEPRECATED("use LZ4_compress_HC() instead") int LZ4_compressHC2 (const char* source, char* dest, int inputSize, int compressionLevel);
|
204
|
+
LZ4LIB_API LZ4_DEPRECATED("use LZ4_compress_HC() instead") int LZ4_compressHC2_limitedOutput (const char* source, char* dest, int inputSize, int maxOutputSize, int compressionLevel);
|
205
|
+
LZ4LIB_API LZ4_DEPRECATED("use LZ4_compress_HC_extStateHC() instead") int LZ4_compressHC_withStateHC (void* state, const char* source, char* dest, int inputSize);
|
206
|
+
LZ4LIB_API LZ4_DEPRECATED("use LZ4_compress_HC_extStateHC() instead") int LZ4_compressHC_limitedOutput_withStateHC (void* state, const char* source, char* dest, int inputSize, int maxOutputSize);
|
207
|
+
LZ4LIB_API LZ4_DEPRECATED("use LZ4_compress_HC_extStateHC() instead") int LZ4_compressHC2_withStateHC (void* state, const char* source, char* dest, int inputSize, int compressionLevel);
|
208
|
+
LZ4LIB_API LZ4_DEPRECATED("use LZ4_compress_HC_extStateHC() instead") int LZ4_compressHC2_limitedOutput_withStateHC(void* state, const char* source, char* dest, int inputSize, int maxOutputSize, int compressionLevel);
|
209
|
+
LZ4LIB_API LZ4_DEPRECATED("use LZ4_compress_HC_continue() instead") int LZ4_compressHC_continue (LZ4_streamHC_t* LZ4_streamHCPtr, const char* source, char* dest, int inputSize);
|
210
|
+
LZ4LIB_API LZ4_DEPRECATED("use LZ4_compress_HC_continue() instead") int LZ4_compressHC_limitedOutput_continue (LZ4_streamHC_t* LZ4_streamHCPtr, const char* source, char* dest, int inputSize, int maxOutputSize);
|
211
|
+
|
212
|
+
/* Deprecated Streaming functions using older model; should no longer be used */
|
213
|
+
LZ4LIB_API LZ4_DEPRECATED("use LZ4_createStreamHC() instead") void* LZ4_createHC (char* inputBuffer);
|
214
|
+
LZ4LIB_API LZ4_DEPRECATED("use LZ4_saveDictHC() instead") char* LZ4_slideInputBufferHC (void* LZ4HC_Data);
|
215
|
+
LZ4LIB_API LZ4_DEPRECATED("use LZ4_freeStreamHC() instead") int LZ4_freeHC (void* LZ4HC_Data);
|
216
|
+
LZ4LIB_API LZ4_DEPRECATED("use LZ4_compress_HC_continue() instead") int LZ4_compressHC2_continue (void* LZ4HC_Data, const char* source, char* dest, int inputSize, int compressionLevel);
|
217
|
+
LZ4LIB_API LZ4_DEPRECATED("use LZ4_compress_HC_continue() instead") int LZ4_compressHC2_limitedOutput_continue (void* LZ4HC_Data, const char* source, char* dest, int inputSize, int maxOutputSize, int compressionLevel);
|
218
|
+
LZ4LIB_API LZ4_DEPRECATED("use LZ4_createStreamHC() instead") int LZ4_sizeofStreamStateHC(void);
|
219
|
+
LZ4LIB_API LZ4_DEPRECATED("use LZ4_resetStreamHC() instead") int LZ4_resetStreamStateHC(void* state, char* inputBuffer);
|
220
|
+
|
221
|
+
|
222
|
+
#if defined (__cplusplus)
|
223
|
+
}
|
224
|
+
#endif
|
225
|
+
|
226
|
+
#endif /* LZ4_HC_H_19834876238432 */
|
227
|
+
|
228
|
+
/*-************************************************
|
229
|
+
* !!!!! STATIC LINKING ONLY !!!!!
|
230
|
+
* Following definitions are considered experimental.
|
231
|
+
* They should not be linked from DLL,
|
232
|
+
* as there is no guarantee of API stability yet.
|
233
|
+
* Prototypes will be promoted to "stable" status
|
234
|
+
* after successful usage in real-life scenarios.
|
235
|
+
*************************************************/
|
236
|
+
#ifdef LZ4_HC_STATIC_LINKING_ONLY /* protection macro */
|
237
|
+
#ifndef LZ4_HC_SLO_098092834
|
238
|
+
#define LZ4_HC_SLO_098092834
|
239
|
+
|
240
|
+
/*! LZ4_compress_HC_destSize() : v1.8.0 (experimental)
|
241
|
+
* Will try to compress as much data from `src` as possible
|
242
|
+
* that can fit into `targetDstSize` budget.
|
243
|
+
* Result is provided in 2 parts :
|
244
|
+
* @return : the number of bytes written into 'dst'
|
245
|
+
* or 0 if compression fails.
|
246
|
+
* `srcSizePtr` : value will be updated to indicate how many bytes were read from `src`
|
247
|
+
*/
|
248
|
+
int LZ4_compress_HC_destSize(void* LZ4HC_Data,
|
249
|
+
const char* src, char* dst,
|
250
|
+
int* srcSizePtr, int targetDstSize,
|
251
|
+
int compressionLevel);
|
252
|
+
|
253
|
+
/*! LZ4_compress_HC_continue_destSize() : v1.8.0 (experimental)
|
254
|
+
* Similar to LZ4_compress_HC_continue(),
|
255
|
+
* but will read a variable nb of bytes from `src`
|
256
|
+
* to fit into `targetDstSize` budget.
|
257
|
+
* Result is provided in 2 parts :
|
258
|
+
* @return : the number of bytes written into 'dst'
|
259
|
+
* or 0 if compression fails.
|
260
|
+
* `srcSizePtr` : value will be updated to indicate how many bytes were read from `src`.
|
261
|
+
* Important : due to limitations, this prototype only works well up to cLevel < LZ4HC_CLEVEL_OPT_MIN
|
262
|
+
* beyond that level, compression performance will be much reduced due to internal incompatibilities
|
263
|
+
*/
|
264
|
+
int LZ4_compress_HC_continue_destSize(LZ4_streamHC_t* LZ4_streamHCPtr,
|
265
|
+
const char* src, char* dst,
|
266
|
+
int* srcSizePtr, int targetDstSize);
|
267
|
+
|
268
|
+
/*! LZ4_setCompressionLevel() : v1.8.0 (experimental)
|
269
|
+
* It's possible to change compression level after LZ4_resetStreamHC(), between 2 invocations of LZ4_compress_HC_continue*(),
|
270
|
+
* but that requires staying in the same mode (i.e. levels 1-10 or 11-12).
|
271
|
+
* This function ensures this condition.
|
272
|
+
*/
|
273
|
+
void LZ4_setCompressionLevel(LZ4_streamHC_t* LZ4_streamHCPtr, int compressionLevel);
|
274
|
+
|
275
|
+
|
276
|
+
|
277
|
+
#endif /* LZ4_HC_SLO_098092834 */
|
278
|
+
#endif /* LZ4_HC_STATIC_LINKING_ONLY */
|
@@ -0,0 +1,366 @@
|
|
1
|
+
/*
|
2
|
+
lz4opt.h - Optimal Mode of LZ4
|
3
|
+
Copyright (C) 2015-2017, Przemyslaw Skibinski <inikep@gmail.com>
|
4
|
+
Note : this file is intended to be included within lz4hc.c
|
5
|
+
|
6
|
+
BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
|
7
|
+
|
8
|
+
Redistribution and use in source and binary forms, with or without
|
9
|
+
modification, are permitted provided that the following conditions are
|
10
|
+
met:
|
11
|
+
|
12
|
+
* Redistributions of source code must retain the above copyright
|
13
|
+
notice, this list of conditions and the following disclaimer.
|
14
|
+
* Redistributions in binary form must reproduce the above
|
15
|
+
copyright notice, this list of conditions and the following disclaimer
|
16
|
+
in the documentation and/or other materials provided with the
|
17
|
+
distribution.
|
18
|
+
|
19
|
+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
20
|
+
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
21
|
+
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
22
|
+
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
23
|
+
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
24
|
+
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
25
|
+
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
26
|
+
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
27
|
+
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
28
|
+
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
29
|
+
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
30
|
+
|
31
|
+
You can contact the author at :
|
32
|
+
- LZ4 source repository : https://github.com/lz4/lz4
|
33
|
+
- LZ4 public forum : https://groups.google.com/forum/#!forum/lz4c
|
34
|
+
*/
|
35
|
+
|
36
|
+
#define LZ4_OPT_NUM (1<<12)
|
37
|
+
|
38
|
+
|
39
|
+
/* One candidate produced by the match finder. */
typedef struct {
    int off;    /* distance from the current position back to the match */
    int len;    /* length of the match */
} LZ4HC_match_t;
|
43
|
+
|
44
|
+
/* One cell of the price table filled by the optimal parser. */
typedef struct {
    int price;     /* cost recorded for reaching this position */
    int off;       /* offset of the step ending here (0 when the step is a literal) */
    int mlen;      /* length of the step ending here; 1 denotes a literal */
    int litlen;    /* literal run length associated with this cell */
} LZ4HC_optimal_t;
|
50
|
+
|
51
|
+
|
52
|
+
/* price in bytes */
|
53
|
+
/* LZ4HC_literalsPrice() :
 * Cost of a run of `litlen` literals : the literals themselves, plus,
 * once the run reaches RUN_MASK, one extra byte and a further byte
 * for every additional 255 literals.
 * @return : the cost of the run */
FORCE_INLINE size_t LZ4HC_literalsPrice(size_t litlen)
{
    size_t const runMask = (size_t)RUN_MASK;

    if (litlen < runMask) return litlen;   /* short run : no extra length byte */
    return litlen + 1 + (litlen - runMask) / 255;
}
|
60
|
+
|
61
|
+
|
62
|
+
/* requires mlen >= MINMATCH */
|
63
|
+
/* LZ4HC_sequencePrice() :
 * Cost of one full sequence : token + 16-bit offset + literals,
 * plus extra match-length bytes once `mlen` reaches ML_MASK+MINMATCH.
 * Requires mlen >= MINMATCH.
 * @return : the cost of the sequence */
FORCE_INLINE size_t LZ4HC_sequencePrice(size_t litlen, size_t mlen)
{
    size_t const longMatch = (size_t)(ML_MASK+MINMATCH);
    size_t total = 1 /* token */ + 2 /* 16-bit offset */;

    total += LZ4HC_literalsPrice(litlen);
    if (mlen >= longMatch) {
        total += 1 + (mlen - longMatch) / 255;
    }
    return total;
}
|
74
|
+
|
75
|
+
|
76
|
+
/*-*************************************
|
77
|
+
* Binary Tree search
|
78
|
+
***************************************/
|
79
|
+
FORCE_INLINE int LZ4HC_BinTree_InsertAndGetAllMatches (
|
80
|
+
LZ4HC_CCtx_internal* ctx,
|
81
|
+
const BYTE* const ip,
|
82
|
+
const BYTE* const iHighLimit,
|
83
|
+
size_t best_mlen,
|
84
|
+
LZ4HC_match_t* matches,
|
85
|
+
int* matchNum)
|
86
|
+
{
|
87
|
+
U16* const chainTable = ctx->chainTable;
|
88
|
+
U32* const HashTable = ctx->hashTable;
|
89
|
+
const BYTE* const base = ctx->base;
|
90
|
+
const U32 dictLimit = ctx->dictLimit;
|
91
|
+
const U32 current = (U32)(ip - base);
|
92
|
+
const U32 lowLimit = (ctx->lowLimit + MAX_DISTANCE > current) ? ctx->lowLimit : current - (MAX_DISTANCE - 1);
|
93
|
+
const BYTE* const dictBase = ctx->dictBase;
|
94
|
+
const BYTE* match;
|
95
|
+
int nbAttempts = ctx->searchNum;
|
96
|
+
int mnum = 0;
|
97
|
+
U16 *ptr0, *ptr1, delta0, delta1;
|
98
|
+
U32 matchIndex;
|
99
|
+
size_t matchLength = 0;
|
100
|
+
U32* HashPos;
|
101
|
+
|
102
|
+
if (ip + MINMATCH > iHighLimit) return 1;
|
103
|
+
|
104
|
+
/* HC4 match finder */
|
105
|
+
HashPos = &HashTable[LZ4HC_hashPtr(ip)];
|
106
|
+
matchIndex = *HashPos;
|
107
|
+
*HashPos = current;
|
108
|
+
|
109
|
+
ptr0 = &DELTANEXTMAXD(current*2+1);
|
110
|
+
ptr1 = &DELTANEXTMAXD(current*2);
|
111
|
+
delta0 = delta1 = (U16)(current - matchIndex);
|
112
|
+
|
113
|
+
while ((matchIndex < current) && (matchIndex>=lowLimit) && (nbAttempts)) {
|
114
|
+
nbAttempts--;
|
115
|
+
if (matchIndex >= dictLimit) {
|
116
|
+
match = base + matchIndex;
|
117
|
+
matchLength = LZ4_count(ip, match, iHighLimit);
|
118
|
+
} else {
|
119
|
+
const BYTE* vLimit = ip + (dictLimit - matchIndex);
|
120
|
+
match = dictBase + matchIndex;
|
121
|
+
if (vLimit > iHighLimit) vLimit = iHighLimit;
|
122
|
+
matchLength = LZ4_count(ip, match, vLimit);
|
123
|
+
if ((ip+matchLength == vLimit) && (vLimit < iHighLimit))
|
124
|
+
matchLength += LZ4_count(ip+matchLength, base+dictLimit, iHighLimit);
|
125
|
+
if (matchIndex+matchLength >= dictLimit)
|
126
|
+
match = base + matchIndex; /* to prepare for next usage of match[matchLength] */
|
127
|
+
}
|
128
|
+
|
129
|
+
if (matchLength > best_mlen) {
|
130
|
+
best_mlen = matchLength;
|
131
|
+
if (matches) {
|
132
|
+
if (matchIndex >= dictLimit)
|
133
|
+
matches[mnum].off = (int)(ip - match);
|
134
|
+
else
|
135
|
+
matches[mnum].off = (int)(ip - (base + matchIndex)); /* virtual matchpos */
|
136
|
+
matches[mnum].len = (int)matchLength;
|
137
|
+
mnum++;
|
138
|
+
}
|
139
|
+
if (best_mlen > LZ4_OPT_NUM) break;
|
140
|
+
}
|
141
|
+
|
142
|
+
if (ip+matchLength >= iHighLimit) /* equal : no way to know if inf or sup */
|
143
|
+
break; /* drop , to guarantee consistency ; miss a bit of compression, but other solutions can corrupt the tree */
|
144
|
+
|
145
|
+
DEBUGLOG(6, "ip :%016llX", (U64)ip);
|
146
|
+
DEBUGLOG(6, "match:%016llX", (U64)match);
|
147
|
+
if (*(ip+matchLength) < *(match+matchLength)) {
|
148
|
+
*ptr0 = delta0;
|
149
|
+
ptr0 = &DELTANEXTMAXD(matchIndex*2);
|
150
|
+
if (*ptr0 == (U16)-1) break;
|
151
|
+
delta0 = *ptr0;
|
152
|
+
delta1 += delta0;
|
153
|
+
matchIndex -= delta0;
|
154
|
+
} else {
|
155
|
+
*ptr1 = delta1;
|
156
|
+
ptr1 = &DELTANEXTMAXD(matchIndex*2+1);
|
157
|
+
if (*ptr1 == (U16)-1) break;
|
158
|
+
delta1 = *ptr1;
|
159
|
+
delta0 += delta1;
|
160
|
+
matchIndex -= delta1;
|
161
|
+
}
|
162
|
+
}
|
163
|
+
|
164
|
+
*ptr0 = (U16)-1;
|
165
|
+
*ptr1 = (U16)-1;
|
166
|
+
if (matchNum) *matchNum = mnum;
|
167
|
+
/* if (best_mlen > 8) return best_mlen-8; */
|
168
|
+
if (!matchNum) return 1;
|
169
|
+
return 1;
|
170
|
+
}
|
171
|
+
|
172
|
+
|
173
|
+
/* LZ4HC_updateBinTree() :
 * Inserts every position from ctx->nextToUpdate up to (but excluding) `ip`,
 * without collecting matches. ctx->nextToUpdate itself is not written here. */
FORCE_INLINE void LZ4HC_updateBinTree(LZ4HC_CCtx_internal* ctx, const BYTE* const ip, const BYTE* const iHighLimit)
{
    const BYTE* const origin = ctx->base;
    U32 const stop = (U32)(ip - origin);
    U32 pos;

    for (pos = ctx->nextToUpdate; pos < stop; ) {
        /* the insertion routine returns how far to advance */
        pos += LZ4HC_BinTree_InsertAndGetAllMatches(ctx, origin+pos, iHighLimit, 8, NULL, NULL);
    }
}
|
181
|
+
|
182
|
+
|
183
|
+
/** Tree updater, providing best match */
|
184
|
+
FORCE_INLINE int LZ4HC_BinTree_GetAllMatches (
|
185
|
+
LZ4HC_CCtx_internal* ctx,
|
186
|
+
const BYTE* const ip, const BYTE* const iHighLimit,
|
187
|
+
size_t best_mlen, LZ4HC_match_t* matches, const int fullUpdate)
|
188
|
+
{
|
189
|
+
int mnum = 0;
|
190
|
+
if (ip < ctx->base + ctx->nextToUpdate) return 0; /* skipped area */
|
191
|
+
if (fullUpdate) LZ4HC_updateBinTree(ctx, ip, iHighLimit);
|
192
|
+
best_mlen = LZ4HC_BinTree_InsertAndGetAllMatches(ctx, ip, iHighLimit, best_mlen, matches, &mnum);
|
193
|
+
ctx->nextToUpdate = (U32)(ip - ctx->base + best_mlen);
|
194
|
+
return mnum;
|
195
|
+
}
|
196
|
+
|
197
|
+
|
198
|
+
/* SET_PRICE() :
 * Records a step (length `ml`, offset `offset`, literal run `ll`, price `cost`) into opt[pos].
 * Relies on two names from the expansion site : the table `opt` and the index `last_pos`.
 * When `pos` lies beyond `last_pos`, every cell in between (and opt[pos] itself, before being
 * overwritten) gets the price 1<<30, and `last_pos` is advanced to `pos`.
 * Note : `pos` is evaluated several times, so it must be free of side effects.
 * Wrapped in do { } while (0) so that it behaves as a single statement, including before `else`. */
#define SET_PRICE(pos, ml, offset, ll, cost)                                         \
    do {                                                                             \
        while (last_pos < (pos)) { opt[last_pos+1].price = 1<<30; last_pos++; }      \
        opt[(pos)].mlen = (int)(ml);                                                 \
        opt[(pos)].off = (int)(offset);                                              \
        opt[(pos)].litlen = (int)(ll);                                               \
        opt[(pos)].price = (int)(cost);                                              \
    } while (0)
|
206
|
+
|
207
|
+
|
208
|
+
static int LZ4HC_compress_optimal (
|
209
|
+
LZ4HC_CCtx_internal* ctx,
|
210
|
+
const char* const source,
|
211
|
+
char* dest,
|
212
|
+
int inputSize,
|
213
|
+
int maxOutputSize,
|
214
|
+
limitedOutput_directive limit,
|
215
|
+
size_t sufficient_len,
|
216
|
+
const int fullUpdate
|
217
|
+
)
|
218
|
+
{
|
219
|
+
LZ4HC_optimal_t opt[LZ4_OPT_NUM + 1]; /* this uses a bit too much stack memory to my taste ... */
|
220
|
+
LZ4HC_match_t matches[LZ4_OPT_NUM + 1];
|
221
|
+
|
222
|
+
const BYTE* ip = (const BYTE*) source;
|
223
|
+
const BYTE* anchor = ip;
|
224
|
+
const BYTE* const iend = ip + inputSize;
|
225
|
+
const BYTE* const mflimit = iend - MFLIMIT;
|
226
|
+
const BYTE* const matchlimit = (iend - LASTLITERALS);
|
227
|
+
BYTE* op = (BYTE*) dest;
|
228
|
+
BYTE* const oend = op + maxOutputSize;
|
229
|
+
|
230
|
+
/* init */
|
231
|
+
DEBUGLOG(5, "LZ4HC_compress_optimal");
|
232
|
+
if (sufficient_len >= LZ4_OPT_NUM) sufficient_len = LZ4_OPT_NUM-1;
|
233
|
+
ctx->end += inputSize;
|
234
|
+
ip++;
|
235
|
+
|
236
|
+
/* Main Loop */
|
237
|
+
while (ip < mflimit) {
|
238
|
+
size_t const llen = ip - anchor;
|
239
|
+
size_t last_pos = 0;
|
240
|
+
size_t match_num, cur, best_mlen, best_off;
|
241
|
+
memset(opt, 0, sizeof(LZ4HC_optimal_t)); /* memset only the first one */
|
242
|
+
|
243
|
+
match_num = LZ4HC_BinTree_GetAllMatches(ctx, ip, matchlimit, MINMATCH-1, matches, fullUpdate);
|
244
|
+
if (!match_num) { ip++; continue; }
|
245
|
+
|
246
|
+
if ((size_t)matches[match_num-1].len > sufficient_len) {
|
247
|
+
/* good enough solution : immediate encoding */
|
248
|
+
best_mlen = matches[match_num-1].len;
|
249
|
+
best_off = matches[match_num-1].off;
|
250
|
+
cur = 0;
|
251
|
+
last_pos = 1;
|
252
|
+
goto encode;
|
253
|
+
}
|
254
|
+
|
255
|
+
/* set prices using matches at position = 0 */
|
256
|
+
{ size_t matchNb;
|
257
|
+
for (matchNb = 0; matchNb < match_num; matchNb++) {
|
258
|
+
size_t mlen = (matchNb>0) ? (size_t)matches[matchNb-1].len+1 : MINMATCH;
|
259
|
+
best_mlen = matches[matchNb].len; /* necessarily < sufficient_len < LZ4_OPT_NUM */
|
260
|
+
for ( ; mlen <= best_mlen ; mlen++) {
|
261
|
+
size_t const cost = LZ4HC_sequencePrice(llen, mlen) - LZ4HC_literalsPrice(llen);
|
262
|
+
SET_PRICE(mlen, mlen, matches[matchNb].off, 0, cost); /* updates last_pos and opt[pos] */
|
263
|
+
} } }
|
264
|
+
|
265
|
+
if (last_pos < MINMATCH) { ip++; continue; } /* note : on clang at least, this test improves performance */
|
266
|
+
|
267
|
+
/* check further positions */
|
268
|
+
opt[0].mlen = opt[1].mlen = 1;
|
269
|
+
for (cur = 1; cur <= last_pos; cur++) {
|
270
|
+
const BYTE* const curPtr = ip + cur;
|
271
|
+
|
272
|
+
/* establish baseline price if cur is literal */
|
273
|
+
{ size_t price, litlen;
|
274
|
+
if (opt[cur-1].mlen == 1) {
|
275
|
+
/* no match at previous position */
|
276
|
+
litlen = opt[cur-1].litlen + 1;
|
277
|
+
if (cur > litlen) {
|
278
|
+
price = opt[cur - litlen].price + LZ4HC_literalsPrice(litlen);
|
279
|
+
} else {
|
280
|
+
price = LZ4HC_literalsPrice(llen + litlen) - LZ4HC_literalsPrice(llen);
|
281
|
+
}
|
282
|
+
} else {
|
283
|
+
litlen = 1;
|
284
|
+
price = opt[cur - 1].price + LZ4HC_literalsPrice(1);
|
285
|
+
}
|
286
|
+
|
287
|
+
if (price < (size_t)opt[cur].price)
|
288
|
+
SET_PRICE(cur, 1 /*mlen*/, 0 /*off*/, litlen, price); /* note : increases last_pos */
|
289
|
+
}
|
290
|
+
|
291
|
+
if (cur == last_pos || curPtr >= mflimit) break;
|
292
|
+
|
293
|
+
match_num = LZ4HC_BinTree_GetAllMatches(ctx, curPtr, matchlimit, MINMATCH-1, matches, fullUpdate);
|
294
|
+
if ((match_num > 0) && (size_t)matches[match_num-1].len > sufficient_len) {
|
295
|
+
/* immediate encoding */
|
296
|
+
best_mlen = matches[match_num-1].len;
|
297
|
+
best_off = matches[match_num-1].off;
|
298
|
+
last_pos = cur + 1;
|
299
|
+
goto encode;
|
300
|
+
}
|
301
|
+
|
302
|
+
/* set prices using matches at position = cur */
|
303
|
+
{ size_t matchNb;
|
304
|
+
for (matchNb = 0; matchNb < match_num; matchNb++) {
|
305
|
+
size_t ml = (matchNb>0) ? (size_t)matches[matchNb-1].len+1 : MINMATCH;
|
306
|
+
best_mlen = (cur + matches[matchNb].len < LZ4_OPT_NUM) ?
|
307
|
+
(size_t)matches[matchNb].len : LZ4_OPT_NUM - cur;
|
308
|
+
|
309
|
+
for ( ; ml <= best_mlen ; ml++) {
|
310
|
+
size_t ll, price;
|
311
|
+
if (opt[cur].mlen == 1) {
|
312
|
+
ll = opt[cur].litlen;
|
313
|
+
if (cur > ll)
|
314
|
+
price = opt[cur - ll].price + LZ4HC_sequencePrice(ll, ml);
|
315
|
+
else
|
316
|
+
price = LZ4HC_sequencePrice(llen + ll, ml) - LZ4HC_literalsPrice(llen);
|
317
|
+
} else {
|
318
|
+
ll = 0;
|
319
|
+
price = opt[cur].price + LZ4HC_sequencePrice(0, ml);
|
320
|
+
}
|
321
|
+
|
322
|
+
if (cur + ml > last_pos || price < (size_t)opt[cur + ml].price) {
|
323
|
+
SET_PRICE(cur + ml, ml, matches[matchNb].off, ll, price);
|
324
|
+
} } } }
|
325
|
+
} /* for (cur = 1; cur <= last_pos; cur++) */
|
326
|
+
|
327
|
+
best_mlen = opt[last_pos].mlen;
|
328
|
+
best_off = opt[last_pos].off;
|
329
|
+
cur = last_pos - best_mlen;
|
330
|
+
|
331
|
+
encode: /* cur, last_pos, best_mlen, best_off must be set */
|
332
|
+
opt[0].mlen = 1;
|
333
|
+
while (1) { /* from end to beginning */
|
334
|
+
size_t const ml = opt[cur].mlen;
|
335
|
+
int const offset = opt[cur].off;
|
336
|
+
opt[cur].mlen = (int)best_mlen;
|
337
|
+
opt[cur].off = (int)best_off;
|
338
|
+
best_mlen = ml;
|
339
|
+
best_off = offset;
|
340
|
+
if (ml > cur) break; /* can this happen ? */
|
341
|
+
cur -= ml;
|
342
|
+
}
|
343
|
+
|
344
|
+
/* encode all recorded sequences */
|
345
|
+
cur = 0;
|
346
|
+
while (cur < last_pos) {
|
347
|
+
int const ml = opt[cur].mlen;
|
348
|
+
int const offset = opt[cur].off;
|
349
|
+
if (ml == 1) { ip++; cur++; continue; }
|
350
|
+
cur += ml;
|
351
|
+
if ( LZ4HC_encodeSequence(&ip, &op, &anchor, ml, ip - offset, limit, oend) ) return 0;
|
352
|
+
}
|
353
|
+
} /* while (ip < mflimit) */
|
354
|
+
|
355
|
+
/* Encode Last Literals */
|
356
|
+
{ int lastRun = (int)(iend - anchor);
|
357
|
+
if ((limit) && (((char*)op - dest) + lastRun + 1 + ((lastRun+255-RUN_MASK)/255) > (U32)maxOutputSize)) return 0; /* Check output limit */
|
358
|
+
if (lastRun>=(int)RUN_MASK) { *op++=(RUN_MASK<<ML_BITS); lastRun-=RUN_MASK; for(; lastRun > 254 ; lastRun-=255) *op++ = 255; *op++ = (BYTE) lastRun; }
|
359
|
+
else *op++ = (BYTE)(lastRun<<ML_BITS);
|
360
|
+
memcpy(op, anchor, iend - anchor);
|
361
|
+
op += iend-anchor;
|
362
|
+
}
|
363
|
+
|
364
|
+
/* End */
|
365
|
+
return (int) ((char*)op-dest);
|
366
|
+
}
|