extlz4 0.2.4.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,293 @@
1
+ /*
2
+ xxHash - Extremely Fast Hash algorithm
3
+ Header File
4
+ Copyright (C) 2012-2016, Yann Collet.
5
+
6
+ BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
7
+
8
+ Redistribution and use in source and binary forms, with or without
9
+ modification, are permitted provided that the following conditions are
10
+ met:
11
+
12
+ * Redistributions of source code must retain the above copyright
13
+ notice, this list of conditions and the following disclaimer.
14
+ * Redistributions in binary form must reproduce the above
15
+ copyright notice, this list of conditions and the following disclaimer
16
+ in the documentation and/or other materials provided with the
17
+ distribution.
18
+
19
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30
+
31
+ You can contact the author at :
32
+ - xxHash source repository : https://github.com/Cyan4973/xxHash
33
+ */
34
+
35
+ /* Notice extracted from xxHash homepage :
36
+
37
+ xxHash is an extremely fast Hash algorithm, running at RAM speed limits.
38
+ It also successfully passes all tests from the SMHasher suite.
39
+
40
+ Comparison (single thread, Windows Seven 32 bits, using SMHasher on a Core 2 Duo @3GHz)
41
+
42
+ Name Speed Q.Score Author
43
+ xxHash 5.4 GB/s 10
44
+ CrapWow 3.2 GB/s 2 Andrew
45
+ MurmurHash 3a 2.7 GB/s 10 Austin Appleby
46
+ SpookyHash 2.0 GB/s 10 Bob Jenkins
47
+ SBox 1.4 GB/s 9 Bret Mulvey
48
+ Lookup3 1.2 GB/s 9 Bob Jenkins
49
+ SuperFastHash 1.2 GB/s 1 Paul Hsieh
50
+ CityHash64 1.05 GB/s 10 Pike & Alakuijala
51
+ FNV 0.55 GB/s 5 Fowler, Noll, Vo
52
+ CRC32 0.43 GB/s 9
53
+ MD5-32 0.33 GB/s 10 Ronald L. Rivest
54
+ SHA1-32 0.28 GB/s 10
55
+
56
+ Q.Score is a measure of quality of the hash function.
57
+ It depends on successfully passing SMHasher test set.
58
+ 10 is a perfect score.
59
+
60
+ A 64-bits version, named XXH64, is available since r35.
61
+ It offers much better speed, but for 64-bits applications only.
62
+ Name Speed on 64 bits Speed on 32 bits
63
+ XXH64 13.8 GB/s 1.9 GB/s
64
+ XXH32 6.8 GB/s 6.0 GB/s
65
+ */
66
+
67
+ #ifndef XXHASH_H_5627135585666179
68
+ #define XXHASH_H_5627135585666179 1
69
+
70
+ #if defined (__cplusplus)
71
+ extern "C" {
72
+ #endif
73
+
74
+
75
+ /* ****************************
76
+ * Definitions
77
+ ******************************/
78
+ #include <stddef.h> /* size_t */
79
/* Status code returned by the state-handling functions: XXH_OK is 0,
 * XXH_ERROR is any failure. */
typedef enum {
    XXH_OK = 0,
    XXH_ERROR
} XXH_errorcode;
80
+
81
+
82
+ /* ****************************
83
+ * API modifier
84
+ ******************************/
85
+ /** XXH_PRIVATE_API
86
+ * This is useful to include xxhash functions in `static` mode
87
+ * in order to inline them, and remove their symbol from the public list.
88
+ * Methodology :
89
+ * #define XXH_PRIVATE_API
90
+ * #include "xxhash.h"
91
+ * `xxhash.c` is automatically included.
92
+ * It's not useful to compile and link it as a separate module.
93
+ */
94
#ifdef XXH_PRIVATE_API
   /* Private mode implies the static-linking-only definitions as well. */
#  ifndef XXH_STATIC_LINKING_ONLY
#    define XXH_STATIC_LINKING_ONLY
#  endif
   /* Choose the "static inline" spelling understood by the current compiler. */
#  if defined(__GNUC__)
#    define XXH_PUBLIC_API static __inline __attribute__((unused))
#  elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
#    define XXH_PUBLIC_API static inline
#  elif defined(_MSC_VER)
#    define XXH_PUBLIC_API static __inline
#  else
     /* this version may generate warnings for unused static functions; disable the relevant warning */
#    define XXH_PUBLIC_API static
#  endif
#else
   /* Regular build: public functions carry no extra qualifier. */
#  define XXH_PUBLIC_API
#endif /* XXH_PRIVATE_API */
110
+
111
+ /*!XXH_NAMESPACE, aka Namespace Emulation :
112
+
113
+ If you want to include _and expose_ xxHash functions from within your own library,
114
+ but also want to avoid symbol collisions with other libraries which may also include xxHash,
115
+
116
+ you can use XXH_NAMESPACE, to automatically prefix any public symbol from xxhash library
117
+ with the value of XXH_NAMESPACE (therefore, avoid NULL and numeric values).
118
+
119
+ Note that no change is required within the calling program as long as it includes `xxhash.h` :
120
+ regular symbol name will be automatically translated by this header.
121
+ */
122
#ifdef XXH_NAMESPACE
   /* Two-level concatenation so that XXH_NAMESPACE is expanded before pasting. */
#  define XXH_CAT(A,B)   A##B
#  define XXH_NAME2(A,B) XXH_CAT(A,B)

   /* version */
#  define XXH_versionNumber        XXH_NAME2(XXH_NAMESPACE, XXH_versionNumber)

   /* 32-bit API */
#  define XXH32                    XXH_NAME2(XXH_NAMESPACE, XXH32)
#  define XXH32_createState        XXH_NAME2(XXH_NAMESPACE, XXH32_createState)
#  define XXH32_freeState          XXH_NAME2(XXH_NAMESPACE, XXH32_freeState)
#  define XXH32_reset              XXH_NAME2(XXH_NAMESPACE, XXH32_reset)
#  define XXH32_update             XXH_NAME2(XXH_NAMESPACE, XXH32_update)
#  define XXH32_digest             XXH_NAME2(XXH_NAMESPACE, XXH32_digest)
#  define XXH32_copyState          XXH_NAME2(XXH_NAMESPACE, XXH32_copyState)
#  define XXH32_canonicalFromHash  XXH_NAME2(XXH_NAMESPACE, XXH32_canonicalFromHash)
#  define XXH32_hashFromCanonical  XXH_NAME2(XXH_NAMESPACE, XXH32_hashFromCanonical)

   /* 64-bit API */
#  define XXH64                    XXH_NAME2(XXH_NAMESPACE, XXH64)
#  define XXH64_createState        XXH_NAME2(XXH_NAMESPACE, XXH64_createState)
#  define XXH64_freeState          XXH_NAME2(XXH_NAMESPACE, XXH64_freeState)
#  define XXH64_reset              XXH_NAME2(XXH_NAMESPACE, XXH64_reset)
#  define XXH64_update             XXH_NAME2(XXH_NAMESPACE, XXH64_update)
#  define XXH64_digest             XXH_NAME2(XXH_NAMESPACE, XXH64_digest)
#  define XXH64_copyState          XXH_NAME2(XXH_NAMESPACE, XXH64_copyState)
#  define XXH64_canonicalFromHash  XXH_NAME2(XXH_NAMESPACE, XXH64_canonicalFromHash)
#  define XXH64_hashFromCanonical  XXH_NAME2(XXH_NAMESPACE, XXH64_hashFromCanonical)
#endif
145
+
146
+
147
+ /* *************************************
148
+ * Version
149
+ ***************************************/
150
+ #define XXH_VERSION_MAJOR 0
151
+ #define XXH_VERSION_MINOR 6
152
+ #define XXH_VERSION_RELEASE 2
153
+ #define XXH_VERSION_NUMBER (XXH_VERSION_MAJOR *100*100 + XXH_VERSION_MINOR *100 + XXH_VERSION_RELEASE)
154
+ XXH_PUBLIC_API unsigned XXH_versionNumber (void);
155
+
156
+
157
+ /*-**********************************************************************
158
+ * 32-bits hash
159
+ ************************************************************************/
160
+ typedef unsigned int XXH32_hash_t;
161
+
162
+ /*! XXH32() :
163
+ Calculate the 32-bits hash of sequence "length" bytes stored at memory address "input".
164
+ The memory between input & input+length must be valid (allocated and read-accessible).
165
+ "seed" can be used to alter the result predictably.
166
+ Speed on Core 2 Duo @ 3 GHz (single thread, SMHasher benchmark) : 5.4 GB/s */
167
+ XXH_PUBLIC_API XXH32_hash_t XXH32 (const void* input, size_t length, unsigned int seed);
168
+
169
+ /*====== Streaming ======*/
170
+ typedef struct XXH32_state_s XXH32_state_t; /* incomplete type */
171
+ XXH_PUBLIC_API XXH32_state_t* XXH32_createState(void);
172
+ XXH_PUBLIC_API XXH_errorcode XXH32_freeState(XXH32_state_t* statePtr);
173
+ XXH_PUBLIC_API void XXH32_copyState(XXH32_state_t* dst_state, const XXH32_state_t* src_state);
174
+
175
+ XXH_PUBLIC_API XXH_errorcode XXH32_reset (XXH32_state_t* statePtr, unsigned int seed);
176
+ XXH_PUBLIC_API XXH_errorcode XXH32_update (XXH32_state_t* statePtr, const void* input, size_t length);
177
+ XXH_PUBLIC_API XXH32_hash_t XXH32_digest (const XXH32_state_t* statePtr);
178
+
179
+ /*
180
+ These functions generate the xxHash of an input provided in multiple segments.
181
+ Note that, for small input, they are slower than single-call functions, due to state management.
182
+ For small input, prefer `XXH32()` and `XXH64()` .
183
+
184
+ XXH state must first be allocated, using XXH*_createState() .
185
+
186
+ Start a new hash by initializing state with a seed, using XXH*_reset().
187
+
188
+ Then, feed the hash state by calling XXH*_update() as many times as necessary.
189
+ Obviously, input must be allocated and read accessible.
190
+ The function returns an error code, with 0 meaning OK, and any other value meaning there is an error.
191
+
192
+ Finally, a hash value can be produced anytime, by using XXH*_digest().
193
+ This function returns the nn-bits hash as an int or long long.
194
+
195
+ It's still possible to continue inserting input into the hash state after a digest,
196
+ and generate some new hashes later on, by calling again XXH*_digest().
197
+
198
+ When done, free XXH state space if it was allocated dynamically.
199
+ */
200
+
201
+ /*====== Canonical representation ======*/
202
+
203
+ typedef struct { unsigned char digest[4]; } XXH32_canonical_t;
204
+ XXH_PUBLIC_API void XXH32_canonicalFromHash(XXH32_canonical_t* dst, XXH32_hash_t hash);
205
+ XXH_PUBLIC_API XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t* src);
206
+
207
+ /* Default result type for XXH functions are primitive unsigned 32 and 64 bits.
208
+ * The canonical representation uses human-readable write convention, aka big-endian (large digits first).
209
+ * These functions allow transformation of hash result into and from its canonical format.
210
+ * This way, hash values can be written into a file / memory, and remain comparable on different systems and programs.
211
+ */
212
+
213
+
214
+ #ifndef XXH_NO_LONG_LONG
215
+ /*-**********************************************************************
216
+ * 64-bits hash
217
+ ************************************************************************/
218
+ typedef unsigned long long XXH64_hash_t;
219
+
220
+ /*! XXH64() :
221
+ Calculate the 64-bits hash of sequence of length "len" stored at memory address "input".
222
+ "seed" can be used to alter the result predictably.
223
+ This function runs faster on 64-bits systems, but slower on 32-bits systems (see benchmark).
224
+ */
225
+ XXH_PUBLIC_API XXH64_hash_t XXH64 (const void* input, size_t length, unsigned long long seed);
226
+
227
+ /*====== Streaming ======*/
228
+ typedef struct XXH64_state_s XXH64_state_t; /* incomplete type */
229
+ XXH_PUBLIC_API XXH64_state_t* XXH64_createState(void);
230
+ XXH_PUBLIC_API XXH_errorcode XXH64_freeState(XXH64_state_t* statePtr);
231
+ XXH_PUBLIC_API void XXH64_copyState(XXH64_state_t* dst_state, const XXH64_state_t* src_state);
232
+
233
+ XXH_PUBLIC_API XXH_errorcode XXH64_reset (XXH64_state_t* statePtr, unsigned long long seed);
234
+ XXH_PUBLIC_API XXH_errorcode XXH64_update (XXH64_state_t* statePtr, const void* input, size_t length);
235
+ XXH_PUBLIC_API XXH64_hash_t XXH64_digest (const XXH64_state_t* statePtr);
236
+
237
+ /*====== Canonical representation ======*/
238
+ typedef struct { unsigned char digest[8]; } XXH64_canonical_t;
239
+ XXH_PUBLIC_API void XXH64_canonicalFromHash(XXH64_canonical_t* dst, XXH64_hash_t hash);
240
+ XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canonical_t* src);
241
+ #endif /* XXH_NO_LONG_LONG */
242
+
243
+
244
#ifdef XXH_STATIC_LINKING_ONLY

/* ================================================================================================
   Unstable definitions.
   Anything in this section may change between library versions, so it is
   only safe together with static linking; never rely on it when linking
   dynamically.
=================================================================================================== */

/* The state layouts are exposed solely so that a state can be allocated
   statically, on the stack, or embedded in another struct.
   Do not access the members directly. */

struct XXH32_state_s {
    unsigned total_len_32;
    unsigned large_len;
    unsigned v1;
    unsigned v2;
    unsigned v3;
    unsigned v4;
    unsigned mem32[4];   /* buffer defined as U32 for alignment */
    unsigned memsize;
    unsigned reserved;   /* never read nor write, will be removed in a future version */
};   /* typedef'd to XXH32_state_t */

#ifndef XXH_NO_LONG_LONG
struct XXH64_state_s {
    unsigned long long total_len;
    unsigned long long v1;
    unsigned long long v2;
    unsigned long long v3;
    unsigned long long v4;
    unsigned long long mem64[4];   /* buffer defined as U64 for alignment */
    unsigned memsize;
    unsigned reserved[2];          /* never read nor write, will be removed in a future version */
};   /* typedef'd to XXH64_state_t */
#endif

/* In private mode the function bodies are pulled in as `static`, for inlining. */
#  ifdef XXH_PRIVATE_API
#    include "xxhash.c"
#  endif

#endif /* XXH_STATIC_LINKING_ONLY */
287
+
288
+
289
+ #if defined (__cplusplus)
290
+ }
291
+ #endif
292
+
293
+ #endif /* XXHASH_H_5627135585666179 */
@@ -0,0 +1,43 @@
1
+ #!ruby
2
+
3
+ #
4
+ # This code is under public domain (CC0)
5
+ # <http://creativecommons.org/publicdomain/zero/1.0/>.
6
+ #
7
+ # To the extent possible under law, dearblue has waived all copyright
8
+ # and related or neighboring rights to this work.
9
+ #
10
+ # dearblue <dearblue@users.sourceforce.jp>
11
+ #
12
+
13
+ # needed to calculate crc32 in this example (adds String#crc32 via zlib)
14
+ class String
15
+ require "zlib"
16
+
17
+ def crc32
18
+ Zlib.crc32(self)
19
+ end
20
+ end
21
+
22
+ ########
23
+
24
+ # first, load the library
25
+ require "extlz4"
26
+
27
+ # prepare source data: the file named by the first argument, or this script itself
28
+ src = File.read(ARGV[0] || __FILE__, mode: "rb")
29
+ puts "%s:%d: src.bytesize=%d, src.crc32=0x%08X\n" %
30
+ [__FILE__, __LINE__, src.bytesize, src.crc32]
31
+
32
+ # compress the data as an LZ4 Frame
33
+ encdata = LZ4.encode(src)
34
+ # OR, encdata = LZ4.encode(src, level = 1)
35
+ puts "%s:%d: encdata.bytesize=%d, encdata.crc32=0x%08X\n" %
36
+ [__FILE__, __LINE__, encdata.bytesize, encdata.crc32]
37
+
38
+ decdata = LZ4.decode(encdata) # decode the frame produced above
39
+ puts "%s:%d: decdata.bytesize=%d, decdata.crc32=0x%08X\n" %
40
+ [__FILE__, __LINE__, decdata.bytesize, decdata.crc32]
41
+
42
+ puts "%s:%d: comparison source data and decompressed data: %s\n" %
43
+ [__FILE__, __LINE__, src == decdata ? "SAME" : "NOT SAME (BUG!)"]
@@ -0,0 +1,1046 @@
1
+ #include "extlz4.h"
2
+ #include <lz4.h>
3
+ #include <lz4hc.h>
4
+
5
+ #define RDOCFAKE(code)
6
+
7
+ RDOCFAKE(extlz4_mLZ4 = rb_define_module("LZ4"));
8
+
9
+ #if __GNUC__ || __clang__ || EXTLZ4_FORCE_EXPECT
10
+ #define AUX_LIKELY(x) __builtin_expect(!!(x), 1)
11
+ #define AUX_UNLIKELY(x) __builtin_expect(!!(x), 0)
12
+ #else
13
+ #define AUX_LIKELY(x) (x)
14
+ #define AUX_UNLIKELY(x) (x)
15
+ #endif
16
+
17
+
18
+ static void *
19
+ aux_LZ4_compress_fast_continue_nogvl(va_list *vp)
20
+ {
21
+ LZ4_stream_t *context = va_arg(*vp, LZ4_stream_t *);
22
+ const char *src = va_arg(*vp, const char *);
23
+ char *dest = va_arg(*vp, char *);
24
+ int srcsize = va_arg(*vp, int);
25
+ int destsize = va_arg(*vp, int);
26
+ int acceleration = va_arg(*vp, int);
27
+
28
+ // NOTE: キャストについては aux_LZ4_decompress_safe_continue_nogvl() を参照されたし
29
+ return (void *)(intptr_t)LZ4_compress_fast_continue(context, src, dest, srcsize, destsize, acceleration);
30
+ }
31
+
32
+ static int
33
+ aux_LZ4_compress_fast_continue(void *context, const char *src, char *dest, int srcsize, int destsize, int acceleration)
34
+ {
35
+ return (int)aux_thread_call_without_gvl(
36
+ aux_LZ4_compress_fast_continue_nogvl, NULL,
37
+ context, src, dest, srcsize, destsize, acceleration);
38
+ }
39
+
40
+ static void *
41
+ aux_LZ4_compressHC_continue_nogvl(va_list *vp)
42
+ {
43
+ LZ4_streamHC_t *context = va_arg(*vp, LZ4_streamHC_t *);
44
+ const char *src = va_arg(*vp, const char *);
45
+ char *dest = va_arg(*vp, char *);
46
+ int srcsize = va_arg(*vp, int);
47
+ int destsize = va_arg(*vp, int);
48
+
49
+ // NOTE: キャストについては aux_LZ4_decompress_safe_continue_nogvl() を参照されたし
50
+ return (void *)(intptr_t)LZ4_compress_HC_continue(context, src, dest, srcsize, destsize);
51
+ }
52
+
53
+ static int
54
+ aux_LZ4_compressHC_continue(void *context, const char *src, char *dest, int srcsize, int destsize, int acceleration__ignored__)
55
+ {
56
+ (void)acceleration__ignored__;
57
+ return (int)aux_thread_call_without_gvl(
58
+ aux_LZ4_compressHC_continue_nogvl, NULL,
59
+ context, src, dest, srcsize, destsize);
60
+ }
61
+
62
+ static void *
63
+ aux_LZ4_decompress_safe_continue_nogvl(va_list *vp)
64
+ {
65
+ LZ4_streamDecode_t *context = va_arg(*vp, LZ4_streamDecode_t *);
66
+ const char *src = va_arg(*vp, const char *);
67
+ char *dest = va_arg(*vp, char *);
68
+ int srcsize = va_arg(*vp, int);
69
+ int maxsize = va_arg(*vp, int);
70
+
71
+ // NOTE: キャストを (int) -> (intptr_t) -> (void *) としている理由は、
72
+ // NOTE: (int) -> (void *) とするとコンパイラが警告してくるのを避けるため
73
+ return (void *)(intptr_t)LZ4_decompress_safe_continue(context, src, dest, srcsize, maxsize);
74
+ }
75
+
76
+ static int
77
+ aux_LZ4_decompress_safe_continue(LZ4_streamDecode_t *context, const char *src, char *dest, int srcsize, int maxsize)
78
+ {
79
+ return (int)aux_thread_call_without_gvl(
80
+ aux_LZ4_decompress_safe_continue_nogvl, NULL,
81
+ context, src, dest, srcsize, maxsize);
82
+ }
83
+
84
+ static inline size_t
85
+ aux_lz4_expandsize(const char **p, const char *end, size_t size)
86
+ {
87
+ while (AUX_LIKELY(*p < end)) {
88
+ int s = (uint8_t)*(*p) ++;
89
+ size += s;
90
+ if (AUX_LIKELY(s != 255)) {
91
+ return size;
92
+ }
93
+ }
94
+
95
+ rb_raise(extlz4_eError, "encounted invalid end of sequence");
96
+ }
97
+
98
+ static inline size_t
99
+ aux_lz4_scanseq(const char *p, const char *end, size_t *linksize)
100
+ {
101
+ size_t size = 0;
102
+ while (AUX_LIKELY(p < end)) {
103
+ uint8_t token = (uint8_t)*p ++;
104
+ size_t s = token >> 4;
105
+ if (AUX_LIKELY(s == 15)) {
106
+ s = aux_lz4_expandsize(&p, end, s);
107
+ }
108
+ size += s;
109
+ p += s;
110
+
111
+ if (AUX_UNLIKELY(p + 2 >= end)) {
112
+ if (p == end) {
113
+ #if 0
114
+ s = token & 0x0f;
115
+ if (s != 0) {
116
+ // TODO: raise? or do nothing?
117
+ }
118
+ #endif
119
+ return size;
120
+ }
121
+ break;
122
+ }
123
+ size_t offset = (uint8_t)*p ++;
124
+ offset |= ((size_t)((uint8_t)*p ++)) << 8;
125
+ if (linksize) {
126
+ ssize_t n = offset - size;
127
+ if (AUX_UNLIKELY(n > 0 && n > (ssize_t)*linksize)) {
128
+ *linksize = n;
129
+ }
130
+ }
131
+ #if 0
132
+ if (AUX_UNLIKELY(offset == 0)) {
133
+ rb_raise(extlz4_eError, "offset is zero");
134
+ }
135
+ #endif
136
+ s = token & 0x0f;
137
+ if (AUX_LIKELY(s == 15)) {
138
+ s = aux_lz4_expandsize(&p, end, s);
139
+ }
140
+ size += s + 4;
141
+ }
142
+
143
+ rb_raise(extlz4_eError, "encounted invalid end of sequence");
144
+ }
145
+
146
+ /*
147
+ * lz4 シーケンスから伸張後のバイト数を得る
148
+ *
149
+ * str が文字列であることを保証するのは呼び出し元の責任
150
+ */
151
+ static size_t
152
+ aux_lz4_scansize(VALUE str)
153
+ {
154
+ const char *p;
155
+ size_t size;
156
+ RSTRING_GETMEM(str, p, size);
157
+
158
+ return aux_lz4_scanseq(p, p + size, NULL);
159
+ }
160
+
161
+ /*
162
+ * offset トークンがバッファの負の数を表しているか確認する。
163
+ *
164
+ * 戻り値はその最大距離を返す (負の数として見るならば最小値だが、絶対値に変換する)。
165
+ *
166
+ * 名称の link は LZ4 frame からとった。
167
+ */
168
+ static size_t
169
+ aux_lz4_linksize(VALUE str)
170
+ {
171
+ const char *p;
172
+ size_t size;
173
+ RSTRING_GETMEM(str, p, size);
174
+
175
+ size_t linksize = 0;
176
+ aux_lz4_scanseq(p, p + size, &linksize);
177
+
178
+ return linksize;
179
+ }
180
+
181
+ static inline VALUE
182
+ aux_shouldbe_string(VALUE obj)
183
+ {
184
+ rb_check_type(obj, RUBY_T_STRING);
185
+ return obj;
186
+ }
187
+
188
+ static inline size_t
189
+ aux_lz4_compressbound(VALUE src)
190
+ {
191
+ return LZ4_compressBound(RSTRING_LEN(src));
192
+ }
193
+
194
/* Upper limit, in bytes, on the dictionary kept by make_predict() (64 KiB). */
enum {
    MAX_PREDICT_SIZE = 1 << 16,
};
197
+
198
+ static inline VALUE
199
+ make_predict(VALUE predict)
200
+ {
201
+ if (NIL_P(predict)) {
202
+ return Qnil;
203
+ }
204
+
205
+ rb_check_type(predict, RUBY_T_STRING);
206
+ size_t size = RSTRING_LEN(predict);
207
+ if (size == 0) {
208
+ return Qnil;
209
+ }
210
+ if (size > MAX_PREDICT_SIZE) {
211
+ predict = rb_str_subseq(predict, size - MAX_PREDICT_SIZE, MAX_PREDICT_SIZE);
212
+ } else {
213
+ predict = rb_str_dup(predict);
214
+ }
215
+ return rb_str_freeze(predict);
216
+ }
217
+
218
+
219
+ /*
220
+ * calculate destination size from source data
221
+ */
222
+ typedef size_t aux_calc_destsize_f(VALUE src);
223
+
224
+ static inline void
225
+ blockprocess_args(int argc, VALUE argv[], VALUE *src, VALUE *dest, size_t *maxsize, int *level, aux_calc_destsize_f *calcsize)
226
+ {
227
+ const VALUE *argend = argv + argc;
228
+ VALUE tmp;
229
+
230
+ if (level) {
231
+ int w = -1;
232
+ if (argc > 1) {
233
+ tmp = argv[0];
234
+ if (NIL_P(tmp)) {
235
+ argv ++;
236
+ } else if (rb_obj_is_kind_of(tmp, rb_cNumeric)) {
237
+ argv ++;
238
+ w = NUM2INT(tmp);
239
+ }
240
+ }
241
+ *level = w;
242
+ }
243
+
244
+ if (argv < argend) {
245
+ *src = aux_shouldbe_string(argv[0]);
246
+ switch (argend - argv) {
247
+ case 1:
248
+ *maxsize = calcsize(*src);
249
+ *dest = rb_str_buf_new(*maxsize);
250
+ return;
251
+ case 2:
252
+ tmp = argv[1];
253
+ if (RB_TYPE_P(tmp, RUBY_T_STRING)) {
254
+ *maxsize = calcsize(*src);
255
+ *dest = aux_shouldbe_string(tmp);
256
+ aux_str_reserve(*dest, *maxsize);
257
+ } else {
258
+ *maxsize = NUM2SIZET(tmp);
259
+ *dest = rb_str_buf_new(*maxsize);
260
+ }
261
+ return;
262
+ case 3:
263
+ *maxsize = NUM2SIZET(argv[1]);
264
+ *dest = aux_shouldbe_string(argv[2]);
265
+ aux_str_reserve(*dest, *maxsize);
266
+ return;
267
+ }
268
+ }
269
+
270
+ rb_error_arity(argc, 1, (level ? 4 : 3));
271
+ }
272
+
273
+ /*
274
+ * Document-class: LZ4::BlockEncoder
275
+ *
276
+ * This class handles the LZ4 Block API.
277
+ */
278
+
279
/*
 * Signatures of the operations a block encoder backend provides.
 * "context" is the backend's stream object, passed as an untyped pointer.
 */
typedef void  blockencoder_reset_f(void *context, int level);
typedef void *blockencoder_create_f(void);
typedef int   blockencoder_free_f(void *context);
typedef int   blockencoder_loaddict_f(void *context, const char *dict, int dictsize);
typedef int   blockencoder_savedict_f(void *context, char *dict, int dictsize);
typedef int   blockencoder_update_f(void *context, const char *src, char *dest, int srcsize, int destsize, int acceleration);
typedef int   blockencoder_update_unlinked_f(void *context, const char *src, char *dest, int srcsize, int destsize);

/*
 * Function table selecting one encoder backend (see blockencoder_traits_std
 * and blockencoder_traits_hc).
 */
struct blockencoder_traits
{
    blockencoder_reset_f    *reset;
    blockencoder_create_f   *create;
    blockencoder_free_f     *free;
    blockencoder_loaddict_f *loaddict;
    blockencoder_savedict_f *savedict;
    blockencoder_update_f   *update;
    /* blockencoder_update_unlinked_f *update_unlinked; */
};
297
+
298
+ static void
299
+ aux_LZ4_resetStream(LZ4_stream_t *context, int level__ignored__)
300
+ {
301
+ (void)level__ignored__;
302
+ LZ4_resetStream(context);
303
+ }
304
+
305
+ static const struct blockencoder_traits blockencoder_traits_std = {
306
+ .reset = (blockencoder_reset_f *)aux_LZ4_resetStream,
307
+ .create = (blockencoder_create_f *)LZ4_createStream,
308
+ .free = (blockencoder_free_f *)LZ4_freeStream,
309
+ .loaddict = (blockencoder_loaddict_f *)LZ4_loadDict,
310
+ .savedict = (blockencoder_savedict_f *)LZ4_saveDict,
311
+ .update = (blockencoder_update_f *)aux_LZ4_compress_fast_continue,
312
+ /* .update_unlinked = (blockencoder_update_unlinked_f *)LZ4_compress_limitedOutput_withState, */
313
+ };
314
+
315
+ static const struct blockencoder_traits blockencoder_traits_hc = {
316
+ .reset = (blockencoder_reset_f *)LZ4_resetStreamHC,
317
+ .create = (blockencoder_create_f *)LZ4_createStreamHC,
318
+ .free = (blockencoder_free_f *)LZ4_freeStreamHC,
319
+ .loaddict = (blockencoder_loaddict_f *)LZ4_loadDictHC,
320
+ .savedict = (blockencoder_savedict_f *)LZ4_saveDictHC,
321
+ .update = (blockencoder_update_f *)aux_LZ4_compressHC_continue,
322
+ /* .update_unlinked = (blockencoder_update_unlinked_f *)LZ4_compressHC_limitedOutput_withStateHC, */
323
+ };
324
+
325
+ struct blockencoder
326
+ {
327
+ void *context;
328
+ const struct blockencoder_traits *traits;
329
+ VALUE predict;
330
+ int level;
331
+ int prefixsize;
332
+ char prefix[1 << 16]; /* 64 KiB; LZ4_loadDict, LZ4_saveDict */
333
+ };
334
+
335
+ static void
336
+ blkenc_mark(void *pp)
337
+ {
338
+ struct blockencoder *p = pp;
339
+ rb_gc_mark(p->predict);
340
+ }
341
+
342
+ static void
343
+ blkenc_free(void *pp)
344
+ {
345
+ struct blockencoder *p = pp;
346
+ if (p->context && p->traits) {
347
+ p->traits->free(p->context);
348
+ }
349
+ memset(p, 0, sizeof(*p));
350
+ xfree(p);
351
+ }
352
+
353
+ static const rb_data_type_t blockencoder_type = {
354
+ .wrap_struct_name = "extlz4.LZ4.BlockEncoder",
355
+ .function.dmark = blkenc_mark,
356
+ .function.dfree = blkenc_free,
357
+ /* .function.dsize = blkenc_size, */
358
+ .flags = RUBY_TYPED_FREE_IMMEDIATELY,
359
+ };
360
+
361
+ static VALUE
362
+ blkenc_alloc(VALUE klass)
363
+ {
364
+ struct blockencoder *p;
365
+ VALUE v = TypedData_Make_Struct(klass, struct blockencoder, &blockencoder_type, p);
366
+ p->predict = Qnil;
367
+ return v;
368
+ }
369
+
370
+ static inline struct blockencoder *
371
+ getencoderp(VALUE enc)
372
+ {
373
+ return getrefp(enc, &blockencoder_type);
374
+ }
375
+
376
+ static inline struct blockencoder *
377
+ getencoder(VALUE enc)
378
+ {
379
+ return getref(enc, &blockencoder_type);
380
+ }
381
+
382
+ static inline struct blockencoder *
383
+ encoder_context(VALUE enc)
384
+ {
385
+ struct blockencoder *p = getencoder(enc);
386
+ if (!p->context) {
387
+ rb_raise(extlz4_eError,
388
+ "not initialized yet - #<%s:%p>",
389
+ rb_obj_classname(enc), (void *)enc);
390
+ }
391
+
392
+ return p;
393
+ }
394
+
395
+ static inline void
396
+ blkenc_setup(int argc, VALUE argv[], struct blockencoder *p, VALUE predict)
397
+ {
398
+ rb_check_arity(argc, 0, 2);
399
+
400
+ if (p->context) {
401
+ void *cx = p->context;
402
+ p->context = NULL;
403
+ p->traits->free(cx);
404
+ }
405
+
406
+ if (argc == 0 || NIL_P(argv[0])) {
407
+ p->level = 1;
408
+ p->traits = &blockencoder_traits_std;
409
+ } else {
410
+ p->level = NUM2UINT(argv[0]);
411
+ if (p->level < 0) {
412
+ p->traits = &blockencoder_traits_std;
413
+ p->level = -p->level;
414
+ } else {
415
+ p->traits = &blockencoder_traits_hc;
416
+ }
417
+ }
418
+
419
+ if (argc < 2) {
420
+ p->predict = predict;
421
+ } else {
422
+ p->predict = make_predict(argv[1]);
423
+ }
424
+
425
+ p->context = p->traits->create();
426
+ if (!p->context) {
427
+ rb_gc();
428
+ p->context = p->traits->create();
429
+ if (!p->context) {
430
+ errno = ENOMEM;
431
+ rb_sys_fail("failed context allocation by LZ4_createStream()");
432
+ }
433
+ }
434
+
435
+ p->traits->reset(p->context, p->level);
436
+
437
+ if (NIL_P(p->predict)) {
438
+ p->traits->loaddict(p->context, NULL, 0);
439
+ } else {
440
+ /*
441
+ * NOTE: すぐ下で LZ4_saveDict() を実行するため、
442
+ * NOTE: p->predict のバッファ領域が保持されることはない。
443
+ */
444
+ p->traits->loaddict(p->context, RSTRING_PTR(p->predict), RSTRING_LEN(p->predict));
445
+ }
446
+
447
+ p->prefixsize = p->traits->savedict(p->context, p->prefix, sizeof(p->prefix));
448
+ }
449
+
450
+ /*
451
+ * call-seq:
452
+ * initialize(level = nil, predict = nil)
453
+ *
454
+ * [INFECTION]
455
+ * +self+ <- +predict+
456
+ *
457
+ * [RETURN]
458
+ * self
459
+ *
460
+ * [level]
461
+ * When given +nil+, encode normal compression.
462
+ *
463
+ * When given +0+ .. +15+, encode high compression.
464
+ *
465
+ * [predict]
466
+ * Preset dictionary.
467
+ */
468
+ static VALUE
469
+ blkenc_init(int argc, VALUE argv[], VALUE enc)
470
+ {
471
+ struct blockencoder *p = getencoder(enc);
472
+ if (p->context) {
473
+ rb_raise(extlz4_eError,
474
+ "already initialized - #<%s:%p>",
475
+ rb_obj_classname(enc), (void *)enc);
476
+ }
477
+
478
+ blkenc_setup(argc, argv, p, Qnil);
479
+ rb_obj_infect(enc, p->predict);
480
+
481
+ return enc;
482
+ }
483
+
484
+ /*
485
+ * call-seq:
486
+ * update(src, dest = "") -> dest
487
+ * update(src, max_dest_size, dest = "") -> dest
488
+ *
489
+ * [INFECTION]
490
+ * +dest+ <- +self+ <- +src+
491
+ */
492
+ static VALUE
493
+ blkenc_update(int argc, VALUE argv[], VALUE enc)
494
+ {
495
+ struct blockencoder *p = encoder_context(enc);
496
+ VALUE src, dest;
497
+ size_t maxsize;
498
+ blockprocess_args(argc, argv, &src, &dest, &maxsize, NULL, aux_lz4_compressbound);
499
+ rb_obj_infect(enc, src);
500
+ rb_obj_infect(dest, enc);
501
+ char *srcp;
502
+ size_t srcsize;
503
+ RSTRING_GETMEM(src, srcp, srcsize);
504
+ int s = p->traits->update(p->context, srcp, RSTRING_PTR(dest), srcsize, maxsize, p->level);
505
+ if (s <= 0) {
506
+ rb_raise(extlz4_eError,
507
+ "destsize too small (given destsize is %zu)",
508
+ rb_str_capacity(dest));
509
+ }
510
+ p->prefixsize = p->traits->savedict(p->context, p->prefix, sizeof(p->prefix));
511
+ rb_str_set_len(dest, s);
512
+ return dest;
513
+ }
514
+
515
+ /*
516
+ * call-seq:
517
+ * reset(level = nil) -> self
518
+ * reset(level, predict) -> self
519
+ *
520
+ * [INFECTION]
521
+ * +self+ < +predict+
522
+ *
523
+ * Reset block stream encoder.
524
+ */
525
+ static VALUE
526
+ blkenc_reset(int argc, VALUE argv[], VALUE enc)
527
+ {
528
+ struct blockencoder *p = encoder_context(enc);
529
+ blkenc_setup(argc, argv, p, p->predict);
530
+ rb_obj_infect(enc, p->predict);
531
+
532
+ return enc;
533
+ }
534
+
535
+ static VALUE
536
+ blkenc_release(VALUE enc)
537
+ {
538
+ struct blockencoder *p = getencoder(enc);
539
+ if (p->traits && p->context) {
540
+ p->traits->free(p->context);
541
+ }
542
+ p->context = NULL;
543
+ p->traits = NULL;
544
+ memset(p->prefix, 0, sizeof(p->prefix));
545
+ p->prefixsize = 0;
546
+ return Qnil;
547
+ }
548
+
549
+ static VALUE
550
+ blkenc_predict(VALUE enc)
551
+ {
552
+ return getencoder(enc)->predict;
553
+ }
554
+
555
+ /*
556
+ * call-seq:
557
+ * savedict -> dict or nil
558
+ * savedict(buf) -> buf or nil
559
+ */
560
+ static VALUE
561
+ blkenc_savedict(int argc, VALUE argv[], VALUE enc)
562
+ {
563
+ struct blockencoder *p = encoder_context(enc);
564
+ VALUE dict;
565
+
566
+ if (argc == 0) {
567
+ dict = rb_str_buf_new(p->prefixsize);
568
+ } else if (argc == 1) {
569
+ dict = argv[0];
570
+ aux_str_reserve(dict, p->prefixsize);
571
+ } else {
572
+ rb_error_arity(argc, 0, 1);
573
+ }
574
+
575
+ memcpy(RSTRING_PTR(dict), p->prefix, p->prefixsize);
576
+ if (p->prefixsize > 0) {
577
+ rb_str_set_len(dict, p->prefixsize);
578
+ rb_obj_infect(dict, enc);
579
+ return dict;
580
+ } else {
581
+ return Qnil;
582
+ }
583
+ }
584
+
585
+ static VALUE
586
+ blkenc_inspect(VALUE enc)
587
+ {
588
+ struct blockencoder *p = getencoderp(enc);
589
+ if (p && p->context) {
590
+ if (p->traits == &blockencoder_traits_std) {
591
+ return rb_sprintf("#<%s:%p (fast compression %d)%s>",
592
+ rb_obj_classname(enc), (void *)enc, p->level,
593
+ (NIL_P(p->predict)) ? "" : " (with predict)");
594
+ } else if (p->traits == &blockencoder_traits_hc) {
595
+ return rb_sprintf("#<%s:%p (high compression %d)%s>",
596
+ rb_obj_classname(enc), (void *)enc, p->level,
597
+ (NIL_P(p->predict)) ? "" : " (with predict)");
598
+ } else {
599
+ return rb_sprintf("#<%s:%p **INVALID COMPRESSOR**>",
600
+ rb_obj_classname(enc), (void *)enc);
601
+ }
602
+ } else {
603
+ return rb_sprintf("#<%s:%p **NOT INITIALIZED**>",
604
+ rb_obj_classname(enc), (void *)enc);
605
+ }
606
+ }
607
+
608
+ /*
609
+ * call-seq:
610
+ * compressbound(src) -> size
611
+ *
612
+ * Calcuration maximum size of encoded data in worst case.
613
+ */
614
+ static VALUE
615
+ blkenc_s_compressbound(VALUE mod, VALUE src)
616
+ {
617
+ return SIZET2NUM(LZ4_compressBound(NUM2UINT(src)));
618
+ }
619
+
620
/*
 * Function type used by blkenc_s_encode() to hold either LZ4_compress_fast
 * or LZ4_compress_HC.
 */
typedef int aux_lz4_encoder_f(const char *src,
                              char *dest,
                              int srcsize,
                              int maxsize,
                              int level);
621
+
622
+ /*
623
+ * call-seq:
624
+ * encode(src, dest = "") -> dest with compressed string data
625
+ * encode(src, max_dest_size, dest = "") -> dest with compressed string data
626
+ * encode(level, src, dest = "") -> dest with compressed string data
627
+ * encode(level, src, max_dest_size, dest = "") -> dest with compressed string data
628
+ *
629
+ * Encode to block LZ4 data.
630
+ *
631
+ * level を指定した場合、より圧縮処理に時間を掛けて圧縮効率を高めることが出来ます。
632
+ *
633
+ * 実装の都合上、圧縮関数は LZ4_compress_fast / LZ4_compress_HC が使われます。
634
+ *
635
+ * [INFECTION]
636
+ * +dest+ <- +src+
637
+ *
638
+ * [RETURN]
639
+ * 圧縮されたデータが文字列として返ります。dest を指定した場合は、圧縮データを格納した dest を返します。
640
+ *
641
+ * 圧縮データには自身の終わりやデータ長が含まれていないため、伸張する際には余計なデータが付随していると正常に伸張できません。
642
+ *
643
+ * [src]
644
+ * 圧縮対象となる文字列オブジェクトを指定します。
645
+ *
646
+ * [max_dest_size (optional)]
647
+ * 出力バッファの最大バイト数を指定します。圧縮時にこれよりも多くのバッファ長が必要になった場合は例外が発生します。
648
+ *
649
+ * 省略時は src 長から最悪値が計算されます。dest が最初に確保できれば圧縮処理中に例外が発生することがありません。
650
+ *
651
+ * [dest (optional)]
652
+ * 出力先とする文字列オブジェクトを指定します。
653
+ *
654
+ * max_dest_size が同時に指定されない場合、出力バッファの最大バイト長は src 長から最悪値が求められて調整されます。
655
+ *
656
+ * [level (optional)]
657
+ * 圧縮レベルとしての数値または nil を指定します。
658
+ *
659
+ * 0 を指定した場合、LZ4 の規定値による高効率圧縮処理が行われます。
660
+ *
661
+ * 0 を超えた数値を指定した場合、LZ4 の高効率圧縮処理が行われます。
662
+ *
663
+ * nil を与えるか省略した場合、通常の圧縮処理が行われます。
664
+ *
665
+ * 0 に満たない数値を指定した場合、高速圧縮処理が行われます。
666
+ * 内部でこの値は絶対値に変換されて LZ4_compress_fast() の acceleration 引数として渡されます。
667
+ */
668
+ static VALUE
669
+ blkenc_s_encode(int argc, VALUE argv[], VALUE lz4)
670
+ {
671
+ VALUE src, dest;
672
+ size_t maxsize;
673
+ int level;
674
+ blockprocess_args(argc, argv, &src, &dest, &maxsize, &level, aux_lz4_compressbound);
675
+
676
+ aux_lz4_encoder_f *encoder;
677
+ if (level < 0) {
678
+ encoder = LZ4_compress_fast;
679
+ level = -level;
680
+ } else {
681
+ encoder = LZ4_compress_HC;
682
+ }
683
+
684
+ size_t srcsize = RSTRING_LEN(src);
685
+ if (srcsize > LZ4_MAX_INPUT_SIZE) {
686
+ rb_raise(extlz4_eError,
687
+ "source size is too big for lz4 encode (given %zu, but max %zu bytes)",
688
+ srcsize, (size_t)LZ4_MAX_INPUT_SIZE);
689
+ }
690
+ aux_str_reserve(dest, maxsize);
691
+ rb_str_set_len(dest, 0);
692
+ rb_obj_infect(dest, src);
693
+
694
+ int size = encoder(RSTRING_PTR(src), RSTRING_PTR(dest), srcsize, maxsize, level);
695
+ if (size <= 0) {
696
+ rb_raise(extlz4_eError,
697
+ "failed LZ4 compress - maxsize is too small, or out of memory");
698
+ }
699
+
700
+ rb_str_set_len(dest, size);
701
+
702
+ return dest;
703
+ }
704
+
705
+ static void
706
+ init_blockencoder(void)
707
+ {
708
+ VALUE cBlockEncoder = rb_define_class_under(extlz4_mLZ4, "BlockEncoder", rb_cObject);
709
+ rb_define_alloc_func(cBlockEncoder, blkenc_alloc);
710
+ rb_define_method(cBlockEncoder, "initialize", RUBY_METHOD_FUNC(blkenc_init), -1);
711
+ rb_define_method(cBlockEncoder, "reset", RUBY_METHOD_FUNC(blkenc_reset), -1);
712
+ rb_define_method(cBlockEncoder, "update", RUBY_METHOD_FUNC(blkenc_update), -1);
713
+ rb_define_method(cBlockEncoder, "release", RUBY_METHOD_FUNC(blkenc_release), 0);
714
+ rb_define_method(cBlockEncoder, "predict", RUBY_METHOD_FUNC(blkenc_predict), 0);
715
+ rb_define_method(cBlockEncoder, "savedict", RUBY_METHOD_FUNC(blkenc_savedict), -1);
716
+ rb_define_method(cBlockEncoder, "inspect", RUBY_METHOD_FUNC(blkenc_inspect), 0);
717
+ rb_define_alias(cBlockEncoder, "encode", "update");
718
+ rb_define_alias(cBlockEncoder, "compress", "update");
719
+ rb_define_alias(cBlockEncoder, "free", "release");
720
+
721
+ rb_define_singleton_method(cBlockEncoder, "compressbound", blkenc_s_compressbound, 1);
722
+ rb_define_singleton_method(cBlockEncoder, "encode", blkenc_s_encode, -1);
723
+ rb_define_alias(rb_singleton_class(cBlockEncoder), "compress", "encode");
724
+
725
+ rb_define_const(extlz4_mLZ4, "LZ4HC_CLEVEL_MIN", INT2FIX(LZ4HC_CLEVEL_MIN));
726
+ rb_define_const(extlz4_mLZ4, "LZ4HC_CLEVEL_DEFAULT", INT2FIX(LZ4HC_CLEVEL_DEFAULT));
727
+ rb_define_const(extlz4_mLZ4, "LZ4HC_CLEVEL_OPT_MIN", INT2FIX(LZ4HC_CLEVEL_OPT_MIN));
728
+ rb_define_const(extlz4_mLZ4, "LZ4HC_CLEVEL_MAX", INT2FIX(LZ4HC_CLEVEL_MAX));
729
+
730
+ rb_define_const(extlz4_mLZ4, "HC_CLEVEL_MIN", INT2FIX(LZ4HC_CLEVEL_MIN));
731
+ rb_define_const(extlz4_mLZ4, "HC_CLEVEL_DEFAULT", INT2FIX(LZ4HC_CLEVEL_DEFAULT));
732
+ rb_define_const(extlz4_mLZ4, "HC_CLEVEL_OPT_MIN", INT2FIX(LZ4HC_CLEVEL_OPT_MIN));
733
+ rb_define_const(extlz4_mLZ4, "HC_CLEVEL_MAX", INT2FIX(LZ4HC_CLEVEL_MAX));
734
+ }
735
+
736
+ /*
737
+ * class LZ4::BlockDecoder
738
+ */
739
+
740
+ struct blockdecoder
741
+ {
742
+ void *context;
743
+ VALUE predict;
744
+ size_t dictsize;
745
+ char dictbuf[64 * 1024];
746
+ };
747
+
748
+ static void
749
+ blkdec_mark(void *pp)
750
+ {
751
+ struct blockdecoder *p = pp;
752
+ rb_gc_mark(p->predict);
753
+ }
754
+
755
+ static void
756
+ blkdec_free(void *pp)
757
+ {
758
+ struct blockdecoder *p = pp;
759
+ if (p->context) {
760
+ LZ4_freeStreamDecode(p->context);
761
+ }
762
+ memset(p, 0, sizeof(*p));
763
+ xfree(p);
764
+ }
765
+
766
+ static const rb_data_type_t blockdecoder_type = {
767
+ .wrap_struct_name = "extlz4.LZ4.BlockDecoder",
768
+ .function.dmark = blkdec_mark,
769
+ .function.dfree = blkdec_free,
770
+ /* .function.dsize = blkdec_size, */
771
+ .flags = RUBY_TYPED_FREE_IMMEDIATELY,
772
+ };
773
+
774
+ static VALUE
775
+ blkdec_alloc(VALUE klass)
776
+ {
777
+ struct blockdecoder *p;
778
+ VALUE v = TypedData_Make_Struct(klass, struct blockdecoder, &blockdecoder_type, p);
779
+ p->predict = Qnil;
780
+ return v;
781
+ }
782
+
783
+ static inline struct blockdecoder *
784
+ getdecoderp(VALUE dec)
785
+ {
786
+ return getrefp(dec, &blockdecoder_type);
787
+ }
788
+
789
+ static inline struct blockdecoder *
790
+ getdecoder(VALUE dec)
791
+ {
792
+ return getref(dec, &blockdecoder_type);
793
+ }
794
+
795
+ static inline void
796
+ blkdec_setup(int argc, VALUE argv[], VALUE predict, struct blockdecoder *p)
797
+ {
798
+ VALUE predict1;
799
+ rb_scan_args(argc, argv, "01", &predict1);
800
+ if (argc == 0) {
801
+ p->predict = predict;
802
+ } else {
803
+ if (NIL_P(predict1)) {
804
+ p->predict = predict;
805
+ } else {
806
+ rb_check_type(predict1, RUBY_T_STRING);
807
+ p->predict = predict = rb_str_dup(predict1);
808
+ }
809
+ }
810
+
811
+ if (!p->context) {
812
+ p->context = LZ4_createStreamDecode();
813
+ if (!p->context) {
814
+ rb_gc();
815
+ p->context = LZ4_createStreamDecode();
816
+ if (!p->context) {
817
+ errno = ENOMEM;
818
+ rb_sys_fail("failed LZ4_createStreamDecode()");
819
+ }
820
+ }
821
+ }
822
+
823
+ if (!NIL_P(predict)) {
824
+ const char *pdp;
825
+ RSTRING_GETMEM(predict, pdp, p->dictsize);
826
+ if (p->dictsize > sizeof(p->dictbuf)) {
827
+ pdp += p->dictsize - sizeof(p->dictbuf);
828
+ p->dictsize = sizeof(p->dictbuf);
829
+ }
830
+
831
+ memcpy(p->dictbuf, pdp, p->dictsize);
832
+ } else {
833
+ p->dictsize = 0;
834
+ }
835
+ }
836
+
837
+ /*
838
+ * call-seq:
839
+ * initialize
840
+ * initialize(preset_dictionary)
841
+ *
842
+ * [INFECTION]
843
+ * +self+ < +preset_dictionary+
844
+ */
845
+ static VALUE
846
+ blkdec_init(int argc, VALUE argv[], VALUE dec)
847
+ {
848
+ struct blockdecoder *p = getdecoder(dec);
849
+
850
+ blkdec_setup(argc, argv, Qnil, p);
851
+ rb_obj_infect(dec, p->predict);
852
+
853
+ return dec;
854
+ }
855
+
856
+ /*
857
+ * call-seq:
858
+ * reset
859
+ * reset(preset_dictionary)
860
+ *
861
+ * [INFECTION]
862
+ * +self+ < +preset_dictionary+
863
+ */
864
+ static VALUE
865
+ blkdec_reset(int argc, VALUE argv[], VALUE dec)
866
+ {
867
+ struct blockdecoder *p = getdecoder(dec);
868
+
869
+ blkdec_setup(argc, argv, p->predict, p);
870
+ rb_obj_infect(dec, p->predict);
871
+
872
+ return dec;
873
+ }
874
+
875
+ /*
876
+ * call-seq:
877
+ * update(src, dest = "") -> dest for decoded string data
878
+ * update(src, max_dest_size, dest = "") -> dest for decoded string data
879
+ *
880
+ * Decode block lz4 data of stream block.
881
+ *
882
+ * Given arguments and return values are same as LZ4#block_decode.
883
+ * See LZ4#block_decode for about its.
884
+ *
885
+ * 出力先は、max_dest_size が与えられていない場合、必要に応じて自動的に拡張されます。
886
+ * この場合、いったん圧縮された LZ4 データを走査するため、事前に僅かな CPU 時間を必要とします。
887
+ *
888
+ * [INFECTION]
889
+ * +dest+ < +self+ < +src+
890
+ */
891
+ static VALUE
892
+ blkdec_update(int argc, VALUE argv[], VALUE dec)
893
+ {
894
+ struct blockdecoder *p = getdecoder(dec);
895
+ if (!p->context) { rb_raise(extlz4_eError, "need reset (context not initialized)"); }
896
+ VALUE src, dest;
897
+ size_t maxsize;
898
+ blockprocess_args(argc, argv, &src, &dest, &maxsize, NULL, aux_lz4_scansize);
899
+ rb_obj_infect(dec, src);
900
+ rb_obj_infect(dest, dec);
901
+ const char *srcp;
902
+ size_t srcsize;
903
+ RSTRING_GETMEM(src, srcp, srcsize);
904
+ LZ4_setStreamDecode(p->context, p->dictbuf, p->dictsize);
905
+ int s = aux_LZ4_decompress_safe_continue(p->context, srcp, RSTRING_PTR(dest), srcsize, maxsize);
906
+ if (s < 0) {
907
+ rb_raise(extlz4_eError,
908
+ "`max_dest_size' too small, or corrupt lz4'd data");
909
+ }
910
+ rb_str_set_len(dest, s);
911
+
912
+ /* copy prefix */
913
+ if (s < sizeof(p->dictbuf)) {
914
+ ssize_t discard = (p->dictsize + s) - sizeof(p->dictbuf);
915
+ if (discard > 0) {
916
+ size_t remain = p->dictsize - discard;
917
+ memmove(p->dictbuf, (const char *)(p->dictbuf + discard), remain);
918
+ p->dictsize = remain;
919
+ }
920
+
921
+ memcpy(p->dictbuf + p->dictsize, RSTRING_PTR(dest), s);
922
+ p->dictsize += s;
923
+ } else {
924
+ memcpy(p->dictbuf, RSTRING_END(dest) - sizeof(p->dictbuf), sizeof(p->dictbuf));
925
+ p->dictsize = sizeof(p->dictbuf);
926
+ }
927
+
928
+ return dest;
929
+ }
930
+
931
+ /*
932
+ * call-seq:
933
+ * release -> nil
934
+ *
935
+ * Release allocated internal heap memory.
936
+ */
937
+ static VALUE
938
+ blkdec_release(VALUE lz4)
939
+ {
940
+ struct blockdecoder *p = getdecoderp(lz4);
941
+ if (!p) { return Qnil; }
942
+ if (p->context) {
943
+ LZ4_freeStreamDecode(p->context);
944
+ p->context = NULL;
945
+ }
946
+ // TODO: p->predict と p->prefix も rb_str_resize で 0 にするべきか?
947
+ p->predict = Qnil;
948
+ return Qnil;
949
+ }
950
+
951
+ /*
952
+ * call-seq:
953
+ * scansize(lz4_blockencoded_data) -> integer
954
+ *
955
+ * Scan block lz4 data, and get decoded byte size.
956
+ *
957
+ * このメソッドは、block_decode メソッドに max_dest_size なしで利用する場合の検証目的で利用できるようにしてあります。
958
+ *
959
+ * その他の有用な使い方があるのかは不明です。
960
+ */
961
+ static VALUE
962
+ blkdec_s_scansize(VALUE mod, VALUE str)
963
+ {
964
+ rb_check_type(str, RUBY_T_STRING);
965
+ return SIZET2NUM(aux_lz4_scansize(str));
966
+ }
967
+
968
+ /*
969
+ * call-seq:
970
+ * linksize(lz4_blockencoded_data) -> prefix size as integer
971
+ *
972
+ * Scan block lz4 data, and get prefix byte size.
973
+ */
974
+ static VALUE
975
+ blkdec_s_linksize(VALUE mod, VALUE str)
976
+ {
977
+ rb_check_type(str, RUBY_T_STRING);
978
+ return SIZET2NUM(aux_lz4_linksize(str));
979
+ }
980
+
981
+ /*
982
+ * call-seq:
983
+ * decode(src, dest = "") -> dest with decoded string data
984
+ * decode(src, max_dest_size, dest = "") -> dest with decoded string data
985
+ *
986
+ * Decode block LZ4 data.
987
+ *
988
+ * 出力先は、max_dest_size が与えられていない場合、必要に応じて自動的に拡張されます。
989
+ * この場合、いったん圧縮された LZ4 データを走査するため、事前に僅かな CPU 時間を必要とします。
990
+ *
991
+ * [INFECTION]
992
+ * +dest+ < +src+
993
+ */
994
+ static VALUE
995
+ blkdec_s_decode(int argc, VALUE argv[], VALUE lz4)
996
+ {
997
+ VALUE src, dest;
998
+ size_t maxsize;
999
+ blockprocess_args(argc, argv, &src, &dest, &maxsize, NULL, aux_lz4_scansize);
1000
+
1001
+ aux_str_reserve(dest, maxsize);
1002
+ rb_str_set_len(dest, 0);
1003
+ rb_obj_infect(dest, src);
1004
+
1005
+ int size = LZ4_decompress_safe(RSTRING_PTR(src), RSTRING_PTR(dest), RSTRING_LEN(src), maxsize);
1006
+ if (size < 0) {
1007
+ rb_raise(extlz4_eError,
1008
+ "failed LZ4_decompress_safe - max_dest_size is too small, or data is corrupted");
1009
+ }
1010
+
1011
+ rb_str_set_len(dest, size);
1012
+
1013
+ return dest;
1014
+ }
1015
+
1016
+ static void
1017
+ init_blockdecoder(void)
1018
+ {
1019
+ VALUE cBlockDecoder = rb_define_class_under(extlz4_mLZ4, "BlockDecoder", rb_cObject);
1020
+ rb_define_alloc_func(cBlockDecoder, blkdec_alloc);
1021
+ rb_define_method(cBlockDecoder, "initialize", RUBY_METHOD_FUNC(blkdec_init), -1);
1022
+ rb_define_method(cBlockDecoder, "reset", RUBY_METHOD_FUNC(blkdec_reset), -1);
1023
+ rb_define_method(cBlockDecoder, "update", RUBY_METHOD_FUNC(blkdec_update), -1);
1024
+ rb_define_method(cBlockDecoder, "release", RUBY_METHOD_FUNC(blkdec_release), 0);
1025
+ rb_define_alias(cBlockDecoder, "decode", "update");
1026
+ rb_define_alias(cBlockDecoder, "decompress", "update");
1027
+ rb_define_alias(cBlockDecoder, "uncompress", "update");
1028
+ rb_define_alias(cBlockDecoder, "free", "release");
1029
+
1030
+ rb_define_singleton_method(cBlockDecoder, "scansize", blkdec_s_scansize, 1);
1031
+ rb_define_singleton_method(cBlockDecoder, "linksize", blkdec_s_linksize, 1);
1032
+ rb_define_singleton_method(cBlockDecoder, "decode", blkdec_s_decode, -1);
1033
+ rb_define_alias(rb_singleton_class(cBlockDecoder), "decompress", "decode");
1034
+ rb_define_alias(rb_singleton_class(cBlockDecoder), "uncompress", "decode");
1035
+ }
1036
+
1037
/*
 * Initializer for blockapi.c: registers the block encoder class first, then
 * the block decoder class.
 */

void
extlz4_init_blockapi(void)
{
    init_blockencoder();
    init_blockdecoder();
}