lzfx 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
data/README.md ADDED
@@ -0,0 +1,33 @@
1
+ # Ruby bindings for lzfx
2
+
3
+ lzfx is a tiny, extremely fast compression library.
4
+
5
+ ## lzfx
6
+ This gem bundles the lzfx C library and exposes it to Ruby.
7
+
8
+ * http://code.google.com/p/lzfx/
9
+
10
+ > LZFX is a small (one C file, 200 non-comment lines) BSD-licensed library designed for very-high-speed compression of redundant data. It is descended from liblzf and is 100% compatible with existing LZF-compressed data.
11
+
12
+ > LZFX was originally developed as a component of the h5py project, which uses an LZF-based compressor to process scientific data.
13
+
14
+ ## Example
15
+
16
+ require 'rubygems'
17
+ require 'lzfx'
18
+
19
+ a = "aaaaaaaaabbbbbbbbbbbbabababaccccccccccjshabbbbbbaaa"
20
+ puts "source: #{a}"
21
+ puts "source size: #{a.size}"
22
+
23
+ b = Lzfx.compress a
24
+ puts "compressed: #{b}"
25
+ puts "compressed size: #{b.size}"
26
+
27
+ c = Lzfx.decompress b
28
+ puts "decompressed: #{c}"
29
+ puts "decompressed size: #{c.size}"
30
+ puts "source == decompressed: #{a == c}"
31
+
32
+ ## Copyright
33
+ Copyright: (C) 2012 Kengo HAMASAKI <k.hamasaki@gmail.com>
data/ext/extconf.rb ADDED
@@ -0,0 +1,2 @@
1
# Build script: generates the Makefile for the native "lzfx" extension.
# NOTE(review): mkmf presumably compiles every C source in this directory,
# which would include lzfx-util.c and its own main() -- confirm that is intended.
require "mkmf"

create_makefile("lzfx")
data/ext/lzfx-util.c ADDED
@@ -0,0 +1,450 @@
1
+ #include <stdlib.h>
2
+ #include <stdio.h>
3
+ #include <unistd.h>
4
+ #include <fcntl.h>
5
+ #include <string.h>
6
+ #include "lzfx.h"
7
+ #include <errno.h>
8
+ #include <stdint.h>
9
+
10
/* Size of one input block handed to the compressor (1 MiB). */
#define BLOCKSIZE (1024 * 1024)

/* Raw byte type used for all buffers. */
typedef unsigned char u8;

/* Direction of operation selected on the command line. */
typedef enum {
    MODE_COMPRESS   = 0,
    MODE_DECOMPRESS = 1
} fx_mode_t;

/* Block kinds stored in the 16-bit "kind" field of a block header. */
typedef enum {
    KIND_FILEHEADER   = 0,
    KIND_COMPRESSED   = 1,
    KIND_UNCOMPRESSED = 2
} fx_kind_t;

/* Per-run state: the two file descriptors and the selected mode. */
typedef struct {
    int ifd;          /* Input file descriptor */
    int ofd;          /* Output file descriptor */
    fx_mode_t mode;   /* Compress or decompress */
} FX_STATE;
29
+
30
+ static
31
+ void fx_init(FX_STATE *state, int ifd, int ofd, fx_mode_t mode){
32
+ state->ifd = ifd;
33
+ state->ofd = ofd;
34
+ state->mode = mode;
35
+ }
36
+
37
+ /* Read len bytes from the input file.
38
+
39
+ buf: Output buffer
40
+ len: # bytes to read
41
+
42
+ >=0: bytes read, either 0 (EOF) or len
43
+ <0: Read error
44
+ */
45
+ static inline
46
+ int fx_read_bytes(const FX_STATE state, void* buf, const size_t len){
47
+
48
+ ssize_t rc = 0;
49
+ size_t count = 0;
50
+
51
+ do {
52
+ rc = read(state.ifd, ((u8*)buf)+count, len-count);
53
+ count += rc;
54
+ } while(rc>0 && count<len);
55
+
56
+ if(rc<0){
57
+ fprintf(stderr, "Read failed: %s\n", strerror(errno));
58
+ return -1;
59
+ }
60
+
61
+ if(count>0 && count!=len){
62
+ fprintf(stderr, "Read truncated (%u bytes short)\n", (unsigned int)(len-count));
63
+ return -1;
64
+ }
65
+ return count;
66
+ }
67
+
68
+ /* Write len bytes from the buffer to the output file.
69
+
70
+ buf: Input buffer
71
+ len: # of bytes in buf
72
+
73
+ >=0: Bytes written
74
+ <0: Write error
75
+ */
76
+ static inline
77
+ int fx_write_bytes(const FX_STATE state, const void* buf, const size_t len){
78
+
79
+ ssize_t rc = 0;
80
+ size_t count = 0;
81
+
82
+ do {
83
+ rc = write(state.ofd, ((u8*)buf)+count, len-count);
84
+ count += rc;
85
+ } while(rc>0 && count<len);
86
+
87
+ if(rc<0){
88
+ fprintf(stderr, "Write failed: %s\n", strerror(errno));
89
+ return -1;
90
+ }
91
+
92
+ return count;
93
+ }
94
+
95
+ /* Skip len bytes in the input file.
96
+
97
+ len: # of bytes to skip
98
+
99
+ 0: Success
100
+ <0: Read error
101
+ */
102
+ static
103
+ int fx_skip_bytes(const FX_STATE state, const size_t len){
104
+
105
+ off_t rc;
106
+
107
+ rc = lseek(state.ifd, len, SEEK_CUR);
108
+ if(rc==((off_t)-1)){
109
+ fprintf(stderr, "Read error: %s\n", strerror(errno));
110
+ return -1;
111
+ }
112
+
113
+ return 0;
114
+ }
115
+
116
+ /* Read a header from the input stream.
117
+
118
+ kind_in: Will contain the header kind
119
+ len_in: Will contain the block length
120
+
121
+ >0: Read success (10)
122
+ 0: EOF
123
+ <0: Read error (message printed)
124
+ */
125
+ static inline
126
+ int fx_read_header(const FX_STATE state, fx_kind_t *kind_in, uint32_t *len_in){
127
+
128
+ int rc;
129
+ uint16_t kind;
130
+ uint32_t len;
131
+ u8 head[10];
132
+
133
+ rc = fx_read_bytes(state, head, 10);
134
+ if(rc<=0) return rc; /* 0 = EOF; <0 = already printed error */
135
+
136
+ if(head[0]!='L' || head[1]!='Z' || head[2]!='F' || head[3]!='X'){
137
+ fprintf(stderr, "Illegal header %X %X %X %X\n",
138
+ (int)head[0], (int)head[1], (int)head[2], (int)head[3]);
139
+ return -1;
140
+ }
141
+
142
+ kind = (head[4]<<8) | head[5];
143
+ len = (head[6]<<24) | (head[7]<<16) | (head[8]<<8) | head[9];
144
+
145
+ *kind_in = kind;
146
+ *len_in = len;
147
+
148
+ return rc;
149
+ }
150
+
151
+ /* Write a block to output file, adding the header.
152
+
153
+ kind_in: Header kind
154
+ len: Block length
155
+ data: Block data
156
+
157
+ 0: Write success
158
+ <0: Write error (message printed)
159
+ */
160
+ static inline
161
+ int fx_write_block(const FX_STATE state, const fx_kind_t kind_in,
162
+ const uint32_t len, const void* data){
163
+
164
+ const uint16_t kind = kind_in;
165
+ u8 head[] = {'L','Z','F','X', 0, 0, 0, 0, 0, 0};
166
+
167
+ head[4] = kind >> 8; head[5] = kind;
168
+ head[6] = len >> 24; head[7] = len >> 16;
169
+ head[8] = len >> 8; head[9] = len;
170
+
171
+ if(fx_write_bytes(state, head, 10) < 0) return -1;
172
+ if(fx_write_bytes(state, data, len) < 0) return -1;
173
+
174
+ return 0;
175
+ }
176
+
177
+ /* Decompress a block (KIND_COMPRESSED) to the output file.
178
+
179
+ ibuf: The block (including 4-byte leader)
180
+ len: Total block length
181
+
182
+ 0: Success
183
+ <0: Error (message printed)
184
+ */
185
+ static
186
+ int fx_decompress_block(const FX_STATE state, const u8 *ibuf, const size_t len){
187
+
188
+ static u8* obuf;
189
+ static size_t obuf_len;
190
+
191
+ uint32_t usize;
192
+ unsigned int usize_real;
193
+ int rc;
194
+
195
+ if(len<4){
196
+ fprintf(stderr, "Compressed size truncated\n");
197
+ return -2;
198
+ }
199
+
200
+ usize = (ibuf[0]<<24) | (ibuf[1]<<16) | (ibuf[2]<<8) | ibuf[3];
201
+
202
+ if(usize>obuf_len){
203
+ obuf = (u8*)realloc(obuf, usize);
204
+ if(obuf==NULL) return -1; /* This leaks but we quit right away */
205
+ obuf_len = usize;
206
+ }
207
+
208
+ usize_real = usize;
209
+
210
+ rc = lzfx_decompress(ibuf+4, len-4, obuf, &usize_real);
211
+ if(rc<0){
212
+ fprintf(stderr, "Decompression failed: code %d\n", rc);
213
+ return -2;
214
+ }
215
+ if(usize_real != usize){
216
+ fprintf(stderr, "Decompressed data has wrong length (%d vs expected %d)\n", (int)usize_real, (int)usize);
217
+ return -2;
218
+ }
219
+
220
+ rc = fx_write_bytes(state, obuf, usize);
221
+ if(rc<0) return -1;
222
+
223
+ return 0;
224
+ }
225
+
226
+ static inline
227
+ int mem_resize(u8** buf, size_t *ilen, const size_t olen){
228
+ void* tbuf;
229
+ if(olen>*ilen){
230
+ tbuf = realloc(*buf, olen);
231
+ if(tbuf==NULL){
232
+ fprintf(stderr, "Can't allocate memory (%lu bytes)\n", (unsigned long)olen);
233
+ return -1;
234
+ }
235
+ *buf = tbuf;
236
+ *ilen = olen;
237
+ }
238
+ return 0;
239
+ }
240
+
241
+ /* Compress a block of raw data and store it in the output file.
242
+
243
+ ibuf: Raw data block to compress
244
+ ilen: Length of data block
245
+
246
+ 0: Success
247
+ <0: Error (message printed)
248
+ */
249
+ static
250
+ int fx_compress_block(const FX_STATE state, const u8* ibuf,
251
+ const uint32_t ilen){
252
+
253
+ static u8* obuf;
254
+ static size_t obuf_len;
255
+
256
+ unsigned int compressed_len;
257
+ int rc;
258
+
259
+ if(ilen<=4){
260
+ rc = fx_write_block(state, KIND_UNCOMPRESSED, ilen, ibuf);
261
+ return rc<0 ? -1 : 0;
262
+ }
263
+
264
+ rc = mem_resize(&obuf, &obuf_len, ilen);
265
+ if(rc<0) return -1;
266
+
267
+ /* 4-byte space to store the usize */
268
+ compressed_len = ilen - 4;
269
+
270
+ rc = lzfx_compress(ibuf, ilen, obuf+4, &compressed_len);
271
+ if(rc<0 && rc != LZFX_ESIZE){
272
+ fprintf(stderr, "Compression error (code %d)\n", rc);
273
+ return -1;
274
+ }
275
+
276
+ if(rc == LZFX_ESIZE || !compressed_len){
277
+
278
+ rc = fx_write_block(state, KIND_UNCOMPRESSED, ilen, ibuf);
279
+ if(rc<0) return -1;
280
+
281
+ } else {
282
+
283
+ obuf[0] = ilen >> 24;
284
+ obuf[1] = ilen >> 16;
285
+ obuf[2] = ilen >> 8;
286
+ obuf[3] = ilen;
287
+
288
+ rc = fx_write_block(state, KIND_COMPRESSED, compressed_len+4, obuf);
289
+ if(rc<0) return -1;
290
+ }
291
+
292
+ return 0;
293
+ }
294
+
295
+
296
+ /* Compress from input to output file.
297
+
298
+ 0: Success
299
+ <0: Failure (message printed)
300
+ */
301
+ int fx_create(const FX_STATE state){
302
+
303
+ unsigned long blockno = 0;
304
+ ssize_t rc = 0;
305
+ size_t count = 0;
306
+ u8* ibuf = (u8*)malloc(BLOCKSIZE);
307
+
308
+ do {
309
+ rc = 0;
310
+ count = 0;
311
+ do {
312
+ rc = read(state.ifd, ibuf, BLOCKSIZE-count);
313
+ if(rc<0){
314
+ fprintf(stderr, "Read error: %s\n", strerror(rc));
315
+ goto failed;
316
+ }
317
+ count += rc;
318
+ } while(rc > 0 && count < BLOCKSIZE);
319
+
320
+ blockno++;
321
+
322
+ if(count>0){
323
+ rc = fx_compress_block(state, ibuf, count);
324
+ if(rc<0) goto failed;
325
+ }
326
+
327
+ } while(count==BLOCKSIZE);
328
+
329
+ free(ibuf);
330
+ return 0;
331
+
332
+ failed:
333
+ fprintf(stderr, "Compression failed at byte %lu\n", blockno*BLOCKSIZE + count);
334
+ free(ibuf);
335
+ return -1;
336
+ }
337
+
338
+ /* Read an LZFX file
339
+
340
+ 0: success
341
+ <0: Failure (message printed)
342
+ */
343
+ int fx_read(const FX_STATE state){
344
+
345
+ int rc;
346
+ fx_kind_t kind = 0;
347
+ uint32_t blocksize = 0;
348
+
349
+ static u8* ibuf;
350
+ static size_t ilen;
351
+
352
+ while(1){
353
+ rc = fx_read_header(state, &kind, &blocksize);
354
+ if(rc==0) break; /* EOF */
355
+ if(rc<0) return -1;
356
+ if(blocksize==0) continue;
357
+
358
+ switch(kind){
359
+
360
+ case KIND_UNCOMPRESSED:
361
+ rc = mem_resize(&ibuf, &ilen, blocksize);
362
+ if(rc<0) return -1;
363
+
364
+ rc = fx_read_bytes(state, ibuf, blocksize);
365
+ if(rc<0) return -1;
366
+ if(rc==0){
367
+ fprintf(stderr, "EOF after block header (tried to read %d bytes)\n", blocksize);
368
+ return -1;
369
+ }
370
+
371
+ rc = fx_write_bytes(state, ibuf, blocksize);
372
+ if(rc<0) return -1;
373
+ break;
374
+
375
+ case KIND_COMPRESSED:
376
+ rc = mem_resize(&ibuf, &ilen, blocksize);
377
+ if(rc<0) return -1;
378
+
379
+ rc = fx_read_bytes(state, ibuf, blocksize);
380
+ if(rc<0) return -1;
381
+ if(rc==0){
382
+ fprintf(stderr, "EOF after block header\n");
383
+ return -1;
384
+ }
385
+ rc = fx_decompress_block(state, ibuf, blocksize);
386
+ if(rc<0) return -1;
387
+ break;
388
+
389
+ default:
390
+ rc = fx_skip_bytes(state, blocksize);
391
+ if(rc<0) return -1;
392
+ }
393
+ }
394
+ return 0;
395
+ }
396
+
397
+ int main(int argc, char* argv[]){
398
+
399
+ int rc;
400
+ int ifd, ofd;
401
+ fx_mode_t mode;
402
+ FX_STATE state;
403
+
404
+ fprintf(stderr, "LZFX compression utility 0.1\n"
405
+ "http://lzfx.googlecode.com\n"
406
+ "*********************************\n"
407
+ " THIS IS A DEVELOPMENT RELEASE\n"
408
+ " DO NOT USE ON CRITICAL DATA\n"
409
+ "*********************************\n");
410
+
411
+ if(argc!=4){
412
+ fprintf(stderr, "Syntax is lzfx <namein> <nameout> c|d\n");
413
+ return 1;
414
+ }
415
+
416
+ ifd = open(argv[1], O_RDONLY);
417
+ if(ifd<0){
418
+ fprintf(stderr, "Can't open input file\n");
419
+ return 1;
420
+ }
421
+
422
+ ofd = open(argv[2], O_CREAT | O_WRONLY | O_TRUNC, 0644);
423
+ if(ofd<0){
424
+ fprintf(stderr, "Can't open output file for write\n");
425
+ return 1;
426
+ }
427
+
428
+ if(!strcmp(argv[3], "c")){
429
+ mode = MODE_COMPRESS;
430
+ } else if(!strcmp(argv[3], "d")){
431
+ mode = MODE_DECOMPRESS;
432
+ } else {
433
+ fprintf(stderr, "Illegal mode (must be 'c' or 'd')\n");
434
+ return 1;
435
+ }
436
+
437
+ fx_init(&state, ifd, ofd, mode);
438
+
439
+ switch(mode){
440
+ case MODE_COMPRESS:
441
+ rc = fx_create(state);
442
+ break;
443
+ case MODE_DECOMPRESS:
444
+ rc = fx_read(state);
445
+ break;
446
+ }
447
+
448
+ return rc ? 1 : 0;
449
+ }
450
+
data/ext/lzfx.c ADDED
@@ -0,0 +1,366 @@
1
+ /*
2
+ * Copyright (c) 2009 Andrew Collette <andrew.collette at gmail.com>
3
+ * http://lzfx.googlecode.com
4
+ *
5
+ * Implements an LZF-compatible compressor/decompressor based on the liblzf
6
+ * codebase written by Marc Lehmann. This code is released under the BSD
7
+ * license. License and original copyright statement follow.
8
+ *
9
+ *
10
+ * Copyright (c) 2000-2008 Marc Alexander Lehmann <schmorp@schmorp.de>
11
+ *
12
+ * Redistribution and use in source and binary forms, with or without modifica-
13
+ * tion, are permitted provided that the following conditions are met:
14
+ *
15
+ * 1. Redistributions of source code must retain the above copyright notice,
16
+ * this list of conditions and the following disclaimer.
17
+ *
18
+ * 2. Redistributions in binary form must reproduce the above copyright
19
+ * notice, this list of conditions and the following disclaimer in the
20
+ * documentation and/or other materials provided with the distribution.
21
+ *
22
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
23
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MER-
24
+ * CHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
25
+ * EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPE-
26
+ * CIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
27
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
28
+ * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
29
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTH-
30
+ * ERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
31
+ * OF THE POSSIBILITY OF SUCH DAMAGE.
32
+ */
33
+
34
+ #include "lzfx.h"
35
+
36
+ #define LZFX_HSIZE (1 << (LZFX_HLOG))
37
+
38
+ /* We need this for memset */
39
+ #ifdef __cplusplus
40
+ # include <cstring>
41
+ #else
42
+ # include <string.h>
43
+ #endif
44
+
45
+ #if __GNUC__ >= 3 && !DISABLE_EXPECT
46
+ # define fx_expect_false(expr) __builtin_expect((expr) != 0, 0)
47
+ # define fx_expect_true(expr) __builtin_expect((expr) != 0, 1)
48
+ #else
49
+ # define fx_expect_false(expr) (expr)
50
+ # define fx_expect_true(expr) (expr)
51
+ #endif
52
+
53
+ typedef unsigned char u8;
54
+ typedef const u8 *LZSTATE[LZFX_HSIZE];
55
+
56
/* Define the hash function.

   LZFX_FRST(p)   seeds the hash value from bytes p[0] and p[1].
   LZFX_NEXT(v,p) shifts the next byte, p[2], into hash value v.
   LZFX_IDX(h)    folds hash value h into a hash-table index.

   Every argument is parenthesized so that expression arguments such as
   (ptr + 1) expand correctly. */
#define LZFX_FRST(p)     ((((p)[0]) << 8) | (p)[1])
#define LZFX_NEXT(v,p)   (((v) << 8) | (p)[2])
#define LZFX_IDX(h)      ((((h) >> (3*8 - LZFX_HLOG)) - (h)) & (LZFX_HSIZE - 1))

/* These cannot be changed, as they are related to the compressed format. */
#define LZFX_MAX_LIT        (1 <<  5)
#define LZFX_MAX_OFF        (1 << 13)
#define LZFX_MAX_REF        ((1 << 8) + (1 << 3))
65
+
66
+ static
67
+ int lzfx_getsize(const void* ibuf, unsigned int ilen, unsigned int *olen);
68
+
69
+ /* Compressed format
70
+
71
+ There are two kinds of structures in LZF/LZFX: literal runs and back
72
+ references. The length of a literal run is encoded as L - 1, as it must
73
+ contain at least one byte. Literals are encoded as follows:
74
+
75
+ 000LLLLL <L+1 bytes>
76
+
77
+ Back references are encoded as follows. The smallest possible encoded
78
+ length value is 1, as otherwise the control byte would be recognized as
79
+ a literal run. Since at least three bytes must match for a back reference
80
+ to be inserted, the length is encoded as L - 2 instead of L - 1. The
81
+ offset (distance to the desired data in the output buffer) is encoded as
82
+ o - 1, as all offsets are at least 1. The binary format is:
83
+
84
+ LLLooooo oooooooo for backrefs of real length < 9 (1 <= L < 7)
85
+ 111ooooo LLLLLLLL oooooooo for backrefs of real length >= 9 (L >= 7)
86
+ */
87
+ #include <stdio.h>
88
+ int lzfx_compress(const void *const ibuf, const unsigned int ilen,
89
+ void *obuf, unsigned int *const olen){
90
+
91
+ /* Hash table; an array of u8*'s which point
92
+ to various locations in the input buffer */
93
+ const u8 *htab[LZFX_HSIZE];
94
+
95
+ const u8 **hslot; /* Pointer to entry in hash table */
96
+ unsigned int hval; /* Hash value generated by macros above */
97
+ const u8 *ref; /* Pointer to candidate match location in input */
98
+
99
+ const u8 *ip = (const u8 *)ibuf;
100
+ const u8 *const in_end = ip + ilen;
101
+
102
+ u8 *op = (u8 *)obuf;
103
+ const u8 *const out_end = (olen == NULL ? NULL : op + *olen);
104
+
105
+ int lit; /* # of bytes in current literal run */
106
+
107
+ #if defined (WIN32) && defined (_M_X64)
108
+ unsigned _int64 off; /* workaround for missing POSIX compliance */
109
+ #else
110
+ unsigned long off;
111
+ #endif
112
+
113
+ if(olen == NULL) return LZFX_EARGS;
114
+ if(ibuf == NULL){
115
+ if(ilen != 0) return LZFX_EARGS;
116
+ *olen = 0;
117
+ return 0;
118
+ }
119
+ if(obuf == NULL){
120
+ if(olen != 0) return LZFX_EARGS;
121
+ return lzfx_getsize(ibuf, ilen, olen);
122
+ }
123
+
124
+ memset(htab, 0, sizeof(htab));
125
+
126
+ /* Start a literal run. Whenever we do this the output pointer is
127
+ advanced because the current byte will hold the encoded length. */
128
+ lit = 0; op++;
129
+
130
+ hval = LZFX_FRST(ip);
131
+
132
+ while(ip + 2 < in_end){ /* The NEXT macro reads 2 bytes ahead */
133
+
134
+ hval = LZFX_NEXT(hval, ip);
135
+ hslot = htab + LZFX_IDX(hval);
136
+
137
+ ref = *hslot; *hslot = ip;
138
+
139
+ if( ref < ip
140
+ && (off = ip - ref - 1) < LZFX_MAX_OFF
141
+ && ip + 4 < in_end /* Backref takes up to 3 bytes, so don't bother */
142
+ && ref > (u8 *)ibuf
143
+ && ref[0] == ip[0]
144
+ && ref[1] == ip[1]
145
+ && ref[2] == ip[2] ) {
146
+
147
+ unsigned int len = 3; /* We already know 3 bytes match */
148
+ const unsigned int maxlen = in_end - ip - 2 > LZFX_MAX_REF ?
149
+ LZFX_MAX_REF : in_end - ip - 2;
150
+
151
+ /* lit == 0: op + 3 must be < out_end (because we undo the run)
152
+ lit != 0: op + 3 + 1 must be < out_end */
153
+ if(fx_expect_false(op - !lit + 3 + 1 >= out_end))
154
+ return LZFX_ESIZE;
155
+
156
+ op [- lit - 1] = lit - 1; /* Terminate literal run */
157
+ op -= !lit; /* Undo run if length is zero */
158
+
159
+ /* Start checking at the fourth byte */
160
+ while (len < maxlen && ref[len] == ip[len])
161
+ len++;
162
+
163
+ len -= 2; /* We encode the length as #octets - 2 */
164
+
165
+ /* Format 1: [LLLooooo oooooooo] */
166
+ if (len < 7) {
167
+ *op++ = (off >> 8) + (len << 5);
168
+ *op++ = off;
169
+
170
+ /* Format 2: [111ooooo LLLLLLLL oooooooo] */
171
+ } else {
172
+ *op++ = (off >> 8) + (7 << 5);
173
+ *op++ = len - 7;
174
+ *op++ = off;
175
+ }
176
+
177
+ lit = 0; op++;
178
+
179
+ ip += len + 1; /* ip = initial ip + #octets -1 */
180
+
181
+ if (fx_expect_false (ip + 3 >= in_end)){
182
+ ip++; /* Code following expects exit at bottom of loop */
183
+ break;
184
+ }
185
+
186
+ hval = LZFX_FRST (ip);
187
+ hval = LZFX_NEXT (hval, ip);
188
+ htab[LZFX_IDX (hval)] = ip;
189
+
190
+ ip++; /* ip = initial ip + #octets */
191
+
192
+ } else {
193
+ /* Keep copying literal bytes */
194
+
195
+ if (fx_expect_false (op >= out_end)) return LZFX_ESIZE;
196
+
197
+ lit++; *op++ = *ip++;
198
+
199
+ if (fx_expect_false (lit == LZFX_MAX_LIT)) {
200
+ op [- lit - 1] = lit - 1; /* stop run */
201
+ lit = 0; op++; /* start run */
202
+ }
203
+
204
+ } /* if() found match in htab */
205
+
206
+ } /* while(ip < ilen -2) */
207
+
208
+ /* At most 3 bytes remain in input. We therefore need 4 bytes available
209
+ in the output buffer to store them (3 data + ctrl byte).*/
210
+ if (op + 3 > out_end) return LZFX_ESIZE;
211
+
212
+ while (ip < in_end) {
213
+
214
+ lit++; *op++ = *ip++;
215
+
216
+ if (fx_expect_false (lit == LZFX_MAX_LIT)){
217
+ op [- lit - 1] = lit - 1;
218
+ lit = 0; op++;
219
+ }
220
+ }
221
+
222
+ op [- lit - 1] = lit - 1;
223
+ op -= !lit;
224
+
225
+ *olen = op - (u8 *)obuf;
226
+ return 0;
227
+ }
228
+
229
+ /* Decompressor */
230
+ int lzfx_decompress(const void* ibuf, unsigned int ilen,
231
+ void* obuf, unsigned int *olen){
232
+
233
+ u8 const *ip = (const u8 *)ibuf;
234
+ u8 const *const in_end = ip + ilen;
235
+ u8 *op = (u8 *)obuf;
236
+ u8 const *const out_end = (olen == NULL ? NULL : op + *olen);
237
+
238
+ unsigned int remain_len = 0;
239
+ int rc;
240
+
241
+ if(olen == NULL) return LZFX_EARGS;
242
+ if(ibuf == NULL){
243
+ if(ilen != 0) return LZFX_EARGS;
244
+ *olen = 0;
245
+ return 0;
246
+ }
247
+ if(obuf == NULL){
248
+ if(olen != 0) return LZFX_EARGS;
249
+ return lzfx_getsize(ibuf, ilen, olen);
250
+ }
251
+
252
+ do {
253
+ unsigned int ctrl = *ip++;
254
+
255
+ /* Format 000LLLLL: a literal byte string follows, of length L+1 */
256
+ if(ctrl < (1 << 5)) {
257
+
258
+ ctrl++;
259
+
260
+ if(fx_expect_false(op + ctrl > out_end)){
261
+ --ip; /* Rewind to control byte */
262
+ goto guess;
263
+ }
264
+ if(fx_expect_false(ip + ctrl > in_end)) return LZFX_ECORRUPT;
265
+
266
+ do
267
+ *op++ = *ip++;
268
+ while(--ctrl);
269
+
270
+ /* Format #1 [LLLooooo oooooooo]: backref of length L+1+2
271
+ ^^^^^ ^^^^^^^^
272
+ A B
273
+ #2 [111ooooo LLLLLLLL oooooooo] backref of length L+7+2
274
+ ^^^^^ ^^^^^^^^
275
+ A B
276
+ In both cases the location of the backref is computed from the
277
+ remaining part of the data as follows:
278
+
279
+ location = op - A*256 - B - 1
280
+ */
281
+ } else {
282
+
283
+ unsigned int len = (ctrl >> 5);
284
+ u8 *ref = op - ((ctrl & 0x1f) << 8) -1;
285
+
286
+ if(len==7) len += *ip++; /* i.e. format #2 */
287
+
288
+ len += 2; /* len is now #octets */
289
+
290
+ if(fx_expect_false(op + len > out_end)){
291
+ ip -= (len >= 9) ? 2 : 1; /* Rewind to control byte */
292
+ goto guess;
293
+ }
294
+ if(fx_expect_false(ip >= in_end)) return LZFX_ECORRUPT;
295
+
296
+ ref -= *ip++;
297
+
298
+ if(fx_expect_false(ref < (u8*)obuf)) return LZFX_ECORRUPT;
299
+
300
+ do
301
+ *op++ = *ref++;
302
+ while (--len);
303
+ }
304
+
305
+ } while (ip < in_end);
306
+
307
+ *olen = op - (u8 *)obuf;
308
+
309
+ return 0;
310
+
311
+ guess:
312
+ rc = lzfx_getsize(ip, ilen - (ip-(u8*)ibuf), &remain_len);
313
+ if(rc>=0) *olen = remain_len + (op - (u8*)obuf);
314
+ return rc;
315
+ }
316
+
317
+ /* Guess len. No parameters may be NULL; this is not checked. */
318
+ static
319
+ int lzfx_getsize(const void* ibuf, unsigned int ilen, unsigned int *olen){
320
+
321
+ u8 const *ip = (const u8 *)ibuf;
322
+ u8 const *const in_end = ip + ilen;
323
+ int tot_len = 0;
324
+
325
+ while (ip < in_end) {
326
+
327
+ unsigned int ctrl = *ip++;
328
+
329
+ if(ctrl < (1 << 5)) {
330
+
331
+ ctrl++;
332
+
333
+ if(ip + ctrl > in_end)
334
+ return LZFX_ECORRUPT;
335
+
336
+ tot_len += ctrl;
337
+ ip += ctrl;
338
+
339
+ } else {
340
+
341
+ unsigned int len = (ctrl >> 5);
342
+
343
+ if(len==7){ /* i.e. format #2 */
344
+ len += *ip++;
345
+ }
346
+
347
+ len += 2; /* len is now #octets */
348
+
349
+ if(ip >= in_end) return LZFX_ECORRUPT;
350
+
351
+ ip++; /* skip the ref byte */
352
+
353
+ tot_len += len;
354
+
355
+ }
356
+
357
+ }
358
+
359
+ *olen = tot_len;
360
+
361
+ return 0;
362
+ }
363
+
364
+
365
+
366
+
data/ext/lzfx.h ADDED
@@ -0,0 +1,98 @@
1
+ /*
2
+ * Copyright (c) 2009 Andrew Collette <andrew.collette at gmail.com>
3
+ * http://lzfx.googlecode.com
4
+ *
5
+ * Implements an LZF-compatible compressor/decompressor based on the liblzf
6
+ * codebase written by Marc Lehmann. This code is released under the BSD
7
+ * license. License and original copyright statement follow.
8
+ *
9
+ *
10
+ * Copyright (c) 2000-2008 Marc Alexander Lehmann <schmorp@schmorp.de>
11
+ *
12
+ * Redistribution and use in source and binary forms, with or without modifica-
13
+ * tion, are permitted provided that the following conditions are met:
14
+ *
15
+ * 1. Redistributions of source code must retain the above copyright notice,
16
+ * this list of conditions and the following disclaimer.
17
+ *
18
+ * 2. Redistributions in binary form must reproduce the above copyright
19
+ * notice, this list of conditions and the following disclaimer in the
20
+ * documentation and/or other materials provided with the distribution.
21
+ *
22
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
23
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MER-
24
+ * CHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
25
+ * EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPE-
26
+ * CIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
27
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
28
+ * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
29
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTH-
30
+ * ERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
31
+ * OF THE POSSIBILITY OF SUCH DAMAGE.
32
+ */
33
+
34
+ #ifndef LZFX_H
35
+ #define LZFX_H
36
+
37
+ #ifdef __cplusplus
38
+ extern "C" {
39
+ #endif
40
+
41
+ /* Documented behavior, including function signatures and error codes,
42
+ is guaranteed to remain unchanged for releases with the same major
43
+ version number. Releases of the same major version are also able
44
+ to read each other's output, although the output itself is not
45
+ guaranteed to be byte-for-byte identical.
46
+ */
47
+ #define LZFX_VERSION_MAJOR 0
48
+ #define LZFX_VERSION_MINOR 1
49
+ #define LZFX_VERSION_STRING "0.1"
50
+
51
+ /* Hashtable size (2**LZFX_HLOG entries) */
52
+ #ifndef LZFX_HLOG
53
+ # define LZFX_HLOG 16
54
+ #endif
55
+
56
/* Predefined errors.  All are negative; success is reported as a
   non-negative value. */
#define LZFX_ESIZE      (-1)    /* Output buffer too small */
#define LZFX_ECORRUPT   (-2)    /* Invalid data for decompression */
#define LZFX_EARGS      (-3)    /* Arguments invalid (NULL) */
60
+
61
+ /* Buffer-to-buffer compression.
62
+
63
+ Supply pre-allocated input and output buffers via ibuf and obuf, and
64
+ their size in bytes via ilen and olen. Buffers may not overlap.
65
+
66
+ On success, the function returns a non-negative value and the argument
67
+ olen contains the compressed size in bytes. On failure, a negative
68
+ value is returned and olen is not modified.
69
+ */
70
+ int lzfx_compress(const void* ibuf, unsigned int ilen,
71
+ void* obuf, unsigned int *olen);
72
+
73
+ /* Buffer-to-buffer decompression.
74
+
75
+ Supply pre-allocated input and output buffers via ibuf and obuf, and
76
+ their size in bytes via ilen and olen. Buffers may not overlap.
77
+
78
+ On success, the function returns a non-negative value and the argument
79
+ olen contains the uncompressed size in bytes. On failure, a negative
80
+ value is returned.
81
+
82
+ If the failure code is LZFX_ESIZE, olen contains the minimum buffer size
83
+ required to hold the decompressed data. Otherwise, olen is not modified.
84
+
85
+ Supplying a zero *olen is a valid and supported strategy to determine the
86
+ required buffer size. This does not require decompression of the entire
87
+ stream and is consequently very fast. Argument obuf may be NULL in
88
+ this case only.
89
+ */
90
+ int lzfx_decompress(const void* ibuf, unsigned int ilen,
91
+ void* obuf, unsigned int *olen);
92
+
93
+
94
+ #ifdef __cplusplus
95
+ } /* extern "C" */
96
+ #endif
97
+
98
+ #endif
data/ext/lzfxruby.c ADDED
@@ -0,0 +1,73 @@
1
#include <limits.h>
#include <string.h>
#include "ruby.h"
#include "lzfx.h"
3
+ #define COFFICIENT_OF_BUFFER 3
4
+
5
+ static VALUE rb_lzfx_compress(VALUE self, VALUE input) {
6
+ unsigned char *compressed, *data;
7
+ int out_length, data_length;
8
+ int retcode;
9
+ VALUE retval;
10
+
11
+ Check_Type(input, T_STRING);
12
+ data = StringValuePtr(input);
13
+ data_length = RSTRING_LEN(input);
14
+
15
+ if (data_length < 1) {
16
+ return Qnil;
17
+ }
18
+
19
+ out_length = data_length * COFFICIENT_OF_BUFFER;
20
+ compressed = malloc(out_length);
21
+
22
+ retcode = lzfx_compress(data, data_length, compressed, &out_length);
23
+
24
+ if(retcode < 0) {
25
+ ruby_xfree(compressed);
26
+ return Qnil;
27
+ }
28
+
29
+ retval = rb_str_new(compressed, out_length);
30
+
31
+ ruby_xfree(compressed);
32
+
33
+ return retval;
34
+ }
35
+
36
+ static VALUE rb_lzfx_decompress(VALUE self, VALUE input) {
37
+ unsigned char *decompressed, *data;
38
+ int out_length, data_length;
39
+ int retcode;
40
+ VALUE retval;
41
+
42
+ Check_Type(input, T_STRING);
43
+
44
+ data = RSTRING_PTR(input);
45
+ data_length = RSTRING_LEN(input);
46
+
47
+ if (data_length < 1) {
48
+ return Qnil;
49
+ }
50
+
51
+ out_length = data_length * COFFICIENT_OF_BUFFER;
52
+ decompressed = malloc(out_length);
53
+
54
+ retcode = lzfx_decompress(data, data_length, decompressed, &out_length);
55
+
56
+ if(retcode < 0) {
57
+ ruby_xfree(decompressed);
58
+ return Qnil;
59
+ }
60
+
61
+ retval = rb_str_new(decompressed, out_length);
62
+
63
+ ruby_xfree(decompressed);
64
+
65
+ return retval;
66
+ }
67
+
68
+ void Init_lzfx() {
69
+ VALUE rb_mLzfx;
70
+ rb_mLzfx = rb_define_module("Lzfx");
71
+ rb_define_module_function(rb_mLzfx, "compress", rb_lzfx_compress, 1);
72
+ rb_define_module_function(rb_mLzfx, "decompress", rb_lzfx_decompress, 1);
73
+ }
metadata ADDED
@@ -0,0 +1,75 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: lzfx
3
+ version: !ruby/object:Gem::Version
4
+ hash: 27
5
+ prerelease:
6
+ segments:
7
+ - 0
8
+ - 1
9
+ - 0
10
+ version: 0.1.0
11
+ platform: ruby
12
+ authors:
13
+ - hmsk
14
+ autorequire:
15
+ bindir: bin
16
+ cert_chain: []
17
+
18
+ date: 2012-04-06 00:00:00 +09:00
19
+ default_executable:
20
+ dependencies: []
21
+
22
+ description: Ruby bindings for lzfx. lzfx is a tiny, extremely fast compression library.
23
+ email: k.hamasaki@gmail.com
24
+ executables: []
25
+
26
+ extensions:
27
+ - ext/extconf.rb
28
+ extra_rdoc_files:
29
+ - README.md
30
+ files:
31
+ - ext/lzfx-util.c
32
+ - ext/lzfx.c
33
+ - ext/lzfxruby.c
34
+ - ext/lzfx.h
35
+ - ext/extconf.rb
36
+ - README.md
37
+ has_rdoc: true
38
+ homepage: https://github.com/winebarrel/infra-study/tree/master/4th/gems/lzfx
39
+ licenses: []
40
+
41
+ post_install_message:
42
+ rdoc_options:
43
+ - --title
44
+ - lzfx - Ruby bindings for lzfx.
45
+ - --main
46
+ - README.md
47
+ require_paths:
48
+ - lib
49
+ required_ruby_version: !ruby/object:Gem::Requirement
50
+ none: false
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ hash: 3
55
+ segments:
56
+ - 0
57
+ version: "0"
58
+ required_rubygems_version: !ruby/object:Gem::Requirement
59
+ none: false
60
+ requirements:
61
+ - - ">="
62
+ - !ruby/object:Gem::Version
63
+ hash: 3
64
+ segments:
65
+ - 0
66
+ version: "0"
67
+ requirements: []
68
+
69
+ rubyforge_project:
70
+ rubygems_version: 1.5.0
71
+ signing_key:
72
+ specification_version: 3
73
+ summary: Ruby bindings for lzfx.
74
+ test_files: []
75
+