lzfx 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/README.md ADDED
@@ -0,0 +1,33 @@
1
+ # Ruby bindings for lzfx
2
+
3
+ lzfx is a tiny, extremely fast compression library.
4
+
5
+ ## lzfx
6
+ This gem bundles the lzfx source code.
7
+
8
+ * http://code.google.com/p/lzfx/
9
+
10
+ > LZFX is a small (one C file, 200 non-comment lines) BSD-licensed library designed for very-high-speed compression of redundant data. It is descended from liblzf and is 100% compatible with existing LZF-compressed data.
11
+
12
+ > LZFX was originally developed as a component of the h5py project, which uses an LZF-based compressor to process scientific data.
13
+
14
+ ## Example
15
+
16
+ require 'rubygems'
17
+ require 'lzfx'
18
+
19
+ a = "aaaaaaaaabbbbbbbbbbbbabababaccccccccccjshabbbbbbaaa"
20
+ puts "source: #{a}"
21
+ puts "source size: #{a.size}"
22
+
23
+ b = Lzfx.compress a
24
+ puts "compressed: #{b}"
25
+ puts "compressed size: #{b.size}"
26
+
27
+ c = Lzfx.decompress b
28
+ puts "decompressed: #{c}"
29
+ puts "decompressed size: #{c.size}"
30
+ puts "source == decompressed: #{a == c}"
31
+
32
+ ## Copyright
33
+ Copyright: (C) 2012 Kengo HAMASAKI <k.hamasaki@gmail.com>
data/ext/extconf.rb ADDED
@@ -0,0 +1,2 @@
# Generate the Makefile that builds the "lzfx" native extension.
require "mkmf"

create_makefile "lzfx"
data/ext/lzfx-util.c ADDED
@@ -0,0 +1,450 @@
1
+ #include <stdlib.h>
2
+ #include <stdio.h>
3
+ #include <unistd.h>
4
+ #include <fcntl.h>
5
+ #include <string.h>
6
+ #include "lzfx.h"
7
+ #include <errno.h>
8
+ #include <stdint.h>
9
+
/* Number of raw input bytes gathered per block when compressing (1 MiB). */
#define BLOCKSIZE (1 << 20)

/* Raw byte. */
typedef unsigned char u8;

/* Direction of operation, selected by the third command-line argument. */
typedef enum {
    MODE_COMPRESS   = 0,
    MODE_DECOMPRESS = 1
} fx_mode_t;

/* Value of the 16-bit "kind" field carried in every block header. */
typedef enum {
    KIND_FILEHEADER   = 0,
    KIND_COMPRESSED   = 1,
    KIND_UNCOMPRESSED = 2
} fx_kind_t;

/* State shared by the I/O helpers: the two descriptors and the mode. */
typedef struct {
    int ifd;            /* input descriptor  */
    int ofd;            /* output descriptor */
    fx_mode_t mode;
} FX_STATE;
29
+
30
+ static
31
+ void fx_init(FX_STATE *state, int ifd, int ofd, fx_mode_t mode){
32
+ state->ifd = ifd;
33
+ state->ofd = ofd;
34
+ state->mode = mode;
35
+ }
36
+
37
+ /* Read len bytes from the input file.
38
+
39
+ buf: Output buffer
40
+ len: # bytes to read
41
+
42
+ >=0: bytes read, either 0 (EOF) or len
43
+ <0: Read error
44
+ */
45
+ static inline
46
+ int fx_read_bytes(const FX_STATE state, void* buf, const size_t len){
47
+
48
+ ssize_t rc = 0;
49
+ size_t count = 0;
50
+
51
+ do {
52
+ rc = read(state.ifd, ((u8*)buf)+count, len-count);
53
+ count += rc;
54
+ } while(rc>0 && count<len);
55
+
56
+ if(rc<0){
57
+ fprintf(stderr, "Read failed: %s\n", strerror(errno));
58
+ return -1;
59
+ }
60
+
61
+ if(count>0 && count!=len){
62
+ fprintf(stderr, "Read truncated (%u bytes short)\n", (unsigned int)(len-count));
63
+ return -1;
64
+ }
65
+ return count;
66
+ }
67
+
68
+ /* Write len bytes from the buffer to the output file.
69
+
70
+ buf: Input buffer
71
+ len: # of bytes in buf
72
+
73
+ >=0: Bytes written
74
+ <0: Write error
75
+ */
76
+ static inline
77
+ int fx_write_bytes(const FX_STATE state, const void* buf, const size_t len){
78
+
79
+ ssize_t rc = 0;
80
+ size_t count = 0;
81
+
82
+ do {
83
+ rc = write(state.ofd, ((u8*)buf)+count, len-count);
84
+ count += rc;
85
+ } while(rc>0 && count<len);
86
+
87
+ if(rc<0){
88
+ fprintf(stderr, "Write failed: %s\n", strerror(errno));
89
+ return -1;
90
+ }
91
+
92
+ return count;
93
+ }
94
+
95
+ /* Skip len bytes in the input file.
96
+
97
+ len: # of bytes to skip
98
+
99
+ 0: Success
100
+ <0: Read error
101
+ */
102
+ static
103
+ int fx_skip_bytes(const FX_STATE state, const size_t len){
104
+
105
+ off_t rc;
106
+
107
+ rc = lseek(state.ifd, len, SEEK_CUR);
108
+ if(rc==((off_t)-1)){
109
+ fprintf(stderr, "Read error: %s\n", strerror(errno));
110
+ return -1;
111
+ }
112
+
113
+ return 0;
114
+ }
115
+
116
+ /* Read a header from the input stream.
117
+
118
+ kind_in: Will contain the header kind
119
+ len_in: Will contain the block length
120
+
121
+ >0: Read success (10)
122
+ 0: EOF
123
+ <0: Read error (message printed)
124
+ */
125
+ static inline
126
+ int fx_read_header(const FX_STATE state, fx_kind_t *kind_in, uint32_t *len_in){
127
+
128
+ int rc;
129
+ uint16_t kind;
130
+ uint32_t len;
131
+ u8 head[10];
132
+
133
+ rc = fx_read_bytes(state, head, 10);
134
+ if(rc<=0) return rc; /* 0 = EOF; <0 = already printed error */
135
+
136
+ if(head[0]!='L' || head[1]!='Z' || head[2]!='F' || head[3]!='X'){
137
+ fprintf(stderr, "Illegal header %X %X %X %X\n",
138
+ (int)head[0], (int)head[1], (int)head[2], (int)head[3]);
139
+ return -1;
140
+ }
141
+
142
+ kind = (head[4]<<8) | head[5];
143
+ len = (head[6]<<24) | (head[7]<<16) | (head[8]<<8) | head[9];
144
+
145
+ *kind_in = kind;
146
+ *len_in = len;
147
+
148
+ return rc;
149
+ }
150
+
151
+ /* Write a block to output file, adding the header.
152
+
153
+ kind_in: Header kind
154
+ len: Block length
155
+ data: Block data
156
+
157
+ 0: Write success
158
+ <0: Write error (message printed)
159
+ */
160
+ static inline
161
+ int fx_write_block(const FX_STATE state, const fx_kind_t kind_in,
162
+ const uint32_t len, const void* data){
163
+
164
+ const uint16_t kind = kind_in;
165
+ u8 head[] = {'L','Z','F','X', 0, 0, 0, 0, 0, 0};
166
+
167
+ head[4] = kind >> 8; head[5] = kind;
168
+ head[6] = len >> 24; head[7] = len >> 16;
169
+ head[8] = len >> 8; head[9] = len;
170
+
171
+ if(fx_write_bytes(state, head, 10) < 0) return -1;
172
+ if(fx_write_bytes(state, data, len) < 0) return -1;
173
+
174
+ return 0;
175
+ }
176
+
177
+ /* Decompress a block (KIND_COMPRESSED) to the output file.
178
+
179
+ ibuf: The block (including 4-byte leader)
180
+ len: Total block length
181
+
182
+ 0: Success
183
+ <0: Error (message printed)
184
+ */
185
+ static
186
+ int fx_decompress_block(const FX_STATE state, const u8 *ibuf, const size_t len){
187
+
188
+ static u8* obuf;
189
+ static size_t obuf_len;
190
+
191
+ uint32_t usize;
192
+ unsigned int usize_real;
193
+ int rc;
194
+
195
+ if(len<4){
196
+ fprintf(stderr, "Compressed size truncated\n");
197
+ return -2;
198
+ }
199
+
200
+ usize = (ibuf[0]<<24) | (ibuf[1]<<16) | (ibuf[2]<<8) | ibuf[3];
201
+
202
+ if(usize>obuf_len){
203
+ obuf = (u8*)realloc(obuf, usize);
204
+ if(obuf==NULL) return -1; /* This leaks but we quit right away */
205
+ obuf_len = usize;
206
+ }
207
+
208
+ usize_real = usize;
209
+
210
+ rc = lzfx_decompress(ibuf+4, len-4, obuf, &usize_real);
211
+ if(rc<0){
212
+ fprintf(stderr, "Decompression failed: code %d\n", rc);
213
+ return -2;
214
+ }
215
+ if(usize_real != usize){
216
+ fprintf(stderr, "Decompressed data has wrong length (%d vs expected %d)\n", (int)usize_real, (int)usize);
217
+ return -2;
218
+ }
219
+
220
+ rc = fx_write_bytes(state, obuf, usize);
221
+ if(rc<0) return -1;
222
+
223
+ return 0;
224
+ }
225
+
226
+ static inline
227
+ int mem_resize(u8** buf, size_t *ilen, const size_t olen){
228
+ void* tbuf;
229
+ if(olen>*ilen){
230
+ tbuf = realloc(*buf, olen);
231
+ if(tbuf==NULL){
232
+ fprintf(stderr, "Can't allocate memory (%lu bytes)\n", (unsigned long)olen);
233
+ return -1;
234
+ }
235
+ *buf = tbuf;
236
+ *ilen = olen;
237
+ }
238
+ return 0;
239
+ }
240
+
241
+ /* Compress a block of raw data and store it in the output file.
242
+
243
+ ibuf: Raw data block to compress
244
+ ilen: Length of data block
245
+
246
+ 0: Success
247
+ <0: Error (message printed)
248
+ */
249
+ static
250
+ int fx_compress_block(const FX_STATE state, const u8* ibuf,
251
+ const uint32_t ilen){
252
+
253
+ static u8* obuf;
254
+ static size_t obuf_len;
255
+
256
+ unsigned int compressed_len;
257
+ int rc;
258
+
259
+ if(ilen<=4){
260
+ rc = fx_write_block(state, KIND_UNCOMPRESSED, ilen, ibuf);
261
+ return rc<0 ? -1 : 0;
262
+ }
263
+
264
+ rc = mem_resize(&obuf, &obuf_len, ilen);
265
+ if(rc<0) return -1;
266
+
267
+ /* 4-byte space to store the usize */
268
+ compressed_len = ilen - 4;
269
+
270
+ rc = lzfx_compress(ibuf, ilen, obuf+4, &compressed_len);
271
+ if(rc<0 && rc != LZFX_ESIZE){
272
+ fprintf(stderr, "Compression error (code %d)\n", rc);
273
+ return -1;
274
+ }
275
+
276
+ if(rc == LZFX_ESIZE || !compressed_len){
277
+
278
+ rc = fx_write_block(state, KIND_UNCOMPRESSED, ilen, ibuf);
279
+ if(rc<0) return -1;
280
+
281
+ } else {
282
+
283
+ obuf[0] = ilen >> 24;
284
+ obuf[1] = ilen >> 16;
285
+ obuf[2] = ilen >> 8;
286
+ obuf[3] = ilen;
287
+
288
+ rc = fx_write_block(state, KIND_COMPRESSED, compressed_len+4, obuf);
289
+ if(rc<0) return -1;
290
+ }
291
+
292
+ return 0;
293
+ }
294
+
295
+
296
+ /* Compress from input to output file.
297
+
298
+ 0: Success
299
+ <0: Failure (message printed)
300
+ */
301
+ int fx_create(const FX_STATE state){
302
+
303
+ unsigned long blockno = 0;
304
+ ssize_t rc = 0;
305
+ size_t count = 0;
306
+ u8* ibuf = (u8*)malloc(BLOCKSIZE);
307
+
308
+ do {
309
+ rc = 0;
310
+ count = 0;
311
+ do {
312
+ rc = read(state.ifd, ibuf, BLOCKSIZE-count);
313
+ if(rc<0){
314
+ fprintf(stderr, "Read error: %s\n", strerror(rc));
315
+ goto failed;
316
+ }
317
+ count += rc;
318
+ } while(rc > 0 && count < BLOCKSIZE);
319
+
320
+ blockno++;
321
+
322
+ if(count>0){
323
+ rc = fx_compress_block(state, ibuf, count);
324
+ if(rc<0) goto failed;
325
+ }
326
+
327
+ } while(count==BLOCKSIZE);
328
+
329
+ free(ibuf);
330
+ return 0;
331
+
332
+ failed:
333
+ fprintf(stderr, "Compression failed at byte %lu\n", blockno*BLOCKSIZE + count);
334
+ free(ibuf);
335
+ return -1;
336
+ }
337
+
338
+ /* Read an LZFX file
339
+
340
+ 0: success
341
+ <0: Failure (message printed)
342
+ */
343
+ int fx_read(const FX_STATE state){
344
+
345
+ int rc;
346
+ fx_kind_t kind = 0;
347
+ uint32_t blocksize = 0;
348
+
349
+ static u8* ibuf;
350
+ static size_t ilen;
351
+
352
+ while(1){
353
+ rc = fx_read_header(state, &kind, &blocksize);
354
+ if(rc==0) break; /* EOF */
355
+ if(rc<0) return -1;
356
+ if(blocksize==0) continue;
357
+
358
+ switch(kind){
359
+
360
+ case KIND_UNCOMPRESSED:
361
+ rc = mem_resize(&ibuf, &ilen, blocksize);
362
+ if(rc<0) return -1;
363
+
364
+ rc = fx_read_bytes(state, ibuf, blocksize);
365
+ if(rc<0) return -1;
366
+ if(rc==0){
367
+ fprintf(stderr, "EOF after block header (tried to read %d bytes)\n", blocksize);
368
+ return -1;
369
+ }
370
+
371
+ rc = fx_write_bytes(state, ibuf, blocksize);
372
+ if(rc<0) return -1;
373
+ break;
374
+
375
+ case KIND_COMPRESSED:
376
+ rc = mem_resize(&ibuf, &ilen, blocksize);
377
+ if(rc<0) return -1;
378
+
379
+ rc = fx_read_bytes(state, ibuf, blocksize);
380
+ if(rc<0) return -1;
381
+ if(rc==0){
382
+ fprintf(stderr, "EOF after block header\n");
383
+ return -1;
384
+ }
385
+ rc = fx_decompress_block(state, ibuf, blocksize);
386
+ if(rc<0) return -1;
387
+ break;
388
+
389
+ default:
390
+ rc = fx_skip_bytes(state, blocksize);
391
+ if(rc<0) return -1;
392
+ }
393
+ }
394
+ return 0;
395
+ }
396
+
397
+ int main(int argc, char* argv[]){
398
+
399
+ int rc;
400
+ int ifd, ofd;
401
+ fx_mode_t mode;
402
+ FX_STATE state;
403
+
404
+ fprintf(stderr, "LZFX compression utility 0.1\n"
405
+ "http://lzfx.googlecode.com\n"
406
+ "*********************************\n"
407
+ " THIS IS A DEVELOPMENT RELEASE\n"
408
+ " DO NOT USE ON CRITICAL DATA\n"
409
+ "*********************************\n");
410
+
411
+ if(argc!=4){
412
+ fprintf(stderr, "Syntax is lzfx <namein> <nameout> c|d\n");
413
+ return 1;
414
+ }
415
+
416
+ ifd = open(argv[1], O_RDONLY);
417
+ if(ifd<0){
418
+ fprintf(stderr, "Can't open input file\n");
419
+ return 1;
420
+ }
421
+
422
+ ofd = open(argv[2], O_CREAT | O_WRONLY | O_TRUNC, 0644);
423
+ if(ofd<0){
424
+ fprintf(stderr, "Can't open output file for write\n");
425
+ return 1;
426
+ }
427
+
428
+ if(!strcmp(argv[3], "c")){
429
+ mode = MODE_COMPRESS;
430
+ } else if(!strcmp(argv[3], "d")){
431
+ mode = MODE_DECOMPRESS;
432
+ } else {
433
+ fprintf(stderr, "Illegal mode (must be 'c' or 'd')\n");
434
+ return 1;
435
+ }
436
+
437
+ fx_init(&state, ifd, ofd, mode);
438
+
439
+ switch(mode){
440
+ case MODE_COMPRESS:
441
+ rc = fx_create(state);
442
+ break;
443
+ case MODE_DECOMPRESS:
444
+ rc = fx_read(state);
445
+ break;
446
+ }
447
+
448
+ return rc ? 1 : 0;
449
+ }
450
+
data/ext/lzfx.c ADDED
@@ -0,0 +1,366 @@
1
+ /*
2
+ * Copyright (c) 2009 Andrew Collette <andrew.collette at gmail.com>
3
+ * http://lzfx.googlecode.com
4
+ *
5
+ * Implements an LZF-compatible compressor/decompressor based on the liblzf
6
+ * codebase written by Marc Lehmann. This code is released under the BSD
7
+ * license. License and original copyright statement follow.
8
+ *
9
+ *
10
+ * Copyright (c) 2000-2008 Marc Alexander Lehmann <schmorp@schmorp.de>
11
+ *
12
+ * Redistribution and use in source and binary forms, with or without modifica-
13
+ * tion, are permitted provided that the following conditions are met:
14
+ *
15
+ * 1. Redistributions of source code must retain the above copyright notice,
16
+ * this list of conditions and the following disclaimer.
17
+ *
18
+ * 2. Redistributions in binary form must reproduce the above copyright
19
+ * notice, this list of conditions and the following disclaimer in the
20
+ * documentation and/or other materials provided with the distribution.
21
+ *
22
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
23
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MER-
24
+ * CHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
25
+ * EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPE-
26
+ * CIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
27
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
28
+ * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
29
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTH-
30
+ * ERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
31
+ * OF THE POSSIBILITY OF SUCH DAMAGE.
32
+ */
33
+
34
+ #include "lzfx.h"
35
+
36
+ #define LZFX_HSIZE (1 << (LZFX_HLOG))
37
+
38
+ /* We need this for memset */
39
+ #ifdef __cplusplus
40
+ # include <cstring>
41
+ #else
42
+ # include <string.h>
43
+ #endif
44
+
45
+ #if __GNUC__ >= 3 && !DISABLE_EXPECT
46
+ # define fx_expect_false(expr) __builtin_expect((expr) != 0, 0)
47
+ # define fx_expect_true(expr) __builtin_expect((expr) != 0, 1)
48
+ #else
49
+ # define fx_expect_false(expr) (expr)
50
+ # define fx_expect_true(expr) (expr)
51
+ #endif
52
+
53
+ typedef unsigned char u8;
54
+ typedef const u8 *LZSTATE[LZFX_HSIZE];
55
+
56
+ /* Define the hash function */
57
+ #define LZFX_FRST(p) (((p[0]) << 8) | p[1])
58
+ #define LZFX_NEXT(v,p) (((v) << 8) | p[2])
59
+ #define LZFX_IDX(h) ((( h >> (3*8 - LZFX_HLOG)) - h ) & (LZFX_HSIZE - 1))
60
+
61
+ /* These cannot be changed, as they are related to the compressed format. */
62
+ #define LZFX_MAX_LIT (1 << 5)
63
+ #define LZFX_MAX_OFF (1 << 13)
64
+ #define LZFX_MAX_REF ((1 << 8) + (1 << 3))
65
+
66
+ static
67
+ int lzfx_getsize(const void* ibuf, unsigned int ilen, unsigned int *olen);
68
+
69
+ /* Compressed format
70
+
71
+ There are two kinds of structures in LZF/LZFX: literal runs and back
72
+ references. The length of a literal run is encoded as L - 1, as it must
73
+ contain at least one byte. Literals are encoded as follows:
74
+
75
+ 000LLLLL <L+1 bytes>
76
+
77
+ Back references are encoded as follows. The smallest possible encoded
78
+ length value is 1, as otherwise the control byte would be recognized as
79
+ a literal run. Since at least three bytes must match for a back reference
80
+ to be inserted, the length is encoded as L - 2 instead of L - 1. The
81
+ offset (distance to the desired data in the output buffer) is encoded as
82
+ o - 1, as all offsets are at least 1. The binary format is:
83
+
84
+ LLLooooo oooooooo for backrefs of real length < 9 (1 <= L < 7)
85
+ 111ooooo LLLLLLLL oooooooo for backrefs of real length >= 9 (L >= 7)
86
+ */
87
+ #include <stdio.h>
88
+ int lzfx_compress(const void *const ibuf, const unsigned int ilen,
89
+ void *obuf, unsigned int *const olen){
90
+
91
+ /* Hash table; an array of u8*'s which point
92
+ to various locations in the input buffer */
93
+ const u8 *htab[LZFX_HSIZE];
94
+
95
+ const u8 **hslot; /* Pointer to entry in hash table */
96
+ unsigned int hval; /* Hash value generated by macros above */
97
+ const u8 *ref; /* Pointer to candidate match location in input */
98
+
99
+ const u8 *ip = (const u8 *)ibuf;
100
+ const u8 *const in_end = ip + ilen;
101
+
102
+ u8 *op = (u8 *)obuf;
103
+ const u8 *const out_end = (olen == NULL ? NULL : op + *olen);
104
+
105
+ int lit; /* # of bytes in current literal run */
106
+
107
+ #if defined (WIN32) && defined (_M_X64)
108
+ unsigned _int64 off; /* workaround for missing POSIX compliance */
109
+ #else
110
+ unsigned long off;
111
+ #endif
112
+
113
+ if(olen == NULL) return LZFX_EARGS;
114
+ if(ibuf == NULL){
115
+ if(ilen != 0) return LZFX_EARGS;
116
+ *olen = 0;
117
+ return 0;
118
+ }
119
+ if(obuf == NULL){
120
+ if(olen != 0) return LZFX_EARGS;
121
+ return lzfx_getsize(ibuf, ilen, olen);
122
+ }
123
+
124
+ memset(htab, 0, sizeof(htab));
125
+
126
+ /* Start a literal run. Whenever we do this the output pointer is
127
+ advanced because the current byte will hold the encoded length. */
128
+ lit = 0; op++;
129
+
130
+ hval = LZFX_FRST(ip);
131
+
132
+ while(ip + 2 < in_end){ /* The NEXT macro reads 2 bytes ahead */
133
+
134
+ hval = LZFX_NEXT(hval, ip);
135
+ hslot = htab + LZFX_IDX(hval);
136
+
137
+ ref = *hslot; *hslot = ip;
138
+
139
+ if( ref < ip
140
+ && (off = ip - ref - 1) < LZFX_MAX_OFF
141
+ && ip + 4 < in_end /* Backref takes up to 3 bytes, so don't bother */
142
+ && ref > (u8 *)ibuf
143
+ && ref[0] == ip[0]
144
+ && ref[1] == ip[1]
145
+ && ref[2] == ip[2] ) {
146
+
147
+ unsigned int len = 3; /* We already know 3 bytes match */
148
+ const unsigned int maxlen = in_end - ip - 2 > LZFX_MAX_REF ?
149
+ LZFX_MAX_REF : in_end - ip - 2;
150
+
151
+ /* lit == 0: op + 3 must be < out_end (because we undo the run)
152
+ lit != 0: op + 3 + 1 must be < out_end */
153
+ if(fx_expect_false(op - !lit + 3 + 1 >= out_end))
154
+ return LZFX_ESIZE;
155
+
156
+ op [- lit - 1] = lit - 1; /* Terminate literal run */
157
+ op -= !lit; /* Undo run if length is zero */
158
+
159
+ /* Start checking at the fourth byte */
160
+ while (len < maxlen && ref[len] == ip[len])
161
+ len++;
162
+
163
+ len -= 2; /* We encode the length as #octets - 2 */
164
+
165
+ /* Format 1: [LLLooooo oooooooo] */
166
+ if (len < 7) {
167
+ *op++ = (off >> 8) + (len << 5);
168
+ *op++ = off;
169
+
170
+ /* Format 2: [111ooooo LLLLLLLL oooooooo] */
171
+ } else {
172
+ *op++ = (off >> 8) + (7 << 5);
173
+ *op++ = len - 7;
174
+ *op++ = off;
175
+ }
176
+
177
+ lit = 0; op++;
178
+
179
+ ip += len + 1; /* ip = initial ip + #octets -1 */
180
+
181
+ if (fx_expect_false (ip + 3 >= in_end)){
182
+ ip++; /* Code following expects exit at bottom of loop */
183
+ break;
184
+ }
185
+
186
+ hval = LZFX_FRST (ip);
187
+ hval = LZFX_NEXT (hval, ip);
188
+ htab[LZFX_IDX (hval)] = ip;
189
+
190
+ ip++; /* ip = initial ip + #octets */
191
+
192
+ } else {
193
+ /* Keep copying literal bytes */
194
+
195
+ if (fx_expect_false (op >= out_end)) return LZFX_ESIZE;
196
+
197
+ lit++; *op++ = *ip++;
198
+
199
+ if (fx_expect_false (lit == LZFX_MAX_LIT)) {
200
+ op [- lit - 1] = lit - 1; /* stop run */
201
+ lit = 0; op++; /* start run */
202
+ }
203
+
204
+ } /* if() found match in htab */
205
+
206
+ } /* while(ip < ilen -2) */
207
+
208
+ /* At most 3 bytes remain in input. We therefore need 4 bytes available
209
+ in the output buffer to store them (3 data + ctrl byte).*/
210
+ if (op + 3 > out_end) return LZFX_ESIZE;
211
+
212
+ while (ip < in_end) {
213
+
214
+ lit++; *op++ = *ip++;
215
+
216
+ if (fx_expect_false (lit == LZFX_MAX_LIT)){
217
+ op [- lit - 1] = lit - 1;
218
+ lit = 0; op++;
219
+ }
220
+ }
221
+
222
+ op [- lit - 1] = lit - 1;
223
+ op -= !lit;
224
+
225
+ *olen = op - (u8 *)obuf;
226
+ return 0;
227
+ }
228
+
229
+ /* Decompressor */
230
+ int lzfx_decompress(const void* ibuf, unsigned int ilen,
231
+ void* obuf, unsigned int *olen){
232
+
233
+ u8 const *ip = (const u8 *)ibuf;
234
+ u8 const *const in_end = ip + ilen;
235
+ u8 *op = (u8 *)obuf;
236
+ u8 const *const out_end = (olen == NULL ? NULL : op + *olen);
237
+
238
+ unsigned int remain_len = 0;
239
+ int rc;
240
+
241
+ if(olen == NULL) return LZFX_EARGS;
242
+ if(ibuf == NULL){
243
+ if(ilen != 0) return LZFX_EARGS;
244
+ *olen = 0;
245
+ return 0;
246
+ }
247
+ if(obuf == NULL){
248
+ if(olen != 0) return LZFX_EARGS;
249
+ return lzfx_getsize(ibuf, ilen, olen);
250
+ }
251
+
252
+ do {
253
+ unsigned int ctrl = *ip++;
254
+
255
+ /* Format 000LLLLL: a literal byte string follows, of length L+1 */
256
+ if(ctrl < (1 << 5)) {
257
+
258
+ ctrl++;
259
+
260
+ if(fx_expect_false(op + ctrl > out_end)){
261
+ --ip; /* Rewind to control byte */
262
+ goto guess;
263
+ }
264
+ if(fx_expect_false(ip + ctrl > in_end)) return LZFX_ECORRUPT;
265
+
266
+ do
267
+ *op++ = *ip++;
268
+ while(--ctrl);
269
+
270
+ /* Format #1 [LLLooooo oooooooo]: backref of length L+1+2
271
+ ^^^^^ ^^^^^^^^
272
+ A B
273
+ #2 [111ooooo LLLLLLLL oooooooo] backref of length L+7+2
274
+ ^^^^^ ^^^^^^^^
275
+ A B
276
+ In both cases the location of the backref is computed from the
277
+ remaining part of the data as follows:
278
+
279
+ location = op - A*256 - B - 1
280
+ */
281
+ } else {
282
+
283
+ unsigned int len = (ctrl >> 5);
284
+ u8 *ref = op - ((ctrl & 0x1f) << 8) -1;
285
+
286
+ if(len==7) len += *ip++; /* i.e. format #2 */
287
+
288
+ len += 2; /* len is now #octets */
289
+
290
+ if(fx_expect_false(op + len > out_end)){
291
+ ip -= (len >= 9) ? 2 : 1; /* Rewind to control byte */
292
+ goto guess;
293
+ }
294
+ if(fx_expect_false(ip >= in_end)) return LZFX_ECORRUPT;
295
+
296
+ ref -= *ip++;
297
+
298
+ if(fx_expect_false(ref < (u8*)obuf)) return LZFX_ECORRUPT;
299
+
300
+ do
301
+ *op++ = *ref++;
302
+ while (--len);
303
+ }
304
+
305
+ } while (ip < in_end);
306
+
307
+ *olen = op - (u8 *)obuf;
308
+
309
+ return 0;
310
+
311
+ guess:
312
+ rc = lzfx_getsize(ip, ilen - (ip-(u8*)ibuf), &remain_len);
313
+ if(rc>=0) *olen = remain_len + (op - (u8*)obuf);
314
+ return rc;
315
+ }
316
+
317
+ /* Guess len. No parameters may be NULL; this is not checked. */
318
+ static
319
+ int lzfx_getsize(const void* ibuf, unsigned int ilen, unsigned int *olen){
320
+
321
+ u8 const *ip = (const u8 *)ibuf;
322
+ u8 const *const in_end = ip + ilen;
323
+ int tot_len = 0;
324
+
325
+ while (ip < in_end) {
326
+
327
+ unsigned int ctrl = *ip++;
328
+
329
+ if(ctrl < (1 << 5)) {
330
+
331
+ ctrl++;
332
+
333
+ if(ip + ctrl > in_end)
334
+ return LZFX_ECORRUPT;
335
+
336
+ tot_len += ctrl;
337
+ ip += ctrl;
338
+
339
+ } else {
340
+
341
+ unsigned int len = (ctrl >> 5);
342
+
343
+ if(len==7){ /* i.e. format #2 */
344
+ len += *ip++;
345
+ }
346
+
347
+ len += 2; /* len is now #octets */
348
+
349
+ if(ip >= in_end) return LZFX_ECORRUPT;
350
+
351
+ ip++; /* skip the ref byte */
352
+
353
+ tot_len += len;
354
+
355
+ }
356
+
357
+ }
358
+
359
+ *olen = tot_len;
360
+
361
+ return 0;
362
+ }
363
+
364
+
365
+
366
+
data/ext/lzfx.h ADDED
@@ -0,0 +1,98 @@
1
+ /*
2
+ * Copyright (c) 2009 Andrew Collette <andrew.collette at gmail.com>
3
+ * http://lzfx.googlecode.com
4
+ *
5
+ * Implements an LZF-compatible compressor/decompressor based on the liblzf
6
+ * codebase written by Marc Lehmann. This code is released under the BSD
7
+ * license. License and original copyright statement follow.
8
+ *
9
+ *
10
+ * Copyright (c) 2000-2008 Marc Alexander Lehmann <schmorp@schmorp.de>
11
+ *
12
+ * Redistribution and use in source and binary forms, with or without modifica-
13
+ * tion, are permitted provided that the following conditions are met:
14
+ *
15
+ * 1. Redistributions of source code must retain the above copyright notice,
16
+ * this list of conditions and the following disclaimer.
17
+ *
18
+ * 2. Redistributions in binary form must reproduce the above copyright
19
+ * notice, this list of conditions and the following disclaimer in the
20
+ * documentation and/or other materials provided with the distribution.
21
+ *
22
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
23
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MER-
24
+ * CHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
25
+ * EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPE-
26
+ * CIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
27
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
28
+ * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
29
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTH-
30
+ * ERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
31
+ * OF THE POSSIBILITY OF SUCH DAMAGE.
32
+ */
33
+
#ifndef LZFX_H
#define LZFX_H

#ifdef __cplusplus
extern "C" {
#endif

/*  Documented behavior, including function signatures and error codes,
    is guaranteed to remain unchanged for releases with the same major
    version number.  Releases of the same major version are also able
    to read each other's output, although the output itself is not
    guaranteed to be byte-for-byte identical.
*/
#define LZFX_VERSION_MAJOR      0
#define LZFX_VERSION_MINOR      1
#define LZFX_VERSION_STRING     "0.1"

/* Hashtable size (2**LZFX_HLOG entries) */
#ifndef LZFX_HLOG
# define LZFX_HLOG 16
#endif

/* Predefined errors. Parenthesized so the negative values stay intact
   wherever the macros are expanded inside a larger expression. */
#define LZFX_ESIZE      (-1)    /* Output buffer too small */
#define LZFX_ECORRUPT   (-2)    /* Invalid data for decompression */
#define LZFX_EARGS      (-3)    /* Arguments invalid (NULL) */

/*  Buffer-to buffer compression.

    Supply pre-allocated input and output buffers via ibuf and obuf, and
    their size in bytes via ilen and olen.  Buffers may not overlap.

    On success, the function returns a non-negative value and the argument
    olen contains the compressed size in bytes.  On failure, a negative
    value is returned and olen is not modified.
*/
int lzfx_compress(const void* ibuf, unsigned int ilen,
                  void* obuf, unsigned int *olen);

/*  Buffer-to-buffer decompression.

    Supply pre-allocated input and output buffers via ibuf and obuf, and
    their size in bytes via ilen and olen.  Buffers may not overlap.

    On success, the function returns a non-negative value and the argument
    olen contains the uncompressed size in bytes.  On failure, a negative
    value is returned.

    If the failure code is LZFX_ESIZE, olen contains the minimum buffer size
    required to hold the decompressed data.  Otherwise, olen is not modified.

    Supplying a zero *olen is a valid and supported strategy to determine the
    required buffer size.  This does not require decompression of the entire
    stream and is consequently very fast.  Argument obuf may be NULL in
    this case only.
*/
int lzfx_decompress(const void* ibuf, unsigned int ilen,
                    void* obuf, unsigned int *olen);


#ifdef __cplusplus
} /* extern "C" */
#endif

#endif
data/ext/lzfxruby.c ADDED
@@ -0,0 +1,73 @@
#include <limits.h>
#include <string.h>
#include "ruby.h"
#include "lzfx.h"
3
+ #define COFFICIENT_OF_BUFFER 3
4
+
5
+ static VALUE rb_lzfx_compress(VALUE self, VALUE input) {
6
+ unsigned char *compressed, *data;
7
+ int out_length, data_length;
8
+ int retcode;
9
+ VALUE retval;
10
+
11
+ Check_Type(input, T_STRING);
12
+ data = StringValuePtr(input);
13
+ data_length = RSTRING_LEN(input);
14
+
15
+ if (data_length < 1) {
16
+ return Qnil;
17
+ }
18
+
19
+ out_length = data_length * COFFICIENT_OF_BUFFER;
20
+ compressed = malloc(out_length);
21
+
22
+ retcode = lzfx_compress(data, data_length, compressed, &out_length);
23
+
24
+ if(retcode < 0) {
25
+ ruby_xfree(compressed);
26
+ return Qnil;
27
+ }
28
+
29
+ retval = rb_str_new(compressed, out_length);
30
+
31
+ ruby_xfree(compressed);
32
+
33
+ return retval;
34
+ }
35
+
36
+ static VALUE rb_lzfx_decompress(VALUE self, VALUE input) {
37
+ unsigned char *decompressed, *data;
38
+ int out_length, data_length;
39
+ int retcode;
40
+ VALUE retval;
41
+
42
+ Check_Type(input, T_STRING);
43
+
44
+ data = RSTRING_PTR(input);
45
+ data_length = RSTRING_LEN(input);
46
+
47
+ if (data_length < 1) {
48
+ return Qnil;
49
+ }
50
+
51
+ out_length = data_length * COFFICIENT_OF_BUFFER;
52
+ decompressed = malloc(out_length);
53
+
54
+ retcode = lzfx_decompress(data, data_length, decompressed, &out_length);
55
+
56
+ if(retcode < 0) {
57
+ ruby_xfree(decompressed);
58
+ return Qnil;
59
+ }
60
+
61
+ retval = rb_str_new(decompressed, out_length);
62
+
63
+ ruby_xfree(decompressed);
64
+
65
+ return retval;
66
+ }
67
+
68
+ void Init_lzfx() {
69
+ VALUE rb_mLzfx;
70
+ rb_mLzfx = rb_define_module("Lzfx");
71
+ rb_define_module_function(rb_mLzfx, "compress", rb_lzfx_compress, 1);
72
+ rb_define_module_function(rb_mLzfx, "decompress", rb_lzfx_decompress, 1);
73
+ }
metadata ADDED
@@ -0,0 +1,75 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: lzfx
3
+ version: !ruby/object:Gem::Version
4
+ hash: 27
5
+ prerelease:
6
+ segments:
7
+ - 0
8
+ - 1
9
+ - 0
10
+ version: 0.1.0
11
+ platform: ruby
12
+ authors:
13
+ - hmsk
14
+ autorequire:
15
+ bindir: bin
16
+ cert_chain: []
17
+
18
+ date: 2012-04-06 00:00:00 +09:00
19
+ default_executable:
20
+ dependencies: []
21
+
22
+ description: Ruby bindings for lzfx. lzfx is a tiny, extremely fast compression library.
23
+ email: k.hamasaki@gmail.com
24
+ executables: []
25
+
26
+ extensions:
27
+ - ext/extconf.rb
28
+ extra_rdoc_files:
29
+ - README.md
30
+ files:
31
+ - ext/lzfx-util.c
32
+ - ext/lzfx.c
33
+ - ext/lzfxruby.c
34
+ - ext/lzfx.h
35
+ - ext/extconf.rb
36
+ - README.md
37
+ has_rdoc: true
38
+ homepage: https://github.com/winebarrel/infra-study/tree/master/4th/gems/lzfx
39
+ licenses: []
40
+
41
+ post_install_message:
42
+ rdoc_options:
43
+ - --title
44
+ - lzfx - Ruby bindings for lzfx.
45
+ - --main
46
+ - README.md
47
+ require_paths:
48
+ - lib
49
+ required_ruby_version: !ruby/object:Gem::Requirement
50
+ none: false
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ hash: 3
55
+ segments:
56
+ - 0
57
+ version: "0"
58
+ required_rubygems_version: !ruby/object:Gem::Requirement
59
+ none: false
60
+ requirements:
61
+ - - ">="
62
+ - !ruby/object:Gem::Version
63
+ hash: 3
64
+ segments:
65
+ - 0
66
+ version: "0"
67
+ requirements: []
68
+
69
+ rubyforge_project:
70
+ rubygems_version: 1.5.0
71
+ signing_key:
72
+ specification_version: 3
73
+ summary: Ruby bindings for lzfx.
74
+ test_files: []
75
+