lzss 0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/example/example.rb +19 -0
- data/src/extconf.rb +3 -0
- data/src/lzss-ext.c +29 -0
- data/src/lzss.c +233 -0
- metadata +66 -0
data/example/example.rb
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
require "compress/lzss"

# Round-trip demo: compress a file to disk, then read it back and decompress.

# Read the original input as raw bytes.
original = File.binread('kiss.rmvb')

# Compress it.
packed = Compress::LZSS.encode(original)

# Write the compressed bytes.
File.open('kiss.lzss', 'wb') { |f| f << packed }

# Read the compressed file back.
packed = File.binread('kiss.lzss')

# Decompress it.
restored = Compress::LZSS.decode(packed)

# Write the restored bytes.
File.open('kiss2.rmvb', 'wb') { |f| f << restored }
|
data/src/extconf.rb
ADDED
data/src/lzss-ext.c
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
#include <ruby.h>
|
|
2
|
+
|
|
3
|
+
size_t Encode(size_t ilen, char* istr, size_t olen, char* ostr);
|
|
4
|
+
// Note: the caller must free() the returned pointer.
|
|
5
|
+
char* Decode(size_t ilen, unsigned char* istr, size_t *olen);
|
|
6
|
+
|
|
7
|
+
/*
 * Compress::LZSS.encode(str) -> String
 *
 * Compresses +str+ with Encode() and returns the compressed bytes as a new
 * Ruby string.
 *
 * The argument is validated with StringValue(), so passing a non-string
 * raises TypeError instead of reading through an invalid pointer.  The
 * output is written directly into a Ruby-owned string, so there is no
 * temporary malloc'ed buffer that could be NULL or leak if string creation
 * raises.
 */
static VALUE encode(VALUE self, VALUE str) {
    size_t ilen, cap, olen;
    VALUE ret;

    StringValue(str);
    ilen = (size_t)RSTRING_LEN(str);

    /* Same capacity the original used: twice the input length. */
    cap = ilen * 2;
    ret = rb_str_new(NULL, (long)cap);

    /* No Ruby allocation happens during Encode(), so both pointers stay valid. */
    olen = Encode(ilen, RSTRING_PTR(str), cap, RSTRING_PTR(ret));

    /* Shrink the result to the number of bytes actually produced. */
    rb_str_resize(ret, (long)olen);
    return ret;
}
|
|
15
|
+
|
|
16
|
+
/*
 * Compress::LZSS.decode(str) -> String
 *
 * Decompresses +str+ with Decode() and returns the result as a new Ruby
 * string.  The buffer returned by Decode() is copied into the Ruby string
 * and then freed here.
 *
 * The argument is validated with StringValue(), so passing a non-string
 * raises TypeError.  Empty input yields an empty string without calling
 * Decode().  A NULL buffer for non-empty input is reported as NoMemoryError.
 */
static VALUE decode(VALUE self, VALUE str) {
    size_t olen = 0;
    char* buff;
    VALUE ret;

    StringValue(str);
    if (RSTRING_LEN(str) == 0) return rb_str_new("", 0);

    /* Decode() is declared with unsigned char*; cast explicitly. */
    buff = Decode((size_t)RSTRING_LEN(str), (unsigned char*)RSTRING_PTR(str), &olen);
    if (buff == NULL) rb_raise(rb_eNoMemError, "LZSS decode: out of memory");

    ret = rb_str_new(buff, (long)olen);
    free(buff);
    return ret;
}
|
|
23
|
+
|
|
24
|
+
void Init_lzss() {
|
|
25
|
+
VALUE Compress = rb_define_module("Compress");
|
|
26
|
+
VALUE LZSS = rb_define_module_under(Compress, "LZSS");
|
|
27
|
+
rb_define_module_function(LZSS, "encode", RUBY_METHOD_FUNC(encode), 1);
|
|
28
|
+
rb_define_module_function(LZSS, "decode", RUBY_METHOD_FUNC(decode), 1);
|
|
29
|
+
}
|
data/src/lzss.c
ADDED
|
@@ -0,0 +1,233 @@
|
|
|
1
|
+
/**************************************************************
|
|
2
|
+
LZSS.C -- A Data Compression Program
|
|
3
|
+
(tab = 4 spaces)
|
|
4
|
+
***************************************************************
|
|
5
|
+
4/6/1989 Haruhiko Okumura
|
|
6
|
+
Use, distribute, and modify this program freely.
|
|
7
|
+
Please send me your improved versions.
|
|
8
|
+
PC-VAN SCIENCE
|
|
9
|
+
NIFTY-Serve PAF01022
|
|
10
|
+
CompuServe 74050,1022
|
|
11
|
+
**************************************************************/
|
|
12
|
+
#include <stdlib.h>
|
|
13
|
+
#include <stdio.h>
|
|
14
|
+
#include <string.h>
|
|
15
|
+
#include <ctype.h>
|
|
16
|
+
|
|
17
|
+
#define N 4096 /* size of ring buffer */
|
|
18
|
+
#define F 18 /* upper limit for match_length */
|
|
19
|
+
#define THRESHOLD 2 /* encode string into position and length if match_length is greater than this */
|
|
20
|
+
#define NIL N /* index for root of binary search trees */
|
|
21
|
+
|
|
22
|
+
/* Position and length of the longest match. These are set by the InsertNode() procedure. */
|
|
23
|
+
static int match_position;
|
|
24
|
+
static int match_length;
|
|
25
|
+
|
|
26
|
+
static void InsertNode(unsigned char* text_buf, int* lson, int* rson, int* dad, int r)
|
|
27
|
+
/* Inserts string of length F, text_buf[r..r+F-1], into one of the
|
|
28
|
+
trees (text_buf[r]'th tree) and returns the longest-match position
|
|
29
|
+
and length via the global variables match_position and match_length.
|
|
30
|
+
If match_length = F, then removes the old node in favor of the new
|
|
31
|
+
one, because the old one will be deleted sooner.
|
|
32
|
+
Note r plays double role, as tree node and position in buffer. */
|
|
33
|
+
{
|
|
34
|
+
int i, p, cmp;
|
|
35
|
+
unsigned char *key;
|
|
36
|
+
|
|
37
|
+
cmp = 1; key = &text_buf[r]; p = N + 1 + key[0];
|
|
38
|
+
rson[r] = lson[r] = NIL; match_length = 0;
|
|
39
|
+
for ( ; ; ) {
|
|
40
|
+
if (cmp >= 0) {
|
|
41
|
+
if (rson[p] != NIL) p = rson[p];
|
|
42
|
+
else { rson[p] = r; dad[r] = p; return; }
|
|
43
|
+
} else {
|
|
44
|
+
if (lson[p] != NIL) p = lson[p];
|
|
45
|
+
else { lson[p] = r; dad[r] = p; return; }
|
|
46
|
+
}
|
|
47
|
+
for (i = 1; i < F; i++)
|
|
48
|
+
if ((cmp = key[i] - text_buf[p + i]) != 0) break;
|
|
49
|
+
if (i > match_length) {
|
|
50
|
+
match_position = p;
|
|
51
|
+
if ((match_length = i) >= F) break;
|
|
52
|
+
}
|
|
53
|
+
}
|
|
54
|
+
dad[r] = dad[p]; lson[r] = lson[p]; rson[r] = rson[p];
|
|
55
|
+
dad[lson[p]] = r; dad[rson[p]] = r;
|
|
56
|
+
if (rson[dad[p]] == p) rson[dad[p]] = r;
|
|
57
|
+
else lson[dad[p]] = r;
|
|
58
|
+
dad[p] = NIL; /* remove p */
|
|
59
|
+
}
|
|
60
|
+
|
|
61
|
+
static void DeleteNode(int* lson, int* rson, int* dad, int p) /* deletes node p from tree */
|
|
62
|
+
{
|
|
63
|
+
int q;
|
|
64
|
+
|
|
65
|
+
if (dad[p] == NIL) return; /* not in tree */
|
|
66
|
+
if (rson[p] == NIL) q = lson[p];
|
|
67
|
+
else if (lson[p] == NIL) q = rson[p];
|
|
68
|
+
else {
|
|
69
|
+
q = lson[p];
|
|
70
|
+
if (rson[q] != NIL) {
|
|
71
|
+
do { q = rson[q]; } while (rson[q] != NIL);
|
|
72
|
+
rson[dad[q]] = lson[q]; dad[lson[q]] = dad[q];
|
|
73
|
+
lson[q] = lson[p]; dad[lson[p]] = q;
|
|
74
|
+
}
|
|
75
|
+
rson[q] = rson[p]; dad[rson[p]] = q;
|
|
76
|
+
}
|
|
77
|
+
dad[q] = dad[p];
|
|
78
|
+
if (rson[dad[p]] == p) rson[dad[p]] = q; else lson[dad[p]] = q;
|
|
79
|
+
dad[p] = NIL;
|
|
80
|
+
}
|
|
81
|
+
|
|
82
|
+
/* _get(c): read the next input byte into c.
   Expects `istr` (input cursor) and `ilen` (bytes remaining) in scope and
   must be used directly inside a loop: when the input is exhausted it sets
   c to EOF and `break`s out of that loop.
   Written as a single if/else statement with the argument parenthesized,
   so `_get(x);` stays one statement. */
#define _get(c) \
	if (! ilen) {\
		(c) = EOF;\
		break;\
	} else\
		((c) = *istr, ++istr, --ilen)

/* _put(c): write one byte to the output.
   Expects `ostr` (output cursor) and `olen` (space remaining) in scope.
   No bounds check is performed here; the caller must guarantee room.
   Wrapped in do/while(0) so `_put(x);` is a single statement. */
#define _put(c) \
	do {\
		*ostr = (c);\
		++ostr;\
		--olen;\
	} while (0)
|
|
95
|
+
|
|
96
|
+
size_t Encode(size_t ilen, char* istr, size_t olen, char* ostr)
|
|
97
|
+
{
|
|
98
|
+
int i, c, len, r, s, last_match_length, code_buf_ptr;
|
|
99
|
+
unsigned char code_buf[17], mask;
|
|
100
|
+
size_t codesize = 0;
|
|
101
|
+
int lson[N + 1], rson[N + 257], dad[N + 1]; /* left & right children & parents -- These constitute binary search trees. */
|
|
102
|
+
unsigned char text_buf[N + F - 1]; /* ring buffer of size N, with extra F-1 bytes to facilitate string comparison */
|
|
103
|
+
|
|
104
|
+
match_position = 0;
|
|
105
|
+
match_length = 0;
|
|
106
|
+
|
|
107
|
+
if (ilen == 0) return 0;
|
|
108
|
+
|
|
109
|
+
/* initialize trees */
|
|
110
|
+
/* For i = 0 to N - 1, rson[i] and lson[i] will be the right and
|
|
111
|
+
left children of node i. These nodes need not be initialized.
|
|
112
|
+
Also, dad[i] is the parent of node i. These are initialized to
|
|
113
|
+
NIL (= N), which stands for 'not used.'
|
|
114
|
+
For i = 0 to 255, rson[N + i + 1] is the root of the tree
|
|
115
|
+
for strings that begin with character i. These are initialized
|
|
116
|
+
to NIL. Note there are 256 trees. */
|
|
117
|
+
for (i = N + 1; i <= N + 256; i++) rson[i] = NIL;
|
|
118
|
+
for (i = 0; i < N; i++) dad[i] = NIL;
|
|
119
|
+
|
|
120
|
+
code_buf[0] = 0; /* code_buf[1..16] saves eight units of code, and
|
|
121
|
+
code_buf[0] works as eight flags, "1" representing that the unit
|
|
122
|
+
is an unencoded letter (1 byte), "0" a position-and-length pair
|
|
123
|
+
(2 bytes). Thus, eight units require at most 16 bytes of code. */
|
|
124
|
+
code_buf_ptr = mask = 1;
|
|
125
|
+
s = 0; r = N - F;
|
|
126
|
+
for (i = s; i < r; i++) text_buf[i] = 0; /* Clear the buffer with
|
|
127
|
+
any character that will appear often. */
|
|
128
|
+
for (len = 0; len < F && ilen; len++) {
|
|
129
|
+
_get(c);
|
|
130
|
+
text_buf[r + len] = c;
|
|
131
|
+
/* Read F bytes into the last F bytes of the buffer */
|
|
132
|
+
}
|
|
133
|
+
for (i = 1; i <= F; i++) InsertNode(text_buf, lson, rson, dad, r - i); /* Insert the F strings,
|
|
134
|
+
each of which begins with one or more 'space' characters. Note
|
|
135
|
+
the order in which these strings are inserted. This way,
|
|
136
|
+
degenerate trees will be less likely to occur. */
|
|
137
|
+
InsertNode(text_buf, lson, rson, dad, r); /* Finally, insert the whole string just read. The global variables match_length and match_position are set. */
|
|
138
|
+
do {
|
|
139
|
+
if (match_length > len) match_length = len; /* match_length may be spuriously long near the end of text. */
|
|
140
|
+
if (match_length <= THRESHOLD) {
|
|
141
|
+
match_length = 1; /* Not long enough match. Send one byte. */
|
|
142
|
+
code_buf[0] |= mask; /* 'send one byte' flag */
|
|
143
|
+
code_buf[code_buf_ptr++] = text_buf[r]; /* Send uncoded. */
|
|
144
|
+
} else {
|
|
145
|
+
code_buf[code_buf_ptr++] = (unsigned char) match_position;
|
|
146
|
+
code_buf[code_buf_ptr++] = (unsigned char)
|
|
147
|
+
(((match_position >> 4) & 0xf0)
|
|
148
|
+
| (match_length - (THRESHOLD + 1))); /* Send position and
|
|
149
|
+
length pair. Note match_length > THRESHOLD. */
|
|
150
|
+
}
|
|
151
|
+
if ((mask <<= 1) == 0) { /* Shift mask left one bit. */
|
|
152
|
+
for (i = 0; i < code_buf_ptr; i++) { /* Send at most 8 units of */
|
|
153
|
+
_put(code_buf[i]); /* code together */
|
|
154
|
+
}
|
|
155
|
+
codesize += code_buf_ptr;
|
|
156
|
+
code_buf[0] = 0; code_buf_ptr = mask = 1;
|
|
157
|
+
}
|
|
158
|
+
last_match_length = match_length;
|
|
159
|
+
for (i = 0; i < last_match_length && ilen; i++) {
|
|
160
|
+
_get(c);
|
|
161
|
+
DeleteNode(lson, rson, dad, s); /* Delete old strings and */
|
|
162
|
+
text_buf[s] = c; /* read new bytes */
|
|
163
|
+
if (s < F - 1) text_buf[s + N] = c; /* If the position is
|
|
164
|
+
near the end of buffer, extend the buffer to make
|
|
165
|
+
string comparison easier. */
|
|
166
|
+
s = (s + 1) & (N - 1); r = (r + 1) & (N - 1);
|
|
167
|
+
/* Since this is a ring buffer, increment the position
|
|
168
|
+
modulo N. */
|
|
169
|
+
InsertNode(text_buf, lson, rson, dad, r); /* Register the string in text_buf[r..r+F-1] */
|
|
170
|
+
}
|
|
171
|
+
while (i++ < last_match_length) { /* After the end of text, */
|
|
172
|
+
DeleteNode(lson, rson, dad, s); /* no need to read, but */
|
|
173
|
+
s = (s + 1) & (N - 1); r = (r + 1) & (N - 1);
|
|
174
|
+
if (--len) InsertNode(text_buf, lson, rson, dad, r); /* buffer may not be empty. */
|
|
175
|
+
}
|
|
176
|
+
} while (len > 0); /* until length of string to be processed is zero */
|
|
177
|
+
if (code_buf_ptr > 1) { /* Send remaining code. */
|
|
178
|
+
for (i = 0; i < code_buf_ptr; i++) {
|
|
179
|
+
_put(code_buf[i]);
|
|
180
|
+
}
|
|
181
|
+
codesize += code_buf_ptr;
|
|
182
|
+
}
|
|
183
|
+
|
|
184
|
+
return codesize;
|
|
185
|
+
}
|
|
186
|
+
|
|
187
|
+
#undef _put
|
|
188
|
+
#define _put(c) \
|
|
189
|
+
if (*olen == limit) {\
|
|
190
|
+
limit *= 2;\
|
|
191
|
+
ostr = realloc(ostr, limit);\
|
|
192
|
+
}\
|
|
193
|
+
ostr[*olen] = c;\
|
|
194
|
+
*olen += 1
|
|
195
|
+
|
|
196
|
+
// note: should free the returned ptr
|
|
197
|
+
char* Decode(size_t ilen, unsigned char* istr, size_t *olen) /* Just the reverse of Encode(). */
|
|
198
|
+
{
|
|
199
|
+
unsigned char text_buf[N + F - 1]; /* ring buffer of size N, with extra F-1 bytes to facilitate string comparison */
|
|
200
|
+
int i, j, k, r, c;
|
|
201
|
+
unsigned int flags;
|
|
202
|
+
char* ostr = malloc(ilen);
|
|
203
|
+
int limit = ilen;
|
|
204
|
+
*olen = 0;
|
|
205
|
+
|
|
206
|
+
for (i = 0; i < N - F; i++) text_buf[i] = 0;
|
|
207
|
+
r = N - F; flags = 0;
|
|
208
|
+
for ( ; ; ) {
|
|
209
|
+
if (((flags >>= 1) & 256) == 0) {
|
|
210
|
+
_get(c);
|
|
211
|
+
flags = c | 0xff00; /* uses higher byte cleverly */
|
|
212
|
+
} /* to count eight */
|
|
213
|
+
if (flags & 1) {
|
|
214
|
+
_get(c);
|
|
215
|
+
_put(c);
|
|
216
|
+
text_buf[r++] = c; r &= (N - 1);
|
|
217
|
+
} else {
|
|
218
|
+
_get(i);
|
|
219
|
+
_get(j);
|
|
220
|
+
i |= ((j & 0xf0) << 4); j = (j & 0x0f) + THRESHOLD;
|
|
221
|
+
for (k = 0; k <= j; k++) {
|
|
222
|
+
c = text_buf[(i + k) & (N - 1)];
|
|
223
|
+
_put(c);
|
|
224
|
+
text_buf[r++] = c; r &= (N - 1);
|
|
225
|
+
}
|
|
226
|
+
}
|
|
227
|
+
}
|
|
228
|
+
return ostr;
|
|
229
|
+
}
|
|
230
|
+
|
|
231
|
+
#undef _get
|
|
232
|
+
#undef _put
|
|
233
|
+
|
metadata
ADDED
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
|
2
|
+
name: lzss
|
|
3
|
+
version: !ruby/object:Gem::Version
|
|
4
|
+
prerelease: false
|
|
5
|
+
segments:
|
|
6
|
+
- 0
|
|
7
|
+
- 1
|
|
8
|
+
version: "0.1"
|
|
9
|
+
platform: ruby
|
|
10
|
+
authors:
|
|
11
|
+
- NS, FX
|
|
12
|
+
autorequire:
|
|
13
|
+
bindir: bin
|
|
14
|
+
cert_chain: []
|
|
15
|
+
|
|
16
|
+
date: 2010-05-03 00:00:00 +08:00
|
|
17
|
+
default_executable:
|
|
18
|
+
dependencies: []
|
|
19
|
+
|
|
20
|
+
description:
|
|
21
|
+
email: usurffx@gmail.com
|
|
22
|
+
executables: []
|
|
23
|
+
|
|
24
|
+
extensions:
|
|
25
|
+
- src/extconf.rb
|
|
26
|
+
extra_rdoc_files: []
|
|
27
|
+
|
|
28
|
+
files:
|
|
29
|
+
- src/lzss.c
|
|
30
|
+
- src/lzss-ext.c
|
|
31
|
+
- src/extconf.rb
|
|
32
|
+
- example/example.rb
|
|
33
|
+
has_rdoc: true
|
|
34
|
+
homepage: http://rednaxelafx.javaeye.com
|
|
35
|
+
licenses: []
|
|
36
|
+
|
|
37
|
+
post_install_message:
|
|
38
|
+
rdoc_options: []
|
|
39
|
+
|
|
40
|
+
require_paths:
|
|
41
|
+
- lib
|
|
42
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
|
43
|
+
requirements:
|
|
44
|
+
- - ">="
|
|
45
|
+
- !ruby/object:Gem::Version
|
|
46
|
+
segments:
|
|
47
|
+
- 1
|
|
48
|
+
- 9
|
|
49
|
+
- 0
|
|
50
|
+
version: 1.9.0
|
|
51
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
52
|
+
requirements:
|
|
53
|
+
- - ">="
|
|
54
|
+
- !ruby/object:Gem::Version
|
|
55
|
+
segments:
|
|
56
|
+
- 0
|
|
57
|
+
version: "0"
|
|
58
|
+
requirements: []
|
|
59
|
+
|
|
60
|
+
rubyforge_project:
|
|
61
|
+
rubygems_version: 1.3.6
|
|
62
|
+
signing_key:
|
|
63
|
+
specification_version: 3
|
|
64
|
+
summary: lzss compress algorithm for ruby
|
|
65
|
+
test_files: []
|
|
66
|
+
|