lzss 0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,19 @@
1
require "compress/lzss"

# Round-trip demo: compress a file with LZSS, then decompress it again.

# Read the source file as raw bytes, compress it, and write the result.
original = File.binread('kiss.rmvb')
packed = Compress::LZSS.encode(original)
File.open('kiss.lzss', 'wb') { |out| out << packed }

# Read the compressed file back, decompress it, and write the restored bytes.
packed = File.binread('kiss.lzss')
restored = Compress::LZSS.decode(packed)
File.open('kiss2.rmvb', 'wb') { |out| out << restored }
@@ -0,0 +1,3 @@
1
# Build configuration for the native extension: mkmf writes the Makefile
# for the extension that is loaded as "compress/lzss".
require "mkmf"

create_makefile('compress/lzss')
@@ -0,0 +1,29 @@
1
+ #include <ruby.h>
2
+
3
+ size_t Encode(size_t ilen, char* istr, size_t olen, char* ostr); // compresses ilen bytes at istr into ostr (olen = size passed for ostr); returns the compressed size
4
+ // note: the caller must free() the returned pointer
5
+ char* Decode(size_t ilen, unsigned char* istr, size_t *olen); // decompresses ilen bytes at istr; stores the number of output bytes in *olen
6
+
7
+ static VALUE encode(VALUE self, VALUE str) {
8
+ size_t ilen = RSTRING_LEN(str);
9
+ char* buff = (char*)malloc(ilen * 2);
10
+ size_t olen = Encode(RSTRING_LEN(str), RSTRING_PTR(str), ilen * 2, buff);
11
+ VALUE ret = rb_str_new(buff, olen);
12
+ free(buff);
13
+ return ret;
14
+ }
15
+
16
+ static VALUE decode(VALUE self, VALUE str) {
17
+ size_t olen = 0;
18
+ char* buff = Decode(RSTRING_LEN(str), RSTRING_PTR(str), &olen);
19
+ VALUE ret = rb_str_new(buff, olen);
20
+ free(buff);
21
+ return ret;
22
+ }
23
+
24
+ void Init_lzss() {
25
+ VALUE Compress = rb_define_module("Compress");
26
+ VALUE LZSS = rb_define_module_under(Compress, "LZSS");
27
+ rb_define_module_function(LZSS, "encode", RUBY_METHOD_FUNC(encode), 1);
28
+ rb_define_module_function(LZSS, "decode", RUBY_METHOD_FUNC(decode), 1);
29
+ }
@@ -0,0 +1,233 @@
1
+ /**************************************************************
2
+ LZSS.C -- A Data Compression Program
3
+ (tab = 4 spaces)
4
+ ***************************************************************
5
+ 4/6/1989 Haruhiko Okumura
6
+ Use, distribute, and modify this program freely.
7
+ Please send me your improved versions.
8
+ PC-VAN SCIENCE
9
+ NIFTY-Serve PAF01022
10
+ CompuServe 74050,1022
11
+ **************************************************************/
12
+ #include <stdlib.h>
13
+ #include <stdio.h>
14
+ #include <string.h>
15
+ #include <ctype.h>
16
+
17
+ #define N 4096 /* size of ring buffer */
18
+ #define F 18 /* upper limit for match_length */
19
+ #define THRESHOLD 2 /* encode string into position and length if match_length is greater than this */
20
+ #define NIL N /* index for root of binary search trees; also marks an unused node or a missing child */
21
+
22
+ /* Position and length of the longest match. These are set by the InsertNode() procedure. */
23
+ static int match_position; /* file-scope state: written by InsertNode(), read by Encode() */
24
+ static int match_length; /* file-scope state: written by InsertNode(), read and clamped by Encode() */
25
+
26
+ static void InsertNode(unsigned char* text_buf, int* lson, int* rson, int* dad, int r)
27
+ /* Inserts string of length F, text_buf[r..r+F-1], into one of the
28
+ trees (text_buf[r]'th tree) and returns the longest-match position
29
+ and length via the global variables match_position and match_length.
30
+ If match_length = F, then removes the old node in favor of the new
31
+ one, because the old one will be deleted sooner.
32
+ Note r plays double role, as tree node and position in buffer.
   text_buf is the caller's ring buffer; lson, rson and dad are the
   caller's left-child, right-child and parent arrays. */
33
+ {
34
+ int i, p, cmp;
35
+ unsigned char *key;
36
+
37
+ cmp = 1; key = &text_buf[r]; p = N + 1 + key[0]; /* start at the root slot for the first byte; cmp = 1 makes the first step go right */
38
+ rson[r] = lson[r] = NIL; match_length = 0; /* the new node has no children yet; no match found so far */
39
+ for ( ; ; ) {
40
+ if (cmp >= 0) { /* last comparison was >= 0: descend to the right */
41
+ if (rson[p] != NIL) p = rson[p];
42
+ else { rson[p] = r; dad[r] = p; return; } /* free slot: attach r as right child and finish */
43
+ } else { /* last comparison was negative: descend to the left */
44
+ if (lson[p] != NIL) p = lson[p];
45
+ else { lson[p] = r; dad[r] = p; return; } /* free slot: attach r as left child and finish */
46
+ }
47
+ for (i = 1; i < F; i++) /* compare bytes 1..F-1; byte 0 selected the tree */
48
+ if ((cmp = key[i] - text_buf[p + i]) != 0) break;
49
+ if (i > match_length) { /* longer than the best match so far: record it */
50
+ match_position = p;
51
+ if ((match_length = i) >= F) break; /* full-length match: stop searching and replace p below */
52
+ }
53
+ }
54
+ dad[r] = dad[p]; lson[r] = lson[p]; rson[r] = rson[p]; /* r takes over p's parent and children */
55
+ dad[lson[p]] = r; dad[rson[p]] = r; /* the children now point back to r */
56
+ if (rson[dad[p]] == p) rson[dad[p]] = r; /* redirect the parent's link from p to r */
57
+ else lson[dad[p]] = r;
58
+ dad[p] = NIL; /* remove p */
59
+ }
60
+
61
+ static void DeleteNode(int* lson, int* rson, int* dad, int p) /* deletes node p from tree */
62
+ {
63
+ int q;
64
+
65
+ if (dad[p] == NIL) return; /* not in tree */
66
+ if (rson[p] == NIL) q = lson[p];
67
+ else if (lson[p] == NIL) q = rson[p];
68
+ else {
69
+ q = lson[p];
70
+ if (rson[q] != NIL) {
71
+ do { q = rson[q]; } while (rson[q] != NIL);
72
+ rson[dad[q]] = lson[q]; dad[lson[q]] = dad[q];
73
+ lson[q] = lson[p]; dad[lson[p]] = q;
74
+ }
75
+ rson[q] = rson[p]; dad[rson[p]] = q;
76
+ }
77
+ dad[q] = dad[p];
78
+ if (rson[dad[p]] == p) rson[dad[p]] = q; else lson[dad[p]] = q;
79
+ dad[p] = NIL;
80
+ }
81
+
82
+ #define _get(c) /* read the next input byte into c; when ilen is 0, set c = EOF and break out of the enclosing loop. Uses the caller's istr and ilen. */ \
83
+ if (! ilen) {\
84
+ c = EOF;\
85
+ break;\
86
+ }\
87
+ c = *istr;\
88
+ ++istr;\
89
+ --ilen
90
+
91
+ #define _put(c) /* store c at the caller's ostr cursor, advance it and decrement olen; performs no bounds check */ \
92
+ *ostr = c;\
93
+ ++ostr;\
94
+ --olen
95
+
96
+ size_t Encode(size_t ilen, char* istr, size_t olen, char* ostr)
97
+ {
98
+ int i, c, len, r, s, last_match_length, code_buf_ptr;
99
+ unsigned char code_buf[17], mask;
100
+ size_t codesize = 0;
101
+ int lson[N + 1], rson[N + 257], dad[N + 1]; /* left & right children & parents -- These constitute binary search trees. */
102
+ unsigned char text_buf[N + F - 1]; /* ring buffer of size N, with extra F-1 bytes to facilitate string comparison */
103
+
104
+ match_position = 0;
105
+ match_length = 0;
106
+
107
+ if (ilen == 0) return 0;
108
+
109
+ /* initialize trees */
110
+ /* For i = 0 to N - 1, rson[i] and lson[i] will be the right and
111
+ left children of node i. These nodes need not be initialized.
112
+ Also, dad[i] is the parent of node i. These are initialized to
113
+ NIL (= N), which stands for 'not used.'
114
+ For i = 0 to 255, rson[N + i + 1] is the root of the tree
115
+ for strings that begin with character i. These are initialized
116
+ to NIL. Note there are 256 trees. */
117
+ for (i = N + 1; i <= N + 256; i++) rson[i] = NIL;
118
+ for (i = 0; i < N; i++) dad[i] = NIL;
119
+
120
+ code_buf[0] = 0; /* code_buf[1..16] saves eight units of code, and
121
+ code_buf[0] works as eight flags, "1" representing that the unit
122
+ is an unencoded letter (1 byte), "0" a position-and-length pair
123
+ (2 bytes). Thus, eight units require at most 16 bytes of code. */
124
+ code_buf_ptr = mask = 1;
125
+ s = 0; r = N - F;
126
+ for (i = s; i < r; i++) text_buf[i] = 0; /* Clear the buffer with
127
+ any character that will appear often. */
128
+ for (len = 0; len < F && ilen; len++) {
129
+ _get(c);
130
+ text_buf[r + len] = c;
131
+ /* Read F bytes into the last F bytes of the buffer */
132
+ }
133
+ for (i = 1; i <= F; i++) InsertNode(text_buf, lson, rson, dad, r - i); /* Insert the F strings,
134
+ each of which begins with one or more 'space' characters. Note
135
+ the order in which these strings are inserted. This way,
136
+ degenerate trees will be less likely to occur. */
137
+ InsertNode(text_buf, lson, rson, dad, r); /* Finally, insert the whole string just read. The global variables match_length and match_position are set. */
138
+ do {
139
+ if (match_length > len) match_length = len; /* match_length may be spuriously long near the end of text. */
140
+ if (match_length <= THRESHOLD) {
141
+ match_length = 1; /* Not long enough match. Send one byte. */
142
+ code_buf[0] |= mask; /* 'send one byte' flag */
143
+ code_buf[code_buf_ptr++] = text_buf[r]; /* Send uncoded. */
144
+ } else {
145
+ code_buf[code_buf_ptr++] = (unsigned char) match_position;
146
+ code_buf[code_buf_ptr++] = (unsigned char)
147
+ (((match_position >> 4) & 0xf0)
148
+ | (match_length - (THRESHOLD + 1))); /* Send position and
149
+ length pair. Note match_length > THRESHOLD. */
150
+ }
151
+ if ((mask <<= 1) == 0) { /* Shift mask left one bit. */
152
+ for (i = 0; i < code_buf_ptr; i++) { /* Send at most 8 units of */
153
+ _put(code_buf[i]); /* code together */
154
+ }
155
+ codesize += code_buf_ptr;
156
+ code_buf[0] = 0; code_buf_ptr = mask = 1;
157
+ }
158
+ last_match_length = match_length;
159
+ for (i = 0; i < last_match_length && ilen; i++) {
160
+ _get(c);
161
+ DeleteNode(lson, rson, dad, s); /* Delete old strings and */
162
+ text_buf[s] = c; /* read new bytes */
163
+ if (s < F - 1) text_buf[s + N] = c; /* If the position is
164
+ near the end of buffer, extend the buffer to make
165
+ string comparison easier. */
166
+ s = (s + 1) & (N - 1); r = (r + 1) & (N - 1);
167
+ /* Since this is a ring buffer, increment the position
168
+ modulo N. */
169
+ InsertNode(text_buf, lson, rson, dad, r); /* Register the string in text_buf[r..r+F-1] */
170
+ }
171
+ while (i++ < last_match_length) { /* After the end of text, */
172
+ DeleteNode(lson, rson, dad, s); /* no need to read, but */
173
+ s = (s + 1) & (N - 1); r = (r + 1) & (N - 1);
174
+ if (--len) InsertNode(text_buf, lson, rson, dad, r); /* buffer may not be empty. */
175
+ }
176
+ } while (len > 0); /* until length of string to be processed is zero */
177
+ if (code_buf_ptr > 1) { /* Send remaining code. */
178
+ for (i = 0; i < code_buf_ptr; i++) {
179
+ _put(code_buf[i]);
180
+ }
181
+ codesize += code_buf_ptr;
182
+ }
183
+
184
+ return codesize;
185
+ }
186
+
187
+ #undef _put
188
+ #define _put(c) \
189
+ if (*olen == limit) {\
190
+ limit *= 2;\
191
+ ostr = realloc(ostr, limit);\
192
+ }\
193
+ ostr[*olen] = c;\
194
+ *olen += 1
195
+
196
+ // note: should free the returned ptr
197
+ char* Decode(size_t ilen, unsigned char* istr, size_t *olen) /* Just the reverse of Encode(). */
198
+ {
199
+ unsigned char text_buf[N + F - 1]; /* ring buffer of size N, with extra F-1 bytes to facilitate string comparison */
200
+ int i, j, k, r, c;
201
+ unsigned int flags;
202
+ char* ostr = malloc(ilen);
203
+ int limit = ilen;
204
+ *olen = 0;
205
+
206
+ for (i = 0; i < N - F; i++) text_buf[i] = 0;
207
+ r = N - F; flags = 0;
208
+ for ( ; ; ) {
209
+ if (((flags >>= 1) & 256) == 0) {
210
+ _get(c);
211
+ flags = c | 0xff00; /* uses higher byte cleverly */
212
+ } /* to count eight */
213
+ if (flags & 1) {
214
+ _get(c);
215
+ _put(c);
216
+ text_buf[r++] = c; r &= (N - 1);
217
+ } else {
218
+ _get(i);
219
+ _get(j);
220
+ i |= ((j & 0xf0) << 4); j = (j & 0x0f) + THRESHOLD;
221
+ for (k = 0; k <= j; k++) {
222
+ c = text_buf[(i + k) & (N - 1)];
223
+ _put(c);
224
+ text_buf[r++] = c; r &= (N - 1);
225
+ }
226
+ }
227
+ }
228
+ return ostr;
229
+ }
230
+
231
+ #undef _get
232
+ #undef _put
233
+
metadata ADDED
@@ -0,0 +1,66 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: lzss
3
+ version: !ruby/object:Gem::Version
4
+ prerelease: false
5
+ segments:
6
+ - 0
7
+ - 1
8
+ version: "0.1"
9
+ platform: ruby
10
+ authors:
11
+ - NS, FX
12
+ autorequire:
13
+ bindir: bin
14
+ cert_chain: []
15
+
16
+ date: 2010-05-03 00:00:00 +08:00
17
+ default_executable:
18
+ dependencies: []
19
+
20
+ description:
21
+ email: usurffx@gmail.com
22
+ executables: []
23
+
24
+ extensions:
25
+ - src/extconf.rb
26
+ extra_rdoc_files: []
27
+
28
+ files:
29
+ - src/lzss.c
30
+ - src/lzss-ext.c
31
+ - src/extconf.rb
32
+ - example/example.rb
33
+ has_rdoc: true
34
+ homepage: http://rednaxelafx.javaeye.com
35
+ licenses: []
36
+
37
+ post_install_message:
38
+ rdoc_options: []
39
+
40
+ require_paths:
41
+ - lib
42
+ required_ruby_version: !ruby/object:Gem::Requirement
43
+ requirements:
44
+ - - ">="
45
+ - !ruby/object:Gem::Version
46
+ segments:
47
+ - 1
48
+ - 9
49
+ - 0
50
+ version: 1.9.0
51
+ required_rubygems_version: !ruby/object:Gem::Requirement
52
+ requirements:
53
+ - - ">="
54
+ - !ruby/object:Gem::Version
55
+ segments:
56
+ - 0
57
+ version: "0"
58
+ requirements: []
59
+
60
+ rubyforge_project:
61
+ rubygems_version: 1.3.6
62
+ signing_key:
63
+ specification_version: 3
64
+ summary: lzss compress algorithm for ruby
65
+ test_files: []
66
+