peterc-rsmaz 0.0.2 → 0.0.3
Sign up to get free protection for your applications and to get access to all the features.
- data/History.txt +7 -0
- data/README.rdoc +5 -4
- data/lib/rsmaz.rb +38 -22
- data/spec/rsmaz_spec.rb +32 -2
- data/spec/spec_helper.rb +4 -0
- metadata +2 -2
data/History.txt
CHANGED
data/README.rdoc
CHANGED
@@ -12,7 +12,8 @@
|
|
12
12
|
http://github.com/antirez/smaz/tree/master
|
13
13
|
|
14
14
|
I've done some initial cleanup of a pure Ruby->C port, but this
|
15
|
-
is not yet complete. It does pass the specs, however!
|
15
|
+
is not yet complete. It does pass the specs, however! Feel free
|
16
|
+
to clean it up as it's a bit memory inefficient right now... :)
|
16
17
|
|
17
18
|
== REQUIREMENTS:
|
18
19
|
|
@@ -20,13 +21,13 @@
|
|
20
21
|
* Some strings to compress
|
21
22
|
* A sense of humor
|
22
23
|
|
23
|
-
==
|
24
|
+
== USAGE:
|
24
25
|
|
25
|
-
|
26
|
+
require 'rsmaz'
|
26
27
|
r = RSmaz.compress("whatever")
|
27
28
|
puts RSmaz.decompress(r)
|
28
29
|
|
29
|
-
== LICENSE:
|
30
|
+
== RSMAZ LICENSE:
|
30
31
|
|
31
32
|
Copyright (c) 2009 Peter Cooper, Salvatore Sanfilippo
|
32
33
|
|
data/lib/rsmaz.rb
CHANGED
@@ -4,11 +4,15 @@ $:.unshift(File.dirname(__FILE__)) unless
|
|
4
4
|
require 'strscan'
|
5
5
|
|
6
6
|
# Silly hack to allow usage of String#ord in Ruby 1.9 without breaking Ruby 1.8
|
7
|
-
|
7
|
+
if RUBY_VERSION < '1.9.0'
|
8
|
+
class String
|
9
|
+
def ord; self[0]; end;
|
10
|
+
end
|
11
|
+
end
|
8
12
|
|
9
13
|
# RSmaz is too small to bother splitting into separate files, so I'll be lazy..
|
10
14
|
module RSmaz
|
11
|
-
VERSION = '0.0.2'
|
15
|
+
VERSION = '0.0.3'
|
12
16
|
|
13
17
|
# From http://github.com/antirez/smaz/blob/4b913924e15b7663ee0240af19cedfd266052aab/smaz.c
|
14
18
|
CODEBOOK = ["\002s,\266", "\003had\232\002leW", "\003on \216", "", "\001yS",
|
@@ -84,50 +88,62 @@ module RSmaz
|
|
84
88
|
|
85
89
|
# Compress a string to Smaz encoding
|
86
90
|
def self.compress(input)
|
87
|
-
h1, h2, h3 = 0
|
88
91
|
verb = ""
|
89
92
|
out = ""
|
93
|
+
input = input.dup
|
90
94
|
|
91
95
|
# This algorithm has been ported to Ruby from C and only
|
92
96
|
# slightly Rubyized.. still a lonnnng way to go. Wanna give it a crack?
|
93
97
|
while (input && input.length > 0)
|
94
|
-
h1 = h2 = input
|
95
|
-
h2 += input[1].ord if (input.length > 1)
|
96
|
-
h3 = h2 ^ input[2].ord
|
98
|
+
h1 = h2 = input.ord << 3
|
99
|
+
h2 += input[1,1].ord if (input.length > 1)
|
100
|
+
h3 = (input.length > 2) ? h2 ^ input[2,1].ord : 0
|
97
101
|
q = []
|
98
102
|
|
99
|
-
[input.length, 7].min.downto(1) do |
|
100
|
-
slot = if
|
101
|
-
CODEBOOK[h1 % 241]
|
102
|
-
elsif
|
103
|
-
CODEBOOK[h2 % 241]
|
103
|
+
[input.length, 7].min.downto(1) do |j|
|
104
|
+
slot = if j == 1
|
105
|
+
CODEBOOK[h1 % 241].dup
|
106
|
+
elsif j == 2
|
107
|
+
CODEBOOK[h2 % 241].dup
|
104
108
|
else
|
105
|
-
CODEBOOK[h3 % 241]
|
109
|
+
CODEBOOK[h3 % 241].dup
|
106
110
|
end
|
107
111
|
|
108
112
|
while (slot && slot[0]) do
|
109
|
-
if (slot
|
113
|
+
if (slot.ord == j && (slot[1,j] == input[0,j]))
|
110
114
|
# Match found in hash table
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
+
|
116
|
+
# Add verbatim data, if any (yes, it's quicker with the check)
|
117
|
+
unless verb.empty?
|
118
|
+
q << verb
|
119
|
+
verb = ""
|
120
|
+
end
|
121
|
+
|
122
|
+
# Add encoded data and ditch unnecessary part of input string
|
123
|
+
q << slot[slot.ord+1,1].ord
|
124
|
+
input.slice!(0..j-1)
|
125
|
+
break
|
115
126
|
else
|
116
|
-
slot = slot[
|
127
|
+
# This in-place hack is quicker than slot = slot[1..-1]
|
128
|
+
|
129
|
+
slot.reverse!.chop!.reverse!
|
130
|
+
#slot.slice!(0)
|
131
|
+
#slot[0] = ''
|
117
132
|
end
|
118
133
|
end
|
119
134
|
end
|
120
135
|
|
121
136
|
# No queue? It means we matched nothing, so add the current byte to the verbatim buffer
|
122
137
|
if q.empty?
|
123
|
-
verb << input[0]
|
124
|
-
input
|
138
|
+
verb << input[0,1] if input[0]
|
139
|
+
input.slice!(0)
|
125
140
|
end
|
126
141
|
|
127
142
|
# If the verbatim buffer is getting too long or we're at the end of the doc
|
128
143
|
# throw the verbatim buffer to the output queue
|
129
144
|
q << verb if verb.length == 256 || (verb.length > 0 && input.length == 0)
|
130
145
|
|
146
|
+
# Turn the queue into correctly encoded data
|
131
147
|
out << q.collect do |item|
|
132
148
|
if item.class == String && item.length == 1
|
133
149
|
"\376" + item
|
@@ -148,11 +164,11 @@ module RSmaz
|
|
148
164
|
out = ""
|
149
165
|
s = StringScanner.new(input)
|
150
166
|
until s.eos?
|
151
|
-
bv = s.get_byte
|
167
|
+
bv = s.get_byte.ord
|
152
168
|
if (bv == 254)
|
153
169
|
out << s.get_byte
|
154
170
|
elsif (bv == 255)
|
155
|
-
len = s.get_byte
|
171
|
+
len = s.get_byte.ord + 1
|
156
172
|
len.times do
|
157
173
|
out << s.get_byte
|
158
174
|
end
|
data/spec/rsmaz_spec.rb
CHANGED
@@ -2,12 +2,21 @@ require File.dirname(__FILE__) + '/spec_helper.rb'
|
|
2
2
|
|
3
3
|
describe RSmaz do
|
4
4
|
|
5
|
+
before(:each) do
|
6
|
+
# Do some memory leak checking
|
7
|
+
puts "\nMemory used: #{memory_usage}K"
|
8
|
+
end
|
9
|
+
|
10
|
+
after(:each) do
|
11
|
+
puts "\nMemory used: #{memory_usage}K"
|
12
|
+
end
|
13
|
+
|
5
14
|
it "should compress 'the' to one byte" do
|
6
15
|
RSmaz.compress("the").length.should == 1
|
7
16
|
end
|
8
17
|
|
9
|
-
it "should compress 'thex' to
|
10
|
-
RSmaz.compress("thex").length.should ==
|
18
|
+
it "should compress 'thex' to two bytes" do
|
19
|
+
RSmaz.compress("thex").length.should == 2
|
11
20
|
end
|
12
21
|
|
13
22
|
it "should compress and decompress strings back to the same string" do
|
@@ -23,4 +32,25 @@ describe RSmaz do
|
|
23
32
|
it "should properly decode a reference compression (so the internal coding doesn't change)" do
|
24
33
|
RSmaz.decompress("\020\230`A\376o\f\026\030").should == "hello world"
|
25
34
|
end
|
35
|
+
|
36
|
+
it "should compress to the same extent as the reference smaz implementation" do
|
37
|
+
RSmaz.compress("foobar").length.should == 4
|
38
|
+
RSmaz.compress("Nel mezzo del cammin di nostra vita, mi ritrovai in una selva oscura").length.should == 46
|
39
|
+
end
|
40
|
+
|
41
|
+
it "should compress and decompress lots of random strings without issues" do
|
42
|
+
100.times do
|
43
|
+
str = (1..100).map { |a| (rand(26)+97).chr }.join
|
44
|
+
RSmaz.decompress(RSmaz.compress(str)).length.should == str.length
|
45
|
+
end
|
46
|
+
end
|
47
|
+
|
48
|
+
it "should compress and decompress lots of random strings without issues (again)" do
|
49
|
+
100.times do
|
50
|
+
str = (1..100).map { |a| (rand(26)+97).chr }.join
|
51
|
+
RSmaz.decompress(RSmaz.compress(str)).length.should == str.length
|
52
|
+
end
|
53
|
+
end
|
54
|
+
|
55
|
+
|
26
56
|
end
|
data/spec/spec_helper.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: peterc-rsmaz
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.2
|
4
|
+
version: 0.0.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Peter Cooper
|
@@ -32,7 +32,7 @@ dependencies:
|
|
32
32
|
- !ruby/object:Gem::Version
|
33
33
|
version: 1.8.0
|
34
34
|
version:
|
35
|
-
description: "Short String Compression for Ruby. RSmaz is a pure-Ruby port of the Smaz short string compression algorithm by Salvatore Sanfilippo and released as a C library at: http://github.com/antirez/smaz/tree/master I've done some initial cleanup of a pure Ruby->C port, but this is not yet complete. It does pass the specs, however!"
|
35
|
+
description: "Short String Compression for Ruby. RSmaz is a pure-Ruby port of the Smaz short string compression algorithm by Salvatore Sanfilippo and released as a C library at: http://github.com/antirez/smaz/tree/master I've done some initial cleanup of a pure Ruby->C port, but this is not yet complete. It does pass the specs, however! Feel free to clean it up as it's a bit memory inefficient right now... :)"
|
36
36
|
email:
|
37
37
|
- pcooper@petercooper.co.uk
|
38
38
|
executables: []
|