peterc-rsmaz 0.0.2 → 0.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/History.txt +7 -0
- data/README.rdoc +5 -4
- data/lib/rsmaz.rb +38 -22
- data/spec/rsmaz_spec.rb +32 -2
- data/spec/spec_helper.rb +4 -0
- metadata +2 -2
data/History.txt
CHANGED
data/README.rdoc
CHANGED
@@ -12,7 +12,8 @@
|
|
12
12
|
http://github.com/antirez/smaz/tree/master
|
13
13
|
|
14
14
|
I've done some initial cleanup of a pure Ruby->C port, but this
|
15
|
-
is not yet complete. It does pass the specs, however!
|
15
|
+
is not yet complete. It does pass the specs, however! Feel free
|
16
|
+
to clean it up as it's a bit memory inefficient right now... :)
|
16
17
|
|
17
18
|
== REQUIREMENTS:
|
18
19
|
|
@@ -20,13 +21,13 @@
|
|
20
21
|
* Some strings to compress
|
21
22
|
* A sense of humor
|
22
23
|
|
23
|
-
==
|
24
|
+
== USAGE:
|
24
25
|
|
25
|
-
|
26
|
+
require 'rsmaz'
|
26
27
|
r = RSmaz.compress("whatever")
|
27
28
|
puts RSmaz.decompress(r)
|
28
29
|
|
29
|
-
== LICENSE:
|
30
|
+
== RSMAZ LICENSE:
|
30
31
|
|
31
32
|
Copyright (c) 2009 Peter Cooper, Salvatore Sanfilippo
|
32
33
|
|
data/lib/rsmaz.rb
CHANGED
@@ -4,11 +4,15 @@ $:.unshift(File.dirname(__FILE__)) unless
|
|
4
4
|
require 'strscan'
|
5
5
|
|
6
6
|
# Silly hack to allow usage of String#ord in Ruby 1.9 without breaking Ruby 1.8
|
7
|
-
|
7
|
+
if RUBY_VERSION < '1.9.0'
|
8
|
+
class String
|
9
|
+
def ord; self[0]; end;
|
10
|
+
end
|
11
|
+
end
|
8
12
|
|
9
13
|
# RSmaz is too small to bother splitting into separate files, so I'll be lazy..
|
10
14
|
module RSmaz
|
11
|
-
VERSION = '0.0.2'
|
15
|
+
VERSION = '0.0.3'
|
12
16
|
|
13
17
|
# From http://github.com/antirez/smaz/blob/4b913924e15b7663ee0240af19cedfd266052aab/smaz.c
|
14
18
|
CODEBOOK = ["\002s,\266", "\003had\232\002leW", "\003on \216", "", "\001yS",
|
@@ -84,50 +88,62 @@ module RSmaz
|
|
84
88
|
|
85
89
|
# Compress a string to Smaz encoding
|
86
90
|
def self.compress(input)
|
87
|
-
h1, h2, h3 = 0
|
88
91
|
verb = ""
|
89
92
|
out = ""
|
93
|
+
input = input.dup
|
90
94
|
|
91
95
|
# This algorithm has been ported to Ruby from C and only
|
92
96
|
# slightly Rubyized.. still a lonnnng way to go. Wanna give it a crack?
|
93
97
|
while (input && input.length > 0)
|
94
|
-
h1 = h2 = input
|
95
|
-
h2 += input[1].ord if (input.length > 1)
|
96
|
-
h3 = h2 ^ input[2].ord
|
98
|
+
h1 = h2 = input.ord << 3
|
99
|
+
h2 += input[1,1].ord if (input.length > 1)
|
100
|
+
h3 = (input.length > 2) ? h2 ^ input[2,1].ord : 0
|
97
101
|
q = []
|
98
102
|
|
99
|
-
[input.length, 7].min.downto(1) do |
|
100
|
-
slot = if
|
101
|
-
CODEBOOK[h1 % 241]
|
102
|
-
elsif
|
103
|
-
CODEBOOK[h2 % 241]
|
103
|
+
[input.length, 7].min.downto(1) do |j|
|
104
|
+
slot = if j == 1
|
105
|
+
CODEBOOK[h1 % 241].dup
|
106
|
+
elsif j == 2
|
107
|
+
CODEBOOK[h2 % 241].dup
|
104
108
|
else
|
105
|
-
CODEBOOK[h3 % 241]
|
109
|
+
CODEBOOK[h3 % 241].dup
|
106
110
|
end
|
107
111
|
|
108
112
|
while (slot && slot[0]) do
|
109
|
-
if (slot
|
113
|
+
if (slot.ord == j && (slot[1,j] == input[0,j]))
|
110
114
|
# Match found in hash table
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
+
|
116
|
+
# Add verbatim data, if any (yes, it's quicker with the check)
|
117
|
+
unless verb.empty?
|
118
|
+
q << verb
|
119
|
+
verb = ""
|
120
|
+
end
|
121
|
+
|
122
|
+
# Add encoded data and ditch unnecessary part of input string
|
123
|
+
q << slot[slot.ord+1,1].ord
|
124
|
+
input.slice!(0..j-1)
|
125
|
+
break
|
115
126
|
else
|
116
|
-
slot = slot[
|
127
|
+
# This in-place hack is quicker than slot = slot[1..-1]
|
128
|
+
|
129
|
+
slot.reverse!.chop!.reverse!
|
130
|
+
#slot.slice!(0)
|
131
|
+
#slot[0] = ''
|
117
132
|
end
|
118
133
|
end
|
119
134
|
end
|
120
135
|
|
121
136
|
# No queue? It means we matched nothing, so add the current byte to the verbatim buffer
|
122
137
|
if q.empty?
|
123
|
-
verb << input[0]
|
124
|
-
input
|
138
|
+
verb << input[0,1] if input[0]
|
139
|
+
input.slice!(0)
|
125
140
|
end
|
126
141
|
|
127
142
|
# If the verbatim buffer is getting too long or we're at the end of the doc
|
128
143
|
# throw the verbatim buffer to the output queue
|
129
144
|
q << verb if verb.length == 256 || (verb.length > 0 && input.length == 0)
|
130
145
|
|
146
|
+
# Turn the queue into correctly encoded data
|
131
147
|
out << q.collect do |item|
|
132
148
|
if item.class == String && item.length == 1
|
133
149
|
"\376" + item
|
@@ -148,11 +164,11 @@ module RSmaz
|
|
148
164
|
out = ""
|
149
165
|
s = StringScanner.new(input)
|
150
166
|
until s.eos?
|
151
|
-
bv = s.get_byte
|
167
|
+
bv = s.get_byte.ord
|
152
168
|
if (bv == 254)
|
153
169
|
out << s.get_byte
|
154
170
|
elsif (bv == 255)
|
155
|
-
len = s.get_byte
|
171
|
+
len = s.get_byte.ord + 1
|
156
172
|
len.times do
|
157
173
|
out << s.get_byte
|
158
174
|
end
|
data/spec/rsmaz_spec.rb
CHANGED
@@ -2,12 +2,21 @@ require File.dirname(__FILE__) + '/spec_helper.rb'
|
|
2
2
|
|
3
3
|
describe RSmaz do
|
4
4
|
|
5
|
+
before(:each) do
|
6
|
+
# Do some memory leak checking
|
7
|
+
puts "\nMemory used: #{memory_usage}K"
|
8
|
+
end
|
9
|
+
|
10
|
+
after(:each) do
|
11
|
+
puts "\nMemory used: #{memory_usage}K"
|
12
|
+
end
|
13
|
+
|
5
14
|
it "should compress 'the' to one byte" do
|
6
15
|
RSmaz.compress("the").length.should == 1
|
7
16
|
end
|
8
17
|
|
9
|
-
it "should compress 'thex' to
|
10
|
-
RSmaz.compress("thex").length.should ==
|
18
|
+
it "should compress 'thex' to two bytes" do
|
19
|
+
RSmaz.compress("thex").length.should == 2
|
11
20
|
end
|
12
21
|
|
13
22
|
it "should compress and decompress strings back to the same string" do
|
@@ -23,4 +32,25 @@ describe RSmaz do
|
|
23
32
|
it "should properly decode a reference compression (so the internal coding doesn't change)" do
|
24
33
|
RSmaz.decompress("\020\230`A\376o\f\026\030").should == "hello world"
|
25
34
|
end
|
35
|
+
|
36
|
+
it "should compress to the same extent as the reference smaz implementation" do
|
37
|
+
RSmaz.compress("foobar").length.should == 4
|
38
|
+
RSmaz.compress("Nel mezzo del cammin di nostra vita, mi ritrovai in una selva oscura").length.should == 46
|
39
|
+
end
|
40
|
+
|
41
|
+
it "should compress and decompress lots of random strings without issues" do
|
42
|
+
100.times do
|
43
|
+
str = (1..100).map { |a| (rand(26)+97).chr }.join
|
44
|
+
RSmaz.decompress(RSmaz.compress(str)).length.should == str.length
|
45
|
+
end
|
46
|
+
end
|
47
|
+
|
48
|
+
it "should compress and decompress lots of random strings without issues (again)" do
|
49
|
+
100.times do
|
50
|
+
str = (1..100).map { |a| (rand(26)+97).chr }.join
|
51
|
+
RSmaz.decompress(RSmaz.compress(str)).length.should == str.length
|
52
|
+
end
|
53
|
+
end
|
54
|
+
|
55
|
+
|
26
56
|
end
|
data/spec/spec_helper.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: peterc-rsmaz
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.2
|
4
|
+
version: 0.0.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Peter Cooper
|
@@ -32,7 +32,7 @@ dependencies:
|
|
32
32
|
- !ruby/object:Gem::Version
|
33
33
|
version: 1.8.0
|
34
34
|
version:
|
35
|
-
description: "Short String Compression for Ruby. RSmaz is a pure-Ruby port of the Smaz short string compression algorithm by Salvatore Sanfilippo and released as a C library at: http://github.com/antirez/smaz/tree/master I've done some initial cleanup of a pure Ruby->C port, but this is not yet complete. It does pass the specs, however!"
|
35
|
+
description: "Short String Compression for Ruby. RSmaz is a pure-Ruby port of the Smaz short string compression algorithm by Salvatore Sanfilippo and released as a C library at: http://github.com/antirez/smaz/tree/master I've done some initial cleanup of a pure Ruby->C port, but this is not yet complete. It does pass the specs, however! Feel free to clean it up as it's a bit memory inefficient right now... :)"
|
36
36
|
email:
|
37
37
|
- pcooper@petercooper.co.uk
|
38
38
|
executables: []
|