peterc-rsmaz 0.0.2 → 0.0.3

Sign up to get free protection for your applications and to get access to all the features.
data/History.txt CHANGED
@@ -1,3 +1,10 @@
1
+ == 0.0.3 2009-04-02
2
+
3
+ * Now produces same output as reference smaz implementation
4
+ * Better compression
5
+ * Performance tweaks
6
+ * Initial steps of Rubyizing the algorithm
7
+
1
8
  == 0.0.2 2009-04-02
2
9
 
3
10
  * Ruby 1.9 support added
data/README.rdoc CHANGED
@@ -12,7 +12,8 @@
12
12
  http://github.com/antirez/smaz/tree/master
13
13
 
14
14
  I've done some initial cleanup of a pure C->Ruby port, but this
15
- is not yet complete. It does pass the specs, however!
15
+ is not yet complete. It does pass the specs, however! Feel free
16
+ to clean it up as it's a bit memory inefficient right now... :)
16
17
 
17
18
  == REQUIREMENTS:
18
19
 
@@ -20,13 +21,13 @@
20
21
  * Some strings to compress
21
22
  * A sense of humor
22
23
 
23
- == INSTALL:
24
+ == USAGE:
24
25
 
25
- * require 'rsmaz'
26
+ require 'rsmaz'
26
27
  r = RSmaz.compress("whatever")
27
28
  puts RSmaz.decompress(r)
28
29
 
29
- == LICENSE:
30
+ == RSMAZ LICENSE:
30
31
 
31
32
  Copyright (c) 2009 Peter Cooper, Salvatore Sanfilippo
32
33
 
data/lib/rsmaz.rb CHANGED
@@ -4,11 +4,15 @@ $:.unshift(File.dirname(__FILE__)) unless
4
4
  require 'strscan'
5
5
 
6
6
  # Silly hack to allow usage of String#ord in Ruby 1.9 without breaking Ruby 1.8
7
- class Fixnum; def ord; self; end; end
7
+ if RUBY_VERSION < '1.9.0'
8
+ class String
9
+ def ord; self[0]; end;
10
+ end
11
+ end
8
12
 
9
13
  # RSmaz is too small to bother splitting into separate files, so I'll be lazy..
10
14
  module RSmaz
11
- VERSION = '0.0.2'
15
+ VERSION = '0.0.3'
12
16
 
13
17
  # From http://github.com/antirez/smaz/blob/4b913924e15b7663ee0240af19cedfd266052aab/smaz.c
14
18
  CODEBOOK = ["\002s,\266", "\003had\232\002leW", "\003on \216", "", "\001yS",
@@ -84,50 +88,62 @@ module RSmaz
84
88
 
85
89
  # Compress a string to Smaz encoding
86
90
  def self.compress(input)
87
- h1, h2, h3 = 0
88
91
  verb = ""
89
92
  out = ""
93
+ input = input.dup
90
94
 
91
95
  # This algorithm has been ported to Ruby from C and only
92
96
  # slightly Rubyized.. still a lonnnng way to go. Wanna give it a crack?
93
97
  while (input && input.length > 0)
94
- h1 = h2 = input[0].ord << 3
95
- h2 += input[1].ord if (input.length > 1)
96
- h3 = h2 ^ input[2].ord if (input.length > 2)
98
+ h1 = h2 = input.ord << 3
99
+ h2 += input[1,1].ord if (input.length > 1)
100
+ h3 = (input.length > 2) ? h2 ^ input[2,1].ord : 0
97
101
  q = []
98
102
 
99
- [input.length, 7].min.downto(1) do |j2|
100
- slot = if j2 == 1
101
- CODEBOOK[h1 % 241]
102
- elsif j2 == 2
103
- CODEBOOK[h2 % 241]
103
+ [input.length, 7].min.downto(1) do |j|
104
+ slot = if j == 1
105
+ CODEBOOK[h1 % 241].dup
106
+ elsif j == 2
107
+ CODEBOOK[h2 % 241].dup
104
108
  else
105
- CODEBOOK[h3 % 241]
109
+ CODEBOOK[h3 % 241].dup
106
110
  end
107
111
 
108
112
  while (slot && slot[0]) do
109
- if (slot[0].ord == j2 && (slot[1,j2] == input[0,j2]))
113
+ if (slot.ord == j && (slot[1,j] == input[0,j]))
110
114
  # Match found in hash table
111
- q << verb
112
- verb = ""
113
- q << slot[slot[0].ord+1].ord
114
- input = input[j2..-1]
115
+
116
+ # Add verbatim data, if any (yes, it's quicker with the check)
117
+ unless verb.empty?
118
+ q << verb
119
+ verb = ""
120
+ end
121
+
122
+ # Add encoded data and ditch unnecessary part of input string
123
+ q << slot[slot.ord+1,1].ord
124
+ input.slice!(0..j-1)
125
+ break
115
126
  else
116
- slot = slot[2..-1]
127
+ # This in-place hack is quicker than slot = slot[1..-1]
128
+
129
+ slot.reverse!.chop!.reverse!
130
+ #slot.slice!(0)
131
+ #slot[0] = ''
117
132
  end
118
133
  end
119
134
  end
120
135
 
121
136
  # No queue? It means we matched nothing, so add the current byte to the verbatim buffer
122
137
  if q.empty?
123
- verb << input[0].ord if input[0]
124
- input = input[1..-1]
138
+ verb << input[0,1] if input[0]
139
+ input.slice!(0)
125
140
  end
126
141
 
127
142
  # If the verbatim buffer is getting too long or we're at the end of the doc
128
143
  # throw the verbatim buffer to the output queue
129
144
  q << verb if verb.length == 256 || (verb.length > 0 && input.length == 0)
130
145
 
146
+ # Turn the queue into correctly encoded data
131
147
  out << q.collect do |item|
132
148
  if item.class == String && item.length == 1
133
149
  "\376" + item
@@ -148,11 +164,11 @@ module RSmaz
148
164
  out = ""
149
165
  s = StringScanner.new(input)
150
166
  until s.eos?
151
- bv = s.get_byte[0].ord
167
+ bv = s.get_byte.ord
152
168
  if (bv == 254)
153
169
  out << s.get_byte
154
170
  elsif (bv == 255)
155
- len = s.get_byte[0].ord + 1
171
+ len = s.get_byte.ord + 1
156
172
  len.times do
157
173
  out << s.get_byte
158
174
  end
data/spec/rsmaz_spec.rb CHANGED
@@ -2,12 +2,21 @@ require File.dirname(__FILE__) + '/spec_helper.rb'
2
2
 
3
3
  describe RSmaz do
4
4
 
5
+ before(:each) do
6
+ # Do some memory leak checking
7
+ puts "\nMemory used: #{memory_usage}K"
8
+ end
9
+
10
+ after(:each) do
11
+ puts "\nMemory used: #{memory_usage}K"
12
+ end
13
+
5
14
  it "should compress 'the' to one byte" do
6
15
  RSmaz.compress("the").length.should == 1
7
16
  end
8
17
 
9
- it "should compress 'thex' to three bytes" do
10
- RSmaz.compress("thex").length.should == 3
18
+ it "should compress 'thex' to two bytes" do
19
+ RSmaz.compress("thex").length.should == 2
11
20
  end
12
21
 
13
22
  it "should compress and decompress strings back to the same string" do
@@ -23,4 +32,25 @@ describe RSmaz do
23
32
  it "should properly decode a reference compression (so the internal coding doesn't change)" do
24
33
  RSmaz.decompress("\020\230`A\376o\f\026\030").should == "hello world"
25
34
  end
35
+
36
+ it "should compress to the same extent as the reference smaz implementation" do
37
+ RSmaz.compress("foobar").length.should == 4
38
+ RSmaz.compress("Nel mezzo del cammin di nostra vita, mi ritrovai in una selva oscura").length.should == 46
39
+ end
40
+
41
+ it "should compress and decompress lots of random strings without issues" do
42
+ 100.times do
43
+ str = (1..100).map { |a| (rand(26)+97).chr }.join
44
+ RSmaz.decompress(RSmaz.compress(str)).length.should == str.length
45
+ end
46
+ end
47
+
48
+ it "should compress and decompress lots of random strings without issues (again)" do
49
+ 100.times do
50
+ str = (1..100).map { |a| (rand(26)+97).chr }.join
51
+ RSmaz.decompress(RSmaz.compress(str)).length.should == str.length
52
+ end
53
+ end
54
+
55
+
26
56
  end
data/spec/spec_helper.rb CHANGED
@@ -8,3 +8,7 @@ end
8
8
 
9
9
  $:.unshift(File.dirname(__FILE__) + '/../lib')
10
10
  require 'rsmaz'
11
+
12
+ def memory_usage
13
+ `ps -Orss #{Process.pid} | tail -1`.scan(/\d+/)[1].to_i rescue 0
14
+ end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: peterc-rsmaz
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.2
4
+ version: 0.0.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Peter Cooper
@@ -32,7 +32,7 @@ dependencies:
32
32
  - !ruby/object:Gem::Version
33
33
  version: 1.8.0
34
34
  version:
35
- description: "Short String Compression for Ruby. RSmaz is a pure-Ruby port of the Smaz short string compression algorithm by Salvatore Sanfilippo and released as a C library at: http://github.com/antirez/smaz/tree/master I've done some initial cleanup of a pure Ruby->C port, but this is not yet complete. It does pass the specs, however!"
35
+ description: "Short String Compression for Ruby. RSmaz is a pure-Ruby port of the Smaz short string compression algorithm by Salvatore Sanfilippo and released as a C library at: http://github.com/antirez/smaz/tree/master I've done some initial cleanup of a pure C->Ruby port, but this is not yet complete. It does pass the specs, however! Feel free to clean it up as it's a bit memory inefficient right now... :)"
36
36
  email:
37
37
  - pcooper@petercooper.co.uk
38
38
  executables: []