peterc-rsmaz 0.0.2 → 0.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/History.txt CHANGED
@@ -1,3 +1,10 @@
1
+ == 0.0.3 2009-04-02
2
+
3
+ * Now produces same output as reference smaz implementation
4
+ * Better compression
5
+ * Performance tweaks
6
+ * Initial steps of Rubyizing the algorithm
7
+
1
8
  == 0.0.2 2009-04-02
2
9
 
3
10
  * Ruby 1.9 support added
data/README.rdoc CHANGED
@@ -12,7 +12,8 @@
12
12
  http://github.com/antirez/smaz/tree/master
13
13
 
14
14
  I've done some initial cleanup of a pure Ruby->C port, but this
15
- is not yet complete. It does pass the specs, however!
15
+ is not yet complete. It does pass the specs, however! Feel free
16
+ to clean it up as it's a bit memory inefficient right now... :)
16
17
 
17
18
  == REQUIREMENTS:
18
19
 
@@ -20,13 +21,13 @@
20
21
  * Some strings to compress
21
22
  * A sense of humor
22
23
 
23
- == INSTALL:
24
+ == USAGE:
24
25
 
25
- * require 'rsmaz'
26
+ require 'rsmaz'
26
27
  r = RSmaz.compress("whatever")
27
28
  puts RSmaz.decompress(r)
28
29
 
29
- == LICENSE:
30
+ == RSMAZ LICENSE:
30
31
 
31
32
  Copyright (c) 2009 Peter Cooper, Salvatore Sanfilippo
32
33
 
data/lib/rsmaz.rb CHANGED
@@ -4,11 +4,15 @@ $:.unshift(File.dirname(__FILE__)) unless
4
4
  require 'strscan'
5
5
 
6
6
  # Silly hack to allow usage of String#ord in Ruby 1.9 without breaking Ruby 1.8
7
- class Fixnum; def ord; self; end; end
7
+ if RUBY_VERSION < '1.9.0'
8
+ class String
9
+ def ord; self[0]; end;
10
+ end
11
+ end
8
12
 
9
13
  # RSmaz is too small to bother splitting into separate files, so I'll be lazy..
10
14
  module RSmaz
11
- VERSION = '0.0.2'
15
+ VERSION = '0.0.3'
12
16
 
13
17
  # From http://github.com/antirez/smaz/blob/4b913924e15b7663ee0240af19cedfd266052aab/smaz.c
14
18
  CODEBOOK = ["\002s,\266", "\003had\232\002leW", "\003on \216", "", "\001yS",
@@ -84,50 +88,62 @@ module RSmaz
84
88
 
85
89
  # Compress a string to Smaz encoding
86
90
  def self.compress(input)
87
- h1, h2, h3 = 0
88
91
  verb = ""
89
92
  out = ""
93
+ input = input.dup
90
94
 
91
95
  # This algorithm has been ported to Ruby from C and only
92
96
  # slightly Rubyized.. still a lonnnng way to go. Wanna give it a crack?
93
97
  while (input && input.length > 0)
94
- h1 = h2 = input[0].ord << 3
95
- h2 += input[1].ord if (input.length > 1)
96
- h3 = h2 ^ input[2].ord if (input.length > 2)
98
+ h1 = h2 = input.ord << 3
99
+ h2 += input[1,1].ord if (input.length > 1)
100
+ h3 = (input.length > 2) ? h2 ^ input[2,1].ord : 0
97
101
  q = []
98
102
 
99
- [input.length, 7].min.downto(1) do |j2|
100
- slot = if j2 == 1
101
- CODEBOOK[h1 % 241]
102
- elsif j2 == 2
103
- CODEBOOK[h2 % 241]
103
+ [input.length, 7].min.downto(1) do |j|
104
+ slot = if j == 1
105
+ CODEBOOK[h1 % 241].dup
106
+ elsif j == 2
107
+ CODEBOOK[h2 % 241].dup
104
108
  else
105
- CODEBOOK[h3 % 241]
109
+ CODEBOOK[h3 % 241].dup
106
110
  end
107
111
 
108
112
  while (slot && slot[0]) do
109
- if (slot[0].ord == j2 && (slot[1,j2] == input[0,j2]))
113
+ if (slot.ord == j && (slot[1,j] == input[0,j]))
110
114
  # Match found in hash table
111
- q << verb
112
- verb = ""
113
- q << slot[slot[0].ord+1].ord
114
- input = input[j2..-1]
115
+
116
+ # Add verbatim data, if any (yes, it's quicker with the check)
117
+ unless verb.empty?
118
+ q << verb
119
+ verb = ""
120
+ end
121
+
122
+ # Add encoded data and ditch unnecessary part of input string
123
+ q << slot[slot.ord+1,1].ord
124
+ input.slice!(0..j-1)
125
+ break
115
126
  else
116
- slot = slot[2..-1]
127
+ # This in-place hack is quicker than slot = slot[1..-1]
128
+
129
+ slot.reverse!.chop!.reverse!
130
+ #slot.slice!(0)
131
+ #slot[0] = ''
117
132
  end
118
133
  end
119
134
  end
120
135
 
121
136
  # No queue? It means we matched nothing, so add the current byte to the verbatim buffer
122
137
  if q.empty?
123
- verb << input[0].ord if input[0]
124
- input = input[1..-1]
138
+ verb << input[0,1] if input[0]
139
+ input.slice!(0)
125
140
  end
126
141
 
127
142
  # If the verbatim buffer is getting too long or we're at the end of the doc
128
143
  # throw the verbatim buffer to the output queue
129
144
  q << verb if verb.length == 256 || (verb.length > 0 && input.length == 0)
130
145
 
146
+ # Turn the queue into correctly encoded data
131
147
  out << q.collect do |item|
132
148
  if item.class == String && item.length == 1
133
149
  "\376" + item
@@ -148,11 +164,11 @@ module RSmaz
148
164
  out = ""
149
165
  s = StringScanner.new(input)
150
166
  until s.eos?
151
- bv = s.get_byte[0].ord
167
+ bv = s.get_byte.ord
152
168
  if (bv == 254)
153
169
  out << s.get_byte
154
170
  elsif (bv == 255)
155
- len = s.get_byte[0].ord + 1
171
+ len = s.get_byte.ord + 1
156
172
  len.times do
157
173
  out << s.get_byte
158
174
  end
data/spec/rsmaz_spec.rb CHANGED
@@ -2,12 +2,21 @@ require File.dirname(__FILE__) + '/spec_helper.rb'
2
2
 
3
3
  describe RSmaz do
4
4
 
5
+ before(:each) do
6
+ # Do some memory leak checking
7
+ puts "\nMemory used: #{memory_usage}K"
8
+ end
9
+
10
+ after(:each) do
11
+ puts "\nMemory used: #{memory_usage}K"
12
+ end
13
+
5
14
  it "should compress 'the' to one byte" do
6
15
  RSmaz.compress("the").length.should == 1
7
16
  end
8
17
 
9
- it "should compress 'thex' to three bytes" do
10
- RSmaz.compress("thex").length.should == 3
18
+ it "should compress 'thex' to two bytes" do
19
+ RSmaz.compress("thex").length.should == 2
11
20
  end
12
21
 
13
22
  it "should compress and decompress strings back to the same string" do
@@ -23,4 +32,25 @@ describe RSmaz do
23
32
  it "should properly decode a reference compression (so the internal coding doesn't change)" do
24
33
  RSmaz.decompress("\020\230`A\376o\f\026\030").should == "hello world"
25
34
  end
35
+
36
+ it "should compress to the same extent as the reference smaz implementation" do
37
+ RSmaz.compress("foobar").length.should == 4
38
+ RSmaz.compress("Nel mezzo del cammin di nostra vita, mi ritrovai in una selva oscura").length.should == 46
39
+ end
40
+
41
+ it "should compress and decompress lots of random strings without issues" do
42
+ 100.times do
43
+ str = (1..100).map { |a| (rand(26)+97).chr }.join
44
+ RSmaz.decompress(RSmaz.compress(str)).length.should == str.length
45
+ end
46
+ end
47
+
48
+ it "should compress and decompress lots of random strings without issues (again)" do
49
+ 100.times do
50
+ str = (1..100).map { |a| (rand(26)+97).chr }.join
51
+ RSmaz.decompress(RSmaz.compress(str)).length.should == str.length
52
+ end
53
+ end
54
+
55
+
26
56
  end
data/spec/spec_helper.rb CHANGED
@@ -8,3 +8,7 @@ end
8
8
 
9
9
  $:.unshift(File.dirname(__FILE__) + '/../lib')
10
10
  require 'rsmaz'
11
+
12
+ def memory_usage
13
+ `ps -Orss #{Process.pid} | tail -1`.scan(/\d+/)[1].to_i rescue 0
14
+ end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: peterc-rsmaz
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.2
4
+ version: 0.0.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Peter Cooper
@@ -32,7 +32,7 @@ dependencies:
32
32
  - !ruby/object:Gem::Version
33
33
  version: 1.8.0
34
34
  version:
35
- description: "Short String Compression for Ruby. RSmaz is a pure-Ruby port of the Smaz short string compression algorithm by Salvatore Sanfilippo and released as a C library at: http://github.com/antirez/smaz/tree/master I've done some initial cleanup of a pure Ruby->C port, but this is not yet complete. It does pass the specs, however!"
35
+ description: "Short String Compression for Ruby. RSmaz is a pure-Ruby port of the Smaz short string compression algorithm by Salvatore Sanfilippo and released as a C library at: http://github.com/antirez/smaz/tree/master I've done some initial cleanup of a pure Ruby->C port, but this is not yet complete. It does pass the specs, however! Feel free to clean it up as it's a bit memory inefficient right now... :)"
36
36
  email:
37
37
  - pcooper@petercooper.co.uk
38
38
  executables: []