RubyGems - peterc-rsmaz - Versions diffs - 0.0.2 → 0.0.3 - Mend

peterc-rsmaz 0.0.2 → 0.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (6) hide show

data/History.txt CHANGED Viewed

@@ -1,3 +1,10 @@
+== 0.0.3 2009-04-02
+* Now produces same output as reference smaz implementation
+* Better compression
+* Performance tweaks
+* Initial steps of Rubyizing the algorithm
 == 0.0.2 2009-04-02
 * Ruby 1.9 support added

data/README.rdoc CHANGED Viewed

@@ -12,7 +12,8 @@
   http://github.com/antirez/smaz/tree/master
   I've done some initial cleanup of a pure Ruby->C port, but this
-  is not yet complete. It does pass the specs, however!
+  is not yet complete. It does pass the specs, however! Feel free
+  to clean it up as it's a bit memory inefficient right now... :)
 == REQUIREMENTS:
@@ -20,13 +21,13 @@
 * Some strings to compress
 * A sense of humor
-== INSTALL:
+== USAGE:
-* require 'rsmaz'
+  require 'rsmaz'
   r = RSmaz.compress("whatever")
   puts RSmaz.decompress(r)
-== LICENSE:
+== RSMAZ LICENSE:
 Copyright (c) 2009 Peter Cooper, Salvatore Sanfilippo

data/lib/rsmaz.rb CHANGED Viewed

@@ -4,11 +4,15 @@ $:.unshift(File.dirname(__FILE__)) unless
 require 'strscan'
 # Silly hack to allow usage of String#ord in Ruby 1.9 without breaking Ruby 1.8
-class Fixnum; def ord; self; end; end
+if RUBY_VERSION < '1.9.0'
+  class String
+    def ord; self[0]; end;
+  end
+end
 # RSmaz is too small to bother splitting into separate files, so I'll be lazy..
 module RSmaz
-  VERSION = '0.0.2'
+  VERSION = '0.0.3'
   # From http://github.com/antirez/smaz/blob/4b913924e15b7663ee0240af19cedfd266052aab/smaz.c
   CODEBOOK = ["\002s,\266", "\003had\232\002leW", "\003on \216", "", "\001yS",
@@ -84,50 +88,62 @@ module RSmaz
   # Compress a string to Smaz encoding
   def self.compress(input)
-    h1, h2, h3 = 0
     verb = ""
     out = ""
+    input = input.dup
     # This algorithm has been ported to Ruby from C and only
     # slightly Rubyized.. still a lonnnng way to go. Wanna give it a crack?
     while (input && input.length > 0)
-      h1 = h2 = input[0].ord << 3
-      h2 += input[1].ord if (input.length > 1)
-      h3 = h2 ^ input[2].ord if (input.length > 2)
+      h1 = h2 = input.ord << 3
+      h2 += input[1,1].ord if (input.length > 1)
+      h3 = (input.length > 2) ? h2 ^ input[2,1].ord : 0
       q = []
-      [input.length, 7].min.downto(1) do |j2|
-        slot = if j2 == 1
-          CODEBOOK[h1 % 241]
-        elsif j2 == 2
-          CODEBOOK[h2 % 241]
+      [input.length, 7].min.downto(1) do |j|
+        slot = if j == 1
+          CODEBOOK[h1 % 241].dup
+        elsif j == 2
+          CODEBOOK[h2 % 241].dup
         else
-          CODEBOOK[h3 % 241]
+          CODEBOOK[h3 % 241].dup
         end
         while (slot && slot[0]) do
-          if (slot[0].ord == j2 && (slot[1,j2] == input[0,j2]))
+          if (slot.ord == j && (slot[1,j] == input[0,j]))
             # Match found in hash table
-            q << verb
-            verb = ""
-            q << slot[slot[0].ord+1].ord
-            input = input[j2..-1]
+            # Add verbatim data, if any (yes, it's quicker with the check)
+            unless verb.empty?
+              q << verb
+              verb = ""
+            end
+            # Add encoded data and ditch unnecessary part of input string
+            q << slot[slot.ord+1,1].ord
+            input.slice!(0..j-1)
+            break
           else
-            slot = slot[2..-1]
+            # This in-place hack is quicker than slot = slot[1..-1]
+            slot.reverse!.chop!.reverse!
+            #slot.slice!(0)
+            #slot[0] = ''
           end
         end
       end
       # No queue? It means we matched nothing, so add the current byte to the verbatim buffer
       if q.empty?
-        verb << input[0].ord if input[0]
-        input = input[1..-1]
+        verb << input[0,1] if input[0]
+        input.slice!(0)
       end
       # If the verbatim buffer is getting too long or we're at the end of the doc
       # throw the verbatim buffer to the output queue
       q << verb if verb.length == 256 || (verb.length > 0 && input.length == 0)
+      # Turn the queue into correctly encoded data
       out << q.collect do |item|
         if item.class == String && item.length == 1
           "\376" + item
@@ -148,11 +164,11 @@ module RSmaz
     out = ""
     s = StringScanner.new(input)
     until s.eos?
-      bv = s.get_byte[0].ord
+      bv = s.get_byte.ord
       if (bv == 254)
         out << s.get_byte
       elsif (bv == 255)
-        len = s.get_byte[0].ord + 1
+        len = s.get_byte.ord + 1
         len.times do
           out << s.get_byte
         end

data/spec/rsmaz_spec.rb CHANGED Viewed

@@ -2,12 +2,21 @@ require File.dirname(__FILE__) + '/spec_helper.rb'
 describe RSmaz do
+  before(:each) do
+    # Do some memory leak checking
+    puts "\nMemory used: #{memory_usage}K"
+  end
+  after(:each) do
+    puts "\nMemory used: #{memory_usage}K"
+  end
   it "should compress 'the' to one byte" do
     RSmaz.compress("the").length.should == 1
   end
-  it "should compress 'thex' to three bytes" do
-    RSmaz.compress("thex").length.should == 3
+  it "should compress 'thex' to two bytes" do
+    RSmaz.compress("thex").length.should == 2
   end
   it "should compress and decompress strings back to the same string" do
@@ -23,4 +32,25 @@ describe RSmaz do
   it "should properly decode a reference compression (so the internal coding doesn't change)" do
     RSmaz.decompress("\020\230`A\376o\f\026\030").should == "hello world"
   end
+  it "should compress to the same extent as the reference smaz implementation" do
+    RSmaz.compress("foobar").length.should == 4
+    RSmaz.compress("Nel mezzo del cammin di nostra vita, mi ritrovai in una selva oscura").length.should == 46
+  end
+  it "should compress and decompress lots of random strings without issues" do
+    100.times do
+      str = (1..100).map { |a| (rand(26)+97).chr }.join
+      RSmaz.decompress(RSmaz.compress(str)).length.should == str.length
+    end
+  end
+  it "should compress and decompress lots of random strings without issues (again)" do
+    100.times do
+      str = (1..100).map { |a| (rand(26)+97).chr }.join
+      RSmaz.decompress(RSmaz.compress(str)).length.should == str.length
+    end
+  end
 end

data/spec/spec_helper.rb CHANGED Viewed

@@ -8,3 +8,7 @@ end
 $:.unshift(File.dirname(__FILE__) + '/../lib')
 require 'rsmaz'
+def memory_usage
+  `ps -Orss #{Process.pid} | tail -1`.scan(/\d+/)[1].to_i rescue 0
+end

metadata CHANGED Viewed

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: peterc-rsmaz
 version: !ruby/object:Gem::Version
-  version: 0.0.2
+  version: 0.0.3
 platform: ruby
 authors:
 - Peter Cooper
@@ -32,7 +32,7 @@ dependencies:
       - !ruby/object:Gem::Version
         version: 1.8.0
     version:
-description: "Short String Compression for Ruby.  RSmaz is a pure-Ruby port of the Smaz short string compression algorithm by Salvatore Sanfilippo and released as a C library at: http://github.com/antirez/smaz/tree/master  I've done some initial cleanup of a pure Ruby->C port, but this is not yet complete. It does pass the specs, however!"
+description: "Short String Compression for Ruby.  RSmaz is a pure-Ruby port of the Smaz short string compression algorithm by Salvatore Sanfilippo and released as a C library at: http://github.com/antirez/smaz/tree/master  I've done some initial cleanup of a pure Ruby->C port, but this is not yet complete. It does pass the specs, however! Feel free to clean it up as it's a bit memory inefficient right now... :)"
 email:
 - pcooper@petercooper.co.uk
 executables: []