RubyGems - string-eater - Versions diffs - 0.1.0 → 0.2.0 - Mend

string-eater 0.1.0 → 0.2.0

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (5) hide show

data/README.md CHANGED Viewed

@@ -18,7 +18,11 @@ implemenatations that provide support for C extensions.
 ## Installation
-We'll publish this gem soon, but for now you can clone and install as
+If your system is set up to allow it, you can just do
+    gem install string-eater
+Or,  if you prefer a more hands-on approach or want to hack at the source:
     git clone git://github.com/dantswain/string-eater.git
     cd string-eater

data/lib/c-tokenizer.rb CHANGED Viewed

@@ -14,6 +14,11 @@ class StringEater::CTokenizer
     self.tokens << StringEater::Token::new_separator(tokens)
   end
+  # This is very slow, only do it when necessary
+  def self.dup_tokens
+    Marshal.load(Marshal.dump(tokens))
+  end
   def initialize
     refresh_tokens
   end
@@ -22,8 +27,35 @@ class StringEater::CTokenizer
     @tokens
   end
+  def extract_all_fields
+    @token_filter = lambda do |t|
+      t.opts[:extract] = true if t.name
+    end
+    refresh_tokens
+  end
+  def extract_no_fields
+    @token_filter = lambda do |t|
+      t.opts[:extract] = false if t.name
+    end
+    refresh_tokens
+  end
+  def extract_fields *fields
+    @token_filter = lambda do |t|
+      t.opts[:extract] = fields.include?(t.name)
+    end
+    refresh_tokens
+  end
+  # This is very slow, only do it once before processing
   def refresh_tokens
-    @tokens = self.class.tokens
+    @tokens = self.class.dup_tokens
+    if @token_filter
+      @tokens.each{|t| @token_filter.call(t)}
+    end
     tokens_to_find = tokens.each_with_index.map do |t, i|
       [i, t.string] if t.string
     end.compact
@@ -37,6 +69,8 @@ class StringEater::CTokenizer
     @tokens_to_extract_indexes = tokens_to_extract.map{|t| t[0]}
     @tokens_to_extract_names = tokens.map{|t| t.name}
+    @have_tokens_to_extract = (@tokens_to_extract_indexes.size > 0)
   end
   def describe_line
@@ -53,7 +87,7 @@ class StringEater::CTokenizer
     @extracted_tokens ||= {}
     @extracted_tokens.clear
-    tokens.first.breakpoints[0] = 0
+    return unless @have_tokens_to_extract
     @extracted_tokens = ctokenize!(@string,
                                    @tokens_to_find_indexes,

data/lib/version.rb CHANGED Viewed

@@ -1,7 +1,7 @@
 module StringEater
   module VERSION
     MAJOR = 0
-    MINOR = 1
+    MINOR = 2
     PATCH = 0
     PRE   = nil
     STRING = [MAJOR, MINOR, PATCH, PRE].compact.join('.')

data/spec/string_eater_spec.rb CHANGED Viewed

@@ -24,6 +24,7 @@ describe Example1 do
     @tokenizer = Example1.new
     @str1 = "foo bar|baz"
     @first_word1 = "foo"
+    @second_word1 = "bar"
     @third_word1 = "baz"
     @bp1 = [0, 3,4,7,8,11]
   end
@@ -34,6 +35,36 @@ describe Example1 do
     end
   end
+  describe "#extract_all_fields" do
+    it "should extract all of the fields" do
+      @tokenizer.extract_all_fields
+      @tokenizer.tokenize!(@str1)
+      @tokenizer.first_word.should == @first_word1
+      @tokenizer.second_word.should == @second_word1
+      @tokenizer.third_word.should == @third_word1
+    end
+  end
+  describe "#extract_no_fields" do
+    it "should not extract any of the fields" do
+      @tokenizer.extract_no_fields
+      @tokenizer.tokenize!(@str1)
+      @tokenizer.first_word.should be_nil
+      @tokenizer.second_word.should be_nil
+      @tokenizer.third_word.should be_nil
+    end
+  end
+  describe "#extract_fields" do
+    it "should allow us to set which fields get extracted" do
+      @tokenizer.extract_fields :second_word
+      @tokenizer.tokenize!(@str1)
+      @tokenizer.first_word.should be_nil
+      @tokenizer.second_word.should == @second_word1
+      @tokenizer.third_word.should be_nil
+    end
+  end
   describe "tokenize!" do
     it "should return itself" do
       @tokenizer.tokenize!(@str1).should == @tokenizer

metadata CHANGED Viewed

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: string-eater
 version: !ruby/object:Gem::Version
-  version: 0.1.0
+  version: 0.2.0
   prerelease:
 platform: ruby
 authors:
@@ -9,7 +9,7 @@ authors:
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2012-08-20 00:00:00.000000000 Z
+date: 2012-08-21 00:00:00.000000000 Z
 dependencies: []
 description: Fast string tokenizer. Nom strings.
 email:
@@ -64,3 +64,4 @@ test_files:
 - spec/nginx_spec.rb
 - spec/spec_helper.rb
 - spec/string_eater_spec.rb
+has_rdoc: