string-eater 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/README.md CHANGED
@@ -18,7 +18,11 @@ implementations that provide support for C extensions.
18
18
 
19
19
  ## Installation
20
20
 
21
- We'll publish this gem soon, but for now you can clone and install as
21
+ If your system is set up to allow it, you can just do
22
+
23
+ gem install string-eater
24
+
25
+ Or, if you prefer a more hands-on approach or want to hack at the source:
22
26
 
23
27
  git clone git://github.com/dantswain/string-eater.git
24
28
  cd string-eater
data/lib/c-tokenizer.rb CHANGED
@@ -14,6 +14,11 @@ class StringEater::CTokenizer
14
14
  self.tokens << StringEater::Token::new_separator(tokens)
15
15
  end
16
16
 
17
+ # This is very slow, only do it when necessary
18
+ def self.dup_tokens
19
+ Marshal.load(Marshal.dump(tokens))
20
+ end
21
+
17
22
  def initialize
18
23
  refresh_tokens
19
24
  end
@@ -22,8 +27,35 @@ class StringEater::CTokenizer
22
27
  @tokens
23
28
  end
24
29
 
30
+ def extract_all_fields
31
+ @token_filter = lambda do |t|
32
+ t.opts[:extract] = true if t.name
33
+ end
34
+ refresh_tokens
35
+ end
36
+
37
+ def extract_no_fields
38
+ @token_filter = lambda do |t|
39
+ t.opts[:extract] = false if t.name
40
+ end
41
+ refresh_tokens
42
+ end
43
+
44
+ def extract_fields *fields
45
+ @token_filter = lambda do |t|
46
+ t.opts[:extract] = fields.include?(t.name)
47
+ end
48
+ refresh_tokens
49
+ end
50
+
51
+ # This is very slow, only do it once before processing
25
52
  def refresh_tokens
26
- @tokens = self.class.tokens
53
+ @tokens = self.class.dup_tokens
54
+
55
+ if @token_filter
56
+ @tokens.each{|t| @token_filter.call(t)}
57
+ end
58
+
27
59
  tokens_to_find = tokens.each_with_index.map do |t, i|
28
60
  [i, t.string] if t.string
29
61
  end.compact
@@ -37,6 +69,8 @@ class StringEater::CTokenizer
37
69
 
38
70
  @tokens_to_extract_indexes = tokens_to_extract.map{|t| t[0]}
39
71
  @tokens_to_extract_names = tokens.map{|t| t.name}
72
+
73
+ @have_tokens_to_extract = (@tokens_to_extract_indexes.size > 0)
40
74
  end
41
75
 
42
76
  def describe_line
@@ -53,7 +87,7 @@ class StringEater::CTokenizer
53
87
  @extracted_tokens ||= {}
54
88
  @extracted_tokens.clear
55
89
 
56
- tokens.first.breakpoints[0] = 0
90
+ return unless @have_tokens_to_extract
57
91
 
58
92
  @extracted_tokens = ctokenize!(@string,
59
93
  @tokens_to_find_indexes,
data/lib/version.rb CHANGED
@@ -1,7 +1,7 @@
1
1
  module StringEater
2
2
  module VERSION
3
3
  MAJOR = 0
4
- MINOR = 1
4
+ MINOR = 2
5
5
  PATCH = 0
6
6
  PRE = nil
7
7
  STRING = [MAJOR, MINOR, PATCH, PRE].compact.join('.')
@@ -24,6 +24,7 @@ describe Example1 do
24
24
  @tokenizer = Example1.new
25
25
  @str1 = "foo bar|baz"
26
26
  @first_word1 = "foo"
27
+ @second_word1 = "bar"
27
28
  @third_word1 = "baz"
28
29
  @bp1 = [0, 3,4,7,8,11]
29
30
  end
@@ -34,6 +35,36 @@ describe Example1 do
34
35
  end
35
36
  end
36
37
 
38
+ describe "#extract_all_fields" do
39
+ it "should extract all of the fields" do
40
+ @tokenizer.extract_all_fields
41
+ @tokenizer.tokenize!(@str1)
42
+ @tokenizer.first_word.should == @first_word1
43
+ @tokenizer.second_word.should == @second_word1
44
+ @tokenizer.third_word.should == @third_word1
45
+ end
46
+ end
47
+
48
+ describe "#extract_no_fields" do
49
+ it "should not extract any of the fields" do
50
+ @tokenizer.extract_no_fields
51
+ @tokenizer.tokenize!(@str1)
52
+ @tokenizer.first_word.should be_nil
53
+ @tokenizer.second_word.should be_nil
54
+ @tokenizer.third_word.should be_nil
55
+ end
56
+ end
57
+
58
+ describe "#extract_fields" do
59
+ it "should allow us to set which fields get extracted" do
60
+ @tokenizer.extract_fields :second_word
61
+ @tokenizer.tokenize!(@str1)
62
+ @tokenizer.first_word.should be_nil
63
+ @tokenizer.second_word.should == @second_word1
64
+ @tokenizer.third_word.should be_nil
65
+ end
66
+ end
67
+
37
68
  describe "tokenize!" do
38
69
  it "should return itself" do
39
70
  @tokenizer.tokenize!(@str1).should == @tokenizer
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: string-eater
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.2.0
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-08-20 00:00:00.000000000 Z
12
+ date: 2012-08-21 00:00:00.000000000 Z
13
13
  dependencies: []
14
14
  description: Fast string tokenizer. Nom strings.
15
15
  email:
@@ -64,3 +64,4 @@ test_files:
64
64
  - spec/nginx_spec.rb
65
65
  - spec/spec_helper.rb
66
66
  - spec/string_eater_spec.rb
67
+ has_rdoc: