keyword_prospector 0.8.0
Sign up to get free protection for your applications and to get access to all the features.
- data/History.txt +4 -0
- data/License.txt +20 -0
- data/Manifest.txt +40 -0
- data/README.txt +95 -0
- data/Rakefile +4 -0
- data/config/hoe.rb +73 -0
- data/config/requirements.rb +15 -0
- data/lib/hyperlink_strategy.rb +56 -0
- data/lib/keyword_decorator.rb +7 -0
- data/lib/keyword_linker.rb +174 -0
- data/lib/keyword_prospector.rb +171 -0
- data/lib/lookup_chain.rb +65 -0
- data/lib/match.rb +37 -0
- data/lib/profile.rb +72 -0
- data/lib/search_and_replace.rb +45 -0
- data/lib/state.rb +85 -0
- data/script/console +10 -0
- data/script/destroy +14 -0
- data/script/generate +14 -0
- data/script/txt2html +82 -0
- data/setup.rb +1585 -0
- data/spec/hyperlink_strategy_spec.rb +69 -0
- data/spec/keyword_linker_spec.rb +226 -0
- data/spec/keyword_prospector_spec.rb +232 -0
- data/spec/lookup_chain_spec.rb +104 -0
- data/spec/match_spec.rb +140 -0
- data/spec/search_and_replace_spec.rb +58 -0
- data/spec/spec.opts +1 -0
- data/spec/spec_helper.rb +10 -0
- data/spec/state_spec.rb +128 -0
- data/tasks/deployment.rake +34 -0
- data/tasks/environment.rake +7 -0
- data/tasks/rspec.rake +21 -0
- data/tasks/website.rake +17 -0
- data/website/index.html +141 -0
- data/website/index.txt +83 -0
- data/website/javascripts/rounded_corners_lite.inc.js +285 -0
- data/website/stylesheets/screen.css +138 -0
- data/website/template.html.erb +48 -0
- metadata +107 -0
@@ -0,0 +1,69 @@
|
|
1
|
+
#
|
2
|
+
# (C) 2008 Los Angeles Times
|
3
|
+
#
|
4
|
+
require File.dirname(__FILE__) + '/spec_helper'
|
5
|
+
require 'hyperlink_strategy'
|
6
|
+
|
7
|
+
describe HyperlinkStrategy do
|
8
|
+
before :each do
|
9
|
+
@strategy = HyperlinkStrategy.new
|
10
|
+
end
|
11
|
+
|
12
|
+
it "Should create hyperlinks to the provided URL" do
|
13
|
+
@strategy.url="http://travel.latimes.com"
|
14
|
+
|
15
|
+
@strategy.decorate("Foo").should == "<a href=\"#{@strategy.url}\">Foo</a>"
|
16
|
+
end
|
17
|
+
|
18
|
+
it "should accept url and options in the constructor" do
|
19
|
+
tmp = HyperlinkStrategy.new(:url, :foo => :bar)
|
20
|
+
tmp.url.should == :url
|
21
|
+
tmp.options[:foo].should == :bar
|
22
|
+
end
|
23
|
+
|
24
|
+
it "should accept options to specify html attributes" do
|
25
|
+
@strategy.options = {:title => "foo title", :style => "hidden;"}
|
26
|
+
@strategy.url = 'foourl'
|
27
|
+
|
28
|
+
linked_text = @strategy.decorate("Foo")
|
29
|
+
linked_text.should match(%r{<a .*>Foo</a>})
|
30
|
+
linked_text.should match(%r{href="foourl"})
|
31
|
+
linked_text.should match(%r{title="foo title"})
|
32
|
+
linked_text.should match(%r{style="hidden;"})
|
33
|
+
end
|
34
|
+
|
35
|
+
describe "keywords" do
|
36
|
+
it "should allow setting and retrieving keywords" do
|
37
|
+
keywords = %w{a b c d e}
|
38
|
+
@strategy.keywords = keywords
|
39
|
+
|
40
|
+
@strategy.keywords.should == Set.new(keywords)
|
41
|
+
end
|
42
|
+
|
43
|
+
it "should allow comma-separated strings for setting" do
|
44
|
+
@strategy.keywords = "foo", "bar", "baz"
|
45
|
+
@strategy.keywords.should == Set.new(["foo", "bar", "baz"])
|
46
|
+
end
|
47
|
+
|
48
|
+
it "should allow a single string for setting" do
|
49
|
+
@strategy.keywords = "xyzzy"
|
50
|
+
@strategy.keywords.should == Set.new(["xyzzy"])
|
51
|
+
end
|
52
|
+
end
|
53
|
+
|
54
|
+
describe "add_keyword" do
|
55
|
+
# A fresh strategy starts with an empty keyword set; add_keyword returns an
# object whose #keywords then contains exactly the added keyword.
it "should add keywords to empty set" do
  @strategy.keywords.should == Set.new

  # Set.new expects an Enumerable. A bare String is not Enumerable on
  # Ruby 1.9+, so the expected keyword is wrapped in an Array.
  @strategy.add_keyword("foo").keywords.should == Set.new(["foo"])
end
|
60
|
+
|
61
|
+
# Adding a keyword to a strategy that already has keywords must yield the
# previous keywords plus the new one.
it "should add keywords to existing set" do
  # %w splits on whitespace only; commas inside the literal would become
  # part of the keywords themselves ("foo,", "bar,").
  keywords = %w{foo bar baz}
  @strategy.keywords = keywords

  @strategy.add_keyword("xyzzy").keywords.should ==
    Set.new(keywords + ["xyzzy"])
end
|
68
|
+
end
|
69
|
+
end
|
@@ -0,0 +1,226 @@
|
|
1
|
+
#
|
2
|
+
# (C) 2008 Los Angeles Times
|
3
|
+
#
|
4
|
+
require File.dirname(__FILE__) + '/spec_helper'
|
5
|
+
require 'keyword_linker'
|
6
|
+
|
7
|
+
describe KeywordLinker do
|
8
|
+
before(:each) do
|
9
|
+
@kl = KeywordLinker.new
|
10
|
+
end
|
11
|
+
|
12
|
+
describe :add_url do
|
13
|
+
it "should accept a string (url) and a single keyword" do
|
14
|
+
@kl.add_url("url", "keyword")
|
15
|
+
end
|
16
|
+
|
17
|
+
it "should accept a string (url) and an array of keywords" do
|
18
|
+
@kl.add_url("url", ["keyword1", "keyword2"])
|
19
|
+
end
|
20
|
+
|
21
|
+
it "should accept options for html attributes" do
|
22
|
+
@kl.add_url("url", "keyword", :class => "awesome")
|
23
|
+
|
24
|
+
linked_text = @kl.link_text("keyword")
|
25
|
+
|
26
|
+
linked_text.should match(%r{^<a .*>keyword</a>$})
|
27
|
+
linked_text.should match(%r{href="url"})
|
28
|
+
linked_text.should match(%r{class="awesome"})
|
29
|
+
end
|
30
|
+
end
|
31
|
+
|
32
|
+
describe :link_text do
|
33
|
+
it "should init_tree if I forget to" do
|
34
|
+
@kl.add_url("url", "foo")
|
35
|
+
|
36
|
+
@kl.link_text("Is there a foo in the house?").should ==
|
37
|
+
"Is there a <a href=\"url\">foo</a> in the house?"
|
38
|
+
end
|
39
|
+
|
40
|
+
it "should return original text when there are no matches" do
|
41
|
+
@kl.add_url("url", "foo")
|
42
|
+
@kl.init_tree
|
43
|
+
|
44
|
+
orig_text = "Is there a bar in the house?"
|
45
|
+
@kl.link_text(orig_text).should == orig_text
|
46
|
+
end
|
47
|
+
|
48
|
+
it "should return linked text when URL's are provided with keyword" do
|
49
|
+
@kl.add_url("url", "foo")
|
50
|
+
@kl.init_tree
|
51
|
+
|
52
|
+
@kl.link_text("Is there a foo in the house?").should ==
|
53
|
+
"Is there a <a href=\"url\">foo</a> in the house?"
|
54
|
+
end
|
55
|
+
|
56
|
+
it "should return linked text when URL's are provided with keyword array" do
|
57
|
+
@kl.add_url("url", %w{foo bar baz})
|
58
|
+
@kl.init_tree
|
59
|
+
|
60
|
+
@kl.link_text("pool bar party").should == "pool <a href=\"url\">bar</a> party"
|
61
|
+
end
|
62
|
+
|
63
|
+
it "should link correctly at the beginning of the text" do
|
64
|
+
@kl.add_url("url", "foo")
|
65
|
+
|
66
|
+
@kl.link_text("foo is the word").should == '<a href="url">foo</a> is the word'
|
67
|
+
end
|
68
|
+
|
69
|
+
it "should link correctly at the end of the text" do
|
70
|
+
@kl.add_url("url", "foo")
|
71
|
+
|
72
|
+
@kl.link_text("the word is foo").should == 'the word is <a href="url">foo</a>'
|
73
|
+
end
|
74
|
+
|
75
|
+
it "should perform multiple links in the text" do
|
76
|
+
@kl.add_url("url1", "foo")
|
77
|
+
@kl.add_url("url2", "bar")
|
78
|
+
|
79
|
+
@kl.link_text("the foo and the bar are awesome").should ==
|
80
|
+
'the <a href="url1">foo</a> and the <a href="url2">bar</a> are awesome'
|
81
|
+
end
|
82
|
+
|
83
|
+
it "should link only the first instance of each keyword" do
|
84
|
+
@kl.add_url("url", "foo")
|
85
|
+
|
86
|
+
@kl.link_text("foo, foo, or foo?").should == '<a href="url">foo</a>, foo, or foo?'
|
87
|
+
end
|
88
|
+
|
89
|
+
it "should link only the first instance of each keyword in separate text elements" do
|
90
|
+
@kl.add_url("url", "foo")
|
91
|
+
|
92
|
+
@kl.link_text("<i>foo</i>, <b>foo</b>, or <u>foo</u>?").should == '<i><a href="url">foo</a></i>, <b>foo</b>, or <u>foo</u>?'
|
93
|
+
end
|
94
|
+
|
95
|
+
it "should link only the first instance of each url" do
|
96
|
+
@kl.add_url("url", %w[foo bar baz])
|
97
|
+
|
98
|
+
@kl.link_text("bar, baz, or foo?").should == '<a href="url">bar</a>, baz, or foo?'
|
99
|
+
end
|
100
|
+
|
101
|
+
it "should link longest match in overlapping text" do
|
102
|
+
@kl.add_url("url", ["foo bar", "bar baz xyzzy"])
|
103
|
+
|
104
|
+
@kl.link_text("foo bar baz xyzzy").should == 'foo <a href="url">bar baz xyzzy</a>'
|
105
|
+
end
|
106
|
+
|
107
|
+
describe "with another KeywordLinker in the constructor" do
|
108
|
+
before(:each) do
|
109
|
+
@combo_linker = KeywordLinker.new(@kl)
|
110
|
+
end
|
111
|
+
|
112
|
+
it "should link keywords from both linkers" do
|
113
|
+
@kl.add_url("foourl", "foo")
|
114
|
+
@combo_linker.add_url("barurl", "bar")
|
115
|
+
|
116
|
+
@combo_linker.link_text("foo bar").should == '<a href="foourl">foo</a> <a href="barurl">bar</a>'
|
117
|
+
end
|
118
|
+
|
119
|
+
it "should prioritize its own keywords over the parent's keywords" do
|
120
|
+
@kl.add_url("foourl1", "foo")
|
121
|
+
@combo_linker.add_url("foourl2", "foo")
|
122
|
+
|
123
|
+
@combo_linker.link_text("foo").should == '<a href="foourl2">foo</a>'
|
124
|
+
end
|
125
|
+
end
|
126
|
+
|
127
|
+
describe "with an array of KeywordLinkers as parents in the constructor" do
|
128
|
+
before(:each) do
|
129
|
+
@kl2 = KeywordLinker.new
|
130
|
+
@combo_linker = KeywordLinker.new([@kl, @kl2])
|
131
|
+
end
|
132
|
+
|
133
|
+
it "should link keywords from all linkers" do
|
134
|
+
@kl.add_url("foourl", "foo")
|
135
|
+
@kl2.add_url("barurl", "bar")
|
136
|
+
@combo_linker.add_url("bazurl", "baz")
|
137
|
+
|
138
|
+
@combo_linker.link_text("foo bar baz").should == '<a href="foourl">foo</a> <a href="barurl">bar</a> <a href="bazurl">baz</a>'
|
139
|
+
end
|
140
|
+
|
141
|
+
it "should prioritize its own keywords over the parents' keywords" do
|
142
|
+
@kl.add_url("foourl1", "foo")
|
143
|
+
@kl2.add_url("foourl2", "foo")
|
144
|
+
@combo_linker.add_url("foourl3", "foo")
|
145
|
+
|
146
|
+
@combo_linker.link_text("foo").should == '<a href="foourl3">foo</a>'
|
147
|
+
end
|
148
|
+
end
|
149
|
+
|
150
|
+
describe "with an array of KeywordLinkers as lookups in the constructor" do
|
151
|
+
before(:each) do
|
152
|
+
@kl2 = KeywordLinker.new
|
153
|
+
@combo_linker = KeywordLinker.new([@kl, @kl2])
|
154
|
+
end
|
155
|
+
|
156
|
+
it "should link keywords from all lookups" do
|
157
|
+
@kl.add_url("foourl", "foo")
|
158
|
+
@kl2.add_url("barurl", "bar")
|
159
|
+
|
160
|
+
@combo_linker.link_text("foo bar baz").should == '<a href="foourl">foo</a> <a href="barurl">bar</a> baz'
|
161
|
+
end
|
162
|
+
end
|
163
|
+
|
164
|
+
describe "with an arbitrary lookup object in the constructor" do
|
165
|
+
it "should provide results from the lookup object" do
|
166
|
+
lookup = mock(Object)
|
167
|
+
lookup.should_receive(:process).with(:text).and_return([:result])
|
168
|
+
kl = KeywordLinker.new(lookup)
|
169
|
+
kl.process(:text).should == [:result]
|
170
|
+
end
|
171
|
+
|
172
|
+
it "should reject objects from the constructor if they don't have a process method" do
|
173
|
+
lookup = mock(Object)
|
174
|
+
lambda{KeywordLinker.new(nil, lookup)}.should raise_error(ArgumentError)
|
175
|
+
end
|
176
|
+
end
|
177
|
+
|
178
|
+
describe "with multiple level hierarchy" do
|
179
|
+
before(:each) do
|
180
|
+
@kl2 = KeywordLinker.new(@kl)
|
181
|
+
@kl3 = KeywordLinker.new(@kl2)
|
182
|
+
@combo_linker = KeywordLinker.new(@kl3)
|
183
|
+
end
|
184
|
+
|
185
|
+
it "should link keywords from all linkers" do
|
186
|
+
@kl.add_url("foourl", "foo")
|
187
|
+
@kl2.add_url("barurl", "bar")
|
188
|
+
@kl3.add_url("bazurl", "baz")
|
189
|
+
@combo_linker.add_url("xyzzyurl", "xyzzy")
|
190
|
+
|
191
|
+
@combo_linker.link_text("foo bar baz xyzzy").should == '<a href="foourl">foo</a> <a href="barurl">bar</a> <a href="bazurl">baz</a> <a href="xyzzyurl">xyzzy</a>'
|
192
|
+
end
|
193
|
+
end
|
194
|
+
end
|
195
|
+
|
196
|
+
describe "linking html text" do
|
197
|
+
it "should skip linking inside tag attributes" do
|
198
|
+
@kl.add_url("url", "foo")
|
199
|
+
|
200
|
+
@kl.link_text('<td title="another foo for you">foo</td>').should ==
|
201
|
+
'<td title="another foo for you"><a href="url">foo</a></td>'
|
202
|
+
end
|
203
|
+
|
204
|
+
it "should not link inside of <a></a> tags" do
|
205
|
+
@kl.add_url("url", "foo")
|
206
|
+
|
207
|
+
@kl.link_text('<a href="bar">baz foo bar</a> and foo').should ==
|
208
|
+
'<a href="bar">baz foo bar</a> and <a href="url">foo</a>'
|
209
|
+
end
|
210
|
+
|
211
|
+
it "shouldn't choke on bogus etags" do
|
212
|
+
@kl.add_url("url", "foo")
|
213
|
+
|
214
|
+
lambda{@kl.link_text('foo </i>')}.should_not raise_error
|
215
|
+
end
|
216
|
+
end
|
217
|
+
|
218
|
+
describe "blacklisting keywords" do
|
219
|
+
it "should stop linking of every occurrence of the keyword" do
|
220
|
+
@kl.add_url("url", "Los Angeles")
|
221
|
+
@kl.blacklist_keyword("Los Angeles Times")
|
222
|
+
|
223
|
+
@kl.link_text("Los Angeles Times Los Angeles Times").should == "Los Angeles Times Los Angeles Times"
|
224
|
+
end
|
225
|
+
end
|
226
|
+
end
|
@@ -0,0 +1,232 @@
|
|
1
|
+
#
|
2
|
+
# (C) 2008 Los Angeles Times
|
3
|
+
#
|
4
|
+
require File.dirname(__FILE__) + '/spec_helper'
|
5
|
+
require 'set'
|
6
|
+
|
7
|
+
describe KeywordProspector do
|
8
|
+
it "should match keywords in text, respecting word boundaries" do
|
9
|
+
dl = KeywordProspector.new
|
10
|
+
|
11
|
+
dl.add('test')
|
12
|
+
dl.add('fido')
|
13
|
+
dl.add('te')
|
14
|
+
dl.add('fi')
|
15
|
+
dl.add('dot')
|
16
|
+
dl.add('dots')
|
17
|
+
dl.add('sis')
|
18
|
+
|
19
|
+
dl.construct_fail
|
20
|
+
|
21
|
+
matches = []
|
22
|
+
dl.process('hello fido this is a test') {|x| matches << x}
|
23
|
+
matches.size.should == 2
|
24
|
+
(matches.collect{|match| match.keyword} & %w{test fido}).
|
25
|
+
size.should == 2
|
26
|
+
end
|
27
|
+
|
28
|
+
it "should give correct location for a single match within a string" do
|
29
|
+
dl = KeywordProspector.new(["foo"])
|
30
|
+
|
31
|
+
match = nil
|
32
|
+
dl.process("A foo and his money are soon parted") {|x| match = x}
|
33
|
+
|
34
|
+
match.start_idx.should == 2
|
35
|
+
match.end_idx.should == 5
|
36
|
+
end
|
37
|
+
|
38
|
+
# Each match yielded for "foo bar" must carry its keyword and its offsets.
# The expected values show end_idx as one past the last matched character
# (0 and 3 for "foo", 4 and 7 for "bar").
it "should give correct location for the second match within a string" do
  dl = KeywordProspector.new(["foo", "bar"])

  # Collect the yielded matches in the order the block receives them.
  matches = []
  dl.process("foo bar") {|x| matches << x}

  matches[0].keyword.should == "foo"
  matches[0].start_idx.should == 0
  matches[0].end_idx.should == 3
  matches[1].keyword.should == "bar"
  matches[1].start_idx.should == 4
  matches[1].end_idx.should == 7
end
|
52
|
+
|
53
|
+
it "Should include information about where the match is present in the string" do
|
54
|
+
dl = KeywordProspector.new %w{foo oo bar baz xyzzy thud}
|
55
|
+
|
56
|
+
matches = {}
|
57
|
+
|
58
|
+
dl.process('foo, bar, xyzzy and also the baz') {|x| matches[x.keyword] = x}
|
59
|
+
|
60
|
+
matches["foo"].should_not be_nil
|
61
|
+
matches["bar"].should_not be_nil
|
62
|
+
matches["baz"].should_not be_nil
|
63
|
+
matches["xyzzy"].should_not be_nil
|
64
|
+
|
65
|
+
matches["foo"].start_idx.should == 0
|
66
|
+
matches["foo"].end_idx.should == 3
|
67
|
+
matches["bar"].start_idx.should == 5
|
68
|
+
matches["bar"].end_idx.should == 8
|
69
|
+
matches["xyzzy"].start_idx.should == 10
|
70
|
+
matches["xyzzy"].end_idx.should == 15
|
71
|
+
matches["baz"].start_idx.should == 29
|
72
|
+
matches["baz"].end_idx.should == 32
|
73
|
+
end
|
74
|
+
|
75
|
+
it "should match a single word to itself" do
|
76
|
+
dl = KeywordProspector.new(["foo"])
|
77
|
+
count = 0
|
78
|
+
dl.process("foo"){count += 1}
|
79
|
+
count.should == 1
|
80
|
+
end
|
81
|
+
|
82
|
+
it "should not match a single word to a different word" do
|
83
|
+
dl = KeywordProspector.new(["foo"])
|
84
|
+
count = 0
|
85
|
+
dl.process("bar"){count += 1}
|
86
|
+
count.should == 0
|
87
|
+
end
|
88
|
+
|
89
|
+
it "should call the block once for every match" do
|
90
|
+
dl = KeywordProspector.new(["foo"])
|
91
|
+
count = 0
|
92
|
+
dl.process("foo foo foo"){count += 1}
|
93
|
+
count.should == 3
|
94
|
+
end
|
95
|
+
|
96
|
+
it "Should get correct start and end matches with overlapping matches" do
|
97
|
+
keywords = ['Sling Blade', 'Blade Runner', 'foo', 'bar']
|
98
|
+
dl = KeywordProspector.new(keywords)
|
99
|
+
candidate = 'Sling Blade Runner foo bar'
|
100
|
+
matches = {}
|
101
|
+
dl.process(candidate) {|x| matches[x.keyword] = x}
|
102
|
+
|
103
|
+
keywords.each do |keyword|
|
104
|
+
matches[keyword].start_idx.should == candidate.index(keyword)
|
105
|
+
matches[keyword].end_idx.should == candidate.index(keyword) +
|
106
|
+
keyword.length
|
107
|
+
end
|
108
|
+
end
|
109
|
+
|
110
|
+
it "returns a sorted array of matches when a block is not given" do
|
111
|
+
keywords = %w{foo bar baz xyzzy thud}
|
112
|
+
|
113
|
+
dl = KeywordProspector.new(keywords)
|
114
|
+
results =
|
115
|
+
dl.process("The best metavariables are thud, xyzzy, and of course foo.")
|
116
|
+
|
117
|
+
results.class.should == Array
|
118
|
+
|
119
|
+
results.should == results.sort
|
120
|
+
end
|
121
|
+
|
122
|
+
# With :filter_overlaps => true, the spec expects only the longer keyword of
# each overlapping pair to be returned ("a b c" over "c d", "f g h" over
# "e f", "k l m n o p q" over "i j k l m"), in order of appearance.
it "filters out shorter matches when multiple matches overlap" do
  dl = KeywordProspector.new(["a b c", "c d", "e f", "f g h", "i j k l m",
                              "k l m n o p q"])

  results = dl.process("a b c d, e f g h, i j k l m n o p q",
                       :filter_overlaps => true)

  results.size.should == 3

  results[0].keyword.should == 'a b c'
  results[1].keyword.should == 'f g h'
  results[2].keyword.should == 'k l m n o p q'
end
|
135
|
+
|
136
|
+
it "detects word chars" do
|
137
|
+
KeywordProspector.word_char?(?a).should be_true
|
138
|
+
KeywordProspector.word_char?(?k).should be_true
|
139
|
+
KeywordProspector.word_char?(?z).should be_true
|
140
|
+
KeywordProspector.word_char?(?A).should be_true
|
141
|
+
KeywordProspector.word_char?(?K).should be_true
|
142
|
+
KeywordProspector.word_char?(?Z).should be_true
|
143
|
+
KeywordProspector.word_char?(?0).should be_true
|
144
|
+
KeywordProspector.word_char?(?7).should be_true
|
145
|
+
KeywordProspector.word_char?(?9).should be_true
|
146
|
+
KeywordProspector.word_char?(?_).should be_true
|
147
|
+
end
|
148
|
+
|
149
|
+
it "detects non-word chars" do
|
150
|
+
KeywordProspector.word_char?(?-).should be_false
|
151
|
+
KeywordProspector.word_char?(?>).should be_false
|
152
|
+
KeywordProspector.word_char?(?<).should be_false
|
153
|
+
KeywordProspector.word_char?(?.).should be_false
|
154
|
+
KeywordProspector.word_char?(32).should be_false
|
155
|
+
KeywordProspector.word_char?(9).should be_false
|
156
|
+
end
|
157
|
+
|
158
|
+
it "word_delimiter? is opposite of word_char?" do
|
159
|
+
KeywordProspector.word_delimiter?(?.).should be_true
|
160
|
+
KeywordProspector.word_delimiter?(32).should be_true
|
161
|
+
KeywordProspector.word_delimiter?(?K).should be_false
|
162
|
+
end
|
163
|
+
|
164
|
+
describe "word boundary detection" do
|
165
|
+
before(:each) do
|
166
|
+
keywords = %w{foo bar baz xyzzy thud}
|
167
|
+
@dl = KeywordProspector.new(keywords)
|
168
|
+
end
|
169
|
+
|
170
|
+
describe "allows" do
|
171
|
+
it "matching at beginning of string" do
|
172
|
+
results = @dl.process("foo is the word")
|
173
|
+
|
174
|
+
results.size.should == 1
|
175
|
+
results[0].keyword.should == "foo"
|
176
|
+
end
|
177
|
+
|
178
|
+
it "matching at end of string" do
|
179
|
+
results = @dl.process("the word is bar")
|
180
|
+
|
181
|
+
results.size.should == 1
|
182
|
+
results[0].keyword.should == "bar"
|
183
|
+
end
|
184
|
+
end
|
185
|
+
|
186
|
+
describe "doesn't allow" do
|
187
|
+
it "matches not starting on a word boundary" do
|
188
|
+
results = @dl.process("topaz is a gem but tobaz is not")
|
189
|
+
results.size.should == 0
|
190
|
+
end
|
191
|
+
|
192
|
+
it "matches not ending on a word boundary" do
|
193
|
+
results = @dl.process("are you xyzzypated?")
|
194
|
+
results.size.should == 0
|
195
|
+
end
|
196
|
+
|
197
|
+
it "matches at the beginning of the string and not ending on a word boundary" do
|
198
|
+
results = @dl.process("fooby you too?")
|
199
|
+
results.size.should == 0
|
200
|
+
end
|
201
|
+
|
202
|
+
it "multiple candidate matches in various places" do
|
203
|
+
results = @dl.process("fooby barby bazby tofoo tobar tobaz ambazbafoo")
|
204
|
+
results.size.should == 0
|
205
|
+
end
|
206
|
+
end
|
207
|
+
end
|
208
|
+
|
209
|
+
describe "with decoration strategy objects" do
|
210
|
+
# Building a KeywordProspector from a strategy object must call the
# strategy's #keywords; the message expectation below is the only assertion.
it "should read keywords from the object" do
  strategy = Object.new
  strategy.should_receive(:keywords).and_return(Set.new(%w{foo bar baz}))

  # Only the constructor call matters here, so the instance is not kept.
  KeywordProspector.new([strategy])
end
|
216
|
+
|
217
|
+
it "should return strategy objects in results" do
|
218
|
+
strategy = Object.new
|
219
|
+
strategy.should_receive(:keywords).and_return(Set.new(%w{foo bar baz}))
|
220
|
+
|
221
|
+
dl = KeywordProspector.new([strategy])
|
222
|
+
|
223
|
+
results = dl.process("foo, bar, and baz")
|
224
|
+
|
225
|
+
results.size.should == 3
|
226
|
+
|
227
|
+
results.each do |result|
|
228
|
+
result.output.should == strategy
|
229
|
+
end
|
230
|
+
end
|
231
|
+
end
|
232
|
+
end
|
@@ -0,0 +1,104 @@
|
|
1
|
+
#
|
2
|
+
# (C) 2008 Los Angeles Times
|
3
|
+
#
|
4
|
+
require File.dirname(__FILE__) + '/spec_helper'
|
5
|
+
require 'lookup_chain'
|
6
|
+
require 'match'
|
7
|
+
|
8
|
+
describe LookupChain do
|
9
|
+
before(:each) do
|
10
|
+
@dl1 = mock(Object, :process => :dummy_method)
|
11
|
+
@dl2 = mock(Object, :process => :dummy_method)
|
12
|
+
end
|
13
|
+
|
14
|
+
describe :initialize do
|
15
|
+
describe "should check all objects for a process method" do
|
16
|
+
it "when given an array of objects" do
|
17
|
+
lambda {LookupChain.new([@dl1])}.should_not raise_error
|
18
|
+
lambda {LookupChain.new([Object.new])}.should raise_error(ArgumentError)
|
19
|
+
end
|
20
|
+
|
21
|
+
it "when given multiple objects in constructor" do
|
22
|
+
lambda {LookupChain.new(@dl1, @dl2)}.should_not raise_error
|
23
|
+
lambda {LookupChain.new(@dl1, Object.new)}.should raise_error(ArgumentError)
|
24
|
+
end
|
25
|
+
end
|
26
|
+
end
|
27
|
+
|
28
|
+
describe :<< do
|
29
|
+
it "should check for a process method" do
|
30
|
+
lambda{LookupChain.new << @dl1}.should_not raise_error
|
31
|
+
lambda{LookupChain.new << Object.new}.should raise_error(ArgumentError)
|
32
|
+
end
|
33
|
+
|
34
|
+
it "should add to the end of the list of lookups" do
|
35
|
+
lc = LookupChain.new
|
36
|
+
lc.lookups.should == []
|
37
|
+
|
38
|
+
lc << @dl1
|
39
|
+
lc.lookups.should == [@dl1]
|
40
|
+
|
41
|
+
lc << @dl2
|
42
|
+
lc.lookups.should == [@dl1, @dl2]
|
43
|
+
end
|
44
|
+
end
|
45
|
+
|
46
|
+
describe :lookups do
|
47
|
+
it "should return an array of lookup objects assigned in constructor" do
|
48
|
+
LookupChain.new(@dl1, @dl2).lookups.should == [@dl1, @dl2]
|
49
|
+
LookupChain.new([@dl2, @dl1]).lookups.should == [@dl2, @dl1]
|
50
|
+
end
|
51
|
+
end
|
52
|
+
|
53
|
+
describe :process do
|
54
|
+
it "should call process on all child objects" do
|
55
|
+
@dl1.should_receive(:process).with(:text).and_return([])
|
56
|
+
@dl2.should_receive(:process).with(:text).and_return([])
|
57
|
+
lc = LookupChain.new(@dl1, @dl2)
|
58
|
+
|
59
|
+
lc.process(:text)
|
60
|
+
end
|
61
|
+
|
62
|
+
it "should return a sorted list of match objects from all lookups" do
|
63
|
+
match1 = Match.new("match1", 0, 3)
|
64
|
+
match2 = Match.new("match2", 5, 7)
|
65
|
+
match3 = Match.new("match3", 11, 13)
|
66
|
+
match4 = Match.new("match4", 19, 31)
|
67
|
+
|
68
|
+
@dl1.should_receive(:process).with(:text).and_return([match1, match4])
|
69
|
+
@dl2.should_receive(:process).with(:text).and_return([match2, match3])
|
70
|
+
|
71
|
+
lc = LookupChain.new(@dl1, @dl2)
|
72
|
+
|
73
|
+
lc.process(:text).should == [match1, match2, match3, match4]
|
74
|
+
end
|
75
|
+
|
76
|
+
it "should return the longest match when there are overlaps, regardless of priority order" do
|
77
|
+
match1 = Match.new("match1", 0, 3)
|
78
|
+
match2 = Match.new("match2", 1, 7)
|
79
|
+
|
80
|
+
@dl1.stub!(:process).with(:text).and_return([match1])
|
81
|
+
@dl2.stub!(:process).with(:text).and_return([match2])
|
82
|
+
|
83
|
+
lc = LookupChain.new(@dl1, @dl2)
|
84
|
+
lc.process(:text).should == [match2]
|
85
|
+
|
86
|
+
lc = LookupChain.new(@dl2, @dl1)
|
87
|
+
lc.process(:text).should == [match2]
|
88
|
+
end
|
89
|
+
|
90
|
+
it "should prioritize the first lookup object in the list when there are overlapping matches of equal length" do
|
91
|
+
match1 = Match.new("match1", 0, 3)
|
92
|
+
match2 = Match.new("match2", 1, 4)
|
93
|
+
|
94
|
+
@dl1.stub!(:process).with(:text).and_return([match1])
|
95
|
+
@dl2.stub!(:process).with(:text).and_return([match2])
|
96
|
+
|
97
|
+
lc = LookupChain.new(@dl1, @dl2)
|
98
|
+
lc.process(:text).should == [match1]
|
99
|
+
|
100
|
+
lc = LookupChain.new(@dl2, @dl1)
|
101
|
+
lc.process(:text).should == [match2]
|
102
|
+
end
|
103
|
+
end
|
104
|
+
end
|