wombat 2.4.0 → 2.5.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.travis.yml +2 -0
- data/CHANGELOG.md +67 -0
- data/Gemfile.lock +52 -50
- data/README.md +4 -4
- data/VERSION +1 -1
- data/examples/iterator.rb +25 -25
- data/examples/list.rb +22 -22
- data/examples/xml.rb +15 -15
- data/fixtures/vcr_cassettes/make_post_request.yml +69 -0
- data/lib/wombat/crawler.rb +13 -7
- data/lib/wombat/dsl/metadata.rb +8 -0
- data/lib/wombat/processing/parser.rb +22 -7
- data/lib/wombat/property/locators/factory.rb +28 -28
- data/lib/wombat/property/locators/follow.rb +15 -38
- data/lib/wombat/property/locators/list.rb +13 -13
- data/lib/wombat.rb +8 -8
- data/spec/crawler_spec.rb +64 -42
- data/spec/integration/crawler_inheritance_spec.rb +63 -0
- data/spec/integration/integration_spec.rb +30 -0
- data/spec/processing/parser_spec.rb +35 -0
- data/spec/property/locators/factory_spec.rb +14 -14
- data/spec/property/locators/html_spec.rb +18 -18
- data/spec/property/locators/list_spec.rb +8 -8
- data/spec/property/locators/text_spec.rb +44 -44
- data/spec/wombat_spec.rb +36 -36
- data/wombat.gemspec +7 -3
- metadata +6 -2
@@ -11,32 +11,32 @@ require 'wombat/property/locators/headers'
|
|
11
11
|
class Wombat::Property::Locators::UnknownTypeException < Exception; end;
|
12
12
|
|
13
13
|
module Wombat
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
14
|
+
module Property
|
15
|
+
module Locators
|
16
|
+
module Factory
|
17
|
+
def self.locator_for(property)
|
18
|
+
klass = case(property.wombat_property_format)
|
19
|
+
when :text
|
20
|
+
Text
|
21
|
+
when :list
|
22
|
+
List
|
23
|
+
when :html
|
24
|
+
Html
|
25
|
+
when :iterator
|
26
|
+
Iterator
|
27
|
+
when :container
|
28
|
+
PropertyGroup
|
29
|
+
when :follow
|
30
|
+
Follow
|
31
|
+
when :headers
|
32
|
+
Headers
|
33
|
+
else
|
34
|
+
raise Wombat::Property::Locators::UnknownTypeException.new("Unknown property format #{property.format}.")
|
35
|
+
end
|
36
36
|
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
end
|
37
|
+
klass.new(property)
|
38
|
+
end
|
39
|
+
end
|
40
|
+
end
|
41
|
+
end
|
42
|
+
end
|
@@ -1,46 +1,23 @@
|
|
1
1
|
#coding: utf-8
|
2
2
|
|
3
3
|
module Wombat
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
4
|
+
module Property
|
5
|
+
module Locators
|
6
|
+
class Follow < Base
|
7
|
+
def locate(context, page = nil)
|
8
|
+
super do
|
9
9
|
locate_nodes(context).flat_map do |node|
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
# absolute.
|
16
|
-
# So, let wombat try that approach when
|
17
|
-
# loading relative link fails.
|
18
|
-
#
|
19
|
-
target_page = page.click node
|
20
|
-
context = target_page.parser
|
10
|
+
mechanize_page = context.mechanize_page
|
11
|
+
link = Mechanize::Page::Link.new(node, page, mechanize_page)
|
12
|
+
target_page = page.click link
|
13
|
+
context = target_page.parser
|
14
|
+
context.mechanize_page = mechanize_page
|
21
15
|
|
22
|
-
|
23
|
-
rescue Mechanize::ResponseCodeError => e
|
24
|
-
# Either the page is unavailable, or
|
25
|
-
# the link is mistakenly relative
|
26
|
-
#
|
27
|
-
raise e if retried
|
28
|
-
|
29
|
-
# Give it a try first time
|
30
|
-
href = node.attributes && node.attributes["href"]
|
31
|
-
if href.respond_to? :value
|
32
|
-
href.value = '/' + href.value unless
|
33
|
-
href.value.start_with? '/'
|
34
|
-
retried = true
|
35
|
-
retry
|
36
|
-
else
|
37
|
-
raise e
|
38
|
-
end
|
39
|
-
end
|
16
|
+
filter_properties(context, page)
|
40
17
|
end
|
41
18
|
end
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
19
|
+
end
|
20
|
+
end
|
21
|
+
end
|
22
|
+
end
|
46
23
|
end
|
@@ -1,17 +1,17 @@
|
|
1
1
|
#coding: utf-8
|
2
2
|
|
3
3
|
module Wombat
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
4
|
+
module Property
|
5
|
+
module Locators
|
6
|
+
class List < Base
|
7
|
+
def locate(context, page = nil)
|
8
|
+
super do
|
9
|
+
locate_nodes(context).map do |n|
|
10
|
+
n.is_a?(String) ? n.strip : n.inner_text.strip
|
11
|
+
end
|
12
|
+
end
|
13
|
+
end
|
14
|
+
end
|
15
|
+
end
|
16
16
|
end
|
17
|
-
end
|
17
|
+
end
|
data/lib/wombat.rb
CHANGED
@@ -3,15 +3,15 @@
|
|
3
3
|
require 'wombat/crawler'
|
4
4
|
|
5
5
|
module Wombat
|
6
|
-
|
6
|
+
class << self
|
7
7
|
|
8
8
|
attr_reader :proxy_args, :user_agent, :user_agent_alias
|
9
9
|
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
10
|
+
def crawl(&block)
|
11
|
+
klass = Class.new
|
12
|
+
klass.send(:include, Wombat::Crawler)
|
13
|
+
klass.new.crawl(&block)
|
14
|
+
end
|
15
15
|
|
16
16
|
def configure
|
17
17
|
yield self
|
@@ -29,6 +29,6 @@ module Wombat
|
|
29
29
|
@user_agent_alias = user_agent_alias
|
30
30
|
end
|
31
31
|
|
32
|
-
|
33
|
-
|
32
|
+
alias_method :scrape, :crawl
|
33
|
+
end
|
34
34
|
end
|
data/spec/crawler_spec.rb
CHANGED
@@ -13,12 +13,12 @@ describe Wombat::Crawler do
|
|
13
13
|
|
14
14
|
@crawler.event { event_called = true }
|
15
15
|
|
16
|
-
event_called.
|
16
|
+
expect(event_called).to eq(true)
|
17
17
|
end
|
18
18
|
|
19
19
|
it 'should provide metadata to yielded block' do
|
20
20
|
@crawler.event do
|
21
|
-
self.class.should
|
21
|
+
self.class.should eq(Wombat::DSL::PropertyGroup)
|
22
22
|
end
|
23
23
|
end
|
24
24
|
|
@@ -36,11 +36,11 @@ describe Wombat::Crawler do
|
|
36
36
|
|
37
37
|
@crawler.location { |v| v.latitude -50.2323 }
|
38
38
|
|
39
|
-
@crawler_instance.
|
40
|
-
arg["event"]["title"].selector.
|
41
|
-
arg["event"]["time"].selector.to_s.
|
42
|
-
arg["venue"]["name"].selector.
|
43
|
-
arg["location"]["latitude"].selector.
|
39
|
+
expect(@crawler_instance).to receive(:parse) do |arg|
|
40
|
+
expect(arg["event"]["title"].selector).to eq("Fulltronic Dezembro")
|
41
|
+
expect(arg["event"]["time"].selector.to_s).to eq(time.to_s)
|
42
|
+
expect(arg["venue"]["name"].selector).to eq("Scooba")
|
43
|
+
expect(arg["location"]["latitude"].selector).to eq(-50.2323)
|
44
44
|
end
|
45
45
|
|
46
46
|
@crawler_instance.crawl
|
@@ -52,24 +52,28 @@ describe Wombat::Crawler do
|
|
52
52
|
another_crawler_instance = another_crawler.new
|
53
53
|
|
54
54
|
another_crawler.event { |e| e.title 'Ibiza' }
|
55
|
-
another_crawler_instance.
|
55
|
+
expect(another_crawler_instance).to receive(:parse) { |arg|
|
56
|
+
expect(arg["event"]["title"].selector).to eq("Ibiza")
|
57
|
+
}
|
56
58
|
another_crawler_instance.crawl
|
57
59
|
|
58
60
|
@crawler.event { |e| e.title 'Fulltronic Dezembro' }
|
59
|
-
@crawler_instance.
|
61
|
+
expect(@crawler_instance).to receive(:parse) { |arg|
|
62
|
+
expect(arg["event"]["title"].selector).to eq("Fulltronic Dezembro")
|
63
|
+
}
|
60
64
|
@crawler_instance.crawl
|
61
65
|
end
|
62
66
|
|
63
67
|
it 'should be able to assign arbitrary plain text metadata' do
|
64
68
|
@crawler.some_data("/event/list", :html, "geo") { |p| true }
|
65
69
|
|
66
|
-
@crawler_instance.
|
70
|
+
expect(@crawler_instance).to receive(:parse) do |arg|
|
67
71
|
prop = arg['some_data']
|
68
|
-
prop.wombat_property_name.
|
69
|
-
prop.selector.
|
70
|
-
prop.format.
|
71
|
-
prop.namespaces.
|
72
|
-
prop.callback.
|
72
|
+
expect(prop.wombat_property_name).to eq("some_data")
|
73
|
+
expect(prop.selector).to eq("/event/list")
|
74
|
+
expect(prop.format).to eq(:html)
|
75
|
+
expect(prop.namespaces).to eq("geo")
|
76
|
+
expect(prop.callback).to_not eq(nil)
|
73
77
|
end
|
74
78
|
|
75
79
|
@crawler_instance.crawl
|
@@ -84,9 +88,9 @@ describe Wombat::Crawler do
|
|
84
88
|
another "css=.information"
|
85
89
|
end
|
86
90
|
|
87
|
-
@crawler_instance.
|
88
|
-
arg["structure"]["data"].selector.
|
89
|
-
arg["structure"]["another"].selector.
|
91
|
+
expect(@crawler_instance).to receive(:parse) do |arg|
|
92
|
+
expect(arg["structure"]["data"].selector).to eq("xpath=/xyz")
|
93
|
+
expect(arg["structure"]["another"].selector).to eq("css=.information")
|
90
94
|
end
|
91
95
|
|
92
96
|
@crawler_instance.crawl
|
@@ -97,8 +101,8 @@ describe Wombat::Crawler do
|
|
97
101
|
end
|
98
102
|
|
99
103
|
it 'should assign metadata format' do
|
100
|
-
@crawler_instance.
|
101
|
-
arg[:document_format].
|
104
|
+
expect(@crawler_instance).to receive(:parse) do |arg|
|
105
|
+
expect(arg[:document_format]).to eq(:xml)
|
102
106
|
end
|
103
107
|
@crawler.document_format :xml
|
104
108
|
@crawler_instance.crawl
|
@@ -108,9 +112,9 @@ describe Wombat::Crawler do
|
|
108
112
|
@crawler.base_url "danielnc.com"
|
109
113
|
@crawler.path "/itens"
|
110
114
|
|
111
|
-
@crawler_instance.
|
112
|
-
arg[:base_url].
|
113
|
-
arg[:path].
|
115
|
+
expect(@crawler_instance).to receive(:parse) do |arg|
|
116
|
+
expect(arg[:base_url]).to eq("danielnc.com")
|
117
|
+
expect(arg[:path]).to eq("/itens/1")
|
114
118
|
end
|
115
119
|
|
116
120
|
@crawler_instance.crawl do
|
@@ -119,44 +123,59 @@ describe Wombat::Crawler do
|
|
119
123
|
|
120
124
|
another_instance = @crawler.new
|
121
125
|
|
122
|
-
another_instance.
|
123
|
-
arg[:base_url].
|
124
|
-
arg[:path].
|
126
|
+
expect(another_instance).to receive(:parse) do |arg|
|
127
|
+
expect(arg[:base_url]).to eq("danielnc.com")
|
128
|
+
expect(arg[:path]).to eq("/itens")
|
125
129
|
end
|
126
130
|
|
127
131
|
another_instance.crawl
|
128
132
|
end
|
129
133
|
|
134
|
+
it 'should crawl with url and block' do
|
135
|
+
url = 'http://danielinc.com/itens'
|
136
|
+
|
137
|
+
expect(@crawler_instance).to receive(:parse).with(anything, url)
|
138
|
+
@crawler_instance.crawl(url) do
|
139
|
+
end
|
140
|
+
|
141
|
+
another_instance = @crawler.new
|
142
|
+
expect(another_instance).to receive(:parse).with(anything, url)
|
143
|
+
|
144
|
+
another_instance.crawl(url)
|
145
|
+
end
|
146
|
+
|
130
147
|
it 'should remove created method missing' do
|
131
148
|
@crawler.base_url "danielnc.com"
|
132
149
|
@crawler.path "/itens"
|
133
150
|
|
134
|
-
@crawler_instance.
|
135
|
-
arg[:base_url].
|
136
|
-
arg[:path].
|
151
|
+
expect(@crawler_instance).to receive(:parse) do |arg|
|
152
|
+
expect(arg[:base_url]).to eq("danielnc.com")
|
153
|
+
expect(arg[:path]).to eq("/itens/1")
|
137
154
|
end
|
138
155
|
|
139
156
|
@crawler_instance.crawl do
|
140
157
|
path "/itens/1"
|
141
158
|
end
|
142
159
|
|
143
|
-
lambda {
|
160
|
+
expect(lambda {
|
161
|
+
@craler_intance.undefined_method
|
162
|
+
}).to raise_error(NoMethodError)
|
144
163
|
end
|
145
164
|
|
146
165
|
it 'should remove created instance variable' do
|
147
166
|
@crawler.base_url "danielnc.com"
|
148
167
|
@crawler.path "/itens"
|
149
168
|
|
150
|
-
@crawler_instance.
|
151
|
-
arg[:base_url].
|
152
|
-
arg[:path].
|
169
|
+
expect(@crawler_instance).to receive(:parse) do |arg|
|
170
|
+
expect(arg[:base_url]).to eq("danielnc.com")
|
171
|
+
expect(arg[:path]).to eq("/itens/1")
|
153
172
|
end
|
154
173
|
|
155
174
|
@crawler_instance.crawl do
|
156
175
|
path "/itens/1"
|
157
176
|
end
|
158
177
|
|
159
|
-
@crawler_instance.instance_variables.index(:@metadata_dup).
|
178
|
+
expect(@crawler_instance.instance_variables.index(:@metadata_dup)).to be_nil
|
160
179
|
end
|
161
180
|
|
162
181
|
context "response code" do
|
@@ -169,7 +188,7 @@ describe Wombat::Crawler do
|
|
169
188
|
@crawler.search "css=.btn-search"
|
170
189
|
|
171
190
|
@crawler_instance.crawl
|
172
|
-
@crawler_instance.response_code.
|
191
|
+
expect(@crawler_instance.response_code).to be(200)
|
173
192
|
end
|
174
193
|
|
175
194
|
end
|
@@ -183,7 +202,7 @@ describe Wombat::Crawler do
|
|
183
202
|
@crawler.document_format :xml
|
184
203
|
|
185
204
|
@crawler_instance.crawl
|
186
|
-
@crawler_instance.response_code.
|
205
|
+
expect(@crawler_instance.response_code).to be(200)
|
187
206
|
end
|
188
207
|
end
|
189
208
|
|
@@ -195,8 +214,9 @@ describe Wombat::Crawler do
|
|
195
214
|
|
196
215
|
@crawler.search "css=.btn-search"
|
197
216
|
|
198
|
-
lambda { @crawler_instance.crawl }.
|
199
|
-
|
217
|
+
expect(lambda { @crawler_instance.crawl }).to raise_error(
|
218
|
+
"404 => Net::HTTPNotFound for http://www.terra.com.br/portal/ -- unhandled response")
|
219
|
+
expect(@crawler_instance.response_code).to be(404)
|
200
220
|
end
|
201
221
|
end
|
202
222
|
|
@@ -208,8 +228,10 @@ describe Wombat::Crawler do
|
|
208
228
|
|
209
229
|
@crawler.search "css=.btn-search"
|
210
230
|
@crawler.document_format :xml
|
211
|
-
lambda {
|
212
|
-
|
231
|
+
expect(lambda {
|
232
|
+
@crawler_instance.crawl
|
233
|
+
}).to raise_error(RestClient::ResourceNotFound)
|
234
|
+
expect(@crawler_instance.response_code).to be(404)
|
213
235
|
end
|
214
236
|
end
|
215
237
|
end
|
@@ -217,8 +239,8 @@ describe Wombat::Crawler do
|
|
217
239
|
|
218
240
|
describe '#scrape' do
|
219
241
|
it 'should alias to crawl' do
|
220
|
-
@crawler_instance.
|
242
|
+
expect(@crawler_instance).to receive :parse
|
221
243
|
@crawler_instance.scrape
|
222
244
|
end
|
223
245
|
end
|
224
|
-
end
|
246
|
+
end
|
@@ -0,0 +1,63 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
require 'spec_helper'
|
3
|
+
|
4
|
+
describe 'crawler base and one derived class' do
|
5
|
+
class A
|
6
|
+
include Wombat::Crawler
|
7
|
+
title 'xpath=//head/title'
|
8
|
+
end
|
9
|
+
|
10
|
+
class B < A
|
11
|
+
base_url "http://www.terra.com.br"
|
12
|
+
path "/portal"
|
13
|
+
search "css=.btn-search"
|
14
|
+
end
|
15
|
+
|
16
|
+
it 'extracts properties defined in the base class ' do
|
17
|
+
VCR.use_cassette('basic_crawler_page') do
|
18
|
+
b = B.new
|
19
|
+
data = b.crawl
|
20
|
+
expect(data).to have_key('title')
|
21
|
+
expect(data).to have_key('search')
|
22
|
+
end
|
23
|
+
end
|
24
|
+
end
|
25
|
+
|
26
|
+
describe 'two derived classes' do
|
27
|
+
class D
|
28
|
+
include Wombat::Crawler
|
29
|
+
title 'xpath=//head/title'
|
30
|
+
end
|
31
|
+
|
32
|
+
class E < D
|
33
|
+
base_url "http://www.terra.com.br"
|
34
|
+
path "/portal"
|
35
|
+
search "css=.btn-search"
|
36
|
+
end
|
37
|
+
|
38
|
+
class F < D
|
39
|
+
title 'xpath=//broken/badly'
|
40
|
+
base_url "https://www.github.com"
|
41
|
+
path "/"
|
42
|
+
end
|
43
|
+
|
44
|
+
|
45
|
+
it 'second derived class does not overwrite base class properties' do
|
46
|
+
VCR.use_cassette('basic_crawler_page') do
|
47
|
+
e = E.new
|
48
|
+
data = e.crawl
|
49
|
+
expect(data).to have_key('title')
|
50
|
+
expect(data).to have_key('search')
|
51
|
+
expect(data['title']).to eq('Terra - Notícias, vídeos, esportes, economia, diversão, música, moda, fotolog, blog, chat')
|
52
|
+
end
|
53
|
+
end
|
54
|
+
|
55
|
+
it 'first derived class does not overwrite base class properties' do
|
56
|
+
VCR.use_cassette('follow_links') do
|
57
|
+
f = F.new
|
58
|
+
data = f.crawl
|
59
|
+
expect(data).to have_key('title')
|
60
|
+
expect(data['title']).to be_nil
|
61
|
+
end
|
62
|
+
end
|
63
|
+
end
|
@@ -273,4 +273,34 @@ describe 'basic crawler setup' do
|
|
273
273
|
}
|
274
274
|
end
|
275
275
|
end
|
276
|
+
|
277
|
+
it 'should make post requests if needed' do
|
278
|
+
VCR.use_cassette('make_post_request') do
|
279
|
+
data = { your_name: "Name" }
|
280
|
+
crawler = Class.new
|
281
|
+
crawler.send(:include, Wombat::Crawler)
|
282
|
+
crawler.base_url "http://hroch486.icpf.cas.cz"
|
283
|
+
crawler.path "/cgi-bin/echo.pl"
|
284
|
+
crawler.http_method :post
|
285
|
+
crawler.data data
|
286
|
+
|
287
|
+
crawler.my_name 'css=ul:last li:last'
|
288
|
+
|
289
|
+
crawler_instance = crawler.new
|
290
|
+
results = crawler_instance.crawl
|
291
|
+
|
292
|
+
results["my_name"].should eq("your_name = Name")
|
293
|
+
end
|
294
|
+
end
|
295
|
+
|
296
|
+
it 'should let the url be passed as an argument to crawl' do
|
297
|
+
VCR.use_cassette('basic_crawler_page') do
|
298
|
+
crawler = Class.new
|
299
|
+
crawler.send(:include, Wombat::Crawler)
|
300
|
+
crawler.send(:title, 'xpath=//head/title')
|
301
|
+
crawler_instance = crawler.new
|
302
|
+
results = crawler_instance.crawl('http://www.terra.com.br/portal')
|
303
|
+
results['title'].should eq('Terra - Notícias, vídeos, esportes, economia, diversão, música, moda, fotolog, blog, chat')
|
304
|
+
end
|
305
|
+
end
|
276
306
|
end
|
@@ -17,11 +17,46 @@ describe Wombat::Processing::Parser do
|
|
17
17
|
fake_document.should_receive(:parser).and_return(fake_parser)
|
18
18
|
fake_document.should_receive(:header).and_return(fake_header)
|
19
19
|
fake_parser.should_receive(:headers=)
|
20
|
+
fake_parser.should_receive(:mechanize_page=)
|
20
21
|
@parser.mechanize.should_receive(:get).with("http://www.google.com/search").and_return fake_document
|
21
22
|
|
22
23
|
@parser.parse @metadata
|
23
24
|
end
|
24
25
|
|
26
|
+
it 'should able to make post requests' do
|
27
|
+
data = { your_name: "Name" }
|
28
|
+
@metadata.base_url "http://hroch486.icpf.cas.cz"
|
29
|
+
@metadata.path "/cgi-bin/echo.pl"
|
30
|
+
@metadata.http_method :post
|
31
|
+
@metadata.data data
|
32
|
+
|
33
|
+
fake_document = double :document
|
34
|
+
fake_parser = double :parser
|
35
|
+
fake_header = double :header
|
36
|
+
fake_document.should_receive(:parser).and_return(fake_parser)
|
37
|
+
fake_document.should_receive(:header).and_return(fake_header)
|
38
|
+
fake_parser.should_receive(:headers=)
|
39
|
+
fake_parser.should_receive(:mechanize_page=)
|
40
|
+
@parser.mechanize.should_receive(:post).with("http://hroch486.icpf.cas.cz/cgi-bin/echo.pl", data).and_return fake_document
|
41
|
+
|
42
|
+
@parser.parse @metadata
|
43
|
+
end
|
44
|
+
|
45
|
+
it 'should accept the url as an argument to parse' do
|
46
|
+
@metadata.http_method :get
|
47
|
+
|
48
|
+
fake_document = double :document
|
49
|
+
fake_parser = double :parser
|
50
|
+
fake_header = double :header
|
51
|
+
fake_document.should_receive(:parser).and_return(fake_parser)
|
52
|
+
fake_document.should_receive(:header).and_return(fake_header)
|
53
|
+
fake_parser.should_receive(:headers=)
|
54
|
+
fake_parser.should_receive(:mechanize_page=)
|
55
|
+
@parser.mechanize.should_receive(:get).with("https://www.github.com/notifications").and_return fake_document
|
56
|
+
|
57
|
+
@parser.parse(@metadata, 'https://www.github.com/notifications')
|
58
|
+
end
|
59
|
+
|
25
60
|
it 'should correctly parse xml documents' do
|
26
61
|
fake_document = double :xml
|
27
62
|
fake_parser = double :parser
|
@@ -1,18 +1,18 @@
|
|
1
1
|
require 'spec_helper'
|
2
2
|
|
3
3
|
describe Wombat::Property::Locators::Factory do
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
4
|
+
it 'should instantiate correct locator according to property type' do
|
5
|
+
Wombat::Property::Locators::Factory.locator_for(Wombat::DSL::Property.new(nil, nil, :text)).should be_a(Wombat::Property::Locators::Text)
|
6
|
+
Wombat::Property::Locators::Factory.locator_for(Wombat::DSL::Property.new(nil, nil, :html)).should be_a(Wombat::Property::Locators::Html)
|
7
|
+
Wombat::Property::Locators::Factory.locator_for(Wombat::DSL::Property.new(nil, nil, :list)).should be_a(Wombat::Property::Locators::List)
|
8
|
+
Wombat::Property::Locators::Factory.locator_for(Wombat::DSL::Property.new(nil, nil, :follow)).should be_a(Wombat::Property::Locators::Follow)
|
9
|
+
Wombat::Property::Locators::Factory.locator_for(Wombat::DSL::Property.new(nil, nil, :iterator)).should be_a(Wombat::Property::Locators::Iterator)
|
10
|
+
Wombat::Property::Locators::Factory.locator_for(Wombat::DSL::Property.new(nil, nil, :container)).should be_a(Wombat::Property::Locators::PropertyGroup)
|
11
|
+
end
|
12
12
|
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
end
|
13
|
+
it 'should raise correct exception if provided property is of unknown type' do
|
14
|
+
lambda {
|
15
|
+
Wombat::Property::Locators::Factory.locator_for(Wombat::DSL::Property.new(nil, nil, :weird))
|
16
|
+
}.should raise_error(Wombat::Property::Locators::UnknownTypeException, "Unknown property format weird.")
|
17
|
+
end
|
18
|
+
end
|
@@ -1,26 +1,26 @@
|
|
1
1
|
require 'spec_helper'
|
2
2
|
|
3
3
|
describe Wombat::Property::Locators::Html do
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
4
|
+
it 'should locate html property' do
|
5
|
+
fake_elem = double :element
|
6
|
+
context = double :context
|
7
|
+
fake_elem.stub inner_html: "Something cool "
|
8
|
+
context.stub(:xpath).with("/abc", nil).and_return [fake_elem]
|
9
|
+
property = Wombat::DSL::Property.new('data1', 'xpath=/abc', :html)
|
10
10
|
|
11
|
-
|
11
|
+
locator = Wombat::Property::Locators::Html.new(property)
|
12
12
|
|
13
|
-
|
14
|
-
|
13
|
+
locator.locate(context).should == { "data1" => "Something cool" }
|
14
|
+
end
|
15
15
|
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
16
|
+
it 'should return null if the property cannot be found' do
|
17
|
+
fake_elem = double :element
|
18
|
+
context = double :context
|
19
|
+
context.stub(:xpath).with("/abc", nil).and_return []
|
20
|
+
property = Wombat::DSL::Property.new('data1', 'xpath=/abc', :html)
|
21
21
|
|
22
|
-
|
22
|
+
locator = Wombat::Property::Locators::Html.new(property)
|
23
23
|
|
24
|
-
|
25
|
-
|
26
|
-
end
|
24
|
+
locator.locate(context).should == { "data1" => nil }
|
25
|
+
end
|
26
|
+
end
|
@@ -1,13 +1,13 @@
|
|
1
1
|
require 'spec_helper'
|
2
2
|
|
3
3
|
describe Wombat::Property::Locators::List do
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
4
|
+
it 'should locate a list of nodes' do
|
5
|
+
context = double :context
|
6
|
+
context.stub(:css).with(".selector").and_return %w(1 2 3 4 5)
|
7
|
+
property = Wombat::DSL::Property.new('data1', 'css=.selector', :list)
|
8
8
|
|
9
|
-
|
9
|
+
locator = Wombat::Property::Locators::List.new(property)
|
10
10
|
|
11
|
-
|
12
|
-
|
13
|
-
end
|
11
|
+
locator.locate(context).should == { "data1" => %w(1 2 3 4 5) }
|
12
|
+
end
|
13
|
+
end
|