wombat 2.4.0 → 2.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.travis.yml +2 -0
- data/CHANGELOG.md +67 -0
- data/Gemfile.lock +52 -50
- data/README.md +4 -4
- data/VERSION +1 -1
- data/examples/iterator.rb +25 -25
- data/examples/list.rb +22 -22
- data/examples/xml.rb +15 -15
- data/fixtures/vcr_cassettes/make_post_request.yml +69 -0
- data/lib/wombat/crawler.rb +13 -7
- data/lib/wombat/dsl/metadata.rb +8 -0
- data/lib/wombat/processing/parser.rb +22 -7
- data/lib/wombat/property/locators/factory.rb +28 -28
- data/lib/wombat/property/locators/follow.rb +15 -38
- data/lib/wombat/property/locators/list.rb +13 -13
- data/lib/wombat.rb +8 -8
- data/spec/crawler_spec.rb +64 -42
- data/spec/integration/crawler_inheritance_spec.rb +63 -0
- data/spec/integration/integration_spec.rb +30 -0
- data/spec/processing/parser_spec.rb +35 -0
- data/spec/property/locators/factory_spec.rb +14 -14
- data/spec/property/locators/html_spec.rb +18 -18
- data/spec/property/locators/list_spec.rb +8 -8
- data/spec/property/locators/text_spec.rb +44 -44
- data/spec/wombat_spec.rb +36 -36
- data/wombat.gemspec +7 -3
- metadata +6 -2
@@ -11,32 +11,32 @@ require 'wombat/property/locators/headers'
|
|
11
11
|
class Wombat::Property::Locators::UnknownTypeException < Exception; end;
|
12
12
|
|
13
13
|
module Wombat
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
14
|
+
module Property
|
15
|
+
module Locators
|
16
|
+
module Factory
|
17
|
+
def self.locator_for(property)
|
18
|
+
klass = case(property.wombat_property_format)
|
19
|
+
when :text
|
20
|
+
Text
|
21
|
+
when :list
|
22
|
+
List
|
23
|
+
when :html
|
24
|
+
Html
|
25
|
+
when :iterator
|
26
|
+
Iterator
|
27
|
+
when :container
|
28
|
+
PropertyGroup
|
29
|
+
when :follow
|
30
|
+
Follow
|
31
|
+
when :headers
|
32
|
+
Headers
|
33
|
+
else
|
34
|
+
raise Wombat::Property::Locators::UnknownTypeException.new("Unknown property format #{property.format}.")
|
35
|
+
end
|
36
36
|
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
end
|
37
|
+
klass.new(property)
|
38
|
+
end
|
39
|
+
end
|
40
|
+
end
|
41
|
+
end
|
42
|
+
end
|
@@ -1,46 +1,23 @@
|
|
1
1
|
#coding: utf-8
|
2
2
|
|
3
3
|
module Wombat
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
4
|
+
module Property
|
5
|
+
module Locators
|
6
|
+
class Follow < Base
|
7
|
+
def locate(context, page = nil)
|
8
|
+
super do
|
9
9
|
locate_nodes(context).flat_map do |node|
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
# absolute.
|
16
|
-
# So, let wombat try that approach when
|
17
|
-
# loading relative link fails.
|
18
|
-
#
|
19
|
-
target_page = page.click node
|
20
|
-
context = target_page.parser
|
10
|
+
mechanize_page = context.mechanize_page
|
11
|
+
link = Mechanize::Page::Link.new(node, page, mechanize_page)
|
12
|
+
target_page = page.click link
|
13
|
+
context = target_page.parser
|
14
|
+
context.mechanize_page = mechanize_page
|
21
15
|
|
22
|
-
|
23
|
-
rescue Mechanize::ResponseCodeError => e
|
24
|
-
# Either the page is unavailable, or
|
25
|
-
# the link is mistakenly relative
|
26
|
-
#
|
27
|
-
raise e if retried
|
28
|
-
|
29
|
-
# Give it a try first time
|
30
|
-
href = node.attributes && node.attributes["href"]
|
31
|
-
if href.respond_to? :value
|
32
|
-
href.value = '/' + href.value unless
|
33
|
-
href.value.start_with? '/'
|
34
|
-
retried = true
|
35
|
-
retry
|
36
|
-
else
|
37
|
-
raise e
|
38
|
-
end
|
39
|
-
end
|
16
|
+
filter_properties(context, page)
|
40
17
|
end
|
41
18
|
end
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
19
|
+
end
|
20
|
+
end
|
21
|
+
end
|
22
|
+
end
|
46
23
|
end
|
@@ -1,17 +1,17 @@
|
|
1
1
|
#coding: utf-8
|
2
2
|
|
3
3
|
module Wombat
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
4
|
+
module Property
|
5
|
+
module Locators
|
6
|
+
class List < Base
|
7
|
+
def locate(context, page = nil)
|
8
|
+
super do
|
9
|
+
locate_nodes(context).map do |n|
|
10
|
+
n.is_a?(String) ? n.strip : n.inner_text.strip
|
11
|
+
end
|
12
|
+
end
|
13
|
+
end
|
14
|
+
end
|
15
|
+
end
|
16
16
|
end
|
17
|
-
end
|
17
|
+
end
|
data/lib/wombat.rb
CHANGED
@@ -3,15 +3,15 @@
|
|
3
3
|
require 'wombat/crawler'
|
4
4
|
|
5
5
|
module Wombat
|
6
|
-
|
6
|
+
class << self
|
7
7
|
|
8
8
|
attr_reader :proxy_args, :user_agent, :user_agent_alias
|
9
9
|
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
10
|
+
def crawl(&block)
|
11
|
+
klass = Class.new
|
12
|
+
klass.send(:include, Wombat::Crawler)
|
13
|
+
klass.new.crawl(&block)
|
14
|
+
end
|
15
15
|
|
16
16
|
def configure
|
17
17
|
yield self
|
@@ -29,6 +29,6 @@ module Wombat
|
|
29
29
|
@user_agent_alias = user_agent_alias
|
30
30
|
end
|
31
31
|
|
32
|
-
|
33
|
-
|
32
|
+
alias_method :scrape, :crawl
|
33
|
+
end
|
34
34
|
end
|
data/spec/crawler_spec.rb
CHANGED
@@ -13,12 +13,12 @@ describe Wombat::Crawler do
|
|
13
13
|
|
14
14
|
@crawler.event { event_called = true }
|
15
15
|
|
16
|
-
event_called.
|
16
|
+
expect(event_called).to eq(true)
|
17
17
|
end
|
18
18
|
|
19
19
|
it 'should provide metadata to yielded block' do
|
20
20
|
@crawler.event do
|
21
|
-
self.class.should
|
21
|
+
self.class.should eq(Wombat::DSL::PropertyGroup)
|
22
22
|
end
|
23
23
|
end
|
24
24
|
|
@@ -36,11 +36,11 @@ describe Wombat::Crawler do
|
|
36
36
|
|
37
37
|
@crawler.location { |v| v.latitude -50.2323 }
|
38
38
|
|
39
|
-
@crawler_instance.
|
40
|
-
arg["event"]["title"].selector.
|
41
|
-
arg["event"]["time"].selector.to_s.
|
42
|
-
arg["venue"]["name"].selector.
|
43
|
-
arg["location"]["latitude"].selector.
|
39
|
+
expect(@crawler_instance).to receive(:parse) do |arg|
|
40
|
+
expect(arg["event"]["title"].selector).to eq("Fulltronic Dezembro")
|
41
|
+
expect(arg["event"]["time"].selector.to_s).to eq(time.to_s)
|
42
|
+
expect(arg["venue"]["name"].selector).to eq("Scooba")
|
43
|
+
expect(arg["location"]["latitude"].selector).to eq(-50.2323)
|
44
44
|
end
|
45
45
|
|
46
46
|
@crawler_instance.crawl
|
@@ -52,24 +52,28 @@ describe Wombat::Crawler do
|
|
52
52
|
another_crawler_instance = another_crawler.new
|
53
53
|
|
54
54
|
another_crawler.event { |e| e.title 'Ibiza' }
|
55
|
-
another_crawler_instance.
|
55
|
+
expect(another_crawler_instance).to receive(:parse) { |arg|
|
56
|
+
expect(arg["event"]["title"].selector).to eq("Ibiza")
|
57
|
+
}
|
56
58
|
another_crawler_instance.crawl
|
57
59
|
|
58
60
|
@crawler.event { |e| e.title 'Fulltronic Dezembro' }
|
59
|
-
@crawler_instance.
|
61
|
+
expect(@crawler_instance).to receive(:parse) { |arg|
|
62
|
+
expect(arg["event"]["title"].selector).to eq("Fulltronic Dezembro")
|
63
|
+
}
|
60
64
|
@crawler_instance.crawl
|
61
65
|
end
|
62
66
|
|
63
67
|
it 'should be able to assign arbitrary plain text metadata' do
|
64
68
|
@crawler.some_data("/event/list", :html, "geo") { |p| true }
|
65
69
|
|
66
|
-
@crawler_instance.
|
70
|
+
expect(@crawler_instance).to receive(:parse) do |arg|
|
67
71
|
prop = arg['some_data']
|
68
|
-
prop.wombat_property_name.
|
69
|
-
prop.selector.
|
70
|
-
prop.format.
|
71
|
-
prop.namespaces.
|
72
|
-
prop.callback.
|
72
|
+
expect(prop.wombat_property_name).to eq("some_data")
|
73
|
+
expect(prop.selector).to eq("/event/list")
|
74
|
+
expect(prop.format).to eq(:html)
|
75
|
+
expect(prop.namespaces).to eq("geo")
|
76
|
+
expect(prop.callback).to_not eq(nil)
|
73
77
|
end
|
74
78
|
|
75
79
|
@crawler_instance.crawl
|
@@ -84,9 +88,9 @@ describe Wombat::Crawler do
|
|
84
88
|
another "css=.information"
|
85
89
|
end
|
86
90
|
|
87
|
-
@crawler_instance.
|
88
|
-
arg["structure"]["data"].selector.
|
89
|
-
arg["structure"]["another"].selector.
|
91
|
+
expect(@crawler_instance).to receive(:parse) do |arg|
|
92
|
+
expect(arg["structure"]["data"].selector).to eq("xpath=/xyz")
|
93
|
+
expect(arg["structure"]["another"].selector).to eq("css=.information")
|
90
94
|
end
|
91
95
|
|
92
96
|
@crawler_instance.crawl
|
@@ -97,8 +101,8 @@ describe Wombat::Crawler do
|
|
97
101
|
end
|
98
102
|
|
99
103
|
it 'should assign metadata format' do
|
100
|
-
@crawler_instance.
|
101
|
-
arg[:document_format].
|
104
|
+
expect(@crawler_instance).to receive(:parse) do |arg|
|
105
|
+
expect(arg[:document_format]).to eq(:xml)
|
102
106
|
end
|
103
107
|
@crawler.document_format :xml
|
104
108
|
@crawler_instance.crawl
|
@@ -108,9 +112,9 @@ describe Wombat::Crawler do
|
|
108
112
|
@crawler.base_url "danielnc.com"
|
109
113
|
@crawler.path "/itens"
|
110
114
|
|
111
|
-
@crawler_instance.
|
112
|
-
arg[:base_url].
|
113
|
-
arg[:path].
|
115
|
+
expect(@crawler_instance).to receive(:parse) do |arg|
|
116
|
+
expect(arg[:base_url]).to eq("danielnc.com")
|
117
|
+
expect(arg[:path]).to eq("/itens/1")
|
114
118
|
end
|
115
119
|
|
116
120
|
@crawler_instance.crawl do
|
@@ -119,44 +123,59 @@ describe Wombat::Crawler do
|
|
119
123
|
|
120
124
|
another_instance = @crawler.new
|
121
125
|
|
122
|
-
another_instance.
|
123
|
-
arg[:base_url].
|
124
|
-
arg[:path].
|
126
|
+
expect(another_instance).to receive(:parse) do |arg|
|
127
|
+
expect(arg[:base_url]).to eq("danielnc.com")
|
128
|
+
expect(arg[:path]).to eq("/itens")
|
125
129
|
end
|
126
130
|
|
127
131
|
another_instance.crawl
|
128
132
|
end
|
129
133
|
|
134
|
+
it 'should crawl with url and block' do
|
135
|
+
url = 'http://danielinc.com/itens'
|
136
|
+
|
137
|
+
expect(@crawler_instance).to receive(:parse).with(anything, url)
|
138
|
+
@crawler_instance.crawl(url) do
|
139
|
+
end
|
140
|
+
|
141
|
+
another_instance = @crawler.new
|
142
|
+
expect(another_instance).to receive(:parse).with(anything, url)
|
143
|
+
|
144
|
+
another_instance.crawl(url)
|
145
|
+
end
|
146
|
+
|
130
147
|
it 'should remove created method missing' do
|
131
148
|
@crawler.base_url "danielnc.com"
|
132
149
|
@crawler.path "/itens"
|
133
150
|
|
134
|
-
@crawler_instance.
|
135
|
-
arg[:base_url].
|
136
|
-
arg[:path].
|
151
|
+
expect(@crawler_instance).to receive(:parse) do |arg|
|
152
|
+
expect(arg[:base_url]).to eq("danielnc.com")
|
153
|
+
expect(arg[:path]).to eq("/itens/1")
|
137
154
|
end
|
138
155
|
|
139
156
|
@crawler_instance.crawl do
|
140
157
|
path "/itens/1"
|
141
158
|
end
|
142
159
|
|
143
|
-
lambda {
|
160
|
+
expect(lambda {
|
161
|
+
@craler_intance.undefined_method
|
162
|
+
}).to raise_error(NoMethodError)
|
144
163
|
end
|
145
164
|
|
146
165
|
it 'should remove created instance variable' do
|
147
166
|
@crawler.base_url "danielnc.com"
|
148
167
|
@crawler.path "/itens"
|
149
168
|
|
150
|
-
@crawler_instance.
|
151
|
-
arg[:base_url].
|
152
|
-
arg[:path].
|
169
|
+
expect(@crawler_instance).to receive(:parse) do |arg|
|
170
|
+
expect(arg[:base_url]).to eq("danielnc.com")
|
171
|
+
expect(arg[:path]).to eq("/itens/1")
|
153
172
|
end
|
154
173
|
|
155
174
|
@crawler_instance.crawl do
|
156
175
|
path "/itens/1"
|
157
176
|
end
|
158
177
|
|
159
|
-
@crawler_instance.instance_variables.index(:@metadata_dup).
|
178
|
+
expect(@crawler_instance.instance_variables.index(:@metadata_dup)).to be_nil
|
160
179
|
end
|
161
180
|
|
162
181
|
context "response code" do
|
@@ -169,7 +188,7 @@ describe Wombat::Crawler do
|
|
169
188
|
@crawler.search "css=.btn-search"
|
170
189
|
|
171
190
|
@crawler_instance.crawl
|
172
|
-
@crawler_instance.response_code.
|
191
|
+
expect(@crawler_instance.response_code).to be(200)
|
173
192
|
end
|
174
193
|
|
175
194
|
end
|
@@ -183,7 +202,7 @@ describe Wombat::Crawler do
|
|
183
202
|
@crawler.document_format :xml
|
184
203
|
|
185
204
|
@crawler_instance.crawl
|
186
|
-
@crawler_instance.response_code.
|
205
|
+
expect(@crawler_instance.response_code).to be(200)
|
187
206
|
end
|
188
207
|
end
|
189
208
|
|
@@ -195,8 +214,9 @@ describe Wombat::Crawler do
|
|
195
214
|
|
196
215
|
@crawler.search "css=.btn-search"
|
197
216
|
|
198
|
-
lambda { @crawler_instance.crawl }.
|
199
|
-
|
217
|
+
expect(lambda { @crawler_instance.crawl }).to raise_error(
|
218
|
+
"404 => Net::HTTPNotFound for http://www.terra.com.br/portal/ -- unhandled response")
|
219
|
+
expect(@crawler_instance.response_code).to be(404)
|
200
220
|
end
|
201
221
|
end
|
202
222
|
|
@@ -208,8 +228,10 @@ describe Wombat::Crawler do
|
|
208
228
|
|
209
229
|
@crawler.search "css=.btn-search"
|
210
230
|
@crawler.document_format :xml
|
211
|
-
lambda {
|
212
|
-
|
231
|
+
expect(lambda {
|
232
|
+
@crawler_instance.crawl
|
233
|
+
}).to raise_error(RestClient::ResourceNotFound)
|
234
|
+
expect(@crawler_instance.response_code).to be(404)
|
213
235
|
end
|
214
236
|
end
|
215
237
|
end
|
@@ -217,8 +239,8 @@ describe Wombat::Crawler do
|
|
217
239
|
|
218
240
|
describe '#scrape' do
|
219
241
|
it 'should alias to crawl' do
|
220
|
-
@crawler_instance.
|
242
|
+
expect(@crawler_instance).to receive :parse
|
221
243
|
@crawler_instance.scrape
|
222
244
|
end
|
223
245
|
end
|
224
|
-
end
|
246
|
+
end
|
@@ -0,0 +1,63 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
require 'spec_helper'
|
3
|
+
|
4
|
+
describe 'crawler base and one derived class' do
|
5
|
+
class A
|
6
|
+
include Wombat::Crawler
|
7
|
+
title 'xpath=//head/title'
|
8
|
+
end
|
9
|
+
|
10
|
+
class B < A
|
11
|
+
base_url "http://www.terra.com.br"
|
12
|
+
path "/portal"
|
13
|
+
search "css=.btn-search"
|
14
|
+
end
|
15
|
+
|
16
|
+
it 'extracts properties defined in the base class ' do
|
17
|
+
VCR.use_cassette('basic_crawler_page') do
|
18
|
+
b = B.new
|
19
|
+
data = b.crawl
|
20
|
+
expect(data).to have_key('title')
|
21
|
+
expect(data).to have_key('search')
|
22
|
+
end
|
23
|
+
end
|
24
|
+
end
|
25
|
+
|
26
|
+
describe 'two derived classes' do
|
27
|
+
class D
|
28
|
+
include Wombat::Crawler
|
29
|
+
title 'xpath=//head/title'
|
30
|
+
end
|
31
|
+
|
32
|
+
class E < D
|
33
|
+
base_url "http://www.terra.com.br"
|
34
|
+
path "/portal"
|
35
|
+
search "css=.btn-search"
|
36
|
+
end
|
37
|
+
|
38
|
+
class F < D
|
39
|
+
title 'xpath=//broken/badly'
|
40
|
+
base_url "https://www.github.com"
|
41
|
+
path "/"
|
42
|
+
end
|
43
|
+
|
44
|
+
|
45
|
+
it 'second derived class does not overwrite base class properties' do
|
46
|
+
VCR.use_cassette('basic_crawler_page') do
|
47
|
+
e = E.new
|
48
|
+
data = e.crawl
|
49
|
+
expect(data).to have_key('title')
|
50
|
+
expect(data).to have_key('search')
|
51
|
+
expect(data['title']).to eq('Terra - Notícias, vídeos, esportes, economia, diversão, música, moda, fotolog, blog, chat')
|
52
|
+
end
|
53
|
+
end
|
54
|
+
|
55
|
+
it 'first derived class does not overwrite base class properties' do
|
56
|
+
VCR.use_cassette('follow_links') do
|
57
|
+
f = F.new
|
58
|
+
data = f.crawl
|
59
|
+
expect(data).to have_key('title')
|
60
|
+
expect(data['title']).to be_nil
|
61
|
+
end
|
62
|
+
end
|
63
|
+
end
|
@@ -273,4 +273,34 @@ describe 'basic crawler setup' do
|
|
273
273
|
}
|
274
274
|
end
|
275
275
|
end
|
276
|
+
|
277
|
+
it 'should make post requests if needed' do
|
278
|
+
VCR.use_cassette('make_post_request') do
|
279
|
+
data = { your_name: "Name" }
|
280
|
+
crawler = Class.new
|
281
|
+
crawler.send(:include, Wombat::Crawler)
|
282
|
+
crawler.base_url "http://hroch486.icpf.cas.cz"
|
283
|
+
crawler.path "/cgi-bin/echo.pl"
|
284
|
+
crawler.http_method :post
|
285
|
+
crawler.data data
|
286
|
+
|
287
|
+
crawler.my_name 'css=ul:last li:last'
|
288
|
+
|
289
|
+
crawler_instance = crawler.new
|
290
|
+
results = crawler_instance.crawl
|
291
|
+
|
292
|
+
results["my_name"].should eq("your_name = Name")
|
293
|
+
end
|
294
|
+
end
|
295
|
+
|
296
|
+
it 'should let the url be passed as an argument to crawl' do
|
297
|
+
VCR.use_cassette('basic_crawler_page') do
|
298
|
+
crawler = Class.new
|
299
|
+
crawler.send(:include, Wombat::Crawler)
|
300
|
+
crawler.send(:title, 'xpath=//head/title')
|
301
|
+
crawler_instance = crawler.new
|
302
|
+
results = crawler_instance.crawl('http://www.terra.com.br/portal')
|
303
|
+
results['title'].should eq('Terra - Notícias, vídeos, esportes, economia, diversão, música, moda, fotolog, blog, chat')
|
304
|
+
end
|
305
|
+
end
|
276
306
|
end
|
@@ -17,11 +17,46 @@ describe Wombat::Processing::Parser do
|
|
17
17
|
fake_document.should_receive(:parser).and_return(fake_parser)
|
18
18
|
fake_document.should_receive(:header).and_return(fake_header)
|
19
19
|
fake_parser.should_receive(:headers=)
|
20
|
+
fake_parser.should_receive(:mechanize_page=)
|
20
21
|
@parser.mechanize.should_receive(:get).with("http://www.google.com/search").and_return fake_document
|
21
22
|
|
22
23
|
@parser.parse @metadata
|
23
24
|
end
|
24
25
|
|
26
|
+
it 'should able to make post requests' do
|
27
|
+
data = { your_name: "Name" }
|
28
|
+
@metadata.base_url "http://hroch486.icpf.cas.cz"
|
29
|
+
@metadata.path "/cgi-bin/echo.pl"
|
30
|
+
@metadata.http_method :post
|
31
|
+
@metadata.data data
|
32
|
+
|
33
|
+
fake_document = double :document
|
34
|
+
fake_parser = double :parser
|
35
|
+
fake_header = double :header
|
36
|
+
fake_document.should_receive(:parser).and_return(fake_parser)
|
37
|
+
fake_document.should_receive(:header).and_return(fake_header)
|
38
|
+
fake_parser.should_receive(:headers=)
|
39
|
+
fake_parser.should_receive(:mechanize_page=)
|
40
|
+
@parser.mechanize.should_receive(:post).with("http://hroch486.icpf.cas.cz/cgi-bin/echo.pl", data).and_return fake_document
|
41
|
+
|
42
|
+
@parser.parse @metadata
|
43
|
+
end
|
44
|
+
|
45
|
+
it 'should accept the url as an argument to parse' do
|
46
|
+
@metadata.http_method :get
|
47
|
+
|
48
|
+
fake_document = double :document
|
49
|
+
fake_parser = double :parser
|
50
|
+
fake_header = double :header
|
51
|
+
fake_document.should_receive(:parser).and_return(fake_parser)
|
52
|
+
fake_document.should_receive(:header).and_return(fake_header)
|
53
|
+
fake_parser.should_receive(:headers=)
|
54
|
+
fake_parser.should_receive(:mechanize_page=)
|
55
|
+
@parser.mechanize.should_receive(:get).with("https://www.github.com/notifications").and_return fake_document
|
56
|
+
|
57
|
+
@parser.parse(@metadata, 'https://www.github.com/notifications')
|
58
|
+
end
|
59
|
+
|
25
60
|
it 'should correctly parse xml documents' do
|
26
61
|
fake_document = double :xml
|
27
62
|
fake_parser = double :parser
|
@@ -1,18 +1,18 @@
|
|
1
1
|
require 'spec_helper'
|
2
2
|
|
3
3
|
describe Wombat::Property::Locators::Factory do
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
4
|
+
it 'should instantiate correct locator according to property type' do
|
5
|
+
Wombat::Property::Locators::Factory.locator_for(Wombat::DSL::Property.new(nil, nil, :text)).should be_a(Wombat::Property::Locators::Text)
|
6
|
+
Wombat::Property::Locators::Factory.locator_for(Wombat::DSL::Property.new(nil, nil, :html)).should be_a(Wombat::Property::Locators::Html)
|
7
|
+
Wombat::Property::Locators::Factory.locator_for(Wombat::DSL::Property.new(nil, nil, :list)).should be_a(Wombat::Property::Locators::List)
|
8
|
+
Wombat::Property::Locators::Factory.locator_for(Wombat::DSL::Property.new(nil, nil, :follow)).should be_a(Wombat::Property::Locators::Follow)
|
9
|
+
Wombat::Property::Locators::Factory.locator_for(Wombat::DSL::Property.new(nil, nil, :iterator)).should be_a(Wombat::Property::Locators::Iterator)
|
10
|
+
Wombat::Property::Locators::Factory.locator_for(Wombat::DSL::Property.new(nil, nil, :container)).should be_a(Wombat::Property::Locators::PropertyGroup)
|
11
|
+
end
|
12
12
|
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
end
|
13
|
+
it 'should raise correct exception if provided property is of unknown type' do
|
14
|
+
lambda {
|
15
|
+
Wombat::Property::Locators::Factory.locator_for(Wombat::DSL::Property.new(nil, nil, :weird))
|
16
|
+
}.should raise_error(Wombat::Property::Locators::UnknownTypeException, "Unknown property format weird.")
|
17
|
+
end
|
18
|
+
end
|
@@ -1,26 +1,26 @@
|
|
1
1
|
require 'spec_helper'
|
2
2
|
|
3
3
|
describe Wombat::Property::Locators::Html do
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
4
|
+
it 'should locate html property' do
|
5
|
+
fake_elem = double :element
|
6
|
+
context = double :context
|
7
|
+
fake_elem.stub inner_html: "Something cool "
|
8
|
+
context.stub(:xpath).with("/abc", nil).and_return [fake_elem]
|
9
|
+
property = Wombat::DSL::Property.new('data1', 'xpath=/abc', :html)
|
10
10
|
|
11
|
-
|
11
|
+
locator = Wombat::Property::Locators::Html.new(property)
|
12
12
|
|
13
|
-
|
14
|
-
|
13
|
+
locator.locate(context).should == { "data1" => "Something cool" }
|
14
|
+
end
|
15
15
|
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
16
|
+
it 'should return null if the property cannot be found' do
|
17
|
+
fake_elem = double :element
|
18
|
+
context = double :context
|
19
|
+
context.stub(:xpath).with("/abc", nil).and_return []
|
20
|
+
property = Wombat::DSL::Property.new('data1', 'xpath=/abc', :html)
|
21
21
|
|
22
|
-
|
22
|
+
locator = Wombat::Property::Locators::Html.new(property)
|
23
23
|
|
24
|
-
|
25
|
-
|
26
|
-
end
|
24
|
+
locator.locate(context).should == { "data1" => nil }
|
25
|
+
end
|
26
|
+
end
|
@@ -1,13 +1,13 @@
|
|
1
1
|
require 'spec_helper'
|
2
2
|
|
3
3
|
describe Wombat::Property::Locators::List do
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
4
|
+
it 'should locate a list of nodes' do
|
5
|
+
context = double :context
|
6
|
+
context.stub(:css).with(".selector").and_return %w(1 2 3 4 5)
|
7
|
+
property = Wombat::DSL::Property.new('data1', 'css=.selector', :list)
|
8
8
|
|
9
|
-
|
9
|
+
locator = Wombat::Property::Locators::List.new(property)
|
10
10
|
|
11
|
-
|
12
|
-
|
13
|
-
end
|
11
|
+
locator.locate(context).should == { "data1" => %w(1 2 3 4 5) }
|
12
|
+
end
|
13
|
+
end
|