wombat 2.5.0 → 2.5.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.travis.yml +1 -0
- data/Gemfile.lock +17 -15
- data/README.md +3 -3
- data/VERSION +1 -1
- data/fixtures/vcr_cassettes/follow_links_v2.yml +21850 -0
- data/lib/wombat/processing/parser.rb +3 -0
- data/spec/integration/integration_spec.rb +124 -0
- data/spec/sample_crawler_spec.rb +6 -5
- data/wombat.gemspec +5 -4
- metadata +4 -3
@@ -274,6 +274,130 @@ describe 'basic crawler setup' do
|
|
274
274
|
end
|
275
275
|
end
|
276
276
|
|
277
|
+
it 'should follow links - issue #53' do
|
278
|
+
VCR.use_cassette('follow_links_v2', :preserve_exact_body_bytes => true) do
|
279
|
+
result = Wombat.crawl do
|
280
|
+
base_url "http://www.icy-veins.com/"
|
281
|
+
path "heroes/hero-guides"
|
282
|
+
|
283
|
+
heroes "css=.page_content .nav_content_block_entry_heroes_hero", :iterator do
|
284
|
+
name "xpath=."
|
285
|
+
builds "xpath=./a", :follow do
|
286
|
+
title "css=h1"
|
287
|
+
end
|
288
|
+
end
|
289
|
+
end
|
290
|
+
|
291
|
+
expect(result).to eq(
|
292
|
+
{"heroes"=>
|
293
|
+
[{"name"=>"Abathur",
|
294
|
+
"builds"=>[{"title"=>"Abathur Build Guide “You. Enhanced. Improved.”"}]},
|
295
|
+
{"name"=>"Anub'arak",
|
296
|
+
"builds"=>[{"title"=>"Anub'arak Build Guide “Time is fleeting.”"}]},
|
297
|
+
{"name"=>"Artanis",
|
298
|
+
"builds"=>[{"title"=>"Artanis Build Guide “Direct my wrath.”"}]},
|
299
|
+
{"name"=>"Arthas",
|
300
|
+
"builds"=>[{"title"=>"Arthas Build Guide “Frostmourne hungers.”"}]},
|
301
|
+
{"name"=>"Azmodan",
|
302
|
+
"builds"=>[{"title"=>"Azmodan Build Guide “I shall rule alone!”"}]},
|
303
|
+
{"name"=>"Brightwing",
|
304
|
+
"builds"=>
|
305
|
+
[{"title"=>"Brightwing Build Guide “You don't want to be my enemy!”"}]},
|
306
|
+
{"name"=>"Chen",
|
307
|
+
"builds"=>[{"title"=>"Chen Build Guide “I bring Pandamonium!”"}]},
|
308
|
+
{"name"=>"Cho",
|
309
|
+
"builds"=>
|
310
|
+
[{"title"=>
|
311
|
+
"Cho Build Guide “This Nexus, all its power... it will be MINE!”"}]},
|
312
|
+
{"name"=>"Diablo",
|
313
|
+
"builds"=>
|
314
|
+
[{"title"=>"Diablo Build Guide “Kneel before the Lord of Terror.”"}]},
|
315
|
+
{"name"=>"E.T.C.",
|
316
|
+
"builds"=>[{"title"=>"E.T.C. Build Guide “You can't kill the metal!”"}]},
|
317
|
+
{"name"=>"Falstad",
|
318
|
+
"builds"=>[{"title"=>"Falstad Build Guide “Time to drop the hammer!”"}]},
|
319
|
+
{"name"=>"Gall",
|
320
|
+
"builds"=>
|
321
|
+
[{"title"=>
|
322
|
+
"Gall Build Guide “Those who oppose me invite their own demise!”"}]},
|
323
|
+
{"name"=>"Gazlowe",
|
324
|
+
"builds"=>[{"title"=>"Gazlowe Build Guide “Hey, time is money friend.”"}]},
|
325
|
+
{"name"=>"Greymane",
|
326
|
+
"builds"=>[{"title"=>"Greymane Build Guide “I am the alpha!”"}]},
|
327
|
+
{"name"=>"Illidan",
|
328
|
+
"builds"=>[{"title"=>"Illidan Build Guide “Now I am complete!”"}]},
|
329
|
+
{"name"=>"Jaina",
|
330
|
+
"builds"=>[{"title"=>"Jaina Build Guide “I'm here to help.”"}]},
|
331
|
+
{"name"=>"Johanna",
|
332
|
+
"builds"=>[{"title"=>"Johanna Build Guide “The Crusade marches on!”"}]},
|
333
|
+
{"name"=>"Kael'thas",
|
334
|
+
"builds"=>[{"title"=>"Kael'thas Build Guide “Anar'alah belore!”"}]},
|
335
|
+
{"name"=>"Kerrigan",
|
336
|
+
"builds"=>[{"title"=>"Kerrigan Build Guide “Long live the real Queen.”"}]},
|
337
|
+
{"name"=>"Kharazim",
|
338
|
+
"builds"=>[{"title"=>"Kharazim Build Guide “Feel the wrath of Ytar!”"}]},
|
339
|
+
{"name"=>"Leoric",
|
340
|
+
"builds"=>
|
341
|
+
[{"title"=>"Leoric Build Guide “All will suffer as I have suffered!”"}]},
|
342
|
+
{"name"=>"Li Li",
|
343
|
+
"builds"=>[{"title"=>"Li Li Build Guide “Ready for adventure!”"}]},
|
344
|
+
{"name"=>"Lt. Morales",
|
345
|
+
"builds"=>
|
346
|
+
[{"title"=>
|
347
|
+
"Lt. Morales Build Guide “I protect every member of my squad!”"}]},
|
348
|
+
{"name"=>"Lunara",
|
349
|
+
"builds"=>[{"title"=>"Lunara Build Guide “Taste my spear!”"}]},
|
350
|
+
{"name"=>"Malfurion",
|
351
|
+
"builds"=>
|
352
|
+
[{"title"=>"Malfurion Build Guide “Nature will rise against you!”"}]},
|
353
|
+
{"name"=>"Muradin",
|
354
|
+
"builds"=>[{"title"=>"Muradin Build Guide “It's hammer time!”"}]},
|
355
|
+
{"name"=>"Murky",
|
356
|
+
"builds"=>[{"title"=>"Murky Build Guide “Mrglrglmrglmrrrlggg!”"}]},
|
357
|
+
{"name"=>"Nazeebo",
|
358
|
+
"builds"=>[{"title"=>"Nazeebo Build Guide “The spirits speak to me.”"}]},
|
359
|
+
{"name"=>"Nova",
|
360
|
+
"builds"=>
|
361
|
+
[{"title"=>"Nova Build Guide “Ready to have your mind blown?”"}]},
|
362
|
+
{"name"=>"Raynor",
|
363
|
+
"builds"=>[{"title"=>"Raynor Build Guide “Hit 'em hard and fast.”"}]},
|
364
|
+
{"name"=>"Rehgar",
|
365
|
+
"builds"=>[{"title"=>"Rehgar Build Guide “To the Arena!”"}]},
|
366
|
+
{"name"=>"Rexxar",
|
367
|
+
"builds"=>[{"title"=>"Rexxar Build Guide “The beasts obey me!”"}]},
|
368
|
+
{"name"=>"Sgt. Hammer",
|
369
|
+
"builds"=>[{"title"=>"Sgt. Hammer Build Guide “Napalm's airborne!”"}]},
|
370
|
+
{"name"=>"Sonya",
|
371
|
+
"builds"=>[{"title"=>"Sonya Build Guide “Time to die!”"}]},
|
372
|
+
{"name"=>"Stitches",
|
373
|
+
"builds"=>[{"title"=>"Stitches Build Guide “ROAAAARR!”"}]},
|
374
|
+
{"name"=>"Sylvanas",
|
375
|
+
"builds"=>[{"title"=>"Sylvanas Build Guide “Let none survive!”"}]},
|
376
|
+
{"name"=>"Tassadar",
|
377
|
+
"builds"=>[{"title"=>"Tassadar Build Guide “Executor, I stand ready!”"}]},
|
378
|
+
{"name"=>"The Butcher",
|
379
|
+
"builds"=>[{"title"=>"The Butcher Build Guide “Fresh meat!”"}]},
|
380
|
+
{"name"=>"Thrall",
|
381
|
+
"builds"=>
|
382
|
+
[{"title"=>"Thrall Build Guide “The Elements will destroy you!”"}]},
|
383
|
+
{"name"=>"Tychus",
|
384
|
+
"builds"=>[{"title"=>"Tychus Build Guide “So, you gonna bark all day?”"}]},
|
385
|
+
{"name"=>"Tyrael",
|
386
|
+
"builds"=>[{"title"=>"Tyrael Build Guide “I am Justice itself!”"}]},
|
387
|
+
{"name"=>"Tyrande",
|
388
|
+
"builds"=>
|
389
|
+
[{"title"=>"Tyrande Build Guide “Feel the wrath of the Heavens!”"}]},
|
390
|
+
{"name"=>"Uther",
|
391
|
+
"builds"=>[{"title"=>"Uther Build Guide “I will fight with honor!”"}]},
|
392
|
+
{"name"=>"Valla",
|
393
|
+
"builds"=>[{"title"=>"Valla Build Guide “Be vewy, vewy quiet...”"}]},
|
394
|
+
{"name"=>"Zagara",
|
395
|
+
"builds"=>[{"title"=>"Zagara Build Guide “The Swarm hungers.”"}]},
|
396
|
+
{"name"=>"Zeratul",
|
397
|
+
"builds"=>[{"title"=>"Zeratul Build Guide “I serve the Xel'naga.”"}]}]})
|
398
|
+
end
|
399
|
+
end
|
400
|
+
|
277
401
|
it 'should make post requests if needed' do
|
278
402
|
VCR.use_cassette('make_post_request') do
|
279
403
|
data = { your_name: "Name" }
|
data/spec/sample_crawler_spec.rb
CHANGED
@@ -10,13 +10,14 @@ describe SampleCrawler do
|
|
10
10
|
@sample_crawler.should_receive(:parse) do |args|
|
11
11
|
args['event_group'].wombat_property_selector.should == "css=div.title-agenda"
|
12
12
|
it = args['event_group']
|
13
|
-
it["event"]["title"].wombat_property_selector.
|
14
|
-
it["event"]["date"].wombat_property_selector.
|
15
|
-
|
16
|
-
it["
|
13
|
+
expect(it["event"]["title"].wombat_property_selector).to eq("xpath=.")
|
14
|
+
expect(it["event"]["date"].wombat_property_selector).to(
|
15
|
+
eq("xpath=//div[@class='scrollable-items']/div[@class='s-item active']//a"))
|
16
|
+
expect(it["event"]["type"].wombat_property_selector).to eq("xpath=.type")
|
17
|
+
expect(it["venue"]["name"].wombat_property_selector).to eq("xpath=.")
|
17
18
|
|
18
19
|
args[:base_url].should == 'http://www.obaoba.com.br'
|
19
|
-
args[:path].should == '/porto-alegre/agenda'
|
20
|
+
args[:path].should == '/porto-alegre/agenda'
|
20
21
|
end
|
21
22
|
|
22
23
|
@sample_crawler.crawl
|
data/wombat.gemspec
CHANGED
@@ -2,16 +2,16 @@
|
|
2
2
|
# DO NOT EDIT THIS FILE DIRECTLY
|
3
3
|
# Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
|
4
4
|
# -*- encoding: utf-8 -*-
|
5
|
-
# stub: wombat 2.5.0 ruby lib
|
5
|
+
# stub: wombat 2.5.1 ruby lib
|
6
6
|
|
7
7
|
Gem::Specification.new do |s|
|
8
8
|
s.name = "wombat"
|
9
|
-
s.version = "2.5.0"
|
9
|
+
s.version = "2.5.1"
|
10
10
|
|
11
11
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
12
12
|
s.require_paths = ["lib"]
|
13
13
|
s.authors = ["Felipe Lima"]
|
14
|
-
s.date = "2016-
|
14
|
+
s.date = "2016-03-31"
|
15
15
|
s.description = "Generic Web crawler with a DSL that parses structured data from web pages"
|
16
16
|
s.email = "felipe.lima@gmail.com"
|
17
17
|
s.extra_rdoc_files = [
|
@@ -40,6 +40,7 @@ Gem::Specification.new do |s|
|
|
40
40
|
"fixtures/vcr_cassettes/broken_selector.yml",
|
41
41
|
"fixtures/vcr_cassettes/error_page.yml",
|
42
42
|
"fixtures/vcr_cassettes/follow_links.yml",
|
43
|
+
"fixtures/vcr_cassettes/follow_links_v2.yml",
|
43
44
|
"fixtures/vcr_cassettes/follow_relative_links.yml",
|
44
45
|
"fixtures/vcr_cassettes/for_each_page.yml",
|
45
46
|
"fixtures/vcr_cassettes/headers_selector.yml",
|
@@ -86,7 +87,7 @@ Gem::Specification.new do |s|
|
|
86
87
|
s.homepage = "http://felipecsl.github.com/wombat"
|
87
88
|
s.licenses = ["MIT"]
|
88
89
|
s.required_ruby_version = Gem::Requirement.new(">= 1.9")
|
89
|
-
s.rubygems_version = "2.4.
|
90
|
+
s.rubygems_version = "2.4.8"
|
90
91
|
s.summary = "Ruby DSL to scrape web pages"
|
91
92
|
|
92
93
|
if s.respond_to? :specification_version then
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: wombat
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.5.0
|
4
|
+
version: 2.5.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Felipe Lima
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-
|
11
|
+
date: 2016-03-31 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: mechanize
|
@@ -193,6 +193,7 @@ files:
|
|
193
193
|
- fixtures/vcr_cassettes/broken_selector.yml
|
194
194
|
- fixtures/vcr_cassettes/error_page.yml
|
195
195
|
- fixtures/vcr_cassettes/follow_links.yml
|
196
|
+
- fixtures/vcr_cassettes/follow_links_v2.yml
|
196
197
|
- fixtures/vcr_cassettes/follow_relative_links.yml
|
197
198
|
- fixtures/vcr_cassettes/for_each_page.yml
|
198
199
|
- fixtures/vcr_cassettes/headers_selector.yml
|
@@ -255,7 +256,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
255
256
|
version: '0'
|
256
257
|
requirements: []
|
257
258
|
rubyforge_project:
|
258
|
-
rubygems_version: 2.4.
|
259
|
+
rubygems_version: 2.4.8
|
259
260
|
signing_key:
|
260
261
|
specification_version: 4
|
261
262
|
summary: Ruby DSL to scrape web pages
|