wombat 2.5.0 → 2.5.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.travis.yml +1 -0
- data/Gemfile.lock +17 -15
- data/README.md +3 -3
- data/VERSION +1 -1
- data/fixtures/vcr_cassettes/follow_links_v2.yml +21850 -0
- data/lib/wombat/processing/parser.rb +3 -0
- data/spec/integration/integration_spec.rb +124 -0
- data/spec/sample_crawler_spec.rb +6 -5
- data/wombat.gemspec +5 -4
- metadata +4 -3
@@ -274,6 +274,130 @@ describe 'basic crawler setup' do
|
|
274
274
|
end
|
275
275
|
end
|
276
276
|
|
277
|
+
it 'should follow links - issue #53' do
|
278
|
+
VCR.use_cassette('follow_links_v2', :preserve_exact_body_bytes => true) do
|
279
|
+
result = Wombat.crawl do
|
280
|
+
base_url "http://www.icy-veins.com/"
|
281
|
+
path "heroes/hero-guides"
|
282
|
+
|
283
|
+
heroes "css=.page_content .nav_content_block_entry_heroes_hero", :iterator do
|
284
|
+
name "xpath=."
|
285
|
+
builds "xpath=./a", :follow do
|
286
|
+
title "css=h1"
|
287
|
+
end
|
288
|
+
end
|
289
|
+
end
|
290
|
+
|
291
|
+
expect(result).to eq(
|
292
|
+
{"heroes"=>
|
293
|
+
[{"name"=>"Abathur",
|
294
|
+
"builds"=>[{"title"=>"Abathur Build Guide “You. Enhanced. Improved.”"}]},
|
295
|
+
{"name"=>"Anub'arak",
|
296
|
+
"builds"=>[{"title"=>"Anub'arak Build Guide “Time is fleeting.”"}]},
|
297
|
+
{"name"=>"Artanis",
|
298
|
+
"builds"=>[{"title"=>"Artanis Build Guide “Direct my wrath.”"}]},
|
299
|
+
{"name"=>"Arthas",
|
300
|
+
"builds"=>[{"title"=>"Arthas Build Guide “Frostmourne hungers.”"}]},
|
301
|
+
{"name"=>"Azmodan",
|
302
|
+
"builds"=>[{"title"=>"Azmodan Build Guide “I shall rule alone!”"}]},
|
303
|
+
{"name"=>"Brightwing",
|
304
|
+
"builds"=>
|
305
|
+
[{"title"=>"Brightwing Build Guide “You don't want to be my enemy!”"}]},
|
306
|
+
{"name"=>"Chen",
|
307
|
+
"builds"=>[{"title"=>"Chen Build Guide “I bring Pandamonium!”"}]},
|
308
|
+
{"name"=>"Cho",
|
309
|
+
"builds"=>
|
310
|
+
[{"title"=>
|
311
|
+
"Cho Build Guide “This Nexus, all its power... it will be MINE!”"}]},
|
312
|
+
{"name"=>"Diablo",
|
313
|
+
"builds"=>
|
314
|
+
[{"title"=>"Diablo Build Guide “Kneel before the Lord of Terror.”"}]},
|
315
|
+
{"name"=>"E.T.C.",
|
316
|
+
"builds"=>[{"title"=>"E.T.C. Build Guide “You can't kill the metal!”"}]},
|
317
|
+
{"name"=>"Falstad",
|
318
|
+
"builds"=>[{"title"=>"Falstad Build Guide “Time to drop the hammer!”"}]},
|
319
|
+
{"name"=>"Gall",
|
320
|
+
"builds"=>
|
321
|
+
[{"title"=>
|
322
|
+
"Gall Build Guide “Those who oppose me invite their own demise!”"}]},
|
323
|
+
{"name"=>"Gazlowe",
|
324
|
+
"builds"=>[{"title"=>"Gazlowe Build Guide “Hey, time is money friend.”"}]},
|
325
|
+
{"name"=>"Greymane",
|
326
|
+
"builds"=>[{"title"=>"Greymane Build Guide “I am the alpha!”"}]},
|
327
|
+
{"name"=>"Illidan",
|
328
|
+
"builds"=>[{"title"=>"Illidan Build Guide “Now I am complete!”"}]},
|
329
|
+
{"name"=>"Jaina",
|
330
|
+
"builds"=>[{"title"=>"Jaina Build Guide “I'm here to help.”"}]},
|
331
|
+
{"name"=>"Johanna",
|
332
|
+
"builds"=>[{"title"=>"Johanna Build Guide “The Crusade marches on!”"}]},
|
333
|
+
{"name"=>"Kael'thas",
|
334
|
+
"builds"=>[{"title"=>"Kael'thas Build Guide “Anar'alah belore!”"}]},
|
335
|
+
{"name"=>"Kerrigan",
|
336
|
+
"builds"=>[{"title"=>"Kerrigan Build Guide “Long live the real Queen.”"}]},
|
337
|
+
{"name"=>"Kharazim",
|
338
|
+
"builds"=>[{"title"=>"Kharazim Build Guide “Feel the wrath of Ytar!”"}]},
|
339
|
+
{"name"=>"Leoric",
|
340
|
+
"builds"=>
|
341
|
+
[{"title"=>"Leoric Build Guide “All will suffer as I have suffered!”"}]},
|
342
|
+
{"name"=>"Li Li",
|
343
|
+
"builds"=>[{"title"=>"Li Li Build Guide “Ready for adventure!”"}]},
|
344
|
+
{"name"=>"Lt. Morales",
|
345
|
+
"builds"=>
|
346
|
+
[{"title"=>
|
347
|
+
"Lt. Morales Build Guide “I protect every member of my squad!”"}]},
|
348
|
+
{"name"=>"Lunara",
|
349
|
+
"builds"=>[{"title"=>"Lunara Build Guide “Taste my spear!”"}]},
|
350
|
+
{"name"=>"Malfurion",
|
351
|
+
"builds"=>
|
352
|
+
[{"title"=>"Malfurion Build Guide “Nature will rise against you!”"}]},
|
353
|
+
{"name"=>"Muradin",
|
354
|
+
"builds"=>[{"title"=>"Muradin Build Guide “It's hammer time!”"}]},
|
355
|
+
{"name"=>"Murky",
|
356
|
+
"builds"=>[{"title"=>"Murky Build Guide “Mrglrglmrglmrrrlggg!”"}]},
|
357
|
+
{"name"=>"Nazeebo",
|
358
|
+
"builds"=>[{"title"=>"Nazeebo Build Guide “The spirits speak to me.”"}]},
|
359
|
+
{"name"=>"Nova",
|
360
|
+
"builds"=>
|
361
|
+
[{"title"=>"Nova Build Guide “Ready to have your mind blown?”"}]},
|
362
|
+
{"name"=>"Raynor",
|
363
|
+
"builds"=>[{"title"=>"Raynor Build Guide “Hit 'em hard and fast.”"}]},
|
364
|
+
{"name"=>"Rehgar",
|
365
|
+
"builds"=>[{"title"=>"Rehgar Build Guide “To the Arena!”"}]},
|
366
|
+
{"name"=>"Rexxar",
|
367
|
+
"builds"=>[{"title"=>"Rexxar Build Guide “The beasts obey me!”"}]},
|
368
|
+
{"name"=>"Sgt. Hammer",
|
369
|
+
"builds"=>[{"title"=>"Sgt. Hammer Build Guide “Napalm's airborne!”"}]},
|
370
|
+
{"name"=>"Sonya",
|
371
|
+
"builds"=>[{"title"=>"Sonya Build Guide “Time to die!”"}]},
|
372
|
+
{"name"=>"Stitches",
|
373
|
+
"builds"=>[{"title"=>"Stitches Build Guide “ROAAAARR!”"}]},
|
374
|
+
{"name"=>"Sylvanas",
|
375
|
+
"builds"=>[{"title"=>"Sylvanas Build Guide “Let none survive!”"}]},
|
376
|
+
{"name"=>"Tassadar",
|
377
|
+
"builds"=>[{"title"=>"Tassadar Build Guide “Executor, I stand ready!”"}]},
|
378
|
+
{"name"=>"The Butcher",
|
379
|
+
"builds"=>[{"title"=>"The Butcher Build Guide “Fresh meat!”"}]},
|
380
|
+
{"name"=>"Thrall",
|
381
|
+
"builds"=>
|
382
|
+
[{"title"=>"Thrall Build Guide “The Elements will destroy you!”"}]},
|
383
|
+
{"name"=>"Tychus",
|
384
|
+
"builds"=>[{"title"=>"Tychus Build Guide “So, you gonna bark all day?”"}]},
|
385
|
+
{"name"=>"Tyrael",
|
386
|
+
"builds"=>[{"title"=>"Tyrael Build Guide “I am Justice itself!”"}]},
|
387
|
+
{"name"=>"Tyrande",
|
388
|
+
"builds"=>
|
389
|
+
[{"title"=>"Tyrande Build Guide “Feel the wrath of the Heavens!”"}]},
|
390
|
+
{"name"=>"Uther",
|
391
|
+
"builds"=>[{"title"=>"Uther Build Guide “I will fight with honor!”"}]},
|
392
|
+
{"name"=>"Valla",
|
393
|
+
"builds"=>[{"title"=>"Valla Build Guide “Be vewy, vewy quiet...”"}]},
|
394
|
+
{"name"=>"Zagara",
|
395
|
+
"builds"=>[{"title"=>"Zagara Build Guide “The Swarm hungers.”"}]},
|
396
|
+
{"name"=>"Zeratul",
|
397
|
+
"builds"=>[{"title"=>"Zeratul Build Guide “I serve the Xel'naga.”"}]}]})
|
398
|
+
end
|
399
|
+
end
|
400
|
+
|
277
401
|
it 'should make post requests if needed' do
|
278
402
|
VCR.use_cassette('make_post_request') do
|
279
403
|
data = { your_name: "Name" }
|
data/spec/sample_crawler_spec.rb
CHANGED
@@ -10,13 +10,14 @@ describe SampleCrawler do
|
|
10
10
|
@sample_crawler.should_receive(:parse) do |args|
|
11
11
|
args['event_group'].wombat_property_selector.should == "css=div.title-agenda"
|
12
12
|
it = args['event_group']
|
13
|
-
it["event"]["title"].wombat_property_selector.
|
14
|
-
it["event"]["date"].wombat_property_selector.
|
15
|
-
|
16
|
-
it["
|
13
|
+
expect(it["event"]["title"].wombat_property_selector).to eq("xpath=.")
|
14
|
+
expect(it["event"]["date"].wombat_property_selector).to(
|
15
|
+
eq("xpath=//div[@class='scrollable-items']/div[@class='s-item active']//a"))
|
16
|
+
expect(it["event"]["type"].wombat_property_selector).to eq("xpath=.type")
|
17
|
+
expect(it["venue"]["name"].wombat_property_selector).to eq("xpath=.")
|
17
18
|
|
18
19
|
args[:base_url].should == 'http://www.obaoba.com.br'
|
19
|
-
args[:path].should == '/porto-alegre/agenda'
|
20
|
+
args[:path].should == '/porto-alegre/agenda'
|
20
21
|
end
|
21
22
|
|
22
23
|
@sample_crawler.crawl
|
data/wombat.gemspec
CHANGED
@@ -2,16 +2,16 @@
|
|
2
2
|
# DO NOT EDIT THIS FILE DIRECTLY
|
3
3
|
# Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
|
4
4
|
# -*- encoding: utf-8 -*-
|
5
|
-
# stub: wombat 2.5.0 ruby lib
|
5
|
+
# stub: wombat 2.5.1 ruby lib
|
6
6
|
|
7
7
|
Gem::Specification.new do |s|
|
8
8
|
s.name = "wombat"
|
9
|
-
s.version = "2.5.0"
|
9
|
+
s.version = "2.5.1"
|
10
10
|
|
11
11
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
12
12
|
s.require_paths = ["lib"]
|
13
13
|
s.authors = ["Felipe Lima"]
|
14
|
-
s.date = "2016-
|
14
|
+
s.date = "2016-03-31"
|
15
15
|
s.description = "Generic Web crawler with a DSL that parses structured data from web pages"
|
16
16
|
s.email = "felipe.lima@gmail.com"
|
17
17
|
s.extra_rdoc_files = [
|
@@ -40,6 +40,7 @@ Gem::Specification.new do |s|
|
|
40
40
|
"fixtures/vcr_cassettes/broken_selector.yml",
|
41
41
|
"fixtures/vcr_cassettes/error_page.yml",
|
42
42
|
"fixtures/vcr_cassettes/follow_links.yml",
|
43
|
+
"fixtures/vcr_cassettes/follow_links_v2.yml",
|
43
44
|
"fixtures/vcr_cassettes/follow_relative_links.yml",
|
44
45
|
"fixtures/vcr_cassettes/for_each_page.yml",
|
45
46
|
"fixtures/vcr_cassettes/headers_selector.yml",
|
@@ -86,7 +87,7 @@ Gem::Specification.new do |s|
|
|
86
87
|
s.homepage = "http://felipecsl.github.com/wombat"
|
87
88
|
s.licenses = ["MIT"]
|
88
89
|
s.required_ruby_version = Gem::Requirement.new(">= 1.9")
|
89
|
-
s.rubygems_version = "2.4.
|
90
|
+
s.rubygems_version = "2.4.8"
|
90
91
|
s.summary = "Ruby DSL to scrape web pages"
|
91
92
|
|
92
93
|
if s.respond_to? :specification_version then
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: wombat
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.5.0
|
4
|
+
version: 2.5.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Felipe Lima
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-
|
11
|
+
date: 2016-03-31 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: mechanize
|
@@ -193,6 +193,7 @@ files:
|
|
193
193
|
- fixtures/vcr_cassettes/broken_selector.yml
|
194
194
|
- fixtures/vcr_cassettes/error_page.yml
|
195
195
|
- fixtures/vcr_cassettes/follow_links.yml
|
196
|
+
- fixtures/vcr_cassettes/follow_links_v2.yml
|
196
197
|
- fixtures/vcr_cassettes/follow_relative_links.yml
|
197
198
|
- fixtures/vcr_cassettes/for_each_page.yml
|
198
199
|
- fixtures/vcr_cassettes/headers_selector.yml
|
@@ -255,7 +256,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
255
256
|
version: '0'
|
256
257
|
requirements: []
|
257
258
|
rubyforge_project:
|
258
|
-
rubygems_version: 2.4.
|
259
|
+
rubygems_version: 2.4.8
|
259
260
|
signing_key:
|
260
261
|
specification_version: 4
|
261
262
|
summary: Ruby DSL to scrape web pages
|