wombat 2.5.0 → 2.5.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -9,6 +9,9 @@ module Nokogiri
9
9
  class Document
10
10
  attr_accessor :headers
11
11
  end
12
+ class Element
13
+ attr_accessor :mechanize_page
14
+ end
12
15
  end
13
16
  module HTML
14
17
  class Document
@@ -274,6 +274,130 @@ describe 'basic crawler setup' do
274
274
  end
275
275
  end
276
276
 
277
+ it 'should follow links - issue #53' do
278
+ VCR.use_cassette('follow_links_v2', :preserve_exact_body_bytes => true) do
279
+ result = Wombat.crawl do
280
+ base_url "http://www.icy-veins.com/"
281
+ path "heroes/hero-guides"
282
+
283
+ heroes "css=.page_content .nav_content_block_entry_heroes_hero", :iterator do
284
+ name "xpath=."
285
+ builds "xpath=./a", :follow do
286
+ title "css=h1"
287
+ end
288
+ end
289
+ end
290
+
291
+ expect(result).to eq(
292
+ {"heroes"=>
293
+ [{"name"=>"Abathur",
294
+ "builds"=>[{"title"=>"Abathur Build Guide “You. Enhanced. Improved.”"}]},
295
+ {"name"=>"Anub'arak",
296
+ "builds"=>[{"title"=>"Anub'arak Build Guide “Time is fleeting.”"}]},
297
+ {"name"=>"Artanis",
298
+ "builds"=>[{"title"=>"Artanis Build Guide “Direct my wrath.”"}]},
299
+ {"name"=>"Arthas",
300
+ "builds"=>[{"title"=>"Arthas Build Guide “Frostmourne hungers.”"}]},
301
+ {"name"=>"Azmodan",
302
+ "builds"=>[{"title"=>"Azmodan Build Guide “I shall rule alone!”"}]},
303
+ {"name"=>"Brightwing",
304
+ "builds"=>
305
+ [{"title"=>"Brightwing Build Guide “You don't want to be my enemy!”"}]},
306
+ {"name"=>"Chen",
307
+ "builds"=>[{"title"=>"Chen Build Guide “I bring Pandamonium!”"}]},
308
+ {"name"=>"Cho",
309
+ "builds"=>
310
+ [{"title"=>
311
+ "Cho Build Guide “This Nexus, all its power... it will be MINE!”"}]},
312
+ {"name"=>"Diablo",
313
+ "builds"=>
314
+ [{"title"=>"Diablo Build Guide “Kneel before the Lord of Terror.”"}]},
315
+ {"name"=>"E.T.C.",
316
+ "builds"=>[{"title"=>"E.T.C. Build Guide “You can't kill the metal!”"}]},
317
+ {"name"=>"Falstad",
318
+ "builds"=>[{"title"=>"Falstad Build Guide “Time to drop the hammer!”"}]},
319
+ {"name"=>"Gall",
320
+ "builds"=>
321
+ [{"title"=>
322
+ "Gall Build Guide “Those who oppose me invite their own demise!”"}]},
323
+ {"name"=>"Gazlowe",
324
+ "builds"=>[{"title"=>"Gazlowe Build Guide “Hey, time is money friend.”"}]},
325
+ {"name"=>"Greymane",
326
+ "builds"=>[{"title"=>"Greymane Build Guide “I am the alpha!”"}]},
327
+ {"name"=>"Illidan",
328
+ "builds"=>[{"title"=>"Illidan Build Guide “Now I am complete!”"}]},
329
+ {"name"=>"Jaina",
330
+ "builds"=>[{"title"=>"Jaina Build Guide “I'm here to help.”"}]},
331
+ {"name"=>"Johanna",
332
+ "builds"=>[{"title"=>"Johanna Build Guide “The Crusade marches on!”"}]},
333
+ {"name"=>"Kael'thas",
334
+ "builds"=>[{"title"=>"Kael'thas Build Guide “Anar'alah belore!”"}]},
335
+ {"name"=>"Kerrigan",
336
+ "builds"=>[{"title"=>"Kerrigan Build Guide “Long live the real Queen.”"}]},
337
+ {"name"=>"Kharazim",
338
+ "builds"=>[{"title"=>"Kharazim Build Guide “Feel the wrath of Ytar!”"}]},
339
+ {"name"=>"Leoric",
340
+ "builds"=>
341
+ [{"title"=>"Leoric Build Guide “All will suffer as I have suffered!”"}]},
342
+ {"name"=>"Li Li",
343
+ "builds"=>[{"title"=>"Li Li Build Guide “Ready for adventure!”"}]},
344
+ {"name"=>"Lt. Morales",
345
+ "builds"=>
346
+ [{"title"=>
347
+ "Lt. Morales Build Guide “I protect every member of my squad!”"}]},
348
+ {"name"=>"Lunara",
349
+ "builds"=>[{"title"=>"Lunara Build Guide “Taste my spear!”"}]},
350
+ {"name"=>"Malfurion",
351
+ "builds"=>
352
+ [{"title"=>"Malfurion Build Guide “Nature will rise against you!”"}]},
353
+ {"name"=>"Muradin",
354
+ "builds"=>[{"title"=>"Muradin Build Guide “It's hammer time!”"}]},
355
+ {"name"=>"Murky",
356
+ "builds"=>[{"title"=>"Murky Build Guide “Mrglrglmrglmrrrlggg!”"}]},
357
+ {"name"=>"Nazeebo",
358
+ "builds"=>[{"title"=>"Nazeebo Build Guide “The spirits speak to me.”"}]},
359
+ {"name"=>"Nova",
360
+ "builds"=>
361
+ [{"title"=>"Nova Build Guide “Ready to have your mind blown?”"}]},
362
+ {"name"=>"Raynor",
363
+ "builds"=>[{"title"=>"Raynor Build Guide “Hit 'em hard and fast.”"}]},
364
+ {"name"=>"Rehgar",
365
+ "builds"=>[{"title"=>"Rehgar Build Guide “To the Arena!”"}]},
366
+ {"name"=>"Rexxar",
367
+ "builds"=>[{"title"=>"Rexxar Build Guide “The beasts obey me!”"}]},
368
+ {"name"=>"Sgt. Hammer",
369
+ "builds"=>[{"title"=>"Sgt. Hammer Build Guide “Napalm's airborne!”"}]},
370
+ {"name"=>"Sonya",
371
+ "builds"=>[{"title"=>"Sonya Build Guide “Time to die!”"}]},
372
+ {"name"=>"Stitches",
373
+ "builds"=>[{"title"=>"Stitches Build Guide “ROAAAARR!”"}]},
374
+ {"name"=>"Sylvanas",
375
+ "builds"=>[{"title"=>"Sylvanas Build Guide “Let none survive!”"}]},
376
+ {"name"=>"Tassadar",
377
+ "builds"=>[{"title"=>"Tassadar Build Guide “Executor, I stand ready!”"}]},
378
+ {"name"=>"The Butcher",
379
+ "builds"=>[{"title"=>"The Butcher Build Guide “Fresh meat!”"}]},
380
+ {"name"=>"Thrall",
381
+ "builds"=>
382
+ [{"title"=>"Thrall Build Guide “The Elements will destroy you!”"}]},
383
+ {"name"=>"Tychus",
384
+ "builds"=>[{"title"=>"Tychus Build Guide “So, you gonna bark all day?”"}]},
385
+ {"name"=>"Tyrael",
386
+ "builds"=>[{"title"=>"Tyrael Build Guide “I am Justice itself!”"}]},
387
+ {"name"=>"Tyrande",
388
+ "builds"=>
389
+ [{"title"=>"Tyrande Build Guide “Feel the wrath of the Heavens!”"}]},
390
+ {"name"=>"Uther",
391
+ "builds"=>[{"title"=>"Uther Build Guide “I will fight with honor!”"}]},
392
+ {"name"=>"Valla",
393
+ "builds"=>[{"title"=>"Valla Build Guide “Be vewy, vewy quiet...”"}]},
394
+ {"name"=>"Zagara",
395
+ "builds"=>[{"title"=>"Zagara Build Guide “The Swarm hungers.”"}]},
396
+ {"name"=>"Zeratul",
397
+ "builds"=>[{"title"=>"Zeratul Build Guide “I serve the Xel'naga.”"}]}]})
398
+ end
399
+ end
400
+
277
401
  it 'should make post requests if needed' do
278
402
  VCR.use_cassette('make_post_request') do
279
403
  data = { your_name: "Name" }
@@ -10,13 +10,14 @@ describe SampleCrawler do
10
10
  @sample_crawler.should_receive(:parse) do |args|
11
11
  args['event_group'].wombat_property_selector.should == "css=div.title-agenda"
12
12
  it = args['event_group']
13
- it["event"]["title"].wombat_property_selector.should == "xpath=."
14
- it["event"]["date"].wombat_property_selector.should == "xpath=//div[@class='scrollable-items']/div[@class='s-item active']//a"
15
- it["event"]["type"].wombat_property_selector.should == "xpath=.type"
16
- it["venue"]["name"].wombat_property_selector.should == "xpath=."
13
+ expect(it["event"]["title"].wombat_property_selector).to eq("xpath=.")
14
+ expect(it["event"]["date"].wombat_property_selector).to(
15
+ eq("xpath=//div[@class='scrollable-items']/div[@class='s-item active']//a"))
16
+ expect(it["event"]["type"].wombat_property_selector).to eq("xpath=.type")
17
+ expect(it["venue"]["name"].wombat_property_selector).to eq("xpath=.")
17
18
 
18
19
  args[:base_url].should == 'http://www.obaoba.com.br'
19
- args[:path].should == '/porto-alegre/agenda'
20
+ args[:path].should == '/porto-alegre/agenda'
20
21
  end
21
22
 
22
23
  @sample_crawler.crawl
@@ -2,16 +2,16 @@
2
2
  # DO NOT EDIT THIS FILE DIRECTLY
3
3
  # Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
4
4
  # -*- encoding: utf-8 -*-
5
- # stub: wombat 2.5.0 ruby lib
5
+ # stub: wombat 2.5.1 ruby lib
6
6
 
7
7
  Gem::Specification.new do |s|
8
8
  s.name = "wombat"
9
- s.version = "2.5.0"
9
+ s.version = "2.5.1"
10
10
 
11
11
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
12
12
  s.require_paths = ["lib"]
13
13
  s.authors = ["Felipe Lima"]
14
- s.date = "2016-01-27"
14
+ s.date = "2016-03-31"
15
15
  s.description = "Generic Web crawler with a DSL that parses structured data from web pages"
16
16
  s.email = "felipe.lima@gmail.com"
17
17
  s.extra_rdoc_files = [
@@ -40,6 +40,7 @@ Gem::Specification.new do |s|
40
40
  "fixtures/vcr_cassettes/broken_selector.yml",
41
41
  "fixtures/vcr_cassettes/error_page.yml",
42
42
  "fixtures/vcr_cassettes/follow_links.yml",
43
+ "fixtures/vcr_cassettes/follow_links_v2.yml",
43
44
  "fixtures/vcr_cassettes/follow_relative_links.yml",
44
45
  "fixtures/vcr_cassettes/for_each_page.yml",
45
46
  "fixtures/vcr_cassettes/headers_selector.yml",
@@ -86,7 +87,7 @@ Gem::Specification.new do |s|
86
87
  s.homepage = "http://felipecsl.github.com/wombat"
87
88
  s.licenses = ["MIT"]
88
89
  s.required_ruby_version = Gem::Requirement.new(">= 1.9")
89
- s.rubygems_version = "2.4.6"
90
+ s.rubygems_version = "2.4.8"
90
91
  s.summary = "Ruby DSL to scrape web pages"
91
92
 
92
93
  if s.respond_to? :specification_version then
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: wombat
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.5.0
4
+ version: 2.5.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Felipe Lima
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-01-27 00:00:00.000000000 Z
11
+ date: 2016-03-31 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: mechanize
@@ -193,6 +193,7 @@ files:
193
193
  - fixtures/vcr_cassettes/broken_selector.yml
194
194
  - fixtures/vcr_cassettes/error_page.yml
195
195
  - fixtures/vcr_cassettes/follow_links.yml
196
+ - fixtures/vcr_cassettes/follow_links_v2.yml
196
197
  - fixtures/vcr_cassettes/follow_relative_links.yml
197
198
  - fixtures/vcr_cassettes/for_each_page.yml
198
199
  - fixtures/vcr_cassettes/headers_selector.yml
@@ -255,7 +256,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
255
256
  version: '0'
256
257
  requirements: []
257
258
  rubyforge_project:
258
- rubygems_version: 2.4.6
259
+ rubygems_version: 2.4.8
259
260
  signing_key:
260
261
  specification_version: 4
261
262
  summary: Ruby DSL to scrape web pages