wombat 2.5.0 → 2.5.1

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in the respective public registry.
@@ -9,6 +9,9 @@ module Nokogiri
9
9
  class Document
10
10
  attr_accessor :headers
11
11
  end
12
+ class Element
13
+ attr_accessor :mechanize_page
14
+ end
12
15
  end
13
16
  module HTML
14
17
  class Document
@@ -274,6 +274,130 @@ describe 'basic crawler setup' do
274
274
  end
275
275
  end
276
276
 
277
+ it 'should follow links - issue #53' do
278
+ VCR.use_cassette('follow_links_v2', :preserve_exact_body_bytes => true) do
279
+ result = Wombat.crawl do
280
+ base_url "http://www.icy-veins.com/"
281
+ path "heroes/hero-guides"
282
+
283
+ heroes "css=.page_content .nav_content_block_entry_heroes_hero", :iterator do
284
+ name "xpath=."
285
+ builds "xpath=./a", :follow do
286
+ title "css=h1"
287
+ end
288
+ end
289
+ end
290
+
291
+ expect(result).to eq(
292
+ {"heroes"=>
293
+ [{"name"=>"Abathur",
294
+ "builds"=>[{"title"=>"Abathur Build Guide “You. Enhanced. Improved.”"}]},
295
+ {"name"=>"Anub'arak",
296
+ "builds"=>[{"title"=>"Anub'arak Build Guide “Time is fleeting.”"}]},
297
+ {"name"=>"Artanis",
298
+ "builds"=>[{"title"=>"Artanis Build Guide “Direct my wrath.”"}]},
299
+ {"name"=>"Arthas",
300
+ "builds"=>[{"title"=>"Arthas Build Guide “Frostmourne hungers.”"}]},
301
+ {"name"=>"Azmodan",
302
+ "builds"=>[{"title"=>"Azmodan Build Guide “I shall rule alone!”"}]},
303
+ {"name"=>"Brightwing",
304
+ "builds"=>
305
+ [{"title"=>"Brightwing Build Guide “You don't want to be my enemy!”"}]},
306
+ {"name"=>"Chen",
307
+ "builds"=>[{"title"=>"Chen Build Guide “I bring Pandamonium!”"}]},
308
+ {"name"=>"Cho",
309
+ "builds"=>
310
+ [{"title"=>
311
+ "Cho Build Guide “This Nexus, all its power... it will be MINE!”"}]},
312
+ {"name"=>"Diablo",
313
+ "builds"=>
314
+ [{"title"=>"Diablo Build Guide “Kneel before the Lord of Terror.”"}]},
315
+ {"name"=>"E.T.C.",
316
+ "builds"=>[{"title"=>"E.T.C. Build Guide “You can't kill the metal!”"}]},
317
+ {"name"=>"Falstad",
318
+ "builds"=>[{"title"=>"Falstad Build Guide “Time to drop the hammer!”"}]},
319
+ {"name"=>"Gall",
320
+ "builds"=>
321
+ [{"title"=>
322
+ "Gall Build Guide “Those who oppose me invite their own demise!”"}]},
323
+ {"name"=>"Gazlowe",
324
+ "builds"=>[{"title"=>"Gazlowe Build Guide “Hey, time is money friend.”"}]},
325
+ {"name"=>"Greymane",
326
+ "builds"=>[{"title"=>"Greymane Build Guide “I am the alpha!”"}]},
327
+ {"name"=>"Illidan",
328
+ "builds"=>[{"title"=>"Illidan Build Guide “Now I am complete!”"}]},
329
+ {"name"=>"Jaina",
330
+ "builds"=>[{"title"=>"Jaina Build Guide “I'm here to help.”"}]},
331
+ {"name"=>"Johanna",
332
+ "builds"=>[{"title"=>"Johanna Build Guide “The Crusade marches on!”"}]},
333
+ {"name"=>"Kael'thas",
334
+ "builds"=>[{"title"=>"Kael'thas Build Guide “Anar'alah belore!”"}]},
335
+ {"name"=>"Kerrigan",
336
+ "builds"=>[{"title"=>"Kerrigan Build Guide “Long live the real Queen.”"}]},
337
+ {"name"=>"Kharazim",
338
+ "builds"=>[{"title"=>"Kharazim Build Guide “Feel the wrath of Ytar!”"}]},
339
+ {"name"=>"Leoric",
340
+ "builds"=>
341
+ [{"title"=>"Leoric Build Guide “All will suffer as I have suffered!”"}]},
342
+ {"name"=>"Li Li",
343
+ "builds"=>[{"title"=>"Li Li Build Guide “Ready for adventure!”"}]},
344
+ {"name"=>"Lt. Morales",
345
+ "builds"=>
346
+ [{"title"=>
347
+ "Lt. Morales Build Guide “I protect every member of my squad!”"}]},
348
+ {"name"=>"Lunara",
349
+ "builds"=>[{"title"=>"Lunara Build Guide “Taste my spear!”"}]},
350
+ {"name"=>"Malfurion",
351
+ "builds"=>
352
+ [{"title"=>"Malfurion Build Guide “Nature will rise against you!”"}]},
353
+ {"name"=>"Muradin",
354
+ "builds"=>[{"title"=>"Muradin Build Guide “It's hammer time!”"}]},
355
+ {"name"=>"Murky",
356
+ "builds"=>[{"title"=>"Murky Build Guide “Mrglrglmrglmrrrlggg!”"}]},
357
+ {"name"=>"Nazeebo",
358
+ "builds"=>[{"title"=>"Nazeebo Build Guide “The spirits speak to me.”"}]},
359
+ {"name"=>"Nova",
360
+ "builds"=>
361
+ [{"title"=>"Nova Build Guide “Ready to have your mind blown?”"}]},
362
+ {"name"=>"Raynor",
363
+ "builds"=>[{"title"=>"Raynor Build Guide “Hit 'em hard and fast.”"}]},
364
+ {"name"=>"Rehgar",
365
+ "builds"=>[{"title"=>"Rehgar Build Guide “To the Arena!”"}]},
366
+ {"name"=>"Rexxar",
367
+ "builds"=>[{"title"=>"Rexxar Build Guide “The beasts obey me!”"}]},
368
+ {"name"=>"Sgt. Hammer",
369
+ "builds"=>[{"title"=>"Sgt. Hammer Build Guide “Napalm's airborne!”"}]},
370
+ {"name"=>"Sonya",
371
+ "builds"=>[{"title"=>"Sonya Build Guide “Time to die!”"}]},
372
+ {"name"=>"Stitches",
373
+ "builds"=>[{"title"=>"Stitches Build Guide “ROAAAARR!”"}]},
374
+ {"name"=>"Sylvanas",
375
+ "builds"=>[{"title"=>"Sylvanas Build Guide “Let none survive!”"}]},
376
+ {"name"=>"Tassadar",
377
+ "builds"=>[{"title"=>"Tassadar Build Guide “Executor, I stand ready!”"}]},
378
+ {"name"=>"The Butcher",
379
+ "builds"=>[{"title"=>"The Butcher Build Guide “Fresh meat!”"}]},
380
+ {"name"=>"Thrall",
381
+ "builds"=>
382
+ [{"title"=>"Thrall Build Guide “The Elements will destroy you!”"}]},
383
+ {"name"=>"Tychus",
384
+ "builds"=>[{"title"=>"Tychus Build Guide “So, you gonna bark all day?”"}]},
385
+ {"name"=>"Tyrael",
386
+ "builds"=>[{"title"=>"Tyrael Build Guide “I am Justice itself!”"}]},
387
+ {"name"=>"Tyrande",
388
+ "builds"=>
389
+ [{"title"=>"Tyrande Build Guide “Feel the wrath of the Heavens!”"}]},
390
+ {"name"=>"Uther",
391
+ "builds"=>[{"title"=>"Uther Build Guide “I will fight with honor!”"}]},
392
+ {"name"=>"Valla",
393
+ "builds"=>[{"title"=>"Valla Build Guide “Be vewy, vewy quiet...”"}]},
394
+ {"name"=>"Zagara",
395
+ "builds"=>[{"title"=>"Zagara Build Guide “The Swarm hungers.”"}]},
396
+ {"name"=>"Zeratul",
397
+ "builds"=>[{"title"=>"Zeratul Build Guide “I serve the Xel'naga.”"}]}]})
398
+ end
399
+ end
400
+
277
401
  it 'should make post requests if needed' do
278
402
  VCR.use_cassette('make_post_request') do
279
403
  data = { your_name: "Name" }
@@ -10,13 +10,14 @@ describe SampleCrawler do
10
10
  @sample_crawler.should_receive(:parse) do |args|
11
11
  args['event_group'].wombat_property_selector.should == "css=div.title-agenda"
12
12
  it = args['event_group']
13
- it["event"]["title"].wombat_property_selector.should == "xpath=."
14
- it["event"]["date"].wombat_property_selector.should == "xpath=//div[@class='scrollable-items']/div[@class='s-item active']//a"
15
- it["event"]["type"].wombat_property_selector.should == "xpath=.type"
16
- it["venue"]["name"].wombat_property_selector.should == "xpath=."
13
+ expect(it["event"]["title"].wombat_property_selector).to eq("xpath=.")
14
+ expect(it["event"]["date"].wombat_property_selector).to(
15
+ eq("xpath=//div[@class='scrollable-items']/div[@class='s-item active']//a"))
16
+ expect(it["event"]["type"].wombat_property_selector).to eq("xpath=.type")
17
+ expect(it["venue"]["name"].wombat_property_selector).to eq("xpath=.")
17
18
 
18
19
  args[:base_url].should == 'http://www.obaoba.com.br'
19
- args[:path].should == '/porto-alegre/agenda'
20
+ args[:path].should == '/porto-alegre/agenda'
20
21
  end
21
22
 
22
23
  @sample_crawler.crawl
@@ -2,16 +2,16 @@
2
2
  # DO NOT EDIT THIS FILE DIRECTLY
3
3
  # Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
4
4
  # -*- encoding: utf-8 -*-
5
- # stub: wombat 2.5.0 ruby lib
5
+ # stub: wombat 2.5.1 ruby lib
6
6
 
7
7
  Gem::Specification.new do |s|
8
8
  s.name = "wombat"
9
- s.version = "2.5.0"
9
+ s.version = "2.5.1"
10
10
 
11
11
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
12
12
  s.require_paths = ["lib"]
13
13
  s.authors = ["Felipe Lima"]
14
- s.date = "2016-01-27"
14
+ s.date = "2016-03-31"
15
15
  s.description = "Generic Web crawler with a DSL that parses structured data from web pages"
16
16
  s.email = "felipe.lima@gmail.com"
17
17
  s.extra_rdoc_files = [
@@ -40,6 +40,7 @@ Gem::Specification.new do |s|
40
40
  "fixtures/vcr_cassettes/broken_selector.yml",
41
41
  "fixtures/vcr_cassettes/error_page.yml",
42
42
  "fixtures/vcr_cassettes/follow_links.yml",
43
+ "fixtures/vcr_cassettes/follow_links_v2.yml",
43
44
  "fixtures/vcr_cassettes/follow_relative_links.yml",
44
45
  "fixtures/vcr_cassettes/for_each_page.yml",
45
46
  "fixtures/vcr_cassettes/headers_selector.yml",
@@ -86,7 +87,7 @@ Gem::Specification.new do |s|
86
87
  s.homepage = "http://felipecsl.github.com/wombat"
87
88
  s.licenses = ["MIT"]
88
89
  s.required_ruby_version = Gem::Requirement.new(">= 1.9")
89
- s.rubygems_version = "2.4.6"
90
+ s.rubygems_version = "2.4.8"
90
91
  s.summary = "Ruby DSL to scrape web pages"
91
92
 
92
93
  if s.respond_to? :specification_version then
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: wombat
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.5.0
4
+ version: 2.5.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Felipe Lima
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-01-27 00:00:00.000000000 Z
11
+ date: 2016-03-31 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: mechanize
@@ -193,6 +193,7 @@ files:
193
193
  - fixtures/vcr_cassettes/broken_selector.yml
194
194
  - fixtures/vcr_cassettes/error_page.yml
195
195
  - fixtures/vcr_cassettes/follow_links.yml
196
+ - fixtures/vcr_cassettes/follow_links_v2.yml
196
197
  - fixtures/vcr_cassettes/follow_relative_links.yml
197
198
  - fixtures/vcr_cassettes/for_each_page.yml
198
199
  - fixtures/vcr_cassettes/headers_selector.yml
@@ -255,7 +256,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
255
256
  version: '0'
256
257
  requirements: []
257
258
  rubyforge_project:
258
- rubygems_version: 2.4.6
259
+ rubygems_version: 2.4.8
259
260
  signing_key:
260
261
  specification_version: 4
261
262
  summary: Ruby DSL to scrape web pages