scrapouille 0.0.6 → 0.0.7

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 9ac7508a3cc01e967f6cf32e986578b99f5ddf23
4
- data.tar.gz: 3bb5e1f56cfbe7c1c5668a387c679f6b8ac4b8ed
3
+ metadata.gz: e4fe2677b4492ce2c30d305949f0b8f72a48859d
4
+ data.tar.gz: e3891caa4fd74331d483e4308d2d42d1f9fc0944
5
5
  SHA512:
6
- metadata.gz: a5a8462493a89d40936cbf7ae3ef85e69c5e36e89581c13ea12bfdd40abb9336c2d056352b67a35d3f88719ddd15f24aa02c67ba9c30b4dd4d3c343d4eec7154
7
- data.tar.gz: ddf07efb781bb461c4e12e8b467b5cbc04f9bd75d9e07ab7eda3ceb2a4fdc673be3cbe939ede43a168fce6183892e58f9b0c0843b4a172d0e9379a08dacb3111
6
+ metadata.gz: 54ca09257d43ad187090d6cd35d4de71328574d4ab4682253033bb00d32f8788720ac65841195d844e381e564a20147ddc53ab1741f4ca6fd75769425c9645f3
7
+ data.tar.gz: 44ed891ee0c667918c945708c5344848042a4f2d3e26427e03a4746bf4e5601b4d0aacbf8a1bbdf9c6e9e79f1355a3af0461416479cefd18205c25e83b8195ef
@@ -22,9 +22,16 @@ module Scrapouille
22
22
  add_rule(:collect_unique, property, xpath_options, block)
23
23
  end
24
24
 
25
- def scrap_each!(uris)
26
- raise ArgumentError, 'Expecting enumerable as argument' unless uris.respond_to? :map
27
- uris.map do |uri|
25
+ def scrap_each!(*uris)
26
+ if uris.length == 1
27
+ full_uris = uris.first
28
+ elsif uris.length == 2
29
+ root, relative_uris = *uris
30
+ full_uris = relative_uris.map do |uri| "#{root}/#{uri}" end
31
+ else
32
+ raise ArgumentError, "Expecting 1 or 2 arguments when calling #{__callee__}"
33
+ end
34
+ full_uris.map do |uri|
28
35
  scrap!(uri)
29
36
  end
30
37
  end
@@ -2,7 +2,7 @@
2
2
 
3
3
  Gem::Specification.new do |spec|
4
4
  spec.name = "scrapouille"
5
- spec.version = "0.0.6"
5
+ spec.version = "0.0.7"
6
6
  spec.authors = ["simcap"]
7
7
  spec.summary = %q{Interactive and declarative XPath driven HTML scraper}
8
8
  spec.description = %q{Interactive and declarative XPath driven HTML scraper}
@@ -52,6 +52,36 @@ class TestScraping < MiniTest::Unit::TestCase
52
52
  )
53
53
  end
54
54
 
55
+
56
+ def test_scrap_each_using_root_and_relative_uri
57
+ scraper = Scrapouille.configure do
58
+ scrap 'fullname', at: "//div[@class='player-name']/h1/child::text()"
59
+ scrap 'image_url', at: "//div[@id='basic']//img/attribute::src"
60
+ scrap 'rank', at: "//div[@class='position']/text()" do |c|
61
+ Integer(c.sub('#', ''))
62
+ end
63
+ end
64
+
65
+ results = scraper.scrap_each!("#{__dir__}/fixtures", ['tennis-player.html', 'other-tennis-player.html'])
66
+
67
+ assert Array === results
68
+ assert_equal({
69
+ 'fullname' => 'Richard Gasquet',
70
+ 'image_url' => 'http://cdn.tennis.com/uploads/img/2014/06/12/gasquet/regular.jpg',
71
+ 'rank' => 21
72
+ },
73
+ results[0]
74
+ )
75
+ assert_equal({
76
+ 'fullname' => 'Rafael Nadal',
77
+ 'image_url' => 'http://cdn.tennis.com/uploads/img/1201/01/01/rnadal/regular.jpg',
78
+ 'rank' => 2
79
+ },
80
+ results[1]
81
+ )
82
+ end
83
+
84
+
55
85
  def test_scrap_attribute_value
56
86
  scraper = Scrapouille.configure do
57
87
  scrap :djokovic_picture_src, at: "//img[contains(@src, 'djokovicz')]/@src"
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: scrapouille
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.6
4
+ version: 0.0.7
5
5
  platform: ruby
6
6
  authors:
7
7
  - simcap