scraprr 0.0.1 → 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: c7ede4f1bfe1eebbca1915f4c515537ada2c71b1
4
- data.tar.gz: 22e5a2d1cce4c50d0ed8d11d41eb2aaa7529ebef
3
+ metadata.gz: 94ea1d7615ca2011a82052a968bea0f557e1c210
4
+ data.tar.gz: 762eea3721533afe842d013dd3000781faddd4f2
5
5
  SHA512:
6
- metadata.gz: a24d82be7501c66d1192a3900c837c27605559328ab0e2e486664fd10bb34a920a1cc028f1e24f9a1692af6d595849b614cb8d9ff5ecc8a0d4f5ea592574cc43
7
- data.tar.gz: 1c1a572ad884718f93509aa1d5348681bdeb428a00a70b7acb43c5719bb6e54b6449ea1ab62342c76ef28692ecfd8edb6bdbded218c7fc323a2ec624bb63e9b5
6
+ metadata.gz: 149a1660d39220c2c7e484c31d5b4ef2613ce9432c6e1d2e60e69afbdd062c33d60ac7a6e87a71f381ef7b6c95adedf8df502206294b6b0cc38b54b2f86326af
7
+ data.tar.gz: b98578eb8c1dff1106ed6f2824aaee07965eefbc7b1eb007c19a5a4b8c8041537ad78431f8c6d4fa48d3d30f85de28a2ccece7005318ae107860f20ad9167b16
@@ -0,0 +1,19 @@
1
+ #!/usr/bin/env ruby
2
+ require 'pp'
3
+ require 'open-uri'
4
+ require 'watir'
5
+ require 'watir-webdriver'
6
+ require 'scraprr'
7
+
8
+
9
+ scraper = Scraprr::Scraper.new('div[skinpart=itemsContainer] > div').
10
+ attribute(:name, 'div[skinpart=title]', :required => true).
11
+ attribute(:abv, 'div[skinpart=description]', :regexp => /([0-9.]+)%/)
12
+
13
+ browser = Watir::Browser.new
14
+ browser.goto 'http://www.thetaphaus.co.nz/#!now-on-tap/c14a4'
15
+ sleep 10
16
+ document = Nokogiri::HTML(browser.html)
17
+ browser.close
18
+
19
+ pp scraper.extract(document)
@@ -0,0 +1,13 @@
1
+ #!/usr/bin/env ruby
2
+ require 'pp'
3
+ require 'open-uri'
4
+ require 'scraprr'
5
+
6
+ scraper = Scraprr::Scraper.new('table tr').
7
+ attribute(:brewery, './td[1]', :required => true).
8
+ attribute(:url, './td[1]/a', :attr => 'href', :required => true).
9
+ attribute(:year, './td[3]').
10
+ attribute(:production, './td[4]')
11
+
12
+ document = Nokogiri::HTML(open('http://en.wikipedia.org/wiki/Trappist_beer'))
13
+ pp scraper.extract(document)
@@ -5,7 +5,8 @@ module Scraprr
5
5
  end
6
6
 
7
7
  def run(value)
8
- @chain.run(value).strip
8
+ value = @chain.run(value)
9
+ value.strip if value
9
10
  end
10
11
  end
11
12
  end
@@ -1,3 +1,3 @@
1
1
  module Scraprr
2
- VERSION = "0.0.1"
2
+ VERSION = "0.0.2"
3
3
  end
@@ -13,5 +13,13 @@ describe Scraprr::StripFilter do
13
13
  @chain = Scraprr::StripFilter.new(@chain)
14
14
  @chain.run(value).must_equal('test')
15
15
  end
16
+
17
+ it "returns nil if value was nil" do
18
+ value = nil
19
+ @chain.expect(:run, value, [value])
20
+
21
+ @chain = Scraprr::StripFilter.new(@chain)
22
+ @chain.run(value).must_equal(nil)
23
+ end
16
24
  end
17
25
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: scraprr
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.0.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - James Fargher
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2013-05-03 00:00:00.000000000 Z
11
+ date: 2013-12-12 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -78,6 +78,8 @@ files:
78
78
  - LICENSE.txt
79
79
  - README.md
80
80
  - Rakefile
81
+ - example/taphaus.rb
82
+ - example/wikipedia.rb
81
83
  - lib/scraprr.rb
82
84
  - lib/scraprr/attribute_scraper.rb
83
85
  - lib/scraprr/exceptions.rb
@@ -118,7 +120,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
118
120
  version: '0'
119
121
  requirements: []
120
122
  rubyforge_project:
121
- rubygems_version: 2.0.0
123
+ rubygems_version: 2.0.3
122
124
  signing_key:
123
125
  specification_version: 4
124
126
  summary: Declarative HTML/XML scraper
@@ -132,4 +134,3 @@ test_files:
132
134
  - test/scraprr/strip_filter_test.rb
133
135
  - test/scraprr/value_extractor_test.rb
134
136
  - test/test_helper.rb
135
- has_rdoc: