skrape 0.0.1 → 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +10 -4
- data/lib/skrape.rb +8 -1
- data/lib/skrape/version.rb +1 -1
- data/spec/skrape_spec.rb +11 -2
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: d0e1a84190cf280563a99b9c5a8e6ae0e3bef7e8
|
4
|
+
data.tar.gz: 7c1b968d7feedd6cb146cddac6e4e07c9b0e3b3a
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: f833b7fe7873798b691fb97ef18013b60535fd45074a056494d0267e25fa348f09ac9fd94ee46c31fd81e2978a97679f1d865a2cfdf2c227f9146a67d2174a4e
|
7
|
+
data.tar.gz: 473ae1b801b5b8a5b7857d9add162422d7c0702b5d4260dc0103d15360aad98dae68bd62044377668e151de46ad2517b9353fea50d7670d405b5c4e78e77126f
|
data/README.md
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
# Skrape
|
2
2
|
|
3
|
-
|
3
|
+
Skrape provides a cute DSL for extracting information from pages on the
|
4
|
+
web. You give it a url and a block and it gives you back a hash.
|
4
5
|
|
5
6
|
## Installation
|
6
7
|
|
@@ -18,9 +19,6 @@ Or install it yourself as:
|
|
18
19
|
|
19
20
|
## Usage
|
20
21
|
|
21
|
-
Skrape provides a cute DSL for extracting information from pages on the
|
22
|
-
web. You give it a url and a block and it gives you back a hash.
|
23
|
-
|
24
22
|
Lets say you have a page like this:
|
25
23
|
|
26
24
|
<html><body><h1>I am a title</h1></body></html>
|
@@ -47,6 +45,14 @@ The element(s) will be passed into the block as a
|
|
47
45
|
Nokogiri::XML::NodeSet for you to play with. Whatever text you return
|
48
46
|
will be added to the hash of things to return.
|
49
47
|
|
48
|
+
For those moments when you want an error raised when a selector returns
|
49
|
+
nothing you can add:
|
50
|
+
|
51
|
+
results = Skrape::Page.new(url).extract do
|
52
|
+
error_when_selector_returns_nothing true
|
53
|
+
extract_link_href with: 'a', and_run: proc {|link| link.attr('href').value }
|
54
|
+
end
|
55
|
+
|
50
56
|
|
51
57
|
## Contributing
|
52
58
|
|
data/lib/skrape.rb
CHANGED
@@ -10,6 +10,7 @@ module Skrape
|
|
10
10
|
class Page
|
11
11
|
|
12
12
|
def initialize url
|
13
|
+
@fail_loudly = false
|
13
14
|
@extracted_info = {}
|
14
15
|
agent = "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Ubuntu Chromium/32.0.1700.102 Chrome/32.0.1700.102 Safari/537.36"
|
15
16
|
@document = Nokogiri::HTML(open(url, "User-Agent" => agent))
|
@@ -21,10 +22,16 @@ module Skrape
|
|
21
22
|
@extracted_info
|
22
23
|
end
|
23
24
|
|
25
|
+
def error_when_selector_returns_nothing value
|
26
|
+
@fail_loudly = value
|
27
|
+
end
|
28
|
+
|
24
29
|
def method_missing name, args
|
25
30
|
feature_name = name.to_s.gsub('extract_', '').to_sym
|
26
31
|
element = @document.css args[:with]
|
27
|
-
|
32
|
+
if @fail_loudly
|
33
|
+
raise NoElementsFoundError, "the css selector for '#{feature_name}' did not return anything" if element.empty?
|
34
|
+
end
|
28
35
|
if args[:and_run]
|
29
36
|
@extracted_info[feature_name] = args[:and_run].call(element)
|
30
37
|
else
|
data/lib/skrape/version.rb
CHANGED
data/spec/skrape_spec.rb
CHANGED
@@ -25,12 +25,21 @@ describe Skrape do
|
|
25
25
|
expect(results[:link_href]).to eq "http://www.iana.org/domains/example"
|
26
26
|
end
|
27
27
|
|
28
|
-
it "raises
|
28
|
+
it "does not raises an error when a selector returns nothing" do
|
29
29
|
expect{
|
30
30
|
Skrape::Page.new(url).extract do
|
31
31
|
extract_nothing with: 'foo'
|
32
32
|
end
|
33
|
-
}.
|
33
|
+
}.not_to raise_error
|
34
|
+
end
|
35
|
+
|
36
|
+
it "raises a error when when told to" do
|
37
|
+
expect{
|
38
|
+
Skrape::Page.new(url).extract do
|
39
|
+
error_when_selector_returns_nothing true
|
40
|
+
extract_nothing with: 'foo'
|
41
|
+
end
|
42
|
+
}.to raise_error
|
34
43
|
end
|
35
44
|
|
36
45
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: skrape
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.1
|
4
|
+
version: 0.0.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Mike Williamson
|
@@ -9,7 +9,7 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date: 2014-
|
12
|
+
date: 2014-03-07 00:00:00 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: bundler
|