webscour 0.1 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,18 @@
1
+ WebScour
2
+ =======
3
+
4
+ *WebScour* is a small DSL for web scraping.
5
+
6
+ require 'webscour'
7
+
8
+ blog = WebScour.parse('http://example.com') do
9
+ element :title
10
+ end
11
+
12
+ blog.title
13
+ #=> "Hello World"
14
+
15
+
16
+ [See Nibbler][nibbler] for more `element` examples.
17
+
18
+ [nibbler]: http://github.com/mislav/nibbler
@@ -0,0 +1,30 @@
1
+ require 'rubygems'
2
+ require 'nibbler'
3
+ require 'nokogiri'
4
+ require 'faraday'
5
+
6
# WebScour fetches a page over HTTP with Faraday and scrapes it with a
# Nibbler parser whose rules are declared inline through a block.
#
#   blog = WebScour.parse('http://example.com') { element :title }
#   blog.title #=> "Hello World"
class WebScour
  # Raised when a redirect chain is longer than MAX_REDIRECTS hops.
  class TooManyRedirects < StandardError; end

  # HTTP statuses that are followed to the URL in their Location header.
  REDIRECT_STATUSES = [301, 302, 303, 307, 308].freeze

  # Upper bound on redirect hops; keeps a redirect loop from recursing forever.
  MAX_REDIRECTS = 10

  # uri      - URL the document was finally fetched from (after redirects).
  # document - response object returned by Faraday.get for that URL.
  # klass    - despite the name, the parsed *instance* of the anonymous
  #            Nibbler subclass; scraped values are read from it.
  attr_accessor :uri, :document, :klass

  # Convenience constructor; identical to WebScour.new(uri, &block).
  def self.parse(uri, &block)
    new(uri, &block)
  end

  # Fetches +uri+ and parses the response body.
  #
  # uri   - URL to request.
  # block - scraping rules, evaluated with instance_eval on a fresh
  #         Nibbler subclass.
  #
  # Raises ArgumentError when no block is given (checked before any request
  # is made) and TooManyRedirects when the redirect chain is too long.
  def initialize(uri, &block)
    raise ArgumentError, 'a block of scraping rules is required' unless block

    @document, @uri = fetch(uri)
    @klass = Class.new(Nibbler) { instance_eval(&block) }.parse(@document.body)
  end

  # Forwards unknown messages (such as scraped element readers) to the
  # parsed Nibbler instance, passing along any arguments and block.
  # Only public methods are forwarded, so private Kernel methods such as
  # +exit+ or +system+ cannot be triggered through this proxy.
  def method_missing(name, *args, &block)
    if delegate?(name)
      @klass.public_send(name, *args, &block)
    else
      super
    end
  end

  # Keeps respond_to? in agreement with what method_missing forwards.
  def respond_to_missing?(name, include_private = false)
    delegate?(name) || super
  end

  private

  # True when the parsed Nibbler instance exists and publicly responds to +name+.
  def delegate?(name)
    !@klass.nil? && @klass.respond_to?(name)
  end

  # Requests +uri+, following redirects.
  #
  # uri            - URL to request.
  # redirects_left - remaining redirect hops allowed.
  #
  # Returns a two-element Array: [final response, URL it was fetched from].
  # A redirect status without a Location header is returned as-is.
  # Raises TooManyRedirects once the hop budget is exhausted.
  def fetch(uri, redirects_left = MAX_REDIRECTS)
    response = Faraday.get(uri)
    location = response.headers['location']
    return [response, uri] unless REDIRECT_STATUSES.include?(response.status) && location

    if redirects_left <= 0
      raise TooManyRedirects, "gave up after #{MAX_REDIRECTS} redirects (last: #{uri})"
    end

    # Return the nested pair directly so the URL reflects the last hop,
    # not just the first redirect target.
    fetch(location, redirects_left - 1)
  end
end
@@ -0,0 +1,3 @@
1
class WebScour
  # Gem release version. Frozen so the shared string cannot be mutated in place.
  VERSION = "0.1.1".freeze
end
@@ -0,0 +1,5 @@
1
+ require 'rubygems'
2
+ require 'rspec'
3
+ require 'fakeweb'
4
+ require File.dirname(__FILE__) + "/../lib/webscour"
5
+
@@ -0,0 +1,53 @@
1
+ require File.dirname(__FILE__) + "/spec_helper"
2
+
3
# Specs for WebScour. HTTP traffic to www.example.com is stubbed with
# FakeWeb so the examples run against a fixed HTML fixture.
describe "WebScour" do
  before do
    # Fixture page: a title, one paragraph and a three-item list.
    @data = <<-HTML
      <html>
        <head>
          <title>Hello World</title>
        </head>
        <body>
          <p>This is testing.</p>

          <ul>
            <li>A</li>
            <li>B</li>
            <li>C</li>
          </ul>
        </body>
      </html>
    HTML
    FakeWeb.register_uri(:any, "http://www.example.com", :body => @data)
  end

  after do
    # Drop the stub so registrations do not pile up across examples.
    FakeWeb.clean_registry
  end

  it "should have data" do
    @data.should_not be_nil
    @data.should_not be_empty
  end

  it "should have content" do
    # Sanity check that the FakeWeb stub is what Faraday actually receives.
    resp = Faraday.get "http://www.example.com/"
    resp.body.should_not be_empty
    resp.body.should == @data
  end

  it "should scrape title, body and list" do
    html = WebScour.parse("http://www.example.com/") do
      element :title

      element './/body/p' => :body

      elements :ul => :list do
        element :li => :text
      end
    end

    html.title.should == "Hello World"
    html.body.should =~ /This is testing\./
    html.list.should be_kind_of(Array)
    html.list.first.text.should == "A"
  end
end
metadata CHANGED
@@ -1,12 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: webscour
3
3
  version: !ruby/object:Gem::Version
4
- hash: 9
4
+ hash: 25
5
5
  prerelease: false
6
6
  segments:
7
7
  - 0
8
8
  - 1
9
- version: "0.1"
9
+ - 1
10
+ version: 0.1.1
10
11
  platform: ruby
11
12
  authors:
12
13
  - Bryan Goines
@@ -14,7 +15,7 @@ autorequire:
14
15
  bindir: bin
15
16
  cert_chain: []
16
17
 
17
- date: 2010-11-01 00:00:00 -05:00
18
+ date: 2010-11-06 00:00:00 -05:00
18
19
  default_executable:
19
20
  dependencies:
20
21
  - !ruby/object:Gem::Dependency
@@ -123,8 +124,12 @@ extensions: []
123
124
 
124
125
  extra_rdoc_files: []
125
126
 
126
- files: []
127
-
127
+ files:
128
+ - README.md
129
+ - lib/webscour.rb
130
+ - lib/webscour/version.rb
131
+ - spec/spec_helper.rb
132
+ - spec/webscour_spec.rb
128
133
  has_rdoc: true
129
134
  homepage: http://rubygems.org/gems/webscour
130
135
  licenses: []