webscour 0.1 → 0.1.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,18 @@
1
+ WebScour
2
+ =======
3
+
4
+ *WebScour* is a small DSL for web scraping, built on top of [Nibbler][nibbler].
5
+
6
+ require 'webscour'
7
+
8
+ blog = WebScour.parse('http://example.com') do
9
+ element :title
10
+ end
11
+
12
+ blog.title
13
+ #=> "Hello World"
14
+
15
+
16
+ [See Nibbler][nibbler] for more `element` examples.
17
+
18
+ [nibbler]: http://github.com/mislav/nibbler
@@ -0,0 +1,30 @@
1
+ require 'rubygems'
2
+ require 'nibbler'
3
+ require 'nokogiri'
4
+ require 'faraday'
5
+
6
# Fetches a page over HTTP and scrapes it with rules declared in a block.
#
# The block given to {WebScour.parse} is evaluated inside an anonymous
# subclass of Nibbler, so Nibbler's class-level DSL (+element+, +elements+)
# is available in it. Reader calls made on the WebScour object are forwarded
# to the object produced by that subclass's +parse+.
#
#   blog = WebScour.parse('http://example.com') { element :title }
#   blog.title
class WebScour
  # Raised when a redirect chain cannot be followed to a final response.
  class RedirectError < StandardError; end

  # HTTP statuses that are followed as redirects for the GET request.
  REDIRECT_STATUSES = [301, 302, 303, 307, 308].freeze

  # Default upper bound on the number of redirects followed per fetch.
  MAX_REDIRECTS = 10

  # uri      - the URI the final response was fetched from
  # document - the final Faraday response
  # klass    - the object returned by the Nibbler subclass's +parse+
  #            (name kept for backward compatibility)
  attr_accessor :uri, :document, :klass

  # Convenience constructor; see #initialize.
  #
  # @param uri [String] address of the page to scrape
  # @return [WebScour]
  def self.parse(uri, &block)
    new(uri, &block)
  end

  # Fetches +uri+ (following redirects) and parses the response body.
  #
  # @param uri [String] address of the page to scrape
  # @yield scraping rules, evaluated in the context of a Nibbler subclass;
  #   optional, in which case no rules are declared
  # @raise [RedirectError] if the redirect chain is too long or malformed
  def initialize(uri, &block)
    @document, @uri = fetch(uri)
    # Guard the instance_eval: instance_eval(&nil) raises ArgumentError.
    scraper = Class.new(Nibbler) { instance_eval(&block) if block }
    @klass = scraper.parse(@document.body)
  end

  # Forwards public methods to the parsed document object. Anything it does
  # not publicly respond to falls through to the normal NoMethodError, so
  # private Kernel methods (e.g. +exit+) can never be triggered through here.
  def method_missing(name, *args, &block)
    return super unless @klass.respond_to?(name)

    @klass.public_send(name, *args, &block)
  end

  # Keeps +respond_to?+ consistent with what #method_missing forwards.
  def respond_to_missing?(name, include_private = false)
    @klass.respond_to?(name) || super
  end

  private

  # Performs a GET and follows redirects until a non-redirect response.
  #
  # @param uri [String] address to request
  # @param limit [Integer] maximum number of redirects to follow
  # @return [Array] two elements: the final response and the URI it was
  #   actually fetched from (the last +Location+ followed, or +uri+ itself)
  # @raise [RedirectError] when +limit+ is exceeded or a redirect response
  #   carries no +Location+ header
  def fetch(uri, limit = MAX_REDIRECTS)
    response = Faraday.get(uri)
    followed = 0

    while REDIRECT_STATUSES.include?(response.status)
      if followed >= limit
        raise RedirectError, "too many redirects (limit #{limit}), last at #{uri}"
      end

      location = response.headers['location']
      if location.nil? || location.empty?
        raise RedirectError, "redirect from #{uri} has no Location header"
      end

      # Track every hop so the returned URI is the final one, not the first.
      uri = location
      response = Faraday.get(uri)
      followed += 1
    end

    [response, uri]
  end
end
@@ -0,0 +1,3 @@
1
class WebScour
  # Gem version string; frozen so the constant cannot be mutated in place.
  VERSION = "0.1.1".freeze
end
@@ -0,0 +1,5 @@
1
+ require 'rubygems'
2
+ require 'rspec'
3
+ require 'fakeweb'
4
+ require File.dirname(__FILE__) + "/../lib/webscour"
5
+
@@ -0,0 +1,53 @@
1
+ require File.dirname(__FILE__) + "/spec_helper"
2
+
3
# Spec for WebScour.parse against a page served by FakeWeb.
describe "WebScour" do
  before do
    # Fixture page: a title, one paragraph and a three-item list.
    @data = <<-HTML
      <html>
        <head>
          <title>Hello World</title>
        </head>
        <body>
          <p>This is testing.</p>

          <ul>
            <li>A</li>
            <li>B</li>
            <li>C</li>
          </ul>
        </body>
      </html>
    HTML
    FakeWeb.register_uri(:any, "http://www.example.com", :body => @data)
  end

  # Sanity check on the fixture itself.
  it "should have data" do
    @data.should_not be_nil
    @data.should_not be_empty
  end

  # The registered fake response is what Faraday actually returns.
  it "should have content" do
    response = Faraday.get("http://www.example.com/")
    fetched = response.body
    fetched.should_not be_empty
    fetched.should == @data
  end

  # Scrapes the fixture with three rules and checks each extracted value.
  it "should have title" do
    page = WebScour.parse("http://www.example.com/") do
      element :title
      element './/body/p' => :body
      elements :ul => :list do
        element :li => :text
      end
    end

    page.title.should == "Hello World"
    page.body.should =~ /This is testing\./

    items = page.list
    items.should be_kind_of(Array)
    items.first.text.should == "A"
  end
end
metadata CHANGED
@@ -1,12 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: webscour
3
3
  version: !ruby/object:Gem::Version
4
- hash: 9
4
+ hash: 25
5
5
  prerelease: false
6
6
  segments:
7
7
  - 0
8
8
  - 1
9
- version: "0.1"
9
+ - 1
10
+ version: 0.1.1
10
11
  platform: ruby
11
12
  authors:
12
13
  - Bryan Goines
@@ -14,7 +15,7 @@ autorequire:
14
15
  bindir: bin
15
16
  cert_chain: []
16
17
 
17
- date: 2010-11-01 00:00:00 -05:00
18
+ date: 2010-11-06 00:00:00 -05:00
18
19
  default_executable:
19
20
  dependencies:
20
21
  - !ruby/object:Gem::Dependency
@@ -123,8 +124,12 @@ extensions: []
123
124
 
124
125
  extra_rdoc_files: []
125
126
 
126
- files: []
127
-
127
+ files:
128
+ - README.md
129
+ - lib/webscour.rb
130
+ - lib/webscour/version.rb
131
+ - spec/spec_helper.rb
132
+ - spec/webscour_spec.rb
128
133
  has_rdoc: true
129
134
  homepage: http://rubygems.org/gems/webscour
130
135
  licenses: []