object-scraper 0.0.3 → 0.0.4

Sign up to get free protection for your applications and to get access to all the features.
data/Manifest CHANGED
@@ -3,7 +3,7 @@ README.rdoc
3
3
  Rakefile
4
4
  lib/object-scraper.rb
5
5
  lib/object-scraper/scraper.rb
6
- object-scraper.gemspec
6
+ spec/data/incomplete_objects.html
7
7
  spec/data/twitter.html
8
8
  spec/object-scraper/scraper_spec.rb
9
9
  spec/spec.opts
data/Rakefile CHANGED
@@ -2,7 +2,7 @@ require 'rubygems'
2
2
  require 'rake'
3
3
  require 'echoe'
4
4
 
5
- Echoe.new('object-scraper', '0.0.3') do |p|
5
+ Echoe.new('object-scraper', '0.0.4') do |p|
6
6
  p.summary = "Recipe like object extraction from HTML sources"
7
7
  p.description = "Object scraper is a thin wrapper for hpricot to enable recipe-like extraction of ruby objects from various web sites."
8
8
  p.url = "http://github.com/enricogenauck/object-scraper"
@@ -66,7 +66,11 @@ class Scraper
66
66
 
67
67
  def method_missing(symbol, *args, &block)
68
68
  if block_given?
69
- @current_object.send("#{symbol}=", yield(@current_node))
69
+ @current_object.send("#{symbol}=", begin
70
+ yield(@current_node)
71
+ rescue
72
+ puts "Warning, parsing failed at #{@current_node.inspect}"
73
+ end)
70
74
  else
71
75
  @current_object.send("#{symbol}=", args.first)
72
76
  end
@@ -2,7 +2,7 @@
2
2
 
3
3
  Gem::Specification.new do |s|
4
4
  s.name = %q{object-scraper}
5
- s.version = "0.0.3"
5
+ s.version = "0.0.4"
6
6
 
7
7
  s.required_rubygems_version = Gem::Requirement.new(">= 1.2") if s.respond_to? :required_rubygems_version=
8
8
  s.authors = ["Enrico Genauck"]
@@ -10,7 +10,7 @@ Gem::Specification.new do |s|
10
10
  s.description = %q{Object scraper is a thin wrapper for hpricot to enable recipe-like extraction of ruby objects from various web sites.}
11
11
  s.email = %q{kontakt@enricogenauck.de}
12
12
  s.extra_rdoc_files = ["README.rdoc", "lib/object-scraper.rb", "lib/object-scraper/scraper.rb"]
13
- s.files = ["Manifest", "README.rdoc", "Rakefile", "lib/object-scraper.rb", "lib/object-scraper/scraper.rb", "object-scraper.gemspec", "spec/data/twitter.html", "spec/object-scraper/scraper_spec.rb", "spec/spec.opts", "spec/spec_helper.rb"]
13
+ s.files = ["Manifest", "README.rdoc", "Rakefile", "lib/object-scraper.rb", "lib/object-scraper/scraper.rb", "spec/data/incomplete_objects.html", "spec/data/twitter.html", "spec/object-scraper/scraper_spec.rb", "spec/spec.opts", "spec/spec_helper.rb", "object-scraper.gemspec"]
14
14
  s.homepage = %q{http://github.com/enricogenauck/object-scraper}
15
15
  s.rdoc_options = ["--line-numbers", "--inline-source", "--title", "Object-scraper", "--main", "README.rdoc"]
16
16
  s.require_paths = ["lib"]
@@ -0,0 +1,12 @@
1
+ <html>
2
+ <head></head>
3
+ <body>
4
+ <div class="status">
5
+ <h1>Item 1</h1>
6
+ <p>content</p>
7
+ </div>
8
+ <div class="status">
9
+ <h1>Item 2</h1>
10
+ </div>
11
+ </body>
12
+ </html>
@@ -3,6 +3,7 @@ require File.expand_path(File.join(File.dirname(__FILE__), '..', 'spec_helper'))
3
3
  describe Scraper do
4
4
  before :all do
5
5
  @uri = File.expand_path(File.join(File.dirname(__FILE__), '..', 'data', 'twitter.html' ))
6
+ @faulty_source = File.expand_path(File.join(File.dirname(__FILE__), '..', 'data', 'incomplete_objects.html' ))
6
7
  @pattern = ".status"
7
8
  class Entry < Object
8
9
  attr_accessor :text, :date
@@ -74,5 +75,16 @@ describe Scraper do
74
75
  @objects.first.date.should == DateTime.parse("Mon Nov 30 04:10:51 +0000 2009")
75
76
  end
76
77
 
78
+ it "should get the objects despite of parse errors" do
79
+ Scraper.define(:errors, :class => :entry, :source => @faulty_source, :node => @pattern) do |s|
80
+ s.text { |node| node.at("h1").inner_html }
81
+ s.date { |node| node.at("p").inner_html }
82
+ end
83
+
84
+ @objects = Scraper.parse(:errors)
85
+ @objects[0].date.should == "content"
86
+ @objects[1].date.should be_nil
87
+ end
88
+
77
89
  end
78
90
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: object-scraper
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.3
4
+ version: 0.0.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Enrico Genauck
@@ -38,11 +38,12 @@ files:
38
38
  - Rakefile
39
39
  - lib/object-scraper.rb
40
40
  - lib/object-scraper/scraper.rb
41
- - object-scraper.gemspec
41
+ - spec/data/incomplete_objects.html
42
42
  - spec/data/twitter.html
43
43
  - spec/object-scraper/scraper_spec.rb
44
44
  - spec/spec.opts
45
45
  - spec/spec_helper.rb
46
+ - object-scraper.gemspec
46
47
  has_rdoc: true
47
48
  homepage: http://github.com/enricogenauck/object-scraper
48
49
  licenses: []