object-scraper 0.0.3 → 0.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/Manifest CHANGED
@@ -3,7 +3,7 @@ README.rdoc
3
3
  Rakefile
4
4
  lib/object-scraper.rb
5
5
  lib/object-scraper/scraper.rb
6
- object-scraper.gemspec
6
+ spec/data/incomplete_objects.html
7
7
  spec/data/twitter.html
8
8
  spec/object-scraper/scraper_spec.rb
9
9
  spec/spec.opts
data/Rakefile CHANGED
@@ -2,7 +2,7 @@ require 'rubygems'
2
2
  require 'rake'
3
3
  require 'echoe'
4
4
 
5
- Echoe.new('object-scraper', '0.0.3') do |p|
5
+ Echoe.new('object-scraper', '0.0.4') do |p|
6
6
  p.summary = "Recipe like object extraction from HTML sources"
7
7
  p.description = "Object scraper is a thin wrapper for hpricot to enable recipe-like extraction of ruby objects from various web sites."
8
8
  p.url = "http://github.com/enricogenauck/object-scraper"
@@ -66,7 +66,11 @@ class Scraper
66
66
 
67
67
  def method_missing(symbol, *args, &block)
68
68
  if block_given?
69
- @current_object.send("#{symbol}=", yield(@current_node))
69
+ @current_object.send("#{symbol}=", begin
70
+ yield(@current_node)
71
+ rescue
72
+ puts "Warning, parsing failed at #{@current_node.inspect}"
73
+ end)
70
74
  else
71
75
  @current_object.send("#{symbol}=", args.first)
72
76
  end
@@ -2,7 +2,7 @@
2
2
 
3
3
  Gem::Specification.new do |s|
4
4
  s.name = %q{object-scraper}
5
- s.version = "0.0.3"
5
+ s.version = "0.0.4"
6
6
 
7
7
  s.required_rubygems_version = Gem::Requirement.new(">= 1.2") if s.respond_to? :required_rubygems_version=
8
8
  s.authors = ["Enrico Genauck"]
@@ -10,7 +10,7 @@ Gem::Specification.new do |s|
10
10
  s.description = %q{Object scraper is a thin wrapper for hpricot to enable recipe-like extraction of ruby objects from various web sites.}
11
11
  s.email = %q{kontakt@enricogenauck.de}
12
12
  s.extra_rdoc_files = ["README.rdoc", "lib/object-scraper.rb", "lib/object-scraper/scraper.rb"]
13
- s.files = ["Manifest", "README.rdoc", "Rakefile", "lib/object-scraper.rb", "lib/object-scraper/scraper.rb", "object-scraper.gemspec", "spec/data/twitter.html", "spec/object-scraper/scraper_spec.rb", "spec/spec.opts", "spec/spec_helper.rb"]
13
+ s.files = ["Manifest", "README.rdoc", "Rakefile", "lib/object-scraper.rb", "lib/object-scraper/scraper.rb", "spec/data/incomplete_objects.html", "spec/data/twitter.html", "spec/object-scraper/scraper_spec.rb", "spec/spec.opts", "spec/spec_helper.rb", "object-scraper.gemspec"]
14
14
  s.homepage = %q{http://github.com/enricogenauck/object-scraper}
15
15
  s.rdoc_options = ["--line-numbers", "--inline-source", "--title", "Object-scraper", "--main", "README.rdoc"]
16
16
  s.require_paths = ["lib"]
@@ -0,0 +1,12 @@
1
+ <html>
2
+ <head></head>
3
+ <body>
4
+ <div class="status">
5
+ <h1>Item 1</h1>
6
+ <p>content</p>
7
+ </div>
8
+ <div class="status">
9
+ <h1>Item 2</h1>
10
+ </div>
11
+ </body>
12
+ </html>
@@ -3,6 +3,7 @@ require File.expand_path(File.join(File.dirname(__FILE__), '..', 'spec_helper'))
3
3
  describe Scraper do
4
4
  before :all do
5
5
  @uri = File.expand_path(File.join(File.dirname(__FILE__), '..', 'data', 'twitter.html' ))
6
+ @faulty_source = File.expand_path(File.join(File.dirname(__FILE__), '..', 'data', 'incomplete_objects.html' ))
6
7
  @pattern = ".status"
7
8
  class Entry < Object
8
9
  attr_accessor :text, :date
@@ -74,5 +75,16 @@ describe Scraper do
74
75
  @objects.first.date.should == DateTime.parse("Mon Nov 30 04:10:51 +0000 2009")
75
76
  end
76
77
 
78
+ it "should get the objects despite of parse errors" do
79
+ Scraper.define(:errors, :class => :entry, :source => @faulty_source, :node => @pattern) do |s|
80
+ s.text { |node| node.at("h1").inner_html }
81
+ s.date { |node| node.at("p").inner_html }
82
+ end
83
+
84
+ @objects = Scraper.parse(:errors)
85
+ @objects[0].date.should == "content"
86
+ @objects[1].date.should be_nil
87
+ end
88
+
77
89
  end
78
90
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: object-scraper
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.3
4
+ version: 0.0.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Enrico Genauck
@@ -38,11 +38,12 @@ files:
38
38
  - Rakefile
39
39
  - lib/object-scraper.rb
40
40
  - lib/object-scraper/scraper.rb
41
- - object-scraper.gemspec
41
+ - spec/data/incomplete_objects.html
42
42
  - spec/data/twitter.html
43
43
  - spec/object-scraper/scraper_spec.rb
44
44
  - spec/spec.opts
45
45
  - spec/spec_helper.rb
46
+ - object-scraper.gemspec
46
47
  has_rdoc: true
47
48
  homepage: http://github.com/enricogenauck/object-scraper
48
49
  licenses: []