object-scraper 0.0.2 → 0.0.3

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
data/README.rdoc CHANGED
@@ -31,6 +31,10 @@ extraction of ruby objects from various web sites.
31
31
 
32
32
  @objects = Scraper.parse(:twitter)
33
33
 
34
+ If you define multiple scrapers, you can collect all of their objects with a single method call:
35
+
36
+ @objects = Scraper.parse_all
37
+
34
38
  == Advanced Example
35
39
 
36
40
  It is possible to use other existing HTML parsers instead of hpricot.
data/Rakefile CHANGED
@@ -2,7 +2,7 @@ require 'rubygems'
2
2
  require 'rake'
3
3
  require 'echoe'
4
4
 
5
- Echoe.new('object-scraper', '0.0.2') do |p|
5
+ Echoe.new('object-scraper', '0.0.3') do |p|
6
6
  p.summary = "Recipe like object extraction from HTML sources"
7
7
  p.description = "Object scraper is a thin wrapper for hpricot to enable recipe-like extraction of ruby objects from various web sites."
8
8
  p.url = "http://github.com/enricogenauck/object-scraper"
@@ -42,6 +42,12 @@ class Scraper
42
42
  def self.parse(name)
43
43
  scraper_by_name(name).parse
44
44
  end
45
+
46
+ def self.parse_all
47
+ objects = []
48
+ scrapers.each_value { |s| objects << s.parse }
49
+ objects.flatten
50
+ end
45
51
 
46
52
  def parse
47
53
  doc = open(@scraper_source) { |f| Scraper.scrape_source_with.call(f) }
@@ -2,11 +2,11 @@
2
2
 
3
3
  Gem::Specification.new do |s|
4
4
  s.name = %q{object-scraper}
5
- s.version = "0.0.2"
5
+ s.version = "0.0.3"
6
6
 
7
7
  s.required_rubygems_version = Gem::Requirement.new(">= 1.2") if s.respond_to? :required_rubygems_version=
8
8
  s.authors = ["Enrico Genauck"]
9
- s.date = %q{2009-12-10}
9
+ s.date = %q{2010-02-03}
10
10
  s.description = %q{Object scraper is a thin wrapper for hpricot to enable recipe-like extraction of ruby objects from various web sites.}
11
11
  s.email = %q{kontakt@enricogenauck.de}
12
12
  s.extra_rdoc_files = ["README.rdoc", "lib/object-scraper.rb", "lib/object-scraper/scraper.rb"]
@@ -43,6 +43,22 @@ describe Scraper do
43
43
  @objects.first.date.should == DateTime.parse("Mon Nov 30 04:10:51 +0000 2009")
44
44
  end
45
45
 
46
+ it "should get the objects from multiple scrapers" do
47
+ Scraper.define(:twitter_1, :class => :entry, :source => @uri, :node => @pattern) do |s|
48
+ s.text { |node| node.at(".entry-content").inner_html }
49
+ s.date { |node| DateTime.parse(node.at(".timestamp")[:data][/\'.*\'/].delete("'")) }
50
+ end
51
+
52
+ Scraper.define(:twitter_2, :class => :entry, :source => @uri, :node => @pattern) do |s|
53
+ s.text { |node| node.at(".entry-content").inner_html }
54
+ s.date { |node| DateTime.parse(node.at(".timestamp")[:data][/\'.*\'/].delete("'")) }
55
+ end
56
+
57
+
58
+ @objects = Scraper.parse_all
59
+ @objects.size.should == 40
60
+ end
61
+
46
62
  it "should use a different html parser" do
47
63
  require 'nokogiri'
48
64
  Scraper.scrape_source_with = Proc.new { |source| Nokogiri::HTML(source) }
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: object-scraper
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.2
4
+ version: 0.0.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Enrico Genauck
@@ -9,7 +9,7 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2009-12-10 00:00:00 +01:00
12
+ date: 2010-02-03 00:00:00 +01:00
13
13
  default_executable:
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency