object-scraper 0.0.2 → 0.0.3

Sign up to get free protection for your applications and to get access to all the features.
data/README.rdoc CHANGED
@@ -31,6 +31,10 @@ extraction of ruby objects from various web sites.
31
31
 
32
32
  @objects = Scraper.parse(:twitter)
33
33
 
34
+ If you define multiple scrapers, you can collect all of their objects with a single method call:
35
+
36
+ @objects = Scraper.parse_all
37
+
34
38
  == Advanced Example
35
39
 
36
40
  It is possible to use other existing HTML parsers instead of hpricot.
data/Rakefile CHANGED
@@ -2,7 +2,7 @@ require 'rubygems'
2
2
  require 'rake'
3
3
  require 'echoe'
4
4
 
5
- Echoe.new('object-scraper', '0.0.2') do |p|
5
+ Echoe.new('object-scraper', '0.0.3') do |p|
6
6
  p.summary = "Recipe like object extraction from HTML sources"
7
7
  p.description = "Object scraper is a thin wrapper for hpricot to enable recipe-like extraction of ruby objects from various web sites."
8
8
  p.url = "http://github.com/enricogenauck/object-scraper"
@@ -42,6 +42,12 @@ class Scraper
42
42
  def self.parse(name)
43
43
  scraper_by_name(name).parse
44
44
  end
45
+
46
+ def self.parse_all
47
+ objects = []
48
+ scrapers.each_value { |s| objects << s.parse }
49
+ objects.flatten
50
+ end
45
51
 
46
52
  def parse
47
53
  doc = open(@scraper_source) { |f| Scraper.scrape_source_with.call(f) }
@@ -2,11 +2,11 @@
2
2
 
3
3
  Gem::Specification.new do |s|
4
4
  s.name = %q{object-scraper}
5
- s.version = "0.0.2"
5
+ s.version = "0.0.3"
6
6
 
7
7
  s.required_rubygems_version = Gem::Requirement.new(">= 1.2") if s.respond_to? :required_rubygems_version=
8
8
  s.authors = ["Enrico Genauck"]
9
- s.date = %q{2009-12-10}
9
+ s.date = %q{2010-02-03}
10
10
  s.description = %q{Object scraper is a thin wrapper for hpricot to enable recipe-like extraction of ruby objects from various web sites.}
11
11
  s.email = %q{kontakt@enricogenauck.de}
12
12
  s.extra_rdoc_files = ["README.rdoc", "lib/object-scraper.rb", "lib/object-scraper/scraper.rb"]
@@ -43,6 +43,22 @@ describe Scraper do
43
43
  @objects.first.date.should == DateTime.parse("Mon Nov 30 04:10:51 +0000 2009")
44
44
  end
45
45
 
46
+ it "should get the objects from multiple scrapers" do
47
+ Scraper.define(:twitter_1, :class => :entry, :source => @uri, :node => @pattern) do |s|
48
+ s.text { |node| node.at(".entry-content").inner_html }
49
+ s.date { |node| DateTime.parse(node.at(".timestamp")[:data][/\'.*\'/].delete("'")) }
50
+ end
51
+
52
+ Scraper.define(:twitter_2, :class => :entry, :source => @uri, :node => @pattern) do |s|
53
+ s.text { |node| node.at(".entry-content").inner_html }
54
+ s.date { |node| DateTime.parse(node.at(".timestamp")[:data][/\'.*\'/].delete("'")) }
55
+ end
56
+
57
+
58
+ @objects = Scraper.parse_all
59
+ @objects.size.should == 40
60
+ end
61
+
46
62
  it "should use a different html parser" do
47
63
  require 'nokogiri'
48
64
  Scraper.scrape_source_with = Proc.new { |source| Nokogiri::HTML(source) }
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: object-scraper
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.2
4
+ version: 0.0.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Enrico Genauck
@@ -9,7 +9,7 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2009-12-10 00:00:00 +01:00
12
+ date: 2010-02-03 00:00:00 +01:00
13
13
  default_executable:
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency