metrocot 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,70 @@
1
+ require 'rubygems'
2
+ require 'test/unit'
3
+ require 'hpricot'
4
+ require 'metrocot'
5
+ require 'rake'
6
+ $rakefile = nil # pre-set so rdoctask.rb does not emit a warning; nothing else in this file reads it
7
+
8
+ class TestMetrocot < Test::Unit::TestCase
9
+
10
+ def setup
11
+ Rake.application.clear
12
+ end
13
+
14
+ class Abc < Object
15
+ attr_accessor :a, :b, :c
16
+ def initialize( a, b, c )
17
+ @a = a
18
+ @b = b
19
+ @c = c
20
+ end
21
+ end
22
+
23
+ def test_nothing
24
+
25
+ html = "<html><head></head><body><h1>hello!</h1><h2>A</h2><p>a 1</p><p>a 2</p><h2>B</h2><p>b 1</p><p>b 2</p><h2>C</h2><p>c 1</p><p>c 2</p></body></html>"
26
+
27
+ doc = Hpricot(html)
28
+
29
+ scraper = Metrocot.new(
30
+ :a => Metrocot::Scanners::TextScanner,
31
+ :b => Metrocot::Scanners::TextScanner,
32
+ :c => Metrocot::Scanners::TextScanner
33
+ )
34
+
35
+ # scraper.verbose = true
36
+
37
+ assert_equal( [[]], scraper.scrape(doc).descend("//html/body") { |td|
38
+ td.collect( "a=.//h3 b=.//p c=.//p" ) { |a, b, c|
39
+ Abc.new( a, b, c )
40
+ }
41
+ }.values )
42
+
43
+ end
44
+
45
+ def test_abc
46
+
47
+ html = "<html><head></head><body><h1>hello!</h1><h2>A</h2><p>a 1</p><p>a 2</p><h2>B</h2><p>b 1</p><p>b 2</p><h2>C</h2><p>c 1</p><p>c 2</p></body></html>"
48
+
49
+ doc = Hpricot(html)
50
+
51
+ scraper = Metrocot.new(
52
+ :a => Metrocot::Scanners::TextScanner,
53
+ :b => Metrocot::Scanners::TextScanner,
54
+ :c => Metrocot::Scanners::TextScanner
55
+ )
56
+
57
+ # scraper.verbose = true
58
+
59
+ abcs = scraper.scrape(doc).descend("//html/body") { |td|
60
+ td.collect( "a=.//h2 b=.//p c=.//p" ) { |a, b, c|
61
+ Abc.new( a, b, c )
62
+ }
63
+ }.values.flatten
64
+
65
+ assert_equal(3, abcs.size)
66
+
67
+ end
68
+
69
+ end
70
+
metadata ADDED
@@ -0,0 +1,72 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: metrocot
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.0.0
5
+ platform: ruby
6
+ authors:
7
+ - Helmut Hissen
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+
12
+ date: 2009-01-05 00:00:00 -08:00
13
+ default_executable:
14
+ dependencies:
15
+ - !ruby/object:Gem::Dependency
16
+ name: hoe
17
+ type: :development
18
+ version_requirement:
19
+ version_requirements: !ruby/object:Gem::Requirement
20
+ requirements:
21
+ - - ">="
22
+ - !ruby/object:Gem::Version
23
+ version: 1.8.2
24
+ version:
25
+ description: Metrocot builds on top of Hpricot to allow scraping of list data from HTML pages with a minimum of code and page specific information. The specification is done in a very compact readable format.
26
+ email:
27
+ - helmut@zeebar.com
28
+ executables:
29
+ - metrocot
30
+ extensions: []
31
+
32
+ extra_rdoc_files:
33
+ - History.txt
34
+ - Manifest.txt
35
+ - README.txt
36
+ files:
37
+ - History.txt
38
+ - Manifest.txt
39
+ - README.txt
40
+ - Rakefile
41
+ - bin/metrocot
42
+ - lib/metrocot.rb
43
+ - test/test_metrocot.rb
44
+ has_rdoc: true
45
+ homepage: http://www.metrocascade.com/mdn/opensource/metrocot (url)
46
+ post_install_message:
47
+ rdoc_options:
48
+ - --main
49
+ - README.txt
50
+ require_paths:
51
+ - lib
52
+ required_ruby_version: !ruby/object:Gem::Requirement
53
+ requirements:
54
+ - - ">="
55
+ - !ruby/object:Gem::Version
56
+ version: "0"
57
+ version:
58
+ required_rubygems_version: !ruby/object:Gem::Requirement
59
+ requirements:
60
+ - - ">="
61
+ - !ruby/object:Gem::Version
62
+ version: "0"
63
+ version:
64
+ requirements: []
65
+
66
+ rubyforge_project: metrocot
67
+ rubygems_version: 1.2.0
68
+ signing_key:
69
+ specification_version: 2
70
+ summary: Metrocot builds on top of Hpricot to allow scraping of list data from HTML pages with a minimum of code and page specific information
71
+ test_files:
72
+ - test/test_metrocot.rb