hpricot 0.4-mswin32

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,7 @@
1
# Loads every HTML/XHTML fixture found in the files/ directory next to this
# file and exposes its contents as a constant on this module. The constant
# name is the upper-cased file name without its extension, so
# files/basic.xhtml becomes TestFiles::BASIC.
module TestFiles
  # Resolve the glob relative to this file rather than the caller's working
  # directory; the block form of chdir restores the previous directory.
  Dir.chdir(File.dirname(__FILE__)) do
    Dir['files/*.{html,xhtml}'].each do |fname|
      # Only names made of word characters with a single extension match.
      base = fname[%r!/(\w+)\.\w+$!, 1]
      # Name the offending file instead of failing with NoMethodError on nil.
      unless base
        raise ArgumentError,
              "cannot derive a constant name from fixture #{fname.inspect}"
      end
      const_set base.upcase, IO.read(fname)
    end
  end
end
@@ -0,0 +1,141 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'test/unit'
4
+ require 'hpricot'
5
+ require 'load_files'
6
+
7
# Exercises Hpricot's parser and its CSS/XPath search API against the HTML
# fixtures exposed as constants on TestFiles. The expected counts and strings
# are tied to the exact contents of those fixture files.
class TestParser < Test::Unit::TestCase
  # Parses each fixture afresh before every test so that tests which mutate a
  # document (e.g. test_remove_attribute) cannot affect one another.
  def setup
    @basic = Hpricot.parse(TestFiles::BASIC)
    @boingboing = Hpricot.parse(TestFiles::BOINGBOING)
    @immob = Hpricot.parse(TestFiles::IMMOB)
    @uswebgen = Hpricot.parse(TestFiles::USWEBGEN)
    # @utf8 = Hpricot.parse(TestFiles::UTF8)
  end

  # def test_set_attr
  #   @basic.search('//p').set('class', 'para')
  #   assert_equal '', @basic.search('//p').map { |x| x.attributes }
  # end

  # A bare string handed to Hpricot.make yields a node whose content is that string.
  def test_scan_text
    assert_equal 'FOO', Hpricot.make("FOO").first.content
  end

  # get_element_by_id works on the document and on an element found by id.
  def test_get_element_by_id
    assert_equal 'link1', @basic.get_element_by_id('link1')['id']
    assert_equal 'link1', @basic.get_element_by_id('body1').get_element_by_id('link1').get_attribute('id')
  end

  # get_elements_by_tag_name returns an indexable collection of elements.
  def test_get_element_by_tag_name
    assert_equal 'link1', @basic.get_elements_by_tag_name('a')[0].get_attribute('id')
    assert_equal 'link1', @basic.get_elements_by_tag_name('body')[0].get_element_by_id('link1').get_attribute('id')
  end

  # Round trip: re-parse the document's own inner_html and run the same
  # selector checks against the result.
  def test_output_basic
    reparsed = Hpricot.parse(@basic.inner_html)
    scan_basic reparsed
  end

  def test_scan_basic
    scan_basic @basic
  end

  # Shared selector assertions for a parsed copy of the "basic" fixture.
  # Not prefixed with test_, so Test::Unit does not run it on its own.
  #
  # doc - a parsed document, as returned by Hpricot.parse.
  def scan_basic(doc)
    assert_equal 'link1', doc.at('#link1')['id']
    assert_equal 'link1', doc.at("p a")['id']
    assert_equal 'link1', (doc/:p/:a).first['id']
    assert_equal 'link1', doc.search('p').at('a').get_attribute('id')
    assert_equal 'link2', (doc/'p').filter('.ohmy').search('a').first.get_attribute('id')
    # Explicit call parentheses: the first argument itself starts with "(",
    # which is ambiguous when written after a space without them.
    assert_equal((doc/'p')[2], (doc/'p').filter(':nth(2)')[0])
    assert_equal 4, (doc/'p').filter('*').length
    assert_equal 4, (doc/'p').filter('* *').length
    eles = (doc/'p').filter('.ohmy')
    assert_equal 1, eles.length
    assert_equal 'ohmy', eles.first.get_attribute('class')
    assert_equal 3, (doc/'p:not(.ohmy)').length
    assert_equal 3, (doc/'p').not('.ohmy').length
    assert_equal 3, (doc/'p').not(eles.first).length
    assert_equal 2, (doc/'p').filter('[@class]').length
    assert_equal 'last final', (doc/'p[@class~="final"]').first.get_attribute('class')
    assert_equal 1, (doc/'p').filter('[@class~="final"]').length
    assert_equal 2, (doc/'p > a').length
    assert_equal 1, (doc/'p.ohmy > a').length
    assert_equal 2, (doc/'p / a').length
    assert_equal 2, (doc/'link ~ link').length
    assert_equal 3, (doc/'title ~ link').length
  end

  def test_scan_boingboing
    assert_equal 60, (@boingboing/'p.posted').length
    assert_equal 1, @boingboing.search("//a[@name='027906']").length
  end

  def test_css_negation
    assert_equal 3, (@basic/'p:not(.final)').length
  end

  # Mutates @basic; safe because setup re-parses the fixture for every test.
  def test_remove_attribute
    (@basic/:p).each { |ele| ele.remove_attribute('class') }
    assert_equal 0, (@basic/'p[@class]').length
  end

  def test_abs_xpath
    assert_equal 60, @boingboing.search("/html/body//p[@class='posted']").length
    assert_equal 60, @boingboing.search("/*/body//p[@class='posted']").length
    assert_equal 18, @boingboing.search("//script").length
    divs = @boingboing.search("//script/../div")
    assert_equal 2, divs.length
    assert_equal 1, divs.search('a').length
    imgs = @boingboing.search('//div/p/a/img')
    assert_equal 15, imgs.length
    assert_equal 17, @boingboing.search('//div').search('p/a/img').length
    assert imgs.all? { |x| x.name == 'img' }
  end

  def test_predicates
    assert_equal 2, @boingboing.search('//link[@rel="alternate"]').length
    p_imgs = @boingboing.search('//div/p[/a/img]')
    assert_equal 15, p_imgs.length
    assert p_imgs.all? { |x| x.name == 'p' }
    p_imgs = @boingboing.search('//div/p[a/img]')
    assert_equal 18, p_imgs.length
    assert p_imgs.all? { |x| x.name == 'p' }
    assert_equal 1, @boingboing.search('//input[@checked]').length
  end

  def test_alt_predicates
    assert_equal 2, @boingboing.search('//table/tr:last').length
    assert_equal "<p>The third paragraph</p>",
      @basic.search('p:eq(2)').to_html
    assert_equal '<p class="last final"><b>THE FINAL PARAGRAPH</b></p>',
      @basic.search('p:last').to_html
    assert_equal 'last final', @basic.search('//p:last-of-type').first.get_attribute('class')
  end

  # Each expected total equals the sum of the counts asserted individually
  # elsewhere in this class: 60 + 2 for the CSS form, 18 + 2 for the XPath form.
  def test_many_paths
    assert_equal 62, @boingboing.search('p.posted, link[@rel="alternate"]').length
    assert_equal 20, @boingboing.search('//div/p[a/img]|//link[@rel="alternate"]').length
  end

  # Checks every attribute value read from the immob fixture's <body> tag.
  def test_body_newlines
    body = @immob.at(:body)
    {'background' => '', 'bgcolor' => '#ffffff', 'text' => '#000000', 'marginheight' => '10',
     'marginwidth' => '10', 'leftmargin' => '10', 'topmargin' => '10', 'link' => '#000066',
     'alink' => '#ff6600', 'hlink' => "#ff6600", 'vlink' => "#000000"}.each do |k, v|
      assert_equal v, body[k]
    end
  end

  # Markup inside the first <script> element must survive as raw text.
  def test_javascripts
    assert_equal 3, (@immob/:script)[0].inner_html.scan(/<LINK/).length
  end

  def test_uswebgen
    # sent by brent beardsley, hpricot 0.3 had problems with all the links.
    assert_equal 67, (@uswebgen/:a).length
  end

  # Intentionally empty. NOTE(review): presumably waiting on the UTF8 fixture
  # whose load is commented out in setup -- confirm before adding assertions.
  def test_unicode
  end
end
metadata ADDED
@@ -0,0 +1,72 @@
1
+ --- !ruby/object:Gem::Specification
2
+ rubygems_version: 0.9.0
3
+ specification_version: 1
4
+ name: hpricot
5
+ version: !ruby/object:Gem::Version
6
+ version: "0.4"
7
+ date: 2006-08-11 00:00:00 -06:00
8
+ summary: a swift, liberal HTML parser with a fantastic library
9
+ require_paths:
10
+ - lib
11
+ email: why@ruby-lang.org
12
+ homepage: http://code.whytheluckystiff.net/hpricot/
13
+ rubyforge_project:
14
+ description: a swift, liberal HTML parser with a fantastic library
15
+ autorequire:
16
+ default_executable:
17
+ bindir: bin
18
+ has_rdoc: false
19
+ required_ruby_version: !ruby/object:Gem::Version::Requirement
20
+ requirements:
21
+ - - ">"
22
+ - !ruby/object:Gem::Version
23
+ version: 0.0.0
24
+ version:
25
+ platform: mswin32
26
+ signing_key:
27
+ cert_chain:
28
+ post_install_message:
29
+ authors:
30
+ - why the lucky stiff
31
+ files:
32
+ - test/load_files.rb
33
+ - test/test_parser.rb
34
+ - test/files/uswebgen.html
35
+ - test/files/boingboing.html
36
+ - test/files/immob.html
37
+ - test/files/basic.xhtml
38
+ - lib/hpricot.rb
39
+ - lib/hpricot/htmlinfo.rb
40
+ - lib/hpricot/text.rb
41
+ - lib/hpricot/inspect.rb
42
+ - lib/hpricot/modules.rb
43
+ - lib/hpricot/parse.rb
44
+ - lib/hpricot/tag.rb
45
+ - lib/hpricot/traverse.rb
46
+ - lib/hpricot/elements.rb
47
+ - ext/hpricot_scan/hpricot_scan.c
48
+ - ext/hpricot_scan/extconf.rb
49
+ - ext/hpricot_scan/hpricot_scan.h
50
+ - ext/hpricot_scan/hpricot_scan.rl
51
+ - CHANGELOG
52
+ - README
53
+ - Rakefile
54
+ - COPYING
55
+ - extras/mingw-rbconfig.rb
56
+ - lib/hpricot_scan.so
57
+ test_files: []
58
+
59
+ rdoc_options: []
60
+
61
+ extra_rdoc_files:
62
+ - README
63
+ - CHANGELOG
64
+ - COPYING
65
+ executables: []
66
+
67
+ extensions: []
68
+
69
+ requirements: []
70
+
71
+ dependencies: []
72
+