hpricot 0.6-jruby
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGELOG +62 -0
- data/COPYING +18 -0
- data/README +284 -0
- data/Rakefile +211 -0
- data/ext/hpricot_scan/HpricotScanService.java +1340 -0
- data/ext/hpricot_scan/extconf.rb +6 -0
- data/ext/hpricot_scan/hpricot_common.rl +76 -0
- data/ext/hpricot_scan/hpricot_scan.c +5976 -0
- data/ext/hpricot_scan/hpricot_scan.h +79 -0
- data/ext/hpricot_scan/hpricot_scan.java.rl +363 -0
- data/ext/hpricot_scan/hpricot_scan.rl +273 -0
- data/extras/mingw-rbconfig.rb +176 -0
- data/lib/hpricot.rb +26 -0
- data/lib/hpricot/blankslate.rb +63 -0
- data/lib/hpricot/builder.rb +200 -0
- data/lib/hpricot/elements.rb +510 -0
- data/lib/hpricot/htmlinfo.rb +672 -0
- data/lib/hpricot/inspect.rb +107 -0
- data/lib/hpricot/modules.rb +37 -0
- data/lib/hpricot/parse.rb +297 -0
- data/lib/hpricot/tag.rb +228 -0
- data/lib/hpricot/tags.rb +164 -0
- data/lib/hpricot/traverse.rb +821 -0
- data/lib/hpricot/xchar.rb +94 -0
- data/lib/i686-linux/hpricot_scan.jar +0 -0
- data/test/files/basic.xhtml +17 -0
- data/test/files/boingboing.html +2266 -0
- data/test/files/cy0.html +3653 -0
- data/test/files/immob.html +400 -0
- data/test/files/pace_application.html +1320 -0
- data/test/files/tenderlove.html +16 -0
- data/test/files/uswebgen.html +220 -0
- data/test/files/utf8.html +1054 -0
- data/test/files/week9.html +1723 -0
- data/test/files/why.xml +19 -0
- data/test/load_files.rb +7 -0
- data/test/test_alter.rb +65 -0
- data/test/test_builder.rb +24 -0
- data/test/test_parser.rb +379 -0
- data/test/test_paths.rb +16 -0
- data/test/test_preserved.rb +66 -0
- data/test/test_xml.rb +28 -0
- metadata +98 -0
data/test/test_paths.rb
ADDED
@@ -0,0 +1,16 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require 'test/unit'
|
4
|
+
require 'hpricot'
|
5
|
+
require 'load_files'
|
6
|
+
|
7
|
+
# Path round-trip checks: the css_path and xpath an element reports for
# itself must lead back to that same element.
class TestParser < Test::Unit::TestCase
  # For every selector, look the element up with `at`, then look it up
  # again through its own css_path and xpath and expect the same element.
  def test_roundtrip
    @basic = Hpricot.parse(TestFiles::BASIC)
    selectors = ['link', 'link[2]', 'body', '#link1', 'a', 'p.ohmy']
    selectors.each do |selector|
      found = @basic.at(selector)
      assert_equal(found, @basic.at(found.css_path))
      assert_equal(found, @basic.at(found.xpath))
    end
  end
end
|
data/test/test_preserved.rb
ADDED
@@ -0,0 +1,66 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require 'test/unit'
|
4
|
+
require 'hpricot'
|
5
|
+
require 'load_files'
|
6
|
+
|
7
|
+
# Checks that Hpricot hands back the markup it was given (to_original_html /
# to_html), both for untouched documents and after the tree has been edited.
class TestPreserved < Test::Unit::TestCase
  # Parses +str+, optionally yields the document so the caller can modify it,
  # then asserts that to_original_html matches the input.
  def assert_roundtrip str
    doc = Hpricot(str)
    yield doc if block_given?
    str2 = doc.to_original_html
    # Both sides are splatted into arrays and compared pairwise.
    # NOTE(review): on Ruby 1.8 a String presumably splats into its lines,
    # giving a line-by-line comparison; on later Rubies this is one pair.
    [*str].zip([*str2]).each do |s1, s2|
      assert_equal s1, s2
    end
  end

  # Parses +str2+, optionally yields the document for modification, and
  # asserts that to_original_html equals the expected markup +str1+.
  def assert_html str1, str2
    doc = Hpricot(str2)
    yield doc if block_given?
    assert_equal str1, doc.to_original_html
  end

  # Mis-nested markup is reproduced as written, also after setting an attribute.
  def test_simple
    str = "<p>Hpricot is a <b>you know <i>uh</b> fine thing.</p>"
    assert_html str, str
    assert_html "<p class=\"new\">Hpricot is a <b>you know <i>uh</b> fine thing.</p>", str do |doc|
      (doc/:p).set('class', 'new')
    end
  end

  # Removing, re-attributing and wrapping elements shows up in the output
  # while the untouched parts keep their original form.
  def test_parent
    str = "<html><base href='/'><head><title>Test</title></head><body><div id='wrap'><p>Paragraph one.</p><p>Paragraph two.</p></div></body></html>"
    assert_html str, str
    assert_html "<html><base href='/'><body><div id=\"all\"><div><p>Paragraph one.</p></div><div><p>Paragraph two.</p></div></div></body></html>", str do |doc|
      (doc/:head).remove
      (doc/:div).set('id', 'all')
      (doc/:p).wrap('<div></div>')
    end
  end

  # The node following the image must stringify to the expected text; the
  # octal escapes spell out the bytes of the apostrophe in the fixture.
  def test_escaping_of_contents
    doc = Hpricot(TestFiles::BOINGBOING)
    assert_equal "Fukuda\342\200\231s Automatic Door opens around your body as you pass through it. The idea is to save energy and keep the room clean.", doc.at("img[@alt='200606131240']").next.to_s.strip
  end

  # Whole fixture files must round-trip unchanged.
  def test_files
    assert_roundtrip TestFiles::BASIC
    assert_roundtrip TestFiles::BOINGBOING
    assert_roundtrip TestFiles::CY0
  end

  # Attribute values are stored escaped and exposed unescaped.
  def test_escaping_of_attrs
    # ampersands in URLs: the markup carries the escaped form (&amp;)
    str = %{<a href="http://google.com/search?q=hpricot&amp;l=en">Google</a>}
    link = (doc = Hpricot(str)).at(:a)
    # the regular accessors return the decoded value
    assert_equal "http://google.com/search?q=hpricot&l=en", link['href']
    assert_equal "http://google.com/search?q=hpricot&l=en", link.attributes['href']
    assert_equal "http://google.com/search?q=hpricot&l=en", link.get_attribute('href')
    # raw_attributes is expected to keep the entity exactly as written
    assert_equal "http://google.com/search?q=hpricot&amp;l=en", link.raw_attributes['href']
    assert_equal str, doc.to_html

    # alter the url: quotes inside the new value must come out as &quot;
    # so the double-quoted attribute stays well-formed
    link['href'] = "javascript:alert(\"AGGA-KA-BOO!\")"
    assert_equal %{<a href="javascript:alert(&quot;AGGA-KA-BOO!&quot;)">Google</a>}, doc.to_html
  end
end
|
data/test/test_xml.rb
ADDED
@@ -0,0 +1,28 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require 'test/unit'
|
4
|
+
require 'hpricot'
|
5
|
+
require 'load_files'
|
6
|
+
|
7
|
+
# Checks for Hpricot::XML, the XML parsing entry point.
class TestParser < Test::Unit::TestCase
  # normally, the link tags are empty HTML tags.
  # contributed by laudney.
  def test_normally_empty
    feed = Hpricot::XML("<rss><channel><title>this is title</title><link>http://fake.com</link></channel></rss>")
    title = feed/:rss/:channel/:title
    link = feed/:rss/:channel/:link
    assert_equal("this is title", title.text)
    assert_equal("http://fake.com", link.text)
  end

  # make sure XML doesn't get downcased
  def test_casing
    why = Hpricot::XML(TestFiles::WHY)
    update_period = why.at("sy:updatePeriod")
    assert_equal("hourly", update_period.inner_html)
    permalink_guids = why/"guid[@isPermaLink]"
    assert_equal(1, permalink_guids.length)
  end

  # be sure tags named "text" are ok
  def test_text_tags
    feed = Hpricot::XML("<feed><title>City Poisoned</title><text>Rita Lee has poisoned Brazil.</text></feed>")
    titles = feed/"title"
    assert_equal("City Poisoned", titles.text)
  end
end
|
metadata
ADDED
@@ -0,0 +1,98 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
rubygems_version: 0.9.0
|
3
|
+
specification_version: 1
|
4
|
+
name: hpricot
|
5
|
+
version: !ruby/object:Gem::Version
|
6
|
+
version: "0.6"
|
7
|
+
date: 2007-06-15 00:00:00 -07:00
|
8
|
+
summary: a swift, liberal HTML parser with a fantastic library
|
9
|
+
require_paths:
|
10
|
+
- lib/i686-linux
|
11
|
+
- lib
|
12
|
+
email: why@ruby-lang.org
|
13
|
+
homepage: http://code.whytheluckystiff.net/hpricot/
|
14
|
+
rubyforge_project:
|
15
|
+
description: a swift, liberal HTML parser with a fantastic library
|
16
|
+
autorequire:
|
17
|
+
default_executable:
|
18
|
+
bindir: bin
|
19
|
+
has_rdoc: true
|
20
|
+
required_ruby_version: !ruby/object:Gem::Version::Requirement
|
21
|
+
requirements:
|
22
|
+
- - ">"
|
23
|
+
- !ruby/object:Gem::Version
|
24
|
+
version: 0.0.0
|
25
|
+
version:
|
26
|
+
platform: jruby
|
27
|
+
signing_key:
|
28
|
+
cert_chain:
|
29
|
+
post_install_message:
|
30
|
+
authors:
|
31
|
+
- why the lucky stiff
|
32
|
+
files:
|
33
|
+
- CHANGELOG
|
34
|
+
- COPYING
|
35
|
+
- README
|
36
|
+
- Rakefile
|
37
|
+
- test/files
|
38
|
+
- test/test_preserved.rb
|
39
|
+
- test/test_paths.rb
|
40
|
+
- test/load_files.rb
|
41
|
+
- test/test_xml.rb
|
42
|
+
- test/test_parser.rb
|
43
|
+
- test/test_alter.rb
|
44
|
+
- test/test_builder.rb
|
45
|
+
- test/files/why.xml
|
46
|
+
- test/files/boingboing.html
|
47
|
+
- test/files/uswebgen.html
|
48
|
+
- test/files/immob.html
|
49
|
+
- test/files/week9.html
|
50
|
+
- test/files/utf8.html
|
51
|
+
- test/files/basic.xhtml
|
52
|
+
- test/files/cy0.html
|
53
|
+
- test/files/tenderlove.html
|
54
|
+
- test/files/pace_application.html
|
55
|
+
- lib/hpricot
|
56
|
+
- lib/hpricot.rb
|
57
|
+
- lib/i686-linux
|
58
|
+
- lib/hpricot/builder.rb
|
59
|
+
- lib/hpricot/htmlinfo.rb
|
60
|
+
- lib/hpricot/xchar.rb
|
61
|
+
- lib/hpricot/inspect.rb
|
62
|
+
- lib/hpricot/modules.rb
|
63
|
+
- lib/hpricot/parse.rb
|
64
|
+
- lib/hpricot/tag.rb
|
65
|
+
- lib/hpricot/traverse.rb
|
66
|
+
- lib/hpricot/elements.rb
|
67
|
+
- lib/hpricot/tags.rb
|
68
|
+
- lib/hpricot/blankslate.rb
|
69
|
+
- extras/mingw-rbconfig.rb
|
70
|
+
- ext/hpricot_scan/hpricot_scan.h
|
71
|
+
- ext/hpricot_scan/HpricotScanService.java
|
72
|
+
- ext/hpricot_scan/hpricot_scan.c
|
73
|
+
- ext/hpricot_scan/extconf.rb
|
74
|
+
- ext/hpricot_scan/hpricot_common.rl
|
75
|
+
- ext/hpricot_scan/hpricot_scan.rl
|
76
|
+
- ext/hpricot_scan/hpricot_scan.java.rl
|
77
|
+
- lib/i686-linux/hpricot_scan.jar
|
78
|
+
test_files: []
|
79
|
+
|
80
|
+
rdoc_options:
|
81
|
+
- --quiet
|
82
|
+
- --title
|
83
|
+
- The Hpricot Reference
|
84
|
+
- --main
|
85
|
+
- README
|
86
|
+
- --inline-source
|
87
|
+
extra_rdoc_files:
|
88
|
+
- README
|
89
|
+
- CHANGELOG
|
90
|
+
- COPYING
|
91
|
+
executables: []
|
92
|
+
|
93
|
+
extensions: []
|
94
|
+
|
95
|
+
requirements: []
|
96
|
+
|
97
|
+
dependencies: []
|
98
|
+
|