hpricot 0.8.2-java
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGELOG +88 -0
- data/COPYING +18 -0
- data/README +275 -0
- data/Rakefile +272 -0
- data/ext/fast_xs/FastXsService.java +1030 -0
- data/ext/fast_xs/extconf.rb +4 -0
- data/ext/fast_xs/fast_xs.c +201 -0
- data/ext/hpricot_scan/HpricotCss.java +831 -0
- data/ext/hpricot_scan/HpricotScanService.java +2086 -0
- data/ext/hpricot_scan/extconf.rb +6 -0
- data/ext/hpricot_scan/hpricot_common.rl +76 -0
- data/ext/hpricot_scan/hpricot_css.c +3503 -0
- data/ext/hpricot_scan/hpricot_css.java.rl +155 -0
- data/ext/hpricot_scan/hpricot_css.rl +115 -0
- data/ext/hpricot_scan/hpricot_scan.c +6927 -0
- data/ext/hpricot_scan/hpricot_scan.h +79 -0
- data/ext/hpricot_scan/hpricot_scan.java.rl +1152 -0
- data/ext/hpricot_scan/hpricot_scan.rl +788 -0
- data/extras/mingw-rbconfig.rb +176 -0
- data/lib/fast_xs.jar +0 -0
- data/lib/hpricot.rb +26 -0
- data/lib/hpricot/blankslate.rb +63 -0
- data/lib/hpricot/builder.rb +216 -0
- data/lib/hpricot/elements.rb +510 -0
- data/lib/hpricot/htmlinfo.rb +691 -0
- data/lib/hpricot/inspect.rb +103 -0
- data/lib/hpricot/modules.rb +40 -0
- data/lib/hpricot/parse.rb +38 -0
- data/lib/hpricot/tag.rb +219 -0
- data/lib/hpricot/tags.rb +164 -0
- data/lib/hpricot/traverse.rb +839 -0
- data/lib/hpricot/xchar.rb +94 -0
- data/lib/hpricot_scan.jar +0 -0
- data/test/files/basic.xhtml +17 -0
- data/test/files/boingboing.html +2266 -0
- data/test/files/cy0.html +3653 -0
- data/test/files/immob.html +400 -0
- data/test/files/pace_application.html +1320 -0
- data/test/files/tenderlove.html +16 -0
- data/test/files/uswebgen.html +220 -0
- data/test/files/utf8.html +1054 -0
- data/test/files/week9.html +1723 -0
- data/test/files/why.xml +19 -0
- data/test/load_files.rb +7 -0
- data/test/nokogiri-bench.rb +64 -0
- data/test/test_alter.rb +96 -0
- data/test/test_builder.rb +37 -0
- data/test/test_parser.rb +428 -0
- data/test/test_paths.rb +25 -0
- data/test/test_preserved.rb +88 -0
- data/test/test_xml.rb +28 -0
- metadata +112 -0
data/test/test_paths.rb
ADDED
@@ -0,0 +1,25 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require 'test/unit'
|
4
|
+
require 'hpricot'
|
5
|
+
require 'load_files'
|
6
|
+
|
7
|
+
# Path tests: generated CSS/XPath locators must lead back to the element they
# were generated from, and attribute selectors must cope with square brackets
# inside the quoted attribute value.
class TestParser < Test::Unit::TestCase
  # For a handful of selectors, look up an element, ask it for its css_path
  # and xpath, and check that each path finds the very same element again.
  def test_roundtrip
    @basic = Hpricot.parse(TestFiles::BASIC)
    ['link', 'link[2]', 'body', '#link1', 'a', 'p.ohmy'].each do |selector|
      node = @basic.at(selector)
      [:css_path, :xpath].each do |locator|
        assert_equal node, @basic.at(node.__send__(locator))
      end
    end
  end

  # Brackets inside a quoted attribute value must not end the selector early.
  def test_attr_brackets
    single = Hpricot('<input name="vendor[porkpies]"/>')
    expectations = [
      [1, 'input[@name^="vendor[porkpies]"]'],
      [1, 'input[@name="vendor[porkpies]"]'],
      [0, 'input[@name$="]]]]]"]']
    ]
    expectations.each do |count, selector|
      assert_equal count, (single / selector).length
    end

    nested = Hpricot('<input name="vendor[porkpies][meaty]"/>')
    assert_equal 1, (nested / 'input[@name^="vendor[porkpies][meaty]"]').length
  end
end
|
data/test/test_preserved.rb
ADDED
@@ -0,0 +1,88 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# -*- coding: utf-8 -*-
|
3
|
+
|
4
|
+
require 'test/unit'
|
5
|
+
require 'hpricot'
|
6
|
+
require 'load_files'
|
7
|
+
|
8
|
+
# Compatibility shim: only defines String#lines when the running Ruby's
# String does not already respond to it.
unless "".respond_to?(:lines)
  require 'enumerator'
  class String
    # Line iteration built on each_line.
    #
    # With a block, yields every line and returns whatever each_line returns.
    # Without a block, returns an enumerator over the lines.
    # Any arguments (e.g. a custom line separator) are forwarded to each_line
    # unchanged, so the zero-argument form behaves exactly as before.
    def lines(*separator, &block)
      return each_line(*separator, &block) if block
      Enumerable::Enumerator.new(self, :each_line, *separator)
    end
  end
end
|
16
|
+
|
17
|
+
# Preservation tests: a parsed document must serialize back to the markup it
# was parsed from, and edits made through the API must show up in the output
# without disturbing the untouched parts.
class TestPreserved < Test::Unit::TestCase
  # Parses +str+, optionally yields the parsed document to the caller's block,
  # then asserts that to_original_html matches +str+ line by line.
  def assert_roundtrip(str)
    doc = Hpricot(str)
    yield doc if block_given?
    str2 = doc.to_original_html
    # Feature-detect instead of matching RUBY_VERSION so that every
    # encoding-aware Ruby, not only 1.9.x, tags the output as UTF-8 before
    # comparing. Assumes the fixtures themselves are UTF-8 -- TODO confirm
    # against load_files.
    str2.force_encoding('UTF-8') if str2.respond_to?(:force_encoding)
    # NOTE: zip is driven by the lines of +str+, so surplus lines at the end
    # of the serialized output are not checked.
    str.lines.zip(str2.lines).each do |s1, s2|
      assert_equal s1, s2
    end
  end

  # Parses +str2+, optionally yields the document for modification, and
  # asserts that to_original_html equals +str1+.
  def assert_html(str1, str2)
    doc = Hpricot(str2)
    yield doc if block_given?
    assert_equal str1, doc.to_original_html
  end

  # Mis-nested markup survives untouched; setting an attribute only rewrites
  # the affected start tag.
  def test_simple
    str = "<p>Hpricot is a <b>you know <i>uh</b> fine thing.</p>"
    assert_html str, str
    assert_html "<p class=\"new\">Hpricot is a <b>you know <i>uh</b> fine thing.</p>", str do |doc|
      (doc/:p).set('class', 'new')
    end
  end

  # Removing, re-attributing and wrapping elements is reflected in the output.
  def test_parent
    str = "<html><base href='/'><head><title>Test</title></head><body><div id='wrap'><p>Paragraph one.</p><p>Paragraph two.</p></div></body></html>"
    assert_html str, str
    assert_html "<html><base href='/'><body><div id=\"all\"><div><p>Paragraph one.</p></div><div><p>Paragraph two.</p></div></div></body></html>", str do |doc|
      (doc/:head).remove
      (doc/:div).set('id', 'all')
      (doc/:p).wrap('<div></div>')
    end
  end

  # Text following the image must come back with its typographic apostrophe.
  def test_escaping_of_contents
    doc = Hpricot(TestFiles::BOINGBOING)
    assert_equal "Fukuda’s Automatic Door opens around your body as you pass through it. The idea is to save energy and keep the room clean.", doc.at("img[@alt='200606131240']").next.to_s.strip
  end

  # Whole fixture files must round-trip.
  def test_files
    assert_roundtrip TestFiles::BASIC
    assert_roundtrip TestFiles::BOINGBOING
    assert_roundtrip TestFiles::CY0
  end

  # The same <link> markup serializes differently under the HTML parser
  # (empty tag, text pushed out after it) and the XML parser (content kept).
  def test_fixup_link
    doc = %{<?xml version="1.0" encoding="UTF-8"?><rss><channel><link>ht</link></channel></rss>}
    assert_roundtrip doc
    # assert_equal takes the expected value first, so failure messages label
    # the two sides correctly.
    assert_equal %{<?xml version="1.0" encoding="UTF-8"?><rss><channel><link />ht</channel></rss>},
                 Hpricot(doc).to_s
    assert_equal %{<?xml version="1.0" encoding="UTF-8"?><rss><channel><link>ht</link></channel></rss>},
                 Hpricot.XML(doc).to_s
  end

  # Attribute accessors hand back the decoded value, raw_attributes keeps the
  # escaped source form, and values assigned through the API are escaped when
  # the document is serialized.
  def test_escaping_of_attrs
    # ampersands in URLs
    str = %{<a href="http://google.com/search?q=hpricot&amp;l=en">Google</a>}
    link = (doc = Hpricot(str)).at(:a)
    assert_equal "http://google.com/search?q=hpricot&l=en", link['href']
    assert_equal "http://google.com/search?q=hpricot&l=en", link.attributes['href']
    assert_equal "http://google.com/search?q=hpricot&l=en", link.get_attribute('href')
    assert_equal "http://google.com/search?q=hpricot&amp;l=en", link.raw_attributes['href']
    assert_equal str, doc.to_html

    # alter the url
    link['href'] = "javascript:alert(\"AGGA-KA-BOO!\")"
    assert_equal %{<a href="javascript:alert(&quot;AGGA-KA-BOO!&quot;)">Google</a>}, doc.to_html
  end
end
|
data/test/test_xml.rb
ADDED
@@ -0,0 +1,28 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require 'test/unit'
|
4
|
+
require 'hpricot'
|
5
|
+
require 'load_files'
|
6
|
+
|
7
|
+
# XML-mode tests: Hpricot.XML must not apply HTML-specific tag rules or
# change the case of names.
class TestParser < Test::Unit::TestCase
  # In HTML, <link> is normally an empty tag; in XML mode it must keep its
  # text content. Contributed by laudney.
  def test_normally_empty
    rss = "<rss><channel><title>this is title</title><link>http://fake.com</link></channel></rss>"
    feed = Hpricot.XML(rss)
    title_text = (feed / :rss / :channel / :title).text
    assert_equal "this is title", title_text
    link_text = (feed / :rss / :channel / :link).text
    assert_equal "http://fake.com", link_text
  end

  # Mixed-case element and attribute names must still match in XML mode.
  def test_casing
    feed = Hpricot.XML(TestFiles::WHY)
    update_period = feed.at("sy:updatePeriod")
    assert_equal "hourly", update_period.inner_html
    permalink_guids = feed / "guid[@isPermaLink]"
    assert_equal 1, permalink_guids.length
  end

  # An element literally named "text" must not disturb reading other text.
  def test_text_tags
    xml = "<feed><title>City Poisoned</title><text>Rita Lee has poisoned Brazil.</text></feed>"
    feed = Hpricot.XML(xml)
    titles = feed / "title"
    assert_equal "City Poisoned", titles.text
  end
end
|
metadata
ADDED
@@ -0,0 +1,112 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: hpricot
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.8.2
|
5
|
+
platform: java
|
6
|
+
authors:
|
7
|
+
- why the lucky stiff
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
|
12
|
+
date: 2009-11-05 00:00:00 -06:00
|
13
|
+
default_executable:
|
14
|
+
dependencies: []
|
15
|
+
|
16
|
+
description: a swift, liberal HTML parser with a fantastic library
|
17
|
+
email: why@ruby-lang.org
|
18
|
+
executables: []
|
19
|
+
|
20
|
+
extensions: []
|
21
|
+
|
22
|
+
extra_rdoc_files:
|
23
|
+
- README
|
24
|
+
- CHANGELOG
|
25
|
+
- COPYING
|
26
|
+
files:
|
27
|
+
- CHANGELOG
|
28
|
+
- COPYING
|
29
|
+
- README
|
30
|
+
- Rakefile
|
31
|
+
- test/files/basic.xhtml
|
32
|
+
- test/files/boingboing.html
|
33
|
+
- test/files/cy0.html
|
34
|
+
- test/files/immob.html
|
35
|
+
- test/files/pace_application.html
|
36
|
+
- test/files/tenderlove.html
|
37
|
+
- test/files/uswebgen.html
|
38
|
+
- test/files/utf8.html
|
39
|
+
- test/files/week9.html
|
40
|
+
- test/files/why.xml
|
41
|
+
- test/load_files.rb
|
42
|
+
- test/nokogiri-bench.rb
|
43
|
+
- test/test_alter.rb
|
44
|
+
- test/test_builder.rb
|
45
|
+
- test/test_parser.rb
|
46
|
+
- test/test_paths.rb
|
47
|
+
- test/test_preserved.rb
|
48
|
+
- test/test_xml.rb
|
49
|
+
- lib/hpricot/blankslate.rb
|
50
|
+
- lib/hpricot/builder.rb
|
51
|
+
- lib/hpricot/elements.rb
|
52
|
+
- lib/hpricot/htmlinfo.rb
|
53
|
+
- lib/hpricot/inspect.rb
|
54
|
+
- lib/hpricot/modules.rb
|
55
|
+
- lib/hpricot/parse.rb
|
56
|
+
- lib/hpricot/tag.rb
|
57
|
+
- lib/hpricot/tags.rb
|
58
|
+
- lib/hpricot/traverse.rb
|
59
|
+
- lib/hpricot/xchar.rb
|
60
|
+
- lib/hpricot.rb
|
61
|
+
- extras/mingw-rbconfig.rb
|
62
|
+
- ext/hpricot_scan/hpricot_scan.h
|
63
|
+
- ext/fast_xs/FastXsService.java
|
64
|
+
- ext/hpricot_scan/HpricotCss.java
|
65
|
+
- ext/hpricot_scan/HpricotScanService.java
|
66
|
+
- ext/fast_xs/fast_xs.c
|
67
|
+
- ext/hpricot_scan/hpricot_css.c
|
68
|
+
- ext/hpricot_scan/hpricot_scan.c
|
69
|
+
- ext/fast_xs/extconf.rb
|
70
|
+
- ext/hpricot_scan/extconf.rb
|
71
|
+
- ext/hpricot_scan/hpricot_common.rl
|
72
|
+
- ext/hpricot_scan/hpricot_css.java.rl
|
73
|
+
- ext/hpricot_scan/hpricot_css.rl
|
74
|
+
- ext/hpricot_scan/hpricot_scan.java.rl
|
75
|
+
- ext/hpricot_scan/hpricot_scan.rl
|
76
|
+
- lib/hpricot_scan.jar
|
77
|
+
- lib/fast_xs.jar
|
78
|
+
has_rdoc: true
|
79
|
+
homepage: http://code.whytheluckystiff.net/hpricot/
|
80
|
+
licenses: []
|
81
|
+
|
82
|
+
post_install_message:
|
83
|
+
rdoc_options:
|
84
|
+
- --quiet
|
85
|
+
- --title
|
86
|
+
- The Hpricot Reference
|
87
|
+
- --main
|
88
|
+
- README
|
89
|
+
- --inline-source
|
90
|
+
require_paths:
|
91
|
+
- lib
|
92
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
93
|
+
requirements:
|
94
|
+
- - ">="
|
95
|
+
- !ruby/object:Gem::Version
|
96
|
+
version: "0"
|
97
|
+
version:
|
98
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
99
|
+
requirements:
|
100
|
+
- - ">="
|
101
|
+
- !ruby/object:Gem::Version
|
102
|
+
version: "0"
|
103
|
+
version:
|
104
|
+
requirements: []
|
105
|
+
|
106
|
+
rubyforge_project: hobix
|
107
|
+
rubygems_version: 1.3.5
|
108
|
+
signing_key:
|
109
|
+
specification_version: 3
|
110
|
+
summary: a swift, liberal HTML parser with a fantastic library
|
111
|
+
test_files: []
|
112
|
+
|