hpricot 0.8.3-i386-mswin32
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG +104 -0
- data/COPYING +18 -0
- data/README.md +276 -0
- data/Rakefile +234 -0
- data/ext/fast_xs/FastXsService.java +1123 -0
- data/ext/fast_xs/extconf.rb +4 -0
- data/ext/fast_xs/fast_xs.c +210 -0
- data/ext/hpricot_scan/HpricotCss.java +850 -0
- data/ext/hpricot_scan/HpricotScanService.java +2099 -0
- data/ext/hpricot_scan/extconf.rb +9 -0
- data/ext/hpricot_scan/hpricot_common.rl +76 -0
- data/ext/hpricot_scan/hpricot_css.c +3511 -0
- data/ext/hpricot_scan/hpricot_css.java.rl +155 -0
- data/ext/hpricot_scan/hpricot_css.rl +120 -0
- data/ext/hpricot_scan/hpricot_scan.c +7039 -0
- data/ext/hpricot_scan/hpricot_scan.h +79 -0
- data/ext/hpricot_scan/hpricot_scan.java.rl +1161 -0
- data/ext/hpricot_scan/hpricot_scan.rl +896 -0
- data/extras/hpricot.png +0 -0
- data/lib/fast_xs.rb +1 -0
- data/lib/fast_xs/1.8/fast_xs.so +0 -0
- data/lib/fast_xs/1.9/fast_xs.so +0 -0
- data/lib/hpricot.rb +26 -0
- data/lib/hpricot/blankslate.rb +63 -0
- data/lib/hpricot/builder.rb +216 -0
- data/lib/hpricot/elements.rb +514 -0
- data/lib/hpricot/htmlinfo.rb +691 -0
- data/lib/hpricot/inspect.rb +103 -0
- data/lib/hpricot/modules.rb +40 -0
- data/lib/hpricot/parse.rb +38 -0
- data/lib/hpricot/tag.rb +219 -0
- data/lib/hpricot/tags.rb +164 -0
- data/lib/hpricot/traverse.rb +839 -0
- data/lib/hpricot/xchar.rb +94 -0
- data/lib/hpricot_scan.rb +1 -0
- data/lib/hpricot_scan/1.8/hpricot_scan.so +0 -0
- data/lib/hpricot_scan/1.9/hpricot_scan.so +0 -0
- data/test/files/basic.xhtml +17 -0
- data/test/files/boingboing.html +2266 -0
- data/test/files/cy0.html +3653 -0
- data/test/files/immob.html +400 -0
- data/test/files/pace_application.html +1320 -0
- data/test/files/tenderlove.html +16 -0
- data/test/files/uswebgen.html +220 -0
- data/test/files/utf8.html +1054 -0
- data/test/files/week9.html +1723 -0
- data/test/files/why.xml +19 -0
- data/test/load_files.rb +7 -0
- data/test/nokogiri-bench.rb +64 -0
- data/test/test_alter.rb +96 -0
- data/test/test_builder.rb +37 -0
- data/test/test_parser.rb +457 -0
- data/test/test_paths.rb +25 -0
- data/test/test_preserved.rb +88 -0
- data/test/test_xml.rb +28 -0
- metadata +128 -0
data/test/test_paths.rb
ADDED
@@ -0,0 +1,25 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require 'test/unit'
|
4
|
+
require 'hpricot'
|
5
|
+
require 'load_files'
|
6
|
+
|
7
|
+
# Path-related checks: the css_path / xpath reported by an element must lead
# back to that element, and attribute selectors must accept square brackets
# inside quoted values.
class TestParser < Test::Unit::TestCase
  # Looks up an element with each selector, then looks it up again through
  # the css_path and the xpath the element reports, expecting the same node.
  def test_roundtrip
    @basic = Hpricot.parse(TestFiles::BASIC)
    selectors = %w[link link[2] body #link1 a p.ohmy]
    selectors.each do |selector|
      found = @basic.at(selector)
      assert_equal(found, @basic.at(found.css_path))
      assert_equal(found, @basic.at(found.xpath))
    end
  end

  # Brackets that appear inside the quoted attribute value must not be taken
  # for the closing bracket of the attribute selector itself.
  def test_attr_brackets
    single = Hpricot('<input name="vendor[porkpies]"/>')
    prefix_hits = single / 'input[@name^="vendor[porkpies]"]'
    assert_equal(1, prefix_hits.length)
    exact_hits = single / 'input[@name="vendor[porkpies]"]'
    assert_equal(1, exact_hits.length)
    suffix_hits = single / 'input[@name$="]]]]]"]'
    assert_equal(0, suffix_hits.length)

    nested = Hpricot('<input name="vendor[porkpies][meaty]"/>')
    nested_hits = nested / 'input[@name^="vendor[porkpies][meaty]"]'
    assert_equal(1, nested_hits.length)
  end
end
|
data/test/test_preserved.rb
ADDED
@@ -0,0 +1,88 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
#!/usr/bin/env ruby
|
3
|
+
|
4
|
+
require 'test/unit'
|
5
|
+
require 'hpricot'
|
6
|
+
require 'load_files'
|
7
|
+
|
8
|
+
# Backport of String#lines for interpreters whose String does not provide it.
# The guard means nothing is redefined when the method already exists.
unless "".respond_to?(:lines)
  require 'enumerator'

  class String
    # Returns an enumerator that walks the receiver with each_line, so every
    # yielded element is one line including its terminator.
    def lines
      # enum_for builds the same enumerator without naming
      # Enumerable::Enumerator, a constant that only exists on Ruby 1.8.
      enum_for(:each_line)
    end
  end
end
|
16
|
+
|
17
|
+
# Checks that Hpricot gives markup back exactly as it was read while the
# document is untouched, and that edited markup is serialized as expected.
class TestPreserved < Test::Unit::TestCase
  # Parses +str+, yields the document to an optional block, then asserts that
  # to_original_html reproduces +str+ line by line.
  #
  # NOTE: zip pairs each line of +str+ with the line at the same index of the
  # output, so output lines beyond the length of +str+ are not compared.
  def assert_roundtrip(str)
    doc = Hpricot(str)
    yield doc if block_given?
    str2 = doc.to_original_html
    # Feature-detect instead of matching RUBY_VERSION against /^1.9/: that
    # pattern skipped every interpreter newer than 1.9 (and its unescaped dot
    # matched any character). Strings without encodings are left alone.
    str2.force_encoding('UTF-8') if str2.respond_to?(:force_encoding)
    str.lines.zip(str2.lines).each do |s1, s2|
      assert_equal s1, s2
    end
  end

  # Parses +str2+, yields the document to an optional block (typically to
  # modify it), then asserts that to_original_html equals +str1+.
  def assert_html(str1, str2)
    doc = Hpricot(str2)
    yield doc if block_given?
    assert_equal str1, doc.to_original_html
  end

  # Mis-nested markup is preserved as written; setting an attribute only
  # changes the tag that was touched.
  def test_simple
    str = "<p>Hpricot is a <b>you know <i>uh</b> fine thing.</p>"
    assert_html str, str
    assert_html "<p class=\"new\">Hpricot is a <b>you know <i>uh</b> fine thing.</p>", str do |doc|
      (doc/:p).set('class', 'new')
    end
  end

  # Removing, re-attributing and wrapping elements leaves the untouched
  # surroundings exactly as they were in the input.
  def test_parent
    str = "<html><base href='/'><head><title>Test</title></head><body><div id='wrap'><p>Paragraph one.</p><p>Paragraph two.</p></div></body></html>"
    assert_html str, str
    assert_html "<html><base href='/'><body><div id=\"all\"><div><p>Paragraph one.</p></div><div><p>Paragraph two.</p></div></div></body></html>", str do |doc|
      (doc/:head).remove
      (doc/:div).set('id', 'all')
      (doc/:p).wrap('<div></div>')
    end
  end

  # The text node following the image must serialize to the expected string.
  def test_escaping_of_contents
    doc = Hpricot(TestFiles::BOINGBOING)
    assert_equal "Fukuda’s Automatic Door opens around your body as you pass through it. The idea is to save energy and keep the room clean.", doc.at("img[@alt='200606131240']").next.to_s.strip
  end

  # Whole fixture files must survive a parse / to_original_html round trip.
  def test_files
    assert_roundtrip TestFiles::BASIC
    assert_roundtrip TestFiles::BOINGBOING
    assert_roundtrip TestFiles::CY0
  end

  # <link> is serialized as an empty tag by the HTML parser but keeps its
  # content under Hpricot.XML; the original text round-trips either way.
  def test_fixup_link
    xml = %{<?xml version="1.0" encoding="UTF-8"?><rss><channel><link>ht</link></channel></rss>}
    assert_roundtrip xml
    # assert_equal takes the expected value first so failure messages label
    # the two sides correctly.
    assert_equal %{<?xml version="1.0" encoding="UTF-8"?><rss><channel><link />ht</channel></rss>},
                 Hpricot(xml).to_s
    assert_equal %{<?xml version="1.0" encoding="UTF-8"?><rss><channel><link>ht</link></channel></rss>},
                 Hpricot.XML(xml).to_s
  end

  # Attribute accessors are expected to return the unescaped value, while
  # raw_attributes and the serialized HTML are expected to keep entities.
  def test_escaping_of_attrs
    # ampersands in URLs
    str = %{<a href="http://google.com/search?q=hpricot&amp;l=en">Google</a>}
    link = (doc = Hpricot(str)).at(:a)
    assert_equal "http://google.com/search?q=hpricot&l=en", link['href']
    assert_equal "http://google.com/search?q=hpricot&l=en", link.attributes['href']
    assert_equal "http://google.com/search?q=hpricot&l=en", link.get_attribute('href')
    assert_equal "http://google.com/search?q=hpricot&amp;l=en", link.raw_attributes['href']
    assert_equal str, doc.to_html

    # alter the url; the double quotes in the new value must come out escaped
    link['href'] = "javascript:alert(\"AGGA-KA-BOO!\")"
    assert_equal %{<a href="javascript:alert(&quot;AGGA-KA-BOO!&quot;)">Google</a>}, doc.to_html
  end
end
|
data/test/test_xml.rb
ADDED
@@ -0,0 +1,28 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require 'test/unit'
|
4
|
+
require 'hpricot'
|
5
|
+
require 'load_files'
|
6
|
+
|
7
|
+
# Checks for documents parsed with Hpricot's XML entry point.
class TestParser < Test::Unit::TestCase
  # normally, the link tags are empty HTML tags.
  # contributed by laudney.
  def test_normally_empty
    rss = "<rss><channel><title>this is title</title><link>http://fake.com</link></channel></rss>"
    doc = Hpricot.XML(rss)
    title = (doc/:rss/:channel/:title)
    link = (doc/:rss/:channel/:link)
    assert_equal("this is title", title.text)
    assert_equal("http://fake.com", link.text)
  end

  # make sure XML doesn't get downcased
  def test_casing
    doc = Hpricot.XML(TestFiles::WHY)
    update_period = doc.at("sy:updatePeriod")
    assert_equal("hourly", update_period.inner_html)
    permalink_guids = (doc/"guid[@isPermaLink]")
    assert_equal(1, permalink_guids.length)
  end

  # be sure tags named "text" are ok
  def test_text_tags
    feed = "<feed><title>City Poisoned</title><text>Rita Lee has poisoned Brazil.</text></feed>"
    doc = Hpricot.XML(feed)
    titles = (doc/"title")
    assert_equal("City Poisoned", titles.text)
  end
end
|
metadata
ADDED
@@ -0,0 +1,128 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: hpricot
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
hash: 57
|
5
|
+
prerelease: false
|
6
|
+
segments:
|
7
|
+
- 0
|
8
|
+
- 8
|
9
|
+
- 3
|
10
|
+
version: 0.8.3
|
11
|
+
platform: i386-mswin32
|
12
|
+
authors:
|
13
|
+
- why the lucky stiff
|
14
|
+
autorequire:
|
15
|
+
bindir: bin
|
16
|
+
cert_chain: []
|
17
|
+
|
18
|
+
date: 2010-11-03 00:00:00 -05:00
|
19
|
+
default_executable:
|
20
|
+
dependencies: []
|
21
|
+
|
22
|
+
description: a swift, liberal HTML parser with a fantastic library
|
23
|
+
email: why@ruby-lang.org
|
24
|
+
executables: []
|
25
|
+
|
26
|
+
extensions: []
|
27
|
+
|
28
|
+
extra_rdoc_files:
|
29
|
+
- README.md
|
30
|
+
- CHANGELOG
|
31
|
+
- COPYING
|
32
|
+
files:
|
33
|
+
- CHANGELOG
|
34
|
+
- COPYING
|
35
|
+
- README.md
|
36
|
+
- Rakefile
|
37
|
+
- test/files/basic.xhtml
|
38
|
+
- test/files/boingboing.html
|
39
|
+
- test/files/cy0.html
|
40
|
+
- test/files/immob.html
|
41
|
+
- test/files/pace_application.html
|
42
|
+
- test/files/tenderlove.html
|
43
|
+
- test/files/uswebgen.html
|
44
|
+
- test/files/utf8.html
|
45
|
+
- test/files/week9.html
|
46
|
+
- test/files/why.xml
|
47
|
+
- test/load_files.rb
|
48
|
+
- test/nokogiri-bench.rb
|
49
|
+
- test/test_alter.rb
|
50
|
+
- test/test_builder.rb
|
51
|
+
- test/test_parser.rb
|
52
|
+
- test/test_paths.rb
|
53
|
+
- test/test_preserved.rb
|
54
|
+
- test/test_xml.rb
|
55
|
+
- extras/hpricot.png
|
56
|
+
- lib/hpricot/blankslate.rb
|
57
|
+
- lib/hpricot/builder.rb
|
58
|
+
- lib/hpricot/elements.rb
|
59
|
+
- lib/hpricot/htmlinfo.rb
|
60
|
+
- lib/hpricot/inspect.rb
|
61
|
+
- lib/hpricot/modules.rb
|
62
|
+
- lib/hpricot/parse.rb
|
63
|
+
- lib/hpricot/tag.rb
|
64
|
+
- lib/hpricot/tags.rb
|
65
|
+
- lib/hpricot/traverse.rb
|
66
|
+
- lib/hpricot/xchar.rb
|
67
|
+
- lib/hpricot.rb
|
68
|
+
- ext/hpricot_scan/hpricot_scan.h
|
69
|
+
- ext/fast_xs/FastXsService.java
|
70
|
+
- ext/hpricot_scan/HpricotCss.java
|
71
|
+
- ext/hpricot_scan/HpricotScanService.java
|
72
|
+
- ext/fast_xs/fast_xs.c
|
73
|
+
- ext/hpricot_scan/hpricot_css.c
|
74
|
+
- ext/hpricot_scan/hpricot_scan.c
|
75
|
+
- ext/fast_xs/extconf.rb
|
76
|
+
- ext/hpricot_scan/extconf.rb
|
77
|
+
- ext/hpricot_scan/hpricot_common.rl
|
78
|
+
- ext/hpricot_scan/hpricot_css.java.rl
|
79
|
+
- ext/hpricot_scan/hpricot_css.rl
|
80
|
+
- ext/hpricot_scan/hpricot_scan.java.rl
|
81
|
+
- ext/hpricot_scan/hpricot_scan.rl
|
82
|
+
- lib/hpricot_scan.rb
|
83
|
+
- lib/hpricot_scan/1.8/hpricot_scan.so
|
84
|
+
- lib/hpricot_scan/1.9/hpricot_scan.so
|
85
|
+
- lib/fast_xs.rb
|
86
|
+
- lib/fast_xs/1.8/fast_xs.so
|
87
|
+
- lib/fast_xs/1.9/fast_xs.so
|
88
|
+
has_rdoc: true
|
89
|
+
homepage: http://code.whytheluckystiff.net/hpricot/
|
90
|
+
licenses: []
|
91
|
+
|
92
|
+
post_install_message:
|
93
|
+
rdoc_options:
|
94
|
+
- --quiet
|
95
|
+
- --title
|
96
|
+
- The Hpricot Reference
|
97
|
+
- --main
|
98
|
+
- README.md
|
99
|
+
- --inline-source
|
100
|
+
require_paths:
|
101
|
+
- lib
|
102
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
103
|
+
none: false
|
104
|
+
requirements:
|
105
|
+
- - ">="
|
106
|
+
- !ruby/object:Gem::Version
|
107
|
+
hash: 3
|
108
|
+
segments:
|
109
|
+
- 0
|
110
|
+
version: "0"
|
111
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
112
|
+
none: false
|
113
|
+
requirements:
|
114
|
+
- - ">="
|
115
|
+
- !ruby/object:Gem::Version
|
116
|
+
hash: 3
|
117
|
+
segments:
|
118
|
+
- 0
|
119
|
+
version: "0"
|
120
|
+
requirements: []
|
121
|
+
|
122
|
+
rubyforge_project: hobix
|
123
|
+
rubygems_version: 1.3.7
|
124
|
+
signing_key:
|
125
|
+
specification_version: 3
|
126
|
+
summary: a swift, liberal HTML parser with a fantastic library
|
127
|
+
test_files: []
|
128
|
+
|