webtranslateit-hpricot 0.9.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +15 -0
- data/CHANGELOG +122 -0
- data/COPYING +18 -0
- data/README.md +295 -0
- data/Rakefile +237 -0
- data/ext/fast_xs/FastXsService.java +1123 -0
- data/ext/fast_xs/extconf.rb +4 -0
- data/ext/fast_xs/fast_xs.c +210 -0
- data/ext/hpricot_scan/HpricotCss.java +850 -0
- data/ext/hpricot_scan/HpricotScanService.java +2085 -0
- data/ext/hpricot_scan/MANIFEST +0 -0
- data/ext/hpricot_scan/extconf.rb +9 -0
- data/ext/hpricot_scan/hpricot_common.rl +76 -0
- data/ext/hpricot_scan/hpricot_css.c +3511 -0
- data/ext/hpricot_scan/hpricot_css.java.rl +155 -0
- data/ext/hpricot_scan/hpricot_css.rl +120 -0
- data/ext/hpricot_scan/hpricot_scan.c +6848 -0
- data/ext/hpricot_scan/hpricot_scan.h +79 -0
- data/ext/hpricot_scan/hpricot_scan.java.rl +1173 -0
- data/ext/hpricot_scan/hpricot_scan.rl +911 -0
- data/extras/hpricot.png +0 -0
- data/hpricot.gemspec +18 -0
- data/lib/hpricot/blankslate.rb +63 -0
- data/lib/hpricot/builder.rb +217 -0
- data/lib/hpricot/elements.rb +514 -0
- data/lib/hpricot/htmlinfo.rb +691 -0
- data/lib/hpricot/inspect.rb +103 -0
- data/lib/hpricot/modules.rb +40 -0
- data/lib/hpricot/parse.rb +38 -0
- data/lib/hpricot/tag.rb +219 -0
- data/lib/hpricot/tags.rb +164 -0
- data/lib/hpricot/traverse.rb +839 -0
- data/lib/hpricot/xchar.rb +95 -0
- data/lib/hpricot.rb +26 -0
- data/setup.rb +1585 -0
- data/test/files/basic.xhtml +17 -0
- data/test/files/boingboing.html +2266 -0
- data/test/files/cy0.html +3653 -0
- data/test/files/immob.html +400 -0
- data/test/files/pace_application.html +1320 -0
- data/test/files/tenderlove.html +16 -0
- data/test/files/uswebgen.html +220 -0
- data/test/files/utf8.html +1054 -0
- data/test/files/week9.html +1723 -0
- data/test/files/why.xml +19 -0
- data/test/load_files.rb +7 -0
- data/test/nokogiri-bench.rb +64 -0
- data/test/test_alter.rb +96 -0
- data/test/test_builder.rb +37 -0
- data/test/test_parser.rb +496 -0
- data/test/test_paths.rb +25 -0
- data/test/test_preserved.rb +88 -0
- data/test/test_xml.rb +28 -0
- metadata +106 -0
data/test/files/why.xml
ADDED
@@ -0,0 +1,19 @@
|
|
1
|
+
<?xml version='1.0'?><rss xmlns:admin='http://webns.net/mvcb/' version='2.0' xmlns:sy='http://purl.org/rss/1.0/modules/syndication/' xmlns:dc='http://purl.org/dc/elements/1.1/' xmlns:rdf='http://www.w3.org/1999/02/22-rdf-syntax-ns#'>
|
2
|
+
<channel>
|
3
|
+
<title>why the lucky stiff</title>
|
4
|
+
<link>http://whytheluckystiff.net</link>
|
5
|
+
<description>hex-editing reality to give us infinite grenades!!</description>
|
6
|
+
<dc:language>en-us</dc:language>
|
7
|
+
<dc:creator/>
|
8
|
+
<dc:date>2007-01-16T22:39:04+00:00</dc:date>
|
9
|
+
<admin:generatorAgent rdf:resource='http://hobix.com/?v=0.4'/>
|
10
|
+
<sy:updatePeriod>hourly</sy:updatePeriod>
|
11
|
+
<sy:updateFrequency>1</sy:updateFrequency>
|
12
|
+
<sy:updateBase>2000-01-01T12:00+00:00</sy:updateBase>
|
13
|
+
<item><title>1.3</title><link>http://whytheluckystiff.net/quatrains/1.3.html</link><guid isPermaLink='false'>quatrains/1.3@http://whytheluckystiff.net</guid><dc:subject>quatrains</dc:subject><dc:subject>quatrains</dc:subject><dc:creator>why the lucky stiff</dc:creator><dc:date>2007-01-14T08:47:05+00:00</dc:date><description><blockquote>
|
14
|
+
<p>That cadillac of yours and that driver of yours!<br />You and your teacups rattling away in the back seat!<br />You always took the mike, oh, and all those cowboys you shot!<br />I held your hand! And I&#8217;ll shoot a cowboy one day!</p>
|
15
|
+
</blockquote>
|
16
|
+
<blockquote>
|
17
|
+
<p>You said, &#8220;Let&#8217;s run into the woods like kids!&#8221; <br />You said, &#8220;Let&#8217;s rub our hands together super-hot!&#8221; <br />And we scalded the trees and left octagons, I think that was you and<br />You threw parties on the roof!</p>
|
18
|
+
</blockquote></description></item></channel>
|
19
|
+
</rss>
|
data/test/nokogiri-bench.rb
ADDED
@@ -0,0 +1,64 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
require 'rubygems'
|
3
|
+
require 'open-uri'
|
4
|
+
require 'hpricot'
|
5
|
+
require 'nokogiri'
|
6
|
+
require 'benchmark'
|
7
|
+
|
8
|
+
# Benchmark: Hpricot versus Nokogiri's Hpricot-compatible entry point, timed
# on document parsing, XPath-style searches and CSS-style searches.

# Resolve the fixture relative to this script instead of the current working
# directory, so the benchmark can be started from anywhere.
content = File.read(File.join(File.dirname(__FILE__), 'files', 'boingboing.html'))

# Number of iterations per measurement.
N = 100

# Column width reserved for report labels; fits the longest one
# ('nokogiri:xpath') so the timing columns line up.
label_width = 14

# The library may be installed under the upstream gem name or under this
# fork's name; Gem.loaded_specs returns nil for a name that is not loaded, so
# try both before reading the version.
hpricot_spec = Gem.loaded_specs['hpricot'] || Gem.loaded_specs['webtranslateit-hpricot']
abort "** Hpricot was not loaded through RubyGems; cannot check its version!" unless hpricot_spec

unless hpricot_spec.version > Gem::Version.new('0.6.161')
  abort "** Use higher than Hpricot 0.6.161!"
end

puts "Hpricot #{hpricot_spec.version} vs. Nokogiri #{Gem.loaded_specs['nokogiri'].version}"

# Parse once up front; the search benchmarks below reuse these documents.
hdoc = Hpricot(content)
ndoc = Nokogiri.Hpricot(content)

# Parsing the whole document.
Benchmark.bm(label_width) do |x|
  x.report('hpricot:doc') do
    N.times { Hpricot(content) }
  end

  x.report('nokogiri:doc') do
    N.times { Nokogiri.Hpricot(content) }
  end
end

# XPath-style searches. Results are discarded: only the elapsed time matters.
Benchmark.bm(label_width) do |x|
  x.report('hpricot:xpath') do
    N.times do
      hdoc.search("//a[@name='027906']").first.inner_text
      hdoc.search("h3[text()='College kids reportedly taking more smart drugs']").first.inner_text
    end
  end

  x.report('nokogiri:xpath') do
    N.times do
      ndoc.search("//a[@name='027906']").first.inner_text
      ndoc.search("h3[text()='College kids reportedly taking more smart drugs']").first.inner_text
    end
  end
end

# CSS-style searches. Results are discarded: only the elapsed time matters.
Benchmark.bm(label_width) do |x|
  x.report('hpricot:css') do
    N.times do
      hdoc.search('form input[@checked]').first
      hdoc.search('td spacer').first.inner_text
    end
  end

  x.report('nokogiri:css') do
    N.times do
      ndoc.search('form input[@checked]').first
      ndoc.search('td spacer').first.inner_text
    end
  end
end
|
data/test/test_alter.rb
ADDED
@@ -0,0 +1,96 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
#!/usr/bin/env ruby
|
3
|
+
|
4
|
+
require 'test/unit'
|
5
|
+
require 'hpricot'
|
6
|
+
require 'load_files'
|
7
|
+
|
8
|
+
# Tests for Hpricot's document-altering calls (before/after/wrap, class and
# attribute edits, inner_html/swap), run against the TestFiles::BASIC fixture.
#
# assert_equal is always called as (expected, actual) so that failure
# messages label the two values correctly.
class TestAlter < Test::Unit::TestCase
  def setup
    @basic = Hpricot.parse(TestFiles::BASIC)
  end

  # Inserts markup before the first <link> and checks that the first <link>
  # found afterwards is the inserted one.
  def test_before
    test0 = "<link rel='stylesheet' href='test0.css' />"
    @basic.at("link").before(test0)
    assert_equal 'test0.css', @basic.at("link").attributes['href']
  end

  # Inserts markup after the last <link> and checks that the last <link>
  # found afterwards is the inserted one.
  def test_after
    test_inf = "<link rel='stylesheet' href='test_inf.css' />"
    @basic.search("link")[-1].after(test_inf)
    assert_equal 'test_inf.css', @basic.search("link")[-1].attributes['href']
  end

  # Wraps p.ohmy in a div and checks both the live node and a re-parse of the
  # serialized document.
  def test_wrap
    ohmy = (@basic/"p.ohmy").wrap("<div id='wrapper'></div>")
    assert_equal 'wrapper', ohmy[0].parent['id']
    assert_equal 'ohmy', Hpricot(@basic.to_html).at("#wrapper").children[0]['class']
  end

  # Adds a class to the first <p> only; the class must show up on that
  # element and must not show up on the first element matched by p:gt(0).
  def test_add_class
    first_p = (@basic/"p:first").add_class("testing123")
    assert first_p[0].get_attribute("class").split(" ").include?("testing123")
    # Explicit parentheses avoid Ruby's ambiguous-argument warning for
    # `assert (...)`; the parsed expression is unchanged.
    assert((Hpricot(@basic.to_html)/"p:first")[0].attributes["class"].split(" ").include?("testing123"))
    assert(!(Hpricot(@basic.to_html)/"p:gt(0)")[0].attributes["class"].split(" ").include?("testing123"))
  end

  # Sets attributes on whole element sets, with a literal value and with a
  # block that computes the value from each element.
  def test_change_attributes
    all_ps = (@basic/"p").attr("title", "Some Title & Etc…")
    all_as = (@basic/"a").attr("href", "http://my_new_href.com")
    all_lb = (@basic/"link").attr("href") { |e| e.name }
    # NOTE(review): this raw_attributes expectation is identical to the
    # unescaped input; it may have lost entity escaping in transcription --
    # confirm against the packaged gem.
    assert_changed(@basic, "p", all_ps) { |p| p.raw_attributes["title"] == "Some Title & Etc…" }
    assert_changed(@basic, "a", all_as) { |a| a.attributes["href"] == "http://my_new_href.com" }
    assert_changed(@basic, "link", all_lb) { |a| a.attributes["href"] == "link" }
  end

  # Sets attributes on single elements through attributes[]= and reads them
  # back through both raw_attributes and attributes.
  def test_change_attributes2
    (@basic%"a").attributes["href"] = "http://my_new_href.com"
    (@basic%"p").attributes["title"] = "Some Title & Etc…"
    assert_equal "http://my_new_href.com", (@basic%"a").raw_attributes["href"]
    # NOTE(review): the raw_attributes and attributes expectations below are
    # the same string; the raw one may have lost entity escaping in
    # transcription -- confirm against the packaged gem.
    assert_equal "Some Title & Etc…", (@basic%"p").raw_attributes["title"]
    assert_equal "Some Title & Etc…", (@basic%"p").attributes["title"]
  end

  def test_remove_attr
    all_rl = (@basic/"link").remove_attr("href")
    assert_changed(@basic, "link", all_rl) { |link| link['href'].nil? }
  end

  def test_remove_class
    all_c1 = (@basic/"p[@class*='last']").remove_class("last")
    assert_changed(@basic, "p[@class*='last']", all_c1) { |p| p['class'] == 'final' }
  end

  def test_remove_all_classes
    all_c2 = (@basic/"p[@class]").remove_class
    assert_changed(@basic, "p[@class]", all_c2) { |p| p['class'].nil? }
  end

  # Mixed-case tag names must be preserved when markup is inserted into an
  # XML document, whether it was parsed from a string or built from a block.
  def test_xml_casing
    doc = Hpricot.XML("<root><wildCat>text</wildCat></root>")
    (doc/:root/:wildCat).after("<beanPole>gravity</beanPole>")
    assert_equal "<root><wildCat>text</wildCat><beanPole>gravity</beanPole></root>", doc.to_s

    frag = Hpricot.XML do
      b { i "A bit of HTML" }
    end
    (frag/:b).after("<beanPole>gravity</beanPole>")
    assert_equal "<b><i>A bit of HTML</i></b><beanPole>gravity</beanPole>", frag.to_s
  end

  # An empty element must accept new content, be emptied again, and be
  # swapped for built markup.
  def test_reparent_empty_nodes
    doc = Hpricot("<div/>")
    doc.root.inner_html = "foo"
    assert_equal "foo", doc.root.inner_html
    doc.root.inner_html = ""
    assert_equal "", doc.root.inner_html
    doc.root.swap { b "test" }
    assert_equal "test", doc.root.inner_html
  end

  # Helper: asserts that +block+ holds for every element of +set+ and for
  # every element matching +selector+ after +original+ is serialized with
  # to_html and parsed again.
  def assert_changed(original, selector, set, &block)
    assert set.all?(&block)
    assert Hpricot(original.to_html).search(selector).all?(&block)
  end
end
|
data/test/test_builder.rb
ADDED
@@ -0,0 +1,37 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
#!/usr/bin/env ruby
|
3
|
+
|
4
|
+
require 'test/unit'
|
5
|
+
require 'hpricot'
|
6
|
+
|
7
|
+
# Tests for Hpricot's block-based builder: how text given to tag methods and
# to +text+ is serialized by to_html and read back through the text node.
class TestBuilder < Test::Unit::TestCase
  # Text passed to a tag method (here +b+) is expected to be entity-escaped
  # in the HTML output, while the text node's to_s gives the original
  # characters.
  def test_escaping_text
    doc = Hpricot() { b "<a\"b>" }
    # The expected markup must be written with entities; a literal `"` inside
    # this double-quoted string would end the literal early.
    assert_equal "<b>&lt;a&quot;b&gt;</b>", doc.to_html
    assert_equal %{<a"b>}, doc.at("text()").to_s
  end

  # Text added through +text+ is expected to appear verbatim in the output.
  def test_no_escaping_text
    doc = Hpricot() { div.test.me! { text "<a\"b>" } }
    assert_equal %{<div class="test" id="me"><a"b></div>}, doc.to_html
    assert_equal %{<a"b>}, doc.at("text()").to_s
  end

  # NOTE(review): given the test name, the to_html expectation presumably
  # used a numeric character reference originally; the literal character is
  # kept as found -- confirm against the packaged gem.
  def test_latin1_entities
    doc = Hpricot() { b "ۥ" }
    assert_equal "<b>ۥ</b>", doc.to_html
    assert_equal "ۥ", doc.at("text()").to_s
  end

  # Double quotes inside a single-quoted attribute value are expected to come
  # out backslash-escaped once the attribute is re-quoted with double quotes.
  def test_escaping_attrs
    text = "<span style='font-family:\"MS Mincho\"'>Some text</span>"
    assert_equal "<span style=\"font-family:\\\"MS Mincho\\\"\">Some text</span>",
                 Hpricot(text).to_html
  end

  # NOTE(review): given the test name, the expectation presumably used
  # numeric character references originally; the literal characters are kept
  # as found -- confirm against the packaged gem.
  def test_korean_utf8_entities
    a = '한글'
    doc = Hpricot() { b a }
    assert_equal "<b>한글</b>", doc.to_html
  end
end
|