webtranslateit-hpricot 0.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +15 -0
- data/CHANGELOG +122 -0
- data/COPYING +18 -0
- data/README.md +295 -0
- data/Rakefile +237 -0
- data/ext/fast_xs/FastXsService.java +1123 -0
- data/ext/fast_xs/extconf.rb +4 -0
- data/ext/fast_xs/fast_xs.c +210 -0
- data/ext/hpricot_scan/HpricotCss.java +850 -0
- data/ext/hpricot_scan/HpricotScanService.java +2085 -0
- data/ext/hpricot_scan/MANIFEST +0 -0
- data/ext/hpricot_scan/extconf.rb +9 -0
- data/ext/hpricot_scan/hpricot_common.rl +76 -0
- data/ext/hpricot_scan/hpricot_css.c +3511 -0
- data/ext/hpricot_scan/hpricot_css.java.rl +155 -0
- data/ext/hpricot_scan/hpricot_css.rl +120 -0
- data/ext/hpricot_scan/hpricot_scan.c +6848 -0
- data/ext/hpricot_scan/hpricot_scan.h +79 -0
- data/ext/hpricot_scan/hpricot_scan.java.rl +1173 -0
- data/ext/hpricot_scan/hpricot_scan.rl +911 -0
- data/extras/hpricot.png +0 -0
- data/hpricot.gemspec +18 -0
- data/lib/hpricot/blankslate.rb +63 -0
- data/lib/hpricot/builder.rb +217 -0
- data/lib/hpricot/elements.rb +514 -0
- data/lib/hpricot/htmlinfo.rb +691 -0
- data/lib/hpricot/inspect.rb +103 -0
- data/lib/hpricot/modules.rb +40 -0
- data/lib/hpricot/parse.rb +38 -0
- data/lib/hpricot/tag.rb +219 -0
- data/lib/hpricot/tags.rb +164 -0
- data/lib/hpricot/traverse.rb +839 -0
- data/lib/hpricot/xchar.rb +95 -0
- data/lib/hpricot.rb +26 -0
- data/setup.rb +1585 -0
- data/test/files/basic.xhtml +17 -0
- data/test/files/boingboing.html +2266 -0
- data/test/files/cy0.html +3653 -0
- data/test/files/immob.html +400 -0
- data/test/files/pace_application.html +1320 -0
- data/test/files/tenderlove.html +16 -0
- data/test/files/uswebgen.html +220 -0
- data/test/files/utf8.html +1054 -0
- data/test/files/week9.html +1723 -0
- data/test/files/why.xml +19 -0
- data/test/load_files.rb +7 -0
- data/test/nokogiri-bench.rb +64 -0
- data/test/test_alter.rb +96 -0
- data/test/test_builder.rb +37 -0
- data/test/test_parser.rb +496 -0
- data/test/test_paths.rb +25 -0
- data/test/test_preserved.rb +88 -0
- data/test/test_xml.rb +28 -0
- metadata +106 -0
data/test/files/why.xml
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
<?xml version='1.0'?><rss xmlns:admin='http://webns.net/mvcb/' version='2.0' xmlns:sy='http://purl.org/rss/1.0/modules/syndication/' xmlns:dc='http://purl.org/dc/elements/1.1/' xmlns:rdf='http://www.w3.org/1999/02/22-rdf-syntax-ns#'>
|
|
2
|
+
<channel>
|
|
3
|
+
<title>why the lucky stiff</title>
|
|
4
|
+
<link>http://whytheluckystiff.net</link>
|
|
5
|
+
<description>hex-editing reality to give us infinite grenades!!</description>
|
|
6
|
+
<dc:language>en-us</dc:language>
|
|
7
|
+
<dc:creator/>
|
|
8
|
+
<dc:date>2007-01-16T22:39:04+00:00</dc:date>
|
|
9
|
+
<admin:generatorAgent rdf:resource='http://hobix.com/?v=0.4'/>
|
|
10
|
+
<sy:updatePeriod>hourly</sy:updatePeriod>
|
|
11
|
+
<sy:updateFrequency>1</sy:updateFrequency>
|
|
12
|
+
<sy:updateBase>2000-01-01T12:00+00:00</sy:updateBase>
|
|
13
|
+
<item><title>1.3</title><link>http://whytheluckystiff.net/quatrains/1.3.html</link><guid isPermaLink='false'>quatrains/1.3@http://whytheluckystiff.net</guid><dc:subject>quatrains</dc:subject><dc:subject>quatrains</dc:subject><dc:creator>why the lucky stiff</dc:creator><dc:date>2007-01-14T08:47:05+00:00</dc:date><description><blockquote>
|
|
14
|
+
<p>That cadillac of yours and that driver of yours!<br />You and your teacups rattling away in the back seat!<br />You always took the mike, oh, and all those cowboys you shot!<br />I held your hand! And I&#8217;ll shoot a cowboy one day!</p>
|
|
15
|
+
</blockquote>
|
|
16
|
+
<blockquote>
|
|
17
|
+
<p>You said, &#8220;Let&#8217;s run into the woods like kids!&#8221; <br />You said, &#8220;Let&#8217;s rub our hands together super-hot!&#8221; <br />And we scalded the trees and left octagons, I think that was you and<br />You threw parties on the roof!</p>
|
|
18
|
+
</blockquote></description></item></channel>
|
|
19
|
+
</rss>
|
data/test/nokogiri-bench.rb
ADDED
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
#!/usr/bin/env ruby
|
|
2
|
+
require 'rubygems'
|
|
3
|
+
require 'open-uri'
|
|
4
|
+
require 'hpricot'
|
|
5
|
+
require 'nokogiri'
|
|
6
|
+
require 'benchmark'
|
|
7
|
+
|
|
8
|
+
content = File.read("test/files/boingboing.html")
|
|
9
|
+
|
|
10
|
+
N = 100
|
|
11
|
+
|
|
12
|
+
unless Gem.loaded_specs['hpricot'].version > Gem::Version.new('0.6.161')
|
|
13
|
+
abort "** Use higher than Hpricot 0.6.161!"
|
|
14
|
+
end
|
|
15
|
+
|
|
16
|
+
puts "Hpricot #{Gem.loaded_specs['hpricot'].version} vs. Nokogiri #{Gem.loaded_specs['nokogiri'].version}"
|
|
17
|
+
hdoc = Hpricot(content)
|
|
18
|
+
ndoc = Nokogiri.Hpricot(content)
|
|
19
|
+
|
|
20
|
+
Benchmark.bm do |x|
|
|
21
|
+
x.report('hpricot:doc') do
|
|
22
|
+
N.times do
|
|
23
|
+
Hpricot(content)
|
|
24
|
+
end
|
|
25
|
+
end
|
|
26
|
+
|
|
27
|
+
x.report('nokogiri:doc') do
|
|
28
|
+
N.times do
|
|
29
|
+
Nokogiri.Hpricot(content)
|
|
30
|
+
end
|
|
31
|
+
end
|
|
32
|
+
end
|
|
33
|
+
|
|
34
|
+
Benchmark.bm do |x|
|
|
35
|
+
x.report('hpricot:xpath') do
|
|
36
|
+
N.times do
|
|
37
|
+
info = hdoc.search("//a[@name='027906']").first.inner_text
|
|
38
|
+
url = hdoc.search("h3[text()='College kids reportedly taking more smart drugs']").first.inner_text
|
|
39
|
+
end
|
|
40
|
+
end
|
|
41
|
+
|
|
42
|
+
x.report('nokogiri:xpath') do
|
|
43
|
+
N.times do
|
|
44
|
+
info = ndoc.search("//a[@name='027906']").first.inner_text
|
|
45
|
+
url = ndoc.search("h3[text()='College kids reportedly taking more smart drugs']").first.inner_text
|
|
46
|
+
end
|
|
47
|
+
end
|
|
48
|
+
end
|
|
49
|
+
|
|
50
|
+
Benchmark.bm do |x|
|
|
51
|
+
x.report('hpricot:css') do
|
|
52
|
+
N.times do
|
|
53
|
+
info = hdoc.search('form input[@checked]').first
|
|
54
|
+
url = hdoc.search('td spacer').first.inner_text
|
|
55
|
+
end
|
|
56
|
+
end
|
|
57
|
+
|
|
58
|
+
x.report('nokogiri:css') do
|
|
59
|
+
N.times do
|
|
60
|
+
info = ndoc.search('form input[@checked]').first
|
|
61
|
+
url = ndoc.search('td spacer').first.inner_text
|
|
62
|
+
end
|
|
63
|
+
end
|
|
64
|
+
end
|
data/test/test_alter.rb
ADDED
|
@@ -0,0 +1,96 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
#!/usr/bin/env ruby
|
|
3
|
+
|
|
4
|
+
require 'test/unit'
|
|
5
|
+
require 'hpricot'
|
|
6
|
+
require 'load_files'
|
|
7
|
+
|
|
8
|
+
class TestAlter < Test::Unit::TestCase
|
|
9
|
+
def setup
|
|
10
|
+
@basic = Hpricot.parse(TestFiles::BASIC)
|
|
11
|
+
end
|
|
12
|
+
|
|
13
|
+
def test_before
|
|
14
|
+
test0 = "<link rel='stylesheet' href='test0.css' />"
|
|
15
|
+
@basic.at("link").before(test0)
|
|
16
|
+
assert_equal 'test0.css', @basic.at("link").attributes['href']
|
|
17
|
+
end
|
|
18
|
+
|
|
19
|
+
def test_after
|
|
20
|
+
test_inf = "<link rel='stylesheet' href='test_inf.css' />"
|
|
21
|
+
@basic.search("link")[-1].after(test_inf)
|
|
22
|
+
assert_equal 'test_inf.css', @basic.search("link")[-1].attributes['href']
|
|
23
|
+
end
|
|
24
|
+
|
|
25
|
+
def test_wrap
|
|
26
|
+
ohmy = (@basic/"p.ohmy").wrap("<div id='wrapper'></div>")
|
|
27
|
+
assert_equal 'wrapper', ohmy[0].parent['id']
|
|
28
|
+
assert_equal 'ohmy', Hpricot(@basic.to_html).at("#wrapper").children[0]['class']
|
|
29
|
+
end
|
|
30
|
+
|
|
31
|
+
def test_add_class
|
|
32
|
+
first_p = (@basic/"p:first").add_class("testing123")
|
|
33
|
+
assert first_p[0].get_attribute("class").split(" ").include?("testing123")
|
|
34
|
+
assert (Hpricot(@basic.to_html)/"p:first")[0].attributes["class"].split(" ").include?("testing123")
|
|
35
|
+
assert !(Hpricot(@basic.to_html)/"p:gt(0)")[0].attributes["class"].split(" ").include?("testing123")
|
|
36
|
+
end
|
|
37
|
+
|
|
38
|
+
def test_change_attributes
|
|
39
|
+
all_ps = (@basic/"p").attr("title", "Some Title & Etc…")
|
|
40
|
+
all_as = (@basic/"a").attr("href", "http://my_new_href.com")
|
|
41
|
+
all_lb = (@basic/"link").attr("href") { |e| e.name }
|
|
42
|
+
assert_changed(@basic, "p", all_ps) {|p| p.raw_attributes["title"] == "Some Title &amp; Etc&#8230;"}
|
|
43
|
+
assert_changed(@basic, "a", all_as) {|a| a.attributes["href"] == "http://my_new_href.com"}
|
|
44
|
+
assert_changed(@basic, "link", all_lb) {|a| a.attributes["href"] == "link" }
|
|
45
|
+
end
|
|
46
|
+
|
|
47
|
+
def test_change_attributes2
|
|
48
|
+
all_as = (@basic%"a").attributes["href"] = "http://my_new_href.com"
|
|
49
|
+
all_ps = (@basic%"p").attributes["title"] = "Some Title & Etc…"
|
|
50
|
+
assert_equal (@basic%"a").raw_attributes["href"], "http://my_new_href.com"
|
|
51
|
+
assert_equal (@basic%"p").raw_attributes["title"], "Some Title &amp; Etc&#8230;"
|
|
52
|
+
assert_equal (@basic%"p").attributes["title"], "Some Title & Etc…"
|
|
53
|
+
end
|
|
54
|
+
|
|
55
|
+
def test_remove_attr
|
|
56
|
+
all_rl = (@basic/"link").remove_attr("href")
|
|
57
|
+
assert_changed(@basic, "link", all_rl) { |link| link['href'].nil? }
|
|
58
|
+
end
|
|
59
|
+
|
|
60
|
+
def test_remove_class
|
|
61
|
+
all_c1 = (@basic/"p[@class*='last']").remove_class("last")
|
|
62
|
+
assert_changed(@basic, "p[@class*='last']", all_c1) { |p| p['class'] == 'final' }
|
|
63
|
+
end
|
|
64
|
+
|
|
65
|
+
def test_remove_all_classes
|
|
66
|
+
all_c2 = (@basic/"p[@class]").remove_class
|
|
67
|
+
assert_changed(@basic, "p[@class]", all_c2) { |p| p['class'].nil? }
|
|
68
|
+
end
|
|
69
|
+
|
|
70
|
+
def test_xml_casing
|
|
71
|
+
doc = Hpricot.XML("<root><wildCat>text</wildCat></root>")
|
|
72
|
+
(doc/:root/:wildCat).after("<beanPole>gravity</beanPole>")
|
|
73
|
+
assert_equal doc.to_s, "<root><wildCat>text</wildCat><beanPole>gravity</beanPole></root>"
|
|
74
|
+
|
|
75
|
+
frag = Hpricot.XML do
|
|
76
|
+
b { i "A bit of HTML" }
|
|
77
|
+
end
|
|
78
|
+
(frag/:b).after("<beanPole>gravity</beanPole>")
|
|
79
|
+
assert_equal frag.to_s, "<b><i>A bit of HTML</i></b><beanPole>gravity</beanPole>"
|
|
80
|
+
end
|
|
81
|
+
|
|
82
|
+
def test_reparent_empty_nodes
|
|
83
|
+
doc = Hpricot("<div/>")
|
|
84
|
+
doc.root.inner_html = "foo"
|
|
85
|
+
assert_equal doc.root.inner_html, "foo"
|
|
86
|
+
doc.root.inner_html = ""
|
|
87
|
+
assert_equal doc.root.inner_html, ""
|
|
88
|
+
doc.root.swap { b "test" }
|
|
89
|
+
assert_equal doc.root.inner_html, "test"
|
|
90
|
+
end
|
|
91
|
+
|
|
92
|
+
def assert_changed original, selector, set, &block
|
|
93
|
+
assert set.all?(&block)
|
|
94
|
+
assert Hpricot(original.to_html).search(selector).all?(&block)
|
|
95
|
+
end
|
|
96
|
+
end
|
data/test/test_builder.rb
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
#!/usr/bin/env ruby
|
|
3
|
+
|
|
4
|
+
require 'test/unit'
|
|
5
|
+
require 'hpricot'
|
|
6
|
+
|
|
7
|
+
class TestBuilder < Test::Unit::TestCase
|
|
8
|
+
def test_escaping_text
|
|
9
|
+
doc = Hpricot() { b "<a\"b>" }
|
|
10
|
+
assert_equal "<b>&lt;a&quot;b&gt;</b>", doc.to_html
|
|
11
|
+
assert_equal %{<a"b>}, doc.at("text()").to_s
|
|
12
|
+
end
|
|
13
|
+
|
|
14
|
+
def test_no_escaping_text
|
|
15
|
+
doc = Hpricot() { div.test.me! { text "<a\"b>" } }
|
|
16
|
+
assert_equal %{<div class="test" id="me"><a"b></div>}, doc.to_html
|
|
17
|
+
assert_equal %{<a"b>}, doc.at("text()").to_s
|
|
18
|
+
end
|
|
19
|
+
|
|
20
|
+
def test_latin1_entities
|
|
21
|
+
doc = Hpricot() { b "ۥ" }
|
|
22
|
+
assert_equal "<b>ۥ</b>", doc.to_html
|
|
23
|
+
assert_equal "ۥ", doc.at("text()").to_s
|
|
24
|
+
end
|
|
25
|
+
|
|
26
|
+
def test_escaping_attrs
|
|
27
|
+
text = "<span style='font-family:\"MS Mincho\"'>Some text</span>"
|
|
28
|
+
assert_equal "<span style=\"font-family:\\\"MS Mincho\\\"\">Some text</span>",
|
|
29
|
+
Hpricot(text).to_html
|
|
30
|
+
end
|
|
31
|
+
|
|
32
|
+
def test_korean_utf8_entities
|
|
33
|
+
a = '한글'
|
|
34
|
+
doc = Hpricot() { b a }
|
|
35
|
+
assert_equal "<b>한글</b>", doc.to_html
|
|
36
|
+
end
|
|
37
|
+
end
|