webtranslateit-hpricot 0.9.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (55) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +15 -0
  3. data/CHANGELOG +122 -0
  4. data/COPYING +18 -0
  5. data/README.md +295 -0
  6. data/Rakefile +237 -0
  7. data/ext/fast_xs/FastXsService.java +1123 -0
  8. data/ext/fast_xs/extconf.rb +4 -0
  9. data/ext/fast_xs/fast_xs.c +210 -0
  10. data/ext/hpricot_scan/HpricotCss.java +850 -0
  11. data/ext/hpricot_scan/HpricotScanService.java +2085 -0
  12. data/ext/hpricot_scan/MANIFEST +0 -0
  13. data/ext/hpricot_scan/extconf.rb +9 -0
  14. data/ext/hpricot_scan/hpricot_common.rl +76 -0
  15. data/ext/hpricot_scan/hpricot_css.c +3511 -0
  16. data/ext/hpricot_scan/hpricot_css.java.rl +155 -0
  17. data/ext/hpricot_scan/hpricot_css.rl +120 -0
  18. data/ext/hpricot_scan/hpricot_scan.c +6848 -0
  19. data/ext/hpricot_scan/hpricot_scan.h +79 -0
  20. data/ext/hpricot_scan/hpricot_scan.java.rl +1173 -0
  21. data/ext/hpricot_scan/hpricot_scan.rl +911 -0
  22. data/extras/hpricot.png +0 -0
  23. data/hpricot.gemspec +18 -0
  24. data/lib/hpricot/blankslate.rb +63 -0
  25. data/lib/hpricot/builder.rb +217 -0
  26. data/lib/hpricot/elements.rb +514 -0
  27. data/lib/hpricot/htmlinfo.rb +691 -0
  28. data/lib/hpricot/inspect.rb +103 -0
  29. data/lib/hpricot/modules.rb +40 -0
  30. data/lib/hpricot/parse.rb +38 -0
  31. data/lib/hpricot/tag.rb +219 -0
  32. data/lib/hpricot/tags.rb +164 -0
  33. data/lib/hpricot/traverse.rb +839 -0
  34. data/lib/hpricot/xchar.rb +95 -0
  35. data/lib/hpricot.rb +26 -0
  36. data/setup.rb +1585 -0
  37. data/test/files/basic.xhtml +17 -0
  38. data/test/files/boingboing.html +2266 -0
  39. data/test/files/cy0.html +3653 -0
  40. data/test/files/immob.html +400 -0
  41. data/test/files/pace_application.html +1320 -0
  42. data/test/files/tenderlove.html +16 -0
  43. data/test/files/uswebgen.html +220 -0
  44. data/test/files/utf8.html +1054 -0
  45. data/test/files/week9.html +1723 -0
  46. data/test/files/why.xml +19 -0
  47. data/test/load_files.rb +7 -0
  48. data/test/nokogiri-bench.rb +64 -0
  49. data/test/test_alter.rb +96 -0
  50. data/test/test_builder.rb +37 -0
  51. data/test/test_parser.rb +496 -0
  52. data/test/test_paths.rb +25 -0
  53. data/test/test_preserved.rb +88 -0
  54. data/test/test_xml.rb +28 -0
  55. metadata +106 -0
@@ -0,0 +1,19 @@
1
+ <?xml version='1.0'?><rss xmlns:admin='http://webns.net/mvcb/' version='2.0' xmlns:sy='http://purl.org/rss/1.0/modules/syndication/' xmlns:dc='http://purl.org/dc/elements/1.1/' xmlns:rdf='http://www.w3.org/1999/02/22-rdf-syntax-ns#'>
2
+ <channel>
3
+ <title>why the lucky stiff</title>
4
+ <link>http://whytheluckystiff.net</link>
5
+ <description>hex-editing reality to give us infinite grenades!!</description>
6
+ <dc:language>en-us</dc:language>
7
+ <dc:creator/>
8
+ <dc:date>2007-01-16T22:39:04+00:00</dc:date>
9
+ <admin:generatorAgent rdf:resource='http://hobix.com/?v=0.4'/>
10
+ <sy:updatePeriod>hourly</sy:updatePeriod>
11
+ <sy:updateFrequency>1</sy:updateFrequency>
12
+ <sy:updateBase>2000-01-01T12:00+00:00</sy:updateBase>
13
+ <item><title>1.3</title><link>http://whytheluckystiff.net/quatrains/1.3.html</link><guid isPermaLink='false'>quatrains/1.3@http://whytheluckystiff.net</guid><dc:subject>quatrains</dc:subject><dc:subject>quatrains</dc:subject><dc:creator>why the lucky stiff</dc:creator><dc:date>2007-01-14T08:47:05+00:00</dc:date><description>&lt;blockquote&gt;
14
+ &lt;p&gt;That cadillac of yours and that driver of yours!&lt;br /&gt;You and your teacups rattling away in the back seat!&lt;br /&gt;You always took the mike, oh, and all those cowboys you shot!&lt;br /&gt;I held your hand! And I&amp;#8217;ll shoot a cowboy one day!&lt;/p&gt;
15
+ &lt;/blockquote&gt;
16
+ &lt;blockquote&gt;
17
+ &lt;p&gt;You said, &amp;#8220;Let&amp;#8217;s run into the woods like kids!&amp;#8221; &lt;br /&gt;You said, &amp;#8220;Let&amp;#8217;s rub our hands together super-hot!&amp;#8221; &lt;br /&gt;And we scalded the trees and left octagons, I think that was you and&lt;br /&gt;You threw parties on the roof!&lt;/p&gt;
18
+ &lt;/blockquote&gt;</description></item></channel>
19
+ </rss>
@@ -0,0 +1,7 @@
1
# Loads every HTML/XHTML/XML fixture in the "files" directory next to this
# script and exposes its contents as a String constant on TestFiles. The
# constant name is the upper-cased file stem, e.g. files/basic.xhtml becomes
# TestFiles::BASIC.
module TestFiles
  # Build an absolute glob pattern instead of wrapping the loop in Dir.chdir:
  # chdir changes the working directory of the whole process while the block
  # runs, which is unsafe if anything else touches relative paths meanwhile.
  fixture_dir = File.join(File.expand_path(File.dirname(__FILE__)), 'files')

  # Sort so the load order (and therefore which file wins if two fixtures
  # share a stem) does not depend on the filesystem's directory order.
  Dir[File.join(fixture_dir, '*.{html,xhtml,xml}')].sort.each do |fname|
    # The regexp captures the word-character stem that sits between the last
    # "/" and the file extension.
    const_set fname[%r!/(\w+)\.\w+$!, 1].upcase, File.read(fname)
  end
end
@@ -0,0 +1,64 @@
1
+ #!/usr/bin/env ruby
2
+ require 'rubygems'
3
+ require 'open-uri'
4
+ require 'hpricot'
5
+ require 'nokogiri'
6
+ require 'benchmark'
7
+
8
# Benchmark input. Resolved relative to this script (which sits beside the
# "files" fixture directory) so the benchmark can be started from any
# working directory, not only from the project root.
content = File.read(File.expand_path('files/boingboing.html', File.dirname(__FILE__)))

# Number of iterations executed inside each benchmark report.
N = 100

# NOTE(review): this package is published as "webtranslateit-hpricot", so the
# loaded spec is presumably registered under that name rather than "hpricot";
# look under both names and confirm against the gemspec.
hpricot_spec = Gem.loaded_specs['hpricot'] || Gem.loaded_specs['webtranslateit-hpricot']

# Guard against a missing spec as well as an old one, so a lookup miss ends in
# the abort message instead of a NoMethodError on nil.
unless hpricot_spec && hpricot_spec.version > Gem::Version.new('0.6.161')
  abort "** Use higher than Hpricot 0.6.161!"
end

puts "Hpricot #{hpricot_spec.version} vs. Nokogiri #{Gem.loaded_specs['nokogiri'].version}"
hdoc = Hpricot(content)
ndoc = Nokogiri.Hpricot(content)

# Label/document pairs shared by the query benchmarks below. An array of
# pairs keeps the report order fixed: hpricot first, then nokogiri.
parsed_docs = [['hpricot', hdoc], ['nokogiri', ndoc]]

# Parsing: build a document from the raw HTML N times with each library.
Benchmark.bm do |x|
  x.report('hpricot:doc') do
    N.times do
      Hpricot(content)
    end
  end

  x.report('nokogiri:doc') do
    N.times do
      Nokogiri.Hpricot(content)
    end
  end
end

# XPath-style queries against the already-parsed documents. The query results
# are deliberately discarded; only the elapsed time matters.
Benchmark.bm do |x|
  parsed_docs.each do |label, doc|
    x.report("#{label}:xpath") do
      N.times do
        doc.search("//a[@name='027906']").first.inner_text
        doc.search("h3[text()='College kids reportedly taking more smart drugs']").first.inner_text
      end
    end
  end
end

# CSS-style queries against the already-parsed documents.
Benchmark.bm do |x|
  parsed_docs.each do |label, doc|
    x.report("#{label}:css") do
      N.times do
        doc.search('form input[@checked]').first
        doc.search('td spacer').first.inner_text
      end
    end
  end
end
@@ -0,0 +1,96 @@
1
+ # -*- coding: utf-8 -*-
2
+ #!/usr/bin/env ruby
3
+
4
+ require 'test/unit'
5
+ require 'hpricot'
6
+ require 'load_files'
7
+
8
# Exercises Hpricot's document-altering calls (before/after insertion, wrap,
# attribute and class changes, inner_html assignment, swap) on the BASIC
# fixture. Several tests re-parse the serialized document to check that the
# change is also present in the generated HTML.
class TestAlter < Test::Unit::TestCase
  def setup
    @basic = Hpricot.parse(TestFiles::BASIC)
  end

  def test_before
    test0 = "<link rel='stylesheet' href='test0.css' />"
    @basic.at("link").before(test0)
    assert_equal 'test0.css', @basic.at("link").attributes['href']
  end

  def test_after
    test_inf = "<link rel='stylesheet' href='test_inf.css' />"
    @basic.search("link")[-1].after(test_inf)
    assert_equal 'test_inf.css', @basic.search("link")[-1].attributes['href']
  end

  def test_wrap
    ohmy = (@basic/"p.ohmy").wrap("<div id='wrapper'></div>")
    assert_equal 'wrapper', ohmy[0].parent['id']
    assert_equal 'ohmy', Hpricot(@basic.to_html).at("#wrapper").children[0]['class']
  end

  def test_add_class
    first_p = (@basic/"p:first").add_class("testing123")
    assert first_p[0].get_attribute("class").split(" ").include?("testing123")
    # Explicit call parentheses: "assert (...)" with a space makes Ruby warn
    # that the parenthesis is being read as a grouped expression.
    assert((Hpricot(@basic.to_html)/"p:first")[0].attributes["class"].split(" ").include?("testing123"))
    assert(!(Hpricot(@basic.to_html)/"p:gt(0)")[0].attributes["class"].split(" ").include?("testing123"))
  end

  def test_change_attributes
    all_ps = (@basic/"p").attr("title", "Some Title & Etc…")
    all_as = (@basic/"a").attr("href", "http://my_new_href.com")
    all_lb = (@basic/"link").attr("href") { |e| e.name }
    assert_changed(@basic, "p", all_ps) { |p| p.raw_attributes["title"] == "Some Title &amp; Etc&#8230;" }
    assert_changed(@basic, "a", all_as) { |a| a.attributes["href"] == "http://my_new_href.com" }
    assert_changed(@basic, "link", all_lb) { |a| a.attributes["href"] == "link" }
  end

  def test_change_attributes2
    (@basic%"a").attributes["href"] = "http://my_new_href.com"
    (@basic%"p").attributes["title"] = "Some Title & Etc…"
    # assert_equal takes the expected value first; keeping that order makes
    # the failure message report expected/actual the right way round.
    assert_equal "http://my_new_href.com", (@basic%"a").raw_attributes["href"]
    assert_equal "Some Title &amp; Etc&#8230;", (@basic%"p").raw_attributes["title"]
    assert_equal "Some Title & Etc…", (@basic%"p").attributes["title"]
  end

  def test_remove_attr
    all_rl = (@basic/"link").remove_attr("href")
    assert_changed(@basic, "link", all_rl) { |link| link['href'].nil? }
  end

  def test_remove_class
    all_c1 = (@basic/"p[@class*='last']").remove_class("last")
    assert_changed(@basic, "p[@class*='last']", all_c1) { |p| p['class'] == 'final' }
  end

  def test_remove_all_classes
    all_c2 = (@basic/"p[@class]").remove_class
    assert_changed(@basic, "p[@class]", all_c2) { |p| p['class'].nil? }
  end

  def test_xml_casing
    doc = Hpricot.XML("<root><wildCat>text</wildCat></root>")
    (doc/:root/:wildCat).after("<beanPole>gravity</beanPole>")
    assert_equal "<root><wildCat>text</wildCat><beanPole>gravity</beanPole></root>", doc.to_s

    frag = Hpricot.XML do
      b { i "A bit of HTML" }
    end
    (frag/:b).after("<beanPole>gravity</beanPole>")
    assert_equal "<b><i>A bit of HTML</i></b><beanPole>gravity</beanPole>", frag.to_s
  end

  def test_reparent_empty_nodes
    doc = Hpricot("<div/>")
    doc.root.inner_html = "foo"
    assert_equal "foo", doc.root.inner_html
    doc.root.inner_html = ""
    assert_equal "", doc.root.inner_html
    doc.root.swap { b "test" }
    assert_equal "test", doc.root.inner_html
  end

  private

  # Asserts that the block holds for every element of +set+ and also for
  # every element matched by +selector+ after +original+ has been serialized
  # with to_html and parsed again.
  def assert_changed(original, selector, set, &block)
    assert set.all?(&block)
    assert Hpricot(original.to_html).search(selector).all?(&block)
  end
end
@@ -0,0 +1,37 @@
1
+ # -*- coding: utf-8 -*-
2
+ #!/usr/bin/env ruby
3
+
4
+ require 'test/unit'
5
+ require 'hpricot'
6
+
7
# Checks how documents created through the Hpricot() builder block serialize:
# text escaping, unescaped text nodes, numeric entities for non-ASCII
# characters, and quoting inside attribute values.
class TestBuilder < Test::Unit::TestCase
  def test_escaping_text
    doc = Hpricot() { b "<a\"b>" }
    assert_equal "<b>&lt;a&quot;b&gt;</b>", doc.to_html
    assert_equal %{<a"b>}, doc.at("text()").to_s
  end

  def test_no_escaping_text
    doc = Hpricot() { div.test.me! { text "<a\"b>" } }
    assert_equal %{<div class="test" id="me"><a"b></div>}, doc.to_html
    assert_equal %{<a"b>}, doc.at("text()").to_s
  end

  def test_latin1_entities
    # The input must be the euro sign (U+20AC) followed by a bullet (U+2022):
    # those are the code points the expected entities &#8364;&#8226; name.
    doc = Hpricot() { b "€•" }
    assert_equal "<b>&#8364;&#8226;</b>", doc.to_html
    assert_equal "€•", doc.at("text()").to_s
  end

  def test_escaping_attrs
    text = "<span style='font-family:\"MS Mincho\"'>Some text</span>"
    assert_equal "<span style=\"font-family:\\\"MS Mincho\\\"\">Some text</span>",
                 Hpricot(text).to_html
  end

  def test_korean_utf8_entities
    a = '한글'
    doc = Hpricot() { b a }
    assert_equal "<b>&#54620;&#44544;</b>", doc.to_html
  end
end