webtranslateit-hpricot 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +15 -0
  3. data/CHANGELOG +122 -0
  4. data/COPYING +18 -0
  5. data/README.md +295 -0
  6. data/Rakefile +237 -0
  7. data/ext/fast_xs/FastXsService.java +1123 -0
  8. data/ext/fast_xs/extconf.rb +4 -0
  9. data/ext/fast_xs/fast_xs.c +210 -0
  10. data/ext/hpricot_scan/HpricotCss.java +850 -0
  11. data/ext/hpricot_scan/HpricotScanService.java +2085 -0
  12. data/ext/hpricot_scan/MANIFEST +0 -0
  13. data/ext/hpricot_scan/extconf.rb +9 -0
  14. data/ext/hpricot_scan/hpricot_common.rl +76 -0
  15. data/ext/hpricot_scan/hpricot_css.c +3511 -0
  16. data/ext/hpricot_scan/hpricot_css.java.rl +155 -0
  17. data/ext/hpricot_scan/hpricot_css.rl +120 -0
  18. data/ext/hpricot_scan/hpricot_scan.c +6848 -0
  19. data/ext/hpricot_scan/hpricot_scan.h +79 -0
  20. data/ext/hpricot_scan/hpricot_scan.java.rl +1173 -0
  21. data/ext/hpricot_scan/hpricot_scan.rl +911 -0
  22. data/extras/hpricot.png +0 -0
  23. data/hpricot.gemspec +18 -0
  24. data/lib/hpricot/blankslate.rb +63 -0
  25. data/lib/hpricot/builder.rb +217 -0
  26. data/lib/hpricot/elements.rb +514 -0
  27. data/lib/hpricot/htmlinfo.rb +691 -0
  28. data/lib/hpricot/inspect.rb +103 -0
  29. data/lib/hpricot/modules.rb +40 -0
  30. data/lib/hpricot/parse.rb +38 -0
  31. data/lib/hpricot/tag.rb +219 -0
  32. data/lib/hpricot/tags.rb +164 -0
  33. data/lib/hpricot/traverse.rb +839 -0
  34. data/lib/hpricot/xchar.rb +95 -0
  35. data/lib/hpricot.rb +26 -0
  36. data/setup.rb +1585 -0
  37. data/test/files/basic.xhtml +17 -0
  38. data/test/files/boingboing.html +2266 -0
  39. data/test/files/cy0.html +3653 -0
  40. data/test/files/immob.html +400 -0
  41. data/test/files/pace_application.html +1320 -0
  42. data/test/files/tenderlove.html +16 -0
  43. data/test/files/uswebgen.html +220 -0
  44. data/test/files/utf8.html +1054 -0
  45. data/test/files/week9.html +1723 -0
  46. data/test/files/why.xml +19 -0
  47. data/test/load_files.rb +7 -0
  48. data/test/nokogiri-bench.rb +64 -0
  49. data/test/test_alter.rb +96 -0
  50. data/test/test_builder.rb +37 -0
  51. data/test/test_parser.rb +496 -0
  52. data/test/test_paths.rb +25 -0
  53. data/test/test_preserved.rb +88 -0
  54. data/test/test_xml.rb +28 -0
  55. metadata +106 -0
@@ -0,0 +1,19 @@
1
+ <?xml version='1.0'?><rss xmlns:admin='http://webns.net/mvcb/' version='2.0' xmlns:sy='http://purl.org/rss/1.0/modules/syndication/' xmlns:dc='http://purl.org/dc/elements/1.1/' xmlns:rdf='http://www.w3.org/1999/02/22-rdf-syntax-ns#'>
2
+ <channel>
3
+ <title>why the lucky stiff</title>
4
+ <link>http://whytheluckystiff.net</link>
5
+ <description>hex-editing reality to give us infinite grenades!!</description>
6
+ <dc:language>en-us</dc:language>
7
+ <dc:creator/>
8
+ <dc:date>2007-01-16T22:39:04+00:00</dc:date>
9
+ <admin:generatorAgent rdf:resource='http://hobix.com/?v=0.4'/>
10
+ <sy:updatePeriod>hourly</sy:updatePeriod>
11
+ <sy:updateFrequency>1</sy:updateFrequency>
12
+ <sy:updateBase>2000-01-01T12:00+00:00</sy:updateBase>
13
+ <item><title>1.3</title><link>http://whytheluckystiff.net/quatrains/1.3.html</link><guid isPermaLink='false'>quatrains/1.3@http://whytheluckystiff.net</guid><dc:subject>quatrains</dc:subject><dc:subject>quatrains</dc:subject><dc:creator>why the lucky stiff</dc:creator><dc:date>2007-01-14T08:47:05+00:00</dc:date><description>&lt;blockquote&gt;
14
+ &lt;p&gt;That cadillac of yours and that driver of yours!&lt;br /&gt;You and your teacups rattling away in the back seat!&lt;br /&gt;You always took the mike, oh, and all those cowboys you shot!&lt;br /&gt;I held your hand! And I&amp;#8217;ll shoot a cowboy one day!&lt;/p&gt;
15
+ &lt;/blockquote&gt;
16
+ &lt;blockquote&gt;
17
+ &lt;p&gt;You said, &amp;#8220;Let&amp;#8217;s run into the woods like kids!&amp;#8221; &lt;br /&gt;You said, &amp;#8220;Let&amp;#8217;s rub our hands together super-hot!&amp;#8221; &lt;br /&gt;And we scalded the trees and left octagons, I think that was you and&lt;br /&gt;You threw parties on the roof!&lt;/p&gt;
18
+ &lt;/blockquote&gt;</description></item></channel>
19
+ </rss>
@@ -0,0 +1,7 @@
1
# Exposes every HTML/XHTML/XML fixture in the "files" directory next to this
# file as a String constant on this module. The constant is named after the
# upper-cased file name without its extension, so files/basic.xhtml becomes
# TestFiles::BASIC.
module TestFiles
  # Reads each fixture found directly inside +dir+ and defines a constant
  # holding its contents.
  #
  # The directory is listed explicitly instead of globbing inside a
  # Dir.chdir block, so the process-wide working directory is never changed.
  # Entries whose base name is not made of word characters, or whose
  # extension is not html/xhtml/xml, are skipped rather than raising on nil.
  #
  # dir - path of the directory that contains the fixture files. A missing
  #       directory defines nothing.
  #
  # Returns nothing meaningful.
  def self.load_fixtures(dir)
    return unless File.directory?(dir)

    Dir.entries(dir).sort.each do |entry|
      match = /\A(\w+)\.(?:html|xhtml|xml)\z/.match(entry)
      next unless match

      const_set match[1].upcase, File.read(File.join(dir, entry))
    end
  end

  load_fixtures File.expand_path('files', File.dirname(__FILE__))
end
@@ -0,0 +1,64 @@
1
#!/usr/bin/env ruby
# Micro-benchmark comparing Hpricot with Nokogiri's Hpricot-style entry point
# on the boingboing.html fixture. Three groups are timed, each N times per
# library: parsing the document, two XPath-style searches, and two CSS-style
# searches.
require 'rubygems'
require 'open-uri'
require 'hpricot'
require 'nokogiri'
require 'benchmark'

# Resolve the fixture relative to this script so the benchmark does not
# depend on the directory it is launched from.
content = File.read(File.expand_path('files/boingboing.html', File.dirname(__FILE__)))

N = 100

# The library may be installed under the upstream gem name or under this
# package's name; either lookup can be nil, so check before calling #version.
hpricot_spec = Gem.loaded_specs['hpricot'] || Gem.loaded_specs['webtranslateit-hpricot']
abort "** Hpricot does not appear to be loaded from an installed gem!" unless hpricot_spec

unless hpricot_spec.version > Gem::Version.new('0.6.161')
  abort "** Use higher than Hpricot 0.6.161!"
end

puts "Hpricot #{hpricot_spec.version} vs. Nokogiri #{Gem.loaded_specs['nokogiri'].version}"

# NOTE(review): Nokogiri.Hpricot is called exactly as in the original script;
# confirm the installed Nokogiri still provides this entry point.
parsers = [
  ['hpricot',  lambda { Hpricot(content) }],
  ['nokogiri', lambda { Nokogiri.Hpricot(content) }]
]

# Parse once up front so the search benchmarks measure querying only.
docs = parsers.map { |label, parse| [label, parse.call] }

Benchmark.bm do |x|
  parsers.each do |label, parse|
    x.report("#{label}:doc") do
      N.times { parse.call }
    end
  end
end

Benchmark.bm do |x|
  docs.each do |label, doc|
    x.report("#{label}:xpath") do
      N.times do
        doc.search("//a[@name='027906']").first.inner_text
        doc.search("h3[text()='College kids reportedly taking more smart drugs']").first.inner_text
      end
    end
  end
end

Benchmark.bm do |x|
  docs.each do |label, doc|
    x.report("#{label}:css") do
      N.times do
        doc.search('form input[@checked]').first
        doc.search('td spacer').first.inner_text
      end
    end
  end
end
@@ -0,0 +1,96 @@
1
+ # -*- coding: utf-8 -*-
2
+ #!/usr/bin/env ruby
3
+
4
+ require 'test/unit'
5
+ require 'hpricot'
6
+ require 'load_files'
7
+
8
# Tests for Hpricot's document-altering API: inserting siblings, wrapping,
# adding/removing classes and attributes, and replacing inner HTML. Several
# tests check the change both on the returned objects and on a fresh parse
# of the document's serialized HTML.
class TestAlter < Test::Unit::TestCase
  # Parses the BASIC fixture anew for every test so mutations do not leak.
  def setup
    @basic = Hpricot.parse(TestFiles::BASIC)
  end

  def test_before
    test0 = "<link rel='stylesheet' href='test0.css' />"
    @basic.at("link").before(test0)
    assert_equal 'test0.css', @basic.at("link").attributes['href']
  end

  def test_after
    test_inf = "<link rel='stylesheet' href='test_inf.css' />"
    @basic.search("link")[-1].after(test_inf)
    assert_equal 'test_inf.css', @basic.search("link")[-1].attributes['href']
  end

  def test_wrap
    ohmy = (@basic / "p.ohmy").wrap("<div id='wrapper'></div>")
    assert_equal 'wrapper', ohmy[0].parent['id']
    assert_equal 'ohmy', Hpricot(@basic.to_html).at("#wrapper").children[0]['class']
  end

  def test_add_class
    first_p = (@basic / "p:first").add_class("testing123")
    assert(first_p[0].get_attribute("class").split(" ").include?("testing123"))

    # The class must survive serialization and must not spread to later <p>s.
    reparsed = Hpricot(@basic.to_html)
    assert((reparsed / "p:first")[0].attributes["class"].split(" ").include?("testing123"))
    assert(!(reparsed / "p:gt(0)")[0].attributes["class"].split(" ").include?("testing123"))
  end

  def test_change_attributes
    all_ps = (@basic / "p").attr("title", "Some Title & Etc…")
    all_as = (@basic / "a").attr("href", "http://my_new_href.com")
    all_lb = (@basic / "link").attr("href") { |e| e.name }
    assert_changed(@basic, "p", all_ps) { |p| p.raw_attributes["title"] == "Some Title &amp; Etc&#8230;" }
    assert_changed(@basic, "a", all_as) { |a| a.attributes["href"] == "http://my_new_href.com" }
    assert_changed(@basic, "link", all_lb) { |a| a.attributes["href"] == "link" }
  end

  def test_change_attributes2
    (@basic % "a").attributes["href"] = "http://my_new_href.com"
    (@basic % "p").attributes["title"] = "Some Title & Etc…"
    assert_equal "http://my_new_href.com", (@basic % "a").raw_attributes["href"]
    assert_equal "Some Title &amp; Etc&#8230;", (@basic % "p").raw_attributes["title"]
    assert_equal "Some Title & Etc…", (@basic % "p").attributes["title"]
  end

  def test_remove_attr
    all_rl = (@basic / "link").remove_attr("href")
    assert_changed(@basic, "link", all_rl) { |link| link['href'].nil? }
  end

  def test_remove_class
    all_c1 = (@basic / "p[@class*='last']").remove_class("last")
    assert_changed(@basic, "p[@class*='last']", all_c1) { |p| p['class'] == 'final' }
  end

  def test_remove_all_classes
    all_c2 = (@basic / "p[@class]").remove_class
    assert_changed(@basic, "p[@class]", all_c2) { |p| p['class'].nil? }
  end

  def test_xml_casing
    doc = Hpricot.XML("<root><wildCat>text</wildCat></root>")
    (doc / :root / :wildCat).after("<beanPole>gravity</beanPole>")
    assert_equal "<root><wildCat>text</wildCat><beanPole>gravity</beanPole></root>", doc.to_s

    frag = Hpricot.XML do
      b { i "A bit of HTML" }
    end
    (frag / :b).after("<beanPole>gravity</beanPole>")
    assert_equal "<b><i>A bit of HTML</i></b><beanPole>gravity</beanPole>", frag.to_s
  end

  def test_reparent_empty_nodes
    doc = Hpricot("<div/>")
    doc.root.inner_html = "foo"
    assert_equal "foo", doc.root.inner_html
    doc.root.inner_html = ""
    assert_equal "", doc.root.inner_html
    doc.root.swap { b "test" }
    assert_equal "test", doc.root.inner_html
  end

  private

  # Asserts that +block+ holds for every element of +set+ and also for every
  # element matched by +selector+ after +original+ is serialized with to_html
  # and parsed again.
  #
  # original - the document that was altered.
  # selector - selector string used to find the elements in the re-parsed copy.
  # set      - the collection returned by the altering call.
  # block    - predicate that receives one element and returns true/false.
  def assert_changed(original, selector, set, &block)
    assert set.all?(&block)
    assert Hpricot(original.to_html).search(selector).all?(&block)
  end
end
@@ -0,0 +1,37 @@
1
+ # -*- coding: utf-8 -*-
2
+ #!/usr/bin/env ruby
3
+
4
+ require 'test/unit'
5
+ require 'hpricot'
6
+
7
# Checks how documents produced through Hpricot's block-based builder (and,
# in one case, the parser) serialize text and attribute values.
class TestBuilder < Test::Unit::TestCase
  # Text passed as a tag argument is escaped in to_html, while the text node
  # itself still stringifies to the raw characters.
  def test_escaping_text
    built = Hpricot() do
      b '<a"b>'
    end
    assert_equal '<b>&lt;a&quot;b&gt;</b>', built.to_html
    assert_equal '<a"b>', built.at("text()").to_s
  end

  # Content added with the builder's +text+ call appears unescaped in to_html.
  def test_no_escaping_text
    built = Hpricot() do
      div.test.me! { text '<a"b>' }
    end
    assert_equal '<div class="test" id="me"><a"b></div>', built.to_html
    assert_equal '<a"b>', built.at("text()").to_s
  end

  # NOTE(review): the literal below is reproduced exactly as it appears here;
  # it does not obviously correspond to the &#8364;&#8226; expected by the
  # first assertion, so confirm the real bytes/encoding of this file.
  def test_latin1_entities
    built = Hpricot() do
      b "ۥ"
    end
    assert_equal '<b>&#8364;&#8226;</b>', built.to_html
    assert_equal "ۥ", built.at("text()").to_s
  end

  # Double quotes inside a single-quoted attribute value come back
  # backslash-escaped once the parsed markup is serialized.
  def test_escaping_attrs
    markup = %q{<span style='font-family:"MS Mincho"'>Some text</span>}
    expected = "<span style=\"font-family:\\\"MS Mincho\\\"\">Some text</span>"
    assert_equal expected, Hpricot(markup).to_html
  end

  # Korean text given to the builder is emitted as numeric character
  # references.
  def test_korean_utf8_entities
    hangul = '한글'
    built = Hpricot() do
      b hangul
    end
    assert_equal '<b>&#54620;&#44544;</b>', built.to_html
  end
end