nokogiri 1.1.1-java

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (142) hide show
  1. data/History.ja.txt +99 -0
  2. data/History.txt +99 -0
  3. data/Manifest.txt +141 -0
  4. data/README.ja.txt +100 -0
  5. data/README.txt +109 -0
  6. data/Rakefile +354 -0
  7. data/ext/nokogiri/extconf.rb +93 -0
  8. data/ext/nokogiri/html_document.c +86 -0
  9. data/ext/nokogiri/html_document.h +10 -0
  10. data/ext/nokogiri/html_sax_parser.c +36 -0
  11. data/ext/nokogiri/html_sax_parser.h +11 -0
  12. data/ext/nokogiri/native.c +41 -0
  13. data/ext/nokogiri/native.h +50 -0
  14. data/ext/nokogiri/xml_cdata.c +44 -0
  15. data/ext/nokogiri/xml_cdata.h +9 -0
  16. data/ext/nokogiri/xml_comment.c +42 -0
  17. data/ext/nokogiri/xml_comment.h +9 -0
  18. data/ext/nokogiri/xml_document.c +206 -0
  19. data/ext/nokogiri/xml_document.h +10 -0
  20. data/ext/nokogiri/xml_dtd.c +121 -0
  21. data/ext/nokogiri/xml_dtd.h +8 -0
  22. data/ext/nokogiri/xml_io.c +17 -0
  23. data/ext/nokogiri/xml_io.h +9 -0
  24. data/ext/nokogiri/xml_node.c +727 -0
  25. data/ext/nokogiri/xml_node.h +13 -0
  26. data/ext/nokogiri/xml_node_set.c +118 -0
  27. data/ext/nokogiri/xml_node_set.h +9 -0
  28. data/ext/nokogiri/xml_reader.c +465 -0
  29. data/ext/nokogiri/xml_reader.h +10 -0
  30. data/ext/nokogiri/xml_sax_parser.c +201 -0
  31. data/ext/nokogiri/xml_sax_parser.h +10 -0
  32. data/ext/nokogiri/xml_syntax_error.c +199 -0
  33. data/ext/nokogiri/xml_syntax_error.h +11 -0
  34. data/ext/nokogiri/xml_text.c +40 -0
  35. data/ext/nokogiri/xml_text.h +9 -0
  36. data/ext/nokogiri/xml_xpath.c +53 -0
  37. data/ext/nokogiri/xml_xpath.h +11 -0
  38. data/ext/nokogiri/xml_xpath_context.c +214 -0
  39. data/ext/nokogiri/xml_xpath_context.h +9 -0
  40. data/ext/nokogiri/xslt_stylesheet.c +123 -0
  41. data/ext/nokogiri/xslt_stylesheet.h +9 -0
  42. data/lib/action-nokogiri.rb +30 -0
  43. data/lib/nokogiri.rb +72 -0
  44. data/lib/nokogiri/css.rb +25 -0
  45. data/lib/nokogiri/css/generated_parser.rb +721 -0
  46. data/lib/nokogiri/css/generated_tokenizer.rb +159 -0
  47. data/lib/nokogiri/css/node.rb +97 -0
  48. data/lib/nokogiri/css/parser.rb +64 -0
  49. data/lib/nokogiri/css/parser.y +216 -0
  50. data/lib/nokogiri/css/syntax_error.rb +6 -0
  51. data/lib/nokogiri/css/tokenizer.rb +9 -0
  52. data/lib/nokogiri/css/tokenizer.rex +63 -0
  53. data/lib/nokogiri/css/xpath_visitor.rb +168 -0
  54. data/lib/nokogiri/decorators.rb +2 -0
  55. data/lib/nokogiri/decorators/hpricot.rb +3 -0
  56. data/lib/nokogiri/decorators/hpricot/node.rb +56 -0
  57. data/lib/nokogiri/decorators/hpricot/node_set.rb +54 -0
  58. data/lib/nokogiri/decorators/hpricot/xpath_visitor.rb +28 -0
  59. data/lib/nokogiri/decorators/slop.rb +31 -0
  60. data/lib/nokogiri/hpricot.rb +51 -0
  61. data/lib/nokogiri/html.rb +105 -0
  62. data/lib/nokogiri/html/builder.rb +9 -0
  63. data/lib/nokogiri/html/document.rb +9 -0
  64. data/lib/nokogiri/html/sax/parser.rb +21 -0
  65. data/lib/nokogiri/version.rb +3 -0
  66. data/lib/nokogiri/xml.rb +83 -0
  67. data/lib/nokogiri/xml/after_handler.rb +18 -0
  68. data/lib/nokogiri/xml/attr.rb +10 -0
  69. data/lib/nokogiri/xml/before_handler.rb +33 -0
  70. data/lib/nokogiri/xml/builder.rb +84 -0
  71. data/lib/nokogiri/xml/cdata.rb +9 -0
  72. data/lib/nokogiri/xml/comment.rb +6 -0
  73. data/lib/nokogiri/xml/document.rb +55 -0
  74. data/lib/nokogiri/xml/dtd.rb +6 -0
  75. data/lib/nokogiri/xml/element.rb +6 -0
  76. data/lib/nokogiri/xml/entity_declaration.rb +9 -0
  77. data/lib/nokogiri/xml/node.rb +333 -0
  78. data/lib/nokogiri/xml/node_set.rb +197 -0
  79. data/lib/nokogiri/xml/notation.rb +6 -0
  80. data/lib/nokogiri/xml/reader.rb +20 -0
  81. data/lib/nokogiri/xml/sax.rb +9 -0
  82. data/lib/nokogiri/xml/sax/document.rb +59 -0
  83. data/lib/nokogiri/xml/sax/parser.rb +37 -0
  84. data/lib/nokogiri/xml/syntax_error.rb +21 -0
  85. data/lib/nokogiri/xml/text.rb +6 -0
  86. data/lib/nokogiri/xml/xpath.rb +10 -0
  87. data/lib/nokogiri/xml/xpath/syntax_error.rb +8 -0
  88. data/lib/nokogiri/xml/xpath_context.rb +14 -0
  89. data/lib/nokogiri/xslt.rb +28 -0
  90. data/lib/nokogiri/xslt/stylesheet.rb +6 -0
  91. data/test/css/test_nthiness.rb +159 -0
  92. data/test/css/test_parser.rb +237 -0
  93. data/test/css/test_tokenizer.rb +162 -0
  94. data/test/css/test_xpath_visitor.rb +64 -0
  95. data/test/files/dont_hurt_em_why.xml +422 -0
  96. data/test/files/exslt.xml +8 -0
  97. data/test/files/exslt.xslt +35 -0
  98. data/test/files/staff.xml +59 -0
  99. data/test/files/staff.xslt +32 -0
  100. data/test/files/tlm.html +850 -0
  101. data/test/helper.rb +78 -0
  102. data/test/hpricot/files/basic.xhtml +17 -0
  103. data/test/hpricot/files/boingboing.html +2266 -0
  104. data/test/hpricot/files/cy0.html +3653 -0
  105. data/test/hpricot/files/immob.html +400 -0
  106. data/test/hpricot/files/pace_application.html +1320 -0
  107. data/test/hpricot/files/tenderlove.html +16 -0
  108. data/test/hpricot/files/uswebgen.html +220 -0
  109. data/test/hpricot/files/utf8.html +1054 -0
  110. data/test/hpricot/files/week9.html +1723 -0
  111. data/test/hpricot/files/why.xml +19 -0
  112. data/test/hpricot/load_files.rb +11 -0
  113. data/test/hpricot/test_alter.rb +67 -0
  114. data/test/hpricot/test_builder.rb +27 -0
  115. data/test/hpricot/test_parser.rb +426 -0
  116. data/test/hpricot/test_paths.rb +15 -0
  117. data/test/hpricot/test_preserved.rb +77 -0
  118. data/test/hpricot/test_xml.rb +30 -0
  119. data/test/html/sax/test_parser.rb +27 -0
  120. data/test/html/test_builder.rb +89 -0
  121. data/test/html/test_document.rb +150 -0
  122. data/test/html/test_node.rb +21 -0
  123. data/test/test_convert_xpath.rb +185 -0
  124. data/test/test_css_cache.rb +57 -0
  125. data/test/test_gc.rb +15 -0
  126. data/test/test_memory_leak.rb +38 -0
  127. data/test/test_nokogiri.rb +97 -0
  128. data/test/test_reader.rb +222 -0
  129. data/test/test_xslt_transforms.rb +93 -0
  130. data/test/xml/sax/test_parser.rb +95 -0
  131. data/test/xml/test_attr.rb +15 -0
  132. data/test/xml/test_builder.rb +16 -0
  133. data/test/xml/test_cdata.rb +18 -0
  134. data/test/xml/test_comment.rb +16 -0
  135. data/test/xml/test_document.rb +195 -0
  136. data/test/xml/test_dtd.rb +43 -0
  137. data/test/xml/test_node.rb +394 -0
  138. data/test/xml/test_node_set.rb +143 -0
  139. data/test/xml/test_text.rb +13 -0
  140. data/test/xml/test_xpath.rb +105 -0
  141. data/vendor/hoe.rb +1020 -0
  142. metadata +233 -0
@@ -0,0 +1,15 @@
1
+ require File.expand_path(File.join(File.dirname(__FILE__), '..', "helper"))
2
+ require File.join(File.dirname(__FILE__),"load_files")
3
+
4
+ class TestParser < Nokogiri::TestCase
5
+ include Nokogiri
6
+
7
+ def test_roundtrip
8
+ @basic = Hpricot.parse(TestFiles::BASIC)
9
+ %w[link link[2] body #link1 a p.ohmy].each do |css_sel|
10
+ ele = @basic.at(css_sel)
11
+ assert_equal ele, @basic.at(ele.css_path), ele.css_path
12
+ assert_equal ele, @basic.at(ele.xpath), ele.xpath
13
+ end
14
+ end
15
+ end
@@ -0,0 +1,77 @@
1
+ require File.expand_path(File.join(File.dirname(__FILE__), '..', "helper"))
2
+ require File.join(File.dirname(__FILE__),"load_files")
3
+
4
+ class TestPreserved < Nokogiri::TestCase
5
+ def assert_roundtrip str
6
+ doc = Nokogiri.Hpricot(str)
7
+ yield doc if block_given?
8
+ str2 = doc.to_original_html
9
+ [*str].zip([*str2]).each do |s1, s2|
10
+ assert_equal s1, s2
11
+ end
12
+ end
13
+
14
+ def assert_html str1, str2
15
+ doc = Nokogiri.Hpricot(str2)
16
+ yield doc if block_given?
17
+ assert_equal str1, doc.to_original_html
18
+ end
19
+
20
+ ####
21
+ # Not supporting to_original_html
22
+ #def test_simple
23
+ # str = "<p>Hpricot is a <b>you know <i>uh</b> fine thing.</p>"
24
+ # assert_html str, str
25
+ # assert_html "<p class=\"new\">Hpricot is a <b>you know <i>uh</b> fine thing.</p>", str do |doc|
26
+ # (doc/:p).set('class', 'new')
27
+ # end
28
+ #end
29
+
30
+ ####
31
+ # Not supporting to_original_html
32
+ #def test_parent
33
+ # str = "<html><base href='/'><head><title>Test</title></head><body><div id='wrap'><p>Paragraph one.</p><p>Paragraph two.</p></div></body></html>"
34
+ # assert_html str, str
35
+ # assert_html "<html><base href='/'><body><div id=\"all\"><div><p>Paragraph one.</p></div><div><p>Paragraph two.</p></div></div></body></html>", str do |doc|
36
+ # (doc/:head).remove
37
+ # (doc/:div).set('id', 'all')
38
+ # (doc/:p).wrap('<div></div>')
39
+ # end
40
+ #end
41
+
42
+ # Not really a valid test. If libxml can figure out the encoding of the file,
43
+ # it will use that encoding; otherwise it emits &#x...; character references so that no data
44
+ # is lost.
45
+ #
46
+ # libxml on OSX can't figure out the encoding, so this test passes. Linux
47
+ # can figure out the encoding, so it fails.
48
+ #def test_escaping_of_contents
49
+ # doc = Nokogiri.Hpricot(TestFiles::BOINGBOING)
50
+ # assert_equal "Fukuda&#x2019;s Automatic Door opens around your body as you pass through it. The idea is to save energy and keep the room clean.", doc.at("img[@alt='200606131240']").next.to_s.strip
51
+ #end
52
+
53
+ ####
54
+ # Modified. No.
55
+ #def test_files
56
+ # assert_roundtrip TestFiles::BASIC
57
+ # assert_roundtrip TestFiles::BOINGBOING
58
+ # assert_roundtrip TestFiles::CY0
59
+ #end
60
+
61
+ ####
62
+ # Modified. When calling "to_html" on the document, proper html/doc tags
63
+ # are produced too.
64
+ def test_escaping_of_attrs
65
+ # ampersands in URLs
66
+ str = %{<a href="http://google.com/search?q=nokogiri&amp;l=en">Google</a>}
67
+ link = (doc = Nokogiri.Hpricot(str)).at(:a)
68
+ assert_equal "http://google.com/search?q=nokogiri&l=en", link['href']
69
+ assert_equal "http://google.com/search?q=nokogiri&l=en", link.get_attribute('href')
70
+ assert_equal "http://google.com/search?q=nokogiri&l=en", link.raw_attributes['href']
71
+ assert_equal str, link.to_html
72
+
73
+ # alter the url
74
+ link['href'] = "javascript:alert(\"AGGA-KA-BOO!\")"
75
+ assert_equal %{<a href="javascript:alert(&quot;AGGA-KA-BOO!&quot;)">Google</a>}, link.to_html.gsub(/%22/, '&quot;')
76
+ end
77
+ end
@@ -0,0 +1,30 @@
1
+ require File.expand_path(File.join(File.dirname(__FILE__), '..', "helper"))
2
+ require File.join(File.dirname(__FILE__),"load_files")
3
+
4
+ class TestParser < Nokogiri::TestCase
5
+ include Nokogiri
6
+ # normally, the link tags are empty HTML tags.
7
+ # contributed by laudney.
8
+ def test_normally_empty
9
+ doc = Hpricot::XML("<rss><channel><title>this is title</title><link>http://fake.com</link></channel></rss>")
10
+ assert_equal "this is title", (doc/:rss/:channel/:title).text
11
+ assert_equal "http://fake.com", (doc/:rss/:channel/:link).text
12
+ end
13
+
14
+ # make sure XML doesn't get downcased
15
+ def test_casing
16
+ doc = Hpricot::XML(TestFiles::WHY)
17
+
18
+ ### Modified.
19
+ # I don't want to differentiate pseudo classes from namespaces. If
20
+ # you're parsing xml, use XPath. That's what it's for. :-P
21
+ assert_equal "hourly", (doc.at "//sy:updatePeriod").content
22
+ assert_equal 1, (doc/"guid[@isPermaLink]").length
23
+ end
24
+
25
+ # be sure tags named "text" are ok
26
+ def test_text_tags
27
+ doc = Hpricot::XML("<feed><title>City Poisoned</title><text>Rita Lee has poisoned Brazil.</text></feed>")
28
+ assert_equal "City Poisoned", (doc/"title").text
29
+ end
30
+ end
@@ -0,0 +1,27 @@
1
+ require File.expand_path(File.join(File.dirname(__FILE__), '..', '..', "helper"))
2
+
3
+ module Nokogiri
4
+ module HTML
5
+ module SAX
6
+ class TestParser < Nokogiri::SAX::TestCase
7
+ def setup
8
+ @parser = HTML::SAX::Parser.new(Doc.new)
9
+ end
10
+
11
+ def test_parse_file
12
+ @parser.parse_file(HTML_FILE)
13
+ assert_equal 1110, @parser.document.end_elements.length
14
+ end
15
+
16
+ def test_parse_document
17
+ @parser.parse_memory(<<-eoxml)
18
+ <p>Paragraph 1</p>
19
+ <p>Paragraph 2</p>
20
+ eoxml
21
+ assert_equal([["html", []], ["body", []], ["p", []], ["p", []]],
22
+ @parser.document.start_elements)
23
+ end
24
+ end
25
+ end
26
+ end
27
+ end
@@ -0,0 +1,89 @@
1
+ require File.expand_path(File.join(File.dirname(__FILE__), '..', "helper"))
2
+
3
+ module Nokogiri
4
+ module HTML
5
+ class TestBuilder < Nokogiri::TestCase
6
+ def test_hash_as_attributes
7
+ builder = Nokogiri::HTML::Builder.new do
8
+ div(:id => 'awesome') {
9
+ h1 "america"
10
+ }
11
+ end
12
+ assert_equal('<div id="awesome"><h1>america</h1></div>',
13
+ builder.to_html.gsub(/\n/, ''))
14
+ end
15
+
16
+ def test_has_ampersand
17
+ builder = Nokogiri::HTML::Builder.new do
18
+ div.rad.thing! {
19
+ text "<awe&some>"
20
+ b "hello & world"
21
+ }
22
+ end
23
+ assert_equal(
24
+ '<div class="rad" id="thing">&lt;awe&amp;some&gt;<b>hello &amp; world</b></div>',
25
+ builder.to_html.gsub(/\n/, ''))
26
+ end
27
+
28
+ def test_multi_tags
29
+ builder = Nokogiri::HTML::Builder.new do
30
+ div.rad.thing! {
31
+ text "<awesome>"
32
+ b "hello"
33
+ }
34
+ end
35
+ assert_equal(
36
+ '<div class="rad" id="thing">&lt;awesome&gt;<b>hello</b></div>',
37
+ builder.doc.to_html.gsub(/\n/, ''))
38
+ end
39
+
40
+ def test_attributes_plus_block
41
+ builder = Nokogiri::HTML::Builder.new do
42
+ div.rad.thing! {
43
+ text "<awesome>"
44
+ }
45
+ end
46
+ assert_equal('<div class="rad" id="thing">&lt;awesome&gt;</div>',
47
+ builder.doc.to_html.chomp)
48
+ end
49
+
50
+ def test_builder_adds_attributes
51
+ builder = Nokogiri::HTML::Builder.new do
52
+ div.rad.thing! "tender div"
53
+ end
54
+ assert_equal('<div class="rad" id="thing">tender div</div>',
55
+ builder.doc.to_html.chomp)
56
+ end
57
+
58
+ def test_bold_tag
59
+ builder = Nokogiri::HTML::Builder.new do
60
+ b "bold tag"
61
+ end
62
+ assert_equal('<b>bold tag</b>', builder.doc.to_html.chomp)
63
+ end
64
+
65
+ def test_html_then_body_tag
66
+ builder = Nokogiri::HTML::Builder.new do
67
+ html {
68
+ body {
69
+ b "bold tag"
70
+ }
71
+ }
72
+ end
73
+ assert_equal('<html><body><b>bold tag</b></body></html>',
74
+ builder.doc.to_html.chomp)
75
+ end
76
+
77
+ def test_instance_eval_with_delegation_to_block_context
78
+ class << self
79
+ def foo
80
+ "foo!"
81
+ end
82
+ end
83
+
84
+ builder = Nokogiri::HTML::Builder.new { text foo }
85
+ assert builder.to_html.include?("foo!")
86
+ end
87
+ end
88
+ end
89
+ end
@@ -0,0 +1,150 @@
1
+ require File.expand_path(File.join(File.dirname(__FILE__), '..', "helper"))
2
+
3
+ module Nokogiri
4
+ module HTML
5
+ class TestDocument < Nokogiri::TestCase
6
+ def setup
7
+ @html = Nokogiri::HTML.parse(File.read(HTML_FILE))
8
+ end
9
+
10
+ def test_HTML_function
11
+ html = Nokogiri::HTML(File.read(HTML_FILE))
12
+ assert html.html?
13
+ end
14
+
15
+ def test_relative_css
16
+ html = Nokogiri::HTML(<<-eohtml)
17
+ <html>
18
+ <body>
19
+ <div>
20
+ <p>inside div tag</p>
21
+ </div>
22
+ <p>outside div tag</p>
23
+ </body>
24
+ </html>
25
+ eohtml
26
+ set = html.search('div').search('p')
27
+ assert_equal(1, set.length)
28
+ assert_equal('inside div tag', set.first.inner_text)
29
+ end
30
+
31
+ def test_multi_css
32
+ html = Nokogiri::HTML(<<-eohtml)
33
+ <html>
34
+ <body>
35
+ <div>
36
+ <p>p tag</p>
37
+ <a>a tag</a>
38
+ </div>
39
+ </body>
40
+ </html>
41
+ eohtml
42
+ set = html.css('p, a')
43
+ assert_equal(2, set.length)
44
+ assert_equal ['a tag', 'p tag'].sort, set.map { |x| x.content }.sort
45
+ end
46
+
47
+ def test_inner_text
48
+ html = Nokogiri::HTML(<<-eohtml)
49
+ <html>
50
+ <body>
51
+ <div>
52
+ <p>
53
+ Hello world!
54
+ </p>
55
+ </div>
56
+ </body>
57
+ </html>
58
+ eohtml
59
+ node = html.xpath('//div').first
60
+ assert_equal('Hello world!', node.inner_text.strip)
61
+ end
62
+
63
+ def test_inner_html
64
+ html = Nokogiri::HTML(<<-eohtml)
65
+ <html>
66
+ <body>
67
+ <div>
68
+ <p>
69
+ Hello world!
70
+ </p>
71
+ </div>
72
+ </body>
73
+ </html>
74
+ eohtml
75
+ node = html.xpath('//div').first
76
+ assert_equal('<p>Helloworld!</p>', node.inner_html.gsub(/\s/, ''))
77
+ end
78
+
79
+ def test_fragment
80
+ node_set = Nokogiri::HTML.fragment(<<-eohtml)
81
+ <div>
82
+ <b>Hello World</b>
83
+ </div>
84
+ eohtml
85
+ assert_equal 1, node_set.length
86
+ assert_equal 'div', node_set.first.name
87
+ assert_match(/Hello World/, node_set.to_html)
88
+ end
89
+
90
+ def test_relative_css_finder
91
+ doc = Nokogiri::HTML(<<-eohtml)
92
+ <html>
93
+ <body>
94
+ <div class="red">
95
+ <p>
96
+ inside red
97
+ </p>
98
+ </div>
99
+ <div class="green">
100
+ <p>
101
+ inside green
102
+ </p>
103
+ </div>
104
+ </body>
105
+ </html>
106
+ eohtml
107
+ red_divs = doc.css('div.red')
108
+ assert_equal 1, red_divs.length
109
+ p_tags = red_divs.first.css('p')
110
+ assert_equal 1, p_tags.length
111
+ assert_equal 'inside red', p_tags.first.text.strip
112
+ end
113
+
114
+ def test_find_classes
115
+ doc = Nokogiri::HTML(<<-eohtml)
116
+ <html>
117
+ <body>
118
+ <p class="red">RED</p>
119
+ <p class="awesome red">RED</p>
120
+ <p class="notred">GREEN</p>
121
+ <p class="green notred">GREEN</p>
122
+ </body>
123
+ </html>
124
+ eohtml
125
+ list = doc.css('.red')
126
+ assert_equal 2, list.length
127
+ assert_equal %w{ RED RED }, list.map { |x| x.text }
128
+ end
129
+
130
+ def test_parse_can_take_io
131
+ html = nil
132
+ File.open(HTML_FILE, 'rb') { |f|
133
+ html = Nokogiri::HTML(f)
134
+ }
135
+ assert html.html?
136
+ end
137
+
138
+ def test_html?
139
+ assert !@html.xml?
140
+ assert @html.html?
141
+ end
142
+
143
+ def test_serialize
144
+ assert @html.serialize
145
+ assert @html.to_html
146
+ end
147
+ end
148
+ end
149
+ end
150
+
@@ -0,0 +1,21 @@
1
+ require File.expand_path(File.join(File.dirname(__FILE__), '..', "helper"))
2
+
3
+ require 'nkf'
4
+
5
+ module Nokogiri
6
+ module HTML
7
+ class TestNode < Nokogiri::TestCase
8
+ def test_to_html_does_not_contain_entities
9
+ html = NKF.nkf("-e --msdos", <<-EOH)
10
+ <html><body>
11
+ <p> test paragraph
12
+ foo bar </p>
13
+ </body></html>
14
+ EOH
15
+ nokogiri = Nokogiri::HTML.parse(html)
16
+ assert_equal "<p>testparagraph\r\nfoobar</p>",
17
+ nokogiri.at("p").to_html.gsub(/ /, '')
18
+ end
19
+ end
20
+ end
21
+ end
@@ -0,0 +1,185 @@
1
+ require File.expand_path(File.join(File.dirname(__FILE__), "helper"))
2
+
3
+ begin
4
+ require 'rubygems'
5
+ require 'hpricot'
6
+ HAS_HPRICOT = true
7
+ rescue LoadError
8
+ HAS_HPRICOT = false
9
+ end
10
+
11
+ class TestConvertXPath < Nokogiri::TestCase
12
+
13
+ def setup
14
+ @N = Nokogiri(File.read(HTML_FILE))
15
+ @NH = Nokogiri.Hpricot(File.read(HTML_FILE)) # decorated document
16
+ @H = Hpricot(File.read(HTML_FILE)) if HAS_HPRICOT
17
+ end
18
+
19
+ def assert_syntactical_equivalence(hpath, xpath, match, &blk)
20
+ blk ||= lambda {|j| j.first}
21
+ assert_equal match, blk.call(@N.search(xpath)), "xpath result did not match"
22
+ if HAS_HPRICOT
23
+ assert_equal match, blk.call(@H.search(hpath)), "hpath result did not match"
24
+ end
25
+ assert_equal [xpath], @NH.convert_to_xpath(hpath), "converted hpath did not match xpath"
26
+ end
27
+
28
+ def test_ordinary_xpath_conversions
29
+ assert_equal(".//p", @NH.convert_to_xpath("p").first)
30
+ assert_equal(".//p", @NH.convert_to_xpath(:p).first)
31
+ assert_equal(".//p", @NH.convert_to_xpath("//p").first)
32
+ assert_equal(".//p", @NH.convert_to_xpath(".//p").first)
33
+ end
34
+
35
+ def test_child_tag
36
+ assert_syntactical_equivalence("h1[a]", ".//h1[child::a]", "Tender Lovemaking") do |j|
37
+ j.inner_text
38
+ end
39
+ end
40
+
41
+ def test_child_tag_equals
42
+ assert_syntactical_equivalence("h1[a='Tender Lovemaking']", ".//h1[child::a = 'Tender Lovemaking']", "Tender Lovemaking") do |j|
43
+ j.inner_text
44
+ end
45
+ end
46
+
47
+ def test_filter_contains
48
+ assert_syntactical_equivalence("title:contains('Tender')", ".//title[contains(., 'Tender')]",
49
+ "Tender Lovemaking ") do |j|
50
+ j.inner_text
51
+ end
52
+ end
53
+
54
+ def test_filter_comment
55
+ assert_syntactical_equivalence("div comment()[2]", ".//div//comment()[position() = 2]", "<!-- end of header -->") do |j|
56
+ j.first.to_s
57
+ end
58
+ end
59
+
60
+ def test_filter_text
61
+ assert_syntactical_equivalence("a[text()]", ".//a[normalize-space(child::text())]", "<a href=\"http://tenderlovemaking.com\">Tender Lovemaking</a>") do |j|
62
+ j.first.to_s
63
+ end
64
+ assert_syntactical_equivalence("a[text()='Tender Lovemaking']", ".//a[normalize-space(child::text()) = 'Tender Lovemaking']", "<a href=\"http://tenderlovemaking.com\">Tender Lovemaking</a>") do |j|
65
+ j.first.to_s
66
+ end
67
+ assert_syntactical_equivalence("a/text()", ".//a/child::text()", "Tender Lovemaking") do |j|
68
+ j.first.to_s
69
+ end
70
+ assert_syntactical_equivalence("h2//a[text()!='Back Home!']", ".//h2//a[normalize-space(child::text()) != 'Back Home!']", "Meow meow meow meow meow") do |j|
71
+ j.first.inner_text
72
+ end
73
+ end
74
+
75
+ def test_filter_by_attr
76
+ assert_syntactical_equivalence("a[@href='http://blog.geminigeek.com/wordpress-theme']",
77
+ ".//a[@href = 'http://blog.geminigeek.com/wordpress-theme']",
78
+ "http://blog.geminigeek.com/wordpress-theme") do |j|
79
+ j.first["href"]
80
+ end
81
+ end
82
+
83
+ def test_css_id
84
+ assert_syntactical_equivalence("#linkcat-7", ".//*[@id = 'linkcat-7']", "linkcat-7") do |j|
85
+ j.first["id"]
86
+ end
87
+ assert_syntactical_equivalence("li#linkcat-7", ".//li[@id = 'linkcat-7']", "linkcat-7") do |j|
88
+ j.first["id"]
89
+ end
90
+ end
91
+
92
+ def test_css_class
93
+ assert_syntactical_equivalence(".cat-item-15", ".//*[contains(concat(' ', @class, ' '), ' cat-item-15 ')]",
94
+ "cat-item cat-item-15") do |j|
95
+ j.first["class"]
96
+ end
97
+ assert_syntactical_equivalence("li.cat-item-15", ".//li[contains(concat(' ', @class, ' '), ' cat-item-15 ')]",
98
+ "cat-item cat-item-15") do |j|
99
+ j.first["class"]
100
+ end
101
+ end
102
+
103
+ def test_css_tags
104
+ assert_syntactical_equivalence("div li a", ".//div//li//a", "http://brobinius.org/") do |j|
105
+ j.first.inner_text
106
+ end
107
+ assert_syntactical_equivalence("div li > a", ".//div//li/a", "http://brobinius.org/") do |j|
108
+ j.first.inner_text
109
+ end
110
+ assert_syntactical_equivalence("h1 ~ small", ".//small[preceding-sibling::h1]", "The act of making love, tenderly.") do |j|
111
+ j.first.inner_text
112
+ end
113
+ assert_syntactical_equivalence("h1 ~ small", ".//small[preceding-sibling::h1]", "The act of making love, tenderly.") do |j|
114
+ j.first.inner_text
115
+ end
116
+ end
117
+
118
+ def test_positional
119
+ ##
120
+ # we are intentionally NOT staying compatible with nth-and-friends, as Hpricot has an off-by-one (OB1) bug.
121
+ #
122
+ # assert_syntactical_equivalence("div > div:eq(0)", ".//div/div[position() = 1]", "\r\nTender Lovemaking\r\nThe act of making love, tenderly.\r\n") do |j|
123
+ # j.first.inner_text
124
+ # end
125
+ # assert_syntactical_equivalence("div/div:eq(0)", ".//div/div[position() = 1]", "\r\nTender Lovemaking\r\nThe act of making love, tenderly.\r\n") do |j|
126
+ # j.first.inner_text
127
+ # end
128
+ # assert_syntactical_equivalence("div/div:nth(0)", ".//div/div[position() = 1]", "\r\nTender Lovemaking\r\nThe act of making love, tenderly.\r\n") do |j|
129
+ # j.first.inner_text
130
+ # end
131
+ # assert_syntactical_equivalence("div/div:nth-of-type(0)", ".//div/div[position() = 1]", "\r\nTender Lovemaking\r\nThe act of making love, tenderly.\r\n") do |j|
132
+ # j.first.inner_text
133
+ # end
134
+ assert_syntactical_equivalence("div/div:first()", ".//div/div[position() = 1]", "\r\nTender Lovemaking\r\nThe act of making love, tenderly.\r\n".gsub(/[\r\n]/, '')) do |j|
135
+ j.first.inner_text.gsub(/[\r\n]/, '')
136
+ end
137
+ assert_syntactical_equivalence("div/div:first", ".//div/div[position() = 1]", "\r\nTender Lovemaking\r\nThe act of making love, tenderly.\r\n".gsub(/[\r\n]/, '')) do |j|
138
+ j.first.inner_text.gsub(/[\r\n]/, '')
139
+ end
140
+ assert_syntactical_equivalence("div//a:last()", ".//div//a[position() = last()]", "Wordpress") do |j|
141
+ j.last.inner_text
142
+ end
143
+ assert_syntactical_equivalence("div//a:last", ".//div//a[position() = last()]", "Wordpress") do |j|
144
+ j.last.inner_text
145
+ end
146
+ end
147
+
148
+ def test_multiple_filters
149
+ assert_syntactical_equivalence("a[@rel='bookmark'][1]", ".//a[@rel = 'bookmark' and position() = 1]", "Back Home!") do |j|
150
+ j.first.inner_text
151
+ end
152
+ end
153
+
154
+ def test_compat_mode_namespaces
155
+ assert_equal(".//*[name()='t:sam']", @NH.convert_to_xpath("//t:sam").first)
156
+ assert_equal(".//*[name()='t:sam'][@rel='bookmark'][1]", @NH.convert_to_xpath("//t:sam[@rel='bookmark'][1]").first)
157
+ end
158
+
159
+ ##
160
+ # 'and' is not supported by hpricot
161
+ # def test_and
162
+ # assert_syntactical_equivalence("div[h1 and small]", ".//div[h1 and small]", "\r\nTender Lovemaking\r\nThe act of making love, tenderly.\r\n") do |j|
163
+ # j.inner_text
164
+ # end
165
+ # end
166
+
167
+
168
+
169
+ # TODO:
170
+ # doc/'title ~ link' -> links that are siblings of title
171
+ # doc/'p[@class~="final"]' -> class includes string (whitespacy)
172
+ # doc/'p[text()*="final"]' -> class includes string (index) (broken: always returns true?)
173
+ # doc/'p[text()$="final"]' -> /final$/
174
+ # doc/'p[text()|="final"]' -> /^final$/
175
+ # doc/'p[text()^="final"]' -> string starts with 'final'
176
+ # nth_first
177
+ # nth_last
178
+ # even
179
+ # odd
180
+ # first-child, nth-child, last-child, nth-last-child, nth-last-of-type
181
+ # only-of-type, only-child
182
+ # parent
183
+ # empty
184
+ # root
185
+ end