nokogiri-maglev- 1.5.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (225)
  1. data/.autotest +26 -0
  2. data/.gemtest +0 -0
  3. data/CHANGELOG.ja.rdoc +544 -0
  4. data/CHANGELOG.rdoc +532 -0
  5. data/Manifest.txt +283 -0
  6. data/README.ja.rdoc +106 -0
  7. data/README.rdoc +174 -0
  8. data/Rakefile +171 -0
  9. data/bin/nokogiri +53 -0
  10. data/ext/nokogiri/depend +358 -0
  11. data/ext/nokogiri/extconf.rb +124 -0
  12. data/ext/nokogiri/html_document.c +154 -0
  13. data/ext/nokogiri/html_document.h +10 -0
  14. data/ext/nokogiri/html_element_description.c +276 -0
  15. data/ext/nokogiri/html_element_description.h +10 -0
  16. data/ext/nokogiri/html_entity_lookup.c +32 -0
  17. data/ext/nokogiri/html_entity_lookup.h +8 -0
  18. data/ext/nokogiri/html_sax_parser_context.c +94 -0
  19. data/ext/nokogiri/html_sax_parser_context.h +11 -0
  20. data/ext/nokogiri/nokogiri.c +115 -0
  21. data/ext/nokogiri/nokogiri.h +160 -0
  22. data/ext/nokogiri/st.c +576 -0
  23. data/ext/nokogiri/xml_attr.c +94 -0
  24. data/ext/nokogiri/xml_attr.h +9 -0
  25. data/ext/nokogiri/xml_attribute_decl.c +70 -0
  26. data/ext/nokogiri/xml_attribute_decl.h +9 -0
  27. data/ext/nokogiri/xml_cdata.c +56 -0
  28. data/ext/nokogiri/xml_cdata.h +9 -0
  29. data/ext/nokogiri/xml_comment.c +54 -0
  30. data/ext/nokogiri/xml_comment.h +9 -0
  31. data/ext/nokogiri/xml_document.c +478 -0
  32. data/ext/nokogiri/xml_document.h +23 -0
  33. data/ext/nokogiri/xml_document_fragment.c +48 -0
  34. data/ext/nokogiri/xml_document_fragment.h +10 -0
  35. data/ext/nokogiri/xml_dtd.c +202 -0
  36. data/ext/nokogiri/xml_dtd.h +10 -0
  37. data/ext/nokogiri/xml_element_content.c +123 -0
  38. data/ext/nokogiri/xml_element_content.h +10 -0
  39. data/ext/nokogiri/xml_element_decl.c +69 -0
  40. data/ext/nokogiri/xml_element_decl.h +9 -0
  41. data/ext/nokogiri/xml_encoding_handler.c +79 -0
  42. data/ext/nokogiri/xml_encoding_handler.h +8 -0
  43. data/ext/nokogiri/xml_entity_decl.c +110 -0
  44. data/ext/nokogiri/xml_entity_decl.h +10 -0
  45. data/ext/nokogiri/xml_entity_reference.c +52 -0
  46. data/ext/nokogiri/xml_entity_reference.h +9 -0
  47. data/ext/nokogiri/xml_io.c +56 -0
  48. data/ext/nokogiri/xml_io.h +11 -0
  49. data/ext/nokogiri/xml_libxml2_hacks.c +112 -0
  50. data/ext/nokogiri/xml_libxml2_hacks.h +12 -0
  51. data/ext/nokogiri/xml_namespace.c +84 -0
  52. data/ext/nokogiri/xml_namespace.h +13 -0
  53. data/ext/nokogiri/xml_node.c +1397 -0
  54. data/ext/nokogiri/xml_node.h +13 -0
  55. data/ext/nokogiri/xml_node_set.c +418 -0
  56. data/ext/nokogiri/xml_node_set.h +9 -0
  57. data/ext/nokogiri/xml_processing_instruction.c +56 -0
  58. data/ext/nokogiri/xml_processing_instruction.h +9 -0
  59. data/ext/nokogiri/xml_reader.c +684 -0
  60. data/ext/nokogiri/xml_reader.h +10 -0
  61. data/ext/nokogiri/xml_relax_ng.c +162 -0
  62. data/ext/nokogiri/xml_relax_ng.h +9 -0
  63. data/ext/nokogiri/xml_sax_parser.c +293 -0
  64. data/ext/nokogiri/xml_sax_parser.h +39 -0
  65. data/ext/nokogiri/xml_sax_parser_context.c +199 -0
  66. data/ext/nokogiri/xml_sax_parser_context.h +10 -0
  67. data/ext/nokogiri/xml_sax_push_parser.c +115 -0
  68. data/ext/nokogiri/xml_sax_push_parser.h +9 -0
  69. data/ext/nokogiri/xml_schema.c +205 -0
  70. data/ext/nokogiri/xml_schema.h +9 -0
  71. data/ext/nokogiri/xml_syntax_error.c +58 -0
  72. data/ext/nokogiri/xml_syntax_error.h +13 -0
  73. data/ext/nokogiri/xml_text.c +50 -0
  74. data/ext/nokogiri/xml_text.h +9 -0
  75. data/ext/nokogiri/xml_xpath_context.c +315 -0
  76. data/ext/nokogiri/xml_xpath_context.h +9 -0
  77. data/ext/nokogiri/xslt_stylesheet.c +265 -0
  78. data/ext/nokogiri/xslt_stylesheet.h +9 -0
  79. data/lib/nokogiri.rb +127 -0
  80. data/lib/nokogiri/css.rb +27 -0
  81. data/lib/nokogiri/css/node.rb +99 -0
  82. data/lib/nokogiri/css/parser.rb +677 -0
  83. data/lib/nokogiri/css/parser.y +237 -0
  84. data/lib/nokogiri/css/parser_extras.rb +91 -0
  85. data/lib/nokogiri/css/syntax_error.rb +7 -0
  86. data/lib/nokogiri/css/tokenizer.rb +152 -0
  87. data/lib/nokogiri/css/tokenizer.rex +55 -0
  88. data/lib/nokogiri/css/xpath_visitor.rb +171 -0
  89. data/lib/nokogiri/decorators/slop.rb +35 -0
  90. data/lib/nokogiri/html.rb +36 -0
  91. data/lib/nokogiri/html/builder.rb +35 -0
  92. data/lib/nokogiri/html/document.rb +213 -0
  93. data/lib/nokogiri/html/document_fragment.rb +41 -0
  94. data/lib/nokogiri/html/element_description.rb +23 -0
  95. data/lib/nokogiri/html/element_description_defaults.rb +671 -0
  96. data/lib/nokogiri/html/entity_lookup.rb +13 -0
  97. data/lib/nokogiri/html/sax/parser.rb +52 -0
  98. data/lib/nokogiri/html/sax/parser_context.rb +16 -0
  99. data/lib/nokogiri/syntax_error.rb +4 -0
  100. data/lib/nokogiri/version.rb +88 -0
  101. data/lib/nokogiri/xml.rb +67 -0
  102. data/lib/nokogiri/xml/attr.rb +14 -0
  103. data/lib/nokogiri/xml/attribute_decl.rb +18 -0
  104. data/lib/nokogiri/xml/builder.rb +426 -0
  105. data/lib/nokogiri/xml/cdata.rb +11 -0
  106. data/lib/nokogiri/xml/character_data.rb +7 -0
  107. data/lib/nokogiri/xml/document.rb +234 -0
  108. data/lib/nokogiri/xml/document_fragment.rb +98 -0
  109. data/lib/nokogiri/xml/dtd.rb +22 -0
  110. data/lib/nokogiri/xml/element_content.rb +36 -0
  111. data/lib/nokogiri/xml/element_decl.rb +13 -0
  112. data/lib/nokogiri/xml/entity_decl.rb +19 -0
  113. data/lib/nokogiri/xml/namespace.rb +13 -0
  114. data/lib/nokogiri/xml/node.rb +915 -0
  115. data/lib/nokogiri/xml/node/save_options.rb +61 -0
  116. data/lib/nokogiri/xml/node_set.rb +357 -0
  117. data/lib/nokogiri/xml/notation.rb +6 -0
  118. data/lib/nokogiri/xml/parse_options.rb +93 -0
  119. data/lib/nokogiri/xml/pp.rb +2 -0
  120. data/lib/nokogiri/xml/pp/character_data.rb +18 -0
  121. data/lib/nokogiri/xml/pp/node.rb +56 -0
  122. data/lib/nokogiri/xml/processing_instruction.rb +8 -0
  123. data/lib/nokogiri/xml/reader.rb +112 -0
  124. data/lib/nokogiri/xml/relax_ng.rb +32 -0
  125. data/lib/nokogiri/xml/sax.rb +4 -0
  126. data/lib/nokogiri/xml/sax/document.rb +164 -0
  127. data/lib/nokogiri/xml/sax/parser.rb +115 -0
  128. data/lib/nokogiri/xml/sax/parser_context.rb +16 -0
  129. data/lib/nokogiri/xml/sax/push_parser.rb +60 -0
  130. data/lib/nokogiri/xml/schema.rb +63 -0
  131. data/lib/nokogiri/xml/syntax_error.rb +47 -0
  132. data/lib/nokogiri/xml/text.rb +9 -0
  133. data/lib/nokogiri/xml/xpath.rb +10 -0
  134. data/lib/nokogiri/xml/xpath/syntax_error.rb +11 -0
  135. data/lib/nokogiri/xml/xpath_context.rb +16 -0
  136. data/lib/nokogiri/xslt.rb +52 -0
  137. data/lib/nokogiri/xslt/stylesheet.rb +25 -0
  138. data/lib/xsd/xmlparser/nokogiri.rb +90 -0
  139. data/nokogiri_help_responses.md +40 -0
  140. data/tasks/cross_compile.rb +152 -0
  141. data/tasks/nokogiri.org.rb +18 -0
  142. data/tasks/test.rb +94 -0
  143. data/test/css/test_nthiness.rb +159 -0
  144. data/test/css/test_parser.rb +303 -0
  145. data/test/css/test_tokenizer.rb +198 -0
  146. data/test/css/test_xpath_visitor.rb +85 -0
  147. data/test/decorators/test_slop.rb +16 -0
  148. data/test/files/2ch.html +108 -0
  149. data/test/files/address_book.rlx +12 -0
  150. data/test/files/address_book.xml +10 -0
  151. data/test/files/bar/bar.xsd +4 -0
  152. data/test/files/dont_hurt_em_why.xml +422 -0
  153. data/test/files/encoding.html +82 -0
  154. data/test/files/encoding.xhtml +84 -0
  155. data/test/files/exslt.xml +8 -0
  156. data/test/files/exslt.xslt +35 -0
  157. data/test/files/foo/foo.xsd +4 -0
  158. data/test/files/metacharset.html +10 -0
  159. data/test/files/noencoding.html +47 -0
  160. data/test/files/po.xml +32 -0
  161. data/test/files/po.xsd +66 -0
  162. data/test/files/shift_jis.html +10 -0
  163. data/test/files/shift_jis.xml +5 -0
  164. data/test/files/snuggles.xml +3 -0
  165. data/test/files/staff.dtd +10 -0
  166. data/test/files/staff.xml +59 -0
  167. data/test/files/staff.xslt +32 -0
  168. data/test/files/tlm.html +850 -0
  169. data/test/files/valid_bar.xml +2 -0
  170. data/test/helper.rb +173 -0
  171. data/test/html/sax/test_parser.rb +139 -0
  172. data/test/html/sax/test_parser_context.rb +48 -0
  173. data/test/html/test_builder.rb +165 -0
  174. data/test/html/test_document.rb +472 -0
  175. data/test/html/test_document_encoding.rb +138 -0
  176. data/test/html/test_document_fragment.rb +255 -0
  177. data/test/html/test_element_description.rb +101 -0
  178. data/test/html/test_named_characters.rb +14 -0
  179. data/test/html/test_node.rb +193 -0
  180. data/test/html/test_node_encoding.rb +27 -0
  181. data/test/test_convert_xpath.rb +135 -0
  182. data/test/test_css_cache.rb +45 -0
  183. data/test/test_encoding_handler.rb +46 -0
  184. data/test/test_memory_leak.rb +72 -0
  185. data/test/test_nokogiri.rb +133 -0
  186. data/test/test_reader.rb +425 -0
  187. data/test/test_soap4r_sax.rb +52 -0
  188. data/test/test_xslt_transforms.rb +193 -0
  189. data/test/xml/node/test_save_options.rb +28 -0
  190. data/test/xml/node/test_subclass.rb +44 -0
  191. data/test/xml/sax/test_parser.rb +338 -0
  192. data/test/xml/sax/test_parser_context.rb +113 -0
  193. data/test/xml/sax/test_push_parser.rb +156 -0
  194. data/test/xml/test_attr.rb +65 -0
  195. data/test/xml/test_attribute_decl.rb +86 -0
  196. data/test/xml/test_builder.rb +227 -0
  197. data/test/xml/test_cdata.rb +50 -0
  198. data/test/xml/test_comment.rb +29 -0
  199. data/test/xml/test_document.rb +697 -0
  200. data/test/xml/test_document_encoding.rb +26 -0
  201. data/test/xml/test_document_fragment.rb +192 -0
  202. data/test/xml/test_dtd.rb +107 -0
  203. data/test/xml/test_dtd_encoding.rb +33 -0
  204. data/test/xml/test_element_content.rb +56 -0
  205. data/test/xml/test_element_decl.rb +73 -0
  206. data/test/xml/test_entity_decl.rb +122 -0
  207. data/test/xml/test_entity_reference.rb +21 -0
  208. data/test/xml/test_namespace.rb +70 -0
  209. data/test/xml/test_node.rb +917 -0
  210. data/test/xml/test_node_attributes.rb +34 -0
  211. data/test/xml/test_node_encoding.rb +107 -0
  212. data/test/xml/test_node_reparenting.rb +334 -0
  213. data/test/xml/test_node_set.rb +742 -0
  214. data/test/xml/test_parse_options.rb +52 -0
  215. data/test/xml/test_processing_instruction.rb +30 -0
  216. data/test/xml/test_reader_encoding.rb +126 -0
  217. data/test/xml/test_relax_ng.rb +60 -0
  218. data/test/xml/test_schema.rb +94 -0
  219. data/test/xml/test_syntax_error.rb +12 -0
  220. data/test/xml/test_text.rb +47 -0
  221. data/test/xml/test_unparented_node.rb +381 -0
  222. data/test/xml/test_xpath.rb +237 -0
  223. data/test/xslt/test_custom_functions.rb +94 -0
  224. data/test/xslt/test_exception_handling.rb +37 -0
  225. metadata +548 -0
@@ -0,0 +1,14 @@
1
+ require "helper"
2
+
3
+ module Nokogiri
4
+ module HTML
5
+ class TestNamedCharacters < Nokogiri::TestCase
6
+ def test_named_character
7
+ copy = NamedCharacters.get('copy')
8
+ assert_equal 169, NamedCharacters['copy']
9
+ assert_equal copy.value, NamedCharacters['copy']
10
+ assert copy.description
11
+ end
12
+ end
13
+ end
14
+ end
@@ -0,0 +1,193 @@
1
+ require "helper"
2
+
3
+ # require 'nkf' # skip Network Kanji Filter for now
4
+
5
+ module Nokogiri
6
+ module HTML
7
+ class TestNode < Nokogiri::TestCase
8
+ def setup
9
+ super
10
+ @html = Nokogiri::HTML(<<-eohtml)
11
+ <html>
12
+ <head></head>
13
+ <body>
14
+ <div class='baz'><a href="foo" class="bar">first</a></div>
15
+ </body>
16
+ </html>
17
+ eohtml
18
+ end
19
+
20
+ def test_to_a
21
+ assert_equal [['class', 'bar'], ['href', 'foo']],@html.at('a').to_a.sort
22
+ end
23
+
24
+ def test_attr
25
+ node = @html.at('div.baz')
26
+ assert_equal node['class'], node.attr('class')
27
+ end
28
+
29
+ def test_get_attribute
30
+ element = @html.at('div')
31
+ assert_equal 'baz', element.get_attribute('class')
32
+ assert_equal 'baz', element['class']
33
+ element['href'] = "javascript:alert(\"AGGA-KA-BOO!\")"
34
+ assert_match(/%22AGGA-KA-BOO!%22/, element.to_html)
35
+ end
36
+
37
+ def test_css_path_round_trip
38
+ doc = Nokogiri::HTML(File.read(HTML_FILE))
39
+ %w{ #header small div[2] div.post body }.each do |css_sel|
40
+ ele = doc.at css_sel
41
+ assert_equal ele, doc.at(ele.css_path), ele.css_path
42
+ end
43
+ end
44
+
45
+ def test_path_round_trip
46
+ doc = Nokogiri::HTML(File.read(HTML_FILE))
47
+ %w{ #header small div[2] div.post body }.each do |css_sel|
48
+ ele = doc.at css_sel
49
+ assert_equal ele, doc.at(ele.path), ele.path
50
+ end
51
+ end
52
+
53
+ def test_append_with_document
54
+ assert_raises(ArgumentError) do
55
+ @html.root << Nokogiri::HTML::Document.new
56
+ end
57
+ end
58
+
59
+ ###
60
+ # Make sure a document that doesn't declare a meta encoding returns
61
+ # nil.
62
+ def test_meta_encoding
63
+ assert_nil @html.meta_encoding
64
+ end
65
+
66
+ def test_description
67
+ assert desc = @html.at('a.bar').description
68
+ assert_equal 'a', desc.name
69
+ end
70
+
71
+ def test_ancestors_with_selector
72
+ assert node = @html.at('a.bar').child
73
+ assert list = node.ancestors('.baz')
74
+ assert_equal 1, list.length
75
+ assert_equal 'div', list.first.name
76
+ end
77
+
78
+ def test_matches_inside_fragment
79
+ fragment = DocumentFragment.new @html
80
+ fragment << XML::Node.new('a', @html)
81
+
82
+ a = fragment.children.last
83
+ assert a.matches?('a'), 'a should match'
84
+ end
85
+
86
+ def test_css_matches?
87
+ assert node = @html.at('a.bar')
88
+ assert node.matches?('a.bar')
89
+ end
90
+
91
+ def test_xpath_matches?
92
+ assert node = @html.at('//a')
93
+ assert node.matches?('//a')
94
+ end
95
+
96
+ def test_unlink_then_swap
97
+ node = @html.at('a')
98
+ node.unlink
99
+
100
+ another_node = @html.at('div')
101
+ assert another_node, 'should have a node'
102
+
103
+ # This used to segv
104
+ assert_nothing_raised do
105
+ node.add_previous_sibling another_node
106
+ end
107
+ end
108
+
109
+ def test_z_swap
110
+ # SEGV in spacePop from xmlParseElement
111
+ @html.at('div').swap('<a href="foo">bar</a>')
112
+ a_tag = @html.css('a').first
113
+ assert_equal 'body', a_tag.parent.name
114
+ assert_equal 0, @html.css('div').length
115
+ end
116
+
117
+ def test_z_swap_with_regex_characters
118
+ # SEGV in spacePop from xmlParseElement
119
+ @html.at('div').swap('<a href="foo">ba)r</a>')
120
+ a_tag = @html.css('a').first
121
+ assert_equal 'ba)r', a_tag.text
122
+ end
123
+
124
+ def test_attribute_decodes_entities
125
+ node = @html.at('div')
126
+ node['href'] = 'foo&bar'
127
+ assert_equal 'foo&bar', node['href']
128
+ node['href'] += '&baz'
129
+ assert_equal 'foo&bar&baz', node['href']
130
+ end
131
+
132
+ def test_parse_config_option
133
+ node = @html.at('div')
134
+ options = nil
135
+ node.parse("<div></div>") do |config|
136
+ options = config
137
+ end
138
+ assert_equal Nokogiri::XML::ParseOptions::DEFAULT_HTML, options.to_i
139
+ end
140
+
141
+ def test_z_fragment_handler_does_not_regurge_on_invalid_attributes
142
+ # SEGV in spacePop from xmlParseElement
143
+ iframe = %Q{<iframe style="width: 0%; height: 0px" src="http://someurl" allowtransparency></iframe>}
144
+ assert_nothing_raised { @html.at('div').fragment(iframe) }
145
+ end
146
+
147
+ def test_z_fragment # SEGV in spacePop from xmlParseElement
148
+ fragment = @html.fragment(<<-eohtml)
149
+ hello
150
+ <div class="foo">
151
+ <p>bar</p>
152
+ </div>
153
+ world
154
+ eohtml
155
+ assert_match(/^hello/, fragment.inner_html.strip)
156
+ assert_equal 3, fragment.children.length
157
+ assert p_tag = fragment.css('p').first
158
+ assert_equal 'div', p_tag.parent.name
159
+ assert_equal 'foo', p_tag.parent['class']
160
+ end
161
+
162
+ def test_fragment_serialization
163
+ fragment = Nokogiri::HTML.fragment("<div>foo</div>")
164
+ assert_equal "<div>foo</div>", fragment.serialize.chomp
165
+ assert_equal "<div>foo</div>", fragment.to_xml.chomp
166
+ assert_equal "<div>foo</div>", fragment.inner_html
167
+ assert_equal "<div>foo</div>", fragment.to_html
168
+ assert_equal "<div>foo</div>", fragment.to_s
169
+ end
170
+
171
+ def test_to_html_does_not_contain_entities
172
+ return unless defined?(NKF) # NKF is not implemented on Rubinius as of 2009-11-23
173
+ html = NKF.nkf("-e --msdos", <<-EOH)
174
+ <html><body>
175
+ <p> test paragraph
176
+ foo bar </p>
177
+ </body></html>
178
+ EOH
179
+ nokogiri = Nokogiri::HTML.parse(html)
180
+
181
+ if RUBY_PLATFORM =~ /java/
182
+ # NKF linebreak modes are not supported as of jruby 1.2
183
+ # see http://jira.codehaus.org/browse/JRUBY-3602 for status
184
+ assert_equal "<p>testparagraph\nfoobar</p>",
185
+ nokogiri.at("p").to_html.gsub(/ /, '')
186
+ else
187
+ assert_equal "<p>testparagraph\r\nfoobar</p>",
188
+ nokogiri.at("p").to_html.gsub(/ /, '')
189
+ end
190
+ end
191
+ end
192
+ end
193
+ end
@@ -0,0 +1,27 @@
1
+ # -*- coding: utf-8 -*-
2
+ require "helper"
3
+
4
+ module Nokogiri
5
+ module HTML
6
+ if RUBY_VERSION =~ /^1\.9/
7
+ class TestNodeEncoding < Nokogiri::TestCase
8
+ def test_inner_html
9
+ doc = Nokogiri::HTML File.open(SHIFT_JIS_HTML, 'rb')
10
+
11
+ hello = "こんにちは"
12
+
13
+ contents = doc.at('h2').inner_html
14
+ assert_equal doc.encoding, contents.encoding.name
15
+ assert_match hello.encode('Shift_JIS'), contents
16
+
17
+ contents = doc.at('h2').inner_html(:encoding => 'UTF-8')
18
+ assert_match hello, contents
19
+
20
+ doc.encoding = 'UTF-8'
21
+ contents = doc.at('h2').inner_html
22
+ assert_match hello, contents
23
+ end
24
+ end
25
+ end
26
+ end
27
+ end
@@ -0,0 +1,135 @@
1
+ require "helper"
2
+
3
+ class TestConvertXPath < Nokogiri::TestCase
4
+
5
+ def setup
6
+ super
7
+ @N = Nokogiri(File.read(HTML_FILE))
8
+ end
9
+
10
+ def assert_syntactical_equivalence(hpath, xpath, match, &blk)
11
+ blk ||= lambda {|j| j.first}
12
+ assert_equal match, blk.call(@N.search(xpath)), "xpath result did not match"
13
+ end
14
+
15
+ def test_child_tag
16
+ assert_syntactical_equivalence("h1[a]", ".//h1[child::a]", "Tender Lovemaking") do |j|
17
+ j.inner_text
18
+ end
19
+ end
20
+
21
+ def test_child_tag_equals
22
+ assert_syntactical_equivalence("h1[a='Tender Lovemaking']", ".//h1[child::a = 'Tender Lovemaking']", "Tender Lovemaking") do |j|
23
+ j.inner_text
24
+ end
25
+ end
26
+
27
+ def test_filter_contains
28
+ assert_syntactical_equivalence("title:contains('Tender')", ".//title[contains(., 'Tender')]",
29
+ "Tender Lovemaking ") do |j|
30
+ j.inner_text
31
+ end
32
+ end
33
+
34
+ def test_filter_comment
35
+ assert_syntactical_equivalence("div comment()[2]", ".//div//comment()[position() = 2]", "<!-- end of header -->") do |j|
36
+ j.first.to_s
37
+ end
38
+ end
39
+
40
+ def test_filter_text
41
+ assert_syntactical_equivalence("a[text()]", ".//a[normalize-space(child::text())]", "<a href=\"http://tenderlovemaking.com\">Tender Lovemaking</a>") do |j|
42
+ j.first.to_s
43
+ end
44
+ assert_syntactical_equivalence("a[text()='Tender Lovemaking']", ".//a[normalize-space(child::text()) = 'Tender Lovemaking']", "<a href=\"http://tenderlovemaking.com\">Tender Lovemaking</a>") do |j|
45
+ j.first.to_s
46
+ end
47
+ assert_syntactical_equivalence("a/text()", ".//a/child::text()", "Tender Lovemaking") do |j|
48
+ j.first.to_s
49
+ end
50
+ assert_syntactical_equivalence("h2//a[text()!='Back Home!']", ".//h2//a[normalize-space(child::text()) != 'Back Home!']", "Meow meow meow meow meow") do |j|
51
+ j.first.inner_text
52
+ end
53
+ end
54
+
55
+ def test_filter_by_attr
56
+ assert_syntactical_equivalence("a[@href='http://blog.geminigeek.com/wordpress-theme']",
57
+ ".//a[@href = 'http://blog.geminigeek.com/wordpress-theme']",
58
+ "http://blog.geminigeek.com/wordpress-theme") do |j|
59
+ j.first["href"]
60
+ end
61
+ end
62
+
63
+ def test_css_id
64
+ assert_syntactical_equivalence("#linkcat-7", ".//*[@id = 'linkcat-7']", "linkcat-7") do |j|
65
+ j.first["id"]
66
+ end
67
+ assert_syntactical_equivalence("li#linkcat-7", ".//li[@id = 'linkcat-7']", "linkcat-7") do |j|
68
+ j.first["id"]
69
+ end
70
+ end
71
+
72
+ def test_css_class
73
+ assert_syntactical_equivalence(".cat-item-15", ".//*[contains(concat(' ', @class, ' '), ' cat-item-15 ')]",
74
+ "cat-item cat-item-15") do |j|
75
+ j.first["class"]
76
+ end
77
+ assert_syntactical_equivalence("li.cat-item-15", ".//li[contains(concat(' ', @class, ' '), ' cat-item-15 ')]",
78
+ "cat-item cat-item-15") do |j|
79
+ j.first["class"]
80
+ end
81
+ end
82
+
83
+ def test_css_tags
84
+ assert_syntactical_equivalence("div li a", ".//div//li//a", "http://brobinius.org/") do |j|
85
+ j.first.inner_text
86
+ end
87
+ assert_syntactical_equivalence("div li > a", ".//div//li/a", "http://brobinius.org/") do |j|
88
+ j.first.inner_text
89
+ end
90
+ assert_syntactical_equivalence("h1 ~ small", ".//small[preceding-sibling::h1]", "The act of making love, tenderly.") do |j|
91
+ j.first.inner_text
92
+ end
93
+ assert_syntactical_equivalence("h1 ~ small", ".//small[preceding-sibling::h1]", "The act of making love, tenderly.") do |j|
94
+ j.first.inner_text
95
+ end
96
+ end
97
+
98
+ def test_positional
99
+ assert_syntactical_equivalence("div/div:first()", ".//div/div[position() = 1]", "\r\nTender Lovemaking\r\nThe act of making love, tenderly.\r\n".gsub(/[\r\n]/, '')) do |j|
100
+ j.first.inner_text.gsub(/[\r\n]/, '')
101
+ end
102
+ assert_syntactical_equivalence("div/div:first", ".//div/div[position() = 1]", "\r\nTender Lovemaking\r\nThe act of making love, tenderly.\r\n".gsub(/[\r\n]/, '')) do |j|
103
+ j.first.inner_text.gsub(/[\r\n]/, '')
104
+ end
105
+ assert_syntactical_equivalence("div//a:last()", ".//div//a[position() = last()]", "Wordpress") do |j|
106
+ j.last.inner_text
107
+ end
108
+ assert_syntactical_equivalence("div//a:last", ".//div//a[position() = last()]", "Wordpress") do |j|
109
+ j.last.inner_text
110
+ end
111
+ end
112
+
113
+ def test_multiple_filters
114
+ assert_syntactical_equivalence("a[@rel='bookmark'][1]", ".//a[@rel = 'bookmark' and position() = 1]", "Back Home!") do |j|
115
+ j.first.inner_text
116
+ end
117
+ end
118
+
119
+ # TODO:
120
+ # doc/'title ~ link' -> links that are siblings of title
121
+ # doc/'p[@class~="final"]' -> class includes string (whitespacy)
122
+ # doc/'p[text()*="final"]' -> class includes string (index) (broken: always returns true?)
123
+ # doc/'p[text()$="final"]' -> /final$/
124
+ # doc/'p[text()|="final"]' -> /^final$/
125
+ # doc/'p[text()^="final"]' -> string starts with 'final'
126
+ # nth_first
127
+ # nth_last
128
+ # even
129
+ # odd
130
+ # first-child, nth-child, last-child, nth-last-child, nth-last-of-type
131
+ # only-of-type, only-child
132
+ # parent
133
+ # empty
134
+ # root
135
+ end
@@ -0,0 +1,45 @@
1
+ require "helper"
2
+
3
+ class TestCssCache < Nokogiri::TestCase
4
+
5
+ def setup
6
+ super
7
+ @css = "a1 > b2 > c3"
8
+ @parse_result = Nokogiri::CSS.parse(@css)
9
+ @to_xpath_result = @parse_result.map {|ast| ast.to_xpath}
10
+ Nokogiri::CSS::Parser.class_eval do
11
+ class << @cache
12
+ alias :old_bracket :[]
13
+ attr_reader :count
14
+ def [](key)
15
+ @count ||= 0
16
+ @count += 1
17
+ old_bracket(key)
18
+ end
19
+ end
20
+ end
21
+ assert Nokogiri::CSS::Parser.cache_on?
22
+ end
23
+
24
+ def teardown
25
+ Nokogiri::CSS::Parser.clear_cache
26
+ Nokogiri::CSS::Parser.set_cache true
27
+ end
28
+
29
+ [ false, true ].each do |cache_setting|
30
+ define_method "test_css_cache_#{cache_setting ? "true" : "false"}" do
31
+ times = cache_setting ? 4 : nil
32
+
33
+ Nokogiri::CSS::Parser.set_cache cache_setting
34
+
35
+ Nokogiri::CSS.xpath_for(@css)
36
+ Nokogiri::CSS.xpath_for(@css)
37
+ Nokogiri::CSS::Parser.new.xpath_for(@css)
38
+ Nokogiri::CSS::Parser.new.xpath_for(@css)
39
+
40
+ assert_equal(times, Nokogiri::CSS::Parser.class_eval { @cache.count })
41
+ end
42
+ end
43
+
44
+
45
+ end
@@ -0,0 +1,46 @@
1
+ # -*- coding: utf-8 -*-
2
+
3
+ require "helper"
4
+
5
+ class TestEncodingHandler < Nokogiri::TestCase
6
+ def teardown
7
+ Nokogiri::EncodingHandler.clear_aliases!
8
+ end
9
+
10
+ def test_get
11
+ assert_not_nil Nokogiri::EncodingHandler['UTF-8']
12
+ assert_nil Nokogiri::EncodingHandler['alsdkjfhaldskjfh']
13
+ end
14
+
15
+ def test_name
16
+ eh = Nokogiri::EncodingHandler['UTF-8']
17
+ assert_equal "UTF-8", eh.name
18
+ end
19
+
20
+ def test_alias
21
+ Nokogiri::EncodingHandler.alias('UTF-8', 'UTF-18')
22
+ assert_equal 'UTF-8', Nokogiri::EncodingHandler['UTF-18'].name
23
+ end
24
+
25
+ def test_cleanup_aliases
26
+ assert_nil Nokogiri::EncodingHandler['UTF-9']
27
+ Nokogiri::EncodingHandler.alias('UTF-8', 'UTF-9')
28
+ assert_not_nil Nokogiri::EncodingHandler['UTF-9']
29
+
30
+ Nokogiri::EncodingHandler.clear_aliases!
31
+ assert_nil Nokogiri::EncodingHandler['UTF-9']
32
+ end
33
+
34
+ def test_delete
35
+ assert_nil Nokogiri::EncodingHandler['UTF-9']
36
+ Nokogiri::EncodingHandler.alias('UTF-8', 'UTF-9')
37
+ assert_not_nil Nokogiri::EncodingHandler['UTF-9']
38
+
39
+ Nokogiri::EncodingHandler.delete 'UTF-9'
40
+ assert_nil Nokogiri::EncodingHandler['UTF-9']
41
+ end
42
+
43
+ def test_delete_non_existent
44
+ assert_nil Nokogiri::EncodingHandler.delete('UTF-9')
45
+ end
46
+ end