nokogiri-maven 1.5.0-java

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (278)
  1. data/CHANGELOG.ja.rdoc +544 -0
  2. data/CHANGELOG.rdoc +532 -0
  3. data/Manifest.txt +283 -0
  4. data/README.ja.rdoc +106 -0
  5. data/README.rdoc +174 -0
  6. data/Rakefile +164 -0
  7. data/bin/nokogiri +53 -0
  8. data/ext/java/nokogiri/EncodingHandler.java +124 -0
  9. data/ext/java/nokogiri/HtmlDocument.java +119 -0
  10. data/ext/java/nokogiri/HtmlElementDescription.java +145 -0
  11. data/ext/java/nokogiri/HtmlEntityLookup.java +79 -0
  12. data/ext/java/nokogiri/HtmlSaxParserContext.java +259 -0
  13. data/ext/java/nokogiri/NokogiriService.java +590 -0
  14. data/ext/java/nokogiri/XmlAttr.java +180 -0
  15. data/ext/java/nokogiri/XmlAttributeDecl.java +130 -0
  16. data/ext/java/nokogiri/XmlCdata.java +84 -0
  17. data/ext/java/nokogiri/XmlComment.java +86 -0
  18. data/ext/java/nokogiri/XmlDocument.java +519 -0
  19. data/ext/java/nokogiri/XmlDocumentFragment.java +223 -0
  20. data/ext/java/nokogiri/XmlDtd.java +469 -0
  21. data/ext/java/nokogiri/XmlElement.java +195 -0
  22. data/ext/java/nokogiri/XmlElementContent.java +382 -0
  23. data/ext/java/nokogiri/XmlElementDecl.java +152 -0
  24. data/ext/java/nokogiri/XmlEntityDecl.java +162 -0
  25. data/ext/java/nokogiri/XmlEntityReference.java +97 -0
  26. data/ext/java/nokogiri/XmlNamespace.java +183 -0
  27. data/ext/java/nokogiri/XmlNode.java +1378 -0
  28. data/ext/java/nokogiri/XmlNodeSet.java +267 -0
  29. data/ext/java/nokogiri/XmlProcessingInstruction.java +99 -0
  30. data/ext/java/nokogiri/XmlReader.java +408 -0
  31. data/ext/java/nokogiri/XmlRelaxng.java +144 -0
  32. data/ext/java/nokogiri/XmlSaxParserContext.java +367 -0
  33. data/ext/java/nokogiri/XmlSaxPushParser.java +184 -0
  34. data/ext/java/nokogiri/XmlSchema.java +324 -0
  35. data/ext/java/nokogiri/XmlSyntaxError.java +119 -0
  36. data/ext/java/nokogiri/XmlText.java +119 -0
  37. data/ext/java/nokogiri/XmlXpathContext.java +199 -0
  38. data/ext/java/nokogiri/XsltStylesheet.java +197 -0
  39. data/ext/java/nokogiri/internals/HtmlDomParserContext.java +204 -0
  40. data/ext/java/nokogiri/internals/NokogiriDocumentCache.java +73 -0
  41. data/ext/java/nokogiri/internals/NokogiriErrorHandler.java +86 -0
  42. data/ext/java/nokogiri/internals/NokogiriHandler.java +327 -0
  43. data/ext/java/nokogiri/internals/NokogiriHelpers.java +639 -0
  44. data/ext/java/nokogiri/internals/NokogiriNamespaceCache.java +167 -0
  45. data/ext/java/nokogiri/internals/NokogiriNamespaceContext.java +130 -0
  46. data/ext/java/nokogiri/internals/NokogiriNonStrictErrorHandler.java +74 -0
  47. data/ext/java/nokogiri/internals/NokogiriNonStrictErrorHandler4NekoHtml.java +121 -0
  48. data/ext/java/nokogiri/internals/NokogiriStrictErrorHandler.java +79 -0
  49. data/ext/java/nokogiri/internals/NokogiriXPathFunction.java +141 -0
  50. data/ext/java/nokogiri/internals/NokogiriXPathFunctionResolver.java +73 -0
  51. data/ext/java/nokogiri/internals/NokogiriXPathVariableResolver.java +67 -0
  52. data/ext/java/nokogiri/internals/NokogiriXsltErrorListener.java +86 -0
  53. data/ext/java/nokogiri/internals/ParserContext.java +276 -0
  54. data/ext/java/nokogiri/internals/PushInputStream.java +411 -0
  55. data/ext/java/nokogiri/internals/ReaderNode.java +531 -0
  56. data/ext/java/nokogiri/internals/SaveContextVisitor.java +567 -0
  57. data/ext/java/nokogiri/internals/SchemaErrorHandler.java +76 -0
  58. data/ext/java/nokogiri/internals/XmlDeclHandler.java +42 -0
  59. data/ext/java/nokogiri/internals/XmlDomParser.java +76 -0
  60. data/ext/java/nokogiri/internals/XmlDomParserContext.java +244 -0
  61. data/ext/java/nokogiri/internals/XmlSaxParser.java +65 -0
  62. data/ext/java/nokogiri/internals/XsltExtensionFunction.java +72 -0
  63. data/ext/nokogiri/depend +358 -0
  64. data/ext/nokogiri/extconf.rb +124 -0
  65. data/ext/nokogiri/html_document.c +154 -0
  66. data/ext/nokogiri/html_document.h +10 -0
  67. data/ext/nokogiri/html_element_description.c +276 -0
  68. data/ext/nokogiri/html_element_description.h +10 -0
  69. data/ext/nokogiri/html_entity_lookup.c +32 -0
  70. data/ext/nokogiri/html_entity_lookup.h +8 -0
  71. data/ext/nokogiri/html_sax_parser_context.c +94 -0
  72. data/ext/nokogiri/html_sax_parser_context.h +11 -0
  73. data/ext/nokogiri/nokogiri.c +115 -0
  74. data/ext/nokogiri/nokogiri.h +160 -0
  75. data/ext/nokogiri/xml_attr.c +94 -0
  76. data/ext/nokogiri/xml_attr.h +9 -0
  77. data/ext/nokogiri/xml_attribute_decl.c +70 -0
  78. data/ext/nokogiri/xml_attribute_decl.h +9 -0
  79. data/ext/nokogiri/xml_cdata.c +56 -0
  80. data/ext/nokogiri/xml_cdata.h +9 -0
  81. data/ext/nokogiri/xml_comment.c +54 -0
  82. data/ext/nokogiri/xml_comment.h +9 -0
  83. data/ext/nokogiri/xml_document.c +478 -0
  84. data/ext/nokogiri/xml_document.h +23 -0
  85. data/ext/nokogiri/xml_document_fragment.c +48 -0
  86. data/ext/nokogiri/xml_document_fragment.h +10 -0
  87. data/ext/nokogiri/xml_dtd.c +202 -0
  88. data/ext/nokogiri/xml_dtd.h +10 -0
  89. data/ext/nokogiri/xml_element_content.c +123 -0
  90. data/ext/nokogiri/xml_element_content.h +10 -0
  91. data/ext/nokogiri/xml_element_decl.c +69 -0
  92. data/ext/nokogiri/xml_element_decl.h +9 -0
  93. data/ext/nokogiri/xml_encoding_handler.c +79 -0
  94. data/ext/nokogiri/xml_encoding_handler.h +8 -0
  95. data/ext/nokogiri/xml_entity_decl.c +110 -0
  96. data/ext/nokogiri/xml_entity_decl.h +10 -0
  97. data/ext/nokogiri/xml_entity_reference.c +52 -0
  98. data/ext/nokogiri/xml_entity_reference.h +9 -0
  99. data/ext/nokogiri/xml_io.c +56 -0
  100. data/ext/nokogiri/xml_io.h +11 -0
  101. data/ext/nokogiri/xml_libxml2_hacks.c +112 -0
  102. data/ext/nokogiri/xml_libxml2_hacks.h +12 -0
  103. data/ext/nokogiri/xml_namespace.c +84 -0
  104. data/ext/nokogiri/xml_namespace.h +13 -0
  105. data/ext/nokogiri/xml_node.c +1385 -0
  106. data/ext/nokogiri/xml_node.h +13 -0
  107. data/ext/nokogiri/xml_node_set.c +418 -0
  108. data/ext/nokogiri/xml_node_set.h +9 -0
  109. data/ext/nokogiri/xml_processing_instruction.c +56 -0
  110. data/ext/nokogiri/xml_processing_instruction.h +9 -0
  111. data/ext/nokogiri/xml_reader.c +684 -0
  112. data/ext/nokogiri/xml_reader.h +10 -0
  113. data/ext/nokogiri/xml_relax_ng.c +161 -0
  114. data/ext/nokogiri/xml_relax_ng.h +9 -0
  115. data/ext/nokogiri/xml_sax_parser.c +293 -0
  116. data/ext/nokogiri/xml_sax_parser.h +39 -0
  117. data/ext/nokogiri/xml_sax_parser_context.c +199 -0
  118. data/ext/nokogiri/xml_sax_parser_context.h +10 -0
  119. data/ext/nokogiri/xml_sax_push_parser.c +115 -0
  120. data/ext/nokogiri/xml_sax_push_parser.h +9 -0
  121. data/ext/nokogiri/xml_schema.c +205 -0
  122. data/ext/nokogiri/xml_schema.h +9 -0
  123. data/ext/nokogiri/xml_syntax_error.c +58 -0
  124. data/ext/nokogiri/xml_syntax_error.h +13 -0
  125. data/ext/nokogiri/xml_text.c +50 -0
  126. data/ext/nokogiri/xml_text.h +9 -0
  127. data/ext/nokogiri/xml_xpath_context.c +309 -0
  128. data/ext/nokogiri/xml_xpath_context.h +9 -0
  129. data/ext/nokogiri/xslt_stylesheet.c +264 -0
  130. data/ext/nokogiri/xslt_stylesheet.h +9 -0
  131. data/lib/nokogiri.rb +127 -0
  132. data/lib/nokogiri/css.rb +27 -0
  133. data/lib/nokogiri/css/node.rb +99 -0
  134. data/lib/nokogiri/css/parser.rb +677 -0
  135. data/lib/nokogiri/css/parser.y +237 -0
  136. data/lib/nokogiri/css/parser_extras.rb +91 -0
  137. data/lib/nokogiri/css/syntax_error.rb +7 -0
  138. data/lib/nokogiri/css/tokenizer.rb +152 -0
  139. data/lib/nokogiri/css/tokenizer.rex +55 -0
  140. data/lib/nokogiri/css/xpath_visitor.rb +171 -0
  141. data/lib/nokogiri/decorators/slop.rb +35 -0
  142. data/lib/nokogiri/html.rb +36 -0
  143. data/lib/nokogiri/html/builder.rb +35 -0
  144. data/lib/nokogiri/html/document.rb +213 -0
  145. data/lib/nokogiri/html/document_fragment.rb +41 -0
  146. data/lib/nokogiri/html/element_description.rb +23 -0
  147. data/lib/nokogiri/html/element_description_defaults.rb +671 -0
  148. data/lib/nokogiri/html/entity_lookup.rb +13 -0
  149. data/lib/nokogiri/html/sax/parser.rb +52 -0
  150. data/lib/nokogiri/html/sax/parser_context.rb +16 -0
  151. data/lib/nokogiri/nokogiri.jar +0 -0
  152. data/lib/nokogiri/syntax_error.rb +4 -0
  153. data/lib/nokogiri/version.rb +88 -0
  154. data/lib/nokogiri/xml.rb +67 -0
  155. data/lib/nokogiri/xml/attr.rb +14 -0
  156. data/lib/nokogiri/xml/attribute_decl.rb +18 -0
  157. data/lib/nokogiri/xml/builder.rb +425 -0
  158. data/lib/nokogiri/xml/cdata.rb +11 -0
  159. data/lib/nokogiri/xml/character_data.rb +7 -0
  160. data/lib/nokogiri/xml/document.rb +234 -0
  161. data/lib/nokogiri/xml/document_fragment.rb +98 -0
  162. data/lib/nokogiri/xml/dtd.rb +22 -0
  163. data/lib/nokogiri/xml/element_content.rb +36 -0
  164. data/lib/nokogiri/xml/element_decl.rb +13 -0
  165. data/lib/nokogiri/xml/entity_decl.rb +19 -0
  166. data/lib/nokogiri/xml/namespace.rb +13 -0
  167. data/lib/nokogiri/xml/node.rb +915 -0
  168. data/lib/nokogiri/xml/node/save_options.rb +61 -0
  169. data/lib/nokogiri/xml/node_set.rb +357 -0
  170. data/lib/nokogiri/xml/notation.rb +6 -0
  171. data/lib/nokogiri/xml/parse_options.rb +93 -0
  172. data/lib/nokogiri/xml/pp.rb +2 -0
  173. data/lib/nokogiri/xml/pp/character_data.rb +18 -0
  174. data/lib/nokogiri/xml/pp/node.rb +56 -0
  175. data/lib/nokogiri/xml/processing_instruction.rb +8 -0
  176. data/lib/nokogiri/xml/reader.rb +112 -0
  177. data/lib/nokogiri/xml/relax_ng.rb +32 -0
  178. data/lib/nokogiri/xml/sax.rb +4 -0
  179. data/lib/nokogiri/xml/sax/document.rb +164 -0
  180. data/lib/nokogiri/xml/sax/parser.rb +115 -0
  181. data/lib/nokogiri/xml/sax/parser_context.rb +16 -0
  182. data/lib/nokogiri/xml/sax/push_parser.rb +60 -0
  183. data/lib/nokogiri/xml/schema.rb +63 -0
  184. data/lib/nokogiri/xml/syntax_error.rb +47 -0
  185. data/lib/nokogiri/xml/text.rb +9 -0
  186. data/lib/nokogiri/xml/xpath.rb +10 -0
  187. data/lib/nokogiri/xml/xpath/syntax_error.rb +11 -0
  188. data/lib/nokogiri/xml/xpath_context.rb +16 -0
  189. data/lib/nokogiri/xslt.rb +52 -0
  190. data/lib/nokogiri/xslt/stylesheet.rb +25 -0
  191. data/lib/xsd/xmlparser/nokogiri.rb +90 -0
  192. data/nokogiri_help_responses.md +40 -0
  193. data/tasks/cross_compile.rb +152 -0
  194. data/tasks/nokogiri.org.rb +18 -0
  195. data/tasks/test.rb +94 -0
  196. data/test/css/test_nthiness.rb +159 -0
  197. data/test/css/test_parser.rb +303 -0
  198. data/test/css/test_tokenizer.rb +198 -0
  199. data/test/css/test_xpath_visitor.rb +85 -0
  200. data/test/decorators/test_slop.rb +16 -0
  201. data/test/files/2ch.html +108 -0
  202. data/test/files/address_book.rlx +12 -0
  203. data/test/files/address_book.xml +10 -0
  204. data/test/files/bar/bar.xsd +4 -0
  205. data/test/files/dont_hurt_em_why.xml +422 -0
  206. data/test/files/encoding.html +82 -0
  207. data/test/files/encoding.xhtml +84 -0
  208. data/test/files/exslt.xml +8 -0
  209. data/test/files/exslt.xslt +35 -0
  210. data/test/files/foo/foo.xsd +4 -0
  211. data/test/files/metacharset.html +10 -0
  212. data/test/files/noencoding.html +47 -0
  213. data/test/files/po.xml +32 -0
  214. data/test/files/po.xsd +66 -0
  215. data/test/files/shift_jis.html +10 -0
  216. data/test/files/shift_jis.xml +5 -0
  217. data/test/files/snuggles.xml +3 -0
  218. data/test/files/staff.dtd +10 -0
  219. data/test/files/staff.xml +59 -0
  220. data/test/files/staff.xslt +32 -0
  221. data/test/files/tlm.html +850 -0
  222. data/test/files/valid_bar.xml +2 -0
  223. data/test/helper.rb +173 -0
  224. data/test/html/sax/test_parser.rb +136 -0
  225. data/test/html/sax/test_parser_context.rb +48 -0
  226. data/test/html/test_builder.rb +164 -0
  227. data/test/html/test_document.rb +472 -0
  228. data/test/html/test_document_encoding.rb +138 -0
  229. data/test/html/test_document_fragment.rb +255 -0
  230. data/test/html/test_element_description.rb +100 -0
  231. data/test/html/test_named_characters.rb +14 -0
  232. data/test/html/test_node.rb +190 -0
  233. data/test/html/test_node_encoding.rb +27 -0
  234. data/test/test_convert_xpath.rb +135 -0
  235. data/test/test_css_cache.rb +45 -0
  236. data/test/test_encoding_handler.rb +46 -0
  237. data/test/test_memory_leak.rb +72 -0
  238. data/test/test_nokogiri.rb +132 -0
  239. data/test/test_reader.rb +425 -0
  240. data/test/test_soap4r_sax.rb +52 -0
  241. data/test/test_xslt_transforms.rb +193 -0
  242. data/test/xml/node/test_save_options.rb +28 -0
  243. data/test/xml/node/test_subclass.rb +44 -0
  244. data/test/xml/sax/test_parser.rb +338 -0
  245. data/test/xml/sax/test_parser_context.rb +113 -0
  246. data/test/xml/sax/test_push_parser.rb +156 -0
  247. data/test/xml/test_attr.rb +65 -0
  248. data/test/xml/test_attribute_decl.rb +86 -0
  249. data/test/xml/test_builder.rb +227 -0
  250. data/test/xml/test_cdata.rb +50 -0
  251. data/test/xml/test_comment.rb +29 -0
  252. data/test/xml/test_document.rb +697 -0
  253. data/test/xml/test_document_encoding.rb +26 -0
  254. data/test/xml/test_document_fragment.rb +192 -0
  255. data/test/xml/test_dtd.rb +107 -0
  256. data/test/xml/test_dtd_encoding.rb +33 -0
  257. data/test/xml/test_element_content.rb +56 -0
  258. data/test/xml/test_element_decl.rb +73 -0
  259. data/test/xml/test_entity_decl.rb +122 -0
  260. data/test/xml/test_entity_reference.rb +21 -0
  261. data/test/xml/test_namespace.rb +70 -0
  262. data/test/xml/test_node.rb +917 -0
  263. data/test/xml/test_node_attributes.rb +34 -0
  264. data/test/xml/test_node_encoding.rb +107 -0
  265. data/test/xml/test_node_reparenting.rb +334 -0
  266. data/test/xml/test_node_set.rb +742 -0
  267. data/test/xml/test_parse_options.rb +52 -0
  268. data/test/xml/test_processing_instruction.rb +30 -0
  269. data/test/xml/test_reader_encoding.rb +126 -0
  270. data/test/xml/test_relax_ng.rb +60 -0
  271. data/test/xml/test_schema.rb +94 -0
  272. data/test/xml/test_syntax_error.rb +12 -0
  273. data/test/xml/test_text.rb +47 -0
  274. data/test/xml/test_unparented_node.rb +381 -0
  275. data/test/xml/test_xpath.rb +237 -0
  276. data/test/xslt/test_custom_functions.rb +94 -0
  277. data/test/xslt/test_exception_handling.rb +37 -0
  278. metadata +552 -0
@@ -0,0 +1,14 @@
1
+ require "helper"
2
+
3
+ module Nokogiri
4
+ module HTML
5
+ class TestNamedCharacters < Nokogiri::TestCase
6
+ def test_named_character
7
+ copy = NamedCharacters.get('copy')
8
+ assert_equal 169, NamedCharacters['copy']
9
+ assert_equal copy.value, NamedCharacters['copy']
10
+ assert copy.description
11
+ end
12
+ end
13
+ end
14
+ end
@@ -0,0 +1,190 @@
1
+ require "helper"
2
+
3
+ require 'nkf'
4
+
5
+ module Nokogiri
6
+ module HTML
7
+ class TestNode < Nokogiri::TestCase
8
+ def setup
9
+ super
10
+ @html = Nokogiri::HTML(<<-eohtml)
11
+ <html>
12
+ <head></head>
13
+ <body>
14
+ <div class='baz'><a href="foo" class="bar">first</a></div>
15
+ </body>
16
+ </html>
17
+ eohtml
18
+ end
19
+
20
+ def test_to_a
21
+ assert_equal [['class', 'bar'], ['href', 'foo']],@html.at('a').to_a.sort
22
+ end
23
+
24
+ def test_attr
25
+ node = @html.at('div.baz')
26
+ assert_equal node['class'], node.attr('class')
27
+ end
28
+
29
+ def test_get_attribute
30
+ element = @html.at('div')
31
+ assert_equal 'baz', element.get_attribute('class')
32
+ assert_equal 'baz', element['class']
33
+ element['href'] = "javascript:alert(\"AGGA-KA-BOO!\")"
34
+ assert_match(/%22AGGA-KA-BOO!%22/, element.to_html)
35
+ end
36
+
37
+ def test_css_path_round_trip
38
+ doc = Nokogiri::HTML(File.read(HTML_FILE))
39
+ %w{ #header small div[2] div.post body }.each do |css_sel|
40
+ ele = doc.at css_sel
41
+ assert_equal ele, doc.at(ele.css_path), ele.css_path
42
+ end
43
+ end
44
+
45
+ def test_path_round_trip
46
+ doc = Nokogiri::HTML(File.read(HTML_FILE))
47
+ %w{ #header small div[2] div.post body }.each do |css_sel|
48
+ ele = doc.at css_sel
49
+ assert_equal ele, doc.at(ele.path), ele.path
50
+ end
51
+ end
52
+
53
+ def test_append_with_document
54
+ assert_raises(ArgumentError) do
55
+ @html.root << Nokogiri::HTML::Document.new
56
+ end
57
+ end
58
+
59
+ ###
60
+ # Make sure a document that doesn't declare a meta encoding returns
61
+ # nil.
62
+ def test_meta_encoding
63
+ assert_nil @html.meta_encoding
64
+ end
65
+
66
+ def test_description
67
+ assert desc = @html.at('a.bar').description
68
+ assert_equal 'a', desc.name
69
+ end
70
+
71
+ def test_ancestors_with_selector
72
+ assert node = @html.at('a.bar').child
73
+ assert list = node.ancestors('.baz')
74
+ assert_equal 1, list.length
75
+ assert_equal 'div', list.first.name
76
+ end
77
+
78
+ def test_matches_inside_fragment
79
+ fragment = DocumentFragment.new @html
80
+ fragment << XML::Node.new('a', @html)
81
+
82
+ a = fragment.children.last
83
+ assert a.matches?('a'), 'a should match'
84
+ end
85
+
86
+ def test_css_matches?
87
+ assert node = @html.at('a.bar')
88
+ assert node.matches?('a.bar')
89
+ end
90
+
91
+ def test_xpath_matches?
92
+ assert node = @html.at('//a')
93
+ assert node.matches?('//a')
94
+ end
95
+
96
+ def test_unlink_then_swap
97
+ node = @html.at('a')
98
+ node.unlink
99
+
100
+ another_node = @html.at('div')
101
+ assert another_node, 'should have a node'
102
+
103
+ # This used to segv
104
+ assert_nothing_raised do
105
+ node.add_previous_sibling another_node
106
+ end
107
+ end
108
+
109
+ def test_swap
110
+ @html.at('div').swap('<a href="foo">bar</a>')
111
+ a_tag = @html.css('a').first
112
+ assert_equal 'body', a_tag.parent.name
113
+ assert_equal 0, @html.css('div').length
114
+ end
115
+
116
+ def test_swap_with_regex_characters
117
+ @html.at('div').swap('<a href="foo">ba)r</a>')
118
+ a_tag = @html.css('a').first
119
+ assert_equal 'ba)r', a_tag.text
120
+ end
121
+
122
+ def test_attribute_decodes_entities
123
+ node = @html.at('div')
124
+ node['href'] = 'foo&bar'
125
+ assert_equal 'foo&bar', node['href']
126
+ node['href'] += '&baz'
127
+ assert_equal 'foo&bar&baz', node['href']
128
+ end
129
+
130
+ def test_parse_config_option
131
+ node = @html.at('div')
132
+ options = nil
133
+ node.parse("<div></div>") do |config|
134
+ options = config
135
+ end
136
+ assert_equal Nokogiri::XML::ParseOptions::DEFAULT_HTML, options.to_i
137
+ end
138
+
139
+ def test_fragment_handler_does_not_regurge_on_invalid_attributes
140
+ iframe = %Q{<iframe style="width: 0%; height: 0px" src="http://someurl" allowtransparency></iframe>}
141
+ assert_nothing_raised { @html.at('div').fragment(iframe) }
142
+ end
143
+
144
+ def test_fragment
145
+ fragment = @html.fragment(<<-eohtml)
146
+ hello
147
+ <div class="foo">
148
+ <p>bar</p>
149
+ </div>
150
+ world
151
+ eohtml
152
+ assert_match(/^hello/, fragment.inner_html.strip)
153
+ assert_equal 3, fragment.children.length
154
+ assert p_tag = fragment.css('p').first
155
+ assert_equal 'div', p_tag.parent.name
156
+ assert_equal 'foo', p_tag.parent['class']
157
+ end
158
+
159
+ def test_fragment_serialization
160
+ fragment = Nokogiri::HTML.fragment("<div>foo</div>")
161
+ assert_equal "<div>foo</div>", fragment.serialize.chomp
162
+ assert_equal "<div>foo</div>", fragment.to_xml.chomp
163
+ assert_equal "<div>foo</div>", fragment.inner_html
164
+ assert_equal "<div>foo</div>", fragment.to_html
165
+ assert_equal "<div>foo</div>", fragment.to_s
166
+ end
167
+
168
+ def test_to_html_does_not_contain_entities
169
+ return unless defined?(NKF) # NKF is not implemented on Rubinius as of 2009-11-23
170
+ html = NKF.nkf("-e --msdos", <<-EOH)
171
+ <html><body>
172
+ <p> test paragraph
173
+ foo bar </p>
174
+ </body></html>
175
+ EOH
176
+ nokogiri = Nokogiri::HTML.parse(html)
177
+
178
+ if RUBY_PLATFORM =~ /java/
179
+ # NKF linebreak modes are not supported as of JRuby 1.2
180
+ # see http://jira.codehaus.org/browse/JRUBY-3602 for status
181
+ assert_equal "<p>testparagraph\nfoobar</p>",
182
+ nokogiri.at("p").to_html.gsub(/ /, '')
183
+ else
184
+ assert_equal "<p>testparagraph\r\nfoobar</p>",
185
+ nokogiri.at("p").to_html.gsub(/ /, '')
186
+ end
187
+ end
188
+ end
189
+ end
190
+ end
@@ -0,0 +1,27 @@
1
+ # -*- coding: utf-8 -*-
2
+ require "helper"
3
+
4
+ module Nokogiri
5
+ module HTML
6
+ if RUBY_VERSION =~ /^1\.9/
7
+ class TestNodeEncoding < Nokogiri::TestCase
8
+ def test_inner_html
9
+ doc = Nokogiri::HTML File.open(SHIFT_JIS_HTML, 'rb')
10
+
11
+ hello = "こんにちは"
12
+
13
+ contents = doc.at('h2').inner_html
14
+ assert_equal doc.encoding, contents.encoding.name
15
+ assert_match hello.encode('Shift_JIS'), contents
16
+
17
+ contents = doc.at('h2').inner_html(:encoding => 'UTF-8')
18
+ assert_match hello, contents
19
+
20
+ doc.encoding = 'UTF-8'
21
+ contents = doc.at('h2').inner_html
22
+ assert_match hello, contents
23
+ end
24
+ end
25
+ end
26
+ end
27
+ end
@@ -0,0 +1,135 @@
1
+ require "helper"
2
+
3
+ class TestConvertXPath < Nokogiri::TestCase
4
+
5
+ def setup
6
+ super
7
+ @N = Nokogiri(File.read(HTML_FILE))
8
+ end
9
+
10
+ def assert_syntactical_equivalence(hpath, xpath, match, &blk)
11
+ blk ||= lambda {|j| j.first}
12
+ assert_equal match, blk.call(@N.search(xpath)), "xpath result did not match"
13
+ end
14
+
15
+ def test_child_tag
16
+ assert_syntactical_equivalence("h1[a]", ".//h1[child::a]", "Tender Lovemaking") do |j|
17
+ j.inner_text
18
+ end
19
+ end
20
+
21
+ def test_child_tag_equals
22
+ assert_syntactical_equivalence("h1[a='Tender Lovemaking']", ".//h1[child::a = 'Tender Lovemaking']", "Tender Lovemaking") do |j|
23
+ j.inner_text
24
+ end
25
+ end
26
+
27
+ def test_filter_contains
28
+ assert_syntactical_equivalence("title:contains('Tender')", ".//title[contains(., 'Tender')]",
29
+ "Tender Lovemaking ") do |j|
30
+ j.inner_text
31
+ end
32
+ end
33
+
34
+ def test_filter_comment
35
+ assert_syntactical_equivalence("div comment()[2]", ".//div//comment()[position() = 2]", "<!-- end of header -->") do |j|
36
+ j.first.to_s
37
+ end
38
+ end
39
+
40
+ def test_filter_text
41
+ assert_syntactical_equivalence("a[text()]", ".//a[normalize-space(child::text())]", "<a href=\"http://tenderlovemaking.com\">Tender Lovemaking</a>") do |j|
42
+ j.first.to_s
43
+ end
44
+ assert_syntactical_equivalence("a[text()='Tender Lovemaking']", ".//a[normalize-space(child::text()) = 'Tender Lovemaking']", "<a href=\"http://tenderlovemaking.com\">Tender Lovemaking</a>") do |j|
45
+ j.first.to_s
46
+ end
47
+ assert_syntactical_equivalence("a/text()", ".//a/child::text()", "Tender Lovemaking") do |j|
48
+ j.first.to_s
49
+ end
50
+ assert_syntactical_equivalence("h2//a[text()!='Back Home!']", ".//h2//a[normalize-space(child::text()) != 'Back Home!']", "Meow meow meow meow meow") do |j|
51
+ j.first.inner_text
52
+ end
53
+ end
54
+
55
+ def test_filter_by_attr
56
+ assert_syntactical_equivalence("a[@href='http://blog.geminigeek.com/wordpress-theme']",
57
+ ".//a[@href = 'http://blog.geminigeek.com/wordpress-theme']",
58
+ "http://blog.geminigeek.com/wordpress-theme") do |j|
59
+ j.first["href"]
60
+ end
61
+ end
62
+
63
+ def test_css_id
64
+ assert_syntactical_equivalence("#linkcat-7", ".//*[@id = 'linkcat-7']", "linkcat-7") do |j|
65
+ j.first["id"]
66
+ end
67
+ assert_syntactical_equivalence("li#linkcat-7", ".//li[@id = 'linkcat-7']", "linkcat-7") do |j|
68
+ j.first["id"]
69
+ end
70
+ end
71
+
72
+ def test_css_class
73
+ assert_syntactical_equivalence(".cat-item-15", ".//*[contains(concat(' ', @class, ' '), ' cat-item-15 ')]",
74
+ "cat-item cat-item-15") do |j|
75
+ j.first["class"]
76
+ end
77
+ assert_syntactical_equivalence("li.cat-item-15", ".//li[contains(concat(' ', @class, ' '), ' cat-item-15 ')]",
78
+ "cat-item cat-item-15") do |j|
79
+ j.first["class"]
80
+ end
81
+ end
82
+
83
+ def test_css_tags
84
+ assert_syntactical_equivalence("div li a", ".//div//li//a", "http://brobinius.org/") do |j|
85
+ j.first.inner_text
86
+ end
87
+ assert_syntactical_equivalence("div li > a", ".//div//li/a", "http://brobinius.org/") do |j|
88
+ j.first.inner_text
89
+ end
90
+ assert_syntactical_equivalence("h1 ~ small", ".//small[preceding-sibling::h1]", "The act of making love, tenderly.") do |j|
91
+ j.first.inner_text
92
+ end
93
+ assert_syntactical_equivalence("h1 ~ small", ".//small[preceding-sibling::h1]", "The act of making love, tenderly.") do |j|
94
+ j.first.inner_text
95
+ end
96
+ end
97
+
98
+ def test_positional
99
+ assert_syntactical_equivalence("div/div:first()", ".//div/div[position() = 1]", "\r\nTender Lovemaking\r\nThe act of making love, tenderly.\r\n".gsub(/[\r\n]/, '')) do |j|
100
+ j.first.inner_text.gsub(/[\r\n]/, '')
101
+ end
102
+ assert_syntactical_equivalence("div/div:first", ".//div/div[position() = 1]", "\r\nTender Lovemaking\r\nThe act of making love, tenderly.\r\n".gsub(/[\r\n]/, '')) do |j|
103
+ j.first.inner_text.gsub(/[\r\n]/, '')
104
+ end
105
+ assert_syntactical_equivalence("div//a:last()", ".//div//a[position() = last()]", "Wordpress") do |j|
106
+ j.last.inner_text
107
+ end
108
+ assert_syntactical_equivalence("div//a:last", ".//div//a[position() = last()]", "Wordpress") do |j|
109
+ j.last.inner_text
110
+ end
111
+ end
112
+
113
+ def test_multiple_filters
114
+ assert_syntactical_equivalence("a[@rel='bookmark'][1]", ".//a[@rel = 'bookmark' and position() = 1]", "Back Home!") do |j|
115
+ j.first.inner_text
116
+ end
117
+ end
118
+
119
+ # TODO:
120
+ # doc/'title ~ link' -> links that are siblings of title
121
+ # doc/'p[@class~="final"]' -> class includes string (whitespacy)
122
+ # doc/'p[text()*="final"]' -> text includes string (index) (broken: always returns true?)
123
+ # doc/'p[text()$="final"]' -> /final$/
124
+ # doc/'p[text()|="final"]' -> /^final$/
125
+ # doc/'p[text()^="final"]' -> string starts with 'final'
126
+ # nth_first
127
+ # nth_last
128
+ # even
129
+ # odd
130
+ # first-child, nth-child, last-child, nth-last-child, nth-last-of-type
131
+ # only-of-type, only-child
132
+ # parent
133
+ # empty
134
+ # root
135
+ end
@@ -0,0 +1,45 @@
1
+ require "helper"
2
+
3
+ class TestCssCache < Nokogiri::TestCase
4
+
5
+ def setup
6
+ super
7
+ @css = "a1 > b2 > c3"
8
+ @parse_result = Nokogiri::CSS.parse(@css)
9
+ @to_xpath_result = @parse_result.map {|ast| ast.to_xpath}
10
+ Nokogiri::CSS::Parser.class_eval do
11
+ class << @cache
12
+ alias :old_bracket :[]
13
+ attr_reader :count
14
+ def [](key)
15
+ @count ||= 0
16
+ @count += 1
17
+ old_bracket(key)
18
+ end
19
+ end
20
+ end
21
+ assert Nokogiri::CSS::Parser.cache_on?
22
+ end
23
+
24
+ def teardown
25
+ Nokogiri::CSS::Parser.clear_cache
26
+ Nokogiri::CSS::Parser.set_cache true
27
+ end
28
+
29
+ [ false, true ].each do |cache_setting|
30
+ define_method "test_css_cache_#{cache_setting ? "true" : "false"}" do
31
+ times = cache_setting ? 4 : nil
32
+
33
+ Nokogiri::CSS::Parser.set_cache cache_setting
34
+
35
+ Nokogiri::CSS.xpath_for(@css)
36
+ Nokogiri::CSS.xpath_for(@css)
37
+ Nokogiri::CSS::Parser.new.xpath_for(@css)
38
+ Nokogiri::CSS::Parser.new.xpath_for(@css)
39
+
40
+ assert_equal(times, Nokogiri::CSS::Parser.class_eval { @cache.count })
41
+ end
42
+ end
43
+
44
+
45
+ end
@@ -0,0 +1,46 @@
1
+ # -*- coding: utf-8 -*-
2
+
3
+ require "helper"
4
+
5
+ class TestEncodingHandler < Nokogiri::TestCase
6
+ def teardown
7
+ Nokogiri::EncodingHandler.clear_aliases!
8
+ end
9
+
10
+ def test_get
11
+ assert_not_nil Nokogiri::EncodingHandler['UTF-8']
12
+ assert_nil Nokogiri::EncodingHandler['alsdkjfhaldskjfh']
13
+ end
14
+
15
+ def test_name
16
+ eh = Nokogiri::EncodingHandler['UTF-8']
17
+ assert_equal "UTF-8", eh.name
18
+ end
19
+
20
+ def test_alias
21
+ Nokogiri::EncodingHandler.alias('UTF-8', 'UTF-18')
22
+ assert_equal 'UTF-8', Nokogiri::EncodingHandler['UTF-18'].name
23
+ end
24
+
25
+ def test_cleanup_aliases
26
+ assert_nil Nokogiri::EncodingHandler['UTF-9']
27
+ Nokogiri::EncodingHandler.alias('UTF-8', 'UTF-9')
28
+ assert_not_nil Nokogiri::EncodingHandler['UTF-9']
29
+
30
+ Nokogiri::EncodingHandler.clear_aliases!
31
+ assert_nil Nokogiri::EncodingHandler['UTF-9']
32
+ end
33
+
34
+ def test_delete
35
+ assert_nil Nokogiri::EncodingHandler['UTF-9']
36
+ Nokogiri::EncodingHandler.alias('UTF-8', 'UTF-9')
37
+ assert_not_nil Nokogiri::EncodingHandler['UTF-9']
38
+
39
+ Nokogiri::EncodingHandler.delete 'UTF-9'
40
+ assert_nil Nokogiri::EncodingHandler['UTF-9']
41
+ end
42
+
43
+ def test_delete_non_existent
44
+ assert_nil Nokogiri::EncodingHandler.delete('UTF-9')
45
+ end
46
+ end