nokogiri 1.0.0 → 1.6.8.1

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (309) hide show
  1. checksums.yaml +7 -0
  2. data/.autotest +26 -0
  3. data/.cross_rubies +9 -0
  4. data/.editorconfig +17 -0
  5. data/.gemtest +0 -0
  6. data/.travis.yml +51 -0
  7. data/CHANGELOG.rdoc +1160 -0
  8. data/CONTRIBUTING.md +42 -0
  9. data/C_CODING_STYLE.rdoc +33 -0
  10. data/Gemfile +22 -0
  11. data/LICENSE.txt +31 -0
  12. data/Manifest.txt +284 -40
  13. data/README.md +166 -0
  14. data/ROADMAP.md +111 -0
  15. data/Rakefile +310 -199
  16. data/STANDARD_RESPONSES.md +47 -0
  17. data/Y_U_NO_GEMSPEC.md +155 -0
  18. data/appveyor.yml +22 -0
  19. data/bin/nokogiri +118 -0
  20. data/build_all +45 -0
  21. data/dependencies.yml +29 -0
  22. data/ext/nokogiri/depend +358 -0
  23. data/ext/nokogiri/extconf.rb +664 -34
  24. data/ext/nokogiri/html_document.c +120 -33
  25. data/ext/nokogiri/html_document.h +1 -1
  26. data/ext/nokogiri/html_element_description.c +279 -0
  27. data/ext/nokogiri/html_element_description.h +10 -0
  28. data/ext/nokogiri/html_entity_lookup.c +32 -0
  29. data/ext/nokogiri/html_entity_lookup.h +8 -0
  30. data/ext/nokogiri/html_sax_parser_context.c +116 -0
  31. data/ext/nokogiri/html_sax_parser_context.h +11 -0
  32. data/ext/nokogiri/html_sax_push_parser.c +87 -0
  33. data/ext/nokogiri/html_sax_push_parser.h +9 -0
  34. data/ext/nokogiri/nokogiri.c +145 -0
  35. data/ext/nokogiri/nokogiri.h +131 -0
  36. data/ext/nokogiri/xml_attr.c +94 -0
  37. data/ext/nokogiri/xml_attr.h +9 -0
  38. data/ext/nokogiri/xml_attribute_decl.c +70 -0
  39. data/ext/nokogiri/xml_attribute_decl.h +9 -0
  40. data/ext/nokogiri/xml_cdata.c +23 -19
  41. data/ext/nokogiri/xml_cdata.h +1 -1
  42. data/ext/nokogiri/xml_comment.c +69 -0
  43. data/ext/nokogiri/xml_comment.h +9 -0
  44. data/ext/nokogiri/xml_document.c +501 -54
  45. data/ext/nokogiri/xml_document.h +14 -1
  46. data/ext/nokogiri/xml_document_fragment.c +48 -0
  47. data/ext/nokogiri/xml_document_fragment.h +10 -0
  48. data/ext/nokogiri/xml_dtd.c +109 -24
  49. data/ext/nokogiri/xml_dtd.h +3 -1
  50. data/ext/nokogiri/xml_element_content.c +123 -0
  51. data/ext/nokogiri/xml_element_content.h +10 -0
  52. data/ext/nokogiri/xml_element_decl.c +69 -0
  53. data/ext/nokogiri/xml_element_decl.h +9 -0
  54. data/ext/nokogiri/xml_encoding_handler.c +79 -0
  55. data/ext/nokogiri/xml_encoding_handler.h +8 -0
  56. data/ext/nokogiri/xml_entity_decl.c +110 -0
  57. data/ext/nokogiri/xml_entity_decl.h +10 -0
  58. data/ext/nokogiri/xml_entity_reference.c +52 -0
  59. data/ext/nokogiri/xml_entity_reference.h +9 -0
  60. data/ext/nokogiri/xml_io.c +60 -0
  61. data/ext/nokogiri/xml_io.h +11 -0
  62. data/ext/nokogiri/xml_libxml2_hacks.c +112 -0
  63. data/ext/nokogiri/xml_libxml2_hacks.h +12 -0
  64. data/ext/nokogiri/xml_namespace.c +117 -0
  65. data/ext/nokogiri/xml_namespace.h +13 -0
  66. data/ext/nokogiri/xml_node.c +1285 -315
  67. data/ext/nokogiri/xml_node.h +4 -6
  68. data/ext/nokogiri/xml_node_set.c +415 -54
  69. data/ext/nokogiri/xml_node_set.h +6 -2
  70. data/ext/nokogiri/xml_processing_instruction.c +56 -0
  71. data/ext/nokogiri/xml_processing_instruction.h +9 -0
  72. data/ext/nokogiri/xml_reader.c +316 -77
  73. data/ext/nokogiri/xml_reader.h +1 -1
  74. data/ext/nokogiri/xml_relax_ng.c +161 -0
  75. data/ext/nokogiri/xml_relax_ng.h +9 -0
  76. data/ext/nokogiri/xml_sax_parser.c +215 -80
  77. data/ext/nokogiri/xml_sax_parser.h +30 -1
  78. data/ext/nokogiri/xml_sax_parser_context.c +262 -0
  79. data/ext/nokogiri/xml_sax_parser_context.h +10 -0
  80. data/ext/nokogiri/xml_sax_push_parser.c +115 -0
  81. data/ext/nokogiri/xml_sax_push_parser.h +9 -0
  82. data/ext/nokogiri/xml_schema.c +205 -0
  83. data/ext/nokogiri/xml_schema.h +9 -0
  84. data/ext/nokogiri/xml_syntax_error.c +45 -175
  85. data/ext/nokogiri/xml_syntax_error.h +4 -2
  86. data/ext/nokogiri/xml_text.c +37 -14
  87. data/ext/nokogiri/xml_text.h +1 -1
  88. data/ext/nokogiri/xml_xpath_context.c +230 -13
  89. data/ext/nokogiri/xml_xpath_context.h +2 -1
  90. data/ext/nokogiri/xslt_stylesheet.c +196 -34
  91. data/ext/nokogiri/xslt_stylesheet.h +6 -1
  92. data/lib/nokogiri/css/node.rb +18 -61
  93. data/lib/nokogiri/css/parser.rb +725 -17
  94. data/lib/nokogiri/css/parser.y +126 -63
  95. data/lib/nokogiri/css/parser_extras.rb +91 -0
  96. data/lib/nokogiri/css/syntax_error.rb +7 -0
  97. data/lib/nokogiri/css/tokenizer.rb +148 -5
  98. data/lib/nokogiri/css/tokenizer.rex +31 -39
  99. data/lib/nokogiri/css/xpath_visitor.rb +109 -51
  100. data/lib/nokogiri/css.rb +24 -3
  101. data/lib/nokogiri/decorators/slop.rb +42 -0
  102. data/lib/nokogiri/html/builder.rb +27 -1
  103. data/lib/nokogiri/html/document.rb +329 -3
  104. data/lib/nokogiri/html/document_fragment.rb +39 -0
  105. data/lib/nokogiri/html/element_description.rb +23 -0
  106. data/lib/nokogiri/html/element_description_defaults.rb +671 -0
  107. data/lib/nokogiri/html/entity_lookup.rb +13 -0
  108. data/lib/nokogiri/html/sax/parser.rb +35 -4
  109. data/lib/nokogiri/html/sax/parser_context.rb +16 -0
  110. data/lib/nokogiri/html/sax/push_parser.rb +36 -0
  111. data/lib/nokogiri/html.rb +18 -76
  112. data/lib/nokogiri/syntax_error.rb +4 -0
  113. data/lib/nokogiri/version.rb +106 -1
  114. data/lib/nokogiri/xml/attr.rb +14 -0
  115. data/lib/nokogiri/xml/attribute_decl.rb +18 -0
  116. data/lib/nokogiri/xml/builder.rb +395 -31
  117. data/lib/nokogiri/xml/cdata.rb +4 -2
  118. data/lib/nokogiri/xml/character_data.rb +7 -0
  119. data/lib/nokogiri/xml/document.rb +267 -12
  120. data/lib/nokogiri/xml/document_fragment.rb +149 -0
  121. data/lib/nokogiri/xml/dtd.rb +27 -1
  122. data/lib/nokogiri/xml/element_content.rb +36 -0
  123. data/lib/nokogiri/xml/element_decl.rb +13 -0
  124. data/lib/nokogiri/xml/entity_decl.rb +19 -0
  125. data/lib/nokogiri/xml/namespace.rb +13 -0
  126. data/lib/nokogiri/xml/node/save_options.rb +61 -0
  127. data/lib/nokogiri/xml/node.rb +748 -109
  128. data/lib/nokogiri/xml/node_set.rb +200 -72
  129. data/lib/nokogiri/xml/parse_options.rb +120 -0
  130. data/lib/nokogiri/xml/pp/character_data.rb +18 -0
  131. data/lib/nokogiri/xml/pp/node.rb +56 -0
  132. data/lib/nokogiri/xml/pp.rb +2 -0
  133. data/lib/nokogiri/xml/processing_instruction.rb +8 -0
  134. data/lib/nokogiri/xml/reader.rb +102 -4
  135. data/lib/nokogiri/xml/relax_ng.rb +32 -0
  136. data/lib/nokogiri/xml/sax/document.rb +114 -2
  137. data/lib/nokogiri/xml/sax/parser.rb +97 -7
  138. data/lib/nokogiri/xml/sax/parser_context.rb +16 -0
  139. data/lib/nokogiri/xml/sax/push_parser.rb +60 -0
  140. data/lib/nokogiri/xml/sax.rb +2 -7
  141. data/lib/nokogiri/xml/schema.rb +63 -0
  142. data/lib/nokogiri/xml/searchable.rb +221 -0
  143. data/lib/nokogiri/xml/syntax_error.rb +27 -1
  144. data/lib/nokogiri/xml/text.rb +4 -1
  145. data/lib/nokogiri/xml/xpath/syntax_error.rb +11 -0
  146. data/lib/nokogiri/xml/xpath.rb +4 -0
  147. data/lib/nokogiri/xml/xpath_context.rb +3 -1
  148. data/lib/nokogiri/xml.rb +45 -38
  149. data/lib/nokogiri/xslt/stylesheet.rb +19 -0
  150. data/lib/nokogiri/xslt.rb +47 -2
  151. data/lib/nokogiri.rb +117 -24
  152. data/lib/xsd/xmlparser/nokogiri.rb +102 -0
  153. data/patches/sort-patches-by-date +25 -0
  154. data/ports/archives/libxml2-2.9.4.tar.gz +0 -0
  155. data/ports/archives/libxslt-1.1.29.tar.gz +0 -0
  156. data/suppressions/README.txt +1 -0
  157. data/suppressions/nokogiri_ree-1.8.7.358.supp +61 -0
  158. data/suppressions/nokogiri_ruby-1.8.7.370.supp +0 -0
  159. data/suppressions/nokogiri_ruby-1.9.2.320.supp +28 -0
  160. data/suppressions/nokogiri_ruby-1.9.3.327.supp +28 -0
  161. data/tasks/test.rb +100 -0
  162. data/test/css/test_nthiness.rb +73 -6
  163. data/test/css/test_parser.rb +184 -39
  164. data/test/css/test_tokenizer.rb +72 -19
  165. data/test/css/test_xpath_visitor.rb +44 -2
  166. data/test/decorators/test_slop.rb +20 -0
  167. data/test/files/2ch.html +108 -0
  168. data/test/files/GH_1042.html +18 -0
  169. data/test/files/address_book.rlx +12 -0
  170. data/test/files/address_book.xml +10 -0
  171. data/test/files/atom.xml +344 -0
  172. data/test/files/bar/bar.xsd +4 -0
  173. data/test/files/bogus.xml +0 -0
  174. data/test/files/dont_hurt_em_why.xml +422 -0
  175. data/test/files/encoding.html +82 -0
  176. data/test/files/encoding.xhtml +84 -0
  177. data/test/files/exslt.xml +8 -0
  178. data/test/files/exslt.xslt +35 -0
  179. data/test/files/foo/foo.xsd +4 -0
  180. data/test/files/metacharset.html +10 -0
  181. data/test/files/namespace_pressure_test.xml +1684 -0
  182. data/test/files/noencoding.html +47 -0
  183. data/test/files/po.xml +32 -0
  184. data/test/files/po.xsd +66 -0
  185. data/test/files/saml/saml20assertion_schema.xsd +283 -0
  186. data/test/files/saml/saml20protocol_schema.xsd +302 -0
  187. data/test/files/saml/xenc_schema.xsd +146 -0
  188. data/test/files/saml/xmldsig_schema.xsd +318 -0
  189. data/test/files/shift_jis.html +10 -0
  190. data/test/files/shift_jis.xml +5 -0
  191. data/test/files/shift_jis_no_charset.html +9 -0
  192. data/test/files/slow-xpath.xml +25509 -0
  193. data/test/files/snuggles.xml +3 -0
  194. data/test/files/staff.dtd +10 -0
  195. data/test/files/test_document_url/bar.xml +2 -0
  196. data/test/files/test_document_url/document.dtd +4 -0
  197. data/test/files/test_document_url/document.xml +6 -0
  198. data/test/files/tlm.html +2 -1
  199. data/test/files/to_be_xincluded.xml +2 -0
  200. data/test/files/valid_bar.xml +2 -0
  201. data/test/files/xinclude.xml +4 -0
  202. data/test/helper.rb +124 -13
  203. data/test/html/sax/test_parser.rb +118 -4
  204. data/test/html/sax/test_parser_context.rb +46 -0
  205. data/test/html/sax/test_push_parser.rb +87 -0
  206. data/test/html/test_builder.rb +94 -8
  207. data/test/html/test_document.rb +626 -11
  208. data/test/html/test_document_encoding.rb +145 -0
  209. data/test/html/test_document_fragment.rb +301 -0
  210. data/test/html/test_element_description.rb +105 -0
  211. data/test/html/test_named_characters.rb +14 -0
  212. data/test/html/test_node.rb +212 -0
  213. data/test/html/test_node_encoding.rb +85 -0
  214. data/test/namespaces/test_additional_namespaces_in_builder_doc.rb +14 -0
  215. data/test/namespaces/test_namespaces_aliased_default.rb +24 -0
  216. data/test/namespaces/test_namespaces_in_builder_doc.rb +75 -0
  217. data/test/namespaces/test_namespaces_in_cloned_doc.rb +31 -0
  218. data/test/namespaces/test_namespaces_in_created_doc.rb +75 -0
  219. data/test/namespaces/test_namespaces_in_parsed_doc.rb +80 -0
  220. data/test/namespaces/test_namespaces_preservation.rb +31 -0
  221. data/test/test_convert_xpath.rb +2 -47
  222. data/test/test_css_cache.rb +45 -0
  223. data/test/test_encoding_handler.rb +48 -0
  224. data/test/test_memory_leak.rb +156 -0
  225. data/test/test_nokogiri.rb +103 -1
  226. data/test/test_soap4r_sax.rb +52 -0
  227. data/test/test_xslt_transforms.rb +293 -8
  228. data/test/xml/node/test_save_options.rb +28 -0
  229. data/test/xml/node/test_subclass.rb +44 -0
  230. data/test/xml/sax/test_parser.rb +309 -8
  231. data/test/xml/sax/test_parser_context.rb +115 -0
  232. data/test/xml/sax/test_push_parser.rb +157 -0
  233. data/test/xml/test_attr.rb +67 -0
  234. data/test/xml/test_attribute_decl.rb +86 -0
  235. data/test/xml/test_builder.rb +327 -2
  236. data/test/xml/test_c14n.rb +180 -0
  237. data/test/xml/test_cdata.rb +32 -2
  238. data/test/xml/test_comment.rb +40 -0
  239. data/test/xml/test_document.rb +846 -35
  240. data/test/xml/test_document_encoding.rb +31 -0
  241. data/test/xml/test_document_fragment.rb +271 -0
  242. data/test/xml/test_dtd.rb +153 -9
  243. data/test/xml/test_dtd_encoding.rb +31 -0
  244. data/test/xml/test_element_content.rb +56 -0
  245. data/test/xml/test_element_decl.rb +73 -0
  246. data/test/xml/test_entity_decl.rb +122 -0
  247. data/test/xml/test_entity_reference.rb +251 -0
  248. data/test/xml/test_namespace.rb +96 -0
  249. data/test/xml/test_node.rb +1126 -105
  250. data/test/xml/test_node_attributes.rb +115 -0
  251. data/test/xml/test_node_encoding.rb +69 -0
  252. data/test/xml/test_node_inheritance.rb +32 -0
  253. data/test/xml/test_node_reparenting.rb +549 -0
  254. data/test/xml/test_node_set.rb +668 -9
  255. data/test/xml/test_parse_options.rb +64 -0
  256. data/test/xml/test_processing_instruction.rb +30 -0
  257. data/test/xml/test_reader.rb +589 -0
  258. data/test/xml/test_reader_encoding.rb +134 -0
  259. data/test/xml/test_relax_ng.rb +60 -0
  260. data/test/xml/test_schema.rb +142 -0
  261. data/test/xml/test_syntax_error.rb +30 -0
  262. data/test/xml/test_text.rb +49 -2
  263. data/test/xml/test_unparented_node.rb +440 -0
  264. data/test/xml/test_xinclude.rb +83 -0
  265. data/test/xml/test_xpath.rb +445 -0
  266. data/test/xslt/test_custom_functions.rb +133 -0
  267. data/test/xslt/test_exception_handling.rb +37 -0
  268. data/test_all +107 -0
  269. metadata +459 -115
  270. data/History.txt +0 -6
  271. data/README.ja.txt +0 -86
  272. data/README.txt +0 -87
  273. data/ext/nokogiri/html_sax_parser.c +0 -32
  274. data/ext/nokogiri/html_sax_parser.h +0 -11
  275. data/ext/nokogiri/native.c +0 -40
  276. data/ext/nokogiri/native.h +0 -51
  277. data/ext/nokogiri/xml_xpath.c +0 -46
  278. data/ext/nokogiri/xml_xpath.h +0 -11
  279. data/lib/nokogiri/css/generated_parser.rb +0 -653
  280. data/lib/nokogiri/css/generated_tokenizer.rb +0 -159
  281. data/lib/nokogiri/decorators/hpricot/node.rb +0 -58
  282. data/lib/nokogiri/decorators/hpricot/node_set.rb +0 -14
  283. data/lib/nokogiri/decorators/hpricot/xpath_visitor.rb +0 -17
  284. data/lib/nokogiri/decorators/hpricot.rb +0 -3
  285. data/lib/nokogiri/decorators.rb +0 -1
  286. data/lib/nokogiri/hpricot.rb +0 -47
  287. data/lib/nokogiri/xml/after_handler.rb +0 -18
  288. data/lib/nokogiri/xml/before_handler.rb +0 -32
  289. data/lib/nokogiri/xml/element.rb +0 -6
  290. data/lib/nokogiri/xml/entity_declaration.rb +0 -9
  291. data/nokogiri.gemspec +0 -34
  292. data/test/hpricot/files/basic.xhtml +0 -17
  293. data/test/hpricot/files/boingboing.html +0 -2266
  294. data/test/hpricot/files/cy0.html +0 -3653
  295. data/test/hpricot/files/immob.html +0 -400
  296. data/test/hpricot/files/pace_application.html +0 -1320
  297. data/test/hpricot/files/tenderlove.html +0 -16
  298. data/test/hpricot/files/uswebgen.html +0 -220
  299. data/test/hpricot/files/utf8.html +0 -1054
  300. data/test/hpricot/files/week9.html +0 -1723
  301. data/test/hpricot/files/why.xml +0 -19
  302. data/test/hpricot/load_files.rb +0 -7
  303. data/test/hpricot/test_alter.rb +0 -67
  304. data/test/hpricot/test_builder.rb +0 -27
  305. data/test/hpricot/test_parser.rb +0 -423
  306. data/test/hpricot/test_paths.rb +0 -15
  307. data/test/hpricot/test_preserved.rb +0 -78
  308. data/test/hpricot/test_xml.rb +0 -30
  309. data/test/test_reader.rb +0 -222
@@ -0,0 +1,31 @@
1
+ require "helper"
2
+
3
+ module Nokogiri
4
+ module XML
5
+ class TestDocumentEncoding < Nokogiri::TestCase
6
+ def setup
7
+ super
8
+ @xml = Nokogiri::XML(File.read(SHIFT_JIS_XML), SHIFT_JIS_XML)
9
+ end
10
+
11
+ def test_url
12
+ assert_equal 'UTF-8', @xml.url.encoding.name
13
+ end
14
+
15
+ def test_encoding
16
+ assert_equal 'UTF-8', @xml.encoding.encoding.name
17
+ end
18
+
19
+ def test_dotted_version
20
+ if Nokogiri.uses_libxml?
21
+ assert_equal 'UTF-8', Nokogiri::LIBXML_VERSION.encoding.name
22
+ end
23
+ end
24
+
25
+ def test_empty_doc_encoding
26
+ encoding = 'US-ASCII'
27
+ assert_equal encoding, Nokogiri::XML(nil, nil, encoding).encoding
28
+ end
29
+ end
30
+ end
31
+ end
@@ -0,0 +1,271 @@
1
+ require "helper"
2
+
3
+ module Nokogiri
4
+ module XML
5
+ class TestDocumentFragment < Nokogiri::TestCase
6
+ def setup
7
+ super
8
+ @xml = Nokogiri::XML.parse(File.read(XML_FILE), XML_FILE)
9
+ end
10
+
11
+ def test_replace_text_node
12
+ html = "foo"
13
+ doc = Nokogiri::XML::DocumentFragment.parse(html)
14
+ doc.children[0].replace "bar"
15
+ assert_equal 'bar', doc.children[0].content
16
+ end
17
+
18
+ def test_fragment_is_relative
19
+ doc = Nokogiri::XML('<root><a xmlns="blah" /></root>')
20
+ ctx = doc.root.child
21
+ fragment = Nokogiri::XML::DocumentFragment.new(doc, '<hello />', ctx)
22
+ hello = fragment.child
23
+
24
+ assert_equal 'hello', hello.name
25
+ assert_equal doc.root.child.namespace, hello.namespace
26
+ end
27
+
28
+ def test_node_fragment_is_relative
29
+ doc = Nokogiri::XML('<root><a xmlns="blah" /></root>')
30
+ assert doc.root.child
31
+ fragment = doc.root.child.fragment('<hello />')
32
+ hello = fragment.child
33
+
34
+ assert_equal 'hello', hello.name
35
+ assert_equal doc.root.child.namespace, hello.namespace
36
+ end
37
+
38
+ def test_new
39
+ assert Nokogiri::XML::DocumentFragment.new(@xml)
40
+ end
41
+
42
+ def test_fragment_should_have_document
43
+ fragment = Nokogiri::XML::DocumentFragment.new(@xml)
44
+ assert_equal @xml, fragment.document
45
+ end
46
+
47
+ def test_name
48
+ fragment = Nokogiri::XML::DocumentFragment.new(@xml)
49
+ assert_equal '#document-fragment', fragment.name
50
+ end
51
+
52
+ def test_static_method
53
+ fragment = Nokogiri::XML::DocumentFragment.parse("<div>a</div>")
54
+ assert_instance_of Nokogiri::XML::DocumentFragment, fragment
55
+ end
56
+
57
+ def test_static_method_with_namespaces
58
+ # follows different path in FragmentHandler#start_element which blew up after 597195ff
59
+ fragment = Nokogiri::XML::DocumentFragment.parse("<o:div>a</o:div>")
60
+ assert_instance_of Nokogiri::XML::DocumentFragment, fragment
61
+ end
62
+
63
+ def test_many_fragments
64
+ 100.times { Nokogiri::XML::DocumentFragment.new(@xml) }
65
+ end
66
+
67
+ def test_subclass
68
+ klass = Class.new(Nokogiri::XML::DocumentFragment)
69
+ fragment = klass.new(@xml, "<div>a</div>")
70
+ assert_instance_of klass, fragment
71
+ end
72
+
73
+ def test_subclass_parse
74
+ klass = Class.new(Nokogiri::XML::DocumentFragment)
75
+ doc = klass.parse("<div>a</div>")
76
+ assert_instance_of klass, doc
77
+ end
78
+
79
+ def test_unparented_text_node_parse
80
+ fragment = Nokogiri::XML::DocumentFragment.parse("foo")
81
+ fragment.children.after("<bar/>")
82
+ end
83
+
84
+ def test_xml_fragment
85
+ fragment = Nokogiri::XML.fragment("<div>a</div>")
86
+ assert_equal "<div>a</div>", fragment.to_s
87
+ end
88
+
89
+ def test_xml_fragment_has_multiple_toplevel_children
90
+ doc = "<div>b</div><div>e</div>"
91
+ fragment = Nokogiri::XML::Document.new.fragment(doc)
92
+ assert_equal "<div>b</div><div>e</div>", fragment.to_s
93
+ end
94
+
95
+ def test_xml_fragment_has_outer_text
96
+ # this test is descriptive, not prescriptive.
97
+ doc = "a<div>b</div>"
98
+ fragment = Nokogiri::XML::Document.new.fragment(doc)
99
+ assert_equal "a<div>b</div>", fragment.to_s
100
+
101
+ doc = "<div>b</div>c"
102
+ fragment = Nokogiri::XML::Document.new.fragment(doc)
103
+ assert_equal "<div>b</div>c", fragment.to_s
104
+ end
105
+
106
+ def test_xml_fragment_case_sensitivity
107
+ doc = "<crazyDiv>b</crazyDiv>"
108
+ fragment = Nokogiri::XML::Document.new.fragment(doc)
109
+ assert_equal "<crazyDiv>b</crazyDiv>", fragment.to_s
110
+ end
111
+
112
+ def test_xml_fragment_with_leading_whitespace
113
+ doc = " <div>b</div> "
114
+ fragment = Nokogiri::XML::Document.new.fragment(doc)
115
+ assert_equal " <div>b</div> ", fragment.to_s
116
+ end
117
+
118
+ def test_xml_fragment_with_leading_whitespace_and_newline
119
+ doc = " \n<div>b</div> "
120
+ fragment = Nokogiri::XML::Document.new.fragment(doc)
121
+ assert_equal " \n<div>b</div> ", fragment.to_s
122
+ end
123
+
124
+ def test_fragment_children_search
125
+ fragment = Nokogiri::XML::Document.new.fragment(
126
+ '<div><p id="content">hi</p></div>'
127
+ )
128
+ expected = fragment.children.xpath('.//p')
129
+ assert_equal 1, expected.length
130
+
131
+ css = fragment.children.css('p')
132
+ search_css = fragment.children.search('p')
133
+ search_xpath = fragment.children.search('.//p')
134
+ assert_equal expected, css
135
+ assert_equal expected, search_css
136
+ assert_equal expected, search_xpath
137
+ end
138
+
139
+ def test_fragment_search_three_ways
140
+ frag = Nokogiri::XML::Document.new.fragment '<p id="content">foo</p><p id="content">bar</p>'
141
+ expected = frag.xpath('./*[@id = "content"]')
142
+ assert_equal 2, expected.length
143
+
144
+ [
145
+ [:css, '#content'],
146
+ [:search, '#content'],
147
+ [:search, './*[@id = \'content\']'],
148
+ ].each do |method, query|
149
+ result = frag.send(method, query)
150
+ assert_equal(expected, result,
151
+ "fragment search with :#{method} using '#{query}' expected '#{expected}' got '#{result}'")
152
+ end
153
+ end
154
+
155
+ def test_fragment_search_with_multiple_queries
156
+ xml = '<thing>
157
+ <div class="title">important thing</div>
158
+ </thing>
159
+ <thing>
160
+ <div class="content">stuff</div>
161
+ </thing>
162
+ <thing>
163
+ <p class="blah">more stuff</div>
164
+ </thing>'
165
+ fragment = Nokogiri::XML.fragment(xml)
166
+ assert_kind_of Nokogiri::XML::DocumentFragment, fragment
167
+
168
+ assert_equal 3, fragment.xpath('.//div', './/p').length
169
+ assert_equal 3, fragment.css('.title', '.content', 'p').length
170
+ assert_equal 3, fragment.search('.//div', 'p.blah').length
171
+ end
172
+
173
+ def test_fragment_without_a_namespace_does_not_get_a_namespace
174
+ doc = Nokogiri::XML <<-EOX
175
+ <root xmlns="http://tenderlovemaking.com/" xmlns:foo="http://flavorjon.es/" xmlns:bar="http://google.com/">
176
+ <foo:existing></foo:existing>
177
+ </root>
178
+ EOX
179
+ frag = doc.fragment "<newnode></newnode>"
180
+ assert_nil frag.namespace
181
+ end
182
+
183
+ def test_fragment_namespace_resolves_against_document_root
184
+ doc = Nokogiri::XML <<-EOX
185
+ <root xmlns:foo="http://flavorjon.es/" xmlns:bar="http://google.com/">
186
+ <foo:existing></foo:existing>
187
+ </root>
188
+ EOX
189
+ ns = doc.root.namespace_definitions.detect { |x| x.prefix == "bar" }
190
+
191
+ frag = doc.fragment "<bar:newnode></bar:newnode>"
192
+ assert frag.children.first.namespace
193
+ assert_equal ns, frag.children.first.namespace
194
+ end
195
+
196
+ def test_fragment_invalid_namespace_is_silently_ignored
197
+ doc = Nokogiri::XML <<-EOX
198
+ <root xmlns:foo="http://flavorjon.es/" xmlns:bar="http://google.com/">
199
+ <foo:existing></foo:existing>
200
+ </root>
201
+ EOX
202
+ frag = doc.fragment "<baz:newnode></baz:newnode>"
203
+ assert_nil frag.children.first.namespace
204
+ end
205
+
206
+ def test_decorator_is_applied
207
+ x = Module.new do
208
+ def awesome!
209
+ end
210
+ end
211
+ util_decorate(@xml, x)
212
+ fragment = Nokogiri::XML::DocumentFragment.new(@xml, "<div>a</div><div>b</div>")
213
+
214
+ assert node_set = fragment.css('div')
215
+ assert node_set.respond_to?(:awesome!)
216
+ node_set.each do |node|
217
+ assert node.respond_to?(:awesome!), node.class
218
+ end
219
+ assert fragment.children.respond_to?(:awesome!), fragment.children.class
220
+ end
221
+
222
+ def test_add_node_to_doc_fragment_segfault
223
+ frag = Nokogiri::XML::DocumentFragment.new(@xml, '<p>hello world</p>')
224
+ Nokogiri::XML::Comment.new(frag,'moo')
225
+ end
226
+
227
+ def test_issue_1077_parsing_of_frozen_strings
228
+ input = <<-EOS
229
+ <?xml version="1.0" encoding="utf-8"?>
230
+ <library>
231
+ <book title="I like turtles"/>
232
+ </library>
233
+ EOS
234
+ input.freeze
235
+
236
+ Nokogiri::XML::DocumentFragment.parse(input) # assert_nothing_raised
237
+ end
238
+
239
+ if Nokogiri.uses_libxml?
240
+ def test_for_libxml_in_context_fragment_parsing_bug_workaround
241
+ 10.times do
242
+ begin
243
+ fragment = Nokogiri::XML.fragment("<div></div>")
244
+ parent = fragment.children.first
245
+ child = parent.parse("<h1></h1>").first
246
+ parent.add_child child
247
+ end
248
+ GC.start
249
+ end
250
+ end
251
+
252
+ def test_for_libxml_in_context_memory_badness_when_encountering_encoding_errors
253
+ # see issue #643 for background
254
+ # this test exists solely to raise an error during valgrind test runs.
255
+ html = <<-EOHTML
256
+ <html>
257
+ <head>
258
+ <meta http-equiv="Content-Type" content="text/html; charset=shizzle" />
259
+ </head>
260
+ <body>
261
+ <div>Foo</div>
262
+ </body>
263
+ </html>
264
+ EOHTML
265
+ doc = Nokogiri::HTML html
266
+ doc.at_css("div").replace("Bar")
267
+ end
268
+ end
269
+ end
270
+ end
271
+ end
data/test/xml/test_dtd.rb CHANGED
@@ -1,13 +1,163 @@
1
- require File.expand_path(File.join(File.dirname(__FILE__), '..', "helper"))
1
+ require "helper"
2
2
 
3
3
  module Nokogiri
4
- module HTML
4
+ module XML
5
5
  class TestDTD < Nokogiri::TestCase
6
6
  def setup
7
- @xml = Nokogiri::XML.parse(File.read(XML_FILE))
7
+ super
8
+ @xml = Nokogiri::XML(File.open(XML_FILE))
8
9
  assert @dtd = @xml.internal_subset
9
10
  end
10
11
 
12
+ def test_system_id
13
+ assert_equal 'staff.dtd', @dtd.system_id
14
+ end
15
+
16
+ def test_external_id
17
+ xml = Nokogiri::XML('<!DOCTYPE foo PUBLIC "bar" ""><foo />')
18
+ assert dtd = xml.internal_subset, 'no internal subset'
19
+ assert_equal 'bar', dtd.external_id
20
+ end
21
+
22
+ def test_html_dtd
23
+ {
24
+ 'MathML 2.0' => [
25
+ '<!DOCTYPE math PUBLIC "-//W3C//DTD MathML 2.0//EN" "http://www.w3.org/Math/DTD/mathml2/mathml2.dtd">',
26
+ false,
27
+ false,
28
+ ],
29
+ 'HTML 2.0' => [
30
+ '<!DOCTYPE html PUBLIC "-//IETF//DTD HTML 2.0//EN">',
31
+ true,
32
+ false,
33
+ ],
34
+ 'HTML 3.2' => [
35
+ '<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 3.2 Final//EN">',
36
+ true,
37
+ false,
38
+ ],
39
+ 'XHTML Basic 1.0' => [
40
+ '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML Basic 1.0//EN" "http://www.w3.org/TR/xhtml-basic/xhtml-basic10.dtd">',
41
+ true,
42
+ false,
43
+ ],
44
+ 'XHTML 1.0 Strict' => [
45
+ '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">',
46
+ true,
47
+ false,
48
+ ],
49
+ 'XHTML + MathML + SVG Profile (XHTML as the host language)' => [
50
+ '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1 plus MathML 2.0 plus SVG 1.1//EN" "http://www.w3.org/2002/04/xhtml-math-svg/xhtml-math-svg.dtd">',
51
+ true,
52
+ false,
53
+ ],
54
+ 'XHTML + MathML + SVG Profile (Using SVG as the host)' => [
55
+ '<!DOCTYPE svg:svg PUBLIC "-//W3C//DTD XHTML 1.1 plus MathML 2.0 plus SVG 1.1//EN" "http://www.w3.org/2002/04/xhtml-math-svg/xhtml-math-svg.dtd">',
56
+ false,
57
+ false,
58
+ ],
59
+ 'CHTML 1.0' => [
60
+ '<!DOCTYPE HTML PUBLIC "-//W3C//DTD Compact HTML 1.0 Draft//EN">',
61
+ true,
62
+ false,
63
+ ],
64
+ 'HTML 4.01 Strict' => [
65
+ '<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">',
66
+ true,
67
+ false,
68
+ ],
69
+ 'HTML 4.01 Transitional' => [
70
+ '<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">',
71
+ true,
72
+ false,
73
+ ],
74
+ 'HTML 4.01 Frameset' => [
75
+ '<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Frameset//EN" "http://www.w3.org/TR/html4/frameset.dtd">',
76
+ true,
77
+ false,
78
+ ],
79
+ 'HTML 5' => [
80
+ '<!DOCTYPE html>',
81
+ true,
82
+ true,
83
+ ],
84
+ 'HTML 5 legacy compatible' => [
85
+ '<!DOCTYPE HTML SYSTEM "about:legacy-compat">',
86
+ true,
87
+ true,
88
+ ],
89
+ }.each { |name, (dtd_str, html_p, html5_p)|
90
+ doc = Nokogiri(dtd_str)
91
+ dtd = doc.internal_subset
92
+ assert_instance_of Nokogiri::XML::DTD, dtd, name
93
+ if html_p
94
+ assert_send [dtd, :html_dtd?], name
95
+ else
96
+ assert_not_send [dtd, :html_dtd?], name
97
+ end
98
+ if html5_p
99
+ assert_send [dtd, :html5_dtd?], name
100
+ else
101
+ assert_not_send [dtd, :html5_dtd?], name
102
+ end
103
+ }
104
+ end
105
+
106
+ def test_content
107
+ assert_raise NoMethodError do
108
+ @dtd.content
109
+ end
110
+ end
111
+
112
+ def test_empty_attributes
113
+ dtd = Nokogiri::HTML("<html></html>").internal_subset
114
+ assert_equal Hash.new, dtd.attributes
115
+ end
116
+
117
+ def test_attributes
118
+ assert_equal ['width'], @dtd.attributes.keys
119
+ assert_equal '0', @dtd.attributes['width'].default
120
+ end
121
+
122
+ def test_keys
123
+ assert_equal ['width'], @dtd.keys
124
+ end
125
+
126
+ def test_each
127
+ hash = {}
128
+ @dtd.each { |key, value| hash[key] = value }
129
+ assert_equal @dtd.attributes, hash
130
+ end
131
+
132
+ def test_namespace
133
+ assert_raise NoMethodError do
134
+ @dtd.namespace
135
+ end
136
+ end
137
+
138
+ def test_namespace_definitions
139
+ assert_raise NoMethodError do
140
+ @dtd.namespace_definitions
141
+ end
142
+ end
143
+
144
+ def test_line
145
+ assert_raise NoMethodError do
146
+ @dtd.line
147
+ end
148
+ end
149
+
150
+ def test_validate
151
+ if Nokogiri.uses_libxml?
152
+ list = @xml.internal_subset.validate @xml
153
+ assert_equal 44, list.length
154
+ else
155
+ xml = Nokogiri::XML(File.open(XML_FILE)) {|cfg| cfg.dtdvalid}
156
+ list = xml.internal_subset.validate xml
157
+ assert_equal 40, list.length
158
+ end
159
+ end
160
+
11
161
  def test_external_subsets
12
162
  assert subset = @xml.internal_subset
13
163
  assert_equal 'staff', subset.name
@@ -18,12 +168,6 @@ module Nokogiri
18
168
  assert_equal %w[ ent1 ent2 ent3 ent4 ent5 ].sort, entities.keys.sort
19
169
  end
20
170
 
21
- def test_attributes
22
- assert attributes = @dtd.attributes
23
- assert_equal %w[ width ], attributes.keys
24
- assert_equal 'width', attributes['width'].name
25
- end
26
-
27
171
  def test_elements
28
172
  assert elements = @dtd.elements
29
173
  assert_equal %w[ br ], elements.keys
@@ -0,0 +1,31 @@
1
+ # -*- coding: utf-8 -*-
2
+
3
+ require "helper"
4
+
5
+ module Nokogiri
6
+ module XML
7
+ class TestDTDEncoding < Nokogiri::TestCase
8
+ def setup
9
+ super
10
+ @xml = Nokogiri::XML(File.read(XML_FILE), XML_FILE, 'UTF-8')
11
+ assert @dtd = @xml.internal_subset
12
+ end
13
+
14
+ def test_entities
15
+ @dtd.entities.each do |k,v|
16
+ assert_equal @xml.encoding, k.encoding.name
17
+ end
18
+ end
19
+
20
+ def test_notations
21
+ @dtd.notations.each do |k,notation|
22
+ assert_equal 'UTF-8', k.encoding.name
23
+ %w{ name public_id system_id }.each do |attribute|
24
+ v = notation.send(:"#{attribute}") || next
25
+ assert_equal 'UTF-8', v.encoding.name
26
+ end
27
+ end
28
+ end
29
+ end
30
+ end
31
+ end
@@ -0,0 +1,56 @@
1
+ require "helper"
2
+
3
+ module Nokogiri
4
+ module XML
5
+ class TestElementContent < Nokogiri::TestCase
6
+ def setup
7
+ super
8
+ @xml = Nokogiri::XML(<<-eoxml)
9
+ <?xml version="1.0"?><?TEST-STYLE PIDATA?>
10
+ <!DOCTYPE staff SYSTEM "staff.dtd" [
11
+ <!ELEMENT br EMPTY>
12
+ <!ELEMENT div1 (head, (p | list | note)*, div2*)>
13
+ <!ELEMENT div2 (tender:love)>
14
+ ]>
15
+ <root/>
16
+ eoxml
17
+ @elements = @xml.internal_subset.children.find_all { |x|
18
+ x.type == 15
19
+ }
20
+ @tree = @elements[1].content
21
+ end
22
+
23
+ def test_allowed_content_not_defined
24
+ assert_nil @elements.first.content
25
+ end
26
+
27
+ def test_document
28
+ assert @tree
29
+ assert_equal @xml, @tree.document
30
+ end
31
+
32
+ def test_type
33
+ assert_equal ElementContent::SEQ, @tree.type
34
+ end
35
+
36
+ def test_children
37
+ assert_equal 2, @tree.children.length
38
+ end
39
+
40
+ def test_name
41
+ assert_nil @tree.name
42
+ assert_equal 'head', @tree.children.first.name
43
+ assert_equal 'p', @tree.children[1].children.first.children.first.name
44
+ end
45
+
46
+ def test_occur
47
+ assert_equal ElementContent::ONCE, @tree.occur
48
+ end
49
+
50
+ def test_prefix
51
+ assert_nil @tree.prefix
52
+ assert_equal 'tender', @elements[2].content.prefix
53
+ end
54
+ end
55
+ end
56
+ end
@@ -0,0 +1,73 @@
1
+ require "helper"
2
+
3
+ module Nokogiri
4
+ module XML
5
+ class TestElementDecl < Nokogiri::TestCase
6
+ def setup
7
+ super
8
+ @xml = Nokogiri::XML(<<-eoxml)
9
+ <?xml version="1.0"?><?TEST-STYLE PIDATA?>
10
+ <!DOCTYPE staff SYSTEM "staff.dtd" [
11
+ <!ELEMENT br EMPTY>
12
+ <!ELEMENT div1 (head, (p | list | note)*, div2*)>
13
+ <!ELEMENT my:way EMPTY>
14
+ <!ATTLIST br width CDATA "0">
15
+ <!ATTLIST br height CDATA "0">
16
+ ]>
17
+ <root/>
18
+ eoxml
19
+ @elements = @xml.internal_subset.children.find_all { |x|
20
+ x.type == 15
21
+ }
22
+ end
23
+
24
+ def test_inspect
25
+ e = @elements.first
26
+ assert_equal(
27
+ "#<#{e.class.name}:#{sprintf("0x%x", e.object_id)} #{e.to_s.inspect}>",
28
+ e.inspect
29
+ )
30
+ end
31
+
32
+ def test_prefix
33
+ assert_nil @elements[1].prefix
34
+ assert_equal 'my', @elements[2].prefix
35
+ end
36
+
37
+ def test_line
38
+ assert_raise NoMethodError do
39
+ @elements.first.line
40
+ end
41
+ end
42
+
43
+ def test_namespace
44
+ assert_raise NoMethodError do
45
+ @elements.first.namespace
46
+ end
47
+ end
48
+
49
+ def test_namespace_definitions
50
+ assert_raise NoMethodError do
51
+ @elements.first.namespace_definitions
52
+ end
53
+ end
54
+
55
+ def test_element_type
56
+ assert_equal 1, @elements.first.element_type
57
+ end
58
+
59
+ def test_type
60
+ assert_equal 15, @elements.first.type
61
+ end
62
+
63
+ def test_class
64
+ assert_instance_of Nokogiri::XML::ElementDecl, @elements.first
65
+ end
66
+
67
+ def test_attributes
68
+ assert_equal 2, @elements.first.attribute_nodes.length
69
+ assert_equal 'width', @elements.first.attribute_nodes.first.name
70
+ end
71
+ end
72
+ end
73
+ end