nokogiri 1.8.5 → 1.13.9

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (353)
  1. checksums.yaml +4 -4
  2. data/Gemfile +3 -21
  3. data/LICENSE-DEPENDENCIES.md +1159 -868
  4. data/LICENSE.md +5 -28
  5. data/README.md +196 -90
  6. data/bin/nokogiri +63 -50
  7. data/dependencies.yml +13 -59
  8. data/ext/nokogiri/depend +38 -358
  9. data/ext/nokogiri/extconf.rb +765 -420
  10. data/ext/nokogiri/gumbo.c +584 -0
  11. data/ext/nokogiri/html4_document.c +166 -0
  12. data/ext/nokogiri/html4_element_description.c +294 -0
  13. data/ext/nokogiri/html4_entity_lookup.c +37 -0
  14. data/ext/nokogiri/html4_sax_parser_context.c +119 -0
  15. data/ext/nokogiri/html4_sax_push_parser.c +95 -0
  16. data/ext/nokogiri/libxml2_backwards_compat.c +121 -0
  17. data/ext/nokogiri/nokogiri.c +228 -91
  18. data/ext/nokogiri/nokogiri.h +199 -88
  19. data/ext/nokogiri/test_global_handlers.c +40 -0
  20. data/ext/nokogiri/xml_attr.c +42 -37
  21. data/ext/nokogiri/xml_attribute_decl.c +21 -21
  22. data/ext/nokogiri/xml_cdata.c +14 -19
  23. data/ext/nokogiri/xml_comment.c +19 -26
  24. data/ext/nokogiri/xml_document.c +296 -217
  25. data/ext/nokogiri/xml_document_fragment.c +12 -16
  26. data/ext/nokogiri/xml_dtd.c +64 -58
  27. data/ext/nokogiri/xml_element_content.c +31 -26
  28. data/ext/nokogiri/xml_element_decl.c +25 -25
  29. data/ext/nokogiri/xml_encoding_handler.c +43 -18
  30. data/ext/nokogiri/xml_entity_decl.c +37 -35
  31. data/ext/nokogiri/xml_entity_reference.c +16 -18
  32. data/ext/nokogiri/xml_namespace.c +99 -54
  33. data/ext/nokogiri/xml_node.c +1107 -658
  34. data/ext/nokogiri/xml_node_set.c +178 -166
  35. data/ext/nokogiri/xml_processing_instruction.c +17 -19
  36. data/ext/nokogiri/xml_reader.c +277 -175
  37. data/ext/nokogiri/xml_relax_ng.c +52 -28
  38. data/ext/nokogiri/xml_sax_parser.c +112 -112
  39. data/ext/nokogiri/xml_sax_parser_context.c +112 -86
  40. data/ext/nokogiri/xml_sax_push_parser.c +36 -27
  41. data/ext/nokogiri/xml_schema.c +114 -35
  42. data/ext/nokogiri/xml_syntax_error.c +42 -21
  43. data/ext/nokogiri/xml_text.c +14 -18
  44. data/ext/nokogiri/xml_xpath_context.c +226 -115
  45. data/ext/nokogiri/xslt_stylesheet.c +265 -173
  46. data/gumbo-parser/CHANGES.md +63 -0
  47. data/gumbo-parser/Makefile +101 -0
  48. data/gumbo-parser/THANKS +27 -0
  49. data/gumbo-parser/src/Makefile +34 -0
  50. data/gumbo-parser/src/README.md +41 -0
  51. data/gumbo-parser/src/ascii.c +75 -0
  52. data/gumbo-parser/src/ascii.h +115 -0
  53. data/gumbo-parser/src/attribute.c +42 -0
  54. data/gumbo-parser/src/attribute.h +17 -0
  55. data/gumbo-parser/src/char_ref.c +22225 -0
  56. data/gumbo-parser/src/char_ref.h +29 -0
  57. data/gumbo-parser/src/char_ref.rl +2154 -0
  58. data/gumbo-parser/src/error.c +626 -0
  59. data/gumbo-parser/src/error.h +148 -0
  60. data/gumbo-parser/src/foreign_attrs.c +104 -0
  61. data/gumbo-parser/src/foreign_attrs.gperf +27 -0
  62. data/gumbo-parser/src/gumbo.h +943 -0
  63. data/gumbo-parser/src/insertion_mode.h +33 -0
  64. data/gumbo-parser/src/macros.h +91 -0
  65. data/gumbo-parser/src/parser.c +4875 -0
  66. data/gumbo-parser/src/parser.h +41 -0
  67. data/gumbo-parser/src/replacement.h +33 -0
  68. data/gumbo-parser/src/string_buffer.c +103 -0
  69. data/gumbo-parser/src/string_buffer.h +68 -0
  70. data/gumbo-parser/src/string_piece.c +48 -0
  71. data/gumbo-parser/src/svg_attrs.c +174 -0
  72. data/gumbo-parser/src/svg_attrs.gperf +77 -0
  73. data/gumbo-parser/src/svg_tags.c +137 -0
  74. data/gumbo-parser/src/svg_tags.gperf +55 -0
  75. data/gumbo-parser/src/tag.c +222 -0
  76. data/gumbo-parser/src/tag_lookup.c +382 -0
  77. data/gumbo-parser/src/tag_lookup.gperf +169 -0
  78. data/gumbo-parser/src/tag_lookup.h +13 -0
  79. data/gumbo-parser/src/token_buffer.c +79 -0
  80. data/gumbo-parser/src/token_buffer.h +71 -0
  81. data/gumbo-parser/src/token_type.h +17 -0
  82. data/gumbo-parser/src/tokenizer.c +3463 -0
  83. data/gumbo-parser/src/tokenizer.h +112 -0
  84. data/gumbo-parser/src/tokenizer_states.h +339 -0
  85. data/gumbo-parser/src/utf8.c +245 -0
  86. data/gumbo-parser/src/utf8.h +164 -0
  87. data/gumbo-parser/src/util.c +68 -0
  88. data/gumbo-parser/src/util.h +30 -0
  89. data/gumbo-parser/src/vector.c +111 -0
  90. data/gumbo-parser/src/vector.h +45 -0
  91. data/lib/nokogiri/class_resolver.rb +67 -0
  92. data/lib/nokogiri/css/node.rb +10 -8
  93. data/lib/nokogiri/css/parser.rb +397 -377
  94. data/lib/nokogiri/css/parser.y +250 -245
  95. data/lib/nokogiri/css/parser_extras.rb +54 -49
  96. data/lib/nokogiri/css/syntax_error.rb +3 -1
  97. data/lib/nokogiri/css/tokenizer.rb +107 -104
  98. data/lib/nokogiri/css/tokenizer.rex +3 -2
  99. data/lib/nokogiri/css/xpath_visitor.rb +218 -91
  100. data/lib/nokogiri/css.rb +50 -17
  101. data/lib/nokogiri/decorators/slop.rb +9 -7
  102. data/lib/nokogiri/extension.rb +31 -0
  103. data/lib/nokogiri/gumbo.rb +15 -0
  104. data/lib/nokogiri/html.rb +38 -27
  105. data/lib/nokogiri/{html → html4}/builder.rb +4 -2
  106. data/lib/nokogiri/{html → html4}/document.rb +103 -105
  107. data/lib/nokogiri/html4/document_fragment.rb +54 -0
  108. data/lib/nokogiri/{html → html4}/element_description.rb +3 -1
  109. data/lib/nokogiri/html4/element_description_defaults.rb +578 -0
  110. data/lib/nokogiri/{html → html4}/entity_lookup.rb +4 -2
  111. data/lib/nokogiri/{html → html4}/sax/parser.rb +17 -16
  112. data/lib/nokogiri/html4/sax/parser_context.rb +20 -0
  113. data/lib/nokogiri/{html → html4}/sax/push_parser.rb +12 -11
  114. data/lib/nokogiri/html4.rb +46 -0
  115. data/lib/nokogiri/html5/document.rb +91 -0
  116. data/lib/nokogiri/html5/document_fragment.rb +83 -0
  117. data/lib/nokogiri/html5/node.rb +100 -0
  118. data/lib/nokogiri/html5.rb +478 -0
  119. data/lib/nokogiri/jruby/dependencies.rb +21 -0
  120. data/lib/nokogiri/syntax_error.rb +2 -0
  121. data/lib/nokogiri/version/constant.rb +6 -0
  122. data/lib/nokogiri/version/info.rb +222 -0
  123. data/lib/nokogiri/version.rb +3 -108
  124. data/lib/nokogiri/xml/attr.rb +6 -3
  125. data/lib/nokogiri/xml/attribute_decl.rb +3 -1
  126. data/lib/nokogiri/xml/builder.rb +97 -53
  127. data/lib/nokogiri/xml/cdata.rb +3 -1
  128. data/lib/nokogiri/xml/character_data.rb +2 -0
  129. data/lib/nokogiri/xml/document.rb +224 -86
  130. data/lib/nokogiri/xml/document_fragment.rb +57 -44
  131. data/lib/nokogiri/xml/dtd.rb +4 -2
  132. data/lib/nokogiri/xml/element_content.rb +2 -0
  133. data/lib/nokogiri/xml/element_decl.rb +3 -1
  134. data/lib/nokogiri/xml/entity_decl.rb +4 -2
  135. data/lib/nokogiri/xml/entity_reference.rb +2 -0
  136. data/lib/nokogiri/xml/namespace.rb +3 -0
  137. data/lib/nokogiri/xml/node/save_options.rb +10 -5
  138. data/lib/nokogiri/xml/node.rb +895 -377
  139. data/lib/nokogiri/xml/node_set.rb +92 -65
  140. data/lib/nokogiri/xml/notation.rb +13 -0
  141. data/lib/nokogiri/xml/parse_options.rb +22 -8
  142. data/lib/nokogiri/xml/pp/character_data.rb +9 -6
  143. data/lib/nokogiri/xml/pp/node.rb +25 -26
  144. data/lib/nokogiri/xml/pp.rb +4 -2
  145. data/lib/nokogiri/xml/processing_instruction.rb +3 -1
  146. data/lib/nokogiri/xml/reader.rb +21 -28
  147. data/lib/nokogiri/xml/relax_ng.rb +8 -2
  148. data/lib/nokogiri/xml/sax/document.rb +45 -49
  149. data/lib/nokogiri/xml/sax/parser.rb +38 -34
  150. data/lib/nokogiri/xml/sax/parser_context.rb +8 -3
  151. data/lib/nokogiri/xml/sax/push_parser.rb +6 -5
  152. data/lib/nokogiri/xml/sax.rb +6 -4
  153. data/lib/nokogiri/xml/schema.rb +19 -9
  154. data/lib/nokogiri/xml/searchable.rb +112 -72
  155. data/lib/nokogiri/xml/syntax_error.rb +6 -4
  156. data/lib/nokogiri/xml/text.rb +2 -0
  157. data/lib/nokogiri/xml/xpath/syntax_error.rb +4 -2
  158. data/lib/nokogiri/xml/xpath.rb +15 -4
  159. data/lib/nokogiri/xml/xpath_context.rb +3 -3
  160. data/lib/nokogiri/xml.rb +38 -37
  161. data/lib/nokogiri/xslt/stylesheet.rb +3 -1
  162. data/lib/nokogiri/xslt.rb +29 -20
  163. data/lib/nokogiri.rb +49 -65
  164. data/lib/xsd/xmlparser/nokogiri.rb +26 -24
  165. data/patches/libxml2/0001-Remove-script-macro-support.patch +40 -0
  166. data/patches/libxml2/0002-Update-entities-to-remove-handling-of-ssi.patch +44 -0
  167. data/patches/libxml2/0003-libxml2.la-is-in-top_builddir.patch +25 -0
  168. data/patches/libxml2/0005-avoid-isnan-isinf.patch +81 -0
  169. data/patches/libxml2/0009-allow-wildcard-namespaces.patch +77 -0
  170. data/patches/libxslt/0001-update-automake-files-for-arm64.patch +3037 -0
  171. data/ports/archives/libxml2-2.10.3.tar.xz +0 -0
  172. data/ports/archives/libxslt-1.1.37.tar.xz +0 -0
  173. metadata +211 -266
  174. data/.autotest +0 -22
  175. data/.cross_rubies +0 -8
  176. data/.editorconfig +0 -17
  177. data/.gemtest +0 -0
  178. data/.travis.yml +0 -63
  179. data/CHANGELOG.md +0 -1368
  180. data/CONTRIBUTING.md +0 -42
  181. data/C_CODING_STYLE.rdoc +0 -33
  182. data/Gemfile-libxml-ruby +0 -3
  183. data/Manifest.txt +0 -370
  184. data/ROADMAP.md +0 -111
  185. data/Rakefile +0 -348
  186. data/SECURITY.md +0 -19
  187. data/STANDARD_RESPONSES.md +0 -47
  188. data/Y_U_NO_GEMSPEC.md +0 -155
  189. data/appveyor.yml +0 -29
  190. data/build_all +0 -44
  191. data/ext/nokogiri/html_document.c +0 -170
  192. data/ext/nokogiri/html_document.h +0 -10
  193. data/ext/nokogiri/html_element_description.c +0 -279
  194. data/ext/nokogiri/html_element_description.h +0 -10
  195. data/ext/nokogiri/html_entity_lookup.c +0 -32
  196. data/ext/nokogiri/html_entity_lookup.h +0 -8
  197. data/ext/nokogiri/html_sax_parser_context.c +0 -116
  198. data/ext/nokogiri/html_sax_parser_context.h +0 -11
  199. data/ext/nokogiri/html_sax_push_parser.c +0 -87
  200. data/ext/nokogiri/html_sax_push_parser.h +0 -9
  201. data/ext/nokogiri/xml_attr.h +0 -9
  202. data/ext/nokogiri/xml_attribute_decl.h +0 -9
  203. data/ext/nokogiri/xml_cdata.h +0 -9
  204. data/ext/nokogiri/xml_comment.h +0 -9
  205. data/ext/nokogiri/xml_document.h +0 -23
  206. data/ext/nokogiri/xml_document_fragment.h +0 -10
  207. data/ext/nokogiri/xml_dtd.h +0 -10
  208. data/ext/nokogiri/xml_element_content.h +0 -10
  209. data/ext/nokogiri/xml_element_decl.h +0 -9
  210. data/ext/nokogiri/xml_encoding_handler.h +0 -8
  211. data/ext/nokogiri/xml_entity_decl.h +0 -10
  212. data/ext/nokogiri/xml_entity_reference.h +0 -9
  213. data/ext/nokogiri/xml_io.c +0 -61
  214. data/ext/nokogiri/xml_io.h +0 -11
  215. data/ext/nokogiri/xml_libxml2_hacks.c +0 -112
  216. data/ext/nokogiri/xml_libxml2_hacks.h +0 -12
  217. data/ext/nokogiri/xml_namespace.h +0 -15
  218. data/ext/nokogiri/xml_node.h +0 -13
  219. data/ext/nokogiri/xml_node_set.h +0 -12
  220. data/ext/nokogiri/xml_processing_instruction.h +0 -9
  221. data/ext/nokogiri/xml_reader.h +0 -10
  222. data/ext/nokogiri/xml_relax_ng.h +0 -9
  223. data/ext/nokogiri/xml_sax_parser.h +0 -39
  224. data/ext/nokogiri/xml_sax_parser_context.h +0 -10
  225. data/ext/nokogiri/xml_sax_push_parser.h +0 -9
  226. data/ext/nokogiri/xml_schema.h +0 -9
  227. data/ext/nokogiri/xml_syntax_error.h +0 -13
  228. data/ext/nokogiri/xml_text.h +0 -9
  229. data/ext/nokogiri/xml_xpath_context.h +0 -10
  230. data/ext/nokogiri/xslt_stylesheet.h +0 -14
  231. data/lib/nokogiri/html/document_fragment.rb +0 -49
  232. data/lib/nokogiri/html/element_description_defaults.rb +0 -671
  233. data/lib/nokogiri/html/sax/parser_context.rb +0 -16
  234. data/patches/libxml2/0001-Revert-Do-not-URI-escape-in-server-side-includes.patch +0 -78
  235. data/patches/libxml2/0002-Fix-nullptr-deref-with-XPath-logic-ops.patch +0 -54
  236. data/patches/libxml2/0003-Fix-infinite-loop-in-LZMA-decompression.patch +0 -50
  237. data/patches/sort-patches-by-date +0 -25
  238. data/ports/archives/libxml2-2.9.8.tar.gz +0 -0
  239. data/ports/archives/libxslt-1.1.32.tar.gz +0 -0
  240. data/suppressions/README.txt +0 -1
  241. data/suppressions/nokogiri_ruby-2.supp +0 -10
  242. data/tasks/test.rb +0 -100
  243. data/test/css/test_nthiness.rb +0 -226
  244. data/test/css/test_parser.rb +0 -386
  245. data/test/css/test_tokenizer.rb +0 -215
  246. data/test/css/test_xpath_visitor.rb +0 -96
  247. data/test/decorators/test_slop.rb +0 -23
  248. data/test/files/2ch.html +0 -108
  249. data/test/files/GH_1042.html +0 -18
  250. data/test/files/address_book.rlx +0 -12
  251. data/test/files/address_book.xml +0 -10
  252. data/test/files/atom.xml +0 -344
  253. data/test/files/bar/bar.xsd +0 -4
  254. data/test/files/bogus.xml +0 -0
  255. data/test/files/dont_hurt_em_why.xml +0 -422
  256. data/test/files/encoding.html +0 -82
  257. data/test/files/encoding.xhtml +0 -84
  258. data/test/files/exslt.xml +0 -8
  259. data/test/files/exslt.xslt +0 -35
  260. data/test/files/foo/foo.xsd +0 -4
  261. data/test/files/metacharset.html +0 -10
  262. data/test/files/namespace_pressure_test.xml +0 -1684
  263. data/test/files/noencoding.html +0 -47
  264. data/test/files/po.xml +0 -32
  265. data/test/files/po.xsd +0 -66
  266. data/test/files/saml/saml20assertion_schema.xsd +0 -283
  267. data/test/files/saml/saml20protocol_schema.xsd +0 -302
  268. data/test/files/saml/xenc_schema.xsd +0 -146
  269. data/test/files/saml/xmldsig_schema.xsd +0 -318
  270. data/test/files/shift_jis.html +0 -10
  271. data/test/files/shift_jis.xml +0 -5
  272. data/test/files/shift_jis_no_charset.html +0 -9
  273. data/test/files/slow-xpath.xml +0 -25509
  274. data/test/files/snuggles.xml +0 -3
  275. data/test/files/staff.dtd +0 -10
  276. data/test/files/staff.xml +0 -59
  277. data/test/files/staff.xslt +0 -32
  278. data/test/files/test_document_url/bar.xml +0 -2
  279. data/test/files/test_document_url/document.dtd +0 -4
  280. data/test/files/test_document_url/document.xml +0 -6
  281. data/test/files/tlm.html +0 -851
  282. data/test/files/to_be_xincluded.xml +0 -2
  283. data/test/files/valid_bar.xml +0 -2
  284. data/test/files/xinclude.xml +0 -4
  285. data/test/helper.rb +0 -271
  286. data/test/html/sax/test_parser.rb +0 -168
  287. data/test/html/sax/test_parser_context.rb +0 -46
  288. data/test/html/sax/test_parser_text.rb +0 -163
  289. data/test/html/sax/test_push_parser.rb +0 -87
  290. data/test/html/test_attributes.rb +0 -85
  291. data/test/html/test_builder.rb +0 -164
  292. data/test/html/test_document.rb +0 -712
  293. data/test/html/test_document_encoding.rb +0 -143
  294. data/test/html/test_document_fragment.rb +0 -310
  295. data/test/html/test_element_description.rb +0 -105
  296. data/test/html/test_named_characters.rb +0 -14
  297. data/test/html/test_node.rb +0 -212
  298. data/test/html/test_node_encoding.rb +0 -91
  299. data/test/namespaces/test_additional_namespaces_in_builder_doc.rb +0 -14
  300. data/test/namespaces/test_namespaces_aliased_default.rb +0 -24
  301. data/test/namespaces/test_namespaces_in_builder_doc.rb +0 -75
  302. data/test/namespaces/test_namespaces_in_cloned_doc.rb +0 -31
  303. data/test/namespaces/test_namespaces_in_created_doc.rb +0 -75
  304. data/test/namespaces/test_namespaces_in_parsed_doc.rb +0 -80
  305. data/test/namespaces/test_namespaces_preservation.rb +0 -31
  306. data/test/test_convert_xpath.rb +0 -135
  307. data/test/test_css_cache.rb +0 -47
  308. data/test/test_encoding_handler.rb +0 -48
  309. data/test/test_memory_leak.rb +0 -156
  310. data/test/test_nokogiri.rb +0 -138
  311. data/test/test_soap4r_sax.rb +0 -52
  312. data/test/test_xslt_transforms.rb +0 -314
  313. data/test/xml/node/test_save_options.rb +0 -28
  314. data/test/xml/node/test_subclass.rb +0 -44
  315. data/test/xml/sax/test_parser.rb +0 -402
  316. data/test/xml/sax/test_parser_context.rb +0 -115
  317. data/test/xml/sax/test_parser_text.rb +0 -202
  318. data/test/xml/sax/test_push_parser.rb +0 -265
  319. data/test/xml/test_attr.rb +0 -74
  320. data/test/xml/test_attribute_decl.rb +0 -86
  321. data/test/xml/test_builder.rb +0 -341
  322. data/test/xml/test_c14n.rb +0 -180
  323. data/test/xml/test_cdata.rb +0 -54
  324. data/test/xml/test_comment.rb +0 -40
  325. data/test/xml/test_document.rb +0 -982
  326. data/test/xml/test_document_encoding.rb +0 -31
  327. data/test/xml/test_document_fragment.rb +0 -298
  328. data/test/xml/test_dtd.rb +0 -187
  329. data/test/xml/test_dtd_encoding.rb +0 -31
  330. data/test/xml/test_element_content.rb +0 -56
  331. data/test/xml/test_element_decl.rb +0 -73
  332. data/test/xml/test_entity_decl.rb +0 -122
  333. data/test/xml/test_entity_reference.rb +0 -262
  334. data/test/xml/test_namespace.rb +0 -96
  335. data/test/xml/test_node.rb +0 -1325
  336. data/test/xml/test_node_attributes.rb +0 -115
  337. data/test/xml/test_node_encoding.rb +0 -75
  338. data/test/xml/test_node_inheritance.rb +0 -32
  339. data/test/xml/test_node_reparenting.rb +0 -592
  340. data/test/xml/test_node_set.rb +0 -809
  341. data/test/xml/test_parse_options.rb +0 -64
  342. data/test/xml/test_processing_instruction.rb +0 -30
  343. data/test/xml/test_reader.rb +0 -620
  344. data/test/xml/test_reader_encoding.rb +0 -134
  345. data/test/xml/test_relax_ng.rb +0 -60
  346. data/test/xml/test_schema.rb +0 -142
  347. data/test/xml/test_syntax_error.rb +0 -36
  348. data/test/xml/test_text.rb +0 -60
  349. data/test/xml/test_unparented_node.rb +0 -483
  350. data/test/xml/test_xinclude.rb +0 -83
  351. data/test/xml/test_xpath.rb +0 -470
  352. data/test/xslt/test_custom_functions.rb +0 -133
  353. data/test/xslt/test_exception_handling.rb +0 -37
@@ -1,402 +0,0 @@
1
- # -*- coding: utf-8 -*-
2
-
3
- require "helper"
4
-
5
- module Nokogiri
6
- module XML
7
- module SAX
8
- class TestParser < Nokogiri::SAX::TestCase
9
- def setup
10
- super
11
- @parser = XML::SAX::Parser.new(Doc.new)
12
- end
13
-
14
- def test_parser_context_yielded_io
15
- doc = Doc.new
16
- parser = XML::SAX::Parser.new doc
17
- xml = "<foo a='&amp;b'/>"
18
-
19
- block_called = false
20
- parser.parse(StringIO.new(xml)) { |ctx|
21
- block_called = true
22
- ctx.replace_entities = true
23
- }
24
-
25
- assert block_called
26
-
27
- assert_equal [['foo', [['a', '&b']]]], doc.start_elements
28
- end
29
-
30
- def test_parser_context_yielded_in_memory
31
- doc = Doc.new
32
- parser = XML::SAX::Parser.new doc
33
- xml = "<foo a='&amp;b'/>"
34
-
35
- block_called = false
36
- parser.parse(xml) { |ctx|
37
- block_called = true
38
- ctx.replace_entities = true
39
- }
40
-
41
- assert block_called
42
-
43
- assert_equal [['foo', [['a', '&b']]]], doc.start_elements
44
- end
45
-
46
- def test_empty_decl
47
- parser = XML::SAX::Parser.new(Doc.new)
48
-
49
- xml = "<root />"
50
- parser.parse xml
51
- assert parser.document.start_document_called, xml
52
- assert_nil parser.document.xmldecls, xml
53
- end
54
-
55
- def test_xml_decl
56
- [
57
- ['<?xml version="1.0" ?>',
58
- ['1.0']],
59
- ['<?xml version="1.0" encoding="UTF-8" ?>',
60
- ['1.0', 'UTF-8']],
61
- ['<?xml version="1.0" standalone="yes"?>',
62
- ['1.0', 'yes']],
63
- ['<?xml version="1.0" standalone="no"?>',
64
- ['1.0', 'no']],
65
- ['<?xml version="1.0" encoding="UTF-8" standalone="no"?>',
66
- ['1.0', "UTF-8", 'no']],
67
- ['<?xml version="1.0" encoding="ISO-8859-1" standalone="yes"?>',
68
- ['1.0', "ISO-8859-1", 'yes']]
69
- ].each do |decl, value|
70
- parser = XML::SAX::Parser.new(Doc.new)
71
-
72
- xml = "#{decl}\n<root />"
73
- parser.parse xml
74
- assert parser.document.start_document_called, xml
75
- assert_equal value, parser.document.xmldecls, xml
76
- end
77
- end
78
-
79
- def test_parse_empty
80
- assert_raises RuntimeError do
81
- @parser.parse('')
82
- end
83
- end
84
-
85
- def test_namespace_declaration_order_is_saved
86
- @parser.parse <<-eoxml
87
- <root xmlns:foo='http://foo.example.com/' xmlns='http://example.com/'>
88
- <a foo:bar='hello' />
89
- </root>
90
- eoxml
91
- assert_equal 2, @parser.document.start_elements_namespace.length
92
- el = @parser.document.start_elements_namespace.first
93
- namespaces = el.last
94
- assert_equal ['foo', 'http://foo.example.com/'], namespaces.first
95
- assert_equal [nil, 'http://example.com/'], namespaces.last
96
- end
97
-
98
- def test_bad_document_calls_error_handler
99
- @parser.parse('<foo><bar></foo>')
100
- assert @parser.document.errors
101
- assert @parser.document.errors.length > 0
102
- end
103
-
104
- def test_namespace_are_super_fun_to_parse
105
- @parser.parse <<-eoxml
106
- <root xmlns:foo='http://foo.example.com/'>
107
- <a foo:bar='hello' />
108
- <b xmlns:foo='http://bar.example.com/'>
109
- <a foo:bar='hello' />
110
- </b>
111
- <foo:bar>hello world</foo:bar>
112
- </root>
113
- eoxml
114
-
115
- assert @parser.document.start_elements_namespace.length > 0
116
- el = @parser.document.start_elements_namespace[1]
117
- assert_equal 'a', el.first
118
- assert_equal 1, el[1].length
119
-
120
- attribute = el[1].first
121
- assert_equal 'bar', attribute.localname
122
- assert_equal 'foo', attribute.prefix
123
- assert_equal 'hello', attribute.value
124
- assert_equal 'http://foo.example.com/', attribute.uri
125
- end
126
-
127
- def test_sax_v1_namespace_attribute_declarations
128
- @parser.parse <<-eoxml
129
- <root xmlns:foo='http://foo.example.com/' xmlns='http://example.com/'>
130
- <a foo:bar='hello' />
131
- <b xmlns:foo='http://bar.example.com/'>
132
- <a foo:bar='hello' />
133
- </b>
134
- <foo:bar>hello world</foo:bar>
135
- </root>
136
- eoxml
137
- assert @parser.document.start_elements.length > 0
138
- elm = @parser.document.start_elements.first
139
- assert_equal 'root', elm.first
140
- assert elm[1].include?(['xmlns:foo', 'http://foo.example.com/'])
141
- assert elm[1].include?(['xmlns', 'http://example.com/'])
142
- end
143
-
144
- def test_sax_v1_namespace_nodes
145
- @parser.parse <<-eoxml
146
- <root xmlns:foo='http://foo.example.com/' xmlns='http://example.com/'>
147
- <a foo:bar='hello' />
148
- <b xmlns:foo='http://bar.example.com/'>
149
- <a foo:bar='hello' />
150
- </b>
151
- <foo:bar>hello world</foo:bar>
152
- </root>
153
- eoxml
154
- assert_equal 5, @parser.document.start_elements.length
155
- assert @parser.document.start_elements.map(&:first).include?('foo:bar')
156
- assert @parser.document.end_elements.map(&:first).include?('foo:bar')
157
- end
158
-
159
- def test_start_is_called_without_namespace
160
- @parser.parse(<<-eoxml)
161
- <root xmlns:foo='http://foo.example.com/' xmlns='http://example.com/'>
162
- <foo:f><bar></foo:f>
163
- </root>
164
- eoxml
165
- assert_equal ['root', 'foo:f', 'bar'],
166
- @parser.document.start_elements.map(&:first)
167
- end
168
-
169
- def test_parser_sets_encoding
170
- parser = XML::SAX::Parser.new(Doc.new, 'UTF-8')
171
- assert_equal 'UTF-8', parser.encoding
172
- end
173
-
174
- def test_errors_set_after_parsing_bad_dom
175
- doc = Nokogiri::XML('<foo><bar></foo>')
176
- assert doc.errors
177
-
178
- @parser.parse('<foo><bar></foo>')
179
- assert @parser.document.errors
180
- assert @parser.document.errors.length > 0
181
-
182
- doc.errors.each do |error|
183
- assert_equal 'UTF-8', error.message.encoding.name
184
- end
185
-
186
- # when using JRuby Nokogiri, more errors will be generated as the DOM
187
- # parser continue to parse an ill formed document, while the sax parser
188
- # will stop at the first error
189
- unless Nokogiri.jruby?
190
- assert_equal doc.errors.length, @parser.document.errors.length
191
- end
192
- end
193
-
194
- def test_parse_with_memory_argument
195
- @parser.parse(File.read(XML_FILE))
196
- assert(@parser.document.cdata_blocks.length > 0)
197
- end
198
-
199
- def test_parse_with_io_argument
200
- File.open(XML_FILE, 'rb') { |f|
201
- @parser.parse(f)
202
- }
203
- assert(@parser.document.cdata_blocks.length > 0)
204
- end
205
-
206
- def test_parse_io
207
- call_parse_io_with_encoding 'UTF-8'
208
- end
209
-
210
- # issue #828
211
- def test_parse_io_lower_case_encoding
212
- call_parse_io_with_encoding 'utf-8'
213
- end
214
-
215
- def call_parse_io_with_encoding encoding
216
- File.open(XML_FILE, 'rb') { |f|
217
- @parser.parse_io(f, encoding)
218
- }
219
- assert(@parser.document.cdata_blocks.length > 0)
220
-
221
- called = false
222
- @parser.document.start_elements.flatten.each do |thing|
223
- assert_equal 'UTF-8', thing.encoding.name
224
- called = true
225
- end
226
- assert called
227
-
228
- called = false
229
- @parser.document.end_elements.flatten.each do |thing|
230
- assert_equal 'UTF-8', thing.encoding.name
231
- called = true
232
- end
233
- assert called
234
-
235
- called = false
236
- @parser.document.data.each do |thing|
237
- assert_equal 'UTF-8', thing.encoding.name
238
- called = true
239
- end
240
- assert called
241
-
242
- called = false
243
- @parser.document.comments.flatten.each do |thing|
244
- assert_equal 'UTF-8', thing.encoding.name
245
- called = true
246
- end
247
- assert called
248
-
249
- called = false
250
- @parser.document.cdata_blocks.flatten.each do |thing|
251
- assert_equal 'UTF-8', thing.encoding.name
252
- called = true
253
- end
254
- assert called
255
- end
256
-
257
- def test_parse_file
258
- @parser.parse_file(XML_FILE)
259
-
260
- assert_raises(ArgumentError) {
261
- @parser.parse_file(nil)
262
- }
263
-
264
- assert_raises(Errno::ENOENT) {
265
- @parser.parse_file('')
266
- }
267
- assert_raises(Errno::EISDIR) {
268
- @parser.parse_file(File.expand_path(File.dirname(__FILE__)))
269
- }
270
- end
271
-
272
- def test_render_parse_nil_param
273
- assert_raises(ArgumentError) { @parser.parse_memory(nil) }
274
- end
275
-
276
- def test_bad_encoding_args
277
- assert_raises(ArgumentError) { XML::SAX::Parser.new(Doc.new, 'not an encoding') }
278
- assert_raises(ArgumentError) { @parser.parse_io(StringIO.new('<root/>'), 'not an encoding')}
279
- end
280
-
281
- def test_ctag
282
- @parser.parse_memory(<<-eoxml)
283
- <p id="asdfasdf">
284
- <![CDATA[ This is a comment ]]>
285
- Paragraph 1
286
- </p>
287
- eoxml
288
- assert_equal [' This is a comment '], @parser.document.cdata_blocks
289
- end
290
-
291
- def test_comment
292
- @parser.parse_memory(<<-eoxml)
293
- <p id="asdfasdf">
294
- <!-- This is a comment -->
295
- Paragraph 1
296
- </p>
297
- eoxml
298
- assert_equal [' This is a comment '], @parser.document.comments
299
- end
300
-
301
- def test_characters
302
- @parser.parse_memory(<<-eoxml)
303
- <p id="asdfasdf">Paragraph 1</p>
304
- eoxml
305
- assert_equal ['Paragraph 1'], @parser.document.data
306
- end
307
-
308
- def test_end_document
309
- @parser.parse_memory(<<-eoxml)
310
- <p id="asdfasdf">Paragraph 1</p>
311
- eoxml
312
- assert @parser.document.end_document_called
313
- end
314
-
315
- def test_end_element
316
- @parser.parse_memory(<<-eoxml)
317
- <p id="asdfasdf">Paragraph 1</p>
318
- eoxml
319
- assert_equal [["p"]],
320
- @parser.document.end_elements
321
- end
322
-
323
- def test_start_element_attrs
324
- @parser.parse_memory(<<-eoxml)
325
- <p id="asdfasdf">Paragraph 1</p>
326
- eoxml
327
- assert_equal [["p", [["id", "asdfasdf"]]]],
328
- @parser.document.start_elements
329
- end
330
-
331
- def test_start_element_attrs_include_namespaces
332
- @parser.parse_memory(<<-eoxml)
333
- <p xmlns:foo='http://foo.example.com/'>Paragraph 1</p>
334
- eoxml
335
- assert_equal [["p", [['xmlns:foo', 'http://foo.example.com/']]]],
336
- @parser.document.start_elements
337
- end
338
-
339
- def test_processing_instruction
340
- @parser.parse_memory(<<-eoxml)
341
- <?xml-stylesheet href="a.xsl" type="text/xsl"?>
342
- <?xml version="1.0"?>
343
- eoxml
344
- assert_equal [['xml-stylesheet', 'href="a.xsl" type="text/xsl"']],
345
- @parser.document.processing_instructions
346
- end
347
-
348
- if Nokogiri.uses_libxml? # JRuby SAXParser only parses well-formed XML documents
349
- def test_parse_document
350
- @parser.parse_memory(<<-eoxml)
351
- <p>Paragraph 1</p>
352
- <p>Paragraph 2</p>
353
- eoxml
354
- end
355
- end
356
-
357
- def test_parser_attributes
358
- xml = <<-eoxml
359
- <?xml version="1.0" ?><root><foo a="&amp;b" c="&gt;d" /></root>
360
- eoxml
361
-
362
- block_called = false
363
- @parser.parse(xml) { |ctx|
364
- block_called = true
365
- ctx.replace_entities = true
366
- }
367
-
368
- assert block_called
369
-
370
- assert_equal [['root', []], ['foo', [['a', '&b'], ['c', '>d']]]], @parser.document.start_elements
371
- end
372
-
373
- def test_recovery_from_incorrect_xml
374
- xml = <<-eoxml
375
- <?xml version="1.0" ?><Root><Data><?xml version='1.0'?><Item>hey</Item></Data><Data><Item>hey yourself</Item></Data></Root>
376
- eoxml
377
-
378
- block_called = false
379
- @parser.parse(xml) { |ctx|
380
- block_called = true
381
- ctx.recovery = true
382
- }
383
-
384
- assert block_called
385
-
386
- assert_equal [['Root', []], ['Data', []], ['Item', []], ['Data', []], ['Item', []]], @parser.document.start_elements
387
- end
388
-
389
- def test_square_bracket_in_text # issue 1261
390
- xml = <<-eoxml
391
- <tu tuid="87dea04cf60af103ff09d1dba36ae820" segtype="block">
392
- <prop type="x-smartling-string-variant">en:#:home_page:#:stories:#:[6]:#:name</prop>
393
- <tuv xml:lang="en-US"><seg>Sandy S.</seg></tuv>
394
- </tu>
395
- eoxml
396
- @parser.parse(xml)
397
- assert @parser.document.data.must_include "en:#:home_page:#:stories:#:[6]:#:name"
398
- end
399
- end
400
- end
401
- end
402
- end
@@ -1,115 +0,0 @@
1
- # -*- coding: utf-8 -*-
2
-
3
- require "helper"
4
-
5
- module Nokogiri
6
- module XML
7
- module SAX
8
- class TestParserContext < Nokogiri::SAX::TestCase
9
- def setup
10
- @xml = '<hello>
11
-
12
- world
13
- <inter>
14
- <net>
15
- </net>
16
- </inter>
17
-
18
- </hello>'
19
- end
20
-
21
- class Counter < Nokogiri::XML::SAX::Document
22
- attr_accessor :context, :lines, :columns
23
- def initialize
24
- @context = nil
25
- @lines = []
26
- @columns = []
27
- end
28
-
29
- def start_element name, attrs = []
30
- @lines << [name, context.line]
31
- @columns << [name, context.column]
32
- end
33
- end
34
-
35
- def test_line_numbers
36
- sax_handler = Counter.new
37
-
38
- parser = Nokogiri::XML::SAX::Parser.new(sax_handler)
39
- parser.parse(@xml) do |ctx|
40
- sax_handler.context = ctx
41
- end
42
-
43
- assert_equal [["hello", 1], ["inter", 4], ["net", 5]],
44
- sax_handler.lines
45
- end
46
-
47
- def test_column_numbers
48
- sax_handler = Counter.new
49
-
50
- parser = Nokogiri::XML::SAX::Parser.new(sax_handler)
51
- parser.parse(@xml) do |ctx|
52
- sax_handler.context = ctx
53
- end
54
-
55
- assert_equal [["hello", 7], ["inter", 7], ["net", 9]],
56
- sax_handler.columns
57
- end
58
-
59
- def test_replace_entities
60
- pc = ParserContext.new StringIO.new('<root />'), 'UTF-8'
61
- pc.replace_entities = false
62
- assert_equal false, pc.replace_entities
63
-
64
- pc.replace_entities = true
65
- assert_equal true, pc.replace_entities
66
- end
67
-
68
- def test_recovery
69
- pc = ParserContext.new StringIO.new('<root />'), 'UTF-8'
70
- pc.recovery = false
71
- assert_equal false, pc.recovery
72
-
73
- pc.recovery = true
74
- assert_equal true, pc.recovery
75
- end
76
-
77
- def test_from_io
78
- ctx = ParserContext.new StringIO.new('fo'), 'UTF-8'
79
- assert ctx
80
- end
81
-
82
- def test_from_string
83
- assert ParserContext.new 'blah blah'
84
- end
85
-
86
- def test_parse_with
87
- ctx = ParserContext.new 'blah'
88
- assert_raises ArgumentError do
89
- ctx.parse_with nil
90
- end
91
- end
92
-
93
- def test_parse_with_sax_parser
94
- xml = "<root />"
95
- ctx = ParserContext.new xml
96
- parser = Parser.new Doc.new
97
- assert_nil ctx.parse_with parser
98
- end
99
-
100
- def test_from_file
101
- ctx = ParserContext.file XML_FILE
102
- parser = Parser.new Doc.new
103
- assert_nil ctx.parse_with parser
104
- end
105
-
106
- def test_parse_with_returns_nil
107
- xml = "<root />"
108
- ctx = ParserContext.new xml
109
- parser = Parser.new Doc.new
110
- assert_nil ctx.parse_with(parser)
111
- end
112
- end
113
- end
114
- end
115
- end