nokogiri 1.8.5 → 1.13.6

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (356) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile +3 -21
  3. data/LICENSE-DEPENDENCIES.md +1159 -868
  4. data/LICENSE.md +5 -28
  5. data/README.md +196 -90
  6. data/bin/nokogiri +63 -50
  7. data/dependencies.yml +13 -59
  8. data/ext/nokogiri/depend +38 -358
  9. data/ext/nokogiri/extconf.rb +750 -420
  10. data/ext/nokogiri/gumbo.c +584 -0
  11. data/ext/nokogiri/html4_document.c +166 -0
  12. data/ext/nokogiri/html4_element_description.c +294 -0
  13. data/ext/nokogiri/html4_entity_lookup.c +37 -0
  14. data/ext/nokogiri/html4_sax_parser_context.c +119 -0
  15. data/ext/nokogiri/html4_sax_push_parser.c +95 -0
  16. data/ext/nokogiri/libxml2_backwards_compat.c +121 -0
  17. data/ext/nokogiri/nokogiri.c +228 -91
  18. data/ext/nokogiri/nokogiri.h +191 -89
  19. data/ext/nokogiri/test_global_handlers.c +40 -0
  20. data/ext/nokogiri/xml_attr.c +41 -36
  21. data/ext/nokogiri/xml_attribute_decl.c +18 -18
  22. data/ext/nokogiri/xml_cdata.c +13 -18
  23. data/ext/nokogiri/xml_comment.c +19 -26
  24. data/ext/nokogiri/xml_document.c +291 -216
  25. data/ext/nokogiri/xml_document_fragment.c +12 -16
  26. data/ext/nokogiri/xml_dtd.c +56 -50
  27. data/ext/nokogiri/xml_element_content.c +31 -26
  28. data/ext/nokogiri/xml_element_decl.c +22 -22
  29. data/ext/nokogiri/xml_encoding_handler.c +43 -18
  30. data/ext/nokogiri/xml_entity_decl.c +32 -30
  31. data/ext/nokogiri/xml_entity_reference.c +16 -18
  32. data/ext/nokogiri/xml_namespace.c +61 -52
  33. data/ext/nokogiri/xml_node.c +1044 -616
  34. data/ext/nokogiri/xml_node_set.c +174 -162
  35. data/ext/nokogiri/xml_processing_instruction.c +17 -19
  36. data/ext/nokogiri/xml_reader.c +226 -175
  37. data/ext/nokogiri/xml_relax_ng.c +52 -28
  38. data/ext/nokogiri/xml_sax_parser.c +112 -112
  39. data/ext/nokogiri/xml_sax_parser_context.c +112 -86
  40. data/ext/nokogiri/xml_sax_push_parser.c +36 -27
  41. data/ext/nokogiri/xml_schema.c +112 -33
  42. data/ext/nokogiri/xml_syntax_error.c +42 -21
  43. data/ext/nokogiri/xml_text.c +13 -17
  44. data/ext/nokogiri/xml_xpath_context.c +223 -115
  45. data/ext/nokogiri/xslt_stylesheet.c +265 -173
  46. data/gumbo-parser/CHANGES.md +63 -0
  47. data/gumbo-parser/Makefile +101 -0
  48. data/gumbo-parser/THANKS +27 -0
  49. data/gumbo-parser/src/Makefile +34 -0
  50. data/gumbo-parser/src/README.md +41 -0
  51. data/gumbo-parser/src/ascii.c +75 -0
  52. data/gumbo-parser/src/ascii.h +115 -0
  53. data/gumbo-parser/src/attribute.c +42 -0
  54. data/gumbo-parser/src/attribute.h +17 -0
  55. data/gumbo-parser/src/char_ref.c +22225 -0
  56. data/gumbo-parser/src/char_ref.h +29 -0
  57. data/gumbo-parser/src/char_ref.rl +2154 -0
  58. data/gumbo-parser/src/error.c +626 -0
  59. data/gumbo-parser/src/error.h +148 -0
  60. data/gumbo-parser/src/foreign_attrs.c +104 -0
  61. data/gumbo-parser/src/foreign_attrs.gperf +27 -0
  62. data/gumbo-parser/src/gumbo.h +943 -0
  63. data/gumbo-parser/src/insertion_mode.h +33 -0
  64. data/gumbo-parser/src/macros.h +91 -0
  65. data/gumbo-parser/src/parser.c +4875 -0
  66. data/gumbo-parser/src/parser.h +41 -0
  67. data/gumbo-parser/src/replacement.h +33 -0
  68. data/gumbo-parser/src/string_buffer.c +103 -0
  69. data/gumbo-parser/src/string_buffer.h +68 -0
  70. data/gumbo-parser/src/string_piece.c +48 -0
  71. data/gumbo-parser/src/svg_attrs.c +174 -0
  72. data/gumbo-parser/src/svg_attrs.gperf +77 -0
  73. data/gumbo-parser/src/svg_tags.c +137 -0
  74. data/gumbo-parser/src/svg_tags.gperf +55 -0
  75. data/gumbo-parser/src/tag.c +222 -0
  76. data/gumbo-parser/src/tag_lookup.c +382 -0
  77. data/gumbo-parser/src/tag_lookup.gperf +169 -0
  78. data/gumbo-parser/src/tag_lookup.h +13 -0
  79. data/gumbo-parser/src/token_buffer.c +79 -0
  80. data/gumbo-parser/src/token_buffer.h +71 -0
  81. data/gumbo-parser/src/token_type.h +17 -0
  82. data/gumbo-parser/src/tokenizer.c +3463 -0
  83. data/gumbo-parser/src/tokenizer.h +112 -0
  84. data/gumbo-parser/src/tokenizer_states.h +339 -0
  85. data/gumbo-parser/src/utf8.c +245 -0
  86. data/gumbo-parser/src/utf8.h +164 -0
  87. data/gumbo-parser/src/util.c +68 -0
  88. data/gumbo-parser/src/util.h +30 -0
  89. data/gumbo-parser/src/vector.c +111 -0
  90. data/gumbo-parser/src/vector.h +45 -0
  91. data/lib/nokogiri/class_resolver.rb +67 -0
  92. data/lib/nokogiri/css/node.rb +10 -8
  93. data/lib/nokogiri/css/parser.rb +397 -377
  94. data/lib/nokogiri/css/parser.y +250 -245
  95. data/lib/nokogiri/css/parser_extras.rb +54 -49
  96. data/lib/nokogiri/css/syntax_error.rb +3 -1
  97. data/lib/nokogiri/css/tokenizer.rb +107 -104
  98. data/lib/nokogiri/css/tokenizer.rex +3 -2
  99. data/lib/nokogiri/css/xpath_visitor.rb +218 -91
  100. data/lib/nokogiri/css.rb +50 -17
  101. data/lib/nokogiri/decorators/slop.rb +9 -7
  102. data/lib/nokogiri/extension.rb +31 -0
  103. data/lib/nokogiri/gumbo.rb +15 -0
  104. data/lib/nokogiri/html.rb +38 -27
  105. data/lib/nokogiri/{html → html4}/builder.rb +4 -2
  106. data/lib/nokogiri/{html → html4}/document.rb +103 -105
  107. data/lib/nokogiri/html4/document_fragment.rb +54 -0
  108. data/lib/nokogiri/{html → html4}/element_description.rb +3 -1
  109. data/lib/nokogiri/html4/element_description_defaults.rb +578 -0
  110. data/lib/nokogiri/{html → html4}/entity_lookup.rb +4 -2
  111. data/lib/nokogiri/{html → html4}/sax/parser.rb +17 -16
  112. data/lib/nokogiri/html4/sax/parser_context.rb +20 -0
  113. data/lib/nokogiri/{html → html4}/sax/push_parser.rb +12 -11
  114. data/lib/nokogiri/html4.rb +46 -0
  115. data/lib/nokogiri/html5/document.rb +91 -0
  116. data/lib/nokogiri/html5/document_fragment.rb +83 -0
  117. data/lib/nokogiri/html5/node.rb +100 -0
  118. data/lib/nokogiri/html5.rb +478 -0
  119. data/lib/nokogiri/jruby/dependencies.rb +21 -0
  120. data/lib/nokogiri/syntax_error.rb +2 -0
  121. data/lib/nokogiri/version/constant.rb +6 -0
  122. data/lib/nokogiri/version/info.rb +222 -0
  123. data/lib/nokogiri/version.rb +3 -108
  124. data/lib/nokogiri/xml/attr.rb +6 -3
  125. data/lib/nokogiri/xml/attribute_decl.rb +3 -1
  126. data/lib/nokogiri/xml/builder.rb +97 -53
  127. data/lib/nokogiri/xml/cdata.rb +3 -1
  128. data/lib/nokogiri/xml/character_data.rb +2 -0
  129. data/lib/nokogiri/xml/document.rb +224 -86
  130. data/lib/nokogiri/xml/document_fragment.rb +57 -44
  131. data/lib/nokogiri/xml/dtd.rb +4 -2
  132. data/lib/nokogiri/xml/element_content.rb +2 -0
  133. data/lib/nokogiri/xml/element_decl.rb +3 -1
  134. data/lib/nokogiri/xml/entity_decl.rb +4 -2
  135. data/lib/nokogiri/xml/entity_reference.rb +2 -0
  136. data/lib/nokogiri/xml/namespace.rb +3 -0
  137. data/lib/nokogiri/xml/node/save_options.rb +10 -5
  138. data/lib/nokogiri/xml/node.rb +895 -377
  139. data/lib/nokogiri/xml/node_set.rb +92 -65
  140. data/lib/nokogiri/xml/notation.rb +13 -0
  141. data/lib/nokogiri/xml/parse_options.rb +22 -8
  142. data/lib/nokogiri/xml/pp/character_data.rb +9 -6
  143. data/lib/nokogiri/xml/pp/node.rb +25 -26
  144. data/lib/nokogiri/xml/pp.rb +4 -2
  145. data/lib/nokogiri/xml/processing_instruction.rb +3 -1
  146. data/lib/nokogiri/xml/reader.rb +23 -28
  147. data/lib/nokogiri/xml/relax_ng.rb +8 -2
  148. data/lib/nokogiri/xml/sax/document.rb +45 -49
  149. data/lib/nokogiri/xml/sax/parser.rb +38 -34
  150. data/lib/nokogiri/xml/sax/parser_context.rb +8 -3
  151. data/lib/nokogiri/xml/sax/push_parser.rb +6 -5
  152. data/lib/nokogiri/xml/sax.rb +6 -4
  153. data/lib/nokogiri/xml/schema.rb +19 -9
  154. data/lib/nokogiri/xml/searchable.rb +112 -72
  155. data/lib/nokogiri/xml/syntax_error.rb +6 -4
  156. data/lib/nokogiri/xml/text.rb +2 -0
  157. data/lib/nokogiri/xml/xpath/syntax_error.rb +4 -2
  158. data/lib/nokogiri/xml/xpath.rb +15 -4
  159. data/lib/nokogiri/xml/xpath_context.rb +3 -3
  160. data/lib/nokogiri/xml.rb +38 -37
  161. data/lib/nokogiri/xslt/stylesheet.rb +3 -1
  162. data/lib/nokogiri/xslt.rb +29 -20
  163. data/lib/nokogiri.rb +49 -65
  164. data/lib/xsd/xmlparser/nokogiri.rb +26 -24
  165. data/patches/libxml2/0001-Remove-script-macro-support.patch +40 -0
  166. data/patches/libxml2/0002-Update-entities-to-remove-handling-of-ssi.patch +44 -0
  167. data/patches/libxml2/0003-libxml2.la-is-in-top_builddir.patch +25 -0
  168. data/patches/libxml2/0004-use-glibc-strlen.patch +53 -0
  169. data/patches/libxml2/0005-avoid-isnan-isinf.patch +81 -0
  170. data/patches/libxml2/0006-update-automake-files-for-arm64.patch +3040 -0
  171. data/patches/libxml2/0008-htmlParseComment-handle-abruptly-closed-comments.patch +61 -0
  172. data/patches/libxml2/0009-allow-wildcard-namespaces.patch +77 -0
  173. data/patches/libxslt/0001-update-automake-files-for-arm64.patch +3037 -0
  174. data/ports/archives/libxml2-2.9.14.tar.xz +0 -0
  175. data/ports/archives/libxslt-1.1.35.tar.xz +0 -0
  176. metadata +220 -266
  177. data/.autotest +0 -22
  178. data/.cross_rubies +0 -8
  179. data/.editorconfig +0 -17
  180. data/.gemtest +0 -0
  181. data/.travis.yml +0 -63
  182. data/CHANGELOG.md +0 -1368
  183. data/CONTRIBUTING.md +0 -42
  184. data/C_CODING_STYLE.rdoc +0 -33
  185. data/Gemfile-libxml-ruby +0 -3
  186. data/Manifest.txt +0 -370
  187. data/ROADMAP.md +0 -111
  188. data/Rakefile +0 -348
  189. data/SECURITY.md +0 -19
  190. data/STANDARD_RESPONSES.md +0 -47
  191. data/Y_U_NO_GEMSPEC.md +0 -155
  192. data/appveyor.yml +0 -29
  193. data/build_all +0 -44
  194. data/ext/nokogiri/html_document.c +0 -170
  195. data/ext/nokogiri/html_document.h +0 -10
  196. data/ext/nokogiri/html_element_description.c +0 -279
  197. data/ext/nokogiri/html_element_description.h +0 -10
  198. data/ext/nokogiri/html_entity_lookup.c +0 -32
  199. data/ext/nokogiri/html_entity_lookup.h +0 -8
  200. data/ext/nokogiri/html_sax_parser_context.c +0 -116
  201. data/ext/nokogiri/html_sax_parser_context.h +0 -11
  202. data/ext/nokogiri/html_sax_push_parser.c +0 -87
  203. data/ext/nokogiri/html_sax_push_parser.h +0 -9
  204. data/ext/nokogiri/xml_attr.h +0 -9
  205. data/ext/nokogiri/xml_attribute_decl.h +0 -9
  206. data/ext/nokogiri/xml_cdata.h +0 -9
  207. data/ext/nokogiri/xml_comment.h +0 -9
  208. data/ext/nokogiri/xml_document.h +0 -23
  209. data/ext/nokogiri/xml_document_fragment.h +0 -10
  210. data/ext/nokogiri/xml_dtd.h +0 -10
  211. data/ext/nokogiri/xml_element_content.h +0 -10
  212. data/ext/nokogiri/xml_element_decl.h +0 -9
  213. data/ext/nokogiri/xml_encoding_handler.h +0 -8
  214. data/ext/nokogiri/xml_entity_decl.h +0 -10
  215. data/ext/nokogiri/xml_entity_reference.h +0 -9
  216. data/ext/nokogiri/xml_io.c +0 -61
  217. data/ext/nokogiri/xml_io.h +0 -11
  218. data/ext/nokogiri/xml_libxml2_hacks.c +0 -112
  219. data/ext/nokogiri/xml_libxml2_hacks.h +0 -12
  220. data/ext/nokogiri/xml_namespace.h +0 -15
  221. data/ext/nokogiri/xml_node.h +0 -13
  222. data/ext/nokogiri/xml_node_set.h +0 -12
  223. data/ext/nokogiri/xml_processing_instruction.h +0 -9
  224. data/ext/nokogiri/xml_reader.h +0 -10
  225. data/ext/nokogiri/xml_relax_ng.h +0 -9
  226. data/ext/nokogiri/xml_sax_parser.h +0 -39
  227. data/ext/nokogiri/xml_sax_parser_context.h +0 -10
  228. data/ext/nokogiri/xml_sax_push_parser.h +0 -9
  229. data/ext/nokogiri/xml_schema.h +0 -9
  230. data/ext/nokogiri/xml_syntax_error.h +0 -13
  231. data/ext/nokogiri/xml_text.h +0 -9
  232. data/ext/nokogiri/xml_xpath_context.h +0 -10
  233. data/ext/nokogiri/xslt_stylesheet.h +0 -14
  234. data/lib/nokogiri/html/document_fragment.rb +0 -49
  235. data/lib/nokogiri/html/element_description_defaults.rb +0 -671
  236. data/lib/nokogiri/html/sax/parser_context.rb +0 -16
  237. data/patches/libxml2/0001-Revert-Do-not-URI-escape-in-server-side-includes.patch +0 -78
  238. data/patches/libxml2/0002-Fix-nullptr-deref-with-XPath-logic-ops.patch +0 -54
  239. data/patches/libxml2/0003-Fix-infinite-loop-in-LZMA-decompression.patch +0 -50
  240. data/patches/sort-patches-by-date +0 -25
  241. data/ports/archives/libxml2-2.9.8.tar.gz +0 -0
  242. data/ports/archives/libxslt-1.1.32.tar.gz +0 -0
  243. data/suppressions/README.txt +0 -1
  244. data/suppressions/nokogiri_ruby-2.supp +0 -10
  245. data/tasks/test.rb +0 -100
  246. data/test/css/test_nthiness.rb +0 -226
  247. data/test/css/test_parser.rb +0 -386
  248. data/test/css/test_tokenizer.rb +0 -215
  249. data/test/css/test_xpath_visitor.rb +0 -96
  250. data/test/decorators/test_slop.rb +0 -23
  251. data/test/files/2ch.html +0 -108
  252. data/test/files/GH_1042.html +0 -18
  253. data/test/files/address_book.rlx +0 -12
  254. data/test/files/address_book.xml +0 -10
  255. data/test/files/atom.xml +0 -344
  256. data/test/files/bar/bar.xsd +0 -4
  257. data/test/files/bogus.xml +0 -0
  258. data/test/files/dont_hurt_em_why.xml +0 -422
  259. data/test/files/encoding.html +0 -82
  260. data/test/files/encoding.xhtml +0 -84
  261. data/test/files/exslt.xml +0 -8
  262. data/test/files/exslt.xslt +0 -35
  263. data/test/files/foo/foo.xsd +0 -4
  264. data/test/files/metacharset.html +0 -10
  265. data/test/files/namespace_pressure_test.xml +0 -1684
  266. data/test/files/noencoding.html +0 -47
  267. data/test/files/po.xml +0 -32
  268. data/test/files/po.xsd +0 -66
  269. data/test/files/saml/saml20assertion_schema.xsd +0 -283
  270. data/test/files/saml/saml20protocol_schema.xsd +0 -302
  271. data/test/files/saml/xenc_schema.xsd +0 -146
  272. data/test/files/saml/xmldsig_schema.xsd +0 -318
  273. data/test/files/shift_jis.html +0 -10
  274. data/test/files/shift_jis.xml +0 -5
  275. data/test/files/shift_jis_no_charset.html +0 -9
  276. data/test/files/slow-xpath.xml +0 -25509
  277. data/test/files/snuggles.xml +0 -3
  278. data/test/files/staff.dtd +0 -10
  279. data/test/files/staff.xml +0 -59
  280. data/test/files/staff.xslt +0 -32
  281. data/test/files/test_document_url/bar.xml +0 -2
  282. data/test/files/test_document_url/document.dtd +0 -4
  283. data/test/files/test_document_url/document.xml +0 -6
  284. data/test/files/tlm.html +0 -851
  285. data/test/files/to_be_xincluded.xml +0 -2
  286. data/test/files/valid_bar.xml +0 -2
  287. data/test/files/xinclude.xml +0 -4
  288. data/test/helper.rb +0 -271
  289. data/test/html/sax/test_parser.rb +0 -168
  290. data/test/html/sax/test_parser_context.rb +0 -46
  291. data/test/html/sax/test_parser_text.rb +0 -163
  292. data/test/html/sax/test_push_parser.rb +0 -87
  293. data/test/html/test_attributes.rb +0 -85
  294. data/test/html/test_builder.rb +0 -164
  295. data/test/html/test_document.rb +0 -712
  296. data/test/html/test_document_encoding.rb +0 -143
  297. data/test/html/test_document_fragment.rb +0 -310
  298. data/test/html/test_element_description.rb +0 -105
  299. data/test/html/test_named_characters.rb +0 -14
  300. data/test/html/test_node.rb +0 -212
  301. data/test/html/test_node_encoding.rb +0 -91
  302. data/test/namespaces/test_additional_namespaces_in_builder_doc.rb +0 -14
  303. data/test/namespaces/test_namespaces_aliased_default.rb +0 -24
  304. data/test/namespaces/test_namespaces_in_builder_doc.rb +0 -75
  305. data/test/namespaces/test_namespaces_in_cloned_doc.rb +0 -31
  306. data/test/namespaces/test_namespaces_in_created_doc.rb +0 -75
  307. data/test/namespaces/test_namespaces_in_parsed_doc.rb +0 -80
  308. data/test/namespaces/test_namespaces_preservation.rb +0 -31
  309. data/test/test_convert_xpath.rb +0 -135
  310. data/test/test_css_cache.rb +0 -47
  311. data/test/test_encoding_handler.rb +0 -48
  312. data/test/test_memory_leak.rb +0 -156
  313. data/test/test_nokogiri.rb +0 -138
  314. data/test/test_soap4r_sax.rb +0 -52
  315. data/test/test_xslt_transforms.rb +0 -314
  316. data/test/xml/node/test_save_options.rb +0 -28
  317. data/test/xml/node/test_subclass.rb +0 -44
  318. data/test/xml/sax/test_parser.rb +0 -402
  319. data/test/xml/sax/test_parser_context.rb +0 -115
  320. data/test/xml/sax/test_parser_text.rb +0 -202
  321. data/test/xml/sax/test_push_parser.rb +0 -265
  322. data/test/xml/test_attr.rb +0 -74
  323. data/test/xml/test_attribute_decl.rb +0 -86
  324. data/test/xml/test_builder.rb +0 -341
  325. data/test/xml/test_c14n.rb +0 -180
  326. data/test/xml/test_cdata.rb +0 -54
  327. data/test/xml/test_comment.rb +0 -40
  328. data/test/xml/test_document.rb +0 -982
  329. data/test/xml/test_document_encoding.rb +0 -31
  330. data/test/xml/test_document_fragment.rb +0 -298
  331. data/test/xml/test_dtd.rb +0 -187
  332. data/test/xml/test_dtd_encoding.rb +0 -31
  333. data/test/xml/test_element_content.rb +0 -56
  334. data/test/xml/test_element_decl.rb +0 -73
  335. data/test/xml/test_entity_decl.rb +0 -122
  336. data/test/xml/test_entity_reference.rb +0 -262
  337. data/test/xml/test_namespace.rb +0 -96
  338. data/test/xml/test_node.rb +0 -1325
  339. data/test/xml/test_node_attributes.rb +0 -115
  340. data/test/xml/test_node_encoding.rb +0 -75
  341. data/test/xml/test_node_inheritance.rb +0 -32
  342. data/test/xml/test_node_reparenting.rb +0 -592
  343. data/test/xml/test_node_set.rb +0 -809
  344. data/test/xml/test_parse_options.rb +0 -64
  345. data/test/xml/test_processing_instruction.rb +0 -30
  346. data/test/xml/test_reader.rb +0 -620
  347. data/test/xml/test_reader_encoding.rb +0 -134
  348. data/test/xml/test_relax_ng.rb +0 -60
  349. data/test/xml/test_schema.rb +0 -142
  350. data/test/xml/test_syntax_error.rb +0 -36
  351. data/test/xml/test_text.rb +0 -60
  352. data/test/xml/test_unparented_node.rb +0 -483
  353. data/test/xml/test_xinclude.rb +0 -83
  354. data/test/xml/test_xpath.rb +0 -470
  355. data/test/xslt/test_custom_functions.rb +0 -133
  356. data/test/xslt/test_exception_handling.rb +0 -37
@@ -1,402 +0,0 @@
1
- # -*- coding: utf-8 -*-
2
-
3
- require "helper"
4
-
5
- module Nokogiri
6
- module XML
7
- module SAX
8
- class TestParser < Nokogiri::SAX::TestCase
9
- def setup
10
- super
11
- @parser = XML::SAX::Parser.new(Doc.new)
12
- end
13
-
14
- def test_parser_context_yielded_io
15
- doc = Doc.new
16
- parser = XML::SAX::Parser.new doc
17
- xml = "<foo a='&amp;b'/>"
18
-
19
- block_called = false
20
- parser.parse(StringIO.new(xml)) { |ctx|
21
- block_called = true
22
- ctx.replace_entities = true
23
- }
24
-
25
- assert block_called
26
-
27
- assert_equal [['foo', [['a', '&b']]]], doc.start_elements
28
- end
29
-
30
- def test_parser_context_yielded_in_memory
31
- doc = Doc.new
32
- parser = XML::SAX::Parser.new doc
33
- xml = "<foo a='&amp;b'/>"
34
-
35
- block_called = false
36
- parser.parse(xml) { |ctx|
37
- block_called = true
38
- ctx.replace_entities = true
39
- }
40
-
41
- assert block_called
42
-
43
- assert_equal [['foo', [['a', '&b']]]], doc.start_elements
44
- end
45
-
46
- def test_empty_decl
47
- parser = XML::SAX::Parser.new(Doc.new)
48
-
49
- xml = "<root />"
50
- parser.parse xml
51
- assert parser.document.start_document_called, xml
52
- assert_nil parser.document.xmldecls, xml
53
- end
54
-
55
- def test_xml_decl
56
- [
57
- ['<?xml version="1.0" ?>',
58
- ['1.0']],
59
- ['<?xml version="1.0" encoding="UTF-8" ?>',
60
- ['1.0', 'UTF-8']],
61
- ['<?xml version="1.0" standalone="yes"?>',
62
- ['1.0', 'yes']],
63
- ['<?xml version="1.0" standalone="no"?>',
64
- ['1.0', 'no']],
65
- ['<?xml version="1.0" encoding="UTF-8" standalone="no"?>',
66
- ['1.0', "UTF-8", 'no']],
67
- ['<?xml version="1.0" encoding="ISO-8859-1" standalone="yes"?>',
68
- ['1.0', "ISO-8859-1", 'yes']]
69
- ].each do |decl, value|
70
- parser = XML::SAX::Parser.new(Doc.new)
71
-
72
- xml = "#{decl}\n<root />"
73
- parser.parse xml
74
- assert parser.document.start_document_called, xml
75
- assert_equal value, parser.document.xmldecls, xml
76
- end
77
- end
78
-
79
- def test_parse_empty
80
- assert_raises RuntimeError do
81
- @parser.parse('')
82
- end
83
- end
84
-
85
- def test_namespace_declaration_order_is_saved
86
- @parser.parse <<-eoxml
87
- <root xmlns:foo='http://foo.example.com/' xmlns='http://example.com/'>
88
- <a foo:bar='hello' />
89
- </root>
90
- eoxml
91
- assert_equal 2, @parser.document.start_elements_namespace.length
92
- el = @parser.document.start_elements_namespace.first
93
- namespaces = el.last
94
- assert_equal ['foo', 'http://foo.example.com/'], namespaces.first
95
- assert_equal [nil, 'http://example.com/'], namespaces.last
96
- end
97
-
98
- def test_bad_document_calls_error_handler
99
- @parser.parse('<foo><bar></foo>')
100
- assert @parser.document.errors
101
- assert @parser.document.errors.length > 0
102
- end
103
-
104
- def test_namespace_are_super_fun_to_parse
105
- @parser.parse <<-eoxml
106
- <root xmlns:foo='http://foo.example.com/'>
107
- <a foo:bar='hello' />
108
- <b xmlns:foo='http://bar.example.com/'>
109
- <a foo:bar='hello' />
110
- </b>
111
- <foo:bar>hello world</foo:bar>
112
- </root>
113
- eoxml
114
-
115
- assert @parser.document.start_elements_namespace.length > 0
116
- el = @parser.document.start_elements_namespace[1]
117
- assert_equal 'a', el.first
118
- assert_equal 1, el[1].length
119
-
120
- attribute = el[1].first
121
- assert_equal 'bar', attribute.localname
122
- assert_equal 'foo', attribute.prefix
123
- assert_equal 'hello', attribute.value
124
- assert_equal 'http://foo.example.com/', attribute.uri
125
- end
126
-
127
- def test_sax_v1_namespace_attribute_declarations
128
- @parser.parse <<-eoxml
129
- <root xmlns:foo='http://foo.example.com/' xmlns='http://example.com/'>
130
- <a foo:bar='hello' />
131
- <b xmlns:foo='http://bar.example.com/'>
132
- <a foo:bar='hello' />
133
- </b>
134
- <foo:bar>hello world</foo:bar>
135
- </root>
136
- eoxml
137
- assert @parser.document.start_elements.length > 0
138
- elm = @parser.document.start_elements.first
139
- assert_equal 'root', elm.first
140
- assert elm[1].include?(['xmlns:foo', 'http://foo.example.com/'])
141
- assert elm[1].include?(['xmlns', 'http://example.com/'])
142
- end
143
-
144
- def test_sax_v1_namespace_nodes
145
- @parser.parse <<-eoxml
146
- <root xmlns:foo='http://foo.example.com/' xmlns='http://example.com/'>
147
- <a foo:bar='hello' />
148
- <b xmlns:foo='http://bar.example.com/'>
149
- <a foo:bar='hello' />
150
- </b>
151
- <foo:bar>hello world</foo:bar>
152
- </root>
153
- eoxml
154
- assert_equal 5, @parser.document.start_elements.length
155
- assert @parser.document.start_elements.map(&:first).include?('foo:bar')
156
- assert @parser.document.end_elements.map(&:first).include?('foo:bar')
157
- end
158
-
159
- def test_start_is_called_without_namespace
160
- @parser.parse(<<-eoxml)
161
- <root xmlns:foo='http://foo.example.com/' xmlns='http://example.com/'>
162
- <foo:f><bar></foo:f>
163
- </root>
164
- eoxml
165
- assert_equal ['root', 'foo:f', 'bar'],
166
- @parser.document.start_elements.map(&:first)
167
- end
168
-
169
- def test_parser_sets_encoding
170
- parser = XML::SAX::Parser.new(Doc.new, 'UTF-8')
171
- assert_equal 'UTF-8', parser.encoding
172
- end
173
-
174
- def test_errors_set_after_parsing_bad_dom
175
- doc = Nokogiri::XML('<foo><bar></foo>')
176
- assert doc.errors
177
-
178
- @parser.parse('<foo><bar></foo>')
179
- assert @parser.document.errors
180
- assert @parser.document.errors.length > 0
181
-
182
- doc.errors.each do |error|
183
- assert_equal 'UTF-8', error.message.encoding.name
184
- end
185
-
186
- # when using JRuby Nokogiri, more errors will be generated as the DOM
187
- # parser continue to parse an ill formed document, while the sax parser
188
- # will stop at the first error
189
- unless Nokogiri.jruby?
190
- assert_equal doc.errors.length, @parser.document.errors.length
191
- end
192
- end
193
-
194
- def test_parse_with_memory_argument
195
- @parser.parse(File.read(XML_FILE))
196
- assert(@parser.document.cdata_blocks.length > 0)
197
- end
198
-
199
- def test_parse_with_io_argument
200
- File.open(XML_FILE, 'rb') { |f|
201
- @parser.parse(f)
202
- }
203
- assert(@parser.document.cdata_blocks.length > 0)
204
- end
205
-
206
- def test_parse_io
207
- call_parse_io_with_encoding 'UTF-8'
208
- end
209
-
210
- # issue #828
211
- def test_parse_io_lower_case_encoding
212
- call_parse_io_with_encoding 'utf-8'
213
- end
214
-
215
- def call_parse_io_with_encoding encoding
216
- File.open(XML_FILE, 'rb') { |f|
217
- @parser.parse_io(f, encoding)
218
- }
219
- assert(@parser.document.cdata_blocks.length > 0)
220
-
221
- called = false
222
- @parser.document.start_elements.flatten.each do |thing|
223
- assert_equal 'UTF-8', thing.encoding.name
224
- called = true
225
- end
226
- assert called
227
-
228
- called = false
229
- @parser.document.end_elements.flatten.each do |thing|
230
- assert_equal 'UTF-8', thing.encoding.name
231
- called = true
232
- end
233
- assert called
234
-
235
- called = false
236
- @parser.document.data.each do |thing|
237
- assert_equal 'UTF-8', thing.encoding.name
238
- called = true
239
- end
240
- assert called
241
-
242
- called = false
243
- @parser.document.comments.flatten.each do |thing|
244
- assert_equal 'UTF-8', thing.encoding.name
245
- called = true
246
- end
247
- assert called
248
-
249
- called = false
250
- @parser.document.cdata_blocks.flatten.each do |thing|
251
- assert_equal 'UTF-8', thing.encoding.name
252
- called = true
253
- end
254
- assert called
255
- end
256
-
257
- def test_parse_file
258
- @parser.parse_file(XML_FILE)
259
-
260
- assert_raises(ArgumentError) {
261
- @parser.parse_file(nil)
262
- }
263
-
264
- assert_raises(Errno::ENOENT) {
265
- @parser.parse_file('')
266
- }
267
- assert_raises(Errno::EISDIR) {
268
- @parser.parse_file(File.expand_path(File.dirname(__FILE__)))
269
- }
270
- end
271
-
272
- def test_render_parse_nil_param
273
- assert_raises(ArgumentError) { @parser.parse_memory(nil) }
274
- end
275
-
276
- def test_bad_encoding_args
277
- assert_raises(ArgumentError) { XML::SAX::Parser.new(Doc.new, 'not an encoding') }
278
- assert_raises(ArgumentError) { @parser.parse_io(StringIO.new('<root/>'), 'not an encoding')}
279
- end
280
-
281
- def test_ctag
282
- @parser.parse_memory(<<-eoxml)
283
- <p id="asdfasdf">
284
- <![CDATA[ This is a comment ]]>
285
- Paragraph 1
286
- </p>
287
- eoxml
288
- assert_equal [' This is a comment '], @parser.document.cdata_blocks
289
- end
290
-
291
- def test_comment
292
- @parser.parse_memory(<<-eoxml)
293
- <p id="asdfasdf">
294
- <!-- This is a comment -->
295
- Paragraph 1
296
- </p>
297
- eoxml
298
- assert_equal [' This is a comment '], @parser.document.comments
299
- end
300
-
301
- def test_characters
302
- @parser.parse_memory(<<-eoxml)
303
- <p id="asdfasdf">Paragraph 1</p>
304
- eoxml
305
- assert_equal ['Paragraph 1'], @parser.document.data
306
- end
307
-
308
- def test_end_document
309
- @parser.parse_memory(<<-eoxml)
310
- <p id="asdfasdf">Paragraph 1</p>
311
- eoxml
312
- assert @parser.document.end_document_called
313
- end
314
-
315
- def test_end_element
316
- @parser.parse_memory(<<-eoxml)
317
- <p id="asdfasdf">Paragraph 1</p>
318
- eoxml
319
- assert_equal [["p"]],
320
- @parser.document.end_elements
321
- end
322
-
323
- def test_start_element_attrs
324
- @parser.parse_memory(<<-eoxml)
325
- <p id="asdfasdf">Paragraph 1</p>
326
- eoxml
327
- assert_equal [["p", [["id", "asdfasdf"]]]],
328
- @parser.document.start_elements
329
- end
330
-
331
- def test_start_element_attrs_include_namespaces
332
- @parser.parse_memory(<<-eoxml)
333
- <p xmlns:foo='http://foo.example.com/'>Paragraph 1</p>
334
- eoxml
335
- assert_equal [["p", [['xmlns:foo', 'http://foo.example.com/']]]],
336
- @parser.document.start_elements
337
- end
338
-
339
- def test_processing_instruction
340
- @parser.parse_memory(<<-eoxml)
341
- <?xml-stylesheet href="a.xsl" type="text/xsl"?>
342
- <?xml version="1.0"?>
343
- eoxml
344
- assert_equal [['xml-stylesheet', 'href="a.xsl" type="text/xsl"']],
345
- @parser.document.processing_instructions
346
- end
347
-
348
- if Nokogiri.uses_libxml? # JRuby SAXParser only parses well-formed XML documents
349
- def test_parse_document
350
- @parser.parse_memory(<<-eoxml)
351
- <p>Paragraph 1</p>
352
- <p>Paragraph 2</p>
353
- eoxml
354
- end
355
- end
356
-
357
- def test_parser_attributes
358
- xml = <<-eoxml
359
- <?xml version="1.0" ?><root><foo a="&amp;b" c="&gt;d" /></root>
360
- eoxml
361
-
362
- block_called = false
363
- @parser.parse(xml) { |ctx|
364
- block_called = true
365
- ctx.replace_entities = true
366
- }
367
-
368
- assert block_called
369
-
370
- assert_equal [['root', []], ['foo', [['a', '&b'], ['c', '>d']]]], @parser.document.start_elements
371
- end
372
-
373
- def test_recovery_from_incorrect_xml
374
- xml = <<-eoxml
375
- <?xml version="1.0" ?><Root><Data><?xml version='1.0'?><Item>hey</Item></Data><Data><Item>hey yourself</Item></Data></Root>
376
- eoxml
377
-
378
- block_called = false
379
- @parser.parse(xml) { |ctx|
380
- block_called = true
381
- ctx.recovery = true
382
- }
383
-
384
- assert block_called
385
-
386
- assert_equal [['Root', []], ['Data', []], ['Item', []], ['Data', []], ['Item', []]], @parser.document.start_elements
387
- end
388
-
389
- def test_square_bracket_in_text # issue 1261
390
- xml = <<-eoxml
391
- <tu tuid="87dea04cf60af103ff09d1dba36ae820" segtype="block">
392
- <prop type="x-smartling-string-variant">en:#:home_page:#:stories:#:[6]:#:name</prop>
393
- <tuv xml:lang="en-US"><seg>Sandy S.</seg></tuv>
394
- </tu>
395
- eoxml
396
- @parser.parse(xml)
397
- assert @parser.document.data.must_include "en:#:home_page:#:stories:#:[6]:#:name"
398
- end
399
- end
400
- end
401
- end
402
- end
@@ -1,115 +0,0 @@
1
- # -*- coding: utf-8 -*-
2
-
3
- require "helper"
4
-
5
- module Nokogiri
6
- module XML
7
- module SAX
8
- class TestParserContext < Nokogiri::SAX::TestCase
9
- def setup
10
- @xml = '<hello>
11
-
12
- world
13
- <inter>
14
- <net>
15
- </net>
16
- </inter>
17
-
18
- </hello>'
19
- end
20
-
21
- class Counter < Nokogiri::XML::SAX::Document
22
- attr_accessor :context, :lines, :columns
23
- def initialize
24
- @context = nil
25
- @lines = []
26
- @columns = []
27
- end
28
-
29
- def start_element name, attrs = []
30
- @lines << [name, context.line]
31
- @columns << [name, context.column]
32
- end
33
- end
34
-
35
- def test_line_numbers
36
- sax_handler = Counter.new
37
-
38
- parser = Nokogiri::XML::SAX::Parser.new(sax_handler)
39
- parser.parse(@xml) do |ctx|
40
- sax_handler.context = ctx
41
- end
42
-
43
- assert_equal [["hello", 1], ["inter", 4], ["net", 5]],
44
- sax_handler.lines
45
- end
46
-
47
- def test_column_numbers
48
- sax_handler = Counter.new
49
-
50
- parser = Nokogiri::XML::SAX::Parser.new(sax_handler)
51
- parser.parse(@xml) do |ctx|
52
- sax_handler.context = ctx
53
- end
54
-
55
- assert_equal [["hello", 7], ["inter", 7], ["net", 9]],
56
- sax_handler.columns
57
- end
58
-
59
- def test_replace_entities
60
- pc = ParserContext.new StringIO.new('<root />'), 'UTF-8'
61
- pc.replace_entities = false
62
- assert_equal false, pc.replace_entities
63
-
64
- pc.replace_entities = true
65
- assert_equal true, pc.replace_entities
66
- end
67
-
68
- def test_recovery
69
- pc = ParserContext.new StringIO.new('<root />'), 'UTF-8'
70
- pc.recovery = false
71
- assert_equal false, pc.recovery
72
-
73
- pc.recovery = true
74
- assert_equal true, pc.recovery
75
- end
76
-
77
- def test_from_io
78
- ctx = ParserContext.new StringIO.new('fo'), 'UTF-8'
79
- assert ctx
80
- end
81
-
82
- def test_from_string
83
- assert ParserContext.new 'blah blah'
84
- end
85
-
86
- def test_parse_with
87
- ctx = ParserContext.new 'blah'
88
- assert_raises ArgumentError do
89
- ctx.parse_with nil
90
- end
91
- end
92
-
93
- def test_parse_with_sax_parser
94
- xml = "<root />"
95
- ctx = ParserContext.new xml
96
- parser = Parser.new Doc.new
97
- assert_nil ctx.parse_with parser
98
- end
99
-
100
- def test_from_file
101
- ctx = ParserContext.file XML_FILE
102
- parser = Parser.new Doc.new
103
- assert_nil ctx.parse_with parser
104
- end
105
-
106
- def test_parse_with_returns_nil
107
- xml = "<root />"
108
- ctx = ParserContext.new xml
109
- parser = Parser.new Doc.new
110
- assert_nil ctx.parse_with(parser)
111
- end
112
- end
113
- end
114
- end
115
- end