nokogiri 1.8.5 → 1.15.3

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (358) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile +40 -18
  3. data/LICENSE-DEPENDENCIES.md +1636 -1024
  4. data/LICENSE.md +5 -28
  5. data/README.md +203 -90
  6. data/bin/nokogiri +63 -50
  7. data/dependencies.yml +33 -61
  8. data/ext/nokogiri/depend +38 -358
  9. data/ext/nokogiri/extconf.rb +867 -417
  10. data/ext/nokogiri/gumbo.c +594 -0
  11. data/ext/nokogiri/html4_document.c +165 -0
  12. data/ext/nokogiri/html4_element_description.c +299 -0
  13. data/ext/nokogiri/html4_entity_lookup.c +37 -0
  14. data/ext/nokogiri/html4_sax_parser_context.c +108 -0
  15. data/ext/nokogiri/html4_sax_push_parser.c +95 -0
  16. data/ext/nokogiri/libxml2_backwards_compat.c +121 -0
  17. data/ext/nokogiri/nokogiri.c +251 -105
  18. data/ext/nokogiri/nokogiri.h +215 -90
  19. data/ext/nokogiri/test_global_handlers.c +40 -0
  20. data/ext/nokogiri/xml_attr.c +42 -37
  21. data/ext/nokogiri/xml_attribute_decl.c +22 -22
  22. data/ext/nokogiri/xml_cdata.c +40 -31
  23. data/ext/nokogiri/xml_comment.c +20 -27
  24. data/ext/nokogiri/xml_document.c +401 -237
  25. data/ext/nokogiri/xml_document_fragment.c +13 -17
  26. data/ext/nokogiri/xml_dtd.c +64 -58
  27. data/ext/nokogiri/xml_element_content.c +63 -55
  28. data/ext/nokogiri/xml_element_decl.c +31 -31
  29. data/ext/nokogiri/xml_encoding_handler.c +54 -21
  30. data/ext/nokogiri/xml_entity_decl.c +37 -35
  31. data/ext/nokogiri/xml_entity_reference.c +17 -19
  32. data/ext/nokogiri/xml_namespace.c +136 -62
  33. data/ext/nokogiri/xml_node.c +1387 -678
  34. data/ext/nokogiri/xml_node_set.c +246 -216
  35. data/ext/nokogiri/xml_processing_instruction.c +18 -20
  36. data/ext/nokogiri/xml_reader.c +347 -212
  37. data/ext/nokogiri/xml_relax_ng.c +86 -77
  38. data/ext/nokogiri/xml_sax_parser.c +149 -124
  39. data/ext/nokogiri/xml_sax_parser_context.c +145 -103
  40. data/ext/nokogiri/xml_sax_push_parser.c +64 -36
  41. data/ext/nokogiri/xml_schema.c +138 -81
  42. data/ext/nokogiri/xml_syntax_error.c +42 -21
  43. data/ext/nokogiri/xml_text.c +36 -26
  44. data/ext/nokogiri/xml_xpath_context.c +366 -178
  45. data/ext/nokogiri/xslt_stylesheet.c +335 -189
  46. data/gumbo-parser/CHANGES.md +63 -0
  47. data/gumbo-parser/Makefile +111 -0
  48. data/gumbo-parser/THANKS +27 -0
  49. data/gumbo-parser/src/Makefile +34 -0
  50. data/gumbo-parser/src/README.md +41 -0
  51. data/gumbo-parser/src/ascii.c +75 -0
  52. data/gumbo-parser/src/ascii.h +115 -0
  53. data/gumbo-parser/src/attribute.c +42 -0
  54. data/gumbo-parser/src/attribute.h +17 -0
  55. data/gumbo-parser/src/char_ref.c +22225 -0
  56. data/gumbo-parser/src/char_ref.h +29 -0
  57. data/gumbo-parser/src/char_ref.rl +2154 -0
  58. data/gumbo-parser/src/error.c +630 -0
  59. data/gumbo-parser/src/error.h +148 -0
  60. data/gumbo-parser/src/foreign_attrs.c +103 -0
  61. data/gumbo-parser/src/foreign_attrs.gperf +27 -0
  62. data/gumbo-parser/src/insertion_mode.h +33 -0
  63. data/gumbo-parser/src/macros.h +91 -0
  64. data/gumbo-parser/src/nokogiri_gumbo.h +944 -0
  65. data/gumbo-parser/src/parser.c +4891 -0
  66. data/gumbo-parser/src/parser.h +41 -0
  67. data/gumbo-parser/src/replacement.h +33 -0
  68. data/gumbo-parser/src/string_buffer.c +103 -0
  69. data/gumbo-parser/src/string_buffer.h +68 -0
  70. data/gumbo-parser/src/string_piece.c +48 -0
  71. data/gumbo-parser/src/svg_attrs.c +174 -0
  72. data/gumbo-parser/src/svg_attrs.gperf +77 -0
  73. data/gumbo-parser/src/svg_tags.c +137 -0
  74. data/gumbo-parser/src/svg_tags.gperf +55 -0
  75. data/gumbo-parser/src/tag.c +223 -0
  76. data/gumbo-parser/src/tag_lookup.c +382 -0
  77. data/gumbo-parser/src/tag_lookup.gperf +170 -0
  78. data/gumbo-parser/src/tag_lookup.h +13 -0
  79. data/gumbo-parser/src/token_buffer.c +79 -0
  80. data/gumbo-parser/src/token_buffer.h +71 -0
  81. data/gumbo-parser/src/token_type.h +17 -0
  82. data/gumbo-parser/src/tokenizer.c +3463 -0
  83. data/gumbo-parser/src/tokenizer.h +112 -0
  84. data/gumbo-parser/src/tokenizer_states.h +339 -0
  85. data/gumbo-parser/src/utf8.c +245 -0
  86. data/gumbo-parser/src/utf8.h +164 -0
  87. data/gumbo-parser/src/util.c +66 -0
  88. data/gumbo-parser/src/util.h +34 -0
  89. data/gumbo-parser/src/vector.c +111 -0
  90. data/gumbo-parser/src/vector.h +45 -0
  91. data/lib/nokogiri/class_resolver.rb +67 -0
  92. data/lib/nokogiri/css/node.rb +10 -8
  93. data/lib/nokogiri/css/parser.rb +397 -377
  94. data/lib/nokogiri/css/parser.y +250 -245
  95. data/lib/nokogiri/css/parser_extras.rb +54 -49
  96. data/lib/nokogiri/css/syntax_error.rb +3 -1
  97. data/lib/nokogiri/css/tokenizer.rb +107 -104
  98. data/lib/nokogiri/css/tokenizer.rex +3 -2
  99. data/lib/nokogiri/css/xpath_visitor.rb +224 -95
  100. data/lib/nokogiri/css.rb +56 -17
  101. data/lib/nokogiri/decorators/slop.rb +9 -7
  102. data/lib/nokogiri/encoding_handler.rb +57 -0
  103. data/lib/nokogiri/extension.rb +32 -0
  104. data/lib/nokogiri/gumbo.rb +15 -0
  105. data/lib/nokogiri/html.rb +38 -27
  106. data/lib/nokogiri/{html → html4}/builder.rb +4 -2
  107. data/lib/nokogiri/html4/document.rb +214 -0
  108. data/lib/nokogiri/html4/document_fragment.rb +54 -0
  109. data/lib/nokogiri/{html → html4}/element_description.rb +3 -1
  110. data/lib/nokogiri/html4/element_description_defaults.rb +2040 -0
  111. data/lib/nokogiri/html4/encoding_reader.rb +121 -0
  112. data/lib/nokogiri/{html → html4}/entity_lookup.rb +4 -2
  113. data/lib/nokogiri/{html → html4}/sax/parser.rb +17 -16
  114. data/lib/nokogiri/html4/sax/parser_context.rb +20 -0
  115. data/lib/nokogiri/{html → html4}/sax/push_parser.rb +12 -11
  116. data/lib/nokogiri/html4.rb +47 -0
  117. data/lib/nokogiri/html5/document.rb +168 -0
  118. data/lib/nokogiri/html5/document_fragment.rb +90 -0
  119. data/lib/nokogiri/html5/node.rb +103 -0
  120. data/lib/nokogiri/html5.rb +392 -0
  121. data/lib/nokogiri/jruby/dependencies.rb +3 -0
  122. data/lib/nokogiri/jruby/nokogiri_jars.rb +43 -0
  123. data/lib/nokogiri/syntax_error.rb +2 -0
  124. data/lib/nokogiri/version/constant.rb +6 -0
  125. data/lib/nokogiri/version/info.rb +223 -0
  126. data/lib/nokogiri/version.rb +3 -108
  127. data/lib/nokogiri/xml/attr.rb +55 -3
  128. data/lib/nokogiri/xml/attribute_decl.rb +6 -2
  129. data/lib/nokogiri/xml/builder.rb +98 -54
  130. data/lib/nokogiri/xml/cdata.rb +3 -1
  131. data/lib/nokogiri/xml/character_data.rb +2 -0
  132. data/lib/nokogiri/xml/document.rb +312 -126
  133. data/lib/nokogiri/xml/document_fragment.rb +104 -48
  134. data/lib/nokogiri/xml/dtd.rb +4 -2
  135. data/lib/nokogiri/xml/element_content.rb +12 -2
  136. data/lib/nokogiri/xml/element_decl.rb +6 -2
  137. data/lib/nokogiri/xml/entity_decl.rb +7 -3
  138. data/lib/nokogiri/xml/entity_reference.rb +2 -0
  139. data/lib/nokogiri/xml/namespace.rb +45 -0
  140. data/lib/nokogiri/xml/node/save_options.rb +23 -8
  141. data/lib/nokogiri/xml/node.rb +1093 -411
  142. data/lib/nokogiri/xml/node_set.rb +173 -67
  143. data/lib/nokogiri/xml/notation.rb +13 -0
  144. data/lib/nokogiri/xml/parse_options.rb +145 -52
  145. data/lib/nokogiri/xml/pp/character_data.rb +9 -6
  146. data/lib/nokogiri/xml/pp/node.rb +42 -30
  147. data/lib/nokogiri/xml/pp.rb +4 -2
  148. data/lib/nokogiri/xml/processing_instruction.rb +4 -1
  149. data/lib/nokogiri/xml/reader.rb +21 -28
  150. data/lib/nokogiri/xml/relax_ng.rb +8 -2
  151. data/lib/nokogiri/xml/sax/document.rb +45 -49
  152. data/lib/nokogiri/xml/sax/parser.rb +39 -36
  153. data/lib/nokogiri/xml/sax/parser_context.rb +8 -3
  154. data/lib/nokogiri/xml/sax/push_parser.rb +6 -5
  155. data/lib/nokogiri/xml/sax.rb +6 -4
  156. data/lib/nokogiri/xml/schema.rb +19 -9
  157. data/lib/nokogiri/xml/searchable.rb +120 -72
  158. data/lib/nokogiri/xml/syntax_error.rb +6 -4
  159. data/lib/nokogiri/xml/text.rb +2 -0
  160. data/lib/nokogiri/xml/xpath/syntax_error.rb +4 -2
  161. data/lib/nokogiri/xml/xpath.rb +15 -4
  162. data/lib/nokogiri/xml/xpath_context.rb +3 -3
  163. data/lib/nokogiri/xml.rb +38 -37
  164. data/lib/nokogiri/xslt/stylesheet.rb +3 -1
  165. data/lib/nokogiri/xslt.rb +101 -22
  166. data/lib/nokogiri.rb +59 -75
  167. data/lib/xsd/xmlparser/nokogiri.rb +29 -25
  168. data/patches/libxml2/0001-Remove-script-macro-support.patch +40 -0
  169. data/patches/libxml2/0002-Update-entities-to-remove-handling-of-ssi.patch +44 -0
  170. data/patches/libxml2/0003-libxml2.la-is-in-top_builddir.patch +25 -0
  171. data/patches/libxml2/0009-allow-wildcard-namespaces.patch +77 -0
  172. data/patches/libxml2/0010-update-config.guess-and-config.sub-for-libxml2.patch +224 -0
  173. data/patches/libxml2/0011-rip-out-libxml2-s-libc_single_threaded-support.patch +30 -0
  174. data/patches/libxslt/0001-update-config.guess-and-config.sub-for-libxslt.patch +224 -0
  175. data/ports/archives/libxml2-2.11.4.tar.xz +0 -0
  176. data/ports/archives/libxslt-1.1.38.tar.xz +0 -0
  177. metadata +126 -399
  178. data/.autotest +0 -22
  179. data/.cross_rubies +0 -8
  180. data/.editorconfig +0 -17
  181. data/.gemtest +0 -0
  182. data/.travis.yml +0 -63
  183. data/CHANGELOG.md +0 -1368
  184. data/CONTRIBUTING.md +0 -42
  185. data/C_CODING_STYLE.rdoc +0 -33
  186. data/Gemfile-libxml-ruby +0 -3
  187. data/Manifest.txt +0 -370
  188. data/ROADMAP.md +0 -111
  189. data/Rakefile +0 -348
  190. data/SECURITY.md +0 -19
  191. data/STANDARD_RESPONSES.md +0 -47
  192. data/Y_U_NO_GEMSPEC.md +0 -155
  193. data/appveyor.yml +0 -29
  194. data/build_all +0 -44
  195. data/ext/nokogiri/html_document.c +0 -170
  196. data/ext/nokogiri/html_document.h +0 -10
  197. data/ext/nokogiri/html_element_description.c +0 -279
  198. data/ext/nokogiri/html_element_description.h +0 -10
  199. data/ext/nokogiri/html_entity_lookup.c +0 -32
  200. data/ext/nokogiri/html_entity_lookup.h +0 -8
  201. data/ext/nokogiri/html_sax_parser_context.c +0 -116
  202. data/ext/nokogiri/html_sax_parser_context.h +0 -11
  203. data/ext/nokogiri/html_sax_push_parser.c +0 -87
  204. data/ext/nokogiri/html_sax_push_parser.h +0 -9
  205. data/ext/nokogiri/xml_attr.h +0 -9
  206. data/ext/nokogiri/xml_attribute_decl.h +0 -9
  207. data/ext/nokogiri/xml_cdata.h +0 -9
  208. data/ext/nokogiri/xml_comment.h +0 -9
  209. data/ext/nokogiri/xml_document.h +0 -23
  210. data/ext/nokogiri/xml_document_fragment.h +0 -10
  211. data/ext/nokogiri/xml_dtd.h +0 -10
  212. data/ext/nokogiri/xml_element_content.h +0 -10
  213. data/ext/nokogiri/xml_element_decl.h +0 -9
  214. data/ext/nokogiri/xml_encoding_handler.h +0 -8
  215. data/ext/nokogiri/xml_entity_decl.h +0 -10
  216. data/ext/nokogiri/xml_entity_reference.h +0 -9
  217. data/ext/nokogiri/xml_io.c +0 -61
  218. data/ext/nokogiri/xml_io.h +0 -11
  219. data/ext/nokogiri/xml_libxml2_hacks.c +0 -112
  220. data/ext/nokogiri/xml_libxml2_hacks.h +0 -12
  221. data/ext/nokogiri/xml_namespace.h +0 -15
  222. data/ext/nokogiri/xml_node.h +0 -13
  223. data/ext/nokogiri/xml_node_set.h +0 -12
  224. data/ext/nokogiri/xml_processing_instruction.h +0 -9
  225. data/ext/nokogiri/xml_reader.h +0 -10
  226. data/ext/nokogiri/xml_relax_ng.h +0 -9
  227. data/ext/nokogiri/xml_sax_parser.h +0 -39
  228. data/ext/nokogiri/xml_sax_parser_context.h +0 -10
  229. data/ext/nokogiri/xml_sax_push_parser.h +0 -9
  230. data/ext/nokogiri/xml_schema.h +0 -9
  231. data/ext/nokogiri/xml_syntax_error.h +0 -13
  232. data/ext/nokogiri/xml_text.h +0 -9
  233. data/ext/nokogiri/xml_xpath_context.h +0 -10
  234. data/ext/nokogiri/xslt_stylesheet.h +0 -14
  235. data/lib/nokogiri/html/document.rb +0 -335
  236. data/lib/nokogiri/html/document_fragment.rb +0 -49
  237. data/lib/nokogiri/html/element_description_defaults.rb +0 -671
  238. data/lib/nokogiri/html/sax/parser_context.rb +0 -16
  239. data/patches/libxml2/0001-Revert-Do-not-URI-escape-in-server-side-includes.patch +0 -78
  240. data/patches/libxml2/0002-Fix-nullptr-deref-with-XPath-logic-ops.patch +0 -54
  241. data/patches/libxml2/0003-Fix-infinite-loop-in-LZMA-decompression.patch +0 -50
  242. data/patches/sort-patches-by-date +0 -25
  243. data/ports/archives/libxml2-2.9.8.tar.gz +0 -0
  244. data/ports/archives/libxslt-1.1.32.tar.gz +0 -0
  245. data/suppressions/README.txt +0 -1
  246. data/suppressions/nokogiri_ruby-2.supp +0 -10
  247. data/tasks/test.rb +0 -100
  248. data/test/css/test_nthiness.rb +0 -226
  249. data/test/css/test_parser.rb +0 -386
  250. data/test/css/test_tokenizer.rb +0 -215
  251. data/test/css/test_xpath_visitor.rb +0 -96
  252. data/test/decorators/test_slop.rb +0 -23
  253. data/test/files/2ch.html +0 -108
  254. data/test/files/GH_1042.html +0 -18
  255. data/test/files/address_book.rlx +0 -12
  256. data/test/files/address_book.xml +0 -10
  257. data/test/files/atom.xml +0 -344
  258. data/test/files/bar/bar.xsd +0 -4
  259. data/test/files/bogus.xml +0 -0
  260. data/test/files/dont_hurt_em_why.xml +0 -422
  261. data/test/files/encoding.html +0 -82
  262. data/test/files/encoding.xhtml +0 -84
  263. data/test/files/exslt.xml +0 -8
  264. data/test/files/exslt.xslt +0 -35
  265. data/test/files/foo/foo.xsd +0 -4
  266. data/test/files/metacharset.html +0 -10
  267. data/test/files/namespace_pressure_test.xml +0 -1684
  268. data/test/files/noencoding.html +0 -47
  269. data/test/files/po.xml +0 -32
  270. data/test/files/po.xsd +0 -66
  271. data/test/files/saml/saml20assertion_schema.xsd +0 -283
  272. data/test/files/saml/saml20protocol_schema.xsd +0 -302
  273. data/test/files/saml/xenc_schema.xsd +0 -146
  274. data/test/files/saml/xmldsig_schema.xsd +0 -318
  275. data/test/files/shift_jis.html +0 -10
  276. data/test/files/shift_jis.xml +0 -5
  277. data/test/files/shift_jis_no_charset.html +0 -9
  278. data/test/files/slow-xpath.xml +0 -25509
  279. data/test/files/snuggles.xml +0 -3
  280. data/test/files/staff.dtd +0 -10
  281. data/test/files/staff.xml +0 -59
  282. data/test/files/staff.xslt +0 -32
  283. data/test/files/test_document_url/bar.xml +0 -2
  284. data/test/files/test_document_url/document.dtd +0 -4
  285. data/test/files/test_document_url/document.xml +0 -6
  286. data/test/files/tlm.html +0 -851
  287. data/test/files/to_be_xincluded.xml +0 -2
  288. data/test/files/valid_bar.xml +0 -2
  289. data/test/files/xinclude.xml +0 -4
  290. data/test/helper.rb +0 -271
  291. data/test/html/sax/test_parser.rb +0 -168
  292. data/test/html/sax/test_parser_context.rb +0 -46
  293. data/test/html/sax/test_parser_text.rb +0 -163
  294. data/test/html/sax/test_push_parser.rb +0 -87
  295. data/test/html/test_attributes.rb +0 -85
  296. data/test/html/test_builder.rb +0 -164
  297. data/test/html/test_document.rb +0 -712
  298. data/test/html/test_document_encoding.rb +0 -143
  299. data/test/html/test_document_fragment.rb +0 -310
  300. data/test/html/test_element_description.rb +0 -105
  301. data/test/html/test_named_characters.rb +0 -14
  302. data/test/html/test_node.rb +0 -212
  303. data/test/html/test_node_encoding.rb +0 -91
  304. data/test/namespaces/test_additional_namespaces_in_builder_doc.rb +0 -14
  305. data/test/namespaces/test_namespaces_aliased_default.rb +0 -24
  306. data/test/namespaces/test_namespaces_in_builder_doc.rb +0 -75
  307. data/test/namespaces/test_namespaces_in_cloned_doc.rb +0 -31
  308. data/test/namespaces/test_namespaces_in_created_doc.rb +0 -75
  309. data/test/namespaces/test_namespaces_in_parsed_doc.rb +0 -80
  310. data/test/namespaces/test_namespaces_preservation.rb +0 -31
  311. data/test/test_convert_xpath.rb +0 -135
  312. data/test/test_css_cache.rb +0 -47
  313. data/test/test_encoding_handler.rb +0 -48
  314. data/test/test_memory_leak.rb +0 -156
  315. data/test/test_nokogiri.rb +0 -138
  316. data/test/test_soap4r_sax.rb +0 -52
  317. data/test/test_xslt_transforms.rb +0 -314
  318. data/test/xml/node/test_save_options.rb +0 -28
  319. data/test/xml/node/test_subclass.rb +0 -44
  320. data/test/xml/sax/test_parser.rb +0 -402
  321. data/test/xml/sax/test_parser_context.rb +0 -115
  322. data/test/xml/sax/test_parser_text.rb +0 -202
  323. data/test/xml/sax/test_push_parser.rb +0 -265
  324. data/test/xml/test_attr.rb +0 -74
  325. data/test/xml/test_attribute_decl.rb +0 -86
  326. data/test/xml/test_builder.rb +0 -341
  327. data/test/xml/test_c14n.rb +0 -180
  328. data/test/xml/test_cdata.rb +0 -54
  329. data/test/xml/test_comment.rb +0 -40
  330. data/test/xml/test_document.rb +0 -982
  331. data/test/xml/test_document_encoding.rb +0 -31
  332. data/test/xml/test_document_fragment.rb +0 -298
  333. data/test/xml/test_dtd.rb +0 -187
  334. data/test/xml/test_dtd_encoding.rb +0 -31
  335. data/test/xml/test_element_content.rb +0 -56
  336. data/test/xml/test_element_decl.rb +0 -73
  337. data/test/xml/test_entity_decl.rb +0 -122
  338. data/test/xml/test_entity_reference.rb +0 -262
  339. data/test/xml/test_namespace.rb +0 -96
  340. data/test/xml/test_node.rb +0 -1325
  341. data/test/xml/test_node_attributes.rb +0 -115
  342. data/test/xml/test_node_encoding.rb +0 -75
  343. data/test/xml/test_node_inheritance.rb +0 -32
  344. data/test/xml/test_node_reparenting.rb +0 -592
  345. data/test/xml/test_node_set.rb +0 -809
  346. data/test/xml/test_parse_options.rb +0 -64
  347. data/test/xml/test_processing_instruction.rb +0 -30
  348. data/test/xml/test_reader.rb +0 -620
  349. data/test/xml/test_reader_encoding.rb +0 -134
  350. data/test/xml/test_relax_ng.rb +0 -60
  351. data/test/xml/test_schema.rb +0 -142
  352. data/test/xml/test_syntax_error.rb +0 -36
  353. data/test/xml/test_text.rb +0 -60
  354. data/test/xml/test_unparented_node.rb +0 -483
  355. data/test/xml/test_xinclude.rb +0 -83
  356. data/test/xml/test_xpath.rb +0 -470
  357. data/test/xslt/test_custom_functions.rb +0 -133
  358. data/test/xslt/test_exception_handling.rb +0 -37
@@ -1,402 +0,0 @@
1
- # -*- coding: utf-8 -*-
2
-
3
- require "helper"
4
-
5
- module Nokogiri
6
- module XML
7
- module SAX
8
- class TestParser < Nokogiri::SAX::TestCase
9
- def setup
10
- super
11
- @parser = XML::SAX::Parser.new(Doc.new)
12
- end
13
-
14
- def test_parser_context_yielded_io
15
- doc = Doc.new
16
- parser = XML::SAX::Parser.new doc
17
- xml = "<foo a='&amp;b'/>"
18
-
19
- block_called = false
20
- parser.parse(StringIO.new(xml)) { |ctx|
21
- block_called = true
22
- ctx.replace_entities = true
23
- }
24
-
25
- assert block_called
26
-
27
- assert_equal [['foo', [['a', '&b']]]], doc.start_elements
28
- end
29
-
30
- def test_parser_context_yielded_in_memory
31
- doc = Doc.new
32
- parser = XML::SAX::Parser.new doc
33
- xml = "<foo a='&amp;b'/>"
34
-
35
- block_called = false
36
- parser.parse(xml) { |ctx|
37
- block_called = true
38
- ctx.replace_entities = true
39
- }
40
-
41
- assert block_called
42
-
43
- assert_equal [['foo', [['a', '&b']]]], doc.start_elements
44
- end
45
-
46
- def test_empty_decl
47
- parser = XML::SAX::Parser.new(Doc.new)
48
-
49
- xml = "<root />"
50
- parser.parse xml
51
- assert parser.document.start_document_called, xml
52
- assert_nil parser.document.xmldecls, xml
53
- end
54
-
55
- def test_xml_decl
56
- [
57
- ['<?xml version="1.0" ?>',
58
- ['1.0']],
59
- ['<?xml version="1.0" encoding="UTF-8" ?>',
60
- ['1.0', 'UTF-8']],
61
- ['<?xml version="1.0" standalone="yes"?>',
62
- ['1.0', 'yes']],
63
- ['<?xml version="1.0" standalone="no"?>',
64
- ['1.0', 'no']],
65
- ['<?xml version="1.0" encoding="UTF-8" standalone="no"?>',
66
- ['1.0', "UTF-8", 'no']],
67
- ['<?xml version="1.0" encoding="ISO-8859-1" standalone="yes"?>',
68
- ['1.0', "ISO-8859-1", 'yes']]
69
- ].each do |decl, value|
70
- parser = XML::SAX::Parser.new(Doc.new)
71
-
72
- xml = "#{decl}\n<root />"
73
- parser.parse xml
74
- assert parser.document.start_document_called, xml
75
- assert_equal value, parser.document.xmldecls, xml
76
- end
77
- end
78
-
79
- def test_parse_empty
80
- assert_raises RuntimeError do
81
- @parser.parse('')
82
- end
83
- end
84
-
85
- def test_namespace_declaration_order_is_saved
86
- @parser.parse <<-eoxml
87
- <root xmlns:foo='http://foo.example.com/' xmlns='http://example.com/'>
88
- <a foo:bar='hello' />
89
- </root>
90
- eoxml
91
- assert_equal 2, @parser.document.start_elements_namespace.length
92
- el = @parser.document.start_elements_namespace.first
93
- namespaces = el.last
94
- assert_equal ['foo', 'http://foo.example.com/'], namespaces.first
95
- assert_equal [nil, 'http://example.com/'], namespaces.last
96
- end
97
-
98
- def test_bad_document_calls_error_handler
99
- @parser.parse('<foo><bar></foo>')
100
- assert @parser.document.errors
101
- assert @parser.document.errors.length > 0
102
- end
103
-
104
- def test_namespace_are_super_fun_to_parse
105
- @parser.parse <<-eoxml
106
- <root xmlns:foo='http://foo.example.com/'>
107
- <a foo:bar='hello' />
108
- <b xmlns:foo='http://bar.example.com/'>
109
- <a foo:bar='hello' />
110
- </b>
111
- <foo:bar>hello world</foo:bar>
112
- </root>
113
- eoxml
114
-
115
- assert @parser.document.start_elements_namespace.length > 0
116
- el = @parser.document.start_elements_namespace[1]
117
- assert_equal 'a', el.first
118
- assert_equal 1, el[1].length
119
-
120
- attribute = el[1].first
121
- assert_equal 'bar', attribute.localname
122
- assert_equal 'foo', attribute.prefix
123
- assert_equal 'hello', attribute.value
124
- assert_equal 'http://foo.example.com/', attribute.uri
125
- end
126
-
127
- def test_sax_v1_namespace_attribute_declarations
128
- @parser.parse <<-eoxml
129
- <root xmlns:foo='http://foo.example.com/' xmlns='http://example.com/'>
130
- <a foo:bar='hello' />
131
- <b xmlns:foo='http://bar.example.com/'>
132
- <a foo:bar='hello' />
133
- </b>
134
- <foo:bar>hello world</foo:bar>
135
- </root>
136
- eoxml
137
- assert @parser.document.start_elements.length > 0
138
- elm = @parser.document.start_elements.first
139
- assert_equal 'root', elm.first
140
- assert elm[1].include?(['xmlns:foo', 'http://foo.example.com/'])
141
- assert elm[1].include?(['xmlns', 'http://example.com/'])
142
- end
143
-
144
- def test_sax_v1_namespace_nodes
145
- @parser.parse <<-eoxml
146
- <root xmlns:foo='http://foo.example.com/' xmlns='http://example.com/'>
147
- <a foo:bar='hello' />
148
- <b xmlns:foo='http://bar.example.com/'>
149
- <a foo:bar='hello' />
150
- </b>
151
- <foo:bar>hello world</foo:bar>
152
- </root>
153
- eoxml
154
- assert_equal 5, @parser.document.start_elements.length
155
- assert @parser.document.start_elements.map(&:first).include?('foo:bar')
156
- assert @parser.document.end_elements.map(&:first).include?('foo:bar')
157
- end
158
-
159
- def test_start_is_called_without_namespace
160
- @parser.parse(<<-eoxml)
161
- <root xmlns:foo='http://foo.example.com/' xmlns='http://example.com/'>
162
- <foo:f><bar></foo:f>
163
- </root>
164
- eoxml
165
- assert_equal ['root', 'foo:f', 'bar'],
166
- @parser.document.start_elements.map(&:first)
167
- end
168
-
169
- def test_parser_sets_encoding
170
- parser = XML::SAX::Parser.new(Doc.new, 'UTF-8')
171
- assert_equal 'UTF-8', parser.encoding
172
- end
173
-
174
- def test_errors_set_after_parsing_bad_dom
175
- doc = Nokogiri::XML('<foo><bar></foo>')
176
- assert doc.errors
177
-
178
- @parser.parse('<foo><bar></foo>')
179
- assert @parser.document.errors
180
- assert @parser.document.errors.length > 0
181
-
182
- doc.errors.each do |error|
183
- assert_equal 'UTF-8', error.message.encoding.name
184
- end
185
-
186
- # when using JRuby Nokogiri, more errors will be generated as the DOM
187
- # parser continue to parse an ill formed document, while the sax parser
188
- # will stop at the first error
189
- unless Nokogiri.jruby?
190
- assert_equal doc.errors.length, @parser.document.errors.length
191
- end
192
- end
193
-
194
- def test_parse_with_memory_argument
195
- @parser.parse(File.read(XML_FILE))
196
- assert(@parser.document.cdata_blocks.length > 0)
197
- end
198
-
199
- def test_parse_with_io_argument
200
- File.open(XML_FILE, 'rb') { |f|
201
- @parser.parse(f)
202
- }
203
- assert(@parser.document.cdata_blocks.length > 0)
204
- end
205
-
206
- def test_parse_io
207
- call_parse_io_with_encoding 'UTF-8'
208
- end
209
-
210
- # issue #828
211
- def test_parse_io_lower_case_encoding
212
- call_parse_io_with_encoding 'utf-8'
213
- end
214
-
215
- def call_parse_io_with_encoding encoding
216
- File.open(XML_FILE, 'rb') { |f|
217
- @parser.parse_io(f, encoding)
218
- }
219
- assert(@parser.document.cdata_blocks.length > 0)
220
-
221
- called = false
222
- @parser.document.start_elements.flatten.each do |thing|
223
- assert_equal 'UTF-8', thing.encoding.name
224
- called = true
225
- end
226
- assert called
227
-
228
- called = false
229
- @parser.document.end_elements.flatten.each do |thing|
230
- assert_equal 'UTF-8', thing.encoding.name
231
- called = true
232
- end
233
- assert called
234
-
235
- called = false
236
- @parser.document.data.each do |thing|
237
- assert_equal 'UTF-8', thing.encoding.name
238
- called = true
239
- end
240
- assert called
241
-
242
- called = false
243
- @parser.document.comments.flatten.each do |thing|
244
- assert_equal 'UTF-8', thing.encoding.name
245
- called = true
246
- end
247
- assert called
248
-
249
- called = false
250
- @parser.document.cdata_blocks.flatten.each do |thing|
251
- assert_equal 'UTF-8', thing.encoding.name
252
- called = true
253
- end
254
- assert called
255
- end
256
-
257
- def test_parse_file
258
- @parser.parse_file(XML_FILE)
259
-
260
- assert_raises(ArgumentError) {
261
- @parser.parse_file(nil)
262
- }
263
-
264
- assert_raises(Errno::ENOENT) {
265
- @parser.parse_file('')
266
- }
267
- assert_raises(Errno::EISDIR) {
268
- @parser.parse_file(File.expand_path(File.dirname(__FILE__)))
269
- }
270
- end
271
-
272
- def test_render_parse_nil_param
273
- assert_raises(ArgumentError) { @parser.parse_memory(nil) }
274
- end
275
-
276
- def test_bad_encoding_args
277
- assert_raises(ArgumentError) { XML::SAX::Parser.new(Doc.new, 'not an encoding') }
278
- assert_raises(ArgumentError) { @parser.parse_io(StringIO.new('<root/>'), 'not an encoding')}
279
- end
280
-
281
- def test_ctag
282
- @parser.parse_memory(<<-eoxml)
283
- <p id="asdfasdf">
284
- <![CDATA[ This is a comment ]]>
285
- Paragraph 1
286
- </p>
287
- eoxml
288
- assert_equal [' This is a comment '], @parser.document.cdata_blocks
289
- end
290
-
291
- def test_comment
292
- @parser.parse_memory(<<-eoxml)
293
- <p id="asdfasdf">
294
- <!-- This is a comment -->
295
- Paragraph 1
296
- </p>
297
- eoxml
298
- assert_equal [' This is a comment '], @parser.document.comments
299
- end
300
-
301
- def test_characters
302
- @parser.parse_memory(<<-eoxml)
303
- <p id="asdfasdf">Paragraph 1</p>
304
- eoxml
305
- assert_equal ['Paragraph 1'], @parser.document.data
306
- end
307
-
308
- def test_end_document
309
- @parser.parse_memory(<<-eoxml)
310
- <p id="asdfasdf">Paragraph 1</p>
311
- eoxml
312
- assert @parser.document.end_document_called
313
- end
314
-
315
- def test_end_element
316
- @parser.parse_memory(<<-eoxml)
317
- <p id="asdfasdf">Paragraph 1</p>
318
- eoxml
319
- assert_equal [["p"]],
320
- @parser.document.end_elements
321
- end
322
-
323
- def test_start_element_attrs
324
- @parser.parse_memory(<<-eoxml)
325
- <p id="asdfasdf">Paragraph 1</p>
326
- eoxml
327
- assert_equal [["p", [["id", "asdfasdf"]]]],
328
- @parser.document.start_elements
329
- end
330
-
331
- def test_start_element_attrs_include_namespaces
332
- @parser.parse_memory(<<-eoxml)
333
- <p xmlns:foo='http://foo.example.com/'>Paragraph 1</p>
334
- eoxml
335
- assert_equal [["p", [['xmlns:foo', 'http://foo.example.com/']]]],
336
- @parser.document.start_elements
337
- end
338
-
339
- def test_processing_instruction
340
- @parser.parse_memory(<<-eoxml)
341
- <?xml-stylesheet href="a.xsl" type="text/xsl"?>
342
- <?xml version="1.0"?>
343
- eoxml
344
- assert_equal [['xml-stylesheet', 'href="a.xsl" type="text/xsl"']],
345
- @parser.document.processing_instructions
346
- end
347
-
348
- if Nokogiri.uses_libxml? # JRuby SAXParser only parses well-formed XML documents
349
- def test_parse_document
350
- @parser.parse_memory(<<-eoxml)
351
- <p>Paragraph 1</p>
352
- <p>Paragraph 2</p>
353
- eoxml
354
- end
355
- end
356
-
357
- def test_parser_attributes
358
- xml = <<-eoxml
359
- <?xml version="1.0" ?><root><foo a="&amp;b" c="&gt;d" /></root>
360
- eoxml
361
-
362
- block_called = false
363
- @parser.parse(xml) { |ctx|
364
- block_called = true
365
- ctx.replace_entities = true
366
- }
367
-
368
- assert block_called
369
-
370
- assert_equal [['root', []], ['foo', [['a', '&b'], ['c', '>d']]]], @parser.document.start_elements
371
- end
372
-
373
- def test_recovery_from_incorrect_xml
374
- xml = <<-eoxml
375
- <?xml version="1.0" ?><Root><Data><?xml version='1.0'?><Item>hey</Item></Data><Data><Item>hey yourself</Item></Data></Root>
376
- eoxml
377
-
378
- block_called = false
379
- @parser.parse(xml) { |ctx|
380
- block_called = true
381
- ctx.recovery = true
382
- }
383
-
384
- assert block_called
385
-
386
- assert_equal [['Root', []], ['Data', []], ['Item', []], ['Data', []], ['Item', []]], @parser.document.start_elements
387
- end
388
-
389
- def test_square_bracket_in_text # issue 1261
390
- xml = <<-eoxml
391
- <tu tuid="87dea04cf60af103ff09d1dba36ae820" segtype="block">
392
- <prop type="x-smartling-string-variant">en:#:home_page:#:stories:#:[6]:#:name</prop>
393
- <tuv xml:lang="en-US"><seg>Sandy S.</seg></tuv>
394
- </tu>
395
- eoxml
396
- @parser.parse(xml)
397
- assert @parser.document.data.must_include "en:#:home_page:#:stories:#:[6]:#:name"
398
- end
399
- end
400
- end
401
- end
402
- end
@@ -1,115 +0,0 @@
1
- # -*- coding: utf-8 -*-
2
-
3
- require "helper"
4
-
5
- module Nokogiri
6
- module XML
7
- module SAX
8
- class TestParserContext < Nokogiri::SAX::TestCase
9
- def setup
10
- @xml = '<hello>
11
-
12
- world
13
- <inter>
14
- <net>
15
- </net>
16
- </inter>
17
-
18
- </hello>'
19
- end
20
-
21
- class Counter < Nokogiri::XML::SAX::Document
22
- attr_accessor :context, :lines, :columns
23
- def initialize
24
- @context = nil
25
- @lines = []
26
- @columns = []
27
- end
28
-
29
- def start_element name, attrs = []
30
- @lines << [name, context.line]
31
- @columns << [name, context.column]
32
- end
33
- end
34
-
35
- def test_line_numbers
36
- sax_handler = Counter.new
37
-
38
- parser = Nokogiri::XML::SAX::Parser.new(sax_handler)
39
- parser.parse(@xml) do |ctx|
40
- sax_handler.context = ctx
41
- end
42
-
43
- assert_equal [["hello", 1], ["inter", 4], ["net", 5]],
44
- sax_handler.lines
45
- end
46
-
47
- def test_column_numbers
48
- sax_handler = Counter.new
49
-
50
- parser = Nokogiri::XML::SAX::Parser.new(sax_handler)
51
- parser.parse(@xml) do |ctx|
52
- sax_handler.context = ctx
53
- end
54
-
55
- assert_equal [["hello", 7], ["inter", 7], ["net", 9]],
56
- sax_handler.columns
57
- end
58
-
59
- def test_replace_entities
60
- pc = ParserContext.new StringIO.new('<root />'), 'UTF-8'
61
- pc.replace_entities = false
62
- assert_equal false, pc.replace_entities
63
-
64
- pc.replace_entities = true
65
- assert_equal true, pc.replace_entities
66
- end
67
-
68
- def test_recovery
69
- pc = ParserContext.new StringIO.new('<root />'), 'UTF-8'
70
- pc.recovery = false
71
- assert_equal false, pc.recovery
72
-
73
- pc.recovery = true
74
- assert_equal true, pc.recovery
75
- end
76
-
77
- def test_from_io
78
- ctx = ParserContext.new StringIO.new('fo'), 'UTF-8'
79
- assert ctx
80
- end
81
-
82
- def test_from_string
83
- assert ParserContext.new 'blah blah'
84
- end
85
-
86
- def test_parse_with
87
- ctx = ParserContext.new 'blah'
88
- assert_raises ArgumentError do
89
- ctx.parse_with nil
90
- end
91
- end
92
-
93
- def test_parse_with_sax_parser
94
- xml = "<root />"
95
- ctx = ParserContext.new xml
96
- parser = Parser.new Doc.new
97
- assert_nil ctx.parse_with parser
98
- end
99
-
100
- def test_from_file
101
- ctx = ParserContext.file XML_FILE
102
- parser = Parser.new Doc.new
103
- assert_nil ctx.parse_with parser
104
- end
105
-
106
- def test_parse_with_returns_nil
107
- xml = "<root />"
108
- ctx = ParserContext.new xml
109
- parser = Parser.new Doc.new
110
- assert_nil ctx.parse_with(parser)
111
- end
112
- end
113
- end
114
- end
115
- end