nokogiri 1.8.5 → 1.13.9

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (353) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile +3 -21
  3. data/LICENSE-DEPENDENCIES.md +1159 -868
  4. data/LICENSE.md +5 -28
  5. data/README.md +196 -90
  6. data/bin/nokogiri +63 -50
  7. data/dependencies.yml +13 -59
  8. data/ext/nokogiri/depend +38 -358
  9. data/ext/nokogiri/extconf.rb +765 -420
  10. data/ext/nokogiri/gumbo.c +584 -0
  11. data/ext/nokogiri/html4_document.c +166 -0
  12. data/ext/nokogiri/html4_element_description.c +294 -0
  13. data/ext/nokogiri/html4_entity_lookup.c +37 -0
  14. data/ext/nokogiri/html4_sax_parser_context.c +119 -0
  15. data/ext/nokogiri/html4_sax_push_parser.c +95 -0
  16. data/ext/nokogiri/libxml2_backwards_compat.c +121 -0
  17. data/ext/nokogiri/nokogiri.c +228 -91
  18. data/ext/nokogiri/nokogiri.h +199 -88
  19. data/ext/nokogiri/test_global_handlers.c +40 -0
  20. data/ext/nokogiri/xml_attr.c +42 -37
  21. data/ext/nokogiri/xml_attribute_decl.c +21 -21
  22. data/ext/nokogiri/xml_cdata.c +14 -19
  23. data/ext/nokogiri/xml_comment.c +19 -26
  24. data/ext/nokogiri/xml_document.c +296 -217
  25. data/ext/nokogiri/xml_document_fragment.c +12 -16
  26. data/ext/nokogiri/xml_dtd.c +64 -58
  27. data/ext/nokogiri/xml_element_content.c +31 -26
  28. data/ext/nokogiri/xml_element_decl.c +25 -25
  29. data/ext/nokogiri/xml_encoding_handler.c +43 -18
  30. data/ext/nokogiri/xml_entity_decl.c +37 -35
  31. data/ext/nokogiri/xml_entity_reference.c +16 -18
  32. data/ext/nokogiri/xml_namespace.c +99 -54
  33. data/ext/nokogiri/xml_node.c +1107 -658
  34. data/ext/nokogiri/xml_node_set.c +178 -166
  35. data/ext/nokogiri/xml_processing_instruction.c +17 -19
  36. data/ext/nokogiri/xml_reader.c +277 -175
  37. data/ext/nokogiri/xml_relax_ng.c +52 -28
  38. data/ext/nokogiri/xml_sax_parser.c +112 -112
  39. data/ext/nokogiri/xml_sax_parser_context.c +112 -86
  40. data/ext/nokogiri/xml_sax_push_parser.c +36 -27
  41. data/ext/nokogiri/xml_schema.c +114 -35
  42. data/ext/nokogiri/xml_syntax_error.c +42 -21
  43. data/ext/nokogiri/xml_text.c +14 -18
  44. data/ext/nokogiri/xml_xpath_context.c +226 -115
  45. data/ext/nokogiri/xslt_stylesheet.c +265 -173
  46. data/gumbo-parser/CHANGES.md +63 -0
  47. data/gumbo-parser/Makefile +101 -0
  48. data/gumbo-parser/THANKS +27 -0
  49. data/gumbo-parser/src/Makefile +34 -0
  50. data/gumbo-parser/src/README.md +41 -0
  51. data/gumbo-parser/src/ascii.c +75 -0
  52. data/gumbo-parser/src/ascii.h +115 -0
  53. data/gumbo-parser/src/attribute.c +42 -0
  54. data/gumbo-parser/src/attribute.h +17 -0
  55. data/gumbo-parser/src/char_ref.c +22225 -0
  56. data/gumbo-parser/src/char_ref.h +29 -0
  57. data/gumbo-parser/src/char_ref.rl +2154 -0
  58. data/gumbo-parser/src/error.c +626 -0
  59. data/gumbo-parser/src/error.h +148 -0
  60. data/gumbo-parser/src/foreign_attrs.c +104 -0
  61. data/gumbo-parser/src/foreign_attrs.gperf +27 -0
  62. data/gumbo-parser/src/gumbo.h +943 -0
  63. data/gumbo-parser/src/insertion_mode.h +33 -0
  64. data/gumbo-parser/src/macros.h +91 -0
  65. data/gumbo-parser/src/parser.c +4875 -0
  66. data/gumbo-parser/src/parser.h +41 -0
  67. data/gumbo-parser/src/replacement.h +33 -0
  68. data/gumbo-parser/src/string_buffer.c +103 -0
  69. data/gumbo-parser/src/string_buffer.h +68 -0
  70. data/gumbo-parser/src/string_piece.c +48 -0
  71. data/gumbo-parser/src/svg_attrs.c +174 -0
  72. data/gumbo-parser/src/svg_attrs.gperf +77 -0
  73. data/gumbo-parser/src/svg_tags.c +137 -0
  74. data/gumbo-parser/src/svg_tags.gperf +55 -0
  75. data/gumbo-parser/src/tag.c +222 -0
  76. data/gumbo-parser/src/tag_lookup.c +382 -0
  77. data/gumbo-parser/src/tag_lookup.gperf +169 -0
  78. data/gumbo-parser/src/tag_lookup.h +13 -0
  79. data/gumbo-parser/src/token_buffer.c +79 -0
  80. data/gumbo-parser/src/token_buffer.h +71 -0
  81. data/gumbo-parser/src/token_type.h +17 -0
  82. data/gumbo-parser/src/tokenizer.c +3463 -0
  83. data/gumbo-parser/src/tokenizer.h +112 -0
  84. data/gumbo-parser/src/tokenizer_states.h +339 -0
  85. data/gumbo-parser/src/utf8.c +245 -0
  86. data/gumbo-parser/src/utf8.h +164 -0
  87. data/gumbo-parser/src/util.c +68 -0
  88. data/gumbo-parser/src/util.h +30 -0
  89. data/gumbo-parser/src/vector.c +111 -0
  90. data/gumbo-parser/src/vector.h +45 -0
  91. data/lib/nokogiri/class_resolver.rb +67 -0
  92. data/lib/nokogiri/css/node.rb +10 -8
  93. data/lib/nokogiri/css/parser.rb +397 -377
  94. data/lib/nokogiri/css/parser.y +250 -245
  95. data/lib/nokogiri/css/parser_extras.rb +54 -49
  96. data/lib/nokogiri/css/syntax_error.rb +3 -1
  97. data/lib/nokogiri/css/tokenizer.rb +107 -104
  98. data/lib/nokogiri/css/tokenizer.rex +3 -2
  99. data/lib/nokogiri/css/xpath_visitor.rb +218 -91
  100. data/lib/nokogiri/css.rb +50 -17
  101. data/lib/nokogiri/decorators/slop.rb +9 -7
  102. data/lib/nokogiri/extension.rb +31 -0
  103. data/lib/nokogiri/gumbo.rb +15 -0
  104. data/lib/nokogiri/html.rb +38 -27
  105. data/lib/nokogiri/{html → html4}/builder.rb +4 -2
  106. data/lib/nokogiri/{html → html4}/document.rb +103 -105
  107. data/lib/nokogiri/html4/document_fragment.rb +54 -0
  108. data/lib/nokogiri/{html → html4}/element_description.rb +3 -1
  109. data/lib/nokogiri/html4/element_description_defaults.rb +578 -0
  110. data/lib/nokogiri/{html → html4}/entity_lookup.rb +4 -2
  111. data/lib/nokogiri/{html → html4}/sax/parser.rb +17 -16
  112. data/lib/nokogiri/html4/sax/parser_context.rb +20 -0
  113. data/lib/nokogiri/{html → html4}/sax/push_parser.rb +12 -11
  114. data/lib/nokogiri/html4.rb +46 -0
  115. data/lib/nokogiri/html5/document.rb +91 -0
  116. data/lib/nokogiri/html5/document_fragment.rb +83 -0
  117. data/lib/nokogiri/html5/node.rb +100 -0
  118. data/lib/nokogiri/html5.rb +478 -0
  119. data/lib/nokogiri/jruby/dependencies.rb +21 -0
  120. data/lib/nokogiri/syntax_error.rb +2 -0
  121. data/lib/nokogiri/version/constant.rb +6 -0
  122. data/lib/nokogiri/version/info.rb +222 -0
  123. data/lib/nokogiri/version.rb +3 -108
  124. data/lib/nokogiri/xml/attr.rb +6 -3
  125. data/lib/nokogiri/xml/attribute_decl.rb +3 -1
  126. data/lib/nokogiri/xml/builder.rb +97 -53
  127. data/lib/nokogiri/xml/cdata.rb +3 -1
  128. data/lib/nokogiri/xml/character_data.rb +2 -0
  129. data/lib/nokogiri/xml/document.rb +224 -86
  130. data/lib/nokogiri/xml/document_fragment.rb +57 -44
  131. data/lib/nokogiri/xml/dtd.rb +4 -2
  132. data/lib/nokogiri/xml/element_content.rb +2 -0
  133. data/lib/nokogiri/xml/element_decl.rb +3 -1
  134. data/lib/nokogiri/xml/entity_decl.rb +4 -2
  135. data/lib/nokogiri/xml/entity_reference.rb +2 -0
  136. data/lib/nokogiri/xml/namespace.rb +3 -0
  137. data/lib/nokogiri/xml/node/save_options.rb +10 -5
  138. data/lib/nokogiri/xml/node.rb +895 -377
  139. data/lib/nokogiri/xml/node_set.rb +92 -65
  140. data/lib/nokogiri/xml/notation.rb +13 -0
  141. data/lib/nokogiri/xml/parse_options.rb +22 -8
  142. data/lib/nokogiri/xml/pp/character_data.rb +9 -6
  143. data/lib/nokogiri/xml/pp/node.rb +25 -26
  144. data/lib/nokogiri/xml/pp.rb +4 -2
  145. data/lib/nokogiri/xml/processing_instruction.rb +3 -1
  146. data/lib/nokogiri/xml/reader.rb +21 -28
  147. data/lib/nokogiri/xml/relax_ng.rb +8 -2
  148. data/lib/nokogiri/xml/sax/document.rb +45 -49
  149. data/lib/nokogiri/xml/sax/parser.rb +38 -34
  150. data/lib/nokogiri/xml/sax/parser_context.rb +8 -3
  151. data/lib/nokogiri/xml/sax/push_parser.rb +6 -5
  152. data/lib/nokogiri/xml/sax.rb +6 -4
  153. data/lib/nokogiri/xml/schema.rb +19 -9
  154. data/lib/nokogiri/xml/searchable.rb +112 -72
  155. data/lib/nokogiri/xml/syntax_error.rb +6 -4
  156. data/lib/nokogiri/xml/text.rb +2 -0
  157. data/lib/nokogiri/xml/xpath/syntax_error.rb +4 -2
  158. data/lib/nokogiri/xml/xpath.rb +15 -4
  159. data/lib/nokogiri/xml/xpath_context.rb +3 -3
  160. data/lib/nokogiri/xml.rb +38 -37
  161. data/lib/nokogiri/xslt/stylesheet.rb +3 -1
  162. data/lib/nokogiri/xslt.rb +29 -20
  163. data/lib/nokogiri.rb +49 -65
  164. data/lib/xsd/xmlparser/nokogiri.rb +26 -24
  165. data/patches/libxml2/0001-Remove-script-macro-support.patch +40 -0
  166. data/patches/libxml2/0002-Update-entities-to-remove-handling-of-ssi.patch +44 -0
  167. data/patches/libxml2/0003-libxml2.la-is-in-top_builddir.patch +25 -0
  168. data/patches/libxml2/0005-avoid-isnan-isinf.patch +81 -0
  169. data/patches/libxml2/0009-allow-wildcard-namespaces.patch +77 -0
  170. data/patches/libxslt/0001-update-automake-files-for-arm64.patch +3037 -0
  171. data/ports/archives/libxml2-2.10.3.tar.xz +0 -0
  172. data/ports/archives/libxslt-1.1.37.tar.xz +0 -0
  173. metadata +211 -266
  174. data/.autotest +0 -22
  175. data/.cross_rubies +0 -8
  176. data/.editorconfig +0 -17
  177. data/.gemtest +0 -0
  178. data/.travis.yml +0 -63
  179. data/CHANGELOG.md +0 -1368
  180. data/CONTRIBUTING.md +0 -42
  181. data/C_CODING_STYLE.rdoc +0 -33
  182. data/Gemfile-libxml-ruby +0 -3
  183. data/Manifest.txt +0 -370
  184. data/ROADMAP.md +0 -111
  185. data/Rakefile +0 -348
  186. data/SECURITY.md +0 -19
  187. data/STANDARD_RESPONSES.md +0 -47
  188. data/Y_U_NO_GEMSPEC.md +0 -155
  189. data/appveyor.yml +0 -29
  190. data/build_all +0 -44
  191. data/ext/nokogiri/html_document.c +0 -170
  192. data/ext/nokogiri/html_document.h +0 -10
  193. data/ext/nokogiri/html_element_description.c +0 -279
  194. data/ext/nokogiri/html_element_description.h +0 -10
  195. data/ext/nokogiri/html_entity_lookup.c +0 -32
  196. data/ext/nokogiri/html_entity_lookup.h +0 -8
  197. data/ext/nokogiri/html_sax_parser_context.c +0 -116
  198. data/ext/nokogiri/html_sax_parser_context.h +0 -11
  199. data/ext/nokogiri/html_sax_push_parser.c +0 -87
  200. data/ext/nokogiri/html_sax_push_parser.h +0 -9
  201. data/ext/nokogiri/xml_attr.h +0 -9
  202. data/ext/nokogiri/xml_attribute_decl.h +0 -9
  203. data/ext/nokogiri/xml_cdata.h +0 -9
  204. data/ext/nokogiri/xml_comment.h +0 -9
  205. data/ext/nokogiri/xml_document.h +0 -23
  206. data/ext/nokogiri/xml_document_fragment.h +0 -10
  207. data/ext/nokogiri/xml_dtd.h +0 -10
  208. data/ext/nokogiri/xml_element_content.h +0 -10
  209. data/ext/nokogiri/xml_element_decl.h +0 -9
  210. data/ext/nokogiri/xml_encoding_handler.h +0 -8
  211. data/ext/nokogiri/xml_entity_decl.h +0 -10
  212. data/ext/nokogiri/xml_entity_reference.h +0 -9
  213. data/ext/nokogiri/xml_io.c +0 -61
  214. data/ext/nokogiri/xml_io.h +0 -11
  215. data/ext/nokogiri/xml_libxml2_hacks.c +0 -112
  216. data/ext/nokogiri/xml_libxml2_hacks.h +0 -12
  217. data/ext/nokogiri/xml_namespace.h +0 -15
  218. data/ext/nokogiri/xml_node.h +0 -13
  219. data/ext/nokogiri/xml_node_set.h +0 -12
  220. data/ext/nokogiri/xml_processing_instruction.h +0 -9
  221. data/ext/nokogiri/xml_reader.h +0 -10
  222. data/ext/nokogiri/xml_relax_ng.h +0 -9
  223. data/ext/nokogiri/xml_sax_parser.h +0 -39
  224. data/ext/nokogiri/xml_sax_parser_context.h +0 -10
  225. data/ext/nokogiri/xml_sax_push_parser.h +0 -9
  226. data/ext/nokogiri/xml_schema.h +0 -9
  227. data/ext/nokogiri/xml_syntax_error.h +0 -13
  228. data/ext/nokogiri/xml_text.h +0 -9
  229. data/ext/nokogiri/xml_xpath_context.h +0 -10
  230. data/ext/nokogiri/xslt_stylesheet.h +0 -14
  231. data/lib/nokogiri/html/document_fragment.rb +0 -49
  232. data/lib/nokogiri/html/element_description_defaults.rb +0 -671
  233. data/lib/nokogiri/html/sax/parser_context.rb +0 -16
  234. data/patches/libxml2/0001-Revert-Do-not-URI-escape-in-server-side-includes.patch +0 -78
  235. data/patches/libxml2/0002-Fix-nullptr-deref-with-XPath-logic-ops.patch +0 -54
  236. data/patches/libxml2/0003-Fix-infinite-loop-in-LZMA-decompression.patch +0 -50
  237. data/patches/sort-patches-by-date +0 -25
  238. data/ports/archives/libxml2-2.9.8.tar.gz +0 -0
  239. data/ports/archives/libxslt-1.1.32.tar.gz +0 -0
  240. data/suppressions/README.txt +0 -1
  241. data/suppressions/nokogiri_ruby-2.supp +0 -10
  242. data/tasks/test.rb +0 -100
  243. data/test/css/test_nthiness.rb +0 -226
  244. data/test/css/test_parser.rb +0 -386
  245. data/test/css/test_tokenizer.rb +0 -215
  246. data/test/css/test_xpath_visitor.rb +0 -96
  247. data/test/decorators/test_slop.rb +0 -23
  248. data/test/files/2ch.html +0 -108
  249. data/test/files/GH_1042.html +0 -18
  250. data/test/files/address_book.rlx +0 -12
  251. data/test/files/address_book.xml +0 -10
  252. data/test/files/atom.xml +0 -344
  253. data/test/files/bar/bar.xsd +0 -4
  254. data/test/files/bogus.xml +0 -0
  255. data/test/files/dont_hurt_em_why.xml +0 -422
  256. data/test/files/encoding.html +0 -82
  257. data/test/files/encoding.xhtml +0 -84
  258. data/test/files/exslt.xml +0 -8
  259. data/test/files/exslt.xslt +0 -35
  260. data/test/files/foo/foo.xsd +0 -4
  261. data/test/files/metacharset.html +0 -10
  262. data/test/files/namespace_pressure_test.xml +0 -1684
  263. data/test/files/noencoding.html +0 -47
  264. data/test/files/po.xml +0 -32
  265. data/test/files/po.xsd +0 -66
  266. data/test/files/saml/saml20assertion_schema.xsd +0 -283
  267. data/test/files/saml/saml20protocol_schema.xsd +0 -302
  268. data/test/files/saml/xenc_schema.xsd +0 -146
  269. data/test/files/saml/xmldsig_schema.xsd +0 -318
  270. data/test/files/shift_jis.html +0 -10
  271. data/test/files/shift_jis.xml +0 -5
  272. data/test/files/shift_jis_no_charset.html +0 -9
  273. data/test/files/slow-xpath.xml +0 -25509
  274. data/test/files/snuggles.xml +0 -3
  275. data/test/files/staff.dtd +0 -10
  276. data/test/files/staff.xml +0 -59
  277. data/test/files/staff.xslt +0 -32
  278. data/test/files/test_document_url/bar.xml +0 -2
  279. data/test/files/test_document_url/document.dtd +0 -4
  280. data/test/files/test_document_url/document.xml +0 -6
  281. data/test/files/tlm.html +0 -851
  282. data/test/files/to_be_xincluded.xml +0 -2
  283. data/test/files/valid_bar.xml +0 -2
  284. data/test/files/xinclude.xml +0 -4
  285. data/test/helper.rb +0 -271
  286. data/test/html/sax/test_parser.rb +0 -168
  287. data/test/html/sax/test_parser_context.rb +0 -46
  288. data/test/html/sax/test_parser_text.rb +0 -163
  289. data/test/html/sax/test_push_parser.rb +0 -87
  290. data/test/html/test_attributes.rb +0 -85
  291. data/test/html/test_builder.rb +0 -164
  292. data/test/html/test_document.rb +0 -712
  293. data/test/html/test_document_encoding.rb +0 -143
  294. data/test/html/test_document_fragment.rb +0 -310
  295. data/test/html/test_element_description.rb +0 -105
  296. data/test/html/test_named_characters.rb +0 -14
  297. data/test/html/test_node.rb +0 -212
  298. data/test/html/test_node_encoding.rb +0 -91
  299. data/test/namespaces/test_additional_namespaces_in_builder_doc.rb +0 -14
  300. data/test/namespaces/test_namespaces_aliased_default.rb +0 -24
  301. data/test/namespaces/test_namespaces_in_builder_doc.rb +0 -75
  302. data/test/namespaces/test_namespaces_in_cloned_doc.rb +0 -31
  303. data/test/namespaces/test_namespaces_in_created_doc.rb +0 -75
  304. data/test/namespaces/test_namespaces_in_parsed_doc.rb +0 -80
  305. data/test/namespaces/test_namespaces_preservation.rb +0 -31
  306. data/test/test_convert_xpath.rb +0 -135
  307. data/test/test_css_cache.rb +0 -47
  308. data/test/test_encoding_handler.rb +0 -48
  309. data/test/test_memory_leak.rb +0 -156
  310. data/test/test_nokogiri.rb +0 -138
  311. data/test/test_soap4r_sax.rb +0 -52
  312. data/test/test_xslt_transforms.rb +0 -314
  313. data/test/xml/node/test_save_options.rb +0 -28
  314. data/test/xml/node/test_subclass.rb +0 -44
  315. data/test/xml/sax/test_parser.rb +0 -402
  316. data/test/xml/sax/test_parser_context.rb +0 -115
  317. data/test/xml/sax/test_parser_text.rb +0 -202
  318. data/test/xml/sax/test_push_parser.rb +0 -265
  319. data/test/xml/test_attr.rb +0 -74
  320. data/test/xml/test_attribute_decl.rb +0 -86
  321. data/test/xml/test_builder.rb +0 -341
  322. data/test/xml/test_c14n.rb +0 -180
  323. data/test/xml/test_cdata.rb +0 -54
  324. data/test/xml/test_comment.rb +0 -40
  325. data/test/xml/test_document.rb +0 -982
  326. data/test/xml/test_document_encoding.rb +0 -31
  327. data/test/xml/test_document_fragment.rb +0 -298
  328. data/test/xml/test_dtd.rb +0 -187
  329. data/test/xml/test_dtd_encoding.rb +0 -31
  330. data/test/xml/test_element_content.rb +0 -56
  331. data/test/xml/test_element_decl.rb +0 -73
  332. data/test/xml/test_entity_decl.rb +0 -122
  333. data/test/xml/test_entity_reference.rb +0 -262
  334. data/test/xml/test_namespace.rb +0 -96
  335. data/test/xml/test_node.rb +0 -1325
  336. data/test/xml/test_node_attributes.rb +0 -115
  337. data/test/xml/test_node_encoding.rb +0 -75
  338. data/test/xml/test_node_inheritance.rb +0 -32
  339. data/test/xml/test_node_reparenting.rb +0 -592
  340. data/test/xml/test_node_set.rb +0 -809
  341. data/test/xml/test_parse_options.rb +0 -64
  342. data/test/xml/test_processing_instruction.rb +0 -30
  343. data/test/xml/test_reader.rb +0 -620
  344. data/test/xml/test_reader_encoding.rb +0 -134
  345. data/test/xml/test_relax_ng.rb +0 -60
  346. data/test/xml/test_schema.rb +0 -142
  347. data/test/xml/test_syntax_error.rb +0 -36
  348. data/test/xml/test_text.rb +0 -60
  349. data/test/xml/test_unparented_node.rb +0 -483
  350. data/test/xml/test_xinclude.rb +0 -83
  351. data/test/xml/test_xpath.rb +0 -470
  352. data/test/xslt/test_custom_functions.rb +0 -133
  353. data/test/xslt/test_exception_handling.rb +0 -37
@@ -1,20 +1,21 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Nokogiri
2
4
  module XML
3
5
  ###
4
- # SAX Parsers are event driven parsers. Nokogiri provides two different
5
- # event based parsers when dealing with XML. If you want to do SAX style
6
- # parsing using HTML, check out Nokogiri::HTML::SAX.
6
+ # SAX Parsers are event driven parsers. Nokogiri provides two different event based parsers when
7
+ # dealing with XML. If you want to do SAX style parsing using HTML, check out
8
+ # Nokogiri::HTML4::SAX.
7
9
  #
8
- # The basic way a SAX style parser works is by creating a parser,
9
- # telling the parser about the events we're interested in, then giving
10
- # the parser some XML to process. The parser will notify you when
11
- # it encounters events you said you would like to know about.
10
+ # The basic way a SAX style parser works is by creating a parser, telling the parser about the
11
+ # events we're interested in, then giving the parser some XML to process. The parser will notify
12
+ # you when it encounters events you said you would like to know about.
12
13
  #
13
- # To register for events, you simply subclass Nokogiri::XML::SAX::Document,
14
- # and implement the methods for which you would like notification.
14
+ # To register for events, you simply subclass Nokogiri::XML::SAX::Document, and implement the
15
+ # methods for which you would like notification.
15
16
  #
16
- # For example, if I want to be notified when a document ends, and when an
17
- # element starts, I would write a class like this:
17
+ # For example, if I want to be notified when a document ends, and when an element starts, I
18
+ # would write a class like this:
18
19
  #
19
20
  # class MyDocument < Nokogiri::XML::SAX::Document
20
21
  # def end_document
@@ -26,8 +27,7 @@ module Nokogiri
26
27
  # end
27
28
  # end
28
29
  #
29
- # Then I would instantiate a SAX parser with this document, and feed the
30
- # parser some XML
30
+ # Then I would instantiate a SAX parser with this document, and feed the parser some XML
31
31
  #
32
32
  # # Create a new parser
33
33
  # parser = Nokogiri::XML::SAX::Parser.new(MyDocument.new)
@@ -35,25 +35,21 @@ module Nokogiri
35
35
  # # Feed the parser some XML
36
36
  # parser.parse(File.open(ARGV[0]))
37
37
  #
38
- # Now my document handler will be called when each node starts, and when
39
- # then document ends. To see what kinds of events are available, take
40
- # a look at Nokogiri::XML::SAX::Document.
38
+ # Now my document handler will be called when each node starts, and when the document ends. To
39
+ # see what kinds of events are available, take a look at Nokogiri::XML::SAX::Document.
41
40
  #
42
- # Two SAX parsers for XML are available, a parser that reads from a string
43
- # or IO object as it feels necessary, and a parser that lets you spoon
44
- # feed it XML. If you want to let Nokogiri deal with reading your XML,
45
- # use the Nokogiri::XML::SAX::Parser. If you want to have fine grain
41
+ # Two SAX parsers for XML are available, a parser that reads from a string or IO object as it
42
+ # feels necessary, and a parser that lets you spoon feed it XML. If you want to let Nokogiri
43
+ # deal with reading your XML, use the Nokogiri::XML::SAX::Parser. If you want to have fine grain
46
44
  # control over the XML input, use the Nokogiri::XML::SAX::PushParser.
47
45
  module SAX
48
46
  ###
49
- # This class is used for registering types of events you are interested
50
- # in handling. All of the methods on this class are available as
51
- # possible events while parsing an XML document. To register for any
52
- # particular event, just subclass this class and implement the methods
53
- # you are interested in knowing about.
47
+ # This class is used for registering types of events you are interested in handling. All of
48
+ # the methods on this class are available as possible events while parsing an XML document. To
49
+ # register for any particular event, just subclass this class and implement the methods you
50
+ # are interested in knowing about.
54
51
  #
55
- # To only be notified about start and end element events, write a class
56
- # like this:
52
+ # To only be notified about start and end element events, write a class like this:
57
53
  #
58
54
  # class MyDocument < Nokogiri::XML::SAX::Document
59
55
  # def start_element name, attrs = []
@@ -65,12 +61,12 @@ module Nokogiri
65
61
  # end
66
62
  # end
67
63
  #
68
- # You can use this event handler for any SAX style parser included with
69
- # Nokogiri. See Nokogiri::XML::SAX, and Nokogiri::HTML::SAX.
64
+ # You can use this event handler for any SAX style parser included with Nokogiri. See
65
+ # Nokogiri::XML::SAX, and Nokogiri::HTML4::SAX.
70
66
  class Document
71
67
  ###
72
68
  # Called when an XML declaration is parsed
73
- def xmldecl version, encoding, standalone
69
+ def xmldecl(version, encoding, standalone)
74
70
  end
75
71
 
76
72
  ###
@@ -88,13 +84,13 @@ module Nokogiri
88
84
  # * +name+ is the name of the tag
89
85
  # * +attrs+ are an assoc list of namespaces and attributes, e.g.:
90
86
  # [ ["xmlns:foo", "http://sample.net"], ["size", "large"] ]
91
- def start_element name, attrs = []
87
+ def start_element(name, attrs = [])
92
88
  end
93
89
 
94
90
  ###
95
91
  # Called at the end of an element
96
92
  # +name+ is the tag name
97
- def end_element name
93
+ def end_element(name)
98
94
  end
99
95
 
100
96
  ###
@@ -104,16 +100,16 @@ module Nokogiri
104
100
  # +prefix+ is the namespace prefix for the element
105
101
  # +uri+ is the associated namespace URI
106
102
  # +ns+ is a hash of namespace prefix:urls associated with the element
107
- def start_element_namespace name, attrs = [], prefix = nil, uri = nil, ns = []
103
+ def start_element_namespace(name, attrs = [], prefix = nil, uri = nil, ns = [])
108
104
  ###
109
105
  # Deal with SAX v1 interface
110
- name = [prefix, name].compact.join(':')
111
- attributes = ns.map { |ns_prefix,ns_uri|
112
- [['xmlns', ns_prefix].compact.join(':'), ns_uri]
113
- } + attrs.map { |attr|
114
- [[attr.prefix, attr.localname].compact.join(':'), attr.value]
115
- }
116
- start_element name, attributes
106
+ name = [prefix, name].compact.join(":")
107
+ attributes = ns.map do |ns_prefix, ns_uri|
108
+ [["xmlns", ns_prefix].compact.join(":"), ns_uri]
109
+ end + attrs.map do |attr|
110
+ [[attr.prefix, attr.localname].compact.join(":"), attr.value]
111
+ end
112
+ start_element(name, attributes)
117
113
  end
118
114
 
119
115
  ###
@@ -121,49 +117,49 @@ module Nokogiri
121
117
  # +name+ is the element's name
122
118
  # +prefix+ is the namespace prefix associated with the element
123
119
  # +uri+ is the associated namespace URI
124
- def end_element_namespace name, prefix = nil, uri = nil
120
+ def end_element_namespace(name, prefix = nil, uri = nil)
125
121
  ###
126
122
  # Deal with SAX v1 interface
127
- end_element [prefix, name].compact.join(':')
123
+ end_element([prefix, name].compact.join(":"))
128
124
  end
129
125
 
130
126
  ###
131
- # Characters read between a tag. This method might be called multiple
127
+ # Characters read between tags. This method might be called multiple
132
128
  # times given one contiguous string of characters.
133
129
  #
134
130
  # +string+ contains the character data
135
- def characters string
131
+ def characters(string)
136
132
  end
137
133
 
138
134
  ###
139
135
  # Called when comments are encountered
140
136
  # +string+ contains the comment data
141
- def comment string
137
+ def comment(string)
142
138
  end
143
139
 
144
140
  ###
145
141
  # Called on document warnings
146
142
  # +string+ contains the warning
147
- def warning string
143
+ def warning(string)
148
144
  end
149
145
 
150
146
  ###
151
147
  # Called on document errors
152
148
  # +string+ contains the error
153
- def error string
149
+ def error(string)
154
150
  end
155
151
 
156
152
  ###
157
153
  # Called when cdata blocks are found
158
154
  # +string+ contains the cdata content
159
- def cdata_block string
155
+ def cdata_block(string)
160
156
  end
161
157
 
162
158
  ###
163
159
  # Called when processing instructions are found
164
160
  # +name+ is the target of the instruction
165
161
  # +content+ is the value of the instruction
166
- def processing_instruction name, content
162
+ def processing_instruction(name, content)
167
163
  end
168
164
  end
169
165
  end
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Nokogiri
2
4
  module XML
3
5
  module SAX
@@ -35,29 +37,29 @@ module Nokogiri
35
37
 
36
38
  # Encodings this parser supports
37
39
  ENCODINGS = {
38
- 'NONE' => 0, # No char encoding detected
39
- 'UTF-8' => 1, # UTF-8
40
- 'UTF16LE' => 2, # UTF-16 little endian
41
- 'UTF16BE' => 3, # UTF-16 big endian
42
- 'UCS4LE' => 4, # UCS-4 little endian
43
- 'UCS4BE' => 5, # UCS-4 big endian
44
- 'EBCDIC' => 6, # EBCDIC uh!
45
- 'UCS4-2143' => 7, # UCS-4 unusual ordering
46
- 'UCS4-3412' => 8, # UCS-4 unusual ordering
47
- 'UCS2' => 9, # UCS-2
48
- 'ISO-8859-1' => 10, # ISO-8859-1 ISO Latin 1
49
- 'ISO-8859-2' => 11, # ISO-8859-2 ISO Latin 2
50
- 'ISO-8859-3' => 12, # ISO-8859-3
51
- 'ISO-8859-4' => 13, # ISO-8859-4
52
- 'ISO-8859-5' => 14, # ISO-8859-5
53
- 'ISO-8859-6' => 15, # ISO-8859-6
54
- 'ISO-8859-7' => 16, # ISO-8859-7
55
- 'ISO-8859-8' => 17, # ISO-8859-8
56
- 'ISO-8859-9' => 18, # ISO-8859-9
57
- 'ISO-2022-JP' => 19, # ISO-2022-JP
58
- 'SHIFT-JIS' => 20, # Shift_JIS
59
- 'EUC-JP' => 21, # EUC-JP
60
- 'ASCII' => 22, # pure ASCII
40
+ "NONE" => 0, # No char encoding detected
41
+ "UTF-8" => 1, # UTF-8
42
+ "UTF16LE" => 2, # UTF-16 little endian
43
+ "UTF16BE" => 3, # UTF-16 big endian
44
+ "UCS4LE" => 4, # UCS-4 little endian
45
+ "UCS4BE" => 5, # UCS-4 big endian
46
+ "EBCDIC" => 6, # EBCDIC uh!
47
+ "UCS4-2143" => 7, # UCS-4 unusual ordering
48
+ "UCS4-3412" => 8, # UCS-4 unusual ordering
49
+ "UCS2" => 9, # UCS-2
50
+ "ISO-8859-1" => 10, # ISO-8859-1 ISO Latin 1
51
+ "ISO-8859-2" => 11, # ISO-8859-2 ISO Latin 2
52
+ "ISO-8859-3" => 12, # ISO-8859-3
53
+ "ISO-8859-4" => 13, # ISO-8859-4
54
+ "ISO-8859-5" => 14, # ISO-8859-5
55
+ "ISO-8859-6" => 15, # ISO-8859-6
56
+ "ISO-8859-7" => 16, # ISO-8859-7
57
+ "ISO-8859-8" => 17, # ISO-8859-8
58
+ "ISO-8859-9" => 18, # ISO-8859-9
59
+ "ISO-2022-JP" => 19, # ISO-2022-JP
60
+ "SHIFT-JIS" => 20, # Shift_JIS
61
+ "EUC-JP" => 21, # EUC-JP
62
+ "ASCII" => 22, # pure ASCII
61
63
  }
62
64
 
63
65
  # The Nokogiri::XML::SAX::Document where events will be sent.
@@ -67,7 +69,7 @@ module Nokogiri
67
69
  attr_accessor :encoding
68
70
 
69
71
  # Create a new Parser with +doc+ and +encoding+
70
- def initialize doc = Nokogiri::XML::SAX::Document.new, encoding = 'UTF-8'
72
+ def initialize(doc = Nokogiri::XML::SAX::Document.new, encoding = "UTF-8")
71
73
  @encoding = check_encoding(encoding)
72
74
  @document = doc
73
75
  @warned = false
@@ -76,7 +78,7 @@ module Nokogiri
76
78
  ###
77
79
  # Parse given +thing+ which may be a string containing xml, or an
78
80
  # IO object.
79
- def parse thing, &block
81
+ def parse(thing, &block)
80
82
  if thing.respond_to?(:read) && thing.respond_to?(:close)
81
83
  parse_io(thing, &block)
82
84
  else
@@ -86,34 +88,36 @@ module Nokogiri
86
88
 
87
89
  ###
88
90
  # Parse given +io+
89
- def parse_io io, encoding = 'ASCII'
91
+ def parse_io(io, encoding = "ASCII")
90
92
  @encoding = check_encoding(encoding)
91
93
  ctx = ParserContext.io(io, ENCODINGS[@encoding])
92
94
  yield ctx if block_given?
93
- ctx.parse_with self
95
+ ctx.parse_with(self)
94
96
  end
95
97
 
96
98
  ###
97
99
  # Parse a file with +filename+
98
- def parse_file filename
100
+ def parse_file(filename)
99
101
  raise ArgumentError unless filename
100
102
  raise Errno::ENOENT unless File.exist?(filename)
101
103
  raise Errno::EISDIR if File.directory?(filename)
102
- ctx = ParserContext.file filename
104
+
105
+ ctx = ParserContext.file(filename)
103
106
  yield ctx if block_given?
104
- ctx.parse_with self
107
+ ctx.parse_with(self)
105
108
  end
106
109
 
107
- def parse_memory data
108
- ctx = ParserContext.memory data
110
+ def parse_memory(data)
111
+ ctx = ParserContext.memory(data)
109
112
  yield ctx if block_given?
110
- ctx.parse_with self
113
+ ctx.parse_with(self)
111
114
  end
112
115
 
113
116
  private
117
+
114
118
  def check_encoding(encoding)
115
119
  encoding.upcase.tap do |enc|
116
- raise ArgumentError.new("'#{enc}' is not a valid encoding") unless ENCODINGS[enc]
120
+ raise ArgumentError, "'#{enc}' is not a valid encoding" unless ENCODINGS[enc]
117
121
  end
118
122
  end
119
123
  end
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Nokogiri
2
4
  module XML
3
5
  module SAX
@@ -6,9 +8,12 @@ module Nokogiri
6
8
  # by the user. Instead, you should be looking at
7
9
  # Nokogiri::XML::SAX::Parser
8
10
  class ParserContext
9
- def self.new thing, encoding = 'UTF-8'
10
- [:read, :close].all? { |x| thing.respond_to?(x) } ?
11
- io(thing, Parser::ENCODINGS[encoding]) : memory(thing)
11
+ def self.new(thing, encoding = "UTF-8")
12
+ if [:read, :close].all? { |x| thing.respond_to?(x) }
13
+ io(thing, Parser::ENCODINGS[encoding])
14
+ else
15
+ memory(thing)
16
+ end
12
17
  end
13
18
  end
14
19
  end
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Nokogiri
2
4
  module XML
3
5
  module SAX
@@ -23,7 +25,6 @@ module Nokogiri
23
25
  # parser << "/div>"
24
26
  # parser.finish
25
27
  class PushParser
26
-
27
28
  # The Nokogiri::XML::SAX::Document on which the PushParser will be
28
29
  # operating
29
30
  attr_accessor :document
@@ -31,7 +32,7 @@ module Nokogiri
31
32
  ###
32
33
  # Create a new PushParser with +doc+ as the SAX Document, providing
33
34
  # an optional +file_name+ and +encoding+
34
- def initialize(doc = XML::SAX::Document.new, file_name = nil, encoding = 'UTF-8')
35
+ def initialize(doc = XML::SAX::Document.new, file_name = nil, encoding = "UTF-8")
35
36
  @document = doc
36
37
  @encoding = encoding
37
38
  @sax_parser = XML::SAX::Parser.new(doc)
@@ -43,16 +44,16 @@ module Nokogiri
43
44
  ###
44
45
  # Write a +chunk+ of XML to the PushParser. Any callback methods
45
46
  # that can be called will be called immediately.
46
- def write chunk, last_chunk = false
47
+ def write(chunk, last_chunk = false)
47
48
  native_write(chunk, last_chunk)
48
49
  end
49
- alias :<< :write
50
+ alias_method :<<, :write
50
51
 
51
52
  ###
52
53
  # Finish the parsing. This method is only necessary for
53
54
  # Nokogiri::XML::SAX::Document#end_document to be called.
54
55
  def finish
55
- write '', true
56
+ write("", true)
56
57
  end
57
58
  end
58
59
  end
@@ -1,4 +1,6 @@
1
- require 'nokogiri/xml/sax/document'
2
- require 'nokogiri/xml/sax/parser_context'
3
- require 'nokogiri/xml/sax/parser'
4
- require 'nokogiri/xml/sax/push_parser'
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "sax/document"
4
+ require_relative "sax/parser_context"
5
+ require_relative "sax/parser"
6
+ require_relative "sax/push_parser"
@@ -1,11 +1,13 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Nokogiri
2
4
  module XML
3
5
  class << self
4
6
  ###
5
7
  # Create a new Nokogiri::XML::Schema object using a +string_or_io+
6
8
  # object.
7
- def Schema string_or_io
8
- Schema.new(string_or_io)
9
+ def Schema(string_or_io, options = ParseOptions::DEFAULT_SCHEMA)
10
+ Schema.new(string_or_io, options)
9
11
  end
10
12
  end
11
13
 
@@ -26,15 +28,23 @@ module Nokogiri
26
28
  # end
27
29
  #
28
30
  # The list of errors are Nokogiri::XML::SyntaxError objects.
31
+ #
32
+ # NOTE: As of v1.11.0, Schema treats inputs as UNTRUSTED by default, and so external entities
33
+ # are not resolved from the network (`http://` or `ftp://`). Previously, parsing treated
34
+ # documents as "trusted" by default which was counter to Nokogiri's "untrusted by default"
35
+ # security policy. If a document is trusted, then the caller may turn off the NONET option via
36
+ # the ParseOptions to re-enable external entity resolution over a network connection.
29
37
  class Schema
30
38
  # Errors while parsing the schema file
31
39
  attr_accessor :errors
40
+ # The Nokogiri::XML::ParseOptions used to parse the schema
41
+ attr_accessor :parse_options
32
42
 
33
43
  ###
34
44
  # Create a new Nokogiri::XML::Schema object using a +string_or_io+
35
45
  # object.
36
- def self.new string_or_io
37
- from_document Nokogiri::XML(string_or_io)
46
+ def self.new(string_or_io, options = ParseOptions::DEFAULT_SCHEMA)
47
+ from_document(Nokogiri::XML(string_or_io), options)
38
48
  end
39
49
 
40
50
  ###
@@ -42,9 +52,9 @@ module Nokogiri
42
52
  # Nokogiri::XML::Document object, or a filename. An Array of
43
53
  # Nokogiri::XML::SyntaxError objects found while validating the
44
54
  # +thing+ is returned.
45
- def validate thing
46
- if thing.is_a?(Nokogiri::XML::Document)
47
- validate_document(thing)
55
+ def validate(thing)
56
+ if thing.is_a?(Nokogiri::XML::Document)
57
+ validate_document(thing)
48
58
  elsif File.file?(thing)
49
59
  validate_file(thing)
50
60
  else
@@ -55,8 +65,8 @@ module Nokogiri
55
65
  ###
56
66
  # Returns true if +thing+ is a valid Nokogiri::XML::Document or
57
67
  # file.
58
- def valid? thing
59
- validate(thing).length == 0
68
+ def valid?(thing)
69
+ validate(thing).empty?
60
70
  end
61
71
  end
62
72
  end