nokogiri 1.8.5 → 1.13.9

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (353)
  1. checksums.yaml +4 -4
  2. data/Gemfile +3 -21
  3. data/LICENSE-DEPENDENCIES.md +1159 -868
  4. data/LICENSE.md +5 -28
  5. data/README.md +196 -90
  6. data/bin/nokogiri +63 -50
  7. data/dependencies.yml +13 -59
  8. data/ext/nokogiri/depend +38 -358
  9. data/ext/nokogiri/extconf.rb +765 -420
  10. data/ext/nokogiri/gumbo.c +584 -0
  11. data/ext/nokogiri/html4_document.c +166 -0
  12. data/ext/nokogiri/html4_element_description.c +294 -0
  13. data/ext/nokogiri/html4_entity_lookup.c +37 -0
  14. data/ext/nokogiri/html4_sax_parser_context.c +119 -0
  15. data/ext/nokogiri/html4_sax_push_parser.c +95 -0
  16. data/ext/nokogiri/libxml2_backwards_compat.c +121 -0
  17. data/ext/nokogiri/nokogiri.c +228 -91
  18. data/ext/nokogiri/nokogiri.h +199 -88
  19. data/ext/nokogiri/test_global_handlers.c +40 -0
  20. data/ext/nokogiri/xml_attr.c +42 -37
  21. data/ext/nokogiri/xml_attribute_decl.c +21 -21
  22. data/ext/nokogiri/xml_cdata.c +14 -19
  23. data/ext/nokogiri/xml_comment.c +19 -26
  24. data/ext/nokogiri/xml_document.c +296 -217
  25. data/ext/nokogiri/xml_document_fragment.c +12 -16
  26. data/ext/nokogiri/xml_dtd.c +64 -58
  27. data/ext/nokogiri/xml_element_content.c +31 -26
  28. data/ext/nokogiri/xml_element_decl.c +25 -25
  29. data/ext/nokogiri/xml_encoding_handler.c +43 -18
  30. data/ext/nokogiri/xml_entity_decl.c +37 -35
  31. data/ext/nokogiri/xml_entity_reference.c +16 -18
  32. data/ext/nokogiri/xml_namespace.c +99 -54
  33. data/ext/nokogiri/xml_node.c +1107 -658
  34. data/ext/nokogiri/xml_node_set.c +178 -166
  35. data/ext/nokogiri/xml_processing_instruction.c +17 -19
  36. data/ext/nokogiri/xml_reader.c +277 -175
  37. data/ext/nokogiri/xml_relax_ng.c +52 -28
  38. data/ext/nokogiri/xml_sax_parser.c +112 -112
  39. data/ext/nokogiri/xml_sax_parser_context.c +112 -86
  40. data/ext/nokogiri/xml_sax_push_parser.c +36 -27
  41. data/ext/nokogiri/xml_schema.c +114 -35
  42. data/ext/nokogiri/xml_syntax_error.c +42 -21
  43. data/ext/nokogiri/xml_text.c +14 -18
  44. data/ext/nokogiri/xml_xpath_context.c +226 -115
  45. data/ext/nokogiri/xslt_stylesheet.c +265 -173
  46. data/gumbo-parser/CHANGES.md +63 -0
  47. data/gumbo-parser/Makefile +101 -0
  48. data/gumbo-parser/THANKS +27 -0
  49. data/gumbo-parser/src/Makefile +34 -0
  50. data/gumbo-parser/src/README.md +41 -0
  51. data/gumbo-parser/src/ascii.c +75 -0
  52. data/gumbo-parser/src/ascii.h +115 -0
  53. data/gumbo-parser/src/attribute.c +42 -0
  54. data/gumbo-parser/src/attribute.h +17 -0
  55. data/gumbo-parser/src/char_ref.c +22225 -0
  56. data/gumbo-parser/src/char_ref.h +29 -0
  57. data/gumbo-parser/src/char_ref.rl +2154 -0
  58. data/gumbo-parser/src/error.c +626 -0
  59. data/gumbo-parser/src/error.h +148 -0
  60. data/gumbo-parser/src/foreign_attrs.c +104 -0
  61. data/gumbo-parser/src/foreign_attrs.gperf +27 -0
  62. data/gumbo-parser/src/gumbo.h +943 -0
  63. data/gumbo-parser/src/insertion_mode.h +33 -0
  64. data/gumbo-parser/src/macros.h +91 -0
  65. data/gumbo-parser/src/parser.c +4875 -0
  66. data/gumbo-parser/src/parser.h +41 -0
  67. data/gumbo-parser/src/replacement.h +33 -0
  68. data/gumbo-parser/src/string_buffer.c +103 -0
  69. data/gumbo-parser/src/string_buffer.h +68 -0
  70. data/gumbo-parser/src/string_piece.c +48 -0
  71. data/gumbo-parser/src/svg_attrs.c +174 -0
  72. data/gumbo-parser/src/svg_attrs.gperf +77 -0
  73. data/gumbo-parser/src/svg_tags.c +137 -0
  74. data/gumbo-parser/src/svg_tags.gperf +55 -0
  75. data/gumbo-parser/src/tag.c +222 -0
  76. data/gumbo-parser/src/tag_lookup.c +382 -0
  77. data/gumbo-parser/src/tag_lookup.gperf +169 -0
  78. data/gumbo-parser/src/tag_lookup.h +13 -0
  79. data/gumbo-parser/src/token_buffer.c +79 -0
  80. data/gumbo-parser/src/token_buffer.h +71 -0
  81. data/gumbo-parser/src/token_type.h +17 -0
  82. data/gumbo-parser/src/tokenizer.c +3463 -0
  83. data/gumbo-parser/src/tokenizer.h +112 -0
  84. data/gumbo-parser/src/tokenizer_states.h +339 -0
  85. data/gumbo-parser/src/utf8.c +245 -0
  86. data/gumbo-parser/src/utf8.h +164 -0
  87. data/gumbo-parser/src/util.c +68 -0
  88. data/gumbo-parser/src/util.h +30 -0
  89. data/gumbo-parser/src/vector.c +111 -0
  90. data/gumbo-parser/src/vector.h +45 -0
  91. data/lib/nokogiri/class_resolver.rb +67 -0
  92. data/lib/nokogiri/css/node.rb +10 -8
  93. data/lib/nokogiri/css/parser.rb +397 -377
  94. data/lib/nokogiri/css/parser.y +250 -245
  95. data/lib/nokogiri/css/parser_extras.rb +54 -49
  96. data/lib/nokogiri/css/syntax_error.rb +3 -1
  97. data/lib/nokogiri/css/tokenizer.rb +107 -104
  98. data/lib/nokogiri/css/tokenizer.rex +3 -2
  99. data/lib/nokogiri/css/xpath_visitor.rb +218 -91
  100. data/lib/nokogiri/css.rb +50 -17
  101. data/lib/nokogiri/decorators/slop.rb +9 -7
  102. data/lib/nokogiri/extension.rb +31 -0
  103. data/lib/nokogiri/gumbo.rb +15 -0
  104. data/lib/nokogiri/html.rb +38 -27
  105. data/lib/nokogiri/{html → html4}/builder.rb +4 -2
  106. data/lib/nokogiri/{html → html4}/document.rb +103 -105
  107. data/lib/nokogiri/html4/document_fragment.rb +54 -0
  108. data/lib/nokogiri/{html → html4}/element_description.rb +3 -1
  109. data/lib/nokogiri/html4/element_description_defaults.rb +578 -0
  110. data/lib/nokogiri/{html → html4}/entity_lookup.rb +4 -2
  111. data/lib/nokogiri/{html → html4}/sax/parser.rb +17 -16
  112. data/lib/nokogiri/html4/sax/parser_context.rb +20 -0
  113. data/lib/nokogiri/{html → html4}/sax/push_parser.rb +12 -11
  114. data/lib/nokogiri/html4.rb +46 -0
  115. data/lib/nokogiri/html5/document.rb +91 -0
  116. data/lib/nokogiri/html5/document_fragment.rb +83 -0
  117. data/lib/nokogiri/html5/node.rb +100 -0
  118. data/lib/nokogiri/html5.rb +478 -0
  119. data/lib/nokogiri/jruby/dependencies.rb +21 -0
  120. data/lib/nokogiri/syntax_error.rb +2 -0
  121. data/lib/nokogiri/version/constant.rb +6 -0
  122. data/lib/nokogiri/version/info.rb +222 -0
  123. data/lib/nokogiri/version.rb +3 -108
  124. data/lib/nokogiri/xml/attr.rb +6 -3
  125. data/lib/nokogiri/xml/attribute_decl.rb +3 -1
  126. data/lib/nokogiri/xml/builder.rb +97 -53
  127. data/lib/nokogiri/xml/cdata.rb +3 -1
  128. data/lib/nokogiri/xml/character_data.rb +2 -0
  129. data/lib/nokogiri/xml/document.rb +224 -86
  130. data/lib/nokogiri/xml/document_fragment.rb +57 -44
  131. data/lib/nokogiri/xml/dtd.rb +4 -2
  132. data/lib/nokogiri/xml/element_content.rb +2 -0
  133. data/lib/nokogiri/xml/element_decl.rb +3 -1
  134. data/lib/nokogiri/xml/entity_decl.rb +4 -2
  135. data/lib/nokogiri/xml/entity_reference.rb +2 -0
  136. data/lib/nokogiri/xml/namespace.rb +3 -0
  137. data/lib/nokogiri/xml/node/save_options.rb +10 -5
  138. data/lib/nokogiri/xml/node.rb +895 -377
  139. data/lib/nokogiri/xml/node_set.rb +92 -65
  140. data/lib/nokogiri/xml/notation.rb +13 -0
  141. data/lib/nokogiri/xml/parse_options.rb +22 -8
  142. data/lib/nokogiri/xml/pp/character_data.rb +9 -6
  143. data/lib/nokogiri/xml/pp/node.rb +25 -26
  144. data/lib/nokogiri/xml/pp.rb +4 -2
  145. data/lib/nokogiri/xml/processing_instruction.rb +3 -1
  146. data/lib/nokogiri/xml/reader.rb +21 -28
  147. data/lib/nokogiri/xml/relax_ng.rb +8 -2
  148. data/lib/nokogiri/xml/sax/document.rb +45 -49
  149. data/lib/nokogiri/xml/sax/parser.rb +38 -34
  150. data/lib/nokogiri/xml/sax/parser_context.rb +8 -3
  151. data/lib/nokogiri/xml/sax/push_parser.rb +6 -5
  152. data/lib/nokogiri/xml/sax.rb +6 -4
  153. data/lib/nokogiri/xml/schema.rb +19 -9
  154. data/lib/nokogiri/xml/searchable.rb +112 -72
  155. data/lib/nokogiri/xml/syntax_error.rb +6 -4
  156. data/lib/nokogiri/xml/text.rb +2 -0
  157. data/lib/nokogiri/xml/xpath/syntax_error.rb +4 -2
  158. data/lib/nokogiri/xml/xpath.rb +15 -4
  159. data/lib/nokogiri/xml/xpath_context.rb +3 -3
  160. data/lib/nokogiri/xml.rb +38 -37
  161. data/lib/nokogiri/xslt/stylesheet.rb +3 -1
  162. data/lib/nokogiri/xslt.rb +29 -20
  163. data/lib/nokogiri.rb +49 -65
  164. data/lib/xsd/xmlparser/nokogiri.rb +26 -24
  165. data/patches/libxml2/0001-Remove-script-macro-support.patch +40 -0
  166. data/patches/libxml2/0002-Update-entities-to-remove-handling-of-ssi.patch +44 -0
  167. data/patches/libxml2/0003-libxml2.la-is-in-top_builddir.patch +25 -0
  168. data/patches/libxml2/0005-avoid-isnan-isinf.patch +81 -0
  169. data/patches/libxml2/0009-allow-wildcard-namespaces.patch +77 -0
  170. data/patches/libxslt/0001-update-automake-files-for-arm64.patch +3037 -0
  171. data/ports/archives/libxml2-2.10.3.tar.xz +0 -0
  172. data/ports/archives/libxslt-1.1.37.tar.xz +0 -0
  173. metadata +211 -266
  174. data/.autotest +0 -22
  175. data/.cross_rubies +0 -8
  176. data/.editorconfig +0 -17
  177. data/.gemtest +0 -0
  178. data/.travis.yml +0 -63
  179. data/CHANGELOG.md +0 -1368
  180. data/CONTRIBUTING.md +0 -42
  181. data/C_CODING_STYLE.rdoc +0 -33
  182. data/Gemfile-libxml-ruby +0 -3
  183. data/Manifest.txt +0 -370
  184. data/ROADMAP.md +0 -111
  185. data/Rakefile +0 -348
  186. data/SECURITY.md +0 -19
  187. data/STANDARD_RESPONSES.md +0 -47
  188. data/Y_U_NO_GEMSPEC.md +0 -155
  189. data/appveyor.yml +0 -29
  190. data/build_all +0 -44
  191. data/ext/nokogiri/html_document.c +0 -170
  192. data/ext/nokogiri/html_document.h +0 -10
  193. data/ext/nokogiri/html_element_description.c +0 -279
  194. data/ext/nokogiri/html_element_description.h +0 -10
  195. data/ext/nokogiri/html_entity_lookup.c +0 -32
  196. data/ext/nokogiri/html_entity_lookup.h +0 -8
  197. data/ext/nokogiri/html_sax_parser_context.c +0 -116
  198. data/ext/nokogiri/html_sax_parser_context.h +0 -11
  199. data/ext/nokogiri/html_sax_push_parser.c +0 -87
  200. data/ext/nokogiri/html_sax_push_parser.h +0 -9
  201. data/ext/nokogiri/xml_attr.h +0 -9
  202. data/ext/nokogiri/xml_attribute_decl.h +0 -9
  203. data/ext/nokogiri/xml_cdata.h +0 -9
  204. data/ext/nokogiri/xml_comment.h +0 -9
  205. data/ext/nokogiri/xml_document.h +0 -23
  206. data/ext/nokogiri/xml_document_fragment.h +0 -10
  207. data/ext/nokogiri/xml_dtd.h +0 -10
  208. data/ext/nokogiri/xml_element_content.h +0 -10
  209. data/ext/nokogiri/xml_element_decl.h +0 -9
  210. data/ext/nokogiri/xml_encoding_handler.h +0 -8
  211. data/ext/nokogiri/xml_entity_decl.h +0 -10
  212. data/ext/nokogiri/xml_entity_reference.h +0 -9
  213. data/ext/nokogiri/xml_io.c +0 -61
  214. data/ext/nokogiri/xml_io.h +0 -11
  215. data/ext/nokogiri/xml_libxml2_hacks.c +0 -112
  216. data/ext/nokogiri/xml_libxml2_hacks.h +0 -12
  217. data/ext/nokogiri/xml_namespace.h +0 -15
  218. data/ext/nokogiri/xml_node.h +0 -13
  219. data/ext/nokogiri/xml_node_set.h +0 -12
  220. data/ext/nokogiri/xml_processing_instruction.h +0 -9
  221. data/ext/nokogiri/xml_reader.h +0 -10
  222. data/ext/nokogiri/xml_relax_ng.h +0 -9
  223. data/ext/nokogiri/xml_sax_parser.h +0 -39
  224. data/ext/nokogiri/xml_sax_parser_context.h +0 -10
  225. data/ext/nokogiri/xml_sax_push_parser.h +0 -9
  226. data/ext/nokogiri/xml_schema.h +0 -9
  227. data/ext/nokogiri/xml_syntax_error.h +0 -13
  228. data/ext/nokogiri/xml_text.h +0 -9
  229. data/ext/nokogiri/xml_xpath_context.h +0 -10
  230. data/ext/nokogiri/xslt_stylesheet.h +0 -14
  231. data/lib/nokogiri/html/document_fragment.rb +0 -49
  232. data/lib/nokogiri/html/element_description_defaults.rb +0 -671
  233. data/lib/nokogiri/html/sax/parser_context.rb +0 -16
  234. data/patches/libxml2/0001-Revert-Do-not-URI-escape-in-server-side-includes.patch +0 -78
  235. data/patches/libxml2/0002-Fix-nullptr-deref-with-XPath-logic-ops.patch +0 -54
  236. data/patches/libxml2/0003-Fix-infinite-loop-in-LZMA-decompression.patch +0 -50
  237. data/patches/sort-patches-by-date +0 -25
  238. data/ports/archives/libxml2-2.9.8.tar.gz +0 -0
  239. data/ports/archives/libxslt-1.1.32.tar.gz +0 -0
  240. data/suppressions/README.txt +0 -1
  241. data/suppressions/nokogiri_ruby-2.supp +0 -10
  242. data/tasks/test.rb +0 -100
  243. data/test/css/test_nthiness.rb +0 -226
  244. data/test/css/test_parser.rb +0 -386
  245. data/test/css/test_tokenizer.rb +0 -215
  246. data/test/css/test_xpath_visitor.rb +0 -96
  247. data/test/decorators/test_slop.rb +0 -23
  248. data/test/files/2ch.html +0 -108
  249. data/test/files/GH_1042.html +0 -18
  250. data/test/files/address_book.rlx +0 -12
  251. data/test/files/address_book.xml +0 -10
  252. data/test/files/atom.xml +0 -344
  253. data/test/files/bar/bar.xsd +0 -4
  254. data/test/files/bogus.xml +0 -0
  255. data/test/files/dont_hurt_em_why.xml +0 -422
  256. data/test/files/encoding.html +0 -82
  257. data/test/files/encoding.xhtml +0 -84
  258. data/test/files/exslt.xml +0 -8
  259. data/test/files/exslt.xslt +0 -35
  260. data/test/files/foo/foo.xsd +0 -4
  261. data/test/files/metacharset.html +0 -10
  262. data/test/files/namespace_pressure_test.xml +0 -1684
  263. data/test/files/noencoding.html +0 -47
  264. data/test/files/po.xml +0 -32
  265. data/test/files/po.xsd +0 -66
  266. data/test/files/saml/saml20assertion_schema.xsd +0 -283
  267. data/test/files/saml/saml20protocol_schema.xsd +0 -302
  268. data/test/files/saml/xenc_schema.xsd +0 -146
  269. data/test/files/saml/xmldsig_schema.xsd +0 -318
  270. data/test/files/shift_jis.html +0 -10
  271. data/test/files/shift_jis.xml +0 -5
  272. data/test/files/shift_jis_no_charset.html +0 -9
  273. data/test/files/slow-xpath.xml +0 -25509
  274. data/test/files/snuggles.xml +0 -3
  275. data/test/files/staff.dtd +0 -10
  276. data/test/files/staff.xml +0 -59
  277. data/test/files/staff.xslt +0 -32
  278. data/test/files/test_document_url/bar.xml +0 -2
  279. data/test/files/test_document_url/document.dtd +0 -4
  280. data/test/files/test_document_url/document.xml +0 -6
  281. data/test/files/tlm.html +0 -851
  282. data/test/files/to_be_xincluded.xml +0 -2
  283. data/test/files/valid_bar.xml +0 -2
  284. data/test/files/xinclude.xml +0 -4
  285. data/test/helper.rb +0 -271
  286. data/test/html/sax/test_parser.rb +0 -168
  287. data/test/html/sax/test_parser_context.rb +0 -46
  288. data/test/html/sax/test_parser_text.rb +0 -163
  289. data/test/html/sax/test_push_parser.rb +0 -87
  290. data/test/html/test_attributes.rb +0 -85
  291. data/test/html/test_builder.rb +0 -164
  292. data/test/html/test_document.rb +0 -712
  293. data/test/html/test_document_encoding.rb +0 -143
  294. data/test/html/test_document_fragment.rb +0 -310
  295. data/test/html/test_element_description.rb +0 -105
  296. data/test/html/test_named_characters.rb +0 -14
  297. data/test/html/test_node.rb +0 -212
  298. data/test/html/test_node_encoding.rb +0 -91
  299. data/test/namespaces/test_additional_namespaces_in_builder_doc.rb +0 -14
  300. data/test/namespaces/test_namespaces_aliased_default.rb +0 -24
  301. data/test/namespaces/test_namespaces_in_builder_doc.rb +0 -75
  302. data/test/namespaces/test_namespaces_in_cloned_doc.rb +0 -31
  303. data/test/namespaces/test_namespaces_in_created_doc.rb +0 -75
  304. data/test/namespaces/test_namespaces_in_parsed_doc.rb +0 -80
  305. data/test/namespaces/test_namespaces_preservation.rb +0 -31
  306. data/test/test_convert_xpath.rb +0 -135
  307. data/test/test_css_cache.rb +0 -47
  308. data/test/test_encoding_handler.rb +0 -48
  309. data/test/test_memory_leak.rb +0 -156
  310. data/test/test_nokogiri.rb +0 -138
  311. data/test/test_soap4r_sax.rb +0 -52
  312. data/test/test_xslt_transforms.rb +0 -314
  313. data/test/xml/node/test_save_options.rb +0 -28
  314. data/test/xml/node/test_subclass.rb +0 -44
  315. data/test/xml/sax/test_parser.rb +0 -402
  316. data/test/xml/sax/test_parser_context.rb +0 -115
  317. data/test/xml/sax/test_parser_text.rb +0 -202
  318. data/test/xml/sax/test_push_parser.rb +0 -265
  319. data/test/xml/test_attr.rb +0 -74
  320. data/test/xml/test_attribute_decl.rb +0 -86
  321. data/test/xml/test_builder.rb +0 -341
  322. data/test/xml/test_c14n.rb +0 -180
  323. data/test/xml/test_cdata.rb +0 -54
  324. data/test/xml/test_comment.rb +0 -40
  325. data/test/xml/test_document.rb +0 -982
  326. data/test/xml/test_document_encoding.rb +0 -31
  327. data/test/xml/test_document_fragment.rb +0 -298
  328. data/test/xml/test_dtd.rb +0 -187
  329. data/test/xml/test_dtd_encoding.rb +0 -31
  330. data/test/xml/test_element_content.rb +0 -56
  331. data/test/xml/test_element_decl.rb +0 -73
  332. data/test/xml/test_entity_decl.rb +0 -122
  333. data/test/xml/test_entity_reference.rb +0 -262
  334. data/test/xml/test_namespace.rb +0 -96
  335. data/test/xml/test_node.rb +0 -1325
  336. data/test/xml/test_node_attributes.rb +0 -115
  337. data/test/xml/test_node_encoding.rb +0 -75
  338. data/test/xml/test_node_inheritance.rb +0 -32
  339. data/test/xml/test_node_reparenting.rb +0 -592
  340. data/test/xml/test_node_set.rb +0 -809
  341. data/test/xml/test_parse_options.rb +0 -64
  342. data/test/xml/test_processing_instruction.rb +0 -30
  343. data/test/xml/test_reader.rb +0 -620
  344. data/test/xml/test_reader_encoding.rb +0 -134
  345. data/test/xml/test_relax_ng.rb +0 -60
  346. data/test/xml/test_schema.rb +0 -142
  347. data/test/xml/test_syntax_error.rb +0 -36
  348. data/test/xml/test_text.rb +0 -60
  349. data/test/xml/test_unparented_node.rb +0 -483
  350. data/test/xml/test_xinclude.rb +0 -83
  351. data/test/xml/test_xpath.rb +0 -470
  352. data/test/xslt/test_custom_functions.rb +0 -133
  353. data/test/xslt/test_exception_handling.rb +0 -37
@@ -0,0 +1,20 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Nokogiri
4
+ module HTML4
5
+ module SAX
6
+ ###
7
+ # Context for HTML SAX parsers. This class is usually not instantiated by the user. Instead,
8
+ # you should be looking at Nokogiri::HTML4::SAX::Parser
9
+ class ParserContext < Nokogiri::XML::SAX::ParserContext
10
+ def self.new(thing, encoding = "UTF-8")
11
+ if [:read, :close].all? { |x| thing.respond_to?(x) }
12
+ super
13
+ else
14
+ memory(thing, encoding)
15
+ end
16
+ end
17
+ end
18
+ end
19
+ end
20
+ end
@@ -1,34 +1,35 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Nokogiri
2
- module HTML
4
+ module HTML4
3
5
  module SAX
4
6
  class PushParser
5
-
6
- # The Nokogiri::HTML::SAX::Document on which the PushParser will be
7
+ # The Nokogiri::HTML4::SAX::Document on which the PushParser will be
7
8
  # operating
8
9
  attr_accessor :document
9
-
10
- def initialize(doc = HTML::SAX::Document.new, file_name = nil, encoding = 'UTF-8')
10
+
11
+ def initialize(doc = HTML4::SAX::Document.new, file_name = nil, encoding = "UTF-8")
11
12
  @document = doc
12
13
  @encoding = encoding
13
- @sax_parser = HTML::SAX::Parser.new(doc, @encoding)
14
+ @sax_parser = HTML4::SAX::Parser.new(doc, @encoding)
14
15
 
15
16
  ## Create our push parser context
16
17
  initialize_native(@sax_parser, file_name, encoding)
17
18
  end
18
-
19
+
19
20
  ###
20
21
  # Write a +chunk+ of HTML to the PushParser. Any callback methods
21
22
  # that can be called will be called immediately.
22
- def write chunk, last_chunk = false
23
+ def write(chunk, last_chunk = false)
23
24
  native_write(chunk, last_chunk)
24
25
  end
25
- alias :<< :write
26
+ alias_method :<<, :write
26
27
 
27
28
  ###
28
29
  # Finish the parsing. This method is only necessary for
29
- # Nokogiri::HTML::SAX::Document#end_document to be called.
30
+ # Nokogiri::HTML4::SAX::Document#end_document to be called.
30
31
  def finish
31
- write '', true
32
+ write("", true)
32
33
  end
33
34
  end
34
35
  end
@@ -0,0 +1,46 @@
1
+ # coding: utf-8
2
+ # frozen_string_literal: true
3
+
4
+ module Nokogiri
5
+ class << self
6
+ # :call-seq:
7
+ # HTML4(input, url = nil, encoding = nil, options = XML::ParseOptions::DEFAULT_HTML, &block) → Nokogiri::HTML4::Document
8
+ #
9
+ # Parse HTML. Convenience method for Nokogiri::HTML4::Document.parse
10
+ def HTML4(input, url = nil, encoding = nil, options = XML::ParseOptions::DEFAULT_HTML, &block)
11
+ Nokogiri::HTML4::Document.parse(input, url, encoding, options, &block)
12
+ end
13
+ end
14
+
15
+ # Since v1.12.0
16
+ #
17
+ # 💡 Before v1.12.0, Nokogiri::HTML4 did not exist, and Nokogiri::HTML was the module/namespace
18
+ # for parsing HTML.
19
+ module HTML4
20
+ class << self
21
+ ###
22
+ # Parse HTML. Convenience method for Nokogiri::HTML4::Document.parse
23
+ def parse(input, url = nil, encoding = nil, options = XML::ParseOptions::DEFAULT_HTML, &block)
24
+ Document.parse(input, url, encoding, options, &block)
25
+ end
26
+
27
+ ####
28
+ # Parse a fragment from +string+ in to a NodeSet.
29
+ def fragment(string, encoding = nil, options = XML::ParseOptions::DEFAULT_HTML, &block)
30
+ HTML4::DocumentFragment.parse(string, encoding, options, &block)
31
+ end
32
+ end
33
+
34
+ # Instance of Nokogiri::HTML4::EntityLookup
35
+ NamedCharacters = EntityLookup.new
36
+ end
37
+ end
38
+
39
+ require_relative "html4/entity_lookup"
40
+ require_relative "html4/document"
41
+ require_relative "html4/document_fragment"
42
+ require_relative "html4/sax/parser_context"
43
+ require_relative "html4/sax/parser"
44
+ require_relative "html4/sax/push_parser"
45
+ require_relative "html4/element_description"
46
+ require_relative "html4/element_description_defaults"
@@ -0,0 +1,91 @@
1
+ # coding: utf-8
2
+ # frozen_string_literal: true
3
+
4
+ #
5
+ # Copyright 2013-2021 Sam Ruby, Stephen Checkoway
6
+ #
7
+ # Licensed under the Apache License, Version 2.0 (the "License");
8
+ # you may not use this file except in compliance with the License.
9
+ # You may obtain a copy of the License at
10
+ #
11
+ # http://www.apache.org/licenses/LICENSE-2.0
12
+ #
13
+ # Unless required by applicable law or agreed to in writing, software
14
+ # distributed under the License is distributed on an "AS IS" BASIS,
15
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16
+ # See the License for the specific language governing permissions and
17
+ # limitations under the License.
18
+ #
19
+
20
+ require_relative "../html4/document"
21
+
22
+ module Nokogiri
23
+ module HTML5
24
+ # Since v1.12.0
25
+ #
26
+ # 💡 HTML5 functionality is not available when running JRuby.
27
+ class Document < Nokogiri::HTML4::Document
28
+ def self.parse(string_or_io, url = nil, encoding = nil, **options, &block)
29
+ yield options if block
30
+ string_or_io = "" unless string_or_io
31
+
32
+ if string_or_io.respond_to?(:encoding) && string_or_io.encoding.name != "ASCII-8BIT"
33
+ encoding ||= string_or_io.encoding.name
34
+ end
35
+
36
+ if string_or_io.respond_to?(:read) && string_or_io.respond_to?(:path)
37
+ url ||= string_or_io.path
38
+ end
39
+ unless string_or_io.respond_to?(:read) || string_or_io.respond_to?(:to_str)
40
+ raise ArgumentError, "not a string or IO object"
41
+ end
42
+
43
+ do_parse(string_or_io, url, encoding, options)
44
+ end
45
+
46
+ def self.read_io(io, url = nil, encoding = nil, **options)
47
+ raise ArgumentError, "io object doesn't respond to :read" unless io.respond_to?(:read)
48
+
49
+ do_parse(io, url, encoding, options)
50
+ end
51
+
52
+ def self.read_memory(string, url = nil, encoding = nil, **options)
53
+ raise ArgumentError, "string object doesn't respond to :to_str" unless string.respond_to?(:to_str)
54
+
55
+ do_parse(string, url, encoding, options)
56
+ end
57
+
58
+ def fragment(tags = nil)
59
+ DocumentFragment.new(self, tags, root)
60
+ end
61
+
62
+ def to_xml(options = {}, &block)
63
+ # Bypass XML::Document#to_xml which doesn't add
64
+ # XML::Node::SaveOptions::AS_XML like XML::Node#to_xml does.
65
+ XML::Node.instance_method(:to_xml).bind(self).call(options, &block)
66
+ end
67
+
68
+ # :call-seq:
69
+ # xpath_doctype() → Nokogiri::CSS::XPathVisitor::DoctypeConfig
70
+ #
71
+ # [Returns] The document type which determines CSS-to-XPath translation.
72
+ #
73
+ # See XPathVisitor for more information.
74
+ def xpath_doctype
75
+ Nokogiri::CSS::XPathVisitor::DoctypeConfig::HTML5
76
+ end
77
+
78
+ private
79
+
80
+ def self.do_parse(string_or_io, url, encoding, options)
81
+ string = HTML5.read_and_encode(string_or_io, encoding)
82
+ max_attributes = options[:max_attributes] || Nokogiri::Gumbo::DEFAULT_MAX_ATTRIBUTES
83
+ max_errors = options[:max_errors] || options[:max_parse_errors] || Nokogiri::Gumbo::DEFAULT_MAX_ERRORS
84
+ max_depth = options[:max_tree_depth] || Nokogiri::Gumbo::DEFAULT_MAX_TREE_DEPTH
85
+ doc = Nokogiri::Gumbo.parse(string, url, max_attributes, max_errors, max_depth)
86
+ doc.encoding = "UTF-8"
87
+ doc
88
+ end
89
+ end
90
+ end
91
+ end
@@ -0,0 +1,83 @@
1
+ # coding: utf-8
2
+ # frozen_string_literal: true
3
+
4
+ #
5
+ # Copyright 2013-2021 Sam Ruby, Stephen Checkoway
6
+ #
7
+ # Licensed under the Apache License, Version 2.0 (the "License");
8
+ # you may not use this file except in compliance with the License.
9
+ # You may obtain a copy of the License at
10
+ #
11
+ # http://www.apache.org/licenses/LICENSE-2.0
12
+ #
13
+ # Unless required by applicable law or agreed to in writing, software
14
+ # distributed under the License is distributed on an "AS IS" BASIS,
15
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16
+ # See the License for the specific language governing permissions and
17
+ # limitations under the License.
18
+ #
19
+
20
+ require_relative "../html4/document_fragment"
21
+
22
+ module Nokogiri
23
+ module HTML5
24
+ # Since v1.12.0
25
+ #
26
+ # 💡 HTML5 functionality is not available when running JRuby.
27
+ class DocumentFragment < Nokogiri::HTML4::DocumentFragment
28
+ attr_accessor :document
29
+ attr_accessor :errors
30
+
31
+ # Create a document fragment.
32
+ def initialize(doc, tags = nil, ctx = nil, options = {})
33
+ self.document = doc
34
+ self.errors = []
35
+ return self unless tags
36
+
37
+ max_attributes = options[:max_attributes] || Nokogiri::Gumbo::DEFAULT_MAX_ATTRIBUTES
38
+ max_errors = options[:max_errors] || Nokogiri::Gumbo::DEFAULT_MAX_ERRORS
39
+ max_depth = options[:max_tree_depth] || Nokogiri::Gumbo::DEFAULT_MAX_TREE_DEPTH
40
+ tags = Nokogiri::HTML5.read_and_encode(tags, nil)
41
+ Nokogiri::Gumbo.fragment(self, tags, ctx, max_attributes, max_errors, max_depth)
42
+ end
43
+
44
+ def serialize(options = {}, &block)
45
+ # Bypass XML::Document.serialize which doesn't support options even
46
+ # though XML::Node.serialize does!
47
+ XML::Node.instance_method(:serialize).bind(self).call(options, &block)
48
+ end
49
+
50
+ # Parse a document fragment from +tags+, returning a DocumentFragment.
51
+ def self.parse(tags, encoding = nil, options = {})
52
+ doc = HTML5::Document.new
53
+ tags = HTML5.read_and_encode(tags, encoding)
54
+ doc.encoding = "UTF-8"
55
+ new(doc, tags, nil, options)
56
+ end
57
+
58
+ def extract_params(params) # :nodoc:
59
+ handler = params.find do |param|
60
+ ![Hash, String, Symbol].include?(param.class)
61
+ end
62
+ params -= [handler] if handler
63
+
64
+ hashes = []
65
+ while Hash === params.last || params.last.nil?
66
+ hashes << params.pop
67
+ break if params.empty?
68
+ end
69
+ ns, binds = hashes.reverse
70
+
71
+ ns ||=
72
+ begin
73
+ ns = {}
74
+ children.each { |child| ns.merge!(child.namespaces) }
75
+ ns
76
+ end
77
+
78
+ [params, handler, ns, binds]
79
+ end
80
+ end
81
+ end
82
+ end
83
+ # vim: set shiftwidth=2 softtabstop=2 tabstop=8 expandtab:
@@ -0,0 +1,100 @@
1
+ # coding: utf-8
2
+ # frozen_string_literal: true
3
+
4
+ #
5
+ # Copyright 2013-2021 Sam Ruby, Stephen Checkoway
6
+ #
7
+ # Licensed under the Apache License, Version 2.0 (the "License");
8
+ # you may not use this file except in compliance with the License.
9
+ # You may obtain a copy of the License at
10
+ #
11
+ # http://www.apache.org/licenses/LICENSE-2.0
12
+ #
13
+ # Unless required by applicable law or agreed to in writing, software
14
+ # distributed under the License is distributed on an "AS IS" BASIS,
15
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16
+ # See the License for the specific language governing permissions and
17
+ # limitations under the License.
18
+ #
19
+
20
+ require_relative "../xml/node"
21
+
22
+ module Nokogiri
23
+ module HTML5
24
+ # Since v1.12.0
25
+ #
26
+ # 💡 HTML5 functionality is not available when running JRuby.
27
+ module Node
28
+ def inner_html(options = {})
29
+ return super(options) unless document.is_a?(HTML5::Document)
30
+
31
+ result = options[:preserve_newline] && HTML5.prepend_newline?(self) ? +"\n" : +""
32
+ result << children.map { |child| child.to_html(options) }.join
33
+ result
34
+ end
35
+
36
+ def write_to(io, *options)
37
+ return super(io, *options) unless document.is_a?(HTML5::Document)
38
+
39
+ options = options.first.is_a?(Hash) ? options.shift : {}
40
+ encoding = options[:encoding] || options[0]
41
+ if Nokogiri.jruby?
42
+ save_options = options[:save_with] || options[1]
43
+ indent_times = options[:indent] || 0
44
+ else
45
+ save_options = options[:save_with] || options[1] || XML::Node::SaveOptions::FORMAT
46
+ indent_times = options[:indent] || 2
47
+ end
48
+ indent_string = (options[:indent_text] || " ") * indent_times
49
+
50
+ config = XML::Node::SaveOptions.new(save_options.to_i)
51
+ yield config if block_given?
52
+
53
+ config_options = config.options
54
+ if config_options & (XML::Node::SaveOptions::AS_XML | XML::Node::SaveOptions::AS_XHTML) != 0
55
+ # Use Nokogiri's serializing code.
56
+ native_write_to(io, encoding, indent_string, config_options)
57
+ else
58
+ # Serialize including the current node.
59
+ encoding ||= document.encoding || Encoding::UTF_8
60
+ internal_ops = {
61
+ preserve_newline: options[:preserve_newline] || false,
62
+ }
63
+ HTML5.serialize_node_internal(self, io, encoding, internal_ops)
64
+ end
65
+ end
66
+
67
+ def fragment(tags)
68
+ return super(tags) unless document.is_a?(HTML5::Document)
69
+
70
+ DocumentFragment.new(document, tags, self)
71
+ end
72
+
73
+ private
74
+
75
+ # HTML elements can have attributes that contain colons.
76
+ # Nokogiri::XML::Node#[]= treats names with colons as a prefixed QName
77
+ # and tries to create an attribute in a namespace. This is especially
78
+ # annoying with attribute names like xml:lang since libxml2 will
79
+ # actually create the xml namespace if it doesn't exist already.
80
+ def add_child_node_and_reparent_attrs(node)
81
+ return super(node) unless document.is_a?(HTML5::Document)
82
+
83
+ # I'm not sure what this method is supposed to do. Reparenting
84
+ # namespaces is handled by libxml2, including child namespaces which
85
+ # this method wouldn't handle.
86
+ # https://github.com/sparklemotion/nokogiri/issues/1790
87
+ add_child_node(node)
88
+ # node.attribute_nodes.find_all { |a| a.namespace }.each do |attr|
89
+ # attr.remove
90
+ # ns = attr.namespace
91
+ # a["#{ns.prefix}:#{attr.name}"] = attr.value
92
+ # end
93
+ end
94
+ end
95
+ # Monkey patch
96
+ XML::Node.prepend(HTML5::Node)
97
+ end
98
+ end
99
+
100
+ # vim: set shiftwidth=2 softtabstop=2 tabstop=8 expandtab: