nokogiri 1.6.8.1 → 1.13.4

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (354) hide show
  1. checksums.yaml +5 -5
  2. data/Gemfile +3 -20
  3. data/LICENSE-DEPENDENCIES.md +1903 -0
  4. data/LICENSE.md +9 -0
  5. data/README.md +197 -83
  6. data/bin/nokogiri +63 -50
  7. data/dependencies.yml +16 -22
  8. data/ext/nokogiri/depend +38 -358
  9. data/ext/nokogiri/extconf.rb +751 -432
  10. data/ext/nokogiri/gumbo.c +584 -0
  11. data/ext/nokogiri/html4_document.c +166 -0
  12. data/ext/nokogiri/html4_element_description.c +294 -0
  13. data/ext/nokogiri/html4_entity_lookup.c +37 -0
  14. data/ext/nokogiri/html4_sax_parser_context.c +120 -0
  15. data/ext/nokogiri/html4_sax_push_parser.c +95 -0
  16. data/ext/nokogiri/libxml2_backwards_compat.c +121 -0
  17. data/ext/nokogiri/nokogiri.c +228 -95
  18. data/ext/nokogiri/nokogiri.h +190 -98
  19. data/ext/nokogiri/test_global_handlers.c +40 -0
  20. data/ext/nokogiri/xml_attr.c +49 -40
  21. data/ext/nokogiri/xml_attribute_decl.c +18 -18
  22. data/ext/nokogiri/xml_cdata.c +24 -23
  23. data/ext/nokogiri/xml_comment.c +19 -26
  24. data/ext/nokogiri/xml_document.c +291 -217
  25. data/ext/nokogiri/xml_document_fragment.c +12 -16
  26. data/ext/nokogiri/xml_dtd.c +56 -50
  27. data/ext/nokogiri/xml_element_content.c +31 -26
  28. data/ext/nokogiri/xml_element_decl.c +22 -22
  29. data/ext/nokogiri/xml_encoding_handler.c +43 -18
  30. data/ext/nokogiri/xml_entity_decl.c +32 -30
  31. data/ext/nokogiri/xml_entity_reference.c +16 -18
  32. data/ext/nokogiri/xml_namespace.c +61 -58
  33. data/ext/nokogiri/xml_node.c +1194 -729
  34. data/ext/nokogiri/xml_node_set.c +178 -165
  35. data/ext/nokogiri/xml_processing_instruction.c +17 -19
  36. data/ext/nokogiri/xml_reader.c +226 -175
  37. data/ext/nokogiri/xml_relax_ng.c +52 -28
  38. data/ext/nokogiri/xml_sax_parser.c +115 -114
  39. data/ext/nokogiri/xml_sax_parser_context.c +105 -86
  40. data/ext/nokogiri/xml_sax_push_parser.c +88 -35
  41. data/ext/nokogiri/xml_schema.c +112 -33
  42. data/ext/nokogiri/xml_syntax_error.c +42 -21
  43. data/ext/nokogiri/xml_text.c +13 -17
  44. data/ext/nokogiri/xml_xpath_context.c +223 -115
  45. data/ext/nokogiri/xslt_stylesheet.c +265 -173
  46. data/gumbo-parser/CHANGES.md +63 -0
  47. data/gumbo-parser/Makefile +101 -0
  48. data/gumbo-parser/THANKS +27 -0
  49. data/gumbo-parser/src/Makefile +34 -0
  50. data/gumbo-parser/src/README.md +41 -0
  51. data/gumbo-parser/src/ascii.c +75 -0
  52. data/gumbo-parser/src/ascii.h +115 -0
  53. data/gumbo-parser/src/attribute.c +42 -0
  54. data/gumbo-parser/src/attribute.h +17 -0
  55. data/gumbo-parser/src/char_ref.c +22225 -0
  56. data/gumbo-parser/src/char_ref.h +29 -0
  57. data/gumbo-parser/src/char_ref.rl +2154 -0
  58. data/gumbo-parser/src/error.c +626 -0
  59. data/gumbo-parser/src/error.h +148 -0
  60. data/gumbo-parser/src/foreign_attrs.c +104 -0
  61. data/gumbo-parser/src/foreign_attrs.gperf +27 -0
  62. data/gumbo-parser/src/gumbo.h +943 -0
  63. data/gumbo-parser/src/insertion_mode.h +33 -0
  64. data/gumbo-parser/src/macros.h +91 -0
  65. data/gumbo-parser/src/parser.c +4875 -0
  66. data/gumbo-parser/src/parser.h +41 -0
  67. data/gumbo-parser/src/replacement.h +33 -0
  68. data/gumbo-parser/src/string_buffer.c +103 -0
  69. data/gumbo-parser/src/string_buffer.h +68 -0
  70. data/gumbo-parser/src/string_piece.c +48 -0
  71. data/gumbo-parser/src/svg_attrs.c +174 -0
  72. data/gumbo-parser/src/svg_attrs.gperf +77 -0
  73. data/gumbo-parser/src/svg_tags.c +137 -0
  74. data/gumbo-parser/src/svg_tags.gperf +55 -0
  75. data/gumbo-parser/src/tag.c +222 -0
  76. data/gumbo-parser/src/tag_lookup.c +382 -0
  77. data/gumbo-parser/src/tag_lookup.gperf +169 -0
  78. data/gumbo-parser/src/tag_lookup.h +13 -0
  79. data/gumbo-parser/src/token_buffer.c +79 -0
  80. data/gumbo-parser/src/token_buffer.h +71 -0
  81. data/gumbo-parser/src/token_type.h +17 -0
  82. data/gumbo-parser/src/tokenizer.c +3463 -0
  83. data/gumbo-parser/src/tokenizer.h +112 -0
  84. data/gumbo-parser/src/tokenizer_states.h +339 -0
  85. data/gumbo-parser/src/utf8.c +245 -0
  86. data/gumbo-parser/src/utf8.h +164 -0
  87. data/gumbo-parser/src/util.c +68 -0
  88. data/gumbo-parser/src/util.h +30 -0
  89. data/gumbo-parser/src/vector.c +111 -0
  90. data/gumbo-parser/src/vector.h +45 -0
  91. data/lib/nokogiri/class_resolver.rb +67 -0
  92. data/lib/nokogiri/css/node.rb +10 -8
  93. data/lib/nokogiri/css/parser.rb +410 -372
  94. data/lib/nokogiri/css/parser.y +260 -244
  95. data/lib/nokogiri/css/parser_extras.rb +54 -49
  96. data/lib/nokogiri/css/syntax_error.rb +3 -1
  97. data/lib/nokogiri/css/tokenizer.rb +107 -104
  98. data/lib/nokogiri/css/tokenizer.rex +7 -6
  99. data/lib/nokogiri/css/xpath_visitor.rb +226 -92
  100. data/lib/nokogiri/css.rb +50 -17
  101. data/lib/nokogiri/decorators/slop.rb +9 -7
  102. data/lib/nokogiri/extension.rb +31 -0
  103. data/lib/nokogiri/gumbo.rb +15 -0
  104. data/lib/nokogiri/html.rb +38 -27
  105. data/lib/nokogiri/{html → html4}/builder.rb +4 -2
  106. data/lib/nokogiri/{html → html4}/document.rb +104 -106
  107. data/lib/nokogiri/html4/document_fragment.rb +54 -0
  108. data/lib/nokogiri/{html → html4}/element_description.rb +3 -1
  109. data/lib/nokogiri/html4/element_description_defaults.rb +578 -0
  110. data/lib/nokogiri/{html → html4}/entity_lookup.rb +4 -2
  111. data/lib/nokogiri/{html → html4}/sax/parser.rb +24 -13
  112. data/lib/nokogiri/html4/sax/parser_context.rb +20 -0
  113. data/lib/nokogiri/{html → html4}/sax/push_parser.rb +12 -11
  114. data/lib/nokogiri/html4.rb +46 -0
  115. data/lib/nokogiri/html5/document.rb +91 -0
  116. data/lib/nokogiri/html5/document_fragment.rb +83 -0
  117. data/lib/nokogiri/html5/node.rb +100 -0
  118. data/lib/nokogiri/html5.rb +478 -0
  119. data/lib/nokogiri/jruby/dependencies.rb +21 -0
  120. data/lib/nokogiri/syntax_error.rb +2 -0
  121. data/lib/nokogiri/version/constant.rb +6 -0
  122. data/lib/nokogiri/version/info.rb +222 -0
  123. data/lib/nokogiri/version.rb +3 -107
  124. data/lib/nokogiri/xml/attr.rb +6 -3
  125. data/lib/nokogiri/xml/attribute_decl.rb +3 -1
  126. data/lib/nokogiri/xml/builder.rb +97 -53
  127. data/lib/nokogiri/xml/cdata.rb +3 -1
  128. data/lib/nokogiri/xml/character_data.rb +2 -0
  129. data/lib/nokogiri/xml/document.rb +231 -93
  130. data/lib/nokogiri/xml/document_fragment.rb +57 -44
  131. data/lib/nokogiri/xml/dtd.rb +4 -2
  132. data/lib/nokogiri/xml/element_content.rb +2 -0
  133. data/lib/nokogiri/xml/element_decl.rb +3 -1
  134. data/lib/nokogiri/xml/entity_decl.rb +4 -2
  135. data/lib/nokogiri/xml/entity_reference.rb +20 -0
  136. data/lib/nokogiri/xml/namespace.rb +3 -0
  137. data/lib/nokogiri/xml/node/save_options.rb +10 -5
  138. data/lib/nokogiri/xml/node.rb +911 -337
  139. data/lib/nokogiri/xml/node_set.rb +141 -84
  140. data/lib/nokogiri/xml/notation.rb +13 -0
  141. data/lib/nokogiri/xml/parse_options.rb +23 -9
  142. data/lib/nokogiri/xml/pp/character_data.rb +9 -6
  143. data/lib/nokogiri/xml/pp/node.rb +25 -26
  144. data/lib/nokogiri/xml/pp.rb +4 -2
  145. data/lib/nokogiri/xml/processing_instruction.rb +3 -1
  146. data/lib/nokogiri/xml/reader.rb +23 -28
  147. data/lib/nokogiri/xml/relax_ng.rb +8 -2
  148. data/lib/nokogiri/xml/sax/document.rb +45 -49
  149. data/lib/nokogiri/xml/sax/parser.rb +43 -40
  150. data/lib/nokogiri/xml/sax/parser_context.rb +8 -3
  151. data/lib/nokogiri/xml/sax/push_parser.rb +6 -5
  152. data/lib/nokogiri/xml/sax.rb +6 -4
  153. data/lib/nokogiri/xml/schema.rb +19 -9
  154. data/lib/nokogiri/xml/searchable.rb +138 -89
  155. data/lib/nokogiri/xml/syntax_error.rb +26 -1
  156. data/lib/nokogiri/xml/text.rb +2 -0
  157. data/lib/nokogiri/xml/xpath/syntax_error.rb +4 -2
  158. data/lib/nokogiri/xml/xpath.rb +15 -4
  159. data/lib/nokogiri/xml/xpath_context.rb +3 -3
  160. data/lib/nokogiri/xml.rb +39 -37
  161. data/lib/nokogiri/xslt/stylesheet.rb +3 -1
  162. data/lib/nokogiri/xslt.rb +29 -20
  163. data/lib/nokogiri.rb +49 -65
  164. data/lib/xsd/xmlparser/nokogiri.rb +26 -24
  165. data/patches/libxml2/0001-Remove-script-macro-support.patch +40 -0
  166. data/patches/libxml2/0002-Update-entities-to-remove-handling-of-ssi.patch +44 -0
  167. data/patches/libxml2/0003-libxml2.la-is-in-top_builddir.patch +25 -0
  168. data/patches/libxml2/0004-use-glibc-strlen.patch +53 -0
  169. data/patches/libxml2/0005-avoid-isnan-isinf.patch +81 -0
  170. data/patches/libxml2/0006-update-automake-files-for-arm64.patch +3040 -0
  171. data/patches/libxml2/0008-htmlParseComment-handle-abruptly-closed-comments.patch +61 -0
  172. data/patches/libxml2/0009-allow-wildcard-namespaces.patch +77 -0
  173. data/patches/libxml2/0010-Revert-Different-approach-to-fix-quadratic-behavior.patch +45 -0
  174. data/patches/libxslt/0001-update-automake-files-for-arm64.patch +3037 -0
  175. data/ports/archives/libxml2-2.9.13.tar.xz +0 -0
  176. data/ports/archives/libxslt-1.1.35.tar.xz +0 -0
  177. metadata +233 -258
  178. data/.autotest +0 -26
  179. data/.cross_rubies +0 -9
  180. data/.editorconfig +0 -17
  181. data/.gemtest +0 -0
  182. data/.travis.yml +0 -51
  183. data/CHANGELOG.rdoc +0 -1160
  184. data/CONTRIBUTING.md +0 -42
  185. data/C_CODING_STYLE.rdoc +0 -33
  186. data/LICENSE.txt +0 -31
  187. data/Manifest.txt +0 -364
  188. data/ROADMAP.md +0 -111
  189. data/Rakefile +0 -375
  190. data/STANDARD_RESPONSES.md +0 -47
  191. data/Y_U_NO_GEMSPEC.md +0 -155
  192. data/appveyor.yml +0 -22
  193. data/build_all +0 -45
  194. data/ext/nokogiri/html_document.c +0 -170
  195. data/ext/nokogiri/html_document.h +0 -10
  196. data/ext/nokogiri/html_element_description.c +0 -279
  197. data/ext/nokogiri/html_element_description.h +0 -10
  198. data/ext/nokogiri/html_entity_lookup.c +0 -32
  199. data/ext/nokogiri/html_entity_lookup.h +0 -8
  200. data/ext/nokogiri/html_sax_parser_context.c +0 -116
  201. data/ext/nokogiri/html_sax_parser_context.h +0 -11
  202. data/ext/nokogiri/html_sax_push_parser.c +0 -87
  203. data/ext/nokogiri/html_sax_push_parser.h +0 -9
  204. data/ext/nokogiri/xml_attr.h +0 -9
  205. data/ext/nokogiri/xml_attribute_decl.h +0 -9
  206. data/ext/nokogiri/xml_cdata.h +0 -9
  207. data/ext/nokogiri/xml_comment.h +0 -9
  208. data/ext/nokogiri/xml_document.h +0 -23
  209. data/ext/nokogiri/xml_document_fragment.h +0 -10
  210. data/ext/nokogiri/xml_dtd.h +0 -10
  211. data/ext/nokogiri/xml_element_content.h +0 -10
  212. data/ext/nokogiri/xml_element_decl.h +0 -9
  213. data/ext/nokogiri/xml_encoding_handler.h +0 -8
  214. data/ext/nokogiri/xml_entity_decl.h +0 -10
  215. data/ext/nokogiri/xml_entity_reference.h +0 -9
  216. data/ext/nokogiri/xml_io.c +0 -60
  217. data/ext/nokogiri/xml_io.h +0 -11
  218. data/ext/nokogiri/xml_libxml2_hacks.c +0 -112
  219. data/ext/nokogiri/xml_libxml2_hacks.h +0 -12
  220. data/ext/nokogiri/xml_namespace.h +0 -13
  221. data/ext/nokogiri/xml_node.h +0 -13
  222. data/ext/nokogiri/xml_node_set.h +0 -13
  223. data/ext/nokogiri/xml_processing_instruction.h +0 -9
  224. data/ext/nokogiri/xml_reader.h +0 -10
  225. data/ext/nokogiri/xml_relax_ng.h +0 -9
  226. data/ext/nokogiri/xml_sax_parser.h +0 -39
  227. data/ext/nokogiri/xml_sax_parser_context.h +0 -10
  228. data/ext/nokogiri/xml_sax_push_parser.h +0 -9
  229. data/ext/nokogiri/xml_schema.h +0 -9
  230. data/ext/nokogiri/xml_syntax_error.h +0 -13
  231. data/ext/nokogiri/xml_text.h +0 -9
  232. data/ext/nokogiri/xml_xpath_context.h +0 -10
  233. data/ext/nokogiri/xslt_stylesheet.h +0 -14
  234. data/lib/nokogiri/html/document_fragment.rb +0 -39
  235. data/lib/nokogiri/html/element_description_defaults.rb +0 -671
  236. data/lib/nokogiri/html/sax/parser_context.rb +0 -16
  237. data/patches/sort-patches-by-date +0 -25
  238. data/ports/archives/libxml2-2.9.4.tar.gz +0 -0
  239. data/ports/archives/libxslt-1.1.29.tar.gz +0 -0
  240. data/suppressions/README.txt +0 -1
  241. data/suppressions/nokogiri_ree-1.8.7.358.supp +0 -61
  242. data/suppressions/nokogiri_ruby-1.8.7.370.supp +0 -0
  243. data/suppressions/nokogiri_ruby-1.9.2.320.supp +0 -28
  244. data/suppressions/nokogiri_ruby-1.9.3.327.supp +0 -28
  245. data/tasks/test.rb +0 -100
  246. data/test/css/test_nthiness.rb +0 -226
  247. data/test/css/test_parser.rb +0 -369
  248. data/test/css/test_tokenizer.rb +0 -215
  249. data/test/css/test_xpath_visitor.rb +0 -96
  250. data/test/decorators/test_slop.rb +0 -20
  251. data/test/files/2ch.html +0 -108
  252. data/test/files/GH_1042.html +0 -18
  253. data/test/files/address_book.rlx +0 -12
  254. data/test/files/address_book.xml +0 -10
  255. data/test/files/atom.xml +0 -344
  256. data/test/files/bar/bar.xsd +0 -4
  257. data/test/files/bogus.xml +0 -0
  258. data/test/files/dont_hurt_em_why.xml +0 -422
  259. data/test/files/encoding.html +0 -82
  260. data/test/files/encoding.xhtml +0 -84
  261. data/test/files/exslt.xml +0 -8
  262. data/test/files/exslt.xslt +0 -35
  263. data/test/files/foo/foo.xsd +0 -4
  264. data/test/files/metacharset.html +0 -10
  265. data/test/files/namespace_pressure_test.xml +0 -1684
  266. data/test/files/noencoding.html +0 -47
  267. data/test/files/po.xml +0 -32
  268. data/test/files/po.xsd +0 -66
  269. data/test/files/saml/saml20assertion_schema.xsd +0 -283
  270. data/test/files/saml/saml20protocol_schema.xsd +0 -302
  271. data/test/files/saml/xenc_schema.xsd +0 -146
  272. data/test/files/saml/xmldsig_schema.xsd +0 -318
  273. data/test/files/shift_jis.html +0 -10
  274. data/test/files/shift_jis.xml +0 -5
  275. data/test/files/shift_jis_no_charset.html +0 -9
  276. data/test/files/slow-xpath.xml +0 -25509
  277. data/test/files/snuggles.xml +0 -3
  278. data/test/files/staff.dtd +0 -10
  279. data/test/files/staff.xml +0 -59
  280. data/test/files/staff.xslt +0 -32
  281. data/test/files/test_document_url/bar.xml +0 -2
  282. data/test/files/test_document_url/document.dtd +0 -4
  283. data/test/files/test_document_url/document.xml +0 -6
  284. data/test/files/tlm.html +0 -851
  285. data/test/files/to_be_xincluded.xml +0 -2
  286. data/test/files/valid_bar.xml +0 -2
  287. data/test/files/xinclude.xml +0 -4
  288. data/test/helper.rb +0 -181
  289. data/test/html/sax/test_parser.rb +0 -141
  290. data/test/html/sax/test_parser_context.rb +0 -46
  291. data/test/html/sax/test_push_parser.rb +0 -87
  292. data/test/html/test_builder.rb +0 -164
  293. data/test/html/test_document.rb +0 -701
  294. data/test/html/test_document_encoding.rb +0 -145
  295. data/test/html/test_document_fragment.rb +0 -301
  296. data/test/html/test_element_description.rb +0 -105
  297. data/test/html/test_named_characters.rb +0 -14
  298. data/test/html/test_node.rb +0 -212
  299. data/test/html/test_node_encoding.rb +0 -85
  300. data/test/namespaces/test_additional_namespaces_in_builder_doc.rb +0 -14
  301. data/test/namespaces/test_namespaces_aliased_default.rb +0 -24
  302. data/test/namespaces/test_namespaces_in_builder_doc.rb +0 -75
  303. data/test/namespaces/test_namespaces_in_cloned_doc.rb +0 -31
  304. data/test/namespaces/test_namespaces_in_created_doc.rb +0 -75
  305. data/test/namespaces/test_namespaces_in_parsed_doc.rb +0 -80
  306. data/test/namespaces/test_namespaces_preservation.rb +0 -31
  307. data/test/test_convert_xpath.rb +0 -135
  308. data/test/test_css_cache.rb +0 -45
  309. data/test/test_encoding_handler.rb +0 -48
  310. data/test/test_memory_leak.rb +0 -156
  311. data/test/test_nokogiri.rb +0 -138
  312. data/test/test_soap4r_sax.rb +0 -52
  313. data/test/test_xslt_transforms.rb +0 -314
  314. data/test/xml/node/test_save_options.rb +0 -28
  315. data/test/xml/node/test_subclass.rb +0 -44
  316. data/test/xml/sax/test_parser.rb +0 -394
  317. data/test/xml/sax/test_parser_context.rb +0 -115
  318. data/test/xml/sax/test_push_parser.rb +0 -157
  319. data/test/xml/test_attr.rb +0 -67
  320. data/test/xml/test_attribute_decl.rb +0 -86
  321. data/test/xml/test_builder.rb +0 -341
  322. data/test/xml/test_c14n.rb +0 -180
  323. data/test/xml/test_cdata.rb +0 -48
  324. data/test/xml/test_comment.rb +0 -40
  325. data/test/xml/test_document.rb +0 -982
  326. data/test/xml/test_document_encoding.rb +0 -31
  327. data/test/xml/test_document_fragment.rb +0 -271
  328. data/test/xml/test_dtd.rb +0 -187
  329. data/test/xml/test_dtd_encoding.rb +0 -31
  330. data/test/xml/test_element_content.rb +0 -56
  331. data/test/xml/test_element_decl.rb +0 -73
  332. data/test/xml/test_entity_decl.rb +0 -122
  333. data/test/xml/test_entity_reference.rb +0 -251
  334. data/test/xml/test_namespace.rb +0 -96
  335. data/test/xml/test_node.rb +0 -1244
  336. data/test/xml/test_node_attributes.rb +0 -115
  337. data/test/xml/test_node_encoding.rb +0 -69
  338. data/test/xml/test_node_inheritance.rb +0 -32
  339. data/test/xml/test_node_reparenting.rb +0 -549
  340. data/test/xml/test_node_set.rb +0 -775
  341. data/test/xml/test_parse_options.rb +0 -64
  342. data/test/xml/test_processing_instruction.rb +0 -30
  343. data/test/xml/test_reader.rb +0 -589
  344. data/test/xml/test_reader_encoding.rb +0 -134
  345. data/test/xml/test_relax_ng.rb +0 -60
  346. data/test/xml/test_schema.rb +0 -142
  347. data/test/xml/test_syntax_error.rb +0 -30
  348. data/test/xml/test_text.rb +0 -60
  349. data/test/xml/test_unparented_node.rb +0 -440
  350. data/test/xml/test_xinclude.rb +0 -83
  351. data/test/xml/test_xpath.rb +0 -445
  352. data/test/xslt/test_custom_functions.rb +0 -133
  353. data/test/xslt/test_exception_handling.rb +0 -37
  354. data/test_all +0 -107
@@ -0,0 +1,61 @@
1
+ From 3ea8d08da310b645e37940eaae5cc28e251b155b Mon Sep 17 00:00:00 2001
2
+ From: Mike Dalessio <mike.dalessio@gmail.com>
3
+ Date: Sat, 17 Jul 2021 14:36:53 -0400
4
+ Subject: [PATCH] htmlParseComment: handle abruptly-closed comments
5
+
6
+ See guidance provided on abrutply-closed comments here:
7
+
8
+ https://html.spec.whatwg.org/multipage/parsing.html#parse-error-abrupt-closing-of-empty-comment
9
+ ---
10
+ HTMLparser.c | 11 +++++++++++
11
+ include/libxml/xmlerror.h | 1 +
12
+ 2 files changed, 12 insertions(+)
13
+
14
+ diff --git a/HTMLparser.c b/HTMLparser.c
15
+ index b56363a..f0bf294 100644
16
+ --- a/HTMLparser.c
17
+ +++ b/HTMLparser.c
18
+ @@ -3485,10 +3485,20 @@ htmlParseComment(htmlParserCtxtPtr ctxt) {
19
+ q = CUR_CHAR(ql);
20
+ if (q == 0)
21
+ goto unfinished;
22
+ + if (q == '>') {
23
+ + htmlParseErr(ctxt, XML_ERR_COMMENT_ABRUPTLY_ENDED, "Comment abruptly ended", NULL, NULL);
24
+ + cur = '>';
25
+ + goto finished;
26
+ + }
27
+ NEXTL(ql);
28
+ r = CUR_CHAR(rl);
29
+ if (r == 0)
30
+ goto unfinished;
31
+ + if (q == '-' && r == '>') {
32
+ + htmlParseErr(ctxt, XML_ERR_COMMENT_ABRUPTLY_ENDED, "Comment abruptly ended", NULL, NULL);
33
+ + cur = '>';
34
+ + goto finished;
35
+ + }
36
+ NEXTL(rl);
37
+ cur = CUR_CHAR(l);
38
+ while ((cur != 0) &&
39
+ @@ -3536,6 +3546,7 @@ htmlParseComment(htmlParserCtxtPtr ctxt) {
40
+ cur = next;
41
+ l = nl;
42
+ }
43
+ +finished:
44
+ buf[len] = 0;
45
+ if (cur == '>') {
46
+ NEXT;
47
+ diff --git a/include/libxml/xmlerror.h b/include/libxml/xmlerror.h
48
+ index c101997..7b68e40 100644
49
+ --- a/include/libxml/xmlerror.h
50
+ +++ b/include/libxml/xmlerror.h
51
+ @@ -209,6 +209,7 @@ typedef enum {
52
+ XML_ERR_VERSION_MISMATCH, /* 109 */
53
+ XML_ERR_NAME_TOO_LONG, /* 110 */
54
+ XML_ERR_USER_STOP, /* 111 */
55
+ + XML_ERR_COMMENT_ABRUPTLY_ENDED, /* 112 */
56
+ XML_NS_ERR_XML_NAMESPACE = 200,
57
+ XML_NS_ERR_UNDEFINED_NAMESPACE, /* 201 */
58
+ XML_NS_ERR_QNAME, /* 202 */
59
+ --
60
+ 2.31.0
61
+
@@ -0,0 +1,77 @@
1
+ From 74c95ec5932c737d4fcb06b8646b0017364ada14 Mon Sep 17 00:00:00 2001
2
+ From: Mike Dalessio <mike.dalessio@gmail.com>
3
+ Date: Fri, 24 Dec 2021 19:08:01 -0500
4
+ Subject: [PATCH] attempt to hack in wildcard namespaces to xpath
5
+
6
+ I'm not confident this is a bulletproof patch.
7
+ ---
8
+ xpath.c | 24 ++++++++++++++++++------
9
+ 1 file changed, 18 insertions(+), 6 deletions(-)
10
+
11
+ diff --git a/xpath.c b/xpath.c
12
+ index 1aa2f1a..c7f0885 100644
13
+ --- a/xpath.c
14
+ +++ b/xpath.c
15
+ @@ -146,6 +146,9 @@
16
+ #define XPATH_MAX_RECURSION_DEPTH 5000
17
+ #endif
18
+
19
+ +#define WILDCARD_PREFIX "*"
20
+ +#define IS_WILDCARD_PREFIX(p) xmlStrEqual((xmlChar*)WILDCARD_PREFIX, p)
21
+ +
22
+ /*
23
+ * TODO:
24
+ * There are a few spots where some tests are done which depend upon ascii
25
+ @@ -11073,12 +11076,15 @@ xmlXPathCompNodeTest(xmlXPathParserContextPtr ctxt, xmlXPathTestVal *test,
26
+ SKIP_BLANKS;
27
+
28
+ if ((name == NULL) && (CUR == '*')) {
29
+ - /*
30
+ - * All elements
31
+ - */
32
+ NEXT;
33
+ - *test = NODE_TEST_ALL;
34
+ - return(NULL);
35
+ + if (CUR != ':') {
36
+ + /*
37
+ + * All elements
38
+ + */
39
+ + *test = NODE_TEST_ALL;
40
+ + return(NULL);
41
+ + }
42
+ + name = xmlCharStrdup(WILDCARD_PREFIX);
43
+ }
44
+
45
+ if (name == NULL)
46
+ @@ -11327,6 +11333,10 @@ xmlXPathCompStep(xmlXPathParserContextPtr ctxt) {
47
+ }
48
+ #endif
49
+ if (CUR == '*') {
50
+ + if (NXT(1) == ':') {
51
+ + NEXT;
52
+ + name = xmlCharStrdup(WILDCARD_PREFIX);
53
+ + }
54
+ axis = AXIS_CHILD;
55
+ } else {
56
+ if (name == NULL)
57
+ @@ -12030,7 +12040,7 @@ xmlXPathNodeCollectAndTest(xmlXPathParserContextPtr ctxt,
58
+ /*
59
+ * Setup namespaces.
60
+ */
61
+ - if (prefix != NULL) {
62
+ + if (prefix != NULL && !IS_WILDCARD_PREFIX(prefix)) {
63
+ URI = xmlXPathNsLookup(xpctxt, prefix);
64
+ if (URI == NULL) {
65
+ xmlXPathReleaseObject(xpctxt, obj);
66
+ @@ -12369,6 +12379,8 @@ xmlXPathNodeCollectAndTest(xmlXPathParserContextPtr ctxt,
67
+ {
68
+ XP_TEST_HIT
69
+ }
70
+ + } else if (IS_WILDCARD_PREFIX(prefix)) {
71
+ + XP_TEST_HIT
72
+ } else {
73
+ if ((cur->ns != NULL) &&
74
+ (xmlStrEqual(URI, cur->ns->href)))
75
+ --
76
+ 2.31.0
77
+
@@ -0,0 +1,45 @@
1
+ From ddc5f3d22644e0f6fbcc20541c86825757ffee62 Mon Sep 17 00:00:00 2001
2
+ From: Mike Dalessio <mike.dalessio@gmail.com>
3
+ Date: Mon, 21 Feb 2022 18:27:45 -0500
4
+ Subject: [PATCH] Revert "Different approach to fix quadratic behavior in HTML
5
+ push parser"
6
+
7
+ This reverts commit 798bdf13f6964a650b9a0b7b4b3a769f6f1d509a.
8
+ ---
9
+ HTMLparser.c | 14 +-------------
10
+ 1 file changed, 1 insertion(+), 13 deletions(-)
11
+
12
+ diff --git a/HTMLparser.c b/HTMLparser.c
13
+ index eba2d7c..c0b8119 100644
14
+ --- a/HTMLparser.c
15
+ +++ b/HTMLparser.c
16
+ @@ -3960,25 +3960,13 @@ htmlParseStartTag(htmlParserCtxtPtr ctxt) {
17
+ htmlParseErr(ctxt, XML_ERR_NAME_REQUIRED,
18
+ "htmlParseStartTag: invalid element name\n",
19
+ NULL, NULL);
20
+ - /*
21
+ - * The recovery code is disabled for now as it can result in
22
+ - * quadratic behavior with the push parser. htmlParseStartTag
23
+ - * must consume all content up to the final '>' in order to avoid
24
+ - * rescanning for this terminator.
25
+ - *
26
+ - * For a proper fix in line with HTML5, htmlParseStartTag and
27
+ - * htmlParseElement should only be called when there's an ASCII
28
+ - * alpha character following the initial '<'. Otherwise, the '<'
29
+ - * should be emitted as text (unless followed by '!', '/' or '?').
30
+ - */
31
+ -#if 0
32
+ /* if recover preserve text on classic misconstructs */
33
+ if ((ctxt->recovery) && ((IS_BLANK_CH(CUR)) || (CUR == '<') ||
34
+ (CUR == '=') || (CUR == '>') || (((CUR >= '0') && (CUR <= '9'))))) {
35
+ htmlParseCharDataInternal(ctxt, '<');
36
+ return(-1);
37
+ }
38
+ -#endif
39
+ +
40
+
41
+ /* Dump the bogus tag like browsers do */
42
+ while ((CUR != 0) && (CUR != '>') &&
43
+ --
44
+ 2.31.0
45
+