nokogiri 1.8.5 → 1.15.3

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (358) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile +40 -18
  3. data/LICENSE-DEPENDENCIES.md +1636 -1024
  4. data/LICENSE.md +5 -28
  5. data/README.md +203 -90
  6. data/bin/nokogiri +63 -50
  7. data/dependencies.yml +33 -61
  8. data/ext/nokogiri/depend +38 -358
  9. data/ext/nokogiri/extconf.rb +867 -417
  10. data/ext/nokogiri/gumbo.c +594 -0
  11. data/ext/nokogiri/html4_document.c +165 -0
  12. data/ext/nokogiri/html4_element_description.c +299 -0
  13. data/ext/nokogiri/html4_entity_lookup.c +37 -0
  14. data/ext/nokogiri/html4_sax_parser_context.c +108 -0
  15. data/ext/nokogiri/html4_sax_push_parser.c +95 -0
  16. data/ext/nokogiri/libxml2_backwards_compat.c +121 -0
  17. data/ext/nokogiri/nokogiri.c +251 -105
  18. data/ext/nokogiri/nokogiri.h +215 -90
  19. data/ext/nokogiri/test_global_handlers.c +40 -0
  20. data/ext/nokogiri/xml_attr.c +42 -37
  21. data/ext/nokogiri/xml_attribute_decl.c +22 -22
  22. data/ext/nokogiri/xml_cdata.c +40 -31
  23. data/ext/nokogiri/xml_comment.c +20 -27
  24. data/ext/nokogiri/xml_document.c +401 -237
  25. data/ext/nokogiri/xml_document_fragment.c +13 -17
  26. data/ext/nokogiri/xml_dtd.c +64 -58
  27. data/ext/nokogiri/xml_element_content.c +63 -55
  28. data/ext/nokogiri/xml_element_decl.c +31 -31
  29. data/ext/nokogiri/xml_encoding_handler.c +54 -21
  30. data/ext/nokogiri/xml_entity_decl.c +37 -35
  31. data/ext/nokogiri/xml_entity_reference.c +17 -19
  32. data/ext/nokogiri/xml_namespace.c +136 -62
  33. data/ext/nokogiri/xml_node.c +1387 -678
  34. data/ext/nokogiri/xml_node_set.c +246 -216
  35. data/ext/nokogiri/xml_processing_instruction.c +18 -20
  36. data/ext/nokogiri/xml_reader.c +347 -212
  37. data/ext/nokogiri/xml_relax_ng.c +86 -77
  38. data/ext/nokogiri/xml_sax_parser.c +149 -124
  39. data/ext/nokogiri/xml_sax_parser_context.c +145 -103
  40. data/ext/nokogiri/xml_sax_push_parser.c +64 -36
  41. data/ext/nokogiri/xml_schema.c +138 -81
  42. data/ext/nokogiri/xml_syntax_error.c +42 -21
  43. data/ext/nokogiri/xml_text.c +36 -26
  44. data/ext/nokogiri/xml_xpath_context.c +366 -178
  45. data/ext/nokogiri/xslt_stylesheet.c +335 -189
  46. data/gumbo-parser/CHANGES.md +63 -0
  47. data/gumbo-parser/Makefile +111 -0
  48. data/gumbo-parser/THANKS +27 -0
  49. data/gumbo-parser/src/Makefile +34 -0
  50. data/gumbo-parser/src/README.md +41 -0
  51. data/gumbo-parser/src/ascii.c +75 -0
  52. data/gumbo-parser/src/ascii.h +115 -0
  53. data/gumbo-parser/src/attribute.c +42 -0
  54. data/gumbo-parser/src/attribute.h +17 -0
  55. data/gumbo-parser/src/char_ref.c +22225 -0
  56. data/gumbo-parser/src/char_ref.h +29 -0
  57. data/gumbo-parser/src/char_ref.rl +2154 -0
  58. data/gumbo-parser/src/error.c +630 -0
  59. data/gumbo-parser/src/error.h +148 -0
  60. data/gumbo-parser/src/foreign_attrs.c +103 -0
  61. data/gumbo-parser/src/foreign_attrs.gperf +27 -0
  62. data/gumbo-parser/src/insertion_mode.h +33 -0
  63. data/gumbo-parser/src/macros.h +91 -0
  64. data/gumbo-parser/src/nokogiri_gumbo.h +944 -0
  65. data/gumbo-parser/src/parser.c +4891 -0
  66. data/gumbo-parser/src/parser.h +41 -0
  67. data/gumbo-parser/src/replacement.h +33 -0
  68. data/gumbo-parser/src/string_buffer.c +103 -0
  69. data/gumbo-parser/src/string_buffer.h +68 -0
  70. data/gumbo-parser/src/string_piece.c +48 -0
  71. data/gumbo-parser/src/svg_attrs.c +174 -0
  72. data/gumbo-parser/src/svg_attrs.gperf +77 -0
  73. data/gumbo-parser/src/svg_tags.c +137 -0
  74. data/gumbo-parser/src/svg_tags.gperf +55 -0
  75. data/gumbo-parser/src/tag.c +223 -0
  76. data/gumbo-parser/src/tag_lookup.c +382 -0
  77. data/gumbo-parser/src/tag_lookup.gperf +170 -0
  78. data/gumbo-parser/src/tag_lookup.h +13 -0
  79. data/gumbo-parser/src/token_buffer.c +79 -0
  80. data/gumbo-parser/src/token_buffer.h +71 -0
  81. data/gumbo-parser/src/token_type.h +17 -0
  82. data/gumbo-parser/src/tokenizer.c +3463 -0
  83. data/gumbo-parser/src/tokenizer.h +112 -0
  84. data/gumbo-parser/src/tokenizer_states.h +339 -0
  85. data/gumbo-parser/src/utf8.c +245 -0
  86. data/gumbo-parser/src/utf8.h +164 -0
  87. data/gumbo-parser/src/util.c +66 -0
  88. data/gumbo-parser/src/util.h +34 -0
  89. data/gumbo-parser/src/vector.c +111 -0
  90. data/gumbo-parser/src/vector.h +45 -0
  91. data/lib/nokogiri/class_resolver.rb +67 -0
  92. data/lib/nokogiri/css/node.rb +10 -8
  93. data/lib/nokogiri/css/parser.rb +397 -377
  94. data/lib/nokogiri/css/parser.y +250 -245
  95. data/lib/nokogiri/css/parser_extras.rb +54 -49
  96. data/lib/nokogiri/css/syntax_error.rb +3 -1
  97. data/lib/nokogiri/css/tokenizer.rb +107 -104
  98. data/lib/nokogiri/css/tokenizer.rex +3 -2
  99. data/lib/nokogiri/css/xpath_visitor.rb +224 -95
  100. data/lib/nokogiri/css.rb +56 -17
  101. data/lib/nokogiri/decorators/slop.rb +9 -7
  102. data/lib/nokogiri/encoding_handler.rb +57 -0
  103. data/lib/nokogiri/extension.rb +32 -0
  104. data/lib/nokogiri/gumbo.rb +15 -0
  105. data/lib/nokogiri/html.rb +38 -27
  106. data/lib/nokogiri/{html → html4}/builder.rb +4 -2
  107. data/lib/nokogiri/html4/document.rb +214 -0
  108. data/lib/nokogiri/html4/document_fragment.rb +54 -0
  109. data/lib/nokogiri/{html → html4}/element_description.rb +3 -1
  110. data/lib/nokogiri/html4/element_description_defaults.rb +2040 -0
  111. data/lib/nokogiri/html4/encoding_reader.rb +121 -0
  112. data/lib/nokogiri/{html → html4}/entity_lookup.rb +4 -2
  113. data/lib/nokogiri/{html → html4}/sax/parser.rb +17 -16
  114. data/lib/nokogiri/html4/sax/parser_context.rb +20 -0
  115. data/lib/nokogiri/{html → html4}/sax/push_parser.rb +12 -11
  116. data/lib/nokogiri/html4.rb +47 -0
  117. data/lib/nokogiri/html5/document.rb +168 -0
  118. data/lib/nokogiri/html5/document_fragment.rb +90 -0
  119. data/lib/nokogiri/html5/node.rb +103 -0
  120. data/lib/nokogiri/html5.rb +392 -0
  121. data/lib/nokogiri/jruby/dependencies.rb +3 -0
  122. data/lib/nokogiri/jruby/nokogiri_jars.rb +43 -0
  123. data/lib/nokogiri/syntax_error.rb +2 -0
  124. data/lib/nokogiri/version/constant.rb +6 -0
  125. data/lib/nokogiri/version/info.rb +223 -0
  126. data/lib/nokogiri/version.rb +3 -108
  127. data/lib/nokogiri/xml/attr.rb +55 -3
  128. data/lib/nokogiri/xml/attribute_decl.rb +6 -2
  129. data/lib/nokogiri/xml/builder.rb +98 -54
  130. data/lib/nokogiri/xml/cdata.rb +3 -1
  131. data/lib/nokogiri/xml/character_data.rb +2 -0
  132. data/lib/nokogiri/xml/document.rb +312 -126
  133. data/lib/nokogiri/xml/document_fragment.rb +104 -48
  134. data/lib/nokogiri/xml/dtd.rb +4 -2
  135. data/lib/nokogiri/xml/element_content.rb +12 -2
  136. data/lib/nokogiri/xml/element_decl.rb +6 -2
  137. data/lib/nokogiri/xml/entity_decl.rb +7 -3
  138. data/lib/nokogiri/xml/entity_reference.rb +2 -0
  139. data/lib/nokogiri/xml/namespace.rb +45 -0
  140. data/lib/nokogiri/xml/node/save_options.rb +23 -8
  141. data/lib/nokogiri/xml/node.rb +1093 -411
  142. data/lib/nokogiri/xml/node_set.rb +173 -67
  143. data/lib/nokogiri/xml/notation.rb +13 -0
  144. data/lib/nokogiri/xml/parse_options.rb +145 -52
  145. data/lib/nokogiri/xml/pp/character_data.rb +9 -6
  146. data/lib/nokogiri/xml/pp/node.rb +42 -30
  147. data/lib/nokogiri/xml/pp.rb +4 -2
  148. data/lib/nokogiri/xml/processing_instruction.rb +4 -1
  149. data/lib/nokogiri/xml/reader.rb +21 -28
  150. data/lib/nokogiri/xml/relax_ng.rb +8 -2
  151. data/lib/nokogiri/xml/sax/document.rb +45 -49
  152. data/lib/nokogiri/xml/sax/parser.rb +39 -36
  153. data/lib/nokogiri/xml/sax/parser_context.rb +8 -3
  154. data/lib/nokogiri/xml/sax/push_parser.rb +6 -5
  155. data/lib/nokogiri/xml/sax.rb +6 -4
  156. data/lib/nokogiri/xml/schema.rb +19 -9
  157. data/lib/nokogiri/xml/searchable.rb +120 -72
  158. data/lib/nokogiri/xml/syntax_error.rb +6 -4
  159. data/lib/nokogiri/xml/text.rb +2 -0
  160. data/lib/nokogiri/xml/xpath/syntax_error.rb +4 -2
  161. data/lib/nokogiri/xml/xpath.rb +15 -4
  162. data/lib/nokogiri/xml/xpath_context.rb +3 -3
  163. data/lib/nokogiri/xml.rb +38 -37
  164. data/lib/nokogiri/xslt/stylesheet.rb +3 -1
  165. data/lib/nokogiri/xslt.rb +101 -22
  166. data/lib/nokogiri.rb +59 -75
  167. data/lib/xsd/xmlparser/nokogiri.rb +29 -25
  168. data/patches/libxml2/0001-Remove-script-macro-support.patch +40 -0
  169. data/patches/libxml2/0002-Update-entities-to-remove-handling-of-ssi.patch +44 -0
  170. data/patches/libxml2/0003-libxml2.la-is-in-top_builddir.patch +25 -0
  171. data/patches/libxml2/0009-allow-wildcard-namespaces.patch +77 -0
  172. data/patches/libxml2/0010-update-config.guess-and-config.sub-for-libxml2.patch +224 -0
  173. data/patches/libxml2/0011-rip-out-libxml2-s-libc_single_threaded-support.patch +30 -0
  174. data/patches/libxslt/0001-update-config.guess-and-config.sub-for-libxslt.patch +224 -0
  175. data/ports/archives/libxml2-2.11.4.tar.xz +0 -0
  176. data/ports/archives/libxslt-1.1.38.tar.xz +0 -0
  177. metadata +126 -399
  178. data/.autotest +0 -22
  179. data/.cross_rubies +0 -8
  180. data/.editorconfig +0 -17
  181. data/.gemtest +0 -0
  182. data/.travis.yml +0 -63
  183. data/CHANGELOG.md +0 -1368
  184. data/CONTRIBUTING.md +0 -42
  185. data/C_CODING_STYLE.rdoc +0 -33
  186. data/Gemfile-libxml-ruby +0 -3
  187. data/Manifest.txt +0 -370
  188. data/ROADMAP.md +0 -111
  189. data/Rakefile +0 -348
  190. data/SECURITY.md +0 -19
  191. data/STANDARD_RESPONSES.md +0 -47
  192. data/Y_U_NO_GEMSPEC.md +0 -155
  193. data/appveyor.yml +0 -29
  194. data/build_all +0 -44
  195. data/ext/nokogiri/html_document.c +0 -170
  196. data/ext/nokogiri/html_document.h +0 -10
  197. data/ext/nokogiri/html_element_description.c +0 -279
  198. data/ext/nokogiri/html_element_description.h +0 -10
  199. data/ext/nokogiri/html_entity_lookup.c +0 -32
  200. data/ext/nokogiri/html_entity_lookup.h +0 -8
  201. data/ext/nokogiri/html_sax_parser_context.c +0 -116
  202. data/ext/nokogiri/html_sax_parser_context.h +0 -11
  203. data/ext/nokogiri/html_sax_push_parser.c +0 -87
  204. data/ext/nokogiri/html_sax_push_parser.h +0 -9
  205. data/ext/nokogiri/xml_attr.h +0 -9
  206. data/ext/nokogiri/xml_attribute_decl.h +0 -9
  207. data/ext/nokogiri/xml_cdata.h +0 -9
  208. data/ext/nokogiri/xml_comment.h +0 -9
  209. data/ext/nokogiri/xml_document.h +0 -23
  210. data/ext/nokogiri/xml_document_fragment.h +0 -10
  211. data/ext/nokogiri/xml_dtd.h +0 -10
  212. data/ext/nokogiri/xml_element_content.h +0 -10
  213. data/ext/nokogiri/xml_element_decl.h +0 -9
  214. data/ext/nokogiri/xml_encoding_handler.h +0 -8
  215. data/ext/nokogiri/xml_entity_decl.h +0 -10
  216. data/ext/nokogiri/xml_entity_reference.h +0 -9
  217. data/ext/nokogiri/xml_io.c +0 -61
  218. data/ext/nokogiri/xml_io.h +0 -11
  219. data/ext/nokogiri/xml_libxml2_hacks.c +0 -112
  220. data/ext/nokogiri/xml_libxml2_hacks.h +0 -12
  221. data/ext/nokogiri/xml_namespace.h +0 -15
  222. data/ext/nokogiri/xml_node.h +0 -13
  223. data/ext/nokogiri/xml_node_set.h +0 -12
  224. data/ext/nokogiri/xml_processing_instruction.h +0 -9
  225. data/ext/nokogiri/xml_reader.h +0 -10
  226. data/ext/nokogiri/xml_relax_ng.h +0 -9
  227. data/ext/nokogiri/xml_sax_parser.h +0 -39
  228. data/ext/nokogiri/xml_sax_parser_context.h +0 -10
  229. data/ext/nokogiri/xml_sax_push_parser.h +0 -9
  230. data/ext/nokogiri/xml_schema.h +0 -9
  231. data/ext/nokogiri/xml_syntax_error.h +0 -13
  232. data/ext/nokogiri/xml_text.h +0 -9
  233. data/ext/nokogiri/xml_xpath_context.h +0 -10
  234. data/ext/nokogiri/xslt_stylesheet.h +0 -14
  235. data/lib/nokogiri/html/document.rb +0 -335
  236. data/lib/nokogiri/html/document_fragment.rb +0 -49
  237. data/lib/nokogiri/html/element_description_defaults.rb +0 -671
  238. data/lib/nokogiri/html/sax/parser_context.rb +0 -16
  239. data/patches/libxml2/0001-Revert-Do-not-URI-escape-in-server-side-includes.patch +0 -78
  240. data/patches/libxml2/0002-Fix-nullptr-deref-with-XPath-logic-ops.patch +0 -54
  241. data/patches/libxml2/0003-Fix-infinite-loop-in-LZMA-decompression.patch +0 -50
  242. data/patches/sort-patches-by-date +0 -25
  243. data/ports/archives/libxml2-2.9.8.tar.gz +0 -0
  244. data/ports/archives/libxslt-1.1.32.tar.gz +0 -0
  245. data/suppressions/README.txt +0 -1
  246. data/suppressions/nokogiri_ruby-2.supp +0 -10
  247. data/tasks/test.rb +0 -100
  248. data/test/css/test_nthiness.rb +0 -226
  249. data/test/css/test_parser.rb +0 -386
  250. data/test/css/test_tokenizer.rb +0 -215
  251. data/test/css/test_xpath_visitor.rb +0 -96
  252. data/test/decorators/test_slop.rb +0 -23
  253. data/test/files/2ch.html +0 -108
  254. data/test/files/GH_1042.html +0 -18
  255. data/test/files/address_book.rlx +0 -12
  256. data/test/files/address_book.xml +0 -10
  257. data/test/files/atom.xml +0 -344
  258. data/test/files/bar/bar.xsd +0 -4
  259. data/test/files/bogus.xml +0 -0
  260. data/test/files/dont_hurt_em_why.xml +0 -422
  261. data/test/files/encoding.html +0 -82
  262. data/test/files/encoding.xhtml +0 -84
  263. data/test/files/exslt.xml +0 -8
  264. data/test/files/exslt.xslt +0 -35
  265. data/test/files/foo/foo.xsd +0 -4
  266. data/test/files/metacharset.html +0 -10
  267. data/test/files/namespace_pressure_test.xml +0 -1684
  268. data/test/files/noencoding.html +0 -47
  269. data/test/files/po.xml +0 -32
  270. data/test/files/po.xsd +0 -66
  271. data/test/files/saml/saml20assertion_schema.xsd +0 -283
  272. data/test/files/saml/saml20protocol_schema.xsd +0 -302
  273. data/test/files/saml/xenc_schema.xsd +0 -146
  274. data/test/files/saml/xmldsig_schema.xsd +0 -318
  275. data/test/files/shift_jis.html +0 -10
  276. data/test/files/shift_jis.xml +0 -5
  277. data/test/files/shift_jis_no_charset.html +0 -9
  278. data/test/files/slow-xpath.xml +0 -25509
  279. data/test/files/snuggles.xml +0 -3
  280. data/test/files/staff.dtd +0 -10
  281. data/test/files/staff.xml +0 -59
  282. data/test/files/staff.xslt +0 -32
  283. data/test/files/test_document_url/bar.xml +0 -2
  284. data/test/files/test_document_url/document.dtd +0 -4
  285. data/test/files/test_document_url/document.xml +0 -6
  286. data/test/files/tlm.html +0 -851
  287. data/test/files/to_be_xincluded.xml +0 -2
  288. data/test/files/valid_bar.xml +0 -2
  289. data/test/files/xinclude.xml +0 -4
  290. data/test/helper.rb +0 -271
  291. data/test/html/sax/test_parser.rb +0 -168
  292. data/test/html/sax/test_parser_context.rb +0 -46
  293. data/test/html/sax/test_parser_text.rb +0 -163
  294. data/test/html/sax/test_push_parser.rb +0 -87
  295. data/test/html/test_attributes.rb +0 -85
  296. data/test/html/test_builder.rb +0 -164
  297. data/test/html/test_document.rb +0 -712
  298. data/test/html/test_document_encoding.rb +0 -143
  299. data/test/html/test_document_fragment.rb +0 -310
  300. data/test/html/test_element_description.rb +0 -105
  301. data/test/html/test_named_characters.rb +0 -14
  302. data/test/html/test_node.rb +0 -212
  303. data/test/html/test_node_encoding.rb +0 -91
  304. data/test/namespaces/test_additional_namespaces_in_builder_doc.rb +0 -14
  305. data/test/namespaces/test_namespaces_aliased_default.rb +0 -24
  306. data/test/namespaces/test_namespaces_in_builder_doc.rb +0 -75
  307. data/test/namespaces/test_namespaces_in_cloned_doc.rb +0 -31
  308. data/test/namespaces/test_namespaces_in_created_doc.rb +0 -75
  309. data/test/namespaces/test_namespaces_in_parsed_doc.rb +0 -80
  310. data/test/namespaces/test_namespaces_preservation.rb +0 -31
  311. data/test/test_convert_xpath.rb +0 -135
  312. data/test/test_css_cache.rb +0 -47
  313. data/test/test_encoding_handler.rb +0 -48
  314. data/test/test_memory_leak.rb +0 -156
  315. data/test/test_nokogiri.rb +0 -138
  316. data/test/test_soap4r_sax.rb +0 -52
  317. data/test/test_xslt_transforms.rb +0 -314
  318. data/test/xml/node/test_save_options.rb +0 -28
  319. data/test/xml/node/test_subclass.rb +0 -44
  320. data/test/xml/sax/test_parser.rb +0 -402
  321. data/test/xml/sax/test_parser_context.rb +0 -115
  322. data/test/xml/sax/test_parser_text.rb +0 -202
  323. data/test/xml/sax/test_push_parser.rb +0 -265
  324. data/test/xml/test_attr.rb +0 -74
  325. data/test/xml/test_attribute_decl.rb +0 -86
  326. data/test/xml/test_builder.rb +0 -341
  327. data/test/xml/test_c14n.rb +0 -180
  328. data/test/xml/test_cdata.rb +0 -54
  329. data/test/xml/test_comment.rb +0 -40
  330. data/test/xml/test_document.rb +0 -982
  331. data/test/xml/test_document_encoding.rb +0 -31
  332. data/test/xml/test_document_fragment.rb +0 -298
  333. data/test/xml/test_dtd.rb +0 -187
  334. data/test/xml/test_dtd_encoding.rb +0 -31
  335. data/test/xml/test_element_content.rb +0 -56
  336. data/test/xml/test_element_decl.rb +0 -73
  337. data/test/xml/test_entity_decl.rb +0 -122
  338. data/test/xml/test_entity_reference.rb +0 -262
  339. data/test/xml/test_namespace.rb +0 -96
  340. data/test/xml/test_node.rb +0 -1325
  341. data/test/xml/test_node_attributes.rb +0 -115
  342. data/test/xml/test_node_encoding.rb +0 -75
  343. data/test/xml/test_node_inheritance.rb +0 -32
  344. data/test/xml/test_node_reparenting.rb +0 -592
  345. data/test/xml/test_node_set.rb +0 -809
  346. data/test/xml/test_parse_options.rb +0 -64
  347. data/test/xml/test_processing_instruction.rb +0 -30
  348. data/test/xml/test_reader.rb +0 -620
  349. data/test/xml/test_reader_encoding.rb +0 -134
  350. data/test/xml/test_relax_ng.rb +0 -60
  351. data/test/xml/test_schema.rb +0 -142
  352. data/test/xml/test_syntax_error.rb +0 -36
  353. data/test/xml/test_text.rb +0 -60
  354. data/test/xml/test_unparented_node.rb +0 -483
  355. data/test/xml/test_xinclude.rb +0 -83
  356. data/test/xml/test_xpath.rb +0 -470
  357. data/test/xslt/test_custom_functions.rb +0 -133
  358. data/test/xslt/test_exception_handling.rb +0 -37
@@ -0,0 +1,148 @@
1
+ #ifndef GUMBO_ERROR_H_
2
+ #define GUMBO_ERROR_H_
3
+
4
+ #include <stdint.h>
5
+
6
+ #include "nokogiri_gumbo.h"
7
+ #include "insertion_mode.h"
8
+ #include "string_buffer.h"
9
+ #include "token_type.h"
10
+ #include "tokenizer_states.h"
11
+
12
+ #ifdef __cplusplus
13
+ extern "C" {
14
+ #endif
15
+
16
+ struct GumboInternalParser;
17
+
18
+ typedef enum {
19
+ // Defined errors.
20
+ // https://html.spec.whatwg.org/multipage/parsing.html#parse-errors
21
+ GUMBO_ERR_ABRUPT_CLOSING_OF_EMPTY_COMMENT,
22
+ GUMBO_ERR_ABRUPT_DOCTYPE_PUBLIC_IDENTIFIER,
23
+ GUMBO_ERR_ABRUPT_DOCTYPE_SYSTEM_IDENTIFIER,
24
+ GUMBO_ERR_ABSENCE_OF_DIGITS_IN_NUMERIC_CHARACTER_REFERENCE,
25
+ GUMBO_ERR_CDATA_IN_HTML_CONTENT,
26
+ GUMBO_ERR_CHARACTER_REFERENCE_OUTSIDE_UNICODE_RANGE,
27
+ GUMBO_ERR_CONTROL_CHARACTER_IN_INPUT_STREAM,
28
+ GUMBO_ERR_CONTROL_CHARACTER_REFERENCE,
29
+ GUMBO_ERR_END_TAG_WITH_ATTRIBUTES,
30
+ GUMBO_ERR_DUPLICATE_ATTRIBUTE,
31
+ GUMBO_ERR_END_TAG_WITH_TRAILING_SOLIDUS,
32
+ GUMBO_ERR_EOF_BEFORE_TAG_NAME,
33
+ GUMBO_ERR_EOF_IN_CDATA,
34
+ GUMBO_ERR_EOF_IN_COMMENT,
35
+ GUMBO_ERR_EOF_IN_DOCTYPE,
36
+ GUMBO_ERR_EOF_IN_SCRIPT_HTML_COMMENT_LIKE_TEXT,
37
+ GUMBO_ERR_EOF_IN_TAG,
38
+ GUMBO_ERR_INCORRECTLY_CLOSED_COMMENT,
39
+ GUMBO_ERR_INCORRECTLY_OPENED_COMMENT,
40
+ GUMBO_ERR_INVALID_CHARACTER_SEQUENCE_AFTER_DOCTYPE_NAME,
41
+ GUMBO_ERR_INVALID_FIRST_CHARACTER_OF_TAG_NAME,
42
+ GUMBO_ERR_MISSING_ATTRIBUTE_VALUE,
43
+ GUMBO_ERR_MISSING_DOCTYPE_NAME,
44
+ GUMBO_ERR_MISSING_DOCTYPE_PUBLIC_IDENTIFIER,
45
+ GUMBO_ERR_MISSING_DOCTYPE_SYSTEM_IDENTIFIER,
46
+ GUMBO_ERR_MISSING_END_TAG_NAME,
47
+ GUMBO_ERR_MISSING_QUOTE_BEFORE_DOCTYPE_PUBLIC_IDENTIFIER,
48
+ GUMBO_ERR_MISSING_QUOTE_BEFORE_DOCTYPE_SYSTEM_IDENTIFIER,
49
+ GUMBO_ERR_MISSING_SEMICOLON_AFTER_CHARACTER_REFERENCE,
50
+ GUMBO_ERR_MISSING_WHITESPACE_AFTER_DOCTYPE_PUBLIC_KEYWORD,
51
+ GUMBO_ERR_MISSING_WHITESPACE_AFTER_DOCTYPE_SYSTEM_KEYWORD,
52
+ GUMBO_ERR_MISSING_WHITESPACE_BEFORE_DOCTYPE_NAME,
53
+ GUMBO_ERR_MISSING_WHITESPACE_BETWEEN_ATTRIBUTES,
54
+ GUMBO_ERR_MISSING_WHITESPACE_BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS,
55
+ GUMBO_ERR_NESTED_COMMENT,
56
+ GUMBO_ERR_NONCHARACTER_CHARACTER_REFERENCE,
57
+ GUMBO_ERR_NONCHARACTER_IN_INPUT_STREAM,
58
+ GUMBO_ERR_NON_VOID_HTML_ELEMENT_START_TAG_WITH_TRAILING_SOLIDUS,
59
+ GUMBO_ERR_NULL_CHARACTER_REFERENCE,
60
+ GUMBO_ERR_SURROGATE_CHARACTER_REFERENCE,
61
+ GUMBO_ERR_SURROGATE_IN_INPUT_STREAM,
62
+ GUMBO_ERR_UNEXPECTED_CHARACTER_AFTER_DOCTYPE_SYSTEM_IDENTIFIER,
63
+ GUMBO_ERR_UNEXPECTED_CHARACTER_IN_ATTRIBUTE_NAME,
64
+ GUMBO_ERR_UNEXPECTED_CHARACTER_IN_UNQUOTED_ATTRIBUTE_VALUE,
65
+ GUMBO_ERR_UNEXPECTED_EQUALS_SIGN_BEFORE_ATTRIBUTE_NAME,
66
+ GUMBO_ERR_UNEXPECTED_NULL_CHARACTER,
67
+ GUMBO_ERR_UNEXPECTED_QUESTION_MARK_INSTEAD_OF_TAG_NAME,
68
+ GUMBO_ERR_UNEXPECTED_SOLIDUS_IN_TAG,
69
+ GUMBO_ERR_UNKNOWN_NAMED_CHARACTER_REFERENCE,
70
+
71
+ // Encoding errors.
72
+ GUMBO_ERR_UTF8_INVALID,
73
+ GUMBO_ERR_UTF8_TRUNCATED,
74
+
75
+ // Generic parser error.
76
+ GUMBO_ERR_PARSER,
77
+ } GumboErrorType;
78
+
79
+ // Additional data for tokenizer errors.
80
+ // This records the current state and codepoint encountered - this is usually
81
+ // enough to reconstruct what went wrong and provide a friendly error message.
82
+ typedef struct GumboInternalTokenizerError {
83
+ // The bad codepoint encountered.
84
+ int codepoint;
85
+
86
+ // The state that the tokenizer was in at the time.
87
+ GumboTokenizerEnum state;
88
+ } GumboTokenizerError;
89
+
90
+ // Additional data for parse errors.
91
+ typedef struct GumboInternalParserError {
92
+ // The type of input token that resulted in this error.
93
+ GumboTokenType input_type;
94
+
95
+ // The HTML tag of the input token. TAG_UNKNOWN if this was not a tag token.
96
+ GumboTag input_tag;
97
+
98
+ // The insertion mode that the parser was in at the time.
99
+ GumboInsertionMode parser_state;
100
+
101
+ // The tag stack at the point of the error. Note that this is an GumboVector
102
+ // of GumboTag's *stored by value* - cast the void* to an GumboTag directly to
103
+ // get at the tag.
104
+ GumboVector /* GumboTag */ tag_stack;
105
+ } GumboParserError;
106
+
107
+ // The overall error struct representing an error in decoding/tokenizing/parsing
108
+ // the HTML. This contains an enumerated type flag, a source position, the text
109
+ // that caused the error, and then a union of fields containing data specific to the error.
110
+ struct GumboInternalError {
111
+ // The type of error.
112
+ GumboErrorType type;
113
+
114
+ // The position within the source file where the error occurred.
115
+ GumboSourcePosition position;
116
+
117
+ // The piece of text that caused the error.
118
+ GumboStringPiece original_text;
119
+
120
+ // Type-specific error information.
121
+ union {
122
+ // Tokenizer errors.
123
+ GumboTokenizerError tokenizer;
124
+
125
+ // Parser errors.
126
+ GumboParserError parser;
127
+ } v;
128
+ };
129
+
130
+ // Adds a new error to the parser's error list, and returns a pointer to it so
131
+ // that clients can fill out the rest of its fields. May return NULL if we're
132
+ // already over the max_errors field specified in GumboOptions.
133
+ GumboError* gumbo_add_error(struct GumboInternalParser* parser);
134
+
135
+ // Initializes the errors vector in the parser.
136
+ void gumbo_init_errors(struct GumboInternalParser* errors);
137
+
138
+ // Frees all the errors in the 'errors_' field of the parser.
139
+ void gumbo_destroy_errors(struct GumboInternalParser* errors);
140
+
141
+ // Frees the memory used for a single GumboError.
142
+ void gumbo_error_destroy(GumboError* error);
143
+
144
+ #ifdef __cplusplus
145
+ }
146
+ #endif
147
+
148
+ #endif // GUMBO_ERROR_H_
@@ -0,0 +1,103 @@
1
+ /* ANSI-C code produced by gperf version 3.1 */
2
+ /* Command-line: gperf -m100 -n src/foreign_attrs.gperf */
3
+ /* Computed positions: -k'8-9' */
4
+ /* Filtered by: gperf-filter.sed */
5
+
6
+ #include "replacement.h"
7
+ #include "macros.h"
8
+ #include <string.h>
9
+
10
+ #define TOTAL_KEYWORDS 11
11
+ #define MIN_WORD_LENGTH 5
12
+ #define MAX_WORD_LENGTH 13
13
+ #define MIN_HASH_VALUE 0
14
+ #define MAX_HASH_VALUE 10
15
+ /* maximum key range = 11, duplicates = 0 */
16
+
17
+ static inline unsigned int
18
+ hash (register const char *str, register size_t len)
19
+ {
20
+ static const unsigned char asso_values[] =
21
+ {
22
+ 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
23
+ 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
24
+ 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
25
+ 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
26
+ 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
27
+ 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
28
+ 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
29
+ 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
30
+ 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
31
+ 11, 11, 11, 11, 11, 11, 11, 11, 11, 2,
32
+ 11, 1, 11, 10, 4, 4, 11, 11, 3, 11,
33
+ 11, 5, 3, 11, 0, 11, 2, 11, 11, 11,
34
+ 11, 2, 11, 11, 11, 11, 11, 11, 11, 11,
35
+ 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
36
+ 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
37
+ 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
38
+ 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
39
+ 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
40
+ 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
41
+ 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
42
+ 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
43
+ 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
44
+ 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
45
+ 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
46
+ 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
47
+ 11, 11, 11, 11, 11, 11
48
+ };
49
+ register unsigned int hval = 0;
50
+
51
+ switch (len)
52
+ {
53
+ default:
54
+ hval += asso_values[(unsigned char)str[8]];
55
+ /*FALLTHROUGH*/
56
+ case 8:
57
+ hval += asso_values[(unsigned char)str[7]];
58
+ /*FALLTHROUGH*/
59
+ case 7:
60
+ case 6:
61
+ case 5:
62
+ break;
63
+ }
64
+ return hval;
65
+ }
66
+
67
+ const ForeignAttrReplacement *
68
+ gumbo_get_foreign_attr_replacement (register const char *str, register size_t len)
69
+ {
70
+ static const unsigned char lengthtable[] =
71
+ {
72
+ 5, 10, 13, 9, 13, 10, 11, 11, 10, 10, 8
73
+ };
74
+ static const ForeignAttrReplacement wordlist[] =
75
+ {
76
+ {"xmlns", "xmlns", GUMBO_ATTR_NAMESPACE_XMLNS},
77
+ {"xlink:href", "href", GUMBO_ATTR_NAMESPACE_XLINK},
78
+ {"xlink:arcrole", "arcrole", GUMBO_ATTR_NAMESPACE_XLINK},
79
+ {"xml:space", "space", GUMBO_ATTR_NAMESPACE_XML},
80
+ {"xlink:actuate", "actuate", GUMBO_ATTR_NAMESPACE_XLINK},
81
+ {"xlink:type", "type", GUMBO_ATTR_NAMESPACE_XLINK},
82
+ {"xlink:title", "title", GUMBO_ATTR_NAMESPACE_XLINK},
83
+ {"xmlns:xlink", "xlink", GUMBO_ATTR_NAMESPACE_XMLNS},
84
+ {"xlink:role", "role", GUMBO_ATTR_NAMESPACE_XLINK},
85
+ {"xlink:show", "show", GUMBO_ATTR_NAMESPACE_XLINK},
86
+ {"xml:lang", "lang", GUMBO_ATTR_NAMESPACE_XML}
87
+ };
88
+
89
+ if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH)
90
+ {
91
+ register unsigned int key = hash (str, len);
92
+
93
+ if (key <= MAX_HASH_VALUE)
94
+ if (len == lengthtable[key])
95
+ {
96
+ register const char *s = wordlist[key].from;
97
+
98
+ if (s && *str == *s && !memcmp (str + 1, s + 1, len - 1))
99
+ return &wordlist[key];
100
+ }
101
+ }
102
+ return 0;
103
+ }
@@ -0,0 +1,27 @@
1
+ %{
2
+ #include "replacement.h"
3
+ #include "macros.h"
4
+ %}
5
+
6
+ %struct-type
7
+ %omit-struct-type
8
+ %compare-lengths
9
+ %readonly-tables
10
+ %null-strings
11
+ %includes
12
+ %define lookup-function-name gumbo_get_foreign_attr_replacement
13
+ %define slot-name from
14
+ ForeignAttrReplacement;
15
+
16
+ %%
17
+ "xlink:actuate", "actuate", GUMBO_ATTR_NAMESPACE_XLINK
18
+ "xlink:arcrole", "arcrole", GUMBO_ATTR_NAMESPACE_XLINK
19
+ "xlink:href", "href", GUMBO_ATTR_NAMESPACE_XLINK
20
+ "xlink:role", "role", GUMBO_ATTR_NAMESPACE_XLINK
21
+ "xlink:show", "show", GUMBO_ATTR_NAMESPACE_XLINK
22
+ "xlink:title", "title", GUMBO_ATTR_NAMESPACE_XLINK
23
+ "xlink:type", "type", GUMBO_ATTR_NAMESPACE_XLINK
24
+ "xml:lang", "lang", GUMBO_ATTR_NAMESPACE_XML
25
+ "xml:space", "space", GUMBO_ATTR_NAMESPACE_XML
26
+ "xmlns", "xmlns", GUMBO_ATTR_NAMESPACE_XMLNS
27
+ "xmlns:xlink", "xlink", GUMBO_ATTR_NAMESPACE_XMLNS
@@ -0,0 +1,33 @@
1
+ #ifndef GUMBO_INSERTION_MODE_H_
2
+ #define GUMBO_INSERTION_MODE_H_
3
+
4
+ // https://html.spec.whatwg.org/multipage/parsing.html#insertion-mode
5
+ // If new enum values are added, be sure to update the kTokenHandlers
6
+ // dispatch table in parser.c.
7
+ typedef enum {
8
+ GUMBO_INSERTION_MODE_INITIAL,
9
+ GUMBO_INSERTION_MODE_BEFORE_HTML,
10
+ GUMBO_INSERTION_MODE_BEFORE_HEAD,
11
+ GUMBO_INSERTION_MODE_IN_HEAD,
12
+ GUMBO_INSERTION_MODE_IN_HEAD_NOSCRIPT,
13
+ GUMBO_INSERTION_MODE_AFTER_HEAD,
14
+ GUMBO_INSERTION_MODE_IN_BODY,
15
+ GUMBO_INSERTION_MODE_TEXT,
16
+ GUMBO_INSERTION_MODE_IN_TABLE,
17
+ GUMBO_INSERTION_MODE_IN_TABLE_TEXT,
18
+ GUMBO_INSERTION_MODE_IN_CAPTION,
19
+ GUMBO_INSERTION_MODE_IN_COLUMN_GROUP,
20
+ GUMBO_INSERTION_MODE_IN_TABLE_BODY,
21
+ GUMBO_INSERTION_MODE_IN_ROW,
22
+ GUMBO_INSERTION_MODE_IN_CELL,
23
+ GUMBO_INSERTION_MODE_IN_SELECT,
24
+ GUMBO_INSERTION_MODE_IN_SELECT_IN_TABLE,
25
+ GUMBO_INSERTION_MODE_IN_TEMPLATE,
26
+ GUMBO_INSERTION_MODE_AFTER_BODY,
27
+ GUMBO_INSERTION_MODE_IN_FRAMESET,
28
+ GUMBO_INSERTION_MODE_AFTER_FRAMESET,
29
+ GUMBO_INSERTION_MODE_AFTER_AFTER_BODY,
30
+ GUMBO_INSERTION_MODE_AFTER_AFTER_FRAMESET
31
+ } GumboInsertionMode;
32
+
33
+ #endif // GUMBO_INSERTION_MODE_H_
@@ -0,0 +1,91 @@
1
+ #ifndef MACROS_H
2
+ #define MACROS_H
3
+
4
+ #if (!defined(__STDC_VERSION__) || !(__STDC_VERSION__ >= 199901L)) \
5
+ && !defined(_WIN32) && !defined(__cplusplus)
6
+ # error C99 compiler required
7
+ #endif
8
+
9
+ #if defined(_WIN32)
10
+ # define inline __inline
11
+ # define __func__ __FUNCTION__
12
+ #endif
13
+
14
+ // Calculate the number of elements in an array.
15
+ // The extra division on the third line is a trick to help prevent
16
+ // passing a pointer to the first element of an array instead of a
17
+ // reference to the array itself: the divisor is zero unless sizeof(x) is an exact multiple of sizeof((x)[0]).
18
+ #define ARRAY_COUNT(x) ( \
19
+ (sizeof(x) / sizeof((x)[0])) \
20
+ / ((size_t)(!(sizeof(x) % sizeof((x)[0])))) \
21
+ )
22
+
23
+ #ifdef NDEBUG
24
+ #define UNUSED_IF_NDEBUG(x) (void)(x)
25
+ #else
26
+ #define UNUSED_IF_NDEBUG(x)
27
+ #endif
28
+
29
+ #ifdef __GNUC__
30
+ #define GNUC_AT_LEAST(major, minor) ( \
31
+ (__GNUC__ > major) \
32
+ || ((__GNUC__ == major) && (__GNUC_MINOR__ >= minor)) )
33
+ #else
34
+ #define GNUC_AT_LEAST(major, minor) 0
35
+ #endif
36
+
37
+ #ifdef __has_attribute
38
+ #define HAS_ATTRIBUTE(x) __has_attribute(x)
39
+ #else
40
+ #define HAS_ATTRIBUTE(x) 0
41
+ #endif
42
+
43
+ #if GNUC_AT_LEAST(3, 0) || HAS_ATTRIBUTE(unused) || defined(__TINYC__)
44
+ #define UNUSED __attribute__((__unused__))
45
+ #else
46
+ #define UNUSED
47
+ #endif
48
+
49
+ #if GNUC_AT_LEAST(3, 0)
50
+ #define MALLOC __attribute__((__malloc__))
51
+ #define PRINTF(x) __attribute__((__format__(__printf__, (x), (x + 1))))
52
+ #define PURE __attribute__((__pure__))
53
+ #define CONST_FN __attribute__((__const__))
54
+ #else
55
+ #define MALLOC
56
+ #define PRINTF(x)
57
+ #define PURE
58
+ #define CONST_FN
59
+ #endif
60
+
61
+ #define UNUSED_ARG(x) unused__ ## x UNUSED
62
+
63
+ #if GNUC_AT_LEAST(3, 0) && defined(__OPTIMIZE__)
64
+ #define likely(x) __builtin_expect(!!(x), 1)
65
+ #define unlikely(x) __builtin_expect(!!(x), 0)
66
+ #else
67
+ #define likely(x) (x)
68
+ #define unlikely(x) (x)
69
+ #endif
70
+
71
+ #if GNUC_AT_LEAST(3, 3) || HAS_ATTRIBUTE(nonnull)
72
+ #define NONNULL_ARGS __attribute__((__nonnull__))
73
+ #else
74
+ #define NONNULL_ARGS
75
+ #endif
76
+
77
+ #if GNUC_AT_LEAST(3, 4) || HAS_ATTRIBUTE(warn_unused_result)
78
+ #define WARN_UNUSED_RESULT __attribute__((__warn_unused_result__))
79
+ #else
80
+ #define WARN_UNUSED_RESULT
81
+ #endif
82
+
83
+ #if GNUC_AT_LEAST(5, 0) || HAS_ATTRIBUTE(returns_nonnull)
84
+ #define RETURNS_NONNULL __attribute__((__returns_nonnull__))
85
+ #else
86
+ #define RETURNS_NONNULL
87
+ #endif
88
+
89
+ #define XMALLOC MALLOC RETURNS_NONNULL
90
+
91
+ #endif // ndef MACROS_H