nokogiri 1.6.8.1 → 1.13.4

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (354) hide show
  1. checksums.yaml +5 -5
  2. data/Gemfile +3 -20
  3. data/LICENSE-DEPENDENCIES.md +1903 -0
  4. data/LICENSE.md +9 -0
  5. data/README.md +197 -83
  6. data/bin/nokogiri +63 -50
  7. data/dependencies.yml +16 -22
  8. data/ext/nokogiri/depend +38 -358
  9. data/ext/nokogiri/extconf.rb +751 -432
  10. data/ext/nokogiri/gumbo.c +584 -0
  11. data/ext/nokogiri/html4_document.c +166 -0
  12. data/ext/nokogiri/html4_element_description.c +294 -0
  13. data/ext/nokogiri/html4_entity_lookup.c +37 -0
  14. data/ext/nokogiri/html4_sax_parser_context.c +120 -0
  15. data/ext/nokogiri/html4_sax_push_parser.c +95 -0
  16. data/ext/nokogiri/libxml2_backwards_compat.c +121 -0
  17. data/ext/nokogiri/nokogiri.c +228 -95
  18. data/ext/nokogiri/nokogiri.h +190 -98
  19. data/ext/nokogiri/test_global_handlers.c +40 -0
  20. data/ext/nokogiri/xml_attr.c +49 -40
  21. data/ext/nokogiri/xml_attribute_decl.c +18 -18
  22. data/ext/nokogiri/xml_cdata.c +24 -23
  23. data/ext/nokogiri/xml_comment.c +19 -26
  24. data/ext/nokogiri/xml_document.c +291 -217
  25. data/ext/nokogiri/xml_document_fragment.c +12 -16
  26. data/ext/nokogiri/xml_dtd.c +56 -50
  27. data/ext/nokogiri/xml_element_content.c +31 -26
  28. data/ext/nokogiri/xml_element_decl.c +22 -22
  29. data/ext/nokogiri/xml_encoding_handler.c +43 -18
  30. data/ext/nokogiri/xml_entity_decl.c +32 -30
  31. data/ext/nokogiri/xml_entity_reference.c +16 -18
  32. data/ext/nokogiri/xml_namespace.c +61 -58
  33. data/ext/nokogiri/xml_node.c +1194 -729
  34. data/ext/nokogiri/xml_node_set.c +178 -165
  35. data/ext/nokogiri/xml_processing_instruction.c +17 -19
  36. data/ext/nokogiri/xml_reader.c +226 -175
  37. data/ext/nokogiri/xml_relax_ng.c +52 -28
  38. data/ext/nokogiri/xml_sax_parser.c +115 -114
  39. data/ext/nokogiri/xml_sax_parser_context.c +105 -86
  40. data/ext/nokogiri/xml_sax_push_parser.c +88 -35
  41. data/ext/nokogiri/xml_schema.c +112 -33
  42. data/ext/nokogiri/xml_syntax_error.c +42 -21
  43. data/ext/nokogiri/xml_text.c +13 -17
  44. data/ext/nokogiri/xml_xpath_context.c +223 -115
  45. data/ext/nokogiri/xslt_stylesheet.c +265 -173
  46. data/gumbo-parser/CHANGES.md +63 -0
  47. data/gumbo-parser/Makefile +101 -0
  48. data/gumbo-parser/THANKS +27 -0
  49. data/gumbo-parser/src/Makefile +34 -0
  50. data/gumbo-parser/src/README.md +41 -0
  51. data/gumbo-parser/src/ascii.c +75 -0
  52. data/gumbo-parser/src/ascii.h +115 -0
  53. data/gumbo-parser/src/attribute.c +42 -0
  54. data/gumbo-parser/src/attribute.h +17 -0
  55. data/gumbo-parser/src/char_ref.c +22225 -0
  56. data/gumbo-parser/src/char_ref.h +29 -0
  57. data/gumbo-parser/src/char_ref.rl +2154 -0
  58. data/gumbo-parser/src/error.c +626 -0
  59. data/gumbo-parser/src/error.h +148 -0
  60. data/gumbo-parser/src/foreign_attrs.c +104 -0
  61. data/gumbo-parser/src/foreign_attrs.gperf +27 -0
  62. data/gumbo-parser/src/gumbo.h +943 -0
  63. data/gumbo-parser/src/insertion_mode.h +33 -0
  64. data/gumbo-parser/src/macros.h +91 -0
  65. data/gumbo-parser/src/parser.c +4875 -0
  66. data/gumbo-parser/src/parser.h +41 -0
  67. data/gumbo-parser/src/replacement.h +33 -0
  68. data/gumbo-parser/src/string_buffer.c +103 -0
  69. data/gumbo-parser/src/string_buffer.h +68 -0
  70. data/gumbo-parser/src/string_piece.c +48 -0
  71. data/gumbo-parser/src/svg_attrs.c +174 -0
  72. data/gumbo-parser/src/svg_attrs.gperf +77 -0
  73. data/gumbo-parser/src/svg_tags.c +137 -0
  74. data/gumbo-parser/src/svg_tags.gperf +55 -0
  75. data/gumbo-parser/src/tag.c +222 -0
  76. data/gumbo-parser/src/tag_lookup.c +382 -0
  77. data/gumbo-parser/src/tag_lookup.gperf +169 -0
  78. data/gumbo-parser/src/tag_lookup.h +13 -0
  79. data/gumbo-parser/src/token_buffer.c +79 -0
  80. data/gumbo-parser/src/token_buffer.h +71 -0
  81. data/gumbo-parser/src/token_type.h +17 -0
  82. data/gumbo-parser/src/tokenizer.c +3463 -0
  83. data/gumbo-parser/src/tokenizer.h +112 -0
  84. data/gumbo-parser/src/tokenizer_states.h +339 -0
  85. data/gumbo-parser/src/utf8.c +245 -0
  86. data/gumbo-parser/src/utf8.h +164 -0
  87. data/gumbo-parser/src/util.c +68 -0
  88. data/gumbo-parser/src/util.h +30 -0
  89. data/gumbo-parser/src/vector.c +111 -0
  90. data/gumbo-parser/src/vector.h +45 -0
  91. data/lib/nokogiri/class_resolver.rb +67 -0
  92. data/lib/nokogiri/css/node.rb +10 -8
  93. data/lib/nokogiri/css/parser.rb +410 -372
  94. data/lib/nokogiri/css/parser.y +260 -244
  95. data/lib/nokogiri/css/parser_extras.rb +54 -49
  96. data/lib/nokogiri/css/syntax_error.rb +3 -1
  97. data/lib/nokogiri/css/tokenizer.rb +107 -104
  98. data/lib/nokogiri/css/tokenizer.rex +7 -6
  99. data/lib/nokogiri/css/xpath_visitor.rb +226 -92
  100. data/lib/nokogiri/css.rb +50 -17
  101. data/lib/nokogiri/decorators/slop.rb +9 -7
  102. data/lib/nokogiri/extension.rb +31 -0
  103. data/lib/nokogiri/gumbo.rb +15 -0
  104. data/lib/nokogiri/html.rb +38 -27
  105. data/lib/nokogiri/{html → html4}/builder.rb +4 -2
  106. data/lib/nokogiri/{html → html4}/document.rb +104 -106
  107. data/lib/nokogiri/html4/document_fragment.rb +54 -0
  108. data/lib/nokogiri/{html → html4}/element_description.rb +3 -1
  109. data/lib/nokogiri/html4/element_description_defaults.rb +578 -0
  110. data/lib/nokogiri/{html → html4}/entity_lookup.rb +4 -2
  111. data/lib/nokogiri/{html → html4}/sax/parser.rb +24 -13
  112. data/lib/nokogiri/html4/sax/parser_context.rb +20 -0
  113. data/lib/nokogiri/{html → html4}/sax/push_parser.rb +12 -11
  114. data/lib/nokogiri/html4.rb +46 -0
  115. data/lib/nokogiri/html5/document.rb +91 -0
  116. data/lib/nokogiri/html5/document_fragment.rb +83 -0
  117. data/lib/nokogiri/html5/node.rb +100 -0
  118. data/lib/nokogiri/html5.rb +478 -0
  119. data/lib/nokogiri/jruby/dependencies.rb +21 -0
  120. data/lib/nokogiri/syntax_error.rb +2 -0
  121. data/lib/nokogiri/version/constant.rb +6 -0
  122. data/lib/nokogiri/version/info.rb +222 -0
  123. data/lib/nokogiri/version.rb +3 -107
  124. data/lib/nokogiri/xml/attr.rb +6 -3
  125. data/lib/nokogiri/xml/attribute_decl.rb +3 -1
  126. data/lib/nokogiri/xml/builder.rb +97 -53
  127. data/lib/nokogiri/xml/cdata.rb +3 -1
  128. data/lib/nokogiri/xml/character_data.rb +2 -0
  129. data/lib/nokogiri/xml/document.rb +231 -93
  130. data/lib/nokogiri/xml/document_fragment.rb +57 -44
  131. data/lib/nokogiri/xml/dtd.rb +4 -2
  132. data/lib/nokogiri/xml/element_content.rb +2 -0
  133. data/lib/nokogiri/xml/element_decl.rb +3 -1
  134. data/lib/nokogiri/xml/entity_decl.rb +4 -2
  135. data/lib/nokogiri/xml/entity_reference.rb +20 -0
  136. data/lib/nokogiri/xml/namespace.rb +3 -0
  137. data/lib/nokogiri/xml/node/save_options.rb +10 -5
  138. data/lib/nokogiri/xml/node.rb +911 -337
  139. data/lib/nokogiri/xml/node_set.rb +141 -84
  140. data/lib/nokogiri/xml/notation.rb +13 -0
  141. data/lib/nokogiri/xml/parse_options.rb +23 -9
  142. data/lib/nokogiri/xml/pp/character_data.rb +9 -6
  143. data/lib/nokogiri/xml/pp/node.rb +25 -26
  144. data/lib/nokogiri/xml/pp.rb +4 -2
  145. data/lib/nokogiri/xml/processing_instruction.rb +3 -1
  146. data/lib/nokogiri/xml/reader.rb +23 -28
  147. data/lib/nokogiri/xml/relax_ng.rb +8 -2
  148. data/lib/nokogiri/xml/sax/document.rb +45 -49
  149. data/lib/nokogiri/xml/sax/parser.rb +43 -40
  150. data/lib/nokogiri/xml/sax/parser_context.rb +8 -3
  151. data/lib/nokogiri/xml/sax/push_parser.rb +6 -5
  152. data/lib/nokogiri/xml/sax.rb +6 -4
  153. data/lib/nokogiri/xml/schema.rb +19 -9
  154. data/lib/nokogiri/xml/searchable.rb +138 -89
  155. data/lib/nokogiri/xml/syntax_error.rb +26 -1
  156. data/lib/nokogiri/xml/text.rb +2 -0
  157. data/lib/nokogiri/xml/xpath/syntax_error.rb +4 -2
  158. data/lib/nokogiri/xml/xpath.rb +15 -4
  159. data/lib/nokogiri/xml/xpath_context.rb +3 -3
  160. data/lib/nokogiri/xml.rb +39 -37
  161. data/lib/nokogiri/xslt/stylesheet.rb +3 -1
  162. data/lib/nokogiri/xslt.rb +29 -20
  163. data/lib/nokogiri.rb +49 -65
  164. data/lib/xsd/xmlparser/nokogiri.rb +26 -24
  165. data/patches/libxml2/0001-Remove-script-macro-support.patch +40 -0
  166. data/patches/libxml2/0002-Update-entities-to-remove-handling-of-ssi.patch +44 -0
  167. data/patches/libxml2/0003-libxml2.la-is-in-top_builddir.patch +25 -0
  168. data/patches/libxml2/0004-use-glibc-strlen.patch +53 -0
  169. data/patches/libxml2/0005-avoid-isnan-isinf.patch +81 -0
  170. data/patches/libxml2/0006-update-automake-files-for-arm64.patch +3040 -0
  171. data/patches/libxml2/0008-htmlParseComment-handle-abruptly-closed-comments.patch +61 -0
  172. data/patches/libxml2/0009-allow-wildcard-namespaces.patch +77 -0
  173. data/patches/libxml2/0010-Revert-Different-approach-to-fix-quadratic-behavior.patch +45 -0
  174. data/patches/libxslt/0001-update-automake-files-for-arm64.patch +3037 -0
  175. data/ports/archives/libxml2-2.9.13.tar.xz +0 -0
  176. data/ports/archives/libxslt-1.1.35.tar.xz +0 -0
  177. metadata +233 -258
  178. data/.autotest +0 -26
  179. data/.cross_rubies +0 -9
  180. data/.editorconfig +0 -17
  181. data/.gemtest +0 -0
  182. data/.travis.yml +0 -51
  183. data/CHANGELOG.rdoc +0 -1160
  184. data/CONTRIBUTING.md +0 -42
  185. data/C_CODING_STYLE.rdoc +0 -33
  186. data/LICENSE.txt +0 -31
  187. data/Manifest.txt +0 -364
  188. data/ROADMAP.md +0 -111
  189. data/Rakefile +0 -375
  190. data/STANDARD_RESPONSES.md +0 -47
  191. data/Y_U_NO_GEMSPEC.md +0 -155
  192. data/appveyor.yml +0 -22
  193. data/build_all +0 -45
  194. data/ext/nokogiri/html_document.c +0 -170
  195. data/ext/nokogiri/html_document.h +0 -10
  196. data/ext/nokogiri/html_element_description.c +0 -279
  197. data/ext/nokogiri/html_element_description.h +0 -10
  198. data/ext/nokogiri/html_entity_lookup.c +0 -32
  199. data/ext/nokogiri/html_entity_lookup.h +0 -8
  200. data/ext/nokogiri/html_sax_parser_context.c +0 -116
  201. data/ext/nokogiri/html_sax_parser_context.h +0 -11
  202. data/ext/nokogiri/html_sax_push_parser.c +0 -87
  203. data/ext/nokogiri/html_sax_push_parser.h +0 -9
  204. data/ext/nokogiri/xml_attr.h +0 -9
  205. data/ext/nokogiri/xml_attribute_decl.h +0 -9
  206. data/ext/nokogiri/xml_cdata.h +0 -9
  207. data/ext/nokogiri/xml_comment.h +0 -9
  208. data/ext/nokogiri/xml_document.h +0 -23
  209. data/ext/nokogiri/xml_document_fragment.h +0 -10
  210. data/ext/nokogiri/xml_dtd.h +0 -10
  211. data/ext/nokogiri/xml_element_content.h +0 -10
  212. data/ext/nokogiri/xml_element_decl.h +0 -9
  213. data/ext/nokogiri/xml_encoding_handler.h +0 -8
  214. data/ext/nokogiri/xml_entity_decl.h +0 -10
  215. data/ext/nokogiri/xml_entity_reference.h +0 -9
  216. data/ext/nokogiri/xml_io.c +0 -60
  217. data/ext/nokogiri/xml_io.h +0 -11
  218. data/ext/nokogiri/xml_libxml2_hacks.c +0 -112
  219. data/ext/nokogiri/xml_libxml2_hacks.h +0 -12
  220. data/ext/nokogiri/xml_namespace.h +0 -13
  221. data/ext/nokogiri/xml_node.h +0 -13
  222. data/ext/nokogiri/xml_node_set.h +0 -13
  223. data/ext/nokogiri/xml_processing_instruction.h +0 -9
  224. data/ext/nokogiri/xml_reader.h +0 -10
  225. data/ext/nokogiri/xml_relax_ng.h +0 -9
  226. data/ext/nokogiri/xml_sax_parser.h +0 -39
  227. data/ext/nokogiri/xml_sax_parser_context.h +0 -10
  228. data/ext/nokogiri/xml_sax_push_parser.h +0 -9
  229. data/ext/nokogiri/xml_schema.h +0 -9
  230. data/ext/nokogiri/xml_syntax_error.h +0 -13
  231. data/ext/nokogiri/xml_text.h +0 -9
  232. data/ext/nokogiri/xml_xpath_context.h +0 -10
  233. data/ext/nokogiri/xslt_stylesheet.h +0 -14
  234. data/lib/nokogiri/html/document_fragment.rb +0 -39
  235. data/lib/nokogiri/html/element_description_defaults.rb +0 -671
  236. data/lib/nokogiri/html/sax/parser_context.rb +0 -16
  237. data/patches/sort-patches-by-date +0 -25
  238. data/ports/archives/libxml2-2.9.4.tar.gz +0 -0
  239. data/ports/archives/libxslt-1.1.29.tar.gz +0 -0
  240. data/suppressions/README.txt +0 -1
  241. data/suppressions/nokogiri_ree-1.8.7.358.supp +0 -61
  242. data/suppressions/nokogiri_ruby-1.8.7.370.supp +0 -0
  243. data/suppressions/nokogiri_ruby-1.9.2.320.supp +0 -28
  244. data/suppressions/nokogiri_ruby-1.9.3.327.supp +0 -28
  245. data/tasks/test.rb +0 -100
  246. data/test/css/test_nthiness.rb +0 -226
  247. data/test/css/test_parser.rb +0 -369
  248. data/test/css/test_tokenizer.rb +0 -215
  249. data/test/css/test_xpath_visitor.rb +0 -96
  250. data/test/decorators/test_slop.rb +0 -20
  251. data/test/files/2ch.html +0 -108
  252. data/test/files/GH_1042.html +0 -18
  253. data/test/files/address_book.rlx +0 -12
  254. data/test/files/address_book.xml +0 -10
  255. data/test/files/atom.xml +0 -344
  256. data/test/files/bar/bar.xsd +0 -4
  257. data/test/files/bogus.xml +0 -0
  258. data/test/files/dont_hurt_em_why.xml +0 -422
  259. data/test/files/encoding.html +0 -82
  260. data/test/files/encoding.xhtml +0 -84
  261. data/test/files/exslt.xml +0 -8
  262. data/test/files/exslt.xslt +0 -35
  263. data/test/files/foo/foo.xsd +0 -4
  264. data/test/files/metacharset.html +0 -10
  265. data/test/files/namespace_pressure_test.xml +0 -1684
  266. data/test/files/noencoding.html +0 -47
  267. data/test/files/po.xml +0 -32
  268. data/test/files/po.xsd +0 -66
  269. data/test/files/saml/saml20assertion_schema.xsd +0 -283
  270. data/test/files/saml/saml20protocol_schema.xsd +0 -302
  271. data/test/files/saml/xenc_schema.xsd +0 -146
  272. data/test/files/saml/xmldsig_schema.xsd +0 -318
  273. data/test/files/shift_jis.html +0 -10
  274. data/test/files/shift_jis.xml +0 -5
  275. data/test/files/shift_jis_no_charset.html +0 -9
  276. data/test/files/slow-xpath.xml +0 -25509
  277. data/test/files/snuggles.xml +0 -3
  278. data/test/files/staff.dtd +0 -10
  279. data/test/files/staff.xml +0 -59
  280. data/test/files/staff.xslt +0 -32
  281. data/test/files/test_document_url/bar.xml +0 -2
  282. data/test/files/test_document_url/document.dtd +0 -4
  283. data/test/files/test_document_url/document.xml +0 -6
  284. data/test/files/tlm.html +0 -851
  285. data/test/files/to_be_xincluded.xml +0 -2
  286. data/test/files/valid_bar.xml +0 -2
  287. data/test/files/xinclude.xml +0 -4
  288. data/test/helper.rb +0 -181
  289. data/test/html/sax/test_parser.rb +0 -141
  290. data/test/html/sax/test_parser_context.rb +0 -46
  291. data/test/html/sax/test_push_parser.rb +0 -87
  292. data/test/html/test_builder.rb +0 -164
  293. data/test/html/test_document.rb +0 -701
  294. data/test/html/test_document_encoding.rb +0 -145
  295. data/test/html/test_document_fragment.rb +0 -301
  296. data/test/html/test_element_description.rb +0 -105
  297. data/test/html/test_named_characters.rb +0 -14
  298. data/test/html/test_node.rb +0 -212
  299. data/test/html/test_node_encoding.rb +0 -85
  300. data/test/namespaces/test_additional_namespaces_in_builder_doc.rb +0 -14
  301. data/test/namespaces/test_namespaces_aliased_default.rb +0 -24
  302. data/test/namespaces/test_namespaces_in_builder_doc.rb +0 -75
  303. data/test/namespaces/test_namespaces_in_cloned_doc.rb +0 -31
  304. data/test/namespaces/test_namespaces_in_created_doc.rb +0 -75
  305. data/test/namespaces/test_namespaces_in_parsed_doc.rb +0 -80
  306. data/test/namespaces/test_namespaces_preservation.rb +0 -31
  307. data/test/test_convert_xpath.rb +0 -135
  308. data/test/test_css_cache.rb +0 -45
  309. data/test/test_encoding_handler.rb +0 -48
  310. data/test/test_memory_leak.rb +0 -156
  311. data/test/test_nokogiri.rb +0 -138
  312. data/test/test_soap4r_sax.rb +0 -52
  313. data/test/test_xslt_transforms.rb +0 -314
  314. data/test/xml/node/test_save_options.rb +0 -28
  315. data/test/xml/node/test_subclass.rb +0 -44
  316. data/test/xml/sax/test_parser.rb +0 -394
  317. data/test/xml/sax/test_parser_context.rb +0 -115
  318. data/test/xml/sax/test_push_parser.rb +0 -157
  319. data/test/xml/test_attr.rb +0 -67
  320. data/test/xml/test_attribute_decl.rb +0 -86
  321. data/test/xml/test_builder.rb +0 -341
  322. data/test/xml/test_c14n.rb +0 -180
  323. data/test/xml/test_cdata.rb +0 -48
  324. data/test/xml/test_comment.rb +0 -40
  325. data/test/xml/test_document.rb +0 -982
  326. data/test/xml/test_document_encoding.rb +0 -31
  327. data/test/xml/test_document_fragment.rb +0 -271
  328. data/test/xml/test_dtd.rb +0 -187
  329. data/test/xml/test_dtd_encoding.rb +0 -31
  330. data/test/xml/test_element_content.rb +0 -56
  331. data/test/xml/test_element_decl.rb +0 -73
  332. data/test/xml/test_entity_decl.rb +0 -122
  333. data/test/xml/test_entity_reference.rb +0 -251
  334. data/test/xml/test_namespace.rb +0 -96
  335. data/test/xml/test_node.rb +0 -1244
  336. data/test/xml/test_node_attributes.rb +0 -115
  337. data/test/xml/test_node_encoding.rb +0 -69
  338. data/test/xml/test_node_inheritance.rb +0 -32
  339. data/test/xml/test_node_reparenting.rb +0 -549
  340. data/test/xml/test_node_set.rb +0 -775
  341. data/test/xml/test_parse_options.rb +0 -64
  342. data/test/xml/test_processing_instruction.rb +0 -30
  343. data/test/xml/test_reader.rb +0 -589
  344. data/test/xml/test_reader_encoding.rb +0 -134
  345. data/test/xml/test_relax_ng.rb +0 -60
  346. data/test/xml/test_schema.rb +0 -142
  347. data/test/xml/test_syntax_error.rb +0 -30
  348. data/test/xml/test_text.rb +0 -60
  349. data/test/xml/test_unparented_node.rb +0 -440
  350. data/test/xml/test_xinclude.rb +0 -83
  351. data/test/xml/test_xpath.rb +0 -445
  352. data/test/xslt/test_custom_functions.rb +0 -133
  353. data/test/xslt/test_exception_handling.rb +0 -37
  354. data/test_all +0 -107
@@ -0,0 +1,148 @@
1
+ #ifndef GUMBO_ERROR_H_
2
+ #define GUMBO_ERROR_H_
3
+
4
+ #include <stdint.h>
5
+
6
+ #include "gumbo.h"
7
+ #include "insertion_mode.h"
8
+ #include "string_buffer.h"
9
+ #include "token_type.h"
10
+ #include "tokenizer_states.h"
11
+
12
+ #ifdef __cplusplus
13
+ extern "C" {
14
+ #endif
15
+
16
+ struct GumboInternalParser;
17
+
18
+ typedef enum {
19
+ // Defined errors.
20
+ // https://html.spec.whatwg.org/multipage/parsing.html#parse-errors
21
+ GUMBO_ERR_ABRUPT_CLOSING_OF_EMPTY_COMMENT,
22
+ GUMBO_ERR_ABRUPT_DOCTYPE_PUBLIC_IDENTIFIER,
23
+ GUMBO_ERR_ABRUPT_DOCTYPE_SYSTEM_IDENTIFIER,
24
+ GUMBO_ERR_ABSENCE_OF_DIGITS_IN_NUMERIC_CHARACTER_REFERENCE,
25
+ GUMBO_ERR_CDATA_IN_HTML_CONTENT,
26
+ GUMBO_ERR_CHARACTER_REFERENCE_OUTSIDE_UNICODE_RANGE,
27
+ GUMBO_ERR_CONTROL_CHARACTER_IN_INPUT_STREAM,
28
+ GUMBO_ERR_CONTROL_CHARACTER_REFERENCE,
29
+ GUMBO_ERR_END_TAG_WITH_ATTRIBUTES,
30
+ GUMBO_ERR_DUPLICATE_ATTRIBUTE,
31
+ GUMBO_ERR_END_TAG_WITH_TRAILING_SOLIDUS,
32
+ GUMBO_ERR_EOF_BEFORE_TAG_NAME,
33
+ GUMBO_ERR_EOF_IN_CDATA,
34
+ GUMBO_ERR_EOF_IN_COMMENT,
35
+ GUMBO_ERR_EOF_IN_DOCTYPE,
36
+ GUMBO_ERR_EOF_IN_SCRIPT_HTML_COMMENT_LIKE_TEXT,
37
+ GUMBO_ERR_EOF_IN_TAG,
38
+ GUMBO_ERR_INCORRECTLY_CLOSED_COMMENT,
39
+ GUMBO_ERR_INCORRECTLY_OPENED_COMMENT,
40
+ GUMBO_ERR_INVALID_CHARACTER_SEQUENCE_AFTER_DOCTYPE_NAME,
41
+ GUMBO_ERR_INVALID_FIRST_CHARACTER_OF_TAG_NAME,
42
+ GUMBO_ERR_MISSING_ATTRIBUTE_VALUE,
43
+ GUMBO_ERR_MISSING_DOCTYPE_NAME,
44
+ GUMBO_ERR_MISSING_DOCTYPE_PUBLIC_IDENTIFIER,
45
+ GUMBO_ERR_MISSING_DOCTYPE_SYSTEM_IDENTIFIER,
46
+ GUMBO_ERR_MISSING_END_TAG_NAME,
47
+ GUMBO_ERR_MISSING_QUOTE_BEFORE_DOCTYPE_PUBLIC_IDENTIFIER,
48
+ GUMBO_ERR_MISSING_QUOTE_BEFORE_DOCTYPE_SYSTEM_IDENTIFIER,
49
+ GUMBO_ERR_MISSING_SEMICOLON_AFTER_CHARACTER_REFERENCE,
50
+ GUMBO_ERR_MISSING_WHITESPACE_AFTER_DOCTYPE_PUBLIC_KEYWORD,
51
+ GUMBO_ERR_MISSING_WHITESPACE_AFTER_DOCTYPE_SYSTEM_KEYWORD,
52
+ GUMBO_ERR_MISSING_WHITESPACE_BEFORE_DOCTYPE_NAME,
53
+ GUMBO_ERR_MISSING_WHITESPACE_BETWEEN_ATTRIBUTES,
54
+ GUMBO_ERR_MISSING_WHITESPACE_BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS,
55
+ GUMBO_ERR_NESTED_COMMENT,
56
+ GUMBO_ERR_NONCHARACTER_CHARACTER_REFERENCE,
57
+ GUMBO_ERR_NONCHARACTER_IN_INPUT_STREAM,
58
+ GUMBO_ERR_NON_VOID_HTML_ELEMENT_START_TAG_WITH_TRAILING_SOLIDUS,
59
+ GUMBO_ERR_NULL_CHARACTER_REFERENCE,
60
+ GUMBO_ERR_SURROGATE_CHARACTER_REFERENCE,
61
+ GUMBO_ERR_SURROGATE_IN_INPUT_STREAM,
62
+ GUMBO_ERR_UNEXPECTED_CHARACTER_AFTER_DOCTYPE_SYSTEM_IDENTIFIER,
63
+ GUMBO_ERR_UNEXPECTED_CHARACTER_IN_ATTRIBUTE_NAME,
64
+ GUMBO_ERR_UNEXPECTED_CHARACTER_IN_UNQUOTED_ATTRIBUTE_VALUE,
65
+ GUMBO_ERR_UNEXPECTED_EQUALS_SIGN_BEFORE_ATTRIBUTE_NAME,
66
+ GUMBO_ERR_UNEXPECTED_NULL_CHARACTER,
67
+ GUMBO_ERR_UNEXPECTED_QUESTION_MARK_INSTEAD_OF_TAG_NAME,
68
+ GUMBO_ERR_UNEXPECTED_SOLIDUS_IN_TAG,
69
+ GUMBO_ERR_UNKNOWN_NAMED_CHARACTER_REFERENCE,
70
+
71
+ // Encoding errors.
72
+ GUMBO_ERR_UTF8_INVALID,
73
+ GUMBO_ERR_UTF8_TRUNCATED,
74
+
75
+ // Generic parser error.
76
+ GUMBO_ERR_PARSER,
77
+ } GumboErrorType;
78
+
79
+ // Additional data for tokenizer errors.
80
+ // This records the current state and codepoint encountered - this is usually
81
+ // enough to reconstruct what went wrong and provide a friendly error message.
82
+ typedef struct GumboInternalTokenizerError {
83
+ // The bad codepoint encountered.
84
+ int codepoint;
85
+
86
+ // The state that the tokenizer was in at the time.
87
+ GumboTokenizerEnum state;
88
+ } GumboTokenizerError;
89
+
90
+ // Additional data for parse errors.
91
+ typedef struct GumboInternalParserError {
92
+ // The type of input token that resulted in this error.
93
+ GumboTokenType input_type;
94
+
95
+ // The HTML tag of the input token. TAG_UNKNOWN if this was not a tag token.
96
+ GumboTag input_tag;
97
+
98
+ // The insertion mode that the parser was in at the time.
99
+ GumboInsertionMode parser_state;
100
+
101
+ // The tag stack at the point of the error. Note that this is an GumboVector
102
+ // of GumboTag's *stored by value* - cast the void* to an GumboTag directly to
103
+ // get at the tag.
104
+ GumboVector /* GumboTag */ tag_stack;
105
+ } GumboParserError;
106
+
107
+ // The overall error struct representing an error in decoding/tokenizing/parsing
108
+ // the HTML. This contains an enumerated type flag, a source position, and then
109
+ // a union of fields containing data specific to the error.
110
+ struct GumboInternalError {
111
+ // The type of error.
112
+ GumboErrorType type;
113
+
114
+ // The position within the source file where the error occurred.
115
+ GumboSourcePosition position;
116
+
117
+ // The piece of text that caused the error.
118
+ GumboStringPiece original_text;
119
+
120
+ // Type-specific error information.
121
+ union {
122
+ // Tokenizer errors.
123
+ GumboTokenizerError tokenizer;
124
+
125
+ // Parser errors.
126
+ GumboParserError parser;
127
+ } v;
128
+ };
129
+
130
+ // Adds a new error to the parser's error list, and returns a pointer to it so
131
+ // that clients can fill out the rest of its fields. May return NULL if we're
132
+ // already over the max_errors field specified in GumboOptions.
133
+ GumboError* gumbo_add_error(struct GumboInternalParser* parser);
134
+
135
+ // Initializes the errors vector in the parser.
136
+ void gumbo_init_errors(struct GumboInternalParser* errors);
137
+
138
+ // Frees all the errors in the 'errors_' field of the parser.
139
+ void gumbo_destroy_errors(struct GumboInternalParser* errors);
140
+
141
+ // Frees the memory used for a single GumboError.
142
+ void gumbo_error_destroy(GumboError* error);
143
+
144
+ #ifdef __cplusplus
145
+ }
146
+ #endif
147
+
148
+ #endif // GUMBO_ERROR_H_
@@ -0,0 +1,104 @@
1
+ /* ANSI-C code produced by gperf version 3.1 */
2
+ /* Command-line: gperf -m100 -n lib/foreign_attrs.gperf */
3
+ /* Computed positions: -k'2,8' */
4
+ /* Filtered by: mk/gperf-filter.sed */
5
+
6
+ #include "replacement.h"
7
+ #include "macros.h"
8
+ #include <string.h>
9
+
10
+ #define TOTAL_KEYWORDS 11
11
+ #define MIN_WORD_LENGTH 5
12
+ #define MAX_WORD_LENGTH 13
13
+ #define MIN_HASH_VALUE 0
14
+ #define MAX_HASH_VALUE 10
15
+ /* maximum key range = 11, duplicates = 0 */
16
+
17
+ static inline unsigned int
18
+ hash (register const char *str, register size_t len)
19
+ {
20
+ static const unsigned char asso_values[] =
21
+ {
22
+ 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
23
+ 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
24
+ 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
25
+ 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
26
+ 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
27
+ 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
28
+ 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
29
+ 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
30
+ 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
31
+ 11, 11, 11, 11, 11, 11, 11, 11, 11, 2,
32
+ 11, 10, 11, 9, 7, 6, 11, 11, 1, 0,
33
+ 11, 5, 11, 11, 4, 11, 11, 11, 11, 11,
34
+ 11, 3, 11, 11, 11, 11, 11, 11, 11, 11,
35
+ 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
36
+ 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
37
+ 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
38
+ 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
39
+ 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
40
+ 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
41
+ 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
42
+ 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
43
+ 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
44
+ 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
45
+ 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
46
+ 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
47
+ 11, 11, 11, 11, 11, 11
48
+ };
49
+ register unsigned int hval = 0;
50
+
51
+ switch (len)
52
+ {
53
+ default:
54
+ hval += asso_values[(unsigned char)str[7]];
55
+ /*FALLTHROUGH*/
56
+ case 7:
57
+ case 6:
58
+ case 5:
59
+ case 4:
60
+ case 3:
61
+ case 2:
62
+ hval += asso_values[(unsigned char)str[1]];
63
+ break;
64
+ }
65
+ return hval;
66
+ }
67
+
68
+ const ForeignAttrReplacement *
69
+ gumbo_get_foreign_attr_replacement (register const char *str, register size_t len)
70
+ {
71
+ static const unsigned char lengthtable[] =
72
+ {
73
+ 5, 11, 9, 13, 10, 10, 10, 11, 10, 8, 8
74
+ };
75
+ static const ForeignAttrReplacement wordlist[] =
76
+ {
77
+ {"xmlns", "xmlns", GUMBO_ATTR_NAMESPACE_XMLNS},
78
+ {"xmlns:xlink", "xlink", GUMBO_ATTR_NAMESPACE_XMLNS},
79
+ {"xml:space", "space", GUMBO_ATTR_NAMESPACE_XML},
80
+ {"xlink:actuate", "actuate", GUMBO_ATTR_NAMESPACE_XLINK},
81
+ {"xlink:type", "type", GUMBO_ATTR_NAMESPACE_XLINK},
82
+ {"xlink:href", "href", GUMBO_ATTR_NAMESPACE_XLINK},
83
+ {"xlink:role", "role", GUMBO_ATTR_NAMESPACE_XLINK},
84
+ {"xlink:title", "title", GUMBO_ATTR_NAMESPACE_XLINK},
85
+ {"xlink:show", "show", GUMBO_ATTR_NAMESPACE_XLINK},
86
+ {"xml:lang", "lang", GUMBO_ATTR_NAMESPACE_XML},
87
+ {"xml:base", "base", GUMBO_ATTR_NAMESPACE_XML}
88
+ };
89
+
90
+ if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH)
91
+ {
92
+ register unsigned int key = hash (str, len);
93
+
94
+ if (key <= MAX_HASH_VALUE)
95
+ if (len == lengthtable[key])
96
+ {
97
+ register const char *s = wordlist[key].from;
98
+
99
+ if (s && *str == *s && !memcmp (str + 1, s + 1, len - 1))
100
+ return &wordlist[key];
101
+ }
102
+ }
103
+ return 0;
104
+ }
@@ -0,0 +1,27 @@
1
+ %{
2
+ #include "replacement.h"
3
+ #include "macros.h"
4
+ %}
5
+
6
+ %struct-type
7
+ %omit-struct-type
8
+ %compare-lengths
9
+ %readonly-tables
10
+ %null-strings
11
+ %includes
12
+ %define lookup-function-name gumbo_get_foreign_attr_replacement
13
+ %define slot-name from
14
+ ForeignAttrReplacement;
15
+
16
+ %%
17
+ "xlink:actuate", "actuate", GUMBO_ATTR_NAMESPACE_XLINK
18
+ "xlink:href", "href", GUMBO_ATTR_NAMESPACE_XLINK
19
+ "xlink:role", "role", GUMBO_ATTR_NAMESPACE_XLINK
20
+ "xlink:show", "show", GUMBO_ATTR_NAMESPACE_XLINK
21
+ "xlink:title", "title", GUMBO_ATTR_NAMESPACE_XLINK
22
+ "xlink:type", "type", GUMBO_ATTR_NAMESPACE_XLINK
23
+ "xml:base", "base", GUMBO_ATTR_NAMESPACE_XML
24
+ "xml:lang", "lang", GUMBO_ATTR_NAMESPACE_XML
25
+ "xml:space", "space", GUMBO_ATTR_NAMESPACE_XML
26
+ "xmlns", "xmlns", GUMBO_ATTR_NAMESPACE_XMLNS
27
+ "xmlns:xlink", "xlink", GUMBO_ATTR_NAMESPACE_XMLNS