nokogiri 1.6.0 → 1.13.2

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (340) hide show
  1. checksums.yaml +7 -0
  2. data/Gemfile +3 -19
  3. data/LICENSE-DEPENDENCIES.md +1903 -0
  4. data/LICENSE.md +9 -0
  5. data/README.md +280 -0
  6. data/bin/nokogiri +84 -31
  7. data/dependencies.yml +23 -4
  8. data/ext/nokogiri/depend +38 -358
  9. data/ext/nokogiri/extconf.rb +952 -132
  10. data/ext/nokogiri/gumbo.c +584 -0
  11. data/ext/nokogiri/html4_document.c +166 -0
  12. data/ext/nokogiri/html4_element_description.c +294 -0
  13. data/ext/nokogiri/html4_entity_lookup.c +37 -0
  14. data/ext/nokogiri/html4_sax_parser_context.c +120 -0
  15. data/ext/nokogiri/html4_sax_push_parser.c +95 -0
  16. data/ext/nokogiri/libxml2_backwards_compat.c +121 -0
  17. data/ext/nokogiri/nokogiri.c +231 -96
  18. data/ext/nokogiri/nokogiri.h +188 -129
  19. data/ext/nokogiri/test_global_handlers.c +40 -0
  20. data/ext/nokogiri/xml_attr.c +49 -40
  21. data/ext/nokogiri/xml_attribute_decl.c +18 -18
  22. data/ext/nokogiri/xml_cdata.c +24 -23
  23. data/ext/nokogiri/xml_comment.c +29 -21
  24. data/ext/nokogiri/xml_document.c +327 -223
  25. data/ext/nokogiri/xml_document_fragment.c +12 -16
  26. data/ext/nokogiri/xml_dtd.c +56 -50
  27. data/ext/nokogiri/xml_element_content.c +31 -26
  28. data/ext/nokogiri/xml_element_decl.c +22 -22
  29. data/ext/nokogiri/xml_encoding_handler.c +45 -20
  30. data/ext/nokogiri/xml_entity_decl.c +32 -30
  31. data/ext/nokogiri/xml_entity_reference.c +16 -18
  32. data/ext/nokogiri/xml_namespace.c +74 -32
  33. data/ext/nokogiri/xml_node.c +1290 -680
  34. data/ext/nokogiri/xml_node_set.c +239 -208
  35. data/ext/nokogiri/xml_processing_instruction.c +17 -19
  36. data/ext/nokogiri/xml_reader.c +227 -189
  37. data/ext/nokogiri/xml_relax_ng.c +52 -28
  38. data/ext/nokogiri/xml_sax_parser.c +123 -125
  39. data/ext/nokogiri/xml_sax_parser_context.c +138 -79
  40. data/ext/nokogiri/xml_sax_push_parser.c +88 -35
  41. data/ext/nokogiri/xml_schema.c +112 -33
  42. data/ext/nokogiri/xml_syntax_error.c +50 -23
  43. data/ext/nokogiri/xml_text.c +14 -18
  44. data/ext/nokogiri/xml_xpath_context.c +227 -140
  45. data/ext/nokogiri/xslt_stylesheet.c +269 -177
  46. data/gumbo-parser/CHANGES.md +63 -0
  47. data/gumbo-parser/Makefile +101 -0
  48. data/gumbo-parser/THANKS +27 -0
  49. data/gumbo-parser/src/Makefile +34 -0
  50. data/gumbo-parser/src/README.md +41 -0
  51. data/gumbo-parser/src/ascii.c +75 -0
  52. data/gumbo-parser/src/ascii.h +115 -0
  53. data/gumbo-parser/src/attribute.c +42 -0
  54. data/gumbo-parser/src/attribute.h +17 -0
  55. data/gumbo-parser/src/char_ref.c +22225 -0
  56. data/gumbo-parser/src/char_ref.h +29 -0
  57. data/gumbo-parser/src/char_ref.rl +2154 -0
  58. data/gumbo-parser/src/error.c +626 -0
  59. data/gumbo-parser/src/error.h +148 -0
  60. data/gumbo-parser/src/foreign_attrs.c +104 -0
  61. data/gumbo-parser/src/foreign_attrs.gperf +27 -0
  62. data/gumbo-parser/src/gumbo.h +943 -0
  63. data/gumbo-parser/src/insertion_mode.h +33 -0
  64. data/gumbo-parser/src/macros.h +91 -0
  65. data/gumbo-parser/src/parser.c +4875 -0
  66. data/gumbo-parser/src/parser.h +41 -0
  67. data/gumbo-parser/src/replacement.h +33 -0
  68. data/gumbo-parser/src/string_buffer.c +103 -0
  69. data/gumbo-parser/src/string_buffer.h +68 -0
  70. data/gumbo-parser/src/string_piece.c +48 -0
  71. data/gumbo-parser/src/svg_attrs.c +174 -0
  72. data/gumbo-parser/src/svg_attrs.gperf +77 -0
  73. data/gumbo-parser/src/svg_tags.c +137 -0
  74. data/gumbo-parser/src/svg_tags.gperf +55 -0
  75. data/gumbo-parser/src/tag.c +222 -0
  76. data/gumbo-parser/src/tag_lookup.c +382 -0
  77. data/gumbo-parser/src/tag_lookup.gperf +169 -0
  78. data/gumbo-parser/src/tag_lookup.h +13 -0
  79. data/gumbo-parser/src/token_buffer.c +79 -0
  80. data/gumbo-parser/src/token_buffer.h +71 -0
  81. data/gumbo-parser/src/token_type.h +17 -0
  82. data/gumbo-parser/src/tokenizer.c +3463 -0
  83. data/gumbo-parser/src/tokenizer.h +112 -0
  84. data/gumbo-parser/src/tokenizer_states.h +339 -0
  85. data/gumbo-parser/src/utf8.c +245 -0
  86. data/gumbo-parser/src/utf8.h +164 -0
  87. data/gumbo-parser/src/util.c +68 -0
  88. data/gumbo-parser/src/util.h +30 -0
  89. data/gumbo-parser/src/vector.c +111 -0
  90. data/gumbo-parser/src/vector.h +45 -0
  91. data/lib/nokogiri/class_resolver.rb +67 -0
  92. data/lib/nokogiri/css/node.rb +10 -58
  93. data/lib/nokogiri/css/parser.rb +407 -357
  94. data/lib/nokogiri/css/parser.y +265 -246
  95. data/lib/nokogiri/css/parser_extras.rb +52 -49
  96. data/lib/nokogiri/css/syntax_error.rb +3 -1
  97. data/lib/nokogiri/css/tokenizer.rb +107 -104
  98. data/lib/nokogiri/css/tokenizer.rex +8 -7
  99. data/lib/nokogiri/css/xpath_visitor.rb +266 -80
  100. data/lib/nokogiri/css.rb +50 -17
  101. data/lib/nokogiri/decorators/slop.rb +17 -8
  102. data/lib/nokogiri/extension.rb +31 -0
  103. data/lib/nokogiri/gumbo.rb +15 -0
  104. data/lib/nokogiri/html.rb +38 -27
  105. data/lib/nokogiri/{html → html4}/builder.rb +4 -2
  106. data/lib/nokogiri/html4/document.rb +331 -0
  107. data/lib/nokogiri/html4/document_fragment.rb +54 -0
  108. data/lib/nokogiri/{html → html4}/element_description.rb +3 -1
  109. data/lib/nokogiri/html4/element_description_defaults.rb +578 -0
  110. data/lib/nokogiri/{html → html4}/entity_lookup.rb +4 -2
  111. data/lib/nokogiri/{html → html4}/sax/parser.rb +24 -15
  112. data/lib/nokogiri/html4/sax/parser_context.rb +20 -0
  113. data/lib/nokogiri/html4/sax/push_parser.rb +37 -0
  114. data/lib/nokogiri/html4.rb +46 -0
  115. data/lib/nokogiri/html5/document.rb +88 -0
  116. data/lib/nokogiri/html5/document_fragment.rb +83 -0
  117. data/lib/nokogiri/html5/node.rb +96 -0
  118. data/lib/nokogiri/html5.rb +477 -0
  119. data/lib/nokogiri/jruby/dependencies.rb +21 -0
  120. data/lib/nokogiri/syntax_error.rb +2 -0
  121. data/lib/nokogiri/version/constant.rb +6 -0
  122. data/lib/nokogiri/version/info.rb +221 -0
  123. data/lib/nokogiri/version.rb +3 -105
  124. data/lib/nokogiri/xml/attr.rb +6 -3
  125. data/lib/nokogiri/xml/attribute_decl.rb +3 -1
  126. data/lib/nokogiri/xml/builder.rb +96 -54
  127. data/lib/nokogiri/xml/cdata.rb +3 -1
  128. data/lib/nokogiri/xml/character_data.rb +2 -0
  129. data/lib/nokogiri/xml/document.rb +234 -95
  130. data/lib/nokogiri/xml/document_fragment.rb +86 -36
  131. data/lib/nokogiri/xml/dtd.rb +16 -4
  132. data/lib/nokogiri/xml/element_content.rb +2 -0
  133. data/lib/nokogiri/xml/element_decl.rb +3 -1
  134. data/lib/nokogiri/xml/entity_decl.rb +4 -2
  135. data/lib/nokogiri/xml/entity_reference.rb +20 -0
  136. data/lib/nokogiri/xml/namespace.rb +3 -0
  137. data/lib/nokogiri/xml/node/save_options.rb +8 -4
  138. data/lib/nokogiri/xml/node.rb +947 -502
  139. data/lib/nokogiri/xml/node_set.rb +168 -159
  140. data/lib/nokogiri/xml/notation.rb +13 -0
  141. data/lib/nokogiri/xml/parse_options.rb +40 -5
  142. data/lib/nokogiri/xml/pp/character_data.rb +9 -6
  143. data/lib/nokogiri/xml/pp/node.rb +25 -26
  144. data/lib/nokogiri/xml/pp.rb +4 -2
  145. data/lib/nokogiri/xml/processing_instruction.rb +3 -1
  146. data/lib/nokogiri/xml/reader.rb +23 -28
  147. data/lib/nokogiri/xml/relax_ng.rb +8 -2
  148. data/lib/nokogiri/xml/sax/document.rb +45 -49
  149. data/lib/nokogiri/xml/sax/parser.rb +43 -41
  150. data/lib/nokogiri/xml/sax/parser_context.rb +8 -3
  151. data/lib/nokogiri/xml/sax/push_parser.rb +6 -5
  152. data/lib/nokogiri/xml/sax.rb +6 -4
  153. data/lib/nokogiri/xml/schema.rb +19 -9
  154. data/lib/nokogiri/xml/searchable.rb +270 -0
  155. data/lib/nokogiri/xml/syntax_error.rb +25 -1
  156. data/lib/nokogiri/xml/text.rb +2 -0
  157. data/lib/nokogiri/xml/xpath/syntax_error.rb +4 -2
  158. data/lib/nokogiri/xml/xpath.rb +15 -4
  159. data/lib/nokogiri/xml/xpath_context.rb +3 -3
  160. data/lib/nokogiri/xml.rb +38 -36
  161. data/lib/nokogiri/xslt/stylesheet.rb +3 -1
  162. data/lib/nokogiri/xslt.rb +29 -20
  163. data/lib/nokogiri.rb +69 -69
  164. data/lib/xsd/xmlparser/nokogiri.rb +26 -24
  165. data/patches/libxml2/0001-Remove-script-macro-support.patch +40 -0
  166. data/patches/libxml2/0002-Update-entities-to-remove-handling-of-ssi.patch +44 -0
  167. data/patches/libxml2/0003-libxml2.la-is-in-top_builddir.patch +25 -0
  168. data/patches/libxml2/0004-use-glibc-strlen.patch +53 -0
  169. data/patches/libxml2/0005-avoid-isnan-isinf.patch +81 -0
  170. data/patches/libxml2/0006-update-automake-files-for-arm64.patch +3040 -0
  171. data/patches/libxml2/0008-htmlParseComment-handle-abruptly-closed-comments.patch +61 -0
  172. data/patches/libxml2/0009-allow-wildcard-namespaces.patch +77 -0
  173. data/patches/libxslt/0001-update-automake-files-for-arm64.patch +3037 -0
  174. data/ports/archives/libxml2-2.9.13.tar.xz +0 -0
  175. data/ports/archives/libxslt-1.1.35.tar.xz +0 -0
  176. metadata +278 -362
  177. data/.autotest +0 -26
  178. data/.gemtest +0 -0
  179. data/.travis.yml +0 -27
  180. data/CHANGELOG.ja.rdoc +0 -819
  181. data/CHANGELOG.rdoc +0 -819
  182. data/C_CODING_STYLE.rdoc +0 -33
  183. data/Manifest.txt +0 -315
  184. data/README.ja.rdoc +0 -106
  185. data/README.rdoc +0 -175
  186. data/ROADMAP.md +0 -90
  187. data/Rakefile +0 -246
  188. data/STANDARD_RESPONSES.md +0 -47
  189. data/Y_U_NO_GEMSPEC.md +0 -155
  190. data/build_all +0 -105
  191. data/ext/nokogiri/html_document.c +0 -170
  192. data/ext/nokogiri/html_document.h +0 -10
  193. data/ext/nokogiri/html_element_description.c +0 -279
  194. data/ext/nokogiri/html_element_description.h +0 -10
  195. data/ext/nokogiri/html_entity_lookup.c +0 -32
  196. data/ext/nokogiri/html_entity_lookup.h +0 -8
  197. data/ext/nokogiri/html_sax_parser_context.c +0 -116
  198. data/ext/nokogiri/html_sax_parser_context.h +0 -11
  199. data/ext/nokogiri/html_sax_push_parser.c +0 -87
  200. data/ext/nokogiri/html_sax_push_parser.h +0 -9
  201. data/ext/nokogiri/xml_attr.h +0 -9
  202. data/ext/nokogiri/xml_attribute_decl.h +0 -9
  203. data/ext/nokogiri/xml_cdata.h +0 -9
  204. data/ext/nokogiri/xml_comment.h +0 -9
  205. data/ext/nokogiri/xml_document.h +0 -23
  206. data/ext/nokogiri/xml_document_fragment.h +0 -10
  207. data/ext/nokogiri/xml_dtd.h +0 -10
  208. data/ext/nokogiri/xml_element_content.h +0 -10
  209. data/ext/nokogiri/xml_element_decl.h +0 -9
  210. data/ext/nokogiri/xml_encoding_handler.h +0 -8
  211. data/ext/nokogiri/xml_entity_decl.h +0 -10
  212. data/ext/nokogiri/xml_entity_reference.h +0 -9
  213. data/ext/nokogiri/xml_io.c +0 -56
  214. data/ext/nokogiri/xml_io.h +0 -11
  215. data/ext/nokogiri/xml_libxml2_hacks.c +0 -112
  216. data/ext/nokogiri/xml_libxml2_hacks.h +0 -12
  217. data/ext/nokogiri/xml_namespace.h +0 -13
  218. data/ext/nokogiri/xml_node.h +0 -13
  219. data/ext/nokogiri/xml_node_set.h +0 -14
  220. data/ext/nokogiri/xml_processing_instruction.h +0 -9
  221. data/ext/nokogiri/xml_reader.h +0 -10
  222. data/ext/nokogiri/xml_relax_ng.h +0 -9
  223. data/ext/nokogiri/xml_sax_parser.h +0 -39
  224. data/ext/nokogiri/xml_sax_parser_context.h +0 -10
  225. data/ext/nokogiri/xml_sax_push_parser.h +0 -9
  226. data/ext/nokogiri/xml_schema.h +0 -9
  227. data/ext/nokogiri/xml_syntax_error.h +0 -13
  228. data/ext/nokogiri/xml_text.h +0 -9
  229. data/ext/nokogiri/xml_xpath_context.h +0 -10
  230. data/ext/nokogiri/xslt_stylesheet.h +0 -14
  231. data/lib/nokogiri/html/document.rb +0 -254
  232. data/lib/nokogiri/html/document_fragment.rb +0 -41
  233. data/lib/nokogiri/html/element_description_defaults.rb +0 -671
  234. data/lib/nokogiri/html/sax/parser_context.rb +0 -16
  235. data/lib/nokogiri/html/sax/push_parser.rb +0 -16
  236. data/ports/archives/libxml2-2.8.0.tar.gz +0 -0
  237. data/ports/archives/libxslt-1.1.26.tar.gz +0 -0
  238. data/tasks/cross_compile.rb +0 -132
  239. data/tasks/nokogiri.org.rb +0 -24
  240. data/tasks/test.rb +0 -95
  241. data/test/css/test_nthiness.rb +0 -159
  242. data/test/css/test_parser.rb +0 -341
  243. data/test/css/test_tokenizer.rb +0 -198
  244. data/test/css/test_xpath_visitor.rb +0 -91
  245. data/test/decorators/test_slop.rb +0 -16
  246. data/test/files/2ch.html +0 -108
  247. data/test/files/address_book.rlx +0 -12
  248. data/test/files/address_book.xml +0 -10
  249. data/test/files/bar/bar.xsd +0 -4
  250. data/test/files/bogus.xml +0 -0
  251. data/test/files/dont_hurt_em_why.xml +0 -422
  252. data/test/files/encoding.html +0 -82
  253. data/test/files/encoding.xhtml +0 -84
  254. data/test/files/exslt.xml +0 -8
  255. data/test/files/exslt.xslt +0 -35
  256. data/test/files/foo/foo.xsd +0 -4
  257. data/test/files/metacharset.html +0 -10
  258. data/test/files/noencoding.html +0 -47
  259. data/test/files/po.xml +0 -32
  260. data/test/files/po.xsd +0 -66
  261. data/test/files/saml/saml20assertion_schema.xsd +0 -283
  262. data/test/files/saml/saml20protocol_schema.xsd +0 -302
  263. data/test/files/saml/xenc_schema.xsd +0 -146
  264. data/test/files/saml/xmldsig_schema.xsd +0 -318
  265. data/test/files/shift_jis.html +0 -10
  266. data/test/files/shift_jis.xml +0 -5
  267. data/test/files/snuggles.xml +0 -3
  268. data/test/files/staff.dtd +0 -10
  269. data/test/files/staff.xml +0 -59
  270. data/test/files/staff.xslt +0 -32
  271. data/test/files/test_document_url/bar.xml +0 -2
  272. data/test/files/test_document_url/document.dtd +0 -4
  273. data/test/files/test_document_url/document.xml +0 -6
  274. data/test/files/tlm.html +0 -850
  275. data/test/files/to_be_xincluded.xml +0 -2
  276. data/test/files/valid_bar.xml +0 -2
  277. data/test/files/xinclude.xml +0 -4
  278. data/test/helper.rb +0 -154
  279. data/test/html/sax/test_parser.rb +0 -141
  280. data/test/html/sax/test_parser_context.rb +0 -46
  281. data/test/html/test_builder.rb +0 -164
  282. data/test/html/test_document.rb +0 -552
  283. data/test/html/test_document_encoding.rb +0 -138
  284. data/test/html/test_document_fragment.rb +0 -261
  285. data/test/html/test_element_description.rb +0 -105
  286. data/test/html/test_named_characters.rb +0 -14
  287. data/test/html/test_node.rb +0 -196
  288. data/test/html/test_node_encoding.rb +0 -27
  289. data/test/namespaces/test_additional_namespaces_in_builder_doc.rb +0 -14
  290. data/test/namespaces/test_namespaces_in_builder_doc.rb +0 -75
  291. data/test/namespaces/test_namespaces_in_created_doc.rb +0 -75
  292. data/test/namespaces/test_namespaces_in_parsed_doc.rb +0 -66
  293. data/test/test_convert_xpath.rb +0 -135
  294. data/test/test_css_cache.rb +0 -45
  295. data/test/test_encoding_handler.rb +0 -46
  296. data/test/test_memory_leak.rb +0 -156
  297. data/test/test_nokogiri.rb +0 -132
  298. data/test/test_reader.rb +0 -555
  299. data/test/test_soap4r_sax.rb +0 -52
  300. data/test/test_xslt_transforms.rb +0 -254
  301. data/test/xml/node/test_save_options.rb +0 -28
  302. data/test/xml/node/test_subclass.rb +0 -44
  303. data/test/xml/sax/test_parser.rb +0 -366
  304. data/test/xml/sax/test_parser_context.rb +0 -106
  305. data/test/xml/sax/test_push_parser.rb +0 -157
  306. data/test/xml/test_attr.rb +0 -64
  307. data/test/xml/test_attribute_decl.rb +0 -86
  308. data/test/xml/test_builder.rb +0 -306
  309. data/test/xml/test_c14n.rb +0 -151
  310. data/test/xml/test_cdata.rb +0 -48
  311. data/test/xml/test_comment.rb +0 -29
  312. data/test/xml/test_document.rb +0 -828
  313. data/test/xml/test_document_encoding.rb +0 -28
  314. data/test/xml/test_document_fragment.rb +0 -223
  315. data/test/xml/test_dtd.rb +0 -103
  316. data/test/xml/test_dtd_encoding.rb +0 -33
  317. data/test/xml/test_element_content.rb +0 -56
  318. data/test/xml/test_element_decl.rb +0 -73
  319. data/test/xml/test_entity_decl.rb +0 -122
  320. data/test/xml/test_entity_reference.rb +0 -245
  321. data/test/xml/test_namespace.rb +0 -95
  322. data/test/xml/test_node.rb +0 -1137
  323. data/test/xml/test_node_attributes.rb +0 -96
  324. data/test/xml/test_node_encoding.rb +0 -107
  325. data/test/xml/test_node_inheritance.rb +0 -32
  326. data/test/xml/test_node_reparenting.rb +0 -374
  327. data/test/xml/test_node_set.rb +0 -755
  328. data/test/xml/test_parse_options.rb +0 -64
  329. data/test/xml/test_processing_instruction.rb +0 -30
  330. data/test/xml/test_reader_encoding.rb +0 -142
  331. data/test/xml/test_relax_ng.rb +0 -60
  332. data/test/xml/test_schema.rb +0 -103
  333. data/test/xml/test_syntax_error.rb +0 -12
  334. data/test/xml/test_text.rb +0 -45
  335. data/test/xml/test_unparented_node.rb +0 -422
  336. data/test/xml/test_xinclude.rb +0 -83
  337. data/test/xml/test_xpath.rb +0 -295
  338. data/test/xslt/test_custom_functions.rb +0 -133
  339. data/test/xslt/test_exception_handling.rb +0 -37
  340. data/test_all +0 -81
@@ -0,0 +1,169 @@
1
+ %{
2
+ #include "tag_lookup.h"
3
+ #include "macros.h"
4
+ #include "ascii.h"
5
+ %}
6
+
7
+ %ignore-case
8
+ %struct-type
9
+ %omit-struct-type
10
+ %compare-lengths
11
+ %readonly-tables
12
+ %null-strings
13
+ %includes
14
+ %define lookup-function-name gumbo_tag_lookup
15
+ %define slot-name key
16
+ %define initializer-suffix ,GUMBO_TAG_UNKNOWN
17
+ TagHashSlot;
18
+
19
+ %%
20
+ html, GUMBO_TAG_HTML
21
+ head, GUMBO_TAG_HEAD
22
+ title, GUMBO_TAG_TITLE
23
+ base, GUMBO_TAG_BASE
24
+ link, GUMBO_TAG_LINK
25
+ meta, GUMBO_TAG_META
26
+ style, GUMBO_TAG_STYLE
27
+ script, GUMBO_TAG_SCRIPT
28
+ noscript, GUMBO_TAG_NOSCRIPT
29
+ template, GUMBO_TAG_TEMPLATE
30
+ body, GUMBO_TAG_BODY
31
+ article, GUMBO_TAG_ARTICLE
32
+ section, GUMBO_TAG_SECTION
33
+ nav, GUMBO_TAG_NAV
34
+ aside, GUMBO_TAG_ASIDE
35
+ h1, GUMBO_TAG_H1
36
+ h2, GUMBO_TAG_H2
37
+ h3, GUMBO_TAG_H3
38
+ h4, GUMBO_TAG_H4
39
+ h5, GUMBO_TAG_H5
40
+ h6, GUMBO_TAG_H6
41
+ hgroup, GUMBO_TAG_HGROUP
42
+ header, GUMBO_TAG_HEADER
43
+ footer, GUMBO_TAG_FOOTER
44
+ address, GUMBO_TAG_ADDRESS
45
+ p, GUMBO_TAG_P
46
+ hr, GUMBO_TAG_HR
47
+ pre, GUMBO_TAG_PRE
48
+ blockquote, GUMBO_TAG_BLOCKQUOTE
49
+ ol, GUMBO_TAG_OL
50
+ ul, GUMBO_TAG_UL
51
+ li, GUMBO_TAG_LI
52
+ dl, GUMBO_TAG_DL
53
+ dt, GUMBO_TAG_DT
54
+ dd, GUMBO_TAG_DD
55
+ figure, GUMBO_TAG_FIGURE
56
+ figcaption, GUMBO_TAG_FIGCAPTION
57
+ main, GUMBO_TAG_MAIN
58
+ div, GUMBO_TAG_DIV
59
+ a, GUMBO_TAG_A
60
+ em, GUMBO_TAG_EM
61
+ strong, GUMBO_TAG_STRONG
62
+ small, GUMBO_TAG_SMALL
63
+ s, GUMBO_TAG_S
64
+ cite, GUMBO_TAG_CITE
65
+ q, GUMBO_TAG_Q
66
+ dfn, GUMBO_TAG_DFN
67
+ abbr, GUMBO_TAG_ABBR
68
+ data, GUMBO_TAG_DATA
69
+ time, GUMBO_TAG_TIME
70
+ code, GUMBO_TAG_CODE
71
+ var, GUMBO_TAG_VAR
72
+ samp, GUMBO_TAG_SAMP
73
+ kbd, GUMBO_TAG_KBD
74
+ sub, GUMBO_TAG_SUB
75
+ sup, GUMBO_TAG_SUP
76
+ i, GUMBO_TAG_I
77
+ b, GUMBO_TAG_B
78
+ u, GUMBO_TAG_U
79
+ mark, GUMBO_TAG_MARK
80
+ ruby, GUMBO_TAG_RUBY
81
+ rt, GUMBO_TAG_RT
82
+ rp, GUMBO_TAG_RP
83
+ bdi, GUMBO_TAG_BDI
84
+ bdo, GUMBO_TAG_BDO
85
+ span, GUMBO_TAG_SPAN
86
+ br, GUMBO_TAG_BR
87
+ wbr, GUMBO_TAG_WBR
88
+ ins, GUMBO_TAG_INS
89
+ del, GUMBO_TAG_DEL
90
+ image, GUMBO_TAG_IMAGE
91
+ img, GUMBO_TAG_IMG
92
+ iframe, GUMBO_TAG_IFRAME
93
+ embed, GUMBO_TAG_EMBED
94
+ object, GUMBO_TAG_OBJECT
95
+ param, GUMBO_TAG_PARAM
96
+ video, GUMBO_TAG_VIDEO
97
+ audio, GUMBO_TAG_AUDIO
98
+ source, GUMBO_TAG_SOURCE
99
+ track, GUMBO_TAG_TRACK
100
+ canvas, GUMBO_TAG_CANVAS
101
+ map, GUMBO_TAG_MAP
102
+ area, GUMBO_TAG_AREA
103
+ math, GUMBO_TAG_MATH
104
+ mi, GUMBO_TAG_MI
105
+ mo, GUMBO_TAG_MO
106
+ mn, GUMBO_TAG_MN
107
+ ms, GUMBO_TAG_MS
108
+ mtext, GUMBO_TAG_MTEXT
109
+ mglyph, GUMBO_TAG_MGLYPH
110
+ malignmark, GUMBO_TAG_MALIGNMARK
111
+ annotation-xml, GUMBO_TAG_ANNOTATION_XML
112
+ svg, GUMBO_TAG_SVG
113
+ foreignobject, GUMBO_TAG_FOREIGNOBJECT
114
+ desc, GUMBO_TAG_DESC
115
+ table, GUMBO_TAG_TABLE
116
+ caption, GUMBO_TAG_CAPTION
117
+ colgroup, GUMBO_TAG_COLGROUP
118
+ col, GUMBO_TAG_COL
119
+ tbody, GUMBO_TAG_TBODY
120
+ thead, GUMBO_TAG_THEAD
121
+ tfoot, GUMBO_TAG_TFOOT
122
+ tr, GUMBO_TAG_TR
123
+ td, GUMBO_TAG_TD
124
+ th, GUMBO_TAG_TH
125
+ form, GUMBO_TAG_FORM
126
+ fieldset, GUMBO_TAG_FIELDSET
127
+ legend, GUMBO_TAG_LEGEND
128
+ label, GUMBO_TAG_LABEL
129
+ input, GUMBO_TAG_INPUT
130
+ button, GUMBO_TAG_BUTTON
131
+ select, GUMBO_TAG_SELECT
132
+ datalist, GUMBO_TAG_DATALIST
133
+ optgroup, GUMBO_TAG_OPTGROUP
134
+ option, GUMBO_TAG_OPTION
135
+ textarea, GUMBO_TAG_TEXTAREA
136
+ keygen, GUMBO_TAG_KEYGEN
137
+ output, GUMBO_TAG_OUTPUT
138
+ progress, GUMBO_TAG_PROGRESS
139
+ meter, GUMBO_TAG_METER
140
+ details, GUMBO_TAG_DETAILS
141
+ summary, GUMBO_TAG_SUMMARY
142
+ menu, GUMBO_TAG_MENU
143
+ menuitem, GUMBO_TAG_MENUITEM
144
+ applet, GUMBO_TAG_APPLET
145
+ acronym, GUMBO_TAG_ACRONYM
146
+ bgsound, GUMBO_TAG_BGSOUND
147
+ dir, GUMBO_TAG_DIR
148
+ frame, GUMBO_TAG_FRAME
149
+ frameset, GUMBO_TAG_FRAMESET
150
+ noframes, GUMBO_TAG_NOFRAMES
151
+ listing, GUMBO_TAG_LISTING
152
+ xmp, GUMBO_TAG_XMP
153
+ nextid, GUMBO_TAG_NEXTID
154
+ noembed, GUMBO_TAG_NOEMBED
155
+ plaintext, GUMBO_TAG_PLAINTEXT
156
+ rb, GUMBO_TAG_RB
157
+ strike, GUMBO_TAG_STRIKE
158
+ basefont, GUMBO_TAG_BASEFONT
159
+ big, GUMBO_TAG_BIG
160
+ blink, GUMBO_TAG_BLINK
161
+ center, GUMBO_TAG_CENTER
162
+ font, GUMBO_TAG_FONT
163
+ marquee, GUMBO_TAG_MARQUEE
164
+ multicol, GUMBO_TAG_MULTICOL
165
+ nobr, GUMBO_TAG_NOBR
166
+ spacer, GUMBO_TAG_SPACER
167
+ tt, GUMBO_TAG_TT
168
+ rtc, GUMBO_TAG_RTC
169
+ dialog, GUMBO_TAG_DIALOG
@@ -0,0 +1,13 @@
1
+ #ifndef GUMBO_TAG_LOOKUP_H_
2
+ #define GUMBO_TAG_LOOKUP_H_
3
+
4
+ #include "gumbo.h"
5
+
6
+ typedef struct {
7
+ const char *key;
8
+ const GumboTag tag;
9
+ } TagHashSlot;
10
+
11
+ const TagHashSlot *gumbo_tag_lookup(const char *str, size_t len);
12
+
13
+ #endif // GUMBO_TAG_LOOKUP_H_
@@ -0,0 +1,79 @@
1
+ /*
2
+ Copyright 2018 Stephen Checkoway
3
+
4
+ Licensed under the Apache License, Version 2.0 (the "License");
5
+ you may not use this file except in compliance with the License.
6
+ You may obtain a copy of the License at
7
+
8
+ https://www.apache.org/licenses/LICENSE-2.0
9
+
10
+ Unless required by applicable law or agreed to in writing, software
11
+ distributed under the License is distributed on an "AS IS" BASIS,
12
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ See the License for the specific language governing permissions and
14
+ limitations under the License.
15
+ */
16
+
17
+ #include <assert.h>
18
+
19
+ #include "ascii.h"
20
+ #include "token_buffer.h"
21
+ #include "tokenizer.h"
22
+ #include "util.h"
23
+
24
+ struct GumboInternalCharacterToken {
25
+ GumboSourcePosition position;
26
+ GumboStringPiece original_text;
27
+ int c;
28
+ };
29
+
30
+ void gumbo_character_token_buffer_init(GumboCharacterTokenBuffer* buffer) {
31
+ buffer->data = NULL;
32
+ buffer->length = 0;
33
+ buffer->capacity = 0;
34
+ }
35
+
36
+ void gumbo_character_token_buffer_append (
37
+ const GumboToken* token,
38
+ GumboCharacterTokenBuffer* buffer
39
+ ) {
40
+ assert(token->type == GUMBO_TOKEN_WHITESPACE
41
+ || token->type == GUMBO_TOKEN_CHARACTER);
42
+ if (buffer->length == buffer->capacity) {
43
+ if (buffer->capacity == 0)
44
+ buffer->capacity = 10;
45
+ else
46
+ buffer->capacity *= 2;
47
+ size_t bytes = sizeof(*buffer->data) * buffer->capacity;
48
+ buffer->data = gumbo_realloc(buffer->data, bytes);
49
+ }
50
+ size_t index = buffer->length++;
51
+ buffer->data[index].position = token->position;
52
+ buffer->data[index].original_text = token->original_text;
53
+ buffer->data[index].c = token->v.character;
54
+ }
55
+
56
+ void gumbo_character_token_buffer_get (
57
+ const GumboCharacterTokenBuffer* buffer,
58
+ size_t index,
59
+ struct GumboInternalToken* output
60
+ ) {
61
+ assert(index < buffer->length);
62
+ int c = buffer->data[index].c;
63
+ output->type = gumbo_ascii_isspace(c)?
64
+ GUMBO_TOKEN_WHITESPACE : GUMBO_TOKEN_CHARACTER;
65
+ output->position = buffer->data[index].position;
66
+ output->original_text = buffer->data[index].original_text;
67
+ output->v.character = c;
68
+ }
69
+
70
+ void gumbo_character_token_buffer_clear(GumboCharacterTokenBuffer* buffer) {
71
+ buffer->length = 0;
72
+ }
73
+
74
+ void gumbo_character_token_buffer_destroy(GumboCharacterTokenBuffer* buffer) {
75
+ gumbo_free(buffer->data);
76
+ buffer->data = NULL;
77
+ buffer->length = 0;
78
+ buffer->capacity = 0;
79
+ }
@@ -0,0 +1,71 @@
1
+ /*
2
+ Copyright 2018 Stephen Checkoway
3
+
4
+ Licensed under the Apache License, Version 2.0 (the "License");
5
+ you may not use this file except in compliance with the License.
6
+ You may obtain a copy of the License at
7
+
8
+ https://www.apache.org/licenses/LICENSE-2.0
9
+
10
+ Unless required by applicable law or agreed to in writing, software
11
+ distributed under the License is distributed on an "AS IS" BASIS,
12
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ See the License for the specific language governing permissions and
14
+ limitations under the License.
15
+ */
16
+
17
+ #ifndef GUMBO_TOKEN_BUFFER_H
18
+ #define GUMBO_TOKEN_BUFFER_H
19
+
20
+ #include <stdbool.h>
21
+ #include <stddef.h>
22
+
23
+ #include "gumbo.h"
24
+
25
+ #ifdef __cplusplus
26
+ extern "C" {
27
+ #endif
28
+
29
+ struct GumboInternalCharacterToken;
30
+ struct GumboInternalToken;
31
+
32
+ // A struct representing a growable sequence of character (and whitespace)
33
+ // tokens.
34
+ typedef struct {
35
+ // A pointer to the start of the sequence.
36
+ struct GumboInternalCharacterToken* data;
37
+
38
+ // The length of the sequence.
39
+ size_t length;
40
+
41
+ // The capacity of the buffer.
42
+ size_t capacity;
43
+ } GumboCharacterTokenBuffer;
44
+
45
+ // Initializes a new GumboCharacterTokenBuffer.
46
+ void gumbo_character_token_buffer_init(GumboCharacterTokenBuffer* buffer);
47
+
48
+ // Appends a character (or whitespace) token.
49
+ void gumbo_character_token_buffer_append (
50
+ const struct GumboInternalToken* token,
51
+ GumboCharacterTokenBuffer* buffer
52
+ );
53
+
54
+ void gumbo_character_token_buffer_get (
55
+ const GumboCharacterTokenBuffer* buffer,
56
+ size_t index,
57
+ struct GumboInternalToken* output
58
+ );
59
+
60
+ // Reinitialize this string buffer. This clears it by setting length=0. It
61
+ // does not zero out the buffer itself.
62
+ void gumbo_character_token_buffer_clear(GumboCharacterTokenBuffer* buffer);
63
+
64
+ // Deallocates this GumboCharacterTokenBuffer.
65
+ void gumbo_character_token_buffer_destroy(GumboCharacterTokenBuffer* buffer);
66
+
67
+ #ifdef __cplusplus
68
+ }
69
+ #endif
70
+
71
+ #endif // GUMBO_TOKEN_BUFFER_H
@@ -0,0 +1,17 @@
1
+ #ifndef GUMBO_TOKEN_TYPE_H_
2
+ #define GUMBO_TOKEN_TYPE_H_
3
+
4
+ // An enum representing the type of token.
5
+ typedef enum {
6
+ GUMBO_TOKEN_DOCTYPE,
7
+ GUMBO_TOKEN_START_TAG,
8
+ GUMBO_TOKEN_END_TAG,
9
+ GUMBO_TOKEN_COMMENT,
10
+ GUMBO_TOKEN_WHITESPACE,
11
+ GUMBO_TOKEN_CHARACTER,
12
+ GUMBO_TOKEN_CDATA,
13
+ GUMBO_TOKEN_NULL,
14
+ GUMBO_TOKEN_EOF
15
+ } GumboTokenType;
16
+
17
+ #endif // GUMBO_TOKEN_TYPE_H_