nokogiri 1.5.10 → 1.13.0
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of nokogiri might be problematic. Click here for more details.
- checksums.yaml +7 -0
- data/Gemfile +5 -0
- data/LICENSE-DEPENDENCIES.md +1903 -0
- data/LICENSE.md +9 -0
- data/README.md +280 -0
- data/bin/nokogiri +84 -31
- data/dependencies.yml +73 -0
- data/ext/nokogiri/depend +38 -358
- data/ext/nokogiri/extconf.rb +956 -100
- data/ext/nokogiri/gumbo.c +584 -0
- data/ext/nokogiri/html4_document.c +166 -0
- data/ext/nokogiri/html4_element_description.c +294 -0
- data/ext/nokogiri/html4_entity_lookup.c +37 -0
- data/ext/nokogiri/html4_sax_parser_context.c +120 -0
- data/ext/nokogiri/html4_sax_push_parser.c +95 -0
- data/ext/nokogiri/libxml2_backwards_compat.c +121 -0
- data/ext/nokogiri/nokogiri.c +232 -87
- data/ext/nokogiri/nokogiri.h +188 -129
- data/ext/nokogiri/test_global_handlers.c +40 -0
- data/ext/nokogiri/xml_attr.c +49 -40
- data/ext/nokogiri/xml_attribute_decl.c +18 -18
- data/ext/nokogiri/xml_cdata.c +24 -23
- data/ext/nokogiri/xml_comment.c +29 -21
- data/ext/nokogiri/xml_document.c +327 -223
- data/ext/nokogiri/xml_document_fragment.c +12 -16
- data/ext/nokogiri/xml_dtd.c +56 -50
- data/ext/nokogiri/xml_element_content.c +31 -26
- data/ext/nokogiri/xml_element_decl.c +22 -22
- data/ext/nokogiri/xml_encoding_handler.c +45 -20
- data/ext/nokogiri/xml_entity_decl.c +32 -30
- data/ext/nokogiri/xml_entity_reference.c +16 -18
- data/ext/nokogiri/xml_namespace.c +74 -32
- data/ext/nokogiri/xml_node.c +1290 -680
- data/ext/nokogiri/xml_node_set.c +239 -208
- data/ext/nokogiri/xml_processing_instruction.c +17 -19
- data/ext/nokogiri/xml_reader.c +227 -189
- data/ext/nokogiri/xml_relax_ng.c +52 -28
- data/ext/nokogiri/xml_sax_parser.c +123 -125
- data/ext/nokogiri/xml_sax_parser_context.c +138 -79
- data/ext/nokogiri/xml_sax_push_parser.c +88 -35
- data/ext/nokogiri/xml_schema.c +112 -33
- data/ext/nokogiri/xml_syntax_error.c +50 -23
- data/ext/nokogiri/xml_text.c +14 -18
- data/ext/nokogiri/xml_xpath_context.c +227 -140
- data/ext/nokogiri/xslt_stylesheet.c +162 -168
- data/gumbo-parser/CHANGES.md +63 -0
- data/gumbo-parser/Makefile +101 -0
- data/gumbo-parser/THANKS +27 -0
- data/gumbo-parser/src/Makefile +34 -0
- data/gumbo-parser/src/README.md +41 -0
- data/gumbo-parser/src/ascii.c +75 -0
- data/gumbo-parser/src/ascii.h +115 -0
- data/gumbo-parser/src/attribute.c +42 -0
- data/gumbo-parser/src/attribute.h +17 -0
- data/gumbo-parser/src/char_ref.c +22225 -0
- data/gumbo-parser/src/char_ref.h +29 -0
- data/gumbo-parser/src/char_ref.rl +2154 -0
- data/gumbo-parser/src/error.c +626 -0
- data/gumbo-parser/src/error.h +148 -0
- data/gumbo-parser/src/foreign_attrs.c +104 -0
- data/gumbo-parser/src/foreign_attrs.gperf +27 -0
- data/gumbo-parser/src/gumbo.h +943 -0
- data/gumbo-parser/src/insertion_mode.h +33 -0
- data/gumbo-parser/src/macros.h +91 -0
- data/gumbo-parser/src/parser.c +4875 -0
- data/gumbo-parser/src/parser.h +41 -0
- data/gumbo-parser/src/replacement.h +33 -0
- data/gumbo-parser/src/string_buffer.c +103 -0
- data/gumbo-parser/src/string_buffer.h +68 -0
- data/gumbo-parser/src/string_piece.c +48 -0
- data/gumbo-parser/src/svg_attrs.c +174 -0
- data/gumbo-parser/src/svg_attrs.gperf +77 -0
- data/gumbo-parser/src/svg_tags.c +137 -0
- data/gumbo-parser/src/svg_tags.gperf +55 -0
- data/gumbo-parser/src/tag.c +222 -0
- data/gumbo-parser/src/tag_lookup.c +382 -0
- data/gumbo-parser/src/tag_lookup.gperf +169 -0
- data/gumbo-parser/src/tag_lookup.h +13 -0
- data/gumbo-parser/src/token_buffer.c +79 -0
- data/gumbo-parser/src/token_buffer.h +71 -0
- data/gumbo-parser/src/token_type.h +17 -0
- data/gumbo-parser/src/tokenizer.c +3463 -0
- data/gumbo-parser/src/tokenizer.h +112 -0
- data/gumbo-parser/src/tokenizer_states.h +339 -0
- data/gumbo-parser/src/utf8.c +245 -0
- data/gumbo-parser/src/utf8.h +164 -0
- data/gumbo-parser/src/util.c +68 -0
- data/gumbo-parser/src/util.h +30 -0
- data/gumbo-parser/src/vector.c +111 -0
- data/gumbo-parser/src/vector.h +45 -0
- data/lib/nokogiri/class_resolver.rb +67 -0
- data/lib/nokogiri/css/node.rb +10 -58
- data/lib/nokogiri/css/parser.rb +327 -288
- data/lib/nokogiri/css/parser.y +67 -45
- data/lib/nokogiri/css/parser_extras.rb +52 -49
- data/lib/nokogiri/css/syntax_error.rb +3 -1
- data/lib/nokogiri/css/tokenizer.rb +107 -104
- data/lib/nokogiri/css/tokenizer.rex +7 -6
- data/lib/nokogiri/css/xpath_visitor.rb +263 -75
- data/lib/nokogiri/css.rb +50 -17
- data/lib/nokogiri/decorators/slop.rb +17 -8
- data/lib/nokogiri/extension.rb +31 -0
- data/lib/nokogiri/gumbo.rb +15 -0
- data/lib/nokogiri/html.rb +38 -27
- data/lib/nokogiri/{html → html4}/builder.rb +4 -2
- data/lib/nokogiri/html4/document.rb +331 -0
- data/lib/nokogiri/html4/document_fragment.rb +54 -0
- data/lib/nokogiri/{html → html4}/element_description.rb +3 -1
- data/lib/nokogiri/html4/element_description_defaults.rb +578 -0
- data/lib/nokogiri/{html → html4}/entity_lookup.rb +4 -2
- data/lib/nokogiri/{html → html4}/sax/parser.rb +24 -15
- data/lib/nokogiri/html4/sax/parser_context.rb +20 -0
- data/lib/nokogiri/html4/sax/push_parser.rb +37 -0
- data/lib/nokogiri/html4.rb +46 -0
- data/lib/nokogiri/html5/document.rb +88 -0
- data/lib/nokogiri/html5/document_fragment.rb +83 -0
- data/lib/nokogiri/html5/node.rb +96 -0
- data/lib/nokogiri/html5.rb +477 -0
- data/lib/nokogiri/jruby/dependencies.rb +21 -0
- data/lib/nokogiri/syntax_error.rb +2 -0
- data/lib/nokogiri/version/constant.rb +6 -0
- data/lib/nokogiri/version/info.rb +221 -0
- data/lib/nokogiri/version.rb +3 -90
- data/lib/nokogiri/xml/attr.rb +6 -3
- data/lib/nokogiri/xml/attribute_decl.rb +3 -1
- data/lib/nokogiri/xml/builder.rb +96 -54
- data/lib/nokogiri/xml/cdata.rb +3 -1
- data/lib/nokogiri/xml/character_data.rb +2 -0
- data/lib/nokogiri/xml/document.rb +234 -95
- data/lib/nokogiri/xml/document_fragment.rb +86 -36
- data/lib/nokogiri/xml/dtd.rb +16 -4
- data/lib/nokogiri/xml/element_content.rb +2 -0
- data/lib/nokogiri/xml/element_decl.rb +3 -1
- data/lib/nokogiri/xml/entity_decl.rb +4 -2
- data/lib/nokogiri/xml/entity_reference.rb +20 -0
- data/lib/nokogiri/xml/namespace.rb +3 -0
- data/lib/nokogiri/xml/node/save_options.rb +8 -4
- data/lib/nokogiri/xml/node.rb +947 -502
- data/lib/nokogiri/xml/node_set.rb +168 -159
- data/lib/nokogiri/xml/notation.rb +13 -0
- data/lib/nokogiri/xml/parse_options.rb +40 -5
- data/lib/nokogiri/xml/pp/character_data.rb +9 -6
- data/lib/nokogiri/xml/pp/node.rb +25 -26
- data/lib/nokogiri/xml/pp.rb +4 -2
- data/lib/nokogiri/xml/processing_instruction.rb +3 -1
- data/lib/nokogiri/xml/reader.rb +23 -28
- data/lib/nokogiri/xml/relax_ng.rb +8 -2
- data/lib/nokogiri/xml/sax/document.rb +45 -49
- data/lib/nokogiri/xml/sax/parser.rb +43 -41
- data/lib/nokogiri/xml/sax/parser_context.rb +8 -3
- data/lib/nokogiri/xml/sax/push_parser.rb +6 -5
- data/lib/nokogiri/xml/sax.rb +6 -4
- data/lib/nokogiri/xml/schema.rb +19 -9
- data/lib/nokogiri/xml/searchable.rb +259 -0
- data/lib/nokogiri/xml/syntax_error.rb +25 -1
- data/lib/nokogiri/xml/text.rb +2 -0
- data/lib/nokogiri/xml/xpath/syntax_error.rb +4 -2
- data/lib/nokogiri/xml/xpath.rb +15 -4
- data/lib/nokogiri/xml/xpath_context.rb +3 -3
- data/lib/nokogiri/xml.rb +38 -36
- data/lib/nokogiri/xslt/stylesheet.rb +3 -1
- data/lib/nokogiri/xslt.rb +18 -16
- data/lib/nokogiri.rb +69 -69
- data/lib/xsd/xmlparser/nokogiri.rb +26 -24
- data/patches/libxml2/0001-Remove-script-macro-support.patch +40 -0
- data/patches/libxml2/0002-Update-entities-to-remove-handling-of-ssi.patch +44 -0
- data/patches/libxml2/0003-libxml2.la-is-in-top_builddir.patch +25 -0
- data/patches/libxml2/0004-use-glibc-strlen.patch +53 -0
- data/patches/libxml2/0005-avoid-isnan-isinf.patch +81 -0
- data/patches/libxml2/0006-update-automake-files-for-arm64.patch +2511 -0
- data/patches/libxml2/0007-Fix-XPath-recursion-limit.patch +31 -0
- data/patches/libxml2/0008-htmlParseComment-handle-abruptly-closed-comments.patch +61 -0
- data/patches/libxml2/0009-allow-wildcard-namespaces.patch +77 -0
- data/patches/libxslt/0001-update-automake-files-for-arm64.patch +2511 -0
- data/patches/libxslt/0002-Fix-xml2-config-check-in-configure-script.patch +19 -0
- data/ports/archives/libxml2-2.9.12.tar.gz +0 -0
- data/ports/archives/libxslt-1.1.34.tar.gz +0 -0
- metadata +382 -460
- data/.autotest +0 -26
- data/.gemtest +0 -0
- data/CHANGELOG.ja.rdoc +0 -785
- data/CHANGELOG.rdoc +0 -783
- data/C_CODING_STYLE.rdoc +0 -33
- data/Manifest.txt +0 -303
- data/README.ja.rdoc +0 -106
- data/README.rdoc +0 -175
- data/ROADMAP.md +0 -90
- data/Rakefile +0 -228
- data/STANDARD_RESPONSES.md +0 -47
- data/Y_U_NO_GEMSPEC.md +0 -155
- data/build_all +0 -105
- data/ext/nokogiri/html_document.c +0 -170
- data/ext/nokogiri/html_document.h +0 -10
- data/ext/nokogiri/html_element_description.c +0 -279
- data/ext/nokogiri/html_element_description.h +0 -10
- data/ext/nokogiri/html_entity_lookup.c +0 -32
- data/ext/nokogiri/html_entity_lookup.h +0 -8
- data/ext/nokogiri/html_sax_parser_context.c +0 -116
- data/ext/nokogiri/html_sax_parser_context.h +0 -11
- data/ext/nokogiri/html_sax_push_parser.c +0 -87
- data/ext/nokogiri/html_sax_push_parser.h +0 -9
- data/ext/nokogiri/xml_attr.h +0 -9
- data/ext/nokogiri/xml_attribute_decl.h +0 -9
- data/ext/nokogiri/xml_cdata.h +0 -9
- data/ext/nokogiri/xml_comment.h +0 -9
- data/ext/nokogiri/xml_document.h +0 -23
- data/ext/nokogiri/xml_document_fragment.h +0 -10
- data/ext/nokogiri/xml_dtd.h +0 -10
- data/ext/nokogiri/xml_element_content.h +0 -10
- data/ext/nokogiri/xml_element_decl.h +0 -9
- data/ext/nokogiri/xml_encoding_handler.h +0 -8
- data/ext/nokogiri/xml_entity_decl.h +0 -10
- data/ext/nokogiri/xml_entity_reference.h +0 -9
- data/ext/nokogiri/xml_io.c +0 -56
- data/ext/nokogiri/xml_io.h +0 -11
- data/ext/nokogiri/xml_libxml2_hacks.c +0 -112
- data/ext/nokogiri/xml_libxml2_hacks.h +0 -12
- data/ext/nokogiri/xml_namespace.h +0 -13
- data/ext/nokogiri/xml_node.h +0 -13
- data/ext/nokogiri/xml_node_set.h +0 -14
- data/ext/nokogiri/xml_processing_instruction.h +0 -9
- data/ext/nokogiri/xml_reader.h +0 -10
- data/ext/nokogiri/xml_relax_ng.h +0 -9
- data/ext/nokogiri/xml_sax_parser.h +0 -39
- data/ext/nokogiri/xml_sax_parser_context.h +0 -10
- data/ext/nokogiri/xml_sax_push_parser.h +0 -9
- data/ext/nokogiri/xml_schema.h +0 -9
- data/ext/nokogiri/xml_syntax_error.h +0 -13
- data/ext/nokogiri/xml_text.h +0 -9
- data/ext/nokogiri/xml_xpath_context.h +0 -10
- data/ext/nokogiri/xslt_stylesheet.h +0 -14
- data/lib/nokogiri/html/document.rb +0 -254
- data/lib/nokogiri/html/document_fragment.rb +0 -41
- data/lib/nokogiri/html/element_description_defaults.rb +0 -671
- data/lib/nokogiri/html/sax/parser_context.rb +0 -16
- data/lib/nokogiri/html/sax/push_parser.rb +0 -16
- data/tasks/cross_compile.rb +0 -150
- data/tasks/nokogiri.org.rb +0 -24
- data/tasks/test.rb +0 -95
- data/test/css/test_nthiness.rb +0 -159
- data/test/css/test_parser.rb +0 -341
- data/test/css/test_tokenizer.rb +0 -198
- data/test/css/test_xpath_visitor.rb +0 -91
- data/test/decorators/test_slop.rb +0 -16
- data/test/files/2ch.html +0 -108
- data/test/files/address_book.rlx +0 -12
- data/test/files/address_book.xml +0 -10
- data/test/files/bar/bar.xsd +0 -4
- data/test/files/dont_hurt_em_why.xml +0 -422
- data/test/files/encoding.html +0 -82
- data/test/files/encoding.xhtml +0 -84
- data/test/files/exslt.xml +0 -8
- data/test/files/exslt.xslt +0 -35
- data/test/files/foo/foo.xsd +0 -4
- data/test/files/metacharset.html +0 -10
- data/test/files/noencoding.html +0 -47
- data/test/files/po.xml +0 -32
- data/test/files/po.xsd +0 -66
- data/test/files/shift_jis.html +0 -10
- data/test/files/shift_jis.xml +0 -5
- data/test/files/snuggles.xml +0 -3
- data/test/files/staff.dtd +0 -10
- data/test/files/staff.xml +0 -59
- data/test/files/staff.xslt +0 -32
- data/test/files/test_document_url/bar.xml +0 -2
- data/test/files/test_document_url/document.dtd +0 -4
- data/test/files/test_document_url/document.xml +0 -6
- data/test/files/tlm.html +0 -850
- data/test/files/to_be_xincluded.xml +0 -2
- data/test/files/valid_bar.xml +0 -2
- data/test/files/xinclude.xml +0 -4
- data/test/helper.rb +0 -154
- data/test/html/sax/test_parser.rb +0 -141
- data/test/html/sax/test_parser_context.rb +0 -46
- data/test/html/test_builder.rb +0 -164
- data/test/html/test_document.rb +0 -552
- data/test/html/test_document_encoding.rb +0 -138
- data/test/html/test_document_fragment.rb +0 -261
- data/test/html/test_element_description.rb +0 -105
- data/test/html/test_named_characters.rb +0 -14
- data/test/html/test_node.rb +0 -196
- data/test/html/test_node_encoding.rb +0 -27
- data/test/namespaces/test_additional_namespaces_in_builder_doc.rb +0 -14
- data/test/namespaces/test_namespaces_in_builder_doc.rb +0 -75
- data/test/namespaces/test_namespaces_in_created_doc.rb +0 -75
- data/test/namespaces/test_namespaces_in_parsed_doc.rb +0 -66
- data/test/test_convert_xpath.rb +0 -135
- data/test/test_css_cache.rb +0 -45
- data/test/test_encoding_handler.rb +0 -46
- data/test/test_memory_leak.rb +0 -156
- data/test/test_nokogiri.rb +0 -132
- data/test/test_reader.rb +0 -555
- data/test/test_soap4r_sax.rb +0 -52
- data/test/test_xslt_transforms.rb +0 -254
- data/test/xml/node/test_save_options.rb +0 -28
- data/test/xml/node/test_subclass.rb +0 -44
- data/test/xml/sax/test_parser.rb +0 -366
- data/test/xml/sax/test_parser_context.rb +0 -106
- data/test/xml/sax/test_push_parser.rb +0 -157
- data/test/xml/test_attr.rb +0 -64
- data/test/xml/test_attribute_decl.rb +0 -86
- data/test/xml/test_builder.rb +0 -306
- data/test/xml/test_c14n.rb +0 -151
- data/test/xml/test_cdata.rb +0 -48
- data/test/xml/test_comment.rb +0 -29
- data/test/xml/test_document.rb +0 -828
- data/test/xml/test_document_encoding.rb +0 -28
- data/test/xml/test_document_fragment.rb +0 -223
- data/test/xml/test_dtd.rb +0 -103
- data/test/xml/test_dtd_encoding.rb +0 -33
- data/test/xml/test_element_content.rb +0 -56
- data/test/xml/test_element_decl.rb +0 -73
- data/test/xml/test_entity_decl.rb +0 -122
- data/test/xml/test_entity_reference.rb +0 -245
- data/test/xml/test_namespace.rb +0 -95
- data/test/xml/test_node.rb +0 -1137
- data/test/xml/test_node_attributes.rb +0 -96
- data/test/xml/test_node_encoding.rb +0 -107
- data/test/xml/test_node_inheritance.rb +0 -32
- data/test/xml/test_node_reparenting.rb +0 -374
- data/test/xml/test_node_set.rb +0 -755
- data/test/xml/test_parse_options.rb +0 -64
- data/test/xml/test_processing_instruction.rb +0 -30
- data/test/xml/test_reader_encoding.rb +0 -142
- data/test/xml/test_relax_ng.rb +0 -60
- data/test/xml/test_schema.rb +0 -103
- data/test/xml/test_syntax_error.rb +0 -12
- data/test/xml/test_text.rb +0 -45
- data/test/xml/test_unparented_node.rb +0 -422
- data/test/xml/test_xinclude.rb +0 -83
- data/test/xml/test_xpath.rb +0 -295
- data/test/xslt/test_custom_functions.rb +0 -133
- data/test/xslt/test_exception_handling.rb +0 -37
- data/test_all +0 -81
@@ -0,0 +1,169 @@
|
|
1
|
+
%{
|
2
|
+
#include "tag_lookup.h"
|
3
|
+
#include "macros.h"
|
4
|
+
#include "ascii.h"
|
5
|
+
%}
|
6
|
+
|
7
|
+
%ignore-case
|
8
|
+
%struct-type
|
9
|
+
%omit-struct-type
|
10
|
+
%compare-lengths
|
11
|
+
%readonly-tables
|
12
|
+
%null-strings
|
13
|
+
%includes
|
14
|
+
%define lookup-function-name gumbo_tag_lookup
|
15
|
+
%define slot-name key
|
16
|
+
%define initializer-suffix ,GUMBO_TAG_UNKNOWN
|
17
|
+
TagHashSlot;
|
18
|
+
|
19
|
+
%%
|
20
|
+
html, GUMBO_TAG_HTML
|
21
|
+
head, GUMBO_TAG_HEAD
|
22
|
+
title, GUMBO_TAG_TITLE
|
23
|
+
base, GUMBO_TAG_BASE
|
24
|
+
link, GUMBO_TAG_LINK
|
25
|
+
meta, GUMBO_TAG_META
|
26
|
+
style, GUMBO_TAG_STYLE
|
27
|
+
script, GUMBO_TAG_SCRIPT
|
28
|
+
noscript, GUMBO_TAG_NOSCRIPT
|
29
|
+
template, GUMBO_TAG_TEMPLATE
|
30
|
+
body, GUMBO_TAG_BODY
|
31
|
+
article, GUMBO_TAG_ARTICLE
|
32
|
+
section, GUMBO_TAG_SECTION
|
33
|
+
nav, GUMBO_TAG_NAV
|
34
|
+
aside, GUMBO_TAG_ASIDE
|
35
|
+
h1, GUMBO_TAG_H1
|
36
|
+
h2, GUMBO_TAG_H2
|
37
|
+
h3, GUMBO_TAG_H3
|
38
|
+
h4, GUMBO_TAG_H4
|
39
|
+
h5, GUMBO_TAG_H5
|
40
|
+
h6, GUMBO_TAG_H6
|
41
|
+
hgroup, GUMBO_TAG_HGROUP
|
42
|
+
header, GUMBO_TAG_HEADER
|
43
|
+
footer, GUMBO_TAG_FOOTER
|
44
|
+
address, GUMBO_TAG_ADDRESS
|
45
|
+
p, GUMBO_TAG_P
|
46
|
+
hr, GUMBO_TAG_HR
|
47
|
+
pre, GUMBO_TAG_PRE
|
48
|
+
blockquote, GUMBO_TAG_BLOCKQUOTE
|
49
|
+
ol, GUMBO_TAG_OL
|
50
|
+
ul, GUMBO_TAG_UL
|
51
|
+
li, GUMBO_TAG_LI
|
52
|
+
dl, GUMBO_TAG_DL
|
53
|
+
dt, GUMBO_TAG_DT
|
54
|
+
dd, GUMBO_TAG_DD
|
55
|
+
figure, GUMBO_TAG_FIGURE
|
56
|
+
figcaption, GUMBO_TAG_FIGCAPTION
|
57
|
+
main, GUMBO_TAG_MAIN
|
58
|
+
div, GUMBO_TAG_DIV
|
59
|
+
a, GUMBO_TAG_A
|
60
|
+
em, GUMBO_TAG_EM
|
61
|
+
strong, GUMBO_TAG_STRONG
|
62
|
+
small, GUMBO_TAG_SMALL
|
63
|
+
s, GUMBO_TAG_S
|
64
|
+
cite, GUMBO_TAG_CITE
|
65
|
+
q, GUMBO_TAG_Q
|
66
|
+
dfn, GUMBO_TAG_DFN
|
67
|
+
abbr, GUMBO_TAG_ABBR
|
68
|
+
data, GUMBO_TAG_DATA
|
69
|
+
time, GUMBO_TAG_TIME
|
70
|
+
code, GUMBO_TAG_CODE
|
71
|
+
var, GUMBO_TAG_VAR
|
72
|
+
samp, GUMBO_TAG_SAMP
|
73
|
+
kbd, GUMBO_TAG_KBD
|
74
|
+
sub, GUMBO_TAG_SUB
|
75
|
+
sup, GUMBO_TAG_SUP
|
76
|
+
i, GUMBO_TAG_I
|
77
|
+
b, GUMBO_TAG_B
|
78
|
+
u, GUMBO_TAG_U
|
79
|
+
mark, GUMBO_TAG_MARK
|
80
|
+
ruby, GUMBO_TAG_RUBY
|
81
|
+
rt, GUMBO_TAG_RT
|
82
|
+
rp, GUMBO_TAG_RP
|
83
|
+
bdi, GUMBO_TAG_BDI
|
84
|
+
bdo, GUMBO_TAG_BDO
|
85
|
+
span, GUMBO_TAG_SPAN
|
86
|
+
br, GUMBO_TAG_BR
|
87
|
+
wbr, GUMBO_TAG_WBR
|
88
|
+
ins, GUMBO_TAG_INS
|
89
|
+
del, GUMBO_TAG_DEL
|
90
|
+
image, GUMBO_TAG_IMAGE
|
91
|
+
img, GUMBO_TAG_IMG
|
92
|
+
iframe, GUMBO_TAG_IFRAME
|
93
|
+
embed, GUMBO_TAG_EMBED
|
94
|
+
object, GUMBO_TAG_OBJECT
|
95
|
+
param, GUMBO_TAG_PARAM
|
96
|
+
video, GUMBO_TAG_VIDEO
|
97
|
+
audio, GUMBO_TAG_AUDIO
|
98
|
+
source, GUMBO_TAG_SOURCE
|
99
|
+
track, GUMBO_TAG_TRACK
|
100
|
+
canvas, GUMBO_TAG_CANVAS
|
101
|
+
map, GUMBO_TAG_MAP
|
102
|
+
area, GUMBO_TAG_AREA
|
103
|
+
math, GUMBO_TAG_MATH
|
104
|
+
mi, GUMBO_TAG_MI
|
105
|
+
mo, GUMBO_TAG_MO
|
106
|
+
mn, GUMBO_TAG_MN
|
107
|
+
ms, GUMBO_TAG_MS
|
108
|
+
mtext, GUMBO_TAG_MTEXT
|
109
|
+
mglyph, GUMBO_TAG_MGLYPH
|
110
|
+
malignmark, GUMBO_TAG_MALIGNMARK
|
111
|
+
annotation-xml, GUMBO_TAG_ANNOTATION_XML
|
112
|
+
svg, GUMBO_TAG_SVG
|
113
|
+
foreignobject, GUMBO_TAG_FOREIGNOBJECT
|
114
|
+
desc, GUMBO_TAG_DESC
|
115
|
+
table, GUMBO_TAG_TABLE
|
116
|
+
caption, GUMBO_TAG_CAPTION
|
117
|
+
colgroup, GUMBO_TAG_COLGROUP
|
118
|
+
col, GUMBO_TAG_COL
|
119
|
+
tbody, GUMBO_TAG_TBODY
|
120
|
+
thead, GUMBO_TAG_THEAD
|
121
|
+
tfoot, GUMBO_TAG_TFOOT
|
122
|
+
tr, GUMBO_TAG_TR
|
123
|
+
td, GUMBO_TAG_TD
|
124
|
+
th, GUMBO_TAG_TH
|
125
|
+
form, GUMBO_TAG_FORM
|
126
|
+
fieldset, GUMBO_TAG_FIELDSET
|
127
|
+
legend, GUMBO_TAG_LEGEND
|
128
|
+
label, GUMBO_TAG_LABEL
|
129
|
+
input, GUMBO_TAG_INPUT
|
130
|
+
button, GUMBO_TAG_BUTTON
|
131
|
+
select, GUMBO_TAG_SELECT
|
132
|
+
datalist, GUMBO_TAG_DATALIST
|
133
|
+
optgroup, GUMBO_TAG_OPTGROUP
|
134
|
+
option, GUMBO_TAG_OPTION
|
135
|
+
textarea, GUMBO_TAG_TEXTAREA
|
136
|
+
keygen, GUMBO_TAG_KEYGEN
|
137
|
+
output, GUMBO_TAG_OUTPUT
|
138
|
+
progress, GUMBO_TAG_PROGRESS
|
139
|
+
meter, GUMBO_TAG_METER
|
140
|
+
details, GUMBO_TAG_DETAILS
|
141
|
+
summary, GUMBO_TAG_SUMMARY
|
142
|
+
menu, GUMBO_TAG_MENU
|
143
|
+
menuitem, GUMBO_TAG_MENUITEM
|
144
|
+
applet, GUMBO_TAG_APPLET
|
145
|
+
acronym, GUMBO_TAG_ACRONYM
|
146
|
+
bgsound, GUMBO_TAG_BGSOUND
|
147
|
+
dir, GUMBO_TAG_DIR
|
148
|
+
frame, GUMBO_TAG_FRAME
|
149
|
+
frameset, GUMBO_TAG_FRAMESET
|
150
|
+
noframes, GUMBO_TAG_NOFRAMES
|
151
|
+
listing, GUMBO_TAG_LISTING
|
152
|
+
xmp, GUMBO_TAG_XMP
|
153
|
+
nextid, GUMBO_TAG_NEXTID
|
154
|
+
noembed, GUMBO_TAG_NOEMBED
|
155
|
+
plaintext, GUMBO_TAG_PLAINTEXT
|
156
|
+
rb, GUMBO_TAG_RB
|
157
|
+
strike, GUMBO_TAG_STRIKE
|
158
|
+
basefont, GUMBO_TAG_BASEFONT
|
159
|
+
big, GUMBO_TAG_BIG
|
160
|
+
blink, GUMBO_TAG_BLINK
|
161
|
+
center, GUMBO_TAG_CENTER
|
162
|
+
font, GUMBO_TAG_FONT
|
163
|
+
marquee, GUMBO_TAG_MARQUEE
|
164
|
+
multicol, GUMBO_TAG_MULTICOL
|
165
|
+
nobr, GUMBO_TAG_NOBR
|
166
|
+
spacer, GUMBO_TAG_SPACER
|
167
|
+
tt, GUMBO_TAG_TT
|
168
|
+
rtc, GUMBO_TAG_RTC
|
169
|
+
dialog, GUMBO_TAG_DIALOG
|
@@ -0,0 +1,13 @@
|
|
1
|
+
#ifndef GUMBO_TAG_LOOKUP_H_
|
2
|
+
#define GUMBO_TAG_LOOKUP_H_
|
3
|
+
|
4
|
+
#include "gumbo.h"
|
5
|
+
|
6
|
+
typedef struct {
|
7
|
+
const char *key;
|
8
|
+
const GumboTag tag;
|
9
|
+
} TagHashSlot;
|
10
|
+
|
11
|
+
const TagHashSlot *gumbo_tag_lookup(const char *str, size_t len);
|
12
|
+
|
13
|
+
#endif // GUMBO_TAG_LOOKUP_H_
|
@@ -0,0 +1,79 @@
|
|
1
|
+
/*
|
2
|
+
Copyright 2018 Stephen Checkoway
|
3
|
+
|
4
|
+
Licensed under the Apache License, Version 2.0 (the "License");
|
5
|
+
you may not use this file except in compliance with the License.
|
6
|
+
You may obtain a copy of the License at
|
7
|
+
|
8
|
+
https://www.apache.org/licenses/LICENSE-2.0
|
9
|
+
|
10
|
+
Unless required by applicable law or agreed to in writing, software
|
11
|
+
distributed under the License is distributed on an "AS IS" BASIS,
|
12
|
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
13
|
+
See the License for the specific language governing permissions and
|
14
|
+
limitations under the License.
|
15
|
+
*/
|
16
|
+
|
17
|
+
#include <assert.h>
|
18
|
+
|
19
|
+
#include "ascii.h"
|
20
|
+
#include "token_buffer.h"
|
21
|
+
#include "tokenizer.h"
|
22
|
+
#include "util.h"
|
23
|
+
|
24
|
+
struct GumboInternalCharacterToken {
|
25
|
+
GumboSourcePosition position;
|
26
|
+
GumboStringPiece original_text;
|
27
|
+
int c;
|
28
|
+
};
|
29
|
+
|
30
|
+
// Puts `buffer` into the empty state: no backing storage, zero length and
// zero capacity. No memory is allocated here.
void gumbo_character_token_buffer_init(GumboCharacterTokenBuffer* buffer) {
  *buffer = (GumboCharacterTokenBuffer) {
    .data = NULL,
    .length = 0,
    .capacity = 0,
  };
}
|
35
|
+
|
36
|
+
void gumbo_character_token_buffer_append (
|
37
|
+
const GumboToken* token,
|
38
|
+
GumboCharacterTokenBuffer* buffer
|
39
|
+
) {
|
40
|
+
assert(token->type == GUMBO_TOKEN_WHITESPACE
|
41
|
+
|| token->type == GUMBO_TOKEN_CHARACTER);
|
42
|
+
if (buffer->length == buffer->capacity) {
|
43
|
+
if (buffer->capacity == 0)
|
44
|
+
buffer->capacity = 10;
|
45
|
+
else
|
46
|
+
buffer->capacity *= 2;
|
47
|
+
size_t bytes = sizeof(*buffer->data) * buffer->capacity;
|
48
|
+
buffer->data = gumbo_realloc(buffer->data, bytes);
|
49
|
+
}
|
50
|
+
size_t index = buffer->length++;
|
51
|
+
buffer->data[index].position = token->position;
|
52
|
+
buffer->data[index].original_text = token->original_text;
|
53
|
+
buffer->data[index].c = token->v.character;
|
54
|
+
}
|
55
|
+
|
56
|
+
void gumbo_character_token_buffer_get (
|
57
|
+
const GumboCharacterTokenBuffer* buffer,
|
58
|
+
size_t index,
|
59
|
+
struct GumboInternalToken* output
|
60
|
+
) {
|
61
|
+
assert(index < buffer->length);
|
62
|
+
int c = buffer->data[index].c;
|
63
|
+
output->type = gumbo_ascii_isspace(c)?
|
64
|
+
GUMBO_TOKEN_WHITESPACE : GUMBO_TOKEN_CHARACTER;
|
65
|
+
output->position = buffer->data[index].position;
|
66
|
+
output->original_text = buffer->data[index].original_text;
|
67
|
+
output->v.character = c;
|
68
|
+
}
|
69
|
+
|
70
|
+
// Empties `buffer` by resetting its length to zero. The backing array and
// its capacity are left untouched, so the storage is reused by later appends.
void gumbo_character_token_buffer_clear(GumboCharacterTokenBuffer* buffer) {
  buffer->length = 0;
}
|
73
|
+
|
74
|
+
// Releases the backing array with gumbo_free() and returns `buffer` to the
// same empty state that gumbo_character_token_buffer_init() establishes
// (NULL data, zero length, zero capacity).
void gumbo_character_token_buffer_destroy(GumboCharacterTokenBuffer* buffer) {
  gumbo_free(buffer->data);
  gumbo_character_token_buffer_init(buffer);
}
|
@@ -0,0 +1,71 @@
|
|
1
|
+
/*
|
2
|
+
Copyright 2018 Stephen Checkoway
|
3
|
+
|
4
|
+
Licensed under the Apache License, Version 2.0 (the "License");
|
5
|
+
you may not use this file except in compliance with the License.
|
6
|
+
You may obtain a copy of the License at
|
7
|
+
|
8
|
+
https://www.apache.org/licenses/LICENSE-2.0
|
9
|
+
|
10
|
+
Unless required by applicable law or agreed to in writing, software
|
11
|
+
distributed under the License is distributed on an "AS IS" BASIS,
|
12
|
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
13
|
+
See the License for the specific language governing permissions and
|
14
|
+
limitations under the License.
|
15
|
+
*/
|
16
|
+
|
17
|
+
#ifndef GUMBO_TOKEN_BUFFER_H
|
18
|
+
#define GUMBO_TOKEN_BUFFER_H
|
19
|
+
|
20
|
+
#include <stdbool.h>
|
21
|
+
#include <stddef.h>
|
22
|
+
|
23
|
+
#include "gumbo.h"
|
24
|
+
|
25
|
+
#ifdef __cplusplus
|
26
|
+
extern "C" {
|
27
|
+
#endif
|
28
|
+
|
29
|
+
struct GumboInternalCharacterToken;
|
30
|
+
struct GumboInternalToken;
|
31
|
+
|
32
|
+
// A growable sequence of character (and whitespace) tokens.
typedef struct {
  // Pointer to the first stored token; NULL after init or destroy.
  struct GumboInternalCharacterToken* data;

  // Number of tokens currently stored.
  size_t length;

  // Number of tokens the allocation behind `data` has room for.
  size_t capacity;
} GumboCharacterTokenBuffer;
|
44
|
+
|
45
|
+
// Initializes a new GumboCharacterTokenBuffer.
|
46
|
+
void gumbo_character_token_buffer_init(GumboCharacterTokenBuffer* buffer);
|
47
|
+
|
48
|
+
// Appends a character (or whitespace) token.
|
49
|
+
void gumbo_character_token_buffer_append (
|
50
|
+
const struct GumboInternalToken* token,
|
51
|
+
GumboCharacterTokenBuffer* buffer
|
52
|
+
);
|
53
|
+
|
54
|
+
void gumbo_character_token_buffer_get (
|
55
|
+
const GumboCharacterTokenBuffer* buffer,
|
56
|
+
size_t index,
|
57
|
+
struct GumboInternalToken* output
|
58
|
+
);
|
59
|
+
|
60
|
+
// Reinitialize this character token buffer. This clears it by setting length=0. It
|
61
|
+
// does not zero out the buffer itself.
|
62
|
+
void gumbo_character_token_buffer_clear(GumboCharacterTokenBuffer* buffer);
|
63
|
+
|
64
|
+
// Deallocates this GumboCharacterTokenBuffer.
|
65
|
+
void gumbo_character_token_buffer_destroy(GumboCharacterTokenBuffer* buffer);
|
66
|
+
|
67
|
+
#ifdef __cplusplus
|
68
|
+
}
|
69
|
+
#endif
|
70
|
+
|
71
|
+
#endif // GUMBO_TOKEN_BUFFER_H
|
@@ -0,0 +1,17 @@
|
|
1
|
+
#ifndef GUMBO_TOKEN_TYPE_H_
|
2
|
+
#define GUMBO_TOKEN_TYPE_H_
|
3
|
+
|
4
|
+
// The kinds of token. Enumerators carry no explicit values, so they are
// numbered 0 through 8 in the order listed; keep the order stable.
typedef enum {
  GUMBO_TOKEN_DOCTYPE,
  GUMBO_TOKEN_START_TAG,
  GUMBO_TOKEN_END_TAG,
  GUMBO_TOKEN_COMMENT,
  GUMBO_TOKEN_WHITESPACE,
  GUMBO_TOKEN_CHARACTER,
  GUMBO_TOKEN_CDATA,
  GUMBO_TOKEN_NULL,
  GUMBO_TOKEN_EOF
} GumboTokenType;
|
16
|
+
|
17
|
+
#endif // GUMBO_TOKEN_TYPE_H_
|