nokogiri 1.9.1 → 1.15.3
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of nokogiri might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/Gemfile +45 -0
- data/LICENSE-DEPENDENCIES.md +1636 -1024
- data/LICENSE.md +5 -28
- data/README.md +203 -89
- data/bin/nokogiri +63 -50
- data/dependencies.yml +33 -61
- data/ext/nokogiri/depend +38 -358
- data/ext/nokogiri/extconf.rb +864 -418
- data/ext/nokogiri/gumbo.c +594 -0
- data/ext/nokogiri/html4_document.c +165 -0
- data/ext/nokogiri/html4_element_description.c +299 -0
- data/ext/nokogiri/html4_entity_lookup.c +37 -0
- data/ext/nokogiri/html4_sax_parser_context.c +108 -0
- data/ext/nokogiri/html4_sax_push_parser.c +95 -0
- data/ext/nokogiri/libxml2_backwards_compat.c +121 -0
- data/ext/nokogiri/nokogiri.c +251 -105
- data/ext/nokogiri/nokogiri.h +215 -90
- data/ext/nokogiri/test_global_handlers.c +40 -0
- data/ext/nokogiri/xml_attr.c +17 -17
- data/ext/nokogiri/xml_attribute_decl.c +22 -22
- data/ext/nokogiri/xml_cdata.c +40 -31
- data/ext/nokogiri/xml_comment.c +20 -27
- data/ext/nokogiri/xml_document.c +401 -240
- data/ext/nokogiri/xml_document_fragment.c +13 -17
- data/ext/nokogiri/xml_dtd.c +64 -58
- data/ext/nokogiri/xml_element_content.c +63 -55
- data/ext/nokogiri/xml_element_decl.c +31 -31
- data/ext/nokogiri/xml_encoding_handler.c +54 -21
- data/ext/nokogiri/xml_entity_decl.c +37 -35
- data/ext/nokogiri/xml_entity_reference.c +17 -19
- data/ext/nokogiri/xml_namespace.c +135 -61
- data/ext/nokogiri/xml_node.c +1346 -677
- data/ext/nokogiri/xml_node_set.c +246 -216
- data/ext/nokogiri/xml_processing_instruction.c +18 -20
- data/ext/nokogiri/xml_reader.c +347 -212
- data/ext/nokogiri/xml_relax_ng.c +86 -77
- data/ext/nokogiri/xml_sax_parser.c +149 -124
- data/ext/nokogiri/xml_sax_parser_context.c +145 -103
- data/ext/nokogiri/xml_sax_push_parser.c +64 -36
- data/ext/nokogiri/xml_schema.c +138 -81
- data/ext/nokogiri/xml_syntax_error.c +42 -21
- data/ext/nokogiri/xml_text.c +36 -26
- data/ext/nokogiri/xml_xpath_context.c +366 -178
- data/ext/nokogiri/xslt_stylesheet.c +335 -189
- data/gumbo-parser/CHANGES.md +63 -0
- data/gumbo-parser/Makefile +111 -0
- data/gumbo-parser/THANKS +27 -0
- data/gumbo-parser/src/Makefile +34 -0
- data/gumbo-parser/src/README.md +41 -0
- data/gumbo-parser/src/ascii.c +75 -0
- data/gumbo-parser/src/ascii.h +115 -0
- data/gumbo-parser/src/attribute.c +42 -0
- data/gumbo-parser/src/attribute.h +17 -0
- data/gumbo-parser/src/char_ref.c +22225 -0
- data/gumbo-parser/src/char_ref.h +29 -0
- data/gumbo-parser/src/char_ref.rl +2154 -0
- data/gumbo-parser/src/error.c +630 -0
- data/gumbo-parser/src/error.h +148 -0
- data/gumbo-parser/src/foreign_attrs.c +103 -0
- data/gumbo-parser/src/foreign_attrs.gperf +27 -0
- data/gumbo-parser/src/insertion_mode.h +33 -0
- data/gumbo-parser/src/macros.h +91 -0
- data/gumbo-parser/src/nokogiri_gumbo.h +944 -0
- data/gumbo-parser/src/parser.c +4891 -0
- data/gumbo-parser/src/parser.h +41 -0
- data/gumbo-parser/src/replacement.h +33 -0
- data/gumbo-parser/src/string_buffer.c +103 -0
- data/gumbo-parser/src/string_buffer.h +68 -0
- data/gumbo-parser/src/string_piece.c +48 -0
- data/gumbo-parser/src/svg_attrs.c +174 -0
- data/gumbo-parser/src/svg_attrs.gperf +77 -0
- data/gumbo-parser/src/svg_tags.c +137 -0
- data/gumbo-parser/src/svg_tags.gperf +55 -0
- data/gumbo-parser/src/tag.c +223 -0
- data/gumbo-parser/src/tag_lookup.c +382 -0
- data/gumbo-parser/src/tag_lookup.gperf +170 -0
- data/gumbo-parser/src/tag_lookup.h +13 -0
- data/gumbo-parser/src/token_buffer.c +79 -0
- data/gumbo-parser/src/token_buffer.h +71 -0
- data/gumbo-parser/src/token_type.h +17 -0
- data/gumbo-parser/src/tokenizer.c +3463 -0
- data/gumbo-parser/src/tokenizer.h +112 -0
- data/gumbo-parser/src/tokenizer_states.h +339 -0
- data/gumbo-parser/src/utf8.c +245 -0
- data/gumbo-parser/src/utf8.h +164 -0
- data/gumbo-parser/src/util.c +66 -0
- data/gumbo-parser/src/util.h +34 -0
- data/gumbo-parser/src/vector.c +111 -0
- data/gumbo-parser/src/vector.h +45 -0
- data/lib/nokogiri/class_resolver.rb +67 -0
- data/lib/nokogiri/css/node.rb +10 -8
- data/lib/nokogiri/css/parser.rb +397 -377
- data/lib/nokogiri/css/parser.y +250 -245
- data/lib/nokogiri/css/parser_extras.rb +54 -49
- data/lib/nokogiri/css/syntax_error.rb +3 -1
- data/lib/nokogiri/css/tokenizer.rb +107 -104
- data/lib/nokogiri/css/tokenizer.rex +3 -2
- data/lib/nokogiri/css/xpath_visitor.rb +224 -95
- data/lib/nokogiri/css.rb +56 -17
- data/lib/nokogiri/decorators/slop.rb +9 -7
- data/lib/nokogiri/encoding_handler.rb +57 -0
- data/lib/nokogiri/extension.rb +32 -0
- data/lib/nokogiri/gumbo.rb +15 -0
- data/lib/nokogiri/html.rb +38 -27
- data/lib/nokogiri/{html → html4}/builder.rb +4 -2
- data/lib/nokogiri/html4/document.rb +214 -0
- data/lib/nokogiri/html4/document_fragment.rb +54 -0
- data/lib/nokogiri/{html → html4}/element_description.rb +3 -1
- data/lib/nokogiri/html4/element_description_defaults.rb +2040 -0
- data/lib/nokogiri/html4/encoding_reader.rb +121 -0
- data/lib/nokogiri/{html → html4}/entity_lookup.rb +4 -2
- data/lib/nokogiri/{html → html4}/sax/parser.rb +17 -16
- data/lib/nokogiri/html4/sax/parser_context.rb +20 -0
- data/lib/nokogiri/{html → html4}/sax/push_parser.rb +12 -11
- data/lib/nokogiri/html4.rb +47 -0
- data/lib/nokogiri/html5/document.rb +168 -0
- data/lib/nokogiri/html5/document_fragment.rb +90 -0
- data/lib/nokogiri/html5/node.rb +103 -0
- data/lib/nokogiri/html5.rb +392 -0
- data/lib/nokogiri/jruby/dependencies.rb +3 -0
- data/lib/nokogiri/jruby/nokogiri_jars.rb +43 -0
- data/lib/nokogiri/syntax_error.rb +2 -0
- data/lib/nokogiri/version/constant.rb +6 -0
- data/lib/nokogiri/version/info.rb +223 -0
- data/lib/nokogiri/version.rb +3 -108
- data/lib/nokogiri/xml/attr.rb +55 -3
- data/lib/nokogiri/xml/attribute_decl.rb +6 -2
- data/lib/nokogiri/xml/builder.rb +98 -54
- data/lib/nokogiri/xml/cdata.rb +3 -1
- data/lib/nokogiri/xml/character_data.rb +2 -0
- data/lib/nokogiri/xml/document.rb +312 -126
- data/lib/nokogiri/xml/document_fragment.rb +93 -48
- data/lib/nokogiri/xml/dtd.rb +4 -2
- data/lib/nokogiri/xml/element_content.rb +12 -2
- data/lib/nokogiri/xml/element_decl.rb +6 -2
- data/lib/nokogiri/xml/entity_decl.rb +7 -3
- data/lib/nokogiri/xml/entity_reference.rb +2 -0
- data/lib/nokogiri/xml/namespace.rb +45 -0
- data/lib/nokogiri/xml/node/save_options.rb +23 -8
- data/lib/nokogiri/xml/node.rb +1088 -418
- data/lib/nokogiri/xml/node_set.rb +173 -63
- data/lib/nokogiri/xml/notation.rb +13 -0
- data/lib/nokogiri/xml/parse_options.rb +145 -52
- data/lib/nokogiri/xml/pp/character_data.rb +9 -6
- data/lib/nokogiri/xml/pp/node.rb +42 -30
- data/lib/nokogiri/xml/pp.rb +4 -2
- data/lib/nokogiri/xml/processing_instruction.rb +4 -1
- data/lib/nokogiri/xml/reader.rb +21 -28
- data/lib/nokogiri/xml/relax_ng.rb +8 -2
- data/lib/nokogiri/xml/sax/document.rb +45 -49
- data/lib/nokogiri/xml/sax/parser.rb +39 -36
- data/lib/nokogiri/xml/sax/parser_context.rb +8 -3
- data/lib/nokogiri/xml/sax/push_parser.rb +6 -5
- data/lib/nokogiri/xml/sax.rb +6 -4
- data/lib/nokogiri/xml/schema.rb +19 -9
- data/lib/nokogiri/xml/searchable.rb +120 -72
- data/lib/nokogiri/xml/syntax_error.rb +6 -4
- data/lib/nokogiri/xml/text.rb +2 -0
- data/lib/nokogiri/xml/xpath/syntax_error.rb +4 -2
- data/lib/nokogiri/xml/xpath.rb +15 -4
- data/lib/nokogiri/xml/xpath_context.rb +3 -3
- data/lib/nokogiri/xml.rb +38 -37
- data/lib/nokogiri/xslt/stylesheet.rb +3 -1
- data/lib/nokogiri/xslt.rb +101 -22
- data/lib/nokogiri.rb +59 -75
- data/lib/xsd/xmlparser/nokogiri.rb +29 -25
- data/patches/libxml2/0001-Remove-script-macro-support.patch +40 -0
- data/patches/libxml2/0002-Update-entities-to-remove-handling-of-ssi.patch +44 -0
- data/patches/libxml2/0003-libxml2.la-is-in-top_builddir.patch +25 -0
- data/patches/libxml2/0009-allow-wildcard-namespaces.patch +77 -0
- data/patches/libxml2/0010-update-config.guess-and-config.sub-for-libxml2.patch +224 -0
- data/patches/libxml2/0011-rip-out-libxml2-s-libc_single_threaded-support.patch +30 -0
- data/patches/libxslt/0001-update-config.guess-and-config.sub-for-libxslt.patch +224 -0
- data/ports/archives/libxml2-2.11.4.tar.xz +0 -0
- data/ports/archives/libxslt-1.1.38.tar.xz +0 -0
- metadata +128 -265
- data/ext/nokogiri/html_document.c +0 -170
- data/ext/nokogiri/html_document.h +0 -10
- data/ext/nokogiri/html_element_description.c +0 -279
- data/ext/nokogiri/html_element_description.h +0 -10
- data/ext/nokogiri/html_entity_lookup.c +0 -32
- data/ext/nokogiri/html_entity_lookup.h +0 -8
- data/ext/nokogiri/html_sax_parser_context.c +0 -116
- data/ext/nokogiri/html_sax_parser_context.h +0 -11
- data/ext/nokogiri/html_sax_push_parser.c +0 -87
- data/ext/nokogiri/html_sax_push_parser.h +0 -9
- data/ext/nokogiri/xml_attr.h +0 -9
- data/ext/nokogiri/xml_attribute_decl.h +0 -9
- data/ext/nokogiri/xml_cdata.h +0 -9
- data/ext/nokogiri/xml_comment.h +0 -9
- data/ext/nokogiri/xml_document.h +0 -23
- data/ext/nokogiri/xml_document_fragment.h +0 -10
- data/ext/nokogiri/xml_dtd.h +0 -10
- data/ext/nokogiri/xml_element_content.h +0 -10
- data/ext/nokogiri/xml_element_decl.h +0 -9
- data/ext/nokogiri/xml_encoding_handler.h +0 -8
- data/ext/nokogiri/xml_entity_decl.h +0 -10
- data/ext/nokogiri/xml_entity_reference.h +0 -9
- data/ext/nokogiri/xml_io.c +0 -61
- data/ext/nokogiri/xml_io.h +0 -11
- data/ext/nokogiri/xml_libxml2_hacks.c +0 -112
- data/ext/nokogiri/xml_libxml2_hacks.h +0 -12
- data/ext/nokogiri/xml_namespace.h +0 -14
- data/ext/nokogiri/xml_node.h +0 -13
- data/ext/nokogiri/xml_node_set.h +0 -12
- data/ext/nokogiri/xml_processing_instruction.h +0 -9
- data/ext/nokogiri/xml_reader.h +0 -10
- data/ext/nokogiri/xml_relax_ng.h +0 -9
- data/ext/nokogiri/xml_sax_parser.h +0 -39
- data/ext/nokogiri/xml_sax_parser_context.h +0 -10
- data/ext/nokogiri/xml_sax_push_parser.h +0 -9
- data/ext/nokogiri/xml_schema.h +0 -9
- data/ext/nokogiri/xml_syntax_error.h +0 -13
- data/ext/nokogiri/xml_text.h +0 -9
- data/ext/nokogiri/xml_xpath_context.h +0 -10
- data/ext/nokogiri/xslt_stylesheet.h +0 -14
- data/lib/nokogiri/html/document.rb +0 -335
- data/lib/nokogiri/html/document_fragment.rb +0 -49
- data/lib/nokogiri/html/element_description_defaults.rb +0 -671
- data/lib/nokogiri/html/sax/parser_context.rb +0 -16
- data/patches/libxml2/0001-Revert-Do-not-URI-escape-in-server-side-includes.patch +0 -78
- data/patches/libxml2/0002-Fix-nullptr-deref-with-XPath-logic-ops.patch +0 -54
- data/patches/libxml2/0003-Fix-infinite-loop-in-LZMA-decompression.patch +0 -50
- data/ports/archives/libxml2-2.9.8.tar.gz +0 -0
- data/ports/archives/libxslt-1.1.32.tar.gz +0 -0
@@ -0,0 +1,148 @@
|
|
1
|
+
#ifndef GUMBO_ERROR_H_
#define GUMBO_ERROR_H_

#include <stdint.h>
#include "nokogiri_gumbo.h"
#include "insertion_mode.h"
#include "string_buffer.h"
#include "token_type.h"
#include "tokenizer_states.h"

#ifdef __cplusplus
extern "C" {
#endif

struct GumboInternalParser;

// Identifies what kind of error a GumboError describes.
//
// The enumerators carry no explicit values, so their numeric values are fixed
// purely by declaration order; reordering or inserting entries changes them.
typedef enum {
  // Defined errors.
  // https://html.spec.whatwg.org/multipage/parsing.html#parse-errors
  GUMBO_ERR_ABRUPT_CLOSING_OF_EMPTY_COMMENT,
  GUMBO_ERR_ABRUPT_DOCTYPE_PUBLIC_IDENTIFIER,
  GUMBO_ERR_ABRUPT_DOCTYPE_SYSTEM_IDENTIFIER,
  GUMBO_ERR_ABSENCE_OF_DIGITS_IN_NUMERIC_CHARACTER_REFERENCE,
  GUMBO_ERR_CDATA_IN_HTML_CONTENT,
  GUMBO_ERR_CHARACTER_REFERENCE_OUTSIDE_UNICODE_RANGE,
  GUMBO_ERR_CONTROL_CHARACTER_IN_INPUT_STREAM,
  GUMBO_ERR_CONTROL_CHARACTER_REFERENCE,
  GUMBO_ERR_END_TAG_WITH_ATTRIBUTES,
  GUMBO_ERR_DUPLICATE_ATTRIBUTE,
  GUMBO_ERR_END_TAG_WITH_TRAILING_SOLIDUS,
  GUMBO_ERR_EOF_BEFORE_TAG_NAME,
  GUMBO_ERR_EOF_IN_CDATA,
  GUMBO_ERR_EOF_IN_COMMENT,
  GUMBO_ERR_EOF_IN_DOCTYPE,
  GUMBO_ERR_EOF_IN_SCRIPT_HTML_COMMENT_LIKE_TEXT,
  GUMBO_ERR_EOF_IN_TAG,
  GUMBO_ERR_INCORRECTLY_CLOSED_COMMENT,
  GUMBO_ERR_INCORRECTLY_OPENED_COMMENT,
  GUMBO_ERR_INVALID_CHARACTER_SEQUENCE_AFTER_DOCTYPE_NAME,
  GUMBO_ERR_INVALID_FIRST_CHARACTER_OF_TAG_NAME,
  GUMBO_ERR_MISSING_ATTRIBUTE_VALUE,
  GUMBO_ERR_MISSING_DOCTYPE_NAME,
  GUMBO_ERR_MISSING_DOCTYPE_PUBLIC_IDENTIFIER,
  GUMBO_ERR_MISSING_DOCTYPE_SYSTEM_IDENTIFIER,
  GUMBO_ERR_MISSING_END_TAG_NAME,
  GUMBO_ERR_MISSING_QUOTE_BEFORE_DOCTYPE_PUBLIC_IDENTIFIER,
  GUMBO_ERR_MISSING_QUOTE_BEFORE_DOCTYPE_SYSTEM_IDENTIFIER,
  GUMBO_ERR_MISSING_SEMICOLON_AFTER_CHARACTER_REFERENCE,
  GUMBO_ERR_MISSING_WHITESPACE_AFTER_DOCTYPE_PUBLIC_KEYWORD,
  GUMBO_ERR_MISSING_WHITESPACE_AFTER_DOCTYPE_SYSTEM_KEYWORD,
  GUMBO_ERR_MISSING_WHITESPACE_BEFORE_DOCTYPE_NAME,
  GUMBO_ERR_MISSING_WHITESPACE_BETWEEN_ATTRIBUTES,
  GUMBO_ERR_MISSING_WHITESPACE_BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS,
  GUMBO_ERR_NESTED_COMMENT,
  GUMBO_ERR_NONCHARACTER_CHARACTER_REFERENCE,
  GUMBO_ERR_NONCHARACTER_IN_INPUT_STREAM,
  GUMBO_ERR_NON_VOID_HTML_ELEMENT_START_TAG_WITH_TRAILING_SOLIDUS,
  GUMBO_ERR_NULL_CHARACTER_REFERENCE,
  GUMBO_ERR_SURROGATE_CHARACTER_REFERENCE,
  GUMBO_ERR_SURROGATE_IN_INPUT_STREAM,
  GUMBO_ERR_UNEXPECTED_CHARACTER_AFTER_DOCTYPE_SYSTEM_IDENTIFIER,
  GUMBO_ERR_UNEXPECTED_CHARACTER_IN_ATTRIBUTE_NAME,
  GUMBO_ERR_UNEXPECTED_CHARACTER_IN_UNQUOTED_ATTRIBUTE_VALUE,
  GUMBO_ERR_UNEXPECTED_EQUALS_SIGN_BEFORE_ATTRIBUTE_NAME,
  GUMBO_ERR_UNEXPECTED_NULL_CHARACTER,
  GUMBO_ERR_UNEXPECTED_QUESTION_MARK_INSTEAD_OF_TAG_NAME,
  GUMBO_ERR_UNEXPECTED_SOLIDUS_IN_TAG,
  GUMBO_ERR_UNKNOWN_NAMED_CHARACTER_REFERENCE,

  // Encoding errors.
  GUMBO_ERR_UTF8_INVALID,
  GUMBO_ERR_UTF8_TRUNCATED,

  // Generic parser error.
  GUMBO_ERR_PARSER,
} GumboErrorType;

// Additional data for tokenizer errors.
// This records the current state and codepoint encountered - this is usually
// enough to reconstruct what went wrong and provide a friendly error message.
typedef struct GumboInternalTokenizerError {
  // The bad codepoint encountered.
  int codepoint;

  // The state that the tokenizer was in at the time.
  GumboTokenizerEnum state;
} GumboTokenizerError;

// Additional data for parse errors.
typedef struct GumboInternalParserError {
  // The type of input token that resulted in this error.
  GumboTokenType input_type;

  // The HTML tag of the input token. TAG_UNKNOWN if this was not a tag token.
  GumboTag input_tag;

  // The insertion mode that the parser was in at the time.
  GumboInsertionMode parser_state;

  // The tag stack at the point of the error. Note that this is a GumboVector
  // of GumboTags *stored by value* - cast the void* to a GumboTag directly to
  // get at the tag.
  GumboVector /* GumboTag */ tag_stack;
} GumboParserError;

// The overall error struct representing an error in decoding/tokenizing/parsing
// the HTML. This contains an enumerated type flag, a source position, and then
// a union of fields containing data specific to the error.
struct GumboInternalError {
  // The type of error.
  GumboErrorType type;

  // The position within the source file where the error occurred.
  GumboSourcePosition position;

  // The piece of text that caused the error.
  GumboStringPiece original_text;

  // Type-specific error information.
  union {
    // Tokenizer errors.
    GumboTokenizerError tokenizer;

    // Parser errors.
    GumboParserError parser;
  } v;
};

// Adds a new error to the parser's error list, and returns a pointer to it so
// that clients can fill out the rest of its fields. May return NULL if we're
// already over the max_errors field specified in GumboOptions.
GumboError* gumbo_add_error(struct GumboInternalParser* parser);

// Initializes the errors vector in the parser.
void gumbo_init_errors(struct GumboInternalParser* parser);

// Frees all the errors in the 'errors_' field of the parser.
void gumbo_destroy_errors(struct GumboInternalParser* parser);

// Frees the memory used for a single GumboError.
void gumbo_error_destroy(GumboError* error);

#ifdef __cplusplus
}
#endif

#endif // GUMBO_ERROR_H_
|
@@ -0,0 +1,103 @@
|
|
1
|
+
/* ANSI-C code produced by gperf version 3.1 */
|
2
|
+
/* Command-line: gperf -m100 -n src/foreign_attrs.gperf */
|
3
|
+
/* Computed positions: -k'8-9' */
|
4
|
+
/* Filtered by: gperf-filter.sed */
|
5
|
+
|
6
|
+
#include "replacement.h"
|
7
|
+
#include "macros.h"
|
8
|
+
#include <string.h>
|
9
|
+
|
10
|
+
#define TOTAL_KEYWORDS 11
#define MIN_WORD_LENGTH 5
#define MAX_WORD_LENGTH 13
#define MIN_HASH_VALUE 0
#define MAX_HASH_VALUE 10
/* maximum key range = 11, duplicates = 0 */

/*
 * Hash used by the foreign-attribute lookup table.
 *
 * Only the bytes at offsets 7 and 8 of `str` take part in the hash:
 *   - len 5..7       -> neither byte is read, result is 0
 *   - len 8          -> byte 7 only
 *   - any other len  -> bytes 8 and 7
 * The function does no bounds checking of its own; the caller is expected to
 * have validated `len` before calling.
 */
static inline unsigned int
hash (register const char *str, register size_t len)
{
  /* Per-byte weights; 11 (one past the largest valid hash) marks bytes that
     never occur at the hashed positions of any keyword. */
  static const unsigned char asso_values[] =
    {
      11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
      11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
      11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
      11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
      11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
      11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
      11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
      11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
      11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
      11, 11, 11, 11, 11, 11, 11, 11, 11,  2,
      11,  1, 11, 10,  4,  4, 11, 11,  3, 11,
      11,  5,  3, 11,  0, 11,  2, 11, 11, 11,
      11,  2, 11, 11, 11, 11, 11, 11, 11, 11,
      11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
      11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
      11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
      11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
      11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
      11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
      11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
      11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
      11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
      11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
      11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
      11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
      11, 11, 11, 11, 11, 11
    };
  const unsigned char *bytes = (const unsigned char *) str;
  unsigned int total = 0;

  /* Words of 5 to 7 characters are too short to reach the hashed positions. */
  if (len >= 5 && len <= 7)
    return total;

  /* Every length other than exactly 8 also mixes in the byte at offset 8. */
  if (len != 8)
    total += asso_values[bytes[8]];

  total += asso_values[bytes[7]];
  return total;
}
|
66
|
+
|
67
|
+
const ForeignAttrReplacement *
|
68
|
+
gumbo_get_foreign_attr_replacement (register const char *str, register size_t len)
|
69
|
+
{
|
70
|
+
static const unsigned char lengthtable[] =
|
71
|
+
{
|
72
|
+
5, 10, 13, 9, 13, 10, 11, 11, 10, 10, 8
|
73
|
+
};
|
74
|
+
static const ForeignAttrReplacement wordlist[] =
|
75
|
+
{
|
76
|
+
{"xmlns", "xmlns", GUMBO_ATTR_NAMESPACE_XMLNS},
|
77
|
+
{"xlink:href", "href", GUMBO_ATTR_NAMESPACE_XLINK},
|
78
|
+
{"xlink:arcrole", "arcrole", GUMBO_ATTR_NAMESPACE_XLINK},
|
79
|
+
{"xml:space", "space", GUMBO_ATTR_NAMESPACE_XML},
|
80
|
+
{"xlink:actuate", "actuate", GUMBO_ATTR_NAMESPACE_XLINK},
|
81
|
+
{"xlink:type", "type", GUMBO_ATTR_NAMESPACE_XLINK},
|
82
|
+
{"xlink:title", "title", GUMBO_ATTR_NAMESPACE_XLINK},
|
83
|
+
{"xmlns:xlink", "xlink", GUMBO_ATTR_NAMESPACE_XMLNS},
|
84
|
+
{"xlink:role", "role", GUMBO_ATTR_NAMESPACE_XLINK},
|
85
|
+
{"xlink:show", "show", GUMBO_ATTR_NAMESPACE_XLINK},
|
86
|
+
{"xml:lang", "lang", GUMBO_ATTR_NAMESPACE_XML}
|
87
|
+
};
|
88
|
+
|
89
|
+
if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH)
|
90
|
+
{
|
91
|
+
register unsigned int key = hash (str, len);
|
92
|
+
|
93
|
+
if (key <= MAX_HASH_VALUE)
|
94
|
+
if (len == lengthtable[key])
|
95
|
+
{
|
96
|
+
register const char *s = wordlist[key].from;
|
97
|
+
|
98
|
+
if (s && *str == *s && !memcmp (str + 1, s + 1, len - 1))
|
99
|
+
return &wordlist[key];
|
100
|
+
}
|
101
|
+
}
|
102
|
+
return 0;
|
103
|
+
}
|
@@ -0,0 +1,27 @@
|
|
1
|
+
%{
|
2
|
+
#include "replacement.h"
|
3
|
+
#include "macros.h"
|
4
|
+
%}
|
5
|
+
|
6
|
+
%struct-type
|
7
|
+
%omit-struct-type
|
8
|
+
%compare-lengths
|
9
|
+
%readonly-tables
|
10
|
+
%null-strings
|
11
|
+
%includes
|
12
|
+
%define lookup-function-name gumbo_get_foreign_attr_replacement
|
13
|
+
%define slot-name from
|
14
|
+
ForeignAttrReplacement;
|
15
|
+
|
16
|
+
%%
|
17
|
+
"xlink:actuate", "actuate", GUMBO_ATTR_NAMESPACE_XLINK
|
18
|
+
"xlink:arcrole", "arcrole", GUMBO_ATTR_NAMESPACE_XLINK
|
19
|
+
"xlink:href", "href", GUMBO_ATTR_NAMESPACE_XLINK
|
20
|
+
"xlink:role", "role", GUMBO_ATTR_NAMESPACE_XLINK
|
21
|
+
"xlink:show", "show", GUMBO_ATTR_NAMESPACE_XLINK
|
22
|
+
"xlink:title", "title", GUMBO_ATTR_NAMESPACE_XLINK
|
23
|
+
"xlink:type", "type", GUMBO_ATTR_NAMESPACE_XLINK
|
24
|
+
"xml:lang", "lang", GUMBO_ATTR_NAMESPACE_XML
|
25
|
+
"xml:space", "space", GUMBO_ATTR_NAMESPACE_XML
|
26
|
+
"xmlns", "xmlns", GUMBO_ATTR_NAMESPACE_XMLNS
|
27
|
+
"xmlns:xlink", "xlink", GUMBO_ATTR_NAMESPACE_XMLNS
|
@@ -0,0 +1,33 @@
|
|
1
|
+
#ifndef GUMBO_INSERTION_MODE_H_
#define GUMBO_INSERTION_MODE_H_

/*
 * Tree-construction insertion modes, as listed in
 * https://html.spec.whatwg.org/multipage/parsing.html#insertion-mode
 *
 * Values are consecutive, starting at zero, in declaration order.
 * If new enum values are added, be sure to update the kTokenHandlers
 * dispatch table in parser.c.
 */
typedef enum {
  GUMBO_INSERTION_MODE_INITIAL = 0,
  GUMBO_INSERTION_MODE_BEFORE_HTML,
  GUMBO_INSERTION_MODE_BEFORE_HEAD,
  GUMBO_INSERTION_MODE_IN_HEAD,
  GUMBO_INSERTION_MODE_IN_HEAD_NOSCRIPT,
  GUMBO_INSERTION_MODE_AFTER_HEAD,
  GUMBO_INSERTION_MODE_IN_BODY,
  GUMBO_INSERTION_MODE_TEXT,
  GUMBO_INSERTION_MODE_IN_TABLE,
  GUMBO_INSERTION_MODE_IN_TABLE_TEXT,
  GUMBO_INSERTION_MODE_IN_CAPTION,
  GUMBO_INSERTION_MODE_IN_COLUMN_GROUP,
  GUMBO_INSERTION_MODE_IN_TABLE_BODY,
  GUMBO_INSERTION_MODE_IN_ROW,
  GUMBO_INSERTION_MODE_IN_CELL,
  GUMBO_INSERTION_MODE_IN_SELECT,
  GUMBO_INSERTION_MODE_IN_SELECT_IN_TABLE,
  GUMBO_INSERTION_MODE_IN_TEMPLATE,
  GUMBO_INSERTION_MODE_AFTER_BODY,
  GUMBO_INSERTION_MODE_IN_FRAMESET,
  GUMBO_INSERTION_MODE_AFTER_FRAMESET,
  GUMBO_INSERTION_MODE_AFTER_AFTER_BODY,
  GUMBO_INSERTION_MODE_AFTER_AFTER_FRAMESET
} GumboInsertionMode;

#endif // GUMBO_INSERTION_MODE_H_
|
@@ -0,0 +1,91 @@
|
|
1
|
+
#ifndef MACROS_H
#define MACROS_H

// Refuse to build as pre-C99 C. Windows and C++ builds are exempt from the
// check.
#if (!defined(__STDC_VERSION__) || !(__STDC_VERSION__ >= 199901L)) \
  && !defined(_WIN32) && !defined(__cplusplus)
# error C99 compiler required
#endif

// On Windows, map the C99 spellings onto the compiler-specific ones.
#if defined(_WIN32)
# define inline __inline
# define __func__ __FUNCTION__
#endif

// Calculate the number of elements in an array.
// The extra division on the third line is a trick to help prevent
// passing a pointer to the first element of an array instead of a
// reference to the array itself.
#define ARRAY_COUNT(x) ( \
  (sizeof(x) / sizeof((x)[0])) \
  / ((size_t)(!(sizeof(x) % sizeof((x)[0])))) \
)

// Marks `x` as used when NDEBUG is defined; expands to nothing otherwise.
#ifdef NDEBUG
#define UNUSED_IF_NDEBUG(x) (void)(x)
#else
#define UNUSED_IF_NDEBUG(x)
#endif

// True when compiling with a GNU-compatible compiler whose reported version is
// at least major.minor; always 0 elsewhere. Usable in both #if directives and
// ordinary expressions. The arguments are parenthesized so that expression
// arguments (e.g. a conditional) are evaluated as a unit.
#ifdef __GNUC__
#define GNUC_AT_LEAST(major, minor) ( \
  (__GNUC__ > (major)) \
  || ((__GNUC__ == (major)) && (__GNUC_MINOR__ >= (minor))) )
#else
#define GNUC_AT_LEAST(major, minor) 0
#endif

// Wrapper around __has_attribute that evaluates to 0 where it is unavailable.
#ifdef __has_attribute
#define HAS_ATTRIBUTE(x) __has_attribute(x)
#else
#define HAS_ATTRIBUTE(x) 0
#endif

#if GNUC_AT_LEAST(3, 0) || HAS_ATTRIBUTE(unused) || defined(__TINYC__)
#define UNUSED __attribute__((__unused__))
#else
#define UNUSED
#endif

// Function attributes; each expands to nothing on compilers that are not
// GNU-compatible (version 3.0 or later).
// PRINTF(x): x is the 1-based index of the format-string parameter; the
// variadic arguments are taken to start at the following parameter.
#if GNUC_AT_LEAST(3, 0)
#define MALLOC __attribute__((__malloc__))
#define PRINTF(x) __attribute__((__format__(__printf__, (x), ((x) + 1))))
#define PURE __attribute__((__pure__))
#define CONST_FN __attribute__((__const__))
#else
#define MALLOC
#define PRINTF(x)
#define PURE
#define CONST_FN
#endif

// Declares a deliberately unused parameter: the name is prefixed with
// `unused__` and tagged with UNUSED.
#define UNUSED_ARG(x) unused__ ## x UNUSED

// Branch-prediction hints. They only map to __builtin_expect in optimized
// GNU-compatible builds; otherwise they are plain pass-throughs.
#if GNUC_AT_LEAST(3, 0) && defined(__OPTIMIZE__)
#define likely(x) __builtin_expect(!!(x), 1)
#define unlikely(x) __builtin_expect(!!(x), 0)
#else
#define likely(x) (x)
#define unlikely(x) (x)
#endif

#if GNUC_AT_LEAST(3, 3) || HAS_ATTRIBUTE(nonnull)
#define NONNULL_ARGS __attribute__((__nonnull__))
#else
#define NONNULL_ARGS
#endif

#if GNUC_AT_LEAST(3, 4) || HAS_ATTRIBUTE(warn_unused_result)
#define WARN_UNUSED_RESULT __attribute__((__warn_unused_result__))
#else
#define WARN_UNUSED_RESULT
#endif

#if GNUC_AT_LEAST(5, 0) || HAS_ATTRIBUTE(returns_nonnull)
#define RETURNS_NONNULL __attribute__((__returns_nonnull__))
#else
#define RETURNS_NONNULL
#endif

// Combination of MALLOC and RETURNS_NONNULL.
#define XMALLOC MALLOC RETURNS_NONNULL

#endif // ndef MACROS_H
|