nokogumbo 1.5.0 → 2.0.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +237 -26
- data/ext/nokogumbo/extconf.rb +121 -0
- data/ext/nokogumbo/nokogumbo.c +793 -0
- data/gumbo-parser/src/ascii.c +75 -0
- data/gumbo-parser/src/ascii.h +115 -0
- data/gumbo-parser/src/attribute.c +26 -28
- data/gumbo-parser/src/attribute.h +3 -23
- data/gumbo-parser/src/char_ref.c +5972 -6816
- data/gumbo-parser/src/char_ref.h +14 -45
- data/gumbo-parser/src/error.c +510 -163
- data/gumbo-parser/src/error.h +70 -147
- data/gumbo-parser/src/foreign_attrs.c +104 -0
- data/gumbo-parser/src/gumbo.h +577 -305
- data/gumbo-parser/src/insertion_mode.h +4 -28
- data/gumbo-parser/src/macros.h +91 -0
- data/gumbo-parser/src/parser.c +2922 -2228
- data/gumbo-parser/src/parser.h +6 -22
- data/gumbo-parser/src/replacement.h +33 -0
- data/gumbo-parser/src/string_buffer.c +43 -50
- data/gumbo-parser/src/string_buffer.h +24 -40
- data/gumbo-parser/src/string_piece.c +39 -39
- data/gumbo-parser/src/svg_attrs.c +174 -0
- data/gumbo-parser/src/svg_tags.c +137 -0
- data/gumbo-parser/src/tag.c +186 -59
- data/gumbo-parser/src/tag_lookup.c +382 -0
- data/gumbo-parser/src/tag_lookup.h +13 -0
- data/gumbo-parser/src/token_buffer.c +79 -0
- data/gumbo-parser/src/token_buffer.h +71 -0
- data/gumbo-parser/src/token_type.h +1 -25
- data/gumbo-parser/src/tokenizer.c +2127 -1561
- data/gumbo-parser/src/tokenizer.h +41 -52
- data/gumbo-parser/src/tokenizer_states.h +281 -45
- data/gumbo-parser/src/utf8.c +98 -123
- data/gumbo-parser/src/utf8.h +84 -52
- data/gumbo-parser/src/util.c +48 -38
- data/gumbo-parser/src/util.h +10 -40
- data/gumbo-parser/src/vector.c +45 -57
- data/gumbo-parser/src/vector.h +17 -39
- data/lib/nokogumbo.rb +11 -173
- data/lib/nokogumbo/html5.rb +252 -0
- data/lib/nokogumbo/html5/document.rb +53 -0
- data/lib/nokogumbo/html5/document_fragment.rb +62 -0
- data/lib/nokogumbo/html5/node.rb +72 -0
- data/lib/nokogumbo/version.rb +3 -0
- metadata +43 -24
- data/ext/nokogumboc/extconf.rb +0 -60
- data/ext/nokogumboc/nokogumbo.c +0 -295
- data/gumbo-parser/src/char_ref.rl +0 -2554
- data/gumbo-parser/src/string_piece.h +0 -38
- data/gumbo-parser/src/tag.in +0 -150
- data/gumbo-parser/src/tag_enum.h +0 -153
- data/gumbo-parser/src/tag_gperf.h +0 -105
- data/gumbo-parser/src/tag_sizes.h +0 -4
- data/gumbo-parser/src/tag_strings.h +0 -153
- data/gumbo-parser/visualc/include/strings.h +0 -4
- data/test-nokogumbo.rb +0 -190
@@ -1,25 +1,9 @@
|
|
1
|
-
// Copyright 2010 Google Inc. All Rights Reserved.
|
2
|
-
//
|
3
|
-
// Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
-
// you may not use this file except in compliance with the License.
|
5
|
-
// You may obtain a copy of the License at
|
6
|
-
//
|
7
|
-
// http://www.apache.org/licenses/LICENSE-2.0
|
8
|
-
//
|
9
|
-
// Unless required by applicable law or agreed to in writing, software
|
10
|
-
// distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
-
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
-
// See the License for the specific language governing permissions and
|
13
|
-
// limitations under the License.
|
14
|
-
//
|
15
|
-
// Author: jdtang@google.com (Jonathan Tang)
|
16
|
-
//
|
17
|
-
// This contains an implementation of a tokenizer for HTML5. It consumes a
|
18
|
-
// buffer of UTF-8 characters, and then emits a stream of tokens.
|
19
|
-
|
20
1
|
#ifndef GUMBO_TOKENIZER_H_
|
21
2
|
#define GUMBO_TOKENIZER_H_
|
22
3
|
|
4
|
+
// This contains an implementation of a tokenizer for HTML5. It consumes a
|
5
|
+
// buffer of UTF-8 characters, and then emits a stream of tokens.
|
6
|
+
|
23
7
|
#include <stdbool.h>
|
24
8
|
#include <stddef.h>
|
25
9
|
|
@@ -49,11 +33,20 @@ typedef struct GumboInternalTokenDocType {
|
|
49
33
|
// Struct containing all information pertaining to start tag tokens.
|
50
34
|
typedef struct GumboInternalTokenStartTag {
|
51
35
|
GumboTag tag;
|
36
|
+
// NULL unless tag is GUMBO_TAG_UNKNOWN
|
37
|
+
char *name;
|
52
38
|
GumboVector /* GumboAttribute */ attributes;
|
53
39
|
bool is_self_closing;
|
54
40
|
} GumboTokenStartTag;
|
55
41
|
|
56
|
-
//
|
42
|
+
// Struct containing all information pertaining to end tag tokens.
|
43
|
+
typedef struct GumboInternalTokenEndTag {
|
44
|
+
GumboTag tag;
|
45
|
+
// NULL unless tag is GUMBO_TAG_UNKNOWN
|
46
|
+
char *name;
|
47
|
+
} GumboTokenEndTag;
|
48
|
+
|
49
|
+
// A data structure representing a single token in the input stream. This
|
57
50
|
// contains an enum for the type, the source position, a GumboStringPiece
|
58
51
|
// pointing to the original text, and then a union for any parsed data.
|
59
52
|
typedef struct GumboInternalToken {
|
@@ -63,7 +56,7 @@ typedef struct GumboInternalToken {
|
|
63
56
|
union {
|
64
57
|
GumboTokenDocType doc_type;
|
65
58
|
GumboTokenStartTag start_tag;
|
66
|
-
|
59
|
+
GumboTokenEndTag end_tag;
|
67
60
|
const char* text; // For comments.
|
68
61
|
int character; // For character, whitespace, null, and EOF tokens.
|
69
62
|
} v;
|
@@ -71,53 +64,49 @@ typedef struct GumboInternalToken {
|
|
71
64
|
|
72
65
|
// Initializes the tokenizer state within the GumboParser object, setting up a
|
73
66
|
// parse of the specified text.
|
74
|
-
void gumbo_tokenizer_state_init(
|
75
|
-
|
67
|
+
void gumbo_tokenizer_state_init (
|
68
|
+
struct GumboInternalParser* parser,
|
69
|
+
const char* text,
|
70
|
+
size_t text_length
|
71
|
+
);
|
76
72
|
|
77
73
|
// Destroys the tokenizer state within the GumboParser object, freeing any
|
78
74
|
// dynamically-allocated structures within it.
|
79
75
|
void gumbo_tokenizer_state_destroy(struct GumboInternalParser* parser);
|
80
76
|
|
81
|
-
// Sets the tokenizer state to the specified value.
|
77
|
+
// Sets the tokenizer state to the specified value. This is needed by some
|
82
78
|
// parser states, which alter the state of the tokenizer in response to tags
|
83
79
|
// seen.
|
84
|
-
void gumbo_tokenizer_set_state(
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
//
|
90
|
-
//
|
91
|
-
|
92
|
-
|
80
|
+
void gumbo_tokenizer_set_state (
|
81
|
+
struct GumboInternalParser* parser,
|
82
|
+
GumboTokenizerEnum state
|
83
|
+
);
|
84
|
+
|
85
|
+
// Flags whether the adjusted current node is a foreign content element. This
|
86
|
+
// is necessary for the markup declaration open state, where the tokenizer
|
87
|
+
// must be aware of the state of the parser to properly tokenize bad comment
|
88
|
+
// tags.
|
89
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#markup-declaration-open-state
|
90
|
+
void gumbo_tokenizer_set_is_adjusted_current_node_foreign (
|
91
|
+
struct GumboInternalParser* parser,
|
92
|
+
bool is_foreign
|
93
|
+
);
|
93
94
|
|
94
95
|
// Lexes a single token from the specified buffer, filling the output with the
|
95
|
-
// parsed GumboToken data structure.
|
96
|
-
|
97
|
-
|
98
|
-
//
|
99
|
-
// struct GumboInternalParser parser;
|
100
|
-
// GumboToken output;
|
101
|
-
// gumbo_tokenizer_state_init(&parser, text, strlen(text));
|
102
|
-
// while (gumbo_lex(&parser, &output)) {
|
103
|
-
// ...do stuff with output.
|
104
|
-
// gumbo_token_destroy(&parser, &token);
|
105
|
-
// }
|
106
|
-
// gumbo_tokenizer_state_destroy(&parser);
|
107
|
-
bool gumbo_lex(struct GumboInternalParser* parser, GumboToken* output);
|
108
|
-
|
109
|
-
// Frees the internally-allocated pointers within an GumboToken. Note that this
|
96
|
+
// parsed GumboToken data structure.
|
97
|
+
void gumbo_lex(struct GumboInternalParser* parser, GumboToken* output);
|
98
|
+
|
99
|
+
// Frees the internally-allocated pointers within a GumboToken. Note that this
|
110
100
|
// doesn't free the token itself, since oftentimes it will be allocated on the
|
111
|
-
// stack.
|
112
|
-
// appropriate) can handle that.
|
101
|
+
// stack.
|
113
102
|
//
|
114
103
|
// Note that if you are handing over ownership of the internal strings to some
|
115
104
|
// other data structure - for example, a parse tree - these do not need to be
|
116
105
|
// freed.
|
117
|
-
void gumbo_token_destroy(
|
106
|
+
void gumbo_token_destroy(GumboToken* token);
|
118
107
|
|
119
108
|
#ifdef __cplusplus
|
120
109
|
}
|
121
110
|
#endif
|
122
111
|
|
123
|
-
#endif
|
112
|
+
#endif // GUMBO_TOKENIZER_H_
|
@@ -1,103 +1,339 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
|
4
|
-
//
|
5
|
-
// You may obtain a copy of the License at
|
6
|
-
//
|
7
|
-
// http://www.apache.org/licenses/LICENSE-2.0
|
8
|
-
//
|
9
|
-
// Unless required by applicable law or agreed to in writing, software
|
10
|
-
// distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
-
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
-
// See the License for the specific language governing permissions and
|
13
|
-
// limitations under the License.
|
14
|
-
//
|
15
|
-
// Author: jdtang@google.com (Jonathan Tang)
|
16
|
-
//
|
17
|
-
// This contains the list of states used in the tokenizer. Although at first
|
1
|
+
#ifndef GUMBO_TOKENIZER_STATES_H_
|
2
|
+
#define GUMBO_TOKENIZER_STATES_H_
|
3
|
+
|
4
|
+
// This contains the list of states used in the tokenizer. Although at first
|
18
5
|
// glance it seems like these could be kept internal to the tokenizer, several
|
19
6
|
// of the actions in the parser require that it reach into the tokenizer and
|
20
|
-
// reset the tokenizer state.
|
7
|
+
// reset the tokenizer state. For that to work, it needs to have the
|
21
8
|
// definitions of individual states available.
|
22
9
|
//
|
23
10
|
// This may also be useful for providing more detailed error messages for parse
|
24
11
|
// errors, as we can match up states and inputs in a table without having to
|
25
12
|
// clutter the tokenizer code with lots of precise error messages.
|
26
13
|
|
27
|
-
#ifndef GUMBO_TOKENIZER_STATES_H_
|
28
|
-
#define GUMBO_TOKENIZER_STATES_H_
|
29
|
-
|
30
14
|
// The ordering of this enum is also used to build the dispatch table for the
|
31
15
|
// tokenizer state machine, so if it is changed, be sure to update that too.
|
32
16
|
typedef enum {
|
17
|
+
// 12.2.5.1 Data state
|
18
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#data-state
|
33
19
|
GUMBO_LEX_DATA,
|
34
|
-
|
20
|
+
|
21
|
+
// 12.2.5.2 RCDATA state
|
22
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#rcdata-state
|
35
23
|
GUMBO_LEX_RCDATA,
|
36
|
-
|
24
|
+
|
25
|
+
// 12.2.5.3 RAWTEXT state
|
26
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#rawtext-state
|
37
27
|
GUMBO_LEX_RAWTEXT,
|
38
|
-
|
28
|
+
|
29
|
+
// 12.2.5.4 Script data state
|
30
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#script-data-state
|
31
|
+
GUMBO_LEX_SCRIPT_DATA,
|
32
|
+
|
33
|
+
// 12.2.5.5 PLAINTEXT state
|
34
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#plaintext-state
|
39
35
|
GUMBO_LEX_PLAINTEXT,
|
36
|
+
|
37
|
+
// 12.2.5.6 Tag open state
|
38
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#tag-open-state
|
40
39
|
GUMBO_LEX_TAG_OPEN,
|
40
|
+
|
41
|
+
// 12.2.5.7 End tag open state
|
42
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#end-tag-open-state
|
41
43
|
GUMBO_LEX_END_TAG_OPEN,
|
44
|
+
|
45
|
+
// 12.2.5.8 Tag name state
|
46
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#tag-name-state
|
42
47
|
GUMBO_LEX_TAG_NAME,
|
48
|
+
|
49
|
+
// 12.2.5.9 RCDATA less-than sign state
|
50
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#rcdata-less-than-sign-state
|
43
51
|
GUMBO_LEX_RCDATA_LT,
|
52
|
+
|
53
|
+
// 12.2.5.10 RCDATA end tag open state
|
54
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#rcdata-end-tag-open-state
|
44
55
|
GUMBO_LEX_RCDATA_END_TAG_OPEN,
|
56
|
+
|
57
|
+
// 12.2.5.11 RCDATA end tag name state
|
58
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#rcdata-end-tag-name-state
|
45
59
|
GUMBO_LEX_RCDATA_END_TAG_NAME,
|
60
|
+
|
61
|
+
// 12.2.5.12 RAWTEXT less-than sign state
|
62
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#rawtext-less-than-sign-state
|
46
63
|
GUMBO_LEX_RAWTEXT_LT,
|
64
|
+
|
65
|
+
// 12.2.5.13 RAWTEXT end tag open state
|
66
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#rawtext-end-tag-open-state
|
47
67
|
GUMBO_LEX_RAWTEXT_END_TAG_OPEN,
|
68
|
+
|
69
|
+
// 12.2.5.14 RAWTEXT end tag name state
|
70
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#rawtext-end-tag-name-state
|
48
71
|
GUMBO_LEX_RAWTEXT_END_TAG_NAME,
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
72
|
+
|
73
|
+
// 12.2.5.15 Script data less-than sign state
|
74
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#script-data-less-than-sign-state
|
75
|
+
GUMBO_LEX_SCRIPT_DATA_LT,
|
76
|
+
|
77
|
+
// 12.2.5.16 Script data end tag open state
|
78
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#script-data-end-tag-open-state
|
79
|
+
GUMBO_LEX_SCRIPT_DATA_END_TAG_OPEN,
|
80
|
+
|
81
|
+
// 12.2.5.17 Script data end tag name state
|
82
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#script-data-end-tag-name-state
|
83
|
+
GUMBO_LEX_SCRIPT_DATA_END_TAG_NAME,
|
84
|
+
|
85
|
+
// 12.2.5.18 Script data escape start state
|
86
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#script-data-escape-start-state
|
87
|
+
GUMBO_LEX_SCRIPT_DATA_ESCAPED_START,
|
88
|
+
|
89
|
+
// 12.2.5.19 Script data escape start dash state
|
90
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#script-data-escape-start-dash-state
|
91
|
+
GUMBO_LEX_SCRIPT_DATA_ESCAPED_START_DASH,
|
92
|
+
|
93
|
+
// 12.2.5.20 Script data escaped state
|
94
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#script-data-escaped-state
|
95
|
+
GUMBO_LEX_SCRIPT_DATA_ESCAPED,
|
96
|
+
|
97
|
+
// 12.2.5.21 Script data escaped dash state
|
98
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#script-data-escaped-dash-state
|
99
|
+
GUMBO_LEX_SCRIPT_DATA_ESCAPED_DASH,
|
100
|
+
|
101
|
+
// 12.2.5.22 Script data escaped dash dash state
|
102
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#script-data-escaped-dash-dash-state
|
103
|
+
GUMBO_LEX_SCRIPT_DATA_ESCAPED_DASH_DASH,
|
104
|
+
|
105
|
+
// 12.2.5.23 Script data escaped less-than sign state
|
106
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#script-data-escaped-less-than-sign-state
|
107
|
+
GUMBO_LEX_SCRIPT_DATA_ESCAPED_LT,
|
108
|
+
|
109
|
+
// 12.2.5.24 Script data escaped end tag open state
|
110
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#script-data-escaped-end-tag-open-state
|
111
|
+
GUMBO_LEX_SCRIPT_DATA_ESCAPED_END_TAG_OPEN,
|
112
|
+
|
113
|
+
// 12.2.5.25 Script data escaped end tag name state
|
114
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#script-data-escaped-end-tag-name-state
|
115
|
+
GUMBO_LEX_SCRIPT_DATA_ESCAPED_END_TAG_NAME,
|
116
|
+
|
117
|
+
// 12.2.5.26 Script data double escape start state
|
118
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#script-data-double-escape-start-state
|
119
|
+
GUMBO_LEX_SCRIPT_DATA_DOUBLE_ESCAPED_START,
|
120
|
+
|
121
|
+
// 12.2.5.27 Script data double escaped state
|
122
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#script-data-double-escaped-state
|
123
|
+
GUMBO_LEX_SCRIPT_DATA_DOUBLE_ESCAPED,
|
124
|
+
|
125
|
+
// 12.2.5.28 Script data double escaped dash state
|
126
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#script-data-double-escaped-dash-state
|
127
|
+
GUMBO_LEX_SCRIPT_DATA_DOUBLE_ESCAPED_DASH,
|
128
|
+
|
129
|
+
// 12.2.5.29 Script data double escaped dash dash state
|
130
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#script-data-double-escaped-dash-dash-state
|
131
|
+
GUMBO_LEX_SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH,
|
132
|
+
|
133
|
+
// 12.2.5.30 Script data double escaped less-than sign state
|
134
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#script-data-double-escaped-less-than-sign-state
|
135
|
+
GUMBO_LEX_SCRIPT_DATA_DOUBLE_ESCAPED_LT,
|
136
|
+
|
137
|
+
// 12.2.5.31 Script data double escape end state (XXX: spec bug with the
|
138
|
+
// name?)
|
139
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#script-data-double-escape-end-state
|
140
|
+
GUMBO_LEX_SCRIPT_DATA_DOUBLE_ESCAPED_END,
|
141
|
+
|
142
|
+
// 12.2.5.32 Before attribute name state
|
143
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#before-attribute-name-state
|
66
144
|
GUMBO_LEX_BEFORE_ATTR_NAME,
|
145
|
+
|
146
|
+
// 12.2.5.33 Attribute name state
|
147
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#attribute-name-state
|
67
148
|
GUMBO_LEX_ATTR_NAME,
|
149
|
+
|
150
|
+
// 12.2.5.34 After attribute name state
|
151
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#after-attribute-name-state
|
68
152
|
GUMBO_LEX_AFTER_ATTR_NAME,
|
153
|
+
|
154
|
+
// 12.2.5.35 Before attribute value state
|
155
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#before-attribute-value-state
|
69
156
|
GUMBO_LEX_BEFORE_ATTR_VALUE,
|
157
|
+
|
158
|
+
// 12.2.5.36 Attribute value (double-quoted) state
|
159
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#attribute-value-(double-quoted)-state
|
70
160
|
GUMBO_LEX_ATTR_VALUE_DOUBLE_QUOTED,
|
161
|
+
|
162
|
+
// 12.2.5.37 Attribute value (single-quoted) state
|
163
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#attribute-value-(single-quoted)-state
|
71
164
|
GUMBO_LEX_ATTR_VALUE_SINGLE_QUOTED,
|
165
|
+
|
166
|
+
// 12.2.5.38 Attribute value (unquoted) state
|
167
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#attribute-value-(unquoted)-state
|
72
168
|
GUMBO_LEX_ATTR_VALUE_UNQUOTED,
|
73
|
-
|
169
|
+
|
170
|
+
// 12.2.5.39 After attribute value (quoted) state
|
171
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#after-attribute-value-(quoted)-state
|
74
172
|
GUMBO_LEX_AFTER_ATTR_VALUE_QUOTED,
|
173
|
+
|
174
|
+
// 12.2.5.40 Self-closing start tag state
|
175
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#self-closing-start-tag-state
|
75
176
|
GUMBO_LEX_SELF_CLOSING_START_TAG,
|
177
|
+
|
178
|
+
// 12.2.5.41 Bogus comment state
|
179
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#bogus-comment-state
|
76
180
|
GUMBO_LEX_BOGUS_COMMENT,
|
77
|
-
|
181
|
+
|
182
|
+
// 12.2.5.42 Markup declaration open state
|
183
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#markup-declaration-open-state
|
184
|
+
GUMBO_LEX_MARKUP_DECLARATION_OPEN,
|
185
|
+
|
186
|
+
// 12.2.5.43 Comment start state
|
187
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#comment-start-state
|
78
188
|
GUMBO_LEX_COMMENT_START,
|
189
|
+
|
190
|
+
// 12.2.5.44 Comment start dash state
|
191
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#comment-start-dash-state
|
79
192
|
GUMBO_LEX_COMMENT_START_DASH,
|
193
|
+
|
194
|
+
// 12.2.5.45 Comment state
|
195
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#comment-state
|
80
196
|
GUMBO_LEX_COMMENT,
|
197
|
+
|
198
|
+
// 12.2.5.46 Comment less-than sign state
|
199
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#comment-less-than-sign-state
|
200
|
+
GUMBO_LEX_COMMENT_LT,
|
201
|
+
|
202
|
+
// 12.2.5.47 Comment less-than sign bang state
|
203
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#comment-less-than-sign-bang-state
|
204
|
+
GUMBO_LEX_COMMENT_LT_BANG,
|
205
|
+
|
206
|
+
// 12.2.5.48 Comment less-than sign bang dash state
|
207
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#comment-less-than-sign-bang-dash-state
|
208
|
+
GUMBO_LEX_COMMENT_LT_BANG_DASH,
|
209
|
+
|
210
|
+
// 12.2.5.49 Comment less-than sign bang dash dash state
|
211
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#comment-less-than-sign-bang-dash-dash-state
|
212
|
+
GUMBO_LEX_COMMENT_LT_BANG_DASH_DASH,
|
213
|
+
|
214
|
+
// 12.2.5.50 Comment end dash state
|
215
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#comment-end-dash-state
|
81
216
|
GUMBO_LEX_COMMENT_END_DASH,
|
217
|
+
|
218
|
+
// 12.2.5.51 Comment end state
|
219
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#comment-end-state
|
82
220
|
GUMBO_LEX_COMMENT_END,
|
221
|
+
|
222
|
+
// 12.2.5.52 Comment end bang state
|
223
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#comment-end-bang-state
|
83
224
|
GUMBO_LEX_COMMENT_END_BANG,
|
225
|
+
|
226
|
+
// 12.2.5.53 DOCTYPE state
|
227
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#doctype-state
|
84
228
|
GUMBO_LEX_DOCTYPE,
|
229
|
+
|
230
|
+
// 12.2.5.54 Before DOCTYPE name state
|
231
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#before-doctype-name-state
|
85
232
|
GUMBO_LEX_BEFORE_DOCTYPE_NAME,
|
233
|
+
|
234
|
+
// 12.2.5.55 DOCTYPE name state
|
235
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#doctype-name-state
|
86
236
|
GUMBO_LEX_DOCTYPE_NAME,
|
237
|
+
|
238
|
+
// 12.2.5.56 After DOCTYPE name state
|
239
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#after-doctype-name-state
|
87
240
|
GUMBO_LEX_AFTER_DOCTYPE_NAME,
|
241
|
+
|
242
|
+
// 12.2.5.57 After DOCTYPE public keyword state
|
243
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#after-doctype-public-keyword-state
|
88
244
|
GUMBO_LEX_AFTER_DOCTYPE_PUBLIC_KEYWORD,
|
245
|
+
|
246
|
+
// 12.2.5.58 Before DOCTYPE public identifier state
|
247
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#before-doctype-public-identifier-state
|
89
248
|
GUMBO_LEX_BEFORE_DOCTYPE_PUBLIC_ID,
|
249
|
+
|
250
|
+
// 12.2.5.59 DOCTYPE public identifier (double-quoted) state
|
251
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#doctype-public-identifier-(double-quoted)-state
|
90
252
|
GUMBO_LEX_DOCTYPE_PUBLIC_ID_DOUBLE_QUOTED,
|
253
|
+
|
254
|
+
// 12.2.5.60 DOCTYPE public identifier (single-quoted) state
|
255
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#doctype-public-identifier-(single-quoted)-state
|
91
256
|
GUMBO_LEX_DOCTYPE_PUBLIC_ID_SINGLE_QUOTED,
|
257
|
+
|
258
|
+
// 12.2.5.61 After DOCTYPE public identifier state
|
259
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#after-doctype-public-identifier-state
|
92
260
|
GUMBO_LEX_AFTER_DOCTYPE_PUBLIC_ID,
|
261
|
+
|
262
|
+
// 12.2.5.62 Between DOCTYPE public and system identifiers state
|
263
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#between-doctype-public-and-system-identifiers-state
|
93
264
|
GUMBO_LEX_BETWEEN_DOCTYPE_PUBLIC_SYSTEM_ID,
|
265
|
+
|
266
|
+
// 12.2.5.63 After DOCTYPE system keyword state
|
267
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#after-doctype-system-keyword-state
|
94
268
|
GUMBO_LEX_AFTER_DOCTYPE_SYSTEM_KEYWORD,
|
269
|
+
|
270
|
+
// 12.2.5.64 Before DOCTYPE system identifier state
|
271
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#before-doctype-system-identifier-state
|
95
272
|
GUMBO_LEX_BEFORE_DOCTYPE_SYSTEM_ID,
|
273
|
+
|
274
|
+
// 12.2.5.65 DOCTYPE system identifier (double-quoted) state
|
275
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#doctype-system-identifier-(double-quoted)-state
|
96
276
|
GUMBO_LEX_DOCTYPE_SYSTEM_ID_DOUBLE_QUOTED,
|
277
|
+
|
278
|
+
// 12.2.5.66 DOCTYPE system identifier (single-quoted) state
|
279
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#doctype-system-identifier-(single-quoted)-state
|
97
280
|
GUMBO_LEX_DOCTYPE_SYSTEM_ID_SINGLE_QUOTED,
|
281
|
+
|
282
|
+
// 12.2.5.67 After DOCTYPE system identifier state
|
283
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#after-doctype-system-identifier-state
|
98
284
|
GUMBO_LEX_AFTER_DOCTYPE_SYSTEM_ID,
|
285
|
+
|
286
|
+
// 12.2.5.68 Bogus DOCTYPE state
|
287
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#bogus-doctype-state
|
99
288
|
GUMBO_LEX_BOGUS_DOCTYPE,
|
100
|
-
|
289
|
+
|
290
|
+
// 12.2.5.69 CDATA section state
|
291
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#cdata-section-state
|
292
|
+
GUMBO_LEX_CDATA_SECTION,
|
293
|
+
|
294
|
+
// 12.2.5.70 CDATA section bracket state
|
295
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#cdata-section-bracket-state
|
296
|
+
GUMBO_LEX_CDATA_SECTION_BRACKET,
|
297
|
+
|
298
|
+
// 12.2.5.71 CDATA section end state
|
299
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#cdata-section-end-state
|
300
|
+
GUMBO_LEX_CDATA_SECTION_END,
|
301
|
+
|
302
|
+
// 12.2.5.72 Character reference state
|
303
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#character-reference-state
|
304
|
+
GUMBO_LEX_CHARACTER_REFERENCE,
|
305
|
+
|
306
|
+
// 12.2.5.73 Named character reference state
|
307
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#named-character-reference-state
|
308
|
+
GUMBO_LEX_NAMED_CHARACTER_REFERENCE,
|
309
|
+
|
310
|
+
// 12.2.5.74 Ambiguous ampersand state
|
311
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#ambiguous-ampersand-state
|
312
|
+
GUMBO_LEX_AMBIGUOUS_AMPERSAND,
|
313
|
+
|
314
|
+
// 12.2.5.75 Numeric character reference state
|
315
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#numeric-character-reference-state
|
316
|
+
GUMBO_LEX_NUMERIC_CHARACTER_REFERENCE,
|
317
|
+
|
318
|
+
// 12.2.5.76 Hexadecimal character reference start state
|
319
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#hexadecimal-character-reference-start-state
|
320
|
+
GUMBO_LEX_HEXADECIMAL_CHARACTER_REFERENCE_START,
|
321
|
+
|
322
|
+
// 12.2.5.77 Decimal character reference start state
|
323
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#decimal-character-reference-start-state
|
324
|
+
GUMBO_LEX_DECIMAL_CHARACTER_REFERENCE_START,
|
325
|
+
|
326
|
+
// 12.2.5.78 Hexadecimal character reference state
|
327
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#hexadecimal-character-reference-state
|
328
|
+
GUMBO_LEX_HEXADECIMAL_CHARACTER_REFERENCE,
|
329
|
+
|
330
|
+
// 12.2.5.79 Decimal character reference state
|
331
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#decimal-character-reference-state
|
332
|
+
GUMBO_LEX_DECIMAL_CHARACTER_REFERENCE,
|
333
|
+
|
334
|
+
// 12.2.5.80 Numeric character reference end state
|
335
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#numeric-character-reference-end-state
|
336
|
+
GUMBO_LEX_NUMERIC_CHARACTER_REFERENCE_END
|
101
337
|
} GumboTokenizerEnum;
|
102
338
|
|
103
|
-
#endif
|
339
|
+
#endif // GUMBO_TOKENIZER_STATES_H_
|