nokogumbo 1.5.0 → 2.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +237 -26
- data/ext/nokogumbo/extconf.rb +121 -0
- data/ext/nokogumbo/nokogumbo.c +793 -0
- data/gumbo-parser/src/ascii.c +75 -0
- data/gumbo-parser/src/ascii.h +115 -0
- data/gumbo-parser/src/attribute.c +26 -28
- data/gumbo-parser/src/attribute.h +3 -23
- data/gumbo-parser/src/char_ref.c +5972 -6816
- data/gumbo-parser/src/char_ref.h +14 -45
- data/gumbo-parser/src/error.c +510 -163
- data/gumbo-parser/src/error.h +70 -147
- data/gumbo-parser/src/foreign_attrs.c +104 -0
- data/gumbo-parser/src/gumbo.h +577 -305
- data/gumbo-parser/src/insertion_mode.h +4 -28
- data/gumbo-parser/src/macros.h +91 -0
- data/gumbo-parser/src/parser.c +2922 -2228
- data/gumbo-parser/src/parser.h +6 -22
- data/gumbo-parser/src/replacement.h +33 -0
- data/gumbo-parser/src/string_buffer.c +43 -50
- data/gumbo-parser/src/string_buffer.h +24 -40
- data/gumbo-parser/src/string_piece.c +39 -39
- data/gumbo-parser/src/svg_attrs.c +174 -0
- data/gumbo-parser/src/svg_tags.c +137 -0
- data/gumbo-parser/src/tag.c +186 -59
- data/gumbo-parser/src/tag_lookup.c +382 -0
- data/gumbo-parser/src/tag_lookup.h +13 -0
- data/gumbo-parser/src/token_buffer.c +79 -0
- data/gumbo-parser/src/token_buffer.h +71 -0
- data/gumbo-parser/src/token_type.h +1 -25
- data/gumbo-parser/src/tokenizer.c +2127 -1561
- data/gumbo-parser/src/tokenizer.h +41 -52
- data/gumbo-parser/src/tokenizer_states.h +281 -45
- data/gumbo-parser/src/utf8.c +98 -123
- data/gumbo-parser/src/utf8.h +84 -52
- data/gumbo-parser/src/util.c +48 -38
- data/gumbo-parser/src/util.h +10 -40
- data/gumbo-parser/src/vector.c +45 -57
- data/gumbo-parser/src/vector.h +17 -39
- data/lib/nokogumbo.rb +11 -173
- data/lib/nokogumbo/html5.rb +252 -0
- data/lib/nokogumbo/html5/document.rb +53 -0
- data/lib/nokogumbo/html5/document_fragment.rb +62 -0
- data/lib/nokogumbo/html5/node.rb +72 -0
- data/lib/nokogumbo/version.rb +3 -0
- metadata +43 -24
- data/ext/nokogumboc/extconf.rb +0 -60
- data/ext/nokogumboc/nokogumbo.c +0 -295
- data/gumbo-parser/src/char_ref.rl +0 -2554
- data/gumbo-parser/src/string_piece.h +0 -38
- data/gumbo-parser/src/tag.in +0 -150
- data/gumbo-parser/src/tag_enum.h +0 -153
- data/gumbo-parser/src/tag_gperf.h +0 -105
- data/gumbo-parser/src/tag_sizes.h +0 -4
- data/gumbo-parser/src/tag_strings.h +0 -153
- data/gumbo-parser/visualc/include/strings.h +0 -4
- data/test-nokogumbo.rb +0 -190
@@ -1,25 +1,9 @@
|
|
1
|
-
// Copyright 2010 Google Inc. All Rights Reserved.
|
2
|
-
//
|
3
|
-
// Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
-
// you may not use this file except in compliance with the License.
|
5
|
-
// You may obtain a copy of the License at
|
6
|
-
//
|
7
|
-
// http://www.apache.org/licenses/LICENSE-2.0
|
8
|
-
//
|
9
|
-
// Unless required by applicable law or agreed to in writing, software
|
10
|
-
// distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
-
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
-
// See the License for the specific language governing permissions and
|
13
|
-
// limitations under the License.
|
14
|
-
//
|
15
|
-
// Author: jdtang@google.com (Jonathan Tang)
|
16
|
-
//
|
17
|
-
// This contains an implementation of a tokenizer for HTML5. It consumes a
|
18
|
-
// buffer of UTF-8 characters, and then emits a stream of tokens.
|
19
|
-
|
20
1
|
#ifndef GUMBO_TOKENIZER_H_
|
21
2
|
#define GUMBO_TOKENIZER_H_
|
22
3
|
|
4
|
+
// This contains an implementation of a tokenizer for HTML5. It consumes a
|
5
|
+
// buffer of UTF-8 characters, and then emits a stream of tokens.
|
6
|
+
|
23
7
|
#include <stdbool.h>
|
24
8
|
#include <stddef.h>
|
25
9
|
|
@@ -49,11 +33,20 @@ typedef struct GumboInternalTokenDocType {
|
|
49
33
|
// Struct containing all information pertaining to start tag tokens.
|
50
34
|
typedef struct GumboInternalTokenStartTag {
|
51
35
|
GumboTag tag;
|
36
|
+
// NULL unless tag is GUMBO_TAG_UNKNOWN
|
37
|
+
char *name;
|
52
38
|
GumboVector /* GumboAttribute */ attributes;
|
53
39
|
bool is_self_closing;
|
54
40
|
} GumboTokenStartTag;
|
55
41
|
|
56
|
-
//
|
42
|
+
// Struct containing all information pertaining to end tag tokens.
|
43
|
+
typedef struct GumboInternalTokenEndTag {
|
44
|
+
GumboTag tag;
|
45
|
+
// NULL unless tag is GUMBO_TAG_UNKNOWN
|
46
|
+
char *name;
|
47
|
+
} GumboTokenEndTag;
|
48
|
+
|
49
|
+
// A data structure representing a single token in the input stream. This
|
57
50
|
// contains an enum for the type, the source position, a GumboStringPiece
|
58
51
|
// pointing to the original text, and then a union for any parsed data.
|
59
52
|
typedef struct GumboInternalToken {
|
@@ -63,7 +56,7 @@ typedef struct GumboInternalToken {
|
|
63
56
|
union {
|
64
57
|
GumboTokenDocType doc_type;
|
65
58
|
GumboTokenStartTag start_tag;
|
66
|
-
|
59
|
+
GumboTokenEndTag end_tag;
|
67
60
|
const char* text; // For comments.
|
68
61
|
int character; // For character, whitespace, null, and EOF tokens.
|
69
62
|
} v;
|
@@ -71,53 +64,49 @@ typedef struct GumboInternalToken {
|
|
71
64
|
|
72
65
|
// Initializes the tokenizer state within the GumboParser object, setting up a
|
73
66
|
// parse of the specified text.
|
74
|
-
void gumbo_tokenizer_state_init(
|
75
|
-
|
67
|
+
void gumbo_tokenizer_state_init (
|
68
|
+
struct GumboInternalParser* parser,
|
69
|
+
const char* text,
|
70
|
+
size_t text_length
|
71
|
+
);
|
76
72
|
|
77
73
|
// Destroys the tokenizer state within the GumboParser object, freeing any
|
78
74
|
// dynamically-allocated structures within it.
|
79
75
|
void gumbo_tokenizer_state_destroy(struct GumboInternalParser* parser);
|
80
76
|
|
81
|
-
// Sets the tokenizer state to the specified value.
|
77
|
+
// Sets the tokenizer state to the specified value. This is needed by some
|
82
78
|
// parser states, which alter the state of the tokenizer in response to tags
|
83
79
|
// seen.
|
84
|
-
void gumbo_tokenizer_set_state(
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
//
|
90
|
-
//
|
91
|
-
|
92
|
-
|
80
|
+
void gumbo_tokenizer_set_state (
|
81
|
+
struct GumboInternalParser* parser,
|
82
|
+
GumboTokenizerEnum state
|
83
|
+
);
|
84
|
+
|
85
|
+
// Flags whether the adjusted current node is a foreign content element. This
|
86
|
+
// is necessary for the markup declaration open state, where the tokenizer
|
87
|
+
// must be aware of the state of the parser to properly tokenize bad comment
|
88
|
+
// tags.
|
89
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#markup-declaration-open-state
|
90
|
+
void gumbo_tokenizer_set_is_adjusted_current_node_foreign (
|
91
|
+
struct GumboInternalParser* parser,
|
92
|
+
bool is_foreign
|
93
|
+
);
|
93
94
|
|
94
95
|
// Lexes a single token from the specified buffer, filling the output with the
|
95
|
-
// parsed GumboToken data structure.
|
96
|
-
|
97
|
-
|
98
|
-
//
|
99
|
-
// struct GumboInternalParser parser;
|
100
|
-
// GumboToken output;
|
101
|
-
// gumbo_tokenizer_state_init(&parser, text, strlen(text));
|
102
|
-
// while (gumbo_lex(&parser, &output)) {
|
103
|
-
// ...do stuff with output.
|
104
|
-
// gumbo_token_destroy(&parser, &token);
|
105
|
-
// }
|
106
|
-
// gumbo_tokenizer_state_destroy(&parser);
|
107
|
-
bool gumbo_lex(struct GumboInternalParser* parser, GumboToken* output);
|
108
|
-
|
109
|
-
// Frees the internally-allocated pointers within an GumboToken. Note that this
|
96
|
+
// parsed GumboToken data structure.
|
97
|
+
void gumbo_lex(struct GumboInternalParser* parser, GumboToken* output);
|
98
|
+
|
99
|
+
// Frees the internally-allocated pointers within a GumboToken. Note that this
|
110
100
|
// doesn't free the token itself, since oftentimes it will be allocated on the
|
111
|
-
// stack.
|
112
|
-
// appropriate) can handle that.
|
101
|
+
// stack.
|
113
102
|
//
|
114
103
|
// Note that if you are handing over ownership of the internal strings to some
|
115
104
|
// other data structure - for example, a parse tree - these do not need to be
|
116
105
|
// freed.
|
117
|
-
void gumbo_token_destroy(
|
106
|
+
void gumbo_token_destroy(GumboToken* token);
|
118
107
|
|
119
108
|
#ifdef __cplusplus
|
120
109
|
}
|
121
110
|
#endif
|
122
111
|
|
123
|
-
#endif
|
112
|
+
#endif // GUMBO_TOKENIZER_H_
|
@@ -1,103 +1,339 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
|
4
|
-
//
|
5
|
-
// You may obtain a copy of the License at
|
6
|
-
//
|
7
|
-
// http://www.apache.org/licenses/LICENSE-2.0
|
8
|
-
//
|
9
|
-
// Unless required by applicable law or agreed to in writing, software
|
10
|
-
// distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
-
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
-
// See the License for the specific language governing permissions and
|
13
|
-
// limitations under the License.
|
14
|
-
//
|
15
|
-
// Author: jdtang@google.com (Jonathan Tang)
|
16
|
-
//
|
17
|
-
// This contains the list of states used in the tokenizer. Although at first
|
1
|
+
#ifndef GUMBO_TOKENIZER_STATES_H_
|
2
|
+
#define GUMBO_TOKENIZER_STATES_H_
|
3
|
+
|
4
|
+
// This contains the list of states used in the tokenizer. Although at first
|
18
5
|
// glance it seems like these could be kept internal to the tokenizer, several
|
19
6
|
// of the actions in the parser require that it reach into the tokenizer and
|
20
|
-
// reset the tokenizer state.
|
7
|
+
// reset the tokenizer state. For that to work, it needs to have the
|
21
8
|
// definitions of individual states available.
|
22
9
|
//
|
23
10
|
// This may also be useful for providing more detailed error messages for parse
|
24
11
|
// errors, as we can match up states and inputs in a table without having to
|
25
12
|
// clutter the tokenizer code with lots of precise error messages.
|
26
13
|
|
27
|
-
#ifndef GUMBO_TOKENIZER_STATES_H_
|
28
|
-
#define GUMBO_TOKENIZER_STATES_H_
|
29
|
-
|
30
14
|
// The ordering of this enum is also used to build the dispatch table for the
|
31
15
|
// tokenizer state machine, so if it is changed, be sure to update that too.
|
32
16
|
typedef enum {
|
17
|
+
// 12.2.5.1 Data state
|
18
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#data-state
|
33
19
|
GUMBO_LEX_DATA,
|
34
|
-
|
20
|
+
|
21
|
+
// 12.2.5.2 RCDATA state
|
22
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#rcdata-state
|
35
23
|
GUMBO_LEX_RCDATA,
|
36
|
-
|
24
|
+
|
25
|
+
// 12.2.5.3 RAWTEXT state
|
26
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#rawtext-state
|
37
27
|
GUMBO_LEX_RAWTEXT,
|
38
|
-
|
28
|
+
|
29
|
+
// 12.2.5.4 Script data state
|
30
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#script-data-state
|
31
|
+
GUMBO_LEX_SCRIPT_DATA,
|
32
|
+
|
33
|
+
// 12.2.5.5 PLAINTEXT state
|
34
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#plaintext-state
|
39
35
|
GUMBO_LEX_PLAINTEXT,
|
36
|
+
|
37
|
+
// 12.2.5.6 Tag open state
|
38
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#tag-open-state
|
40
39
|
GUMBO_LEX_TAG_OPEN,
|
40
|
+
|
41
|
+
// 12.2.5.7 End tag open state
|
42
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#end-tag-open-state
|
41
43
|
GUMBO_LEX_END_TAG_OPEN,
|
44
|
+
|
45
|
+
// 12.2.5.8 Tag name state
|
46
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#tag-name-state
|
42
47
|
GUMBO_LEX_TAG_NAME,
|
48
|
+
|
49
|
+
// 12.2.5.9 RCDATA less-than sign state
|
50
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#rcdata-less-than-sign-state
|
43
51
|
GUMBO_LEX_RCDATA_LT,
|
52
|
+
|
53
|
+
// 12.2.5.10 RCDATA end tag open state
|
54
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#rcdata-end-tag-open-state
|
44
55
|
GUMBO_LEX_RCDATA_END_TAG_OPEN,
|
56
|
+
|
57
|
+
// 12.2.5.11 RCDATA end tag name state
|
58
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#rcdata-end-tag-name-state
|
45
59
|
GUMBO_LEX_RCDATA_END_TAG_NAME,
|
60
|
+
|
61
|
+
// 12.2.5.12 RAWTEXT less-than sign state
|
62
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#rawtext-less-than-sign-state
|
46
63
|
GUMBO_LEX_RAWTEXT_LT,
|
64
|
+
|
65
|
+
// 12.2.5.13 RAWTEXT end tag open state
|
66
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#rawtext-end-tag-open-state
|
47
67
|
GUMBO_LEX_RAWTEXT_END_TAG_OPEN,
|
68
|
+
|
69
|
+
// 12.2.5.14 RAWTEXT end tag name state
|
70
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#rawtext-end-tag-name-state
|
48
71
|
GUMBO_LEX_RAWTEXT_END_TAG_NAME,
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
72
|
+
|
73
|
+
// 12.2.5.15 Script data less-than sign state
|
74
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#script-data-less-than-sign-state
|
75
|
+
GUMBO_LEX_SCRIPT_DATA_LT,
|
76
|
+
|
77
|
+
// 12.2.5.16 Script data end tag open state
|
78
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#script-data-end-tag-open-state
|
79
|
+
GUMBO_LEX_SCRIPT_DATA_END_TAG_OPEN,
|
80
|
+
|
81
|
+
// 12.2.5.17 Script data end tag name state
|
82
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#script-data-end-tag-name-state
|
83
|
+
GUMBO_LEX_SCRIPT_DATA_END_TAG_NAME,
|
84
|
+
|
85
|
+
// 12.2.5.18 Script data escape start state
|
86
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#script-data-escape-start-state
|
87
|
+
GUMBO_LEX_SCRIPT_DATA_ESCAPED_START,
|
88
|
+
|
89
|
+
// 12.2.5.19 Script data escape start dash state
|
90
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#script-data-escape-start-dash-state
|
91
|
+
GUMBO_LEX_SCRIPT_DATA_ESCAPED_START_DASH,
|
92
|
+
|
93
|
+
// 12.2.5.20 Script data escaped state
|
94
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#script-data-escaped-state
|
95
|
+
GUMBO_LEX_SCRIPT_DATA_ESCAPED,
|
96
|
+
|
97
|
+
// 12.2.5.21 Script data escaped dash state
|
98
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#script-data-escaped-dash-state
|
99
|
+
GUMBO_LEX_SCRIPT_DATA_ESCAPED_DASH,
|
100
|
+
|
101
|
+
// 12.2.5.22 Script data escaped dash dash state
|
102
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#script-data-escaped-dash-dash-state
|
103
|
+
GUMBO_LEX_SCRIPT_DATA_ESCAPED_DASH_DASH,
|
104
|
+
|
105
|
+
// 12.2.5.23 Script data escaped less-than sign state
|
106
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#script-data-escaped-less-than-sign-state
|
107
|
+
GUMBO_LEX_SCRIPT_DATA_ESCAPED_LT,
|
108
|
+
|
109
|
+
// 12.2.5.24 Script data escaped end tag open state
|
110
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#script-data-escaped-end-tag-open-state
|
111
|
+
GUMBO_LEX_SCRIPT_DATA_ESCAPED_END_TAG_OPEN,
|
112
|
+
|
113
|
+
// 12.2.5.25 Script data escaped end tag name state
|
114
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#script-data-escaped-end-tag-name-state
|
115
|
+
GUMBO_LEX_SCRIPT_DATA_ESCAPED_END_TAG_NAME,
|
116
|
+
|
117
|
+
// 12.2.5.26 Script data double escape start state
|
118
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#script-data-double-escape-start-state
|
119
|
+
GUMBO_LEX_SCRIPT_DATA_DOUBLE_ESCAPED_START,
|
120
|
+
|
121
|
+
// 12.2.5.27 Script data double escaped state
|
122
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#script-data-double-escaped-state
|
123
|
+
GUMBO_LEX_SCRIPT_DATA_DOUBLE_ESCAPED,
|
124
|
+
|
125
|
+
// 12.2.5.28 Script data double escaped dash state
|
126
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#script-data-double-escaped-dash-state
|
127
|
+
GUMBO_LEX_SCRIPT_DATA_DOUBLE_ESCAPED_DASH,
|
128
|
+
|
129
|
+
// 12.2.5.29 Script data double escaped dash dash state
|
130
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#script-data-double-escaped-dash-dash-state
|
131
|
+
GUMBO_LEX_SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH,
|
132
|
+
|
133
|
+
// 12.2.5.30 Script data double escaped less-than sign state
|
134
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#script-data-double-escaped-less-than-sign-state
|
135
|
+
GUMBO_LEX_SCRIPT_DATA_DOUBLE_ESCAPED_LT,
|
136
|
+
|
137
|
+
// 12.2.5.31 Script data double escape end state (XXX: spec bug with the
|
138
|
+
// name?)
|
139
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#script-data-double-escape-end-state
|
140
|
+
GUMBO_LEX_SCRIPT_DATA_DOUBLE_ESCAPED_END,
|
141
|
+
|
142
|
+
// 12.2.5.32 Before attribute name state
|
143
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#before-attribute-name-state
|
66
144
|
GUMBO_LEX_BEFORE_ATTR_NAME,
|
145
|
+
|
146
|
+
// 12.2.5.33 Attribute name state
|
147
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#attribute-name-state
|
67
148
|
GUMBO_LEX_ATTR_NAME,
|
149
|
+
|
150
|
+
// 12.2.5.34 After attribute name state
|
151
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#after-attribute-name-state
|
68
152
|
GUMBO_LEX_AFTER_ATTR_NAME,
|
153
|
+
|
154
|
+
// 12.2.5.35 Before attribute value state
|
155
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#before-attribute-value-state
|
69
156
|
GUMBO_LEX_BEFORE_ATTR_VALUE,
|
157
|
+
|
158
|
+
// 12.2.5.36 Attribute value (double-quoted) state
|
159
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#attribute-value-(double-quoted)-state
|
70
160
|
GUMBO_LEX_ATTR_VALUE_DOUBLE_QUOTED,
|
161
|
+
|
162
|
+
// 12.2.5.37 Attribute value (single-quoted) state
|
163
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#attribute-value-(single-quoted)-state
|
71
164
|
GUMBO_LEX_ATTR_VALUE_SINGLE_QUOTED,
|
165
|
+
|
166
|
+
// 12.2.5.38 Attribute value (unquoted) state
|
167
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#attribute-value-(unquoted)-state
|
72
168
|
GUMBO_LEX_ATTR_VALUE_UNQUOTED,
|
73
|
-
|
169
|
+
|
170
|
+
// 12.2.5.39 After attribute value (quoted) state
|
171
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#after-attribute-value-(quoted)-state
|
74
172
|
GUMBO_LEX_AFTER_ATTR_VALUE_QUOTED,
|
173
|
+
|
174
|
+
// 12.2.5.40 Self-closing start tag state
|
175
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#self-closing-start-tag-state
|
75
176
|
GUMBO_LEX_SELF_CLOSING_START_TAG,
|
177
|
+
|
178
|
+
// 12.2.5.41 Bogus comment state
|
179
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#bogus-comment-state
|
76
180
|
GUMBO_LEX_BOGUS_COMMENT,
|
77
|
-
|
181
|
+
|
182
|
+
// 12.2.5.42 Markup declaration open state
|
183
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#markup-declaration-open-state
|
184
|
+
GUMBO_LEX_MARKUP_DECLARATION_OPEN,
|
185
|
+
|
186
|
+
// 12.2.5.43 Comment start state
|
187
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#comment-start-state
|
78
188
|
GUMBO_LEX_COMMENT_START,
|
189
|
+
|
190
|
+
// 12.2.5.44 Comment start dash state
|
191
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#comment-start-dash-state
|
79
192
|
GUMBO_LEX_COMMENT_START_DASH,
|
193
|
+
|
194
|
+
// 12.2.5.45 Comment state
|
195
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#comment-state
|
80
196
|
GUMBO_LEX_COMMENT,
|
197
|
+
|
198
|
+
// 12.2.5.46 Comment less-than sign state
|
199
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#comment-less-than-sign-state
|
200
|
+
GUMBO_LEX_COMMENT_LT,
|
201
|
+
|
202
|
+
// 12.2.5.47 Comment less-than sign bang state
|
203
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#comment-less-than-sign-bang-state
|
204
|
+
GUMBO_LEX_COMMENT_LT_BANG,
|
205
|
+
|
206
|
+
// 12.2.5.48 Comment less-than sign bang dash state
|
207
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#comment-less-than-sign-bang-dash-state
|
208
|
+
GUMBO_LEX_COMMENT_LT_BANG_DASH,
|
209
|
+
|
210
|
+
// 12.2.5.49 Comment less-than sign bang dash dash state
|
211
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#comment-less-than-sign-bang-dash-dash-state
|
212
|
+
GUMBO_LEX_COMMENT_LT_BANG_DASH_DASH,
|
213
|
+
|
214
|
+
// 12.2.5.50 Comment end dash state
|
215
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#comment-end-dash-state
|
81
216
|
GUMBO_LEX_COMMENT_END_DASH,
|
217
|
+
|
218
|
+
// 12.2.5.51 Comment end state
|
219
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#comment-end-state
|
82
220
|
GUMBO_LEX_COMMENT_END,
|
221
|
+
|
222
|
+
// 12.2.5.52 Comment end bang state
|
223
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#comment-end-bang-state
|
83
224
|
GUMBO_LEX_COMMENT_END_BANG,
|
225
|
+
|
226
|
+
// 12.2.5.53 DOCTYPE state
|
227
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#doctype-state
|
84
228
|
GUMBO_LEX_DOCTYPE,
|
229
|
+
|
230
|
+
// 12.2.5.54 Before DOCTYPE name state
|
231
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#before-doctype-name-state
|
85
232
|
GUMBO_LEX_BEFORE_DOCTYPE_NAME,
|
233
|
+
|
234
|
+
// 12.2.5.55 DOCTYPE name state
|
235
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#doctype-name-state
|
86
236
|
GUMBO_LEX_DOCTYPE_NAME,
|
237
|
+
|
238
|
+
// 12.2.5.56 After DOCTYPE name state
|
239
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#after-doctype-name-state
|
87
240
|
GUMBO_LEX_AFTER_DOCTYPE_NAME,
|
241
|
+
|
242
|
+
// 12.2.5.57 After DOCTYPE public keyword state
|
243
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#after-doctype-public-keyword-state
|
88
244
|
GUMBO_LEX_AFTER_DOCTYPE_PUBLIC_KEYWORD,
|
245
|
+
|
246
|
+
// 12.2.5.58 Before DOCTYPE public identifier state
|
247
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#before-doctype-public-identifier-state
|
89
248
|
GUMBO_LEX_BEFORE_DOCTYPE_PUBLIC_ID,
|
249
|
+
|
250
|
+
// 12.2.5.59 DOCTYPE public identifier (double-quoted) state
|
251
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#doctype-public-identifier-(double-quoted)-state
|
90
252
|
GUMBO_LEX_DOCTYPE_PUBLIC_ID_DOUBLE_QUOTED,
|
253
|
+
|
254
|
+
// 12.2.5.60 DOCTYPE public identifier (single-quoted) state
|
255
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#doctype-public-identifier-(single-quoted)-state
|
91
256
|
GUMBO_LEX_DOCTYPE_PUBLIC_ID_SINGLE_QUOTED,
|
257
|
+
|
258
|
+
// 12.2.5.61 After DOCTYPE public identifier state
|
259
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#after-doctype-public-identifier-state
|
92
260
|
GUMBO_LEX_AFTER_DOCTYPE_PUBLIC_ID,
|
261
|
+
|
262
|
+
// 12.2.5.62 Between DOCTYPE public and system identifiers state
|
263
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#between-doctype-public-and-system-identifiers-state
|
93
264
|
GUMBO_LEX_BETWEEN_DOCTYPE_PUBLIC_SYSTEM_ID,
|
265
|
+
|
266
|
+
// 12.2.5.63 After DOCTYPE system keyword state
|
267
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#after-doctype-system-keyword-state
|
94
268
|
GUMBO_LEX_AFTER_DOCTYPE_SYSTEM_KEYWORD,
|
269
|
+
|
270
|
+
// 12.2.5.64 Before DOCTYPE system identifier state
|
271
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#before-doctype-system-identifier-state
|
95
272
|
GUMBO_LEX_BEFORE_DOCTYPE_SYSTEM_ID,
|
273
|
+
|
274
|
+
// 12.2.5.65 DOCTYPE system identifier (double-quoted) state
|
275
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#doctype-system-identifier-(double-quoted)-state
|
96
276
|
GUMBO_LEX_DOCTYPE_SYSTEM_ID_DOUBLE_QUOTED,
|
277
|
+
|
278
|
+
// 12.2.5.66 DOCTYPE system identifier (single-quoted) state
|
279
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#doctype-system-identifier-(single-quoted)-state
|
97
280
|
GUMBO_LEX_DOCTYPE_SYSTEM_ID_SINGLE_QUOTED,
|
281
|
+
|
282
|
+
// 12.2.5.67 After DOCTYPE system identifier state
|
283
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#after-doctype-system-identifier-state
|
98
284
|
GUMBO_LEX_AFTER_DOCTYPE_SYSTEM_ID,
|
285
|
+
|
286
|
+
// 12.2.5.68 Bogus DOCTYPE state
|
287
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#bogus-doctype-state
|
99
288
|
GUMBO_LEX_BOGUS_DOCTYPE,
|
100
|
-
|
289
|
+
|
290
|
+
// 12.2.5.69 CDATA section state
|
291
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#cdata-section-state
|
292
|
+
GUMBO_LEX_CDATA_SECTION,
|
293
|
+
|
294
|
+
// 12.2.5.70 CDATA section bracket state
|
295
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#cdata-section-bracket-state
|
296
|
+
GUMBO_LEX_CDATA_SECTION_BRACKET,
|
297
|
+
|
298
|
+
// 12.2.5.71 CDATA section end state
|
299
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#cdata-section-end-state
|
300
|
+
GUMBO_LEX_CDATA_SECTION_END,
|
301
|
+
|
302
|
+
// 12.2.5.72 Character reference state
|
303
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#character-reference-state
|
304
|
+
GUMBO_LEX_CHARACTER_REFERENCE,
|
305
|
+
|
306
|
+
// 12.2.5.73 Named character reference state
|
307
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#named-character-reference-state
|
308
|
+
GUMBO_LEX_NAMED_CHARACTER_REFERENCE,
|
309
|
+
|
310
|
+
// 12.2.5.74 Ambiguous ampersand state
|
311
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#ambiguous-ampersand-state
|
312
|
+
GUMBO_LEX_AMBIGUOUS_AMPERSAND,
|
313
|
+
|
314
|
+
// 12.2.5.75 Numeric character reference state
|
315
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#numeric-character-reference-state
|
316
|
+
GUMBO_LEX_NUMERIC_CHARACTER_REFERENCE,
|
317
|
+
|
318
|
+
// 12.2.5.76 Hexadecimal character reference start state
|
319
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#hexadecimal-character-reference-start-state
|
320
|
+
GUMBO_LEX_HEXADECIMAL_CHARACTER_REFERENCE_START,
|
321
|
+
|
322
|
+
// 12.2.5.77 Decimal character reference start state
|
323
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#decimal-character-reference-start-state
|
324
|
+
GUMBO_LEX_DECIMAL_CHARACTER_REFERENCE_START,
|
325
|
+
|
326
|
+
// 12.2.5.78 Hexadecimal character reference state
|
327
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#hexadecimal-character-reference-state
|
328
|
+
GUMBO_LEX_HEXADECIMAL_CHARACTER_REFERENCE,
|
329
|
+
|
330
|
+
// 12.2.5.79 Decimal character reference state
|
331
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#decimal-character-reference-state
|
332
|
+
GUMBO_LEX_DECIMAL_CHARACTER_REFERENCE,
|
333
|
+
|
334
|
+
// 12.2.5.80 Numeric character reference end state
|
335
|
+
// https://html.spec.whatwg.org/multipage/parsing.html#numeric-character-reference-end-state
|
336
|
+
GUMBO_LEX_NUMERIC_CHARACTER_REFERENCE_END
|
101
337
|
} GumboTokenizerEnum;
|
102
338
|
|
103
|
-
#endif
|
339
|
+
#endif // GUMBO_TOKENIZER_STATES_H_
|