nokogumbo 1.3.0 → 1.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/README.md +8 -2
- data/ext/nokogumboc/extconf.rb +18 -6
- data/ext/nokogumboc/nokogumbo.c +102 -42
- data/gumbo-parser/src/attribute.c +1 -1
- data/gumbo-parser/src/char_ref.c +37 -67
- data/gumbo-parser/src/char_ref.h +3 -4
- data/gumbo-parser/src/char_ref.rl +6 -1
- data/gumbo-parser/src/error.c +51 -51
- data/gumbo-parser/src/error.h +7 -9
- data/gumbo-parser/src/gumbo.h +45 -181
- data/gumbo-parser/src/parser.c +1439 -1172
- data/gumbo-parser/src/string_buffer.c +14 -10
- data/gumbo-parser/src/string_buffer.h +9 -6
- data/gumbo-parser/src/string_piece.c +5 -6
- data/gumbo-parser/src/string_piece.h +2 -3
- data/gumbo-parser/src/tag.c +36 -166
- data/gumbo-parser/src/tag.in +150 -0
- data/gumbo-parser/src/tag_enum.h +153 -0
- data/gumbo-parser/src/tag_gperf.h +105 -0
- data/gumbo-parser/src/tag_sizes.h +4 -0
- data/gumbo-parser/src/tag_strings.h +153 -0
- data/gumbo-parser/src/token_type.h +1 -0
- data/gumbo-parser/src/tokenizer.c +278 -361
- data/gumbo-parser/src/tokenizer.h +2 -2
- data/gumbo-parser/src/utf8.c +53 -52
- data/gumbo-parser/src/utf8.h +1 -2
- data/gumbo-parser/src/util.c +1 -1
- data/gumbo-parser/src/util.h +0 -2
- data/gumbo-parser/src/vector.c +17 -17
- data/gumbo-parser/src/vector.h +6 -8
- data/gumbo-parser/visualc/include/strings.h +2 -1
- data/lib/nokogumbo.rb +8 -8
- data/test-nokogumbo.rb +190 -0
- metadata +19 -17
data/gumbo-parser/src/parser.c
CHANGED
@@ -32,48 +32,55 @@
|
|
32
32
|
#include "util.h"
|
33
33
|
#include "vector.h"
|
34
34
|
|
35
|
-
|
36
35
|
#define AVOID_UNUSED_VARIABLE_WARNING(i) (void)(i)
|
37
36
|
|
38
|
-
#define GUMBO_STRING(literal)
|
39
|
-
|
37
|
+
#define GUMBO_STRING(literal) \
|
38
|
+
{ literal, sizeof(literal) - 1 }
|
39
|
+
#define TERMINATOR \
|
40
|
+
{ "", 0 }
|
40
41
|
|
41
|
-
|
42
|
-
|
43
|
-
|
42
|
+
typedef char gumbo_tagset[GUMBO_TAG_LAST];
|
43
|
+
#define TAG(tag) [GUMBO_TAG_##tag] = (1 << GUMBO_NAMESPACE_HTML)
|
44
|
+
#define TAG_SVG(tag) [GUMBO_TAG_##tag] = (1 << GUMBO_NAMESPACE_SVG)
|
45
|
+
#define TAG_MATHML(tag) [GUMBO_TAG_##tag] = (1 << GUMBO_NAMESPACE_MATHML)
|
44
46
|
|
45
|
-
|
46
|
-
|
47
|
-
}
|
47
|
+
#define TAGSET_INCLUDES(tagset, namespace, tag) \
|
48
|
+
(tag < GUMBO_TAG_LAST && tagset[(int) tag] == (1 << (int) namespace))
|
48
49
|
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
50
|
+
// selected forward declarations as it is getting hard to find
|
51
|
+
// an appropriate order
|
52
|
+
static bool node_html_tag_is(const GumboNode*, GumboTag);
|
53
|
+
static GumboInsertionMode get_current_template_insertion_mode(
|
54
|
+
const GumboParser*);
|
55
|
+
static bool handle_in_template(GumboParser*, GumboToken*);
|
56
|
+
static void destroy_node(GumboParser*, GumboNode*);
|
57
|
+
|
58
|
+
// Allocation callback used as the first entry of kGumboDefaultOptions.
// The first argument is ignored; the request is forwarded to malloc() and
// malloc()'s result (possibly NULL) is returned unchanged.
static void* malloc_wrapper(void* unused, size_t size) {
  (void) unused;
  void* block = malloc(size);
  return block;
}
|
59
|
+
|
60
|
+
// Deallocation callback used as the second entry of kGumboDefaultOptions.
// The first argument is ignored; ptr is handed straight to free().
static void free_wrapper(void* unused, void* ptr) {
  (void) unused;
  free(ptr);
}
|
61
|
+
|
62
|
+
const GumboOptions kGumboDefaultOptions = {&malloc_wrapper, &free_wrapper, NULL,
|
63
|
+
8, false, -1, GUMBO_TAG_LAST, GUMBO_NAMESPACE_HTML};
|
57
64
|
|
58
65
|
static const GumboStringPiece kDoctypeHtml = GUMBO_STRING("html");
|
59
|
-
static const GumboStringPiece kPublicIdHtml4_0 =
|
60
|
-
"-//W3C//DTD HTML 4.0//EN");
|
61
|
-
static const GumboStringPiece kPublicIdHtml4_01 =
|
62
|
-
"-//W3C//DTD HTML 4.01//EN");
|
63
|
-
static const GumboStringPiece kPublicIdXhtml1_0 =
|
64
|
-
"-//W3C//DTD XHTML 1.0 Strict//EN");
|
65
|
-
static const GumboStringPiece kPublicIdXhtml1_1 =
|
66
|
-
"-//W3C//DTD XHTML 1.1//EN");
|
67
|
-
static const GumboStringPiece kSystemIdRecHtml4_0 =
|
68
|
-
"http://www.w3.org/TR/REC-html40/strict.dtd");
|
69
|
-
static const GumboStringPiece kSystemIdHtml4 =
|
70
|
-
"http://www.w3.org/TR/html4/strict.dtd");
|
71
|
-
static const GumboStringPiece kSystemIdXhtmlStrict1_1 =
|
72
|
-
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd");
|
73
|
-
static const GumboStringPiece kSystemIdXhtml1_1 =
|
74
|
-
"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd");
|
75
|
-
static const GumboStringPiece kSystemIdLegacyCompat =
|
76
|
-
"about:legacy-compat");
|
66
|
+
static const GumboStringPiece kPublicIdHtml4_0 =
|
67
|
+
GUMBO_STRING("-//W3C//DTD HTML 4.0//EN");
|
68
|
+
static const GumboStringPiece kPublicIdHtml4_01 =
|
69
|
+
GUMBO_STRING("-//W3C//DTD HTML 4.01//EN");
|
70
|
+
static const GumboStringPiece kPublicIdXhtml1_0 =
|
71
|
+
GUMBO_STRING("-//W3C//DTD XHTML 1.0 Strict//EN");
|
72
|
+
static const GumboStringPiece kPublicIdXhtml1_1 =
|
73
|
+
GUMBO_STRING("-//W3C//DTD XHTML 1.1//EN");
|
74
|
+
static const GumboStringPiece kSystemIdRecHtml4_0 =
|
75
|
+
GUMBO_STRING("http://www.w3.org/TR/REC-html40/strict.dtd");
|
76
|
+
static const GumboStringPiece kSystemIdHtml4 =
|
77
|
+
GUMBO_STRING("http://www.w3.org/TR/html4/strict.dtd");
|
78
|
+
static const GumboStringPiece kSystemIdXhtmlStrict1_1 =
|
79
|
+
GUMBO_STRING("http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd");
|
80
|
+
static const GumboStringPiece kSystemIdXhtml1_1 =
|
81
|
+
GUMBO_STRING("http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd");
|
82
|
+
static const GumboStringPiece kSystemIdLegacyCompat =
|
83
|
+
GUMBO_STRING("about:legacy-compat");
|
77
84
|
|
78
85
|
// The doctype arrays have an explicit terminator because we want to pass them
|
79
86
|
// to a helper function, and passing them as a pointer discards sizeof
|
@@ -81,96 +88,86 @@ static const GumboStringPiece kSystemIdLegacyCompat = GUMBO_STRING(
|
|
81
88
|
// over them use sizeof directly instead of a terminator.
|
82
89
|
|
83
90
|
static const GumboStringPiece kQuirksModePublicIdPrefixes[] = {
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
};
|
91
|
+
GUMBO_STRING("+//Silmaril//dtd html Pro v0r11 19970101//"),
|
92
|
+
GUMBO_STRING("-//AdvaSoft Ltd//DTD HTML 3.0 asWedit + extensions//"),
|
93
|
+
GUMBO_STRING("-//AS//DTD HTML 3.0 asWedit + extensions//"),
|
94
|
+
GUMBO_STRING("-//IETF//DTD HTML 2.0 Level 1//"),
|
95
|
+
GUMBO_STRING("-//IETF//DTD HTML 2.0 Level 2//"),
|
96
|
+
GUMBO_STRING("-//IETF//DTD HTML 2.0 Strict Level 1//"),
|
97
|
+
GUMBO_STRING("-//IETF//DTD HTML 2.0 Strict Level 2//"),
|
98
|
+
GUMBO_STRING("-//IETF//DTD HTML 2.0 Strict//"),
|
99
|
+
GUMBO_STRING("-//IETF//DTD HTML 2.0//"),
|
100
|
+
GUMBO_STRING("-//IETF//DTD HTML 2.1E//"),
|
101
|
+
GUMBO_STRING("-//IETF//DTD HTML 3.0//"),
|
102
|
+
GUMBO_STRING("-//IETF//DTD HTML 3.2 Final//"),
|
103
|
+
GUMBO_STRING("-//IETF//DTD HTML 3.2//"),
|
104
|
+
GUMBO_STRING("-//IETF//DTD HTML 3//"),
|
105
|
+
GUMBO_STRING("-//IETF//DTD HTML Level 0//"),
|
106
|
+
GUMBO_STRING("-//IETF//DTD HTML Level 1//"),
|
107
|
+
GUMBO_STRING("-//IETF//DTD HTML Level 2//"),
|
108
|
+
GUMBO_STRING("-//IETF//DTD HTML Level 3//"),
|
109
|
+
GUMBO_STRING("-//IETF//DTD HTML Strict Level 0//"),
|
110
|
+
GUMBO_STRING("-//IETF//DTD HTML Strict Level 1//"),
|
111
|
+
GUMBO_STRING("-//IETF//DTD HTML Strict Level 2//"),
|
112
|
+
GUMBO_STRING("-//IETF//DTD HTML Strict Level 3//"),
|
113
|
+
GUMBO_STRING("-//IETF//DTD HTML Strict//"),
|
114
|
+
GUMBO_STRING("-//IETF//DTD HTML//"),
|
115
|
+
GUMBO_STRING("-//Metrius//DTD Metrius Presentational//"),
|
116
|
+
GUMBO_STRING("-//Microsoft//DTD Internet Explorer 2.0 HTML Strict//"),
|
117
|
+
GUMBO_STRING("-//Microsoft//DTD Internet Explorer 2.0 HTML//"),
|
118
|
+
GUMBO_STRING("-//Microsoft//DTD Internet Explorer 2.0 Tables//"),
|
119
|
+
GUMBO_STRING("-//Microsoft//DTD Internet Explorer 3.0 HTML Strict//"),
|
120
|
+
GUMBO_STRING("-//Microsoft//DTD Internet Explorer 3.0 HTML//"),
|
121
|
+
GUMBO_STRING("-//Microsoft//DTD Internet Explorer 3.0 Tables//"),
|
122
|
+
GUMBO_STRING("-//Netscape Comm. Corp.//DTD HTML//"),
|
123
|
+
GUMBO_STRING("-//Netscape Comm. Corp.//DTD Strict HTML//"),
|
124
|
+
GUMBO_STRING("-//O'Reilly and Associates//DTD HTML 2.0//"),
|
125
|
+
GUMBO_STRING("-//O'Reilly and Associates//DTD HTML Extended 1.0//"),
|
126
|
+
GUMBO_STRING("-//O'Reilly and Associates//DTD HTML Extended Relaxed 1.0//"),
|
127
|
+
GUMBO_STRING(
|
128
|
+
"-//SoftQuad Software//DTD HoTMetaL PRO 6.0::19990601::)"
|
129
|
+
"extensions to HTML 4.0//"),
|
130
|
+
GUMBO_STRING(
|
131
|
+
"-//SoftQuad//DTD HoTMetaL PRO 4.0::19971010::"
|
132
|
+
"extensions to HTML 4.0//"),
|
133
|
+
GUMBO_STRING("-//Spyglass//DTD HTML 2.0 Extended//"),
|
134
|
+
GUMBO_STRING("-//SQ//DTD HTML 2.0 HoTMetaL + extensions//"),
|
135
|
+
GUMBO_STRING("-//Sun Microsystems Corp.//DTD HotJava HTML//"),
|
136
|
+
GUMBO_STRING("-//Sun Microsystems Corp.//DTD HotJava Strict HTML//"),
|
137
|
+
GUMBO_STRING("-//W3C//DTD HTML 3 1995-03-24//"),
|
138
|
+
GUMBO_STRING("-//W3C//DTD HTML 3.2 Draft//"),
|
139
|
+
GUMBO_STRING("-//W3C//DTD HTML 3.2 Final//"),
|
140
|
+
GUMBO_STRING("-//W3C//DTD HTML 3.2//"),
|
141
|
+
GUMBO_STRING("-//W3C//DTD HTML 3.2S Draft//"),
|
142
|
+
GUMBO_STRING("-//W3C//DTD HTML 4.0 Frameset//"),
|
143
|
+
GUMBO_STRING("-//W3C//DTD HTML 4.0 Transitional//"),
|
144
|
+
GUMBO_STRING("-//W3C//DTD HTML Experimental 19960712//"),
|
145
|
+
GUMBO_STRING("-//W3C//DTD HTML Experimental 970421//"),
|
146
|
+
GUMBO_STRING("-//W3C//DTD W3 HTML//"),
|
147
|
+
GUMBO_STRING("-//W3O//DTD W3 HTML 3.0//"),
|
148
|
+
GUMBO_STRING("-//WebTechs//DTD Mozilla HTML 2.0//"),
|
149
|
+
GUMBO_STRING("-//WebTechs//DTD Mozilla HTML//"), TERMINATOR};
|
143
150
|
|
144
151
|
static const GumboStringPiece kQuirksModePublicIdExactMatches[] = {
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
TERMINATOR
|
149
|
-
};
|
152
|
+
GUMBO_STRING("-//W3O//DTD W3 HTML Strict 3.0//EN//"),
|
153
|
+
GUMBO_STRING("-/W3C/DTD HTML 4.0 Transitional/EN"), GUMBO_STRING("HTML"),
|
154
|
+
TERMINATOR};
|
150
155
|
|
151
156
|
static const GumboStringPiece kQuirksModeSystemIdExactMatches[] = {
|
152
|
-
|
153
|
-
|
154
|
-
};
|
157
|
+
GUMBO_STRING("http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd"),
|
158
|
+
TERMINATOR};
|
155
159
|
|
156
160
|
static const GumboStringPiece kLimitedQuirksPublicIdPrefixes[] = {
|
157
|
-
|
158
|
-
|
159
|
-
TERMINATOR
|
160
|
-
};
|
161
|
+
GUMBO_STRING("-//W3C//DTD XHTML 1.0 Frameset//"),
|
162
|
+
GUMBO_STRING("-//W3C//DTD XHTML 1.0 Transitional//"), TERMINATOR};
|
161
163
|
|
162
|
-
static const GumboStringPiece kLimitedQuirksRequiresSystemIdPublicIdPrefixes[] =
|
163
|
-
|
164
|
-
|
165
|
-
TERMINATOR
|
166
|
-
};
|
164
|
+
static const GumboStringPiece kLimitedQuirksRequiresSystemIdPublicIdPrefixes[] =
|
165
|
+
{GUMBO_STRING("-//W3C//DTD HTML 4.01 Frameset//"),
|
166
|
+
GUMBO_STRING("-//W3C//DTD HTML 4.01 Transitional//"), TERMINATOR};
|
167
167
|
|
168
168
|
// Indexed by GumboNamespaceEnum; keep in sync with that.
|
169
|
-
static const char* kLegalXmlns[] = {
|
170
|
-
|
171
|
-
"http://www.w3.org/2000/svg",
|
172
|
-
"http://www.w3.org/1998/Math/MathML"
|
173
|
-
};
|
169
|
+
static const char* kLegalXmlns[] = {"http://www.w3.org/1999/xhtml",
|
170
|
+
"http://www.w3.org/2000/svg", "http://www.w3.org/1998/Math/MathML"};
|
174
171
|
|
175
172
|
typedef struct _ReplacementEntry {
|
176
173
|
const GumboStringPiece from;
|
@@ -178,112 +175,112 @@ typedef struct _ReplacementEntry {
|
|
178
175
|
} ReplacementEntry;
|
179
176
|
|
180
177
|
#define REPLACEMENT_ENTRY(from, to) \
|
181
|
-
|
178
|
+
{ GUMBO_STRING(from), GUMBO_STRING(to) }
|
182
179
|
|
183
180
|
// Static data for SVG attribute replacements.
|
184
|
-
//
|
181
|
+
// https://html.spec.whatwg.org/multipage/syntax.html#creating-and-inserting-nodes
|
185
182
|
static const ReplacementEntry kSvgAttributeReplacements[] = {
|
186
|
-
|
187
|
-
|
188
|
-
|
189
|
-
|
190
|
-
|
191
|
-
|
192
|
-
|
193
|
-
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
-
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
|
207
|
-
|
208
|
-
|
209
|
-
|
210
|
-
|
211
|
-
|
212
|
-
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
|
217
|
-
|
218
|
-
|
219
|
-
|
220
|
-
|
221
|
-
|
222
|
-
|
223
|
-
|
224
|
-
|
225
|
-
|
226
|
-
|
227
|
-
|
228
|
-
|
229
|
-
|
230
|
-
|
231
|
-
|
232
|
-
|
233
|
-
|
234
|
-
|
235
|
-
|
236
|
-
|
237
|
-
|
238
|
-
|
239
|
-
|
240
|
-
|
241
|
-
|
242
|
-
|
243
|
-
|
244
|
-
|
245
|
-
|
246
|
-
|
247
|
-
|
183
|
+
REPLACEMENT_ENTRY("attributename", "attributeName"),
|
184
|
+
REPLACEMENT_ENTRY("attributetype", "attributeType"),
|
185
|
+
REPLACEMENT_ENTRY("basefrequency", "baseFrequency"),
|
186
|
+
REPLACEMENT_ENTRY("baseprofile", "baseProfile"),
|
187
|
+
REPLACEMENT_ENTRY("calcmode", "calcMode"),
|
188
|
+
REPLACEMENT_ENTRY("clippathunits", "clipPathUnits"),
|
189
|
+
// REPLACEMENT_ENTRY("contentscripttype", "contentScriptType"),
|
190
|
+
// REPLACEMENT_ENTRY("contentstyletype", "contentStyleType"),
|
191
|
+
REPLACEMENT_ENTRY("diffuseconstant", "diffuseConstant"),
|
192
|
+
REPLACEMENT_ENTRY("edgemode", "edgeMode"),
|
193
|
+
// REPLACEMENT_ENTRY("externalresourcesrequired",
|
194
|
+
// "externalResourcesRequired"),
|
195
|
+
// REPLACEMENT_ENTRY("filterres", "filterRes"),
|
196
|
+
REPLACEMENT_ENTRY("filterunits", "filterUnits"),
|
197
|
+
REPLACEMENT_ENTRY("glyphref", "glyphRef"),
|
198
|
+
REPLACEMENT_ENTRY("gradienttransform", "gradientTransform"),
|
199
|
+
REPLACEMENT_ENTRY("gradientunits", "gradientUnits"),
|
200
|
+
REPLACEMENT_ENTRY("kernelmatrix", "kernelMatrix"),
|
201
|
+
REPLACEMENT_ENTRY("kernelunitlength", "kernelUnitLength"),
|
202
|
+
REPLACEMENT_ENTRY("keypoints", "keyPoints"),
|
203
|
+
REPLACEMENT_ENTRY("keysplines", "keySplines"),
|
204
|
+
REPLACEMENT_ENTRY("keytimes", "keyTimes"),
|
205
|
+
REPLACEMENT_ENTRY("lengthadjust", "lengthAdjust"),
|
206
|
+
REPLACEMENT_ENTRY("limitingconeangle", "limitingConeAngle"),
|
207
|
+
REPLACEMENT_ENTRY("markerheight", "markerHeight"),
|
208
|
+
REPLACEMENT_ENTRY("markerunits", "markerUnits"),
|
209
|
+
REPLACEMENT_ENTRY("markerwidth", "markerWidth"),
|
210
|
+
REPLACEMENT_ENTRY("maskcontentunits", "maskContentUnits"),
|
211
|
+
REPLACEMENT_ENTRY("maskunits", "maskUnits"),
|
212
|
+
REPLACEMENT_ENTRY("numoctaves", "numOctaves"),
|
213
|
+
REPLACEMENT_ENTRY("pathlength", "pathLength"),
|
214
|
+
REPLACEMENT_ENTRY("patterncontentunits", "patternContentUnits"),
|
215
|
+
REPLACEMENT_ENTRY("patterntransform", "patternTransform"),
|
216
|
+
REPLACEMENT_ENTRY("patternunits", "patternUnits"),
|
217
|
+
REPLACEMENT_ENTRY("pointsatx", "pointsAtX"),
|
218
|
+
REPLACEMENT_ENTRY("pointsaty", "pointsAtY"),
|
219
|
+
REPLACEMENT_ENTRY("pointsatz", "pointsAtZ"),
|
220
|
+
REPLACEMENT_ENTRY("preservealpha", "preserveAlpha"),
|
221
|
+
REPLACEMENT_ENTRY("preserveaspectratio", "preserveAspectRatio"),
|
222
|
+
REPLACEMENT_ENTRY("primitiveunits", "primitiveUnits"),
|
223
|
+
REPLACEMENT_ENTRY("refx", "refX"), REPLACEMENT_ENTRY("refy", "refY"),
|
224
|
+
REPLACEMENT_ENTRY("repeatcount", "repeatCount"),
|
225
|
+
REPLACEMENT_ENTRY("repeatdur", "repeatDur"),
|
226
|
+
REPLACEMENT_ENTRY("requiredextensions", "requiredExtensions"),
|
227
|
+
REPLACEMENT_ENTRY("requiredfeatures", "requiredFeatures"),
|
228
|
+
REPLACEMENT_ENTRY("specularconstant", "specularConstant"),
|
229
|
+
REPLACEMENT_ENTRY("specularexponent", "specularExponent"),
|
230
|
+
REPLACEMENT_ENTRY("spreadmethod", "spreadMethod"),
|
231
|
+
REPLACEMENT_ENTRY("startoffset", "startOffset"),
|
232
|
+
REPLACEMENT_ENTRY("stddeviation", "stdDeviation"),
|
233
|
+
REPLACEMENT_ENTRY("stitchtiles", "stitchTiles"),
|
234
|
+
REPLACEMENT_ENTRY("surfacescale", "surfaceScale"),
|
235
|
+
REPLACEMENT_ENTRY("systemlanguage", "systemLanguage"),
|
236
|
+
REPLACEMENT_ENTRY("tablevalues", "tableValues"),
|
237
|
+
REPLACEMENT_ENTRY("targetx", "targetX"),
|
238
|
+
REPLACEMENT_ENTRY("targety", "targetY"),
|
239
|
+
REPLACEMENT_ENTRY("textlength", "textLength"),
|
240
|
+
REPLACEMENT_ENTRY("viewbox", "viewBox"),
|
241
|
+
REPLACEMENT_ENTRY("viewtarget", "viewTarget"),
|
242
|
+
REPLACEMENT_ENTRY("xchannelselector", "xChannelSelector"),
|
243
|
+
REPLACEMENT_ENTRY("ychannelselector", "yChannelSelector"),
|
244
|
+
REPLACEMENT_ENTRY("zoomandpan", "zoomAndPan"),
|
248
245
|
};
|
249
246
|
|
250
247
|
static const ReplacementEntry kSvgTagReplacements[] = {
|
251
|
-
|
252
|
-
|
253
|
-
|
254
|
-
|
255
|
-
|
256
|
-
|
257
|
-
|
258
|
-
|
259
|
-
|
260
|
-
|
261
|
-
|
262
|
-
|
263
|
-
|
264
|
-
|
265
|
-
|
266
|
-
|
267
|
-
|
268
|
-
|
269
|
-
|
270
|
-
|
271
|
-
|
272
|
-
|
273
|
-
|
274
|
-
|
275
|
-
|
276
|
-
|
277
|
-
|
278
|
-
|
279
|
-
|
280
|
-
|
281
|
-
|
282
|
-
|
283
|
-
|
284
|
-
|
285
|
-
|
286
|
-
|
248
|
+
REPLACEMENT_ENTRY("altglyph", "altGlyph"),
|
249
|
+
REPLACEMENT_ENTRY("altglyphdef", "altGlyphDef"),
|
250
|
+
REPLACEMENT_ENTRY("altglyphitem", "altGlyphItem"),
|
251
|
+
REPLACEMENT_ENTRY("animatecolor", "animateColor"),
|
252
|
+
REPLACEMENT_ENTRY("animatemotion", "animateMotion"),
|
253
|
+
REPLACEMENT_ENTRY("animatetransform", "animateTransform"),
|
254
|
+
REPLACEMENT_ENTRY("clippath", "clipPath"),
|
255
|
+
REPLACEMENT_ENTRY("feblend", "feBlend"),
|
256
|
+
REPLACEMENT_ENTRY("fecolormatrix", "feColorMatrix"),
|
257
|
+
REPLACEMENT_ENTRY("fecomponenttransfer", "feComponentTransfer"),
|
258
|
+
REPLACEMENT_ENTRY("fecomposite", "feComposite"),
|
259
|
+
REPLACEMENT_ENTRY("feconvolvematrix", "feConvolveMatrix"),
|
260
|
+
REPLACEMENT_ENTRY("fediffuselighting", "feDiffuseLighting"),
|
261
|
+
REPLACEMENT_ENTRY("fedisplacementmap", "feDisplacementMap"),
|
262
|
+
REPLACEMENT_ENTRY("fedistantlight", "feDistantLight"),
|
263
|
+
REPLACEMENT_ENTRY("feflood", "feFlood"),
|
264
|
+
REPLACEMENT_ENTRY("fefunca", "feFuncA"),
|
265
|
+
REPLACEMENT_ENTRY("fefuncb", "feFuncB"),
|
266
|
+
REPLACEMENT_ENTRY("fefuncg", "feFuncG"),
|
267
|
+
REPLACEMENT_ENTRY("fefuncr", "feFuncR"),
|
268
|
+
REPLACEMENT_ENTRY("fegaussianblur", "feGaussianBlur"),
|
269
|
+
REPLACEMENT_ENTRY("feimage", "feImage"),
|
270
|
+
REPLACEMENT_ENTRY("femerge", "feMerge"),
|
271
|
+
REPLACEMENT_ENTRY("femergenode", "feMergeNode"),
|
272
|
+
REPLACEMENT_ENTRY("femorphology", "feMorphology"),
|
273
|
+
REPLACEMENT_ENTRY("feoffset", "feOffset"),
|
274
|
+
REPLACEMENT_ENTRY("fepointlight", "fePointLight"),
|
275
|
+
REPLACEMENT_ENTRY("fespecularlighting", "feSpecularLighting"),
|
276
|
+
REPLACEMENT_ENTRY("fespotlight", "feSpotLight"),
|
277
|
+
REPLACEMENT_ENTRY("fetile", "feTile"),
|
278
|
+
REPLACEMENT_ENTRY("feturbulence", "feTurbulence"),
|
279
|
+
REPLACEMENT_ENTRY("foreignobject", "foreignObject"),
|
280
|
+
REPLACEMENT_ENTRY("glyphref", "glyphRef"),
|
281
|
+
REPLACEMENT_ENTRY("lineargradient", "linearGradient"),
|
282
|
+
REPLACEMENT_ENTRY("radialgradient", "radialGradient"),
|
283
|
+
REPLACEMENT_ENTRY("textpath", "textPath"),
|
287
284
|
};
|
288
285
|
|
289
286
|
typedef struct _NamespacedAttributeReplacement {
|
@@ -293,18 +290,18 @@ typedef struct _NamespacedAttributeReplacement {
|
|
293
290
|
} NamespacedAttributeReplacement;
|
294
291
|
|
295
292
|
static const NamespacedAttributeReplacement kForeignAttributeReplacements[] = {
|
296
|
-
|
297
|
-
|
298
|
-
|
299
|
-
|
300
|
-
|
301
|
-
|
302
|
-
|
303
|
-
|
304
|
-
|
305
|
-
|
306
|
-
|
307
|
-
|
293
|
+
{"xlink:actuate", "actuate", GUMBO_ATTR_NAMESPACE_XLINK},
|
294
|
+
{"xlink:actuate", "actuate", GUMBO_ATTR_NAMESPACE_XLINK},
|
295
|
+
{"xlink:href", "href", GUMBO_ATTR_NAMESPACE_XLINK},
|
296
|
+
{"xlink:role", "role", GUMBO_ATTR_NAMESPACE_XLINK},
|
297
|
+
{"xlink:show", "show", GUMBO_ATTR_NAMESPACE_XLINK},
|
298
|
+
{"xlink:title", "title", GUMBO_ATTR_NAMESPACE_XLINK},
|
299
|
+
{"xlink:type", "type", GUMBO_ATTR_NAMESPACE_XLINK},
|
300
|
+
{"xml:base", "base", GUMBO_ATTR_NAMESPACE_XML},
|
301
|
+
{"xml:lang", "lang", GUMBO_ATTR_NAMESPACE_XML},
|
302
|
+
{"xml:space", "space", GUMBO_ATTR_NAMESPACE_XML},
|
303
|
+
{"xmlns", "xmlns", GUMBO_ATTR_NAMESPACE_XMLNS},
|
304
|
+
{"xmlns:xlink", "xlink", GUMBO_ATTR_NAMESPACE_XMLNS},
|
308
305
|
};
|
309
306
|
|
310
307
|
// The "scope marker" for the list of active formatting elements. We use a
|
@@ -336,7 +333,7 @@ typedef struct _TextNodeBufferState {
|
|
336
333
|
// The source position of the start of this text node.
|
337
334
|
GumboSourcePosition _start_position;
|
338
335
|
|
339
|
-
// The type of node that will be inserted (TEXT or WHITESPACE).
|
336
|
+
// The type of node that will be inserted (TEXT, CDATA, or WHITESPACE).
|
340
337
|
GumboNodeType _type;
|
341
338
|
} TextNodeBufferState;
|
342
339
|
|
@@ -362,6 +359,9 @@ typedef struct GumboInternalParserState {
|
|
362
359
|
GumboNode* _head_element;
|
363
360
|
GumboNode* _form_element;
|
364
361
|
|
362
|
+
// The element used as fragment context when parsing in fragment mode
|
363
|
+
GumboNode* _fragment_ctx;
|
364
|
+
|
365
365
|
// The flag for when the spec says "Reprocess the current token in..."
|
366
366
|
bool _reprocess_current_token;
|
367
367
|
|
@@ -418,14 +418,14 @@ static bool attribute_matches(
|
|
418
418
|
static bool attribute_matches_case_sensitive(
|
419
419
|
const GumboVector* attributes, const char* name, const char* value) {
|
420
420
|
const GumboAttribute* attr = gumbo_get_attribute(attributes, name);
|
421
|
-
return attr ?
|
421
|
+
return attr ? strcmp(value, attr->value) == 0 : false;
|
422
422
|
}
|
423
423
|
|
424
424
|
// Checks if the specified attribute vectors are identical.
|
425
425
|
static bool all_attributes_match(
|
426
426
|
const GumboVector* attr1, const GumboVector* attr2) {
|
427
|
-
int num_unmatched_attr2_elements = attr2->length;
|
428
|
-
for (int i = 0; i < attr1->length; ++i) {
|
427
|
+
unsigned int num_unmatched_attr2_elements = attr2->length;
|
428
|
+
for (unsigned int i = 0; i < attr1->length; ++i) {
|
429
429
|
const GumboAttribute* attr = attr1->data[i];
|
430
430
|
if (attribute_matches_case_sensitive(attr2, attr->name, attr->value)) {
|
431
431
|
--num_unmatched_attr2_elements;
|
@@ -453,8 +453,7 @@ static GumboNode* create_node(GumboParser* parser, GumboNodeType type) {
|
|
453
453
|
static GumboNode* new_document_node(GumboParser* parser) {
|
454
454
|
GumboNode* document_node = create_node(parser, GUMBO_NODE_DOCUMENT);
|
455
455
|
document_node->parse_flags = GUMBO_INSERTION_BY_PARSER;
|
456
|
-
gumbo_vector_init(
|
457
|
-
parser, 1, &document_node->v.document.children);
|
456
|
+
gumbo_vector_init(parser, 1, &document_node->v.document.children);
|
458
457
|
|
459
458
|
// Must be initialized explicitly, as there's no guarantee that we'll see a
|
460
459
|
// doc type token.
|
@@ -489,6 +488,7 @@ static void parser_state_init(GumboParser* parser) {
|
|
489
488
|
gumbo_vector_init(parser, 5, &parser_state->_template_insertion_modes);
|
490
489
|
parser_state->_head_element = NULL;
|
491
490
|
parser_state->_form_element = NULL;
|
491
|
+
parser_state->_fragment_ctx = NULL;
|
492
492
|
parser_state->_current_token = NULL;
|
493
493
|
parser_state->_closed_body_tag = false;
|
494
494
|
parser_state->_closed_html_tag = false;
|
@@ -497,6 +497,9 @@ static void parser_state_init(GumboParser* parser) {
|
|
497
497
|
|
498
498
|
static void parser_state_destroy(GumboParser* parser) {
|
499
499
|
GumboParserState* state = parser->_parser_state;
|
500
|
+
if (state->_fragment_ctx) {
|
501
|
+
destroy_node(parser, state->_fragment_ctx);
|
502
|
+
}
|
500
503
|
gumbo_vector_destroy(parser, &state->_active_formatting_elements);
|
501
504
|
gumbo_vector_destroy(parser, &state->_open_elements);
|
502
505
|
gumbo_vector_destroy(parser, &state->_template_insertion_modes);
|
@@ -508,6 +511,10 @@ static GumboNode* get_document_node(GumboParser* parser) {
|
|
508
511
|
return parser->_output->document;
|
509
512
|
}
|
510
513
|
|
514
|
+
static bool is_fragment_parser(const GumboParser* parser) {
|
515
|
+
return !!parser->_parser_state->_fragment_ctx;
|
516
|
+
}
|
517
|
+
|
511
518
|
// Returns the node at the bottom of the stack of open elements, or NULL if no
|
512
519
|
// elements have been added yet.
|
513
520
|
static GumboNode* get_current_node(GumboParser* parser) {
|
@@ -521,6 +528,14 @@ static GumboNode* get_current_node(GumboParser* parser) {
|
|
521
528
|
return open_elements->data[open_elements->length - 1];
|
522
529
|
}
|
523
530
|
|
531
|
+
static GumboNode* get_adjusted_current_node(GumboParser* parser) {
|
532
|
+
GumboParserState* state = parser->_parser_state;
|
533
|
+
if (state->_open_elements.length == 1 && state->_fragment_ctx) {
|
534
|
+
return state->_fragment_ctx;
|
535
|
+
}
|
536
|
+
return get_current_node(parser);
|
537
|
+
}
|
538
|
+
|
524
539
|
// Returns true if the given needle is in the given array of literal
|
525
540
|
// GumboStringPieces. If exact_match is true, this requires that they match
|
526
541
|
// exactly; otherwise, this performs a prefix match to check if any of the
|
@@ -528,7 +543,7 @@ static GumboNode* get_current_node(GumboParser* parser) {
|
|
528
543
|
// case-insensitive match.
|
529
544
|
static bool is_in_static_list(
|
530
545
|
const char* needle, const GumboStringPiece* haystack, bool exact_match) {
|
531
|
-
for (int i = 0; haystack[i].length > 0; ++i) {
|
546
|
+
for (unsigned int i = 0; haystack[i].length > 0; ++i) {
|
532
547
|
if ((exact_match && !strcmp(needle, haystack[i].data)) ||
|
533
548
|
(!exact_match && !strcasecmp(needle, haystack[i].data))) {
|
534
549
|
return true;
|
@@ -547,15 +562,36 @@ static void set_insertion_mode(GumboParser* parser, GumboInsertionMode mode) {
|
|
547
562
|
// indicate that there is no appropriate insertion mode, and the loop should
|
548
563
|
// continue.
|
549
564
|
static GumboInsertionMode get_appropriate_insertion_mode(
|
550
|
-
const
|
551
|
-
|
565
|
+
const GumboParser* parser, int index) {
|
566
|
+
const GumboVector* open_elements = &parser->_parser_state->_open_elements;
|
567
|
+
const GumboNode* node = open_elements->data[index];
|
568
|
+
const bool is_last = index == 0;
|
569
|
+
|
570
|
+
if (is_last && is_fragment_parser(parser)) {
|
571
|
+
node = parser->_parser_state->_fragment_ctx;
|
572
|
+
}
|
573
|
+
|
574
|
+
assert(node->type == GUMBO_NODE_ELEMENT || node->type == GUMBO_NODE_TEMPLATE);
|
552
575
|
switch (node->v.element.tag) {
|
553
|
-
case GUMBO_TAG_SELECT:
|
576
|
+
case GUMBO_TAG_SELECT: {
|
577
|
+
if (is_last) {
|
578
|
+
return GUMBO_INSERTION_MODE_IN_SELECT;
|
579
|
+
}
|
580
|
+
for (int i = index; i > 0; --i) {
|
581
|
+
const GumboNode* ancestor = open_elements->data[i];
|
582
|
+
if (node_html_tag_is(ancestor, GUMBO_TAG_TEMPLATE)) {
|
583
|
+
return GUMBO_INSERTION_MODE_IN_SELECT;
|
584
|
+
}
|
585
|
+
if (node_html_tag_is(ancestor, GUMBO_TAG_TABLE)) {
|
586
|
+
return GUMBO_INSERTION_MODE_IN_SELECT_IN_TABLE;
|
587
|
+
}
|
588
|
+
}
|
554
589
|
return GUMBO_INSERTION_MODE_IN_SELECT;
|
590
|
+
}
|
555
591
|
case GUMBO_TAG_TD:
|
556
592
|
case GUMBO_TAG_TH:
|
557
|
-
|
558
|
-
|
593
|
+
if (!is_last) return GUMBO_INSERTION_MODE_IN_CELL;
|
594
|
+
break;
|
559
595
|
case GUMBO_TAG_TR:
|
560
596
|
return GUMBO_INSERTION_MODE_IN_ROW;
|
561
597
|
case GUMBO_TAG_TBODY:
|
@@ -568,25 +604,30 @@ static GumboInsertionMode get_appropriate_insertion_mode(
|
|
568
604
|
return GUMBO_INSERTION_MODE_IN_COLUMN_GROUP;
|
569
605
|
case GUMBO_TAG_TABLE:
|
570
606
|
return GUMBO_INSERTION_MODE_IN_TABLE;
|
607
|
+
case GUMBO_TAG_TEMPLATE:
|
608
|
+
return get_current_template_insertion_mode(parser);
|
571
609
|
case GUMBO_TAG_HEAD:
|
610
|
+
if (!is_last) return GUMBO_INSERTION_MODE_IN_HEAD;
|
611
|
+
break;
|
572
612
|
case GUMBO_TAG_BODY:
|
573
613
|
return GUMBO_INSERTION_MODE_IN_BODY;
|
574
614
|
case GUMBO_TAG_FRAMESET:
|
575
615
|
return GUMBO_INSERTION_MODE_IN_FRAMESET;
|
576
616
|
case GUMBO_TAG_HTML:
|
577
|
-
return
|
617
|
+
return parser->_parser_state->_head_element
|
618
|
+
? GUMBO_INSERTION_MODE_AFTER_HEAD
|
619
|
+
: GUMBO_INSERTION_MODE_BEFORE_HEAD;
|
578
620
|
default:
|
579
|
-
|
580
|
-
GUMBO_INSERTION_MODE_IN_BODY : GUMBO_INSERTION_MODE_INITIAL;
|
621
|
+
break;
|
581
622
|
}
|
623
|
+
return is_last ? GUMBO_INSERTION_MODE_IN_BODY : GUMBO_INSERTION_MODE_INITIAL;
|
582
624
|
}
|
583
625
|
|
584
626
|
// This performs the actual "reset the insertion mode" loop.
|
585
627
|
static void reset_insertion_mode_appropriately(GumboParser* parser) {
|
586
628
|
const GumboVector* open_elements = &parser->_parser_state->_open_elements;
|
587
|
-
for (int i = open_elements->length; --i >= 0;
|
588
|
-
GumboInsertionMode mode =
|
589
|
-
get_appropriate_insertion_mode(open_elements->data[i], i == 0);
|
629
|
+
for (int i = open_elements->length; --i >= 0;) {
|
630
|
+
GumboInsertionMode mode = get_appropriate_insertion_mode(parser, i);
|
590
631
|
if (mode != GUMBO_INSERTION_MODE_INITIAL) {
|
591
632
|
set_insertion_mode(parser, mode);
|
592
633
|
return;
|
@@ -597,7 +638,8 @@ static void reset_insertion_mode_appropriately(GumboParser* parser) {
|
|
597
638
|
assert(0);
|
598
639
|
}
|
599
640
|
|
600
|
-
static GumboError* parser_add_parse_error(
|
641
|
+
static GumboError* parser_add_parse_error(
|
642
|
+
GumboParser* parser, const GumboToken* token) {
|
601
643
|
gumbo_debug("Adding parse error.\n");
|
602
644
|
GumboError* error = gumbo_add_error(parser);
|
603
645
|
if (!error) {
|
@@ -616,13 +658,14 @@ static GumboError* parser_add_parse_error(GumboParser* parser, const GumboToken*
|
|
616
658
|
}
|
617
659
|
GumboParserState* state = parser->_parser_state;
|
618
660
|
extra_data->parser_state = state->_insertion_mode;
|
619
|
-
gumbo_vector_init(
|
620
|
-
|
621
|
-
for (int i = 0; i < state->_open_elements.length; ++i) {
|
661
|
+
gumbo_vector_init(
|
662
|
+
parser, state->_open_elements.length, &extra_data->tag_stack);
|
663
|
+
for (unsigned int i = 0; i < state->_open_elements.length; ++i) {
|
622
664
|
const GumboNode* node = state->_open_elements.data[i];
|
623
|
-
assert(
|
624
|
-
|
625
|
-
|
665
|
+
assert(
|
666
|
+
node->type == GUMBO_NODE_ELEMENT || node->type == GUMBO_NODE_TEMPLATE);
|
667
|
+
gumbo_vector_add(
|
668
|
+
parser, (void*) node->v.element.tag, &extra_data->tag_stack);
|
626
669
|
}
|
627
670
|
return error;
|
628
671
|
}
|
@@ -631,13 +674,8 @@ static GumboError* parser_add_parse_error(GumboParser* parser, const GumboToken*
|
|
631
674
|
// by is_start) with one of the tag types in the varargs list. Terminate the
|
632
675
|
// list with GUMBO_TAG_LAST; this functions as a sentinel since no portion of
|
633
676
|
// the spec references tags that are not in the spec.
|
634
|
-
|
635
|
-
|
636
|
-
// way so it's easy to verify the code against the spec), but it may be worth
|
637
|
-
// coming up with a notion of a "tag set" that includes a list of tags, and
|
638
|
-
// using that in many places. It'd probably also help performance, but I want
|
639
|
-
// to profile before optimizing.
|
640
|
-
static bool tag_in(const GumboToken* token, bool is_start, ...) {
|
677
|
+
static bool tag_in(
|
678
|
+
const GumboToken* token, bool is_start, const gumbo_tagset tags) {
|
641
679
|
GumboTag token_tag;
|
642
680
|
if (is_start && token->type == GUMBO_TOKEN_START_TAG) {
|
643
681
|
token_tag = token->v.start_tag.tag;
|
@@ -646,19 +684,7 @@ static bool tag_in(const GumboToken* token, bool is_start, ...) {
|
|
646
684
|
} else {
|
647
685
|
return false;
|
648
686
|
}
|
649
|
-
|
650
|
-
va_list tags;
|
651
|
-
va_start(tags, is_start);
|
652
|
-
bool result = false;
|
653
|
-
for (GumboTag tag = va_arg(tags, GumboTag); tag != GUMBO_TAG_LAST;
|
654
|
-
tag = va_arg(tags, GumboTag)) {
|
655
|
-
if (tag == token_tag) {
|
656
|
-
result = true;
|
657
|
-
break;
|
658
|
-
}
|
659
|
-
}
|
660
|
-
va_end(tags);
|
661
|
-
return result;
|
687
|
+
return (token_tag < GUMBO_TAG_LAST && tags[(int) token_tag] != 0);
|
662
688
|
}
|
663
689
|
|
664
690
|
// Like tag_in, but for the single-tag case.
|
@@ -673,50 +699,125 @@ static bool tag_is(const GumboToken* token, bool is_start, GumboTag tag) {
|
|
673
699
|
}
|
674
700
|
|
675
701
|
// Like tag_in, but checks for the tag of a node, rather than a token.
|
676
|
-
static bool
|
702
|
+
static bool node_tag_in_set(const GumboNode* node, const gumbo_tagset tags) {
|
677
703
|
assert(node != NULL);
|
678
|
-
if (node->type != GUMBO_NODE_ELEMENT) {
|
704
|
+
if (node->type != GUMBO_NODE_ELEMENT && node->type != GUMBO_NODE_TEMPLATE) {
|
679
705
|
return false;
|
680
706
|
}
|
681
|
-
|
682
|
-
|
683
|
-
va_list tags;
|
684
|
-
va_start(tags, node);
|
685
|
-
bool result = false;
|
686
|
-
for (GumboTag tag = va_arg(tags, GumboTag); tag != GUMBO_TAG_LAST;
|
687
|
-
tag = va_arg(tags, GumboTag)) {
|
688
|
-
assert(tag <= GUMBO_TAG_LAST);
|
689
|
-
if (tag == node_tag) {
|
690
|
-
result = true;
|
691
|
-
break;
|
692
|
-
}
|
693
|
-
}
|
694
|
-
va_end(tags);
|
695
|
-
return result;
|
707
|
+
return TAGSET_INCLUDES(
|
708
|
+
tags, node->v.element.tag_namespace, node->v.element.tag);
|
696
709
|
}
|
697
710
|
|
698
711
|
// Like node_tag_in, but for the single-tag case.
|
699
|
-
static bool
|
700
|
-
|
712
|
+
static bool node_qualified_tag_is(
|
713
|
+
const GumboNode* node, GumboNamespaceEnum ns, GumboTag tag) {
|
714
|
+
assert(node);
|
715
|
+
return (node->type == GUMBO_NODE_ELEMENT ||
|
716
|
+
node->type == GUMBO_NODE_TEMPLATE) &&
|
717
|
+
node->v.element.tag == tag && node->v.element.tag_namespace == ns;
|
718
|
+
}
|
719
|
+
|
720
|
+
// Like node_tag_in, but for the single-tag case in the HTML namespace
|
721
|
+
static bool node_html_tag_is(const GumboNode* node, GumboTag tag) {
|
722
|
+
return node_qualified_tag_is(node, GUMBO_NAMESPACE_HTML, tag);
|
723
|
+
}
|
724
|
+
|
725
|
+
static void push_template_insertion_mode(
|
726
|
+
GumboParser* parser, GumboInsertionMode mode) {
|
727
|
+
gumbo_vector_add(
|
728
|
+
parser, (void*) mode, &parser->_parser_state->_template_insertion_modes);
|
729
|
+
}
|
730
|
+
|
731
|
+
static void pop_template_insertion_mode(GumboParser* parser) {
|
732
|
+
gumbo_vector_pop(parser, &parser->_parser_state->_template_insertion_modes);
|
733
|
+
}
|
734
|
+
|
735
|
+
// Returns the current template insertion mode. If the stack of template
|
736
|
+
// insertion modes is empty, this returns GUMBO_INSERTION_MODE_INITIAL.
|
737
|
+
static GumboInsertionMode get_current_template_insertion_mode(
|
738
|
+
const GumboParser* parser) {
|
739
|
+
GumboVector* template_insertion_modes =
|
740
|
+
&parser->_parser_state->_template_insertion_modes;
|
741
|
+
if (template_insertion_modes->length == 0) {
|
742
|
+
return GUMBO_INSERTION_MODE_INITIAL;
|
743
|
+
}
|
744
|
+
return (GumboInsertionMode)
|
745
|
+
template_insertion_modes->data[(template_insertion_modes->length - 1)];
|
701
746
|
}
|
702
747
|
|
703
748
|
// http://www.whatwg.org/specs/web-apps/current-work/multipage/tree-construction.html#mathml-text-integration-point
|
704
749
|
static bool is_mathml_integration_point(const GumboNode* node) {
|
705
|
-
return
|
706
|
-
|
707
|
-
|
750
|
+
return node_tag_in_set(
|
751
|
+
node, (gumbo_tagset){TAG_MATHML(MI), TAG_MATHML(MO), TAG_MATHML(MN),
|
752
|
+
TAG_MATHML(MS), TAG_MATHML(MTEXT)});
|
708
753
|
}
|
709
754
|
|
710
755
|
// http://www.whatwg.org/specs/web-apps/current-work/multipage/tree-construction.html#html-integration-point
|
711
756
|
static bool is_html_integration_point(const GumboNode* node) {
|
712
|
-
return (
|
713
|
-
|
714
|
-
|
715
|
-
|
716
|
-
|
717
|
-
|
718
|
-
|
719
|
-
|
757
|
+
return node_tag_in_set(node, (gumbo_tagset){TAG_SVG(FOREIGNOBJECT),
|
758
|
+
TAG_SVG(DESC), TAG_SVG(TITLE)}) ||
|
759
|
+
(node_qualified_tag_is(
|
760
|
+
node, GUMBO_NAMESPACE_MATHML, GUMBO_TAG_ANNOTATION_XML) &&
|
761
|
+
(attribute_matches(
|
762
|
+
&node->v.element.attributes, "encoding", "text/html") ||
|
763
|
+
attribute_matches(&node->v.element.attributes, "encoding",
|
764
|
+
"application/xhtml+xml")));
|
765
|
+
}
|
766
|
+
|
767
|
+
// This represents a place to insert a node, consisting of a target parent and a
|
768
|
+
// child index within that parent. If the node should be inserted at the end of
|
769
|
+
// the parent's child, index will be -1.
|
770
|
+
typedef struct {
|
771
|
+
GumboNode* target;
|
772
|
+
int index;
|
773
|
+
} InsertionLocation;
|
774
|
+
|
775
|
+
InsertionLocation get_appropriate_insertion_location(
|
776
|
+
GumboParser* parser, GumboNode* override_target) {
|
777
|
+
InsertionLocation retval = {override_target, -1};
|
778
|
+
if (retval.target == NULL) {
|
779
|
+
// No override target; default to the current node, but special-case the
|
780
|
+
// root node since get_current_node() assumes the stack of open elements is
|
781
|
+
// non-empty.
|
782
|
+
retval.target = parser->_output->root != NULL ? get_current_node(parser)
|
783
|
+
: get_document_node(parser);
|
784
|
+
}
|
785
|
+
if (!parser->_parser_state->_foster_parent_insertions ||
|
786
|
+
!node_tag_in_set(retval.target, (gumbo_tagset){TAG(TABLE), TAG(TBODY),
|
787
|
+
TAG(TFOOT), TAG(THEAD), TAG(TR)})) {
|
788
|
+
return retval;
|
789
|
+
}
|
790
|
+
|
791
|
+
// Foster-parenting case.
|
792
|
+
int last_template_index = -1;
|
793
|
+
int last_table_index = -1;
|
794
|
+
GumboVector* open_elements = &parser->_parser_state->_open_elements;
|
795
|
+
for (unsigned int i = 0; i < open_elements->length; ++i) {
|
796
|
+
if (node_html_tag_is(open_elements->data[i], GUMBO_TAG_TEMPLATE)) {
|
797
|
+
last_template_index = i;
|
798
|
+
}
|
799
|
+
if (node_html_tag_is(open_elements->data[i], GUMBO_TAG_TABLE)) {
|
800
|
+
last_table_index = i;
|
801
|
+
}
|
802
|
+
}
|
803
|
+
if (last_template_index != -1 &&
|
804
|
+
(last_table_index == -1 || last_template_index > last_table_index)) {
|
805
|
+
retval.target = open_elements->data[last_template_index];
|
806
|
+
return retval;
|
807
|
+
}
|
808
|
+
if (last_table_index == -1) {
|
809
|
+
retval.target = open_elements->data[0];
|
810
|
+
return retval;
|
811
|
+
}
|
812
|
+
GumboNode* last_table = open_elements->data[last_table_index];
|
813
|
+
if (last_table->parent != NULL) {
|
814
|
+
retval.target = last_table->parent;
|
815
|
+
retval.index = last_table->index_within_parent;
|
816
|
+
return retval;
|
817
|
+
}
|
818
|
+
|
819
|
+
retval.target = open_elements->data[last_table_index - 1];
|
820
|
+
return retval;
|
720
821
|
}
|
721
822
|
|
722
823
|
// Appends a node to the end of its parent, setting the "parent" and
|
@@ -726,7 +827,8 @@ static void append_node(
|
|
726
827
|
assert(node->parent == NULL);
|
727
828
|
assert(node->index_within_parent == -1);
|
728
829
|
GumboVector* children;
|
729
|
-
if (parent->type == GUMBO_NODE_ELEMENT
|
830
|
+
if (parent->type == GUMBO_NODE_ELEMENT ||
|
831
|
+
parent->type == GUMBO_NODE_TEMPLATE) {
|
730
832
|
children = &parent->v.element.children;
|
731
833
|
} else {
|
732
834
|
assert(parent->type == GUMBO_NODE_DOCUMENT);
|
@@ -738,64 +840,41 @@ static void append_node(
|
|
738
840
|
assert(node->index_within_parent < children->length);
|
739
841
|
}
|
740
842
|
|
741
|
-
// Inserts a node at the specified
|
843
|
+
// Inserts a node at the specified InsertionLocation, updating the
|
742
844
|
// "parent" and "index_within_parent" fields of it and all its siblings.
|
845
|
+
// If the index of the location is -1, this calls append_node.
|
743
846
|
static void insert_node(
|
744
|
-
GumboParser* parser, GumboNode*
|
847
|
+
GumboParser* parser, GumboNode* node, InsertionLocation location) {
|
745
848
|
assert(node->parent == NULL);
|
746
849
|
assert(node->index_within_parent == -1);
|
747
|
-
|
748
|
-
|
749
|
-
|
750
|
-
|
751
|
-
|
752
|
-
|
753
|
-
|
754
|
-
|
755
|
-
|
756
|
-
|
757
|
-
|
758
|
-
|
759
|
-
|
760
|
-
}
|
850
|
+
GumboNode* parent = location.target;
|
851
|
+
int index = location.index;
|
852
|
+
if (index != -1) {
|
853
|
+
GumboVector* children = NULL;
|
854
|
+
if (parent->type == GUMBO_NODE_ELEMENT ||
|
855
|
+
parent->type == GUMBO_NODE_TEMPLATE) {
|
856
|
+
children = &parent->v.element.children;
|
857
|
+
} else if (parent->type == GUMBO_NODE_DOCUMENT) {
|
858
|
+
children = &parent->v.document.children;
|
859
|
+
assert(children->length == 0);
|
860
|
+
} else {
|
861
|
+
assert(0);
|
862
|
+
}
|
761
863
|
|
762
|
-
|
763
|
-
|
764
|
-
|
765
|
-
|
766
|
-
|
767
|
-
|
768
|
-
|
769
|
-
|
770
|
-
|
771
|
-
|
772
|
-
GumboNode* table_element = open_elements->data[i];
|
773
|
-
if (node_tag_is(table_element, GUMBO_TAG_TABLE)) {
|
774
|
-
foster_parent_element = table_element->parent;
|
775
|
-
if (!foster_parent_element ||
|
776
|
-
foster_parent_element->type != GUMBO_NODE_ELEMENT) {
|
777
|
-
// Table has no parent; spec says it's possible if a script manipulated
|
778
|
-
// the DOM, although I don't think we have to worry about this case.
|
779
|
-
gumbo_debug("Table has no parent.\n");
|
780
|
-
foster_parent_element = open_elements->data[i - 1];
|
781
|
-
break;
|
782
|
-
}
|
783
|
-
assert(foster_parent_element->type == GUMBO_NODE_ELEMENT);
|
784
|
-
gumbo_debug("Found enclosing table (%x) at %d; parent=%s, index=%d.\n",
|
785
|
-
table_element, i, gumbo_normalized_tagname(
|
786
|
-
foster_parent_element->v.element.tag),
|
787
|
-
table_element->index_within_parent);
|
788
|
-
assert(foster_parent_element->v.element.children.data[
|
789
|
-
table_element->index_within_parent] == table_element);
|
790
|
-
insert_node(parser, foster_parent_element,
|
791
|
-
table_element->index_within_parent, node);
|
792
|
-
return;
|
864
|
+
assert(index >= 0);
|
865
|
+
assert((unsigned int) index < children->length);
|
866
|
+
node->parent = parent;
|
867
|
+
node->index_within_parent = index;
|
868
|
+
gumbo_vector_insert_at(parser, (void*) node, index, children);
|
869
|
+
assert(node->index_within_parent < children->length);
|
870
|
+
for (unsigned int i = index + 1; i < children->length; ++i) {
|
871
|
+
GumboNode* sibling = children->data[i];
|
872
|
+
sibling->index_within_parent = i;
|
873
|
+
assert(sibling->index_within_parent < children->length);
|
793
874
|
}
|
875
|
+
} else {
|
876
|
+
append_node(parser, parent, node);
|
794
877
|
}
|
795
|
-
if (node->type == GUMBO_NODE_ELEMENT) {
|
796
|
-
gumbo_vector_add(parser, (void*) node, open_elements);
|
797
|
-
}
|
798
|
-
append_node(parser, foster_parent_element, node);
|
799
878
|
}
|
800
879
|
|
801
880
|
static void maybe_flush_text_node_buffer(GumboParser* parser) {
|
@@ -806,30 +885,31 @@ static void maybe_flush_text_node_buffer(GumboParser* parser) {
|
|
806
885
|
}
|
807
886
|
|
808
887
|
assert(buffer_state->_type == GUMBO_NODE_WHITESPACE ||
|
809
|
-
buffer_state->_type == GUMBO_NODE_TEXT
|
888
|
+
buffer_state->_type == GUMBO_NODE_TEXT ||
|
889
|
+
buffer_state->_type == GUMBO_NODE_CDATA);
|
810
890
|
GumboNode* text_node = create_node(parser, buffer_state->_type);
|
811
891
|
GumboText* text_node_data = &text_node->v.text;
|
812
|
-
text_node_data->text =
|
813
|
-
parser, &buffer_state->_buffer);
|
892
|
+
text_node_data->text =
|
893
|
+
gumbo_string_buffer_to_string(parser, &buffer_state->_buffer);
|
814
894
|
text_node_data->original_text.data = buffer_state->_start_original_text;
|
815
895
|
text_node_data->original_text.length =
|
816
896
|
state->_current_token->original_text.data -
|
817
897
|
buffer_state->_start_original_text;
|
818
898
|
text_node_data->start_pos = buffer_state->_start_position;
|
819
|
-
|
820
|
-
|
821
|
-
|
822
|
-
|
899
|
+
|
900
|
+
gumbo_debug("Flushing text node buffer of %.*s.\n",
|
901
|
+
(int) buffer_state->_buffer.length, buffer_state->_buffer.data);
|
902
|
+
|
903
|
+
InsertionLocation location = get_appropriate_insertion_location(parser, NULL);
|
904
|
+
if (location.target->type == GUMBO_NODE_DOCUMENT) {
|
905
|
+
// The DOM does not allow Document nodes to have Text children, so per the
|
906
|
+
// spec, they are dropped on the floor.
|
907
|
+
destroy_node(parser, text_node);
|
823
908
|
} else {
|
824
|
-
|
825
|
-
parser, parser->_output->root ?
|
826
|
-
get_current_node(parser) : parser->_output->document, text_node);
|
909
|
+
insert_node(parser, text_node, location);
|
827
910
|
}
|
828
|
-
gumbo_debug("Flushing text node buffer of %.*s.\n",
|
829
|
-
(int) buffer_state->_buffer.length, buffer_state->_buffer.data);
|
830
911
|
|
831
|
-
|
832
|
-
gumbo_string_buffer_init(parser, &buffer_state->_buffer);
|
912
|
+
gumbo_string_buffer_clear(parser, &buffer_state->_buffer);
|
833
913
|
buffer_state->_type = GUMBO_NODE_WHITESPACE;
|
834
914
|
assert(buffer_state->_buffer.length == 0);
|
835
915
|
}
|
@@ -837,18 +917,17 @@ static void maybe_flush_text_node_buffer(GumboParser* parser) {
|
|
837
917
|
static void record_end_of_element(
|
838
918
|
GumboToken* current_token, GumboElement* element) {
|
839
919
|
element->end_pos = current_token->position;
|
840
|
-
element->original_end_tag =
|
841
|
-
|
842
|
-
|
920
|
+
element->original_end_tag = current_token->type == GUMBO_TOKEN_END_TAG
|
921
|
+
? current_token->original_text
|
922
|
+
: kGumboEmptyString;
|
843
923
|
}
|
844
924
|
|
845
925
|
static GumboNode* pop_current_node(GumboParser* parser) {
|
846
926
|
GumboParserState* state = parser->_parser_state;
|
847
927
|
maybe_flush_text_node_buffer(parser);
|
848
928
|
if (state->_open_elements.length > 0) {
|
849
|
-
assert(
|
850
|
-
gumbo_debug(
|
851
|
-
"Popping %s node.\n",
|
929
|
+
assert(node_html_tag_is(state->_open_elements.data[0], GUMBO_TAG_HTML));
|
930
|
+
gumbo_debug("Popping %s node.\n",
|
852
931
|
gumbo_normalized_tagname(get_current_node(parser)->v.element.tag));
|
853
932
|
}
|
854
933
|
GumboNode* current_node = gumbo_vector_pop(parser, &state->_open_elements);
|
@@ -856,13 +935,16 @@ static GumboNode* pop_current_node(GumboParser* parser) {
|
|
856
935
|
assert(state->_open_elements.length == 0);
|
857
936
|
return NULL;
|
858
937
|
}
|
859
|
-
assert(current_node->type == GUMBO_NODE_ELEMENT
|
938
|
+
assert(current_node->type == GUMBO_NODE_ELEMENT ||
|
939
|
+
current_node->type == GUMBO_NODE_TEMPLATE);
|
860
940
|
bool is_closed_body_or_html_tag =
|
861
|
-
(
|
862
|
-
|
941
|
+
(node_html_tag_is(current_node, GUMBO_TAG_BODY) &&
|
942
|
+
state->_closed_body_tag) ||
|
943
|
+
(node_html_tag_is(current_node, GUMBO_TAG_HTML) &&
|
944
|
+
state->_closed_html_tag);
|
863
945
|
if ((state->_current_token->type != GUMBO_TOKEN_END_TAG ||
|
864
|
-
|
865
|
-
|
946
|
+
!node_html_tag_is(current_node, state->_current_token->v.end_tag)) &&
|
947
|
+
!is_closed_body_or_html_tag) {
|
866
948
|
current_node->parse_flags |= GUMBO_INSERTION_IMPLICIT_END_TAG;
|
867
949
|
}
|
868
950
|
if (!is_closed_body_or_html_tag) {
|
@@ -885,25 +967,25 @@ static void append_comment_node(
|
|
885
967
|
|
886
968
|
// http://www.whatwg.org/specs/web-apps/current-work/complete/tokenization.html#clear-the-stack-back-to-a-table-row-context
|
887
969
|
static void clear_stack_to_table_row_context(GumboParser* parser) {
|
888
|
-
while (!
|
889
|
-
|
970
|
+
while (!node_tag_in_set(get_current_node(parser),
|
971
|
+
(gumbo_tagset){TAG(HTML), TAG(TR), TAG(TEMPLATE)})) {
|
890
972
|
pop_current_node(parser);
|
891
973
|
}
|
892
974
|
}
|
893
975
|
|
894
976
|
// http://www.whatwg.org/specs/web-apps/current-work/complete/tokenization.html#clear-the-stack-back-to-a-table-context
|
895
977
|
static void clear_stack_to_table_context(GumboParser* parser) {
|
896
|
-
while (!
|
897
|
-
|
978
|
+
while (!node_tag_in_set(get_current_node(parser),
|
979
|
+
(gumbo_tagset){TAG(HTML), TAG(TABLE), TAG(TEMPLATE)})) {
|
898
980
|
pop_current_node(parser);
|
899
981
|
}
|
900
982
|
}
|
901
983
|
|
902
984
|
// http://www.whatwg.org/specs/web-apps/current-work/complete/tokenization.html#clear-the-stack-back-to-a-table-body-context
|
903
985
|
void clear_stack_to_table_body_context(GumboParser* parser) {
|
904
|
-
while (!
|
905
|
-
|
906
|
-
|
986
|
+
while (!node_tag_in_set(get_current_node(parser),
|
987
|
+
(gumbo_tagset){TAG(HTML), TAG(TBODY), TAG(TFOOT), TAG(THEAD),
|
988
|
+
TAG(TEMPLATE)})) {
|
907
989
|
pop_current_node(parser);
|
908
990
|
}
|
909
991
|
}
|
@@ -918,7 +1000,9 @@ static GumboNode* create_element(GumboParser* parser, GumboTag tag) {
|
|
918
1000
|
element->tag_namespace = GUMBO_NAMESPACE_HTML;
|
919
1001
|
element->original_tag = kGumboEmptyString;
|
920
1002
|
element->original_end_tag = kGumboEmptyString;
|
921
|
-
element->start_pos = parser->_parser_state->_current_token
|
1003
|
+
element->start_pos = (parser->_parser_state->_current_token)
|
1004
|
+
? parser->_parser_state->_current_token->position
|
1005
|
+
: kGumboEmptySourcePosition;
|
922
1006
|
element->end_pos = kGumboEmptySourcePosition;
|
923
1007
|
return node;
|
924
1008
|
}
|
@@ -929,7 +1013,12 @@ static GumboNode* create_element_from_token(
|
|
929
1013
|
assert(token->type == GUMBO_TOKEN_START_TAG);
|
930
1014
|
GumboTokenStartTag* start_tag = &token->v.start_tag;
|
931
1015
|
|
932
|
-
|
1016
|
+
GumboNodeType type = (tag_namespace == GUMBO_NAMESPACE_HTML &&
|
1017
|
+
start_tag->tag == GUMBO_TAG_TEMPLATE)
|
1018
|
+
? GUMBO_NODE_TEMPLATE
|
1019
|
+
: GUMBO_NODE_ELEMENT;
|
1020
|
+
|
1021
|
+
GumboNode* node = create_node(parser, type);
|
933
1022
|
GumboElement* element = &node->v.element;
|
934
1023
|
gumbo_vector_init(parser, 1, &element->children);
|
935
1024
|
element->attributes = start_tag->attributes;
|
@@ -952,7 +1041,7 @@ static GumboNode* create_element_from_token(
|
|
952
1041
|
|
953
1042
|
// http://www.whatwg.org/specs/web-apps/current-work/complete/tokenization.html#insert-an-html-element
|
954
1043
|
static void insert_element(GumboParser* parser, GumboNode* node,
|
955
|
-
|
1044
|
+
bool is_reconstructing_formatting_elements) {
|
956
1045
|
GumboParserState* state = parser->_parser_state;
|
957
1046
|
// NOTE(jdtang): The text node buffer must always be flushed before inserting
|
958
1047
|
// a node, otherwise we're handling nodes in a different order than the spec
|
@@ -966,20 +1055,8 @@ static void insert_element(GumboParser* parser, GumboNode* node,
|
|
966
1055
|
if (!is_reconstructing_formatting_elements) {
|
967
1056
|
maybe_flush_text_node_buffer(parser);
|
968
1057
|
}
|
969
|
-
|
970
|
-
|
971
|
-
GUMBO_TAG_TFOOT, GUMBO_TAG_THEAD, GUMBO_TAG_TR, GUMBO_TAG_LAST)) {
|
972
|
-
foster_parent_element(parser, node);
|
973
|
-
gumbo_vector_add(parser, (void*) node, &state->_open_elements);
|
974
|
-
return;
|
975
|
-
}
|
976
|
-
|
977
|
-
// This is called to insert the root HTML element, but get_current_node
|
978
|
-
// assumes the stack of open elements is non-empty, so we need special
|
979
|
-
// handling for this case.
|
980
|
-
append_node(
|
981
|
-
parser, parser->_output->root ?
|
982
|
-
get_current_node(parser) : parser->_output->document, node);
|
1058
|
+
InsertionLocation location = get_appropriate_insertion_location(parser, NULL);
|
1059
|
+
insert_node(parser, node, location);
|
983
1060
|
gumbo_vector_add(parser, (void*) node, &state->_open_elements);
|
984
1061
|
}
|
985
1062
|
|
@@ -992,7 +1069,7 @@ static GumboNode* insert_element_from_token(
|
|
992
1069
|
create_element_from_token(parser, token, GUMBO_NAMESPACE_HTML);
|
993
1070
|
insert_element(parser, element, false);
|
994
1071
|
gumbo_debug("Inserting <%s> element (@%x) from token.\n",
|
995
|
-
|
1072
|
+
gumbo_normalized_tagname(element->v.element.tag), element);
|
996
1073
|
return element;
|
997
1074
|
}
|
998
1075
|
|
@@ -1005,7 +1082,7 @@ static GumboNode* insert_element_of_tag_type(
|
|
1005
1082
|
element->parse_flags |= GUMBO_INSERTION_BY_PARSER | reason;
|
1006
1083
|
insert_element(parser, element, false);
|
1007
1084
|
gumbo_debug("Inserting %s element (@%x) from tag type.\n",
|
1008
|
-
|
1085
|
+
gumbo_normalized_tagname(tag), element);
|
1009
1086
|
return element;
|
1010
1087
|
}
|
1011
1088
|
|
@@ -1017,16 +1094,14 @@ static GumboNode* insert_foreign_element(
|
|
1017
1094
|
GumboNode* element = create_element_from_token(parser, token, tag_namespace);
|
1018
1095
|
insert_element(parser, element, false);
|
1019
1096
|
if (token_has_attribute(token, "xmlns") &&
|
1020
|
-
!attribute_matches_case_sensitive(
|
1021
|
-
&token->v.start_tag.attributes, "xmlns",
|
1097
|
+
!attribute_matches_case_sensitive(&token->v.start_tag.attributes, "xmlns",
|
1022
1098
|
kLegalXmlns[tag_namespace])) {
|
1023
1099
|
// TODO(jdtang): Since there're multiple possible error codes here, we
|
1024
1100
|
// eventually need reason codes to differentiate them.
|
1025
1101
|
parser_add_parse_error(parser, token);
|
1026
1102
|
}
|
1027
1103
|
if (token_has_attribute(token, "xmlns:xlink") &&
|
1028
|
-
!attribute_matches_case_sensitive(
|
1029
|
-
&token->v.start_tag.attributes,
|
1104
|
+
!attribute_matches_case_sensitive(&token->v.start_tag.attributes,
|
1030
1105
|
"xmlns:xlink", "http://www.w3.org/1999/xlink")) {
|
1031
1106
|
parser_add_parse_error(parser, token);
|
1032
1107
|
}
|
@@ -1035,7 +1110,8 @@ static GumboNode* insert_foreign_element(
|
|
1035
1110
|
|
1036
1111
|
static void insert_text_token(GumboParser* parser, GumboToken* token) {
|
1037
1112
|
assert(token->type == GUMBO_TOKEN_WHITESPACE ||
|
1038
|
-
token->type == GUMBO_TOKEN_CHARACTER
|
1113
|
+
token->type == GUMBO_TOKEN_CHARACTER ||
|
1114
|
+
token->type == GUMBO_TOKEN_NULL || token->type == GUMBO_TOKEN_CDATA);
|
1039
1115
|
TextNodeBufferState* buffer_state = &parser->_parser_state->_text_node;
|
1040
1116
|
if (buffer_state->_buffer.length == 0) {
|
1041
1117
|
// Initialize position fields.
|
@@ -1046,6 +1122,8 @@ static void insert_text_token(GumboParser* parser, GumboToken* token) {
|
|
1046
1122
|
parser, token->v.character, &buffer_state->_buffer);
|
1047
1123
|
if (token->type == GUMBO_TOKEN_CHARACTER) {
|
1048
1124
|
buffer_state->_type = GUMBO_NODE_TEXT;
|
1125
|
+
} else if (token->type == GUMBO_TOKEN_CDATA) {
|
1126
|
+
buffer_state->_type = GUMBO_NODE_CDATA;
|
1049
1127
|
}
|
1050
1128
|
gumbo_debug("Inserting text token '%c'.\n", token->v.character);
|
1051
1129
|
}
|
@@ -1068,12 +1146,12 @@ static void acknowledge_self_closing_tag(GumboParser* parser) {
|
|
1068
1146
|
// elements, and fills in its index if so.
|
1069
1147
|
static bool find_last_anchor_index(GumboParser* parser, int* anchor_index) {
|
1070
1148
|
GumboVector* elements = &parser->_parser_state->_active_formatting_elements;
|
1071
|
-
for (int i = elements->length; --i >= 0;
|
1149
|
+
for (int i = elements->length; --i >= 0;) {
|
1072
1150
|
GumboNode* node = elements->data[i];
|
1073
1151
|
if (node == &kActiveFormattingScopeMarker) {
|
1074
1152
|
return false;
|
1075
1153
|
}
|
1076
|
-
if (
|
1154
|
+
if (node_html_tag_is(node, GUMBO_TAG_A)) {
|
1077
1155
|
*anchor_index = i;
|
1078
1156
|
return true;
|
1079
1157
|
}
|
@@ -1085,23 +1163,21 @@ static bool find_last_anchor_index(GumboParser* parser, int* anchor_index) {
|
|
1085
1163
|
// formatting elements (after the last active scope marker) that have a specific
|
1086
1164
|
// tag. If this is > 0, then earliest_matching_index will be filled in with the
|
1087
1165
|
// index of the first such element.
|
1088
|
-
static int count_formatting_elements_of_tag(
|
1089
|
-
|
1090
|
-
int* earliest_matching_index) {
|
1166
|
+
static int count_formatting_elements_of_tag(GumboParser* parser,
|
1167
|
+
const GumboNode* desired_node, int* earliest_matching_index) {
|
1091
1168
|
const GumboElement* desired_element = &desired_node->v.element;
|
1092
1169
|
GumboVector* elements = &parser->_parser_state->_active_formatting_elements;
|
1093
1170
|
int num_identical_elements = 0;
|
1094
|
-
for (int i = elements->length; --i >= 0;
|
1171
|
+
for (int i = elements->length; --i >= 0;) {
|
1095
1172
|
GumboNode* node = elements->data[i];
|
1096
1173
|
if (node == &kActiveFormattingScopeMarker) {
|
1097
1174
|
break;
|
1098
1175
|
}
|
1099
1176
|
assert(node->type == GUMBO_NODE_ELEMENT);
|
1100
|
-
|
1101
|
-
|
1102
|
-
|
1103
|
-
|
1104
|
-
&desired_element->attributes)) {
|
1177
|
+
if (node_qualified_tag_is(
|
1178
|
+
node, desired_element->tag_namespace, desired_element->tag) &&
|
1179
|
+
all_attributes_match(
|
1180
|
+
&node->v.element.attributes, &desired_element->attributes)) {
|
1105
1181
|
num_identical_elements++;
|
1106
1182
|
*earliest_matching_index = i;
|
1107
1183
|
}
|
@@ -1128,7 +1204,7 @@ static void add_formatting_element(GumboParser* parser, const GumboNode* node) {
|
|
1128
1204
|
// Noah's Ark clause: if there're at least 3, remove the earliest.
|
1129
1205
|
if (num_identical_elements >= 3) {
|
1130
1206
|
gumbo_debug("Noah's ark clause: removing element at %d.\n",
|
1131
|
-
|
1207
|
+
earliest_identical_element);
|
1132
1208
|
gumbo_vector_remove_at(parser, earliest_identical_element, elements);
|
1133
1209
|
}
|
1134
1210
|
|
@@ -1137,7 +1213,7 @@ static void add_formatting_element(GumboParser* parser, const GumboNode* node) {
|
|
1137
1213
|
|
1138
1214
|
static bool is_open_element(GumboParser* parser, const GumboNode* node) {
|
1139
1215
|
GumboVector* open_elements = &parser->_parser_state->_open_elements;
|
1140
|
-
for (int i = 0; i < open_elements->length; ++i) {
|
1216
|
+
for (unsigned int i = 0; i < open_elements->length; ++i) {
|
1141
1217
|
if (open_elements->data[i] == node) {
|
1142
1218
|
return true;
|
1143
1219
|
}
|
@@ -1149,8 +1225,8 @@ static bool is_open_element(GumboParser* parser, const GumboNode* node) {
|
|
1149
1225
|
// clone shares no structure with the original node: all owned strings and
|
1150
1226
|
// values are fresh copies.
|
1151
1227
|
GumboNode* clone_node(
|
1152
|
-
GumboParser* parser,
|
1153
|
-
assert(node->type == GUMBO_NODE_ELEMENT);
|
1228
|
+
GumboParser* parser, GumboNode* node, GumboParseFlags reason) {
|
1229
|
+
assert(node->type == GUMBO_NODE_ELEMENT || node->type == GUMBO_NODE_TEMPLATE);
|
1154
1230
|
GumboNode* new_node = gumbo_parser_allocate(parser, sizeof(GumboNode));
|
1155
1231
|
*new_node = *node;
|
1156
1232
|
new_node->parent = NULL;
|
@@ -1164,7 +1240,7 @@ GumboNode* clone_node(
|
|
1164
1240
|
|
1165
1241
|
const GumboVector* old_attributes = &node->v.element.attributes;
|
1166
1242
|
gumbo_vector_init(parser, old_attributes->length, &element->attributes);
|
1167
|
-
for (int i = 0; i < old_attributes->length; ++i) {
|
1243
|
+
for (unsigned int i = 0; i < old_attributes->length; ++i) {
|
1168
1244
|
const GumboAttribute* old_attr = old_attributes->data[i];
|
1169
1245
|
GumboAttribute* attr =
|
1170
1246
|
gumbo_parser_allocate(parser, sizeof(GumboAttribute));
|
@@ -1188,8 +1264,8 @@ static void reconstruct_active_formatting_elements(GumboParser* parser) {
|
|
1188
1264
|
}
|
1189
1265
|
|
1190
1266
|
// Step 2 & 3
|
1191
|
-
int i = elements->length - 1;
|
1192
|
-
|
1267
|
+
unsigned int i = elements->length - 1;
|
1268
|
+
GumboNode* element = elements->data[i];
|
1193
1269
|
if (element == &kActiveFormattingScopeMarker ||
|
1194
1270
|
is_open_element(parser, element)) {
|
1195
1271
|
return;
|
@@ -1199,7 +1275,7 @@ static void reconstruct_active_formatting_elements(GumboParser* parser) {
|
|
1199
1275
|
do {
|
1200
1276
|
if (i == 0) {
|
1201
1277
|
// Step 4
|
1202
|
-
i = -1;
|
1278
|
+
i = -1; // Incremented to 0 below.
|
1203
1279
|
break;
|
1204
1280
|
}
|
1205
1281
|
// Step 5
|
@@ -1209,9 +1285,8 @@ static void reconstruct_active_formatting_elements(GumboParser* parser) {
|
|
1209
1285
|
|
1210
1286
|
++i;
|
1211
1287
|
gumbo_debug("Reconstructing elements from %d on %s parent.\n", i,
|
1212
|
-
|
1213
|
-
|
1214
|
-
for(; i < elements->length; ++i) {
|
1288
|
+
gumbo_normalized_tagname(get_current_node(parser)->v.element.tag));
|
1289
|
+
for (; i < elements->length; ++i) {
|
1215
1290
|
// Step 7 & 8.
|
1216
1291
|
assert(elements->length > 0);
|
1217
1292
|
assert(i < elements->length);
|
@@ -1220,11 +1295,16 @@ static void reconstruct_active_formatting_elements(GumboParser* parser) {
|
|
1220
1295
|
GumboNode* clone = clone_node(
|
1221
1296
|
parser, element, GUMBO_INSERTION_RECONSTRUCTED_FORMATTING_ELEMENT);
|
1222
1297
|
// Step 9.
|
1223
|
-
|
1298
|
+
InsertionLocation location =
|
1299
|
+
get_appropriate_insertion_location(parser, NULL);
|
1300
|
+
insert_node(parser, clone, location);
|
1301
|
+
gumbo_vector_add(
|
1302
|
+
parser, (void*) clone, &parser->_parser_state->_open_elements);
|
1303
|
+
|
1224
1304
|
// Step 10.
|
1225
1305
|
elements->data[i] = clone;
|
1226
1306
|
gumbo_debug("Reconstructed %s element at %d.\n",
|
1227
|
-
|
1307
|
+
gumbo_normalized_tagname(clone->v.element.tag), i);
|
1228
1308
|
}
|
1229
1309
|
}
|
1230
1310
|
|
@@ -1235,32 +1315,30 @@ static void clear_active_formatting_elements(GumboParser* parser) {
|
|
1235
1315
|
do {
|
1236
1316
|
node = gumbo_vector_pop(parser, elements);
|
1237
1317
|
++num_elements_cleared;
|
1238
|
-
} while(node && node != &kActiveFormattingScopeMarker);
|
1318
|
+
} while (node && node != &kActiveFormattingScopeMarker);
|
1239
1319
|
gumbo_debug("Cleared %d elements from active formatting list.\n",
|
1240
|
-
|
1320
|
+
num_elements_cleared);
|
1241
1321
|
}
|
1242
1322
|
|
1243
1323
|
// http://www.whatwg.org/specs/web-apps/current-work/complete/tokenization.html#the-initial-insertion-mode
|
1244
1324
|
static GumboQuirksModeEnum compute_quirks_mode(
|
1245
1325
|
const GumboTokenDocType* doctype) {
|
1246
|
-
if (doctype->force_quirks ||
|
1247
|
-
|
1248
|
-
|
1249
|
-
|
1250
|
-
|
1251
|
-
|
1252
|
-
|
1253
|
-
kQuirksModeSystemIdExactMatches, true) ||
|
1326
|
+
if (doctype->force_quirks || strcmp(doctype->name, kDoctypeHtml.data) ||
|
1327
|
+
is_in_static_list(
|
1328
|
+
doctype->public_identifier, kQuirksModePublicIdPrefixes, false) ||
|
1329
|
+
is_in_static_list(
|
1330
|
+
doctype->public_identifier, kQuirksModePublicIdExactMatches, true) ||
|
1331
|
+
is_in_static_list(
|
1332
|
+
doctype->system_identifier, kQuirksModeSystemIdExactMatches, true) ||
|
1254
1333
|
(is_in_static_list(doctype->public_identifier,
|
1255
|
-
|
1256
|
-
|
1334
|
+
kLimitedQuirksRequiresSystemIdPublicIdPrefixes, false) &&
|
1335
|
+
!doctype->has_system_identifier)) {
|
1257
1336
|
return GUMBO_DOCTYPE_QUIRKS;
|
1258
|
-
} else if (
|
1259
|
-
|
1260
|
-
|
1261
|
-
|
1262
|
-
|
1263
|
-
&& doctype->has_system_identifier)) {
|
1337
|
+
} else if (is_in_static_list(doctype->public_identifier,
|
1338
|
+
kLimitedQuirksPublicIdPrefixes, false) ||
|
1339
|
+
(is_in_static_list(doctype->public_identifier,
|
1340
|
+
kLimitedQuirksRequiresSystemIdPublicIdPrefixes, false) &&
|
1341
|
+
doctype->has_system_identifier)) {
|
1264
1342
|
return GUMBO_DOCTYPE_LIMITED_QUIRKS;
|
1265
1343
|
}
|
1266
1344
|
return GUMBO_DOCTYPE_NO_QUIRKS;
|
@@ -1269,83 +1347,50 @@ static GumboQuirksModeEnum compute_quirks_mode(
|
|
1269
1347
|
// The following functions are all defined by the "has an element in __ scope"
|
1270
1348
|
// sections of the HTML5 spec:
|
1271
1349
|
// http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#has-an-element-in-the-specific-scope
|
1272
|
-
// The basic idea behind them is that they check for an element of the given
|
1273
|
-
// name, contained within a scope formed by a set of other
|
1274
|
-
// example, "has an element in list scope" looks for an element of
|
1275
|
-
// within the nearest enclosing <ol> or <ul>, along
|
1276
|
-
// element types that serve to "firewall" their content
|
1277
|
-
// document.
|
1278
|
-
|
1279
|
-
|
1350
|
+
// The basic idea behind them is that they check for an element of the given
|
1351
|
+
// qualified name, contained within a scope formed by a set of other qualified
|
1352
|
+
// names. For example, "has an element in list scope" looks for an element of
|
1353
|
+
// the given qualified name within the nearest enclosing <ol> or <ul>, along
|
1354
|
+
// with a bunch of generic element types that serve to "firewall" their content
|
1355
|
+
// from the rest of the document. Note that because of the way the spec is
|
1356
|
+
// written,
|
1357
|
+
// all elements are expected to be in the HTML namespace
|
1358
|
+
static bool has_an_element_in_specific_scope(GumboParser* parser,
|
1359
|
+
int expected_size, const GumboTag* expected, bool negate,
|
1360
|
+
const gumbo_tagset tags) {
|
1280
1361
|
GumboVector* open_elements = &parser->_parser_state->_open_elements;
|
1281
|
-
|
1282
|
-
va_start(args, negate);
|
1283
|
-
// va_arg can only run through the list once, so we copy it to an GumboVector
|
1284
|
-
// here. I wonder if it'd make more sense to make tags the GumboVector*
|
1285
|
-
// parameter and 'expected' a vararg list, but that'd require changing a lot
|
1286
|
-
// of code for unknown benefit. We may want to change the representation of
|
1287
|
-
// these tag sets anyway, to something more efficient.
|
1288
|
-
GumboVector tags;
|
1289
|
-
gumbo_vector_init(parser, 10, &tags);
|
1290
|
-
for (GumboTag tag = va_arg(args, GumboTag); tag != GUMBO_TAG_LAST;
|
1291
|
-
tag = va_arg(args, GumboTag)) {
|
1292
|
-
// We store the tags inline instead of storing pointers to them.
|
1293
|
-
gumbo_vector_add(parser, (void*) tag, &tags);
|
1294
|
-
}
|
1295
|
-
va_end(args);
|
1296
|
-
|
1297
|
-
bool result = false;
|
1298
|
-
for (int i = open_elements->length; --i >= 0; ) {
|
1362
|
+
for (int i = open_elements->length; --i >= 0;) {
|
1299
1363
|
const GumboNode* node = open_elements->data[i];
|
1300
|
-
if (node->type != GUMBO_NODE_ELEMENT)
|
1364
|
+
if (node->type != GUMBO_NODE_ELEMENT && node->type != GUMBO_NODE_TEMPLATE)
|
1301
1365
|
continue;
|
1302
|
-
|
1366
|
+
|
1303
1367
|
GumboTag node_tag = node->v.element.tag;
|
1304
|
-
|
1305
|
-
|
1306
|
-
if (node_tag ==
|
1307
|
-
|
1308
|
-
goto cleanup;
|
1309
|
-
}
|
1368
|
+
GumboNamespaceEnum node_ns = node->v.element.tag_namespace;
|
1369
|
+
for (int j = 0; j < expected_size; ++j) {
|
1370
|
+
if (node_tag == expected[j] && node_ns == GUMBO_NAMESPACE_HTML)
|
1371
|
+
return true;
|
1310
1372
|
}
|
1311
1373
|
|
1312
|
-
bool
|
1313
|
-
|
1314
|
-
GumboTag tag = (GumboTag) tags.data[j];
|
1315
|
-
if (tag == node_tag) {
|
1316
|
-
found_tag = true;
|
1317
|
-
break;
|
1318
|
-
}
|
1319
|
-
}
|
1320
|
-
if (negate != found_tag) {
|
1321
|
-
result = false;
|
1322
|
-
goto cleanup;
|
1323
|
-
}
|
1374
|
+
bool found = TAGSET_INCLUDES(tags, node_ns, node_tag);
|
1375
|
+
if (negate != found) return false;
|
1324
1376
|
}
|
1325
|
-
|
1326
|
-
gumbo_vector_destroy(parser, &tags);
|
1327
|
-
return result;
|
1377
|
+
return false;
|
1328
1378
|
}
|
1329
1379
|
|
1330
|
-
//
|
1331
|
-
|
1332
|
-
|
1333
|
-
|
1334
|
-
|
1335
|
-
// GumboVector.
|
1336
|
-
#define DECLARE_ONE_ELEMENT_GUMBO_VECTOR(varname, from_var) \
|
1337
|
-
void* varname ## _tmp_array[1] = { (void*) from_var }; \
|
1338
|
-
GumboVector varname = { varname ## _tmp_array, 1, 1 }
|
1380
|
+
// Checks for the presence of an open element of the specified tag type.
|
1381
|
+
static bool has_open_element(GumboParser* parser, GumboTag tag) {
|
1382
|
+
return has_an_element_in_specific_scope(
|
1383
|
+
parser, 1, &tag, false, (gumbo_tagset){TAG(HTML)});
|
1384
|
+
}
|
1339
1385
|
|
1340
1386
|
// http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#has-an-element-in-scope
|
1341
1387
|
static bool has_an_element_in_scope(GumboParser* parser, GumboTag tag) {
|
1342
|
-
|
1343
|
-
|
1344
|
-
|
1345
|
-
|
1346
|
-
|
1347
|
-
|
1348
|
-
GUMBO_TAG_DESC, GUMBO_TAG_TITLE, GUMBO_TAG_LAST);
|
1388
|
+
return has_an_element_in_specific_scope(parser, 1, &tag, false,
|
1389
|
+
(gumbo_tagset){TAG(APPLET), TAG(CAPTION), TAG(HTML), TAG(TABLE), TAG(TD),
|
1390
|
+
TAG(TH), TAG(MARQUEE), TAG(OBJECT), TAG(TEMPLATE), TAG_MATHML(MI),
|
1391
|
+
TAG_MATHML(MO), TAG_MATHML(MN), TAG_MATHML(MS), TAG_MATHML(MTEXT),
|
1392
|
+
TAG_MATHML(ANNOTATION_XML), TAG_SVG(FOREIGNOBJECT), TAG_SVG(DESC),
|
1393
|
+
TAG_SVG(TITLE)});
|
1349
1394
|
}
|
1350
1395
|
|
1351
1396
|
// Like "has an element in scope", but for the specific case of looking for a
|
@@ -1356,21 +1401,21 @@ static bool has_an_element_in_scope(GumboParser* parser, GumboTag tag) {
|
|
1356
1401
|
// parameterize it.
|
1357
1402
|
static bool has_node_in_scope(GumboParser* parser, const GumboNode* node) {
|
1358
1403
|
GumboVector* open_elements = &parser->_parser_state->_open_elements;
|
1359
|
-
for (int i = open_elements->length; --i >= 0;
|
1404
|
+
for (int i = open_elements->length; --i >= 0;) {
|
1360
1405
|
const GumboNode* current = open_elements->data[i];
|
1361
1406
|
if (current == node) {
|
1362
1407
|
return true;
|
1363
1408
|
}
|
1364
|
-
if (current->type != GUMBO_NODE_ELEMENT
|
1409
|
+
if (current->type != GUMBO_NODE_ELEMENT &&
|
1410
|
+
current->type != GUMBO_NODE_TEMPLATE) {
|
1365
1411
|
continue;
|
1366
1412
|
}
|
1367
|
-
if (
|
1368
|
-
|
1369
|
-
|
1370
|
-
|
1371
|
-
|
1372
|
-
|
1373
|
-
GUMBO_TAG_LAST)) {
|
1413
|
+
if (node_tag_in_set(current,
|
1414
|
+
(gumbo_tagset){TAG(APPLET), TAG(CAPTION), TAG(HTML), TAG(TABLE),
|
1415
|
+
TAG(TD), TAG(TH), TAG(MARQUEE), TAG(OBJECT), TAG(TEMPLATE),
|
1416
|
+
TAG_MATHML(MI), TAG_MATHML(MO), TAG_MATHML(MN), TAG_MATHML(MS),
|
1417
|
+
TAG_MATHML(MTEXT), TAG_MATHML(ANNOTATION_XML),
|
1418
|
+
TAG_SVG(FOREIGNOBJECT), TAG_SVG(DESC), TAG_SVG(TITLE)})) {
|
1374
1419
|
return false;
|
1375
1420
|
}
|
1376
1421
|
}
|
@@ -1378,79 +1423,72 @@ static bool has_node_in_scope(GumboParser* parser, const GumboNode* node) {
|
|
1378
1423
|
return false;
|
1379
1424
|
}
|
1380
1425
|
|
1381
|
-
// Like has_an_element_in_scope, but restricts the expected
|
1382
|
-
// possible
|
1383
|
-
static bool has_an_element_in_scope_with_tagname(
|
1384
|
-
|
1385
|
-
|
1386
|
-
|
1387
|
-
|
1388
|
-
|
1389
|
-
|
1390
|
-
|
1391
|
-
tag = va_arg(args, GumboTag)) {
|
1392
|
-
gumbo_vector_add(parser, (void*) tag, &tags);
|
1393
|
-
}
|
1394
|
-
bool found = has_an_element_in_specific_scope(
|
1395
|
-
parser, &tags, false, GUMBO_TAG_APPLET, GUMBO_TAG_CAPTION, GUMBO_TAG_HTML,
|
1396
|
-
GUMBO_TAG_TABLE, GUMBO_TAG_TD, GUMBO_TAG_TH, GUMBO_TAG_MARQUEE,
|
1397
|
-
GUMBO_TAG_OBJECT, GUMBO_TAG_MI, GUMBO_TAG_MO, GUMBO_TAG_MN, GUMBO_TAG_MS,
|
1398
|
-
GUMBO_TAG_MTEXT, GUMBO_TAG_ANNOTATION_XML, GUMBO_TAG_FOREIGNOBJECT,
|
1399
|
-
GUMBO_TAG_DESC, GUMBO_TAG_TITLE, GUMBO_TAG_LAST);
|
1400
|
-
gumbo_vector_destroy(parser, &tags);
|
1401
|
-
va_end(args);
|
1402
|
-
return found;
|
1426
|
+
// Like has_an_element_in_scope, but restricts the expected qualified name to a
|
1427
|
+
// range of possible qualified names instead of just a single one.
|
1428
|
+
static bool has_an_element_in_scope_with_tagname(
|
1429
|
+
GumboParser* parser, int expected_len, const GumboTag expected[]) {
|
1430
|
+
return has_an_element_in_specific_scope(parser, expected_len, expected, false,
|
1431
|
+
(gumbo_tagset){TAG(APPLET), TAG(CAPTION), TAG(HTML), TAG(TABLE), TAG(TD),
|
1432
|
+
TAG(TH), TAG(MARQUEE), TAG(OBJECT), TAG(TEMPLATE), TAG_MATHML(MI),
|
1433
|
+
TAG_MATHML(MO), TAG_MATHML(MN), TAG_MATHML(MS), TAG_MATHML(MTEXT),
|
1434
|
+
TAG_MATHML(ANNOTATION_XML), TAG_SVG(FOREIGNOBJECT), TAG_SVG(DESC),
|
1435
|
+
TAG_SVG(TITLE)});
|
1403
1436
|
}
|
1404
1437
|
|
1405
1438
|
// http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#has-an-element-in-list-item-scope
|
1406
1439
|
static bool has_an_element_in_list_scope(GumboParser* parser, GumboTag tag) {
|
1407
|
-
|
1408
|
-
|
1409
|
-
|
1410
|
-
|
1411
|
-
|
1412
|
-
|
1413
|
-
GUMBO_TAG_DESC, GUMBO_TAG_TITLE, GUMBO_TAG_OL, GUMBO_TAG_UL,
|
1414
|
-
GUMBO_TAG_LAST);
|
1440
|
+
return has_an_element_in_specific_scope(parser, 1, &tag, false,
|
1441
|
+
(gumbo_tagset){TAG(APPLET), TAG(CAPTION), TAG(HTML), TAG(TABLE), TAG(TD),
|
1442
|
+
TAG(TH), TAG(MARQUEE), TAG(OBJECT), TAG(TEMPLATE), TAG_MATHML(MI),
|
1443
|
+
TAG_MATHML(MO), TAG_MATHML(MN), TAG_MATHML(MS), TAG_MATHML(MTEXT),
|
1444
|
+
TAG_MATHML(ANNOTATION_XML), TAG_SVG(FOREIGNOBJECT), TAG_SVG(DESC),
|
1445
|
+
TAG_SVG(TITLE), TAG(OL), TAG(UL)});
|
1415
1446
|
}
|
1416
1447
|
|
1417
1448
|
// http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#has-an-element-in-button-scope
|
1418
1449
|
static bool has_an_element_in_button_scope(GumboParser* parser, GumboTag tag) {
|
1419
|
-
|
1420
|
-
|
1421
|
-
|
1422
|
-
|
1423
|
-
|
1424
|
-
|
1425
|
-
GUMBO_TAG_DESC, GUMBO_TAG_TITLE, GUMBO_TAG_BUTTON, GUMBO_TAG_LAST);
|
1450
|
+
return has_an_element_in_specific_scope(parser, 1, &tag, false,
|
1451
|
+
(gumbo_tagset){TAG(APPLET), TAG(CAPTION), TAG(HTML), TAG(TABLE), TAG(TD),
|
1452
|
+
TAG(TH), TAG(MARQUEE), TAG(OBJECT), TAG(TEMPLATE), TAG_MATHML(MI),
|
1453
|
+
TAG_MATHML(MO), TAG_MATHML(MN), TAG_MATHML(MS), TAG_MATHML(MTEXT),
|
1454
|
+
TAG_MATHML(ANNOTATION_XML), TAG_SVG(FOREIGNOBJECT), TAG_SVG(DESC),
|
1455
|
+
TAG_SVG(TITLE), TAG(BUTTON)});
|
1426
1456
|
}
|
1427
1457
|
|
1428
1458
|
// http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#has-an-element-in-table-scope
|
1429
1459
|
static bool has_an_element_in_table_scope(GumboParser* parser, GumboTag tag) {
|
1430
|
-
|
1431
|
-
|
1432
|
-
parser, &tags, false, GUMBO_TAG_HTML, GUMBO_TAG_TABLE, GUMBO_TAG_LAST);
|
1460
|
+
return has_an_element_in_specific_scope(parser, 1, &tag, false,
|
1461
|
+
(gumbo_tagset){TAG(HTML), TAG(TABLE), TAG(TEMPLATE)});
|
1433
1462
|
}
|
1434
1463
|
|
1435
1464
|
// http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#has-an-element-in-select-scope
|
1436
1465
|
static bool has_an_element_in_select_scope(GumboParser* parser, GumboTag tag) {
|
1437
|
-
DECLARE_ONE_ELEMENT_GUMBO_VECTOR(tags, tag);
|
1438
1466
|
return has_an_element_in_specific_scope(
|
1439
|
-
parser, &
|
1440
|
-
GUMBO_TAG_LAST);
|
1467
|
+
parser, 1, &tag, true, (gumbo_tagset){TAG(OPTGROUP), TAG(OPTION)});
|
1441
1468
|
}
|
1442
1469
|
|
1443
|
-
|
1444
1470
|
// http://www.whatwg.org/specs/web-apps/current-work/complete/tokenization.html#generate-implied-end-tags
|
1445
1471
|
// "exception" is the "element to exclude from the process" listed in the spec.
|
1446
1472
|
// Pass GUMBO_TAG_LAST to not exclude any of them.
|
1447
1473
|
static void generate_implied_end_tags(GumboParser* parser, GumboTag exception) {
|
1448
|
-
for (;
|
1449
|
-
|
1450
|
-
|
1451
|
-
|
1452
|
-
|
1453
|
-
|
1474
|
+
for (; node_tag_in_set(get_current_node(parser),
|
1475
|
+
(gumbo_tagset){TAG(DD), TAG(DT), TAG(LI), TAG(OPTION),
|
1476
|
+
TAG(OPTGROUP), TAG(P), TAG(RP), TAG(RB), TAG(RT), TAG(RTC)}) &&
|
1477
|
+
!node_html_tag_is(get_current_node(parser), exception);
|
1478
|
+
pop_current_node(parser))
|
1479
|
+
;
|
1480
|
+
}
|
1481
|
+
|
1482
|
+
// This is the "generate all implied end tags thoroughly" clause of the spec.
|
1483
|
+
// https://html.spec.whatwg.org/multipage/syntax.html#closing-elements-that-have-implied-end-tags
|
1484
|
+
static void generate_all_implied_end_tags_thoroughly(GumboParser* parser) {
|
1485
|
+
for (
|
1486
|
+
; node_tag_in_set(get_current_node(parser),
|
1487
|
+
(gumbo_tagset){TAG(CAPTION), TAG(COLGROUP), TAG(DD), TAG(DT), TAG(LI),
|
1488
|
+
TAG(OPTION), TAG(OPTGROUP), TAG(P), TAG(RP), TAG(RT), TAG(RTC),
|
1489
|
+
TAG(TBODY), TAG(TD), TAG(TFOOT), TAG(TH), TAG(HEAD), TAG(TR)});
|
1490
|
+
pop_current_node(parser))
|
1491
|
+
;
|
1454
1492
|
}
|
1455
1493
|
|
1456
1494
|
// This factors out the clauses relating to "act as if an end tag token with tag
|
@@ -1463,7 +1501,7 @@ static bool close_table(GumboParser* parser) {
|
|
1463
1501
|
}
|
1464
1502
|
|
1465
1503
|
GumboNode* node = pop_current_node(parser);
|
1466
|
-
while (!
|
1504
|
+
while (!node_html_tag_is(node, GUMBO_TAG_TABLE)) {
|
1467
1505
|
node = pop_current_node(parser);
|
1468
1506
|
}
|
1469
1507
|
reset_insertion_mode_appropriately(parser);
|
@@ -1472,18 +1510,18 @@ static bool close_table(GumboParser* parser) {
|
|
1472
1510
|
|
1473
1511
|
// This factors out the clauses relating to "act as if an end tag token with tag
|
1474
1512
|
// name `cell_tag` had been seen".
|
1475
|
-
static bool close_table_cell(
|
1476
|
-
|
1513
|
+
static bool close_table_cell(
|
1514
|
+
GumboParser* parser, const GumboToken* token, GumboTag cell_tag) {
|
1477
1515
|
bool result = true;
|
1478
1516
|
generate_implied_end_tags(parser, GUMBO_TAG_LAST);
|
1479
1517
|
const GumboNode* node = get_current_node(parser);
|
1480
|
-
if (!
|
1518
|
+
if (!node_html_tag_is(node, cell_tag)) {
|
1481
1519
|
parser_add_parse_error(parser, token);
|
1482
1520
|
result = false;
|
1483
1521
|
}
|
1484
1522
|
do {
|
1485
1523
|
node = pop_current_node(parser);
|
1486
|
-
} while (!
|
1524
|
+
} while (!node_html_tag_is(node, cell_tag));
|
1487
1525
|
|
1488
1526
|
clear_active_formatting_elements(parser);
|
1489
1527
|
set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_ROW);
|
@@ -1508,7 +1546,7 @@ static bool close_current_cell(GumboParser* parser, const GumboToken* token) {
|
|
1508
1546
|
// resets the insertion mode appropriately.
|
1509
1547
|
static void close_current_select(GumboParser* parser) {
|
1510
1548
|
GumboNode* node = pop_current_node(parser);
|
1511
|
-
while (!
|
1549
|
+
while (!node_html_tag_is(node, GUMBO_TAG_SELECT)) {
|
1512
1550
|
node = pop_current_node(parser);
|
1513
1551
|
}
|
1514
1552
|
reset_insertion_mode_appropriately(parser);
|
@@ -1517,60 +1555,48 @@ static void close_current_select(GumboParser* parser) {
|
|
1517
1555
|
// The list of nodes in the "special" category:
|
1518
1556
|
// http://www.whatwg.org/specs/web-apps/current-work/complete/parsing.html#special
|
1519
1557
|
static bool is_special_node(const GumboNode* node) {
|
1520
|
-
assert(node->type == GUMBO_NODE_ELEMENT);
|
1521
|
-
|
1522
|
-
|
1523
|
-
|
1524
|
-
|
1525
|
-
|
1526
|
-
|
1527
|
-
|
1528
|
-
|
1529
|
-
|
1530
|
-
|
1531
|
-
|
1532
|
-
|
1533
|
-
|
1534
|
-
|
1535
|
-
|
1536
|
-
|
1537
|
-
|
1538
|
-
|
1539
|
-
|
1540
|
-
|
1541
|
-
|
1542
|
-
|
1543
|
-
|
1544
|
-
|
1545
|
-
|
1546
|
-
|
1547
|
-
return node_tag_in(node,
|
1548
|
-
GUMBO_TAG_MI, GUMBO_TAG_MO, GUMBO_TAG_MN, GUMBO_TAG_MS,
|
1549
|
-
GUMBO_TAG_MTEXT, GUMBO_TAG_ANNOTATION_XML, GUMBO_TAG_LAST);
|
1550
|
-
case GUMBO_NAMESPACE_SVG:
|
1551
|
-
return node_tag_in(node,
|
1552
|
-
GUMBO_TAG_FOREIGNOBJECT, GUMBO_TAG_DESC, GUMBO_TAG_LAST);
|
1553
|
-
}
|
1554
|
-
abort();
|
1555
|
-
return false; // Pacify compiler.
|
1556
|
-
}
|
1557
|
-
|
1558
|
-
// Implicitly closes currently open tags until it reaches an element with the
|
1559
|
-
// specified tag name. If the elements closed are in the set handled by
|
1558
|
+
assert(node->type == GUMBO_NODE_ELEMENT || node->type == GUMBO_NODE_TEMPLATE);
|
1559
|
+
return node_tag_in_set(node,
|
1560
|
+
(gumbo_tagset){TAG(ADDRESS), TAG(APPLET), TAG(AREA), TAG(ARTICLE),
|
1561
|
+
TAG(ASIDE), TAG(BASE), TAG(BASEFONT), TAG(BGSOUND), TAG(BLOCKQUOTE),
|
1562
|
+
TAG(BODY), TAG(BR), TAG(BUTTON), TAG(CAPTION), TAG(CENTER), TAG(COL),
|
1563
|
+
TAG(COLGROUP), TAG(MENUITEM), TAG(DD), TAG(DETAILS), TAG(DIR),
|
1564
|
+
TAG(DIV), TAG(DL), TAG(DT), TAG(EMBED), TAG(FIELDSET),
|
1565
|
+
TAG(FIGCAPTION), TAG(FIGURE), TAG(FOOTER), TAG(FORM), TAG(FRAME),
|
1566
|
+
TAG(FRAMESET), TAG(H1), TAG(H2), TAG(H3), TAG(H4), TAG(H5), TAG(H6),
|
1567
|
+
TAG(HEAD), TAG(HEADER), TAG(HGROUP), TAG(HR), TAG(HTML), TAG(IFRAME),
|
1568
|
+
TAG(IMG), TAG(INPUT), TAG(ISINDEX), TAG(LI), TAG(LINK), TAG(LISTING),
|
1569
|
+
TAG(MARQUEE), TAG(MENU), TAG(META), TAG(NAV), TAG(NOEMBED),
|
1570
|
+
TAG(NOFRAMES), TAG(NOSCRIPT), TAG(OBJECT), TAG(OL), TAG(P),
|
1571
|
+
TAG(PARAM), TAG(PLAINTEXT), TAG(PRE), TAG(SCRIPT), TAG(SECTION),
|
1572
|
+
TAG(SELECT), TAG(STYLE), TAG(SUMMARY), TAG(TABLE), TAG(TBODY),
|
1573
|
+
TAG(TD), TAG(TEMPLATE), TAG(TEXTAREA), TAG(TFOOT), TAG(TH),
|
1574
|
+
TAG(THEAD), TAG(TITLE), TAG(TR), TAG(UL), TAG(WBR), TAG(XMP),
|
1575
|
+
|
1576
|
+
TAG_MATHML(MI), TAG_MATHML(MO), TAG_MATHML(MN), TAG_MATHML(MS),
|
1577
|
+
TAG_MATHML(MTEXT), TAG_MATHML(ANNOTATION_XML),
|
1578
|
+
|
1579
|
+
TAG_SVG(FOREIGNOBJECT), TAG_SVG(DESC)});
|
1580
|
+
}
|
1581
|
+
|
1582
|
+
// Implicitly closes currently open elements until it reaches an element with
|
1583
|
+
// the
|
1584
|
+
// specified qualified name. If the elements closed are in the set handled by
|
1560
1585
|
// generate_implied_end_tags, this is normal operation and this function returns
|
1561
1586
|
// true. Otherwise, a parse error is recorded and this function returns false.
|
1562
|
-
static bool implicitly_close_tags(
|
1563
|
-
|
1587
|
+
static bool implicitly_close_tags(GumboParser* parser, GumboToken* token,
|
1588
|
+
GumboNamespaceEnum target_ns, GumboTag target) {
|
1564
1589
|
bool result = true;
|
1565
1590
|
generate_implied_end_tags(parser, target);
|
1566
|
-
if (!
|
1591
|
+
if (!node_qualified_tag_is(get_current_node(parser), target_ns, target)) {
|
1567
1592
|
parser_add_parse_error(parser, token);
|
1568
|
-
while (
|
1593
|
+
while (
|
1594
|
+
!node_qualified_tag_is(get_current_node(parser), target_ns, target)) {
|
1569
1595
|
pop_current_node(parser);
|
1570
1596
|
}
|
1571
1597
|
result = false;
|
1572
1598
|
}
|
1573
|
-
assert(
|
1599
|
+
assert(node_qualified_tag_is(get_current_node(parser), target_ns, target));
|
1574
1600
|
pop_current_node(parser);
|
1575
1601
|
return result;
|
1576
1602
|
}
|
@@ -1579,9 +1605,11 @@ static bool implicitly_close_tags(
|
|
1579
1605
|
// a </p> tag was encountered, implicitly closing tags. Returns false if a
|
1580
1606
|
// parse error occurs. This is a convenience function because this particular
|
1581
1607
|
// clause appears several times in the spec.
|
1582
|
-
static bool maybe_implicitly_close_p_tag(
|
1608
|
+
static bool maybe_implicitly_close_p_tag(
|
1609
|
+
GumboParser* parser, GumboToken* token) {
|
1583
1610
|
if (has_an_element_in_button_scope(parser, GUMBO_TAG_P)) {
|
1584
|
-
return implicitly_close_tags(
|
1611
|
+
return implicitly_close_tags(
|
1612
|
+
parser, token, GUMBO_NAMESPACE_HTML, GUMBO_TAG_P);
|
1585
1613
|
}
|
1586
1614
|
return true;
|
1587
1615
|
}
|
@@ -1592,18 +1620,19 @@ static void maybe_implicitly_close_list_tag(
|
|
1592
1620
|
GumboParser* parser, GumboToken* token, bool is_li) {
|
1593
1621
|
GumboParserState* state = parser->_parser_state;
|
1594
1622
|
state->_frameset_ok = false;
|
1595
|
-
for (int i = state->_open_elements.length; --i >= 0;
|
1623
|
+
for (int i = state->_open_elements.length; --i >= 0;) {
|
1596
1624
|
const GumboNode* node = state->_open_elements.data[i];
|
1597
|
-
bool is_list_tag =
|
1598
|
-
|
1599
|
-
|
1625
|
+
bool is_list_tag =
|
1626
|
+
is_li ? node_html_tag_is(node, GUMBO_TAG_LI)
|
1627
|
+
: node_tag_in_set(node, (gumbo_tagset){TAG(DD), TAG(DT)});
|
1600
1628
|
if (is_list_tag) {
|
1601
|
-
implicitly_close_tags(
|
1629
|
+
implicitly_close_tags(
|
1630
|
+
parser, token, node->v.element.tag_namespace, node->v.element.tag);
|
1602
1631
|
return;
|
1603
1632
|
}
|
1604
1633
|
if (is_special_node(node) &&
|
1605
|
-
!
|
1606
|
-
|
1634
|
+
!node_tag_in_set(
|
1635
|
+
node, (gumbo_tagset){TAG(ADDRESS), TAG(DIV), TAG(P)})) {
|
1607
1636
|
return;
|
1608
1637
|
}
|
1609
1638
|
}
|
@@ -1616,7 +1645,7 @@ static void merge_attributes(
|
|
1616
1645
|
const GumboVector* token_attr = &token->v.start_tag.attributes;
|
1617
1646
|
GumboVector* node_attr = &node->v.element.attributes;
|
1618
1647
|
|
1619
|
-
for (int i = 0; i < token_attr->length; ++i) {
|
1648
|
+
for (unsigned int i = 0; i < token_attr->length; ++i) {
|
1620
1649
|
GumboAttribute* attr = token_attr->data[i];
|
1621
1650
|
if (!gumbo_get_attribute(node_attr, attr->name)) {
|
1622
1651
|
// Ownership of the attribute is transferred by this gumbo_vector_add,
|
@@ -1640,8 +1669,8 @@ static void merge_attributes(
|
|
1640
1669
|
}
|
1641
1670
|
|
1642
1671
|
const char* gumbo_normalize_svg_tagname(const GumboStringPiece* tag) {
|
1643
|
-
for (
|
1644
|
-
|
1672
|
+
for (size_t i = 0; i < sizeof(kSvgTagReplacements) / sizeof(ReplacementEntry);
|
1673
|
+
++i) {
|
1645
1674
|
const ReplacementEntry* entry = &kSvgTagReplacements[i];
|
1646
1675
|
if (gumbo_string_equals_ignore_case(tag, &entry->from)) {
|
1647
1676
|
return entry->to.data;
|
@@ -1656,9 +1685,9 @@ const char* gumbo_normalize_svg_tagname(const GumboStringPiece* tag) {
|
|
1656
1685
|
static void adjust_foreign_attributes(GumboParser* parser, GumboToken* token) {
|
1657
1686
|
assert(token->type == GUMBO_TOKEN_START_TAG);
|
1658
1687
|
const GumboVector* attributes = &token->v.start_tag.attributes;
|
1659
|
-
for (
|
1660
|
-
|
1661
|
-
|
1688
|
+
for (size_t i = 0; i < sizeof(kForeignAttributeReplacements) /
|
1689
|
+
sizeof(NamespacedAttributeReplacement);
|
1690
|
+
++i) {
|
1662
1691
|
const NamespacedAttributeReplacement* entry =
|
1663
1692
|
&kForeignAttributeReplacements[i];
|
1664
1693
|
GumboAttribute* attr = gumbo_get_attribute(attributes, entry->from);
|
@@ -1676,7 +1705,7 @@ static void adjust_foreign_attributes(GumboParser* parser, GumboToken* token) {
|
|
1676
1705
|
static void adjust_svg_attributes(GumboParser* parser, GumboToken* token) {
|
1677
1706
|
assert(token->type == GUMBO_TOKEN_START_TAG);
|
1678
1707
|
const GumboVector* attributes = &token->v.start_tag.attributes;
|
1679
|
-
for (
|
1708
|
+
for (size_t i = 0;
|
1680
1709
|
i < sizeof(kSvgAttributeReplacements) / sizeof(ReplacementEntry); ++i) {
|
1681
1710
|
const ReplacementEntry* entry = &kSvgAttributeReplacements[i];
|
1682
1711
|
GumboAttribute* attr = gumbo_get_attribute(attributes, entry->from.data);
|
@@ -1693,8 +1722,8 @@ static void adjust_svg_attributes(GumboParser* parser, GumboToken* token) {
|
|
1693
1722
|
// value.
|
1694
1723
|
static void adjust_mathml_attributes(GumboParser* parser, GumboToken* token) {
|
1695
1724
|
assert(token->type == GUMBO_TOKEN_START_TAG);
|
1696
|
-
GumboAttribute* attr =
|
1697
|
-
&token->v.start_tag.attributes, "definitionurl");
|
1725
|
+
GumboAttribute* attr =
|
1726
|
+
gumbo_get_attribute(&token->v.start_tag.attributes, "definitionurl");
|
1698
1727
|
if (!attr) {
|
1699
1728
|
return;
|
1700
1729
|
}
|
@@ -1702,32 +1731,30 @@ static void adjust_mathml_attributes(GumboParser* parser, GumboToken* token) {
|
|
1702
1731
|
attr->name = gumbo_copy_stringz(parser, "definitionURL");
|
1703
1732
|
}
|
1704
1733
|
|
1705
|
-
static bool doctype_matches(
|
1706
|
-
const
|
1707
|
-
const GumboStringPiece* public_id,
|
1708
|
-
const GumboStringPiece* system_id,
|
1734
|
+
static bool doctype_matches(const GumboTokenDocType* doctype,
|
1735
|
+
const GumboStringPiece* public_id, const GumboStringPiece* system_id,
|
1709
1736
|
bool allow_missing_system_id) {
|
1710
1737
|
return !strcmp(doctype->public_identifier, public_id->data) &&
|
1711
|
-
|
1712
|
-
|
1738
|
+
(allow_missing_system_id || doctype->has_system_identifier) &&
|
1739
|
+
!strcmp(doctype->system_identifier, system_id->data);
|
1713
1740
|
}
|
1714
1741
|
|
1715
1742
|
static bool maybe_add_doctype_error(
|
1716
1743
|
GumboParser* parser, const GumboToken* token) {
|
1717
1744
|
const GumboTokenDocType* doctype = &token->v.doc_type;
|
1718
1745
|
bool html_doctype = !strcmp(doctype->name, kDoctypeHtml.data);
|
1719
|
-
if ((!html_doctype ||
|
1720
|
-
|
1721
|
-
|
1722
|
-
|
1723
|
-
!(html_doctype && (
|
1724
|
-
|
1725
|
-
|
1726
|
-
|
1727
|
-
|
1728
|
-
|
1729
|
-
|
1730
|
-
|
1746
|
+
if ((!html_doctype || doctype->has_public_identifier ||
|
1747
|
+
(doctype->has_system_identifier &&
|
1748
|
+
!strcmp(
|
1749
|
+
doctype->system_identifier, kSystemIdLegacyCompat.data))) &&
|
1750
|
+
!(html_doctype && (doctype_matches(doctype, &kPublicIdHtml4_0,
|
1751
|
+
&kSystemIdRecHtml4_0, true) ||
|
1752
|
+
doctype_matches(doctype, &kPublicIdHtml4_01,
|
1753
|
+
&kSystemIdHtml4, true) ||
|
1754
|
+
doctype_matches(doctype, &kPublicIdXhtml1_0,
|
1755
|
+
&kSystemIdXhtmlStrict1_1, false) ||
|
1756
|
+
doctype_matches(doctype, &kPublicIdXhtml1_1,
|
1757
|
+
&kSystemIdXhtml1_1, false)))) {
|
1731
1758
|
parser_add_parse_error(parser, token);
|
1732
1759
|
return false;
|
1733
1760
|
}
|
@@ -1750,7 +1777,7 @@ static void remove_from_parent(GumboParser* parser, GumboNode* node) {
|
|
1750
1777
|
gumbo_vector_remove_at(parser, index, children);
|
1751
1778
|
node->parent = NULL;
|
1752
1779
|
node->index_within_parent = -1;
|
1753
|
-
for (int i = index; i < children->length; ++i) {
|
1780
|
+
for (unsigned int i = index; i < children->length; ++i) {
|
1754
1781
|
GumboNode* child = children->data[i];
|
1755
1782
|
child->index_within_parent = i;
|
1756
1783
|
}
|
@@ -1759,29 +1786,38 @@ static void remove_from_parent(GumboParser* parser, GumboNode* node) {
|
|
1759
1786
|
// http://www.whatwg.org/specs/web-apps/current-work/multipage/the-end.html#an-introduction-to-error-handling-and-strange-cases-in-the-parser
|
1760
1787
|
// Also described in the "in body" handling for end formatting tags.
|
1761
1788
|
static bool adoption_agency_algorithm(
|
1762
|
-
GumboParser* parser, GumboToken* token, GumboTag
|
1789
|
+
GumboParser* parser, GumboToken* token, GumboTag subject) {
|
1763
1790
|
GumboParserState* state = parser->_parser_state;
|
1764
1791
|
gumbo_debug("Entering adoption agency algorithm.\n");
|
1765
|
-
//
|
1766
|
-
|
1767
|
-
|
1792
|
+
// Step 1.
|
1793
|
+
GumboNode* current_node = get_current_node(parser);
|
1794
|
+
if (current_node->v.element.tag_namespace == GUMBO_NAMESPACE_HTML &&
|
1795
|
+
current_node->v.element.tag == subject &&
|
1796
|
+
gumbo_vector_index_of(
|
1797
|
+
&state->_active_formatting_elements, current_node) == -1) {
|
1798
|
+
pop_current_node(parser);
|
1799
|
+
return false;
|
1800
|
+
}
|
1801
|
+
// Steps 2-4 & 20:
|
1802
|
+
for (unsigned int i = 0; i < 8; ++i) {
|
1803
|
+
// Step 5.
|
1768
1804
|
GumboNode* formatting_node = NULL;
|
1769
1805
|
int formatting_node_in_open_elements = -1;
|
1770
|
-
for (int j = state->_active_formatting_elements.length; --j >= 0;
|
1806
|
+
for (int j = state->_active_formatting_elements.length; --j >= 0;) {
|
1771
1807
|
GumboNode* current_node = state->_active_formatting_elements.data[j];
|
1772
1808
|
if (current_node == &kActiveFormattingScopeMarker) {
|
1773
1809
|
gumbo_debug("Broke on scope marker; aborting.\n");
|
1774
1810
|
// Last scope marker; abort the algorithm.
|
1775
1811
|
return false;
|
1776
1812
|
}
|
1777
|
-
if (
|
1813
|
+
if (node_html_tag_is(current_node, subject)) {
|
1778
1814
|
// Found it.
|
1779
1815
|
formatting_node = current_node;
|
1780
|
-
formatting_node_in_open_elements =
|
1781
|
-
&state->_open_elements, formatting_node);
|
1816
|
+
formatting_node_in_open_elements =
|
1817
|
+
gumbo_vector_index_of(&state->_open_elements, formatting_node);
|
1782
1818
|
gumbo_debug("Formatting element of tag %s at %d.\n",
|
1783
|
-
|
1784
|
-
|
1819
|
+
gumbo_normalized_tagname(subject),
|
1820
|
+
formatting_node_in_open_elements);
|
1785
1821
|
break;
|
1786
1822
|
}
|
1787
1823
|
}
|
@@ -1793,74 +1829,84 @@ static bool adoption_agency_algorithm(
|
|
1793
1829
|
return false;
|
1794
1830
|
}
|
1795
1831
|
|
1832
|
+
// Step 6
|
1796
1833
|
if (formatting_node_in_open_elements == -1) {
|
1797
1834
|
gumbo_debug("Formatting node not on stack of open elements.\n");
|
1798
|
-
|
1799
|
-
|
1835
|
+
parser_add_parse_error(parser, token);
|
1836
|
+
gumbo_vector_remove(
|
1837
|
+
parser, formatting_node, &state->_active_formatting_elements);
|
1800
1838
|
return false;
|
1801
1839
|
}
|
1802
1840
|
|
1841
|
+
// Step 7
|
1803
1842
|
if (!has_an_element_in_scope(parser, formatting_node->v.element.tag)) {
|
1804
1843
|
parser_add_parse_error(parser, token);
|
1805
1844
|
gumbo_debug("Element not in scope.\n");
|
1806
1845
|
return false;
|
1807
1846
|
}
|
1847
|
+
|
1848
|
+
// Step 8
|
1808
1849
|
if (formatting_node != get_current_node(parser)) {
|
1809
1850
|
parser_add_parse_error(parser, token); // But continue onwards.
|
1810
1851
|
}
|
1811
1852
|
assert(formatting_node);
|
1812
|
-
assert(!
|
1813
|
-
assert(!
|
1853
|
+
assert(!node_html_tag_is(formatting_node, GUMBO_TAG_HTML));
|
1854
|
+
assert(!node_html_tag_is(formatting_node, GUMBO_TAG_BODY));
|
1814
1855
|
|
1815
|
-
// Step
|
1856
|
+
// Step 9 & 10
|
1816
1857
|
GumboNode* furthest_block = NULL;
|
1817
|
-
for (int j = formatting_node_in_open_elements;
|
1858
|
+
for (unsigned int j = formatting_node_in_open_elements;
|
1818
1859
|
j < state->_open_elements.length; ++j) {
|
1819
1860
|
assert(j > 0);
|
1820
1861
|
GumboNode* current = state->_open_elements.data[j];
|
1821
1862
|
if (is_special_node(current)) {
|
1822
|
-
// Step
|
1863
|
+
// Step 9.
|
1823
1864
|
furthest_block = current;
|
1824
1865
|
break;
|
1825
1866
|
}
|
1826
1867
|
}
|
1827
1868
|
if (!furthest_block) {
|
1828
|
-
// Step
|
1869
|
+
// Step 10.
|
1829
1870
|
while (get_current_node(parser) != formatting_node) {
|
1830
1871
|
pop_current_node(parser);
|
1831
1872
|
}
|
1832
1873
|
// And the formatting element itself.
|
1833
1874
|
pop_current_node(parser);
|
1834
|
-
gumbo_vector_remove(
|
1835
|
-
|
1875
|
+
gumbo_vector_remove(
|
1876
|
+
parser, formatting_node, &state->_active_formatting_elements);
|
1836
1877
|
return false;
|
1837
1878
|
}
|
1838
|
-
assert(!
|
1879
|
+
assert(!node_html_tag_is(furthest_block, GUMBO_TAG_HTML));
|
1839
1880
|
assert(furthest_block);
|
1840
1881
|
|
1841
|
-
// Step
|
1882
|
+
// Step 11.
|
1842
1883
|
// Elements may be moved and reparented by this algorithm, so
|
1843
1884
|
// common_ancestor is not necessarily the same as formatting_node->parent.
|
1844
1885
|
GumboNode* common_ancestor =
|
1845
|
-
state->_open_elements.data[gumbo_vector_index_of(
|
1846
|
-
|
1886
|
+
state->_open_elements.data[gumbo_vector_index_of(&state->_open_elements,
|
1887
|
+
formatting_node) -
|
1888
|
+
1];
|
1847
1889
|
gumbo_debug("Common ancestor tag = %s, furthest block tag = %s.\n",
|
1848
|
-
|
1849
|
-
|
1890
|
+
gumbo_normalized_tagname(common_ancestor->v.element.tag),
|
1891
|
+
gumbo_normalized_tagname(furthest_block->v.element.tag));
|
1850
1892
|
|
1851
|
-
// Step
|
1893
|
+
// Step 12.
|
1852
1894
|
int bookmark = gumbo_vector_index_of(
|
1853
|
-
|
1854
|
-
|
1895
|
+
&state->_active_formatting_elements, formatting_node) +
|
1896
|
+
1;
|
1897
|
+
gumbo_debug("Bookmark at %d.\n", bookmark);
|
1898
|
+
// Step 13.
|
1855
1899
|
GumboNode* node = furthest_block;
|
1856
1900
|
GumboNode* last_node = furthest_block;
|
1857
1901
|
// Must be stored explicitly, in case node is removed from the stack of open
|
1858
1902
|
// elements, to handle step 9.4.
|
1859
1903
|
int saved_node_index = gumbo_vector_index_of(&state->_open_elements, node);
|
1860
1904
|
assert(saved_node_index > 0);
|
1861
|
-
// Step
|
1862
|
-
for (int j = 0
|
1863
|
-
// Step
|
1905
|
+
// Step 13.1.
|
1906
|
+
for (int j = 0;;) {
|
1907
|
+
// Step 13.2.
|
1908
|
+
++j;
|
1909
|
+
// Step 13.3.
|
1864
1910
|
int node_index = gumbo_vector_index_of(&state->_open_elements, node);
|
1865
1911
|
gumbo_debug(
|
1866
1912
|
"Current index: %d, last index: %d.\n", node_index, saved_node_index);
|
@@ -1869,59 +1915,72 @@ static bool adoption_agency_algorithm(
|
|
1869
1915
|
}
|
1870
1916
|
saved_node_index = --node_index;
|
1871
1917
|
assert(node_index > 0);
|
1872
|
-
assert(node_index < state->_open_elements.capacity);
|
1918
|
+
assert((unsigned int) node_index < state->_open_elements.capacity);
|
1873
1919
|
node = state->_open_elements.data[node_index];
|
1874
1920
|
assert(node->parent);
|
1875
|
-
|
1876
|
-
|
1877
|
-
|
1921
|
+
if (node == formatting_node) {
|
1922
|
+
// Step 13.4.
|
1923
|
+
break;
|
1924
|
+
}
|
1925
|
+
int formatting_index =
|
1926
|
+
gumbo_vector_index_of(&state->_active_formatting_elements, node);
|
1927
|
+
if (j > 3 && formatting_index != -1) {
|
1928
|
+
// Step 13.5.
|
1929
|
+
gumbo_debug("Removing formatting element at %d.\n", formatting_index);
|
1930
|
+
gumbo_vector_remove_at(
|
1931
|
+
parser, formatting_index, &state->_active_formatting_elements);
|
1932
|
+
// Removing the element shifts all indices over by one, so we may need
|
1933
|
+
// to move the bookmark.
|
1934
|
+
if (formatting_index < bookmark) {
|
1935
|
+
--bookmark;
|
1936
|
+
gumbo_debug("Moving bookmark to %d.\n", bookmark);
|
1937
|
+
}
|
1938
|
+
continue;
|
1939
|
+
}
|
1940
|
+
if (formatting_index == -1) {
|
1941
|
+
// Step 13.6.
|
1878
1942
|
gumbo_vector_remove_at(parser, node_index, &state->_open_elements);
|
1879
1943
|
continue;
|
1880
|
-
} else if (node == formatting_node) {
|
1881
|
-
// Step 9.6.
|
1882
|
-
break;
|
1883
1944
|
}
|
1884
|
-
// Step
|
1885
|
-
|
1886
|
-
|
1945
|
+
// Step 13.7.
|
1946
|
+
// "common ancestor as the intended parent" doesn't actually mean insert
|
1947
|
+
// it into the common ancestor; that happens below.
|
1887
1948
|
node = clone_node(parser, node, GUMBO_INSERTION_ADOPTION_AGENCY_CLONED);
|
1949
|
+
assert(formatting_index >= 0);
|
1888
1950
|
state->_active_formatting_elements.data[formatting_index] = node;
|
1951
|
+
assert(node_index >= 0);
|
1889
1952
|
state->_open_elements.data[node_index] = node;
|
1890
|
-
// Step
|
1953
|
+
// Step 13.8.
|
1891
1954
|
if (last_node == furthest_block) {
|
1892
1955
|
bookmark = formatting_index + 1;
|
1893
|
-
|
1956
|
+
gumbo_debug("Bookmark moved to %d.\n", bookmark);
|
1957
|
+
assert((unsigned int) bookmark <= state->_active_formatting_elements.length);
|
1894
1958
|
}
|
1895
|
-
// Step
|
1959
|
+
// Step 13.9.
|
1896
1960
|
last_node->parse_flags |= GUMBO_INSERTION_ADOPTION_AGENCY_MOVED;
|
1897
1961
|
remove_from_parent(parser, last_node);
|
1898
1962
|
append_node(parser, node, last_node);
|
1899
|
-
// Step
|
1963
|
+
// Step 13.10.
|
1900
1964
|
last_node = node;
|
1901
|
-
}
|
1965
|
+
} // Step 13.11.
|
1902
1966
|
|
1903
|
-
// Step
|
1967
|
+
// Step 14.
|
1904
1968
|
gumbo_debug("Removing %s node from parent ",
|
1905
|
-
|
1969
|
+
gumbo_normalized_tagname(last_node->v.element.tag));
|
1906
1970
|
remove_from_parent(parser, last_node);
|
1907
1971
|
last_node->parse_flags |= GUMBO_INSERTION_ADOPTION_AGENCY_MOVED;
|
1908
|
-
|
1909
|
-
|
1910
|
-
|
1911
|
-
|
1912
|
-
|
1913
|
-
} else {
|
1914
|
-
gumbo_debug("and inserting it into %s.\n",
|
1915
|
-
gumbo_normalized_tagname(common_ancestor->v.element.tag));
|
1916
|
-
append_node(parser, common_ancestor, last_node);
|
1917
|
-
}
|
1972
|
+
InsertionLocation location =
|
1973
|
+
get_appropriate_insertion_location(parser, common_ancestor);
|
1974
|
+
gumbo_debug("and inserting it into %s.\n",
|
1975
|
+
gumbo_normalized_tagname(location.target->v.element.tag));
|
1976
|
+
insert_node(parser, last_node, location);
|
1918
1977
|
|
1919
|
-
// Step
|
1978
|
+
// Step 15.
|
1920
1979
|
GumboNode* new_formatting_node = clone_node(
|
1921
1980
|
parser, formatting_node, GUMBO_INSERTION_ADOPTION_AGENCY_CLONED);
|
1922
1981
|
formatting_node->parse_flags |= GUMBO_INSERTION_IMPLICIT_END_TAG;
|
1923
1982
|
|
1924
|
-
// Step
|
1983
|
+
// Step 16. Instead of appending nodes one-by-one, we swap the children
|
1925
1984
|
// vector of furthest_block with the empty children of new_formatting_node,
|
1926
1985
|
// reducing memory traffic and allocations. We still have to reset their
|
1927
1986
|
// parent pointers, though.
|
@@ -1931,15 +1990,15 @@ static bool adoption_agency_algorithm(
|
|
1931
1990
|
furthest_block->v.element.children = temp;
|
1932
1991
|
|
1933
1992
|
temp = new_formatting_node->v.element.children;
|
1934
|
-
for (int i = 0; i < temp.length; ++i) {
|
1993
|
+
for (unsigned int i = 0; i < temp.length; ++i) {
|
1935
1994
|
GumboNode* child = temp.data[i];
|
1936
1995
|
child->parent = new_formatting_node;
|
1937
1996
|
}
|
1938
1997
|
|
1939
|
-
// Step
|
1998
|
+
// Step 17.
|
1940
1999
|
append_node(parser, furthest_block, new_formatting_node);
|
1941
2000
|
|
1942
|
-
// Step
|
2001
|
+
// Step 18.
|
1943
2002
|
// If the formatting node was before the bookmark, it may shift over all
|
1944
2003
|
// indices after it, so we need to explicitly find the index and possibly
|
1945
2004
|
// adjust the bookmark.
|
@@ -1947,25 +2006,27 @@ static bool adoption_agency_algorithm(
|
|
1947
2006
|
&state->_active_formatting_elements, formatting_node);
|
1948
2007
|
assert(formatting_node_index != -1);
|
1949
2008
|
if (formatting_node_index < bookmark) {
|
2009
|
+
gumbo_debug(
|
2010
|
+
"Formatting node at %d is before bookmark at %d; decrementing.\n",
|
2011
|
+
formatting_node_index, bookmark);
|
1950
2012
|
--bookmark;
|
1951
2013
|
}
|
1952
2014
|
gumbo_vector_remove_at(
|
1953
2015
|
parser, formatting_node_index, &state->_active_formatting_elements);
|
1954
2016
|
assert(bookmark >= 0);
|
1955
|
-
assert(bookmark <= state->_active_formatting_elements.length);
|
2017
|
+
assert((unsigned int) bookmark <= state->_active_formatting_elements.length);
|
1956
2018
|
gumbo_vector_insert_at(parser, new_formatting_node, bookmark,
|
1957
|
-
|
2019
|
+
&state->_active_formatting_elements);
|
1958
2020
|
|
1959
|
-
// Step
|
1960
|
-
gumbo_vector_remove(
|
1961
|
-
|
1962
|
-
|
1963
|
-
&state->_open_elements, furthest_block) + 1;
|
2021
|
+
// Step 19.
|
2022
|
+
gumbo_vector_remove(parser, formatting_node, &state->_open_elements);
|
2023
|
+
int insert_at =
|
2024
|
+
gumbo_vector_index_of(&state->_open_elements, furthest_block) + 1;
|
1964
2025
|
assert(insert_at >= 0);
|
1965
|
-
assert(insert_at <= state->_open_elements.length);
|
2026
|
+
assert((unsigned int) insert_at <= state->_open_elements.length);
|
1966
2027
|
gumbo_vector_insert_at(
|
1967
2028
|
parser, new_formatting_node, insert_at, &state->_open_elements);
|
1968
|
-
}
|
2029
|
+
} // Step 20.
|
1969
2030
|
return true;
|
1970
2031
|
}
|
1971
2032
|
|
@@ -1988,17 +2049,19 @@ static void ignore_token(GumboParser* parser) {
|
|
1988
2049
|
|
1989
2050
|
// http://www.whatwg.org/specs/web-apps/current-work/complete/the-end.html
|
1990
2051
|
static void finish_parsing(GumboParser* parser) {
|
2052
|
+
gumbo_debug("Finishing parsing");
|
1991
2053
|
maybe_flush_text_node_buffer(parser);
|
1992
2054
|
GumboParserState* state = parser->_parser_state;
|
1993
2055
|
for (GumboNode* node = pop_current_node(parser); node;
|
1994
2056
|
node = pop_current_node(parser)) {
|
1995
|
-
if ((
|
1996
|
-
(
|
2057
|
+
if ((node_html_tag_is(node, GUMBO_TAG_BODY) && state->_closed_body_tag) ||
|
2058
|
+
(node_html_tag_is(node, GUMBO_TAG_HTML) && state->_closed_html_tag)) {
|
1997
2059
|
continue;
|
1998
2060
|
}
|
1999
2061
|
node->parse_flags |= GUMBO_INSERTION_IMPLICIT_END_TAG;
|
2000
2062
|
}
|
2001
|
-
while (pop_current_node(parser))
|
2063
|
+
while (pop_current_node(parser))
|
2064
|
+
; // Pop them all.
|
2002
2065
|
}
|
2003
2066
|
|
2004
2067
|
static bool handle_initial(GumboParser* parser, GumboToken* token) {
|
@@ -2042,9 +2105,9 @@ static bool handle_before_html(GumboParser* parser, GumboToken* token) {
|
|
2042
2105
|
parser->_output->root = html_node;
|
2043
2106
|
set_insertion_mode(parser, GUMBO_INSERTION_MODE_BEFORE_HEAD);
|
2044
2107
|
return true;
|
2045
|
-
} else if (token->type == GUMBO_TOKEN_END_TAG &&
|
2046
|
-
|
2047
|
-
|
2108
|
+
} else if (token->type == GUMBO_TOKEN_END_TAG &&
|
2109
|
+
!tag_in(token, false,
|
2110
|
+
(gumbo_tagset){TAG(HEAD), TAG(BODY), TAG(HTML), TAG(BR)})) {
|
2048
2111
|
parser_add_parse_error(parser, token);
|
2049
2112
|
ignore_token(parser);
|
2050
2113
|
return false;
|
@@ -2076,9 +2139,9 @@ static bool handle_before_head(GumboParser* parser, GumboToken* token) {
|
|
2076
2139
|
set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_HEAD);
|
2077
2140
|
parser->_parser_state->_head_element = node;
|
2078
2141
|
return true;
|
2079
|
-
} else if (token->type == GUMBO_TOKEN_END_TAG &&
|
2080
|
-
|
2081
|
-
|
2142
|
+
} else if (token->type == GUMBO_TOKEN_END_TAG &&
|
2143
|
+
!tag_in(token, false,
|
2144
|
+
(gumbo_tagset){TAG(HEAD), TAG(BODY), TAG(HTML), TAG(BR)})) {
|
2082
2145
|
parser_add_parse_error(parser, token);
|
2083
2146
|
ignore_token(parser);
|
2084
2147
|
return false;
|
@@ -2110,9 +2173,9 @@ static bool handle_in_head(GumboParser* parser, GumboToken* token) {
|
|
2110
2173
|
return true;
|
2111
2174
|
} else if (tag_is(token, kStartTag, GUMBO_TAG_HTML)) {
|
2112
2175
|
return handle_in_body(parser, token);
|
2113
|
-
} else if (tag_in(token, kStartTag,
|
2114
|
-
|
2115
|
-
|
2176
|
+
} else if (tag_in(token, kStartTag,
|
2177
|
+
(gumbo_tagset){TAG(BASE), TAG(BASEFONT), TAG(BGSOUND),
|
2178
|
+
TAG(MENUITEM), TAG(LINK)})) {
|
2116
2179
|
insert_element_from_token(parser, token);
|
2117
2180
|
pop_current_node(parser);
|
2118
2181
|
acknowledge_self_closing_tag(parser);
|
@@ -2129,8 +2192,8 @@ static bool handle_in_head(GumboParser* parser, GumboToken* token) {
|
|
2129
2192
|
} else if (tag_is(token, kStartTag, GUMBO_TAG_TITLE)) {
|
2130
2193
|
run_generic_parsing_algorithm(parser, token, GUMBO_LEX_RCDATA);
|
2131
2194
|
return true;
|
2132
|
-
} else if (tag_in(
|
2133
|
-
|
2195
|
+
} else if (tag_in(
|
2196
|
+
token, kStartTag, (gumbo_tagset){TAG(NOFRAMES), TAG(STYLE)})) {
|
2134
2197
|
run_generic_parsing_algorithm(parser, token, GUMBO_LEX_RAWTEXT);
|
2135
2198
|
return true;
|
2136
2199
|
} else if (tag_is(token, kStartTag, GUMBO_TAG_NOSCRIPT)) {
|
@@ -2143,32 +2206,51 @@ static bool handle_in_head(GumboParser* parser, GumboToken* token) {
|
|
2143
2206
|
} else if (tag_is(token, kEndTag, GUMBO_TAG_HEAD)) {
|
2144
2207
|
GumboNode* head = pop_current_node(parser);
|
2145
2208
|
AVOID_UNUSED_VARIABLE_WARNING(head);
|
2146
|
-
assert(
|
2209
|
+
assert(node_html_tag_is(head, GUMBO_TAG_HEAD));
|
2147
2210
|
set_insertion_mode(parser, GUMBO_INSERTION_MODE_AFTER_HEAD);
|
2148
2211
|
return true;
|
2149
|
-
} else if (
|
2150
|
-
|
2151
|
-
|
2152
|
-
|
2212
|
+
} else if (tag_in(token, kEndTag,
|
2213
|
+
(gumbo_tagset){TAG(BODY), TAG(HTML), TAG(BR)})) {
|
2214
|
+
pop_current_node(parser);
|
2215
|
+
set_insertion_mode(parser, GUMBO_INSERTION_MODE_AFTER_HEAD);
|
2216
|
+
parser->_parser_state->_reprocess_current_token = true;
|
2217
|
+
return true;
|
2218
|
+
} else if (tag_is(token, kStartTag, GUMBO_TAG_TEMPLATE)) {
|
2219
|
+
insert_element_from_token(parser, token);
|
2220
|
+
add_formatting_element(parser, &kActiveFormattingScopeMarker);
|
2221
|
+
parser->_parser_state->_frameset_ok = false;
|
2222
|
+
set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TEMPLATE);
|
2223
|
+
push_template_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TEMPLATE);
|
2224
|
+
return true;
|
2225
|
+
} else if (tag_is(token, kEndTag, GUMBO_TAG_TEMPLATE)) {
|
2226
|
+
if (!has_open_element(parser, GUMBO_TAG_TEMPLATE)) {
|
2227
|
+
parser_add_parse_error(parser, token);
|
2228
|
+
ignore_token(parser);
|
2229
|
+
return false;
|
2230
|
+
}
|
2231
|
+
generate_all_implied_end_tags_thoroughly(parser);
|
2232
|
+
bool success = true;
|
2233
|
+
if (!node_html_tag_is(get_current_node(parser), GUMBO_TAG_TEMPLATE)) {
|
2234
|
+
parser_add_parse_error(parser, token);
|
2235
|
+
success = false;
|
2236
|
+
}
|
2237
|
+
while (!node_html_tag_is(pop_current_node(parser), GUMBO_TAG_TEMPLATE))
|
2238
|
+
;
|
2239
|
+
clear_active_formatting_elements(parser);
|
2240
|
+
pop_template_insertion_mode(parser);
|
2241
|
+
reset_insertion_mode_appropriately(parser);
|
2242
|
+
return success;
|
2153
2243
|
} else if (tag_is(token, kStartTag, GUMBO_TAG_HEAD) ||
|
2154
|
-
(token->type == GUMBO_TOKEN_END_TAG
|
2155
|
-
!tag_in(token, kEndTag, GUMBO_TAG_BODY, GUMBO_TAG_HTML,
|
2156
|
-
GUMBO_TAG_BR, GUMBO_TAG_LAST))) {
|
2157
|
-
parser_add_parse_error(parser, token);
|
2158
|
-
return false;
|
2159
|
-
} else if (tag_is(token, kStartTag, GUMBO_TAG_UNKNOWN) && token->v.start_tag.is_self_closing) {
|
2244
|
+
(token->type == GUMBO_TOKEN_END_TAG)) {
|
2160
2245
|
parser_add_parse_error(parser, token);
|
2161
2246
|
ignore_token(parser);
|
2162
2247
|
return false;
|
2163
2248
|
} else {
|
2164
|
-
|
2165
|
-
assert(node_tag_is(node, GUMBO_TAG_HEAD));
|
2166
|
-
AVOID_UNUSED_VARIABLE_WARNING(node);
|
2249
|
+
pop_current_node(parser);
|
2167
2250
|
set_insertion_mode(parser, GUMBO_INSERTION_MODE_AFTER_HEAD);
|
2168
2251
|
parser->_parser_state->_reprocess_current_token = true;
|
2169
2252
|
return true;
|
2170
2253
|
}
|
2171
|
-
|
2172
2254
|
return true;
|
2173
2255
|
}
|
2174
2256
|
|
@@ -2181,27 +2263,27 @@ static bool handle_in_head_noscript(GumboParser* parser, GumboToken* token) {
|
|
2181
2263
|
return handle_in_body(parser, token);
|
2182
2264
|
} else if (tag_is(token, kEndTag, GUMBO_TAG_NOSCRIPT)) {
|
2183
2265
|
const GumboNode* node = pop_current_node(parser);
|
2184
|
-
assert(
|
2266
|
+
assert(node_html_tag_is(node, GUMBO_TAG_NOSCRIPT));
|
2185
2267
|
AVOID_UNUSED_VARIABLE_WARNING(node);
|
2186
2268
|
set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_HEAD);
|
2187
2269
|
return true;
|
2188
2270
|
} else if (token->type == GUMBO_TOKEN_WHITESPACE ||
|
2189
2271
|
token->type == GUMBO_TOKEN_COMMENT ||
|
2190
|
-
tag_in(token, kStartTag,
|
2191
|
-
|
2192
|
-
|
2272
|
+
tag_in(token, kStartTag,
|
2273
|
+
(gumbo_tagset){TAG(BASEFONT), TAG(BGSOUND), TAG(LINK),
|
2274
|
+
TAG(META), TAG(NOFRAMES), TAG(STYLE)})) {
|
2193
2275
|
return handle_in_head(parser, token);
|
2194
|
-
} else if (tag_in(
|
2195
|
-
|
2196
|
-
|
2197
|
-
|
2276
|
+
} else if (tag_in(
|
2277
|
+
token, kStartTag, (gumbo_tagset){TAG(HEAD), TAG(NOSCRIPT)}) ||
|
2278
|
+
(token->type == GUMBO_TOKEN_END_TAG &&
|
2279
|
+
!tag_is(token, kEndTag, GUMBO_TAG_BR))) {
|
2198
2280
|
parser_add_parse_error(parser, token);
|
2199
2281
|
ignore_token(parser);
|
2200
2282
|
return false;
|
2201
2283
|
} else {
|
2202
2284
|
parser_add_parse_error(parser, token);
|
2203
2285
|
const GumboNode* node = pop_current_node(parser);
|
2204
|
-
assert(
|
2286
|
+
assert(node_html_tag_is(node, GUMBO_TAG_NOSCRIPT));
|
2205
2287
|
AVOID_UNUSED_VARIABLE_WARNING(node);
|
2206
2288
|
set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_HEAD);
|
2207
2289
|
parser->_parser_state->_reprocess_current_token = true;
|
@@ -2233,10 +2315,10 @@ static bool handle_after_head(GumboParser* parser, GumboToken* token) {
|
|
2233
2315
|
insert_element_from_token(parser, token);
|
2234
2316
|
set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_FRAMESET);
|
2235
2317
|
return true;
|
2236
|
-
} else if (tag_in(token, kStartTag,
|
2237
|
-
|
2238
|
-
|
2239
|
-
|
2318
|
+
} else if (tag_in(token, kStartTag,
|
2319
|
+
(gumbo_tagset){TAG(BASE), TAG(BASEFONT), TAG(BGSOUND),
|
2320
|
+
TAG(LINK), TAG(META), TAG(NOFRAMES), TAG(SCRIPT),
|
2321
|
+
TAG(STYLE), TAG(TEMPLATE), TAG(TITLE)})) {
|
2240
2322
|
parser_add_parse_error(parser, token);
|
2241
2323
|
assert(state->_head_element != NULL);
|
2242
2324
|
// This must be flushed before we push the head element on, as there may be
|
@@ -2246,10 +2328,12 @@ static bool handle_after_head(GumboParser* parser, GumboToken* token) {
|
|
2246
2328
|
bool result = handle_in_head(parser, token);
|
2247
2329
|
gumbo_vector_remove(parser, state->_head_element, &state->_open_elements);
|
2248
2330
|
return result;
|
2331
|
+
} else if (tag_is(token, kEndTag, GUMBO_TAG_TEMPLATE)) {
|
2332
|
+
return handle_in_head(parser, token);
|
2249
2333
|
} else if (tag_is(token, kStartTag, GUMBO_TAG_HEAD) ||
|
2250
|
-
|
2251
|
-
|
2252
|
-
|
2334
|
+
(token->type == GUMBO_TOKEN_END_TAG &&
|
2335
|
+
!tag_in(token, kEndTag,
|
2336
|
+
(gumbo_tagset){TAG(BODY), TAG(HTML), TAG(BR)}))) {
|
2253
2337
|
parser_add_parse_error(parser, token);
|
2254
2338
|
ignore_token(parser);
|
2255
2339
|
return false;
|
@@ -2263,24 +2347,23 @@ static bool handle_after_head(GumboParser* parser, GumboToken* token) {
|
|
2263
2347
|
|
2264
2348
|
static void destroy_node(GumboParser* parser, GumboNode* node) {
|
2265
2349
|
switch (node->type) {
|
2266
|
-
case GUMBO_NODE_DOCUMENT:
|
2267
|
-
|
2268
|
-
|
2269
|
-
|
2270
|
-
destroy_node(parser, doc->children.data[i]);
|
2271
|
-
}
|
2272
|
-
gumbo_parser_deallocate(parser, (void*) doc->children.data);
|
2273
|
-
gumbo_parser_deallocate(parser, (void*) doc->name);
|
2274
|
-
gumbo_parser_deallocate(parser, (void*) doc->public_identifier);
|
2275
|
-
gumbo_parser_deallocate(parser, (void*) doc->system_identifier);
|
2350
|
+
case GUMBO_NODE_DOCUMENT: {
|
2351
|
+
GumboDocument* doc = &node->v.document;
|
2352
|
+
for (unsigned int i = 0; i < doc->children.length; ++i) {
|
2353
|
+
destroy_node(parser, doc->children.data[i]);
|
2276
2354
|
}
|
2277
|
-
|
2355
|
+
gumbo_parser_deallocate(parser, (void*) doc->children.data);
|
2356
|
+
gumbo_parser_deallocate(parser, (void*) doc->name);
|
2357
|
+
gumbo_parser_deallocate(parser, (void*) doc->public_identifier);
|
2358
|
+
gumbo_parser_deallocate(parser, (void*) doc->system_identifier);
|
2359
|
+
} break;
|
2360
|
+
case GUMBO_NODE_TEMPLATE:
|
2278
2361
|
case GUMBO_NODE_ELEMENT:
|
2279
|
-
for (int i = 0; i < node->v.element.attributes.length; ++i) {
|
2362
|
+
for (unsigned int i = 0; i < node->v.element.attributes.length; ++i) {
|
2280
2363
|
gumbo_destroy_attribute(parser, node->v.element.attributes.data[i]);
|
2281
2364
|
}
|
2282
2365
|
gumbo_parser_deallocate(parser, node->v.element.attributes.data);
|
2283
|
-
for (int i = 0; i < node->v.element.children.length; ++i) {
|
2366
|
+
for (unsigned int i = 0; i < node->v.element.children.length; ++i) {
|
2284
2367
|
destroy_node(parser, node->v.element.children.data[i]);
|
2285
2368
|
}
|
2286
2369
|
gumbo_parser_deallocate(parser, node->v.element.children.data);
|
@@ -2307,7 +2390,8 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
2307
2390
|
reconstruct_active_formatting_elements(parser);
|
2308
2391
|
insert_text_token(parser, token);
|
2309
2392
|
return true;
|
2310
|
-
} else if (token->type == GUMBO_TOKEN_CHARACTER
|
2393
|
+
} else if (token->type == GUMBO_TOKEN_CHARACTER ||
|
2394
|
+
token->type == GUMBO_TOKEN_CDATA) {
|
2311
2395
|
reconstruct_active_formatting_elements(parser);
|
2312
2396
|
insert_text_token(parser, token);
|
2313
2397
|
set_frameset_not_ok(parser);
|
@@ -2320,20 +2404,26 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
2320
2404
|
ignore_token(parser);
|
2321
2405
|
return false;
|
2322
2406
|
} else if (tag_is(token, kStartTag, GUMBO_TAG_HTML)) {
|
2407
|
+
parser_add_parse_error(parser, token);
|
2408
|
+
if (has_open_element(parser, GUMBO_TAG_TEMPLATE)) {
|
2409
|
+
ignore_token(parser);
|
2410
|
+
return false;
|
2411
|
+
}
|
2323
2412
|
assert(parser->_output->root != NULL);
|
2324
2413
|
assert(parser->_output->root->type == GUMBO_NODE_ELEMENT);
|
2325
|
-
parser_add_parse_error(parser, token);
|
2326
2414
|
merge_attributes(parser, token, parser->_output->root);
|
2327
2415
|
return false;
|
2328
|
-
} else if (tag_in(token, kStartTag,
|
2329
|
-
|
2330
|
-
|
2331
|
-
|
2416
|
+
} else if (tag_in(token, kStartTag,
|
2417
|
+
(gumbo_tagset){TAG(BASE), TAG(BASEFONT), TAG(BGSOUND),
|
2418
|
+
TAG(MENUITEM), TAG(LINK), TAG(META), TAG(NOFRAMES),
|
2419
|
+
TAG(SCRIPT), TAG(STYLE), TAG(TEMPLATE), TAG(TITLE)}) ||
|
2420
|
+
tag_is(token, kEndTag, GUMBO_TAG_TEMPLATE)) {
|
2332
2421
|
return handle_in_head(parser, token);
|
2333
2422
|
} else if (tag_is(token, kStartTag, GUMBO_TAG_BODY)) {
|
2334
2423
|
parser_add_parse_error(parser, token);
|
2335
2424
|
if (state->_open_elements.length < 2 ||
|
2336
|
-
!
|
2425
|
+
!node_html_tag_is(state->_open_elements.data[1], GUMBO_TAG_BODY) ||
|
2426
|
+
has_open_element(parser, GUMBO_TAG_TEMPLATE)) {
|
2337
2427
|
ignore_token(parser);
|
2338
2428
|
return false;
|
2339
2429
|
}
|
@@ -2343,7 +2433,7 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
2343
2433
|
} else if (tag_is(token, kStartTag, GUMBO_TAG_FRAMESET)) {
|
2344
2434
|
parser_add_parse_error(parser, token);
|
2345
2435
|
if (state->_open_elements.length < 2 ||
|
2346
|
-
!
|
2436
|
+
!node_html_tag_is(state->_open_elements.data[1], GUMBO_TAG_BODY) ||
|
2347
2437
|
!state->_frameset_ok) {
|
2348
2438
|
ignore_token(parser);
|
2349
2439
|
return false;
|
@@ -2367,7 +2457,7 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
2367
2457
|
// Remove the body node. We may want to factor this out into a generic
|
2368
2458
|
// helper, but right now this is the only code that needs to do this.
|
2369
2459
|
GumboVector* children = &parser->_output->root->v.element.children;
|
2370
|
-
for (int i = 0; i < children->length; ++i) {
|
2460
|
+
for (unsigned int i = 0; i < children->length; ++i) {
|
2371
2461
|
if (children->data[i] == body_node) {
|
2372
2462
|
gumbo_vector_remove_at(parser, i, children);
|
2373
2463
|
break;
|
@@ -2380,33 +2470,32 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
2380
2470
|
set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_FRAMESET);
|
2381
2471
|
return true;
|
2382
2472
|
} else if (token->type == GUMBO_TOKEN_EOF) {
|
2383
|
-
for (int i = 0; i < state->_open_elements.length; ++i) {
|
2384
|
-
if (!
|
2385
|
-
|
2386
|
-
|
2387
|
-
|
2388
|
-
GUMBO_TAG_HTML, GUMBO_TAG_LAST)) {
|
2473
|
+
for (unsigned int i = 0; i < state->_open_elements.length; ++i) {
|
2474
|
+
if (!node_tag_in_set(state->_open_elements.data[i],
|
2475
|
+
(gumbo_tagset){TAG(DD), TAG(DT), TAG(LI), TAG(P), TAG(TBODY),
|
2476
|
+
TAG(TD), TAG(TFOOT), TAG(TH), TAG(THEAD), TAG(TR), TAG(BODY),
|
2477
|
+
TAG(HTML)})) {
|
2389
2478
|
parser_add_parse_error(parser, token);
|
2390
|
-
return false;
|
2391
2479
|
}
|
2392
2480
|
}
|
2481
|
+
if (get_current_template_insertion_mode(parser) !=
|
2482
|
+
GUMBO_INSERTION_MODE_INITIAL) {
|
2483
|
+
return handle_in_template(parser, token);
|
2484
|
+
}
|
2393
2485
|
return true;
|
2394
|
-
} else if (tag_in(token, kEndTag,
|
2395
|
-
GUMBO_TAG_LAST)) {
|
2486
|
+
} else if (tag_in(token, kEndTag, (gumbo_tagset){TAG(BODY), TAG(HTML)})) {
|
2396
2487
|
if (!has_an_element_in_scope(parser, GUMBO_TAG_BODY)) {
|
2397
2488
|
parser_add_parse_error(parser, token);
|
2398
2489
|
ignore_token(parser);
|
2399
2490
|
return false;
|
2400
2491
|
}
|
2401
2492
|
bool success = true;
|
2402
|
-
for (int i = 0; i < state->_open_elements.length; ++i) {
|
2403
|
-
if (!
|
2404
|
-
|
2405
|
-
|
2406
|
-
|
2407
|
-
|
2408
|
-
GUMBO_TAG_TR, GUMBO_TAG_BODY, GUMBO_TAG_HTML,
|
2409
|
-
GUMBO_TAG_LAST)) {
|
2493
|
+
for (unsigned int i = 0; i < state->_open_elements.length; ++i) {
|
2494
|
+
if (!node_tag_in_set(state->_open_elements.data[i],
|
2495
|
+
(gumbo_tagset){TAG(DD), TAG(DT), TAG(LI), TAG(OPTGROUP),
|
2496
|
+
TAG(OPTION), TAG(P), TAG(RB), TAG(RP), TAG(RT), TAG(RTC),
|
2497
|
+
TAG(TBODY), TAG(TD), TAG(TFOOT), TAG(TH), TAG(THEAD), TAG(TR),
|
2498
|
+
TAG(BODY), TAG(HTML)})) {
|
2410
2499
|
parser_add_parse_error(parser, token);
|
2411
2500
|
success = false;
|
2412
2501
|
break;
|
@@ -2417,58 +2506,58 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
2417
2506
|
parser->_parser_state->_reprocess_current_token = true;
|
2418
2507
|
} else {
|
2419
2508
|
GumboNode* body = state->_open_elements.data[1];
|
2420
|
-
assert(
|
2509
|
+
assert(node_html_tag_is(body, GUMBO_TAG_BODY));
|
2421
2510
|
record_end_of_element(state->_current_token, &body->v.element);
|
2422
2511
|
}
|
2423
2512
|
return success;
|
2424
|
-
} else if (tag_in(token, kStartTag,
|
2425
|
-
|
2426
|
-
|
2427
|
-
|
2428
|
-
|
2429
|
-
|
2430
|
-
|
2431
|
-
GUMBO_TAG_SUMMARY, GUMBO_TAG_UL, GUMBO_TAG_LAST)) {
|
2513
|
+
} else if (tag_in(token, kStartTag,
|
2514
|
+
(gumbo_tagset){TAG(ADDRESS), TAG(ARTICLE), TAG(ASIDE),
|
2515
|
+
TAG(BLOCKQUOTE), TAG(CENTER), TAG(DETAILS), TAG(DIR),
|
2516
|
+
TAG(DIV), TAG(DL), TAG(FIELDSET), TAG(FIGCAPTION),
|
2517
|
+
TAG(FIGURE), TAG(FOOTER), TAG(HEADER), TAG(HGROUP),
|
2518
|
+
TAG(MENU), TAG(MAIN), TAG(NAV), TAG(OL), TAG(P),
|
2519
|
+
TAG(SECTION), TAG(SUMMARY), TAG(UL)})) {
|
2432
2520
|
bool result = maybe_implicitly_close_p_tag(parser, token);
|
2433
2521
|
insert_element_from_token(parser, token);
|
2434
2522
|
return result;
|
2435
|
-
} else if (tag_in(token, kStartTag,
|
2436
|
-
|
2523
|
+
} else if (tag_in(token, kStartTag, (gumbo_tagset){TAG(H1), TAG(H2), TAG(H3),
|
2524
|
+
TAG(H4), TAG(H5), TAG(H6)})) {
|
2437
2525
|
bool result = maybe_implicitly_close_p_tag(parser, token);
|
2438
|
-
if (
|
2439
|
-
|
2440
|
-
|
2526
|
+
if (node_tag_in_set(
|
2527
|
+
get_current_node(parser), (gumbo_tagset){TAG(H1), TAG(H2), TAG(H3),
|
2528
|
+
TAG(H4), TAG(H5), TAG(H6)})) {
|
2441
2529
|
parser_add_parse_error(parser, token);
|
2442
2530
|
pop_current_node(parser);
|
2443
2531
|
result = false;
|
2444
2532
|
}
|
2445
2533
|
insert_element_from_token(parser, token);
|
2446
2534
|
return result;
|
2447
|
-
} else if (tag_in(token, kStartTag,
|
2448
|
-
GUMBO_TAG_LAST)) {
|
2535
|
+
} else if (tag_in(token, kStartTag, (gumbo_tagset){TAG(PRE), TAG(LISTING)})) {
|
2449
2536
|
bool result = maybe_implicitly_close_p_tag(parser, token);
|
2450
2537
|
insert_element_from_token(parser, token);
|
2451
2538
|
state->_ignore_next_linefeed = true;
|
2452
2539
|
state->_frameset_ok = false;
|
2453
2540
|
return result;
|
2454
2541
|
} else if (tag_is(token, kStartTag, GUMBO_TAG_FORM)) {
|
2455
|
-
if (state->_form_element != NULL
|
2542
|
+
if (state->_form_element != NULL &&
|
2543
|
+
!has_open_element(parser, GUMBO_TAG_TEMPLATE)) {
|
2456
2544
|
gumbo_debug("Ignoring nested form.\n");
|
2457
2545
|
parser_add_parse_error(parser, token);
|
2458
2546
|
ignore_token(parser);
|
2459
2547
|
return false;
|
2460
2548
|
}
|
2461
2549
|
bool result = maybe_implicitly_close_p_tag(parser, token);
|
2462
|
-
|
2463
|
-
|
2550
|
+
GumboNode* form_element = insert_element_from_token(parser, token);
|
2551
|
+
if (!has_open_element(parser, GUMBO_TAG_TEMPLATE)) {
|
2552
|
+
state->_form_element = form_element;
|
2553
|
+
}
|
2464
2554
|
return result;
|
2465
2555
|
} else if (tag_is(token, kStartTag, GUMBO_TAG_LI)) {
|
2466
2556
|
maybe_implicitly_close_list_tag(parser, token, true);
|
2467
2557
|
bool result = maybe_implicitly_close_p_tag(parser, token);
|
2468
2558
|
insert_element_from_token(parser, token);
|
2469
2559
|
return result;
|
2470
|
-
} else if (tag_in(token, kStartTag,
|
2471
|
-
GUMBO_TAG_LAST)) {
|
2560
|
+
} else if (tag_in(token, kStartTag, (gumbo_tagset){TAG(DD), TAG(DT)})) {
|
2472
2561
|
maybe_implicitly_close_list_tag(parser, token, false);
|
2473
2562
|
bool result = maybe_implicitly_close_p_tag(parser, token);
|
2474
2563
|
insert_element_from_token(parser, token);
|
@@ -2481,7 +2570,8 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
2481
2570
|
} else if (tag_is(token, kStartTag, GUMBO_TAG_BUTTON)) {
|
2482
2571
|
if (has_an_element_in_scope(parser, GUMBO_TAG_BUTTON)) {
|
2483
2572
|
parser_add_parse_error(parser, token);
|
2484
|
-
implicitly_close_tags(
|
2573
|
+
implicitly_close_tags(
|
2574
|
+
parser, token, GUMBO_NAMESPACE_HTML, GUMBO_TAG_BUTTON);
|
2485
2575
|
state->_reprocess_current_token = true;
|
2486
2576
|
return false;
|
2487
2577
|
}
|
@@ -2489,67 +2579,83 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
2489
2579
|
insert_element_from_token(parser, token);
|
2490
2580
|
state->_frameset_ok = false;
|
2491
2581
|
return true;
|
2492
|
-
} else if (tag_in(token, kEndTag,
|
2493
|
-
|
2494
|
-
|
2495
|
-
|
2496
|
-
|
2497
|
-
|
2498
|
-
|
2499
|
-
GUMBO_TAG_SECTION, GUMBO_TAG_SUMMARY, GUMBO_TAG_UL,
|
2500
|
-
GUMBO_TAG_LAST)) {
|
2582
|
+
} else if (tag_in(token, kEndTag,
|
2583
|
+
(gumbo_tagset){TAG(ADDRESS), TAG(ARTICLE), TAG(ASIDE),
|
2584
|
+
TAG(BLOCKQUOTE), TAG(BUTTON), TAG(CENTER), TAG(DETAILS),
|
2585
|
+
TAG(DIR), TAG(DIV), TAG(DL), TAG(FIELDSET),
|
2586
|
+
TAG(FIGCAPTION), TAG(FIGURE), TAG(FOOTER), TAG(HEADER),
|
2587
|
+
TAG(HGROUP), TAG(LISTING), TAG(MAIN), TAG(MENU), TAG(NAV),
|
2588
|
+
TAG(OL), TAG(PRE), TAG(SECTION), TAG(SUMMARY), TAG(UL)})) {
|
2501
2589
|
GumboTag tag = token->v.end_tag;
|
2502
2590
|
if (!has_an_element_in_scope(parser, tag)) {
|
2503
2591
|
parser_add_parse_error(parser, token);
|
2504
2592
|
ignore_token(parser);
|
2505
2593
|
return false;
|
2506
2594
|
}
|
2507
|
-
implicitly_close_tags(
|
2595
|
+
implicitly_close_tags(
|
2596
|
+
parser, token, GUMBO_NAMESPACE_HTML, token->v.end_tag);
|
2508
2597
|
return true;
|
2509
2598
|
} else if (tag_is(token, kEndTag, GUMBO_TAG_FORM)) {
|
2510
|
-
|
2511
|
-
|
2512
|
-
|
2513
|
-
|
2514
|
-
|
2515
|
-
|
2516
|
-
|
2517
|
-
|
2518
|
-
|
2519
|
-
|
2520
|
-
|
2521
|
-
|
2522
|
-
|
2523
|
-
|
2524
|
-
|
2525
|
-
|
2526
|
-
|
2599
|
+
if (has_open_element(parser, GUMBO_TAG_TEMPLATE)) {
|
2600
|
+
if (!has_an_element_in_scope(parser, GUMBO_TAG_FORM)) {
|
2601
|
+
parser_add_parse_error(parser, token);
|
2602
|
+
ignore_token(parser);
|
2603
|
+
return false;
|
2604
|
+
}
|
2605
|
+
bool success = true;
|
2606
|
+
generate_implied_end_tags(parser, GUMBO_TAG_LAST);
|
2607
|
+
if (!node_html_tag_is(get_current_node(parser), GUMBO_TAG_FORM)) {
|
2608
|
+
parser_add_parse_error(parser, token);
|
2609
|
+
return false;
|
2610
|
+
}
|
2611
|
+
while (!node_html_tag_is(pop_current_node(parser), GUMBO_TAG_FORM))
|
2612
|
+
;
|
2613
|
+
return success;
|
2614
|
+
} else {
|
2615
|
+
bool result = true;
|
2616
|
+
const GumboNode* node = state->_form_element;
|
2617
|
+
assert(!node || node->type == GUMBO_NODE_ELEMENT);
|
2618
|
+
state->_form_element = NULL;
|
2619
|
+
if (!node || !has_node_in_scope(parser, node)) {
|
2620
|
+
gumbo_debug("Closing an unopened form.\n");
|
2621
|
+
parser_add_parse_error(parser, token);
|
2622
|
+
ignore_token(parser);
|
2623
|
+
return false;
|
2624
|
+
}
|
2625
|
+
// This differs from implicitly_close_tags because we remove *only* the
|
2626
|
+
// <form> element; other nodes are left in scope.
|
2627
|
+
generate_implied_end_tags(parser, GUMBO_TAG_LAST);
|
2628
|
+
if (get_current_node(parser) != node) {
|
2629
|
+
parser_add_parse_error(parser, token);
|
2630
|
+
result = false;
|
2631
|
+
}
|
2527
2632
|
|
2528
|
-
|
2529
|
-
|
2530
|
-
|
2531
|
-
|
2532
|
-
|
2533
|
-
|
2633
|
+
GumboVector* open_elements = &state->_open_elements;
|
2634
|
+
int index = gumbo_vector_index_of(open_elements, node);
|
2635
|
+
assert(index >= 0);
|
2636
|
+
gumbo_vector_remove_at(parser, index, open_elements);
|
2637
|
+
return result;
|
2638
|
+
}
|
2534
2639
|
} else if (tag_is(token, kEndTag, GUMBO_TAG_P)) {
|
2535
2640
|
if (!has_an_element_in_button_scope(parser, GUMBO_TAG_P)) {
|
2536
2641
|
parser_add_parse_error(parser, token);
|
2537
|
-
reconstruct_active_formatting_elements(parser);
|
2642
|
+
// reconstruct_active_formatting_elements(parser);
|
2538
2643
|
insert_element_of_tag_type(
|
2539
2644
|
parser, GUMBO_TAG_P, GUMBO_INSERTION_CONVERTED_FROM_END_TAG);
|
2540
2645
|
state->_reprocess_current_token = true;
|
2541
2646
|
return false;
|
2542
2647
|
}
|
2543
|
-
return implicitly_close_tags(
|
2648
|
+
return implicitly_close_tags(
|
2649
|
+
parser, token, GUMBO_NAMESPACE_HTML, GUMBO_TAG_P);
|
2544
2650
|
} else if (tag_is(token, kEndTag, GUMBO_TAG_LI)) {
|
2545
2651
|
if (!has_an_element_in_list_scope(parser, GUMBO_TAG_LI)) {
|
2546
2652
|
parser_add_parse_error(parser, token);
|
2547
2653
|
ignore_token(parser);
|
2548
2654
|
return false;
|
2549
2655
|
}
|
2550
|
-
return implicitly_close_tags(
|
2551
|
-
|
2552
|
-
|
2656
|
+
return implicitly_close_tags(
|
2657
|
+
parser, token, GUMBO_NAMESPACE_HTML, GUMBO_TAG_LI);
|
2658
|
+
} else if (tag_in(token, kEndTag, (gumbo_tagset){TAG(DD), TAG(DT)})) {
|
2553
2659
|
assert(token->type == GUMBO_TOKEN_END_TAG);
|
2554
2660
|
GumboTag token_tag = token->v.end_tag;
|
2555
2661
|
if (!has_an_element_in_scope(parser, token_tag)) {
|
@@ -2557,12 +2663,13 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
2557
2663
|
ignore_token(parser);
|
2558
2664
|
return false;
|
2559
2665
|
}
|
2560
|
-
return implicitly_close_tags(
|
2561
|
-
|
2562
|
-
|
2666
|
+
return implicitly_close_tags(
|
2667
|
+
parser, token, GUMBO_NAMESPACE_HTML, token_tag);
|
2668
|
+
} else if (tag_in(token, kEndTag, (gumbo_tagset){TAG(H1), TAG(H2), TAG(H3),
|
2669
|
+
TAG(H4), TAG(H5), TAG(H6)})) {
|
2563
2670
|
if (!has_an_element_in_scope_with_tagname(
|
2564
|
-
parser, GUMBO_TAG_H1, GUMBO_TAG_H2, GUMBO_TAG_H3,
|
2565
|
-
|
2671
|
+
parser, 6, (GumboTag[]){GUMBO_TAG_H1, GUMBO_TAG_H2, GUMBO_TAG_H3,
|
2672
|
+
GUMBO_TAG_H4, GUMBO_TAG_H5, GUMBO_TAG_H6})) {
|
2566
2673
|
// No heading open; ignore the token entirely.
|
2567
2674
|
parser_add_parse_error(parser, token);
|
2568
2675
|
ignore_token(parser);
|
@@ -2570,7 +2677,7 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
2570
2677
|
} else {
|
2571
2678
|
generate_implied_end_tags(parser, GUMBO_TAG_LAST);
|
2572
2679
|
const GumboNode* current_node = get_current_node(parser);
|
2573
|
-
bool success =
|
2680
|
+
bool success = node_html_tag_is(current_node, token->v.end_tag);
|
2574
2681
|
if (!success) {
|
2575
2682
|
// There're children of the heading currently open; close them below and
|
2576
2683
|
// record a parse error.
|
@@ -2580,9 +2687,9 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
2580
2687
|
}
|
2581
2688
|
do {
|
2582
2689
|
current_node = pop_current_node(parser);
|
2583
|
-
} while (!
|
2584
|
-
|
2585
|
-
|
2690
|
+
} while (!node_tag_in_set(
|
2691
|
+
current_node, (gumbo_tagset){TAG(H1), TAG(H2), TAG(H3),
|
2692
|
+
TAG(H4), TAG(H5), TAG(H6)}));
|
2586
2693
|
return success;
|
2587
2694
|
}
|
2588
2695
|
} else if (tag_is(token, kStartTag, GUMBO_TAG_A)) {
|
@@ -2600,19 +2707,17 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
2600
2707
|
if (find_last_anchor_index(parser, &last_a)) {
|
2601
2708
|
void* last_element = gumbo_vector_remove_at(
|
2602
2709
|
parser, last_a, &state->_active_formatting_elements);
|
2603
|
-
gumbo_vector_remove(
|
2604
|
-
parser, last_element, &state->_open_elements);
|
2710
|
+
gumbo_vector_remove(parser, last_element, &state->_open_elements);
|
2605
2711
|
}
|
2606
2712
|
success = false;
|
2607
2713
|
}
|
2608
2714
|
reconstruct_active_formatting_elements(parser);
|
2609
2715
|
add_formatting_element(parser, insert_element_from_token(parser, token));
|
2610
2716
|
return success;
|
2611
|
-
} else if (tag_in(token, kStartTag,
|
2612
|
-
|
2613
|
-
|
2614
|
-
|
2615
|
-
GUMBO_TAG_LAST)) {
|
2717
|
+
} else if (tag_in(token, kStartTag,
|
2718
|
+
(gumbo_tagset){TAG(B), TAG(BIG), TAG(CODE), TAG(EM), TAG(FONT),
|
2719
|
+
TAG(I), TAG(S), TAG(SMALL), TAG(STRIKE), TAG(STRONG),
|
2720
|
+
TAG(TT), TAG(U)})) {
|
2616
2721
|
reconstruct_active_formatting_elements(parser);
|
2617
2722
|
add_formatting_element(parser, insert_element_from_token(parser, token));
|
2618
2723
|
return true;
|
@@ -2628,28 +2733,27 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
2628
2733
|
insert_element_from_token(parser, token);
|
2629
2734
|
add_formatting_element(parser, get_current_node(parser));
|
2630
2735
|
return result;
|
2631
|
-
} else if (tag_in(token, kEndTag,
|
2632
|
-
|
2633
|
-
|
2634
|
-
|
2635
|
-
GUMBO_TAG_U, GUMBO_TAG_LAST)) {
|
2736
|
+
} else if (tag_in(token, kEndTag,
|
2737
|
+
(gumbo_tagset){TAG(A), TAG(B), TAG(BIG), TAG(CODE), TAG(EM),
|
2738
|
+
TAG(FONT), TAG(I), TAG(NOBR), TAG(S), TAG(SMALL),
|
2739
|
+
TAG(STRIKE), TAG(STRONG), TAG(TT), TAG(U)})) {
|
2636
2740
|
return adoption_agency_algorithm(parser, token, token->v.end_tag);
|
2637
|
-
} else if (tag_in(token, kStartTag,
|
2638
|
-
|
2741
|
+
} else if (tag_in(token, kStartTag,
|
2742
|
+
(gumbo_tagset){TAG(APPLET), TAG(MARQUEE), TAG(OBJECT)})) {
|
2639
2743
|
reconstruct_active_formatting_elements(parser);
|
2640
2744
|
insert_element_from_token(parser, token);
|
2641
2745
|
add_formatting_element(parser, &kActiveFormattingScopeMarker);
|
2642
2746
|
set_frameset_not_ok(parser);
|
2643
2747
|
return true;
|
2644
|
-
} else if (tag_in(token, kEndTag,
|
2645
|
-
|
2748
|
+
} else if (tag_in(token, kEndTag,
|
2749
|
+
(gumbo_tagset){TAG(APPLET), TAG(MARQUEE), TAG(OBJECT)})) {
|
2646
2750
|
GumboTag token_tag = token->v.end_tag;
|
2647
2751
|
if (!has_an_element_in_table_scope(parser, token_tag)) {
|
2648
2752
|
parser_add_parse_error(parser, token);
|
2649
2753
|
ignore_token(parser);
|
2650
2754
|
return false;
|
2651
2755
|
}
|
2652
|
-
implicitly_close_tags(parser, token, token_tag);
|
2756
|
+
implicitly_close_tags(parser, token, GUMBO_NAMESPACE_HTML, token_tag);
|
2653
2757
|
clear_active_formatting_elements(parser);
|
2654
2758
|
return true;
|
2655
2759
|
} else if (tag_is(token, kStartTag, GUMBO_TAG_TABLE)) {
|
@@ -2661,9 +2765,9 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
2661
2765
|
set_frameset_not_ok(parser);
|
2662
2766
|
set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE);
|
2663
2767
|
return true;
|
2664
|
-
} else if (tag_in(token, kStartTag,
|
2665
|
-
|
2666
|
-
|
2768
|
+
} else if (tag_in(token, kStartTag,
|
2769
|
+
(gumbo_tagset){TAG(AREA), TAG(BR), TAG(EMBED), TAG(IMG),
|
2770
|
+
TAG(IMAGE), TAG(KEYGEN), TAG(WBR)})) {
|
2667
2771
|
bool success = true;
|
2668
2772
|
if (tag_is(token, kStartTag, GUMBO_TAG_IMAGE)) {
|
2669
2773
|
success = false;
|
@@ -2693,8 +2797,8 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
2693
2797
|
pop_current_node(parser);
|
2694
2798
|
acknowledge_self_closing_tag(parser);
|
2695
2799
|
return true;
|
2696
|
-
} else if (tag_in(token, kStartTag,
|
2697
|
-
|
2800
|
+
} else if (tag_in(token, kStartTag,
|
2801
|
+
(gumbo_tagset){TAG(PARAM), TAG(SOURCE), TAG(TRACK)})) {
|
2698
2802
|
insert_element_from_token(parser, token);
|
2699
2803
|
pop_current_node(parser);
|
2700
2804
|
acknowledge_self_closing_tag(parser);
|
@@ -2708,7 +2812,8 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
2708
2812
|
return result;
|
2709
2813
|
} else if (tag_is(token, kStartTag, GUMBO_TAG_ISINDEX)) {
|
2710
2814
|
parser_add_parse_error(parser, token);
|
2711
|
-
if (parser->_parser_state->_form_element != NULL
|
2815
|
+
if (parser->_parser_state->_form_element != NULL &&
|
2816
|
+
!has_open_element(parser, GUMBO_TAG_TEMPLATE)) {
|
2712
2817
|
ignore_token(parser);
|
2713
2818
|
return false;
|
2714
2819
|
}
|
@@ -2723,15 +2828,18 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
2723
2828
|
|
2724
2829
|
GumboNode* form = insert_element_of_tag_type(
|
2725
2830
|
parser, GUMBO_TAG_FORM, GUMBO_INSERTION_FROM_ISINDEX);
|
2831
|
+
if (!has_open_element(parser, GUMBO_TAG_TEMPLATE)) {
|
2832
|
+
parser->_parser_state->_form_element = form;
|
2833
|
+
}
|
2726
2834
|
if (action_attr) {
|
2727
2835
|
gumbo_vector_add(parser, action_attr, &form->v.element.attributes);
|
2728
2836
|
}
|
2729
|
-
insert_element_of_tag_type(
|
2730
|
-
|
2731
|
-
pop_current_node(parser);
|
2837
|
+
insert_element_of_tag_type(
|
2838
|
+
parser, GUMBO_TAG_HR, GUMBO_INSERTION_FROM_ISINDEX);
|
2839
|
+
pop_current_node(parser); // <hr>
|
2732
2840
|
|
2733
|
-
insert_element_of_tag_type(
|
2734
|
-
|
2841
|
+
insert_element_of_tag_type(
|
2842
|
+
parser, GUMBO_TAG_LABEL, GUMBO_INSERTION_FROM_ISINDEX);
|
2735
2843
|
TextNodeBufferState* text_state = &parser->_parser_state->_text_node;
|
2736
2844
|
text_state->_start_original_text = token->original_text.data;
|
2737
2845
|
text_state->_start_position = token->position;
|
@@ -2744,15 +2852,15 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
2744
2852
|
text_state->_buffer.capacity = prompt_attr_length + 1;
|
2745
2853
|
gumbo_destroy_attribute(parser, prompt_attr);
|
2746
2854
|
} else {
|
2747
|
-
GumboStringPiece prompt_text =
|
2748
|
-
"This is a searchable index. Enter search keywords: ");
|
2855
|
+
GumboStringPiece prompt_text =
|
2856
|
+
GUMBO_STRING("This is a searchable index. Enter search keywords: ");
|
2749
2857
|
gumbo_string_buffer_append_string(
|
2750
2858
|
parser, &prompt_text, &text_state->_buffer);
|
2751
2859
|
}
|
2752
2860
|
|
2753
2861
|
GumboNode* input = insert_element_of_tag_type(
|
2754
2862
|
parser, GUMBO_TAG_INPUT, GUMBO_INSERTION_FROM_ISINDEX);
|
2755
|
-
for (int i = 0; i < token_attrs->length; ++i) {
|
2863
|
+
for (unsigned int i = 0; i < token_attrs->length; ++i) {
|
2756
2864
|
GumboAttribute* attr = token_attrs->data[i];
|
2757
2865
|
if (attr != prompt_attr && attr != action_attr && attr != name_attr) {
|
2758
2866
|
gumbo_vector_add(parser, attr, &input->v.element.attributes);
|
@@ -2765,6 +2873,13 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
2765
2873
|
// touching the attributes.
|
2766
2874
|
ignore_token(parser);
|
2767
2875
|
|
2876
|
+
// The name attribute, if present, should be destroyed since it's ignored
|
2877
|
+
// when copying over. The action attribute should be kept since it's moved
|
2878
|
+
// to the form.
|
2879
|
+
if (name_attr) {
|
2880
|
+
gumbo_destroy_attribute(parser, name_attr);
|
2881
|
+
}
|
2882
|
+
|
2768
2883
|
GumboAttribute* name =
|
2769
2884
|
gumbo_parser_allocate(parser, sizeof(GumboAttribute));
|
2770
2885
|
GumboStringPiece name_str = GUMBO_STRING("name");
|
@@ -2780,12 +2895,15 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
2780
2895
|
name->value_end = kGumboEmptySourcePosition;
|
2781
2896
|
gumbo_vector_add(parser, name, &input->v.element.attributes);
|
2782
2897
|
|
2783
|
-
pop_current_node(parser);
|
2784
|
-
pop_current_node(parser);
|
2898
|
+
pop_current_node(parser); // <input>
|
2899
|
+
pop_current_node(parser); // <label>
|
2785
2900
|
insert_element_of_tag_type(
|
2786
2901
|
parser, GUMBO_TAG_HR, GUMBO_INSERTION_FROM_ISINDEX);
|
2787
|
-
pop_current_node(parser);
|
2788
|
-
pop_current_node(parser);
|
2902
|
+
pop_current_node(parser); // <hr>
|
2903
|
+
pop_current_node(parser); // <form>
|
2904
|
+
if (!has_open_element(parser, GUMBO_TAG_TEMPLATE)) {
|
2905
|
+
parser->_parser_state->_form_element = NULL;
|
2906
|
+
}
|
2789
2907
|
return false;
|
2790
2908
|
} else if (tag_is(token, kStartTag, GUMBO_TAG_TEXTAREA)) {
|
2791
2909
|
run_generic_parsing_algorithm(parser, token, GUMBO_LEX_RCDATA);
|
@@ -2820,21 +2938,27 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
2820
2938
|
set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_SELECT);
|
2821
2939
|
}
|
2822
2940
|
return true;
|
2823
|
-
} else if (tag_in(token, kStartTag,
|
2824
|
-
|
2825
|
-
if (
|
2941
|
+
} else if (tag_in(token, kStartTag,
|
2942
|
+
(gumbo_tagset){TAG(OPTION), TAG(OPTGROUP)})) {
|
2943
|
+
if (node_html_tag_is(get_current_node(parser), GUMBO_TAG_OPTION)) {
|
2826
2944
|
pop_current_node(parser);
|
2827
2945
|
}
|
2828
2946
|
reconstruct_active_formatting_elements(parser);
|
2829
2947
|
insert_element_from_token(parser, token);
|
2830
2948
|
return true;
|
2831
|
-
} else if (tag_in(token, kStartTag,
|
2832
|
-
|
2949
|
+
} else if (tag_in(token, kStartTag,
|
2950
|
+
(gumbo_tagset){TAG(RB), TAG(RP), TAG(RT), TAG(RTC)})) {
|
2833
2951
|
bool success = true;
|
2952
|
+
GumboTag exception =
|
2953
|
+
tag_in(token, kStartTag, (gumbo_tagset){TAG(RT), TAG(RP)})
|
2954
|
+
? GUMBO_TAG_RTC
|
2955
|
+
: GUMBO_TAG_LAST;
|
2834
2956
|
if (has_an_element_in_scope(parser, GUMBO_TAG_RUBY)) {
|
2835
|
-
generate_implied_end_tags(parser,
|
2957
|
+
generate_implied_end_tags(parser, exception);
|
2836
2958
|
}
|
2837
|
-
if (!
|
2959
|
+
if (!node_html_tag_is(get_current_node(parser), GUMBO_TAG_RUBY) &&
|
2960
|
+
!(exception == GUMBO_TAG_LAST ||
|
2961
|
+
node_html_tag_is(get_current_node(parser), GUMBO_TAG_RTC))) {
|
2838
2962
|
parser_add_parse_error(parser, token);
|
2839
2963
|
success = false;
|
2840
2964
|
}
|
@@ -2867,11 +2991,10 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
2867
2991
|
acknowledge_self_closing_tag(parser);
|
2868
2992
|
}
|
2869
2993
|
return true;
|
2870
|
-
} else if (tag_in(token, kStartTag,
|
2871
|
-
|
2872
|
-
|
2873
|
-
|
2874
|
-
GUMBO_TAG_LAST)) {
|
2994
|
+
} else if (tag_in(token, kStartTag,
|
2995
|
+
(gumbo_tagset){TAG(CAPTION), TAG(COL), TAG(COLGROUP),
|
2996
|
+
TAG(FRAME), TAG(HEAD), TAG(TBODY), TAG(TD), TAG(TFOOT),
|
2997
|
+
TAG(TH), TAG(THEAD), TAG(TR)})) {
|
2875
2998
|
parser_add_parse_error(parser, token);
|
2876
2999
|
ignore_token(parser);
|
2877
3000
|
return false;
|
@@ -2883,22 +3006,22 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
2883
3006
|
assert(token->type == GUMBO_TOKEN_END_TAG);
|
2884
3007
|
GumboTag end_tag = token->v.end_tag;
|
2885
3008
|
assert(state->_open_elements.length > 0);
|
2886
|
-
assert(
|
3009
|
+
assert(node_html_tag_is(state->_open_elements.data[0], GUMBO_TAG_HTML));
|
2887
3010
|
// Walk up the stack of open elements until we find one that either:
|
2888
3011
|
// a) Matches the tag name we saw
|
2889
3012
|
// b) Is in the "special" category.
|
2890
3013
|
// If we see a), implicitly close everything up to and including it. If we
|
2891
3014
|
// see b), then record a parse error, don't close anything (except the
|
2892
3015
|
// implied end tags) and ignore the end tag token.
|
2893
|
-
for (int i = state->_open_elements.length; --i >= 0;
|
3016
|
+
for (int i = state->_open_elements.length; --i >= 0;) {
|
2894
3017
|
const GumboNode* node = state->_open_elements.data[i];
|
2895
|
-
if (node
|
2896
|
-
node_tag_is(node, end_tag)) {
|
3018
|
+
if (node_html_tag_is(node, end_tag)) {
|
2897
3019
|
generate_implied_end_tags(parser, end_tag);
|
2898
3020
|
// TODO(jdtang): Do I need to add a parse error here? The condition in
|
2899
3021
|
// the spec seems like it's the inverse of the loop condition above, and
|
2900
3022
|
// so would never fire.
|
2901
|
-
while (node != pop_current_node(parser))
|
3023
|
+
while (node != pop_current_node(parser))
|
3024
|
+
; // Pop everything.
|
2902
3025
|
return true;
|
2903
3026
|
} else if (is_special_node(node)) {
|
2904
3027
|
parser_add_parse_error(parser, token);
|
@@ -2914,7 +3037,8 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
2914
3037
|
|
2915
3038
|
// http://www.whatwg.org/specs/web-apps/current-work/complete/tokenization.html#parsing-main-incdata
|
2916
3039
|
static bool handle_text(GumboParser* parser, GumboToken* token) {
|
2917
|
-
if (token->type == GUMBO_TOKEN_CHARACTER ||
|
3040
|
+
if (token->type == GUMBO_TOKEN_CHARACTER ||
|
3041
|
+
token->type == GUMBO_TOKEN_WHITESPACE) {
|
2918
3042
|
insert_text_token(parser, token);
|
2919
3043
|
} else {
|
2920
3044
|
// We provide only bare-bones script handling that doesn't involve any of
|
@@ -2974,13 +3098,12 @@ static bool handle_in_table(GumboParser* parser, GumboToken* token) {
|
|
2974
3098
|
parser->_parser_state->_reprocess_current_token = true;
|
2975
3099
|
set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_COLUMN_GROUP);
|
2976
3100
|
return true;
|
2977
|
-
} else if (tag_in(token, kStartTag,
|
2978
|
-
|
2979
|
-
|
3101
|
+
} else if (tag_in(token, kStartTag,
|
3102
|
+
(gumbo_tagset){TAG(TBODY), TAG(TFOOT), TAG(THEAD), TAG(TD),
|
3103
|
+
TAG(TH), TAG(TR)})) {
|
2980
3104
|
clear_stack_to_table_context(parser);
|
2981
3105
|
set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE_BODY);
|
2982
|
-
if (tag_in(token, kStartTag,
|
2983
|
-
GUMBO_TAG_LAST)) {
|
3106
|
+
if (tag_in(token, kStartTag, (gumbo_tagset){TAG(TD), TAG(TH), TAG(TR)})) {
|
2984
3107
|
insert_element_of_tag_type(
|
2985
3108
|
parser, GUMBO_TAG_TBODY, GUMBO_INSERTION_IMPLIED);
|
2986
3109
|
state->_reprocess_current_token = true;
|
@@ -3002,27 +3125,27 @@ static bool handle_in_table(GumboParser* parser, GumboToken* token) {
|
|
3002
3125
|
return false;
|
3003
3126
|
}
|
3004
3127
|
return true;
|
3005
|
-
} else if (tag_in(token, kEndTag,
|
3006
|
-
|
3007
|
-
|
3008
|
-
|
3009
|
-
GUMBO_TAG_LAST)) {
|
3128
|
+
} else if (tag_in(token, kEndTag,
|
3129
|
+
(gumbo_tagset){TAG(BODY), TAG(CAPTION), TAG(COL),
|
3130
|
+
TAG(COLGROUP), TAG(HTML), TAG(TBODY), TAG(TD), TAG(TFOOT),
|
3131
|
+
TAG(TH), TAG(THEAD), TAG(TR)})) {
|
3010
3132
|
parser_add_parse_error(parser, token);
|
3011
3133
|
ignore_token(parser);
|
3012
3134
|
return false;
|
3013
|
-
} else if (tag_in(token, kStartTag,
|
3014
|
-
|
3135
|
+
} else if (tag_in(token, kStartTag,
|
3136
|
+
(gumbo_tagset){TAG(STYLE), TAG(SCRIPT), TAG(TEMPLATE)}) ||
|
3137
|
+
(tag_is(token, kEndTag, GUMBO_TAG_TEMPLATE))) {
|
3015
3138
|
return handle_in_head(parser, token);
|
3016
3139
|
} else if (tag_is(token, kStartTag, GUMBO_TAG_INPUT) &&
|
3017
|
-
attribute_matches(
|
3018
|
-
|
3140
|
+
attribute_matches(
|
3141
|
+
&token->v.start_tag.attributes, "type", "hidden")) {
|
3019
3142
|
parser_add_parse_error(parser, token);
|
3020
3143
|
insert_element_from_token(parser, token);
|
3021
3144
|
pop_current_node(parser);
|
3022
3145
|
return false;
|
3023
3146
|
} else if (tag_is(token, kStartTag, GUMBO_TAG_FORM)) {
|
3024
3147
|
parser_add_parse_error(parser, token);
|
3025
|
-
if (state->_form_element) {
|
3148
|
+
if (state->_form_element || has_open_element(parser, GUMBO_TAG_TEMPLATE)) {
|
3026
3149
|
ignore_token(parser);
|
3027
3150
|
return false;
|
3028
3151
|
}
|
@@ -3030,11 +3153,7 @@ static bool handle_in_table(GumboParser* parser, GumboToken* token) {
|
|
3030
3153
|
pop_current_node(parser);
|
3031
3154
|
return false;
|
3032
3155
|
} else if (token->type == GUMBO_TOKEN_EOF) {
|
3033
|
-
|
3034
|
-
parser_add_parse_error(parser, token);
|
3035
|
-
return false;
|
3036
|
-
}
|
3037
|
-
return true;
|
3156
|
+
return handle_in_body(parser, token);
|
3038
3157
|
} else {
|
3039
3158
|
parser_add_parse_error(parser, token);
|
3040
3159
|
state->_foster_parent_insertions = true;
|
@@ -3062,8 +3181,9 @@ static bool handle_in_table_text(GumboParser* parser, GumboToken* token) {
|
|
3062
3181
|
// Note that TextNodeBuffer may contain UTF-8 characters, but the presence
|
3063
3182
|
// of any one byte that is not whitespace means we flip the flag, so this
|
3064
3183
|
// loop is still valid.
|
3065
|
-
for (int i = 0; i < buffer->length; ++i) {
|
3066
|
-
if (!isspace(
|
3184
|
+
for (unsigned int i = 0; i < buffer->length; ++i) {
|
3185
|
+
if (!isspace((unsigned char) buffer->data[i]) ||
|
3186
|
+
buffer->data[i] == '\v') {
|
3067
3187
|
state->_foster_parent_insertions = true;
|
3068
3188
|
reconstruct_active_formatting_elements(parser);
|
3069
3189
|
break;
|
@@ -3079,38 +3199,43 @@ static bool handle_in_table_text(GumboParser* parser, GumboToken* token) {
|
|
3079
3199
|
|
3080
3200
|
// http://www.whatwg.org/specs/web-apps/current-work/complete/tokenization.html#parsing-main-incaption
|
3081
3201
|
static bool handle_in_caption(GumboParser* parser, GumboToken* token) {
|
3082
|
-
if (
|
3083
|
-
GUMBO_TAG_COLGROUP, GUMBO_TAG_TBODY, GUMBO_TAG_TD,
|
3084
|
-
GUMBO_TAG_TFOOT, GUMBO_TAG_TH, GUMBO_TAG_THEAD,
|
3085
|
-
GUMBO_TAG_TR, GUMBO_TAG_LAST) ||
|
3086
|
-
tag_in(token, kEndTag, GUMBO_TAG_CAPTION, GUMBO_TAG_TABLE,
|
3087
|
-
GUMBO_TAG_LAST)) {
|
3202
|
+
if (tag_is(token, kEndTag, GUMBO_TAG_CAPTION)) {
|
3088
3203
|
if (!has_an_element_in_table_scope(parser, GUMBO_TAG_CAPTION)) {
|
3089
3204
|
parser_add_parse_error(parser, token);
|
3090
3205
|
ignore_token(parser);
|
3091
3206
|
return false;
|
3207
|
+
} else {
|
3208
|
+
generate_implied_end_tags(parser, GUMBO_TAG_LAST);
|
3209
|
+
bool result = true;
|
3210
|
+
if (!node_html_tag_is(get_current_node(parser), GUMBO_TAG_CAPTION)) {
|
3211
|
+
parser_add_parse_error(parser, token);
|
3212
|
+
}
|
3213
|
+
while (!node_html_tag_is(pop_current_node(parser), GUMBO_TAG_CAPTION))
|
3214
|
+
;
|
3215
|
+
clear_active_formatting_elements(parser);
|
3216
|
+
set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE);
|
3217
|
+
return result;
|
3092
3218
|
}
|
3093
|
-
|
3094
|
-
|
3095
|
-
|
3096
|
-
|
3097
|
-
|
3098
|
-
|
3099
|
-
if (!node_tag_is(get_current_node(parser), GUMBO_TAG_CAPTION)) {
|
3219
|
+
} else if (tag_in(token, kStartTag,
|
3220
|
+
(gumbo_tagset){TAG(CAPTION), TAG(COL), TAG(COLGROUP),
|
3221
|
+
TAG(TBODY), TAG(TD), TAG(TFOOT), TAG(TH), TAG(THEAD),
|
3222
|
+
TAG(TR)}) ||
|
3223
|
+
(tag_is(token, kEndTag, GUMBO_TAG_TABLE))) {
|
3224
|
+
if (!has_an_element_in_table_scope(parser, GUMBO_TAG_CAPTION)) {
|
3100
3225
|
parser_add_parse_error(parser, token);
|
3101
|
-
|
3102
|
-
|
3103
|
-
}
|
3104
|
-
result = false;
|
3226
|
+
ignore_token(parser);
|
3227
|
+
return false;
|
3105
3228
|
}
|
3106
|
-
pop_current_node(parser)
|
3229
|
+
while (!node_html_tag_is(pop_current_node(parser), GUMBO_TAG_CAPTION))
|
3230
|
+
;
|
3107
3231
|
clear_active_formatting_elements(parser);
|
3108
3232
|
set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE);
|
3109
|
-
|
3110
|
-
|
3111
|
-
|
3112
|
-
|
3113
|
-
|
3233
|
+
parser->_parser_state->_reprocess_current_token = true;
|
3234
|
+
return true;
|
3235
|
+
} else if (tag_in(token, kEndTag,
|
3236
|
+
(gumbo_tagset){TAG(BODY), TAG(COL), TAG(COLGROUP), TAG(HTML),
|
3237
|
+
TAG(TBODY), TAG(TD), TAG(TFOOT), TAG(TH), TAG(THEAD),
|
3238
|
+
TAG(TR)})) {
|
3114
3239
|
parser_add_parse_error(parser, token);
|
3115
3240
|
ignore_token(parser);
|
3116
3241
|
return false;
|
@@ -3138,24 +3263,33 @@ static bool handle_in_column_group(GumboParser* parser, GumboToken* token) {
|
|
3138
3263
|
pop_current_node(parser);
|
3139
3264
|
acknowledge_self_closing_tag(parser);
|
3140
3265
|
return true;
|
3266
|
+
} else if (tag_is(token, kEndTag, GUMBO_TAG_COLGROUP)) {
|
3267
|
+
if (!node_html_tag_is(get_current_node(parser), GUMBO_TAG_COLGROUP)) {
|
3268
|
+
parser_add_parse_error(parser, token);
|
3269
|
+
ignore_token(parser);
|
3270
|
+
return false;
|
3271
|
+
}
|
3272
|
+
pop_current_node(parser);
|
3273
|
+
set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE);
|
3274
|
+
return false;
|
3141
3275
|
} else if (tag_is(token, kEndTag, GUMBO_TAG_COL)) {
|
3142
3276
|
parser_add_parse_error(parser, token);
|
3143
3277
|
ignore_token(parser);
|
3144
3278
|
return false;
|
3145
|
-
} else if (token
|
3146
|
-
|
3147
|
-
return
|
3279
|
+
} else if (tag_is(token, kStartTag, GUMBO_TAG_TEMPLATE) ||
|
3280
|
+
tag_is(token, kEndTag, GUMBO_TAG_TEMPLATE)) {
|
3281
|
+
return handle_in_head(parser, token);
|
3282
|
+
} else if (token->type == GUMBO_TOKEN_EOF) {
|
3283
|
+
return handle_in_body(parser, token);
|
3148
3284
|
} else {
|
3149
|
-
if (get_current_node(parser)
|
3285
|
+
if (!node_html_tag_is(get_current_node(parser), GUMBO_TAG_COLGROUP)) {
|
3150
3286
|
parser_add_parse_error(parser, token);
|
3287
|
+
ignore_token(parser);
|
3151
3288
|
return false;
|
3152
3289
|
}
|
3153
|
-
assert(node_tag_is(get_current_node(parser), GUMBO_TAG_COLGROUP));
|
3154
3290
|
pop_current_node(parser);
|
3155
3291
|
set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE);
|
3156
|
-
|
3157
|
-
parser->_parser_state->_reprocess_current_token = true;
|
3158
|
-
}
|
3292
|
+
parser->_parser_state->_reprocess_current_token = true;
|
3159
3293
|
return true;
|
3160
3294
|
}
|
3161
3295
|
}
|
@@ -3167,16 +3301,15 @@ static bool handle_in_table_body(GumboParser* parser, GumboToken* token) {
|
|
3167
3301
|
insert_element_from_token(parser, token);
|
3168
3302
|
set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_ROW);
|
3169
3303
|
return true;
|
3170
|
-
} else if (tag_in(token, kStartTag,
|
3171
|
-
GUMBO_TAG_LAST)) {
|
3304
|
+
} else if (tag_in(token, kStartTag, (gumbo_tagset){TAG(TD), TAG(TH)})) {
|
3172
3305
|
parser_add_parse_error(parser, token);
|
3173
3306
|
clear_stack_to_table_body_context(parser);
|
3174
3307
|
insert_element_of_tag_type(parser, GUMBO_TAG_TR, GUMBO_INSERTION_IMPLIED);
|
3175
3308
|
parser->_parser_state->_reprocess_current_token = true;
|
3176
3309
|
set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_ROW);
|
3177
3310
|
return false;
|
3178
|
-
} else if (tag_in(token, kEndTag,
|
3179
|
-
|
3311
|
+
} else if (tag_in(token, kEndTag,
|
3312
|
+
(gumbo_tagset){TAG(TBODY), TAG(TFOOT), TAG(THEAD)})) {
|
3180
3313
|
if (!has_an_element_in_table_scope(parser, token->v.end_tag)) {
|
3181
3314
|
parser_add_parse_error(parser, token);
|
3182
3315
|
ignore_token(parser);
|
@@ -3186,13 +3319,13 @@ static bool handle_in_table_body(GumboParser* parser, GumboToken* token) {
|
|
3186
3319
|
pop_current_node(parser);
|
3187
3320
|
set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE);
|
3188
3321
|
return true;
|
3189
|
-
} else if (tag_in(token, kStartTag,
|
3190
|
-
|
3191
|
-
|
3322
|
+
} else if (tag_in(token, kStartTag,
|
3323
|
+
(gumbo_tagset){TAG(CAPTION), TAG(COL), TAG(COLGROUP),
|
3324
|
+
TAG(TBODY), TAG(TFOOT), TAG(THEAD)}) ||
|
3192
3325
|
tag_is(token, kEndTag, GUMBO_TAG_TABLE)) {
|
3193
3326
|
if (!(has_an_element_in_table_scope(parser, GUMBO_TAG_TBODY) ||
|
3194
|
-
|
3195
|
-
|
3327
|
+
has_an_element_in_table_scope(parser, GUMBO_TAG_THEAD) ||
|
3328
|
+
has_an_element_in_table_scope(parser, GUMBO_TAG_TFOOT))) {
|
3196
3329
|
parser_add_parse_error(parser, token);
|
3197
3330
|
ignore_token(parser);
|
3198
3331
|
return false;
|
@@ -3202,10 +3335,9 @@ static bool handle_in_table_body(GumboParser* parser, GumboToken* token) {
|
|
3202
3335
|
set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE);
|
3203
3336
|
parser->_parser_state->_reprocess_current_token = true;
|
3204
3337
|
return true;
|
3205
|
-
} else if (tag_in(token, kEndTag,
|
3206
|
-
|
3207
|
-
|
3208
|
-
{
|
3338
|
+
} else if (tag_in(token, kEndTag,
|
3339
|
+
(gumbo_tagset){TAG(BODY), TAG(CAPTION), TAG(COL), TAG(TR),
|
3340
|
+
TAG(COLGROUP), TAG(HTML), TAG(TD), TAG(TH)})) {
|
3209
3341
|
parser_add_parse_error(parser, token);
|
3210
3342
|
ignore_token(parser);
|
3211
3343
|
return false;
|
@@ -3216,48 +3348,55 @@ static bool handle_in_table_body(GumboParser* parser, GumboToken* token) {
|
|
3216
3348
|
|
3217
3349
|
// http://www.whatwg.org/specs/web-apps/current-work/complete/tokenization.html#parsing-main-intr
|
3218
3350
|
static bool handle_in_row(GumboParser* parser, GumboToken* token) {
|
3219
|
-
if (tag_in(token, kStartTag,
|
3351
|
+
if (tag_in(token, kStartTag, (gumbo_tagset){TAG(TH), TAG(TD)})) {
|
3220
3352
|
clear_stack_to_table_row_context(parser);
|
3221
3353
|
insert_element_from_token(parser, token);
|
3222
3354
|
set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_CELL);
|
3223
3355
|
add_formatting_element(parser, &kActiveFormattingScopeMarker);
|
3224
3356
|
return true;
|
3225
|
-
} else if (
|
3226
|
-
|
3227
|
-
|
3228
|
-
|
3229
|
-
|
3230
|
-
|
3231
|
-
|
3232
|
-
|
3233
|
-
|
3234
|
-
|
3235
|
-
|
3236
|
-
|
3237
|
-
|
3238
|
-
|
3239
|
-
|
3240
|
-
|
3241
|
-
for (int i = 0; i < parser->_parser_state->_open_elements.length; ++i) {
|
3242
|
-
const GumboNode* node = parser->_parser_state->_open_elements.data[i];
|
3243
|
-
gumbo_debug("%s\n", gumbo_normalized_tagname(node->v.element.tag));
|
3244
|
-
}
|
3357
|
+
} else if (tag_is(token, kEndTag, GUMBO_TAG_TR)) {
|
3358
|
+
if (!has_an_element_in_table_scope(parser, GUMBO_TAG_TR)) {
|
3359
|
+
parser_add_parse_error(parser, token);
|
3360
|
+
ignore_token(parser);
|
3361
|
+
return false;
|
3362
|
+
} else {
|
3363
|
+
clear_stack_to_table_row_context(parser);
|
3364
|
+
pop_current_node(parser);
|
3365
|
+
set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE_BODY);
|
3366
|
+
return true;
|
3367
|
+
}
|
3368
|
+
} else if (tag_in(token, kStartTag,
|
3369
|
+
(gumbo_tagset){TAG(CAPTION), TAG(COL), TAG(COLGROUP),
|
3370
|
+
TAG(TBODY), TAG(TFOOT), TAG(THEAD), TAG(TR)}) ||
|
3371
|
+
tag_is(token, kEndTag, GUMBO_TAG_TABLE)) {
|
3372
|
+
if (!has_an_element_in_table_scope(parser, GUMBO_TAG_TR)) {
|
3245
3373
|
parser_add_parse_error(parser, token);
|
3246
3374
|
ignore_token(parser);
|
3247
3375
|
return false;
|
3376
|
+
} else {
|
3377
|
+
clear_stack_to_table_row_context(parser);
|
3378
|
+
pop_current_node(parser);
|
3379
|
+
set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE_BODY);
|
3380
|
+
parser->_parser_state->_reprocess_current_token = true;
|
3381
|
+
return true;
|
3248
3382
|
}
|
3249
|
-
|
3250
|
-
|
3251
|
-
|
3252
|
-
|
3253
|
-
|
3254
|
-
|
3383
|
+
} else if (tag_in(token, kEndTag,
|
3384
|
+
(gumbo_tagset){TAG(TBODY), TAG(TFOOT), TAG(THEAD)})) {
|
3385
|
+
if (!has_an_element_in_table_scope(parser, token->v.end_tag) ||
|
3386
|
+
(!has_an_element_in_table_scope(parser, GUMBO_TAG_TR))) {
|
3387
|
+
parser_add_parse_error(parser, token);
|
3388
|
+
ignore_token(parser);
|
3389
|
+
return false;
|
3390
|
+
} else {
|
3391
|
+
clear_stack_to_table_row_context(parser);
|
3392
|
+
pop_current_node(parser);
|
3393
|
+
set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE_BODY);
|
3255
3394
|
parser->_parser_state->_reprocess_current_token = true;
|
3395
|
+
return true;
|
3256
3396
|
}
|
3257
|
-
|
3258
|
-
|
3259
|
-
|
3260
|
-
GUMBO_TAG_TD, GUMBO_TAG_TH, GUMBO_TAG_LAST)) {
|
3397
|
+
} else if (tag_in(token, kEndTag,
|
3398
|
+
(gumbo_tagset){TAG(BODY), TAG(CAPTION), TAG(COL),
|
3399
|
+
TAG(COLGROUP), TAG(HTML), TAG(TD), TAG(TH)})) {
|
3261
3400
|
parser_add_parse_error(parser, token);
|
3262
3401
|
ignore_token(parser);
|
3263
3402
|
return false;
|
@@ -3268,17 +3407,18 @@ static bool handle_in_row(GumboParser* parser, GumboToken* token) {
|
|
3268
3407
|
|
3269
3408
|
// http://www.whatwg.org/specs/web-apps/current-work/complete/tokenization.html#parsing-main-intd
|
3270
3409
|
static bool handle_in_cell(GumboParser* parser, GumboToken* token) {
|
3271
|
-
if (tag_in(token, kEndTag,
|
3410
|
+
if (tag_in(token, kEndTag, (gumbo_tagset){TAG(TD), TAG(TH)})) {
|
3272
3411
|
GumboTag token_tag = token->v.end_tag;
|
3273
3412
|
if (!has_an_element_in_table_scope(parser, token_tag)) {
|
3274
3413
|
parser_add_parse_error(parser, token);
|
3414
|
+
ignore_token(parser);
|
3275
3415
|
return false;
|
3276
3416
|
}
|
3277
3417
|
return close_table_cell(parser, token, token_tag);
|
3278
|
-
} else if (tag_in(token, kStartTag,
|
3279
|
-
|
3280
|
-
|
3281
|
-
|
3418
|
+
} else if (tag_in(token, kStartTag,
|
3419
|
+
(gumbo_tagset){TAG(CAPTION), TAG(COL), TAG(COLGROUP),
|
3420
|
+
TAG(TBODY), TAG(TD), TAG(TFOOT), TAG(TH), TAG(THEAD),
|
3421
|
+
TAG(TR)})) {
|
3282
3422
|
gumbo_debug("Handling <td> in cell.\n");
|
3283
3423
|
if (!has_an_element_in_table_scope(parser, GUMBO_TAG_TH) &&
|
3284
3424
|
!has_an_element_in_table_scope(parser, GUMBO_TAG_TD)) {
|
@@ -3289,15 +3429,13 @@ static bool handle_in_cell(GumboParser* parser, GumboToken* token) {
|
|
3289
3429
|
}
|
3290
3430
|
parser->_parser_state->_reprocess_current_token = true;
|
3291
3431
|
return close_current_cell(parser, token);
|
3292
|
-
} else if (tag_in(token, kEndTag,
|
3293
|
-
|
3294
|
-
GUMBO_TAG_LAST)) {
|
3432
|
+
} else if (tag_in(token, kEndTag, (gumbo_tagset){TAG(BODY), TAG(CAPTION),
|
3433
|
+
TAG(COL), TAG(COLGROUP), TAG(HTML)})) {
|
3295
3434
|
parser_add_parse_error(parser, token);
|
3296
3435
|
ignore_token(parser);
|
3297
3436
|
return false;
|
3298
|
-
} else if (tag_in(token, kEndTag,
|
3299
|
-
|
3300
|
-
GUMBO_TAG_LAST)) {
|
3437
|
+
} else if (tag_in(token, kEndTag, (gumbo_tagset){TAG(TABLE), TAG(TBODY),
|
3438
|
+
TAG(TFOOT), TAG(THEAD), TAG(TR)})) {
|
3301
3439
|
if (!has_an_element_in_table_scope(parser, token->v.end_tag)) {
|
3302
3440
|
parser_add_parse_error(parser, token);
|
3303
3441
|
ignore_token(parser);
|
@@ -3330,28 +3468,28 @@ static bool handle_in_select(GumboParser* parser, GumboToken* token) {
|
|
3330
3468
|
} else if (tag_is(token, kStartTag, GUMBO_TAG_HTML)) {
|
3331
3469
|
return handle_in_body(parser, token);
|
3332
3470
|
} else if (tag_is(token, kStartTag, GUMBO_TAG_OPTION)) {
|
3333
|
-
if (
|
3471
|
+
if (node_html_tag_is(get_current_node(parser), GUMBO_TAG_OPTION)) {
|
3334
3472
|
pop_current_node(parser);
|
3335
3473
|
}
|
3336
3474
|
insert_element_from_token(parser, token);
|
3337
3475
|
return true;
|
3338
3476
|
} else if (tag_is(token, kStartTag, GUMBO_TAG_OPTGROUP)) {
|
3339
|
-
if (
|
3477
|
+
if (node_html_tag_is(get_current_node(parser), GUMBO_TAG_OPTION)) {
|
3340
3478
|
pop_current_node(parser);
|
3341
3479
|
}
|
3342
|
-
if (
|
3480
|
+
if (node_html_tag_is(get_current_node(parser), GUMBO_TAG_OPTGROUP)) {
|
3343
3481
|
pop_current_node(parser);
|
3344
3482
|
}
|
3345
3483
|
insert_element_from_token(parser, token);
|
3346
3484
|
return true;
|
3347
3485
|
} else if (tag_is(token, kEndTag, GUMBO_TAG_OPTGROUP)) {
|
3348
3486
|
GumboVector* open_elements = &parser->_parser_state->_open_elements;
|
3349
|
-
if (
|
3350
|
-
|
3351
|
-
|
3487
|
+
if (node_html_tag_is(get_current_node(parser), GUMBO_TAG_OPTION) &&
|
3488
|
+
node_html_tag_is(open_elements->data[open_elements->length - 2],
|
3489
|
+
GUMBO_TAG_OPTGROUP)) {
|
3352
3490
|
pop_current_node(parser);
|
3353
3491
|
}
|
3354
|
-
if (
|
3492
|
+
if (node_html_tag_is(get_current_node(parser), GUMBO_TAG_OPTGROUP)) {
|
3355
3493
|
pop_current_node(parser);
|
3356
3494
|
return true;
|
3357
3495
|
} else {
|
@@ -3360,7 +3498,7 @@ static bool handle_in_select(GumboParser* parser, GumboToken* token) {
|
|
3360
3498
|
return false;
|
3361
3499
|
}
|
3362
3500
|
} else if (tag_is(token, kEndTag, GUMBO_TAG_OPTION)) {
|
3363
|
-
if (
|
3501
|
+
if (node_html_tag_is(get_current_node(parser), GUMBO_TAG_OPTION)) {
|
3364
3502
|
pop_current_node(parser);
|
3365
3503
|
return true;
|
3366
3504
|
} else {
|
@@ -3379,10 +3517,12 @@ static bool handle_in_select(GumboParser* parser, GumboToken* token) {
|
|
3379
3517
|
} else if (tag_is(token, kStartTag, GUMBO_TAG_SELECT)) {
|
3380
3518
|
parser_add_parse_error(parser, token);
|
3381
3519
|
ignore_token(parser);
|
3382
|
-
|
3520
|
+
if (has_an_element_in_select_scope(parser, GUMBO_TAG_SELECT)) {
|
3521
|
+
close_current_select(parser);
|
3522
|
+
}
|
3383
3523
|
return false;
|
3384
|
-
} else if (tag_in(token, kStartTag,
|
3385
|
-
|
3524
|
+
} else if (tag_in(token, kStartTag,
|
3525
|
+
(gumbo_tagset){TAG(INPUT), TAG(KEYGEN), TAG(TEXTAREA)})) {
|
3386
3526
|
parser_add_parse_error(parser, token);
|
3387
3527
|
if (!has_an_element_in_select_scope(parser, GUMBO_TAG_SELECT)) {
|
3388
3528
|
ignore_token(parser);
|
@@ -3391,14 +3531,12 @@ static bool handle_in_select(GumboParser* parser, GumboToken* token) {
|
|
3391
3531
|
parser->_parser_state->_reprocess_current_token = true;
|
3392
3532
|
}
|
3393
3533
|
return false;
|
3394
|
-
} else if (
|
3534
|
+
} else if (tag_in(token, kStartTag,
|
3535
|
+
(gumbo_tagset){TAG(SCRIPT), TAG(TEMPLATE)}) ||
|
3536
|
+
tag_is(token, kEndTag, GUMBO_TAG_TEMPLATE)) {
|
3395
3537
|
return handle_in_head(parser, token);
|
3396
3538
|
} else if (token->type == GUMBO_TOKEN_EOF) {
|
3397
|
-
|
3398
|
-
parser_add_parse_error(parser, token);
|
3399
|
-
return false;
|
3400
|
-
}
|
3401
|
-
return true;
|
3539
|
+
return handle_in_body(parser, token);
|
3402
3540
|
} else {
|
3403
3541
|
parser_add_parse_error(parser, token);
|
3404
3542
|
ignore_token(parser);
|
@@ -3408,25 +3546,28 @@ static bool handle_in_select(GumboParser* parser, GumboToken* token) {
|
|
3408
3546
|
|
3409
3547
|
// http://www.whatwg.org/specs/web-apps/current-work/complete/tokenization.html#parsing-main-inselectintable
|
3410
3548
|
static bool handle_in_select_in_table(GumboParser* parser, GumboToken* token) {
|
3411
|
-
if (tag_in(token, kStartTag,
|
3412
|
-
|
3413
|
-
|
3549
|
+
if (tag_in(token, kStartTag,
|
3550
|
+
(gumbo_tagset){TAG(CAPTION), TAG(TABLE), TAG(TBODY), TAG(TFOOT),
|
3551
|
+
TAG(THEAD), TAG(TR), TAG(TD), TAG(TH)})) {
|
3414
3552
|
parser_add_parse_error(parser, token);
|
3415
3553
|
close_current_select(parser);
|
3416
3554
|
parser->_parser_state->_reprocess_current_token = true;
|
3417
3555
|
return false;
|
3418
|
-
} else if (tag_in(token, kEndTag,
|
3419
|
-
|
3420
|
-
|
3556
|
+
} else if (tag_in(token, kEndTag,
|
3557
|
+
(gumbo_tagset){TAG(CAPTION), TAG(TABLE), TAG(TBODY),
|
3558
|
+
TAG(TFOOT), TAG(THEAD), TAG(TR), TAG(TD), TAG(TH)})) {
|
3421
3559
|
parser_add_parse_error(parser, token);
|
3422
|
-
if (has_an_element_in_table_scope(parser, token->v.end_tag)) {
|
3560
|
+
if (!has_an_element_in_table_scope(parser, token->v.end_tag)) {
|
3561
|
+
ignore_token(parser);
|
3562
|
+
return false;
|
3563
|
+
} else {
|
3423
3564
|
close_current_select(parser);
|
3424
|
-
|
3565
|
+
// close_current_select already does the
|
3566
|
+
// reset_insertion_mode_appropriately
|
3567
|
+
// reset_insertion_mode_appropriately(parser);
|
3425
3568
|
parser->_parser_state->_reprocess_current_token = true;
|
3426
|
-
|
3427
|
-
ignore_token(parser);
|
3569
|
+
return false;
|
3428
3570
|
}
|
3429
|
-
return false;
|
3430
3571
|
} else {
|
3431
3572
|
return handle_in_select(parser, token);
|
3432
3573
|
}
|
@@ -3434,8 +3575,71 @@ static bool handle_in_select_in_table(GumboParser* parser, GumboToken* token) {
|
|
3434
3575
|
|
3435
3576
|
// http://www.whatwg.org/specs/web-apps/current-work/multipage/tree-construction.html#parsing-main-intemplate
|
3436
3577
|
static bool handle_in_template(GumboParser* parser, GumboToken* token) {
|
3437
|
-
|
3438
|
-
|
3578
|
+
GumboParserState* state = parser->_parser_state;
|
3579
|
+
if (token->type == GUMBO_TOKEN_WHITESPACE ||
|
3580
|
+
token->type == GUMBO_TOKEN_CHARACTER ||
|
3581
|
+
token->type == GUMBO_TOKEN_COMMENT || token->type == GUMBO_TOKEN_NULL ||
|
3582
|
+
token->type == GUMBO_TOKEN_DOCTYPE) {
|
3583
|
+
return handle_in_body(parser, token);
|
3584
|
+
} else if (tag_in(token, kStartTag,
|
3585
|
+
(gumbo_tagset){TAG(BASE), TAG(BASEFONT), TAG(BGSOUND),
|
3586
|
+
TAG(LINK), TAG(META), TAG(NOFRAMES), TAG(SCRIPT),
|
3587
|
+
TAG(STYLE), TAG(TEMPLATE), TAG(TITLE)}) ||
|
3588
|
+
tag_is(token, kEndTag, GUMBO_TAG_TEMPLATE)) {
|
3589
|
+
return handle_in_head(parser, token);
|
3590
|
+
} else if (tag_in(
|
3591
|
+
token, kStartTag, (gumbo_tagset){TAG(CAPTION), TAG(COLGROUP),
|
3592
|
+
TAG(TBODY), TAG(TFOOT), TAG(THEAD)})) {
|
3593
|
+
pop_template_insertion_mode(parser);
|
3594
|
+
push_template_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE);
|
3595
|
+
set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE);
|
3596
|
+
state->_reprocess_current_token = true;
|
3597
|
+
return true;
|
3598
|
+
} else if (tag_is(token, kStartTag, GUMBO_TAG_COL)) {
|
3599
|
+
pop_template_insertion_mode(parser);
|
3600
|
+
push_template_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_COLUMN_GROUP);
|
3601
|
+
set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_COLUMN_GROUP);
|
3602
|
+
state->_reprocess_current_token = true;
|
3603
|
+
return true;
|
3604
|
+
} else if (tag_is(token, kStartTag, GUMBO_TAG_TR)) {
|
3605
|
+
pop_template_insertion_mode(parser);
|
3606
|
+
push_template_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE_BODY);
|
3607
|
+
set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE_BODY);
|
3608
|
+
state->_reprocess_current_token = true;
|
3609
|
+
return true;
|
3610
|
+
} else if (tag_in(token, kStartTag, (gumbo_tagset){TAG(TD), TAG(TH)})) {
|
3611
|
+
pop_template_insertion_mode(parser);
|
3612
|
+
push_template_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_ROW);
|
3613
|
+
set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_ROW);
|
3614
|
+
state->_reprocess_current_token = true;
|
3615
|
+
return true;
|
3616
|
+
} else if (token->type == GUMBO_TOKEN_START_TAG) {
|
3617
|
+
pop_template_insertion_mode(parser);
|
3618
|
+
push_template_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_BODY);
|
3619
|
+
set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_BODY);
|
3620
|
+
state->_reprocess_current_token = true;
|
3621
|
+
return true;
|
3622
|
+
} else if (token->type == GUMBO_TOKEN_END_TAG) {
|
3623
|
+
parser_add_parse_error(parser, token);
|
3624
|
+
ignore_token(parser);
|
3625
|
+
return false;
|
3626
|
+
} else if (token->type == GUMBO_TOKEN_EOF) {
|
3627
|
+
if (!has_open_element(parser, GUMBO_TAG_TEMPLATE)) {
|
3628
|
+
// Stop parsing.
|
3629
|
+
return true;
|
3630
|
+
}
|
3631
|
+
parser_add_parse_error(parser, token);
|
3632
|
+
while (!node_html_tag_is(pop_current_node(parser), GUMBO_TAG_TEMPLATE))
|
3633
|
+
;
|
3634
|
+
clear_active_formatting_elements(parser);
|
3635
|
+
pop_template_insertion_mode(parser);
|
3636
|
+
reset_insertion_mode_appropriately(parser);
|
3637
|
+
state->_reprocess_current_token = true;
|
3638
|
+
return false;
|
3639
|
+
} else {
|
3640
|
+
assert(0);
|
3641
|
+
return false;
|
3642
|
+
}
|
3439
3643
|
}
|
3440
3644
|
|
3441
3645
|
// http://www.whatwg.org/specs/web-apps/current-work/complete/tokenization.html#parsing-main-afterbody
|
@@ -3453,10 +3657,15 @@ static bool handle_after_body(GumboParser* parser, GumboToken* token) {
|
|
3453
3657
|
ignore_token(parser);
|
3454
3658
|
return false;
|
3455
3659
|
} else if (tag_is(token, kEndTag, GUMBO_TAG_HTML)) {
|
3456
|
-
|
3660
|
+
/* fragment case: ignore the closing HTML token */
|
3661
|
+
if (is_fragment_parser(parser)) {
|
3662
|
+
parser_add_parse_error(parser, token);
|
3663
|
+
ignore_token(parser);
|
3664
|
+
return false;
|
3665
|
+
}
|
3457
3666
|
set_insertion_mode(parser, GUMBO_INSERTION_MODE_AFTER_AFTER_BODY);
|
3458
3667
|
GumboNode* html = parser->_parser_state->_open_elements.data[0];
|
3459
|
-
assert(
|
3668
|
+
assert(node_html_tag_is(html, GUMBO_TAG_HTML));
|
3460
3669
|
record_end_of_element(
|
3461
3670
|
parser->_parser_state->_current_token, &html->v.element);
|
3462
3671
|
return true;
|
@@ -3488,15 +3697,14 @@ static bool handle_in_frameset(GumboParser* parser, GumboToken* token) {
|
|
3488
3697
|
insert_element_from_token(parser, token);
|
3489
3698
|
return true;
|
3490
3699
|
} else if (tag_is(token, kEndTag, GUMBO_TAG_FRAMESET)) {
|
3491
|
-
if (
|
3700
|
+
if (node_html_tag_is(get_current_node(parser), GUMBO_TAG_HTML)) {
|
3492
3701
|
parser_add_parse_error(parser, token);
|
3493
3702
|
ignore_token(parser);
|
3494
3703
|
return false;
|
3495
3704
|
}
|
3496
3705
|
pop_current_node(parser);
|
3497
|
-
|
3498
|
-
|
3499
|
-
if (!node_tag_is(get_current_node(parser), GUMBO_TAG_FRAMESET)) {
|
3706
|
+
if (!is_fragment_parser(parser) &&
|
3707
|
+
!node_html_tag_is(get_current_node(parser), GUMBO_TAG_FRAMESET)) {
|
3500
3708
|
set_insertion_mode(parser, GUMBO_INSERTION_MODE_AFTER_FRAMESET);
|
3501
3709
|
}
|
3502
3710
|
return true;
|
@@ -3508,7 +3716,7 @@ static bool handle_in_frameset(GumboParser* parser, GumboToken* token) {
|
|
3508
3716
|
} else if (tag_is(token, kStartTag, GUMBO_TAG_NOFRAMES)) {
|
3509
3717
|
return handle_in_head(parser, token);
|
3510
3718
|
} else if (token->type == GUMBO_TOKEN_EOF) {
|
3511
|
-
if (!
|
3719
|
+
if (!node_html_tag_is(get_current_node(parser), GUMBO_TAG_HTML)) {
|
3512
3720
|
parser_add_parse_error(parser, token);
|
3513
3721
|
return false;
|
3514
3722
|
}
|
@@ -3536,7 +3744,7 @@ static bool handle_after_frameset(GumboParser* parser, GumboToken* token) {
|
|
3536
3744
|
return handle_in_body(parser, token);
|
3537
3745
|
} else if (tag_is(token, kEndTag, GUMBO_TAG_HTML)) {
|
3538
3746
|
GumboNode* html = parser->_parser_state->_open_elements.data[0];
|
3539
|
-
assert(
|
3747
|
+
assert(node_html_tag_is(html, GUMBO_TAG_HTML));
|
3540
3748
|
record_end_of_element(
|
3541
3749
|
parser->_parser_state->_current_token, &html->v.element);
|
3542
3750
|
set_insertion_mode(parser, GUMBO_INSERTION_MODE_AFTER_AFTER_FRAMESET);
|
@@ -3595,31 +3803,14 @@ static bool handle_after_after_frameset(
|
|
3595
3803
|
// Function pointers for each insertion mode. Keep in sync with
|
3596
3804
|
// insertion_mode.h.
|
3597
3805
|
typedef bool (*TokenHandler)(GumboParser* parser, GumboToken* token);
|
3598
|
-
static const TokenHandler kTokenHandlers[] = {
|
3599
|
-
|
3600
|
-
|
3601
|
-
|
3602
|
-
|
3603
|
-
|
3604
|
-
|
3605
|
-
|
3606
|
-
handle_text,
|
3607
|
-
handle_in_table,
|
3608
|
-
handle_in_table_text,
|
3609
|
-
handle_in_caption,
|
3610
|
-
handle_in_column_group,
|
3611
|
-
handle_in_table_body,
|
3612
|
-
handle_in_row,
|
3613
|
-
handle_in_cell,
|
3614
|
-
handle_in_select,
|
3615
|
-
handle_in_select_in_table,
|
3616
|
-
handle_in_template,
|
3617
|
-
handle_after_body,
|
3618
|
-
handle_in_frameset,
|
3619
|
-
handle_after_frameset,
|
3620
|
-
handle_after_after_body,
|
3621
|
-
handle_after_after_frameset
|
3622
|
-
};
|
3806
|
+
static const TokenHandler kTokenHandlers[] = {handle_initial,
|
3807
|
+
handle_before_html, handle_before_head, handle_in_head,
|
3808
|
+
handle_in_head_noscript, handle_after_head, handle_in_body, handle_text,
|
3809
|
+
handle_in_table, handle_in_table_text, handle_in_caption,
|
3810
|
+
handle_in_column_group, handle_in_table_body, handle_in_row, handle_in_cell,
|
3811
|
+
handle_in_select, handle_in_select_in_table, handle_in_template,
|
3812
|
+
handle_after_body, handle_in_frameset, handle_after_frameset,
|
3813
|
+
handle_after_after_body, handle_after_after_frameset};
|
3623
3814
|
|
3624
3815
|
static bool handle_html_content(GumboParser* parser, GumboToken* token) {
|
3625
3816
|
return kTokenHandlers[(unsigned int) parser->_parser_state->_insertion_mode](
|
@@ -3628,16 +3819,17 @@ static bool handle_html_content(GumboParser* parser, GumboToken* token) {
|
|
3628
3819
|
|
3629
3820
|
// http://www.whatwg.org/specs/web-apps/current-work/complete/tokenization.html#parsing-main-inforeign
|
3630
3821
|
static bool handle_in_foreign_content(GumboParser* parser, GumboToken* token) {
|
3822
|
+
gumbo_debug("Handling foreign content");
|
3631
3823
|
switch (token->type) {
|
3632
3824
|
case GUMBO_TOKEN_NULL:
|
3633
3825
|
parser_add_parse_error(parser, token);
|
3634
|
-
token->type = GUMBO_TOKEN_CHARACTER;
|
3635
3826
|
token->v.character = kUtf8ReplacementChar;
|
3636
3827
|
insert_text_token(parser, token);
|
3637
3828
|
return false;
|
3638
3829
|
case GUMBO_TOKEN_WHITESPACE:
|
3639
3830
|
insert_text_token(parser, token);
|
3640
3831
|
return true;
|
3832
|
+
case GUMBO_TOKEN_CDATA:
|
3641
3833
|
case GUMBO_TOKEN_CHARACTER:
|
3642
3834
|
insert_text_token(parser, token);
|
3643
3835
|
set_frameset_not_ok(parser);
|
@@ -3654,35 +3846,44 @@ static bool handle_in_foreign_content(GumboParser* parser, GumboToken* token) {
|
|
3654
3846
|
break;
|
3655
3847
|
}
|
3656
3848
|
// Order matters for these clauses.
|
3657
|
-
if (tag_in(token, kStartTag,
|
3658
|
-
|
3659
|
-
|
3660
|
-
|
3661
|
-
|
3662
|
-
|
3663
|
-
|
3664
|
-
|
3665
|
-
|
3666
|
-
|
3667
|
-
|
3668
|
-
|
3669
|
-
|
3670
|
-
|
3671
|
-
token_has_attribute(token, "color") ||
|
3672
|
-
token_has_attribute(token, "face") ||
|
3673
|
-
token_has_attribute(token, "size")))) {
|
3849
|
+
if (tag_in(token, kStartTag,
|
3850
|
+
(gumbo_tagset){TAG(B), TAG(BIG), TAG(BLOCKQUOTE), TAG(BODY), TAG(BR),
|
3851
|
+
TAG(CENTER), TAG(CODE), TAG(DD), TAG(DIV), TAG(DL), TAG(DT),
|
3852
|
+
TAG(EM), TAG(EMBED), TAG(H1), TAG(H2), TAG(H3), TAG(H4), TAG(H5),
|
3853
|
+
TAG(H6), TAG(HEAD), TAG(HR), TAG(I), TAG(IMG), TAG(LI),
|
3854
|
+
TAG(LISTING), TAG(MENU), TAG(META), TAG(NOBR), TAG(OL), TAG(P),
|
3855
|
+
TAG(PRE), TAG(RUBY), TAG(S), TAG(SMALL), TAG(SPAN), TAG(STRONG),
|
3856
|
+
TAG(STRIKE), TAG(SUB), TAG(SUP), TAG(TABLE), TAG(TT), TAG(U),
|
3857
|
+
TAG(UL), TAG(VAR)}) ||
|
3858
|
+
(tag_is(token, kStartTag, GUMBO_TAG_FONT) &&
|
3859
|
+
(token_has_attribute(token, "color") ||
|
3860
|
+
token_has_attribute(token, "face") ||
|
3861
|
+
token_has_attribute(token, "size")))) {
|
3862
|
+
/* Parse error */
|
3674
3863
|
parser_add_parse_error(parser, token);
|
3675
|
-
|
3676
|
-
|
3677
|
-
|
3678
|
-
|
3679
|
-
|
3680
|
-
|
3681
|
-
parser
|
3682
|
-
|
3683
|
-
|
3864
|
+
|
3865
|
+
/*
|
3866
|
+
* Fragment case: If the parser was originally created for the HTML
|
3867
|
+
* fragment parsing algorithm, then act as described in the "any other
|
3868
|
+
* start tag" entry below.
|
3869
|
+
*/
|
3870
|
+
if (!is_fragment_parser(parser)) {
|
3871
|
+
do {
|
3872
|
+
pop_current_node(parser);
|
3873
|
+
} while (!(is_mathml_integration_point(get_current_node(parser)) ||
|
3874
|
+
is_html_integration_point(get_current_node(parser)) ||
|
3875
|
+
get_current_node(parser)->v.element.tag_namespace ==
|
3876
|
+
GUMBO_NAMESPACE_HTML));
|
3877
|
+
parser->_parser_state->_reprocess_current_token = true;
|
3878
|
+
return false;
|
3879
|
+
}
|
3880
|
+
|
3881
|
+
assert(token->type == GUMBO_TOKEN_START_TAG);
|
3882
|
+
}
|
3883
|
+
|
3884
|
+
if (token->type == GUMBO_TOKEN_START_TAG) {
|
3684
3885
|
const GumboNamespaceEnum current_namespace =
|
3685
|
-
|
3886
|
+
get_adjusted_current_node(parser)->v.element.tag_namespace;
|
3686
3887
|
if (current_namespace == GUMBO_NAMESPACE_MATHML) {
|
3687
3888
|
adjust_mathml_attributes(parser, token);
|
3688
3889
|
}
|
@@ -3698,8 +3899,8 @@ static bool handle_in_foreign_content(GumboParser* parser, GumboToken* token) {
|
|
3698
3899
|
acknowledge_self_closing_tag(parser);
|
3699
3900
|
}
|
3700
3901
|
return true;
|
3701
|
-
|
3702
|
-
|
3902
|
+
// </script> tags are handled like any other end tag, putting the script's
|
3903
|
+
// text into a text node child and closing the current node.
|
3703
3904
|
} else {
|
3704
3905
|
assert(token->type == GUMBO_TOKEN_END_TAG);
|
3705
3906
|
GumboNode* node = get_current_node(parser);
|
@@ -3715,13 +3916,13 @@ static bool handle_in_foreign_content(GumboParser* parser, GumboToken* token) {
|
|
3715
3916
|
is_success = false;
|
3716
3917
|
}
|
3717
3918
|
int i = parser->_parser_state->_open_elements.length;
|
3718
|
-
for(
|
3919
|
+
for (--i; i > 0;) {
|
3719
3920
|
// Here we move up the stack until we find an HTML element (in which
|
3720
3921
|
// case we do nothing) or we find the element that we're about to
|
3721
3922
|
// close (in which case we pop everything we've seen until that
|
3722
3923
|
// point.)
|
3723
3924
|
gumbo_debug("Foreign %.*s node at %d.\n", node_tagname.length,
|
3724
|
-
|
3925
|
+
node_tagname.data, i);
|
3725
3926
|
if (gumbo_string_equals_ignore_case(&node_tagname, &token_tagname)) {
|
3726
3927
|
gumbo_debug("Matches.\n");
|
3727
3928
|
while (pop_current_node(parser) != node) {
|
@@ -3749,7 +3950,6 @@ static bool handle_in_foreign_content(GumboParser* parser, GumboToken* token) {
|
|
3749
3950
|
}
|
3750
3951
|
}
|
3751
3952
|
|
3752
|
-
|
3753
3953
|
// http://www.whatwg.org/specs/web-apps/current-work/multipage/tree-construction.html#tree-construction
|
3754
3954
|
static bool handle_token(GumboParser* parser, GumboToken* token) {
|
3755
3955
|
if (parser->_parser_state->_ignore_next_linefeed &&
|
@@ -3771,29 +3971,31 @@ static bool handle_token(GumboParser* parser, GumboToken* token) {
|
|
3771
3971
|
parser->_parser_state->_closed_html_tag = true;
|
3772
3972
|
}
|
3773
3973
|
|
3774
|
-
const GumboNode* current_node =
|
3775
|
-
assert(!current_node || current_node->type == GUMBO_NODE_ELEMENT
|
3974
|
+
const GumboNode* current_node = get_adjusted_current_node(parser);
|
3975
|
+
assert(!current_node || current_node->type == GUMBO_NODE_ELEMENT ||
|
3976
|
+
current_node->type == GUMBO_NODE_TEMPLATE);
|
3776
3977
|
if (current_node) {
|
3777
3978
|
gumbo_debug("Current node: <%s>.\n",
|
3778
|
-
|
3979
|
+
gumbo_normalized_tagname(current_node->v.element.tag));
|
3779
3980
|
}
|
3780
3981
|
if (!current_node ||
|
3781
3982
|
current_node->v.element.tag_namespace == GUMBO_NAMESPACE_HTML ||
|
3782
3983
|
(is_mathml_integration_point(current_node) &&
|
3783
|
-
|
3784
|
-
|
3785
|
-
|
3786
|
-
|
3787
|
-
|
3788
|
-
|
3984
|
+
(token->type == GUMBO_TOKEN_CHARACTER ||
|
3985
|
+
token->type == GUMBO_TOKEN_WHITESPACE ||
|
3986
|
+
token->type == GUMBO_TOKEN_NULL ||
|
3987
|
+
(token->type == GUMBO_TOKEN_START_TAG &&
|
3988
|
+
!tag_in(token, kStartTag,
|
3989
|
+
(gumbo_tagset){TAG(MGLYPH), TAG(MALIGNMARK)})))) ||
|
3789
3990
|
(current_node->v.element.tag_namespace == GUMBO_NAMESPACE_MATHML &&
|
3790
|
-
|
3791
|
-
|
3792
|
-
|
3793
|
-
|
3794
|
-
token->type ==
|
3795
|
-
|
3796
|
-
|
3991
|
+
node_qualified_tag_is(
|
3992
|
+
current_node, GUMBO_NAMESPACE_MATHML, GUMBO_TAG_ANNOTATION_XML) &&
|
3993
|
+
tag_is(token, kStartTag, GUMBO_TAG_SVG)) ||
|
3994
|
+
(is_html_integration_point(current_node) &&
|
3995
|
+
(token->type == GUMBO_TOKEN_START_TAG ||
|
3996
|
+
token->type == GUMBO_TOKEN_CHARACTER ||
|
3997
|
+
token->type == GUMBO_TOKEN_NULL ||
|
3998
|
+
token->type == GUMBO_TOKEN_WHITESPACE)) ||
|
3797
3999
|
token->type == GUMBO_TOKEN_EOF) {
|
3798
4000
|
return handle_html_content(parser, token);
|
3799
4001
|
} else {
|
@@ -3801,6 +4003,66 @@ static bool handle_token(GumboParser* parser, GumboToken* token) {
|
|
3801
4003
|
}
|
3802
4004
|
}
|
3803
4005
|
|
4006
|
+
static void fragment_parser_init(GumboParser* parser, GumboTag fragment_ctx,
|
4007
|
+
GumboNamespaceEnum fragment_namespace) {
|
4008
|
+
GumboNode* root;
|
4009
|
+
assert(fragment_ctx != GUMBO_TAG_LAST);
|
4010
|
+
|
4011
|
+
// 3
|
4012
|
+
parser->_parser_state->_fragment_ctx = create_element(parser, fragment_ctx);
|
4013
|
+
parser->_parser_state->_fragment_ctx->v.element.tag_namespace =
|
4014
|
+
fragment_namespace;
|
4015
|
+
|
4016
|
+
// 4
|
4017
|
+
if (fragment_namespace == GUMBO_NAMESPACE_HTML) {
|
4018
|
+
// Non-HTML namespaces always start in the DATA state.
|
4019
|
+
switch (fragment_ctx) {
|
4020
|
+
case GUMBO_TAG_TITLE:
|
4021
|
+
case GUMBO_TAG_TEXTAREA:
|
4022
|
+
gumbo_tokenizer_set_state(parser, GUMBO_LEX_RCDATA);
|
4023
|
+
break;
|
4024
|
+
|
4025
|
+
case GUMBO_TAG_STYLE:
|
4026
|
+
case GUMBO_TAG_XMP:
|
4027
|
+
case GUMBO_TAG_IFRAME:
|
4028
|
+
case GUMBO_TAG_NOEMBED:
|
4029
|
+
case GUMBO_TAG_NOFRAMES:
|
4030
|
+
gumbo_tokenizer_set_state(parser, GUMBO_LEX_RAWTEXT);
|
4031
|
+
break;
|
4032
|
+
|
4033
|
+
case GUMBO_TAG_SCRIPT:
|
4034
|
+
gumbo_tokenizer_set_state(parser, GUMBO_LEX_SCRIPT);
|
4035
|
+
break;
|
4036
|
+
|
4037
|
+
case GUMBO_TAG_NOSCRIPT:
|
4038
|
+
/* scripting is disabled in Gumbo, so leave the tokenizer
|
4039
|
+
* in the default data state */
|
4040
|
+
break;
|
4041
|
+
|
4042
|
+
case GUMBO_TAG_PLAINTEXT:
|
4043
|
+
gumbo_tokenizer_set_state(parser, GUMBO_LEX_PLAINTEXT);
|
4044
|
+
break;
|
4045
|
+
|
4046
|
+
default:
|
4047
|
+
/* default data state */
|
4048
|
+
break;
|
4049
|
+
}
|
4050
|
+
}
|
4051
|
+
|
4052
|
+
// 5. 6. 7.
|
4053
|
+
root = insert_element_of_tag_type(
|
4054
|
+
parser, GUMBO_TAG_HTML, GUMBO_INSERTION_IMPLIED);
|
4055
|
+
parser->_output->root = root;
|
4056
|
+
|
4057
|
+
// 8.
|
4058
|
+
if (fragment_ctx == GUMBO_TAG_TEMPLATE) {
|
4059
|
+
push_template_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TEMPLATE);
|
4060
|
+
}
|
4061
|
+
|
4062
|
+
// 10.
|
4063
|
+
reset_insertion_mode_appropriately(parser);
|
4064
|
+
}
|
4065
|
+
|
3804
4066
|
GumboOutput* gumbo_parse(const char* buffer) {
|
3805
4067
|
return gumbo_parse_with_options(
|
3806
4068
|
&kGumboDefaultOptions, buffer, strlen(buffer));
|
@@ -3814,6 +4076,11 @@ GumboOutput* gumbo_parse_with_options(
|
|
3814
4076
|
gumbo_tokenizer_state_init(&parser, buffer, length);
|
3815
4077
|
parser_state_init(&parser);
|
3816
4078
|
|
4079
|
+
if (options->fragment_context != GUMBO_TAG_LAST) {
|
4080
|
+
fragment_parser_init(
|
4081
|
+
&parser, options->fragment_context, options->fragment_namespace);
|
4082
|
+
}
|
4083
|
+
|
3817
4084
|
GumboParserState* state = parser._parser_state;
|
3818
4085
|
gumbo_debug("Parsing %.*s.\n", length, buffer);
|
3819
4086
|
|
@@ -3823,14 +4090,15 @@ GumboOutput* gumbo_parse_with_options(
|
|
3823
4090
|
|
3824
4091
|
GumboToken token;
|
3825
4092
|
bool has_error = false;
|
4093
|
+
|
3826
4094
|
do {
|
3827
4095
|
if (state->_reprocess_current_token) {
|
3828
4096
|
state->_reprocess_current_token = false;
|
3829
4097
|
} else {
|
3830
4098
|
GumboNode* current_node = get_current_node(&parser);
|
3831
|
-
gumbo_tokenizer_set_is_current_node_foreign(
|
3832
|
-
|
3833
|
-
|
4099
|
+
gumbo_tokenizer_set_is_current_node_foreign(&parser,
|
4100
|
+
current_node &&
|
4101
|
+
current_node->v.element.tag_namespace != GUMBO_NAMESPACE_HTML);
|
3834
4102
|
has_error = !gumbo_lex(&parser, &token) || has_error;
|
3835
4103
|
}
|
3836
4104
|
const char* token_type = "text";
|
@@ -3850,14 +4118,13 @@ GumboOutput* gumbo_parse_with_options(
|
|
3850
4118
|
default:
|
3851
4119
|
break;
|
3852
4120
|
}
|
3853
|
-
gumbo_debug("Handling %s token @%d:%d in state %d.\n",
|
3854
|
-
|
3855
|
-
state->_insertion_mode);
|
4121
|
+
gumbo_debug("Handling %s token @%d:%d in state %d.\n", (char*) token_type,
|
4122
|
+
token.position.line, token.position.column, state->_insertion_mode);
|
3856
4123
|
|
3857
4124
|
state->_current_token = &token;
|
3858
4125
|
state->_self_closing_flag_acknowledged =
|
3859
4126
|
!(token.type == GUMBO_TOKEN_START_TAG &&
|
3860
|
-
|
4127
|
+
token.v.start_tag.is_self_closing);
|
3861
4128
|
|
3862
4129
|
has_error = !handle_token(&parser, &token) || has_error;
|
3863
4130
|
|
@@ -3913,7 +4180,7 @@ void gumbo_destroy_output(const GumboOptions* options, GumboOutput* output) {
|
|
3913
4180
|
GumboParser parser;
|
3914
4181
|
parser._options = options;
|
3915
4182
|
destroy_node(&parser, output->document);
|
3916
|
-
for (int i = 0; i < output->errors.length; ++i) {
|
4183
|
+
for (unsigned int i = 0; i < output->errors.length; ++i) {
|
3917
4184
|
gumbo_error_destroy(&parser, output->errors.data[i]);
|
3918
4185
|
}
|
3919
4186
|
gumbo_vector_destroy(&parser, &output->errors);
|