nokogumbo 1.3.0 → 1.5.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/README.md +8 -2
- data/ext/nokogumboc/extconf.rb +18 -6
- data/ext/nokogumboc/nokogumbo.c +102 -42
- data/gumbo-parser/src/attribute.c +1 -1
- data/gumbo-parser/src/char_ref.c +37 -67
- data/gumbo-parser/src/char_ref.h +3 -4
- data/gumbo-parser/src/char_ref.rl +6 -1
- data/gumbo-parser/src/error.c +51 -51
- data/gumbo-parser/src/error.h +7 -9
- data/gumbo-parser/src/gumbo.h +45 -181
- data/gumbo-parser/src/parser.c +1439 -1172
- data/gumbo-parser/src/string_buffer.c +14 -10
- data/gumbo-parser/src/string_buffer.h +9 -6
- data/gumbo-parser/src/string_piece.c +5 -6
- data/gumbo-parser/src/string_piece.h +2 -3
- data/gumbo-parser/src/tag.c +36 -166
- data/gumbo-parser/src/tag.in +150 -0
- data/gumbo-parser/src/tag_enum.h +153 -0
- data/gumbo-parser/src/tag_gperf.h +105 -0
- data/gumbo-parser/src/tag_sizes.h +4 -0
- data/gumbo-parser/src/tag_strings.h +153 -0
- data/gumbo-parser/src/token_type.h +1 -0
- data/gumbo-parser/src/tokenizer.c +278 -361
- data/gumbo-parser/src/tokenizer.h +2 -2
- data/gumbo-parser/src/utf8.c +53 -52
- data/gumbo-parser/src/utf8.h +1 -2
- data/gumbo-parser/src/util.c +1 -1
- data/gumbo-parser/src/util.h +0 -2
- data/gumbo-parser/src/vector.c +17 -17
- data/gumbo-parser/src/vector.h +6 -8
- data/gumbo-parser/visualc/include/strings.h +2 -1
- data/lib/nokogumbo.rb +8 -8
- data/test-nokogumbo.rb +190 -0
- metadata +19 -17
data/gumbo-parser/src/parser.c
CHANGED
@@ -32,48 +32,55 @@
|
|
32
32
|
#include "util.h"
|
33
33
|
#include "vector.h"
|
34
34
|
|
35
|
-
|
36
35
|
#define AVOID_UNUSED_VARIABLE_WARNING(i) (void)(i)
|
37
36
|
|
38
|
-
#define GUMBO_STRING(literal)
|
39
|
-
|
37
|
+
#define GUMBO_STRING(literal) \
|
38
|
+
{ literal, sizeof(literal) - 1 }
|
39
|
+
#define TERMINATOR \
|
40
|
+
{ "", 0 }
|
40
41
|
|
41
|
-
|
42
|
-
|
43
|
-
|
42
|
+
typedef char gumbo_tagset[GUMBO_TAG_LAST];
|
43
|
+
#define TAG(tag) [GUMBO_TAG_##tag] = (1 << GUMBO_NAMESPACE_HTML)
|
44
|
+
#define TAG_SVG(tag) [GUMBO_TAG_##tag] = (1 << GUMBO_NAMESPACE_SVG)
|
45
|
+
#define TAG_MATHML(tag) [GUMBO_TAG_##tag] = (1 << GUMBO_NAMESPACE_MATHML)
|
44
46
|
|
45
|
-
|
46
|
-
|
47
|
-
}
|
47
|
+
#define TAGSET_INCLUDES(tagset, namespace, tag) \
|
48
|
+
(tag < GUMBO_TAG_LAST && tagset[(int) tag] == (1 << (int) namespace))
|
48
49
|
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
50
|
+
// selected forward declarations as it is getting hard to find
|
51
|
+
// an appropriate order
|
52
|
+
static bool node_html_tag_is(const GumboNode*, GumboTag);
|
53
|
+
static GumboInsertionMode get_current_template_insertion_mode(
|
54
|
+
const GumboParser*);
|
55
|
+
static bool handle_in_template(GumboParser*, GumboToken*);
|
56
|
+
static void destroy_node(GumboParser*, GumboNode*);
|
57
|
+
|
58
|
+
static void* malloc_wrapper(void* unused, size_t size) { return malloc(size); }
|
59
|
+
|
60
|
+
static void free_wrapper(void* unused, void* ptr) { free(ptr); }
|
61
|
+
|
62
|
+
const GumboOptions kGumboDefaultOptions = {&malloc_wrapper, &free_wrapper, NULL,
|
63
|
+
8, false, -1, GUMBO_TAG_LAST, GUMBO_NAMESPACE_HTML};
|
57
64
|
|
58
65
|
static const GumboStringPiece kDoctypeHtml = GUMBO_STRING("html");
|
59
|
-
static const GumboStringPiece kPublicIdHtml4_0 =
|
60
|
-
"-//W3C//DTD HTML 4.0//EN");
|
61
|
-
static const GumboStringPiece kPublicIdHtml4_01 =
|
62
|
-
"-//W3C//DTD HTML 4.01//EN");
|
63
|
-
static const GumboStringPiece kPublicIdXhtml1_0 =
|
64
|
-
"-//W3C//DTD XHTML 1.0 Strict//EN");
|
65
|
-
static const GumboStringPiece kPublicIdXhtml1_1 =
|
66
|
-
"-//W3C//DTD XHTML 1.1//EN");
|
67
|
-
static const GumboStringPiece kSystemIdRecHtml4_0 =
|
68
|
-
"http://www.w3.org/TR/REC-html40/strict.dtd");
|
69
|
-
static const GumboStringPiece kSystemIdHtml4 =
|
70
|
-
"http://www.w3.org/TR/html4/strict.dtd");
|
71
|
-
static const GumboStringPiece kSystemIdXhtmlStrict1_1 =
|
72
|
-
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd");
|
73
|
-
static const GumboStringPiece kSystemIdXhtml1_1 =
|
74
|
-
"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd");
|
75
|
-
static const GumboStringPiece kSystemIdLegacyCompat =
|
76
|
-
"about:legacy-compat");
|
66
|
+
static const GumboStringPiece kPublicIdHtml4_0 =
|
67
|
+
GUMBO_STRING("-//W3C//DTD HTML 4.0//EN");
|
68
|
+
static const GumboStringPiece kPublicIdHtml4_01 =
|
69
|
+
GUMBO_STRING("-//W3C//DTD HTML 4.01//EN");
|
70
|
+
static const GumboStringPiece kPublicIdXhtml1_0 =
|
71
|
+
GUMBO_STRING("-//W3C//DTD XHTML 1.0 Strict//EN");
|
72
|
+
static const GumboStringPiece kPublicIdXhtml1_1 =
|
73
|
+
GUMBO_STRING("-//W3C//DTD XHTML 1.1//EN");
|
74
|
+
static const GumboStringPiece kSystemIdRecHtml4_0 =
|
75
|
+
GUMBO_STRING("http://www.w3.org/TR/REC-html40/strict.dtd");
|
76
|
+
static const GumboStringPiece kSystemIdHtml4 =
|
77
|
+
GUMBO_STRING("http://www.w3.org/TR/html4/strict.dtd");
|
78
|
+
static const GumboStringPiece kSystemIdXhtmlStrict1_1 =
|
79
|
+
GUMBO_STRING("http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd");
|
80
|
+
static const GumboStringPiece kSystemIdXhtml1_1 =
|
81
|
+
GUMBO_STRING("http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd");
|
82
|
+
static const GumboStringPiece kSystemIdLegacyCompat =
|
83
|
+
GUMBO_STRING("about:legacy-compat");
|
77
84
|
|
78
85
|
// The doctype arrays have an explicit terminator because we want to pass them
|
79
86
|
// to a helper function, and passing them as a pointer discards sizeof
|
@@ -81,96 +88,86 @@ static const GumboStringPiece kSystemIdLegacyCompat = GUMBO_STRING(
|
|
81
88
|
// over them use sizeof directly instead of a terminator.
|
82
89
|
|
83
90
|
static const GumboStringPiece kQuirksModePublicIdPrefixes[] = {
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
};
|
91
|
+
GUMBO_STRING("+//Silmaril//dtd html Pro v0r11 19970101//"),
|
92
|
+
GUMBO_STRING("-//AdvaSoft Ltd//DTD HTML 3.0 asWedit + extensions//"),
|
93
|
+
GUMBO_STRING("-//AS//DTD HTML 3.0 asWedit + extensions//"),
|
94
|
+
GUMBO_STRING("-//IETF//DTD HTML 2.0 Level 1//"),
|
95
|
+
GUMBO_STRING("-//IETF//DTD HTML 2.0 Level 2//"),
|
96
|
+
GUMBO_STRING("-//IETF//DTD HTML 2.0 Strict Level 1//"),
|
97
|
+
GUMBO_STRING("-//IETF//DTD HTML 2.0 Strict Level 2//"),
|
98
|
+
GUMBO_STRING("-//IETF//DTD HTML 2.0 Strict//"),
|
99
|
+
GUMBO_STRING("-//IETF//DTD HTML 2.0//"),
|
100
|
+
GUMBO_STRING("-//IETF//DTD HTML 2.1E//"),
|
101
|
+
GUMBO_STRING("-//IETF//DTD HTML 3.0//"),
|
102
|
+
GUMBO_STRING("-//IETF//DTD HTML 3.2 Final//"),
|
103
|
+
GUMBO_STRING("-//IETF//DTD HTML 3.2//"),
|
104
|
+
GUMBO_STRING("-//IETF//DTD HTML 3//"),
|
105
|
+
GUMBO_STRING("-//IETF//DTD HTML Level 0//"),
|
106
|
+
GUMBO_STRING("-//IETF//DTD HTML Level 1//"),
|
107
|
+
GUMBO_STRING("-//IETF//DTD HTML Level 2//"),
|
108
|
+
GUMBO_STRING("-//IETF//DTD HTML Level 3//"),
|
109
|
+
GUMBO_STRING("-//IETF//DTD HTML Strict Level 0//"),
|
110
|
+
GUMBO_STRING("-//IETF//DTD HTML Strict Level 1//"),
|
111
|
+
GUMBO_STRING("-//IETF//DTD HTML Strict Level 2//"),
|
112
|
+
GUMBO_STRING("-//IETF//DTD HTML Strict Level 3//"),
|
113
|
+
GUMBO_STRING("-//IETF//DTD HTML Strict//"),
|
114
|
+
GUMBO_STRING("-//IETF//DTD HTML//"),
|
115
|
+
GUMBO_STRING("-//Metrius//DTD Metrius Presentational//"),
|
116
|
+
GUMBO_STRING("-//Microsoft//DTD Internet Explorer 2.0 HTML Strict//"),
|
117
|
+
GUMBO_STRING("-//Microsoft//DTD Internet Explorer 2.0 HTML//"),
|
118
|
+
GUMBO_STRING("-//Microsoft//DTD Internet Explorer 2.0 Tables//"),
|
119
|
+
GUMBO_STRING("-//Microsoft//DTD Internet Explorer 3.0 HTML Strict//"),
|
120
|
+
GUMBO_STRING("-//Microsoft//DTD Internet Explorer 3.0 HTML//"),
|
121
|
+
GUMBO_STRING("-//Microsoft//DTD Internet Explorer 3.0 Tables//"),
|
122
|
+
GUMBO_STRING("-//Netscape Comm. Corp.//DTD HTML//"),
|
123
|
+
GUMBO_STRING("-//Netscape Comm. Corp.//DTD Strict HTML//"),
|
124
|
+
GUMBO_STRING("-//O'Reilly and Associates//DTD HTML 2.0//"),
|
125
|
+
GUMBO_STRING("-//O'Reilly and Associates//DTD HTML Extended 1.0//"),
|
126
|
+
GUMBO_STRING("-//O'Reilly and Associates//DTD HTML Extended Relaxed 1.0//"),
|
127
|
+
GUMBO_STRING(
|
128
|
+
"-//SoftQuad Software//DTD HoTMetaL PRO 6.0::19990601::)"
|
129
|
+
"extensions to HTML 4.0//"),
|
130
|
+
GUMBO_STRING(
|
131
|
+
"-//SoftQuad//DTD HoTMetaL PRO 4.0::19971010::"
|
132
|
+
"extensions to HTML 4.0//"),
|
133
|
+
GUMBO_STRING("-//Spyglass//DTD HTML 2.0 Extended//"),
|
134
|
+
GUMBO_STRING("-//SQ//DTD HTML 2.0 HoTMetaL + extensions//"),
|
135
|
+
GUMBO_STRING("-//Sun Microsystems Corp.//DTD HotJava HTML//"),
|
136
|
+
GUMBO_STRING("-//Sun Microsystems Corp.//DTD HotJava Strict HTML//"),
|
137
|
+
GUMBO_STRING("-//W3C//DTD HTML 3 1995-03-24//"),
|
138
|
+
GUMBO_STRING("-//W3C//DTD HTML 3.2 Draft//"),
|
139
|
+
GUMBO_STRING("-//W3C//DTD HTML 3.2 Final//"),
|
140
|
+
GUMBO_STRING("-//W3C//DTD HTML 3.2//"),
|
141
|
+
GUMBO_STRING("-//W3C//DTD HTML 3.2S Draft//"),
|
142
|
+
GUMBO_STRING("-//W3C//DTD HTML 4.0 Frameset//"),
|
143
|
+
GUMBO_STRING("-//W3C//DTD HTML 4.0 Transitional//"),
|
144
|
+
GUMBO_STRING("-//W3C//DTD HTML Experimental 19960712//"),
|
145
|
+
GUMBO_STRING("-//W3C//DTD HTML Experimental 970421//"),
|
146
|
+
GUMBO_STRING("-//W3C//DTD W3 HTML//"),
|
147
|
+
GUMBO_STRING("-//W3O//DTD W3 HTML 3.0//"),
|
148
|
+
GUMBO_STRING("-//WebTechs//DTD Mozilla HTML 2.0//"),
|
149
|
+
GUMBO_STRING("-//WebTechs//DTD Mozilla HTML//"), TERMINATOR};
|
143
150
|
|
144
151
|
static const GumboStringPiece kQuirksModePublicIdExactMatches[] = {
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
TERMINATOR
|
149
|
-
};
|
152
|
+
GUMBO_STRING("-//W3O//DTD W3 HTML Strict 3.0//EN//"),
|
153
|
+
GUMBO_STRING("-/W3C/DTD HTML 4.0 Transitional/EN"), GUMBO_STRING("HTML"),
|
154
|
+
TERMINATOR};
|
150
155
|
|
151
156
|
static const GumboStringPiece kQuirksModeSystemIdExactMatches[] = {
|
152
|
-
|
153
|
-
|
154
|
-
};
|
157
|
+
GUMBO_STRING("http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd"),
|
158
|
+
TERMINATOR};
|
155
159
|
|
156
160
|
static const GumboStringPiece kLimitedQuirksPublicIdPrefixes[] = {
|
157
|
-
|
158
|
-
|
159
|
-
TERMINATOR
|
160
|
-
};
|
161
|
+
GUMBO_STRING("-//W3C//DTD XHTML 1.0 Frameset//"),
|
162
|
+
GUMBO_STRING("-//W3C//DTD XHTML 1.0 Transitional//"), TERMINATOR};
|
161
163
|
|
162
|
-
static const GumboStringPiece kLimitedQuirksRequiresSystemIdPublicIdPrefixes[] =
|
163
|
-
|
164
|
-
|
165
|
-
TERMINATOR
|
166
|
-
};
|
164
|
+
static const GumboStringPiece kLimitedQuirksRequiresSystemIdPublicIdPrefixes[] =
|
165
|
+
{GUMBO_STRING("-//W3C//DTD HTML 4.01 Frameset//"),
|
166
|
+
GUMBO_STRING("-//W3C//DTD HTML 4.01 Transitional//"), TERMINATOR};
|
167
167
|
|
168
168
|
// Indexed by GumboNamespaceEnum; keep in sync with that.
|
169
|
-
static const char* kLegalXmlns[] = {
|
170
|
-
|
171
|
-
"http://www.w3.org/2000/svg",
|
172
|
-
"http://www.w3.org/1998/Math/MathML"
|
173
|
-
};
|
169
|
+
static const char* kLegalXmlns[] = {"http://www.w3.org/1999/xhtml",
|
170
|
+
"http://www.w3.org/2000/svg", "http://www.w3.org/1998/Math/MathML"};
|
174
171
|
|
175
172
|
typedef struct _ReplacementEntry {
|
176
173
|
const GumboStringPiece from;
|
@@ -178,112 +175,112 @@ typedef struct _ReplacementEntry {
|
|
178
175
|
} ReplacementEntry;
|
179
176
|
|
180
177
|
#define REPLACEMENT_ENTRY(from, to) \
|
181
|
-
|
178
|
+
{ GUMBO_STRING(from), GUMBO_STRING(to) }
|
182
179
|
|
183
180
|
// Static data for SVG attribute replacements.
|
184
|
-
//
|
181
|
+
// https://html.spec.whatwg.org/multipage/syntax.html#creating-and-inserting-nodes
|
185
182
|
static const ReplacementEntry kSvgAttributeReplacements[] = {
|
186
|
-
|
187
|
-
|
188
|
-
|
189
|
-
|
190
|
-
|
191
|
-
|
192
|
-
|
193
|
-
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
-
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
|
207
|
-
|
208
|
-
|
209
|
-
|
210
|
-
|
211
|
-
|
212
|
-
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
|
217
|
-
|
218
|
-
|
219
|
-
|
220
|
-
|
221
|
-
|
222
|
-
|
223
|
-
|
224
|
-
|
225
|
-
|
226
|
-
|
227
|
-
|
228
|
-
|
229
|
-
|
230
|
-
|
231
|
-
|
232
|
-
|
233
|
-
|
234
|
-
|
235
|
-
|
236
|
-
|
237
|
-
|
238
|
-
|
239
|
-
|
240
|
-
|
241
|
-
|
242
|
-
|
243
|
-
|
244
|
-
|
245
|
-
|
246
|
-
|
247
|
-
|
183
|
+
REPLACEMENT_ENTRY("attributename", "attributeName"),
|
184
|
+
REPLACEMENT_ENTRY("attributetype", "attributeType"),
|
185
|
+
REPLACEMENT_ENTRY("basefrequency", "baseFrequency"),
|
186
|
+
REPLACEMENT_ENTRY("baseprofile", "baseProfile"),
|
187
|
+
REPLACEMENT_ENTRY("calcmode", "calcMode"),
|
188
|
+
REPLACEMENT_ENTRY("clippathunits", "clipPathUnits"),
|
189
|
+
// REPLACEMENT_ENTRY("contentscripttype", "contentScriptType"),
|
190
|
+
// REPLACEMENT_ENTRY("contentstyletype", "contentStyleType"),
|
191
|
+
REPLACEMENT_ENTRY("diffuseconstant", "diffuseConstant"),
|
192
|
+
REPLACEMENT_ENTRY("edgemode", "edgeMode"),
|
193
|
+
// REPLACEMENT_ENTRY("externalresourcesrequired",
|
194
|
+
// "externalResourcesRequired"),
|
195
|
+
// REPLACEMENT_ENTRY("filterres", "filterRes"),
|
196
|
+
REPLACEMENT_ENTRY("filterunits", "filterUnits"),
|
197
|
+
REPLACEMENT_ENTRY("glyphref", "glyphRef"),
|
198
|
+
REPLACEMENT_ENTRY("gradienttransform", "gradientTransform"),
|
199
|
+
REPLACEMENT_ENTRY("gradientunits", "gradientUnits"),
|
200
|
+
REPLACEMENT_ENTRY("kernelmatrix", "kernelMatrix"),
|
201
|
+
REPLACEMENT_ENTRY("kernelunitlength", "kernelUnitLength"),
|
202
|
+
REPLACEMENT_ENTRY("keypoints", "keyPoints"),
|
203
|
+
REPLACEMENT_ENTRY("keysplines", "keySplines"),
|
204
|
+
REPLACEMENT_ENTRY("keytimes", "keyTimes"),
|
205
|
+
REPLACEMENT_ENTRY("lengthadjust", "lengthAdjust"),
|
206
|
+
REPLACEMENT_ENTRY("limitingconeangle", "limitingConeAngle"),
|
207
|
+
REPLACEMENT_ENTRY("markerheight", "markerHeight"),
|
208
|
+
REPLACEMENT_ENTRY("markerunits", "markerUnits"),
|
209
|
+
REPLACEMENT_ENTRY("markerwidth", "markerWidth"),
|
210
|
+
REPLACEMENT_ENTRY("maskcontentunits", "maskContentUnits"),
|
211
|
+
REPLACEMENT_ENTRY("maskunits", "maskUnits"),
|
212
|
+
REPLACEMENT_ENTRY("numoctaves", "numOctaves"),
|
213
|
+
REPLACEMENT_ENTRY("pathlength", "pathLength"),
|
214
|
+
REPLACEMENT_ENTRY("patterncontentunits", "patternContentUnits"),
|
215
|
+
REPLACEMENT_ENTRY("patterntransform", "patternTransform"),
|
216
|
+
REPLACEMENT_ENTRY("patternunits", "patternUnits"),
|
217
|
+
REPLACEMENT_ENTRY("pointsatx", "pointsAtX"),
|
218
|
+
REPLACEMENT_ENTRY("pointsaty", "pointsAtY"),
|
219
|
+
REPLACEMENT_ENTRY("pointsatz", "pointsAtZ"),
|
220
|
+
REPLACEMENT_ENTRY("preservealpha", "preserveAlpha"),
|
221
|
+
REPLACEMENT_ENTRY("preserveaspectratio", "preserveAspectRatio"),
|
222
|
+
REPLACEMENT_ENTRY("primitiveunits", "primitiveUnits"),
|
223
|
+
REPLACEMENT_ENTRY("refx", "refX"), REPLACEMENT_ENTRY("refy", "refY"),
|
224
|
+
REPLACEMENT_ENTRY("repeatcount", "repeatCount"),
|
225
|
+
REPLACEMENT_ENTRY("repeatdur", "repeatDur"),
|
226
|
+
REPLACEMENT_ENTRY("requiredextensions", "requiredExtensions"),
|
227
|
+
REPLACEMENT_ENTRY("requiredfeatures", "requiredFeatures"),
|
228
|
+
REPLACEMENT_ENTRY("specularconstant", "specularConstant"),
|
229
|
+
REPLACEMENT_ENTRY("specularexponent", "specularExponent"),
|
230
|
+
REPLACEMENT_ENTRY("spreadmethod", "spreadMethod"),
|
231
|
+
REPLACEMENT_ENTRY("startoffset", "startOffset"),
|
232
|
+
REPLACEMENT_ENTRY("stddeviation", "stdDeviation"),
|
233
|
+
REPLACEMENT_ENTRY("stitchtiles", "stitchTiles"),
|
234
|
+
REPLACEMENT_ENTRY("surfacescale", "surfaceScale"),
|
235
|
+
REPLACEMENT_ENTRY("systemlanguage", "systemLanguage"),
|
236
|
+
REPLACEMENT_ENTRY("tablevalues", "tableValues"),
|
237
|
+
REPLACEMENT_ENTRY("targetx", "targetX"),
|
238
|
+
REPLACEMENT_ENTRY("targety", "targetY"),
|
239
|
+
REPLACEMENT_ENTRY("textlength", "textLength"),
|
240
|
+
REPLACEMENT_ENTRY("viewbox", "viewBox"),
|
241
|
+
REPLACEMENT_ENTRY("viewtarget", "viewTarget"),
|
242
|
+
REPLACEMENT_ENTRY("xchannelselector", "xChannelSelector"),
|
243
|
+
REPLACEMENT_ENTRY("ychannelselector", "yChannelSelector"),
|
244
|
+
REPLACEMENT_ENTRY("zoomandpan", "zoomAndPan"),
|
248
245
|
};
|
249
246
|
|
250
247
|
static const ReplacementEntry kSvgTagReplacements[] = {
|
251
|
-
|
252
|
-
|
253
|
-
|
254
|
-
|
255
|
-
|
256
|
-
|
257
|
-
|
258
|
-
|
259
|
-
|
260
|
-
|
261
|
-
|
262
|
-
|
263
|
-
|
264
|
-
|
265
|
-
|
266
|
-
|
267
|
-
|
268
|
-
|
269
|
-
|
270
|
-
|
271
|
-
|
272
|
-
|
273
|
-
|
274
|
-
|
275
|
-
|
276
|
-
|
277
|
-
|
278
|
-
|
279
|
-
|
280
|
-
|
281
|
-
|
282
|
-
|
283
|
-
|
284
|
-
|
285
|
-
|
286
|
-
|
248
|
+
REPLACEMENT_ENTRY("altglyph", "altGlyph"),
|
249
|
+
REPLACEMENT_ENTRY("altglyphdef", "altGlyphDef"),
|
250
|
+
REPLACEMENT_ENTRY("altglyphitem", "altGlyphItem"),
|
251
|
+
REPLACEMENT_ENTRY("animatecolor", "animateColor"),
|
252
|
+
REPLACEMENT_ENTRY("animatemotion", "animateMotion"),
|
253
|
+
REPLACEMENT_ENTRY("animatetransform", "animateTransform"),
|
254
|
+
REPLACEMENT_ENTRY("clippath", "clipPath"),
|
255
|
+
REPLACEMENT_ENTRY("feblend", "feBlend"),
|
256
|
+
REPLACEMENT_ENTRY("fecolormatrix", "feColorMatrix"),
|
257
|
+
REPLACEMENT_ENTRY("fecomponenttransfer", "feComponentTransfer"),
|
258
|
+
REPLACEMENT_ENTRY("fecomposite", "feComposite"),
|
259
|
+
REPLACEMENT_ENTRY("feconvolvematrix", "feConvolveMatrix"),
|
260
|
+
REPLACEMENT_ENTRY("fediffuselighting", "feDiffuseLighting"),
|
261
|
+
REPLACEMENT_ENTRY("fedisplacementmap", "feDisplacementMap"),
|
262
|
+
REPLACEMENT_ENTRY("fedistantlight", "feDistantLight"),
|
263
|
+
REPLACEMENT_ENTRY("feflood", "feFlood"),
|
264
|
+
REPLACEMENT_ENTRY("fefunca", "feFuncA"),
|
265
|
+
REPLACEMENT_ENTRY("fefuncb", "feFuncB"),
|
266
|
+
REPLACEMENT_ENTRY("fefuncg", "feFuncG"),
|
267
|
+
REPLACEMENT_ENTRY("fefuncr", "feFuncR"),
|
268
|
+
REPLACEMENT_ENTRY("fegaussianblur", "feGaussianBlur"),
|
269
|
+
REPLACEMENT_ENTRY("feimage", "feImage"),
|
270
|
+
REPLACEMENT_ENTRY("femerge", "feMerge"),
|
271
|
+
REPLACEMENT_ENTRY("femergenode", "feMergeNode"),
|
272
|
+
REPLACEMENT_ENTRY("femorphology", "feMorphology"),
|
273
|
+
REPLACEMENT_ENTRY("feoffset", "feOffset"),
|
274
|
+
REPLACEMENT_ENTRY("fepointlight", "fePointLight"),
|
275
|
+
REPLACEMENT_ENTRY("fespecularlighting", "feSpecularLighting"),
|
276
|
+
REPLACEMENT_ENTRY("fespotlight", "feSpotLight"),
|
277
|
+
REPLACEMENT_ENTRY("fetile", "feTile"),
|
278
|
+
REPLACEMENT_ENTRY("feturbulence", "feTurbulence"),
|
279
|
+
REPLACEMENT_ENTRY("foreignobject", "foreignObject"),
|
280
|
+
REPLACEMENT_ENTRY("glyphref", "glyphRef"),
|
281
|
+
REPLACEMENT_ENTRY("lineargradient", "linearGradient"),
|
282
|
+
REPLACEMENT_ENTRY("radialgradient", "radialGradient"),
|
283
|
+
REPLACEMENT_ENTRY("textpath", "textPath"),
|
287
284
|
};
|
288
285
|
|
289
286
|
typedef struct _NamespacedAttributeReplacement {
|
@@ -293,18 +290,18 @@ typedef struct _NamespacedAttributeReplacement {
|
|
293
290
|
} NamespacedAttributeReplacement;
|
294
291
|
|
295
292
|
static const NamespacedAttributeReplacement kForeignAttributeReplacements[] = {
|
296
|
-
|
297
|
-
|
298
|
-
|
299
|
-
|
300
|
-
|
301
|
-
|
302
|
-
|
303
|
-
|
304
|
-
|
305
|
-
|
306
|
-
|
307
|
-
|
293
|
+
{"xlink:actuate", "actuate", GUMBO_ATTR_NAMESPACE_XLINK},
|
294
|
+
{"xlink:actuate", "actuate", GUMBO_ATTR_NAMESPACE_XLINK},
|
295
|
+
{"xlink:href", "href", GUMBO_ATTR_NAMESPACE_XLINK},
|
296
|
+
{"xlink:role", "role", GUMBO_ATTR_NAMESPACE_XLINK},
|
297
|
+
{"xlink:show", "show", GUMBO_ATTR_NAMESPACE_XLINK},
|
298
|
+
{"xlink:title", "title", GUMBO_ATTR_NAMESPACE_XLINK},
|
299
|
+
{"xlink:type", "type", GUMBO_ATTR_NAMESPACE_XLINK},
|
300
|
+
{"xml:base", "base", GUMBO_ATTR_NAMESPACE_XML},
|
301
|
+
{"xml:lang", "lang", GUMBO_ATTR_NAMESPACE_XML},
|
302
|
+
{"xml:space", "space", GUMBO_ATTR_NAMESPACE_XML},
|
303
|
+
{"xmlns", "xmlns", GUMBO_ATTR_NAMESPACE_XMLNS},
|
304
|
+
{"xmlns:xlink", "xlink", GUMBO_ATTR_NAMESPACE_XMLNS},
|
308
305
|
};
|
309
306
|
|
310
307
|
// The "scope marker" for the list of active formatting elements. We use a
|
@@ -336,7 +333,7 @@ typedef struct _TextNodeBufferState {
|
|
336
333
|
// The source position of the start of this text node.
|
337
334
|
GumboSourcePosition _start_position;
|
338
335
|
|
339
|
-
// The type of node that will be inserted (TEXT or WHITESPACE).
|
336
|
+
// The type of node that will be inserted (TEXT, CDATA, or WHITESPACE).
|
340
337
|
GumboNodeType _type;
|
341
338
|
} TextNodeBufferState;
|
342
339
|
|
@@ -362,6 +359,9 @@ typedef struct GumboInternalParserState {
|
|
362
359
|
GumboNode* _head_element;
|
363
360
|
GumboNode* _form_element;
|
364
361
|
|
362
|
+
// The element used as fragment context when parsing in fragment mode
|
363
|
+
GumboNode* _fragment_ctx;
|
364
|
+
|
365
365
|
// The flag for when the spec says "Reprocess the current token in..."
|
366
366
|
bool _reprocess_current_token;
|
367
367
|
|
@@ -418,14 +418,14 @@ static bool attribute_matches(
|
|
418
418
|
static bool attribute_matches_case_sensitive(
|
419
419
|
const GumboVector* attributes, const char* name, const char* value) {
|
420
420
|
const GumboAttribute* attr = gumbo_get_attribute(attributes, name);
|
421
|
-
return attr ?
|
421
|
+
return attr ? strcmp(value, attr->value) == 0 : false;
|
422
422
|
}
|
423
423
|
|
424
424
|
// Checks if the specified attribute vectors are identical.
|
425
425
|
static bool all_attributes_match(
|
426
426
|
const GumboVector* attr1, const GumboVector* attr2) {
|
427
|
-
int num_unmatched_attr2_elements = attr2->length;
|
428
|
-
for (int i = 0; i < attr1->length; ++i) {
|
427
|
+
unsigned int num_unmatched_attr2_elements = attr2->length;
|
428
|
+
for (unsigned int i = 0; i < attr1->length; ++i) {
|
429
429
|
const GumboAttribute* attr = attr1->data[i];
|
430
430
|
if (attribute_matches_case_sensitive(attr2, attr->name, attr->value)) {
|
431
431
|
--num_unmatched_attr2_elements;
|
@@ -453,8 +453,7 @@ static GumboNode* create_node(GumboParser* parser, GumboNodeType type) {
|
|
453
453
|
static GumboNode* new_document_node(GumboParser* parser) {
|
454
454
|
GumboNode* document_node = create_node(parser, GUMBO_NODE_DOCUMENT);
|
455
455
|
document_node->parse_flags = GUMBO_INSERTION_BY_PARSER;
|
456
|
-
gumbo_vector_init(
|
457
|
-
parser, 1, &document_node->v.document.children);
|
456
|
+
gumbo_vector_init(parser, 1, &document_node->v.document.children);
|
458
457
|
|
459
458
|
// Must be initialized explicitly, as there's no guarantee that we'll see a
|
460
459
|
// doc type token.
|
@@ -489,6 +488,7 @@ static void parser_state_init(GumboParser* parser) {
|
|
489
488
|
gumbo_vector_init(parser, 5, &parser_state->_template_insertion_modes);
|
490
489
|
parser_state->_head_element = NULL;
|
491
490
|
parser_state->_form_element = NULL;
|
491
|
+
parser_state->_fragment_ctx = NULL;
|
492
492
|
parser_state->_current_token = NULL;
|
493
493
|
parser_state->_closed_body_tag = false;
|
494
494
|
parser_state->_closed_html_tag = false;
|
@@ -497,6 +497,9 @@ static void parser_state_init(GumboParser* parser) {
|
|
497
497
|
|
498
498
|
static void parser_state_destroy(GumboParser* parser) {
|
499
499
|
GumboParserState* state = parser->_parser_state;
|
500
|
+
if (state->_fragment_ctx) {
|
501
|
+
destroy_node(parser, state->_fragment_ctx);
|
502
|
+
}
|
500
503
|
gumbo_vector_destroy(parser, &state->_active_formatting_elements);
|
501
504
|
gumbo_vector_destroy(parser, &state->_open_elements);
|
502
505
|
gumbo_vector_destroy(parser, &state->_template_insertion_modes);
|
@@ -508,6 +511,10 @@ static GumboNode* get_document_node(GumboParser* parser) {
|
|
508
511
|
return parser->_output->document;
|
509
512
|
}
|
510
513
|
|
514
|
+
static bool is_fragment_parser(const GumboParser* parser) {
|
515
|
+
return !!parser->_parser_state->_fragment_ctx;
|
516
|
+
}
|
517
|
+
|
511
518
|
// Returns the node at the bottom of the stack of open elements, or NULL if no
|
512
519
|
// elements have been added yet.
|
513
520
|
static GumboNode* get_current_node(GumboParser* parser) {
|
@@ -521,6 +528,14 @@ static GumboNode* get_current_node(GumboParser* parser) {
|
|
521
528
|
return open_elements->data[open_elements->length - 1];
|
522
529
|
}
|
523
530
|
|
531
|
+
static GumboNode* get_adjusted_current_node(GumboParser* parser) {
|
532
|
+
GumboParserState* state = parser->_parser_state;
|
533
|
+
if (state->_open_elements.length == 1 && state->_fragment_ctx) {
|
534
|
+
return state->_fragment_ctx;
|
535
|
+
}
|
536
|
+
return get_current_node(parser);
|
537
|
+
}
|
538
|
+
|
524
539
|
// Returns true if the given needle is in the given array of literal
|
525
540
|
// GumboStringPieces. If exact_match is true, this requires that they match
|
526
541
|
// exactly; otherwise, this performs a prefix match to check if any of the
|
@@ -528,7 +543,7 @@ static GumboNode* get_current_node(GumboParser* parser) {
|
|
528
543
|
// case-insensitive match.
|
529
544
|
static bool is_in_static_list(
|
530
545
|
const char* needle, const GumboStringPiece* haystack, bool exact_match) {
|
531
|
-
for (int i = 0; haystack[i].length > 0; ++i) {
|
546
|
+
for (unsigned int i = 0; haystack[i].length > 0; ++i) {
|
532
547
|
if ((exact_match && !strcmp(needle, haystack[i].data)) ||
|
533
548
|
(!exact_match && !strcasecmp(needle, haystack[i].data))) {
|
534
549
|
return true;
|
@@ -547,15 +562,36 @@ static void set_insertion_mode(GumboParser* parser, GumboInsertionMode mode) {
|
|
547
562
|
// indicate that there is no appropriate insertion mode, and the loop should
|
548
563
|
// continue.
|
549
564
|
static GumboInsertionMode get_appropriate_insertion_mode(
|
550
|
-
const
|
551
|
-
|
565
|
+
const GumboParser* parser, int index) {
|
566
|
+
const GumboVector* open_elements = &parser->_parser_state->_open_elements;
|
567
|
+
const GumboNode* node = open_elements->data[index];
|
568
|
+
const bool is_last = index == 0;
|
569
|
+
|
570
|
+
if (is_last && is_fragment_parser(parser)) {
|
571
|
+
node = parser->_parser_state->_fragment_ctx;
|
572
|
+
}
|
573
|
+
|
574
|
+
assert(node->type == GUMBO_NODE_ELEMENT || node->type == GUMBO_NODE_TEMPLATE);
|
552
575
|
switch (node->v.element.tag) {
|
553
|
-
case GUMBO_TAG_SELECT:
|
576
|
+
case GUMBO_TAG_SELECT: {
|
577
|
+
if (is_last) {
|
578
|
+
return GUMBO_INSERTION_MODE_IN_SELECT;
|
579
|
+
}
|
580
|
+
for (int i = index; i > 0; --i) {
|
581
|
+
const GumboNode* ancestor = open_elements->data[i];
|
582
|
+
if (node_html_tag_is(ancestor, GUMBO_TAG_TEMPLATE)) {
|
583
|
+
return GUMBO_INSERTION_MODE_IN_SELECT;
|
584
|
+
}
|
585
|
+
if (node_html_tag_is(ancestor, GUMBO_TAG_TABLE)) {
|
586
|
+
return GUMBO_INSERTION_MODE_IN_SELECT_IN_TABLE;
|
587
|
+
}
|
588
|
+
}
|
554
589
|
return GUMBO_INSERTION_MODE_IN_SELECT;
|
590
|
+
}
|
555
591
|
case GUMBO_TAG_TD:
|
556
592
|
case GUMBO_TAG_TH:
|
557
|
-
|
558
|
-
|
593
|
+
if (!is_last) return GUMBO_INSERTION_MODE_IN_CELL;
|
594
|
+
break;
|
559
595
|
case GUMBO_TAG_TR:
|
560
596
|
return GUMBO_INSERTION_MODE_IN_ROW;
|
561
597
|
case GUMBO_TAG_TBODY:
|
@@ -568,25 +604,30 @@ static GumboInsertionMode get_appropriate_insertion_mode(
|
|
568
604
|
return GUMBO_INSERTION_MODE_IN_COLUMN_GROUP;
|
569
605
|
case GUMBO_TAG_TABLE:
|
570
606
|
return GUMBO_INSERTION_MODE_IN_TABLE;
|
607
|
+
case GUMBO_TAG_TEMPLATE:
|
608
|
+
return get_current_template_insertion_mode(parser);
|
571
609
|
case GUMBO_TAG_HEAD:
|
610
|
+
if (!is_last) return GUMBO_INSERTION_MODE_IN_HEAD;
|
611
|
+
break;
|
572
612
|
case GUMBO_TAG_BODY:
|
573
613
|
return GUMBO_INSERTION_MODE_IN_BODY;
|
574
614
|
case GUMBO_TAG_FRAMESET:
|
575
615
|
return GUMBO_INSERTION_MODE_IN_FRAMESET;
|
576
616
|
case GUMBO_TAG_HTML:
|
577
|
-
return
|
617
|
+
return parser->_parser_state->_head_element
|
618
|
+
? GUMBO_INSERTION_MODE_AFTER_HEAD
|
619
|
+
: GUMBO_INSERTION_MODE_BEFORE_HEAD;
|
578
620
|
default:
|
579
|
-
|
580
|
-
GUMBO_INSERTION_MODE_IN_BODY : GUMBO_INSERTION_MODE_INITIAL;
|
621
|
+
break;
|
581
622
|
}
|
623
|
+
return is_last ? GUMBO_INSERTION_MODE_IN_BODY : GUMBO_INSERTION_MODE_INITIAL;
|
582
624
|
}
|
583
625
|
|
584
626
|
// This performs the actual "reset the insertion mode" loop.
|
585
627
|
static void reset_insertion_mode_appropriately(GumboParser* parser) {
|
586
628
|
const GumboVector* open_elements = &parser->_parser_state->_open_elements;
|
587
|
-
for (int i = open_elements->length; --i >= 0;
|
588
|
-
GumboInsertionMode mode =
|
589
|
-
get_appropriate_insertion_mode(open_elements->data[i], i == 0);
|
629
|
+
for (int i = open_elements->length; --i >= 0;) {
|
630
|
+
GumboInsertionMode mode = get_appropriate_insertion_mode(parser, i);
|
590
631
|
if (mode != GUMBO_INSERTION_MODE_INITIAL) {
|
591
632
|
set_insertion_mode(parser, mode);
|
592
633
|
return;
|
@@ -597,7 +638,8 @@ static void reset_insertion_mode_appropriately(GumboParser* parser) {
|
|
597
638
|
assert(0);
|
598
639
|
}
|
599
640
|
|
600
|
-
static GumboError* parser_add_parse_error(
|
641
|
+
static GumboError* parser_add_parse_error(
|
642
|
+
GumboParser* parser, const GumboToken* token) {
|
601
643
|
gumbo_debug("Adding parse error.\n");
|
602
644
|
GumboError* error = gumbo_add_error(parser);
|
603
645
|
if (!error) {
|
@@ -616,13 +658,14 @@ static GumboError* parser_add_parse_error(GumboParser* parser, const GumboToken*
|
|
616
658
|
}
|
617
659
|
GumboParserState* state = parser->_parser_state;
|
618
660
|
extra_data->parser_state = state->_insertion_mode;
|
619
|
-
gumbo_vector_init(
|
620
|
-
|
621
|
-
for (int i = 0; i < state->_open_elements.length; ++i) {
|
661
|
+
gumbo_vector_init(
|
662
|
+
parser, state->_open_elements.length, &extra_data->tag_stack);
|
663
|
+
for (unsigned int i = 0; i < state->_open_elements.length; ++i) {
|
622
664
|
const GumboNode* node = state->_open_elements.data[i];
|
623
|
-
assert(
|
624
|
-
|
625
|
-
|
665
|
+
assert(
|
666
|
+
node->type == GUMBO_NODE_ELEMENT || node->type == GUMBO_NODE_TEMPLATE);
|
667
|
+
gumbo_vector_add(
|
668
|
+
parser, (void*) node->v.element.tag, &extra_data->tag_stack);
|
626
669
|
}
|
627
670
|
return error;
|
628
671
|
}
|
@@ -631,13 +674,8 @@ static GumboError* parser_add_parse_error(GumboParser* parser, const GumboToken*
|
|
631
674
|
// by is_start) with one of the tag types in the varargs list. Terminate the
|
632
675
|
// list with GUMBO_TAG_LAST; this functions as a sentinel since no portion of
|
633
676
|
// the spec references tags that are not in the spec.
|
634
|
-
|
635
|
-
|
636
|
-
// way so it's easy to verify the code against the spec), but it may be worth
|
637
|
-
// coming up with a notion of a "tag set" that includes a list of tags, and
|
638
|
-
// using that in many places. It'd probably also help performance, but I want
|
639
|
-
// to profile before optimizing.
|
640
|
-
static bool tag_in(const GumboToken* token, bool is_start, ...) {
|
677
|
+
static bool tag_in(
|
678
|
+
const GumboToken* token, bool is_start, const gumbo_tagset tags) {
|
641
679
|
GumboTag token_tag;
|
642
680
|
if (is_start && token->type == GUMBO_TOKEN_START_TAG) {
|
643
681
|
token_tag = token->v.start_tag.tag;
|
@@ -646,19 +684,7 @@ static bool tag_in(const GumboToken* token, bool is_start, ...) {
|
|
646
684
|
} else {
|
647
685
|
return false;
|
648
686
|
}
|
649
|
-
|
650
|
-
va_list tags;
|
651
|
-
va_start(tags, is_start);
|
652
|
-
bool result = false;
|
653
|
-
for (GumboTag tag = va_arg(tags, GumboTag); tag != GUMBO_TAG_LAST;
|
654
|
-
tag = va_arg(tags, GumboTag)) {
|
655
|
-
if (tag == token_tag) {
|
656
|
-
result = true;
|
657
|
-
break;
|
658
|
-
}
|
659
|
-
}
|
660
|
-
va_end(tags);
|
661
|
-
return result;
|
687
|
+
return (token_tag < GUMBO_TAG_LAST && tags[(int) token_tag] != 0);
|
662
688
|
}
|
663
689
|
|
664
690
|
// Like tag_in, but for the single-tag case.
|
@@ -673,50 +699,125 @@ static bool tag_is(const GumboToken* token, bool is_start, GumboTag tag) {
|
|
673
699
|
}
|
674
700
|
|
675
701
|
// Like tag_in, but checks for the tag of a node, rather than a token.
|
676
|
-
static bool
|
702
|
+
static bool node_tag_in_set(const GumboNode* node, const gumbo_tagset tags) {
|
677
703
|
assert(node != NULL);
|
678
|
-
if (node->type != GUMBO_NODE_ELEMENT) {
|
704
|
+
if (node->type != GUMBO_NODE_ELEMENT && node->type != GUMBO_NODE_TEMPLATE) {
|
679
705
|
return false;
|
680
706
|
}
|
681
|
-
|
682
|
-
|
683
|
-
va_list tags;
|
684
|
-
va_start(tags, node);
|
685
|
-
bool result = false;
|
686
|
-
for (GumboTag tag = va_arg(tags, GumboTag); tag != GUMBO_TAG_LAST;
|
687
|
-
tag = va_arg(tags, GumboTag)) {
|
688
|
-
assert(tag <= GUMBO_TAG_LAST);
|
689
|
-
if (tag == node_tag) {
|
690
|
-
result = true;
|
691
|
-
break;
|
692
|
-
}
|
693
|
-
}
|
694
|
-
va_end(tags);
|
695
|
-
return result;
|
707
|
+
return TAGSET_INCLUDES(
|
708
|
+
tags, node->v.element.tag_namespace, node->v.element.tag);
|
696
709
|
}
|
697
710
|
|
698
711
|
// Like node_tag_in, but for the single-tag case.
|
699
|
-
static bool
|
700
|
-
|
712
|
+
static bool node_qualified_tag_is(
|
713
|
+
const GumboNode* node, GumboNamespaceEnum ns, GumboTag tag) {
|
714
|
+
assert(node);
|
715
|
+
return (node->type == GUMBO_NODE_ELEMENT ||
|
716
|
+
node->type == GUMBO_NODE_TEMPLATE) &&
|
717
|
+
node->v.element.tag == tag && node->v.element.tag_namespace == ns;
|
718
|
+
}
|
719
|
+
|
720
|
+
// Like node_tag_in, but for the single-tag case in the HTML namespace
|
721
|
+
static bool node_html_tag_is(const GumboNode* node, GumboTag tag) {
|
722
|
+
return node_qualified_tag_is(node, GUMBO_NAMESPACE_HTML, tag);
|
723
|
+
}
|
724
|
+
|
725
|
+
static void push_template_insertion_mode(
|
726
|
+
GumboParser* parser, GumboInsertionMode mode) {
|
727
|
+
gumbo_vector_add(
|
728
|
+
parser, (void*) mode, &parser->_parser_state->_template_insertion_modes);
|
729
|
+
}
|
730
|
+
|
731
|
+
static void pop_template_insertion_mode(GumboParser* parser) {
|
732
|
+
gumbo_vector_pop(parser, &parser->_parser_state->_template_insertion_modes);
|
733
|
+
}
|
734
|
+
|
735
|
+
// Returns the current template insertion mode. If the stack of template
|
736
|
+
// insertion modes is empty, this returns GUMBO_INSERTION_MODE_INITIAL.
|
737
|
+
static GumboInsertionMode get_current_template_insertion_mode(
|
738
|
+
const GumboParser* parser) {
|
739
|
+
GumboVector* template_insertion_modes =
|
740
|
+
&parser->_parser_state->_template_insertion_modes;
|
741
|
+
if (template_insertion_modes->length == 0) {
|
742
|
+
return GUMBO_INSERTION_MODE_INITIAL;
|
743
|
+
}
|
744
|
+
return (GumboInsertionMode)
|
745
|
+
template_insertion_modes->data[(template_insertion_modes->length - 1)];
|
701
746
|
}
|
702
747
|
|
703
748
|
// http://www.whatwg.org/specs/web-apps/current-work/multipage/tree-construction.html#mathml-text-integration-point
|
704
749
|
static bool is_mathml_integration_point(const GumboNode* node) {
|
705
|
-
return
|
706
|
-
|
707
|
-
|
750
|
+
return node_tag_in_set(
|
751
|
+
node, (gumbo_tagset){TAG_MATHML(MI), TAG_MATHML(MO), TAG_MATHML(MN),
|
752
|
+
TAG_MATHML(MS), TAG_MATHML(MTEXT)});
|
708
753
|
}
|
709
754
|
|
710
755
|
// http://www.whatwg.org/specs/web-apps/current-work/multipage/tree-construction.html#html-integration-point
|
711
756
|
static bool is_html_integration_point(const GumboNode* node) {
|
712
|
-
return (
|
713
|
-
|
714
|
-
|
715
|
-
|
716
|
-
|
717
|
-
|
718
|
-
|
719
|
-
|
757
|
+
return node_tag_in_set(node, (gumbo_tagset){TAG_SVG(FOREIGNOBJECT),
|
758
|
+
TAG_SVG(DESC), TAG_SVG(TITLE)}) ||
|
759
|
+
(node_qualified_tag_is(
|
760
|
+
node, GUMBO_NAMESPACE_MATHML, GUMBO_TAG_ANNOTATION_XML) &&
|
761
|
+
(attribute_matches(
|
762
|
+
&node->v.element.attributes, "encoding", "text/html") ||
|
763
|
+
attribute_matches(&node->v.element.attributes, "encoding",
|
764
|
+
"application/xhtml+xml")));
|
765
|
+
}
|
766
|
+
|
767
|
+
// This represents a place to insert a node, consisting of a target parent and a
|
768
|
+
// child index within that parent. If the node should be inserted at the end of
|
769
|
+
// the parent's child, index will be -1.
|
770
|
+
typedef struct {
|
771
|
+
GumboNode* target;
|
772
|
+
int index;
|
773
|
+
} InsertionLocation;
|
774
|
+
|
775
|
+
InsertionLocation get_appropriate_insertion_location(
|
776
|
+
GumboParser* parser, GumboNode* override_target) {
|
777
|
+
InsertionLocation retval = {override_target, -1};
|
778
|
+
if (retval.target == NULL) {
|
779
|
+
// No override target; default to the current node, but special-case the
|
780
|
+
// root node since get_current_node() assumes the stack of open elements is
|
781
|
+
// non-empty.
|
782
|
+
retval.target = parser->_output->root != NULL ? get_current_node(parser)
|
783
|
+
: get_document_node(parser);
|
784
|
+
}
|
785
|
+
if (!parser->_parser_state->_foster_parent_insertions ||
|
786
|
+
!node_tag_in_set(retval.target, (gumbo_tagset){TAG(TABLE), TAG(TBODY),
|
787
|
+
TAG(TFOOT), TAG(THEAD), TAG(TR)})) {
|
788
|
+
return retval;
|
789
|
+
}
|
790
|
+
|
791
|
+
// Foster-parenting case.
|
792
|
+
int last_template_index = -1;
|
793
|
+
int last_table_index = -1;
|
794
|
+
GumboVector* open_elements = &parser->_parser_state->_open_elements;
|
795
|
+
for (unsigned int i = 0; i < open_elements->length; ++i) {
|
796
|
+
if (node_html_tag_is(open_elements->data[i], GUMBO_TAG_TEMPLATE)) {
|
797
|
+
last_template_index = i;
|
798
|
+
}
|
799
|
+
if (node_html_tag_is(open_elements->data[i], GUMBO_TAG_TABLE)) {
|
800
|
+
last_table_index = i;
|
801
|
+
}
|
802
|
+
}
|
803
|
+
if (last_template_index != -1 &&
|
804
|
+
(last_table_index == -1 || last_template_index > last_table_index)) {
|
805
|
+
retval.target = open_elements->data[last_template_index];
|
806
|
+
return retval;
|
807
|
+
}
|
808
|
+
if (last_table_index == -1) {
|
809
|
+
retval.target = open_elements->data[0];
|
810
|
+
return retval;
|
811
|
+
}
|
812
|
+
GumboNode* last_table = open_elements->data[last_table_index];
|
813
|
+
if (last_table->parent != NULL) {
|
814
|
+
retval.target = last_table->parent;
|
815
|
+
retval.index = last_table->index_within_parent;
|
816
|
+
return retval;
|
817
|
+
}
|
818
|
+
|
819
|
+
retval.target = open_elements->data[last_table_index - 1];
|
820
|
+
return retval;
|
720
821
|
}
|
721
822
|
|
722
823
|
// Appends a node to the end of its parent, setting the "parent" and
|
@@ -726,7 +827,8 @@ static void append_node(
|
|
726
827
|
assert(node->parent == NULL);
|
727
828
|
assert(node->index_within_parent == -1);
|
728
829
|
GumboVector* children;
|
729
|
-
if (parent->type == GUMBO_NODE_ELEMENT
|
830
|
+
if (parent->type == GUMBO_NODE_ELEMENT ||
|
831
|
+
parent->type == GUMBO_NODE_TEMPLATE) {
|
730
832
|
children = &parent->v.element.children;
|
731
833
|
} else {
|
732
834
|
assert(parent->type == GUMBO_NODE_DOCUMENT);
|
@@ -738,64 +840,41 @@ static void append_node(
|
|
738
840
|
assert(node->index_within_parent < children->length);
|
739
841
|
}
|
740
842
|
|
741
|
-
// Inserts a node at the specified
|
843
|
+
// Inserts a node at the specified InsertionLocation, updating the
|
742
844
|
// "parent" and "index_within_parent" fields of it and all its siblings.
|
845
|
+
// If the index of the location is -1, this calls append_node.
|
743
846
|
static void insert_node(
|
744
|
-
GumboParser* parser, GumboNode*
|
847
|
+
GumboParser* parser, GumboNode* node, InsertionLocation location) {
|
745
848
|
assert(node->parent == NULL);
|
746
849
|
assert(node->index_within_parent == -1);
|
747
|
-
|
748
|
-
|
749
|
-
|
750
|
-
|
751
|
-
|
752
|
-
|
753
|
-
|
754
|
-
|
755
|
-
|
756
|
-
|
757
|
-
|
758
|
-
|
759
|
-
|
760
|
-
}
|
850
|
+
GumboNode* parent = location.target;
|
851
|
+
int index = location.index;
|
852
|
+
if (index != -1) {
|
853
|
+
GumboVector* children = NULL;
|
854
|
+
if (parent->type == GUMBO_NODE_ELEMENT ||
|
855
|
+
parent->type == GUMBO_NODE_TEMPLATE) {
|
856
|
+
children = &parent->v.element.children;
|
857
|
+
} else if (parent->type == GUMBO_NODE_DOCUMENT) {
|
858
|
+
children = &parent->v.document.children;
|
859
|
+
assert(children->length == 0);
|
860
|
+
} else {
|
861
|
+
assert(0);
|
862
|
+
}
|
761
863
|
|
762
|
-
|
763
|
-
|
764
|
-
|
765
|
-
|
766
|
-
|
767
|
-
|
768
|
-
|
769
|
-
|
770
|
-
|
771
|
-
|
772
|
-
GumboNode* table_element = open_elements->data[i];
|
773
|
-
if (node_tag_is(table_element, GUMBO_TAG_TABLE)) {
|
774
|
-
foster_parent_element = table_element->parent;
|
775
|
-
if (!foster_parent_element ||
|
776
|
-
foster_parent_element->type != GUMBO_NODE_ELEMENT) {
|
777
|
-
// Table has no parent; spec says it's possible if a script manipulated
|
778
|
-
// the DOM, although I don't think we have to worry about this case.
|
779
|
-
gumbo_debug("Table has no parent.\n");
|
780
|
-
foster_parent_element = open_elements->data[i - 1];
|
781
|
-
break;
|
782
|
-
}
|
783
|
-
assert(foster_parent_element->type == GUMBO_NODE_ELEMENT);
|
784
|
-
gumbo_debug("Found enclosing table (%x) at %d; parent=%s, index=%d.\n",
|
785
|
-
table_element, i, gumbo_normalized_tagname(
|
786
|
-
foster_parent_element->v.element.tag),
|
787
|
-
table_element->index_within_parent);
|
788
|
-
assert(foster_parent_element->v.element.children.data[
|
789
|
-
table_element->index_within_parent] == table_element);
|
790
|
-
insert_node(parser, foster_parent_element,
|
791
|
-
table_element->index_within_parent, node);
|
792
|
-
return;
|
864
|
+
assert(index >= 0);
|
865
|
+
assert((unsigned int) index < children->length);
|
866
|
+
node->parent = parent;
|
867
|
+
node->index_within_parent = index;
|
868
|
+
gumbo_vector_insert_at(parser, (void*) node, index, children);
|
869
|
+
assert(node->index_within_parent < children->length);
|
870
|
+
for (unsigned int i = index + 1; i < children->length; ++i) {
|
871
|
+
GumboNode* sibling = children->data[i];
|
872
|
+
sibling->index_within_parent = i;
|
873
|
+
assert(sibling->index_within_parent < children->length);
|
793
874
|
}
|
875
|
+
} else {
|
876
|
+
append_node(parser, parent, node);
|
794
877
|
}
|
795
|
-
if (node->type == GUMBO_NODE_ELEMENT) {
|
796
|
-
gumbo_vector_add(parser, (void*) node, open_elements);
|
797
|
-
}
|
798
|
-
append_node(parser, foster_parent_element, node);
|
799
878
|
}
|
800
879
|
|
801
880
|
static void maybe_flush_text_node_buffer(GumboParser* parser) {
|
@@ -806,30 +885,31 @@ static void maybe_flush_text_node_buffer(GumboParser* parser) {
|
|
806
885
|
}
|
807
886
|
|
808
887
|
assert(buffer_state->_type == GUMBO_NODE_WHITESPACE ||
|
809
|
-
buffer_state->_type == GUMBO_NODE_TEXT
|
888
|
+
buffer_state->_type == GUMBO_NODE_TEXT ||
|
889
|
+
buffer_state->_type == GUMBO_NODE_CDATA);
|
810
890
|
GumboNode* text_node = create_node(parser, buffer_state->_type);
|
811
891
|
GumboText* text_node_data = &text_node->v.text;
|
812
|
-
text_node_data->text =
|
813
|
-
parser, &buffer_state->_buffer);
|
892
|
+
text_node_data->text =
|
893
|
+
gumbo_string_buffer_to_string(parser, &buffer_state->_buffer);
|
814
894
|
text_node_data->original_text.data = buffer_state->_start_original_text;
|
815
895
|
text_node_data->original_text.length =
|
816
896
|
state->_current_token->original_text.data -
|
817
897
|
buffer_state->_start_original_text;
|
818
898
|
text_node_data->start_pos = buffer_state->_start_position;
|
819
|
-
|
820
|
-
|
821
|
-
|
822
|
-
|
899
|
+
|
900
|
+
gumbo_debug("Flushing text node buffer of %.*s.\n",
|
901
|
+
(int) buffer_state->_buffer.length, buffer_state->_buffer.data);
|
902
|
+
|
903
|
+
InsertionLocation location = get_appropriate_insertion_location(parser, NULL);
|
904
|
+
if (location.target->type == GUMBO_NODE_DOCUMENT) {
|
905
|
+
// The DOM does not allow Document nodes to have Text children, so per the
|
906
|
+
// spec, they are dropped on the floor.
|
907
|
+
destroy_node(parser, text_node);
|
823
908
|
} else {
|
824
|
-
|
825
|
-
parser, parser->_output->root ?
|
826
|
-
get_current_node(parser) : parser->_output->document, text_node);
|
909
|
+
insert_node(parser, text_node, location);
|
827
910
|
}
|
828
|
-
gumbo_debug("Flushing text node buffer of %.*s.\n",
|
829
|
-
(int) buffer_state->_buffer.length, buffer_state->_buffer.data);
|
830
911
|
|
831
|
-
|
832
|
-
gumbo_string_buffer_init(parser, &buffer_state->_buffer);
|
912
|
+
gumbo_string_buffer_clear(parser, &buffer_state->_buffer);
|
833
913
|
buffer_state->_type = GUMBO_NODE_WHITESPACE;
|
834
914
|
assert(buffer_state->_buffer.length == 0);
|
835
915
|
}
|
@@ -837,18 +917,17 @@ static void maybe_flush_text_node_buffer(GumboParser* parser) {
|
|
837
917
|
static void record_end_of_element(
|
838
918
|
GumboToken* current_token, GumboElement* element) {
|
839
919
|
element->end_pos = current_token->position;
|
840
|
-
element->original_end_tag =
|
841
|
-
|
842
|
-
|
920
|
+
element->original_end_tag = current_token->type == GUMBO_TOKEN_END_TAG
|
921
|
+
? current_token->original_text
|
922
|
+
: kGumboEmptyString;
|
843
923
|
}
|
844
924
|
|
845
925
|
static GumboNode* pop_current_node(GumboParser* parser) {
|
846
926
|
GumboParserState* state = parser->_parser_state;
|
847
927
|
maybe_flush_text_node_buffer(parser);
|
848
928
|
if (state->_open_elements.length > 0) {
|
849
|
-
assert(
|
850
|
-
gumbo_debug(
|
851
|
-
"Popping %s node.\n",
|
929
|
+
assert(node_html_tag_is(state->_open_elements.data[0], GUMBO_TAG_HTML));
|
930
|
+
gumbo_debug("Popping %s node.\n",
|
852
931
|
gumbo_normalized_tagname(get_current_node(parser)->v.element.tag));
|
853
932
|
}
|
854
933
|
GumboNode* current_node = gumbo_vector_pop(parser, &state->_open_elements);
|
@@ -856,13 +935,16 @@ static GumboNode* pop_current_node(GumboParser* parser) {
|
|
856
935
|
assert(state->_open_elements.length == 0);
|
857
936
|
return NULL;
|
858
937
|
}
|
859
|
-
assert(current_node->type == GUMBO_NODE_ELEMENT
|
938
|
+
assert(current_node->type == GUMBO_NODE_ELEMENT ||
|
939
|
+
current_node->type == GUMBO_NODE_TEMPLATE);
|
860
940
|
bool is_closed_body_or_html_tag =
|
861
|
-
(
|
862
|
-
|
941
|
+
(node_html_tag_is(current_node, GUMBO_TAG_BODY) &&
|
942
|
+
state->_closed_body_tag) ||
|
943
|
+
(node_html_tag_is(current_node, GUMBO_TAG_HTML) &&
|
944
|
+
state->_closed_html_tag);
|
863
945
|
if ((state->_current_token->type != GUMBO_TOKEN_END_TAG ||
|
864
|
-
|
865
|
-
|
946
|
+
!node_html_tag_is(current_node, state->_current_token->v.end_tag)) &&
|
947
|
+
!is_closed_body_or_html_tag) {
|
866
948
|
current_node->parse_flags |= GUMBO_INSERTION_IMPLICIT_END_TAG;
|
867
949
|
}
|
868
950
|
if (!is_closed_body_or_html_tag) {
|
@@ -885,25 +967,25 @@ static void append_comment_node(
|
|
885
967
|
|
886
968
|
// http://www.whatwg.org/specs/web-apps/current-work/complete/tokenization.html#clear-the-stack-back-to-a-table-row-context
|
887
969
|
static void clear_stack_to_table_row_context(GumboParser* parser) {
|
888
|
-
while (!
|
889
|
-
|
970
|
+
while (!node_tag_in_set(get_current_node(parser),
|
971
|
+
(gumbo_tagset){TAG(HTML), TAG(TR), TAG(TEMPLATE)})) {
|
890
972
|
pop_current_node(parser);
|
891
973
|
}
|
892
974
|
}
|
893
975
|
|
894
976
|
// http://www.whatwg.org/specs/web-apps/current-work/complete/tokenization.html#clear-the-stack-back-to-a-table-context
|
895
977
|
static void clear_stack_to_table_context(GumboParser* parser) {
|
896
|
-
while (!
|
897
|
-
|
978
|
+
while (!node_tag_in_set(get_current_node(parser),
|
979
|
+
(gumbo_tagset){TAG(HTML), TAG(TABLE), TAG(TEMPLATE)})) {
|
898
980
|
pop_current_node(parser);
|
899
981
|
}
|
900
982
|
}
|
901
983
|
|
902
984
|
// http://www.whatwg.org/specs/web-apps/current-work/complete/tokenization.html#clear-the-stack-back-to-a-table-body-context
|
903
985
|
void clear_stack_to_table_body_context(GumboParser* parser) {
|
904
|
-
while (!
|
905
|
-
|
906
|
-
|
986
|
+
while (!node_tag_in_set(get_current_node(parser),
|
987
|
+
(gumbo_tagset){TAG(HTML), TAG(TBODY), TAG(TFOOT), TAG(THEAD),
|
988
|
+
TAG(TEMPLATE)})) {
|
907
989
|
pop_current_node(parser);
|
908
990
|
}
|
909
991
|
}
|
@@ -918,7 +1000,9 @@ static GumboNode* create_element(GumboParser* parser, GumboTag tag) {
|
|
918
1000
|
element->tag_namespace = GUMBO_NAMESPACE_HTML;
|
919
1001
|
element->original_tag = kGumboEmptyString;
|
920
1002
|
element->original_end_tag = kGumboEmptyString;
|
921
|
-
element->start_pos = parser->_parser_state->_current_token
|
1003
|
+
element->start_pos = (parser->_parser_state->_current_token)
|
1004
|
+
? parser->_parser_state->_current_token->position
|
1005
|
+
: kGumboEmptySourcePosition;
|
922
1006
|
element->end_pos = kGumboEmptySourcePosition;
|
923
1007
|
return node;
|
924
1008
|
}
|
@@ -929,7 +1013,12 @@ static GumboNode* create_element_from_token(
|
|
929
1013
|
assert(token->type == GUMBO_TOKEN_START_TAG);
|
930
1014
|
GumboTokenStartTag* start_tag = &token->v.start_tag;
|
931
1015
|
|
932
|
-
|
1016
|
+
GumboNodeType type = (tag_namespace == GUMBO_NAMESPACE_HTML &&
|
1017
|
+
start_tag->tag == GUMBO_TAG_TEMPLATE)
|
1018
|
+
? GUMBO_NODE_TEMPLATE
|
1019
|
+
: GUMBO_NODE_ELEMENT;
|
1020
|
+
|
1021
|
+
GumboNode* node = create_node(parser, type);
|
933
1022
|
GumboElement* element = &node->v.element;
|
934
1023
|
gumbo_vector_init(parser, 1, &element->children);
|
935
1024
|
element->attributes = start_tag->attributes;
|
@@ -952,7 +1041,7 @@ static GumboNode* create_element_from_token(
|
|
952
1041
|
|
953
1042
|
// http://www.whatwg.org/specs/web-apps/current-work/complete/tokenization.html#insert-an-html-element
|
954
1043
|
static void insert_element(GumboParser* parser, GumboNode* node,
|
955
|
-
|
1044
|
+
bool is_reconstructing_formatting_elements) {
|
956
1045
|
GumboParserState* state = parser->_parser_state;
|
957
1046
|
// NOTE(jdtang): The text node buffer must always be flushed before inserting
|
958
1047
|
// a node, otherwise we're handling nodes in a different order than the spec
|
@@ -966,20 +1055,8 @@ static void insert_element(GumboParser* parser, GumboNode* node,
|
|
966
1055
|
if (!is_reconstructing_formatting_elements) {
|
967
1056
|
maybe_flush_text_node_buffer(parser);
|
968
1057
|
}
|
969
|
-
|
970
|
-
|
971
|
-
GUMBO_TAG_TFOOT, GUMBO_TAG_THEAD, GUMBO_TAG_TR, GUMBO_TAG_LAST)) {
|
972
|
-
foster_parent_element(parser, node);
|
973
|
-
gumbo_vector_add(parser, (void*) node, &state->_open_elements);
|
974
|
-
return;
|
975
|
-
}
|
976
|
-
|
977
|
-
// This is called to insert the root HTML element, but get_current_node
|
978
|
-
// assumes the stack of open elements is non-empty, so we need special
|
979
|
-
// handling for this case.
|
980
|
-
append_node(
|
981
|
-
parser, parser->_output->root ?
|
982
|
-
get_current_node(parser) : parser->_output->document, node);
|
1058
|
+
InsertionLocation location = get_appropriate_insertion_location(parser, NULL);
|
1059
|
+
insert_node(parser, node, location);
|
983
1060
|
gumbo_vector_add(parser, (void*) node, &state->_open_elements);
|
984
1061
|
}
|
985
1062
|
|
@@ -992,7 +1069,7 @@ static GumboNode* insert_element_from_token(
|
|
992
1069
|
create_element_from_token(parser, token, GUMBO_NAMESPACE_HTML);
|
993
1070
|
insert_element(parser, element, false);
|
994
1071
|
gumbo_debug("Inserting <%s> element (@%x) from token.\n",
|
995
|
-
|
1072
|
+
gumbo_normalized_tagname(element->v.element.tag), element);
|
996
1073
|
return element;
|
997
1074
|
}
|
998
1075
|
|
@@ -1005,7 +1082,7 @@ static GumboNode* insert_element_of_tag_type(
|
|
1005
1082
|
element->parse_flags |= GUMBO_INSERTION_BY_PARSER | reason;
|
1006
1083
|
insert_element(parser, element, false);
|
1007
1084
|
gumbo_debug("Inserting %s element (@%x) from tag type.\n",
|
1008
|
-
|
1085
|
+
gumbo_normalized_tagname(tag), element);
|
1009
1086
|
return element;
|
1010
1087
|
}
|
1011
1088
|
|
@@ -1017,16 +1094,14 @@ static GumboNode* insert_foreign_element(
|
|
1017
1094
|
GumboNode* element = create_element_from_token(parser, token, tag_namespace);
|
1018
1095
|
insert_element(parser, element, false);
|
1019
1096
|
if (token_has_attribute(token, "xmlns") &&
|
1020
|
-
!attribute_matches_case_sensitive(
|
1021
|
-
&token->v.start_tag.attributes, "xmlns",
|
1097
|
+
!attribute_matches_case_sensitive(&token->v.start_tag.attributes, "xmlns",
|
1022
1098
|
kLegalXmlns[tag_namespace])) {
|
1023
1099
|
// TODO(jdtang): Since there're multiple possible error codes here, we
|
1024
1100
|
// eventually need reason codes to differentiate them.
|
1025
1101
|
parser_add_parse_error(parser, token);
|
1026
1102
|
}
|
1027
1103
|
if (token_has_attribute(token, "xmlns:xlink") &&
|
1028
|
-
!attribute_matches_case_sensitive(
|
1029
|
-
&token->v.start_tag.attributes,
|
1104
|
+
!attribute_matches_case_sensitive(&token->v.start_tag.attributes,
|
1030
1105
|
"xmlns:xlink", "http://www.w3.org/1999/xlink")) {
|
1031
1106
|
parser_add_parse_error(parser, token);
|
1032
1107
|
}
|
@@ -1035,7 +1110,8 @@ static GumboNode* insert_foreign_element(
|
|
1035
1110
|
|
1036
1111
|
static void insert_text_token(GumboParser* parser, GumboToken* token) {
|
1037
1112
|
assert(token->type == GUMBO_TOKEN_WHITESPACE ||
|
1038
|
-
token->type == GUMBO_TOKEN_CHARACTER
|
1113
|
+
token->type == GUMBO_TOKEN_CHARACTER ||
|
1114
|
+
token->type == GUMBO_TOKEN_NULL || token->type == GUMBO_TOKEN_CDATA);
|
1039
1115
|
TextNodeBufferState* buffer_state = &parser->_parser_state->_text_node;
|
1040
1116
|
if (buffer_state->_buffer.length == 0) {
|
1041
1117
|
// Initialize position fields.
|
@@ -1046,6 +1122,8 @@ static void insert_text_token(GumboParser* parser, GumboToken* token) {
|
|
1046
1122
|
parser, token->v.character, &buffer_state->_buffer);
|
1047
1123
|
if (token->type == GUMBO_TOKEN_CHARACTER) {
|
1048
1124
|
buffer_state->_type = GUMBO_NODE_TEXT;
|
1125
|
+
} else if (token->type == GUMBO_TOKEN_CDATA) {
|
1126
|
+
buffer_state->_type = GUMBO_NODE_CDATA;
|
1049
1127
|
}
|
1050
1128
|
gumbo_debug("Inserting text token '%c'.\n", token->v.character);
|
1051
1129
|
}
|
@@ -1068,12 +1146,12 @@ static void acknowledge_self_closing_tag(GumboParser* parser) {
|
|
1068
1146
|
// elements, and fills in its index if so.
|
1069
1147
|
static bool find_last_anchor_index(GumboParser* parser, int* anchor_index) {
|
1070
1148
|
GumboVector* elements = &parser->_parser_state->_active_formatting_elements;
|
1071
|
-
for (int i = elements->length; --i >= 0;
|
1149
|
+
for (int i = elements->length; --i >= 0;) {
|
1072
1150
|
GumboNode* node = elements->data[i];
|
1073
1151
|
if (node == &kActiveFormattingScopeMarker) {
|
1074
1152
|
return false;
|
1075
1153
|
}
|
1076
|
-
if (
|
1154
|
+
if (node_html_tag_is(node, GUMBO_TAG_A)) {
|
1077
1155
|
*anchor_index = i;
|
1078
1156
|
return true;
|
1079
1157
|
}
|
@@ -1085,23 +1163,21 @@ static bool find_last_anchor_index(GumboParser* parser, int* anchor_index) {
|
|
1085
1163
|
// formatting elements (after the last active scope marker) that have a specific
|
1086
1164
|
// tag. If this is > 0, then earliest_matching_index will be filled in with the
|
1087
1165
|
// index of the first such element.
|
1088
|
-
static int count_formatting_elements_of_tag(
|
1089
|
-
|
1090
|
-
int* earliest_matching_index) {
|
1166
|
+
static int count_formatting_elements_of_tag(GumboParser* parser,
|
1167
|
+
const GumboNode* desired_node, int* earliest_matching_index) {
|
1091
1168
|
const GumboElement* desired_element = &desired_node->v.element;
|
1092
1169
|
GumboVector* elements = &parser->_parser_state->_active_formatting_elements;
|
1093
1170
|
int num_identical_elements = 0;
|
1094
|
-
for (int i = elements->length; --i >= 0;
|
1171
|
+
for (int i = elements->length; --i >= 0;) {
|
1095
1172
|
GumboNode* node = elements->data[i];
|
1096
1173
|
if (node == &kActiveFormattingScopeMarker) {
|
1097
1174
|
break;
|
1098
1175
|
}
|
1099
1176
|
assert(node->type == GUMBO_NODE_ELEMENT);
|
1100
|
-
|
1101
|
-
|
1102
|
-
|
1103
|
-
|
1104
|
-
&desired_element->attributes)) {
|
1177
|
+
if (node_qualified_tag_is(
|
1178
|
+
node, desired_element->tag_namespace, desired_element->tag) &&
|
1179
|
+
all_attributes_match(
|
1180
|
+
&node->v.element.attributes, &desired_element->attributes)) {
|
1105
1181
|
num_identical_elements++;
|
1106
1182
|
*earliest_matching_index = i;
|
1107
1183
|
}
|
@@ -1128,7 +1204,7 @@ static void add_formatting_element(GumboParser* parser, const GumboNode* node) {
|
|
1128
1204
|
// Noah's Ark clause: if there're at least 3, remove the earliest.
|
1129
1205
|
if (num_identical_elements >= 3) {
|
1130
1206
|
gumbo_debug("Noah's ark clause: removing element at %d.\n",
|
1131
|
-
|
1207
|
+
earliest_identical_element);
|
1132
1208
|
gumbo_vector_remove_at(parser, earliest_identical_element, elements);
|
1133
1209
|
}
|
1134
1210
|
|
@@ -1137,7 +1213,7 @@ static void add_formatting_element(GumboParser* parser, const GumboNode* node) {
|
|
1137
1213
|
|
1138
1214
|
static bool is_open_element(GumboParser* parser, const GumboNode* node) {
|
1139
1215
|
GumboVector* open_elements = &parser->_parser_state->_open_elements;
|
1140
|
-
for (int i = 0; i < open_elements->length; ++i) {
|
1216
|
+
for (unsigned int i = 0; i < open_elements->length; ++i) {
|
1141
1217
|
if (open_elements->data[i] == node) {
|
1142
1218
|
return true;
|
1143
1219
|
}
|
@@ -1149,8 +1225,8 @@ static bool is_open_element(GumboParser* parser, const GumboNode* node) {
|
|
1149
1225
|
// clone shares no structure with the original node: all owned strings and
|
1150
1226
|
// values are fresh copies.
|
1151
1227
|
GumboNode* clone_node(
|
1152
|
-
GumboParser* parser,
|
1153
|
-
assert(node->type == GUMBO_NODE_ELEMENT);
|
1228
|
+
GumboParser* parser, GumboNode* node, GumboParseFlags reason) {
|
1229
|
+
assert(node->type == GUMBO_NODE_ELEMENT || node->type == GUMBO_NODE_TEMPLATE);
|
1154
1230
|
GumboNode* new_node = gumbo_parser_allocate(parser, sizeof(GumboNode));
|
1155
1231
|
*new_node = *node;
|
1156
1232
|
new_node->parent = NULL;
|
@@ -1164,7 +1240,7 @@ GumboNode* clone_node(
|
|
1164
1240
|
|
1165
1241
|
const GumboVector* old_attributes = &node->v.element.attributes;
|
1166
1242
|
gumbo_vector_init(parser, old_attributes->length, &element->attributes);
|
1167
|
-
for (int i = 0; i < old_attributes->length; ++i) {
|
1243
|
+
for (unsigned int i = 0; i < old_attributes->length; ++i) {
|
1168
1244
|
const GumboAttribute* old_attr = old_attributes->data[i];
|
1169
1245
|
GumboAttribute* attr =
|
1170
1246
|
gumbo_parser_allocate(parser, sizeof(GumboAttribute));
|
@@ -1188,8 +1264,8 @@ static void reconstruct_active_formatting_elements(GumboParser* parser) {
|
|
1188
1264
|
}
|
1189
1265
|
|
1190
1266
|
// Step 2 & 3
|
1191
|
-
int i = elements->length - 1;
|
1192
|
-
|
1267
|
+
unsigned int i = elements->length - 1;
|
1268
|
+
GumboNode* element = elements->data[i];
|
1193
1269
|
if (element == &kActiveFormattingScopeMarker ||
|
1194
1270
|
is_open_element(parser, element)) {
|
1195
1271
|
return;
|
@@ -1199,7 +1275,7 @@ static void reconstruct_active_formatting_elements(GumboParser* parser) {
|
|
1199
1275
|
do {
|
1200
1276
|
if (i == 0) {
|
1201
1277
|
// Step 4
|
1202
|
-
i = -1;
|
1278
|
+
i = -1; // Incremented to 0 below.
|
1203
1279
|
break;
|
1204
1280
|
}
|
1205
1281
|
// Step 5
|
@@ -1209,9 +1285,8 @@ static void reconstruct_active_formatting_elements(GumboParser* parser) {
|
|
1209
1285
|
|
1210
1286
|
++i;
|
1211
1287
|
gumbo_debug("Reconstructing elements from %d on %s parent.\n", i,
|
1212
|
-
|
1213
|
-
|
1214
|
-
for(; i < elements->length; ++i) {
|
1288
|
+
gumbo_normalized_tagname(get_current_node(parser)->v.element.tag));
|
1289
|
+
for (; i < elements->length; ++i) {
|
1215
1290
|
// Step 7 & 8.
|
1216
1291
|
assert(elements->length > 0);
|
1217
1292
|
assert(i < elements->length);
|
@@ -1220,11 +1295,16 @@ static void reconstruct_active_formatting_elements(GumboParser* parser) {
|
|
1220
1295
|
GumboNode* clone = clone_node(
|
1221
1296
|
parser, element, GUMBO_INSERTION_RECONSTRUCTED_FORMATTING_ELEMENT);
|
1222
1297
|
// Step 9.
|
1223
|
-
|
1298
|
+
InsertionLocation location =
|
1299
|
+
get_appropriate_insertion_location(parser, NULL);
|
1300
|
+
insert_node(parser, clone, location);
|
1301
|
+
gumbo_vector_add(
|
1302
|
+
parser, (void*) clone, &parser->_parser_state->_open_elements);
|
1303
|
+
|
1224
1304
|
// Step 10.
|
1225
1305
|
elements->data[i] = clone;
|
1226
1306
|
gumbo_debug("Reconstructed %s element at %d.\n",
|
1227
|
-
|
1307
|
+
gumbo_normalized_tagname(clone->v.element.tag), i);
|
1228
1308
|
}
|
1229
1309
|
}
|
1230
1310
|
|
@@ -1235,32 +1315,30 @@ static void clear_active_formatting_elements(GumboParser* parser) {
|
|
1235
1315
|
do {
|
1236
1316
|
node = gumbo_vector_pop(parser, elements);
|
1237
1317
|
++num_elements_cleared;
|
1238
|
-
} while(node && node != &kActiveFormattingScopeMarker);
|
1318
|
+
} while (node && node != &kActiveFormattingScopeMarker);
|
1239
1319
|
gumbo_debug("Cleared %d elements from active formatting list.\n",
|
1240
|
-
|
1320
|
+
num_elements_cleared);
|
1241
1321
|
}
|
1242
1322
|
|
1243
1323
|
// http://www.whatwg.org/specs/web-apps/current-work/complete/tokenization.html#the-initial-insertion-mode
|
1244
1324
|
static GumboQuirksModeEnum compute_quirks_mode(
|
1245
1325
|
const GumboTokenDocType* doctype) {
|
1246
|
-
if (doctype->force_quirks ||
|
1247
|
-
|
1248
|
-
|
1249
|
-
|
1250
|
-
|
1251
|
-
|
1252
|
-
|
1253
|
-
kQuirksModeSystemIdExactMatches, true) ||
|
1326
|
+
if (doctype->force_quirks || strcmp(doctype->name, kDoctypeHtml.data) ||
|
1327
|
+
is_in_static_list(
|
1328
|
+
doctype->public_identifier, kQuirksModePublicIdPrefixes, false) ||
|
1329
|
+
is_in_static_list(
|
1330
|
+
doctype->public_identifier, kQuirksModePublicIdExactMatches, true) ||
|
1331
|
+
is_in_static_list(
|
1332
|
+
doctype->system_identifier, kQuirksModeSystemIdExactMatches, true) ||
|
1254
1333
|
(is_in_static_list(doctype->public_identifier,
|
1255
|
-
|
1256
|
-
|
1334
|
+
kLimitedQuirksRequiresSystemIdPublicIdPrefixes, false) &&
|
1335
|
+
!doctype->has_system_identifier)) {
|
1257
1336
|
return GUMBO_DOCTYPE_QUIRKS;
|
1258
|
-
} else if (
|
1259
|
-
|
1260
|
-
|
1261
|
-
|
1262
|
-
|
1263
|
-
&& doctype->has_system_identifier)) {
|
1337
|
+
} else if (is_in_static_list(doctype->public_identifier,
|
1338
|
+
kLimitedQuirksPublicIdPrefixes, false) ||
|
1339
|
+
(is_in_static_list(doctype->public_identifier,
|
1340
|
+
kLimitedQuirksRequiresSystemIdPublicIdPrefixes, false) &&
|
1341
|
+
doctype->has_system_identifier)) {
|
1264
1342
|
return GUMBO_DOCTYPE_LIMITED_QUIRKS;
|
1265
1343
|
}
|
1266
1344
|
return GUMBO_DOCTYPE_NO_QUIRKS;
|
@@ -1269,83 +1347,50 @@ static GumboQuirksModeEnum compute_quirks_mode(
|
|
1269
1347
|
// The following functions are all defined by the "has an element in __ scope"
|
1270
1348
|
// sections of the HTML5 spec:
|
1271
1349
|
// http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#has-an-element-in-the-specific-scope
|
1272
|
-
// The basic idea behind them is that they check for an element of the given
|
1273
|
-
// name, contained within a scope formed by a set of other
|
1274
|
-
// example, "has an element in list scope" looks for an element of
|
1275
|
-
// within the nearest enclosing <ol> or <ul>, along
|
1276
|
-
// element types that serve to "firewall" their content
|
1277
|
-
// document.
|
1278
|
-
|
1279
|
-
|
1350
|
+
// The basic idea behind them is that they check for an element of the given
|
1351
|
+
// qualified name, contained within a scope formed by a set of other qualified
|
1352
|
+
// names. For example, "has an element in list scope" looks for an element of
|
1353
|
+
// the given qualified name within the nearest enclosing <ol> or <ul>, along
|
1354
|
+
// with a bunch of generic element types that serve to "firewall" their content
|
1355
|
+
// from the rest of the document. Note that because of the way the spec is
|
1356
|
+
// written,
|
1357
|
+
// all elements are expected to be in the HTML namespace
|
1358
|
+
static bool has_an_element_in_specific_scope(GumboParser* parser,
|
1359
|
+
int expected_size, const GumboTag* expected, bool negate,
|
1360
|
+
const gumbo_tagset tags) {
|
1280
1361
|
GumboVector* open_elements = &parser->_parser_state->_open_elements;
|
1281
|
-
|
1282
|
-
va_start(args, negate);
|
1283
|
-
// va_arg can only run through the list once, so we copy it to an GumboVector
|
1284
|
-
// here. I wonder if it'd make more sense to make tags the GumboVector*
|
1285
|
-
// parameter and 'expected' a vararg list, but that'd require changing a lot
|
1286
|
-
// of code for unknown benefit. We may want to change the representation of
|
1287
|
-
// these tag sets anyway, to something more efficient.
|
1288
|
-
GumboVector tags;
|
1289
|
-
gumbo_vector_init(parser, 10, &tags);
|
1290
|
-
for (GumboTag tag = va_arg(args, GumboTag); tag != GUMBO_TAG_LAST;
|
1291
|
-
tag = va_arg(args, GumboTag)) {
|
1292
|
-
// We store the tags inline instead of storing pointers to them.
|
1293
|
-
gumbo_vector_add(parser, (void*) tag, &tags);
|
1294
|
-
}
|
1295
|
-
va_end(args);
|
1296
|
-
|
1297
|
-
bool result = false;
|
1298
|
-
for (int i = open_elements->length; --i >= 0; ) {
|
1362
|
+
for (int i = open_elements->length; --i >= 0;) {
|
1299
1363
|
const GumboNode* node = open_elements->data[i];
|
1300
|
-
if (node->type != GUMBO_NODE_ELEMENT)
|
1364
|
+
if (node->type != GUMBO_NODE_ELEMENT && node->type != GUMBO_NODE_TEMPLATE)
|
1301
1365
|
continue;
|
1302
|
-
|
1366
|
+
|
1303
1367
|
GumboTag node_tag = node->v.element.tag;
|
1304
|
-
|
1305
|
-
|
1306
|
-
if (node_tag ==
|
1307
|
-
|
1308
|
-
goto cleanup;
|
1309
|
-
}
|
1368
|
+
GumboNamespaceEnum node_ns = node->v.element.tag_namespace;
|
1369
|
+
for (int j = 0; j < expected_size; ++j) {
|
1370
|
+
if (node_tag == expected[j] && node_ns == GUMBO_NAMESPACE_HTML)
|
1371
|
+
return true;
|
1310
1372
|
}
|
1311
1373
|
|
1312
|
-
bool
|
1313
|
-
|
1314
|
-
GumboTag tag = (GumboTag) tags.data[j];
|
1315
|
-
if (tag == node_tag) {
|
1316
|
-
found_tag = true;
|
1317
|
-
break;
|
1318
|
-
}
|
1319
|
-
}
|
1320
|
-
if (negate != found_tag) {
|
1321
|
-
result = false;
|
1322
|
-
goto cleanup;
|
1323
|
-
}
|
1374
|
+
bool found = TAGSET_INCLUDES(tags, node_ns, node_tag);
|
1375
|
+
if (negate != found) return false;
|
1324
1376
|
}
|
1325
|
-
|
1326
|
-
gumbo_vector_destroy(parser, &tags);
|
1327
|
-
return result;
|
1377
|
+
return false;
|
1328
1378
|
}
|
1329
1379
|
|
1330
|
-
//
|
1331
|
-
|
1332
|
-
|
1333
|
-
|
1334
|
-
|
1335
|
-
// GumboVector.
|
1336
|
-
#define DECLARE_ONE_ELEMENT_GUMBO_VECTOR(varname, from_var) \
|
1337
|
-
void* varname ## _tmp_array[1] = { (void*) from_var }; \
|
1338
|
-
GumboVector varname = { varname ## _tmp_array, 1, 1 }
|
1380
|
+
// Checks for the presence of an open element of the specified tag type.
|
1381
|
+
static bool has_open_element(GumboParser* parser, GumboTag tag) {
|
1382
|
+
return has_an_element_in_specific_scope(
|
1383
|
+
parser, 1, &tag, false, (gumbo_tagset){TAG(HTML)});
|
1384
|
+
}
|
1339
1385
|
|
1340
1386
|
// http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#has-an-element-in-scope
|
1341
1387
|
static bool has_an_element_in_scope(GumboParser* parser, GumboTag tag) {
|
1342
|
-
|
1343
|
-
|
1344
|
-
|
1345
|
-
|
1346
|
-
|
1347
|
-
|
1348
|
-
GUMBO_TAG_DESC, GUMBO_TAG_TITLE, GUMBO_TAG_LAST);
|
1388
|
+
return has_an_element_in_specific_scope(parser, 1, &tag, false,
|
1389
|
+
(gumbo_tagset){TAG(APPLET), TAG(CAPTION), TAG(HTML), TAG(TABLE), TAG(TD),
|
1390
|
+
TAG(TH), TAG(MARQUEE), TAG(OBJECT), TAG(TEMPLATE), TAG_MATHML(MI),
|
1391
|
+
TAG_MATHML(MO), TAG_MATHML(MN), TAG_MATHML(MS), TAG_MATHML(MTEXT),
|
1392
|
+
TAG_MATHML(ANNOTATION_XML), TAG_SVG(FOREIGNOBJECT), TAG_SVG(DESC),
|
1393
|
+
TAG_SVG(TITLE)});
|
1349
1394
|
}
|
1350
1395
|
|
1351
1396
|
// Like "has an element in scope", but for the specific case of looking for a
|
@@ -1356,21 +1401,21 @@ static bool has_an_element_in_scope(GumboParser* parser, GumboTag tag) {
|
|
1356
1401
|
// parameterize it.
|
1357
1402
|
static bool has_node_in_scope(GumboParser* parser, const GumboNode* node) {
|
1358
1403
|
GumboVector* open_elements = &parser->_parser_state->_open_elements;
|
1359
|
-
for (int i = open_elements->length; --i >= 0;
|
1404
|
+
for (int i = open_elements->length; --i >= 0;) {
|
1360
1405
|
const GumboNode* current = open_elements->data[i];
|
1361
1406
|
if (current == node) {
|
1362
1407
|
return true;
|
1363
1408
|
}
|
1364
|
-
if (current->type != GUMBO_NODE_ELEMENT
|
1409
|
+
if (current->type != GUMBO_NODE_ELEMENT &&
|
1410
|
+
current->type != GUMBO_NODE_TEMPLATE) {
|
1365
1411
|
continue;
|
1366
1412
|
}
|
1367
|
-
if (
|
1368
|
-
|
1369
|
-
|
1370
|
-
|
1371
|
-
|
1372
|
-
|
1373
|
-
GUMBO_TAG_LAST)) {
|
1413
|
+
if (node_tag_in_set(current,
|
1414
|
+
(gumbo_tagset){TAG(APPLET), TAG(CAPTION), TAG(HTML), TAG(TABLE),
|
1415
|
+
TAG(TD), TAG(TH), TAG(MARQUEE), TAG(OBJECT), TAG(TEMPLATE),
|
1416
|
+
TAG_MATHML(MI), TAG_MATHML(MO), TAG_MATHML(MN), TAG_MATHML(MS),
|
1417
|
+
TAG_MATHML(MTEXT), TAG_MATHML(ANNOTATION_XML),
|
1418
|
+
TAG_SVG(FOREIGNOBJECT), TAG_SVG(DESC), TAG_SVG(TITLE)})) {
|
1374
1419
|
return false;
|
1375
1420
|
}
|
1376
1421
|
}
|
@@ -1378,79 +1423,72 @@ static bool has_node_in_scope(GumboParser* parser, const GumboNode* node) {
|
|
1378
1423
|
return false;
|
1379
1424
|
}
|
1380
1425
|
|
1381
|
-
// Like has_an_element_in_scope, but restricts the expected
|
1382
|
-
// possible
|
1383
|
-
static bool has_an_element_in_scope_with_tagname(
|
1384
|
-
|
1385
|
-
|
1386
|
-
|
1387
|
-
|
1388
|
-
|
1389
|
-
|
1390
|
-
|
1391
|
-
tag = va_arg(args, GumboTag)) {
|
1392
|
-
gumbo_vector_add(parser, (void*) tag, &tags);
|
1393
|
-
}
|
1394
|
-
bool found = has_an_element_in_specific_scope(
|
1395
|
-
parser, &tags, false, GUMBO_TAG_APPLET, GUMBO_TAG_CAPTION, GUMBO_TAG_HTML,
|
1396
|
-
GUMBO_TAG_TABLE, GUMBO_TAG_TD, GUMBO_TAG_TH, GUMBO_TAG_MARQUEE,
|
1397
|
-
GUMBO_TAG_OBJECT, GUMBO_TAG_MI, GUMBO_TAG_MO, GUMBO_TAG_MN, GUMBO_TAG_MS,
|
1398
|
-
GUMBO_TAG_MTEXT, GUMBO_TAG_ANNOTATION_XML, GUMBO_TAG_FOREIGNOBJECT,
|
1399
|
-
GUMBO_TAG_DESC, GUMBO_TAG_TITLE, GUMBO_TAG_LAST);
|
1400
|
-
gumbo_vector_destroy(parser, &tags);
|
1401
|
-
va_end(args);
|
1402
|
-
return found;
|
1426
|
+
// Like has_an_element_in_scope, but restricts the expected qualified name to a
|
1427
|
+
// range of possible qualified names instead of just a single one.
|
1428
|
+
static bool has_an_element_in_scope_with_tagname(
|
1429
|
+
GumboParser* parser, int expected_len, const GumboTag expected[]) {
|
1430
|
+
return has_an_element_in_specific_scope(parser, expected_len, expected, false,
|
1431
|
+
(gumbo_tagset){TAG(APPLET), TAG(CAPTION), TAG(HTML), TAG(TABLE), TAG(TD),
|
1432
|
+
TAG(TH), TAG(MARQUEE), TAG(OBJECT), TAG(TEMPLATE), TAG_MATHML(MI),
|
1433
|
+
TAG_MATHML(MO), TAG_MATHML(MN), TAG_MATHML(MS), TAG_MATHML(MTEXT),
|
1434
|
+
TAG_MATHML(ANNOTATION_XML), TAG_SVG(FOREIGNOBJECT), TAG_SVG(DESC),
|
1435
|
+
TAG_SVG(TITLE)});
|
1403
1436
|
}
|
1404
1437
|
|
1405
1438
|
// http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#has-an-element-in-list-item-scope
|
1406
1439
|
static bool has_an_element_in_list_scope(GumboParser* parser, GumboTag tag) {
|
1407
|
-
|
1408
|
-
|
1409
|
-
|
1410
|
-
|
1411
|
-
|
1412
|
-
|
1413
|
-
GUMBO_TAG_DESC, GUMBO_TAG_TITLE, GUMBO_TAG_OL, GUMBO_TAG_UL,
|
1414
|
-
GUMBO_TAG_LAST);
|
1440
|
+
return has_an_element_in_specific_scope(parser, 1, &tag, false,
|
1441
|
+
(gumbo_tagset){TAG(APPLET), TAG(CAPTION), TAG(HTML), TAG(TABLE), TAG(TD),
|
1442
|
+
TAG(TH), TAG(MARQUEE), TAG(OBJECT), TAG(TEMPLATE), TAG_MATHML(MI),
|
1443
|
+
TAG_MATHML(MO), TAG_MATHML(MN), TAG_MATHML(MS), TAG_MATHML(MTEXT),
|
1444
|
+
TAG_MATHML(ANNOTATION_XML), TAG_SVG(FOREIGNOBJECT), TAG_SVG(DESC),
|
1445
|
+
TAG_SVG(TITLE), TAG(OL), TAG(UL)});
|
1415
1446
|
}
|
1416
1447
|
|
1417
1448
|
// http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#has-an-element-in-button-scope
|
1418
1449
|
static bool has_an_element_in_button_scope(GumboParser* parser, GumboTag tag) {
|
1419
|
-
|
1420
|
-
|
1421
|
-
|
1422
|
-
|
1423
|
-
|
1424
|
-
|
1425
|
-
GUMBO_TAG_DESC, GUMBO_TAG_TITLE, GUMBO_TAG_BUTTON, GUMBO_TAG_LAST);
|
1450
|
+
return has_an_element_in_specific_scope(parser, 1, &tag, false,
|
1451
|
+
(gumbo_tagset){TAG(APPLET), TAG(CAPTION), TAG(HTML), TAG(TABLE), TAG(TD),
|
1452
|
+
TAG(TH), TAG(MARQUEE), TAG(OBJECT), TAG(TEMPLATE), TAG_MATHML(MI),
|
1453
|
+
TAG_MATHML(MO), TAG_MATHML(MN), TAG_MATHML(MS), TAG_MATHML(MTEXT),
|
1454
|
+
TAG_MATHML(ANNOTATION_XML), TAG_SVG(FOREIGNOBJECT), TAG_SVG(DESC),
|
1455
|
+
TAG_SVG(TITLE), TAG(BUTTON)});
|
1426
1456
|
}
|
1427
1457
|
|
1428
1458
|
// http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#has-an-element-in-table-scope
|
1429
1459
|
static bool has_an_element_in_table_scope(GumboParser* parser, GumboTag tag) {
|
1430
|
-
|
1431
|
-
|
1432
|
-
parser, &tags, false, GUMBO_TAG_HTML, GUMBO_TAG_TABLE, GUMBO_TAG_LAST);
|
1460
|
+
return has_an_element_in_specific_scope(parser, 1, &tag, false,
|
1461
|
+
(gumbo_tagset){TAG(HTML), TAG(TABLE), TAG(TEMPLATE)});
|
1433
1462
|
}
|
1434
1463
|
|
1435
1464
|
// http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#has-an-element-in-select-scope
|
1436
1465
|
static bool has_an_element_in_select_scope(GumboParser* parser, GumboTag tag) {
|
1437
|
-
DECLARE_ONE_ELEMENT_GUMBO_VECTOR(tags, tag);
|
1438
1466
|
return has_an_element_in_specific_scope(
|
1439
|
-
parser, &
|
1440
|
-
GUMBO_TAG_LAST);
|
1467
|
+
parser, 1, &tag, true, (gumbo_tagset){TAG(OPTGROUP), TAG(OPTION)});
|
1441
1468
|
}
|
1442
1469
|
|
1443
|
-
|
1444
1470
|
// http://www.whatwg.org/specs/web-apps/current-work/complete/tokenization.html#generate-implied-end-tags
|
1445
1471
|
// "exception" is the "element to exclude from the process" listed in the spec.
|
1446
1472
|
// Pass GUMBO_TAG_LAST to not exclude any of them.
|
1447
1473
|
static void generate_implied_end_tags(GumboParser* parser, GumboTag exception) {
|
1448
|
-
for (;
|
1449
|
-
|
1450
|
-
|
1451
|
-
|
1452
|
-
|
1453
|
-
|
1474
|
+
for (; node_tag_in_set(get_current_node(parser),
|
1475
|
+
(gumbo_tagset){TAG(DD), TAG(DT), TAG(LI), TAG(OPTION),
|
1476
|
+
TAG(OPTGROUP), TAG(P), TAG(RP), TAG(RB), TAG(RT), TAG(RTC)}) &&
|
1477
|
+
!node_html_tag_is(get_current_node(parser), exception);
|
1478
|
+
pop_current_node(parser))
|
1479
|
+
;
|
1480
|
+
}
|
1481
|
+
|
1482
|
+
// This is the "generate all implied end tags thoroughly" clause of the spec.
|
1483
|
+
// https://html.spec.whatwg.org/multipage/syntax.html#closing-elements-that-have-implied-end-tags
|
1484
|
+
static void generate_all_implied_end_tags_thoroughly(GumboParser* parser) {
|
1485
|
+
for (
|
1486
|
+
; node_tag_in_set(get_current_node(parser),
|
1487
|
+
(gumbo_tagset){TAG(CAPTION), TAG(COLGROUP), TAG(DD), TAG(DT), TAG(LI),
|
1488
|
+
TAG(OPTION), TAG(OPTGROUP), TAG(P), TAG(RP), TAG(RT), TAG(RTC),
|
1489
|
+
TAG(TBODY), TAG(TD), TAG(TFOOT), TAG(TH), TAG(HEAD), TAG(TR)});
|
1490
|
+
pop_current_node(parser))
|
1491
|
+
;
|
1454
1492
|
}
|
1455
1493
|
|
1456
1494
|
// This factors out the clauses relating to "act as if an end tag token with tag
|
@@ -1463,7 +1501,7 @@ static bool close_table(GumboParser* parser) {
|
|
1463
1501
|
}
|
1464
1502
|
|
1465
1503
|
GumboNode* node = pop_current_node(parser);
|
1466
|
-
while (!
|
1504
|
+
while (!node_html_tag_is(node, GUMBO_TAG_TABLE)) {
|
1467
1505
|
node = pop_current_node(parser);
|
1468
1506
|
}
|
1469
1507
|
reset_insertion_mode_appropriately(parser);
|
@@ -1472,18 +1510,18 @@ static bool close_table(GumboParser* parser) {
|
|
1472
1510
|
|
1473
1511
|
// This factors out the clauses relating to "act as if an end tag token with tag
|
1474
1512
|
// name `cell_tag` had been seen".
|
1475
|
-
static bool close_table_cell(
|
1476
|
-
|
1513
|
+
static bool close_table_cell(
|
1514
|
+
GumboParser* parser, const GumboToken* token, GumboTag cell_tag) {
|
1477
1515
|
bool result = true;
|
1478
1516
|
generate_implied_end_tags(parser, GUMBO_TAG_LAST);
|
1479
1517
|
const GumboNode* node = get_current_node(parser);
|
1480
|
-
if (!
|
1518
|
+
if (!node_html_tag_is(node, cell_tag)) {
|
1481
1519
|
parser_add_parse_error(parser, token);
|
1482
1520
|
result = false;
|
1483
1521
|
}
|
1484
1522
|
do {
|
1485
1523
|
node = pop_current_node(parser);
|
1486
|
-
} while (!
|
1524
|
+
} while (!node_html_tag_is(node, cell_tag));
|
1487
1525
|
|
1488
1526
|
clear_active_formatting_elements(parser);
|
1489
1527
|
set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_ROW);
|
@@ -1508,7 +1546,7 @@ static bool close_current_cell(GumboParser* parser, const GumboToken* token) {
|
|
1508
1546
|
// resets the insertion mode appropriately.
|
1509
1547
|
static void close_current_select(GumboParser* parser) {
|
1510
1548
|
GumboNode* node = pop_current_node(parser);
|
1511
|
-
while (!
|
1549
|
+
while (!node_html_tag_is(node, GUMBO_TAG_SELECT)) {
|
1512
1550
|
node = pop_current_node(parser);
|
1513
1551
|
}
|
1514
1552
|
reset_insertion_mode_appropriately(parser);
|
@@ -1517,60 +1555,48 @@ static void close_current_select(GumboParser* parser) {
|
|
1517
1555
|
// The list of nodes in the "special" category:
|
1518
1556
|
// http://www.whatwg.org/specs/web-apps/current-work/complete/parsing.html#special
|
1519
1557
|
static bool is_special_node(const GumboNode* node) {
|
1520
|
-
assert(node->type == GUMBO_NODE_ELEMENT);
|
1521
|
-
|
1522
|
-
|
1523
|
-
|
1524
|
-
|
1525
|
-
|
1526
|
-
|
1527
|
-
|
1528
|
-
|
1529
|
-
|
1530
|
-
|
1531
|
-
|
1532
|
-
|
1533
|
-
|
1534
|
-
|
1535
|
-
|
1536
|
-
|
1537
|
-
|
1538
|
-
|
1539
|
-
|
1540
|
-
|
1541
|
-
|
1542
|
-
|
1543
|
-
|
1544
|
-
|
1545
|
-
|
1546
|
-
|
1547
|
-
return node_tag_in(node,
|
1548
|
-
GUMBO_TAG_MI, GUMBO_TAG_MO, GUMBO_TAG_MN, GUMBO_TAG_MS,
|
1549
|
-
GUMBO_TAG_MTEXT, GUMBO_TAG_ANNOTATION_XML, GUMBO_TAG_LAST);
|
1550
|
-
case GUMBO_NAMESPACE_SVG:
|
1551
|
-
return node_tag_in(node,
|
1552
|
-
GUMBO_TAG_FOREIGNOBJECT, GUMBO_TAG_DESC, GUMBO_TAG_LAST);
|
1553
|
-
}
|
1554
|
-
abort();
|
1555
|
-
return false; // Pacify compiler.
|
1556
|
-
}
|
1557
|
-
|
1558
|
-
// Implicitly closes currently open tags until it reaches an element with the
|
1559
|
-
// specified tag name. If the elements closed are in the set handled by
|
1558
|
+
assert(node->type == GUMBO_NODE_ELEMENT || node->type == GUMBO_NODE_TEMPLATE);
|
1559
|
+
return node_tag_in_set(node,
|
1560
|
+
(gumbo_tagset){TAG(ADDRESS), TAG(APPLET), TAG(AREA), TAG(ARTICLE),
|
1561
|
+
TAG(ASIDE), TAG(BASE), TAG(BASEFONT), TAG(BGSOUND), TAG(BLOCKQUOTE),
|
1562
|
+
TAG(BODY), TAG(BR), TAG(BUTTON), TAG(CAPTION), TAG(CENTER), TAG(COL),
|
1563
|
+
TAG(COLGROUP), TAG(MENUITEM), TAG(DD), TAG(DETAILS), TAG(DIR),
|
1564
|
+
TAG(DIV), TAG(DL), TAG(DT), TAG(EMBED), TAG(FIELDSET),
|
1565
|
+
TAG(FIGCAPTION), TAG(FIGURE), TAG(FOOTER), TAG(FORM), TAG(FRAME),
|
1566
|
+
TAG(FRAMESET), TAG(H1), TAG(H2), TAG(H3), TAG(H4), TAG(H5), TAG(H6),
|
1567
|
+
TAG(HEAD), TAG(HEADER), TAG(HGROUP), TAG(HR), TAG(HTML), TAG(IFRAME),
|
1568
|
+
TAG(IMG), TAG(INPUT), TAG(ISINDEX), TAG(LI), TAG(LINK), TAG(LISTING),
|
1569
|
+
TAG(MARQUEE), TAG(MENU), TAG(META), TAG(NAV), TAG(NOEMBED),
|
1570
|
+
TAG(NOFRAMES), TAG(NOSCRIPT), TAG(OBJECT), TAG(OL), TAG(P),
|
1571
|
+
TAG(PARAM), TAG(PLAINTEXT), TAG(PRE), TAG(SCRIPT), TAG(SECTION),
|
1572
|
+
TAG(SELECT), TAG(STYLE), TAG(SUMMARY), TAG(TABLE), TAG(TBODY),
|
1573
|
+
TAG(TD), TAG(TEMPLATE), TAG(TEXTAREA), TAG(TFOOT), TAG(TH),
|
1574
|
+
TAG(THEAD), TAG(TITLE), TAG(TR), TAG(UL), TAG(WBR), TAG(XMP),
|
1575
|
+
|
1576
|
+
TAG_MATHML(MI), TAG_MATHML(MO), TAG_MATHML(MN), TAG_MATHML(MS),
|
1577
|
+
TAG_MATHML(MTEXT), TAG_MATHML(ANNOTATION_XML),
|
1578
|
+
|
1579
|
+
TAG_SVG(FOREIGNOBJECT), TAG_SVG(DESC)});
|
1580
|
+
}
|
1581
|
+
|
1582
|
+
// Implicitly closes currently open elements until it reaches an element with
|
1583
|
+
// the
|
1584
|
+
// specified qualified name. If the elements closed are in the set handled by
|
1560
1585
|
// generate_implied_end_tags, this is normal operation and this function returns
|
1561
1586
|
// true. Otherwise, a parse error is recorded and this function returns false.
|
1562
|
-
static bool implicitly_close_tags(
|
1563
|
-
|
1587
|
+
static bool implicitly_close_tags(GumboParser* parser, GumboToken* token,
|
1588
|
+
GumboNamespaceEnum target_ns, GumboTag target) {
|
1564
1589
|
bool result = true;
|
1565
1590
|
generate_implied_end_tags(parser, target);
|
1566
|
-
if (!
|
1591
|
+
if (!node_qualified_tag_is(get_current_node(parser), target_ns, target)) {
|
1567
1592
|
parser_add_parse_error(parser, token);
|
1568
|
-
while (
|
1593
|
+
while (
|
1594
|
+
!node_qualified_tag_is(get_current_node(parser), target_ns, target)) {
|
1569
1595
|
pop_current_node(parser);
|
1570
1596
|
}
|
1571
1597
|
result = false;
|
1572
1598
|
}
|
1573
|
-
assert(
|
1599
|
+
assert(node_qualified_tag_is(get_current_node(parser), target_ns, target));
|
1574
1600
|
pop_current_node(parser);
|
1575
1601
|
return result;
|
1576
1602
|
}
|
@@ -1579,9 +1605,11 @@ static bool implicitly_close_tags(
|
|
1579
1605
|
// a </p> tag was encountered, implicitly closing tags. Returns false if a
|
1580
1606
|
// parse error occurs. This is a convenience function because this particular
|
1581
1607
|
// clause appears several times in the spec.
|
1582
|
-
static bool maybe_implicitly_close_p_tag(
|
1608
|
+
static bool maybe_implicitly_close_p_tag(
|
1609
|
+
GumboParser* parser, GumboToken* token) {
|
1583
1610
|
if (has_an_element_in_button_scope(parser, GUMBO_TAG_P)) {
|
1584
|
-
return implicitly_close_tags(
|
1611
|
+
return implicitly_close_tags(
|
1612
|
+
parser, token, GUMBO_NAMESPACE_HTML, GUMBO_TAG_P);
|
1585
1613
|
}
|
1586
1614
|
return true;
|
1587
1615
|
}
|
@@ -1592,18 +1620,19 @@ static void maybe_implicitly_close_list_tag(
|
|
1592
1620
|
GumboParser* parser, GumboToken* token, bool is_li) {
|
1593
1621
|
GumboParserState* state = parser->_parser_state;
|
1594
1622
|
state->_frameset_ok = false;
|
1595
|
-
for (int i = state->_open_elements.length; --i >= 0;
|
1623
|
+
for (int i = state->_open_elements.length; --i >= 0;) {
|
1596
1624
|
const GumboNode* node = state->_open_elements.data[i];
|
1597
|
-
bool is_list_tag =
|
1598
|
-
|
1599
|
-
|
1625
|
+
bool is_list_tag =
|
1626
|
+
is_li ? node_html_tag_is(node, GUMBO_TAG_LI)
|
1627
|
+
: node_tag_in_set(node, (gumbo_tagset){TAG(DD), TAG(DT)});
|
1600
1628
|
if (is_list_tag) {
|
1601
|
-
implicitly_close_tags(
|
1629
|
+
implicitly_close_tags(
|
1630
|
+
parser, token, node->v.element.tag_namespace, node->v.element.tag);
|
1602
1631
|
return;
|
1603
1632
|
}
|
1604
1633
|
if (is_special_node(node) &&
|
1605
|
-
!
|
1606
|
-
|
1634
|
+
!node_tag_in_set(
|
1635
|
+
node, (gumbo_tagset){TAG(ADDRESS), TAG(DIV), TAG(P)})) {
|
1607
1636
|
return;
|
1608
1637
|
}
|
1609
1638
|
}
|
@@ -1616,7 +1645,7 @@ static void merge_attributes(
|
|
1616
1645
|
const GumboVector* token_attr = &token->v.start_tag.attributes;
|
1617
1646
|
GumboVector* node_attr = &node->v.element.attributes;
|
1618
1647
|
|
1619
|
-
for (int i = 0; i < token_attr->length; ++i) {
|
1648
|
+
for (unsigned int i = 0; i < token_attr->length; ++i) {
|
1620
1649
|
GumboAttribute* attr = token_attr->data[i];
|
1621
1650
|
if (!gumbo_get_attribute(node_attr, attr->name)) {
|
1622
1651
|
// Ownership of the attribute is transferred by this gumbo_vector_add,
|
@@ -1640,8 +1669,8 @@ static void merge_attributes(
|
|
1640
1669
|
}
|
1641
1670
|
|
1642
1671
|
const char* gumbo_normalize_svg_tagname(const GumboStringPiece* tag) {
|
1643
|
-
for (
|
1644
|
-
|
1672
|
+
for (size_t i = 0; i < sizeof(kSvgTagReplacements) / sizeof(ReplacementEntry);
|
1673
|
+
++i) {
|
1645
1674
|
const ReplacementEntry* entry = &kSvgTagReplacements[i];
|
1646
1675
|
if (gumbo_string_equals_ignore_case(tag, &entry->from)) {
|
1647
1676
|
return entry->to.data;
|
@@ -1656,9 +1685,9 @@ const char* gumbo_normalize_svg_tagname(const GumboStringPiece* tag) {
|
|
1656
1685
|
static void adjust_foreign_attributes(GumboParser* parser, GumboToken* token) {
|
1657
1686
|
assert(token->type == GUMBO_TOKEN_START_TAG);
|
1658
1687
|
const GumboVector* attributes = &token->v.start_tag.attributes;
|
1659
|
-
for (
|
1660
|
-
|
1661
|
-
|
1688
|
+
for (size_t i = 0; i < sizeof(kForeignAttributeReplacements) /
|
1689
|
+
sizeof(NamespacedAttributeReplacement);
|
1690
|
+
++i) {
|
1662
1691
|
const NamespacedAttributeReplacement* entry =
|
1663
1692
|
&kForeignAttributeReplacements[i];
|
1664
1693
|
GumboAttribute* attr = gumbo_get_attribute(attributes, entry->from);
|
@@ -1676,7 +1705,7 @@ static void adjust_foreign_attributes(GumboParser* parser, GumboToken* token) {
|
|
1676
1705
|
static void adjust_svg_attributes(GumboParser* parser, GumboToken* token) {
|
1677
1706
|
assert(token->type == GUMBO_TOKEN_START_TAG);
|
1678
1707
|
const GumboVector* attributes = &token->v.start_tag.attributes;
|
1679
|
-
for (
|
1708
|
+
for (size_t i = 0;
|
1680
1709
|
i < sizeof(kSvgAttributeReplacements) / sizeof(ReplacementEntry); ++i) {
|
1681
1710
|
const ReplacementEntry* entry = &kSvgAttributeReplacements[i];
|
1682
1711
|
GumboAttribute* attr = gumbo_get_attribute(attributes, entry->from.data);
|
@@ -1693,8 +1722,8 @@ static void adjust_svg_attributes(GumboParser* parser, GumboToken* token) {
|
|
1693
1722
|
// value.
|
1694
1723
|
static void adjust_mathml_attributes(GumboParser* parser, GumboToken* token) {
|
1695
1724
|
assert(token->type == GUMBO_TOKEN_START_TAG);
|
1696
|
-
GumboAttribute* attr =
|
1697
|
-
&token->v.start_tag.attributes, "definitionurl");
|
1725
|
+
GumboAttribute* attr =
|
1726
|
+
gumbo_get_attribute(&token->v.start_tag.attributes, "definitionurl");
|
1698
1727
|
if (!attr) {
|
1699
1728
|
return;
|
1700
1729
|
}
|
@@ -1702,32 +1731,30 @@ static void adjust_mathml_attributes(GumboParser* parser, GumboToken* token) {
|
|
1702
1731
|
attr->name = gumbo_copy_stringz(parser, "definitionURL");
|
1703
1732
|
}
|
1704
1733
|
|
1705
|
-
static bool doctype_matches(
|
1706
|
-
const
|
1707
|
-
const GumboStringPiece* public_id,
|
1708
|
-
const GumboStringPiece* system_id,
|
1734
|
+
static bool doctype_matches(const GumboTokenDocType* doctype,
|
1735
|
+
const GumboStringPiece* public_id, const GumboStringPiece* system_id,
|
1709
1736
|
bool allow_missing_system_id) {
|
1710
1737
|
return !strcmp(doctype->public_identifier, public_id->data) &&
|
1711
|
-
|
1712
|
-
|
1738
|
+
(allow_missing_system_id || doctype->has_system_identifier) &&
|
1739
|
+
!strcmp(doctype->system_identifier, system_id->data);
|
1713
1740
|
}
|
1714
1741
|
|
1715
1742
|
static bool maybe_add_doctype_error(
|
1716
1743
|
GumboParser* parser, const GumboToken* token) {
|
1717
1744
|
const GumboTokenDocType* doctype = &token->v.doc_type;
|
1718
1745
|
bool html_doctype = !strcmp(doctype->name, kDoctypeHtml.data);
|
1719
|
-
if ((!html_doctype ||
|
1720
|
-
|
1721
|
-
|
1722
|
-
|
1723
|
-
!(html_doctype && (
|
1724
|
-
|
1725
|
-
|
1726
|
-
|
1727
|
-
|
1728
|
-
|
1729
|
-
|
1730
|
-
|
1746
|
+
if ((!html_doctype || doctype->has_public_identifier ||
|
1747
|
+
(doctype->has_system_identifier &&
|
1748
|
+
!strcmp(
|
1749
|
+
doctype->system_identifier, kSystemIdLegacyCompat.data))) &&
|
1750
|
+
!(html_doctype && (doctype_matches(doctype, &kPublicIdHtml4_0,
|
1751
|
+
&kSystemIdRecHtml4_0, true) ||
|
1752
|
+
doctype_matches(doctype, &kPublicIdHtml4_01,
|
1753
|
+
&kSystemIdHtml4, true) ||
|
1754
|
+
doctype_matches(doctype, &kPublicIdXhtml1_0,
|
1755
|
+
&kSystemIdXhtmlStrict1_1, false) ||
|
1756
|
+
doctype_matches(doctype, &kPublicIdXhtml1_1,
|
1757
|
+
&kSystemIdXhtml1_1, false)))) {
|
1731
1758
|
parser_add_parse_error(parser, token);
|
1732
1759
|
return false;
|
1733
1760
|
}
|
@@ -1750,7 +1777,7 @@ static void remove_from_parent(GumboParser* parser, GumboNode* node) {
|
|
1750
1777
|
gumbo_vector_remove_at(parser, index, children);
|
1751
1778
|
node->parent = NULL;
|
1752
1779
|
node->index_within_parent = -1;
|
1753
|
-
for (int i = index; i < children->length; ++i) {
|
1780
|
+
for (unsigned int i = index; i < children->length; ++i) {
|
1754
1781
|
GumboNode* child = children->data[i];
|
1755
1782
|
child->index_within_parent = i;
|
1756
1783
|
}
|
@@ -1759,29 +1786,38 @@ static void remove_from_parent(GumboParser* parser, GumboNode* node) {
|
|
1759
1786
|
// http://www.whatwg.org/specs/web-apps/current-work/multipage/the-end.html#an-introduction-to-error-handling-and-strange-cases-in-the-parser
|
1760
1787
|
// Also described in the "in body" handling for end formatting tags.
|
1761
1788
|
static bool adoption_agency_algorithm(
|
1762
|
-
GumboParser* parser, GumboToken* token, GumboTag
|
1789
|
+
GumboParser* parser, GumboToken* token, GumboTag subject) {
|
1763
1790
|
GumboParserState* state = parser->_parser_state;
|
1764
1791
|
gumbo_debug("Entering adoption agency algorithm.\n");
|
1765
|
-
//
|
1766
|
-
|
1767
|
-
|
1792
|
+
// Step 1.
|
1793
|
+
GumboNode* current_node = get_current_node(parser);
|
1794
|
+
if (current_node->v.element.tag_namespace == GUMBO_NAMESPACE_HTML &&
|
1795
|
+
current_node->v.element.tag == subject &&
|
1796
|
+
gumbo_vector_index_of(
|
1797
|
+
&state->_active_formatting_elements, current_node) == -1) {
|
1798
|
+
pop_current_node(parser);
|
1799
|
+
return false;
|
1800
|
+
}
|
1801
|
+
// Steps 2-4 & 20:
|
1802
|
+
for (unsigned int i = 0; i < 8; ++i) {
|
1803
|
+
// Step 5.
|
1768
1804
|
GumboNode* formatting_node = NULL;
|
1769
1805
|
int formatting_node_in_open_elements = -1;
|
1770
|
-
for (int j = state->_active_formatting_elements.length; --j >= 0;
|
1806
|
+
for (int j = state->_active_formatting_elements.length; --j >= 0;) {
|
1771
1807
|
GumboNode* current_node = state->_active_formatting_elements.data[j];
|
1772
1808
|
if (current_node == &kActiveFormattingScopeMarker) {
|
1773
1809
|
gumbo_debug("Broke on scope marker; aborting.\n");
|
1774
1810
|
// Last scope marker; abort the algorithm.
|
1775
1811
|
return false;
|
1776
1812
|
}
|
1777
|
-
if (
|
1813
|
+
if (node_html_tag_is(current_node, subject)) {
|
1778
1814
|
// Found it.
|
1779
1815
|
formatting_node = current_node;
|
1780
|
-
formatting_node_in_open_elements =
|
1781
|
-
&state->_open_elements, formatting_node);
|
1816
|
+
formatting_node_in_open_elements =
|
1817
|
+
gumbo_vector_index_of(&state->_open_elements, formatting_node);
|
1782
1818
|
gumbo_debug("Formatting element of tag %s at %d.\n",
|
1783
|
-
|
1784
|
-
|
1819
|
+
gumbo_normalized_tagname(subject),
|
1820
|
+
formatting_node_in_open_elements);
|
1785
1821
|
break;
|
1786
1822
|
}
|
1787
1823
|
}
|
@@ -1793,74 +1829,84 @@ static bool adoption_agency_algorithm(
|
|
1793
1829
|
return false;
|
1794
1830
|
}
|
1795
1831
|
|
1832
|
+
// Step 6
|
1796
1833
|
if (formatting_node_in_open_elements == -1) {
|
1797
1834
|
gumbo_debug("Formatting node not on stack of open elements.\n");
|
1798
|
-
|
1799
|
-
|
1835
|
+
parser_add_parse_error(parser, token);
|
1836
|
+
gumbo_vector_remove(
|
1837
|
+
parser, formatting_node, &state->_active_formatting_elements);
|
1800
1838
|
return false;
|
1801
1839
|
}
|
1802
1840
|
|
1841
|
+
// Step 7
|
1803
1842
|
if (!has_an_element_in_scope(parser, formatting_node->v.element.tag)) {
|
1804
1843
|
parser_add_parse_error(parser, token);
|
1805
1844
|
gumbo_debug("Element not in scope.\n");
|
1806
1845
|
return false;
|
1807
1846
|
}
|
1847
|
+
|
1848
|
+
// Step 8
|
1808
1849
|
if (formatting_node != get_current_node(parser)) {
|
1809
1850
|
parser_add_parse_error(parser, token); // But continue onwards.
|
1810
1851
|
}
|
1811
1852
|
assert(formatting_node);
|
1812
|
-
assert(!
|
1813
|
-
assert(!
|
1853
|
+
assert(!node_html_tag_is(formatting_node, GUMBO_TAG_HTML));
|
1854
|
+
assert(!node_html_tag_is(formatting_node, GUMBO_TAG_BODY));
|
1814
1855
|
|
1815
|
-
// Step
|
1856
|
+
// Step 9 & 10
|
1816
1857
|
GumboNode* furthest_block = NULL;
|
1817
|
-
for (int j = formatting_node_in_open_elements;
|
1858
|
+
for (unsigned int j = formatting_node_in_open_elements;
|
1818
1859
|
j < state->_open_elements.length; ++j) {
|
1819
1860
|
assert(j > 0);
|
1820
1861
|
GumboNode* current = state->_open_elements.data[j];
|
1821
1862
|
if (is_special_node(current)) {
|
1822
|
-
// Step
|
1863
|
+
// Step 9.
|
1823
1864
|
furthest_block = current;
|
1824
1865
|
break;
|
1825
1866
|
}
|
1826
1867
|
}
|
1827
1868
|
if (!furthest_block) {
|
1828
|
-
// Step
|
1869
|
+
// Step 10.
|
1829
1870
|
while (get_current_node(parser) != formatting_node) {
|
1830
1871
|
pop_current_node(parser);
|
1831
1872
|
}
|
1832
1873
|
// And the formatting element itself.
|
1833
1874
|
pop_current_node(parser);
|
1834
|
-
gumbo_vector_remove(
|
1835
|
-
|
1875
|
+
gumbo_vector_remove(
|
1876
|
+
parser, formatting_node, &state->_active_formatting_elements);
|
1836
1877
|
return false;
|
1837
1878
|
}
|
1838
|
-
assert(!
|
1879
|
+
assert(!node_html_tag_is(furthest_block, GUMBO_TAG_HTML));
|
1839
1880
|
assert(furthest_block);
|
1840
1881
|
|
1841
|
-
// Step
|
1882
|
+
// Step 11.
|
1842
1883
|
// Elements may be moved and reparented by this algorithm, so
|
1843
1884
|
// common_ancestor is not necessarily the same as formatting_node->parent.
|
1844
1885
|
GumboNode* common_ancestor =
|
1845
|
-
state->_open_elements.data[gumbo_vector_index_of(
|
1846
|
-
|
1886
|
+
state->_open_elements.data[gumbo_vector_index_of(&state->_open_elements,
|
1887
|
+
formatting_node) -
|
1888
|
+
1];
|
1847
1889
|
gumbo_debug("Common ancestor tag = %s, furthest block tag = %s.\n",
|
1848
|
-
|
1849
|
-
|
1890
|
+
gumbo_normalized_tagname(common_ancestor->v.element.tag),
|
1891
|
+
gumbo_normalized_tagname(furthest_block->v.element.tag));
|
1850
1892
|
|
1851
|
-
// Step
|
1893
|
+
// Step 12.
|
1852
1894
|
int bookmark = gumbo_vector_index_of(
|
1853
|
-
|
1854
|
-
|
1895
|
+
&state->_active_formatting_elements, formatting_node) +
|
1896
|
+
1;
|
1897
|
+
gumbo_debug("Bookmark at %d.\n", bookmark);
|
1898
|
+
// Step 13.
|
1855
1899
|
GumboNode* node = furthest_block;
|
1856
1900
|
GumboNode* last_node = furthest_block;
|
1857
1901
|
// Must be stored explicitly, in case node is removed from the stack of open
|
1858
1902
|
// elements, to handle step 9.4.
|
1859
1903
|
int saved_node_index = gumbo_vector_index_of(&state->_open_elements, node);
|
1860
1904
|
assert(saved_node_index > 0);
|
1861
|
-
// Step
|
1862
|
-
for (int j = 0
|
1863
|
-
// Step
|
1905
|
+
// Step 13.1.
|
1906
|
+
for (int j = 0;;) {
|
1907
|
+
// Step 13.2.
|
1908
|
+
++j;
|
1909
|
+
// Step 13.3.
|
1864
1910
|
int node_index = gumbo_vector_index_of(&state->_open_elements, node);
|
1865
1911
|
gumbo_debug(
|
1866
1912
|
"Current index: %d, last index: %d.\n", node_index, saved_node_index);
|
@@ -1869,59 +1915,72 @@ static bool adoption_agency_algorithm(
|
|
1869
1915
|
}
|
1870
1916
|
saved_node_index = --node_index;
|
1871
1917
|
assert(node_index > 0);
|
1872
|
-
assert(node_index < state->_open_elements.capacity);
|
1918
|
+
assert((unsigned int) node_index < state->_open_elements.capacity);
|
1873
1919
|
node = state->_open_elements.data[node_index];
|
1874
1920
|
assert(node->parent);
|
1875
|
-
|
1876
|
-
|
1877
|
-
|
1921
|
+
if (node == formatting_node) {
|
1922
|
+
// Step 13.4.
|
1923
|
+
break;
|
1924
|
+
}
|
1925
|
+
int formatting_index =
|
1926
|
+
gumbo_vector_index_of(&state->_active_formatting_elements, node);
|
1927
|
+
if (j > 3 && formatting_index != -1) {
|
1928
|
+
// Step 13.5.
|
1929
|
+
gumbo_debug("Removing formatting element at %d.\n", formatting_index);
|
1930
|
+
gumbo_vector_remove_at(
|
1931
|
+
parser, formatting_index, &state->_active_formatting_elements);
|
1932
|
+
// Removing the element shifts all indices over by one, so we may need
|
1933
|
+
// to move the bookmark.
|
1934
|
+
if (formatting_index < bookmark) {
|
1935
|
+
--bookmark;
|
1936
|
+
gumbo_debug("Moving bookmark to %d.\n", bookmark);
|
1937
|
+
}
|
1938
|
+
continue;
|
1939
|
+
}
|
1940
|
+
if (formatting_index == -1) {
|
1941
|
+
// Step 13.6.
|
1878
1942
|
gumbo_vector_remove_at(parser, node_index, &state->_open_elements);
|
1879
1943
|
continue;
|
1880
|
-
} else if (node == formatting_node) {
|
1881
|
-
// Step 9.6.
|
1882
|
-
break;
|
1883
1944
|
}
|
1884
|
-
// Step
|
1885
|
-
|
1886
|
-
|
1945
|
+
// Step 13.7.
|
1946
|
+
// "common ancestor as the intended parent" doesn't actually mean insert
|
1947
|
+
// it into the common ancestor; that happens below.
|
1887
1948
|
node = clone_node(parser, node, GUMBO_INSERTION_ADOPTION_AGENCY_CLONED);
|
1949
|
+
assert(formatting_index >= 0);
|
1888
1950
|
state->_active_formatting_elements.data[formatting_index] = node;
|
1951
|
+
assert(node_index >= 0);
|
1889
1952
|
state->_open_elements.data[node_index] = node;
|
1890
|
-
// Step
|
1953
|
+
// Step 13.8.
|
1891
1954
|
if (last_node == furthest_block) {
|
1892
1955
|
bookmark = formatting_index + 1;
|
1893
|
-
|
1956
|
+
gumbo_debug("Bookmark moved to %d.\n", bookmark);
|
1957
|
+
assert((unsigned int) bookmark <= state->_active_formatting_elements.length);
|
1894
1958
|
}
|
1895
|
-
// Step
|
1959
|
+
// Step 13.9.
|
1896
1960
|
last_node->parse_flags |= GUMBO_INSERTION_ADOPTION_AGENCY_MOVED;
|
1897
1961
|
remove_from_parent(parser, last_node);
|
1898
1962
|
append_node(parser, node, last_node);
|
1899
|
-
// Step
|
1963
|
+
// Step 13.10.
|
1900
1964
|
last_node = node;
|
1901
|
-
}
|
1965
|
+
} // Step 13.11.
|
1902
1966
|
|
1903
|
-
// Step
|
1967
|
+
// Step 14.
|
1904
1968
|
gumbo_debug("Removing %s node from parent ",
|
1905
|
-
|
1969
|
+
gumbo_normalized_tagname(last_node->v.element.tag));
|
1906
1970
|
remove_from_parent(parser, last_node);
|
1907
1971
|
last_node->parse_flags |= GUMBO_INSERTION_ADOPTION_AGENCY_MOVED;
|
1908
|
-
|
1909
|
-
|
1910
|
-
|
1911
|
-
|
1912
|
-
|
1913
|
-
} else {
|
1914
|
-
gumbo_debug("and inserting it into %s.\n",
|
1915
|
-
gumbo_normalized_tagname(common_ancestor->v.element.tag));
|
1916
|
-
append_node(parser, common_ancestor, last_node);
|
1917
|
-
}
|
1972
|
+
InsertionLocation location =
|
1973
|
+
get_appropriate_insertion_location(parser, common_ancestor);
|
1974
|
+
gumbo_debug("and inserting it into %s.\n",
|
1975
|
+
gumbo_normalized_tagname(location.target->v.element.tag));
|
1976
|
+
insert_node(parser, last_node, location);
|
1918
1977
|
|
1919
|
-
// Step
|
1978
|
+
// Step 15.
|
1920
1979
|
GumboNode* new_formatting_node = clone_node(
|
1921
1980
|
parser, formatting_node, GUMBO_INSERTION_ADOPTION_AGENCY_CLONED);
|
1922
1981
|
formatting_node->parse_flags |= GUMBO_INSERTION_IMPLICIT_END_TAG;
|
1923
1982
|
|
1924
|
-
// Step
|
1983
|
+
// Step 16. Instead of appending nodes one-by-one, we swap the children
|
1925
1984
|
// vector of furthest_block with the empty children of new_formatting_node,
|
1926
1985
|
// reducing memory traffic and allocations. We still have to reset their
|
1927
1986
|
// parent pointers, though.
|
@@ -1931,15 +1990,15 @@ static bool adoption_agency_algorithm(
|
|
1931
1990
|
furthest_block->v.element.children = temp;
|
1932
1991
|
|
1933
1992
|
temp = new_formatting_node->v.element.children;
|
1934
|
-
for (int i = 0; i < temp.length; ++i) {
|
1993
|
+
for (unsigned int i = 0; i < temp.length; ++i) {
|
1935
1994
|
GumboNode* child = temp.data[i];
|
1936
1995
|
child->parent = new_formatting_node;
|
1937
1996
|
}
|
1938
1997
|
|
1939
|
-
// Step
|
1998
|
+
// Step 17.
|
1940
1999
|
append_node(parser, furthest_block, new_formatting_node);
|
1941
2000
|
|
1942
|
-
// Step
|
2001
|
+
// Step 18.
|
1943
2002
|
// If the formatting node was before the bookmark, it may shift over all
|
1944
2003
|
// indices after it, so we need to explicitly find the index and possibly
|
1945
2004
|
// adjust the bookmark.
|
@@ -1947,25 +2006,27 @@ static bool adoption_agency_algorithm(
|
|
1947
2006
|
&state->_active_formatting_elements, formatting_node);
|
1948
2007
|
assert(formatting_node_index != -1);
|
1949
2008
|
if (formatting_node_index < bookmark) {
|
2009
|
+
gumbo_debug(
|
2010
|
+
"Formatting node at %d is before bookmark at %d; decrementing.\n",
|
2011
|
+
formatting_node_index, bookmark);
|
1950
2012
|
--bookmark;
|
1951
2013
|
}
|
1952
2014
|
gumbo_vector_remove_at(
|
1953
2015
|
parser, formatting_node_index, &state->_active_formatting_elements);
|
1954
2016
|
assert(bookmark >= 0);
|
1955
|
-
assert(bookmark <= state->_active_formatting_elements.length);
|
2017
|
+
assert((unsigned int) bookmark <= state->_active_formatting_elements.length);
|
1956
2018
|
gumbo_vector_insert_at(parser, new_formatting_node, bookmark,
|
1957
|
-
|
2019
|
+
&state->_active_formatting_elements);
|
1958
2020
|
|
1959
|
-
// Step
|
1960
|
-
gumbo_vector_remove(
|
1961
|
-
|
1962
|
-
|
1963
|
-
&state->_open_elements, furthest_block) + 1;
|
2021
|
+
// Step 19.
|
2022
|
+
gumbo_vector_remove(parser, formatting_node, &state->_open_elements);
|
2023
|
+
int insert_at =
|
2024
|
+
gumbo_vector_index_of(&state->_open_elements, furthest_block) + 1;
|
1964
2025
|
assert(insert_at >= 0);
|
1965
|
-
assert(insert_at <= state->_open_elements.length);
|
2026
|
+
assert((unsigned int) insert_at <= state->_open_elements.length);
|
1966
2027
|
gumbo_vector_insert_at(
|
1967
2028
|
parser, new_formatting_node, insert_at, &state->_open_elements);
|
1968
|
-
}
|
2029
|
+
} // Step 20.
|
1969
2030
|
return true;
|
1970
2031
|
}
|
1971
2032
|
|
@@ -1988,17 +2049,19 @@ static void ignore_token(GumboParser* parser) {
|
|
1988
2049
|
|
1989
2050
|
// http://www.whatwg.org/specs/web-apps/current-work/complete/the-end.html
|
1990
2051
|
static void finish_parsing(GumboParser* parser) {
|
2052
|
+
gumbo_debug("Finishing parsing");
|
1991
2053
|
maybe_flush_text_node_buffer(parser);
|
1992
2054
|
GumboParserState* state = parser->_parser_state;
|
1993
2055
|
for (GumboNode* node = pop_current_node(parser); node;
|
1994
2056
|
node = pop_current_node(parser)) {
|
1995
|
-
if ((
|
1996
|
-
(
|
2057
|
+
if ((node_html_tag_is(node, GUMBO_TAG_BODY) && state->_closed_body_tag) ||
|
2058
|
+
(node_html_tag_is(node, GUMBO_TAG_HTML) && state->_closed_html_tag)) {
|
1997
2059
|
continue;
|
1998
2060
|
}
|
1999
2061
|
node->parse_flags |= GUMBO_INSERTION_IMPLICIT_END_TAG;
|
2000
2062
|
}
|
2001
|
-
while (pop_current_node(parser))
|
2063
|
+
while (pop_current_node(parser))
|
2064
|
+
; // Pop them all.
|
2002
2065
|
}
|
2003
2066
|
|
2004
2067
|
static bool handle_initial(GumboParser* parser, GumboToken* token) {
|
@@ -2042,9 +2105,9 @@ static bool handle_before_html(GumboParser* parser, GumboToken* token) {
|
|
2042
2105
|
parser->_output->root = html_node;
|
2043
2106
|
set_insertion_mode(parser, GUMBO_INSERTION_MODE_BEFORE_HEAD);
|
2044
2107
|
return true;
|
2045
|
-
} else if (token->type == GUMBO_TOKEN_END_TAG &&
|
2046
|
-
|
2047
|
-
|
2108
|
+
} else if (token->type == GUMBO_TOKEN_END_TAG &&
|
2109
|
+
!tag_in(token, false,
|
2110
|
+
(gumbo_tagset){TAG(HEAD), TAG(BODY), TAG(HTML), TAG(BR)})) {
|
2048
2111
|
parser_add_parse_error(parser, token);
|
2049
2112
|
ignore_token(parser);
|
2050
2113
|
return false;
|
@@ -2076,9 +2139,9 @@ static bool handle_before_head(GumboParser* parser, GumboToken* token) {
|
|
2076
2139
|
set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_HEAD);
|
2077
2140
|
parser->_parser_state->_head_element = node;
|
2078
2141
|
return true;
|
2079
|
-
} else if (token->type == GUMBO_TOKEN_END_TAG &&
|
2080
|
-
|
2081
|
-
|
2142
|
+
} else if (token->type == GUMBO_TOKEN_END_TAG &&
|
2143
|
+
!tag_in(token, false,
|
2144
|
+
(gumbo_tagset){TAG(HEAD), TAG(BODY), TAG(HTML), TAG(BR)})) {
|
2082
2145
|
parser_add_parse_error(parser, token);
|
2083
2146
|
ignore_token(parser);
|
2084
2147
|
return false;
|
@@ -2110,9 +2173,9 @@ static bool handle_in_head(GumboParser* parser, GumboToken* token) {
|
|
2110
2173
|
return true;
|
2111
2174
|
} else if (tag_is(token, kStartTag, GUMBO_TAG_HTML)) {
|
2112
2175
|
return handle_in_body(parser, token);
|
2113
|
-
} else if (tag_in(token, kStartTag,
|
2114
|
-
|
2115
|
-
|
2176
|
+
} else if (tag_in(token, kStartTag,
|
2177
|
+
(gumbo_tagset){TAG(BASE), TAG(BASEFONT), TAG(BGSOUND),
|
2178
|
+
TAG(MENUITEM), TAG(LINK)})) {
|
2116
2179
|
insert_element_from_token(parser, token);
|
2117
2180
|
pop_current_node(parser);
|
2118
2181
|
acknowledge_self_closing_tag(parser);
|
@@ -2129,8 +2192,8 @@ static bool handle_in_head(GumboParser* parser, GumboToken* token) {
|
|
2129
2192
|
} else if (tag_is(token, kStartTag, GUMBO_TAG_TITLE)) {
|
2130
2193
|
run_generic_parsing_algorithm(parser, token, GUMBO_LEX_RCDATA);
|
2131
2194
|
return true;
|
2132
|
-
} else if (tag_in(
|
2133
|
-
|
2195
|
+
} else if (tag_in(
|
2196
|
+
token, kStartTag, (gumbo_tagset){TAG(NOFRAMES), TAG(STYLE)})) {
|
2134
2197
|
run_generic_parsing_algorithm(parser, token, GUMBO_LEX_RAWTEXT);
|
2135
2198
|
return true;
|
2136
2199
|
} else if (tag_is(token, kStartTag, GUMBO_TAG_NOSCRIPT)) {
|
@@ -2143,32 +2206,51 @@ static bool handle_in_head(GumboParser* parser, GumboToken* token) {
|
|
2143
2206
|
} else if (tag_is(token, kEndTag, GUMBO_TAG_HEAD)) {
|
2144
2207
|
GumboNode* head = pop_current_node(parser);
|
2145
2208
|
AVOID_UNUSED_VARIABLE_WARNING(head);
|
2146
|
-
assert(
|
2209
|
+
assert(node_html_tag_is(head, GUMBO_TAG_HEAD));
|
2147
2210
|
set_insertion_mode(parser, GUMBO_INSERTION_MODE_AFTER_HEAD);
|
2148
2211
|
return true;
|
2149
|
-
} else if (
|
2150
|
-
|
2151
|
-
|
2152
|
-
|
2212
|
+
} else if (tag_in(token, kEndTag,
|
2213
|
+
(gumbo_tagset){TAG(BODY), TAG(HTML), TAG(BR)})) {
|
2214
|
+
pop_current_node(parser);
|
2215
|
+
set_insertion_mode(parser, GUMBO_INSERTION_MODE_AFTER_HEAD);
|
2216
|
+
parser->_parser_state->_reprocess_current_token = true;
|
2217
|
+
return true;
|
2218
|
+
} else if (tag_is(token, kStartTag, GUMBO_TAG_TEMPLATE)) {
|
2219
|
+
insert_element_from_token(parser, token);
|
2220
|
+
add_formatting_element(parser, &kActiveFormattingScopeMarker);
|
2221
|
+
parser->_parser_state->_frameset_ok = false;
|
2222
|
+
set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TEMPLATE);
|
2223
|
+
push_template_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TEMPLATE);
|
2224
|
+
return true;
|
2225
|
+
} else if (tag_is(token, kEndTag, GUMBO_TAG_TEMPLATE)) {
|
2226
|
+
if (!has_open_element(parser, GUMBO_TAG_TEMPLATE)) {
|
2227
|
+
parser_add_parse_error(parser, token);
|
2228
|
+
ignore_token(parser);
|
2229
|
+
return false;
|
2230
|
+
}
|
2231
|
+
generate_all_implied_end_tags_thoroughly(parser);
|
2232
|
+
bool success = true;
|
2233
|
+
if (!node_html_tag_is(get_current_node(parser), GUMBO_TAG_TEMPLATE)) {
|
2234
|
+
parser_add_parse_error(parser, token);
|
2235
|
+
success = false;
|
2236
|
+
}
|
2237
|
+
while (!node_html_tag_is(pop_current_node(parser), GUMBO_TAG_TEMPLATE))
|
2238
|
+
;
|
2239
|
+
clear_active_formatting_elements(parser);
|
2240
|
+
pop_template_insertion_mode(parser);
|
2241
|
+
reset_insertion_mode_appropriately(parser);
|
2242
|
+
return success;
|
2153
2243
|
} else if (tag_is(token, kStartTag, GUMBO_TAG_HEAD) ||
|
2154
|
-
(token->type == GUMBO_TOKEN_END_TAG
|
2155
|
-
!tag_in(token, kEndTag, GUMBO_TAG_BODY, GUMBO_TAG_HTML,
|
2156
|
-
GUMBO_TAG_BR, GUMBO_TAG_LAST))) {
|
2157
|
-
parser_add_parse_error(parser, token);
|
2158
|
-
return false;
|
2159
|
-
} else if (tag_is(token, kStartTag, GUMBO_TAG_UNKNOWN) && token->v.start_tag.is_self_closing) {
|
2244
|
+
(token->type == GUMBO_TOKEN_END_TAG)) {
|
2160
2245
|
parser_add_parse_error(parser, token);
|
2161
2246
|
ignore_token(parser);
|
2162
2247
|
return false;
|
2163
2248
|
} else {
|
2164
|
-
|
2165
|
-
assert(node_tag_is(node, GUMBO_TAG_HEAD));
|
2166
|
-
AVOID_UNUSED_VARIABLE_WARNING(node);
|
2249
|
+
pop_current_node(parser);
|
2167
2250
|
set_insertion_mode(parser, GUMBO_INSERTION_MODE_AFTER_HEAD);
|
2168
2251
|
parser->_parser_state->_reprocess_current_token = true;
|
2169
2252
|
return true;
|
2170
2253
|
}
|
2171
|
-
|
2172
2254
|
return true;
|
2173
2255
|
}
|
2174
2256
|
|
@@ -2181,27 +2263,27 @@ static bool handle_in_head_noscript(GumboParser* parser, GumboToken* token) {
|
|
2181
2263
|
return handle_in_body(parser, token);
|
2182
2264
|
} else if (tag_is(token, kEndTag, GUMBO_TAG_NOSCRIPT)) {
|
2183
2265
|
const GumboNode* node = pop_current_node(parser);
|
2184
|
-
assert(
|
2266
|
+
assert(node_html_tag_is(node, GUMBO_TAG_NOSCRIPT));
|
2185
2267
|
AVOID_UNUSED_VARIABLE_WARNING(node);
|
2186
2268
|
set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_HEAD);
|
2187
2269
|
return true;
|
2188
2270
|
} else if (token->type == GUMBO_TOKEN_WHITESPACE ||
|
2189
2271
|
token->type == GUMBO_TOKEN_COMMENT ||
|
2190
|
-
tag_in(token, kStartTag,
|
2191
|
-
|
2192
|
-
|
2272
|
+
tag_in(token, kStartTag,
|
2273
|
+
(gumbo_tagset){TAG(BASEFONT), TAG(BGSOUND), TAG(LINK),
|
2274
|
+
TAG(META), TAG(NOFRAMES), TAG(STYLE)})) {
|
2193
2275
|
return handle_in_head(parser, token);
|
2194
|
-
} else if (tag_in(
|
2195
|
-
|
2196
|
-
|
2197
|
-
|
2276
|
+
} else if (tag_in(
|
2277
|
+
token, kStartTag, (gumbo_tagset){TAG(HEAD), TAG(NOSCRIPT)}) ||
|
2278
|
+
(token->type == GUMBO_TOKEN_END_TAG &&
|
2279
|
+
!tag_is(token, kEndTag, GUMBO_TAG_BR))) {
|
2198
2280
|
parser_add_parse_error(parser, token);
|
2199
2281
|
ignore_token(parser);
|
2200
2282
|
return false;
|
2201
2283
|
} else {
|
2202
2284
|
parser_add_parse_error(parser, token);
|
2203
2285
|
const GumboNode* node = pop_current_node(parser);
|
2204
|
-
assert(
|
2286
|
+
assert(node_html_tag_is(node, GUMBO_TAG_NOSCRIPT));
|
2205
2287
|
AVOID_UNUSED_VARIABLE_WARNING(node);
|
2206
2288
|
set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_HEAD);
|
2207
2289
|
parser->_parser_state->_reprocess_current_token = true;
|
@@ -2233,10 +2315,10 @@ static bool handle_after_head(GumboParser* parser, GumboToken* token) {
|
|
2233
2315
|
insert_element_from_token(parser, token);
|
2234
2316
|
set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_FRAMESET);
|
2235
2317
|
return true;
|
2236
|
-
} else if (tag_in(token, kStartTag,
|
2237
|
-
|
2238
|
-
|
2239
|
-
|
2318
|
+
} else if (tag_in(token, kStartTag,
|
2319
|
+
(gumbo_tagset){TAG(BASE), TAG(BASEFONT), TAG(BGSOUND),
|
2320
|
+
TAG(LINK), TAG(META), TAG(NOFRAMES), TAG(SCRIPT),
|
2321
|
+
TAG(STYLE), TAG(TEMPLATE), TAG(TITLE)})) {
|
2240
2322
|
parser_add_parse_error(parser, token);
|
2241
2323
|
assert(state->_head_element != NULL);
|
2242
2324
|
// This must be flushed before we push the head element on, as there may be
|
@@ -2246,10 +2328,12 @@ static bool handle_after_head(GumboParser* parser, GumboToken* token) {
|
|
2246
2328
|
bool result = handle_in_head(parser, token);
|
2247
2329
|
gumbo_vector_remove(parser, state->_head_element, &state->_open_elements);
|
2248
2330
|
return result;
|
2331
|
+
} else if (tag_is(token, kEndTag, GUMBO_TAG_TEMPLATE)) {
|
2332
|
+
return handle_in_head(parser, token);
|
2249
2333
|
} else if (tag_is(token, kStartTag, GUMBO_TAG_HEAD) ||
|
2250
|
-
|
2251
|
-
|
2252
|
-
|
2334
|
+
(token->type == GUMBO_TOKEN_END_TAG &&
|
2335
|
+
!tag_in(token, kEndTag,
|
2336
|
+
(gumbo_tagset){TAG(BODY), TAG(HTML), TAG(BR)}))) {
|
2253
2337
|
parser_add_parse_error(parser, token);
|
2254
2338
|
ignore_token(parser);
|
2255
2339
|
return false;
|
@@ -2263,24 +2347,23 @@ static bool handle_after_head(GumboParser* parser, GumboToken* token) {
|
|
2263
2347
|
|
2264
2348
|
static void destroy_node(GumboParser* parser, GumboNode* node) {
|
2265
2349
|
switch (node->type) {
|
2266
|
-
case GUMBO_NODE_DOCUMENT:
|
2267
|
-
|
2268
|
-
|
2269
|
-
|
2270
|
-
destroy_node(parser, doc->children.data[i]);
|
2271
|
-
}
|
2272
|
-
gumbo_parser_deallocate(parser, (void*) doc->children.data);
|
2273
|
-
gumbo_parser_deallocate(parser, (void*) doc->name);
|
2274
|
-
gumbo_parser_deallocate(parser, (void*) doc->public_identifier);
|
2275
|
-
gumbo_parser_deallocate(parser, (void*) doc->system_identifier);
|
2350
|
+
case GUMBO_NODE_DOCUMENT: {
|
2351
|
+
GumboDocument* doc = &node->v.document;
|
2352
|
+
for (unsigned int i = 0; i < doc->children.length; ++i) {
|
2353
|
+
destroy_node(parser, doc->children.data[i]);
|
2276
2354
|
}
|
2277
|
-
|
2355
|
+
gumbo_parser_deallocate(parser, (void*) doc->children.data);
|
2356
|
+
gumbo_parser_deallocate(parser, (void*) doc->name);
|
2357
|
+
gumbo_parser_deallocate(parser, (void*) doc->public_identifier);
|
2358
|
+
gumbo_parser_deallocate(parser, (void*) doc->system_identifier);
|
2359
|
+
} break;
|
2360
|
+
case GUMBO_NODE_TEMPLATE:
|
2278
2361
|
case GUMBO_NODE_ELEMENT:
|
2279
|
-
for (int i = 0; i < node->v.element.attributes.length; ++i) {
|
2362
|
+
for (unsigned int i = 0; i < node->v.element.attributes.length; ++i) {
|
2280
2363
|
gumbo_destroy_attribute(parser, node->v.element.attributes.data[i]);
|
2281
2364
|
}
|
2282
2365
|
gumbo_parser_deallocate(parser, node->v.element.attributes.data);
|
2283
|
-
for (int i = 0; i < node->v.element.children.length; ++i) {
|
2366
|
+
for (unsigned int i = 0; i < node->v.element.children.length; ++i) {
|
2284
2367
|
destroy_node(parser, node->v.element.children.data[i]);
|
2285
2368
|
}
|
2286
2369
|
gumbo_parser_deallocate(parser, node->v.element.children.data);
|
@@ -2307,7 +2390,8 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
2307
2390
|
reconstruct_active_formatting_elements(parser);
|
2308
2391
|
insert_text_token(parser, token);
|
2309
2392
|
return true;
|
2310
|
-
} else if (token->type == GUMBO_TOKEN_CHARACTER
|
2393
|
+
} else if (token->type == GUMBO_TOKEN_CHARACTER ||
|
2394
|
+
token->type == GUMBO_TOKEN_CDATA) {
|
2311
2395
|
reconstruct_active_formatting_elements(parser);
|
2312
2396
|
insert_text_token(parser, token);
|
2313
2397
|
set_frameset_not_ok(parser);
|
@@ -2320,20 +2404,26 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
2320
2404
|
ignore_token(parser);
|
2321
2405
|
return false;
|
2322
2406
|
} else if (tag_is(token, kStartTag, GUMBO_TAG_HTML)) {
|
2407
|
+
parser_add_parse_error(parser, token);
|
2408
|
+
if (has_open_element(parser, GUMBO_TAG_TEMPLATE)) {
|
2409
|
+
ignore_token(parser);
|
2410
|
+
return false;
|
2411
|
+
}
|
2323
2412
|
assert(parser->_output->root != NULL);
|
2324
2413
|
assert(parser->_output->root->type == GUMBO_NODE_ELEMENT);
|
2325
|
-
parser_add_parse_error(parser, token);
|
2326
2414
|
merge_attributes(parser, token, parser->_output->root);
|
2327
2415
|
return false;
|
2328
|
-
} else if (tag_in(token, kStartTag,
|
2329
|
-
|
2330
|
-
|
2331
|
-
|
2416
|
+
} else if (tag_in(token, kStartTag,
|
2417
|
+
(gumbo_tagset){TAG(BASE), TAG(BASEFONT), TAG(BGSOUND),
|
2418
|
+
TAG(MENUITEM), TAG(LINK), TAG(META), TAG(NOFRAMES),
|
2419
|
+
TAG(SCRIPT), TAG(STYLE), TAG(TEMPLATE), TAG(TITLE)}) ||
|
2420
|
+
tag_is(token, kEndTag, GUMBO_TAG_TEMPLATE)) {
|
2332
2421
|
return handle_in_head(parser, token);
|
2333
2422
|
} else if (tag_is(token, kStartTag, GUMBO_TAG_BODY)) {
|
2334
2423
|
parser_add_parse_error(parser, token);
|
2335
2424
|
if (state->_open_elements.length < 2 ||
|
2336
|
-
!
|
2425
|
+
!node_html_tag_is(state->_open_elements.data[1], GUMBO_TAG_BODY) ||
|
2426
|
+
has_open_element(parser, GUMBO_TAG_TEMPLATE)) {
|
2337
2427
|
ignore_token(parser);
|
2338
2428
|
return false;
|
2339
2429
|
}
|
@@ -2343,7 +2433,7 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
2343
2433
|
} else if (tag_is(token, kStartTag, GUMBO_TAG_FRAMESET)) {
|
2344
2434
|
parser_add_parse_error(parser, token);
|
2345
2435
|
if (state->_open_elements.length < 2 ||
|
2346
|
-
!
|
2436
|
+
!node_html_tag_is(state->_open_elements.data[1], GUMBO_TAG_BODY) ||
|
2347
2437
|
!state->_frameset_ok) {
|
2348
2438
|
ignore_token(parser);
|
2349
2439
|
return false;
|
@@ -2367,7 +2457,7 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
2367
2457
|
// Remove the body node. We may want to factor this out into a generic
|
2368
2458
|
// helper, but right now this is the only code that needs to do this.
|
2369
2459
|
GumboVector* children = &parser->_output->root->v.element.children;
|
2370
|
-
for (int i = 0; i < children->length; ++i) {
|
2460
|
+
for (unsigned int i = 0; i < children->length; ++i) {
|
2371
2461
|
if (children->data[i] == body_node) {
|
2372
2462
|
gumbo_vector_remove_at(parser, i, children);
|
2373
2463
|
break;
|
@@ -2380,33 +2470,32 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
2380
2470
|
set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_FRAMESET);
|
2381
2471
|
return true;
|
2382
2472
|
} else if (token->type == GUMBO_TOKEN_EOF) {
|
2383
|
-
for (int i = 0; i < state->_open_elements.length; ++i) {
|
2384
|
-
if (!
|
2385
|
-
|
2386
|
-
|
2387
|
-
|
2388
|
-
GUMBO_TAG_HTML, GUMBO_TAG_LAST)) {
|
2473
|
+
for (unsigned int i = 0; i < state->_open_elements.length; ++i) {
|
2474
|
+
if (!node_tag_in_set(state->_open_elements.data[i],
|
2475
|
+
(gumbo_tagset){TAG(DD), TAG(DT), TAG(LI), TAG(P), TAG(TBODY),
|
2476
|
+
TAG(TD), TAG(TFOOT), TAG(TH), TAG(THEAD), TAG(TR), TAG(BODY),
|
2477
|
+
TAG(HTML)})) {
|
2389
2478
|
parser_add_parse_error(parser, token);
|
2390
|
-
return false;
|
2391
2479
|
}
|
2392
2480
|
}
|
2481
|
+
if (get_current_template_insertion_mode(parser) !=
|
2482
|
+
GUMBO_INSERTION_MODE_INITIAL) {
|
2483
|
+
return handle_in_template(parser, token);
|
2484
|
+
}
|
2393
2485
|
return true;
|
2394
|
-
} else if (tag_in(token, kEndTag,
|
2395
|
-
GUMBO_TAG_LAST)) {
|
2486
|
+
} else if (tag_in(token, kEndTag, (gumbo_tagset){TAG(BODY), TAG(HTML)})) {
|
2396
2487
|
if (!has_an_element_in_scope(parser, GUMBO_TAG_BODY)) {
|
2397
2488
|
parser_add_parse_error(parser, token);
|
2398
2489
|
ignore_token(parser);
|
2399
2490
|
return false;
|
2400
2491
|
}
|
2401
2492
|
bool success = true;
|
2402
|
-
for (int i = 0; i < state->_open_elements.length; ++i) {
|
2403
|
-
if (!
|
2404
|
-
|
2405
|
-
|
2406
|
-
|
2407
|
-
|
2408
|
-
GUMBO_TAG_TR, GUMBO_TAG_BODY, GUMBO_TAG_HTML,
|
2409
|
-
GUMBO_TAG_LAST)) {
|
2493
|
+
for (unsigned int i = 0; i < state->_open_elements.length; ++i) {
|
2494
|
+
if (!node_tag_in_set(state->_open_elements.data[i],
|
2495
|
+
(gumbo_tagset){TAG(DD), TAG(DT), TAG(LI), TAG(OPTGROUP),
|
2496
|
+
TAG(OPTION), TAG(P), TAG(RB), TAG(RP), TAG(RT), TAG(RTC),
|
2497
|
+
TAG(TBODY), TAG(TD), TAG(TFOOT), TAG(TH), TAG(THEAD), TAG(TR),
|
2498
|
+
TAG(BODY), TAG(HTML)})) {
|
2410
2499
|
parser_add_parse_error(parser, token);
|
2411
2500
|
success = false;
|
2412
2501
|
break;
|
@@ -2417,58 +2506,58 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
2417
2506
|
parser->_parser_state->_reprocess_current_token = true;
|
2418
2507
|
} else {
|
2419
2508
|
GumboNode* body = state->_open_elements.data[1];
|
2420
|
-
assert(
|
2509
|
+
assert(node_html_tag_is(body, GUMBO_TAG_BODY));
|
2421
2510
|
record_end_of_element(state->_current_token, &body->v.element);
|
2422
2511
|
}
|
2423
2512
|
return success;
|
2424
|
-
} else if (tag_in(token, kStartTag,
|
2425
|
-
|
2426
|
-
|
2427
|
-
|
2428
|
-
|
2429
|
-
|
2430
|
-
|
2431
|
-
GUMBO_TAG_SUMMARY, GUMBO_TAG_UL, GUMBO_TAG_LAST)) {
|
2513
|
+
} else if (tag_in(token, kStartTag,
|
2514
|
+
(gumbo_tagset){TAG(ADDRESS), TAG(ARTICLE), TAG(ASIDE),
|
2515
|
+
TAG(BLOCKQUOTE), TAG(CENTER), TAG(DETAILS), TAG(DIR),
|
2516
|
+
TAG(DIV), TAG(DL), TAG(FIELDSET), TAG(FIGCAPTION),
|
2517
|
+
TAG(FIGURE), TAG(FOOTER), TAG(HEADER), TAG(HGROUP),
|
2518
|
+
TAG(MENU), TAG(MAIN), TAG(NAV), TAG(OL), TAG(P),
|
2519
|
+
TAG(SECTION), TAG(SUMMARY), TAG(UL)})) {
|
2432
2520
|
bool result = maybe_implicitly_close_p_tag(parser, token);
|
2433
2521
|
insert_element_from_token(parser, token);
|
2434
2522
|
return result;
|
2435
|
-
} else if (tag_in(token, kStartTag,
|
2436
|
-
|
2523
|
+
} else if (tag_in(token, kStartTag, (gumbo_tagset){TAG(H1), TAG(H2), TAG(H3),
|
2524
|
+
TAG(H4), TAG(H5), TAG(H6)})) {
|
2437
2525
|
bool result = maybe_implicitly_close_p_tag(parser, token);
|
2438
|
-
if (
|
2439
|
-
|
2440
|
-
|
2526
|
+
if (node_tag_in_set(
|
2527
|
+
get_current_node(parser), (gumbo_tagset){TAG(H1), TAG(H2), TAG(H3),
|
2528
|
+
TAG(H4), TAG(H5), TAG(H6)})) {
|
2441
2529
|
parser_add_parse_error(parser, token);
|
2442
2530
|
pop_current_node(parser);
|
2443
2531
|
result = false;
|
2444
2532
|
}
|
2445
2533
|
insert_element_from_token(parser, token);
|
2446
2534
|
return result;
|
2447
|
-
} else if (tag_in(token, kStartTag,
|
2448
|
-
GUMBO_TAG_LAST)) {
|
2535
|
+
} else if (tag_in(token, kStartTag, (gumbo_tagset){TAG(PRE), TAG(LISTING)})) {
|
2449
2536
|
bool result = maybe_implicitly_close_p_tag(parser, token);
|
2450
2537
|
insert_element_from_token(parser, token);
|
2451
2538
|
state->_ignore_next_linefeed = true;
|
2452
2539
|
state->_frameset_ok = false;
|
2453
2540
|
return result;
|
2454
2541
|
} else if (tag_is(token, kStartTag, GUMBO_TAG_FORM)) {
|
2455
|
-
if (state->_form_element != NULL
|
2542
|
+
if (state->_form_element != NULL &&
|
2543
|
+
!has_open_element(parser, GUMBO_TAG_TEMPLATE)) {
|
2456
2544
|
gumbo_debug("Ignoring nested form.\n");
|
2457
2545
|
parser_add_parse_error(parser, token);
|
2458
2546
|
ignore_token(parser);
|
2459
2547
|
return false;
|
2460
2548
|
}
|
2461
2549
|
bool result = maybe_implicitly_close_p_tag(parser, token);
|
2462
|
-
|
2463
|
-
|
2550
|
+
GumboNode* form_element = insert_element_from_token(parser, token);
|
2551
|
+
if (!has_open_element(parser, GUMBO_TAG_TEMPLATE)) {
|
2552
|
+
state->_form_element = form_element;
|
2553
|
+
}
|
2464
2554
|
return result;
|
2465
2555
|
} else if (tag_is(token, kStartTag, GUMBO_TAG_LI)) {
|
2466
2556
|
maybe_implicitly_close_list_tag(parser, token, true);
|
2467
2557
|
bool result = maybe_implicitly_close_p_tag(parser, token);
|
2468
2558
|
insert_element_from_token(parser, token);
|
2469
2559
|
return result;
|
2470
|
-
} else if (tag_in(token, kStartTag,
|
2471
|
-
GUMBO_TAG_LAST)) {
|
2560
|
+
} else if (tag_in(token, kStartTag, (gumbo_tagset){TAG(DD), TAG(DT)})) {
|
2472
2561
|
maybe_implicitly_close_list_tag(parser, token, false);
|
2473
2562
|
bool result = maybe_implicitly_close_p_tag(parser, token);
|
2474
2563
|
insert_element_from_token(parser, token);
|
@@ -2481,7 +2570,8 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
2481
2570
|
} else if (tag_is(token, kStartTag, GUMBO_TAG_BUTTON)) {
|
2482
2571
|
if (has_an_element_in_scope(parser, GUMBO_TAG_BUTTON)) {
|
2483
2572
|
parser_add_parse_error(parser, token);
|
2484
|
-
implicitly_close_tags(
|
2573
|
+
implicitly_close_tags(
|
2574
|
+
parser, token, GUMBO_NAMESPACE_HTML, GUMBO_TAG_BUTTON);
|
2485
2575
|
state->_reprocess_current_token = true;
|
2486
2576
|
return false;
|
2487
2577
|
}
|
@@ -2489,67 +2579,83 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
2489
2579
|
insert_element_from_token(parser, token);
|
2490
2580
|
state->_frameset_ok = false;
|
2491
2581
|
return true;
|
2492
|
-
} else if (tag_in(token, kEndTag,
|
2493
|
-
|
2494
|
-
|
2495
|
-
|
2496
|
-
|
2497
|
-
|
2498
|
-
|
2499
|
-
GUMBO_TAG_SECTION, GUMBO_TAG_SUMMARY, GUMBO_TAG_UL,
|
2500
|
-
GUMBO_TAG_LAST)) {
|
2582
|
+
} else if (tag_in(token, kEndTag,
|
2583
|
+
(gumbo_tagset){TAG(ADDRESS), TAG(ARTICLE), TAG(ASIDE),
|
2584
|
+
TAG(BLOCKQUOTE), TAG(BUTTON), TAG(CENTER), TAG(DETAILS),
|
2585
|
+
TAG(DIR), TAG(DIV), TAG(DL), TAG(FIELDSET),
|
2586
|
+
TAG(FIGCAPTION), TAG(FIGURE), TAG(FOOTER), TAG(HEADER),
|
2587
|
+
TAG(HGROUP), TAG(LISTING), TAG(MAIN), TAG(MENU), TAG(NAV),
|
2588
|
+
TAG(OL), TAG(PRE), TAG(SECTION), TAG(SUMMARY), TAG(UL)})) {
|
2501
2589
|
GumboTag tag = token->v.end_tag;
|
2502
2590
|
if (!has_an_element_in_scope(parser, tag)) {
|
2503
2591
|
parser_add_parse_error(parser, token);
|
2504
2592
|
ignore_token(parser);
|
2505
2593
|
return false;
|
2506
2594
|
}
|
2507
|
-
implicitly_close_tags(
|
2595
|
+
implicitly_close_tags(
|
2596
|
+
parser, token, GUMBO_NAMESPACE_HTML, token->v.end_tag);
|
2508
2597
|
return true;
|
2509
2598
|
} else if (tag_is(token, kEndTag, GUMBO_TAG_FORM)) {
|
2510
|
-
|
2511
|
-
|
2512
|
-
|
2513
|
-
|
2514
|
-
|
2515
|
-
|
2516
|
-
|
2517
|
-
|
2518
|
-
|
2519
|
-
|
2520
|
-
|
2521
|
-
|
2522
|
-
|
2523
|
-
|
2524
|
-
|
2525
|
-
|
2526
|
-
|
2599
|
+
if (has_open_element(parser, GUMBO_TAG_TEMPLATE)) {
|
2600
|
+
if (!has_an_element_in_scope(parser, GUMBO_TAG_FORM)) {
|
2601
|
+
parser_add_parse_error(parser, token);
|
2602
|
+
ignore_token(parser);
|
2603
|
+
return false;
|
2604
|
+
}
|
2605
|
+
bool success = true;
|
2606
|
+
generate_implied_end_tags(parser, GUMBO_TAG_LAST);
|
2607
|
+
if (!node_html_tag_is(get_current_node(parser), GUMBO_TAG_FORM)) {
|
2608
|
+
parser_add_parse_error(parser, token);
|
2609
|
+
return false;
|
2610
|
+
}
|
2611
|
+
while (!node_html_tag_is(pop_current_node(parser), GUMBO_TAG_FORM))
|
2612
|
+
;
|
2613
|
+
return success;
|
2614
|
+
} else {
|
2615
|
+
bool result = true;
|
2616
|
+
const GumboNode* node = state->_form_element;
|
2617
|
+
assert(!node || node->type == GUMBO_NODE_ELEMENT);
|
2618
|
+
state->_form_element = NULL;
|
2619
|
+
if (!node || !has_node_in_scope(parser, node)) {
|
2620
|
+
gumbo_debug("Closing an unopened form.\n");
|
2621
|
+
parser_add_parse_error(parser, token);
|
2622
|
+
ignore_token(parser);
|
2623
|
+
return false;
|
2624
|
+
}
|
2625
|
+
// This differs from implicitly_close_tags because we remove *only* the
|
2626
|
+
// <form> element; other nodes are left in scope.
|
2627
|
+
generate_implied_end_tags(parser, GUMBO_TAG_LAST);
|
2628
|
+
if (get_current_node(parser) != node) {
|
2629
|
+
parser_add_parse_error(parser, token);
|
2630
|
+
result = false;
|
2631
|
+
}
|
2527
2632
|
|
2528
|
-
|
2529
|
-
|
2530
|
-
|
2531
|
-
|
2532
|
-
|
2533
|
-
|
2633
|
+
GumboVector* open_elements = &state->_open_elements;
|
2634
|
+
int index = gumbo_vector_index_of(open_elements, node);
|
2635
|
+
assert(index >= 0);
|
2636
|
+
gumbo_vector_remove_at(parser, index, open_elements);
|
2637
|
+
return result;
|
2638
|
+
}
|
2534
2639
|
} else if (tag_is(token, kEndTag, GUMBO_TAG_P)) {
|
2535
2640
|
if (!has_an_element_in_button_scope(parser, GUMBO_TAG_P)) {
|
2536
2641
|
parser_add_parse_error(parser, token);
|
2537
|
-
reconstruct_active_formatting_elements(parser);
|
2642
|
+
// reconstruct_active_formatting_elements(parser);
|
2538
2643
|
insert_element_of_tag_type(
|
2539
2644
|
parser, GUMBO_TAG_P, GUMBO_INSERTION_CONVERTED_FROM_END_TAG);
|
2540
2645
|
state->_reprocess_current_token = true;
|
2541
2646
|
return false;
|
2542
2647
|
}
|
2543
|
-
return implicitly_close_tags(
|
2648
|
+
return implicitly_close_tags(
|
2649
|
+
parser, token, GUMBO_NAMESPACE_HTML, GUMBO_TAG_P);
|
2544
2650
|
} else if (tag_is(token, kEndTag, GUMBO_TAG_LI)) {
|
2545
2651
|
if (!has_an_element_in_list_scope(parser, GUMBO_TAG_LI)) {
|
2546
2652
|
parser_add_parse_error(parser, token);
|
2547
2653
|
ignore_token(parser);
|
2548
2654
|
return false;
|
2549
2655
|
}
|
2550
|
-
return implicitly_close_tags(
|
2551
|
-
|
2552
|
-
|
2656
|
+
return implicitly_close_tags(
|
2657
|
+
parser, token, GUMBO_NAMESPACE_HTML, GUMBO_TAG_LI);
|
2658
|
+
} else if (tag_in(token, kEndTag, (gumbo_tagset){TAG(DD), TAG(DT)})) {
|
2553
2659
|
assert(token->type == GUMBO_TOKEN_END_TAG);
|
2554
2660
|
GumboTag token_tag = token->v.end_tag;
|
2555
2661
|
if (!has_an_element_in_scope(parser, token_tag)) {
|
@@ -2557,12 +2663,13 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
2557
2663
|
ignore_token(parser);
|
2558
2664
|
return false;
|
2559
2665
|
}
|
2560
|
-
return implicitly_close_tags(
|
2561
|
-
|
2562
|
-
|
2666
|
+
return implicitly_close_tags(
|
2667
|
+
parser, token, GUMBO_NAMESPACE_HTML, token_tag);
|
2668
|
+
} else if (tag_in(token, kEndTag, (gumbo_tagset){TAG(H1), TAG(H2), TAG(H3),
|
2669
|
+
TAG(H4), TAG(H5), TAG(H6)})) {
|
2563
2670
|
if (!has_an_element_in_scope_with_tagname(
|
2564
|
-
parser, GUMBO_TAG_H1, GUMBO_TAG_H2, GUMBO_TAG_H3,
|
2565
|
-
|
2671
|
+
parser, 6, (GumboTag[]){GUMBO_TAG_H1, GUMBO_TAG_H2, GUMBO_TAG_H3,
|
2672
|
+
GUMBO_TAG_H4, GUMBO_TAG_H5, GUMBO_TAG_H6})) {
|
2566
2673
|
// No heading open; ignore the token entirely.
|
2567
2674
|
parser_add_parse_error(parser, token);
|
2568
2675
|
ignore_token(parser);
|
@@ -2570,7 +2677,7 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
2570
2677
|
} else {
|
2571
2678
|
generate_implied_end_tags(parser, GUMBO_TAG_LAST);
|
2572
2679
|
const GumboNode* current_node = get_current_node(parser);
|
2573
|
-
bool success =
|
2680
|
+
bool success = node_html_tag_is(current_node, token->v.end_tag);
|
2574
2681
|
if (!success) {
|
2575
2682
|
// There're children of the heading currently open; close them below and
|
2576
2683
|
// record a parse error.
|
@@ -2580,9 +2687,9 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
2580
2687
|
}
|
2581
2688
|
do {
|
2582
2689
|
current_node = pop_current_node(parser);
|
2583
|
-
} while (!
|
2584
|
-
|
2585
|
-
|
2690
|
+
} while (!node_tag_in_set(
|
2691
|
+
current_node, (gumbo_tagset){TAG(H1), TAG(H2), TAG(H3),
|
2692
|
+
TAG(H4), TAG(H5), TAG(H6)}));
|
2586
2693
|
return success;
|
2587
2694
|
}
|
2588
2695
|
} else if (tag_is(token, kStartTag, GUMBO_TAG_A)) {
|
@@ -2600,19 +2707,17 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
2600
2707
|
if (find_last_anchor_index(parser, &last_a)) {
|
2601
2708
|
void* last_element = gumbo_vector_remove_at(
|
2602
2709
|
parser, last_a, &state->_active_formatting_elements);
|
2603
|
-
gumbo_vector_remove(
|
2604
|
-
parser, last_element, &state->_open_elements);
|
2710
|
+
gumbo_vector_remove(parser, last_element, &state->_open_elements);
|
2605
2711
|
}
|
2606
2712
|
success = false;
|
2607
2713
|
}
|
2608
2714
|
reconstruct_active_formatting_elements(parser);
|
2609
2715
|
add_formatting_element(parser, insert_element_from_token(parser, token));
|
2610
2716
|
return success;
|
2611
|
-
} else if (tag_in(token, kStartTag,
|
2612
|
-
|
2613
|
-
|
2614
|
-
|
2615
|
-
GUMBO_TAG_LAST)) {
|
2717
|
+
} else if (tag_in(token, kStartTag,
|
2718
|
+
(gumbo_tagset){TAG(B), TAG(BIG), TAG(CODE), TAG(EM), TAG(FONT),
|
2719
|
+
TAG(I), TAG(S), TAG(SMALL), TAG(STRIKE), TAG(STRONG),
|
2720
|
+
TAG(TT), TAG(U)})) {
|
2616
2721
|
reconstruct_active_formatting_elements(parser);
|
2617
2722
|
add_formatting_element(parser, insert_element_from_token(parser, token));
|
2618
2723
|
return true;
|
@@ -2628,28 +2733,27 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
2628
2733
|
insert_element_from_token(parser, token);
|
2629
2734
|
add_formatting_element(parser, get_current_node(parser));
|
2630
2735
|
return result;
|
2631
|
-
} else if (tag_in(token, kEndTag,
|
2632
|
-
|
2633
|
-
|
2634
|
-
|
2635
|
-
GUMBO_TAG_U, GUMBO_TAG_LAST)) {
|
2736
|
+
} else if (tag_in(token, kEndTag,
|
2737
|
+
(gumbo_tagset){TAG(A), TAG(B), TAG(BIG), TAG(CODE), TAG(EM),
|
2738
|
+
TAG(FONT), TAG(I), TAG(NOBR), TAG(S), TAG(SMALL),
|
2739
|
+
TAG(STRIKE), TAG(STRONG), TAG(TT), TAG(U)})) {
|
2636
2740
|
return adoption_agency_algorithm(parser, token, token->v.end_tag);
|
2637
|
-
} else if (tag_in(token, kStartTag,
|
2638
|
-
|
2741
|
+
} else if (tag_in(token, kStartTag,
|
2742
|
+
(gumbo_tagset){TAG(APPLET), TAG(MARQUEE), TAG(OBJECT)})) {
|
2639
2743
|
reconstruct_active_formatting_elements(parser);
|
2640
2744
|
insert_element_from_token(parser, token);
|
2641
2745
|
add_formatting_element(parser, &kActiveFormattingScopeMarker);
|
2642
2746
|
set_frameset_not_ok(parser);
|
2643
2747
|
return true;
|
2644
|
-
} else if (tag_in(token, kEndTag,
|
2645
|
-
|
2748
|
+
} else if (tag_in(token, kEndTag,
|
2749
|
+
(gumbo_tagset){TAG(APPLET), TAG(MARQUEE), TAG(OBJECT)})) {
|
2646
2750
|
GumboTag token_tag = token->v.end_tag;
|
2647
2751
|
if (!has_an_element_in_table_scope(parser, token_tag)) {
|
2648
2752
|
parser_add_parse_error(parser, token);
|
2649
2753
|
ignore_token(parser);
|
2650
2754
|
return false;
|
2651
2755
|
}
|
2652
|
-
implicitly_close_tags(parser, token, token_tag);
|
2756
|
+
implicitly_close_tags(parser, token, GUMBO_NAMESPACE_HTML, token_tag);
|
2653
2757
|
clear_active_formatting_elements(parser);
|
2654
2758
|
return true;
|
2655
2759
|
} else if (tag_is(token, kStartTag, GUMBO_TAG_TABLE)) {
|
@@ -2661,9 +2765,9 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
2661
2765
|
set_frameset_not_ok(parser);
|
2662
2766
|
set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE);
|
2663
2767
|
return true;
|
2664
|
-
} else if (tag_in(token, kStartTag,
|
2665
|
-
|
2666
|
-
|
2768
|
+
} else if (tag_in(token, kStartTag,
|
2769
|
+
(gumbo_tagset){TAG(AREA), TAG(BR), TAG(EMBED), TAG(IMG),
|
2770
|
+
TAG(IMAGE), TAG(KEYGEN), TAG(WBR)})) {
|
2667
2771
|
bool success = true;
|
2668
2772
|
if (tag_is(token, kStartTag, GUMBO_TAG_IMAGE)) {
|
2669
2773
|
success = false;
|
@@ -2693,8 +2797,8 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
2693
2797
|
pop_current_node(parser);
|
2694
2798
|
acknowledge_self_closing_tag(parser);
|
2695
2799
|
return true;
|
2696
|
-
} else if (tag_in(token, kStartTag,
|
2697
|
-
|
2800
|
+
} else if (tag_in(token, kStartTag,
|
2801
|
+
(gumbo_tagset){TAG(PARAM), TAG(SOURCE), TAG(TRACK)})) {
|
2698
2802
|
insert_element_from_token(parser, token);
|
2699
2803
|
pop_current_node(parser);
|
2700
2804
|
acknowledge_self_closing_tag(parser);
|
@@ -2708,7 +2812,8 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
2708
2812
|
return result;
|
2709
2813
|
} else if (tag_is(token, kStartTag, GUMBO_TAG_ISINDEX)) {
|
2710
2814
|
parser_add_parse_error(parser, token);
|
2711
|
-
if (parser->_parser_state->_form_element != NULL
|
2815
|
+
if (parser->_parser_state->_form_element != NULL &&
|
2816
|
+
!has_open_element(parser, GUMBO_TAG_TEMPLATE)) {
|
2712
2817
|
ignore_token(parser);
|
2713
2818
|
return false;
|
2714
2819
|
}
|
@@ -2723,15 +2828,18 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
2723
2828
|
|
2724
2829
|
GumboNode* form = insert_element_of_tag_type(
|
2725
2830
|
parser, GUMBO_TAG_FORM, GUMBO_INSERTION_FROM_ISINDEX);
|
2831
|
+
if (!has_open_element(parser, GUMBO_TAG_TEMPLATE)) {
|
2832
|
+
parser->_parser_state->_form_element = form;
|
2833
|
+
}
|
2726
2834
|
if (action_attr) {
|
2727
2835
|
gumbo_vector_add(parser, action_attr, &form->v.element.attributes);
|
2728
2836
|
}
|
2729
|
-
insert_element_of_tag_type(
|
2730
|
-
|
2731
|
-
pop_current_node(parser);
|
2837
|
+
insert_element_of_tag_type(
|
2838
|
+
parser, GUMBO_TAG_HR, GUMBO_INSERTION_FROM_ISINDEX);
|
2839
|
+
pop_current_node(parser); // <hr>
|
2732
2840
|
|
2733
|
-
insert_element_of_tag_type(
|
2734
|
-
|
2841
|
+
insert_element_of_tag_type(
|
2842
|
+
parser, GUMBO_TAG_LABEL, GUMBO_INSERTION_FROM_ISINDEX);
|
2735
2843
|
TextNodeBufferState* text_state = &parser->_parser_state->_text_node;
|
2736
2844
|
text_state->_start_original_text = token->original_text.data;
|
2737
2845
|
text_state->_start_position = token->position;
|
@@ -2744,15 +2852,15 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
2744
2852
|
text_state->_buffer.capacity = prompt_attr_length + 1;
|
2745
2853
|
gumbo_destroy_attribute(parser, prompt_attr);
|
2746
2854
|
} else {
|
2747
|
-
GumboStringPiece prompt_text =
|
2748
|
-
"This is a searchable index. Enter search keywords: ");
|
2855
|
+
GumboStringPiece prompt_text =
|
2856
|
+
GUMBO_STRING("This is a searchable index. Enter search keywords: ");
|
2749
2857
|
gumbo_string_buffer_append_string(
|
2750
2858
|
parser, &prompt_text, &text_state->_buffer);
|
2751
2859
|
}
|
2752
2860
|
|
2753
2861
|
GumboNode* input = insert_element_of_tag_type(
|
2754
2862
|
parser, GUMBO_TAG_INPUT, GUMBO_INSERTION_FROM_ISINDEX);
|
2755
|
-
for (int i = 0; i < token_attrs->length; ++i) {
|
2863
|
+
for (unsigned int i = 0; i < token_attrs->length; ++i) {
|
2756
2864
|
GumboAttribute* attr = token_attrs->data[i];
|
2757
2865
|
if (attr != prompt_attr && attr != action_attr && attr != name_attr) {
|
2758
2866
|
gumbo_vector_add(parser, attr, &input->v.element.attributes);
|
@@ -2765,6 +2873,13 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
2765
2873
|
// touching the attributes.
|
2766
2874
|
ignore_token(parser);
|
2767
2875
|
|
2876
|
+
// The name attribute, if present, should be destroyed since it's ignored
|
2877
|
+
// when copying over. The action attribute should be kept since it's moved
|
2878
|
+
// to the form.
|
2879
|
+
if (name_attr) {
|
2880
|
+
gumbo_destroy_attribute(parser, name_attr);
|
2881
|
+
}
|
2882
|
+
|
2768
2883
|
GumboAttribute* name =
|
2769
2884
|
gumbo_parser_allocate(parser, sizeof(GumboAttribute));
|
2770
2885
|
GumboStringPiece name_str = GUMBO_STRING("name");
|
@@ -2780,12 +2895,15 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
2780
2895
|
name->value_end = kGumboEmptySourcePosition;
|
2781
2896
|
gumbo_vector_add(parser, name, &input->v.element.attributes);
|
2782
2897
|
|
2783
|
-
pop_current_node(parser);
|
2784
|
-
pop_current_node(parser);
|
2898
|
+
pop_current_node(parser); // <input>
|
2899
|
+
pop_current_node(parser); // <label>
|
2785
2900
|
insert_element_of_tag_type(
|
2786
2901
|
parser, GUMBO_TAG_HR, GUMBO_INSERTION_FROM_ISINDEX);
|
2787
|
-
pop_current_node(parser);
|
2788
|
-
pop_current_node(parser);
|
2902
|
+
pop_current_node(parser); // <hr>
|
2903
|
+
pop_current_node(parser); // <form>
|
2904
|
+
if (!has_open_element(parser, GUMBO_TAG_TEMPLATE)) {
|
2905
|
+
parser->_parser_state->_form_element = NULL;
|
2906
|
+
}
|
2789
2907
|
return false;
|
2790
2908
|
} else if (tag_is(token, kStartTag, GUMBO_TAG_TEXTAREA)) {
|
2791
2909
|
run_generic_parsing_algorithm(parser, token, GUMBO_LEX_RCDATA);
|
@@ -2820,21 +2938,27 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
2820
2938
|
set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_SELECT);
|
2821
2939
|
}
|
2822
2940
|
return true;
|
2823
|
-
} else if (tag_in(token, kStartTag,
|
2824
|
-
|
2825
|
-
if (
|
2941
|
+
} else if (tag_in(token, kStartTag,
|
2942
|
+
(gumbo_tagset){TAG(OPTION), TAG(OPTGROUP)})) {
|
2943
|
+
if (node_html_tag_is(get_current_node(parser), GUMBO_TAG_OPTION)) {
|
2826
2944
|
pop_current_node(parser);
|
2827
2945
|
}
|
2828
2946
|
reconstruct_active_formatting_elements(parser);
|
2829
2947
|
insert_element_from_token(parser, token);
|
2830
2948
|
return true;
|
2831
|
-
} else if (tag_in(token, kStartTag,
|
2832
|
-
|
2949
|
+
} else if (tag_in(token, kStartTag,
|
2950
|
+
(gumbo_tagset){TAG(RB), TAG(RP), TAG(RT), TAG(RTC)})) {
|
2833
2951
|
bool success = true;
|
2952
|
+
GumboTag exception =
|
2953
|
+
tag_in(token, kStartTag, (gumbo_tagset){TAG(RT), TAG(RP)})
|
2954
|
+
? GUMBO_TAG_RTC
|
2955
|
+
: GUMBO_TAG_LAST;
|
2834
2956
|
if (has_an_element_in_scope(parser, GUMBO_TAG_RUBY)) {
|
2835
|
-
generate_implied_end_tags(parser,
|
2957
|
+
generate_implied_end_tags(parser, exception);
|
2836
2958
|
}
|
2837
|
-
if (!
|
2959
|
+
if (!node_html_tag_is(get_current_node(parser), GUMBO_TAG_RUBY) &&
|
2960
|
+
!(exception == GUMBO_TAG_LAST ||
|
2961
|
+
node_html_tag_is(get_current_node(parser), GUMBO_TAG_RTC))) {
|
2838
2962
|
parser_add_parse_error(parser, token);
|
2839
2963
|
success = false;
|
2840
2964
|
}
|
@@ -2867,11 +2991,10 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
2867
2991
|
acknowledge_self_closing_tag(parser);
|
2868
2992
|
}
|
2869
2993
|
return true;
|
2870
|
-
} else if (tag_in(token, kStartTag,
|
2871
|
-
|
2872
|
-
|
2873
|
-
|
2874
|
-
GUMBO_TAG_LAST)) {
|
2994
|
+
} else if (tag_in(token, kStartTag,
|
2995
|
+
(gumbo_tagset){TAG(CAPTION), TAG(COL), TAG(COLGROUP),
|
2996
|
+
TAG(FRAME), TAG(HEAD), TAG(TBODY), TAG(TD), TAG(TFOOT),
|
2997
|
+
TAG(TH), TAG(THEAD), TAG(TR)})) {
|
2875
2998
|
parser_add_parse_error(parser, token);
|
2876
2999
|
ignore_token(parser);
|
2877
3000
|
return false;
|
@@ -2883,22 +3006,22 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
2883
3006
|
assert(token->type == GUMBO_TOKEN_END_TAG);
|
2884
3007
|
GumboTag end_tag = token->v.end_tag;
|
2885
3008
|
assert(state->_open_elements.length > 0);
|
2886
|
-
assert(
|
3009
|
+
assert(node_html_tag_is(state->_open_elements.data[0], GUMBO_TAG_HTML));
|
2887
3010
|
// Walk up the stack of open elements until we find one that either:
|
2888
3011
|
// a) Matches the tag name we saw
|
2889
3012
|
// b) Is in the "special" category.
|
2890
3013
|
// If we see a), implicitly close everything up to and including it. If we
|
2891
3014
|
// see b), then record a parse error, don't close anything (except the
|
2892
3015
|
// implied end tags) and ignore the end tag token.
|
2893
|
-
for (int i = state->_open_elements.length; --i >= 0;
|
3016
|
+
for (int i = state->_open_elements.length; --i >= 0;) {
|
2894
3017
|
const GumboNode* node = state->_open_elements.data[i];
|
2895
|
-
if (node
|
2896
|
-
node_tag_is(node, end_tag)) {
|
3018
|
+
if (node_html_tag_is(node, end_tag)) {
|
2897
3019
|
generate_implied_end_tags(parser, end_tag);
|
2898
3020
|
// TODO(jdtang): Do I need to add a parse error here? The condition in
|
2899
3021
|
// the spec seems like it's the inverse of the loop condition above, and
|
2900
3022
|
// so would never fire.
|
2901
|
-
while (node != pop_current_node(parser))
|
3023
|
+
while (node != pop_current_node(parser))
|
3024
|
+
; // Pop everything.
|
2902
3025
|
return true;
|
2903
3026
|
} else if (is_special_node(node)) {
|
2904
3027
|
parser_add_parse_error(parser, token);
|
@@ -2914,7 +3037,8 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
2914
3037
|
|
2915
3038
|
// http://www.whatwg.org/specs/web-apps/current-work/complete/tokenization.html#parsing-main-incdata
|
2916
3039
|
static bool handle_text(GumboParser* parser, GumboToken* token) {
|
2917
|
-
if (token->type == GUMBO_TOKEN_CHARACTER ||
|
3040
|
+
if (token->type == GUMBO_TOKEN_CHARACTER ||
|
3041
|
+
token->type == GUMBO_TOKEN_WHITESPACE) {
|
2918
3042
|
insert_text_token(parser, token);
|
2919
3043
|
} else {
|
2920
3044
|
// We provide only bare-bones script handling that doesn't involve any of
|
@@ -2974,13 +3098,12 @@ static bool handle_in_table(GumboParser* parser, GumboToken* token) {
|
|
2974
3098
|
parser->_parser_state->_reprocess_current_token = true;
|
2975
3099
|
set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_COLUMN_GROUP);
|
2976
3100
|
return true;
|
2977
|
-
} else if (tag_in(token, kStartTag,
|
2978
|
-
|
2979
|
-
|
3101
|
+
} else if (tag_in(token, kStartTag,
|
3102
|
+
(gumbo_tagset){TAG(TBODY), TAG(TFOOT), TAG(THEAD), TAG(TD),
|
3103
|
+
TAG(TH), TAG(TR)})) {
|
2980
3104
|
clear_stack_to_table_context(parser);
|
2981
3105
|
set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE_BODY);
|
2982
|
-
if (tag_in(token, kStartTag,
|
2983
|
-
GUMBO_TAG_LAST)) {
|
3106
|
+
if (tag_in(token, kStartTag, (gumbo_tagset){TAG(TD), TAG(TH), TAG(TR)})) {
|
2984
3107
|
insert_element_of_tag_type(
|
2985
3108
|
parser, GUMBO_TAG_TBODY, GUMBO_INSERTION_IMPLIED);
|
2986
3109
|
state->_reprocess_current_token = true;
|
@@ -3002,27 +3125,27 @@ static bool handle_in_table(GumboParser* parser, GumboToken* token) {
|
|
3002
3125
|
return false;
|
3003
3126
|
}
|
3004
3127
|
return true;
|
3005
|
-
} else if (tag_in(token, kEndTag,
|
3006
|
-
|
3007
|
-
|
3008
|
-
|
3009
|
-
GUMBO_TAG_LAST)) {
|
3128
|
+
} else if (tag_in(token, kEndTag,
|
3129
|
+
(gumbo_tagset){TAG(BODY), TAG(CAPTION), TAG(COL),
|
3130
|
+
TAG(COLGROUP), TAG(HTML), TAG(TBODY), TAG(TD), TAG(TFOOT),
|
3131
|
+
TAG(TH), TAG(THEAD), TAG(TR)})) {
|
3010
3132
|
parser_add_parse_error(parser, token);
|
3011
3133
|
ignore_token(parser);
|
3012
3134
|
return false;
|
3013
|
-
} else if (tag_in(token, kStartTag,
|
3014
|
-
|
3135
|
+
} else if (tag_in(token, kStartTag,
|
3136
|
+
(gumbo_tagset){TAG(STYLE), TAG(SCRIPT), TAG(TEMPLATE)}) ||
|
3137
|
+
(tag_is(token, kEndTag, GUMBO_TAG_TEMPLATE))) {
|
3015
3138
|
return handle_in_head(parser, token);
|
3016
3139
|
} else if (tag_is(token, kStartTag, GUMBO_TAG_INPUT) &&
|
3017
|
-
attribute_matches(
|
3018
|
-
|
3140
|
+
attribute_matches(
|
3141
|
+
&token->v.start_tag.attributes, "type", "hidden")) {
|
3019
3142
|
parser_add_parse_error(parser, token);
|
3020
3143
|
insert_element_from_token(parser, token);
|
3021
3144
|
pop_current_node(parser);
|
3022
3145
|
return false;
|
3023
3146
|
} else if (tag_is(token, kStartTag, GUMBO_TAG_FORM)) {
|
3024
3147
|
parser_add_parse_error(parser, token);
|
3025
|
-
if (state->_form_element) {
|
3148
|
+
if (state->_form_element || has_open_element(parser, GUMBO_TAG_TEMPLATE)) {
|
3026
3149
|
ignore_token(parser);
|
3027
3150
|
return false;
|
3028
3151
|
}
|
@@ -3030,11 +3153,7 @@ static bool handle_in_table(GumboParser* parser, GumboToken* token) {
|
|
3030
3153
|
pop_current_node(parser);
|
3031
3154
|
return false;
|
3032
3155
|
} else if (token->type == GUMBO_TOKEN_EOF) {
|
3033
|
-
|
3034
|
-
parser_add_parse_error(parser, token);
|
3035
|
-
return false;
|
3036
|
-
}
|
3037
|
-
return true;
|
3156
|
+
return handle_in_body(parser, token);
|
3038
3157
|
} else {
|
3039
3158
|
parser_add_parse_error(parser, token);
|
3040
3159
|
state->_foster_parent_insertions = true;
|
@@ -3062,8 +3181,9 @@ static bool handle_in_table_text(GumboParser* parser, GumboToken* token) {
|
|
3062
3181
|
// Note that TextNodeBuffer may contain UTF-8 characters, but the presence
|
3063
3182
|
// of any one byte that is not whitespace means we flip the flag, so this
|
3064
3183
|
// loop is still valid.
|
3065
|
-
for (int i = 0; i < buffer->length; ++i) {
|
3066
|
-
if (!isspace(
|
3184
|
+
for (unsigned int i = 0; i < buffer->length; ++i) {
|
3185
|
+
if (!isspace((unsigned char) buffer->data[i]) ||
|
3186
|
+
buffer->data[i] == '\v') {
|
3067
3187
|
state->_foster_parent_insertions = true;
|
3068
3188
|
reconstruct_active_formatting_elements(parser);
|
3069
3189
|
break;
|
@@ -3079,38 +3199,43 @@ static bool handle_in_table_text(GumboParser* parser, GumboToken* token) {
|
|
3079
3199
|
|
3080
3200
|
// http://www.whatwg.org/specs/web-apps/current-work/complete/tokenization.html#parsing-main-incaption
|
3081
3201
|
static bool handle_in_caption(GumboParser* parser, GumboToken* token) {
|
3082
|
-
if (
|
3083
|
-
GUMBO_TAG_COLGROUP, GUMBO_TAG_TBODY, GUMBO_TAG_TD,
|
3084
|
-
GUMBO_TAG_TFOOT, GUMBO_TAG_TH, GUMBO_TAG_THEAD,
|
3085
|
-
GUMBO_TAG_TR, GUMBO_TAG_LAST) ||
|
3086
|
-
tag_in(token, kEndTag, GUMBO_TAG_CAPTION, GUMBO_TAG_TABLE,
|
3087
|
-
GUMBO_TAG_LAST)) {
|
3202
|
+
if (tag_is(token, kEndTag, GUMBO_TAG_CAPTION)) {
|
3088
3203
|
if (!has_an_element_in_table_scope(parser, GUMBO_TAG_CAPTION)) {
|
3089
3204
|
parser_add_parse_error(parser, token);
|
3090
3205
|
ignore_token(parser);
|
3091
3206
|
return false;
|
3207
|
+
} else {
|
3208
|
+
generate_implied_end_tags(parser, GUMBO_TAG_LAST);
|
3209
|
+
bool result = true;
|
3210
|
+
if (!node_html_tag_is(get_current_node(parser), GUMBO_TAG_CAPTION)) {
|
3211
|
+
parser_add_parse_error(parser, token);
|
3212
|
+
}
|
3213
|
+
while (!node_html_tag_is(pop_current_node(parser), GUMBO_TAG_CAPTION))
|
3214
|
+
;
|
3215
|
+
clear_active_formatting_elements(parser);
|
3216
|
+
set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE);
|
3217
|
+
return result;
|
3092
3218
|
}
|
3093
|
-
|
3094
|
-
|
3095
|
-
|
3096
|
-
|
3097
|
-
|
3098
|
-
|
3099
|
-
if (!node_tag_is(get_current_node(parser), GUMBO_TAG_CAPTION)) {
|
3219
|
+
} else if (tag_in(token, kStartTag,
|
3220
|
+
(gumbo_tagset){TAG(CAPTION), TAG(COL), TAG(COLGROUP),
|
3221
|
+
TAG(TBODY), TAG(TD), TAG(TFOOT), TAG(TH), TAG(THEAD),
|
3222
|
+
TAG(TR)}) ||
|
3223
|
+
(tag_is(token, kEndTag, GUMBO_TAG_TABLE))) {
|
3224
|
+
if (!has_an_element_in_table_scope(parser, GUMBO_TAG_CAPTION)) {
|
3100
3225
|
parser_add_parse_error(parser, token);
|
3101
|
-
|
3102
|
-
|
3103
|
-
}
|
3104
|
-
result = false;
|
3226
|
+
ignore_token(parser);
|
3227
|
+
return false;
|
3105
3228
|
}
|
3106
|
-
pop_current_node(parser)
|
3229
|
+
while (!node_html_tag_is(pop_current_node(parser), GUMBO_TAG_CAPTION))
|
3230
|
+
;
|
3107
3231
|
clear_active_formatting_elements(parser);
|
3108
3232
|
set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE);
|
3109
|
-
|
3110
|
-
|
3111
|
-
|
3112
|
-
|
3113
|
-
|
3233
|
+
parser->_parser_state->_reprocess_current_token = true;
|
3234
|
+
return true;
|
3235
|
+
} else if (tag_in(token, kEndTag,
|
3236
|
+
(gumbo_tagset){TAG(BODY), TAG(COL), TAG(COLGROUP), TAG(HTML),
|
3237
|
+
TAG(TBODY), TAG(TD), TAG(TFOOT), TAG(TH), TAG(THEAD),
|
3238
|
+
TAG(TR)})) {
|
3114
3239
|
parser_add_parse_error(parser, token);
|
3115
3240
|
ignore_token(parser);
|
3116
3241
|
return false;
|
@@ -3138,24 +3263,33 @@ static bool handle_in_column_group(GumboParser* parser, GumboToken* token) {
|
|
3138
3263
|
pop_current_node(parser);
|
3139
3264
|
acknowledge_self_closing_tag(parser);
|
3140
3265
|
return true;
|
3266
|
+
} else if (tag_is(token, kEndTag, GUMBO_TAG_COLGROUP)) {
|
3267
|
+
if (!node_html_tag_is(get_current_node(parser), GUMBO_TAG_COLGROUP)) {
|
3268
|
+
parser_add_parse_error(parser, token);
|
3269
|
+
ignore_token(parser);
|
3270
|
+
return false;
|
3271
|
+
}
|
3272
|
+
pop_current_node(parser);
|
3273
|
+
set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE);
|
3274
|
+
return false;
|
3141
3275
|
} else if (tag_is(token, kEndTag, GUMBO_TAG_COL)) {
|
3142
3276
|
parser_add_parse_error(parser, token);
|
3143
3277
|
ignore_token(parser);
|
3144
3278
|
return false;
|
3145
|
-
} else if (token
|
3146
|
-
|
3147
|
-
return
|
3279
|
+
} else if (tag_is(token, kStartTag, GUMBO_TAG_TEMPLATE) ||
|
3280
|
+
tag_is(token, kEndTag, GUMBO_TAG_TEMPLATE)) {
|
3281
|
+
return handle_in_head(parser, token);
|
3282
|
+
} else if (token->type == GUMBO_TOKEN_EOF) {
|
3283
|
+
return handle_in_body(parser, token);
|
3148
3284
|
} else {
|
3149
|
-
if (get_current_node(parser)
|
3285
|
+
if (!node_html_tag_is(get_current_node(parser), GUMBO_TAG_COLGROUP)) {
|
3150
3286
|
parser_add_parse_error(parser, token);
|
3287
|
+
ignore_token(parser);
|
3151
3288
|
return false;
|
3152
3289
|
}
|
3153
|
-
assert(node_tag_is(get_current_node(parser), GUMBO_TAG_COLGROUP));
|
3154
3290
|
pop_current_node(parser);
|
3155
3291
|
set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE);
|
3156
|
-
|
3157
|
-
parser->_parser_state->_reprocess_current_token = true;
|
3158
|
-
}
|
3292
|
+
parser->_parser_state->_reprocess_current_token = true;
|
3159
3293
|
return true;
|
3160
3294
|
}
|
3161
3295
|
}
|
@@ -3167,16 +3301,15 @@ static bool handle_in_table_body(GumboParser* parser, GumboToken* token) {
|
|
3167
3301
|
insert_element_from_token(parser, token);
|
3168
3302
|
set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_ROW);
|
3169
3303
|
return true;
|
3170
|
-
} else if (tag_in(token, kStartTag,
|
3171
|
-
GUMBO_TAG_LAST)) {
|
3304
|
+
} else if (tag_in(token, kStartTag, (gumbo_tagset){TAG(TD), TAG(TH)})) {
|
3172
3305
|
parser_add_parse_error(parser, token);
|
3173
3306
|
clear_stack_to_table_body_context(parser);
|
3174
3307
|
insert_element_of_tag_type(parser, GUMBO_TAG_TR, GUMBO_INSERTION_IMPLIED);
|
3175
3308
|
parser->_parser_state->_reprocess_current_token = true;
|
3176
3309
|
set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_ROW);
|
3177
3310
|
return false;
|
3178
|
-
} else if (tag_in(token, kEndTag,
|
3179
|
-
|
3311
|
+
} else if (tag_in(token, kEndTag,
|
3312
|
+
(gumbo_tagset){TAG(TBODY), TAG(TFOOT), TAG(THEAD)})) {
|
3180
3313
|
if (!has_an_element_in_table_scope(parser, token->v.end_tag)) {
|
3181
3314
|
parser_add_parse_error(parser, token);
|
3182
3315
|
ignore_token(parser);
|
@@ -3186,13 +3319,13 @@ static bool handle_in_table_body(GumboParser* parser, GumboToken* token) {
|
|
3186
3319
|
pop_current_node(parser);
|
3187
3320
|
set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE);
|
3188
3321
|
return true;
|
3189
|
-
} else if (tag_in(token, kStartTag,
|
3190
|
-
|
3191
|
-
|
3322
|
+
} else if (tag_in(token, kStartTag,
|
3323
|
+
(gumbo_tagset){TAG(CAPTION), TAG(COL), TAG(COLGROUP),
|
3324
|
+
TAG(TBODY), TAG(TFOOT), TAG(THEAD)}) ||
|
3192
3325
|
tag_is(token, kEndTag, GUMBO_TAG_TABLE)) {
|
3193
3326
|
if (!(has_an_element_in_table_scope(parser, GUMBO_TAG_TBODY) ||
|
3194
|
-
|
3195
|
-
|
3327
|
+
has_an_element_in_table_scope(parser, GUMBO_TAG_THEAD) ||
|
3328
|
+
has_an_element_in_table_scope(parser, GUMBO_TAG_TFOOT))) {
|
3196
3329
|
parser_add_parse_error(parser, token);
|
3197
3330
|
ignore_token(parser);
|
3198
3331
|
return false;
|
@@ -3202,10 +3335,9 @@ static bool handle_in_table_body(GumboParser* parser, GumboToken* token) {
|
|
3202
3335
|
set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE);
|
3203
3336
|
parser->_parser_state->_reprocess_current_token = true;
|
3204
3337
|
return true;
|
3205
|
-
} else if (tag_in(token, kEndTag,
|
3206
|
-
|
3207
|
-
|
3208
|
-
{
|
3338
|
+
} else if (tag_in(token, kEndTag,
|
3339
|
+
(gumbo_tagset){TAG(BODY), TAG(CAPTION), TAG(COL), TAG(TR),
|
3340
|
+
TAG(COLGROUP), TAG(HTML), TAG(TD), TAG(TH)})) {
|
3209
3341
|
parser_add_parse_error(parser, token);
|
3210
3342
|
ignore_token(parser);
|
3211
3343
|
return false;
|
@@ -3216,48 +3348,55 @@ static bool handle_in_table_body(GumboParser* parser, GumboToken* token) {
|
|
3216
3348
|
|
3217
3349
|
// http://www.whatwg.org/specs/web-apps/current-work/complete/tokenization.html#parsing-main-intr
|
3218
3350
|
static bool handle_in_row(GumboParser* parser, GumboToken* token) {
|
3219
|
-
if (tag_in(token, kStartTag,
|
3351
|
+
if (tag_in(token, kStartTag, (gumbo_tagset){TAG(TH), TAG(TD)})) {
|
3220
3352
|
clear_stack_to_table_row_context(parser);
|
3221
3353
|
insert_element_from_token(parser, token);
|
3222
3354
|
set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_CELL);
|
3223
3355
|
add_formatting_element(parser, &kActiveFormattingScopeMarker);
|
3224
3356
|
return true;
|
3225
|
-
} else if (
|
3226
|
-
|
3227
|
-
|
3228
|
-
|
3229
|
-
|
3230
|
-
|
3231
|
-
|
3232
|
-
|
3233
|
-
|
3234
|
-
|
3235
|
-
|
3236
|
-
|
3237
|
-
|
3238
|
-
|
3239
|
-
|
3240
|
-
|
3241
|
-
for (int i = 0; i < parser->_parser_state->_open_elements.length; ++i) {
|
3242
|
-
const GumboNode* node = parser->_parser_state->_open_elements.data[i];
|
3243
|
-
gumbo_debug("%s\n", gumbo_normalized_tagname(node->v.element.tag));
|
3244
|
-
}
|
3357
|
+
} else if (tag_is(token, kEndTag, GUMBO_TAG_TR)) {
|
3358
|
+
if (!has_an_element_in_table_scope(parser, GUMBO_TAG_TR)) {
|
3359
|
+
parser_add_parse_error(parser, token);
|
3360
|
+
ignore_token(parser);
|
3361
|
+
return false;
|
3362
|
+
} else {
|
3363
|
+
clear_stack_to_table_row_context(parser);
|
3364
|
+
pop_current_node(parser);
|
3365
|
+
set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE_BODY);
|
3366
|
+
return true;
|
3367
|
+
}
|
3368
|
+
} else if (tag_in(token, kStartTag,
|
3369
|
+
(gumbo_tagset){TAG(CAPTION), TAG(COL), TAG(COLGROUP),
|
3370
|
+
TAG(TBODY), TAG(TFOOT), TAG(THEAD), TAG(TR)}) ||
|
3371
|
+
tag_is(token, kEndTag, GUMBO_TAG_TABLE)) {
|
3372
|
+
if (!has_an_element_in_table_scope(parser, GUMBO_TAG_TR)) {
|
3245
3373
|
parser_add_parse_error(parser, token);
|
3246
3374
|
ignore_token(parser);
|
3247
3375
|
return false;
|
3376
|
+
} else {
|
3377
|
+
clear_stack_to_table_row_context(parser);
|
3378
|
+
pop_current_node(parser);
|
3379
|
+
set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE_BODY);
|
3380
|
+
parser->_parser_state->_reprocess_current_token = true;
|
3381
|
+
return true;
|
3248
3382
|
}
|
3249
|
-
|
3250
|
-
|
3251
|
-
|
3252
|
-
|
3253
|
-
|
3254
|
-
|
3383
|
+
} else if (tag_in(token, kEndTag,
|
3384
|
+
(gumbo_tagset){TAG(TBODY), TAG(TFOOT), TAG(THEAD)})) {
|
3385
|
+
if (!has_an_element_in_table_scope(parser, token->v.end_tag) ||
|
3386
|
+
(!has_an_element_in_table_scope(parser, GUMBO_TAG_TR))) {
|
3387
|
+
parser_add_parse_error(parser, token);
|
3388
|
+
ignore_token(parser);
|
3389
|
+
return false;
|
3390
|
+
} else {
|
3391
|
+
clear_stack_to_table_row_context(parser);
|
3392
|
+
pop_current_node(parser);
|
3393
|
+
set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE_BODY);
|
3255
3394
|
parser->_parser_state->_reprocess_current_token = true;
|
3395
|
+
return true;
|
3256
3396
|
}
|
3257
|
-
|
3258
|
-
|
3259
|
-
|
3260
|
-
GUMBO_TAG_TD, GUMBO_TAG_TH, GUMBO_TAG_LAST)) {
|
3397
|
+
} else if (tag_in(token, kEndTag,
|
3398
|
+
(gumbo_tagset){TAG(BODY), TAG(CAPTION), TAG(COL),
|
3399
|
+
TAG(COLGROUP), TAG(HTML), TAG(TD), TAG(TH)})) {
|
3261
3400
|
parser_add_parse_error(parser, token);
|
3262
3401
|
ignore_token(parser);
|
3263
3402
|
return false;
|
@@ -3268,17 +3407,18 @@ static bool handle_in_row(GumboParser* parser, GumboToken* token) {
|
|
3268
3407
|
|
3269
3408
|
// http://www.whatwg.org/specs/web-apps/current-work/complete/tokenization.html#parsing-main-intd
|
3270
3409
|
static bool handle_in_cell(GumboParser* parser, GumboToken* token) {
|
3271
|
-
if (tag_in(token, kEndTag,
|
3410
|
+
if (tag_in(token, kEndTag, (gumbo_tagset){TAG(TD), TAG(TH)})) {
|
3272
3411
|
GumboTag token_tag = token->v.end_tag;
|
3273
3412
|
if (!has_an_element_in_table_scope(parser, token_tag)) {
|
3274
3413
|
parser_add_parse_error(parser, token);
|
3414
|
+
ignore_token(parser);
|
3275
3415
|
return false;
|
3276
3416
|
}
|
3277
3417
|
return close_table_cell(parser, token, token_tag);
|
3278
|
-
} else if (tag_in(token, kStartTag,
|
3279
|
-
|
3280
|
-
|
3281
|
-
|
3418
|
+
} else if (tag_in(token, kStartTag,
|
3419
|
+
(gumbo_tagset){TAG(CAPTION), TAG(COL), TAG(COLGROUP),
|
3420
|
+
TAG(TBODY), TAG(TD), TAG(TFOOT), TAG(TH), TAG(THEAD),
|
3421
|
+
TAG(TR)})) {
|
3282
3422
|
gumbo_debug("Handling <td> in cell.\n");
|
3283
3423
|
if (!has_an_element_in_table_scope(parser, GUMBO_TAG_TH) &&
|
3284
3424
|
!has_an_element_in_table_scope(parser, GUMBO_TAG_TD)) {
|
@@ -3289,15 +3429,13 @@ static bool handle_in_cell(GumboParser* parser, GumboToken* token) {
|
|
3289
3429
|
}
|
3290
3430
|
parser->_parser_state->_reprocess_current_token = true;
|
3291
3431
|
return close_current_cell(parser, token);
|
3292
|
-
} else if (tag_in(token, kEndTag,
|
3293
|
-
|
3294
|
-
GUMBO_TAG_LAST)) {
|
3432
|
+
} else if (tag_in(token, kEndTag, (gumbo_tagset){TAG(BODY), TAG(CAPTION),
|
3433
|
+
TAG(COL), TAG(COLGROUP), TAG(HTML)})) {
|
3295
3434
|
parser_add_parse_error(parser, token);
|
3296
3435
|
ignore_token(parser);
|
3297
3436
|
return false;
|
3298
|
-
} else if (tag_in(token, kEndTag,
|
3299
|
-
|
3300
|
-
GUMBO_TAG_LAST)) {
|
3437
|
+
} else if (tag_in(token, kEndTag, (gumbo_tagset){TAG(TABLE), TAG(TBODY),
|
3438
|
+
TAG(TFOOT), TAG(THEAD), TAG(TR)})) {
|
3301
3439
|
if (!has_an_element_in_table_scope(parser, token->v.end_tag)) {
|
3302
3440
|
parser_add_parse_error(parser, token);
|
3303
3441
|
ignore_token(parser);
|
@@ -3330,28 +3468,28 @@ static bool handle_in_select(GumboParser* parser, GumboToken* token) {
|
|
3330
3468
|
} else if (tag_is(token, kStartTag, GUMBO_TAG_HTML)) {
|
3331
3469
|
return handle_in_body(parser, token);
|
3332
3470
|
} else if (tag_is(token, kStartTag, GUMBO_TAG_OPTION)) {
|
3333
|
-
if (
|
3471
|
+
if (node_html_tag_is(get_current_node(parser), GUMBO_TAG_OPTION)) {
|
3334
3472
|
pop_current_node(parser);
|
3335
3473
|
}
|
3336
3474
|
insert_element_from_token(parser, token);
|
3337
3475
|
return true;
|
3338
3476
|
} else if (tag_is(token, kStartTag, GUMBO_TAG_OPTGROUP)) {
|
3339
|
-
if (
|
3477
|
+
if (node_html_tag_is(get_current_node(parser), GUMBO_TAG_OPTION)) {
|
3340
3478
|
pop_current_node(parser);
|
3341
3479
|
}
|
3342
|
-
if (
|
3480
|
+
if (node_html_tag_is(get_current_node(parser), GUMBO_TAG_OPTGROUP)) {
|
3343
3481
|
pop_current_node(parser);
|
3344
3482
|
}
|
3345
3483
|
insert_element_from_token(parser, token);
|
3346
3484
|
return true;
|
3347
3485
|
} else if (tag_is(token, kEndTag, GUMBO_TAG_OPTGROUP)) {
|
3348
3486
|
GumboVector* open_elements = &parser->_parser_state->_open_elements;
|
3349
|
-
if (
|
3350
|
-
|
3351
|
-
|
3487
|
+
if (node_html_tag_is(get_current_node(parser), GUMBO_TAG_OPTION) &&
|
3488
|
+
node_html_tag_is(open_elements->data[open_elements->length - 2],
|
3489
|
+
GUMBO_TAG_OPTGROUP)) {
|
3352
3490
|
pop_current_node(parser);
|
3353
3491
|
}
|
3354
|
-
if (
|
3492
|
+
if (node_html_tag_is(get_current_node(parser), GUMBO_TAG_OPTGROUP)) {
|
3355
3493
|
pop_current_node(parser);
|
3356
3494
|
return true;
|
3357
3495
|
} else {
|
@@ -3360,7 +3498,7 @@ static bool handle_in_select(GumboParser* parser, GumboToken* token) {
|
|
3360
3498
|
return false;
|
3361
3499
|
}
|
3362
3500
|
} else if (tag_is(token, kEndTag, GUMBO_TAG_OPTION)) {
|
3363
|
-
if (
|
3501
|
+
if (node_html_tag_is(get_current_node(parser), GUMBO_TAG_OPTION)) {
|
3364
3502
|
pop_current_node(parser);
|
3365
3503
|
return true;
|
3366
3504
|
} else {
|
@@ -3379,10 +3517,12 @@ static bool handle_in_select(GumboParser* parser, GumboToken* token) {
|
|
3379
3517
|
} else if (tag_is(token, kStartTag, GUMBO_TAG_SELECT)) {
|
3380
3518
|
parser_add_parse_error(parser, token);
|
3381
3519
|
ignore_token(parser);
|
3382
|
-
|
3520
|
+
if (has_an_element_in_select_scope(parser, GUMBO_TAG_SELECT)) {
|
3521
|
+
close_current_select(parser);
|
3522
|
+
}
|
3383
3523
|
return false;
|
3384
|
-
} else if (tag_in(token, kStartTag,
|
3385
|
-
|
3524
|
+
} else if (tag_in(token, kStartTag,
|
3525
|
+
(gumbo_tagset){TAG(INPUT), TAG(KEYGEN), TAG(TEXTAREA)})) {
|
3386
3526
|
parser_add_parse_error(parser, token);
|
3387
3527
|
if (!has_an_element_in_select_scope(parser, GUMBO_TAG_SELECT)) {
|
3388
3528
|
ignore_token(parser);
|
@@ -3391,14 +3531,12 @@ static bool handle_in_select(GumboParser* parser, GumboToken* token) {
|
|
3391
3531
|
parser->_parser_state->_reprocess_current_token = true;
|
3392
3532
|
}
|
3393
3533
|
return false;
|
3394
|
-
} else if (
|
3534
|
+
} else if (tag_in(token, kStartTag,
|
3535
|
+
(gumbo_tagset){TAG(SCRIPT), TAG(TEMPLATE)}) ||
|
3536
|
+
tag_is(token, kEndTag, GUMBO_TAG_TEMPLATE)) {
|
3395
3537
|
return handle_in_head(parser, token);
|
3396
3538
|
} else if (token->type == GUMBO_TOKEN_EOF) {
|
3397
|
-
|
3398
|
-
parser_add_parse_error(parser, token);
|
3399
|
-
return false;
|
3400
|
-
}
|
3401
|
-
return true;
|
3539
|
+
return handle_in_body(parser, token);
|
3402
3540
|
} else {
|
3403
3541
|
parser_add_parse_error(parser, token);
|
3404
3542
|
ignore_token(parser);
|
@@ -3408,25 +3546,28 @@ static bool handle_in_select(GumboParser* parser, GumboToken* token) {
|
|
3408
3546
|
|
3409
3547
|
// http://www.whatwg.org/specs/web-apps/current-work/complete/tokenization.html#parsing-main-inselectintable
|
3410
3548
|
static bool handle_in_select_in_table(GumboParser* parser, GumboToken* token) {
|
3411
|
-
if (tag_in(token, kStartTag,
|
3412
|
-
|
3413
|
-
|
3549
|
+
if (tag_in(token, kStartTag,
|
3550
|
+
(gumbo_tagset){TAG(CAPTION), TAG(TABLE), TAG(TBODY), TAG(TFOOT),
|
3551
|
+
TAG(THEAD), TAG(TR), TAG(TD), TAG(TH)})) {
|
3414
3552
|
parser_add_parse_error(parser, token);
|
3415
3553
|
close_current_select(parser);
|
3416
3554
|
parser->_parser_state->_reprocess_current_token = true;
|
3417
3555
|
return false;
|
3418
|
-
} else if (tag_in(token, kEndTag,
|
3419
|
-
|
3420
|
-
|
3556
|
+
} else if (tag_in(token, kEndTag,
|
3557
|
+
(gumbo_tagset){TAG(CAPTION), TAG(TABLE), TAG(TBODY),
|
3558
|
+
TAG(TFOOT), TAG(THEAD), TAG(TR), TAG(TD), TAG(TH)})) {
|
3421
3559
|
parser_add_parse_error(parser, token);
|
3422
|
-
if (has_an_element_in_table_scope(parser, token->v.end_tag)) {
|
3560
|
+
if (!has_an_element_in_table_scope(parser, token->v.end_tag)) {
|
3561
|
+
ignore_token(parser);
|
3562
|
+
return false;
|
3563
|
+
} else {
|
3423
3564
|
close_current_select(parser);
|
3424
|
-
|
3565
|
+
// close_current_select already does the
|
3566
|
+
// reset_insertion_mode_appropriately
|
3567
|
+
// reset_insertion_mode_appropriately(parser);
|
3425
3568
|
parser->_parser_state->_reprocess_current_token = true;
|
3426
|
-
|
3427
|
-
ignore_token(parser);
|
3569
|
+
return false;
|
3428
3570
|
}
|
3429
|
-
return false;
|
3430
3571
|
} else {
|
3431
3572
|
return handle_in_select(parser, token);
|
3432
3573
|
}
|
@@ -3434,8 +3575,71 @@ static bool handle_in_select_in_table(GumboParser* parser, GumboToken* token) {
|
|
3434
3575
|
|
3435
3576
|
// http://www.whatwg.org/specs/web-apps/current-work/multipage/tree-construction.html#parsing-main-intemplate
|
3436
3577
|
static bool handle_in_template(GumboParser* parser, GumboToken* token) {
|
3437
|
-
|
3438
|
-
|
3578
|
+
GumboParserState* state = parser->_parser_state;
|
3579
|
+
if (token->type == GUMBO_TOKEN_WHITESPACE ||
|
3580
|
+
token->type == GUMBO_TOKEN_CHARACTER ||
|
3581
|
+
token->type == GUMBO_TOKEN_COMMENT || token->type == GUMBO_TOKEN_NULL ||
|
3582
|
+
token->type == GUMBO_TOKEN_DOCTYPE) {
|
3583
|
+
return handle_in_body(parser, token);
|
3584
|
+
} else if (tag_in(token, kStartTag,
|
3585
|
+
(gumbo_tagset){TAG(BASE), TAG(BASEFONT), TAG(BGSOUND),
|
3586
|
+
TAG(LINK), TAG(META), TAG(NOFRAMES), TAG(SCRIPT),
|
3587
|
+
TAG(STYLE), TAG(TEMPLATE), TAG(TITLE)}) ||
|
3588
|
+
tag_is(token, kEndTag, GUMBO_TAG_TEMPLATE)) {
|
3589
|
+
return handle_in_head(parser, token);
|
3590
|
+
} else if (tag_in(
|
3591
|
+
token, kStartTag, (gumbo_tagset){TAG(CAPTION), TAG(COLGROUP),
|
3592
|
+
TAG(TBODY), TAG(TFOOT), TAG(THEAD)})) {
|
3593
|
+
pop_template_insertion_mode(parser);
|
3594
|
+
push_template_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE);
|
3595
|
+
set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE);
|
3596
|
+
state->_reprocess_current_token = true;
|
3597
|
+
return true;
|
3598
|
+
} else if (tag_is(token, kStartTag, GUMBO_TAG_COL)) {
|
3599
|
+
pop_template_insertion_mode(parser);
|
3600
|
+
push_template_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_COLUMN_GROUP);
|
3601
|
+
set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_COLUMN_GROUP);
|
3602
|
+
state->_reprocess_current_token = true;
|
3603
|
+
return true;
|
3604
|
+
} else if (tag_is(token, kStartTag, GUMBO_TAG_TR)) {
|
3605
|
+
pop_template_insertion_mode(parser);
|
3606
|
+
push_template_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE_BODY);
|
3607
|
+
set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE_BODY);
|
3608
|
+
state->_reprocess_current_token = true;
|
3609
|
+
return true;
|
3610
|
+
} else if (tag_in(token, kStartTag, (gumbo_tagset){TAG(TD), TAG(TH)})) {
|
3611
|
+
pop_template_insertion_mode(parser);
|
3612
|
+
push_template_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_ROW);
|
3613
|
+
set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_ROW);
|
3614
|
+
state->_reprocess_current_token = true;
|
3615
|
+
return true;
|
3616
|
+
} else if (token->type == GUMBO_TOKEN_START_TAG) {
|
3617
|
+
pop_template_insertion_mode(parser);
|
3618
|
+
push_template_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_BODY);
|
3619
|
+
set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_BODY);
|
3620
|
+
state->_reprocess_current_token = true;
|
3621
|
+
return true;
|
3622
|
+
} else if (token->type == GUMBO_TOKEN_END_TAG) {
|
3623
|
+
parser_add_parse_error(parser, token);
|
3624
|
+
ignore_token(parser);
|
3625
|
+
return false;
|
3626
|
+
} else if (token->type == GUMBO_TOKEN_EOF) {
|
3627
|
+
if (!has_open_element(parser, GUMBO_TAG_TEMPLATE)) {
|
3628
|
+
// Stop parsing.
|
3629
|
+
return true;
|
3630
|
+
}
|
3631
|
+
parser_add_parse_error(parser, token);
|
3632
|
+
while (!node_html_tag_is(pop_current_node(parser), GUMBO_TAG_TEMPLATE))
|
3633
|
+
;
|
3634
|
+
clear_active_formatting_elements(parser);
|
3635
|
+
pop_template_insertion_mode(parser);
|
3636
|
+
reset_insertion_mode_appropriately(parser);
|
3637
|
+
state->_reprocess_current_token = true;
|
3638
|
+
return false;
|
3639
|
+
} else {
|
3640
|
+
assert(0);
|
3641
|
+
return false;
|
3642
|
+
}
|
3439
3643
|
}
|
3440
3644
|
|
3441
3645
|
// http://www.whatwg.org/specs/web-apps/current-work/complete/tokenization.html#parsing-main-afterbody
|
@@ -3453,10 +3657,15 @@ static bool handle_after_body(GumboParser* parser, GumboToken* token) {
|
|
3453
3657
|
ignore_token(parser);
|
3454
3658
|
return false;
|
3455
3659
|
} else if (tag_is(token, kEndTag, GUMBO_TAG_HTML)) {
|
3456
|
-
|
3660
|
+
/* fragment case: ignore the closing HTML token */
|
3661
|
+
if (is_fragment_parser(parser)) {
|
3662
|
+
parser_add_parse_error(parser, token);
|
3663
|
+
ignore_token(parser);
|
3664
|
+
return false;
|
3665
|
+
}
|
3457
3666
|
set_insertion_mode(parser, GUMBO_INSERTION_MODE_AFTER_AFTER_BODY);
|
3458
3667
|
GumboNode* html = parser->_parser_state->_open_elements.data[0];
|
3459
|
-
assert(
|
3668
|
+
assert(node_html_tag_is(html, GUMBO_TAG_HTML));
|
3460
3669
|
record_end_of_element(
|
3461
3670
|
parser->_parser_state->_current_token, &html->v.element);
|
3462
3671
|
return true;
|
@@ -3488,15 +3697,14 @@ static bool handle_in_frameset(GumboParser* parser, GumboToken* token) {
|
|
3488
3697
|
insert_element_from_token(parser, token);
|
3489
3698
|
return true;
|
3490
3699
|
} else if (tag_is(token, kEndTag, GUMBO_TAG_FRAMESET)) {
|
3491
|
-
if (
|
3700
|
+
if (node_html_tag_is(get_current_node(parser), GUMBO_TAG_HTML)) {
|
3492
3701
|
parser_add_parse_error(parser, token);
|
3493
3702
|
ignore_token(parser);
|
3494
3703
|
return false;
|
3495
3704
|
}
|
3496
3705
|
pop_current_node(parser);
|
3497
|
-
|
3498
|
-
|
3499
|
-
if (!node_tag_is(get_current_node(parser), GUMBO_TAG_FRAMESET)) {
|
3706
|
+
if (!is_fragment_parser(parser) &&
|
3707
|
+
!node_html_tag_is(get_current_node(parser), GUMBO_TAG_FRAMESET)) {
|
3500
3708
|
set_insertion_mode(parser, GUMBO_INSERTION_MODE_AFTER_FRAMESET);
|
3501
3709
|
}
|
3502
3710
|
return true;
|
@@ -3508,7 +3716,7 @@ static bool handle_in_frameset(GumboParser* parser, GumboToken* token) {
|
|
3508
3716
|
} else if (tag_is(token, kStartTag, GUMBO_TAG_NOFRAMES)) {
|
3509
3717
|
return handle_in_head(parser, token);
|
3510
3718
|
} else if (token->type == GUMBO_TOKEN_EOF) {
|
3511
|
-
if (!
|
3719
|
+
if (!node_html_tag_is(get_current_node(parser), GUMBO_TAG_HTML)) {
|
3512
3720
|
parser_add_parse_error(parser, token);
|
3513
3721
|
return false;
|
3514
3722
|
}
|
@@ -3536,7 +3744,7 @@ static bool handle_after_frameset(GumboParser* parser, GumboToken* token) {
|
|
3536
3744
|
return handle_in_body(parser, token);
|
3537
3745
|
} else if (tag_is(token, kEndTag, GUMBO_TAG_HTML)) {
|
3538
3746
|
GumboNode* html = parser->_parser_state->_open_elements.data[0];
|
3539
|
-
assert(
|
3747
|
+
assert(node_html_tag_is(html, GUMBO_TAG_HTML));
|
3540
3748
|
record_end_of_element(
|
3541
3749
|
parser->_parser_state->_current_token, &html->v.element);
|
3542
3750
|
set_insertion_mode(parser, GUMBO_INSERTION_MODE_AFTER_AFTER_FRAMESET);
|
@@ -3595,31 +3803,14 @@ static bool handle_after_after_frameset(
|
|
3595
3803
|
// Function pointers for each insertion mode. Keep in sync with
|
3596
3804
|
// insertion_mode.h.
|
3597
3805
|
typedef bool (*TokenHandler)(GumboParser* parser, GumboToken* token);
|
3598
|
-
static const TokenHandler kTokenHandlers[] = {
|
3599
|
-
|
3600
|
-
|
3601
|
-
|
3602
|
-
|
3603
|
-
|
3604
|
-
|
3605
|
-
|
3606
|
-
handle_text,
|
3607
|
-
handle_in_table,
|
3608
|
-
handle_in_table_text,
|
3609
|
-
handle_in_caption,
|
3610
|
-
handle_in_column_group,
|
3611
|
-
handle_in_table_body,
|
3612
|
-
handle_in_row,
|
3613
|
-
handle_in_cell,
|
3614
|
-
handle_in_select,
|
3615
|
-
handle_in_select_in_table,
|
3616
|
-
handle_in_template,
|
3617
|
-
handle_after_body,
|
3618
|
-
handle_in_frameset,
|
3619
|
-
handle_after_frameset,
|
3620
|
-
handle_after_after_body,
|
3621
|
-
handle_after_after_frameset
|
3622
|
-
};
|
3806
|
+
static const TokenHandler kTokenHandlers[] = {handle_initial,
|
3807
|
+
handle_before_html, handle_before_head, handle_in_head,
|
3808
|
+
handle_in_head_noscript, handle_after_head, handle_in_body, handle_text,
|
3809
|
+
handle_in_table, handle_in_table_text, handle_in_caption,
|
3810
|
+
handle_in_column_group, handle_in_table_body, handle_in_row, handle_in_cell,
|
3811
|
+
handle_in_select, handle_in_select_in_table, handle_in_template,
|
3812
|
+
handle_after_body, handle_in_frameset, handle_after_frameset,
|
3813
|
+
handle_after_after_body, handle_after_after_frameset};
|
3623
3814
|
|
3624
3815
|
static bool handle_html_content(GumboParser* parser, GumboToken* token) {
|
3625
3816
|
return kTokenHandlers[(unsigned int) parser->_parser_state->_insertion_mode](
|
@@ -3628,16 +3819,17 @@ static bool handle_html_content(GumboParser* parser, GumboToken* token) {
|
|
3628
3819
|
|
3629
3820
|
// http://www.whatwg.org/specs/web-apps/current-work/complete/tokenization.html#parsing-main-inforeign
|
3630
3821
|
static bool handle_in_foreign_content(GumboParser* parser, GumboToken* token) {
|
3822
|
+
gumbo_debug("Handling foreign content");
|
3631
3823
|
switch (token->type) {
|
3632
3824
|
case GUMBO_TOKEN_NULL:
|
3633
3825
|
parser_add_parse_error(parser, token);
|
3634
|
-
token->type = GUMBO_TOKEN_CHARACTER;
|
3635
3826
|
token->v.character = kUtf8ReplacementChar;
|
3636
3827
|
insert_text_token(parser, token);
|
3637
3828
|
return false;
|
3638
3829
|
case GUMBO_TOKEN_WHITESPACE:
|
3639
3830
|
insert_text_token(parser, token);
|
3640
3831
|
return true;
|
3832
|
+
case GUMBO_TOKEN_CDATA:
|
3641
3833
|
case GUMBO_TOKEN_CHARACTER:
|
3642
3834
|
insert_text_token(parser, token);
|
3643
3835
|
set_frameset_not_ok(parser);
|
@@ -3654,35 +3846,44 @@ static bool handle_in_foreign_content(GumboParser* parser, GumboToken* token) {
|
|
3654
3846
|
break;
|
3655
3847
|
}
|
3656
3848
|
// Order matters for these clauses.
|
3657
|
-
if (tag_in(token, kStartTag,
|
3658
|
-
|
3659
|
-
|
3660
|
-
|
3661
|
-
|
3662
|
-
|
3663
|
-
|
3664
|
-
|
3665
|
-
|
3666
|
-
|
3667
|
-
|
3668
|
-
|
3669
|
-
|
3670
|
-
|
3671
|
-
token_has_attribute(token, "color") ||
|
3672
|
-
token_has_attribute(token, "face") ||
|
3673
|
-
token_has_attribute(token, "size")))) {
|
3849
|
+
if (tag_in(token, kStartTag,
|
3850
|
+
(gumbo_tagset){TAG(B), TAG(BIG), TAG(BLOCKQUOTE), TAG(BODY), TAG(BR),
|
3851
|
+
TAG(CENTER), TAG(CODE), TAG(DD), TAG(DIV), TAG(DL), TAG(DT),
|
3852
|
+
TAG(EM), TAG(EMBED), TAG(H1), TAG(H2), TAG(H3), TAG(H4), TAG(H5),
|
3853
|
+
TAG(H6), TAG(HEAD), TAG(HR), TAG(I), TAG(IMG), TAG(LI),
|
3854
|
+
TAG(LISTING), TAG(MENU), TAG(META), TAG(NOBR), TAG(OL), TAG(P),
|
3855
|
+
TAG(PRE), TAG(RUBY), TAG(S), TAG(SMALL), TAG(SPAN), TAG(STRONG),
|
3856
|
+
TAG(STRIKE), TAG(SUB), TAG(SUP), TAG(TABLE), TAG(TT), TAG(U),
|
3857
|
+
TAG(UL), TAG(VAR)}) ||
|
3858
|
+
(tag_is(token, kStartTag, GUMBO_TAG_FONT) &&
|
3859
|
+
(token_has_attribute(token, "color") ||
|
3860
|
+
token_has_attribute(token, "face") ||
|
3861
|
+
token_has_attribute(token, "size")))) {
|
3862
|
+
/* Parse error */
|
3674
3863
|
parser_add_parse_error(parser, token);
|
3675
|
-
|
3676
|
-
|
3677
|
-
|
3678
|
-
|
3679
|
-
|
3680
|
-
|
3681
|
-
parser
|
3682
|
-
|
3683
|
-
|
3864
|
+
|
3865
|
+
/*
|
3866
|
+
* Fragment case: If the parser was originally created for the HTML
|
3867
|
+
* fragment parsing algorithm, then act as described in the "any other
|
3868
|
+
* start tag" entry below.
|
3869
|
+
*/
|
3870
|
+
if (!is_fragment_parser(parser)) {
|
3871
|
+
do {
|
3872
|
+
pop_current_node(parser);
|
3873
|
+
} while (!(is_mathml_integration_point(get_current_node(parser)) ||
|
3874
|
+
is_html_integration_point(get_current_node(parser)) ||
|
3875
|
+
get_current_node(parser)->v.element.tag_namespace ==
|
3876
|
+
GUMBO_NAMESPACE_HTML));
|
3877
|
+
parser->_parser_state->_reprocess_current_token = true;
|
3878
|
+
return false;
|
3879
|
+
}
|
3880
|
+
|
3881
|
+
assert(token->type == GUMBO_TOKEN_START_TAG);
|
3882
|
+
}
|
3883
|
+
|
3884
|
+
if (token->type == GUMBO_TOKEN_START_TAG) {
|
3684
3885
|
const GumboNamespaceEnum current_namespace =
|
3685
|
-
|
3886
|
+
get_adjusted_current_node(parser)->v.element.tag_namespace;
|
3686
3887
|
if (current_namespace == GUMBO_NAMESPACE_MATHML) {
|
3687
3888
|
adjust_mathml_attributes(parser, token);
|
3688
3889
|
}
|
@@ -3698,8 +3899,8 @@ static bool handle_in_foreign_content(GumboParser* parser, GumboToken* token) {
|
|
3698
3899
|
acknowledge_self_closing_tag(parser);
|
3699
3900
|
}
|
3700
3901
|
return true;
|
3701
|
-
|
3702
|
-
|
3902
|
+
// </script> tags are handled like any other end tag, putting the script's
|
3903
|
+
// text into a text node child and closing the current node.
|
3703
3904
|
} else {
|
3704
3905
|
assert(token->type == GUMBO_TOKEN_END_TAG);
|
3705
3906
|
GumboNode* node = get_current_node(parser);
|
@@ -3715,13 +3916,13 @@ static bool handle_in_foreign_content(GumboParser* parser, GumboToken* token) {
|
|
3715
3916
|
is_success = false;
|
3716
3917
|
}
|
3717
3918
|
int i = parser->_parser_state->_open_elements.length;
|
3718
|
-
for(
|
3919
|
+
for (--i; i > 0;) {
|
3719
3920
|
// Here we move up the stack until we find an HTML element (in which
|
3720
3921
|
// case we do nothing) or we find the element that we're about to
|
3721
3922
|
// close (in which case we pop everything we've seen until that
|
3722
3923
|
// point.)
|
3723
3924
|
gumbo_debug("Foreign %.*s node at %d.\n", node_tagname.length,
|
3724
|
-
|
3925
|
+
node_tagname.data, i);
|
3725
3926
|
if (gumbo_string_equals_ignore_case(&node_tagname, &token_tagname)) {
|
3726
3927
|
gumbo_debug("Matches.\n");
|
3727
3928
|
while (pop_current_node(parser) != node) {
|
@@ -3749,7 +3950,6 @@ static bool handle_in_foreign_content(GumboParser* parser, GumboToken* token) {
|
|
3749
3950
|
}
|
3750
3951
|
}
|
3751
3952
|
|
3752
|
-
|
3753
3953
|
// http://www.whatwg.org/specs/web-apps/current-work/multipage/tree-construction.html#tree-construction
|
3754
3954
|
static bool handle_token(GumboParser* parser, GumboToken* token) {
|
3755
3955
|
if (parser->_parser_state->_ignore_next_linefeed &&
|
@@ -3771,29 +3971,31 @@ static bool handle_token(GumboParser* parser, GumboToken* token) {
|
|
3771
3971
|
parser->_parser_state->_closed_html_tag = true;
|
3772
3972
|
}
|
3773
3973
|
|
3774
|
-
const GumboNode* current_node =
|
3775
|
-
assert(!current_node || current_node->type == GUMBO_NODE_ELEMENT
|
3974
|
+
const GumboNode* current_node = get_adjusted_current_node(parser);
|
3975
|
+
assert(!current_node || current_node->type == GUMBO_NODE_ELEMENT ||
|
3976
|
+
current_node->type == GUMBO_NODE_TEMPLATE);
|
3776
3977
|
if (current_node) {
|
3777
3978
|
gumbo_debug("Current node: <%s>.\n",
|
3778
|
-
|
3979
|
+
gumbo_normalized_tagname(current_node->v.element.tag));
|
3779
3980
|
}
|
3780
3981
|
if (!current_node ||
|
3781
3982
|
current_node->v.element.tag_namespace == GUMBO_NAMESPACE_HTML ||
|
3782
3983
|
(is_mathml_integration_point(current_node) &&
|
3783
|
-
|
3784
|
-
|
3785
|
-
|
3786
|
-
|
3787
|
-
|
3788
|
-
|
3984
|
+
(token->type == GUMBO_TOKEN_CHARACTER ||
|
3985
|
+
token->type == GUMBO_TOKEN_WHITESPACE ||
|
3986
|
+
token->type == GUMBO_TOKEN_NULL ||
|
3987
|
+
(token->type == GUMBO_TOKEN_START_TAG &&
|
3988
|
+
!tag_in(token, kStartTag,
|
3989
|
+
(gumbo_tagset){TAG(MGLYPH), TAG(MALIGNMARK)})))) ||
|
3789
3990
|
(current_node->v.element.tag_namespace == GUMBO_NAMESPACE_MATHML &&
|
3790
|
-
|
3791
|
-
|
3792
|
-
|
3793
|
-
|
3794
|
-
token->type ==
|
3795
|
-
|
3796
|
-
|
3991
|
+
node_qualified_tag_is(
|
3992
|
+
current_node, GUMBO_NAMESPACE_MATHML, GUMBO_TAG_ANNOTATION_XML) &&
|
3993
|
+
tag_is(token, kStartTag, GUMBO_TAG_SVG)) ||
|
3994
|
+
(is_html_integration_point(current_node) &&
|
3995
|
+
(token->type == GUMBO_TOKEN_START_TAG ||
|
3996
|
+
token->type == GUMBO_TOKEN_CHARACTER ||
|
3997
|
+
token->type == GUMBO_TOKEN_NULL ||
|
3998
|
+
token->type == GUMBO_TOKEN_WHITESPACE)) ||
|
3797
3999
|
token->type == GUMBO_TOKEN_EOF) {
|
3798
4000
|
return handle_html_content(parser, token);
|
3799
4001
|
} else {
|
@@ -3801,6 +4003,66 @@ static bool handle_token(GumboParser* parser, GumboToken* token) {
|
|
3801
4003
|
}
|
3802
4004
|
}
|
3803
4005
|
|
4006
|
+
static void fragment_parser_init(GumboParser* parser, GumboTag fragment_ctx,
|
4007
|
+
GumboNamespaceEnum fragment_namespace) {
|
4008
|
+
GumboNode* root;
|
4009
|
+
assert(fragment_ctx != GUMBO_TAG_LAST);
|
4010
|
+
|
4011
|
+
// 3
|
4012
|
+
parser->_parser_state->_fragment_ctx = create_element(parser, fragment_ctx);
|
4013
|
+
parser->_parser_state->_fragment_ctx->v.element.tag_namespace =
|
4014
|
+
fragment_namespace;
|
4015
|
+
|
4016
|
+
// 4
|
4017
|
+
if (fragment_namespace == GUMBO_NAMESPACE_HTML) {
|
4018
|
+
// Non-HTML namespaces always start in the DATA state.
|
4019
|
+
switch (fragment_ctx) {
|
4020
|
+
case GUMBO_TAG_TITLE:
|
4021
|
+
case GUMBO_TAG_TEXTAREA:
|
4022
|
+
gumbo_tokenizer_set_state(parser, GUMBO_LEX_RCDATA);
|
4023
|
+
break;
|
4024
|
+
|
4025
|
+
case GUMBO_TAG_STYLE:
|
4026
|
+
case GUMBO_TAG_XMP:
|
4027
|
+
case GUMBO_TAG_IFRAME:
|
4028
|
+
case GUMBO_TAG_NOEMBED:
|
4029
|
+
case GUMBO_TAG_NOFRAMES:
|
4030
|
+
gumbo_tokenizer_set_state(parser, GUMBO_LEX_RAWTEXT);
|
4031
|
+
break;
|
4032
|
+
|
4033
|
+
case GUMBO_TAG_SCRIPT:
|
4034
|
+
gumbo_tokenizer_set_state(parser, GUMBO_LEX_SCRIPT);
|
4035
|
+
break;
|
4036
|
+
|
4037
|
+
case GUMBO_TAG_NOSCRIPT:
|
4038
|
+
/* scripting is disabled in Gumbo, so leave the tokenizer
|
4039
|
+
* in the default data state */
|
4040
|
+
break;
|
4041
|
+
|
4042
|
+
case GUMBO_TAG_PLAINTEXT:
|
4043
|
+
gumbo_tokenizer_set_state(parser, GUMBO_LEX_PLAINTEXT);
|
4044
|
+
break;
|
4045
|
+
|
4046
|
+
default:
|
4047
|
+
/* default data state */
|
4048
|
+
break;
|
4049
|
+
}
|
4050
|
+
}
|
4051
|
+
|
4052
|
+
// 5. 6. 7.
|
4053
|
+
root = insert_element_of_tag_type(
|
4054
|
+
parser, GUMBO_TAG_HTML, GUMBO_INSERTION_IMPLIED);
|
4055
|
+
parser->_output->root = root;
|
4056
|
+
|
4057
|
+
// 8.
|
4058
|
+
if (fragment_ctx == GUMBO_TAG_TEMPLATE) {
|
4059
|
+
push_template_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TEMPLATE);
|
4060
|
+
}
|
4061
|
+
|
4062
|
+
// 10.
|
4063
|
+
reset_insertion_mode_appropriately(parser);
|
4064
|
+
}
|
4065
|
+
|
3804
4066
|
GumboOutput* gumbo_parse(const char* buffer) {
|
3805
4067
|
return gumbo_parse_with_options(
|
3806
4068
|
&kGumboDefaultOptions, buffer, strlen(buffer));
|
@@ -3814,6 +4076,11 @@ GumboOutput* gumbo_parse_with_options(
|
|
3814
4076
|
gumbo_tokenizer_state_init(&parser, buffer, length);
|
3815
4077
|
parser_state_init(&parser);
|
3816
4078
|
|
4079
|
+
if (options->fragment_context != GUMBO_TAG_LAST) {
|
4080
|
+
fragment_parser_init(
|
4081
|
+
&parser, options->fragment_context, options->fragment_namespace);
|
4082
|
+
}
|
4083
|
+
|
3817
4084
|
GumboParserState* state = parser._parser_state;
|
3818
4085
|
gumbo_debug("Parsing %.*s.\n", length, buffer);
|
3819
4086
|
|
@@ -3823,14 +4090,15 @@ GumboOutput* gumbo_parse_with_options(
|
|
3823
4090
|
|
3824
4091
|
GumboToken token;
|
3825
4092
|
bool has_error = false;
|
4093
|
+
|
3826
4094
|
do {
|
3827
4095
|
if (state->_reprocess_current_token) {
|
3828
4096
|
state->_reprocess_current_token = false;
|
3829
4097
|
} else {
|
3830
4098
|
GumboNode* current_node = get_current_node(&parser);
|
3831
|
-
gumbo_tokenizer_set_is_current_node_foreign(
|
3832
|
-
|
3833
|
-
|
4099
|
+
gumbo_tokenizer_set_is_current_node_foreign(&parser,
|
4100
|
+
current_node &&
|
4101
|
+
current_node->v.element.tag_namespace != GUMBO_NAMESPACE_HTML);
|
3834
4102
|
has_error = !gumbo_lex(&parser, &token) || has_error;
|
3835
4103
|
}
|
3836
4104
|
const char* token_type = "text";
|
@@ -3850,14 +4118,13 @@ GumboOutput* gumbo_parse_with_options(
|
|
3850
4118
|
default:
|
3851
4119
|
break;
|
3852
4120
|
}
|
3853
|
-
gumbo_debug("Handling %s token @%d:%d in state %d.\n",
|
3854
|
-
|
3855
|
-
state->_insertion_mode);
|
4121
|
+
gumbo_debug("Handling %s token @%d:%d in state %d.\n", (char*) token_type,
|
4122
|
+
token.position.line, token.position.column, state->_insertion_mode);
|
3856
4123
|
|
3857
4124
|
state->_current_token = &token;
|
3858
4125
|
state->_self_closing_flag_acknowledged =
|
3859
4126
|
!(token.type == GUMBO_TOKEN_START_TAG &&
|
3860
|
-
|
4127
|
+
token.v.start_tag.is_self_closing);
|
3861
4128
|
|
3862
4129
|
has_error = !handle_token(&parser, &token) || has_error;
|
3863
4130
|
|
@@ -3913,7 +4180,7 @@ void gumbo_destroy_output(const GumboOptions* options, GumboOutput* output) {
|
|
3913
4180
|
GumboParser parser;
|
3914
4181
|
parser._options = options;
|
3915
4182
|
destroy_node(&parser, output->document);
|
3916
|
-
for (int i = 0; i < output->errors.length; ++i) {
|
4183
|
+
for (unsigned int i = 0; i < output->errors.length; ++i) {
|
3917
4184
|
gumbo_error_destroy(&parser, output->errors.data[i]);
|
3918
4185
|
}
|
3919
4186
|
gumbo_vector_destroy(&parser, &output->errors);
|