nokogumbo 1.3.0 → 1.4.1
Sign up to get free protection for your applications and to get access to all the features.
- data/README.md +1 -1
- data/ext/nokogumboc/nokogumbo.c +1 -0
- data/gumbo-parser/src/error.c +6 -3
- data/gumbo-parser/src/gumbo.h +36 -170
- data/gumbo-parser/src/parser.c +1030 -779
- data/gumbo-parser/src/string_buffer.c +8 -1
- data/gumbo-parser/src/string_buffer.h +5 -0
- data/gumbo-parser/src/tag.c +35 -162
- data/gumbo-parser/src/tag.in +150 -0
- data/gumbo-parser/src/tag_enum.h +150 -0
- data/gumbo-parser/src/tag_gperf.h +343 -0
- data/gumbo-parser/src/tag_sizes.h +1 -0
- data/gumbo-parser/src/tag_strings.h +150 -0
- data/gumbo-parser/src/token_type.h +1 -0
- data/gumbo-parser/src/tokenizer.c +29 -21
- data/gumbo-parser/src/utf8.c +9 -8
- data/gumbo-parser/src/vector.c +1 -1
- data/gumbo-parser/visualc/include/strings.h +2 -1
- data/test-nokogumbo.rb +140 -0
- metadata +16 -10
data/README.md
CHANGED
data/ext/nokogumboc/nokogumbo.c
CHANGED
data/gumbo-parser/src/error.c
CHANGED
@@ -35,10 +35,11 @@ static const size_t kMessageBufferSize = 256;
|
|
35
35
|
static int print_message(GumboParser* parser, GumboStringBuffer* output,
|
36
36
|
const char* format, ...) {
|
37
37
|
va_list args;
|
38
|
-
va_start(args, format);
|
39
38
|
int remaining_capacity = output->capacity - output->length;
|
39
|
+
va_start(args, format);
|
40
40
|
int bytes_written = vsnprintf(output->data + output->length,
|
41
41
|
remaining_capacity, format, args);
|
42
|
+
va_end(args);
|
42
43
|
#ifdef _MSC_VER
|
43
44
|
if (bytes_written == -1) {
|
44
45
|
// vsnprintf returns -1 on MSVC++ if there's not enough capacity, instead of
|
@@ -47,6 +48,7 @@ static int print_message(GumboParser* parser, GumboStringBuffer* output,
|
|
47
48
|
// we retry (letting it fail and returning 0 if it doesn't), since there's
|
48
49
|
// no way to smartly resize the buffer.
|
49
50
|
gumbo_string_buffer_reserve(parser, output->capacity * 2, output);
|
51
|
+
va_start(args, format);
|
50
52
|
int result = vsnprintf(output->data + output->length,
|
51
53
|
remaining_capacity, format, args);
|
52
54
|
va_end(args);
|
@@ -55,7 +57,6 @@ static int print_message(GumboParser* parser, GumboStringBuffer* output,
|
|
55
57
|
#else
|
56
58
|
// -1 in standard C99 indicates an encoding error. Return 0 and do nothing.
|
57
59
|
if (bytes_written == -1) {
|
58
|
-
va_end(args);
|
59
60
|
return 0;
|
60
61
|
}
|
61
62
|
#endif
|
@@ -64,11 +65,12 @@ static int print_message(GumboParser* parser, GumboStringBuffer* output,
|
|
64
65
|
gumbo_string_buffer_reserve(
|
65
66
|
parser, output->capacity + bytes_written, output);
|
66
67
|
remaining_capacity = output->capacity - output->length;
|
68
|
+
va_start(args, format);
|
67
69
|
bytes_written = vsnprintf(output->data + output->length,
|
68
70
|
remaining_capacity, format, args);
|
71
|
+
va_end(args);
|
69
72
|
}
|
70
73
|
output->length += bytes_written;
|
71
|
-
va_end(args);
|
72
74
|
return bytes_written;
|
73
75
|
}
|
74
76
|
|
@@ -106,6 +108,7 @@ static void handle_parser_error(GumboParser* parser,
|
|
106
108
|
// But just in case...
|
107
109
|
print_message(parser, output, "Comments aren't legal here");
|
108
110
|
return;
|
111
|
+
case GUMBO_TOKEN_CDATA:
|
109
112
|
case GUMBO_TOKEN_WHITESPACE:
|
110
113
|
case GUMBO_TOKEN_CHARACTER:
|
111
114
|
print_message(parser, output, "Character tokens aren't legal here");
|
data/gumbo-parser/src/gumbo.h
CHANGED
@@ -141,7 +141,7 @@ extern const GumboVector kGumboEmptyVector;
|
|
141
141
|
* Returns the first index at which an element appears in this vector (testing
|
142
142
|
* by pointer equality), or -1 if it never does.
|
143
143
|
*/
|
144
|
-
int gumbo_vector_index_of(GumboVector* vector, void* element);
|
144
|
+
int gumbo_vector_index_of(GumboVector* vector, const void* element);
|
145
145
|
|
146
146
|
|
147
147
|
/**
|
@@ -157,172 +157,10 @@ int gumbo_vector_index_of(GumboVector* vector, void* element);
|
|
157
157
|
* strings.
|
158
158
|
*/
|
159
159
|
typedef enum {
|
160
|
-
//
|
161
|
-
|
162
|
-
//
|
163
|
-
|
164
|
-
GUMBO_TAG_TITLE,
|
165
|
-
GUMBO_TAG_BASE,
|
166
|
-
GUMBO_TAG_LINK,
|
167
|
-
GUMBO_TAG_META,
|
168
|
-
GUMBO_TAG_STYLE,
|
169
|
-
// http://www.whatwg.org/specs/web-apps/current-work/multipage/scripting-1.html#scripting-1
|
170
|
-
GUMBO_TAG_SCRIPT,
|
171
|
-
GUMBO_TAG_NOSCRIPT,
|
172
|
-
GUMBO_TAG_TEMPLATE,
|
173
|
-
// http://www.whatwg.org/specs/web-apps/current-work/multipage/sections.html#sections
|
174
|
-
GUMBO_TAG_BODY,
|
175
|
-
GUMBO_TAG_ARTICLE,
|
176
|
-
GUMBO_TAG_SECTION,
|
177
|
-
GUMBO_TAG_NAV,
|
178
|
-
GUMBO_TAG_ASIDE,
|
179
|
-
GUMBO_TAG_H1,
|
180
|
-
GUMBO_TAG_H2,
|
181
|
-
GUMBO_TAG_H3,
|
182
|
-
GUMBO_TAG_H4,
|
183
|
-
GUMBO_TAG_H5,
|
184
|
-
GUMBO_TAG_H6,
|
185
|
-
GUMBO_TAG_HGROUP,
|
186
|
-
GUMBO_TAG_HEADER,
|
187
|
-
GUMBO_TAG_FOOTER,
|
188
|
-
GUMBO_TAG_ADDRESS,
|
189
|
-
// http://www.whatwg.org/specs/web-apps/current-work/multipage/grouping-content.html#grouping-content
|
190
|
-
GUMBO_TAG_P,
|
191
|
-
GUMBO_TAG_HR,
|
192
|
-
GUMBO_TAG_PRE,
|
193
|
-
GUMBO_TAG_BLOCKQUOTE,
|
194
|
-
GUMBO_TAG_OL,
|
195
|
-
GUMBO_TAG_UL,
|
196
|
-
GUMBO_TAG_LI,
|
197
|
-
GUMBO_TAG_DL,
|
198
|
-
GUMBO_TAG_DT,
|
199
|
-
GUMBO_TAG_DD,
|
200
|
-
GUMBO_TAG_FIGURE,
|
201
|
-
GUMBO_TAG_FIGCAPTION,
|
202
|
-
GUMBO_TAG_MAIN,
|
203
|
-
GUMBO_TAG_DIV,
|
204
|
-
// http://www.whatwg.org/specs/web-apps/current-work/multipage/text-level-semantics.html#text-level-semantics
|
205
|
-
GUMBO_TAG_A,
|
206
|
-
GUMBO_TAG_EM,
|
207
|
-
GUMBO_TAG_STRONG,
|
208
|
-
GUMBO_TAG_SMALL,
|
209
|
-
GUMBO_TAG_S,
|
210
|
-
GUMBO_TAG_CITE,
|
211
|
-
GUMBO_TAG_Q,
|
212
|
-
GUMBO_TAG_DFN,
|
213
|
-
GUMBO_TAG_ABBR,
|
214
|
-
GUMBO_TAG_DATA,
|
215
|
-
GUMBO_TAG_TIME,
|
216
|
-
GUMBO_TAG_CODE,
|
217
|
-
GUMBO_TAG_VAR,
|
218
|
-
GUMBO_TAG_SAMP,
|
219
|
-
GUMBO_TAG_KBD,
|
220
|
-
GUMBO_TAG_SUB,
|
221
|
-
GUMBO_TAG_SUP,
|
222
|
-
GUMBO_TAG_I,
|
223
|
-
GUMBO_TAG_B,
|
224
|
-
GUMBO_TAG_U,
|
225
|
-
GUMBO_TAG_MARK,
|
226
|
-
GUMBO_TAG_RUBY,
|
227
|
-
GUMBO_TAG_RT,
|
228
|
-
GUMBO_TAG_RP,
|
229
|
-
GUMBO_TAG_BDI,
|
230
|
-
GUMBO_TAG_BDO,
|
231
|
-
GUMBO_TAG_SPAN,
|
232
|
-
GUMBO_TAG_BR,
|
233
|
-
GUMBO_TAG_WBR,
|
234
|
-
// http://www.whatwg.org/specs/web-apps/current-work/multipage/edits.html#edits
|
235
|
-
GUMBO_TAG_INS,
|
236
|
-
GUMBO_TAG_DEL,
|
237
|
-
// http://www.whatwg.org/specs/web-apps/current-work/multipage/embedded-content-1.html#embedded-content-1
|
238
|
-
GUMBO_TAG_IMAGE,
|
239
|
-
GUMBO_TAG_IMG,
|
240
|
-
GUMBO_TAG_IFRAME,
|
241
|
-
GUMBO_TAG_EMBED,
|
242
|
-
GUMBO_TAG_OBJECT,
|
243
|
-
GUMBO_TAG_PARAM,
|
244
|
-
GUMBO_TAG_VIDEO,
|
245
|
-
GUMBO_TAG_AUDIO,
|
246
|
-
GUMBO_TAG_SOURCE,
|
247
|
-
GUMBO_TAG_TRACK,
|
248
|
-
GUMBO_TAG_CANVAS,
|
249
|
-
GUMBO_TAG_MAP,
|
250
|
-
GUMBO_TAG_AREA,
|
251
|
-
// http://www.whatwg.org/specs/web-apps/current-work/multipage/the-map-element.html#mathml
|
252
|
-
GUMBO_TAG_MATH,
|
253
|
-
GUMBO_TAG_MI,
|
254
|
-
GUMBO_TAG_MO,
|
255
|
-
GUMBO_TAG_MN,
|
256
|
-
GUMBO_TAG_MS,
|
257
|
-
GUMBO_TAG_MTEXT,
|
258
|
-
GUMBO_TAG_MGLYPH,
|
259
|
-
GUMBO_TAG_MALIGNMARK,
|
260
|
-
GUMBO_TAG_ANNOTATION_XML,
|
261
|
-
// http://www.whatwg.org/specs/web-apps/current-work/multipage/the-map-element.html#svg-0
|
262
|
-
GUMBO_TAG_SVG,
|
263
|
-
GUMBO_TAG_FOREIGNOBJECT,
|
264
|
-
GUMBO_TAG_DESC,
|
265
|
-
// SVG title tags will have GUMBO_TAG_TITLE as with HTML.
|
266
|
-
// http://www.whatwg.org/specs/web-apps/current-work/multipage/tabular-data.html#tabular-data
|
267
|
-
GUMBO_TAG_TABLE,
|
268
|
-
GUMBO_TAG_CAPTION,
|
269
|
-
GUMBO_TAG_COLGROUP,
|
270
|
-
GUMBO_TAG_COL,
|
271
|
-
GUMBO_TAG_TBODY,
|
272
|
-
GUMBO_TAG_THEAD,
|
273
|
-
GUMBO_TAG_TFOOT,
|
274
|
-
GUMBO_TAG_TR,
|
275
|
-
GUMBO_TAG_TD,
|
276
|
-
GUMBO_TAG_TH,
|
277
|
-
// http://www.whatwg.org/specs/web-apps/current-work/multipage/forms.html#forms
|
278
|
-
GUMBO_TAG_FORM,
|
279
|
-
GUMBO_TAG_FIELDSET,
|
280
|
-
GUMBO_TAG_LEGEND,
|
281
|
-
GUMBO_TAG_LABEL,
|
282
|
-
GUMBO_TAG_INPUT,
|
283
|
-
GUMBO_TAG_BUTTON,
|
284
|
-
GUMBO_TAG_SELECT,
|
285
|
-
GUMBO_TAG_DATALIST,
|
286
|
-
GUMBO_TAG_OPTGROUP,
|
287
|
-
GUMBO_TAG_OPTION,
|
288
|
-
GUMBO_TAG_TEXTAREA,
|
289
|
-
GUMBO_TAG_KEYGEN,
|
290
|
-
GUMBO_TAG_OUTPUT,
|
291
|
-
GUMBO_TAG_PROGRESS,
|
292
|
-
GUMBO_TAG_METER,
|
293
|
-
// http://www.whatwg.org/specs/web-apps/current-work/multipage/interactive-elements.html#interactive-elements
|
294
|
-
GUMBO_TAG_DETAILS,
|
295
|
-
GUMBO_TAG_SUMMARY,
|
296
|
-
GUMBO_TAG_MENU,
|
297
|
-
GUMBO_TAG_MENUITEM,
|
298
|
-
// Non-conforming elements that nonetheless appear in the HTML5 spec.
|
299
|
-
// http://www.whatwg.org/specs/web-apps/current-work/multipage/obsolete.html#non-conforming-features
|
300
|
-
GUMBO_TAG_APPLET,
|
301
|
-
GUMBO_TAG_ACRONYM,
|
302
|
-
GUMBO_TAG_BGSOUND,
|
303
|
-
GUMBO_TAG_DIR,
|
304
|
-
GUMBO_TAG_FRAME,
|
305
|
-
GUMBO_TAG_FRAMESET,
|
306
|
-
GUMBO_TAG_NOFRAMES,
|
307
|
-
GUMBO_TAG_ISINDEX,
|
308
|
-
GUMBO_TAG_LISTING,
|
309
|
-
GUMBO_TAG_XMP,
|
310
|
-
GUMBO_TAG_NEXTID,
|
311
|
-
GUMBO_TAG_NOEMBED,
|
312
|
-
GUMBO_TAG_PLAINTEXT,
|
313
|
-
GUMBO_TAG_RB,
|
314
|
-
GUMBO_TAG_STRIKE,
|
315
|
-
GUMBO_TAG_BASEFONT,
|
316
|
-
GUMBO_TAG_BIG,
|
317
|
-
GUMBO_TAG_BLINK,
|
318
|
-
GUMBO_TAG_CENTER,
|
319
|
-
GUMBO_TAG_FONT,
|
320
|
-
GUMBO_TAG_MARQUEE,
|
321
|
-
GUMBO_TAG_MULTICOL,
|
322
|
-
GUMBO_TAG_NOBR,
|
323
|
-
GUMBO_TAG_SPACER,
|
324
|
-
GUMBO_TAG_TT,
|
325
|
-
// Used for all tags that don't have special handling in HTML.
|
160
|
+
// Load all the tags from an external source, generated from tag.in.
|
161
|
+
# include "tag_enum.h"
|
162
|
+
// Used for all tags that don't have special handling in HTML. Add new tags
|
163
|
+
// to the end of tag.in so as to preserve backwards-compatibility.
|
326
164
|
GUMBO_TAG_UNKNOWN,
|
327
165
|
// A marker value to indicate the end of the enum, for iterating over it.
|
328
166
|
// Also used as the terminator for varargs functions that take tags.
|
@@ -364,9 +202,10 @@ const char* gumbo_normalize_svg_tagname(const GumboStringPiece* tagname);
|
|
364
202
|
|
365
203
|
/**
|
366
204
|
* Converts a tag name string (which may be in upper or mixed case) to a tag
|
367
|
-
* enum.
|
205
|
+
* enum. gumbo_tag_enum expects `tagname` to be NUL-terminated; gumbo_tagn_enum takes an explicit length.
|
368
206
|
*/
|
369
207
|
GumboTag gumbo_tag_enum(const char* tagname);
|
208
|
+
GumboTag gumbo_tagn_enum(const char* tagname, unsigned int length);
|
370
209
|
|
371
210
|
/**
|
372
211
|
* Attribute namespaces.
|
@@ -461,10 +300,16 @@ typedef enum {
|
|
461
300
|
GUMBO_NODE_TEXT,
|
462
301
|
/** CDATA node. v will be a GumboText. */
|
463
302
|
GUMBO_NODE_CDATA,
|
464
|
-
/** Comment node. v
|
303
|
+
/** Comment node. v will be a GumboText, excluding comment delimiters. */
|
465
304
|
GUMBO_NODE_COMMENT,
|
466
305
|
/** Text node, where all content is whitespace. v will be a GumboText. */
|
467
|
-
GUMBO_NODE_WHITESPACE
|
306
|
+
GUMBO_NODE_WHITESPACE,
|
307
|
+
/** Template node. This is separate from GUMBO_NODE_ELEMENT because many
|
308
|
+
* client libraries will want to ignore the contents of template nodes, as
|
309
|
+
* the spec suggests. Recursing on GUMBO_NODE_ELEMENT will do the right thing
|
310
|
+
* here, while clients that want to include template contents should also
|
311
|
+
* check for GUMBO_NODE_TEMPLATE. v will be a GumboElement. */
|
312
|
+
GUMBO_NODE_TEMPLATE
|
468
313
|
} GumboNodeType;
|
469
314
|
|
470
315
|
/**
|
@@ -678,6 +523,19 @@ struct GumboInternalNode {
|
|
678
523
|
/** Pointer back to parent node. Not owned. */
|
679
524
|
GumboNode* parent;
|
680
525
|
|
526
|
+
/**
|
527
|
+
* Pointer to next node in document order. This is the next node by start tag
|
528
|
+
* position in the document, or by position of the tag that forces the parser
|
529
|
+
* to insert it for parser-inserted nodes. It's necessary to maintain API
|
530
|
+
* compatibility with some other libraries, e.g. BeautifulSoup. Not owned.
|
531
|
+
*/
|
532
|
+
GumboNode* next;
|
533
|
+
|
534
|
+
/**
|
535
|
+
* Pointer to previous node in document order.
|
536
|
+
*/
|
537
|
+
GumboNode* prev;
|
538
|
+
|
681
539
|
/** The index within the parent's children vector of this node. */
|
682
540
|
size_t index_within_parent;
|
683
541
|
|
@@ -795,6 +653,14 @@ GumboOutput* gumbo_parse(const char* buffer);
|
|
795
653
|
GumboOutput* gumbo_parse_with_options(
|
796
654
|
const GumboOptions* options, const char* buffer, size_t buffer_length);
|
797
655
|
|
656
|
+
/**
|
657
|
+
* Parse a chunk of HTML with the given fragment context. If `fragment_ctx`
|
658
|
+
* is `GUMBO_TAG_LAST`, the fragment will be parsed as a full document.
|
659
|
+
*/
|
660
|
+
GumboOutput* gumbo_parse_fragment(
|
661
|
+
const GumboOptions* options, const char* buffer, size_t length,
|
662
|
+
const GumboTag fragment_ctx, const GumboNamespaceEnum fragment_namespace);
|
663
|
+
|
798
664
|
/** Release the memory used for the parse tree & parse errors. */
|
799
665
|
void gumbo_destroy_output(
|
800
666
|
const GumboOptions* options, GumboOutput* output);
|
data/gumbo-parser/src/parser.c
CHANGED
@@ -32,12 +32,30 @@
|
|
32
32
|
#include "util.h"
|
33
33
|
#include "vector.h"
|
34
34
|
|
35
|
-
|
36
35
|
#define AVOID_UNUSED_VARIABLE_WARNING(i) (void)(i)
|
37
36
|
|
38
37
|
#define GUMBO_STRING(literal) { literal, sizeof(literal) - 1 }
|
39
38
|
#define TERMINATOR { "", 0 }
|
40
39
|
|
40
|
+
typedef char gumbo_tagset[GUMBO_TAG_LAST];
|
41
|
+
#define TAG(tag) [GUMBO_TAG_##tag] = (1 << GUMBO_NAMESPACE_HTML)
|
42
|
+
#define TAG_SVG(tag) [GUMBO_TAG_##tag] = (1 << GUMBO_NAMESPACE_SVG)
|
43
|
+
#define TAG_MATHML(tag) [GUMBO_TAG_##tag] = (1 << GUMBO_NAMESPACE_MATHML)
|
44
|
+
|
45
|
+
#define TAGSET_INCLUDES(tagset, namespace, tag) \
|
46
|
+
(tag < GUMBO_TAG_LAST && \
|
47
|
+
tagset[(int)tag] == (1 << (int)namespace))
|
48
|
+
|
49
|
+
|
50
|
+
|
51
|
+
// selected forward declarations as it is getting hard to find
|
52
|
+
// an appropriate order
|
53
|
+
static bool node_html_tag_is(const GumboNode*, GumboTag);
|
54
|
+
static GumboInsertionMode get_current_template_insertion_mode(const GumboParser*);
|
55
|
+
static bool handle_in_template(GumboParser*, GumboToken*);
|
56
|
+
static GumboNode* destroy_node(GumboParser*, GumboNode*);
|
57
|
+
|
58
|
+
|
41
59
|
static void* malloc_wrapper(void* unused, size_t size) {
|
42
60
|
return malloc(size);
|
43
61
|
}
|
@@ -181,7 +199,7 @@ typedef struct _ReplacementEntry {
|
|
181
199
|
{ GUMBO_STRING(from), GUMBO_STRING(to) }
|
182
200
|
|
183
201
|
// Static data for SVG attribute replacements.
|
184
|
-
//
|
202
|
+
// https://html.spec.whatwg.org/multipage/syntax.html#creating-and-inserting-nodes
|
185
203
|
static const ReplacementEntry kSvgAttributeReplacements[] = {
|
186
204
|
REPLACEMENT_ENTRY("attributename", "attributeName"),
|
187
205
|
REPLACEMENT_ENTRY("attributetype", "attributeType"),
|
@@ -189,12 +207,12 @@ static const ReplacementEntry kSvgAttributeReplacements[] = {
|
|
189
207
|
REPLACEMENT_ENTRY("baseprofile", "baseProfile"),
|
190
208
|
REPLACEMENT_ENTRY("calcmode", "calcMode"),
|
191
209
|
REPLACEMENT_ENTRY("clippathunits", "clipPathUnits"),
|
192
|
-
REPLACEMENT_ENTRY("contentscripttype", "contentScriptType"),
|
193
|
-
REPLACEMENT_ENTRY("contentstyletype", "contentStyleType"),
|
210
|
+
// REPLACEMENT_ENTRY("contentscripttype", "contentScriptType"),
|
211
|
+
// REPLACEMENT_ENTRY("contentstyletype", "contentStyleType"),
|
194
212
|
REPLACEMENT_ENTRY("diffuseconstant", "diffuseConstant"),
|
195
213
|
REPLACEMENT_ENTRY("edgemode", "edgeMode"),
|
196
|
-
REPLACEMENT_ENTRY("externalresourcesrequired", "externalResourcesRequired"),
|
197
|
-
REPLACEMENT_ENTRY("filterres", "filterRes"),
|
214
|
+
// REPLACEMENT_ENTRY("externalresourcesrequired", "externalResourcesRequired"),
|
215
|
+
// REPLACEMENT_ENTRY("filterres", "filterRes"),
|
198
216
|
REPLACEMENT_ENTRY("filterunits", "filterUnits"),
|
199
217
|
REPLACEMENT_ENTRY("glyphref", "glyphRef"),
|
200
218
|
REPLACEMENT_ENTRY("gradienttransform", "gradientTransform"),
|
@@ -336,7 +354,7 @@ typedef struct _TextNodeBufferState {
|
|
336
354
|
// The source position of the start of this text node.
|
337
355
|
GumboSourcePosition _start_position;
|
338
356
|
|
339
|
-
// The type of node that will be inserted (TEXT or WHITESPACE).
|
357
|
+
// The type of node that will be inserted (TEXT, CDATA, or WHITESPACE).
|
340
358
|
GumboNodeType _type;
|
341
359
|
} TextNodeBufferState;
|
342
360
|
|
@@ -362,6 +380,9 @@ typedef struct GumboInternalParserState {
|
|
362
380
|
GumboNode* _head_element;
|
363
381
|
GumboNode* _form_element;
|
364
382
|
|
383
|
+
// The element used as fragment context when parsing in fragment mode
|
384
|
+
GumboNode* _fragment_ctx;
|
385
|
+
|
365
386
|
// The flag for when the spec says "Reprocess the current token in..."
|
366
387
|
bool _reprocess_current_token;
|
367
388
|
|
@@ -390,6 +411,10 @@ typedef struct GumboInternalParserState {
|
|
390
411
|
// The current token.
|
391
412
|
GumboToken* _current_token;
|
392
413
|
|
414
|
+
// The current (most recently inserted) node. This is used to link together
|
415
|
+
// nodes in document order.
|
416
|
+
GumboNode* _current_node;
|
417
|
+
|
393
418
|
// The way that the spec is written, the </body> and </html> tags are *always*
|
394
419
|
// implicit, because encountering one of those tokens merely switches the
|
395
420
|
// insertion mode out of "in body". So we have individual state flags for
|
@@ -442,7 +467,17 @@ static void set_frameset_not_ok(GumboParser* parser) {
|
|
442
467
|
}
|
443
468
|
|
444
469
|
static GumboNode* create_node(GumboParser* parser, GumboNodeType type) {
|
470
|
+
GumboParserState* state = parser->_parser_state;
|
445
471
|
GumboNode* node = gumbo_parser_allocate(parser, sizeof(GumboNode));
|
472
|
+
|
473
|
+
node->next = NULL;
|
474
|
+
node->prev = state->_current_node;
|
475
|
+
if (state->_current_node != NULL) {
|
476
|
+
// May be null for the initial document node.
|
477
|
+
state->_current_node->next = node;
|
478
|
+
}
|
479
|
+
state->_current_node = node;
|
480
|
+
|
446
481
|
node->parent = NULL;
|
447
482
|
node->index_within_parent = -1;
|
448
483
|
node->type = type;
|
@@ -489,7 +524,9 @@ static void parser_state_init(GumboParser* parser) {
|
|
489
524
|
gumbo_vector_init(parser, 5, &parser_state->_template_insertion_modes);
|
490
525
|
parser_state->_head_element = NULL;
|
491
526
|
parser_state->_form_element = NULL;
|
527
|
+
parser_state->_fragment_ctx = NULL;
|
492
528
|
parser_state->_current_token = NULL;
|
529
|
+
parser_state->_current_node = NULL;
|
493
530
|
parser_state->_closed_body_tag = false;
|
494
531
|
parser_state->_closed_html_tag = false;
|
495
532
|
parser->_parser_state = parser_state;
|
@@ -497,17 +534,25 @@ static void parser_state_init(GumboParser* parser) {
|
|
497
534
|
|
498
535
|
static void parser_state_destroy(GumboParser* parser) {
|
499
536
|
GumboParserState* state = parser->_parser_state;
|
537
|
+
if (state->_fragment_ctx) {
|
538
|
+
destroy_node(parser, state->_fragment_ctx);
|
539
|
+
}
|
500
540
|
gumbo_vector_destroy(parser, &state->_active_formatting_elements);
|
501
541
|
gumbo_vector_destroy(parser, &state->_open_elements);
|
502
542
|
gumbo_vector_destroy(parser, &state->_template_insertion_modes);
|
503
543
|
gumbo_string_buffer_destroy(parser, &state->_text_node._buffer);
|
504
544
|
gumbo_parser_deallocate(parser, state);
|
545
|
+
parser->_parser_state = NULL;
|
505
546
|
}
|
506
547
|
|
507
548
|
static GumboNode* get_document_node(GumboParser* parser) {
|
508
549
|
return parser->_output->document;
|
509
550
|
}
|
510
551
|
|
552
|
+
static bool is_fragment_parser(const GumboParser *parser) {
|
553
|
+
return !!parser->_parser_state->_fragment_ctx;
|
554
|
+
}
|
555
|
+
|
511
556
|
// Returns the node at the bottom of the stack of open elements, or NULL if no
|
512
557
|
// elements have been added yet.
|
513
558
|
static GumboNode* get_current_node(GumboParser* parser) {
|
@@ -521,6 +566,14 @@ static GumboNode* get_current_node(GumboParser* parser) {
|
|
521
566
|
return open_elements->data[open_elements->length - 1];
|
522
567
|
}
|
523
568
|
|
569
|
+
static GumboNode* get_adjusted_current_node(GumboParser* parser) {
|
570
|
+
GumboParserState *state = parser->_parser_state;
|
571
|
+
if (state->_open_elements.length == 1 && state->_fragment_ctx) {
|
572
|
+
return state->_fragment_ctx;
|
573
|
+
}
|
574
|
+
return get_current_node(parser);
|
575
|
+
}
|
576
|
+
|
524
577
|
// Returns true if the given needle is in the given array of literal
|
525
578
|
// GumboStringPieces. If exact_match is true, this requires that they match
|
526
579
|
// exactly; otherwise, this performs a prefix match to check if any of the
|
@@ -541,52 +594,80 @@ static void set_insertion_mode(GumboParser* parser, GumboInsertionMode mode) {
|
|
541
594
|
parser->_parser_state->_insertion_mode = mode;
|
542
595
|
}
|
543
596
|
|
597
|
+
|
544
598
|
// http://www.whatwg.org/specs/web-apps/current-work/complete/parsing.html#reset-the-insertion-mode-appropriately
|
545
599
|
// This is a helper function that returns the appropriate insertion mode instead
|
546
600
|
// of setting it. Returns GUMBO_INSERTION_MODE_INITIAL as a sentinel value to
|
547
601
|
// indicate that there is no appropriate insertion mode, and the loop should
|
548
602
|
// continue.
|
549
|
-
static GumboInsertionMode get_appropriate_insertion_mode(
|
550
|
-
|
551
|
-
|
603
|
+
static GumboInsertionMode get_appropriate_insertion_mode(const GumboParser* parser, int index) {
|
604
|
+
const GumboVector* open_elements = &parser->_parser_state->_open_elements;
|
605
|
+
const GumboNode* node = open_elements->data[index];
|
606
|
+
const bool is_last = index == 0;
|
607
|
+
|
608
|
+
if (is_last && is_fragment_parser(parser)) {
|
609
|
+
node = parser->_parser_state->_fragment_ctx;
|
610
|
+
}
|
611
|
+
|
612
|
+
assert(node->type == GUMBO_NODE_ELEMENT || node->type == GUMBO_NODE_TEMPLATE);
|
552
613
|
switch (node->v.element.tag) {
|
553
|
-
|
614
|
+
case GUMBO_TAG_SELECT: {
|
615
|
+
if (is_last) {
|
554
616
|
return GUMBO_INSERTION_MODE_IN_SELECT;
|
555
|
-
|
556
|
-
|
557
|
-
|
558
|
-
|
559
|
-
|
560
|
-
|
561
|
-
|
562
|
-
|
563
|
-
|
564
|
-
|
565
|
-
|
566
|
-
return GUMBO_INSERTION_MODE_IN_CAPTION;
|
567
|
-
case GUMBO_TAG_COLGROUP:
|
568
|
-
return GUMBO_INSERTION_MODE_IN_COLUMN_GROUP;
|
569
|
-
case GUMBO_TAG_TABLE:
|
570
|
-
return GUMBO_INSERTION_MODE_IN_TABLE;
|
571
|
-
case GUMBO_TAG_HEAD:
|
572
|
-
case GUMBO_TAG_BODY:
|
573
|
-
return GUMBO_INSERTION_MODE_IN_BODY;
|
574
|
-
case GUMBO_TAG_FRAMESET:
|
575
|
-
return GUMBO_INSERTION_MODE_IN_FRAMESET;
|
576
|
-
case GUMBO_TAG_HTML:
|
577
|
-
return GUMBO_INSERTION_MODE_BEFORE_HEAD;
|
578
|
-
default:
|
579
|
-
return is_last ?
|
580
|
-
GUMBO_INSERTION_MODE_IN_BODY : GUMBO_INSERTION_MODE_INITIAL;
|
617
|
+
}
|
618
|
+
for (int i = index; i > 0; --i) {
|
619
|
+
const GumboNode* ancestor = open_elements->data[i];
|
620
|
+
if (node_html_tag_is(ancestor, GUMBO_TAG_TEMPLATE)) {
|
621
|
+
return GUMBO_INSERTION_MODE_IN_SELECT;
|
622
|
+
}
|
623
|
+
if (node_html_tag_is(ancestor, GUMBO_TAG_TABLE)) {
|
624
|
+
return GUMBO_INSERTION_MODE_IN_SELECT_IN_TABLE;
|
625
|
+
}
|
626
|
+
}
|
627
|
+
return GUMBO_INSERTION_MODE_IN_SELECT;
|
581
628
|
}
|
629
|
+
case GUMBO_TAG_TD:
|
630
|
+
case GUMBO_TAG_TH:
|
631
|
+
if (!is_last) return GUMBO_INSERTION_MODE_IN_CELL;
|
632
|
+
break;
|
633
|
+
case GUMBO_TAG_TR:
|
634
|
+
return GUMBO_INSERTION_MODE_IN_ROW;
|
635
|
+
case GUMBO_TAG_TBODY:
|
636
|
+
case GUMBO_TAG_THEAD:
|
637
|
+
case GUMBO_TAG_TFOOT:
|
638
|
+
return GUMBO_INSERTION_MODE_IN_TABLE_BODY;
|
639
|
+
case GUMBO_TAG_CAPTION:
|
640
|
+
return GUMBO_INSERTION_MODE_IN_CAPTION;
|
641
|
+
case GUMBO_TAG_COLGROUP:
|
642
|
+
return GUMBO_INSERTION_MODE_IN_COLUMN_GROUP;
|
643
|
+
case GUMBO_TAG_TABLE:
|
644
|
+
return GUMBO_INSERTION_MODE_IN_TABLE;
|
645
|
+
case GUMBO_TAG_TEMPLATE:
|
646
|
+
return get_current_template_insertion_mode(parser);
|
647
|
+
case GUMBO_TAG_HEAD:
|
648
|
+
if (!is_last) return GUMBO_INSERTION_MODE_IN_HEAD;
|
649
|
+
break;
|
650
|
+
case GUMBO_TAG_BODY:
|
651
|
+
return GUMBO_INSERTION_MODE_IN_BODY;
|
652
|
+
case GUMBO_TAG_FRAMESET:
|
653
|
+
return GUMBO_INSERTION_MODE_IN_FRAMESET;
|
654
|
+
case GUMBO_TAG_HTML:
|
655
|
+
return parser->_parser_state->_head_element ?
|
656
|
+
GUMBO_INSERTION_MODE_AFTER_HEAD : GUMBO_INSERTION_MODE_BEFORE_HEAD;
|
657
|
+
default:
|
658
|
+
break;
|
659
|
+
}
|
660
|
+
return is_last ?
|
661
|
+
GUMBO_INSERTION_MODE_IN_BODY : GUMBO_INSERTION_MODE_INITIAL;
|
582
662
|
}
|
583
663
|
|
664
|
+
|
584
665
|
// This performs the actual "reset the insertion mode" loop.
|
585
666
|
static void reset_insertion_mode_appropriately(GumboParser* parser) {
|
586
667
|
const GumboVector* open_elements = &parser->_parser_state->_open_elements;
|
587
668
|
for (int i = open_elements->length; --i >= 0; ) {
|
588
669
|
GumboInsertionMode mode =
|
589
|
-
get_appropriate_insertion_mode(
|
670
|
+
get_appropriate_insertion_mode(parser, i);
|
590
671
|
if (mode != GUMBO_INSERTION_MODE_INITIAL) {
|
591
672
|
set_insertion_mode(parser, mode);
|
592
673
|
return;
|
@@ -620,7 +701,7 @@ static GumboError* parser_add_parse_error(GumboParser* parser, const GumboToken*
|
|
620
701
|
&extra_data->tag_stack);
|
621
702
|
for (int i = 0; i < state->_open_elements.length; ++i) {
|
622
703
|
const GumboNode* node = state->_open_elements.data[i];
|
623
|
-
assert(node->type == GUMBO_NODE_ELEMENT);
|
704
|
+
assert(node->type == GUMBO_NODE_ELEMENT || node->type == GUMBO_NODE_TEMPLATE);
|
624
705
|
gumbo_vector_add(parser, (void*) node->v.element.tag,
|
625
706
|
&extra_data->tag_stack);
|
626
707
|
}
|
@@ -631,13 +712,7 @@ static GumboError* parser_add_parse_error(GumboParser* parser, const GumboToken*
|
|
631
712
|
// by is_start) with one of the tag types in the varargs list. Terminate the
|
632
713
|
// list with GUMBO_TAG_LAST; this functions as a sentinel since no portion of
|
633
714
|
// the spec references tags that are not in the spec.
|
634
|
-
|
635
|
-
// places in the code. This is how it's written in the spec (and it's done this
|
636
|
-
// way so it's easy to verify the code against the spec), but it may be worth
|
637
|
-
// coming up with a notion of a "tag set" that includes a list of tags, and
|
638
|
-
// using that in many places. It'd probably also help performance, but I want
|
639
|
-
// to profile before optimizing.
|
640
|
-
static bool tag_in(const GumboToken* token, bool is_start, ...) {
|
715
|
+
static bool tag_in(const GumboToken* token, bool is_start, const gumbo_tagset tags) {
|
641
716
|
GumboTag token_tag;
|
642
717
|
if (is_start && token->type == GUMBO_TOKEN_START_TAG) {
|
643
718
|
token_tag = token->v.start_tag.tag;
|
@@ -646,19 +721,7 @@ static bool tag_in(const GumboToken* token, bool is_start, ...) {
|
|
646
721
|
} else {
|
647
722
|
return false;
|
648
723
|
}
|
649
|
-
|
650
|
-
va_list tags;
|
651
|
-
va_start(tags, is_start);
|
652
|
-
bool result = false;
|
653
|
-
for (GumboTag tag = va_arg(tags, GumboTag); tag != GUMBO_TAG_LAST;
|
654
|
-
tag = va_arg(tags, GumboTag)) {
|
655
|
-
if (tag == token_tag) {
|
656
|
-
result = true;
|
657
|
-
break;
|
658
|
-
}
|
659
|
-
}
|
660
|
-
va_end(tags);
|
661
|
-
return result;
|
724
|
+
return (token_tag < GUMBO_TAG_LAST && tags[(int)token_tag] != 0);
|
662
725
|
}
|
663
726
|
|
664
727
|
// Like tag_in, but for the single-tag case.
|
@@ -673,52 +736,119 @@ static bool tag_is(const GumboToken* token, bool is_start, GumboTag tag) {
|
|
673
736
|
}
|
674
737
|
|
675
738
|
// Like tag_in, but checks for the tag of a node, rather than a token.
|
676
|
-
static bool
|
739
|
+
static bool node_tag_in_set(const GumboNode* node, const gumbo_tagset tags) {
|
677
740
|
assert(node != NULL);
|
678
|
-
if (node->type != GUMBO_NODE_ELEMENT) {
|
741
|
+
if (node->type != GUMBO_NODE_ELEMENT && node->type != GUMBO_NODE_TEMPLATE) {
|
679
742
|
return false;
|
680
743
|
}
|
681
|
-
|
682
|
-
|
683
|
-
va_list tags;
|
684
|
-
va_start(tags, node);
|
685
|
-
bool result = false;
|
686
|
-
for (GumboTag tag = va_arg(tags, GumboTag); tag != GUMBO_TAG_LAST;
|
687
|
-
tag = va_arg(tags, GumboTag)) {
|
688
|
-
assert(tag <= GUMBO_TAG_LAST);
|
689
|
-
if (tag == node_tag) {
|
690
|
-
result = true;
|
691
|
-
break;
|
692
|
-
}
|
693
|
-
}
|
694
|
-
va_end(tags);
|
695
|
-
return result;
|
744
|
+
return TAGSET_INCLUDES(tags, node->v.element.tag_namespace, node->v.element.tag);
|
696
745
|
}
|
697
746
|
|
747
|
+
|
698
748
|
// Like node_tag_in, but for the single-tag case.
|
699
|
-
static bool
|
700
|
-
return node->type == GUMBO_NODE_ELEMENT
|
749
|
+
static bool node_qualified_tag_is(const GumboNode* node, GumboNamespaceEnum ns, GumboTag tag) {
|
750
|
+
return (node->type == GUMBO_NODE_ELEMENT || node->type == GUMBO_NODE_TEMPLATE) &&
|
751
|
+
node->v.element.tag == tag &&
|
752
|
+
node->v.element.tag_namespace == ns;
|
753
|
+
}
|
754
|
+
|
755
|
+
// Like node_tag_in_set, but for the single-tag case in the HTML namespace.
|
756
|
+
static bool node_html_tag_is(const GumboNode* node, GumboTag tag)
|
757
|
+
{
|
758
|
+
return node_qualified_tag_is(node, GUMBO_NAMESPACE_HTML, tag);
|
759
|
+
}
|
760
|
+
|
761
|
+
static void push_template_insertion_mode(GumboParser* parser, GumboInsertionMode mode) {
|
762
|
+
gumbo_vector_add(parser, (void*) mode, &parser->_parser_state->_template_insertion_modes);
|
763
|
+
}
|
764
|
+
|
765
|
+
static void pop_template_insertion_mode(GumboParser* parser) {
|
766
|
+
gumbo_vector_pop(parser, &parser->_parser_state->_template_insertion_modes);
|
767
|
+
}
|
768
|
+
|
769
|
+
// Returns the current template insertion mode. If the stack of template
|
770
|
+
// insertion modes is empty, this returns GUMBO_INSERTION_MODE_INITIAL.
|
771
|
+
static GumboInsertionMode get_current_template_insertion_mode(const GumboParser* parser) {
|
772
|
+
GumboVector* template_insertion_modes = &parser->_parser_state->_template_insertion_modes;
|
773
|
+
if (template_insertion_modes->length == 0) {
|
774
|
+
return GUMBO_INSERTION_MODE_INITIAL;
|
775
|
+
}
|
776
|
+
return (GumboInsertionMode) template_insertion_modes->data[(template_insertion_modes->length - 1)];
|
701
777
|
}
|
702
778
|
|
703
779
|
// http://www.whatwg.org/specs/web-apps/current-work/multipage/tree-construction.html#mathml-text-integration-point
|
704
780
|
static bool is_mathml_integration_point(const GumboNode* node) {
|
705
|
-
return
|
706
|
-
|
707
|
-
node->v.element.tag_namespace == GUMBO_NAMESPACE_MATHML;
|
781
|
+
return node_tag_in_set(node, (gumbo_tagset) { TAG_MATHML(MI), TAG_MATHML(MO), TAG_MATHML(MN),
|
782
|
+
TAG_MATHML(MS), TAG_MATHML(MTEXT) });
|
708
783
|
}
|
709
784
|
|
710
785
|
// http://www.whatwg.org/specs/web-apps/current-work/multipage/tree-construction.html#html-integration-point
|
711
786
|
static bool is_html_integration_point(const GumboNode* node) {
|
712
|
-
return (
|
713
|
-
|
714
|
-
node->v.element.tag_namespace == GUMBO_NAMESPACE_SVG) ||
|
715
|
-
(node_tag_is(node, GUMBO_TAG_ANNOTATION_XML) && (
|
787
|
+
return node_tag_in_set(node, (gumbo_tagset) { TAG_SVG(FOREIGNOBJECT), TAG_SVG(DESC), TAG_SVG(TITLE) }) ||
|
788
|
+
(node_qualified_tag_is(node, GUMBO_NAMESPACE_MATHML, GUMBO_TAG_ANNOTATION_XML) && (
|
716
789
|
attribute_matches(&node->v.element.attributes,
|
717
790
|
"encoding", "text/html") ||
|
718
791
|
attribute_matches(&node->v.element.attributes,
|
719
792
|
"encoding", "application/xhtml+xml")));
|
720
793
|
}
|
721
794
|
|
795
|
+
|
796
|
+
// This represents a place to insert a node, consisting of a target parent and a
|
797
|
+
// child index within that parent. If the node should be inserted at the end of
|
798
|
+
// the parent's children, index will be -1.
|
799
|
+
typedef struct {
|
800
|
+
GumboNode* target;
|
801
|
+
int index;
|
802
|
+
} InsertionLocation;
|
803
|
+
|
804
|
+
// Computes where the next node should be inserted. When override_target is
// NULL the current node is used (or the document node while no root element
// exists yet). If foster parenting is enabled and the target is a table,
// tbody, tfoot, thead or tr element, the location is redirected according to
// the last <template> and last <table> on the stack of open elements.
InsertionLocation get_appropriate_insertion_location(GumboParser* parser, GumboNode* override_target) {
  InsertionLocation location = { override_target, -1 };
  if (location.target == NULL) {
    // get_current_node() assumes a non-empty stack of open elements, so fall
    // back to the document node until the root element has been created.
    if (parser->_output->root != NULL) {
      location.target = get_current_node(parser);
    } else {
      location.target = get_document_node(parser);
    }
  }

  const bool foster_parenting =
      parser->_parser_state->_foster_parent_insertions &&
      node_tag_in_set(location.target, (gumbo_tagset) { TAG(TABLE), TAG(TBODY),
          TAG(TFOOT), TAG(THEAD), TAG(TR) });
  if (!foster_parenting) {
    return location;
  }

  // Foster-parenting case: record the positions of the last <template> and
  // the last <table> on the stack of open elements.
  GumboVector* stack = &parser->_parser_state->_open_elements;
  int template_pos = -1;
  int table_pos = -1;
  for (int pos = 0; pos < stack->length; ++pos) {
    GumboNode* candidate = stack->data[pos];
    if (node_html_tag_is(candidate, GUMBO_TAG_TEMPLATE)) {
      template_pos = pos;
    }
    if (node_html_tag_is(candidate, GUMBO_TAG_TABLE)) {
      table_pos = pos;
    }
  }

  // Both positions are >= -1, so this single comparison holds exactly when a
  // template exists and there is either no table or the template sits above
  // the table on the stack.
  if (template_pos > table_pos) {
    location.target = stack->data[template_pos];
  } else if (table_pos == -1) {
    // No table on the stack: use the bottom-most open element.
    location.target = stack->data[0];
  } else {
    GumboNode* table = stack->data[table_pos];
    if (table->parent != NULL) {
      // Insert into the table's parent, at the table's own child position.
      location.target = table->parent;
      location.index = table->index_within_parent;
    } else {
      // Parentless table: use the element just below it on the stack.
      location.target = stack->data[table_pos - 1];
    }
  }
  return location;
}
|
850
|
+
|
851
|
+
|
722
852
|
// Appends a node to the end of its parent, setting the "parent" and
|
723
853
|
// "index_within_parent" fields appropriately.
|
724
854
|
static void append_node(
|
@@ -726,7 +856,7 @@ static void append_node(
|
|
726
856
|
assert(node->parent == NULL);
|
727
857
|
assert(node->index_within_parent == -1);
|
728
858
|
GumboVector* children;
|
729
|
-
if (parent->type == GUMBO_NODE_ELEMENT) {
|
859
|
+
if (parent->type == GUMBO_NODE_ELEMENT || parent->type == GUMBO_NODE_TEMPLATE) {
|
730
860
|
children = &parent->v.element.children;
|
731
861
|
} else {
|
732
862
|
assert(parent->type == GUMBO_NODE_DOCUMENT);
|
@@ -738,66 +868,44 @@ static void append_node(
|
|
738
868
|
assert(node->index_within_parent < children->length);
|
739
869
|
}
|
740
870
|
|
741
|
-
// Inserts a node at the specified
|
871
|
+
// Inserts a node at the specified InsertionLocation, updating the
|
742
872
|
// "parent" and "index_within_parent" fields of it and all its siblings.
|
873
|
+
// If the index of the location is -1, this calls append_node.
|
743
874
|
static void insert_node(
|
744
|
-
|
875
|
+
GumboParser* parser, GumboNode* node, InsertionLocation location) {
|
745
876
|
assert(node->parent == NULL);
|
746
877
|
assert(node->index_within_parent == -1);
|
747
|
-
|
748
|
-
|
749
|
-
|
750
|
-
|
751
|
-
|
752
|
-
|
753
|
-
|
754
|
-
|
755
|
-
|
756
|
-
|
757
|
-
|
758
|
-
|
759
|
-
|
760
|
-
}
|
878
|
+
GumboNode* parent = location.target;
|
879
|
+
int index = location.index;
|
880
|
+
if (index != -1) {
|
881
|
+
GumboVector* children = NULL;
|
882
|
+
if (parent->type == GUMBO_NODE_ELEMENT ||
|
883
|
+
parent->type == GUMBO_NODE_TEMPLATE) {
|
884
|
+
children = &parent->v.element.children;
|
885
|
+
} else if (parent->type == GUMBO_NODE_DOCUMENT) {
|
886
|
+
children = &parent->v.document.children;
|
887
|
+
assert(children->length == 0);
|
888
|
+
} else {
|
889
|
+
assert(0);
|
890
|
+
}
|
761
891
|
|
762
|
-
|
763
|
-
|
764
|
-
|
765
|
-
|
766
|
-
|
767
|
-
|
768
|
-
|
769
|
-
|
770
|
-
|
771
|
-
|
772
|
-
GumboNode* table_element = open_elements->data[i];
|
773
|
-
if (node_tag_is(table_element, GUMBO_TAG_TABLE)) {
|
774
|
-
foster_parent_element = table_element->parent;
|
775
|
-
if (!foster_parent_element ||
|
776
|
-
foster_parent_element->type != GUMBO_NODE_ELEMENT) {
|
777
|
-
// Table has no parent; spec says it's possible if a script manipulated
|
778
|
-
// the DOM, although I don't think we have to worry about this case.
|
779
|
-
gumbo_debug("Table has no parent.\n");
|
780
|
-
foster_parent_element = open_elements->data[i - 1];
|
781
|
-
break;
|
782
|
-
}
|
783
|
-
assert(foster_parent_element->type == GUMBO_NODE_ELEMENT);
|
784
|
-
gumbo_debug("Found enclosing table (%x) at %d; parent=%s, index=%d.\n",
|
785
|
-
table_element, i, gumbo_normalized_tagname(
|
786
|
-
foster_parent_element->v.element.tag),
|
787
|
-
table_element->index_within_parent);
|
788
|
-
assert(foster_parent_element->v.element.children.data[
|
789
|
-
table_element->index_within_parent] == table_element);
|
790
|
-
insert_node(parser, foster_parent_element,
|
791
|
-
table_element->index_within_parent, node);
|
792
|
-
return;
|
892
|
+
assert(index >= 0);
|
893
|
+
assert(index < children->length);
|
894
|
+
node->parent = parent;
|
895
|
+
node->index_within_parent = index;
|
896
|
+
gumbo_vector_insert_at(parser, (void*) node, index, children);
|
897
|
+
assert(node->index_within_parent < children->length);
|
898
|
+
for (int i = index + 1; i < children->length; ++i) {
|
899
|
+
GumboNode* sibling = children->data[i];
|
900
|
+
sibling->index_within_parent = i;
|
901
|
+
assert(sibling->index_within_parent < children->length);
|
793
902
|
}
|
903
|
+
} else {
|
904
|
+
append_node(parser, parent, node);
|
794
905
|
}
|
795
|
-
if (node->type == GUMBO_NODE_ELEMENT) {
|
796
|
-
gumbo_vector_add(parser, (void*) node, open_elements);
|
797
|
-
}
|
798
|
-
append_node(parser, foster_parent_element, node);
|
799
906
|
}
|
800
907
|
|
908
|
+
|
801
909
|
static void maybe_flush_text_node_buffer(GumboParser* parser) {
|
802
910
|
GumboParserState* state = parser->_parser_state;
|
803
911
|
TextNodeBufferState* buffer_state = &state->_text_node;
|
@@ -806,7 +914,8 @@ static void maybe_flush_text_node_buffer(GumboParser* parser) {
|
|
806
914
|
}
|
807
915
|
|
808
916
|
assert(buffer_state->_type == GUMBO_NODE_WHITESPACE ||
|
809
|
-
buffer_state->_type == GUMBO_NODE_TEXT
|
917
|
+
buffer_state->_type == GUMBO_NODE_TEXT ||
|
918
|
+
buffer_state->_type == GUMBO_NODE_CDATA);
|
810
919
|
GumboNode* text_node = create_node(parser, buffer_state->_type);
|
811
920
|
GumboText* text_node_data = &text_node->v.text;
|
812
921
|
text_node_data->text = gumbo_string_buffer_to_string(
|
@@ -816,20 +925,20 @@ static void maybe_flush_text_node_buffer(GumboParser* parser) {
|
|
816
925
|
state->_current_token->original_text.data -
|
817
926
|
buffer_state->_start_original_text;
|
818
927
|
text_node_data->start_pos = buffer_state->_start_position;
|
819
|
-
|
820
|
-
get_current_node(parser), GUMBO_TAG_TABLE, GUMBO_TAG_TBODY,
|
821
|
-
GUMBO_TAG_TFOOT, GUMBO_TAG_THEAD, GUMBO_TAG_TR, GUMBO_TAG_LAST)) {
|
822
|
-
foster_parent_element(parser, text_node);
|
823
|
-
} else {
|
824
|
-
append_node(
|
825
|
-
parser, parser->_output->root ?
|
826
|
-
get_current_node(parser) : parser->_output->document, text_node);
|
827
|
-
}
|
928
|
+
|
828
929
|
gumbo_debug("Flushing text node buffer of %.*s.\n",
|
829
930
|
(int) buffer_state->_buffer.length, buffer_state->_buffer.data);
|
830
931
|
|
831
|
-
|
832
|
-
|
932
|
+
InsertionLocation location = get_appropriate_insertion_location(parser, NULL);
|
933
|
+
if (location.target->type == GUMBO_NODE_DOCUMENT) {
|
934
|
+
// The DOM does not allow Document nodes to have Text children, so per the
|
935
|
+
// spec, they are dropped on the floor.
|
936
|
+
destroy_node(parser, text_node);
|
937
|
+
} else {
|
938
|
+
insert_node(parser, text_node, location);
|
939
|
+
}
|
940
|
+
|
941
|
+
gumbo_string_buffer_clear(parser, &buffer_state->_buffer);
|
833
942
|
buffer_state->_type = GUMBO_NODE_WHITESPACE;
|
834
943
|
assert(buffer_state->_buffer.length == 0);
|
835
944
|
}
|
@@ -846,7 +955,7 @@ static GumboNode* pop_current_node(GumboParser* parser) {
|
|
846
955
|
GumboParserState* state = parser->_parser_state;
|
847
956
|
maybe_flush_text_node_buffer(parser);
|
848
957
|
if (state->_open_elements.length > 0) {
|
849
|
-
assert(
|
958
|
+
assert(node_html_tag_is(state->_open_elements.data[0], GUMBO_TAG_HTML));
|
850
959
|
gumbo_debug(
|
851
960
|
"Popping %s node.\n",
|
852
961
|
gumbo_normalized_tagname(get_current_node(parser)->v.element.tag));
|
@@ -856,12 +965,12 @@ static GumboNode* pop_current_node(GumboParser* parser) {
|
|
856
965
|
assert(state->_open_elements.length == 0);
|
857
966
|
return NULL;
|
858
967
|
}
|
859
|
-
assert(current_node->type == GUMBO_NODE_ELEMENT);
|
968
|
+
assert(current_node->type == GUMBO_NODE_ELEMENT || current_node->type == GUMBO_NODE_TEMPLATE);
|
860
969
|
bool is_closed_body_or_html_tag =
|
861
|
-
(
|
862
|
-
(
|
970
|
+
(node_html_tag_is(current_node, GUMBO_TAG_BODY) && state->_closed_body_tag) ||
|
971
|
+
(node_html_tag_is(current_node, GUMBO_TAG_HTML) && state->_closed_html_tag);
|
863
972
|
if ((state->_current_token->type != GUMBO_TOKEN_END_TAG ||
|
864
|
-
!
|
973
|
+
!node_html_tag_is(current_node, state->_current_token->v.end_tag)) &&
|
865
974
|
!is_closed_body_or_html_tag) {
|
866
975
|
current_node->parse_flags |= GUMBO_INSERTION_IMPLICIT_END_TAG;
|
867
976
|
}
|
@@ -885,25 +994,22 @@ static void append_comment_node(
|
|
885
994
|
|
886
995
|
// http://www.whatwg.org/specs/web-apps/current-work/complete/tokenization.html#clear-the-stack-back-to-a-table-row-context
|
887
996
|
static void clear_stack_to_table_row_context(GumboParser* parser) {
|
888
|
-
while (!
|
889
|
-
GUMBO_TAG_HTML, GUMBO_TAG_TR, GUMBO_TAG_LAST)) {
|
997
|
+
while (!node_tag_in_set(get_current_node(parser), (gumbo_tagset) { TAG(HTML), TAG(TR), TAG(TEMPLATE)})) {
|
890
998
|
pop_current_node(parser);
|
891
999
|
}
|
892
1000
|
}
|
893
1001
|
|
894
1002
|
// http://www.whatwg.org/specs/web-apps/current-work/complete/tokenization.html#clear-the-stack-back-to-a-table-context
|
895
1003
|
static void clear_stack_to_table_context(GumboParser* parser) {
|
896
|
-
while (!
|
897
|
-
GUMBO_TAG_HTML, GUMBO_TAG_TABLE, GUMBO_TAG_LAST)) {
|
1004
|
+
while (!node_tag_in_set(get_current_node(parser), (gumbo_tagset) { TAG(HTML), TAG(TABLE), TAG(TEMPLATE) } )) {
|
898
1005
|
pop_current_node(parser);
|
899
1006
|
}
|
900
1007
|
}
|
901
1008
|
|
902
1009
|
// http://www.whatwg.org/specs/web-apps/current-work/complete/tokenization.html#clear-the-stack-back-to-a-table-body-context
|
903
1010
|
void clear_stack_to_table_body_context(GumboParser* parser) {
|
904
|
-
while (!
|
905
|
-
|
906
|
-
GUMBO_TAG_LAST)) {
|
1011
|
+
while (!node_tag_in_set(get_current_node(parser), (gumbo_tagset) { TAG(HTML), TAG(TBODY),
|
1012
|
+
TAG(TFOOT), TAG(THEAD), TAG(TEMPLATE) })) {
|
907
1013
|
pop_current_node(parser);
|
908
1014
|
}
|
909
1015
|
}
|
@@ -918,7 +1024,8 @@ static GumboNode* create_element(GumboParser* parser, GumboTag tag) {
|
|
918
1024
|
element->tag_namespace = GUMBO_NAMESPACE_HTML;
|
919
1025
|
element->original_tag = kGumboEmptyString;
|
920
1026
|
element->original_end_tag = kGumboEmptyString;
|
921
|
-
element->start_pos = parser->_parser_state->_current_token
|
1027
|
+
element->start_pos = (parser->_parser_state->_current_token) ?
|
1028
|
+
parser->_parser_state->_current_token->position : kGumboEmptySourcePosition;
|
922
1029
|
element->end_pos = kGumboEmptySourcePosition;
|
923
1030
|
return node;
|
924
1031
|
}
|
@@ -929,7 +1036,12 @@ static GumboNode* create_element_from_token(
|
|
929
1036
|
assert(token->type == GUMBO_TOKEN_START_TAG);
|
930
1037
|
GumboTokenStartTag* start_tag = &token->v.start_tag;
|
931
1038
|
|
932
|
-
|
1039
|
+
GumboNodeType type = (
|
1040
|
+
tag_namespace == GUMBO_NAMESPACE_HTML &&
|
1041
|
+
start_tag->tag == GUMBO_TAG_TEMPLATE)
|
1042
|
+
? GUMBO_NODE_TEMPLATE : GUMBO_NODE_ELEMENT;
|
1043
|
+
|
1044
|
+
GumboNode* node = create_node(parser, type);
|
933
1045
|
GumboElement* element = &node->v.element;
|
934
1046
|
gumbo_vector_init(parser, 1, &element->children);
|
935
1047
|
element->attributes = start_tag->attributes;
|
@@ -966,20 +1078,9 @@ static void insert_element(GumboParser* parser, GumboNode* node,
|
|
966
1078
|
if (!is_reconstructing_formatting_elements) {
|
967
1079
|
maybe_flush_text_node_buffer(parser);
|
968
1080
|
}
|
969
|
-
|
970
|
-
|
971
|
-
|
972
|
-
foster_parent_element(parser, node);
|
973
|
-
gumbo_vector_add(parser, (void*) node, &state->_open_elements);
|
974
|
-
return;
|
975
|
-
}
|
976
|
-
|
977
|
-
// This is called to insert the root HTML element, but get_current_node
|
978
|
-
// assumes the stack of open elements is non-empty, so we need special
|
979
|
-
// handling for this case.
|
980
|
-
append_node(
|
981
|
-
parser, parser->_output->root ?
|
982
|
-
get_current_node(parser) : parser->_output->document, node);
|
1081
|
+
InsertionLocation location =
|
1082
|
+
get_appropriate_insertion_location(parser, NULL);
|
1083
|
+
insert_node(parser, node, location);
|
983
1084
|
gumbo_vector_add(parser, (void*) node, &state->_open_elements);
|
984
1085
|
}
|
985
1086
|
|
@@ -1035,7 +1136,9 @@ static GumboNode* insert_foreign_element(
|
|
1035
1136
|
|
1036
1137
|
static void insert_text_token(GumboParser* parser, GumboToken* token) {
|
1037
1138
|
assert(token->type == GUMBO_TOKEN_WHITESPACE ||
|
1038
|
-
token->type == GUMBO_TOKEN_CHARACTER
|
1139
|
+
token->type == GUMBO_TOKEN_CHARACTER ||
|
1140
|
+
token->type == GUMBO_TOKEN_NULL ||
|
1141
|
+
token->type == GUMBO_TOKEN_CDATA);
|
1039
1142
|
TextNodeBufferState* buffer_state = &parser->_parser_state->_text_node;
|
1040
1143
|
if (buffer_state->_buffer.length == 0) {
|
1041
1144
|
// Initialize position fields.
|
@@ -1046,6 +1149,8 @@ static void insert_text_token(GumboParser* parser, GumboToken* token) {
|
|
1046
1149
|
parser, token->v.character, &buffer_state->_buffer);
|
1047
1150
|
if (token->type == GUMBO_TOKEN_CHARACTER) {
|
1048
1151
|
buffer_state->_type = GUMBO_NODE_TEXT;
|
1152
|
+
} else if (token->type == GUMBO_TOKEN_CDATA) {
|
1153
|
+
buffer_state->_type = GUMBO_NODE_CDATA;
|
1049
1154
|
}
|
1050
1155
|
gumbo_debug("Inserting text token '%c'.\n", token->v.character);
|
1051
1156
|
}
|
@@ -1073,7 +1178,7 @@ static bool find_last_anchor_index(GumboParser* parser, int* anchor_index) {
|
|
1073
1178
|
if (node == &kActiveFormattingScopeMarker) {
|
1074
1179
|
return false;
|
1075
1180
|
}
|
1076
|
-
if (
|
1181
|
+
if (node_html_tag_is(node, GUMBO_TAG_A)) {
|
1077
1182
|
*anchor_index = i;
|
1078
1183
|
return true;
|
1079
1184
|
}
|
@@ -1097,10 +1202,8 @@ static int count_formatting_elements_of_tag(
|
|
1097
1202
|
break;
|
1098
1203
|
}
|
1099
1204
|
assert(node->type == GUMBO_NODE_ELEMENT);
|
1100
|
-
|
1101
|
-
|
1102
|
-
element->tag_namespace == desired_element->tag_namespace &&
|
1103
|
-
all_attributes_match(&element->attributes,
|
1205
|
+
if (node_qualified_tag_is(node, desired_element->tag_namespace, desired_element->tag) &&
|
1206
|
+
all_attributes_match(&node->v.element.attributes,
|
1104
1207
|
&desired_element->attributes)) {
|
1105
1208
|
num_identical_elements++;
|
1106
1209
|
*earliest_matching_index = i;
|
@@ -1150,7 +1253,7 @@ static bool is_open_element(GumboParser* parser, const GumboNode* node) {
|
|
1150
1253
|
// values are fresh copies.
|
1151
1254
|
GumboNode* clone_node(
|
1152
1255
|
GumboParser* parser, const GumboNode* node, GumboParseFlags reason) {
|
1153
|
-
assert(node->type == GUMBO_NODE_ELEMENT);
|
1256
|
+
assert(node->type == GUMBO_NODE_ELEMENT || node->type == GUMBO_NODE_TEMPLATE);
|
1154
1257
|
GumboNode* new_node = gumbo_parser_allocate(parser, sizeof(GumboNode));
|
1155
1258
|
*new_node = *node;
|
1156
1259
|
new_node->parent = NULL;
|
@@ -1220,7 +1323,10 @@ static void reconstruct_active_formatting_elements(GumboParser* parser) {
|
|
1220
1323
|
GumboNode* clone = clone_node(
|
1221
1324
|
parser, element, GUMBO_INSERTION_RECONSTRUCTED_FORMATTING_ELEMENT);
|
1222
1325
|
// Step 9.
|
1223
|
-
|
1326
|
+
InsertionLocation location = get_appropriate_insertion_location(parser, NULL);
|
1327
|
+
insert_node(parser, clone, location);
|
1328
|
+
gumbo_vector_add(parser, (void*) clone, &parser->_parser_state->_open_elements);
|
1329
|
+
|
1224
1330
|
// Step 10.
|
1225
1331
|
elements->data[i] = clone;
|
1226
1332
|
gumbo_debug("Reconstructed %s element at %d.\n",
|
@@ -1269,83 +1375,47 @@ static GumboQuirksModeEnum compute_quirks_mode(
|
|
1269
1375
|
// The following functions are all defined by the "has an element in __ scope"
|
1270
1376
|
// sections of the HTML5 spec:
|
1271
1377
|
// http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#has-an-element-in-the-specific-scope
|
1272
|
-
// The basic idea behind them is that they check for an element of the given
|
1273
|
-
// name, contained within a scope formed by a set of other
|
1274
|
-
// example, "has an element in list scope" looks for an element of
|
1275
|
-
// within the nearest enclosing <ol> or <ul>, along
|
1276
|
-
// element types that serve to "firewall" their content
|
1277
|
-
// document.
|
1278
|
-
|
1279
|
-
|
1378
|
+
// The basic idea behind them is that they check for an element of the given
|
1379
|
+
// qualified name, contained within a scope formed by a set of other qualified
|
1380
|
+
// names. For example, "has an element in list scope" looks for an element of
|
1381
|
+
// the given qualified name within the nearest enclosing <ol> or <ul>, along
|
1382
|
+
// with a bunch of generic element types that serve to "firewall" their content
|
1383
|
+
// from the rest of the document. Note that because of the way the spec is written,
|
1384
|
+
// all elements are expected to be in the HTML namespace
|
1385
|
+
static bool has_an_element_in_specific_scope(GumboParser* parser,
|
1386
|
+
int expected_size, const GumboTag *expected, bool negate, const gumbo_tagset tags) {
|
1280
1387
|
GumboVector* open_elements = &parser->_parser_state->_open_elements;
|
1281
|
-
va_list args;
|
1282
|
-
va_start(args, negate);
|
1283
|
-
// va_arg can only run through the list once, so we copy it to an GumboVector
|
1284
|
-
// here. I wonder if it'd make more sense to make tags the GumboVector*
|
1285
|
-
// parameter and 'expected' a vararg list, but that'd require changing a lot
|
1286
|
-
// of code for unknown benefit. We may want to change the representation of
|
1287
|
-
// these tag sets anyway, to something more efficient.
|
1288
|
-
GumboVector tags;
|
1289
|
-
gumbo_vector_init(parser, 10, &tags);
|
1290
|
-
for (GumboTag tag = va_arg(args, GumboTag); tag != GUMBO_TAG_LAST;
|
1291
|
-
tag = va_arg(args, GumboTag)) {
|
1292
|
-
// We store the tags inline instead of storing pointers to them.
|
1293
|
-
gumbo_vector_add(parser, (void*) tag, &tags);
|
1294
|
-
}
|
1295
|
-
va_end(args);
|
1296
|
-
|
1297
|
-
bool result = false;
|
1298
1388
|
for (int i = open_elements->length; --i >= 0; ) {
|
1299
1389
|
const GumboNode* node = open_elements->data[i];
|
1300
|
-
if (node->type != GUMBO_NODE_ELEMENT)
|
1390
|
+
if (node->type != GUMBO_NODE_ELEMENT && node->type != GUMBO_NODE_TEMPLATE)
|
1301
1391
|
continue;
|
1302
|
-
|
1392
|
+
|
1303
1393
|
GumboTag node_tag = node->v.element.tag;
|
1304
|
-
|
1305
|
-
|
1306
|
-
if (node_tag ==
|
1307
|
-
|
1308
|
-
goto cleanup;
|
1309
|
-
}
|
1394
|
+
GumboNamespaceEnum node_ns = node->v.element.tag_namespace;
|
1395
|
+
for (int j = 0; j < expected_size; ++j) {
|
1396
|
+
if (node_tag == expected[j] && node_ns == GUMBO_NAMESPACE_HTML)
|
1397
|
+
return true;
|
1310
1398
|
}
|
1311
1399
|
|
1312
|
-
bool
|
1313
|
-
|
1314
|
-
|
1315
|
-
if (tag == node_tag) {
|
1316
|
-
found_tag = true;
|
1317
|
-
break;
|
1318
|
-
}
|
1319
|
-
}
|
1320
|
-
if (negate != found_tag) {
|
1321
|
-
result = false;
|
1322
|
-
goto cleanup;
|
1323
|
-
}
|
1400
|
+
bool found = TAGSET_INCLUDES(tags, node_ns, node_tag);
|
1401
|
+
if (negate != found)
|
1402
|
+
return false;
|
1324
1403
|
}
|
1325
|
-
|
1326
|
-
gumbo_vector_destroy(parser, &tags);
|
1327
|
-
return result;
|
1404
|
+
return false;
|
1328
1405
|
}
|
1329
1406
|
|
1330
|
-
//
|
1331
|
-
|
1332
|
-
|
1333
|
-
|
1334
|
-
// and the data inside it can be freely accessed as if it were a normal
|
1335
|
-
// GumboVector.
|
1336
|
-
#define DECLARE_ONE_ELEMENT_GUMBO_VECTOR(varname, from_var) \
|
1337
|
-
void* varname ## _tmp_array[1] = { (void*) from_var }; \
|
1338
|
-
GumboVector varname = { varname ## _tmp_array, 1, 1 }
|
1407
|
+
// Checks for the presence of an open element of the specified tag type.
|
1408
|
+
static bool has_open_element(GumboParser* parser, GumboTag tag) {
|
1409
|
+
return has_an_element_in_specific_scope(parser, 1, &tag, false, (gumbo_tagset) { TAG(HTML) } );
|
1410
|
+
}
|
1339
1411
|
|
1340
1412
|
// http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#has-an-element-in-scope
|
1341
1413
|
static bool has_an_element_in_scope(GumboParser* parser, GumboTag tag) {
|
1342
|
-
|
1343
|
-
|
1344
|
-
|
1345
|
-
|
1346
|
-
|
1347
|
-
GUMBO_TAG_MTEXT, GUMBO_TAG_ANNOTATION_XML, GUMBO_TAG_FOREIGNOBJECT,
|
1348
|
-
GUMBO_TAG_DESC, GUMBO_TAG_TITLE, GUMBO_TAG_LAST);
|
1414
|
+
return has_an_element_in_specific_scope(parser, 1, &tag, false, (gumbo_tagset) { TAG(APPLET),
|
1415
|
+
TAG(CAPTION), TAG(HTML), TAG(TABLE), TAG(TD), TAG(TH), TAG(MARQUEE),
|
1416
|
+
TAG(OBJECT), TAG(TEMPLATE), TAG_MATHML(MI), TAG_MATHML(MO), TAG_MATHML(MN),
|
1417
|
+
TAG_MATHML(MS), TAG_MATHML(MTEXT), TAG_MATHML(ANNOTATION_XML),
|
1418
|
+
TAG_SVG(FOREIGNOBJECT), TAG_SVG(DESC), TAG_SVG(TITLE) });
|
1349
1419
|
}
|
1350
1420
|
|
1351
1421
|
// Like "has an element in scope", but for the specific case of looking for a
|
@@ -1361,16 +1431,14 @@ static bool has_node_in_scope(GumboParser* parser, const GumboNode* node) {
|
|
1361
1431
|
if (current == node) {
|
1362
1432
|
return true;
|
1363
1433
|
}
|
1364
|
-
if (current->type != GUMBO_NODE_ELEMENT) {
|
1434
|
+
if (current->type != GUMBO_NODE_ELEMENT && current->type != GUMBO_NODE_TEMPLATE) {
|
1365
1435
|
continue;
|
1366
1436
|
}
|
1367
|
-
if (
|
1368
|
-
|
1369
|
-
|
1370
|
-
|
1371
|
-
|
1372
|
-
GUMBO_TAG_FOREIGNOBJECT, GUMBO_TAG_DESC, GUMBO_TAG_TITLE,
|
1373
|
-
GUMBO_TAG_LAST)) {
|
1437
|
+
if (node_tag_in_set(current, (gumbo_tagset) { TAG(APPLET), TAG(CAPTION), TAG(HTML),
|
1438
|
+
TAG(TABLE), TAG(TD), TAG(TH), TAG(MARQUEE), TAG(OBJECT), TAG(TEMPLATE),
|
1439
|
+
TAG_MATHML(MI), TAG_MATHML(MO), TAG_MATHML(MN), TAG_MATHML(MS),
|
1440
|
+
TAG_MATHML(MTEXT), TAG_MATHML(ANNOTATION_XML), TAG_SVG(FOREIGNOBJECT),
|
1441
|
+
TAG_SVG(DESC), TAG_SVG(TITLE) } )) {
|
1374
1442
|
return false;
|
1375
1443
|
}
|
1376
1444
|
}
|
@@ -1378,78 +1446,66 @@ static bool has_node_in_scope(GumboParser* parser, const GumboNode* node) {
|
|
1378
1446
|
return false;
|
1379
1447
|
}
|
1380
1448
|
|
1381
|
-
// Like has_an_element_in_scope, but restricts the expected
|
1382
|
-
// possible
|
1383
|
-
static bool has_an_element_in_scope_with_tagname(GumboParser* parser,
|
1384
|
-
|
1385
|
-
|
1386
|
-
|
1387
|
-
|
1388
|
-
|
1389
|
-
va_start(args, parser);
|
1390
|
-
for (GumboTag tag = va_arg(args, GumboTag); tag != GUMBO_TAG_LAST;
|
1391
|
-
tag = va_arg(args, GumboTag)) {
|
1392
|
-
gumbo_vector_add(parser, (void*) tag, &tags);
|
1393
|
-
}
|
1394
|
-
bool found = has_an_element_in_specific_scope(
|
1395
|
-
parser, &tags, false, GUMBO_TAG_APPLET, GUMBO_TAG_CAPTION, GUMBO_TAG_HTML,
|
1396
|
-
GUMBO_TAG_TABLE, GUMBO_TAG_TD, GUMBO_TAG_TH, GUMBO_TAG_MARQUEE,
|
1397
|
-
GUMBO_TAG_OBJECT, GUMBO_TAG_MI, GUMBO_TAG_MO, GUMBO_TAG_MN, GUMBO_TAG_MS,
|
1398
|
-
GUMBO_TAG_MTEXT, GUMBO_TAG_ANNOTATION_XML, GUMBO_TAG_FOREIGNOBJECT,
|
1399
|
-
GUMBO_TAG_DESC, GUMBO_TAG_TITLE, GUMBO_TAG_LAST);
|
1400
|
-
gumbo_vector_destroy(parser, &tags);
|
1401
|
-
va_end(args);
|
1402
|
-
return found;
|
1449
|
+
// Like has_an_element_in_scope, but restricts the expected qualified name to a
|
1450
|
+
// range of possible qualified names instead of just a single one.
|
1451
|
+
static bool has_an_element_in_scope_with_tagname(GumboParser* parser, int expected_len, const GumboTag expected[]) {
|
1452
|
+
return has_an_element_in_specific_scope(parser, expected_len, expected, false, (gumbo_tagset) {
|
1453
|
+
TAG(APPLET), TAG(CAPTION), TAG(HTML), TAG(TABLE), TAG(TD), TAG(TH), TAG(MARQUEE),
|
1454
|
+
TAG(OBJECT), TAG(TEMPLATE), TAG_MATHML(MI), TAG_MATHML(MO), TAG_MATHML(MN),
|
1455
|
+
TAG_MATHML(MS), TAG_MATHML(MTEXT), TAG_MATHML(ANNOTATION_XML),
|
1456
|
+
TAG_SVG(FOREIGNOBJECT), TAG_SVG(DESC), TAG_SVG(TITLE) });
|
1403
1457
|
}
|
1404
1458
|
|
1405
1459
|
// http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#has-an-element-in-list-item-scope
|
1406
1460
|
static bool has_an_element_in_list_scope(GumboParser* parser, GumboTag tag) {
|
1407
|
-
|
1408
|
-
|
1409
|
-
|
1410
|
-
|
1411
|
-
|
1412
|
-
|
1413
|
-
GUMBO_TAG_DESC, GUMBO_TAG_TITLE, GUMBO_TAG_OL, GUMBO_TAG_UL,
|
1414
|
-
GUMBO_TAG_LAST);
|
1461
|
+
return has_an_element_in_specific_scope(parser, 1, &tag, false, (gumbo_tagset) { TAG(APPLET),
|
1462
|
+
TAG(CAPTION), TAG(HTML), TAG(TABLE), TAG(TD), TAG(TH), TAG(MARQUEE),
|
1463
|
+
TAG(OBJECT), TAG(TEMPLATE), TAG_MATHML(MI), TAG_MATHML(MO), TAG_MATHML(MN),
|
1464
|
+
TAG_MATHML(MS), TAG_MATHML(MTEXT), TAG_MATHML(ANNOTATION_XML),
|
1465
|
+
TAG_SVG(FOREIGNOBJECT), TAG_SVG(DESC), TAG_SVG(TITLE), TAG(OL),
|
1466
|
+
TAG(UL) });
|
1415
1467
|
}
|
1416
1468
|
|
1417
1469
|
// http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#has-an-element-in-button-scope
|
1418
1470
|
static bool has_an_element_in_button_scope(GumboParser* parser, GumboTag tag) {
|
1419
|
-
|
1420
|
-
|
1421
|
-
|
1422
|
-
|
1423
|
-
|
1424
|
-
GUMBO_TAG_MTEXT, GUMBO_TAG_ANNOTATION_XML, GUMBO_TAG_FOREIGNOBJECT,
|
1425
|
-
GUMBO_TAG_DESC, GUMBO_TAG_TITLE, GUMBO_TAG_BUTTON, GUMBO_TAG_LAST);
|
1471
|
+
return has_an_element_in_specific_scope(parser, 1, &tag, false, (gumbo_tagset) { TAG(APPLET),
|
1472
|
+
TAG(CAPTION), TAG(HTML), TAG(TABLE), TAG(TD), TAG(TH), TAG(MARQUEE),
|
1473
|
+
TAG(OBJECT), TAG(TEMPLATE), TAG_MATHML(MI), TAG_MATHML(MO), TAG_MATHML(MN),
|
1474
|
+
TAG_MATHML(MS), TAG_MATHML(MTEXT), TAG_MATHML(ANNOTATION_XML),
|
1475
|
+
TAG_SVG(FOREIGNOBJECT), TAG_SVG(DESC), TAG_SVG(TITLE), TAG(BUTTON) });
|
1426
1476
|
}
|
1427
1477
|
|
1428
1478
|
// http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#has-an-element-in-table-scope
|
1429
1479
|
static bool has_an_element_in_table_scope(GumboParser* parser, GumboTag tag) {
|
1430
|
-
|
1431
|
-
|
1432
|
-
parser, &tags, false, GUMBO_TAG_HTML, GUMBO_TAG_TABLE, GUMBO_TAG_LAST);
|
1480
|
+
return has_an_element_in_specific_scope(parser, 1, &tag, false, (gumbo_tagset) { TAG(HTML),
|
1481
|
+
TAG(TABLE), TAG(TEMPLATE) });
|
1433
1482
|
}
|
1434
1483
|
|
1435
1484
|
// http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#has-an-element-in-select-scope
|
1436
1485
|
static bool has_an_element_in_select_scope(GumboParser* parser, GumboTag tag) {
|
1437
|
-
|
1438
|
-
return has_an_element_in_specific_scope(
|
1439
|
-
parser, &tags, true, GUMBO_TAG_OPTGROUP, GUMBO_TAG_OPTION,
|
1440
|
-
GUMBO_TAG_LAST);
|
1486
|
+
return has_an_element_in_specific_scope(parser, 1, &tag, true, (gumbo_tagset) { TAG(OPTGROUP), TAG(OPTION) });
|
1441
1487
|
}
|
1442
1488
|
|
1443
|
-
|
1444
1489
|
// http://www.whatwg.org/specs/web-apps/current-work/complete/tokenization.html#generate-implied-end-tags
|
1445
1490
|
// "exception" is the "element to exclude from the process" listed in the spec.
|
1446
1491
|
// Pass GUMBO_TAG_LAST to not exclude any of them.
|
1447
1492
|
static void generate_implied_end_tags(GumboParser* parser, GumboTag exception) {
|
1448
1493
|
for (;
|
1449
|
-
|
1450
|
-
|
1451
|
-
|
1452
|
-
!
|
1494
|
+
node_tag_in_set(get_current_node(parser), (gumbo_tagset) { TAG(DD),
|
1495
|
+
TAG(DT), TAG(LI), TAG(OPTION), TAG(OPTGROUP), TAG(P), TAG(RP), TAG(RB),
|
1496
|
+
TAG(RT), TAG(RTC) }) &&
|
1497
|
+
!node_html_tag_is(get_current_node(parser), exception);
|
1498
|
+
pop_current_node(parser));
|
1499
|
+
}
|
1500
|
+
|
1501
|
+
// This is the "generate all implied end tags thoroughly" clause of the spec.
|
1502
|
+
// https://html.spec.whatwg.org/multipage/syntax.html#closing-elements-that-have-implied-end-tags
|
1503
|
+
static void generate_all_implied_end_tags_thoroughly(GumboParser* parser) {
|
1504
|
+
for (;
|
1505
|
+
node_tag_in_set(get_current_node(parser), (gumbo_tagset) { TAG(CAPTION),
|
1506
|
+
TAG(COLGROUP), TAG(DD), TAG(DT), TAG(LI), TAG(OPTION), TAG(OPTGROUP),
|
1507
|
+
TAG(P), TAG(RP), TAG(RT), TAG(RTC), TAG(TBODY), TAG(TD),
|
1508
|
+
TAG(TFOOT), TAG(TH), TAG(HEAD), TAG(TR) });
|
1453
1509
|
pop_current_node(parser));
|
1454
1510
|
}
|
1455
1511
|
|
@@ -1463,7 +1519,7 @@ static bool close_table(GumboParser* parser) {
|
|
1463
1519
|
}
|
1464
1520
|
|
1465
1521
|
GumboNode* node = pop_current_node(parser);
|
1466
|
-
while (!
|
1522
|
+
while (!node_html_tag_is(node, GUMBO_TAG_TABLE)) {
|
1467
1523
|
node = pop_current_node(parser);
|
1468
1524
|
}
|
1469
1525
|
reset_insertion_mode_appropriately(parser);
|
@@ -1477,13 +1533,13 @@ static bool close_table_cell(GumboParser* parser, const GumboToken* token,
|
|
1477
1533
|
bool result = true;
|
1478
1534
|
generate_implied_end_tags(parser, GUMBO_TAG_LAST);
|
1479
1535
|
const GumboNode* node = get_current_node(parser);
|
1480
|
-
if (!
|
1536
|
+
if (!node_html_tag_is(node, cell_tag)) {
|
1481
1537
|
parser_add_parse_error(parser, token);
|
1482
1538
|
result = false;
|
1483
1539
|
}
|
1484
1540
|
do {
|
1485
1541
|
node = pop_current_node(parser);
|
1486
|
-
} while (!
|
1542
|
+
} while (!node_html_tag_is(node, cell_tag));
|
1487
1543
|
|
1488
1544
|
clear_active_formatting_elements(parser);
|
1489
1545
|
set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_ROW);
|
@@ -1508,7 +1564,7 @@ static bool close_current_cell(GumboParser* parser, const GumboToken* token) {
|
|
1508
1564
|
// resets the insertion mode appropriately.
|
1509
1565
|
static void close_current_select(GumboParser* parser) {
|
1510
1566
|
GumboNode* node = pop_current_node(parser);
|
1511
|
-
while (!
|
1567
|
+
while (!node_html_tag_is(node, GUMBO_TAG_SELECT)) {
|
1512
1568
|
node = pop_current_node(parser);
|
1513
1569
|
}
|
1514
1570
|
reset_insertion_mode_appropriately(parser);
|
@@ -1517,60 +1573,43 @@ static void close_current_select(GumboParser* parser) {
|
|
1517
1573
|
// The list of nodes in the "special" category:
|
1518
1574
|
// http://www.whatwg.org/specs/web-apps/current-work/complete/parsing.html#special
|
1519
1575
|
static bool is_special_node(const GumboNode* node) {
|
1520
|
-
assert(node->type == GUMBO_NODE_ELEMENT);
|
1521
|
-
|
1522
|
-
|
1523
|
-
|
1524
|
-
|
1525
|
-
|
1526
|
-
|
1527
|
-
|
1528
|
-
|
1529
|
-
|
1530
|
-
|
1531
|
-
|
1532
|
-
|
1533
|
-
|
1534
|
-
|
1535
|
-
|
1536
|
-
|
1537
|
-
|
1538
|
-
|
1539
|
-
|
1540
|
-
|
1541
|
-
|
1542
|
-
|
1543
|
-
GUMBO_TAG_TD, GUMBO_TAG_TEXTAREA, GUMBO_TAG_TFOOT, GUMBO_TAG_TH,
|
1544
|
-
GUMBO_TAG_THEAD, GUMBO_TAG_TITLE, GUMBO_TAG_TR, GUMBO_TAG_UL,
|
1545
|
-
GUMBO_TAG_WBR, GUMBO_TAG_XMP, GUMBO_TAG_LAST);
|
1546
|
-
case GUMBO_NAMESPACE_MATHML:
|
1547
|
-
return node_tag_in(node,
|
1548
|
-
GUMBO_TAG_MI, GUMBO_TAG_MO, GUMBO_TAG_MN, GUMBO_TAG_MS,
|
1549
|
-
GUMBO_TAG_MTEXT, GUMBO_TAG_ANNOTATION_XML, GUMBO_TAG_LAST);
|
1550
|
-
case GUMBO_NAMESPACE_SVG:
|
1551
|
-
return node_tag_in(node,
|
1552
|
-
GUMBO_TAG_FOREIGNOBJECT, GUMBO_TAG_DESC, GUMBO_TAG_LAST);
|
1553
|
-
}
|
1554
|
-
abort();
|
1555
|
-
return false; // Pacify compiler.
|
1556
|
-
}
|
1557
|
-
|
1558
|
-
// Implicitly closes currently open tags until it reaches an element with the
|
1559
|
-
// specified tag name. If the elements closed are in the set handled by
|
1576
|
+
assert(node->type == GUMBO_NODE_ELEMENT || node->type == GUMBO_NODE_TEMPLATE);
|
1577
|
+
return node_tag_in_set(node, (gumbo_tagset) { TAG(ADDRESS), TAG(APPLET), TAG(AREA),
|
1578
|
+
TAG(ARTICLE), TAG(ASIDE), TAG(BASE), TAG(BASEFONT), TAG(BGSOUND), TAG(BLOCKQUOTE),
|
1579
|
+
TAG(BODY), TAG(BR), TAG(BUTTON), TAG(CAPTION), TAG(CENTER), TAG(COL),
|
1580
|
+
TAG(COLGROUP), TAG(MENUITEM), TAG(DD), TAG(DETAILS), TAG(DIR), TAG(DIV), TAG(DL),
|
1581
|
+
TAG(DT), TAG(EMBED), TAG(FIELDSET), TAG(FIGCAPTION), TAG(FIGURE), TAG(FOOTER),
|
1582
|
+
TAG(FORM), TAG(FRAME), TAG(FRAMESET), TAG(H1), TAG(H2), TAG(H3), TAG(H4),
|
1583
|
+
TAG(H5), TAG(H6), TAG(HEAD), TAG(HEADER), TAG(HGROUP), TAG(HR), TAG(HTML),
|
1584
|
+
TAG(IFRAME), TAG(IMG), TAG(INPUT), TAG(ISINDEX), TAG(LI), TAG(LINK),
|
1585
|
+
TAG(LISTING), TAG(MARQUEE), TAG(MENU), TAG(META), TAG(NAV), TAG(NOEMBED),
|
1586
|
+
TAG(NOFRAMES), TAG(NOSCRIPT), TAG(OBJECT), TAG(OL), TAG(P), TAG(PARAM),
|
1587
|
+
TAG(PLAINTEXT), TAG(PRE), TAG(SCRIPT), TAG(SECTION), TAG(SELECT), TAG(STYLE),
|
1588
|
+
TAG(SUMMARY), TAG(TABLE), TAG(TBODY), TAG(TD), TAG(TEMPLATE), TAG(TEXTAREA),
|
1589
|
+
TAG(TFOOT), TAG(TH), TAG(THEAD), TAG(TITLE), TAG(TR), TAG(UL), TAG(WBR), TAG(XMP),
|
1590
|
+
|
1591
|
+
TAG_MATHML(MI), TAG_MATHML(MO), TAG_MATHML(MN), TAG_MATHML(MS),
|
1592
|
+
TAG_MATHML(MTEXT), TAG_MATHML(ANNOTATION_XML),
|
1593
|
+
|
1594
|
+
TAG_SVG(FOREIGNOBJECT), TAG_SVG(DESC) });
|
1595
|
+
}
|
1596
|
+
|
1597
|
+
// Implicitly closes currently open elements until it reaches an element with the
|
1598
|
+
// specified qualified name. If the elements closed are in the set handled by
|
1560
1599
|
// generate_implied_end_tags, this is normal operation and this function returns
|
1561
1600
|
// true. Otherwise, a parse error is recorded and this function returns false.
|
1562
1601
|
static bool implicitly_close_tags(
|
1563
|
-
GumboParser* parser, GumboToken* token, GumboTag target) {
|
1602
|
+
GumboParser* parser, GumboToken* token, GumboNamespaceEnum target_ns, GumboTag target) {
|
1564
1603
|
bool result = true;
|
1565
1604
|
generate_implied_end_tags(parser, target);
|
1566
|
-
if (!
|
1605
|
+
if (!node_qualified_tag_is(get_current_node(parser), target_ns, target)) {
|
1567
1606
|
parser_add_parse_error(parser, token);
|
1568
|
-
while (!
|
1607
|
+
while (!node_qualified_tag_is(get_current_node(parser), target_ns, target)) {
|
1569
1608
|
pop_current_node(parser);
|
1570
1609
|
}
|
1571
1610
|
result = false;
|
1572
1611
|
}
|
1573
|
-
assert(
|
1612
|
+
assert(node_qualified_tag_is(get_current_node(parser), target_ns, target));
|
1574
1613
|
pop_current_node(parser);
|
1575
1614
|
return result;
|
1576
1615
|
}
|
@@ -1581,7 +1620,7 @@ static bool implicitly_close_tags(
|
|
1581
1620
|
// clause appears several times in the spec.
|
1582
1621
|
static bool maybe_implicitly_close_p_tag(GumboParser* parser, GumboToken* token) {
|
1583
1622
|
if (has_an_element_in_button_scope(parser, GUMBO_TAG_P)) {
|
1584
|
-
return implicitly_close_tags(parser, token, GUMBO_TAG_P);
|
1623
|
+
return implicitly_close_tags(parser, token, GUMBO_NAMESPACE_HTML, GUMBO_TAG_P);
|
1585
1624
|
}
|
1586
1625
|
return true;
|
1587
1626
|
}
|
@@ -1595,15 +1634,14 @@ static void maybe_implicitly_close_list_tag(
|
|
1595
1634
|
for (int i = state->_open_elements.length; --i >= 0; ) {
|
1596
1635
|
const GumboNode* node = state->_open_elements.data[i];
|
1597
1636
|
bool is_list_tag = is_li ?
|
1598
|
-
|
1599
|
-
|
1637
|
+
node_html_tag_is(node, GUMBO_TAG_LI) :
|
1638
|
+
node_tag_in_set(node, (gumbo_tagset) { TAG(DD), TAG(DT) } );
|
1600
1639
|
if (is_list_tag) {
|
1601
|
-
implicitly_close_tags(parser, token, node->v.element.tag);
|
1640
|
+
implicitly_close_tags(parser, token, node->v.element.tag_namespace, node->v.element.tag);
|
1602
1641
|
return;
|
1603
1642
|
}
|
1604
1643
|
if (is_special_node(node) &&
|
1605
|
-
!
|
1606
|
-
GUMBO_TAG_LAST)) {
|
1644
|
+
!node_tag_in_set(node, (gumbo_tagset) { TAG(ADDRESS), TAG(DIV), TAG(P) })) {
|
1607
1645
|
return;
|
1608
1646
|
}
|
1609
1647
|
}
|
@@ -1758,13 +1796,20 @@ static void remove_from_parent(GumboParser* parser, GumboNode* node) {
|
|
1758
1796
|
|
1759
1797
|
// http://www.whatwg.org/specs/web-apps/current-work/multipage/the-end.html#an-introduction-to-error-handling-and-strange-cases-in-the-parser
|
1760
1798
|
// Also described in the "in body" handling for end formatting tags.
|
1761
|
-
static bool adoption_agency_algorithm(
|
1762
|
-
GumboParser* parser, GumboToken* token, GumboTag closing_tag) {
|
1799
|
+
static bool adoption_agency_algorithm(GumboParser* parser, GumboToken* token, GumboTag subject) {
|
1763
1800
|
GumboParserState* state = parser->_parser_state;
|
1764
1801
|
gumbo_debug("Entering adoption agency algorithm.\n");
|
1765
|
-
//
|
1802
|
+
// Step 1.
|
1803
|
+
GumboNode* current_node = get_current_node(parser);
|
1804
|
+
if (current_node->v.element.tag_namespace == GUMBO_NAMESPACE_HTML &&
|
1805
|
+
current_node->v.element.tag == subject &&
|
1806
|
+
gumbo_vector_index_of(&state->_active_formatting_elements, current_node) == -1) {
|
1807
|
+
pop_current_node(parser);
|
1808
|
+
return false;
|
1809
|
+
}
|
1810
|
+
// Steps 2-4 & 20:
|
1766
1811
|
for (int i = 0; i < 8; ++i) {
|
1767
|
-
// Step
|
1812
|
+
// Step 5.
|
1768
1813
|
GumboNode* formatting_node = NULL;
|
1769
1814
|
int formatting_node_in_open_elements = -1;
|
1770
1815
|
for (int j = state->_active_formatting_elements.length; --j >= 0; ) {
|
@@ -1774,13 +1819,13 @@ static bool adoption_agency_algorithm(
|
|
1774
1819
|
// Last scope marker; abort the algorithm.
|
1775
1820
|
return false;
|
1776
1821
|
}
|
1777
|
-
if (
|
1822
|
+
if (node_html_tag_is(current_node, subject)) {
|
1778
1823
|
// Found it.
|
1779
1824
|
formatting_node = current_node;
|
1780
1825
|
formatting_node_in_open_elements = gumbo_vector_index_of(
|
1781
|
-
|
1826
|
+
&state->_open_elements, formatting_node);
|
1782
1827
|
gumbo_debug("Formatting element of tag %s at %d.\n",
|
1783
|
-
gumbo_normalized_tagname(
|
1828
|
+
gumbo_normalized_tagname(subject),
|
1784
1829
|
formatting_node_in_open_elements);
|
1785
1830
|
break;
|
1786
1831
|
}
|
@@ -1793,39 +1838,44 @@ static bool adoption_agency_algorithm(
|
|
1793
1838
|
return false;
|
1794
1839
|
}
|
1795
1840
|
|
1841
|
+
// Step 6
|
1796
1842
|
if (formatting_node_in_open_elements == -1) {
|
1797
1843
|
gumbo_debug("Formatting node not on stack of open elements.\n");
|
1844
|
+
parser_add_parse_error(parser, token);
|
1798
1845
|
gumbo_vector_remove(parser, formatting_node,
|
1799
1846
|
&state->_active_formatting_elements);
|
1800
1847
|
return false;
|
1801
1848
|
}
|
1802
1849
|
|
1850
|
+
// Step 7
|
1803
1851
|
if (!has_an_element_in_scope(parser, formatting_node->v.element.tag)) {
|
1804
1852
|
parser_add_parse_error(parser, token);
|
1805
1853
|
gumbo_debug("Element not in scope.\n");
|
1806
1854
|
return false;
|
1807
1855
|
}
|
1856
|
+
|
1857
|
+
// Step 8
|
1808
1858
|
if (formatting_node != get_current_node(parser)) {
|
1809
1859
|
parser_add_parse_error(parser, token); // But continue onwards.
|
1810
1860
|
}
|
1811
1861
|
assert(formatting_node);
|
1812
|
-
assert(!
|
1813
|
-
assert(!
|
1862
|
+
assert(!node_html_tag_is(formatting_node, GUMBO_TAG_HTML));
|
1863
|
+
assert(!node_html_tag_is(formatting_node, GUMBO_TAG_BODY));
|
1814
1864
|
|
1815
|
-
// Step
|
1865
|
+
// Step 9 & 10
|
1816
1866
|
GumboNode* furthest_block = NULL;
|
1817
1867
|
for (int j = formatting_node_in_open_elements;
|
1818
1868
|
j < state->_open_elements.length; ++j) {
|
1819
1869
|
assert(j > 0);
|
1820
1870
|
GumboNode* current = state->_open_elements.data[j];
|
1821
1871
|
if (is_special_node(current)) {
|
1822
|
-
// Step
|
1872
|
+
// Step 9.
|
1823
1873
|
furthest_block = current;
|
1824
1874
|
break;
|
1825
1875
|
}
|
1826
1876
|
}
|
1827
1877
|
if (!furthest_block) {
|
1828
|
-
// Step
|
1878
|
+
// Step 10.
|
1829
1879
|
while (get_current_node(parser) != formatting_node) {
|
1830
1880
|
pop_current_node(parser);
|
1831
1881
|
}
|
@@ -1835,35 +1885,38 @@ static bool adoption_agency_algorithm(
|
|
1835
1885
|
&state->_active_formatting_elements);
|
1836
1886
|
return false;
|
1837
1887
|
}
|
1838
|
-
assert(!
|
1888
|
+
assert(!node_html_tag_is(furthest_block, GUMBO_TAG_HTML));
|
1839
1889
|
assert(furthest_block);
|
1840
1890
|
|
1841
|
-
// Step
|
1891
|
+
// Step 11.
|
1842
1892
|
// Elements may be moved and reparented by this algorithm, so
|
1843
1893
|
// common_ancestor is not necessarily the same as formatting_node->parent.
|
1844
1894
|
GumboNode* common_ancestor =
|
1845
|
-
|
1846
|
-
|
1895
|
+
state->_open_elements.data[gumbo_vector_index_of(
|
1896
|
+
&state->_open_elements, formatting_node) - 1];
|
1847
1897
|
gumbo_debug("Common ancestor tag = %s, furthest block tag = %s.\n",
|
1848
1898
|
gumbo_normalized_tagname(common_ancestor->v.element.tag),
|
1849
1899
|
gumbo_normalized_tagname(furthest_block->v.element.tag));
|
1850
1900
|
|
1851
|
-
// Step
|
1901
|
+
// Step 12.
|
1852
1902
|
int bookmark = gumbo_vector_index_of(
|
1853
|
-
|
1854
|
-
|
1903
|
+
&state->_active_formatting_elements, formatting_node) + 1;
|
1904
|
+
gumbo_debug("Bookmark at %d.\n", bookmark);
|
1905
|
+
// Step 13.
|
1855
1906
|
GumboNode* node = furthest_block;
|
1856
1907
|
GumboNode* last_node = furthest_block;
|
1857
1908
|
// Must be stored explicitly, in case node is removed from the stack of open
|
1858
1909
|
// elements, to handle step 9.4.
|
1859
1910
|
int saved_node_index = gumbo_vector_index_of(&state->_open_elements, node);
|
1860
1911
|
assert(saved_node_index > 0);
|
1861
|
-
// Step
|
1862
|
-
for (int j = 0
|
1863
|
-
// Step
|
1912
|
+
// Step 13.1.
|
1913
|
+
for (int j = 0;;) {
|
1914
|
+
// Step 13.2.
|
1915
|
+
++j;
|
1916
|
+
// Step 13.3.
|
1864
1917
|
int node_index = gumbo_vector_index_of(&state->_open_elements, node);
|
1865
1918
|
gumbo_debug(
|
1866
|
-
|
1919
|
+
"Current index: %d, last index: %d.\n", node_index, saved_node_index);
|
1867
1920
|
if (node_index == -1) {
|
1868
1921
|
node_index = saved_node_index;
|
1869
1922
|
}
|
@@ -1872,62 +1925,78 @@ static bool adoption_agency_algorithm(
|
|
1872
1925
|
assert(node_index < state->_open_elements.capacity);
|
1873
1926
|
node = state->_open_elements.data[node_index];
|
1874
1927
|
assert(node->parent);
|
1875
|
-
|
1876
|
-
|
1877
|
-
|
1928
|
+
if (node == formatting_node) {
|
1929
|
+
// Step 13.4.
|
1930
|
+
break;
|
1931
|
+
}
|
1932
|
+
int formatting_index =
|
1933
|
+
gumbo_vector_index_of(&state->_active_formatting_elements, node);
|
1934
|
+
if (j > 3 && formatting_index != -1) {
|
1935
|
+
// Step 13.5.
|
1936
|
+
gumbo_debug(
|
1937
|
+
"Removing formatting element at %d.\n", formatting_index);
|
1938
|
+
gumbo_vector_remove_at(
|
1939
|
+
parser,
|
1940
|
+
formatting_index,
|
1941
|
+
&state->_active_formatting_elements);
|
1942
|
+
// Removing the element shifts all indices over by one, so we may need
|
1943
|
+
// to move the bookmark.
|
1944
|
+
if (formatting_index < bookmark) {
|
1945
|
+
--bookmark;
|
1946
|
+
gumbo_debug("Moving bookmark to %d.\n", bookmark);
|
1947
|
+
}
|
1948
|
+
continue;
|
1949
|
+
}
|
1950
|
+
if (formatting_index == -1) {
|
1951
|
+
// Step 13.6.
|
1878
1952
|
gumbo_vector_remove_at(parser, node_index, &state->_open_elements);
|
1879
1953
|
continue;
|
1880
|
-
} else if (node == formatting_node) {
|
1881
|
-
// Step 9.6.
|
1882
|
-
break;
|
1883
1954
|
}
|
1884
|
-
// Step
|
1885
|
-
|
1886
|
-
|
1955
|
+
// Step 13.7.
|
1956
|
+
// "common ancestor as the intended parent" doesn't actually mean insert
|
1957
|
+
// it into the common ancestor; that happens below.
|
1887
1958
|
node = clone_node(parser, node, GUMBO_INSERTION_ADOPTION_AGENCY_CLONED);
|
1959
|
+
assert(formatting_index >= 0);
|
1888
1960
|
state->_active_formatting_elements.data[formatting_index] = node;
|
1961
|
+
assert(node_index >= 0);
|
1889
1962
|
state->_open_elements.data[node_index] = node;
|
1890
|
-
// Step
|
1963
|
+
// Step 13.8.
|
1891
1964
|
if (last_node == furthest_block) {
|
1892
1965
|
bookmark = formatting_index + 1;
|
1966
|
+
gumbo_debug("Bookmark moved to %d.\n", bookmark);
|
1893
1967
|
assert(bookmark <= state->_active_formatting_elements.length);
|
1894
1968
|
}
|
1895
|
-
// Step
|
1969
|
+
// Step 13.9.
|
1896
1970
|
last_node->parse_flags |= GUMBO_INSERTION_ADOPTION_AGENCY_MOVED;
|
1897
1971
|
remove_from_parent(parser, last_node);
|
1898
1972
|
append_node(parser, node, last_node);
|
1899
|
-
// Step
|
1973
|
+
// Step 13.10.
|
1900
1974
|
last_node = node;
|
1901
|
-
}
|
1975
|
+
} // Step 13.11.
|
1902
1976
|
|
1903
|
-
// Step
|
1977
|
+
// Step 14.
|
1904
1978
|
gumbo_debug("Removing %s node from parent ",
|
1905
1979
|
gumbo_normalized_tagname(last_node->v.element.tag));
|
1906
1980
|
remove_from_parent(parser, last_node);
|
1907
1981
|
last_node->parse_flags |= GUMBO_INSERTION_ADOPTION_AGENCY_MOVED;
|
1908
|
-
|
1909
|
-
|
1910
|
-
|
1911
|
-
|
1912
|
-
|
1913
|
-
} else {
|
1914
|
-
gumbo_debug("and inserting it into %s.\n",
|
1915
|
-
gumbo_normalized_tagname(common_ancestor->v.element.tag));
|
1916
|
-
append_node(parser, common_ancestor, last_node);
|
1917
|
-
}
|
1982
|
+
InsertionLocation location =
|
1983
|
+
get_appropriate_insertion_location(parser, common_ancestor);
|
1984
|
+
gumbo_debug("and inserting it into %s.\n",
|
1985
|
+
gumbo_normalized_tagname(location.target->v.element.tag));
|
1986
|
+
insert_node(parser, last_node, location);
|
1918
1987
|
|
1919
|
-
// Step
|
1988
|
+
// Step 15.
|
1920
1989
|
GumboNode* new_formatting_node = clone_node(
|
1921
|
-
|
1990
|
+
parser, formatting_node, GUMBO_INSERTION_ADOPTION_AGENCY_CLONED);
|
1922
1991
|
formatting_node->parse_flags |= GUMBO_INSERTION_IMPLICIT_END_TAG;
|
1923
1992
|
|
1924
|
-
// Step
|
1993
|
+
// Step 16. Instead of appending nodes one-by-one, we swap the children
|
1925
1994
|
// vector of furthest_block with the empty children of new_formatting_node,
|
1926
1995
|
// reducing memory traffic and allocations. We still have to reset their
|
1927
1996
|
// parent pointers, though.
|
1928
1997
|
GumboVector temp = new_formatting_node->v.element.children;
|
1929
1998
|
new_formatting_node->v.element.children =
|
1930
|
-
|
1999
|
+
furthest_block->v.element.children;
|
1931
2000
|
furthest_block->v.element.children = temp;
|
1932
2001
|
|
1933
2002
|
temp = new_formatting_node->v.element.children;
|
@@ -1936,36 +2005,39 @@ static bool adoption_agency_algorithm(
|
|
1936
2005
|
child->parent = new_formatting_node;
|
1937
2006
|
}
|
1938
2007
|
|
1939
|
-
// Step
|
2008
|
+
// Step 17.
|
1940
2009
|
append_node(parser, furthest_block, new_formatting_node);
|
1941
2010
|
|
1942
|
-
// Step
|
2011
|
+
// Step 18.
|
1943
2012
|
// If the formatting node was before the bookmark, it may shift over all
|
1944
2013
|
// indices after it, so we need to explicitly find the index and possibly
|
1945
2014
|
// adjust the bookmark.
|
1946
2015
|
int formatting_node_index = gumbo_vector_index_of(
|
1947
|
-
|
2016
|
+
&state->_active_formatting_elements, formatting_node);
|
1948
2017
|
assert(formatting_node_index != -1);
|
1949
2018
|
if (formatting_node_index < bookmark) {
|
2019
|
+
gumbo_debug(
|
2020
|
+
"Formatting node at %d is before bookmark at %d; decrementing.\n",
|
2021
|
+
formatting_node_index, bookmark);
|
1950
2022
|
--bookmark;
|
1951
2023
|
}
|
1952
2024
|
gumbo_vector_remove_at(
|
1953
|
-
|
2025
|
+
parser, formatting_node_index, &state->_active_formatting_elements);
|
1954
2026
|
assert(bookmark >= 0);
|
1955
2027
|
assert(bookmark <= state->_active_formatting_elements.length);
|
1956
2028
|
gumbo_vector_insert_at(parser, new_formatting_node, bookmark,
|
1957
2029
|
&state->_active_formatting_elements);
|
1958
2030
|
|
1959
|
-
// Step
|
2031
|
+
// Step 19.
|
1960
2032
|
gumbo_vector_remove(
|
1961
|
-
|
2033
|
+
parser, formatting_node, &state->_open_elements);
|
1962
2034
|
int insert_at = gumbo_vector_index_of(
|
1963
|
-
|
2035
|
+
&state->_open_elements, furthest_block) + 1;
|
1964
2036
|
assert(insert_at >= 0);
|
1965
2037
|
assert(insert_at <= state->_open_elements.length);
|
1966
2038
|
gumbo_vector_insert_at(
|
1967
|
-
|
1968
|
-
}
|
2039
|
+
parser, new_formatting_node, insert_at, &state->_open_elements);
|
2040
|
+
} // Step 20.
|
1969
2041
|
return true;
|
1970
2042
|
}
|
1971
2043
|
|
@@ -1992,8 +2064,8 @@ static void finish_parsing(GumboParser* parser) {
|
|
1992
2064
|
GumboParserState* state = parser->_parser_state;
|
1993
2065
|
for (GumboNode* node = pop_current_node(parser); node;
|
1994
2066
|
node = pop_current_node(parser)) {
|
1995
|
-
if ((
|
1996
|
-
(
|
2067
|
+
if ((node_html_tag_is(node, GUMBO_TAG_BODY) && state->_closed_body_tag) ||
|
2068
|
+
(node_html_tag_is(node, GUMBO_TAG_HTML) && state->_closed_html_tag)) {
|
1997
2069
|
continue;
|
1998
2070
|
}
|
1999
2071
|
node->parse_flags |= GUMBO_INSERTION_IMPLICIT_END_TAG;
|
@@ -2042,9 +2114,9 @@ static bool handle_before_html(GumboParser* parser, GumboToken* token) {
|
|
2042
2114
|
parser->_output->root = html_node;
|
2043
2115
|
set_insertion_mode(parser, GUMBO_INSERTION_MODE_BEFORE_HEAD);
|
2044
2116
|
return true;
|
2045
|
-
} else if (token->type == GUMBO_TOKEN_END_TAG &&
|
2046
|
-
|
2047
|
-
|
2117
|
+
} else if (token->type == GUMBO_TOKEN_END_TAG &&
|
2118
|
+
!tag_in(token, false, (gumbo_tagset) { TAG(HEAD), TAG(BODY), TAG(HTML),
|
2119
|
+
TAG(BR) } )) {
|
2048
2120
|
parser_add_parse_error(parser, token);
|
2049
2121
|
ignore_token(parser);
|
2050
2122
|
return false;
|
@@ -2076,9 +2148,9 @@ static bool handle_before_head(GumboParser* parser, GumboToken* token) {
|
|
2076
2148
|
set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_HEAD);
|
2077
2149
|
parser->_parser_state->_head_element = node;
|
2078
2150
|
return true;
|
2079
|
-
} else if (token->type == GUMBO_TOKEN_END_TAG &&
|
2080
|
-
|
2081
|
-
|
2151
|
+
} else if (token->type == GUMBO_TOKEN_END_TAG &&
|
2152
|
+
!tag_in(token, false, (gumbo_tagset) { TAG(HEAD), TAG(BODY), TAG(HTML),
|
2153
|
+
TAG(BR) })) {
|
2082
2154
|
parser_add_parse_error(parser, token);
|
2083
2155
|
ignore_token(parser);
|
2084
2156
|
return false;
|
@@ -2110,9 +2182,8 @@ static bool handle_in_head(GumboParser* parser, GumboToken* token) {
|
|
2110
2182
|
return true;
|
2111
2183
|
} else if (tag_is(token, kStartTag, GUMBO_TAG_HTML)) {
|
2112
2184
|
return handle_in_body(parser, token);
|
2113
|
-
} else if (tag_in(token, kStartTag,
|
2114
|
-
|
2115
|
-
GUMBO_TAG_LAST)) {
|
2185
|
+
} else if (tag_in(token, kStartTag, (gumbo_tagset) { TAG(BASE), TAG(BASEFONT),
|
2186
|
+
TAG(BGSOUND), TAG(MENUITEM), TAG(LINK) })) {
|
2116
2187
|
insert_element_from_token(parser, token);
|
2117
2188
|
pop_current_node(parser);
|
2118
2189
|
acknowledge_self_closing_tag(parser);
|
@@ -2129,8 +2200,7 @@ static bool handle_in_head(GumboParser* parser, GumboToken* token) {
|
|
2129
2200
|
} else if (tag_is(token, kStartTag, GUMBO_TAG_TITLE)) {
|
2130
2201
|
run_generic_parsing_algorithm(parser, token, GUMBO_LEX_RCDATA);
|
2131
2202
|
return true;
|
2132
|
-
} else if (tag_in(token, kStartTag,
|
2133
|
-
GUMBO_TAG_LAST)) {
|
2203
|
+
} else if (tag_in(token, kStartTag, (gumbo_tagset) { TAG(NOFRAMES), TAG(STYLE) })) {
|
2134
2204
|
run_generic_parsing_algorithm(parser, token, GUMBO_LEX_RAWTEXT);
|
2135
2205
|
return true;
|
2136
2206
|
} else if (tag_is(token, kStartTag, GUMBO_TAG_NOSCRIPT)) {
|
@@ -2143,32 +2213,48 @@ static bool handle_in_head(GumboParser* parser, GumboToken* token) {
|
|
2143
2213
|
} else if (tag_is(token, kEndTag, GUMBO_TAG_HEAD)) {
|
2144
2214
|
GumboNode* head = pop_current_node(parser);
|
2145
2215
|
AVOID_UNUSED_VARIABLE_WARNING(head);
|
2146
|
-
assert(
|
2216
|
+
assert(node_html_tag_is(head, GUMBO_TAG_HEAD));
|
2147
2217
|
set_insertion_mode(parser, GUMBO_INSERTION_MODE_AFTER_HEAD);
|
2148
2218
|
return true;
|
2149
|
-
} else if (
|
2150
|
-
|
2151
|
-
|
2152
|
-
|
2153
|
-
|
2154
|
-
|
2155
|
-
|
2156
|
-
|
2157
|
-
|
2158
|
-
|
2159
|
-
|
2219
|
+
} else if (tag_in(token, kEndTag, (gumbo_tagset) { TAG(BODY), TAG(HTML), TAG(BR) })) {
|
2220
|
+
pop_current_node(parser);
|
2221
|
+
set_insertion_mode(parser, GUMBO_INSERTION_MODE_AFTER_HEAD);
|
2222
|
+
parser->_parser_state->_reprocess_current_token = true;
|
2223
|
+
return true;
|
2224
|
+
} else if (tag_is(token, kStartTag, GUMBO_TAG_TEMPLATE)) {
|
2225
|
+
insert_element_from_token(parser, token);
|
2226
|
+
add_formatting_element(parser, &kActiveFormattingScopeMarker);
|
2227
|
+
parser->_parser_state->_frameset_ok = false;
|
2228
|
+
set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TEMPLATE);
|
2229
|
+
push_template_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TEMPLATE);
|
2230
|
+
return true;
|
2231
|
+
} else if (tag_is(token, kEndTag, GUMBO_TAG_TEMPLATE)) {
|
2232
|
+
if (!has_open_element(parser, GUMBO_TAG_TEMPLATE)) {
|
2233
|
+
parser_add_parse_error(parser, token);
|
2234
|
+
ignore_token(parser);
|
2235
|
+
return false;
|
2236
|
+
}
|
2237
|
+
generate_all_implied_end_tags_thoroughly(parser);
|
2238
|
+
bool success = true;
|
2239
|
+
if (!node_html_tag_is(get_current_node(parser), GUMBO_TAG_TEMPLATE)) {
|
2240
|
+
parser_add_parse_error(parser, token);
|
2241
|
+
success = false;
|
2242
|
+
}
|
2243
|
+
while (!node_html_tag_is(pop_current_node(parser), GUMBO_TAG_TEMPLATE));
|
2244
|
+
clear_active_formatting_elements(parser);
|
2245
|
+
pop_template_insertion_mode(parser);
|
2246
|
+
reset_insertion_mode_appropriately(parser);
|
2247
|
+
return success;
|
2248
|
+
} else if (tag_is(token, kStartTag, GUMBO_TAG_HEAD) || (token->type == GUMBO_TOKEN_END_TAG)) {
|
2160
2249
|
parser_add_parse_error(parser, token);
|
2161
2250
|
ignore_token(parser);
|
2162
2251
|
return false;
|
2163
2252
|
} else {
|
2164
|
-
|
2165
|
-
assert(node_tag_is(node, GUMBO_TAG_HEAD));
|
2166
|
-
AVOID_UNUSED_VARIABLE_WARNING(node);
|
2253
|
+
pop_current_node(parser);
|
2167
2254
|
set_insertion_mode(parser, GUMBO_INSERTION_MODE_AFTER_HEAD);
|
2168
2255
|
parser->_parser_state->_reprocess_current_token = true;
|
2169
2256
|
return true;
|
2170
2257
|
}
|
2171
|
-
|
2172
2258
|
return true;
|
2173
2259
|
}
|
2174
2260
|
|
@@ -2181,18 +2267,16 @@ static bool handle_in_head_noscript(GumboParser* parser, GumboToken* token) {
|
|
2181
2267
|
return handle_in_body(parser, token);
|
2182
2268
|
} else if (tag_is(token, kEndTag, GUMBO_TAG_NOSCRIPT)) {
|
2183
2269
|
const GumboNode* node = pop_current_node(parser);
|
2184
|
-
assert(
|
2270
|
+
assert(node_html_tag_is(node, GUMBO_TAG_NOSCRIPT));
|
2185
2271
|
AVOID_UNUSED_VARIABLE_WARNING(node);
|
2186
2272
|
set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_HEAD);
|
2187
2273
|
return true;
|
2188
2274
|
} else if (token->type == GUMBO_TOKEN_WHITESPACE ||
|
2189
2275
|
token->type == GUMBO_TOKEN_COMMENT ||
|
2190
|
-
tag_in(token, kStartTag,
|
2191
|
-
|
2192
|
-
|
2193
|
-
|
2194
|
-
} else if (tag_in(token, kStartTag, GUMBO_TAG_HEAD, GUMBO_TAG_NOSCRIPT,
|
2195
|
-
GUMBO_TAG_LAST) ||
|
2276
|
+
tag_in(token, kStartTag, (gumbo_tagset) { TAG(BASEFONT), TAG(BGSOUND),
|
2277
|
+
TAG(LINK), TAG(META), TAG(NOFRAMES), TAG(STYLE) })) {
|
2278
|
+
return handle_in_head(parser, token);
|
2279
|
+
} else if (tag_in(token, kStartTag, (gumbo_tagset) { TAG(HEAD), TAG(NOSCRIPT) }) ||
|
2196
2280
|
(token->type == GUMBO_TOKEN_END_TAG &&
|
2197
2281
|
!tag_is(token, kEndTag, GUMBO_TAG_BR))) {
|
2198
2282
|
parser_add_parse_error(parser, token);
|
@@ -2201,7 +2285,7 @@ static bool handle_in_head_noscript(GumboParser* parser, GumboToken* token) {
|
|
2201
2285
|
} else {
|
2202
2286
|
parser_add_parse_error(parser, token);
|
2203
2287
|
const GumboNode* node = pop_current_node(parser);
|
2204
|
-
assert(
|
2288
|
+
assert(node_html_tag_is(node, GUMBO_TAG_NOSCRIPT));
|
2205
2289
|
AVOID_UNUSED_VARIABLE_WARNING(node);
|
2206
2290
|
set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_HEAD);
|
2207
2291
|
parser->_parser_state->_reprocess_current_token = true;
|
@@ -2233,10 +2317,10 @@ static bool handle_after_head(GumboParser* parser, GumboToken* token) {
|
|
2233
2317
|
insert_element_from_token(parser, token);
|
2234
2318
|
set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_FRAMESET);
|
2235
2319
|
return true;
|
2236
|
-
} else if (tag_in(token, kStartTag,
|
2237
|
-
|
2238
|
-
|
2239
|
-
|
2320
|
+
} else if (tag_in(token, kStartTag, (gumbo_tagset) { TAG(BASE), TAG(BASEFONT),
|
2321
|
+
TAG(BGSOUND), TAG(LINK), TAG(META),
|
2322
|
+
TAG(NOFRAMES), TAG(SCRIPT), TAG(STYLE),
|
2323
|
+
TAG(TEMPLATE), TAG(TITLE) })) {
|
2240
2324
|
parser_add_parse_error(parser, token);
|
2241
2325
|
assert(state->_head_element != NULL);
|
2242
2326
|
// This must be flushed before we push the head element on, as there may be
|
@@ -2246,10 +2330,11 @@ static bool handle_after_head(GumboParser* parser, GumboToken* token) {
|
|
2246
2330
|
bool result = handle_in_head(parser, token);
|
2247
2331
|
gumbo_vector_remove(parser, state->_head_element, &state->_open_elements);
|
2248
2332
|
return result;
|
2333
|
+
} else if (tag_is(token, kEndTag, GUMBO_TAG_TEMPLATE)) {
|
2334
|
+
return handle_in_head(parser, token);
|
2249
2335
|
} else if (tag_is(token, kStartTag, GUMBO_TAG_HEAD) ||
|
2250
2336
|
(token->type == GUMBO_TOKEN_END_TAG &&
|
2251
|
-
!tag_in(token, kEndTag,
|
2252
|
-
GUMBO_TAG_BR, GUMBO_TAG_LAST))) {
|
2337
|
+
!tag_in(token, kEndTag, (gumbo_tagset) { TAG(BODY), TAG(HTML), TAG(BR) }))) {
|
2253
2338
|
parser_add_parse_error(parser, token);
|
2254
2339
|
ignore_token(parser);
|
2255
2340
|
return false;
|
@@ -2261,28 +2346,23 @@ static bool handle_after_head(GumboParser* parser, GumboToken* token) {
|
|
2261
2346
|
}
|
2262
2347
|
}
|
2263
2348
|
|
2264
|
-
static
|
2349
|
+
static GumboNode* destroy_node(GumboParser* parser, GumboNode* node) {
|
2265
2350
|
switch (node->type) {
|
2266
2351
|
case GUMBO_NODE_DOCUMENT:
|
2267
2352
|
{
|
2268
2353
|
GumboDocument* doc = &node->v.document;
|
2269
|
-
for (int i = 0; i < doc->children.length; ++i) {
|
2270
|
-
destroy_node(parser, doc->children.data[i]);
|
2271
|
-
}
|
2272
2354
|
gumbo_parser_deallocate(parser, (void*) doc->children.data);
|
2273
2355
|
gumbo_parser_deallocate(parser, (void*) doc->name);
|
2274
2356
|
gumbo_parser_deallocate(parser, (void*) doc->public_identifier);
|
2275
2357
|
gumbo_parser_deallocate(parser, (void*) doc->system_identifier);
|
2276
2358
|
}
|
2277
2359
|
break;
|
2360
|
+
case GUMBO_NODE_TEMPLATE:
|
2278
2361
|
case GUMBO_NODE_ELEMENT:
|
2279
2362
|
for (int i = 0; i < node->v.element.attributes.length; ++i) {
|
2280
2363
|
gumbo_destroy_attribute(parser, node->v.element.attributes.data[i]);
|
2281
2364
|
}
|
2282
2365
|
gumbo_parser_deallocate(parser, node->v.element.attributes.data);
|
2283
|
-
for (int i = 0; i < node->v.element.children.length; ++i) {
|
2284
|
-
destroy_node(parser, node->v.element.children.data[i]);
|
2285
|
-
}
|
2286
2366
|
gumbo_parser_deallocate(parser, node->v.element.children.data);
|
2287
2367
|
break;
|
2288
2368
|
case GUMBO_NODE_TEXT:
|
@@ -2292,7 +2372,21 @@ static void destroy_node(GumboParser* parser, GumboNode* node) {
|
|
2292
2372
|
gumbo_parser_deallocate(parser, (void*) node->v.text.text);
|
2293
2373
|
break;
|
2294
2374
|
}
|
2375
|
+
// Remove from the next/prev linked list.
|
2376
|
+
GumboNode* prev = node->prev;
|
2377
|
+
GumboNode* next = node->next;
|
2378
|
+
if (prev != NULL) {
|
2379
|
+
prev->next = next;
|
2380
|
+
}
|
2381
|
+
if (next != NULL) {
|
2382
|
+
next->prev = prev;
|
2383
|
+
}
|
2384
|
+
if (parser->_parser_state && parser->_parser_state->_current_node == node) {
|
2385
|
+
parser->_parser_state->_current_node = prev;
|
2386
|
+
}
|
2387
|
+
|
2295
2388
|
gumbo_parser_deallocate(parser, node);
|
2389
|
+
return next;
|
2296
2390
|
}
|
2297
2391
|
|
2298
2392
|
// http://www.whatwg.org/specs/web-apps/current-work/complete/tokenization.html#parsing-main-inbody
|
@@ -2307,7 +2401,8 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
2307
2401
|
reconstruct_active_formatting_elements(parser);
|
2308
2402
|
insert_text_token(parser, token);
|
2309
2403
|
return true;
|
2310
|
-
} else if (token->type == GUMBO_TOKEN_CHARACTER
|
2404
|
+
} else if (token->type == GUMBO_TOKEN_CHARACTER ||
|
2405
|
+
token->type == GUMBO_TOKEN_CDATA) {
|
2311
2406
|
reconstruct_active_formatting_elements(parser);
|
2312
2407
|
insert_text_token(parser, token);
|
2313
2408
|
set_frameset_not_ok(parser);
|
@@ -2320,20 +2415,24 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
2320
2415
|
ignore_token(parser);
|
2321
2416
|
return false;
|
2322
2417
|
} else if (tag_is(token, kStartTag, GUMBO_TAG_HTML)) {
|
2418
|
+
parser_add_parse_error(parser, token);
|
2419
|
+
if (has_open_element(parser, GUMBO_TAG_TEMPLATE)) {
|
2420
|
+
ignore_token(parser);
|
2421
|
+
return false;
|
2422
|
+
}
|
2323
2423
|
assert(parser->_output->root != NULL);
|
2324
2424
|
assert(parser->_output->root->type == GUMBO_NODE_ELEMENT);
|
2325
|
-
parser_add_parse_error(parser, token);
|
2326
2425
|
merge_attributes(parser, token, parser->_output->root);
|
2327
2426
|
return false;
|
2328
|
-
} else if (tag_in(token, kStartTag,
|
2329
|
-
|
2330
|
-
|
2331
|
-
|
2427
|
+
} else if (tag_in(token, kStartTag, (gumbo_tagset) { TAG(BASE), TAG(BASEFONT),
|
2428
|
+
TAG(BGSOUND), TAG(MENUITEM), TAG(LINK),
|
2429
|
+
TAG(META), TAG(NOFRAMES), TAG(SCRIPT),
|
2430
|
+
TAG(STYLE), TAG(TEMPLATE), TAG(TITLE) } ) || tag_is(token, kEndTag, GUMBO_TAG_TEMPLATE)) {
|
2332
2431
|
return handle_in_head(parser, token);
|
2333
2432
|
} else if (tag_is(token, kStartTag, GUMBO_TAG_BODY)) {
|
2334
2433
|
parser_add_parse_error(parser, token);
|
2335
2434
|
if (state->_open_elements.length < 2 ||
|
2336
|
-
!
|
2435
|
+
!node_html_tag_is(state->_open_elements.data[1], GUMBO_TAG_BODY) || has_open_element(parser, GUMBO_TAG_TEMPLATE)) {
|
2337
2436
|
ignore_token(parser);
|
2338
2437
|
return false;
|
2339
2438
|
}
|
@@ -2343,7 +2442,7 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
2343
2442
|
} else if (tag_is(token, kStartTag, GUMBO_TAG_FRAMESET)) {
|
2344
2443
|
parser_add_parse_error(parser, token);
|
2345
2444
|
if (state->_open_elements.length < 2 ||
|
2346
|
-
!
|
2445
|
+
!node_html_tag_is(state->_open_elements.data[1], GUMBO_TAG_BODY) ||
|
2347
2446
|
!state->_frameset_ok) {
|
2348
2447
|
ignore_token(parser);
|
2349
2448
|
return false;
|
@@ -2381,18 +2480,17 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
2381
2480
|
return true;
|
2382
2481
|
} else if (token->type == GUMBO_TOKEN_EOF) {
|
2383
2482
|
for (int i = 0; i < state->_open_elements.length; ++i) {
|
2384
|
-
if (!
|
2385
|
-
|
2386
|
-
|
2387
|
-
GUMBO_TAG_THEAD, GUMBO_TAG_TR, GUMBO_TAG_BODY,
|
2388
|
-
GUMBO_TAG_HTML, GUMBO_TAG_LAST)) {
|
2483
|
+
if (!node_tag_in_set(state->_open_elements.data[i], (gumbo_tagset) { TAG(DD),
|
2484
|
+
TAG(DT), TAG(LI), TAG(P), TAG(TBODY), TAG(TD), TAG(TFOOT), TAG(TH),
|
2485
|
+
TAG(THEAD), TAG(TR), TAG(BODY), TAG(HTML) } )) {
|
2389
2486
|
parser_add_parse_error(parser, token);
|
2390
|
-
return false;
|
2391
2487
|
}
|
2392
2488
|
}
|
2489
|
+
if (get_current_template_insertion_mode(parser) != GUMBO_INSERTION_MODE_INITIAL) {
|
2490
|
+
return handle_in_template(parser, token);
|
2491
|
+
}
|
2393
2492
|
return true;
|
2394
|
-
} else if (tag_in(token, kEndTag,
|
2395
|
-
GUMBO_TAG_LAST)) {
|
2493
|
+
} else if (tag_in(token, kEndTag, (gumbo_tagset) { TAG(BODY), TAG(HTML) })) {
|
2396
2494
|
if (!has_an_element_in_scope(parser, GUMBO_TAG_BODY)) {
|
2397
2495
|
parser_add_parse_error(parser, token);
|
2398
2496
|
ignore_token(parser);
|
@@ -2400,13 +2498,11 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
2400
2498
|
}
|
2401
2499
|
bool success = true;
|
2402
2500
|
for (int i = 0; i < state->_open_elements.length; ++i) {
|
2403
|
-
if (!
|
2404
|
-
|
2405
|
-
|
2406
|
-
|
2407
|
-
|
2408
|
-
GUMBO_TAG_TR, GUMBO_TAG_BODY, GUMBO_TAG_HTML,
|
2409
|
-
GUMBO_TAG_LAST)) {
|
2501
|
+
if (!node_tag_in_set(state->_open_elements.data[i], (gumbo_tagset) {
|
2502
|
+
TAG(DD), TAG(DT), TAG(LI), TAG(OPTGROUP), TAG(OPTION), TAG(P),
|
2503
|
+
TAG(RB), TAG(RP), TAG(RT), TAG(RTC), TAG(TBODY), TAG(TD),
|
2504
|
+
TAG(TFOOT), TAG(TH), TAG(THEAD), TAG(TR),
|
2505
|
+
TAG(BODY), TAG(HTML) })) {
|
2410
2506
|
parser_add_parse_error(parser, token);
|
2411
2507
|
success = false;
|
2412
2508
|
break;
|
@@ -2417,58 +2513,54 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
2417
2513
|
parser->_parser_state->_reprocess_current_token = true;
|
2418
2514
|
} else {
|
2419
2515
|
GumboNode* body = state->_open_elements.data[1];
|
2420
|
-
assert(
|
2516
|
+
assert(node_html_tag_is(body, GUMBO_TAG_BODY));
|
2421
2517
|
record_end_of_element(state->_current_token, &body->v.element);
|
2422
2518
|
}
|
2423
2519
|
return success;
|
2424
|
-
} else if (tag_in(token, kStartTag,
|
2425
|
-
|
2426
|
-
|
2427
|
-
|
2428
|
-
|
2429
|
-
GUMBO_TAG_HGROUP, GUMBO_TAG_MENU, GUMBO_TAG_NAV,
|
2430
|
-
GUMBO_TAG_OL, GUMBO_TAG_P, GUMBO_TAG_SECTION,
|
2431
|
-
GUMBO_TAG_SUMMARY, GUMBO_TAG_UL, GUMBO_TAG_LAST)) {
|
2520
|
+
} else if (tag_in(token, kStartTag, (gumbo_tagset) { TAG(ADDRESS), TAG(ARTICLE),
|
2521
|
+
TAG(ASIDE), TAG(BLOCKQUOTE), TAG(CENTER), TAG(DETAILS),
|
2522
|
+
TAG(DIR), TAG(DIV), TAG(DL), TAG(FIELDSET), TAG(FIGCAPTION),
|
2523
|
+
TAG(FIGURE), TAG(FOOTER), TAG(HEADER), TAG(HGROUP), TAG(MENU), TAG(MAIN),
|
2524
|
+
TAG(NAV), TAG(OL), TAG(P), TAG(SECTION), TAG(SUMMARY), TAG(UL) })) {
|
2432
2525
|
bool result = maybe_implicitly_close_p_tag(parser, token);
|
2433
2526
|
insert_element_from_token(parser, token);
|
2434
2527
|
return result;
|
2435
|
-
} else if (tag_in(token, kStartTag,
|
2436
|
-
|
2528
|
+
} else if (tag_in(token, kStartTag, (gumbo_tagset) { TAG(H1), TAG(H2), TAG(H3),
|
2529
|
+
TAG(H4), TAG(H5), TAG(H6) })) {
|
2437
2530
|
bool result = maybe_implicitly_close_p_tag(parser, token);
|
2438
|
-
if (
|
2439
|
-
|
2440
|
-
GUMBO_TAG_LAST)) {
|
2531
|
+
if (node_tag_in_set(get_current_node(parser), (gumbo_tagset) { TAG(H1), TAG(H2),
|
2532
|
+
TAG(H3), TAG(H4), TAG(H5), TAG(H6) })) {
|
2441
2533
|
parser_add_parse_error(parser, token);
|
2442
2534
|
pop_current_node(parser);
|
2443
2535
|
result = false;
|
2444
2536
|
}
|
2445
2537
|
insert_element_from_token(parser, token);
|
2446
2538
|
return result;
|
2447
|
-
|
2448
|
-
GUMBO_TAG_LAST)) {
|
2539
|
+
} else if (tag_in(token, kStartTag, (gumbo_tagset) { TAG(PRE), TAG(LISTING) })) {
|
2449
2540
|
bool result = maybe_implicitly_close_p_tag(parser, token);
|
2450
2541
|
insert_element_from_token(parser, token);
|
2451
2542
|
state->_ignore_next_linefeed = true;
|
2452
2543
|
state->_frameset_ok = false;
|
2453
2544
|
return result;
|
2454
2545
|
} else if (tag_is(token, kStartTag, GUMBO_TAG_FORM)) {
|
2455
|
-
if (state->_form_element != NULL) {
|
2546
|
+
if (state->_form_element != NULL && !has_open_element(parser, GUMBO_TAG_TEMPLATE)) {
|
2456
2547
|
gumbo_debug("Ignoring nested form.\n");
|
2457
2548
|
parser_add_parse_error(parser, token);
|
2458
2549
|
ignore_token(parser);
|
2459
2550
|
return false;
|
2460
2551
|
}
|
2461
2552
|
bool result = maybe_implicitly_close_p_tag(parser, token);
|
2462
|
-
|
2463
|
-
|
2553
|
+
GumboNode* form_element = insert_element_from_token(parser, token);
|
2554
|
+
if (!has_open_element(parser, GUMBO_TAG_TEMPLATE)) {
|
2555
|
+
state->_form_element = form_element;
|
2556
|
+
}
|
2464
2557
|
return result;
|
2465
2558
|
} else if (tag_is(token, kStartTag, GUMBO_TAG_LI)) {
|
2466
2559
|
maybe_implicitly_close_list_tag(parser, token, true);
|
2467
2560
|
bool result = maybe_implicitly_close_p_tag(parser, token);
|
2468
2561
|
insert_element_from_token(parser, token);
|
2469
2562
|
return result;
|
2470
|
-
|
2471
|
-
GUMBO_TAG_LAST)) {
|
2563
|
+
} else if (tag_in(token, kStartTag, (gumbo_tagset) { TAG(DD), TAG(DT) })) {
|
2472
2564
|
maybe_implicitly_close_list_tag(parser, token, false);
|
2473
2565
|
bool result = maybe_implicitly_close_p_tag(parser, token);
|
2474
2566
|
insert_element_from_token(parser, token);
|
@@ -2481,7 +2573,7 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
2481
2573
|
} else if (tag_is(token, kStartTag, GUMBO_TAG_BUTTON)) {
|
2482
2574
|
if (has_an_element_in_scope(parser, GUMBO_TAG_BUTTON)) {
|
2483
2575
|
parser_add_parse_error(parser, token);
|
2484
|
-
implicitly_close_tags(parser, token, GUMBO_TAG_BUTTON);
|
2576
|
+
implicitly_close_tags(parser, token, GUMBO_NAMESPACE_HTML, GUMBO_TAG_BUTTON);
|
2485
2577
|
state->_reprocess_current_token = true;
|
2486
2578
|
return false;
|
2487
2579
|
}
|
@@ -2489,67 +2581,78 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
2489
2581
|
insert_element_from_token(parser, token);
|
2490
2582
|
state->_frameset_ok = false;
|
2491
2583
|
return true;
|
2492
|
-
|
2493
|
-
|
2494
|
-
|
2495
|
-
|
2496
|
-
|
2497
|
-
|
2498
|
-
GUMBO_TAG_MENU, GUMBO_TAG_NAV, GUMBO_TAG_OL, GUMBO_TAG_PRE,
|
2499
|
-
GUMBO_TAG_SECTION, GUMBO_TAG_SUMMARY, GUMBO_TAG_UL,
|
2500
|
-
GUMBO_TAG_LAST)) {
|
2584
|
+
} else if (tag_in(token, kEndTag, (gumbo_tagset) { TAG(ADDRESS), TAG(ARTICLE),
|
2585
|
+
TAG(ASIDE), TAG(BLOCKQUOTE), TAG(BUTTON), TAG(CENTER), TAG(DETAILS),
|
2586
|
+
TAG(DIR), TAG(DIV), TAG(DL), TAG(FIELDSET), TAG(FIGCAPTION),
|
2587
|
+
TAG(FIGURE), TAG(FOOTER), TAG(HEADER), TAG(HGROUP), TAG(LISTING),
|
2588
|
+
TAG(MAIN), TAG(MENU), TAG(NAV), TAG(OL), TAG(PRE),
|
2589
|
+
TAG(SECTION), TAG(SUMMARY), TAG(UL) })) {
|
2501
2590
|
GumboTag tag = token->v.end_tag;
|
2502
2591
|
if (!has_an_element_in_scope(parser, tag)) {
|
2503
2592
|
parser_add_parse_error(parser, token);
|
2504
2593
|
ignore_token(parser);
|
2505
2594
|
return false;
|
2506
2595
|
}
|
2507
|
-
implicitly_close_tags(parser, token, token->v.end_tag);
|
2596
|
+
implicitly_close_tags(parser, token, GUMBO_NAMESPACE_HTML, token->v.end_tag);
|
2508
2597
|
return true;
|
2509
2598
|
} else if (tag_is(token, kEndTag, GUMBO_TAG_FORM)) {
|
2510
|
-
|
2511
|
-
|
2512
|
-
|
2513
|
-
|
2514
|
-
|
2515
|
-
|
2516
|
-
|
2517
|
-
|
2518
|
-
|
2519
|
-
|
2520
|
-
|
2521
|
-
|
2522
|
-
|
2523
|
-
|
2524
|
-
|
2525
|
-
result =
|
2526
|
-
|
2599
|
+
if (has_open_element(parser, GUMBO_TAG_TEMPLATE)) {
|
2600
|
+
if (!has_an_element_in_scope(parser, GUMBO_TAG_FORM)) {
|
2601
|
+
parser_add_parse_error(parser, token);
|
2602
|
+
ignore_token(parser);
|
2603
|
+
return false;
|
2604
|
+
}
|
2605
|
+
bool success = true;
|
2606
|
+
generate_implied_end_tags(parser, GUMBO_TAG_LAST);
|
2607
|
+
if (!node_html_tag_is(get_current_node(parser), GUMBO_TAG_FORM)) {
|
2608
|
+
parser_add_parse_error(parser, token);
|
2609
|
+
return false;
|
2610
|
+
}
|
2611
|
+
while(!node_html_tag_is(pop_current_node(parser), GUMBO_TAG_FORM));
|
2612
|
+
return success;
|
2613
|
+
} else {
|
2614
|
+
bool result = true;
|
2615
|
+
const GumboNode* node = state->_form_element;
|
2616
|
+
assert(!node || node->type == GUMBO_NODE_ELEMENT);
|
2617
|
+
state->_form_element = NULL;
|
2618
|
+
if (!node || !has_node_in_scope(parser, node)) {
|
2619
|
+
gumbo_debug("Closing an unopened form.\n");
|
2620
|
+
parser_add_parse_error(parser, token);
|
2621
|
+
ignore_token(parser);
|
2622
|
+
return false;
|
2623
|
+
}
|
2624
|
+
// This differs from implicitly_close_tags because we remove *only* the
|
2625
|
+
// <form> element; other nodes are left in scope.
|
2626
|
+
generate_implied_end_tags(parser, GUMBO_TAG_LAST);
|
2627
|
+
if (get_current_node(parser) != node) {
|
2628
|
+
parser_add_parse_error(parser, token);
|
2629
|
+
result = false;
|
2630
|
+
}
|
2527
2631
|
|
2528
|
-
|
2529
|
-
|
2530
|
-
|
2531
|
-
|
2532
|
-
|
2533
|
-
|
2632
|
+
GumboVector* open_elements = &state->_open_elements;
|
2633
|
+
int index = gumbo_vector_index_of(open_elements, node);
|
2634
|
+
assert(index >= 0);
|
2635
|
+
gumbo_vector_remove_at(parser, index, open_elements);
|
2636
|
+
return result;
|
2637
|
+
}
|
2534
2638
|
} else if (tag_is(token, kEndTag, GUMBO_TAG_P)) {
|
2535
2639
|
if (!has_an_element_in_button_scope(parser, GUMBO_TAG_P)) {
|
2536
2640
|
parser_add_parse_error(parser, token);
|
2537
|
-
reconstruct_active_formatting_elements(parser);
|
2641
|
+
// reconstruct_active_formatting_elements(parser);
|
2538
2642
|
insert_element_of_tag_type(
|
2539
2643
|
parser, GUMBO_TAG_P, GUMBO_INSERTION_CONVERTED_FROM_END_TAG);
|
2540
2644
|
state->_reprocess_current_token = true;
|
2541
2645
|
return false;
|
2542
2646
|
}
|
2543
|
-
return implicitly_close_tags(parser, token, GUMBO_TAG_P);
|
2647
|
+
return implicitly_close_tags(parser, token, GUMBO_NAMESPACE_HTML, GUMBO_TAG_P);
|
2544
2648
|
} else if (tag_is(token, kEndTag, GUMBO_TAG_LI)) {
|
2545
2649
|
if (!has_an_element_in_list_scope(parser, GUMBO_TAG_LI)) {
|
2546
2650
|
parser_add_parse_error(parser, token);
|
2547
2651
|
ignore_token(parser);
|
2548
2652
|
return false;
|
2549
2653
|
}
|
2550
|
-
return implicitly_close_tags(parser, token, GUMBO_TAG_LI);
|
2551
|
-
|
2552
|
-
GUMBO_TAG_LAST)) {
|
2654
|
+
return implicitly_close_tags(parser, token, GUMBO_NAMESPACE_HTML, GUMBO_TAG_LI);
|
2655
|
+
} else if (tag_in(token, kEndTag, (gumbo_tagset) { TAG(DD), TAG(DT) })) {
|
2553
2656
|
assert(token->type == GUMBO_TOKEN_END_TAG);
|
2554
2657
|
GumboTag token_tag = token->v.end_tag;
|
2555
2658
|
if (!has_an_element_in_scope(parser, token_tag)) {
|
@@ -2557,12 +2660,12 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
2557
2660
|
ignore_token(parser);
|
2558
2661
|
return false;
|
2559
2662
|
}
|
2560
|
-
return implicitly_close_tags(parser, token, token_tag);
|
2561
|
-
|
2562
|
-
|
2563
|
-
if (!has_an_element_in_scope_with_tagname(
|
2564
|
-
|
2565
|
-
|
2663
|
+
return implicitly_close_tags(parser, token, GUMBO_NAMESPACE_HTML, token_tag);
|
2664
|
+
} else if (tag_in(token, kEndTag, (gumbo_tagset) {
|
2665
|
+
TAG(H1), TAG(H2), TAG(H3), TAG(H4), TAG(H5), TAG(H6) })) {
|
2666
|
+
if (!has_an_element_in_scope_with_tagname(parser, 6, (GumboTag[]) {
|
2667
|
+
GUMBO_TAG_H1, GUMBO_TAG_H2, GUMBO_TAG_H3,
|
2668
|
+
GUMBO_TAG_H4, GUMBO_TAG_H5, GUMBO_TAG_H6})) {
|
2566
2669
|
// No heading open; ignore the token entirely.
|
2567
2670
|
parser_add_parse_error(parser, token);
|
2568
2671
|
ignore_token(parser);
|
@@ -2570,7 +2673,7 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
2570
2673
|
} else {
|
2571
2674
|
generate_implied_end_tags(parser, GUMBO_TAG_LAST);
|
2572
2675
|
const GumboNode* current_node = get_current_node(parser);
|
2573
|
-
bool success =
|
2676
|
+
bool success = node_html_tag_is(current_node, token->v.end_tag);
|
2574
2677
|
if (!success) {
|
2575
2678
|
// There're children of the heading currently open; close them below and
|
2576
2679
|