nokogumbo 1.4.2 → 1.4.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/gumbo-parser/src/attribute.c +1 -1
- data/gumbo-parser/src/char_ref.c +37 -67
- data/gumbo-parser/src/char_ref.h +3 -4
- data/gumbo-parser/src/char_ref.rl +6 -1
- data/gumbo-parser/src/error.c +50 -51
- data/gumbo-parser/src/error.h +7 -9
- data/gumbo-parser/src/gumbo.h +45 -181
- data/gumbo-parser/src/parser.c +1397 -989
- data/gumbo-parser/src/string_buffer.c +14 -10
- data/gumbo-parser/src/string_buffer.h +9 -6
- data/gumbo-parser/src/string_piece.c +5 -6
- data/gumbo-parser/src/string_piece.h +2 -3
- data/gumbo-parser/src/tag.c +36 -166
- data/gumbo-parser/src/tag.in +150 -0
- data/gumbo-parser/src/tag_enum.h +153 -0
- data/gumbo-parser/src/tag_gperf.h +105 -0
- data/gumbo-parser/src/tag_sizes.h +4 -0
- data/gumbo-parser/src/tag_strings.h +153 -0
- data/gumbo-parser/src/tokenizer.c +264 -360
- data/gumbo-parser/src/tokenizer.h +2 -2
- data/gumbo-parser/src/utf8.c +44 -44
- data/gumbo-parser/src/utf8.h +1 -2
- data/gumbo-parser/src/util.c +1 -1
- data/gumbo-parser/src/util.h +0 -2
- data/gumbo-parser/src/vector.c +17 -17
- data/gumbo-parser/src/vector.h +6 -8
- metadata +8 -3
data/gumbo-parser/src/error.h
CHANGED
@@ -201,24 +201,22 @@ void gumbo_error_destroy(struct GumboInternalParser* parser, GumboError* error);
|
|
201
201
|
// responsible for deleting the buffer. (Note that the buffer is allocated with
|
202
202
|
// the allocator specified in the GumboParser config and hence should be freed
|
203
203
|
// by gumbo_parser_deallocate().)
|
204
|
-
void gumbo_error_to_string(
|
205
|
-
|
206
|
-
GumboStringBuffer* output);
|
204
|
+
void gumbo_error_to_string(struct GumboInternalParser* parser,
|
205
|
+
const GumboError* error, GumboStringBuffer* output);
|
207
206
|
|
208
207
|
// Prints a caret diagnostic to a string. This fills an empty GumboStringBuffer
|
209
208
|
// with a freshly-allocated buffer containing the error message text. The
|
210
209
|
// caller is responsible for deleting the buffer. (Note that the buffer is
|
211
210
|
// allocated with the allocator specified in the GumboParser config and hence
|
212
211
|
// should be freed by gumbo_parser_deallocate().)
|
213
|
-
void gumbo_caret_diagnostic_to_string(
|
214
|
-
|
215
|
-
|
212
|
+
void gumbo_caret_diagnostic_to_string(struct GumboInternalParser* parser,
|
213
|
+
const GumboError* error, const char* source_text,
|
214
|
+
GumboStringBuffer* output);
|
216
215
|
|
217
216
|
// Like gumbo_caret_diagnostic_to_string, but prints the text to stdout instead
|
218
217
|
// of writing to a string.
|
219
|
-
void gumbo_print_caret_diagnostic(
|
220
|
-
|
221
|
-
const char* source_text);
|
218
|
+
void gumbo_print_caret_diagnostic(struct GumboInternalParser* parser,
|
219
|
+
const GumboError* error, const char* source_text);
|
222
220
|
|
223
221
|
#ifdef __cplusplus
|
224
222
|
}
|
data/gumbo-parser/src/gumbo.h
CHANGED
@@ -76,7 +76,6 @@ typedef struct {
|
|
76
76
|
*/
|
77
77
|
extern const GumboSourcePosition kGumboEmptySourcePosition;
|
78
78
|
|
79
|
-
|
80
79
|
/**
|
81
80
|
* A struct representing a string or part of a string. Strings within the
|
82
81
|
* parser are represented by a char* and a length; the char* points into
|
@@ -111,7 +110,6 @@ bool gumbo_string_equals(
|
|
111
110
|
bool gumbo_string_equals_ignore_case(
|
112
111
|
const GumboStringPiece* str1, const GumboStringPiece* str2);
|
113
112
|
|
114
|
-
|
115
113
|
/**
|
116
114
|
* A simple vector implementation. This stores a pointer to a data array and a
|
117
115
|
* length. All elements are stored as void*; client code must cast to the
|
@@ -141,8 +139,7 @@ extern const GumboVector kGumboEmptyVector;
|
|
141
139
|
* Returns the first index at which an element appears in this vector (testing
|
142
140
|
* by pointer equality), or -1 if it never does.
|
143
141
|
*/
|
144
|
-
int gumbo_vector_index_of(GumboVector* vector, void* element);
|
145
|
-
|
142
|
+
int gumbo_vector_index_of(GumboVector* vector, const void* element);
|
146
143
|
|
147
144
|
/**
|
148
145
|
* An enum for all the tags defined in the HTML5 standard. These correspond to
|
@@ -157,172 +154,10 @@ int gumbo_vector_index_of(GumboVector* vector, void* element);
|
|
157
154
|
* strings.
|
158
155
|
*/
|
159
156
|
typedef enum {
|
160
|
-
|
161
|
-
|
162
|
-
//
|
163
|
-
|
164
|
-
GUMBO_TAG_TITLE,
|
165
|
-
GUMBO_TAG_BASE,
|
166
|
-
GUMBO_TAG_LINK,
|
167
|
-
GUMBO_TAG_META,
|
168
|
-
GUMBO_TAG_STYLE,
|
169
|
-
// http://www.whatwg.org/specs/web-apps/current-work/multipage/scripting-1.html#scripting-1
|
170
|
-
GUMBO_TAG_SCRIPT,
|
171
|
-
GUMBO_TAG_NOSCRIPT,
|
172
|
-
GUMBO_TAG_TEMPLATE,
|
173
|
-
// http://www.whatwg.org/specs/web-apps/current-work/multipage/sections.html#sections
|
174
|
-
GUMBO_TAG_BODY,
|
175
|
-
GUMBO_TAG_ARTICLE,
|
176
|
-
GUMBO_TAG_SECTION,
|
177
|
-
GUMBO_TAG_NAV,
|
178
|
-
GUMBO_TAG_ASIDE,
|
179
|
-
GUMBO_TAG_H1,
|
180
|
-
GUMBO_TAG_H2,
|
181
|
-
GUMBO_TAG_H3,
|
182
|
-
GUMBO_TAG_H4,
|
183
|
-
GUMBO_TAG_H5,
|
184
|
-
GUMBO_TAG_H6,
|
185
|
-
GUMBO_TAG_HGROUP,
|
186
|
-
GUMBO_TAG_HEADER,
|
187
|
-
GUMBO_TAG_FOOTER,
|
188
|
-
GUMBO_TAG_ADDRESS,
|
189
|
-
// http://www.whatwg.org/specs/web-apps/current-work/multipage/grouping-content.html#grouping-content
|
190
|
-
GUMBO_TAG_P,
|
191
|
-
GUMBO_TAG_HR,
|
192
|
-
GUMBO_TAG_PRE,
|
193
|
-
GUMBO_TAG_BLOCKQUOTE,
|
194
|
-
GUMBO_TAG_OL,
|
195
|
-
GUMBO_TAG_UL,
|
196
|
-
GUMBO_TAG_LI,
|
197
|
-
GUMBO_TAG_DL,
|
198
|
-
GUMBO_TAG_DT,
|
199
|
-
GUMBO_TAG_DD,
|
200
|
-
GUMBO_TAG_FIGURE,
|
201
|
-
GUMBO_TAG_FIGCAPTION,
|
202
|
-
GUMBO_TAG_MAIN,
|
203
|
-
GUMBO_TAG_DIV,
|
204
|
-
// http://www.whatwg.org/specs/web-apps/current-work/multipage/text-level-semantics.html#text-level-semantics
|
205
|
-
GUMBO_TAG_A,
|
206
|
-
GUMBO_TAG_EM,
|
207
|
-
GUMBO_TAG_STRONG,
|
208
|
-
GUMBO_TAG_SMALL,
|
209
|
-
GUMBO_TAG_S,
|
210
|
-
GUMBO_TAG_CITE,
|
211
|
-
GUMBO_TAG_Q,
|
212
|
-
GUMBO_TAG_DFN,
|
213
|
-
GUMBO_TAG_ABBR,
|
214
|
-
GUMBO_TAG_DATA,
|
215
|
-
GUMBO_TAG_TIME,
|
216
|
-
GUMBO_TAG_CODE,
|
217
|
-
GUMBO_TAG_VAR,
|
218
|
-
GUMBO_TAG_SAMP,
|
219
|
-
GUMBO_TAG_KBD,
|
220
|
-
GUMBO_TAG_SUB,
|
221
|
-
GUMBO_TAG_SUP,
|
222
|
-
GUMBO_TAG_I,
|
223
|
-
GUMBO_TAG_B,
|
224
|
-
GUMBO_TAG_U,
|
225
|
-
GUMBO_TAG_MARK,
|
226
|
-
GUMBO_TAG_RUBY,
|
227
|
-
GUMBO_TAG_RT,
|
228
|
-
GUMBO_TAG_RP,
|
229
|
-
GUMBO_TAG_BDI,
|
230
|
-
GUMBO_TAG_BDO,
|
231
|
-
GUMBO_TAG_SPAN,
|
232
|
-
GUMBO_TAG_BR,
|
233
|
-
GUMBO_TAG_WBR,
|
234
|
-
// http://www.whatwg.org/specs/web-apps/current-work/multipage/edits.html#edits
|
235
|
-
GUMBO_TAG_INS,
|
236
|
-
GUMBO_TAG_DEL,
|
237
|
-
// http://www.whatwg.org/specs/web-apps/current-work/multipage/embedded-content-1.html#embedded-content-1
|
238
|
-
GUMBO_TAG_IMAGE,
|
239
|
-
GUMBO_TAG_IMG,
|
240
|
-
GUMBO_TAG_IFRAME,
|
241
|
-
GUMBO_TAG_EMBED,
|
242
|
-
GUMBO_TAG_OBJECT,
|
243
|
-
GUMBO_TAG_PARAM,
|
244
|
-
GUMBO_TAG_VIDEO,
|
245
|
-
GUMBO_TAG_AUDIO,
|
246
|
-
GUMBO_TAG_SOURCE,
|
247
|
-
GUMBO_TAG_TRACK,
|
248
|
-
GUMBO_TAG_CANVAS,
|
249
|
-
GUMBO_TAG_MAP,
|
250
|
-
GUMBO_TAG_AREA,
|
251
|
-
// http://www.whatwg.org/specs/web-apps/current-work/multipage/the-map-element.html#mathml
|
252
|
-
GUMBO_TAG_MATH,
|
253
|
-
GUMBO_TAG_MI,
|
254
|
-
GUMBO_TAG_MO,
|
255
|
-
GUMBO_TAG_MN,
|
256
|
-
GUMBO_TAG_MS,
|
257
|
-
GUMBO_TAG_MTEXT,
|
258
|
-
GUMBO_TAG_MGLYPH,
|
259
|
-
GUMBO_TAG_MALIGNMARK,
|
260
|
-
GUMBO_TAG_ANNOTATION_XML,
|
261
|
-
// http://www.whatwg.org/specs/web-apps/current-work/multipage/the-map-element.html#svg-0
|
262
|
-
GUMBO_TAG_SVG,
|
263
|
-
GUMBO_TAG_FOREIGNOBJECT,
|
264
|
-
GUMBO_TAG_DESC,
|
265
|
-
// SVG title tags will have GUMBO_TAG_TITLE as with HTML.
|
266
|
-
// http://www.whatwg.org/specs/web-apps/current-work/multipage/tabular-data.html#tabular-data
|
267
|
-
GUMBO_TAG_TABLE,
|
268
|
-
GUMBO_TAG_CAPTION,
|
269
|
-
GUMBO_TAG_COLGROUP,
|
270
|
-
GUMBO_TAG_COL,
|
271
|
-
GUMBO_TAG_TBODY,
|
272
|
-
GUMBO_TAG_THEAD,
|
273
|
-
GUMBO_TAG_TFOOT,
|
274
|
-
GUMBO_TAG_TR,
|
275
|
-
GUMBO_TAG_TD,
|
276
|
-
GUMBO_TAG_TH,
|
277
|
-
// http://www.whatwg.org/specs/web-apps/current-work/multipage/forms.html#forms
|
278
|
-
GUMBO_TAG_FORM,
|
279
|
-
GUMBO_TAG_FIELDSET,
|
280
|
-
GUMBO_TAG_LEGEND,
|
281
|
-
GUMBO_TAG_LABEL,
|
282
|
-
GUMBO_TAG_INPUT,
|
283
|
-
GUMBO_TAG_BUTTON,
|
284
|
-
GUMBO_TAG_SELECT,
|
285
|
-
GUMBO_TAG_DATALIST,
|
286
|
-
GUMBO_TAG_OPTGROUP,
|
287
|
-
GUMBO_TAG_OPTION,
|
288
|
-
GUMBO_TAG_TEXTAREA,
|
289
|
-
GUMBO_TAG_KEYGEN,
|
290
|
-
GUMBO_TAG_OUTPUT,
|
291
|
-
GUMBO_TAG_PROGRESS,
|
292
|
-
GUMBO_TAG_METER,
|
293
|
-
// http://www.whatwg.org/specs/web-apps/current-work/multipage/interactive-elements.html#interactive-elements
|
294
|
-
GUMBO_TAG_DETAILS,
|
295
|
-
GUMBO_TAG_SUMMARY,
|
296
|
-
GUMBO_TAG_MENU,
|
297
|
-
GUMBO_TAG_MENUITEM,
|
298
|
-
// Non-conforming elements that nonetheless appear in the HTML5 spec.
|
299
|
-
// http://www.whatwg.org/specs/web-apps/current-work/multipage/obsolete.html#non-conforming-features
|
300
|
-
GUMBO_TAG_APPLET,
|
301
|
-
GUMBO_TAG_ACRONYM,
|
302
|
-
GUMBO_TAG_BGSOUND,
|
303
|
-
GUMBO_TAG_DIR,
|
304
|
-
GUMBO_TAG_FRAME,
|
305
|
-
GUMBO_TAG_FRAMESET,
|
306
|
-
GUMBO_TAG_NOFRAMES,
|
307
|
-
GUMBO_TAG_ISINDEX,
|
308
|
-
GUMBO_TAG_LISTING,
|
309
|
-
GUMBO_TAG_XMP,
|
310
|
-
GUMBO_TAG_NEXTID,
|
311
|
-
GUMBO_TAG_NOEMBED,
|
312
|
-
GUMBO_TAG_PLAINTEXT,
|
313
|
-
GUMBO_TAG_RB,
|
314
|
-
GUMBO_TAG_STRIKE,
|
315
|
-
GUMBO_TAG_BASEFONT,
|
316
|
-
GUMBO_TAG_BIG,
|
317
|
-
GUMBO_TAG_BLINK,
|
318
|
-
GUMBO_TAG_CENTER,
|
319
|
-
GUMBO_TAG_FONT,
|
320
|
-
GUMBO_TAG_MARQUEE,
|
321
|
-
GUMBO_TAG_MULTICOL,
|
322
|
-
GUMBO_TAG_NOBR,
|
323
|
-
GUMBO_TAG_SPACER,
|
324
|
-
GUMBO_TAG_TT,
|
325
|
-
// Used for all tags that don't have special handling in HTML.
|
157
|
+
// Load all the tags from an external source, generated from tag.in.
|
158
|
+
#include "tag_enum.h"
|
159
|
+
// Used for all tags that don't have special handling in HTML. Add new tags
|
160
|
+
// to the end of tag.in so as to preserve backwards-compatibility.
|
326
161
|
GUMBO_TAG_UNKNOWN,
|
327
162
|
// A marker value to indicate the end of the enum, for iterating over it.
|
328
163
|
// Also used as the terminator for varargs functions that take tags.
|
@@ -364,9 +199,10 @@ const char* gumbo_normalize_svg_tagname(const GumboStringPiece* tagname);
|
|
364
199
|
|
365
200
|
/**
|
366
201
|
* Converts a tag name string (which may be in upper or mixed case) to a tag
|
367
|
-
* enum.
|
202
|
+
* enum. gumbo_tag_enum expects `tagname` to be NUL-terminated; gumbo_tagn_enum takes an explicit length instead.
|
368
203
|
*/
|
369
204
|
GumboTag gumbo_tag_enum(const char* tagname);
|
205
|
+
GumboTag gumbo_tagn_enum(const char* tagname, unsigned int length);
|
370
206
|
|
371
207
|
/**
|
372
208
|
* Attribute namespaces.
|
@@ -461,10 +297,16 @@ typedef enum {
|
|
461
297
|
GUMBO_NODE_TEXT,
|
462
298
|
/** CDATA node. v will be a GumboText. */
|
463
299
|
GUMBO_NODE_CDATA,
|
464
|
-
/** Comment node. v
|
300
|
+
/** Comment node. v will be a GumboText, excluding comment delimiters. */
|
465
301
|
GUMBO_NODE_COMMENT,
|
466
302
|
/** Text node, where all contents is whitespace. v will be a GumboText. */
|
467
|
-
GUMBO_NODE_WHITESPACE
|
303
|
+
GUMBO_NODE_WHITESPACE,
|
304
|
+
/** Template node. This is separate from GUMBO_NODE_ELEMENT because many
|
305
|
+
* client libraries will want to ignore the contents of template nodes, as
|
306
|
+
* the spec suggests. Recursing on GUMBO_NODE_ELEMENT will do the right thing
|
307
|
+
* here, while clients that want to include template contents should also
|
308
|
+
* check for GUMBO_NODE_TEMPLATE. v will be a GumboElement. */
|
309
|
+
GUMBO_NODE_TEMPLATE
|
468
310
|
} GumboNodeType;
|
469
311
|
|
470
312
|
/**
|
@@ -473,7 +315,9 @@ typedef enum {
|
|
473
315
|
*/
|
474
316
|
typedef struct GumboInternalNode GumboNode;
|
475
317
|
|
476
|
-
/**
|
318
|
+
/**
|
319
|
+
* http://www.whatwg.org/specs/web-apps/current-work/complete/dom.html#quirks-mode
|
320
|
+
*/
|
477
321
|
typedef enum {
|
478
322
|
GUMBO_DOCTYPE_NO_QUIRKS,
|
479
323
|
GUMBO_DOCTYPE_QUIRKS,
|
@@ -571,7 +415,6 @@ typedef enum {
|
|
571
415
|
GUMBO_INSERTION_FOSTER_PARENTED = 1 << 10,
|
572
416
|
} GumboParseFlags;
|
573
417
|
|
574
|
-
|
575
418
|
/**
|
576
419
|
* Information specific to document nodes.
|
577
420
|
*/
|
@@ -690,9 +533,9 @@ struct GumboInternalNode {
|
|
690
533
|
|
691
534
|
/** The actual node data. */
|
692
535
|
union {
|
693
|
-
GumboDocument document;
|
694
|
-
GumboElement element;
|
695
|
-
GumboText text;
|
536
|
+
GumboDocument document; // For GUMBO_NODE_DOCUMENT.
|
537
|
+
GumboElement element; // For GUMBO_NODE_ELEMENT and GUMBO_NODE_TEMPLATE.
|
538
|
+
GumboText text; // For everything else.
|
696
539
|
} v;
|
697
540
|
};
|
698
541
|
|
@@ -750,6 +593,29 @@ typedef struct GumboInternalOptions {
|
|
750
593
|
* Default: -1
|
751
594
|
*/
|
752
595
|
int max_errors;
|
596
|
+
|
597
|
+
/**
|
598
|
+
* The fragment context for parsing:
|
599
|
+
* https://html.spec.whatwg.org/multipage/syntax.html#parsing-html-fragments
|
600
|
+
*
|
601
|
+
* If GUMBO_TAG_LAST is passed here, it is assumed to be "no fragment", i.e.
|
602
|
+
* the regular parsing algorithm. Otherwise, pass the tag enum for the
|
603
|
+
* intended parent of the parsed fragment. We use just the tag enum rather
|
604
|
+
* than a full node because that's enough to set all the parsing context we
|
605
|
+
* need, and it provides some additional flexibility for client code to act as
|
606
|
+
* if parsing a fragment even when a full HTML tree isn't available.
|
607
|
+
*
|
608
|
+
* Default: GUMBO_TAG_LAST
|
609
|
+
*/
|
610
|
+
GumboTag fragment_context;
|
611
|
+
|
612
|
+
/**
|
613
|
+
* The namespace for the fragment context. This lets client code
|
614
|
+
* differentiate between, say, parsing a <title> tag in SVG vs. parsing it in
|
615
|
+
* HTML.
|
616
|
+
* Default: GUMBO_NAMESPACE_HTML
|
617
|
+
*/
|
618
|
+
GumboNamespaceEnum fragment_namespace;
|
753
619
|
} GumboOptions;
|
754
620
|
|
755
621
|
/** Default options struct; use this with gumbo_parse_with_options. */
|
@@ -796,9 +662,7 @@ GumboOutput* gumbo_parse_with_options(
|
|
796
662
|
const GumboOptions* options, const char* buffer, size_t buffer_length);
|
797
663
|
|
798
664
|
/** Release the memory used for the parse tree & parse errors. */
|
799
|
-
void gumbo_destroy_output(
|
800
|
-
const GumboOptions* options, GumboOutput* output);
|
801
|
-
|
665
|
+
void gumbo_destroy_output(const GumboOptions* options, GumboOutput* output);
|
802
666
|
|
803
667
|
#ifdef __cplusplus
|
804
668
|
}
|
data/gumbo-parser/src/parser.c
CHANGED
@@ -34,8 +34,10 @@
|
|
34
34
|
|
35
35
|
#define AVOID_UNUSED_VARIABLE_WARNING(i) (void)(i)
|
36
36
|
|
37
|
-
#define GUMBO_STRING(literal)
|
38
|
-
|
37
|
+
#define GUMBO_STRING(literal) \
|
38
|
+
{ literal, sizeof(literal) - 1 }
|
39
|
+
#define TERMINATOR \
|
40
|
+
{ "", 0 }
|
39
41
|
|
40
42
|
typedef char gumbo_tagset[GUMBO_TAG_LAST];
|
41
43
|
#define TAG(tag) [GUMBO_TAG_##tag] = (1 << GUMBO_NAMESPACE_HTML)
|
@@ -43,46 +45,42 @@ typedef char gumbo_tagset[GUMBO_TAG_LAST];
|
|
43
45
|
#define TAG_MATHML(tag) [GUMBO_TAG_##tag] = (1 << GUMBO_NAMESPACE_MATHML)
|
44
46
|
|
45
47
|
#define TAGSET_INCLUDES(tagset, namespace, tag) \
|
46
|
-
(tag < GUMBO_TAG_LAST &&
|
47
|
-
tagset[(int)tag] == (1 << (int)namespace))
|
48
|
+
(tag < GUMBO_TAG_LAST && tagset[(int) tag] == (1 << (int) namespace))
|
48
49
|
|
50
|
+
// selected forward declarations as it is getting hard to find
|
51
|
+
// an appropriate order
|
52
|
+
static bool node_html_tag_is(const GumboNode*, GumboTag);
|
53
|
+
static GumboInsertionMode get_current_template_insertion_mode(
|
54
|
+
const GumboParser*);
|
55
|
+
static bool handle_in_template(GumboParser*, GumboToken*);
|
56
|
+
static void destroy_node(GumboParser*, GumboNode*);
|
49
57
|
|
50
|
-
static void* malloc_wrapper(void* unused, size_t size) {
|
51
|
-
return malloc(size);
|
52
|
-
}
|
58
|
+
static void* malloc_wrapper(void* unused, size_t size) { return malloc(size); }
|
53
59
|
|
54
|
-
static void free_wrapper(void* unused, void* ptr) {
|
55
|
-
free(ptr);
|
56
|
-
}
|
60
|
+
static void free_wrapper(void* unused, void* ptr) { free(ptr); }
|
57
61
|
|
58
|
-
const GumboOptions kGumboDefaultOptions = {
|
59
|
-
|
60
|
-
&free_wrapper,
|
61
|
-
NULL,
|
62
|
-
8,
|
63
|
-
false,
|
64
|
-
-1,
|
65
|
-
};
|
62
|
+
const GumboOptions kGumboDefaultOptions = {&malloc_wrapper, &free_wrapper, NULL,
|
63
|
+
8, false, -1, GUMBO_TAG_LAST, GUMBO_NAMESPACE_HTML};
|
66
64
|
|
67
65
|
static const GumboStringPiece kDoctypeHtml = GUMBO_STRING("html");
|
68
|
-
static const GumboStringPiece kPublicIdHtml4_0 =
|
69
|
-
"-//W3C//DTD HTML 4.0//EN");
|
70
|
-
static const GumboStringPiece kPublicIdHtml4_01 =
|
71
|
-
"-//W3C//DTD HTML 4.01//EN");
|
72
|
-
static const GumboStringPiece kPublicIdXhtml1_0 =
|
73
|
-
"-//W3C//DTD XHTML 1.0 Strict//EN");
|
74
|
-
static const GumboStringPiece kPublicIdXhtml1_1 =
|
75
|
-
"-//W3C//DTD XHTML 1.1//EN");
|
76
|
-
static const GumboStringPiece kSystemIdRecHtml4_0 =
|
77
|
-
"http://www.w3.org/TR/REC-html40/strict.dtd");
|
78
|
-
static const GumboStringPiece kSystemIdHtml4 =
|
79
|
-
"http://www.w3.org/TR/html4/strict.dtd");
|
80
|
-
static const GumboStringPiece kSystemIdXhtmlStrict1_1 =
|
81
|
-
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd");
|
82
|
-
static const GumboStringPiece kSystemIdXhtml1_1 =
|
83
|
-
"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd");
|
84
|
-
static const GumboStringPiece kSystemIdLegacyCompat =
|
85
|
-
"about:legacy-compat");
|
66
|
+
static const GumboStringPiece kPublicIdHtml4_0 =
|
67
|
+
GUMBO_STRING("-//W3C//DTD HTML 4.0//EN");
|
68
|
+
static const GumboStringPiece kPublicIdHtml4_01 =
|
69
|
+
GUMBO_STRING("-//W3C//DTD HTML 4.01//EN");
|
70
|
+
static const GumboStringPiece kPublicIdXhtml1_0 =
|
71
|
+
GUMBO_STRING("-//W3C//DTD XHTML 1.0 Strict//EN");
|
72
|
+
static const GumboStringPiece kPublicIdXhtml1_1 =
|
73
|
+
GUMBO_STRING("-//W3C//DTD XHTML 1.1//EN");
|
74
|
+
static const GumboStringPiece kSystemIdRecHtml4_0 =
|
75
|
+
GUMBO_STRING("http://www.w3.org/TR/REC-html40/strict.dtd");
|
76
|
+
static const GumboStringPiece kSystemIdHtml4 =
|
77
|
+
GUMBO_STRING("http://www.w3.org/TR/html4/strict.dtd");
|
78
|
+
static const GumboStringPiece kSystemIdXhtmlStrict1_1 =
|
79
|
+
GUMBO_STRING("http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd");
|
80
|
+
static const GumboStringPiece kSystemIdXhtml1_1 =
|
81
|
+
GUMBO_STRING("http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd");
|
82
|
+
static const GumboStringPiece kSystemIdLegacyCompat =
|
83
|
+
GUMBO_STRING("about:legacy-compat");
|
86
84
|
|
87
85
|
// The doctype arrays have an explicit terminator because we want to pass them
|
88
86
|
// to a helper function, and passing them as a pointer discards sizeof
|
@@ -90,96 +88,86 @@ static const GumboStringPiece kSystemIdLegacyCompat = GUMBO_STRING(
|
|
90
88
|
// over them use sizeof directly instead of a terminator.
|
91
89
|
|
92
90
|
static const GumboStringPiece kQuirksModePublicIdPrefixes[] = {
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
};
|
91
|
+
GUMBO_STRING("+//Silmaril//dtd html Pro v0r11 19970101//"),
|
92
|
+
GUMBO_STRING("-//AdvaSoft Ltd//DTD HTML 3.0 asWedit + extensions//"),
|
93
|
+
GUMBO_STRING("-//AS//DTD HTML 3.0 asWedit + extensions//"),
|
94
|
+
GUMBO_STRING("-//IETF//DTD HTML 2.0 Level 1//"),
|
95
|
+
GUMBO_STRING("-//IETF//DTD HTML 2.0 Level 2//"),
|
96
|
+
GUMBO_STRING("-//IETF//DTD HTML 2.0 Strict Level 1//"),
|
97
|
+
GUMBO_STRING("-//IETF//DTD HTML 2.0 Strict Level 2//"),
|
98
|
+
GUMBO_STRING("-//IETF//DTD HTML 2.0 Strict//"),
|
99
|
+
GUMBO_STRING("-//IETF//DTD HTML 2.0//"),
|
100
|
+
GUMBO_STRING("-//IETF//DTD HTML 2.1E//"),
|
101
|
+
GUMBO_STRING("-//IETF//DTD HTML 3.0//"),
|
102
|
+
GUMBO_STRING("-//IETF//DTD HTML 3.2 Final//"),
|
103
|
+
GUMBO_STRING("-//IETF//DTD HTML 3.2//"),
|
104
|
+
GUMBO_STRING("-//IETF//DTD HTML 3//"),
|
105
|
+
GUMBO_STRING("-//IETF//DTD HTML Level 0//"),
|
106
|
+
GUMBO_STRING("-//IETF//DTD HTML Level 1//"),
|
107
|
+
GUMBO_STRING("-//IETF//DTD HTML Level 2//"),
|
108
|
+
GUMBO_STRING("-//IETF//DTD HTML Level 3//"),
|
109
|
+
GUMBO_STRING("-//IETF//DTD HTML Strict Level 0//"),
|
110
|
+
GUMBO_STRING("-//IETF//DTD HTML Strict Level 1//"),
|
111
|
+
GUMBO_STRING("-//IETF//DTD HTML Strict Level 2//"),
|
112
|
+
GUMBO_STRING("-//IETF//DTD HTML Strict Level 3//"),
|
113
|
+
GUMBO_STRING("-//IETF//DTD HTML Strict//"),
|
114
|
+
GUMBO_STRING("-//IETF//DTD HTML//"),
|
115
|
+
GUMBO_STRING("-//Metrius//DTD Metrius Presentational//"),
|
116
|
+
GUMBO_STRING("-//Microsoft//DTD Internet Explorer 2.0 HTML Strict//"),
|
117
|
+
GUMBO_STRING("-//Microsoft//DTD Internet Explorer 2.0 HTML//"),
|
118
|
+
GUMBO_STRING("-//Microsoft//DTD Internet Explorer 2.0 Tables//"),
|
119
|
+
GUMBO_STRING("-//Microsoft//DTD Internet Explorer 3.0 HTML Strict//"),
|
120
|
+
GUMBO_STRING("-//Microsoft//DTD Internet Explorer 3.0 HTML//"),
|
121
|
+
GUMBO_STRING("-//Microsoft//DTD Internet Explorer 3.0 Tables//"),
|
122
|
+
GUMBO_STRING("-//Netscape Comm. Corp.//DTD HTML//"),
|
123
|
+
GUMBO_STRING("-//Netscape Comm. Corp.//DTD Strict HTML//"),
|
124
|
+
GUMBO_STRING("-//O'Reilly and Associates//DTD HTML 2.0//"),
|
125
|
+
GUMBO_STRING("-//O'Reilly and Associates//DTD HTML Extended 1.0//"),
|
126
|
+
GUMBO_STRING("-//O'Reilly and Associates//DTD HTML Extended Relaxed 1.0//"),
|
127
|
+
GUMBO_STRING(
|
128
|
+
"-//SoftQuad Software//DTD HoTMetaL PRO 6.0::19990601::)"
|
129
|
+
"extensions to HTML 4.0//"),
|
130
|
+
GUMBO_STRING(
|
131
|
+
"-//SoftQuad//DTD HoTMetaL PRO 4.0::19971010::"
|
132
|
+
"extensions to HTML 4.0//"),
|
133
|
+
GUMBO_STRING("-//Spyglass//DTD HTML 2.0 Extended//"),
|
134
|
+
GUMBO_STRING("-//SQ//DTD HTML 2.0 HoTMetaL + extensions//"),
|
135
|
+
GUMBO_STRING("-//Sun Microsystems Corp.//DTD HotJava HTML//"),
|
136
|
+
GUMBO_STRING("-//Sun Microsystems Corp.//DTD HotJava Strict HTML//"),
|
137
|
+
GUMBO_STRING("-//W3C//DTD HTML 3 1995-03-24//"),
|
138
|
+
GUMBO_STRING("-//W3C//DTD HTML 3.2 Draft//"),
|
139
|
+
GUMBO_STRING("-//W3C//DTD HTML 3.2 Final//"),
|
140
|
+
GUMBO_STRING("-//W3C//DTD HTML 3.2//"),
|
141
|
+
GUMBO_STRING("-//W3C//DTD HTML 3.2S Draft//"),
|
142
|
+
GUMBO_STRING("-//W3C//DTD HTML 4.0 Frameset//"),
|
143
|
+
GUMBO_STRING("-//W3C//DTD HTML 4.0 Transitional//"),
|
144
|
+
GUMBO_STRING("-//W3C//DTD HTML Experimental 19960712//"),
|
145
|
+
GUMBO_STRING("-//W3C//DTD HTML Experimental 970421//"),
|
146
|
+
GUMBO_STRING("-//W3C//DTD W3 HTML//"),
|
147
|
+
GUMBO_STRING("-//W3O//DTD W3 HTML 3.0//"),
|
148
|
+
GUMBO_STRING("-//WebTechs//DTD Mozilla HTML 2.0//"),
|
149
|
+
GUMBO_STRING("-//WebTechs//DTD Mozilla HTML//"), TERMINATOR};
|
152
150
|
|
153
151
|
static const GumboStringPiece kQuirksModePublicIdExactMatches[] = {
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
TERMINATOR
|
158
|
-
};
|
152
|
+
GUMBO_STRING("-//W3O//DTD W3 HTML Strict 3.0//EN//"),
|
153
|
+
GUMBO_STRING("-/W3C/DTD HTML 4.0 Transitional/EN"), GUMBO_STRING("HTML"),
|
154
|
+
TERMINATOR};
|
159
155
|
|
160
156
|
static const GumboStringPiece kQuirksModeSystemIdExactMatches[] = {
|
161
|
-
|
162
|
-
|
163
|
-
};
|
157
|
+
GUMBO_STRING("http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd"),
|
158
|
+
TERMINATOR};
|
164
159
|
|
165
160
|
static const GumboStringPiece kLimitedQuirksPublicIdPrefixes[] = {
|
166
|
-
|
167
|
-
|
168
|
-
TERMINATOR
|
169
|
-
};
|
161
|
+
GUMBO_STRING("-//W3C//DTD XHTML 1.0 Frameset//"),
|
162
|
+
GUMBO_STRING("-//W3C//DTD XHTML 1.0 Transitional//"), TERMINATOR};
|
170
163
|
|
171
|
-
static const GumboStringPiece kLimitedQuirksRequiresSystemIdPublicIdPrefixes[] =
|
172
|
-
|
173
|
-
|
174
|
-
TERMINATOR
|
175
|
-
};
|
164
|
+
static const GumboStringPiece kLimitedQuirksRequiresSystemIdPublicIdPrefixes[] =
|
165
|
+
{GUMBO_STRING("-//W3C//DTD HTML 4.01 Frameset//"),
|
166
|
+
GUMBO_STRING("-//W3C//DTD HTML 4.01 Transitional//"), TERMINATOR};
|
176
167
|
|
177
168
|
// Indexed by GumboNamespaceEnum; keep in sync with that.
|
178
|
-
static const char* kLegalXmlns[] = {
|
179
|
-
|
180
|
-
"http://www.w3.org/2000/svg",
|
181
|
-
"http://www.w3.org/1998/Math/MathML"
|
182
|
-
};
|
169
|
+
static const char* kLegalXmlns[] = {"http://www.w3.org/1999/xhtml",
|
170
|
+
"http://www.w3.org/2000/svg", "http://www.w3.org/1998/Math/MathML"};
|
183
171
|
|
184
172
|
typedef struct _ReplacementEntry {
|
185
173
|
const GumboStringPiece from;
|
@@ -187,112 +175,112 @@ typedef struct _ReplacementEntry {
|
|
187
175
|
} ReplacementEntry;
|
188
176
|
|
189
177
|
#define REPLACEMENT_ENTRY(from, to) \
|
190
|
-
|
178
|
+
{ GUMBO_STRING(from), GUMBO_STRING(to) }
|
191
179
|
|
192
180
|
// Static data for SVG attribute replacements.
|
193
|
-
//
|
181
|
+
// https://html.spec.whatwg.org/multipage/syntax.html#creating-and-inserting-nodes
|
194
182
|
static const ReplacementEntry kSvgAttributeReplacements[] = {
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
-
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
|
207
|
-
|
208
|
-
|
209
|
-
|
210
|
-
|
211
|
-
|
212
|
-
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
|
217
|
-
|
218
|
-
|
219
|
-
|
220
|
-
|
221
|
-
|
222
|
-
|
223
|
-
|
224
|
-
|
225
|
-
|
226
|
-
|
227
|
-
|
228
|
-
|
229
|
-
|
230
|
-
|
231
|
-
|
232
|
-
|
233
|
-
|
234
|
-
|
235
|
-
|
236
|
-
|
237
|
-
|
238
|
-
|
239
|
-
|
240
|
-
|
241
|
-
|
242
|
-
|
243
|
-
|
244
|
-
|
245
|
-
|
246
|
-
|
247
|
-
|
248
|
-
|
249
|
-
|
250
|
-
|
251
|
-
|
252
|
-
|
253
|
-
|
254
|
-
|
255
|
-
|
256
|
-
|
183
|
+
REPLACEMENT_ENTRY("attributename", "attributeName"),
|
184
|
+
REPLACEMENT_ENTRY("attributetype", "attributeType"),
|
185
|
+
REPLACEMENT_ENTRY("basefrequency", "baseFrequency"),
|
186
|
+
REPLACEMENT_ENTRY("baseprofile", "baseProfile"),
|
187
|
+
REPLACEMENT_ENTRY("calcmode", "calcMode"),
|
188
|
+
REPLACEMENT_ENTRY("clippathunits", "clipPathUnits"),
|
189
|
+
// REPLACEMENT_ENTRY("contentscripttype", "contentScriptType"),
|
190
|
+
// REPLACEMENT_ENTRY("contentstyletype", "contentStyleType"),
|
191
|
+
REPLACEMENT_ENTRY("diffuseconstant", "diffuseConstant"),
|
192
|
+
REPLACEMENT_ENTRY("edgemode", "edgeMode"),
|
193
|
+
// REPLACEMENT_ENTRY("externalresourcesrequired",
|
194
|
+
// "externalResourcesRequired"),
|
195
|
+
// REPLACEMENT_ENTRY("filterres", "filterRes"),
|
196
|
+
REPLACEMENT_ENTRY("filterunits", "filterUnits"),
|
197
|
+
REPLACEMENT_ENTRY("glyphref", "glyphRef"),
|
198
|
+
REPLACEMENT_ENTRY("gradienttransform", "gradientTransform"),
|
199
|
+
REPLACEMENT_ENTRY("gradientunits", "gradientUnits"),
|
200
|
+
REPLACEMENT_ENTRY("kernelmatrix", "kernelMatrix"),
|
201
|
+
REPLACEMENT_ENTRY("kernelunitlength", "kernelUnitLength"),
|
202
|
+
REPLACEMENT_ENTRY("keypoints", "keyPoints"),
|
203
|
+
REPLACEMENT_ENTRY("keysplines", "keySplines"),
|
204
|
+
REPLACEMENT_ENTRY("keytimes", "keyTimes"),
|
205
|
+
REPLACEMENT_ENTRY("lengthadjust", "lengthAdjust"),
|
206
|
+
REPLACEMENT_ENTRY("limitingconeangle", "limitingConeAngle"),
|
207
|
+
REPLACEMENT_ENTRY("markerheight", "markerHeight"),
|
208
|
+
REPLACEMENT_ENTRY("markerunits", "markerUnits"),
|
209
|
+
REPLACEMENT_ENTRY("markerwidth", "markerWidth"),
|
210
|
+
REPLACEMENT_ENTRY("maskcontentunits", "maskContentUnits"),
|
211
|
+
REPLACEMENT_ENTRY("maskunits", "maskUnits"),
|
212
|
+
REPLACEMENT_ENTRY("numoctaves", "numOctaves"),
|
213
|
+
REPLACEMENT_ENTRY("pathlength", "pathLength"),
|
214
|
+
REPLACEMENT_ENTRY("patterncontentunits", "patternContentUnits"),
|
215
|
+
REPLACEMENT_ENTRY("patterntransform", "patternTransform"),
|
216
|
+
REPLACEMENT_ENTRY("patternunits", "patternUnits"),
|
217
|
+
REPLACEMENT_ENTRY("pointsatx", "pointsAtX"),
|
218
|
+
REPLACEMENT_ENTRY("pointsaty", "pointsAtY"),
|
219
|
+
REPLACEMENT_ENTRY("pointsatz", "pointsAtZ"),
|
220
|
+
REPLACEMENT_ENTRY("preservealpha", "preserveAlpha"),
|
221
|
+
REPLACEMENT_ENTRY("preserveaspectratio", "preserveAspectRatio"),
|
222
|
+
REPLACEMENT_ENTRY("primitiveunits", "primitiveUnits"),
|
223
|
+
REPLACEMENT_ENTRY("refx", "refX"), REPLACEMENT_ENTRY("refy", "refY"),
|
224
|
+
REPLACEMENT_ENTRY("repeatcount", "repeatCount"),
|
225
|
+
REPLACEMENT_ENTRY("repeatdur", "repeatDur"),
|
226
|
+
REPLACEMENT_ENTRY("requiredextensions", "requiredExtensions"),
|
227
|
+
REPLACEMENT_ENTRY("requiredfeatures", "requiredFeatures"),
|
228
|
+
REPLACEMENT_ENTRY("specularconstant", "specularConstant"),
|
229
|
+
REPLACEMENT_ENTRY("specularexponent", "specularExponent"),
|
230
|
+
REPLACEMENT_ENTRY("spreadmethod", "spreadMethod"),
|
231
|
+
REPLACEMENT_ENTRY("startoffset", "startOffset"),
|
232
|
+
REPLACEMENT_ENTRY("stddeviation", "stdDeviation"),
|
233
|
+
REPLACEMENT_ENTRY("stitchtiles", "stitchTiles"),
|
234
|
+
REPLACEMENT_ENTRY("surfacescale", "surfaceScale"),
|
235
|
+
REPLACEMENT_ENTRY("systemlanguage", "systemLanguage"),
|
236
|
+
REPLACEMENT_ENTRY("tablevalues", "tableValues"),
|
237
|
+
REPLACEMENT_ENTRY("targetx", "targetX"),
|
238
|
+
REPLACEMENT_ENTRY("targety", "targetY"),
|
239
|
+
REPLACEMENT_ENTRY("textlength", "textLength"),
|
240
|
+
REPLACEMENT_ENTRY("viewbox", "viewBox"),
|
241
|
+
REPLACEMENT_ENTRY("viewtarget", "viewTarget"),
|
242
|
+
REPLACEMENT_ENTRY("xchannelselector", "xChannelSelector"),
|
243
|
+
REPLACEMENT_ENTRY("ychannelselector", "yChannelSelector"),
|
244
|
+
REPLACEMENT_ENTRY("zoomandpan", "zoomAndPan"),
|
257
245
|
};
|
258
246
|
|
259
247
|
static const ReplacementEntry kSvgTagReplacements[] = {
|
260
|
-
|
261
|
-
|
262
|
-
|
263
|
-
|
264
|
-
|
265
|
-
|
266
|
-
|
267
|
-
|
268
|
-
|
269
|
-
|
270
|
-
|
271
|
-
|
272
|
-
|
273
|
-
|
274
|
-
|
275
|
-
|
276
|
-
|
277
|
-
|
278
|
-
|
279
|
-
|
280
|
-
|
281
|
-
|
282
|
-
|
283
|
-
|
284
|
-
|
285
|
-
|
286
|
-
|
287
|
-
|
288
|
-
|
289
|
-
|
290
|
-
|
291
|
-
|
292
|
-
|
293
|
-
|
294
|
-
|
295
|
-
|
248
|
+
REPLACEMENT_ENTRY("altglyph", "altGlyph"),
|
249
|
+
REPLACEMENT_ENTRY("altglyphdef", "altGlyphDef"),
|
250
|
+
REPLACEMENT_ENTRY("altglyphitem", "altGlyphItem"),
|
251
|
+
REPLACEMENT_ENTRY("animatecolor", "animateColor"),
|
252
|
+
REPLACEMENT_ENTRY("animatemotion", "animateMotion"),
|
253
|
+
REPLACEMENT_ENTRY("animatetransform", "animateTransform"),
|
254
|
+
REPLACEMENT_ENTRY("clippath", "clipPath"),
|
255
|
+
REPLACEMENT_ENTRY("feblend", "feBlend"),
|
256
|
+
REPLACEMENT_ENTRY("fecolormatrix", "feColorMatrix"),
|
257
|
+
REPLACEMENT_ENTRY("fecomponenttransfer", "feComponentTransfer"),
|
258
|
+
REPLACEMENT_ENTRY("fecomposite", "feComposite"),
|
259
|
+
REPLACEMENT_ENTRY("feconvolvematrix", "feConvolveMatrix"),
|
260
|
+
REPLACEMENT_ENTRY("fediffuselighting", "feDiffuseLighting"),
|
261
|
+
REPLACEMENT_ENTRY("fedisplacementmap", "feDisplacementMap"),
|
262
|
+
REPLACEMENT_ENTRY("fedistantlight", "feDistantLight"),
|
263
|
+
REPLACEMENT_ENTRY("feflood", "feFlood"),
|
264
|
+
REPLACEMENT_ENTRY("fefunca", "feFuncA"),
|
265
|
+
REPLACEMENT_ENTRY("fefuncb", "feFuncB"),
|
266
|
+
REPLACEMENT_ENTRY("fefuncg", "feFuncG"),
|
267
|
+
REPLACEMENT_ENTRY("fefuncr", "feFuncR"),
|
268
|
+
REPLACEMENT_ENTRY("fegaussianblur", "feGaussianBlur"),
|
269
|
+
REPLACEMENT_ENTRY("feimage", "feImage"),
|
270
|
+
REPLACEMENT_ENTRY("femerge", "feMerge"),
|
271
|
+
REPLACEMENT_ENTRY("femergenode", "feMergeNode"),
|
272
|
+
REPLACEMENT_ENTRY("femorphology", "feMorphology"),
|
273
|
+
REPLACEMENT_ENTRY("feoffset", "feOffset"),
|
274
|
+
REPLACEMENT_ENTRY("fepointlight", "fePointLight"),
|
275
|
+
REPLACEMENT_ENTRY("fespecularlighting", "feSpecularLighting"),
|
276
|
+
REPLACEMENT_ENTRY("fespotlight", "feSpotLight"),
|
277
|
+
REPLACEMENT_ENTRY("fetile", "feTile"),
|
278
|
+
REPLACEMENT_ENTRY("feturbulence", "feTurbulence"),
|
279
|
+
REPLACEMENT_ENTRY("foreignobject", "foreignObject"),
|
280
|
+
REPLACEMENT_ENTRY("glyphref", "glyphRef"),
|
281
|
+
REPLACEMENT_ENTRY("lineargradient", "linearGradient"),
|
282
|
+
REPLACEMENT_ENTRY("radialgradient", "radialGradient"),
|
283
|
+
REPLACEMENT_ENTRY("textpath", "textPath"),
|
296
284
|
};
|
297
285
|
|
298
286
|
typedef struct _NamespacedAttributeReplacement {
|
@@ -302,18 +290,18 @@ typedef struct _NamespacedAttributeReplacement {
|
|
302
290
|
} NamespacedAttributeReplacement;
|
303
291
|
|
304
292
|
static const NamespacedAttributeReplacement kForeignAttributeReplacements[] = {
|
305
|
-
|
306
|
-
|
307
|
-
|
308
|
-
|
309
|
-
|
310
|
-
|
311
|
-
|
312
|
-
|
313
|
-
|
314
|
-
|
315
|
-
|
316
|
-
|
293
|
+
{"xlink:actuate", "actuate", GUMBO_ATTR_NAMESPACE_XLINK},
|
294
|
+
{"xlink:actuate", "actuate", GUMBO_ATTR_NAMESPACE_XLINK},
|
295
|
+
{"xlink:href", "href", GUMBO_ATTR_NAMESPACE_XLINK},
|
296
|
+
{"xlink:role", "role", GUMBO_ATTR_NAMESPACE_XLINK},
|
297
|
+
{"xlink:show", "show", GUMBO_ATTR_NAMESPACE_XLINK},
|
298
|
+
{"xlink:title", "title", GUMBO_ATTR_NAMESPACE_XLINK},
|
299
|
+
{"xlink:type", "type", GUMBO_ATTR_NAMESPACE_XLINK},
|
300
|
+
{"xml:base", "base", GUMBO_ATTR_NAMESPACE_XML},
|
301
|
+
{"xml:lang", "lang", GUMBO_ATTR_NAMESPACE_XML},
|
302
|
+
{"xml:space", "space", GUMBO_ATTR_NAMESPACE_XML},
|
303
|
+
{"xmlns", "xmlns", GUMBO_ATTR_NAMESPACE_XMLNS},
|
304
|
+
{"xmlns:xlink", "xlink", GUMBO_ATTR_NAMESPACE_XMLNS},
|
317
305
|
};
|
318
306
|
|
319
307
|
// The "scope marker" for the list of active formatting elements. We use a
|
@@ -371,6 +359,9 @@ typedef struct GumboInternalParserState {
|
|
371
359
|
GumboNode* _head_element;
|
372
360
|
GumboNode* _form_element;
|
373
361
|
|
362
|
+
// The element used as fragment context when parsing in fragment mode
|
363
|
+
GumboNode* _fragment_ctx;
|
364
|
+
|
374
365
|
// The flag for when the spec says "Reprocess the current token in..."
|
375
366
|
bool _reprocess_current_token;
|
376
367
|
|
@@ -427,14 +418,14 @@ static bool attribute_matches(
|
|
427
418
|
static bool attribute_matches_case_sensitive(
|
428
419
|
const GumboVector* attributes, const char* name, const char* value) {
|
429
420
|
const GumboAttribute* attr = gumbo_get_attribute(attributes, name);
|
430
|
-
return attr ?
|
421
|
+
return attr ? strcmp(value, attr->value) == 0 : false;
|
431
422
|
}
|
432
423
|
|
433
424
|
// Checks if the specified attribute vectors are identical.
|
434
425
|
static bool all_attributes_match(
|
435
426
|
const GumboVector* attr1, const GumboVector* attr2) {
|
436
|
-
int num_unmatched_attr2_elements = attr2->length;
|
437
|
-
for (int i = 0; i < attr1->length; ++i) {
|
427
|
+
unsigned int num_unmatched_attr2_elements = attr2->length;
|
428
|
+
for (unsigned int i = 0; i < attr1->length; ++i) {
|
438
429
|
const GumboAttribute* attr = attr1->data[i];
|
439
430
|
if (attribute_matches_case_sensitive(attr2, attr->name, attr->value)) {
|
440
431
|
--num_unmatched_attr2_elements;
|
@@ -462,8 +453,7 @@ static GumboNode* create_node(GumboParser* parser, GumboNodeType type) {
|
|
462
453
|
static GumboNode* new_document_node(GumboParser* parser) {
|
463
454
|
GumboNode* document_node = create_node(parser, GUMBO_NODE_DOCUMENT);
|
464
455
|
document_node->parse_flags = GUMBO_INSERTION_BY_PARSER;
|
465
|
-
gumbo_vector_init(
|
466
|
-
parser, 1, &document_node->v.document.children);
|
456
|
+
gumbo_vector_init(parser, 1, &document_node->v.document.children);
|
467
457
|
|
468
458
|
// Must be initialized explicitly, as there's no guarantee that we'll see a
|
469
459
|
// doc type token.
|
@@ -498,6 +488,7 @@ static void parser_state_init(GumboParser* parser) {
|
|
498
488
|
gumbo_vector_init(parser, 5, &parser_state->_template_insertion_modes);
|
499
489
|
parser_state->_head_element = NULL;
|
500
490
|
parser_state->_form_element = NULL;
|
491
|
+
parser_state->_fragment_ctx = NULL;
|
501
492
|
parser_state->_current_token = NULL;
|
502
493
|
parser_state->_closed_body_tag = false;
|
503
494
|
parser_state->_closed_html_tag = false;
|
@@ -506,6 +497,9 @@ static void parser_state_init(GumboParser* parser) {
|
|
506
497
|
|
507
498
|
static void parser_state_destroy(GumboParser* parser) {
|
508
499
|
GumboParserState* state = parser->_parser_state;
|
500
|
+
if (state->_fragment_ctx) {
|
501
|
+
destroy_node(parser, state->_fragment_ctx);
|
502
|
+
}
|
509
503
|
gumbo_vector_destroy(parser, &state->_active_formatting_elements);
|
510
504
|
gumbo_vector_destroy(parser, &state->_open_elements);
|
511
505
|
gumbo_vector_destroy(parser, &state->_template_insertion_modes);
|
@@ -517,6 +511,10 @@ static GumboNode* get_document_node(GumboParser* parser) {
|
|
517
511
|
return parser->_output->document;
|
518
512
|
}
|
519
513
|
|
514
|
+
static bool is_fragment_parser(const GumboParser* parser) {
|
515
|
+
return !!parser->_parser_state->_fragment_ctx;
|
516
|
+
}
|
517
|
+
|
520
518
|
// Returns the node at the bottom of the stack of open elements, or NULL if no
|
521
519
|
// elements have been added yet.
|
522
520
|
static GumboNode* get_current_node(GumboParser* parser) {
|
@@ -530,6 +528,14 @@ static GumboNode* get_current_node(GumboParser* parser) {
|
|
530
528
|
return open_elements->data[open_elements->length - 1];
|
531
529
|
}
|
532
530
|
|
531
|
+
static GumboNode* get_adjusted_current_node(GumboParser* parser) {
|
532
|
+
GumboParserState* state = parser->_parser_state;
|
533
|
+
if (state->_open_elements.length == 1 && state->_fragment_ctx) {
|
534
|
+
return state->_fragment_ctx;
|
535
|
+
}
|
536
|
+
return get_current_node(parser);
|
537
|
+
}
|
538
|
+
|
533
539
|
// Returns true if the given needle is in the given array of literal
|
534
540
|
// GumboStringPieces. If exact_match is true, this requires that they match
|
535
541
|
// exactly; otherwise, this performs a prefix match to check if any of the
|
@@ -537,7 +543,7 @@ static GumboNode* get_current_node(GumboParser* parser) {
|
|
537
543
|
// case-insensitive match.
|
538
544
|
static bool is_in_static_list(
|
539
545
|
const char* needle, const GumboStringPiece* haystack, bool exact_match) {
|
540
|
-
for (int i = 0; haystack[i].length > 0; ++i) {
|
546
|
+
for (unsigned int i = 0; haystack[i].length > 0; ++i) {
|
541
547
|
if ((exact_match && !strcmp(needle, haystack[i].data)) ||
|
542
548
|
(!exact_match && !strcasecmp(needle, haystack[i].data))) {
|
543
549
|
return true;
|
@@ -556,39 +562,63 @@ static void set_insertion_mode(GumboParser* parser, GumboInsertionMode mode) {
|
|
556
562
|
// indicate that there is no appropriate insertion mode, and the loop should
|
557
563
|
// continue.
|
558
564
|
static GumboInsertionMode get_appropriate_insertion_mode(
|
559
|
-
const
|
560
|
-
|
565
|
+
const GumboParser* parser, int index) {
|
566
|
+
const GumboVector* open_elements = &parser->_parser_state->_open_elements;
|
567
|
+
const GumboNode* node = open_elements->data[index];
|
568
|
+
const bool is_last = index == 0;
|
561
569
|
|
562
|
-
if (
|
563
|
-
|
564
|
-
|
570
|
+
if (is_last && is_fragment_parser(parser)) {
|
571
|
+
node = parser->_parser_state->_fragment_ctx;
|
572
|
+
}
|
573
|
+
|
574
|
+
assert(node->type == GUMBO_NODE_ELEMENT || node->type == GUMBO_NODE_TEMPLATE);
|
575
|
+
switch (node->v.element.tag) {
|
576
|
+
case GUMBO_TAG_SELECT: {
|
577
|
+
if (is_last) {
|
578
|
+
return GUMBO_INSERTION_MODE_IN_SELECT;
|
579
|
+
}
|
580
|
+
for (int i = index; i > 0; --i) {
|
581
|
+
const GumboNode* ancestor = open_elements->data[i];
|
582
|
+
if (node_html_tag_is(ancestor, GUMBO_TAG_TEMPLATE)) {
|
565
583
|
return GUMBO_INSERTION_MODE_IN_SELECT;
|
566
|
-
|
567
|
-
|
568
|
-
return
|
569
|
-
|
570
|
-
|
571
|
-
|
572
|
-
case GUMBO_TAG_TBODY:
|
573
|
-
case GUMBO_TAG_THEAD:
|
574
|
-
case GUMBO_TAG_TFOOT:
|
575
|
-
return GUMBO_INSERTION_MODE_IN_TABLE_BODY;
|
576
|
-
case GUMBO_TAG_CAPTION:
|
577
|
-
return GUMBO_INSERTION_MODE_IN_CAPTION;
|
578
|
-
case GUMBO_TAG_COLGROUP:
|
579
|
-
return GUMBO_INSERTION_MODE_IN_COLUMN_GROUP;
|
580
|
-
case GUMBO_TAG_TABLE:
|
581
|
-
return GUMBO_INSERTION_MODE_IN_TABLE;
|
582
|
-
case GUMBO_TAG_HEAD:
|
583
|
-
case GUMBO_TAG_BODY:
|
584
|
-
return GUMBO_INSERTION_MODE_IN_BODY;
|
585
|
-
case GUMBO_TAG_FRAMESET:
|
586
|
-
return GUMBO_INSERTION_MODE_IN_FRAMESET;
|
587
|
-
case GUMBO_TAG_HTML:
|
588
|
-
return GUMBO_INSERTION_MODE_BEFORE_HEAD;
|
589
|
-
default:
|
590
|
-
break;
|
584
|
+
}
|
585
|
+
if (node_html_tag_is(ancestor, GUMBO_TAG_TABLE)) {
|
586
|
+
return GUMBO_INSERTION_MODE_IN_SELECT_IN_TABLE;
|
587
|
+
}
|
588
|
+
}
|
589
|
+
return GUMBO_INSERTION_MODE_IN_SELECT;
|
591
590
|
}
|
591
|
+
case GUMBO_TAG_TD:
|
592
|
+
case GUMBO_TAG_TH:
|
593
|
+
if (!is_last) return GUMBO_INSERTION_MODE_IN_CELL;
|
594
|
+
break;
|
595
|
+
case GUMBO_TAG_TR:
|
596
|
+
return GUMBO_INSERTION_MODE_IN_ROW;
|
597
|
+
case GUMBO_TAG_TBODY:
|
598
|
+
case GUMBO_TAG_THEAD:
|
599
|
+
case GUMBO_TAG_TFOOT:
|
600
|
+
return GUMBO_INSERTION_MODE_IN_TABLE_BODY;
|
601
|
+
case GUMBO_TAG_CAPTION:
|
602
|
+
return GUMBO_INSERTION_MODE_IN_CAPTION;
|
603
|
+
case GUMBO_TAG_COLGROUP:
|
604
|
+
return GUMBO_INSERTION_MODE_IN_COLUMN_GROUP;
|
605
|
+
case GUMBO_TAG_TABLE:
|
606
|
+
return GUMBO_INSERTION_MODE_IN_TABLE;
|
607
|
+
case GUMBO_TAG_TEMPLATE:
|
608
|
+
return get_current_template_insertion_mode(parser);
|
609
|
+
case GUMBO_TAG_HEAD:
|
610
|
+
if (!is_last) return GUMBO_INSERTION_MODE_IN_HEAD;
|
611
|
+
break;
|
612
|
+
case GUMBO_TAG_BODY:
|
613
|
+
return GUMBO_INSERTION_MODE_IN_BODY;
|
614
|
+
case GUMBO_TAG_FRAMESET:
|
615
|
+
return GUMBO_INSERTION_MODE_IN_FRAMESET;
|
616
|
+
case GUMBO_TAG_HTML:
|
617
|
+
return parser->_parser_state->_head_element
|
618
|
+
? GUMBO_INSERTION_MODE_AFTER_HEAD
|
619
|
+
: GUMBO_INSERTION_MODE_BEFORE_HEAD;
|
620
|
+
default:
|
621
|
+
break;
|
592
622
|
}
|
593
623
|
return is_last ? GUMBO_INSERTION_MODE_IN_BODY : GUMBO_INSERTION_MODE_INITIAL;
|
594
624
|
}
|
@@ -596,9 +626,8 @@ static GumboInsertionMode get_appropriate_insertion_mode(
|
|
596
626
|
// This performs the actual "reset the insertion mode" loop.
|
597
627
|
static void reset_insertion_mode_appropriately(GumboParser* parser) {
|
598
628
|
const GumboVector* open_elements = &parser->_parser_state->_open_elements;
|
599
|
-
for (int i = open_elements->length; --i >= 0;
|
600
|
-
GumboInsertionMode mode =
|
601
|
-
get_appropriate_insertion_mode(open_elements->data[i], i == 0);
|
629
|
+
for (int i = open_elements->length; --i >= 0;) {
|
630
|
+
GumboInsertionMode mode = get_appropriate_insertion_mode(parser, i);
|
602
631
|
if (mode != GUMBO_INSERTION_MODE_INITIAL) {
|
603
632
|
set_insertion_mode(parser, mode);
|
604
633
|
return;
|
@@ -609,7 +638,8 @@ static void reset_insertion_mode_appropriately(GumboParser* parser) {
|
|
609
638
|
assert(0);
|
610
639
|
}
|
611
640
|
|
612
|
-
static GumboError* parser_add_parse_error(
|
641
|
+
static GumboError* parser_add_parse_error(
|
642
|
+
GumboParser* parser, const GumboToken* token) {
|
613
643
|
gumbo_debug("Adding parse error.\n");
|
614
644
|
GumboError* error = gumbo_add_error(parser);
|
615
645
|
if (!error) {
|
@@ -628,13 +658,14 @@ static GumboError* parser_add_parse_error(GumboParser* parser, const GumboToken*
|
|
628
658
|
}
|
629
659
|
GumboParserState* state = parser->_parser_state;
|
630
660
|
extra_data->parser_state = state->_insertion_mode;
|
631
|
-
gumbo_vector_init(
|
632
|
-
|
633
|
-
for (int i = 0; i < state->_open_elements.length; ++i) {
|
661
|
+
gumbo_vector_init(
|
662
|
+
parser, state->_open_elements.length, &extra_data->tag_stack);
|
663
|
+
for (unsigned int i = 0; i < state->_open_elements.length; ++i) {
|
634
664
|
const GumboNode* node = state->_open_elements.data[i];
|
635
|
-
assert(
|
636
|
-
|
637
|
-
|
665
|
+
assert(
|
666
|
+
node->type == GUMBO_NODE_ELEMENT || node->type == GUMBO_NODE_TEMPLATE);
|
667
|
+
gumbo_vector_add(
|
668
|
+
parser, (void*) node->v.element.tag, &extra_data->tag_stack);
|
638
669
|
}
|
639
670
|
return error;
|
640
671
|
}
|
@@ -643,7 +674,8 @@ static GumboError* parser_add_parse_error(GumboParser* parser, const GumboToken*
|
|
643
674
|
// by is_start) with one of the tag types in the varargs list. Terminate the
|
644
675
|
// list with GUMBO_TAG_LAST; this functions as a sentinel since no portion of
|
645
676
|
// the spec references tags that are not in the spec.
|
646
|
-
static bool tag_in(
|
677
|
+
static bool tag_in(
|
678
|
+
const GumboToken* token, bool is_start, const gumbo_tagset tags) {
|
647
679
|
GumboTag token_tag;
|
648
680
|
if (is_start && token->type == GUMBO_TOKEN_START_TAG) {
|
649
681
|
token_tag = token->v.start_tag.tag;
|
@@ -652,7 +684,7 @@ static bool tag_in(const GumboToken* token, bool is_start, const gumbo_tagset ta
|
|
652
684
|
} else {
|
653
685
|
return false;
|
654
686
|
}
|
655
|
-
return (token_tag < GUMBO_TAG_LAST && tags[(int)token_tag] != 0);
|
687
|
+
return (token_tag < GUMBO_TAG_LAST && tags[(int) token_tag] != 0);
|
656
688
|
}
|
657
689
|
|
658
690
|
// Like tag_in, but for the single-tag case.
|
@@ -669,41 +701,123 @@ static bool tag_is(const GumboToken* token, bool is_start, GumboTag tag) {
|
|
669
701
|
// Like tag_in, but checks for the tag of a node, rather than a token.
|
670
702
|
static bool node_tag_in_set(const GumboNode* node, const gumbo_tagset tags) {
|
671
703
|
assert(node != NULL);
|
672
|
-
if (node->type != GUMBO_NODE_ELEMENT) {
|
704
|
+
if (node->type != GUMBO_NODE_ELEMENT && node->type != GUMBO_NODE_TEMPLATE) {
|
673
705
|
return false;
|
674
706
|
}
|
675
|
-
return TAGSET_INCLUDES(
|
707
|
+
return TAGSET_INCLUDES(
|
708
|
+
tags, node->v.element.tag_namespace, node->v.element.tag);
|
676
709
|
}
|
677
710
|
|
678
|
-
|
679
711
|
// Like node_tag_in, but for the single-tag case.
|
680
|
-
static bool node_qualified_tag_is(
|
681
|
-
|
682
|
-
|
683
|
-
|
712
|
+
static bool node_qualified_tag_is(
|
713
|
+
const GumboNode* node, GumboNamespaceEnum ns, GumboTag tag) {
|
714
|
+
assert(node);
|
715
|
+
return (node->type == GUMBO_NODE_ELEMENT ||
|
716
|
+
node->type == GUMBO_NODE_TEMPLATE) &&
|
717
|
+
node->v.element.tag == tag && node->v.element.tag_namespace == ns;
|
684
718
|
}
|
685
719
|
|
686
720
|
// Like node_tag_in, but for the single-tag case in the HTML namespace
|
687
|
-
static bool node_html_tag_is(const GumboNode* node, GumboTag tag)
|
688
|
-
{
|
721
|
+
static bool node_html_tag_is(const GumboNode* node, GumboTag tag) {
|
689
722
|
return node_qualified_tag_is(node, GUMBO_NAMESPACE_HTML, tag);
|
690
723
|
}
|
691
724
|
|
725
|
+
static void push_template_insertion_mode(
|
726
|
+
GumboParser* parser, GumboInsertionMode mode) {
|
727
|
+
gumbo_vector_add(
|
728
|
+
parser, (void*) mode, &parser->_parser_state->_template_insertion_modes);
|
729
|
+
}
|
730
|
+
|
731
|
+
static void pop_template_insertion_mode(GumboParser* parser) {
|
732
|
+
gumbo_vector_pop(parser, &parser->_parser_state->_template_insertion_modes);
|
733
|
+
}
|
734
|
+
|
735
|
+
// Returns the current template insertion mode. If the stack of template
|
736
|
+
// insertion modes is empty, this returns GUMBO_INSERTION_MODE_INITIAL.
|
737
|
+
static GumboInsertionMode get_current_template_insertion_mode(
|
738
|
+
const GumboParser* parser) {
|
739
|
+
GumboVector* template_insertion_modes =
|
740
|
+
&parser->_parser_state->_template_insertion_modes;
|
741
|
+
if (template_insertion_modes->length == 0) {
|
742
|
+
return GUMBO_INSERTION_MODE_INITIAL;
|
743
|
+
}
|
744
|
+
return (GumboInsertionMode)
|
745
|
+
template_insertion_modes->data[(template_insertion_modes->length - 1)];
|
746
|
+
}
|
692
747
|
|
693
748
|
// http://www.whatwg.org/specs/web-apps/current-work/multipage/tree-construction.html#mathml-text-integration-point
|
694
749
|
static bool is_mathml_integration_point(const GumboNode* node) {
|
695
|
-
return node_tag_in_set(
|
696
|
-
|
750
|
+
return node_tag_in_set(
|
751
|
+
node, (gumbo_tagset){TAG_MATHML(MI), TAG_MATHML(MO), TAG_MATHML(MN),
|
752
|
+
TAG_MATHML(MS), TAG_MATHML(MTEXT)});
|
697
753
|
}
|
698
754
|
|
699
755
|
// http://www.whatwg.org/specs/web-apps/current-work/multipage/tree-construction.html#html-integration-point
|
700
756
|
static bool is_html_integration_point(const GumboNode* node) {
|
701
|
-
return node_tag_in_set(node, (gumbo_tagset)
|
702
|
-
|
703
|
-
|
704
|
-
|
705
|
-
|
706
|
-
|
757
|
+
return node_tag_in_set(node, (gumbo_tagset){TAG_SVG(FOREIGNOBJECT),
|
758
|
+
TAG_SVG(DESC), TAG_SVG(TITLE)}) ||
|
759
|
+
(node_qualified_tag_is(
|
760
|
+
node, GUMBO_NAMESPACE_MATHML, GUMBO_TAG_ANNOTATION_XML) &&
|
761
|
+
(attribute_matches(
|
762
|
+
&node->v.element.attributes, "encoding", "text/html") ||
|
763
|
+
attribute_matches(&node->v.element.attributes, "encoding",
|
764
|
+
"application/xhtml+xml")));
|
765
|
+
}
|
766
|
+
|
767
|
+
// This represents a place to insert a node, consisting of a target parent and a
|
768
|
+
// child index within that parent. If the node should be inserted at the end of
|
769
|
+
// the parent's child, index will be -1.
|
770
|
+
typedef struct {
|
771
|
+
GumboNode* target;
|
772
|
+
int index;
|
773
|
+
} InsertionLocation;
|
774
|
+
|
775
|
+
InsertionLocation get_appropriate_insertion_location(
|
776
|
+
GumboParser* parser, GumboNode* override_target) {
|
777
|
+
InsertionLocation retval = {override_target, -1};
|
778
|
+
if (retval.target == NULL) {
|
779
|
+
// No override target; default to the current node, but special-case the
|
780
|
+
// root node since get_current_node() assumes the stack of open elements is
|
781
|
+
// non-empty.
|
782
|
+
retval.target = parser->_output->root != NULL ? get_current_node(parser)
|
783
|
+
: get_document_node(parser);
|
784
|
+
}
|
785
|
+
if (!parser->_parser_state->_foster_parent_insertions ||
|
786
|
+
!node_tag_in_set(retval.target, (gumbo_tagset){TAG(TABLE), TAG(TBODY),
|
787
|
+
TAG(TFOOT), TAG(THEAD), TAG(TR)})) {
|
788
|
+
return retval;
|
789
|
+
}
|
790
|
+
|
791
|
+
// Foster-parenting case.
|
792
|
+
int last_template_index = -1;
|
793
|
+
int last_table_index = -1;
|
794
|
+
GumboVector* open_elements = &parser->_parser_state->_open_elements;
|
795
|
+
for (unsigned int i = 0; i < open_elements->length; ++i) {
|
796
|
+
if (node_html_tag_is(open_elements->data[i], GUMBO_TAG_TEMPLATE)) {
|
797
|
+
last_template_index = i;
|
798
|
+
}
|
799
|
+
if (node_html_tag_is(open_elements->data[i], GUMBO_TAG_TABLE)) {
|
800
|
+
last_table_index = i;
|
801
|
+
}
|
802
|
+
}
|
803
|
+
if (last_template_index != -1 &&
|
804
|
+
(last_table_index == -1 || last_template_index > last_table_index)) {
|
805
|
+
retval.target = open_elements->data[last_template_index];
|
806
|
+
return retval;
|
807
|
+
}
|
808
|
+
if (last_table_index == -1) {
|
809
|
+
retval.target = open_elements->data[0];
|
810
|
+
return retval;
|
811
|
+
}
|
812
|
+
GumboNode* last_table = open_elements->data[last_table_index];
|
813
|
+
if (last_table->parent != NULL) {
|
814
|
+
retval.target = last_table->parent;
|
815
|
+
retval.index = last_table->index_within_parent;
|
816
|
+
return retval;
|
817
|
+
}
|
818
|
+
|
819
|
+
retval.target = open_elements->data[last_table_index - 1];
|
820
|
+
return retval;
|
707
821
|
}
|
708
822
|
|
709
823
|
// Appends a node to the end of its parent, setting the "parent" and
|
@@ -713,7 +827,8 @@ static void append_node(
|
|
713
827
|
assert(node->parent == NULL);
|
714
828
|
assert(node->index_within_parent == -1);
|
715
829
|
GumboVector* children;
|
716
|
-
if (parent->type == GUMBO_NODE_ELEMENT
|
830
|
+
if (parent->type == GUMBO_NODE_ELEMENT ||
|
831
|
+
parent->type == GUMBO_NODE_TEMPLATE) {
|
717
832
|
children = &parent->v.element.children;
|
718
833
|
} else {
|
719
834
|
assert(parent->type == GUMBO_NODE_DOCUMENT);
|
@@ -725,64 +840,41 @@ static void append_node(
|
|
725
840
|
assert(node->index_within_parent < children->length);
|
726
841
|
}
|
727
842
|
|
728
|
-
// Inserts a node at the specified
|
843
|
+
// Inserts a node at the specified InsertionLocation, updating the
|
729
844
|
// "parent" and "index_within_parent" fields of it and all its siblings.
|
845
|
+
// If the index of the location is -1, this calls append_node.
|
730
846
|
static void insert_node(
|
731
|
-
GumboParser* parser, GumboNode*
|
847
|
+
GumboParser* parser, GumboNode* node, InsertionLocation location) {
|
732
848
|
assert(node->parent == NULL);
|
733
849
|
assert(node->index_within_parent == -1);
|
734
|
-
|
735
|
-
|
736
|
-
|
737
|
-
|
738
|
-
|
739
|
-
|
740
|
-
|
741
|
-
|
742
|
-
|
743
|
-
|
744
|
-
|
745
|
-
|
746
|
-
|
747
|
-
}
|
850
|
+
GumboNode* parent = location.target;
|
851
|
+
int index = location.index;
|
852
|
+
if (index != -1) {
|
853
|
+
GumboVector* children = NULL;
|
854
|
+
if (parent->type == GUMBO_NODE_ELEMENT ||
|
855
|
+
parent->type == GUMBO_NODE_TEMPLATE) {
|
856
|
+
children = &parent->v.element.children;
|
857
|
+
} else if (parent->type == GUMBO_NODE_DOCUMENT) {
|
858
|
+
children = &parent->v.document.children;
|
859
|
+
assert(children->length == 0);
|
860
|
+
} else {
|
861
|
+
assert(0);
|
862
|
+
}
|
748
863
|
|
749
|
-
|
750
|
-
|
751
|
-
|
752
|
-
|
753
|
-
|
754
|
-
|
755
|
-
|
756
|
-
|
757
|
-
|
758
|
-
|
759
|
-
GumboNode* table_element = open_elements->data[i];
|
760
|
-
if (node_html_tag_is(table_element, GUMBO_TAG_TABLE)) {
|
761
|
-
foster_parent_element = table_element->parent;
|
762
|
-
if (!foster_parent_element ||
|
763
|
-
foster_parent_element->type != GUMBO_NODE_ELEMENT) {
|
764
|
-
// Table has no parent; spec says it's possible if a script manipulated
|
765
|
-
// the DOM, although I don't think we have to worry about this case.
|
766
|
-
gumbo_debug("Table has no parent.\n");
|
767
|
-
foster_parent_element = open_elements->data[i - 1];
|
768
|
-
break;
|
769
|
-
}
|
770
|
-
assert(foster_parent_element->type == GUMBO_NODE_ELEMENT);
|
771
|
-
gumbo_debug("Found enclosing table (%x) at %d; parent=%s, index=%d.\n",
|
772
|
-
table_element, i, gumbo_normalized_tagname(
|
773
|
-
foster_parent_element->v.element.tag),
|
774
|
-
table_element->index_within_parent);
|
775
|
-
assert(foster_parent_element->v.element.children.data[
|
776
|
-
table_element->index_within_parent] == table_element);
|
777
|
-
insert_node(parser, foster_parent_element,
|
778
|
-
table_element->index_within_parent, node);
|
779
|
-
return;
|
864
|
+
assert(index >= 0);
|
865
|
+
assert((unsigned int) index < children->length);
|
866
|
+
node->parent = parent;
|
867
|
+
node->index_within_parent = index;
|
868
|
+
gumbo_vector_insert_at(parser, (void*) node, index, children);
|
869
|
+
assert(node->index_within_parent < children->length);
|
870
|
+
for (unsigned int i = index + 1; i < children->length; ++i) {
|
871
|
+
GumboNode* sibling = children->data[i];
|
872
|
+
sibling->index_within_parent = i;
|
873
|
+
assert(sibling->index_within_parent < children->length);
|
780
874
|
}
|
875
|
+
} else {
|
876
|
+
append_node(parser, parent, node);
|
781
877
|
}
|
782
|
-
if (node->type == GUMBO_NODE_ELEMENT) {
|
783
|
-
gumbo_vector_add(parser, (void*) node, open_elements);
|
784
|
-
}
|
785
|
-
append_node(parser, foster_parent_element, node);
|
786
878
|
}
|
787
879
|
|
788
880
|
static void maybe_flush_text_node_buffer(GumboParser* parser) {
|
@@ -797,27 +889,27 @@ static void maybe_flush_text_node_buffer(GumboParser* parser) {
|
|
797
889
|
buffer_state->_type == GUMBO_NODE_CDATA);
|
798
890
|
GumboNode* text_node = create_node(parser, buffer_state->_type);
|
799
891
|
GumboText* text_node_data = &text_node->v.text;
|
800
|
-
text_node_data->text =
|
801
|
-
parser, &buffer_state->_buffer);
|
892
|
+
text_node_data->text =
|
893
|
+
gumbo_string_buffer_to_string(parser, &buffer_state->_buffer);
|
802
894
|
text_node_data->original_text.data = buffer_state->_start_original_text;
|
803
895
|
text_node_data->original_text.length =
|
804
896
|
state->_current_token->original_text.data -
|
805
897
|
buffer_state->_start_original_text;
|
806
898
|
text_node_data->start_pos = buffer_state->_start_position;
|
807
|
-
|
808
|
-
|
809
|
-
|
810
|
-
|
899
|
+
|
900
|
+
gumbo_debug("Flushing text node buffer of %.*s.\n",
|
901
|
+
(int) buffer_state->_buffer.length, buffer_state->_buffer.data);
|
902
|
+
|
903
|
+
InsertionLocation location = get_appropriate_insertion_location(parser, NULL);
|
904
|
+
if (location.target->type == GUMBO_NODE_DOCUMENT) {
|
905
|
+
// The DOM does not allow Document nodes to have Text children, so per the
|
906
|
+
// spec, they are dropped on the floor.
|
907
|
+
destroy_node(parser, text_node);
|
811
908
|
} else {
|
812
|
-
|
813
|
-
parser, parser->_output->root ?
|
814
|
-
get_current_node(parser) : parser->_output->document, text_node);
|
909
|
+
insert_node(parser, text_node, location);
|
815
910
|
}
|
816
|
-
gumbo_debug("Flushing text node buffer of %.*s.\n",
|
817
|
-
(int) buffer_state->_buffer.length, buffer_state->_buffer.data);
|
818
911
|
|
819
|
-
|
820
|
-
gumbo_string_buffer_init(parser, &buffer_state->_buffer);
|
912
|
+
gumbo_string_buffer_clear(parser, &buffer_state->_buffer);
|
821
913
|
buffer_state->_type = GUMBO_NODE_WHITESPACE;
|
822
914
|
assert(buffer_state->_buffer.length == 0);
|
823
915
|
}
|
@@ -825,9 +917,9 @@ static void maybe_flush_text_node_buffer(GumboParser* parser) {
|
|
825
917
|
static void record_end_of_element(
|
826
918
|
GumboToken* current_token, GumboElement* element) {
|
827
919
|
element->end_pos = current_token->position;
|
828
|
-
element->original_end_tag =
|
829
|
-
|
830
|
-
|
920
|
+
element->original_end_tag = current_token->type == GUMBO_TOKEN_END_TAG
|
921
|
+
? current_token->original_text
|
922
|
+
: kGumboEmptyString;
|
831
923
|
}
|
832
924
|
|
833
925
|
static GumboNode* pop_current_node(GumboParser* parser) {
|
@@ -835,8 +927,7 @@ static GumboNode* pop_current_node(GumboParser* parser) {
|
|
835
927
|
maybe_flush_text_node_buffer(parser);
|
836
928
|
if (state->_open_elements.length > 0) {
|
837
929
|
assert(node_html_tag_is(state->_open_elements.data[0], GUMBO_TAG_HTML));
|
838
|
-
gumbo_debug(
|
839
|
-
"Popping %s node.\n",
|
930
|
+
gumbo_debug("Popping %s node.\n",
|
840
931
|
gumbo_normalized_tagname(get_current_node(parser)->v.element.tag));
|
841
932
|
}
|
842
933
|
GumboNode* current_node = gumbo_vector_pop(parser, &state->_open_elements);
|
@@ -844,13 +935,16 @@ static GumboNode* pop_current_node(GumboParser* parser) {
|
|
844
935
|
assert(state->_open_elements.length == 0);
|
845
936
|
return NULL;
|
846
937
|
}
|
847
|
-
assert(current_node->type == GUMBO_NODE_ELEMENT
|
938
|
+
assert(current_node->type == GUMBO_NODE_ELEMENT ||
|
939
|
+
current_node->type == GUMBO_NODE_TEMPLATE);
|
848
940
|
bool is_closed_body_or_html_tag =
|
849
|
-
(node_html_tag_is(current_node, GUMBO_TAG_BODY) &&
|
850
|
-
|
941
|
+
(node_html_tag_is(current_node, GUMBO_TAG_BODY) &&
|
942
|
+
state->_closed_body_tag) ||
|
943
|
+
(node_html_tag_is(current_node, GUMBO_TAG_HTML) &&
|
944
|
+
state->_closed_html_tag);
|
851
945
|
if ((state->_current_token->type != GUMBO_TOKEN_END_TAG ||
|
852
|
-
|
853
|
-
|
946
|
+
!node_html_tag_is(current_node, state->_current_token->v.end_tag)) &&
|
947
|
+
!is_closed_body_or_html_tag) {
|
854
948
|
current_node->parse_flags |= GUMBO_INSERTION_IMPLICIT_END_TAG;
|
855
949
|
}
|
856
950
|
if (!is_closed_body_or_html_tag) {
|
@@ -873,22 +967,25 @@ static void append_comment_node(
|
|
873
967
|
|
874
968
|
// http://www.whatwg.org/specs/web-apps/current-work/complete/tokenization.html#clear-the-stack-back-to-a-table-row-context
|
875
969
|
static void clear_stack_to_table_row_context(GumboParser* parser) {
|
876
|
-
while (!node_tag_in_set(get_current_node(parser),
|
970
|
+
while (!node_tag_in_set(get_current_node(parser),
|
971
|
+
(gumbo_tagset){TAG(HTML), TAG(TR), TAG(TEMPLATE)})) {
|
877
972
|
pop_current_node(parser);
|
878
973
|
}
|
879
974
|
}
|
880
975
|
|
881
976
|
// http://www.whatwg.org/specs/web-apps/current-work/complete/tokenization.html#clear-the-stack-back-to-a-table-context
|
882
977
|
static void clear_stack_to_table_context(GumboParser* parser) {
|
883
|
-
while (!node_tag_in_set(get_current_node(parser),
|
978
|
+
while (!node_tag_in_set(get_current_node(parser),
|
979
|
+
(gumbo_tagset){TAG(HTML), TAG(TABLE), TAG(TEMPLATE)})) {
|
884
980
|
pop_current_node(parser);
|
885
981
|
}
|
886
982
|
}
|
887
983
|
|
888
984
|
// http://www.whatwg.org/specs/web-apps/current-work/complete/tokenization.html#clear-the-stack-back-to-a-table-body-context
|
889
985
|
void clear_stack_to_table_body_context(GumboParser* parser) {
|
890
|
-
while (!node_tag_in_set(get_current_node(parser),
|
891
|
-
|
986
|
+
while (!node_tag_in_set(get_current_node(parser),
|
987
|
+
(gumbo_tagset){TAG(HTML), TAG(TBODY), TAG(TFOOT), TAG(THEAD),
|
988
|
+
TAG(TEMPLATE)})) {
|
892
989
|
pop_current_node(parser);
|
893
990
|
}
|
894
991
|
}
|
@@ -903,7 +1000,9 @@ static GumboNode* create_element(GumboParser* parser, GumboTag tag) {
|
|
903
1000
|
element->tag_namespace = GUMBO_NAMESPACE_HTML;
|
904
1001
|
element->original_tag = kGumboEmptyString;
|
905
1002
|
element->original_end_tag = kGumboEmptyString;
|
906
|
-
element->start_pos = parser->_parser_state->_current_token
|
1003
|
+
element->start_pos = (parser->_parser_state->_current_token)
|
1004
|
+
? parser->_parser_state->_current_token->position
|
1005
|
+
: kGumboEmptySourcePosition;
|
907
1006
|
element->end_pos = kGumboEmptySourcePosition;
|
908
1007
|
return node;
|
909
1008
|
}
|
@@ -914,7 +1013,12 @@ static GumboNode* create_element_from_token(
|
|
914
1013
|
assert(token->type == GUMBO_TOKEN_START_TAG);
|
915
1014
|
GumboTokenStartTag* start_tag = &token->v.start_tag;
|
916
1015
|
|
917
|
-
|
1016
|
+
GumboNodeType type = (tag_namespace == GUMBO_NAMESPACE_HTML &&
|
1017
|
+
start_tag->tag == GUMBO_TAG_TEMPLATE)
|
1018
|
+
? GUMBO_NODE_TEMPLATE
|
1019
|
+
: GUMBO_NODE_ELEMENT;
|
1020
|
+
|
1021
|
+
GumboNode* node = create_node(parser, type);
|
918
1022
|
GumboElement* element = &node->v.element;
|
919
1023
|
gumbo_vector_init(parser, 1, &element->children);
|
920
1024
|
element->attributes = start_tag->attributes;
|
@@ -937,7 +1041,7 @@ static GumboNode* create_element_from_token(
|
|
937
1041
|
|
938
1042
|
// http://www.whatwg.org/specs/web-apps/current-work/complete/tokenization.html#insert-an-html-element
|
939
1043
|
static void insert_element(GumboParser* parser, GumboNode* node,
|
940
|
-
|
1044
|
+
bool is_reconstructing_formatting_elements) {
|
941
1045
|
GumboParserState* state = parser->_parser_state;
|
942
1046
|
// NOTE(jdtang): The text node buffer must always be flushed before inserting
|
943
1047
|
// a node, otherwise we're handling nodes in a different order than the spec
|
@@ -951,20 +1055,8 @@ static void insert_element(GumboParser* parser, GumboNode* node,
|
|
951
1055
|
if (!is_reconstructing_formatting_elements) {
|
952
1056
|
maybe_flush_text_node_buffer(parser);
|
953
1057
|
}
|
954
|
-
|
955
|
-
|
956
|
-
TAG(THEAD), TAG(TR) } )) {
|
957
|
-
foster_parent_element(parser, node);
|
958
|
-
gumbo_vector_add(parser, (void*) node, &state->_open_elements);
|
959
|
-
return;
|
960
|
-
}
|
961
|
-
|
962
|
-
// This is called to insert the root HTML element, but get_current_node
|
963
|
-
// assumes the stack of open elements is non-empty, so we need special
|
964
|
-
// handling for this case.
|
965
|
-
append_node(
|
966
|
-
parser, parser->_output->root ?
|
967
|
-
get_current_node(parser) : parser->_output->document, node);
|
1058
|
+
InsertionLocation location = get_appropriate_insertion_location(parser, NULL);
|
1059
|
+
insert_node(parser, node, location);
|
968
1060
|
gumbo_vector_add(parser, (void*) node, &state->_open_elements);
|
969
1061
|
}
|
970
1062
|
|
@@ -977,7 +1069,7 @@ static GumboNode* insert_element_from_token(
|
|
977
1069
|
create_element_from_token(parser, token, GUMBO_NAMESPACE_HTML);
|
978
1070
|
insert_element(parser, element, false);
|
979
1071
|
gumbo_debug("Inserting <%s> element (@%x) from token.\n",
|
980
|
-
|
1072
|
+
gumbo_normalized_tagname(element->v.element.tag), element);
|
981
1073
|
return element;
|
982
1074
|
}
|
983
1075
|
|
@@ -990,7 +1082,7 @@ static GumboNode* insert_element_of_tag_type(
|
|
990
1082
|
element->parse_flags |= GUMBO_INSERTION_BY_PARSER | reason;
|
991
1083
|
insert_element(parser, element, false);
|
992
1084
|
gumbo_debug("Inserting %s element (@%x) from tag type.\n",
|
993
|
-
|
1085
|
+
gumbo_normalized_tagname(tag), element);
|
994
1086
|
return element;
|
995
1087
|
}
|
996
1088
|
|
@@ -1002,16 +1094,14 @@ static GumboNode* insert_foreign_element(
|
|
1002
1094
|
GumboNode* element = create_element_from_token(parser, token, tag_namespace);
|
1003
1095
|
insert_element(parser, element, false);
|
1004
1096
|
if (token_has_attribute(token, "xmlns") &&
|
1005
|
-
!attribute_matches_case_sensitive(
|
1006
|
-
&token->v.start_tag.attributes, "xmlns",
|
1097
|
+
!attribute_matches_case_sensitive(&token->v.start_tag.attributes, "xmlns",
|
1007
1098
|
kLegalXmlns[tag_namespace])) {
|
1008
1099
|
// TODO(jdtang): Since there're multiple possible error codes here, we
|
1009
1100
|
// eventually need reason codes to differentiate them.
|
1010
1101
|
parser_add_parse_error(parser, token);
|
1011
1102
|
}
|
1012
1103
|
if (token_has_attribute(token, "xmlns:xlink") &&
|
1013
|
-
!attribute_matches_case_sensitive(
|
1014
|
-
&token->v.start_tag.attributes,
|
1104
|
+
!attribute_matches_case_sensitive(&token->v.start_tag.attributes,
|
1015
1105
|
"xmlns:xlink", "http://www.w3.org/1999/xlink")) {
|
1016
1106
|
parser_add_parse_error(parser, token);
|
1017
1107
|
}
|
@@ -1021,8 +1111,7 @@ static GumboNode* insert_foreign_element(
|
|
1021
1111
|
static void insert_text_token(GumboParser* parser, GumboToken* token) {
|
1022
1112
|
assert(token->type == GUMBO_TOKEN_WHITESPACE ||
|
1023
1113
|
token->type == GUMBO_TOKEN_CHARACTER ||
|
1024
|
-
token->type == GUMBO_TOKEN_NULL ||
|
1025
|
-
token->type == GUMBO_TOKEN_CDATA);
|
1114
|
+
token->type == GUMBO_TOKEN_NULL || token->type == GUMBO_TOKEN_CDATA);
|
1026
1115
|
TextNodeBufferState* buffer_state = &parser->_parser_state->_text_node;
|
1027
1116
|
if (buffer_state->_buffer.length == 0) {
|
1028
1117
|
// Initialize position fields.
|
@@ -1057,7 +1146,7 @@ static void acknowledge_self_closing_tag(GumboParser* parser) {
|
|
1057
1146
|
// elements, and fills in its index if so.
|
1058
1147
|
static bool find_last_anchor_index(GumboParser* parser, int* anchor_index) {
|
1059
1148
|
GumboVector* elements = &parser->_parser_state->_active_formatting_elements;
|
1060
|
-
for (int i = elements->length; --i >= 0;
|
1149
|
+
for (int i = elements->length; --i >= 0;) {
|
1061
1150
|
GumboNode* node = elements->data[i];
|
1062
1151
|
if (node == &kActiveFormattingScopeMarker) {
|
1063
1152
|
return false;
|
@@ -1074,21 +1163,21 @@ static bool find_last_anchor_index(GumboParser* parser, int* anchor_index) {
|
|
1074
1163
|
// formatting elements (after the last active scope marker) that have a specific
|
1075
1164
|
// tag. If this is > 0, then earliest_matching_index will be filled in with the
|
1076
1165
|
// index of the first such element.
|
1077
|
-
static int count_formatting_elements_of_tag(
|
1078
|
-
|
1079
|
-
int* earliest_matching_index) {
|
1166
|
+
static int count_formatting_elements_of_tag(GumboParser* parser,
|
1167
|
+
const GumboNode* desired_node, int* earliest_matching_index) {
|
1080
1168
|
const GumboElement* desired_element = &desired_node->v.element;
|
1081
1169
|
GumboVector* elements = &parser->_parser_state->_active_formatting_elements;
|
1082
1170
|
int num_identical_elements = 0;
|
1083
|
-
for (int i = elements->length; --i >= 0;
|
1171
|
+
for (int i = elements->length; --i >= 0;) {
|
1084
1172
|
GumboNode* node = elements->data[i];
|
1085
1173
|
if (node == &kActiveFormattingScopeMarker) {
|
1086
1174
|
break;
|
1087
1175
|
}
|
1088
1176
|
assert(node->type == GUMBO_NODE_ELEMENT);
|
1089
|
-
if (node_qualified_tag_is(
|
1090
|
-
|
1091
|
-
|
1177
|
+
if (node_qualified_tag_is(
|
1178
|
+
node, desired_element->tag_namespace, desired_element->tag) &&
|
1179
|
+
all_attributes_match(
|
1180
|
+
&node->v.element.attributes, &desired_element->attributes)) {
|
1092
1181
|
num_identical_elements++;
|
1093
1182
|
*earliest_matching_index = i;
|
1094
1183
|
}
|
@@ -1115,7 +1204,7 @@ static void add_formatting_element(GumboParser* parser, const GumboNode* node) {
|
|
1115
1204
|
// Noah's Ark clause: if there're at least 3, remove the earliest.
|
1116
1205
|
if (num_identical_elements >= 3) {
|
1117
1206
|
gumbo_debug("Noah's ark clause: removing element at %d.\n",
|
1118
|
-
|
1207
|
+
earliest_identical_element);
|
1119
1208
|
gumbo_vector_remove_at(parser, earliest_identical_element, elements);
|
1120
1209
|
}
|
1121
1210
|
|
@@ -1124,7 +1213,7 @@ static void add_formatting_element(GumboParser* parser, const GumboNode* node) {
|
|
1124
1213
|
|
1125
1214
|
static bool is_open_element(GumboParser* parser, const GumboNode* node) {
|
1126
1215
|
GumboVector* open_elements = &parser->_parser_state->_open_elements;
|
1127
|
-
for (int i = 0; i < open_elements->length; ++i) {
|
1216
|
+
for (unsigned int i = 0; i < open_elements->length; ++i) {
|
1128
1217
|
if (open_elements->data[i] == node) {
|
1129
1218
|
return true;
|
1130
1219
|
}
|
@@ -1136,8 +1225,8 @@ static bool is_open_element(GumboParser* parser, const GumboNode* node) {
|
|
1136
1225
|
// clone shares no structure with the original node: all owned strings and
|
1137
1226
|
// values are fresh copies.
|
1138
1227
|
GumboNode* clone_node(
|
1139
|
-
GumboParser* parser,
|
1140
|
-
assert(node->type == GUMBO_NODE_ELEMENT);
|
1228
|
+
GumboParser* parser, GumboNode* node, GumboParseFlags reason) {
|
1229
|
+
assert(node->type == GUMBO_NODE_ELEMENT || node->type == GUMBO_NODE_TEMPLATE);
|
1141
1230
|
GumboNode* new_node = gumbo_parser_allocate(parser, sizeof(GumboNode));
|
1142
1231
|
*new_node = *node;
|
1143
1232
|
new_node->parent = NULL;
|
@@ -1151,7 +1240,7 @@ GumboNode* clone_node(
|
|
1151
1240
|
|
1152
1241
|
const GumboVector* old_attributes = &node->v.element.attributes;
|
1153
1242
|
gumbo_vector_init(parser, old_attributes->length, &element->attributes);
|
1154
|
-
for (int i = 0; i < old_attributes->length; ++i) {
|
1243
|
+
for (unsigned int i = 0; i < old_attributes->length; ++i) {
|
1155
1244
|
const GumboAttribute* old_attr = old_attributes->data[i];
|
1156
1245
|
GumboAttribute* attr =
|
1157
1246
|
gumbo_parser_allocate(parser, sizeof(GumboAttribute));
|
@@ -1175,8 +1264,8 @@ static void reconstruct_active_formatting_elements(GumboParser* parser) {
|
|
1175
1264
|
}
|
1176
1265
|
|
1177
1266
|
// Step 2 & 3
|
1178
|
-
int i = elements->length - 1;
|
1179
|
-
|
1267
|
+
unsigned int i = elements->length - 1;
|
1268
|
+
GumboNode* element = elements->data[i];
|
1180
1269
|
if (element == &kActiveFormattingScopeMarker ||
|
1181
1270
|
is_open_element(parser, element)) {
|
1182
1271
|
return;
|
@@ -1186,7 +1275,7 @@ static void reconstruct_active_formatting_elements(GumboParser* parser) {
|
|
1186
1275
|
do {
|
1187
1276
|
if (i == 0) {
|
1188
1277
|
// Step 4
|
1189
|
-
i = -1;
|
1278
|
+
i = -1; // Incremented to 0 below.
|
1190
1279
|
break;
|
1191
1280
|
}
|
1192
1281
|
// Step 5
|
@@ -1196,9 +1285,8 @@ static void reconstruct_active_formatting_elements(GumboParser* parser) {
|
|
1196
1285
|
|
1197
1286
|
++i;
|
1198
1287
|
gumbo_debug("Reconstructing elements from %d on %s parent.\n", i,
|
1199
|
-
|
1200
|
-
|
1201
|
-
for(; i < elements->length; ++i) {
|
1288
|
+
gumbo_normalized_tagname(get_current_node(parser)->v.element.tag));
|
1289
|
+
for (; i < elements->length; ++i) {
|
1202
1290
|
// Step 7 & 8.
|
1203
1291
|
assert(elements->length > 0);
|
1204
1292
|
assert(i < elements->length);
|
@@ -1207,11 +1295,16 @@ static void reconstruct_active_formatting_elements(GumboParser* parser) {
|
|
1207
1295
|
GumboNode* clone = clone_node(
|
1208
1296
|
parser, element, GUMBO_INSERTION_RECONSTRUCTED_FORMATTING_ELEMENT);
|
1209
1297
|
// Step 9.
|
1210
|
-
|
1298
|
+
InsertionLocation location =
|
1299
|
+
get_appropriate_insertion_location(parser, NULL);
|
1300
|
+
insert_node(parser, clone, location);
|
1301
|
+
gumbo_vector_add(
|
1302
|
+
parser, (void*) clone, &parser->_parser_state->_open_elements);
|
1303
|
+
|
1211
1304
|
// Step 10.
|
1212
1305
|
elements->data[i] = clone;
|
1213
1306
|
gumbo_debug("Reconstructed %s element at %d.\n",
|
1214
|
-
|
1307
|
+
gumbo_normalized_tagname(clone->v.element.tag), i);
|
1215
1308
|
}
|
1216
1309
|
}
|
1217
1310
|
|
@@ -1222,32 +1315,30 @@ static void clear_active_formatting_elements(GumboParser* parser) {
|
|
1222
1315
|
do {
|
1223
1316
|
node = gumbo_vector_pop(parser, elements);
|
1224
1317
|
++num_elements_cleared;
|
1225
|
-
} while(node && node != &kActiveFormattingScopeMarker);
|
1318
|
+
} while (node && node != &kActiveFormattingScopeMarker);
|
1226
1319
|
gumbo_debug("Cleared %d elements from active formatting list.\n",
|
1227
|
-
|
1320
|
+
num_elements_cleared);
|
1228
1321
|
}
|
1229
1322
|
|
1230
1323
|
// http://www.whatwg.org/specs/web-apps/current-work/complete/tokenization.html#the-initial-insertion-mode
|
1231
1324
|
static GumboQuirksModeEnum compute_quirks_mode(
|
1232
1325
|
const GumboTokenDocType* doctype) {
|
1233
|
-
if (doctype->force_quirks ||
|
1234
|
-
|
1235
|
-
|
1236
|
-
|
1237
|
-
|
1238
|
-
|
1239
|
-
|
1240
|
-
kQuirksModeSystemIdExactMatches, true) ||
|
1326
|
+
if (doctype->force_quirks || strcmp(doctype->name, kDoctypeHtml.data) ||
|
1327
|
+
is_in_static_list(
|
1328
|
+
doctype->public_identifier, kQuirksModePublicIdPrefixes, false) ||
|
1329
|
+
is_in_static_list(
|
1330
|
+
doctype->public_identifier, kQuirksModePublicIdExactMatches, true) ||
|
1331
|
+
is_in_static_list(
|
1332
|
+
doctype->system_identifier, kQuirksModeSystemIdExactMatches, true) ||
|
1241
1333
|
(is_in_static_list(doctype->public_identifier,
|
1242
|
-
|
1243
|
-
|
1334
|
+
kLimitedQuirksRequiresSystemIdPublicIdPrefixes, false) &&
|
1335
|
+
!doctype->has_system_identifier)) {
|
1244
1336
|
return GUMBO_DOCTYPE_QUIRKS;
|
1245
|
-
} else if (
|
1246
|
-
|
1247
|
-
|
1248
|
-
|
1249
|
-
|
1250
|
-
&& doctype->has_system_identifier)) {
|
1337
|
+
} else if (is_in_static_list(doctype->public_identifier,
|
1338
|
+
kLimitedQuirksPublicIdPrefixes, false) ||
|
1339
|
+
(is_in_static_list(doctype->public_identifier,
|
1340
|
+
kLimitedQuirksRequiresSystemIdPublicIdPrefixes, false) &&
|
1341
|
+
doctype->has_system_identifier)) {
|
1251
1342
|
return GUMBO_DOCTYPE_LIMITED_QUIRKS;
|
1252
1343
|
}
|
1253
1344
|
return GUMBO_DOCTYPE_NO_QUIRKS;
|
@@ -1261,39 +1352,45 @@ static GumboQuirksModeEnum compute_quirks_mode(
|
|
1261
1352
|
// names. For example, "has an element in list scope" looks for an element of
|
1262
1353
|
// the given qualified name within the nearest enclosing <ol> or <ul>, along
|
1263
1354
|
// with a bunch of generic element types that serve to "firewall" their content
|
1264
|
-
// from the rest of the document.
|
1265
|
-
|
1355
|
+
// from the rest of the document. Note that because of the way the spec is
|
1356
|
+
// written,
|
1357
|
+
// all elements are expected to be in the HTML namespace
|
1358
|
+
static bool has_an_element_in_specific_scope(GumboParser* parser,
|
1359
|
+
int expected_size, const GumboTag* expected, bool negate,
|
1360
|
+
const gumbo_tagset tags) {
|
1266
1361
|
GumboVector* open_elements = &parser->_parser_state->_open_elements;
|
1267
|
-
|
1268
|
-
for (int i = open_elements->length; --i >= 0; ) {
|
1362
|
+
for (int i = open_elements->length; --i >= 0;) {
|
1269
1363
|
const GumboNode* node = open_elements->data[i];
|
1270
|
-
if (node->type != GUMBO_NODE_ELEMENT)
|
1364
|
+
if (node->type != GUMBO_NODE_ELEMENT && node->type != GUMBO_NODE_TEMPLATE)
|
1271
1365
|
continue;
|
1366
|
+
|
1367
|
+
GumboTag node_tag = node->v.element.tag;
|
1368
|
+
GumboNamespaceEnum node_ns = node->v.element.tag_namespace;
|
1369
|
+
for (int j = 0; j < expected_size; ++j) {
|
1370
|
+
if (node_tag == expected[j] && node_ns == GUMBO_NAMESPACE_HTML)
|
1371
|
+
return true;
|
1272
1372
|
}
|
1273
|
-
|
1274
|
-
|
1275
|
-
|
1276
|
-
bool found_qualname = false;
|
1277
|
-
if (TAGSET_INCLUDES(tags, node->v.element.tag_namespace, node->v.element.tag)) {
|
1278
|
-
found_qualname = true;
|
1279
|
-
}
|
1280
|
-
if (negate != found_qualname) {
|
1281
|
-
result = false;
|
1282
|
-
return result;
|
1283
|
-
}
|
1373
|
+
|
1374
|
+
bool found = TAGSET_INCLUDES(tags, node_ns, node_tag);
|
1375
|
+
if (negate != found) return false;
|
1284
1376
|
}
|
1285
|
-
return
|
1377
|
+
return false;
|
1378
|
+
}
|
1379
|
+
|
1380
|
+
// Checks for the presence of an open element of the specified tag type.
|
1381
|
+
static bool has_open_element(GumboParser* parser, GumboTag tag) {
|
1382
|
+
return has_an_element_in_specific_scope(
|
1383
|
+
parser, 1, &tag, false, (gumbo_tagset){TAG(HTML)});
|
1286
1384
|
}
|
1287
1385
|
|
1288
1386
|
// http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#has-an-element-in-scope
|
1289
1387
|
static bool has_an_element_in_scope(GumboParser* parser, GumboTag tag) {
|
1290
|
-
|
1291
|
-
|
1292
|
-
|
1293
|
-
|
1294
|
-
|
1295
|
-
|
1296
|
-
TAG_SVG(FOREIGNOBJECT), TAG_SVG(DESC), TAG_SVG(TITLE) });
|
1388
|
+
return has_an_element_in_specific_scope(parser, 1, &tag, false,
|
1389
|
+
(gumbo_tagset){TAG(APPLET), TAG(CAPTION), TAG(HTML), TAG(TABLE), TAG(TD),
|
1390
|
+
TAG(TH), TAG(MARQUEE), TAG(OBJECT), TAG(TEMPLATE), TAG_MATHML(MI),
|
1391
|
+
TAG_MATHML(MO), TAG_MATHML(MN), TAG_MATHML(MS), TAG_MATHML(MTEXT),
|
1392
|
+
TAG_MATHML(ANNOTATION_XML), TAG_SVG(FOREIGNOBJECT), TAG_SVG(DESC),
|
1393
|
+
TAG_SVG(TITLE)});
|
1297
1394
|
}
|
1298
1395
|
|
1299
1396
|
// Like "has an element in scope", but for the specific case of looking for a
|
@@ -1304,19 +1401,21 @@ static bool has_an_element_in_scope(GumboParser* parser, GumboTag tag) {
|
|
1304
1401
|
// parameterize it.
|
1305
1402
|
static bool has_node_in_scope(GumboParser* parser, const GumboNode* node) {
|
1306
1403
|
GumboVector* open_elements = &parser->_parser_state->_open_elements;
|
1307
|
-
for (int i = open_elements->length; --i >= 0;
|
1404
|
+
for (int i = open_elements->length; --i >= 0;) {
|
1308
1405
|
const GumboNode* current = open_elements->data[i];
|
1309
1406
|
if (current == node) {
|
1310
1407
|
return true;
|
1311
1408
|
}
|
1312
|
-
if (current->type != GUMBO_NODE_ELEMENT
|
1409
|
+
if (current->type != GUMBO_NODE_ELEMENT &&
|
1410
|
+
current->type != GUMBO_NODE_TEMPLATE) {
|
1313
1411
|
continue;
|
1314
1412
|
}
|
1315
|
-
if (node_tag_in_set(current,
|
1316
|
-
|
1317
|
-
|
1318
|
-
|
1319
|
-
|
1413
|
+
if (node_tag_in_set(current,
|
1414
|
+
(gumbo_tagset){TAG(APPLET), TAG(CAPTION), TAG(HTML), TAG(TABLE),
|
1415
|
+
TAG(TD), TAG(TH), TAG(MARQUEE), TAG(OBJECT), TAG(TEMPLATE),
|
1416
|
+
TAG_MATHML(MI), TAG_MATHML(MO), TAG_MATHML(MN), TAG_MATHML(MS),
|
1417
|
+
TAG_MATHML(MTEXT), TAG_MATHML(ANNOTATION_XML),
|
1418
|
+
TAG_SVG(FOREIGNOBJECT), TAG_SVG(DESC), TAG_SVG(TITLE)})) {
|
1320
1419
|
return false;
|
1321
1420
|
}
|
1322
1421
|
}
|
@@ -1326,60 +1425,70 @@ static bool has_node_in_scope(GumboParser* parser, const GumboNode* node) {
|
|
1326
1425
|
|
1327
1426
|
// Like has_an_element_in_scope, but restricts the expected qualified name to a
|
1328
1427
|
// range of possible qualified names instead of just a single one.
|
1329
|
-
static bool has_an_element_in_scope_with_tagname(
|
1330
|
-
|
1331
|
-
|
1332
|
-
|
1333
|
-
|
1334
|
-
|
1428
|
+
static bool has_an_element_in_scope_with_tagname(
|
1429
|
+
GumboParser* parser, int expected_len, const GumboTag expected[]) {
|
1430
|
+
return has_an_element_in_specific_scope(parser, expected_len, expected, false,
|
1431
|
+
(gumbo_tagset){TAG(APPLET), TAG(CAPTION), TAG(HTML), TAG(TABLE), TAG(TD),
|
1432
|
+
TAG(TH), TAG(MARQUEE), TAG(OBJECT), TAG(TEMPLATE), TAG_MATHML(MI),
|
1433
|
+
TAG_MATHML(MO), TAG_MATHML(MN), TAG_MATHML(MS), TAG_MATHML(MTEXT),
|
1434
|
+
TAG_MATHML(ANNOTATION_XML), TAG_SVG(FOREIGNOBJECT), TAG_SVG(DESC),
|
1435
|
+
TAG_SVG(TITLE)});
|
1335
1436
|
}
|
1336
1437
|
|
1337
1438
|
// http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#has-an-element-in-list-item-scope
|
1338
1439
|
static bool has_an_element_in_list_scope(GumboParser* parser, GumboTag tag) {
|
1339
|
-
|
1340
|
-
|
1341
|
-
|
1342
|
-
|
1343
|
-
|
1344
|
-
|
1345
|
-
TAG_SVG(FOREIGNOBJECT), TAG_SVG(DESC), TAG_SVG(TITLE), TAG(OL),
|
1346
|
-
TAG(UL) });
|
1440
|
+
return has_an_element_in_specific_scope(parser, 1, &tag, false,
|
1441
|
+
(gumbo_tagset){TAG(APPLET), TAG(CAPTION), TAG(HTML), TAG(TABLE), TAG(TD),
|
1442
|
+
TAG(TH), TAG(MARQUEE), TAG(OBJECT), TAG(TEMPLATE), TAG_MATHML(MI),
|
1443
|
+
TAG_MATHML(MO), TAG_MATHML(MN), TAG_MATHML(MS), TAG_MATHML(MTEXT),
|
1444
|
+
TAG_MATHML(ANNOTATION_XML), TAG_SVG(FOREIGNOBJECT), TAG_SVG(DESC),
|
1445
|
+
TAG_SVG(TITLE), TAG(OL), TAG(UL)});
|
1347
1446
|
}
|
1348
1447
|
|
1349
1448
|
// http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#has-an-element-in-button-scope
|
1350
1449
|
static bool has_an_element_in_button_scope(GumboParser* parser, GumboTag tag) {
|
1351
|
-
|
1352
|
-
|
1353
|
-
|
1354
|
-
|
1355
|
-
|
1356
|
-
|
1357
|
-
TAG_SVG(FOREIGNOBJECT), TAG_SVG(DESC), TAG_SVG(TITLE), TAG(BUTTON) });
|
1450
|
+
return has_an_element_in_specific_scope(parser, 1, &tag, false,
|
1451
|
+
(gumbo_tagset){TAG(APPLET), TAG(CAPTION), TAG(HTML), TAG(TABLE), TAG(TD),
|
1452
|
+
TAG(TH), TAG(MARQUEE), TAG(OBJECT), TAG(TEMPLATE), TAG_MATHML(MI),
|
1453
|
+
TAG_MATHML(MO), TAG_MATHML(MN), TAG_MATHML(MS), TAG_MATHML(MTEXT),
|
1454
|
+
TAG_MATHML(ANNOTATION_XML), TAG_SVG(FOREIGNOBJECT), TAG_SVG(DESC),
|
1455
|
+
TAG_SVG(TITLE), TAG(BUTTON)});
|
1358
1456
|
}
|
1359
1457
|
|
1360
1458
|
// http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#has-an-element-in-table-scope
|
1361
1459
|
static bool has_an_element_in_table_scope(GumboParser* parser, GumboTag tag) {
|
1362
|
-
|
1363
|
-
|
1364
|
-
return has_an_element_in_specific_scope(parser, qualset, false, (gumbo_tagset) { TAG(HTML), TAG(TABLE) });
|
1460
|
+
return has_an_element_in_specific_scope(parser, 1, &tag, false,
|
1461
|
+
(gumbo_tagset){TAG(HTML), TAG(TABLE), TAG(TEMPLATE)});
|
1365
1462
|
}
|
1366
1463
|
|
1367
1464
|
// http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#has-an-element-in-select-scope
|
1368
1465
|
static bool has_an_element_in_select_scope(GumboParser* parser, GumboTag tag) {
|
1369
|
-
|
1370
|
-
|
1371
|
-
return has_an_element_in_specific_scope(parser, qualset, true, (gumbo_tagset) { TAG(OPTGROUP), TAG(OPTION) });
|
1466
|
+
return has_an_element_in_specific_scope(
|
1467
|
+
parser, 1, &tag, true, (gumbo_tagset){TAG(OPTGROUP), TAG(OPTION)});
|
1372
1468
|
}
|
1373
1469
|
|
1374
1470
|
// http://www.whatwg.org/specs/web-apps/current-work/complete/tokenization.html#generate-implied-end-tags
|
1375
1471
|
// "exception" is the "element to exclude from the process" listed in the spec.
|
1376
1472
|
// Pass GUMBO_TAG_LAST to not exclude any of them.
|
1377
1473
|
static void generate_implied_end_tags(GumboParser* parser, GumboTag exception) {
|
1378
|
-
for (;
|
1379
|
-
|
1380
|
-
|
1381
|
-
|
1382
|
-
pop_current_node(parser))
|
1474
|
+
for (; node_tag_in_set(get_current_node(parser),
|
1475
|
+
(gumbo_tagset){TAG(DD), TAG(DT), TAG(LI), TAG(OPTION),
|
1476
|
+
TAG(OPTGROUP), TAG(P), TAG(RP), TAG(RB), TAG(RT), TAG(RTC)}) &&
|
1477
|
+
!node_html_tag_is(get_current_node(parser), exception);
|
1478
|
+
pop_current_node(parser))
|
1479
|
+
;
|
1480
|
+
}
|
1481
|
+
|
1482
|
+
// This is the "generate all implied end tags thoroughly" clause of the spec.
|
1483
|
+
// https://html.spec.whatwg.org/multipage/syntax.html#closing-elements-that-have-implied-end-tags
|
1484
|
+
static void generate_all_implied_end_tags_thoroughly(GumboParser* parser) {
|
1485
|
+
for (
|
1486
|
+
; node_tag_in_set(get_current_node(parser),
|
1487
|
+
(gumbo_tagset){TAG(CAPTION), TAG(COLGROUP), TAG(DD), TAG(DT), TAG(LI),
|
1488
|
+
TAG(OPTION), TAG(OPTGROUP), TAG(P), TAG(RP), TAG(RT), TAG(RTC),
|
1489
|
+
TAG(TBODY), TAG(TD), TAG(TFOOT), TAG(TH), TAG(HEAD), TAG(TR)});
|
1490
|
+
pop_current_node(parser))
|
1491
|
+
;
|
1383
1492
|
}
|
1384
1493
|
|
1385
1494
|
// This factors out the clauses relating to "act as if an end tag token with tag
|
@@ -1401,8 +1510,8 @@ static bool close_table(GumboParser* parser) {
|
|
1401
1510
|
|
1402
1511
|
// This factors out the clauses relating to "act as if an end tag token with tag
|
1403
1512
|
// name `cell_tag` had been seen".
|
1404
|
-
static bool close_table_cell(
|
1405
|
-
|
1513
|
+
static bool close_table_cell(
|
1514
|
+
GumboParser* parser, const GumboToken* token, GumboTag cell_tag) {
|
1406
1515
|
bool result = true;
|
1407
1516
|
generate_implied_end_tags(parser, GUMBO_TAG_LAST);
|
1408
1517
|
const GumboNode* node = get_current_node(parser);
|
@@ -1446,38 +1555,43 @@ static void close_current_select(GumboParser* parser) {
|
|
1446
1555
|
// The list of nodes in the "special" category:
|
1447
1556
|
// http://www.whatwg.org/specs/web-apps/current-work/complete/parsing.html#special
|
1448
1557
|
static bool is_special_node(const GumboNode* node) {
|
1449
|
-
assert(node->type == GUMBO_NODE_ELEMENT);
|
1450
|
-
return node_tag_in_set(node,
|
1451
|
-
|
1452
|
-
|
1453
|
-
|
1454
|
-
|
1455
|
-
|
1456
|
-
|
1457
|
-
|
1458
|
-
|
1459
|
-
|
1460
|
-
|
1461
|
-
|
1462
|
-
|
1463
|
-
|
1464
|
-
|
1465
|
-
|
1466
|
-
|
1467
|
-
|
1468
|
-
|
1469
|
-
|
1470
|
-
|
1558
|
+
assert(node->type == GUMBO_NODE_ELEMENT || node->type == GUMBO_NODE_TEMPLATE);
|
1559
|
+
return node_tag_in_set(node,
|
1560
|
+
(gumbo_tagset){TAG(ADDRESS), TAG(APPLET), TAG(AREA), TAG(ARTICLE),
|
1561
|
+
TAG(ASIDE), TAG(BASE), TAG(BASEFONT), TAG(BGSOUND), TAG(BLOCKQUOTE),
|
1562
|
+
TAG(BODY), TAG(BR), TAG(BUTTON), TAG(CAPTION), TAG(CENTER), TAG(COL),
|
1563
|
+
TAG(COLGROUP), TAG(MENUITEM), TAG(DD), TAG(DETAILS), TAG(DIR),
|
1564
|
+
TAG(DIV), TAG(DL), TAG(DT), TAG(EMBED), TAG(FIELDSET),
|
1565
|
+
TAG(FIGCAPTION), TAG(FIGURE), TAG(FOOTER), TAG(FORM), TAG(FRAME),
|
1566
|
+
TAG(FRAMESET), TAG(H1), TAG(H2), TAG(H3), TAG(H4), TAG(H5), TAG(H6),
|
1567
|
+
TAG(HEAD), TAG(HEADER), TAG(HGROUP), TAG(HR), TAG(HTML), TAG(IFRAME),
|
1568
|
+
TAG(IMG), TAG(INPUT), TAG(ISINDEX), TAG(LI), TAG(LINK), TAG(LISTING),
|
1569
|
+
TAG(MARQUEE), TAG(MENU), TAG(META), TAG(NAV), TAG(NOEMBED),
|
1570
|
+
TAG(NOFRAMES), TAG(NOSCRIPT), TAG(OBJECT), TAG(OL), TAG(P),
|
1571
|
+
TAG(PARAM), TAG(PLAINTEXT), TAG(PRE), TAG(SCRIPT), TAG(SECTION),
|
1572
|
+
TAG(SELECT), TAG(STYLE), TAG(SUMMARY), TAG(TABLE), TAG(TBODY),
|
1573
|
+
TAG(TD), TAG(TEMPLATE), TAG(TEXTAREA), TAG(TFOOT), TAG(TH),
|
1574
|
+
TAG(THEAD), TAG(TITLE), TAG(TR), TAG(UL), TAG(WBR), TAG(XMP),
|
1575
|
+
|
1576
|
+
TAG_MATHML(MI), TAG_MATHML(MO), TAG_MATHML(MN), TAG_MATHML(MS),
|
1577
|
+
TAG_MATHML(MTEXT), TAG_MATHML(ANNOTATION_XML),
|
1578
|
+
|
1579
|
+
TAG_SVG(FOREIGNOBJECT), TAG_SVG(DESC)});
|
1580
|
+
}
|
1581
|
+
|
1582
|
+
// Implicitly closes currently open elements until it reaches an element with
|
1583
|
+
// the
|
1471
1584
|
// specified qualified name. If the elements closed are in the set handled by
|
1472
1585
|
// generate_implied_end_tags, this is normal operation and this function returns
|
1473
1586
|
// true. Otherwise, a parse error is recorded and this function returns false.
|
1474
|
-
static bool implicitly_close_tags(
|
1475
|
-
|
1587
|
+
static bool implicitly_close_tags(GumboParser* parser, GumboToken* token,
|
1588
|
+
GumboNamespaceEnum target_ns, GumboTag target) {
|
1476
1589
|
bool result = true;
|
1477
1590
|
generate_implied_end_tags(parser, target);
|
1478
1591
|
if (!node_qualified_tag_is(get_current_node(parser), target_ns, target)) {
|
1479
1592
|
parser_add_parse_error(parser, token);
|
1480
|
-
while (
|
1593
|
+
while (
|
1594
|
+
!node_qualified_tag_is(get_current_node(parser), target_ns, target)) {
|
1481
1595
|
pop_current_node(parser);
|
1482
1596
|
}
|
1483
1597
|
result = false;
|
@@ -1491,9 +1605,11 @@ static bool implicitly_close_tags(
|
|
1491
1605
|
// a </p> tag was encountered, implicitly closing tags. Returns false if a
|
1492
1606
|
// parse error occurs. This is a convenience function because this particular
|
1493
1607
|
// clause appears several times in the spec.
|
1494
|
-
static bool maybe_implicitly_close_p_tag(
|
1608
|
+
static bool maybe_implicitly_close_p_tag(
|
1609
|
+
GumboParser* parser, GumboToken* token) {
|
1495
1610
|
if (has_an_element_in_button_scope(parser, GUMBO_TAG_P)) {
|
1496
|
-
return implicitly_close_tags(
|
1611
|
+
return implicitly_close_tags(
|
1612
|
+
parser, token, GUMBO_NAMESPACE_HTML, GUMBO_TAG_P);
|
1497
1613
|
}
|
1498
1614
|
return true;
|
1499
1615
|
}
|
@@ -1504,17 +1620,19 @@ static void maybe_implicitly_close_list_tag(
|
|
1504
1620
|
GumboParser* parser, GumboToken* token, bool is_li) {
|
1505
1621
|
GumboParserState* state = parser->_parser_state;
|
1506
1622
|
state->_frameset_ok = false;
|
1507
|
-
for (int i = state->_open_elements.length; --i >= 0;
|
1623
|
+
for (int i = state->_open_elements.length; --i >= 0;) {
|
1508
1624
|
const GumboNode* node = state->_open_elements.data[i];
|
1509
|
-
bool is_list_tag =
|
1510
|
-
node_html_tag_is(node, GUMBO_TAG_LI)
|
1511
|
-
|
1625
|
+
bool is_list_tag =
|
1626
|
+
is_li ? node_html_tag_is(node, GUMBO_TAG_LI)
|
1627
|
+
: node_tag_in_set(node, (gumbo_tagset){TAG(DD), TAG(DT)});
|
1512
1628
|
if (is_list_tag) {
|
1513
|
-
implicitly_close_tags(
|
1629
|
+
implicitly_close_tags(
|
1630
|
+
parser, token, node->v.element.tag_namespace, node->v.element.tag);
|
1514
1631
|
return;
|
1515
1632
|
}
|
1516
1633
|
if (is_special_node(node) &&
|
1517
|
-
!node_tag_in_set(
|
1634
|
+
!node_tag_in_set(
|
1635
|
+
node, (gumbo_tagset){TAG(ADDRESS), TAG(DIV), TAG(P)})) {
|
1518
1636
|
return;
|
1519
1637
|
}
|
1520
1638
|
}
|
@@ -1527,7 +1645,7 @@ static void merge_attributes(
|
|
1527
1645
|
const GumboVector* token_attr = &token->v.start_tag.attributes;
|
1528
1646
|
GumboVector* node_attr = &node->v.element.attributes;
|
1529
1647
|
|
1530
|
-
for (int i = 0; i < token_attr->length; ++i) {
|
1648
|
+
for (unsigned int i = 0; i < token_attr->length; ++i) {
|
1531
1649
|
GumboAttribute* attr = token_attr->data[i];
|
1532
1650
|
if (!gumbo_get_attribute(node_attr, attr->name)) {
|
1533
1651
|
// Ownership of the attribute is transferred by this gumbo_vector_add,
|
@@ -1551,8 +1669,8 @@ static void merge_attributes(
|
|
1551
1669
|
}
|
1552
1670
|
|
1553
1671
|
const char* gumbo_normalize_svg_tagname(const GumboStringPiece* tag) {
|
1554
|
-
for (
|
1555
|
-
|
1672
|
+
for (size_t i = 0; i < sizeof(kSvgTagReplacements) / sizeof(ReplacementEntry);
|
1673
|
+
++i) {
|
1556
1674
|
const ReplacementEntry* entry = &kSvgTagReplacements[i];
|
1557
1675
|
if (gumbo_string_equals_ignore_case(tag, &entry->from)) {
|
1558
1676
|
return entry->to.data;
|
@@ -1567,9 +1685,9 @@ const char* gumbo_normalize_svg_tagname(const GumboStringPiece* tag) {
|
|
1567
1685
|
static void adjust_foreign_attributes(GumboParser* parser, GumboToken* token) {
|
1568
1686
|
assert(token->type == GUMBO_TOKEN_START_TAG);
|
1569
1687
|
const GumboVector* attributes = &token->v.start_tag.attributes;
|
1570
|
-
for (
|
1571
|
-
|
1572
|
-
|
1688
|
+
for (size_t i = 0; i < sizeof(kForeignAttributeReplacements) /
|
1689
|
+
sizeof(NamespacedAttributeReplacement);
|
1690
|
+
++i) {
|
1573
1691
|
const NamespacedAttributeReplacement* entry =
|
1574
1692
|
&kForeignAttributeReplacements[i];
|
1575
1693
|
GumboAttribute* attr = gumbo_get_attribute(attributes, entry->from);
|
@@ -1587,7 +1705,7 @@ static void adjust_foreign_attributes(GumboParser* parser, GumboToken* token) {
|
|
1587
1705
|
static void adjust_svg_attributes(GumboParser* parser, GumboToken* token) {
|
1588
1706
|
assert(token->type == GUMBO_TOKEN_START_TAG);
|
1589
1707
|
const GumboVector* attributes = &token->v.start_tag.attributes;
|
1590
|
-
for (
|
1708
|
+
for (size_t i = 0;
|
1591
1709
|
i < sizeof(kSvgAttributeReplacements) / sizeof(ReplacementEntry); ++i) {
|
1592
1710
|
const ReplacementEntry* entry = &kSvgAttributeReplacements[i];
|
1593
1711
|
GumboAttribute* attr = gumbo_get_attribute(attributes, entry->from.data);
|
@@ -1604,8 +1722,8 @@ static void adjust_svg_attributes(GumboParser* parser, GumboToken* token) {
|
|
1604
1722
|
// value.
|
1605
1723
|
static void adjust_mathml_attributes(GumboParser* parser, GumboToken* token) {
|
1606
1724
|
assert(token->type == GUMBO_TOKEN_START_TAG);
|
1607
|
-
GumboAttribute* attr =
|
1608
|
-
&token->v.start_tag.attributes, "definitionurl");
|
1725
|
+
GumboAttribute* attr =
|
1726
|
+
gumbo_get_attribute(&token->v.start_tag.attributes, "definitionurl");
|
1609
1727
|
if (!attr) {
|
1610
1728
|
return;
|
1611
1729
|
}
|
@@ -1613,32 +1731,30 @@ static void adjust_mathml_attributes(GumboParser* parser, GumboToken* token) {
|
|
1613
1731
|
attr->name = gumbo_copy_stringz(parser, "definitionURL");
|
1614
1732
|
}
|
1615
1733
|
|
1616
|
-
static bool doctype_matches(
|
1617
|
-
const
|
1618
|
-
const GumboStringPiece* public_id,
|
1619
|
-
const GumboStringPiece* system_id,
|
1734
|
+
static bool doctype_matches(const GumboTokenDocType* doctype,
|
1735
|
+
const GumboStringPiece* public_id, const GumboStringPiece* system_id,
|
1620
1736
|
bool allow_missing_system_id) {
|
1621
1737
|
return !strcmp(doctype->public_identifier, public_id->data) &&
|
1622
|
-
|
1623
|
-
|
1738
|
+
(allow_missing_system_id || doctype->has_system_identifier) &&
|
1739
|
+
!strcmp(doctype->system_identifier, system_id->data);
|
1624
1740
|
}
|
1625
1741
|
|
1626
1742
|
static bool maybe_add_doctype_error(
|
1627
1743
|
GumboParser* parser, const GumboToken* token) {
|
1628
1744
|
const GumboTokenDocType* doctype = &token->v.doc_type;
|
1629
1745
|
bool html_doctype = !strcmp(doctype->name, kDoctypeHtml.data);
|
1630
|
-
if ((!html_doctype ||
|
1631
|
-
|
1632
|
-
|
1633
|
-
|
1634
|
-
!(html_doctype && (
|
1635
|
-
|
1636
|
-
|
1637
|
-
|
1638
|
-
|
1639
|
-
|
1640
|
-
|
1641
|
-
|
1746
|
+
if ((!html_doctype || doctype->has_public_identifier ||
|
1747
|
+
(doctype->has_system_identifier &&
|
1748
|
+
!strcmp(
|
1749
|
+
doctype->system_identifier, kSystemIdLegacyCompat.data))) &&
|
1750
|
+
!(html_doctype && (doctype_matches(doctype, &kPublicIdHtml4_0,
|
1751
|
+
&kSystemIdRecHtml4_0, true) ||
|
1752
|
+
doctype_matches(doctype, &kPublicIdHtml4_01,
|
1753
|
+
&kSystemIdHtml4, true) ||
|
1754
|
+
doctype_matches(doctype, &kPublicIdXhtml1_0,
|
1755
|
+
&kSystemIdXhtmlStrict1_1, false) ||
|
1756
|
+
doctype_matches(doctype, &kPublicIdXhtml1_1,
|
1757
|
+
&kSystemIdXhtml1_1, false)))) {
|
1642
1758
|
parser_add_parse_error(parser, token);
|
1643
1759
|
return false;
|
1644
1760
|
}
|
@@ -1661,7 +1777,7 @@ static void remove_from_parent(GumboParser* parser, GumboNode* node) {
|
|
1661
1777
|
gumbo_vector_remove_at(parser, index, children);
|
1662
1778
|
node->parent = NULL;
|
1663
1779
|
node->index_within_parent = -1;
|
1664
|
-
for (int i = index; i < children->length; ++i) {
|
1780
|
+
for (unsigned int i = index; i < children->length; ++i) {
|
1665
1781
|
GumboNode* child = children->data[i];
|
1666
1782
|
child->index_within_parent = i;
|
1667
1783
|
}
|
@@ -1670,29 +1786,38 @@ static void remove_from_parent(GumboParser* parser, GumboNode* node) {
|
|
1670
1786
|
// http://www.whatwg.org/specs/web-apps/current-work/multipage/the-end.html#an-introduction-to-error-handling-and-strange-cases-in-the-parser
|
1671
1787
|
// Also described in the "in body" handling for end formatting tags.
|
1672
1788
|
static bool adoption_agency_algorithm(
|
1673
|
-
GumboParser* parser, GumboToken* token, GumboTag
|
1789
|
+
GumboParser* parser, GumboToken* token, GumboTag subject) {
|
1674
1790
|
GumboParserState* state = parser->_parser_state;
|
1675
1791
|
gumbo_debug("Entering adoption agency algorithm.\n");
|
1676
|
-
//
|
1677
|
-
|
1678
|
-
|
1792
|
+
// Step 1.
|
1793
|
+
GumboNode* current_node = get_current_node(parser);
|
1794
|
+
if (current_node->v.element.tag_namespace == GUMBO_NAMESPACE_HTML &&
|
1795
|
+
current_node->v.element.tag == subject &&
|
1796
|
+
gumbo_vector_index_of(
|
1797
|
+
&state->_active_formatting_elements, current_node) == -1) {
|
1798
|
+
pop_current_node(parser);
|
1799
|
+
return false;
|
1800
|
+
}
|
1801
|
+
// Steps 2-4 & 20:
|
1802
|
+
for (unsigned int i = 0; i < 8; ++i) {
|
1803
|
+
// Step 5.
|
1679
1804
|
GumboNode* formatting_node = NULL;
|
1680
1805
|
int formatting_node_in_open_elements = -1;
|
1681
|
-
for (int j = state->_active_formatting_elements.length; --j >= 0;
|
1806
|
+
for (int j = state->_active_formatting_elements.length; --j >= 0;) {
|
1682
1807
|
GumboNode* current_node = state->_active_formatting_elements.data[j];
|
1683
1808
|
if (current_node == &kActiveFormattingScopeMarker) {
|
1684
1809
|
gumbo_debug("Broke on scope marker; aborting.\n");
|
1685
1810
|
// Last scope marker; abort the algorithm.
|
1686
1811
|
return false;
|
1687
1812
|
}
|
1688
|
-
if (current_node
|
1813
|
+
if (node_html_tag_is(current_node, subject)) {
|
1689
1814
|
// Found it.
|
1690
1815
|
formatting_node = current_node;
|
1691
|
-
formatting_node_in_open_elements =
|
1692
|
-
&state->_open_elements, formatting_node);
|
1816
|
+
formatting_node_in_open_elements =
|
1817
|
+
gumbo_vector_index_of(&state->_open_elements, formatting_node);
|
1693
1818
|
gumbo_debug("Formatting element of tag %s at %d.\n",
|
1694
|
-
|
1695
|
-
|
1819
|
+
gumbo_normalized_tagname(subject),
|
1820
|
+
formatting_node_in_open_elements);
|
1696
1821
|
break;
|
1697
1822
|
}
|
1698
1823
|
}
|
@@ -1704,18 +1829,23 @@ static bool adoption_agency_algorithm(
|
|
1704
1829
|
return false;
|
1705
1830
|
}
|
1706
1831
|
|
1832
|
+
// Step 6
|
1707
1833
|
if (formatting_node_in_open_elements == -1) {
|
1708
1834
|
gumbo_debug("Formatting node not on stack of open elements.\n");
|
1709
|
-
|
1710
|
-
|
1835
|
+
parser_add_parse_error(parser, token);
|
1836
|
+
gumbo_vector_remove(
|
1837
|
+
parser, formatting_node, &state->_active_formatting_elements);
|
1711
1838
|
return false;
|
1712
1839
|
}
|
1713
1840
|
|
1841
|
+
// Step 7
|
1714
1842
|
if (!has_an_element_in_scope(parser, formatting_node->v.element.tag)) {
|
1715
1843
|
parser_add_parse_error(parser, token);
|
1716
1844
|
gumbo_debug("Element not in scope.\n");
|
1717
1845
|
return false;
|
1718
1846
|
}
|
1847
|
+
|
1848
|
+
// Step 8
|
1719
1849
|
if (formatting_node != get_current_node(parser)) {
|
1720
1850
|
parser_add_parse_error(parser, token); // But continue onwards.
|
1721
1851
|
}
|
@@ -1723,55 +1853,60 @@ static bool adoption_agency_algorithm(
|
|
1723
1853
|
assert(!node_html_tag_is(formatting_node, GUMBO_TAG_HTML));
|
1724
1854
|
assert(!node_html_tag_is(formatting_node, GUMBO_TAG_BODY));
|
1725
1855
|
|
1726
|
-
// Step
|
1856
|
+
// Step 9 & 10
|
1727
1857
|
GumboNode* furthest_block = NULL;
|
1728
|
-
for (int j = formatting_node_in_open_elements;
|
1858
|
+
for (unsigned int j = formatting_node_in_open_elements;
|
1729
1859
|
j < state->_open_elements.length; ++j) {
|
1730
1860
|
assert(j > 0);
|
1731
1861
|
GumboNode* current = state->_open_elements.data[j];
|
1732
1862
|
if (is_special_node(current)) {
|
1733
|
-
// Step
|
1863
|
+
// Step 9.
|
1734
1864
|
furthest_block = current;
|
1735
1865
|
break;
|
1736
1866
|
}
|
1737
1867
|
}
|
1738
1868
|
if (!furthest_block) {
|
1739
|
-
// Step
|
1869
|
+
// Step 10.
|
1740
1870
|
while (get_current_node(parser) != formatting_node) {
|
1741
1871
|
pop_current_node(parser);
|
1742
1872
|
}
|
1743
1873
|
// And the formatting element itself.
|
1744
1874
|
pop_current_node(parser);
|
1745
|
-
gumbo_vector_remove(
|
1746
|
-
|
1875
|
+
gumbo_vector_remove(
|
1876
|
+
parser, formatting_node, &state->_active_formatting_elements);
|
1747
1877
|
return false;
|
1748
1878
|
}
|
1749
1879
|
assert(!node_html_tag_is(furthest_block, GUMBO_TAG_HTML));
|
1750
1880
|
assert(furthest_block);
|
1751
1881
|
|
1752
|
-
// Step
|
1882
|
+
// Step 11.
|
1753
1883
|
// Elements may be moved and reparented by this algorithm, so
|
1754
1884
|
// common_ancestor is not necessarily the same as formatting_node->parent.
|
1755
1885
|
GumboNode* common_ancestor =
|
1756
|
-
state->_open_elements.data[gumbo_vector_index_of(
|
1757
|
-
|
1886
|
+
state->_open_elements.data[gumbo_vector_index_of(&state->_open_elements,
|
1887
|
+
formatting_node) -
|
1888
|
+
1];
|
1758
1889
|
gumbo_debug("Common ancestor tag = %s, furthest block tag = %s.\n",
|
1759
|
-
|
1760
|
-
|
1890
|
+
gumbo_normalized_tagname(common_ancestor->v.element.tag),
|
1891
|
+
gumbo_normalized_tagname(furthest_block->v.element.tag));
|
1761
1892
|
|
1762
|
-
// Step
|
1893
|
+
// Step 12.
|
1763
1894
|
int bookmark = gumbo_vector_index_of(
|
1764
|
-
|
1765
|
-
|
1895
|
+
&state->_active_formatting_elements, formatting_node) +
|
1896
|
+
1;
|
1897
|
+
gumbo_debug("Bookmark at %d.\n", bookmark);
|
1898
|
+
// Step 13.
|
1766
1899
|
GumboNode* node = furthest_block;
|
1767
1900
|
GumboNode* last_node = furthest_block;
|
1768
1901
|
// Must be stored explicitly, in case node is removed from the stack of open
|
1769
1902
|
// elements, to handle step 9.4.
|
1770
1903
|
int saved_node_index = gumbo_vector_index_of(&state->_open_elements, node);
|
1771
1904
|
assert(saved_node_index > 0);
|
1772
|
-
// Step
|
1773
|
-
for (int j = 0
|
1774
|
-
// Step
|
1905
|
+
// Step 13.1.
|
1906
|
+
for (int j = 0;;) {
|
1907
|
+
// Step 13.2.
|
1908
|
+
++j;
|
1909
|
+
// Step 13.3.
|
1775
1910
|
int node_index = gumbo_vector_index_of(&state->_open_elements, node);
|
1776
1911
|
gumbo_debug(
|
1777
1912
|
"Current index: %d, last index: %d.\n", node_index, saved_node_index);
|
@@ -1780,58 +1915,72 @@ static bool adoption_agency_algorithm(
|
|
1780
1915
|
}
|
1781
1916
|
saved_node_index = --node_index;
|
1782
1917
|
assert(node_index > 0);
|
1783
|
-
assert(node_index < state->_open_elements.capacity);
|
1918
|
+
assert((unsigned int) node_index < state->_open_elements.capacity);
|
1784
1919
|
node = state->_open_elements.data[node_index];
|
1785
1920
|
assert(node->parent);
|
1786
|
-
|
1787
|
-
|
1788
|
-
|
1921
|
+
if (node == formatting_node) {
|
1922
|
+
// Step 13.4.
|
1923
|
+
break;
|
1924
|
+
}
|
1925
|
+
int formatting_index =
|
1926
|
+
gumbo_vector_index_of(&state->_active_formatting_elements, node);
|
1927
|
+
if (j > 3 && formatting_index != -1) {
|
1928
|
+
// Step 13.5.
|
1929
|
+
gumbo_debug("Removing formatting element at %d.\n", formatting_index);
|
1930
|
+
gumbo_vector_remove_at(
|
1931
|
+
parser, formatting_index, &state->_active_formatting_elements);
|
1932
|
+
// Removing the element shifts all indices over by one, so we may need
|
1933
|
+
// to move the bookmark.
|
1934
|
+
if (formatting_index < bookmark) {
|
1935
|
+
--bookmark;
|
1936
|
+
gumbo_debug("Moving bookmark to %d.\n", bookmark);
|
1937
|
+
}
|
1938
|
+
continue;
|
1939
|
+
}
|
1940
|
+
if (formatting_index == -1) {
|
1941
|
+
// Step 13.6.
|
1789
1942
|
gumbo_vector_remove_at(parser, node_index, &state->_open_elements);
|
1790
1943
|
continue;
|
1791
|
-
} else if (node == formatting_node) {
|
1792
|
-
// Step 9.6.
|
1793
|
-
break;
|
1794
1944
|
}
|
1795
|
-
// Step
|
1796
|
-
|
1797
|
-
|
1945
|
+
// Step 13.7.
|
1946
|
+
// "common ancestor as the intended parent" doesn't actually mean insert
|
1947
|
+
// it into the common ancestor; that happens below.
|
1798
1948
|
node = clone_node(parser, node, GUMBO_INSERTION_ADOPTION_AGENCY_CLONED);
|
1949
|
+
assert(formatting_index >= 0);
|
1799
1950
|
state->_active_formatting_elements.data[formatting_index] = node;
|
1951
|
+
assert(node_index >= 0);
|
1800
1952
|
state->_open_elements.data[node_index] = node;
|
1801
|
-
// Step
|
1953
|
+
// Step 13.8.
|
1802
1954
|
if (last_node == furthest_block) {
|
1803
1955
|
bookmark = formatting_index + 1;
|
1804
|
-
|
1956
|
+
gumbo_debug("Bookmark moved to %d.\n", bookmark);
|
1957
|
+
assert((unsigned int) bookmark <= state->_active_formatting_elements.length);
|
1805
1958
|
}
|
1806
|
-
// Step
|
1959
|
+
// Step 13.9.
|
1807
1960
|
last_node->parse_flags |= GUMBO_INSERTION_ADOPTION_AGENCY_MOVED;
|
1808
1961
|
remove_from_parent(parser, last_node);
|
1809
1962
|
append_node(parser, node, last_node);
|
1810
|
-
// Step
|
1963
|
+
// Step 13.10.
|
1811
1964
|
last_node = node;
|
1812
|
-
}
|
1965
|
+
} // Step 13.11.
|
1813
1966
|
|
1814
|
-
// Step
|
1967
|
+
// Step 14.
|
1815
1968
|
gumbo_debug("Removing %s node from parent ",
|
1816
|
-
|
1969
|
+
gumbo_normalized_tagname(last_node->v.element.tag));
|
1817
1970
|
remove_from_parent(parser, last_node);
|
1818
1971
|
last_node->parse_flags |= GUMBO_INSERTION_ADOPTION_AGENCY_MOVED;
|
1819
|
-
|
1820
|
-
|
1821
|
-
|
1822
|
-
|
1823
|
-
|
1824
|
-
gumbo_debug("and inserting it into %s.\n",
|
1825
|
-
gumbo_normalized_tagname(common_ancestor->v.element.tag));
|
1826
|
-
append_node(parser, common_ancestor, last_node);
|
1827
|
-
}
|
1972
|
+
InsertionLocation location =
|
1973
|
+
get_appropriate_insertion_location(parser, common_ancestor);
|
1974
|
+
gumbo_debug("and inserting it into %s.\n",
|
1975
|
+
gumbo_normalized_tagname(location.target->v.element.tag));
|
1976
|
+
insert_node(parser, last_node, location);
|
1828
1977
|
|
1829
|
-
// Step
|
1978
|
+
// Step 15.
|
1830
1979
|
GumboNode* new_formatting_node = clone_node(
|
1831
1980
|
parser, formatting_node, GUMBO_INSERTION_ADOPTION_AGENCY_CLONED);
|
1832
1981
|
formatting_node->parse_flags |= GUMBO_INSERTION_IMPLICIT_END_TAG;
|
1833
1982
|
|
1834
|
-
// Step
|
1983
|
+
// Step 16. Instead of appending nodes one-by-one, we swap the children
|
1835
1984
|
// vector of furthest_block with the empty children of new_formatting_node,
|
1836
1985
|
// reducing memory traffic and allocations. We still have to reset their
|
1837
1986
|
// parent pointers, though.
|
@@ -1841,15 +1990,15 @@ static bool adoption_agency_algorithm(
|
|
1841
1990
|
furthest_block->v.element.children = temp;
|
1842
1991
|
|
1843
1992
|
temp = new_formatting_node->v.element.children;
|
1844
|
-
for (int i = 0; i < temp.length; ++i) {
|
1993
|
+
for (unsigned int i = 0; i < temp.length; ++i) {
|
1845
1994
|
GumboNode* child = temp.data[i];
|
1846
1995
|
child->parent = new_formatting_node;
|
1847
1996
|
}
|
1848
1997
|
|
1849
|
-
// Step
|
1998
|
+
// Step 17.
|
1850
1999
|
append_node(parser, furthest_block, new_formatting_node);
|
1851
2000
|
|
1852
|
-
// Step
|
2001
|
+
// Step 18.
|
1853
2002
|
// If the formatting node was before the bookmark, it may shift over all
|
1854
2003
|
// indices after it, so we need to explicitly find the index and possibly
|
1855
2004
|
// adjust the bookmark.
|
@@ -1857,25 +2006,27 @@ static bool adoption_agency_algorithm(
|
|
1857
2006
|
&state->_active_formatting_elements, formatting_node);
|
1858
2007
|
assert(formatting_node_index != -1);
|
1859
2008
|
if (formatting_node_index < bookmark) {
|
2009
|
+
gumbo_debug(
|
2010
|
+
"Formatting node at %d is before bookmark at %d; decrementing.\n",
|
2011
|
+
formatting_node_index, bookmark);
|
1860
2012
|
--bookmark;
|
1861
2013
|
}
|
1862
2014
|
gumbo_vector_remove_at(
|
1863
2015
|
parser, formatting_node_index, &state->_active_formatting_elements);
|
1864
2016
|
assert(bookmark >= 0);
|
1865
|
-
assert(bookmark <= state->_active_formatting_elements.length);
|
2017
|
+
assert((unsigned int) bookmark <= state->_active_formatting_elements.length);
|
1866
2018
|
gumbo_vector_insert_at(parser, new_formatting_node, bookmark,
|
1867
|
-
|
2019
|
+
&state->_active_formatting_elements);
|
1868
2020
|
|
1869
|
-
// Step
|
1870
|
-
gumbo_vector_remove(
|
1871
|
-
|
1872
|
-
|
1873
|
-
&state->_open_elements, furthest_block) + 1;
|
2021
|
+
// Step 19.
|
2022
|
+
gumbo_vector_remove(parser, formatting_node, &state->_open_elements);
|
2023
|
+
int insert_at =
|
2024
|
+
gumbo_vector_index_of(&state->_open_elements, furthest_block) + 1;
|
1874
2025
|
assert(insert_at >= 0);
|
1875
|
-
assert(insert_at <= state->_open_elements.length);
|
2026
|
+
assert((unsigned int) insert_at <= state->_open_elements.length);
|
1876
2027
|
gumbo_vector_insert_at(
|
1877
2028
|
parser, new_formatting_node, insert_at, &state->_open_elements);
|
1878
|
-
}
|
2029
|
+
} // Step 20.
|
1879
2030
|
return true;
|
1880
2031
|
}
|
1881
2032
|
|
@@ -1898,6 +2049,7 @@ static void ignore_token(GumboParser* parser) {
|
|
1898
2049
|
|
1899
2050
|
// http://www.whatwg.org/specs/web-apps/current-work/complete/the-end.html
|
1900
2051
|
static void finish_parsing(GumboParser* parser) {
|
2052
|
+
gumbo_debug("Finishing parsing");
|
1901
2053
|
maybe_flush_text_node_buffer(parser);
|
1902
2054
|
GumboParserState* state = parser->_parser_state;
|
1903
2055
|
for (GumboNode* node = pop_current_node(parser); node;
|
@@ -1908,7 +2060,8 @@ static void finish_parsing(GumboParser* parser) {
|
|
1908
2060
|
}
|
1909
2061
|
node->parse_flags |= GUMBO_INSERTION_IMPLICIT_END_TAG;
|
1910
2062
|
}
|
1911
|
-
while (pop_current_node(parser))
|
2063
|
+
while (pop_current_node(parser))
|
2064
|
+
; // Pop them all.
|
1912
2065
|
}
|
1913
2066
|
|
1914
2067
|
static bool handle_initial(GumboParser* parser, GumboToken* token) {
|
@@ -1952,9 +2105,9 @@ static bool handle_before_html(GumboParser* parser, GumboToken* token) {
|
|
1952
2105
|
parser->_output->root = html_node;
|
1953
2106
|
set_insertion_mode(parser, GUMBO_INSERTION_MODE_BEFORE_HEAD);
|
1954
2107
|
return true;
|
1955
|
-
} else if (token->type == GUMBO_TOKEN_END_TAG &&
|
1956
|
-
!tag_in(token, false,
|
1957
|
-
|
2108
|
+
} else if (token->type == GUMBO_TOKEN_END_TAG &&
|
2109
|
+
!tag_in(token, false,
|
2110
|
+
(gumbo_tagset){TAG(HEAD), TAG(BODY), TAG(HTML), TAG(BR)})) {
|
1958
2111
|
parser_add_parse_error(parser, token);
|
1959
2112
|
ignore_token(parser);
|
1960
2113
|
return false;
|
@@ -1986,9 +2139,9 @@ static bool handle_before_head(GumboParser* parser, GumboToken* token) {
|
|
1986
2139
|
set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_HEAD);
|
1987
2140
|
parser->_parser_state->_head_element = node;
|
1988
2141
|
return true;
|
1989
|
-
} else if (token->type == GUMBO_TOKEN_END_TAG &&
|
1990
|
-
!tag_in(token, false,
|
1991
|
-
|
2142
|
+
} else if (token->type == GUMBO_TOKEN_END_TAG &&
|
2143
|
+
!tag_in(token, false,
|
2144
|
+
(gumbo_tagset){TAG(HEAD), TAG(BODY), TAG(HTML), TAG(BR)})) {
|
1992
2145
|
parser_add_parse_error(parser, token);
|
1993
2146
|
ignore_token(parser);
|
1994
2147
|
return false;
|
@@ -2020,8 +2173,9 @@ static bool handle_in_head(GumboParser* parser, GumboToken* token) {
|
|
2020
2173
|
return true;
|
2021
2174
|
} else if (tag_is(token, kStartTag, GUMBO_TAG_HTML)) {
|
2022
2175
|
return handle_in_body(parser, token);
|
2023
|
-
} else if (tag_in(token, kStartTag,
|
2024
|
-
|
2176
|
+
} else if (tag_in(token, kStartTag,
|
2177
|
+
(gumbo_tagset){TAG(BASE), TAG(BASEFONT), TAG(BGSOUND),
|
2178
|
+
TAG(MENUITEM), TAG(LINK)})) {
|
2025
2179
|
insert_element_from_token(parser, token);
|
2026
2180
|
pop_current_node(parser);
|
2027
2181
|
acknowledge_self_closing_tag(parser);
|
@@ -2038,7 +2192,8 @@ static bool handle_in_head(GumboParser* parser, GumboToken* token) {
|
|
2038
2192
|
} else if (tag_is(token, kStartTag, GUMBO_TAG_TITLE)) {
|
2039
2193
|
run_generic_parsing_algorithm(parser, token, GUMBO_LEX_RCDATA);
|
2040
2194
|
return true;
|
2041
|
-
} else if (tag_in(
|
2195
|
+
} else if (tag_in(
|
2196
|
+
token, kStartTag, (gumbo_tagset){TAG(NOFRAMES), TAG(STYLE)})) {
|
2042
2197
|
run_generic_parsing_algorithm(parser, token, GUMBO_LEX_RAWTEXT);
|
2043
2198
|
return true;
|
2044
2199
|
} else if (tag_is(token, kStartTag, GUMBO_TAG_NOSCRIPT)) {
|
@@ -2054,29 +2209,48 @@ static bool handle_in_head(GumboParser* parser, GumboToken* token) {
|
|
2054
2209
|
assert(node_html_tag_is(head, GUMBO_TAG_HEAD));
|
2055
2210
|
set_insertion_mode(parser, GUMBO_INSERTION_MODE_AFTER_HEAD);
|
2056
2211
|
return true;
|
2057
|
-
} else if (
|
2058
|
-
|
2059
|
-
|
2060
|
-
|
2212
|
+
} else if (tag_in(token, kEndTag,
|
2213
|
+
(gumbo_tagset){TAG(BODY), TAG(HTML), TAG(BR)})) {
|
2214
|
+
pop_current_node(parser);
|
2215
|
+
set_insertion_mode(parser, GUMBO_INSERTION_MODE_AFTER_HEAD);
|
2216
|
+
parser->_parser_state->_reprocess_current_token = true;
|
2217
|
+
return true;
|
2218
|
+
} else if (tag_is(token, kStartTag, GUMBO_TAG_TEMPLATE)) {
|
2219
|
+
insert_element_from_token(parser, token);
|
2220
|
+
add_formatting_element(parser, &kActiveFormattingScopeMarker);
|
2221
|
+
parser->_parser_state->_frameset_ok = false;
|
2222
|
+
set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TEMPLATE);
|
2223
|
+
push_template_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TEMPLATE);
|
2224
|
+
return true;
|
2225
|
+
} else if (tag_is(token, kEndTag, GUMBO_TAG_TEMPLATE)) {
|
2226
|
+
if (!has_open_element(parser, GUMBO_TAG_TEMPLATE)) {
|
2227
|
+
parser_add_parse_error(parser, token);
|
2228
|
+
ignore_token(parser);
|
2229
|
+
return false;
|
2230
|
+
}
|
2231
|
+
generate_all_implied_end_tags_thoroughly(parser);
|
2232
|
+
bool success = true;
|
2233
|
+
if (!node_html_tag_is(get_current_node(parser), GUMBO_TAG_TEMPLATE)) {
|
2234
|
+
parser_add_parse_error(parser, token);
|
2235
|
+
success = false;
|
2236
|
+
}
|
2237
|
+
while (!node_html_tag_is(pop_current_node(parser), GUMBO_TAG_TEMPLATE))
|
2238
|
+
;
|
2239
|
+
clear_active_formatting_elements(parser);
|
2240
|
+
pop_template_insertion_mode(parser);
|
2241
|
+
reset_insertion_mode_appropriately(parser);
|
2242
|
+
return success;
|
2061
2243
|
} else if (tag_is(token, kStartTag, GUMBO_TAG_HEAD) ||
|
2062
|
-
(token->type == GUMBO_TOKEN_END_TAG
|
2063
|
-
!tag_in(token, kEndTag, (gumbo_tagset) { TAG(BODY), TAG(HTML),
|
2064
|
-
TAG(BR) }))) {
|
2065
|
-
parser_add_parse_error(parser, token);
|
2066
|
-
return false;
|
2067
|
-
} else if (tag_is(token, kStartTag, GUMBO_TAG_UNKNOWN) && token->v.start_tag.is_self_closing) {
|
2244
|
+
(token->type == GUMBO_TOKEN_END_TAG)) {
|
2068
2245
|
parser_add_parse_error(parser, token);
|
2069
2246
|
ignore_token(parser);
|
2070
2247
|
return false;
|
2071
2248
|
} else {
|
2072
|
-
|
2073
|
-
assert(node_html_tag_is(node, GUMBO_TAG_HEAD));
|
2074
|
-
AVOID_UNUSED_VARIABLE_WARNING(node);
|
2249
|
+
pop_current_node(parser);
|
2075
2250
|
set_insertion_mode(parser, GUMBO_INSERTION_MODE_AFTER_HEAD);
|
2076
2251
|
parser->_parser_state->_reprocess_current_token = true;
|
2077
2252
|
return true;
|
2078
2253
|
}
|
2079
|
-
|
2080
2254
|
return true;
|
2081
2255
|
}
|
2082
2256
|
|
@@ -2095,12 +2269,14 @@ static bool handle_in_head_noscript(GumboParser* parser, GumboToken* token) {
|
|
2095
2269
|
return true;
|
2096
2270
|
} else if (token->type == GUMBO_TOKEN_WHITESPACE ||
|
2097
2271
|
token->type == GUMBO_TOKEN_COMMENT ||
|
2098
|
-
tag_in(token, kStartTag,
|
2099
|
-
|
2100
|
-
|
2101
|
-
|
2102
|
-
|
2103
|
-
|
2272
|
+
tag_in(token, kStartTag,
|
2273
|
+
(gumbo_tagset){TAG(BASEFONT), TAG(BGSOUND), TAG(LINK),
|
2274
|
+
TAG(META), TAG(NOFRAMES), TAG(STYLE)})) {
|
2275
|
+
return handle_in_head(parser, token);
|
2276
|
+
} else if (tag_in(
|
2277
|
+
token, kStartTag, (gumbo_tagset){TAG(HEAD), TAG(NOSCRIPT)}) ||
|
2278
|
+
(token->type == GUMBO_TOKEN_END_TAG &&
|
2279
|
+
!tag_is(token, kEndTag, GUMBO_TAG_BR))) {
|
2104
2280
|
parser_add_parse_error(parser, token);
|
2105
2281
|
ignore_token(parser);
|
2106
2282
|
return false;
|
@@ -2139,10 +2315,10 @@ static bool handle_after_head(GumboParser* parser, GumboToken* token) {
|
|
2139
2315
|
insert_element_from_token(parser, token);
|
2140
2316
|
set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_FRAMESET);
|
2141
2317
|
return true;
|
2142
|
-
} else if (tag_in(token, kStartTag,
|
2143
|
-
|
2144
|
-
|
2145
|
-
|
2318
|
+
} else if (tag_in(token, kStartTag,
|
2319
|
+
(gumbo_tagset){TAG(BASE), TAG(BASEFONT), TAG(BGSOUND),
|
2320
|
+
TAG(LINK), TAG(META), TAG(NOFRAMES), TAG(SCRIPT),
|
2321
|
+
TAG(STYLE), TAG(TEMPLATE), TAG(TITLE)})) {
|
2146
2322
|
parser_add_parse_error(parser, token);
|
2147
2323
|
assert(state->_head_element != NULL);
|
2148
2324
|
// This must be flushed before we push the head element on, as there may be
|
@@ -2152,9 +2328,12 @@ static bool handle_after_head(GumboParser* parser, GumboToken* token) {
|
|
2152
2328
|
bool result = handle_in_head(parser, token);
|
2153
2329
|
gumbo_vector_remove(parser, state->_head_element, &state->_open_elements);
|
2154
2330
|
return result;
|
2331
|
+
} else if (tag_is(token, kEndTag, GUMBO_TAG_TEMPLATE)) {
|
2332
|
+
return handle_in_head(parser, token);
|
2155
2333
|
} else if (tag_is(token, kStartTag, GUMBO_TAG_HEAD) ||
|
2156
|
-
|
2157
|
-
|
2334
|
+
(token->type == GUMBO_TOKEN_END_TAG &&
|
2335
|
+
!tag_in(token, kEndTag,
|
2336
|
+
(gumbo_tagset){TAG(BODY), TAG(HTML), TAG(BR)}))) {
|
2158
2337
|
parser_add_parse_error(parser, token);
|
2159
2338
|
ignore_token(parser);
|
2160
2339
|
return false;
|
@@ -2168,24 +2347,23 @@ static bool handle_after_head(GumboParser* parser, GumboToken* token) {
|
|
2168
2347
|
|
2169
2348
|
static void destroy_node(GumboParser* parser, GumboNode* node) {
|
2170
2349
|
switch (node->type) {
|
2171
|
-
case GUMBO_NODE_DOCUMENT:
|
2172
|
-
|
2173
|
-
|
2174
|
-
|
2175
|
-
destroy_node(parser, doc->children.data[i]);
|
2176
|
-
}
|
2177
|
-
gumbo_parser_deallocate(parser, (void*) doc->children.data);
|
2178
|
-
gumbo_parser_deallocate(parser, (void*) doc->name);
|
2179
|
-
gumbo_parser_deallocate(parser, (void*) doc->public_identifier);
|
2180
|
-
gumbo_parser_deallocate(parser, (void*) doc->system_identifier);
|
2350
|
+
case GUMBO_NODE_DOCUMENT: {
|
2351
|
+
GumboDocument* doc = &node->v.document;
|
2352
|
+
for (unsigned int i = 0; i < doc->children.length; ++i) {
|
2353
|
+
destroy_node(parser, doc->children.data[i]);
|
2181
2354
|
}
|
2182
|
-
|
2355
|
+
gumbo_parser_deallocate(parser, (void*) doc->children.data);
|
2356
|
+
gumbo_parser_deallocate(parser, (void*) doc->name);
|
2357
|
+
gumbo_parser_deallocate(parser, (void*) doc->public_identifier);
|
2358
|
+
gumbo_parser_deallocate(parser, (void*) doc->system_identifier);
|
2359
|
+
} break;
|
2360
|
+
case GUMBO_NODE_TEMPLATE:
|
2183
2361
|
case GUMBO_NODE_ELEMENT:
|
2184
|
-
for (int i = 0; i < node->v.element.attributes.length; ++i) {
|
2362
|
+
for (unsigned int i = 0; i < node->v.element.attributes.length; ++i) {
|
2185
2363
|
gumbo_destroy_attribute(parser, node->v.element.attributes.data[i]);
|
2186
2364
|
}
|
2187
2365
|
gumbo_parser_deallocate(parser, node->v.element.attributes.data);
|
2188
|
-
for (int i = 0; i < node->v.element.children.length; ++i) {
|
2366
|
+
for (unsigned int i = 0; i < node->v.element.children.length; ++i) {
|
2189
2367
|
destroy_node(parser, node->v.element.children.data[i]);
|
2190
2368
|
}
|
2191
2369
|
gumbo_parser_deallocate(parser, node->v.element.children.data);
|
@@ -2226,20 +2404,26 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
2226
2404
|
ignore_token(parser);
|
2227
2405
|
return false;
|
2228
2406
|
} else if (tag_is(token, kStartTag, GUMBO_TAG_HTML)) {
|
2407
|
+
parser_add_parse_error(parser, token);
|
2408
|
+
if (has_open_element(parser, GUMBO_TAG_TEMPLATE)) {
|
2409
|
+
ignore_token(parser);
|
2410
|
+
return false;
|
2411
|
+
}
|
2229
2412
|
assert(parser->_output->root != NULL);
|
2230
2413
|
assert(parser->_output->root->type == GUMBO_NODE_ELEMENT);
|
2231
|
-
parser_add_parse_error(parser, token);
|
2232
2414
|
merge_attributes(parser, token, parser->_output->root);
|
2233
2415
|
return false;
|
2234
|
-
} else if (tag_in(token, kStartTag,
|
2235
|
-
|
2236
|
-
|
2237
|
-
|
2416
|
+
} else if (tag_in(token, kStartTag,
|
2417
|
+
(gumbo_tagset){TAG(BASE), TAG(BASEFONT), TAG(BGSOUND),
|
2418
|
+
TAG(MENUITEM), TAG(LINK), TAG(META), TAG(NOFRAMES),
|
2419
|
+
TAG(SCRIPT), TAG(STYLE), TAG(TEMPLATE), TAG(TITLE)}) ||
|
2420
|
+
tag_is(token, kEndTag, GUMBO_TAG_TEMPLATE)) {
|
2238
2421
|
return handle_in_head(parser, token);
|
2239
2422
|
} else if (tag_is(token, kStartTag, GUMBO_TAG_BODY)) {
|
2240
2423
|
parser_add_parse_error(parser, token);
|
2241
2424
|
if (state->_open_elements.length < 2 ||
|
2242
|
-
!node_html_tag_is(state->_open_elements.data[1], GUMBO_TAG_BODY)
|
2425
|
+
!node_html_tag_is(state->_open_elements.data[1], GUMBO_TAG_BODY) ||
|
2426
|
+
has_open_element(parser, GUMBO_TAG_TEMPLATE)) {
|
2243
2427
|
ignore_token(parser);
|
2244
2428
|
return false;
|
2245
2429
|
}
|
@@ -2273,7 +2457,7 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
2273
2457
|
// Remove the body node. We may want to factor this out into a generic
|
2274
2458
|
// helper, but right now this is the only code that needs to do this.
|
2275
2459
|
GumboVector* children = &parser->_output->root->v.element.children;
|
2276
|
-
for (int i = 0; i < children->length; ++i) {
|
2460
|
+
for (unsigned int i = 0; i < children->length; ++i) {
|
2277
2461
|
if (children->data[i] == body_node) {
|
2278
2462
|
gumbo_vector_remove_at(parser, i, children);
|
2279
2463
|
break;
|
@@ -2286,27 +2470,32 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
2286
2470
|
set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_FRAMESET);
|
2287
2471
|
return true;
|
2288
2472
|
} else if (token->type == GUMBO_TOKEN_EOF) {
|
2289
|
-
for (int i = 0; i < state->_open_elements.length; ++i) {
|
2290
|
-
if (!node_tag_in_set(state->_open_elements.data[i],
|
2291
|
-
|
2292
|
-
|
2473
|
+
for (unsigned int i = 0; i < state->_open_elements.length; ++i) {
|
2474
|
+
if (!node_tag_in_set(state->_open_elements.data[i],
|
2475
|
+
(gumbo_tagset){TAG(DD), TAG(DT), TAG(LI), TAG(P), TAG(TBODY),
|
2476
|
+
TAG(TD), TAG(TFOOT), TAG(TH), TAG(THEAD), TAG(TR), TAG(BODY),
|
2477
|
+
TAG(HTML)})) {
|
2293
2478
|
parser_add_parse_error(parser, token);
|
2294
|
-
return false;
|
2295
2479
|
}
|
2296
2480
|
}
|
2481
|
+
if (get_current_template_insertion_mode(parser) !=
|
2482
|
+
GUMBO_INSERTION_MODE_INITIAL) {
|
2483
|
+
return handle_in_template(parser, token);
|
2484
|
+
}
|
2297
2485
|
return true;
|
2298
|
-
} else if (tag_in(token, kEndTag, (gumbo_tagset)
|
2486
|
+
} else if (tag_in(token, kEndTag, (gumbo_tagset){TAG(BODY), TAG(HTML)})) {
|
2299
2487
|
if (!has_an_element_in_scope(parser, GUMBO_TAG_BODY)) {
|
2300
2488
|
parser_add_parse_error(parser, token);
|
2301
2489
|
ignore_token(parser);
|
2302
2490
|
return false;
|
2303
2491
|
}
|
2304
2492
|
bool success = true;
|
2305
|
-
for (int i = 0; i < state->_open_elements.length; ++i) {
|
2306
|
-
if (!node_tag_in_set(state->_open_elements.data[i],
|
2307
|
-
|
2308
|
-
|
2309
|
-
|
2493
|
+
for (unsigned int i = 0; i < state->_open_elements.length; ++i) {
|
2494
|
+
if (!node_tag_in_set(state->_open_elements.data[i],
|
2495
|
+
(gumbo_tagset){TAG(DD), TAG(DT), TAG(LI), TAG(OPTGROUP),
|
2496
|
+
TAG(OPTION), TAG(P), TAG(RB), TAG(RP), TAG(RT), TAG(RTC),
|
2497
|
+
TAG(TBODY), TAG(TD), TAG(TFOOT), TAG(TH), TAG(THEAD), TAG(TR),
|
2498
|
+
TAG(BODY), TAG(HTML)})) {
|
2310
2499
|
parser_add_parse_error(parser, token);
|
2311
2500
|
success = false;
|
2312
2501
|
break;
|
@@ -2321,48 +2510,54 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
2321
2510
|
record_end_of_element(state->_current_token, &body->v.element);
|
2322
2511
|
}
|
2323
2512
|
return success;
|
2324
|
-
} else if (tag_in(token, kStartTag,
|
2325
|
-
|
2326
|
-
|
2327
|
-
|
2328
|
-
|
2513
|
+
} else if (tag_in(token, kStartTag,
|
2514
|
+
(gumbo_tagset){TAG(ADDRESS), TAG(ARTICLE), TAG(ASIDE),
|
2515
|
+
TAG(BLOCKQUOTE), TAG(CENTER), TAG(DETAILS), TAG(DIR),
|
2516
|
+
TAG(DIV), TAG(DL), TAG(FIELDSET), TAG(FIGCAPTION),
|
2517
|
+
TAG(FIGURE), TAG(FOOTER), TAG(HEADER), TAG(HGROUP),
|
2518
|
+
TAG(MENU), TAG(MAIN), TAG(NAV), TAG(OL), TAG(P),
|
2519
|
+
TAG(SECTION), TAG(SUMMARY), TAG(UL)})) {
|
2329
2520
|
bool result = maybe_implicitly_close_p_tag(parser, token);
|
2330
2521
|
insert_element_from_token(parser, token);
|
2331
2522
|
return result;
|
2332
|
-
} else if (tag_in(token, kStartTag, (gumbo_tagset)
|
2333
|
-
|
2523
|
+
} else if (tag_in(token, kStartTag, (gumbo_tagset){TAG(H1), TAG(H2), TAG(H3),
|
2524
|
+
TAG(H4), TAG(H5), TAG(H6)})) {
|
2334
2525
|
bool result = maybe_implicitly_close_p_tag(parser, token);
|
2335
|
-
if (node_tag_in_set(
|
2336
|
-
|
2526
|
+
if (node_tag_in_set(
|
2527
|
+
get_current_node(parser), (gumbo_tagset){TAG(H1), TAG(H2), TAG(H3),
|
2528
|
+
TAG(H4), TAG(H5), TAG(H6)})) {
|
2337
2529
|
parser_add_parse_error(parser, token);
|
2338
2530
|
pop_current_node(parser);
|
2339
2531
|
result = false;
|
2340
2532
|
}
|
2341
2533
|
insert_element_from_token(parser, token);
|
2342
2534
|
return result;
|
2343
|
-
} else if (tag_in(token, kStartTag, (gumbo_tagset)
|
2535
|
+
} else if (tag_in(token, kStartTag, (gumbo_tagset){TAG(PRE), TAG(LISTING)})) {
|
2344
2536
|
bool result = maybe_implicitly_close_p_tag(parser, token);
|
2345
2537
|
insert_element_from_token(parser, token);
|
2346
2538
|
state->_ignore_next_linefeed = true;
|
2347
2539
|
state->_frameset_ok = false;
|
2348
2540
|
return result;
|
2349
2541
|
} else if (tag_is(token, kStartTag, GUMBO_TAG_FORM)) {
|
2350
|
-
if (state->_form_element != NULL
|
2542
|
+
if (state->_form_element != NULL &&
|
2543
|
+
!has_open_element(parser, GUMBO_TAG_TEMPLATE)) {
|
2351
2544
|
gumbo_debug("Ignoring nested form.\n");
|
2352
2545
|
parser_add_parse_error(parser, token);
|
2353
2546
|
ignore_token(parser);
|
2354
2547
|
return false;
|
2355
2548
|
}
|
2356
2549
|
bool result = maybe_implicitly_close_p_tag(parser, token);
|
2357
|
-
|
2358
|
-
|
2550
|
+
GumboNode* form_element = insert_element_from_token(parser, token);
|
2551
|
+
if (!has_open_element(parser, GUMBO_TAG_TEMPLATE)) {
|
2552
|
+
state->_form_element = form_element;
|
2553
|
+
}
|
2359
2554
|
return result;
|
2360
2555
|
} else if (tag_is(token, kStartTag, GUMBO_TAG_LI)) {
|
2361
2556
|
maybe_implicitly_close_list_tag(parser, token, true);
|
2362
2557
|
bool result = maybe_implicitly_close_p_tag(parser, token);
|
2363
2558
|
insert_element_from_token(parser, token);
|
2364
2559
|
return result;
|
2365
|
-
|
2560
|
+
} else if (tag_in(token, kStartTag, (gumbo_tagset){TAG(DD), TAG(DT)})) {
|
2366
2561
|
maybe_implicitly_close_list_tag(parser, token, false);
|
2367
2562
|
bool result = maybe_implicitly_close_p_tag(parser, token);
|
2368
2563
|
insert_element_from_token(parser, token);
|
@@ -2375,7 +2570,8 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
2375
2570
|
} else if (tag_is(token, kStartTag, GUMBO_TAG_BUTTON)) {
|
2376
2571
|
if (has_an_element_in_scope(parser, GUMBO_TAG_BUTTON)) {
|
2377
2572
|
parser_add_parse_error(parser, token);
|
2378
|
-
implicitly_close_tags(
|
2573
|
+
implicitly_close_tags(
|
2574
|
+
parser, token, GUMBO_NAMESPACE_HTML, GUMBO_TAG_BUTTON);
|
2379
2575
|
state->_reprocess_current_token = true;
|
2380
2576
|
return false;
|
2381
2577
|
}
|
@@ -2383,45 +2579,63 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
2383
2579
|
insert_element_from_token(parser, token);
|
2384
2580
|
state->_frameset_ok = false;
|
2385
2581
|
return true;
|
2386
|
-
|
2387
|
-
|
2388
|
-
|
2389
|
-
|
2390
|
-
|
2391
|
-
|
2582
|
+
} else if (tag_in(token, kEndTag,
|
2583
|
+
(gumbo_tagset){TAG(ADDRESS), TAG(ARTICLE), TAG(ASIDE),
|
2584
|
+
TAG(BLOCKQUOTE), TAG(BUTTON), TAG(CENTER), TAG(DETAILS),
|
2585
|
+
TAG(DIR), TAG(DIV), TAG(DL), TAG(FIELDSET),
|
2586
|
+
TAG(FIGCAPTION), TAG(FIGURE), TAG(FOOTER), TAG(HEADER),
|
2587
|
+
TAG(HGROUP), TAG(LISTING), TAG(MAIN), TAG(MENU), TAG(NAV),
|
2588
|
+
TAG(OL), TAG(PRE), TAG(SECTION), TAG(SUMMARY), TAG(UL)})) {
|
2392
2589
|
GumboTag tag = token->v.end_tag;
|
2393
2590
|
if (!has_an_element_in_scope(parser, tag)) {
|
2394
2591
|
parser_add_parse_error(parser, token);
|
2395
2592
|
ignore_token(parser);
|
2396
2593
|
return false;
|
2397
2594
|
}
|
2398
|
-
implicitly_close_tags(
|
2595
|
+
implicitly_close_tags(
|
2596
|
+
parser, token, GUMBO_NAMESPACE_HTML, token->v.end_tag);
|
2399
2597
|
return true;
|
2400
2598
|
} else if (tag_is(token, kEndTag, GUMBO_TAG_FORM)) {
|
2401
|
-
|
2402
|
-
|
2403
|
-
|
2404
|
-
|
2405
|
-
|
2406
|
-
|
2407
|
-
|
2408
|
-
|
2409
|
-
|
2410
|
-
|
2411
|
-
|
2412
|
-
|
2413
|
-
|
2414
|
-
|
2415
|
-
|
2416
|
-
|
2417
|
-
|
2599
|
+
if (has_open_element(parser, GUMBO_TAG_TEMPLATE)) {
|
2600
|
+
if (!has_an_element_in_scope(parser, GUMBO_TAG_FORM)) {
|
2601
|
+
parser_add_parse_error(parser, token);
|
2602
|
+
ignore_token(parser);
|
2603
|
+
return false;
|
2604
|
+
}
|
2605
|
+
bool success = true;
|
2606
|
+
generate_implied_end_tags(parser, GUMBO_TAG_LAST);
|
2607
|
+
if (!node_html_tag_is(get_current_node(parser), GUMBO_TAG_FORM)) {
|
2608
|
+
parser_add_parse_error(parser, token);
|
2609
|
+
return false;
|
2610
|
+
}
|
2611
|
+
while (!node_html_tag_is(pop_current_node(parser), GUMBO_TAG_FORM))
|
2612
|
+
;
|
2613
|
+
return success;
|
2614
|
+
} else {
|
2615
|
+
bool result = true;
|
2616
|
+
const GumboNode* node = state->_form_element;
|
2617
|
+
assert(!node || node->type == GUMBO_NODE_ELEMENT);
|
2618
|
+
state->_form_element = NULL;
|
2619
|
+
if (!node || !has_node_in_scope(parser, node)) {
|
2620
|
+
gumbo_debug("Closing an unopened form.\n");
|
2621
|
+
parser_add_parse_error(parser, token);
|
2622
|
+
ignore_token(parser);
|
2623
|
+
return false;
|
2624
|
+
}
|
2625
|
+
// This differs from implicitly_close_tags because we remove *only* the
|
2626
|
+
// <form> element; other nodes are left in scope.
|
2627
|
+
generate_implied_end_tags(parser, GUMBO_TAG_LAST);
|
2628
|
+
if (get_current_node(parser) != node) {
|
2629
|
+
parser_add_parse_error(parser, token);
|
2630
|
+
result = false;
|
2631
|
+
}
|
2418
2632
|
|
2419
|
-
|
2420
|
-
|
2421
|
-
|
2422
|
-
|
2423
|
-
|
2424
|
-
|
2633
|
+
GumboVector* open_elements = &state->_open_elements;
|
2634
|
+
int index = gumbo_vector_index_of(open_elements, node);
|
2635
|
+
assert(index >= 0);
|
2636
|
+
gumbo_vector_remove_at(parser, index, open_elements);
|
2637
|
+
return result;
|
2638
|
+
}
|
2425
2639
|
} else if (tag_is(token, kEndTag, GUMBO_TAG_P)) {
|
2426
2640
|
if (!has_an_element_in_button_scope(parser, GUMBO_TAG_P)) {
|
2427
2641
|
parser_add_parse_error(parser, token);
|
@@ -2431,15 +2645,17 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
2431
2645
|
state->_reprocess_current_token = true;
|
2432
2646
|
return false;
|
2433
2647
|
}
|
2434
|
-
return implicitly_close_tags(
|
2648
|
+
return implicitly_close_tags(
|
2649
|
+
parser, token, GUMBO_NAMESPACE_HTML, GUMBO_TAG_P);
|
2435
2650
|
} else if (tag_is(token, kEndTag, GUMBO_TAG_LI)) {
|
2436
2651
|
if (!has_an_element_in_list_scope(parser, GUMBO_TAG_LI)) {
|
2437
2652
|
parser_add_parse_error(parser, token);
|
2438
2653
|
ignore_token(parser);
|
2439
2654
|
return false;
|
2440
2655
|
}
|
2441
|
-
return implicitly_close_tags(
|
2442
|
-
|
2656
|
+
return implicitly_close_tags(
|
2657
|
+
parser, token, GUMBO_NAMESPACE_HTML, GUMBO_TAG_LI);
|
2658
|
+
} else if (tag_in(token, kEndTag, (gumbo_tagset){TAG(DD), TAG(DT)})) {
|
2443
2659
|
assert(token->type == GUMBO_TOKEN_END_TAG);
|
2444
2660
|
GumboTag token_tag = token->v.end_tag;
|
2445
2661
|
if (!has_an_element_in_scope(parser, token_tag)) {
|
@@ -2447,11 +2663,13 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
2447
2663
|
ignore_token(parser);
|
2448
2664
|
return false;
|
2449
2665
|
}
|
2450
|
-
return implicitly_close_tags(
|
2451
|
-
|
2452
|
-
|
2453
|
-
|
2454
|
-
|
2666
|
+
return implicitly_close_tags(
|
2667
|
+
parser, token, GUMBO_NAMESPACE_HTML, token_tag);
|
2668
|
+
} else if (tag_in(token, kEndTag, (gumbo_tagset){TAG(H1), TAG(H2), TAG(H3),
|
2669
|
+
TAG(H4), TAG(H5), TAG(H6)})) {
|
2670
|
+
if (!has_an_element_in_scope_with_tagname(
|
2671
|
+
parser, 6, (GumboTag[]){GUMBO_TAG_H1, GUMBO_TAG_H2, GUMBO_TAG_H3,
|
2672
|
+
GUMBO_TAG_H4, GUMBO_TAG_H5, GUMBO_TAG_H6})) {
|
2455
2673
|
// No heading open; ignore the token entirely.
|
2456
2674
|
parser_add_parse_error(parser, token);
|
2457
2675
|
ignore_token(parser);
|
@@ -2469,8 +2687,9 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
2469
2687
|
}
|
2470
2688
|
do {
|
2471
2689
|
current_node = pop_current_node(parser);
|
2472
|
-
} while (!node_tag_in_set(
|
2473
|
-
|
2690
|
+
} while (!node_tag_in_set(
|
2691
|
+
current_node, (gumbo_tagset){TAG(H1), TAG(H2), TAG(H3),
|
2692
|
+
TAG(H4), TAG(H5), TAG(H6)}));
|
2474
2693
|
return success;
|
2475
2694
|
}
|
2476
2695
|
} else if (tag_is(token, kStartTag, GUMBO_TAG_A)) {
|
@@ -2488,18 +2707,17 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
2488
2707
|
if (find_last_anchor_index(parser, &last_a)) {
|
2489
2708
|
void* last_element = gumbo_vector_remove_at(
|
2490
2709
|
parser, last_a, &state->_active_formatting_elements);
|
2491
|
-
gumbo_vector_remove(
|
2492
|
-
parser, last_element, &state->_open_elements);
|
2710
|
+
gumbo_vector_remove(parser, last_element, &state->_open_elements);
|
2493
2711
|
}
|
2494
2712
|
success = false;
|
2495
2713
|
}
|
2496
2714
|
reconstruct_active_formatting_elements(parser);
|
2497
2715
|
add_formatting_element(parser, insert_element_from_token(parser, token));
|
2498
2716
|
return success;
|
2499
|
-
|
2500
|
-
|
2501
|
-
|
2502
|
-
|
2717
|
+
} else if (tag_in(token, kStartTag,
|
2718
|
+
(gumbo_tagset){TAG(B), TAG(BIG), TAG(CODE), TAG(EM), TAG(FONT),
|
2719
|
+
TAG(I), TAG(S), TAG(SMALL), TAG(STRIKE), TAG(STRONG),
|
2720
|
+
TAG(TT), TAG(U)})) {
|
2503
2721
|
reconstruct_active_formatting_elements(parser);
|
2504
2722
|
add_formatting_element(parser, insert_element_from_token(parser, token));
|
2505
2723
|
return true;
|
@@ -2515,20 +2733,20 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
2515
2733
|
insert_element_from_token(parser, token);
|
2516
2734
|
add_formatting_element(parser, get_current_node(parser));
|
2517
2735
|
return result;
|
2518
|
-
|
2519
|
-
|
2520
|
-
|
2521
|
-
|
2736
|
+
} else if (tag_in(token, kEndTag,
|
2737
|
+
(gumbo_tagset){TAG(A), TAG(B), TAG(BIG), TAG(CODE), TAG(EM),
|
2738
|
+
TAG(FONT), TAG(I), TAG(NOBR), TAG(S), TAG(SMALL),
|
2739
|
+
TAG(STRIKE), TAG(STRONG), TAG(TT), TAG(U)})) {
|
2522
2740
|
return adoption_agency_algorithm(parser, token, token->v.end_tag);
|
2523
|
-
|
2524
|
-
|
2741
|
+
} else if (tag_in(token, kStartTag,
|
2742
|
+
(gumbo_tagset){TAG(APPLET), TAG(MARQUEE), TAG(OBJECT)})) {
|
2525
2743
|
reconstruct_active_formatting_elements(parser);
|
2526
2744
|
insert_element_from_token(parser, token);
|
2527
2745
|
add_formatting_element(parser, &kActiveFormattingScopeMarker);
|
2528
2746
|
set_frameset_not_ok(parser);
|
2529
2747
|
return true;
|
2530
|
-
|
2531
|
-
|
2748
|
+
} else if (tag_in(token, kEndTag,
|
2749
|
+
(gumbo_tagset){TAG(APPLET), TAG(MARQUEE), TAG(OBJECT)})) {
|
2532
2750
|
GumboTag token_tag = token->v.end_tag;
|
2533
2751
|
if (!has_an_element_in_table_scope(parser, token_tag)) {
|
2534
2752
|
parser_add_parse_error(parser, token);
|
@@ -2547,8 +2765,9 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
2547
2765
|
set_frameset_not_ok(parser);
|
2548
2766
|
set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE);
|
2549
2767
|
return true;
|
2550
|
-
|
2551
|
-
|
2768
|
+
} else if (tag_in(token, kStartTag,
|
2769
|
+
(gumbo_tagset){TAG(AREA), TAG(BR), TAG(EMBED), TAG(IMG),
|
2770
|
+
TAG(IMAGE), TAG(KEYGEN), TAG(WBR)})) {
|
2552
2771
|
bool success = true;
|
2553
2772
|
if (tag_is(token, kStartTag, GUMBO_TAG_IMAGE)) {
|
2554
2773
|
success = false;
|
@@ -2578,7 +2797,8 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
2578
2797
|
pop_current_node(parser);
|
2579
2798
|
acknowledge_self_closing_tag(parser);
|
2580
2799
|
return true;
|
2581
|
-
|
2800
|
+
} else if (tag_in(token, kStartTag,
|
2801
|
+
(gumbo_tagset){TAG(PARAM), TAG(SOURCE), TAG(TRACK)})) {
|
2582
2802
|
insert_element_from_token(parser, token);
|
2583
2803
|
pop_current_node(parser);
|
2584
2804
|
acknowledge_self_closing_tag(parser);
|
@@ -2592,7 +2812,8 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
2592
2812
|
return result;
|
2593
2813
|
} else if (tag_is(token, kStartTag, GUMBO_TAG_ISINDEX)) {
|
2594
2814
|
parser_add_parse_error(parser, token);
|
2595
|
-
if (parser->_parser_state->_form_element != NULL
|
2815
|
+
if (parser->_parser_state->_form_element != NULL &&
|
2816
|
+
!has_open_element(parser, GUMBO_TAG_TEMPLATE)) {
|
2596
2817
|
ignore_token(parser);
|
2597
2818
|
return false;
|
2598
2819
|
}
|
@@ -2607,15 +2828,18 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
2607
2828
|
|
2608
2829
|
GumboNode* form = insert_element_of_tag_type(
|
2609
2830
|
parser, GUMBO_TAG_FORM, GUMBO_INSERTION_FROM_ISINDEX);
|
2831
|
+
if (!has_open_element(parser, GUMBO_TAG_TEMPLATE)) {
|
2832
|
+
parser->_parser_state->_form_element = form;
|
2833
|
+
}
|
2610
2834
|
if (action_attr) {
|
2611
2835
|
gumbo_vector_add(parser, action_attr, &form->v.element.attributes);
|
2612
2836
|
}
|
2613
|
-
insert_element_of_tag_type(
|
2614
|
-
|
2615
|
-
pop_current_node(parser);
|
2837
|
+
insert_element_of_tag_type(
|
2838
|
+
parser, GUMBO_TAG_HR, GUMBO_INSERTION_FROM_ISINDEX);
|
2839
|
+
pop_current_node(parser); // <hr>
|
2616
2840
|
|
2617
|
-
insert_element_of_tag_type(
|
2618
|
-
|
2841
|
+
insert_element_of_tag_type(
|
2842
|
+
parser, GUMBO_TAG_LABEL, GUMBO_INSERTION_FROM_ISINDEX);
|
2619
2843
|
TextNodeBufferState* text_state = &parser->_parser_state->_text_node;
|
2620
2844
|
text_state->_start_original_text = token->original_text.data;
|
2621
2845
|
text_state->_start_position = token->position;
|
@@ -2628,15 +2852,15 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
2628
2852
|
text_state->_buffer.capacity = prompt_attr_length + 1;
|
2629
2853
|
gumbo_destroy_attribute(parser, prompt_attr);
|
2630
2854
|
} else {
|
2631
|
-
GumboStringPiece prompt_text =
|
2632
|
-
"This is a searchable index. Enter search keywords: ");
|
2855
|
+
GumboStringPiece prompt_text =
|
2856
|
+
GUMBO_STRING("This is a searchable index. Enter search keywords: ");
|
2633
2857
|
gumbo_string_buffer_append_string(
|
2634
2858
|
parser, &prompt_text, &text_state->_buffer);
|
2635
2859
|
}
|
2636
2860
|
|
2637
2861
|
GumboNode* input = insert_element_of_tag_type(
|
2638
2862
|
parser, GUMBO_TAG_INPUT, GUMBO_INSERTION_FROM_ISINDEX);
|
2639
|
-
for (int i = 0; i < token_attrs->length; ++i) {
|
2863
|
+
for (unsigned int i = 0; i < token_attrs->length; ++i) {
|
2640
2864
|
GumboAttribute* attr = token_attrs->data[i];
|
2641
2865
|
if (attr != prompt_attr && attr != action_attr && attr != name_attr) {
|
2642
2866
|
gumbo_vector_add(parser, attr, &input->v.element.attributes);
|
@@ -2649,6 +2873,13 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
2649
2873
|
// touching the attributes.
|
2650
2874
|
ignore_token(parser);
|
2651
2875
|
|
2876
|
+
// The name attribute, if present, should be destroyed since it's ignored
|
2877
|
+
// when copying over. The action attribute should be kept since it's moved
|
2878
|
+
// to the form.
|
2879
|
+
if (name_attr) {
|
2880
|
+
gumbo_destroy_attribute(parser, name_attr);
|
2881
|
+
}
|
2882
|
+
|
2652
2883
|
GumboAttribute* name =
|
2653
2884
|
gumbo_parser_allocate(parser, sizeof(GumboAttribute));
|
2654
2885
|
GumboStringPiece name_str = GUMBO_STRING("name");
|
@@ -2664,12 +2895,15 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
2664
2895
|
name->value_end = kGumboEmptySourcePosition;
|
2665
2896
|
gumbo_vector_add(parser, name, &input->v.element.attributes);
|
2666
2897
|
|
2667
|
-
pop_current_node(parser);
|
2668
|
-
pop_current_node(parser);
|
2898
|
+
pop_current_node(parser); // <input>
|
2899
|
+
pop_current_node(parser); // <label>
|
2669
2900
|
insert_element_of_tag_type(
|
2670
2901
|
parser, GUMBO_TAG_HR, GUMBO_INSERTION_FROM_ISINDEX);
|
2671
|
-
pop_current_node(parser);
|
2672
|
-
pop_current_node(parser);
|
2902
|
+
pop_current_node(parser); // <hr>
|
2903
|
+
pop_current_node(parser); // <form>
|
2904
|
+
if (!has_open_element(parser, GUMBO_TAG_TEMPLATE)) {
|
2905
|
+
parser->_parser_state->_form_element = NULL;
|
2906
|
+
}
|
2673
2907
|
return false;
|
2674
2908
|
} else if (tag_is(token, kStartTag, GUMBO_TAG_TEXTAREA)) {
|
2675
2909
|
run_generic_parsing_algorithm(parser, token, GUMBO_LEX_RCDATA);
|
@@ -2704,19 +2938,27 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
2704
2938
|
set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_SELECT);
|
2705
2939
|
}
|
2706
2940
|
return true;
|
2707
|
-
|
2941
|
+
} else if (tag_in(token, kStartTag,
|
2942
|
+
(gumbo_tagset){TAG(OPTION), TAG(OPTGROUP)})) {
|
2708
2943
|
if (node_html_tag_is(get_current_node(parser), GUMBO_TAG_OPTION)) {
|
2709
2944
|
pop_current_node(parser);
|
2710
2945
|
}
|
2711
2946
|
reconstruct_active_formatting_elements(parser);
|
2712
2947
|
insert_element_from_token(parser, token);
|
2713
2948
|
return true;
|
2714
|
-
|
2949
|
+
} else if (tag_in(token, kStartTag,
|
2950
|
+
(gumbo_tagset){TAG(RB), TAG(RP), TAG(RT), TAG(RTC)})) {
|
2715
2951
|
bool success = true;
|
2952
|
+
GumboTag exception =
|
2953
|
+
tag_in(token, kStartTag, (gumbo_tagset){TAG(RT), TAG(RP)})
|
2954
|
+
? GUMBO_TAG_RTC
|
2955
|
+
: GUMBO_TAG_LAST;
|
2716
2956
|
if (has_an_element_in_scope(parser, GUMBO_TAG_RUBY)) {
|
2717
|
-
generate_implied_end_tags(parser,
|
2957
|
+
generate_implied_end_tags(parser, exception);
|
2718
2958
|
}
|
2719
|
-
if (!node_html_tag_is(get_current_node(parser), GUMBO_TAG_RUBY)
|
2959
|
+
if (!node_html_tag_is(get_current_node(parser), GUMBO_TAG_RUBY) &&
|
2960
|
+
!(exception == GUMBO_TAG_LAST ||
|
2961
|
+
node_html_tag_is(get_current_node(parser), GUMBO_TAG_RTC))) {
|
2720
2962
|
parser_add_parse_error(parser, token);
|
2721
2963
|
success = false;
|
2722
2964
|
}
|
@@ -2749,10 +2991,10 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
2749
2991
|
acknowledge_self_closing_tag(parser);
|
2750
2992
|
}
|
2751
2993
|
return true;
|
2752
|
-
|
2753
|
-
|
2754
|
-
|
2755
|
-
|
2994
|
+
} else if (tag_in(token, kStartTag,
|
2995
|
+
(gumbo_tagset){TAG(CAPTION), TAG(COL), TAG(COLGROUP),
|
2996
|
+
TAG(FRAME), TAG(HEAD), TAG(TBODY), TAG(TD), TAG(TFOOT),
|
2997
|
+
TAG(TH), TAG(THEAD), TAG(TR)})) {
|
2756
2998
|
parser_add_parse_error(parser, token);
|
2757
2999
|
ignore_token(parser);
|
2758
3000
|
return false;
|
@@ -2771,14 +3013,15 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
2771
3013
|
// If we see a), implicitly close everything up to and including it. If we
|
2772
3014
|
// see b), then record a parse error, don't close anything (except the
|
2773
3015
|
// implied end tags) and ignore the end tag token.
|
2774
|
-
for (int i = state->_open_elements.length; --i >= 0;
|
3016
|
+
for (int i = state->_open_elements.length; --i >= 0;) {
|
2775
3017
|
const GumboNode* node = state->_open_elements.data[i];
|
2776
3018
|
if (node_html_tag_is(node, end_tag)) {
|
2777
3019
|
generate_implied_end_tags(parser, end_tag);
|
2778
3020
|
// TODO(jdtang): Do I need to add a parse error here? The condition in
|
2779
3021
|
// the spec seems like it's the inverse of the loop condition above, and
|
2780
3022
|
// so would never fire.
|
2781
|
-
while (node != pop_current_node(parser))
|
3023
|
+
while (node != pop_current_node(parser))
|
3024
|
+
; // Pop everything.
|
2782
3025
|
return true;
|
2783
3026
|
} else if (is_special_node(node)) {
|
2784
3027
|
parser_add_parse_error(parser, token);
|
@@ -2794,7 +3037,8 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
2794
3037
|
|
2795
3038
|
// http://www.whatwg.org/specs/web-apps/current-work/complete/tokenization.html#parsing-main-incdata
|
2796
3039
|
static bool handle_text(GumboParser* parser, GumboToken* token) {
|
2797
|
-
if (token->type == GUMBO_TOKEN_CHARACTER ||
|
3040
|
+
if (token->type == GUMBO_TOKEN_CHARACTER ||
|
3041
|
+
token->type == GUMBO_TOKEN_WHITESPACE) {
|
2798
3042
|
insert_text_token(parser, token);
|
2799
3043
|
} else {
|
2800
3044
|
// We provide only bare-bones script handling that doesn't involve any of
|
@@ -2854,11 +3098,12 @@ static bool handle_in_table(GumboParser* parser, GumboToken* token) {
|
|
2854
3098
|
parser->_parser_state->_reprocess_current_token = true;
|
2855
3099
|
set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_COLUMN_GROUP);
|
2856
3100
|
return true;
|
2857
|
-
} else if (tag_in(token, kStartTag,
|
2858
|
-
|
3101
|
+
} else if (tag_in(token, kStartTag,
|
3102
|
+
(gumbo_tagset){TAG(TBODY), TAG(TFOOT), TAG(THEAD), TAG(TD),
|
3103
|
+
TAG(TH), TAG(TR)})) {
|
2859
3104
|
clear_stack_to_table_context(parser);
|
2860
3105
|
set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE_BODY);
|
2861
|
-
if (tag_in(token, kStartTag, (gumbo_tagset)
|
3106
|
+
if (tag_in(token, kStartTag, (gumbo_tagset){TAG(TD), TAG(TH), TAG(TR)})) {
|
2862
3107
|
insert_element_of_tag_type(
|
2863
3108
|
parser, GUMBO_TAG_TBODY, GUMBO_INSERTION_IMPLIED);
|
2864
3109
|
state->_reprocess_current_token = true;
|
@@ -2880,25 +3125,27 @@ static bool handle_in_table(GumboParser* parser, GumboToken* token) {
|
|
2880
3125
|
return false;
|
2881
3126
|
}
|
2882
3127
|
return true;
|
2883
|
-
} else if (tag_in(token, kEndTag,
|
2884
|
-
|
2885
|
-
|
2886
|
-
|
3128
|
+
} else if (tag_in(token, kEndTag,
|
3129
|
+
(gumbo_tagset){TAG(BODY), TAG(CAPTION), TAG(COL),
|
3130
|
+
TAG(COLGROUP), TAG(HTML), TAG(TBODY), TAG(TD), TAG(TFOOT),
|
3131
|
+
TAG(TH), TAG(THEAD), TAG(TR)})) {
|
2887
3132
|
parser_add_parse_error(parser, token);
|
2888
3133
|
ignore_token(parser);
|
2889
3134
|
return false;
|
2890
|
-
} else if (tag_in(token, kStartTag,
|
3135
|
+
} else if (tag_in(token, kStartTag,
|
3136
|
+
(gumbo_tagset){TAG(STYLE), TAG(SCRIPT), TAG(TEMPLATE)}) ||
|
3137
|
+
(tag_is(token, kEndTag, GUMBO_TAG_TEMPLATE))) {
|
2891
3138
|
return handle_in_head(parser, token);
|
2892
3139
|
} else if (tag_is(token, kStartTag, GUMBO_TAG_INPUT) &&
|
2893
|
-
attribute_matches(
|
2894
|
-
|
3140
|
+
attribute_matches(
|
3141
|
+
&token->v.start_tag.attributes, "type", "hidden")) {
|
2895
3142
|
parser_add_parse_error(parser, token);
|
2896
3143
|
insert_element_from_token(parser, token);
|
2897
3144
|
pop_current_node(parser);
|
2898
3145
|
return false;
|
2899
3146
|
} else if (tag_is(token, kStartTag, GUMBO_TAG_FORM)) {
|
2900
3147
|
parser_add_parse_error(parser, token);
|
2901
|
-
if (state->_form_element) {
|
3148
|
+
if (state->_form_element || has_open_element(parser, GUMBO_TAG_TEMPLATE)) {
|
2902
3149
|
ignore_token(parser);
|
2903
3150
|
return false;
|
2904
3151
|
}
|
@@ -2906,11 +3153,7 @@ static bool handle_in_table(GumboParser* parser, GumboToken* token) {
|
|
2906
3153
|
pop_current_node(parser);
|
2907
3154
|
return false;
|
2908
3155
|
} else if (token->type == GUMBO_TOKEN_EOF) {
|
2909
|
-
|
2910
|
-
parser_add_parse_error(parser, token);
|
2911
|
-
return false;
|
2912
|
-
}
|
2913
|
-
return true;
|
3156
|
+
return handle_in_body(parser, token);
|
2914
3157
|
} else {
|
2915
3158
|
parser_add_parse_error(parser, token);
|
2916
3159
|
state->_foster_parent_insertions = true;
|
@@ -2938,8 +3181,9 @@ static bool handle_in_table_text(GumboParser* parser, GumboToken* token) {
|
|
2938
3181
|
// Note that TextNodeBuffer may contain UTF-8 characters, but the presence
|
2939
3182
|
// of any one byte that is not whitespace means we flip the flag, so this
|
2940
3183
|
// loop is still valid.
|
2941
|
-
for (int i = 0; i < buffer->length; ++i) {
|
2942
|
-
if (!isspace((unsigned char)buffer->data[i]) ||
|
3184
|
+
for (unsigned int i = 0; i < buffer->length; ++i) {
|
3185
|
+
if (!isspace((unsigned char) buffer->data[i]) ||
|
3186
|
+
buffer->data[i] == '\v') {
|
2943
3187
|
state->_foster_parent_insertions = true;
|
2944
3188
|
reconstruct_active_formatting_elements(parser);
|
2945
3189
|
break;
|
@@ -2955,35 +3199,43 @@ static bool handle_in_table_text(GumboParser* parser, GumboToken* token) {
|
|
2955
3199
|
|
2956
3200
|
// http://www.whatwg.org/specs/web-apps/current-work/complete/tokenization.html#parsing-main-incaption
|
2957
3201
|
static bool handle_in_caption(GumboParser* parser, GumboToken* token) {
|
2958
|
-
if (
|
2959
|
-
TAG(COLGROUP), TAG(TBODY), TAG(TD),
|
2960
|
-
TAG(TFOOT), TAG(TH), TAG(THEAD), TAG(TR) }) ||
|
2961
|
-
tag_in(token, kEndTag, (gumbo_tagset) { TAG(CAPTION), TAG(TABLE) })) {
|
3202
|
+
if (tag_is(token, kEndTag, GUMBO_TAG_CAPTION)) {
|
2962
3203
|
if (!has_an_element_in_table_scope(parser, GUMBO_TAG_CAPTION)) {
|
2963
3204
|
parser_add_parse_error(parser, token);
|
2964
3205
|
ignore_token(parser);
|
2965
3206
|
return false;
|
3207
|
+
} else {
|
3208
|
+
generate_implied_end_tags(parser, GUMBO_TAG_LAST);
|
3209
|
+
bool result = true;
|
3210
|
+
if (!node_html_tag_is(get_current_node(parser), GUMBO_TAG_CAPTION)) {
|
3211
|
+
parser_add_parse_error(parser, token);
|
3212
|
+
}
|
3213
|
+
while (!node_html_tag_is(pop_current_node(parser), GUMBO_TAG_CAPTION))
|
3214
|
+
;
|
3215
|
+
clear_active_formatting_elements(parser);
|
3216
|
+
set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE);
|
3217
|
+
return result;
|
2966
3218
|
}
|
2967
|
-
|
2968
|
-
|
2969
|
-
|
2970
|
-
|
2971
|
-
|
2972
|
-
|
2973
|
-
if (!node_html_tag_is(get_current_node(parser), GUMBO_TAG_CAPTION)) {
|
3219
|
+
} else if (tag_in(token, kStartTag,
|
3220
|
+
(gumbo_tagset){TAG(CAPTION), TAG(COL), TAG(COLGROUP),
|
3221
|
+
TAG(TBODY), TAG(TD), TAG(TFOOT), TAG(TH), TAG(THEAD),
|
3222
|
+
TAG(TR)}) ||
|
3223
|
+
(tag_is(token, kEndTag, GUMBO_TAG_TABLE))) {
|
3224
|
+
if (!has_an_element_in_table_scope(parser, GUMBO_TAG_CAPTION)) {
|
2974
3225
|
parser_add_parse_error(parser, token);
|
2975
|
-
|
2976
|
-
|
2977
|
-
}
|
2978
|
-
result = false;
|
3226
|
+
ignore_token(parser);
|
3227
|
+
return false;
|
2979
3228
|
}
|
2980
|
-
pop_current_node(parser)
|
3229
|
+
while (!node_html_tag_is(pop_current_node(parser), GUMBO_TAG_CAPTION))
|
3230
|
+
;
|
2981
3231
|
clear_active_formatting_elements(parser);
|
2982
3232
|
set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE);
|
2983
|
-
|
2984
|
-
|
2985
|
-
|
2986
|
-
|
3233
|
+
parser->_parser_state->_reprocess_current_token = true;
|
3234
|
+
return true;
|
3235
|
+
} else if (tag_in(token, kEndTag,
|
3236
|
+
(gumbo_tagset){TAG(BODY), TAG(COL), TAG(COLGROUP), TAG(HTML),
|
3237
|
+
TAG(TBODY), TAG(TD), TAG(TFOOT), TAG(TH), TAG(THEAD),
|
3238
|
+
TAG(TR)})) {
|
2987
3239
|
parser_add_parse_error(parser, token);
|
2988
3240
|
ignore_token(parser);
|
2989
3241
|
return false;
|
@@ -3011,24 +3263,33 @@ static bool handle_in_column_group(GumboParser* parser, GumboToken* token) {
|
|
3011
3263
|
pop_current_node(parser);
|
3012
3264
|
acknowledge_self_closing_tag(parser);
|
3013
3265
|
return true;
|
3266
|
+
} else if (tag_is(token, kEndTag, GUMBO_TAG_COLGROUP)) {
|
3267
|
+
if (!node_html_tag_is(get_current_node(parser), GUMBO_TAG_COLGROUP)) {
|
3268
|
+
parser_add_parse_error(parser, token);
|
3269
|
+
ignore_token(parser);
|
3270
|
+
return false;
|
3271
|
+
}
|
3272
|
+
pop_current_node(parser);
|
3273
|
+
set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE);
|
3274
|
+
return false;
|
3014
3275
|
} else if (tag_is(token, kEndTag, GUMBO_TAG_COL)) {
|
3015
3276
|
parser_add_parse_error(parser, token);
|
3016
3277
|
ignore_token(parser);
|
3017
3278
|
return false;
|
3018
|
-
} else if (token
|
3019
|
-
|
3020
|
-
return
|
3279
|
+
} else if (tag_is(token, kStartTag, GUMBO_TAG_TEMPLATE) ||
|
3280
|
+
tag_is(token, kEndTag, GUMBO_TAG_TEMPLATE)) {
|
3281
|
+
return handle_in_head(parser, token);
|
3282
|
+
} else if (token->type == GUMBO_TOKEN_EOF) {
|
3283
|
+
return handle_in_body(parser, token);
|
3021
3284
|
} else {
|
3022
|
-
if (get_current_node(parser)
|
3285
|
+
if (!node_html_tag_is(get_current_node(parser), GUMBO_TAG_COLGROUP)) {
|
3023
3286
|
parser_add_parse_error(parser, token);
|
3287
|
+
ignore_token(parser);
|
3024
3288
|
return false;
|
3025
3289
|
}
|
3026
|
-
assert(node_html_tag_is(get_current_node(parser), GUMBO_TAG_COLGROUP));
|
3027
3290
|
pop_current_node(parser);
|
3028
3291
|
set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE);
|
3029
|
-
|
3030
|
-
parser->_parser_state->_reprocess_current_token = true;
|
3031
|
-
}
|
3292
|
+
parser->_parser_state->_reprocess_current_token = true;
|
3032
3293
|
return true;
|
3033
3294
|
}
|
3034
3295
|
}
|
@@ -3040,14 +3301,15 @@ static bool handle_in_table_body(GumboParser* parser, GumboToken* token) {
|
|
3040
3301
|
insert_element_from_token(parser, token);
|
3041
3302
|
set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_ROW);
|
3042
3303
|
return true;
|
3043
|
-
} else if (tag_in(token, kStartTag, (gumbo_tagset)
|
3304
|
+
} else if (tag_in(token, kStartTag, (gumbo_tagset){TAG(TD), TAG(TH)})) {
|
3044
3305
|
parser_add_parse_error(parser, token);
|
3045
3306
|
clear_stack_to_table_body_context(parser);
|
3046
3307
|
insert_element_of_tag_type(parser, GUMBO_TAG_TR, GUMBO_INSERTION_IMPLIED);
|
3047
3308
|
parser->_parser_state->_reprocess_current_token = true;
|
3048
3309
|
set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_ROW);
|
3049
3310
|
return false;
|
3050
|
-
} else if (tag_in(token, kEndTag,
|
3311
|
+
} else if (tag_in(token, kEndTag,
|
3312
|
+
(gumbo_tagset){TAG(TBODY), TAG(TFOOT), TAG(THEAD)})) {
|
3051
3313
|
if (!has_an_element_in_table_scope(parser, token->v.end_tag)) {
|
3052
3314
|
parser_add_parse_error(parser, token);
|
3053
3315
|
ignore_token(parser);
|
@@ -3057,12 +3319,13 @@ static bool handle_in_table_body(GumboParser* parser, GumboToken* token) {
|
|
3057
3319
|
pop_current_node(parser);
|
3058
3320
|
set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE);
|
3059
3321
|
return true;
|
3060
|
-
} else if (tag_in(token, kStartTag,
|
3061
|
-
|
3322
|
+
} else if (tag_in(token, kStartTag,
|
3323
|
+
(gumbo_tagset){TAG(CAPTION), TAG(COL), TAG(COLGROUP),
|
3324
|
+
TAG(TBODY), TAG(TFOOT), TAG(THEAD)}) ||
|
3062
3325
|
tag_is(token, kEndTag, GUMBO_TAG_TABLE)) {
|
3063
3326
|
if (!(has_an_element_in_table_scope(parser, GUMBO_TAG_TBODY) ||
|
3064
|
-
|
3065
|
-
|
3327
|
+
has_an_element_in_table_scope(parser, GUMBO_TAG_THEAD) ||
|
3328
|
+
has_an_element_in_table_scope(parser, GUMBO_TAG_TFOOT))) {
|
3066
3329
|
parser_add_parse_error(parser, token);
|
3067
3330
|
ignore_token(parser);
|
3068
3331
|
return false;
|
@@ -3072,9 +3335,9 @@ static bool handle_in_table_body(GumboParser* parser, GumboToken* token) {
|
|
3072
3335
|
set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE);
|
3073
3336
|
parser->_parser_state->_reprocess_current_token = true;
|
3074
3337
|
return true;
|
3075
|
-
} else if (tag_in(token, kEndTag,
|
3076
|
-
|
3077
|
-
|
3338
|
+
} else if (tag_in(token, kEndTag,
|
3339
|
+
(gumbo_tagset){TAG(BODY), TAG(CAPTION), TAG(COL), TAG(TR),
|
3340
|
+
TAG(COLGROUP), TAG(HTML), TAG(TD), TAG(TH)})) {
|
3078
3341
|
parser_add_parse_error(parser, token);
|
3079
3342
|
ignore_token(parser);
|
3080
3343
|
return false;
|
@@ -3085,45 +3348,55 @@ static bool handle_in_table_body(GumboParser* parser, GumboToken* token) {
|
|
3085
3348
|
|
3086
3349
|
// http://www.whatwg.org/specs/web-apps/current-work/complete/tokenization.html#parsing-main-intr
|
3087
3350
|
static bool handle_in_row(GumboParser* parser, GumboToken* token) {
|
3088
|
-
if (tag_in(token, kStartTag, (gumbo_tagset)
|
3351
|
+
if (tag_in(token, kStartTag, (gumbo_tagset){TAG(TH), TAG(TD)})) {
|
3089
3352
|
clear_stack_to_table_row_context(parser);
|
3090
3353
|
insert_element_from_token(parser, token);
|
3091
3354
|
set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_CELL);
|
3092
3355
|
add_formatting_element(parser, &kActiveFormattingScopeMarker);
|
3093
3356
|
return true;
|
3094
|
-
} else if (
|
3095
|
-
|
3096
|
-
tag_in(token, kEndTag, (gumbo_tagset) { TAG(TR), TAG(TABLE),
|
3097
|
-
TAG(TBODY), TAG(TFOOT), TAG(THEAD) })) {
|
3098
|
-
// This case covers 4 clauses of the spec, each of which say "Otherwise, act
|
3099
|
-
// as if an end tag with the tag name "tr" had been seen." The differences
|
3100
|
-
// are in error handling and whether the current token is reprocessed.
|
3101
|
-
GumboTag desired_tag =
|
3102
|
-
tag_in(token, kEndTag, (gumbo_tagset) { TAG(TBODY), TAG(TFOOT),
|
3103
|
-
TAG(THEAD) })
|
3104
|
-
? token->v.end_tag : GUMBO_TAG_TR;
|
3105
|
-
if (!has_an_element_in_table_scope(parser, desired_tag)) {
|
3106
|
-
gumbo_debug("Bailing because there is no tag %s in table scope.\nOpen elements:",
|
3107
|
-
gumbo_normalized_tagname(desired_tag));
|
3108
|
-
for (int i = 0; i < parser->_parser_state->_open_elements.length; ++i) {
|
3109
|
-
const GumboNode* node = parser->_parser_state->_open_elements.data[i];
|
3110
|
-
gumbo_debug("%s\n", gumbo_normalized_tagname(node->v.element.tag));
|
3111
|
-
}
|
3357
|
+
} else if (tag_is(token, kEndTag, GUMBO_TAG_TR)) {
|
3358
|
+
if (!has_an_element_in_table_scope(parser, GUMBO_TAG_TR)) {
|
3112
3359
|
parser_add_parse_error(parser, token);
|
3113
3360
|
ignore_token(parser);
|
3114
3361
|
return false;
|
3362
|
+
} else {
|
3363
|
+
clear_stack_to_table_row_context(parser);
|
3364
|
+
pop_current_node(parser);
|
3365
|
+
set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE_BODY);
|
3366
|
+
return true;
|
3115
3367
|
}
|
3116
|
-
|
3117
|
-
|
3118
|
-
|
3119
|
-
|
3120
|
-
|
3121
|
-
|
3368
|
+
} else if (tag_in(token, kStartTag,
|
3369
|
+
(gumbo_tagset){TAG(CAPTION), TAG(COL), TAG(COLGROUP),
|
3370
|
+
TAG(TBODY), TAG(TFOOT), TAG(THEAD), TAG(TR)}) ||
|
3371
|
+
tag_is(token, kEndTag, GUMBO_TAG_TABLE)) {
|
3372
|
+
if (!has_an_element_in_table_scope(parser, GUMBO_TAG_TR)) {
|
3373
|
+
parser_add_parse_error(parser, token);
|
3374
|
+
ignore_token(parser);
|
3375
|
+
return false;
|
3376
|
+
} else {
|
3377
|
+
clear_stack_to_table_row_context(parser);
|
3378
|
+
pop_current_node(parser);
|
3379
|
+
set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE_BODY);
|
3122
3380
|
parser->_parser_state->_reprocess_current_token = true;
|
3381
|
+
return true;
|
3123
3382
|
}
|
3124
|
-
|
3125
|
-
|
3126
|
-
|
3383
|
+
} else if (tag_in(token, kEndTag,
|
3384
|
+
(gumbo_tagset){TAG(TBODY), TAG(TFOOT), TAG(THEAD)})) {
|
3385
|
+
if (!has_an_element_in_table_scope(parser, token->v.end_tag) ||
|
3386
|
+
(!has_an_element_in_table_scope(parser, GUMBO_TAG_TR))) {
|
3387
|
+
parser_add_parse_error(parser, token);
|
3388
|
+
ignore_token(parser);
|
3389
|
+
return false;
|
3390
|
+
} else {
|
3391
|
+
clear_stack_to_table_row_context(parser);
|
3392
|
+
pop_current_node(parser);
|
3393
|
+
set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE_BODY);
|
3394
|
+
parser->_parser_state->_reprocess_current_token = true;
|
3395
|
+
return true;
|
3396
|
+
}
|
3397
|
+
} else if (tag_in(token, kEndTag,
|
3398
|
+
(gumbo_tagset){TAG(BODY), TAG(CAPTION), TAG(COL),
|
3399
|
+
TAG(COLGROUP), TAG(HTML), TAG(TD), TAG(TH)})) {
|
3127
3400
|
parser_add_parse_error(parser, token);
|
3128
3401
|
ignore_token(parser);
|
3129
3402
|
return false;
|
@@ -3134,16 +3407,18 @@ static bool handle_in_row(GumboParser* parser, GumboToken* token) {
|
|
3134
3407
|
|
3135
3408
|
// http://www.whatwg.org/specs/web-apps/current-work/complete/tokenization.html#parsing-main-intd
|
3136
3409
|
static bool handle_in_cell(GumboParser* parser, GumboToken* token) {
|
3137
|
-
if (tag_in(token, kEndTag, (gumbo_tagset)
|
3410
|
+
if (tag_in(token, kEndTag, (gumbo_tagset){TAG(TD), TAG(TH)})) {
|
3138
3411
|
GumboTag token_tag = token->v.end_tag;
|
3139
3412
|
if (!has_an_element_in_table_scope(parser, token_tag)) {
|
3140
3413
|
parser_add_parse_error(parser, token);
|
3414
|
+
ignore_token(parser);
|
3141
3415
|
return false;
|
3142
3416
|
}
|
3143
3417
|
return close_table_cell(parser, token, token_tag);
|
3144
|
-
} else if (tag_in(token, kStartTag,
|
3145
|
-
|
3146
|
-
|
3418
|
+
} else if (tag_in(token, kStartTag,
|
3419
|
+
(gumbo_tagset){TAG(CAPTION), TAG(COL), TAG(COLGROUP),
|
3420
|
+
TAG(TBODY), TAG(TD), TAG(TFOOT), TAG(TH), TAG(THEAD),
|
3421
|
+
TAG(TR)})) {
|
3147
3422
|
gumbo_debug("Handling <td> in cell.\n");
|
3148
3423
|
if (!has_an_element_in_table_scope(parser, GUMBO_TAG_TH) &&
|
3149
3424
|
!has_an_element_in_table_scope(parser, GUMBO_TAG_TD)) {
|
@@ -3154,13 +3429,13 @@ static bool handle_in_cell(GumboParser* parser, GumboToken* token) {
|
|
3154
3429
|
}
|
3155
3430
|
parser->_parser_state->_reprocess_current_token = true;
|
3156
3431
|
return close_current_cell(parser, token);
|
3157
|
-
} else if (tag_in(token, kEndTag, (gumbo_tagset)
|
3158
|
-
|
3432
|
+
} else if (tag_in(token, kEndTag, (gumbo_tagset){TAG(BODY), TAG(CAPTION),
|
3433
|
+
TAG(COL), TAG(COLGROUP), TAG(HTML)})) {
|
3159
3434
|
parser_add_parse_error(parser, token);
|
3160
3435
|
ignore_token(parser);
|
3161
3436
|
return false;
|
3162
|
-
} else if (tag_in(token, kEndTag, (gumbo_tagset)
|
3163
|
-
|
3437
|
+
} else if (tag_in(token, kEndTag, (gumbo_tagset){TAG(TABLE), TAG(TBODY),
|
3438
|
+
TAG(TFOOT), TAG(THEAD), TAG(TR)})) {
|
3164
3439
|
if (!has_an_element_in_table_scope(parser, token->v.end_tag)) {
|
3165
3440
|
parser_add_parse_error(parser, token);
|
3166
3441
|
ignore_token(parser);
|
@@ -3211,7 +3486,7 @@ static bool handle_in_select(GumboParser* parser, GumboToken* token) {
|
|
3211
3486
|
GumboVector* open_elements = &parser->_parser_state->_open_elements;
|
3212
3487
|
if (node_html_tag_is(get_current_node(parser), GUMBO_TAG_OPTION) &&
|
3213
3488
|
node_html_tag_is(open_elements->data[open_elements->length - 2],
|
3214
|
-
|
3489
|
+
GUMBO_TAG_OPTGROUP)) {
|
3215
3490
|
pop_current_node(parser);
|
3216
3491
|
}
|
3217
3492
|
if (node_html_tag_is(get_current_node(parser), GUMBO_TAG_OPTGROUP)) {
|
@@ -3242,9 +3517,12 @@ static bool handle_in_select(GumboParser* parser, GumboToken* token) {
|
|
3242
3517
|
} else if (tag_is(token, kStartTag, GUMBO_TAG_SELECT)) {
|
3243
3518
|
parser_add_parse_error(parser, token);
|
3244
3519
|
ignore_token(parser);
|
3245
|
-
|
3520
|
+
if (has_an_element_in_select_scope(parser, GUMBO_TAG_SELECT)) {
|
3521
|
+
close_current_select(parser);
|
3522
|
+
}
|
3246
3523
|
return false;
|
3247
|
-
} else if (tag_in(token, kStartTag,
|
3524
|
+
} else if (tag_in(token, kStartTag,
|
3525
|
+
(gumbo_tagset){TAG(INPUT), TAG(KEYGEN), TAG(TEXTAREA)})) {
|
3248
3526
|
parser_add_parse_error(parser, token);
|
3249
3527
|
if (!has_an_element_in_select_scope(parser, GUMBO_TAG_SELECT)) {
|
3250
3528
|
ignore_token(parser);
|
@@ -3253,14 +3531,12 @@ static bool handle_in_select(GumboParser* parser, GumboToken* token) {
|
|
3253
3531
|
parser->_parser_state->_reprocess_current_token = true;
|
3254
3532
|
}
|
3255
3533
|
return false;
|
3256
|
-
} else if (
|
3534
|
+
} else if (tag_in(token, kStartTag,
|
3535
|
+
(gumbo_tagset){TAG(SCRIPT), TAG(TEMPLATE)}) ||
|
3536
|
+
tag_is(token, kEndTag, GUMBO_TAG_TEMPLATE)) {
|
3257
3537
|
return handle_in_head(parser, token);
|
3258
3538
|
} else if (token->type == GUMBO_TOKEN_EOF) {
|
3259
|
-
|
3260
|
-
parser_add_parse_error(parser, token);
|
3261
|
-
return false;
|
3262
|
-
}
|
3263
|
-
return true;
|
3539
|
+
return handle_in_body(parser, token);
|
3264
3540
|
} else {
|
3265
3541
|
parser_add_parse_error(parser, token);
|
3266
3542
|
ignore_token(parser);
|
@@ -3270,23 +3546,28 @@ static bool handle_in_select(GumboParser* parser, GumboToken* token) {
|
|
3270
3546
|
|
3271
3547
|
// http://www.whatwg.org/specs/web-apps/current-work/complete/tokenization.html#parsing-main-inselectintable
|
3272
3548
|
static bool handle_in_select_in_table(GumboParser* parser, GumboToken* token) {
|
3273
|
-
if (tag_in(token, kStartTag,
|
3274
|
-
|
3549
|
+
if (tag_in(token, kStartTag,
|
3550
|
+
(gumbo_tagset){TAG(CAPTION), TAG(TABLE), TAG(TBODY), TAG(TFOOT),
|
3551
|
+
TAG(THEAD), TAG(TR), TAG(TD), TAG(TH)})) {
|
3275
3552
|
parser_add_parse_error(parser, token);
|
3276
3553
|
close_current_select(parser);
|
3277
3554
|
parser->_parser_state->_reprocess_current_token = true;
|
3278
3555
|
return false;
|
3279
|
-
} else if (tag_in(token, kEndTag,
|
3280
|
-
|
3556
|
+
} else if (tag_in(token, kEndTag,
|
3557
|
+
(gumbo_tagset){TAG(CAPTION), TAG(TABLE), TAG(TBODY),
|
3558
|
+
TAG(TFOOT), TAG(THEAD), TAG(TR), TAG(TD), TAG(TH)})) {
|
3281
3559
|
parser_add_parse_error(parser, token);
|
3282
|
-
if (has_an_element_in_table_scope(parser, token->v.end_tag)) {
|
3560
|
+
if (!has_an_element_in_table_scope(parser, token->v.end_tag)) {
|
3561
|
+
ignore_token(parser);
|
3562
|
+
return false;
|
3563
|
+
} else {
|
3283
3564
|
close_current_select(parser);
|
3284
|
-
|
3565
|
+
// close_current_select already does the
|
3566
|
+
// reset_insertion_mode_appropriately
|
3567
|
+
// reset_insertion_mode_appropriately(parser);
|
3285
3568
|
parser->_parser_state->_reprocess_current_token = true;
|
3286
|
-
|
3287
|
-
ignore_token(parser);
|
3569
|
+
return false;
|
3288
3570
|
}
|
3289
|
-
return false;
|
3290
3571
|
} else {
|
3291
3572
|
return handle_in_select(parser, token);
|
3292
3573
|
}
|
@@ -3294,8 +3575,71 @@ static bool handle_in_select_in_table(GumboParser* parser, GumboToken* token) {
|
|
3294
3575
|
|
3295
3576
|
// http://www.whatwg.org/specs/web-apps/current-work/multipage/tree-construction.html#parsing-main-intemplate
|
3296
3577
|
static bool handle_in_template(GumboParser* parser, GumboToken* token) {
|
3297
|
-
|
3298
|
-
|
3578
|
+
GumboParserState* state = parser->_parser_state;
|
3579
|
+
if (token->type == GUMBO_TOKEN_WHITESPACE ||
|
3580
|
+
token->type == GUMBO_TOKEN_CHARACTER ||
|
3581
|
+
token->type == GUMBO_TOKEN_COMMENT || token->type == GUMBO_TOKEN_NULL ||
|
3582
|
+
token->type == GUMBO_TOKEN_DOCTYPE) {
|
3583
|
+
return handle_in_body(parser, token);
|
3584
|
+
} else if (tag_in(token, kStartTag,
|
3585
|
+
(gumbo_tagset){TAG(BASE), TAG(BASEFONT), TAG(BGSOUND),
|
3586
|
+
TAG(LINK), TAG(META), TAG(NOFRAMES), TAG(SCRIPT),
|
3587
|
+
TAG(STYLE), TAG(TEMPLATE), TAG(TITLE)}) ||
|
3588
|
+
tag_is(token, kEndTag, GUMBO_TAG_TEMPLATE)) {
|
3589
|
+
return handle_in_head(parser, token);
|
3590
|
+
} else if (tag_in(
|
3591
|
+
token, kStartTag, (gumbo_tagset){TAG(CAPTION), TAG(COLGROUP),
|
3592
|
+
TAG(TBODY), TAG(TFOOT), TAG(THEAD)})) {
|
3593
|
+
pop_template_insertion_mode(parser);
|
3594
|
+
push_template_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE);
|
3595
|
+
set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE);
|
3596
|
+
state->_reprocess_current_token = true;
|
3597
|
+
return true;
|
3598
|
+
} else if (tag_is(token, kStartTag, GUMBO_TAG_COL)) {
|
3599
|
+
pop_template_insertion_mode(parser);
|
3600
|
+
push_template_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_COLUMN_GROUP);
|
3601
|
+
set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_COLUMN_GROUP);
|
3602
|
+
state->_reprocess_current_token = true;
|
3603
|
+
return true;
|
3604
|
+
} else if (tag_is(token, kStartTag, GUMBO_TAG_TR)) {
|
3605
|
+
pop_template_insertion_mode(parser);
|
3606
|
+
push_template_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE_BODY);
|
3607
|
+
set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE_BODY);
|
3608
|
+
state->_reprocess_current_token = true;
|
3609
|
+
return true;
|
3610
|
+
} else if (tag_in(token, kStartTag, (gumbo_tagset){TAG(TD), TAG(TH)})) {
|
3611
|
+
pop_template_insertion_mode(parser);
|
3612
|
+
push_template_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_ROW);
|
3613
|
+
set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_ROW);
|
3614
|
+
state->_reprocess_current_token = true;
|
3615
|
+
return true;
|
3616
|
+
} else if (token->type == GUMBO_TOKEN_START_TAG) {
|
3617
|
+
pop_template_insertion_mode(parser);
|
3618
|
+
push_template_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_BODY);
|
3619
|
+
set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_BODY);
|
3620
|
+
state->_reprocess_current_token = true;
|
3621
|
+
return true;
|
3622
|
+
} else if (token->type == GUMBO_TOKEN_END_TAG) {
|
3623
|
+
parser_add_parse_error(parser, token);
|
3624
|
+
ignore_token(parser);
|
3625
|
+
return false;
|
3626
|
+
} else if (token->type == GUMBO_TOKEN_EOF) {
|
3627
|
+
if (!has_open_element(parser, GUMBO_TAG_TEMPLATE)) {
|
3628
|
+
// Stop parsing.
|
3629
|
+
return true;
|
3630
|
+
}
|
3631
|
+
parser_add_parse_error(parser, token);
|
3632
|
+
while (!node_html_tag_is(pop_current_node(parser), GUMBO_TAG_TEMPLATE))
|
3633
|
+
;
|
3634
|
+
clear_active_formatting_elements(parser);
|
3635
|
+
pop_template_insertion_mode(parser);
|
3636
|
+
reset_insertion_mode_appropriately(parser);
|
3637
|
+
state->_reprocess_current_token = true;
|
3638
|
+
return false;
|
3639
|
+
} else {
|
3640
|
+
assert(0);
|
3641
|
+
return false;
|
3642
|
+
}
|
3299
3643
|
}
|
3300
3644
|
|
3301
3645
|
// http://www.whatwg.org/specs/web-apps/current-work/complete/tokenization.html#parsing-main-afterbody
|
@@ -3313,7 +3657,12 @@ static bool handle_after_body(GumboParser* parser, GumboToken* token) {
|
|
3313
3657
|
ignore_token(parser);
|
3314
3658
|
return false;
|
3315
3659
|
} else if (tag_is(token, kEndTag, GUMBO_TAG_HTML)) {
|
3316
|
-
|
3660
|
+
/* fragment case: ignore the closing HTML token */
|
3661
|
+
if (is_fragment_parser(parser)) {
|
3662
|
+
parser_add_parse_error(parser, token);
|
3663
|
+
ignore_token(parser);
|
3664
|
+
return false;
|
3665
|
+
}
|
3317
3666
|
set_insertion_mode(parser, GUMBO_INSERTION_MODE_AFTER_AFTER_BODY);
|
3318
3667
|
GumboNode* html = parser->_parser_state->_open_elements.data[0];
|
3319
3668
|
assert(node_html_tag_is(html, GUMBO_TAG_HTML));
|
@@ -3354,9 +3703,8 @@ static bool handle_in_frameset(GumboParser* parser, GumboToken* token) {
|
|
3354
3703
|
return false;
|
3355
3704
|
}
|
3356
3705
|
pop_current_node(parser);
|
3357
|
-
|
3358
|
-
|
3359
|
-
if (!node_html_tag_is(get_current_node(parser), GUMBO_TAG_FRAMESET)) {
|
3706
|
+
if (!is_fragment_parser(parser) &&
|
3707
|
+
!node_html_tag_is(get_current_node(parser), GUMBO_TAG_FRAMESET)) {
|
3360
3708
|
set_insertion_mode(parser, GUMBO_INSERTION_MODE_AFTER_FRAMESET);
|
3361
3709
|
}
|
3362
3710
|
return true;
|
@@ -3455,31 +3803,14 @@ static bool handle_after_after_frameset(
|
|
3455
3803
|
// Function pointers for each insertion mode. Keep in sync with
|
3456
3804
|
// insertion_mode.h.
|
3457
3805
|
typedef bool (*TokenHandler)(GumboParser* parser, GumboToken* token);
|
3458
|
-
static const TokenHandler kTokenHandlers[] = {
|
3459
|
-
|
3460
|
-
|
3461
|
-
|
3462
|
-
|
3463
|
-
|
3464
|
-
|
3465
|
-
|
3466
|
-
handle_text,
|
3467
|
-
handle_in_table,
|
3468
|
-
handle_in_table_text,
|
3469
|
-
handle_in_caption,
|
3470
|
-
handle_in_column_group,
|
3471
|
-
handle_in_table_body,
|
3472
|
-
handle_in_row,
|
3473
|
-
handle_in_cell,
|
3474
|
-
handle_in_select,
|
3475
|
-
handle_in_select_in_table,
|
3476
|
-
handle_in_template,
|
3477
|
-
handle_after_body,
|
3478
|
-
handle_in_frameset,
|
3479
|
-
handle_after_frameset,
|
3480
|
-
handle_after_after_body,
|
3481
|
-
handle_after_after_frameset
|
3482
|
-
};
|
3806
|
+
static const TokenHandler kTokenHandlers[] = {handle_initial,
|
3807
|
+
handle_before_html, handle_before_head, handle_in_head,
|
3808
|
+
handle_in_head_noscript, handle_after_head, handle_in_body, handle_text,
|
3809
|
+
handle_in_table, handle_in_table_text, handle_in_caption,
|
3810
|
+
handle_in_column_group, handle_in_table_body, handle_in_row, handle_in_cell,
|
3811
|
+
handle_in_select, handle_in_select_in_table, handle_in_template,
|
3812
|
+
handle_after_body, handle_in_frameset, handle_after_frameset,
|
3813
|
+
handle_after_after_body, handle_after_after_frameset};
|
3483
3814
|
|
3484
3815
|
static bool handle_html_content(GumboParser* parser, GumboToken* token) {
|
3485
3816
|
return kTokenHandlers[(unsigned int) parser->_parser_state->_insertion_mode](
|
@@ -3488,6 +3819,7 @@ static bool handle_html_content(GumboParser* parser, GumboToken* token) {
|
|
3488
3819
|
|
3489
3820
|
// http://www.whatwg.org/specs/web-apps/current-work/complete/tokenization.html#parsing-main-inforeign
|
3490
3821
|
static bool handle_in_foreign_content(GumboParser* parser, GumboToken* token) {
|
3822
|
+
gumbo_debug("Handling foreign content");
|
3491
3823
|
switch (token->type) {
|
3492
3824
|
case GUMBO_TOKEN_NULL:
|
3493
3825
|
parser_add_parse_error(parser, token);
|
@@ -3514,34 +3846,44 @@ static bool handle_in_foreign_content(GumboParser* parser, GumboToken* token) {
|
|
3514
3846
|
break;
|
3515
3847
|
}
|
3516
3848
|
// Order matters for these clauses.
|
3517
|
-
if (tag_in(token, kStartTag,
|
3518
|
-
TAG(BLOCKQUOTE), TAG(BODY), TAG(BR),
|
3519
|
-
|
3520
|
-
|
3521
|
-
|
3522
|
-
|
3523
|
-
|
3524
|
-
|
3525
|
-
|
3526
|
-
|
3527
|
-
|
3528
|
-
|
3529
|
-
|
3530
|
-
|
3531
|
-
token_has_attribute(token, "face") ||
|
3532
|
-
token_has_attribute(token, "size")))) {
|
3849
|
+
if (tag_in(token, kStartTag,
|
3850
|
+
(gumbo_tagset){TAG(B), TAG(BIG), TAG(BLOCKQUOTE), TAG(BODY), TAG(BR),
|
3851
|
+
TAG(CENTER), TAG(CODE), TAG(DD), TAG(DIV), TAG(DL), TAG(DT),
|
3852
|
+
TAG(EM), TAG(EMBED), TAG(H1), TAG(H2), TAG(H3), TAG(H4), TAG(H5),
|
3853
|
+
TAG(H6), TAG(HEAD), TAG(HR), TAG(I), TAG(IMG), TAG(LI),
|
3854
|
+
TAG(LISTING), TAG(MENU), TAG(META), TAG(NOBR), TAG(OL), TAG(P),
|
3855
|
+
TAG(PRE), TAG(RUBY), TAG(S), TAG(SMALL), TAG(SPAN), TAG(STRONG),
|
3856
|
+
TAG(STRIKE), TAG(SUB), TAG(SUP), TAG(TABLE), TAG(TT), TAG(U),
|
3857
|
+
TAG(UL), TAG(VAR)}) ||
|
3858
|
+
(tag_is(token, kStartTag, GUMBO_TAG_FONT) &&
|
3859
|
+
(token_has_attribute(token, "color") ||
|
3860
|
+
token_has_attribute(token, "face") ||
|
3861
|
+
token_has_attribute(token, "size")))) {
|
3862
|
+
/* Parse error */
|
3533
3863
|
parser_add_parse_error(parser, token);
|
3534
|
-
|
3535
|
-
|
3536
|
-
|
3537
|
-
|
3538
|
-
|
3539
|
-
|
3540
|
-
parser
|
3541
|
-
|
3542
|
-
|
3864
|
+
|
3865
|
+
/*
|
3866
|
+
* Fragment case: If the parser was originally created for the HTML
|
3867
|
+
* fragment parsing algorithm, then act as described in the "any other
|
3868
|
+
* start tag" entry below.
|
3869
|
+
*/
|
3870
|
+
if (!is_fragment_parser(parser)) {
|
3871
|
+
do {
|
3872
|
+
pop_current_node(parser);
|
3873
|
+
} while (!(is_mathml_integration_point(get_current_node(parser)) ||
|
3874
|
+
is_html_integration_point(get_current_node(parser)) ||
|
3875
|
+
get_current_node(parser)->v.element.tag_namespace ==
|
3876
|
+
GUMBO_NAMESPACE_HTML));
|
3877
|
+
parser->_parser_state->_reprocess_current_token = true;
|
3878
|
+
return false;
|
3879
|
+
}
|
3880
|
+
|
3881
|
+
assert(token->type == GUMBO_TOKEN_START_TAG);
|
3882
|
+
}
|
3883
|
+
|
3884
|
+
if (token->type == GUMBO_TOKEN_START_TAG) {
|
3543
3885
|
const GumboNamespaceEnum current_namespace =
|
3544
|
-
|
3886
|
+
get_adjusted_current_node(parser)->v.element.tag_namespace;
|
3545
3887
|
if (current_namespace == GUMBO_NAMESPACE_MATHML) {
|
3546
3888
|
adjust_mathml_attributes(parser, token);
|
3547
3889
|
}
|
@@ -3557,8 +3899,8 @@ static bool handle_in_foreign_content(GumboParser* parser, GumboToken* token) {
|
|
3557
3899
|
acknowledge_self_closing_tag(parser);
|
3558
3900
|
}
|
3559
3901
|
return true;
|
3560
|
-
|
3561
|
-
|
3902
|
+
// </script> tags are handled like any other end tag, putting the script's
|
3903
|
+
// text into a text node child and closing the current node.
|
3562
3904
|
} else {
|
3563
3905
|
assert(token->type == GUMBO_TOKEN_END_TAG);
|
3564
3906
|
GumboNode* node = get_current_node(parser);
|
@@ -3574,13 +3916,13 @@ static bool handle_in_foreign_content(GumboParser* parser, GumboToken* token) {
|
|
3574
3916
|
is_success = false;
|
3575
3917
|
}
|
3576
3918
|
int i = parser->_parser_state->_open_elements.length;
|
3577
|
-
for(
|
3919
|
+
for (--i; i > 0;) {
|
3578
3920
|
// Here we move up the stack until we find an HTML element (in which
|
3579
3921
|
// case we do nothing) or we find the element that we're about to
|
3580
3922
|
// close (in which case we pop everything we've seen until that
|
3581
3923
|
// point.)
|
3582
3924
|
gumbo_debug("Foreign %.*s node at %d.\n", node_tagname.length,
|
3583
|
-
|
3925
|
+
node_tagname.data, i);
|
3584
3926
|
if (gumbo_string_equals_ignore_case(&node_tagname, &token_tagname)) {
|
3585
3927
|
gumbo_debug("Matches.\n");
|
3586
3928
|
while (pop_current_node(parser) != node) {
|
@@ -3608,7 +3950,6 @@ static bool handle_in_foreign_content(GumboParser* parser, GumboToken* token) {
|
|
3608
3950
|
}
|
3609
3951
|
}
|
3610
3952
|
|
3611
|
-
|
3612
3953
|
// http://www.whatwg.org/specs/web-apps/current-work/multipage/tree-construction.html#tree-construction
|
3613
3954
|
static bool handle_token(GumboParser* parser, GumboToken* token) {
|
3614
3955
|
if (parser->_parser_state->_ignore_next_linefeed &&
|
@@ -3630,28 +3971,31 @@ static bool handle_token(GumboParser* parser, GumboToken* token) {
|
|
3630
3971
|
parser->_parser_state->_closed_html_tag = true;
|
3631
3972
|
}
|
3632
3973
|
|
3633
|
-
const GumboNode* current_node =
|
3634
|
-
assert(!current_node || current_node->type == GUMBO_NODE_ELEMENT
|
3974
|
+
const GumboNode* current_node = get_adjusted_current_node(parser);
|
3975
|
+
assert(!current_node || current_node->type == GUMBO_NODE_ELEMENT ||
|
3976
|
+
current_node->type == GUMBO_NODE_TEMPLATE);
|
3635
3977
|
if (current_node) {
|
3636
3978
|
gumbo_debug("Current node: <%s>.\n",
|
3637
|
-
|
3979
|
+
gumbo_normalized_tagname(current_node->v.element.tag));
|
3638
3980
|
}
|
3639
3981
|
if (!current_node ||
|
3640
3982
|
current_node->v.element.tag_namespace == GUMBO_NAMESPACE_HTML ||
|
3641
3983
|
(is_mathml_integration_point(current_node) &&
|
3642
|
-
|
3643
|
-
|
3644
|
-
|
3645
|
-
|
3646
|
-
|
3984
|
+
(token->type == GUMBO_TOKEN_CHARACTER ||
|
3985
|
+
token->type == GUMBO_TOKEN_WHITESPACE ||
|
3986
|
+
token->type == GUMBO_TOKEN_NULL ||
|
3987
|
+
(token->type == GUMBO_TOKEN_START_TAG &&
|
3988
|
+
!tag_in(token, kStartTag,
|
3989
|
+
(gumbo_tagset){TAG(MGLYPH), TAG(MALIGNMARK)})))) ||
|
3647
3990
|
(current_node->v.element.tag_namespace == GUMBO_NAMESPACE_MATHML &&
|
3648
|
-
|
3649
|
-
|
3650
|
-
|
3651
|
-
|
3652
|
-
token->type ==
|
3653
|
-
|
3654
|
-
|
3991
|
+
node_qualified_tag_is(
|
3992
|
+
current_node, GUMBO_NAMESPACE_MATHML, GUMBO_TAG_ANNOTATION_XML) &&
|
3993
|
+
tag_is(token, kStartTag, GUMBO_TAG_SVG)) ||
|
3994
|
+
(is_html_integration_point(current_node) &&
|
3995
|
+
(token->type == GUMBO_TOKEN_START_TAG ||
|
3996
|
+
token->type == GUMBO_TOKEN_CHARACTER ||
|
3997
|
+
token->type == GUMBO_TOKEN_NULL ||
|
3998
|
+
token->type == GUMBO_TOKEN_WHITESPACE)) ||
|
3655
3999
|
token->type == GUMBO_TOKEN_EOF) {
|
3656
4000
|
return handle_html_content(parser, token);
|
3657
4001
|
} else {
|
@@ -3659,6 +4003,66 @@ static bool handle_token(GumboParser* parser, GumboToken* token) {
|
|
3659
4003
|
}
|
3660
4004
|
}
|
3661
4005
|
|
4006
|
+
static void fragment_parser_init(GumboParser* parser, GumboTag fragment_ctx,
|
4007
|
+
GumboNamespaceEnum fragment_namespace) {
|
4008
|
+
GumboNode* root;
|
4009
|
+
assert(fragment_ctx != GUMBO_TAG_LAST);
|
4010
|
+
|
4011
|
+
// 3
|
4012
|
+
parser->_parser_state->_fragment_ctx = create_element(parser, fragment_ctx);
|
4013
|
+
parser->_parser_state->_fragment_ctx->v.element.tag_namespace =
|
4014
|
+
fragment_namespace;
|
4015
|
+
|
4016
|
+
// 4
|
4017
|
+
if (fragment_namespace == GUMBO_NAMESPACE_HTML) {
|
4018
|
+
// Non-HTML namespaces always start in the DATA state.
|
4019
|
+
switch (fragment_ctx) {
|
4020
|
+
case GUMBO_TAG_TITLE:
|
4021
|
+
case GUMBO_TAG_TEXTAREA:
|
4022
|
+
gumbo_tokenizer_set_state(parser, GUMBO_LEX_RCDATA);
|
4023
|
+
break;
|
4024
|
+
|
4025
|
+
case GUMBO_TAG_STYLE:
|
4026
|
+
case GUMBO_TAG_XMP:
|
4027
|
+
case GUMBO_TAG_IFRAME:
|
4028
|
+
case GUMBO_TAG_NOEMBED:
|
4029
|
+
case GUMBO_TAG_NOFRAMES:
|
4030
|
+
gumbo_tokenizer_set_state(parser, GUMBO_LEX_RAWTEXT);
|
4031
|
+
break;
|
4032
|
+
|
4033
|
+
case GUMBO_TAG_SCRIPT:
|
4034
|
+
gumbo_tokenizer_set_state(parser, GUMBO_LEX_SCRIPT);
|
4035
|
+
break;
|
4036
|
+
|
4037
|
+
case GUMBO_TAG_NOSCRIPT:
|
4038
|
+
/* scripting is disabled in Gumbo, so leave the tokenizer
|
4039
|
+
* in the default data state */
|
4040
|
+
break;
|
4041
|
+
|
4042
|
+
case GUMBO_TAG_PLAINTEXT:
|
4043
|
+
gumbo_tokenizer_set_state(parser, GUMBO_LEX_PLAINTEXT);
|
4044
|
+
break;
|
4045
|
+
|
4046
|
+
default:
|
4047
|
+
/* default data state */
|
4048
|
+
break;
|
4049
|
+
}
|
4050
|
+
}
|
4051
|
+
|
4052
|
+
// 5. 6. 7.
|
4053
|
+
root = insert_element_of_tag_type(
|
4054
|
+
parser, GUMBO_TAG_HTML, GUMBO_INSERTION_IMPLIED);
|
4055
|
+
parser->_output->root = root;
|
4056
|
+
|
4057
|
+
// 8.
|
4058
|
+
if (fragment_ctx == GUMBO_TAG_TEMPLATE) {
|
4059
|
+
push_template_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TEMPLATE);
|
4060
|
+
}
|
4061
|
+
|
4062
|
+
// 10.
|
4063
|
+
reset_insertion_mode_appropriately(parser);
|
4064
|
+
}
|
4065
|
+
|
3662
4066
|
GumboOutput* gumbo_parse(const char* buffer) {
|
3663
4067
|
return gumbo_parse_with_options(
|
3664
4068
|
&kGumboDefaultOptions, buffer, strlen(buffer));
|
@@ -3672,6 +4076,11 @@ GumboOutput* gumbo_parse_with_options(
|
|
3672
4076
|
gumbo_tokenizer_state_init(&parser, buffer, length);
|
3673
4077
|
parser_state_init(&parser);
|
3674
4078
|
|
4079
|
+
if (options->fragment_context != GUMBO_TAG_LAST) {
|
4080
|
+
fragment_parser_init(
|
4081
|
+
&parser, options->fragment_context, options->fragment_namespace);
|
4082
|
+
}
|
4083
|
+
|
3675
4084
|
GumboParserState* state = parser._parser_state;
|
3676
4085
|
gumbo_debug("Parsing %.*s.\n", length, buffer);
|
3677
4086
|
|
@@ -3687,9 +4096,9 @@ GumboOutput* gumbo_parse_with_options(
|
|
3687
4096
|
state->_reprocess_current_token = false;
|
3688
4097
|
} else {
|
3689
4098
|
GumboNode* current_node = get_current_node(&parser);
|
3690
|
-
gumbo_tokenizer_set_is_current_node_foreign(
|
3691
|
-
|
3692
|
-
|
4099
|
+
gumbo_tokenizer_set_is_current_node_foreign(&parser,
|
4100
|
+
current_node &&
|
4101
|
+
current_node->v.element.tag_namespace != GUMBO_NAMESPACE_HTML);
|
3693
4102
|
has_error = !gumbo_lex(&parser, &token) || has_error;
|
3694
4103
|
}
|
3695
4104
|
const char* token_type = "text";
|
@@ -3709,14 +4118,13 @@ GumboOutput* gumbo_parse_with_options(
|
|
3709
4118
|
default:
|
3710
4119
|
break;
|
3711
4120
|
}
|
3712
|
-
gumbo_debug("Handling %s token @%d:%d in state %d.\n",
|
3713
|
-
|
3714
|
-
state->_insertion_mode);
|
4121
|
+
gumbo_debug("Handling %s token @%d:%d in state %d.\n", (char*) token_type,
|
4122
|
+
token.position.line, token.position.column, state->_insertion_mode);
|
3715
4123
|
|
3716
4124
|
state->_current_token = &token;
|
3717
4125
|
state->_self_closing_flag_acknowledged =
|
3718
4126
|
!(token.type == GUMBO_TOKEN_START_TAG &&
|
3719
|
-
|
4127
|
+
token.v.start_tag.is_self_closing);
|
3720
4128
|
|
3721
4129
|
has_error = !handle_token(&parser, &token) || has_error;
|
3722
4130
|
|
@@ -3772,7 +4180,7 @@ void gumbo_destroy_output(const GumboOptions* options, GumboOutput* output) {
|
|
3772
4180
|
GumboParser parser;
|
3773
4181
|
parser._options = options;
|
3774
4182
|
destroy_node(&parser, output->document);
|
3775
|
-
for (int i = 0; i < output->errors.length; ++i) {
|
4183
|
+
for (unsigned int i = 0; i < output->errors.length; ++i) {
|
3776
4184
|
gumbo_error_destroy(&parser, output->errors.data[i]);
|
3777
4185
|
}
|
3778
4186
|
gumbo_vector_destroy(&parser, &output->errors);
|