nokogumbo 1.4.2 → 1.4.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/gumbo-parser/src/attribute.c +1 -1
- data/gumbo-parser/src/char_ref.c +37 -67
- data/gumbo-parser/src/char_ref.h +3 -4
- data/gumbo-parser/src/char_ref.rl +6 -1
- data/gumbo-parser/src/error.c +50 -51
- data/gumbo-parser/src/error.h +7 -9
- data/gumbo-parser/src/gumbo.h +45 -181
- data/gumbo-parser/src/parser.c +1397 -989
- data/gumbo-parser/src/string_buffer.c +14 -10
- data/gumbo-parser/src/string_buffer.h +9 -6
- data/gumbo-parser/src/string_piece.c +5 -6
- data/gumbo-parser/src/string_piece.h +2 -3
- data/gumbo-parser/src/tag.c +36 -166
- data/gumbo-parser/src/tag.in +150 -0
- data/gumbo-parser/src/tag_enum.h +153 -0
- data/gumbo-parser/src/tag_gperf.h +105 -0
- data/gumbo-parser/src/tag_sizes.h +4 -0
- data/gumbo-parser/src/tag_strings.h +153 -0
- data/gumbo-parser/src/tokenizer.c +264 -360
- data/gumbo-parser/src/tokenizer.h +2 -2
- data/gumbo-parser/src/utf8.c +44 -44
- data/gumbo-parser/src/utf8.h +1 -2
- data/gumbo-parser/src/util.c +1 -1
- data/gumbo-parser/src/util.h +0 -2
- data/gumbo-parser/src/vector.c +17 -17
- data/gumbo-parser/src/vector.h +6 -8
- metadata +8 -3
data/gumbo-parser/src/error.h
CHANGED
@@ -201,24 +201,22 @@ void gumbo_error_destroy(struct GumboInternalParser* parser, GumboError* error);
|
|
201
201
|
// responsible for deleting the buffer. (Note that the buffer is allocated with
|
202
202
|
// the allocator specified in the GumboParser config and hence should be freed
|
203
203
|
// by gumbo_parser_deallocate().)
|
204
|
-
void gumbo_error_to_string(
|
205
|
-
|
206
|
-
GumboStringBuffer* output);
|
204
|
+
void gumbo_error_to_string(struct GumboInternalParser* parser,
|
205
|
+
const GumboError* error, GumboStringBuffer* output);
|
207
206
|
|
208
207
|
// Prints a caret diagnostic to a string. This fills an empty GumboStringBuffer
|
209
208
|
// with a freshly-allocated buffer containing the error message text. The
|
210
209
|
// caller is responsible for deleting the buffer. (Note that the buffer is
|
211
210
|
// allocated with the allocator specified in the GumboParser config and hence
|
212
211
|
// should be freed by gumbo_parser_deallocate().)
|
213
|
-
void gumbo_caret_diagnostic_to_string(
|
214
|
-
|
215
|
-
|
212
|
+
void gumbo_caret_diagnostic_to_string(struct GumboInternalParser* parser,
|
213
|
+
const GumboError* error, const char* source_text,
|
214
|
+
GumboStringBuffer* output);
|
216
215
|
|
217
216
|
// Like gumbo_caret_diagnostic_to_string, but prints the text to stdout instead
|
218
217
|
// of writing to a string.
|
219
|
-
void gumbo_print_caret_diagnostic(
|
220
|
-
|
221
|
-
const char* source_text);
|
218
|
+
void gumbo_print_caret_diagnostic(struct GumboInternalParser* parser,
|
219
|
+
const GumboError* error, const char* source_text);
|
222
220
|
|
223
221
|
#ifdef __cplusplus
|
224
222
|
}
|
data/gumbo-parser/src/gumbo.h
CHANGED
@@ -76,7 +76,6 @@ typedef struct {
|
|
76
76
|
*/
|
77
77
|
extern const GumboSourcePosition kGumboEmptySourcePosition;
|
78
78
|
|
79
|
-
|
80
79
|
/**
|
81
80
|
* A struct representing a string or part of a string. Strings within the
|
82
81
|
* parser are represented by a char* and a length; the char* points into
|
@@ -111,7 +110,6 @@ bool gumbo_string_equals(
|
|
111
110
|
bool gumbo_string_equals_ignore_case(
|
112
111
|
const GumboStringPiece* str1, const GumboStringPiece* str2);
|
113
112
|
|
114
|
-
|
115
113
|
/**
|
116
114
|
* A simple vector implementation. This stores a pointer to a data array and a
|
117
115
|
* length. All elements are stored as void*; client code must cast to the
|
@@ -141,8 +139,7 @@ extern const GumboVector kGumboEmptyVector;
|
|
141
139
|
* Returns the first index at which an element appears in this vector (testing
|
142
140
|
* by pointer equality), or -1 if it never does.
|
143
141
|
*/
|
144
|
-
int gumbo_vector_index_of(GumboVector* vector, void* element);
|
145
|
-
|
142
|
+
int gumbo_vector_index_of(GumboVector* vector, const void* element);
|
146
143
|
|
147
144
|
/**
|
148
145
|
* An enum for all the tags defined in the HTML5 standard. These correspond to
|
@@ -157,172 +154,10 @@ int gumbo_vector_index_of(GumboVector* vector, void* element);
|
|
157
154
|
* strings.
|
158
155
|
*/
|
159
156
|
typedef enum {
|
160
|
-
|
161
|
-
|
162
|
-
//
|
163
|
-
|
164
|
-
GUMBO_TAG_TITLE,
|
165
|
-
GUMBO_TAG_BASE,
|
166
|
-
GUMBO_TAG_LINK,
|
167
|
-
GUMBO_TAG_META,
|
168
|
-
GUMBO_TAG_STYLE,
|
169
|
-
// http://www.whatwg.org/specs/web-apps/current-work/multipage/scripting-1.html#scripting-1
|
170
|
-
GUMBO_TAG_SCRIPT,
|
171
|
-
GUMBO_TAG_NOSCRIPT,
|
172
|
-
GUMBO_TAG_TEMPLATE,
|
173
|
-
// http://www.whatwg.org/specs/web-apps/current-work/multipage/sections.html#sections
|
174
|
-
GUMBO_TAG_BODY,
|
175
|
-
GUMBO_TAG_ARTICLE,
|
176
|
-
GUMBO_TAG_SECTION,
|
177
|
-
GUMBO_TAG_NAV,
|
178
|
-
GUMBO_TAG_ASIDE,
|
179
|
-
GUMBO_TAG_H1,
|
180
|
-
GUMBO_TAG_H2,
|
181
|
-
GUMBO_TAG_H3,
|
182
|
-
GUMBO_TAG_H4,
|
183
|
-
GUMBO_TAG_H5,
|
184
|
-
GUMBO_TAG_H6,
|
185
|
-
GUMBO_TAG_HGROUP,
|
186
|
-
GUMBO_TAG_HEADER,
|
187
|
-
GUMBO_TAG_FOOTER,
|
188
|
-
GUMBO_TAG_ADDRESS,
|
189
|
-
// http://www.whatwg.org/specs/web-apps/current-work/multipage/grouping-content.html#grouping-content
|
190
|
-
GUMBO_TAG_P,
|
191
|
-
GUMBO_TAG_HR,
|
192
|
-
GUMBO_TAG_PRE,
|
193
|
-
GUMBO_TAG_BLOCKQUOTE,
|
194
|
-
GUMBO_TAG_OL,
|
195
|
-
GUMBO_TAG_UL,
|
196
|
-
GUMBO_TAG_LI,
|
197
|
-
GUMBO_TAG_DL,
|
198
|
-
GUMBO_TAG_DT,
|
199
|
-
GUMBO_TAG_DD,
|
200
|
-
GUMBO_TAG_FIGURE,
|
201
|
-
GUMBO_TAG_FIGCAPTION,
|
202
|
-
GUMBO_TAG_MAIN,
|
203
|
-
GUMBO_TAG_DIV,
|
204
|
-
// http://www.whatwg.org/specs/web-apps/current-work/multipage/text-level-semantics.html#text-level-semantics
|
205
|
-
GUMBO_TAG_A,
|
206
|
-
GUMBO_TAG_EM,
|
207
|
-
GUMBO_TAG_STRONG,
|
208
|
-
GUMBO_TAG_SMALL,
|
209
|
-
GUMBO_TAG_S,
|
210
|
-
GUMBO_TAG_CITE,
|
211
|
-
GUMBO_TAG_Q,
|
212
|
-
GUMBO_TAG_DFN,
|
213
|
-
GUMBO_TAG_ABBR,
|
214
|
-
GUMBO_TAG_DATA,
|
215
|
-
GUMBO_TAG_TIME,
|
216
|
-
GUMBO_TAG_CODE,
|
217
|
-
GUMBO_TAG_VAR,
|
218
|
-
GUMBO_TAG_SAMP,
|
219
|
-
GUMBO_TAG_KBD,
|
220
|
-
GUMBO_TAG_SUB,
|
221
|
-
GUMBO_TAG_SUP,
|
222
|
-
GUMBO_TAG_I,
|
223
|
-
GUMBO_TAG_B,
|
224
|
-
GUMBO_TAG_U,
|
225
|
-
GUMBO_TAG_MARK,
|
226
|
-
GUMBO_TAG_RUBY,
|
227
|
-
GUMBO_TAG_RT,
|
228
|
-
GUMBO_TAG_RP,
|
229
|
-
GUMBO_TAG_BDI,
|
230
|
-
GUMBO_TAG_BDO,
|
231
|
-
GUMBO_TAG_SPAN,
|
232
|
-
GUMBO_TAG_BR,
|
233
|
-
GUMBO_TAG_WBR,
|
234
|
-
// http://www.whatwg.org/specs/web-apps/current-work/multipage/edits.html#edits
|
235
|
-
GUMBO_TAG_INS,
|
236
|
-
GUMBO_TAG_DEL,
|
237
|
-
// http://www.whatwg.org/specs/web-apps/current-work/multipage/embedded-content-1.html#embedded-content-1
|
238
|
-
GUMBO_TAG_IMAGE,
|
239
|
-
GUMBO_TAG_IMG,
|
240
|
-
GUMBO_TAG_IFRAME,
|
241
|
-
GUMBO_TAG_EMBED,
|
242
|
-
GUMBO_TAG_OBJECT,
|
243
|
-
GUMBO_TAG_PARAM,
|
244
|
-
GUMBO_TAG_VIDEO,
|
245
|
-
GUMBO_TAG_AUDIO,
|
246
|
-
GUMBO_TAG_SOURCE,
|
247
|
-
GUMBO_TAG_TRACK,
|
248
|
-
GUMBO_TAG_CANVAS,
|
249
|
-
GUMBO_TAG_MAP,
|
250
|
-
GUMBO_TAG_AREA,
|
251
|
-
// http://www.whatwg.org/specs/web-apps/current-work/multipage/the-map-element.html#mathml
|
252
|
-
GUMBO_TAG_MATH,
|
253
|
-
GUMBO_TAG_MI,
|
254
|
-
GUMBO_TAG_MO,
|
255
|
-
GUMBO_TAG_MN,
|
256
|
-
GUMBO_TAG_MS,
|
257
|
-
GUMBO_TAG_MTEXT,
|
258
|
-
GUMBO_TAG_MGLYPH,
|
259
|
-
GUMBO_TAG_MALIGNMARK,
|
260
|
-
GUMBO_TAG_ANNOTATION_XML,
|
261
|
-
// http://www.whatwg.org/specs/web-apps/current-work/multipage/the-map-element.html#svg-0
|
262
|
-
GUMBO_TAG_SVG,
|
263
|
-
GUMBO_TAG_FOREIGNOBJECT,
|
264
|
-
GUMBO_TAG_DESC,
|
265
|
-
// SVG title tags will have GUMBO_TAG_TITLE as with HTML.
|
266
|
-
// http://www.whatwg.org/specs/web-apps/current-work/multipage/tabular-data.html#tabular-data
|
267
|
-
GUMBO_TAG_TABLE,
|
268
|
-
GUMBO_TAG_CAPTION,
|
269
|
-
GUMBO_TAG_COLGROUP,
|
270
|
-
GUMBO_TAG_COL,
|
271
|
-
GUMBO_TAG_TBODY,
|
272
|
-
GUMBO_TAG_THEAD,
|
273
|
-
GUMBO_TAG_TFOOT,
|
274
|
-
GUMBO_TAG_TR,
|
275
|
-
GUMBO_TAG_TD,
|
276
|
-
GUMBO_TAG_TH,
|
277
|
-
// http://www.whatwg.org/specs/web-apps/current-work/multipage/forms.html#forms
|
278
|
-
GUMBO_TAG_FORM,
|
279
|
-
GUMBO_TAG_FIELDSET,
|
280
|
-
GUMBO_TAG_LEGEND,
|
281
|
-
GUMBO_TAG_LABEL,
|
282
|
-
GUMBO_TAG_INPUT,
|
283
|
-
GUMBO_TAG_BUTTON,
|
284
|
-
GUMBO_TAG_SELECT,
|
285
|
-
GUMBO_TAG_DATALIST,
|
286
|
-
GUMBO_TAG_OPTGROUP,
|
287
|
-
GUMBO_TAG_OPTION,
|
288
|
-
GUMBO_TAG_TEXTAREA,
|
289
|
-
GUMBO_TAG_KEYGEN,
|
290
|
-
GUMBO_TAG_OUTPUT,
|
291
|
-
GUMBO_TAG_PROGRESS,
|
292
|
-
GUMBO_TAG_METER,
|
293
|
-
// http://www.whatwg.org/specs/web-apps/current-work/multipage/interactive-elements.html#interactive-elements
|
294
|
-
GUMBO_TAG_DETAILS,
|
295
|
-
GUMBO_TAG_SUMMARY,
|
296
|
-
GUMBO_TAG_MENU,
|
297
|
-
GUMBO_TAG_MENUITEM,
|
298
|
-
// Non-conforming elements that nonetheless appear in the HTML5 spec.
|
299
|
-
// http://www.whatwg.org/specs/web-apps/current-work/multipage/obsolete.html#non-conforming-features
|
300
|
-
GUMBO_TAG_APPLET,
|
301
|
-
GUMBO_TAG_ACRONYM,
|
302
|
-
GUMBO_TAG_BGSOUND,
|
303
|
-
GUMBO_TAG_DIR,
|
304
|
-
GUMBO_TAG_FRAME,
|
305
|
-
GUMBO_TAG_FRAMESET,
|
306
|
-
GUMBO_TAG_NOFRAMES,
|
307
|
-
GUMBO_TAG_ISINDEX,
|
308
|
-
GUMBO_TAG_LISTING,
|
309
|
-
GUMBO_TAG_XMP,
|
310
|
-
GUMBO_TAG_NEXTID,
|
311
|
-
GUMBO_TAG_NOEMBED,
|
312
|
-
GUMBO_TAG_PLAINTEXT,
|
313
|
-
GUMBO_TAG_RB,
|
314
|
-
GUMBO_TAG_STRIKE,
|
315
|
-
GUMBO_TAG_BASEFONT,
|
316
|
-
GUMBO_TAG_BIG,
|
317
|
-
GUMBO_TAG_BLINK,
|
318
|
-
GUMBO_TAG_CENTER,
|
319
|
-
GUMBO_TAG_FONT,
|
320
|
-
GUMBO_TAG_MARQUEE,
|
321
|
-
GUMBO_TAG_MULTICOL,
|
322
|
-
GUMBO_TAG_NOBR,
|
323
|
-
GUMBO_TAG_SPACER,
|
324
|
-
GUMBO_TAG_TT,
|
325
|
-
// Used for all tags that don't have special handling in HTML.
|
157
|
+
// Load all the tags from an external source, generated from tag.in.
|
158
|
+
#include "tag_enum.h"
|
159
|
+
// Used for all tags that don't have special handling in HTML. Add new tags
|
160
|
+
// to the end of tag.in so as to preserve backwards-compatibility.
|
326
161
|
GUMBO_TAG_UNKNOWN,
|
327
162
|
// A marker value to indicate the end of the enum, for iterating over it.
|
328
163
|
// Also used as the terminator for varargs functions that take tags.
|
@@ -364,9 +199,10 @@ const char* gumbo_normalize_svg_tagname(const GumboStringPiece* tagname);
|
|
364
199
|
|
365
200
|
/**
|
366
201
|
* Converts a tag name string (which may be in upper or mixed case) to a tag
|
367
|
-
* enum.
|
202
|
+
* enum. The `tag` version expects `tagname` to be NUL-terminated.
|
368
203
|
*/
|
369
204
|
GumboTag gumbo_tag_enum(const char* tagname);
|
205
|
+
GumboTag gumbo_tagn_enum(const char* tagname, unsigned int length);
|
370
206
|
|
371
207
|
/**
|
372
208
|
* Attribute namespaces.
|
@@ -461,10 +297,16 @@ typedef enum {
|
|
461
297
|
GUMBO_NODE_TEXT,
|
462
298
|
/** CDATA node. v will be a GumboText. */
|
463
299
|
GUMBO_NODE_CDATA,
|
464
|
-
/** Comment node. v
|
300
|
+
/** Comment node. v will be a GumboText, excluding comment delimiters. */
|
465
301
|
GUMBO_NODE_COMMENT,
|
466
302
|
/** Text node, where all contents is whitespace. v will be a GumboText. */
|
467
|
-
GUMBO_NODE_WHITESPACE
|
303
|
+
GUMBO_NODE_WHITESPACE,
|
304
|
+
/** Template node. This is separate from GUMBO_NODE_ELEMENT because many
|
305
|
+
* client libraries will want to ignore the contents of template nodes, as
|
306
|
+
* the spec suggests. Recursing on GUMBO_NODE_ELEMENT will do the right thing
|
307
|
+
* here, while clients that want to include template contents should also
|
308
|
+
* check for GUMBO_NODE_TEMPLATE. v will be a GumboElement. */
|
309
|
+
GUMBO_NODE_TEMPLATE
|
468
310
|
} GumboNodeType;
|
469
311
|
|
470
312
|
/**
|
@@ -473,7 +315,9 @@ typedef enum {
|
|
473
315
|
*/
|
474
316
|
typedef struct GumboInternalNode GumboNode;
|
475
317
|
|
476
|
-
/**
|
318
|
+
/**
|
319
|
+
* http://www.whatwg.org/specs/web-apps/current-work/complete/dom.html#quirks-mode
|
320
|
+
*/
|
477
321
|
typedef enum {
|
478
322
|
GUMBO_DOCTYPE_NO_QUIRKS,
|
479
323
|
GUMBO_DOCTYPE_QUIRKS,
|
@@ -571,7 +415,6 @@ typedef enum {
|
|
571
415
|
GUMBO_INSERTION_FOSTER_PARENTED = 1 << 10,
|
572
416
|
} GumboParseFlags;
|
573
417
|
|
574
|
-
|
575
418
|
/**
|
576
419
|
* Information specific to document nodes.
|
577
420
|
*/
|
@@ -690,9 +533,9 @@ struct GumboInternalNode {
|
|
690
533
|
|
691
534
|
/** The actual node data. */
|
692
535
|
union {
|
693
|
-
GumboDocument document;
|
694
|
-
GumboElement element;
|
695
|
-
GumboText text;
|
536
|
+
GumboDocument document; // For GUMBO_NODE_DOCUMENT.
|
537
|
+
GumboElement element; // For GUMBO_NODE_ELEMENT and GUMBO_NODE_TEMPLATE.
|
538
|
+
GumboText text; // For everything else.
|
696
539
|
} v;
|
697
540
|
};
|
698
541
|
|
@@ -750,6 +593,29 @@ typedef struct GumboInternalOptions {
|
|
750
593
|
* Default: -1
|
751
594
|
*/
|
752
595
|
int max_errors;
|
596
|
+
|
597
|
+
/**
|
598
|
+
* The fragment context for parsing:
|
599
|
+
* https://html.spec.whatwg.org/multipage/syntax.html#parsing-html-fragments
|
600
|
+
*
|
601
|
+
* If GUMBO_TAG_LAST is passed here, it is assumed to be "no fragment", i.e.
|
602
|
+
* the regular parsing algorithm. Otherwise, pass the tag enum for the
|
603
|
+
* intended parent of the parsed fragment. We use just the tag enum rather
|
604
|
+
* than a full node because that's enough to set all the parsing context we
|
605
|
+
* need, and it provides some additional flexibility for client code to act as
|
606
|
+
* if parsing a fragment even when a full HTML tree isn't available.
|
607
|
+
*
|
608
|
+
* Default: GUMBO_TAG_LAST
|
609
|
+
*/
|
610
|
+
GumboTag fragment_context;
|
611
|
+
|
612
|
+
/**
|
613
|
+
* The namespace for the fragment context. This lets client code
|
614
|
+
* differentiate between, say, parsing a <title> tag in SVG vs. parsing it in
|
615
|
+
* HTML.
|
616
|
+
* Default: GUMBO_NAMESPACE_HTML
|
617
|
+
*/
|
618
|
+
GumboNamespaceEnum fragment_namespace;
|
753
619
|
} GumboOptions;
|
754
620
|
|
755
621
|
/** Default options struct; use this with gumbo_parse_with_options. */
|
@@ -796,9 +662,7 @@ GumboOutput* gumbo_parse_with_options(
|
|
796
662
|
const GumboOptions* options, const char* buffer, size_t buffer_length);
|
797
663
|
|
798
664
|
/** Release the memory used for the parse tree & parse errors. */
|
799
|
-
void gumbo_destroy_output(
|
800
|
-
const GumboOptions* options, GumboOutput* output);
|
801
|
-
|
665
|
+
void gumbo_destroy_output(const GumboOptions* options, GumboOutput* output);
|
802
666
|
|
803
667
|
#ifdef __cplusplus
|
804
668
|
}
|
data/gumbo-parser/src/parser.c
CHANGED
@@ -34,8 +34,10 @@
|
|
34
34
|
|
35
35
|
#define AVOID_UNUSED_VARIABLE_WARNING(i) (void)(i)
|
36
36
|
|
37
|
-
#define GUMBO_STRING(literal)
|
38
|
-
|
37
|
+
#define GUMBO_STRING(literal) \
|
38
|
+
{ literal, sizeof(literal) - 1 }
|
39
|
+
#define TERMINATOR \
|
40
|
+
{ "", 0 }
|
39
41
|
|
40
42
|
typedef char gumbo_tagset[GUMBO_TAG_LAST];
|
41
43
|
#define TAG(tag) [GUMBO_TAG_##tag] = (1 << GUMBO_NAMESPACE_HTML)
|
@@ -43,46 +45,42 @@ typedef char gumbo_tagset[GUMBO_TAG_LAST];
|
|
43
45
|
#define TAG_MATHML(tag) [GUMBO_TAG_##tag] = (1 << GUMBO_NAMESPACE_MATHML)
|
44
46
|
|
45
47
|
#define TAGSET_INCLUDES(tagset, namespace, tag) \
|
46
|
-
(tag < GUMBO_TAG_LAST &&
|
47
|
-
tagset[(int)tag] == (1 << (int)namespace))
|
48
|
+
(tag < GUMBO_TAG_LAST && tagset[(int) tag] == (1 << (int) namespace))
|
48
49
|
|
50
|
+
// selected forward declarations as it is getting hard to find
|
51
|
+
// an appropriate order
|
52
|
+
static bool node_html_tag_is(const GumboNode*, GumboTag);
|
53
|
+
static GumboInsertionMode get_current_template_insertion_mode(
|
54
|
+
const GumboParser*);
|
55
|
+
static bool handle_in_template(GumboParser*, GumboToken*);
|
56
|
+
static void destroy_node(GumboParser*, GumboNode*);
|
49
57
|
|
50
|
-
static void* malloc_wrapper(void* unused, size_t size) {
|
51
|
-
return malloc(size);
|
52
|
-
}
|
58
|
+
static void* malloc_wrapper(void* unused, size_t size) { return malloc(size); }
|
53
59
|
|
54
|
-
static void free_wrapper(void* unused, void* ptr) {
|
55
|
-
free(ptr);
|
56
|
-
}
|
60
|
+
static void free_wrapper(void* unused, void* ptr) { free(ptr); }
|
57
61
|
|
58
|
-
const GumboOptions kGumboDefaultOptions = {
|
59
|
-
|
60
|
-
&free_wrapper,
|
61
|
-
NULL,
|
62
|
-
8,
|
63
|
-
false,
|
64
|
-
-1,
|
65
|
-
};
|
62
|
+
const GumboOptions kGumboDefaultOptions = {&malloc_wrapper, &free_wrapper, NULL,
|
63
|
+
8, false, -1, GUMBO_TAG_LAST, GUMBO_NAMESPACE_HTML};
|
66
64
|
|
67
65
|
static const GumboStringPiece kDoctypeHtml = GUMBO_STRING("html");
|
68
|
-
static const GumboStringPiece kPublicIdHtml4_0 =
|
69
|
-
"-//W3C//DTD HTML 4.0//EN");
|
70
|
-
static const GumboStringPiece kPublicIdHtml4_01 =
|
71
|
-
"-//W3C//DTD HTML 4.01//EN");
|
72
|
-
static const GumboStringPiece kPublicIdXhtml1_0 =
|
73
|
-
"-//W3C//DTD XHTML 1.0 Strict//EN");
|
74
|
-
static const GumboStringPiece kPublicIdXhtml1_1 =
|
75
|
-
"-//W3C//DTD XHTML 1.1//EN");
|
76
|
-
static const GumboStringPiece kSystemIdRecHtml4_0 =
|
77
|
-
"http://www.w3.org/TR/REC-html40/strict.dtd");
|
78
|
-
static const GumboStringPiece kSystemIdHtml4 =
|
79
|
-
"http://www.w3.org/TR/html4/strict.dtd");
|
80
|
-
static const GumboStringPiece kSystemIdXhtmlStrict1_1 =
|
81
|
-
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd");
|
82
|
-
static const GumboStringPiece kSystemIdXhtml1_1 =
|
83
|
-
"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd");
|
84
|
-
static const GumboStringPiece kSystemIdLegacyCompat =
|
85
|
-
"about:legacy-compat");
|
66
|
+
static const GumboStringPiece kPublicIdHtml4_0 =
|
67
|
+
GUMBO_STRING("-//W3C//DTD HTML 4.0//EN");
|
68
|
+
static const GumboStringPiece kPublicIdHtml4_01 =
|
69
|
+
GUMBO_STRING("-//W3C//DTD HTML 4.01//EN");
|
70
|
+
static const GumboStringPiece kPublicIdXhtml1_0 =
|
71
|
+
GUMBO_STRING("-//W3C//DTD XHTML 1.0 Strict//EN");
|
72
|
+
static const GumboStringPiece kPublicIdXhtml1_1 =
|
73
|
+
GUMBO_STRING("-//W3C//DTD XHTML 1.1//EN");
|
74
|
+
static const GumboStringPiece kSystemIdRecHtml4_0 =
|
75
|
+
GUMBO_STRING("http://www.w3.org/TR/REC-html40/strict.dtd");
|
76
|
+
static const GumboStringPiece kSystemIdHtml4 =
|
77
|
+
GUMBO_STRING("http://www.w3.org/TR/html4/strict.dtd");
|
78
|
+
static const GumboStringPiece kSystemIdXhtmlStrict1_1 =
|
79
|
+
GUMBO_STRING("http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd");
|
80
|
+
static const GumboStringPiece kSystemIdXhtml1_1 =
|
81
|
+
GUMBO_STRING("http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd");
|
82
|
+
static const GumboStringPiece kSystemIdLegacyCompat =
|
83
|
+
GUMBO_STRING("about:legacy-compat");
|
86
84
|
|
87
85
|
// The doctype arrays have an explicit terminator because we want to pass them
|
88
86
|
// to a helper function, and passing them as a pointer discards sizeof
|
@@ -90,96 +88,86 @@ static const GumboStringPiece kSystemIdLegacyCompat = GUMBO_STRING(
|
|
90
88
|
// over them use sizeof directly instead of a terminator.
|
91
89
|
|
92
90
|
static const GumboStringPiece kQuirksModePublicIdPrefixes[] = {
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
};
|
91
|
+
GUMBO_STRING("+//Silmaril//dtd html Pro v0r11 19970101//"),
|
92
|
+
GUMBO_STRING("-//AdvaSoft Ltd//DTD HTML 3.0 asWedit + extensions//"),
|
93
|
+
GUMBO_STRING("-//AS//DTD HTML 3.0 asWedit + extensions//"),
|
94
|
+
GUMBO_STRING("-//IETF//DTD HTML 2.0 Level 1//"),
|
95
|
+
GUMBO_STRING("-//IETF//DTD HTML 2.0 Level 2//"),
|
96
|
+
GUMBO_STRING("-//IETF//DTD HTML 2.0 Strict Level 1//"),
|
97
|
+
GUMBO_STRING("-//IETF//DTD HTML 2.0 Strict Level 2//"),
|
98
|
+
GUMBO_STRING("-//IETF//DTD HTML 2.0 Strict//"),
|
99
|
+
GUMBO_STRING("-//IETF//DTD HTML 2.0//"),
|
100
|
+
GUMBO_STRING("-//IETF//DTD HTML 2.1E//"),
|
101
|
+
GUMBO_STRING("-//IETF//DTD HTML 3.0//"),
|
102
|
+
GUMBO_STRING("-//IETF//DTD HTML 3.2 Final//"),
|
103
|
+
GUMBO_STRING("-//IETF//DTD HTML 3.2//"),
|
104
|
+
GUMBO_STRING("-//IETF//DTD HTML 3//"),
|
105
|
+
GUMBO_STRING("-//IETF//DTD HTML Level 0//"),
|
106
|
+
GUMBO_STRING("-//IETF//DTD HTML Level 1//"),
|
107
|
+
GUMBO_STRING("-//IETF//DTD HTML Level 2//"),
|
108
|
+
GUMBO_STRING("-//IETF//DTD HTML Level 3//"),
|
109
|
+
GUMBO_STRING("-//IETF//DTD HTML Strict Level 0//"),
|
110
|
+
GUMBO_STRING("-//IETF//DTD HTML Strict Level 1//"),
|
111
|
+
GUMBO_STRING("-//IETF//DTD HTML Strict Level 2//"),
|
112
|
+
GUMBO_STRING("-//IETF//DTD HTML Strict Level 3//"),
|
113
|
+
GUMBO_STRING("-//IETF//DTD HTML Strict//"),
|
114
|
+
GUMBO_STRING("-//IETF//DTD HTML//"),
|
115
|
+
GUMBO_STRING("-//Metrius//DTD Metrius Presentational//"),
|
116
|
+
GUMBO_STRING("-//Microsoft//DTD Internet Explorer 2.0 HTML Strict//"),
|
117
|
+
GUMBO_STRING("-//Microsoft//DTD Internet Explorer 2.0 HTML//"),
|
118
|
+
GUMBO_STRING("-//Microsoft//DTD Internet Explorer 2.0 Tables//"),
|
119
|
+
GUMBO_STRING("-//Microsoft//DTD Internet Explorer 3.0 HTML Strict//"),
|
120
|
+
GUMBO_STRING("-//Microsoft//DTD Internet Explorer 3.0 HTML//"),
|
121
|
+
GUMBO_STRING("-//Microsoft//DTD Internet Explorer 3.0 Tables//"),
|
122
|
+
GUMBO_STRING("-//Netscape Comm. Corp.//DTD HTML//"),
|
123
|
+
GUMBO_STRING("-//Netscape Comm. Corp.//DTD Strict HTML//"),
|
124
|
+
GUMBO_STRING("-//O'Reilly and Associates//DTD HTML 2.0//"),
|
125
|
+
GUMBO_STRING("-//O'Reilly and Associates//DTD HTML Extended 1.0//"),
|
126
|
+
GUMBO_STRING("-//O'Reilly and Associates//DTD HTML Extended Relaxed 1.0//"),
|
127
|
+
GUMBO_STRING(
|
128
|
+
"-//SoftQuad Software//DTD HoTMetaL PRO 6.0::19990601::)"
|
129
|
+
"extensions to HTML 4.0//"),
|
130
|
+
GUMBO_STRING(
|
131
|
+
"-//SoftQuad//DTD HoTMetaL PRO 4.0::19971010::"
|
132
|
+
"extensions to HTML 4.0//"),
|
133
|
+
GUMBO_STRING("-//Spyglass//DTD HTML 2.0 Extended//"),
|
134
|
+
GUMBO_STRING("-//SQ//DTD HTML 2.0 HoTMetaL + extensions//"),
|
135
|
+
GUMBO_STRING("-//Sun Microsystems Corp.//DTD HotJava HTML//"),
|
136
|
+
GUMBO_STRING("-//Sun Microsystems Corp.//DTD HotJava Strict HTML//"),
|
137
|
+
GUMBO_STRING("-//W3C//DTD HTML 3 1995-03-24//"),
|
138
|
+
GUMBO_STRING("-//W3C//DTD HTML 3.2 Draft//"),
|
139
|
+
GUMBO_STRING("-//W3C//DTD HTML 3.2 Final//"),
|
140
|
+
GUMBO_STRING("-//W3C//DTD HTML 3.2//"),
|
141
|
+
GUMBO_STRING("-//W3C//DTD HTML 3.2S Draft//"),
|
142
|
+
GUMBO_STRING("-//W3C//DTD HTML 4.0 Frameset//"),
|
143
|
+
GUMBO_STRING("-//W3C//DTD HTML 4.0 Transitional//"),
|
144
|
+
GUMBO_STRING("-//W3C//DTD HTML Experimental 19960712//"),
|
145
|
+
GUMBO_STRING("-//W3C//DTD HTML Experimental 970421//"),
|
146
|
+
GUMBO_STRING("-//W3C//DTD W3 HTML//"),
|
147
|
+
GUMBO_STRING("-//W3O//DTD W3 HTML 3.0//"),
|
148
|
+
GUMBO_STRING("-//WebTechs//DTD Mozilla HTML 2.0//"),
|
149
|
+
GUMBO_STRING("-//WebTechs//DTD Mozilla HTML//"), TERMINATOR};
|
152
150
|
|
153
151
|
static const GumboStringPiece kQuirksModePublicIdExactMatches[] = {
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
TERMINATOR
|
158
|
-
};
|
152
|
+
GUMBO_STRING("-//W3O//DTD W3 HTML Strict 3.0//EN//"),
|
153
|
+
GUMBO_STRING("-/W3C/DTD HTML 4.0 Transitional/EN"), GUMBO_STRING("HTML"),
|
154
|
+
TERMINATOR};
|
159
155
|
|
160
156
|
static const GumboStringPiece kQuirksModeSystemIdExactMatches[] = {
|
161
|
-
|
162
|
-
|
163
|
-
};
|
157
|
+
GUMBO_STRING("http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd"),
|
158
|
+
TERMINATOR};
|
164
159
|
|
165
160
|
static const GumboStringPiece kLimitedQuirksPublicIdPrefixes[] = {
|
166
|
-
|
167
|
-
|
168
|
-
TERMINATOR
|
169
|
-
};
|
161
|
+
GUMBO_STRING("-//W3C//DTD XHTML 1.0 Frameset//"),
|
162
|
+
GUMBO_STRING("-//W3C//DTD XHTML 1.0 Transitional//"), TERMINATOR};
|
170
163
|
|
171
|
-
static const GumboStringPiece kLimitedQuirksRequiresSystemIdPublicIdPrefixes[] =
|
172
|
-
|
173
|
-
|
174
|
-
TERMINATOR
|
175
|
-
};
|
164
|
+
static const GumboStringPiece kLimitedQuirksRequiresSystemIdPublicIdPrefixes[] =
|
165
|
+
{GUMBO_STRING("-//W3C//DTD HTML 4.01 Frameset//"),
|
166
|
+
GUMBO_STRING("-//W3C//DTD HTML 4.01 Transitional//"), TERMINATOR};
|
176
167
|
|
177
168
|
// Indexed by GumboNamespaceEnum; keep in sync with that.
|
178
|
-
static const char* kLegalXmlns[] = {
|
179
|
-
|
180
|
-
"http://www.w3.org/2000/svg",
|
181
|
-
"http://www.w3.org/1998/Math/MathML"
|
182
|
-
};
|
169
|
+
static const char* kLegalXmlns[] = {"http://www.w3.org/1999/xhtml",
|
170
|
+
"http://www.w3.org/2000/svg", "http://www.w3.org/1998/Math/MathML"};
|
183
171
|
|
184
172
|
typedef struct _ReplacementEntry {
|
185
173
|
const GumboStringPiece from;
|
@@ -187,112 +175,112 @@ typedef struct _ReplacementEntry {
|
|
187
175
|
} ReplacementEntry;
|
188
176
|
|
189
177
|
#define REPLACEMENT_ENTRY(from, to) \
|
190
|
-
|
178
|
+
{ GUMBO_STRING(from), GUMBO_STRING(to) }
|
191
179
|
|
192
180
|
// Static data for SVG attribute replacements.
|
193
|
-
//
|
181
|
+
// https://html.spec.whatwg.org/multipage/syntax.html#creating-and-inserting-nodes
|
194
182
|
static const ReplacementEntry kSvgAttributeReplacements[] = {
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
-
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
|
207
|
-
|
208
|
-
|
209
|
-
|
210
|
-
|
211
|
-
|
212
|
-
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
|
217
|
-
|
218
|
-
|
219
|
-
|
220
|
-
|
221
|
-
|
222
|
-
|
223
|
-
|
224
|
-
|
225
|
-
|
226
|
-
|
227
|
-
|
228
|
-
|
229
|
-
|
230
|
-
|
231
|
-
|
232
|
-
|
233
|
-
|
234
|
-
|
235
|
-
|
236
|
-
|
237
|
-
|
238
|
-
|
239
|
-
|
240
|
-
|
241
|
-
|
242
|
-
|
243
|
-
|
244
|
-
|
245
|
-
|
246
|
-
|
247
|
-
|
248
|
-
|
249
|
-
|
250
|
-
|
251
|
-
|
252
|
-
|
253
|
-
|
254
|
-
|
255
|
-
|
256
|
-
|
183
|
+
REPLACEMENT_ENTRY("attributename", "attributeName"),
|
184
|
+
REPLACEMENT_ENTRY("attributetype", "attributeType"),
|
185
|
+
REPLACEMENT_ENTRY("basefrequency", "baseFrequency"),
|
186
|
+
REPLACEMENT_ENTRY("baseprofile", "baseProfile"),
|
187
|
+
REPLACEMENT_ENTRY("calcmode", "calcMode"),
|
188
|
+
REPLACEMENT_ENTRY("clippathunits", "clipPathUnits"),
|
189
|
+
// REPLACEMENT_ENTRY("contentscripttype", "contentScriptType"),
|
190
|
+
// REPLACEMENT_ENTRY("contentstyletype", "contentStyleType"),
|
191
|
+
REPLACEMENT_ENTRY("diffuseconstant", "diffuseConstant"),
|
192
|
+
REPLACEMENT_ENTRY("edgemode", "edgeMode"),
|
193
|
+
// REPLACEMENT_ENTRY("externalresourcesrequired",
|
194
|
+
// "externalResourcesRequired"),
|
195
|
+
// REPLACEMENT_ENTRY("filterres", "filterRes"),
|
196
|
+
REPLACEMENT_ENTRY("filterunits", "filterUnits"),
|
197
|
+
REPLACEMENT_ENTRY("glyphref", "glyphRef"),
|
198
|
+
REPLACEMENT_ENTRY("gradienttransform", "gradientTransform"),
|
199
|
+
REPLACEMENT_ENTRY("gradientunits", "gradientUnits"),
|
200
|
+
REPLACEMENT_ENTRY("kernelmatrix", "kernelMatrix"),
|
201
|
+
REPLACEMENT_ENTRY("kernelunitlength", "kernelUnitLength"),
|
202
|
+
REPLACEMENT_ENTRY("keypoints", "keyPoints"),
|
203
|
+
REPLACEMENT_ENTRY("keysplines", "keySplines"),
|
204
|
+
REPLACEMENT_ENTRY("keytimes", "keyTimes"),
|
205
|
+
REPLACEMENT_ENTRY("lengthadjust", "lengthAdjust"),
|
206
|
+
REPLACEMENT_ENTRY("limitingconeangle", "limitingConeAngle"),
|
207
|
+
REPLACEMENT_ENTRY("markerheight", "markerHeight"),
|
208
|
+
REPLACEMENT_ENTRY("markerunits", "markerUnits"),
|
209
|
+
REPLACEMENT_ENTRY("markerwidth", "markerWidth"),
|
210
|
+
REPLACEMENT_ENTRY("maskcontentunits", "maskContentUnits"),
|
211
|
+
REPLACEMENT_ENTRY("maskunits", "maskUnits"),
|
212
|
+
REPLACEMENT_ENTRY("numoctaves", "numOctaves"),
|
213
|
+
REPLACEMENT_ENTRY("pathlength", "pathLength"),
|
214
|
+
REPLACEMENT_ENTRY("patterncontentunits", "patternContentUnits"),
|
215
|
+
REPLACEMENT_ENTRY("patterntransform", "patternTransform"),
|
216
|
+
REPLACEMENT_ENTRY("patternunits", "patternUnits"),
|
217
|
+
REPLACEMENT_ENTRY("pointsatx", "pointsAtX"),
|
218
|
+
REPLACEMENT_ENTRY("pointsaty", "pointsAtY"),
|
219
|
+
REPLACEMENT_ENTRY("pointsatz", "pointsAtZ"),
|
220
|
+
REPLACEMENT_ENTRY("preservealpha", "preserveAlpha"),
|
221
|
+
REPLACEMENT_ENTRY("preserveaspectratio", "preserveAspectRatio"),
|
222
|
+
REPLACEMENT_ENTRY("primitiveunits", "primitiveUnits"),
|
223
|
+
REPLACEMENT_ENTRY("refx", "refX"), REPLACEMENT_ENTRY("refy", "refY"),
|
224
|
+
REPLACEMENT_ENTRY("repeatcount", "repeatCount"),
|
225
|
+
REPLACEMENT_ENTRY("repeatdur", "repeatDur"),
|
226
|
+
REPLACEMENT_ENTRY("requiredextensions", "requiredExtensions"),
|
227
|
+
REPLACEMENT_ENTRY("requiredfeatures", "requiredFeatures"),
|
228
|
+
REPLACEMENT_ENTRY("specularconstant", "specularConstant"),
|
229
|
+
REPLACEMENT_ENTRY("specularexponent", "specularExponent"),
|
230
|
+
REPLACEMENT_ENTRY("spreadmethod", "spreadMethod"),
|
231
|
+
REPLACEMENT_ENTRY("startoffset", "startOffset"),
|
232
|
+
REPLACEMENT_ENTRY("stddeviation", "stdDeviation"),
|
233
|
+
REPLACEMENT_ENTRY("stitchtiles", "stitchTiles"),
|
234
|
+
REPLACEMENT_ENTRY("surfacescale", "surfaceScale"),
|
235
|
+
REPLACEMENT_ENTRY("systemlanguage", "systemLanguage"),
|
236
|
+
REPLACEMENT_ENTRY("tablevalues", "tableValues"),
|
237
|
+
REPLACEMENT_ENTRY("targetx", "targetX"),
|
238
|
+
REPLACEMENT_ENTRY("targety", "targetY"),
|
239
|
+
REPLACEMENT_ENTRY("textlength", "textLength"),
|
240
|
+
REPLACEMENT_ENTRY("viewbox", "viewBox"),
|
241
|
+
REPLACEMENT_ENTRY("viewtarget", "viewTarget"),
|
242
|
+
REPLACEMENT_ENTRY("xchannelselector", "xChannelSelector"),
|
243
|
+
REPLACEMENT_ENTRY("ychannelselector", "yChannelSelector"),
|
244
|
+
REPLACEMENT_ENTRY("zoomandpan", "zoomAndPan"),
|
257
245
|
};
|
258
246
|
|
259
247
|
static const ReplacementEntry kSvgTagReplacements[] = {
|
260
|
-
|
261
|
-
|
262
|
-
|
263
|
-
|
264
|
-
|
265
|
-
|
266
|
-
|
267
|
-
|
268
|
-
|
269
|
-
|
270
|
-
|
271
|
-
|
272
|
-
|
273
|
-
|
274
|
-
|
275
|
-
|
276
|
-
|
277
|
-
|
278
|
-
|
279
|
-
|
280
|
-
|
281
|
-
|
282
|
-
|
283
|
-
|
284
|
-
|
285
|
-
|
286
|
-
|
287
|
-
|
288
|
-
|
289
|
-
|
290
|
-
|
291
|
-
|
292
|
-
|
293
|
-
|
294
|
-
|
295
|
-
|
248
|
+
REPLACEMENT_ENTRY("altglyph", "altGlyph"),
|
249
|
+
REPLACEMENT_ENTRY("altglyphdef", "altGlyphDef"),
|
250
|
+
REPLACEMENT_ENTRY("altglyphitem", "altGlyphItem"),
|
251
|
+
REPLACEMENT_ENTRY("animatecolor", "animateColor"),
|
252
|
+
REPLACEMENT_ENTRY("animatemotion", "animateMotion"),
|
253
|
+
REPLACEMENT_ENTRY("animatetransform", "animateTransform"),
|
254
|
+
REPLACEMENT_ENTRY("clippath", "clipPath"),
|
255
|
+
REPLACEMENT_ENTRY("feblend", "feBlend"),
|
256
|
+
REPLACEMENT_ENTRY("fecolormatrix", "feColorMatrix"),
|
257
|
+
REPLACEMENT_ENTRY("fecomponenttransfer", "feComponentTransfer"),
|
258
|
+
REPLACEMENT_ENTRY("fecomposite", "feComposite"),
|
259
|
+
REPLACEMENT_ENTRY("feconvolvematrix", "feConvolveMatrix"),
|
260
|
+
REPLACEMENT_ENTRY("fediffuselighting", "feDiffuseLighting"),
|
261
|
+
REPLACEMENT_ENTRY("fedisplacementmap", "feDisplacementMap"),
|
262
|
+
REPLACEMENT_ENTRY("fedistantlight", "feDistantLight"),
|
263
|
+
REPLACEMENT_ENTRY("feflood", "feFlood"),
|
264
|
+
REPLACEMENT_ENTRY("fefunca", "feFuncA"),
|
265
|
+
REPLACEMENT_ENTRY("fefuncb", "feFuncB"),
|
266
|
+
REPLACEMENT_ENTRY("fefuncg", "feFuncG"),
|
267
|
+
REPLACEMENT_ENTRY("fefuncr", "feFuncR"),
|
268
|
+
REPLACEMENT_ENTRY("fegaussianblur", "feGaussianBlur"),
|
269
|
+
REPLACEMENT_ENTRY("feimage", "feImage"),
|
270
|
+
REPLACEMENT_ENTRY("femerge", "feMerge"),
|
271
|
+
REPLACEMENT_ENTRY("femergenode", "feMergeNode"),
|
272
|
+
REPLACEMENT_ENTRY("femorphology", "feMorphology"),
|
273
|
+
REPLACEMENT_ENTRY("feoffset", "feOffset"),
|
274
|
+
REPLACEMENT_ENTRY("fepointlight", "fePointLight"),
|
275
|
+
REPLACEMENT_ENTRY("fespecularlighting", "feSpecularLighting"),
|
276
|
+
REPLACEMENT_ENTRY("fespotlight", "feSpotLight"),
|
277
|
+
REPLACEMENT_ENTRY("fetile", "feTile"),
|
278
|
+
REPLACEMENT_ENTRY("feturbulence", "feTurbulence"),
|
279
|
+
REPLACEMENT_ENTRY("foreignobject", "foreignObject"),
|
280
|
+
REPLACEMENT_ENTRY("glyphref", "glyphRef"),
|
281
|
+
REPLACEMENT_ENTRY("lineargradient", "linearGradient"),
|
282
|
+
REPLACEMENT_ENTRY("radialgradient", "radialGradient"),
|
283
|
+
REPLACEMENT_ENTRY("textpath", "textPath"),
|
296
284
|
};
|
297
285
|
|
298
286
|
typedef struct _NamespacedAttributeReplacement {
|
@@ -302,18 +290,18 @@ typedef struct _NamespacedAttributeReplacement {
|
|
302
290
|
} NamespacedAttributeReplacement;
|
303
291
|
|
304
292
|
static const NamespacedAttributeReplacement kForeignAttributeReplacements[] = {
|
305
|
-
|
306
|
-
|
307
|
-
|
308
|
-
|
309
|
-
|
310
|
-
|
311
|
-
|
312
|
-
|
313
|
-
|
314
|
-
|
315
|
-
|
316
|
-
|
293
|
+
{"xlink:actuate", "actuate", GUMBO_ATTR_NAMESPACE_XLINK},
|
294
|
+
{"xlink:actuate", "actuate", GUMBO_ATTR_NAMESPACE_XLINK},
|
295
|
+
{"xlink:href", "href", GUMBO_ATTR_NAMESPACE_XLINK},
|
296
|
+
{"xlink:role", "role", GUMBO_ATTR_NAMESPACE_XLINK},
|
297
|
+
{"xlink:show", "show", GUMBO_ATTR_NAMESPACE_XLINK},
|
298
|
+
{"xlink:title", "title", GUMBO_ATTR_NAMESPACE_XLINK},
|
299
|
+
{"xlink:type", "type", GUMBO_ATTR_NAMESPACE_XLINK},
|
300
|
+
{"xml:base", "base", GUMBO_ATTR_NAMESPACE_XML},
|
301
|
+
{"xml:lang", "lang", GUMBO_ATTR_NAMESPACE_XML},
|
302
|
+
{"xml:space", "space", GUMBO_ATTR_NAMESPACE_XML},
|
303
|
+
{"xmlns", "xmlns", GUMBO_ATTR_NAMESPACE_XMLNS},
|
304
|
+
{"xmlns:xlink", "xlink", GUMBO_ATTR_NAMESPACE_XMLNS},
|
317
305
|
};
|
318
306
|
|
319
307
|
// The "scope marker" for the list of active formatting elements. We use a
|
@@ -371,6 +359,9 @@ typedef struct GumboInternalParserState {
|
|
371
359
|
GumboNode* _head_element;
|
372
360
|
GumboNode* _form_element;
|
373
361
|
|
362
|
+
// The element used as fragment context when parsing in fragment mode
|
363
|
+
GumboNode* _fragment_ctx;
|
364
|
+
|
374
365
|
// The flag for when the spec says "Reprocess the current token in..."
|
375
366
|
bool _reprocess_current_token;
|
376
367
|
|
@@ -427,14 +418,14 @@ static bool attribute_matches(
|
|
427
418
|
static bool attribute_matches_case_sensitive(
|
428
419
|
const GumboVector* attributes, const char* name, const char* value) {
|
429
420
|
const GumboAttribute* attr = gumbo_get_attribute(attributes, name);
|
430
|
-
return attr ?
|
421
|
+
return attr ? strcmp(value, attr->value) == 0 : false;
|
431
422
|
}
|
432
423
|
|
433
424
|
// Checks if the specified attribute vectors are identical.
|
434
425
|
static bool all_attributes_match(
|
435
426
|
const GumboVector* attr1, const GumboVector* attr2) {
|
436
|
-
int num_unmatched_attr2_elements = attr2->length;
|
437
|
-
for (int i = 0; i < attr1->length; ++i) {
|
427
|
+
unsigned int num_unmatched_attr2_elements = attr2->length;
|
428
|
+
for (unsigned int i = 0; i < attr1->length; ++i) {
|
438
429
|
const GumboAttribute* attr = attr1->data[i];
|
439
430
|
if (attribute_matches_case_sensitive(attr2, attr->name, attr->value)) {
|
440
431
|
--num_unmatched_attr2_elements;
|
@@ -462,8 +453,7 @@ static GumboNode* create_node(GumboParser* parser, GumboNodeType type) {
|
|
462
453
|
static GumboNode* new_document_node(GumboParser* parser) {
|
463
454
|
GumboNode* document_node = create_node(parser, GUMBO_NODE_DOCUMENT);
|
464
455
|
document_node->parse_flags = GUMBO_INSERTION_BY_PARSER;
|
465
|
-
gumbo_vector_init(
|
466
|
-
parser, 1, &document_node->v.document.children);
|
456
|
+
gumbo_vector_init(parser, 1, &document_node->v.document.children);
|
467
457
|
|
468
458
|
// Must be initialized explicitly, as there's no guarantee that we'll see a
|
469
459
|
// doc type token.
|
@@ -498,6 +488,7 @@ static void parser_state_init(GumboParser* parser) {
|
|
498
488
|
gumbo_vector_init(parser, 5, &parser_state->_template_insertion_modes);
|
499
489
|
parser_state->_head_element = NULL;
|
500
490
|
parser_state->_form_element = NULL;
|
491
|
+
parser_state->_fragment_ctx = NULL;
|
501
492
|
parser_state->_current_token = NULL;
|
502
493
|
parser_state->_closed_body_tag = false;
|
503
494
|
parser_state->_closed_html_tag = false;
|
@@ -506,6 +497,9 @@ static void parser_state_init(GumboParser* parser) {
|
|
506
497
|
|
507
498
|
static void parser_state_destroy(GumboParser* parser) {
|
508
499
|
GumboParserState* state = parser->_parser_state;
|
500
|
+
if (state->_fragment_ctx) {
|
501
|
+
destroy_node(parser, state->_fragment_ctx);
|
502
|
+
}
|
509
503
|
gumbo_vector_destroy(parser, &state->_active_formatting_elements);
|
510
504
|
gumbo_vector_destroy(parser, &state->_open_elements);
|
511
505
|
gumbo_vector_destroy(parser, &state->_template_insertion_modes);
|
@@ -517,6 +511,10 @@ static GumboNode* get_document_node(GumboParser* parser) {
|
|
517
511
|
return parser->_output->document;
|
518
512
|
}
|
519
513
|
|
514
|
+
static bool is_fragment_parser(const GumboParser* parser) {
|
515
|
+
return !!parser->_parser_state->_fragment_ctx;
|
516
|
+
}
|
517
|
+
|
520
518
|
// Returns the node at the bottom of the stack of open elements, or NULL if no
|
521
519
|
// elements have been added yet.
|
522
520
|
static GumboNode* get_current_node(GumboParser* parser) {
|
@@ -530,6 +528,14 @@ static GumboNode* get_current_node(GumboParser* parser) {
|
|
530
528
|
return open_elements->data[open_elements->length - 1];
|
531
529
|
}
|
532
530
|
|
531
|
+
static GumboNode* get_adjusted_current_node(GumboParser* parser) {
|
532
|
+
GumboParserState* state = parser->_parser_state;
|
533
|
+
if (state->_open_elements.length == 1 && state->_fragment_ctx) {
|
534
|
+
return state->_fragment_ctx;
|
535
|
+
}
|
536
|
+
return get_current_node(parser);
|
537
|
+
}
|
538
|
+
|
533
539
|
// Returns true if the given needle is in the given array of literal
|
534
540
|
// GumboStringPieces. If exact_match is true, this requires that they match
|
535
541
|
// exactly; otherwise, this performs a prefix match to check if any of the
|
@@ -537,7 +543,7 @@ static GumboNode* get_current_node(GumboParser* parser) {
|
|
537
543
|
// case-insensitive match.
|
538
544
|
static bool is_in_static_list(
|
539
545
|
const char* needle, const GumboStringPiece* haystack, bool exact_match) {
|
540
|
-
for (int i = 0; haystack[i].length > 0; ++i) {
|
546
|
+
for (unsigned int i = 0; haystack[i].length > 0; ++i) {
|
541
547
|
if ((exact_match && !strcmp(needle, haystack[i].data)) ||
|
542
548
|
(!exact_match && !strcasecmp(needle, haystack[i].data))) {
|
543
549
|
return true;
|
@@ -556,39 +562,63 @@ static void set_insertion_mode(GumboParser* parser, GumboInsertionMode mode) {
|
|
556
562
|
// indicate that there is no appropriate insertion mode, and the loop should
|
557
563
|
// continue.
|
558
564
|
static GumboInsertionMode get_appropriate_insertion_mode(
|
559
|
-
const
|
560
|
-
|
565
|
+
const GumboParser* parser, int index) {
|
566
|
+
const GumboVector* open_elements = &parser->_parser_state->_open_elements;
|
567
|
+
const GumboNode* node = open_elements->data[index];
|
568
|
+
const bool is_last = index == 0;
|
561
569
|
|
562
|
-
if (
|
563
|
-
|
564
|
-
|
570
|
+
if (is_last && is_fragment_parser(parser)) {
|
571
|
+
node = parser->_parser_state->_fragment_ctx;
|
572
|
+
}
|
573
|
+
|
574
|
+
assert(node->type == GUMBO_NODE_ELEMENT || node->type == GUMBO_NODE_TEMPLATE);
|
575
|
+
switch (node->v.element.tag) {
|
576
|
+
case GUMBO_TAG_SELECT: {
|
577
|
+
if (is_last) {
|
578
|
+
return GUMBO_INSERTION_MODE_IN_SELECT;
|
579
|
+
}
|
580
|
+
for (int i = index; i > 0; --i) {
|
581
|
+
const GumboNode* ancestor = open_elements->data[i];
|
582
|
+
if (node_html_tag_is(ancestor, GUMBO_TAG_TEMPLATE)) {
|
565
583
|
return GUMBO_INSERTION_MODE_IN_SELECT;
|
566
|
-
|
567
|
-
|
568
|
-
return
|
569
|
-
|
570
|
-
|
571
|
-
|
572
|
-
case GUMBO_TAG_TBODY:
|
573
|
-
case GUMBO_TAG_THEAD:
|
574
|
-
case GUMBO_TAG_TFOOT:
|
575
|
-
return GUMBO_INSERTION_MODE_IN_TABLE_BODY;
|
576
|
-
case GUMBO_TAG_CAPTION:
|
577
|
-
return GUMBO_INSERTION_MODE_IN_CAPTION;
|
578
|
-
case GUMBO_TAG_COLGROUP:
|
579
|
-
return GUMBO_INSERTION_MODE_IN_COLUMN_GROUP;
|
580
|
-
case GUMBO_TAG_TABLE:
|
581
|
-
return GUMBO_INSERTION_MODE_IN_TABLE;
|
582
|
-
case GUMBO_TAG_HEAD:
|
583
|
-
case GUMBO_TAG_BODY:
|
584
|
-
return GUMBO_INSERTION_MODE_IN_BODY;
|
585
|
-
case GUMBO_TAG_FRAMESET:
|
586
|
-
return GUMBO_INSERTION_MODE_IN_FRAMESET;
|
587
|
-
case GUMBO_TAG_HTML:
|
588
|
-
return GUMBO_INSERTION_MODE_BEFORE_HEAD;
|
589
|
-
default:
|
590
|
-
break;
|
584
|
+
}
|
585
|
+
if (node_html_tag_is(ancestor, GUMBO_TAG_TABLE)) {
|
586
|
+
return GUMBO_INSERTION_MODE_IN_SELECT_IN_TABLE;
|
587
|
+
}
|
588
|
+
}
|
589
|
+
return GUMBO_INSERTION_MODE_IN_SELECT;
|
591
590
|
}
|
591
|
+
case GUMBO_TAG_TD:
|
592
|
+
case GUMBO_TAG_TH:
|
593
|
+
if (!is_last) return GUMBO_INSERTION_MODE_IN_CELL;
|
594
|
+
break;
|
595
|
+
case GUMBO_TAG_TR:
|
596
|
+
return GUMBO_INSERTION_MODE_IN_ROW;
|
597
|
+
case GUMBO_TAG_TBODY:
|
598
|
+
case GUMBO_TAG_THEAD:
|
599
|
+
case GUMBO_TAG_TFOOT:
|
600
|
+
return GUMBO_INSERTION_MODE_IN_TABLE_BODY;
|
601
|
+
case GUMBO_TAG_CAPTION:
|
602
|
+
return GUMBO_INSERTION_MODE_IN_CAPTION;
|
603
|
+
case GUMBO_TAG_COLGROUP:
|
604
|
+
return GUMBO_INSERTION_MODE_IN_COLUMN_GROUP;
|
605
|
+
case GUMBO_TAG_TABLE:
|
606
|
+
return GUMBO_INSERTION_MODE_IN_TABLE;
|
607
|
+
case GUMBO_TAG_TEMPLATE:
|
608
|
+
return get_current_template_insertion_mode(parser);
|
609
|
+
case GUMBO_TAG_HEAD:
|
610
|
+
if (!is_last) return GUMBO_INSERTION_MODE_IN_HEAD;
|
611
|
+
break;
|
612
|
+
case GUMBO_TAG_BODY:
|
613
|
+
return GUMBO_INSERTION_MODE_IN_BODY;
|
614
|
+
case GUMBO_TAG_FRAMESET:
|
615
|
+
return GUMBO_INSERTION_MODE_IN_FRAMESET;
|
616
|
+
case GUMBO_TAG_HTML:
|
617
|
+
return parser->_parser_state->_head_element
|
618
|
+
? GUMBO_INSERTION_MODE_AFTER_HEAD
|
619
|
+
: GUMBO_INSERTION_MODE_BEFORE_HEAD;
|
620
|
+
default:
|
621
|
+
break;
|
592
622
|
}
|
593
623
|
return is_last ? GUMBO_INSERTION_MODE_IN_BODY : GUMBO_INSERTION_MODE_INITIAL;
|
594
624
|
}
|
@@ -596,9 +626,8 @@ static GumboInsertionMode get_appropriate_insertion_mode(
|
|
596
626
|
// This performs the actual "reset the insertion mode" loop.
|
597
627
|
static void reset_insertion_mode_appropriately(GumboParser* parser) {
|
598
628
|
const GumboVector* open_elements = &parser->_parser_state->_open_elements;
|
599
|
-
for (int i = open_elements->length; --i >= 0;
|
600
|
-
GumboInsertionMode mode =
|
601
|
-
get_appropriate_insertion_mode(open_elements->data[i], i == 0);
|
629
|
+
for (int i = open_elements->length; --i >= 0;) {
|
630
|
+
GumboInsertionMode mode = get_appropriate_insertion_mode(parser, i);
|
602
631
|
if (mode != GUMBO_INSERTION_MODE_INITIAL) {
|
603
632
|
set_insertion_mode(parser, mode);
|
604
633
|
return;
|
@@ -609,7 +638,8 @@ static void reset_insertion_mode_appropriately(GumboParser* parser) {
|
|
609
638
|
assert(0);
|
610
639
|
}
|
611
640
|
|
612
|
-
static GumboError* parser_add_parse_error(
|
641
|
+
static GumboError* parser_add_parse_error(
|
642
|
+
GumboParser* parser, const GumboToken* token) {
|
613
643
|
gumbo_debug("Adding parse error.\n");
|
614
644
|
GumboError* error = gumbo_add_error(parser);
|
615
645
|
if (!error) {
|
@@ -628,13 +658,14 @@ static GumboError* parser_add_parse_error(GumboParser* parser, const GumboToken*
|
|
628
658
|
}
|
629
659
|
GumboParserState* state = parser->_parser_state;
|
630
660
|
extra_data->parser_state = state->_insertion_mode;
|
631
|
-
gumbo_vector_init(
|
632
|
-
|
633
|
-
for (int i = 0; i < state->_open_elements.length; ++i) {
|
661
|
+
gumbo_vector_init(
|
662
|
+
parser, state->_open_elements.length, &extra_data->tag_stack);
|
663
|
+
for (unsigned int i = 0; i < state->_open_elements.length; ++i) {
|
634
664
|
const GumboNode* node = state->_open_elements.data[i];
|
635
|
-
assert(
|
636
|
-
|
637
|
-
|
665
|
+
assert(
|
666
|
+
node->type == GUMBO_NODE_ELEMENT || node->type == GUMBO_NODE_TEMPLATE);
|
667
|
+
gumbo_vector_add(
|
668
|
+
parser, (void*) node->v.element.tag, &extra_data->tag_stack);
|
638
669
|
}
|
639
670
|
return error;
|
640
671
|
}
|
@@ -643,7 +674,8 @@ static GumboError* parser_add_parse_error(GumboParser* parser, const GumboToken*
|
|
643
674
|
// by is_start) with one of the tag types in the varargs list. Terminate the
|
644
675
|
// list with GUMBO_TAG_LAST; this functions as a sentinel since no portion of
|
645
676
|
// the spec references tags that are not in the spec.
|
646
|
-
static bool tag_in(
|
677
|
+
static bool tag_in(
|
678
|
+
const GumboToken* token, bool is_start, const gumbo_tagset tags) {
|
647
679
|
GumboTag token_tag;
|
648
680
|
if (is_start && token->type == GUMBO_TOKEN_START_TAG) {
|
649
681
|
token_tag = token->v.start_tag.tag;
|
@@ -652,7 +684,7 @@ static bool tag_in(const GumboToken* token, bool is_start, const gumbo_tagset ta
|
|
652
684
|
} else {
|
653
685
|
return false;
|
654
686
|
}
|
655
|
-
return (token_tag < GUMBO_TAG_LAST && tags[(int)token_tag] != 0);
|
687
|
+
return (token_tag < GUMBO_TAG_LAST && tags[(int) token_tag] != 0);
|
656
688
|
}
|
657
689
|
|
658
690
|
// Like tag_in, but for the single-tag case.
|
@@ -669,41 +701,123 @@ static bool tag_is(const GumboToken* token, bool is_start, GumboTag tag) {
|
|
669
701
|
// Like tag_in, but checks for the tag of a node, rather than a token.
|
670
702
|
static bool node_tag_in_set(const GumboNode* node, const gumbo_tagset tags) {
|
671
703
|
assert(node != NULL);
|
672
|
-
if (node->type != GUMBO_NODE_ELEMENT) {
|
704
|
+
if (node->type != GUMBO_NODE_ELEMENT && node->type != GUMBO_NODE_TEMPLATE) {
|
673
705
|
return false;
|
674
706
|
}
|
675
|
-
return TAGSET_INCLUDES(
|
707
|
+
return TAGSET_INCLUDES(
|
708
|
+
tags, node->v.element.tag_namespace, node->v.element.tag);
|
676
709
|
}
|
677
710
|
|
678
|
-
|
679
711
|
// Like node_tag_in, but for the single-tag case.
|
680
|
-
static bool node_qualified_tag_is(
|
681
|
-
|
682
|
-
|
683
|
-
|
712
|
+
static bool node_qualified_tag_is(
|
713
|
+
const GumboNode* node, GumboNamespaceEnum ns, GumboTag tag) {
|
714
|
+
assert(node);
|
715
|
+
return (node->type == GUMBO_NODE_ELEMENT ||
|
716
|
+
node->type == GUMBO_NODE_TEMPLATE) &&
|
717
|
+
node->v.element.tag == tag && node->v.element.tag_namespace == ns;
|
684
718
|
}
|
685
719
|
|
686
720
|
// Like node_tag_in_set, but for the single-tag case in the HTML namespace.
|
687
|
-
static bool node_html_tag_is(const GumboNode* node, GumboTag tag)
|
688
|
-
{
|
721
|
+
static bool node_html_tag_is(const GumboNode* node, GumboTag tag) {
|
689
722
|
return node_qualified_tag_is(node, GUMBO_NAMESPACE_HTML, tag);
|
690
723
|
}
|
691
724
|
|
725
|
+
static void push_template_insertion_mode(
|
726
|
+
GumboParser* parser, GumboInsertionMode mode) {
|
727
|
+
gumbo_vector_add(
|
728
|
+
parser, (void*) mode, &parser->_parser_state->_template_insertion_modes);
|
729
|
+
}
|
730
|
+
|
731
|
+
static void pop_template_insertion_mode(GumboParser* parser) {
|
732
|
+
gumbo_vector_pop(parser, &parser->_parser_state->_template_insertion_modes);
|
733
|
+
}
|
734
|
+
|
735
|
+
// Returns the current template insertion mode. If the stack of template
|
736
|
+
// insertion modes is empty, this returns GUMBO_INSERTION_MODE_INITIAL.
|
737
|
+
static GumboInsertionMode get_current_template_insertion_mode(
|
738
|
+
const GumboParser* parser) {
|
739
|
+
GumboVector* template_insertion_modes =
|
740
|
+
&parser->_parser_state->_template_insertion_modes;
|
741
|
+
if (template_insertion_modes->length == 0) {
|
742
|
+
return GUMBO_INSERTION_MODE_INITIAL;
|
743
|
+
}
|
744
|
+
return (GumboInsertionMode)
|
745
|
+
template_insertion_modes->data[(template_insertion_modes->length - 1)];
|
746
|
+
}
|
692
747
|
|
693
748
|
// http://www.whatwg.org/specs/web-apps/current-work/multipage/tree-construction.html#mathml-text-integration-point
|
694
749
|
static bool is_mathml_integration_point(const GumboNode* node) {
|
695
|
-
return node_tag_in_set(
|
696
|
-
|
750
|
+
return node_tag_in_set(
|
751
|
+
node, (gumbo_tagset){TAG_MATHML(MI), TAG_MATHML(MO), TAG_MATHML(MN),
|
752
|
+
TAG_MATHML(MS), TAG_MATHML(MTEXT)});
|
697
753
|
}
|
698
754
|
|
699
755
|
// http://www.whatwg.org/specs/web-apps/current-work/multipage/tree-construction.html#html-integration-point
|
700
756
|
static bool is_html_integration_point(const GumboNode* node) {
|
701
|
-
return node_tag_in_set(node, (gumbo_tagset)
|
702
|
-
|
703
|
-
|
704
|
-
|
705
|
-
|
706
|
-
|
757
|
+
return node_tag_in_set(node, (gumbo_tagset){TAG_SVG(FOREIGNOBJECT),
|
758
|
+
TAG_SVG(DESC), TAG_SVG(TITLE)}) ||
|
759
|
+
(node_qualified_tag_is(
|
760
|
+
node, GUMBO_NAMESPACE_MATHML, GUMBO_TAG_ANNOTATION_XML) &&
|
761
|
+
(attribute_matches(
|
762
|
+
&node->v.element.attributes, "encoding", "text/html") ||
|
763
|
+
attribute_matches(&node->v.element.attributes, "encoding",
|
764
|
+
"application/xhtml+xml")));
|
765
|
+
}
|
766
|
+
|
767
|
+
// This represents a place to insert a node, consisting of a target parent and a
|
768
|
+
// child index within that parent. If the node should be inserted at the end of
|
769
|
+
// the parent's children, index will be -1.
|
770
|
+
typedef struct {
|
771
|
+
GumboNode* target;
|
772
|
+
int index;
|
773
|
+
} InsertionLocation;
|
774
|
+
|
775
|
+
InsertionLocation get_appropriate_insertion_location(
|
776
|
+
GumboParser* parser, GumboNode* override_target) {
|
777
|
+
InsertionLocation retval = {override_target, -1};
|
778
|
+
if (retval.target == NULL) {
|
779
|
+
// No override target; default to the current node, but special-case the
|
780
|
+
// root node since get_current_node() assumes the stack of open elements is
|
781
|
+
// non-empty.
|
782
|
+
retval.target = parser->_output->root != NULL ? get_current_node(parser)
|
783
|
+
: get_document_node(parser);
|
784
|
+
}
|
785
|
+
if (!parser->_parser_state->_foster_parent_insertions ||
|
786
|
+
!node_tag_in_set(retval.target, (gumbo_tagset){TAG(TABLE), TAG(TBODY),
|
787
|
+
TAG(TFOOT), TAG(THEAD), TAG(TR)})) {
|
788
|
+
return retval;
|
789
|
+
}
|
790
|
+
|
791
|
+
// Foster-parenting case.
|
792
|
+
int last_template_index = -1;
|
793
|
+
int last_table_index = -1;
|
794
|
+
GumboVector* open_elements = &parser->_parser_state->_open_elements;
|
795
|
+
for (unsigned int i = 0; i < open_elements->length; ++i) {
|
796
|
+
if (node_html_tag_is(open_elements->data[i], GUMBO_TAG_TEMPLATE)) {
|
797
|
+
last_template_index = i;
|
798
|
+
}
|
799
|
+
if (node_html_tag_is(open_elements->data[i], GUMBO_TAG_TABLE)) {
|
800
|
+
last_table_index = i;
|
801
|
+
}
|
802
|
+
}
|
803
|
+
if (last_template_index != -1 &&
|
804
|
+
(last_table_index == -1 || last_template_index > last_table_index)) {
|
805
|
+
retval.target = open_elements->data[last_template_index];
|
806
|
+
return retval;
|
807
|
+
}
|
808
|
+
if (last_table_index == -1) {
|
809
|
+
retval.target = open_elements->data[0];
|
810
|
+
return retval;
|
811
|
+
}
|
812
|
+
GumboNode* last_table = open_elements->data[last_table_index];
|
813
|
+
if (last_table->parent != NULL) {
|
814
|
+
retval.target = last_table->parent;
|
815
|
+
retval.index = last_table->index_within_parent;
|
816
|
+
return retval;
|
817
|
+
}
|
818
|
+
|
819
|
+
retval.target = open_elements->data[last_table_index - 1];
|
820
|
+
return retval;
|
707
821
|
}
|
708
822
|
|
709
823
|
// Appends a node to the end of its parent, setting the "parent" and
|
@@ -713,7 +827,8 @@ static void append_node(
|
|
713
827
|
assert(node->parent == NULL);
|
714
828
|
assert(node->index_within_parent == -1);
|
715
829
|
GumboVector* children;
|
716
|
-
if (parent->type == GUMBO_NODE_ELEMENT
|
830
|
+
if (parent->type == GUMBO_NODE_ELEMENT ||
|
831
|
+
parent->type == GUMBO_NODE_TEMPLATE) {
|
717
832
|
children = &parent->v.element.children;
|
718
833
|
} else {
|
719
834
|
assert(parent->type == GUMBO_NODE_DOCUMENT);
|
@@ -725,64 +840,41 @@ static void append_node(
|
|
725
840
|
assert(node->index_within_parent < children->length);
|
726
841
|
}
|
727
842
|
|
728
|
-
// Inserts a node at the specified
|
843
|
+
// Inserts a node at the specified InsertionLocation, updating the
|
729
844
|
// "parent" and "index_within_parent" fields of it and all its siblings.
|
845
|
+
// If the index of the location is -1, this calls append_node.
|
730
846
|
static void insert_node(
|
731
|
-
GumboParser* parser, GumboNode*
|
847
|
+
GumboParser* parser, GumboNode* node, InsertionLocation location) {
|
732
848
|
assert(node->parent == NULL);
|
733
849
|
assert(node->index_within_parent == -1);
|
734
|
-
|
735
|
-
|
736
|
-
|
737
|
-
|
738
|
-
|
739
|
-
|
740
|
-
|
741
|
-
|
742
|
-
|
743
|
-
|
744
|
-
|
745
|
-
|
746
|
-
|
747
|
-
}
|
850
|
+
GumboNode* parent = location.target;
|
851
|
+
int index = location.index;
|
852
|
+
if (index != -1) {
|
853
|
+
GumboVector* children = NULL;
|
854
|
+
if (parent->type == GUMBO_NODE_ELEMENT ||
|
855
|
+
parent->type == GUMBO_NODE_TEMPLATE) {
|
856
|
+
children = &parent->v.element.children;
|
857
|
+
} else if (parent->type == GUMBO_NODE_DOCUMENT) {
|
858
|
+
children = &parent->v.document.children;
|
859
|
+
assert(children->length == 0);
|
860
|
+
} else {
|
861
|
+
assert(0);
|
862
|
+
}
|
748
863
|
|
749
|
-
|
750
|
-
|
751
|
-
|
752
|
-
|
753
|
-
|
754
|
-
|
755
|
-
|
756
|
-
|
757
|
-
|
758
|
-
|
759
|
-
GumboNode* table_element = open_elements->data[i];
|
760
|
-
if (node_html_tag_is(table_element, GUMBO_TAG_TABLE)) {
|
761
|
-
foster_parent_element = table_element->parent;
|
762
|
-
if (!foster_parent_element ||
|
763
|
-
foster_parent_element->type != GUMBO_NODE_ELEMENT) {
|
764
|
-
// Table has no parent; spec says it's possible if a script manipulated
|
765
|
-
// the DOM, although I don't think we have to worry about this case.
|
766
|
-
gumbo_debug("Table has no parent.\n");
|
767
|
-
foster_parent_element = open_elements->data[i - 1];
|
768
|
-
break;
|
769
|
-
}
|
770
|
-
assert(foster_parent_element->type == GUMBO_NODE_ELEMENT);
|
771
|
-
gumbo_debug("Found enclosing table (%x) at %d; parent=%s, index=%d.\n",
|
772
|
-
table_element, i, gumbo_normalized_tagname(
|
773
|
-
foster_parent_element->v.element.tag),
|
774
|
-
table_element->index_within_parent);
|
775
|
-
assert(foster_parent_element->v.element.children.data[
|
776
|
-
table_element->index_within_parent] == table_element);
|
777
|
-
insert_node(parser, foster_parent_element,
|
778
|
-
table_element->index_within_parent, node);
|
779
|
-
return;
|
864
|
+
assert(index >= 0);
|
865
|
+
assert((unsigned int) index < children->length);
|
866
|
+
node->parent = parent;
|
867
|
+
node->index_within_parent = index;
|
868
|
+
gumbo_vector_insert_at(parser, (void*) node, index, children);
|
869
|
+
assert(node->index_within_parent < children->length);
|
870
|
+
for (unsigned int i = index + 1; i < children->length; ++i) {
|
871
|
+
GumboNode* sibling = children->data[i];
|
872
|
+
sibling->index_within_parent = i;
|
873
|
+
assert(sibling->index_within_parent < children->length);
|
780
874
|
}
|
875
|
+
} else {
|
876
|
+
append_node(parser, parent, node);
|
781
877
|
}
|
782
|
-
if (node->type == GUMBO_NODE_ELEMENT) {
|
783
|
-
gumbo_vector_add(parser, (void*) node, open_elements);
|
784
|
-
}
|
785
|
-
append_node(parser, foster_parent_element, node);
|
786
878
|
}
|
787
879
|
|
788
880
|
static void maybe_flush_text_node_buffer(GumboParser* parser) {
|
@@ -797,27 +889,27 @@ static void maybe_flush_text_node_buffer(GumboParser* parser) {
|
|
797
889
|
buffer_state->_type == GUMBO_NODE_CDATA);
|
798
890
|
GumboNode* text_node = create_node(parser, buffer_state->_type);
|
799
891
|
GumboText* text_node_data = &text_node->v.text;
|
800
|
-
text_node_data->text =
|
801
|
-
parser, &buffer_state->_buffer);
|
892
|
+
text_node_data->text =
|
893
|
+
gumbo_string_buffer_to_string(parser, &buffer_state->_buffer);
|
802
894
|
text_node_data->original_text.data = buffer_state->_start_original_text;
|
803
895
|
text_node_data->original_text.length =
|
804
896
|
state->_current_token->original_text.data -
|
805
897
|
buffer_state->_start_original_text;
|
806
898
|
text_node_data->start_pos = buffer_state->_start_position;
|
807
|
-
|
808
|
-
|
809
|
-
|
810
|
-
|
899
|
+
|
900
|
+
gumbo_debug("Flushing text node buffer of %.*s.\n",
|
901
|
+
(int) buffer_state->_buffer.length, buffer_state->_buffer.data);
|
902
|
+
|
903
|
+
InsertionLocation location = get_appropriate_insertion_location(parser, NULL);
|
904
|
+
if (location.target->type == GUMBO_NODE_DOCUMENT) {
|
905
|
+
// The DOM does not allow Document nodes to have Text children, so per the
|
906
|
+
// spec, they are dropped on the floor.
|
907
|
+
destroy_node(parser, text_node);
|
811
908
|
} else {
|
812
|
-
|
813
|
-
parser, parser->_output->root ?
|
814
|
-
get_current_node(parser) : parser->_output->document, text_node);
|
909
|
+
insert_node(parser, text_node, location);
|
815
910
|
}
|
816
|
-
gumbo_debug("Flushing text node buffer of %.*s.\n",
|
817
|
-
(int) buffer_state->_buffer.length, buffer_state->_buffer.data);
|
818
911
|
|
819
|
-
|
820
|
-
gumbo_string_buffer_init(parser, &buffer_state->_buffer);
|
912
|
+
gumbo_string_buffer_clear(parser, &buffer_state->_buffer);
|
821
913
|
buffer_state->_type = GUMBO_NODE_WHITESPACE;
|
822
914
|
assert(buffer_state->_buffer.length == 0);
|
823
915
|
}
|
@@ -825,9 +917,9 @@ static void maybe_flush_text_node_buffer(GumboParser* parser) {
|
|
825
917
|
static void record_end_of_element(
|
826
918
|
GumboToken* current_token, GumboElement* element) {
|
827
919
|
element->end_pos = current_token->position;
|
828
|
-
element->original_end_tag =
|
829
|
-
|
830
|
-
|
920
|
+
element->original_end_tag = current_token->type == GUMBO_TOKEN_END_TAG
|
921
|
+
? current_token->original_text
|
922
|
+
: kGumboEmptyString;
|
831
923
|
}
|
832
924
|
|
833
925
|
static GumboNode* pop_current_node(GumboParser* parser) {
|
@@ -835,8 +927,7 @@ static GumboNode* pop_current_node(GumboParser* parser) {
|
|
835
927
|
maybe_flush_text_node_buffer(parser);
|
836
928
|
if (state->_open_elements.length > 0) {
|
837
929
|
assert(node_html_tag_is(state->_open_elements.data[0], GUMBO_TAG_HTML));
|
838
|
-
gumbo_debug(
|
839
|
-
"Popping %s node.\n",
|
930
|
+
gumbo_debug("Popping %s node.\n",
|
840
931
|
gumbo_normalized_tagname(get_current_node(parser)->v.element.tag));
|
841
932
|
}
|
842
933
|
GumboNode* current_node = gumbo_vector_pop(parser, &state->_open_elements);
|
@@ -844,13 +935,16 @@ static GumboNode* pop_current_node(GumboParser* parser) {
|
|
844
935
|
assert(state->_open_elements.length == 0);
|
845
936
|
return NULL;
|
846
937
|
}
|
847
|
-
assert(current_node->type == GUMBO_NODE_ELEMENT
|
938
|
+
assert(current_node->type == GUMBO_NODE_ELEMENT ||
|
939
|
+
current_node->type == GUMBO_NODE_TEMPLATE);
|
848
940
|
bool is_closed_body_or_html_tag =
|
849
|
-
(node_html_tag_is(current_node, GUMBO_TAG_BODY) &&
|
850
|
-
|
941
|
+
(node_html_tag_is(current_node, GUMBO_TAG_BODY) &&
|
942
|
+
state->_closed_body_tag) ||
|
943
|
+
(node_html_tag_is(current_node, GUMBO_TAG_HTML) &&
|
944
|
+
state->_closed_html_tag);
|
851
945
|
if ((state->_current_token->type != GUMBO_TOKEN_END_TAG ||
|
852
|
-
|
853
|
-
|
946
|
+
!node_html_tag_is(current_node, state->_current_token->v.end_tag)) &&
|
947
|
+
!is_closed_body_or_html_tag) {
|
854
948
|
current_node->parse_flags |= GUMBO_INSERTION_IMPLICIT_END_TAG;
|
855
949
|
}
|
856
950
|
if (!is_closed_body_or_html_tag) {
|
@@ -873,22 +967,25 @@ static void append_comment_node(
|
|
873
967
|
|
874
968
|
// http://www.whatwg.org/specs/web-apps/current-work/complete/tokenization.html#clear-the-stack-back-to-a-table-row-context
|
875
969
|
static void clear_stack_to_table_row_context(GumboParser* parser) {
|
876
|
-
while (!node_tag_in_set(get_current_node(parser),
|
970
|
+
while (!node_tag_in_set(get_current_node(parser),
|
971
|
+
(gumbo_tagset){TAG(HTML), TAG(TR), TAG(TEMPLATE)})) {
|
877
972
|
pop_current_node(parser);
|
878
973
|
}
|
879
974
|
}
|
880
975
|
|
881
976
|
// http://www.whatwg.org/specs/web-apps/current-work/complete/tokenization.html#clear-the-stack-back-to-a-table-context
|
882
977
|
static void clear_stack_to_table_context(GumboParser* parser) {
|
883
|
-
while (!node_tag_in_set(get_current_node(parser),
|
978
|
+
while (!node_tag_in_set(get_current_node(parser),
|
979
|
+
(gumbo_tagset){TAG(HTML), TAG(TABLE), TAG(TEMPLATE)})) {
|
884
980
|
pop_current_node(parser);
|
885
981
|
}
|
886
982
|
}
|
887
983
|
|
888
984
|
// http://www.whatwg.org/specs/web-apps/current-work/complete/tokenization.html#clear-the-stack-back-to-a-table-body-context
|
889
985
|
void clear_stack_to_table_body_context(GumboParser* parser) {
|
890
|
-
while (!node_tag_in_set(get_current_node(parser),
|
891
|
-
|
986
|
+
while (!node_tag_in_set(get_current_node(parser),
|
987
|
+
(gumbo_tagset){TAG(HTML), TAG(TBODY), TAG(TFOOT), TAG(THEAD),
|
988
|
+
TAG(TEMPLATE)})) {
|
892
989
|
pop_current_node(parser);
|
893
990
|
}
|
894
991
|
}
|
@@ -903,7 +1000,9 @@ static GumboNode* create_element(GumboParser* parser, GumboTag tag) {
|
|
903
1000
|
element->tag_namespace = GUMBO_NAMESPACE_HTML;
|
904
1001
|
element->original_tag = kGumboEmptyString;
|
905
1002
|
element->original_end_tag = kGumboEmptyString;
|
906
|
-
element->start_pos = parser->_parser_state->_current_token
|
1003
|
+
element->start_pos = (parser->_parser_state->_current_token)
|
1004
|
+
? parser->_parser_state->_current_token->position
|
1005
|
+
: kGumboEmptySourcePosition;
|
907
1006
|
element->end_pos = kGumboEmptySourcePosition;
|
908
1007
|
return node;
|
909
1008
|
}
|
@@ -914,7 +1013,12 @@ static GumboNode* create_element_from_token(
|
|
914
1013
|
assert(token->type == GUMBO_TOKEN_START_TAG);
|
915
1014
|
GumboTokenStartTag* start_tag = &token->v.start_tag;
|
916
1015
|
|
917
|
-
|
1016
|
+
GumboNodeType type = (tag_namespace == GUMBO_NAMESPACE_HTML &&
|
1017
|
+
start_tag->tag == GUMBO_TAG_TEMPLATE)
|
1018
|
+
? GUMBO_NODE_TEMPLATE
|
1019
|
+
: GUMBO_NODE_ELEMENT;
|
1020
|
+
|
1021
|
+
GumboNode* node = create_node(parser, type);
|
918
1022
|
GumboElement* element = &node->v.element;
|
919
1023
|
gumbo_vector_init(parser, 1, &element->children);
|
920
1024
|
element->attributes = start_tag->attributes;
|
@@ -937,7 +1041,7 @@ static GumboNode* create_element_from_token(
|
|
937
1041
|
|
938
1042
|
// http://www.whatwg.org/specs/web-apps/current-work/complete/tokenization.html#insert-an-html-element
|
939
1043
|
static void insert_element(GumboParser* parser, GumboNode* node,
|
940
|
-
|
1044
|
+
bool is_reconstructing_formatting_elements) {
|
941
1045
|
GumboParserState* state = parser->_parser_state;
|
942
1046
|
// NOTE(jdtang): The text node buffer must always be flushed before inserting
|
943
1047
|
// a node, otherwise we're handling nodes in a different order than the spec
|
@@ -951,20 +1055,8 @@ static void insert_element(GumboParser* parser, GumboNode* node,
|
|
951
1055
|
if (!is_reconstructing_formatting_elements) {
|
952
1056
|
maybe_flush_text_node_buffer(parser);
|
953
1057
|
}
|
954
|
-
|
955
|
-
|
956
|
-
TAG(THEAD), TAG(TR) } )) {
|
957
|
-
foster_parent_element(parser, node);
|
958
|
-
gumbo_vector_add(parser, (void*) node, &state->_open_elements);
|
959
|
-
return;
|
960
|
-
}
|
961
|
-
|
962
|
-
// This is called to insert the root HTML element, but get_current_node
|
963
|
-
// assumes the stack of open elements is non-empty, so we need special
|
964
|
-
// handling for this case.
|
965
|
-
append_node(
|
966
|
-
parser, parser->_output->root ?
|
967
|
-
get_current_node(parser) : parser->_output->document, node);
|
1058
|
+
InsertionLocation location = get_appropriate_insertion_location(parser, NULL);
|
1059
|
+
insert_node(parser, node, location);
|
968
1060
|
gumbo_vector_add(parser, (void*) node, &state->_open_elements);
|
969
1061
|
}
|
970
1062
|
|
@@ -977,7 +1069,7 @@ static GumboNode* insert_element_from_token(
|
|
977
1069
|
create_element_from_token(parser, token, GUMBO_NAMESPACE_HTML);
|
978
1070
|
insert_element(parser, element, false);
|
979
1071
|
gumbo_debug("Inserting <%s> element (@%x) from token.\n",
|
980
|
-
|
1072
|
+
gumbo_normalized_tagname(element->v.element.tag), element);
|
981
1073
|
return element;
|
982
1074
|
}
|
983
1075
|
|
@@ -990,7 +1082,7 @@ static GumboNode* insert_element_of_tag_type(
|
|
990
1082
|
element->parse_flags |= GUMBO_INSERTION_BY_PARSER | reason;
|
991
1083
|
insert_element(parser, element, false);
|
992
1084
|
gumbo_debug("Inserting %s element (@%x) from tag type.\n",
|
993
|
-
|
1085
|
+
gumbo_normalized_tagname(tag), element);
|
994
1086
|
return element;
|
995
1087
|
}
|
996
1088
|
|
@@ -1002,16 +1094,14 @@ static GumboNode* insert_foreign_element(
|
|
1002
1094
|
GumboNode* element = create_element_from_token(parser, token, tag_namespace);
|
1003
1095
|
insert_element(parser, element, false);
|
1004
1096
|
if (token_has_attribute(token, "xmlns") &&
|
1005
|
-
!attribute_matches_case_sensitive(
|
1006
|
-
&token->v.start_tag.attributes, "xmlns",
|
1097
|
+
!attribute_matches_case_sensitive(&token->v.start_tag.attributes, "xmlns",
|
1007
1098
|
kLegalXmlns[tag_namespace])) {
|
1008
1099
|
// TODO(jdtang): Since there're multiple possible error codes here, we
|
1009
1100
|
// eventually need reason codes to differentiate them.
|
1010
1101
|
parser_add_parse_error(parser, token);
|
1011
1102
|
}
|
1012
1103
|
if (token_has_attribute(token, "xmlns:xlink") &&
|
1013
|
-
!attribute_matches_case_sensitive(
|
1014
|
-
&token->v.start_tag.attributes,
|
1104
|
+
!attribute_matches_case_sensitive(&token->v.start_tag.attributes,
|
1015
1105
|
"xmlns:xlink", "http://www.w3.org/1999/xlink")) {
|
1016
1106
|
parser_add_parse_error(parser, token);
|
1017
1107
|
}
|
@@ -1021,8 +1111,7 @@ static GumboNode* insert_foreign_element(
|
|
1021
1111
|
static void insert_text_token(GumboParser* parser, GumboToken* token) {
|
1022
1112
|
assert(token->type == GUMBO_TOKEN_WHITESPACE ||
|
1023
1113
|
token->type == GUMBO_TOKEN_CHARACTER ||
|
1024
|
-
token->type == GUMBO_TOKEN_NULL ||
|
1025
|
-
token->type == GUMBO_TOKEN_CDATA);
|
1114
|
+
token->type == GUMBO_TOKEN_NULL || token->type == GUMBO_TOKEN_CDATA);
|
1026
1115
|
TextNodeBufferState* buffer_state = &parser->_parser_state->_text_node;
|
1027
1116
|
if (buffer_state->_buffer.length == 0) {
|
1028
1117
|
// Initialize position fields.
|
@@ -1057,7 +1146,7 @@ static void acknowledge_self_closing_tag(GumboParser* parser) {
|
|
1057
1146
|
// elements, and fills in its index if so.
|
1058
1147
|
static bool find_last_anchor_index(GumboParser* parser, int* anchor_index) {
|
1059
1148
|
GumboVector* elements = &parser->_parser_state->_active_formatting_elements;
|
1060
|
-
for (int i = elements->length; --i >= 0;
|
1149
|
+
for (int i = elements->length; --i >= 0;) {
|
1061
1150
|
GumboNode* node = elements->data[i];
|
1062
1151
|
if (node == &kActiveFormattingScopeMarker) {
|
1063
1152
|
return false;
|
@@ -1074,21 +1163,21 @@ static bool find_last_anchor_index(GumboParser* parser, int* anchor_index) {
|
|
1074
1163
|
// formatting elements (after the last active scope marker) that have a specific
|
1075
1164
|
// tag. If this is > 0, then earliest_matching_index will be filled in with the
|
1076
1165
|
// index of the first such element.
|
1077
|
-
static int count_formatting_elements_of_tag(
|
1078
|
-
|
1079
|
-
int* earliest_matching_index) {
|
1166
|
+
static int count_formatting_elements_of_tag(GumboParser* parser,
|
1167
|
+
const GumboNode* desired_node, int* earliest_matching_index) {
|
1080
1168
|
const GumboElement* desired_element = &desired_node->v.element;
|
1081
1169
|
GumboVector* elements = &parser->_parser_state->_active_formatting_elements;
|
1082
1170
|
int num_identical_elements = 0;
|
1083
|
-
for (int i = elements->length; --i >= 0;
|
1171
|
+
for (int i = elements->length; --i >= 0;) {
|
1084
1172
|
GumboNode* node = elements->data[i];
|
1085
1173
|
if (node == &kActiveFormattingScopeMarker) {
|
1086
1174
|
break;
|
1087
1175
|
}
|
1088
1176
|
assert(node->type == GUMBO_NODE_ELEMENT);
|
1089
|
-
if (node_qualified_tag_is(
|
1090
|
-
|
1091
|
-
|
1177
|
+
if (node_qualified_tag_is(
|
1178
|
+
node, desired_element->tag_namespace, desired_element->tag) &&
|
1179
|
+
all_attributes_match(
|
1180
|
+
&node->v.element.attributes, &desired_element->attributes)) {
|
1092
1181
|
num_identical_elements++;
|
1093
1182
|
*earliest_matching_index = i;
|
1094
1183
|
}
|
@@ -1115,7 +1204,7 @@ static void add_formatting_element(GumboParser* parser, const GumboNode* node) {
|
|
1115
1204
|
// Noah's Ark clause: if there're at least 3, remove the earliest.
|
1116
1205
|
if (num_identical_elements >= 3) {
|
1117
1206
|
gumbo_debug("Noah's ark clause: removing element at %d.\n",
|
1118
|
-
|
1207
|
+
earliest_identical_element);
|
1119
1208
|
gumbo_vector_remove_at(parser, earliest_identical_element, elements);
|
1120
1209
|
}
|
1121
1210
|
|
@@ -1124,7 +1213,7 @@ static void add_formatting_element(GumboParser* parser, const GumboNode* node) {
|
|
1124
1213
|
|
1125
1214
|
static bool is_open_element(GumboParser* parser, const GumboNode* node) {
|
1126
1215
|
GumboVector* open_elements = &parser->_parser_state->_open_elements;
|
1127
|
-
for (int i = 0; i < open_elements->length; ++i) {
|
1216
|
+
for (unsigned int i = 0; i < open_elements->length; ++i) {
|
1128
1217
|
if (open_elements->data[i] == node) {
|
1129
1218
|
return true;
|
1130
1219
|
}
|
@@ -1136,8 +1225,8 @@ static bool is_open_element(GumboParser* parser, const GumboNode* node) {
|
|
1136
1225
|
// clone shares no structure with the original node: all owned strings and
|
1137
1226
|
// values are fresh copies.
|
1138
1227
|
GumboNode* clone_node(
|
1139
|
-
GumboParser* parser,
|
1140
|
-
assert(node->type == GUMBO_NODE_ELEMENT);
|
1228
|
+
GumboParser* parser, GumboNode* node, GumboParseFlags reason) {
|
1229
|
+
assert(node->type == GUMBO_NODE_ELEMENT || node->type == GUMBO_NODE_TEMPLATE);
|
1141
1230
|
GumboNode* new_node = gumbo_parser_allocate(parser, sizeof(GumboNode));
|
1142
1231
|
*new_node = *node;
|
1143
1232
|
new_node->parent = NULL;
|
@@ -1151,7 +1240,7 @@ GumboNode* clone_node(
|
|
1151
1240
|
|
1152
1241
|
const GumboVector* old_attributes = &node->v.element.attributes;
|
1153
1242
|
gumbo_vector_init(parser, old_attributes->length, &element->attributes);
|
1154
|
-
for (int i = 0; i < old_attributes->length; ++i) {
|
1243
|
+
for (unsigned int i = 0; i < old_attributes->length; ++i) {
|
1155
1244
|
const GumboAttribute* old_attr = old_attributes->data[i];
|
1156
1245
|
GumboAttribute* attr =
|
1157
1246
|
gumbo_parser_allocate(parser, sizeof(GumboAttribute));
|
@@ -1175,8 +1264,8 @@ static void reconstruct_active_formatting_elements(GumboParser* parser) {
|
|
1175
1264
|
}
|
1176
1265
|
|
1177
1266
|
// Step 2 & 3
|
1178
|
-
int i = elements->length - 1;
|
1179
|
-
|
1267
|
+
unsigned int i = elements->length - 1;
|
1268
|
+
GumboNode* element = elements->data[i];
|
1180
1269
|
if (element == &kActiveFormattingScopeMarker ||
|
1181
1270
|
is_open_element(parser, element)) {
|
1182
1271
|
return;
|
@@ -1186,7 +1275,7 @@ static void reconstruct_active_formatting_elements(GumboParser* parser) {
|
|
1186
1275
|
do {
|
1187
1276
|
if (i == 0) {
|
1188
1277
|
// Step 4
|
1189
|
-
i = -1;
|
1278
|
+
i = -1; // Incremented to 0 below.
|
1190
1279
|
break;
|
1191
1280
|
}
|
1192
1281
|
// Step 5
|
@@ -1196,9 +1285,8 @@ static void reconstruct_active_formatting_elements(GumboParser* parser) {
|
|
1196
1285
|
|
1197
1286
|
++i;
|
1198
1287
|
gumbo_debug("Reconstructing elements from %d on %s parent.\n", i,
|
1199
|
-
|
1200
|
-
|
1201
|
-
for(; i < elements->length; ++i) {
|
1288
|
+
gumbo_normalized_tagname(get_current_node(parser)->v.element.tag));
|
1289
|
+
for (; i < elements->length; ++i) {
|
1202
1290
|
// Step 7 & 8.
|
1203
1291
|
assert(elements->length > 0);
|
1204
1292
|
assert(i < elements->length);
|
@@ -1207,11 +1295,16 @@ static void reconstruct_active_formatting_elements(GumboParser* parser) {
|
|
1207
1295
|
GumboNode* clone = clone_node(
|
1208
1296
|
parser, element, GUMBO_INSERTION_RECONSTRUCTED_FORMATTING_ELEMENT);
|
1209
1297
|
// Step 9.
|
1210
|
-
|
1298
|
+
InsertionLocation location =
|
1299
|
+
get_appropriate_insertion_location(parser, NULL);
|
1300
|
+
insert_node(parser, clone, location);
|
1301
|
+
gumbo_vector_add(
|
1302
|
+
parser, (void*) clone, &parser->_parser_state->_open_elements);
|
1303
|
+
|
1211
1304
|
// Step 10.
|
1212
1305
|
elements->data[i] = clone;
|
1213
1306
|
gumbo_debug("Reconstructed %s element at %d.\n",
|
1214
|
-
|
1307
|
+
gumbo_normalized_tagname(clone->v.element.tag), i);
|
1215
1308
|
}
|
1216
1309
|
}
|
1217
1310
|
|
@@ -1222,32 +1315,30 @@ static void clear_active_formatting_elements(GumboParser* parser) {
|
|
1222
1315
|
do {
|
1223
1316
|
node = gumbo_vector_pop(parser, elements);
|
1224
1317
|
++num_elements_cleared;
|
1225
|
-
} while(node && node != &kActiveFormattingScopeMarker);
|
1318
|
+
} while (node && node != &kActiveFormattingScopeMarker);
|
1226
1319
|
gumbo_debug("Cleared %d elements from active formatting list.\n",
|
1227
|
-
|
1320
|
+
num_elements_cleared);
|
1228
1321
|
}
|
1229
1322
|
|
1230
1323
|
// http://www.whatwg.org/specs/web-apps/current-work/complete/tokenization.html#the-initial-insertion-mode
|
1231
1324
|
static GumboQuirksModeEnum compute_quirks_mode(
|
1232
1325
|
const GumboTokenDocType* doctype) {
|
1233
|
-
if (doctype->force_quirks ||
|
1234
|
-
|
1235
|
-
|
1236
|
-
|
1237
|
-
|
1238
|
-
|
1239
|
-
|
1240
|
-
kQuirksModeSystemIdExactMatches, true) ||
|
1326
|
+
if (doctype->force_quirks || strcmp(doctype->name, kDoctypeHtml.data) ||
|
1327
|
+
is_in_static_list(
|
1328
|
+
doctype->public_identifier, kQuirksModePublicIdPrefixes, false) ||
|
1329
|
+
is_in_static_list(
|
1330
|
+
doctype->public_identifier, kQuirksModePublicIdExactMatches, true) ||
|
1331
|
+
is_in_static_list(
|
1332
|
+
doctype->system_identifier, kQuirksModeSystemIdExactMatches, true) ||
|
1241
1333
|
(is_in_static_list(doctype->public_identifier,
|
1242
|
-
|
1243
|
-
|
1334
|
+
kLimitedQuirksRequiresSystemIdPublicIdPrefixes, false) &&
|
1335
|
+
!doctype->has_system_identifier)) {
|
1244
1336
|
return GUMBO_DOCTYPE_QUIRKS;
|
1245
|
-
} else if (
|
1246
|
-
|
1247
|
-
|
1248
|
-
|
1249
|
-
|
1250
|
-
&& doctype->has_system_identifier)) {
|
1337
|
+
} else if (is_in_static_list(doctype->public_identifier,
|
1338
|
+
kLimitedQuirksPublicIdPrefixes, false) ||
|
1339
|
+
(is_in_static_list(doctype->public_identifier,
|
1340
|
+
kLimitedQuirksRequiresSystemIdPublicIdPrefixes, false) &&
|
1341
|
+
doctype->has_system_identifier)) {
|
1251
1342
|
return GUMBO_DOCTYPE_LIMITED_QUIRKS;
|
1252
1343
|
}
|
1253
1344
|
return GUMBO_DOCTYPE_NO_QUIRKS;
|
@@ -1261,39 +1352,45 @@ static GumboQuirksModeEnum compute_quirks_mode(
|
|
1261
1352
|
// names. For example, "has an element in list scope" looks for an element of
|
1262
1353
|
// the given qualified name within the nearest enclosing <ol> or <ul>, along
|
1263
1354
|
// with a bunch of generic element types that serve to "firewall" their content
|
1264
|
-
// from the rest of the document.
|
1265
|
-
|
1355
|
+
// from the rest of the document. Note that because of the way the spec is
|
1356
|
+
// written,
|
1357
|
+
// all elements are expected to be in the HTML namespace
|
1358
|
+
static bool has_an_element_in_specific_scope(GumboParser* parser,
|
1359
|
+
int expected_size, const GumboTag* expected, bool negate,
|
1360
|
+
const gumbo_tagset tags) {
|
1266
1361
|
GumboVector* open_elements = &parser->_parser_state->_open_elements;
|
1267
|
-
|
1268
|
-
for (int i = open_elements->length; --i >= 0; ) {
|
1362
|
+
for (int i = open_elements->length; --i >= 0;) {
|
1269
1363
|
const GumboNode* node = open_elements->data[i];
|
1270
|
-
if (node->type != GUMBO_NODE_ELEMENT)
|
1364
|
+
if (node->type != GUMBO_NODE_ELEMENT && node->type != GUMBO_NODE_TEMPLATE)
|
1271
1365
|
continue;
|
1366
|
+
|
1367
|
+
GumboTag node_tag = node->v.element.tag;
|
1368
|
+
GumboNamespaceEnum node_ns = node->v.element.tag_namespace;
|
1369
|
+
for (int j = 0; j < expected_size; ++j) {
|
1370
|
+
if (node_tag == expected[j] && node_ns == GUMBO_NAMESPACE_HTML)
|
1371
|
+
return true;
|
1272
1372
|
}
|
1273
|
-
|
1274
|
-
|
1275
|
-
|
1276
|
-
bool found_qualname = false;
|
1277
|
-
if (TAGSET_INCLUDES(tags, node->v.element.tag_namespace, node->v.element.tag)) {
|
1278
|
-
found_qualname = true;
|
1279
|
-
}
|
1280
|
-
if (negate != found_qualname) {
|
1281
|
-
result = false;
|
1282
|
-
return result;
|
1283
|
-
}
|
1373
|
+
|
1374
|
+
bool found = TAGSET_INCLUDES(tags, node_ns, node_tag);
|
1375
|
+
if (negate != found) return false;
|
1284
1376
|
}
|
1285
|
-
return
|
1377
|
+
return false;
|
1378
|
+
}
|
1379
|
+
|
1380
|
+
// Checks for the presence of an open element of the specified tag type.
|
1381
|
+
static bool has_open_element(GumboParser* parser, GumboTag tag) {
|
1382
|
+
return has_an_element_in_specific_scope(
|
1383
|
+
parser, 1, &tag, false, (gumbo_tagset){TAG(HTML)});
|
1286
1384
|
}
|
1287
1385
|
|
1288
1386
|
// http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#has-an-element-in-scope
|
1289
1387
|
static bool has_an_element_in_scope(GumboParser* parser, GumboTag tag) {
|
1290
|
-
|
1291
|
-
|
1292
|
-
|
1293
|
-
|
1294
|
-
|
1295
|
-
|
1296
|
-
TAG_SVG(FOREIGNOBJECT), TAG_SVG(DESC), TAG_SVG(TITLE) });
|
1388
|
+
return has_an_element_in_specific_scope(parser, 1, &tag, false,
|
1389
|
+
(gumbo_tagset){TAG(APPLET), TAG(CAPTION), TAG(HTML), TAG(TABLE), TAG(TD),
|
1390
|
+
TAG(TH), TAG(MARQUEE), TAG(OBJECT), TAG(TEMPLATE), TAG_MATHML(MI),
|
1391
|
+
TAG_MATHML(MO), TAG_MATHML(MN), TAG_MATHML(MS), TAG_MATHML(MTEXT),
|
1392
|
+
TAG_MATHML(ANNOTATION_XML), TAG_SVG(FOREIGNOBJECT), TAG_SVG(DESC),
|
1393
|
+
TAG_SVG(TITLE)});
|
1297
1394
|
}
|
1298
1395
|
|
1299
1396
|
// Like "has an element in scope", but for the specific case of looking for a
|
@@ -1304,19 +1401,21 @@ static bool has_an_element_in_scope(GumboParser* parser, GumboTag tag) {
|
|
1304
1401
|
// parameterize it.
|
1305
1402
|
static bool has_node_in_scope(GumboParser* parser, const GumboNode* node) {
|
1306
1403
|
GumboVector* open_elements = &parser->_parser_state->_open_elements;
|
1307
|
-
for (int i = open_elements->length; --i >= 0;
|
1404
|
+
for (int i = open_elements->length; --i >= 0;) {
|
1308
1405
|
const GumboNode* current = open_elements->data[i];
|
1309
1406
|
if (current == node) {
|
1310
1407
|
return true;
|
1311
1408
|
}
|
1312
|
-
if (current->type != GUMBO_NODE_ELEMENT
|
1409
|
+
if (current->type != GUMBO_NODE_ELEMENT &&
|
1410
|
+
current->type != GUMBO_NODE_TEMPLATE) {
|
1313
1411
|
continue;
|
1314
1412
|
}
|
1315
|
-
if (node_tag_in_set(current,
|
1316
|
-
|
1317
|
-
|
1318
|
-
|
1319
|
-
|
1413
|
+
if (node_tag_in_set(current,
|
1414
|
+
(gumbo_tagset){TAG(APPLET), TAG(CAPTION), TAG(HTML), TAG(TABLE),
|
1415
|
+
TAG(TD), TAG(TH), TAG(MARQUEE), TAG(OBJECT), TAG(TEMPLATE),
|
1416
|
+
TAG_MATHML(MI), TAG_MATHML(MO), TAG_MATHML(MN), TAG_MATHML(MS),
|
1417
|
+
TAG_MATHML(MTEXT), TAG_MATHML(ANNOTATION_XML),
|
1418
|
+
TAG_SVG(FOREIGNOBJECT), TAG_SVG(DESC), TAG_SVG(TITLE)})) {
|
1320
1419
|
return false;
|
1321
1420
|
}
|
1322
1421
|
}
|
@@ -1326,60 +1425,70 @@ static bool has_node_in_scope(GumboParser* parser, const GumboNode* node) {
|
|
1326
1425
|
|
1327
1426
|
// Like has_an_element_in_scope, but restricts the expected qualified name to a
|
1328
1427
|
// range of possible qualified names instead of just a single one.
|
1329
|
-
static bool has_an_element_in_scope_with_tagname(
|
1330
|
-
|
1331
|
-
|
1332
|
-
|
1333
|
-
|
1334
|
-
|
1428
|
+
static bool has_an_element_in_scope_with_tagname(
|
1429
|
+
GumboParser* parser, int expected_len, const GumboTag expected[]) {
|
1430
|
+
return has_an_element_in_specific_scope(parser, expected_len, expected, false,
|
1431
|
+
(gumbo_tagset){TAG(APPLET), TAG(CAPTION), TAG(HTML), TAG(TABLE), TAG(TD),
|
1432
|
+
TAG(TH), TAG(MARQUEE), TAG(OBJECT), TAG(TEMPLATE), TAG_MATHML(MI),
|
1433
|
+
TAG_MATHML(MO), TAG_MATHML(MN), TAG_MATHML(MS), TAG_MATHML(MTEXT),
|
1434
|
+
TAG_MATHML(ANNOTATION_XML), TAG_SVG(FOREIGNOBJECT), TAG_SVG(DESC),
|
1435
|
+
TAG_SVG(TITLE)});
|
1335
1436
|
}
|
1336
1437
|
|
1337
1438
|
// http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#has-an-element-in-list-item-scope
|
1338
1439
|
static bool has_an_element_in_list_scope(GumboParser* parser, GumboTag tag) {
|
1339
|
-
|
1340
|
-
|
1341
|
-
|
1342
|
-
|
1343
|
-
|
1344
|
-
|
1345
|
-
TAG_SVG(FOREIGNOBJECT), TAG_SVG(DESC), TAG_SVG(TITLE), TAG(OL),
|
1346
|
-
TAG(UL) });
|
1440
|
+
return has_an_element_in_specific_scope(parser, 1, &tag, false,
|
1441
|
+
(gumbo_tagset){TAG(APPLET), TAG(CAPTION), TAG(HTML), TAG(TABLE), TAG(TD),
|
1442
|
+
TAG(TH), TAG(MARQUEE), TAG(OBJECT), TAG(TEMPLATE), TAG_MATHML(MI),
|
1443
|
+
TAG_MATHML(MO), TAG_MATHML(MN), TAG_MATHML(MS), TAG_MATHML(MTEXT),
|
1444
|
+
TAG_MATHML(ANNOTATION_XML), TAG_SVG(FOREIGNOBJECT), TAG_SVG(DESC),
|
1445
|
+
TAG_SVG(TITLE), TAG(OL), TAG(UL)});
|
1347
1446
|
}
|
1348
1447
|
|
1349
1448
|
// http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#has-an-element-in-button-scope
|
1350
1449
|
static bool has_an_element_in_button_scope(GumboParser* parser, GumboTag tag) {
|
1351
|
-
|
1352
|
-
|
1353
|
-
|
1354
|
-
|
1355
|
-
|
1356
|
-
|
1357
|
-
TAG_SVG(FOREIGNOBJECT), TAG_SVG(DESC), TAG_SVG(TITLE), TAG(BUTTON) });
|
1450
|
+
return has_an_element_in_specific_scope(parser, 1, &tag, false,
|
1451
|
+
(gumbo_tagset){TAG(APPLET), TAG(CAPTION), TAG(HTML), TAG(TABLE), TAG(TD),
|
1452
|
+
TAG(TH), TAG(MARQUEE), TAG(OBJECT), TAG(TEMPLATE), TAG_MATHML(MI),
|
1453
|
+
TAG_MATHML(MO), TAG_MATHML(MN), TAG_MATHML(MS), TAG_MATHML(MTEXT),
|
1454
|
+
TAG_MATHML(ANNOTATION_XML), TAG_SVG(FOREIGNOBJECT), TAG_SVG(DESC),
|
1455
|
+
TAG_SVG(TITLE), TAG(BUTTON)});
|
1358
1456
|
}
|
1359
1457
|
|
1360
1458
|
// http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#has-an-element-in-table-scope
|
1361
1459
|
static bool has_an_element_in_table_scope(GumboParser* parser, GumboTag tag) {
|
1362
|
-
|
1363
|
-
|
1364
|
-
return has_an_element_in_specific_scope(parser, qualset, false, (gumbo_tagset) { TAG(HTML), TAG(TABLE) });
|
1460
|
+
return has_an_element_in_specific_scope(parser, 1, &tag, false,
|
1461
|
+
(gumbo_tagset){TAG(HTML), TAG(TABLE), TAG(TEMPLATE)});
|
1365
1462
|
}
|
1366
1463
|
|
1367
1464
|
// http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#has-an-element-in-select-scope
|
1368
1465
|
static bool has_an_element_in_select_scope(GumboParser* parser, GumboTag tag) {
|
1369
|
-
|
1370
|
-
|
1371
|
-
return has_an_element_in_specific_scope(parser, qualset, true, (gumbo_tagset) { TAG(OPTGROUP), TAG(OPTION) });
|
1466
|
+
return has_an_element_in_specific_scope(
|
1467
|
+
parser, 1, &tag, true, (gumbo_tagset){TAG(OPTGROUP), TAG(OPTION)});
|
1372
1468
|
}
|
1373
1469
|
|
1374
1470
|
// http://www.whatwg.org/specs/web-apps/current-work/complete/tokenization.html#generate-implied-end-tags
|
1375
1471
|
// "exception" is the "element to exclude from the process" listed in the spec.
|
1376
1472
|
// Pass GUMBO_TAG_LAST to not exclude any of them.
|
1377
1473
|
static void generate_implied_end_tags(GumboParser* parser, GumboTag exception) {
|
1378
|
-
for (;
|
1379
|
-
|
1380
|
-
|
1381
|
-
|
1382
|
-
pop_current_node(parser))
|
1474
|
+
for (; node_tag_in_set(get_current_node(parser),
|
1475
|
+
(gumbo_tagset){TAG(DD), TAG(DT), TAG(LI), TAG(OPTION),
|
1476
|
+
TAG(OPTGROUP), TAG(P), TAG(RP), TAG(RB), TAG(RT), TAG(RTC)}) &&
|
1477
|
+
!node_html_tag_is(get_current_node(parser), exception);
|
1478
|
+
pop_current_node(parser))
|
1479
|
+
;
|
1480
|
+
}
|
1481
|
+
|
1482
|
+
// This is the "generate all implied end tags thoroughly" clause of the spec.
|
1483
|
+
// https://html.spec.whatwg.org/multipage/syntax.html#closing-elements-that-have-implied-end-tags
|
1484
|
+
static void generate_all_implied_end_tags_thoroughly(GumboParser* parser) {
|
1485
|
+
for (
|
1486
|
+
; node_tag_in_set(get_current_node(parser),
|
1487
|
+
(gumbo_tagset){TAG(CAPTION), TAG(COLGROUP), TAG(DD), TAG(DT), TAG(LI),
|
1488
|
+
TAG(OPTION), TAG(OPTGROUP), TAG(P), TAG(RP), TAG(RT), TAG(RTC),
|
1489
|
+
TAG(TBODY), TAG(TD), TAG(TFOOT), TAG(TH), TAG(HEAD), TAG(TR)});
|
1490
|
+
pop_current_node(parser))
|
1491
|
+
;
|
1383
1492
|
}
|
1384
1493
|
|
1385
1494
|
// This factors out the clauses relating to "act as if an end tag token with tag
|
@@ -1401,8 +1510,8 @@ static bool close_table(GumboParser* parser) {
|
|
1401
1510
|
|
1402
1511
|
// This factors out the clauses relating to "act as if an end tag token with tag
|
1403
1512
|
// name `cell_tag` had been seen".
|
1404
|
-
static bool close_table_cell(
|
1405
|
-
|
1513
|
+
static bool close_table_cell(
|
1514
|
+
GumboParser* parser, const GumboToken* token, GumboTag cell_tag) {
|
1406
1515
|
bool result = true;
|
1407
1516
|
generate_implied_end_tags(parser, GUMBO_TAG_LAST);
|
1408
1517
|
const GumboNode* node = get_current_node(parser);
|
@@ -1446,38 +1555,43 @@ static void close_current_select(GumboParser* parser) {
|
|
1446
1555
|
// The list of nodes in the "special" category:
|
1447
1556
|
// http://www.whatwg.org/specs/web-apps/current-work/complete/parsing.html#special
|
1448
1557
|
static bool is_special_node(const GumboNode* node) {
|
1449
|
-
assert(node->type == GUMBO_NODE_ELEMENT);
|
1450
|
-
return node_tag_in_set(node,
|
1451
|
-
|
1452
|
-
|
1453
|
-
|
1454
|
-
|
1455
|
-
|
1456
|
-
|
1457
|
-
|
1458
|
-
|
1459
|
-
|
1460
|
-
|
1461
|
-
|
1462
|
-
|
1463
|
-
|
1464
|
-
|
1465
|
-
|
1466
|
-
|
1467
|
-
|
1468
|
-
|
1469
|
-
|
1470
|
-
|
1558
|
+
assert(node->type == GUMBO_NODE_ELEMENT || node->type == GUMBO_NODE_TEMPLATE);
|
1559
|
+
return node_tag_in_set(node,
|
1560
|
+
(gumbo_tagset){TAG(ADDRESS), TAG(APPLET), TAG(AREA), TAG(ARTICLE),
|
1561
|
+
TAG(ASIDE), TAG(BASE), TAG(BASEFONT), TAG(BGSOUND), TAG(BLOCKQUOTE),
|
1562
|
+
TAG(BODY), TAG(BR), TAG(BUTTON), TAG(CAPTION), TAG(CENTER), TAG(COL),
|
1563
|
+
TAG(COLGROUP), TAG(MENUITEM), TAG(DD), TAG(DETAILS), TAG(DIR),
|
1564
|
+
TAG(DIV), TAG(DL), TAG(DT), TAG(EMBED), TAG(FIELDSET),
|
1565
|
+
TAG(FIGCAPTION), TAG(FIGURE), TAG(FOOTER), TAG(FORM), TAG(FRAME),
|
1566
|
+
TAG(FRAMESET), TAG(H1), TAG(H2), TAG(H3), TAG(H4), TAG(H5), TAG(H6),
|
1567
|
+
TAG(HEAD), TAG(HEADER), TAG(HGROUP), TAG(HR), TAG(HTML), TAG(IFRAME),
|
1568
|
+
TAG(IMG), TAG(INPUT), TAG(ISINDEX), TAG(LI), TAG(LINK), TAG(LISTING),
|
1569
|
+
TAG(MARQUEE), TAG(MENU), TAG(META), TAG(NAV), TAG(NOEMBED),
|
1570
|
+
TAG(NOFRAMES), TAG(NOSCRIPT), TAG(OBJECT), TAG(OL), TAG(P),
|
1571
|
+
TAG(PARAM), TAG(PLAINTEXT), TAG(PRE), TAG(SCRIPT), TAG(SECTION),
|
1572
|
+
TAG(SELECT), TAG(STYLE), TAG(SUMMARY), TAG(TABLE), TAG(TBODY),
|
1573
|
+
TAG(TD), TAG(TEMPLATE), TAG(TEXTAREA), TAG(TFOOT), TAG(TH),
|
1574
|
+
TAG(THEAD), TAG(TITLE), TAG(TR), TAG(UL), TAG(WBR), TAG(XMP),
|
1575
|
+
|
1576
|
+
TAG_MATHML(MI), TAG_MATHML(MO), TAG_MATHML(MN), TAG_MATHML(MS),
|
1577
|
+
TAG_MATHML(MTEXT), TAG_MATHML(ANNOTATION_XML),
|
1578
|
+
|
1579
|
+
TAG_SVG(FOREIGNOBJECT), TAG_SVG(DESC)});
|
1580
|
+
}
|
1581
|
+
|
1582
|
+
// Implicitly closes currently open elements until it reaches an element with
|
1583
|
+
// the
|
1471
1584
|
// specified qualified name. If the elements closed are in the set handled by
|
1472
1585
|
// generate_implied_end_tags, this is normal operation and this function returns
|
1473
1586
|
// true. Otherwise, a parse error is recorded and this function returns false.
|
1474
|
-
static bool implicitly_close_tags(
|
1475
|
-
|
1587
|
+
static bool implicitly_close_tags(GumboParser* parser, GumboToken* token,
|
1588
|
+
GumboNamespaceEnum target_ns, GumboTag target) {
|
1476
1589
|
bool result = true;
|
1477
1590
|
generate_implied_end_tags(parser, target);
|
1478
1591
|
if (!node_qualified_tag_is(get_current_node(parser), target_ns, target)) {
|
1479
1592
|
parser_add_parse_error(parser, token);
|
1480
|
-
while (
|
1593
|
+
while (
|
1594
|
+
!node_qualified_tag_is(get_current_node(parser), target_ns, target)) {
|
1481
1595
|
pop_current_node(parser);
|
1482
1596
|
}
|
1483
1597
|
result = false;
|
@@ -1491,9 +1605,11 @@ static bool implicitly_close_tags(
|
|
1491
1605
|
// a </p> tag was encountered, implicitly closing tags. Returns false if a
|
1492
1606
|
// parse error occurs. This is a convenience function because this particular
|
1493
1607
|
// clause appears several times in the spec.
|
1494
|
-
static bool maybe_implicitly_close_p_tag(
|
1608
|
+
static bool maybe_implicitly_close_p_tag(
|
1609
|
+
GumboParser* parser, GumboToken* token) {
|
1495
1610
|
if (has_an_element_in_button_scope(parser, GUMBO_TAG_P)) {
|
1496
|
-
return implicitly_close_tags(
|
1611
|
+
return implicitly_close_tags(
|
1612
|
+
parser, token, GUMBO_NAMESPACE_HTML, GUMBO_TAG_P);
|
1497
1613
|
}
|
1498
1614
|
return true;
|
1499
1615
|
}
|
@@ -1504,17 +1620,19 @@ static void maybe_implicitly_close_list_tag(
|
|
1504
1620
|
GumboParser* parser, GumboToken* token, bool is_li) {
|
1505
1621
|
GumboParserState* state = parser->_parser_state;
|
1506
1622
|
state->_frameset_ok = false;
|
1507
|
-
for (int i = state->_open_elements.length; --i >= 0;
|
1623
|
+
for (int i = state->_open_elements.length; --i >= 0;) {
|
1508
1624
|
const GumboNode* node = state->_open_elements.data[i];
|
1509
|
-
bool is_list_tag =
|
1510
|
-
node_html_tag_is(node, GUMBO_TAG_LI)
|
1511
|
-
|
1625
|
+
bool is_list_tag =
|
1626
|
+
is_li ? node_html_tag_is(node, GUMBO_TAG_LI)
|
1627
|
+
: node_tag_in_set(node, (gumbo_tagset){TAG(DD), TAG(DT)});
|
1512
1628
|
if (is_list_tag) {
|
1513
|
-
implicitly_close_tags(
|
1629
|
+
implicitly_close_tags(
|
1630
|
+
parser, token, node->v.element.tag_namespace, node->v.element.tag);
|
1514
1631
|
return;
|
1515
1632
|
}
|
1516
1633
|
if (is_special_node(node) &&
|
1517
|
-
!node_tag_in_set(
|
1634
|
+
!node_tag_in_set(
|
1635
|
+
node, (gumbo_tagset){TAG(ADDRESS), TAG(DIV), TAG(P)})) {
|
1518
1636
|
return;
|
1519
1637
|
}
|
1520
1638
|
}
|
@@ -1527,7 +1645,7 @@ static void merge_attributes(
|
|
1527
1645
|
const GumboVector* token_attr = &token->v.start_tag.attributes;
|
1528
1646
|
GumboVector* node_attr = &node->v.element.attributes;
|
1529
1647
|
|
1530
|
-
for (int i = 0; i < token_attr->length; ++i) {
|
1648
|
+
for (unsigned int i = 0; i < token_attr->length; ++i) {
|
1531
1649
|
GumboAttribute* attr = token_attr->data[i];
|
1532
1650
|
if (!gumbo_get_attribute(node_attr, attr->name)) {
|
1533
1651
|
// Ownership of the attribute is transferred by this gumbo_vector_add,
|
@@ -1551,8 +1669,8 @@ static void merge_attributes(
|
|
1551
1669
|
}
|
1552
1670
|
|
1553
1671
|
const char* gumbo_normalize_svg_tagname(const GumboStringPiece* tag) {
|
1554
|
-
for (
|
1555
|
-
|
1672
|
+
for (size_t i = 0; i < sizeof(kSvgTagReplacements) / sizeof(ReplacementEntry);
|
1673
|
+
++i) {
|
1556
1674
|
const ReplacementEntry* entry = &kSvgTagReplacements[i];
|
1557
1675
|
if (gumbo_string_equals_ignore_case(tag, &entry->from)) {
|
1558
1676
|
return entry->to.data;
|
@@ -1567,9 +1685,9 @@ const char* gumbo_normalize_svg_tagname(const GumboStringPiece* tag) {
|
|
1567
1685
|
static void adjust_foreign_attributes(GumboParser* parser, GumboToken* token) {
|
1568
1686
|
assert(token->type == GUMBO_TOKEN_START_TAG);
|
1569
1687
|
const GumboVector* attributes = &token->v.start_tag.attributes;
|
1570
|
-
for (
|
1571
|
-
|
1572
|
-
|
1688
|
+
for (size_t i = 0; i < sizeof(kForeignAttributeReplacements) /
|
1689
|
+
sizeof(NamespacedAttributeReplacement);
|
1690
|
+
++i) {
|
1573
1691
|
const NamespacedAttributeReplacement* entry =
|
1574
1692
|
&kForeignAttributeReplacements[i];
|
1575
1693
|
GumboAttribute* attr = gumbo_get_attribute(attributes, entry->from);
|
@@ -1587,7 +1705,7 @@ static void adjust_foreign_attributes(GumboParser* parser, GumboToken* token) {
|
|
1587
1705
|
static void adjust_svg_attributes(GumboParser* parser, GumboToken* token) {
|
1588
1706
|
assert(token->type == GUMBO_TOKEN_START_TAG);
|
1589
1707
|
const GumboVector* attributes = &token->v.start_tag.attributes;
|
1590
|
-
for (
|
1708
|
+
for (size_t i = 0;
|
1591
1709
|
i < sizeof(kSvgAttributeReplacements) / sizeof(ReplacementEntry); ++i) {
|
1592
1710
|
const ReplacementEntry* entry = &kSvgAttributeReplacements[i];
|
1593
1711
|
GumboAttribute* attr = gumbo_get_attribute(attributes, entry->from.data);
|
@@ -1604,8 +1722,8 @@ static void adjust_svg_attributes(GumboParser* parser, GumboToken* token) {
|
|
1604
1722
|
// value.
|
1605
1723
|
static void adjust_mathml_attributes(GumboParser* parser, GumboToken* token) {
|
1606
1724
|
assert(token->type == GUMBO_TOKEN_START_TAG);
|
1607
|
-
GumboAttribute* attr =
|
1608
|
-
&token->v.start_tag.attributes, "definitionurl");
|
1725
|
+
GumboAttribute* attr =
|
1726
|
+
gumbo_get_attribute(&token->v.start_tag.attributes, "definitionurl");
|
1609
1727
|
if (!attr) {
|
1610
1728
|
return;
|
1611
1729
|
}
|
@@ -1613,32 +1731,30 @@ static void adjust_mathml_attributes(GumboParser* parser, GumboToken* token) {
|
|
1613
1731
|
attr->name = gumbo_copy_stringz(parser, "definitionURL");
|
1614
1732
|
}
|
1615
1733
|
|
1616
|
-
static bool doctype_matches(
|
1617
|
-
const
|
1618
|
-
const GumboStringPiece* public_id,
|
1619
|
-
const GumboStringPiece* system_id,
|
1734
|
+
static bool doctype_matches(const GumboTokenDocType* doctype,
|
1735
|
+
const GumboStringPiece* public_id, const GumboStringPiece* system_id,
|
1620
1736
|
bool allow_missing_system_id) {
|
1621
1737
|
return !strcmp(doctype->public_identifier, public_id->data) &&
|
1622
|
-
|
1623
|
-
|
1738
|
+
(allow_missing_system_id || doctype->has_system_identifier) &&
|
1739
|
+
!strcmp(doctype->system_identifier, system_id->data);
|
1624
1740
|
}
|
1625
1741
|
|
1626
1742
|
static bool maybe_add_doctype_error(
|
1627
1743
|
GumboParser* parser, const GumboToken* token) {
|
1628
1744
|
const GumboTokenDocType* doctype = &token->v.doc_type;
|
1629
1745
|
bool html_doctype = !strcmp(doctype->name, kDoctypeHtml.data);
|
1630
|
-
if ((!html_doctype ||
|
1631
|
-
|
1632
|
-
|
1633
|
-
|
1634
|
-
!(html_doctype && (
|
1635
|
-
|
1636
|
-
|
1637
|
-
|
1638
|
-
|
1639
|
-
|
1640
|
-
|
1641
|
-
|
1746
|
+
if ((!html_doctype || doctype->has_public_identifier ||
|
1747
|
+
(doctype->has_system_identifier &&
|
1748
|
+
!strcmp(
|
1749
|
+
doctype->system_identifier, kSystemIdLegacyCompat.data))) &&
|
1750
|
+
!(html_doctype && (doctype_matches(doctype, &kPublicIdHtml4_0,
|
1751
|
+
&kSystemIdRecHtml4_0, true) ||
|
1752
|
+
doctype_matches(doctype, &kPublicIdHtml4_01,
|
1753
|
+
&kSystemIdHtml4, true) ||
|
1754
|
+
doctype_matches(doctype, &kPublicIdXhtml1_0,
|
1755
|
+
&kSystemIdXhtmlStrict1_1, false) ||
|
1756
|
+
doctype_matches(doctype, &kPublicIdXhtml1_1,
|
1757
|
+
&kSystemIdXhtml1_1, false)))) {
|
1642
1758
|
parser_add_parse_error(parser, token);
|
1643
1759
|
return false;
|
1644
1760
|
}
|
@@ -1661,7 +1777,7 @@ static void remove_from_parent(GumboParser* parser, GumboNode* node) {
|
|
1661
1777
|
gumbo_vector_remove_at(parser, index, children);
|
1662
1778
|
node->parent = NULL;
|
1663
1779
|
node->index_within_parent = -1;
|
1664
|
-
for (int i = index; i < children->length; ++i) {
|
1780
|
+
for (unsigned int i = index; i < children->length; ++i) {
|
1665
1781
|
GumboNode* child = children->data[i];
|
1666
1782
|
child->index_within_parent = i;
|
1667
1783
|
}
|
@@ -1670,29 +1786,38 @@ static void remove_from_parent(GumboParser* parser, GumboNode* node) {
|
|
1670
1786
|
// http://www.whatwg.org/specs/web-apps/current-work/multipage/the-end.html#an-introduction-to-error-handling-and-strange-cases-in-the-parser
|
1671
1787
|
// Also described in the "in body" handling for end formatting tags.
|
1672
1788
|
static bool adoption_agency_algorithm(
|
1673
|
-
GumboParser* parser, GumboToken* token, GumboTag
|
1789
|
+
GumboParser* parser, GumboToken* token, GumboTag subject) {
|
1674
1790
|
GumboParserState* state = parser->_parser_state;
|
1675
1791
|
gumbo_debug("Entering adoption agency algorithm.\n");
|
1676
|
-
//
|
1677
|
-
|
1678
|
-
|
1792
|
+
// Step 1.
|
1793
|
+
GumboNode* current_node = get_current_node(parser);
|
1794
|
+
if (current_node->v.element.tag_namespace == GUMBO_NAMESPACE_HTML &&
|
1795
|
+
current_node->v.element.tag == subject &&
|
1796
|
+
gumbo_vector_index_of(
|
1797
|
+
&state->_active_formatting_elements, current_node) == -1) {
|
1798
|
+
pop_current_node(parser);
|
1799
|
+
return false;
|
1800
|
+
}
|
1801
|
+
// Steps 2-4 & 20:
|
1802
|
+
for (unsigned int i = 0; i < 8; ++i) {
|
1803
|
+
// Step 5.
|
1679
1804
|
GumboNode* formatting_node = NULL;
|
1680
1805
|
int formatting_node_in_open_elements = -1;
|
1681
|
-
for (int j = state->_active_formatting_elements.length; --j >= 0;
|
1806
|
+
for (int j = state->_active_formatting_elements.length; --j >= 0;) {
|
1682
1807
|
GumboNode* current_node = state->_active_formatting_elements.data[j];
|
1683
1808
|
if (current_node == &kActiveFormattingScopeMarker) {
|
1684
1809
|
gumbo_debug("Broke on scope marker; aborting.\n");
|
1685
1810
|
// Last scope marker; abort the algorithm.
|
1686
1811
|
return false;
|
1687
1812
|
}
|
1688
|
-
if (current_node
|
1813
|
+
if (node_html_tag_is(current_node, subject)) {
|
1689
1814
|
// Found it.
|
1690
1815
|
formatting_node = current_node;
|
1691
|
-
formatting_node_in_open_elements =
|
1692
|
-
&state->_open_elements, formatting_node);
|
1816
|
+
formatting_node_in_open_elements =
|
1817
|
+
gumbo_vector_index_of(&state->_open_elements, formatting_node);
|
1693
1818
|
gumbo_debug("Formatting element of tag %s at %d.\n",
|
1694
|
-
|
1695
|
-
|
1819
|
+
gumbo_normalized_tagname(subject),
|
1820
|
+
formatting_node_in_open_elements);
|
1696
1821
|
break;
|
1697
1822
|
}
|
1698
1823
|
}
|
@@ -1704,18 +1829,23 @@ static bool adoption_agency_algorithm(
|
|
1704
1829
|
return false;
|
1705
1830
|
}
|
1706
1831
|
|
1832
|
+
// Step 6
|
1707
1833
|
if (formatting_node_in_open_elements == -1) {
|
1708
1834
|
gumbo_debug("Formatting node not on stack of open elements.\n");
|
1709
|
-
|
1710
|
-
|
1835
|
+
parser_add_parse_error(parser, token);
|
1836
|
+
gumbo_vector_remove(
|
1837
|
+
parser, formatting_node, &state->_active_formatting_elements);
|
1711
1838
|
return false;
|
1712
1839
|
}
|
1713
1840
|
|
1841
|
+
// Step 7
|
1714
1842
|
if (!has_an_element_in_scope(parser, formatting_node->v.element.tag)) {
|
1715
1843
|
parser_add_parse_error(parser, token);
|
1716
1844
|
gumbo_debug("Element not in scope.\n");
|
1717
1845
|
return false;
|
1718
1846
|
}
|
1847
|
+
|
1848
|
+
// Step 8
|
1719
1849
|
if (formatting_node != get_current_node(parser)) {
|
1720
1850
|
parser_add_parse_error(parser, token); // But continue onwards.
|
1721
1851
|
}
|
@@ -1723,55 +1853,60 @@ static bool adoption_agency_algorithm(
|
|
1723
1853
|
assert(!node_html_tag_is(formatting_node, GUMBO_TAG_HTML));
|
1724
1854
|
assert(!node_html_tag_is(formatting_node, GUMBO_TAG_BODY));
|
1725
1855
|
|
1726
|
-
// Step
|
1856
|
+
// Step 9 & 10
|
1727
1857
|
GumboNode* furthest_block = NULL;
|
1728
|
-
for (int j = formatting_node_in_open_elements;
|
1858
|
+
for (unsigned int j = formatting_node_in_open_elements;
|
1729
1859
|
j < state->_open_elements.length; ++j) {
|
1730
1860
|
assert(j > 0);
|
1731
1861
|
GumboNode* current = state->_open_elements.data[j];
|
1732
1862
|
if (is_special_node(current)) {
|
1733
|
-
// Step
|
1863
|
+
// Step 9.
|
1734
1864
|
furthest_block = current;
|
1735
1865
|
break;
|
1736
1866
|
}
|
1737
1867
|
}
|
1738
1868
|
if (!furthest_block) {
|
1739
|
-
// Step
|
1869
|
+
// Step 10.
|
1740
1870
|
while (get_current_node(parser) != formatting_node) {
|
1741
1871
|
pop_current_node(parser);
|
1742
1872
|
}
|
1743
1873
|
// And the formatting element itself.
|
1744
1874
|
pop_current_node(parser);
|
1745
|
-
gumbo_vector_remove(
|
1746
|
-
|
1875
|
+
gumbo_vector_remove(
|
1876
|
+
parser, formatting_node, &state->_active_formatting_elements);
|
1747
1877
|
return false;
|
1748
1878
|
}
|
1749
1879
|
assert(!node_html_tag_is(furthest_block, GUMBO_TAG_HTML));
|
1750
1880
|
assert(furthest_block);
|
1751
1881
|
|
1752
|
-
// Step
|
1882
|
+
// Step 11.
|
1753
1883
|
// Elements may be moved and reparented by this algorithm, so
|
1754
1884
|
// common_ancestor is not necessarily the same as formatting_node->parent.
|
1755
1885
|
GumboNode* common_ancestor =
|
1756
|
-
state->_open_elements.data[gumbo_vector_index_of(
|
1757
|
-
|
1886
|
+
state->_open_elements.data[gumbo_vector_index_of(&state->_open_elements,
|
1887
|
+
formatting_node) -
|
1888
|
+
1];
|
1758
1889
|
gumbo_debug("Common ancestor tag = %s, furthest block tag = %s.\n",
|
1759
|
-
|
1760
|
-
|
1890
|
+
gumbo_normalized_tagname(common_ancestor->v.element.tag),
|
1891
|
+
gumbo_normalized_tagname(furthest_block->v.element.tag));
|
1761
1892
|
|
1762
|
-
// Step
|
1893
|
+
// Step 12.
|
1763
1894
|
int bookmark = gumbo_vector_index_of(
|
1764
|
-
|
1765
|
-
|
1895
|
+
&state->_active_formatting_elements, formatting_node) +
|
1896
|
+
1;
|
1897
|
+
gumbo_debug("Bookmark at %d.\n", bookmark);
|
1898
|
+
// Step 13.
|
1766
1899
|
GumboNode* node = furthest_block;
|
1767
1900
|
GumboNode* last_node = furthest_block;
|
1768
1901
|
// Must be stored explicitly, in case node is removed from the stack of open
|
1769
1902
|
// elements, to handle step 9.4.
|
1770
1903
|
int saved_node_index = gumbo_vector_index_of(&state->_open_elements, node);
|
1771
1904
|
assert(saved_node_index > 0);
|
1772
|
-
// Step
|
1773
|
-
for (int j = 0
|
1774
|
-
// Step
|
1905
|
+
// Step 13.1.
|
1906
|
+
for (int j = 0;;) {
|
1907
|
+
// Step 13.2.
|
1908
|
+
++j;
|
1909
|
+
// Step 13.3.
|
1775
1910
|
int node_index = gumbo_vector_index_of(&state->_open_elements, node);
|
1776
1911
|
gumbo_debug(
|
1777
1912
|
"Current index: %d, last index: %d.\n", node_index, saved_node_index);
|
@@ -1780,58 +1915,72 @@ static bool adoption_agency_algorithm(
|
|
1780
1915
|
}
|
1781
1916
|
saved_node_index = --node_index;
|
1782
1917
|
assert(node_index > 0);
|
1783
|
-
assert(node_index < state->_open_elements.capacity);
|
1918
|
+
assert((unsigned int) node_index < state->_open_elements.capacity);
|
1784
1919
|
node = state->_open_elements.data[node_index];
|
1785
1920
|
assert(node->parent);
|
1786
|
-
|
1787
|
-
|
1788
|
-
|
1921
|
+
if (node == formatting_node) {
|
1922
|
+
// Step 13.4.
|
1923
|
+
break;
|
1924
|
+
}
|
1925
|
+
int formatting_index =
|
1926
|
+
gumbo_vector_index_of(&state->_active_formatting_elements, node);
|
1927
|
+
if (j > 3 && formatting_index != -1) {
|
1928
|
+
// Step 13.5.
|
1929
|
+
gumbo_debug("Removing formatting element at %d.\n", formatting_index);
|
1930
|
+
gumbo_vector_remove_at(
|
1931
|
+
parser, formatting_index, &state->_active_formatting_elements);
|
1932
|
+
// Removing the element shifts all indices over by one, so we may need
|
1933
|
+
// to move the bookmark.
|
1934
|
+
if (formatting_index < bookmark) {
|
1935
|
+
--bookmark;
|
1936
|
+
gumbo_debug("Moving bookmark to %d.\n", bookmark);
|
1937
|
+
}
|
1938
|
+
continue;
|
1939
|
+
}
|
1940
|
+
if (formatting_index == -1) {
|
1941
|
+
// Step 13.6.
|
1789
1942
|
gumbo_vector_remove_at(parser, node_index, &state->_open_elements);
|
1790
1943
|
continue;
|
1791
|
-
} else if (node == formatting_node) {
|
1792
|
-
// Step 9.6.
|
1793
|
-
break;
|
1794
1944
|
}
|
1795
|
-
// Step
|
1796
|
-
|
1797
|
-
|
1945
|
+
// Step 13.7.
|
1946
|
+
// "common ancestor as the intended parent" doesn't actually mean insert
|
1947
|
+
// it into the common ancestor; that happens below.
|
1798
1948
|
node = clone_node(parser, node, GUMBO_INSERTION_ADOPTION_AGENCY_CLONED);
|
1949
|
+
assert(formatting_index >= 0);
|
1799
1950
|
state->_active_formatting_elements.data[formatting_index] = node;
|
1951
|
+
assert(node_index >= 0);
|
1800
1952
|
state->_open_elements.data[node_index] = node;
|
1801
|
-
// Step
|
1953
|
+
// Step 13.8.
|
1802
1954
|
if (last_node == furthest_block) {
|
1803
1955
|
bookmark = formatting_index + 1;
|
1804
|
-
|
1956
|
+
gumbo_debug("Bookmark moved to %d.\n", bookmark);
|
1957
|
+
assert((unsigned int) bookmark <= state->_active_formatting_elements.length);
|
1805
1958
|
}
|
1806
|
-
// Step
|
1959
|
+
// Step 13.9.
|
1807
1960
|
last_node->parse_flags |= GUMBO_INSERTION_ADOPTION_AGENCY_MOVED;
|
1808
1961
|
remove_from_parent(parser, last_node);
|
1809
1962
|
append_node(parser, node, last_node);
|
1810
|
-
// Step
|
1963
|
+
// Step 13.10.
|
1811
1964
|
last_node = node;
|
1812
|
-
}
|
1965
|
+
} // Step 13.11.
|
1813
1966
|
|
1814
|
-
// Step
|
1967
|
+
// Step 14.
|
1815
1968
|
gumbo_debug("Removing %s node from parent ",
|
1816
|
-
|
1969
|
+
gumbo_normalized_tagname(last_node->v.element.tag));
|
1817
1970
|
remove_from_parent(parser, last_node);
|
1818
1971
|
last_node->parse_flags |= GUMBO_INSERTION_ADOPTION_AGENCY_MOVED;
|
1819
|
-
|
1820
|
-
|
1821
|
-
|
1822
|
-
|
1823
|
-
|
1824
|
-
gumbo_debug("and inserting it into %s.\n",
|
1825
|
-
gumbo_normalized_tagname(common_ancestor->v.element.tag));
|
1826
|
-
append_node(parser, common_ancestor, last_node);
|
1827
|
-
}
|
1972
|
+
InsertionLocation location =
|
1973
|
+
get_appropriate_insertion_location(parser, common_ancestor);
|
1974
|
+
gumbo_debug("and inserting it into %s.\n",
|
1975
|
+
gumbo_normalized_tagname(location.target->v.element.tag));
|
1976
|
+
insert_node(parser, last_node, location);
|
1828
1977
|
|
1829
|
-
// Step
|
1978
|
+
// Step 15.
|
1830
1979
|
GumboNode* new_formatting_node = clone_node(
|
1831
1980
|
parser, formatting_node, GUMBO_INSERTION_ADOPTION_AGENCY_CLONED);
|
1832
1981
|
formatting_node->parse_flags |= GUMBO_INSERTION_IMPLICIT_END_TAG;
|
1833
1982
|
|
1834
|
-
// Step
|
1983
|
+
// Step 16. Instead of appending nodes one-by-one, we swap the children
|
1835
1984
|
// vector of furthest_block with the empty children of new_formatting_node,
|
1836
1985
|
// reducing memory traffic and allocations. We still have to reset their
|
1837
1986
|
// parent pointers, though.
|
@@ -1841,15 +1990,15 @@ static bool adoption_agency_algorithm(
|
|
1841
1990
|
furthest_block->v.element.children = temp;
|
1842
1991
|
|
1843
1992
|
temp = new_formatting_node->v.element.children;
|
1844
|
-
for (int i = 0; i < temp.length; ++i) {
|
1993
|
+
for (unsigned int i = 0; i < temp.length; ++i) {
|
1845
1994
|
GumboNode* child = temp.data[i];
|
1846
1995
|
child->parent = new_formatting_node;
|
1847
1996
|
}
|
1848
1997
|
|
1849
|
-
// Step
|
1998
|
+
// Step 17.
|
1850
1999
|
append_node(parser, furthest_block, new_formatting_node);
|
1851
2000
|
|
1852
|
-
// Step
|
2001
|
+
// Step 18.
|
1853
2002
|
// If the formatting node was before the bookmark, it may shift over all
|
1854
2003
|
// indices after it, so we need to explicitly find the index and possibly
|
1855
2004
|
// adjust the bookmark.
|
@@ -1857,25 +2006,27 @@ static bool adoption_agency_algorithm(
|
|
1857
2006
|
&state->_active_formatting_elements, formatting_node);
|
1858
2007
|
assert(formatting_node_index != -1);
|
1859
2008
|
if (formatting_node_index < bookmark) {
|
2009
|
+
gumbo_debug(
|
2010
|
+
"Formatting node at %d is before bookmark at %d; decrementing.\n",
|
2011
|
+
formatting_node_index, bookmark);
|
1860
2012
|
--bookmark;
|
1861
2013
|
}
|
1862
2014
|
gumbo_vector_remove_at(
|
1863
2015
|
parser, formatting_node_index, &state->_active_formatting_elements);
|
1864
2016
|
assert(bookmark >= 0);
|
1865
|
-
assert(bookmark <= state->_active_formatting_elements.length);
|
2017
|
+
assert((unsigned int) bookmark <= state->_active_formatting_elements.length);
|
1866
2018
|
gumbo_vector_insert_at(parser, new_formatting_node, bookmark,
|
1867
|
-
|
2019
|
+
&state->_active_formatting_elements);
|
1868
2020
|
|
1869
|
-
// Step
|
1870
|
-
gumbo_vector_remove(
|
1871
|
-
|
1872
|
-
|
1873
|
-
&state->_open_elements, furthest_block) + 1;
|
2021
|
+
// Step 19.
|
2022
|
+
gumbo_vector_remove(parser, formatting_node, &state->_open_elements);
|
2023
|
+
int insert_at =
|
2024
|
+
gumbo_vector_index_of(&state->_open_elements, furthest_block) + 1;
|
1874
2025
|
assert(insert_at >= 0);
|
1875
|
-
assert(insert_at <= state->_open_elements.length);
|
2026
|
+
assert((unsigned int) insert_at <= state->_open_elements.length);
|
1876
2027
|
gumbo_vector_insert_at(
|
1877
2028
|
parser, new_formatting_node, insert_at, &state->_open_elements);
|
1878
|
-
}
|
2029
|
+
} // Step 20.
|
1879
2030
|
return true;
|
1880
2031
|
}
|
1881
2032
|
|
@@ -1898,6 +2049,7 @@ static void ignore_token(GumboParser* parser) {
|
|
1898
2049
|
|
1899
2050
|
// http://www.whatwg.org/specs/web-apps/current-work/complete/the-end.html
|
1900
2051
|
static void finish_parsing(GumboParser* parser) {
|
2052
|
+
gumbo_debug("Finishing parsing");
|
1901
2053
|
maybe_flush_text_node_buffer(parser);
|
1902
2054
|
GumboParserState* state = parser->_parser_state;
|
1903
2055
|
for (GumboNode* node = pop_current_node(parser); node;
|
@@ -1908,7 +2060,8 @@ static void finish_parsing(GumboParser* parser) {
|
|
1908
2060
|
}
|
1909
2061
|
node->parse_flags |= GUMBO_INSERTION_IMPLICIT_END_TAG;
|
1910
2062
|
}
|
1911
|
-
while (pop_current_node(parser))
|
2063
|
+
while (pop_current_node(parser))
|
2064
|
+
; // Pop them all.
|
1912
2065
|
}
|
1913
2066
|
|
1914
2067
|
static bool handle_initial(GumboParser* parser, GumboToken* token) {
|
@@ -1952,9 +2105,9 @@ static bool handle_before_html(GumboParser* parser, GumboToken* token) {
|
|
1952
2105
|
parser->_output->root = html_node;
|
1953
2106
|
set_insertion_mode(parser, GUMBO_INSERTION_MODE_BEFORE_HEAD);
|
1954
2107
|
return true;
|
1955
|
-
} else if (token->type == GUMBO_TOKEN_END_TAG &&
|
1956
|
-
!tag_in(token, false,
|
1957
|
-
|
2108
|
+
} else if (token->type == GUMBO_TOKEN_END_TAG &&
|
2109
|
+
!tag_in(token, false,
|
2110
|
+
(gumbo_tagset){TAG(HEAD), TAG(BODY), TAG(HTML), TAG(BR)})) {
|
1958
2111
|
parser_add_parse_error(parser, token);
|
1959
2112
|
ignore_token(parser);
|
1960
2113
|
return false;
|
@@ -1986,9 +2139,9 @@ static bool handle_before_head(GumboParser* parser, GumboToken* token) {
|
|
1986
2139
|
set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_HEAD);
|
1987
2140
|
parser->_parser_state->_head_element = node;
|
1988
2141
|
return true;
|
1989
|
-
} else if (token->type == GUMBO_TOKEN_END_TAG &&
|
1990
|
-
!tag_in(token, false,
|
1991
|
-
|
2142
|
+
} else if (token->type == GUMBO_TOKEN_END_TAG &&
|
2143
|
+
!tag_in(token, false,
|
2144
|
+
(gumbo_tagset){TAG(HEAD), TAG(BODY), TAG(HTML), TAG(BR)})) {
|
1992
2145
|
parser_add_parse_error(parser, token);
|
1993
2146
|
ignore_token(parser);
|
1994
2147
|
return false;
|
@@ -2020,8 +2173,9 @@ static bool handle_in_head(GumboParser* parser, GumboToken* token) {
|
|
2020
2173
|
return true;
|
2021
2174
|
} else if (tag_is(token, kStartTag, GUMBO_TAG_HTML)) {
|
2022
2175
|
return handle_in_body(parser, token);
|
2023
|
-
} else if (tag_in(token, kStartTag,
|
2024
|
-
|
2176
|
+
} else if (tag_in(token, kStartTag,
|
2177
|
+
(gumbo_tagset){TAG(BASE), TAG(BASEFONT), TAG(BGSOUND),
|
2178
|
+
TAG(MENUITEM), TAG(LINK)})) {
|
2025
2179
|
insert_element_from_token(parser, token);
|
2026
2180
|
pop_current_node(parser);
|
2027
2181
|
acknowledge_self_closing_tag(parser);
|
@@ -2038,7 +2192,8 @@ static bool handle_in_head(GumboParser* parser, GumboToken* token) {
|
|
2038
2192
|
} else if (tag_is(token, kStartTag, GUMBO_TAG_TITLE)) {
|
2039
2193
|
run_generic_parsing_algorithm(parser, token, GUMBO_LEX_RCDATA);
|
2040
2194
|
return true;
|
2041
|
-
} else if (tag_in(
|
2195
|
+
} else if (tag_in(
|
2196
|
+
token, kStartTag, (gumbo_tagset){TAG(NOFRAMES), TAG(STYLE)})) {
|
2042
2197
|
run_generic_parsing_algorithm(parser, token, GUMBO_LEX_RAWTEXT);
|
2043
2198
|
return true;
|
2044
2199
|
} else if (tag_is(token, kStartTag, GUMBO_TAG_NOSCRIPT)) {
|
@@ -2054,29 +2209,48 @@ static bool handle_in_head(GumboParser* parser, GumboToken* token) {
|
|
2054
2209
|
assert(node_html_tag_is(head, GUMBO_TAG_HEAD));
|
2055
2210
|
set_insertion_mode(parser, GUMBO_INSERTION_MODE_AFTER_HEAD);
|
2056
2211
|
return true;
|
2057
|
-
} else if (
|
2058
|
-
|
2059
|
-
|
2060
|
-
|
2212
|
+
} else if (tag_in(token, kEndTag,
|
2213
|
+
(gumbo_tagset){TAG(BODY), TAG(HTML), TAG(BR)})) {
|
2214
|
+
pop_current_node(parser);
|
2215
|
+
set_insertion_mode(parser, GUMBO_INSERTION_MODE_AFTER_HEAD);
|
2216
|
+
parser->_parser_state->_reprocess_current_token = true;
|
2217
|
+
return true;
|
2218
|
+
} else if (tag_is(token, kStartTag, GUMBO_TAG_TEMPLATE)) {
|
2219
|
+
insert_element_from_token(parser, token);
|
2220
|
+
add_formatting_element(parser, &kActiveFormattingScopeMarker);
|
2221
|
+
parser->_parser_state->_frameset_ok = false;
|
2222
|
+
set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TEMPLATE);
|
2223
|
+
push_template_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TEMPLATE);
|
2224
|
+
return true;
|
2225
|
+
} else if (tag_is(token, kEndTag, GUMBO_TAG_TEMPLATE)) {
|
2226
|
+
if (!has_open_element(parser, GUMBO_TAG_TEMPLATE)) {
|
2227
|
+
parser_add_parse_error(parser, token);
|
2228
|
+
ignore_token(parser);
|
2229
|
+
return false;
|
2230
|
+
}
|
2231
|
+
generate_all_implied_end_tags_thoroughly(parser);
|
2232
|
+
bool success = true;
|
2233
|
+
if (!node_html_tag_is(get_current_node(parser), GUMBO_TAG_TEMPLATE)) {
|
2234
|
+
parser_add_parse_error(parser, token);
|
2235
|
+
success = false;
|
2236
|
+
}
|
2237
|
+
while (!node_html_tag_is(pop_current_node(parser), GUMBO_TAG_TEMPLATE))
|
2238
|
+
;
|
2239
|
+
clear_active_formatting_elements(parser);
|
2240
|
+
pop_template_insertion_mode(parser);
|
2241
|
+
reset_insertion_mode_appropriately(parser);
|
2242
|
+
return success;
|
2061
2243
|
} else if (tag_is(token, kStartTag, GUMBO_TAG_HEAD) ||
|
2062
|
-
(token->type == GUMBO_TOKEN_END_TAG
|
2063
|
-
!tag_in(token, kEndTag, (gumbo_tagset) { TAG(BODY), TAG(HTML),
|
2064
|
-
TAG(BR) }))) {
|
2065
|
-
parser_add_parse_error(parser, token);
|
2066
|
-
return false;
|
2067
|
-
} else if (tag_is(token, kStartTag, GUMBO_TAG_UNKNOWN) && token->v.start_tag.is_self_closing) {
|
2244
|
+
(token->type == GUMBO_TOKEN_END_TAG)) {
|
2068
2245
|
parser_add_parse_error(parser, token);
|
2069
2246
|
ignore_token(parser);
|
2070
2247
|
return false;
|
2071
2248
|
} else {
|
2072
|
-
|
2073
|
-
assert(node_html_tag_is(node, GUMBO_TAG_HEAD));
|
2074
|
-
AVOID_UNUSED_VARIABLE_WARNING(node);
|
2249
|
+
pop_current_node(parser);
|
2075
2250
|
set_insertion_mode(parser, GUMBO_INSERTION_MODE_AFTER_HEAD);
|
2076
2251
|
parser->_parser_state->_reprocess_current_token = true;
|
2077
2252
|
return true;
|
2078
2253
|
}
|
2079
|
-
|
2080
2254
|
return true;
|
2081
2255
|
}
|
2082
2256
|
|
@@ -2095,12 +2269,14 @@ static bool handle_in_head_noscript(GumboParser* parser, GumboToken* token) {
|
|
2095
2269
|
return true;
|
2096
2270
|
} else if (token->type == GUMBO_TOKEN_WHITESPACE ||
|
2097
2271
|
token->type == GUMBO_TOKEN_COMMENT ||
|
2098
|
-
tag_in(token, kStartTag,
|
2099
|
-
|
2100
|
-
|
2101
|
-
|
2102
|
-
|
2103
|
-
|
2272
|
+
tag_in(token, kStartTag,
|
2273
|
+
(gumbo_tagset){TAG(BASEFONT), TAG(BGSOUND), TAG(LINK),
|
2274
|
+
TAG(META), TAG(NOFRAMES), TAG(STYLE)})) {
|
2275
|
+
return handle_in_head(parser, token);
|
2276
|
+
} else if (tag_in(
|
2277
|
+
token, kStartTag, (gumbo_tagset){TAG(HEAD), TAG(NOSCRIPT)}) ||
|
2278
|
+
(token->type == GUMBO_TOKEN_END_TAG &&
|
2279
|
+
!tag_is(token, kEndTag, GUMBO_TAG_BR))) {
|
2104
2280
|
parser_add_parse_error(parser, token);
|
2105
2281
|
ignore_token(parser);
|
2106
2282
|
return false;
|
@@ -2139,10 +2315,10 @@ static bool handle_after_head(GumboParser* parser, GumboToken* token) {
|
|
2139
2315
|
insert_element_from_token(parser, token);
|
2140
2316
|
set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_FRAMESET);
|
2141
2317
|
return true;
|
2142
|
-
} else if (tag_in(token, kStartTag,
|
2143
|
-
|
2144
|
-
|
2145
|
-
|
2318
|
+
} else if (tag_in(token, kStartTag,
|
2319
|
+
(gumbo_tagset){TAG(BASE), TAG(BASEFONT), TAG(BGSOUND),
|
2320
|
+
TAG(LINK), TAG(META), TAG(NOFRAMES), TAG(SCRIPT),
|
2321
|
+
TAG(STYLE), TAG(TEMPLATE), TAG(TITLE)})) {
|
2146
2322
|
parser_add_parse_error(parser, token);
|
2147
2323
|
assert(state->_head_element != NULL);
|
2148
2324
|
// This must be flushed before we push the head element on, as there may be
|
@@ -2152,9 +2328,12 @@ static bool handle_after_head(GumboParser* parser, GumboToken* token) {
|
|
2152
2328
|
bool result = handle_in_head(parser, token);
|
2153
2329
|
gumbo_vector_remove(parser, state->_head_element, &state->_open_elements);
|
2154
2330
|
return result;
|
2331
|
+
} else if (tag_is(token, kEndTag, GUMBO_TAG_TEMPLATE)) {
|
2332
|
+
return handle_in_head(parser, token);
|
2155
2333
|
} else if (tag_is(token, kStartTag, GUMBO_TAG_HEAD) ||
|
2156
|
-
|
2157
|
-
|
2334
|
+
(token->type == GUMBO_TOKEN_END_TAG &&
|
2335
|
+
!tag_in(token, kEndTag,
|
2336
|
+
(gumbo_tagset){TAG(BODY), TAG(HTML), TAG(BR)}))) {
|
2158
2337
|
parser_add_parse_error(parser, token);
|
2159
2338
|
ignore_token(parser);
|
2160
2339
|
return false;
|
@@ -2168,24 +2347,23 @@ static bool handle_after_head(GumboParser* parser, GumboToken* token) {
|
|
2168
2347
|
|
2169
2348
|
static void destroy_node(GumboParser* parser, GumboNode* node) {
|
2170
2349
|
switch (node->type) {
|
2171
|
-
case GUMBO_NODE_DOCUMENT:
|
2172
|
-
|
2173
|
-
|
2174
|
-
|
2175
|
-
destroy_node(parser, doc->children.data[i]);
|
2176
|
-
}
|
2177
|
-
gumbo_parser_deallocate(parser, (void*) doc->children.data);
|
2178
|
-
gumbo_parser_deallocate(parser, (void*) doc->name);
|
2179
|
-
gumbo_parser_deallocate(parser, (void*) doc->public_identifier);
|
2180
|
-
gumbo_parser_deallocate(parser, (void*) doc->system_identifier);
|
2350
|
+
case GUMBO_NODE_DOCUMENT: {
|
2351
|
+
GumboDocument* doc = &node->v.document;
|
2352
|
+
for (unsigned int i = 0; i < doc->children.length; ++i) {
|
2353
|
+
destroy_node(parser, doc->children.data[i]);
|
2181
2354
|
}
|
2182
|
-
|
2355
|
+
gumbo_parser_deallocate(parser, (void*) doc->children.data);
|
2356
|
+
gumbo_parser_deallocate(parser, (void*) doc->name);
|
2357
|
+
gumbo_parser_deallocate(parser, (void*) doc->public_identifier);
|
2358
|
+
gumbo_parser_deallocate(parser, (void*) doc->system_identifier);
|
2359
|
+
} break;
|
2360
|
+
case GUMBO_NODE_TEMPLATE:
|
2183
2361
|
case GUMBO_NODE_ELEMENT:
|
2184
|
-
for (int i = 0; i < node->v.element.attributes.length; ++i) {
|
2362
|
+
for (unsigned int i = 0; i < node->v.element.attributes.length; ++i) {
|
2185
2363
|
gumbo_destroy_attribute(parser, node->v.element.attributes.data[i]);
|
2186
2364
|
}
|
2187
2365
|
gumbo_parser_deallocate(parser, node->v.element.attributes.data);
|
2188
|
-
for (int i = 0; i < node->v.element.children.length; ++i) {
|
2366
|
+
for (unsigned int i = 0; i < node->v.element.children.length; ++i) {
|
2189
2367
|
destroy_node(parser, node->v.element.children.data[i]);
|
2190
2368
|
}
|
2191
2369
|
gumbo_parser_deallocate(parser, node->v.element.children.data);
|
@@ -2226,20 +2404,26 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
2226
2404
|
ignore_token(parser);
|
2227
2405
|
return false;
|
2228
2406
|
} else if (tag_is(token, kStartTag, GUMBO_TAG_HTML)) {
|
2407
|
+
parser_add_parse_error(parser, token);
|
2408
|
+
if (has_open_element(parser, GUMBO_TAG_TEMPLATE)) {
|
2409
|
+
ignore_token(parser);
|
2410
|
+
return false;
|
2411
|
+
}
|
2229
2412
|
assert(parser->_output->root != NULL);
|
2230
2413
|
assert(parser->_output->root->type == GUMBO_NODE_ELEMENT);
|
2231
|
-
parser_add_parse_error(parser, token);
|
2232
2414
|
merge_attributes(parser, token, parser->_output->root);
|
2233
2415
|
return false;
|
2234
|
-
} else if (tag_in(token, kStartTag,
|
2235
|
-
|
2236
|
-
|
2237
|
-
|
2416
|
+
} else if (tag_in(token, kStartTag,
|
2417
|
+
(gumbo_tagset){TAG(BASE), TAG(BASEFONT), TAG(BGSOUND),
|
2418
|
+
TAG(MENUITEM), TAG(LINK), TAG(META), TAG(NOFRAMES),
|
2419
|
+
TAG(SCRIPT), TAG(STYLE), TAG(TEMPLATE), TAG(TITLE)}) ||
|
2420
|
+
tag_is(token, kEndTag, GUMBO_TAG_TEMPLATE)) {
|
2238
2421
|
return handle_in_head(parser, token);
|
2239
2422
|
} else if (tag_is(token, kStartTag, GUMBO_TAG_BODY)) {
|
2240
2423
|
parser_add_parse_error(parser, token);
|
2241
2424
|
if (state->_open_elements.length < 2 ||
|
2242
|
-
!node_html_tag_is(state->_open_elements.data[1], GUMBO_TAG_BODY)
|
2425
|
+
!node_html_tag_is(state->_open_elements.data[1], GUMBO_TAG_BODY) ||
|
2426
|
+
has_open_element(parser, GUMBO_TAG_TEMPLATE)) {
|
2243
2427
|
ignore_token(parser);
|
2244
2428
|
return false;
|
2245
2429
|
}
|
@@ -2273,7 +2457,7 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
2273
2457
|
// Remove the body node. We may want to factor this out into a generic
|
2274
2458
|
// helper, but right now this is the only code that needs to do this.
|
2275
2459
|
GumboVector* children = &parser->_output->root->v.element.children;
|
2276
|
-
for (int i = 0; i < children->length; ++i) {
|
2460
|
+
for (unsigned int i = 0; i < children->length; ++i) {
|
2277
2461
|
if (children->data[i] == body_node) {
|
2278
2462
|
gumbo_vector_remove_at(parser, i, children);
|
2279
2463
|
break;
|
@@ -2286,27 +2470,32 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
2286
2470
|
set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_FRAMESET);
|
2287
2471
|
return true;
|
2288
2472
|
} else if (token->type == GUMBO_TOKEN_EOF) {
|
2289
|
-
for (int i = 0; i < state->_open_elements.length; ++i) {
|
2290
|
-
if (!node_tag_in_set(state->_open_elements.data[i],
|
2291
|
-
|
2292
|
-
|
2473
|
+
for (unsigned int i = 0; i < state->_open_elements.length; ++i) {
|
2474
|
+
if (!node_tag_in_set(state->_open_elements.data[i],
|
2475
|
+
(gumbo_tagset){TAG(DD), TAG(DT), TAG(LI), TAG(P), TAG(TBODY),
|
2476
|
+
TAG(TD), TAG(TFOOT), TAG(TH), TAG(THEAD), TAG(TR), TAG(BODY),
|
2477
|
+
TAG(HTML)})) {
|
2293
2478
|
parser_add_parse_error(parser, token);
|
2294
|
-
return false;
|
2295
2479
|
}
|
2296
2480
|
}
|
2481
|
+
if (get_current_template_insertion_mode(parser) !=
|
2482
|
+
GUMBO_INSERTION_MODE_INITIAL) {
|
2483
|
+
return handle_in_template(parser, token);
|
2484
|
+
}
|
2297
2485
|
return true;
|
2298
|
-
} else if (tag_in(token, kEndTag, (gumbo_tagset)
|
2486
|
+
} else if (tag_in(token, kEndTag, (gumbo_tagset){TAG(BODY), TAG(HTML)})) {
|
2299
2487
|
if (!has_an_element_in_scope(parser, GUMBO_TAG_BODY)) {
|
2300
2488
|
parser_add_parse_error(parser, token);
|
2301
2489
|
ignore_token(parser);
|
2302
2490
|
return false;
|
2303
2491
|
}
|
2304
2492
|
bool success = true;
|
2305
|
-
for (int i = 0; i < state->_open_elements.length; ++i) {
|
2306
|
-
if (!node_tag_in_set(state->_open_elements.data[i],
|
2307
|
-
|
2308
|
-
|
2309
|
-
|
2493
|
+
for (unsigned int i = 0; i < state->_open_elements.length; ++i) {
|
2494
|
+
if (!node_tag_in_set(state->_open_elements.data[i],
|
2495
|
+
(gumbo_tagset){TAG(DD), TAG(DT), TAG(LI), TAG(OPTGROUP),
|
2496
|
+
TAG(OPTION), TAG(P), TAG(RB), TAG(RP), TAG(RT), TAG(RTC),
|
2497
|
+
TAG(TBODY), TAG(TD), TAG(TFOOT), TAG(TH), TAG(THEAD), TAG(TR),
|
2498
|
+
TAG(BODY), TAG(HTML)})) {
|
2310
2499
|
parser_add_parse_error(parser, token);
|
2311
2500
|
success = false;
|
2312
2501
|
break;
|
@@ -2321,48 +2510,54 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
2321
2510
|
record_end_of_element(state->_current_token, &body->v.element);
|
2322
2511
|
}
|
2323
2512
|
return success;
|
2324
|
-
} else if (tag_in(token, kStartTag,
|
2325
|
-
|
2326
|
-
|
2327
|
-
|
2328
|
-
|
2513
|
+
} else if (tag_in(token, kStartTag,
|
2514
|
+
(gumbo_tagset){TAG(ADDRESS), TAG(ARTICLE), TAG(ASIDE),
|
2515
|
+
TAG(BLOCKQUOTE), TAG(CENTER), TAG(DETAILS), TAG(DIR),
|
2516
|
+
TAG(DIV), TAG(DL), TAG(FIELDSET), TAG(FIGCAPTION),
|
2517
|
+
TAG(FIGURE), TAG(FOOTER), TAG(HEADER), TAG(HGROUP),
|
2518
|
+
TAG(MENU), TAG(MAIN), TAG(NAV), TAG(OL), TAG(P),
|
2519
|
+
TAG(SECTION), TAG(SUMMARY), TAG(UL)})) {
|
2329
2520
|
bool result = maybe_implicitly_close_p_tag(parser, token);
|
2330
2521
|
insert_element_from_token(parser, token);
|
2331
2522
|
return result;
|
2332
|
-
} else if (tag_in(token, kStartTag, (gumbo_tagset)
|
2333
|
-
|
2523
|
+
} else if (tag_in(token, kStartTag, (gumbo_tagset){TAG(H1), TAG(H2), TAG(H3),
|
2524
|
+
TAG(H4), TAG(H5), TAG(H6)})) {
|
2334
2525
|
bool result = maybe_implicitly_close_p_tag(parser, token);
|
2335
|
-
if (node_tag_in_set(
|
2336
|
-
|
2526
|
+
if (node_tag_in_set(
|
2527
|
+
get_current_node(parser), (gumbo_tagset){TAG(H1), TAG(H2), TAG(H3),
|
2528
|
+
TAG(H4), TAG(H5), TAG(H6)})) {
|
2337
2529
|
parser_add_parse_error(parser, token);
|
2338
2530
|
pop_current_node(parser);
|
2339
2531
|
result = false;
|
2340
2532
|
}
|
2341
2533
|
insert_element_from_token(parser, token);
|
2342
2534
|
return result;
|
2343
|
-
} else if (tag_in(token, kStartTag, (gumbo_tagset)
|
2535
|
+
} else if (tag_in(token, kStartTag, (gumbo_tagset){TAG(PRE), TAG(LISTING)})) {
|
2344
2536
|
bool result = maybe_implicitly_close_p_tag(parser, token);
|
2345
2537
|
insert_element_from_token(parser, token);
|
2346
2538
|
state->_ignore_next_linefeed = true;
|
2347
2539
|
state->_frameset_ok = false;
|
2348
2540
|
return result;
|
2349
2541
|
} else if (tag_is(token, kStartTag, GUMBO_TAG_FORM)) {
|
2350
|
-
if (state->_form_element != NULL
|
2542
|
+
if (state->_form_element != NULL &&
|
2543
|
+
!has_open_element(parser, GUMBO_TAG_TEMPLATE)) {
|
2351
2544
|
gumbo_debug("Ignoring nested form.\n");
|
2352
2545
|
parser_add_parse_error(parser, token);
|
2353
2546
|
ignore_token(parser);
|
2354
2547
|
return false;
|
2355
2548
|
}
|
2356
2549
|
bool result = maybe_implicitly_close_p_tag(parser, token);
|
2357
|
-
|
2358
|
-
|
2550
|
+
GumboNode* form_element = insert_element_from_token(parser, token);
|
2551
|
+
if (!has_open_element(parser, GUMBO_TAG_TEMPLATE)) {
|
2552
|
+
state->_form_element = form_element;
|
2553
|
+
}
|
2359
2554
|
return result;
|
2360
2555
|
} else if (tag_is(token, kStartTag, GUMBO_TAG_LI)) {
|
2361
2556
|
maybe_implicitly_close_list_tag(parser, token, true);
|
2362
2557
|
bool result = maybe_implicitly_close_p_tag(parser, token);
|
2363
2558
|
insert_element_from_token(parser, token);
|
2364
2559
|
return result;
|
2365
|
-
|
2560
|
+
} else if (tag_in(token, kStartTag, (gumbo_tagset){TAG(DD), TAG(DT)})) {
|
2366
2561
|
maybe_implicitly_close_list_tag(parser, token, false);
|
2367
2562
|
bool result = maybe_implicitly_close_p_tag(parser, token);
|
2368
2563
|
insert_element_from_token(parser, token);
|
@@ -2375,7 +2570,8 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
2375
2570
|
} else if (tag_is(token, kStartTag, GUMBO_TAG_BUTTON)) {
|
2376
2571
|
if (has_an_element_in_scope(parser, GUMBO_TAG_BUTTON)) {
|
2377
2572
|
parser_add_parse_error(parser, token);
|
2378
|
-
implicitly_close_tags(
|
2573
|
+
implicitly_close_tags(
|
2574
|
+
parser, token, GUMBO_NAMESPACE_HTML, GUMBO_TAG_BUTTON);
|
2379
2575
|
state->_reprocess_current_token = true;
|
2380
2576
|
return false;
|
2381
2577
|
}
|
@@ -2383,45 +2579,63 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
2383
2579
|
insert_element_from_token(parser, token);
|
2384
2580
|
state->_frameset_ok = false;
|
2385
2581
|
return true;
|
2386
|
-
|
2387
|
-
|
2388
|
-
|
2389
|
-
|
2390
|
-
|
2391
|
-
|
2582
|
+
} else if (tag_in(token, kEndTag,
|
2583
|
+
(gumbo_tagset){TAG(ADDRESS), TAG(ARTICLE), TAG(ASIDE),
|
2584
|
+
TAG(BLOCKQUOTE), TAG(BUTTON), TAG(CENTER), TAG(DETAILS),
|
2585
|
+
TAG(DIR), TAG(DIV), TAG(DL), TAG(FIELDSET),
|
2586
|
+
TAG(FIGCAPTION), TAG(FIGURE), TAG(FOOTER), TAG(HEADER),
|
2587
|
+
TAG(HGROUP), TAG(LISTING), TAG(MAIN), TAG(MENU), TAG(NAV),
|
2588
|
+
TAG(OL), TAG(PRE), TAG(SECTION), TAG(SUMMARY), TAG(UL)})) {
|
2392
2589
|
GumboTag tag = token->v.end_tag;
|
2393
2590
|
if (!has_an_element_in_scope(parser, tag)) {
|
2394
2591
|
parser_add_parse_error(parser, token);
|
2395
2592
|
ignore_token(parser);
|
2396
2593
|
return false;
|
2397
2594
|
}
|
2398
|
-
implicitly_close_tags(
|
2595
|
+
implicitly_close_tags(
|
2596
|
+
parser, token, GUMBO_NAMESPACE_HTML, token->v.end_tag);
|
2399
2597
|
return true;
|
2400
2598
|
} else if (tag_is(token, kEndTag, GUMBO_TAG_FORM)) {
|
2401
|
-
|
2402
|
-
|
2403
|
-
|
2404
|
-
|
2405
|
-
|
2406
|
-
|
2407
|
-
|
2408
|
-
|
2409
|
-
|
2410
|
-
|
2411
|
-
|
2412
|
-
|
2413
|
-
|
2414
|
-
|
2415
|
-
|
2416
|
-
|
2417
|
-
|
2599
|
+
if (has_open_element(parser, GUMBO_TAG_TEMPLATE)) {
|
2600
|
+
if (!has_an_element_in_scope(parser, GUMBO_TAG_FORM)) {
|
2601
|
+
parser_add_parse_error(parser, token);
|
2602
|
+
ignore_token(parser);
|
2603
|
+
return false;
|
2604
|
+
}
|
2605
|
+
bool success = true;
|
2606
|
+
generate_implied_end_tags(parser, GUMBO_TAG_LAST);
|
2607
|
+
if (!node_html_tag_is(get_current_node(parser), GUMBO_TAG_FORM)) {
|
2608
|
+
parser_add_parse_error(parser, token);
|
2609
|
+
return false;
|
2610
|
+
}
|
2611
|
+
while (!node_html_tag_is(pop_current_node(parser), GUMBO_TAG_FORM))
|
2612
|
+
;
|
2613
|
+
return success;
|
2614
|
+
} else {
|
2615
|
+
bool result = true;
|
2616
|
+
const GumboNode* node = state->_form_element;
|
2617
|
+
assert(!node || node->type == GUMBO_NODE_ELEMENT);
|
2618
|
+
state->_form_element = NULL;
|
2619
|
+
if (!node || !has_node_in_scope(parser, node)) {
|
2620
|
+
gumbo_debug("Closing an unopened form.\n");
|
2621
|
+
parser_add_parse_error(parser, token);
|
2622
|
+
ignore_token(parser);
|
2623
|
+
return false;
|
2624
|
+
}
|
2625
|
+
// This differs from implicitly_close_tags because we remove *only* the
|
2626
|
+
// <form> element; other nodes are left in scope.
|
2627
|
+
generate_implied_end_tags(parser, GUMBO_TAG_LAST);
|
2628
|
+
if (get_current_node(parser) != node) {
|
2629
|
+
parser_add_parse_error(parser, token);
|
2630
|
+
result = false;
|
2631
|
+
}
|
2418
2632
|
|
2419
|
-
|
2420
|
-
|
2421
|
-
|
2422
|
-
|
2423
|
-
|
2424
|
-
|
2633
|
+
GumboVector* open_elements = &state->_open_elements;
|
2634
|
+
int index = gumbo_vector_index_of(open_elements, node);
|
2635
|
+
assert(index >= 0);
|
2636
|
+
gumbo_vector_remove_at(parser, index, open_elements);
|
2637
|
+
return result;
|
2638
|
+
}
|
2425
2639
|
} else if (tag_is(token, kEndTag, GUMBO_TAG_P)) {
|
2426
2640
|
if (!has_an_element_in_button_scope(parser, GUMBO_TAG_P)) {
|
2427
2641
|
parser_add_parse_error(parser, token);
|
@@ -2431,15 +2645,17 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
2431
2645
|
state->_reprocess_current_token = true;
|
2432
2646
|
return false;
|
2433
2647
|
}
|
2434
|
-
return implicitly_close_tags(
|
2648
|
+
return implicitly_close_tags(
|
2649
|
+
parser, token, GUMBO_NAMESPACE_HTML, GUMBO_TAG_P);
|
2435
2650
|
} else if (tag_is(token, kEndTag, GUMBO_TAG_LI)) {
|
2436
2651
|
if (!has_an_element_in_list_scope(parser, GUMBO_TAG_LI)) {
|
2437
2652
|
parser_add_parse_error(parser, token);
|
2438
2653
|
ignore_token(parser);
|
2439
2654
|
return false;
|
2440
2655
|
}
|
2441
|
-
return implicitly_close_tags(
|
2442
|
-
|
2656
|
+
return implicitly_close_tags(
|
2657
|
+
parser, token, GUMBO_NAMESPACE_HTML, GUMBO_TAG_LI);
|
2658
|
+
} else if (tag_in(token, kEndTag, (gumbo_tagset){TAG(DD), TAG(DT)})) {
|
2443
2659
|
assert(token->type == GUMBO_TOKEN_END_TAG);
|
2444
2660
|
GumboTag token_tag = token->v.end_tag;
|
2445
2661
|
if (!has_an_element_in_scope(parser, token_tag)) {
|
@@ -2447,11 +2663,13 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
2447
2663
|
ignore_token(parser);
|
2448
2664
|
return false;
|
2449
2665
|
}
|
2450
|
-
return implicitly_close_tags(
|
2451
|
-
|
2452
|
-
|
2453
|
-
|
2454
|
-
|
2666
|
+
return implicitly_close_tags(
|
2667
|
+
parser, token, GUMBO_NAMESPACE_HTML, token_tag);
|
2668
|
+
} else if (tag_in(token, kEndTag, (gumbo_tagset){TAG(H1), TAG(H2), TAG(H3),
|
2669
|
+
TAG(H4), TAG(H5), TAG(H6)})) {
|
2670
|
+
if (!has_an_element_in_scope_with_tagname(
|
2671
|
+
parser, 6, (GumboTag[]){GUMBO_TAG_H1, GUMBO_TAG_H2, GUMBO_TAG_H3,
|
2672
|
+
GUMBO_TAG_H4, GUMBO_TAG_H5, GUMBO_TAG_H6})) {
|
2455
2673
|
// No heading open; ignore the token entirely.
|
2456
2674
|
parser_add_parse_error(parser, token);
|
2457
2675
|
ignore_token(parser);
|
@@ -2469,8 +2687,9 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
2469
2687
|
}
|
2470
2688
|
do {
|
2471
2689
|
current_node = pop_current_node(parser);
|
2472
|
-
} while (!node_tag_in_set(
|
2473
|
-
|
2690
|
+
} while (!node_tag_in_set(
|
2691
|
+
current_node, (gumbo_tagset){TAG(H1), TAG(H2), TAG(H3),
|
2692
|
+
TAG(H4), TAG(H5), TAG(H6)}));
|
2474
2693
|
return success;
|
2475
2694
|
}
|
2476
2695
|
} else if (tag_is(token, kStartTag, GUMBO_TAG_A)) {
|
@@ -2488,18 +2707,17 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
2488
2707
|
if (find_last_anchor_index(parser, &last_a)) {
|
2489
2708
|
void* last_element = gumbo_vector_remove_at(
|
2490
2709
|
parser, last_a, &state->_active_formatting_elements);
|
2491
|
-
gumbo_vector_remove(
|
2492
|
-
parser, last_element, &state->_open_elements);
|
2710
|
+
gumbo_vector_remove(parser, last_element, &state->_open_elements);
|
2493
2711
|
}
|
2494
2712
|
success = false;
|
2495
2713
|
}
|
2496
2714
|
reconstruct_active_formatting_elements(parser);
|
2497
2715
|
add_formatting_element(parser, insert_element_from_token(parser, token));
|
2498
2716
|
return success;
|
2499
|
-
|
2500
|
-
|
2501
|
-
|
2502
|
-
|
2717
|
+
} else if (tag_in(token, kStartTag,
|
2718
|
+
(gumbo_tagset){TAG(B), TAG(BIG), TAG(CODE), TAG(EM), TAG(FONT),
|
2719
|
+
TAG(I), TAG(S), TAG(SMALL), TAG(STRIKE), TAG(STRONG),
|
2720
|
+
TAG(TT), TAG(U)})) {
|
2503
2721
|
reconstruct_active_formatting_elements(parser);
|
2504
2722
|
add_formatting_element(parser, insert_element_from_token(parser, token));
|
2505
2723
|
return true;
|
@@ -2515,20 +2733,20 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
2515
2733
|
insert_element_from_token(parser, token);
|
2516
2734
|
add_formatting_element(parser, get_current_node(parser));
|
2517
2735
|
return result;
|
2518
|
-
|
2519
|
-
|
2520
|
-
|
2521
|
-
|
2736
|
+
} else if (tag_in(token, kEndTag,
|
2737
|
+
(gumbo_tagset){TAG(A), TAG(B), TAG(BIG), TAG(CODE), TAG(EM),
|
2738
|
+
TAG(FONT), TAG(I), TAG(NOBR), TAG(S), TAG(SMALL),
|
2739
|
+
TAG(STRIKE), TAG(STRONG), TAG(TT), TAG(U)})) {
|
2522
2740
|
return adoption_agency_algorithm(parser, token, token->v.end_tag);
|
2523
|
-
|
2524
|
-
|
2741
|
+
} else if (tag_in(token, kStartTag,
|
2742
|
+
(gumbo_tagset){TAG(APPLET), TAG(MARQUEE), TAG(OBJECT)})) {
|
2525
2743
|
reconstruct_active_formatting_elements(parser);
|
2526
2744
|
insert_element_from_token(parser, token);
|
2527
2745
|
add_formatting_element(parser, &kActiveFormattingScopeMarker);
|
2528
2746
|
set_frameset_not_ok(parser);
|
2529
2747
|
return true;
|
2530
|
-
|
2531
|
-
|
2748
|
+
} else if (tag_in(token, kEndTag,
|
2749
|
+
(gumbo_tagset){TAG(APPLET), TAG(MARQUEE), TAG(OBJECT)})) {
|
2532
2750
|
GumboTag token_tag = token->v.end_tag;
|
2533
2751
|
if (!has_an_element_in_table_scope(parser, token_tag)) {
|
2534
2752
|
parser_add_parse_error(parser, token);
|
@@ -2547,8 +2765,9 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
2547
2765
|
set_frameset_not_ok(parser);
|
2548
2766
|
set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE);
|
2549
2767
|
return true;
|
2550
|
-
|
2551
|
-
|
2768
|
+
} else if (tag_in(token, kStartTag,
|
2769
|
+
(gumbo_tagset){TAG(AREA), TAG(BR), TAG(EMBED), TAG(IMG),
|
2770
|
+
TAG(IMAGE), TAG(KEYGEN), TAG(WBR)})) {
|
2552
2771
|
bool success = true;
|
2553
2772
|
if (tag_is(token, kStartTag, GUMBO_TAG_IMAGE)) {
|
2554
2773
|
success = false;
|
@@ -2578,7 +2797,8 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
2578
2797
|
pop_current_node(parser);
|
2579
2798
|
acknowledge_self_closing_tag(parser);
|
2580
2799
|
return true;
|
2581
|
-
|
2800
|
+
} else if (tag_in(token, kStartTag,
|
2801
|
+
(gumbo_tagset){TAG(PARAM), TAG(SOURCE), TAG(TRACK)})) {
|
2582
2802
|
insert_element_from_token(parser, token);
|
2583
2803
|
pop_current_node(parser);
|
2584
2804
|
acknowledge_self_closing_tag(parser);
|
@@ -2592,7 +2812,8 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
2592
2812
|
return result;
|
2593
2813
|
} else if (tag_is(token, kStartTag, GUMBO_TAG_ISINDEX)) {
|
2594
2814
|
parser_add_parse_error(parser, token);
|
2595
|
-
if (parser->_parser_state->_form_element != NULL
|
2815
|
+
if (parser->_parser_state->_form_element != NULL &&
|
2816
|
+
!has_open_element(parser, GUMBO_TAG_TEMPLATE)) {
|
2596
2817
|
ignore_token(parser);
|
2597
2818
|
return false;
|
2598
2819
|
}
|
@@ -2607,15 +2828,18 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
2607
2828
|
|
2608
2829
|
GumboNode* form = insert_element_of_tag_type(
|
2609
2830
|
parser, GUMBO_TAG_FORM, GUMBO_INSERTION_FROM_ISINDEX);
|
2831
|
+
if (!has_open_element(parser, GUMBO_TAG_TEMPLATE)) {
|
2832
|
+
parser->_parser_state->_form_element = form;
|
2833
|
+
}
|
2610
2834
|
if (action_attr) {
|
2611
2835
|
gumbo_vector_add(parser, action_attr, &form->v.element.attributes);
|
2612
2836
|
}
|
2613
|
-
insert_element_of_tag_type(
|
2614
|
-
|
2615
|
-
pop_current_node(parser);
|
2837
|
+
insert_element_of_tag_type(
|
2838
|
+
parser, GUMBO_TAG_HR, GUMBO_INSERTION_FROM_ISINDEX);
|
2839
|
+
pop_current_node(parser); // <hr>
|
2616
2840
|
|
2617
|
-
insert_element_of_tag_type(
|
2618
|
-
|
2841
|
+
insert_element_of_tag_type(
|
2842
|
+
parser, GUMBO_TAG_LABEL, GUMBO_INSERTION_FROM_ISINDEX);
|
2619
2843
|
TextNodeBufferState* text_state = &parser->_parser_state->_text_node;
|
2620
2844
|
text_state->_start_original_text = token->original_text.data;
|
2621
2845
|
text_state->_start_position = token->position;
|
@@ -2628,15 +2852,15 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
2628
2852
|
text_state->_buffer.capacity = prompt_attr_length + 1;
|
2629
2853
|
gumbo_destroy_attribute(parser, prompt_attr);
|
2630
2854
|
} else {
|
2631
|
-
GumboStringPiece prompt_text =
|
2632
|
-
"This is a searchable index. Enter search keywords: ");
|
2855
|
+
GumboStringPiece prompt_text =
|
2856
|
+
GUMBO_STRING("This is a searchable index. Enter search keywords: ");
|
2633
2857
|
gumbo_string_buffer_append_string(
|
2634
2858
|
parser, &prompt_text, &text_state->_buffer);
|
2635
2859
|
}
|
2636
2860
|
|
2637
2861
|
GumboNode* input = insert_element_of_tag_type(
|
2638
2862
|
parser, GUMBO_TAG_INPUT, GUMBO_INSERTION_FROM_ISINDEX);
|
2639
|
-
for (int i = 0; i < token_attrs->length; ++i) {
|
2863
|
+
for (unsigned int i = 0; i < token_attrs->length; ++i) {
|
2640
2864
|
GumboAttribute* attr = token_attrs->data[i];
|
2641
2865
|
if (attr != prompt_attr && attr != action_attr && attr != name_attr) {
|
2642
2866
|
gumbo_vector_add(parser, attr, &input->v.element.attributes);
|
@@ -2649,6 +2873,13 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
2649
2873
|
// touching the attributes.
|
2650
2874
|
ignore_token(parser);
|
2651
2875
|
|
2876
|
+
// The name attribute, if present, should be destroyed since it's ignored
|
2877
|
+
// when copying over. The action attribute should be kept since it's moved
|
2878
|
+
// to the form.
|
2879
|
+
if (name_attr) {
|
2880
|
+
gumbo_destroy_attribute(parser, name_attr);
|
2881
|
+
}
|
2882
|
+
|
2652
2883
|
GumboAttribute* name =
|
2653
2884
|
gumbo_parser_allocate(parser, sizeof(GumboAttribute));
|
2654
2885
|
GumboStringPiece name_str = GUMBO_STRING("name");
|
@@ -2664,12 +2895,15 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
2664
2895
|
name->value_end = kGumboEmptySourcePosition;
|
2665
2896
|
gumbo_vector_add(parser, name, &input->v.element.attributes);
|
2666
2897
|
|
2667
|
-
pop_current_node(parser);
|
2668
|
-
pop_current_node(parser);
|
2898
|
+
pop_current_node(parser); // <input>
|
2899
|
+
pop_current_node(parser); // <label>
|
2669
2900
|
insert_element_of_tag_type(
|
2670
2901
|
parser, GUMBO_TAG_HR, GUMBO_INSERTION_FROM_ISINDEX);
|
2671
|
-
pop_current_node(parser);
|
2672
|
-
pop_current_node(parser);
|
2902
|
+
pop_current_node(parser); // <hr>
|
2903
|
+
pop_current_node(parser); // <form>
|
2904
|
+
if (!has_open_element(parser, GUMBO_TAG_TEMPLATE)) {
|
2905
|
+
parser->_parser_state->_form_element = NULL;
|
2906
|
+
}
|
2673
2907
|
return false;
|
2674
2908
|
} else if (tag_is(token, kStartTag, GUMBO_TAG_TEXTAREA)) {
|
2675
2909
|
run_generic_parsing_algorithm(parser, token, GUMBO_LEX_RCDATA);
|
@@ -2704,19 +2938,27 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
2704
2938
|
set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_SELECT);
|
2705
2939
|
}
|
2706
2940
|
return true;
|
2707
|
-
|
2941
|
+
} else if (tag_in(token, kStartTag,
|
2942
|
+
(gumbo_tagset){TAG(OPTION), TAG(OPTGROUP)})) {
|
2708
2943
|
if (node_html_tag_is(get_current_node(parser), GUMBO_TAG_OPTION)) {
|
2709
2944
|
pop_current_node(parser);
|
2710
2945
|
}
|
2711
2946
|
reconstruct_active_formatting_elements(parser);
|
2712
2947
|
insert_element_from_token(parser, token);
|
2713
2948
|
return true;
|
2714
|
-
|
2949
|
+
} else if (tag_in(token, kStartTag,
|
2950
|
+
(gumbo_tagset){TAG(RB), TAG(RP), TAG(RT), TAG(RTC)})) {
|
2715
2951
|
bool success = true;
|
2952
|
+
GumboTag exception =
|
2953
|
+
tag_in(token, kStartTag, (gumbo_tagset){TAG(RT), TAG(RP)})
|
2954
|
+
? GUMBO_TAG_RTC
|
2955
|
+
: GUMBO_TAG_LAST;
|
2716
2956
|
if (has_an_element_in_scope(parser, GUMBO_TAG_RUBY)) {
|
2717
|
-
generate_implied_end_tags(parser,
|
2957
|
+
generate_implied_end_tags(parser, exception);
|
2718
2958
|
}
|
2719
|
-
if (!node_html_tag_is(get_current_node(parser), GUMBO_TAG_RUBY)
|
2959
|
+
if (!node_html_tag_is(get_current_node(parser), GUMBO_TAG_RUBY) &&
|
2960
|
+
!(exception == GUMBO_TAG_LAST ||
|
2961
|
+
node_html_tag_is(get_current_node(parser), GUMBO_TAG_RTC))) {
|
2720
2962
|
parser_add_parse_error(parser, token);
|
2721
2963
|
success = false;
|
2722
2964
|
}
|
@@ -2749,10 +2991,10 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
2749
2991
|
acknowledge_self_closing_tag(parser);
|
2750
2992
|
}
|
2751
2993
|
return true;
|
2752
|
-
|
2753
|
-
|
2754
|
-
|
2755
|
-
|
2994
|
+
} else if (tag_in(token, kStartTag,
|
2995
|
+
(gumbo_tagset){TAG(CAPTION), TAG(COL), TAG(COLGROUP),
|
2996
|
+
TAG(FRAME), TAG(HEAD), TAG(TBODY), TAG(TD), TAG(TFOOT),
|
2997
|
+
TAG(TH), TAG(THEAD), TAG(TR)})) {
|
2756
2998
|
parser_add_parse_error(parser, token);
|
2757
2999
|
ignore_token(parser);
|
2758
3000
|
return false;
|
@@ -2771,14 +3013,15 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
2771
3013
|
// If we see a), implicitly close everything up to and including it. If we
|
2772
3014
|
// see b), then record a parse error, don't close anything (except the
|
2773
3015
|
// implied end tags) and ignore the end tag token.
|
2774
|
-
for (int i = state->_open_elements.length; --i >= 0;
|
3016
|
+
for (int i = state->_open_elements.length; --i >= 0;) {
|
2775
3017
|
const GumboNode* node = state->_open_elements.data[i];
|
2776
3018
|
if (node_html_tag_is(node, end_tag)) {
|
2777
3019
|
generate_implied_end_tags(parser, end_tag);
|
2778
3020
|
// TODO(jdtang): Do I need to add a parse error here? The condition in
|
2779
3021
|
// the spec seems like it's the inverse of the loop condition above, and
|
2780
3022
|
// so would never fire.
|
2781
|
-
while (node != pop_current_node(parser))
|
3023
|
+
while (node != pop_current_node(parser))
|
3024
|
+
; // Pop everything.
|
2782
3025
|
return true;
|
2783
3026
|
} else if (is_special_node(node)) {
|
2784
3027
|
parser_add_parse_error(parser, token);
|
@@ -2794,7 +3037,8 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
2794
3037
|
|
2795
3038
|
// http://www.whatwg.org/specs/web-apps/current-work/complete/tokenization.html#parsing-main-incdata
|
2796
3039
|
static bool handle_text(GumboParser* parser, GumboToken* token) {
|
2797
|
-
if (token->type == GUMBO_TOKEN_CHARACTER ||
|
3040
|
+
if (token->type == GUMBO_TOKEN_CHARACTER ||
|
3041
|
+
token->type == GUMBO_TOKEN_WHITESPACE) {
|
2798
3042
|
insert_text_token(parser, token);
|
2799
3043
|
} else {
|
2800
3044
|
// We provide only bare-bones script handling that doesn't involve any of
|
@@ -2854,11 +3098,12 @@ static bool handle_in_table(GumboParser* parser, GumboToken* token) {
|
|
2854
3098
|
parser->_parser_state->_reprocess_current_token = true;
|
2855
3099
|
set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_COLUMN_GROUP);
|
2856
3100
|
return true;
|
2857
|
-
} else if (tag_in(token, kStartTag,
|
2858
|
-
|
3101
|
+
} else if (tag_in(token, kStartTag,
|
3102
|
+
(gumbo_tagset){TAG(TBODY), TAG(TFOOT), TAG(THEAD), TAG(TD),
|
3103
|
+
TAG(TH), TAG(TR)})) {
|
2859
3104
|
clear_stack_to_table_context(parser);
|
2860
3105
|
set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE_BODY);
|
2861
|
-
if (tag_in(token, kStartTag, (gumbo_tagset)
|
3106
|
+
if (tag_in(token, kStartTag, (gumbo_tagset){TAG(TD), TAG(TH), TAG(TR)})) {
|
2862
3107
|
insert_element_of_tag_type(
|
2863
3108
|
parser, GUMBO_TAG_TBODY, GUMBO_INSERTION_IMPLIED);
|
2864
3109
|
state->_reprocess_current_token = true;
|
@@ -2880,25 +3125,27 @@ static bool handle_in_table(GumboParser* parser, GumboToken* token) {
|
|
2880
3125
|
return false;
|
2881
3126
|
}
|
2882
3127
|
return true;
|
2883
|
-
} else if (tag_in(token, kEndTag,
|
2884
|
-
|
2885
|
-
|
2886
|
-
|
3128
|
+
} else if (tag_in(token, kEndTag,
|
3129
|
+
(gumbo_tagset){TAG(BODY), TAG(CAPTION), TAG(COL),
|
3130
|
+
TAG(COLGROUP), TAG(HTML), TAG(TBODY), TAG(TD), TAG(TFOOT),
|
3131
|
+
TAG(TH), TAG(THEAD), TAG(TR)})) {
|
2887
3132
|
parser_add_parse_error(parser, token);
|
2888
3133
|
ignore_token(parser);
|
2889
3134
|
return false;
|
2890
|
-
} else if (tag_in(token, kStartTag,
|
3135
|
+
} else if (tag_in(token, kStartTag,
|
3136
|
+
(gumbo_tagset){TAG(STYLE), TAG(SCRIPT), TAG(TEMPLATE)}) ||
|
3137
|
+
(tag_is(token, kEndTag, GUMBO_TAG_TEMPLATE))) {
|
2891
3138
|
return handle_in_head(parser, token);
|
2892
3139
|
} else if (tag_is(token, kStartTag, GUMBO_TAG_INPUT) &&
|
2893
|
-
attribute_matches(
|
2894
|
-
|
3140
|
+
attribute_matches(
|
3141
|
+
&token->v.start_tag.attributes, "type", "hidden")) {
|
2895
3142
|
parser_add_parse_error(parser, token);
|
2896
3143
|
insert_element_from_token(parser, token);
|
2897
3144
|
pop_current_node(parser);
|
2898
3145
|
return false;
|
2899
3146
|
} else if (tag_is(token, kStartTag, GUMBO_TAG_FORM)) {
|
2900
3147
|
parser_add_parse_error(parser, token);
|
2901
|
-
if (state->_form_element) {
|
3148
|
+
if (state->_form_element || has_open_element(parser, GUMBO_TAG_TEMPLATE)) {
|
2902
3149
|
ignore_token(parser);
|
2903
3150
|
return false;
|
2904
3151
|
}
|
@@ -2906,11 +3153,7 @@ static bool handle_in_table(GumboParser* parser, GumboToken* token) {
|
|
2906
3153
|
pop_current_node(parser);
|
2907
3154
|
return false;
|
2908
3155
|
} else if (token->type == GUMBO_TOKEN_EOF) {
|
2909
|
-
|
2910
|
-
parser_add_parse_error(parser, token);
|
2911
|
-
return false;
|
2912
|
-
}
|
2913
|
-
return true;
|
3156
|
+
return handle_in_body(parser, token);
|
2914
3157
|
} else {
|
2915
3158
|
parser_add_parse_error(parser, token);
|
2916
3159
|
state->_foster_parent_insertions = true;
|
@@ -2938,8 +3181,9 @@ static bool handle_in_table_text(GumboParser* parser, GumboToken* token) {
|
|
2938
3181
|
// Note that TextNodeBuffer may contain UTF-8 characters, but the presence
|
2939
3182
|
// of any one byte that is not whitespace means we flip the flag, so this
|
2940
3183
|
// loop is still valid.
|
2941
|
-
for (int i = 0; i < buffer->length; ++i) {
|
2942
|
-
if (!isspace((unsigned char)buffer->data[i]) ||
|
3184
|
+
for (unsigned int i = 0; i < buffer->length; ++i) {
|
3185
|
+
if (!isspace((unsigned char) buffer->data[i]) ||
|
3186
|
+
buffer->data[i] == '\v') {
|
2943
3187
|
state->_foster_parent_insertions = true;
|
2944
3188
|
reconstruct_active_formatting_elements(parser);
|
2945
3189
|
break;
|
@@ -2955,35 +3199,43 @@ static bool handle_in_table_text(GumboParser* parser, GumboToken* token) {
|
|
2955
3199
|
|
2956
3200
|
// http://www.whatwg.org/specs/web-apps/current-work/complete/tokenization.html#parsing-main-incaption
|
2957
3201
|
static bool handle_in_caption(GumboParser* parser, GumboToken* token) {
|
2958
|
-
if (
|
2959
|
-
TAG(COLGROUP), TAG(TBODY), TAG(TD),
|
2960
|
-
TAG(TFOOT), TAG(TH), TAG(THEAD), TAG(TR) }) ||
|
2961
|
-
tag_in(token, kEndTag, (gumbo_tagset) { TAG(CAPTION), TAG(TABLE) })) {
|
3202
|
+
if (tag_is(token, kEndTag, GUMBO_TAG_CAPTION)) {
|
2962
3203
|
if (!has_an_element_in_table_scope(parser, GUMBO_TAG_CAPTION)) {
|
2963
3204
|
parser_add_parse_error(parser, token);
|
2964
3205
|
ignore_token(parser);
|
2965
3206
|
return false;
|
3207
|
+
} else {
|
3208
|
+
generate_implied_end_tags(parser, GUMBO_TAG_LAST);
|
3209
|
+
bool result = true;
|
3210
|
+
if (!node_html_tag_is(get_current_node(parser), GUMBO_TAG_CAPTION)) {
|
3211
|
+
parser_add_parse_error(parser, token);
|
3212
|
+
}
|
3213
|
+
while (!node_html_tag_is(pop_current_node(parser), GUMBO_TAG_CAPTION))
|
3214
|
+
;
|
3215
|
+
clear_active_formatting_elements(parser);
|
3216
|
+
set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE);
|
3217
|
+
return result;
|
2966
3218
|
}
|
2967
|
-
|
2968
|
-
|
2969
|
-
|
2970
|
-
|
2971
|
-
|
2972
|
-
|
2973
|
-
if (!node_html_tag_is(get_current_node(parser), GUMBO_TAG_CAPTION)) {
|
3219
|
+
} else if (tag_in(token, kStartTag,
|
3220
|
+
(gumbo_tagset){TAG(CAPTION), TAG(COL), TAG(COLGROUP),
|
3221
|
+
TAG(TBODY), TAG(TD), TAG(TFOOT), TAG(TH), TAG(THEAD),
|
3222
|
+
TAG(TR)}) ||
|
3223
|
+
(tag_is(token, kEndTag, GUMBO_TAG_TABLE))) {
|
3224
|
+
if (!has_an_element_in_table_scope(parser, GUMBO_TAG_CAPTION)) {
|
2974
3225
|
parser_add_parse_error(parser, token);
|
2975
|
-
|
2976
|
-
|
2977
|
-
}
|
2978
|
-
result = false;
|
3226
|
+
ignore_token(parser);
|
3227
|
+
return false;
|
2979
3228
|
}
|
2980
|
-
pop_current_node(parser)
|
3229
|
+
while (!node_html_tag_is(pop_current_node(parser), GUMBO_TAG_CAPTION))
|
3230
|
+
;
|
2981
3231
|
clear_active_formatting_elements(parser);
|
2982
3232
|
set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE);
|
2983
|
-
|
2984
|
-
|
2985
|
-
|
2986
|
-
|
3233
|
+
parser->_parser_state->_reprocess_current_token = true;
|
3234
|
+
return true;
|
3235
|
+
} else if (tag_in(token, kEndTag,
|
3236
|
+
(gumbo_tagset){TAG(BODY), TAG(COL), TAG(COLGROUP), TAG(HTML),
|
3237
|
+
TAG(TBODY), TAG(TD), TAG(TFOOT), TAG(TH), TAG(THEAD),
|
3238
|
+
TAG(TR)})) {
|
2987
3239
|
parser_add_parse_error(parser, token);
|
2988
3240
|
ignore_token(parser);
|
2989
3241
|
return false;
|
@@ -3011,24 +3263,33 @@ static bool handle_in_column_group(GumboParser* parser, GumboToken* token) {
|
|
3011
3263
|
pop_current_node(parser);
|
3012
3264
|
acknowledge_self_closing_tag(parser);
|
3013
3265
|
return true;
|
3266
|
+
} else if (tag_is(token, kEndTag, GUMBO_TAG_COLGROUP)) {
|
3267
|
+
if (!node_html_tag_is(get_current_node(parser), GUMBO_TAG_COLGROUP)) {
|
3268
|
+
parser_add_parse_error(parser, token);
|
3269
|
+
ignore_token(parser);
|
3270
|
+
return false;
|
3271
|
+
}
|
3272
|
+
pop_current_node(parser);
|
3273
|
+
set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE);
|
3274
|
+
return false;
|
3014
3275
|
} else if (tag_is(token, kEndTag, GUMBO_TAG_COL)) {
|
3015
3276
|
parser_add_parse_error(parser, token);
|
3016
3277
|
ignore_token(parser);
|
3017
3278
|
return false;
|
3018
|
-
} else if (token
|
3019
|
-
|
3020
|
-
return
|
3279
|
+
} else if (tag_is(token, kStartTag, GUMBO_TAG_TEMPLATE) ||
|
3280
|
+
tag_is(token, kEndTag, GUMBO_TAG_TEMPLATE)) {
|
3281
|
+
return handle_in_head(parser, token);
|
3282
|
+
} else if (token->type == GUMBO_TOKEN_EOF) {
|
3283
|
+
return handle_in_body(parser, token);
|
3021
3284
|
} else {
|
3022
|
-
if (get_current_node(parser)
|
3285
|
+
if (!node_html_tag_is(get_current_node(parser), GUMBO_TAG_COLGROUP)) {
|
3023
3286
|
parser_add_parse_error(parser, token);
|
3287
|
+
ignore_token(parser);
|
3024
3288
|
return false;
|
3025
3289
|
}
|
3026
|
-
assert(node_html_tag_is(get_current_node(parser), GUMBO_TAG_COLGROUP));
|
3027
3290
|
pop_current_node(parser);
|
3028
3291
|
set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE);
|
3029
|
-
|
3030
|
-
parser->_parser_state->_reprocess_current_token = true;
|
3031
|
-
}
|
3292
|
+
parser->_parser_state->_reprocess_current_token = true;
|
3032
3293
|
return true;
|
3033
3294
|
}
|
3034
3295
|
}
|
@@ -3040,14 +3301,15 @@ static bool handle_in_table_body(GumboParser* parser, GumboToken* token) {
|
|
3040
3301
|
insert_element_from_token(parser, token);
|
3041
3302
|
set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_ROW);
|
3042
3303
|
return true;
|
3043
|
-
} else if (tag_in(token, kStartTag, (gumbo_tagset)
|
3304
|
+
} else if (tag_in(token, kStartTag, (gumbo_tagset){TAG(TD), TAG(TH)})) {
|
3044
3305
|
parser_add_parse_error(parser, token);
|
3045
3306
|
clear_stack_to_table_body_context(parser);
|
3046
3307
|
insert_element_of_tag_type(parser, GUMBO_TAG_TR, GUMBO_INSERTION_IMPLIED);
|
3047
3308
|
parser->_parser_state->_reprocess_current_token = true;
|
3048
3309
|
set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_ROW);
|
3049
3310
|
return false;
|
3050
|
-
} else if (tag_in(token, kEndTag,
|
3311
|
+
} else if (tag_in(token, kEndTag,
|
3312
|
+
(gumbo_tagset){TAG(TBODY), TAG(TFOOT), TAG(THEAD)})) {
|
3051
3313
|
if (!has_an_element_in_table_scope(parser, token->v.end_tag)) {
|
3052
3314
|
parser_add_parse_error(parser, token);
|
3053
3315
|
ignore_token(parser);
|
@@ -3057,12 +3319,13 @@ static bool handle_in_table_body(GumboParser* parser, GumboToken* token) {
|
|
3057
3319
|
pop_current_node(parser);
|
3058
3320
|
set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE);
|
3059
3321
|
return true;
|
3060
|
-
} else if (tag_in(token, kStartTag,
|
3061
|
-
|
3322
|
+
} else if (tag_in(token, kStartTag,
|
3323
|
+
(gumbo_tagset){TAG(CAPTION), TAG(COL), TAG(COLGROUP),
|
3324
|
+
TAG(TBODY), TAG(TFOOT), TAG(THEAD)}) ||
|
3062
3325
|
tag_is(token, kEndTag, GUMBO_TAG_TABLE)) {
|
3063
3326
|
if (!(has_an_element_in_table_scope(parser, GUMBO_TAG_TBODY) ||
|
3064
|
-
|
3065
|
-
|
3327
|
+
has_an_element_in_table_scope(parser, GUMBO_TAG_THEAD) ||
|
3328
|
+
has_an_element_in_table_scope(parser, GUMBO_TAG_TFOOT))) {
|
3066
3329
|
parser_add_parse_error(parser, token);
|
3067
3330
|
ignore_token(parser);
|
3068
3331
|
return false;
|
@@ -3072,9 +3335,9 @@ static bool handle_in_table_body(GumboParser* parser, GumboToken* token) {
|
|
3072
3335
|
set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE);
|
3073
3336
|
parser->_parser_state->_reprocess_current_token = true;
|
3074
3337
|
return true;
|
3075
|
-
} else if (tag_in(token, kEndTag,
|
3076
|
-
|
3077
|
-
|
3338
|
+
} else if (tag_in(token, kEndTag,
|
3339
|
+
(gumbo_tagset){TAG(BODY), TAG(CAPTION), TAG(COL), TAG(TR),
|
3340
|
+
TAG(COLGROUP), TAG(HTML), TAG(TD), TAG(TH)})) {
|
3078
3341
|
parser_add_parse_error(parser, token);
|
3079
3342
|
ignore_token(parser);
|
3080
3343
|
return false;
|
@@ -3085,45 +3348,55 @@ static bool handle_in_table_body(GumboParser* parser, GumboToken* token) {
|
|
3085
3348
|
|
3086
3349
|
// http://www.whatwg.org/specs/web-apps/current-work/complete/tokenization.html#parsing-main-intr
|
3087
3350
|
static bool handle_in_row(GumboParser* parser, GumboToken* token) {
|
3088
|
-
if (tag_in(token, kStartTag, (gumbo_tagset)
|
3351
|
+
if (tag_in(token, kStartTag, (gumbo_tagset){TAG(TH), TAG(TD)})) {
|
3089
3352
|
clear_stack_to_table_row_context(parser);
|
3090
3353
|
insert_element_from_token(parser, token);
|
3091
3354
|
set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_CELL);
|
3092
3355
|
add_formatting_element(parser, &kActiveFormattingScopeMarker);
|
3093
3356
|
return true;
|
3094
|
-
} else if (
|
3095
|
-
|
3096
|
-
tag_in(token, kEndTag, (gumbo_tagset) { TAG(TR), TAG(TABLE),
|
3097
|
-
TAG(TBODY), TAG(TFOOT), TAG(THEAD) })) {
|
3098
|
-
// This case covers 4 clauses of the spec, each of which say "Otherwise, act
|
3099
|
-
// as if an end tag with the tag name "tr" had been seen." The differences
|
3100
|
-
// are in error handling and whether the current token is reprocessed.
|
3101
|
-
GumboTag desired_tag =
|
3102
|
-
tag_in(token, kEndTag, (gumbo_tagset) { TAG(TBODY), TAG(TFOOT),
|
3103
|
-
TAG(THEAD) })
|
3104
|
-
? token->v.end_tag : GUMBO_TAG_TR;
|
3105
|
-
if (!has_an_element_in_table_scope(parser, desired_tag)) {
|
3106
|
-
gumbo_debug("Bailing because there is no tag %s in table scope.\nOpen elements:",
|
3107
|
-
gumbo_normalized_tagname(desired_tag));
|
3108
|
-
for (int i = 0; i < parser->_parser_state->_open_elements.length; ++i) {
|
3109
|
-
const GumboNode* node = parser->_parser_state->_open_elements.data[i];
|
3110
|
-
gumbo_debug("%s\n", gumbo_normalized_tagname(node->v.element.tag));
|
3111
|
-
}
|
3357
|
+
} else if (tag_is(token, kEndTag, GUMBO_TAG_TR)) {
|
3358
|
+
if (!has_an_element_in_table_scope(parser, GUMBO_TAG_TR)) {
|
3112
3359
|
parser_add_parse_error(parser, token);
|
3113
3360
|
ignore_token(parser);
|
3114
3361
|
return false;
|
3362
|
+
} else {
|
3363
|
+
clear_stack_to_table_row_context(parser);
|
3364
|
+
pop_current_node(parser);
|
3365
|
+
set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE_BODY);
|
3366
|
+
return true;
|
3115
3367
|
}
|
3116
|
-
|
3117
|
-
|
3118
|
-
|
3119
|
-
|
3120
|
-
|
3121
|
-
|
3368
|
+
} else if (tag_in(token, kStartTag,
|
3369
|
+
(gumbo_tagset){TAG(CAPTION), TAG(COL), TAG(COLGROUP),
|
3370
|
+
TAG(TBODY), TAG(TFOOT), TAG(THEAD), TAG(TR)}) ||
|
3371
|
+
tag_is(token, kEndTag, GUMBO_TAG_TABLE)) {
|
3372
|
+
if (!has_an_element_in_table_scope(parser, GUMBO_TAG_TR)) {
|
3373
|
+
parser_add_parse_error(parser, token);
|
3374
|
+
ignore_token(parser);
|
3375
|
+
return false;
|
3376
|
+
} else {
|
3377
|
+
clear_stack_to_table_row_context(parser);
|
3378
|
+
pop_current_node(parser);
|
3379
|
+
set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE_BODY);
|
3122
3380
|
parser->_parser_state->_reprocess_current_token = true;
|
3381
|
+
return true;
|
3123
3382
|
}
|
3124
|
-
|
3125
|
-
|
3126
|
-
|
3383
|
+
} else if (tag_in(token, kEndTag,
|
3384
|
+
(gumbo_tagset){TAG(TBODY), TAG(TFOOT), TAG(THEAD)})) {
|
3385
|
+
if (!has_an_element_in_table_scope(parser, token->v.end_tag) ||
|
3386
|
+
(!has_an_element_in_table_scope(parser, GUMBO_TAG_TR))) {
|
3387
|
+
parser_add_parse_error(parser, token);
|
3388
|
+
ignore_token(parser);
|
3389
|
+
return false;
|
3390
|
+
} else {
|
3391
|
+
clear_stack_to_table_row_context(parser);
|
3392
|
+
pop_current_node(parser);
|
3393
|
+
set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE_BODY);
|
3394
|
+
parser->_parser_state->_reprocess_current_token = true;
|
3395
|
+
return true;
|
3396
|
+
}
|
3397
|
+
} else if (tag_in(token, kEndTag,
|
3398
|
+
(gumbo_tagset){TAG(BODY), TAG(CAPTION), TAG(COL),
|
3399
|
+
TAG(COLGROUP), TAG(HTML), TAG(TD), TAG(TH)})) {
|
3127
3400
|
parser_add_parse_error(parser, token);
|
3128
3401
|
ignore_token(parser);
|
3129
3402
|
return false;
|
@@ -3134,16 +3407,18 @@ static bool handle_in_row(GumboParser* parser, GumboToken* token) {
|
|
3134
3407
|
|
3135
3408
|
// http://www.whatwg.org/specs/web-apps/current-work/complete/tokenization.html#parsing-main-intd
|
3136
3409
|
static bool handle_in_cell(GumboParser* parser, GumboToken* token) {
|
3137
|
-
if (tag_in(token, kEndTag, (gumbo_tagset)
|
3410
|
+
if (tag_in(token, kEndTag, (gumbo_tagset){TAG(TD), TAG(TH)})) {
|
3138
3411
|
GumboTag token_tag = token->v.end_tag;
|
3139
3412
|
if (!has_an_element_in_table_scope(parser, token_tag)) {
|
3140
3413
|
parser_add_parse_error(parser, token);
|
3414
|
+
ignore_token(parser);
|
3141
3415
|
return false;
|
3142
3416
|
}
|
3143
3417
|
return close_table_cell(parser, token, token_tag);
|
3144
|
-
} else if (tag_in(token, kStartTag,
|
3145
|
-
|
3146
|
-
|
3418
|
+
} else if (tag_in(token, kStartTag,
|
3419
|
+
(gumbo_tagset){TAG(CAPTION), TAG(COL), TAG(COLGROUP),
|
3420
|
+
TAG(TBODY), TAG(TD), TAG(TFOOT), TAG(TH), TAG(THEAD),
|
3421
|
+
TAG(TR)})) {
|
3147
3422
|
gumbo_debug("Handling <td> in cell.\n");
|
3148
3423
|
if (!has_an_element_in_table_scope(parser, GUMBO_TAG_TH) &&
|
3149
3424
|
!has_an_element_in_table_scope(parser, GUMBO_TAG_TD)) {
|
@@ -3154,13 +3429,13 @@ static bool handle_in_cell(GumboParser* parser, GumboToken* token) {
|
|
3154
3429
|
}
|
3155
3430
|
parser->_parser_state->_reprocess_current_token = true;
|
3156
3431
|
return close_current_cell(parser, token);
|
3157
|
-
} else if (tag_in(token, kEndTag, (gumbo_tagset)
|
3158
|
-
|
3432
|
+
} else if (tag_in(token, kEndTag, (gumbo_tagset){TAG(BODY), TAG(CAPTION),
|
3433
|
+
TAG(COL), TAG(COLGROUP), TAG(HTML)})) {
|
3159
3434
|
parser_add_parse_error(parser, token);
|
3160
3435
|
ignore_token(parser);
|
3161
3436
|
return false;
|
3162
|
-
} else if (tag_in(token, kEndTag, (gumbo_tagset)
|
3163
|
-
|
3437
|
+
} else if (tag_in(token, kEndTag, (gumbo_tagset){TAG(TABLE), TAG(TBODY),
|
3438
|
+
TAG(TFOOT), TAG(THEAD), TAG(TR)})) {
|
3164
3439
|
if (!has_an_element_in_table_scope(parser, token->v.end_tag)) {
|
3165
3440
|
parser_add_parse_error(parser, token);
|
3166
3441
|
ignore_token(parser);
|
@@ -3211,7 +3486,7 @@ static bool handle_in_select(GumboParser* parser, GumboToken* token) {
|
|
3211
3486
|
GumboVector* open_elements = &parser->_parser_state->_open_elements;
|
3212
3487
|
if (node_html_tag_is(get_current_node(parser), GUMBO_TAG_OPTION) &&
|
3213
3488
|
node_html_tag_is(open_elements->data[open_elements->length - 2],
|
3214
|
-
|
3489
|
+
GUMBO_TAG_OPTGROUP)) {
|
3215
3490
|
pop_current_node(parser);
|
3216
3491
|
}
|
3217
3492
|
if (node_html_tag_is(get_current_node(parser), GUMBO_TAG_OPTGROUP)) {
|
@@ -3242,9 +3517,12 @@ static bool handle_in_select(GumboParser* parser, GumboToken* token) {
|
|
3242
3517
|
} else if (tag_is(token, kStartTag, GUMBO_TAG_SELECT)) {
|
3243
3518
|
parser_add_parse_error(parser, token);
|
3244
3519
|
ignore_token(parser);
|
3245
|
-
|
3520
|
+
if (has_an_element_in_select_scope(parser, GUMBO_TAG_SELECT)) {
|
3521
|
+
close_current_select(parser);
|
3522
|
+
}
|
3246
3523
|
return false;
|
3247
|
-
} else if (tag_in(token, kStartTag,
|
3524
|
+
} else if (tag_in(token, kStartTag,
|
3525
|
+
(gumbo_tagset){TAG(INPUT), TAG(KEYGEN), TAG(TEXTAREA)})) {
|
3248
3526
|
parser_add_parse_error(parser, token);
|
3249
3527
|
if (!has_an_element_in_select_scope(parser, GUMBO_TAG_SELECT)) {
|
3250
3528
|
ignore_token(parser);
|
@@ -3253,14 +3531,12 @@ static bool handle_in_select(GumboParser* parser, GumboToken* token) {
|
|
3253
3531
|
parser->_parser_state->_reprocess_current_token = true;
|
3254
3532
|
}
|
3255
3533
|
return false;
|
3256
|
-
} else if (
|
3534
|
+
} else if (tag_in(token, kStartTag,
|
3535
|
+
(gumbo_tagset){TAG(SCRIPT), TAG(TEMPLATE)}) ||
|
3536
|
+
tag_is(token, kEndTag, GUMBO_TAG_TEMPLATE)) {
|
3257
3537
|
return handle_in_head(parser, token);
|
3258
3538
|
} else if (token->type == GUMBO_TOKEN_EOF) {
|
3259
|
-
|
3260
|
-
parser_add_parse_error(parser, token);
|
3261
|
-
return false;
|
3262
|
-
}
|
3263
|
-
return true;
|
3539
|
+
return handle_in_body(parser, token);
|
3264
3540
|
} else {
|
3265
3541
|
parser_add_parse_error(parser, token);
|
3266
3542
|
ignore_token(parser);
|
@@ -3270,23 +3546,28 @@ static bool handle_in_select(GumboParser* parser, GumboToken* token) {
|
|
3270
3546
|
|
3271
3547
|
// http://www.whatwg.org/specs/web-apps/current-work/complete/tokenization.html#parsing-main-inselectintable
|
3272
3548
|
static bool handle_in_select_in_table(GumboParser* parser, GumboToken* token) {
|
3273
|
-
if (tag_in(token, kStartTag,
|
3274
|
-
|
3549
|
+
if (tag_in(token, kStartTag,
|
3550
|
+
(gumbo_tagset){TAG(CAPTION), TAG(TABLE), TAG(TBODY), TAG(TFOOT),
|
3551
|
+
TAG(THEAD), TAG(TR), TAG(TD), TAG(TH)})) {
|
3275
3552
|
parser_add_parse_error(parser, token);
|
3276
3553
|
close_current_select(parser);
|
3277
3554
|
parser->_parser_state->_reprocess_current_token = true;
|
3278
3555
|
return false;
|
3279
|
-
} else if (tag_in(token, kEndTag,
|
3280
|
-
|
3556
|
+
} else if (tag_in(token, kEndTag,
|
3557
|
+
(gumbo_tagset){TAG(CAPTION), TAG(TABLE), TAG(TBODY),
|
3558
|
+
TAG(TFOOT), TAG(THEAD), TAG(TR), TAG(TD), TAG(TH)})) {
|
3281
3559
|
parser_add_parse_error(parser, token);
|
3282
|
-
if (has_an_element_in_table_scope(parser, token->v.end_tag)) {
|
3560
|
+
if (!has_an_element_in_table_scope(parser, token->v.end_tag)) {
|
3561
|
+
ignore_token(parser);
|
3562
|
+
return false;
|
3563
|
+
} else {
|
3283
3564
|
close_current_select(parser);
|
3284
|
-
|
3565
|
+
// close_current_select already does the
|
3566
|
+
// reset_insertion_mode_appropriately
|
3567
|
+
// reset_insertion_mode_appropriately(parser);
|
3285
3568
|
parser->_parser_state->_reprocess_current_token = true;
|
3286
|
-
|
3287
|
-
ignore_token(parser);
|
3569
|
+
return false;
|
3288
3570
|
}
|
3289
|
-
return false;
|
3290
3571
|
} else {
|
3291
3572
|
return handle_in_select(parser, token);
|
3292
3573
|
}
|
@@ -3294,8 +3575,71 @@ static bool handle_in_select_in_table(GumboParser* parser, GumboToken* token) {
|
|
3294
3575
|
|
3295
3576
|
// http://www.whatwg.org/specs/web-apps/current-work/multipage/tree-construction.html#parsing-main-intemplate
|
3296
3577
|
// Token handler for the "in template" insertion mode.
//
// Every branch below returns, so the dispatch is written as a flat sequence of
// guard clauses that are tested in order.  Tokens are either delegated to the
// "in body" / "in head" handlers, or cause the current template insertion mode
// to be replaced by a more specific one, after which the same token is flagged
// for reprocessing.
//
// Returns the delegate's result when delegating, true when the mode was
// switched (or when EOF is reached with no open <template>), and false in the
// branches that record a parse error here.
static bool handle_in_template(GumboParser* parser, GumboToken* token) {
  GumboParserState* parser_state = parser->_parser_state;

  // Character data, comments and doctypes are processed as in "in body".
  switch (token->type) {
    case GUMBO_TOKEN_DOCTYPE:
    case GUMBO_TOKEN_COMMENT:
    case GUMBO_TOKEN_NULL:
    case GUMBO_TOKEN_CHARACTER:
    case GUMBO_TOKEN_WHITESPACE:
      return handle_in_body(parser, token);
    default:
      break;
  }

  // Head-level start tags and </template> are processed as in "in head".
  if (tag_in(token, kStartTag,
          (gumbo_tagset){TAG(BASE), TAG(BASEFONT), TAG(BGSOUND), TAG(LINK),
              TAG(META), TAG(NOFRAMES), TAG(SCRIPT), TAG(STYLE),
              TAG(TEMPLATE), TAG(TITLE)}) ||
      tag_is(token, kEndTag, GUMBO_TAG_TEMPLATE)) {
    return handle_in_head(parser, token);
  }

  // Table-section start tags: continue in "in table".
  if (tag_in(token, kStartTag,
          (gumbo_tagset){TAG(CAPTION), TAG(COLGROUP), TAG(TBODY), TAG(TFOOT),
              TAG(THEAD)})) {
    pop_template_insertion_mode(parser);
    push_template_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE);
    set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE);
    parser_state->_reprocess_current_token = true;
    return true;
  }

  // <col>: continue in "in column group".
  if (tag_is(token, kStartTag, GUMBO_TAG_COL)) {
    pop_template_insertion_mode(parser);
    push_template_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_COLUMN_GROUP);
    set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_COLUMN_GROUP);
    parser_state->_reprocess_current_token = true;
    return true;
  }

  // <tr>: continue in "in table body".
  if (tag_is(token, kStartTag, GUMBO_TAG_TR)) {
    pop_template_insertion_mode(parser);
    push_template_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE_BODY);
    set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE_BODY);
    parser_state->_reprocess_current_token = true;
    return true;
  }

  // <td> / <th>: continue in "in row".
  if (tag_in(token, kStartTag, (gumbo_tagset){TAG(TD), TAG(TH)})) {
    pop_template_insertion_mode(parser);
    push_template_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_ROW);
    set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_ROW);
    parser_state->_reprocess_current_token = true;
    return true;
  }

  // Any other start tag: continue in "in body".
  if (token->type == GUMBO_TOKEN_START_TAG) {
    pop_template_insertion_mode(parser);
    push_template_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_BODY);
    set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_BODY);
    parser_state->_reprocess_current_token = true;
    return true;
  }

  // Any other end tag: parse error, token is ignored.
  if (token->type == GUMBO_TOKEN_END_TAG) {
    parser_add_parse_error(parser, token);
    ignore_token(parser);
    return false;
  }

  if (token->type == GUMBO_TOKEN_EOF) {
    if (!has_open_element(parser, GUMBO_TAG_TEMPLATE)) {
      // Stop parsing.
      return true;
    }
    parser_add_parse_error(parser, token);
    // Pop open elements up to and including the <template> element.
    while (!node_html_tag_is(pop_current_node(parser), GUMBO_TAG_TEMPLATE)) {
    }
    clear_active_formatting_elements(parser);
    pop_template_insertion_mode(parser);
    reset_insertion_mode_appropriately(parser);
    parser_state->_reprocess_current_token = true;
    return false;
  }

  // No other token type is expected to reach this handler.
  assert(0);
  return false;
}
|
3300
3644
|
|
3301
3645
|
// http://www.whatwg.org/specs/web-apps/current-work/complete/tokenization.html#parsing-main-afterbody
|
@@ -3313,7 +3657,12 @@ static bool handle_after_body(GumboParser* parser, GumboToken* token) {
|
|
3313
3657
|
ignore_token(parser);
|
3314
3658
|
return false;
|
3315
3659
|
} else if (tag_is(token, kEndTag, GUMBO_TAG_HTML)) {
|
3316
|
-
|
3660
|
+
/* fragment case: ignore the closing HTML token */
|
3661
|
+
if (is_fragment_parser(parser)) {
|
3662
|
+
parser_add_parse_error(parser, token);
|
3663
|
+
ignore_token(parser);
|
3664
|
+
return false;
|
3665
|
+
}
|
3317
3666
|
set_insertion_mode(parser, GUMBO_INSERTION_MODE_AFTER_AFTER_BODY);
|
3318
3667
|
GumboNode* html = parser->_parser_state->_open_elements.data[0];
|
3319
3668
|
assert(node_html_tag_is(html, GUMBO_TAG_HTML));
|
@@ -3354,9 +3703,8 @@ static bool handle_in_frameset(GumboParser* parser, GumboToken* token) {
|
|
3354
3703
|
return false;
|
3355
3704
|
}
|
3356
3705
|
pop_current_node(parser);
|
3357
|
-
|
3358
|
-
|
3359
|
-
if (!node_html_tag_is(get_current_node(parser), GUMBO_TAG_FRAMESET)) {
|
3706
|
+
if (!is_fragment_parser(parser) &&
|
3707
|
+
!node_html_tag_is(get_current_node(parser), GUMBO_TAG_FRAMESET)) {
|
3360
3708
|
set_insertion_mode(parser, GUMBO_INSERTION_MODE_AFTER_FRAMESET);
|
3361
3709
|
}
|
3362
3710
|
return true;
|
@@ -3455,31 +3803,14 @@ static bool handle_after_after_frameset(
|
|
3455
3803
|
// Function pointers for each insertion mode. Keep in sync with
|
3456
3804
|
// insertion_mode.h.
|
3457
3805
|
typedef bool (*TokenHandler)(GumboParser* parser, GumboToken* token);
|
3458
|
-
static const TokenHandler kTokenHandlers[] = {
|
3459
|
-
|
3460
|
-
|
3461
|
-
|
3462
|
-
|
3463
|
-
|
3464
|
-
|
3465
|
-
|
3466
|
-
handle_text,
|
3467
|
-
handle_in_table,
|
3468
|
-
handle_in_table_text,
|
3469
|
-
handle_in_caption,
|
3470
|
-
handle_in_column_group,
|
3471
|
-
handle_in_table_body,
|
3472
|
-
handle_in_row,
|
3473
|
-
handle_in_cell,
|
3474
|
-
handle_in_select,
|
3475
|
-
handle_in_select_in_table,
|
3476
|
-
handle_in_template,
|
3477
|
-
handle_after_body,
|
3478
|
-
handle_in_frameset,
|
3479
|
-
handle_after_frameset,
|
3480
|
-
handle_after_after_body,
|
3481
|
-
handle_after_after_frameset
|
3482
|
-
};
|
3806
|
+
static const TokenHandler kTokenHandlers[] = {handle_initial,
|
3807
|
+
handle_before_html, handle_before_head, handle_in_head,
|
3808
|
+
handle_in_head_noscript, handle_after_head, handle_in_body, handle_text,
|
3809
|
+
handle_in_table, handle_in_table_text, handle_in_caption,
|
3810
|
+
handle_in_column_group, handle_in_table_body, handle_in_row, handle_in_cell,
|
3811
|
+
handle_in_select, handle_in_select_in_table, handle_in_template,
|
3812
|
+
handle_after_body, handle_in_frameset, handle_after_frameset,
|
3813
|
+
handle_after_after_body, handle_after_after_frameset};
|
3483
3814
|
|
3484
3815
|
static bool handle_html_content(GumboParser* parser, GumboToken* token) {
|
3485
3816
|
return kTokenHandlers[(unsigned int) parser->_parser_state->_insertion_mode](
|
@@ -3488,6 +3819,7 @@ static bool handle_html_content(GumboParser* parser, GumboToken* token) {
|
|
3488
3819
|
|
3489
3820
|
// http://www.whatwg.org/specs/web-apps/current-work/complete/tokenization.html#parsing-main-inforeign
|
3490
3821
|
static bool handle_in_foreign_content(GumboParser* parser, GumboToken* token) {
|
3822
|
+
gumbo_debug("Handling foreign content");
|
3491
3823
|
switch (token->type) {
|
3492
3824
|
case GUMBO_TOKEN_NULL:
|
3493
3825
|
parser_add_parse_error(parser, token);
|
@@ -3514,34 +3846,44 @@ static bool handle_in_foreign_content(GumboParser* parser, GumboToken* token) {
|
|
3514
3846
|
break;
|
3515
3847
|
}
|
3516
3848
|
// Order matters for these clauses.
|
3517
|
-
if (tag_in(token, kStartTag,
|
3518
|
-
TAG(BLOCKQUOTE), TAG(BODY), TAG(BR),
|
3519
|
-
|
3520
|
-
|
3521
|
-
|
3522
|
-
|
3523
|
-
|
3524
|
-
|
3525
|
-
|
3526
|
-
|
3527
|
-
|
3528
|
-
|
3529
|
-
|
3530
|
-
|
3531
|
-
token_has_attribute(token, "face") ||
|
3532
|
-
token_has_attribute(token, "size")))) {
|
3849
|
+
if (tag_in(token, kStartTag,
|
3850
|
+
(gumbo_tagset){TAG(B), TAG(BIG), TAG(BLOCKQUOTE), TAG(BODY), TAG(BR),
|
3851
|
+
TAG(CENTER), TAG(CODE), TAG(DD), TAG(DIV), TAG(DL), TAG(DT),
|
3852
|
+
TAG(EM), TAG(EMBED), TAG(H1), TAG(H2), TAG(H3), TAG(H4), TAG(H5),
|
3853
|
+
TAG(H6), TAG(HEAD), TAG(HR), TAG(I), TAG(IMG), TAG(LI),
|
3854
|
+
TAG(LISTING), TAG(MENU), TAG(META), TAG(NOBR), TAG(OL), TAG(P),
|
3855
|
+
TAG(PRE), TAG(RUBY), TAG(S), TAG(SMALL), TAG(SPAN), TAG(STRONG),
|
3856
|
+
TAG(STRIKE), TAG(SUB), TAG(SUP), TAG(TABLE), TAG(TT), TAG(U),
|
3857
|
+
TAG(UL), TAG(VAR)}) ||
|
3858
|
+
(tag_is(token, kStartTag, GUMBO_TAG_FONT) &&
|
3859
|
+
(token_has_attribute(token, "color") ||
|
3860
|
+
token_has_attribute(token, "face") ||
|
3861
|
+
token_has_attribute(token, "size")))) {
|
3862
|
+
/* Parse error */
|
3533
3863
|
parser_add_parse_error(parser, token);
|
3534
|
-
|
3535
|
-
|
3536
|
-
|
3537
|
-
|
3538
|
-
|
3539
|
-
|
3540
|
-
parser
|
3541
|
-
|
3542
|
-
|
3864
|
+
|
3865
|
+
/*
|
3866
|
+
* Fragment case: If the parser was originally created for the HTML
|
3867
|
+
* fragment parsing algorithm, then act as described in the "any other
|
3868
|
+
* start tag" entry below.
|
3869
|
+
*/
|
3870
|
+
if (!is_fragment_parser(parser)) {
|
3871
|
+
do {
|
3872
|
+
pop_current_node(parser);
|
3873
|
+
} while (!(is_mathml_integration_point(get_current_node(parser)) ||
|
3874
|
+
is_html_integration_point(get_current_node(parser)) ||
|
3875
|
+
get_current_node(parser)->v.element.tag_namespace ==
|
3876
|
+
GUMBO_NAMESPACE_HTML));
|
3877
|
+
parser->_parser_state->_reprocess_current_token = true;
|
3878
|
+
return false;
|
3879
|
+
}
|
3880
|
+
|
3881
|
+
assert(token->type == GUMBO_TOKEN_START_TAG);
|
3882
|
+
}
|
3883
|
+
|
3884
|
+
if (token->type == GUMBO_TOKEN_START_TAG) {
|
3543
3885
|
const GumboNamespaceEnum current_namespace =
|
3544
|
-
|
3886
|
+
get_adjusted_current_node(parser)->v.element.tag_namespace;
|
3545
3887
|
if (current_namespace == GUMBO_NAMESPACE_MATHML) {
|
3546
3888
|
adjust_mathml_attributes(parser, token);
|
3547
3889
|
}
|
@@ -3557,8 +3899,8 @@ static bool handle_in_foreign_content(GumboParser* parser, GumboToken* token) {
|
|
3557
3899
|
acknowledge_self_closing_tag(parser);
|
3558
3900
|
}
|
3559
3901
|
return true;
|
3560
|
-
|
3561
|
-
|
3902
|
+
// </script> tags are handled like any other end tag, putting the script's
|
3903
|
+
// text into a text node child and closing the current node.
|
3562
3904
|
} else {
|
3563
3905
|
assert(token->type == GUMBO_TOKEN_END_TAG);
|
3564
3906
|
GumboNode* node = get_current_node(parser);
|
@@ -3574,13 +3916,13 @@ static bool handle_in_foreign_content(GumboParser* parser, GumboToken* token) {
|
|
3574
3916
|
is_success = false;
|
3575
3917
|
}
|
3576
3918
|
int i = parser->_parser_state->_open_elements.length;
|
3577
|
-
for(
|
3919
|
+
for (--i; i > 0;) {
|
3578
3920
|
// Here we move up the stack until we find an HTML element (in which
|
3579
3921
|
// case we do nothing) or we find the element that we're about to
|
3580
3922
|
// close (in which case we pop everything we've seen until that
|
3581
3923
|
// point.)
|
3582
3924
|
gumbo_debug("Foreign %.*s node at %d.\n", node_tagname.length,
|
3583
|
-
|
3925
|
+
node_tagname.data, i);
|
3584
3926
|
if (gumbo_string_equals_ignore_case(&node_tagname, &token_tagname)) {
|
3585
3927
|
gumbo_debug("Matches.\n");
|
3586
3928
|
while (pop_current_node(parser) != node) {
|
@@ -3608,7 +3950,6 @@ static bool handle_in_foreign_content(GumboParser* parser, GumboToken* token) {
|
|
3608
3950
|
}
|
3609
3951
|
}
|
3610
3952
|
|
3611
|
-
|
3612
3953
|
// http://www.whatwg.org/specs/web-apps/current-work/multipage/tree-construction.html#tree-construction
|
3613
3954
|
static bool handle_token(GumboParser* parser, GumboToken* token) {
|
3614
3955
|
if (parser->_parser_state->_ignore_next_linefeed &&
|
@@ -3630,28 +3971,31 @@ static bool handle_token(GumboParser* parser, GumboToken* token) {
|
|
3630
3971
|
parser->_parser_state->_closed_html_tag = true;
|
3631
3972
|
}
|
3632
3973
|
|
3633
|
-
const GumboNode* current_node =
|
3634
|
-
assert(!current_node || current_node->type == GUMBO_NODE_ELEMENT
|
3974
|
+
const GumboNode* current_node = get_adjusted_current_node(parser);
|
3975
|
+
assert(!current_node || current_node->type == GUMBO_NODE_ELEMENT ||
|
3976
|
+
current_node->type == GUMBO_NODE_TEMPLATE);
|
3635
3977
|
if (current_node) {
|
3636
3978
|
gumbo_debug("Current node: <%s>.\n",
|
3637
|
-
|
3979
|
+
gumbo_normalized_tagname(current_node->v.element.tag));
|
3638
3980
|
}
|
3639
3981
|
if (!current_node ||
|
3640
3982
|
current_node->v.element.tag_namespace == GUMBO_NAMESPACE_HTML ||
|
3641
3983
|
(is_mathml_integration_point(current_node) &&
|
3642
|
-
|
3643
|
-
|
3644
|
-
|
3645
|
-
|
3646
|
-
|
3984
|
+
(token->type == GUMBO_TOKEN_CHARACTER ||
|
3985
|
+
token->type == GUMBO_TOKEN_WHITESPACE ||
|
3986
|
+
token->type == GUMBO_TOKEN_NULL ||
|
3987
|
+
(token->type == GUMBO_TOKEN_START_TAG &&
|
3988
|
+
!tag_in(token, kStartTag,
|
3989
|
+
(gumbo_tagset){TAG(MGLYPH), TAG(MALIGNMARK)})))) ||
|
3647
3990
|
(current_node->v.element.tag_namespace == GUMBO_NAMESPACE_MATHML &&
|
3648
|
-
|
3649
|
-
|
3650
|
-
|
3651
|
-
|
3652
|
-
token->type ==
|
3653
|
-
|
3654
|
-
|
3991
|
+
node_qualified_tag_is(
|
3992
|
+
current_node, GUMBO_NAMESPACE_MATHML, GUMBO_TAG_ANNOTATION_XML) &&
|
3993
|
+
tag_is(token, kStartTag, GUMBO_TAG_SVG)) ||
|
3994
|
+
(is_html_integration_point(current_node) &&
|
3995
|
+
(token->type == GUMBO_TOKEN_START_TAG ||
|
3996
|
+
token->type == GUMBO_TOKEN_CHARACTER ||
|
3997
|
+
token->type == GUMBO_TOKEN_NULL ||
|
3998
|
+
token->type == GUMBO_TOKEN_WHITESPACE)) ||
|
3655
3999
|
token->type == GUMBO_TOKEN_EOF) {
|
3656
4000
|
return handle_html_content(parser, token);
|
3657
4001
|
} else {
|
@@ -3659,6 +4003,66 @@ static bool handle_token(GumboParser* parser, GumboToken* token) {
|
|
3659
4003
|
}
|
3660
4004
|
}
|
3661
4005
|
|
4006
|
+
static void fragment_parser_init(GumboParser* parser, GumboTag fragment_ctx,
|
4007
|
+
GumboNamespaceEnum fragment_namespace) {
|
4008
|
+
GumboNode* root;
|
4009
|
+
assert(fragment_ctx != GUMBO_TAG_LAST);
|
4010
|
+
|
4011
|
+
// 3
|
4012
|
+
parser->_parser_state->_fragment_ctx = create_element(parser, fragment_ctx);
|
4013
|
+
parser->_parser_state->_fragment_ctx->v.element.tag_namespace =
|
4014
|
+
fragment_namespace;
|
4015
|
+
|
4016
|
+
// 4
|
4017
|
+
if (fragment_namespace == GUMBO_NAMESPACE_HTML) {
|
4018
|
+
// Non-HTML namespaces always start in the DATA state.
|
4019
|
+
switch (fragment_ctx) {
|
4020
|
+
case GUMBO_TAG_TITLE:
|
4021
|
+
case GUMBO_TAG_TEXTAREA:
|
4022
|
+
gumbo_tokenizer_set_state(parser, GUMBO_LEX_RCDATA);
|
4023
|
+
break;
|
4024
|
+
|
4025
|
+
case GUMBO_TAG_STYLE:
|
4026
|
+
case GUMBO_TAG_XMP:
|
4027
|
+
case GUMBO_TAG_IFRAME:
|
4028
|
+
case GUMBO_TAG_NOEMBED:
|
4029
|
+
case GUMBO_TAG_NOFRAMES:
|
4030
|
+
gumbo_tokenizer_set_state(parser, GUMBO_LEX_RAWTEXT);
|
4031
|
+
break;
|
4032
|
+
|
4033
|
+
case GUMBO_TAG_SCRIPT:
|
4034
|
+
gumbo_tokenizer_set_state(parser, GUMBO_LEX_SCRIPT);
|
4035
|
+
break;
|
4036
|
+
|
4037
|
+
case GUMBO_TAG_NOSCRIPT:
|
4038
|
+
/* scripting is disabled in Gumbo, so leave the tokenizer
|
4039
|
+
* in the default data state */
|
4040
|
+
break;
|
4041
|
+
|
4042
|
+
case GUMBO_TAG_PLAINTEXT:
|
4043
|
+
gumbo_tokenizer_set_state(parser, GUMBO_LEX_PLAINTEXT);
|
4044
|
+
break;
|
4045
|
+
|
4046
|
+
default:
|
4047
|
+
/* default data state */
|
4048
|
+
break;
|
4049
|
+
}
|
4050
|
+
}
|
4051
|
+
|
4052
|
+
// 5. 6. 7.
|
4053
|
+
root = insert_element_of_tag_type(
|
4054
|
+
parser, GUMBO_TAG_HTML, GUMBO_INSERTION_IMPLIED);
|
4055
|
+
parser->_output->root = root;
|
4056
|
+
|
4057
|
+
// 8.
|
4058
|
+
if (fragment_ctx == GUMBO_TAG_TEMPLATE) {
|
4059
|
+
push_template_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TEMPLATE);
|
4060
|
+
}
|
4061
|
+
|
4062
|
+
// 10.
|
4063
|
+
reset_insertion_mode_appropriately(parser);
|
4064
|
+
}
|
4065
|
+
|
3662
4066
|
GumboOutput* gumbo_parse(const char* buffer) {
|
3663
4067
|
return gumbo_parse_with_options(
|
3664
4068
|
&kGumboDefaultOptions, buffer, strlen(buffer));
|
@@ -3672,6 +4076,11 @@ GumboOutput* gumbo_parse_with_options(
|
|
3672
4076
|
gumbo_tokenizer_state_init(&parser, buffer, length);
|
3673
4077
|
parser_state_init(&parser);
|
3674
4078
|
|
4079
|
+
if (options->fragment_context != GUMBO_TAG_LAST) {
|
4080
|
+
fragment_parser_init(
|
4081
|
+
&parser, options->fragment_context, options->fragment_namespace);
|
4082
|
+
}
|
4083
|
+
|
3675
4084
|
GumboParserState* state = parser._parser_state;
|
3676
4085
|
gumbo_debug("Parsing %.*s.\n", length, buffer);
|
3677
4086
|
|
@@ -3687,9 +4096,9 @@ GumboOutput* gumbo_parse_with_options(
|
|
3687
4096
|
state->_reprocess_current_token = false;
|
3688
4097
|
} else {
|
3689
4098
|
GumboNode* current_node = get_current_node(&parser);
|
3690
|
-
gumbo_tokenizer_set_is_current_node_foreign(
|
3691
|
-
|
3692
|
-
|
4099
|
+
gumbo_tokenizer_set_is_current_node_foreign(&parser,
|
4100
|
+
current_node &&
|
4101
|
+
current_node->v.element.tag_namespace != GUMBO_NAMESPACE_HTML);
|
3693
4102
|
has_error = !gumbo_lex(&parser, &token) || has_error;
|
3694
4103
|
}
|
3695
4104
|
const char* token_type = "text";
|
@@ -3709,14 +4118,13 @@ GumboOutput* gumbo_parse_with_options(
|
|
3709
4118
|
default:
|
3710
4119
|
break;
|
3711
4120
|
}
|
3712
|
-
gumbo_debug("Handling %s token @%d:%d in state %d.\n",
|
3713
|
-
|
3714
|
-
state->_insertion_mode);
|
4121
|
+
gumbo_debug("Handling %s token @%d:%d in state %d.\n", (char*) token_type,
|
4122
|
+
token.position.line, token.position.column, state->_insertion_mode);
|
3715
4123
|
|
3716
4124
|
state->_current_token = &token;
|
3717
4125
|
state->_self_closing_flag_acknowledged =
|
3718
4126
|
!(token.type == GUMBO_TOKEN_START_TAG &&
|
3719
|
-
|
4127
|
+
token.v.start_tag.is_self_closing);
|
3720
4128
|
|
3721
4129
|
has_error = !handle_token(&parser, &token) || has_error;
|
3722
4130
|
|
@@ -3772,7 +4180,7 @@ void gumbo_destroy_output(const GumboOptions* options, GumboOutput* output) {
|
|
3772
4180
|
GumboParser parser;
|
3773
4181
|
parser._options = options;
|
3774
4182
|
destroy_node(&parser, output->document);
|
3775
|
-
for (int i = 0; i < output->errors.length; ++i) {
|
4183
|
+
for (unsigned int i = 0; i < output->errors.length; ++i) {
|
3776
4184
|
gumbo_error_destroy(&parser, output->errors.data[i]);
|
3777
4185
|
}
|
3778
4186
|
gumbo_vector_destroy(&parser, &output->errors);
|