nokogumbo 1.4.1 → 1.4.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/ext/nokogumboc/nokogumbo.c +1 -1
- data/gumbo-parser/src/error.c +3 -5
- data/gumbo-parser/src/gumbo.h +170 -36
- data/gumbo-parser/src/parser.c +403 -795
- data/gumbo-parser/src/string_buffer.c +1 -8
- data/gumbo-parser/src/string_buffer.h +0 -5
- data/gumbo-parser/src/tag.c +162 -35
- data/gumbo-parser/src/tokenizer.c +18 -13
- data/gumbo-parser/src/vector.c +1 -1
- data/test-nokogumbo.rb +1 -1
- metadata +15 -24
- data/gumbo-parser/src/tag.in +0 -150
- data/gumbo-parser/src/tag_enum.h +0 -150
- data/gumbo-parser/src/tag_gperf.h +0 -343
- data/gumbo-parser/src/tag_sizes.h +0 -1
- data/gumbo-parser/src/tag_strings.h +0 -150
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: cf20dd502d8ec6022f2c72193bb0c9a908251088
|
4
|
+
data.tar.gz: 326f85766d0e4f97683f5df026f08f4dc33806e8
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 800800652a5260bf54399e8cca1fc6e63f7ef53aea489245c5315b6e955b38aa4dfc6d7272b99898ab78150464640ac14c995aa38b9c77644dab5d73fc0e46a5
|
7
|
+
data.tar.gz: 18ba647671103cfc2853a88935fe91eb965d1e6fbe1aad981438297a5035ec222b5ae6c5ed3ef127429c8b58edd02a6a5a877ba7e7ec3390d05779f7420f1521
|
data/ext/nokogumboc/nokogumbo.c
CHANGED
@@ -157,7 +157,7 @@ static xmlNodePtr walk_tree(xmlDocPtr document, GumboElement *node) {
|
|
157
157
|
|
158
158
|
switch (child->type) {
|
159
159
|
case GUMBO_NODE_ELEMENT:
|
160
|
-
|
160
|
+
// case GUMBO_NODE_TEMPLATE: /* future */
|
161
161
|
node = walk_tree(document, &child->v.element);
|
162
162
|
break;
|
163
163
|
case GUMBO_NODE_WHITESPACE:
|
data/gumbo-parser/src/error.c
CHANGED
@@ -35,11 +35,10 @@ static const size_t kMessageBufferSize = 256;
|
|
35
35
|
static int print_message(GumboParser* parser, GumboStringBuffer* output,
|
36
36
|
const char* format, ...) {
|
37
37
|
va_list args;
|
38
|
-
int remaining_capacity = output->capacity - output->length;
|
39
38
|
va_start(args, format);
|
39
|
+
int remaining_capacity = output->capacity - output->length;
|
40
40
|
int bytes_written = vsnprintf(output->data + output->length,
|
41
41
|
remaining_capacity, format, args);
|
42
|
-
va_end(args);
|
43
42
|
#ifdef _MSC_VER
|
44
43
|
if (bytes_written == -1) {
|
45
44
|
// vsnprintf returns -1 on MSVC++ if there's not enough capacity, instead of
|
@@ -48,7 +47,6 @@ static int print_message(GumboParser* parser, GumboStringBuffer* output,
|
|
48
47
|
// we retry (letting it fail and returning 0 if it doesn't), since there's
|
49
48
|
// no way to smartly resize the buffer.
|
50
49
|
gumbo_string_buffer_reserve(parser, output->capacity * 2, output);
|
51
|
-
va_start(args, format);
|
52
50
|
int result = vsnprintf(output->data + output->length,
|
53
51
|
remaining_capacity, format, args);
|
54
52
|
va_end(args);
|
@@ -57,6 +55,7 @@ static int print_message(GumboParser* parser, GumboStringBuffer* output,
|
|
57
55
|
#else
|
58
56
|
// -1 in standard C99 indicates an encoding error. Return 0 and do nothing.
|
59
57
|
if (bytes_written == -1) {
|
58
|
+
va_end(args);
|
60
59
|
return 0;
|
61
60
|
}
|
62
61
|
#endif
|
@@ -65,12 +64,11 @@ static int print_message(GumboParser* parser, GumboStringBuffer* output,
|
|
65
64
|
gumbo_string_buffer_reserve(
|
66
65
|
parser, output->capacity + bytes_written, output);
|
67
66
|
remaining_capacity = output->capacity - output->length;
|
68
|
-
va_start(args, format);
|
69
67
|
bytes_written = vsnprintf(output->data + output->length,
|
70
68
|
remaining_capacity, format, args);
|
71
|
-
va_end(args);
|
72
69
|
}
|
73
70
|
output->length += bytes_written;
|
71
|
+
va_end(args);
|
74
72
|
return bytes_written;
|
75
73
|
}
|
76
74
|
|
data/gumbo-parser/src/gumbo.h
CHANGED
@@ -141,7 +141,7 @@ extern const GumboVector kGumboEmptyVector;
|
|
141
141
|
* Returns the first index at which an element appears in this vector (testing
|
142
142
|
* by pointer equality), or -1 if it never does.
|
143
143
|
*/
|
144
|
-
int gumbo_vector_index_of(GumboVector* vector,
|
144
|
+
int gumbo_vector_index_of(GumboVector* vector, void* element);
|
145
145
|
|
146
146
|
|
147
147
|
/**
|
@@ -157,10 +157,172 @@ int gumbo_vector_index_of(GumboVector* vector, const void* element);
|
|
157
157
|
* strings.
|
158
158
|
*/
|
159
159
|
typedef enum {
|
160
|
-
//
|
161
|
-
|
162
|
-
//
|
163
|
-
|
160
|
+
// http://www.whatwg.org/specs/web-apps/current-work/multipage/semantics.html#the-root-element
|
161
|
+
GUMBO_TAG_HTML,
|
162
|
+
// http://www.whatwg.org/specs/web-apps/current-work/multipage/semantics.html#document-metadata
|
163
|
+
GUMBO_TAG_HEAD,
|
164
|
+
GUMBO_TAG_TITLE,
|
165
|
+
GUMBO_TAG_BASE,
|
166
|
+
GUMBO_TAG_LINK,
|
167
|
+
GUMBO_TAG_META,
|
168
|
+
GUMBO_TAG_STYLE,
|
169
|
+
// http://www.whatwg.org/specs/web-apps/current-work/multipage/scripting-1.html#scripting-1
|
170
|
+
GUMBO_TAG_SCRIPT,
|
171
|
+
GUMBO_TAG_NOSCRIPT,
|
172
|
+
GUMBO_TAG_TEMPLATE,
|
173
|
+
// http://www.whatwg.org/specs/web-apps/current-work/multipage/sections.html#sections
|
174
|
+
GUMBO_TAG_BODY,
|
175
|
+
GUMBO_TAG_ARTICLE,
|
176
|
+
GUMBO_TAG_SECTION,
|
177
|
+
GUMBO_TAG_NAV,
|
178
|
+
GUMBO_TAG_ASIDE,
|
179
|
+
GUMBO_TAG_H1,
|
180
|
+
GUMBO_TAG_H2,
|
181
|
+
GUMBO_TAG_H3,
|
182
|
+
GUMBO_TAG_H4,
|
183
|
+
GUMBO_TAG_H5,
|
184
|
+
GUMBO_TAG_H6,
|
185
|
+
GUMBO_TAG_HGROUP,
|
186
|
+
GUMBO_TAG_HEADER,
|
187
|
+
GUMBO_TAG_FOOTER,
|
188
|
+
GUMBO_TAG_ADDRESS,
|
189
|
+
// http://www.whatwg.org/specs/web-apps/current-work/multipage/grouping-content.html#grouping-content
|
190
|
+
GUMBO_TAG_P,
|
191
|
+
GUMBO_TAG_HR,
|
192
|
+
GUMBO_TAG_PRE,
|
193
|
+
GUMBO_TAG_BLOCKQUOTE,
|
194
|
+
GUMBO_TAG_OL,
|
195
|
+
GUMBO_TAG_UL,
|
196
|
+
GUMBO_TAG_LI,
|
197
|
+
GUMBO_TAG_DL,
|
198
|
+
GUMBO_TAG_DT,
|
199
|
+
GUMBO_TAG_DD,
|
200
|
+
GUMBO_TAG_FIGURE,
|
201
|
+
GUMBO_TAG_FIGCAPTION,
|
202
|
+
GUMBO_TAG_MAIN,
|
203
|
+
GUMBO_TAG_DIV,
|
204
|
+
// http://www.whatwg.org/specs/web-apps/current-work/multipage/text-level-semantics.html#text-level-semantics
|
205
|
+
GUMBO_TAG_A,
|
206
|
+
GUMBO_TAG_EM,
|
207
|
+
GUMBO_TAG_STRONG,
|
208
|
+
GUMBO_TAG_SMALL,
|
209
|
+
GUMBO_TAG_S,
|
210
|
+
GUMBO_TAG_CITE,
|
211
|
+
GUMBO_TAG_Q,
|
212
|
+
GUMBO_TAG_DFN,
|
213
|
+
GUMBO_TAG_ABBR,
|
214
|
+
GUMBO_TAG_DATA,
|
215
|
+
GUMBO_TAG_TIME,
|
216
|
+
GUMBO_TAG_CODE,
|
217
|
+
GUMBO_TAG_VAR,
|
218
|
+
GUMBO_TAG_SAMP,
|
219
|
+
GUMBO_TAG_KBD,
|
220
|
+
GUMBO_TAG_SUB,
|
221
|
+
GUMBO_TAG_SUP,
|
222
|
+
GUMBO_TAG_I,
|
223
|
+
GUMBO_TAG_B,
|
224
|
+
GUMBO_TAG_U,
|
225
|
+
GUMBO_TAG_MARK,
|
226
|
+
GUMBO_TAG_RUBY,
|
227
|
+
GUMBO_TAG_RT,
|
228
|
+
GUMBO_TAG_RP,
|
229
|
+
GUMBO_TAG_BDI,
|
230
|
+
GUMBO_TAG_BDO,
|
231
|
+
GUMBO_TAG_SPAN,
|
232
|
+
GUMBO_TAG_BR,
|
233
|
+
GUMBO_TAG_WBR,
|
234
|
+
// http://www.whatwg.org/specs/web-apps/current-work/multipage/edits.html#edits
|
235
|
+
GUMBO_TAG_INS,
|
236
|
+
GUMBO_TAG_DEL,
|
237
|
+
// http://www.whatwg.org/specs/web-apps/current-work/multipage/embedded-content-1.html#embedded-content-1
|
238
|
+
GUMBO_TAG_IMAGE,
|
239
|
+
GUMBO_TAG_IMG,
|
240
|
+
GUMBO_TAG_IFRAME,
|
241
|
+
GUMBO_TAG_EMBED,
|
242
|
+
GUMBO_TAG_OBJECT,
|
243
|
+
GUMBO_TAG_PARAM,
|
244
|
+
GUMBO_TAG_VIDEO,
|
245
|
+
GUMBO_TAG_AUDIO,
|
246
|
+
GUMBO_TAG_SOURCE,
|
247
|
+
GUMBO_TAG_TRACK,
|
248
|
+
GUMBO_TAG_CANVAS,
|
249
|
+
GUMBO_TAG_MAP,
|
250
|
+
GUMBO_TAG_AREA,
|
251
|
+
// http://www.whatwg.org/specs/web-apps/current-work/multipage/the-map-element.html#mathml
|
252
|
+
GUMBO_TAG_MATH,
|
253
|
+
GUMBO_TAG_MI,
|
254
|
+
GUMBO_TAG_MO,
|
255
|
+
GUMBO_TAG_MN,
|
256
|
+
GUMBO_TAG_MS,
|
257
|
+
GUMBO_TAG_MTEXT,
|
258
|
+
GUMBO_TAG_MGLYPH,
|
259
|
+
GUMBO_TAG_MALIGNMARK,
|
260
|
+
GUMBO_TAG_ANNOTATION_XML,
|
261
|
+
// http://www.whatwg.org/specs/web-apps/current-work/multipage/the-map-element.html#svg-0
|
262
|
+
GUMBO_TAG_SVG,
|
263
|
+
GUMBO_TAG_FOREIGNOBJECT,
|
264
|
+
GUMBO_TAG_DESC,
|
265
|
+
// SVG title tags will have GUMBO_TAG_TITLE as with HTML.
|
266
|
+
// http://www.whatwg.org/specs/web-apps/current-work/multipage/tabular-data.html#tabular-data
|
267
|
+
GUMBO_TAG_TABLE,
|
268
|
+
GUMBO_TAG_CAPTION,
|
269
|
+
GUMBO_TAG_COLGROUP,
|
270
|
+
GUMBO_TAG_COL,
|
271
|
+
GUMBO_TAG_TBODY,
|
272
|
+
GUMBO_TAG_THEAD,
|
273
|
+
GUMBO_TAG_TFOOT,
|
274
|
+
GUMBO_TAG_TR,
|
275
|
+
GUMBO_TAG_TD,
|
276
|
+
GUMBO_TAG_TH,
|
277
|
+
// http://www.whatwg.org/specs/web-apps/current-work/multipage/forms.html#forms
|
278
|
+
GUMBO_TAG_FORM,
|
279
|
+
GUMBO_TAG_FIELDSET,
|
280
|
+
GUMBO_TAG_LEGEND,
|
281
|
+
GUMBO_TAG_LABEL,
|
282
|
+
GUMBO_TAG_INPUT,
|
283
|
+
GUMBO_TAG_BUTTON,
|
284
|
+
GUMBO_TAG_SELECT,
|
285
|
+
GUMBO_TAG_DATALIST,
|
286
|
+
GUMBO_TAG_OPTGROUP,
|
287
|
+
GUMBO_TAG_OPTION,
|
288
|
+
GUMBO_TAG_TEXTAREA,
|
289
|
+
GUMBO_TAG_KEYGEN,
|
290
|
+
GUMBO_TAG_OUTPUT,
|
291
|
+
GUMBO_TAG_PROGRESS,
|
292
|
+
GUMBO_TAG_METER,
|
293
|
+
// http://www.whatwg.org/specs/web-apps/current-work/multipage/interactive-elements.html#interactive-elements
|
294
|
+
GUMBO_TAG_DETAILS,
|
295
|
+
GUMBO_TAG_SUMMARY,
|
296
|
+
GUMBO_TAG_MENU,
|
297
|
+
GUMBO_TAG_MENUITEM,
|
298
|
+
// Non-conforming elements that nonetheless appear in the HTML5 spec.
|
299
|
+
// http://www.whatwg.org/specs/web-apps/current-work/multipage/obsolete.html#non-conforming-features
|
300
|
+
GUMBO_TAG_APPLET,
|
301
|
+
GUMBO_TAG_ACRONYM,
|
302
|
+
GUMBO_TAG_BGSOUND,
|
303
|
+
GUMBO_TAG_DIR,
|
304
|
+
GUMBO_TAG_FRAME,
|
305
|
+
GUMBO_TAG_FRAMESET,
|
306
|
+
GUMBO_TAG_NOFRAMES,
|
307
|
+
GUMBO_TAG_ISINDEX,
|
308
|
+
GUMBO_TAG_LISTING,
|
309
|
+
GUMBO_TAG_XMP,
|
310
|
+
GUMBO_TAG_NEXTID,
|
311
|
+
GUMBO_TAG_NOEMBED,
|
312
|
+
GUMBO_TAG_PLAINTEXT,
|
313
|
+
GUMBO_TAG_RB,
|
314
|
+
GUMBO_TAG_STRIKE,
|
315
|
+
GUMBO_TAG_BASEFONT,
|
316
|
+
GUMBO_TAG_BIG,
|
317
|
+
GUMBO_TAG_BLINK,
|
318
|
+
GUMBO_TAG_CENTER,
|
319
|
+
GUMBO_TAG_FONT,
|
320
|
+
GUMBO_TAG_MARQUEE,
|
321
|
+
GUMBO_TAG_MULTICOL,
|
322
|
+
GUMBO_TAG_NOBR,
|
323
|
+
GUMBO_TAG_SPACER,
|
324
|
+
GUMBO_TAG_TT,
|
325
|
+
// Used for all tags that don't have special handling in HTML.
|
164
326
|
GUMBO_TAG_UNKNOWN,
|
165
327
|
// A marker value to indicate the end of the enum, for iterating over it.
|
166
328
|
// Also used as the terminator for varargs functions that take tags.
|
@@ -202,10 +364,9 @@ const char* gumbo_normalize_svg_tagname(const GumboStringPiece* tagname);
|
|
202
364
|
|
203
365
|
/**
|
204
366
|
* Converts a tag name string (which may be in upper or mixed case) to a tag
|
205
|
-
* enum.
|
367
|
+
* enum.
|
206
368
|
*/
|
207
369
|
GumboTag gumbo_tag_enum(const char* tagname);
|
208
|
-
GumboTag gumbo_tagn_enum(const char* tagname, unsigned int length);
|
209
370
|
|
210
371
|
/**
|
211
372
|
* Attribute namespaces.
|
@@ -300,16 +461,10 @@ typedef enum {
|
|
300
461
|
GUMBO_NODE_TEXT,
|
301
462
|
/** CDATA node. v will be a GumboText. */
|
302
463
|
GUMBO_NODE_CDATA,
|
303
|
-
/** Comment node. v will be a GumboText, excluding comment delimiters. */
|
464
|
+
/** Comment node. v. will be a GumboText, excluding comment delimiters. */
|
304
465
|
GUMBO_NODE_COMMENT,
|
305
466
|
/** Text node, where all contents is whitespace. v will be a GumboText. */
|
306
|
-
GUMBO_NODE_WHITESPACE
|
307
|
-
/** Template node. This is separate from GUMBO_NODE_ELEMENT because many
|
308
|
-
* client libraries will want to ignore the contents of template nodes, as
|
309
|
-
* the spec suggests. Recursing on GUMBO_NODE_ELEMENT will do the right thing
|
310
|
-
* here, while clients that want to include template contents should also
|
311
|
-
* check for GUMBO_NODE_TEMPLATE. v will be a GumboElement. */
|
312
|
-
GUMBO_NODE_TEMPLATE
|
467
|
+
GUMBO_NODE_WHITESPACE
|
313
468
|
} GumboNodeType;
|
314
469
|
|
315
470
|
/**
|
@@ -523,19 +678,6 @@ struct GumboInternalNode {
|
|
523
678
|
/** Pointer back to parent node. Not owned. */
|
524
679
|
GumboNode* parent;
|
525
680
|
|
526
|
-
/**
|
527
|
-
* Pointer to next node in document order. This is the next node by start tag
|
528
|
-
* position in the document, or by position of the tag that forces the parser
|
529
|
-
* to insert it for parser-inserted nodes. It's necessary to maintain API
|
530
|
-
* compatibility with some other libraries, eg. BeautifulSoup. Not owned.
|
531
|
-
*/
|
532
|
-
GumboNode* next;
|
533
|
-
|
534
|
-
/**
|
535
|
-
* Pointer to previous node in document order.
|
536
|
-
*/
|
537
|
-
GumboNode* prev;
|
538
|
-
|
539
681
|
/** The index within the parent's children vector of this node. */
|
540
682
|
size_t index_within_parent;
|
541
683
|
|
@@ -653,14 +795,6 @@ GumboOutput* gumbo_parse(const char* buffer);
|
|
653
795
|
GumboOutput* gumbo_parse_with_options(
|
654
796
|
const GumboOptions* options, const char* buffer, size_t buffer_length);
|
655
797
|
|
656
|
-
/**
|
657
|
-
* Parse a chunk of HTML with the given fragment context. If `fragment_ctx`
|
658
|
-
* is `GUMBO_TAG_LAST`, the fragment will be parsed as a full document.
|
659
|
-
*/
|
660
|
-
GumboOutput* gumbo_parse_fragment(
|
661
|
-
const GumboOptions* options, const char* buffer, size_t length,
|
662
|
-
const GumboTag fragment_ctx, const GumboNamespaceEnum fragment_namespace);
|
663
|
-
|
664
798
|
/** Release the memory used for the parse tree & parse errors. */
|
665
799
|
void gumbo_destroy_output(
|
666
800
|
const GumboOptions* options, GumboOutput* output);
|
data/gumbo-parser/src/parser.c
CHANGED
@@ -47,15 +47,6 @@ typedef char gumbo_tagset[GUMBO_TAG_LAST];
|
|
47
47
|
tagset[(int)tag] == (1 << (int)namespace))
|
48
48
|
|
49
49
|
|
50
|
-
|
51
|
-
// selected forward declarations as it is getting hard to find
|
52
|
-
// an appropriate order
|
53
|
-
static bool node_html_tag_is(const GumboNode*, GumboTag);
|
54
|
-
static GumboInsertionMode get_current_template_insertion_mode(const GumboParser*);
|
55
|
-
static bool handle_in_template(GumboParser*, GumboToken*);
|
56
|
-
static GumboNode* destroy_node(GumboParser*, GumboNode*);
|
57
|
-
|
58
|
-
|
59
50
|
static void* malloc_wrapper(void* unused, size_t size) {
|
60
51
|
return malloc(size);
|
61
52
|
}
|
@@ -199,7 +190,7 @@ typedef struct _ReplacementEntry {
|
|
199
190
|
{ GUMBO_STRING(from), GUMBO_STRING(to) }
|
200
191
|
|
201
192
|
// Static data for SVG attribute replacements.
|
202
|
-
//
|
193
|
+
// http://www.whatwg.org/specs/web-apps/current-work/multipage/tree-construction.html#adjust-svg-attributes
|
203
194
|
static const ReplacementEntry kSvgAttributeReplacements[] = {
|
204
195
|
REPLACEMENT_ENTRY("attributename", "attributeName"),
|
205
196
|
REPLACEMENT_ENTRY("attributetype", "attributeType"),
|
@@ -207,12 +198,12 @@ static const ReplacementEntry kSvgAttributeReplacements[] = {
|
|
207
198
|
REPLACEMENT_ENTRY("baseprofile", "baseProfile"),
|
208
199
|
REPLACEMENT_ENTRY("calcmode", "calcMode"),
|
209
200
|
REPLACEMENT_ENTRY("clippathunits", "clipPathUnits"),
|
210
|
-
|
211
|
-
|
201
|
+
REPLACEMENT_ENTRY("contentscripttype", "contentScriptType"),
|
202
|
+
REPLACEMENT_ENTRY("contentstyletype", "contentStyleType"),
|
212
203
|
REPLACEMENT_ENTRY("diffuseconstant", "diffuseConstant"),
|
213
204
|
REPLACEMENT_ENTRY("edgemode", "edgeMode"),
|
214
|
-
|
215
|
-
|
205
|
+
REPLACEMENT_ENTRY("externalresourcesrequired", "externalResourcesRequired"),
|
206
|
+
REPLACEMENT_ENTRY("filterres", "filterRes"),
|
216
207
|
REPLACEMENT_ENTRY("filterunits", "filterUnits"),
|
217
208
|
REPLACEMENT_ENTRY("glyphref", "glyphRef"),
|
218
209
|
REPLACEMENT_ENTRY("gradienttransform", "gradientTransform"),
|
@@ -380,9 +371,6 @@ typedef struct GumboInternalParserState {
|
|
380
371
|
GumboNode* _head_element;
|
381
372
|
GumboNode* _form_element;
|
382
373
|
|
383
|
-
// The element used as fragment context when parsing in fragment mode
|
384
|
-
GumboNode* _fragment_ctx;
|
385
|
-
|
386
374
|
// The flag for when the spec says "Reprocess the current token in..."
|
387
375
|
bool _reprocess_current_token;
|
388
376
|
|
@@ -411,10 +399,6 @@ typedef struct GumboInternalParserState {
|
|
411
399
|
// The current token.
|
412
400
|
GumboToken* _current_token;
|
413
401
|
|
414
|
-
// The current (most recently inserted) node. This is used to link together
|
415
|
-
// nodes in document order.
|
416
|
-
GumboNode* _current_node;
|
417
|
-
|
418
402
|
// The way that the spec is written, the </body> and </html> tags are *always*
|
419
403
|
// implicit, because encountering one of those tokens merely switches the
|
420
404
|
// insertion mode out of "in body". So we have individual state flags for
|
@@ -467,17 +451,7 @@ static void set_frameset_not_ok(GumboParser* parser) {
|
|
467
451
|
}
|
468
452
|
|
469
453
|
static GumboNode* create_node(GumboParser* parser, GumboNodeType type) {
|
470
|
-
GumboParserState* state = parser->_parser_state;
|
471
454
|
GumboNode* node = gumbo_parser_allocate(parser, sizeof(GumboNode));
|
472
|
-
|
473
|
-
node->next = NULL;
|
474
|
-
node->prev = state->_current_node;
|
475
|
-
if (state->_current_node != NULL) {
|
476
|
-
// May be null for the initial document node.
|
477
|
-
state->_current_node->next = node;
|
478
|
-
}
|
479
|
-
state->_current_node = node;
|
480
|
-
|
481
455
|
node->parent = NULL;
|
482
456
|
node->index_within_parent = -1;
|
483
457
|
node->type = type;
|
@@ -524,9 +498,7 @@ static void parser_state_init(GumboParser* parser) {
|
|
524
498
|
gumbo_vector_init(parser, 5, &parser_state->_template_insertion_modes);
|
525
499
|
parser_state->_head_element = NULL;
|
526
500
|
parser_state->_form_element = NULL;
|
527
|
-
parser_state->_fragment_ctx = NULL;
|
528
501
|
parser_state->_current_token = NULL;
|
529
|
-
parser_state->_current_node = NULL;
|
530
502
|
parser_state->_closed_body_tag = false;
|
531
503
|
parser_state->_closed_html_tag = false;
|
532
504
|
parser->_parser_state = parser_state;
|
@@ -534,25 +506,17 @@ static void parser_state_init(GumboParser* parser) {
|
|
534
506
|
|
535
507
|
static void parser_state_destroy(GumboParser* parser) {
|
536
508
|
GumboParserState* state = parser->_parser_state;
|
537
|
-
if (state->_fragment_ctx) {
|
538
|
-
destroy_node(parser, state->_fragment_ctx);
|
539
|
-
}
|
540
509
|
gumbo_vector_destroy(parser, &state->_active_formatting_elements);
|
541
510
|
gumbo_vector_destroy(parser, &state->_open_elements);
|
542
511
|
gumbo_vector_destroy(parser, &state->_template_insertion_modes);
|
543
512
|
gumbo_string_buffer_destroy(parser, &state->_text_node._buffer);
|
544
513
|
gumbo_parser_deallocate(parser, state);
|
545
|
-
parser->_parser_state = NULL;
|
546
514
|
}
|
547
515
|
|
548
516
|
static GumboNode* get_document_node(GumboParser* parser) {
|
549
517
|
return parser->_output->document;
|
550
518
|
}
|
551
519
|
|
552
|
-
static bool is_fragment_parser(const GumboParser *parser) {
|
553
|
-
return !!parser->_parser_state->_fragment_ctx;
|
554
|
-
}
|
555
|
-
|
556
520
|
// Returns the node at the bottom of the stack of open elements, or NULL if no
|
557
521
|
// elements have been added yet.
|
558
522
|
static GumboNode* get_current_node(GumboParser* parser) {
|
@@ -566,14 +530,6 @@ static GumboNode* get_current_node(GumboParser* parser) {
|
|
566
530
|
return open_elements->data[open_elements->length - 1];
|
567
531
|
}
|
568
532
|
|
569
|
-
static GumboNode* get_adjusted_current_node(GumboParser* parser) {
|
570
|
-
GumboParserState *state = parser->_parser_state;
|
571
|
-
if (state->_open_elements.length == 1 && state->_fragment_ctx) {
|
572
|
-
return state->_fragment_ctx;
|
573
|
-
}
|
574
|
-
return get_current_node(parser);
|
575
|
-
}
|
576
|
-
|
577
533
|
// Returns true if the given needle is in the given array of literal
|
578
534
|
// GumboStringPieces. If exact_match is true, this requires that they match
|
579
535
|
// exactly; otherwise, this performs a prefix match to check if any of the
|
@@ -594,80 +550,55 @@ static void set_insertion_mode(GumboParser* parser, GumboInsertionMode mode) {
|
|
594
550
|
parser->_parser_state->_insertion_mode = mode;
|
595
551
|
}
|
596
552
|
|
597
|
-
|
598
553
|
// http://www.whatwg.org/specs/web-apps/current-work/complete/parsing.html#reset-the-insertion-mode-appropriately
|
599
554
|
// This is a helper function that returns the appropriate insertion mode instead
|
600
555
|
// of setting it. Returns GUMBO_INSERTION_MODE_INITIAL as a sentinel value to
|
601
556
|
// indicate that there is no appropriate insertion mode, and the loop should
|
602
557
|
// continue.
|
603
|
-
static GumboInsertionMode get_appropriate_insertion_mode(
|
604
|
-
|
605
|
-
|
606
|
-
const bool is_last = index == 0;
|
607
|
-
|
608
|
-
if (is_last && is_fragment_parser(parser)) {
|
609
|
-
node = parser->_parser_state->_fragment_ctx;
|
610
|
-
}
|
558
|
+
static GumboInsertionMode get_appropriate_insertion_mode(
|
559
|
+
const GumboNode* node, bool is_last) {
|
560
|
+
assert(node->type == GUMBO_NODE_ELEMENT);
|
611
561
|
|
612
|
-
|
613
|
-
|
614
|
-
|
615
|
-
|
616
|
-
|
617
|
-
|
618
|
-
|
619
|
-
|
620
|
-
|
621
|
-
|
622
|
-
|
623
|
-
|
624
|
-
|
625
|
-
|
626
|
-
|
627
|
-
|
628
|
-
|
629
|
-
|
630
|
-
|
631
|
-
|
632
|
-
|
633
|
-
|
634
|
-
|
635
|
-
|
636
|
-
|
637
|
-
|
638
|
-
|
639
|
-
|
640
|
-
|
641
|
-
|
642
|
-
|
643
|
-
|
644
|
-
return GUMBO_INSERTION_MODE_IN_TABLE;
|
645
|
-
case GUMBO_TAG_TEMPLATE:
|
646
|
-
return get_current_template_insertion_mode(parser);
|
647
|
-
case GUMBO_TAG_HEAD:
|
648
|
-
if (!is_last) return GUMBO_INSERTION_MODE_IN_HEAD;
|
649
|
-
break;
|
650
|
-
case GUMBO_TAG_BODY:
|
651
|
-
return GUMBO_INSERTION_MODE_IN_BODY;
|
652
|
-
case GUMBO_TAG_FRAMESET:
|
653
|
-
return GUMBO_INSERTION_MODE_IN_FRAMESET;
|
654
|
-
case GUMBO_TAG_HTML:
|
655
|
-
return parser->_parser_state->_head_element ?
|
656
|
-
GUMBO_INSERTION_MODE_AFTER_HEAD : GUMBO_INSERTION_MODE_BEFORE_HEAD;
|
657
|
-
default:
|
658
|
-
break;
|
659
|
-
}
|
660
|
-
return is_last ?
|
661
|
-
GUMBO_INSERTION_MODE_IN_BODY : GUMBO_INSERTION_MODE_INITIAL;
|
562
|
+
if (node->v.element.tag_namespace == GUMBO_NAMESPACE_HTML) {
|
563
|
+
switch (node->v.element.tag) {
|
564
|
+
case GUMBO_TAG_SELECT:
|
565
|
+
return GUMBO_INSERTION_MODE_IN_SELECT;
|
566
|
+
case GUMBO_TAG_TD:
|
567
|
+
case GUMBO_TAG_TH:
|
568
|
+
return is_last ?
|
569
|
+
GUMBO_INSERTION_MODE_IN_BODY : GUMBO_INSERTION_MODE_IN_CELL;
|
570
|
+
case GUMBO_TAG_TR:
|
571
|
+
return GUMBO_INSERTION_MODE_IN_ROW;
|
572
|
+
case GUMBO_TAG_TBODY:
|
573
|
+
case GUMBO_TAG_THEAD:
|
574
|
+
case GUMBO_TAG_TFOOT:
|
575
|
+
return GUMBO_INSERTION_MODE_IN_TABLE_BODY;
|
576
|
+
case GUMBO_TAG_CAPTION:
|
577
|
+
return GUMBO_INSERTION_MODE_IN_CAPTION;
|
578
|
+
case GUMBO_TAG_COLGROUP:
|
579
|
+
return GUMBO_INSERTION_MODE_IN_COLUMN_GROUP;
|
580
|
+
case GUMBO_TAG_TABLE:
|
581
|
+
return GUMBO_INSERTION_MODE_IN_TABLE;
|
582
|
+
case GUMBO_TAG_HEAD:
|
583
|
+
case GUMBO_TAG_BODY:
|
584
|
+
return GUMBO_INSERTION_MODE_IN_BODY;
|
585
|
+
case GUMBO_TAG_FRAMESET:
|
586
|
+
return GUMBO_INSERTION_MODE_IN_FRAMESET;
|
587
|
+
case GUMBO_TAG_HTML:
|
588
|
+
return GUMBO_INSERTION_MODE_BEFORE_HEAD;
|
589
|
+
default:
|
590
|
+
break;
|
591
|
+
}
|
592
|
+
}
|
593
|
+
return is_last ? GUMBO_INSERTION_MODE_IN_BODY : GUMBO_INSERTION_MODE_INITIAL;
|
662
594
|
}
|
663
595
|
|
664
|
-
|
665
596
|
// This performs the actual "reset the insertion mode" loop.
|
666
597
|
static void reset_insertion_mode_appropriately(GumboParser* parser) {
|
667
598
|
const GumboVector* open_elements = &parser->_parser_state->_open_elements;
|
668
599
|
for (int i = open_elements->length; --i >= 0; ) {
|
669
600
|
GumboInsertionMode mode =
|
670
|
-
get_appropriate_insertion_mode(
|
601
|
+
get_appropriate_insertion_mode(open_elements->data[i], i == 0);
|
671
602
|
if (mode != GUMBO_INSERTION_MODE_INITIAL) {
|
672
603
|
set_insertion_mode(parser, mode);
|
673
604
|
return;
|
@@ -701,7 +632,7 @@ static GumboError* parser_add_parse_error(GumboParser* parser, const GumboToken*
|
|
701
632
|
&extra_data->tag_stack);
|
702
633
|
for (int i = 0; i < state->_open_elements.length; ++i) {
|
703
634
|
const GumboNode* node = state->_open_elements.data[i];
|
704
|
-
assert(node->type == GUMBO_NODE_ELEMENT
|
635
|
+
assert(node->type == GUMBO_NODE_ELEMENT);
|
705
636
|
gumbo_vector_add(parser, (void*) node->v.element.tag,
|
706
637
|
&extra_data->tag_stack);
|
707
638
|
}
|
@@ -738,7 +669,7 @@ static bool tag_is(const GumboToken* token, bool is_start, GumboTag tag) {
|
|
738
669
|
// Like tag_in, but checks for the tag of a node, rather than a token.
|
739
670
|
static bool node_tag_in_set(const GumboNode* node, const gumbo_tagset tags) {
|
740
671
|
assert(node != NULL);
|
741
|
-
if (node->type != GUMBO_NODE_ELEMENT
|
672
|
+
if (node->type != GUMBO_NODE_ELEMENT) {
|
742
673
|
return false;
|
743
674
|
}
|
744
675
|
return TAGSET_INCLUDES(tags, node->v.element.tag_namespace, node->v.element.tag);
|
@@ -747,7 +678,7 @@ static bool node_tag_in_set(const GumboNode* node, const gumbo_tagset tags) {
|
|
747
678
|
|
748
679
|
// Like node_tag_in, but for the single-tag case.
|
749
680
|
static bool node_qualified_tag_is(const GumboNode* node, GumboNamespaceEnum ns, GumboTag tag) {
|
750
|
-
return
|
681
|
+
return node->type == GUMBO_NODE_ELEMENT &&
|
751
682
|
node->v.element.tag == tag &&
|
752
683
|
node->v.element.tag_namespace == ns;
|
753
684
|
}
|
@@ -758,23 +689,6 @@ static bool node_html_tag_is(const GumboNode* node, GumboTag tag)
|
|
758
689
|
return node_qualified_tag_is(node, GUMBO_NAMESPACE_HTML, tag);
|
759
690
|
}
|
760
691
|
|
761
|
-
static void push_template_insertion_mode(GumboParser* parser, GumboInsertionMode mode) {
|
762
|
-
gumbo_vector_add(parser, (void*) mode, &parser->_parser_state->_template_insertion_modes);
|
763
|
-
}
|
764
|
-
|
765
|
-
static void pop_template_insertion_mode(GumboParser* parser) {
|
766
|
-
gumbo_vector_pop(parser, &parser->_parser_state->_template_insertion_modes);
|
767
|
-
}
|
768
|
-
|
769
|
-
// Returns the current template insertion mode. If the stack of template
|
770
|
-
// insertion modes is empty, this returns GUMBO_INSERTION_MODE_INITIAL.
|
771
|
-
static GumboInsertionMode get_current_template_insertion_mode(const GumboParser* parser) {
|
772
|
-
GumboVector* template_insertion_modes = &parser->_parser_state->_template_insertion_modes;
|
773
|
-
if (template_insertion_modes->length == 0) {
|
774
|
-
return GUMBO_INSERTION_MODE_INITIAL;
|
775
|
-
}
|
776
|
-
return (GumboInsertionMode) template_insertion_modes->data[(template_insertion_modes->length - 1)];
|
777
|
-
}
|
778
692
|
|
779
693
|
// http://www.whatwg.org/specs/web-apps/current-work/multipage/tree-construction.html#mathml-text-integration-point
|
780
694
|
static bool is_mathml_integration_point(const GumboNode* node) {
|
@@ -792,63 +706,6 @@ static bool is_html_integration_point(const GumboNode* node) {
|
|
792
706
|
"encoding", "application/xhtml+xml")));
|
793
707
|
}
|
794
708
|
|
795
|
-
|
796
|
-
// This represents a place to insert a node, consisting of a target parent and a
|
797
|
-
// child index within that parent. If the node should be inserted at the end of
|
798
|
-
// the parent's child, index will be -1.
|
799
|
-
typedef struct {
|
800
|
-
GumboNode* target;
|
801
|
-
int index;
|
802
|
-
} InsertionLocation;
|
803
|
-
|
804
|
-
InsertionLocation get_appropriate_insertion_location(GumboParser* parser, GumboNode* override_target) {
|
805
|
-
InsertionLocation retval = { override_target, -1 };
|
806
|
-
if (retval.target == NULL) {
|
807
|
-
// No override target; default to the current node, but special-case the
|
808
|
-
// root node since get_current_node() assumes the stack of open elements is
|
809
|
-
// non-empty.
|
810
|
-
retval.target = parser->_output->root != NULL ?
|
811
|
-
get_current_node(parser) : get_document_node(parser);
|
812
|
-
}
|
813
|
-
if (!parser->_parser_state->_foster_parent_insertions ||
|
814
|
-
!node_tag_in_set(retval.target, (gumbo_tagset) { TAG(TABLE), TAG(TBODY),
|
815
|
-
TAG(TFOOT), TAG(THEAD), TAG(TR) })) {
|
816
|
-
return retval;
|
817
|
-
}
|
818
|
-
|
819
|
-
// Foster-parenting case.
|
820
|
-
int last_template_index = -1;
|
821
|
-
int last_table_index = -1;
|
822
|
-
GumboVector* open_elements = &parser->_parser_state->_open_elements;
|
823
|
-
for (int i = 0; i < open_elements->length; ++i) {
|
824
|
-
if (node_html_tag_is(open_elements->data[i], GUMBO_TAG_TEMPLATE)) {
|
825
|
-
last_template_index = i;
|
826
|
-
}
|
827
|
-
if (node_html_tag_is(open_elements->data[i], GUMBO_TAG_TABLE)) {
|
828
|
-
last_table_index = i;
|
829
|
-
}
|
830
|
-
}
|
831
|
-
if (last_template_index != -1 &&
|
832
|
-
(last_table_index == -1 || last_template_index > last_table_index)) {
|
833
|
-
retval.target = open_elements->data[last_template_index];
|
834
|
-
return retval;
|
835
|
-
}
|
836
|
-
if (last_table_index == -1) {
|
837
|
-
retval.target = open_elements->data[0];
|
838
|
-
return retval;
|
839
|
-
}
|
840
|
-
GumboNode* last_table = open_elements->data[last_table_index];
|
841
|
-
if (last_table->parent != NULL) {
|
842
|
-
retval.target = last_table->parent;
|
843
|
-
retval.index = last_table->index_within_parent;
|
844
|
-
return retval;
|
845
|
-
}
|
846
|
-
|
847
|
-
retval.target = open_elements->data[last_table_index - 1];
|
848
|
-
return retval;
|
849
|
-
}
|
850
|
-
|
851
|
-
|
852
709
|
// Appends a node to the end of its parent, setting the "parent" and
|
853
710
|
// "index_within_parent" fields appropriately.
|
854
711
|
static void append_node(
|
@@ -856,7 +713,7 @@ static void append_node(
|
|
856
713
|
assert(node->parent == NULL);
|
857
714
|
assert(node->index_within_parent == -1);
|
858
715
|
GumboVector* children;
|
859
|
-
if (parent->type == GUMBO_NODE_ELEMENT
|
716
|
+
if (parent->type == GUMBO_NODE_ELEMENT) {
|
860
717
|
children = &parent->v.element.children;
|
861
718
|
} else {
|
862
719
|
assert(parent->type == GUMBO_NODE_DOCUMENT);
|
@@ -868,44 +725,66 @@ static void append_node(
|
|
868
725
|
assert(node->index_within_parent < children->length);
|
869
726
|
}
|
870
727
|
|
871
|
-
// Inserts a node at the specified
|
728
|
+
// Inserts a node at the specified index within its parent, updating the
|
872
729
|
// "parent" and "index_within_parent" fields of it and all its siblings.
|
873
|
-
// If the index of the location is -1, this calls append_node.
|
874
730
|
static void insert_node(
|
875
|
-
|
731
|
+
GumboParser* parser, GumboNode* parent, int index, GumboNode* node) {
|
876
732
|
assert(node->parent == NULL);
|
877
733
|
assert(node->index_within_parent == -1);
|
878
|
-
|
879
|
-
|
880
|
-
|
881
|
-
|
882
|
-
|
883
|
-
|
884
|
-
|
885
|
-
|
886
|
-
|
887
|
-
|
888
|
-
|
889
|
-
|
890
|
-
|
734
|
+
assert(parent->type == GUMBO_NODE_ELEMENT);
|
735
|
+
GumboVector* children = &parent->v.element.children;
|
736
|
+
assert(index >= 0);
|
737
|
+
assert(index < children->length);
|
738
|
+
node->parent = parent;
|
739
|
+
node->index_within_parent = index;
|
740
|
+
gumbo_vector_insert_at(parser, (void*) node, index, children);
|
741
|
+
assert(node->index_within_parent < children->length);
|
742
|
+
for (int i = index + 1; i < children->length; ++i) {
|
743
|
+
GumboNode* sibling = children->data[i];
|
744
|
+
sibling->index_within_parent = i;
|
745
|
+
assert(sibling->index_within_parent < children->length);
|
746
|
+
}
|
747
|
+
}
|
891
748
|
|
892
|
-
|
893
|
-
|
894
|
-
|
895
|
-
|
896
|
-
|
897
|
-
|
898
|
-
|
899
|
-
|
900
|
-
|
901
|
-
|
749
|
+
// http://www.whatwg.org/specs/web-apps/current-work/complete/tokenization.html#foster-parenting
|
750
|
+
static void foster_parent_element(GumboParser* parser, GumboNode* node) {
|
751
|
+
GumboVector* open_elements = &parser->_parser_state->_open_elements;
|
752
|
+
assert(open_elements->length > 2);
|
753
|
+
|
754
|
+
node->parse_flags |= GUMBO_INSERTION_FOSTER_PARENTED;
|
755
|
+
GumboNode* foster_parent_element = open_elements->data[0];
|
756
|
+
assert(foster_parent_element->type == GUMBO_NODE_ELEMENT);
|
757
|
+
assert(node_html_tag_is(foster_parent_element, GUMBO_TAG_HTML));
|
758
|
+
for (int i = open_elements->length; --i > 1; ) {
|
759
|
+
GumboNode* table_element = open_elements->data[i];
|
760
|
+
if (node_html_tag_is(table_element, GUMBO_TAG_TABLE)) {
|
761
|
+
foster_parent_element = table_element->parent;
|
762
|
+
if (!foster_parent_element ||
|
763
|
+
foster_parent_element->type != GUMBO_NODE_ELEMENT) {
|
764
|
+
// Table has no parent; spec says it's possible if a script manipulated
|
765
|
+
// the DOM, although I don't think we have to worry about this case.
|
766
|
+
gumbo_debug("Table has no parent.\n");
|
767
|
+
foster_parent_element = open_elements->data[i - 1];
|
768
|
+
break;
|
769
|
+
}
|
770
|
+
assert(foster_parent_element->type == GUMBO_NODE_ELEMENT);
|
771
|
+
gumbo_debug("Found enclosing table (%x) at %d; parent=%s, index=%d.\n",
|
772
|
+
table_element, i, gumbo_normalized_tagname(
|
773
|
+
foster_parent_element->v.element.tag),
|
774
|
+
table_element->index_within_parent);
|
775
|
+
assert(foster_parent_element->v.element.children.data[
|
776
|
+
table_element->index_within_parent] == table_element);
|
777
|
+
insert_node(parser, foster_parent_element,
|
778
|
+
table_element->index_within_parent, node);
|
779
|
+
return;
|
902
780
|
}
|
903
|
-
} else {
|
904
|
-
append_node(parser, parent, node);
|
905
781
|
}
|
782
|
+
if (node->type == GUMBO_NODE_ELEMENT) {
|
783
|
+
gumbo_vector_add(parser, (void*) node, open_elements);
|
784
|
+
}
|
785
|
+
append_node(parser, foster_parent_element, node);
|
906
786
|
}
|
907
787
|
|
908
|
-
|
909
788
|
static void maybe_flush_text_node_buffer(GumboParser* parser) {
|
910
789
|
GumboParserState* state = parser->_parser_state;
|
911
790
|
TextNodeBufferState* buffer_state = &state->_text_node;
|
@@ -925,20 +804,20 @@ static void maybe_flush_text_node_buffer(GumboParser* parser) {
|
|
925
804
|
state->_current_token->original_text.data -
|
926
805
|
buffer_state->_start_original_text;
|
927
806
|
text_node_data->start_pos = buffer_state->_start_position;
|
928
|
-
|
929
|
-
|
930
|
-
|
931
|
-
|
932
|
-
InsertionLocation location = get_appropriate_insertion_location(parser, NULL);
|
933
|
-
if (location.target->type == GUMBO_NODE_DOCUMENT) {
|
934
|
-
// The DOM does not allow Document nodes to have Text children, so per the
|
935
|
-
// spec, they are dropped on the floor.
|
936
|
-
destroy_node(parser, text_node);
|
807
|
+
if (state->_foster_parent_insertions &&
|
808
|
+
node_tag_in_set(get_current_node(parser), (gumbo_tagset) { TAG(TABLE), TAG(TBODY), TAG(TFOOT),
|
809
|
+
TAG(THEAD), TAG(TR) })) {
|
810
|
+
foster_parent_element(parser, text_node);
|
937
811
|
} else {
|
938
|
-
|
812
|
+
append_node(
|
813
|
+
parser, parser->_output->root ?
|
814
|
+
get_current_node(parser) : parser->_output->document, text_node);
|
939
815
|
}
|
816
|
+
gumbo_debug("Flushing text node buffer of %.*s.\n",
|
817
|
+
(int) buffer_state->_buffer.length, buffer_state->_buffer.data);
|
940
818
|
|
941
|
-
|
819
|
+
gumbo_string_buffer_destroy(parser, &buffer_state->_buffer);
|
820
|
+
gumbo_string_buffer_init(parser, &buffer_state->_buffer);
|
942
821
|
buffer_state->_type = GUMBO_NODE_WHITESPACE;
|
943
822
|
assert(buffer_state->_buffer.length == 0);
|
944
823
|
}
|
@@ -965,7 +844,7 @@ static GumboNode* pop_current_node(GumboParser* parser) {
|
|
965
844
|
assert(state->_open_elements.length == 0);
|
966
845
|
return NULL;
|
967
846
|
}
|
968
|
-
assert(current_node->type == GUMBO_NODE_ELEMENT
|
847
|
+
assert(current_node->type == GUMBO_NODE_ELEMENT);
|
969
848
|
bool is_closed_body_or_html_tag =
|
970
849
|
(node_html_tag_is(current_node, GUMBO_TAG_BODY) && state->_closed_body_tag) ||
|
971
850
|
(node_html_tag_is(current_node, GUMBO_TAG_HTML) && state->_closed_html_tag);
|
@@ -994,14 +873,14 @@ static void append_comment_node(
|
|
994
873
|
|
995
874
|
// http://www.whatwg.org/specs/web-apps/current-work/complete/tokenization.html#clear-the-stack-back-to-a-table-row-context
|
996
875
|
static void clear_stack_to_table_row_context(GumboParser* parser) {
|
997
|
-
while (!node_tag_in_set(get_current_node(parser), (gumbo_tagset) { TAG(HTML), TAG(TR)
|
876
|
+
while (!node_tag_in_set(get_current_node(parser), (gumbo_tagset) { TAG(HTML), TAG(TR) })) {
|
998
877
|
pop_current_node(parser);
|
999
878
|
}
|
1000
879
|
}
|
1001
880
|
|
1002
881
|
// http://www.whatwg.org/specs/web-apps/current-work/complete/tokenization.html#clear-the-stack-back-to-a-table-context
|
1003
882
|
static void clear_stack_to_table_context(GumboParser* parser) {
|
1004
|
-
while (!node_tag_in_set(get_current_node(parser), (gumbo_tagset) { TAG(HTML), TAG(TABLE)
|
883
|
+
while (!node_tag_in_set(get_current_node(parser), (gumbo_tagset) { TAG(HTML), TAG(TABLE) } )) {
|
1005
884
|
pop_current_node(parser);
|
1006
885
|
}
|
1007
886
|
}
|
@@ -1009,7 +888,7 @@ static void clear_stack_to_table_context(GumboParser* parser) {
|
|
1009
888
|
// http://www.whatwg.org/specs/web-apps/current-work/complete/tokenization.html#clear-the-stack-back-to-a-table-body-context
|
1010
889
|
void clear_stack_to_table_body_context(GumboParser* parser) {
|
1011
890
|
while (!node_tag_in_set(get_current_node(parser), (gumbo_tagset) { TAG(HTML), TAG(TBODY),
|
1012
|
-
TAG(TFOOT), TAG(THEAD)
|
891
|
+
TAG(TFOOT), TAG(THEAD) })) {
|
1013
892
|
pop_current_node(parser);
|
1014
893
|
}
|
1015
894
|
}
|
@@ -1024,8 +903,7 @@ static GumboNode* create_element(GumboParser* parser, GumboTag tag) {
|
|
1024
903
|
element->tag_namespace = GUMBO_NAMESPACE_HTML;
|
1025
904
|
element->original_tag = kGumboEmptyString;
|
1026
905
|
element->original_end_tag = kGumboEmptyString;
|
1027
|
-
element->start_pos =
|
1028
|
-
parser->_parser_state->_current_token->position : kGumboEmptySourcePosition;
|
906
|
+
element->start_pos = parser->_parser_state->_current_token->position;
|
1029
907
|
element->end_pos = kGumboEmptySourcePosition;
|
1030
908
|
return node;
|
1031
909
|
}
|
@@ -1036,12 +914,7 @@ static GumboNode* create_element_from_token(
|
|
1036
914
|
assert(token->type == GUMBO_TOKEN_START_TAG);
|
1037
915
|
GumboTokenStartTag* start_tag = &token->v.start_tag;
|
1038
916
|
|
1039
|
-
|
1040
|
-
tag_namespace == GUMBO_NAMESPACE_HTML &&
|
1041
|
-
start_tag->tag == GUMBO_TAG_TEMPLATE)
|
1042
|
-
? GUMBO_NODE_TEMPLATE : GUMBO_NODE_ELEMENT;
|
1043
|
-
|
1044
|
-
GumboNode* node = create_node(parser, type);
|
917
|
+
GumboNode* node = create_node(parser, GUMBO_NODE_ELEMENT);
|
1045
918
|
GumboElement* element = &node->v.element;
|
1046
919
|
gumbo_vector_init(parser, 1, &element->children);
|
1047
920
|
element->attributes = start_tag->attributes;
|
@@ -1078,9 +951,20 @@ static void insert_element(GumboParser* parser, GumboNode* node,
|
|
1078
951
|
if (!is_reconstructing_formatting_elements) {
|
1079
952
|
maybe_flush_text_node_buffer(parser);
|
1080
953
|
}
|
1081
|
-
|
1082
|
-
|
1083
|
-
|
954
|
+
if (state->_foster_parent_insertions &&
|
955
|
+
node_tag_in_set(get_current_node(parser), (gumbo_tagset) { TAG(TABLE), TAG(TBODY), TAG(TFOOT),
|
956
|
+
TAG(THEAD), TAG(TR) } )) {
|
957
|
+
foster_parent_element(parser, node);
|
958
|
+
gumbo_vector_add(parser, (void*) node, &state->_open_elements);
|
959
|
+
return;
|
960
|
+
}
|
961
|
+
|
962
|
+
// This is called to insert the root HTML element, but get_current_node
|
963
|
+
// assumes the stack of open elements is non-empty, so we need special
|
964
|
+
// handling for this case.
|
965
|
+
append_node(
|
966
|
+
parser, parser->_output->root ?
|
967
|
+
get_current_node(parser) : parser->_output->document, node);
|
1084
968
|
gumbo_vector_add(parser, (void*) node, &state->_open_elements);
|
1085
969
|
}
|
1086
970
|
|
@@ -1253,7 +1137,7 @@ static bool is_open_element(GumboParser* parser, const GumboNode* node) {
|
|
1253
1137
|
// values are fresh copies.
|
1254
1138
|
GumboNode* clone_node(
|
1255
1139
|
GumboParser* parser, const GumboNode* node, GumboParseFlags reason) {
|
1256
|
-
assert(node->type == GUMBO_NODE_ELEMENT
|
1140
|
+
assert(node->type == GUMBO_NODE_ELEMENT);
|
1257
1141
|
GumboNode* new_node = gumbo_parser_allocate(parser, sizeof(GumboNode));
|
1258
1142
|
*new_node = *node;
|
1259
1143
|
new_node->parent = NULL;
|
@@ -1323,10 +1207,7 @@ static void reconstruct_active_formatting_elements(GumboParser* parser) {
|
|
1323
1207
|
GumboNode* clone = clone_node(
|
1324
1208
|
parser, element, GUMBO_INSERTION_RECONSTRUCTED_FORMATTING_ELEMENT);
|
1325
1209
|
// Step 9.
|
1326
|
-
|
1327
|
-
insert_node(parser, clone, location);
|
1328
|
-
gumbo_vector_add(parser, (void*) clone, &parser->_parser_state->_open_elements);
|
1329
|
-
|
1210
|
+
insert_element(parser, clone, true);
|
1330
1211
|
// Step 10.
|
1331
1212
|
elements->data[i] = clone;
|
1332
1213
|
gumbo_debug("Reconstructed %s element at %d.\n",
|
@@ -1380,40 +1261,37 @@ static GumboQuirksModeEnum compute_quirks_mode(
|
|
1380
1261
|
// names. For example, "has an element in list scope" looks for an element of
|
1381
1262
|
// the given qualified name within the nearest enclosing <ol> or <ul>, along
|
1382
1263
|
// with a bunch of generic element types that serve to "firewall" their content
|
1383
|
-
// from the rest of the document.
|
1384
|
-
|
1385
|
-
static bool has_an_element_in_specific_scope(GumboParser* parser,
|
1386
|
-
int expected_size, const GumboTag *expected, bool negate, const gumbo_tagset tags) {
|
1264
|
+
// from the rest of the document.
|
1265
|
+
// Walks the stack of open elements from the top towards the bottom, skipping
// nodes that are not elements.
//   * Returns true as soon as an element contained in |expected| is found.
//   * Otherwise the element is tested against |tags|.  With |negate| false,
//     an element that IS in |tags| ends the search with false; with |negate|
//     true, an element that is NOT in |tags| ends the search with false.
// Returns false if the whole stack is exhausted without a match.
static bool has_an_element_in_specific_scope(GumboParser* parser, gumbo_tagset expected, bool negate, const gumbo_tagset tags) {
  GumboVector* stack = &parser->_parser_state->_open_elements;
  int pos = stack->length;
  while (--pos >= 0) {
    const GumboNode* candidate = stack->data[pos];
    if (candidate->type != GUMBO_NODE_ELEMENT) {
      continue;
    }
    if (TAGSET_INCLUDES(expected, candidate->v.element.tag_namespace, candidate->v.element.tag)) {
      return true;
    }
    const bool in_tags =
        TAGSET_INCLUDES(tags, candidate->v.element.tag_namespace, candidate->v.element.tag)
            ? true : false;
    if (negate != in_tags) {
      // This element terminates the search.
      return false;
    }
  }
  return false;
}
|
1411
1287
|
|
1412
1288
|
// http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#has-an-element-in-scope
|
1413
1289
|
static bool has_an_element_in_scope(GumboParser* parser, GumboTag tag) {
|
1414
|
-
|
1290
|
+
gumbo_tagset qualset = {0};
|
1291
|
+
qualset[(int) tag] = (1 << (int) GUMBO_NAMESPACE_HTML);
|
1292
|
+
return has_an_element_in_specific_scope(parser, qualset, false, (gumbo_tagset) { TAG(APPLET),
|
1415
1293
|
TAG(CAPTION), TAG(HTML), TAG(TABLE), TAG(TD), TAG(TH), TAG(MARQUEE),
|
1416
|
-
TAG(OBJECT),
|
1294
|
+
TAG(OBJECT), TAG_MATHML(MI), TAG_MATHML(MO), TAG_MATHML(MN),
|
1417
1295
|
TAG_MATHML(MS), TAG_MATHML(MTEXT), TAG_MATHML(ANNOTATION_XML),
|
1418
1296
|
TAG_SVG(FOREIGNOBJECT), TAG_SVG(DESC), TAG_SVG(TITLE) });
|
1419
1297
|
}
|
@@ -1431,11 +1309,11 @@ static bool has_node_in_scope(GumboParser* parser, const GumboNode* node) {
|
|
1431
1309
|
if (current == node) {
|
1432
1310
|
return true;
|
1433
1311
|
}
|
1434
|
-
if (current->type != GUMBO_NODE_ELEMENT
|
1312
|
+
if (current->type != GUMBO_NODE_ELEMENT) {
|
1435
1313
|
continue;
|
1436
1314
|
}
|
1437
1315
|
if (node_tag_in_set(current, (gumbo_tagset) { TAG(APPLET), TAG(CAPTION), TAG(HTML),
|
1438
|
-
TAG(TABLE), TAG(TD), TAG(TH), TAG(MARQUEE), TAG(OBJECT),
|
1316
|
+
TAG(TABLE), TAG(TD), TAG(TH), TAG(MARQUEE), TAG(OBJECT),
|
1439
1317
|
TAG_MATHML(MI), TAG_MATHML(MO), TAG_MATHML(MN), TAG_MATHML(MS),
|
1440
1318
|
TAG_MATHML(MTEXT), TAG_MATHML(ANNOTATION_XML), TAG_SVG(FOREIGNOBJECT),
|
1441
1319
|
TAG_SVG(DESC), TAG_SVG(TITLE) } )) {
|
@@ -1448,19 +1326,21 @@ static bool has_node_in_scope(GumboParser* parser, const GumboNode* node) {
|
|
1448
1326
|
|
1449
1327
|
// Like has_an_element_in_scope, but restricts the expected qualified name to a
|
1450
1328
|
// range of possible qualified names instead of just a single one.
|
1451
|
-
static bool has_an_element_in_scope_with_tagname(GumboParser* parser,
|
1452
|
-
return has_an_element_in_specific_scope(parser,
|
1453
|
-
|
1454
|
-
|
1455
|
-
|
1456
|
-
|
1329
|
+
static bool has_an_element_in_scope_with_tagname(GumboParser* parser, gumbo_tagset qualset) {
|
1330
|
+
return has_an_element_in_specific_scope(parser, qualset, false, (gumbo_tagset) { TAG(APPLET),
|
1331
|
+
TAG(CAPTION), TAG(HTML), TAG(TABLE), TAG(TD), TAG(TH), TAG(MARQUEE),
|
1332
|
+
TAG(OBJECT), TAG_MATHML(MI), TAG_MATHML(MO), TAG_MATHML(MN),
|
1333
|
+
TAG_MATHML(MS), TAG_MATHML(MTEXT), TAG_MATHML(ANNOTATION_XML),
|
1334
|
+
TAG_SVG(FOREIGNOBJECT), TAG_SVG(DESC), TAG_SVG(TITLE) });
|
1457
1335
|
}
|
1458
1336
|
|
1459
1337
|
// http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#has-an-element-in-list-item-scope
|
1460
1338
|
static bool has_an_element_in_list_scope(GumboParser* parser, GumboTag tag) {
|
1461
|
-
|
1339
|
+
gumbo_tagset qualset = {0};
|
1340
|
+
qualset[(int)tag] = (1 << (int)(GUMBO_NAMESPACE_HTML));
|
1341
|
+
return has_an_element_in_specific_scope(parser, qualset, false, (gumbo_tagset) { TAG(APPLET),
|
1462
1342
|
TAG(CAPTION), TAG(HTML), TAG(TABLE), TAG(TD), TAG(TH), TAG(MARQUEE),
|
1463
|
-
TAG(OBJECT),
|
1343
|
+
TAG(OBJECT), TAG_MATHML(MI), TAG_MATHML(MO), TAG_MATHML(MN),
|
1464
1344
|
TAG_MATHML(MS), TAG_MATHML(MTEXT), TAG_MATHML(ANNOTATION_XML),
|
1465
1345
|
TAG_SVG(FOREIGNOBJECT), TAG_SVG(DESC), TAG_SVG(TITLE), TAG(OL),
|
1466
1346
|
TAG(UL) });
|
@@ -1468,22 +1348,27 @@ static bool has_an_element_in_list_scope(GumboParser* parser, GumboTag tag) {
|
|
1468
1348
|
|
1469
1349
|
// http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#has-an-element-in-button-scope
|
1470
1350
|
static bool has_an_element_in_button_scope(GumboParser* parser, GumboTag tag) {
|
1471
|
-
|
1351
|
+
gumbo_tagset qualset = {0};
|
1352
|
+
qualset[(int) tag] = (1 << (int)(GUMBO_NAMESPACE_HTML));
|
1353
|
+
return has_an_element_in_specific_scope(parser, qualset, false, (gumbo_tagset) { TAG(APPLET),
|
1472
1354
|
TAG(CAPTION), TAG(HTML), TAG(TABLE), TAG(TD), TAG(TH), TAG(MARQUEE),
|
1473
|
-
TAG(OBJECT),
|
1355
|
+
TAG(OBJECT), TAG_MATHML(MI), TAG_MATHML(MO), TAG_MATHML(MN),
|
1474
1356
|
TAG_MATHML(MS), TAG_MATHML(MTEXT), TAG_MATHML(ANNOTATION_XML),
|
1475
1357
|
TAG_SVG(FOREIGNOBJECT), TAG_SVG(DESC), TAG_SVG(TITLE), TAG(BUTTON) });
|
1476
1358
|
}
|
1477
1359
|
|
1478
1360
|
// http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#has-an-element-in-table-scope
|
1479
1361
|
static bool has_an_element_in_table_scope(GumboParser* parser, GumboTag tag) {
|
1480
|
-
|
1481
|
-
|
1362
|
+
gumbo_tagset qualset = {0};
|
1363
|
+
qualset[(int) tag] = (1 << (int)(GUMBO_NAMESPACE_HTML));
|
1364
|
+
return has_an_element_in_specific_scope(parser, qualset, false, (gumbo_tagset) { TAG(HTML), TAG(TABLE) });
|
1482
1365
|
}
|
1483
1366
|
|
1484
1367
|
// http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#has-an-element-in-select-scope
|
1485
1368
|
static bool has_an_element_in_select_scope(GumboParser* parser, GumboTag tag) {
|
1486
|
-
|
1369
|
+
gumbo_tagset qualset = {0};
|
1370
|
+
qualset[(int) tag] = (1 << (int)(GUMBO_NAMESPACE_HTML));
|
1371
|
+
return has_an_element_in_specific_scope(parser, qualset, true, (gumbo_tagset) { TAG(OPTGROUP), TAG(OPTION) });
|
1487
1372
|
}
|
1488
1373
|
|
1489
1374
|
// http://www.whatwg.org/specs/web-apps/current-work/complete/tokenization.html#generate-implied-end-tags
|
@@ -1491,24 +1376,12 @@ static bool has_an_element_in_select_scope(GumboParser* parser, GumboTag tag) {
|
|
1491
1376
|
// Pass GUMBO_TAG_LAST to not exclude any of them.
|
1492
1377
|
// Pops the current node for as long as it is one of <dd>, <dt>, <li>,
// <option>, <optgroup>, <p>, <rp> or <rt> and is not an HTML element with tag
// |exception|.
static void generate_implied_end_tags(GumboParser* parser, GumboTag exception) {
  gumbo_tagset implied = { TAG(DD), TAG(DT),
      TAG(LI), TAG(OPTION), TAG(OPTGROUP), TAG(P), TAG(RP), TAG(RT) };
  while (node_tag_in_set(get_current_node(parser), implied) &&
         !node_html_tag_is(get_current_node(parser), exception)) {
    pop_current_node(parser);
  }
}
|
1500
1384
|
|
1501
|
-
// This is the "generate all implied end tags thoroughly" clause of the spec.
|
1502
|
-
// https://html.spec.whatwg.org/multipage/syntax.html#closing-elements-that-have-implied-end-tags
|
1503
|
-
static void generate_all_implied_end_tags_thoroughly(GumboParser* parser) {
|
1504
|
-
for (;
|
1505
|
-
node_tag_in_set(get_current_node(parser), (gumbo_tagset) { TAG(CAPTION),
|
1506
|
-
TAG(COLGROUP), TAG(DD), TAG(DT), TAG(LI), TAG(OPTION), TAG(OPTGROUP),
|
1507
|
-
TAG(P), TAG(RP), TAG(RT), TAG(RTC), TAG(TBODY), TAG(TD),
|
1508
|
-
TAG(TFOOT), TAG(TH), TAG(HEAD), TAG(TR) });
|
1509
|
-
pop_current_node(parser));
|
1510
|
-
}
|
1511
|
-
|
1512
1385
|
// This factors out the clauses relating to "act as if an end tag token with tag
|
1513
1386
|
// name "table" had been seen. Returns true if there's a table element in table
|
1514
1387
|
// scope which was successfully closed, false if not and the token should be
|
@@ -1573,7 +1446,7 @@ static void close_current_select(GumboParser* parser) {
|
|
1573
1446
|
// The list of nodes in the "special" category:
|
1574
1447
|
// http://www.whatwg.org/specs/web-apps/current-work/complete/parsing.html#special
|
1575
1448
|
static bool is_special_node(const GumboNode* node) {
|
1576
|
-
assert(node->type == GUMBO_NODE_ELEMENT
|
1449
|
+
assert(node->type == GUMBO_NODE_ELEMENT);
|
1577
1450
|
return node_tag_in_set(node, (gumbo_tagset) { TAG(ADDRESS), TAG(APPLET), TAG(AREA),
|
1578
1451
|
TAG(ARTICLE), TAG(ASIDE), TAG(BASE), TAG(BASEFONT), TAG(BGSOUND), TAG(BLOCKQUOTE),
|
1579
1452
|
TAG(BODY), TAG(BR), TAG(BUTTON), TAG(CAPTION), TAG(CENTER), TAG(COL),
|
@@ -1585,8 +1458,8 @@ static bool is_special_node(const GumboNode* node) {
|
|
1585
1458
|
TAG(LISTING), TAG(MARQUEE), TAG(MENU), TAG(META), TAG(NAV), TAG(NOEMBED),
|
1586
1459
|
TAG(NOFRAMES), TAG(NOSCRIPT), TAG(OBJECT), TAG(OL), TAG(P), TAG(PARAM),
|
1587
1460
|
TAG(PLAINTEXT), TAG(PRE), TAG(SCRIPT), TAG(SECTION), TAG(SELECT), TAG(STYLE),
|
1588
|
-
TAG(SUMMARY), TAG(TABLE), TAG(TBODY), TAG(TD), TAG(
|
1589
|
-
TAG(
|
1461
|
+
TAG(SUMMARY), TAG(TABLE), TAG(TBODY), TAG(TD), TAG(TEXTAREA), TAG(TFOOT),
|
1462
|
+
TAG(TH), TAG(THEAD), TAG(TITLE), TAG(TR), TAG(UL), TAG(WBR), TAG(XMP),
|
1590
1463
|
|
1591
1464
|
TAG_MATHML(MI), TAG_MATHML(MO), TAG_MATHML(MN), TAG_MATHML(MS),
|
1592
1465
|
TAG_MATHML(MTEXT), TAG_MATHML(ANNOTATION_XML),
|
@@ -1796,20 +1669,13 @@ static void remove_from_parent(GumboParser* parser, GumboNode* node) {
|
|
1796
1669
|
|
1797
1670
|
// http://www.whatwg.org/specs/web-apps/current-work/multipage/the-end.html#an-introduction-to-error-handling-and-strange-cases-in-the-parser
|
1798
1671
|
// Also described in the "in body" handling for end formatting tags.
|
1799
|
-
static bool adoption_agency_algorithm(
|
1672
|
+
static bool adoption_agency_algorithm(
|
1673
|
+
GumboParser* parser, GumboToken* token, GumboTag closing_tag) {
|
1800
1674
|
GumboParserState* state = parser->_parser_state;
|
1801
1675
|
gumbo_debug("Entering adoption agency algorithm.\n");
|
1802
|
-
//
|
1803
|
-
GumboNode* current_node = get_current_node(parser);
|
1804
|
-
if (current_node->v.element.tag_namespace == GUMBO_NAMESPACE_HTML &&
|
1805
|
-
current_node->v.element.tag == subject &&
|
1806
|
-
gumbo_vector_index_of(&state->_active_formatting_elements, current_node) == -1) {
|
1807
|
-
pop_current_node(parser);
|
1808
|
-
return false;
|
1809
|
-
}
|
1810
|
-
// Steps 2-4 & 20:
|
1676
|
+
// Steps 1-3 & 16:
|
1811
1677
|
for (int i = 0; i < 8; ++i) {
|
1812
|
-
// Step
|
1678
|
+
// Step 4.
|
1813
1679
|
GumboNode* formatting_node = NULL;
|
1814
1680
|
int formatting_node_in_open_elements = -1;
|
1815
1681
|
for (int j = state->_active_formatting_elements.length; --j >= 0; ) {
|
@@ -1819,13 +1685,13 @@ static bool adoption_agency_algorithm(GumboParser* parser, GumboToken* token, Gu
|
|
1819
1685
|
// Last scope marker; abort the algorithm.
|
1820
1686
|
return false;
|
1821
1687
|
}
|
1822
|
-
if (
|
1688
|
+
if (current_node->type == GUMBO_NODE_ELEMENT && current_node->v.element.tag == closing_tag) {
|
1823
1689
|
// Found it.
|
1824
1690
|
formatting_node = current_node;
|
1825
1691
|
formatting_node_in_open_elements = gumbo_vector_index_of(
|
1826
|
-
|
1692
|
+
&state->_open_elements, formatting_node);
|
1827
1693
|
gumbo_debug("Formatting element of tag %s at %d.\n",
|
1828
|
-
gumbo_normalized_tagname(
|
1694
|
+
gumbo_normalized_tagname(closing_tag),
|
1829
1695
|
formatting_node_in_open_elements);
|
1830
1696
|
break;
|
1831
1697
|
}
|
@@ -1838,23 +1704,18 @@ static bool adoption_agency_algorithm(GumboParser* parser, GumboToken* token, Gu
|
|
1838
1704
|
return false;
|
1839
1705
|
}
|
1840
1706
|
|
1841
|
-
// Step 6
|
1842
1707
|
if (formatting_node_in_open_elements == -1) {
|
1843
1708
|
gumbo_debug("Formatting node not on stack of open elements.\n");
|
1844
|
-
parser_add_parse_error(parser, token);
|
1845
1709
|
gumbo_vector_remove(parser, formatting_node,
|
1846
1710
|
&state->_active_formatting_elements);
|
1847
1711
|
return false;
|
1848
1712
|
}
|
1849
1713
|
|
1850
|
-
// Step 7
|
1851
1714
|
if (!has_an_element_in_scope(parser, formatting_node->v.element.tag)) {
|
1852
1715
|
parser_add_parse_error(parser, token);
|
1853
1716
|
gumbo_debug("Element not in scope.\n");
|
1854
1717
|
return false;
|
1855
1718
|
}
|
1856
|
-
|
1857
|
-
// Step 8
|
1858
1719
|
if (formatting_node != get_current_node(parser)) {
|
1859
1720
|
parser_add_parse_error(parser, token); // But continue onwards.
|
1860
1721
|
}
|
@@ -1862,20 +1723,20 @@ static bool adoption_agency_algorithm(GumboParser* parser, GumboToken* token, Gu
|
|
1862
1723
|
assert(!node_html_tag_is(formatting_node, GUMBO_TAG_HTML));
|
1863
1724
|
assert(!node_html_tag_is(formatting_node, GUMBO_TAG_BODY));
|
1864
1725
|
|
1865
|
-
// Step
|
1726
|
+
// Step 5 & 6.
|
1866
1727
|
GumboNode* furthest_block = NULL;
|
1867
1728
|
for (int j = formatting_node_in_open_elements;
|
1868
1729
|
j < state->_open_elements.length; ++j) {
|
1869
1730
|
assert(j > 0);
|
1870
1731
|
GumboNode* current = state->_open_elements.data[j];
|
1871
1732
|
if (is_special_node(current)) {
|
1872
|
-
// Step
|
1733
|
+
// Step 5.
|
1873
1734
|
furthest_block = current;
|
1874
1735
|
break;
|
1875
1736
|
}
|
1876
1737
|
}
|
1877
1738
|
if (!furthest_block) {
|
1878
|
-
// Step
|
1739
|
+
// Step 6.
|
1879
1740
|
while (get_current_node(parser) != formatting_node) {
|
1880
1741
|
pop_current_node(parser);
|
1881
1742
|
}
|
@@ -1888,35 +1749,32 @@ static bool adoption_agency_algorithm(GumboParser* parser, GumboToken* token, Gu
|
|
1888
1749
|
assert(!node_html_tag_is(furthest_block, GUMBO_TAG_HTML));
|
1889
1750
|
assert(furthest_block);
|
1890
1751
|
|
1891
|
-
// Step
|
1752
|
+
// Step 7.
|
1892
1753
|
// Elements may be moved and reparented by this algorithm, so
|
1893
1754
|
// common_ancestor is not necessarily the same as formatting_node->parent.
|
1894
1755
|
GumboNode* common_ancestor =
|
1895
|
-
|
1896
|
-
|
1756
|
+
state->_open_elements.data[gumbo_vector_index_of(
|
1757
|
+
&state->_open_elements, formatting_node) - 1];
|
1897
1758
|
gumbo_debug("Common ancestor tag = %s, furthest block tag = %s.\n",
|
1898
1759
|
gumbo_normalized_tagname(common_ancestor->v.element.tag),
|
1899
1760
|
gumbo_normalized_tagname(furthest_block->v.element.tag));
|
1900
1761
|
|
1901
|
-
// Step
|
1762
|
+
// Step 8.
|
1902
1763
|
int bookmark = gumbo_vector_index_of(
|
1903
|
-
|
1904
|
-
|
1905
|
-
// Step 13.
|
1764
|
+
&state->_active_formatting_elements, formatting_node);;
|
1765
|
+
// Step 9.
|
1906
1766
|
GumboNode* node = furthest_block;
|
1907
1767
|
GumboNode* last_node = furthest_block;
|
1908
1768
|
// Must be stored explicitly, in case node is removed from the stack of open
|
1909
1769
|
// elements, to handle step 9.4.
|
1910
1770
|
int saved_node_index = gumbo_vector_index_of(&state->_open_elements, node);
|
1911
1771
|
assert(saved_node_index > 0);
|
1912
|
-
// Step
|
1913
|
-
for (int j = 0
|
1914
|
-
// Step
|
1915
|
-
++j;
|
1916
|
-
// Step 13.3.
|
1772
|
+
// Step 9.1-9.3 & 9.11.
|
1773
|
+
for (int j = 0; j < 3; ++j) {
|
1774
|
+
// Step 9.4.
|
1917
1775
|
int node_index = gumbo_vector_index_of(&state->_open_elements, node);
|
1918
1776
|
gumbo_debug(
|
1919
|
-
|
1777
|
+
"Current index: %d, last index: %d.\n", node_index, saved_node_index);
|
1920
1778
|
if (node_index == -1) {
|
1921
1779
|
node_index = saved_node_index;
|
1922
1780
|
}
|
@@ -1925,78 +1783,61 @@ static bool adoption_agency_algorithm(GumboParser* parser, GumboToken* token, Gu
|
|
1925
1783
|
assert(node_index < state->_open_elements.capacity);
|
1926
1784
|
node = state->_open_elements.data[node_index];
|
1927
1785
|
assert(node->parent);
|
1928
|
-
|
1929
|
-
|
1930
|
-
|
1931
|
-
}
|
1932
|
-
int formatting_index =
|
1933
|
-
gumbo_vector_index_of(&state->_active_formatting_elements, node);
|
1934
|
-
if (j > 3 && formatting_index != -1) {
|
1935
|
-
// Step 13.5.
|
1936
|
-
gumbo_debug(
|
1937
|
-
"Removing formatting element at %d.\n", formatting_index);
|
1938
|
-
gumbo_vector_remove_at(
|
1939
|
-
parser,
|
1940
|
-
formatting_index,
|
1941
|
-
&state->_active_formatting_elements);
|
1942
|
-
// Removing the element shifts all indices over by one, so we may need
|
1943
|
-
// to move the bookmark.
|
1944
|
-
if (formatting_index < bookmark) {
|
1945
|
-
--bookmark;
|
1946
|
-
gumbo_debug("Moving bookmark to %d.\n", bookmark);
|
1947
|
-
}
|
1948
|
-
continue;
|
1949
|
-
}
|
1950
|
-
if (formatting_index == -1) {
|
1951
|
-
// Step 13.6.
|
1786
|
+
// Step 9.5.
|
1787
|
+
if (gumbo_vector_index_of(
|
1788
|
+
&state->_active_formatting_elements, node) == -1) {
|
1952
1789
|
gumbo_vector_remove_at(parser, node_index, &state->_open_elements);
|
1953
1790
|
continue;
|
1791
|
+
} else if (node == formatting_node) {
|
1792
|
+
// Step 9.6.
|
1793
|
+
break;
|
1954
1794
|
}
|
1955
|
-
// Step
|
1956
|
-
|
1957
|
-
|
1795
|
+
// Step 9.7.
|
1796
|
+
int formatting_index = gumbo_vector_index_of(
|
1797
|
+
&state->_active_formatting_elements, node);
|
1958
1798
|
node = clone_node(parser, node, GUMBO_INSERTION_ADOPTION_AGENCY_CLONED);
|
1959
|
-
assert(formatting_index >= 0);
|
1960
1799
|
state->_active_formatting_elements.data[formatting_index] = node;
|
1961
|
-
assert(node_index >= 0);
|
1962
1800
|
state->_open_elements.data[node_index] = node;
|
1963
|
-
// Step
|
1801
|
+
// Step 9.8.
|
1964
1802
|
if (last_node == furthest_block) {
|
1965
1803
|
bookmark = formatting_index + 1;
|
1966
|
-
gumbo_debug("Bookmark moved to %d.\n", bookmark);
|
1967
1804
|
assert(bookmark <= state->_active_formatting_elements.length);
|
1968
1805
|
}
|
1969
|
-
// Step
|
1806
|
+
// Step 9.9.
|
1970
1807
|
last_node->parse_flags |= GUMBO_INSERTION_ADOPTION_AGENCY_MOVED;
|
1971
1808
|
remove_from_parent(parser, last_node);
|
1972
1809
|
append_node(parser, node, last_node);
|
1973
|
-
// Step
|
1810
|
+
// Step 9.10.
|
1974
1811
|
last_node = node;
|
1975
|
-
}
|
1812
|
+
}
|
1976
1813
|
|
1977
|
-
// Step
|
1814
|
+
// Step 10.
|
1978
1815
|
gumbo_debug("Removing %s node from parent ",
|
1979
1816
|
gumbo_normalized_tagname(last_node->v.element.tag));
|
1980
1817
|
remove_from_parent(parser, last_node);
|
1981
1818
|
last_node->parse_flags |= GUMBO_INSERTION_ADOPTION_AGENCY_MOVED;
|
1982
|
-
|
1983
|
-
|
1984
|
-
|
1985
|
-
|
1986
|
-
|
1819
|
+
if (node_tag_in_set(common_ancestor, (gumbo_tagset) { TAG(TABLE), TAG(TBODY),
|
1820
|
+
TAG(TFOOT), TAG(THEAD), TAG(TR) })) {
|
1821
|
+
gumbo_debug("and foster-parenting it.\n");
|
1822
|
+
foster_parent_element(parser, last_node);
|
1823
|
+
} else {
|
1824
|
+
gumbo_debug("and inserting it into %s.\n",
|
1825
|
+
gumbo_normalized_tagname(common_ancestor->v.element.tag));
|
1826
|
+
append_node(parser, common_ancestor, last_node);
|
1827
|
+
}
|
1987
1828
|
|
1988
|
-
// Step
|
1829
|
+
// Step 11.
|
1989
1830
|
GumboNode* new_formatting_node = clone_node(
|
1990
|
-
|
1831
|
+
parser, formatting_node, GUMBO_INSERTION_ADOPTION_AGENCY_CLONED);
|
1991
1832
|
formatting_node->parse_flags |= GUMBO_INSERTION_IMPLICIT_END_TAG;
|
1992
1833
|
|
1993
|
-
// Step
|
1834
|
+
// Step 12. Instead of appending nodes one-by-one, we swap the children
|
1994
1835
|
// vector of furthest_block with the empty children of new_formatting_node,
|
1995
1836
|
// reducing memory traffic and allocations. We still have to reset their
|
1996
1837
|
// parent pointers, though.
|
1997
1838
|
GumboVector temp = new_formatting_node->v.element.children;
|
1998
1839
|
new_formatting_node->v.element.children =
|
1999
|
-
|
1840
|
+
furthest_block->v.element.children;
|
2000
1841
|
furthest_block->v.element.children = temp;
|
2001
1842
|
|
2002
1843
|
temp = new_formatting_node->v.element.children;
|
@@ -2005,39 +1846,36 @@ static bool adoption_agency_algorithm(GumboParser* parser, GumboToken* token, Gu
|
|
2005
1846
|
child->parent = new_formatting_node;
|
2006
1847
|
}
|
2007
1848
|
|
2008
|
-
// Step
|
1849
|
+
// Step 13.
|
2009
1850
|
append_node(parser, furthest_block, new_formatting_node);
|
2010
1851
|
|
2011
|
-
// Step
|
1852
|
+
// Step 14.
|
2012
1853
|
// If the formatting node was before the bookmark, it may shift over all
|
2013
1854
|
// indices after it, so we need to explicitly find the index and possibly
|
2014
1855
|
// adjust the bookmark.
|
2015
1856
|
int formatting_node_index = gumbo_vector_index_of(
|
2016
|
-
|
1857
|
+
&state->_active_formatting_elements, formatting_node);
|
2017
1858
|
assert(formatting_node_index != -1);
|
2018
1859
|
if (formatting_node_index < bookmark) {
|
2019
|
-
gumbo_debug(
|
2020
|
-
"Formatting node at %d is before bookmark at %d; decrementing.\n",
|
2021
|
-
formatting_node_index, bookmark);
|
2022
1860
|
--bookmark;
|
2023
1861
|
}
|
2024
1862
|
gumbo_vector_remove_at(
|
2025
|
-
|
1863
|
+
parser, formatting_node_index, &state->_active_formatting_elements);
|
2026
1864
|
assert(bookmark >= 0);
|
2027
1865
|
assert(bookmark <= state->_active_formatting_elements.length);
|
2028
1866
|
gumbo_vector_insert_at(parser, new_formatting_node, bookmark,
|
2029
1867
|
&state->_active_formatting_elements);
|
2030
1868
|
|
2031
|
-
// Step
|
1869
|
+
// Step 15.
|
2032
1870
|
gumbo_vector_remove(
|
2033
|
-
|
1871
|
+
parser, formatting_node, &state->_open_elements);
|
2034
1872
|
int insert_at = gumbo_vector_index_of(
|
2035
|
-
|
1873
|
+
&state->_open_elements, furthest_block) + 1;
|
2036
1874
|
assert(insert_at >= 0);
|
2037
1875
|
assert(insert_at <= state->_open_elements.length);
|
2038
1876
|
gumbo_vector_insert_at(
|
2039
|
-
|
2040
|
-
}
|
1877
|
+
parser, new_formatting_node, insert_at, &state->_open_elements);
|
1878
|
+
}
|
2041
1879
|
return true;
|
2042
1880
|
}
|
2043
1881
|
|
@@ -2216,45 +2054,29 @@ static bool handle_in_head(GumboParser* parser, GumboToken* token) {
|
|
2216
2054
|
assert(node_html_tag_is(head, GUMBO_TAG_HEAD));
|
2217
2055
|
set_insertion_mode(parser, GUMBO_INSERTION_MODE_AFTER_HEAD);
|
2218
2056
|
return true;
|
2219
|
-
} else if (
|
2220
|
-
|
2221
|
-
|
2222
|
-
|
2223
|
-
|
2224
|
-
|
2225
|
-
|
2226
|
-
|
2227
|
-
parser
|
2228
|
-
|
2229
|
-
|
2230
|
-
return true;
|
2231
|
-
} else if (tag_is(token, kEndTag, GUMBO_TAG_TEMPLATE)) {
|
2232
|
-
if (!has_open_element(parser, GUMBO_TAG_TEMPLATE)) {
|
2233
|
-
parser_add_parse_error(parser, token);
|
2234
|
-
ignore_token(parser);
|
2235
|
-
return false;
|
2236
|
-
}
|
2237
|
-
generate_all_implied_end_tags_thoroughly(parser);
|
2238
|
-
bool success = true;
|
2239
|
-
if (!node_html_tag_is(get_current_node(parser), GUMBO_TAG_TEMPLATE)) {
|
2240
|
-
parser_add_parse_error(parser, token);
|
2241
|
-
success = false;
|
2242
|
-
}
|
2243
|
-
while (!node_html_tag_is(pop_current_node(parser), GUMBO_TAG_TEMPLATE));
|
2244
|
-
clear_active_formatting_elements(parser);
|
2245
|
-
pop_template_insertion_mode(parser);
|
2246
|
-
reset_insertion_mode_appropriately(parser);
|
2247
|
-
return success;
|
2248
|
-
} else if (tag_is(token, kStartTag, GUMBO_TAG_HEAD) || (token->type == GUMBO_TOKEN_END_TAG)) {
|
2057
|
+
} else if (tag_is(token, kStartTag, GUMBO_TAG_HEAD)) {
|
2058
|
+
parser_add_parse_error(parser, token);
|
2059
|
+
ignore_token(parser);
|
2060
|
+
return false;
|
2061
|
+
} else if (tag_is(token, kStartTag, GUMBO_TAG_HEAD) ||
|
2062
|
+
(token->type == GUMBO_TOKEN_END_TAG &&
|
2063
|
+
!tag_in(token, kEndTag, (gumbo_tagset) { TAG(BODY), TAG(HTML),
|
2064
|
+
TAG(BR) }))) {
|
2065
|
+
parser_add_parse_error(parser, token);
|
2066
|
+
return false;
|
2067
|
+
} else if (tag_is(token, kStartTag, GUMBO_TAG_UNKNOWN) && token->v.start_tag.is_self_closing) {
|
2249
2068
|
parser_add_parse_error(parser, token);
|
2250
2069
|
ignore_token(parser);
|
2251
2070
|
return false;
|
2252
2071
|
} else {
|
2253
|
-
pop_current_node(parser);
|
2072
|
+
const GumboNode* node = pop_current_node(parser);
|
2073
|
+
assert(node_html_tag_is(node, GUMBO_TAG_HEAD));
|
2074
|
+
AVOID_UNUSED_VARIABLE_WARNING(node);
|
2254
2075
|
set_insertion_mode(parser, GUMBO_INSERTION_MODE_AFTER_HEAD);
|
2255
2076
|
parser->_parser_state->_reprocess_current_token = true;
|
2256
2077
|
return true;
|
2257
2078
|
}
|
2079
|
+
|
2258
2080
|
return true;
|
2259
2081
|
}
|
2260
2082
|
|
@@ -2320,7 +2142,7 @@ static bool handle_after_head(GumboParser* parser, GumboToken* token) {
|
|
2320
2142
|
} else if (tag_in(token, kStartTag, (gumbo_tagset) { TAG(BASE), TAG(BASEFONT),
|
2321
2143
|
TAG(BGSOUND), TAG(LINK), TAG(META),
|
2322
2144
|
TAG(NOFRAMES), TAG(SCRIPT), TAG(STYLE),
|
2323
|
-
TAG(
|
2145
|
+
TAG(TITLE) })) {
|
2324
2146
|
parser_add_parse_error(parser, token);
|
2325
2147
|
assert(state->_head_element != NULL);
|
2326
2148
|
// This must be flushed before we push the head element on, as there may be
|
@@ -2330,8 +2152,6 @@ static bool handle_after_head(GumboParser* parser, GumboToken* token) {
|
|
2330
2152
|
bool result = handle_in_head(parser, token);
|
2331
2153
|
gumbo_vector_remove(parser, state->_head_element, &state->_open_elements);
|
2332
2154
|
return result;
|
2333
|
-
} else if (tag_is(token, kEndTag, GUMBO_TAG_TEMPLATE)) {
|
2334
|
-
return handle_in_head(parser, token);
|
2335
2155
|
} else if (tag_is(token, kStartTag, GUMBO_TAG_HEAD) ||
|
2336
2156
|
(token->type == GUMBO_TOKEN_END_TAG &&
|
2337
2157
|
!tag_in(token, kEndTag, (gumbo_tagset) { TAG(BODY), TAG(HTML), TAG(BR) }))) {
|
@@ -2346,23 +2166,28 @@ static bool handle_after_head(GumboParser* parser, GumboToken* token) {
|
|
2346
2166
|
}
|
2347
2167
|
}
|
2348
2168
|
|
2349
|
-
static
|
2169
|
+
static void destroy_node(GumboParser* parser, GumboNode* node) {
|
2350
2170
|
switch (node->type) {
|
2351
2171
|
case GUMBO_NODE_DOCUMENT:
|
2352
2172
|
{
|
2353
2173
|
GumboDocument* doc = &node->v.document;
|
2174
|
+
for (int i = 0; i < doc->children.length; ++i) {
|
2175
|
+
destroy_node(parser, doc->children.data[i]);
|
2176
|
+
}
|
2354
2177
|
gumbo_parser_deallocate(parser, (void*) doc->children.data);
|
2355
2178
|
gumbo_parser_deallocate(parser, (void*) doc->name);
|
2356
2179
|
gumbo_parser_deallocate(parser, (void*) doc->public_identifier);
|
2357
2180
|
gumbo_parser_deallocate(parser, (void*) doc->system_identifier);
|
2358
2181
|
}
|
2359
2182
|
break;
|
2360
|
-
case GUMBO_NODE_TEMPLATE:
|
2361
2183
|
case GUMBO_NODE_ELEMENT:
|
2362
2184
|
for (int i = 0; i < node->v.element.attributes.length; ++i) {
|
2363
2185
|
gumbo_destroy_attribute(parser, node->v.element.attributes.data[i]);
|
2364
2186
|
}
|
2365
2187
|
gumbo_parser_deallocate(parser, node->v.element.attributes.data);
|
2188
|
+
for (int i = 0; i < node->v.element.children.length; ++i) {
|
2189
|
+
destroy_node(parser, node->v.element.children.data[i]);
|
2190
|
+
}
|
2366
2191
|
gumbo_parser_deallocate(parser, node->v.element.children.data);
|
2367
2192
|
break;
|
2368
2193
|
case GUMBO_NODE_TEXT:
|
@@ -2372,21 +2197,7 @@ static GumboNode* destroy_node(GumboParser* parser, GumboNode* node) {
|
|
2372
2197
|
gumbo_parser_deallocate(parser, (void*) node->v.text.text);
|
2373
2198
|
break;
|
2374
2199
|
}
|
2375
|
-
// Remove from the next/prev linked list.
|
2376
|
-
GumboNode* prev = node->prev;
|
2377
|
-
GumboNode* next = node->next;
|
2378
|
-
if (prev != NULL) {
|
2379
|
-
prev->next = next;
|
2380
|
-
}
|
2381
|
-
if (next != NULL) {
|
2382
|
-
next->prev = prev;
|
2383
|
-
}
|
2384
|
-
if (parser->_parser_state && parser->_parser_state->_current_node == node) {
|
2385
|
-
parser->_parser_state->_current_node = prev;
|
2386
|
-
}
|
2387
|
-
|
2388
2200
|
gumbo_parser_deallocate(parser, node);
|
2389
|
-
return next;
|
2390
2201
|
}
|
2391
2202
|
|
2392
2203
|
// http://www.whatwg.org/specs/web-apps/current-work/complete/tokenization.html#parsing-main-inbody
|
@@ -2415,24 +2226,20 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
2415
2226
|
ignore_token(parser);
|
2416
2227
|
return false;
|
2417
2228
|
} else if (tag_is(token, kStartTag, GUMBO_TAG_HTML)) {
|
2418
|
-
parser_add_parse_error(parser, token);
|
2419
|
-
if (has_open_element(parser, GUMBO_TAG_TEMPLATE)) {
|
2420
|
-
ignore_token(parser);
|
2421
|
-
return false;
|
2422
|
-
}
|
2423
2229
|
assert(parser->_output->root != NULL);
|
2424
2230
|
assert(parser->_output->root->type == GUMBO_NODE_ELEMENT);
|
2231
|
+
parser_add_parse_error(parser, token);
|
2425
2232
|
merge_attributes(parser, token, parser->_output->root);
|
2426
2233
|
return false;
|
2427
2234
|
} else if (tag_in(token, kStartTag, (gumbo_tagset) { TAG(BASE), TAG(BASEFONT),
|
2428
2235
|
TAG(BGSOUND), TAG(MENUITEM), TAG(LINK),
|
2429
2236
|
TAG(META), TAG(NOFRAMES), TAG(SCRIPT),
|
2430
|
-
TAG(STYLE), TAG(
|
2237
|
+
TAG(STYLE), TAG(TITLE) } )) {
|
2431
2238
|
return handle_in_head(parser, token);
|
2432
2239
|
} else if (tag_is(token, kStartTag, GUMBO_TAG_BODY)) {
|
2433
2240
|
parser_add_parse_error(parser, token);
|
2434
2241
|
if (state->_open_elements.length < 2 ||
|
2435
|
-
!node_html_tag_is(state->_open_elements.data[1], GUMBO_TAG_BODY)
|
2242
|
+
!node_html_tag_is(state->_open_elements.data[1], GUMBO_TAG_BODY)) {
|
2436
2243
|
ignore_token(parser);
|
2437
2244
|
return false;
|
2438
2245
|
}
|
@@ -2484,11 +2291,9 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
2484
2291
|
TAG(DT), TAG(LI), TAG(P), TAG(TBODY), TAG(TD), TAG(TFOOT), TAG(TH),
|
2485
2292
|
TAG(THEAD), TAG(TR), TAG(BODY), TAG(HTML) } )) {
|
2486
2293
|
parser_add_parse_error(parser, token);
|
2294
|
+
return false;
|
2487
2295
|
}
|
2488
2296
|
}
|
2489
|
-
if (get_current_template_insertion_mode(parser) != GUMBO_INSERTION_MODE_INITIAL) {
|
2490
|
-
return handle_in_template(parser, token);
|
2491
|
-
}
|
2492
2297
|
return true;
|
2493
2298
|
} else if (tag_in(token, kEndTag, (gumbo_tagset) { TAG(BODY), TAG(HTML) })) {
|
2494
2299
|
if (!has_an_element_in_scope(parser, GUMBO_TAG_BODY)) {
|
@@ -2498,11 +2303,10 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
2498
2303
|
}
|
2499
2304
|
bool success = true;
|
2500
2305
|
for (int i = 0; i < state->_open_elements.length; ++i) {
|
2501
|
-
if (!node_tag_in_set(state->_open_elements.data[i], (gumbo_tagset) {
|
2502
|
-
|
2503
|
-
|
2504
|
-
|
2505
|
-
TAG(BODY), TAG(HTML) })) {
|
2306
|
+
if (!node_tag_in_set(state->_open_elements.data[i], (gumbo_tagset) { TAG(DD),
|
2307
|
+
TAG(DT), TAG(LI), TAG(OPTGROUP), TAG(OPTION), TAG(P), TAG(RP),
|
2308
|
+
TAG(RT), TAG(TBODY), TAG(TD), TAG(TFOOT), TAG(TH), TAG(THEAD),
|
2309
|
+
TAG(TR), TAG(BODY), TAG(HTML) })) {
|
2506
2310
|
parser_add_parse_error(parser, token);
|
2507
2311
|
success = false;
|
2508
2312
|
break;
|
@@ -2520,7 +2324,7 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
2520
2324
|
} else if (tag_in(token, kStartTag, (gumbo_tagset) { TAG(ADDRESS), TAG(ARTICLE),
|
2521
2325
|
TAG(ASIDE), TAG(BLOCKQUOTE), TAG(CENTER), TAG(DETAILS),
|
2522
2326
|
TAG(DIR), TAG(DIV), TAG(DL), TAG(FIELDSET), TAG(FIGCAPTION),
|
2523
|
-
TAG(FIGURE), TAG(FOOTER), TAG(HEADER), TAG(HGROUP), TAG(MENU),
|
2327
|
+
TAG(FIGURE), TAG(FOOTER), TAG(HEADER), TAG(HGROUP), TAG(MENU),
|
2524
2328
|
TAG(NAV), TAG(OL), TAG(P), TAG(SECTION), TAG(SUMMARY), TAG(UL) })) {
|
2525
2329
|
bool result = maybe_implicitly_close_p_tag(parser, token);
|
2526
2330
|
insert_element_from_token(parser, token);
|
@@ -2543,17 +2347,15 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
2543
2347
|
state->_frameset_ok = false;
|
2544
2348
|
return result;
|
2545
2349
|
} else if (tag_is(token, kStartTag, GUMBO_TAG_FORM)) {
|
2546
|
-
if (state->_form_element != NULL
|
2350
|
+
if (state->_form_element != NULL) {
|
2547
2351
|
gumbo_debug("Ignoring nested form.\n");
|
2548
2352
|
parser_add_parse_error(parser, token);
|
2549
2353
|
ignore_token(parser);
|
2550
2354
|
return false;
|
2551
2355
|
}
|
2552
2356
|
bool result = maybe_implicitly_close_p_tag(parser, token);
|
2553
|
-
|
2554
|
-
|
2555
|
-
state->_form_element = form_element;
|
2556
|
-
}
|
2357
|
+
state->_form_element =
|
2358
|
+
insert_element_from_token(parser, token);
|
2557
2359
|
return result;
|
2558
2360
|
} else if (tag_is(token, kStartTag, GUMBO_TAG_LI)) {
|
2559
2361
|
maybe_implicitly_close_list_tag(parser, token, true);
|
@@ -2585,7 +2387,7 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
2585
2387
|
TAG(ASIDE), TAG(BLOCKQUOTE), TAG(BUTTON), TAG(CENTER), TAG(DETAILS),
|
2586
2388
|
TAG(DIR), TAG(DIV), TAG(DL), TAG(FIELDSET), TAG(FIGCAPTION),
|
2587
2389
|
TAG(FIGURE), TAG(FOOTER), TAG(HEADER), TAG(HGROUP), TAG(LISTING),
|
2588
|
-
TAG(
|
2390
|
+
TAG(MENU), TAG(NAV), TAG(OL), TAG(PRE),
|
2589
2391
|
TAG(SECTION), TAG(SUMMARY), TAG(UL) })) {
|
2590
2392
|
GumboTag tag = token->v.end_tag;
|
2591
2393
|
if (!has_an_element_in_scope(parser, tag)) {
|
@@ -2596,45 +2398,30 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
2596
2398
|
implicitly_close_tags(parser, token, GUMBO_NAMESPACE_HTML, token->v.end_tag);
|
2597
2399
|
return true;
|
2598
2400
|
} else if (tag_is(token, kEndTag, GUMBO_TAG_FORM)) {
|
2599
|
-
|
2600
|
-
|
2601
|
-
|
2602
|
-
|
2603
|
-
|
2604
|
-
|
2605
|
-
|
2606
|
-
|
2607
|
-
|
2608
|
-
parser_add_parse_error(parser, token);
|
2609
|
-
return false;
|
2610
|
-
}
|
2611
|
-
while(!node_html_tag_is(pop_current_node(parser), GUMBO_TAG_FORM));
|
2612
|
-
return success;
|
2613
|
-
} else {
|
2614
|
-
bool result = true;
|
2615
|
-
const GumboNode* node = state->_form_element;
|
2616
|
-
assert(!node || node->type == GUMBO_NODE_ELEMENT);
|
2617
|
-
state->_form_element = NULL;
|
2618
|
-
if (!node || !has_node_in_scope(parser, node)) {
|
2619
|
-
gumbo_debug("Closing an unopened form.\n");
|
2620
|
-
parser_add_parse_error(parser, token);
|
2621
|
-
ignore_token(parser);
|
2622
|
-
return false;
|
2623
|
-
}
|
2624
|
-
// This differs from implicitly_close_tags because we remove *only* the
|
2625
|
-
// <form> element; other nodes are left in scope.
|
2626
|
-
generate_implied_end_tags(parser, GUMBO_TAG_LAST);
|
2627
|
-
if (get_current_node(parser) != node) {
|
2628
|
-
parser_add_parse_error(parser, token);
|
2629
|
-
result = false;
|
2630
|
-
}
|
2631
|
-
|
2632
|
-
GumboVector* open_elements = &state->_open_elements;
|
2633
|
-
int index = gumbo_vector_index_of(open_elements, node);
|
2634
|
-
assert(index >= 0);
|
2635
|
-
gumbo_vector_remove_at(parser, index, open_elements);
|
2636
|
-
return result;
|
2401
|
+
bool result = true;
|
2402
|
+
const GumboNode* node = state->_form_element;
|
2403
|
+
assert(!node || node->type == GUMBO_NODE_ELEMENT);
|
2404
|
+
state->_form_element = NULL;
|
2405
|
+
if (!node || !has_node_in_scope(parser, node)) {
|
2406
|
+
gumbo_debug("Closing an unopened form.\n");
|
2407
|
+
parser_add_parse_error(parser, token);
|
2408
|
+
ignore_token(parser);
|
2409
|
+
return false;
|
2637
2410
|
}
|
2411
|
+
// This differs from implicitly_close_tags because we remove *only* the
|
2412
|
+
// <form> element; other nodes are left in scope.
|
2413
|
+
generate_implied_end_tags(parser, GUMBO_TAG_LAST);
|
2414
|
+
if (get_current_node(parser) != node) {
|
2415
|
+
parser_add_parse_error(parser, token);
|
2416
|
+
result = false;
|
2417
|
+
}
|
2418
|
+
|
2419
|
+
GumboVector* open_elements = &state->_open_elements;
|
2420
|
+
int index = open_elements->length - 1;
|
2421
|
+
for (; index >= 0 && open_elements->data[index] != node; --index);
|
2422
|
+
assert(index >= 0);
|
2423
|
+
gumbo_vector_remove_at(parser, index, open_elements);
|
2424
|
+
return result;
|
2638
2425
|
} else if (tag_is(token, kEndTag, GUMBO_TAG_P)) {
|
2639
2426
|
if (!has_an_element_in_button_scope(parser, GUMBO_TAG_P)) {
|
2640
2427
|
parser_add_parse_error(parser, token);
|
@@ -2661,11 +2448,10 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
2661
2448
|
return false;
|
2662
2449
|
}
|
2663
2450
|
return implicitly_close_tags(parser, token, GUMBO_NAMESPACE_HTML, token_tag);
|
2664
|
-
} else if (tag_in(token, kEndTag, (gumbo_tagset) {
|
2665
|
-
|
2666
|
-
if (!has_an_element_in_scope_with_tagname(parser,
|
2667
|
-
|
2668
|
-
GUMBO_TAG_H4, GUMBO_TAG_H5, GUMBO_TAG_H6})) {
|
2451
|
+
} else if (tag_in(token, kEndTag, (gumbo_tagset) { TAG(H1), TAG(H2), TAG(H3),
|
2452
|
+
TAG(H4), TAG(H5), TAG(H6) })) {
|
2453
|
+
if (!has_an_element_in_scope_with_tagname(parser, (gumbo_tagset) { TAG(H1), TAG(H2), TAG(H3), TAG(H4),
|
2454
|
+
TAG(H5), TAG(H6) })) {
|
2669
2455
|
// No heading open; ignore the token entirely.
|
2670
2456
|
parser_add_parse_error(parser, token);
|
2671
2457
|
ignore_token(parser);
|
@@ -2806,8 +2592,7 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
2806
2592
|
return result;
|
2807
2593
|
} else if (tag_is(token, kStartTag, GUMBO_TAG_ISINDEX)) {
|
2808
2594
|
parser_add_parse_error(parser, token);
|
2809
|
-
if (parser->_parser_state->_form_element != NULL
|
2810
|
-
!has_open_element(parser, GUMBO_TAG_TEMPLATE)) {
|
2595
|
+
if (parser->_parser_state->_form_element != NULL) {
|
2811
2596
|
ignore_token(parser);
|
2812
2597
|
return false;
|
2813
2598
|
}
|
@@ -2822,9 +2607,6 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
2822
2607
|
|
2823
2608
|
GumboNode* form = insert_element_of_tag_type(
|
2824
2609
|
parser, GUMBO_TAG_FORM, GUMBO_INSERTION_FROM_ISINDEX);
|
2825
|
-
if (!has_open_element(parser, GUMBO_TAG_TEMPLATE)) {
|
2826
|
-
parser->_parser_state->_form_element = form;
|
2827
|
-
}
|
2828
2610
|
if (action_attr) {
|
2829
2611
|
gumbo_vector_add(parser, action_attr, &form->v.element.attributes);
|
2830
2612
|
}
|
@@ -2888,9 +2670,6 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
2888
2670
|
parser, GUMBO_TAG_HR, GUMBO_INSERTION_FROM_ISINDEX);
|
2889
2671
|
pop_current_node(parser); // <hr>
|
2890
2672
|
pop_current_node(parser); // <form>
|
2891
|
-
if (!has_open_element(parser, GUMBO_TAG_TEMPLATE)) {
|
2892
|
-
parser->_parser_state->_form_element = NULL;
|
2893
|
-
}
|
2894
2673
|
return false;
|
2895
2674
|
} else if (tag_is(token, kStartTag, GUMBO_TAG_TEXTAREA)) {
|
2896
2675
|
run_generic_parsing_algorithm(parser, token, GUMBO_LEX_RCDATA);
|
@@ -2932,17 +2711,12 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
2932
2711
|
reconstruct_active_formatting_elements(parser);
|
2933
2712
|
insert_element_from_token(parser, token);
|
2934
2713
|
return true;
|
2935
|
-
|
2936
|
-
TAG(RB), TAG(RP), TAG(RT), TAG(RTC) })) {
|
2714
|
+
} else if (tag_in(token, kStartTag, (gumbo_tagset) { TAG(RP), TAG(RT) })) {
|
2937
2715
|
bool success = true;
|
2938
|
-
GumboTag exception = tag_in(token, kStartTag, (gumbo_tagset) {
|
2939
|
-
TAG(RT), TAG(RP) }) ? GUMBO_TAG_RTC : GUMBO_TAG_LAST;
|
2940
2716
|
if (has_an_element_in_scope(parser, GUMBO_TAG_RUBY)) {
|
2941
|
-
generate_implied_end_tags(parser,
|
2717
|
+
generate_implied_end_tags(parser, GUMBO_TAG_LAST);
|
2942
2718
|
}
|
2943
|
-
if (!node_html_tag_is(get_current_node(parser), GUMBO_TAG_RUBY)
|
2944
|
-
!(exception == GUMBO_TAG_LAST ||
|
2945
|
-
node_html_tag_is(get_current_node(parser), GUMBO_TAG_RTC))) {
|
2719
|
+
if (!node_html_tag_is(get_current_node(parser), GUMBO_TAG_RUBY)) {
|
2946
2720
|
parser_add_parse_error(parser, token);
|
2947
2721
|
success = false;
|
2948
2722
|
}
|
@@ -3113,8 +2887,7 @@ static bool handle_in_table(GumboParser* parser, GumboToken* token) {
|
|
3113
2887
|
parser_add_parse_error(parser, token);
|
3114
2888
|
ignore_token(parser);
|
3115
2889
|
return false;
|
3116
|
-
} else if (tag_in(token, kStartTag, (gumbo_tagset) { TAG(STYLE), TAG(SCRIPT)
|
3117
|
-
(tag_is(token, kEndTag, GUMBO_TAG_TEMPLATE))) {
|
2890
|
+
} else if (tag_in(token, kStartTag, (gumbo_tagset) { TAG(STYLE), TAG(SCRIPT) })) {
|
3118
2891
|
return handle_in_head(parser, token);
|
3119
2892
|
} else if (tag_is(token, kStartTag, GUMBO_TAG_INPUT) &&
|
3120
2893
|
attribute_matches(&token->v.start_tag.attributes,
|
@@ -3125,7 +2898,7 @@ static bool handle_in_table(GumboParser* parser, GumboToken* token) {
|
|
3125
2898
|
return false;
|
3126
2899
|
} else if (tag_is(token, kStartTag, GUMBO_TAG_FORM)) {
|
3127
2900
|
parser_add_parse_error(parser, token);
|
3128
|
-
if (state->_form_element
|
2901
|
+
if (state->_form_element) {
|
3129
2902
|
ignore_token(parser);
|
3130
2903
|
return false;
|
3131
2904
|
}
|
@@ -3133,7 +2906,11 @@ static bool handle_in_table(GumboParser* parser, GumboToken* token) {
|
|
3133
2906
|
pop_current_node(parser);
|
3134
2907
|
return false;
|
3135
2908
|
} else if (token->type == GUMBO_TOKEN_EOF) {
|
3136
|
-
|
2909
|
+
if (!node_html_tag_is(get_current_node(parser), GUMBO_TAG_HTML)) {
|
2910
|
+
parser_add_parse_error(parser, token);
|
2911
|
+
return false;
|
2912
|
+
}
|
2913
|
+
return true;
|
3137
2914
|
} else {
|
3138
2915
|
parser_add_parse_error(parser, token);
|
3139
2916
|
state->_foster_parent_insertions = true;
|
@@ -3178,37 +2955,35 @@ static bool handle_in_table_text(GumboParser* parser, GumboToken* token) {
|
|
3178
2955
|
|
3179
2956
|
// http://www.whatwg.org/specs/web-apps/current-work/complete/tokenization.html#parsing-main-incaption
|
3180
2957
|
static bool handle_in_caption(GumboParser* parser, GumboToken* token) {
|
3181
|
-
if (
|
2958
|
+
if (tag_in(token, kStartTag, (gumbo_tagset) { TAG(CAPTION), TAG(COL),
|
2959
|
+
TAG(COLGROUP), TAG(TBODY), TAG(TD),
|
2960
|
+
TAG(TFOOT), TAG(TH), TAG(THEAD), TAG(TR) }) ||
|
2961
|
+
tag_in(token, kEndTag, (gumbo_tagset) { TAG(CAPTION), TAG(TABLE) })) {
|
3182
2962
|
if (!has_an_element_in_table_scope(parser, GUMBO_TAG_CAPTION)) {
|
3183
2963
|
parser_add_parse_error(parser, token);
|
3184
2964
|
ignore_token(parser);
|
3185
2965
|
return false;
|
3186
|
-
} else {
|
3187
|
-
generate_implied_end_tags(parser, GUMBO_TAG_LAST);
|
3188
|
-
bool result = true;
|
3189
|
-
if (!node_html_tag_is(get_current_node(parser), GUMBO_TAG_CAPTION)) {
|
3190
|
-
parser_add_parse_error(parser, token);
|
3191
|
-
}
|
3192
|
-
while (!node_html_tag_is(pop_current_node(parser), GUMBO_TAG_CAPTION));
|
3193
|
-
clear_active_formatting_elements(parser);
|
3194
|
-
set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE);
|
3195
|
-
return result;
|
3196
2966
|
}
|
3197
|
-
|
3198
|
-
TAG(COLGROUP), TAG(TBODY), TAG(TD), TAG(TFOOT), TAG(TH), TAG(THEAD), TAG(TR) }) ||
|
3199
|
-
(tag_is(token, kEndTag, GUMBO_TAG_TABLE))) {
|
3200
|
-
if (!has_an_element_in_table_scope(parser, GUMBO_TAG_CAPTION)) {
|
2967
|
+
if (!tag_is(token, kEndTag, GUMBO_TAG_CAPTION)) {
|
3201
2968
|
parser_add_parse_error(parser, token);
|
3202
|
-
|
3203
|
-
|
2969
|
+
parser->_parser_state->_reprocess_current_token = true;
|
2970
|
+
}
|
2971
|
+
generate_implied_end_tags(parser, GUMBO_TAG_LAST);
|
2972
|
+
bool result = true;
|
2973
|
+
if (!node_html_tag_is(get_current_node(parser), GUMBO_TAG_CAPTION)) {
|
2974
|
+
parser_add_parse_error(parser, token);
|
2975
|
+
while (!node_html_tag_is(get_current_node(parser), GUMBO_TAG_CAPTION)) {
|
2976
|
+
pop_current_node(parser);
|
2977
|
+
}
|
2978
|
+
result = false;
|
3204
2979
|
}
|
3205
|
-
|
2980
|
+
pop_current_node(parser); // The <caption> itself.
|
3206
2981
|
clear_active_formatting_elements(parser);
|
3207
2982
|
set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE);
|
3208
|
-
|
3209
|
-
|
3210
|
-
|
3211
|
-
TAG(
|
2983
|
+
return result;
|
2984
|
+
} else if (tag_in(token, kEndTag, (gumbo_tagset) { TAG(BODY), TAG(COL),
|
2985
|
+
TAG(COLGROUP), TAG(HTML), TAG(TBODY), TAG(TD), TAG(TFOOT),
|
2986
|
+
TAG(TH), TAG(THEAD), TAG(TR) })) {
|
3212
2987
|
parser_add_parse_error(parser, token);
|
3213
2988
|
ignore_token(parser);
|
3214
2989
|
return false;
|
@@ -3236,33 +3011,24 @@ static bool handle_in_column_group(GumboParser* parser, GumboToken* token) {
|
|
3236
3011
|
pop_current_node(parser);
|
3237
3012
|
acknowledge_self_closing_tag(parser);
|
3238
3013
|
return true;
|
3239
|
-
} else if (tag_is(token, kEndTag, GUMBO_TAG_COLGROUP)) {
|
3240
|
-
if (!node_html_tag_is(get_current_node(parser), GUMBO_TAG_COLGROUP)) {
|
3241
|
-
parser_add_parse_error(parser, token);
|
3242
|
-
ignore_token(parser);
|
3243
|
-
return false;
|
3244
|
-
}
|
3245
|
-
pop_current_node(parser);
|
3246
|
-
set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE);
|
3247
|
-
return false;
|
3248
3014
|
} else if (tag_is(token, kEndTag, GUMBO_TAG_COL)) {
|
3249
3015
|
parser_add_parse_error(parser, token);
|
3250
3016
|
ignore_token(parser);
|
3251
3017
|
return false;
|
3252
|
-
} else if (
|
3253
|
-
|
3254
|
-
return
|
3255
|
-
} else if (token->type == GUMBO_TOKEN_EOF) {
|
3256
|
-
return handle_in_body(parser, token);
|
3018
|
+
} else if (token->type == GUMBO_TOKEN_EOF &&
|
3019
|
+
get_current_node(parser) == parser->_output->root) {
|
3020
|
+
return true;
|
3257
3021
|
} else {
|
3258
|
-
if (
|
3022
|
+
if (get_current_node(parser) == parser->_output->root) {
|
3259
3023
|
parser_add_parse_error(parser, token);
|
3260
|
-
ignore_token(parser);
|
3261
3024
|
return false;
|
3262
3025
|
}
|
3026
|
+
assert(node_html_tag_is(get_current_node(parser), GUMBO_TAG_COLGROUP));
|
3263
3027
|
pop_current_node(parser);
|
3264
3028
|
set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE);
|
3265
|
-
|
3029
|
+
if (!tag_is(token, kEndTag, GUMBO_TAG_COLGROUP)) {
|
3030
|
+
parser->_parser_state->_reprocess_current_token = true;
|
3031
|
+
}
|
3266
3032
|
return true;
|
3267
3033
|
}
|
3268
3034
|
}
|
@@ -3325,48 +3091,42 @@ static bool handle_in_row(GumboParser* parser, GumboToken* token) {
|
|
3325
3091
|
set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_CELL);
|
3326
3092
|
add_formatting_element(parser, &kActiveFormattingScopeMarker);
|
3327
3093
|
return true;
|
3328
|
-
} else if (
|
3329
|
-
|
3330
|
-
|
3331
|
-
|
3332
|
-
|
3333
|
-
|
3334
|
-
|
3335
|
-
|
3336
|
-
|
3337
|
-
|
3338
|
-
|
3339
|
-
|
3340
|
-
|
3341
|
-
|
3094
|
+
} else if (tag_in(token, kStartTag, (gumbo_tagset) { TAG(CAPTION), TAG(COLGROUP),
|
3095
|
+
TAG(TBODY), TAG(TFOOT), TAG(THEAD), TAG(TR) }) ||
|
3096
|
+
tag_in(token, kEndTag, (gumbo_tagset) { TAG(TR), TAG(TABLE),
|
3097
|
+
TAG(TBODY), TAG(TFOOT), TAG(THEAD) })) {
|
3098
|
+
// This case covers 4 clauses of the spec, each of which say "Otherwise, act
|
3099
|
+
// as if an end tag with the tag name "tr" had been seen." The differences
|
3100
|
+
// are in error handling and whether the current token is reprocessed.
|
3101
|
+
GumboTag desired_tag =
|
3102
|
+
tag_in(token, kEndTag, (gumbo_tagset) { TAG(TBODY), TAG(TFOOT),
|
3103
|
+
TAG(THEAD) })
|
3104
|
+
? token->v.end_tag : GUMBO_TAG_TR;
|
3105
|
+
if (!has_an_element_in_table_scope(parser, desired_tag)) {
|
3106
|
+
gumbo_debug("Bailing because there is no tag %s in table scope.\nOpen elements:",
|
3107
|
+
gumbo_normalized_tagname(desired_tag));
|
3108
|
+
for (int i = 0; i < parser->_parser_state->_open_elements.length; ++i) {
|
3109
|
+
const GumboNode* node = parser->_parser_state->_open_elements.data[i];
|
3110
|
+
gumbo_debug("%s\n", gumbo_normalized_tagname(node->v.element.tag));
|
3111
|
+
}
|
3342
3112
|
parser_add_parse_error(parser, token);
|
3343
3113
|
ignore_token(parser);
|
3344
3114
|
return false;
|
3345
|
-
} else {
|
3346
|
-
clear_stack_to_table_row_context(parser);
|
3347
|
-
pop_current_node(parser);
|
3348
|
-
set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE_BODY);
|
3349
|
-
parser->_parser_state->_reprocess_current_token = true;
|
3350
|
-
return true;
|
3351
3115
|
}
|
3352
|
-
|
3353
|
-
|
3354
|
-
|
3355
|
-
|
3356
|
-
|
3357
|
-
|
3358
|
-
} else {
|
3359
|
-
clear_stack_to_table_row_context(parser);
|
3360
|
-
pop_current_node(parser);
|
3361
|
-
set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE_BODY);
|
3116
|
+
clear_stack_to_table_row_context(parser);
|
3117
|
+
GumboNode* last_element = pop_current_node(parser);
|
3118
|
+
assert(node_html_tag_is(last_element, GUMBO_TAG_TR));
|
3119
|
+
AVOID_UNUSED_VARIABLE_WARNING(last_element);
|
3120
|
+
set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE_BODY);
|
3121
|
+
if (!tag_is(token, kEndTag, GUMBO_TAG_TR)) {
|
3362
3122
|
parser->_parser_state->_reprocess_current_token = true;
|
3363
|
-
return true;
|
3364
3123
|
}
|
3365
|
-
|
3366
|
-
|
3367
|
-
|
3368
|
-
|
3369
|
-
|
3124
|
+
return true;
|
3125
|
+
} else if (tag_in(token, kEndTag, (gumbo_tagset) { TAG(BODY), TAG(CAPTION),
|
3126
|
+
TAG(COL), TAG(COLGROUP), TAG(HTML), TAG(TD), TAG(TH) })) {
|
3127
|
+
parser_add_parse_error(parser, token);
|
3128
|
+
ignore_token(parser);
|
3129
|
+
return false;
|
3370
3130
|
} else {
|
3371
3131
|
return handle_in_table(parser, token);
|
3372
3132
|
}
|
@@ -3378,7 +3138,6 @@ static bool handle_in_cell(GumboParser* parser, GumboToken* token) {
|
|
3378
3138
|
GumboTag token_tag = token->v.end_tag;
|
3379
3139
|
if (!has_an_element_in_table_scope(parser, token_tag)) {
|
3380
3140
|
parser_add_parse_error(parser, token);
|
3381
|
-
ignore_token(parser);
|
3382
3141
|
return false;
|
3383
3142
|
}
|
3384
3143
|
return close_table_cell(parser, token, token_tag);
|
@@ -3494,11 +3253,14 @@ static bool handle_in_select(GumboParser* parser, GumboToken* token) {
|
|
3494
3253
|
parser->_parser_state->_reprocess_current_token = true;
|
3495
3254
|
}
|
3496
3255
|
return false;
|
3497
|
-
} else if (
|
3498
|
-
tag_is(token, kEndTag, GUMBO_TAG_TEMPLATE)) {
|
3256
|
+
} else if (tag_is(token, kStartTag, GUMBO_TAG_SCRIPT)) {
|
3499
3257
|
return handle_in_head(parser, token);
|
3500
3258
|
} else if (token->type == GUMBO_TOKEN_EOF) {
|
3501
|
-
|
3259
|
+
if (get_current_node(parser) != parser->_output->root) {
|
3260
|
+
parser_add_parse_error(parser, token);
|
3261
|
+
return false;
|
3262
|
+
}
|
3263
|
+
return true;
|
3502
3264
|
} else {
|
3503
3265
|
parser_add_parse_error(parser, token);
|
3504
3266
|
ignore_token(parser);
|
@@ -3517,16 +3279,14 @@ static bool handle_in_select_in_table(GumboParser* parser, GumboToken* token) {
|
|
3517
3279
|
} else if (tag_in(token, kEndTag, (gumbo_tagset) { TAG(CAPTION), TAG(TABLE),
|
3518
3280
|
TAG(TBODY), TAG(TFOOT), TAG(THEAD), TAG(TR), TAG(TD), TAG(TH) })) {
|
3519
3281
|
parser_add_parse_error(parser, token);
|
3520
|
-
if (
|
3521
|
-
ignore_token(parser);
|
3522
|
-
return false;
|
3523
|
-
} else {
|
3282
|
+
if (has_an_element_in_table_scope(parser, token->v.end_tag)) {
|
3524
3283
|
close_current_select(parser);
|
3525
|
-
|
3526
|
-
// reset_insertion_mode_appropriately(parser);
|
3284
|
+
reset_insertion_mode_appropriately(parser);
|
3527
3285
|
parser->_parser_state->_reprocess_current_token = true;
|
3528
|
-
|
3286
|
+
} else {
|
3287
|
+
ignore_token(parser);
|
3529
3288
|
}
|
3289
|
+
return false;
|
3530
3290
|
} else {
|
3531
3291
|
return handle_in_select(parser, token);
|
3532
3292
|
}
|
@@ -3534,68 +3294,8 @@ static bool handle_in_select_in_table(GumboParser* parser, GumboToken* token) {
|
|
3534
3294
|
|
3535
3295
|
// http://www.whatwg.org/specs/web-apps/current-work/multipage/tree-construction.html#parsing-main-intemplate
|
3536
3296
|
static bool handle_in_template(GumboParser* parser, GumboToken* token) {
|
3537
|
-
|
3538
|
-
|
3539
|
-
token->type == GUMBO_TOKEN_CHARACTER ||
|
3540
|
-
token->type == GUMBO_TOKEN_COMMENT ||
|
3541
|
-
token->type == GUMBO_TOKEN_DOCTYPE) {
|
3542
|
-
return handle_in_body(parser, token);
|
3543
|
-
} else if (tag_in(token, kStartTag, (gumbo_tagset) { TAG(BASE), TAG(BASEFONT), TAG(BGSOUND),
|
3544
|
-
TAG(LINK), TAG(META), TAG(NOFRAMES), TAG(SCRIPT), TAG(STYLE),
|
3545
|
-
TAG(TEMPLATE), TAG(TITLE) }) ||
|
3546
|
-
tag_is(token, kEndTag, GUMBO_TAG_TEMPLATE)) {
|
3547
|
-
return handle_in_head(parser, token);
|
3548
|
-
} else if (tag_in(token, kStartTag, (gumbo_tagset) { TAG(CAPTION), TAG(COLGROUP),
|
3549
|
-
TAG(TBODY), TAG(TFOOT), TAG(THEAD) })) {
|
3550
|
-
pop_template_insertion_mode(parser);
|
3551
|
-
push_template_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE);
|
3552
|
-
set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE);
|
3553
|
-
state->_reprocess_current_token = true;
|
3554
|
-
return true;
|
3555
|
-
} else if (tag_is(token, kStartTag, GUMBO_TAG_COL)) {
|
3556
|
-
pop_template_insertion_mode(parser);
|
3557
|
-
push_template_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_COLUMN_GROUP);
|
3558
|
-
set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_COLUMN_GROUP);
|
3559
|
-
state->_reprocess_current_token = true;
|
3560
|
-
return true;
|
3561
|
-
} else if (tag_is(token, kStartTag, GUMBO_TAG_TR)) {
|
3562
|
-
pop_template_insertion_mode(parser);
|
3563
|
-
push_template_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE_BODY);
|
3564
|
-
set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE_BODY);
|
3565
|
-
state->_reprocess_current_token = true;
|
3566
|
-
return true;
|
3567
|
-
} else if (tag_in(token, kStartTag, (gumbo_tagset) { TAG(TD), TAG(TH) })) {
|
3568
|
-
pop_template_insertion_mode(parser);
|
3569
|
-
push_template_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_ROW);
|
3570
|
-
set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_ROW);
|
3571
|
-
state->_reprocess_current_token = true;
|
3572
|
-
return true;
|
3573
|
-
} else if (token->type == GUMBO_TOKEN_START_TAG) {
|
3574
|
-
pop_template_insertion_mode(parser);
|
3575
|
-
push_template_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_BODY);
|
3576
|
-
set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_BODY);
|
3577
|
-
state->_reprocess_current_token = true;
|
3578
|
-
return true;
|
3579
|
-
} else if (token->type == GUMBO_TOKEN_END_TAG) {
|
3580
|
-
parser_add_parse_error(parser, token);
|
3581
|
-
ignore_token(parser);
|
3582
|
-
return false;
|
3583
|
-
} else if (token->type == GUMBO_TOKEN_EOF) {
|
3584
|
-
if (!has_open_element(parser, GUMBO_TAG_TEMPLATE)) {
|
3585
|
-
// Stop parsing.
|
3586
|
-
return true;
|
3587
|
-
}
|
3588
|
-
parser_add_parse_error(parser, token);
|
3589
|
-
while(!node_html_tag_is(pop_current_node(parser), GUMBO_TAG_TEMPLATE));
|
3590
|
-
clear_active_formatting_elements(parser);
|
3591
|
-
pop_template_insertion_mode(parser);
|
3592
|
-
reset_insertion_mode_appropriately(parser);
|
3593
|
-
state->_reprocess_current_token = true;
|
3594
|
-
return false;
|
3595
|
-
} else {
|
3596
|
-
assert(0);
|
3597
|
-
return false;
|
3598
|
-
}
|
3297
|
+
// TODO(jdtang): Implement this.
|
3298
|
+
return true;
|
3599
3299
|
}
|
3600
3300
|
|
3601
3301
|
// http://www.whatwg.org/specs/web-apps/current-work/complete/tokenization.html#parsing-main-afterbody
|
@@ -3613,12 +3313,7 @@ static bool handle_after_body(GumboParser* parser, GumboToken* token) {
|
|
3613
3313
|
ignore_token(parser);
|
3614
3314
|
return false;
|
3615
3315
|
} else if (tag_is(token, kEndTag, GUMBO_TAG_HTML)) {
|
3616
|
-
|
3617
|
-
if (is_fragment_parser(parser)) {
|
3618
|
-
parser_add_parse_error(parser, token);
|
3619
|
-
ignore_token(parser);
|
3620
|
-
return false;
|
3621
|
-
}
|
3316
|
+
// TODO(jdtang): Handle fragment parsing algorithm case.
|
3622
3317
|
set_insertion_mode(parser, GUMBO_INSERTION_MODE_AFTER_AFTER_BODY);
|
3623
3318
|
GumboNode* html = parser->_parser_state->_open_elements.data[0];
|
3624
3319
|
assert(node_html_tag_is(html, GUMBO_TAG_HTML));
|
@@ -3659,8 +3354,9 @@ static bool handle_in_frameset(GumboParser* parser, GumboToken* token) {
|
|
3659
3354
|
return false;
|
3660
3355
|
}
|
3661
3356
|
pop_current_node(parser);
|
3662
|
-
|
3663
|
-
|
3357
|
+
// TODO(jdtang): Add a condition to ignore this for the fragment parsing
|
3358
|
+
// algorithm.
|
3359
|
+
if (!node_html_tag_is(get_current_node(parser), GUMBO_TAG_FRAMESET)) {
|
3664
3360
|
set_insertion_mode(parser, GUMBO_INSERTION_MODE_AFTER_FRAMESET);
|
3665
3361
|
}
|
3666
3362
|
return true;
|
@@ -3834,32 +3530,18 @@ static bool handle_in_foreign_content(GumboParser* parser, GumboToken* token) {
|
|
3834
3530
|
token_has_attribute(token, "color") ||
|
3835
3531
|
token_has_attribute(token, "face") ||
|
3836
3532
|
token_has_attribute(token, "size")))) {
|
3837
|
-
|
3838
|
-
/* Parse error */
|
3839
3533
|
parser_add_parse_error(parser, token);
|
3840
|
-
|
3841
|
-
|
3842
|
-
|
3843
|
-
|
3844
|
-
|
3845
|
-
|
3846
|
-
|
3847
|
-
|
3848
|
-
|
3849
|
-
} while(!(is_mathml_integration_point(get_current_node(parser)) ||
|
3850
|
-
is_html_integration_point(get_current_node(parser)) ||
|
3851
|
-
get_current_node(parser)->v.element.tag_namespace ==
|
3852
|
-
GUMBO_NAMESPACE_HTML));
|
3853
|
-
parser->_parser_state->_reprocess_current_token = true;
|
3854
|
-
return false;
|
3855
|
-
}
|
3856
|
-
|
3857
|
-
assert(token->type == GUMBO_TOKEN_START_TAG);
|
3858
|
-
}
|
3859
|
-
|
3860
|
-
if (token->type == GUMBO_TOKEN_START_TAG) {
|
3534
|
+
do {
|
3535
|
+
pop_current_node(parser);
|
3536
|
+
} while(!(is_mathml_integration_point(get_current_node(parser)) ||
|
3537
|
+
is_html_integration_point(get_current_node(parser)) ||
|
3538
|
+
get_current_node(parser)->v.element.tag_namespace ==
|
3539
|
+
GUMBO_NAMESPACE_HTML));
|
3540
|
+
parser->_parser_state->_reprocess_current_token = true;
|
3541
|
+
return false;
|
3542
|
+
} else if (token->type == GUMBO_TOKEN_START_TAG) {
|
3861
3543
|
const GumboNamespaceEnum current_namespace =
|
3862
|
-
|
3544
|
+
get_current_node(parser)->v.element.tag_namespace;
|
3863
3545
|
if (current_namespace == GUMBO_NAMESPACE_MATHML) {
|
3864
3546
|
adjust_mathml_attributes(parser, token);
|
3865
3547
|
}
|
@@ -3948,10 +3630,8 @@ static bool handle_token(GumboParser* parser, GumboToken* token) {
|
|
3948
3630
|
parser->_parser_state->_closed_html_tag = true;
|
3949
3631
|
}
|
3950
3632
|
|
3951
|
-
const GumboNode* current_node =
|
3952
|
-
assert(!current_node ||
|
3953
|
-
current_node->type == GUMBO_NODE_ELEMENT ||
|
3954
|
-
current_node->type == GUMBO_NODE_TEMPLATE);
|
3633
|
+
const GumboNode* current_node = get_current_node(parser);
|
3634
|
+
assert(!current_node || current_node->type == GUMBO_NODE_ELEMENT);
|
3955
3635
|
if (current_node) {
|
3956
3636
|
gumbo_debug("Current node: <%s>.\n",
|
3957
3637
|
gumbo_normalized_tagname(current_node->v.element.tag));
|
@@ -3979,66 +3659,6 @@ static bool handle_token(GumboParser* parser, GumboToken* token) {
|
|
3979
3659
|
}
|
3980
3660
|
}
|
3981
3661
|
|
3982
|
-
static void fragment_parser_init(
|
3983
|
-
GumboParser *parser, GumboTag fragment_ctx,
|
3984
|
-
GumboNamespaceEnum fragment_namespace) {
|
3985
|
-
GumboNode *root;
|
3986
|
-
assert(fragment_ctx != GUMBO_TAG_LAST);
|
3987
|
-
|
3988
|
-
// 3
|
3989
|
-
parser->_parser_state->_fragment_ctx = create_element(parser, fragment_ctx);
|
3990
|
-
parser->_parser_state->_fragment_ctx->v.element.tag_namespace =
|
3991
|
-
fragment_namespace;
|
3992
|
-
|
3993
|
-
// 4
|
3994
|
-
if (fragment_namespace == GUMBO_NAMESPACE_HTML) {
|
3995
|
-
// Non-HTML namespaces always start in the DATA state.
|
3996
|
-
switch (fragment_ctx) {
|
3997
|
-
case GUMBO_TAG_TITLE:
|
3998
|
-
case GUMBO_TAG_TEXTAREA:
|
3999
|
-
gumbo_tokenizer_set_state(parser, GUMBO_LEX_RCDATA);
|
4000
|
-
break;
|
4001
|
-
|
4002
|
-
case GUMBO_TAG_STYLE:
|
4003
|
-
case GUMBO_TAG_XMP:
|
4004
|
-
case GUMBO_TAG_IFRAME:
|
4005
|
-
case GUMBO_TAG_NOEMBED:
|
4006
|
-
case GUMBO_TAG_NOFRAMES:
|
4007
|
-
gumbo_tokenizer_set_state(parser, GUMBO_LEX_RAWTEXT);
|
4008
|
-
break;
|
4009
|
-
|
4010
|
-
case GUMBO_TAG_SCRIPT:
|
4011
|
-
gumbo_tokenizer_set_state(parser, GUMBO_LEX_SCRIPT);
|
4012
|
-
break;
|
4013
|
-
|
4014
|
-
case GUMBO_TAG_NOSCRIPT:
|
4015
|
-
/* scripting is disabled in Gumbo, so leave the tokenizer
|
4016
|
-
* in the default data state */
|
4017
|
-
break;
|
4018
|
-
|
4019
|
-
case GUMBO_TAG_PLAINTEXT:
|
4020
|
-
gumbo_tokenizer_set_state(parser, GUMBO_LEX_PLAINTEXT);
|
4021
|
-
break;
|
4022
|
-
|
4023
|
-
default:
|
4024
|
-
/* default data state */
|
4025
|
-
break;
|
4026
|
-
}
|
4027
|
-
}
|
4028
|
-
|
4029
|
-
// 5. 6. 7.
|
4030
|
-
root = insert_element_of_tag_type(parser, GUMBO_TAG_HTML, GUMBO_INSERTION_IMPLIED);
|
4031
|
-
parser->_output->root = root;
|
4032
|
-
|
4033
|
-
// 8.
|
4034
|
-
if (fragment_ctx == GUMBO_TAG_TEMPLATE) {
|
4035
|
-
push_template_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TEMPLATE);
|
4036
|
-
}
|
4037
|
-
|
4038
|
-
// 10.
|
4039
|
-
reset_insertion_mode_appropriately(parser);
|
4040
|
-
}
|
4041
|
-
|
4042
3662
|
GumboOutput* gumbo_parse(const char* buffer) {
|
4043
3663
|
return gumbo_parse_with_options(
|
4044
3664
|
&kGumboDefaultOptions, buffer, strlen(buffer));
|
@@ -4046,27 +3666,11 @@ GumboOutput* gumbo_parse(const char* buffer) {
|
|
4046
3666
|
|
4047
3667
|
GumboOutput* gumbo_parse_with_options(
|
4048
3668
|
const GumboOptions* options, const char* buffer, size_t length) {
|
4049
|
-
return gumbo_parse_fragment(
|
4050
|
-
options, buffer, length, GUMBO_TAG_LAST, GUMBO_NAMESPACE_HTML);
|
4051
|
-
}
|
4052
|
-
|
4053
|
-
GumboOutput* gumbo_parse_fragment(
|
4054
|
-
const GumboOptions* options, const char* buffer, size_t length,
|
4055
|
-
const GumboTag fragment_ctx, const GumboNamespaceEnum fragment_namespace) {
|
4056
3669
|
GumboParser parser;
|
4057
3670
|
parser._options = options;
|
4058
|
-
parser_state_init(&parser);
|
4059
|
-
// Must come after parser_state_init, since creating the document node must
|
4060
|
-
// reference parser_state->_current_node.
|
4061
3671
|
output_init(&parser);
|
4062
|
-
// And this must come after output_init, because initializing the tokenizer
|
4063
|
-
// reads the first character and that may cause a UTF-8 decode error
|
4064
|
-
// (inserting into output->errors) if that's invalid.
|
4065
3672
|
gumbo_tokenizer_state_init(&parser, buffer, length);
|
4066
|
-
|
4067
|
-
if (fragment_ctx != GUMBO_TAG_LAST) {
|
4068
|
-
fragment_parser_init(&parser, fragment_ctx, fragment_namespace);
|
4069
|
-
}
|
3673
|
+
parser_state_init(&parser);
|
4070
3674
|
|
4071
3675
|
GumboParserState* state = parser._parser_state;
|
4072
3676
|
gumbo_debug("Parsing %.*s.\n", length, buffer);
|
@@ -4154,16 +3758,20 @@ GumboOutput* gumbo_parse_fragment(
|
|
4154
3758
|
return parser._output;
|
4155
3759
|
}
|
4156
3760
|
|
3761
|
+
void gumbo_destroy_node(GumboOptions* options, GumboNode* node) {
|
3762
|
+
// Need a dummy GumboParser because the allocator comes along with the
|
3763
|
+
// options object.
|
3764
|
+
GumboParser parser;
|
3765
|
+
parser._options = options;
|
3766
|
+
destroy_node(&parser, node);
|
3767
|
+
}
|
3768
|
+
|
4157
3769
|
void gumbo_destroy_output(const GumboOptions* options, GumboOutput* output) {
|
4158
3770
|
// Need a dummy GumboParser because the allocator comes along with the
|
4159
3771
|
// options object.
|
4160
3772
|
GumboParser parser;
|
4161
|
-
parser._parser_state = NULL;
|
4162
3773
|
parser._options = options;
|
4163
|
-
|
4164
|
-
while (current) {
|
4165
|
-
current = destroy_node(&parser, current);
|
4166
|
-
}
|
3774
|
+
destroy_node(&parser, output->document);
|
4167
3775
|
for (int i = 0; i < output->errors.length; ++i) {
|
4168
3776
|
gumbo_error_destroy(&parser, output->errors.data[i]);
|
4169
3777
|
}
|