nokogumbo 1.4.1 → 1.4.2
This diff compares the contents of two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/ext/nokogumboc/nokogumbo.c +1 -1
- data/gumbo-parser/src/error.c +3 -5
- data/gumbo-parser/src/gumbo.h +170 -36
- data/gumbo-parser/src/parser.c +403 -795
- data/gumbo-parser/src/string_buffer.c +1 -8
- data/gumbo-parser/src/string_buffer.h +0 -5
- data/gumbo-parser/src/tag.c +162 -35
- data/gumbo-parser/src/tokenizer.c +18 -13
- data/gumbo-parser/src/vector.c +1 -1
- data/test-nokogumbo.rb +1 -1
- metadata +15 -24
- data/gumbo-parser/src/tag.in +0 -150
- data/gumbo-parser/src/tag_enum.h +0 -150
- data/gumbo-parser/src/tag_gperf.h +0 -343
- data/gumbo-parser/src/tag_sizes.h +0 -1
- data/gumbo-parser/src/tag_strings.h +0 -150
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: cf20dd502d8ec6022f2c72193bb0c9a908251088
|
4
|
+
data.tar.gz: 326f85766d0e4f97683f5df026f08f4dc33806e8
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 800800652a5260bf54399e8cca1fc6e63f7ef53aea489245c5315b6e955b38aa4dfc6d7272b99898ab78150464640ac14c995aa38b9c77644dab5d73fc0e46a5
|
7
|
+
data.tar.gz: 18ba647671103cfc2853a88935fe91eb965d1e6fbe1aad981438297a5035ec222b5ae6c5ed3ef127429c8b58edd02a6a5a877ba7e7ec3390d05779f7420f1521
|
data/ext/nokogumboc/nokogumbo.c
CHANGED
@@ -157,7 +157,7 @@ static xmlNodePtr walk_tree(xmlDocPtr document, GumboElement *node) {
|
|
157
157
|
|
158
158
|
switch (child->type) {
|
159
159
|
case GUMBO_NODE_ELEMENT:
|
160
|
-
|
160
|
+
// case GUMBO_NODE_TEMPLATE: /* future */
|
161
161
|
node = walk_tree(document, &child->v.element);
|
162
162
|
break;
|
163
163
|
case GUMBO_NODE_WHITESPACE:
|
data/gumbo-parser/src/error.c
CHANGED
@@ -35,11 +35,10 @@ static const size_t kMessageBufferSize = 256;
|
|
35
35
|
static int print_message(GumboParser* parser, GumboStringBuffer* output,
|
36
36
|
const char* format, ...) {
|
37
37
|
va_list args;
|
38
|
-
int remaining_capacity = output->capacity - output->length;
|
39
38
|
va_start(args, format);
|
39
|
+
int remaining_capacity = output->capacity - output->length;
|
40
40
|
int bytes_written = vsnprintf(output->data + output->length,
|
41
41
|
remaining_capacity, format, args);
|
42
|
-
va_end(args);
|
43
42
|
#ifdef _MSC_VER
|
44
43
|
if (bytes_written == -1) {
|
45
44
|
// vsnprintf returns -1 on MSVC++ if there's not enough capacity, instead of
|
@@ -48,7 +47,6 @@ static int print_message(GumboParser* parser, GumboStringBuffer* output,
|
|
48
47
|
// we retry (letting it fail and returning 0 if it doesn't), since there's
|
49
48
|
// no way to smartly resize the buffer.
|
50
49
|
gumbo_string_buffer_reserve(parser, output->capacity * 2, output);
|
51
|
-
va_start(args, format);
|
52
50
|
int result = vsnprintf(output->data + output->length,
|
53
51
|
remaining_capacity, format, args);
|
54
52
|
va_end(args);
|
@@ -57,6 +55,7 @@ static int print_message(GumboParser* parser, GumboStringBuffer* output,
|
|
57
55
|
#else
|
58
56
|
// -1 in standard C99 indicates an encoding error. Return 0 and do nothing.
|
59
57
|
if (bytes_written == -1) {
|
58
|
+
va_end(args);
|
60
59
|
return 0;
|
61
60
|
}
|
62
61
|
#endif
|
@@ -65,12 +64,11 @@ static int print_message(GumboParser* parser, GumboStringBuffer* output,
|
|
65
64
|
gumbo_string_buffer_reserve(
|
66
65
|
parser, output->capacity + bytes_written, output);
|
67
66
|
remaining_capacity = output->capacity - output->length;
|
68
|
-
va_start(args, format);
|
69
67
|
bytes_written = vsnprintf(output->data + output->length,
|
70
68
|
remaining_capacity, format, args);
|
71
|
-
va_end(args);
|
72
69
|
}
|
73
70
|
output->length += bytes_written;
|
71
|
+
va_end(args);
|
74
72
|
return bytes_written;
|
75
73
|
}
|
76
74
|
|
data/gumbo-parser/src/gumbo.h
CHANGED
@@ -141,7 +141,7 @@ extern const GumboVector kGumboEmptyVector;
|
|
141
141
|
* Returns the first index at which an element appears in this vector (testing
|
142
142
|
* by pointer equality), or -1 if it never does.
|
143
143
|
*/
|
144
|
-
int gumbo_vector_index_of(GumboVector* vector,
|
144
|
+
int gumbo_vector_index_of(GumboVector* vector, void* element);
|
145
145
|
|
146
146
|
|
147
147
|
/**
|
@@ -157,10 +157,172 @@ int gumbo_vector_index_of(GumboVector* vector, const void* element);
|
|
157
157
|
* strings.
|
158
158
|
*/
|
159
159
|
typedef enum {
|
160
|
-
//
|
161
|
-
|
162
|
-
//
|
163
|
-
|
160
|
+
// http://www.whatwg.org/specs/web-apps/current-work/multipage/semantics.html#the-root-element
|
161
|
+
GUMBO_TAG_HTML,
|
162
|
+
// http://www.whatwg.org/specs/web-apps/current-work/multipage/semantics.html#document-metadata
|
163
|
+
GUMBO_TAG_HEAD,
|
164
|
+
GUMBO_TAG_TITLE,
|
165
|
+
GUMBO_TAG_BASE,
|
166
|
+
GUMBO_TAG_LINK,
|
167
|
+
GUMBO_TAG_META,
|
168
|
+
GUMBO_TAG_STYLE,
|
169
|
+
// http://www.whatwg.org/specs/web-apps/current-work/multipage/scripting-1.html#scripting-1
|
170
|
+
GUMBO_TAG_SCRIPT,
|
171
|
+
GUMBO_TAG_NOSCRIPT,
|
172
|
+
GUMBO_TAG_TEMPLATE,
|
173
|
+
// http://www.whatwg.org/specs/web-apps/current-work/multipage/sections.html#sections
|
174
|
+
GUMBO_TAG_BODY,
|
175
|
+
GUMBO_TAG_ARTICLE,
|
176
|
+
GUMBO_TAG_SECTION,
|
177
|
+
GUMBO_TAG_NAV,
|
178
|
+
GUMBO_TAG_ASIDE,
|
179
|
+
GUMBO_TAG_H1,
|
180
|
+
GUMBO_TAG_H2,
|
181
|
+
GUMBO_TAG_H3,
|
182
|
+
GUMBO_TAG_H4,
|
183
|
+
GUMBO_TAG_H5,
|
184
|
+
GUMBO_TAG_H6,
|
185
|
+
GUMBO_TAG_HGROUP,
|
186
|
+
GUMBO_TAG_HEADER,
|
187
|
+
GUMBO_TAG_FOOTER,
|
188
|
+
GUMBO_TAG_ADDRESS,
|
189
|
+
// http://www.whatwg.org/specs/web-apps/current-work/multipage/grouping-content.html#grouping-content
|
190
|
+
GUMBO_TAG_P,
|
191
|
+
GUMBO_TAG_HR,
|
192
|
+
GUMBO_TAG_PRE,
|
193
|
+
GUMBO_TAG_BLOCKQUOTE,
|
194
|
+
GUMBO_TAG_OL,
|
195
|
+
GUMBO_TAG_UL,
|
196
|
+
GUMBO_TAG_LI,
|
197
|
+
GUMBO_TAG_DL,
|
198
|
+
GUMBO_TAG_DT,
|
199
|
+
GUMBO_TAG_DD,
|
200
|
+
GUMBO_TAG_FIGURE,
|
201
|
+
GUMBO_TAG_FIGCAPTION,
|
202
|
+
GUMBO_TAG_MAIN,
|
203
|
+
GUMBO_TAG_DIV,
|
204
|
+
// http://www.whatwg.org/specs/web-apps/current-work/multipage/text-level-semantics.html#text-level-semantics
|
205
|
+
GUMBO_TAG_A,
|
206
|
+
GUMBO_TAG_EM,
|
207
|
+
GUMBO_TAG_STRONG,
|
208
|
+
GUMBO_TAG_SMALL,
|
209
|
+
GUMBO_TAG_S,
|
210
|
+
GUMBO_TAG_CITE,
|
211
|
+
GUMBO_TAG_Q,
|
212
|
+
GUMBO_TAG_DFN,
|
213
|
+
GUMBO_TAG_ABBR,
|
214
|
+
GUMBO_TAG_DATA,
|
215
|
+
GUMBO_TAG_TIME,
|
216
|
+
GUMBO_TAG_CODE,
|
217
|
+
GUMBO_TAG_VAR,
|
218
|
+
GUMBO_TAG_SAMP,
|
219
|
+
GUMBO_TAG_KBD,
|
220
|
+
GUMBO_TAG_SUB,
|
221
|
+
GUMBO_TAG_SUP,
|
222
|
+
GUMBO_TAG_I,
|
223
|
+
GUMBO_TAG_B,
|
224
|
+
GUMBO_TAG_U,
|
225
|
+
GUMBO_TAG_MARK,
|
226
|
+
GUMBO_TAG_RUBY,
|
227
|
+
GUMBO_TAG_RT,
|
228
|
+
GUMBO_TAG_RP,
|
229
|
+
GUMBO_TAG_BDI,
|
230
|
+
GUMBO_TAG_BDO,
|
231
|
+
GUMBO_TAG_SPAN,
|
232
|
+
GUMBO_TAG_BR,
|
233
|
+
GUMBO_TAG_WBR,
|
234
|
+
// http://www.whatwg.org/specs/web-apps/current-work/multipage/edits.html#edits
|
235
|
+
GUMBO_TAG_INS,
|
236
|
+
GUMBO_TAG_DEL,
|
237
|
+
// http://www.whatwg.org/specs/web-apps/current-work/multipage/embedded-content-1.html#embedded-content-1
|
238
|
+
GUMBO_TAG_IMAGE,
|
239
|
+
GUMBO_TAG_IMG,
|
240
|
+
GUMBO_TAG_IFRAME,
|
241
|
+
GUMBO_TAG_EMBED,
|
242
|
+
GUMBO_TAG_OBJECT,
|
243
|
+
GUMBO_TAG_PARAM,
|
244
|
+
GUMBO_TAG_VIDEO,
|
245
|
+
GUMBO_TAG_AUDIO,
|
246
|
+
GUMBO_TAG_SOURCE,
|
247
|
+
GUMBO_TAG_TRACK,
|
248
|
+
GUMBO_TAG_CANVAS,
|
249
|
+
GUMBO_TAG_MAP,
|
250
|
+
GUMBO_TAG_AREA,
|
251
|
+
// http://www.whatwg.org/specs/web-apps/current-work/multipage/the-map-element.html#mathml
|
252
|
+
GUMBO_TAG_MATH,
|
253
|
+
GUMBO_TAG_MI,
|
254
|
+
GUMBO_TAG_MO,
|
255
|
+
GUMBO_TAG_MN,
|
256
|
+
GUMBO_TAG_MS,
|
257
|
+
GUMBO_TAG_MTEXT,
|
258
|
+
GUMBO_TAG_MGLYPH,
|
259
|
+
GUMBO_TAG_MALIGNMARK,
|
260
|
+
GUMBO_TAG_ANNOTATION_XML,
|
261
|
+
// http://www.whatwg.org/specs/web-apps/current-work/multipage/the-map-element.html#svg-0
|
262
|
+
GUMBO_TAG_SVG,
|
263
|
+
GUMBO_TAG_FOREIGNOBJECT,
|
264
|
+
GUMBO_TAG_DESC,
|
265
|
+
// SVG title tags will have GUMBO_TAG_TITLE as with HTML.
|
266
|
+
// http://www.whatwg.org/specs/web-apps/current-work/multipage/tabular-data.html#tabular-data
|
267
|
+
GUMBO_TAG_TABLE,
|
268
|
+
GUMBO_TAG_CAPTION,
|
269
|
+
GUMBO_TAG_COLGROUP,
|
270
|
+
GUMBO_TAG_COL,
|
271
|
+
GUMBO_TAG_TBODY,
|
272
|
+
GUMBO_TAG_THEAD,
|
273
|
+
GUMBO_TAG_TFOOT,
|
274
|
+
GUMBO_TAG_TR,
|
275
|
+
GUMBO_TAG_TD,
|
276
|
+
GUMBO_TAG_TH,
|
277
|
+
// http://www.whatwg.org/specs/web-apps/current-work/multipage/forms.html#forms
|
278
|
+
GUMBO_TAG_FORM,
|
279
|
+
GUMBO_TAG_FIELDSET,
|
280
|
+
GUMBO_TAG_LEGEND,
|
281
|
+
GUMBO_TAG_LABEL,
|
282
|
+
GUMBO_TAG_INPUT,
|
283
|
+
GUMBO_TAG_BUTTON,
|
284
|
+
GUMBO_TAG_SELECT,
|
285
|
+
GUMBO_TAG_DATALIST,
|
286
|
+
GUMBO_TAG_OPTGROUP,
|
287
|
+
GUMBO_TAG_OPTION,
|
288
|
+
GUMBO_TAG_TEXTAREA,
|
289
|
+
GUMBO_TAG_KEYGEN,
|
290
|
+
GUMBO_TAG_OUTPUT,
|
291
|
+
GUMBO_TAG_PROGRESS,
|
292
|
+
GUMBO_TAG_METER,
|
293
|
+
// http://www.whatwg.org/specs/web-apps/current-work/multipage/interactive-elements.html#interactive-elements
|
294
|
+
GUMBO_TAG_DETAILS,
|
295
|
+
GUMBO_TAG_SUMMARY,
|
296
|
+
GUMBO_TAG_MENU,
|
297
|
+
GUMBO_TAG_MENUITEM,
|
298
|
+
// Non-conforming elements that nonetheless appear in the HTML5 spec.
|
299
|
+
// http://www.whatwg.org/specs/web-apps/current-work/multipage/obsolete.html#non-conforming-features
|
300
|
+
GUMBO_TAG_APPLET,
|
301
|
+
GUMBO_TAG_ACRONYM,
|
302
|
+
GUMBO_TAG_BGSOUND,
|
303
|
+
GUMBO_TAG_DIR,
|
304
|
+
GUMBO_TAG_FRAME,
|
305
|
+
GUMBO_TAG_FRAMESET,
|
306
|
+
GUMBO_TAG_NOFRAMES,
|
307
|
+
GUMBO_TAG_ISINDEX,
|
308
|
+
GUMBO_TAG_LISTING,
|
309
|
+
GUMBO_TAG_XMP,
|
310
|
+
GUMBO_TAG_NEXTID,
|
311
|
+
GUMBO_TAG_NOEMBED,
|
312
|
+
GUMBO_TAG_PLAINTEXT,
|
313
|
+
GUMBO_TAG_RB,
|
314
|
+
GUMBO_TAG_STRIKE,
|
315
|
+
GUMBO_TAG_BASEFONT,
|
316
|
+
GUMBO_TAG_BIG,
|
317
|
+
GUMBO_TAG_BLINK,
|
318
|
+
GUMBO_TAG_CENTER,
|
319
|
+
GUMBO_TAG_FONT,
|
320
|
+
GUMBO_TAG_MARQUEE,
|
321
|
+
GUMBO_TAG_MULTICOL,
|
322
|
+
GUMBO_TAG_NOBR,
|
323
|
+
GUMBO_TAG_SPACER,
|
324
|
+
GUMBO_TAG_TT,
|
325
|
+
// Used for all tags that don't have special handling in HTML.
|
164
326
|
GUMBO_TAG_UNKNOWN,
|
165
327
|
// A marker value to indicate the end of the enum, for iterating over it.
|
166
328
|
// Also used as the terminator for varargs functions that take tags.
|
@@ -202,10 +364,9 @@ const char* gumbo_normalize_svg_tagname(const GumboStringPiece* tagname);
|
|
202
364
|
|
203
365
|
/**
|
204
366
|
* Converts a tag name string (which may be in upper or mixed case) to a tag
|
205
|
-
* enum.
|
367
|
+
* enum.
|
206
368
|
*/
|
207
369
|
GumboTag gumbo_tag_enum(const char* tagname);
|
208
|
-
GumboTag gumbo_tagn_enum(const char* tagname, unsigned int length);
|
209
370
|
|
210
371
|
/**
|
211
372
|
* Attribute namespaces.
|
@@ -300,16 +461,10 @@ typedef enum {
|
|
300
461
|
GUMBO_NODE_TEXT,
|
301
462
|
/** CDATA node. v will be a GumboText. */
|
302
463
|
GUMBO_NODE_CDATA,
|
303
|
-
/** Comment node. v will be a GumboText, excluding comment delimiters. */
|
464
|
+
/** Comment node. v. will be a GumboText, excluding comment delimiters. */
|
304
465
|
GUMBO_NODE_COMMENT,
|
305
466
|
/** Text node, where all contents is whitespace. v will be a GumboText. */
|
306
|
-
GUMBO_NODE_WHITESPACE
|
307
|
-
/** Template node. This is separate from GUMBO_NODE_ELEMENT because many
|
308
|
-
* client libraries will want to ignore the contents of template nodes, as
|
309
|
-
* the spec suggests. Recursing on GUMBO_NODE_ELEMENT will do the right thing
|
310
|
-
* here, while clients that want to include template contents should also
|
311
|
-
* check for GUMBO_NODE_TEMPLATE. v will be a GumboElement. */
|
312
|
-
GUMBO_NODE_TEMPLATE
|
467
|
+
GUMBO_NODE_WHITESPACE
|
313
468
|
} GumboNodeType;
|
314
469
|
|
315
470
|
/**
|
@@ -523,19 +678,6 @@ struct GumboInternalNode {
|
|
523
678
|
/** Pointer back to parent node. Not owned. */
|
524
679
|
GumboNode* parent;
|
525
680
|
|
526
|
-
/**
|
527
|
-
* Pointer to next node in document order. This is the next node by start tag
|
528
|
-
* position in the document, or by position of the tag that forces the parser
|
529
|
-
* to insert it for parser-inserted nodes. It's necessary to maintain API
|
530
|
-
* compatibility with some other libraries, eg. BeautifulSoup. Not owned.
|
531
|
-
*/
|
532
|
-
GumboNode* next;
|
533
|
-
|
534
|
-
/**
|
535
|
-
* Pointer to previous node in document order.
|
536
|
-
*/
|
537
|
-
GumboNode* prev;
|
538
|
-
|
539
681
|
/** The index within the parent's children vector of this node. */
|
540
682
|
size_t index_within_parent;
|
541
683
|
|
@@ -653,14 +795,6 @@ GumboOutput* gumbo_parse(const char* buffer);
|
|
653
795
|
GumboOutput* gumbo_parse_with_options(
|
654
796
|
const GumboOptions* options, const char* buffer, size_t buffer_length);
|
655
797
|
|
656
|
-
/**
|
657
|
-
* Parse a chunk of HTML with the given fragment context. If `fragment_ctx`
|
658
|
-
* is `GUMBO_TAG_LAST`, the fragment will be parsed as a full document.
|
659
|
-
*/
|
660
|
-
GumboOutput* gumbo_parse_fragment(
|
661
|
-
const GumboOptions* options, const char* buffer, size_t length,
|
662
|
-
const GumboTag fragment_ctx, const GumboNamespaceEnum fragment_namespace);
|
663
|
-
|
664
798
|
/** Release the memory used for the parse tree & parse errors. */
|
665
799
|
void gumbo_destroy_output(
|
666
800
|
const GumboOptions* options, GumboOutput* output);
|
data/gumbo-parser/src/parser.c
CHANGED
@@ -47,15 +47,6 @@ typedef char gumbo_tagset[GUMBO_TAG_LAST];
|
|
47
47
|
tagset[(int)tag] == (1 << (int)namespace))
|
48
48
|
|
49
49
|
|
50
|
-
|
51
|
-
// selected forward declarations as it is getting hard to find
|
52
|
-
// an appropriate order
|
53
|
-
static bool node_html_tag_is(const GumboNode*, GumboTag);
|
54
|
-
static GumboInsertionMode get_current_template_insertion_mode(const GumboParser*);
|
55
|
-
static bool handle_in_template(GumboParser*, GumboToken*);
|
56
|
-
static GumboNode* destroy_node(GumboParser*, GumboNode*);
|
57
|
-
|
58
|
-
|
59
50
|
static void* malloc_wrapper(void* unused, size_t size) {
|
60
51
|
return malloc(size);
|
61
52
|
}
|
@@ -199,7 +190,7 @@ typedef struct _ReplacementEntry {
|
|
199
190
|
{ GUMBO_STRING(from), GUMBO_STRING(to) }
|
200
191
|
|
201
192
|
// Static data for SVG attribute replacements.
|
202
|
-
//
|
193
|
+
// http://www.whatwg.org/specs/web-apps/current-work/multipage/tree-construction.html#adjust-svg-attributes
|
203
194
|
static const ReplacementEntry kSvgAttributeReplacements[] = {
|
204
195
|
REPLACEMENT_ENTRY("attributename", "attributeName"),
|
205
196
|
REPLACEMENT_ENTRY("attributetype", "attributeType"),
|
@@ -207,12 +198,12 @@ static const ReplacementEntry kSvgAttributeReplacements[] = {
|
|
207
198
|
REPLACEMENT_ENTRY("baseprofile", "baseProfile"),
|
208
199
|
REPLACEMENT_ENTRY("calcmode", "calcMode"),
|
209
200
|
REPLACEMENT_ENTRY("clippathunits", "clipPathUnits"),
|
210
|
-
|
211
|
-
|
201
|
+
REPLACEMENT_ENTRY("contentscripttype", "contentScriptType"),
|
202
|
+
REPLACEMENT_ENTRY("contentstyletype", "contentStyleType"),
|
212
203
|
REPLACEMENT_ENTRY("diffuseconstant", "diffuseConstant"),
|
213
204
|
REPLACEMENT_ENTRY("edgemode", "edgeMode"),
|
214
|
-
|
215
|
-
|
205
|
+
REPLACEMENT_ENTRY("externalresourcesrequired", "externalResourcesRequired"),
|
206
|
+
REPLACEMENT_ENTRY("filterres", "filterRes"),
|
216
207
|
REPLACEMENT_ENTRY("filterunits", "filterUnits"),
|
217
208
|
REPLACEMENT_ENTRY("glyphref", "glyphRef"),
|
218
209
|
REPLACEMENT_ENTRY("gradienttransform", "gradientTransform"),
|
@@ -380,9 +371,6 @@ typedef struct GumboInternalParserState {
|
|
380
371
|
GumboNode* _head_element;
|
381
372
|
GumboNode* _form_element;
|
382
373
|
|
383
|
-
// The element used as fragment context when parsing in fragment mode
|
384
|
-
GumboNode* _fragment_ctx;
|
385
|
-
|
386
374
|
// The flag for when the spec says "Reprocess the current token in..."
|
387
375
|
bool _reprocess_current_token;
|
388
376
|
|
@@ -411,10 +399,6 @@ typedef struct GumboInternalParserState {
|
|
411
399
|
// The current token.
|
412
400
|
GumboToken* _current_token;
|
413
401
|
|
414
|
-
// The current (most recently inserted) node. This is used to link together
|
415
|
-
// nodes in document order.
|
416
|
-
GumboNode* _current_node;
|
417
|
-
|
418
402
|
// The way that the spec is written, the </body> and </html> tags are *always*
|
419
403
|
// implicit, because encountering one of those tokens merely switches the
|
420
404
|
// insertion mode out of "in body". So we have individual state flags for
|
@@ -467,17 +451,7 @@ static void set_frameset_not_ok(GumboParser* parser) {
|
|
467
451
|
}
|
468
452
|
|
469
453
|
static GumboNode* create_node(GumboParser* parser, GumboNodeType type) {
|
470
|
-
GumboParserState* state = parser->_parser_state;
|
471
454
|
GumboNode* node = gumbo_parser_allocate(parser, sizeof(GumboNode));
|
472
|
-
|
473
|
-
node->next = NULL;
|
474
|
-
node->prev = state->_current_node;
|
475
|
-
if (state->_current_node != NULL) {
|
476
|
-
// May be null for the initial document node.
|
477
|
-
state->_current_node->next = node;
|
478
|
-
}
|
479
|
-
state->_current_node = node;
|
480
|
-
|
481
455
|
node->parent = NULL;
|
482
456
|
node->index_within_parent = -1;
|
483
457
|
node->type = type;
|
@@ -524,9 +498,7 @@ static void parser_state_init(GumboParser* parser) {
|
|
524
498
|
gumbo_vector_init(parser, 5, &parser_state->_template_insertion_modes);
|
525
499
|
parser_state->_head_element = NULL;
|
526
500
|
parser_state->_form_element = NULL;
|
527
|
-
parser_state->_fragment_ctx = NULL;
|
528
501
|
parser_state->_current_token = NULL;
|
529
|
-
parser_state->_current_node = NULL;
|
530
502
|
parser_state->_closed_body_tag = false;
|
531
503
|
parser_state->_closed_html_tag = false;
|
532
504
|
parser->_parser_state = parser_state;
|
@@ -534,25 +506,17 @@ static void parser_state_init(GumboParser* parser) {
|
|
534
506
|
|
535
507
|
static void parser_state_destroy(GumboParser* parser) {
|
536
508
|
GumboParserState* state = parser->_parser_state;
|
537
|
-
if (state->_fragment_ctx) {
|
538
|
-
destroy_node(parser, state->_fragment_ctx);
|
539
|
-
}
|
540
509
|
gumbo_vector_destroy(parser, &state->_active_formatting_elements);
|
541
510
|
gumbo_vector_destroy(parser, &state->_open_elements);
|
542
511
|
gumbo_vector_destroy(parser, &state->_template_insertion_modes);
|
543
512
|
gumbo_string_buffer_destroy(parser, &state->_text_node._buffer);
|
544
513
|
gumbo_parser_deallocate(parser, state);
|
545
|
-
parser->_parser_state = NULL;
|
546
514
|
}
|
547
515
|
|
548
516
|
static GumboNode* get_document_node(GumboParser* parser) {
|
549
517
|
return parser->_output->document;
|
550
518
|
}
|
551
519
|
|
552
|
-
static bool is_fragment_parser(const GumboParser *parser) {
|
553
|
-
return !!parser->_parser_state->_fragment_ctx;
|
554
|
-
}
|
555
|
-
|
556
520
|
// Returns the node at the bottom of the stack of open elements, or NULL if no
|
557
521
|
// elements have been added yet.
|
558
522
|
static GumboNode* get_current_node(GumboParser* parser) {
|
@@ -566,14 +530,6 @@ static GumboNode* get_current_node(GumboParser* parser) {
|
|
566
530
|
return open_elements->data[open_elements->length - 1];
|
567
531
|
}
|
568
532
|
|
569
|
-
static GumboNode* get_adjusted_current_node(GumboParser* parser) {
|
570
|
-
GumboParserState *state = parser->_parser_state;
|
571
|
-
if (state->_open_elements.length == 1 && state->_fragment_ctx) {
|
572
|
-
return state->_fragment_ctx;
|
573
|
-
}
|
574
|
-
return get_current_node(parser);
|
575
|
-
}
|
576
|
-
|
577
533
|
// Returns true if the given needle is in the given array of literal
|
578
534
|
// GumboStringPieces. If exact_match is true, this requires that they match
|
579
535
|
// exactly; otherwise, this performs a prefix match to check if any of the
|
@@ -594,80 +550,55 @@ static void set_insertion_mode(GumboParser* parser, GumboInsertionMode mode) {
|
|
594
550
|
parser->_parser_state->_insertion_mode = mode;
|
595
551
|
}
|
596
552
|
|
597
|
-
|
598
553
|
// http://www.whatwg.org/specs/web-apps/current-work/complete/parsing.html#reset-the-insertion-mode-appropriately
|
599
554
|
// This is a helper function that returns the appropriate insertion mode instead
|
600
555
|
// of setting it. Returns GUMBO_INSERTION_MODE_INITIAL as a sentinel value to
|
601
556
|
// indicate that there is no appropriate insertion mode, and the loop should
|
602
557
|
// continue.
|
603
|
-
static GumboInsertionMode get_appropriate_insertion_mode(
|
604
|
-
|
605
|
-
|
606
|
-
const bool is_last = index == 0;
|
607
|
-
|
608
|
-
if (is_last && is_fragment_parser(parser)) {
|
609
|
-
node = parser->_parser_state->_fragment_ctx;
|
610
|
-
}
|
558
|
+
static GumboInsertionMode get_appropriate_insertion_mode(
|
559
|
+
const GumboNode* node, bool is_last) {
|
560
|
+
assert(node->type == GUMBO_NODE_ELEMENT);
|
611
561
|
|
612
|
-
|
613
|
-
|
614
|
-
|
615
|
-
|
616
|
-
|
617
|
-
|
618
|
-
|
619
|
-
|
620
|
-
|
621
|
-
|
622
|
-
|
623
|
-
|
624
|
-
|
625
|
-
|
626
|
-
|
627
|
-
|
628
|
-
|
629
|
-
|
630
|
-
|
631
|
-
|
632
|
-
|
633
|
-
|
634
|
-
|
635
|
-
|
636
|
-
|
637
|
-
|
638
|
-
|
639
|
-
|
640
|
-
|
641
|
-
|
642
|
-
|
643
|
-
|
644
|
-
return GUMBO_INSERTION_MODE_IN_TABLE;
|
645
|
-
case GUMBO_TAG_TEMPLATE:
|
646
|
-
return get_current_template_insertion_mode(parser);
|
647
|
-
case GUMBO_TAG_HEAD:
|
648
|
-
if (!is_last) return GUMBO_INSERTION_MODE_IN_HEAD;
|
649
|
-
break;
|
650
|
-
case GUMBO_TAG_BODY:
|
651
|
-
return GUMBO_INSERTION_MODE_IN_BODY;
|
652
|
-
case GUMBO_TAG_FRAMESET:
|
653
|
-
return GUMBO_INSERTION_MODE_IN_FRAMESET;
|
654
|
-
case GUMBO_TAG_HTML:
|
655
|
-
return parser->_parser_state->_head_element ?
|
656
|
-
GUMBO_INSERTION_MODE_AFTER_HEAD : GUMBO_INSERTION_MODE_BEFORE_HEAD;
|
657
|
-
default:
|
658
|
-
break;
|
659
|
-
}
|
660
|
-
return is_last ?
|
661
|
-
GUMBO_INSERTION_MODE_IN_BODY : GUMBO_INSERTION_MODE_INITIAL;
|
562
|
+
if (node->v.element.tag_namespace == GUMBO_NAMESPACE_HTML) {
|
563
|
+
switch (node->v.element.tag) {
|
564
|
+
case GUMBO_TAG_SELECT:
|
565
|
+
return GUMBO_INSERTION_MODE_IN_SELECT;
|
566
|
+
case GUMBO_TAG_TD:
|
567
|
+
case GUMBO_TAG_TH:
|
568
|
+
return is_last ?
|
569
|
+
GUMBO_INSERTION_MODE_IN_BODY : GUMBO_INSERTION_MODE_IN_CELL;
|
570
|
+
case GUMBO_TAG_TR:
|
571
|
+
return GUMBO_INSERTION_MODE_IN_ROW;
|
572
|
+
case GUMBO_TAG_TBODY:
|
573
|
+
case GUMBO_TAG_THEAD:
|
574
|
+
case GUMBO_TAG_TFOOT:
|
575
|
+
return GUMBO_INSERTION_MODE_IN_TABLE_BODY;
|
576
|
+
case GUMBO_TAG_CAPTION:
|
577
|
+
return GUMBO_INSERTION_MODE_IN_CAPTION;
|
578
|
+
case GUMBO_TAG_COLGROUP:
|
579
|
+
return GUMBO_INSERTION_MODE_IN_COLUMN_GROUP;
|
580
|
+
case GUMBO_TAG_TABLE:
|
581
|
+
return GUMBO_INSERTION_MODE_IN_TABLE;
|
582
|
+
case GUMBO_TAG_HEAD:
|
583
|
+
case GUMBO_TAG_BODY:
|
584
|
+
return GUMBO_INSERTION_MODE_IN_BODY;
|
585
|
+
case GUMBO_TAG_FRAMESET:
|
586
|
+
return GUMBO_INSERTION_MODE_IN_FRAMESET;
|
587
|
+
case GUMBO_TAG_HTML:
|
588
|
+
return GUMBO_INSERTION_MODE_BEFORE_HEAD;
|
589
|
+
default:
|
590
|
+
break;
|
591
|
+
}
|
592
|
+
}
|
593
|
+
return is_last ? GUMBO_INSERTION_MODE_IN_BODY : GUMBO_INSERTION_MODE_INITIAL;
|
662
594
|
}
|
663
595
|
|
664
|
-
|
665
596
|
// This performs the actual "reset the insertion mode" loop.
|
666
597
|
static void reset_insertion_mode_appropriately(GumboParser* parser) {
|
667
598
|
const GumboVector* open_elements = &parser->_parser_state->_open_elements;
|
668
599
|
for (int i = open_elements->length; --i >= 0; ) {
|
669
600
|
GumboInsertionMode mode =
|
670
|
-
get_appropriate_insertion_mode(
|
601
|
+
get_appropriate_insertion_mode(open_elements->data[i], i == 0);
|
671
602
|
if (mode != GUMBO_INSERTION_MODE_INITIAL) {
|
672
603
|
set_insertion_mode(parser, mode);
|
673
604
|
return;
|
@@ -701,7 +632,7 @@ static GumboError* parser_add_parse_error(GumboParser* parser, const GumboToken*
|
|
701
632
|
&extra_data->tag_stack);
|
702
633
|
for (int i = 0; i < state->_open_elements.length; ++i) {
|
703
634
|
const GumboNode* node = state->_open_elements.data[i];
|
704
|
-
assert(node->type == GUMBO_NODE_ELEMENT
|
635
|
+
assert(node->type == GUMBO_NODE_ELEMENT);
|
705
636
|
gumbo_vector_add(parser, (void*) node->v.element.tag,
|
706
637
|
&extra_data->tag_stack);
|
707
638
|
}
|
@@ -738,7 +669,7 @@ static bool tag_is(const GumboToken* token, bool is_start, GumboTag tag) {
|
|
738
669
|
// Like tag_in, but checks for the tag of a node, rather than a token.
|
739
670
|
static bool node_tag_in_set(const GumboNode* node, const gumbo_tagset tags) {
|
740
671
|
assert(node != NULL);
|
741
|
-
if (node->type != GUMBO_NODE_ELEMENT
|
672
|
+
if (node->type != GUMBO_NODE_ELEMENT) {
|
742
673
|
return false;
|
743
674
|
}
|
744
675
|
return TAGSET_INCLUDES(tags, node->v.element.tag_namespace, node->v.element.tag);
|
@@ -747,7 +678,7 @@ static bool node_tag_in_set(const GumboNode* node, const gumbo_tagset tags) {
|
|
747
678
|
|
748
679
|
// Like node_tag_in, but for the single-tag case.
|
749
680
|
static bool node_qualified_tag_is(const GumboNode* node, GumboNamespaceEnum ns, GumboTag tag) {
|
750
|
-
return
|
681
|
+
return node->type == GUMBO_NODE_ELEMENT &&
|
751
682
|
node->v.element.tag == tag &&
|
752
683
|
node->v.element.tag_namespace == ns;
|
753
684
|
}
|
@@ -758,23 +689,6 @@ static bool node_html_tag_is(const GumboNode* node, GumboTag tag)
|
|
758
689
|
return node_qualified_tag_is(node, GUMBO_NAMESPACE_HTML, tag);
|
759
690
|
}
|
760
691
|
|
761
|
-
static void push_template_insertion_mode(GumboParser* parser, GumboInsertionMode mode) {
|
762
|
-
gumbo_vector_add(parser, (void*) mode, &parser->_parser_state->_template_insertion_modes);
|
763
|
-
}
|
764
|
-
|
765
|
-
static void pop_template_insertion_mode(GumboParser* parser) {
|
766
|
-
gumbo_vector_pop(parser, &parser->_parser_state->_template_insertion_modes);
|
767
|
-
}
|
768
|
-
|
769
|
-
// Returns the current template insertion mode. If the stack of template
|
770
|
-
// insertion modes is empty, this returns GUMBO_INSERTION_MODE_INITIAL.
|
771
|
-
static GumboInsertionMode get_current_template_insertion_mode(const GumboParser* parser) {
|
772
|
-
GumboVector* template_insertion_modes = &parser->_parser_state->_template_insertion_modes;
|
773
|
-
if (template_insertion_modes->length == 0) {
|
774
|
-
return GUMBO_INSERTION_MODE_INITIAL;
|
775
|
-
}
|
776
|
-
return (GumboInsertionMode) template_insertion_modes->data[(template_insertion_modes->length - 1)];
|
777
|
-
}
|
778
692
|
|
779
693
|
// http://www.whatwg.org/specs/web-apps/current-work/multipage/tree-construction.html#mathml-text-integration-point
|
780
694
|
static bool is_mathml_integration_point(const GumboNode* node) {
|
@@ -792,63 +706,6 @@ static bool is_html_integration_point(const GumboNode* node) {
|
|
792
706
|
"encoding", "application/xhtml+xml")));
|
793
707
|
}
|
794
708
|
|
795
|
-
|
796
|
-
// This represents a place to insert a node, consisting of a target parent and a
|
797
|
-
// child index within that parent. If the node should be inserted at the end of
|
798
|
-
// the parent's child, index will be -1.
|
799
|
-
typedef struct {
|
800
|
-
GumboNode* target;
|
801
|
-
int index;
|
802
|
-
} InsertionLocation;
|
803
|
-
|
804
|
-
InsertionLocation get_appropriate_insertion_location(GumboParser* parser, GumboNode* override_target) {
|
805
|
-
InsertionLocation retval = { override_target, -1 };
|
806
|
-
if (retval.target == NULL) {
|
807
|
-
// No override target; default to the current node, but special-case the
|
808
|
-
// root node since get_current_node() assumes the stack of open elements is
|
809
|
-
// non-empty.
|
810
|
-
retval.target = parser->_output->root != NULL ?
|
811
|
-
get_current_node(parser) : get_document_node(parser);
|
812
|
-
}
|
813
|
-
if (!parser->_parser_state->_foster_parent_insertions ||
|
814
|
-
!node_tag_in_set(retval.target, (gumbo_tagset) { TAG(TABLE), TAG(TBODY),
|
815
|
-
TAG(TFOOT), TAG(THEAD), TAG(TR) })) {
|
816
|
-
return retval;
|
817
|
-
}
|
818
|
-
|
819
|
-
// Foster-parenting case.
|
820
|
-
int last_template_index = -1;
|
821
|
-
int last_table_index = -1;
|
822
|
-
GumboVector* open_elements = &parser->_parser_state->_open_elements;
|
823
|
-
for (int i = 0; i < open_elements->length; ++i) {
|
824
|
-
if (node_html_tag_is(open_elements->data[i], GUMBO_TAG_TEMPLATE)) {
|
825
|
-
last_template_index = i;
|
826
|
-
}
|
827
|
-
if (node_html_tag_is(open_elements->data[i], GUMBO_TAG_TABLE)) {
|
828
|
-
last_table_index = i;
|
829
|
-
}
|
830
|
-
}
|
831
|
-
if (last_template_index != -1 &&
|
832
|
-
(last_table_index == -1 || last_template_index > last_table_index)) {
|
833
|
-
retval.target = open_elements->data[last_template_index];
|
834
|
-
return retval;
|
835
|
-
}
|
836
|
-
if (last_table_index == -1) {
|
837
|
-
retval.target = open_elements->data[0];
|
838
|
-
return retval;
|
839
|
-
}
|
840
|
-
GumboNode* last_table = open_elements->data[last_table_index];
|
841
|
-
if (last_table->parent != NULL) {
|
842
|
-
retval.target = last_table->parent;
|
843
|
-
retval.index = last_table->index_within_parent;
|
844
|
-
return retval;
|
845
|
-
}
|
846
|
-
|
847
|
-
retval.target = open_elements->data[last_table_index - 1];
|
848
|
-
return retval;
|
849
|
-
}
|
850
|
-
|
851
|
-
|
852
709
|
// Appends a node to the end of its parent, setting the "parent" and
|
853
710
|
// "index_within_parent" fields appropriately.
|
854
711
|
static void append_node(
|
@@ -856,7 +713,7 @@ static void append_node(
|
|
856
713
|
assert(node->parent == NULL);
|
857
714
|
assert(node->index_within_parent == -1);
|
858
715
|
GumboVector* children;
|
859
|
-
if (parent->type == GUMBO_NODE_ELEMENT
|
716
|
+
if (parent->type == GUMBO_NODE_ELEMENT) {
|
860
717
|
children = &parent->v.element.children;
|
861
718
|
} else {
|
862
719
|
assert(parent->type == GUMBO_NODE_DOCUMENT);
|
@@ -868,44 +725,66 @@ static void append_node(
|
|
868
725
|
assert(node->index_within_parent < children->length);
|
869
726
|
}
|
870
727
|
|
871
|
-
// Inserts a node at the specified
|
728
|
+
// Inserts a node at the specified index within its parent, updating the
|
872
729
|
// "parent" and "index_within_parent" fields of it and all its siblings.
|
873
|
-
// If the index of the location is -1, this calls append_node.
|
874
730
|
static void insert_node(
|
875
|
-
|
731
|
+
GumboParser* parser, GumboNode* parent, int index, GumboNode* node) {
|
876
732
|
assert(node->parent == NULL);
|
877
733
|
assert(node->index_within_parent == -1);
|
878
|
-
|
879
|
-
|
880
|
-
|
881
|
-
|
882
|
-
|
883
|
-
|
884
|
-
|
885
|
-
|
886
|
-
|
887
|
-
|
888
|
-
|
889
|
-
|
890
|
-
|
734
|
+
assert(parent->type == GUMBO_NODE_ELEMENT);
|
735
|
+
GumboVector* children = &parent->v.element.children;
|
736
|
+
assert(index >= 0);
|
737
|
+
assert(index < children->length);
|
738
|
+
node->parent = parent;
|
739
|
+
node->index_within_parent = index;
|
740
|
+
gumbo_vector_insert_at(parser, (void*) node, index, children);
|
741
|
+
assert(node->index_within_parent < children->length);
|
742
|
+
for (int i = index + 1; i < children->length; ++i) {
|
743
|
+
GumboNode* sibling = children->data[i];
|
744
|
+
sibling->index_within_parent = i;
|
745
|
+
assert(sibling->index_within_parent < children->length);
|
746
|
+
}
|
747
|
+
}
|
891
748
|
|
892
|
-
|
893
|
-
|
894
|
-
|
895
|
-
|
896
|
-
|
897
|
-
|
898
|
-
|
899
|
-
|
900
|
-
|
901
|
-
|
749
|
+
// http://www.whatwg.org/specs/web-apps/current-work/complete/tokenization.html#foster-parenting
|
750
|
+
static void foster_parent_element(GumboParser* parser, GumboNode* node) {
|
751
|
+
GumboVector* open_elements = &parser->_parser_state->_open_elements;
|
752
|
+
assert(open_elements->length > 2);
|
753
|
+
|
754
|
+
node->parse_flags |= GUMBO_INSERTION_FOSTER_PARENTED;
|
755
|
+
GumboNode* foster_parent_element = open_elements->data[0];
|
756
|
+
assert(foster_parent_element->type == GUMBO_NODE_ELEMENT);
|
757
|
+
assert(node_html_tag_is(foster_parent_element, GUMBO_TAG_HTML));
|
758
|
+
for (int i = open_elements->length; --i > 1; ) {
|
759
|
+
GumboNode* table_element = open_elements->data[i];
|
760
|
+
if (node_html_tag_is(table_element, GUMBO_TAG_TABLE)) {
|
761
|
+
foster_parent_element = table_element->parent;
|
762
|
+
if (!foster_parent_element ||
|
763
|
+
foster_parent_element->type != GUMBO_NODE_ELEMENT) {
|
764
|
+
// Table has no parent; spec says it's possible if a script manipulated
|
765
|
+
// the DOM, although I don't think we have to worry about this case.
|
766
|
+
gumbo_debug("Table has no parent.\n");
|
767
|
+
foster_parent_element = open_elements->data[i - 1];
|
768
|
+
break;
|
769
|
+
}
|
770
|
+
assert(foster_parent_element->type == GUMBO_NODE_ELEMENT);
|
771
|
+
gumbo_debug("Found enclosing table (%x) at %d; parent=%s, index=%d.\n",
|
772
|
+
table_element, i, gumbo_normalized_tagname(
|
773
|
+
foster_parent_element->v.element.tag),
|
774
|
+
table_element->index_within_parent);
|
775
|
+
assert(foster_parent_element->v.element.children.data[
|
776
|
+
table_element->index_within_parent] == table_element);
|
777
|
+
insert_node(parser, foster_parent_element,
|
778
|
+
table_element->index_within_parent, node);
|
779
|
+
return;
|
902
780
|
}
|
903
|
-
} else {
|
904
|
-
append_node(parser, parent, node);
|
905
781
|
}
|
782
|
+
if (node->type == GUMBO_NODE_ELEMENT) {
|
783
|
+
gumbo_vector_add(parser, (void*) node, open_elements);
|
784
|
+
}
|
785
|
+
append_node(parser, foster_parent_element, node);
|
906
786
|
}
|
907
787
|
|
908
|
-
|
909
788
|
static void maybe_flush_text_node_buffer(GumboParser* parser) {
|
910
789
|
GumboParserState* state = parser->_parser_state;
|
911
790
|
TextNodeBufferState* buffer_state = &state->_text_node;
|
@@ -925,20 +804,20 @@ static void maybe_flush_text_node_buffer(GumboParser* parser) {
|
|
925
804
|
state->_current_token->original_text.data -
|
926
805
|
buffer_state->_start_original_text;
|
927
806
|
text_node_data->start_pos = buffer_state->_start_position;
|
928
|
-
|
929
|
-
|
930
|
-
|
931
|
-
|
932
|
-
InsertionLocation location = get_appropriate_insertion_location(parser, NULL);
|
933
|
-
if (location.target->type == GUMBO_NODE_DOCUMENT) {
|
934
|
-
// The DOM does not allow Document nodes to have Text children, so per the
|
935
|
-
// spec, they are dropped on the floor.
|
936
|
-
destroy_node(parser, text_node);
|
807
|
+
if (state->_foster_parent_insertions &&
|
808
|
+
node_tag_in_set(get_current_node(parser), (gumbo_tagset) { TAG(TABLE), TAG(TBODY), TAG(TFOOT),
|
809
|
+
TAG(THEAD), TAG(TR) })) {
|
810
|
+
foster_parent_element(parser, text_node);
|
937
811
|
} else {
|
938
|
-
|
812
|
+
append_node(
|
813
|
+
parser, parser->_output->root ?
|
814
|
+
get_current_node(parser) : parser->_output->document, text_node);
|
939
815
|
}
|
816
|
+
gumbo_debug("Flushing text node buffer of %.*s.\n",
|
817
|
+
(int) buffer_state->_buffer.length, buffer_state->_buffer.data);
|
940
818
|
|
941
|
-
|
819
|
+
gumbo_string_buffer_destroy(parser, &buffer_state->_buffer);
|
820
|
+
gumbo_string_buffer_init(parser, &buffer_state->_buffer);
|
942
821
|
buffer_state->_type = GUMBO_NODE_WHITESPACE;
|
943
822
|
assert(buffer_state->_buffer.length == 0);
|
944
823
|
}
|
@@ -965,7 +844,7 @@ static GumboNode* pop_current_node(GumboParser* parser) {
|
|
965
844
|
assert(state->_open_elements.length == 0);
|
966
845
|
return NULL;
|
967
846
|
}
|
968
|
-
assert(current_node->type == GUMBO_NODE_ELEMENT
|
847
|
+
assert(current_node->type == GUMBO_NODE_ELEMENT);
|
969
848
|
bool is_closed_body_or_html_tag =
|
970
849
|
(node_html_tag_is(current_node, GUMBO_TAG_BODY) && state->_closed_body_tag) ||
|
971
850
|
(node_html_tag_is(current_node, GUMBO_TAG_HTML) && state->_closed_html_tag);
|
@@ -994,14 +873,14 @@ static void append_comment_node(
|
|
994
873
|
|
995
874
|
// http://www.whatwg.org/specs/web-apps/current-work/complete/tokenization.html#clear-the-stack-back-to-a-table-row-context
|
996
875
|
static void clear_stack_to_table_row_context(GumboParser* parser) {
|
997
|
-
while (!node_tag_in_set(get_current_node(parser), (gumbo_tagset) { TAG(HTML), TAG(TR)
|
876
|
+
while (!node_tag_in_set(get_current_node(parser), (gumbo_tagset) { TAG(HTML), TAG(TR) })) {
|
998
877
|
pop_current_node(parser);
|
999
878
|
}
|
1000
879
|
}
|
1001
880
|
|
1002
881
|
// http://www.whatwg.org/specs/web-apps/current-work/complete/tokenization.html#clear-the-stack-back-to-a-table-context
|
1003
882
|
static void clear_stack_to_table_context(GumboParser* parser) {
|
1004
|
-
while (!node_tag_in_set(get_current_node(parser), (gumbo_tagset) { TAG(HTML), TAG(TABLE)
|
883
|
+
while (!node_tag_in_set(get_current_node(parser), (gumbo_tagset) { TAG(HTML), TAG(TABLE) } )) {
|
1005
884
|
pop_current_node(parser);
|
1006
885
|
}
|
1007
886
|
}
|
@@ -1009,7 +888,7 @@ static void clear_stack_to_table_context(GumboParser* parser) {
|
|
1009
888
|
// http://www.whatwg.org/specs/web-apps/current-work/complete/tokenization.html#clear-the-stack-back-to-a-table-body-context
|
1010
889
|
void clear_stack_to_table_body_context(GumboParser* parser) {
|
1011
890
|
while (!node_tag_in_set(get_current_node(parser), (gumbo_tagset) { TAG(HTML), TAG(TBODY),
|
1012
|
-
TAG(TFOOT), TAG(THEAD)
|
891
|
+
TAG(TFOOT), TAG(THEAD) })) {
|
1013
892
|
pop_current_node(parser);
|
1014
893
|
}
|
1015
894
|
}
|
@@ -1024,8 +903,7 @@ static GumboNode* create_element(GumboParser* parser, GumboTag tag) {
|
|
1024
903
|
element->tag_namespace = GUMBO_NAMESPACE_HTML;
|
1025
904
|
element->original_tag = kGumboEmptyString;
|
1026
905
|
element->original_end_tag = kGumboEmptyString;
|
1027
|
-
element->start_pos =
|
1028
|
-
parser->_parser_state->_current_token->position : kGumboEmptySourcePosition;
|
906
|
+
element->start_pos = parser->_parser_state->_current_token->position;
|
1029
907
|
element->end_pos = kGumboEmptySourcePosition;
|
1030
908
|
return node;
|
1031
909
|
}
|
@@ -1036,12 +914,7 @@ static GumboNode* create_element_from_token(
|
|
1036
914
|
assert(token->type == GUMBO_TOKEN_START_TAG);
|
1037
915
|
GumboTokenStartTag* start_tag = &token->v.start_tag;
|
1038
916
|
|
1039
|
-
|
1040
|
-
tag_namespace == GUMBO_NAMESPACE_HTML &&
|
1041
|
-
start_tag->tag == GUMBO_TAG_TEMPLATE)
|
1042
|
-
? GUMBO_NODE_TEMPLATE : GUMBO_NODE_ELEMENT;
|
1043
|
-
|
1044
|
-
GumboNode* node = create_node(parser, type);
|
917
|
+
GumboNode* node = create_node(parser, GUMBO_NODE_ELEMENT);
|
1045
918
|
GumboElement* element = &node->v.element;
|
1046
919
|
gumbo_vector_init(parser, 1, &element->children);
|
1047
920
|
element->attributes = start_tag->attributes;
|
@@ -1078,9 +951,20 @@ static void insert_element(GumboParser* parser, GumboNode* node,
|
|
1078
951
|
if (!is_reconstructing_formatting_elements) {
|
1079
952
|
maybe_flush_text_node_buffer(parser);
|
1080
953
|
}
|
1081
|
-
|
1082
|
-
|
1083
|
-
|
954
|
+
if (state->_foster_parent_insertions &&
|
955
|
+
node_tag_in_set(get_current_node(parser), (gumbo_tagset) { TAG(TABLE), TAG(TBODY), TAG(TFOOT),
|
956
|
+
TAG(THEAD), TAG(TR) } )) {
|
957
|
+
foster_parent_element(parser, node);
|
958
|
+
gumbo_vector_add(parser, (void*) node, &state->_open_elements);
|
959
|
+
return;
|
960
|
+
}
|
961
|
+
|
962
|
+
// This is called to insert the root HTML element, but get_current_node
|
963
|
+
// assumes the stack of open elements is non-empty, so we need special
|
964
|
+
// handling for this case.
|
965
|
+
append_node(
|
966
|
+
parser, parser->_output->root ?
|
967
|
+
get_current_node(parser) : parser->_output->document, node);
|
1084
968
|
gumbo_vector_add(parser, (void*) node, &state->_open_elements);
|
1085
969
|
}
|
1086
970
|
|
@@ -1253,7 +1137,7 @@ static bool is_open_element(GumboParser* parser, const GumboNode* node) {
|
|
1253
1137
|
// values are fresh copies.
|
1254
1138
|
GumboNode* clone_node(
|
1255
1139
|
GumboParser* parser, const GumboNode* node, GumboParseFlags reason) {
|
1256
|
-
assert(node->type == GUMBO_NODE_ELEMENT
|
1140
|
+
assert(node->type == GUMBO_NODE_ELEMENT);
|
1257
1141
|
GumboNode* new_node = gumbo_parser_allocate(parser, sizeof(GumboNode));
|
1258
1142
|
*new_node = *node;
|
1259
1143
|
new_node->parent = NULL;
|
@@ -1323,10 +1207,7 @@ static void reconstruct_active_formatting_elements(GumboParser* parser) {
|
|
1323
1207
|
GumboNode* clone = clone_node(
|
1324
1208
|
parser, element, GUMBO_INSERTION_RECONSTRUCTED_FORMATTING_ELEMENT);
|
1325
1209
|
// Step 9.
|
1326
|
-
|
1327
|
-
insert_node(parser, clone, location);
|
1328
|
-
gumbo_vector_add(parser, (void*) clone, &parser->_parser_state->_open_elements);
|
1329
|
-
|
1210
|
+
insert_element(parser, clone, true);
|
1330
1211
|
// Step 10.
|
1331
1212
|
elements->data[i] = clone;
|
1332
1213
|
gumbo_debug("Reconstructed %s element at %d.\n",
|
@@ -1380,40 +1261,37 @@ static GumboQuirksModeEnum compute_quirks_mode(
|
|
1380
1261
|
// names. For example, "has an element in list scope" looks for an element of
|
1381
1262
|
// the given qualified name within the nearest enclosing <ol> or <ul>, along
|
1382
1263
|
// with a bunch of generic element types that serve to "firewall" their content
|
1383
|
-
// from the rest of the document.
|
1384
|
-
|
1385
|
-
static bool has_an_element_in_specific_scope(GumboParser* parser,
|
1386
|
-
int expected_size, const GumboTag *expected, bool negate, const gumbo_tagset tags) {
|
1264
|
+
// from the rest of the document.
|
1265
|
+
static bool has_an_element_in_specific_scope(GumboParser* parser, gumbo_tagset expected, bool negate, const gumbo_tagset tags) {
|
1387
1266
|
GumboVector* open_elements = &parser->_parser_state->_open_elements;
|
1267
|
+
bool result = false;
|
1388
1268
|
for (int i = open_elements->length; --i >= 0; ) {
|
1389
1269
|
const GumboNode* node = open_elements->data[i];
|
1390
|
-
if (node->type != GUMBO_NODE_ELEMENT
|
1270
|
+
if (node->type != GUMBO_NODE_ELEMENT) {
|
1391
1271
|
continue;
|
1392
|
-
|
1393
|
-
GumboTag node_tag = node->v.element.tag;
|
1394
|
-
GumboNamespaceEnum node_ns = node->v.element.tag_namespace;
|
1395
|
-
for (int j = 0; j < expected_size; ++j) {
|
1396
|
-
if (node_tag == expected[j] && node_ns == GUMBO_NAMESPACE_HTML)
|
1397
|
-
return true;
|
1398
1272
|
}
|
1399
|
-
|
1400
|
-
|
1401
|
-
|
1402
|
-
|
1273
|
+
if (TAGSET_INCLUDES(expected, node->v.element.tag_namespace, node->v.element.tag)) {
|
1274
|
+
return true;
|
1275
|
+
}
|
1276
|
+
bool found_qualname = false;
|
1277
|
+
if (TAGSET_INCLUDES(tags, node->v.element.tag_namespace, node->v.element.tag)) {
|
1278
|
+
found_qualname = true;
|
1279
|
+
}
|
1280
|
+
if (negate != found_qualname) {
|
1281
|
+
result = false;
|
1282
|
+
return result;
|
1283
|
+
}
|
1403
1284
|
}
|
1404
|
-
return
|
1405
|
-
}
|
1406
|
-
|
1407
|
-
// Checks for the presence of an open element of the specified tag type.
|
1408
|
-
static bool has_open_element(GumboParser* parser, GumboTag tag) {
|
1409
|
-
return has_an_element_in_specific_scope(parser, 1, &tag, false, (gumbo_tagset) { TAG(HTML) } );
|
1285
|
+
return result;
|
1410
1286
|
}
|
1411
1287
|
|
1412
1288
|
// http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#has-an-element-in-scope
|
1413
1289
|
static bool has_an_element_in_scope(GumboParser* parser, GumboTag tag) {
|
1414
|
-
|
1290
|
+
gumbo_tagset qualset = {0};
|
1291
|
+
qualset[(int) tag] = (1 << (int) GUMBO_NAMESPACE_HTML);
|
1292
|
+
return has_an_element_in_specific_scope(parser, qualset, false, (gumbo_tagset) { TAG(APPLET),
|
1415
1293
|
TAG(CAPTION), TAG(HTML), TAG(TABLE), TAG(TD), TAG(TH), TAG(MARQUEE),
|
1416
|
-
TAG(OBJECT),
|
1294
|
+
TAG(OBJECT), TAG_MATHML(MI), TAG_MATHML(MO), TAG_MATHML(MN),
|
1417
1295
|
TAG_MATHML(MS), TAG_MATHML(MTEXT), TAG_MATHML(ANNOTATION_XML),
|
1418
1296
|
TAG_SVG(FOREIGNOBJECT), TAG_SVG(DESC), TAG_SVG(TITLE) });
|
1419
1297
|
}
|
@@ -1431,11 +1309,11 @@ static bool has_node_in_scope(GumboParser* parser, const GumboNode* node) {
|
|
1431
1309
|
if (current == node) {
|
1432
1310
|
return true;
|
1433
1311
|
}
|
1434
|
-
if (current->type != GUMBO_NODE_ELEMENT
|
1312
|
+
if (current->type != GUMBO_NODE_ELEMENT) {
|
1435
1313
|
continue;
|
1436
1314
|
}
|
1437
1315
|
if (node_tag_in_set(current, (gumbo_tagset) { TAG(APPLET), TAG(CAPTION), TAG(HTML),
|
1438
|
-
TAG(TABLE), TAG(TD), TAG(TH), TAG(MARQUEE), TAG(OBJECT),
|
1316
|
+
TAG(TABLE), TAG(TD), TAG(TH), TAG(MARQUEE), TAG(OBJECT),
|
1439
1317
|
TAG_MATHML(MI), TAG_MATHML(MO), TAG_MATHML(MN), TAG_MATHML(MS),
|
1440
1318
|
TAG_MATHML(MTEXT), TAG_MATHML(ANNOTATION_XML), TAG_SVG(FOREIGNOBJECT),
|
1441
1319
|
TAG_SVG(DESC), TAG_SVG(TITLE) } )) {
|
@@ -1448,19 +1326,21 @@ static bool has_node_in_scope(GumboParser* parser, const GumboNode* node) {
|
|
1448
1326
|
|
1449
1327
|
// Like has_an_element_in_scope, but restricts the expected qualified name to a
|
1450
1328
|
// range of possible qualified names instead of just a single one.
|
1451
|
-
static bool has_an_element_in_scope_with_tagname(GumboParser* parser,
|
1452
|
-
return has_an_element_in_specific_scope(parser,
|
1453
|
-
|
1454
|
-
|
1455
|
-
|
1456
|
-
|
1329
|
+
static bool has_an_element_in_scope_with_tagname(GumboParser* parser, gumbo_tagset qualset) {
|
1330
|
+
return has_an_element_in_specific_scope(parser, qualset, false, (gumbo_tagset) { TAG(APPLET),
|
1331
|
+
TAG(CAPTION), TAG(HTML), TAG(TABLE), TAG(TD), TAG(TH), TAG(MARQUEE),
|
1332
|
+
TAG(OBJECT), TAG_MATHML(MI), TAG_MATHML(MO), TAG_MATHML(MN),
|
1333
|
+
TAG_MATHML(MS), TAG_MATHML(MTEXT), TAG_MATHML(ANNOTATION_XML),
|
1334
|
+
TAG_SVG(FOREIGNOBJECT), TAG_SVG(DESC), TAG_SVG(TITLE) });
|
1457
1335
|
}
|
1458
1336
|
|
1459
1337
|
// http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#has-an-element-in-list-item-scope
|
1460
1338
|
static bool has_an_element_in_list_scope(GumboParser* parser, GumboTag tag) {
|
1461
|
-
|
1339
|
+
gumbo_tagset qualset = {0};
|
1340
|
+
qualset[(int)tag] = (1 << (int)(GUMBO_NAMESPACE_HTML));
|
1341
|
+
return has_an_element_in_specific_scope(parser, qualset, false, (gumbo_tagset) { TAG(APPLET),
|
1462
1342
|
TAG(CAPTION), TAG(HTML), TAG(TABLE), TAG(TD), TAG(TH), TAG(MARQUEE),
|
1463
|
-
TAG(OBJECT),
|
1343
|
+
TAG(OBJECT), TAG_MATHML(MI), TAG_MATHML(MO), TAG_MATHML(MN),
|
1464
1344
|
TAG_MATHML(MS), TAG_MATHML(MTEXT), TAG_MATHML(ANNOTATION_XML),
|
1465
1345
|
TAG_SVG(FOREIGNOBJECT), TAG_SVG(DESC), TAG_SVG(TITLE), TAG(OL),
|
1466
1346
|
TAG(UL) });
|
@@ -1468,22 +1348,27 @@ static bool has_an_element_in_list_scope(GumboParser* parser, GumboTag tag) {
|
|
1468
1348
|
|
1469
1349
|
// http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#has-an-element-in-button-scope
|
1470
1350
|
static bool has_an_element_in_button_scope(GumboParser* parser, GumboTag tag) {
|
1471
|
-
|
1351
|
+
gumbo_tagset qualset = {0};
|
1352
|
+
qualset[(int) tag] = (1 << (int)(GUMBO_NAMESPACE_HTML));
|
1353
|
+
return has_an_element_in_specific_scope(parser, qualset, false, (gumbo_tagset) { TAG(APPLET),
|
1472
1354
|
TAG(CAPTION), TAG(HTML), TAG(TABLE), TAG(TD), TAG(TH), TAG(MARQUEE),
|
1473
|
-
TAG(OBJECT),
|
1355
|
+
TAG(OBJECT), TAG_MATHML(MI), TAG_MATHML(MO), TAG_MATHML(MN),
|
1474
1356
|
TAG_MATHML(MS), TAG_MATHML(MTEXT), TAG_MATHML(ANNOTATION_XML),
|
1475
1357
|
TAG_SVG(FOREIGNOBJECT), TAG_SVG(DESC), TAG_SVG(TITLE), TAG(BUTTON) });
|
1476
1358
|
}
|
1477
1359
|
|
1478
1360
|
// http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#has-an-element-in-table-scope
|
1479
1361
|
static bool has_an_element_in_table_scope(GumboParser* parser, GumboTag tag) {
|
1480
|
-
|
1481
|
-
|
1362
|
+
gumbo_tagset qualset = {0};
|
1363
|
+
qualset[(int) tag] = (1 << (int)(GUMBO_NAMESPACE_HTML));
|
1364
|
+
return has_an_element_in_specific_scope(parser, qualset, false, (gumbo_tagset) { TAG(HTML), TAG(TABLE) });
|
1482
1365
|
}
|
1483
1366
|
|
1484
1367
|
// http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#has-an-element-in-select-scope
|
1485
1368
|
static bool has_an_element_in_select_scope(GumboParser* parser, GumboTag tag) {
|
1486
|
-
|
1369
|
+
gumbo_tagset qualset = {0};
|
1370
|
+
qualset[(int) tag] = (1 << (int)(GUMBO_NAMESPACE_HTML));
|
1371
|
+
return has_an_element_in_specific_scope(parser, qualset, true, (gumbo_tagset) { TAG(OPTGROUP), TAG(OPTION) });
|
1487
1372
|
}
|
1488
1373
|
|
1489
1374
|
// http://www.whatwg.org/specs/web-apps/current-work/complete/tokenization.html#generate-implied-end-tags
|
@@ -1491,24 +1376,12 @@ static bool has_an_element_in_select_scope(GumboParser* parser, GumboTag tag) {
|
|
1491
1376
|
// Pass GUMBO_TAG_LAST to not exclude any of them.
|
1492
1377
|
static void generate_implied_end_tags(GumboParser* parser, GumboTag exception) {
|
1493
1378
|
for (;
|
1494
|
-
node_tag_in_set(get_current_node(parser), (gumbo_tagset) { TAG(DD),
|
1495
|
-
|
1496
|
-
TAG(RT), TAG(RTC) }) &&
|
1379
|
+
node_tag_in_set(get_current_node(parser), (gumbo_tagset) { TAG(DD), TAG(DT),
|
1380
|
+
TAG(LI), TAG(OPTION), TAG(OPTGROUP), TAG(P), TAG(RP), TAG(RT) }) &&
|
1497
1381
|
!node_html_tag_is(get_current_node(parser), exception);
|
1498
1382
|
pop_current_node(parser));
|
1499
1383
|
}
|
1500
1384
|
|
1501
|
-
// This is the "generate all implied end tags thoroughly" clause of the spec.
|
1502
|
-
// https://html.spec.whatwg.org/multipage/syntax.html#closing-elements-that-have-implied-end-tags
|
1503
|
-
static void generate_all_implied_end_tags_thoroughly(GumboParser* parser) {
|
1504
|
-
for (;
|
1505
|
-
node_tag_in_set(get_current_node(parser), (gumbo_tagset) { TAG(CAPTION),
|
1506
|
-
TAG(COLGROUP), TAG(DD), TAG(DT), TAG(LI), TAG(OPTION), TAG(OPTGROUP),
|
1507
|
-
TAG(P), TAG(RP), TAG(RT), TAG(RTC), TAG(TBODY), TAG(TD),
|
1508
|
-
TAG(TFOOT), TAG(TH), TAG(HEAD), TAG(TR) });
|
1509
|
-
pop_current_node(parser));
|
1510
|
-
}
|
1511
|
-
|
1512
1385
|
// This factors out the clauses relating to "act as if an end tag token with tag
|
1513
1386
|
// name "table" had been seen. Returns true if there's a table element in table
|
1514
1387
|
// scope which was successfully closed, false if not and the token should be
|
@@ -1573,7 +1446,7 @@ static void close_current_select(GumboParser* parser) {
|
|
1573
1446
|
// The list of nodes in the "special" category:
|
1574
1447
|
// http://www.whatwg.org/specs/web-apps/current-work/complete/parsing.html#special
|
1575
1448
|
static bool is_special_node(const GumboNode* node) {
|
1576
|
-
assert(node->type == GUMBO_NODE_ELEMENT
|
1449
|
+
assert(node->type == GUMBO_NODE_ELEMENT);
|
1577
1450
|
return node_tag_in_set(node, (gumbo_tagset) { TAG(ADDRESS), TAG(APPLET), TAG(AREA),
|
1578
1451
|
TAG(ARTICLE), TAG(ASIDE), TAG(BASE), TAG(BASEFONT), TAG(BGSOUND), TAG(BLOCKQUOTE),
|
1579
1452
|
TAG(BODY), TAG(BR), TAG(BUTTON), TAG(CAPTION), TAG(CENTER), TAG(COL),
|
@@ -1585,8 +1458,8 @@ static bool is_special_node(const GumboNode* node) {
|
|
1585
1458
|
TAG(LISTING), TAG(MARQUEE), TAG(MENU), TAG(META), TAG(NAV), TAG(NOEMBED),
|
1586
1459
|
TAG(NOFRAMES), TAG(NOSCRIPT), TAG(OBJECT), TAG(OL), TAG(P), TAG(PARAM),
|
1587
1460
|
TAG(PLAINTEXT), TAG(PRE), TAG(SCRIPT), TAG(SECTION), TAG(SELECT), TAG(STYLE),
|
1588
|
-
TAG(SUMMARY), TAG(TABLE), TAG(TBODY), TAG(TD), TAG(
|
1589
|
-
TAG(
|
1461
|
+
TAG(SUMMARY), TAG(TABLE), TAG(TBODY), TAG(TD), TAG(TEXTAREA), TAG(TFOOT),
|
1462
|
+
TAG(TH), TAG(THEAD), TAG(TITLE), TAG(TR), TAG(UL), TAG(WBR), TAG(XMP),
|
1590
1463
|
|
1591
1464
|
TAG_MATHML(MI), TAG_MATHML(MO), TAG_MATHML(MN), TAG_MATHML(MS),
|
1592
1465
|
TAG_MATHML(MTEXT), TAG_MATHML(ANNOTATION_XML),
|
@@ -1796,20 +1669,13 @@ static void remove_from_parent(GumboParser* parser, GumboNode* node) {
|
|
1796
1669
|
|
1797
1670
|
// http://www.whatwg.org/specs/web-apps/current-work/multipage/the-end.html#an-introduction-to-error-handling-and-strange-cases-in-the-parser
|
1798
1671
|
// Also described in the "in body" handling for end formatting tags.
|
1799
|
-
static bool adoption_agency_algorithm(
|
1672
|
+
static bool adoption_agency_algorithm(
|
1673
|
+
GumboParser* parser, GumboToken* token, GumboTag closing_tag) {
|
1800
1674
|
GumboParserState* state = parser->_parser_state;
|
1801
1675
|
gumbo_debug("Entering adoption agency algorithm.\n");
|
1802
|
-
//
|
1803
|
-
GumboNode* current_node = get_current_node(parser);
|
1804
|
-
if (current_node->v.element.tag_namespace == GUMBO_NAMESPACE_HTML &&
|
1805
|
-
current_node->v.element.tag == subject &&
|
1806
|
-
gumbo_vector_index_of(&state->_active_formatting_elements, current_node) == -1) {
|
1807
|
-
pop_current_node(parser);
|
1808
|
-
return false;
|
1809
|
-
}
|
1810
|
-
// Steps 2-4 & 20:
|
1676
|
+
// Steps 1-3 & 16:
|
1811
1677
|
for (int i = 0; i < 8; ++i) {
|
1812
|
-
// Step
|
1678
|
+
// Step 4.
|
1813
1679
|
GumboNode* formatting_node = NULL;
|
1814
1680
|
int formatting_node_in_open_elements = -1;
|
1815
1681
|
for (int j = state->_active_formatting_elements.length; --j >= 0; ) {
|
@@ -1819,13 +1685,13 @@ static bool adoption_agency_algorithm(GumboParser* parser, GumboToken* token, Gu
|
|
1819
1685
|
// Last scope marker; abort the algorithm.
|
1820
1686
|
return false;
|
1821
1687
|
}
|
1822
|
-
if (
|
1688
|
+
if (current_node->type == GUMBO_NODE_ELEMENT && current_node->v.element.tag == closing_tag) {
|
1823
1689
|
// Found it.
|
1824
1690
|
formatting_node = current_node;
|
1825
1691
|
formatting_node_in_open_elements = gumbo_vector_index_of(
|
1826
|
-
|
1692
|
+
&state->_open_elements, formatting_node);
|
1827
1693
|
gumbo_debug("Formatting element of tag %s at %d.\n",
|
1828
|
-
gumbo_normalized_tagname(
|
1694
|
+
gumbo_normalized_tagname(closing_tag),
|
1829
1695
|
formatting_node_in_open_elements);
|
1830
1696
|
break;
|
1831
1697
|
}
|
@@ -1838,23 +1704,18 @@ static bool adoption_agency_algorithm(GumboParser* parser, GumboToken* token, Gu
|
|
1838
1704
|
return false;
|
1839
1705
|
}
|
1840
1706
|
|
1841
|
-
// Step 6
|
1842
1707
|
if (formatting_node_in_open_elements == -1) {
|
1843
1708
|
gumbo_debug("Formatting node not on stack of open elements.\n");
|
1844
|
-
parser_add_parse_error(parser, token);
|
1845
1709
|
gumbo_vector_remove(parser, formatting_node,
|
1846
1710
|
&state->_active_formatting_elements);
|
1847
1711
|
return false;
|
1848
1712
|
}
|
1849
1713
|
|
1850
|
-
// Step 7
|
1851
1714
|
if (!has_an_element_in_scope(parser, formatting_node->v.element.tag)) {
|
1852
1715
|
parser_add_parse_error(parser, token);
|
1853
1716
|
gumbo_debug("Element not in scope.\n");
|
1854
1717
|
return false;
|
1855
1718
|
}
|
1856
|
-
|
1857
|
-
// Step 8
|
1858
1719
|
if (formatting_node != get_current_node(parser)) {
|
1859
1720
|
parser_add_parse_error(parser, token); // But continue onwards.
|
1860
1721
|
}
|
@@ -1862,20 +1723,20 @@ static bool adoption_agency_algorithm(GumboParser* parser, GumboToken* token, Gu
|
|
1862
1723
|
assert(!node_html_tag_is(formatting_node, GUMBO_TAG_HTML));
|
1863
1724
|
assert(!node_html_tag_is(formatting_node, GUMBO_TAG_BODY));
|
1864
1725
|
|
1865
|
-
// Step
|
1726
|
+
// Step 5 & 6.
|
1866
1727
|
GumboNode* furthest_block = NULL;
|
1867
1728
|
for (int j = formatting_node_in_open_elements;
|
1868
1729
|
j < state->_open_elements.length; ++j) {
|
1869
1730
|
assert(j > 0);
|
1870
1731
|
GumboNode* current = state->_open_elements.data[j];
|
1871
1732
|
if (is_special_node(current)) {
|
1872
|
-
// Step
|
1733
|
+
// Step 5.
|
1873
1734
|
furthest_block = current;
|
1874
1735
|
break;
|
1875
1736
|
}
|
1876
1737
|
}
|
1877
1738
|
if (!furthest_block) {
|
1878
|
-
// Step
|
1739
|
+
// Step 6.
|
1879
1740
|
while (get_current_node(parser) != formatting_node) {
|
1880
1741
|
pop_current_node(parser);
|
1881
1742
|
}
|
@@ -1888,35 +1749,32 @@ static bool adoption_agency_algorithm(GumboParser* parser, GumboToken* token, Gu
|
|
1888
1749
|
assert(!node_html_tag_is(furthest_block, GUMBO_TAG_HTML));
|
1889
1750
|
assert(furthest_block);
|
1890
1751
|
|
1891
|
-
// Step
|
1752
|
+
// Step 7.
|
1892
1753
|
// Elements may be moved and reparented by this algorithm, so
|
1893
1754
|
// common_ancestor is not necessarily the same as formatting_node->parent.
|
1894
1755
|
GumboNode* common_ancestor =
|
1895
|
-
|
1896
|
-
|
1756
|
+
state->_open_elements.data[gumbo_vector_index_of(
|
1757
|
+
&state->_open_elements, formatting_node) - 1];
|
1897
1758
|
gumbo_debug("Common ancestor tag = %s, furthest block tag = %s.\n",
|
1898
1759
|
gumbo_normalized_tagname(common_ancestor->v.element.tag),
|
1899
1760
|
gumbo_normalized_tagname(furthest_block->v.element.tag));
|
1900
1761
|
|
1901
|
-
// Step
|
1762
|
+
// Step 8.
|
1902
1763
|
int bookmark = gumbo_vector_index_of(
|
1903
|
-
|
1904
|
-
|
1905
|
-
// Step 13.
|
1764
|
+
&state->_active_formatting_elements, formatting_node);;
|
1765
|
+
// Step 9.
|
1906
1766
|
GumboNode* node = furthest_block;
|
1907
1767
|
GumboNode* last_node = furthest_block;
|
1908
1768
|
// Must be stored explicitly, in case node is removed from the stack of open
|
1909
1769
|
// elements, to handle step 9.4.
|
1910
1770
|
int saved_node_index = gumbo_vector_index_of(&state->_open_elements, node);
|
1911
1771
|
assert(saved_node_index > 0);
|
1912
|
-
// Step
|
1913
|
-
for (int j = 0
|
1914
|
-
// Step
|
1915
|
-
++j;
|
1916
|
-
// Step 13.3.
|
1772
|
+
// Step 9.1-9.3 & 9.11.
|
1773
|
+
for (int j = 0; j < 3; ++j) {
|
1774
|
+
// Step 9.4.
|
1917
1775
|
int node_index = gumbo_vector_index_of(&state->_open_elements, node);
|
1918
1776
|
gumbo_debug(
|
1919
|
-
|
1777
|
+
"Current index: %d, last index: %d.\n", node_index, saved_node_index);
|
1920
1778
|
if (node_index == -1) {
|
1921
1779
|
node_index = saved_node_index;
|
1922
1780
|
}
|
@@ -1925,78 +1783,61 @@ static bool adoption_agency_algorithm(GumboParser* parser, GumboToken* token, Gu
|
|
1925
1783
|
assert(node_index < state->_open_elements.capacity);
|
1926
1784
|
node = state->_open_elements.data[node_index];
|
1927
1785
|
assert(node->parent);
|
1928
|
-
|
1929
|
-
|
1930
|
-
|
1931
|
-
}
|
1932
|
-
int formatting_index =
|
1933
|
-
gumbo_vector_index_of(&state->_active_formatting_elements, node);
|
1934
|
-
if (j > 3 && formatting_index != -1) {
|
1935
|
-
// Step 13.5.
|
1936
|
-
gumbo_debug(
|
1937
|
-
"Removing formatting element at %d.\n", formatting_index);
|
1938
|
-
gumbo_vector_remove_at(
|
1939
|
-
parser,
|
1940
|
-
formatting_index,
|
1941
|
-
&state->_active_formatting_elements);
|
1942
|
-
// Removing the element shifts all indices over by one, so we may need
|
1943
|
-
// to move the bookmark.
|
1944
|
-
if (formatting_index < bookmark) {
|
1945
|
-
--bookmark;
|
1946
|
-
gumbo_debug("Moving bookmark to %d.\n", bookmark);
|
1947
|
-
}
|
1948
|
-
continue;
|
1949
|
-
}
|
1950
|
-
if (formatting_index == -1) {
|
1951
|
-
// Step 13.6.
|
1786
|
+
// Step 9.5.
|
1787
|
+
if (gumbo_vector_index_of(
|
1788
|
+
&state->_active_formatting_elements, node) == -1) {
|
1952
1789
|
gumbo_vector_remove_at(parser, node_index, &state->_open_elements);
|
1953
1790
|
continue;
|
1791
|
+
} else if (node == formatting_node) {
|
1792
|
+
// Step 9.6.
|
1793
|
+
break;
|
1954
1794
|
}
|
1955
|
-
// Step
|
1956
|
-
|
1957
|
-
|
1795
|
+
// Step 9.7.
|
1796
|
+
int formatting_index = gumbo_vector_index_of(
|
1797
|
+
&state->_active_formatting_elements, node);
|
1958
1798
|
node = clone_node(parser, node, GUMBO_INSERTION_ADOPTION_AGENCY_CLONED);
|
1959
|
-
assert(formatting_index >= 0);
|
1960
1799
|
state->_active_formatting_elements.data[formatting_index] = node;
|
1961
|
-
assert(node_index >= 0);
|
1962
1800
|
state->_open_elements.data[node_index] = node;
|
1963
|
-
// Step
|
1801
|
+
// Step 9.8.
|
1964
1802
|
if (last_node == furthest_block) {
|
1965
1803
|
bookmark = formatting_index + 1;
|
1966
|
-
gumbo_debug("Bookmark moved to %d.\n", bookmark);
|
1967
1804
|
assert(bookmark <= state->_active_formatting_elements.length);
|
1968
1805
|
}
|
1969
|
-
// Step
|
1806
|
+
// Step 9.9.
|
1970
1807
|
last_node->parse_flags |= GUMBO_INSERTION_ADOPTION_AGENCY_MOVED;
|
1971
1808
|
remove_from_parent(parser, last_node);
|
1972
1809
|
append_node(parser, node, last_node);
|
1973
|
-
// Step
|
1810
|
+
// Step 9.10.
|
1974
1811
|
last_node = node;
|
1975
|
-
}
|
1812
|
+
}
|
1976
1813
|
|
1977
|
-
// Step
|
1814
|
+
// Step 10.
|
1978
1815
|
gumbo_debug("Removing %s node from parent ",
|
1979
1816
|
gumbo_normalized_tagname(last_node->v.element.tag));
|
1980
1817
|
remove_from_parent(parser, last_node);
|
1981
1818
|
last_node->parse_flags |= GUMBO_INSERTION_ADOPTION_AGENCY_MOVED;
|
1982
|
-
|
1983
|
-
|
1984
|
-
|
1985
|
-
|
1986
|
-
|
1819
|
+
if (node_tag_in_set(common_ancestor, (gumbo_tagset) { TAG(TABLE), TAG(TBODY),
|
1820
|
+
TAG(TFOOT), TAG(THEAD), TAG(TR) })) {
|
1821
|
+
gumbo_debug("and foster-parenting it.\n");
|
1822
|
+
foster_parent_element(parser, last_node);
|
1823
|
+
} else {
|
1824
|
+
gumbo_debug("and inserting it into %s.\n",
|
1825
|
+
gumbo_normalized_tagname(common_ancestor->v.element.tag));
|
1826
|
+
append_node(parser, common_ancestor, last_node);
|
1827
|
+
}
|
1987
1828
|
|
1988
|
-
// Step
|
1829
|
+
// Step 11.
|
1989
1830
|
GumboNode* new_formatting_node = clone_node(
|
1990
|
-
|
1831
|
+
parser, formatting_node, GUMBO_INSERTION_ADOPTION_AGENCY_CLONED);
|
1991
1832
|
formatting_node->parse_flags |= GUMBO_INSERTION_IMPLICIT_END_TAG;
|
1992
1833
|
|
1993
|
-
// Step
|
1834
|
+
// Step 12. Instead of appending nodes one-by-one, we swap the children
|
1994
1835
|
// vector of furthest_block with the empty children of new_formatting_node,
|
1995
1836
|
// reducing memory traffic and allocations. We still have to reset their
|
1996
1837
|
// parent pointers, though.
|
1997
1838
|
GumboVector temp = new_formatting_node->v.element.children;
|
1998
1839
|
new_formatting_node->v.element.children =
|
1999
|
-
|
1840
|
+
furthest_block->v.element.children;
|
2000
1841
|
furthest_block->v.element.children = temp;
|
2001
1842
|
|
2002
1843
|
temp = new_formatting_node->v.element.children;
|
@@ -2005,39 +1846,36 @@ static bool adoption_agency_algorithm(GumboParser* parser, GumboToken* token, Gu
|
|
2005
1846
|
child->parent = new_formatting_node;
|
2006
1847
|
}
|
2007
1848
|
|
2008
|
-
// Step
|
1849
|
+
// Step 13.
|
2009
1850
|
append_node(parser, furthest_block, new_formatting_node);
|
2010
1851
|
|
2011
|
-
// Step
|
1852
|
+
// Step 14.
|
2012
1853
|
// If the formatting node was before the bookmark, it may shift over all
|
2013
1854
|
// indices after it, so we need to explicitly find the index and possibly
|
2014
1855
|
// adjust the bookmark.
|
2015
1856
|
int formatting_node_index = gumbo_vector_index_of(
|
2016
|
-
|
1857
|
+
&state->_active_formatting_elements, formatting_node);
|
2017
1858
|
assert(formatting_node_index != -1);
|
2018
1859
|
if (formatting_node_index < bookmark) {
|
2019
|
-
gumbo_debug(
|
2020
|
-
"Formatting node at %d is before bookmark at %d; decrementing.\n",
|
2021
|
-
formatting_node_index, bookmark);
|
2022
1860
|
--bookmark;
|
2023
1861
|
}
|
2024
1862
|
gumbo_vector_remove_at(
|
2025
|
-
|
1863
|
+
parser, formatting_node_index, &state->_active_formatting_elements);
|
2026
1864
|
assert(bookmark >= 0);
|
2027
1865
|
assert(bookmark <= state->_active_formatting_elements.length);
|
2028
1866
|
gumbo_vector_insert_at(parser, new_formatting_node, bookmark,
|
2029
1867
|
&state->_active_formatting_elements);
|
2030
1868
|
|
2031
|
-
// Step
|
1869
|
+
// Step 15.
|
2032
1870
|
gumbo_vector_remove(
|
2033
|
-
|
1871
|
+
parser, formatting_node, &state->_open_elements);
|
2034
1872
|
int insert_at = gumbo_vector_index_of(
|
2035
|
-
|
1873
|
+
&state->_open_elements, furthest_block) + 1;
|
2036
1874
|
assert(insert_at >= 0);
|
2037
1875
|
assert(insert_at <= state->_open_elements.length);
|
2038
1876
|
gumbo_vector_insert_at(
|
2039
|
-
|
2040
|
-
}
|
1877
|
+
parser, new_formatting_node, insert_at, &state->_open_elements);
|
1878
|
+
}
|
2041
1879
|
return true;
|
2042
1880
|
}
|
2043
1881
|
|
@@ -2216,45 +2054,29 @@ static bool handle_in_head(GumboParser* parser, GumboToken* token) {
|
|
2216
2054
|
assert(node_html_tag_is(head, GUMBO_TAG_HEAD));
|
2217
2055
|
set_insertion_mode(parser, GUMBO_INSERTION_MODE_AFTER_HEAD);
|
2218
2056
|
return true;
|
2219
|
-
} else if (
|
2220
|
-
|
2221
|
-
|
2222
|
-
|
2223
|
-
|
2224
|
-
|
2225
|
-
|
2226
|
-
|
2227
|
-
parser
|
2228
|
-
|
2229
|
-
|
2230
|
-
return true;
|
2231
|
-
} else if (tag_is(token, kEndTag, GUMBO_TAG_TEMPLATE)) {
|
2232
|
-
if (!has_open_element(parser, GUMBO_TAG_TEMPLATE)) {
|
2233
|
-
parser_add_parse_error(parser, token);
|
2234
|
-
ignore_token(parser);
|
2235
|
-
return false;
|
2236
|
-
}
|
2237
|
-
generate_all_implied_end_tags_thoroughly(parser);
|
2238
|
-
bool success = true;
|
2239
|
-
if (!node_html_tag_is(get_current_node(parser), GUMBO_TAG_TEMPLATE)) {
|
2240
|
-
parser_add_parse_error(parser, token);
|
2241
|
-
success = false;
|
2242
|
-
}
|
2243
|
-
while (!node_html_tag_is(pop_current_node(parser), GUMBO_TAG_TEMPLATE));
|
2244
|
-
clear_active_formatting_elements(parser);
|
2245
|
-
pop_template_insertion_mode(parser);
|
2246
|
-
reset_insertion_mode_appropriately(parser);
|
2247
|
-
return success;
|
2248
|
-
} else if (tag_is(token, kStartTag, GUMBO_TAG_HEAD) || (token->type == GUMBO_TOKEN_END_TAG)) {
|
2057
|
+
} else if (tag_is(token, kStartTag, GUMBO_TAG_HEAD)) {
|
2058
|
+
parser_add_parse_error(parser, token);
|
2059
|
+
ignore_token(parser);
|
2060
|
+
return false;
|
2061
|
+
} else if (tag_is(token, kStartTag, GUMBO_TAG_HEAD) ||
|
2062
|
+
(token->type == GUMBO_TOKEN_END_TAG &&
|
2063
|
+
!tag_in(token, kEndTag, (gumbo_tagset) { TAG(BODY), TAG(HTML),
|
2064
|
+
TAG(BR) }))) {
|
2065
|
+
parser_add_parse_error(parser, token);
|
2066
|
+
return false;
|
2067
|
+
} else if (tag_is(token, kStartTag, GUMBO_TAG_UNKNOWN) && token->v.start_tag.is_self_closing) {
|
2249
2068
|
parser_add_parse_error(parser, token);
|
2250
2069
|
ignore_token(parser);
|
2251
2070
|
return false;
|
2252
2071
|
} else {
|
2253
|
-
pop_current_node(parser);
|
2072
|
+
const GumboNode* node = pop_current_node(parser);
|
2073
|
+
assert(node_html_tag_is(node, GUMBO_TAG_HEAD));
|
2074
|
+
AVOID_UNUSED_VARIABLE_WARNING(node);
|
2254
2075
|
set_insertion_mode(parser, GUMBO_INSERTION_MODE_AFTER_HEAD);
|
2255
2076
|
parser->_parser_state->_reprocess_current_token = true;
|
2256
2077
|
return true;
|
2257
2078
|
}
|
2079
|
+
|
2258
2080
|
return true;
|
2259
2081
|
}
|
2260
2082
|
|
@@ -2320,7 +2142,7 @@ static bool handle_after_head(GumboParser* parser, GumboToken* token) {
|
|
2320
2142
|
} else if (tag_in(token, kStartTag, (gumbo_tagset) { TAG(BASE), TAG(BASEFONT),
|
2321
2143
|
TAG(BGSOUND), TAG(LINK), TAG(META),
|
2322
2144
|
TAG(NOFRAMES), TAG(SCRIPT), TAG(STYLE),
|
2323
|
-
TAG(
|
2145
|
+
TAG(TITLE) })) {
|
2324
2146
|
parser_add_parse_error(parser, token);
|
2325
2147
|
assert(state->_head_element != NULL);
|
2326
2148
|
// This must be flushed before we push the head element on, as there may be
|
@@ -2330,8 +2152,6 @@ static bool handle_after_head(GumboParser* parser, GumboToken* token) {
|
|
2330
2152
|
bool result = handle_in_head(parser, token);
|
2331
2153
|
gumbo_vector_remove(parser, state->_head_element, &state->_open_elements);
|
2332
2154
|
return result;
|
2333
|
-
} else if (tag_is(token, kEndTag, GUMBO_TAG_TEMPLATE)) {
|
2334
|
-
return handle_in_head(parser, token);
|
2335
2155
|
} else if (tag_is(token, kStartTag, GUMBO_TAG_HEAD) ||
|
2336
2156
|
(token->type == GUMBO_TOKEN_END_TAG &&
|
2337
2157
|
!tag_in(token, kEndTag, (gumbo_tagset) { TAG(BODY), TAG(HTML), TAG(BR) }))) {
|
@@ -2346,23 +2166,28 @@ static bool handle_after_head(GumboParser* parser, GumboToken* token) {
|
|
2346
2166
|
}
|
2347
2167
|
}
|
2348
2168
|
|
2349
|
-
static
|
2169
|
+
static void destroy_node(GumboParser* parser, GumboNode* node) {
|
2350
2170
|
switch (node->type) {
|
2351
2171
|
case GUMBO_NODE_DOCUMENT:
|
2352
2172
|
{
|
2353
2173
|
GumboDocument* doc = &node->v.document;
|
2174
|
+
for (int i = 0; i < doc->children.length; ++i) {
|
2175
|
+
destroy_node(parser, doc->children.data[i]);
|
2176
|
+
}
|
2354
2177
|
gumbo_parser_deallocate(parser, (void*) doc->children.data);
|
2355
2178
|
gumbo_parser_deallocate(parser, (void*) doc->name);
|
2356
2179
|
gumbo_parser_deallocate(parser, (void*) doc->public_identifier);
|
2357
2180
|
gumbo_parser_deallocate(parser, (void*) doc->system_identifier);
|
2358
2181
|
}
|
2359
2182
|
break;
|
2360
|
-
case GUMBO_NODE_TEMPLATE:
|
2361
2183
|
case GUMBO_NODE_ELEMENT:
|
2362
2184
|
for (int i = 0; i < node->v.element.attributes.length; ++i) {
|
2363
2185
|
gumbo_destroy_attribute(parser, node->v.element.attributes.data[i]);
|
2364
2186
|
}
|
2365
2187
|
gumbo_parser_deallocate(parser, node->v.element.attributes.data);
|
2188
|
+
for (int i = 0; i < node->v.element.children.length; ++i) {
|
2189
|
+
destroy_node(parser, node->v.element.children.data[i]);
|
2190
|
+
}
|
2366
2191
|
gumbo_parser_deallocate(parser, node->v.element.children.data);
|
2367
2192
|
break;
|
2368
2193
|
case GUMBO_NODE_TEXT:
|
@@ -2372,21 +2197,7 @@ static GumboNode* destroy_node(GumboParser* parser, GumboNode* node) {
|
|
2372
2197
|
gumbo_parser_deallocate(parser, (void*) node->v.text.text);
|
2373
2198
|
break;
|
2374
2199
|
}
|
2375
|
-
// Remove from the next/prev linked list.
|
2376
|
-
GumboNode* prev = node->prev;
|
2377
|
-
GumboNode* next = node->next;
|
2378
|
-
if (prev != NULL) {
|
2379
|
-
prev->next = next;
|
2380
|
-
}
|
2381
|
-
if (next != NULL) {
|
2382
|
-
next->prev = prev;
|
2383
|
-
}
|
2384
|
-
if (parser->_parser_state && parser->_parser_state->_current_node == node) {
|
2385
|
-
parser->_parser_state->_current_node = prev;
|
2386
|
-
}
|
2387
|
-
|
2388
2200
|
gumbo_parser_deallocate(parser, node);
|
2389
|
-
return next;
|
2390
2201
|
}
|
2391
2202
|
|
2392
2203
|
// http://www.whatwg.org/specs/web-apps/current-work/complete/tokenization.html#parsing-main-inbody
|
@@ -2415,24 +2226,20 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
2415
2226
|
ignore_token(parser);
|
2416
2227
|
return false;
|
2417
2228
|
} else if (tag_is(token, kStartTag, GUMBO_TAG_HTML)) {
|
2418
|
-
parser_add_parse_error(parser, token);
|
2419
|
-
if (has_open_element(parser, GUMBO_TAG_TEMPLATE)) {
|
2420
|
-
ignore_token(parser);
|
2421
|
-
return false;
|
2422
|
-
}
|
2423
2229
|
assert(parser->_output->root != NULL);
|
2424
2230
|
assert(parser->_output->root->type == GUMBO_NODE_ELEMENT);
|
2231
|
+
parser_add_parse_error(parser, token);
|
2425
2232
|
merge_attributes(parser, token, parser->_output->root);
|
2426
2233
|
return false;
|
2427
2234
|
} else if (tag_in(token, kStartTag, (gumbo_tagset) { TAG(BASE), TAG(BASEFONT),
|
2428
2235
|
TAG(BGSOUND), TAG(MENUITEM), TAG(LINK),
|
2429
2236
|
TAG(META), TAG(NOFRAMES), TAG(SCRIPT),
|
2430
|
-
TAG(STYLE), TAG(
|
2237
|
+
TAG(STYLE), TAG(TITLE) } )) {
|
2431
2238
|
return handle_in_head(parser, token);
|
2432
2239
|
} else if (tag_is(token, kStartTag, GUMBO_TAG_BODY)) {
|
2433
2240
|
parser_add_parse_error(parser, token);
|
2434
2241
|
if (state->_open_elements.length < 2 ||
|
2435
|
-
!node_html_tag_is(state->_open_elements.data[1], GUMBO_TAG_BODY)
|
2242
|
+
!node_html_tag_is(state->_open_elements.data[1], GUMBO_TAG_BODY)) {
|
2436
2243
|
ignore_token(parser);
|
2437
2244
|
return false;
|
2438
2245
|
}
|
@@ -2484,11 +2291,9 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
2484
2291
|
TAG(DT), TAG(LI), TAG(P), TAG(TBODY), TAG(TD), TAG(TFOOT), TAG(TH),
|
2485
2292
|
TAG(THEAD), TAG(TR), TAG(BODY), TAG(HTML) } )) {
|
2486
2293
|
parser_add_parse_error(parser, token);
|
2294
|
+
return false;
|
2487
2295
|
}
|
2488
2296
|
}
|
2489
|
-
if (get_current_template_insertion_mode(parser) != GUMBO_INSERTION_MODE_INITIAL) {
|
2490
|
-
return handle_in_template(parser, token);
|
2491
|
-
}
|
2492
2297
|
return true;
|
2493
2298
|
} else if (tag_in(token, kEndTag, (gumbo_tagset) { TAG(BODY), TAG(HTML) })) {
|
2494
2299
|
if (!has_an_element_in_scope(parser, GUMBO_TAG_BODY)) {
|
@@ -2498,11 +2303,10 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
2498
2303
|
}
|
2499
2304
|
bool success = true;
|
2500
2305
|
for (int i = 0; i < state->_open_elements.length; ++i) {
|
2501
|
-
if (!node_tag_in_set(state->_open_elements.data[i], (gumbo_tagset) {
|
2502
|
-
|
2503
|
-
|
2504
|
-
|
2505
|
-
TAG(BODY), TAG(HTML) })) {
|
2306
|
+
if (!node_tag_in_set(state->_open_elements.data[i], (gumbo_tagset) { TAG(DD),
|
2307
|
+
TAG(DT), TAG(LI), TAG(OPTGROUP), TAG(OPTION), TAG(P), TAG(RP),
|
2308
|
+
TAG(RT), TAG(TBODY), TAG(TD), TAG(TFOOT), TAG(TH), TAG(THEAD),
|
2309
|
+
TAG(TR), TAG(BODY), TAG(HTML) })) {
|
2506
2310
|
parser_add_parse_error(parser, token);
|
2507
2311
|
success = false;
|
2508
2312
|
break;
|
@@ -2520,7 +2324,7 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
2520
2324
|
} else if (tag_in(token, kStartTag, (gumbo_tagset) { TAG(ADDRESS), TAG(ARTICLE),
|
2521
2325
|
TAG(ASIDE), TAG(BLOCKQUOTE), TAG(CENTER), TAG(DETAILS),
|
2522
2326
|
TAG(DIR), TAG(DIV), TAG(DL), TAG(FIELDSET), TAG(FIGCAPTION),
|
2523
|
-
TAG(FIGURE), TAG(FOOTER), TAG(HEADER), TAG(HGROUP), TAG(MENU),
|
2327
|
+
TAG(FIGURE), TAG(FOOTER), TAG(HEADER), TAG(HGROUP), TAG(MENU),
|
2524
2328
|
TAG(NAV), TAG(OL), TAG(P), TAG(SECTION), TAG(SUMMARY), TAG(UL) })) {
|
2525
2329
|
bool result = maybe_implicitly_close_p_tag(parser, token);
|
2526
2330
|
insert_element_from_token(parser, token);
|
@@ -2543,17 +2347,15 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
2543
2347
|
state->_frameset_ok = false;
|
2544
2348
|
return result;
|
2545
2349
|
} else if (tag_is(token, kStartTag, GUMBO_TAG_FORM)) {
|
2546
|
-
if (state->_form_element != NULL
|
2350
|
+
if (state->_form_element != NULL) {
|
2547
2351
|
gumbo_debug("Ignoring nested form.\n");
|
2548
2352
|
parser_add_parse_error(parser, token);
|
2549
2353
|
ignore_token(parser);
|
2550
2354
|
return false;
|
2551
2355
|
}
|
2552
2356
|
bool result = maybe_implicitly_close_p_tag(parser, token);
|
2553
|
-
|
2554
|
-
|
2555
|
-
state->_form_element = form_element;
|
2556
|
-
}
|
2357
|
+
state->_form_element =
|
2358
|
+
insert_element_from_token(parser, token);
|
2557
2359
|
return result;
|
2558
2360
|
} else if (tag_is(token, kStartTag, GUMBO_TAG_LI)) {
|
2559
2361
|
maybe_implicitly_close_list_tag(parser, token, true);
|
@@ -2585,7 +2387,7 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
2585
2387
|
TAG(ASIDE), TAG(BLOCKQUOTE), TAG(BUTTON), TAG(CENTER), TAG(DETAILS),
|
2586
2388
|
TAG(DIR), TAG(DIV), TAG(DL), TAG(FIELDSET), TAG(FIGCAPTION),
|
2587
2389
|
TAG(FIGURE), TAG(FOOTER), TAG(HEADER), TAG(HGROUP), TAG(LISTING),
|
2588
|
-
TAG(
|
2390
|
+
TAG(MENU), TAG(NAV), TAG(OL), TAG(PRE),
|
2589
2391
|
TAG(SECTION), TAG(SUMMARY), TAG(UL) })) {
|
2590
2392
|
GumboTag tag = token->v.end_tag;
|
2591
2393
|
if (!has_an_element_in_scope(parser, tag)) {
|
@@ -2596,45 +2398,30 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
2596
2398
|
implicitly_close_tags(parser, token, GUMBO_NAMESPACE_HTML, token->v.end_tag);
|
2597
2399
|
return true;
|
2598
2400
|
} else if (tag_is(token, kEndTag, GUMBO_TAG_FORM)) {
|
2599
|
-
|
2600
|
-
|
2601
|
-
|
2602
|
-
|
2603
|
-
|
2604
|
-
|
2605
|
-
|
2606
|
-
|
2607
|
-
|
2608
|
-
parser_add_parse_error(parser, token);
|
2609
|
-
return false;
|
2610
|
-
}
|
2611
|
-
while(!node_html_tag_is(pop_current_node(parser), GUMBO_TAG_FORM));
|
2612
|
-
return success;
|
2613
|
-
} else {
|
2614
|
-
bool result = true;
|
2615
|
-
const GumboNode* node = state->_form_element;
|
2616
|
-
assert(!node || node->type == GUMBO_NODE_ELEMENT);
|
2617
|
-
state->_form_element = NULL;
|
2618
|
-
if (!node || !has_node_in_scope(parser, node)) {
|
2619
|
-
gumbo_debug("Closing an unopened form.\n");
|
2620
|
-
parser_add_parse_error(parser, token);
|
2621
|
-
ignore_token(parser);
|
2622
|
-
return false;
|
2623
|
-
}
|
2624
|
-
// This differs from implicitly_close_tags because we remove *only* the
|
2625
|
-
// <form> element; other nodes are left in scope.
|
2626
|
-
generate_implied_end_tags(parser, GUMBO_TAG_LAST);
|
2627
|
-
if (get_current_node(parser) != node) {
|
2628
|
-
parser_add_parse_error(parser, token);
|
2629
|
-
result = false;
|
2630
|
-
}
|
2631
|
-
|
2632
|
-
GumboVector* open_elements = &state->_open_elements;
|
2633
|
-
int index = gumbo_vector_index_of(open_elements, node);
|
2634
|
-
assert(index >= 0);
|
2635
|
-
gumbo_vector_remove_at(parser, index, open_elements);
|
2636
|
-
return result;
|
2401
|
+
bool result = true;
|
2402
|
+
const GumboNode* node = state->_form_element;
|
2403
|
+
assert(!node || node->type == GUMBO_NODE_ELEMENT);
|
2404
|
+
state->_form_element = NULL;
|
2405
|
+
if (!node || !has_node_in_scope(parser, node)) {
|
2406
|
+
gumbo_debug("Closing an unopened form.\n");
|
2407
|
+
parser_add_parse_error(parser, token);
|
2408
|
+
ignore_token(parser);
|
2409
|
+
return false;
|
2637
2410
|
}
|
2411
|
+
// This differs from implicitly_close_tags because we remove *only* the
|
2412
|
+
// <form> element; other nodes are left in scope.
|
2413
|
+
generate_implied_end_tags(parser, GUMBO_TAG_LAST);
|
2414
|
+
if (get_current_node(parser) != node) {
|
2415
|
+
parser_add_parse_error(parser, token);
|
2416
|
+
result = false;
|
2417
|
+
}
|
2418
|
+
|
2419
|
+
GumboVector* open_elements = &state->_open_elements;
|
2420
|
+
int index = open_elements->length - 1;
|
2421
|
+
for (; index >= 0 && open_elements->data[index] != node; --index);
|
2422
|
+
assert(index >= 0);
|
2423
|
+
gumbo_vector_remove_at(parser, index, open_elements);
|
2424
|
+
return result;
|
2638
2425
|
} else if (tag_is(token, kEndTag, GUMBO_TAG_P)) {
|
2639
2426
|
if (!has_an_element_in_button_scope(parser, GUMBO_TAG_P)) {
|
2640
2427
|
parser_add_parse_error(parser, token);
|
@@ -2661,11 +2448,10 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
2661
2448
|
return false;
|
2662
2449
|
}
|
2663
2450
|
return implicitly_close_tags(parser, token, GUMBO_NAMESPACE_HTML, token_tag);
|
2664
|
-
} else if (tag_in(token, kEndTag, (gumbo_tagset) {
|
2665
|
-
|
2666
|
-
if (!has_an_element_in_scope_with_tagname(parser,
|
2667
|
-
|
2668
|
-
GUMBO_TAG_H4, GUMBO_TAG_H5, GUMBO_TAG_H6})) {
|
2451
|
+
} else if (tag_in(token, kEndTag, (gumbo_tagset) { TAG(H1), TAG(H2), TAG(H3),
|
2452
|
+
TAG(H4), TAG(H5), TAG(H6) })) {
|
2453
|
+
if (!has_an_element_in_scope_with_tagname(parser, (gumbo_tagset) { TAG(H1), TAG(H2), TAG(H3), TAG(H4),
|
2454
|
+
TAG(H5), TAG(H6) })) {
|
2669
2455
|
// No heading open; ignore the token entirely.
|
2670
2456
|
parser_add_parse_error(parser, token);
|
2671
2457
|
ignore_token(parser);
|
@@ -2806,8 +2592,7 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
2806
2592
|
return result;
|
2807
2593
|
} else if (tag_is(token, kStartTag, GUMBO_TAG_ISINDEX)) {
|
2808
2594
|
parser_add_parse_error(parser, token);
|
2809
|
-
if (parser->_parser_state->_form_element != NULL
|
2810
|
-
!has_open_element(parser, GUMBO_TAG_TEMPLATE)) {
|
2595
|
+
if (parser->_parser_state->_form_element != NULL) {
|
2811
2596
|
ignore_token(parser);
|
2812
2597
|
return false;
|
2813
2598
|
}
|
@@ -2822,9 +2607,6 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
2822
2607
|
|
2823
2608
|
GumboNode* form = insert_element_of_tag_type(
|
2824
2609
|
parser, GUMBO_TAG_FORM, GUMBO_INSERTION_FROM_ISINDEX);
|
2825
|
-
if (!has_open_element(parser, GUMBO_TAG_TEMPLATE)) {
|
2826
|
-
parser->_parser_state->_form_element = form;
|
2827
|
-
}
|
2828
2610
|
if (action_attr) {
|
2829
2611
|
gumbo_vector_add(parser, action_attr, &form->v.element.attributes);
|
2830
2612
|
}
|
@@ -2888,9 +2670,6 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
2888
2670
|
parser, GUMBO_TAG_HR, GUMBO_INSERTION_FROM_ISINDEX);
|
2889
2671
|
pop_current_node(parser); // <hr>
|
2890
2672
|
pop_current_node(parser); // <form>
|
2891
|
-
if (!has_open_element(parser, GUMBO_TAG_TEMPLATE)) {
|
2892
|
-
parser->_parser_state->_form_element = NULL;
|
2893
|
-
}
|
2894
2673
|
return false;
|
2895
2674
|
} else if (tag_is(token, kStartTag, GUMBO_TAG_TEXTAREA)) {
|
2896
2675
|
run_generic_parsing_algorithm(parser, token, GUMBO_LEX_RCDATA);
|
@@ -2932,17 +2711,12 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
2932
2711
|
reconstruct_active_formatting_elements(parser);
|
2933
2712
|
insert_element_from_token(parser, token);
|
2934
2713
|
return true;
|
2935
|
-
|
2936
|
-
TAG(RB), TAG(RP), TAG(RT), TAG(RTC) })) {
|
2714
|
+
} else if (tag_in(token, kStartTag, (gumbo_tagset) { TAG(RP), TAG(RT) })) {
|
2937
2715
|
bool success = true;
|
2938
|
-
GumboTag exception = tag_in(token, kStartTag, (gumbo_tagset) {
|
2939
|
-
TAG(RT), TAG(RP) }) ? GUMBO_TAG_RTC : GUMBO_TAG_LAST;
|
2940
2716
|
if (has_an_element_in_scope(parser, GUMBO_TAG_RUBY)) {
|
2941
|
-
generate_implied_end_tags(parser,
|
2717
|
+
generate_implied_end_tags(parser, GUMBO_TAG_LAST);
|
2942
2718
|
}
|
2943
|
-
if (!node_html_tag_is(get_current_node(parser), GUMBO_TAG_RUBY)
|
2944
|
-
!(exception == GUMBO_TAG_LAST ||
|
2945
|
-
node_html_tag_is(get_current_node(parser), GUMBO_TAG_RTC))) {
|
2719
|
+
if (!node_html_tag_is(get_current_node(parser), GUMBO_TAG_RUBY)) {
|
2946
2720
|
parser_add_parse_error(parser, token);
|
2947
2721
|
success = false;
|
2948
2722
|
}
|
@@ -3113,8 +2887,7 @@ static bool handle_in_table(GumboParser* parser, GumboToken* token) {
|
|
3113
2887
|
parser_add_parse_error(parser, token);
|
3114
2888
|
ignore_token(parser);
|
3115
2889
|
return false;
|
3116
|
-
} else if (tag_in(token, kStartTag, (gumbo_tagset) { TAG(STYLE), TAG(SCRIPT)
|
3117
|
-
(tag_is(token, kEndTag, GUMBO_TAG_TEMPLATE))) {
|
2890
|
+
} else if (tag_in(token, kStartTag, (gumbo_tagset) { TAG(STYLE), TAG(SCRIPT) })) {
|
3118
2891
|
return handle_in_head(parser, token);
|
3119
2892
|
} else if (tag_is(token, kStartTag, GUMBO_TAG_INPUT) &&
|
3120
2893
|
attribute_matches(&token->v.start_tag.attributes,
|
@@ -3125,7 +2898,7 @@ static bool handle_in_table(GumboParser* parser, GumboToken* token) {
|
|
3125
2898
|
return false;
|
3126
2899
|
} else if (tag_is(token, kStartTag, GUMBO_TAG_FORM)) {
|
3127
2900
|
parser_add_parse_error(parser, token);
|
3128
|
-
if (state->_form_element
|
2901
|
+
if (state->_form_element) {
|
3129
2902
|
ignore_token(parser);
|
3130
2903
|
return false;
|
3131
2904
|
}
|
@@ -3133,7 +2906,11 @@ static bool handle_in_table(GumboParser* parser, GumboToken* token) {
|
|
3133
2906
|
pop_current_node(parser);
|
3134
2907
|
return false;
|
3135
2908
|
} else if (token->type == GUMBO_TOKEN_EOF) {
|
3136
|
-
|
2909
|
+
if (!node_html_tag_is(get_current_node(parser), GUMBO_TAG_HTML)) {
|
2910
|
+
parser_add_parse_error(parser, token);
|
2911
|
+
return false;
|
2912
|
+
}
|
2913
|
+
return true;
|
3137
2914
|
} else {
|
3138
2915
|
parser_add_parse_error(parser, token);
|
3139
2916
|
state->_foster_parent_insertions = true;
|
@@ -3178,37 +2955,35 @@ static bool handle_in_table_text(GumboParser* parser, GumboToken* token) {
|
|
3178
2955
|
|
3179
2956
|
// http://www.whatwg.org/specs/web-apps/current-work/complete/tokenization.html#parsing-main-incaption
|
3180
2957
|
static bool handle_in_caption(GumboParser* parser, GumboToken* token) {
|
3181
|
-
if (
|
2958
|
+
if (tag_in(token, kStartTag, (gumbo_tagset) { TAG(CAPTION), TAG(COL),
|
2959
|
+
TAG(COLGROUP), TAG(TBODY), TAG(TD),
|
2960
|
+
TAG(TFOOT), TAG(TH), TAG(THEAD), TAG(TR) }) ||
|
2961
|
+
tag_in(token, kEndTag, (gumbo_tagset) { TAG(CAPTION), TAG(TABLE) })) {
|
3182
2962
|
if (!has_an_element_in_table_scope(parser, GUMBO_TAG_CAPTION)) {
|
3183
2963
|
parser_add_parse_error(parser, token);
|
3184
2964
|
ignore_token(parser);
|
3185
2965
|
return false;
|
3186
|
-
} else {
|
3187
|
-
generate_implied_end_tags(parser, GUMBO_TAG_LAST);
|
3188
|
-
bool result = true;
|
3189
|
-
if (!node_html_tag_is(get_current_node(parser), GUMBO_TAG_CAPTION)) {
|
3190
|
-
parser_add_parse_error(parser, token);
|
3191
|
-
}
|
3192
|
-
while (!node_html_tag_is(pop_current_node(parser), GUMBO_TAG_CAPTION));
|
3193
|
-
clear_active_formatting_elements(parser);
|
3194
|
-
set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE);
|
3195
|
-
return result;
|
3196
2966
|
}
|
3197
|
-
|
3198
|
-
TAG(COLGROUP), TAG(TBODY), TAG(TD), TAG(TFOOT), TAG(TH), TAG(THEAD), TAG(TR) }) ||
|
3199
|
-
(tag_is(token, kEndTag, GUMBO_TAG_TABLE))) {
|
3200
|
-
if (!has_an_element_in_table_scope(parser, GUMBO_TAG_CAPTION)) {
|
2967
|
+
if (!tag_is(token, kEndTag, GUMBO_TAG_CAPTION)) {
|
3201
2968
|
parser_add_parse_error(parser, token);
|
3202
|
-
|
3203
|
-
|
2969
|
+
parser->_parser_state->_reprocess_current_token = true;
|
2970
|
+
}
|
2971
|
+
generate_implied_end_tags(parser, GUMBO_TAG_LAST);
|
2972
|
+
bool result = true;
|
2973
|
+
if (!node_html_tag_is(get_current_node(parser), GUMBO_TAG_CAPTION)) {
|
2974
|
+
parser_add_parse_error(parser, token);
|
2975
|
+
while (!node_html_tag_is(get_current_node(parser), GUMBO_TAG_CAPTION)) {
|
2976
|
+
pop_current_node(parser);
|
2977
|
+
}
|
2978
|
+
result = false;
|
3204
2979
|
}
|
3205
|
-
|
2980
|
+
pop_current_node(parser); // The <caption> itself.
|
3206
2981
|
clear_active_formatting_elements(parser);
|
3207
2982
|
set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE);
|
3208
|
-
|
3209
|
-
|
3210
|
-
|
3211
|
-
TAG(
|
2983
|
+
return result;
|
2984
|
+
} else if (tag_in(token, kEndTag, (gumbo_tagset) { TAG(BODY), TAG(COL),
|
2985
|
+
TAG(COLGROUP), TAG(HTML), TAG(TBODY), TAG(TD), TAG(TFOOT),
|
2986
|
+
TAG(TH), TAG(THEAD), TAG(TR) })) {
|
3212
2987
|
parser_add_parse_error(parser, token);
|
3213
2988
|
ignore_token(parser);
|
3214
2989
|
return false;
|
@@ -3236,33 +3011,24 @@ static bool handle_in_column_group(GumboParser* parser, GumboToken* token) {
|
|
3236
3011
|
pop_current_node(parser);
|
3237
3012
|
acknowledge_self_closing_tag(parser);
|
3238
3013
|
return true;
|
3239
|
-
} else if (tag_is(token, kEndTag, GUMBO_TAG_COLGROUP)) {
|
3240
|
-
if (!node_html_tag_is(get_current_node(parser), GUMBO_TAG_COLGROUP)) {
|
3241
|
-
parser_add_parse_error(parser, token);
|
3242
|
-
ignore_token(parser);
|
3243
|
-
return false;
|
3244
|
-
}
|
3245
|
-
pop_current_node(parser);
|
3246
|
-
set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE);
|
3247
|
-
return false;
|
3248
3014
|
} else if (tag_is(token, kEndTag, GUMBO_TAG_COL)) {
|
3249
3015
|
parser_add_parse_error(parser, token);
|
3250
3016
|
ignore_token(parser);
|
3251
3017
|
return false;
|
3252
|
-
} else if (
|
3253
|
-
|
3254
|
-
return
|
3255
|
-
} else if (token->type == GUMBO_TOKEN_EOF) {
|
3256
|
-
return handle_in_body(parser, token);
|
3018
|
+
} else if (token->type == GUMBO_TOKEN_EOF &&
|
3019
|
+
get_current_node(parser) == parser->_output->root) {
|
3020
|
+
return true;
|
3257
3021
|
} else {
|
3258
|
-
if (
|
3022
|
+
if (get_current_node(parser) == parser->_output->root) {
|
3259
3023
|
parser_add_parse_error(parser, token);
|
3260
|
-
ignore_token(parser);
|
3261
3024
|
return false;
|
3262
3025
|
}
|
3026
|
+
assert(node_html_tag_is(get_current_node(parser), GUMBO_TAG_COLGROUP));
|
3263
3027
|
pop_current_node(parser);
|
3264
3028
|
set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE);
|
3265
|
-
|
3029
|
+
if (!tag_is(token, kEndTag, GUMBO_TAG_COLGROUP)) {
|
3030
|
+
parser->_parser_state->_reprocess_current_token = true;
|
3031
|
+
}
|
3266
3032
|
return true;
|
3267
3033
|
}
|
3268
3034
|
}
|
@@ -3325,48 +3091,42 @@ static bool handle_in_row(GumboParser* parser, GumboToken* token) {
|
|
3325
3091
|
set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_CELL);
|
3326
3092
|
add_formatting_element(parser, &kActiveFormattingScopeMarker);
|
3327
3093
|
return true;
|
3328
|
-
} else if (
|
3329
|
-
|
3330
|
-
|
3331
|
-
|
3332
|
-
|
3333
|
-
|
3334
|
-
|
3335
|
-
|
3336
|
-
|
3337
|
-
|
3338
|
-
|
3339
|
-
|
3340
|
-
|
3341
|
-
|
3094
|
+
} else if (tag_in(token, kStartTag, (gumbo_tagset) { TAG(CAPTION), TAG(COLGROUP),
|
3095
|
+
TAG(TBODY), TAG(TFOOT), TAG(THEAD), TAG(TR) }) ||
|
3096
|
+
tag_in(token, kEndTag, (gumbo_tagset) { TAG(TR), TAG(TABLE),
|
3097
|
+
TAG(TBODY), TAG(TFOOT), TAG(THEAD) })) {
|
3098
|
+
// This case covers 4 clauses of the spec, each of which say "Otherwise, act
|
3099
|
+
// as if an end tag with the tag name "tr" had been seen." The differences
|
3100
|
+
// are in error handling and whether the current token is reprocessed.
|
3101
|
+
GumboTag desired_tag =
|
3102
|
+
tag_in(token, kEndTag, (gumbo_tagset) { TAG(TBODY), TAG(TFOOT),
|
3103
|
+
TAG(THEAD) })
|
3104
|
+
? token->v.end_tag : GUMBO_TAG_TR;
|
3105
|
+
if (!has_an_element_in_table_scope(parser, desired_tag)) {
|
3106
|
+
gumbo_debug("Bailing because there is no tag %s in table scope.\nOpen elements:",
|
3107
|
+
gumbo_normalized_tagname(desired_tag));
|
3108
|
+
for (int i = 0; i < parser->_parser_state->_open_elements.length; ++i) {
|
3109
|
+
const GumboNode* node = parser->_parser_state->_open_elements.data[i];
|
3110
|
+
gumbo_debug("%s\n", gumbo_normalized_tagname(node->v.element.tag));
|
3111
|
+
}
|
3342
3112
|
parser_add_parse_error(parser, token);
|
3343
3113
|
ignore_token(parser);
|
3344
3114
|
return false;
|
3345
|
-
} else {
|
3346
|
-
clear_stack_to_table_row_context(parser);
|
3347
|
-
pop_current_node(parser);
|
3348
|
-
set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE_BODY);
|
3349
|
-
parser->_parser_state->_reprocess_current_token = true;
|
3350
|
-
return true;
|
3351
3115
|
}
|
3352
|
-
|
3353
|
-
|
3354
|
-
|
3355
|
-
|
3356
|
-
|
3357
|
-
|
3358
|
-
} else {
|
3359
|
-
clear_stack_to_table_row_context(parser);
|
3360
|
-
pop_current_node(parser);
|
3361
|
-
set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE_BODY);
|
3116
|
+
clear_stack_to_table_row_context(parser);
|
3117
|
+
GumboNode* last_element = pop_current_node(parser);
|
3118
|
+
assert(node_html_tag_is(last_element, GUMBO_TAG_TR));
|
3119
|
+
AVOID_UNUSED_VARIABLE_WARNING(last_element);
|
3120
|
+
set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE_BODY);
|
3121
|
+
if (!tag_is(token, kEndTag, GUMBO_TAG_TR)) {
|
3362
3122
|
parser->_parser_state->_reprocess_current_token = true;
|
3363
|
-
return true;
|
3364
3123
|
}
|
3365
|
-
|
3366
|
-
|
3367
|
-
|
3368
|
-
|
3369
|
-
|
3124
|
+
return true;
|
3125
|
+
} else if (tag_in(token, kEndTag, (gumbo_tagset) { TAG(BODY), TAG(CAPTION),
|
3126
|
+
TAG(COL), TAG(COLGROUP), TAG(HTML), TAG(TD), TAG(TH) })) {
|
3127
|
+
parser_add_parse_error(parser, token);
|
3128
|
+
ignore_token(parser);
|
3129
|
+
return false;
|
3370
3130
|
} else {
|
3371
3131
|
return handle_in_table(parser, token);
|
3372
3132
|
}
|
@@ -3378,7 +3138,6 @@ static bool handle_in_cell(GumboParser* parser, GumboToken* token) {
|
|
3378
3138
|
GumboTag token_tag = token->v.end_tag;
|
3379
3139
|
if (!has_an_element_in_table_scope(parser, token_tag)) {
|
3380
3140
|
parser_add_parse_error(parser, token);
|
3381
|
-
ignore_token(parser);
|
3382
3141
|
return false;
|
3383
3142
|
}
|
3384
3143
|
return close_table_cell(parser, token, token_tag);
|
@@ -3494,11 +3253,14 @@ static bool handle_in_select(GumboParser* parser, GumboToken* token) {
|
|
3494
3253
|
parser->_parser_state->_reprocess_current_token = true;
|
3495
3254
|
}
|
3496
3255
|
return false;
|
3497
|
-
} else if (
|
3498
|
-
tag_is(token, kEndTag, GUMBO_TAG_TEMPLATE)) {
|
3256
|
+
} else if (tag_is(token, kStartTag, GUMBO_TAG_SCRIPT)) {
|
3499
3257
|
return handle_in_head(parser, token);
|
3500
3258
|
} else if (token->type == GUMBO_TOKEN_EOF) {
|
3501
|
-
|
3259
|
+
if (get_current_node(parser) != parser->_output->root) {
|
3260
|
+
parser_add_parse_error(parser, token);
|
3261
|
+
return false;
|
3262
|
+
}
|
3263
|
+
return true;
|
3502
3264
|
} else {
|
3503
3265
|
parser_add_parse_error(parser, token);
|
3504
3266
|
ignore_token(parser);
|
@@ -3517,16 +3279,14 @@ static bool handle_in_select_in_table(GumboParser* parser, GumboToken* token) {
|
|
3517
3279
|
} else if (tag_in(token, kEndTag, (gumbo_tagset) { TAG(CAPTION), TAG(TABLE),
|
3518
3280
|
TAG(TBODY), TAG(TFOOT), TAG(THEAD), TAG(TR), TAG(TD), TAG(TH) })) {
|
3519
3281
|
parser_add_parse_error(parser, token);
|
3520
|
-
if (
|
3521
|
-
ignore_token(parser);
|
3522
|
-
return false;
|
3523
|
-
} else {
|
3282
|
+
if (has_an_element_in_table_scope(parser, token->v.end_tag)) {
|
3524
3283
|
close_current_select(parser);
|
3525
|
-
|
3526
|
-
// reset_insertion_mode_appropriately(parser);
|
3284
|
+
reset_insertion_mode_appropriately(parser);
|
3527
3285
|
parser->_parser_state->_reprocess_current_token = true;
|
3528
|
-
|
3286
|
+
} else {
|
3287
|
+
ignore_token(parser);
|
3529
3288
|
}
|
3289
|
+
return false;
|
3530
3290
|
} else {
|
3531
3291
|
return handle_in_select(parser, token);
|
3532
3292
|
}
|
@@ -3534,68 +3294,8 @@ static bool handle_in_select_in_table(GumboParser* parser, GumboToken* token) {
|
|
3534
3294
|
|
3535
3295
|
// http://www.whatwg.org/specs/web-apps/current-work/multipage/tree-construction.html#parsing-main-intemplate
|
3536
3296
|
static bool handle_in_template(GumboParser* parser, GumboToken* token) {
|
3537
|
-
|
3538
|
-
|
3539
|
-
token->type == GUMBO_TOKEN_CHARACTER ||
|
3540
|
-
token->type == GUMBO_TOKEN_COMMENT ||
|
3541
|
-
token->type == GUMBO_TOKEN_DOCTYPE) {
|
3542
|
-
return handle_in_body(parser, token);
|
3543
|
-
} else if (tag_in(token, kStartTag, (gumbo_tagset) { TAG(BASE), TAG(BASEFONT), TAG(BGSOUND),
|
3544
|
-
TAG(LINK), TAG(META), TAG(NOFRAMES), TAG(SCRIPT), TAG(STYLE),
|
3545
|
-
TAG(TEMPLATE), TAG(TITLE) }) ||
|
3546
|
-
tag_is(token, kEndTag, GUMBO_TAG_TEMPLATE)) {
|
3547
|
-
return handle_in_head(parser, token);
|
3548
|
-
} else if (tag_in(token, kStartTag, (gumbo_tagset) { TAG(CAPTION), TAG(COLGROUP),
|
3549
|
-
TAG(TBODY), TAG(TFOOT), TAG(THEAD) })) {
|
3550
|
-
pop_template_insertion_mode(parser);
|
3551
|
-
push_template_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE);
|
3552
|
-
set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE);
|
3553
|
-
state->_reprocess_current_token = true;
|
3554
|
-
return true;
|
3555
|
-
} else if (tag_is(token, kStartTag, GUMBO_TAG_COL)) {
|
3556
|
-
pop_template_insertion_mode(parser);
|
3557
|
-
push_template_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_COLUMN_GROUP);
|
3558
|
-
set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_COLUMN_GROUP);
|
3559
|
-
state->_reprocess_current_token = true;
|
3560
|
-
return true;
|
3561
|
-
} else if (tag_is(token, kStartTag, GUMBO_TAG_TR)) {
|
3562
|
-
pop_template_insertion_mode(parser);
|
3563
|
-
push_template_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE_BODY);
|
3564
|
-
set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE_BODY);
|
3565
|
-
state->_reprocess_current_token = true;
|
3566
|
-
return true;
|
3567
|
-
} else if (tag_in(token, kStartTag, (gumbo_tagset) { TAG(TD), TAG(TH) })) {
|
3568
|
-
pop_template_insertion_mode(parser);
|
3569
|
-
push_template_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_ROW);
|
3570
|
-
set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_ROW);
|
3571
|
-
state->_reprocess_current_token = true;
|
3572
|
-
return true;
|
3573
|
-
} else if (token->type == GUMBO_TOKEN_START_TAG) {
|
3574
|
-
pop_template_insertion_mode(parser);
|
3575
|
-
push_template_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_BODY);
|
3576
|
-
set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_BODY);
|
3577
|
-
state->_reprocess_current_token = true;
|
3578
|
-
return true;
|
3579
|
-
} else if (token->type == GUMBO_TOKEN_END_TAG) {
|
3580
|
-
parser_add_parse_error(parser, token);
|
3581
|
-
ignore_token(parser);
|
3582
|
-
return false;
|
3583
|
-
} else if (token->type == GUMBO_TOKEN_EOF) {
|
3584
|
-
if (!has_open_element(parser, GUMBO_TAG_TEMPLATE)) {
|
3585
|
-
// Stop parsing.
|
3586
|
-
return true;
|
3587
|
-
}
|
3588
|
-
parser_add_parse_error(parser, token);
|
3589
|
-
while(!node_html_tag_is(pop_current_node(parser), GUMBO_TAG_TEMPLATE));
|
3590
|
-
clear_active_formatting_elements(parser);
|
3591
|
-
pop_template_insertion_mode(parser);
|
3592
|
-
reset_insertion_mode_appropriately(parser);
|
3593
|
-
state->_reprocess_current_token = true;
|
3594
|
-
return false;
|
3595
|
-
} else {
|
3596
|
-
assert(0);
|
3597
|
-
return false;
|
3598
|
-
}
|
3297
|
+
// TODO(jdtang): Implement this.
|
3298
|
+
return true;
|
3599
3299
|
}
|
3600
3300
|
|
3601
3301
|
// http://www.whatwg.org/specs/web-apps/current-work/complete/tokenization.html#parsing-main-afterbody
|
@@ -3613,12 +3313,7 @@ static bool handle_after_body(GumboParser* parser, GumboToken* token) {
|
|
3613
3313
|
ignore_token(parser);
|
3614
3314
|
return false;
|
3615
3315
|
} else if (tag_is(token, kEndTag, GUMBO_TAG_HTML)) {
|
3616
|
-
|
3617
|
-
if (is_fragment_parser(parser)) {
|
3618
|
-
parser_add_parse_error(parser, token);
|
3619
|
-
ignore_token(parser);
|
3620
|
-
return false;
|
3621
|
-
}
|
3316
|
+
// TODO(jdtang): Handle fragment parsing algorithm case.
|
3622
3317
|
set_insertion_mode(parser, GUMBO_INSERTION_MODE_AFTER_AFTER_BODY);
|
3623
3318
|
GumboNode* html = parser->_parser_state->_open_elements.data[0];
|
3624
3319
|
assert(node_html_tag_is(html, GUMBO_TAG_HTML));
|
@@ -3659,8 +3354,9 @@ static bool handle_in_frameset(GumboParser* parser, GumboToken* token) {
|
|
3659
3354
|
return false;
|
3660
3355
|
}
|
3661
3356
|
pop_current_node(parser);
|
3662
|
-
|
3663
|
-
|
3357
|
+
// TODO(jdtang): Add a condition to ignore this for the fragment parsing
|
3358
|
+
// algorithm.
|
3359
|
+
if (!node_html_tag_is(get_current_node(parser), GUMBO_TAG_FRAMESET)) {
|
3664
3360
|
set_insertion_mode(parser, GUMBO_INSERTION_MODE_AFTER_FRAMESET);
|
3665
3361
|
}
|
3666
3362
|
return true;
|
@@ -3834,32 +3530,18 @@ static bool handle_in_foreign_content(GumboParser* parser, GumboToken* token) {
|
|
3834
3530
|
token_has_attribute(token, "color") ||
|
3835
3531
|
token_has_attribute(token, "face") ||
|
3836
3532
|
token_has_attribute(token, "size")))) {
|
3837
|
-
|
3838
|
-
/* Parse error */
|
3839
3533
|
parser_add_parse_error(parser, token);
|
3840
|
-
|
3841
|
-
|
3842
|
-
|
3843
|
-
|
3844
|
-
|
3845
|
-
|
3846
|
-
|
3847
|
-
|
3848
|
-
|
3849
|
-
} while(!(is_mathml_integration_point(get_current_node(parser)) ||
|
3850
|
-
is_html_integration_point(get_current_node(parser)) ||
|
3851
|
-
get_current_node(parser)->v.element.tag_namespace ==
|
3852
|
-
GUMBO_NAMESPACE_HTML));
|
3853
|
-
parser->_parser_state->_reprocess_current_token = true;
|
3854
|
-
return false;
|
3855
|
-
}
|
3856
|
-
|
3857
|
-
assert(token->type == GUMBO_TOKEN_START_TAG);
|
3858
|
-
}
|
3859
|
-
|
3860
|
-
if (token->type == GUMBO_TOKEN_START_TAG) {
|
3534
|
+
do {
|
3535
|
+
pop_current_node(parser);
|
3536
|
+
} while(!(is_mathml_integration_point(get_current_node(parser)) ||
|
3537
|
+
is_html_integration_point(get_current_node(parser)) ||
|
3538
|
+
get_current_node(parser)->v.element.tag_namespace ==
|
3539
|
+
GUMBO_NAMESPACE_HTML));
|
3540
|
+
parser->_parser_state->_reprocess_current_token = true;
|
3541
|
+
return false;
|
3542
|
+
} else if (token->type == GUMBO_TOKEN_START_TAG) {
|
3861
3543
|
const GumboNamespaceEnum current_namespace =
|
3862
|
-
|
3544
|
+
get_current_node(parser)->v.element.tag_namespace;
|
3863
3545
|
if (current_namespace == GUMBO_NAMESPACE_MATHML) {
|
3864
3546
|
adjust_mathml_attributes(parser, token);
|
3865
3547
|
}
|
@@ -3948,10 +3630,8 @@ static bool handle_token(GumboParser* parser, GumboToken* token) {
|
|
3948
3630
|
parser->_parser_state->_closed_html_tag = true;
|
3949
3631
|
}
|
3950
3632
|
|
3951
|
-
const GumboNode* current_node =
|
3952
|
-
assert(!current_node ||
|
3953
|
-
current_node->type == GUMBO_NODE_ELEMENT ||
|
3954
|
-
current_node->type == GUMBO_NODE_TEMPLATE);
|
3633
|
+
const GumboNode* current_node = get_current_node(parser);
|
3634
|
+
assert(!current_node || current_node->type == GUMBO_NODE_ELEMENT);
|
3955
3635
|
if (current_node) {
|
3956
3636
|
gumbo_debug("Current node: <%s>.\n",
|
3957
3637
|
gumbo_normalized_tagname(current_node->v.element.tag));
|
@@ -3979,66 +3659,6 @@ static bool handle_token(GumboParser* parser, GumboToken* token) {
|
|
3979
3659
|
}
|
3980
3660
|
}
|
3981
3661
|
|
3982
|
-
static void fragment_parser_init(
|
3983
|
-
GumboParser *parser, GumboTag fragment_ctx,
|
3984
|
-
GumboNamespaceEnum fragment_namespace) {
|
3985
|
-
GumboNode *root;
|
3986
|
-
assert(fragment_ctx != GUMBO_TAG_LAST);
|
3987
|
-
|
3988
|
-
// 3
|
3989
|
-
parser->_parser_state->_fragment_ctx = create_element(parser, fragment_ctx);
|
3990
|
-
parser->_parser_state->_fragment_ctx->v.element.tag_namespace =
|
3991
|
-
fragment_namespace;
|
3992
|
-
|
3993
|
-
// 4
|
3994
|
-
if (fragment_namespace == GUMBO_NAMESPACE_HTML) {
|
3995
|
-
// Non-HTML namespaces always start in the DATA state.
|
3996
|
-
switch (fragment_ctx) {
|
3997
|
-
case GUMBO_TAG_TITLE:
|
3998
|
-
case GUMBO_TAG_TEXTAREA:
|
3999
|
-
gumbo_tokenizer_set_state(parser, GUMBO_LEX_RCDATA);
|
4000
|
-
break;
|
4001
|
-
|
4002
|
-
case GUMBO_TAG_STYLE:
|
4003
|
-
case GUMBO_TAG_XMP:
|
4004
|
-
case GUMBO_TAG_IFRAME:
|
4005
|
-
case GUMBO_TAG_NOEMBED:
|
4006
|
-
case GUMBO_TAG_NOFRAMES:
|
4007
|
-
gumbo_tokenizer_set_state(parser, GUMBO_LEX_RAWTEXT);
|
4008
|
-
break;
|
4009
|
-
|
4010
|
-
case GUMBO_TAG_SCRIPT:
|
4011
|
-
gumbo_tokenizer_set_state(parser, GUMBO_LEX_SCRIPT);
|
4012
|
-
break;
|
4013
|
-
|
4014
|
-
case GUMBO_TAG_NOSCRIPT:
|
4015
|
-
/* scripting is disabled in Gumbo, so leave the tokenizer
|
4016
|
-
* in the default data state */
|
4017
|
-
break;
|
4018
|
-
|
4019
|
-
case GUMBO_TAG_PLAINTEXT:
|
4020
|
-
gumbo_tokenizer_set_state(parser, GUMBO_LEX_PLAINTEXT);
|
4021
|
-
break;
|
4022
|
-
|
4023
|
-
default:
|
4024
|
-
/* default data state */
|
4025
|
-
break;
|
4026
|
-
}
|
4027
|
-
}
|
4028
|
-
|
4029
|
-
// 5. 6. 7.
|
4030
|
-
root = insert_element_of_tag_type(parser, GUMBO_TAG_HTML, GUMBO_INSERTION_IMPLIED);
|
4031
|
-
parser->_output->root = root;
|
4032
|
-
|
4033
|
-
// 8.
|
4034
|
-
if (fragment_ctx == GUMBO_TAG_TEMPLATE) {
|
4035
|
-
push_template_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TEMPLATE);
|
4036
|
-
}
|
4037
|
-
|
4038
|
-
// 10.
|
4039
|
-
reset_insertion_mode_appropriately(parser);
|
4040
|
-
}
|
4041
|
-
|
4042
3662
|
GumboOutput* gumbo_parse(const char* buffer) {
|
4043
3663
|
return gumbo_parse_with_options(
|
4044
3664
|
&kGumboDefaultOptions, buffer, strlen(buffer));
|
@@ -4046,27 +3666,11 @@ GumboOutput* gumbo_parse(const char* buffer) {
|
|
4046
3666
|
|
4047
3667
|
GumboOutput* gumbo_parse_with_options(
|
4048
3668
|
const GumboOptions* options, const char* buffer, size_t length) {
|
4049
|
-
return gumbo_parse_fragment(
|
4050
|
-
options, buffer, length, GUMBO_TAG_LAST, GUMBO_NAMESPACE_HTML);
|
4051
|
-
}
|
4052
|
-
|
4053
|
-
GumboOutput* gumbo_parse_fragment(
|
4054
|
-
const GumboOptions* options, const char* buffer, size_t length,
|
4055
|
-
const GumboTag fragment_ctx, const GumboNamespaceEnum fragment_namespace) {
|
4056
3669
|
GumboParser parser;
|
4057
3670
|
parser._options = options;
|
4058
|
-
parser_state_init(&parser);
|
4059
|
-
// Must come after parser_state_init, since creating the document node must
|
4060
|
-
// reference parser_state->_current_node.
|
4061
3671
|
output_init(&parser);
|
4062
|
-
// And this must come after output_init, because initializing the tokenizer
|
4063
|
-
// reads the first character and that may cause a UTF-8 decode error
|
4064
|
-
// (inserting into output->errors) if that's invalid.
|
4065
3672
|
gumbo_tokenizer_state_init(&parser, buffer, length);
|
4066
|
-
|
4067
|
-
if (fragment_ctx != GUMBO_TAG_LAST) {
|
4068
|
-
fragment_parser_init(&parser, fragment_ctx, fragment_namespace);
|
4069
|
-
}
|
3673
|
+
parser_state_init(&parser);
|
4070
3674
|
|
4071
3675
|
GumboParserState* state = parser._parser_state;
|
4072
3676
|
gumbo_debug("Parsing %.*s.\n", length, buffer);
|
@@ -4154,16 +3758,20 @@ GumboOutput* gumbo_parse_fragment(
|
|
4154
3758
|
return parser._output;
|
4155
3759
|
}
|
4156
3760
|
|
3761
|
+
void gumbo_destroy_node(GumboOptions* options, GumboNode* node) {
|
3762
|
+
// Need a dummy GumboParser because the allocator comes along with the
|
3763
|
+
// options object.
|
3764
|
+
GumboParser parser;
|
3765
|
+
parser._options = options;
|
3766
|
+
destroy_node(&parser, node);
|
3767
|
+
}
|
3768
|
+
|
4157
3769
|
void gumbo_destroy_output(const GumboOptions* options, GumboOutput* output) {
|
4158
3770
|
// Need a dummy GumboParser because the allocator comes along with the
|
4159
3771
|
// options object.
|
4160
3772
|
GumboParser parser;
|
4161
|
-
parser._parser_state = NULL;
|
4162
3773
|
parser._options = options;
|
4163
|
-
|
4164
|
-
while (current) {
|
4165
|
-
current = destroy_node(&parser, current);
|
4166
|
-
}
|
3774
|
+
destroy_node(&parser, output->document);
|
4167
3775
|
for (int i = 0; i < output->errors.length; ++i) {
|
4168
3776
|
gumbo_error_destroy(&parser, output->errors.data[i]);
|
4169
3777
|
}
|