nokogumbo 1.3.0 → 1.5.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -49,10 +49,9 @@ typedef struct {
49
49
  // errors to the GumboParser's errors vector, if the spec calls for it. Pass a
50
50
  // space for the "additional allowed char" when the spec says "with no
51
51
  // additional allowed char". Returns false on parse error, true otherwise.
52
- bool consume_char_ref(
53
- struct GumboInternalParser* parser, struct GumboInternalUtf8Iterator* input,
54
- int additional_allowed_char, bool is_in_attribute,
55
- OneOrTwoCodepoints* output);
52
+ bool consume_char_ref(struct GumboInternalParser* parser,
53
+ struct GumboInternalUtf8Iterator* input, int additional_allowed_char,
54
+ bool is_in_attribute, OneOrTwoCodepoints* output);
56
55
 
57
56
  #ifdef __cplusplus
58
57
  }
@@ -2464,7 +2464,9 @@ valid_named_ref := |*
2464
2464
  *|;
2465
2465
  }%%
2466
2466
 
2467
- %% write data;
2467
+ // clang-format off
2468
+ %% write data noerror nofinal;
2469
+ // clang-format on
2468
2470
 
2469
2471
  static bool consume_named_ref(
2470
2472
  struct GumboInternalParser* parser, Utf8Iterator* input, bool is_in_attribute,
@@ -2477,13 +2479,16 @@ static bool consume_named_ref(
2477
2479
  const char *ts, *start;
2478
2480
  int cs, act;
2479
2481
 
2482
+ // clang-format off
2480
2483
  %% write init;
2481
2484
  // Avoid unused variable warnings.
2482
2485
  (void) act;
2483
2486
  (void) ts;
2487
+ (void) char_ref_en_valid_named_ref;
2484
2488
 
2485
2489
  start = p;
2486
2490
  %% write exec;
2491
+ // clang-format on
2487
2492
 
2488
2493
  if (cs >= %%{ write first_final; }%%) {
2489
2494
  assert(output->first != kGumboNoChar);
@@ -27,18 +27,17 @@
27
27
  #include "util.h"
28
28
  #include "vector.h"
29
29
 
30
- static const size_t kMessageBufferSize = 256;
31
-
32
30
  // Prints a formatted message to a StringBuffer. This automatically resizes the
33
31
  // StringBuffer as necessary to fit the message. Returns the number of bytes
34
32
  // written.
35
- static int print_message(GumboParser* parser, GumboStringBuffer* output,
36
- const char* format, ...) {
33
+ static int print_message(
34
+ GumboParser* parser, GumboStringBuffer* output, const char* format, ...) {
37
35
  va_list args;
38
- va_start(args, format);
39
36
  int remaining_capacity = output->capacity - output->length;
40
- int bytes_written = vsnprintf(output->data + output->length,
41
- remaining_capacity, format, args);
37
+ va_start(args, format);
38
+ int bytes_written = vsnprintf(
39
+ output->data + output->length, remaining_capacity, format, args);
40
+ va_end(args);
42
41
  #ifdef _MSC_VER
43
42
  if (bytes_written == -1) {
44
43
  // vsnprintf returns -1 on MSVC++ if there's not enough capacity, instead of
@@ -47,15 +46,15 @@ static int print_message(GumboParser* parser, GumboStringBuffer* output,
47
46
  // we retry (letting it fail and returning 0 if it doesn't), since there's
48
47
  // no way to smartly resize the buffer.
49
48
  gumbo_string_buffer_reserve(parser, output->capacity * 2, output);
50
- int result = vsnprintf(output->data + output->length,
51
- remaining_capacity, format, args);
49
+ va_start(args, format);
50
+ int result = vsnprintf(
51
+ output->data + output->length, remaining_capacity, format, args);
52
52
  va_end(args);
53
53
  return result == -1 ? 0 : result;
54
54
  }
55
55
  #else
56
56
  // -1 in standard C99 indicates an encoding error. Return 0 and do nothing.
57
57
  if (bytes_written == -1) {
58
- va_end(args);
59
58
  return 0;
60
59
  }
61
60
  #endif
@@ -64,19 +63,19 @@ static int print_message(GumboParser* parser, GumboStringBuffer* output,
64
63
  gumbo_string_buffer_reserve(
65
64
  parser, output->capacity + bytes_written, output);
66
65
  remaining_capacity = output->capacity - output->length;
67
- bytes_written = vsnprintf(output->data + output->length,
68
- remaining_capacity, format, args);
66
+ va_start(args, format);
67
+ bytes_written = vsnprintf(
68
+ output->data + output->length, remaining_capacity, format, args);
69
+ va_end(args);
69
70
  }
70
71
  output->length += bytes_written;
71
- va_end(args);
72
72
  return bytes_written;
73
73
  }
74
74
 
75
- static void print_tag_stack(
76
- GumboParser* parser, const GumboParserError* error,
75
+ static void print_tag_stack(GumboParser* parser, const GumboParserError* error,
77
76
  GumboStringBuffer* output) {
78
77
  print_message(parser, output, " Currently open tags: ");
79
- for (int i = 0; i < error->tag_stack.length; ++i) {
78
+ for (unsigned int i = 0; i < error->tag_stack.length; ++i) {
80
79
  if (i) {
81
80
  print_message(parser, output, ", ");
82
81
  }
@@ -87,12 +86,11 @@ static void print_tag_stack(
87
86
  }
88
87
 
89
88
  static void handle_parser_error(GumboParser* parser,
90
- const GumboParserError* error,
91
- GumboStringBuffer* output) {
89
+ const GumboParserError* error, GumboStringBuffer* output) {
92
90
  if (error->parser_state == GUMBO_INSERTION_MODE_INITIAL &&
93
91
  error->input_type != GUMBO_TOKEN_DOCTYPE) {
94
- print_message(parser, output,
95
- "The doctype must be the first token in the document");
92
+ print_message(
93
+ parser, output, "The doctype must be the first token in the document");
96
94
  return;
97
95
  }
98
96
 
@@ -106,6 +104,7 @@ static void handle_parser_error(GumboParser* parser,
106
104
  // But just in case...
107
105
  print_message(parser, output, "Comments aren't legal here");
108
106
  return;
107
+ case GUMBO_TOKEN_CDATA:
109
108
  case GUMBO_TOKEN_WHITESPACE:
110
109
  case GUMBO_TOKEN_CHARACTER:
111
110
  print_message(parser, output, "Character tokens aren't legal here");
@@ -150,13 +149,14 @@ static const char* find_last_newline(
150
149
  static const char* find_next_newline(
151
150
  const char* original_text, const char* error_location) {
152
151
  const char* c = error_location;
153
- for (; *c && *c != '\n'; ++c);
152
+ for (; *c && *c != '\n'; ++c)
153
+ ;
154
154
  return c;
155
155
  }
156
156
 
157
157
  GumboError* gumbo_add_error(GumboParser* parser) {
158
158
  int max_errors = parser->_options->max_errors;
159
- if (max_errors >= 0 && parser->_output->errors.length >= max_errors) {
159
+ if (max_errors >= 0 && parser->_output->errors.length >= (unsigned int) max_errors) {
160
160
  return NULL;
161
161
  }
162
162
  GumboError* error = gumbo_parser_allocate(parser, sizeof(GumboError));
@@ -166,50 +166,52 @@ GumboError* gumbo_add_error(GumboParser* parser) {
166
166
 
167
167
  void gumbo_error_to_string(
168
168
  GumboParser* parser, const GumboError* error, GumboStringBuffer* output) {
169
- print_message(parser, output, "@%d:%d: ",
170
- error->position.line, error->position.column);
169
+ print_message(
170
+ parser, output, "@%d:%d: ", error->position.line, error->position.column);
171
171
  switch (error->type) {
172
172
  case GUMBO_ERR_UTF8_INVALID:
173
- print_message(parser, output, "Invalid UTF8 character 0x%x",
174
- error->v.codepoint);
173
+ print_message(
174
+ parser, output, "Invalid UTF8 character 0x%x", error->v.codepoint);
175
175
  break;
176
176
  case GUMBO_ERR_UTF8_TRUNCATED:
177
177
  print_message(parser, output,
178
- "Input stream ends with a truncated UTF8 character 0x%x",
179
- error->v.codepoint);
178
+ "Input stream ends with a truncated UTF8 character 0x%x",
179
+ error->v.codepoint);
180
180
  break;
181
181
  case GUMBO_ERR_NUMERIC_CHAR_REF_NO_DIGITS:
182
- print_message(parser, output,
183
- "No digits after &# in numeric character reference");
182
+ print_message(
183
+ parser, output, "No digits after &# in numeric character reference");
184
184
  break;
185
185
  case GUMBO_ERR_NUMERIC_CHAR_REF_WITHOUT_SEMICOLON:
186
186
  print_message(parser, output,
187
- "The numeric character reference &#%d should be followed "
188
- "by a semicolon", error->v.codepoint);
187
+ "The numeric character reference &#%d should be followed "
188
+ "by a semicolon",
189
+ error->v.codepoint);
189
190
  break;
190
191
  case GUMBO_ERR_NUMERIC_CHAR_REF_INVALID:
191
192
  print_message(parser, output,
192
- "The numeric character reference &#%d; encodes an invalid "
193
- "unicode codepoint", error->v.codepoint);
193
+ "The numeric character reference &#%d; encodes an invalid "
194
+ "unicode codepoint",
195
+ error->v.codepoint);
194
196
  break;
195
197
  case GUMBO_ERR_NAMED_CHAR_REF_WITHOUT_SEMICOLON:
196
198
  // The textual data came from one of the literal strings in the table, and
197
199
  // so it'll be null-terminated.
198
200
  print_message(parser, output,
199
- "The named character reference &%.*s should be followed by a "
200
- "semicolon", (int) error->v.text.length, error->v.text.data);
201
+ "The named character reference &%.*s should be followed by a "
202
+ "semicolon",
203
+ (int) error->v.text.length, error->v.text.data);
201
204
  break;
202
205
  case GUMBO_ERR_NAMED_CHAR_REF_INVALID:
203
206
  print_message(parser, output,
204
- "The named character reference &%.*s; is not a valid entity name",
205
- (int) error->v.text.length, error->v.text.data);
207
+ "The named character reference &%.*s; is not a valid entity name",
208
+ (int) error->v.text.length, error->v.text.data);
206
209
  break;
207
210
  case GUMBO_ERR_DUPLICATE_ATTR:
208
211
  print_message(parser, output,
209
- "Attribute %s occurs multiple times, at positions %d and %d",
210
- error->v.duplicate_attr.name,
211
- error->v.duplicate_attr.original_index,
212
- error->v.duplicate_attr.new_index);
212
+ "Attribute %s occurs multiple times, at positions %d and %d",
213
+ error->v.duplicate_attr.name, error->v.duplicate_attr.original_index,
214
+ error->v.duplicate_attr.new_index);
213
215
  break;
214
216
  case GUMBO_ERR_PARSER:
215
217
  case GUMBO_ERR_UNACKNOWLEDGED_SELF_CLOSING_TAG:
@@ -217,21 +219,19 @@ void gumbo_error_to_string(
217
219
  break;
218
220
  default:
219
221
  print_message(parser, output,
220
- "Tokenizer error with an unimplemented error message");
222
+ "Tokenizer error with an unimplemented error message");
221
223
  break;
222
224
  }
223
225
  gumbo_string_buffer_append_codepoint(parser, '.', output);
224
226
  }
225
227
 
226
- void gumbo_caret_diagnostic_to_string(
227
- GumboParser* parser, const GumboError* error,
228
- const char* source_text, GumboStringBuffer* output) {
228
+ void gumbo_caret_diagnostic_to_string(GumboParser* parser,
229
+ const GumboError* error, const char* source_text,
230
+ GumboStringBuffer* output) {
229
231
  gumbo_error_to_string(parser, error, output);
230
232
 
231
- const char* line_start =
232
- find_last_newline(source_text, error->original_text);
233
- const char* line_end =
234
- find_next_newline(source_text, error->original_text);
233
+ const char* line_start = find_last_newline(source_text, error->original_text);
234
+ const char* line_end = find_next_newline(source_text, error->original_text);
235
235
  GumboStringPiece original_line;
236
236
  original_line.data = line_start;
237
237
  original_line.length = line_end - line_start;
@@ -272,7 +272,7 @@ void gumbo_init_errors(GumboParser* parser) {
272
272
  }
273
273
 
274
274
  void gumbo_destroy_errors(GumboParser* parser) {
275
- for (int i = 0; i < parser->_output->errors.length; ++i) {
275
+ for (unsigned int i = 0; i < parser->_output->errors.length; ++i) {
276
276
  gumbo_error_destroy(parser, parser->_output->errors.data[i]);
277
277
  }
278
278
  gumbo_vector_destroy(parser, &parser->_output->errors);
@@ -201,24 +201,22 @@ void gumbo_error_destroy(struct GumboInternalParser* parser, GumboError* error);
201
201
  // responsible for deleting the buffer. (Note that the buffer is allocated with
202
202
  // the allocator specified in the GumboParser config and hence should be freed
203
203
  // by gumbo_parser_deallocate().)
204
- void gumbo_error_to_string(
205
- struct GumboInternalParser* parser, const GumboError* error,
206
- GumboStringBuffer* output);
204
+ void gumbo_error_to_string(struct GumboInternalParser* parser,
205
+ const GumboError* error, GumboStringBuffer* output);
207
206
 
208
207
  // Prints a caret diagnostic to a string. This fills an empty GumboStringBuffer
209
208
  // with a freshly-allocated buffer containing the error message text. The
210
209
  // caller is responsible for deleting the buffer. (Note that the buffer is
211
210
  // allocated with the allocator specified in the GumboParser config and hence
212
211
  // should be freed by gumbo_parser_deallocate().)
213
- void gumbo_caret_diagnostic_to_string(
214
- struct GumboInternalParser* parser, const GumboError* error,
215
- const char* source_text, GumboStringBuffer* output);
212
+ void gumbo_caret_diagnostic_to_string(struct GumboInternalParser* parser,
213
+ const GumboError* error, const char* source_text,
214
+ GumboStringBuffer* output);
216
215
 
217
216
  // Like gumbo_caret_diagnostic_to_string, but prints the text to stdout instead
218
217
  // of writing to a string.
219
- void gumbo_print_caret_diagnostic(
220
- struct GumboInternalParser* parser, const GumboError* error,
221
- const char* source_text);
218
+ void gumbo_print_caret_diagnostic(struct GumboInternalParser* parser,
219
+ const GumboError* error, const char* source_text);
222
220
 
223
221
  #ifdef __cplusplus
224
222
  }
@@ -76,7 +76,6 @@ typedef struct {
76
76
  */
77
77
  extern const GumboSourcePosition kGumboEmptySourcePosition;
78
78
 
79
-
80
79
  /**
81
80
  * A struct representing a string or part of a string. Strings within the
82
81
  * parser are represented by a char* and a length; the char* points into
@@ -111,7 +110,6 @@ bool gumbo_string_equals(
111
110
  bool gumbo_string_equals_ignore_case(
112
111
  const GumboStringPiece* str1, const GumboStringPiece* str2);
113
112
 
114
-
115
113
  /**
116
114
  * A simple vector implementation. This stores a pointer to a data array and a
117
115
  * length. All elements are stored as void*; client code must cast to the
@@ -141,8 +139,7 @@ extern const GumboVector kGumboEmptyVector;
141
139
  * Returns the first index at which an element appears in this vector (testing
142
140
  * by pointer equality), or -1 if it never does.
143
141
  */
144
- int gumbo_vector_index_of(GumboVector* vector, void* element);
145
-
142
+ int gumbo_vector_index_of(GumboVector* vector, const void* element);
146
143
 
147
144
  /**
148
145
  * An enum for all the tags defined in the HTML5 standard. These correspond to
@@ -157,172 +154,10 @@ int gumbo_vector_index_of(GumboVector* vector, void* element);
157
154
  * strings.
158
155
  */
159
156
  typedef enum {
160
- // http://www.whatwg.org/specs/web-apps/current-work/multipage/semantics.html#the-root-element
161
- GUMBO_TAG_HTML,
162
- // http://www.whatwg.org/specs/web-apps/current-work/multipage/semantics.html#document-metadata
163
- GUMBO_TAG_HEAD,
164
- GUMBO_TAG_TITLE,
165
- GUMBO_TAG_BASE,
166
- GUMBO_TAG_LINK,
167
- GUMBO_TAG_META,
168
- GUMBO_TAG_STYLE,
169
- // http://www.whatwg.org/specs/web-apps/current-work/multipage/scripting-1.html#scripting-1
170
- GUMBO_TAG_SCRIPT,
171
- GUMBO_TAG_NOSCRIPT,
172
- GUMBO_TAG_TEMPLATE,
173
- // http://www.whatwg.org/specs/web-apps/current-work/multipage/sections.html#sections
174
- GUMBO_TAG_BODY,
175
- GUMBO_TAG_ARTICLE,
176
- GUMBO_TAG_SECTION,
177
- GUMBO_TAG_NAV,
178
- GUMBO_TAG_ASIDE,
179
- GUMBO_TAG_H1,
180
- GUMBO_TAG_H2,
181
- GUMBO_TAG_H3,
182
- GUMBO_TAG_H4,
183
- GUMBO_TAG_H5,
184
- GUMBO_TAG_H6,
185
- GUMBO_TAG_HGROUP,
186
- GUMBO_TAG_HEADER,
187
- GUMBO_TAG_FOOTER,
188
- GUMBO_TAG_ADDRESS,
189
- // http://www.whatwg.org/specs/web-apps/current-work/multipage/grouping-content.html#grouping-content
190
- GUMBO_TAG_P,
191
- GUMBO_TAG_HR,
192
- GUMBO_TAG_PRE,
193
- GUMBO_TAG_BLOCKQUOTE,
194
- GUMBO_TAG_OL,
195
- GUMBO_TAG_UL,
196
- GUMBO_TAG_LI,
197
- GUMBO_TAG_DL,
198
- GUMBO_TAG_DT,
199
- GUMBO_TAG_DD,
200
- GUMBO_TAG_FIGURE,
201
- GUMBO_TAG_FIGCAPTION,
202
- GUMBO_TAG_MAIN,
203
- GUMBO_TAG_DIV,
204
- // http://www.whatwg.org/specs/web-apps/current-work/multipage/text-level-semantics.html#text-level-semantics
205
- GUMBO_TAG_A,
206
- GUMBO_TAG_EM,
207
- GUMBO_TAG_STRONG,
208
- GUMBO_TAG_SMALL,
209
- GUMBO_TAG_S,
210
- GUMBO_TAG_CITE,
211
- GUMBO_TAG_Q,
212
- GUMBO_TAG_DFN,
213
- GUMBO_TAG_ABBR,
214
- GUMBO_TAG_DATA,
215
- GUMBO_TAG_TIME,
216
- GUMBO_TAG_CODE,
217
- GUMBO_TAG_VAR,
218
- GUMBO_TAG_SAMP,
219
- GUMBO_TAG_KBD,
220
- GUMBO_TAG_SUB,
221
- GUMBO_TAG_SUP,
222
- GUMBO_TAG_I,
223
- GUMBO_TAG_B,
224
- GUMBO_TAG_U,
225
- GUMBO_TAG_MARK,
226
- GUMBO_TAG_RUBY,
227
- GUMBO_TAG_RT,
228
- GUMBO_TAG_RP,
229
- GUMBO_TAG_BDI,
230
- GUMBO_TAG_BDO,
231
- GUMBO_TAG_SPAN,
232
- GUMBO_TAG_BR,
233
- GUMBO_TAG_WBR,
234
- // http://www.whatwg.org/specs/web-apps/current-work/multipage/edits.html#edits
235
- GUMBO_TAG_INS,
236
- GUMBO_TAG_DEL,
237
- // http://www.whatwg.org/specs/web-apps/current-work/multipage/embedded-content-1.html#embedded-content-1
238
- GUMBO_TAG_IMAGE,
239
- GUMBO_TAG_IMG,
240
- GUMBO_TAG_IFRAME,
241
- GUMBO_TAG_EMBED,
242
- GUMBO_TAG_OBJECT,
243
- GUMBO_TAG_PARAM,
244
- GUMBO_TAG_VIDEO,
245
- GUMBO_TAG_AUDIO,
246
- GUMBO_TAG_SOURCE,
247
- GUMBO_TAG_TRACK,
248
- GUMBO_TAG_CANVAS,
249
- GUMBO_TAG_MAP,
250
- GUMBO_TAG_AREA,
251
- // http://www.whatwg.org/specs/web-apps/current-work/multipage/the-map-element.html#mathml
252
- GUMBO_TAG_MATH,
253
- GUMBO_TAG_MI,
254
- GUMBO_TAG_MO,
255
- GUMBO_TAG_MN,
256
- GUMBO_TAG_MS,
257
- GUMBO_TAG_MTEXT,
258
- GUMBO_TAG_MGLYPH,
259
- GUMBO_TAG_MALIGNMARK,
260
- GUMBO_TAG_ANNOTATION_XML,
261
- // http://www.whatwg.org/specs/web-apps/current-work/multipage/the-map-element.html#svg-0
262
- GUMBO_TAG_SVG,
263
- GUMBO_TAG_FOREIGNOBJECT,
264
- GUMBO_TAG_DESC,
265
- // SVG title tags will have GUMBO_TAG_TITLE as with HTML.
266
- // http://www.whatwg.org/specs/web-apps/current-work/multipage/tabular-data.html#tabular-data
267
- GUMBO_TAG_TABLE,
268
- GUMBO_TAG_CAPTION,
269
- GUMBO_TAG_COLGROUP,
270
- GUMBO_TAG_COL,
271
- GUMBO_TAG_TBODY,
272
- GUMBO_TAG_THEAD,
273
- GUMBO_TAG_TFOOT,
274
- GUMBO_TAG_TR,
275
- GUMBO_TAG_TD,
276
- GUMBO_TAG_TH,
277
- // http://www.whatwg.org/specs/web-apps/current-work/multipage/forms.html#forms
278
- GUMBO_TAG_FORM,
279
- GUMBO_TAG_FIELDSET,
280
- GUMBO_TAG_LEGEND,
281
- GUMBO_TAG_LABEL,
282
- GUMBO_TAG_INPUT,
283
- GUMBO_TAG_BUTTON,
284
- GUMBO_TAG_SELECT,
285
- GUMBO_TAG_DATALIST,
286
- GUMBO_TAG_OPTGROUP,
287
- GUMBO_TAG_OPTION,
288
- GUMBO_TAG_TEXTAREA,
289
- GUMBO_TAG_KEYGEN,
290
- GUMBO_TAG_OUTPUT,
291
- GUMBO_TAG_PROGRESS,
292
- GUMBO_TAG_METER,
293
- // http://www.whatwg.org/specs/web-apps/current-work/multipage/interactive-elements.html#interactive-elements
294
- GUMBO_TAG_DETAILS,
295
- GUMBO_TAG_SUMMARY,
296
- GUMBO_TAG_MENU,
297
- GUMBO_TAG_MENUITEM,
298
- // Non-conforming elements that nonetheless appear in the HTML5 spec.
299
- // http://www.whatwg.org/specs/web-apps/current-work/multipage/obsolete.html#non-conforming-features
300
- GUMBO_TAG_APPLET,
301
- GUMBO_TAG_ACRONYM,
302
- GUMBO_TAG_BGSOUND,
303
- GUMBO_TAG_DIR,
304
- GUMBO_TAG_FRAME,
305
- GUMBO_TAG_FRAMESET,
306
- GUMBO_TAG_NOFRAMES,
307
- GUMBO_TAG_ISINDEX,
308
- GUMBO_TAG_LISTING,
309
- GUMBO_TAG_XMP,
310
- GUMBO_TAG_NEXTID,
311
- GUMBO_TAG_NOEMBED,
312
- GUMBO_TAG_PLAINTEXT,
313
- GUMBO_TAG_RB,
314
- GUMBO_TAG_STRIKE,
315
- GUMBO_TAG_BASEFONT,
316
- GUMBO_TAG_BIG,
317
- GUMBO_TAG_BLINK,
318
- GUMBO_TAG_CENTER,
319
- GUMBO_TAG_FONT,
320
- GUMBO_TAG_MARQUEE,
321
- GUMBO_TAG_MULTICOL,
322
- GUMBO_TAG_NOBR,
323
- GUMBO_TAG_SPACER,
324
- GUMBO_TAG_TT,
325
- // Used for all tags that don't have special handling in HTML.
157
+ // Load all the tags from an external source, generated from tag.in.
158
+ #include "tag_enum.h"
159
+ // Used for all tags that don't have special handling in HTML. Add new tags
160
+ // to the end of tag.in so as to preserve backwards-compatibility.
326
161
  GUMBO_TAG_UNKNOWN,
327
162
  // A marker value to indicate the end of the enum, for iterating over it.
328
163
  // Also used as the terminator for varargs functions that take tags.
@@ -364,9 +199,10 @@ const char* gumbo_normalize_svg_tagname(const GumboStringPiece* tagname);
364
199
 
365
200
  /**
366
201
  * Converts a tag name string (which may be in upper or mixed case) to a tag
367
- * enum.
202
+ * enum. `gumbo_tag_enum` expects `tagname` to be NUL-terminated; `gumbo_tagn_enum` takes an explicit `length`.
368
203
  */
369
204
  GumboTag gumbo_tag_enum(const char* tagname);
205
+ GumboTag gumbo_tagn_enum(const char* tagname, unsigned int length);
370
206
 
371
207
  /**
372
208
  * Attribute namespaces.
@@ -461,10 +297,16 @@ typedef enum {
461
297
  GUMBO_NODE_TEXT,
462
298
  /** CDATA node. v will be a GumboText. */
463
299
  GUMBO_NODE_CDATA,
464
- /** Comment node. v. will be a GumboText, excluding comment delimiters. */
300
+ /** Comment node. v will be a GumboText, excluding comment delimiters. */
465
301
  GUMBO_NODE_COMMENT,
466
302
  /** Text node whose contents are all whitespace. v will be a GumboText. */
467
- GUMBO_NODE_WHITESPACE
303
+ GUMBO_NODE_WHITESPACE,
304
+ /** Template node. This is separate from GUMBO_NODE_ELEMENT because many
305
+ * client libraries will want to ignore the contents of template nodes, as
306
+ * the spec suggests. Recursing on GUMBO_NODE_ELEMENT will do the right thing
307
+ * here, while clients that want to include template contents should also
308
+ * check for GUMBO_NODE_TEMPLATE. v will be a GumboElement. */
309
+ GUMBO_NODE_TEMPLATE
468
310
  } GumboNodeType;
469
311
 
470
312
  /**
@@ -473,7 +315,9 @@ typedef enum {
473
315
  */
474
316
  typedef struct GumboInternalNode GumboNode;
475
317
 
476
- /** http://www.whatwg.org/specs/web-apps/current-work/complete/dom.html#quirks-mode */
318
+ /**
319
+ * http://www.whatwg.org/specs/web-apps/current-work/complete/dom.html#quirks-mode
320
+ */
477
321
  typedef enum {
478
322
  GUMBO_DOCTYPE_NO_QUIRKS,
479
323
  GUMBO_DOCTYPE_QUIRKS,
@@ -571,7 +415,6 @@ typedef enum {
571
415
  GUMBO_INSERTION_FOSTER_PARENTED = 1 << 10,
572
416
  } GumboParseFlags;
573
417
 
574
-
575
418
  /**
576
419
  * Information specific to document nodes.
577
420
  */
@@ -690,9 +533,9 @@ struct GumboInternalNode {
690
533
 
691
534
  /** The actual node data. */
692
535
  union {
693
- GumboDocument document; // For GUMBO_NODE_DOCUMENT.
694
- GumboElement element; // For GUMBO_NODE_ELEMENT.
695
- GumboText text; // For everything else.
536
+ GumboDocument document; // For GUMBO_NODE_DOCUMENT.
537
+ GumboElement element; // For GUMBO_NODE_ELEMENT.
538
+ GumboText text; // For everything else.
696
539
  } v;
697
540
  };
698
541
 
@@ -750,6 +593,29 @@ typedef struct GumboInternalOptions {
750
593
  * Default: -1
751
594
  */
752
595
  int max_errors;
596
+
597
+ /**
598
+ * The fragment context for parsing:
599
+ * https://html.spec.whatwg.org/multipage/syntax.html#parsing-html-fragments
600
+ *
601
+ * If GUMBO_TAG_LAST is passed here, it is assumed to be "no fragment", i.e.
602
+ * the regular parsing algorithm. Otherwise, pass the tag enum for the
603
+ * intended parent of the parsed fragment. We use just the tag enum rather
604
+ * than a full node because that's enough to set all the parsing context we
605
+ * need, and it provides some additional flexibility for client code to act as
606
+ * if parsing a fragment even when a full HTML tree isn't available.
607
+ *
608
+ * Default: GUMBO_TAG_LAST
609
+ */
610
+ GumboTag fragment_context;
611
+
612
+ /**
613
+ * The namespace for the fragment context. This lets client code
614
+ * differentiate between, say, parsing a <title> tag in SVG vs. parsing it in
615
+ * HTML.
616
+ * Default: GUMBO_NAMESPACE_HTML
617
+ */
618
+ GumboNamespaceEnum fragment_namespace;
753
619
  } GumboOptions;
754
620
 
755
621
  /** Default options struct; use this with gumbo_parse_with_options. */
@@ -796,9 +662,7 @@ GumboOutput* gumbo_parse_with_options(
796
662
  const GumboOptions* options, const char* buffer, size_t buffer_length);
797
663
 
798
664
  /** Release the memory used for the parse tree & parse errors. */
799
- void gumbo_destroy_output(
800
- const GumboOptions* options, GumboOutput* output);
801
-
665
+ void gumbo_destroy_output(const GumboOptions* options, GumboOutput* output);
802
666
 
803
667
  #ifdef __cplusplus
804
668
  }