nokogumbo 1.3.0 → 1.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -49,10 +49,9 @@ typedef struct {
49
49
  // errors to the GumboParser's errors vector, if the spec calls for it. Pass a
50
50
  // space for the "additional allowed char" when the spec says "with no
51
51
  // additional allowed char". Returns false on parse error, true otherwise.
52
- bool consume_char_ref(
53
- struct GumboInternalParser* parser, struct GumboInternalUtf8Iterator* input,
54
- int additional_allowed_char, bool is_in_attribute,
55
- OneOrTwoCodepoints* output);
52
+ bool consume_char_ref(struct GumboInternalParser* parser,
53
+ struct GumboInternalUtf8Iterator* input, int additional_allowed_char,
54
+ bool is_in_attribute, OneOrTwoCodepoints* output);
56
55
 
57
56
  #ifdef __cplusplus
58
57
  }
@@ -2464,7 +2464,9 @@ valid_named_ref := |*
2464
2464
  *|;
2465
2465
  }%%
2466
2466
 
2467
- %% write data;
2467
+ // clang-format off
2468
+ %% write data noerror nofinal;
2469
+ // clang-format on
2468
2470
 
2469
2471
  static bool consume_named_ref(
2470
2472
  struct GumboInternalParser* parser, Utf8Iterator* input, bool is_in_attribute,
@@ -2477,13 +2479,16 @@ static bool consume_named_ref(
2477
2479
  const char *ts, *start;
2478
2480
  int cs, act;
2479
2481
 
2482
+ // clang-format off
2480
2483
  %% write init;
2481
2484
  // Avoid unused variable warnings.
2482
2485
  (void) act;
2483
2486
  (void) ts;
2487
+ (void) char_ref_en_valid_named_ref;
2484
2488
 
2485
2489
  start = p;
2486
2490
  %% write exec;
2491
+ // clang-format on
2487
2492
 
2488
2493
  if (cs >= %%{ write first_final; }%%) {
2489
2494
  assert(output->first != kGumboNoChar);
@@ -27,18 +27,17 @@
27
27
  #include "util.h"
28
28
  #include "vector.h"
29
29
 
30
- static const size_t kMessageBufferSize = 256;
31
-
32
30
  // Prints a formatted message to a StringBuffer. This automatically resizes the
33
31
  // StringBuffer as necessary to fit the message. Returns the number of bytes
34
32
  // written.
35
- static int print_message(GumboParser* parser, GumboStringBuffer* output,
36
- const char* format, ...) {
33
+ static int print_message(
34
+ GumboParser* parser, GumboStringBuffer* output, const char* format, ...) {
37
35
  va_list args;
38
- va_start(args, format);
39
36
  int remaining_capacity = output->capacity - output->length;
40
- int bytes_written = vsnprintf(output->data + output->length,
41
- remaining_capacity, format, args);
37
+ va_start(args, format);
38
+ int bytes_written = vsnprintf(
39
+ output->data + output->length, remaining_capacity, format, args);
40
+ va_end(args);
42
41
  #ifdef _MSC_VER
43
42
  if (bytes_written == -1) {
44
43
  // vsnprintf returns -1 on MSVC++ if there's not enough capacity, instead of
@@ -47,15 +46,15 @@ static int print_message(GumboParser* parser, GumboStringBuffer* output,
47
46
  // we retry (letting it fail and returning 0 if it doesn't), since there's
48
47
  // no way to smartly resize the buffer.
49
48
  gumbo_string_buffer_reserve(parser, output->capacity * 2, output);
50
- int result = vsnprintf(output->data + output->length,
51
- remaining_capacity, format, args);
49
+ va_start(args, format);
50
+ int result = vsnprintf(
51
+ output->data + output->length, remaining_capacity, format, args);
52
52
  va_end(args);
53
53
  return result == -1 ? 0 : result;
54
54
  }
55
55
  #else
56
56
  // -1 in standard C99 indicates an encoding error. Return 0 and do nothing.
57
57
  if (bytes_written == -1) {
58
- va_end(args);
59
58
  return 0;
60
59
  }
61
60
  #endif
@@ -64,19 +63,19 @@ static int print_message(GumboParser* parser, GumboStringBuffer* output,
64
63
  gumbo_string_buffer_reserve(
65
64
  parser, output->capacity + bytes_written, output);
66
65
  remaining_capacity = output->capacity - output->length;
67
- bytes_written = vsnprintf(output->data + output->length,
68
- remaining_capacity, format, args);
66
+ va_start(args, format);
67
+ bytes_written = vsnprintf(
68
+ output->data + output->length, remaining_capacity, format, args);
69
+ va_end(args);
69
70
  }
70
71
  output->length += bytes_written;
71
- va_end(args);
72
72
  return bytes_written;
73
73
  }
74
74
 
75
- static void print_tag_stack(
76
- GumboParser* parser, const GumboParserError* error,
75
+ static void print_tag_stack(GumboParser* parser, const GumboParserError* error,
77
76
  GumboStringBuffer* output) {
78
77
  print_message(parser, output, " Currently open tags: ");
79
- for (int i = 0; i < error->tag_stack.length; ++i) {
78
+ for (unsigned int i = 0; i < error->tag_stack.length; ++i) {
80
79
  if (i) {
81
80
  print_message(parser, output, ", ");
82
81
  }
@@ -87,12 +86,11 @@ static void print_tag_stack(
87
86
  }
88
87
 
89
88
  static void handle_parser_error(GumboParser* parser,
90
- const GumboParserError* error,
91
- GumboStringBuffer* output) {
89
+ const GumboParserError* error, GumboStringBuffer* output) {
92
90
  if (error->parser_state == GUMBO_INSERTION_MODE_INITIAL &&
93
91
  error->input_type != GUMBO_TOKEN_DOCTYPE) {
94
- print_message(parser, output,
95
- "The doctype must be the first token in the document");
92
+ print_message(
93
+ parser, output, "The doctype must be the first token in the document");
96
94
  return;
97
95
  }
98
96
 
@@ -106,6 +104,7 @@ static void handle_parser_error(GumboParser* parser,
106
104
  // But just in case...
107
105
  print_message(parser, output, "Comments aren't legal here");
108
106
  return;
107
+ case GUMBO_TOKEN_CDATA:
109
108
  case GUMBO_TOKEN_WHITESPACE:
110
109
  case GUMBO_TOKEN_CHARACTER:
111
110
  print_message(parser, output, "Character tokens aren't legal here");
@@ -150,13 +149,14 @@ static const char* find_last_newline(
150
149
  static const char* find_next_newline(
151
150
  const char* original_text, const char* error_location) {
152
151
  const char* c = error_location;
153
- for (; *c && *c != '\n'; ++c);
152
+ for (; *c && *c != '\n'; ++c)
153
+ ;
154
154
  return c;
155
155
  }
156
156
 
157
157
  GumboError* gumbo_add_error(GumboParser* parser) {
158
158
  int max_errors = parser->_options->max_errors;
159
- if (max_errors >= 0 && parser->_output->errors.length >= max_errors) {
159
+ if (max_errors >= 0 && parser->_output->errors.length >= (unsigned int) max_errors) {
160
160
  return NULL;
161
161
  }
162
162
  GumboError* error = gumbo_parser_allocate(parser, sizeof(GumboError));
@@ -166,50 +166,52 @@ GumboError* gumbo_add_error(GumboParser* parser) {
166
166
 
167
167
  void gumbo_error_to_string(
168
168
  GumboParser* parser, const GumboError* error, GumboStringBuffer* output) {
169
- print_message(parser, output, "@%d:%d: ",
170
- error->position.line, error->position.column);
169
+ print_message(
170
+ parser, output, "@%d:%d: ", error->position.line, error->position.column);
171
171
  switch (error->type) {
172
172
  case GUMBO_ERR_UTF8_INVALID:
173
- print_message(parser, output, "Invalid UTF8 character 0x%x",
174
- error->v.codepoint);
173
+ print_message(
174
+ parser, output, "Invalid UTF8 character 0x%x", error->v.codepoint);
175
175
  break;
176
176
  case GUMBO_ERR_UTF8_TRUNCATED:
177
177
  print_message(parser, output,
178
- "Input stream ends with a truncated UTF8 character 0x%x",
179
- error->v.codepoint);
178
+ "Input stream ends with a truncated UTF8 character 0x%x",
179
+ error->v.codepoint);
180
180
  break;
181
181
  case GUMBO_ERR_NUMERIC_CHAR_REF_NO_DIGITS:
182
- print_message(parser, output,
183
- "No digits after &# in numeric character reference");
182
+ print_message(
183
+ parser, output, "No digits after &# in numeric character reference");
184
184
  break;
185
185
  case GUMBO_ERR_NUMERIC_CHAR_REF_WITHOUT_SEMICOLON:
186
186
  print_message(parser, output,
187
- "The numeric character reference &#%d should be followed "
188
- "by a semicolon", error->v.codepoint);
187
+ "The numeric character reference &#%d should be followed "
188
+ "by a semicolon",
189
+ error->v.codepoint);
189
190
  break;
190
191
  case GUMBO_ERR_NUMERIC_CHAR_REF_INVALID:
191
192
  print_message(parser, output,
192
- "The numeric character reference &#%d; encodes an invalid "
193
- "unicode codepoint", error->v.codepoint);
193
+ "The numeric character reference &#%d; encodes an invalid "
194
+ "unicode codepoint",
195
+ error->v.codepoint);
194
196
  break;
195
197
  case GUMBO_ERR_NAMED_CHAR_REF_WITHOUT_SEMICOLON:
196
198
  // The textual data came from one of the literal strings in the table, and
197
199
  // so it'll be null-terminated.
198
200
  print_message(parser, output,
199
- "The named character reference &%.*s should be followed by a "
200
- "semicolon", (int) error->v.text.length, error->v.text.data);
201
+ "The named character reference &%.*s should be followed by a "
202
+ "semicolon",
203
+ (int) error->v.text.length, error->v.text.data);
201
204
  break;
202
205
  case GUMBO_ERR_NAMED_CHAR_REF_INVALID:
203
206
  print_message(parser, output,
204
- "The named character reference &%.*s; is not a valid entity name",
205
- (int) error->v.text.length, error->v.text.data);
207
+ "The named character reference &%.*s; is not a valid entity name",
208
+ (int) error->v.text.length, error->v.text.data);
206
209
  break;
207
210
  case GUMBO_ERR_DUPLICATE_ATTR:
208
211
  print_message(parser, output,
209
- "Attribute %s occurs multiple times, at positions %d and %d",
210
- error->v.duplicate_attr.name,
211
- error->v.duplicate_attr.original_index,
212
- error->v.duplicate_attr.new_index);
212
+ "Attribute %s occurs multiple times, at positions %d and %d",
213
+ error->v.duplicate_attr.name, error->v.duplicate_attr.original_index,
214
+ error->v.duplicate_attr.new_index);
213
215
  break;
214
216
  case GUMBO_ERR_PARSER:
215
217
  case GUMBO_ERR_UNACKNOWLEDGED_SELF_CLOSING_TAG:
@@ -217,21 +219,19 @@ void gumbo_error_to_string(
217
219
  break;
218
220
  default:
219
221
  print_message(parser, output,
220
- "Tokenizer error with an unimplemented error message");
222
+ "Tokenizer error with an unimplemented error message");
221
223
  break;
222
224
  }
223
225
  gumbo_string_buffer_append_codepoint(parser, '.', output);
224
226
  }
225
227
 
226
- void gumbo_caret_diagnostic_to_string(
227
- GumboParser* parser, const GumboError* error,
228
- const char* source_text, GumboStringBuffer* output) {
228
+ void gumbo_caret_diagnostic_to_string(GumboParser* parser,
229
+ const GumboError* error, const char* source_text,
230
+ GumboStringBuffer* output) {
229
231
  gumbo_error_to_string(parser, error, output);
230
232
 
231
- const char* line_start =
232
- find_last_newline(source_text, error->original_text);
233
- const char* line_end =
234
- find_next_newline(source_text, error->original_text);
233
+ const char* line_start = find_last_newline(source_text, error->original_text);
234
+ const char* line_end = find_next_newline(source_text, error->original_text);
235
235
  GumboStringPiece original_line;
236
236
  original_line.data = line_start;
237
237
  original_line.length = line_end - line_start;
@@ -272,7 +272,7 @@ void gumbo_init_errors(GumboParser* parser) {
272
272
  }
273
273
 
274
274
  void gumbo_destroy_errors(GumboParser* parser) {
275
- for (int i = 0; i < parser->_output->errors.length; ++i) {
275
+ for (unsigned int i = 0; i < parser->_output->errors.length; ++i) {
276
276
  gumbo_error_destroy(parser, parser->_output->errors.data[i]);
277
277
  }
278
278
  gumbo_vector_destroy(parser, &parser->_output->errors);
@@ -201,24 +201,22 @@ void gumbo_error_destroy(struct GumboInternalParser* parser, GumboError* error);
201
201
  // responsible for deleting the buffer. (Note that the buffer is allocated with
202
202
  // the allocator specified in the GumboParser config and hence should be freed
203
203
  // by gumbo_parser_deallocate().)
204
- void gumbo_error_to_string(
205
- struct GumboInternalParser* parser, const GumboError* error,
206
- GumboStringBuffer* output);
204
+ void gumbo_error_to_string(struct GumboInternalParser* parser,
205
+ const GumboError* error, GumboStringBuffer* output);
207
206
 
208
207
  // Prints a caret diagnostic to a string. This fills an empty GumboStringBuffer
209
208
  // with a freshly-allocated buffer containing the error message text. The
210
209
  // caller is responsible for deleting the buffer. (Note that the buffer is
211
210
  // allocated with the allocator specified in the GumboParser config and hence
212
211
  // should be freed by gumbo_parser_deallocate().)
213
- void gumbo_caret_diagnostic_to_string(
214
- struct GumboInternalParser* parser, const GumboError* error,
215
- const char* source_text, GumboStringBuffer* output);
212
+ void gumbo_caret_diagnostic_to_string(struct GumboInternalParser* parser,
213
+ const GumboError* error, const char* source_text,
214
+ GumboStringBuffer* output);
216
215
 
217
216
  // Like gumbo_caret_diagnostic_to_string, but prints the text to stdout instead
218
217
  // of writing to a string.
219
- void gumbo_print_caret_diagnostic(
220
- struct GumboInternalParser* parser, const GumboError* error,
221
- const char* source_text);
218
+ void gumbo_print_caret_diagnostic(struct GumboInternalParser* parser,
219
+ const GumboError* error, const char* source_text);
222
220
 
223
221
  #ifdef __cplusplus
224
222
  }
@@ -76,7 +76,6 @@ typedef struct {
76
76
  */
77
77
  extern const GumboSourcePosition kGumboEmptySourcePosition;
78
78
 
79
-
80
79
  /**
81
80
  * A struct representing a string or part of a string. Strings within the
82
81
  * parser are represented by a char* and a length; the char* points into
@@ -111,7 +110,6 @@ bool gumbo_string_equals(
111
110
  bool gumbo_string_equals_ignore_case(
112
111
  const GumboStringPiece* str1, const GumboStringPiece* str2);
113
112
 
114
-
115
113
  /**
116
114
  * A simple vector implementation. This stores a pointer to a data array and a
117
115
  * length. All elements are stored as void*; client code must cast to the
@@ -141,8 +139,7 @@ extern const GumboVector kGumboEmptyVector;
141
139
  * Returns the first index at which an element appears in this vector (testing
142
140
  * by pointer equality), or -1 if it never does.
143
141
  */
144
- int gumbo_vector_index_of(GumboVector* vector, void* element);
145
-
142
+ int gumbo_vector_index_of(GumboVector* vector, const void* element);
146
143
 
147
144
  /**
148
145
  * An enum for all the tags defined in the HTML5 standard. These correspond to
@@ -157,172 +154,10 @@ int gumbo_vector_index_of(GumboVector* vector, void* element);
157
154
  * strings.
158
155
  */
159
156
  typedef enum {
160
- // http://www.whatwg.org/specs/web-apps/current-work/multipage/semantics.html#the-root-element
161
- GUMBO_TAG_HTML,
162
- // http://www.whatwg.org/specs/web-apps/current-work/multipage/semantics.html#document-metadata
163
- GUMBO_TAG_HEAD,
164
- GUMBO_TAG_TITLE,
165
- GUMBO_TAG_BASE,
166
- GUMBO_TAG_LINK,
167
- GUMBO_TAG_META,
168
- GUMBO_TAG_STYLE,
169
- // http://www.whatwg.org/specs/web-apps/current-work/multipage/scripting-1.html#scripting-1
170
- GUMBO_TAG_SCRIPT,
171
- GUMBO_TAG_NOSCRIPT,
172
- GUMBO_TAG_TEMPLATE,
173
- // http://www.whatwg.org/specs/web-apps/current-work/multipage/sections.html#sections
174
- GUMBO_TAG_BODY,
175
- GUMBO_TAG_ARTICLE,
176
- GUMBO_TAG_SECTION,
177
- GUMBO_TAG_NAV,
178
- GUMBO_TAG_ASIDE,
179
- GUMBO_TAG_H1,
180
- GUMBO_TAG_H2,
181
- GUMBO_TAG_H3,
182
- GUMBO_TAG_H4,
183
- GUMBO_TAG_H5,
184
- GUMBO_TAG_H6,
185
- GUMBO_TAG_HGROUP,
186
- GUMBO_TAG_HEADER,
187
- GUMBO_TAG_FOOTER,
188
- GUMBO_TAG_ADDRESS,
189
- // http://www.whatwg.org/specs/web-apps/current-work/multipage/grouping-content.html#grouping-content
190
- GUMBO_TAG_P,
191
- GUMBO_TAG_HR,
192
- GUMBO_TAG_PRE,
193
- GUMBO_TAG_BLOCKQUOTE,
194
- GUMBO_TAG_OL,
195
- GUMBO_TAG_UL,
196
- GUMBO_TAG_LI,
197
- GUMBO_TAG_DL,
198
- GUMBO_TAG_DT,
199
- GUMBO_TAG_DD,
200
- GUMBO_TAG_FIGURE,
201
- GUMBO_TAG_FIGCAPTION,
202
- GUMBO_TAG_MAIN,
203
- GUMBO_TAG_DIV,
204
- // http://www.whatwg.org/specs/web-apps/current-work/multipage/text-level-semantics.html#text-level-semantics
205
- GUMBO_TAG_A,
206
- GUMBO_TAG_EM,
207
- GUMBO_TAG_STRONG,
208
- GUMBO_TAG_SMALL,
209
- GUMBO_TAG_S,
210
- GUMBO_TAG_CITE,
211
- GUMBO_TAG_Q,
212
- GUMBO_TAG_DFN,
213
- GUMBO_TAG_ABBR,
214
- GUMBO_TAG_DATA,
215
- GUMBO_TAG_TIME,
216
- GUMBO_TAG_CODE,
217
- GUMBO_TAG_VAR,
218
- GUMBO_TAG_SAMP,
219
- GUMBO_TAG_KBD,
220
- GUMBO_TAG_SUB,
221
- GUMBO_TAG_SUP,
222
- GUMBO_TAG_I,
223
- GUMBO_TAG_B,
224
- GUMBO_TAG_U,
225
- GUMBO_TAG_MARK,
226
- GUMBO_TAG_RUBY,
227
- GUMBO_TAG_RT,
228
- GUMBO_TAG_RP,
229
- GUMBO_TAG_BDI,
230
- GUMBO_TAG_BDO,
231
- GUMBO_TAG_SPAN,
232
- GUMBO_TAG_BR,
233
- GUMBO_TAG_WBR,
234
- // http://www.whatwg.org/specs/web-apps/current-work/multipage/edits.html#edits
235
- GUMBO_TAG_INS,
236
- GUMBO_TAG_DEL,
237
- // http://www.whatwg.org/specs/web-apps/current-work/multipage/embedded-content-1.html#embedded-content-1
238
- GUMBO_TAG_IMAGE,
239
- GUMBO_TAG_IMG,
240
- GUMBO_TAG_IFRAME,
241
- GUMBO_TAG_EMBED,
242
- GUMBO_TAG_OBJECT,
243
- GUMBO_TAG_PARAM,
244
- GUMBO_TAG_VIDEO,
245
- GUMBO_TAG_AUDIO,
246
- GUMBO_TAG_SOURCE,
247
- GUMBO_TAG_TRACK,
248
- GUMBO_TAG_CANVAS,
249
- GUMBO_TAG_MAP,
250
- GUMBO_TAG_AREA,
251
- // http://www.whatwg.org/specs/web-apps/current-work/multipage/the-map-element.html#mathml
252
- GUMBO_TAG_MATH,
253
- GUMBO_TAG_MI,
254
- GUMBO_TAG_MO,
255
- GUMBO_TAG_MN,
256
- GUMBO_TAG_MS,
257
- GUMBO_TAG_MTEXT,
258
- GUMBO_TAG_MGLYPH,
259
- GUMBO_TAG_MALIGNMARK,
260
- GUMBO_TAG_ANNOTATION_XML,
261
- // http://www.whatwg.org/specs/web-apps/current-work/multipage/the-map-element.html#svg-0
262
- GUMBO_TAG_SVG,
263
- GUMBO_TAG_FOREIGNOBJECT,
264
- GUMBO_TAG_DESC,
265
- // SVG title tags will have GUMBO_TAG_TITLE as with HTML.
266
- // http://www.whatwg.org/specs/web-apps/current-work/multipage/tabular-data.html#tabular-data
267
- GUMBO_TAG_TABLE,
268
- GUMBO_TAG_CAPTION,
269
- GUMBO_TAG_COLGROUP,
270
- GUMBO_TAG_COL,
271
- GUMBO_TAG_TBODY,
272
- GUMBO_TAG_THEAD,
273
- GUMBO_TAG_TFOOT,
274
- GUMBO_TAG_TR,
275
- GUMBO_TAG_TD,
276
- GUMBO_TAG_TH,
277
- // http://www.whatwg.org/specs/web-apps/current-work/multipage/forms.html#forms
278
- GUMBO_TAG_FORM,
279
- GUMBO_TAG_FIELDSET,
280
- GUMBO_TAG_LEGEND,
281
- GUMBO_TAG_LABEL,
282
- GUMBO_TAG_INPUT,
283
- GUMBO_TAG_BUTTON,
284
- GUMBO_TAG_SELECT,
285
- GUMBO_TAG_DATALIST,
286
- GUMBO_TAG_OPTGROUP,
287
- GUMBO_TAG_OPTION,
288
- GUMBO_TAG_TEXTAREA,
289
- GUMBO_TAG_KEYGEN,
290
- GUMBO_TAG_OUTPUT,
291
- GUMBO_TAG_PROGRESS,
292
- GUMBO_TAG_METER,
293
- // http://www.whatwg.org/specs/web-apps/current-work/multipage/interactive-elements.html#interactive-elements
294
- GUMBO_TAG_DETAILS,
295
- GUMBO_TAG_SUMMARY,
296
- GUMBO_TAG_MENU,
297
- GUMBO_TAG_MENUITEM,
298
- // Non-conforming elements that nonetheless appear in the HTML5 spec.
299
- // http://www.whatwg.org/specs/web-apps/current-work/multipage/obsolete.html#non-conforming-features
300
- GUMBO_TAG_APPLET,
301
- GUMBO_TAG_ACRONYM,
302
- GUMBO_TAG_BGSOUND,
303
- GUMBO_TAG_DIR,
304
- GUMBO_TAG_FRAME,
305
- GUMBO_TAG_FRAMESET,
306
- GUMBO_TAG_NOFRAMES,
307
- GUMBO_TAG_ISINDEX,
308
- GUMBO_TAG_LISTING,
309
- GUMBO_TAG_XMP,
310
- GUMBO_TAG_NEXTID,
311
- GUMBO_TAG_NOEMBED,
312
- GUMBO_TAG_PLAINTEXT,
313
- GUMBO_TAG_RB,
314
- GUMBO_TAG_STRIKE,
315
- GUMBO_TAG_BASEFONT,
316
- GUMBO_TAG_BIG,
317
- GUMBO_TAG_BLINK,
318
- GUMBO_TAG_CENTER,
319
- GUMBO_TAG_FONT,
320
- GUMBO_TAG_MARQUEE,
321
- GUMBO_TAG_MULTICOL,
322
- GUMBO_TAG_NOBR,
323
- GUMBO_TAG_SPACER,
324
- GUMBO_TAG_TT,
325
- // Used for all tags that don't have special handling in HTML.
157
+ // Load all the tags from an external source, generated from tag.in.
158
+ #include "tag_enum.h"
159
+ // Used for all tags that don't have special handling in HTML. Add new tags
160
+ // to the end of tag.in so as to preserve backwards-compatibility.
326
161
  GUMBO_TAG_UNKNOWN,
327
162
  // A marker value to indicate the end of the enum, for iterating over it.
328
163
  // Also used as the terminator for varargs functions that take tags.
@@ -364,9 +199,10 @@ const char* gumbo_normalize_svg_tagname(const GumboStringPiece* tagname);
364
199
 
365
200
  /**
366
201
  * Converts a tag name string (which may be in upper or mixed case) to a tag
367
- * enum.
202
+ * enum. The `tag` version expects `tagname` to be NULL-terminated
368
203
  */
369
204
  GumboTag gumbo_tag_enum(const char* tagname);
205
+ GumboTag gumbo_tagn_enum(const char* tagname, unsigned int length);
370
206
 
371
207
  /**
372
208
  * Attribute namespaces.
@@ -461,10 +297,16 @@ typedef enum {
461
297
  GUMBO_NODE_TEXT,
462
298
  /** CDATA node. v will be a GumboText. */
463
299
  GUMBO_NODE_CDATA,
464
- /** Comment node. v. will be a GumboText, excluding comment delimiters. */
300
+ /** Comment node. v will be a GumboText, excluding comment delimiters. */
465
301
  GUMBO_NODE_COMMENT,
466
302
  /** Text node, where all contents is whitespace. v will be a GumboText. */
467
- GUMBO_NODE_WHITESPACE
303
+ GUMBO_NODE_WHITESPACE,
304
+ /** Template node. This is separate from GUMBO_NODE_ELEMENT because many
305
+ * client libraries will want to ignore the contents of template nodes, as
306
+ * the spec suggests. Recursing on GUMBO_NODE_ELEMENT will do the right thing
307
+ * here, while clients that want to include template contents should also
308
+ * check for GUMBO_NODE_TEMPLATE. v will be a GumboElement. */
309
+ GUMBO_NODE_TEMPLATE
468
310
  } GumboNodeType;
469
311
 
470
312
  /**
@@ -473,7 +315,9 @@ typedef enum {
473
315
  */
474
316
  typedef struct GumboInternalNode GumboNode;
475
317
 
476
- /** http://www.whatwg.org/specs/web-apps/current-work/complete/dom.html#quirks-mode */
318
+ /**
319
+ * http://www.whatwg.org/specs/web-apps/current-work/complete/dom.html#quirks-mode
320
+ */
477
321
  typedef enum {
478
322
  GUMBO_DOCTYPE_NO_QUIRKS,
479
323
  GUMBO_DOCTYPE_QUIRKS,
@@ -571,7 +415,6 @@ typedef enum {
571
415
  GUMBO_INSERTION_FOSTER_PARENTED = 1 << 10,
572
416
  } GumboParseFlags;
573
417
 
574
-
575
418
  /**
576
419
  * Information specific to document nodes.
577
420
  */
@@ -690,9 +533,9 @@ struct GumboInternalNode {
690
533
 
691
534
  /** The actual node data. */
692
535
  union {
693
- GumboDocument document; // For GUMBO_NODE_DOCUMENT.
694
- GumboElement element; // For GUMBO_NODE_ELEMENT.
695
- GumboText text; // For everything else.
536
+ GumboDocument document; // For GUMBO_NODE_DOCUMENT.
537
+ GumboElement element; // For GUMBO_NODE_ELEMENT.
538
+ GumboText text; // For everything else.
696
539
  } v;
697
540
  };
698
541
 
@@ -750,6 +593,29 @@ typedef struct GumboInternalOptions {
750
593
  * Default: -1
751
594
  */
752
595
  int max_errors;
596
+
597
+ /**
598
+ * The fragment context for parsing:
599
+ * https://html.spec.whatwg.org/multipage/syntax.html#parsing-html-fragments
600
+ *
601
+ * If GUMBO_TAG_LAST is passed here, it is assumed to be "no fragment", i.e.
602
+ * the regular parsing algorithm. Otherwise, pass the tag enum for the
603
+ * intended parent of the parsed fragment. We use just the tag enum rather
604
+ * than a full node because that's enough to set all the parsing context we
605
+ * need, and it provides some additional flexibility for client code to act as
606
+ * if parsing a fragment even when a full HTML tree isn't available.
607
+ *
608
+ * Default: GUMBO_TAG_LAST
609
+ */
610
+ GumboTag fragment_context;
611
+
612
+ /**
613
+ * The namespace for the fragment context. This lets client code
614
+ * differentiate between, say, parsing a <title> tag in SVG vs. parsing it in
615
+ * HTML.
616
+ * Default: GUMBO_NAMESPACE_HTML
617
+ */
618
+ GumboNamespaceEnum fragment_namespace;
753
619
  } GumboOptions;
754
620
 
755
621
  /** Default options struct; use this with gumbo_parse_with_options. */
@@ -796,9 +662,7 @@ GumboOutput* gumbo_parse_with_options(
796
662
  const GumboOptions* options, const char* buffer, size_t buffer_length);
797
663
 
798
664
  /** Release the memory used for the parse tree & parse errors. */
799
- void gumbo_destroy_output(
800
- const GumboOptions* options, GumboOutput* output);
801
-
665
+ void gumbo_destroy_output(const GumboOptions* options, GumboOutput* output);
802
666
 
803
667
  #ifdef __cplusplus
804
668
  }