nokogumbo 1.5.0 → 2.0.0.pre.alpha

Sign up to get free protection for your applications and to get access to all the features.
Files changed (55)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +56 -0
  3. data/README.md +146 -22
  4. data/ext/nokogumbo/extconf.rb +116 -0
  5. data/ext/{nokogumboc → nokogumbo}/nokogumbo.c +174 -71
  6. data/gumbo-parser/src/ascii.c +33 -0
  7. data/gumbo-parser/src/ascii.h +31 -0
  8. data/gumbo-parser/src/attribute.c +26 -28
  9. data/gumbo-parser/src/attribute.h +3 -23
  10. data/gumbo-parser/src/char_ref.c +135 -2351
  11. data/gumbo-parser/src/char_ref.h +13 -29
  12. data/gumbo-parser/src/error.c +215 -133
  13. data/gumbo-parser/src/error.h +34 -49
  14. data/gumbo-parser/src/foreign_attrs.c +104 -0
  15. data/gumbo-parser/src/gumbo.h +506 -304
  16. data/gumbo-parser/src/insertion_mode.h +4 -28
  17. data/gumbo-parser/src/macros.h +91 -0
  18. data/gumbo-parser/src/parser.c +1989 -1431
  19. data/gumbo-parser/src/parser.h +6 -22
  20. data/gumbo-parser/src/replacement.h +33 -0
  21. data/gumbo-parser/src/string_buffer.c +43 -50
  22. data/gumbo-parser/src/string_buffer.h +24 -40
  23. data/gumbo-parser/src/string_piece.c +39 -39
  24. data/gumbo-parser/src/svg_attrs.c +174 -0
  25. data/gumbo-parser/src/svg_tags.c +137 -0
  26. data/gumbo-parser/src/tag.c +186 -59
  27. data/gumbo-parser/src/tag_lookup.c +382 -0
  28. data/gumbo-parser/src/tag_lookup.h +13 -0
  29. data/gumbo-parser/src/token_type.h +1 -25
  30. data/gumbo-parser/src/tokenizer.c +899 -495
  31. data/gumbo-parser/src/tokenizer.h +37 -37
  32. data/gumbo-parser/src/tokenizer_states.h +6 -22
  33. data/gumbo-parser/src/utf8.c +103 -86
  34. data/gumbo-parser/src/utf8.h +37 -41
  35. data/gumbo-parser/src/util.c +48 -38
  36. data/gumbo-parser/src/util.h +10 -40
  37. data/gumbo-parser/src/vector.c +45 -57
  38. data/gumbo-parser/src/vector.h +17 -39
  39. data/lib/nokogumbo.rb +10 -174
  40. data/lib/nokogumbo/html5.rb +250 -0
  41. data/lib/nokogumbo/html5/document.rb +37 -0
  42. data/lib/nokogumbo/html5/document_fragment.rb +46 -0
  43. data/lib/nokogumbo/version.rb +3 -0
  44. data/lib/nokogumbo/xml/node.rb +57 -0
  45. metadata +32 -19
  46. data/ext/nokogumboc/extconf.rb +0 -60
  47. data/gumbo-parser/src/char_ref.rl +0 -2554
  48. data/gumbo-parser/src/string_piece.h +0 -38
  49. data/gumbo-parser/src/tag.in +0 -150
  50. data/gumbo-parser/src/tag_enum.h +0 -153
  51. data/gumbo-parser/src/tag_gperf.h +0 -105
  52. data/gumbo-parser/src/tag_sizes.h +0 -4
  53. data/gumbo-parser/src/tag_strings.h +0 -153
  54. data/gumbo-parser/visualc/include/strings.h +0 -4
  55. data/test-nokogumbo.rb +0 -190
data/gumbo-parser/src/char_ref.h
@@ -1,23 +1,3 @@
1
- // Copyright 2011 Google Inc. All Rights Reserved.
2
- //
3
- // Licensed under the Apache License, Version 2.0 (the "License");
4
- // you may not use this file except in compliance with the License.
5
- // You may obtain a copy of the License at
6
- //
7
- // http://www.apache.org/licenses/LICENSE-2.0
8
- //
9
- // Unless required by applicable law or agreed to in writing, software
10
- // distributed under the License is distributed on an "AS IS" BASIS,
11
- // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
- // See the License for the specific language governing permissions and
13
- // limitations under the License.
14
- //
15
- // Author: jdtang@google.com (Jonathan Tang)
16
- //
17
- // Internal header for character reference handling; this should not be exposed
18
- // transitively by any public API header. This is why the functions aren't
19
- // namespaced.
20
-
21
1
  #ifndef GUMBO_CHAR_REF_H_
22
2
  #define GUMBO_CHAR_REF_H_
23
3
 
@@ -34,8 +14,8 @@ struct GumboInternalUtf8Iterator;
34
14
  extern const int kGumboNoChar;
35
15
 
36
16
  // Certain named character references generate two codepoints, not one, and so
37
- // the consume_char_ref subroutine needs to return this instead of an int. The
38
- // first field will be kGumboNoChar if no character reference was found; the
17
+ // the gumbo_consume_char_ref subroutine needs to return this instead of an int.
18
+ // The first field will be kGumboNoChar if no character reference was found; the
39
19
  // second field will be kGumboNoChar if that is the case or if the character
40
20
  // reference returns only a single codepoint.
41
21
  typedef struct {
@@ -45,16 +25,20 @@ typedef struct {
45
25
 
46
26
  // Implements the "consume a character reference" section of the spec.
47
27
  // This reads in characters from the input as necessary, and fills in a
48
- // OneOrTwoCodepoints struct containing the characters read. It may add parse
49
- // errors to the GumboParser's errors vector, if the spec calls for it. Pass a
28
+ // OneOrTwoCodepoints struct containing the characters read. It may add parse
29
+ // errors to the GumboParser's errors vector, if the spec calls for it. Pass a
50
30
  // space for the "additional allowed char" when the spec says "with no
51
- // additional allowed char". Returns false on parse error, true otherwise.
52
- bool consume_char_ref(struct GumboInternalParser* parser,
53
- struct GumboInternalUtf8Iterator* input, int additional_allowed_char,
54
- bool is_in_attribute, OneOrTwoCodepoints* output);
31
+ // additional allowed char". Returns false on parse error, true otherwise.
32
+ bool gumbo_consume_char_ref (
33
+ struct GumboInternalParser* parser,
34
+ struct GumboInternalUtf8Iterator* input,
35
+ int additional_allowed_char,
36
+ bool is_in_attribute,
37
+ OneOrTwoCodepoints* output
38
+ );
55
39
 
56
40
  #ifdef __cplusplus
57
41
  }
58
42
  #endif
59
43
 
60
- #endif // GUMBO_CHAR_REF_H_
44
+ #endif // GUMBO_CHAR_REF_H_
data/gumbo-parser/src/error.c
@@ -1,156 +1,183 @@
1
- // Copyright 2010 Google Inc. All Rights Reserved.
2
- //
3
- // Licensed under the Apache License, Version 2.0 (the "License");
4
- // you may not use this file except in compliance with the License.
5
- // You may obtain a copy of the License at
6
- //
7
- // http://www.apache.org/licenses/LICENSE-2.0
8
- //
9
- // Unless required by applicable law or agreed to in writing, software
10
- // distributed under the License is distributed on an "AS IS" BASIS,
11
- // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
- // See the License for the specific language governing permissions and
13
- // limitations under the License.
14
- //
15
- // Author: jdtang@google.com (Jonathan Tang)
1
+ /*
2
+ Copyright 2010 Google Inc.
16
3
 
17
- #include "error.h"
4
+ Licensed under the Apache License, Version 2.0 (the "License");
5
+ you may not use this file except in compliance with the License.
6
+ You may obtain a copy of the License at
7
+
8
+ https://www.apache.org/licenses/LICENSE-2.0
9
+
10
+ Unless required by applicable law or agreed to in writing, software
11
+ distributed under the License is distributed on an "AS IS" BASIS,
12
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ See the License for the specific language governing permissions and
14
+ limitations under the License.
15
+ */
18
16
 
19
17
  #include <assert.h>
18
+ #include <inttypes.h>
20
19
  #include <stdarg.h>
21
20
  #include <stdio.h>
22
21
  #include <string.h>
23
-
22
+ #include "error.h"
24
23
  #include "gumbo.h"
24
+ #include "macros.h"
25
25
  #include "parser.h"
26
26
  #include "string_buffer.h"
27
27
  #include "util.h"
28
28
  #include "vector.h"
29
29
 
30
- // Prints a formatted message to a StringBuffer. This automatically resizes the
31
- // StringBuffer as necessary to fit the message. Returns the number of bytes
30
+ // Prints a formatted message to a StringBuffer. This automatically resizes the
31
+ // StringBuffer as necessary to fit the message. Returns the number of bytes
32
32
  // written.
33
- static int print_message(
34
- GumboParser* parser, GumboStringBuffer* output, const char* format, ...) {
33
+ static int PRINTF(2) print_message (
34
+ GumboStringBuffer* output,
35
+ const char* format,
36
+ ...
37
+ ) {
35
38
  va_list args;
36
39
  int remaining_capacity = output->capacity - output->length;
37
40
  va_start(args, format);
38
- int bytes_written = vsnprintf(
39
- output->data + output->length, remaining_capacity, format, args);
41
+ int bytes_written = vsnprintf (
42
+ output->data + output->length,
43
+ remaining_capacity,
44
+ format,
45
+ args
46
+ );
40
47
  va_end(args);
41
48
  #ifdef _MSC_VER
42
49
  if (bytes_written == -1) {
43
50
  // vsnprintf returns -1 on MSVC++ if there's not enough capacity, instead of
44
51
  // returning the number of bytes that would've been written had there been
45
- // enough. In this case, we'll double the buffer size and hope it fits when
52
+ // enough. In this case, we'll double the buffer size and hope it fits when
46
53
  // we retry (letting it fail and returning 0 if it doesn't), since there's
47
54
  // no way to smartly resize the buffer.
48
- gumbo_string_buffer_reserve(parser, output->capacity * 2, output);
55
+ gumbo_string_buffer_reserve(output->capacity * 2, output);
49
56
  va_start(args, format);
50
- int result = vsnprintf(
51
- output->data + output->length, remaining_capacity, format, args);
57
+ int result = vsnprintf (
58
+ output->data + output->length,
59
+ remaining_capacity,
60
+ format,
61
+ args
62
+ );
52
63
  va_end(args);
53
64
  return result == -1 ? 0 : result;
54
65
  }
55
66
  #else
56
- // -1 in standard C99 indicates an encoding error. Return 0 and do nothing.
67
+ // -1 in standard C99 indicates an encoding error. Return 0 and do nothing.
57
68
  if (bytes_written == -1) {
58
69
  return 0;
59
70
  }
60
71
  #endif
61
72
 
62
- if (bytes_written > remaining_capacity) {
63
- gumbo_string_buffer_reserve(
64
- parser, output->capacity + bytes_written, output);
73
+ if (bytes_written >= remaining_capacity) {
74
+ gumbo_string_buffer_reserve(output->capacity + bytes_written, output);
65
75
  remaining_capacity = output->capacity - output->length;
66
76
  va_start(args, format);
67
- bytes_written = vsnprintf(
68
- output->data + output->length, remaining_capacity, format, args);
77
+ bytes_written = vsnprintf (
78
+ output->data + output->length,
79
+ remaining_capacity,
80
+ format,
81
+ args
82
+ );
69
83
  va_end(args);
70
84
  }
71
85
  output->length += bytes_written;
72
86
  return bytes_written;
73
87
  }
74
88
 
75
- static void print_tag_stack(GumboParser* parser, const GumboParserError* error,
76
- GumboStringBuffer* output) {
77
- print_message(parser, output, " Currently open tags: ");
89
+ static void print_tag_stack (
90
+ const GumboParserError* error,
91
+ GumboStringBuffer* output
92
+ ) {
93
+ print_message(output, " Currently open tags: ");
78
94
  for (unsigned int i = 0; i < error->tag_stack.length; ++i) {
79
95
  if (i) {
80
- print_message(parser, output, ", ");
96
+ print_message(output, ", ");
81
97
  }
82
98
  GumboTag tag = (GumboTag) error->tag_stack.data[i];
83
- print_message(parser, output, gumbo_normalized_tagname(tag));
99
+ print_message(output, "%s", gumbo_normalized_tagname(tag));
84
100
  }
85
- gumbo_string_buffer_append_codepoint(parser, '.', output);
101
+ gumbo_string_buffer_append_codepoint('.', output);
86
102
  }
87
103
 
88
- static void handle_parser_error(GumboParser* parser,
89
- const GumboParserError* error, GumboStringBuffer* output) {
90
- if (error->parser_state == GUMBO_INSERTION_MODE_INITIAL &&
91
- error->input_type != GUMBO_TOKEN_DOCTYPE) {
92
- print_message(
93
- parser, output, "The doctype must be the first token in the document");
104
+ static void handle_parser_error (
105
+ const GumboParserError* error,
106
+ GumboStringBuffer* output
107
+ ) {
108
+ if (
109
+ error->parser_state == GUMBO_INSERTION_MODE_INITIAL
110
+ && error->input_type != GUMBO_TOKEN_DOCTYPE
111
+ ) {
112
+ print_message (
113
+ output,
114
+ "The doctype must be the first token in the document"
115
+ );
94
116
  return;
95
117
  }
96
118
 
97
119
  switch (error->input_type) {
98
120
  case GUMBO_TOKEN_DOCTYPE:
99
- print_message(parser, output, "This is not a legal doctype");
121
+ print_message(output, "This is not a legal doctype");
100
122
  return;
101
123
  case GUMBO_TOKEN_COMMENT:
102
124
  // Should never happen; comments are always legal.
103
125
  assert(0);
104
126
  // But just in case...
105
- print_message(parser, output, "Comments aren't legal here");
127
+ print_message(output, "Comments aren't legal here");
106
128
  return;
107
129
  case GUMBO_TOKEN_CDATA:
108
130
  case GUMBO_TOKEN_WHITESPACE:
109
131
  case GUMBO_TOKEN_CHARACTER:
110
- print_message(parser, output, "Character tokens aren't legal here");
132
+ print_message(output, "Character tokens aren't legal here");
111
133
  return;
112
134
  case GUMBO_TOKEN_NULL:
113
- print_message(parser, output, "Null bytes are not allowed in HTML5");
135
+ print_message(output, "Null bytes are not allowed in HTML5");
114
136
  return;
115
137
  case GUMBO_TOKEN_EOF:
116
138
  if (error->parser_state == GUMBO_INSERTION_MODE_INITIAL) {
117
- print_message(parser, output, "You must provide a doctype");
139
+ print_message(output, "You must provide a doctype");
118
140
  } else {
119
- print_message(parser, output, "Premature end of file");
120
- print_tag_stack(parser, error, output);
141
+ print_message(output, "Premature end of file");
142
+ print_tag_stack(error, output);
121
143
  }
122
144
  return;
123
145
  case GUMBO_TOKEN_START_TAG:
124
146
  case GUMBO_TOKEN_END_TAG:
125
- print_message(parser, output, "That tag isn't allowed here");
126
- print_tag_stack(parser, error, output);
147
+ print_message(output, "That tag isn't allowed here");
148
+ print_tag_stack(error, output);
127
149
  // TODO(jdtang): Give more specific messaging.
128
150
  return;
129
151
  }
130
152
  }
131
153
 
132
154
  // Finds the preceding newline in an original source buffer from a given byte
133
- // location. Returns a character pointer to the character after that, or a
155
+ // location. Returns a character pointer to the character after that, or a
134
156
  // pointer to the beginning of the string if this is the first line.
135
- static const char* find_last_newline(
136
- const char* original_text, const char* error_location) {
137
- assert(error_location >= original_text);
157
+ static const char* find_prev_newline (
158
+ const char* source_text,
159
+ const char* error_location
160
+ ) {
161
+ assert(error_location >= source_text);
138
162
  const char* c = error_location;
139
- for (; c != original_text && *c != '\n'; --c) {
140
- // There may be an error at EOF, which would be a nul byte.
141
- assert(*c || c == error_location);
142
- }
143
- return c == original_text ? c : c + 1;
163
+ if (*c == '\n' && c != source_text)
164
+ --c;
165
+ while (c != source_text && *c != '\n')
166
+ --c;
167
+ return c == source_text ? c : c + 1;
144
168
  }
145
169
 
146
170
  // Finds the next newline in the original source buffer from a given byte
147
- // location. Returns a character pointer to that newline, or a pointer to the
171
+ // location. Returns a character pointer to that newline, or a pointer to the
148
172
  // terminating null byte if this is the last line.
149
173
  static const char* find_next_newline(
150
- const char* original_text, const char* error_location) {
174
+ const char* source_text_end,
175
+ const char* error_location
176
+ ) {
177
+ assert(error_location <= source_text_end);
151
178
  const char* c = error_location;
152
- for (; *c && *c != '\n'; ++c)
153
- ;
179
+ while (c != source_text_end && *c != '\n')
180
+ ++c;
154
181
  return c;
155
182
  }
156
183
 
@@ -159,121 +186,176 @@ GumboError* gumbo_add_error(GumboParser* parser) {
159
186
  if (max_errors >= 0 && parser->_output->errors.length >= (unsigned int) max_errors) {
160
187
  return NULL;
161
188
  }
162
- GumboError* error = gumbo_parser_allocate(parser, sizeof(GumboError));
163
- gumbo_vector_add(parser, error, &parser->_output->errors);
189
+ GumboError* error = gumbo_alloc(sizeof(GumboError));
190
+ gumbo_vector_add(error, &parser->_output->errors);
164
191
  return error;
165
192
  }
166
193
 
167
- void gumbo_error_to_string(
168
- GumboParser* parser, const GumboError* error, GumboStringBuffer* output) {
169
- print_message(
170
- parser, output, "@%d:%d: ", error->position.line, error->position.column);
194
+ void gumbo_error_to_string (
195
+ const GumboError* error,
196
+ GumboStringBuffer* output
197
+ ) {
198
+ print_message (
199
+ output,
200
+ "@%zu:%zu: ",
201
+ error->position.line,
202
+ error->position.column
203
+ );
171
204
  switch (error->type) {
172
205
  case GUMBO_ERR_UTF8_INVALID:
173
- print_message(
174
- parser, output, "Invalid UTF8 character 0x%x", error->v.codepoint);
206
+ print_message (
207
+ output,
208
+ "Invalid UTF8 character 0x%" PRIx32,
209
+ error->v.codepoint
210
+ );
175
211
  break;
176
212
  case GUMBO_ERR_UTF8_TRUNCATED:
177
- print_message(parser, output,
178
- "Input stream ends with a truncated UTF8 character 0x%x",
179
- error->v.codepoint);
213
+ print_message (
214
+ output,
215
+ "Input stream ends with a truncated UTF8 character 0x%" PRIx32,
216
+ error->v.codepoint
217
+ );
218
+ break;
219
+ case GUMBO_ERR_UTF8_NULL:
220
+ print_message (
221
+ output,
222
+ "Unexpected NULL character in the input stream"
223
+ );
180
224
  break;
181
225
  case GUMBO_ERR_NUMERIC_CHAR_REF_NO_DIGITS:
182
- print_message(
183
- parser, output, "No digits after &# in numeric character reference");
226
+ print_message (
227
+ output,
228
+ "No digits after &# in numeric character reference"
229
+ );
184
230
  break;
185
231
  case GUMBO_ERR_NUMERIC_CHAR_REF_WITHOUT_SEMICOLON:
186
- print_message(parser, output,
187
- "The numeric character reference &#%d should be followed "
188
- "by a semicolon",
189
- error->v.codepoint);
232
+ print_message (
233
+ output,
234
+ "The numeric character reference &#%" PRIu32 " should be followed "
235
+ "by a semicolon",
236
+ error->v.codepoint
237
+ );
190
238
  break;
191
239
  case GUMBO_ERR_NUMERIC_CHAR_REF_INVALID:
192
- print_message(parser, output,
193
- "The numeric character reference &#%d; encodes an invalid "
194
- "unicode codepoint",
195
- error->v.codepoint);
240
+ print_message (
241
+ output,
242
+ "The numeric character reference &#%" PRIu32 "; encodes an invalid "
243
+ "unicode codepoint",
244
+ error->v.codepoint
245
+ );
196
246
  break;
197
247
  case GUMBO_ERR_NAMED_CHAR_REF_WITHOUT_SEMICOLON:
198
248
  // The textual data came from one of the literal strings in the table, and
199
249
  // so it'll be null-terminated.
200
- print_message(parser, output,
201
- "The named character reference &%.*s should be followed by a "
202
- "semicolon",
203
- (int) error->v.text.length, error->v.text.data);
250
+ print_message (
251
+ output,
252
+ "The named character reference &%.*s should be followed by a "
253
+ "semicolon",
254
+ (int) error->v.text.length,
255
+ error->v.text.data
256
+ );
204
257
  break;
205
258
  case GUMBO_ERR_NAMED_CHAR_REF_INVALID:
206
- print_message(parser, output,
207
- "The named character reference &%.*s; is not a valid entity name",
208
- (int) error->v.text.length, error->v.text.data);
259
+ print_message (
260
+ output,
261
+ "The named character reference &%.*s; is not a valid entity name",
262
+ (int) error->v.text.length,
263
+ error->v.text.data
264
+ );
209
265
  break;
210
266
  case GUMBO_ERR_DUPLICATE_ATTR:
211
- print_message(parser, output,
212
- "Attribute %s occurs multiple times, at positions %d and %d",
213
- error->v.duplicate_attr.name, error->v.duplicate_attr.original_index,
214
- error->v.duplicate_attr.new_index);
267
+ print_message (
268
+ output,
269
+ "Attribute %s occurs multiple times, at positions %u and %u",
270
+ error->v.duplicate_attr.name,
271
+ error->v.duplicate_attr.original_index,
272
+ error->v.duplicate_attr.new_index
273
+ );
274
+ break;
275
+ case GUMBO_ERR_DASHES_OR_DOCTYPE:
276
+ print_message (
277
+ output,
278
+ "Incorrectly opened comment; expected '--', 'DOCTYPE', or '[CDATA['"
279
+ );
215
280
  break;
216
281
  case GUMBO_ERR_PARSER:
282
+ handle_parser_error(&error->v.parser, output);
283
+ break;
217
284
  case GUMBO_ERR_UNACKNOWLEDGED_SELF_CLOSING_TAG:
218
- handle_parser_error(parser, &error->v.parser, output);
285
+ case GUMBO_ERR_SELF_CLOSING_END_TAG:
286
+ print_message (
287
+ output,
288
+ "Tag cannot be self-closing");
219
289
  break;
220
290
  default:
221
- print_message(parser, output,
222
- "Tokenizer error with an unimplemented error message");
291
+ print_message (
292
+ output,
293
+ "Tokenizer error with an unimplemented error message"
294
+ );
223
295
  break;
224
296
  }
225
- gumbo_string_buffer_append_codepoint(parser, '.', output);
297
+ gumbo_string_buffer_append_codepoint('.', output);
226
298
  }
227
299
 
228
- void gumbo_caret_diagnostic_to_string(GumboParser* parser,
229
- const GumboError* error, const char* source_text,
230
- GumboStringBuffer* output) {
231
- gumbo_error_to_string(parser, error, output);
300
+ void gumbo_caret_diagnostic_to_string (
301
+ const GumboError* error,
302
+ const char* source_text,
303
+ size_t source_length,
304
+ GumboStringBuffer* output
305
+ ) {
306
+ gumbo_error_to_string(error, output);
232
307
 
233
- const char* line_start = find_last_newline(source_text, error->original_text);
234
- const char* line_end = find_next_newline(source_text, error->original_text);
308
+ const char* line_start = find_prev_newline(source_text, error->original_text);
309
+ const char* line_end = find_next_newline(source_text + source_length, error->original_text);
235
310
  GumboStringPiece original_line;
236
311
  original_line.data = line_start;
237
312
  original_line.length = line_end - line_start;
238
313
 
239
- gumbo_string_buffer_append_codepoint(parser, '\n', output);
240
- gumbo_string_buffer_append_string(parser, &original_line, output);
241
- gumbo_string_buffer_append_codepoint(parser, '\n', output);
242
- gumbo_string_buffer_reserve(
243
- parser, output->length + error->position.column, output);
244
- int num_spaces = error->position.column - 1;
245
- memset(output->data + output->length, ' ', num_spaces);
246
- output->length += num_spaces;
247
- gumbo_string_buffer_append_codepoint(parser, '^', output);
248
- gumbo_string_buffer_append_codepoint(parser, '\n', output);
314
+ gumbo_string_buffer_append_codepoint('\n', output);
315
+ gumbo_string_buffer_append_string(&original_line, output);
316
+ gumbo_string_buffer_append_codepoint('\n', output);
317
+ gumbo_string_buffer_reserve(output->length + error->position.column, output);
318
+ if (error->position.column >= 2) {
319
+ size_t num_spaces = error->position.column - 1;
320
+ memset(output->data + output->length, ' ', num_spaces);
321
+ output->length += num_spaces;
322
+ }
323
+ gumbo_string_buffer_append_codepoint('^', output);
324
+ gumbo_string_buffer_append_codepoint('\n', output);
249
325
  }
250
326
 
251
- void gumbo_print_caret_diagnostic(
252
- GumboParser* parser, const GumboError* error, const char* source_text) {
327
+ void gumbo_print_caret_diagnostic (
328
+ const GumboError* error,
329
+ const char* source_text,
330
+ size_t source_length
331
+ ) {
253
332
  GumboStringBuffer text;
254
- gumbo_string_buffer_init(parser, &text);
255
- gumbo_caret_diagnostic_to_string(parser, error, source_text, &text);
333
+ gumbo_string_buffer_init(&text);
334
+ gumbo_caret_diagnostic_to_string(error, source_text, source_length, &text);
256
335
  printf("%.*s", (int) text.length, text.data);
257
- gumbo_string_buffer_destroy(parser, &text);
336
+ gumbo_string_buffer_destroy(&text);
258
337
  }
259
338
 
260
- void gumbo_error_destroy(GumboParser* parser, GumboError* error) {
261
- if (error->type == GUMBO_ERR_PARSER ||
262
- error->type == GUMBO_ERR_UNACKNOWLEDGED_SELF_CLOSING_TAG) {
263
- gumbo_vector_destroy(parser, &error->v.parser.tag_stack);
339
+ void gumbo_error_destroy(GumboError* error) {
340
+ if (
341
+ error->type == GUMBO_ERR_PARSER
342
+ || error->type == GUMBO_ERR_UNACKNOWLEDGED_SELF_CLOSING_TAG
343
+ || error->type == GUMBO_ERR_SELF_CLOSING_END_TAG
344
+ ) {
345
+ gumbo_vector_destroy(&error->v.parser.tag_stack);
264
346
  } else if (error->type == GUMBO_ERR_DUPLICATE_ATTR) {
265
- gumbo_parser_deallocate(parser, (void*) error->v.duplicate_attr.name);
347
+ gumbo_free((void*) error->v.duplicate_attr.name);
266
348
  }
267
- gumbo_parser_deallocate(parser, error);
349
+ gumbo_free(error);
268
350
  }
269
351
 
270
352
  void gumbo_init_errors(GumboParser* parser) {
271
- gumbo_vector_init(parser, 5, &parser->_output->errors);
353
+ gumbo_vector_init(5, &parser->_output->errors);
272
354
  }
273
355
 
274
356
  void gumbo_destroy_errors(GumboParser* parser) {
275
357
  for (unsigned int i = 0; i < parser->_output->errors.length; ++i) {
276
- gumbo_error_destroy(parser, parser->_output->errors.data[i]);
358
+ gumbo_error_destroy(parser->_output->errors.data[i]);
277
359
  }
278
- gumbo_vector_destroy(parser, &parser->_output->errors);
360
+ gumbo_vector_destroy(&parser->_output->errors);
279
361
  }