nokogumbo 1.5.0 → 2.0.0.pre.alpha

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +56 -0
  3. data/README.md +146 -22
  4. data/ext/nokogumbo/extconf.rb +116 -0
  5. data/ext/{nokogumboc → nokogumbo}/nokogumbo.c +174 -71
  6. data/gumbo-parser/src/ascii.c +33 -0
  7. data/gumbo-parser/src/ascii.h +31 -0
  8. data/gumbo-parser/src/attribute.c +26 -28
  9. data/gumbo-parser/src/attribute.h +3 -23
  10. data/gumbo-parser/src/char_ref.c +135 -2351
  11. data/gumbo-parser/src/char_ref.h +13 -29
  12. data/gumbo-parser/src/error.c +215 -133
  13. data/gumbo-parser/src/error.h +34 -49
  14. data/gumbo-parser/src/foreign_attrs.c +104 -0
  15. data/gumbo-parser/src/gumbo.h +506 -304
  16. data/gumbo-parser/src/insertion_mode.h +4 -28
  17. data/gumbo-parser/src/macros.h +91 -0
  18. data/gumbo-parser/src/parser.c +1989 -1431
  19. data/gumbo-parser/src/parser.h +6 -22
  20. data/gumbo-parser/src/replacement.h +33 -0
  21. data/gumbo-parser/src/string_buffer.c +43 -50
  22. data/gumbo-parser/src/string_buffer.h +24 -40
  23. data/gumbo-parser/src/string_piece.c +39 -39
  24. data/gumbo-parser/src/svg_attrs.c +174 -0
  25. data/gumbo-parser/src/svg_tags.c +137 -0
  26. data/gumbo-parser/src/tag.c +186 -59
  27. data/gumbo-parser/src/tag_lookup.c +382 -0
  28. data/gumbo-parser/src/tag_lookup.h +13 -0
  29. data/gumbo-parser/src/token_type.h +1 -25
  30. data/gumbo-parser/src/tokenizer.c +899 -495
  31. data/gumbo-parser/src/tokenizer.h +37 -37
  32. data/gumbo-parser/src/tokenizer_states.h +6 -22
  33. data/gumbo-parser/src/utf8.c +103 -86
  34. data/gumbo-parser/src/utf8.h +37 -41
  35. data/gumbo-parser/src/util.c +48 -38
  36. data/gumbo-parser/src/util.h +10 -40
  37. data/gumbo-parser/src/vector.c +45 -57
  38. data/gumbo-parser/src/vector.h +17 -39
  39. data/lib/nokogumbo.rb +10 -174
  40. data/lib/nokogumbo/html5.rb +250 -0
  41. data/lib/nokogumbo/html5/document.rb +37 -0
  42. data/lib/nokogumbo/html5/document_fragment.rb +46 -0
  43. data/lib/nokogumbo/version.rb +3 -0
  44. data/lib/nokogumbo/xml/node.rb +57 -0
  45. metadata +32 -19
  46. data/ext/nokogumboc/extconf.rb +0 -60
  47. data/gumbo-parser/src/char_ref.rl +0 -2554
  48. data/gumbo-parser/src/string_piece.h +0 -38
  49. data/gumbo-parser/src/tag.in +0 -150
  50. data/gumbo-parser/src/tag_enum.h +0 -153
  51. data/gumbo-parser/src/tag_gperf.h +0 -105
  52. data/gumbo-parser/src/tag_sizes.h +0 -4
  53. data/gumbo-parser/src/tag_strings.h +0 -153
  54. data/gumbo-parser/visualc/include/strings.h +0 -4
  55. data/test-nokogumbo.rb +0 -190
@@ -1,23 +1,3 @@
1
- // Copyright 2011 Google Inc. All Rights Reserved.
2
- //
3
- // Licensed under the Apache License, Version 2.0 (the "License");
4
- // you may not use this file except in compliance with the License.
5
- // You may obtain a copy of the License at
6
- //
7
- // http://www.apache.org/licenses/LICENSE-2.0
8
- //
9
- // Unless required by applicable law or agreed to in writing, software
10
- // distributed under the License is distributed on an "AS IS" BASIS,
11
- // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
- // See the License for the specific language governing permissions and
13
- // limitations under the License.
14
- //
15
- // Author: jdtang@google.com (Jonathan Tang)
16
- //
17
- // Internal header for character reference handling; this should not be exposed
18
- // transitively by any public API header. This is why the functions aren't
19
- // namespaced.
20
-
21
1
  #ifndef GUMBO_CHAR_REF_H_
22
2
  #define GUMBO_CHAR_REF_H_
23
3
 
@@ -34,8 +14,8 @@ struct GumboInternalUtf8Iterator;
34
14
  extern const int kGumboNoChar;
35
15
 
36
16
  // Certain named character references generate two codepoints, not one, and so
37
- // the consume_char_ref subroutine needs to return this instead of an int. The
38
- // first field will be kGumboNoChar if no character reference was found; the
17
+ // the gumbo_consume_char_ref subroutine needs to return this instead of an int.
18
+ // The first field will be kGumboNoChar if no character reference was found; the
39
19
  // second field will be kGumboNoChar if that is the case or if the character
40
20
  // reference returns only a single codepoint.
41
21
  typedef struct {
@@ -45,16 +25,20 @@ typedef struct {
45
25
 
46
26
  // Implements the "consume a character reference" section of the spec.
47
27
  // This reads in characters from the input as necessary, and fills in a
48
- // OneOrTwoCodepoints struct containing the characters read. It may add parse
49
- // errors to the GumboParser's errors vector, if the spec calls for it. Pass a
28
+ // OneOrTwoCodepoints struct containing the characters read. It may add parse
29
+ // errors to the GumboParser's errors vector, if the spec calls for it. Pass a
50
30
  // space for the "additional allowed char" when the spec says "with no
51
- // additional allowed char". Returns false on parse error, true otherwise.
52
- bool consume_char_ref(struct GumboInternalParser* parser,
53
- struct GumboInternalUtf8Iterator* input, int additional_allowed_char,
54
- bool is_in_attribute, OneOrTwoCodepoints* output);
31
+ // additional allowed char". Returns false on parse error, true otherwise.
32
+ bool gumbo_consume_char_ref (
33
+ struct GumboInternalParser* parser,
34
+ struct GumboInternalUtf8Iterator* input,
35
+ int additional_allowed_char,
36
+ bool is_in_attribute,
37
+ OneOrTwoCodepoints* output
38
+ );
55
39
 
56
40
  #ifdef __cplusplus
57
41
  }
58
42
  #endif
59
43
 
60
- #endif // GUMBO_CHAR_REF_H_
44
+ #endif // GUMBO_CHAR_REF_H_
@@ -1,156 +1,183 @@
1
- // Copyright 2010 Google Inc. All Rights Reserved.
2
- //
3
- // Licensed under the Apache License, Version 2.0 (the "License");
4
- // you may not use this file except in compliance with the License.
5
- // You may obtain a copy of the License at
6
- //
7
- // http://www.apache.org/licenses/LICENSE-2.0
8
- //
9
- // Unless required by applicable law or agreed to in writing, software
10
- // distributed under the License is distributed on an "AS IS" BASIS,
11
- // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
- // See the License for the specific language governing permissions and
13
- // limitations under the License.
14
- //
15
- // Author: jdtang@google.com (Jonathan Tang)
1
+ /*
2
+ Copyright 2010 Google Inc.
16
3
 
17
- #include "error.h"
4
+ Licensed under the Apache License, Version 2.0 (the "License");
5
+ you may not use this file except in compliance with the License.
6
+ You may obtain a copy of the License at
7
+
8
+ https://www.apache.org/licenses/LICENSE-2.0
9
+
10
+ Unless required by applicable law or agreed to in writing, software
11
+ distributed under the License is distributed on an "AS IS" BASIS,
12
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ See the License for the specific language governing permissions and
14
+ limitations under the License.
15
+ */
18
16
 
19
17
  #include <assert.h>
18
+ #include <inttypes.h>
20
19
  #include <stdarg.h>
21
20
  #include <stdio.h>
22
21
  #include <string.h>
23
-
22
+ #include "error.h"
24
23
  #include "gumbo.h"
24
+ #include "macros.h"
25
25
  #include "parser.h"
26
26
  #include "string_buffer.h"
27
27
  #include "util.h"
28
28
  #include "vector.h"
29
29
 
30
- // Prints a formatted message to a StringBuffer. This automatically resizes the
31
- // StringBuffer as necessary to fit the message. Returns the number of bytes
30
+ // Prints a formatted message to a StringBuffer. This automatically resizes the
31
+ // StringBuffer as necessary to fit the message. Returns the number of bytes
32
32
  // written.
33
- static int print_message(
34
- GumboParser* parser, GumboStringBuffer* output, const char* format, ...) {
33
+ static int PRINTF(2) print_message (
34
+ GumboStringBuffer* output,
35
+ const char* format,
36
+ ...
37
+ ) {
35
38
  va_list args;
36
39
  int remaining_capacity = output->capacity - output->length;
37
40
  va_start(args, format);
38
- int bytes_written = vsnprintf(
39
- output->data + output->length, remaining_capacity, format, args);
41
+ int bytes_written = vsnprintf (
42
+ output->data + output->length,
43
+ remaining_capacity,
44
+ format,
45
+ args
46
+ );
40
47
  va_end(args);
41
48
  #ifdef _MSC_VER
42
49
  if (bytes_written == -1) {
43
50
  // vsnprintf returns -1 on MSVC++ if there's not enough capacity, instead of
44
51
  // returning the number of bytes that would've been written had there been
45
- // enough. In this case, we'll double the buffer size and hope it fits when
52
+ // enough. In this case, we'll double the buffer size and hope it fits when
46
53
  // we retry (letting it fail and returning 0 if it doesn't), since there's
47
54
  // no way to smartly resize the buffer.
48
- gumbo_string_buffer_reserve(parser, output->capacity * 2, output);
55
+ gumbo_string_buffer_reserve(output->capacity * 2, output);
49
56
  va_start(args, format);
50
- int result = vsnprintf(
51
- output->data + output->length, remaining_capacity, format, args);
57
+ int result = vsnprintf (
58
+ output->data + output->length,
59
+ remaining_capacity,
60
+ format,
61
+ args
62
+ );
52
63
  va_end(args);
53
64
  return result == -1 ? 0 : result;
54
65
  }
55
66
  #else
56
- // -1 in standard C99 indicates an encoding error. Return 0 and do nothing.
67
+ // -1 in standard C99 indicates an encoding error. Return 0 and do nothing.
57
68
  if (bytes_written == -1) {
58
69
  return 0;
59
70
  }
60
71
  #endif
61
72
 
62
- if (bytes_written > remaining_capacity) {
63
- gumbo_string_buffer_reserve(
64
- parser, output->capacity + bytes_written, output);
73
+ if (bytes_written >= remaining_capacity) {
74
+ gumbo_string_buffer_reserve(output->capacity + bytes_written, output);
65
75
  remaining_capacity = output->capacity - output->length;
66
76
  va_start(args, format);
67
- bytes_written = vsnprintf(
68
- output->data + output->length, remaining_capacity, format, args);
77
+ bytes_written = vsnprintf (
78
+ output->data + output->length,
79
+ remaining_capacity,
80
+ format,
81
+ args
82
+ );
69
83
  va_end(args);
70
84
  }
71
85
  output->length += bytes_written;
72
86
  return bytes_written;
73
87
  }
74
88
 
75
- static void print_tag_stack(GumboParser* parser, const GumboParserError* error,
76
- GumboStringBuffer* output) {
77
- print_message(parser, output, " Currently open tags: ");
89
+ static void print_tag_stack (
90
+ const GumboParserError* error,
91
+ GumboStringBuffer* output
92
+ ) {
93
+ print_message(output, " Currently open tags: ");
78
94
  for (unsigned int i = 0; i < error->tag_stack.length; ++i) {
79
95
  if (i) {
80
- print_message(parser, output, ", ");
96
+ print_message(output, ", ");
81
97
  }
82
98
  GumboTag tag = (GumboTag) error->tag_stack.data[i];
83
- print_message(parser, output, gumbo_normalized_tagname(tag));
99
+ print_message(output, "%s", gumbo_normalized_tagname(tag));
84
100
  }
85
- gumbo_string_buffer_append_codepoint(parser, '.', output);
101
+ gumbo_string_buffer_append_codepoint('.', output);
86
102
  }
87
103
 
88
- static void handle_parser_error(GumboParser* parser,
89
- const GumboParserError* error, GumboStringBuffer* output) {
90
- if (error->parser_state == GUMBO_INSERTION_MODE_INITIAL &&
91
- error->input_type != GUMBO_TOKEN_DOCTYPE) {
92
- print_message(
93
- parser, output, "The doctype must be the first token in the document");
104
+ static void handle_parser_error (
105
+ const GumboParserError* error,
106
+ GumboStringBuffer* output
107
+ ) {
108
+ if (
109
+ error->parser_state == GUMBO_INSERTION_MODE_INITIAL
110
+ && error->input_type != GUMBO_TOKEN_DOCTYPE
111
+ ) {
112
+ print_message (
113
+ output,
114
+ "The doctype must be the first token in the document"
115
+ );
94
116
  return;
95
117
  }
96
118
 
97
119
  switch (error->input_type) {
98
120
  case GUMBO_TOKEN_DOCTYPE:
99
- print_message(parser, output, "This is not a legal doctype");
121
+ print_message(output, "This is not a legal doctype");
100
122
  return;
101
123
  case GUMBO_TOKEN_COMMENT:
102
124
  // Should never happen; comments are always legal.
103
125
  assert(0);
104
126
  // But just in case...
105
- print_message(parser, output, "Comments aren't legal here");
127
+ print_message(output, "Comments aren't legal here");
106
128
  return;
107
129
  case GUMBO_TOKEN_CDATA:
108
130
  case GUMBO_TOKEN_WHITESPACE:
109
131
  case GUMBO_TOKEN_CHARACTER:
110
- print_message(parser, output, "Character tokens aren't legal here");
132
+ print_message(output, "Character tokens aren't legal here");
111
133
  return;
112
134
  case GUMBO_TOKEN_NULL:
113
- print_message(parser, output, "Null bytes are not allowed in HTML5");
135
+ print_message(output, "Null bytes are not allowed in HTML5");
114
136
  return;
115
137
  case GUMBO_TOKEN_EOF:
116
138
  if (error->parser_state == GUMBO_INSERTION_MODE_INITIAL) {
117
- print_message(parser, output, "You must provide a doctype");
139
+ print_message(output, "You must provide a doctype");
118
140
  } else {
119
- print_message(parser, output, "Premature end of file");
120
- print_tag_stack(parser, error, output);
141
+ print_message(output, "Premature end of file");
142
+ print_tag_stack(error, output);
121
143
  }
122
144
  return;
123
145
  case GUMBO_TOKEN_START_TAG:
124
146
  case GUMBO_TOKEN_END_TAG:
125
- print_message(parser, output, "That tag isn't allowed here");
126
- print_tag_stack(parser, error, output);
147
+ print_message(output, "That tag isn't allowed here");
148
+ print_tag_stack(error, output);
127
149
  // TODO(jdtang): Give more specific messaging.
128
150
  return;
129
151
  }
130
152
  }
131
153
 
132
154
  // Finds the preceding newline in an original source buffer from a given byte
133
- // location. Returns a character pointer to the character after that, or a
155
+ // location. Returns a character pointer to the character after that, or a
134
156
  // pointer to the beginning of the string if this is the first line.
135
- static const char* find_last_newline(
136
- const char* original_text, const char* error_location) {
137
- assert(error_location >= original_text);
157
+ static const char* find_prev_newline (
158
+ const char* source_text,
159
+ const char* error_location
160
+ ) {
161
+ assert(error_location >= source_text);
138
162
  const char* c = error_location;
139
- for (; c != original_text && *c != '\n'; --c) {
140
- // There may be an error at EOF, which would be a nul byte.
141
- assert(*c || c == error_location);
142
- }
143
- return c == original_text ? c : c + 1;
163
+ if (*c == '\n' && c != source_text)
164
+ --c;
165
+ while (c != source_text && *c != '\n')
166
+ --c;
167
+ return c == source_text ? c : c + 1;
144
168
  }
145
169
 
146
170
  // Finds the next newline in the original source buffer from a given byte
147
- // location. Returns a character pointer to that newline, or a pointer to the
171
+ // location. Returns a character pointer to that newline, or a pointer to the
148
172
  // terminating null byte if this is the last line.
149
173
  static const char* find_next_newline(
150
- const char* original_text, const char* error_location) {
174
+ const char* source_text_end,
175
+ const char* error_location
176
+ ) {
177
+ assert(error_location <= source_text_end);
151
178
  const char* c = error_location;
152
- for (; *c && *c != '\n'; ++c)
153
- ;
179
+ while (c != source_text_end && *c != '\n')
180
+ ++c;
154
181
  return c;
155
182
  }
156
183
 
@@ -159,121 +186,176 @@ GumboError* gumbo_add_error(GumboParser* parser) {
159
186
  if (max_errors >= 0 && parser->_output->errors.length >= (unsigned int) max_errors) {
160
187
  return NULL;
161
188
  }
162
- GumboError* error = gumbo_parser_allocate(parser, sizeof(GumboError));
163
- gumbo_vector_add(parser, error, &parser->_output->errors);
189
+ GumboError* error = gumbo_alloc(sizeof(GumboError));
190
+ gumbo_vector_add(error, &parser->_output->errors);
164
191
  return error;
165
192
  }
166
193
 
167
- void gumbo_error_to_string(
168
- GumboParser* parser, const GumboError* error, GumboStringBuffer* output) {
169
- print_message(
170
- parser, output, "@%d:%d: ", error->position.line, error->position.column);
194
+ void gumbo_error_to_string (
195
+ const GumboError* error,
196
+ GumboStringBuffer* output
197
+ ) {
198
+ print_message (
199
+ output,
200
+ "@%zu:%zu: ",
201
+ error->position.line,
202
+ error->position.column
203
+ );
171
204
  switch (error->type) {
172
205
  case GUMBO_ERR_UTF8_INVALID:
173
- print_message(
174
- parser, output, "Invalid UTF8 character 0x%x", error->v.codepoint);
206
+ print_message (
207
+ output,
208
+ "Invalid UTF8 character 0x%" PRIx32,
209
+ error->v.codepoint
210
+ );
175
211
  break;
176
212
  case GUMBO_ERR_UTF8_TRUNCATED:
177
- print_message(parser, output,
178
- "Input stream ends with a truncated UTF8 character 0x%x",
179
- error->v.codepoint);
213
+ print_message (
214
+ output,
215
+ "Input stream ends with a truncated UTF8 character 0x%" PRIx32,
216
+ error->v.codepoint
217
+ );
218
+ break;
219
+ case GUMBO_ERR_UTF8_NULL:
220
+ print_message (
221
+ output,
222
+ "Unexpected NULL character in the input stream"
223
+ );
180
224
  break;
181
225
  case GUMBO_ERR_NUMERIC_CHAR_REF_NO_DIGITS:
182
- print_message(
183
- parser, output, "No digits after &# in numeric character reference");
226
+ print_message (
227
+ output,
228
+ "No digits after &# in numeric character reference"
229
+ );
184
230
  break;
185
231
  case GUMBO_ERR_NUMERIC_CHAR_REF_WITHOUT_SEMICOLON:
186
- print_message(parser, output,
187
- "The numeric character reference &#%d should be followed "
188
- "by a semicolon",
189
- error->v.codepoint);
232
+ print_message (
233
+ output,
234
+ "The numeric character reference &#%" PRIu32 " should be followed "
235
+ "by a semicolon",
236
+ error->v.codepoint
237
+ );
190
238
  break;
191
239
  case GUMBO_ERR_NUMERIC_CHAR_REF_INVALID:
192
- print_message(parser, output,
193
- "The numeric character reference &#%d; encodes an invalid "
194
- "unicode codepoint",
195
- error->v.codepoint);
240
+ print_message (
241
+ output,
242
+ "The numeric character reference &#%" PRIu32 "; encodes an invalid "
243
+ "unicode codepoint",
244
+ error->v.codepoint
245
+ );
196
246
  break;
197
247
  case GUMBO_ERR_NAMED_CHAR_REF_WITHOUT_SEMICOLON:
198
248
  // The textual data came from one of the literal strings in the table, and
199
249
  // so it'll be null-terminated.
200
- print_message(parser, output,
201
- "The named character reference &%.*s should be followed by a "
202
- "semicolon",
203
- (int) error->v.text.length, error->v.text.data);
250
+ print_message (
251
+ output,
252
+ "The named character reference &%.*s should be followed by a "
253
+ "semicolon",
254
+ (int) error->v.text.length,
255
+ error->v.text.data
256
+ );
204
257
  break;
205
258
  case GUMBO_ERR_NAMED_CHAR_REF_INVALID:
206
- print_message(parser, output,
207
- "The named character reference &%.*s; is not a valid entity name",
208
- (int) error->v.text.length, error->v.text.data);
259
+ print_message (
260
+ output,
261
+ "The named character reference &%.*s; is not a valid entity name",
262
+ (int) error->v.text.length,
263
+ error->v.text.data
264
+ );
209
265
  break;
210
266
  case GUMBO_ERR_DUPLICATE_ATTR:
211
- print_message(parser, output,
212
- "Attribute %s occurs multiple times, at positions %d and %d",
213
- error->v.duplicate_attr.name, error->v.duplicate_attr.original_index,
214
- error->v.duplicate_attr.new_index);
267
+ print_message (
268
+ output,
269
+ "Attribute %s occurs multiple times, at positions %u and %u",
270
+ error->v.duplicate_attr.name,
271
+ error->v.duplicate_attr.original_index,
272
+ error->v.duplicate_attr.new_index
273
+ );
274
+ break;
275
+ case GUMBO_ERR_DASHES_OR_DOCTYPE:
276
+ print_message (
277
+ output,
278
+ "Incorrectly opened comment; expected '--', 'DOCTYPE', or '[CDATA['"
279
+ );
215
280
  break;
216
281
  case GUMBO_ERR_PARSER:
282
+ handle_parser_error(&error->v.parser, output);
283
+ break;
217
284
  case GUMBO_ERR_UNACKNOWLEDGED_SELF_CLOSING_TAG:
218
- handle_parser_error(parser, &error->v.parser, output);
285
+ case GUMBO_ERR_SELF_CLOSING_END_TAG:
286
+ print_message (
287
+ output,
288
+ "Tag cannot be self-closing");
219
289
  break;
220
290
  default:
221
- print_message(parser, output,
222
- "Tokenizer error with an unimplemented error message");
291
+ print_message (
292
+ output,
293
+ "Tokenizer error with an unimplemented error message"
294
+ );
223
295
  break;
224
296
  }
225
- gumbo_string_buffer_append_codepoint(parser, '.', output);
297
+ gumbo_string_buffer_append_codepoint('.', output);
226
298
  }
227
299
 
228
- void gumbo_caret_diagnostic_to_string(GumboParser* parser,
229
- const GumboError* error, const char* source_text,
230
- GumboStringBuffer* output) {
231
- gumbo_error_to_string(parser, error, output);
300
+ void gumbo_caret_diagnostic_to_string (
301
+ const GumboError* error,
302
+ const char* source_text,
303
+ size_t source_length,
304
+ GumboStringBuffer* output
305
+ ) {
306
+ gumbo_error_to_string(error, output);
232
307
 
233
- const char* line_start = find_last_newline(source_text, error->original_text);
234
- const char* line_end = find_next_newline(source_text, error->original_text);
308
+ const char* line_start = find_prev_newline(source_text, error->original_text);
309
+ const char* line_end = find_next_newline(source_text + source_length, error->original_text);
235
310
  GumboStringPiece original_line;
236
311
  original_line.data = line_start;
237
312
  original_line.length = line_end - line_start;
238
313
 
239
- gumbo_string_buffer_append_codepoint(parser, '\n', output);
240
- gumbo_string_buffer_append_string(parser, &original_line, output);
241
- gumbo_string_buffer_append_codepoint(parser, '\n', output);
242
- gumbo_string_buffer_reserve(
243
- parser, output->length + error->position.column, output);
244
- int num_spaces = error->position.column - 1;
245
- memset(output->data + output->length, ' ', num_spaces);
246
- output->length += num_spaces;
247
- gumbo_string_buffer_append_codepoint(parser, '^', output);
248
- gumbo_string_buffer_append_codepoint(parser, '\n', output);
314
+ gumbo_string_buffer_append_codepoint('\n', output);
315
+ gumbo_string_buffer_append_string(&original_line, output);
316
+ gumbo_string_buffer_append_codepoint('\n', output);
317
+ gumbo_string_buffer_reserve(output->length + error->position.column, output);
318
+ if (error->position.column >= 2) {
319
+ size_t num_spaces = error->position.column - 1;
320
+ memset(output->data + output->length, ' ', num_spaces);
321
+ output->length += num_spaces;
322
+ }
323
+ gumbo_string_buffer_append_codepoint('^', output);
324
+ gumbo_string_buffer_append_codepoint('\n', output);
249
325
  }
250
326
 
251
- void gumbo_print_caret_diagnostic(
252
- GumboParser* parser, const GumboError* error, const char* source_text) {
327
+ void gumbo_print_caret_diagnostic (
328
+ const GumboError* error,
329
+ const char* source_text,
330
+ size_t source_length
331
+ ) {
253
332
  GumboStringBuffer text;
254
- gumbo_string_buffer_init(parser, &text);
255
- gumbo_caret_diagnostic_to_string(parser, error, source_text, &text);
333
+ gumbo_string_buffer_init(&text);
334
+ gumbo_caret_diagnostic_to_string(error, source_text, source_length, &text);
256
335
  printf("%.*s", (int) text.length, text.data);
257
- gumbo_string_buffer_destroy(parser, &text);
336
+ gumbo_string_buffer_destroy(&text);
258
337
  }
259
338
 
260
- void gumbo_error_destroy(GumboParser* parser, GumboError* error) {
261
- if (error->type == GUMBO_ERR_PARSER ||
262
- error->type == GUMBO_ERR_UNACKNOWLEDGED_SELF_CLOSING_TAG) {
263
- gumbo_vector_destroy(parser, &error->v.parser.tag_stack);
339
+ void gumbo_error_destroy(GumboError* error) {
340
+ if (
341
+ error->type == GUMBO_ERR_PARSER
342
+ || error->type == GUMBO_ERR_UNACKNOWLEDGED_SELF_CLOSING_TAG
343
+ || error->type == GUMBO_ERR_SELF_CLOSING_END_TAG
344
+ ) {
345
+ gumbo_vector_destroy(&error->v.parser.tag_stack);
264
346
  } else if (error->type == GUMBO_ERR_DUPLICATE_ATTR) {
265
- gumbo_parser_deallocate(parser, (void*) error->v.duplicate_attr.name);
347
+ gumbo_free((void*) error->v.duplicate_attr.name);
266
348
  }
267
- gumbo_parser_deallocate(parser, error);
349
+ gumbo_free(error);
268
350
  }
269
351
 
270
352
  void gumbo_init_errors(GumboParser* parser) {
271
- gumbo_vector_init(parser, 5, &parser->_output->errors);
353
+ gumbo_vector_init(5, &parser->_output->errors);
272
354
  }
273
355
 
274
356
  void gumbo_destroy_errors(GumboParser* parser) {
275
357
  for (unsigned int i = 0; i < parser->_output->errors.length; ++i) {
276
- gumbo_error_destroy(parser, parser->_output->errors.data[i]);
358
+ gumbo_error_destroy(parser->_output->errors.data[i]);
277
359
  }
278
- gumbo_vector_destroy(parser, &parser->_output->errors);
360
+ gumbo_vector_destroy(&parser->_output->errors);
279
361
  }