nokogumbo 1.4.2 → 1.4.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/gumbo-parser/src/attribute.c +1 -1
- data/gumbo-parser/src/char_ref.c +37 -67
- data/gumbo-parser/src/char_ref.h +3 -4
- data/gumbo-parser/src/char_ref.rl +6 -1
- data/gumbo-parser/src/error.c +50 -51
- data/gumbo-parser/src/error.h +7 -9
- data/gumbo-parser/src/gumbo.h +45 -181
- data/gumbo-parser/src/parser.c +1397 -989
- data/gumbo-parser/src/string_buffer.c +14 -10
- data/gumbo-parser/src/string_buffer.h +9 -6
- data/gumbo-parser/src/string_piece.c +5 -6
- data/gumbo-parser/src/string_piece.h +2 -3
- data/gumbo-parser/src/tag.c +36 -166
- data/gumbo-parser/src/tag.in +150 -0
- data/gumbo-parser/src/tag_enum.h +153 -0
- data/gumbo-parser/src/tag_gperf.h +105 -0
- data/gumbo-parser/src/tag_sizes.h +4 -0
- data/gumbo-parser/src/tag_strings.h +153 -0
- data/gumbo-parser/src/tokenizer.c +264 -360
- data/gumbo-parser/src/tokenizer.h +2 -2
- data/gumbo-parser/src/utf8.c +44 -44
- data/gumbo-parser/src/utf8.h +1 -2
- data/gumbo-parser/src/util.c +1 -1
- data/gumbo-parser/src/util.h +0 -2
- data/gumbo-parser/src/vector.c +17 -17
- data/gumbo-parser/src/vector.h +6 -8
- metadata +8 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 2ea9a9bee29cbf479c7afab79a4df38040ed60e7
|
4
|
+
data.tar.gz: f7e87af169388de1f0b32fd457e73182169bc503
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 6413991d638e6bacc465442546c7ca7756fb0cd8f577ccb9b65d60da87c009b7b2d212bb3fe928b7bbde97ba66f9180d8c5e3efdd02648e33e5d7546b452476f
|
7
|
+
data.tar.gz: 4a3486ed36ab147b828e9f79244c36a2d33b2a8dccc61fa8d6abf75556ebb76313c7d009c2502bc3142e38644759c1e6c6bab5d1c1b21b346bf16702802650e4
|
data/gumbo-parser/src/attribute.c
CHANGED
@@ -27,7 +27,7 @@ struct GumboInternalParser;
|
|
27
27
|
|
28
28
|
GumboAttribute* gumbo_get_attribute(
|
29
29
|
const GumboVector* attributes, const char* name) {
|
30
|
-
for (int i = 0; i < attributes->length; ++i) {
|
30
|
+
for (unsigned int i = 0; i < attributes->length; ++i) {
|
31
31
|
GumboAttribute* attr = attributes->data[i];
|
32
32
|
if (!strcasecmp(attr->name, name)) {
|
33
33
|
return attr;
|
data/gumbo-parser/src/char_ref.c
CHANGED
@@ -30,7 +30,7 @@
|
|
30
30
|
#include <ctype.h>
|
31
31
|
#include <stddef.h>
|
32
32
|
#include <stdio.h>
|
33
|
-
#include <string.h>
|
33
|
+
#include <string.h> // Only for debug assertions at present.
|
34
34
|
|
35
35
|
#include "error.h"
|
36
36
|
#include "string_piece.h"
|
@@ -49,44 +49,18 @@ typedef struct {
|
|
49
49
|
int to_char;
|
50
50
|
} CharReplacement;
|
51
51
|
|
52
|
-
static const CharReplacement kCharReplacements[] = {
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
{ 0x89, 0x2030 },
|
65
|
-
{ 0x8A, 0x0160 },
|
66
|
-
{ 0x8B, 0x2039 },
|
67
|
-
{ 0x8C, 0x0152 },
|
68
|
-
{ 0x8D, 0x008D },
|
69
|
-
{ 0x8E, 0x017D },
|
70
|
-
{ 0x8F, 0x008F },
|
71
|
-
{ 0x90, 0x0090 },
|
72
|
-
{ 0x91, 0x2018 },
|
73
|
-
{ 0x92, 0x2019 },
|
74
|
-
{ 0x93, 0x201C },
|
75
|
-
{ 0x94, 0x201D },
|
76
|
-
{ 0x95, 0x2022 },
|
77
|
-
{ 0x96, 0x2013 },
|
78
|
-
{ 0x97, 0x2014 },
|
79
|
-
{ 0x98, 0x02DC },
|
80
|
-
{ 0x99, 0x2122 },
|
81
|
-
{ 0x9A, 0x0161 },
|
82
|
-
{ 0x9B, 0x203A },
|
83
|
-
{ 0x9C, 0x0153 },
|
84
|
-
{ 0x9D, 0x009D },
|
85
|
-
{ 0x9E, 0x017E },
|
86
|
-
{ 0x9F, 0x0178 },
|
87
|
-
// Terminator.
|
88
|
-
{ -1, -1 }
|
89
|
-
};
|
52
|
+
static const CharReplacement kCharReplacements[] = {{0x00, 0xfffd},
|
53
|
+
{0x0d, 0x000d}, {0x80, 0x20ac}, {0x81, 0x0081}, {0x82, 0x201A},
|
54
|
+
{0x83, 0x0192}, {0x84, 0x201E}, {0x85, 0x2026}, {0x86, 0x2020},
|
55
|
+
{0x87, 0x2021}, {0x88, 0x02C6}, {0x89, 0x2030}, {0x8A, 0x0160},
|
56
|
+
{0x8B, 0x2039}, {0x8C, 0x0152}, {0x8D, 0x008D}, {0x8E, 0x017D},
|
57
|
+
{0x8F, 0x008F}, {0x90, 0x0090}, {0x91, 0x2018}, {0x92, 0x2019},
|
58
|
+
{0x93, 0x201C}, {0x94, 0x201D}, {0x95, 0x2022}, {0x96, 0x2013},
|
59
|
+
{0x97, 0x2014}, {0x98, 0x02DC}, {0x99, 0x2122}, {0x9A, 0x0161},
|
60
|
+
{0x9B, 0x203A}, {0x9C, 0x0153}, {0x9D, 0x009D}, {0x9E, 0x017E},
|
61
|
+
{0x9F, 0x0178},
|
62
|
+
// Terminator.
|
63
|
+
{-1, -1}};
|
90
64
|
|
91
65
|
static int parse_digit(int c, bool allow_hex) {
|
92
66
|
if (c >= '0' && c <= '9') {
|
@@ -111,9 +85,8 @@ static void add_no_digit_error(
|
|
111
85
|
error->type = GUMBO_ERR_NUMERIC_CHAR_REF_NO_DIGITS;
|
112
86
|
}
|
113
87
|
|
114
|
-
static void add_codepoint_error(
|
115
|
-
|
116
|
-
GumboErrorType type, int codepoint) {
|
88
|
+
static void add_codepoint_error(struct GumboInternalParser* parser,
|
89
|
+
Utf8Iterator* input, GumboErrorType type, int codepoint) {
|
117
90
|
GumboError* error = gumbo_add_error(parser);
|
118
91
|
if (!error) {
|
119
92
|
return;
|
@@ -123,9 +96,8 @@ static void add_codepoint_error(
|
|
123
96
|
error->v.codepoint = codepoint;
|
124
97
|
}
|
125
98
|
|
126
|
-
static void add_named_reference_error(
|
127
|
-
|
128
|
-
GumboErrorType type, GumboStringPiece text) {
|
99
|
+
static void add_named_reference_error(struct GumboInternalParser* parser,
|
100
|
+
Utf8Iterator* input, GumboErrorType type, GumboStringPiece text) {
|
129
101
|
GumboError* error = gumbo_add_error(parser);
|
130
102
|
if (!error) {
|
131
103
|
return;
|
@@ -211,8 +183,7 @@ static bool maybe_add_invalid_named_reference(
|
|
211
183
|
// worry about consuming characters.
|
212
184
|
const char* start = utf8iterator_get_char_pointer(input);
|
213
185
|
int c = utf8iterator_current(input);
|
214
|
-
while ((c >= 'a' && c <= 'z') ||
|
215
|
-
(c >= 'A' && c <= 'Z') ||
|
186
|
+
while ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') ||
|
216
187
|
(c >= '0' && c <= '9')) {
|
217
188
|
utf8iterator_next(input);
|
218
189
|
c = utf8iterator_current(input);
|
@@ -228,12 +199,11 @@ static bool maybe_add_invalid_named_reference(
|
|
228
199
|
return true;
|
229
200
|
}
|
230
201
|
|
231
|
-
|
232
202
|
#line 2465 "char_ref.rl"
|
233
203
|
|
204
|
+
// clang-format off
|
234
205
|
|
235
|
-
|
236
|
-
#line 237 "char_ref.c"
|
206
|
+
#line 238 "char_ref.c"
|
237
207
|
static const short _char_ref_actions[] = {
|
238
208
|
0, 1, 0, 1, 1, 1, 2, 1,
|
239
209
|
3, 1, 4, 1, 5, 1, 6, 1,
|
@@ -13960,17 +13930,15 @@ static const short _char_ref_eof_trans[] = {
|
|
13960
13930
|
};
|
13961
13931
|
|
13962
13932
|
static const int char_ref_start = 7623;
|
13963
|
-
static const int char_ref_first_final = 7623;
|
13964
|
-
static const int char_ref_error = 0;
|
13965
13933
|
|
13966
13934
|
static const int char_ref_en_valid_named_ref = 7623;
|
13967
13935
|
|
13968
13936
|
|
13969
|
-
#line
|
13937
|
+
#line 2469 "char_ref.rl"
|
13938
|
+
// clang-format on
|
13970
13939
|
|
13971
|
-
static bool consume_named_ref(
|
13972
|
-
|
13973
|
-
OneOrTwoCodepoints* output) {
|
13940
|
+
static bool consume_named_ref(struct GumboInternalParser* parser,
|
13941
|
+
Utf8Iterator* input, bool is_in_attribute, OneOrTwoCodepoints* output) {
|
13974
13942
|
assert(output->first == kGumboNoChar);
|
13975
13943
|
const char* p = utf8iterator_get_char_pointer(input);
|
13976
13944
|
const char* pe = utf8iterator_get_end_pointer(input);
|
@@ -13979,8 +13947,9 @@ static bool consume_named_ref(
|
|
13979
13947
|
const char *ts, *start;
|
13980
13948
|
int cs, act;
|
13981
13949
|
|
13950
|
+
// clang-format off
|
13982
13951
|
|
13983
|
-
#line
|
13952
|
+
#line 13985 "char_ref.c"
|
13984
13953
|
{
|
13985
13954
|
cs = char_ref_start;
|
13986
13955
|
ts = 0;
|
@@ -13988,14 +13957,15 @@ static bool consume_named_ref(
|
|
13988
13957
|
act = 0;
|
13989
13958
|
}
|
13990
13959
|
|
13991
|
-
#line
|
13960
|
+
#line 2484 "char_ref.rl"
|
13992
13961
|
// Avoid unused variable warnings.
|
13993
13962
|
(void) act;
|
13994
13963
|
(void) ts;
|
13964
|
+
(void) char_ref_en_valid_named_ref;
|
13995
13965
|
|
13996
13966
|
start = p;
|
13997
13967
|
|
13998
|
-
#line
|
13968
|
+
#line 14001 "char_ref.c"
|
13999
13969
|
{
|
14000
13970
|
int _slen;
|
14001
13971
|
int _trans;
|
@@ -14017,7 +13987,7 @@ _resume:
|
|
14017
13987
|
#line 1 "NONE"
|
14018
13988
|
{ts = p;}
|
14019
13989
|
break;
|
14020
|
-
#line
|
13990
|
+
#line 14023 "char_ref.c"
|
14021
13991
|
}
|
14022
13992
|
}
|
14023
13993
|
|
@@ -23000,7 +22970,7 @@ _eof_trans:
|
|
23000
22970
|
#line 2273 "char_ref.rl"
|
23001
22971
|
{{p = ((te))-1;}{ output->first = 0xd7; {p++; goto _out; } }}
|
23002
22972
|
break;
|
23003
|
-
#line
|
22973
|
+
#line 23006 "char_ref.c"
|
23004
22974
|
}
|
23005
22975
|
}
|
23006
22976
|
|
@@ -23013,7 +22983,7 @@ _again:
|
|
23013
22983
|
#line 1 "NONE"
|
23014
22984
|
{ts = 0;}
|
23015
22985
|
break;
|
23016
|
-
#line
|
22986
|
+
#line 23019 "char_ref.c"
|
23017
22987
|
}
|
23018
22988
|
}
|
23019
22989
|
|
@@ -23033,7 +23003,8 @@ _again:
|
|
23033
23003
|
_out: {}
|
23034
23004
|
}
|
23035
23005
|
|
23036
|
-
#line
|
23006
|
+
#line 2491 "char_ref.rl"
|
23007
|
+
// clang-format on
|
23037
23008
|
|
23038
23009
|
if (cs >= 7623) {
|
23039
23010
|
assert(output->first != kGumboNoChar);
|
@@ -23067,10 +23038,9 @@ _again:
|
|
23067
23038
|
}
|
23068
23039
|
}
|
23069
23040
|
|
23070
|
-
bool consume_char_ref(
|
23071
|
-
struct
|
23072
|
-
|
23073
|
-
OneOrTwoCodepoints* output) {
|
23041
|
+
bool consume_char_ref(struct GumboInternalParser* parser,
|
23042
|
+
struct GumboInternalUtf8Iterator* input, int additional_allowed_char,
|
23043
|
+
bool is_in_attribute, OneOrTwoCodepoints* output) {
|
23074
23044
|
utf8iterator_mark(input);
|
23075
23045
|
utf8iterator_next(input);
|
23076
23046
|
int c = utf8iterator_current(input);
|
data/gumbo-parser/src/char_ref.h
CHANGED
@@ -49,10 +49,9 @@ typedef struct {
|
|
49
49
|
// errors to the GumboParser's errors vector, if the spec calls for it. Pass a
|
50
50
|
// space for the "additional allowed char" when the spec says "with no
|
51
51
|
// additional allowed char". Returns false on parse error, true otherwise.
|
52
|
-
bool consume_char_ref(
|
53
|
-
struct
|
54
|
-
|
55
|
-
OneOrTwoCodepoints* output);
|
52
|
+
bool consume_char_ref(struct GumboInternalParser* parser,
|
53
|
+
struct GumboInternalUtf8Iterator* input, int additional_allowed_char,
|
54
|
+
bool is_in_attribute, OneOrTwoCodepoints* output);
|
56
55
|
|
57
56
|
#ifdef __cplusplus
|
58
57
|
}
|
data/gumbo-parser/src/char_ref.rl
CHANGED
@@ -2464,7 +2464,9 @@ valid_named_ref := |*
|
|
2464
2464
|
*|;
|
2465
2465
|
}%%
|
2466
2466
|
|
2467
|
-
|
2467
|
+
// clang-format off
|
2468
|
+
%% write data noerror nofinal;
|
2469
|
+
// clang-format on
|
2468
2470
|
|
2469
2471
|
static bool consume_named_ref(
|
2470
2472
|
struct GumboInternalParser* parser, Utf8Iterator* input, bool is_in_attribute,
|
@@ -2477,13 +2479,16 @@ static bool consume_named_ref(
|
|
2477
2479
|
const char *ts, *start;
|
2478
2480
|
int cs, act;
|
2479
2481
|
|
2482
|
+
// clang-format off
|
2480
2483
|
%% write init;
|
2481
2484
|
// Avoid unused variable warnings.
|
2482
2485
|
(void) act;
|
2483
2486
|
(void) ts;
|
2487
|
+
(void) char_ref_en_valid_named_ref;
|
2484
2488
|
|
2485
2489
|
start = p;
|
2486
2490
|
%% write exec;
|
2491
|
+
// clang-format on
|
2487
2492
|
|
2488
2493
|
if (cs >= %%{ write first_final; }%%) {
|
2489
2494
|
assert(output->first != kGumboNoChar);
|
data/gumbo-parser/src/error.c
CHANGED
@@ -27,18 +27,17 @@
|
|
27
27
|
#include "util.h"
|
28
28
|
#include "vector.h"
|
29
29
|
|
30
|
-
static const size_t kMessageBufferSize = 256;
|
31
|
-
|
32
30
|
// Prints a formatted message to a StringBuffer. This automatically resizes the
|
33
31
|
// StringBuffer as necessary to fit the message. Returns the number of bytes
|
34
32
|
// written.
|
35
|
-
static int print_message(
|
36
|
-
|
33
|
+
static int print_message(
|
34
|
+
GumboParser* parser, GumboStringBuffer* output, const char* format, ...) {
|
37
35
|
va_list args;
|
38
|
-
va_start(args, format);
|
39
36
|
int remaining_capacity = output->capacity - output->length;
|
40
|
-
|
41
|
-
|
37
|
+
va_start(args, format);
|
38
|
+
int bytes_written = vsnprintf(
|
39
|
+
output->data + output->length, remaining_capacity, format, args);
|
40
|
+
va_end(args);
|
42
41
|
#ifdef _MSC_VER
|
43
42
|
if (bytes_written == -1) {
|
44
43
|
// vsnprintf returns -1 on MSVC++ if there's not enough capacity, instead of
|
@@ -47,15 +46,15 @@ static int print_message(GumboParser* parser, GumboStringBuffer* output,
|
|
47
46
|
// we retry (letting it fail and returning 0 if it doesn't), since there's
|
48
47
|
// no way to smartly resize the buffer.
|
49
48
|
gumbo_string_buffer_reserve(parser, output->capacity * 2, output);
|
50
|
-
|
51
|
-
|
49
|
+
va_start(args, format);
|
50
|
+
int result = vsnprintf(
|
51
|
+
output->data + output->length, remaining_capacity, format, args);
|
52
52
|
va_end(args);
|
53
53
|
return result == -1 ? 0 : result;
|
54
54
|
}
|
55
55
|
#else
|
56
56
|
// -1 in standard C99 indicates an encoding error. Return 0 and do nothing.
|
57
57
|
if (bytes_written == -1) {
|
58
|
-
va_end(args);
|
59
58
|
return 0;
|
60
59
|
}
|
61
60
|
#endif
|
@@ -64,19 +63,19 @@ static int print_message(GumboParser* parser, GumboStringBuffer* output,
|
|
64
63
|
gumbo_string_buffer_reserve(
|
65
64
|
parser, output->capacity + bytes_written, output);
|
66
65
|
remaining_capacity = output->capacity - output->length;
|
67
|
-
|
68
|
-
|
66
|
+
va_start(args, format);
|
67
|
+
bytes_written = vsnprintf(
|
68
|
+
output->data + output->length, remaining_capacity, format, args);
|
69
|
+
va_end(args);
|
69
70
|
}
|
70
71
|
output->length += bytes_written;
|
71
|
-
va_end(args);
|
72
72
|
return bytes_written;
|
73
73
|
}
|
74
74
|
|
75
|
-
static void print_tag_stack(
|
76
|
-
GumboParser* parser, const GumboParserError* error,
|
75
|
+
static void print_tag_stack(GumboParser* parser, const GumboParserError* error,
|
77
76
|
GumboStringBuffer* output) {
|
78
77
|
print_message(parser, output, " Currently open tags: ");
|
79
|
-
for (int i = 0; i < error->tag_stack.length; ++i) {
|
78
|
+
for (unsigned int i = 0; i < error->tag_stack.length; ++i) {
|
80
79
|
if (i) {
|
81
80
|
print_message(parser, output, ", ");
|
82
81
|
}
|
@@ -87,12 +86,11 @@ static void print_tag_stack(
|
|
87
86
|
}
|
88
87
|
|
89
88
|
static void handle_parser_error(GumboParser* parser,
|
90
|
-
|
91
|
-
GumboStringBuffer* output) {
|
89
|
+
const GumboParserError* error, GumboStringBuffer* output) {
|
92
90
|
if (error->parser_state == GUMBO_INSERTION_MODE_INITIAL &&
|
93
91
|
error->input_type != GUMBO_TOKEN_DOCTYPE) {
|
94
|
-
print_message(
|
95
|
-
|
92
|
+
print_message(
|
93
|
+
parser, output, "The doctype must be the first token in the document");
|
96
94
|
return;
|
97
95
|
}
|
98
96
|
|
@@ -151,13 +149,14 @@ static const char* find_last_newline(
|
|
151
149
|
static const char* find_next_newline(
|
152
150
|
const char* original_text, const char* error_location) {
|
153
151
|
const char* c = error_location;
|
154
|
-
for (; *c && *c != '\n'; ++c)
|
152
|
+
for (; *c && *c != '\n'; ++c)
|
153
|
+
;
|
155
154
|
return c;
|
156
155
|
}
|
157
156
|
|
158
157
|
GumboError* gumbo_add_error(GumboParser* parser) {
|
159
158
|
int max_errors = parser->_options->max_errors;
|
160
|
-
if (max_errors >= 0 && parser->_output->errors.length >= max_errors) {
|
159
|
+
if (max_errors >= 0 && parser->_output->errors.length >= (unsigned int) max_errors) {
|
161
160
|
return NULL;
|
162
161
|
}
|
163
162
|
GumboError* error = gumbo_parser_allocate(parser, sizeof(GumboError));
|
@@ -167,50 +166,52 @@ GumboError* gumbo_add_error(GumboParser* parser) {
|
|
167
166
|
|
168
167
|
void gumbo_error_to_string(
|
169
168
|
GumboParser* parser, const GumboError* error, GumboStringBuffer* output) {
|
170
|
-
print_message(
|
171
|
-
|
169
|
+
print_message(
|
170
|
+
parser, output, "@%d:%d: ", error->position.line, error->position.column);
|
172
171
|
switch (error->type) {
|
173
172
|
case GUMBO_ERR_UTF8_INVALID:
|
174
|
-
print_message(
|
175
|
-
|
173
|
+
print_message(
|
174
|
+
parser, output, "Invalid UTF8 character 0x%x", error->v.codepoint);
|
176
175
|
break;
|
177
176
|
case GUMBO_ERR_UTF8_TRUNCATED:
|
178
177
|
print_message(parser, output,
|
179
|
-
|
180
|
-
|
178
|
+
"Input stream ends with a truncated UTF8 character 0x%x",
|
179
|
+
error->v.codepoint);
|
181
180
|
break;
|
182
181
|
case GUMBO_ERR_NUMERIC_CHAR_REF_NO_DIGITS:
|
183
|
-
print_message(
|
184
|
-
|
182
|
+
print_message(
|
183
|
+
parser, output, "No digits after &# in numeric character reference");
|
185
184
|
break;
|
186
185
|
case GUMBO_ERR_NUMERIC_CHAR_REF_WITHOUT_SEMICOLON:
|
187
186
|
print_message(parser, output,
|
188
|
-
|
189
|
-
|
187
|
+
"The numeric character reference &#%d should be followed "
|
188
|
+
"by a semicolon",
|
189
|
+
error->v.codepoint);
|
190
190
|
break;
|
191
191
|
case GUMBO_ERR_NUMERIC_CHAR_REF_INVALID:
|
192
192
|
print_message(parser, output,
|
193
|
-
|
194
|
-
|
193
|
+
"The numeric character reference &#%d; encodes an invalid "
|
194
|
+
"unicode codepoint",
|
195
|
+
error->v.codepoint);
|
195
196
|
break;
|
196
197
|
case GUMBO_ERR_NAMED_CHAR_REF_WITHOUT_SEMICOLON:
|
197
198
|
// The textual data came from one of the literal strings in the table, and
|
198
199
|
// so it'll be null-terminated.
|
199
200
|
print_message(parser, output,
|
200
|
-
|
201
|
-
|
201
|
+
"The named character reference &%.*s should be followed by a "
|
202
|
+
"semicolon",
|
203
|
+
(int) error->v.text.length, error->v.text.data);
|
202
204
|
break;
|
203
205
|
case GUMBO_ERR_NAMED_CHAR_REF_INVALID:
|
204
206
|
print_message(parser, output,
|
205
|
-
|
206
|
-
|
207
|
+
"The named character reference &%.*s; is not a valid entity name",
|
208
|
+
(int) error->v.text.length, error->v.text.data);
|
207
209
|
break;
|
208
210
|
case GUMBO_ERR_DUPLICATE_ATTR:
|
209
211
|
print_message(parser, output,
|
210
|
-
|
211
|
-
|
212
|
-
|
213
|
-
error->v.duplicate_attr.new_index);
|
212
|
+
"Attribute %s occurs multiple times, at positions %d and %d",
|
213
|
+
error->v.duplicate_attr.name, error->v.duplicate_attr.original_index,
|
214
|
+
error->v.duplicate_attr.new_index);
|
214
215
|
break;
|
215
216
|
case GUMBO_ERR_PARSER:
|
216
217
|
case GUMBO_ERR_UNACKNOWLEDGED_SELF_CLOSING_TAG:
|
@@ -218,21 +219,19 @@ void gumbo_error_to_string(
|
|
218
219
|
break;
|
219
220
|
default:
|
220
221
|
print_message(parser, output,
|
221
|
-
|
222
|
+
"Tokenizer error with an unimplemented error message");
|
222
223
|
break;
|
223
224
|
}
|
224
225
|
gumbo_string_buffer_append_codepoint(parser, '.', output);
|
225
226
|
}
|
226
227
|
|
227
|
-
void gumbo_caret_diagnostic_to_string(
|
228
|
-
|
229
|
-
|
228
|
+
void gumbo_caret_diagnostic_to_string(GumboParser* parser,
|
229
|
+
const GumboError* error, const char* source_text,
|
230
|
+
GumboStringBuffer* output) {
|
230
231
|
gumbo_error_to_string(parser, error, output);
|
231
232
|
|
232
|
-
const char* line_start =
|
233
|
-
|
234
|
-
const char* line_end =
|
235
|
-
find_next_newline(source_text, error->original_text);
|
233
|
+
const char* line_start = find_last_newline(source_text, error->original_text);
|
234
|
+
const char* line_end = find_next_newline(source_text, error->original_text);
|
236
235
|
GumboStringPiece original_line;
|
237
236
|
original_line.data = line_start;
|
238
237
|
original_line.length = line_end - line_start;
|
@@ -273,7 +272,7 @@ void gumbo_init_errors(GumboParser* parser) {
|
|
273
272
|
}
|
274
273
|
|
275
274
|
void gumbo_destroy_errors(GumboParser* parser) {
|
276
|
-
for (int i = 0; i < parser->_output->errors.length; ++i) {
|
275
|
+
for (unsigned int i = 0; i < parser->_output->errors.length; ++i) {
|
277
276
|
gumbo_error_destroy(parser, parser->_output->errors.data[i]);
|
278
277
|
}
|
279
278
|
gumbo_vector_destroy(parser, &parser->_output->errors);
|