nokogumbo 1.4.2 → 1.4.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/gumbo-parser/src/attribute.c +1 -1
- data/gumbo-parser/src/char_ref.c +37 -67
- data/gumbo-parser/src/char_ref.h +3 -4
- data/gumbo-parser/src/char_ref.rl +6 -1
- data/gumbo-parser/src/error.c +50 -51
- data/gumbo-parser/src/error.h +7 -9
- data/gumbo-parser/src/gumbo.h +45 -181
- data/gumbo-parser/src/parser.c +1397 -989
- data/gumbo-parser/src/string_buffer.c +14 -10
- data/gumbo-parser/src/string_buffer.h +9 -6
- data/gumbo-parser/src/string_piece.c +5 -6
- data/gumbo-parser/src/string_piece.h +2 -3
- data/gumbo-parser/src/tag.c +36 -166
- data/gumbo-parser/src/tag.in +150 -0
- data/gumbo-parser/src/tag_enum.h +153 -0
- data/gumbo-parser/src/tag_gperf.h +105 -0
- data/gumbo-parser/src/tag_sizes.h +4 -0
- data/gumbo-parser/src/tag_strings.h +153 -0
- data/gumbo-parser/src/tokenizer.c +264 -360
- data/gumbo-parser/src/tokenizer.h +2 -2
- data/gumbo-parser/src/utf8.c +44 -44
- data/gumbo-parser/src/utf8.h +1 -2
- data/gumbo-parser/src/util.c +1 -1
- data/gumbo-parser/src/util.h +0 -2
- data/gumbo-parser/src/vector.c +17 -17
- data/gumbo-parser/src/vector.h +6 -8
- metadata +8 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 2ea9a9bee29cbf479c7afab79a4df38040ed60e7
|
4
|
+
data.tar.gz: f7e87af169388de1f0b32fd457e73182169bc503
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 6413991d638e6bacc465442546c7ca7756fb0cd8f577ccb9b65d60da87c009b7b2d212bb3fe928b7bbde97ba66f9180d8c5e3efdd02648e33e5d7546b452476f
|
7
|
+
data.tar.gz: 4a3486ed36ab147b828e9f79244c36a2d33b2a8dccc61fa8d6abf75556ebb76313c7d009c2502bc3142e38644759c1e6c6bab5d1c1b21b346bf16702802650e4
|
@@ -27,7 +27,7 @@ struct GumboInternalParser;
|
|
27
27
|
|
28
28
|
GumboAttribute* gumbo_get_attribute(
|
29
29
|
const GumboVector* attributes, const char* name) {
|
30
|
-
for (int i = 0; i < attributes->length; ++i) {
|
30
|
+
for (unsigned int i = 0; i < attributes->length; ++i) {
|
31
31
|
GumboAttribute* attr = attributes->data[i];
|
32
32
|
if (!strcasecmp(attr->name, name)) {
|
33
33
|
return attr;
|
data/gumbo-parser/src/char_ref.c
CHANGED
@@ -30,7 +30,7 @@
|
|
30
30
|
#include <ctype.h>
|
31
31
|
#include <stddef.h>
|
32
32
|
#include <stdio.h>
|
33
|
-
#include <string.h>
|
33
|
+
#include <string.h> // Only for debug assertions at present.
|
34
34
|
|
35
35
|
#include "error.h"
|
36
36
|
#include "string_piece.h"
|
@@ -49,44 +49,18 @@ typedef struct {
|
|
49
49
|
int to_char;
|
50
50
|
} CharReplacement;
|
51
51
|
|
52
|
-
static const CharReplacement kCharReplacements[] = {
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
{ 0x89, 0x2030 },
|
65
|
-
{ 0x8A, 0x0160 },
|
66
|
-
{ 0x8B, 0x2039 },
|
67
|
-
{ 0x8C, 0x0152 },
|
68
|
-
{ 0x8D, 0x008D },
|
69
|
-
{ 0x8E, 0x017D },
|
70
|
-
{ 0x8F, 0x008F },
|
71
|
-
{ 0x90, 0x0090 },
|
72
|
-
{ 0x91, 0x2018 },
|
73
|
-
{ 0x92, 0x2019 },
|
74
|
-
{ 0x93, 0x201C },
|
75
|
-
{ 0x94, 0x201D },
|
76
|
-
{ 0x95, 0x2022 },
|
77
|
-
{ 0x96, 0x2013 },
|
78
|
-
{ 0x97, 0x2014 },
|
79
|
-
{ 0x98, 0x02DC },
|
80
|
-
{ 0x99, 0x2122 },
|
81
|
-
{ 0x9A, 0x0161 },
|
82
|
-
{ 0x9B, 0x203A },
|
83
|
-
{ 0x9C, 0x0153 },
|
84
|
-
{ 0x9D, 0x009D },
|
85
|
-
{ 0x9E, 0x017E },
|
86
|
-
{ 0x9F, 0x0178 },
|
87
|
-
// Terminator.
|
88
|
-
{ -1, -1 }
|
89
|
-
};
|
52
|
+
static const CharReplacement kCharReplacements[] = {{0x00, 0xfffd},
|
53
|
+
{0x0d, 0x000d}, {0x80, 0x20ac}, {0x81, 0x0081}, {0x82, 0x201A},
|
54
|
+
{0x83, 0x0192}, {0x84, 0x201E}, {0x85, 0x2026}, {0x86, 0x2020},
|
55
|
+
{0x87, 0x2021}, {0x88, 0x02C6}, {0x89, 0x2030}, {0x8A, 0x0160},
|
56
|
+
{0x8B, 0x2039}, {0x8C, 0x0152}, {0x8D, 0x008D}, {0x8E, 0x017D},
|
57
|
+
{0x8F, 0x008F}, {0x90, 0x0090}, {0x91, 0x2018}, {0x92, 0x2019},
|
58
|
+
{0x93, 0x201C}, {0x94, 0x201D}, {0x95, 0x2022}, {0x96, 0x2013},
|
59
|
+
{0x97, 0x2014}, {0x98, 0x02DC}, {0x99, 0x2122}, {0x9A, 0x0161},
|
60
|
+
{0x9B, 0x203A}, {0x9C, 0x0153}, {0x9D, 0x009D}, {0x9E, 0x017E},
|
61
|
+
{0x9F, 0x0178},
|
62
|
+
// Terminator.
|
63
|
+
{-1, -1}};
|
90
64
|
|
91
65
|
static int parse_digit(int c, bool allow_hex) {
|
92
66
|
if (c >= '0' && c <= '9') {
|
@@ -111,9 +85,8 @@ static void add_no_digit_error(
|
|
111
85
|
error->type = GUMBO_ERR_NUMERIC_CHAR_REF_NO_DIGITS;
|
112
86
|
}
|
113
87
|
|
114
|
-
static void add_codepoint_error(
|
115
|
-
|
116
|
-
GumboErrorType type, int codepoint) {
|
88
|
+
static void add_codepoint_error(struct GumboInternalParser* parser,
|
89
|
+
Utf8Iterator* input, GumboErrorType type, int codepoint) {
|
117
90
|
GumboError* error = gumbo_add_error(parser);
|
118
91
|
if (!error) {
|
119
92
|
return;
|
@@ -123,9 +96,8 @@ static void add_codepoint_error(
|
|
123
96
|
error->v.codepoint = codepoint;
|
124
97
|
}
|
125
98
|
|
126
|
-
static void add_named_reference_error(
|
127
|
-
|
128
|
-
GumboErrorType type, GumboStringPiece text) {
|
99
|
+
static void add_named_reference_error(struct GumboInternalParser* parser,
|
100
|
+
Utf8Iterator* input, GumboErrorType type, GumboStringPiece text) {
|
129
101
|
GumboError* error = gumbo_add_error(parser);
|
130
102
|
if (!error) {
|
131
103
|
return;
|
@@ -211,8 +183,7 @@ static bool maybe_add_invalid_named_reference(
|
|
211
183
|
// worry about consuming characters.
|
212
184
|
const char* start = utf8iterator_get_char_pointer(input);
|
213
185
|
int c = utf8iterator_current(input);
|
214
|
-
while ((c >= 'a' && c <= 'z') ||
|
215
|
-
(c >= 'A' && c <= 'Z') ||
|
186
|
+
while ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') ||
|
216
187
|
(c >= '0' && c <= '9')) {
|
217
188
|
utf8iterator_next(input);
|
218
189
|
c = utf8iterator_current(input);
|
@@ -228,12 +199,11 @@ static bool maybe_add_invalid_named_reference(
|
|
228
199
|
return true;
|
229
200
|
}
|
230
201
|
|
231
|
-
|
232
202
|
#line 2465 "char_ref.rl"
|
233
203
|
|
204
|
+
// clang-format off
|
234
205
|
|
235
|
-
|
236
|
-
#line 237 "char_ref.c"
|
206
|
+
#line 238 "char_ref.c"
|
237
207
|
static const short _char_ref_actions[] = {
|
238
208
|
0, 1, 0, 1, 1, 1, 2, 1,
|
239
209
|
3, 1, 4, 1, 5, 1, 6, 1,
|
@@ -13960,17 +13930,15 @@ static const short _char_ref_eof_trans[] = {
|
|
13960
13930
|
};
|
13961
13931
|
|
13962
13932
|
static const int char_ref_start = 7623;
|
13963
|
-
static const int char_ref_first_final = 7623;
|
13964
|
-
static const int char_ref_error = 0;
|
13965
13933
|
|
13966
13934
|
static const int char_ref_en_valid_named_ref = 7623;
|
13967
13935
|
|
13968
13936
|
|
13969
|
-
#line
|
13937
|
+
#line 2469 "char_ref.rl"
|
13938
|
+
// clang-format on
|
13970
13939
|
|
13971
|
-
static bool consume_named_ref(
|
13972
|
-
|
13973
|
-
OneOrTwoCodepoints* output) {
|
13940
|
+
static bool consume_named_ref(struct GumboInternalParser* parser,
|
13941
|
+
Utf8Iterator* input, bool is_in_attribute, OneOrTwoCodepoints* output) {
|
13974
13942
|
assert(output->first == kGumboNoChar);
|
13975
13943
|
const char* p = utf8iterator_get_char_pointer(input);
|
13976
13944
|
const char* pe = utf8iterator_get_end_pointer(input);
|
@@ -13979,8 +13947,9 @@ static bool consume_named_ref(
|
|
13979
13947
|
const char *ts, *start;
|
13980
13948
|
int cs, act;
|
13981
13949
|
|
13950
|
+
// clang-format off
|
13982
13951
|
|
13983
|
-
#line
|
13952
|
+
#line 13985 "char_ref.c"
|
13984
13953
|
{
|
13985
13954
|
cs = char_ref_start;
|
13986
13955
|
ts = 0;
|
@@ -13988,14 +13957,15 @@ static bool consume_named_ref(
|
|
13988
13957
|
act = 0;
|
13989
13958
|
}
|
13990
13959
|
|
13991
|
-
#line
|
13960
|
+
#line 2484 "char_ref.rl"
|
13992
13961
|
// Avoid unused variable warnings.
|
13993
13962
|
(void) act;
|
13994
13963
|
(void) ts;
|
13964
|
+
(void) char_ref_en_valid_named_ref;
|
13995
13965
|
|
13996
13966
|
start = p;
|
13997
13967
|
|
13998
|
-
#line
|
13968
|
+
#line 14001 "char_ref.c"
|
13999
13969
|
{
|
14000
13970
|
int _slen;
|
14001
13971
|
int _trans;
|
@@ -14017,7 +13987,7 @@ _resume:
|
|
14017
13987
|
#line 1 "NONE"
|
14018
13988
|
{ts = p;}
|
14019
13989
|
break;
|
14020
|
-
#line
|
13990
|
+
#line 14023 "char_ref.c"
|
14021
13991
|
}
|
14022
13992
|
}
|
14023
13993
|
|
@@ -23000,7 +22970,7 @@ _eof_trans:
|
|
23000
22970
|
#line 2273 "char_ref.rl"
|
23001
22971
|
{{p = ((te))-1;}{ output->first = 0xd7; {p++; goto _out; } }}
|
23002
22972
|
break;
|
23003
|
-
#line
|
22973
|
+
#line 23006 "char_ref.c"
|
23004
22974
|
}
|
23005
22975
|
}
|
23006
22976
|
|
@@ -23013,7 +22983,7 @@ _again:
|
|
23013
22983
|
#line 1 "NONE"
|
23014
22984
|
{ts = 0;}
|
23015
22985
|
break;
|
23016
|
-
#line
|
22986
|
+
#line 23019 "char_ref.c"
|
23017
22987
|
}
|
23018
22988
|
}
|
23019
22989
|
|
@@ -23033,7 +23003,8 @@ _again:
|
|
23033
23003
|
_out: {}
|
23034
23004
|
}
|
23035
23005
|
|
23036
|
-
#line
|
23006
|
+
#line 2491 "char_ref.rl"
|
23007
|
+
// clang-format on
|
23037
23008
|
|
23038
23009
|
if (cs >= 7623) {
|
23039
23010
|
assert(output->first != kGumboNoChar);
|
@@ -23067,10 +23038,9 @@ _again:
|
|
23067
23038
|
}
|
23068
23039
|
}
|
23069
23040
|
|
23070
|
-
bool consume_char_ref(
|
23071
|
-
struct
|
23072
|
-
|
23073
|
-
OneOrTwoCodepoints* output) {
|
23041
|
+
bool consume_char_ref(struct GumboInternalParser* parser,
|
23042
|
+
struct GumboInternalUtf8Iterator* input, int additional_allowed_char,
|
23043
|
+
bool is_in_attribute, OneOrTwoCodepoints* output) {
|
23074
23044
|
utf8iterator_mark(input);
|
23075
23045
|
utf8iterator_next(input);
|
23076
23046
|
int c = utf8iterator_current(input);
|
data/gumbo-parser/src/char_ref.h
CHANGED
@@ -49,10 +49,9 @@ typedef struct {
|
|
49
49
|
// errors to the GumboParser's errors vector, if the spec calls for it. Pass a
|
50
50
|
// space for the "additional allowed char" when the spec says "with no
|
51
51
|
// additional allowed char". Returns false on parse error, true otherwise.
|
52
|
-
bool consume_char_ref(
|
53
|
-
struct
|
54
|
-
|
55
|
-
OneOrTwoCodepoints* output);
|
52
|
+
bool consume_char_ref(struct GumboInternalParser* parser,
|
53
|
+
struct GumboInternalUtf8Iterator* input, int additional_allowed_char,
|
54
|
+
bool is_in_attribute, OneOrTwoCodepoints* output);
|
56
55
|
|
57
56
|
#ifdef __cplusplus
|
58
57
|
}
|
@@ -2464,7 +2464,9 @@ valid_named_ref := |*
|
|
2464
2464
|
*|;
|
2465
2465
|
}%%
|
2466
2466
|
|
2467
|
-
|
2467
|
+
// clang-format off
|
2468
|
+
%% write data noerror nofinal;
|
2469
|
+
// clang-format on
|
2468
2470
|
|
2469
2471
|
static bool consume_named_ref(
|
2470
2472
|
struct GumboInternalParser* parser, Utf8Iterator* input, bool is_in_attribute,
|
@@ -2477,13 +2479,16 @@ static bool consume_named_ref(
|
|
2477
2479
|
const char *ts, *start;
|
2478
2480
|
int cs, act;
|
2479
2481
|
|
2482
|
+
// clang-format off
|
2480
2483
|
%% write init;
|
2481
2484
|
// Avoid unused variable warnings.
|
2482
2485
|
(void) act;
|
2483
2486
|
(void) ts;
|
2487
|
+
(void) char_ref_en_valid_named_ref;
|
2484
2488
|
|
2485
2489
|
start = p;
|
2486
2490
|
%% write exec;
|
2491
|
+
// clang-format on
|
2487
2492
|
|
2488
2493
|
if (cs >= %%{ write first_final; }%%) {
|
2489
2494
|
assert(output->first != kGumboNoChar);
|
data/gumbo-parser/src/error.c
CHANGED
@@ -27,18 +27,17 @@
|
|
27
27
|
#include "util.h"
|
28
28
|
#include "vector.h"
|
29
29
|
|
30
|
-
static const size_t kMessageBufferSize = 256;
|
31
|
-
|
32
30
|
// Prints a formatted message to a StringBuffer. This automatically resizes the
|
33
31
|
// StringBuffer as necessary to fit the message. Returns the number of bytes
|
34
32
|
// written.
|
35
|
-
static int print_message(
|
36
|
-
|
33
|
+
static int print_message(
|
34
|
+
GumboParser* parser, GumboStringBuffer* output, const char* format, ...) {
|
37
35
|
va_list args;
|
38
|
-
va_start(args, format);
|
39
36
|
int remaining_capacity = output->capacity - output->length;
|
40
|
-
|
41
|
-
|
37
|
+
va_start(args, format);
|
38
|
+
int bytes_written = vsnprintf(
|
39
|
+
output->data + output->length, remaining_capacity, format, args);
|
40
|
+
va_end(args);
|
42
41
|
#ifdef _MSC_VER
|
43
42
|
if (bytes_written == -1) {
|
44
43
|
// vsnprintf returns -1 on MSVC++ if there's not enough capacity, instead of
|
@@ -47,15 +46,15 @@ static int print_message(GumboParser* parser, GumboStringBuffer* output,
|
|
47
46
|
// we retry (letting it fail and returning 0 if it doesn't), since there's
|
48
47
|
// no way to smartly resize the buffer.
|
49
48
|
gumbo_string_buffer_reserve(parser, output->capacity * 2, output);
|
50
|
-
|
51
|
-
|
49
|
+
va_start(args, format);
|
50
|
+
int result = vsnprintf(
|
51
|
+
output->data + output->length, remaining_capacity, format, args);
|
52
52
|
va_end(args);
|
53
53
|
return result == -1 ? 0 : result;
|
54
54
|
}
|
55
55
|
#else
|
56
56
|
// -1 in standard C99 indicates an encoding error. Return 0 and do nothing.
|
57
57
|
if (bytes_written == -1) {
|
58
|
-
va_end(args);
|
59
58
|
return 0;
|
60
59
|
}
|
61
60
|
#endif
|
@@ -64,19 +63,19 @@ static int print_message(GumboParser* parser, GumboStringBuffer* output,
|
|
64
63
|
gumbo_string_buffer_reserve(
|
65
64
|
parser, output->capacity + bytes_written, output);
|
66
65
|
remaining_capacity = output->capacity - output->length;
|
67
|
-
|
68
|
-
|
66
|
+
va_start(args, format);
|
67
|
+
bytes_written = vsnprintf(
|
68
|
+
output->data + output->length, remaining_capacity, format, args);
|
69
|
+
va_end(args);
|
69
70
|
}
|
70
71
|
output->length += bytes_written;
|
71
|
-
va_end(args);
|
72
72
|
return bytes_written;
|
73
73
|
}
|
74
74
|
|
75
|
-
static void print_tag_stack(
|
76
|
-
GumboParser* parser, const GumboParserError* error,
|
75
|
+
static void print_tag_stack(GumboParser* parser, const GumboParserError* error,
|
77
76
|
GumboStringBuffer* output) {
|
78
77
|
print_message(parser, output, " Currently open tags: ");
|
79
|
-
for (int i = 0; i < error->tag_stack.length; ++i) {
|
78
|
+
for (unsigned int i = 0; i < error->tag_stack.length; ++i) {
|
80
79
|
if (i) {
|
81
80
|
print_message(parser, output, ", ");
|
82
81
|
}
|
@@ -87,12 +86,11 @@ static void print_tag_stack(
|
|
87
86
|
}
|
88
87
|
|
89
88
|
static void handle_parser_error(GumboParser* parser,
|
90
|
-
|
91
|
-
GumboStringBuffer* output) {
|
89
|
+
const GumboParserError* error, GumboStringBuffer* output) {
|
92
90
|
if (error->parser_state == GUMBO_INSERTION_MODE_INITIAL &&
|
93
91
|
error->input_type != GUMBO_TOKEN_DOCTYPE) {
|
94
|
-
print_message(
|
95
|
-
|
92
|
+
print_message(
|
93
|
+
parser, output, "The doctype must be the first token in the document");
|
96
94
|
return;
|
97
95
|
}
|
98
96
|
|
@@ -151,13 +149,14 @@ static const char* find_last_newline(
|
|
151
149
|
static const char* find_next_newline(
|
152
150
|
const char* original_text, const char* error_location) {
|
153
151
|
const char* c = error_location;
|
154
|
-
for (; *c && *c != '\n'; ++c)
|
152
|
+
for (; *c && *c != '\n'; ++c)
|
153
|
+
;
|
155
154
|
return c;
|
156
155
|
}
|
157
156
|
|
158
157
|
GumboError* gumbo_add_error(GumboParser* parser) {
|
159
158
|
int max_errors = parser->_options->max_errors;
|
160
|
-
if (max_errors >= 0 && parser->_output->errors.length >= max_errors) {
|
159
|
+
if (max_errors >= 0 && parser->_output->errors.length >= (unsigned int) max_errors) {
|
161
160
|
return NULL;
|
162
161
|
}
|
163
162
|
GumboError* error = gumbo_parser_allocate(parser, sizeof(GumboError));
|
@@ -167,50 +166,52 @@ GumboError* gumbo_add_error(GumboParser* parser) {
|
|
167
166
|
|
168
167
|
void gumbo_error_to_string(
|
169
168
|
GumboParser* parser, const GumboError* error, GumboStringBuffer* output) {
|
170
|
-
print_message(
|
171
|
-
|
169
|
+
print_message(
|
170
|
+
parser, output, "@%d:%d: ", error->position.line, error->position.column);
|
172
171
|
switch (error->type) {
|
173
172
|
case GUMBO_ERR_UTF8_INVALID:
|
174
|
-
print_message(
|
175
|
-
|
173
|
+
print_message(
|
174
|
+
parser, output, "Invalid UTF8 character 0x%x", error->v.codepoint);
|
176
175
|
break;
|
177
176
|
case GUMBO_ERR_UTF8_TRUNCATED:
|
178
177
|
print_message(parser, output,
|
179
|
-
|
180
|
-
|
178
|
+
"Input stream ends with a truncated UTF8 character 0x%x",
|
179
|
+
error->v.codepoint);
|
181
180
|
break;
|
182
181
|
case GUMBO_ERR_NUMERIC_CHAR_REF_NO_DIGITS:
|
183
|
-
print_message(
|
184
|
-
|
182
|
+
print_message(
|
183
|
+
parser, output, "No digits after &# in numeric character reference");
|
185
184
|
break;
|
186
185
|
case GUMBO_ERR_NUMERIC_CHAR_REF_WITHOUT_SEMICOLON:
|
187
186
|
print_message(parser, output,
|
188
|
-
|
189
|
-
|
187
|
+
"The numeric character reference &#%d should be followed "
|
188
|
+
"by a semicolon",
|
189
|
+
error->v.codepoint);
|
190
190
|
break;
|
191
191
|
case GUMBO_ERR_NUMERIC_CHAR_REF_INVALID:
|
192
192
|
print_message(parser, output,
|
193
|
-
|
194
|
-
|
193
|
+
"The numeric character reference &#%d; encodes an invalid "
|
194
|
+
"unicode codepoint",
|
195
|
+
error->v.codepoint);
|
195
196
|
break;
|
196
197
|
case GUMBO_ERR_NAMED_CHAR_REF_WITHOUT_SEMICOLON:
|
197
198
|
// The textual data came from one of the literal strings in the table, and
|
198
199
|
// so it'll be null-terminated.
|
199
200
|
print_message(parser, output,
|
200
|
-
|
201
|
-
|
201
|
+
"The named character reference &%.*s should be followed by a "
|
202
|
+
"semicolon",
|
203
|
+
(int) error->v.text.length, error->v.text.data);
|
202
204
|
break;
|
203
205
|
case GUMBO_ERR_NAMED_CHAR_REF_INVALID:
|
204
206
|
print_message(parser, output,
|
205
|
-
|
206
|
-
|
207
|
+
"The named character reference &%.*s; is not a valid entity name",
|
208
|
+
(int) error->v.text.length, error->v.text.data);
|
207
209
|
break;
|
208
210
|
case GUMBO_ERR_DUPLICATE_ATTR:
|
209
211
|
print_message(parser, output,
|
210
|
-
|
211
|
-
|
212
|
-
|
213
|
-
error->v.duplicate_attr.new_index);
|
212
|
+
"Attribute %s occurs multiple times, at positions %d and %d",
|
213
|
+
error->v.duplicate_attr.name, error->v.duplicate_attr.original_index,
|
214
|
+
error->v.duplicate_attr.new_index);
|
214
215
|
break;
|
215
216
|
case GUMBO_ERR_PARSER:
|
216
217
|
case GUMBO_ERR_UNACKNOWLEDGED_SELF_CLOSING_TAG:
|
@@ -218,21 +219,19 @@ void gumbo_error_to_string(
|
|
218
219
|
break;
|
219
220
|
default:
|
220
221
|
print_message(parser, output,
|
221
|
-
|
222
|
+
"Tokenizer error with an unimplemented error message");
|
222
223
|
break;
|
223
224
|
}
|
224
225
|
gumbo_string_buffer_append_codepoint(parser, '.', output);
|
225
226
|
}
|
226
227
|
|
227
|
-
void gumbo_caret_diagnostic_to_string(
|
228
|
-
|
229
|
-
|
228
|
+
void gumbo_caret_diagnostic_to_string(GumboParser* parser,
|
229
|
+
const GumboError* error, const char* source_text,
|
230
|
+
GumboStringBuffer* output) {
|
230
231
|
gumbo_error_to_string(parser, error, output);
|
231
232
|
|
232
|
-
const char* line_start =
|
233
|
-
|
234
|
-
const char* line_end =
|
235
|
-
find_next_newline(source_text, error->original_text);
|
233
|
+
const char* line_start = find_last_newline(source_text, error->original_text);
|
234
|
+
const char* line_end = find_next_newline(source_text, error->original_text);
|
236
235
|
GumboStringPiece original_line;
|
237
236
|
original_line.data = line_start;
|
238
237
|
original_line.length = line_end - line_start;
|
@@ -273,7 +272,7 @@ void gumbo_init_errors(GumboParser* parser) {
|
|
273
272
|
}
|
274
273
|
|
275
274
|
void gumbo_destroy_errors(GumboParser* parser) {
|
276
|
-
for (int i = 0; i < parser->_output->errors.length; ++i) {
|
275
|
+
for (unsigned int i = 0; i < parser->_output->errors.length; ++i) {
|
277
276
|
gumbo_error_destroy(parser, parser->_output->errors.data[i]);
|
278
277
|
}
|
279
278
|
gumbo_vector_destroy(parser, &parser->_output->errors);
|