nokogumbo 1.4.2 → 1.4.3

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: cf20dd502d8ec6022f2c72193bb0c9a908251088
4
- data.tar.gz: 326f85766d0e4f97683f5df026f08f4dc33806e8
3
+ metadata.gz: 2ea9a9bee29cbf479c7afab79a4df38040ed60e7
4
+ data.tar.gz: f7e87af169388de1f0b32fd457e73182169bc503
5
5
  SHA512:
6
- metadata.gz: 800800652a5260bf54399e8cca1fc6e63f7ef53aea489245c5315b6e955b38aa4dfc6d7272b99898ab78150464640ac14c995aa38b9c77644dab5d73fc0e46a5
7
- data.tar.gz: 18ba647671103cfc2853a88935fe91eb965d1e6fbe1aad981438297a5035ec222b5ae6c5ed3ef127429c8b58edd02a6a5a877ba7e7ec3390d05779f7420f1521
6
+ metadata.gz: 6413991d638e6bacc465442546c7ca7756fb0cd8f577ccb9b65d60da87c009b7b2d212bb3fe928b7bbde97ba66f9180d8c5e3efdd02648e33e5d7546b452476f
7
+ data.tar.gz: 4a3486ed36ab147b828e9f79244c36a2d33b2a8dccc61fa8d6abf75556ebb76313c7d009c2502bc3142e38644759c1e6c6bab5d1c1b21b346bf16702802650e4
@@ -27,7 +27,7 @@ struct GumboInternalParser;
27
27
 
28
28
  GumboAttribute* gumbo_get_attribute(
29
29
  const GumboVector* attributes, const char* name) {
30
- for (int i = 0; i < attributes->length; ++i) {
30
+ for (unsigned int i = 0; i < attributes->length; ++i) {
31
31
  GumboAttribute* attr = attributes->data[i];
32
32
  if (!strcasecmp(attr->name, name)) {
33
33
  return attr;
@@ -30,7 +30,7 @@
30
30
  #include <ctype.h>
31
31
  #include <stddef.h>
32
32
  #include <stdio.h>
33
- #include <string.h> // Only for debug assertions at present.
33
+ #include <string.h> // Only for debug assertions at present.
34
34
 
35
35
  #include "error.h"
36
36
  #include "string_piece.h"
@@ -49,44 +49,18 @@ typedef struct {
49
49
  int to_char;
50
50
  } CharReplacement;
51
51
 
52
- static const CharReplacement kCharReplacements[] = {
53
- { 0x00, 0xfffd },
54
- { 0x0d, 0x000d },
55
- { 0x80, 0x20ac },
56
- { 0x81, 0x0081 },
57
- { 0x82, 0x201A },
58
- { 0x83, 0x0192 },
59
- { 0x84, 0x201E },
60
- { 0x85, 0x2026 },
61
- { 0x86, 0x2020 },
62
- { 0x87, 0x2021 },
63
- { 0x88, 0x02C6 },
64
- { 0x89, 0x2030 },
65
- { 0x8A, 0x0160 },
66
- { 0x8B, 0x2039 },
67
- { 0x8C, 0x0152 },
68
- { 0x8D, 0x008D },
69
- { 0x8E, 0x017D },
70
- { 0x8F, 0x008F },
71
- { 0x90, 0x0090 },
72
- { 0x91, 0x2018 },
73
- { 0x92, 0x2019 },
74
- { 0x93, 0x201C },
75
- { 0x94, 0x201D },
76
- { 0x95, 0x2022 },
77
- { 0x96, 0x2013 },
78
- { 0x97, 0x2014 },
79
- { 0x98, 0x02DC },
80
- { 0x99, 0x2122 },
81
- { 0x9A, 0x0161 },
82
- { 0x9B, 0x203A },
83
- { 0x9C, 0x0153 },
84
- { 0x9D, 0x009D },
85
- { 0x9E, 0x017E },
86
- { 0x9F, 0x0178 },
87
- // Terminator.
88
- { -1, -1 }
89
- };
52
+ static const CharReplacement kCharReplacements[] = {{0x00, 0xfffd},
53
+ {0x0d, 0x000d}, {0x80, 0x20ac}, {0x81, 0x0081}, {0x82, 0x201A},
54
+ {0x83, 0x0192}, {0x84, 0x201E}, {0x85, 0x2026}, {0x86, 0x2020},
55
+ {0x87, 0x2021}, {0x88, 0x02C6}, {0x89, 0x2030}, {0x8A, 0x0160},
56
+ {0x8B, 0x2039}, {0x8C, 0x0152}, {0x8D, 0x008D}, {0x8E, 0x017D},
57
+ {0x8F, 0x008F}, {0x90, 0x0090}, {0x91, 0x2018}, {0x92, 0x2019},
58
+ {0x93, 0x201C}, {0x94, 0x201D}, {0x95, 0x2022}, {0x96, 0x2013},
59
+ {0x97, 0x2014}, {0x98, 0x02DC}, {0x99, 0x2122}, {0x9A, 0x0161},
60
+ {0x9B, 0x203A}, {0x9C, 0x0153}, {0x9D, 0x009D}, {0x9E, 0x017E},
61
+ {0x9F, 0x0178},
62
+ // Terminator.
63
+ {-1, -1}};
90
64
 
91
65
  static int parse_digit(int c, bool allow_hex) {
92
66
  if (c >= '0' && c <= '9') {
@@ -111,9 +85,8 @@ static void add_no_digit_error(
111
85
  error->type = GUMBO_ERR_NUMERIC_CHAR_REF_NO_DIGITS;
112
86
  }
113
87
 
114
- static void add_codepoint_error(
115
- struct GumboInternalParser* parser, Utf8Iterator* input,
116
- GumboErrorType type, int codepoint) {
88
+ static void add_codepoint_error(struct GumboInternalParser* parser,
89
+ Utf8Iterator* input, GumboErrorType type, int codepoint) {
117
90
  GumboError* error = gumbo_add_error(parser);
118
91
  if (!error) {
119
92
  return;
@@ -123,9 +96,8 @@ static void add_codepoint_error(
123
96
  error->v.codepoint = codepoint;
124
97
  }
125
98
 
126
- static void add_named_reference_error(
127
- struct GumboInternalParser* parser, Utf8Iterator* input,
128
- GumboErrorType type, GumboStringPiece text) {
99
+ static void add_named_reference_error(struct GumboInternalParser* parser,
100
+ Utf8Iterator* input, GumboErrorType type, GumboStringPiece text) {
129
101
  GumboError* error = gumbo_add_error(parser);
130
102
  if (!error) {
131
103
  return;
@@ -211,8 +183,7 @@ static bool maybe_add_invalid_named_reference(
211
183
  // worry about consuming characters.
212
184
  const char* start = utf8iterator_get_char_pointer(input);
213
185
  int c = utf8iterator_current(input);
214
- while ((c >= 'a' && c <= 'z') ||
215
- (c >= 'A' && c <= 'Z') ||
186
+ while ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') ||
216
187
  (c >= '0' && c <= '9')) {
217
188
  utf8iterator_next(input);
218
189
  c = utf8iterator_current(input);
@@ -228,12 +199,11 @@ static bool maybe_add_invalid_named_reference(
228
199
  return true;
229
200
  }
230
201
 
231
-
232
202
  #line 2465 "char_ref.rl"
233
203
 
204
+ // clang-format off
234
205
 
235
-
236
- #line 237 "char_ref.c"
206
+ #line 238 "char_ref.c"
237
207
  static const short _char_ref_actions[] = {
238
208
  0, 1, 0, 1, 1, 1, 2, 1,
239
209
  3, 1, 4, 1, 5, 1, 6, 1,
@@ -13960,17 +13930,15 @@ static const short _char_ref_eof_trans[] = {
13960
13930
  };
13961
13931
 
13962
13932
  static const int char_ref_start = 7623;
13963
- static const int char_ref_first_final = 7623;
13964
- static const int char_ref_error = 0;
13965
13933
 
13966
13934
  static const int char_ref_en_valid_named_ref = 7623;
13967
13935
 
13968
13936
 
13969
- #line 2468 "char_ref.rl"
13937
+ #line 2469 "char_ref.rl"
13938
+ // clang-format on
13970
13939
 
13971
- static bool consume_named_ref(
13972
- struct GumboInternalParser* parser, Utf8Iterator* input, bool is_in_attribute,
13973
- OneOrTwoCodepoints* output) {
13940
+ static bool consume_named_ref(struct GumboInternalParser* parser,
13941
+ Utf8Iterator* input, bool is_in_attribute, OneOrTwoCodepoints* output) {
13974
13942
  assert(output->first == kGumboNoChar);
13975
13943
  const char* p = utf8iterator_get_char_pointer(input);
13976
13944
  const char* pe = utf8iterator_get_end_pointer(input);
@@ -13979,8 +13947,9 @@ static bool consume_named_ref(
13979
13947
  const char *ts, *start;
13980
13948
  int cs, act;
13981
13949
 
13950
+ // clang-format off
13982
13951
 
13983
- #line 13984 "char_ref.c"
13952
+ #line 13985 "char_ref.c"
13984
13953
  {
13985
13954
  cs = char_ref_start;
13986
13955
  ts = 0;
@@ -13988,14 +13957,15 @@ static bool consume_named_ref(
13988
13957
  act = 0;
13989
13958
  }
13990
13959
 
13991
- #line 2481 "char_ref.rl"
13960
+ #line 2484 "char_ref.rl"
13992
13961
  // Avoid unused variable warnings.
13993
13962
  (void) act;
13994
13963
  (void) ts;
13964
+ (void) char_ref_en_valid_named_ref;
13995
13965
 
13996
13966
  start = p;
13997
13967
 
13998
- #line 13999 "char_ref.c"
13968
+ #line 14001 "char_ref.c"
13999
13969
  {
14000
13970
  int _slen;
14001
13971
  int _trans;
@@ -14017,7 +13987,7 @@ _resume:
14017
13987
  #line 1 "NONE"
14018
13988
  {ts = p;}
14019
13989
  break;
14020
- #line 14021 "char_ref.c"
13990
+ #line 14023 "char_ref.c"
14021
13991
  }
14022
13992
  }
14023
13993
 
@@ -23000,7 +22970,7 @@ _eof_trans:
23000
22970
  #line 2273 "char_ref.rl"
23001
22971
  {{p = ((te))-1;}{ output->first = 0xd7; {p++; goto _out; } }}
23002
22972
  break;
23003
- #line 23004 "char_ref.c"
22973
+ #line 23006 "char_ref.c"
23004
22974
  }
23005
22975
  }
23006
22976
 
@@ -23013,7 +22983,7 @@ _again:
23013
22983
  #line 1 "NONE"
23014
22984
  {ts = 0;}
23015
22985
  break;
23016
- #line 23017 "char_ref.c"
22986
+ #line 23019 "char_ref.c"
23017
22987
  }
23018
22988
  }
23019
22989
 
@@ -23033,7 +23003,8 @@ _again:
23033
23003
  _out: {}
23034
23004
  }
23035
23005
 
23036
- #line 2487 "char_ref.rl"
23006
+ #line 2491 "char_ref.rl"
23007
+ // clang-format on
23037
23008
 
23038
23009
  if (cs >= 7623) {
23039
23010
  assert(output->first != kGumboNoChar);
@@ -23067,10 +23038,9 @@ _again:
23067
23038
  }
23068
23039
  }
23069
23040
 
23070
- bool consume_char_ref(
23071
- struct GumboInternalParser* parser, struct GumboInternalUtf8Iterator* input,
23072
- int additional_allowed_char, bool is_in_attribute,
23073
- OneOrTwoCodepoints* output) {
23041
+ bool consume_char_ref(struct GumboInternalParser* parser,
23042
+ struct GumboInternalUtf8Iterator* input, int additional_allowed_char,
23043
+ bool is_in_attribute, OneOrTwoCodepoints* output) {
23074
23044
  utf8iterator_mark(input);
23075
23045
  utf8iterator_next(input);
23076
23046
  int c = utf8iterator_current(input);
@@ -49,10 +49,9 @@ typedef struct {
49
49
  // errors to the GumboParser's errors vector, if the spec calls for it. Pass a
50
50
  // space for the "additional allowed char" when the spec says "with no
51
51
  // additional allowed char". Returns false on parse error, true otherwise.
52
- bool consume_char_ref(
53
- struct GumboInternalParser* parser, struct GumboInternalUtf8Iterator* input,
54
- int additional_allowed_char, bool is_in_attribute,
55
- OneOrTwoCodepoints* output);
52
+ bool consume_char_ref(struct GumboInternalParser* parser,
53
+ struct GumboInternalUtf8Iterator* input, int additional_allowed_char,
54
+ bool is_in_attribute, OneOrTwoCodepoints* output);
56
55
 
57
56
  #ifdef __cplusplus
58
57
  }
@@ -2464,7 +2464,9 @@ valid_named_ref := |*
2464
2464
  *|;
2465
2465
  }%%
2466
2466
 
2467
- %% write data;
2467
+ // clang-format off
2468
+ %% write data noerror nofinal;
2469
+ // clang-format on
2468
2470
 
2469
2471
  static bool consume_named_ref(
2470
2472
  struct GumboInternalParser* parser, Utf8Iterator* input, bool is_in_attribute,
@@ -2477,13 +2479,16 @@ static bool consume_named_ref(
2477
2479
  const char *ts, *start;
2478
2480
  int cs, act;
2479
2481
 
2482
+ // clang-format off
2480
2483
  %% write init;
2481
2484
  // Avoid unused variable warnings.
2482
2485
  (void) act;
2483
2486
  (void) ts;
2487
+ (void) char_ref_en_valid_named_ref;
2484
2488
 
2485
2489
  start = p;
2486
2490
  %% write exec;
2491
+ // clang-format on
2487
2492
 
2488
2493
  if (cs >= %%{ write first_final; }%%) {
2489
2494
  assert(output->first != kGumboNoChar);
@@ -27,18 +27,17 @@
27
27
  #include "util.h"
28
28
  #include "vector.h"
29
29
 
30
- static const size_t kMessageBufferSize = 256;
31
-
32
30
  // Prints a formatted message to a StringBuffer. This automatically resizes the
33
31
  // StringBuffer as necessary to fit the message. Returns the number of bytes
34
32
  // written.
35
- static int print_message(GumboParser* parser, GumboStringBuffer* output,
36
- const char* format, ...) {
33
+ static int print_message(
34
+ GumboParser* parser, GumboStringBuffer* output, const char* format, ...) {
37
35
  va_list args;
38
- va_start(args, format);
39
36
  int remaining_capacity = output->capacity - output->length;
40
- int bytes_written = vsnprintf(output->data + output->length,
41
- remaining_capacity, format, args);
37
+ va_start(args, format);
38
+ int bytes_written = vsnprintf(
39
+ output->data + output->length, remaining_capacity, format, args);
40
+ va_end(args);
42
41
  #ifdef _MSC_VER
43
42
  if (bytes_written == -1) {
44
43
  // vsnprintf returns -1 on MSVC++ if there's not enough capacity, instead of
@@ -47,15 +46,15 @@ static int print_message(GumboParser* parser, GumboStringBuffer* output,
47
46
  // we retry (letting it fail and returning 0 if it doesn't), since there's
48
47
  // no way to smartly resize the buffer.
49
48
  gumbo_string_buffer_reserve(parser, output->capacity * 2, output);
50
- int result = vsnprintf(output->data + output->length,
51
- remaining_capacity, format, args);
49
+ va_start(args, format);
50
+ int result = vsnprintf(
51
+ output->data + output->length, remaining_capacity, format, args);
52
52
  va_end(args);
53
53
  return result == -1 ? 0 : result;
54
54
  }
55
55
  #else
56
56
  // -1 in standard C99 indicates an encoding error. Return 0 and do nothing.
57
57
  if (bytes_written == -1) {
58
- va_end(args);
59
58
  return 0;
60
59
  }
61
60
  #endif
@@ -64,19 +63,19 @@ static int print_message(GumboParser* parser, GumboStringBuffer* output,
64
63
  gumbo_string_buffer_reserve(
65
64
  parser, output->capacity + bytes_written, output);
66
65
  remaining_capacity = output->capacity - output->length;
67
- bytes_written = vsnprintf(output->data + output->length,
68
- remaining_capacity, format, args);
66
+ va_start(args, format);
67
+ bytes_written = vsnprintf(
68
+ output->data + output->length, remaining_capacity, format, args);
69
+ va_end(args);
69
70
  }
70
71
  output->length += bytes_written;
71
- va_end(args);
72
72
  return bytes_written;
73
73
  }
74
74
 
75
- static void print_tag_stack(
76
- GumboParser* parser, const GumboParserError* error,
75
+ static void print_tag_stack(GumboParser* parser, const GumboParserError* error,
77
76
  GumboStringBuffer* output) {
78
77
  print_message(parser, output, " Currently open tags: ");
79
- for (int i = 0; i < error->tag_stack.length; ++i) {
78
+ for (unsigned int i = 0; i < error->tag_stack.length; ++i) {
80
79
  if (i) {
81
80
  print_message(parser, output, ", ");
82
81
  }
@@ -87,12 +86,11 @@ static void print_tag_stack(
87
86
  }
88
87
 
89
88
  static void handle_parser_error(GumboParser* parser,
90
- const GumboParserError* error,
91
- GumboStringBuffer* output) {
89
+ const GumboParserError* error, GumboStringBuffer* output) {
92
90
  if (error->parser_state == GUMBO_INSERTION_MODE_INITIAL &&
93
91
  error->input_type != GUMBO_TOKEN_DOCTYPE) {
94
- print_message(parser, output,
95
- "The doctype must be the first token in the document");
92
+ print_message(
93
+ parser, output, "The doctype must be the first token in the document");
96
94
  return;
97
95
  }
98
96
 
@@ -151,13 +149,14 @@ static const char* find_last_newline(
151
149
  static const char* find_next_newline(
152
150
  const char* original_text, const char* error_location) {
153
151
  const char* c = error_location;
154
- for (; *c && *c != '\n'; ++c);
152
+ for (; *c && *c != '\n'; ++c)
153
+ ;
155
154
  return c;
156
155
  }
157
156
 
158
157
  GumboError* gumbo_add_error(GumboParser* parser) {
159
158
  int max_errors = parser->_options->max_errors;
160
- if (max_errors >= 0 && parser->_output->errors.length >= max_errors) {
159
+ if (max_errors >= 0 && parser->_output->errors.length >= (unsigned int) max_errors) {
161
160
  return NULL;
162
161
  }
163
162
  GumboError* error = gumbo_parser_allocate(parser, sizeof(GumboError));
@@ -167,50 +166,52 @@ GumboError* gumbo_add_error(GumboParser* parser) {
167
166
 
168
167
  void gumbo_error_to_string(
169
168
  GumboParser* parser, const GumboError* error, GumboStringBuffer* output) {
170
- print_message(parser, output, "@%d:%d: ",
171
- error->position.line, error->position.column);
169
+ print_message(
170
+ parser, output, "@%d:%d: ", error->position.line, error->position.column);
172
171
  switch (error->type) {
173
172
  case GUMBO_ERR_UTF8_INVALID:
174
- print_message(parser, output, "Invalid UTF8 character 0x%x",
175
- error->v.codepoint);
173
+ print_message(
174
+ parser, output, "Invalid UTF8 character 0x%x", error->v.codepoint);
176
175
  break;
177
176
  case GUMBO_ERR_UTF8_TRUNCATED:
178
177
  print_message(parser, output,
179
- "Input stream ends with a truncated UTF8 character 0x%x",
180
- error->v.codepoint);
178
+ "Input stream ends with a truncated UTF8 character 0x%x",
179
+ error->v.codepoint);
181
180
  break;
182
181
  case GUMBO_ERR_NUMERIC_CHAR_REF_NO_DIGITS:
183
- print_message(parser, output,
184
- "No digits after &# in numeric character reference");
182
+ print_message(
183
+ parser, output, "No digits after &# in numeric character reference");
185
184
  break;
186
185
  case GUMBO_ERR_NUMERIC_CHAR_REF_WITHOUT_SEMICOLON:
187
186
  print_message(parser, output,
188
- "The numeric character reference &#%d should be followed "
189
- "by a semicolon", error->v.codepoint);
187
+ "The numeric character reference &#%d should be followed "
188
+ "by a semicolon",
189
+ error->v.codepoint);
190
190
  break;
191
191
  case GUMBO_ERR_NUMERIC_CHAR_REF_INVALID:
192
192
  print_message(parser, output,
193
- "The numeric character reference &#%d; encodes an invalid "
194
- "unicode codepoint", error->v.codepoint);
193
+ "The numeric character reference &#%d; encodes an invalid "
194
+ "unicode codepoint",
195
+ error->v.codepoint);
195
196
  break;
196
197
  case GUMBO_ERR_NAMED_CHAR_REF_WITHOUT_SEMICOLON:
197
198
  // The textual data came from one of the literal strings in the table, and
198
199
  // so it'll be null-terminated.
199
200
  print_message(parser, output,
200
- "The named character reference &%.*s should be followed by a "
201
- "semicolon", (int) error->v.text.length, error->v.text.data);
201
+ "The named character reference &%.*s should be followed by a "
202
+ "semicolon",
203
+ (int) error->v.text.length, error->v.text.data);
202
204
  break;
203
205
  case GUMBO_ERR_NAMED_CHAR_REF_INVALID:
204
206
  print_message(parser, output,
205
- "The named character reference &%.*s; is not a valid entity name",
206
- (int) error->v.text.length, error->v.text.data);
207
+ "The named character reference &%.*s; is not a valid entity name",
208
+ (int) error->v.text.length, error->v.text.data);
207
209
  break;
208
210
  case GUMBO_ERR_DUPLICATE_ATTR:
209
211
  print_message(parser, output,
210
- "Attribute %s occurs multiple times, at positions %d and %d",
211
- error->v.duplicate_attr.name,
212
- error->v.duplicate_attr.original_index,
213
- error->v.duplicate_attr.new_index);
212
+ "Attribute %s occurs multiple times, at positions %d and %d",
213
+ error->v.duplicate_attr.name, error->v.duplicate_attr.original_index,
214
+ error->v.duplicate_attr.new_index);
214
215
  break;
215
216
  case GUMBO_ERR_PARSER:
216
217
  case GUMBO_ERR_UNACKNOWLEDGED_SELF_CLOSING_TAG:
@@ -218,21 +219,19 @@ void gumbo_error_to_string(
218
219
  break;
219
220
  default:
220
221
  print_message(parser, output,
221
- "Tokenizer error with an unimplemented error message");
222
+ "Tokenizer error with an unimplemented error message");
222
223
  break;
223
224
  }
224
225
  gumbo_string_buffer_append_codepoint(parser, '.', output);
225
226
  }
226
227
 
227
- void gumbo_caret_diagnostic_to_string(
228
- GumboParser* parser, const GumboError* error,
229
- const char* source_text, GumboStringBuffer* output) {
228
+ void gumbo_caret_diagnostic_to_string(GumboParser* parser,
229
+ const GumboError* error, const char* source_text,
230
+ GumboStringBuffer* output) {
230
231
  gumbo_error_to_string(parser, error, output);
231
232
 
232
- const char* line_start =
233
- find_last_newline(source_text, error->original_text);
234
- const char* line_end =
235
- find_next_newline(source_text, error->original_text);
233
+ const char* line_start = find_last_newline(source_text, error->original_text);
234
+ const char* line_end = find_next_newline(source_text, error->original_text);
236
235
  GumboStringPiece original_line;
237
236
  original_line.data = line_start;
238
237
  original_line.length = line_end - line_start;
@@ -273,7 +272,7 @@ void gumbo_init_errors(GumboParser* parser) {
273
272
  }
274
273
 
275
274
  void gumbo_destroy_errors(GumboParser* parser) {
276
- for (int i = 0; i < parser->_output->errors.length; ++i) {
275
+ for (unsigned int i = 0; i < parser->_output->errors.length; ++i) {
277
276
  gumbo_error_destroy(parser, parser->_output->errors.data[i]);
278
277
  }
279
278
  gumbo_vector_destroy(parser, &parser->_output->errors);