RubyGems - nokogumbo - Versions diffs - 1.4.2 → 1.4.3 - Mend

nokogumbo 1.4.2 → 1.4.3

This diff shows the differences between two publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.

Files changed (28) hide show

checksums.yaml +4 -4
data/gumbo-parser/src/attribute.c +1 -1
data/gumbo-parser/src/char_ref.c +37 -67
data/gumbo-parser/src/char_ref.h +3 -4
data/gumbo-parser/src/char_ref.rl +6 -1
data/gumbo-parser/src/error.c +50 -51
data/gumbo-parser/src/error.h +7 -9
data/gumbo-parser/src/gumbo.h +45 -181
data/gumbo-parser/src/parser.c +1397 -989
data/gumbo-parser/src/string_buffer.c +14 -10
data/gumbo-parser/src/string_buffer.h +9 -6
data/gumbo-parser/src/string_piece.c +5 -6
data/gumbo-parser/src/string_piece.h +2 -3
data/gumbo-parser/src/tag.c +36 -166
data/gumbo-parser/src/tag.in +150 -0
data/gumbo-parser/src/tag_enum.h +153 -0
data/gumbo-parser/src/tag_gperf.h +105 -0
data/gumbo-parser/src/tag_sizes.h +4 -0
data/gumbo-parser/src/tag_strings.h +153 -0
data/gumbo-parser/src/tokenizer.c +264 -360
data/gumbo-parser/src/tokenizer.h +2 -2
data/gumbo-parser/src/utf8.c +44 -44
data/gumbo-parser/src/utf8.h +1 -2
data/gumbo-parser/src/util.c +1 -1
data/gumbo-parser/src/util.h +0 -2
data/gumbo-parser/src/vector.c +17 -17
data/gumbo-parser/src/vector.h +6 -8
metadata +8 -3

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: cf20dd502d8ec6022f2c72193bb0c9a908251088
-  data.tar.gz: 326f85766d0e4f97683f5df026f08f4dc33806e8
+  metadata.gz: 2ea9a9bee29cbf479c7afab79a4df38040ed60e7
+  data.tar.gz: f7e87af169388de1f0b32fd457e73182169bc503
 SHA512:
-  metadata.gz: 800800652a5260bf54399e8cca1fc6e63f7ef53aea489245c5315b6e955b38aa4dfc6d7272b99898ab78150464640ac14c995aa38b9c77644dab5d73fc0e46a5
-  data.tar.gz: 18ba647671103cfc2853a88935fe91eb965d1e6fbe1aad981438297a5035ec222b5ae6c5ed3ef127429c8b58edd02a6a5a877ba7e7ec3390d05779f7420f1521
+  metadata.gz: 6413991d638e6bacc465442546c7ca7756fb0cd8f577ccb9b65d60da87c009b7b2d212bb3fe928b7bbde97ba66f9180d8c5e3efdd02648e33e5d7546b452476f
+  data.tar.gz: 4a3486ed36ab147b828e9f79244c36a2d33b2a8dccc61fa8d6abf75556ebb76313c7d009c2502bc3142e38644759c1e6c6bab5d1c1b21b346bf16702802650e4

data/gumbo-parser/src/attribute.c CHANGED Viewed

@@ -27,7 +27,7 @@ struct GumboInternalParser;
 GumboAttribute* gumbo_get_attribute(
     const GumboVector* attributes, const char* name) {
-  for (int i = 0; i < attributes->length; ++i) {
+  for (unsigned int i = 0; i < attributes->length; ++i) {
     GumboAttribute* attr = attributes->data[i];
     if (!strcasecmp(attr->name, name)) {
       return attr;

data/gumbo-parser/src/char_ref.c CHANGED Viewed

@@ -30,7 +30,7 @@
 #include <ctype.h>
 #include <stddef.h>
 #include <stdio.h>
-#include <string.h>     // Only for debug assertions at present.
+#include <string.h>  // Only for debug assertions at present.
 #include "error.h"
 #include "string_piece.h"
@@ -49,44 +49,18 @@ typedef struct {
   int to_char;
 } CharReplacement;
-static const CharReplacement kCharReplacements[] = {
-  { 0x00, 0xfffd },
-  { 0x0d, 0x000d },
-  { 0x80, 0x20ac },
-  { 0x81, 0x0081 },
-  { 0x82, 0x201A },
-  { 0x83, 0x0192 },
-  { 0x84, 0x201E },
-  { 0x85, 0x2026 },
-  { 0x86, 0x2020 },
-  { 0x87, 0x2021 },
-  { 0x88, 0x02C6 },
-  { 0x89, 0x2030 },
-  { 0x8A, 0x0160 },
-  { 0x8B, 0x2039 },
-  { 0x8C, 0x0152 },
-  { 0x8D, 0x008D },
-  { 0x8E, 0x017D },
-  { 0x8F, 0x008F },
-  { 0x90, 0x0090 },
-  { 0x91, 0x2018 },
-  { 0x92, 0x2019 },
-  { 0x93, 0x201C },
-  { 0x94, 0x201D },
-  { 0x95, 0x2022 },
-  { 0x96, 0x2013 },
-  { 0x97, 0x2014 },
-  { 0x98, 0x02DC },
-  { 0x99, 0x2122 },
-  { 0x9A, 0x0161 },
-  { 0x9B, 0x203A },
-  { 0x9C, 0x0153 },
-  { 0x9D, 0x009D },
-  { 0x9E, 0x017E },
-  { 0x9F, 0x0178 },
-  // Terminator.
-  { -1, -1 }
-};
+static const CharReplacement kCharReplacements[] = {{0x00, 0xfffd},
+    {0x0d, 0x000d}, {0x80, 0x20ac}, {0x81, 0x0081}, {0x82, 0x201A},
+    {0x83, 0x0192}, {0x84, 0x201E}, {0x85, 0x2026}, {0x86, 0x2020},
+    {0x87, 0x2021}, {0x88, 0x02C6}, {0x89, 0x2030}, {0x8A, 0x0160},
+    {0x8B, 0x2039}, {0x8C, 0x0152}, {0x8D, 0x008D}, {0x8E, 0x017D},
+    {0x8F, 0x008F}, {0x90, 0x0090}, {0x91, 0x2018}, {0x92, 0x2019},
+    {0x93, 0x201C}, {0x94, 0x201D}, {0x95, 0x2022}, {0x96, 0x2013},
+    {0x97, 0x2014}, {0x98, 0x02DC}, {0x99, 0x2122}, {0x9A, 0x0161},
+    {0x9B, 0x203A}, {0x9C, 0x0153}, {0x9D, 0x009D}, {0x9E, 0x017E},
+    {0x9F, 0x0178},
+    // Terminator.
+    {-1, -1}};
 static int parse_digit(int c, bool allow_hex) {
   if (c >= '0' && c <= '9') {
@@ -111,9 +85,8 @@ static void add_no_digit_error(
   error->type = GUMBO_ERR_NUMERIC_CHAR_REF_NO_DIGITS;
 }
-static void add_codepoint_error(
-    struct GumboInternalParser* parser, Utf8Iterator* input,
-    GumboErrorType type, int codepoint) {
+static void add_codepoint_error(struct GumboInternalParser* parser,
+    Utf8Iterator* input, GumboErrorType type, int codepoint) {
   GumboError* error = gumbo_add_error(parser);
   if (!error) {
     return;
@@ -123,9 +96,8 @@ static void add_codepoint_error(
   error->v.codepoint = codepoint;
 }
-static void add_named_reference_error(
-    struct GumboInternalParser* parser, Utf8Iterator* input,
-    GumboErrorType type, GumboStringPiece text) {
+static void add_named_reference_error(struct GumboInternalParser* parser,
+    Utf8Iterator* input, GumboErrorType type, GumboStringPiece text) {
   GumboError* error = gumbo_add_error(parser);
   if (!error) {
     return;
@@ -211,8 +183,7 @@ static bool maybe_add_invalid_named_reference(
   // worry about consuming characters.
   const char* start = utf8iterator_get_char_pointer(input);
   int c = utf8iterator_current(input);
-  while ((c >= 'a' && c <= 'z') ||
-         (c >= 'A' && c <= 'Z') ||
+  while ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') ||
          (c >= '0' && c <= '9')) {
     utf8iterator_next(input);
     c = utf8iterator_current(input);
@@ -228,12 +199,11 @@ static bool maybe_add_invalid_named_reference(
   return true;
 }
 #line 2465 "char_ref.rl"
+// clang-format off
-#line 237 "char_ref.c"
+#line 238 "char_ref.c"
 static const short _char_ref_actions[] = {
 	0, 1, 0, 1, 1, 1, 2, 1,
 	3, 1, 4, 1, 5, 1, 6, 1,
@@ -13960,17 +13930,15 @@ static const short _char_ref_eof_trans[] = {
 };
 static const int char_ref_start = 7623;
-static const int char_ref_first_final = 7623;
-static const int char_ref_error = 0;
 static const int char_ref_en_valid_named_ref = 7623;
-#line 2468 "char_ref.rl"
+#line 2469 "char_ref.rl"
+// clang-format on
-static bool consume_named_ref(
-    struct GumboInternalParser* parser, Utf8Iterator* input, bool is_in_attribute,
-    OneOrTwoCodepoints* output) {
+static bool consume_named_ref(struct GumboInternalParser* parser,
+    Utf8Iterator* input, bool is_in_attribute, OneOrTwoCodepoints* output) {
   assert(output->first == kGumboNoChar);
   const char* p = utf8iterator_get_char_pointer(input);
   const char* pe = utf8iterator_get_end_pointer(input);
@@ -13979,8 +13947,9 @@ static bool consume_named_ref(
   const char *ts, *start;
   int cs, act;
+// clang-format off
-#line 13984 "char_ref.c"
+#line 13985 "char_ref.c"
 	{
 	cs = char_ref_start;
 	ts = 0;
@@ -13988,14 +13957,15 @@ static bool consume_named_ref(
 	act = 0;
 	}
-#line 2481 "char_ref.rl"
+#line 2484 "char_ref.rl"
   // Avoid unused variable warnings.
   (void) act;
   (void) ts;
+  (void) char_ref_en_valid_named_ref;
   start = p;
-#line 13999 "char_ref.c"
+#line 14001 "char_ref.c"
 	{
 	int _slen;
 	int _trans;
@@ -14017,7 +13987,7 @@ _resume:
 #line 1 "NONE"
 	{ts = p;}
 	break;
-#line 14021 "char_ref.c"
+#line 14023 "char_ref.c"
 		}
 	}
@@ -23000,7 +22970,7 @@ _eof_trans:
 #line 2273 "char_ref.rl"
 	{{p = ((te))-1;}{ output->first = 0xd7; {p++; goto _out; } }}
 	break;
-#line 23004 "char_ref.c"
+#line 23006 "char_ref.c"
 		}
 	}
@@ -23013,7 +22983,7 @@ _again:
 #line 1 "NONE"
 	{ts = 0;}
 	break;
-#line 23017 "char_ref.c"
+#line 23019 "char_ref.c"
 		}
 	}
@@ -23033,7 +23003,8 @@ _again:
 	_out: {}
 	}
-#line 2487 "char_ref.rl"
+#line 2491 "char_ref.rl"
+  // clang-format on
   if (cs >= 7623) {
     assert(output->first != kGumboNoChar);
@@ -23067,10 +23038,9 @@ _again:
   }
 }
-bool consume_char_ref(
-    struct GumboInternalParser* parser, struct GumboInternalUtf8Iterator* input,
-    int additional_allowed_char, bool is_in_attribute,
-    OneOrTwoCodepoints* output) {
+bool consume_char_ref(struct GumboInternalParser* parser,
+    struct GumboInternalUtf8Iterator* input, int additional_allowed_char,
+    bool is_in_attribute, OneOrTwoCodepoints* output) {
   utf8iterator_mark(input);
   utf8iterator_next(input);
   int c = utf8iterator_current(input);

data/gumbo-parser/src/char_ref.h CHANGED Viewed

@@ -49,10 +49,9 @@ typedef struct {
 // errors to the GumboParser's errors vector, if the spec calls for it.  Pass a
 // space for the "additional allowed char" when the spec says "with no
 // additional allowed char".  Returns false on parse error, true otherwise.
-bool consume_char_ref(
-    struct GumboInternalParser* parser, struct GumboInternalUtf8Iterator* input,
-    int additional_allowed_char, bool is_in_attribute,
-    OneOrTwoCodepoints* output);
+bool consume_char_ref(struct GumboInternalParser* parser,
+    struct GumboInternalUtf8Iterator* input, int additional_allowed_char,
+    bool is_in_attribute, OneOrTwoCodepoints* output);
 #ifdef __cplusplus
 }

data/gumbo-parser/src/char_ref.rl CHANGED Viewed

@@ -2464,7 +2464,9 @@ valid_named_ref := |*
 *|;
 }%%
-%% write data;
+// clang-format off
+%% write data noerror nofinal;
+// clang-format on
 static bool consume_named_ref(
     struct GumboInternalParser* parser, Utf8Iterator* input, bool is_in_attribute,
@@ -2477,13 +2479,16 @@ static bool consume_named_ref(
   const char *ts, *start;
   int cs, act;
+  // clang-format off
   %% write init;
   // Avoid unused variable warnings.
   (void) act;
   (void) ts;
+  (void) char_ref_en_valid_named_ref;
   start = p;
   %% write exec;
+  // clang-format on
   if (cs >= %%{ write first_final; }%%) {
     assert(output->first != kGumboNoChar);

data/gumbo-parser/src/error.c CHANGED Viewed

@@ -27,18 +27,17 @@
 #include "util.h"
 #include "vector.h"
-static const size_t kMessageBufferSize = 256;
 // Prints a formatted message to a StringBuffer.  This automatically resizes the
 // StringBuffer as necessary to fit the message.  Returns the number of bytes
 // written.
-static int print_message(GumboParser* parser, GumboStringBuffer* output,
-                         const char* format, ...) {
+static int print_message(
+    GumboParser* parser, GumboStringBuffer* output, const char* format, ...) {
   va_list args;
-  va_start(args, format);
   int remaining_capacity = output->capacity - output->length;
-  int bytes_written = vsnprintf(output->data + output->length,
-                                remaining_capacity, format, args);
+  va_start(args, format);
+  int bytes_written = vsnprintf(
+      output->data + output->length, remaining_capacity, format, args);
+  va_end(args);
 #ifdef _MSC_VER
   if (bytes_written == -1) {
     // vsnprintf returns -1 on MSVC++ if there's not enough capacity, instead of
@@ -47,15 +46,15 @@ static int print_message(GumboParser* parser, GumboStringBuffer* output,
     // we retry (letting it fail and returning 0 if it doesn't), since there's
     // no way to smartly resize the buffer.
     gumbo_string_buffer_reserve(parser, output->capacity * 2, output);
-    int result = vsnprintf(output->data + output->length,
-                           remaining_capacity, format, args);
+    va_start(args, format);
+    int result = vsnprintf(
+        output->data + output->length, remaining_capacity, format, args);
     va_end(args);
     return result == -1 ? 0 : result;
   }
 #else
   // -1 in standard C99 indicates an encoding error.  Return 0 and do nothing.
   if (bytes_written == -1) {
-    va_end(args);
     return 0;
   }
 #endif
@@ -64,19 +63,19 @@ static int print_message(GumboParser* parser, GumboStringBuffer* output,
     gumbo_string_buffer_reserve(
         parser, output->capacity + bytes_written, output);
     remaining_capacity = output->capacity - output->length;
-    bytes_written = vsnprintf(output->data + output->length,
-                              remaining_capacity, format, args);
+    va_start(args, format);
+    bytes_written = vsnprintf(
+        output->data + output->length, remaining_capacity, format, args);
+    va_end(args);
   }
   output->length += bytes_written;
-  va_end(args);
   return bytes_written;
 }
-static void print_tag_stack(
-    GumboParser* parser, const GumboParserError* error,
+static void print_tag_stack(GumboParser* parser, const GumboParserError* error,
     GumboStringBuffer* output) {
   print_message(parser, output, "  Currently open tags: ");
-  for (int i = 0; i < error->tag_stack.length; ++i) {
+  for (unsigned int i = 0; i < error->tag_stack.length; ++i) {
     if (i) {
       print_message(parser, output, ", ");
     }
@@ -87,12 +86,11 @@ static void print_tag_stack(
 }
 static void handle_parser_error(GumboParser* parser,
-                                const GumboParserError* error,
-                                GumboStringBuffer* output) {
+    const GumboParserError* error, GumboStringBuffer* output) {
   if (error->parser_state == GUMBO_INSERTION_MODE_INITIAL &&
       error->input_type != GUMBO_TOKEN_DOCTYPE) {
-    print_message(parser, output,
-                  "The doctype must be the first token in the document");
+    print_message(
+        parser, output, "The doctype must be the first token in the document");
     return;
   }
@@ -151,13 +149,14 @@ static const char* find_last_newline(
 static const char* find_next_newline(
     const char* original_text, const char* error_location) {
   const char* c = error_location;
-  for (; *c && *c != '\n'; ++c);
+  for (; *c && *c != '\n'; ++c)
+    ;
   return c;
 }
 GumboError* gumbo_add_error(GumboParser* parser) {
   int max_errors = parser->_options->max_errors;
-  if (max_errors >= 0 && parser->_output->errors.length >= max_errors) {
+  if (max_errors >= 0 && parser->_output->errors.length >= (unsigned int) max_errors) {
     return NULL;
   }
   GumboError* error = gumbo_parser_allocate(parser, sizeof(GumboError));
@@ -167,50 +166,52 @@ GumboError* gumbo_add_error(GumboParser* parser) {
 void gumbo_error_to_string(
     GumboParser* parser, const GumboError* error, GumboStringBuffer* output) {
-  print_message(parser, output, "@%d:%d: ",
-                error->position.line, error->position.column);
+  print_message(
+      parser, output, "@%d:%d: ", error->position.line, error->position.column);
   switch (error->type) {
     case GUMBO_ERR_UTF8_INVALID:
-      print_message(parser, output, "Invalid UTF8 character 0x%x",
-               error->v.codepoint);
+      print_message(
+          parser, output, "Invalid UTF8 character 0x%x", error->v.codepoint);
       break;
     case GUMBO_ERR_UTF8_TRUNCATED:
       print_message(parser, output,
-               "Input stream ends with a truncated UTF8 character 0x%x",
-               error->v.codepoint);
+          "Input stream ends with a truncated UTF8 character 0x%x",
+          error->v.codepoint);
       break;
     case GUMBO_ERR_NUMERIC_CHAR_REF_NO_DIGITS:
-      print_message(parser, output,
-               "No digits after &# in numeric character reference");
+      print_message(
+          parser, output, "No digits after &# in numeric character reference");
       break;
     case GUMBO_ERR_NUMERIC_CHAR_REF_WITHOUT_SEMICOLON:
       print_message(parser, output,
-               "The numeric character reference &#%d should be followed "
-               "by a semicolon", error->v.codepoint);
+          "The numeric character reference &#%d should be followed "
+          "by a semicolon",
+          error->v.codepoint);
       break;
     case GUMBO_ERR_NUMERIC_CHAR_REF_INVALID:
       print_message(parser, output,
-               "The numeric character reference &#%d; encodes an invalid "
-               "unicode codepoint", error->v.codepoint);
+          "The numeric character reference &#%d; encodes an invalid "
+          "unicode codepoint",
+          error->v.codepoint);
       break;
     case GUMBO_ERR_NAMED_CHAR_REF_WITHOUT_SEMICOLON:
       // The textual data came from one of the literal strings in the table, and
       // so it'll be null-terminated.
       print_message(parser, output,
-               "The named character reference &%.*s should be followed by a "
-               "semicolon", (int) error->v.text.length, error->v.text.data);
+          "The named character reference &%.*s should be followed by a "
+          "semicolon",
+          (int) error->v.text.length, error->v.text.data);
       break;
     case GUMBO_ERR_NAMED_CHAR_REF_INVALID:
       print_message(parser, output,
-               "The named character reference &%.*s; is not a valid entity name",
-               (int) error->v.text.length, error->v.text.data);
+          "The named character reference &%.*s; is not a valid entity name",
+          (int) error->v.text.length, error->v.text.data);
       break;
     case GUMBO_ERR_DUPLICATE_ATTR:
       print_message(parser, output,
-               "Attribute %s occurs multiple times, at positions %d and %d",
-               error->v.duplicate_attr.name,
-               error->v.duplicate_attr.original_index,
-               error->v.duplicate_attr.new_index);
+          "Attribute %s occurs multiple times, at positions %d and %d",
+          error->v.duplicate_attr.name, error->v.duplicate_attr.original_index,
+          error->v.duplicate_attr.new_index);
       break;
     case GUMBO_ERR_PARSER:
     case GUMBO_ERR_UNACKNOWLEDGED_SELF_CLOSING_TAG:
@@ -218,21 +219,19 @@ void gumbo_error_to_string(
       break;
     default:
       print_message(parser, output,
-               "Tokenizer error with an unimplemented error message");
+          "Tokenizer error with an unimplemented error message");
       break;
   }
   gumbo_string_buffer_append_codepoint(parser, '.', output);
 }
-void gumbo_caret_diagnostic_to_string(
-    GumboParser* parser, const GumboError* error,
-    const char* source_text, GumboStringBuffer* output) {
+void gumbo_caret_diagnostic_to_string(GumboParser* parser,
+    const GumboError* error, const char* source_text,
+    GumboStringBuffer* output) {
   gumbo_error_to_string(parser, error, output);
-  const char* line_start =
-      find_last_newline(source_text, error->original_text);
-  const char* line_end =
-      find_next_newline(source_text, error->original_text);
+  const char* line_start = find_last_newline(source_text, error->original_text);
+  const char* line_end = find_next_newline(source_text, error->original_text);
   GumboStringPiece original_line;
   original_line.data = line_start;
   original_line.length = line_end - line_start;
@@ -273,7 +272,7 @@ void gumbo_init_errors(GumboParser* parser) {
 }
 void gumbo_destroy_errors(GumboParser* parser) {
-  for (int i = 0; i < parser->_output->errors.length; ++i) {
+  for (unsigned int i = 0; i < parser->_output->errors.length; ++i) {
     gumbo_error_destroy(parser, parser->_output->errors.data[i]);
   }
   gumbo_vector_destroy(parser, &parser->_output->errors);