RubyGems - nokogumbo - Versions diffs - 1.4.2 → 1.4.3 - Mend

nokogumbo 1.4.2 → 1.4.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (28)

checksums.yaml +4 -4
data/gumbo-parser/src/attribute.c +1 -1
data/gumbo-parser/src/char_ref.c +37 -67
data/gumbo-parser/src/char_ref.h +3 -4
data/gumbo-parser/src/char_ref.rl +6 -1
data/gumbo-parser/src/error.c +50 -51
data/gumbo-parser/src/error.h +7 -9
data/gumbo-parser/src/gumbo.h +45 -181
data/gumbo-parser/src/parser.c +1397 -989
data/gumbo-parser/src/string_buffer.c +14 -10
data/gumbo-parser/src/string_buffer.h +9 -6
data/gumbo-parser/src/string_piece.c +5 -6
data/gumbo-parser/src/string_piece.h +2 -3
data/gumbo-parser/src/tag.c +36 -166
data/gumbo-parser/src/tag.in +150 -0
data/gumbo-parser/src/tag_enum.h +153 -0
data/gumbo-parser/src/tag_gperf.h +105 -0
data/gumbo-parser/src/tag_sizes.h +4 -0
data/gumbo-parser/src/tag_strings.h +153 -0
data/gumbo-parser/src/tokenizer.c +264 -360
data/gumbo-parser/src/tokenizer.h +2 -2
data/gumbo-parser/src/utf8.c +44 -44
data/gumbo-parser/src/utf8.h +1 -2
data/gumbo-parser/src/util.c +1 -1
data/gumbo-parser/src/util.h +0 -2
data/gumbo-parser/src/vector.c +17 -17
data/gumbo-parser/src/vector.h +6 -8
metadata +8 -3

data/gumbo-parser/src/tokenizer.h CHANGED Viewed

@@ -64,8 +64,8 @@ typedef struct GumboInternalToken {
     GumboTokenDocType doc_type;
     GumboTokenStartTag start_tag;
     GumboTag end_tag;
-    const char* text;    // For comments.
-    int character;      // For character, whitespace, null, and EOF tokens.
+    const char* text;  // For comments.
+    int character;     // For character, whitespace, null, and EOF tokens.
   } v;
 } GumboToken;

data/gumbo-parser/src/utf8.c CHANGED Viewed

@@ -19,7 +19,7 @@
 #include <assert.h>
 #include <stdint.h>
 #include <string.h>
-#include <strings.h>    // For strncasecmp.
+#include <strings.h>  // For strncasecmp.
 #include "error.h"
 #include "gumbo.h"
@@ -47,9 +47,11 @@ const int kUtf8ReplacementChar = 0xFFFD;
 // Permission is hereby granted, free of charge, to any person obtaining a copy
 // of this software and associated documentation files (the "Software"), to deal
-// in the Software without restriction, including without limitation the rights to
+// in the Software without restriction, including without limitation the rights
+// to
 // use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
-// of the Software, and to permit persons to whom the Software is furnished to do
+// of the Software, and to permit persons to whom the Software is furnished to
+// do
 // so, subject to the following conditions:
 // The above copyright notice and this permission notice shall be included in
@@ -59,32 +61,35 @@ const int kUtf8ReplacementChar = 0xFFFD;
 #define UTF8_REJECT 12
 static const uint8_t utf8d[] = {
-  // The first part of the table maps bytes to character classes that
-  // to reduce the size of the transition table and create bitmasks.
-   0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-   0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-   0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-   0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-   1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,  9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
-   7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,  7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
-   8,8,2,2,2,2,2,2,2,2,2,2,2,2,2,2,  2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
-  10,3,3,3,3,3,3,3,3,3,3,3,3,4,3,3, 11,6,6,6,5,8,8,8,8,8,8,8,8,8,8,8,
-  // The second part is a transition table that maps a combination
-  // of a state of the automaton and a character class to a state.
-   0,12,24,36,60,96,84,12,12,12,48,72, 12,12,12,12,12,12,12,12,12,12,12,12,
-  12, 0,12,12,12,12,12, 0,12, 0,12,12, 12,24,12,12,12,12,12,24,12,24,12,12,
-  12,12,12,12,12,12,12,24,12,12,12,12, 12,24,12,12,12,12,12,12,12,24,12,12,
-  12,12,12,12,12,12,12,36,12,36,12,12, 12,36,12,12,12,12,12,36,12,36,12,12,
-  12,36,12,12,12,12,12,12,12,12,12,12,
+    // The first part of the table maps bytes to character classes that
+    // to reduce the size of the transition table and create bitmasks.
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 9, 9, 9, 9, 9, 9,
+    9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 8, 8, 2, 2, 2, 2, 2, 2,
+    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 10,
+    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 3, 3, 11, 6, 6, 6, 5, 8, 8, 8, 8, 8,
+    8, 8, 8, 8, 8, 8,
+    // The second part is a transition table that maps a combination
+    // of a state of the automaton and a character class to a state.
+    0, 12, 24, 36, 60, 96, 84, 12, 12, 12, 48, 72, 12, 12, 12, 12, 12, 12, 12,
+    12, 12, 12, 12, 12, 12, 0, 12, 12, 12, 12, 12, 0, 12, 0, 12, 12, 12, 24, 12,
+    12, 12, 12, 12, 24, 12, 24, 12, 12, 12, 12, 12, 12, 12, 12, 12, 24, 12, 12,
+    12, 12, 12, 24, 12, 12, 12, 12, 12, 12, 12, 24, 12, 12, 12, 12, 12, 12, 12,
+    12, 12, 36, 12, 36, 12, 12, 12, 36, 12, 12, 12, 12, 12, 36, 12, 36, 12, 12,
+    12, 36, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
 };
 uint32_t static inline decode(uint32_t* state, uint32_t* codep, uint32_t byte) {
   uint32_t type = utf8d[byte];
-  *codep = (*state != UTF8_ACCEPT) ?
-    (byte & 0x3fu) | (*codep << 6) :
-    (0xff >> type) & (byte);
+  *codep = (*state != UTF8_ACCEPT) ? (byte & 0x3fu) | (*codep << 6)
+                                   : (0xff >> type) & (byte);
   *state = utf8d[256 + *state + type];
   return *state;
@@ -130,7 +135,7 @@ static void read_char(Utf8Iterator* iter) {
   uint32_t code_point = 0;
   uint32_t state = UTF8_ACCEPT;
   for (const char* c = iter->_start; c < iter->_end; ++c) {
-    decode(&state, &code_point, (uint32_t) (unsigned char) (*c));
+    decode(&state, &code_point, (uint32_t)(unsigned char) (*c));
     if (state == UTF8_ACCEPT) {
       iter->_width = c - iter->_start + 1;
       // This is the special handling for carriage returns that is mandated by
@@ -180,10 +185,10 @@ static void update_position(Utf8Iterator* iter) {
   if (iter->_current == '\n') {
     ++iter->_pos.line;
     iter->_pos.column = 1;
-  } else if(iter->_current == '\t') {
+  } else if (iter->_current == '\t') {
     int tab_stop = iter->_parser->_options->tab_stop;
     iter->_pos.column = ((iter->_pos.column / tab_stop) + 1) * tab_stop;
-  } else if(iter->_current != -1) {
+  } else if (iter->_current != -1) {
     ++iter->_pos.column;
   }
 }
@@ -192,13 +197,12 @@ static void update_position(Utf8Iterator* iter) {
 // forbidden by the HTML5 spec, such as undefined control chars.
 bool utf8_is_invalid_code_point(int c) {
   return (c >= 0x1 && c <= 0x8) || c == 0xB || (c >= 0xE && c <= 0x1F) ||
-      (c >= 0x7F && c <= 0x9F) || (c >= 0xFDD0 && c <= 0xFDEF) ||
-      ((c & 0xFFFF) == 0xFFFE) || ((c & 0xFFFF) == 0xFFFF);
+         (c >= 0x7F && c <= 0x9F) || (c >= 0xFDD0 && c <= 0xFDEF) ||
+         ((c & 0xFFFF) == 0xFFFE) || ((c & 0xFFFF) == 0xFFFF);
 }
-void utf8iterator_init(
-    GumboParser* parser, const char* source, size_t source_length,
-    Utf8Iterator* iter) {
+void utf8iterator_init(GumboParser* parser, const char* source,
+    size_t source_length, Utf8Iterator* iter) {
   iter->_start = source;
   iter->_end = source + source_length;
   iter->_pos.line = 1;
@@ -216,9 +220,7 @@ void utf8iterator_next(Utf8Iterator* iter) {
   read_char(iter);
 }
-int utf8iterator_current(const Utf8Iterator* iter) {
-  return iter->_current;
-}
+int utf8iterator_current(const Utf8Iterator* iter) { return iter->_current; }
 void utf8iterator_get_position(
     const Utf8Iterator* iter, GumboSourcePosition* output) {
@@ -233,14 +235,13 @@ const char* utf8iterator_get_end_pointer(const Utf8Iterator* iter) {
   return iter->_end;
 }
-bool utf8iterator_maybe_consume_match(
-    Utf8Iterator* iter, const char* prefix, size_t length,
-    bool case_sensitive) {
-  bool matched = (iter->_start + length <= iter->_end) && (case_sensitive ?
-      !strncmp(iter->_start, prefix, length) :
-      !strncasecmp(iter->_start, prefix, length));
+bool utf8iterator_maybe_consume_match(Utf8Iterator* iter, const char* prefix,
+    size_t length, bool case_sensitive) {
+  bool matched = (iter->_start + length <= iter->_end) &&
+                 (case_sensitive ? !strncmp(iter->_start, prefix, length)
+                                 : !strncasecmp(iter->_start, prefix, length));
   if (matched) {
-    for (int i = 0; i < length; ++i) {
+    for (unsigned int i = 0; i < length; ++i) {
       utf8iterator_next(iter);
     }
     return true;
@@ -263,8 +264,7 @@ void utf8iterator_reset(Utf8Iterator* iter) {
 // Sets the position and original text fields of an error to the value at the
 // mark.
-void utf8iterator_fill_error_at_mark(
-    Utf8Iterator* iter, GumboError* error) {
+void utf8iterator_fill_error_at_mark(Utf8Iterator* iter, GumboError* error) {
   error->position = iter->_mark_pos;
   error->original_text = iter->_mark;
 }

data/gumbo-parser/src/utf8.h CHANGED Viewed

@@ -81,8 +81,7 @@ bool utf8_is_invalid_code_point(int c);
 // Initializes a new Utf8Iterator from the given byte buffer.  The source does
 // not have to be NUL-terminated, but the length must be passed in explicitly.
-void utf8iterator_init(
-    struct GumboInternalParser* parser, const char* source,
+void utf8iterator_init(struct GumboInternalParser* parser, const char* source,
     size_t source_length, Utf8Iterator* iter);
 // Advances the current position by one code point.

data/gumbo-parser/src/util.c CHANGED Viewed

@@ -29,7 +29,7 @@
 // TODO(jdtang): This should be elsewhere, but there's no .c file for
 // SourcePositions and yet the constant needs some linkage, so this is as good
 // as any.
-const GumboSourcePosition kGumboEmptySourcePosition = { 0, 0, 0 };
+const GumboSourcePosition kGumboEmptySourcePosition = {0, 0, 0};
 void* gumbo_parser_allocate(GumboParser* parser, size_t num_bytes) {
   return parser->_options->allocator(parser->_options->userdata, num_bytes);

data/gumbo-parser/src/util.h CHANGED Viewed

@@ -25,8 +25,6 @@
 #include <stdbool.h>
 #include <stddef.h>
 #ifdef __cplusplus
 extern "C" {
 #endif

data/gumbo-parser/src/vector.c CHANGED Viewed

@@ -25,21 +25,22 @@
 struct GumboInternalParser;
-const GumboVector kGumboEmptyVector = { NULL, 0, 0 };
+const GumboVector kGumboEmptyVector = {NULL, 0, 0};
-void gumbo_vector_init(
-    struct GumboInternalParser* parser, size_t initial_capacity, GumboVector* vector) {
+void gumbo_vector_init(struct GumboInternalParser* parser,
+    size_t initial_capacity, GumboVector* vector) {
   vector->length = 0;
   vector->capacity = initial_capacity;
   if (initial_capacity > 0) {
-    vector->data = gumbo_parser_allocate(
-        parser, sizeof(void*) * initial_capacity);
+    vector->data =
+        gumbo_parser_allocate(parser, sizeof(void*) * initial_capacity);
   } else {
     vector->data = NULL;
   }
 }
-void gumbo_vector_destroy(struct GumboInternalParser* parser, GumboVector* vector) {
+void gumbo_vector_destroy(
+    struct GumboInternalParser* parser, GumboVector* vector) {
   if (vector->capacity > 0) {
     gumbo_parser_deallocate(parser, vector->data);
   }
@@ -59,8 +60,8 @@ static void enlarge_vector_if_full(
     } else {
       // 0-capacity vector; no previous array to deallocate.
       vector->capacity = 2;
-      vector->data = gumbo_parser_allocate(
-          parser, sizeof(void*) * vector->capacity);
+      vector->data =
+          gumbo_parser_allocate(parser, sizeof(void*) * vector->capacity);
     }
   }
 }
@@ -81,8 +82,8 @@ void* gumbo_vector_pop(
   return vector->data[--vector->length];
 }
-int gumbo_vector_index_of(GumboVector* vector, void* element) {
-  for (int i = 0; i < vector->length; ++i) {
+int gumbo_vector_index_of(GumboVector* vector, const void* element) {
+  for (unsigned int i = 0; i < vector->length; ++i) {
     if (vector->data[i] == element) {
       return i;
     }
@@ -90,15 +91,14 @@ int gumbo_vector_index_of(GumboVector* vector, void* element) {
   return -1;
 }
-void gumbo_vector_insert_at(
-    struct GumboInternalParser* parser, void* element, int index,
-    GumboVector* vector) {
+void gumbo_vector_insert_at(struct GumboInternalParser* parser, void* element,
+    unsigned int index, GumboVector* vector) {
   assert(index >= 0);
   assert(index <= vector->length);
   enlarge_vector_if_full(parser, vector);
   ++vector->length;
   memmove(&vector->data[index + 1], &vector->data[index],
-          sizeof(void*) * (vector->length - index - 1));
+      sizeof(void*) * (vector->length - index - 1));
   vector->data[index] = element;
 }
@@ -111,13 +111,13 @@ void gumbo_vector_remove(
   gumbo_vector_remove_at(parser, index, vector);
 }
-void* gumbo_vector_remove_at(
-    struct GumboInternalParser* parser, int index, GumboVector* vector) {
+void* gumbo_vector_remove_at(struct GumboInternalParser* parser,
+    unsigned int index, GumboVector* vector) {
   assert(index >= 0);
   assert(index < vector->length);
   void* result = vector->data[index];
   memmove(&vector->data[index], &vector->data[index + 1],
-          sizeof(void*) * (vector->length - index - 1));
+      sizeof(void*) * (vector->length - index - 1));
   --vector->length;
   return result;
 }

data/gumbo-parser/src/vector.h CHANGED Viewed

@@ -28,9 +28,8 @@ extern "C" {
 struct GumboInternalParser;
 // Initializes a new GumboVector with the specified initial capacity.
-void gumbo_vector_init(
-    struct GumboInternalParser* parser, size_t initial_capacity,
-    GumboVector* vector);
+void gumbo_vector_init(struct GumboInternalParser* parser,
+    size_t initial_capacity, GumboVector* vector);
 // Frees the memory used by an GumboVector.  Does not free the contained
 // pointers.
@@ -48,9 +47,8 @@ void* gumbo_vector_pop(struct GumboInternalParser* parser, GumboVector* vector);
 // Inserts an element at a specific index.  This is potentially O(N) time, but
 // is necessary for some of the spec's behavior.
-void gumbo_vector_insert_at(
-    struct GumboInternalParser* parser, void* element, int index,
-    GumboVector* vector);
+void gumbo_vector_insert_at(struct GumboInternalParser* parser, void* element,
+    unsigned int index, GumboVector* vector);
 // Removes an element from the vector, or does nothing if the element is not in
 // the vector.
@@ -59,8 +57,8 @@ void gumbo_vector_remove(
 // Removes and returns an element at a specific index.  Note that this is
 // potentially O(N) time and should be used sparingly.
-void* gumbo_vector_remove_at(
-    struct GumboInternalParser* parser, int index, GumboVector* vector);
+void* gumbo_vector_remove_at(struct GumboInternalParser* parser,
+    unsigned int index, GumboVector* vector);
 #ifdef __cplusplus
 }

metadata CHANGED Viewed

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: nokogumbo
 version: !ruby/object:Gem::Version
-  version: 1.4.2
+  version: 1.4.3
 platform: ruby
 authors:
 - Sam Ruby
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2015-05-12 00:00:00.000000000 Z
+date: 2015-09-23 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: nokogiri
@@ -52,6 +52,11 @@ files:
 - gumbo-parser/src/string_piece.c
 - gumbo-parser/src/string_piece.h
 - gumbo-parser/src/tag.c
+- gumbo-parser/src/tag.in
+- gumbo-parser/src/tag_enum.h
+- gumbo-parser/src/tag_gperf.h
+- gumbo-parser/src/tag_sizes.h
+- gumbo-parser/src/tag_strings.h
 - gumbo-parser/src/token_type.h
 - gumbo-parser/src/tokenizer.c
 - gumbo-parser/src/tokenizer.h
@@ -85,7 +90,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
       version: '0'
 requirements: []
 rubyforge_project:
-rubygems_version: 2.4.5
+rubygems_version: 2.4.5.1
 signing_key:
 specification_version: 4
 summary: Nokogiri interface to the Gumbo HTML5 parser