RubyGems - nokogumbo - Versions diffs - 2.0.1 → 2.0.2 - Mend

nokogumbo 2.0.1 → 2.0.2

Files changed (10)

checksums.yaml +4 -4
data/ext/nokogumbo/extconf.rb +1 -1
data/ext/nokogumbo/nokogumbo.c +1 -0
data/gumbo-parser/src/error.c +17 -8
data/gumbo-parser/src/gumbo.h +8 -0
data/gumbo-parser/src/parser.c +473 -480
data/gumbo-parser/src/tokenizer.c +12 -25
data/gumbo-parser/src/tokenizer.h +2 -13
data/lib/nokogumbo/version.rb +1 -1
metadata +3 -4

data/gumbo-parser/src/tokenizer.c CHANGED

@@ -20,10 +20,7 @@
  Coding conventions specific to this file:
  1. Functions that fill in a token should be named emit_*, and should be
-    followed immediately by a return from the tokenizer (true if no error
-    occurred, false if an error occurred). Sometimes the emit functions
-    themselves return a boolean so that they can be combined with the return
-    statement; in this case, they should match this convention.
+    followed immediately by a return from the tokenizer.
  2. Functions that shuffle data from temporaries to final API structures
     should be named finish_*, and be called just before the tokenizer exits the
     state that accumulates the temporary.
@@ -141,10 +138,6 @@ typedef struct GumboInternalTokenizerState {
   // text tokens emitted will be GUMBO_TOKEN_CDATA.
   bool _is_in_cdata;
-  // A flag indicating whether the tokenizer has seen a parse error since the
-  // last token was emitted.
-  bool _parse_error;
   // Certain states (notably character references) may emit two character tokens
   // at once, but the contract for lex() fills in only one token at a time. The
   // extra character is buffered here, and then this is checked on entry to
@@ -207,7 +200,6 @@ static void tokenizer_add_parse_error (
   GumboErrorType type
 ) {
   GumboTokenizerState* tokenizer = parser->_tokenizer_state;
-  tokenizer->_parse_error = true;
   GumboError* error = gumbo_add_error(parser);
   if (!error) {
     return;
@@ -228,7 +220,6 @@ static void tokenizer_add_char_ref_error (
   int codepoint
 ) {
   GumboTokenizerState* tokenizer = parser->_tokenizer_state;
-  tokenizer->_parse_error = true;
   GumboError* error = gumbo_add_error(parser);
   if (!error)
     return;
@@ -248,7 +239,6 @@ static void tokenizer_add_token_parse_error (
   GumboErrorType type
 ) {
   GumboTokenizerState* tokenizer = parser->_tokenizer_state;
-  tokenizer->_parse_error = true;
   GumboError* error = gumbo_add_error(parser);
   if (!error)
     return;
@@ -732,7 +722,10 @@ static void copy_over_original_tag_text (
   original_text->data = tag_state->_original_text;
   original_text->length = utf8iterator_get_char_pointer(&tokenizer->_input) -
                           tag_state->_original_text;
-  if (original_text->data[original_text->length - 1] == '\r') {
+  if (
+    original_text->length
+    && original_text->data[original_text->length - 1] == '\r'
+  ) {
     // Since \r is skipped by the UTF-8 iterator, it can sometimes end up
     // appended to the end of original text even when it's really the first part
     // of the next character. If we detect this situation, shrink the length of
@@ -770,7 +763,6 @@ static void finish_tag_name(GumboParser* parser) {
 // Adds an ERR_DUPLICATE_ATTR parse error to the parser's error struct.
 static void add_duplicate_attr_error(GumboParser* parser) {
   GumboTokenizerState* tokenizer = parser->_tokenizer_state;
-  tokenizer->_parse_error = true;
   GumboError* error = gumbo_add_error(parser);
   if (!error) {
     return;
@@ -788,9 +780,8 @@ static void add_duplicate_attr_error(GumboParser* parser) {
 // the attribute's name. The attribute's value starts out as the empty string
 // (following the "Boolean attributes" section of the spec) and is only
 // overwritten on finish_attribute_value(). If the attribute has already been
-// specified, the new attribute is dropped, a parse error is added, and the
-// function returns false. Otherwise, this returns true.
-static bool finish_attribute_name(GumboParser* parser) {
+// specified, the new attribute is dropped and a parse error is added.
+static void finish_attribute_name(GumboParser* parser) {
   GumboTokenizerState* tokenizer = parser->_tokenizer_state;
   GumboTagState* tag_state = &tokenizer->_tag_state;
   // May've been set by a previous attribute without a value; reset it here.
@@ -813,7 +804,7 @@ static bool finish_attribute_name(GumboParser* parser) {
       add_duplicate_attr_error(parser);
       reinitialize_tag_buffer(parser);
       tag_state->_drop_next_attr_value = true;
-      return false;
+      return;
     }
   }
@@ -835,7 +826,6 @@ static bool finish_attribute_name(GumboParser* parser) {
   );
   gumbo_vector_add(attr, attributes);
   reinitialize_tag_buffer(parser);
-  return true;
 }
 // Finishes an attribute value. This sets the value of the most recently added
@@ -881,7 +871,6 @@ void gumbo_tokenizer_state_init (
   tokenizer->_reconsume_current_input = false;
   tokenizer->_is_adjusted_current_node_foreign = false;
   tokenizer->_is_in_cdata = false;
-  tokenizer->_parse_error = false;
   tokenizer->_tag_state._last_start_tag = GUMBO_TAG_LAST;
   tokenizer->_tag_state._name = NULL;
@@ -3373,7 +3362,7 @@ static GumboLexerStateFunction dispatch_table[] = {
   [GUMBO_LEX_NUMERIC_CHARACTER_REFERENCE_END] = handle_numeric_character_reference_end_state,
 };
-bool gumbo_lex(GumboParser* parser, GumboToken* output) {
+void gumbo_lex(GumboParser* parser, GumboToken* output) {
   // Because of the spec requirements that...
   //
   // 1. Tokens be handled immediately by the parser upon emission.
@@ -3398,15 +3387,13 @@ bool gumbo_lex(GumboParser* parser, GumboToken* output) {
     // isn't consumed twice.
     tokenizer->_reconsume_current_input = false;
     tokenizer->_buffered_emit_char = kGumboNoChar;
-    return true;
+    return;
   }
   if (maybe_emit_from_mark(parser, output) == EMIT_TOKEN) {
-    // Return no error.
-    return true;
+    return;
   }
-  tokenizer->_parse_error = false;
   while (1) {
     assert(!tokenizer->_resume_pos);
     assert(tokenizer->_buffered_emit_char == kGumboNoChar);
@@ -3420,7 +3407,7 @@ bool gumbo_lex(GumboParser* parser, GumboToken* output) {
     tokenizer->_reconsume_current_input = false;
     if (result == EMIT_TOKEN)
-      return !tokenizer->_parse_error;
+      return;
     if (should_advance) {
       utf8iterator_next(&tokenizer->_input);

data/gumbo-parser/src/tokenizer.h CHANGED

@@ -93,19 +93,8 @@ void gumbo_tokenizer_set_is_adjusted_current_node_foreign (
 );
 // Lexes a single token from the specified buffer, filling the output with the
-// parsed GumboToken data structure. Returns true for a successful
-// tokenization, false if a parse error occurs.
-//
-// Example:
-//   struct GumboInternalParser parser;
-//   GumboToken output;
-//   gumbo_tokenizer_state_init(&parser, text, strlen(text));
-//   while (gumbo_lex(&parser, &output)) {
-//     ...do stuff with output.
-//     gumbo_token_destroy(&token);
-//   }
-//   gumbo_tokenizer_state_destroy(&parser);
-bool gumbo_lex(struct GumboInternalParser* parser, GumboToken* output);
+// parsed GumboToken data structure.
+void gumbo_lex(struct GumboInternalParser* parser, GumboToken* output);
 // Frees the internally-allocated pointers within a GumboToken. Note that this
 // doesn't free the token itself, since oftentimes it will be allocated on the

data/lib/nokogumbo/version.rb CHANGED

@@ -1,3 +1,3 @@
 module Nokogumbo
-  VERSION = "2.0.1"
+  VERSION = "2.0.2"
 end

metadata CHANGED

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: nokogumbo
 version: !ruby/object:Gem::Version
-  version: 2.0.1
+  version: 2.0.2
 platform: ruby
 authors:
 - Sam Ruby
@@ -9,7 +9,7 @@ authors:
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2018-11-11 00:00:00.000000000 Z
+date: 2019-11-19 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: nokogiri
@@ -109,8 +109,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
     - !ruby/object:Gem::Version
       version: '0'
 requirements: []
-rubyforge_project:
-rubygems_version: 2.7.7
+rubygems_version: 3.0.6
 signing_key:
 specification_version: 4
 summary: Nokogiri interface to the Gumbo HTML5 parser