nokogumbo 1.1.3 → 1.1.4

This diff shows the changes between two publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,15 +1,15 @@
1
1
  ---
2
2
  !binary "U0hBMQ==":
3
3
  metadata.gz: !binary |-
4
- NTBmOGQzODk2MTllYjczZmQ0NDlmMjFiZTgwNDBlODY0ZGRhYmFhOA==
4
+ OGU3MmZhYzUyY2IzYTZiN2RhMDk4YWE3NDViMDg5Zjg4NGZkN2YyNA==
5
5
  data.tar.gz: !binary |-
6
- MDU5ZGZjZjRjNmUyYTBmMGYxOGMxMDgwMDI5YzJmZDFmYmNjZmM3OA==
6
+ NzRlMGZkOWUwN2I1YmM5ZmY4ZWEyYTZlOTBlMjJhMjM5ZTJiZDJlYQ==
7
7
  !binary "U0hBNTEy":
8
8
  metadata.gz: !binary |-
9
- MDc2MzdiYzdjNmFlMjRmZTdiODFlMzI5MjAxOTdmM2I1ZmZjNGZhMzBmNDYy
10
- ZmU1NDM3NDhmMjBlYzQ2MDY2NGFlNzBiNWExNGIzYmExYTc5NGRiMzViYjZh
11
- NzQ2NmFhOGY5NzIxMDQ2Nzg2OWViNTFmY2VkYWEzYjI0ZTA4NjE=
9
+ ZGJiYmM4NGYyY2U5MWFhMjAzMWI4MmE2MjQ3OTQ0OTM0MTMyY2E5MWExNjU0
10
+ MTJjOTExZTBlZDM3NGNjNjBmNjMwZTA1MjQ2MWFkZjI5ODZkY2Q3NzMzZmFi
11
+ MWE2ODNkZjA0OGU4M2ZjYjE4NWVkZTdmZjM0MTQ2NGQ3OTQzYjU=
12
12
  data.tar.gz: !binary |-
13
- MzEwOGJlY2UxOGFhM2M5MzVhNGI3ODJlYWMwNzIzM2U0YTdhY2U4ZDYwNmY4
14
- MzAzOGJhY2ZlZGQwYTA0ZWY4NmJkYWE5OWNkYmViNTgwNWU4ZmVjMThkZjVk
15
- YWMwMmZjYTA5ZTEyNGMwZjQ3OTAxYTgyMWQyODBmMzY5OTY1OTQ=
13
+ NTY5NWIwOGNmYjc1ZDcxYzZkMjA0MTUzZTUyYWU2ODIwZTQ0OGE5Mzc0MjUz
14
+ NTg0Y2Y3ZjRhMjgzYzYzMDE4NmY4NTVkZGQ1ZjMwOTkwM2NiOTcyMTRmOTM0
15
+ MGNmOWI0OWQ0Y2E4MTdkMmU1MWU0YWM5NTQ2YjU5YzcwYjI2Yzk=
@@ -135,7 +135,7 @@ static const char* find_next_newline(
135
135
 
136
136
  GumboError* gumbo_add_error(GumboParser* parser) {
137
137
  int max_errors = parser->_options->max_errors;
138
- if (max_errors < 0 && parser->_output->errors.length >= max_errors) {
138
+ if (max_errors >= 0 && parser->_output->errors.length >= max_errors) {
139
139
  return NULL;
140
140
  }
141
141
  GumboError* error = gumbo_parser_allocate(parser, sizeof(GumboError));
@@ -366,7 +366,7 @@ GumboTag gumbo_tag_enum(const char* tagname);
366
366
  /**
367
367
  * Attribute namespaces.
368
368
  * HTML includes special handling for XLink, XML, and XMLNS namespaces on
369
- * attributes. Everything else goes in the generatic "NONE" namespace.
369
+ * attributes. Everything else goes in the generic "NONE" namespace.
370
370
  */
371
371
  typedef enum {
372
372
  GUMBO_ATTR_NAMESPACE_NONE,
@@ -43,7 +43,7 @@ static void* malloc_wrapper(void* unused, size_t size) {
43
43
  }
44
44
 
45
45
  static void free_wrapper(void* unused, void* ptr) {
46
- return free(ptr);
46
+ free(ptr);
47
47
  }
48
48
 
49
49
  const GumboOptions kGumboDefaultOptions = {
@@ -537,25 +537,6 @@ static bool is_in_static_list(
537
537
  return false;
538
538
  }
539
539
 
540
- static void push_template_insertion_mode(
541
- GumboParser* parser, GumboInsertionMode mode) {
542
- gumbo_vector_add(
543
- parser, (void*) mode, &parser->_parser_state->_template_insertion_modes);
544
- }
545
-
546
- static void pop_template_insertion_mode(GumboParser* parser) {
547
- gumbo_vector_pop(parser, &parser->_parser_state->_template_insertion_modes);
548
- }
549
-
550
- static GumboInsertionMode get_current_template_insertion_mode(
551
- GumboParser* parser) {
552
- GumboVector* template_insertion_modes =
553
- &parser->_parser_state->_template_insertion_modes;
554
- assert(template_insertion_modes->length > 0);
555
- return (GumboInsertionMode) template_insertion_modes->data[
556
- template_insertion_modes->length - 1];
557
- }
558
-
559
540
  static void set_insertion_mode(GumboParser* parser, GumboInsertionMode mode) {
560
541
  parser->_parser_state->_insertion_mode = mode;
561
542
  }
@@ -1735,10 +1716,10 @@ static bool maybe_add_doctype_error(
1735
1716
  GumboParser* parser, const GumboToken* token) {
1736
1717
  const GumboTokenDocType* doctype = &token->v.doc_type;
1737
1718
  bool html_doctype = !strcmp(doctype->name, kDoctypeHtml.data);
1738
- if (!html_doctype ||
1739
- doctype->has_public_identifier ||
1740
- (doctype->has_system_identifier && !strcmp(
1741
- doctype->system_identifier, kSystemIdLegacyCompat.data)) ||
1719
+ if ((!html_doctype ||
1720
+ doctype->has_public_identifier ||
1721
+ (doctype->has_system_identifier && !strcmp(
1722
+ doctype->system_identifier, kSystemIdLegacyCompat.data))) &&
1742
1723
  !(html_doctype && (
1743
1724
  doctype_matches(doctype, &kPublicIdHtml4_0,
1744
1725
  &kSystemIdRecHtml4_0, true) ||
@@ -2372,6 +2353,13 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
2372
2353
  node = pop_current_node(parser);
2373
2354
  } while (node != state->_open_elements.data[1]);
2374
2355
 
2356
+ // Removing & destroying the body node is going to kill any nodes that have
2357
+ // been added to the list of active formatting elements, and so we should
2358
+ // clear it to prevent a use-after-free if the list of active formatting
2359
+ // elements is reconstructed afterwards. This may happen if whitespace
2360
+ // follows the </frameset>.
2361
+ clear_active_formatting_elements(parser);
2362
+
2375
2363
  // Remove the body node. We may want to factor this out into a generic
2376
2364
  // helper, but right now this is the only code that needs to do this.
2377
2365
  GumboVector* children = &parser->_output->root->v.element.children;
@@ -3543,6 +3531,10 @@ static bool handle_after_frameset(GumboParser* parser, GumboToken* token) {
3543
3531
  } else if (tag_is(token, kStartTag, GUMBO_TAG_HTML)) {
3544
3532
  return handle_in_body(parser, token);
3545
3533
  } else if (tag_is(token, kEndTag, GUMBO_TAG_HTML)) {
3534
+ GumboNode* html = parser->_parser_state->_open_elements.data[0];
3535
+ assert(node_tag_is(html, GUMBO_TAG_HTML));
3536
+ record_end_of_element(
3537
+ parser->_parser_state->_current_token, &html->v.element);
3546
3538
  set_insertion_mode(parser, GUMBO_INSERTION_MODE_AFTER_AFTER_FRAMESET);
3547
3539
  return true;
3548
3540
  } else if (tag_is(token, kStartTag, GUMBO_TAG_NOFRAMES)) {
@@ -442,7 +442,8 @@ static void finish_token(GumboParser* parser, GumboToken* token) {
442
442
  reset_token_start_point(tokenizer);
443
443
  token->original_text.length =
444
444
  tokenizer->_token_start - token->original_text.data;
445
- if (token->original_text.length > 0 && token->original_text.data[token->original_text.length - 1] == '\r') {
445
+ if (token->original_text.length > 0 &&
446
+ token->original_text.data[token->original_text.length - 1] == '\r') {
446
447
  // The UTF8 iterator will ignore carriage returns in the input stream, which
447
448
  // means that the next token may start one past a \r character. The pointer
448
449
  // arithmetic above results in that \r being appended to the original text
@@ -815,6 +816,7 @@ static void finish_attribute_value(GumboParser* parser) {
815
816
  // Duplicate attribute name detected in an earlier state, so we have to
816
817
  // ignore the value.
817
818
  tag_state->_drop_next_attr_value = false;
819
+ reinitialize_tag_buffer(parser);
818
820
  return;
819
821
  }
820
822
 
@@ -208,6 +208,12 @@ void utf8iterator_init(
208
208
  }
209
209
 
210
210
  void utf8iterator_next(Utf8Iterator* iter) {
211
+ if (iter->_current == -1) {
212
+ // If we're already at EOF, bail out before advancing anything to avoid
213
+ // reading past the end of the buffer. It's easier to catch this case here
214
+ // than litter the code with lots of individual checks for EOF.
215
+ return;
216
+ }
211
217
  iter->_start += iter->_width;
212
218
  // We update positions based on the *last* character read, so that the first
213
219
  // character following a newline is at column 1 in the next line.
@@ -36,7 +36,7 @@ void* gumbo_parser_allocate(GumboParser* parser, size_t num_bytes) {
36
36
  }
37
37
 
38
38
  void gumbo_parser_deallocate(GumboParser* parser, void* ptr) {
39
- return parser->_options->deallocator(parser->_options->userdata, ptr);
39
+ parser->_options->deallocator(parser->_options->userdata, ptr);
40
40
  }
41
41
 
42
42
  char* gumbo_copy_stringz(GumboParser* parser, const char* str) {
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: nokogumbo
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.1.3
4
+ version: 1.1.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Sam Ruby
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2013-11-16 00:00:00.000000000 Z
11
+ date: 2014-03-18 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: nokogiri