nokogumbo 1.1.3 → 1.1.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +8 -8
- data/gumbo-parser/src/error.c +1 -1
- data/gumbo-parser/src/gumbo.h +1 -1
- data/gumbo-parser/src/parser.c +16 -24
- data/gumbo-parser/src/tokenizer.c +3 -1
- data/gumbo-parser/src/utf8.c +6 -0
- data/gumbo-parser/src/util.c +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,15 +1,15 @@
|
|
1
1
|
---
|
2
2
|
!binary "U0hBMQ==":
|
3
3
|
metadata.gz: !binary |-
|
4
|
-
|
4
|
+
OGU3MmZhYzUyY2IzYTZiN2RhMDk4YWE3NDViMDg5Zjg4NGZkN2YyNA==
|
5
5
|
data.tar.gz: !binary |-
|
6
|
-
|
6
|
+
NzRlMGZkOWUwN2I1YmM5ZmY4ZWEyYTZlOTBlMjJhMjM5ZTJiZDJlYQ==
|
7
7
|
!binary "U0hBNTEy":
|
8
8
|
metadata.gz: !binary |-
|
9
|
-
|
10
|
-
|
11
|
-
|
9
|
+
ZGJiYmM4NGYyY2U5MWFhMjAzMWI4MmE2MjQ3OTQ0OTM0MTMyY2E5MWExNjU0
|
10
|
+
MTJjOTExZTBlZDM3NGNjNjBmNjMwZTA1MjQ2MWFkZjI5ODZkY2Q3NzMzZmFi
|
11
|
+
MWE2ODNkZjA0OGU4M2ZjYjE4NWVkZTdmZjM0MTQ2NGQ3OTQzYjU=
|
12
12
|
data.tar.gz: !binary |-
|
13
|
-
|
14
|
-
|
15
|
-
|
13
|
+
NTY5NWIwOGNmYjc1ZDcxYzZkMjA0MTUzZTUyYWU2ODIwZTQ0OGE5Mzc0MjUz
|
14
|
+
NTg0Y2Y3ZjRhMjgzYzYzMDE4NmY4NTVkZGQ1ZjMwOTkwM2NiOTcyMTRmOTM0
|
15
|
+
MGNmOWI0OWQ0Y2E4MTdkMmU1MWU0YWM5NTQ2YjU5YzcwYjI2Yzk=
|
data/gumbo-parser/src/error.c
CHANGED
@@ -135,7 +135,7 @@ static const char* find_next_newline(
|
|
135
135
|
|
136
136
|
GumboError* gumbo_add_error(GumboParser* parser) {
|
137
137
|
int max_errors = parser->_options->max_errors;
|
138
|
-
if (max_errors
|
138
|
+
if (max_errors >= 0 && parser->_output->errors.length >= max_errors) {
|
139
139
|
return NULL;
|
140
140
|
}
|
141
141
|
GumboError* error = gumbo_parser_allocate(parser, sizeof(GumboError));
|
data/gumbo-parser/src/gumbo.h
CHANGED
@@ -366,7 +366,7 @@ GumboTag gumbo_tag_enum(const char* tagname);
|
|
366
366
|
/**
|
367
367
|
* Attribute namespaces.
|
368
368
|
* HTML includes special handling for XLink, XML, and XMLNS namespaces on
|
369
|
-
* attributes. Everything else goes in the
|
369
|
+
* attributes. Everything else goes in the generic "NONE" namespace.
|
370
370
|
*/
|
371
371
|
typedef enum {
|
372
372
|
GUMBO_ATTR_NAMESPACE_NONE,
|
data/gumbo-parser/src/parser.c
CHANGED
@@ -43,7 +43,7 @@ static void* malloc_wrapper(void* unused, size_t size) {
|
|
43
43
|
}
|
44
44
|
|
45
45
|
static void free_wrapper(void* unused, void* ptr) {
|
46
|
-
|
46
|
+
free(ptr);
|
47
47
|
}
|
48
48
|
|
49
49
|
const GumboOptions kGumboDefaultOptions = {
|
@@ -537,25 +537,6 @@ static bool is_in_static_list(
|
|
537
537
|
return false;
|
538
538
|
}
|
539
539
|
|
540
|
-
static void push_template_insertion_mode(
|
541
|
-
GumboParser* parser, GumboInsertionMode mode) {
|
542
|
-
gumbo_vector_add(
|
543
|
-
parser, (void*) mode, &parser->_parser_state->_template_insertion_modes);
|
544
|
-
}
|
545
|
-
|
546
|
-
static void pop_template_insertion_mode(GumboParser* parser) {
|
547
|
-
gumbo_vector_pop(parser, &parser->_parser_state->_template_insertion_modes);
|
548
|
-
}
|
549
|
-
|
550
|
-
static GumboInsertionMode get_current_template_insertion_mode(
|
551
|
-
GumboParser* parser) {
|
552
|
-
GumboVector* template_insertion_modes =
|
553
|
-
&parser->_parser_state->_template_insertion_modes;
|
554
|
-
assert(template_insertion_modes->length > 0);
|
555
|
-
return (GumboInsertionMode) template_insertion_modes->data[
|
556
|
-
template_insertion_modes->length - 1];
|
557
|
-
}
|
558
|
-
|
559
540
|
static void set_insertion_mode(GumboParser* parser, GumboInsertionMode mode) {
|
560
541
|
parser->_parser_state->_insertion_mode = mode;
|
561
542
|
}
|
@@ -1735,10 +1716,10 @@ static bool maybe_add_doctype_error(
|
|
1735
1716
|
GumboParser* parser, const GumboToken* token) {
|
1736
1717
|
const GumboTokenDocType* doctype = &token->v.doc_type;
|
1737
1718
|
bool html_doctype = !strcmp(doctype->name, kDoctypeHtml.data);
|
1738
|
-
if (!html_doctype ||
|
1739
|
-
|
1740
|
-
|
1741
|
-
doctype->system_identifier, kSystemIdLegacyCompat.data))
|
1719
|
+
if ((!html_doctype ||
|
1720
|
+
doctype->has_public_identifier ||
|
1721
|
+
(doctype->has_system_identifier && !strcmp(
|
1722
|
+
doctype->system_identifier, kSystemIdLegacyCompat.data))) &&
|
1742
1723
|
!(html_doctype && (
|
1743
1724
|
doctype_matches(doctype, &kPublicIdHtml4_0,
|
1744
1725
|
&kSystemIdRecHtml4_0, true) ||
|
@@ -2372,6 +2353,13 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
2372
2353
|
node = pop_current_node(parser);
|
2373
2354
|
} while (node != state->_open_elements.data[1]);
|
2374
2355
|
|
2356
|
+
// Removing & destroying the body node is going to kill any nodes that have
|
2357
|
+
// been added to the list of active formatting elements, and so we should
|
2358
|
+
// clear it to prevent a use-after-free if the list of active formatting
|
2359
|
+
// elements is reconstructed afterwards. This may happen if whitespace
|
2360
|
+
// follows the </frameset>.
|
2361
|
+
clear_active_formatting_elements(parser);
|
2362
|
+
|
2375
2363
|
// Remove the body node. We may want to factor this out into a generic
|
2376
2364
|
// helper, but right now this is the only code that needs to do this.
|
2377
2365
|
GumboVector* children = &parser->_output->root->v.element.children;
|
@@ -3543,6 +3531,10 @@ static bool handle_after_frameset(GumboParser* parser, GumboToken* token) {
|
|
3543
3531
|
} else if (tag_is(token, kStartTag, GUMBO_TAG_HTML)) {
|
3544
3532
|
return handle_in_body(parser, token);
|
3545
3533
|
} else if (tag_is(token, kEndTag, GUMBO_TAG_HTML)) {
|
3534
|
+
GumboNode* html = parser->_parser_state->_open_elements.data[0];
|
3535
|
+
assert(node_tag_is(html, GUMBO_TAG_HTML));
|
3536
|
+
record_end_of_element(
|
3537
|
+
parser->_parser_state->_current_token, &html->v.element);
|
3546
3538
|
set_insertion_mode(parser, GUMBO_INSERTION_MODE_AFTER_AFTER_FRAMESET);
|
3547
3539
|
return true;
|
3548
3540
|
} else if (tag_is(token, kStartTag, GUMBO_TAG_NOFRAMES)) {
|
data/gumbo-parser/src/tokenizer.c
CHANGED
@@ -442,7 +442,8 @@ static void finish_token(GumboParser* parser, GumboToken* token) {
|
|
442
442
|
reset_token_start_point(tokenizer);
|
443
443
|
token->original_text.length =
|
444
444
|
tokenizer->_token_start - token->original_text.data;
|
445
|
-
if (token->original_text.length > 0 &&
|
445
|
+
if (token->original_text.length > 0 &&
|
446
|
+
token->original_text.data[token->original_text.length - 1] == '\r') {
|
446
447
|
// The UTF8 iterator will ignore carriage returns in the input stream, which
|
447
448
|
// means that the next token may start one past a \r character. The pointer
|
448
449
|
// arithmetic above results in that \r being appended to the original text
|
@@ -815,6 +816,7 @@ static void finish_attribute_value(GumboParser* parser) {
|
|
815
816
|
// Duplicate attribute name detected in an earlier state, so we have to
|
816
817
|
// ignore the value.
|
817
818
|
tag_state->_drop_next_attr_value = false;
|
819
|
+
reinitialize_tag_buffer(parser);
|
818
820
|
return;
|
819
821
|
}
|
820
822
|
|
data/gumbo-parser/src/utf8.c
CHANGED
@@ -208,6 +208,12 @@ void utf8iterator_init(
|
|
208
208
|
}
|
209
209
|
|
210
210
|
void utf8iterator_next(Utf8Iterator* iter) {
|
211
|
+
if (iter->_current == -1) {
|
212
|
+
// If we're already at EOF, bail out before advancing anything to avoid
|
213
|
+
// reading past the end of the buffer. It's easier to catch this case here
|
214
|
+
// than litter the code with lots of individual checks for EOF.
|
215
|
+
return;
|
216
|
+
}
|
211
217
|
iter->_start += iter->_width;
|
212
218
|
// We update positions based on the *last* character read, so that the first
|
213
219
|
// character following a newline is at column 1 in the next line.
|
data/gumbo-parser/src/util.c
CHANGED
@@ -36,7 +36,7 @@ void* gumbo_parser_allocate(GumboParser* parser, size_t num_bytes) {
|
|
36
36
|
}
|
37
37
|
|
38
38
|
void gumbo_parser_deallocate(GumboParser* parser, void* ptr) {
|
39
|
-
|
39
|
+
parser->_options->deallocator(parser->_options->userdata, ptr);
|
40
40
|
}
|
41
41
|
|
42
42
|
char* gumbo_copy_stringz(GumboParser* parser, const char* str) {
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: nokogumbo
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.1.3
|
4
|
+
version: 1.1.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Sam Ruby
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2014-03-18 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: nokogiri
|