nokogumbo 1.1.3 → 1.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +8 -8
- data/gumbo-parser/src/error.c +1 -1
- data/gumbo-parser/src/gumbo.h +1 -1
- data/gumbo-parser/src/parser.c +16 -24
- data/gumbo-parser/src/tokenizer.c +3 -1
- data/gumbo-parser/src/utf8.c +6 -0
- data/gumbo-parser/src/util.c +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,15 +1,15 @@
|
|
1
1
|
---
|
2
2
|
!binary "U0hBMQ==":
|
3
3
|
metadata.gz: !binary |-
|
4
|
-
|
4
|
+
OGU3MmZhYzUyY2IzYTZiN2RhMDk4YWE3NDViMDg5Zjg4NGZkN2YyNA==
|
5
5
|
data.tar.gz: !binary |-
|
6
|
-
|
6
|
+
NzRlMGZkOWUwN2I1YmM5ZmY4ZWEyYTZlOTBlMjJhMjM5ZTJiZDJlYQ==
|
7
7
|
!binary "U0hBNTEy":
|
8
8
|
metadata.gz: !binary |-
|
9
|
-
|
10
|
-
|
11
|
-
|
9
|
+
ZGJiYmM4NGYyY2U5MWFhMjAzMWI4MmE2MjQ3OTQ0OTM0MTMyY2E5MWExNjU0
|
10
|
+
MTJjOTExZTBlZDM3NGNjNjBmNjMwZTA1MjQ2MWFkZjI5ODZkY2Q3NzMzZmFi
|
11
|
+
MWE2ODNkZjA0OGU4M2ZjYjE4NWVkZTdmZjM0MTQ2NGQ3OTQzYjU=
|
12
12
|
data.tar.gz: !binary |-
|
13
|
-
|
14
|
-
|
15
|
-
|
13
|
+
NTY5NWIwOGNmYjc1ZDcxYzZkMjA0MTUzZTUyYWU2ODIwZTQ0OGE5Mzc0MjUz
|
14
|
+
NTg0Y2Y3ZjRhMjgzYzYzMDE4NmY4NTVkZGQ1ZjMwOTkwM2NiOTcyMTRmOTM0
|
15
|
+
MGNmOWI0OWQ0Y2E4MTdkMmU1MWU0YWM5NTQ2YjU5YzcwYjI2Yzk=
|
data/gumbo-parser/src/error.c
CHANGED
@@ -135,7 +135,7 @@ static const char* find_next_newline(
|
|
135
135
|
|
136
136
|
GumboError* gumbo_add_error(GumboParser* parser) {
|
137
137
|
int max_errors = parser->_options->max_errors;
|
138
|
-
if (max_errors
|
138
|
+
if (max_errors >= 0 && parser->_output->errors.length >= max_errors) {
|
139
139
|
return NULL;
|
140
140
|
}
|
141
141
|
GumboError* error = gumbo_parser_allocate(parser, sizeof(GumboError));
|
data/gumbo-parser/src/gumbo.h
CHANGED
@@ -366,7 +366,7 @@ GumboTag gumbo_tag_enum(const char* tagname);
|
|
366
366
|
/**
|
367
367
|
* Attribute namespaces.
|
368
368
|
* HTML includes special handling for XLink, XML, and XMLNS namespaces on
|
369
|
-
* attributes. Everything else goes in the
|
369
|
+
* attributes. Everything else goes in the generic "NONE" namespace.
|
370
370
|
*/
|
371
371
|
typedef enum {
|
372
372
|
GUMBO_ATTR_NAMESPACE_NONE,
|
data/gumbo-parser/src/parser.c
CHANGED
@@ -43,7 +43,7 @@ static void* malloc_wrapper(void* unused, size_t size) {
|
|
43
43
|
}
|
44
44
|
|
45
45
|
static void free_wrapper(void* unused, void* ptr) {
|
46
|
-
|
46
|
+
free(ptr);
|
47
47
|
}
|
48
48
|
|
49
49
|
const GumboOptions kGumboDefaultOptions = {
|
@@ -537,25 +537,6 @@ static bool is_in_static_list(
|
|
537
537
|
return false;
|
538
538
|
}
|
539
539
|
|
540
|
-
static void push_template_insertion_mode(
|
541
|
-
GumboParser* parser, GumboInsertionMode mode) {
|
542
|
-
gumbo_vector_add(
|
543
|
-
parser, (void*) mode, &parser->_parser_state->_template_insertion_modes);
|
544
|
-
}
|
545
|
-
|
546
|
-
static void pop_template_insertion_mode(GumboParser* parser) {
|
547
|
-
gumbo_vector_pop(parser, &parser->_parser_state->_template_insertion_modes);
|
548
|
-
}
|
549
|
-
|
550
|
-
static GumboInsertionMode get_current_template_insertion_mode(
|
551
|
-
GumboParser* parser) {
|
552
|
-
GumboVector* template_insertion_modes =
|
553
|
-
&parser->_parser_state->_template_insertion_modes;
|
554
|
-
assert(template_insertion_modes->length > 0);
|
555
|
-
return (GumboInsertionMode) template_insertion_modes->data[
|
556
|
-
template_insertion_modes->length - 1];
|
557
|
-
}
|
558
|
-
|
559
540
|
static void set_insertion_mode(GumboParser* parser, GumboInsertionMode mode) {
|
560
541
|
parser->_parser_state->_insertion_mode = mode;
|
561
542
|
}
|
@@ -1735,10 +1716,10 @@ static bool maybe_add_doctype_error(
|
|
1735
1716
|
GumboParser* parser, const GumboToken* token) {
|
1736
1717
|
const GumboTokenDocType* doctype = &token->v.doc_type;
|
1737
1718
|
bool html_doctype = !strcmp(doctype->name, kDoctypeHtml.data);
|
1738
|
-
if (!html_doctype ||
|
1739
|
-
|
1740
|
-
|
1741
|
-
doctype->system_identifier, kSystemIdLegacyCompat.data))
|
1719
|
+
if ((!html_doctype ||
|
1720
|
+
doctype->has_public_identifier ||
|
1721
|
+
(doctype->has_system_identifier && !strcmp(
|
1722
|
+
doctype->system_identifier, kSystemIdLegacyCompat.data))) &&
|
1742
1723
|
!(html_doctype && (
|
1743
1724
|
doctype_matches(doctype, &kPublicIdHtml4_0,
|
1744
1725
|
&kSystemIdRecHtml4_0, true) ||
|
@@ -2372,6 +2353,13 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
2372
2353
|
node = pop_current_node(parser);
|
2373
2354
|
} while (node != state->_open_elements.data[1]);
|
2374
2355
|
|
2356
|
+
// Removing & destroying the body node is going to kill any nodes that have
|
2357
|
+
// been added to the list of active formatting elements, and so we should
|
2358
|
+
// clear it to prevent a use-after-free if the list of active formatting
|
2359
|
+
// elements is reconstructed afterwards. This may happen if whitespace
|
2360
|
+
// follows the </frameset>.
|
2361
|
+
clear_active_formatting_elements(parser);
|
2362
|
+
|
2375
2363
|
// Remove the body node. We may want to factor this out into a generic
|
2376
2364
|
// helper, but right now this is the only code that needs to do this.
|
2377
2365
|
GumboVector* children = &parser->_output->root->v.element.children;
|
@@ -3543,6 +3531,10 @@ static bool handle_after_frameset(GumboParser* parser, GumboToken* token) {
|
|
3543
3531
|
} else if (tag_is(token, kStartTag, GUMBO_TAG_HTML)) {
|
3544
3532
|
return handle_in_body(parser, token);
|
3545
3533
|
} else if (tag_is(token, kEndTag, GUMBO_TAG_HTML)) {
|
3534
|
+
GumboNode* html = parser->_parser_state->_open_elements.data[0];
|
3535
|
+
assert(node_tag_is(html, GUMBO_TAG_HTML));
|
3536
|
+
record_end_of_element(
|
3537
|
+
parser->_parser_state->_current_token, &html->v.element);
|
3546
3538
|
set_insertion_mode(parser, GUMBO_INSERTION_MODE_AFTER_AFTER_FRAMESET);
|
3547
3539
|
return true;
|
3548
3540
|
} else if (tag_is(token, kStartTag, GUMBO_TAG_NOFRAMES)) {
|
@@ -442,7 +442,8 @@ static void finish_token(GumboParser* parser, GumboToken* token) {
|
|
442
442
|
reset_token_start_point(tokenizer);
|
443
443
|
token->original_text.length =
|
444
444
|
tokenizer->_token_start - token->original_text.data;
|
445
|
-
if (token->original_text.length > 0 &&
|
445
|
+
if (token->original_text.length > 0 &&
|
446
|
+
token->original_text.data[token->original_text.length - 1] == '\r') {
|
446
447
|
// The UTF8 iterator will ignore carriage returns in the input stream, which
|
447
448
|
// means that the next token may start one past a \r character. The pointer
|
448
449
|
// arithmetic above results in that \r being appended to the original text
|
@@ -815,6 +816,7 @@ static void finish_attribute_value(GumboParser* parser) {
|
|
815
816
|
// Duplicate attribute name detected in an earlier state, so we have to
|
816
817
|
// ignore the value.
|
817
818
|
tag_state->_drop_next_attr_value = false;
|
819
|
+
reinitialize_tag_buffer(parser);
|
818
820
|
return;
|
819
821
|
}
|
820
822
|
|
data/gumbo-parser/src/utf8.c
CHANGED
@@ -208,6 +208,12 @@ void utf8iterator_init(
|
|
208
208
|
}
|
209
209
|
|
210
210
|
void utf8iterator_next(Utf8Iterator* iter) {
|
211
|
+
if (iter->_current == -1) {
|
212
|
+
// If we're already at EOF, bail out before advancing anything to avoid
|
213
|
+
// reading past the end of the buffer. It's easier to catch this case here
|
214
|
+
// than litter the code with lots of individual checks for EOF.
|
215
|
+
return;
|
216
|
+
}
|
211
217
|
iter->_start += iter->_width;
|
212
218
|
// We update positions based on the *last* character read, so that the first
|
213
219
|
// character following a newline is at column 1 in the next line.
|
data/gumbo-parser/src/util.c
CHANGED
@@ -36,7 +36,7 @@ void* gumbo_parser_allocate(GumboParser* parser, size_t num_bytes) {
|
|
36
36
|
}
|
37
37
|
|
38
38
|
void gumbo_parser_deallocate(GumboParser* parser, void* ptr) {
|
39
|
-
|
39
|
+
parser->_options->deallocator(parser->_options->userdata, ptr);
|
40
40
|
}
|
41
41
|
|
42
42
|
char* gumbo_copy_stringz(GumboParser* parser, const char* str) {
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: nokogumbo
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.1.
|
4
|
+
version: 1.1.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Sam Ruby
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2014-03-18 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: nokogiri
|