nokogumbo 1.3.0 → 1.4.1
Sign up to get free protection for your applications and to get access to all the features.
- data/README.md +1 -1
- data/ext/nokogumboc/nokogumbo.c +1 -0
- data/gumbo-parser/src/error.c +6 -3
- data/gumbo-parser/src/gumbo.h +36 -170
- data/gumbo-parser/src/parser.c +1030 -779
- data/gumbo-parser/src/string_buffer.c +8 -1
- data/gumbo-parser/src/string_buffer.h +5 -0
- data/gumbo-parser/src/tag.c +35 -162
- data/gumbo-parser/src/tag.in +150 -0
- data/gumbo-parser/src/tag_enum.h +150 -0
- data/gumbo-parser/src/tag_gperf.h +343 -0
- data/gumbo-parser/src/tag_sizes.h +1 -0
- data/gumbo-parser/src/tag_strings.h +150 -0
- data/gumbo-parser/src/token_type.h +1 -0
- data/gumbo-parser/src/tokenizer.c +29 -21
- data/gumbo-parser/src/utf8.c +9 -8
- data/gumbo-parser/src/vector.c +1 -1
- data/gumbo-parser/visualc/include/strings.h +2 -1
- data/test-nokogumbo.rb +140 -0
- metadata +16 -10
@@ -136,6 +136,10 @@ typedef struct GumboInternalTokenizerState {
|
|
136
136
|
// markup declaration state.
|
137
137
|
bool _is_current_node_foreign;
|
138
138
|
|
139
|
+
// A flag indicating whether the tokenizer is in a CDATA section. If so, then
|
140
|
+
// text tokens emitted will be GUMBO_TOKEN_CDATA.
|
141
|
+
bool _is_in_cdata;
|
142
|
+
|
139
143
|
// Certain states (notably character references) may emit two character tokens
|
140
144
|
// at once, but the contract for lex() fills in only one token at a time. The
|
141
145
|
// extra character is buffered here, and then this is checked on entry to
|
@@ -315,7 +319,11 @@ static int ensure_lowercase(int c) {
|
|
315
319
|
return c >= 'A' && c <= 'Z' ? c + 0x20 : c;
|
316
320
|
}
|
317
321
|
|
318
|
-
static GumboTokenType get_char_token_type(int c) {
|
322
|
+
static GumboTokenType get_char_token_type(bool is_in_cdata, int c) {
|
323
|
+
if (is_in_cdata && c > 0) {
|
324
|
+
return GUMBO_TOKEN_CDATA;
|
325
|
+
}
|
326
|
+
|
319
327
|
switch (c) {
|
320
328
|
case '\t':
|
321
329
|
case '\n':
|
@@ -348,12 +356,10 @@ static void clear_temporary_buffer(GumboParser* parser) {
|
|
348
356
|
GumboTokenizerState* tokenizer = parser->_tokenizer_state;
|
349
357
|
assert(!tokenizer->_temporary_buffer_emit);
|
350
358
|
utf8iterator_mark(&tokenizer->_input);
|
351
|
-
|
352
|
-
gumbo_string_buffer_init(parser, &tokenizer->_temporary_buffer);
|
359
|
+
gumbo_string_buffer_clear(parser, &tokenizer->_temporary_buffer);
|
353
360
|
// The temporary buffer and script data buffer are the same object in the
|
354
361
|
// spec, so the script data buffer should be cleared as well.
|
355
|
-
|
356
|
-
gumbo_string_buffer_init(parser, &tokenizer->_script_data_buffer);
|
362
|
+
gumbo_string_buffer_clear(parser, &tokenizer->_script_data_buffer);
|
357
363
|
}
|
358
364
|
|
359
365
|
// Appends a codepoint to the temporary buffer.
|
@@ -475,7 +481,7 @@ static void finish_doctype_system_id(GumboParser* parser) {
|
|
475
481
|
|
476
482
|
// Writes a single specified character to the output token.
|
477
483
|
static void emit_char(GumboParser* parser, int c, GumboToken* output) {
|
478
|
-
output->type = get_char_token_type(c);
|
484
|
+
output->type = get_char_token_type(parser->_tokenizer_state->_is_in_cdata, c);
|
479
485
|
output->v.character = c;
|
480
486
|
finish_token(parser, output);
|
481
487
|
}
|
@@ -689,7 +695,11 @@ static void start_new_tag(GumboParser* parser, bool is_start_tag) {
|
|
689
695
|
gumbo_string_buffer_append_codepoint(parser, c, &tag_state->_buffer);
|
690
696
|
|
691
697
|
assert(tag_state->_attributes.data == NULL);
|
692
|
-
|
698
|
+
// Initial size chosen by statistical analysis of a corpus of 60k webpages.
|
699
|
+
// 99.5% of elements have 0 attributes, 93% of the remainder have 1. These
|
700
|
+
// numbers are a bit higher for more modern websites (eg. ~45% = 0, ~40% = 1
|
701
|
+
// for the HTML5 Spec), but still have basically 99% of nodes with <= 2 attrs.
|
702
|
+
gumbo_vector_init(parser, 1, &tag_state->_attributes);
|
693
703
|
tag_state->_drop_next_attr_value = false;
|
694
704
|
tag_state->_is_start_tag = is_start_tag;
|
695
705
|
tag_state->_is_self_closing = false;
|
@@ -743,11 +753,9 @@ static void finish_tag_name(GumboParser* parser) {
|
|
743
753
|
GumboTokenizerState* tokenizer = parser->_tokenizer_state;
|
744
754
|
GumboTagState* tag_state = &tokenizer->_tag_state;
|
745
755
|
|
746
|
-
|
747
|
-
|
748
|
-
tag_state->_tag = gumbo_tag_enum(temp);
|
756
|
+
tag_state->_tag = gumbo_tagn_enum(
|
757
|
+
tag_state->_buffer.data, tag_state->_buffer.length);
|
749
758
|
reinitialize_tag_buffer(parser);
|
750
|
-
gumbo_parser_deallocate(parser, (void*) temp);
|
751
759
|
}
|
752
760
|
|
753
761
|
// Adds an ERR_DUPLICATE_ATTR parse error to the parser's error struct.
|
@@ -833,13 +841,9 @@ static void finish_attribute_value(GumboParser* parser) {
|
|
833
841
|
static bool is_appropriate_end_tag(GumboParser* parser) {
|
834
842
|
GumboTagState* tag_state = &parser->_tokenizer_state->_tag_state;
|
835
843
|
assert(!tag_state->_is_start_tag);
|
836
|
-
// Null terminate the current string buffer, so it can be passed to
|
837
|
-
// gumbo_tag_enum, but don't increment the length in case we need to dump the
|
838
|
-
// buffer as character tokens.
|
839
|
-
gumbo_string_buffer_append_codepoint(parser, '\0', &tag_state->_buffer);
|
840
|
-
--tag_state->_buffer.length;
|
841
844
|
return tag_state->_last_start_tag != GUMBO_TAG_LAST &&
|
842
|
-
tag_state->_last_start_tag ==
|
845
|
+
tag_state->_last_start_tag ==
|
846
|
+
gumbo_tagn_enum(tag_state->_buffer.data, tag_state->_buffer.length);
|
843
847
|
}
|
844
848
|
|
845
849
|
void gumbo_tokenizer_state_init(
|
@@ -850,6 +854,7 @@ void gumbo_tokenizer_state_init(
|
|
850
854
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_DATA);
|
851
855
|
tokenizer->_reconsume_current_input = false;
|
852
856
|
tokenizer->_is_current_node_foreign = false;
|
857
|
+
tokenizer->_is_in_cdata = false;
|
853
858
|
tokenizer->_tag_state._last_start_tag = GUMBO_TAG_LAST;
|
854
859
|
|
855
860
|
tokenizer->_buffered_emit_char = kGumboNoChar;
|
@@ -1588,8 +1593,7 @@ static StateResult handle_script_double_escaped_lt_state(
|
|
1588
1593
|
int c, GumboToken* output) {
|
1589
1594
|
if (c == '/') {
|
1590
1595
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_SCRIPT_DOUBLE_ESCAPED_END);
|
1591
|
-
|
1592
|
-
gumbo_string_buffer_init(parser, &tokenizer->_script_data_buffer);
|
1596
|
+
gumbo_string_buffer_clear(parser, &tokenizer->_script_data_buffer);
|
1593
1597
|
return emit_current_char(parser, output);
|
1594
1598
|
} else {
|
1595
1599
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_SCRIPT_DOUBLE_ESCAPED);
|
@@ -2041,6 +2045,7 @@ static StateResult handle_markup_declaration_state(
|
|
2041
2045
|
utf8iterator_maybe_consume_match(
|
2042
2046
|
&tokenizer->_input, "[CDATA[", sizeof("[CDATA[") - 1, true)) {
|
2043
2047
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_CDATA);
|
2048
|
+
tokenizer->_is_in_cdata = true;
|
2044
2049
|
tokenizer->_reconsume_current_input = true;
|
2045
2050
|
} else {
|
2046
2051
|
tokenizer_add_parse_error(parser, GUMBO_ERR_DASHES_OR_DOCTYPE);
|
@@ -2568,7 +2573,8 @@ static StateResult handle_after_doctype_public_id_state(
|
|
2568
2573
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_DATA);
|
2569
2574
|
tokenizer->_reconsume_current_input = true;
|
2570
2575
|
tokenizer->_doc_type_state.force_quirks = true;
|
2571
|
-
|
2576
|
+
emit_doctype(parser, output);
|
2577
|
+
return RETURN_ERROR;
|
2572
2578
|
default:
|
2573
2579
|
tokenizer_add_parse_error(parser, GUMBO_ERR_DOCTYPE_INVALID);
|
2574
2580
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_BOGUS_DOCTYPE);
|
@@ -2813,6 +2819,7 @@ static StateResult handle_cdata_state(
|
|
2813
2819
|
tokenizer->_reconsume_current_input = true;
|
2814
2820
|
reset_token_start_point(tokenizer);
|
2815
2821
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_DATA);
|
2822
|
+
tokenizer->_is_in_cdata = false;
|
2816
2823
|
return NEXT_CHAR;
|
2817
2824
|
} else {
|
2818
2825
|
return emit_current_char(parser, output);
|
@@ -2929,7 +2936,8 @@ bool gumbo_lex(GumboParser* parser, GumboToken* output) {
|
|
2929
2936
|
assert(!tokenizer->_temporary_buffer_emit);
|
2930
2937
|
assert(tokenizer->_buffered_emit_char == kGumboNoChar);
|
2931
2938
|
int c = utf8iterator_current(&tokenizer->_input);
|
2932
|
-
gumbo_debug("Lexing character '%c' in state %d.\n",
|
2939
|
+
gumbo_debug("Lexing character '%c' (%d) in state %d.\n",
|
2940
|
+
c, c, tokenizer->_state);
|
2933
2941
|
StateResult result =
|
2934
2942
|
dispatch_table[tokenizer->_state](parser, tokenizer, c, output);
|
2935
2943
|
// We need to clear reconsume_current_input before returning to prevent
|
data/gumbo-parser/src/utf8.c
CHANGED
@@ -133,10 +133,10 @@ static void read_char(Utf8Iterator* iter) {
|
|
133
133
|
decode(&state, &code_point, (uint32_t) (unsigned char) (*c));
|
134
134
|
if (state == UTF8_ACCEPT) {
|
135
135
|
iter->_width = c - iter->_start + 1;
|
136
|
-
// This is the special handling for carriage returns that is mandated by the
|
137
|
-
// HTML5 spec. Since we're looking for particular 7-bit literal characters,
|
138
|
-
// we operate in terms of chars and only need a check for iter overrun,
|
139
|
-
// instead of having to read in a full next code point.
|
136
|
+
// This is the special handling for carriage returns that is mandated by
|
137
|
+
// the HTML5 spec. Since we're looking for particular 7-bit literal
|
138
|
+
// characters, we operate in terms of chars and only need a check for iter
|
139
|
+
// overrun, instead of having to read in a full next code point.
|
140
140
|
// http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#preprocessing-the-input-stream
|
141
141
|
if (code_point == '\r') {
|
142
142
|
assert(iter->_width == 1);
|
@@ -165,10 +165,11 @@ static void read_char(Utf8Iterator* iter) {
|
|
165
165
|
return;
|
166
166
|
}
|
167
167
|
}
|
168
|
-
// If we got here without exiting early, then we've reached the end of the iterator.
|
169
|
-
// Add an error for truncated input, set the width to consume the rest of the
|
170
|
-
// iterator, and emit a replacement character. The next time we enter this method,
|
171
|
-
// it will detect that there's no input to consume and output an EOF.
|
168
|
+
// If we got here without exiting early, then we've reached the end of the
|
169
|
+
// iterator. Add an error for truncated input, set the width to consume the
|
170
|
+
// rest of the iterator, and emit a replacement character. The next time we
|
171
|
+
// enter this method, it will detect that there's no input to consume and
|
172
|
+
// output an EOF.
|
172
173
|
iter->_current = kUtf8ReplacementChar;
|
173
174
|
iter->_width = iter->_end - iter->_start;
|
174
175
|
add_error(iter, GUMBO_ERR_UTF8_TRUNCATED);
|
data/gumbo-parser/src/vector.c
CHANGED
@@ -81,7 +81,7 @@ void* gumbo_vector_pop(
|
|
81
81
|
return vector->data[--vector->length];
|
82
82
|
}
|
83
83
|
|
84
|
-
int gumbo_vector_index_of(GumboVector* vector, void* element) {
|
84
|
+
int gumbo_vector_index_of(GumboVector* vector, const void* element) {
|
85
85
|
for (int i = 0; i < vector->length; ++i) {
|
86
86
|
if (vector->data[i] == element) {
|
87
87
|
return i;
|
data/test-nokogumbo.rb
ADDED
@@ -0,0 +1,140 @@
|
|
1
|
+
$:.unshift('lib')
|
2
|
+
$:.unshift('ext/nokogumboc')
|
3
|
+
|
4
|
+
gem 'minitest'
|
5
|
+
|
6
|
+
require 'nokogumbo'
|
7
|
+
require 'minitest/autorun'
|
8
|
+
|
9
|
+
class TestNokogumbo < Minitest::Test
|
10
|
+
def test_element_text
|
11
|
+
doc = Nokogiri::HTML5(buffer)
|
12
|
+
assert_equal "content", doc.at('span').text
|
13
|
+
end
|
14
|
+
|
15
|
+
def test_element_cdata
|
16
|
+
doc = Nokogiri::HTML5(buffer)
|
17
|
+
assert_equal "foo<x>bar", doc.at('textarea').text.strip
|
18
|
+
end
|
19
|
+
|
20
|
+
def test_attr_value
|
21
|
+
doc = Nokogiri::HTML5(buffer)
|
22
|
+
assert_equal "utf-8", doc.at('meta')['charset']
|
23
|
+
end
|
24
|
+
|
25
|
+
def test_comment
|
26
|
+
doc = Nokogiri::HTML5(buffer)
|
27
|
+
assert_equal " test comment ", doc.xpath('//comment()').text
|
28
|
+
end
|
29
|
+
|
30
|
+
def test_unknown_element
|
31
|
+
doc = Nokogiri::HTML5(buffer)
|
32
|
+
assert_equal "main", doc.at('main').name
|
33
|
+
end
|
34
|
+
|
35
|
+
def test_IO
|
36
|
+
require 'stringio'
|
37
|
+
doc = Nokogiri::HTML5(StringIO.new(buffer))
|
38
|
+
assert_equal 'textarea', doc.at('form').element_children.first.name
|
39
|
+
end
|
40
|
+
|
41
|
+
def test_nil
|
42
|
+
doc = Nokogiri::HTML5(nil)
|
43
|
+
assert_equal 1, doc.search('body').count
|
44
|
+
end
|
45
|
+
|
46
|
+
if ''.respond_to? 'encoding'
|
47
|
+
def test_macroman_encoding
|
48
|
+
mac="<span>\xCA</span>".force_encoding('macroman')
|
49
|
+
doc = Nokogiri::HTML5(mac)
|
50
|
+
assert_equal '<span> </span>', doc.at('span').to_xml
|
51
|
+
end
|
52
|
+
|
53
|
+
def test_iso8859_encoding
|
54
|
+
iso8859="<span>Se\xF1or</span>".force_encoding(Encoding::ASCII_8BIT)
|
55
|
+
doc = Nokogiri::HTML5(iso8859)
|
56
|
+
assert_equal '<span>Señor</span>', doc.at('span').to_xml
|
57
|
+
end
|
58
|
+
|
59
|
+
def test_charset_encoding
|
60
|
+
utf8="<meta charset='utf-8'><span>Se\xC3\xB1or</span>".
|
61
|
+
force_encoding(Encoding::ASCII_8BIT)
|
62
|
+
doc = Nokogiri::HTML5(utf8)
|
63
|
+
assert_equal '<span>Señor</span>', doc.at('span').to_xml
|
64
|
+
end
|
65
|
+
|
66
|
+
def test_bogus_encoding
|
67
|
+
bogus="<meta charset='bogus'><span>Se\xF1or</span>".
|
68
|
+
force_encoding(Encoding::ASCII_8BIT)
|
69
|
+
doc = Nokogiri::HTML5(bogus)
|
70
|
+
assert_equal '<span>Señor</span>', doc.at('span').to_xml
|
71
|
+
end
|
72
|
+
end
|
73
|
+
|
74
|
+
def test_html5_doctype
|
75
|
+
doc = Nokogumbo.parse("<!DOCTYPE html><html></html>")
|
76
|
+
assert_match /<!DOCTYPE html>/, doc.to_html
|
77
|
+
end
|
78
|
+
|
79
|
+
def test_fragment_head
|
80
|
+
doc = Nokogiri::HTML5.fragment(buffer[/<head>(.*?)<\/head>/m, 1])
|
81
|
+
assert_equal "hello world", doc.xpath('title').text
|
82
|
+
assert_equal "utf-8", doc.xpath('meta').first['charset']
|
83
|
+
end
|
84
|
+
|
85
|
+
def test_fragment_body
|
86
|
+
doc = Nokogiri::HTML5.fragment(buffer[/<body>(.*?)<\/body>/m, 1])
|
87
|
+
assert_equal '<span>content</span>', doc.xpath('main/span').to_xml
|
88
|
+
assert_equal " test comment ", doc.xpath('comment()').text
|
89
|
+
end
|
90
|
+
|
91
|
+
def test_xlink_attribute
|
92
|
+
source = <<-EOF.gsub(/^ {6}/, '')
|
93
|
+
<svg xmlns="http://www.w3.org/2000/svg">
|
94
|
+
<a xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#s1"/>
|
95
|
+
</svg>
|
96
|
+
EOF
|
97
|
+
doc = Nokogiri::HTML5.fragment(source)
|
98
|
+
a = doc.at('a')
|
99
|
+
assert_equal ["xlink:href", "xmlns:xlink"], a.attributes.keys.sort
|
100
|
+
end
|
101
|
+
|
102
|
+
def test_template
|
103
|
+
source = <<-EOF.gsub(/^ {6}/, '')
|
104
|
+
<template id="productrow">
|
105
|
+
<tr>
|
106
|
+
<td class="record"></td>
|
107
|
+
<td></td>
|
108
|
+
</tr>
|
109
|
+
</template>
|
110
|
+
EOF
|
111
|
+
doc = Nokogiri::HTML5.fragment(source)
|
112
|
+
template = doc.at('template')
|
113
|
+
assert_equal "productrow", template['id']
|
114
|
+
assert_equal "record", template.at('td')['class']
|
115
|
+
end
|
116
|
+
|
117
|
+
private
|
118
|
+
|
119
|
+
def buffer
|
120
|
+
<<-EOF.gsub(/^ /, '')
|
121
|
+
<html>
|
122
|
+
<head>
|
123
|
+
<meta charset="utf-8"/>
|
124
|
+
<title>hello world</title>
|
125
|
+
</head>
|
126
|
+
<body>
|
127
|
+
<h1>hello world</h1>
|
128
|
+
<main>
|
129
|
+
<span>content</span>
|
130
|
+
</main>
|
131
|
+
<!-- test comment -->
|
132
|
+
<form>
|
133
|
+
<textarea>foo<x>bar</textarea>
|
134
|
+
</form>
|
135
|
+
</body>
|
136
|
+
</html>
|
137
|
+
EOF
|
138
|
+
end
|
139
|
+
|
140
|
+
end
|
metadata
CHANGED
@@ -1,32 +1,32 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: nokogumbo
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.3.0
|
5
4
|
prerelease:
|
5
|
+
version: 1.4.1
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
8
8
|
- Sam Ruby
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2015-
|
12
|
+
date: 2015-03-13 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
|
-
|
16
|
-
requirement: !ruby/object:Gem::Requirement
|
17
|
-
none: false
|
15
|
+
version_requirements: !ruby/object:Gem::Requirement
|
18
16
|
requirements:
|
19
17
|
- - ! '>='
|
20
18
|
- !ruby/object:Gem::Version
|
21
19
|
version: '0'
|
20
|
+
none: false
|
22
21
|
type: :runtime
|
22
|
+
name: nokogiri
|
23
23
|
prerelease: false
|
24
|
-
|
25
|
-
none: false
|
24
|
+
requirement: !ruby/object:Gem::Requirement
|
26
25
|
requirements:
|
27
26
|
- - ! '>='
|
28
27
|
- !ruby/object:Gem::Version
|
29
28
|
version: '0'
|
29
|
+
none: false
|
30
30
|
description: Nokogumbo allows a Ruby program to invoke the Gumbo HTML5 parser and
|
31
31
|
access the result as a Nokogiri parsed document.
|
32
32
|
email: rubys@intertwingly.net
|
@@ -56,6 +56,11 @@ files:
|
|
56
56
|
- gumbo-parser/src/string_piece.c
|
57
57
|
- gumbo-parser/src/string_piece.h
|
58
58
|
- gumbo-parser/src/tag.c
|
59
|
+
- gumbo-parser/src/tag.in
|
60
|
+
- gumbo-parser/src/tag_enum.h
|
61
|
+
- gumbo-parser/src/tag_gperf.h
|
62
|
+
- gumbo-parser/src/tag_sizes.h
|
63
|
+
- gumbo-parser/src/tag_strings.h
|
59
64
|
- gumbo-parser/src/token_type.h
|
60
65
|
- gumbo-parser/src/tokenizer.c
|
61
66
|
- gumbo-parser/src/tokenizer.h
|
@@ -67,6 +72,7 @@ files:
|
|
67
72
|
- gumbo-parser/src/vector.c
|
68
73
|
- gumbo-parser/src/vector.h
|
69
74
|
- gumbo-parser/visualc/include/strings.h
|
75
|
+
- test-nokogumbo.rb
|
70
76
|
homepage: https://github.com/rubys/nokogumbo/#readme
|
71
77
|
licenses:
|
72
78
|
- Apache 2.0
|
@@ -75,20 +81,20 @@ rdoc_options: []
|
|
75
81
|
require_paths:
|
76
82
|
- lib
|
77
83
|
required_ruby_version: !ruby/object:Gem::Requirement
|
78
|
-
none: false
|
79
84
|
requirements:
|
80
85
|
- - ! '>='
|
81
86
|
- !ruby/object:Gem::Version
|
82
87
|
version: '0'
|
83
|
-
required_rubygems_version: !ruby/object:Gem::Requirement
|
84
88
|
none: false
|
89
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
85
90
|
requirements:
|
86
91
|
- - ! '>='
|
87
92
|
- !ruby/object:Gem::Version
|
88
93
|
version: '0'
|
94
|
+
none: false
|
89
95
|
requirements: []
|
90
96
|
rubyforge_project:
|
91
|
-
rubygems_version: 1.8.23
|
97
|
+
rubygems_version: 1.8.23.2
|
92
98
|
signing_key:
|
93
99
|
specification_version: 3
|
94
100
|
summary: Nokogiri interface to the Gumbo HTML5 parser
|