nokogumbo 2.0.2 → 2.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +18 -2
- data/ext/nokogumbo/nokogumbo.c +49 -11
- data/gumbo-parser/src/gumbo.h +19 -0
- data/gumbo-parser/src/parser.c +3 -0
- data/gumbo-parser/src/tokenizer.c +11 -1
- data/gumbo-parser/src/utf8.c +5 -0
- data/gumbo-parser/src/utf8.h +1 -0
- data/lib/nokogumbo.rb +3 -0
- data/lib/nokogumbo/html5.rb +15 -14
- data/lib/nokogumbo/html5/document.rb +7 -2
- data/lib/nokogumbo/html5/document_fragment.rb +2 -1
- data/lib/nokogumbo/version.rb +1 -1
- metadata +6 -6
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: e4694cf3eefbeee2a55cd4bb355b7ec6159c64eac4454dff02b1fbf7e5e8375a
|
4
|
+
data.tar.gz: 67832a7c26148f59755360758fcc0b0c1969949bf1e5a1b27f5cabe4b9e8b40b
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 3a415817caaf0c3c03037664bda8ed8aa17cc14419e75672dcaa2e2a7dd6d9a20e6ab59095a2295f90da5e45de2c3d72f9a25557533836d55dc67966fe8c7a14
|
7
|
+
data.tar.gz: 8dc8f9f2d55936a63097301dc5eb6fb54ed1e4c274b03cdcd6f45e2b4ac2cdc911a54e8e5838ce468820ebade731a62f7cfc167817528fd0adb415087ce924b6
|
data/README.md
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
# Nokogumbo - a Nokogiri interface to the Gumbo HTML5 parser.
|
2
2
|
|
3
|
-
Nokogumbo provides the ability for a Ruby program to invoke
|
4
|
-
[Gumbo HTML5 parser](https://github.com/
|
3
|
+
Nokogumbo provides the ability for a Ruby program to invoke
|
4
|
+
[our version of the Gumbo HTML5 parser](https://github.com/rubys/nokogumbo/tree/master/gumbo-parser/src)
|
5
5
|
and to access the result as a
|
6
6
|
[Nokogiri::HTML::Document](http://rdoc.info/github/sparklemotion/nokogiri/Nokogiri/HTML/Document).
|
7
7
|
|
@@ -128,6 +128,22 @@ doc = Nokogiri.HTML5(html)
|
|
128
128
|
doc = Nokogiri.HTML5(html, max_tree_depth: -1)
|
129
129
|
```
|
130
130
|
|
131
|
+
### Attribute limit per element
|
132
|
+
The maximum number of attributes per DOM element is configurable by the
|
133
|
+
`:max_attributes` option. If a given element would exceed this limit, then an
|
134
|
+
[ArgumentError](https://ruby-doc.org/core-2.5.0/ArgumentError.html) is raised.
|
135
|
+
|
136
|
+
This limit (which defaults to `Nokogumbo::DEFAULT_MAX_ATTRIBUTES = 400`) can
|
137
|
+
be removed by giving the option `max_attributes: -1`.
|
138
|
+
|
139
|
+
``` ruby
|
140
|
+
html = '<!DOCTYPE html><div ' + (1..1000).map { |x| "attr-#{x}" }.join(' ') + '>'
|
141
|
+
# "<!DOCTYPE html><div attr-1 attr-2 attr-3 ... attr-1000>"
|
142
|
+
doc = Nokogiri.HTML5(html)
|
143
|
+
# raises ArgumentError: Attributes per element limit exceeded
|
144
|
+
doc = Nokogiri.HTML5(html, max_attributes: -1)
|
145
|
+
```
|
146
|
+
|
131
147
|
## HTML Serialization
|
132
148
|
|
133
149
|
After parsing HTML, it may be serialized using any of the Nokogiri
|
data/ext/nokogumbo/nokogumbo.c
CHANGED
@@ -281,6 +281,7 @@ static GumboOutput *perform_parse(const GumboOptions *options, VALUE input) {
|
|
281
281
|
switch (output->status) {
|
282
282
|
case GUMBO_STATUS_OK:
|
283
283
|
break;
|
284
|
+
case GUMBO_STATUS_TOO_MANY_ATTRIBUTES:
|
284
285
|
case GUMBO_STATUS_TREE_TOO_DEEP:
|
285
286
|
gumbo_destroy_output(output);
|
286
287
|
rb_raise(rb_eArgError, "%s", status_string);
|
@@ -479,19 +480,43 @@ typedef struct {
|
|
479
480
|
xmlDocPtr doc;
|
480
481
|
} ParseArgs;
|
481
482
|
|
482
|
-
static
|
483
|
+
static void parse_args_mark(void *parse_args) {
|
484
|
+
ParseArgs *args = parse_args;
|
485
|
+
rb_gc_mark_maybe(args->input);
|
486
|
+
rb_gc_mark_maybe(args->url_or_frag);
|
487
|
+
}
|
488
|
+
|
489
|
+
// Wrap a ParseArgs pointer. The underlying ParseArgs must outlive the
|
490
|
+
// wrapper.
|
491
|
+
static VALUE wrap_parse_args(ParseArgs *args) {
|
492
|
+
return Data_Wrap_Struct(rb_cData, parse_args_mark, RUBY_NEVER_FREE, args);
|
493
|
+
}
|
494
|
+
|
495
|
+
// Returns the underlying ParseArgs wrapped by wrap_parse_args.
|
496
|
+
static ParseArgs *unwrap_parse_args(VALUE obj) {
|
497
|
+
ParseArgs *args;
|
498
|
+
Data_Get_Struct(obj, ParseArgs, args);
|
499
|
+
return args;
|
500
|
+
}
|
501
|
+
|
502
|
+
static VALUE parse_cleanup(VALUE parse_args) {
|
503
|
+
ParseArgs *args = unwrap_parse_args(parse_args);
|
483
504
|
gumbo_destroy_output(args->output);
|
505
|
+
// Make sure garbage collection doesn't mark the objects as being live based
|
506
|
+
// on references from the ParseArgs. This may be unnecessary.
|
507
|
+
args->input = Qnil;
|
508
|
+
args->url_or_frag = Qnil;
|
484
509
|
if (args->doc != NIL)
|
485
510
|
xmlFreeDoc(args->doc);
|
486
511
|
return Qnil;
|
487
512
|
}
|
488
513
|
|
489
|
-
|
490
|
-
static VALUE parse_continue(ParseArgs *args);
|
514
|
+
static VALUE parse_continue(VALUE parse_args);
|
491
515
|
|
492
516
|
// Parse a string using gumbo_parse into a Nokogiri document
|
493
|
-
static VALUE parse(VALUE self, VALUE input, VALUE url, VALUE max_errors, VALUE max_depth) {
|
517
|
+
static VALUE parse(VALUE self, VALUE input, VALUE url, VALUE max_attributes, VALUE max_errors, VALUE max_depth) {
|
494
518
|
GumboOptions options = kGumboDefaultOptions;
|
519
|
+
options.max_attributes = NUM2INT(max_attributes);
|
495
520
|
options.max_errors = NUM2INT(max_errors);
|
496
521
|
options.max_tree_depth = NUM2INT(max_depth);
|
497
522
|
|
@@ -502,10 +527,13 @@ static VALUE parse(VALUE self, VALUE input, VALUE url, VALUE max_errors, VALUE m
|
|
502
527
|
.url_or_frag = url,
|
503
528
|
.doc = NIL,
|
504
529
|
};
|
505
|
-
|
530
|
+
VALUE parse_args = wrap_parse_args(&args);
|
531
|
+
|
532
|
+
return rb_ensure(parse_continue, parse_args, parse_cleanup, parse_args);
|
506
533
|
}
|
507
534
|
|
508
|
-
static VALUE parse_continue(
|
535
|
+
static VALUE parse_continue(VALUE parse_args) {
|
536
|
+
ParseArgs *args = unwrap_parse_args(parse_args);
|
509
537
|
GumboOutput *output = args->output;
|
510
538
|
xmlDocPtr doc;
|
511
539
|
if (output->document->v.document.has_doctype) {
|
@@ -563,13 +591,14 @@ static xmlNodePtr extract_xml_node(VALUE node) {
|
|
563
591
|
#endif
|
564
592
|
}
|
565
593
|
|
566
|
-
static VALUE fragment_continue(
|
594
|
+
static VALUE fragment_continue(VALUE parse_args);
|
567
595
|
|
568
596
|
static VALUE fragment (
|
569
597
|
VALUE self,
|
570
598
|
VALUE doc_fragment,
|
571
599
|
VALUE tags,
|
572
600
|
VALUE ctx,
|
601
|
+
VALUE max_attributes,
|
573
602
|
VALUE max_errors,
|
574
603
|
VALUE max_depth
|
575
604
|
) {
|
@@ -676,6 +705,7 @@ static VALUE fragment (
|
|
676
705
|
// Perform a fragment parse.
|
677
706
|
int depth = NUM2INT(max_depth);
|
678
707
|
GumboOptions options = kGumboDefaultOptions;
|
708
|
+
options.max_attributes = NUM2INT(max_attributes);
|
679
709
|
options.max_errors = NUM2INT(max_errors);
|
680
710
|
// Add one to account for the HTML element.
|
681
711
|
options.max_tree_depth = depth < 0 ? -1 : (depth + 1);
|
@@ -692,11 +722,13 @@ static VALUE fragment (
|
|
692
722
|
.url_or_frag = doc_fragment,
|
693
723
|
.doc = (xmlDocPtr)extract_xml_node(doc),
|
694
724
|
};
|
695
|
-
|
725
|
+
VALUE parse_args = wrap_parse_args(&args);
|
726
|
+
rb_ensure(fragment_continue, parse_args, parse_cleanup, parse_args);
|
696
727
|
return Qnil;
|
697
728
|
}
|
698
729
|
|
699
|
-
static VALUE fragment_continue(
|
730
|
+
static VALUE fragment_continue(VALUE parse_args) {
|
731
|
+
ParseArgs *args = unwrap_parse_args(parse_args);
|
700
732
|
GumboOutput *output = args->output;
|
701
733
|
VALUE doc_fragment = args->url_or_frag;
|
702
734
|
xmlDocPtr xml_doc = args->doc;
|
@@ -720,10 +752,15 @@ void Init_nokogumbo() {
|
|
720
752
|
VALUE mNokogiri = rb_const_get(rb_cObject, rb_intern_const("Nokogiri"));
|
721
753
|
VALUE mNokogiriXml = rb_const_get(mNokogiri, rb_intern_const("XML"));
|
722
754
|
cNokogiriXmlSyntaxError = rb_const_get(mNokogiriXml, rb_intern_const("SyntaxError"));
|
755
|
+
rb_gc_register_mark_object(cNokogiriXmlSyntaxError);
|
723
756
|
cNokogiriXmlElement = rb_const_get(mNokogiriXml, rb_intern_const("Element"));
|
757
|
+
rb_gc_register_mark_object(cNokogiriXmlElement);
|
724
758
|
cNokogiriXmlText = rb_const_get(mNokogiriXml, rb_intern_const("Text"));
|
759
|
+
rb_gc_register_mark_object(cNokogiriXmlText);
|
725
760
|
cNokogiriXmlCData = rb_const_get(mNokogiriXml, rb_intern_const("CDATA"));
|
761
|
+
rb_gc_register_mark_object(cNokogiriXmlCData);
|
726
762
|
cNokogiriXmlComment = rb_const_get(mNokogiriXml, rb_intern_const("Comment"));
|
763
|
+
rb_gc_register_mark_object(cNokogiriXmlComment);
|
727
764
|
|
728
765
|
// Interned symbols.
|
729
766
|
new = rb_intern_const("new");
|
@@ -736,6 +773,7 @@ void Init_nokogumbo() {
|
|
736
773
|
// Class constants.
|
737
774
|
VALUE HTML5 = rb_const_get(mNokogiri, rb_intern_const("HTML5"));
|
738
775
|
Document = rb_const_get(HTML5, rb_intern_const("Document"));
|
776
|
+
rb_gc_register_mark_object(Document);
|
739
777
|
|
740
778
|
// Interned symbols.
|
741
779
|
internal_subset = rb_intern_const("internal_subset");
|
@@ -743,8 +781,8 @@ void Init_nokogumbo() {
|
|
743
781
|
|
744
782
|
// Define Nokogumbo module with parse and fragment methods.
|
745
783
|
VALUE Gumbo = rb_define_module("Nokogumbo");
|
746
|
-
rb_define_singleton_method(Gumbo, "parse", parse,
|
747
|
-
rb_define_singleton_method(Gumbo, "fragment", fragment,
|
784
|
+
rb_define_singleton_method(Gumbo, "parse", parse, 5);
|
785
|
+
rb_define_singleton_method(Gumbo, "fragment", fragment, 6);
|
748
786
|
|
749
787
|
// Add private constant for testing.
|
750
788
|
rb_define_const(Gumbo, "LINE_SUPPORTED", line_supported);
|
data/gumbo-parser/src/gumbo.h
CHANGED
@@ -706,6 +706,15 @@ typedef struct GumboInternalOptions {
|
|
706
706
|
*/
|
707
707
|
bool stop_on_first_error;
|
708
708
|
|
709
|
+
/**
|
710
|
+
* Maximum allowed number of attributes per element. If this limit is
|
711
|
+
* exceeded, the parser will return early with a partial document and
|
712
|
+
* the returned `GumboOutput` will have its `status` field set to
|
713
|
+
* `GUMBO_STATUS_TOO_MANY_ATTRIBUTES`. Set to `-1` to disable the limit.
|
714
|
+
* Default: `400`.
|
715
|
+
*/
|
716
|
+
int max_attributes;
|
717
|
+
|
709
718
|
/**
|
710
719
|
* Maximum allowed depth for the parse tree. If this limit is exceeded,
|
711
720
|
* the parser will return early with a partial document and the returned
|
@@ -796,6 +805,16 @@ typedef enum {
|
|
796
805
|
*/
|
797
806
|
GUMBO_STATUS_TREE_TOO_DEEP,
|
798
807
|
|
808
|
+
/**
|
809
|
+
* Indicates that the maximum number of attributes per element
|
810
|
+
* (`GumboOptions::max_attributes`) was reached during parsing. The
|
811
|
+
* resulting tree will be a partial document, with no further nodes
|
812
|
+
* created after the point where the limit was reached. The partial
|
813
|
+
* document may be useful for constructing an error message but
|
814
|
+
* typically shouldn't be used for other purposes.
|
815
|
+
*/
|
816
|
+
GUMBO_STATUS_TOO_MANY_ATTRIBUTES,
|
817
|
+
|
799
818
|
// Currently unused
|
800
819
|
GUMBO_STATUS_OUT_OF_MEMORY,
|
801
820
|
} GumboOutputStatus;
|
data/gumbo-parser/src/parser.c
CHANGED
@@ -48,6 +48,7 @@ typedef uint8_t TagSet[GUMBO_TAG_LAST + 1];
|
|
48
48
|
const GumboOptions kGumboDefaultOptions = {
|
49
49
|
.tab_stop = 8,
|
50
50
|
.stop_on_first_error = false,
|
51
|
+
.max_attributes = 400,
|
51
52
|
.max_tree_depth = 400,
|
52
53
|
.max_errors = -1,
|
53
54
|
.fragment_context = NULL,
|
@@ -4858,6 +4859,8 @@ const char* gumbo_status_to_string(GumboOutputStatus status) {
|
|
4858
4859
|
return "OK";
|
4859
4860
|
case GUMBO_STATUS_OUT_OF_MEMORY:
|
4860
4861
|
return "System allocator returned NULL during parsing";
|
4862
|
+
case GUMBO_STATUS_TOO_MANY_ATTRIBUTES:
|
4863
|
+
return "Attributes per element limit exceeded";
|
4861
4864
|
case GUMBO_STATUS_TREE_TOO_DEEP:
|
4862
4865
|
return "Document tree depth limit exceeded";
|
4863
4866
|
default:
|
@@ -784,12 +784,22 @@ static void add_duplicate_attr_error(GumboParser* parser) {
|
|
784
784
|
static void finish_attribute_name(GumboParser* parser) {
|
785
785
|
GumboTokenizerState* tokenizer = parser->_tokenizer_state;
|
786
786
|
GumboTagState* tag_state = &tokenizer->_tag_state;
|
787
|
+
GumboVector* /* GumboAttribute* */ attributes = &tag_state->_attributes;
|
788
|
+
|
789
|
+
int max_attributes = parser->_options->max_attributes;
|
790
|
+
if (unlikely(max_attributes >= 0 && attributes->length >= (unsigned int) max_attributes)) {
|
791
|
+
parser->_output->status = GUMBO_STATUS_TOO_MANY_ATTRIBUTES;
|
792
|
+
gumbo_debug("Attributes limit exceeded.\n");
|
793
|
+
reinitialize_tag_buffer(parser);
|
794
|
+
tag_state->_drop_next_attr_value = true;
|
795
|
+
return;
|
796
|
+
}
|
797
|
+
|
787
798
|
// May've been set by a previous attribute without a value; reset it here.
|
788
799
|
tag_state->_drop_next_attr_value = false;
|
789
800
|
assert(tag_state->_attributes.data);
|
790
801
|
assert(tag_state->_attributes.capacity);
|
791
802
|
|
792
|
-
GumboVector* /* GumboAttribute* */ attributes = &tag_state->_attributes;
|
793
803
|
for (unsigned int i = 0; i < attributes->length; ++i) {
|
794
804
|
GumboAttribute* attr = attributes->data[i];
|
795
805
|
if (
|
data/gumbo-parser/src/utf8.c
CHANGED
@@ -193,6 +193,11 @@ void utf8iterator_init (
|
|
193
193
|
iter->_pos.offset = 0;
|
194
194
|
iter->_parser = parser;
|
195
195
|
read_char(iter);
|
196
|
+
if (iter->_current == kUtf8BomChar) {
|
197
|
+
iter->_start += iter->_width;
|
198
|
+
iter->_pos.offset += iter->_width;
|
199
|
+
read_char(iter);
|
200
|
+
}
|
196
201
|
}
|
197
202
|
|
198
203
|
void utf8iterator_next(Utf8Iterator* iter) {
|
data/gumbo-parser/src/utf8.h
CHANGED
data/lib/nokogumbo.rb
CHANGED
@@ -5,6 +5,9 @@ require 'nokogumbo/html5'
|
|
5
5
|
require 'nokogumbo/nokogumbo'
|
6
6
|
|
7
7
|
module Nokogumbo
|
8
|
+
# The default maximum number of attributes per element.
|
9
|
+
DEFAULT_MAX_ATTRIBUTES = 400
|
10
|
+
|
8
11
|
# The default maximum number of errors for parsing a document or a fragment.
|
9
12
|
DEFAULT_MAX_ERRORS = 0
|
10
13
|
|
data/lib/nokogumbo/html5.rb
CHANGED
@@ -19,7 +19,7 @@ module Nokogiri
|
|
19
19
|
|
20
20
|
# Parse an HTML 5 document. Convenience method for Nokogiri::HTML5::Document.parse
|
21
21
|
def self.parse(string, url = nil, encoding = nil, **options, &block)
|
22
|
-
Document.parse(string, url, encoding, options, &block)
|
22
|
+
Document.parse(string, url, encoding, **options, &block)
|
23
23
|
end
|
24
24
|
|
25
25
|
# Parse a fragment from +string+. Convenience method for
|
@@ -92,19 +92,20 @@ module Nokogiri
|
|
92
92
|
if encoding.nil?
|
93
93
|
string = string.read
|
94
94
|
else
|
95
|
-
|
95
|
+
string = string.read(encoding: encoding)
|
96
96
|
end
|
97
97
|
else
|
98
98
|
# Otherwise the string has the given encoding.
|
99
|
-
|
99
|
+
string = string.to_str
|
100
|
+
if encoding
|
100
101
|
string = string.dup
|
101
102
|
string.force_encoding(encoding)
|
102
103
|
end
|
103
104
|
end
|
104
105
|
|
105
|
-
# convert to UTF-8
|
106
|
-
if string.
|
107
|
-
string = reencode(string
|
106
|
+
# convert to UTF-8
|
107
|
+
if string.encoding != Encoding::UTF_8
|
108
|
+
string = reencode(string)
|
108
109
|
end
|
109
110
|
string
|
110
111
|
end
|
@@ -123,18 +124,17 @@ module Nokogiri
|
|
123
124
|
# http://www.w3.org/TR/html5/syntax.html#determining-the-character-encoding
|
124
125
|
#
|
125
126
|
def self.reencode(body, content_type=nil)
|
126
|
-
return body unless body.respond_to? :encoding
|
127
|
-
|
128
127
|
if body.encoding == Encoding::ASCII_8BIT
|
129
128
|
encoding = nil
|
130
129
|
|
131
130
|
# look for a Byte Order Mark (BOM)
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
131
|
+
initial_bytes = body[0..2].bytes
|
132
|
+
if initial_bytes[0..2] == [0xEF, 0xBB, 0xBF]
|
133
|
+
encoding = Encoding::UTF_8
|
134
|
+
elsif initial_bytes[0..1] == [0xFE, 0xFF]
|
135
|
+
encoding = Encoding::UTF_16BE
|
136
|
+
elsif initial_bytes[0..1] == [0xFF, 0xFE]
|
137
|
+
encoding = Encoding::UTF_16LE
|
138
138
|
end
|
139
139
|
|
140
140
|
# look for a charset in a content-encoding header
|
@@ -154,6 +154,7 @@ module Nokogiri
|
|
154
154
|
encoding ||= Encoding::ISO_8859_1
|
155
155
|
|
156
156
|
# change the encoding to match the detected or inferred encoding
|
157
|
+
body = body.dup
|
157
158
|
begin
|
158
159
|
body.force_encoding(encoding)
|
159
160
|
rescue ArgumentError
|
@@ -12,6 +12,9 @@ module Nokogiri
|
|
12
12
|
if string_or_io.respond_to?(:read) && string_or_io.respond_to?(:path)
|
13
13
|
url ||= string_or_io.path
|
14
14
|
end
|
15
|
+
unless string_or_io.respond_to?(:read) || string_or_io.respond_to?(:to_str)
|
16
|
+
raise ArgumentError.new("not a string or IO object")
|
17
|
+
end
|
15
18
|
do_parse(string_or_io, url, encoding, options)
|
16
19
|
end
|
17
20
|
|
@@ -21,7 +24,8 @@ module Nokogiri
|
|
21
24
|
end
|
22
25
|
|
23
26
|
def self.read_memory(string, url = nil, encoding = nil, **options)
|
24
|
-
|
27
|
+
raise ArgumentError.new("string object doesn't respond to :to_str") unless string.respond_to?(:to_str)
|
28
|
+
do_parse(string, url, encoding, options)
|
25
29
|
end
|
26
30
|
|
27
31
|
def fragment(tags = nil)
|
@@ -37,9 +41,10 @@ module Nokogiri
|
|
37
41
|
private
|
38
42
|
def self.do_parse(string_or_io, url, encoding, options)
|
39
43
|
string = HTML5.read_and_encode(string_or_io, encoding)
|
44
|
+
max_attributes = options[:max_attributes] || Nokogumbo::DEFAULT_MAX_ATTRIBUTES
|
40
45
|
max_errors = options[:max_errors] || options[:max_parse_errors] || Nokogumbo::DEFAULT_MAX_ERRORS
|
41
46
|
max_depth = options[:max_tree_depth] || Nokogumbo::DEFAULT_MAX_TREE_DEPTH
|
42
|
-
doc = Nokogumbo.parse(string
|
47
|
+
doc = Nokogumbo.parse(string, url, max_attributes, max_errors, max_depth)
|
43
48
|
doc.encoding = 'UTF-8'
|
44
49
|
doc
|
45
50
|
end
|
@@ -12,10 +12,11 @@ module Nokogiri
|
|
12
12
|
self.errors = []
|
13
13
|
return self unless tags
|
14
14
|
|
15
|
+
max_attributes = options[:max_attributes] || Nokogumbo::DEFAULT_MAX_ATTRIBUTES
|
15
16
|
max_errors = options[:max_errors] || Nokogumbo::DEFAULT_MAX_ERRORS
|
16
17
|
max_depth = options[:max_tree_depth] || Nokogumbo::DEFAULT_MAX_TREE_DEPTH
|
17
18
|
tags = Nokogiri::HTML5.read_and_encode(tags, nil)
|
18
|
-
Nokogumbo.fragment(self, tags, ctx, max_errors, max_depth)
|
19
|
+
Nokogumbo.fragment(self, tags, ctx, max_attributes, max_errors, max_depth)
|
19
20
|
end
|
20
21
|
|
21
22
|
def serialize(options = {}, &block)
|
data/lib/nokogumbo/version.rb
CHANGED
metadata
CHANGED
@@ -1,15 +1,15 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: nokogumbo
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.0.
|
4
|
+
version: 2.0.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Sam Ruby
|
8
8
|
- Stephen Checkoway
|
9
|
-
autorequire:
|
9
|
+
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date:
|
12
|
+
date: 2020-11-22 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: nokogiri
|
@@ -94,7 +94,7 @@ metadata:
|
|
94
94
|
changelog_uri: https://github.com/rubys/nokogumbo/blob/master/CHANGELOG.md
|
95
95
|
homepage_uri: https://github.com/rubys/nokogumbo/#readme
|
96
96
|
source_code_uri: https://github.com/rubys/nokogumbo
|
97
|
-
post_install_message:
|
97
|
+
post_install_message:
|
98
98
|
rdoc_options: []
|
99
99
|
require_paths:
|
100
100
|
- lib
|
@@ -109,8 +109,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
109
109
|
- !ruby/object:Gem::Version
|
110
110
|
version: '0'
|
111
111
|
requirements: []
|
112
|
-
rubygems_version: 3.
|
113
|
-
signing_key:
|
112
|
+
rubygems_version: 3.1.2
|
113
|
+
signing_key:
|
114
114
|
specification_version: 4
|
115
115
|
summary: Nokogiri interface to the Gumbo HTML5 parser
|
116
116
|
test_files: []
|