nokogumbo 2.0.0.pre.alpha → 2.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +101 -14
- data/ext/nokogumbo/extconf.rb +7 -2
- data/ext/nokogumbo/nokogumbo.c +630 -235
- data/gumbo-parser/src/ascii.c +42 -0
- data/gumbo-parser/src/ascii.h +91 -7
- data/gumbo-parser/src/char_ref.c +5973 -4601
- data/gumbo-parser/src/char_ref.h +13 -28
- data/gumbo-parser/src/error.c +391 -126
- data/gumbo-parser/src/error.h +63 -125
- data/gumbo-parser/src/gumbo.h +74 -4
- data/gumbo-parser/src/parser.c +1161 -1025
- data/gumbo-parser/src/string_buffer.c +1 -1
- data/gumbo-parser/src/string_buffer.h +1 -1
- data/gumbo-parser/src/token_buffer.c +79 -0
- data/gumbo-parser/src/token_buffer.h +71 -0
- data/gumbo-parser/src/tokenizer.c +1440 -1278
- data/gumbo-parser/src/tokenizer.h +7 -18
- data/gumbo-parser/src/tokenizer_states.h +275 -23
- data/gumbo-parser/src/utf8.c +17 -59
- data/gumbo-parser/src/utf8.h +52 -16
- data/lib/nokogumbo.rb +3 -1
- data/lib/nokogumbo/html5.rb +17 -15
- data/lib/nokogumbo/html5/document.rb +19 -3
- data/lib/nokogumbo/html5/document_fragment.rb +36 -20
- data/lib/nokogumbo/{xml → html5}/node.rb +28 -13
- data/lib/nokogumbo/version.rb +1 -1
- metadata +20 -14
- data/CHANGELOG.md +0 -56
data/gumbo-parser/src/utf8.h
CHANGED
@@ -30,7 +30,9 @@ struct GumboInternalError;
|
|
30
30
|
struct GumboInternalParser;
|
31
31
|
|
32
32
|
// Unicode replacement char.
|
33
|
-
|
33
|
+
#define kUtf8ReplacementChar 0xFFFD
|
34
|
+
#define kUtf8BomChar 0xFEFF
|
35
|
+
#define kUtf8MaxChar 0x10FFFF
|
34
36
|
|
35
37
|
typedef struct GumboInternalUtf8Iterator {
|
36
38
|
// Points at the start of the code point most recently read into 'current'.
|
@@ -60,9 +62,23 @@ typedef struct GumboInternalUtf8Iterator {
|
|
60
62
|
struct GumboInternalParser* _parser;
|
61
63
|
} Utf8Iterator;
|
62
64
|
|
63
|
-
// Returns true if this Unicode code point is
|
64
|
-
|
65
|
-
|
65
|
+
// Returns true if this Unicode code point is a surrogate.
|
66
|
+
CONST_FN static inline bool utf8_is_surrogate(int c) {
|
67
|
+
return c >= 0xD800 && c <= 0xDFFF;
|
68
|
+
}
|
69
|
+
|
70
|
+
// Returns true if this Unicode code point is a noncharacter.
|
71
|
+
CONST_FN static inline bool utf8_is_noncharacter(int c) {
|
72
|
+
return
|
73
|
+
(c >= 0xFDD0 && c <= 0xFDEF)
|
74
|
+
|| ((c & 0xFFFF) == 0xFFFE)
|
75
|
+
|| ((c & 0xFFFF) == 0xFFFF);
|
76
|
+
}
|
77
|
+
|
78
|
+
// Returns true if this Unicode code point is a control.
|
79
|
+
CONST_FN static inline bool utf8_is_control(int c) {
|
80
|
+
return ((unsigned int)c < 0x1Fu) || (c >= 0x7F && c <= 0x9F);
|
81
|
+
}
|
66
82
|
|
67
83
|
// Initializes a new Utf8Iterator from the given byte buffer. The source does
|
68
84
|
// not have to be NUL-terminated, but the length must be passed in explicitly.
|
@@ -77,20 +93,47 @@ void utf8iterator_init (
|
|
77
93
|
void utf8iterator_next(Utf8Iterator* iter);
|
78
94
|
|
79
95
|
// Returns the current code point as an integer.
|
80
|
-
int utf8iterator_current(const Utf8Iterator* iter)
|
96
|
+
static inline int utf8iterator_current(const Utf8Iterator* iter) {
|
97
|
+
return iter->_current;
|
98
|
+
}
|
81
99
|
|
82
100
|
// Retrieves and fills the output parameter with the current source position.
|
83
|
-
void utf8iterator_get_position(
|
84
|
-
|
101
|
+
static inline void utf8iterator_get_position (
|
102
|
+
const Utf8Iterator* iter,
|
103
|
+
GumboSourcePosition* output
|
104
|
+
) {
|
105
|
+
*output = iter->_pos;
|
106
|
+
}
|
107
|
+
|
108
|
+
// Retrieves the marked position.
|
109
|
+
static inline GumboSourcePosition utf8iterator_get_mark_position (
|
110
|
+
const Utf8Iterator* iter
|
111
|
+
) {
|
112
|
+
return iter->_mark_pos;
|
113
|
+
}
|
85
114
|
|
86
115
|
// Retrieves a character pointer to the start of the current character.
|
87
|
-
const char* utf8iterator_get_char_pointer(const Utf8Iterator* iter)
|
116
|
+
static inline const char* utf8iterator_get_char_pointer(const Utf8Iterator* iter) {
|
117
|
+
return iter->_start;
|
118
|
+
}
|
119
|
+
|
120
|
+
// Retrieves the width of the current character.
|
121
|
+
static inline size_t utf8iterator_get_width(const Utf8Iterator* iter) {
|
122
|
+
return iter->_width;
|
123
|
+
}
|
88
124
|
|
89
125
|
// Retrieves a character pointer to 1 past the end of the buffer. This is
|
90
126
|
// necessary for certain state machines and string comparisons that would like
|
91
127
|
// to look directly for ASCII text in the buffer without going through the
|
92
128
|
// decoder.
|
93
|
-
const char* utf8iterator_get_end_pointer(const Utf8Iterator* iter)
|
129
|
+
static inline const char* utf8iterator_get_end_pointer(const Utf8Iterator* iter) {
|
130
|
+
return iter->_end;
|
131
|
+
}
|
132
|
+
|
133
|
+
// Retrieves a character pointer to the marked position.
|
134
|
+
static inline const char* utf8iterator_get_mark_pointer(const Utf8Iterator* iter) {
|
135
|
+
return iter->_mark;
|
136
|
+
}
|
94
137
|
|
95
138
|
// If the upcoming text in the buffer matches the specified prefix (which has
|
96
139
|
// length 'length'), consume it and return true. Otherwise, return false with
|
@@ -114,13 +157,6 @@ void utf8iterator_mark(Utf8Iterator* iter);
|
|
114
157
|
// Returns the current input stream position to the mark.
|
115
158
|
void utf8iterator_reset(Utf8Iterator* iter);
|
116
159
|
|
117
|
-
// Sets the position and original text fields of an error to the value at the
|
118
|
-
// mark.
|
119
|
-
void utf8iterator_fill_error_at_mark (
|
120
|
-
Utf8Iterator* iter,
|
121
|
-
struct GumboInternalError* error
|
122
|
-
);
|
123
|
-
|
124
160
|
#ifdef __cplusplus
|
125
161
|
}
|
126
162
|
#endif
|
data/lib/nokogumbo.rb
CHANGED
@@ -1,11 +1,13 @@
|
|
1
1
|
require 'nokogiri'
|
2
2
|
require 'nokogumbo/version'
|
3
3
|
require 'nokogumbo/html5'
|
4
|
-
require 'nokogumbo/xml/node.rb'
|
5
4
|
|
6
5
|
require 'nokogumbo/nokogumbo'
|
7
6
|
|
8
7
|
module Nokogumbo
|
8
|
+
# The default maximum number of attributes per element.
|
9
|
+
DEFAULT_MAX_ATTRIBUTES = 400
|
10
|
+
|
9
11
|
# The default maximum number of errors for parsing a document or a fragment.
|
10
12
|
DEFAULT_MAX_ERRORS = 0
|
11
13
|
|
data/lib/nokogumbo/html5.rb
CHANGED
@@ -1,5 +1,6 @@
|
|
1
1
|
require 'nokogumbo/html5/document'
|
2
2
|
require 'nokogumbo/html5/document_fragment'
|
3
|
+
require 'nokogumbo/html5/node'
|
3
4
|
|
4
5
|
module Nokogiri
|
5
6
|
# Parse an HTML 5 document. Convenience method for Nokogiri::HTML5::Document.parse
|
@@ -18,7 +19,7 @@ module Nokogiri
|
|
18
19
|
|
19
20
|
# Parse an HTML 5 document. Convenience method for Nokogiri::HTML5::Document.parse
|
20
21
|
def self.parse(string, url = nil, encoding = nil, **options, &block)
|
21
|
-
Document.parse(string, url, encoding, options, &block)
|
22
|
+
Document.parse(string, url, encoding, **options, &block)
|
22
23
|
end
|
23
24
|
|
24
25
|
# Parse a fragment from +string+. Convenience method for
|
@@ -91,19 +92,20 @@ module Nokogiri
|
|
91
92
|
if encoding.nil?
|
92
93
|
string = string.read
|
93
94
|
else
|
94
|
-
|
95
|
+
string = string.read(encoding: encoding)
|
95
96
|
end
|
96
97
|
else
|
97
98
|
# Otherwise the string has the given encoding.
|
98
|
-
|
99
|
+
string = string.to_s
|
100
|
+
if encoding
|
99
101
|
string = string.dup
|
100
102
|
string.force_encoding(encoding)
|
101
103
|
end
|
102
104
|
end
|
103
105
|
|
104
|
-
# convert to UTF-8
|
105
|
-
if string.
|
106
|
-
string = reencode(string
|
106
|
+
# convert to UTF-8
|
107
|
+
if string.encoding != Encoding::UTF_8
|
108
|
+
string = reencode(string)
|
107
109
|
end
|
108
110
|
string
|
109
111
|
end
|
@@ -122,18 +124,17 @@ module Nokogiri
|
|
122
124
|
# http://www.w3.org/TR/html5/syntax.html#determining-the-character-encoding
|
123
125
|
#
|
124
126
|
def self.reencode(body, content_type=nil)
|
125
|
-
return body unless body.respond_to? :encoding
|
126
|
-
|
127
127
|
if body.encoding == Encoding::ASCII_8BIT
|
128
128
|
encoding = nil
|
129
129
|
|
130
130
|
# look for a Byte Order Mark (BOM)
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
131
|
+
initial_bytes = body[0..2].bytes
|
132
|
+
if initial_bytes[0..2] == [0xEF, 0xBB, 0xBF]
|
133
|
+
encoding = Encoding::UTF_8
|
134
|
+
elsif initial_bytes[0..1] == [0xFE, 0xFF]
|
135
|
+
encoding = Encoding::UTF_16BE
|
136
|
+
elsif initial_bytes[0..1] == [0xFF, 0xFE]
|
137
|
+
encoding = Encoding::UTF_16LE
|
137
138
|
end
|
138
139
|
|
139
140
|
# look for a charset in a content-encoding header
|
@@ -153,6 +154,7 @@ module Nokogiri
|
|
153
154
|
encoding ||= Encoding::ISO_8859_1
|
154
155
|
|
155
156
|
# change the encoding to match the detected or inferred encoding
|
157
|
+
body = body.dup
|
156
158
|
begin
|
157
159
|
body.force_encoding(encoding)
|
158
160
|
rescue ArgumentError
|
@@ -167,7 +169,7 @@ module Nokogiri
|
|
167
169
|
case current_node.type
|
168
170
|
when XML::Node::ELEMENT_NODE
|
169
171
|
ns = current_node.namespace
|
170
|
-
ns_uri = ns.nil? ? nil : ns.
|
172
|
+
ns_uri = ns.nil? ? nil : ns.href
|
171
173
|
# XXX(sfc): attach namespaces to all nodes, even html?
|
172
174
|
if ns_uri.nil? || ns_uri == HTML_NAMESPACE || ns_uri == MATHML_NAMESPACE || ns_uri == SVG_NAMESPACE
|
173
175
|
tagname = current_node.name
|
data/lib/nokogumbo/html5/document.rb
CHANGED
@@ -3,6 +3,7 @@ module Nokogiri
|
|
3
3
|
class Document < Nokogiri::HTML::Document
|
4
4
|
def self.parse(string_or_io, url = nil, encoding = nil, **options, &block)
|
5
5
|
yield options if block_given?
|
6
|
+
string_or_io = '' unless string_or_io
|
6
7
|
|
7
8
|
if string_or_io.respond_to?(:encoding) && string_or_io.encoding.name != 'ASCII-8BIT'
|
8
9
|
encoding ||= string_or_io.encoding.name
|
@@ -11,24 +12,39 @@ module Nokogiri
|
|
11
12
|
if string_or_io.respond_to?(:read) && string_or_io.respond_to?(:path)
|
12
13
|
url ||= string_or_io.path
|
13
14
|
end
|
15
|
+
unless string_or_io.respond_to?(:read) || string_or_io.respond_to?(:to_str)
|
16
|
+
raise ArgumentError.new("not a string or IO object")
|
17
|
+
end
|
14
18
|
do_parse(string_or_io, url, encoding, options)
|
15
19
|
end
|
16
20
|
|
17
21
|
def self.read_io(io, url = nil, encoding = nil, **options)
|
18
|
-
raise ArgumentError.new("io object doesn't respond to :read") unless io.
|
22
|
+
raise ArgumentError.new("io object doesn't respond to :read") unless io.respond_to?(:read)
|
19
23
|
do_parse(io, url, encoding, options)
|
20
24
|
end
|
21
25
|
|
22
26
|
def self.read_memory(string, url = nil, encoding = nil, **options)
|
23
|
-
|
27
|
+
raise ArgumentError.new("string object doesn't respond to :to_str") unless string.respond_to?(:to_str)
|
28
|
+
do_parse(string, url, encoding, options)
|
29
|
+
end
|
30
|
+
|
31
|
+
def fragment(tags = nil)
|
32
|
+
DocumentFragment.new(self, tags, self.root)
|
33
|
+
end
|
34
|
+
|
35
|
+
def to_xml(options = {}, &block)
|
36
|
+
# Bypass XML::Document#to_xml which doesn't add
|
37
|
+
# XML::Node::SaveOptions::AS_XML like XML::Node#to_xml does.
|
38
|
+
XML::Node.instance_method(:to_xml).bind(self).call(options, &block)
|
24
39
|
end
|
25
40
|
|
26
41
|
private
|
27
42
|
def self.do_parse(string_or_io, url, encoding, options)
|
28
43
|
string = HTML5.read_and_encode(string_or_io, encoding)
|
44
|
+
max_attributes = options[:max_attributes] || Nokogumbo::DEFAULT_MAX_ATTRIBUTES
|
29
45
|
max_errors = options[:max_errors] || options[:max_parse_errors] || Nokogumbo::DEFAULT_MAX_ERRORS
|
30
46
|
max_depth = options[:max_tree_depth] || Nokogumbo::DEFAULT_MAX_TREE_DEPTH
|
31
|
-
doc = Nokogumbo.parse(string
|
47
|
+
doc = Nokogumbo.parse(string, url, max_attributes, max_errors, max_depth)
|
32
48
|
doc.encoding = 'UTF-8'
|
33
49
|
doc
|
34
50
|
end
|
data/lib/nokogumbo/html5/document_fragment.rb
CHANGED
@@ -3,29 +3,20 @@ require 'nokogiri'
|
|
3
3
|
module Nokogiri
|
4
4
|
module HTML5
|
5
5
|
class DocumentFragment < Nokogiri::HTML::DocumentFragment
|
6
|
+
attr_accessor :document
|
7
|
+
attr_accessor :errors
|
8
|
+
|
6
9
|
# Create a document fragment.
|
7
10
|
def initialize(doc, tags = nil, ctx = nil, options = {})
|
11
|
+
self.document = doc
|
12
|
+
self.errors = []
|
8
13
|
return self unless tags
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
# hack."
|
16
|
-
if tags.strip =~ /^<body/i
|
17
|
-
path = "/html/body"
|
18
|
-
else
|
19
|
-
path = "/html/body/node()"
|
20
|
-
end
|
21
|
-
# Add 2 for <html> and <body>.
|
22
|
-
max_depth = (options[:max_tree_depth] || Nokogumbo::DEFAULT_MAX_TREE_DEPTH) + 2
|
23
|
-
options = options.dup
|
24
|
-
options[:max_tree_depth] = max_depth
|
25
|
-
temp_doc = HTML5.parse("<!DOCTYPE html><html><body>#{tags}", options)
|
26
|
-
temp_doc.xpath(path).each { |child| child.parent = self }
|
27
|
-
self.errors = temp_doc.errors
|
28
|
-
end
|
14
|
+
|
15
|
+
max_attributes = options[:max_attributes] || Nokogumbo::DEFAULT_MAX_ATTRIBUTES
|
16
|
+
max_errors = options[:max_errors] || Nokogumbo::DEFAULT_MAX_ERRORS
|
17
|
+
max_depth = options[:max_tree_depth] || Nokogumbo::DEFAULT_MAX_TREE_DEPTH
|
18
|
+
tags = Nokogiri::HTML5.read_and_encode(tags, nil)
|
19
|
+
Nokogumbo.fragment(self, tags, ctx, max_attributes, max_errors, max_depth)
|
29
20
|
end
|
30
21
|
|
31
22
|
def serialize(options = {}, &block)
|
@@ -41,6 +32,31 @@ module Nokogiri
|
|
41
32
|
doc.encoding = 'UTF-8'
|
42
33
|
new(doc, tags, nil, options)
|
43
34
|
end
|
35
|
+
|
36
|
+
def extract_params params # :nodoc:
|
37
|
+
handler = params.find do |param|
|
38
|
+
![Hash, String, Symbol].include?(param.class)
|
39
|
+
end
|
40
|
+
params -= [handler] if handler
|
41
|
+
|
42
|
+
hashes = []
|
43
|
+
while Hash === params.last || params.last.nil?
|
44
|
+
hashes << params.pop
|
45
|
+
break if params.empty?
|
46
|
+
end
|
47
|
+
ns, binds = hashes.reverse
|
48
|
+
|
49
|
+
ns ||=
|
50
|
+
begin
|
51
|
+
ns = Hash.new
|
52
|
+
children.each { |child| ns.merge!(child.namespaces) }
|
53
|
+
ns
|
54
|
+
end
|
55
|
+
|
56
|
+
[params, handler, ns, binds]
|
57
|
+
end
|
58
|
+
|
44
59
|
end
|
45
60
|
end
|
46
61
|
end
|
62
|
+
# vim: set shiftwidth=2 softtabstop=2 tabstop=8 expandtab:
|
data/lib/nokogumbo/{xml → html5}/node.rb
CHANGED
@@ -1,57 +1,72 @@
|
|
1
1
|
require 'nokogiri'
|
2
2
|
|
3
3
|
module Nokogiri
|
4
|
-
|
5
|
-
|
6
|
-
class Node
|
4
|
+
module HTML5
|
5
|
+
module Node
|
7
6
|
# HTML elements can have attributes that contain colons.
|
8
7
|
# Nokogiri::XML::Node#[]= treats names with colons as a prefixed QName
|
9
8
|
# and tries to create an attribute in a namespace. This is especially
|
10
9
|
# annoying with attribute names like xml:lang since libxml2 will
|
11
10
|
# actually create the xml namespace if it doesn't exist already.
|
12
|
-
|
11
|
+
def add_child_node_and_reparent_attrs(node)
|
12
|
+
return super(node) unless document.is_a?(HTML5::Document)
|
13
|
+
# I'm not sure what this method is supposed to do. Reparenting
|
14
|
+
# namespaces is handled by libxml2, including child namespaces which
|
15
|
+
# this method wouldn't handle.
|
16
|
+
# https://github.com/sparklemotion/nokogiri/issues/1790
|
13
17
|
add_child_node(node)
|
14
|
-
node.attribute_nodes.find_all { |a| a.namespace }.each do |attr|
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
+
#node.attribute_nodes.find_all { |a| a.namespace }.each do |attr|
|
19
|
+
# attr.remove
|
20
|
+
# ns = attr.namespace
|
21
|
+
# a["#{ns.prefix}:#{attr.name}"] = attr.value
|
22
|
+
#end
|
18
23
|
end
|
19
24
|
|
20
25
|
def inner_html(options = {})
|
26
|
+
return super(options) unless document.is_a?(HTML5::Document)
|
21
27
|
result = options[:preserve_newline] && HTML5.prepend_newline?(self) ? "\n" : ""
|
22
28
|
result << children.map { |child| child.to_html(options) }.join
|
23
29
|
result
|
24
30
|
end
|
25
31
|
|
26
32
|
def write_to(io, *options)
|
33
|
+
return super(io, *options) unless document.is_a?(HTML5::Document)
|
27
34
|
options = options.first.is_a?(Hash) ? options.shift : {}
|
28
35
|
encoding = options[:encoding] || options[0]
|
29
36
|
if Nokogiri.jruby?
|
30
37
|
save_options = options[:save_with] || options[1]
|
31
38
|
indent_times = options[:indent] || 0
|
32
39
|
else
|
33
|
-
save_options = options[:save_with] || options[1] || SaveOptions::FORMAT
|
40
|
+
save_options = options[:save_with] || options[1] || XML::Node::SaveOptions::FORMAT
|
34
41
|
indent_times = options[:indent] || 2
|
35
42
|
end
|
36
43
|
indent_string = (options[:indent_text] || ' ') * indent_times
|
37
44
|
|
38
|
-
config = SaveOptions.new(save_options.to_i)
|
45
|
+
config = XML::Node::SaveOptions.new(save_options.to_i)
|
39
46
|
yield config if block_given?
|
40
47
|
|
41
48
|
config_options = config.options
|
42
|
-
if (config_options & (SaveOptions::AS_XML | SaveOptions::AS_XHTML) != 0)
|
49
|
+
if (config_options & (XML::Node::SaveOptions::AS_XML | XML::Node::SaveOptions::AS_XHTML) != 0)
|
43
50
|
# Use Nokogiri's serializing code.
|
44
51
|
native_write_to(io, encoding, indent_string, config_options)
|
45
52
|
else
|
46
53
|
# Serialize including the current node.
|
47
54
|
encoding ||= document.encoding || Encoding::UTF_8
|
48
55
|
internal_ops = {
|
49
|
-
trailing_nl: config_options & SaveOptions::FORMAT != 0,
|
50
56
|
preserve_newline: options[:preserve_newline] || false
|
51
57
|
}
|
52
|
-
HTML5.serialize_node_internal(self, io, encoding,
|
58
|
+
HTML5.serialize_node_internal(self, io, encoding, internal_ops)
|
53
59
|
end
|
54
60
|
end
|
61
|
+
|
62
|
+
def fragment(tags)
|
63
|
+
return super(tags) unless document.is_a?(HTML5::Document)
|
64
|
+
DocumentFragment.new(document, tags, self)
|
65
|
+
end
|
55
66
|
end
|
67
|
+
# Monkey patch
|
68
|
+
XML::Node.prepend(HTML5::Node)
|
56
69
|
end
|
57
70
|
end
|
71
|
+
|
72
|
+
# vim: set shiftwidth=2 softtabstop=2 tabstop=8 expandtab:
|
data/lib/nokogumbo/version.rb
CHANGED
metadata
CHANGED
@@ -1,30 +1,36 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: nokogumbo
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.0.
|
4
|
+
version: 2.0.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Sam Ruby
|
8
8
|
- Stephen Checkoway
|
9
|
-
autorequire:
|
9
|
+
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date:
|
12
|
+
date: 2020-11-27 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: nokogiri
|
16
16
|
requirement: !ruby/object:Gem::Requirement
|
17
17
|
requirements:
|
18
|
+
- - "~>"
|
19
|
+
- !ruby/object:Gem::Version
|
20
|
+
version: '1.8'
|
18
21
|
- - ">="
|
19
22
|
- !ruby/object:Gem::Version
|
20
|
-
version:
|
23
|
+
version: 1.8.4
|
21
24
|
type: :runtime
|
22
25
|
prerelease: false
|
23
26
|
version_requirements: !ruby/object:Gem::Requirement
|
24
27
|
requirements:
|
28
|
+
- - "~>"
|
29
|
+
- !ruby/object:Gem::Version
|
30
|
+
version: '1.8'
|
25
31
|
- - ">="
|
26
32
|
- !ruby/object:Gem::Version
|
27
|
-
version:
|
33
|
+
version: 1.8.4
|
28
34
|
description: Nokogumbo allows a Ruby program to invoke the Gumbo HTML5 parser and
|
29
35
|
access the result as a Nokogiri parsed document.
|
30
36
|
email:
|
@@ -35,7 +41,6 @@ extensions:
|
|
35
41
|
- ext/nokogumbo/extconf.rb
|
36
42
|
extra_rdoc_files: []
|
37
43
|
files:
|
38
|
-
- CHANGELOG.md
|
39
44
|
- LICENSE.txt
|
40
45
|
- README.md
|
41
46
|
- ext/nokogumbo/extconf.rb
|
@@ -63,6 +68,8 @@ files:
|
|
63
68
|
- gumbo-parser/src/tag.c
|
64
69
|
- gumbo-parser/src/tag_lookup.c
|
65
70
|
- gumbo-parser/src/tag_lookup.h
|
71
|
+
- gumbo-parser/src/token_buffer.c
|
72
|
+
- gumbo-parser/src/token_buffer.h
|
66
73
|
- gumbo-parser/src/token_type.h
|
67
74
|
- gumbo-parser/src/tokenizer.c
|
68
75
|
- gumbo-parser/src/tokenizer.h
|
@@ -77,8 +84,8 @@ files:
|
|
77
84
|
- lib/nokogumbo/html5.rb
|
78
85
|
- lib/nokogumbo/html5/document.rb
|
79
86
|
- lib/nokogumbo/html5/document_fragment.rb
|
87
|
+
- lib/nokogumbo/html5/node.rb
|
80
88
|
- lib/nokogumbo/version.rb
|
81
|
-
- lib/nokogumbo/xml/node.rb
|
82
89
|
homepage: https://github.com/rubys/nokogumbo/#readme
|
83
90
|
licenses:
|
84
91
|
- Apache-2.0
|
@@ -87,7 +94,7 @@ metadata:
|
|
87
94
|
changelog_uri: https://github.com/rubys/nokogumbo/blob/master/CHANGELOG.md
|
88
95
|
homepage_uri: https://github.com/rubys/nokogumbo/#readme
|
89
96
|
source_code_uri: https://github.com/rubys/nokogumbo
|
90
|
-
post_install_message:
|
97
|
+
post_install_message:
|
91
98
|
rdoc_options: []
|
92
99
|
require_paths:
|
93
100
|
- lib
|
@@ -95,16 +102,15 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
95
102
|
requirements:
|
96
103
|
- - ">="
|
97
104
|
- !ruby/object:Gem::Version
|
98
|
-
version: '
|
105
|
+
version: '2.1'
|
99
106
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
100
107
|
requirements:
|
101
|
-
- - "
|
108
|
+
- - ">="
|
102
109
|
- !ruby/object:Gem::Version
|
103
|
-
version:
|
110
|
+
version: '0'
|
104
111
|
requirements: []
|
105
|
-
|
106
|
-
|
107
|
-
signing_key:
|
112
|
+
rubygems_version: 3.1.4
|
113
|
+
signing_key:
|
108
114
|
specification_version: 4
|
109
115
|
summary: Nokogiri interface to the Gumbo HTML5 parser
|
110
116
|
test_files: []
|