nokogumbo 2.0.0.pre.alpha → 2.0.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +101 -14
- data/ext/nokogumbo/extconf.rb +7 -2
- data/ext/nokogumbo/nokogumbo.c +630 -235
- data/gumbo-parser/src/ascii.c +42 -0
- data/gumbo-parser/src/ascii.h +91 -7
- data/gumbo-parser/src/char_ref.c +5973 -4601
- data/gumbo-parser/src/char_ref.h +13 -28
- data/gumbo-parser/src/error.c +391 -126
- data/gumbo-parser/src/error.h +63 -125
- data/gumbo-parser/src/gumbo.h +74 -4
- data/gumbo-parser/src/parser.c +1161 -1025
- data/gumbo-parser/src/string_buffer.c +1 -1
- data/gumbo-parser/src/string_buffer.h +1 -1
- data/gumbo-parser/src/token_buffer.c +79 -0
- data/gumbo-parser/src/token_buffer.h +71 -0
- data/gumbo-parser/src/tokenizer.c +1440 -1278
- data/gumbo-parser/src/tokenizer.h +7 -18
- data/gumbo-parser/src/tokenizer_states.h +275 -23
- data/gumbo-parser/src/utf8.c +17 -59
- data/gumbo-parser/src/utf8.h +52 -16
- data/lib/nokogumbo.rb +3 -1
- data/lib/nokogumbo/html5.rb +17 -15
- data/lib/nokogumbo/html5/document.rb +19 -3
- data/lib/nokogumbo/html5/document_fragment.rb +36 -20
- data/lib/nokogumbo/{xml → html5}/node.rb +28 -13
- data/lib/nokogumbo/version.rb +1 -1
- metadata +20 -14
- data/CHANGELOG.md +0 -56
data/gumbo-parser/src/utf8.h
CHANGED
@@ -30,7 +30,9 @@ struct GumboInternalError;
|
|
30
30
|
struct GumboInternalParser;
|
31
31
|
|
32
32
|
// Unicode replacement char.
|
33
|
-
|
33
|
+
#define kUtf8ReplacementChar 0xFFFD
|
34
|
+
#define kUtf8BomChar 0xFEFF
|
35
|
+
#define kUtf8MaxChar 0x10FFFF
|
34
36
|
|
35
37
|
typedef struct GumboInternalUtf8Iterator {
|
36
38
|
// Points at the start of the code point most recently read into 'current'.
|
@@ -60,9 +62,23 @@ typedef struct GumboInternalUtf8Iterator {
|
|
60
62
|
struct GumboInternalParser* _parser;
|
61
63
|
} Utf8Iterator;
|
62
64
|
|
63
|
-
// Returns true if this Unicode code point is
|
64
|
-
|
65
|
-
|
65
|
+
// Reports whether the code point `c` falls inside the surrogate range
// 0xD800..0xDFFF (both ends inclusive).
CONST_FN static inline bool utf8_is_surrogate(int c) {
  if (c < 0xD800) {
    return false;
  }
  return c <= 0xDFFF;
}
|
69
|
+
|
70
|
+
// Reports whether the code point `c` is a noncharacter: either inside the
// contiguous block 0xFDD0..0xFDEF, or one whose low 16 bits are 0xFFFE or
// 0xFFFF (the last two code points of a plane).
CONST_FN static inline bool utf8_is_noncharacter(int c) {
  if (c >= 0xFDD0 && c <= 0xFDEF) {
    return true;
  }
  const int low16 = c & 0xFFFF;
  return low16 == 0xFFFE || low16 == 0xFFFF;
}
|
77
|
+
|
78
|
+
// Returns true if this Unicode code point is a control: a C0 control
// (U+0000..U+001F, inclusive) or a code point in U+007F..U+009F.
// Negative values are never controls.
CONST_FN static inline bool utf8_is_control(int c) {
  // The unsigned cast folds the "c >= 0" check into the upper-bound test:
  // a negative c converts to a large unsigned value and fails the comparison.
  // The bound is inclusive so that U+001F itself is classified as a control.
  return ((unsigned int)c <= 0x1Fu) || (c >= 0x7F && c <= 0x9F);
}
|
66
82
|
|
67
83
|
// Initializes a new Utf8Iterator from the given byte buffer. The source does
|
68
84
|
// not have to be NUL-terminated, but the length must be passed in explicitly.
|
@@ -77,20 +93,47 @@ void utf8iterator_init (
|
|
77
93
|
void utf8iterator_next(Utf8Iterator* iter);
|
78
94
|
|
79
95
|
// Returns the current code point as an integer.
|
80
|
-
int utf8iterator_current(const Utf8Iterator* iter)
|
96
|
+
static inline int utf8iterator_current(const Utf8Iterator* iter) {
|
97
|
+
return iter->_current;
|
98
|
+
}
|
81
99
|
|
82
100
|
// Retrieves and fills the output parameter with the current source position.
|
83
|
-
void utf8iterator_get_position(
|
84
|
-
|
101
|
+
static inline void utf8iterator_get_position (
|
102
|
+
const Utf8Iterator* iter,
|
103
|
+
GumboSourcePosition* output
|
104
|
+
) {
|
105
|
+
*output = iter->_pos;
|
106
|
+
}
|
107
|
+
|
108
|
+
// Retrieves the marked position.
|
109
|
+
static inline GumboSourcePosition utf8iterator_get_mark_position (
|
110
|
+
const Utf8Iterator* iter
|
111
|
+
) {
|
112
|
+
return iter->_mark_pos;
|
113
|
+
}
|
85
114
|
|
86
115
|
// Retrieves a character pointer to the start of the current character.
|
87
|
-
const char* utf8iterator_get_char_pointer(const Utf8Iterator* iter)
|
116
|
+
static inline const char* utf8iterator_get_char_pointer(const Utf8Iterator* iter) {
|
117
|
+
return iter->_start;
|
118
|
+
}
|
119
|
+
|
120
|
+
// Retrieves the width of the current character.
|
121
|
+
static inline size_t utf8iterator_get_width(const Utf8Iterator* iter) {
|
122
|
+
return iter->_width;
|
123
|
+
}
|
88
124
|
|
89
125
|
// Retrieves a character pointer to 1 past the end of the buffer. This is
|
90
126
|
// necessary for certain state machines and string comparisons that would like
|
91
127
|
// to look directly for ASCII text in the buffer without going through the
|
92
128
|
// decoder.
|
93
|
-
const char* utf8iterator_get_end_pointer(const Utf8Iterator* iter)
|
129
|
+
static inline const char* utf8iterator_get_end_pointer(const Utf8Iterator* iter) {
|
130
|
+
return iter->_end;
|
131
|
+
}
|
132
|
+
|
133
|
+
// Retrieves a character pointer to the marked position.
|
134
|
+
static inline const char* utf8iterator_get_mark_pointer(const Utf8Iterator* iter) {
|
135
|
+
return iter->_mark;
|
136
|
+
}
|
94
137
|
|
95
138
|
// If the upcoming text in the buffer matches the specified prefix (which has
|
96
139
|
// length 'length'), consume it and return true. Otherwise, return false with
|
@@ -114,13 +157,6 @@ void utf8iterator_mark(Utf8Iterator* iter);
|
|
114
157
|
// Returns the current input stream position to the mark.
|
115
158
|
void utf8iterator_reset(Utf8Iterator* iter);
|
116
159
|
|
117
|
-
// Sets the position and original text fields of an error to the value at the
|
118
|
-
// mark.
|
119
|
-
void utf8iterator_fill_error_at_mark (
|
120
|
-
Utf8Iterator* iter,
|
121
|
-
struct GumboInternalError* error
|
122
|
-
);
|
123
|
-
|
124
160
|
#ifdef __cplusplus
|
125
161
|
}
|
126
162
|
#endif
|
data/lib/nokogumbo.rb
CHANGED
@@ -1,11 +1,13 @@
|
|
1
1
|
require 'nokogiri'
|
2
2
|
require 'nokogumbo/version'
|
3
3
|
require 'nokogumbo/html5'
|
4
|
-
require 'nokogumbo/xml/node.rb'
|
5
4
|
|
6
5
|
require 'nokogumbo/nokogumbo'
|
7
6
|
|
8
7
|
module Nokogumbo
|
8
|
+
# The default maximum number of attributes per element.
|
9
|
+
DEFAULT_MAX_ATTRIBUTES = 400
|
10
|
+
|
9
11
|
# The default maximum number of errors for parsing a document or a fragment.
|
10
12
|
DEFAULT_MAX_ERRORS = 0
|
11
13
|
|
data/lib/nokogumbo/html5.rb
CHANGED
@@ -1,5 +1,6 @@
|
|
1
1
|
require 'nokogumbo/html5/document'
|
2
2
|
require 'nokogumbo/html5/document_fragment'
|
3
|
+
require 'nokogumbo/html5/node'
|
3
4
|
|
4
5
|
module Nokogiri
|
5
6
|
# Parse an HTML 5 document. Convenience method for Nokogiri::HTML5::Document.parse
|
@@ -18,7 +19,7 @@ module Nokogiri
|
|
18
19
|
|
19
20
|
# Parse an HTML 5 document. Convenience method for Nokogiri::HTML5::Document.parse
|
20
21
|
def self.parse(string, url = nil, encoding = nil, **options, &block)
|
21
|
-
Document.parse(string, url, encoding, options, &block)
|
22
|
+
Document.parse(string, url, encoding, **options, &block)
|
22
23
|
end
|
23
24
|
|
24
25
|
# Parse a fragment from +string+. Convenience method for
|
@@ -91,19 +92,20 @@ module Nokogiri
|
|
91
92
|
if encoding.nil?
|
92
93
|
string = string.read
|
93
94
|
else
|
94
|
-
|
95
|
+
string = string.read(encoding: encoding)
|
95
96
|
end
|
96
97
|
else
|
97
98
|
# Otherwise the string has the given encoding.
|
98
|
-
|
99
|
+
string = string.to_s
|
100
|
+
if encoding
|
99
101
|
string = string.dup
|
100
102
|
string.force_encoding(encoding)
|
101
103
|
end
|
102
104
|
end
|
103
105
|
|
104
|
-
# convert to UTF-8
|
105
|
-
if string.
|
106
|
-
string = reencode(string
|
106
|
+
# convert to UTF-8
|
107
|
+
if string.encoding != Encoding::UTF_8
|
108
|
+
string = reencode(string)
|
107
109
|
end
|
108
110
|
string
|
109
111
|
end
|
@@ -122,18 +124,17 @@ module Nokogiri
|
|
122
124
|
# http://www.w3.org/TR/html5/syntax.html#determining-the-character-encoding
|
123
125
|
#
|
124
126
|
def self.reencode(body, content_type=nil)
|
125
|
-
return body unless body.respond_to? :encoding
|
126
|
-
|
127
127
|
if body.encoding == Encoding::ASCII_8BIT
|
128
128
|
encoding = nil
|
129
129
|
|
130
130
|
# look for a Byte Order Mark (BOM)
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
131
|
+
initial_bytes = body[0..2].bytes
|
132
|
+
if initial_bytes[0..2] == [0xEF, 0xBB, 0xBF]
|
133
|
+
encoding = Encoding::UTF_8
|
134
|
+
elsif initial_bytes[0..1] == [0xFE, 0xFF]
|
135
|
+
encoding = Encoding::UTF_16BE
|
136
|
+
elsif initial_bytes[0..1] == [0xFF, 0xFE]
|
137
|
+
encoding = Encoding::UTF_16LE
|
137
138
|
end
|
138
139
|
|
139
140
|
# look for a charset in a content-encoding header
|
@@ -153,6 +154,7 @@ module Nokogiri
|
|
153
154
|
encoding ||= Encoding::ISO_8859_1
|
154
155
|
|
155
156
|
# change the encoding to match the detected or inferred encoding
|
157
|
+
body = body.dup
|
156
158
|
begin
|
157
159
|
body.force_encoding(encoding)
|
158
160
|
rescue ArgumentError
|
@@ -167,7 +169,7 @@ module Nokogiri
|
|
167
169
|
case current_node.type
|
168
170
|
when XML::Node::ELEMENT_NODE
|
169
171
|
ns = current_node.namespace
|
170
|
-
ns_uri = ns.nil? ? nil : ns.
|
172
|
+
ns_uri = ns.nil? ? nil : ns.href
|
171
173
|
# XXX(sfc): attach namespaces to all nodes, even html?
|
172
174
|
if ns_uri.nil? || ns_uri == HTML_NAMESPACE || ns_uri == MATHML_NAMESPACE || ns_uri == SVG_NAMESPACE
|
173
175
|
tagname = current_node.name
|
@@ -3,6 +3,7 @@ module Nokogiri
|
|
3
3
|
class Document < Nokogiri::HTML::Document
|
4
4
|
def self.parse(string_or_io, url = nil, encoding = nil, **options, &block)
|
5
5
|
yield options if block_given?
|
6
|
+
string_or_io = '' unless string_or_io
|
6
7
|
|
7
8
|
if string_or_io.respond_to?(:encoding) && string_or_io.encoding.name != 'ASCII-8BIT'
|
8
9
|
encoding ||= string_or_io.encoding.name
|
@@ -11,24 +12,39 @@ module Nokogiri
|
|
11
12
|
if string_or_io.respond_to?(:read) && string_or_io.respond_to?(:path)
|
12
13
|
url ||= string_or_io.path
|
13
14
|
end
|
15
|
+
unless string_or_io.respond_to?(:read) || string_or_io.respond_to?(:to_str)
|
16
|
+
raise ArgumentError.new("not a string or IO object")
|
17
|
+
end
|
14
18
|
do_parse(string_or_io, url, encoding, options)
|
15
19
|
end
|
16
20
|
|
17
21
|
def self.read_io(io, url = nil, encoding = nil, **options)
|
18
|
-
raise ArgumentError.new("io object doesn't respond to :read") unless io.
|
22
|
+
raise ArgumentError.new("io object doesn't respond to :read") unless io.respond_to?(:read)
|
19
23
|
do_parse(io, url, encoding, options)
|
20
24
|
end
|
21
25
|
|
22
26
|
def self.read_memory(string, url = nil, encoding = nil, **options)
|
23
|
-
|
27
|
+
raise ArgumentError.new("string object doesn't respond to :to_str") unless string.respond_to?(:to_str)
|
28
|
+
do_parse(string, url, encoding, options)
|
29
|
+
end
|
30
|
+
|
31
|
+
def fragment(tags = nil)
|
32
|
+
DocumentFragment.new(self, tags, self.root)
|
33
|
+
end
|
34
|
+
|
35
|
+
def to_xml(options = {}, &block)
|
36
|
+
# Bypass XML::Document#to_xml which doesn't add
|
37
|
+
# XML::Node::SaveOptions::AS_XML like XML::Node#to_xml does.
|
38
|
+
XML::Node.instance_method(:to_xml).bind(self).call(options, &block)
|
24
39
|
end
|
25
40
|
|
26
41
|
private
|
27
42
|
def self.do_parse(string_or_io, url, encoding, options)
|
28
43
|
string = HTML5.read_and_encode(string_or_io, encoding)
|
44
|
+
max_attributes = options[:max_attributes] || Nokogumbo::DEFAULT_MAX_ATTRIBUTES
|
29
45
|
max_errors = options[:max_errors] || options[:max_parse_errors] || Nokogumbo::DEFAULT_MAX_ERRORS
|
30
46
|
max_depth = options[:max_tree_depth] || Nokogumbo::DEFAULT_MAX_TREE_DEPTH
|
31
|
-
doc = Nokogumbo.parse(string
|
47
|
+
doc = Nokogumbo.parse(string, url, max_attributes, max_errors, max_depth)
|
32
48
|
doc.encoding = 'UTF-8'
|
33
49
|
doc
|
34
50
|
end
|
@@ -3,29 +3,20 @@ require 'nokogiri'
|
|
3
3
|
module Nokogiri
|
4
4
|
module HTML5
|
5
5
|
class DocumentFragment < Nokogiri::HTML::DocumentFragment
|
6
|
+
attr_accessor :document
|
7
|
+
attr_accessor :errors
|
8
|
+
|
6
9
|
# Create a document fragment.
#
# +doc+ is stored as the fragment's document and +errors+ starts out empty.
# When +tags+ is nil (or false) nothing is parsed and the fragment stays
# empty. Otherwise +tags+ is passed through Nokogiri::HTML5.read_and_encode
# and handed to Nokogumbo.fragment together with the context node +ctx+ and
# the parse limits below.
#
# Recognised +options+ keys (each falls back to the Nokogumbo default):
# [:max_attributes]   limit passed as the attribute maximum
# [:max_errors]       limit passed as the error maximum; :max_parse_errors
#                     is accepted as an alternate spelling, as it is when
#                     parsing a whole document
# [:max_tree_depth]   limit passed as the tree depth maximum
def initialize(doc, tags = nil, ctx = nil, options = {})
  self.document = doc
  self.errors = []
  return self unless tags

  max_attributes = options[:max_attributes] || Nokogumbo::DEFAULT_MAX_ATTRIBUTES
  # Honour :max_parse_errors too, so a fragment accepts the same option
  # names as document parsing instead of silently ignoring that key.
  max_errors = options[:max_errors] || options[:max_parse_errors] || Nokogumbo::DEFAULT_MAX_ERRORS
  max_depth = options[:max_tree_depth] || Nokogumbo::DEFAULT_MAX_TREE_DEPTH
  tags = Nokogiri::HTML5.read_and_encode(tags, nil)
  Nokogumbo.fragment(self, tags, ctx, max_attributes, max_errors, max_depth)
end
|
30
21
|
|
31
22
|
def serialize(options = {}, &block)
|
@@ -41,6 +32,31 @@ module Nokogiri
|
|
41
32
|
doc.encoding = 'UTF-8'
|
42
33
|
new(doc, tags, nil, options)
|
43
34
|
end
|
35
|
+
|
36
|
+
def extract_params params # :nodoc:
|
37
|
+
handler = params.find do |param|
|
38
|
+
![Hash, String, Symbol].include?(param.class)
|
39
|
+
end
|
40
|
+
params -= [handler] if handler
|
41
|
+
|
42
|
+
hashes = []
|
43
|
+
while Hash === params.last || params.last.nil?
|
44
|
+
hashes << params.pop
|
45
|
+
break if params.empty?
|
46
|
+
end
|
47
|
+
ns, binds = hashes.reverse
|
48
|
+
|
49
|
+
ns ||=
|
50
|
+
begin
|
51
|
+
ns = Hash.new
|
52
|
+
children.each { |child| ns.merge!(child.namespaces) }
|
53
|
+
ns
|
54
|
+
end
|
55
|
+
|
56
|
+
[params, handler, ns, binds]
|
57
|
+
end
|
58
|
+
|
44
59
|
end
|
45
60
|
end
|
46
61
|
end
|
62
|
+
# vim: set shiftwidth=2 softtabstop=2 tabstop=8 expandtab:
|
@@ -1,57 +1,72 @@
|
|
1
1
|
require 'nokogiri'
|
2
2
|
|
3
3
|
module Nokogiri
|
4
|
-
|
5
|
-
|
6
|
-
class Node
|
4
|
+
module HTML5
|
5
|
+
module Node
|
7
6
|
# HTML elements can have attributes that contain colons.
|
8
7
|
# Nokogiri::XML::Node#[]= treats names with colons as a prefixed QName
|
9
8
|
# and tries to create an attribute in a namespace. This is especially
|
10
9
|
# annoying with attribute names like xml:lang since libxml2 will
|
11
10
|
# actually create the xml namespace if it doesn't exist already.
|
12
|
-
|
11
|
+
def add_child_node_and_reparent_attrs(node)
|
12
|
+
return super(node) unless document.is_a?(HTML5::Document)
|
13
|
+
# I'm not sure what this method is supposed to do. Reparenting
|
14
|
+
# namespaces is handled by libxml2, including child namespaces which
|
15
|
+
# this method wouldn't handle.
|
16
|
+
# https://github.com/sparklemotion/nokogiri/issues/1790
|
13
17
|
add_child_node(node)
|
14
|
-
node.attribute_nodes.find_all { |a| a.namespace }.each do |attr|
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
+
#node.attribute_nodes.find_all { |a| a.namespace }.each do |attr|
|
19
|
+
# attr.remove
|
20
|
+
# ns = attr.namespace
|
21
|
+
# a["#{ns.prefix}:#{attr.name}"] = attr.value
|
22
|
+
#end
|
18
23
|
end
|
19
24
|
|
20
25
|
def inner_html(options = {})
|
26
|
+
return super(options) unless document.is_a?(HTML5::Document)
|
21
27
|
result = options[:preserve_newline] && HTML5.prepend_newline?(self) ? "\n" : ""
|
22
28
|
result << children.map { |child| child.to_html(options) }.join
|
23
29
|
result
|
24
30
|
end
|
25
31
|
|
26
32
|
def write_to(io, *options)
|
33
|
+
return super(io, *options) unless document.is_a?(HTML5::Document)
|
27
34
|
options = options.first.is_a?(Hash) ? options.shift : {}
|
28
35
|
encoding = options[:encoding] || options[0]
|
29
36
|
if Nokogiri.jruby?
|
30
37
|
save_options = options[:save_with] || options[1]
|
31
38
|
indent_times = options[:indent] || 0
|
32
39
|
else
|
33
|
-
save_options = options[:save_with] || options[1] || SaveOptions::FORMAT
|
40
|
+
save_options = options[:save_with] || options[1] || XML::Node::SaveOptions::FORMAT
|
34
41
|
indent_times = options[:indent] || 2
|
35
42
|
end
|
36
43
|
indent_string = (options[:indent_text] || ' ') * indent_times
|
37
44
|
|
38
|
-
config = SaveOptions.new(save_options.to_i)
|
45
|
+
config = XML::Node::SaveOptions.new(save_options.to_i)
|
39
46
|
yield config if block_given?
|
40
47
|
|
41
48
|
config_options = config.options
|
42
|
-
if (config_options & (SaveOptions::AS_XML | SaveOptions::AS_XHTML) != 0)
|
49
|
+
if (config_options & (XML::Node::SaveOptions::AS_XML | XML::Node::SaveOptions::AS_XHTML) != 0)
|
43
50
|
# Use Nokogiri's serializing code.
|
44
51
|
native_write_to(io, encoding, indent_string, config_options)
|
45
52
|
else
|
46
53
|
# Serialize including the current node.
|
47
54
|
encoding ||= document.encoding || Encoding::UTF_8
|
48
55
|
internal_ops = {
|
49
|
-
trailing_nl: config_options & SaveOptions::FORMAT != 0,
|
50
56
|
preserve_newline: options[:preserve_newline] || false
|
51
57
|
}
|
52
|
-
HTML5.serialize_node_internal(self, io, encoding,
|
58
|
+
HTML5.serialize_node_internal(self, io, encoding, internal_ops)
|
53
59
|
end
|
54
60
|
end
|
61
|
+
|
62
|
+
def fragment(tags)
|
63
|
+
return super(tags) unless document.is_a?(HTML5::Document)
|
64
|
+
DocumentFragment.new(document, tags, self)
|
65
|
+
end
|
55
66
|
end
|
67
|
+
# Monkey patch
|
68
|
+
XML::Node.prepend(HTML5::Node)
|
56
69
|
end
|
57
70
|
end
|
71
|
+
|
72
|
+
# vim: set shiftwidth=2 softtabstop=2 tabstop=8 expandtab:
|
data/lib/nokogumbo/version.rb
CHANGED
metadata
CHANGED
@@ -1,30 +1,36 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: nokogumbo
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.0.
|
4
|
+
version: 2.0.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Sam Ruby
|
8
8
|
- Stephen Checkoway
|
9
|
-
autorequire:
|
9
|
+
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date:
|
12
|
+
date: 2020-11-27 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: nokogiri
|
16
16
|
requirement: !ruby/object:Gem::Requirement
|
17
17
|
requirements:
|
18
|
+
- - "~>"
|
19
|
+
- !ruby/object:Gem::Version
|
20
|
+
version: '1.8'
|
18
21
|
- - ">="
|
19
22
|
- !ruby/object:Gem::Version
|
20
|
-
version:
|
23
|
+
version: 1.8.4
|
21
24
|
type: :runtime
|
22
25
|
prerelease: false
|
23
26
|
version_requirements: !ruby/object:Gem::Requirement
|
24
27
|
requirements:
|
28
|
+
- - "~>"
|
29
|
+
- !ruby/object:Gem::Version
|
30
|
+
version: '1.8'
|
25
31
|
- - ">="
|
26
32
|
- !ruby/object:Gem::Version
|
27
|
-
version:
|
33
|
+
version: 1.8.4
|
28
34
|
description: Nokogumbo allows a Ruby program to invoke the Gumbo HTML5 parser and
|
29
35
|
access the result as a Nokogiri parsed document.
|
30
36
|
email:
|
@@ -35,7 +41,6 @@ extensions:
|
|
35
41
|
- ext/nokogumbo/extconf.rb
|
36
42
|
extra_rdoc_files: []
|
37
43
|
files:
|
38
|
-
- CHANGELOG.md
|
39
44
|
- LICENSE.txt
|
40
45
|
- README.md
|
41
46
|
- ext/nokogumbo/extconf.rb
|
@@ -63,6 +68,8 @@ files:
|
|
63
68
|
- gumbo-parser/src/tag.c
|
64
69
|
- gumbo-parser/src/tag_lookup.c
|
65
70
|
- gumbo-parser/src/tag_lookup.h
|
71
|
+
- gumbo-parser/src/token_buffer.c
|
72
|
+
- gumbo-parser/src/token_buffer.h
|
66
73
|
- gumbo-parser/src/token_type.h
|
67
74
|
- gumbo-parser/src/tokenizer.c
|
68
75
|
- gumbo-parser/src/tokenizer.h
|
@@ -77,8 +84,8 @@ files:
|
|
77
84
|
- lib/nokogumbo/html5.rb
|
78
85
|
- lib/nokogumbo/html5/document.rb
|
79
86
|
- lib/nokogumbo/html5/document_fragment.rb
|
87
|
+
- lib/nokogumbo/html5/node.rb
|
80
88
|
- lib/nokogumbo/version.rb
|
81
|
-
- lib/nokogumbo/xml/node.rb
|
82
89
|
homepage: https://github.com/rubys/nokogumbo/#readme
|
83
90
|
licenses:
|
84
91
|
- Apache-2.0
|
@@ -87,7 +94,7 @@ metadata:
|
|
87
94
|
changelog_uri: https://github.com/rubys/nokogumbo/blob/master/CHANGELOG.md
|
88
95
|
homepage_uri: https://github.com/rubys/nokogumbo/#readme
|
89
96
|
source_code_uri: https://github.com/rubys/nokogumbo
|
90
|
-
post_install_message:
|
97
|
+
post_install_message:
|
91
98
|
rdoc_options: []
|
92
99
|
require_paths:
|
93
100
|
- lib
|
@@ -95,16 +102,15 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
95
102
|
requirements:
|
96
103
|
- - ">="
|
97
104
|
- !ruby/object:Gem::Version
|
98
|
-
version: '
|
105
|
+
version: '2.1'
|
99
106
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
100
107
|
requirements:
|
101
|
-
- - "
|
108
|
+
- - ">="
|
102
109
|
- !ruby/object:Gem::Version
|
103
|
-
version:
|
110
|
+
version: '0'
|
104
111
|
requirements: []
|
105
|
-
|
106
|
-
|
107
|
-
signing_key:
|
112
|
+
rubygems_version: 3.1.4
|
113
|
+
signing_key:
|
108
114
|
specification_version: 4
|
109
115
|
summary: Nokogiri interface to the Gumbo HTML5 parser
|
110
116
|
test_files: []
|