nokogumbo 1.5.0 → 2.0.0.pre.alpha
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +56 -0
- data/README.md +146 -22
- data/ext/nokogumbo/extconf.rb +116 -0
- data/ext/{nokogumboc → nokogumbo}/nokogumbo.c +174 -71
- data/gumbo-parser/src/ascii.c +33 -0
- data/gumbo-parser/src/ascii.h +31 -0
- data/gumbo-parser/src/attribute.c +26 -28
- data/gumbo-parser/src/attribute.h +3 -23
- data/gumbo-parser/src/char_ref.c +135 -2351
- data/gumbo-parser/src/char_ref.h +13 -29
- data/gumbo-parser/src/error.c +215 -133
- data/gumbo-parser/src/error.h +34 -49
- data/gumbo-parser/src/foreign_attrs.c +104 -0
- data/gumbo-parser/src/gumbo.h +506 -304
- data/gumbo-parser/src/insertion_mode.h +4 -28
- data/gumbo-parser/src/macros.h +91 -0
- data/gumbo-parser/src/parser.c +1989 -1431
- data/gumbo-parser/src/parser.h +6 -22
- data/gumbo-parser/src/replacement.h +33 -0
- data/gumbo-parser/src/string_buffer.c +43 -50
- data/gumbo-parser/src/string_buffer.h +24 -40
- data/gumbo-parser/src/string_piece.c +39 -39
- data/gumbo-parser/src/svg_attrs.c +174 -0
- data/gumbo-parser/src/svg_tags.c +137 -0
- data/gumbo-parser/src/tag.c +186 -59
- data/gumbo-parser/src/tag_lookup.c +382 -0
- data/gumbo-parser/src/tag_lookup.h +13 -0
- data/gumbo-parser/src/token_type.h +1 -25
- data/gumbo-parser/src/tokenizer.c +899 -495
- data/gumbo-parser/src/tokenizer.h +37 -37
- data/gumbo-parser/src/tokenizer_states.h +6 -22
- data/gumbo-parser/src/utf8.c +103 -86
- data/gumbo-parser/src/utf8.h +37 -41
- data/gumbo-parser/src/util.c +48 -38
- data/gumbo-parser/src/util.h +10 -40
- data/gumbo-parser/src/vector.c +45 -57
- data/gumbo-parser/src/vector.h +17 -39
- data/lib/nokogumbo.rb +10 -174
- data/lib/nokogumbo/html5.rb +250 -0
- data/lib/nokogumbo/html5/document.rb +37 -0
- data/lib/nokogumbo/html5/document_fragment.rb +46 -0
- data/lib/nokogumbo/version.rb +3 -0
- data/lib/nokogumbo/xml/node.rb +57 -0
- metadata +32 -19
- data/ext/nokogumboc/extconf.rb +0 -60
- data/gumbo-parser/src/char_ref.rl +0 -2554
- data/gumbo-parser/src/string_piece.h +0 -38
- data/gumbo-parser/src/tag.in +0 -150
- data/gumbo-parser/src/tag_enum.h +0 -153
- data/gumbo-parser/src/tag_gperf.h +0 -105
- data/gumbo-parser/src/tag_sizes.h +0 -4
- data/gumbo-parser/src/tag_strings.h +0 -153
- data/gumbo-parser/visualc/include/strings.h +0 -4
- data/test-nokogumbo.rb +0 -190
@@ -0,0 +1,250 @@
|
|
1
|
+
require 'nokogumbo/html5/document'
|
2
|
+
require 'nokogumbo/html5/document_fragment'
|
3
|
+
|
4
|
+
module Nokogiri
|
5
|
+
# Parse an HTML 5 document. Convenience method for Nokogiri::HTML5::Document.parse
|
6
|
+
def self.HTML5(string_or_io, url = nil, encoding = nil, **options, &block)
  # Pure delegation: positional arguments, keyword options and the optional
  # block are handed to the document class untouched.
  document_class = Nokogiri::HTML5::Document
  document_class.parse(string_or_io, url, encoding, **options, &block)
end
|
9
|
+
|
10
|
+
module HTML5
|
11
|
+
# HTML uses the XHTML namespace.
|
12
|
+
HTML_NAMESPACE = 'http://www.w3.org/1999/xhtml'.freeze
|
13
|
+
MATHML_NAMESPACE = 'http://www.w3.org/1998/Math/MathML'.freeze
|
14
|
+
SVG_NAMESPACE = 'http://www.w3.org/2000/svg'.freeze
|
15
|
+
XLINK_NAMESPACE = 'http://www.w3.org/1999/xlink'.freeze
|
16
|
+
XML_NAMESPACE = 'http://www.w3.org/XML/1998/namespace'.freeze
|
17
|
+
XMLNS_NAMESPACE = 'http://www.w3.org/2000/xmlns/'.freeze
|
18
|
+
|
19
|
+
# Parse an HTML 5 document. Convenience method for Nokogiri::HTML5::Document.parse
|
20
|
+
def self.parse(string, url = nil, encoding = nil, **options, &block)
  # Document.parse declares +**options+, so the collected keywords must be
  # re-splatted. Passing the Hash positionally is a fourth positional
  # argument on Ruby 3 and raises ArgumentError.
  Document.parse(string, url, encoding, **options, &block)
end
|
23
|
+
|
24
|
+
# Parse a fragment from +string+. Convenience method for
|
25
|
+
# Nokogiri::HTML5::DocumentFragment.parse.
|
26
|
+
def self.fragment(string, encoding = nil, **options)
  # DocumentFragment.parse takes its options as a plain positional Hash,
  # so the collected keywords are passed along as one Hash argument.
  fragment_options = options
  DocumentFragment.parse(string, encoding, fragment_options)
end
|
29
|
+
|
30
|
+
# Fetch and parse a HTML document from the web, following redirects,
|
31
|
+
# handling https, and determining the character encoding using HTML5
|
32
|
+
# rules. +uri+ may be a +String+ or a +URI+. +options+ contains
|
33
|
+
# http headers and special options. Everything which is not a
|
34
|
+
# special option is considered a header. Special options include:
|
35
|
+
# * :follow_limit => number of redirects which are followed
|
36
|
+
# * :basic_auth => [username, password]
|
37
|
+
def self.get(uri, options={})
  # Deprecated calling convention: a bare number is the redirect limit.
  # Normalize it first so every later use of +options+ (#each_key, #merge)
  # sees a Hash instead of failing on an Integer.
  options = {:follow_limit => options} if Numeric === options

  headers = options.clone
  limit = headers[:follow_limit] ? headers.delete(:follow_limit).to_i : 10

  require 'net/http'
  uri = URI(uri) unless URI === uri

  http = Net::HTTP.new(uri.host, uri.port)

  # TLS / SSL support
  http.use_ssl = true if uri.scheme == 'https'

  # Pass through Net::HTTP override values, which currently include:
  #   :ca_file, :ca_path, :cert, :cert_store, :ciphers,
  #   :close_on_empty_response, :continue_timeout, :key, :open_timeout,
  #   :read_timeout, :ssl_timeout, :ssl_version, :use_ssl,
  #   :verify_callback, :verify_depth, :verify_mode
  # Each key consumed here is removed from +headers+ so that it is not
  # also sent as an HTTP header.
  options.each_key do |key|
    http.send "#{key}=", headers.delete(key) if http.respond_to? "#{key}="
  end

  request = Net::HTTP::Get.new(uri.request_uri)

  # basic authentication: an explicit :basic_auth pair wins over
  # credentials embedded in the URI
  auth = headers.delete(:basic_auth)
  auth ||= [uri.user, uri.password] if uri.user && uri.password
  request.basic_auth auth.first, auth.last if auth

  # remaining options are treated as headers
  headers.each {|key, value| request[key.to_s] = value.to_s}

  response = http.request(request)

  case response
  when Net::HTTPSuccess
    # Hand the options to the parser as keywords. Passing the Hash as the
    # second positional argument would put it in the +url+ slot. Only
    # symbol keys can be parser options; string keys are HTTP headers.
    parse_options = options.select { |key, _| key.is_a?(Symbol) }
    doc = parse(reencode(response.body, response['content-type']), nil, nil, **parse_options)
    doc.instance_variable_set('@response', response)
    doc.class.send(:attr_reader, :response)
    doc
  when Net::HTTPRedirection
    # Out of redirects: #value raises for any non-2xx response.
    response.value if limit <= 1
    location = URI.join(uri, response['location'])
    get(location, options.merge(:follow_limit => limit-1))
  else
    response.value
  end
end
|
85
|
+
|
86
|
+
private
|
87
|
+
|
88
|
+
def self.read_and_encode(string, encoding)
  # Accepts either a String-like object or anything responding to #read.
  # IO#read and StringIO#read take (length, outbuf) and reject an
  # +encoding:+ keyword, so the data is read as-is and tagged afterwards.
  string = string.read if string.respond_to?(:read)

  # The caller states which encoding the bytes are in. Tag a copy so the
  # caller's object is never mutated.
  if encoding && string.respond_to?(:force_encoding)
    string = string.dup
    string.force_encoding(encoding)
  end

  # convert to UTF-8 (the parser only understands UTF-8)
  if string.respond_to?(:encoding) && string.encoding != Encoding::UTF_8
    string = reencode(string.dup)
  end
  string
end
|
110
|
+
|
111
|
+
# Charset sniffing is a complex and controversial topic that understandably
|
112
|
+
# isn't done _by default_ by the Ruby Net::HTTP library. This being said,
|
113
|
+
# it is a very real problem for consumers of HTML as the default for HTML
|
114
|
+
# is iso-8859-1, most "good" producers use utf-8, and the Gumbo parser
|
115
|
+
# *only* supports utf-8.
|
116
|
+
#
|
117
|
+
# Accordingly, Nokogiri::HTML::Document.parse provides limited encoding
|
118
|
+
# detection. Following this lead, Nokogiri::HTML5 attempts to do likewise,
|
119
|
+
# while attempting to more closely follow the HTML5 standard.
|
120
|
+
#
|
121
|
+
# http://bugs.ruby-lang.org/issues/2567
|
122
|
+
# http://www.w3.org/TR/html5/syntax.html#determining-the-character-encoding
|
123
|
+
#
|
124
|
+
def self.reencode(body, content_type=nil)
  # Returns +body+ transcoded to UTF-8. For binary (ASCII-8BIT) input the
  # source encoding is determined, in order, from a byte order mark, the
  # charset in +content_type+, a <meta> charset in the first 1024 bytes,
  # and finally the ISO-8859-1 default. Non-binary input is transcoded
  # from whatever encoding it already carries.
  # Note: binary input is re-tagged in place via #force_encoding.
  return body unless body.respond_to? :encoding

  if body.encoding == Encoding::ASCII_8BIT
    encoding = nil

    # look for a Byte Order Mark (BOM). Compare raw byte values: a binary
    # slice never compares equal to a UTF-8 literal holding the same
    # (invalid) bytes, so a String comparison would never match.
    leading = body[0..2].bytes
    if leading[0, 2] == [0xFE, 0xFF]
      encoding = 'utf-16be'
    elsif leading[0, 2] == [0xFF, 0xFE]
      encoding = 'utf-16le'
    elsif leading == [0xEF, 0xBB, 0xBF]
      encoding = 'utf-8'
    end

    # look for a charset in the Content-Type header value
    if content_type
      encoding ||= content_type[/charset=["']?(.*?)($|["';\s])/i, 1]
    end

    # look for a charset in a meta tag in the first 1024 bytes
    # (comments are stripped first so commented-out tags are ignored)
    if not encoding
      data = body[0..1023].gsub(/<!--.*?(-->|\Z)/m, '')
      data.scan(/<meta.*?>/m).each do |meta|
        encoding ||= meta[/charset=["']?([^>]*?)($|["'\s>])/im, 1]
      end
    end

    # if all else fails, default to the official default encoding for HTML
    encoding ||= Encoding::ISO_8859_1

    # change the encoding to match the detected or inferred encoding;
    # an unknown charset name raises ArgumentError and falls back to the default
    begin
      body.force_encoding(encoding)
    rescue ArgumentError
      body.force_encoding(Encoding::ISO_8859_1)
    end
  end

  body.encode(Encoding::UTF_8)
end
|
165
|
+
|
166
|
+
def self.serialize_node_internal(current_node, io, encoding, options)
  # Appends the HTML serialization of +current_node+ (and, recursively, its
  # children) to +io+ using <<. +encoding+ is passed through to escape_text.
  # The only option read here is options[:preserve_newline].
  # Raises RuntimeError for node types that are not handled.
  case current_node.type
  when XML::Node::ELEMENT_NODE
    ns = current_node.namespace
    # Namespace objects expose the URI as #href (the attribute code below
    # relies on the same accessor).
    ns_uri = ns.nil? ? nil : ns.href
    # XXX(sfc): attach namespaces to all nodes, even html?
    if ns_uri.nil? || ns_uri == HTML_NAMESPACE || ns_uri == MATHML_NAMESPACE || ns_uri == SVG_NAMESPACE
      tagname = current_node.name
    else
      tagname = "#{ns.prefix}:#{current_node.name}"
    end
    io << '<' << tagname
    current_node.attribute_nodes.each do |attr|
      attr_ns = attr.namespace
      if attr_ns.nil?
        attr_name = attr.name
      else
        attr_ns_uri = attr_ns.href
        # Attribute name with any existing "prefix:" stripped.
        local_name = attr.name.sub(/^[^:]*:/, '')
        if attr_ns_uri == XML_NAMESPACE
          attr_name = 'xml:' + local_name
        elsif attr_ns_uri == XMLNS_NAMESPACE && local_name == 'xmlns'
          attr_name = 'xmlns'
        elsif attr_ns_uri == XMLNS_NAMESPACE
          attr_name = 'xmlns:' + local_name
        elsif attr_ns_uri == XLINK_NAMESPACE
          attr_name = 'xlink:' + local_name
        else
          attr_name = "#{attr_ns.prefix}:#{attr.name}"
        end
      end
      io << ' ' << attr_name << '="' << escape_text(attr.content, encoding, true) << '"'
    end
    io << '>'
    # Void elements have neither children nor an end tag.
    if !%w[area base basefont bgsound br col embed frame hr img input keygen
           link meta param source track wbr].include?(current_node.name)
      io << "\n" if options[:preserve_newline] && prepend_newline?(current_node)
      current_node.children.each do |child|
        # XXX(sfc): Templates handled specially?
        serialize_node_internal(child, io, encoding, options)
      end
      io << '</' << tagname << '>'
    end
  when XML::Node::TEXT_NODE
    parent = current_node.parent
    # Text inside these elements is emitted verbatim, without escaping.
    if parent.element? && %w[style script xmp iframe noembed noframes plaintext noscript].include?(parent.name)
      io << current_node.content
    else
      io << escape_text(current_node.content, encoding, false)
    end
  when XML::Node::CDATA_SECTION_NODE
    io << '<![CDATA[' << current_node.content << ']]>'
  when XML::Node::COMMENT_NODE
    io << '<!--' << current_node.content << '-->'
  when XML::Node::PI_NODE
    io << '<?' << current_node.content << '>'
  when XML::Node::DOCUMENT_TYPE_NODE, XML::Node::DTD_NODE
    io << '<!DOCTYPE ' << current_node.name << '>'
  when XML::Node::HTML_DOCUMENT_NODE, XML::Node::DOCUMENT_FRAG_NODE
    current_node.children.each do |child|
      serialize_node_internal(child, io, encoding, options)
    end
  else
    raise "Unexpected node '#{current_node.name}' of type #{current_node.type}"
  end
end
|
231
|
+
|
232
|
+
def self.escape_text(text, encoding, attribute_mode)
  # Escapes +text+ for HTML output and returns it encoded as +encoding+.
  # In attribute mode &, U+00A0 and " are replaced; otherwise &, U+00A0,
  # < and > are replaced. Each character must map to its entity: mapping
  # it to itself would leave the markup unescaped.
  if attribute_mode
    text = text.gsub(/[&\u00a0"]/,
                     { '&' => '&amp;', "\u00a0" => '&nbsp;', '"' => '&quot;' })
  else
    text = text.gsub(/[&\u00a0<>]/,
                     { '&' => '&amp;', "\u00a0" => '&nbsp;', '<' => '&lt;', '>' => '&gt;' })
  end
  # Not part of the standard: characters the target encoding cannot
  # represent are written as hexadecimal character references.
  text.encode(encoding, fallback: lambda { |c| "&\#x#{c.ord.to_s(16)};" })
end
|
243
|
+
|
244
|
+
def self.prepend_newline?(node)
  # True when +node+ is a pre/textarea/listing element whose first child
  # is a text node starting with a newline.
  return false unless %w[pre textarea listing].include?(node.name)

  kids = node.children
  return false if kids.empty?

  leading = kids[0]
  leading.text? && leading.content.start_with?("\n")
end
|
249
|
+
end
|
250
|
+
end
|
@@ -0,0 +1,37 @@
|
|
1
|
+
module Nokogiri
|
2
|
+
module HTML5
|
3
|
+
class Document < Nokogiri::HTML::Document
|
4
|
+
# Parse +string_or_io+ into a document. When a block is given it receives
# the options Hash before parsing. A missing +encoding+ is taken from the
# input when it reports a non-binary encoding, and a missing +url+ is
# taken from the input's #path when the input is readable.
def self.parse(string_or_io, url = nil, encoding = nil, **options, &block)
  yield options if block_given?

  if string_or_io.respond_to?(:encoding)
    source_encoding = string_or_io.encoding.name
    encoding ||= source_encoding unless source_encoding == 'ASCII-8BIT'
  end

  readable_with_path = string_or_io.respond_to?(:read) && string_or_io.respond_to?(:path)
  url ||= string_or_io.path if readable_with_path

  do_parse(string_or_io, url, encoding, options)
end
|
16
|
+
|
17
|
+
# Parse a document from +io+, which must respond to #read.
# Raises ArgumentError otherwise.
def self.read_io(io, url = nil, encoding = nil, **options)
  # The check must be spelled respond_to?; a misspelled predicate raises
  # NoMethodError for every caller.
  raise ArgumentError, "io object doesn't respond to :read" unless io.respond_to?(:read)
  do_parse(io, url, encoding, options)
end
|
21
|
+
|
22
|
+
# Parse a document from an in-memory value; the input is converted with
# #to_s before parsing.
def self.read_memory(string, url = nil, encoding = nil, **options)
  source = string.to_s
  do_parse(source, url, encoding, options)
end
|
25
|
+
|
26
|
+
private
|
27
|
+
# Shared implementation behind parse/read_io/read_memory: converts the
# input to UTF-8 text, resolves the error and depth limits from +options+
# (falling back to the Nokogumbo defaults) and runs the parser. The
# resulting document's encoding is set to 'UTF-8'.
def self.do_parse(string_or_io, url, encoding, options)
  html = HTML5.read_and_encode(string_or_io, encoding)
  error_limit = options[:max_errors] || options[:max_parse_errors] || Nokogumbo::DEFAULT_MAX_ERRORS
  depth_limit = options[:max_tree_depth] || Nokogumbo::DEFAULT_MAX_TREE_DEPTH

  parsed = Nokogumbo.parse(html.to_s, url, error_limit, depth_limit)
  parsed.encoding = 'UTF-8'
  parsed
end
|
35
|
+
end
|
36
|
+
end
|
37
|
+
end
|
@@ -0,0 +1,46 @@
|
|
1
|
+
require 'nokogiri'
|
2
|
+
|
3
|
+
module Nokogiri
|
4
|
+
module HTML5
|
5
|
+
class DocumentFragment < Nokogiri::HTML::DocumentFragment
|
6
|
+
# Create a document fragment.
|
7
|
+
def initialize(doc, tags = nil, ctx = nil, options = {})
  # With no +tags+ the fragment is left empty. Parsing relative to a
  # context node is not implemented, so a +ctx+ is rejected with
  # ArgumentError.
  return self unless tags
  raise ArgumentError, "Fragment parsing with context not supported" if ctx

  tags = Nokogiri::HTML5.read_and_encode(tags, nil)

  # Copied from Nokogiri's document_fragment.rb and labeled "a horrible
  # hack."
  if tags.strip =~ /^<body/i
    path = "/html/body"
  else
    path = "/html/body/node()"
  end
  # Add 2 for <html> and <body>.
  max_depth = (options[:max_tree_depth] || Nokogumbo::DEFAULT_MAX_TREE_DEPTH) + 2
  options = options.dup
  options[:max_tree_depth] = max_depth
  # HTML5.parse takes (string, url, encoding, **options); splat the Hash
  # so it arrives as keyword options rather than in the +url+ slot.
  temp_doc = HTML5.parse("<!DOCTYPE html><html><body>#{tags}", **options)
  temp_doc.xpath(path).each { |child| child.parent = self }
  self.errors = temp_doc.errors
end
|
30
|
+
|
31
|
+
def serialize(options = {}, &block)
  # Look up XML::Node's own #serialize and invoke it on this fragment
  # directly, skipping any override further down the ancestor chain that
  # does not accept an options Hash.
  node_serialize = XML::Node.instance_method(:serialize)
  bound = node_serialize.bind(self)
  bound.call(options, &block)
end
|
36
|
+
|
37
|
+
# Parse a document fragment from +tags+, returning a Nodeset.
|
38
|
+
def self.parse(tags, encoding = nil, options = {})
  # Builds a fresh UTF-8 HTML5 document to own the fragment, converts
  # +tags+ to UTF-8 text and returns the new fragment instance.
  owner = HTML5::Document.new
  owner.encoding = 'UTF-8'
  markup = HTML5.read_and_encode(tags, encoding)
  new(owner, markup, nil, options)
end
|
44
|
+
end
|
45
|
+
end
|
46
|
+
end
|
@@ -0,0 +1,57 @@
|
|
1
|
+
require 'nokogiri'
|
2
|
+
|
3
|
+
module Nokogiri
|
4
|
+
# Monkey patch
|
5
|
+
module XML
|
6
|
+
class Node
|
7
|
+
# HTML elements can have attributes that contain colons.
|
8
|
+
# Nokogiri::XML::Node#[]= treats names with colons as a prefixed QName
|
9
|
+
# and tries to create an attribute in a namespace. This is especially
|
10
|
+
# annoying with attribute names like xml:lang since libxml2 will
|
11
|
+
# actually create the xml namespace if it doesn't exist already.
|
12
|
+
define_method(:add_child_node_and_reparent_attrs) do |node|
  add_child_node(node)
  # Every attribute that ended up in a namespace is detached and set
  # again by name through #[]= on the node.
  namespaced = node.attribute_nodes.select { |attribute| attribute.namespace }
  namespaced.each do |attribute|
    attribute.remove
    node[attribute.name] = attribute.value
  end
end
|
19
|
+
|
20
|
+
# Returns the concatenated HTML of all children. With
# options[:preserve_newline] set, a leading "\n" is emitted when
# HTML5.prepend_newline? says this node needs one.
def inner_html(options = {})
  needs_newline = options[:preserve_newline] && HTML5.prepend_newline?(self)
  html = needs_newline ? "\n" : ""
  pieces = children.map { |child| child.to_html(options) }
  html << pieces.join
  html
end
|
25
|
+
|
26
|
+
# Write this node to +io+. Only a leading Hash argument is honoured
# (:encoding, :save_with, :indent, :indent_text, :preserve_newline). A
# block, if given, receives the SaveOptions object before serialization.
# Nokogiri's native serializer is used when AS_XML or AS_XHTML is
# requested or when the owning document is not an HTML5::Document;
# otherwise the HTML5 serializer is used.
def write_to(io, *options)
  options = options.first.is_a?(Hash) ? options.shift : {}
  encoding = options[:encoding] || options[0]
  if Nokogiri.jruby?
    save_options = options[:save_with] || options[1]
    indent_times = options[:indent] || 0
  else
    save_options = options[:save_with] || options[1] || SaveOptions::FORMAT
    indent_times = options[:indent] || 2
  end
  indent_string = (options[:indent_text] || ' ') * indent_times

  config = SaveOptions.new(save_options.to_i)
  yield config if block_given?

  config_options = config.options
  if (config_options & (SaveOptions::AS_XML | SaveOptions::AS_XHTML) != 0) || !document.is_a?(HTML5::Document)
    # Use Nokogiri's serializing code.
    native_write_to(io, encoding, indent_string, config_options)
  else
    # Serialize including the current node.
    encoding ||= document.encoding || Encoding::UTF_8
    internal_ops = {
      trailing_nl: (config_options & SaveOptions::FORMAT) != 0,
      preserve_newline: options[:preserve_newline] || false
    }
    # Pass the normalized serializer options built above, not the raw
    # caller-supplied Hash.
    HTML5.serialize_node_internal(self, io, encoding, internal_ops)
  end
end
|
55
|
+
end
|
56
|
+
end
|
57
|
+
end
|
metadata
CHANGED
@@ -1,14 +1,15 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: nokogumbo
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version:
|
4
|
+
version: 2.0.0.pre.alpha
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Sam Ruby
|
8
|
+
- Stephen Checkoway
|
8
9
|
autorequire:
|
9
10
|
bindir: bin
|
10
11
|
cert_chain: []
|
11
|
-
date: 2018-
|
12
|
+
date: 2018-08-31 00:00:00.000000000 Z
|
12
13
|
dependencies:
|
13
14
|
- !ruby/object:Gem::Dependency
|
14
15
|
name: nokogiri
|
@@ -26,37 +27,42 @@ dependencies:
|
|
26
27
|
version: '0'
|
27
28
|
description: Nokogumbo allows a Ruby program to invoke the Gumbo HTML5 parser and
|
28
29
|
access the result as a Nokogiri parsed document.
|
29
|
-
email:
|
30
|
+
email:
|
31
|
+
- rubys@intertwingly.net
|
32
|
+
- s@pahtak.org
|
30
33
|
executables: []
|
31
34
|
extensions:
|
32
|
-
- ext/
|
35
|
+
- ext/nokogumbo/extconf.rb
|
33
36
|
extra_rdoc_files: []
|
34
37
|
files:
|
38
|
+
- CHANGELOG.md
|
35
39
|
- LICENSE.txt
|
36
40
|
- README.md
|
37
|
-
- ext/
|
38
|
-
- ext/
|
41
|
+
- ext/nokogumbo/extconf.rb
|
42
|
+
- ext/nokogumbo/nokogumbo.c
|
43
|
+
- gumbo-parser/src/ascii.c
|
44
|
+
- gumbo-parser/src/ascii.h
|
39
45
|
- gumbo-parser/src/attribute.c
|
40
46
|
- gumbo-parser/src/attribute.h
|
41
47
|
- gumbo-parser/src/char_ref.c
|
42
48
|
- gumbo-parser/src/char_ref.h
|
43
|
-
- gumbo-parser/src/char_ref.rl
|
44
49
|
- gumbo-parser/src/error.c
|
45
50
|
- gumbo-parser/src/error.h
|
51
|
+
- gumbo-parser/src/foreign_attrs.c
|
46
52
|
- gumbo-parser/src/gumbo.h
|
47
53
|
- gumbo-parser/src/insertion_mode.h
|
54
|
+
- gumbo-parser/src/macros.h
|
48
55
|
- gumbo-parser/src/parser.c
|
49
56
|
- gumbo-parser/src/parser.h
|
57
|
+
- gumbo-parser/src/replacement.h
|
50
58
|
- gumbo-parser/src/string_buffer.c
|
51
59
|
- gumbo-parser/src/string_buffer.h
|
52
60
|
- gumbo-parser/src/string_piece.c
|
53
|
-
- gumbo-parser/src/
|
61
|
+
- gumbo-parser/src/svg_attrs.c
|
62
|
+
- gumbo-parser/src/svg_tags.c
|
54
63
|
- gumbo-parser/src/tag.c
|
55
|
-
- gumbo-parser/src/
|
56
|
-
- gumbo-parser/src/
|
57
|
-
- gumbo-parser/src/tag_gperf.h
|
58
|
-
- gumbo-parser/src/tag_sizes.h
|
59
|
-
- gumbo-parser/src/tag_strings.h
|
64
|
+
- gumbo-parser/src/tag_lookup.c
|
65
|
+
- gumbo-parser/src/tag_lookup.h
|
60
66
|
- gumbo-parser/src/token_type.h
|
61
67
|
- gumbo-parser/src/tokenizer.c
|
62
68
|
- gumbo-parser/src/tokenizer.h
|
@@ -67,13 +73,20 @@ files:
|
|
67
73
|
- gumbo-parser/src/util.h
|
68
74
|
- gumbo-parser/src/vector.c
|
69
75
|
- gumbo-parser/src/vector.h
|
70
|
-
- gumbo-parser/visualc/include/strings.h
|
71
76
|
- lib/nokogumbo.rb
|
72
|
-
-
|
77
|
+
- lib/nokogumbo/html5.rb
|
78
|
+
- lib/nokogumbo/html5/document.rb
|
79
|
+
- lib/nokogumbo/html5/document_fragment.rb
|
80
|
+
- lib/nokogumbo/version.rb
|
81
|
+
- lib/nokogumbo/xml/node.rb
|
73
82
|
homepage: https://github.com/rubys/nokogumbo/#readme
|
74
83
|
licenses:
|
75
84
|
- Apache-2.0
|
76
|
-
metadata:
|
85
|
+
metadata:
|
86
|
+
bug_tracker_uri: https://github.com/rubys/nokogumbo/issues
|
87
|
+
changelog_uri: https://github.com/rubys/nokogumbo/blob/master/CHANGELOG.md
|
88
|
+
homepage_uri: https://github.com/rubys/nokogumbo/#readme
|
89
|
+
source_code_uri: https://github.com/rubys/nokogumbo
|
77
90
|
post_install_message:
|
78
91
|
rdoc_options: []
|
79
92
|
require_paths:
|
@@ -85,12 +98,12 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
85
98
|
version: '0'
|
86
99
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
87
100
|
requirements:
|
88
|
-
- - "
|
101
|
+
- - ">"
|
89
102
|
- !ruby/object:Gem::Version
|
90
|
-
version:
|
103
|
+
version: 1.3.1
|
91
104
|
requirements: []
|
92
105
|
rubyforge_project:
|
93
|
-
rubygems_version: 2.7.
|
106
|
+
rubygems_version: 2.7.6
|
94
107
|
signing_key:
|
95
108
|
specification_version: 4
|
96
109
|
summary: Nokogiri interface to the Gumbo HTML5 parser
|