nokogumbo 1.5.0 → 2.0.0.pre.alpha
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +56 -0
- data/README.md +146 -22
- data/ext/nokogumbo/extconf.rb +116 -0
- data/ext/{nokogumboc → nokogumbo}/nokogumbo.c +174 -71
- data/gumbo-parser/src/ascii.c +33 -0
- data/gumbo-parser/src/ascii.h +31 -0
- data/gumbo-parser/src/attribute.c +26 -28
- data/gumbo-parser/src/attribute.h +3 -23
- data/gumbo-parser/src/char_ref.c +135 -2351
- data/gumbo-parser/src/char_ref.h +13 -29
- data/gumbo-parser/src/error.c +215 -133
- data/gumbo-parser/src/error.h +34 -49
- data/gumbo-parser/src/foreign_attrs.c +104 -0
- data/gumbo-parser/src/gumbo.h +506 -304
- data/gumbo-parser/src/insertion_mode.h +4 -28
- data/gumbo-parser/src/macros.h +91 -0
- data/gumbo-parser/src/parser.c +1989 -1431
- data/gumbo-parser/src/parser.h +6 -22
- data/gumbo-parser/src/replacement.h +33 -0
- data/gumbo-parser/src/string_buffer.c +43 -50
- data/gumbo-parser/src/string_buffer.h +24 -40
- data/gumbo-parser/src/string_piece.c +39 -39
- data/gumbo-parser/src/svg_attrs.c +174 -0
- data/gumbo-parser/src/svg_tags.c +137 -0
- data/gumbo-parser/src/tag.c +186 -59
- data/gumbo-parser/src/tag_lookup.c +382 -0
- data/gumbo-parser/src/tag_lookup.h +13 -0
- data/gumbo-parser/src/token_type.h +1 -25
- data/gumbo-parser/src/tokenizer.c +899 -495
- data/gumbo-parser/src/tokenizer.h +37 -37
- data/gumbo-parser/src/tokenizer_states.h +6 -22
- data/gumbo-parser/src/utf8.c +103 -86
- data/gumbo-parser/src/utf8.h +37 -41
- data/gumbo-parser/src/util.c +48 -38
- data/gumbo-parser/src/util.h +10 -40
- data/gumbo-parser/src/vector.c +45 -57
- data/gumbo-parser/src/vector.h +17 -39
- data/lib/nokogumbo.rb +10 -174
- data/lib/nokogumbo/html5.rb +250 -0
- data/lib/nokogumbo/html5/document.rb +37 -0
- data/lib/nokogumbo/html5/document_fragment.rb +46 -0
- data/lib/nokogumbo/version.rb +3 -0
- data/lib/nokogumbo/xml/node.rb +57 -0
- metadata +32 -19
- data/ext/nokogumboc/extconf.rb +0 -60
- data/gumbo-parser/src/char_ref.rl +0 -2554
- data/gumbo-parser/src/string_piece.h +0 -38
- data/gumbo-parser/src/tag.in +0 -150
- data/gumbo-parser/src/tag_enum.h +0 -153
- data/gumbo-parser/src/tag_gperf.h +0 -105
- data/gumbo-parser/src/tag_sizes.h +0 -4
- data/gumbo-parser/src/tag_strings.h +0 -153
- data/gumbo-parser/visualc/include/strings.h +0 -4
- data/test-nokogumbo.rb +0 -190
@@ -0,0 +1,250 @@
|
|
1
|
+
require 'nokogumbo/html5/document'
|
2
|
+
require 'nokogumbo/html5/document_fragment'
|
3
|
+
|
4
|
+
module Nokogiri
  # Parse an HTML 5 document. Convenience method for Nokogiri::HTML5::Document.parse
  def self.HTML5(string_or_io, url = nil, encoding = nil, **options, &block)
    Nokogiri::HTML5::Document.parse(string_or_io, url, encoding, **options, &block)
  end

  module HTML5
    # HTML uses the XHTML namespace.
    HTML_NAMESPACE = 'http://www.w3.org/1999/xhtml'.freeze
    MATHML_NAMESPACE = 'http://www.w3.org/1998/Math/MathML'.freeze
    SVG_NAMESPACE = 'http://www.w3.org/2000/svg'.freeze
    XLINK_NAMESPACE = 'http://www.w3.org/1999/xlink'.freeze
    XML_NAMESPACE = 'http://www.w3.org/XML/1998/namespace'.freeze
    XMLNS_NAMESPACE = 'http://www.w3.org/2000/xmlns/'.freeze

    # Elements that are serialized without children and without an end tag.
    VOID_ELEMENTS = %w[area base basefont bgsound br col embed frame hr img input keygen
                       link meta param source track wbr].freeze
    # Elements whose text children are written out verbatim (no escaping).
    RAW_TEXT_ELEMENTS = %w[style script xmp iframe noembed noframes plaintext noscript].freeze
    private_constant :VOID_ELEMENTS, :RAW_TEXT_ELEMENTS

    # Parse an HTML 5 document. Convenience method for Nokogiri::HTML5::Document.parse
    def self.parse(string, url = nil, encoding = nil, **options, &block)
      # Forward the options as keywords: Document.parse declares **options, so
      # a positional hash would be an extra argument on Ruby 3.
      Document.parse(string, url, encoding, **options, &block)
    end

    # Parse a fragment from +string+. Convenience method for
    # Nokogiri::HTML5::DocumentFragment.parse.
    def self.fragment(string, encoding = nil, **options)
      DocumentFragment.parse(string, encoding, options)
    end

    # Fetch and parse a HTML document from the web, following redirects,
    # handling https, and determining the character encoding using HTML5
    # rules.  +uri+ may be a +String+ or a +URI+.  +options+ contains
    # http headers and special options.  Everything which is not a
    # special option is considered a header.  Special options include:
    #  * :follow_limit => number of redirects which are followed
    #  * :basic_auth => [username, password]
    #
    # Returns the parsed document, which additionally exposes the
    # Net::HTTP response through +response+. Non-success, non-redirect
    # responses (and exhausted redirect limits) raise via +response.value+.
    def self.get(uri, options={})
      # Deprecated calling convention: a bare number is the redirect limit.
      # Normalise it up front so the hash operations below keep working.
      options = {:follow_limit => options} if Numeric === options
      headers = options.clone
      limit = headers[:follow_limit] ? headers.delete(:follow_limit).to_i : 10

      require 'net/http'
      uri = URI(uri) unless URI === uri

      http = Net::HTTP.new(uri.host, uri.port)

      # TLS / SSL support
      http.use_ssl = true if uri.scheme == 'https'

      # Pass through Net::HTTP override values, which currently include:
      #   :ca_file, :ca_path, :cert, :cert_store, :ciphers,
      #   :close_on_empty_response, :continue_timeout, :key, :open_timeout,
      #   :read_timeout, :ssl_timeout, :ssl_version, :use_ssl,
      #   :verify_callback, :verify_depth, :verify_mode
      options.each_key do |key|
        http.send "#{key}=", headers.delete(key) if http.respond_to? "#{key}="
      end

      request = Net::HTTP::Get.new(uri.request_uri)

      # basic authentication
      auth = headers.delete(:basic_auth)
      auth ||= [uri.user, uri.password] if uri.user && uri.password
      request.basic_auth auth.first, auth.last if auth

      # remaining options are treated as headers
      headers.each {|key, value| request[key.to_s] = value.to_s}

      response = http.request(request)

      case response
      when Net::HTTPSuccess
        body = reencode(response.body, response['content-type'])
        # Hand the options to the parser as keywords rather than as a
        # positional hash (which would land in the +url+ parameter). Only
        # symbol keys can be parser options; string keys are HTTP headers.
        parser_options = options.select { |key, _| key.is_a?(Symbol) }
        doc = parse(body, **parser_options)
        doc.instance_variable_set('@response', response)
        doc.class.send(:attr_reader, :response)
        doc
      when Net::HTTPRedirection
        response.value if limit <= 1
        location = URI.join(uri, response['location'])
        get(location, options.merge(:follow_limit => limit-1))
      else
        response.value
      end
    end

    # The helpers below are internal, but they are defined with `def self.`
    # and are called with an explicit receiver from other files of this gem
    # (e.g. HTML5.read_and_encode, HTML5.prepend_newline?), so they must stay
    # publicly callable. A bare `private` would not affect them anyway.

    # Return +string+ (a String, or an object responding to +read+) as a
    # UTF-8 String. +encoding+, when given, names the encoding the input is in.
    def self.read_and_encode(string, encoding)
      # Read the string with the given encoding.
      if string.respond_to?(:read)
        # NOTE(review): IO#read and StringIO#read take (length, outbuf) rather
        # than an encoding: keyword, so the second form presumably targets
        # Pathname-like readers -- confirm before using it with plain IO.
        string = encoding.nil? ? string.read : string.read(encoding: encoding)
      elsif encoding && string.respond_to?(:force_encoding)
        # Otherwise the string has the given encoding.
        string = string.dup
        string.force_encoding(encoding)
      end

      # convert to UTF-8 (Ruby 1.9+)
      if string.respond_to?(:encoding) && string.encoding != Encoding::UTF_8
        string = reencode(string.dup)
      end
      string
    end

    # Charset sniffing is a complex and controversial topic that understandably
    # isn't done _by default_ by the Ruby Net::HTTP library.  This being said,
    # it is a very real problem for consumers of HTML as the default for HTML
    # is iso-8859-1, most "good" producers use utf-8, and the Gumbo parser
    # *only* supports utf-8.
    #
    # Accordingly, Nokogiri::HTML::Document.parse provides limited encoding
    # detection.  Following this lead, Nokogiri::HTML5 attempts to do likewise,
    # while attempting to more closely follow the HTML5 standard.
    #
    # http://bugs.ruby-lang.org/issues/2567
    # http://www.w3.org/TR/html5/syntax.html#determining-the-character-encoding
    #
    # Returns +body+ transcoded to UTF-8. A binary (ASCII-8BIT) +body+ is
    # relabelled in place with the detected encoding before transcoding.
    def self.reencode(body, content_type=nil)
      return body unless body.respond_to? :encoding

      if body.encoding == Encoding::ASCII_8BIT
        encoding = nil

        # look for a Byte Order Mark (BOM). Compare raw bytes: a binary string
        # holding high bytes is never == to a UTF-8 literal such as "\xFE\xFF".
        leading_bytes = body[0..2].bytes
        if leading_bytes[0..1] == [0xFE, 0xFF]
          encoding = 'utf-16be'
        elsif leading_bytes[0..1] == [0xFF, 0xFE]
          encoding = 'utf-16le'
        elsif leading_bytes == [0xEF, 0xBB, 0xBF]
          encoding = 'utf-8'
        end

        # look for a charset in the content-type header
        if content_type
          encoding ||= content_type[/charset=["']?(.*?)($|["';\s])/i, 1]
        end

        # look for a charset in a meta tag in the first 1024 bytes
        if not encoding
          data = body[0..1023].gsub(/<!--.*?(-->|\Z)/m, '')
          data.scan(/<meta.*?>/m).each do |meta|
            encoding ||= meta[/charset=["']?([^>]*?)($|["'\s>])/im, 1]
          end
        end

        # if all else fails, default to the official default encoding for HTML
        encoding ||= Encoding::ISO_8859_1

        # change the encoding to match the detected or inferred encoding
        begin
          body.force_encoding(encoding)
        rescue ArgumentError
          # unknown encoding name: fall back to the HTML default
          body.force_encoding(Encoding::ISO_8859_1)
        end
      end

      body.encode(Encoding::UTF_8)
    end

    # Append the HTML serialization of +current_node+ (and, recursively, its
    # children) to +io+. Text and attribute values are escaped and converted
    # to +encoding+; +options[:preserve_newline]+ re-emits the leading newline
    # of pre/textarea/listing elements. Raises on unexpected node types.
    def self.serialize_node_internal(current_node, io, encoding, options)
      case current_node.type
      when XML::Node::ELEMENT_NODE
        ns = current_node.namespace
        # Namespace URIs are read through #href, as for attributes below.
        ns_uri = ns.nil? ? nil : ns.href
        # XXX(sfc): attach namespaces to all nodes, even html?
        if ns_uri.nil? || ns_uri == HTML_NAMESPACE || ns_uri == MATHML_NAMESPACE || ns_uri == SVG_NAMESPACE
          tagname = current_node.name
        else
          tagname = "#{ns.prefix}:#{current_node.name}"
        end
        io << '<' << tagname
        current_node.attribute_nodes.each do |attr|
          attr_ns = attr.namespace
          if attr_ns.nil?
            attr_name = attr.name
          else
            attr_ns_uri = attr_ns.href
            # attribute name with any existing prefix stripped
            local_name = attr.name.sub(/^[^:]*:/, '')
            if attr_ns_uri == XML_NAMESPACE
              attr_name = 'xml:' + local_name
            elsif attr_ns_uri == XMLNS_NAMESPACE && local_name == 'xmlns'
              attr_name = 'xmlns'
            elsif attr_ns_uri == XMLNS_NAMESPACE
              attr_name = 'xmlns:' + local_name
            elsif attr_ns_uri == XLINK_NAMESPACE
              attr_name = 'xlink:' + local_name
            else
              attr_name = "#{attr_ns.prefix}:#{attr.name}"
            end
          end
          io << ' ' << attr_name << '="' << escape_text(attr.content, encoding, true) << '"'
        end
        io << '>'
        unless VOID_ELEMENTS.include?(current_node.name)
          io << "\n" if options[:preserve_newline] && prepend_newline?(current_node)
          current_node.children.each do |child|
            # XXX(sfc): Templates handled specially?
            serialize_node_internal(child, io, encoding, options)
          end
          io << '</' << tagname << '>'
        end
      when XML::Node::TEXT_NODE
        parent = current_node.parent
        if parent.element? && RAW_TEXT_ELEMENTS.include?(parent.name)
          io << current_node.content
        else
          io << escape_text(current_node.content, encoding, false)
        end
      when XML::Node::CDATA_SECTION_NODE
        io << '<![CDATA[' << current_node.content << ']]>'
      when XML::Node::COMMENT_NODE
        io << '<!--' << current_node.content << '-->'
      when XML::Node::PI_NODE
        io << '<?' << current_node.content << '>'
      when XML::Node::DOCUMENT_TYPE_NODE, XML::Node::DTD_NODE
        io << '<!DOCTYPE ' << current_node.name << '>'
      when XML::Node::HTML_DOCUMENT_NODE, XML::Node::DOCUMENT_FRAG_NODE
        current_node.children.each do |child|
          serialize_node_internal(child, io, encoding, options)
        end
      else
        raise "Unexpected node '#{current_node.name}' of type #{current_node.type}"
      end
    end

    # Escape +text+ for HTML output and convert it to +encoding+.
    # In attribute mode &, U+00A0 and " are replaced; otherwise &, U+00A0,
    # < and > are. Characters that +encoding+ cannot represent become
    # hexadecimal character references.
    def self.escape_text(text, encoding, attribute_mode)
      if attribute_mode
        text = text.gsub(/[&\u00a0"]/,
                         {'&' => '&amp;', "\u00a0" => '&nbsp;', '"' => '&quot;'})
      else
        text = text.gsub(/[&\u00a0<>]/,
                         {'&' => '&amp;', "\u00a0" => '&nbsp;', '<' => '&lt;', '>' => '&gt;'})
      end
      # Not part of the standard
      text.encode(encoding, fallback: lambda { |c| "&\#x#{c.ord.to_s(16)};" })
    end

    # True when +node+ is a pre, textarea or listing element whose first
    # child is a text node starting with a newline.
    def self.prepend_newline?(node)
      return false unless %w[pre textarea listing].include?(node.name) && !node.children.empty?
      first_child = node.children[0]
      first_child.text? && first_child.content.start_with?("\n")
    end
  end
end
|
@@ -0,0 +1,37 @@
|
|
1
|
+
module Nokogiri
  module HTML5
    # Nokogiri HTML document whose tree is produced by Nokogumbo.parse.
    class Document < Nokogiri::HTML::Document
      # Parse +string_or_io+ into a Document.
      #
      # When a block is given it is yielded the options hash before parsing.
      # If +encoding+ is nil it is taken from the input's own encoding unless
      # that is ASCII-8BIT; if +url+ is nil and the input responds to both
      # +read+ and +path+, its path is used.
      def self.parse(string_or_io, url = nil, encoding = nil, **options, &block)
        yield options if block_given?

        if string_or_io.respond_to?(:encoding) && string_or_io.encoding.name != 'ASCII-8BIT'
          encoding ||= string_or_io.encoding.name
        end

        if string_or_io.respond_to?(:read) && string_or_io.respond_to?(:path)
          url ||= string_or_io.path
        end
        do_parse(string_or_io, url, encoding, options)
      end

      # Parse the contents of +io+.
      # Raises ArgumentError unless +io+ responds to +read+.
      def self.read_io(io, url = nil, encoding = nil, **options)
        raise ArgumentError, "io object doesn't respond to :read" unless io.respond_to?(:read)
        do_parse(io, url, encoding, options)
      end

      # Parse +string+ (converted with +to_s+).
      def self.read_memory(string, url = nil, encoding = nil, **options)
        do_parse(string.to_s, url, encoding, options)
      end

      # NOTE: `private` does not apply to methods defined with `def self.`,
      # so do_parse remains publicly callable.
      private

      # Convert the input to UTF-8 and run the parser. The error limit comes
      # from options[:max_errors] or options[:max_parse_errors], the depth
      # limit from options[:max_tree_depth]; both fall back to the Nokogumbo
      # defaults. The resulting document's encoding is set to UTF-8.
      def self.do_parse(string_or_io, url, encoding, options)
        string = HTML5.read_and_encode(string_or_io, encoding)
        max_errors = options[:max_errors] || options[:max_parse_errors] || Nokogumbo::DEFAULT_MAX_ERRORS
        max_depth = options[:max_tree_depth] || Nokogumbo::DEFAULT_MAX_TREE_DEPTH
        doc = Nokogumbo.parse(string.to_s, url, max_errors, max_depth)
        doc.encoding = 'UTF-8'
        doc
      end
    end
  end
end
|
@@ -0,0 +1,46 @@
|
|
1
|
+
require 'nokogiri'
|
2
|
+
|
3
|
+
module Nokogiri
  module HTML5
    # HTML5 document fragment; its children come from parsing the markup
    # inside a temporary HTML5 document.
    class DocumentFragment < Nokogiri::HTML::DocumentFragment
      # Create a document fragment.
      #
      # With +tags+ nil the fragment is left empty. Otherwise +tags+ is parsed
      # and the resulting nodes are re-parented into this fragment; parse
      # errors are copied to +errors+. Raises ArgumentError when a context
      # node +ctx+ is given, since that is not supported.
      def initialize(doc, tags = nil, ctx = nil, options = {})
        return self unless tags
        raise ArgumentError, "Fragment parsing with context not supported" if ctx

        tags = Nokogiri::HTML5.read_and_encode(tags, nil)

        # Copied from Nokogiri's document_fragment.rb and labeled "a horrible
        # hack."
        if tags.strip =~ /^<body/i
          path = "/html/body"
        else
          path = "/html/body/node()"
        end
        # Add 2 for <html> and <body>.
        max_depth = (options[:max_tree_depth] || Nokogumbo::DEFAULT_MAX_TREE_DEPTH) + 2
        parse_options = options.merge(max_tree_depth: max_depth)
        # HTML5.parse takes its options as keywords; a positional hash would
        # be treated as the url argument.
        temp_doc = HTML5.parse("<!DOCTYPE html><html><body>#{tags}", **parse_options)
        temp_doc.xpath(path).each { |child| child.parent = self }
        self.errors = temp_doc.errors
      end

      def serialize(options = {}, &block)
        # Bypass XML::Document.serialize which doesn't support options even
        # though XML::Node.serialize does!
        XML::Node.instance_method(:serialize).bind(self).call(options, &block)
      end

      # Parse a document fragment from +tags+, returning the new
      # DocumentFragment. +tags+ is converted to UTF-8 using +encoding+ first.
      def self.parse(tags, encoding = nil, options = {})
        doc = HTML5::Document.new
        tags = HTML5.read_and_encode(tags, encoding)
        doc.encoding = 'UTF-8'
        new(doc, tags, nil, options)
      end
    end
  end
end
|
@@ -0,0 +1,57 @@
|
|
1
|
+
require 'nokogiri'
|
2
|
+
|
3
|
+
module Nokogiri
  # Monkey patch
  module XML
    class Node
      # HTML elements can have attributes that contain colons.
      # Nokogiri::XML::Node#[]= treats names with colons as a prefixed QName
      # and tries to create an attribute in a namespace. This is especially
      # annoying with attribute names like xml:lang since libxml2 will
      # actually create the xml namespace if it doesn't exist already.
      #
      # Adds +node+ as a child, then removes each of its namespaced
      # attributes and sets it again by name through #[]=.
      define_method(:add_child_node_and_reparent_attrs) do |node|
        add_child_node(node)
        node.attribute_nodes.select { |a| a.namespace }.each do |attr|
          attr.remove
          node[attr.name] = attr.value
        end
      end

      # Concatenated +to_html+ of all children. With
      # options[:preserve_newline] a leading newline is prepended when
      # HTML5.prepend_newline? says this node needs one.
      def inner_html(options = {})
        result = options[:preserve_newline] && HTML5.prepend_newline?(self) ? "\n" : ""
        result << children.map { |child| child.to_html(options) }.join
        result
      end

      # Write this node to +io+.
      #
      # Nodes of an HTML5::Document are written by the HTML5 serializer unless
      # AS_XML or AS_XHTML is requested; everything else goes through
      # native_write_to. A block, if given, is yielded the SaveOptions object
      # so the caller can adjust it.
      def write_to(io, *options)
        options = options.first.is_a?(Hash) ? options.shift : {}
        # NOTE(review): +options+ is a Hash from here on, so options[0] and
        # options[1] look up the keys 0 and 1 rather than positional
        # arguments -- confirm whether positional encoding/save options are
        # meant to be supported.
        encoding = options[:encoding] || options[0]
        if Nokogiri.jruby?
          save_options = options[:save_with] || options[1]
          indent_times = options[:indent] || 0
        else
          save_options = options[:save_with] || options[1] || SaveOptions::FORMAT
          indent_times = options[:indent] || 2
        end
        indent_string = (options[:indent_text] || ' ') * indent_times

        config = SaveOptions.new(save_options.to_i)
        yield config if block_given?

        config_options = config.options
        wants_xml = (config_options & (SaveOptions::AS_XML | SaveOptions::AS_XHTML)) != 0
        if wants_xml || !document.is_a?(HTML5::Document)
          # Use Nokogiri's serializing code.
          native_write_to(io, encoding, indent_string, config_options)
        else
          # Serialize including the current node.
          encoding ||= document.encoding || Encoding::UTF_8
          internal_ops = {
            trailing_nl: (config_options & SaveOptions::FORMAT) != 0,
            preserve_newline: options[:preserve_newline] || false
          }
          # Hand the serializer the normalized options built above instead of
          # the caller's raw hash.
          HTML5.serialize_node_internal(self, io, encoding, internal_ops)
        end
      end
    end
  end
end
|
metadata
CHANGED
@@ -1,14 +1,15 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: nokogumbo
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version:
|
4
|
+
version: 2.0.0.pre.alpha
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Sam Ruby
|
8
|
+
- Stephen Checkoway
|
8
9
|
autorequire:
|
9
10
|
bindir: bin
|
10
11
|
cert_chain: []
|
11
|
-
date: 2018-
|
12
|
+
date: 2018-08-31 00:00:00.000000000 Z
|
12
13
|
dependencies:
|
13
14
|
- !ruby/object:Gem::Dependency
|
14
15
|
name: nokogiri
|
@@ -26,37 +27,42 @@ dependencies:
|
|
26
27
|
version: '0'
|
27
28
|
description: Nokogumbo allows a Ruby program to invoke the Gumbo HTML5 parser and
|
28
29
|
access the result as a Nokogiri parsed document.
|
29
|
-
email:
|
30
|
+
email:
|
31
|
+
- rubys@intertwingly.net
|
32
|
+
- s@pahtak.org
|
30
33
|
executables: []
|
31
34
|
extensions:
|
32
|
-
- ext/
|
35
|
+
- ext/nokogumbo/extconf.rb
|
33
36
|
extra_rdoc_files: []
|
34
37
|
files:
|
38
|
+
- CHANGELOG.md
|
35
39
|
- LICENSE.txt
|
36
40
|
- README.md
|
37
|
-
- ext/
|
38
|
-
- ext/
|
41
|
+
- ext/nokogumbo/extconf.rb
|
42
|
+
- ext/nokogumbo/nokogumbo.c
|
43
|
+
- gumbo-parser/src/ascii.c
|
44
|
+
- gumbo-parser/src/ascii.h
|
39
45
|
- gumbo-parser/src/attribute.c
|
40
46
|
- gumbo-parser/src/attribute.h
|
41
47
|
- gumbo-parser/src/char_ref.c
|
42
48
|
- gumbo-parser/src/char_ref.h
|
43
|
-
- gumbo-parser/src/char_ref.rl
|
44
49
|
- gumbo-parser/src/error.c
|
45
50
|
- gumbo-parser/src/error.h
|
51
|
+
- gumbo-parser/src/foreign_attrs.c
|
46
52
|
- gumbo-parser/src/gumbo.h
|
47
53
|
- gumbo-parser/src/insertion_mode.h
|
54
|
+
- gumbo-parser/src/macros.h
|
48
55
|
- gumbo-parser/src/parser.c
|
49
56
|
- gumbo-parser/src/parser.h
|
57
|
+
- gumbo-parser/src/replacement.h
|
50
58
|
- gumbo-parser/src/string_buffer.c
|
51
59
|
- gumbo-parser/src/string_buffer.h
|
52
60
|
- gumbo-parser/src/string_piece.c
|
53
|
-
- gumbo-parser/src/
|
61
|
+
- gumbo-parser/src/svg_attrs.c
|
62
|
+
- gumbo-parser/src/svg_tags.c
|
54
63
|
- gumbo-parser/src/tag.c
|
55
|
-
- gumbo-parser/src/
|
56
|
-
- gumbo-parser/src/
|
57
|
-
- gumbo-parser/src/tag_gperf.h
|
58
|
-
- gumbo-parser/src/tag_sizes.h
|
59
|
-
- gumbo-parser/src/tag_strings.h
|
64
|
+
- gumbo-parser/src/tag_lookup.c
|
65
|
+
- gumbo-parser/src/tag_lookup.h
|
60
66
|
- gumbo-parser/src/token_type.h
|
61
67
|
- gumbo-parser/src/tokenizer.c
|
62
68
|
- gumbo-parser/src/tokenizer.h
|
@@ -67,13 +73,20 @@ files:
|
|
67
73
|
- gumbo-parser/src/util.h
|
68
74
|
- gumbo-parser/src/vector.c
|
69
75
|
- gumbo-parser/src/vector.h
|
70
|
-
- gumbo-parser/visualc/include/strings.h
|
71
76
|
- lib/nokogumbo.rb
|
72
|
-
-
|
77
|
+
- lib/nokogumbo/html5.rb
|
78
|
+
- lib/nokogumbo/html5/document.rb
|
79
|
+
- lib/nokogumbo/html5/document_fragment.rb
|
80
|
+
- lib/nokogumbo/version.rb
|
81
|
+
- lib/nokogumbo/xml/node.rb
|
73
82
|
homepage: https://github.com/rubys/nokogumbo/#readme
|
74
83
|
licenses:
|
75
84
|
- Apache-2.0
|
76
|
-
metadata:
|
85
|
+
metadata:
|
86
|
+
bug_tracker_uri: https://github.com/rubys/nokogumbo/issues
|
87
|
+
changelog_uri: https://github.com/rubys/nokogumbo/blob/master/CHANGELOG.md
|
88
|
+
homepage_uri: https://github.com/rubys/nokogumbo/#readme
|
89
|
+
source_code_uri: https://github.com/rubys/nokogumbo
|
77
90
|
post_install_message:
|
78
91
|
rdoc_options: []
|
79
92
|
require_paths:
|
@@ -85,12 +98,12 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
85
98
|
version: '0'
|
86
99
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
87
100
|
requirements:
|
88
|
-
- - "
|
101
|
+
- - ">"
|
89
102
|
- !ruby/object:Gem::Version
|
90
|
-
version:
|
103
|
+
version: 1.3.1
|
91
104
|
requirements: []
|
92
105
|
rubyforge_project:
|
93
|
-
rubygems_version: 2.7.
|
106
|
+
rubygems_version: 2.7.6
|
94
107
|
signing_key:
|
95
108
|
specification_version: 4
|
96
109
|
summary: Nokogiri interface to the Gumbo HTML5 parser
|