nokogumbo 1.5.0 → 2.0.0.pre.alpha

Sign up to get free protection for your applications and to get access to all the features.
Files changed (55) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +56 -0
  3. data/README.md +146 -22
  4. data/ext/nokogumbo/extconf.rb +116 -0
  5. data/ext/{nokogumboc → nokogumbo}/nokogumbo.c +174 -71
  6. data/gumbo-parser/src/ascii.c +33 -0
  7. data/gumbo-parser/src/ascii.h +31 -0
  8. data/gumbo-parser/src/attribute.c +26 -28
  9. data/gumbo-parser/src/attribute.h +3 -23
  10. data/gumbo-parser/src/char_ref.c +135 -2351
  11. data/gumbo-parser/src/char_ref.h +13 -29
  12. data/gumbo-parser/src/error.c +215 -133
  13. data/gumbo-parser/src/error.h +34 -49
  14. data/gumbo-parser/src/foreign_attrs.c +104 -0
  15. data/gumbo-parser/src/gumbo.h +506 -304
  16. data/gumbo-parser/src/insertion_mode.h +4 -28
  17. data/gumbo-parser/src/macros.h +91 -0
  18. data/gumbo-parser/src/parser.c +1989 -1431
  19. data/gumbo-parser/src/parser.h +6 -22
  20. data/gumbo-parser/src/replacement.h +33 -0
  21. data/gumbo-parser/src/string_buffer.c +43 -50
  22. data/gumbo-parser/src/string_buffer.h +24 -40
  23. data/gumbo-parser/src/string_piece.c +39 -39
  24. data/gumbo-parser/src/svg_attrs.c +174 -0
  25. data/gumbo-parser/src/svg_tags.c +137 -0
  26. data/gumbo-parser/src/tag.c +186 -59
  27. data/gumbo-parser/src/tag_lookup.c +382 -0
  28. data/gumbo-parser/src/tag_lookup.h +13 -0
  29. data/gumbo-parser/src/token_type.h +1 -25
  30. data/gumbo-parser/src/tokenizer.c +899 -495
  31. data/gumbo-parser/src/tokenizer.h +37 -37
  32. data/gumbo-parser/src/tokenizer_states.h +6 -22
  33. data/gumbo-parser/src/utf8.c +103 -86
  34. data/gumbo-parser/src/utf8.h +37 -41
  35. data/gumbo-parser/src/util.c +48 -38
  36. data/gumbo-parser/src/util.h +10 -40
  37. data/gumbo-parser/src/vector.c +45 -57
  38. data/gumbo-parser/src/vector.h +17 -39
  39. data/lib/nokogumbo.rb +10 -174
  40. data/lib/nokogumbo/html5.rb +250 -0
  41. data/lib/nokogumbo/html5/document.rb +37 -0
  42. data/lib/nokogumbo/html5/document_fragment.rb +46 -0
  43. data/lib/nokogumbo/version.rb +3 -0
  44. data/lib/nokogumbo/xml/node.rb +57 -0
  45. metadata +32 -19
  46. data/ext/nokogumboc/extconf.rb +0 -60
  47. data/gumbo-parser/src/char_ref.rl +0 -2554
  48. data/gumbo-parser/src/string_piece.h +0 -38
  49. data/gumbo-parser/src/tag.in +0 -150
  50. data/gumbo-parser/src/tag_enum.h +0 -153
  51. data/gumbo-parser/src/tag_gperf.h +0 -105
  52. data/gumbo-parser/src/tag_sizes.h +0 -4
  53. data/gumbo-parser/src/tag_strings.h +0 -153
  54. data/gumbo-parser/visualc/include/strings.h +0 -4
  55. data/test-nokogumbo.rb +0 -190
@@ -0,0 +1,250 @@
1
+ require 'nokogumbo/html5/document'
2
+ require 'nokogumbo/html5/document_fragment'
3
+
4
module Nokogiri
  # Parse an HTML 5 document. Convenience method for
  # Nokogiri::HTML5::Document.parse; every argument is forwarded unchanged.
  def self.HTML5(string_or_io, url = nil, encoding = nil, **options, &block)
    Nokogiri::HTML5::Document.parse(string_or_io, url, encoding, **options, &block)
  end

  module HTML5
    # HTML uses the XHTML namespace.
    HTML_NAMESPACE = 'http://www.w3.org/1999/xhtml'.freeze
    MATHML_NAMESPACE = 'http://www.w3.org/1998/Math/MathML'.freeze
    SVG_NAMESPACE = 'http://www.w3.org/2000/svg'.freeze
    XLINK_NAMESPACE = 'http://www.w3.org/1999/xlink'.freeze
    XML_NAMESPACE = 'http://www.w3.org/XML/1998/namespace'.freeze
    XMLNS_NAMESPACE = 'http://www.w3.org/2000/xmlns/'.freeze

    # Parse an HTML 5 document. Convenience method for Nokogiri::HTML5::Document.parse
    def self.parse(string, url = nil, encoding = nil, **options, &block)
      # Document.parse declares **options, so the hash has to be splatted;
      # passed positionally it would be a fourth positional argument.
      Document.parse(string, url, encoding, **options, &block)
    end

    # Parse a fragment from +string+. Convenience method for
    # Nokogiri::HTML5::DocumentFragment.parse, which takes the options as a
    # plain positional hash.
    def self.fragment(string, encoding = nil, **options)
      DocumentFragment.parse(string, encoding, options)
    end

    # Fetch and parse a HTML document from the web, following redirects,
    # handling https, and determining the character encoding using HTML5
    # rules. +uri+ may be a +String+ or a +URI+. +options+ contains
    # http headers and special options. Everything which is not a
    # special option is considered a header. Special options include:
    #  * :follow_limit => number of redirects which are followed
    #  * :basic_auth => [username, password]
    #
    # Returns the parsed document; the Net::HTTP response is exposed on it
    # through a +response+ reader. Non-success responses (and redirects once
    # the limit is used up) are raised via Net::HTTPResponse#value.
    def self.get(uri, options = {})
      # deprecated: a bare number used to mean the redirect limit. Normalize
      # it first so the hash operations below also work for that form.
      options = { :follow_limit => options } if Numeric === options
      headers = options.clone
      limit = headers[:follow_limit] ? headers.delete(:follow_limit).to_i : 10

      require 'net/http'
      uri = URI(uri) unless URI === uri

      http = Net::HTTP.new(uri.host, uri.port)

      # TLS / SSL support
      http.use_ssl = true if uri.scheme == 'https'

      # Pass through Net::HTTP override values, which currently include:
      #   :ca_file, :ca_path, :cert, :cert_store, :ciphers,
      #   :close_on_empty_response, :continue_timeout, :key, :open_timeout,
      #   :read_timeout, :ssl_timeout, :ssl_version, :use_ssl,
      #   :verify_callback, :verify_depth, :verify_mode
      # Iterates +options+ (not +headers+) so entries can be removed from
      # +headers+ while looping.
      options.each_key do |key|
        http.send "#{key}=", headers.delete(key) if http.respond_to? "#{key}="
      end

      request = Net::HTTP::Get.new(uri.request_uri)

      # basic authentication
      auth = headers.delete(:basic_auth)
      auth ||= [uri.user, uri.password] if uri.user && uri.password
      request.basic_auth auth.first, auth.last if auth

      # remaining options are treated as headers
      headers.each { |key, value| request[key.to_s] = value.to_s }

      response = http.request(request)

      case response
      when Net::HTTPSuccess
        # Only symbol keys can be keyword arguments; string keys are header
        # names and are of no use to the parser.
        parse_options = options.select { |key, _| key.is_a?(Symbol) }
        doc = parse(reencode(response.body, response['content-type']), **parse_options)
        doc.instance_variable_set('@response', response)
        doc.class.send(:attr_reader, :response)
        doc
      when Net::HTTPRedirection
        response.value if limit <= 1
        location = URI.join(uri, response['location'])
        get(location, options.merge(:follow_limit => limit - 1))
      else
        response.value
      end
    end

    # NOTE: a bare `private` does not change the visibility of `def self.`
    # methods, so everything below stays publicly callable. Other files in
    # this gem rely on that (HTML5.read_and_encode, HTML5.prepend_newline?
    # and HTML5.serialize_node_internal are called with an explicit receiver).
    private

    # Return +string+ (a String or an object responding to +read+) as UTF-8.
    # +encoding+, when given, names the encoding the input is in.
    def self.read_and_encode(string, encoding)
      if string.respond_to?(:read)
        # Read the string with the given encoding.
        # NOTE(review): confirm the reader accepts an +encoding:+ keyword;
        # plain IO#read takes (length, outbuf).
        string = encoding.nil? ? string.read : string.read(encoding: encoding)
      elsif encoding && string.respond_to?(:force_encoding)
        # Otherwise the string has the given encoding; relabel a copy so the
        # caller's string is left alone.
        string = string.dup
        string.force_encoding(encoding)
      end

      # convert to UTF-8 (Ruby 1.9+)
      if string.respond_to?(:encoding) && string.encoding != Encoding::UTF_8
        string = reencode(string.dup)
      end
      string
    end

    # Charset sniffing is a complex and controversial topic that understandably
    # isn't done _by default_ by the Ruby Net::HTTP library.  This being said,
    # it is a very real problem for consumers of HTML as the default for HTML
    # is iso-8859-1, most "good" producers use utf-8, and the Gumbo parser
    # *only* supports utf-8.
    #
    # Accordingly, Nokogiri::HTML::Document.parse provides limited encoding
    # detection.  Following this lead, Nokogiri::HTML5 attempts to do likewise,
    # while attempting to more closely follow the HTML5 standard.
    #
    # http://bugs.ruby-lang.org/issues/2567
    # http://www.w3.org/TR/html5/syntax.html#determining-the-character-encoding
    #
    # +body+ is transcoded to UTF-8 and returned. A binary (ASCII-8BIT) body
    # is first relabelled in place with the encoding found from, in order: a
    # byte order mark, the charset of +content_type+, a <meta> charset in the
    # first 1024 bytes, and finally ISO-8859-1.
    def self.reencode(body, content_type = nil)
      return body unless body.respond_to? :encoding

      if body.encoding == Encoding::ASCII_8BIT
        encoding = nil

        # look for a Byte Order Mark (BOM). Compare raw byte values: a binary
        # string is never == to a non-ASCII literal in another encoding.
        initial_bytes = body[0..2].bytes
        if initial_bytes[0..1] == [0xFE, 0xFF]
          encoding = 'utf-16be'
        elsif initial_bytes[0..1] == [0xFF, 0xFE]
          encoding = 'utf-16le'
        elsif initial_bytes == [0xEF, 0xBB, 0xBF]
          encoding = 'utf-8'
        end

        # look for a charset in a content-type header
        if content_type
          encoding ||= content_type[/charset=["']?(.*?)($|["';\s])/i, 1]
        end

        # look for a charset in a meta tag in the first 1024 bytes
        unless encoding
          data = body[0..1023].gsub(/<!--.*?(-->|\Z)/m, '')
          data.scan(/<meta.*?>/m).each do |meta|
            encoding ||= meta[/charset=["']?([^>]*?)($|["'\s>])/im, 1]
          end
        end

        # if all else fails, default to the official default encoding for HTML
        encoding ||= Encoding::ISO_8859_1

        # change the encoding to match the detected or inferred encoding;
        # an unknown encoding name raises ArgumentError
        begin
          body.force_encoding(encoding)
        rescue ArgumentError
          body.force_encoding(Encoding::ISO_8859_1)
        end
      end

      body.encode(Encoding::UTF_8)
    end

    # Append the HTML serialization of +current_node+ and its descendants to
    # +io+ (anything supporting <<). Text and attribute values are escaped
    # for +encoding+; +options+ is consulted for :preserve_newline only.
    # Raises RuntimeError for a node type that is not handled.
    def self.serialize_node_internal(current_node, io, encoding, options)
      case current_node.type
      when XML::Node::ELEMENT_NODE
        ns = current_node.namespace
        # Namespace objects expose their URI as #href (as the attribute
        # branch below already relies on).
        ns_uri = ns.nil? ? nil : ns.href
        # XXX(sfc): attach namespaces to all nodes, even html?
        if ns_uri.nil? || ns_uri == HTML_NAMESPACE || ns_uri == MATHML_NAMESPACE || ns_uri == SVG_NAMESPACE
          tagname = current_node.name
        else
          tagname = "#{ns.prefix}:#{current_node.name}"
        end
        io << '<' << tagname
        current_node.attribute_nodes.each do |attr|
          attr_ns = attr.namespace
          if attr_ns.nil?
            attr_name = attr.name
          else
            attr_ns_uri = attr_ns.href
            # attribute name with any existing "prefix:" removed
            local_name = attr.name.sub(/^[^:]*:/, '')
            attr_name =
              if attr_ns_uri == XML_NAMESPACE
                'xml:' + local_name
              elsif attr_ns_uri == XMLNS_NAMESPACE && local_name == 'xmlns'
                'xmlns'
              elsif attr_ns_uri == XMLNS_NAMESPACE
                'xmlns:' + local_name
              elsif attr_ns_uri == XLINK_NAMESPACE
                'xlink:' + local_name
              else
                "#{attr_ns.prefix}:#{attr.name}"
              end
          end
          io << ' ' << attr_name << '="' << escape_text(attr.content, encoding, true) << '"'
        end
        io << '>'
        # Elements in this list get neither children nor an end tag.
        if !%w[area base basefont bgsound br col embed frame hr img input keygen
               link meta param source track wbr].include?(current_node.name)
          io << "\n" if options[:preserve_newline] && prepend_newline?(current_node)
          current_node.children.each do |child|
            # XXX(sfc): Templates handled specially?
            serialize_node_internal(child, io, encoding, options)
          end
          io << '</' << tagname << '>'
        end
      when XML::Node::TEXT_NODE
        parent = current_node.parent
        # Text inside these elements is written out without escaping.
        if parent.element? && %w[style script xmp iframe noembed noframes plaintext noscript].include?(parent.name)
          io << current_node.content
        else
          io << escape_text(current_node.content, encoding, false)
        end
      when XML::Node::CDATA_SECTION_NODE
        io << '<![CDATA[' << current_node.content << ']]>'
      when XML::Node::COMMENT_NODE
        io << '<!--' << current_node.content << '-->'
      when XML::Node::PI_NODE
        io << '<?' << current_node.content << '>'
      when XML::Node::DOCUMENT_TYPE_NODE, XML::Node::DTD_NODE
        io << '<!DOCTYPE ' << current_node.name << '>'
      when XML::Node::HTML_DOCUMENT_NODE, XML::Node::DOCUMENT_FRAG_NODE
        current_node.children.each do |child|
          serialize_node_internal(child, io, encoding, options)
        end
      else
        raise "Unexpected node '#{current_node.name}' of type #{current_node.type}"
      end
    end

    # Escape +text+ for output. In attribute mode &, U+00A0 and " are
    # replaced; otherwise &, U+00A0, < and >. The result is transcoded to
    # +encoding+, with characters that encoding cannot represent written as
    # hexadecimal character references.
    def self.escape_text(text, encoding, attribute_mode)
      if attribute_mode
        text = text.gsub(/[&\u00a0"]/,
                         '&' => '&amp;', "\u00a0" => '&nbsp;', '"' => '&quot;')
      else
        text = text.gsub(/[&\u00a0<>]/,
                         '&' => '&amp;', "\u00a0" => '&nbsp;', '<' => '&lt;', '>' => '&gt;')
      end
      # Not part of the standard
      text.encode(encoding, fallback: lambda { |c| "&\#x#{c.ord.to_s(16)};" })
    end

    # True when +node+ is a pre, textarea or listing element whose first
    # child is a text node starting with a newline.
    def self.prepend_newline?(node)
      return false unless %w[pre textarea listing].include?(node.name) && !node.children.empty?
      first_child = node.children[0]
      first_child.text? && first_child.content.start_with?("\n")
    end
  end
end
@@ -0,0 +1,37 @@
1
module Nokogiri
  module HTML5
    # An HTML5 document, parsed through Nokogumbo.parse.
    class Document < Nokogiri::HTML::Document
      # Parse +string_or_io+ into a Document.
      #
      # +url+ defaults to the input's +path+ when the input responds to both
      # +read+ and +path+; +encoding+ defaults to the input's own encoding
      # unless that is ASCII-8BIT. When a block is given it is yielded the
      # +options+ hash before parsing. See do_parse for the options read.
      def self.parse(string_or_io, url = nil, encoding = nil, **options, &block)
        yield options if block_given?

        if string_or_io.respond_to?(:encoding) && string_or_io.encoding.name != 'ASCII-8BIT'
          encoding ||= string_or_io.encoding.name
        end

        if string_or_io.respond_to?(:read) && string_or_io.respond_to?(:path)
          url ||= string_or_io.path
        end
        do_parse(string_or_io, url, encoding, options)
      end

      # Parse from +io+. Raises ArgumentError unless +io+ responds to +read+.
      def self.read_io(io, url = nil, encoding = nil, **options)
        raise ArgumentError, "io object doesn't respond to :read" unless io.respond_to?(:read)
        do_parse(io, url, encoding, options)
      end

      # Parse from +string+ (converted with +to_s+).
      def self.read_memory(string, url = nil, encoding = nil, **options)
        do_parse(string.to_s, url, encoding, options)
      end

      # NOTE: a bare `private` does not apply to `def self.` methods, so
      # do_parse remains callable from outside the class.
      private

      # Convert the input to UTF-8 and hand it to Nokogumbo.parse.
      # Options read: :max_errors (or :max_parse_errors) and :max_tree_depth,
      # falling back to the Nokogumbo defaults. The returned document's
      # encoding is set to 'UTF-8'.
      def self.do_parse(string_or_io, url, encoding, options)
        string = HTML5.read_and_encode(string_or_io, encoding)
        max_errors = options[:max_errors] || options[:max_parse_errors] || Nokogumbo::DEFAULT_MAX_ERRORS
        max_depth = options[:max_tree_depth] || Nokogumbo::DEFAULT_MAX_TREE_DEPTH
        doc = Nokogumbo.parse(string.to_s, url, max_errors, max_depth)
        doc.encoding = 'UTF-8'
        doc
      end
    end
  end
end
@@ -0,0 +1,46 @@
1
+ require 'nokogiri'
2
+
3
module Nokogiri
  module HTML5
    # An HTML5 document fragment.
    class DocumentFragment < Nokogiri::HTML::DocumentFragment
      # Create a document fragment.
      #
      # When +tags+ is given it is parsed inside a temporary
      # <html><body> document and the resulting nodes are reparented into
      # this fragment; parse errors are copied to +errors+. +options+ is
      # passed on to HTML5.parse (with :max_tree_depth adjusted, see below).
      # Raises ArgumentError when a context node +ctx+ is supplied.
      def initialize(doc, tags = nil, ctx = nil, options = {})
        return self unless tags
        if ctx
          raise ArgumentError, "Fragment parsing with context not supported"
        else
          tags = Nokogiri::HTML5.read_and_encode(tags, nil)

          # Copied from Nokogiri's document_fragment.rb and labeled "a horrible
          # hack."
          if tags.strip =~ /^<body/i
            path = "/html/body"
          else
            path = "/html/body/node()"
          end
          # Add 2 for <html> and <body>.
          max_depth = (options[:max_tree_depth] || Nokogumbo::DEFAULT_MAX_TREE_DEPTH) + 2
          # Work on a copy so the caller's hash is not modified.
          options = options.dup
          options[:max_tree_depth] = max_depth
          # HTML5.parse takes its options as keywords, so splat the hash.
          temp_doc = HTML5.parse("<!DOCTYPE html><html><body>#{tags}", **options)
          temp_doc.xpath(path).each { |child| child.parent = self }
          self.errors = temp_doc.errors
        end
      end

      def serialize(options = {}, &block)
        # Bypass XML::Document.serialize which doesn't support options even
        # though XML::Node.serialize does!
        XML::Node.instance_method(:serialize).bind(self).call(options, &block)
      end

      # Parse a document fragment from +tags+, returning a Nodeset.
      # +tags+ is converted to UTF-8 first, interpreting it as +encoding+
      # when one is given; the fragment is attached to a new HTML5::Document.
      def self.parse(tags, encoding = nil, options = {})
        doc = HTML5::Document.new
        tags = HTML5.read_and_encode(tags, encoding)
        doc.encoding = 'UTF-8'
        new(doc, tags, nil, options)
      end
    end
  end
end
@@ -0,0 +1,3 @@
1
module Nokogumbo
  # Version string of the nokogumbo library. Frozen so the constant cannot
  # be mutated in place.
  VERSION = "2.0.0-alpha".freeze
end
@@ -0,0 +1,57 @@
1
+ require 'nokogiri'
2
+
3
module Nokogiri
  # Monkey patch
  module XML
    class Node
      # HTML elements can have attributes that contain colons.
      # Nokogiri::XML::Node#[]= treats names with colons as a prefixed QName
      # and tries to create an attribute in a namespace. This is especially
      # annoying with attribute names like xml:lang since libxml2 will
      # actually create the xml namespace if it doesn't exist already.
      #
      # Adds +node+ as a child, then removes each of its namespaced
      # attributes and sets it again by name through #[]=.
      define_method(:add_child_node_and_reparent_attrs) do |node|
        add_child_node(node)
        node.attribute_nodes.find_all { |a| a.namespace }.each do |attr|
          attr.remove
          node[attr.name] = attr.value
        end
      end

      # Concatenated +to_html+ of all children. With
      # +options[:preserve_newline]+ set, a leading "\n" is added when
      # HTML5.prepend_newline? says so for this node.
      def inner_html(options = {})
        prefix = options[:preserve_newline] && HTML5.prepend_newline?(self) ? "\n" : ""
        # Build a new string instead of appending to the literal.
        prefix + children.map { |child| child.to_html(options) }.join
      end

      # Write this node to +io+.
      #
      # Options are taken from a leading Hash argument: :encoding,
      # :save_with, :indent, :indent_text and :preserve_newline. A block,
      # if given, is yielded the SaveOptions object so flags can be changed.
      #
      # Nodes of an HTML5::Document are written by
      # HTML5.serialize_node_internal unless AS_XML or AS_XHTML is requested;
      # everything else goes through native_write_to.
      def write_to(io, *options)
        options = options.first.is_a?(Hash) ? options.shift : {}
        encoding = options[:encoding] || options[0]
        if Nokogiri.jruby?
          save_options = options[:save_with] || options[1]
          indent_times = options[:indent] || 0
        else
          save_options = options[:save_with] || options[1] || SaveOptions::FORMAT
          indent_times = options[:indent] || 2
        end
        indent_string = (options[:indent_text] || ' ') * indent_times

        config = SaveOptions.new(save_options.to_i)
        yield config if block_given?

        config_options = config.options
        wants_xml = (config_options & (SaveOptions::AS_XML | SaveOptions::AS_XHTML)) != 0
        if wants_xml || !document.is_a?(HTML5::Document)
          # Use Nokogiri's serializing code.
          native_write_to(io, encoding, indent_string, config_options)
        else
          # Serialize including the current node.
          encoding ||= document.encoding || Encoding::UTF_8
          internal_ops = {
            trailing_nl: (config_options & SaveOptions::FORMAT) != 0,
            preserve_newline: options[:preserve_newline] || false
          }
          # Hand the serializer the normalized options built above rather
          # than the caller's raw hash.
          HTML5.serialize_node_internal(self, io, encoding, internal_ops)
        end
      end
    end
  end
end
metadata CHANGED
@@ -1,14 +1,15 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: nokogumbo
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.5.0
4
+ version: 2.0.0.pre.alpha
5
5
  platform: ruby
6
6
  authors:
7
7
  - Sam Ruby
8
+ - Stephen Checkoway
8
9
  autorequire:
9
10
  bindir: bin
10
11
  cert_chain: []
11
- date: 2018-01-27 00:00:00.000000000 Z
12
+ date: 2018-08-31 00:00:00.000000000 Z
12
13
  dependencies:
13
14
  - !ruby/object:Gem::Dependency
14
15
  name: nokogiri
@@ -26,37 +27,42 @@ dependencies:
26
27
  version: '0'
27
28
  description: Nokogumbo allows a Ruby program to invoke the Gumbo HTML5 parser and
28
29
  access the result as a Nokogiri parsed document.
29
- email: rubys@intertwingly.net
30
+ email:
31
+ - rubys@intertwingly.net
32
+ - s@pahtak.org
30
33
  executables: []
31
34
  extensions:
32
- - ext/nokogumboc/extconf.rb
35
+ - ext/nokogumbo/extconf.rb
33
36
  extra_rdoc_files: []
34
37
  files:
38
+ - CHANGELOG.md
35
39
  - LICENSE.txt
36
40
  - README.md
37
- - ext/nokogumboc/extconf.rb
38
- - ext/nokogumboc/nokogumbo.c
41
+ - ext/nokogumbo/extconf.rb
42
+ - ext/nokogumbo/nokogumbo.c
43
+ - gumbo-parser/src/ascii.c
44
+ - gumbo-parser/src/ascii.h
39
45
  - gumbo-parser/src/attribute.c
40
46
  - gumbo-parser/src/attribute.h
41
47
  - gumbo-parser/src/char_ref.c
42
48
  - gumbo-parser/src/char_ref.h
43
- - gumbo-parser/src/char_ref.rl
44
49
  - gumbo-parser/src/error.c
45
50
  - gumbo-parser/src/error.h
51
+ - gumbo-parser/src/foreign_attrs.c
46
52
  - gumbo-parser/src/gumbo.h
47
53
  - gumbo-parser/src/insertion_mode.h
54
+ - gumbo-parser/src/macros.h
48
55
  - gumbo-parser/src/parser.c
49
56
  - gumbo-parser/src/parser.h
57
+ - gumbo-parser/src/replacement.h
50
58
  - gumbo-parser/src/string_buffer.c
51
59
  - gumbo-parser/src/string_buffer.h
52
60
  - gumbo-parser/src/string_piece.c
53
- - gumbo-parser/src/string_piece.h
61
+ - gumbo-parser/src/svg_attrs.c
62
+ - gumbo-parser/src/svg_tags.c
54
63
  - gumbo-parser/src/tag.c
55
- - gumbo-parser/src/tag.in
56
- - gumbo-parser/src/tag_enum.h
57
- - gumbo-parser/src/tag_gperf.h
58
- - gumbo-parser/src/tag_sizes.h
59
- - gumbo-parser/src/tag_strings.h
64
+ - gumbo-parser/src/tag_lookup.c
65
+ - gumbo-parser/src/tag_lookup.h
60
66
  - gumbo-parser/src/token_type.h
61
67
  - gumbo-parser/src/tokenizer.c
62
68
  - gumbo-parser/src/tokenizer.h
@@ -67,13 +73,20 @@ files:
67
73
  - gumbo-parser/src/util.h
68
74
  - gumbo-parser/src/vector.c
69
75
  - gumbo-parser/src/vector.h
70
- - gumbo-parser/visualc/include/strings.h
71
76
  - lib/nokogumbo.rb
72
- - test-nokogumbo.rb
77
+ - lib/nokogumbo/html5.rb
78
+ - lib/nokogumbo/html5/document.rb
79
+ - lib/nokogumbo/html5/document_fragment.rb
80
+ - lib/nokogumbo/version.rb
81
+ - lib/nokogumbo/xml/node.rb
73
82
  homepage: https://github.com/rubys/nokogumbo/#readme
74
83
  licenses:
75
84
  - Apache-2.0
76
- metadata: {}
85
+ metadata:
86
+ bug_tracker_uri: https://github.com/rubys/nokogumbo/issues
87
+ changelog_uri: https://github.com/rubys/nokogumbo/blob/master/CHANGELOG.md
88
+ homepage_uri: https://github.com/rubys/nokogumbo/#readme
89
+ source_code_uri: https://github.com/rubys/nokogumbo
77
90
  post_install_message:
78
91
  rdoc_options: []
79
92
  require_paths:
@@ -85,12 +98,12 @@ required_ruby_version: !ruby/object:Gem::Requirement
85
98
  version: '0'
86
99
  required_rubygems_version: !ruby/object:Gem::Requirement
87
100
  requirements:
88
- - - ">="
101
+ - - ">"
89
102
  - !ruby/object:Gem::Version
90
- version: '0'
103
+ version: 1.3.1
91
104
  requirements: []
92
105
  rubyforge_project:
93
- rubygems_version: 2.7.4
106
+ rubygems_version: 2.7.6
94
107
  signing_key:
95
108
  specification_version: 4
96
109
  summary: Nokogiri interface to the Gumbo HTML5 parser