nokogumbo 1.5.0 → 2.0.0.pre.alpha

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +56 -0
  3. data/README.md +146 -22
  4. data/ext/nokogumbo/extconf.rb +116 -0
  5. data/ext/{nokogumboc → nokogumbo}/nokogumbo.c +174 -71
  6. data/gumbo-parser/src/ascii.c +33 -0
  7. data/gumbo-parser/src/ascii.h +31 -0
  8. data/gumbo-parser/src/attribute.c +26 -28
  9. data/gumbo-parser/src/attribute.h +3 -23
  10. data/gumbo-parser/src/char_ref.c +135 -2351
  11. data/gumbo-parser/src/char_ref.h +13 -29
  12. data/gumbo-parser/src/error.c +215 -133
  13. data/gumbo-parser/src/error.h +34 -49
  14. data/gumbo-parser/src/foreign_attrs.c +104 -0
  15. data/gumbo-parser/src/gumbo.h +506 -304
  16. data/gumbo-parser/src/insertion_mode.h +4 -28
  17. data/gumbo-parser/src/macros.h +91 -0
  18. data/gumbo-parser/src/parser.c +1989 -1431
  19. data/gumbo-parser/src/parser.h +6 -22
  20. data/gumbo-parser/src/replacement.h +33 -0
  21. data/gumbo-parser/src/string_buffer.c +43 -50
  22. data/gumbo-parser/src/string_buffer.h +24 -40
  23. data/gumbo-parser/src/string_piece.c +39 -39
  24. data/gumbo-parser/src/svg_attrs.c +174 -0
  25. data/gumbo-parser/src/svg_tags.c +137 -0
  26. data/gumbo-parser/src/tag.c +186 -59
  27. data/gumbo-parser/src/tag_lookup.c +382 -0
  28. data/gumbo-parser/src/tag_lookup.h +13 -0
  29. data/gumbo-parser/src/token_type.h +1 -25
  30. data/gumbo-parser/src/tokenizer.c +899 -495
  31. data/gumbo-parser/src/tokenizer.h +37 -37
  32. data/gumbo-parser/src/tokenizer_states.h +6 -22
  33. data/gumbo-parser/src/utf8.c +103 -86
  34. data/gumbo-parser/src/utf8.h +37 -41
  35. data/gumbo-parser/src/util.c +48 -38
  36. data/gumbo-parser/src/util.h +10 -40
  37. data/gumbo-parser/src/vector.c +45 -57
  38. data/gumbo-parser/src/vector.h +17 -39
  39. data/lib/nokogumbo.rb +10 -174
  40. data/lib/nokogumbo/html5.rb +250 -0
  41. data/lib/nokogumbo/html5/document.rb +37 -0
  42. data/lib/nokogumbo/html5/document_fragment.rb +46 -0
  43. data/lib/nokogumbo/version.rb +3 -0
  44. data/lib/nokogumbo/xml/node.rb +57 -0
  45. metadata +32 -19
  46. data/ext/nokogumboc/extconf.rb +0 -60
  47. data/gumbo-parser/src/char_ref.rl +0 -2554
  48. data/gumbo-parser/src/string_piece.h +0 -38
  49. data/gumbo-parser/src/tag.in +0 -150
  50. data/gumbo-parser/src/tag_enum.h +0 -153
  51. data/gumbo-parser/src/tag_gperf.h +0 -105
  52. data/gumbo-parser/src/tag_sizes.h +0 -4
  53. data/gumbo-parser/src/tag_strings.h +0 -153
  54. data/gumbo-parser/visualc/include/strings.h +0 -4
  55. data/test-nokogumbo.rb +0 -190
@@ -0,0 +1,250 @@
1
+ require 'nokogumbo/html5/document'
2
+ require 'nokogumbo/html5/document_fragment'
3
+
4
+ module Nokogiri
5
# Parse an HTML 5 document.
#
# Shorthand for Nokogiri::HTML5::Document.parse: every positional
# argument, keyword option and the optional block are handed over as-is.
def self.HTML5(string_or_io, url = nil, encoding = nil, **options, &block)
  document_class = Nokogiri::HTML5::Document
  document_class.parse(string_or_io, url, encoding, **options, &block)
end
9
+
10
+ module HTML5
11
# Namespace URIs referenced while serializing.

# HTML uses the XHTML namespace.
HTML_NAMESPACE   = 'http://www.w3.org/1999/xhtml'.freeze
# Foreign-content vocabularies.
MATHML_NAMESPACE = 'http://www.w3.org/1998/Math/MathML'.freeze
SVG_NAMESPACE    = 'http://www.w3.org/2000/svg'.freeze
# Namespaces that attributes may carry.
XLINK_NAMESPACE  = 'http://www.w3.org/1999/xlink'.freeze
XML_NAMESPACE    = 'http://www.w3.org/XML/1998/namespace'.freeze
XMLNS_NAMESPACE  = 'http://www.w3.org/2000/xmlns/'.freeze
18
+
19
# Parse an HTML 5 document. Convenience method for
# Nokogiri::HTML5::Document.parse.
#
# +options+ is re-splatted as keyword arguments because Document.parse
# declares **options; handing the hash over as a fourth positional
# argument raises ArgumentError on Ruby 3, where a trailing hash is no
# longer converted to keywords implicitly.
def self.parse(string, url = nil, encoding = nil, **options, &block)
  Document.parse(string, url, encoding, **options, &block)
end
23
+
24
# Parse a fragment from +string+.
#
# Convenience wrapper around Nokogiri::HTML5::DocumentFragment.parse; the
# keyword options collected here are handed on as a single Hash argument.
def self.fragment(string, encoding = nil, **options)
  fragment_options = options
  DocumentFragment.parse(string, encoding, fragment_options)
end
29
+
30
# Fetch and parse a HTML document from the web, following redirects,
# handling https, and determining the character encoding using HTML5
# rules. +uri+ may be a +String+ or a +URI+. +options+ contains
# http headers and special options. Everything which is not a
# special option is considered a header. Special options include:
#  * :follow_limit => number of redirects which are followed
#  * :basic_auth => [username, password]
#
# Passing a bare number as +options+ is the deprecated shorthand for
# :follow_limit. On success the parsed document is returned with a
# +response+ reader exposing the Net::HTTP response. Any other response,
# or a redirect once the limit is used up, goes through response.value.
def self.get(uri, options={})
  # Normalize the deprecated numeric form first so every later use of
  # +options+ (each_key, key?, merge) operates on a Hash.
  options = {:follow_limit => options} if Numeric === options
  headers = options.clone
  follow_limit = headers.delete(:follow_limit)
  limit = follow_limit ? follow_limit.to_i : 10

  require 'net/http'
  uri = URI(uri) unless URI === uri

  http = Net::HTTP.new(uri.host, uri.port)

  # TLS / SSL support
  http.use_ssl = true if uri.scheme == 'https'

  # Pass through Net::HTTP override values, which currently include:
  #   :ca_file, :ca_path, :cert, :cert_store, :ciphers,
  #   :close_on_empty_response, :continue_timeout, :key, :open_timeout,
  #   :read_timeout, :ssl_timeout, :ssl_version, :use_ssl,
  #   :verify_callback, :verify_depth, :verify_mode
  options.each_key do |key|
    setter = "#{key}="
    http.send setter, headers.delete(key) if http.respond_to? setter
  end

  request = Net::HTTP::Get.new(uri.request_uri)

  # basic authentication
  auth = headers.delete(:basic_auth)
  auth ||= [uri.user, uri.password] if uri.user && uri.password
  request.basic_auth auth.first, auth.last if auth

  # remaining options are treated as headers
  headers.each {|key, value| request[key.to_s] = value.to_s}

  response = http.request(request)

  case response
  when Net::HTTPSuccess
    # Hand the parser only the options it reads, as keywords; passing the
    # whole hash positionally would land in the +url+ parameter of parse.
    parse_options = {}
    [:max_errors, :max_parse_errors, :max_tree_depth].each do |key|
      parse_options[key] = options[key] if options.key?(key)
    end
    doc = parse(reencode(response.body, response['content-type']), **parse_options)
    doc.instance_variable_set('@response', response)
    doc.class.send(:attr_reader, :response)
    doc
  when Net::HTTPRedirection
    # Out of redirects: let Net::HTTP raise for the non-success status.
    response.value if limit <= 1
    location = URI.join(uri, response['location'])
    get(location, options.merge(:follow_limit => limit-1))
  else
    response.value
  end
end
85
+
86
+ private
87
+
88
# Return the contents of +string+ (a String or any object responding to
# #read) as UTF-8.
#
# When +encoding+ is given the bytes are tagged with it; nothing is
# transcoded at that point. Anything that is not UTF-8 afterwards is
# converted by reencode. A String argument is never modified in place.
def self.read_and_encode(string, encoding)
  if string.respond_to?(:read)
    # IO#read and StringIO#read accept (length, buffer) rather than an
    # encoding option, so read the raw contents and tag them afterwards.
    string = string.read
    string.force_encoding(encoding) if encoding && string.respond_to?(:force_encoding)
  elsif encoding && string.respond_to?(:force_encoding)
    # Otherwise the string has the given encoding.
    string = string.dup
    string.force_encoding(encoding)
  end

  # convert to UTF-8 (Ruby 1.9+)
  if string.respond_to?(:encoding) && string.encoding != Encoding::UTF_8
    string = reencode(string.dup)
  end
  string
end
110
+
111
# Charset sniffing is a complex and controversial topic that understandably
# isn't done _by default_ by the Ruby Net::HTTP library.  This being said,
# it is a very real problem for consumers of HTML as the default for HTML
# is iso-8859-1, most "good" producers use utf-8, and the Gumbo parser
# *only* supports utf-8.
#
# Accordingly, Nokogiri::HTML::Document.parse provides limited encoding
# detection.  Following this lead, Nokogiri::HTML5 attempts to do likewise,
# while attempting to more closely follow the HTML5 standard.
#
# http://bugs.ruby-lang.org/issues/2567
# http://www.w3.org/TR/html5/syntax.html#determining-the-character-encoding
#
# Returns +body+ converted to UTF-8. For binary (ASCII-8BIT) input the
# encoding is taken, in order, from a byte order mark, the charset of
# +content_type+, a <meta> charset in the first 1024 bytes, and finally
# ISO-8859-1. Objects without #encoding are returned unchanged. The
# argument itself is left untouched.
def self.reencode(body, content_type=nil)
  return body unless body.respond_to? :encoding

  if body.encoding == Encoding::ASCII_8BIT
    # Retag a copy so the caller's string is not mutated (and may be frozen).
    body = body.dup
    encoding = nil

    # look for a Byte Order Mark (BOM). Compare byte values: a "\xFE\xFF"
    # literal is tagged with the source encoding and never compares equal
    # to a binary string containing the same high bytes.
    bom = body.byteslice(0, 3).bytes
    if bom[0, 2] == [0xFE, 0xFF]
      encoding = 'utf-16be'
    elsif bom[0, 2] == [0xFF, 0xFE]
      encoding = 'utf-16le'
    elsif bom == [0xEF, 0xBB, 0xBF]
      encoding = 'utf-8'
    end

    # look for a charset in a content-type header
    if content_type
      encoding ||= content_type[/charset=["']?(.*?)($|["';\s])/i, 1]
    end

    # look for a charset in a meta tag in the first 1024 bytes
    unless encoding
      data = body[0..1023].gsub(/<!--.*?(-->|\Z)/m, '')
      data.scan(/<meta.*?>/m).each do |meta|
        encoding ||= meta[/charset=["']?([^>]*?)($|["'\s>])/im, 1]
      end
    end

    # if all else fails, default to the official default encoding for HTML
    encoding ||= Encoding::ISO_8859_1

    # change the encoding to match the detected or inferred encoding;
    # an unknown charset name falls back to ISO-8859-1
    begin
      body.force_encoding(encoding)
    rescue ArgumentError
      body.force_encoding(Encoding::ISO_8859_1)
    end
  end

  body.encode(Encoding::UTF_8)
end
165
+
166
# Append the HTML serialization of +current_node+ and its descendants to
# +io+ (anything supporting <<).
#
# +encoding+ is passed to escape_text for text and attribute values.
# +options+ is read for :preserve_newline, which re-emits the newline after
# the start tag of pre/textarea/listing elements when prepend_newline? says
# so. Raises RuntimeError for a node type that is not handled here.
def self.serialize_node_internal(current_node, io, encoding, options)
  case current_node.type
  when XML::Node::ELEMENT_NODE
    ns = current_node.namespace
    # The namespace URI is read through #href, as the attribute branch
    # below already does; calling #uri here raised NoMethodError.
    ns_uri = ns.nil? ? nil : ns.href
    # XXX(sfc): attach namespaces to all nodes, even html?
    if ns_uri.nil? || ns_uri == HTML_NAMESPACE || ns_uri == MATHML_NAMESPACE || ns_uri == SVG_NAMESPACE
      tagname = current_node.name
    else
      tagname = "#{ns.prefix}:#{current_node.name}"
    end
    io << '<' << tagname
    current_node.attribute_nodes.each do |attr|
      attr_ns = attr.namespace
      if attr_ns.nil?
        attr_name = attr.name
      else
        attr_ns_uri = attr_ns.href
        # Attribute name with any "prefix:" part removed.
        local_name = attr.name.sub(/^[^:]*:/, '')
        if attr_ns_uri == XML_NAMESPACE
          attr_name = 'xml:' + local_name
        elsif attr_ns_uri == XMLNS_NAMESPACE && local_name == 'xmlns'
          attr_name = 'xmlns'
        elsif attr_ns_uri == XMLNS_NAMESPACE
          attr_name = 'xmlns:' + local_name
        elsif attr_ns_uri == XLINK_NAMESPACE
          attr_name = 'xlink:' + local_name
        else
          attr_name = "#{attr_ns.prefix}:#{attr.name}"
        end
      end
      io << ' ' << attr_name << '="' << escape_text(attr.content, encoding, true) << '"'
    end
    io << '>'
    # Elements in this list get no children and no end tag.
    if !%w[area base basefont bgsound br col embed frame hr img input keygen
           link meta param source track wbr].include?(current_node.name)
      io << "\n" if options[:preserve_newline] && prepend_newline?(current_node)
      current_node.children.each do |child|
        # XXX(sfc): Templates handled specially?
        serialize_node_internal(child, io, encoding, options)
      end
      io << '</' << tagname << '>'
    end
  when XML::Node::TEXT_NODE
    parent = current_node.parent
    # Text inside these elements is written out without escaping.
    if parent.element? && %w[style script xmp iframe noembed noframes plaintext noscript].include?(parent.name)
      io << current_node.content
    else
      io << escape_text(current_node.content, encoding, false)
    end
  when XML::Node::CDATA_SECTION_NODE
    io << '<![CDATA[' << current_node.content << ']]>'
  when XML::Node::COMMENT_NODE
    io << '<!--' << current_node.content << '-->'
  when XML::Node::PI_NODE
    io << '<?' << current_node.content << '>'
  when XML::Node::DOCUMENT_TYPE_NODE, XML::Node::DTD_NODE
    io << '<!DOCTYPE ' << current_node.name << '>'
  when XML::Node::HTML_DOCUMENT_NODE, XML::Node::DOCUMENT_FRAG_NODE
    current_node.children.each do |child|
      serialize_node_internal(child, io, encoding, options)
    end
  else
    raise "Unexpected node '#{current_node.name}' of type #{current_node.type}"
  end
end
231
+
232
# Escape +text+ for HTML output and convert it to +encoding+.
#
# In attribute mode '&', U+00A0 and '"' are replaced by entities; otherwise
# '&', U+00A0, '<' and '>' are. Characters that +encoding+ cannot represent
# become hexadecimal character references.
def self.escape_text(text, encoding, attribute_mode)
  pattern, replacements =
    if attribute_mode
      [/[&\u00a0"]/,
       { '&' => '&amp;', "\u00a0" => '&nbsp;', '"' => '&quot;' }]
    else
      [/[&\u00a0<>]/,
       { '&' => '&amp;', "\u00a0" => '&nbsp;', '<' => '&lt;', '>' => '&gt;' }]
    end
  escaped = text.gsub(pattern, replacements)
  # Not part of the standard
  escaped.encode(encoding, fallback: lambda { |c| "&\#x#{c.ord.to_s(16)};" })
end
243
+
244
# True when +node+ is a pre, textarea or listing element whose first child
# is a text node starting with a newline.
def self.prepend_newline?(node)
  return false unless %w[pre textarea listing].include?(node.name)
  return false if node.children.empty?

  leading = node.children[0]
  leading.text? && leading.content.start_with?("\n")
end
249
+ end
250
+ end
@@ -0,0 +1,37 @@
1
+ module Nokogiri
2
+ module HTML5
3
+ class Document < Nokogiri::HTML::Document
4
# Parse +string_or_io+ into a document.
#
# The block, when given, receives the options hash before parsing. A
# missing +encoding+ defaults to the input's own encoding unless that is
# ASCII-8BIT, and a missing +url+ defaults to the #path of a readable
# input.
def self.parse(string_or_io, url = nil, encoding = nil, **options, &block)
  block.call(options) if block

  if string_or_io.respond_to?(:encoding)
    source_encoding = string_or_io.encoding.name
    encoding ||= source_encoding unless source_encoding == 'ASCII-8BIT'
  end

  readable_with_path = string_or_io.respond_to?(:read) && string_or_io.respond_to?(:path)
  url ||= string_or_io.path if readable_with_path

  do_parse(string_or_io, url, encoding, options)
end
16
+
17
# Parse a document from +io+.
#
# Raises ArgumentError unless +io+ responds to #read; otherwise delegates
# to do_parse with the keyword options collected into a Hash.
def self.read_io(io, url = nil, encoding = nil, **options)
  # The guard used to call the misspelled `respon_to?`, so every call
  # failed with NoMethodError before reaching the parser.
  raise ArgumentError, "io object doesn't respond to :read" unless io.respond_to?(:read)
  do_parse(io, url, encoding, options)
end
21
+
22
# Parse a document from an in-memory value; +string+ is converted with
# #to_s before being handed to do_parse.
def self.read_memory(string, url = nil, encoding = nil, **options)
  source = string.to_s
  do_parse(source, url, encoding, options)
end
25
+
26
+ private
27
# Shared implementation behind parse, read_io and read_memory.
#
# Reads and transcodes the input via HTML5.read_and_encode, resolves the
# error and depth limits (:max_errors, then :max_parse_errors, then the
# Nokogumbo defaults), parses with Nokogumbo.parse, and marks the
# resulting document as UTF-8.
def self.do_parse(string_or_io, url, encoding, options)
  html = HTML5.read_and_encode(string_or_io, encoding)
  error_limit = options[:max_errors] || options[:max_parse_errors] || Nokogumbo::DEFAULT_MAX_ERRORS
  depth_limit = options[:max_tree_depth] || Nokogumbo::DEFAULT_MAX_TREE_DEPTH
  Nokogumbo.parse(html.to_s, url, error_limit, depth_limit).tap do |doc|
    doc.encoding = 'UTF-8'
  end
end
35
+ end
36
+ end
37
+ end
@@ -0,0 +1,46 @@
1
+ require 'nokogiri'
2
+
3
+ module Nokogiri
4
+ module HTML5
5
+ class DocumentFragment < Nokogiri::HTML::DocumentFragment
6
# Create a document fragment.
#
# With no +tags+ the fragment is left empty. Otherwise +tags+ is parsed
# inside a temporary "<!DOCTYPE html><html><body>" document, the resulting
# nodes are re-parented onto this fragment, and the parse errors are copied
# over. +options+ may carry :max_tree_depth, which is raised by two to
# account for the wrapper elements.
#
# Raises ArgumentError when a context node +ctx+ is given.
def initialize(doc, tags = nil, ctx = nil, options = {})
  return self unless tags
  # `Argument` is not a defined constant; the intended class is ArgumentError.
  raise ArgumentError, "Fragment parsing with context not supported" if ctx

  tags = Nokogiri::HTML5.read_and_encode(tags, nil)

  # Copied from Nokogiri's document_fragment.rb and labeled "a horrible
  # hack."
  path = tags.strip =~ /^<body/i ? "/html/body" : "/html/body/node()"

  # Add 2 for <html> and <body>.
  max_depth = (options[:max_tree_depth] || Nokogumbo::DEFAULT_MAX_TREE_DEPTH) + 2
  options = options.dup
  options[:max_tree_depth] = max_depth
  # HTML5.parse takes its options as keywords; a positional hash would be
  # bound to its +url+ parameter on Ruby 3.
  temp_doc = HTML5.parse("<!DOCTYPE html><html><body>#{tags}", **options)
  temp_doc.xpath(path).each { |child| child.parent = self }
  self.errors = temp_doc.errors
end
30
+
31
# Serialize through XML::Node#serialize directly.
#
# Bypass XML::Document.serialize which doesn't support options even
# though XML::Node.serialize does!
def serialize(options = {}, &block)
  node_serialize = XML::Node.instance_method(:serialize)
  node_serialize.bind(self).call(options, &block)
end
36
+
37
# Parse a document fragment from +tags+.
#
# Creates a fresh HTML5::Document marked as UTF-8, transcodes +tags+ with
# HTML5.read_and_encode and builds the fragment with no context node.
def self.parse(tags, encoding = nil, options = {})
  owner = HTML5::Document.new
  encoded_tags = HTML5.read_and_encode(tags, encoding)
  owner.encoding = 'UTF-8'
  new(owner, encoded_tags, nil, options)
end
44
+ end
45
+ end
46
+ end
@@ -0,0 +1,3 @@
1
module Nokogumbo
  # Version string of the library.
  VERSION = '2.0.0-alpha'
end
@@ -0,0 +1,57 @@
1
+ require 'nokogiri'
2
+
3
+ module Nokogiri
4
+ # Monkey patch
5
+ module XML
6
+ class Node
7
# HTML elements can have attributes that contain colons.
# Nokogiri::XML::Node#[]= treats names with colons as a prefixed QName
# and tries to create an attribute in a namespace. This is especially
# annoying with attribute names like xml:lang since libxml2 will
# actually create the xml namespace if it doesn't exist already.
#
# After adding +node+ as a child, every attribute of +node+ that carries a
# namespace is removed and set again by name through #[]=.
define_method(:add_child_node_and_reparent_attrs) do |node|
  add_child_node(node)
  namespaced_attrs = node.attribute_nodes.select(&:namespace)
  namespaced_attrs.each do |attr|
    attr.remove
    node[attr.name] = attr.value
  end
end
19
+
20
# Concatenated HTML of all children.
#
# With options[:preserve_newline] set, a leading "\n" is added when
# HTML5.prepend_newline? reports that this node needs one. +options+ is
# passed on to each child's #to_html.
def inner_html(options = {})
  needs_newline = options[:preserve_newline] && HTML5.prepend_newline?(self)
  html = needs_newline ? "\n" : ""
  html << children.map { |child| child.to_html(options) }.join
end
25
+
26
# Write this node to +io+.
#
# +options+ is expected to be a Hash as its first element; recognized keys
# are :encoding, :save_with, :indent, :indent_text and :preserve_newline.
# A SaveOptions object built from :save_with is yielded to the block, if
# any, for adjustment.
#
# Nodes outside an HTML5::Document, and requests for XML or XHTML output,
# are handed to native_write_to. Everything else is serialized by
# HTML5.serialize_node_internal.
def write_to(io, *options)
  options = options.first.is_a?(Hash) ? options.shift : {}
  # NOTE(review): +options+ is a Hash from here on, so the [0] and [1]
  # fallbacks below are hash lookups, not positional arguments. They are
  # kept unchanged; confirm whether positional arguments were meant to work.
  encoding = options[:encoding] || options[0]
  if Nokogiri.jruby?
    save_options = options[:save_with] || options[1]
    indent_times = options[:indent] || 0
  else
    save_options = options[:save_with] || options[1] || SaveOptions::FORMAT
    indent_times = options[:indent] || 2
  end
  indent_string = (options[:indent_text] || ' ') * indent_times

  config = SaveOptions.new(save_options.to_i)
  yield config if block_given?

  config_options = config.options
  if (config_options & (SaveOptions::AS_XML | SaveOptions::AS_XHTML) != 0) || !document.is_a?(HTML5::Document)
    # Use Nokogiri's serializing code.
    native_write_to(io, encoding, indent_string, config_options)
  else
    # Serialize including the current node.
    encoding ||= document.encoding || Encoding::UTF_8
    internal_ops = {
      trailing_nl: config_options & SaveOptions::FORMAT != 0,
      preserve_newline: options[:preserve_newline] || false
    }
    # Pass the normalized hash built above; it was previously computed and
    # then discarded in favor of the raw caller options.
    HTML5.serialize_node_internal(self, io, encoding, internal_ops)
  end
end
55
+ end
56
+ end
57
+ end
metadata CHANGED
@@ -1,14 +1,15 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: nokogumbo
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.5.0
4
+ version: 2.0.0.pre.alpha
5
5
  platform: ruby
6
6
  authors:
7
7
  - Sam Ruby
8
+ - Stephen Checkoway
8
9
  autorequire:
9
10
  bindir: bin
10
11
  cert_chain: []
11
- date: 2018-01-27 00:00:00.000000000 Z
12
+ date: 2018-08-31 00:00:00.000000000 Z
12
13
  dependencies:
13
14
  - !ruby/object:Gem::Dependency
14
15
  name: nokogiri
@@ -26,37 +27,42 @@ dependencies:
26
27
  version: '0'
27
28
  description: Nokogumbo allows a Ruby program to invoke the Gumbo HTML5 parser and
28
29
  access the result as a Nokogiri parsed document.
29
- email: rubys@intertwingly.net
30
+ email:
31
+ - rubys@intertwingly.net
32
+ - s@pahtak.org
30
33
  executables: []
31
34
  extensions:
32
- - ext/nokogumboc/extconf.rb
35
+ - ext/nokogumbo/extconf.rb
33
36
  extra_rdoc_files: []
34
37
  files:
38
+ - CHANGELOG.md
35
39
  - LICENSE.txt
36
40
  - README.md
37
- - ext/nokogumboc/extconf.rb
38
- - ext/nokogumboc/nokogumbo.c
41
+ - ext/nokogumbo/extconf.rb
42
+ - ext/nokogumbo/nokogumbo.c
43
+ - gumbo-parser/src/ascii.c
44
+ - gumbo-parser/src/ascii.h
39
45
  - gumbo-parser/src/attribute.c
40
46
  - gumbo-parser/src/attribute.h
41
47
  - gumbo-parser/src/char_ref.c
42
48
  - gumbo-parser/src/char_ref.h
43
- - gumbo-parser/src/char_ref.rl
44
49
  - gumbo-parser/src/error.c
45
50
  - gumbo-parser/src/error.h
51
+ - gumbo-parser/src/foreign_attrs.c
46
52
  - gumbo-parser/src/gumbo.h
47
53
  - gumbo-parser/src/insertion_mode.h
54
+ - gumbo-parser/src/macros.h
48
55
  - gumbo-parser/src/parser.c
49
56
  - gumbo-parser/src/parser.h
57
+ - gumbo-parser/src/replacement.h
50
58
  - gumbo-parser/src/string_buffer.c
51
59
  - gumbo-parser/src/string_buffer.h
52
60
  - gumbo-parser/src/string_piece.c
53
- - gumbo-parser/src/string_piece.h
61
+ - gumbo-parser/src/svg_attrs.c
62
+ - gumbo-parser/src/svg_tags.c
54
63
  - gumbo-parser/src/tag.c
55
- - gumbo-parser/src/tag.in
56
- - gumbo-parser/src/tag_enum.h
57
- - gumbo-parser/src/tag_gperf.h
58
- - gumbo-parser/src/tag_sizes.h
59
- - gumbo-parser/src/tag_strings.h
64
+ - gumbo-parser/src/tag_lookup.c
65
+ - gumbo-parser/src/tag_lookup.h
60
66
  - gumbo-parser/src/token_type.h
61
67
  - gumbo-parser/src/tokenizer.c
62
68
  - gumbo-parser/src/tokenizer.h
@@ -67,13 +73,20 @@ files:
67
73
  - gumbo-parser/src/util.h
68
74
  - gumbo-parser/src/vector.c
69
75
  - gumbo-parser/src/vector.h
70
- - gumbo-parser/visualc/include/strings.h
71
76
  - lib/nokogumbo.rb
72
- - test-nokogumbo.rb
77
+ - lib/nokogumbo/html5.rb
78
+ - lib/nokogumbo/html5/document.rb
79
+ - lib/nokogumbo/html5/document_fragment.rb
80
+ - lib/nokogumbo/version.rb
81
+ - lib/nokogumbo/xml/node.rb
73
82
  homepage: https://github.com/rubys/nokogumbo/#readme
74
83
  licenses:
75
84
  - Apache-2.0
76
- metadata: {}
85
+ metadata:
86
+ bug_tracker_uri: https://github.com/rubys/nokogumbo/issues
87
+ changelog_uri: https://github.com/rubys/nokogumbo/blob/master/CHANGELOG.md
88
+ homepage_uri: https://github.com/rubys/nokogumbo/#readme
89
+ source_code_uri: https://github.com/rubys/nokogumbo
77
90
  post_install_message:
78
91
  rdoc_options: []
79
92
  require_paths:
@@ -85,12 +98,12 @@ required_ruby_version: !ruby/object:Gem::Requirement
85
98
  version: '0'
86
99
  required_rubygems_version: !ruby/object:Gem::Requirement
87
100
  requirements:
88
- - - ">="
101
+ - - ">"
89
102
  - !ruby/object:Gem::Version
90
- version: '0'
103
+ version: 1.3.1
91
104
  requirements: []
92
105
  rubyforge_project:
93
- rubygems_version: 2.7.4
106
+ rubygems_version: 2.7.6
94
107
  signing_key:
95
108
  specification_version: 4
96
109
  summary: Nokogiri interface to the Gumbo HTML5 parser