nokogumbo 1.5.0 → 2.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +237 -26
  3. data/ext/nokogumbo/extconf.rb +121 -0
  4. data/ext/nokogumbo/nokogumbo.c +793 -0
  5. data/gumbo-parser/src/ascii.c +75 -0
  6. data/gumbo-parser/src/ascii.h +115 -0
  7. data/gumbo-parser/src/attribute.c +26 -28
  8. data/gumbo-parser/src/attribute.h +3 -23
  9. data/gumbo-parser/src/char_ref.c +5972 -6816
  10. data/gumbo-parser/src/char_ref.h +14 -45
  11. data/gumbo-parser/src/error.c +510 -163
  12. data/gumbo-parser/src/error.h +70 -147
  13. data/gumbo-parser/src/foreign_attrs.c +104 -0
  14. data/gumbo-parser/src/gumbo.h +577 -305
  15. data/gumbo-parser/src/insertion_mode.h +4 -28
  16. data/gumbo-parser/src/macros.h +91 -0
  17. data/gumbo-parser/src/parser.c +2922 -2228
  18. data/gumbo-parser/src/parser.h +6 -22
  19. data/gumbo-parser/src/replacement.h +33 -0
  20. data/gumbo-parser/src/string_buffer.c +43 -50
  21. data/gumbo-parser/src/string_buffer.h +24 -40
  22. data/gumbo-parser/src/string_piece.c +39 -39
  23. data/gumbo-parser/src/svg_attrs.c +174 -0
  24. data/gumbo-parser/src/svg_tags.c +137 -0
  25. data/gumbo-parser/src/tag.c +186 -59
  26. data/gumbo-parser/src/tag_lookup.c +382 -0
  27. data/gumbo-parser/src/tag_lookup.h +13 -0
  28. data/gumbo-parser/src/token_buffer.c +79 -0
  29. data/gumbo-parser/src/token_buffer.h +71 -0
  30. data/gumbo-parser/src/token_type.h +1 -25
  31. data/gumbo-parser/src/tokenizer.c +2127 -1561
  32. data/gumbo-parser/src/tokenizer.h +41 -52
  33. data/gumbo-parser/src/tokenizer_states.h +281 -45
  34. data/gumbo-parser/src/utf8.c +98 -123
  35. data/gumbo-parser/src/utf8.h +84 -52
  36. data/gumbo-parser/src/util.c +48 -38
  37. data/gumbo-parser/src/util.h +10 -40
  38. data/gumbo-parser/src/vector.c +45 -57
  39. data/gumbo-parser/src/vector.h +17 -39
  40. data/lib/nokogumbo.rb +11 -173
  41. data/lib/nokogumbo/html5.rb +252 -0
  42. data/lib/nokogumbo/html5/document.rb +53 -0
  43. data/lib/nokogumbo/html5/document_fragment.rb +62 -0
  44. data/lib/nokogumbo/html5/node.rb +72 -0
  45. data/lib/nokogumbo/version.rb +3 -0
  46. metadata +43 -24
  47. data/ext/nokogumboc/extconf.rb +0 -60
  48. data/ext/nokogumboc/nokogumbo.c +0 -295
  49. data/gumbo-parser/src/char_ref.rl +0 -2554
  50. data/gumbo-parser/src/string_piece.h +0 -38
  51. data/gumbo-parser/src/tag.in +0 -150
  52. data/gumbo-parser/src/tag_enum.h +0 -153
  53. data/gumbo-parser/src/tag_gperf.h +0 -105
  54. data/gumbo-parser/src/tag_sizes.h +0 -4
  55. data/gumbo-parser/src/tag_strings.h +0 -153
  56. data/gumbo-parser/visualc/include/strings.h +0 -4
  57. data/test-nokogumbo.rb +0 -190
@@ -0,0 +1,252 @@
1
+ require 'nokogumbo/html5/document'
2
+ require 'nokogumbo/html5/document_fragment'
3
+ require 'nokogumbo/html5/node'
4
+
5
+ module Nokogiri
6
+ # Parse an HTML 5 document. Convenience method for Nokogiri::HTML5::Document.parse
7
# Shorthand entry point: Nokogiri::HTML5(...) forwards every positional
# argument, keyword option and the optional block, unchanged, to
# Nokogiri::HTML5::Document.parse and returns whatever that returns.
def self.HTML5(string_or_io, url = nil, encoding = nil, **options, &block)
  document_class = Nokogiri::HTML5::Document
  document_class.parse(string_or_io, url, encoding, **options, &block)
end
10
+
11
+ module HTML5
12
+ # HTML uses the XHTML namespace.
13
+ HTML_NAMESPACE = 'http://www.w3.org/1999/xhtml'.freeze
14
+ MATHML_NAMESPACE = 'http://www.w3.org/1998/Math/MathML'.freeze
15
+ SVG_NAMESPACE = 'http://www.w3.org/2000/svg'.freeze
16
+ XLINK_NAMESPACE = 'http://www.w3.org/1999/xlink'.freeze
17
+ XML_NAMESPACE = 'http://www.w3.org/XML/1998/namespace'.freeze
18
+ XMLNS_NAMESPACE = 'http://www.w3.org/2000/xmlns/'.freeze
19
+
20
+ # Parse an HTML 5 document. Convenience method for Nokogiri::HTML5::Document.parse
21
# Module-level convenience wrapper: hands +string+, +url+, +encoding+, the
# keyword options and the block straight to Document.parse.
def self.parse(string, url = nil, encoding = nil, **options, &block)
  HTML5::Document.parse(string, url, encoding, **options, &block)
end
24
+
25
+ # Parse a fragment from +string+. Convenience method for
26
+ # Nokogiri::HTML5::DocumentFragment.parse.
27
# Module-level convenience wrapper around DocumentFragment.parse. The
# keyword options are collected into a Hash and passed on as that method's
# third positional argument.
def self.fragment(string, encoding = nil, **options)
  HTML5::DocumentFragment.parse(string, encoding, options)
end
30
+
31
+ # Fetch and parse a HTML document from the web, following redirects,
32
+ # handling https, and determining the character encoding using HTML5
33
+ # rules. +uri+ may be a +String+ or a +URI+. +options+ contains
34
+ # http headers and special options. Everything which is not a
35
+ # special option is considered a header. Special options include:
36
+ # * :follow_limit => number of redirects which are followed
37
+ # * :basic_auth => [username, password]
38
# Fetch +uri+ with Net::HTTP and parse the response body as HTML5.
#
# uri::     a String or a URI.
# options:: a Hash that may mix
#           * :follow_limit and :basic_auth (special options),
#           * any key for which Net::HTTP has a "key=" setter,
#           * parser limits (:max_attributes, :max_errors,
#             :max_parse_errors, :max_tree_depth),
#           * everything else, which is sent as a request header.
#           A bare Numeric is still accepted as the deprecated shorthand
#           for :follow_limit.
#
# Returns the parsed document, with the Net::HTTP response stored in its
# @response instance variable. Any other response, or a redirect received
# once the follow limit is used up, is handed to Net::HTTPResponse#value.
def self.get(uri, options={})
  # Deprecated call style get(uri, 5): normalise +options+ itself (not just
  # the header copy) so the +each+ and +merge+ calls below always see a Hash.
  options = {:follow_limit => options} if Numeric === options
  headers = options.clone
  limit = headers[:follow_limit] ? headers.delete(:follow_limit).to_i : 10

  # Parser limits are meant for the HTML5 parser; take them out of the
  # header set so they are not transmitted to the server.
  parse_options = {}
  [:max_attributes, :max_errors, :max_parse_errors, :max_tree_depth].each do |key|
    parse_options[key] = headers.delete(key) if headers.key?(key)
  end

  require 'net/http'
  uri = URI(uri) unless URI === uri

  http = Net::HTTP.new(uri.host, uri.port)

  # TLS / SSL support
  http.use_ssl = true if uri.scheme == 'https'

  # Pass through Net::HTTP override values, which currently include:
  #   :ca_file, :ca_path, :cert, :cert_store, :ciphers,
  #   :close_on_empty_response, :continue_timeout, :key, :open_timeout,
  #   :read_timeout, :ssl_timeout, :ssl_version, :use_ssl,
  #   :verify_callback, :verify_depth, :verify_mode
  options.each_key do |key|
    setter = "#{key}="
    http.public_send(setter, headers.delete(key)) if http.respond_to?(setter)
  end

  request = Net::HTTP::Get.new(uri.request_uri)

  # basic authentication: explicit option first, then credentials in the URI
  auth = headers.delete(:basic_auth)
  auth ||= [uri.user, uri.password] if uri.user && uri.password
  request.basic_auth auth.first, auth.last if auth

  # remaining options are treated as headers
  headers.each { |key, value| request[key.to_s] = value.to_s }

  response = http.request(request)

  case response
  when Net::HTTPSuccess
    # Pass the parser limits as real keyword arguments. Handing the whole
    # options Hash over positionally would bind it to +url+ on Ruby 3, or
    # whenever it contains String (header) keys.
    body = reencode(response.body, response['content-type'])
    doc = parse(body, nil, nil, **parse_options)
    doc.instance_variable_set('@response', response)
    # NOTE: this defines the reader on the document's class, not only on
    # this instance (kept as in the original).
    doc.class.send(:attr_reader, :response)
    doc
  when Net::HTTPRedirection
    response.value if limit <= 1
    location = URI.join(uri, response['location'])
    get(location, options.merge(:follow_limit => limit-1))
  else
    response.value
  end
end
86
+
87
+ private
88
+
89
# Turn +string+ (a String-like object or anything responding to +read+)
# into a UTF-8 String.
#
# string::   object responding to +read+ (its full contents are read) or to
#            +to_str+.
# encoding:: optional encoding (name or Encoding) the bytes are declared to
#            be in; +nil+ keeps whatever encoding the data already carries.
#
# Returns the text in UTF-8. Data not already tagged as UTF-8 is converted
# through +reencode+. The caller's String is never modified.
def self.read_and_encode(string, encoding)
  # IO#read and StringIO#read take (length, outbuf) and no encoding keyword,
  # so read the raw contents first and label them afterwards.
  string = string.respond_to?(:read) ? string.read : string.to_str

  # Label (not transcode) the bytes with the requested encoding; work on a
  # copy so the argument passed in stays untouched.
  string = string.dup.force_encoding(encoding) if encoding

  # convert to UTF-8
  string = reencode(string) if string.encoding != Encoding::UTF_8
  string
end
112
+
113
+ # Charset sniffing is a complex and controversial topic that understandably
114
+ # isn't done _by default_ by the Ruby Net::HTTP library. This being said,
115
+ # it is a very real problem for consumers of HTML as the default for HTML
116
+ # is iso-8859-1, most "good" producers use utf-8, and the Gumbo parser
117
+ # *only* supports utf-8.
118
+ #
119
+ # Accordingly, Nokogiri::HTML::Document.parse provides limited encoding
120
+ # detection. Following this lead, Nokogiri::HTML5 attempts to do likewise,
121
+ # while attempting to more closely follow the HTML5 standard.
122
+ #
123
+ # http://bugs.ruby-lang.org/issues/2567
124
+ # http://www.w3.org/TR/html5/syntax.html#determining-the-character-encoding
125
+ #
126
# Return +body+ converted to UTF-8.
#
# body::         the document text. When it is tagged ASCII-8BIT (binary)
#                its real encoding is sniffed, in order, from a byte order
#                mark, the charset parameter of +content_type+, and a
#                <meta ... charset=...> tag within the first 1024 bytes,
#                falling back to ISO-8859-1.
# content_type:: optional value of the HTTP Content-Type header.
#
# The argument itself is not modified; a relabelled copy is encoded.
def self.reencode(body, content_type=nil)
  if body.encoding == Encoding::ASCII_8BIT
    encoding = nil

    # look for a Byte Order Mark (BOM)
    initial_bytes = body[0..2].bytes
    if initial_bytes[0..2] == [0xEF, 0xBB, 0xBF]
      encoding = Encoding::UTF_8
    elsif initial_bytes[0..1] == [0xFE, 0xFF]
      encoding = Encoding::UTF_16BE
    elsif initial_bytes[0..1] == [0xFF, 0xFE]
      encoding = Encoding::UTF_16LE
    end

    # look for a charset parameter in the Content-Type header
    if content_type
      encoding ||= content_type[/charset=["']?(.*?)($|["';\s])/i, 1]
    end

    # look for a charset in a meta tag in the first 1024 bytes
    unless encoding
      data = body[0..1023].gsub(/<!--.*?(-->|\Z)/m, '')
      # HTML tag names are case-insensitive, so <META ...> must match too.
      data.scan(/<meta.*?>/im).each do |meta|
        encoding ||= meta[/charset=["']?([^>]*?)($|["'\s>])/im, 1]
      end
    end

    # if all else fails, default to the official default encoding for HTML
    encoding ||= Encoding::ISO_8859_1

    # change the encoding to match the detected or inferred encoding
    body = body.dup
    begin
      body.force_encoding(encoding)
    rescue ArgumentError
      # unknown encoding name: fall back to the default
      body.force_encoding(Encoding::ISO_8859_1)
    end
  end

  body.encode(Encoding::UTF_8)
end
167
+
168
# Append the HTML serialization of +current_node+ and, recursively, of its
# children to +io+ using <<.
#
# encoding:: passed to escape_text for text and attribute values.
# options::  only :preserve_newline is consulted here.
#
# Raises a RuntimeError for node types not handled below.
def self.serialize_node_internal(current_node, io, encoding, options)
  case current_node.type
  when XML::Node::ELEMENT_NODE
    element_ns = current_node.namespace
    element_ns_uri = element_ns && element_ns.href
    # XXX(sfc): attach namespaces to all nodes, even html?
    # Elements with no namespace, or in the HTML, MathML or SVG namespace,
    # are written without a prefix.
    unprefixed = element_ns_uri.nil? ||
                 [HTML_NAMESPACE, MATHML_NAMESPACE, SVG_NAMESPACE].include?(element_ns_uri)
    tagname = unprefixed ? current_node.name : "#{element_ns.prefix}:#{current_node.name}"

    io << '<' << tagname
    current_node.attribute_nodes.each do |attr|
      attr_ns = attr.namespace
      attr_name =
        if attr_ns.nil?
          attr.name
        else
          # attribute name with any existing "prefix:" stripped
          local_name = attr.name.sub(/^[^:]*:/, '')
          case attr_ns.href
          when XML_NAMESPACE   then 'xml:' + local_name
          when XMLNS_NAMESPACE then local_name == 'xmlns' ? 'xmlns' : 'xmlns:' + local_name
          when XLINK_NAMESPACE then 'xlink:' + local_name
          else "#{attr_ns.prefix}:#{attr.name}"
          end
        end
      io << ' ' << attr_name << '="' << escape_text(attr.content, encoding, true) << '"'
    end
    io << '>'

    # These elements get neither children nor an end tag.
    childless = %w[area base basefont bgsound br col embed frame hr img input keygen
                   link meta param source track wbr]
    unless childless.include?(current_node.name)
      io << "\n" if options[:preserve_newline] && prepend_newline?(current_node)
      current_node.children.each do |child|
        # XXX(sfc): Templates handled specially?
        serialize_node_internal(child, io, encoding, options)
      end
      io << '</' << tagname << '>'
    end
  when XML::Node::TEXT_NODE
    parent = current_node.parent
    # Text directly inside these elements is written without escaping.
    verbatim = parent.element? &&
               %w[style script xmp iframe noembed noframes plaintext noscript].include?(parent.name)
    if verbatim
      io << current_node.content
    else
      io << escape_text(current_node.content, encoding, false)
    end
  when XML::Node::CDATA_SECTION_NODE
    io << '<![CDATA[' << current_node.content << ']]>'
  when XML::Node::COMMENT_NODE
    io << '<!--' << current_node.content << '-->'
  when XML::Node::PI_NODE
    io << '<?' << current_node.content << '>'
  when XML::Node::DOCUMENT_TYPE_NODE, XML::Node::DTD_NODE
    io << '<!DOCTYPE ' << current_node.name << '>'
  when XML::Node::HTML_DOCUMENT_NODE, XML::Node::DOCUMENT_FRAG_NODE
    current_node.children.each do |child|
      serialize_node_internal(child, io, encoding, options)
    end
  else
    raise "Unexpected node '#{current_node.name}' of type #{current_node.type}"
  end
end
233
+
234
# Escape +text+ for HTML output and transcode it to +encoding+.
#
# attribute_mode:: when truthy, escapes &, U+00A0 and the double quote (for
#                  use inside a double-quoted attribute value); otherwise
#                  escapes &, U+00A0, < and >.
#
# Characters that cannot be represented in +encoding+ are emitted as
# hexadecimal character references.
def self.escape_text(text, encoding, attribute_mode)
  escaped =
    if attribute_mode
      quoted_value_entities = { '&' => '&amp;', "\u00a0" => '&nbsp;', '"' => '&quot;' }
      text.gsub(/[&\u00a0"]/, quoted_value_entities)
    else
      content_entities = { '&' => '&amp;', "\u00a0" => '&nbsp;', '<' => '&lt;', '>' => '&gt;' }
      text.gsub(/[&\u00a0<>]/, content_entities)
    end

  # Not part of the standard
  as_char_reference = lambda { |c| "&\#x#{c.ord.to_s(16)};" }
  escaped.encode(encoding, fallback: as_char_reference)
end
245
+
246
# True when +node+ is named pre, textarea or listing, has at least one
# child, and its first child is a text node whose content starts with a
# newline. Callers use this to decide whether to emit an extra "\n".
def self.prepend_newline?(node)
  return false unless %w[pre textarea listing].include?(node.name)

  kids = node.children
  return false if kids.empty?

  leading = kids[0]
  leading.text? && leading.content.start_with?("\n")
end
251
+ end
252
+ end
@@ -0,0 +1,53 @@
1
+ module Nokogiri
2
+ module HTML5
3
# HTML5 document built by the Gumbo-backed parser (Nokogumbo.parse).
class Document < Nokogiri::HTML::Document
  # Parse +string_or_io+ into a Document.
  #
  # string_or_io:: a String-like object, an object responding to +read+, or
  #                nil/false (treated as an empty string).
  # url::          optional; defaults to the +path+ of a readable object
  #                that has one.
  # encoding::     optional; defaults to the encoding name of the input
  #                when it exposes one other than ASCII-8BIT.
  # options::      yielded to the block (if given) before parsing, then
  #                passed to do_parse.
  #
  # Raises ArgumentError when the input responds to neither +read+ nor
  # +to_str+.
  def self.parse(string_or_io, url = nil, encoding = nil, **options, &block)
    yield options if block_given?
    string_or_io ||= ''

    if string_or_io.respond_to?(:encoding)
      declared = string_or_io.encoding.name
      encoding ||= declared unless declared == 'ASCII-8BIT'
    end

    file_like = string_or_io.respond_to?(:read) && string_or_io.respond_to?(:path)
    url ||= string_or_io.path if file_like

    parseable = string_or_io.respond_to?(:read) || string_or_io.respond_to?(:to_str)
    raise ArgumentError, "not a string or IO object" unless parseable

    do_parse(string_or_io, url, encoding, options)
  end

  # Parse from an object responding to +read+; raises ArgumentError otherwise.
  def self.read_io(io, url = nil, encoding = nil, **options)
    unless io.respond_to?(:read)
      raise ArgumentError, "io object doesn't respond to :read"
    end
    do_parse(io, url, encoding, options)
  end

  # Parse from an object responding to +to_str+; raises ArgumentError otherwise.
  def self.read_memory(string, url = nil, encoding = nil, **options)
    unless string.respond_to?(:to_str)
      raise ArgumentError, "string object doesn't respond to :to_str"
    end
    do_parse(string, url, encoding, options)
  end

  # Build a DocumentFragment owned by this document from +tags+, using the
  # document's root as the context node.
  def fragment(tags = nil)
    DocumentFragment.new(self, tags, root)
  end

  def to_xml(options = {}, &block)
    # Bypass XML::Document#to_xml which doesn't add
    # XML::Node::SaveOptions::AS_XML like XML::Node#to_xml does.
    node_to_xml = XML::Node.instance_method(:to_xml)
    node_to_xml.bind(self).call(options, &block)
  end

  # NOTE: a bare +private+ does not change the visibility of methods defined
  # with +def self.+, so do_parse stays callable from outside.
  private

  # Convert the input to UTF-8, run the parser with the configured limits
  # (:max_attributes, :max_errors or its alias :max_parse_errors,
  # :max_tree_depth; each falling back to the Nokogumbo default), and mark
  # the resulting document as UTF-8.
  def self.do_parse(string_or_io, url, encoding, options)
    utf8_source = HTML5.read_and_encode(string_or_io, encoding)
    limits = [
      options[:max_attributes] || Nokogumbo::DEFAULT_MAX_ATTRIBUTES,
      options[:max_errors] || options[:max_parse_errors] || Nokogumbo::DEFAULT_MAX_ERRORS,
      options[:max_tree_depth] || Nokogumbo::DEFAULT_MAX_TREE_DEPTH
    ]
    parsed = Nokogumbo.parse(utf8_source, url, *limits)
    parsed.encoding = 'UTF-8'
    parsed
  end
end
52
+ end
53
+ end
@@ -0,0 +1,62 @@
1
+ require 'nokogiri'
2
+
3
+ module Nokogiri
4
+ module HTML5
5
# HTML5 document fragment; parsing is delegated to Nokogumbo.fragment.
class DocumentFragment < Nokogiri::HTML::DocumentFragment
  attr_accessor :document
  attr_accessor :errors

  # Create a document fragment.
  #
  # doc::     the owning document.
  # tags::    markup to parse into the fragment; when nil/false the fragment
  #           is left empty.
  # ctx::     context node handed to Nokogumbo.fragment.
  # options:: :max_attributes, :max_errors and :max_tree_depth, each falling
  #           back to the matching Nokogumbo default.
  def initialize(doc, tags = nil, ctx = nil, options = {})
    self.document = doc
    self.errors = []
    return self unless tags

    attribute_limit = options[:max_attributes] || Nokogumbo::DEFAULT_MAX_ATTRIBUTES
    error_limit = options[:max_errors] || Nokogumbo::DEFAULT_MAX_ERRORS
    depth_limit = options[:max_tree_depth] || Nokogumbo::DEFAULT_MAX_TREE_DEPTH
    utf8_tags = Nokogiri::HTML5.read_and_encode(tags, nil)
    Nokogumbo.fragment(self, utf8_tags, ctx, attribute_limit, error_limit, depth_limit)
  end

  def serialize(options = {}, &block)
    # Bypass XML::Document.serialize which doesn't support options even
    # though XML::Node.serialize does!
    node_serialize = XML::Node.instance_method(:serialize)
    node_serialize.bind(self).call(options, &block)
  end

  # Parse a document fragment from +tags+ (read using +encoding+) inside a
  # fresh HTML5::Document whose encoding is set to UTF-8.
  def self.parse(tags, encoding = nil, options = {})
    owner = HTML5::Document.new
    utf8_tags = HTML5.read_and_encode(tags, encoding)
    owner.encoding = 'UTF-8'
    new(owner, utf8_tags, nil, options)
  end

  # Split search arguments into [params, handler, ns, binds].
  def extract_params(params) # :nodoc:
    # The handler is the first argument that is not a Hash, String or Symbol.
    handler = params.find { |param| ![Hash, String, Symbol].include?(param.class) }
    params -= [handler] if handler

    # Peel trailing Hash/nil arguments off the end: namespaces, then bindings.
    trailing = []
    loop do
      break unless Hash === params.last || params.last.nil?
      trailing << params.pop
      break if params.empty?
    end
    ns, binds = trailing.reverse

    # Without explicit namespaces, merge those declared on the children.
    ns ||= children.each_with_object({}) do |child, merged|
      merged.merge!(child.namespaces)
    end

    [params, handler, ns, binds]
  end
end
60
+ end
61
+ end
62
+ # vim: set shiftwidth=2 softtabstop=2 tabstop=8 expandtab:
@@ -0,0 +1,72 @@
1
+ require 'nokogiri'
2
+
3
+ module Nokogiri
4
+ module HTML5
5
# Overrides for nodes that belong to an HTML5::Document. Every method first
# checks the owning document and defers to +super+ for any other document.
module Node
  # HTML elements can have attributes that contain colons.
  # Nokogiri::XML::Node#[]= treats names with colons as a prefixed QName
  # and tries to create an attribute in a namespace. This is especially
  # annoying with attribute names like xml:lang since libxml2 will
  # actually create the xml namespace if it doesn't exist already.
  def add_child_node_and_reparent_attrs(node)
    return super(node) unless document.is_a?(HTML5::Document)
    # I'm not sure what this method is supposed to do. Reparenting
    # namespaces is handled by libxml2, including child namespaces which
    # this method wouldn't handle.
    # https://github.com/sparklemotion/nokogiri/issues/1790
    add_child_node(node)
    #node.attribute_nodes.find_all { |a| a.namespace }.each do |attr|
    #  attr.remove
    #  ns = attr.namespace
    #  a["#{ns.prefix}:#{attr.name}"] = attr.value
    #end
  end

  # Concatenated +to_html+ of all children. With options[:preserve_newline]
  # a leading "\n" is emitted when HTML5.prepend_newline? says so.
  def inner_html(options = {})
    return super(options) unless document.is_a?(HTML5::Document)
    result = options[:preserve_newline] && HTML5.prepend_newline?(self) ? "\n" : ""
    result << children.map { |child| child.to_html(options) }.join
    result
  end

  # Write this node to +io+.
  #
  # options:: either a single Hash (:encoding, :save_with, :indent,
  #           :indent_text, :preserve_newline) or the legacy positional
  #           form (encoding, save_options).
  #
  # If a block is given it receives the XML::Node::SaveOptions object and
  # may adjust it. When the resulting flags include AS_XML or AS_XHTML the
  # native serializer is used; otherwise the node is written by
  # HTML5.serialize_node_internal.
  def write_to(io, *options)
    return super(io, *options) unless document.is_a?(HTML5::Document)

    # Keep the positional arguments apart from the options Hash. Rebinding
    # +options+ to a Hash and then indexing it with 0 and 1 could never see
    # the positional values.
    if options.first.is_a?(Hash)
      opts = options.first
      legacy_encoding = legacy_save_options = nil
    else
      opts = {}
      legacy_encoding, legacy_save_options = options
    end

    encoding = opts[:encoding] || legacy_encoding
    if Nokogiri.jruby?
      save_options = opts[:save_with] || legacy_save_options
      indent_times = opts[:indent] || 0
    else
      save_options = opts[:save_with] || legacy_save_options || XML::Node::SaveOptions::FORMAT
      indent_times = opts[:indent] || 2
    end
    indent_string = (opts[:indent_text] || ' ') * indent_times

    config = XML::Node::SaveOptions.new(save_options.to_i)
    yield config if block_given?

    config_options = config.options
    if (config_options & (XML::Node::SaveOptions::AS_XML | XML::Node::SaveOptions::AS_XHTML)) != 0
      # Use Nokogiri's serializing code.
      native_write_to(io, encoding, indent_string, config_options)
    else
      # Serialize including the current node.
      encoding ||= document.encoding || Encoding::UTF_8
      internal_ops = {
        preserve_newline: opts[:preserve_newline] || false
      }
      HTML5.serialize_node_internal(self, io, encoding, internal_ops)
    end
  end

  # Build a DocumentFragment from +tags+ with this node as the context.
  def fragment(tags)
    return super(tags) unless document.is_a?(HTML5::Document)
    DocumentFragment.new(document, tags, self)
  end
end
67
+ # Monkey patch
68
+ XML::Node.prepend(HTML5::Node)
69
+ end
70
+ end
71
+
72
+ # vim: set shiftwidth=2 softtabstop=2 tabstop=8 expandtab:
@@ -0,0 +1,3 @@
1
module Nokogumbo
  # Gem version string. Frozen so the constant cannot be mutated in place,
  # matching the other string constants defined by this gem.
  VERSION = "2.0.3".freeze
end
metadata CHANGED
@@ -1,62 +1,75 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: nokogumbo
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.5.0
4
+ version: 2.0.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Sam Ruby
8
- autorequire:
8
+ - Stephen Checkoway
9
+ autorequire:
9
10
  bindir: bin
10
11
  cert_chain: []
11
- date: 2018-01-27 00:00:00.000000000 Z
12
+ date: 2020-11-22 00:00:00.000000000 Z
12
13
  dependencies:
13
14
  - !ruby/object:Gem::Dependency
14
15
  name: nokogiri
15
16
  requirement: !ruby/object:Gem::Requirement
16
17
  requirements:
18
+ - - "~>"
19
+ - !ruby/object:Gem::Version
20
+ version: '1.8'
17
21
  - - ">="
18
22
  - !ruby/object:Gem::Version
19
- version: '0'
23
+ version: 1.8.4
20
24
  type: :runtime
21
25
  prerelease: false
22
26
  version_requirements: !ruby/object:Gem::Requirement
23
27
  requirements:
28
+ - - "~>"
29
+ - !ruby/object:Gem::Version
30
+ version: '1.8'
24
31
  - - ">="
25
32
  - !ruby/object:Gem::Version
26
- version: '0'
33
+ version: 1.8.4
27
34
  description: Nokogumbo allows a Ruby program to invoke the Gumbo HTML5 parser and
28
35
  access the result as a Nokogiri parsed document.
29
- email: rubys@intertwingly.net
36
+ email:
37
+ - rubys@intertwingly.net
38
+ - s@pahtak.org
30
39
  executables: []
31
40
  extensions:
32
- - ext/nokogumboc/extconf.rb
41
+ - ext/nokogumbo/extconf.rb
33
42
  extra_rdoc_files: []
34
43
  files:
35
44
  - LICENSE.txt
36
45
  - README.md
37
- - ext/nokogumboc/extconf.rb
38
- - ext/nokogumboc/nokogumbo.c
46
+ - ext/nokogumbo/extconf.rb
47
+ - ext/nokogumbo/nokogumbo.c
48
+ - gumbo-parser/src/ascii.c
49
+ - gumbo-parser/src/ascii.h
39
50
  - gumbo-parser/src/attribute.c
40
51
  - gumbo-parser/src/attribute.h
41
52
  - gumbo-parser/src/char_ref.c
42
53
  - gumbo-parser/src/char_ref.h
43
- - gumbo-parser/src/char_ref.rl
44
54
  - gumbo-parser/src/error.c
45
55
  - gumbo-parser/src/error.h
56
+ - gumbo-parser/src/foreign_attrs.c
46
57
  - gumbo-parser/src/gumbo.h
47
58
  - gumbo-parser/src/insertion_mode.h
59
+ - gumbo-parser/src/macros.h
48
60
  - gumbo-parser/src/parser.c
49
61
  - gumbo-parser/src/parser.h
62
+ - gumbo-parser/src/replacement.h
50
63
  - gumbo-parser/src/string_buffer.c
51
64
  - gumbo-parser/src/string_buffer.h
52
65
  - gumbo-parser/src/string_piece.c
53
- - gumbo-parser/src/string_piece.h
66
+ - gumbo-parser/src/svg_attrs.c
67
+ - gumbo-parser/src/svg_tags.c
54
68
  - gumbo-parser/src/tag.c
55
- - gumbo-parser/src/tag.in
56
- - gumbo-parser/src/tag_enum.h
57
- - gumbo-parser/src/tag_gperf.h
58
- - gumbo-parser/src/tag_sizes.h
59
- - gumbo-parser/src/tag_strings.h
69
+ - gumbo-parser/src/tag_lookup.c
70
+ - gumbo-parser/src/tag_lookup.h
71
+ - gumbo-parser/src/token_buffer.c
72
+ - gumbo-parser/src/token_buffer.h
60
73
  - gumbo-parser/src/token_type.h
61
74
  - gumbo-parser/src/tokenizer.c
62
75
  - gumbo-parser/src/tokenizer.h
@@ -67,14 +80,21 @@ files:
67
80
  - gumbo-parser/src/util.h
68
81
  - gumbo-parser/src/vector.c
69
82
  - gumbo-parser/src/vector.h
70
- - gumbo-parser/visualc/include/strings.h
71
83
  - lib/nokogumbo.rb
72
- - test-nokogumbo.rb
84
+ - lib/nokogumbo/html5.rb
85
+ - lib/nokogumbo/html5/document.rb
86
+ - lib/nokogumbo/html5/document_fragment.rb
87
+ - lib/nokogumbo/html5/node.rb
88
+ - lib/nokogumbo/version.rb
73
89
  homepage: https://github.com/rubys/nokogumbo/#readme
74
90
  licenses:
75
91
  - Apache-2.0
76
- metadata: {}
77
- post_install_message:
92
+ metadata:
93
+ bug_tracker_uri: https://github.com/rubys/nokogumbo/issues
94
+ changelog_uri: https://github.com/rubys/nokogumbo/blob/master/CHANGELOG.md
95
+ homepage_uri: https://github.com/rubys/nokogumbo/#readme
96
+ source_code_uri: https://github.com/rubys/nokogumbo
97
+ post_install_message:
78
98
  rdoc_options: []
79
99
  require_paths:
80
100
  - lib
@@ -82,16 +102,15 @@ required_ruby_version: !ruby/object:Gem::Requirement
82
102
  requirements:
83
103
  - - ">="
84
104
  - !ruby/object:Gem::Version
85
- version: '0'
105
+ version: '2.1'
86
106
  required_rubygems_version: !ruby/object:Gem::Requirement
87
107
  requirements:
88
108
  - - ">="
89
109
  - !ruby/object:Gem::Version
90
110
  version: '0'
91
111
  requirements: []
92
- rubyforge_project:
93
- rubygems_version: 2.7.4
94
- signing_key:
112
+ rubygems_version: 3.1.2
113
+ signing_key:
95
114
  specification_version: 4
96
115
  summary: Nokogiri interface to the Gumbo HTML5 parser
97
116
  test_files: []