nokogumbo 1.5.0 → 2.0.5

Sign up to get free protection for your applications and to get access to all the features.
Files changed (57) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +237 -26
  3. data/ext/nokogumbo/extconf.rb +144 -0
  4. data/ext/nokogumbo/nokogumbo.c +793 -0
  5. data/gumbo-parser/src/ascii.c +75 -0
  6. data/gumbo-parser/src/ascii.h +115 -0
  7. data/gumbo-parser/src/attribute.c +26 -28
  8. data/gumbo-parser/src/attribute.h +3 -23
  9. data/gumbo-parser/src/char_ref.c +5972 -6816
  10. data/gumbo-parser/src/char_ref.h +14 -45
  11. data/gumbo-parser/src/error.c +510 -163
  12. data/gumbo-parser/src/error.h +70 -147
  13. data/gumbo-parser/src/foreign_attrs.c +104 -0
  14. data/gumbo-parser/src/gumbo.h +577 -305
  15. data/gumbo-parser/src/insertion_mode.h +4 -28
  16. data/gumbo-parser/src/macros.h +91 -0
  17. data/gumbo-parser/src/parser.c +2922 -2228
  18. data/gumbo-parser/src/parser.h +6 -22
  19. data/gumbo-parser/src/replacement.h +33 -0
  20. data/gumbo-parser/src/string_buffer.c +43 -50
  21. data/gumbo-parser/src/string_buffer.h +24 -40
  22. data/gumbo-parser/src/string_piece.c +39 -39
  23. data/gumbo-parser/src/svg_attrs.c +174 -0
  24. data/gumbo-parser/src/svg_tags.c +137 -0
  25. data/gumbo-parser/src/tag.c +186 -59
  26. data/gumbo-parser/src/tag_lookup.c +382 -0
  27. data/gumbo-parser/src/tag_lookup.h +13 -0
  28. data/gumbo-parser/src/token_buffer.c +79 -0
  29. data/gumbo-parser/src/token_buffer.h +71 -0
  30. data/gumbo-parser/src/token_type.h +1 -25
  31. data/gumbo-parser/src/tokenizer.c +2128 -1562
  32. data/gumbo-parser/src/tokenizer.h +41 -52
  33. data/gumbo-parser/src/tokenizer_states.h +281 -45
  34. data/gumbo-parser/src/utf8.c +98 -123
  35. data/gumbo-parser/src/utf8.h +84 -52
  36. data/gumbo-parser/src/util.c +48 -38
  37. data/gumbo-parser/src/util.h +10 -40
  38. data/gumbo-parser/src/vector.c +45 -57
  39. data/gumbo-parser/src/vector.h +17 -39
  40. data/lib/nokogumbo.rb +18 -170
  41. data/lib/nokogumbo/html5.rb +252 -0
  42. data/lib/nokogumbo/html5/document.rb +53 -0
  43. data/lib/nokogumbo/html5/document_fragment.rb +62 -0
  44. data/lib/nokogumbo/html5/node.rb +72 -0
  45. data/lib/nokogumbo/version.rb +3 -0
  46. metadata +40 -21
  47. data/ext/nokogumboc/extconf.rb +0 -60
  48. data/ext/nokogumboc/nokogumbo.c +0 -295
  49. data/gumbo-parser/src/char_ref.rl +0 -2554
  50. data/gumbo-parser/src/string_piece.h +0 -38
  51. data/gumbo-parser/src/tag.in +0 -150
  52. data/gumbo-parser/src/tag_enum.h +0 -153
  53. data/gumbo-parser/src/tag_gperf.h +0 -105
  54. data/gumbo-parser/src/tag_sizes.h +0 -4
  55. data/gumbo-parser/src/tag_strings.h +0 -153
  56. data/gumbo-parser/visualc/include/strings.h +0 -4
  57. data/test-nokogumbo.rb +0 -190
@@ -0,0 +1,252 @@
1
+ require 'nokogumbo/html5/document'
2
+ require 'nokogumbo/html5/document_fragment'
3
+ require 'nokogumbo/html5/node'
4
+
5
+ module Nokogiri
6
+ # Parse an HTML 5 document. Convenience method for Nokogiri::HTML5::Document.parse
7
+ def self.HTML5(string_or_io, url = nil, encoding = nil, **options, &block)
8
+ Nokogiri::HTML5::Document.parse(string_or_io, url, encoding, **options, &block)
9
+ end
10
+
11
+ module HTML5
12
+ # HTML uses the XHTML namespace.
13
+ HTML_NAMESPACE = 'http://www.w3.org/1999/xhtml'.freeze
14
+ MATHML_NAMESPACE = 'http://www.w3.org/1998/Math/MathML'.freeze
15
+ SVG_NAMESPACE = 'http://www.w3.org/2000/svg'.freeze
16
+ XLINK_NAMESPACE = 'http://www.w3.org/1999/xlink'.freeze
17
+ XML_NAMESPACE = 'http://www.w3.org/XML/1998/namespace'.freeze
18
+ XMLNS_NAMESPACE = 'http://www.w3.org/2000/xmlns/'.freeze
19
+
20
+ # Parse an HTML 5 document. Convenience method for Nokogiri::HTML5::Document.parse
21
+ def self.parse(string, url = nil, encoding = nil, **options, &block)
22
+ Document.parse(string, url, encoding, **options, &block)
23
+ end
24
+
25
+ # Parse a fragment from +string+. Convenience method for
26
+ # Nokogiri::HTML5::DocumentFragment.parse.
27
+ def self.fragment(string, encoding = nil, **options)
28
+ DocumentFragment.parse(string, encoding, options)
29
+ end
30
+
31
+ # Fetch and parse an HTML document from the web, following redirects,
32
+ # handling https, and determining the character encoding using HTML5
33
+ # rules. +uri+ may be a +String+ or a +URI+. +options+ contains
34
+ # http headers and special options. Everything which is not a
35
+ # special option is considered a header. Special options include:
36
+ # * :follow_limit => number of redirects which are followed
37
+ # * :basic_auth => [username, password]
38
+ def self.get(uri, options={})
39
+ headers = options.clone
40
+ headers = {:follow_limit => headers} if Numeric === headers # deprecated
41
+ limit=headers[:follow_limit] ? headers.delete(:follow_limit).to_i : 10
42
+
43
+ require 'net/http'
44
+ uri = URI(uri) unless URI === uri
45
+
46
+ http = Net::HTTP.new(uri.host, uri.port)
47
+
48
+ # TLS / SSL support
49
+ http.use_ssl = true if uri.scheme == 'https'
50
+
51
+ # Pass through Net::HTTP override values, which currently include:
52
+ # :ca_file, :ca_path, :cert, :cert_store, :ciphers,
53
+ # :close_on_empty_response, :continue_timeout, :key, :open_timeout,
54
+ # :read_timeout, :ssl_timeout, :ssl_version, :use_ssl,
55
+ # :verify_callback, :verify_depth, :verify_mode
56
+ options.each do |key, value|
57
+ http.send "#{key}=", headers.delete(key) if http.respond_to? "#{key}="
58
+ end
59
+
60
+ request = Net::HTTP::Get.new(uri.request_uri)
61
+
62
+ # basic authentication
63
+ auth = headers.delete(:basic_auth)
64
+ auth ||= [uri.user, uri.password] if uri.user && uri.password
65
+ request.basic_auth auth.first, auth.last if auth
66
+
67
+ # remaining options are treated as headers
68
+ headers.each {|key, value| request[key.to_s] = value.to_s}
69
+
70
+ response = http.request(request)
71
+
72
+ case response
73
+ when Net::HTTPSuccess
74
+ doc = parse(reencode(response.body, response['content-type']), options)
75
+ doc.instance_variable_set('@response', response)
76
+ doc.class.send(:attr_reader, :response)
77
+ doc
78
+ when Net::HTTPRedirection
79
+ response.value if limit <= 1
80
+ location = URI.join(uri, response['location'])
81
+ get(location, options.merge(:follow_limit => limit-1))
82
+ else
83
+ response.value
84
+ end
85
+ end
86
+
87
+ private
88
+
89
+ def self.read_and_encode(string, encoding)
90
+ # Read the string with the given encoding.
91
+ if string.respond_to?(:read)
92
+ if encoding.nil?
93
+ string = string.read
94
+ else
95
+ string = string.read(encoding: encoding)
96
+ end
97
+ else
98
+ # Otherwise the string has the given encoding.
99
+ string = string.to_s
100
+ if encoding
101
+ string = string.dup
102
+ string.force_encoding(encoding)
103
+ end
104
+ end
105
+
106
+ # convert to UTF-8
107
+ if string.encoding != Encoding::UTF_8
108
+ string = reencode(string)
109
+ end
110
+ string
111
+ end
112
+
113
+ # Charset sniffing is a complex and controversial topic that understandably
114
+ # isn't done _by default_ by the Ruby Net::HTTP library. This being said,
115
+ # it is a very real problem for consumers of HTML as the default for HTML
116
+ # is iso-8859-1, most "good" producers use utf-8, and the Gumbo parser
117
+ # *only* supports utf-8.
118
+ #
119
+ # Accordingly, Nokogiri::HTML::Document.parse provides limited encoding
120
+ # detection. Following this lead, Nokogiri::HTML5 attempts to do likewise,
121
+ # while attempting to more closely follow the HTML5 standard.
122
+ #
123
+ # http://bugs.ruby-lang.org/issues/2567
124
+ # http://www.w3.org/TR/html5/syntax.html#determining-the-character-encoding
125
+ #
126
+ def self.reencode(body, content_type=nil)
127
+ if body.encoding == Encoding::ASCII_8BIT
128
+ encoding = nil
129
+
130
+ # look for a Byte Order Mark (BOM)
131
+ initial_bytes = body[0..2].bytes
132
+ if initial_bytes[0..2] == [0xEF, 0xBB, 0xBF]
133
+ encoding = Encoding::UTF_8
134
+ elsif initial_bytes[0..1] == [0xFE, 0xFF]
135
+ encoding = Encoding::UTF_16BE
136
+ elsif initial_bytes[0..1] == [0xFF, 0xFE]
137
+ encoding = Encoding::UTF_16LE
138
+ end
139
+
140
+ # look for a charset in the content-type header
141
+ if content_type
142
+ encoding ||= content_type[/charset=["']?(.*?)($|["';\s])/i, 1]
143
+ end
144
+
145
+ # look for a charset in a meta tag in the first 1024 bytes
146
+ if not encoding
147
+ data = body[0..1023].gsub(/<!--.*?(-->|\Z)/m, '')
148
+ data.scan(/<meta.*?>/m).each do |meta|
149
+ encoding ||= meta[/charset=["']?([^>]*?)($|["'\s>])/im, 1]
150
+ end
151
+ end
152
+
153
+ # if all else fails, default to the official default encoding for HTML
154
+ encoding ||= Encoding::ISO_8859_1
155
+
156
+ # change the encoding to match the detected or inferred encoding
157
+ body = body.dup
158
+ begin
159
+ body.force_encoding(encoding)
160
+ rescue ArgumentError
161
+ body.force_encoding(Encoding::ISO_8859_1)
162
+ end
163
+ end
164
+
165
+ body.encode(Encoding::UTF_8)
166
+ end
167
+
168
+ def self.serialize_node_internal(current_node, io, encoding, options)
169
+ case current_node.type
170
+ when XML::Node::ELEMENT_NODE
171
+ ns = current_node.namespace
172
+ ns_uri = ns.nil? ? nil : ns.href
173
+ # XXX(sfc): attach namespaces to all nodes, even html?
174
+ if ns_uri.nil? || ns_uri == HTML_NAMESPACE || ns_uri == MATHML_NAMESPACE || ns_uri == SVG_NAMESPACE
175
+ tagname = current_node.name
176
+ else
177
+ tagname = "#{ns.prefix}:#{current_node.name}"
178
+ end
179
+ io << '<' << tagname
180
+ current_node.attribute_nodes.each do |attr|
181
+ attr_ns = attr.namespace
182
+ if attr_ns.nil?
183
+ attr_name = attr.name
184
+ else
185
+ ns_uri = attr_ns.href
186
+ if ns_uri == XML_NAMESPACE
187
+ attr_name = 'xml:' + attr.name.sub(/^[^:]*:/, '')
188
+ elsif ns_uri == XMLNS_NAMESPACE && attr.name.sub(/^[^:]*:/, '') == 'xmlns'
189
+ attr_name = 'xmlns'
190
+ elsif ns_uri == XMLNS_NAMESPACE
191
+ attr_name = 'xmlns:' + attr.name.sub(/^[^:]*:/, '')
192
+ elsif ns_uri == XLINK_NAMESPACE
193
+ attr_name = 'xlink:' + attr.name.sub(/^[^:]*:/, '')
194
+ else
195
+ attr_name = "#{attr_ns.prefix}:#{attr.name}"
196
+ end
197
+ end
198
+ io << ' ' << attr_name << '="' << escape_text(attr.content, encoding, true) << '"'
199
+ end
200
+ io << '>'
201
+ if !%w[area base basefont bgsound br col embed frame hr img input keygen
202
+ link meta param source track wbr].include?(current_node.name)
203
+ io << "\n" if options[:preserve_newline] && prepend_newline?(current_node)
204
+ current_node.children.each do |child|
205
+ # XXX(sfc): Templates handled specially?
206
+ serialize_node_internal(child, io, encoding, options)
207
+ end
208
+ io << '</' << tagname << '>'
209
+ end
210
+ when XML::Node::TEXT_NODE
211
+ parent = current_node.parent
212
+ if parent.element? && %w[style script xmp iframe noembed noframes plaintext noscript].include?(parent.name)
213
+ io << current_node.content
214
+ else
215
+ io << escape_text(current_node.content, encoding, false)
216
+ end
217
+ when XML::Node::CDATA_SECTION_NODE
218
+ io << '<![CDATA[' << current_node.content << ']]>'
219
+ when XML::Node::COMMENT_NODE
220
+ io << '<!--' << current_node.content << '-->'
221
+ when XML::Node::PI_NODE
222
+ io << '<?' << current_node.content << '>'
223
+ when XML::Node::DOCUMENT_TYPE_NODE, XML::Node::DTD_NODE
224
+ io << '<!DOCTYPE ' << current_node.name << '>'
225
+ when XML::Node::HTML_DOCUMENT_NODE, XML::Node::DOCUMENT_FRAG_NODE
226
+ current_node.children.each do |child|
227
+ serialize_node_internal(child, io, encoding, options)
228
+ end
229
+ else
230
+ raise "Unexpected node '#{current_node.name}' of type #{current_node.type}"
231
+ end
232
+ end
233
+
234
+ def self.escape_text(text, encoding, attribute_mode)
235
+ if attribute_mode
236
+ text = text.gsub(/[&\u00a0"]/,
237
+ '&' => '&amp;', "\u00a0" => '&nbsp;', '"' => '&quot;')
238
+ else
239
+ text = text.gsub(/[&\u00a0<>]/,
240
+ '&' => '&amp;', "\u00a0" => '&nbsp;', '<' => '&lt;', '>' => '&gt;')
241
+ end
242
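+ # Not part of the standard: characters the target encoding cannot represent are emitted as hexadecimal numeric character references.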
243
+ text.encode(encoding, fallback: lambda { |c| "&\#x#{c.ord.to_s(16)};" })
244
+ end
245
+
246
+ def self.prepend_newline?(node)
247
+ return false unless %w[pre textarea listing].include?(node.name) && !node.children.empty?
248
+ first_child = node.children[0]
249
+ first_child.text? && first_child.content.start_with?("\n")
250
+ end
251
+ end
252
+ end
@@ -0,0 +1,53 @@
1
+ module Nokogiri
2
+ module HTML5
3
+ class Document < Nokogiri::HTML::Document
4
+ def self.parse(string_or_io, url = nil, encoding = nil, **options, &block)
5
+ yield options if block_given?
6
+ string_or_io = '' unless string_or_io
7
+
8
+ if string_or_io.respond_to?(:encoding) && string_or_io.encoding.name != 'ASCII-8BIT'
9
+ encoding ||= string_or_io.encoding.name
10
+ end
11
+
12
+ if string_or_io.respond_to?(:read) && string_or_io.respond_to?(:path)
13
+ url ||= string_or_io.path
14
+ end
15
+ unless string_or_io.respond_to?(:read) || string_or_io.respond_to?(:to_str)
16
+ raise ArgumentError.new("not a string or IO object")
17
+ end
18
+ do_parse(string_or_io, url, encoding, options)
19
+ end
20
+
21
+ def self.read_io(io, url = nil, encoding = nil, **options)
22
+ raise ArgumentError.new("io object doesn't respond to :read") unless io.respond_to?(:read)
23
+ do_parse(io, url, encoding, options)
24
+ end
25
+
26
+ def self.read_memory(string, url = nil, encoding = nil, **options)
27
+ raise ArgumentError.new("string object doesn't respond to :to_str") unless string.respond_to?(:to_str)
28
+ do_parse(string, url, encoding, options)
29
+ end
30
+
31
+ def fragment(tags = nil)
32
+ DocumentFragment.new(self, tags, self.root)
33
+ end
34
+
35
+ def to_xml(options = {}, &block)
36
+ # Bypass XML::Document#to_xml which doesn't add
37
+ # XML::Node::SaveOptions::AS_XML like XML::Node#to_xml does.
38
+ XML::Node.instance_method(:to_xml).bind(self).call(options, &block)
39
+ end
40
+
41
+ private
42
+ def self.do_parse(string_or_io, url, encoding, options)
43
+ string = HTML5.read_and_encode(string_or_io, encoding)
44
+ max_attributes = options[:max_attributes] || Nokogumbo::DEFAULT_MAX_ATTRIBUTES
45
+ max_errors = options[:max_errors] || options[:max_parse_errors] || Nokogumbo::DEFAULT_MAX_ERRORS
46
+ max_depth = options[:max_tree_depth] || Nokogumbo::DEFAULT_MAX_TREE_DEPTH
47
+ doc = Nokogumbo.parse(string, url, max_attributes, max_errors, max_depth)
48
+ doc.encoding = 'UTF-8'
49
+ doc
50
+ end
51
+ end
52
+ end
53
+ end
@@ -0,0 +1,62 @@
1
+ require 'nokogiri'
2
+
3
+ module Nokogiri
4
+ module HTML5
5
+ class DocumentFragment < Nokogiri::HTML::DocumentFragment
6
+ attr_accessor :document
7
+ attr_accessor :errors
8
+
9
+ # Create a document fragment.
10
+ def initialize(doc, tags = nil, ctx = nil, options = {})
11
+ self.document = doc
12
+ self.errors = []
13
+ return self unless tags
14
+
15
+ max_attributes = options[:max_attributes] || Nokogumbo::DEFAULT_MAX_ATTRIBUTES
16
+ max_errors = options[:max_errors] || Nokogumbo::DEFAULT_MAX_ERRORS
17
+ max_depth = options[:max_tree_depth] || Nokogumbo::DEFAULT_MAX_TREE_DEPTH
18
+ tags = Nokogiri::HTML5.read_and_encode(tags, nil)
19
+ Nokogumbo.fragment(self, tags, ctx, max_attributes, max_errors, max_depth)
20
+ end
21
+
22
+ def serialize(options = {}, &block)
23
+ # Bypass XML::Document.serialize which doesn't support options even
24
+ # though XML::Node.serialize does!
25
+ XML::Node.instance_method(:serialize).bind(self).call(options, &block)
26
+ end
27
+
28
+ # Parse a document fragment from +tags+, returning a DocumentFragment.
29
+ def self.parse(tags, encoding = nil, options = {})
30
+ doc = HTML5::Document.new
31
+ tags = HTML5.read_and_encode(tags, encoding)
32
+ doc.encoding = 'UTF-8'
33
+ new(doc, tags, nil, options)
34
+ end
35
+
36
+ def extract_params params # :nodoc:
37
+ handler = params.find do |param|
38
+ ![Hash, String, Symbol].include?(param.class)
39
+ end
40
+ params -= [handler] if handler
41
+
42
+ hashes = []
43
+ while Hash === params.last || params.last.nil?
44
+ hashes << params.pop
45
+ break if params.empty?
46
+ end
47
+ ns, binds = hashes.reverse
48
+
49
+ ns ||=
50
+ begin
51
+ ns = Hash.new
52
+ children.each { |child| ns.merge!(child.namespaces) }
53
+ ns
54
+ end
55
+
56
+ [params, handler, ns, binds]
57
+ end
58
+
59
+ end
60
+ end
61
+ end
62
+ # vim: set shiftwidth=2 softtabstop=2 tabstop=8 expandtab:
@@ -0,0 +1,72 @@
1
+ require 'nokogiri'
2
+
3
+ module Nokogiri
4
+ module HTML5
5
+ module Node
6
+ # HTML elements can have attributes that contain colons.
7
+ # Nokogiri::XML::Node#[]= treats names with colons as a prefixed QName
8
+ # and tries to create an attribute in a namespace. This is especially
9
+ # annoying with attribute names like xml:lang since libxml2 will
10
+ # actually create the xml namespace if it doesn't exist already.
11
+ def add_child_node_and_reparent_attrs(node)
12
+ return super(node) unless document.is_a?(HTML5::Document)
13
+ # I'm not sure what this method is supposed to do. Reparenting
14
+ # namespaces is handled by libxml2, including child namespaces which
15
+ # this method wouldn't handle.
16
+ # https://github.com/sparklemotion/nokogiri/issues/1790
17
+ add_child_node(node)
18
+ #node.attribute_nodes.find_all { |a| a.namespace }.each do |attr|
19
+ # attr.remove
20
+ # ns = attr.namespace
21
+ # a["#{ns.prefix}:#{attr.name}"] = attr.value
22
+ #end
23
+ end
24
+
25
+ def inner_html(options = {})
26
+ return super(options) unless document.is_a?(HTML5::Document)
27
+ result = options[:preserve_newline] && HTML5.prepend_newline?(self) ? "\n" : ""
28
+ result << children.map { |child| child.to_html(options) }.join
29
+ result
30
+ end
31
+
32
+ def write_to(io, *options)
33
+ return super(io, *options) unless document.is_a?(HTML5::Document)
34
+ options = options.first.is_a?(Hash) ? options.shift : {}
35
+ encoding = options[:encoding] || options[0]
36
+ if Nokogiri.jruby?
37
+ save_options = options[:save_with] || options[1]
38
+ indent_times = options[:indent] || 0
39
+ else
40
+ save_options = options[:save_with] || options[1] || XML::Node::SaveOptions::FORMAT
41
+ indent_times = options[:indent] || 2
42
+ end
43
+ indent_string = (options[:indent_text] || ' ') * indent_times
44
+
45
+ config = XML::Node::SaveOptions.new(save_options.to_i)
46
+ yield config if block_given?
47
+
48
+ config_options = config.options
49
+ if (config_options & (XML::Node::SaveOptions::AS_XML | XML::Node::SaveOptions::AS_XHTML) != 0)
50
+ # Use Nokogiri's serializing code.
51
+ native_write_to(io, encoding, indent_string, config_options)
52
+ else
53
+ # Serialize including the current node.
54
+ encoding ||= document.encoding || Encoding::UTF_8
55
+ internal_ops = {
56
+ preserve_newline: options[:preserve_newline] || false
57
+ }
58
+ HTML5.serialize_node_internal(self, io, encoding, internal_ops)
59
+ end
60
+ end
61
+
62
+ def fragment(tags)
63
+ return super(tags) unless document.is_a?(HTML5::Document)
64
+ DocumentFragment.new(document, tags, self)
65
+ end
66
+ end
67
+ # Monkey patch
68
+ XML::Node.prepend(HTML5::Node)
69
+ end
70
+ end
71
+
72
+ # vim: set shiftwidth=2 softtabstop=2 tabstop=8 expandtab:
@@ -0,0 +1,3 @@
1
+ module Nokogumbo
2
+ VERSION = "2.0.5"
3
+ end
metadata CHANGED
@@ -1,62 +1,75 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: nokogumbo
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.5.0
4
+ version: 2.0.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - Sam Ruby
8
+ - Stephen Checkoway
8
9
  autorequire:
9
10
  bindir: bin
10
11
  cert_chain: []
11
- date: 2018-01-27 00:00:00.000000000 Z
12
+ date: 2021-03-19 00:00:00.000000000 Z
12
13
  dependencies:
13
14
  - !ruby/object:Gem::Dependency
14
15
  name: nokogiri
15
16
  requirement: !ruby/object:Gem::Requirement
16
17
  requirements:
18
+ - - "~>"
19
+ - !ruby/object:Gem::Version
20
+ version: '1.8'
17
21
  - - ">="
18
22
  - !ruby/object:Gem::Version
19
- version: '0'
23
+ version: 1.8.4
20
24
  type: :runtime
21
25
  prerelease: false
22
26
  version_requirements: !ruby/object:Gem::Requirement
23
27
  requirements:
28
+ - - "~>"
29
+ - !ruby/object:Gem::Version
30
+ version: '1.8'
24
31
  - - ">="
25
32
  - !ruby/object:Gem::Version
26
- version: '0'
33
+ version: 1.8.4
27
34
  description: Nokogumbo allows a Ruby program to invoke the Gumbo HTML5 parser and
28
35
  access the result as a Nokogiri parsed document.
29
- email: rubys@intertwingly.net
36
+ email:
37
+ - rubys@intertwingly.net
38
+ - s@pahtak.org
30
39
  executables: []
31
40
  extensions:
32
- - ext/nokogumboc/extconf.rb
41
+ - ext/nokogumbo/extconf.rb
33
42
  extra_rdoc_files: []
34
43
  files:
35
44
  - LICENSE.txt
36
45
  - README.md
37
- - ext/nokogumboc/extconf.rb
38
- - ext/nokogumboc/nokogumbo.c
46
+ - ext/nokogumbo/extconf.rb
47
+ - ext/nokogumbo/nokogumbo.c
48
+ - gumbo-parser/src/ascii.c
49
+ - gumbo-parser/src/ascii.h
39
50
  - gumbo-parser/src/attribute.c
40
51
  - gumbo-parser/src/attribute.h
41
52
  - gumbo-parser/src/char_ref.c
42
53
  - gumbo-parser/src/char_ref.h
43
- - gumbo-parser/src/char_ref.rl
44
54
  - gumbo-parser/src/error.c
45
55
  - gumbo-parser/src/error.h
56
+ - gumbo-parser/src/foreign_attrs.c
46
57
  - gumbo-parser/src/gumbo.h
47
58
  - gumbo-parser/src/insertion_mode.h
59
+ - gumbo-parser/src/macros.h
48
60
  - gumbo-parser/src/parser.c
49
61
  - gumbo-parser/src/parser.h
62
+ - gumbo-parser/src/replacement.h
50
63
  - gumbo-parser/src/string_buffer.c
51
64
  - gumbo-parser/src/string_buffer.h
52
65
  - gumbo-parser/src/string_piece.c
53
- - gumbo-parser/src/string_piece.h
66
+ - gumbo-parser/src/svg_attrs.c
67
+ - gumbo-parser/src/svg_tags.c
54
68
  - gumbo-parser/src/tag.c
55
- - gumbo-parser/src/tag.in
56
- - gumbo-parser/src/tag_enum.h
57
- - gumbo-parser/src/tag_gperf.h
58
- - gumbo-parser/src/tag_sizes.h
59
- - gumbo-parser/src/tag_strings.h
69
+ - gumbo-parser/src/tag_lookup.c
70
+ - gumbo-parser/src/tag_lookup.h
71
+ - gumbo-parser/src/token_buffer.c
72
+ - gumbo-parser/src/token_buffer.h
60
73
  - gumbo-parser/src/token_type.h
61
74
  - gumbo-parser/src/tokenizer.c
62
75
  - gumbo-parser/src/tokenizer.h
@@ -67,13 +80,20 @@ files:
67
80
  - gumbo-parser/src/util.h
68
81
  - gumbo-parser/src/vector.c
69
82
  - gumbo-parser/src/vector.h
70
- - gumbo-parser/visualc/include/strings.h
71
83
  - lib/nokogumbo.rb
72
- - test-nokogumbo.rb
84
+ - lib/nokogumbo/html5.rb
85
+ - lib/nokogumbo/html5/document.rb
86
+ - lib/nokogumbo/html5/document_fragment.rb
87
+ - lib/nokogumbo/html5/node.rb
88
+ - lib/nokogumbo/version.rb
73
89
  homepage: https://github.com/rubys/nokogumbo/#readme
74
90
  licenses:
75
91
  - Apache-2.0
76
- metadata: {}
92
+ metadata:
93
+ bug_tracker_uri: https://github.com/rubys/nokogumbo/issues
94
+ changelog_uri: https://github.com/rubys/nokogumbo/blob/master/CHANGELOG.md
95
+ homepage_uri: https://github.com/rubys/nokogumbo/#readme
96
+ source_code_uri: https://github.com/rubys/nokogumbo
77
97
  post_install_message:
78
98
  rdoc_options: []
79
99
  require_paths:
@@ -82,15 +102,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
82
102
  requirements:
83
103
  - - ">="
84
104
  - !ruby/object:Gem::Version
85
- version: '0'
105
+ version: '2.1'
86
106
  required_rubygems_version: !ruby/object:Gem::Requirement
87
107
  requirements:
88
108
  - - ">="
89
109
  - !ruby/object:Gem::Version
90
110
  version: '0'
91
111
  requirements: []
92
- rubyforge_project:
93
- rubygems_version: 2.7.4
112
+ rubygems_version: 3.1.4
94
113
  signing_key:
95
114
  specification_version: 4
96
115
  summary: Nokogiri interface to the Gumbo HTML5 parser