RubyGems - nokogumbo - Versions diffs - 1.5.0 → 2.0.5 - Mend

nokogumbo 1.5.0 → 2.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (57) hide show

checksums.yaml +4 -4
data/README.md +237 -26
data/ext/nokogumbo/extconf.rb +144 -0
data/ext/nokogumbo/nokogumbo.c +793 -0
data/gumbo-parser/src/ascii.c +75 -0
data/gumbo-parser/src/ascii.h +115 -0
data/gumbo-parser/src/attribute.c +26 -28
data/gumbo-parser/src/attribute.h +3 -23
data/gumbo-parser/src/char_ref.c +5972 -6816
data/gumbo-parser/src/char_ref.h +14 -45
data/gumbo-parser/src/error.c +510 -163
data/gumbo-parser/src/error.h +70 -147
data/gumbo-parser/src/foreign_attrs.c +104 -0
data/gumbo-parser/src/gumbo.h +577 -305
data/gumbo-parser/src/insertion_mode.h +4 -28
data/gumbo-parser/src/macros.h +91 -0
data/gumbo-parser/src/parser.c +2922 -2228
data/gumbo-parser/src/parser.h +6 -22
data/gumbo-parser/src/replacement.h +33 -0
data/gumbo-parser/src/string_buffer.c +43 -50
data/gumbo-parser/src/string_buffer.h +24 -40
data/gumbo-parser/src/string_piece.c +39 -39
data/gumbo-parser/src/svg_attrs.c +174 -0
data/gumbo-parser/src/svg_tags.c +137 -0
data/gumbo-parser/src/tag.c +186 -59
data/gumbo-parser/src/tag_lookup.c +382 -0
data/gumbo-parser/src/tag_lookup.h +13 -0
data/gumbo-parser/src/token_buffer.c +79 -0
data/gumbo-parser/src/token_buffer.h +71 -0
data/gumbo-parser/src/token_type.h +1 -25
data/gumbo-parser/src/tokenizer.c +2128 -1562
data/gumbo-parser/src/tokenizer.h +41 -52
data/gumbo-parser/src/tokenizer_states.h +281 -45
data/gumbo-parser/src/utf8.c +98 -123
data/gumbo-parser/src/utf8.h +84 -52
data/gumbo-parser/src/util.c +48 -38
data/gumbo-parser/src/util.h +10 -40
data/gumbo-parser/src/vector.c +45 -57
data/gumbo-parser/src/vector.h +17 -39
data/lib/nokogumbo.rb +18 -170
data/lib/nokogumbo/html5.rb +252 -0
data/lib/nokogumbo/html5/document.rb +53 -0
data/lib/nokogumbo/html5/document_fragment.rb +62 -0
data/lib/nokogumbo/html5/node.rb +72 -0
data/lib/nokogumbo/version.rb +3 -0
metadata +40 -21
data/ext/nokogumboc/extconf.rb +0 -60
data/ext/nokogumboc/nokogumbo.c +0 -295
data/gumbo-parser/src/char_ref.rl +0 -2554
data/gumbo-parser/src/string_piece.h +0 -38
data/gumbo-parser/src/tag.in +0 -150
data/gumbo-parser/src/tag_enum.h +0 -153
data/gumbo-parser/src/tag_gperf.h +0 -105
data/gumbo-parser/src/tag_sizes.h +0 -4
data/gumbo-parser/src/tag_strings.h +0 -153
data/gumbo-parser/visualc/include/strings.h +0 -4
data/test-nokogumbo.rb +0 -190

data/lib/nokogumbo/html5.rb ADDED Viewed

@@ -0,0 +1,252 @@
+require 'nokogumbo/html5/document'
+require 'nokogumbo/html5/document_fragment'
+require 'nokogumbo/html5/node'
+module Nokogiri
+  # Parse an HTML 5 document. Convenience method for Nokogiri::HTML5::Document.parse
+  def self.HTML5(string_or_io, url = nil, encoding = nil, **options, &block)
+    Nokogiri::HTML5::Document.parse(string_or_io, url, encoding, **options, &block)
+  end
+  module HTML5
+    # HTML uses the XHTML namespace.
+    HTML_NAMESPACE = 'http://www.w3.org/1999/xhtml'.freeze
+    MATHML_NAMESPACE = 'http://www.w3.org/1998/Math/MathML'.freeze
+    SVG_NAMESPACE = 'http://www.w3.org/2000/svg'.freeze
+    XLINK_NAMESPACE = 'http://www.w3.org/1999/xlink'.freeze
+    XML_NAMESPACE = 'http://www.w3.org/XML/1998/namespace'.freeze
+    XMLNS_NAMESPACE = 'http://www.w3.org/2000/xmlns/'.freeze
+    # Parse an HTML 5 document. Convenience method for Nokogiri::HTML5::Document.parse
+    def self.parse(string, url = nil, encoding = nil, **options, &block)
+      Document.parse(string, url, encoding, **options, &block)
+    end
+    # Parse a fragment from +string+. Convenience method for
+    # Nokogiri::HTML5::DocumentFragment.parse.
+    def self.fragment(string, encoding = nil, **options)
+      DocumentFragment.parse(string, encoding, options)
+    end
+    # Fetch and parse a HTML document from the web, following redirects,
+    # handling https, and determining the character encoding using HTML5
+    # rules.  +uri+ may be a +String+ or a +URI+.  +options+ contains
+    # http headers and special options.  Everything which is not a
+    # special option is considered a header.  Special options include:
+    #  * :follow_limit => number of redirects which are followed
+    #  * :basic_auth => [username, password]
+    def self.get(uri, options={})
+      headers = options.clone
+      headers = {:follow_limit => headers} if Numeric === headers # deprecated
+      limit=headers[:follow_limit] ? headers.delete(:follow_limit).to_i : 10
+      require 'net/http'
+      uri = URI(uri) unless URI === uri
+      http = Net::HTTP.new(uri.host, uri.port)
+      # TLS / SSL support
+      http.use_ssl = true if uri.scheme == 'https'
+      # Pass through Net::HTTP override values, which currently include:
+      #   :ca_file, :ca_path, :cert, :cert_store, :ciphers,
+      #   :close_on_empty_response, :continue_timeout, :key, :open_timeout,
+      #   :read_timeout, :ssl_timeout, :ssl_version, :use_ssl,
+      #   :verify_callback, :verify_depth, :verify_mode
+      options.each do |key, value|
+        http.send "#{key}=", headers.delete(key) if http.respond_to? "#{key}="
+      end
+      request = Net::HTTP::Get.new(uri.request_uri)
+      # basic authentication
+      auth = headers.delete(:basic_auth)
+      auth ||= [uri.user, uri.password] if uri.user && uri.password
+      request.basic_auth auth.first, auth.last if auth
+      # remaining options are treated as headers
+      headers.each {|key, value| request[key.to_s] = value.to_s}
+      response = http.request(request)
+      case response
+      when Net::HTTPSuccess
+        doc = parse(reencode(response.body, response['content-type']), options)
+        doc.instance_variable_set('@response', response)
+        doc.class.send(:attr_reader, :response)
+        doc
+      when Net::HTTPRedirection
+        response.value if limit <= 1
+        location = URI.join(uri, response['location'])
+        get(location, options.merge(:follow_limit => limit-1))
+      else
+        response.value
+      end
+    end
+    private
+    def self.read_and_encode(string, encoding)
+      # Read the string with the given encoding.
+      if string.respond_to?(:read)
+        if encoding.nil?
+          string = string.read
+        else
+          string = string.read(encoding: encoding)
+        end
+      else
+        # Otherwise the string has the given encoding.
+        string = string.to_s
+        if encoding
+          string = string.dup
+          string.force_encoding(encoding)
+        end
+      end
+      # convert to UTF-8
+      if string.encoding != Encoding::UTF_8
+        string = reencode(string)
+      end
+      string
+    end
+    # Charset sniffing is a complex and controversial topic that understandably
+    # isn't done _by default_ by the Ruby Net::HTTP library.  This being said,
+    # it is a very real problem for consumers of HTML as the default for HTML
+    # is iso-8859-1, most "good" producers use utf-8, and the Gumbo parser
+    # *only* supports utf-8.
+    #
+    # Accordingly, Nokogiri::HTML::Document.parse provides limited encoding
+    # detection.  Following this lead, Nokogiri::HTML5 attempts to do likewise,
+    # while attempting to more closely follow the HTML5 standard.
+    #
+    # http://bugs.ruby-lang.org/issues/2567
+    # http://www.w3.org/TR/html5/syntax.html#determining-the-character-encoding
+    #
+    def self.reencode(body, content_type=nil)
+      if body.encoding == Encoding::ASCII_8BIT
+        encoding = nil
+        # look for a Byte Order Mark (BOM)
+        initial_bytes = body[0..2].bytes
+        if initial_bytes[0..2] == [0xEF, 0xBB, 0xBF]
+          encoding = Encoding::UTF_8
+        elsif initial_bytes[0..1] == [0xFE, 0xFF]
+          encoding = Encoding::UTF_16BE
+        elsif initial_bytes[0..1] == [0xFF, 0xFE]
+          encoding = Encoding::UTF_16LE
+        end
+        # look for a charset in a content-encoding header
+        if content_type
+          encoding ||= content_type[/charset=["']?(.*?)($|["';\s])/i, 1]
+        end
+        # look for a charset in a meta tag in the first 1024 bytes
+        if not encoding
+          data = body[0..1023].gsub(/<!--.*?(-->|\Z)/m, '')
+          data.scan(/<meta.*?>/m).each do |meta|
+            encoding ||= meta[/charset=["']?([^>]*?)($|["'\s>])/im, 1]
+          end
+        end
+        # if all else fails, default to the official default encoding for HTML
+        encoding ||= Encoding::ISO_8859_1
+        # change the encoding to match the detected or inferred encoding
+        body = body.dup
+        begin
+          body.force_encoding(encoding)
+        rescue ArgumentError
+          body.force_encoding(Encoding::ISO_8859_1)
+        end
+      end
+      body.encode(Encoding::UTF_8)
+    end
+    def self.serialize_node_internal(current_node, io, encoding, options)
+      case current_node.type
+      when XML::Node::ELEMENT_NODE
+        ns = current_node.namespace
+        ns_uri = ns.nil? ? nil : ns.href
+        # XXX(sfc): attach namespaces to all nodes, even html?
+        if ns_uri.nil? || ns_uri == HTML_NAMESPACE || ns_uri == MATHML_NAMESPACE || ns_uri == SVG_NAMESPACE
+          tagname = current_node.name
+        else
+          tagname = "#{ns.prefix}:#{current_node.name}"
+        end
+        io << '<' << tagname
+        current_node.attribute_nodes.each do |attr|
+          attr_ns = attr.namespace
+          if attr_ns.nil?
+            attr_name = attr.name
+          else
+            ns_uri = attr_ns.href
+            if ns_uri == XML_NAMESPACE
+              attr_name = 'xml:' + attr.name.sub(/^[^:]*:/, '')
+            elsif ns_uri == XMLNS_NAMESPACE && attr.name.sub(/^[^:]*:/, '') == 'xmlns'
+              attr_name = 'xmlns'
+            elsif ns_uri == XMLNS_NAMESPACE
+              attr_name = 'xmlns:' + attr.name.sub(/^[^:]*:/, '')
+            elsif ns_uri == XLINK_NAMESPACE
+              attr_name = 'xlink:' + attr.name.sub(/^[^:]*:/, '')
+            else
+              attr_name = "#{attr_ns.prefix}:#{attr.name}"
+            end
+          end
+          io << ' ' << attr_name << '="' << escape_text(attr.content, encoding, true) << '"'
+        end
+        io << '>'
+        if !%w[area base basefont bgsound br col embed frame hr img input keygen
+               link meta param source track wbr].include?(current_node.name)
+          io << "\n" if options[:preserve_newline] && prepend_newline?(current_node)
+          current_node.children.each do |child|
+            # XXX(sfc): Templates handled specially?
+            serialize_node_internal(child, io, encoding, options)
+          end
+          io << '</' << tagname << '>'
+        end
+      when XML::Node::TEXT_NODE
+        parent = current_node.parent
+        if parent.element? && %w[style script xmp iframe noembed noframes plaintext noscript].include?(parent.name)
+          io << current_node.content
+        else
+          io << escape_text(current_node.content, encoding, false)
+        end
+      when XML::Node::CDATA_SECTION_NODE
+        io << '<![CDATA[' << current_node.content << ']]>'
+      when XML::Node::COMMENT_NODE
+        io << '<!--' << current_node.content << '-->'
+      when XML::Node::PI_NODE
+        io << '<?' << current_node.content << '>'
+      when XML::Node::DOCUMENT_TYPE_NODE, XML::Node::DTD_NODE
+          io << '<!DOCTYPE ' << current_node.name << '>'
+      when XML::Node::HTML_DOCUMENT_NODE, XML::Node::DOCUMENT_FRAG_NODE
+        current_node.children.each do |child|
+          serialize_node_internal(child, io, encoding, options)
+        end
+      else
+        raise "Unexpected node '#{current_node.name}' of type #{current_node.type}"
+      end
+    end
+    def self.escape_text(text, encoding, attribute_mode)
+      if attribute_mode
+        text = text.gsub(/[&\u00a0"]/,
+                           '&' => '&amp;', "\u00a0" => '&nbsp;', '"' => '&quot;')
+      else
+        text = text.gsub(/[&\u00a0<>]/,
+                           '&' => '&amp;', "\u00a0" => '&nbsp;',  '<' => '&lt;', '>' => '&gt;')
+      end
+      # Not part of the standard
+      text.encode(encoding, fallback: lambda { |c| "&\#x#{c.ord.to_s(16)};" })
+    end
+    def self.prepend_newline?(node)
+      return false unless %w[pre textarea listing].include?(node.name) && !node.children.empty?
+      first_child = node.children[0]
+      first_child.text? && first_child.content.start_with?("\n")
+    end
+  end
+end

data/lib/nokogumbo/html5/document.rb ADDED Viewed

@@ -0,0 +1,53 @@
+module Nokogiri
+  module HTML5
+    class Document < Nokogiri::HTML::Document
+      def self.parse(string_or_io, url = nil, encoding = nil, **options, &block)
+        yield options if block_given?
+	string_or_io = '' unless string_or_io
+        if string_or_io.respond_to?(:encoding) && string_or_io.encoding.name != 'ASCII-8BIT'
+          encoding ||= string_or_io.encoding.name
+        end
+        if string_or_io.respond_to?(:read) && string_or_io.respond_to?(:path)
+          url ||= string_or_io.path
+        end
+        unless string_or_io.respond_to?(:read) || string_or_io.respond_to?(:to_str)
+          raise ArgumentError.new("not a string or IO object")
+        end
+        do_parse(string_or_io, url, encoding, options)
+      end
+      def self.read_io(io, url = nil, encoding = nil, **options)
+        raise ArgumentError.new("io object doesn't respond to :read") unless io.respond_to?(:read)
+        do_parse(io, url, encoding, options)
+      end
+      def self.read_memory(string, url = nil, encoding = nil, **options)
+        raise ArgumentError.new("string object doesn't respond to :to_str") unless string.respond_to?(:to_str)
+        do_parse(string, url, encoding, options)
+      end
+      def fragment(tags = nil)
+        DocumentFragment.new(self, tags, self.root)
+      end
+      def to_xml(options = {}, &block)
+        # Bypass XML::Document#to_xml which doesn't add
+        # XML::Node::SaveOptions::AS_XML like XML::Node#to_xml does.
+        XML::Node.instance_method(:to_xml).bind(self).call(options, &block)
+      end
+      private
+      def self.do_parse(string_or_io, url, encoding, options)
+        string = HTML5.read_and_encode(string_or_io, encoding)
+        max_attributes = options[:max_attributes] || Nokogumbo::DEFAULT_MAX_ATTRIBUTES
+        max_errors = options[:max_errors] || options[:max_parse_errors] || Nokogumbo::DEFAULT_MAX_ERRORS
+        max_depth = options[:max_tree_depth] || Nokogumbo::DEFAULT_MAX_TREE_DEPTH
+        doc = Nokogumbo.parse(string, url, max_attributes, max_errors, max_depth)
+        doc.encoding = 'UTF-8'
+        doc
+      end
+    end
+  end
+end

data/lib/nokogumbo/html5/document_fragment.rb ADDED Viewed

@@ -0,0 +1,62 @@
+require 'nokogiri'
+module Nokogiri
+  module HTML5
+    class DocumentFragment < Nokogiri::HTML::DocumentFragment
+      attr_accessor :document
+      attr_accessor :errors
+      # Create a document fragment.
+      def initialize(doc, tags = nil, ctx = nil, options = {})
+        self.document = doc
+        self.errors = []
+        return self unless tags
+        max_attributes = options[:max_attributes] || Nokogumbo::DEFAULT_MAX_ATTRIBUTES
+        max_errors = options[:max_errors] || Nokogumbo::DEFAULT_MAX_ERRORS
+        max_depth = options[:max_tree_depth] || Nokogumbo::DEFAULT_MAX_TREE_DEPTH
+        tags = Nokogiri::HTML5.read_and_encode(tags, nil)
+        Nokogumbo.fragment(self, tags, ctx, max_attributes, max_errors, max_depth)
+      end
+      def serialize(options = {}, &block)
+        # Bypass XML::Document.serialize which doesn't support options even
+        # though XML::Node.serialize does!
+        XML::Node.instance_method(:serialize).bind(self).call(options, &block)
+      end
+      # Parse a document fragment from +tags+, returning a Nodeset.
+      def self.parse(tags, encoding = nil, options = {})
+        doc = HTML5::Document.new
+        tags = HTML5.read_and_encode(tags, encoding)
+        doc.encoding = 'UTF-8'
+        new(doc, tags, nil, options)
+      end
+      def extract_params params # :nodoc:
+        handler = params.find do |param|
+          ![Hash, String, Symbol].include?(param.class)
+        end
+        params -= [handler] if handler
+        hashes = []
+        while Hash === params.last || params.last.nil?
+          hashes << params.pop
+          break if params.empty?
+        end
+        ns, binds = hashes.reverse
+        ns ||=
+          begin
+            ns = Hash.new
+            children.each { |child| ns.merge!(child.namespaces) }
+            ns
+          end
+        [params, handler, ns, binds]
+      end
+    end
+  end
+end
+# vim: set shiftwidth=2 softtabstop=2 tabstop=8 expandtab:

data/lib/nokogumbo/html5/node.rb ADDED Viewed

@@ -0,0 +1,72 @@
+require 'nokogiri'
+module Nokogiri
+  module HTML5
+    module Node
+      # HTML elements can have attributes that contain colons.
+      # Nokogiri::XML::Node#[]= treats names with colons as a prefixed QName
+      # and tries to create an attribute in a namespace. This is especially
+      # annoying with attribute names like xml:lang since libxml2 will
+      # actually create the xml namespace if it doesn't exist already.
+      def add_child_node_and_reparent_attrs(node)
+        return super(node) unless document.is_a?(HTML5::Document)
+        # I'm not sure what this method is supposed to do. Reparenting
+        # namespaces is handled by libxml2, including child namespaces which
+        # this method wouldn't handle.
+        # https://github.com/sparklemotion/nokogiri/issues/1790
+        add_child_node(node)
+        #node.attribute_nodes.find_all { |a| a.namespace }.each do |attr|
+        #  attr.remove
+        #  ns = attr.namespace
+        #  a["#{ns.prefix}:#{attr.name}"] = attr.value
+        #end
+      end
+      def inner_html(options = {})
+        return super(options) unless document.is_a?(HTML5::Document)
+        result = options[:preserve_newline] && HTML5.prepend_newline?(self) ? "\n" : ""
+        result << children.map { |child| child.to_html(options) }.join
+        result
+      end
+      def write_to(io, *options)
+        return super(io, *options) unless document.is_a?(HTML5::Document)
+        options = options.first.is_a?(Hash) ? options.shift : {}
+        encoding = options[:encoding] || options[0]
+        if Nokogiri.jruby?
+          save_options = options[:save_with] || options[1]
+          indent_times = options[:indent] || 0
+        else
+          save_options = options[:save_with] || options[1] || XML::Node::SaveOptions::FORMAT
+          indent_times = options[:indent] || 2
+        end
+        indent_string = (options[:indent_text] || ' ') * indent_times
+        config = XML::Node::SaveOptions.new(save_options.to_i)
+        yield config if block_given?
+        config_options = config.options
+        if (config_options & (XML::Node::SaveOptions::AS_XML | XML::Node::SaveOptions::AS_XHTML) != 0)
+          # Use Nokogiri's serializing code.
+          native_write_to(io, encoding, indent_string, config_options)
+        else
+          # Serialize including the current node.
+          encoding ||= document.encoding || Encoding::UTF_8
+          internal_ops = {
+            preserve_newline: options[:preserve_newline] || false
+          }
+          HTML5.serialize_node_internal(self, io, encoding, internal_ops)
+        end
+      end
+      def fragment(tags)
+        return super(tags) unless document.is_a?(HTML5::Document)
+        DocumentFragment.new(document, tags, self)
+      end
+    end
+    # Monkey patch
+    XML::Node.prepend(HTML5::Node)
+  end
+end
+# vim: set shiftwidth=2 softtabstop=2 tabstop=8 expandtab:

data/lib/nokogumbo/version.rb ADDED Viewed

@@ -0,0 +1,3 @@
+module Nokogumbo
+  VERSION = "2.0.5"
+end

metadata CHANGED Viewed

@@ -1,62 +1,75 @@
 --- !ruby/object:Gem::Specification
 name: nokogumbo
 version: !ruby/object:Gem::Version
-  version: 1.5.0
+  version: 2.0.5
 platform: ruby
 authors:
 - Sam Ruby
+- Stephen Checkoway
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2018-01-27 00:00:00.000000000 Z
+date: 2021-03-19 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: nokogiri
   requirement: !ruby/object:Gem::Requirement
     requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '1.8'
     - - ">="
       - !ruby/object:Gem::Version
-        version: '0'
+        version: 1.8.4
   type: :runtime
   prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
     requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '1.8'
     - - ">="
       - !ruby/object:Gem::Version
-        version: '0'
+        version: 1.8.4
 description: Nokogumbo allows a Ruby program to invoke the Gumbo HTML5 parser and
   access the result as a Nokogiri parsed document.
-email: rubys@intertwingly.net
+email:
+- rubys@intertwingly.net
+- s@pahtak.org
 executables: []
 extensions:
-- ext/nokogumboc/extconf.rb
+- ext/nokogumbo/extconf.rb
 extra_rdoc_files: []
 files:
 - LICENSE.txt
 - README.md
-- ext/nokogumboc/extconf.rb
-- ext/nokogumboc/nokogumbo.c
+- ext/nokogumbo/extconf.rb
+- ext/nokogumbo/nokogumbo.c
+- gumbo-parser/src/ascii.c
+- gumbo-parser/src/ascii.h
 - gumbo-parser/src/attribute.c
 - gumbo-parser/src/attribute.h
 - gumbo-parser/src/char_ref.c
 - gumbo-parser/src/char_ref.h
-- gumbo-parser/src/char_ref.rl
 - gumbo-parser/src/error.c
 - gumbo-parser/src/error.h
+- gumbo-parser/src/foreign_attrs.c
 - gumbo-parser/src/gumbo.h
 - gumbo-parser/src/insertion_mode.h
+- gumbo-parser/src/macros.h
 - gumbo-parser/src/parser.c
 - gumbo-parser/src/parser.h
+- gumbo-parser/src/replacement.h
 - gumbo-parser/src/string_buffer.c
 - gumbo-parser/src/string_buffer.h
 - gumbo-parser/src/string_piece.c
-- gumbo-parser/src/string_piece.h
+- gumbo-parser/src/svg_attrs.c
+- gumbo-parser/src/svg_tags.c
 - gumbo-parser/src/tag.c
-- gumbo-parser/src/tag.in
-- gumbo-parser/src/tag_enum.h
-- gumbo-parser/src/tag_gperf.h
-- gumbo-parser/src/tag_sizes.h
-- gumbo-parser/src/tag_strings.h
+- gumbo-parser/src/tag_lookup.c
+- gumbo-parser/src/tag_lookup.h
+- gumbo-parser/src/token_buffer.c
+- gumbo-parser/src/token_buffer.h
 - gumbo-parser/src/token_type.h
 - gumbo-parser/src/tokenizer.c
 - gumbo-parser/src/tokenizer.h
@@ -67,13 +80,20 @@ files:
 - gumbo-parser/src/util.h
 - gumbo-parser/src/vector.c
 - gumbo-parser/src/vector.h
-- gumbo-parser/visualc/include/strings.h
 - lib/nokogumbo.rb
-- test-nokogumbo.rb
+- lib/nokogumbo/html5.rb
+- lib/nokogumbo/html5/document.rb
+- lib/nokogumbo/html5/document_fragment.rb
+- lib/nokogumbo/html5/node.rb
+- lib/nokogumbo/version.rb
 homepage: https://github.com/rubys/nokogumbo/#readme
 licenses:
 - Apache-2.0
-metadata: {}
+metadata:
+  bug_tracker_uri: https://github.com/rubys/nokogumbo/issues
+  changelog_uri: https://github.com/rubys/nokogumbo/blob/master/CHANGELOG.md
+  homepage_uri: https://github.com/rubys/nokogumbo/#readme
+  source_code_uri: https://github.com/rubys/nokogumbo
 post_install_message:
 rdoc_options: []
 require_paths:
@@ -82,15 +102,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
   requirements:
   - - ">="
     - !ruby/object:Gem::Version
-      version: '0'
+      version: '2.1'
 required_rubygems_version: !ruby/object:Gem::Requirement
   requirements:
   - - ">="
     - !ruby/object:Gem::Version
       version: '0'
 requirements: []
-rubyforge_project:
-rubygems_version: 2.7.4
+rubygems_version: 3.1.4
 signing_key:
 specification_version: 4
 summary: Nokogiri interface to the Gumbo HTML5 parser