RubyGems - nokogumbo - Versions diffs - 1.5.0 → 2.0.3 - Mend

nokogumbo 1.5.0 → 2.0.3

Files changed (57) hide show

checksums.yaml +4 -4
data/README.md +237 -26
data/ext/nokogumbo/extconf.rb +121 -0
data/ext/nokogumbo/nokogumbo.c +793 -0
data/gumbo-parser/src/ascii.c +75 -0
data/gumbo-parser/src/ascii.h +115 -0
data/gumbo-parser/src/attribute.c +26 -28
data/gumbo-parser/src/attribute.h +3 -23
data/gumbo-parser/src/char_ref.c +5972 -6816
data/gumbo-parser/src/char_ref.h +14 -45
data/gumbo-parser/src/error.c +510 -163
data/gumbo-parser/src/error.h +70 -147
data/gumbo-parser/src/foreign_attrs.c +104 -0
data/gumbo-parser/src/gumbo.h +577 -305
data/gumbo-parser/src/insertion_mode.h +4 -28
data/gumbo-parser/src/macros.h +91 -0
data/gumbo-parser/src/parser.c +2922 -2228
data/gumbo-parser/src/parser.h +6 -22
data/gumbo-parser/src/replacement.h +33 -0
data/gumbo-parser/src/string_buffer.c +43 -50
data/gumbo-parser/src/string_buffer.h +24 -40
data/gumbo-parser/src/string_piece.c +39 -39
data/gumbo-parser/src/svg_attrs.c +174 -0
data/gumbo-parser/src/svg_tags.c +137 -0
data/gumbo-parser/src/tag.c +186 -59
data/gumbo-parser/src/tag_lookup.c +382 -0
data/gumbo-parser/src/tag_lookup.h +13 -0
data/gumbo-parser/src/token_buffer.c +79 -0
data/gumbo-parser/src/token_buffer.h +71 -0
data/gumbo-parser/src/token_type.h +1 -25
data/gumbo-parser/src/tokenizer.c +2127 -1561
data/gumbo-parser/src/tokenizer.h +41 -52
data/gumbo-parser/src/tokenizer_states.h +281 -45
data/gumbo-parser/src/utf8.c +98 -123
data/gumbo-parser/src/utf8.h +84 -52
data/gumbo-parser/src/util.c +48 -38
data/gumbo-parser/src/util.h +10 -40
data/gumbo-parser/src/vector.c +45 -57
data/gumbo-parser/src/vector.h +17 -39
data/lib/nokogumbo.rb +11 -173
data/lib/nokogumbo/html5.rb +252 -0
data/lib/nokogumbo/html5/document.rb +53 -0
data/lib/nokogumbo/html5/document_fragment.rb +62 -0
data/lib/nokogumbo/html5/node.rb +72 -0
data/lib/nokogumbo/version.rb +3 -0
metadata +43 -24
data/ext/nokogumboc/extconf.rb +0 -60
data/ext/nokogumboc/nokogumbo.c +0 -295
data/gumbo-parser/src/char_ref.rl +0 -2554
data/gumbo-parser/src/string_piece.h +0 -38
data/gumbo-parser/src/tag.in +0 -150
data/gumbo-parser/src/tag_enum.h +0 -153
data/gumbo-parser/src/tag_gperf.h +0 -105
data/gumbo-parser/src/tag_sizes.h +0 -4
data/gumbo-parser/src/tag_strings.h +0 -153
data/gumbo-parser/visualc/include/strings.h +0 -4
data/test-nokogumbo.rb +0 -190

checksums.yaml CHANGED

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 96fa61565f78d5491e0b6d5b505cf936524745eb848b8b6584fc15e20c7ae35b
-  data.tar.gz: e5416f71bbe90323f04b8aad4dc48b28947e43a9eb46f446f8ca1444f519a07b
+  metadata.gz: e4694cf3eefbeee2a55cd4bb355b7ec6159c64eac4454dff02b1fbf7e5e8375a
+  data.tar.gz: 67832a7c26148f59755360758fcc0b0c1969949bf1e5a1b27f5cabe4b9e8b40b
 SHA512:
-  metadata.gz: 676bf3585d38cd4ad5c72b8b3afd4952e248c747683ae1072dd43f6ce1ccd279177e4d0c75a9821ed76d32806333128152231349d8d113ae5d81279580b13004
-  data.tar.gz: 3459078d96977399e75551c4a3ee5623091f48569984b771e540ec111125f5af91e39a8d78cbd3ce9280326b1b9395dc4a0b0d7f0a72294876682cb9fe35e3d9
+  metadata.gz: 3a415817caaf0c3c03037664bda8ed8aa17cc14419e75672dcaa2e2a7dd6d9a20e6ab59095a2295f90da5e45de2c3d72f9a25557533836d55dc67966fe8c7a14
+  data.tar.gz: 8dc8f9f2d55936a63097301dc5eb6fb54ed1e4c274b03cdcd6f45e2b4ac2cdc911a54e8e5838ce468820ebade731a62f7cfc167817528fd0adb415087ce924b6

data/README.md CHANGED

@@ -1,23 +1,21 @@
-Nokogumbo - a Nokogiri interface to the Gumbo HTML5 parser.
-===========
+# Nokogumbo - a Nokogiri interface to the Gumbo HTML5 parser.
-Nokogumbo provides the ability for a Ruby program to invoke the
-[Gumbo HTML5 parser](https://github.com/google/gumbo-parser#readme)
+Nokogumbo provides the ability for a Ruby program to invoke
+[our version of the Gumbo HTML5 parser](https://github.com/rubys/nokogumbo/tree/master/gumbo-parser/src)
 and to access the result as a
 [Nokogiri::HTML::Document](http://rdoc.info/github/sparklemotion/nokogiri/Nokogiri/HTML/Document).
-[![Build Status](https://travis-ci.org/rubys/nokogumbo.svg)](https://travis-ci.org/rubys/nokogumbo)
+[![Travis-CI Build Status](https://travis-ci.org/rubys/nokogumbo.svg)](https://travis-ci.org/rubys/nokogumbo)
+[![Appveyor Build Status](https://ci.appveyor.com/api/projects/status/github/rubys/nokogumbo)](https://ci.appveyor.com/project/rubys/nokogumbo/branch/master)
-Usage
------
+## Usage
 ```ruby
 require 'nokogumbo'
-doc = Nokogiri::HTML5(string)
+doc = Nokogiri.HTML5(string)
 ```
-An experimental _fragment_ method is also provided.  While not HTML5
-compliant, it may be useful:
+To parse an HTML fragment, a `fragment` method is provided.
 ```ruby
 require 'nokogumbo'
@@ -32,21 +30,207 @@ require 'nokogumbo'
 doc = Nokogiri::HTML5.get(uri)
 ```
-Example
------
+## Parsing options
+The document and fragment parsing methods,
+- `Nokogiri.HTML5(html, url = nil, encoding = nil, options = {})`
+- `Nokogiri::HTML5.parse(html, url = nil, encoding = nil, options = {})`
+- `Nokogiri::HTML5::Document.parse(html, url = nil, encoding = nil, options = {})`
+- `Nokogiri::HTML5.fragment(html, encoding = nil, options = {})`
+- `Nokogiri::HTML5::DocumentFragment.parse(html, encoding = nil, options = {})`
+support options that are different from Nokogiri's.
+The three currently supported options are `:max_errors`, `:max_tree_depth`,
+and `:max_attributes`, described below.
+### Error reporting
+Nokogumbo contains an experimental parse error reporting facility. By default,
+no parse errors are reported but this can be configured by passing the
+`:max_errors` option to `::parse` or `::fragment`.
 ```ruby
 require 'nokogumbo'
-puts Nokogiri::HTML5.get('http://nokogiri.org').search('ol li')[2].text
+doc = Nokogiri::HTML5.parse('<span/>Hi there!</span foo=bar />', max_errors: 10)
+doc.errors.each do |err|
+  puts(err)
+end
 ```
-Use `.to_html` instead of `.to_s` when parsing and serializing multiple times
+This prints the following.
 ```
+1:1: ERROR: Expected a doctype token
+<span/>Hi there!</span foo=bar />
+^
+1:1: ERROR: Start tag of nonvoid HTML element ends with '/>', use '>'.
+<span/>Hi there!</span foo=bar />
+^
+1:17: ERROR: End tag ends with '/>', use '>'.
+<span/>Hi there!</span foo=bar />
+                ^
+1:17: ERROR: End tag contains attributes.
+<span/>Hi there!</span foo=bar />
+                ^
+```
+Using `max_errors: -1` results in an unlimited number of errors being
+returned.
+The errors returned by `#errors` are instances of
+[`Nokogiri::XML::SyntaxError`](https://www.rubydoc.info/github/sparklemotion/nokogiri/Nokogiri/XML/SyntaxError).
+The [HTML
+standard](https://html.spec.whatwg.org/multipage/parsing.html#parse-errors)
+defines a number of standard parse error codes. These error codes only cover
+the "tokenization" stage of parsing HTML. The parse errors in the
+"tree construction" stage do not have standardized error codes (yet).
+As a convenience to Nokogumbo users, the defined error codes are available
+via the
+[`Nokogiri::XML::SyntaxError#str1`](https://www.rubydoc.info/github/sparklemotion/nokogiri/Nokogiri/XML/SyntaxError#str1-instance_method)
+method.
+```ruby
 require 'nokogumbo'
-Nokogiri::HTML5.parse(Nokogiri::HTML5.parse('<div></div> a').to_html).to_html
+doc = Nokogiri::HTML5.parse('<span/>Hi there!</span foo=bar />', max_errors: 10)
+doc.errors.each do |err|
+  puts("#{err.line}:#{err.column}: #{err.str1}")
+end
+```
+This prints the following.
+```
+1:1: generic-parser
+1:1: non-void-html-element-start-tag-with-trailing-solidus
+1:17: end-tag-with-trailing-solidus
+1:17: end-tag-with-attributes
+```
+Note that the first error is `generic-parser` because it's an error from the
+tree construction stage and doesn't have a standardized error code.
+For the purposes of semantic versioning, the error messages, error locations,
+and error codes are not part of Nokogumbo's public API. That is, these are
+subject to change without Nokogumbo's major version number changing. These may
+be stabilized in the future.
+### Maximum tree depth
+The maximum depth of the DOM tree parsed by the various parsing methods is
+configurable by the `:max_tree_depth` option. If the depth of the tree would
+exceed this limit, then an
+[ArgumentError](https://ruby-doc.org/core-2.5.0/ArgumentError.html) is thrown.
+This limit (which defaults to `Nokogumbo::DEFAULT_MAX_TREE_DEPTH = 400`) can
+be removed by giving the option `max_tree_depth: -1`.
+``` ruby
+html = '<!DOCTYPE html>' + '<div>' * 1000
+doc = Nokogiri.HTML5(html)
+# raises ArgumentError: Document tree depth limit exceeded
+doc = Nokogiri.HTML5(html, max_tree_depth: -1)
 ```
-Notes
------
+### Attribute limit per element
+The maximum number of attributes per DOM element is configurable by the
+`:max_attributes` option. If a given element would exceed this limit, then an
+[ArgumentError](https://ruby-doc.org/core-2.5.0/ArgumentError.html) is thrown.
+This limit (which defaults to `Nokogumbo::DEFAULT_MAX_ATTRIBUTES = 400`) can
+be removed by giving the option `max_attributes: -1`.
+``` ruby
+html = '<!DOCTYPE html><div ' + (1..1000).map { |x| "attr-#{x}" }.join(' ') + '>'
+# "<!DOCTYPE html><div attr-1 attr-2 attr-3 ... attr-1000>"
+doc = Nokogiri.HTML5(html)
+# raises ArgumentError: Attributes per element limit exceeded
+doc = Nokogiri.HTML5(html, max_attributes: -1)
+```
+## HTML Serialization
+After parsing HTML, it may be serialized using any of the Nokogiri
+[serialization
+methods](https://www.rubydoc.info/gems/nokogiri/Nokogiri/XML/Node). In
+particular, `#serialize`, `#to_html`, and `#to_s` will serialize a given node
+and its children. (This is the equivalent of JavaScript's
+`Element.outerHTML`.) Similarly, `#inner_html` will serialize the children of
+a given node. (This is the equivalent of JavaScript's `Element.innerHTML`.)
+``` ruby
+doc = Nokogiri::HTML5("<!DOCTYPE html><span>Hello world!</span>")
+puts doc.serialize
+# Prints: <!DOCTYPE html><html><head></head><body><span>Hello world!</span></body></html>
+```
+Due to quirks in how HTML is parsed and serialized, it's possible for a DOM
+tree to be serialized and then re-parsed, resulting in a different DOM.
+Mostly, this happens with DOMs produced from invalid HTML. Unfortunately, even
+valid HTML may not survive serialization and re-parsing.
+In particular, a newline at the start of `pre`, `listing`, and `textarea`
+elements is ignored by the parser.
+``` ruby
+doc = Nokogiri::HTML5(<<-EOF)
+<!DOCTYPE html>
+<pre>
+Content</pre>
+EOF
+puts doc.at('/html/body/pre').serialize
+# Prints: <pre>Content</pre>
+```
+In this case, the original HTML is semantically equivalent to the serialized
+version. If the `pre`, `listing`, or `textarea` content starts with two
+newlines, the first newline will be stripped on the first parse and the second
+newline will be stripped on the second, leading to semantically different
+DOMs. Passing the parameter `preserve_newline: true` will cause two or more
+newlines to be preserved. (A single leading newline will still be removed.)
+``` ruby
+doc = Nokogiri::HTML5(<<-EOF)
+<!DOCTYPE html>
+<listing>
+Content</listing>
+EOF
+puts doc.at('/html/body/listing').serialize(preserve_newline: true)
+# Prints: <listing>
+#
+# Content</listing>
+```
+## Encodings
+Nokogumbo always parses HTML using
+[UTF-8](https://en.wikipedia.org/wiki/UTF-8); however, the encoding of the
+input can be explicitly selected via the optional `encoding` parameter. This
+is most useful when the input comes not from a string but from an IO object.
+When serializing a document or node, the encoding of the output string can be
+specified via the `:encoding` option. Characters that cannot be encoded in
+the selected encoding will be encoded as [HTML numeric
+entities](https://en.wikipedia.org/wiki/List_of_XML_and_HTML_character_entity_references).
+``` ruby
+frag = Nokogiri::HTML5.fragment('<span>아는 길도 물어가라</span>')
+html = frag.serialize(encoding: 'US-ASCII')
+puts html
+# Prints: <span>&#xc544;&#xb294; &#xae38;&#xb3c4; &#xbb3c;&#xc5b4;&#xac00;&#xb77c;</span>
+frag = Nokogiri::HTML5.fragment(html)
+puts frag.serialize
+# Prints: <span>아는 길도 물어가라</span>
+```
+(There's a [bug](https://bugs.ruby-lang.org/issues/15033) in all current
+versions of Ruby that can cause the entity encoding to fail. Of the mandated
+supported encodings for HTML, the only encoding I'm aware of that has this bug
+is `'ISO-2022-JP'`. I recommend avoiding this encoding.)
+## Examples
+```ruby
+require 'nokogumbo'
+puts Nokogiri::HTML5.get('http://nokogiri.org').search('ol li')[2].text
+```
+## Notes
 * The `Nokogiri::HTML5.fragment` function takes a string and parses it
   as a HTML5 document.  The `<html>`, `<head>`, and `<body>` elements are
@@ -74,20 +258,47 @@ rules defined in the HTML5 specification for doing so.
 * Instead of returning `unknown` as the element name for unknown tags, the
 original tag name is returned verbatim.
-* If the Gumbo HTML5 parser is not already installed, the source for the
-parser will be downloaded and compiled into the Gem itself.
+# Flavors of Nokogumbo
+Nokogumbo uses libxml2, the XML library underlying Nokogiri, to speed up
+parsing. If the libxml2 headers are not available, then Nokogumbo resorts to
+using Nokogiri's Ruby API to construct the DOM tree.
+Nokogiri can be configured to either use the system library version of libxml2
+or use a bundled version. By default (as of Nokogiri version 1.8.4), Nokogiri
+will use a bundled version.
+To prevent differences between versions of libxml2, Nokogumbo will only use
+libxml2 if the build process can find the exact same version used by Nokogiri.
+This leads to three possibilities:
+1. Nokogiri is compiled with the bundled libxml2. In this case, Nokogumbo will
+   (by default) use the same version of libxml2.
+2. Nokogiri is compiled with the system libxml2. In this case, if the libxml2
+   headers are available, then Nokogumbo will (by default) use the system
+   version and headers.
+3. Nokogiri is compiled with the system libxml2 but its headers aren't
+   available at build time for Nokogumbo. In this case, Nokogumbo will use the
+   slower Ruby API.
+Using libxml2 can be required by passing `-- --with-libxml2` to `bundle exec
+rake` or to `gem install`. Using libxml2 can be prohibited by instead passing
+`-- --without-libxml2`.
+Functionally, the only difference between using libxml2 or not is in the
+behavior of `Nokogiri::XML::Node#line`. If it is used, then `#line` will
+return the line number of the corresponding node. Otherwise, it will return 0.
-Installation
-============
+# Installation
-    git clone --recursive https://github.com/rubys/nokogumbo.git
+    git clone https://github.com/rubys/nokogumbo.git
     cd nokogumbo
     bundle install
     rake gem
     gem install pkg/nokogumbo*.gem
-Related efforts
-============
+# Related efforts
-* [ruby-gumbo](https://github.com/nevir/ruby-gumbo#readme) - a ruby binding
-for the Gumbo HTML5 parser.
+* [ruby-gumbo](https://github.com/nevir/ruby-gumbo#readme) -- a ruby binding
+  for the Gumbo HTML5 parser.
+* [lua-gumbo](https://gitlab.com/craigbarnes/lua-gumbo) -- a lua binding for
+  the Gumbo HTML5 parser.

data/ext/nokogumbo/extconf.rb ADDED

@@ -0,0 +1,121 @@
+require 'rubygems'
+require 'fileutils'
+require 'mkmf'
+require 'nokogiri'
+$CFLAGS += " -std=c99" # compile the extension's C sources as C99
+$LDFLAGS.gsub!('-Wl,--no-undefined', '') # drop this linker flag if it is present
+$warnflags = CONFIG['warnflags'] = '-Wall' # replace the configured warning flags with -Wall
+NG_SPEC = Gem::Specification.find_by_name('nokogiri', "= #{Nokogiri::VERSION}") # gem spec of exactly the Nokogiri version loaded above
+def download_headers
+  begin
+    require 'yaml'
+    dependencies = YAML.load_file(File.join(NG_SPEC.gem_dir, 'dependencies.yml'))
+    version = dependencies['libxml2']['version']
+    host = RbConfig::CONFIG["host_alias"].empty? ? RbConfig::CONFIG["host"] : RbConfig::CONFIG["host_alias"]
+    path = File.join('ports', host, 'libxml2', version, 'include/libxml2')
+    return path if File.directory?(path)
+    # Make sure we're using the same version Nokogiri uses
+    dep_index = NG_SPEC.dependencies.index { |dep| dep.name == 'mini_portile2' and dep.type == :runtime }
+    return nil if dep_index.nil?
+    requirement = NG_SPEC.dependencies[dep_index].requirement.to_s
+    gem 'mini_portile2', requirement
+    require 'mini_portile2'
+    p = MiniPortile::new('libxml2', version).tap do |r|
+      r.host = RbConfig::CONFIG["host_alias"].empty? ? RbConfig::CONFIG["host"] : RbConfig::CONFIG["host_alias"]
+      r.files = [{
+        url: "http://xmlsoft.org/sources/libxml2-#{r.version}.tar.gz",
+        sha256: dependencies['libxml2']['sha256']
+      }]
+      r.configure_options += [
+        "--without-python",
+        "--without-readline",
+        "--with-c14n",
+        "--with-debug",
+        "--with-threads"
+      ]
+    end
+    p.download unless p.downloaded?
+    p.extract
+    p.configure unless p.configured?
+    system('make', '-C', "tmp/#{p.host}/ports/libxml2/#{version}/libxml2-#{version}/include/libxml", 'install-xmlincHEADERS')
+    path
+  rescue
+    puts 'failed to download/install headers'
+    nil
+  end
+end
+required = arg_config('--with-libxml2') # caller insists on the libxml2-backed build
+prohibited = arg_config('--without-libxml2') # caller forbids the libxml2-backed build
+if required and prohibited
+  abort "cannot use both --with-libxml2 and --without-libxml2"
+end
+have_libxml2 = false # set once libxml2 (headers or library) has been located
+have_ng = false # set once nokogiri.h has been located
+if !prohibited
+  if Nokogiri::VERSION_INFO.include?('libxml') and
+     Nokogiri::VERSION_INFO['libxml']['source'] == 'packaged'
+    # Nokogiri has libxml2 built in. Find the headers.
+    libxml2_path = File.join(Nokogiri::VERSION_INFO['libxml']['libxml2_path'],
+                             'include/libxml2')
+    if find_header('libxml/tree.h', libxml2_path)
+      have_libxml2 = true
+    else
+      # Unfortunately, some versions of Nokogiri delete these files.
+      # https://github.com/sparklemotion/nokogiri/pull/1788
+      # Try to download them
+      libxml2_path = download_headers
+      unless libxml2_path.nil?
+        have_libxml2 = find_header('libxml/tree.h', libxml2_path)
+      end
+    end
+  else
+    # Nokogiri is compiled with system headers.
+    # Hack to work around broken mkmf on macOS
+    # (https://bugs.ruby-lang.org/issues/14992 fixed now)
+    if RbConfig::MAKEFILE_CONFIG['LIBPATHENV'] == 'DYLD_LIBRARY_PATH'
+      RbConfig::MAKEFILE_CONFIG['LIBPATHENV'] = 'DYLD_FALLBACK_LIBRARY_PATH'
+    end
+    pkg_config('libxml-2.0')
+    have_libxml2 = have_library('xml2', 'xmlNewDoc')
+  end
+  if required and !have_libxml2
+    abort "libxml2 required but could not be located"
+  end
+  if have_libxml2
+    # Find nokogiri.h
+    have_ng = find_header('nokogiri.h', File.join(NG_SPEC.gem_dir, 'ext/nokogiri'))
+  end
+end
+if have_libxml2 and have_ng
+  $CFLAGS += " -DNGLIB=1" # nokogumbo.c checks NGLIB to choose the libxml2 code path
+end
+# Symlink gumbo-parser source files.
+ext_dir = File.dirname(__FILE__)
+gumbo_src = File.join(ext_dir, 'gumbo_src')
+Dir.chdir(ext_dir) do
+  $srcs = Dir['*.c', '../../gumbo-parser/src/*.c']
+  $hdrs = Dir['*.h', '../../gumbo-parser/src/*.h']
+end
+$INCFLAGS << ' -I$(srcdir)/../../gumbo-parser/src'
+$VPATH << '$(srcdir)/../../gumbo-parser/src'
+create_makefile('nokogumbo/nokogumbo') do |conf|
+  conf.map! do |chunk|
+    chunk.gsub(/^HDRS = .*$/, "HDRS = #{$hdrs.map { |h| File.join('$(srcdir)', h)}.join(' ')}")
+  end
+end
+# vim: set sw=2 sts=2 ts=8 et:

data/ext/nokogumbo/nokogumbo.c ADDED

@@ -0,0 +1,793 @@
+//
+// nokogumbo.c defines the following:
+//
+//   class Nokogumbo
+//     def parse(utf8_string) # returns Nokogiri::HTML5::Document
+//   end
+//
+// Processing starts by calling gumbo_parse_with_options.  The resulting
+// document tree is then walked:
+//
+//  * if Nokogiri and libxml2 headers are available at compile time,
+//    (if NGLIB) then a parallel libxml2 tree is constructed, and the
+//    final document is then wrapped using Nokogiri_wrap_xml_document.
+//    This approach reduces memory and CPU requirements as Ruby objects
+//    are only built when necessary.
+//
+//  * if the necessary headers are not available at compile time, Nokogiri
+//    methods are called instead, producing the equivalent functionality.
+//
+#include <assert.h>
+#include <ruby.h>
+#include <ruby/version.h>
+#include "gumbo.h"
+// class constants
+static VALUE Document;
+// Interned symbols
+static ID internal_subset;
+static ID parent;
+/* Backwards compatibility to Ruby 2.1.0 */
+#if RUBY_API_VERSION_CODE < 20200
+#define ONIG_ESCAPE_UCHAR_COLLISION 1
+#include <ruby/encoding.h>
+// Compatibility shim for Rubies covered by the RUBY_API_VERSION_CODE guard:
+// builds a UTF-8 encoded Ruby string from the first `length` bytes of `str`.
+static VALUE rb_utf8_str_new(const char *str, long length) {
+  rb_encoding *utf8 = rb_utf8_encoding();
+  return rb_enc_str_new(str, length, utf8);
+}
+// Compatibility shim: builds a UTF-8 encoded Ruby string from the
+// NUL-terminated C string `str`.
+static VALUE rb_utf8_str_new_cstr(const char *str) {
+  rb_encoding *utf8 = rb_utf8_encoding();
+  return rb_enc_str_new_cstr(str, utf8);
+}
+// Compatibility shim: this version delegates to rb_enc_str_new, exactly as
+// the rb_utf8_str_new shim does.
+static VALUE rb_utf8_str_new_static(const char *str, long length) {
+  rb_encoding *utf8 = rb_utf8_encoding();
+  return rb_enc_str_new(str, length, utf8);
+}
+#endif
+#if NGLIB
+#include <nokogiri.h>
+#include <libxml/tree.h>
+#include <libxml/HTMLtree.h>
+#define NIL NULL
+#else
+#define NIL Qnil
+// These are defined by nokogiri.h
+static VALUE cNokogiriXmlSyntaxError;
+static VALUE cNokogiriXmlElement;
+static VALUE cNokogiriXmlText;
+static VALUE cNokogiriXmlCData;
+static VALUE cNokogiriXmlComment;
+// Interned symbols.
+static ID new;
+static ID node_name_;
+// Map libxml2 types to Ruby VALUE.
+typedef VALUE xmlNodePtr;
+typedef VALUE xmlDocPtr;
+typedef VALUE xmlNsPtr;
+typedef VALUE xmlDtdPtr;
+typedef char xmlChar;
+#define BAD_CAST
+// Redefine libxml2 API as Ruby function calls.
+// Ruby-API stand-in for libxml2's xmlNewDocNode: calls `new` on
+// cNokogiriXmlElement with (name, doc). Namespaces and initial content are
+// not supported; callers must pass NIL and NULL for them.
+static xmlNodePtr xmlNewDocNode(xmlDocPtr doc, xmlNsPtr ns, const xmlChar *name, const xmlChar *content) {
+  assert(ns == NIL);
+  assert(content == NULL);
+  VALUE element_name = rb_utf8_str_new_cstr(name);
+  return rb_funcall(cNokogiriXmlElement, new, 2, element_name, doc);
+}
+// Ruby-API stand-in for libxml2's xmlNewDocText: calls `new` on
+// cNokogiriXmlText with (content, doc).
+static xmlNodePtr xmlNewDocText(xmlDocPtr doc, const xmlChar *content) {
+  return rb_funcall(cNokogiriXmlText, new, 2, rb_utf8_str_new_cstr(content), doc);
+}
+// Ruby-API stand-in for libxml2's xmlNewCDataBlock: calls `new` on
+// cNokogiriXmlCData with the first `len` bytes of `content`.
+static xmlNodePtr xmlNewCDataBlock(xmlDocPtr doc, const xmlChar *content, int len) {
+  VALUE cdata_text = rb_utf8_str_new(content, len);
+  // Note the argument order: CDATA.new takes (doc, text) whereas Text.new
+  // takes (text, doc).
+  return rb_funcall(cNokogiriXmlCData, new, 2, doc, cdata_text);
+}
+// Ruby-API stand-in for libxml2's xmlNewDocComment: calls `new` on
+// cNokogiriXmlComment with (doc, content).
+static xmlNodePtr xmlNewDocComment(xmlDocPtr doc, const xmlChar *content) {
+  return rb_funcall(cNokogiriXmlComment, new, 2, doc, rb_utf8_str_new_cstr(content));
+}
+// Ruby-API stand-in for libxml2's xmlAddChild: returns the result of
+// parent.add_child(cur).
+static xmlNodePtr xmlAddChild(xmlNodePtr parent, xmlNodePtr cur) {
+  ID id_add_child;
+  CONST_ID(id_add_child, "add_child");
+  xmlNodePtr added = rb_funcall(parent, id_add_child, 1, cur);
+  return added;
+}
+// Ruby-API stand-in for libxml2's xmlSetNs: performs node.namespace = ns.
+static void xmlSetNs(xmlNodePtr node, xmlNsPtr ns) {
+  ID id_set_namespace;
+  CONST_ID(id_set_namespace, "namespace=");
+  rb_funcall(node, id_set_namespace, 1, ns);
+}
+// Ruby-API stand-in for libxml2's xmlFreeDoc: intentionally does nothing.
+static void xmlFreeDoc(xmlDocPtr doc) {
+}
+// Ruby-API stand-in for Nokogiri_wrap_xml_document: on this path xmlDocPtr is
+// already a Ruby VALUE, so `doc` is returned as is and `klass` is ignored.
+static VALUE Nokogiri_wrap_xml_document(VALUE klass, xmlDocPtr doc) {
+  VALUE wrapped = doc;
+  return wrapped;
+}
+// Steps the candidate key in `buf` to its successor and returns the new
+// length. The key is a counter over 'a'..'z' with the least significant
+// letter first: "a", "b", ..., "z", "aa", "ba", ... `buf` must have room for
+// one more letter than `len`.
+static size_t next_dummy_key(char *buf, size_t len) {
+  size_t pos = 0;
+  // Letters that are already 'z' wrap around to 'a' and carry.
+  while (buf[pos] == 'z')
+    buf[pos++] = 'a';
+  if (buf[pos] == 0) {
+    // Carried past the last letter: the key grows by one letter.
+    buf[pos] = 'a';
+    return len + 1;
+  }
+  ++buf[pos];
+  return len;
+}
+// Returns a Ruby string of one to four lowercase letters for which
+// collection.key?(string) is false. Raises ArgumentError if every such
+// string is already a key.
+static VALUE find_dummy_key(VALUE collection) {
+  char candidate[5] = "a";
+  ID id_key_p;
+  CONST_ID(id_key_p, "key?");
+  for (size_t len = 1; len < sizeof candidate; len = next_dummy_key(candidate, len)) {
+    VALUE r_candidate = rb_utf8_str_new(candidate, len);
+    if (rb_funcall(collection, id_key_p, 1, r_candidate) == Qfalse)
+      return r_candidate;
+  }
+  // This collection has 475254 elements?? Give up.
+  rb_raise(rb_eArgError, "Failed to find a dummy key.");
+}
+// Ruby-API stand-in for libxml2's xmlNewNsProp: sets attribute `name` to
+// `value` on `node`, qualified by the namespace object `ns` when one is
+// given. The real function returns an xmlAttrPtr; nothing here needs it, so
+// this version returns nothing.
+static void xmlNewNsProp (
+  xmlNodePtr node,
+  xmlNsPtr ns,
+  const xmlChar *name,
+  const xmlChar *value
+) {
+  ID id_set_attribute;
+  CONST_ID(id_set_attribute, "set_attribute");
+  VALUE attr_value = rb_utf8_str_new_cstr(value);
+  if (RTEST(ns)) {
+    // With a namespace it is enough to do
+    // node["#{ns.prefix}:#{name}"] = value
+    ID id_prefix;
+    CONST_ID(id_prefix, "prefix");
+    VALUE prefix_str = rb_funcall(ns, id_prefix, 0);
+    VALUE qualified_name = rb_sprintf("%" PRIsVALUE ":%s", prefix_str, name);
+    rb_funcall(node, id_set_attribute, 2, qualified_name, attr_value);
+    return;
+  }
+  VALUE attr_name = rb_utf8_str_new_cstr(name);
+  if (strchr(name, ':') == NULL) {
+    // No namespace and no colon in the name: node[name] = value suffices.
+    rb_funcall(node, id_set_attribute, 2, attr_name, attr_value);
+    return;
+  }
+  // The name contains a colon but has no namespace.
+  //
+  // Nokogiri::XML::Node#set_attribute calls xmlSetProp(node, name, value)
+  // which behaves roughly as
+  // if name is a QName prefix:local
+  //   if node->doc has a namespace ns corresponding to prefix
+  //     return xmlSetNsProp(node, ns, local, value)
+  // return xmlSetNsProp(node, NULL, name, value)
+  //
+  // If the prefix is "xml", then the namespace lookup will create it.
+  //
+  // The real xmlNewNsProp does none of this parsing: it creates an attribute
+  // with the name and value exactly as given, which is the behavior wanted
+  // here.
+  //
+  // So for a name like "xml:lang", #set_attribute would create an attribute
+  // with namespace "xml" and name "lang". That is incorrect for html
+  // elements (but correct for foreign elements).
+  //
+  // The workaround is to add the attribute under a dummy name that is not in
+  // use, drop any existing attribute with the real name, and then rename the
+  // dummy to the real name.
+  VALUE placeholder = find_dummy_key(node);
+  rb_funcall(node, id_set_attribute, 2, placeholder, attr_value);
+  ID id_remove_attribute;
+  CONST_ID(id_remove_attribute, "remove_attribute");
+  rb_funcall(node, id_remove_attribute, 1, attr_name);
+  ID id_attribute;
+  CONST_ID(id_attribute, "attribute");
+  VALUE placeholder_attr = rb_funcall(node, id_attribute, 1, placeholder);
+  rb_funcall(placeholder_attr, node_name_, 1, attr_name);
+}
+#endif
+// Creates an empty HTML document, optionally with an internal subset
+// (doctype).
+//   dtd_name: name for the internal subset, or NULL for none
+//   system:   system id (what libxml2 calls the URI)
+//   public:   public id (what libxml2 calls the external id)
+static xmlDocPtr new_html_doc(const char *dtd_name, const char *system, const char *public)
+{
+#if NGLIB
+  // htmlNewDocNoDtD and xmlCreateIntSubset take the public and system ids in
+  // opposite orders.
+  htmlDocPtr doc = htmlNewDocNoDtD(/* URI */ NULL, /* ExternalID */NULL);
+  assert(doc);
+  if (dtd_name != NULL)
+    xmlCreateIntSubset(doc, BAD_CAST dtd_name, BAD_CAST public, BAD_CAST system);
+  return doc;
+#else
+  if (system != NULL || public != NULL) {
+    assert(dtd_name);
+    // Document#new creates an internal subset from the ids; renaming that
+    // subset is cheaper than removing it and creating a new one.
+    VALUE r_system = Qnil;
+    VALUE r_public = Qnil;
+    if (system != NULL)
+      r_system = rb_utf8_str_new_cstr(system);
+    if (public != NULL)
+      r_public = rb_utf8_str_new_cstr(public);
+    VALUE doc_with_ids = rb_funcall(Document, new, 2, r_system, r_public);
+    VALUE subset = rb_funcall(doc_with_ids, internal_subset, 0);
+    rb_funcall(subset, node_name_, 1, rb_utf8_str_new_cstr(dtd_name));
+    return doc_with_ids;
+  }
+  // Neither id was given. Document#new would fill in default values for
+  // them, so the internal subset it creates has to be removed. That removal
+  // seems to leak memory in Nokogiri, so leak as little as possible by
+  // passing an empty external id.
+  ID id_remove;
+  CONST_ID(id_remove, "remove");
+  VALUE doc = rb_funcall(Document, new, 2, /* URI */ Qnil, /* external_id */ rb_utf8_str_new_static("", 0));
+  VALUE default_subset = rb_funcall(doc, internal_subset, 0);
+  rb_funcall(default_subset, id_remove, 0);
+  if (dtd_name != NULL) {
+    // A doctype was requested, so create a fresh internal subset without ids.
+    ID id_create_internal_subset;
+    CONST_ID(id_create_internal_subset, "create_internal_subset");
+    rb_funcall(doc, id_create_internal_subset, 3, rb_utf8_str_new_cstr(dtd_name), Qnil, Qnil);
+  }
+  return doc;
+#endif
+}
+// Returns the parent of `node`. With libxml2 this reads node->parent;
+// otherwise it calls the node's `parent` method, or yields Qnil when the
+// object does not respond to it.
+static xmlNodePtr get_parent(xmlNodePtr node) {
+#if NGLIB
+  return node->parent;
+#else
+  return rb_respond_to(node, parent) ? rb_funcall(node, parent, 0) : Qnil;
+#endif
+}
+// Parses the Ruby string `input` with Gumbo using `options` and returns the
+// parser output. `input` must be a String (checked with Check_Type).
+//
+// When Gumbo reports a failure status the output is destroyed and a Ruby
+// exception carrying gumbo_status_to_string's message is raised:
+// rb_eArgError for too many attributes or a too-deep tree, rb_eNoMemError
+// for out of memory.
+static GumboOutput *perform_parse(const GumboOptions *options, VALUE input) {
+  assert(RTEST(input));
+  Check_Type(input, T_STRING);
+  GumboOutput *output = gumbo_parse_with_options (
+    options,
+    RSTRING_PTR(input),
+    RSTRING_LEN(input)
+  );
+  // Fetch the message before the output can be destroyed below.
+  const char *message = gumbo_status_to_string(output->status);
+  VALUE error_class = Qnil;
+  switch (output->status) {
+  case GUMBO_STATUS_OK:
+    break;
+  case GUMBO_STATUS_TOO_MANY_ATTRIBUTES:
+  case GUMBO_STATUS_TREE_TOO_DEEP:
+    error_class = rb_eArgError;
+    break;
+  case GUMBO_STATUS_OUT_OF_MEMORY:
+    error_class = rb_eNoMemError;
+    break;
+  }
+  if (error_class != Qnil) {
+    gumbo_destroy_output(output);
+    rb_raise(error_class, "%s", message);
+  }
+  return output;
+}
+// Returns a namespace for `prefix` usable on `root`.
+// With libxml2, an existing namespace found by xmlSearchNs is reused and a
+// new one (prefix -> href) is declared on `root` only when none is found.
+// Without libxml2, root.add_namespace_definition(prefix, href) is called
+// every time and its result is returned.
+static xmlNsPtr lookup_or_add_ns (
+  xmlDocPtr doc,
+  xmlNodePtr root,
+  const char *href,
+  const char *prefix
+) {
+#if NGLIB
+  xmlNsPtr found = xmlSearchNs(doc, root, BAD_CAST prefix);
+  if (found == NULL)
+    found = xmlNewNs(root, BAD_CAST href, BAD_CAST prefix);
+  return found;
+#else
+  ID id_add_namespace_definition;
+  CONST_ID(id_add_namespace_definition, "add_namespace_definition");
+  VALUE prefix_str = rb_utf8_str_new_cstr(prefix);
+  VALUE href_str = rb_utf8_str_new_cstr(href);
+  return rb_funcall(root, id_add_namespace_definition, 2, prefix_str, href_str);
+#endif
+}
+// Records the source line number on `node`. This only has an effect on the
+// libxml2 path; the Ruby-API path leaves the node untouched.
+static void set_line(xmlNodePtr node, size_t line) {
+#if NGLIB
+  // libxml2 uses 65535 to mean look elsewhere for the line number on some
+  // nodes, so only smaller values are stored.
+  if (line >= 65535)
+    return;
+  node->line = (unsigned short)line;
+#else
+  // XXX: If Nokogiri gets a `#line=` method, we'll use that.
+#endif
+}
+/* Construct an XML tree rooted at xml_output_node from the Gumbo tree rooted
+ * at gumbo_node.
+ *
+ * The walk is iterative rather than recursive: gumbo_node/xml_node track the
+ * current position in the two trees and child_index is the next child of
+ * gumbo_node to convert. Entering an element resets child_index to 0;
+ * finishing a node's children steps back up through the parent pointers and
+ * resumes at index_within_parent + 1.
+ */
+// Returns once all children of the original gumbo_node have been processed.
+static void build_tree (
+  xmlDocPtr doc,
+  xmlNodePtr xml_output_node,
+  const GumboNode *gumbo_node
+) {
+  xmlNodePtr xml_root = NIL; // first element created since the last reset; namespaces are looked up/declared on it
+  xmlNodePtr xml_node = xml_output_node; // XML counterpart of gumbo_node
+  size_t child_index = 0; // index of the next child of gumbo_node to visit
+  while (true) {
+    assert(gumbo_node != NULL);
+    const GumboVector *children = gumbo_node->type == GUMBO_NODE_DOCUMENT?
+      &gumbo_node->v.document.children : &gumbo_node->v.element.children;
+    if (child_index >= children->length) {
+      // Move up the tree and to the next child.
+      if (xml_node == xml_output_node) {
+        // We've built as much of the tree as we can.
+        return;
+      }
+      child_index = gumbo_node->index_within_parent + 1;
+      gumbo_node = gumbo_node->parent;
+      xml_node = get_parent(xml_node);
+      // Children of fragments don't share the same root, so reset it and
+      // it'll be set below. In the non-fragment case, this will only happen
+      // after the html element has been finished at which point there are no
+      // further elements.
+      if (xml_node == xml_output_node)
+        xml_root = NIL;
+      continue;
+    }
+    const GumboNode *gumbo_child = children->data[child_index++];
+    xmlNodePtr xml_child;
+    switch (gumbo_child->type) {
+      case GUMBO_NODE_DOCUMENT:
+        abort(); // Bug in Gumbo.
+      case GUMBO_NODE_TEXT:
+      case GUMBO_NODE_WHITESPACE:
+        xml_child = xmlNewDocText(doc, BAD_CAST gumbo_child->v.text.text);
+        set_line(xml_child, gumbo_child->v.text.start_pos.line);
+        xmlAddChild(xml_node, xml_child);
+        break;
+      case GUMBO_NODE_CDATA:
+        xml_child = xmlNewCDataBlock(doc, BAD_CAST gumbo_child->v.text.text,
+                                     (int) strlen(gumbo_child->v.text.text));
+        set_line(xml_child, gumbo_child->v.text.start_pos.line);
+        xmlAddChild(xml_node, xml_child);
+        break;
+      case GUMBO_NODE_COMMENT:
+        xml_child = xmlNewDocComment(doc, BAD_CAST gumbo_child->v.text.text);
+        set_line(xml_child, gumbo_child->v.text.start_pos.line);
+        xmlAddChild(xml_node, xml_child);
+        break;
+      case GUMBO_NODE_TEMPLATE:
+        // XXX: Should create a template element and a new DocumentFragment. Until then, fall through and treat it as an element.
+      case GUMBO_NODE_ELEMENT:
+      {
+        xml_child = xmlNewDocNode(doc, NIL, BAD_CAST gumbo_child->v.element.name, NULL);
+        set_line(xml_child, gumbo_child->v.element.start_pos.line);
+        if (xml_root == NIL)
+          xml_root = xml_child;
+        xmlNsPtr ns = NIL;
+        switch (gumbo_child->v.element.tag_namespace) {
+        case GUMBO_NAMESPACE_HTML:
+          break;
+        case GUMBO_NAMESPACE_SVG:
+          ns = lookup_or_add_ns(doc, xml_root, "http://www.w3.org/2000/svg", "svg");
+          break;
+        case GUMBO_NAMESPACE_MATHML:
+          ns = lookup_or_add_ns(doc, xml_root, "http://www.w3.org/1998/Math/MathML", "math");
+          break;
+        }
+        if (ns != NIL)
+          xmlSetNs(xml_child, ns);
+        xmlAddChild(xml_node, xml_child);
+        // Add the attributes.
+        const GumboVector* attrs = &gumbo_child->v.element.attributes;
+        for (size_t i=0; i < attrs->length; i++) {
+          const GumboAttribute *attr = attrs->data[i];
+          switch (attr->attr_namespace) {
+            case GUMBO_ATTR_NAMESPACE_XLINK:
+              ns = lookup_or_add_ns(doc, xml_root, "http://www.w3.org/1999/xlink", "xlink");
+              break;
+            case GUMBO_ATTR_NAMESPACE_XML:
+              ns = lookup_or_add_ns(doc, xml_root, "http://www.w3.org/XML/1998/namespace", "xml");
+              break;
+            case GUMBO_ATTR_NAMESPACE_XMLNS:
+              ns = lookup_or_add_ns(doc, xml_root, "http://www.w3.org/2000/xmlns/", "xmlns");
+              break;
+            default:
+              ns = NIL;
+          }
+          xmlNewNsProp(xml_child, ns, BAD_CAST attr->name, BAD_CAST attr->value);
+        }
+        // Add children for this element.
+        child_index = 0;
+        gumbo_node = gumbo_child;
+        xml_node = xml_child;
+      }
+    }
+  }
+}
+static void add_errors(const GumboOutput *output, VALUE rdoc, VALUE input, VALUE url) {
+  const char *input_str = RSTRING_PTR(input);
+  size_t input_len = RSTRING_LEN(input);
+  // Add parse errors to rdoc.
+  if (output->errors.length) {
+    const GumboVector *errors = &output->errors;
+    VALUE rerrors = rb_ary_new2(errors->length);
+    for (size_t i=0; i < errors->length; i++) {
+      GumboError *err = errors->data[i];
+      GumboSourcePosition position = gumbo_error_position(err);
+      char *msg;
+      size_t size = gumbo_caret_diagnostic_to_string(err, input_str, input_len, &msg);
+      VALUE err_str = rb_utf8_str_new(msg, size);
+      free(msg);
+      VALUE syntax_error = rb_class_new_instance(1, &err_str, cNokogiriXmlSyntaxError);
+      const char *error_code = gumbo_error_code(err);
+      VALUE str1 = error_code? rb_utf8_str_new_static(error_code, strlen(error_code)) : Qnil;
+      rb_iv_set(syntax_error, "@domain", INT2NUM(1)); // XML_FROM_PARSER
+      rb_iv_set(syntax_error, "@code", INT2NUM(1));   // XML_ERR_INTERNAL_ERROR
+      rb_iv_set(syntax_error, "@level", INT2NUM(2));  // XML_ERR_ERROR
+      rb_iv_set(syntax_error, "@file", url);
+      rb_iv_set(syntax_error, "@line", INT2NUM(position.line));
+      rb_iv_set(syntax_error, "@str1", str1);
+      rb_iv_set(syntax_error, "@str2", Qnil);
+      rb_iv_set(syntax_error, "@str3", Qnil);
+      rb_iv_set(syntax_error, "@int1", INT2NUM(0));
+      rb_iv_set(syntax_error, "@column", INT2NUM(position.column));
+      rb_ary_push(rerrors, syntax_error);
+    }
+    rb_iv_set(rdoc, "@errors", rerrors);
+  }
+}
+// State shared between parse()/fragment(), their *_continue bodies and
+// parse_cleanup(). Instances live on the caller's stack and are handed to
+// rb_ensure through wrap_parse_args().
+typedef struct {
+  // Result of perform_parse(); destroyed by parse_cleanup().
+  GumboOutput *output;
+  // The Ruby string that was parsed; marked by parse_args_mark().
+  VALUE input;
+  // The URL (parse) or the document fragment (fragment); marked by
+  // parse_args_mark().
+  VALUE url_or_frag;
+  // Freed by parse_cleanup() with xmlFreeDoc() unless it has been reset to
+  // NIL beforehand.
+  xmlDocPtr doc;
+} ParseArgs;
+// GC mark callback installed by wrap_parse_args(): reports the two Ruby
+// values held by the wrapped ParseArgs so they are treated as reachable.
+static void parse_args_mark(void *parse_args) {
+  const ParseArgs *wrapped = parse_args;
+
+  rb_gc_mark_maybe(wrapped->input);
+  rb_gc_mark_maybe(wrapped->url_or_frag);
+}
+// Wraps a ParseArgs pointer in a Ruby object so it can be passed through
+// rb_ensure. The wrapper never frees the struct (RUBY_NEVER_FREE), so the
+// underlying ParseArgs must outlive the wrapper.
+static VALUE wrap_parse_args(ParseArgs *args) {
+  VALUE wrapper = Data_Wrap_Struct(rb_cData, parse_args_mark, RUBY_NEVER_FREE, args);
+  return wrapper;
+}
+// Returns the underlying ParseArgs that was wrapped by wrap_parse_args.
+static ParseArgs *unwrap_parse_args(VALUE obj) {
+  ParseArgs *unwrapped;
+
+  Data_Get_Struct(obj, ParseArgs, unwrapped);
+  return unwrapped;
+}
+// Ensure-handler for parse() and fragment(): releases the Gumbo output and,
+// if the document pointer is still set, the libxml2 document as well.
+// Always returns nil.
+static VALUE parse_cleanup(VALUE parse_args) {
+  ParseArgs *state = unwrap_parse_args(parse_args);
+
+  gumbo_destroy_output(state->output);
+
+  // Drop the Ruby references so the mark callback no longer keeps these
+  // objects alive through the ParseArgs. This may be unnecessary.
+  state->input = Qnil;
+  state->url_or_frag = Qnil;
+
+  // A non-NIL doc means ownership was never handed over, so free it here.
+  if (state->doc != NIL) {
+    xmlFreeDoc(state->doc);
+  }
+  return Qnil;
+}
+static VALUE parse_continue(VALUE parse_args);
+// Parses the string input with Gumbo and returns the document produced by
+// parse_continue(). max_attributes, max_errors and max_depth are converted
+// with NUM2INT and copied into the Gumbo options. The Gumbo output is always
+// released by parse_cleanup(), whether or not parse_continue() raises.
+static VALUE parse(VALUE self, VALUE input, VALUE url, VALUE max_attributes, VALUE max_errors, VALUE max_depth) {
+  GumboOptions options = kGumboDefaultOptions;
+  options.max_attributes = NUM2INT(max_attributes);
+  options.max_errors = NUM2INT(max_errors);
+  options.max_tree_depth = NUM2INT(max_depth);
+
+  // The state lives on this stack frame; the wrapper only borrows it.
+  ParseArgs args;
+  args.output = perform_parse(&options, input);
+  args.input = input;
+  args.url_or_frag = url;
+  args.doc = NIL;
+
+  VALUE wrapped = wrap_parse_args(&args);
+  return rb_ensure(parse_continue, wrapped, parse_cleanup, wrapped);
+}
+// Body of parse(), run under rb_ensure. Creates the document with
+// new_html_doc(), fills it from the Gumbo tree, wraps it as a Ruby Document,
+// attaches the parse errors, and returns the wrapped document.
+static VALUE parse_continue(VALUE parse_args) {
+  ParseArgs *args = unwrap_parse_args(parse_args);
+  GumboOutput *output = args->output;
+  const GumboNode *document = output->document;
+
+  // The doctype pieces stay NULL unless the parsed input had a doctype.
+  const char *name = NULL;
+  const char *public = NULL;
+  const char *system = NULL;
+  if (document->v.document.has_doctype) {
+    name = document->v.document.name;
+    // Empty identifiers are passed on as NULL rather than as "".
+    if (document->v.document.public_identifier[0])
+      public = document->v.document.public_identifier;
+    if (document->v.document.system_identifier[0])
+      system = document->v.document.system_identifier;
+  }
+  xmlDocPtr doc = new_html_doc(name, system, public);
+
+  // Record doc so parse_cleanup() frees it if an error is thrown below.
+  args->doc = doc;
+  build_tree(doc, (xmlNodePtr)doc, output->document);
+  VALUE rdoc = Nokogiri_wrap_xml_document(Document, doc);
+  // The Ruby runtime owns doc from here on, so cleanup must not free it.
+  args->doc = NIL;
+
+  add_errors(output, rdoc, args->input, args->url_or_frag);
+  return rdoc;
+}
+// Maps the namespace of a Ruby node to a Gumbo namespace constant.
+//
+// Calls node.namespace; when that is nil the node is treated as HTML.
+// Otherwise the namespace's href must be a String and is compared against
+// the XHTML, MathML and SVG namespace URIs.
+//
+// Returns GUMBO_NAMESPACE_HTML, GUMBO_NAMESPACE_MATHML or
+// GUMBO_NAMESPACE_SVG for a recognised URI. For any other URI it raises
+// ArgumentError when require_known_ns is true and returns -1 otherwise.
+static int lookup_namespace(VALUE node, bool require_known_ns) {
+  ID namespace, href;
+  CONST_ID(namespace, "namespace");
+  CONST_ID(href, "href");
+  VALUE ns = rb_funcall(node, namespace, 0);
+  if (NIL_P(ns))
+    return GUMBO_NAMESPACE_HTML;
+  ns = rb_funcall(ns, href, 0);
+  assert(RTEST(ns));
+  Check_Type(ns, T_STRING);
+  const char *href_ptr = RSTRING_PTR(ns);
+  size_t href_len = RSTRING_LEN(ns);
+  // Length check first so memcmp never reads past the end of either string.
+#define NAMESPACE_P(uri) (href_len == sizeof uri - 1 && !memcmp(href_ptr, uri, href_len))
+  if (NAMESPACE_P("http://www.w3.org/1999/xhtml"))
+    return GUMBO_NAMESPACE_HTML;
+  if (NAMESPACE_P("http://www.w3.org/1998/Math/MathML"))
+    return GUMBO_NAMESPACE_MATHML;
+  if (NAMESPACE_P("http://www.w3.org/2000/svg"))
+    return GUMBO_NAMESPACE_SVG;
+#undef NAMESPACE_P
+  // "%.*s" (precision) limits the output to href_len bytes; "%*s" would only
+  // set a minimum field width and leave the length unbounded.
+  if (require_known_ns)
+    rb_raise(rb_eArgError, "Unexpected namespace URI \"%.*s\"", (int)href_len, href_ptr);
+  return -1;
+}
+// Returns the xmlNodePtr that corresponds to a Ruby node object.
+// When NGLIB is set the pointer is unwrapped from the object with
+// Data_Get_Struct; otherwise the VALUE itself is returned unchanged.
+// NOTE(review): the #else branch presumably relies on xmlNodePtr being
+// defined as VALUE in the non-NGLIB build -- confirm against the top of the
+// file.
+static xmlNodePtr extract_xml_node(VALUE node) {
+#if NGLIB
+  xmlNodePtr xml_node;
+  Data_Get_Struct(node, xmlNode, xml_node);
+  return xml_node;
+#else
+  return node;
+#endif
+}
+static VALUE fragment_continue(VALUE parse_args);
+// Parses the string tags as an HTML fragment and builds the resulting tree
+// under doc_fragment (see fragment_continue). Parse errors are stored on
+// doc_fragment by add_errors. Always returns nil.
+//
+// ctx selects the fragment's context element:
+//   - nil: an HTML "body" element.
+//   - a String: a tag name, optionally prefixed with "svg:", "html:" or
+//     "math:" (case-insensitive) to choose the namespace. A bare "svg" or
+//     "math" is placed in its own namespace for convenience.
+//   - anything else: an object responding to name and namespace. Its
+//     ancestors (via parent) are searched for an HTML form element, and for
+//     an "annotation-xml" context its "encoding" attribute is read.
+//
+// The quirks mode is derived from the internal subset of doc_fragment's
+// document; without one, no-quirks mode is used.
+//
+// max_attributes, max_errors and max_depth are converted with NUM2INT and
+// copied into the Gumbo options.
+//
+// Raises ArgumentError for an unknown namespace prefix in a String ctx and,
+// through lookup_namespace, for a context node with an unknown namespace.
+static VALUE fragment (
+  VALUE self,
+  VALUE doc_fragment,
+  VALUE tags,
+  VALUE ctx,
+  VALUE max_attributes,
+  VALUE max_errors,
+  VALUE max_depth
+) {
+  ID name = rb_intern_const("name");
+  const char *ctx_tag;
+  GumboNamespaceEnum ctx_ns;
+  GumboQuirksModeEnum quirks_mode;
+  bool form = false;
+  const char *encoding = NULL;
+  if (NIL_P(ctx)) {
+    ctx_tag = "body";
+    ctx_ns = GUMBO_NAMESPACE_HTML;
+  } else if (TYPE(ctx) == T_STRING) {
+    ctx_tag = StringValueCStr(ctx);
+    ctx_ns = GUMBO_NAMESPACE_HTML;
+    size_t len = RSTRING_LEN(ctx);
+    const char *colon = memchr(ctx_tag, ':', len);
+    if (colon) {
+      // The prefix length alone narrows down which namespaces can match.
+      switch (colon - ctx_tag) {
+      case 3:
+        if (st_strncasecmp(ctx_tag, "svg", 3) != 0)
+          goto error;
+        ctx_ns = GUMBO_NAMESPACE_SVG;
+        break;
+      case 4:
+        if (st_strncasecmp(ctx_tag, "html", 4) == 0)
+          ctx_ns = GUMBO_NAMESPACE_HTML;
+        else if (st_strncasecmp(ctx_tag, "math", 4) == 0)
+          ctx_ns = GUMBO_NAMESPACE_MATHML;
+        else
+          goto error;
+        break;
+      default:
+      error:
+        // "%.*s" prints only the prefix before the colon; "%*s" would set a
+        // minimum width and print the whole "prefix:tag" string.
+        rb_raise(rb_eArgError, "Invalid context namespace '%.*s'", (int)(colon - ctx_tag), ctx_tag);
+      }
+      ctx_tag = colon+1;
+    } else {
+      // For convenience, put 'svg' and 'math' in their namespaces.
+      if (len == 3 && st_strncasecmp(ctx_tag, "svg", 3) == 0)
+        ctx_ns = GUMBO_NAMESPACE_SVG;
+      else if (len == 4 && st_strncasecmp(ctx_tag, "math", 4) == 0)
+        ctx_ns = GUMBO_NAMESPACE_MATHML;
+    }
+    // Check if it's a form.
+    form = ctx_ns == GUMBO_NAMESPACE_HTML && st_strcasecmp(ctx_tag, "form") == 0;
+  } else {
+    ID element_ = rb_intern_const("element?");
+    // Context fragment name.
+    VALUE tag_name = rb_funcall(ctx, name, 0);
+    assert(RTEST(tag_name));
+    Check_Type(tag_name, T_STRING);
+    ctx_tag = StringValueCStr(tag_name);
+    // Context fragment namespace.
+    ctx_ns = lookup_namespace(ctx, true);
+    // Check for a form ancestor, including self.
+    for (VALUE node = ctx;
+         !NIL_P(node);
+         node = rb_respond_to(node, parent) ? rb_funcall(node, parent, 0) : Qnil) {
+      if (!RTEST(rb_funcall(node, element_, 0)))
+        continue;
+      VALUE element_name = rb_funcall(node, name, 0);
+      if (RSTRING_LEN(element_name) == 4
+          && !st_strcasecmp(RSTRING_PTR(element_name), "form")
+          && lookup_namespace(node, false) == GUMBO_NAMESPACE_HTML) {
+        form = true;
+        break;
+      }
+    }
+    // Encoding.
+    if (RSTRING_LEN(tag_name) == 14
+        && !st_strcasecmp(ctx_tag, "annotation-xml")) {
+      // rb_funcall takes the argument count before the arguments; ctx["encoding"]
+      // passes exactly one.
+      VALUE enc = rb_funcall(ctx, rb_intern_const("[]"),
+                             1,
+                             rb_utf8_str_new_static("encoding", 8));
+      if (RTEST(enc)) {
+        Check_Type(enc, T_STRING);
+        encoding = StringValueCStr(enc);
+      }
+    }
+  }
+  // Quirks mode.
+  VALUE doc = rb_funcall(doc_fragment, rb_intern_const("document"), 0);
+  VALUE dtd = rb_funcall(doc, internal_subset, 0);
+  if (NIL_P(dtd)) {
+    quirks_mode = GUMBO_DOCTYPE_NO_QUIRKS;
+  } else {
+    VALUE dtd_name = rb_funcall(dtd, name, 0);
+    VALUE pubid = rb_funcall(dtd, rb_intern_const("external_id"), 0);
+    VALUE sysid = rb_funcall(dtd, rb_intern_const("system_id"), 0);
+    quirks_mode = gumbo_compute_quirks_mode (
+      NIL_P(dtd_name)? NULL:StringValueCStr(dtd_name),
+      NIL_P(pubid)? NULL:StringValueCStr(pubid),
+      NIL_P(sysid)? NULL:StringValueCStr(sysid)
+    );
+  }
+  // Perform a fragment parse.
+  int depth = NUM2INT(max_depth);
+  GumboOptions options = kGumboDefaultOptions;
+  options.max_attributes = NUM2INT(max_attributes);
+  options.max_errors = NUM2INT(max_errors);
+  // Add one to account for the HTML element.
+  options.max_tree_depth = depth < 0 ? -1 : (depth + 1);
+  options.fragment_context = ctx_tag;
+  options.fragment_namespace = ctx_ns;
+  options.fragment_encoding = encoding;
+  options.quirks_mode = quirks_mode;
+  options.fragment_context_has_form_ancestor = form;
+  GumboOutput *output = perform_parse(&options, tags);
+  // The state lives on this stack frame; the wrapper only borrows it.
+  ParseArgs args = {
+    .output = output,
+    .input = tags,
+    .url_or_frag = doc_fragment,
+    .doc = (xmlDocPtr)extract_xml_node(doc),
+  };
+  VALUE parse_args = wrap_parse_args(&args);
+  // parse_cleanup releases the Gumbo output even if fragment_continue raises.
+  rb_ensure(fragment_continue, parse_args, parse_cleanup, parse_args);
+  return Qnil;
+}
+// Body of fragment(), run under rb_ensure. Builds the parsed tree under the
+// document fragment and attaches the parse errors to it, using "#fragment"
+// as the file name of each error. Always returns nil.
+static VALUE fragment_continue(VALUE parse_args) {
+  ParseArgs *args = unwrap_parse_args(parse_args);
+  xmlDocPtr owner_doc = args->doc;
+  // The Ruby runtime owns the document; clear the field so that
+  // parse_cleanup() does not free it.
+  args->doc = NIL;
+
+  VALUE frag = args->url_or_frag;
+  build_tree(owner_doc, extract_xml_node(frag), args->output->root);
+  add_errors(args->output, frag, args->input, rb_utf8_str_new_static("#fragment", 9));
+  return Qnil;
+}
+// Initialize the Nokogumbo class and fetch constants we will use later.
+// Activates and requires the nokogiri gem, caches the classes and method IDs
+// used by the rest of this file, and defines the Nokogumbo module with its
+// parse and fragment singleton methods plus a private LINE_SUPPORTED
+// constant.
+void Init_nokogumbo() {
+  // Equivalent to calling Kernel#gem("nokogiri") before the require.
+  rb_funcall(rb_mKernel, rb_intern_const("gem"), 1, rb_utf8_str_new_static("nokogiri", 8));
+  rb_require("nokogiri");
+  VALUE line_supported = Qtrue;
+#if !NGLIB
+  // Without NGLIB the Nokogiri classes are looked up by name and registered
+  // with the GC so the cached references stay valid.
+  // Class constants.
+  VALUE mNokogiri = rb_const_get(rb_cObject, rb_intern_const("Nokogiri"));
+  VALUE mNokogiriXml = rb_const_get(mNokogiri, rb_intern_const("XML"));
+  cNokogiriXmlSyntaxError = rb_const_get(mNokogiriXml, rb_intern_const("SyntaxError"));
+  rb_gc_register_mark_object(cNokogiriXmlSyntaxError);
+  cNokogiriXmlElement = rb_const_get(mNokogiriXml, rb_intern_const("Element"));
+  rb_gc_register_mark_object(cNokogiriXmlElement);
+  cNokogiriXmlText = rb_const_get(mNokogiriXml, rb_intern_const("Text"));
+  rb_gc_register_mark_object(cNokogiriXmlText);
+  cNokogiriXmlCData = rb_const_get(mNokogiriXml, rb_intern_const("CDATA"));
+  rb_gc_register_mark_object(cNokogiriXmlCData);
+  cNokogiriXmlComment = rb_const_get(mNokogiriXml, rb_intern_const("Comment"));
+  rb_gc_register_mark_object(cNokogiriXmlComment);
+  // Interned symbols.
+  new = rb_intern_const("new");
+  node_name_ = rb_intern_const("node_name=");
+  // #line is not supported (returns 0)
+  line_supported = Qfalse;
+#endif
+  // NOTE(review): mNokogiri is declared above only in the !NGLIB branch;
+  // presumably a Nokogiri header provides it when NGLIB is set -- confirm.
+  // Class constants.
+  VALUE HTML5 = rb_const_get(mNokogiri, rb_intern_const("HTML5"));
+  Document = rb_const_get(HTML5, rb_intern_const("Document"));
+  rb_gc_register_mark_object(Document);
+  // Interned symbols.
+  internal_subset = rb_intern_const("internal_subset");
+  parent = rb_intern_const("parent");
+  // Define Nokogumbo module with parse and fragment methods.
+  VALUE Gumbo = rb_define_module("Nokogumbo");
+  // Arities exclude self: parse takes 5 arguments, fragment takes 6.
+  rb_define_singleton_method(Gumbo, "parse", parse, 5);
+  rb_define_singleton_method(Gumbo, "fragment", fragment, 6);
+  // Add private constant for testing.
+  rb_define_const(Gumbo, "LINE_SUPPORTED", line_supported);
+  rb_funcall(Gumbo, rb_intern_const("private_constant"), 1,
+             rb_utf8_str_new_cstr("LINE_SUPPORTED"));
+}
+// vim: set shiftwidth=2 softtabstop=2 tabstop=8 expandtab: