nokogiri 1.18.0.rc1-x86_64-linux-gnu
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of nokogiri might be problematic. Click here for more details.
- checksums.yaml +7 -0
- data/Gemfile +38 -0
- data/LICENSE-DEPENDENCIES.md +2224 -0
- data/LICENSE.md +9 -0
- data/README.md +293 -0
- data/bin/nokogiri +131 -0
- data/dependencies.yml +42 -0
- data/ext/nokogiri/depend +38 -0
- data/ext/nokogiri/extconf.rb +1173 -0
- data/ext/nokogiri/gumbo.c +610 -0
- data/ext/nokogiri/html4_document.c +171 -0
- data/ext/nokogiri/html4_element_description.c +299 -0
- data/ext/nokogiri/html4_entity_lookup.c +37 -0
- data/ext/nokogiri/html4_sax_parser.c +40 -0
- data/ext/nokogiri/html4_sax_parser_context.c +98 -0
- data/ext/nokogiri/html4_sax_push_parser.c +96 -0
- data/ext/nokogiri/include/libexslt/exslt.h +108 -0
- data/ext/nokogiri/include/libexslt/exsltconfig.h +70 -0
- data/ext/nokogiri/include/libexslt/exsltexports.h +63 -0
- data/ext/nokogiri/include/libxml2/libxml/HTMLparser.h +336 -0
- data/ext/nokogiri/include/libxml2/libxml/HTMLtree.h +147 -0
- data/ext/nokogiri/include/libxml2/libxml/SAX.h +202 -0
- data/ext/nokogiri/include/libxml2/libxml/SAX2.h +171 -0
- data/ext/nokogiri/include/libxml2/libxml/c14n.h +115 -0
- data/ext/nokogiri/include/libxml2/libxml/catalog.h +182 -0
- data/ext/nokogiri/include/libxml2/libxml/chvalid.h +230 -0
- data/ext/nokogiri/include/libxml2/libxml/debugXML.h +217 -0
- data/ext/nokogiri/include/libxml2/libxml/dict.h +82 -0
- data/ext/nokogiri/include/libxml2/libxml/encoding.h +244 -0
- data/ext/nokogiri/include/libxml2/libxml/entities.h +166 -0
- data/ext/nokogiri/include/libxml2/libxml/globals.h +41 -0
- data/ext/nokogiri/include/libxml2/libxml/hash.h +251 -0
- data/ext/nokogiri/include/libxml2/libxml/list.h +137 -0
- data/ext/nokogiri/include/libxml2/libxml/nanoftp.h +186 -0
- data/ext/nokogiri/include/libxml2/libxml/nanohttp.h +98 -0
- data/ext/nokogiri/include/libxml2/libxml/parser.h +1390 -0
- data/ext/nokogiri/include/libxml2/libxml/parserInternals.h +671 -0
- data/ext/nokogiri/include/libxml2/libxml/pattern.h +106 -0
- data/ext/nokogiri/include/libxml2/libxml/relaxng.h +219 -0
- data/ext/nokogiri/include/libxml2/libxml/schemasInternals.h +959 -0
- data/ext/nokogiri/include/libxml2/libxml/schematron.h +143 -0
- data/ext/nokogiri/include/libxml2/libxml/threads.h +87 -0
- data/ext/nokogiri/include/libxml2/libxml/tree.h +1382 -0
- data/ext/nokogiri/include/libxml2/libxml/uri.h +106 -0
- data/ext/nokogiri/include/libxml2/libxml/valid.h +477 -0
- data/ext/nokogiri/include/libxml2/libxml/xinclude.h +136 -0
- data/ext/nokogiri/include/libxml2/libxml/xlink.h +189 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlIO.h +438 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlautomata.h +146 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlerror.h +962 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlexports.h +146 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlmemory.h +188 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlmodule.h +57 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlreader.h +436 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlregexp.h +215 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlsave.h +102 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlschemas.h +249 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlschemastypes.h +152 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlstring.h +140 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlunicode.h +366 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlversion.h +347 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlwriter.h +489 -0
- data/ext/nokogiri/include/libxml2/libxml/xpath.h +579 -0
- data/ext/nokogiri/include/libxml2/libxml/xpathInternals.h +633 -0
- data/ext/nokogiri/include/libxml2/libxml/xpointer.h +138 -0
- data/ext/nokogiri/include/libxslt/attributes.h +39 -0
- data/ext/nokogiri/include/libxslt/documents.h +93 -0
- data/ext/nokogiri/include/libxslt/extensions.h +262 -0
- data/ext/nokogiri/include/libxslt/extra.h +72 -0
- data/ext/nokogiri/include/libxslt/functions.h +78 -0
- data/ext/nokogiri/include/libxslt/imports.h +75 -0
- data/ext/nokogiri/include/libxslt/keys.h +53 -0
- data/ext/nokogiri/include/libxslt/namespaces.h +68 -0
- data/ext/nokogiri/include/libxslt/numbersInternals.h +73 -0
- data/ext/nokogiri/include/libxslt/pattern.h +84 -0
- data/ext/nokogiri/include/libxslt/preproc.h +43 -0
- data/ext/nokogiri/include/libxslt/security.h +104 -0
- data/ext/nokogiri/include/libxslt/templates.h +77 -0
- data/ext/nokogiri/include/libxslt/transform.h +207 -0
- data/ext/nokogiri/include/libxslt/variables.h +118 -0
- data/ext/nokogiri/include/libxslt/xslt.h +110 -0
- data/ext/nokogiri/include/libxslt/xsltInternals.h +1995 -0
- data/ext/nokogiri/include/libxslt/xsltconfig.h +146 -0
- data/ext/nokogiri/include/libxslt/xsltexports.h +64 -0
- data/ext/nokogiri/include/libxslt/xsltlocale.h +44 -0
- data/ext/nokogiri/include/libxslt/xsltutils.h +343 -0
- data/ext/nokogiri/libxml2_polyfill.c +114 -0
- data/ext/nokogiri/nokogiri.c +294 -0
- data/ext/nokogiri/nokogiri.h +238 -0
- data/ext/nokogiri/test_global_handlers.c +40 -0
- data/ext/nokogiri/xml_attr.c +103 -0
- data/ext/nokogiri/xml_attribute_decl.c +70 -0
- data/ext/nokogiri/xml_cdata.c +62 -0
- data/ext/nokogiri/xml_comment.c +57 -0
- data/ext/nokogiri/xml_document.c +784 -0
- data/ext/nokogiri/xml_document_fragment.c +29 -0
- data/ext/nokogiri/xml_dtd.c +208 -0
- data/ext/nokogiri/xml_element_content.c +131 -0
- data/ext/nokogiri/xml_element_decl.c +69 -0
- data/ext/nokogiri/xml_encoding_handler.c +112 -0
- data/ext/nokogiri/xml_entity_decl.c +112 -0
- data/ext/nokogiri/xml_entity_reference.c +50 -0
- data/ext/nokogiri/xml_namespace.c +181 -0
- data/ext/nokogiri/xml_node.c +2459 -0
- data/ext/nokogiri/xml_node_set.c +518 -0
- data/ext/nokogiri/xml_processing_instruction.c +54 -0
- data/ext/nokogiri/xml_reader.c +777 -0
- data/ext/nokogiri/xml_relax_ng.c +149 -0
- data/ext/nokogiri/xml_sax_parser.c +403 -0
- data/ext/nokogiri/xml_sax_parser_context.c +390 -0
- data/ext/nokogiri/xml_sax_push_parser.c +206 -0
- data/ext/nokogiri/xml_schema.c +226 -0
- data/ext/nokogiri/xml_syntax_error.c +93 -0
- data/ext/nokogiri/xml_text.c +59 -0
- data/ext/nokogiri/xml_xpath_context.c +502 -0
- data/ext/nokogiri/xslt_stylesheet.c +421 -0
- data/gumbo-parser/CHANGES.md +63 -0
- data/gumbo-parser/Makefile +129 -0
- data/gumbo-parser/THANKS +27 -0
- data/lib/nokogiri/3.1/nokogiri.so +0 -0
- data/lib/nokogiri/3.2/nokogiri.so +0 -0
- data/lib/nokogiri/3.3/nokogiri.so +0 -0
- data/lib/nokogiri/3.4/nokogiri.so +0 -0
- data/lib/nokogiri/class_resolver.rb +67 -0
- data/lib/nokogiri/css/node.rb +58 -0
- data/lib/nokogiri/css/parser.rb +772 -0
- data/lib/nokogiri/css/parser.y +277 -0
- data/lib/nokogiri/css/parser_extras.rb +36 -0
- data/lib/nokogiri/css/selector_cache.rb +38 -0
- data/lib/nokogiri/css/syntax_error.rb +9 -0
- data/lib/nokogiri/css/tokenizer.rb +155 -0
- data/lib/nokogiri/css/tokenizer.rex +57 -0
- data/lib/nokogiri/css/xpath_visitor.rb +375 -0
- data/lib/nokogiri/css.rb +132 -0
- data/lib/nokogiri/decorators/slop.rb +42 -0
- data/lib/nokogiri/encoding_handler.rb +57 -0
- data/lib/nokogiri/extension.rb +32 -0
- data/lib/nokogiri/gumbo.rb +15 -0
- data/lib/nokogiri/html.rb +48 -0
- data/lib/nokogiri/html4/builder.rb +37 -0
- data/lib/nokogiri/html4/document.rb +235 -0
- data/lib/nokogiri/html4/document_fragment.rb +166 -0
- data/lib/nokogiri/html4/element_description.rb +25 -0
- data/lib/nokogiri/html4/element_description_defaults.rb +2040 -0
- data/lib/nokogiri/html4/encoding_reader.rb +121 -0
- data/lib/nokogiri/html4/entity_lookup.rb +15 -0
- data/lib/nokogiri/html4/sax/parser.rb +48 -0
- data/lib/nokogiri/html4/sax/parser_context.rb +15 -0
- data/lib/nokogiri/html4/sax/push_parser.rb +37 -0
- data/lib/nokogiri/html4.rb +42 -0
- data/lib/nokogiri/html5/builder.rb +40 -0
- data/lib/nokogiri/html5/document.rb +199 -0
- data/lib/nokogiri/html5/document_fragment.rb +200 -0
- data/lib/nokogiri/html5/node.rb +103 -0
- data/lib/nokogiri/html5.rb +368 -0
- data/lib/nokogiri/jruby/dependencies.rb +3 -0
- data/lib/nokogiri/jruby/nokogiri_jars.rb +43 -0
- data/lib/nokogiri/syntax_error.rb +6 -0
- data/lib/nokogiri/version/constant.rb +6 -0
- data/lib/nokogiri/version/info.rb +224 -0
- data/lib/nokogiri/version.rb +4 -0
- data/lib/nokogiri/xml/attr.rb +66 -0
- data/lib/nokogiri/xml/attribute_decl.rb +22 -0
- data/lib/nokogiri/xml/builder.rb +494 -0
- data/lib/nokogiri/xml/cdata.rb +13 -0
- data/lib/nokogiri/xml/character_data.rb +9 -0
- data/lib/nokogiri/xml/document.rb +514 -0
- data/lib/nokogiri/xml/document_fragment.rb +276 -0
- data/lib/nokogiri/xml/dtd.rb +34 -0
- data/lib/nokogiri/xml/element_content.rb +46 -0
- data/lib/nokogiri/xml/element_decl.rb +17 -0
- data/lib/nokogiri/xml/entity_decl.rb +23 -0
- data/lib/nokogiri/xml/entity_reference.rb +20 -0
- data/lib/nokogiri/xml/namespace.rb +57 -0
- data/lib/nokogiri/xml/node/save_options.rb +76 -0
- data/lib/nokogiri/xml/node.rb +1650 -0
- data/lib/nokogiri/xml/node_set.rb +449 -0
- data/lib/nokogiri/xml/notation.rb +19 -0
- data/lib/nokogiri/xml/parse_options.rb +213 -0
- data/lib/nokogiri/xml/pp/character_data.rb +21 -0
- data/lib/nokogiri/xml/pp/node.rb +73 -0
- data/lib/nokogiri/xml/pp.rb +4 -0
- data/lib/nokogiri/xml/processing_instruction.rb +11 -0
- data/lib/nokogiri/xml/reader.rb +139 -0
- data/lib/nokogiri/xml/relax_ng.rb +75 -0
- data/lib/nokogiri/xml/sax/document.rb +258 -0
- data/lib/nokogiri/xml/sax/parser.rb +199 -0
- data/lib/nokogiri/xml/sax/parser_context.rb +129 -0
- data/lib/nokogiri/xml/sax/push_parser.rb +64 -0
- data/lib/nokogiri/xml/sax.rb +54 -0
- data/lib/nokogiri/xml/schema.rb +140 -0
- data/lib/nokogiri/xml/searchable.rb +297 -0
- data/lib/nokogiri/xml/syntax_error.rb +94 -0
- data/lib/nokogiri/xml/text.rb +11 -0
- data/lib/nokogiri/xml/xpath/syntax_error.rb +13 -0
- data/lib/nokogiri/xml/xpath.rb +21 -0
- data/lib/nokogiri/xml/xpath_context.rb +49 -0
- data/lib/nokogiri/xml.rb +65 -0
- data/lib/nokogiri/xslt/stylesheet.rb +49 -0
- data/lib/nokogiri/xslt.rb +129 -0
- data/lib/nokogiri.rb +128 -0
- data/lib/xsd/xmlparser/nokogiri.rb +105 -0
- metadata +324 -0
| @@ -0,0 +1,610 @@ | |
| 1 | 
            +
            //
         | 
| 2 | 
            +
            //  Copyright 2013-2021 Sam Ruby, Stephen Checkoway
         | 
| 3 | 
            +
            //
         | 
| 4 | 
            +
            //  Licensed under the Apache License, Version 2.0 (the "License");
         | 
| 5 | 
            +
            //  you may not use this file except in compliance with the License.
         | 
| 6 | 
            +
            //  You may obtain a copy of the License at
         | 
| 7 | 
            +
            //
         | 
| 8 | 
            +
            //      http://www.apache.org/licenses/LICENSE-2.0
         | 
| 9 | 
            +
            //
         | 
| 10 | 
            +
            //  Unless required by applicable law or agreed to in writing, software
         | 
| 11 | 
            +
            //  distributed under the License is distributed on an "AS IS" BASIS,
         | 
| 12 | 
            +
            //  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
         | 
| 13 | 
            +
            //  See the License for the specific language governing permissions and
         | 
| 14 | 
            +
            //  limitations under the License.
         | 
| 15 | 
            +
            //
         | 
| 16 | 
            +
             | 
| 17 | 
            +
            //
         | 
| 18 | 
            +
            // nokogumbo.c defines the following:
         | 
| 19 | 
            +
            //
         | 
| 20 | 
            +
            //   class Nokogumbo
         | 
| 21 | 
            +
            //     def parse(utf8_string) # returns Nokogiri::HTML5::Document
         | 
| 22 | 
            +
            //   end
         | 
| 23 | 
            +
            //
         | 
| 24 | 
            +
            // Processing starts by calling gumbo_parse_with_options. The resulting document tree
         | 
| 25 | 
            +
            // is then walked, a parallel libxml2 tree is constructed, and the final document is
         | 
| 26 | 
            +
            // then wrapped using noko_xml_document_wrap. This approach reduces memory and CPU
         | 
| 27 | 
            +
            // requirements as Ruby objects are only built when necessary.
         | 
| 28 | 
            +
            //
         | 
| 29 | 
            +
             | 
| 30 | 
            +
            #include <nokogiri.h>
         | 
| 31 | 
            +
             | 
| 32 | 
            +
            #include "nokogiri_gumbo.h"
         | 
| 33 | 
            +
             | 
| 34 | 
            +
            VALUE cNokogiriHtml5Document;
         | 
| 35 | 
            +
             | 
| 36 | 
            +
            // Interned symbols
         | 
| 37 | 
            +
            static ID internal_subset;
         | 
| 38 | 
            +
            static ID parent;
         | 
| 39 | 
            +
             | 
| 40 | 
            +
            #include <nokogiri.h>
         | 
| 41 | 
            +
            #include <libxml/tree.h>
         | 
| 42 | 
            +
            #include <libxml/HTMLtree.h>
         | 
| 43 | 
            +
             | 
| 44 | 
            +
            // URI = system id
         | 
| 45 | 
            +
            // external id = public id
         | 
| 46 | 
            +
            static xmlDocPtr
         | 
| 47 | 
            +
            new_html_doc(const char *dtd_name, const char *system, const char *public)
         | 
| 48 | 
            +
            {
         | 
| 49 | 
            +
              // These two libxml2 functions take the public and system ids in
         | 
| 50 | 
            +
              // opposite orders.
         | 
| 51 | 
            +
              htmlDocPtr doc = htmlNewDocNoDtD(/* URI */ NULL, /* ExternalID */NULL);
         | 
| 52 | 
            +
              assert(doc);
         | 
| 53 | 
            +
              if (dtd_name) {
         | 
| 54 | 
            +
                xmlCreateIntSubset(doc, (const xmlChar *)dtd_name, (const xmlChar *)public, (const xmlChar *)system);
         | 
| 55 | 
            +
              }
         | 
| 56 | 
            +
              return doc;
         | 
| 57 | 
            +
            }
         | 
| 58 | 
            +
             | 
| 59 | 
            +
            static xmlNodePtr
         | 
| 60 | 
            +
            get_parent(xmlNodePtr node)
         | 
| 61 | 
            +
            {
         | 
| 62 | 
            +
              return node->parent;
         | 
| 63 | 
            +
            }
         | 
| 64 | 
            +
             | 
| 65 | 
            +
            static GumboOutput *
         | 
| 66 | 
            +
            perform_parse(const GumboOptions *options, VALUE input)
         | 
| 67 | 
            +
            {
         | 
| 68 | 
            +
              assert(RTEST(input));
         | 
| 69 | 
            +
              Check_Type(input, T_STRING);
         | 
| 70 | 
            +
              GumboOutput *output = gumbo_parse_with_options(
         | 
| 71 | 
            +
                                      options,
         | 
| 72 | 
            +
                                      RSTRING_PTR(input),
         | 
| 73 | 
            +
                                      (size_t)RSTRING_LEN(input)
         | 
| 74 | 
            +
                                    );
         | 
| 75 | 
            +
             | 
| 76 | 
            +
              const char *status_string = gumbo_status_to_string(output->status);
         | 
| 77 | 
            +
              switch (output->status) {
         | 
| 78 | 
            +
                case GUMBO_STATUS_OK:
         | 
| 79 | 
            +
                  break;
         | 
| 80 | 
            +
                case GUMBO_STATUS_TOO_MANY_ATTRIBUTES:
         | 
| 81 | 
            +
                case GUMBO_STATUS_TREE_TOO_DEEP:
         | 
| 82 | 
            +
                  gumbo_destroy_output(output);
         | 
| 83 | 
            +
                  rb_raise(rb_eArgError, "%s", status_string);
         | 
| 84 | 
            +
                case GUMBO_STATUS_OUT_OF_MEMORY:
         | 
| 85 | 
            +
                  gumbo_destroy_output(output);
         | 
| 86 | 
            +
                  rb_raise(rb_eNoMemError, "%s", status_string);
         | 
| 87 | 
            +
              }
         | 
| 88 | 
            +
              return output;
         | 
| 89 | 
            +
            }
         | 
| 90 | 
            +
             | 
| 91 | 
            +
            static xmlNsPtr
         | 
| 92 | 
            +
            lookup_or_add_ns(
         | 
| 93 | 
            +
              xmlDocPtr doc,
         | 
| 94 | 
            +
              xmlNodePtr root,
         | 
| 95 | 
            +
              const char *href,
         | 
| 96 | 
            +
              const char *prefix
         | 
| 97 | 
            +
            )
         | 
| 98 | 
            +
            {
         | 
| 99 | 
            +
              xmlNsPtr ns = xmlSearchNs(doc, root, (const xmlChar *)prefix);
         | 
| 100 | 
            +
              if (ns) {
         | 
| 101 | 
            +
                return ns;
         | 
| 102 | 
            +
              }
         | 
| 103 | 
            +
              return xmlNewNs(root, (const xmlChar *)href, (const xmlChar *)prefix);
         | 
| 104 | 
            +
            }
         | 
| 105 | 
            +
             | 
| 106 | 
            +
            static void
         | 
| 107 | 
            +
            set_line(xmlNodePtr node, size_t line)
         | 
| 108 | 
            +
            {
         | 
| 109 | 
            +
              // libxml2 uses 65535 to mean look elsewhere for the line number on some
         | 
| 110 | 
            +
              // nodes.
         | 
| 111 | 
            +
              if (line < 65535) {
         | 
| 112 | 
            +
                node->line = (unsigned short)line;
         | 
| 113 | 
            +
              }
         | 
| 114 | 
            +
            }
         | 
| 115 | 
            +
             | 
| 116 | 
            +
            // Construct an XML tree rooted at xml_output_node from the Gumbo tree rooted
         | 
| 117 | 
            +
            // at gumbo_node.
         | 
| 118 | 
            +
            static void
         | 
| 119 | 
            +
            build_tree(
         | 
| 120 | 
            +
              xmlDocPtr doc,
         | 
| 121 | 
            +
              xmlNodePtr xml_output_node,
         | 
| 122 | 
            +
              const GumboNode *gumbo_node
         | 
| 123 | 
            +
            )
         | 
| 124 | 
            +
            {
         | 
| 125 | 
            +
              xmlNodePtr xml_root = NULL;
         | 
| 126 | 
            +
              xmlNodePtr xml_node = xml_output_node;
         | 
| 127 | 
            +
              size_t child_index = 0;
         | 
| 128 | 
            +
             | 
| 129 | 
            +
              while (true) {
         | 
| 130 | 
            +
                assert(gumbo_node != NULL);
         | 
| 131 | 
            +
                const GumboVector *children = gumbo_node->type == GUMBO_NODE_DOCUMENT ?
         | 
| 132 | 
            +
                                              &gumbo_node->v.document.children : &gumbo_node->v.element.children;
         | 
| 133 | 
            +
                if (child_index >= children->length) {
         | 
| 134 | 
            +
                  // Move up the tree and to the next child.
         | 
| 135 | 
            +
                  if (xml_node == xml_output_node) {
         | 
| 136 | 
            +
                    // We've built as much of the tree as we can.
         | 
| 137 | 
            +
                    return;
         | 
| 138 | 
            +
                  }
         | 
| 139 | 
            +
                  child_index = gumbo_node->index_within_parent + 1;
         | 
| 140 | 
            +
                  gumbo_node = gumbo_node->parent;
         | 
| 141 | 
            +
                  xml_node = get_parent(xml_node);
         | 
| 142 | 
            +
                  // Children of fragments don't share the same root, so reset it and
         | 
| 143 | 
            +
                  // it'll be set below. In the non-fragment case, this will only happen
         | 
| 144 | 
            +
                  // after the html element has been finished at which point there are no
         | 
| 145 | 
            +
                  // further elements.
         | 
| 146 | 
            +
                  if (xml_node == xml_output_node) {
         | 
| 147 | 
            +
                    xml_root = NULL;
         | 
| 148 | 
            +
                  }
         | 
| 149 | 
            +
                  continue;
         | 
| 150 | 
            +
                }
         | 
| 151 | 
            +
                const GumboNode *gumbo_child = children->data[child_index++];
         | 
| 152 | 
            +
                xmlNodePtr xml_child;
         | 
| 153 | 
            +
             | 
| 154 | 
            +
                switch (gumbo_child->type) {
         | 
| 155 | 
            +
                  case GUMBO_NODE_DOCUMENT:
         | 
| 156 | 
            +
                    abort(); // Bug in Gumbo.
         | 
| 157 | 
            +
             | 
| 158 | 
            +
                  case GUMBO_NODE_TEXT:
         | 
| 159 | 
            +
                  case GUMBO_NODE_WHITESPACE:
         | 
| 160 | 
            +
                    xml_child = xmlNewDocText(doc, (const xmlChar *)gumbo_child->v.text.text);
         | 
| 161 | 
            +
                    set_line(xml_child, gumbo_child->v.text.start_pos.line);
         | 
| 162 | 
            +
                    xmlAddChild(xml_node, xml_child);
         | 
| 163 | 
            +
                    break;
         | 
| 164 | 
            +
             | 
| 165 | 
            +
                  case GUMBO_NODE_CDATA:
         | 
| 166 | 
            +
                    xml_child = xmlNewCDataBlock(doc, (const xmlChar *)gumbo_child->v.text.text,
         | 
| 167 | 
            +
                                                 (int) strlen(gumbo_child->v.text.text));
         | 
| 168 | 
            +
                    set_line(xml_child, gumbo_child->v.text.start_pos.line);
         | 
| 169 | 
            +
                    xmlAddChild(xml_node, xml_child);
         | 
| 170 | 
            +
                    break;
         | 
| 171 | 
            +
             | 
| 172 | 
            +
                  case GUMBO_NODE_COMMENT:
         | 
| 173 | 
            +
                    xml_child = xmlNewDocComment(doc, (const xmlChar *)gumbo_child->v.text.text);
         | 
| 174 | 
            +
                    set_line(xml_child, gumbo_child->v.text.start_pos.line);
         | 
| 175 | 
            +
                    xmlAddChild(xml_node, xml_child);
         | 
| 176 | 
            +
                    break;
         | 
| 177 | 
            +
             | 
| 178 | 
            +
                  case GUMBO_NODE_TEMPLATE:
         | 
| 179 | 
            +
                  // XXX: Should create a template element and a new DocumentFragment
         | 
| 180 | 
            +
                  case GUMBO_NODE_ELEMENT: {
         | 
| 181 | 
            +
                    xml_child = xmlNewDocNode(doc, NULL, (const xmlChar *)gumbo_child->v.element.name, NULL);
         | 
| 182 | 
            +
                    set_line(xml_child, gumbo_child->v.element.start_pos.line);
         | 
| 183 | 
            +
                    if (xml_root == NULL) {
         | 
| 184 | 
            +
                      xml_root = xml_child;
         | 
| 185 | 
            +
                    }
         | 
| 186 | 
            +
                    xmlNsPtr ns = NULL;
         | 
| 187 | 
            +
                    switch (gumbo_child->v.element.tag_namespace) {
         | 
| 188 | 
            +
                      case GUMBO_NAMESPACE_HTML:
         | 
| 189 | 
            +
                        break;
         | 
| 190 | 
            +
                      case GUMBO_NAMESPACE_SVG:
         | 
| 191 | 
            +
                        ns = lookup_or_add_ns(doc, xml_root, "http://www.w3.org/2000/svg", "svg");
         | 
| 192 | 
            +
                        break;
         | 
| 193 | 
            +
                      case GUMBO_NAMESPACE_MATHML:
         | 
| 194 | 
            +
                        ns = lookup_or_add_ns(doc, xml_root, "http://www.w3.org/1998/Math/MathML", "math");
         | 
| 195 | 
            +
                        break;
         | 
| 196 | 
            +
                    }
         | 
| 197 | 
            +
                    if (ns != NULL) {
         | 
| 198 | 
            +
                      xmlSetNs(xml_child, ns);
         | 
| 199 | 
            +
                    }
         | 
| 200 | 
            +
                    xmlAddChild(xml_node, xml_child);
         | 
| 201 | 
            +
             | 
| 202 | 
            +
                    // Add the attributes.
         | 
| 203 | 
            +
                    const GumboVector *attrs = &gumbo_child->v.element.attributes;
         | 
| 204 | 
            +
                    for (size_t i = 0; i < attrs->length; i++) {
         | 
| 205 | 
            +
                      const GumboAttribute *attr = attrs->data[i];
         | 
| 206 | 
            +
             | 
| 207 | 
            +
                      switch (attr->attr_namespace) {
         | 
| 208 | 
            +
                        case GUMBO_ATTR_NAMESPACE_XLINK:
         | 
| 209 | 
            +
                          ns = lookup_or_add_ns(doc, xml_root, "http://www.w3.org/1999/xlink", "xlink");
         | 
| 210 | 
            +
                          break;
         | 
| 211 | 
            +
             | 
| 212 | 
            +
                        case GUMBO_ATTR_NAMESPACE_XML:
         | 
| 213 | 
            +
                          ns = lookup_or_add_ns(doc, xml_root, "http://www.w3.org/XML/1998/namespace", "xml");
         | 
| 214 | 
            +
                          break;
         | 
| 215 | 
            +
             | 
| 216 | 
            +
                        case GUMBO_ATTR_NAMESPACE_XMLNS:
         | 
| 217 | 
            +
                          ns = lookup_or_add_ns(doc, xml_root, "http://www.w3.org/2000/xmlns/", "xmlns");
         | 
| 218 | 
            +
                          break;
         | 
| 219 | 
            +
             | 
| 220 | 
            +
                        default:
         | 
| 221 | 
            +
                          ns = NULL;
         | 
| 222 | 
            +
                      }
         | 
| 223 | 
            +
                      xmlNewNsProp(xml_child, ns, (const xmlChar *)attr->name, (const xmlChar *)attr->value);
         | 
| 224 | 
            +
                    }
         | 
| 225 | 
            +
             | 
| 226 | 
            +
                    // Add children for this element.
         | 
| 227 | 
            +
                    child_index = 0;
         | 
| 228 | 
            +
                    gumbo_node = gumbo_child;
         | 
| 229 | 
            +
                    xml_node = xml_child;
         | 
| 230 | 
            +
                  }
         | 
| 231 | 
            +
                }
         | 
| 232 | 
            +
              }
         | 
| 233 | 
            +
            }
         | 
| 234 | 
            +
             | 
| 235 | 
            +
            static void
         | 
| 236 | 
            +
            add_errors(const GumboOutput *output, VALUE rdoc, VALUE input, VALUE url)
         | 
| 237 | 
            +
            {
         | 
| 238 | 
            +
              const char *input_str = RSTRING_PTR(input);
         | 
| 239 | 
            +
              size_t input_len = (size_t)RSTRING_LEN(input);
         | 
| 240 | 
            +
             | 
| 241 | 
            +
              // Add parse errors to rdoc.
         | 
| 242 | 
            +
              if (output->errors.length) {
         | 
| 243 | 
            +
                const GumboVector *errors = &output->errors;
         | 
| 244 | 
            +
                VALUE rerrors = rb_ary_new2(errors->length);
         | 
| 245 | 
            +
             | 
| 246 | 
            +
                for (size_t i = 0; i < errors->length; i++) {
         | 
| 247 | 
            +
                  GumboError *err = errors->data[i];
         | 
| 248 | 
            +
                  GumboSourcePosition position = gumbo_error_position(err);
         | 
| 249 | 
            +
                  char *msg;
         | 
| 250 | 
            +
                  size_t size = gumbo_caret_diagnostic_to_string(err, input_str, input_len, &msg);
         | 
| 251 | 
            +
                  VALUE err_str = rb_utf8_str_new(msg, (int)size);
         | 
| 252 | 
            +
                  free(msg);
         | 
| 253 | 
            +
                  VALUE syntax_error = rb_class_new_instance(1, &err_str, cNokogiriXmlSyntaxError);
         | 
| 254 | 
            +
                  const char *error_code = gumbo_error_code(err);
         | 
| 255 | 
            +
                  VALUE str1 = error_code ? rb_utf8_str_new_static(error_code, (int)strlen(error_code)) : Qnil;
         | 
| 256 | 
            +
                  rb_iv_set(syntax_error, "@domain", INT2NUM(1)); // XML_FROM_PARSER
         | 
| 257 | 
            +
                  rb_iv_set(syntax_error, "@code", INT2NUM(1));   // XML_ERR_INTERNAL_ERROR
         | 
| 258 | 
            +
                  rb_iv_set(syntax_error, "@level", INT2NUM(2));  // XML_ERR_ERROR
         | 
| 259 | 
            +
                  rb_iv_set(syntax_error, "@file", url);
         | 
| 260 | 
            +
                  rb_iv_set(syntax_error, "@line", SIZET2NUM(position.line));
         | 
| 261 | 
            +
                  rb_iv_set(syntax_error, "@str1", str1);
         | 
| 262 | 
            +
                  rb_iv_set(syntax_error, "@str2", Qnil);
         | 
| 263 | 
            +
                  rb_iv_set(syntax_error, "@str3", Qnil);
         | 
| 264 | 
            +
                  rb_iv_set(syntax_error, "@int1", INT2NUM(0));
         | 
| 265 | 
            +
                  rb_iv_set(syntax_error, "@column", SIZET2NUM(position.column));
         | 
| 266 | 
            +
                  rb_ary_push(rerrors, syntax_error);
         | 
| 267 | 
            +
                }
         | 
| 268 | 
            +
                rb_iv_set(rdoc, "@errors", rerrors);
         | 
| 269 | 
            +
              }
         | 
| 270 | 
            +
            }
         | 
| 271 | 
            +
             | 
| 272 | 
            +
            typedef struct {
         | 
| 273 | 
            +
              GumboOutput *output;
         | 
| 274 | 
            +
              VALUE input;
         | 
| 275 | 
            +
              VALUE url_or_frag;
         | 
| 276 | 
            +
              VALUE klass;
         | 
| 277 | 
            +
              xmlDocPtr doc;
         | 
| 278 | 
            +
            } ParseArgs;
         | 
| 279 | 
            +
             | 
| 280 | 
            +
            static VALUE
         | 
| 281 | 
            +
            parse_cleanup(VALUE parse_args)
         | 
| 282 | 
            +
            {
         | 
| 283 | 
            +
              ParseArgs *args = (ParseArgs *)parse_args;
         | 
| 284 | 
            +
              gumbo_destroy_output(args->output);
         | 
| 285 | 
            +
              // Make sure garbage collection doesn't mark the objects as being live based
         | 
| 286 | 
            +
              // on references from the ParseArgs. This may be unnecessary.
         | 
| 287 | 
            +
              args->input = Qnil;
         | 
| 288 | 
            +
              args->url_or_frag = Qnil;
         | 
| 289 | 
            +
              if (args->doc != NULL) {
         | 
| 290 | 
            +
                xmlFreeDoc(args->doc);
         | 
| 291 | 
            +
              }
         | 
| 292 | 
            +
              return Qnil;
         | 
| 293 | 
            +
            }
         | 
| 294 | 
            +
             | 
| 295 | 
            +
            // Scan the keyword arguments for options common to the document and fragment
         | 
| 296 | 
            +
            // parse.
         | 
| 297 | 
            +
            static GumboOptions
         | 
| 298 | 
            +
            common_options(VALUE kwargs)
         | 
| 299 | 
            +
            {
         | 
| 300 | 
            +
              // The order of the keywords determines the order of the values below.
         | 
| 301 | 
            +
              // If this order is changed, then setting the options below must change as
         | 
| 302 | 
            +
              // well.
         | 
| 303 | 
            +
              ID keywords[] = {
         | 
| 304 | 
            +
                // Required keywords.
         | 
| 305 | 
            +
                rb_intern_const("max_attributes"),
         | 
| 306 | 
            +
                rb_intern_const("max_errors"),
         | 
| 307 | 
            +
                rb_intern_const("max_tree_depth"),
         | 
| 308 | 
            +
             | 
| 309 | 
            +
                // Optional keywords.
         | 
| 310 | 
            +
                rb_intern_const("parse_noscript_content_as_text"),
         | 
| 311 | 
            +
              };
         | 
| 312 | 
            +
              VALUE values[sizeof keywords / sizeof keywords[0]];
         | 
| 313 | 
            +
             | 
| 314 | 
            +
              // Extract the values coresponding to the required keywords. Raise an error
         | 
| 315 | 
            +
              // if required arguments are missing.
         | 
| 316 | 
            +
              rb_get_kwargs(kwargs, keywords, 3, 1, values);
         | 
| 317 | 
            +
             | 
| 318 | 
            +
              GumboOptions options = kGumboDefaultOptions;
         | 
| 319 | 
            +
              options.max_attributes = NUM2INT(values[0]);
         | 
| 320 | 
            +
              options.max_errors = NUM2INT(values[1]);
         | 
| 321 | 
            +
             | 
| 322 | 
            +
              // handle negative values
         | 
| 323 | 
            +
              int depth = NUM2INT(values[2]);
         | 
| 324 | 
            +
              options.max_tree_depth = depth < 0 ? UINT_MAX : (unsigned int)depth;
         | 
| 325 | 
            +
             | 
| 326 | 
            +
              options.parse_noscript_content_as_text = values[3] != Qundef && RTEST(values[3]);
         | 
| 327 | 
            +
             | 
| 328 | 
            +
              return options;
         | 
| 329 | 
            +
            }
         | 
| 330 | 
            +
             | 
| 331 | 
            +
            static VALUE parse_continue(VALUE parse_args);
         | 
| 332 | 
            +
             | 
| 333 | 
            +
            /*
         | 
| 334 | 
            +
             *  @!visibility protected
         | 
| 335 | 
            +
             */
         | 
| 336 | 
            +
            static VALUE
         | 
| 337 | 
            +
            noko_gumbo_s_parse(int argc, VALUE *argv, VALUE _self)
         | 
| 338 | 
            +
            {
         | 
| 339 | 
            +
              VALUE input, url, klass, kwargs;
         | 
| 340 | 
            +
             | 
| 341 | 
            +
              rb_scan_args(argc, argv, "3:", &input, &url, &klass, &kwargs);
         | 
| 342 | 
            +
              if (NIL_P(kwargs)) {
         | 
| 343 | 
            +
                kwargs = rb_hash_new();
         | 
| 344 | 
            +
              }
         | 
| 345 | 
            +
             | 
| 346 | 
            +
              GumboOptions options = common_options(kwargs);
         | 
| 347 | 
            +
             | 
| 348 | 
            +
              GumboOutput *output = perform_parse(&options, input);
         | 
| 349 | 
            +
              ParseArgs args = {
         | 
| 350 | 
            +
                .output = output,
         | 
| 351 | 
            +
                .input = input,
         | 
| 352 | 
            +
                .url_or_frag = url,
         | 
| 353 | 
            +
                .klass = klass,
         | 
| 354 | 
            +
                .doc = NULL,
         | 
| 355 | 
            +
              };
         | 
| 356 | 
            +
             | 
| 357 | 
            +
              return rb_ensure(parse_continue, (VALUE)(&args), parse_cleanup, (VALUE)(&args));
         | 
| 358 | 
            +
            }
         | 
| 359 | 
            +
             | 
| 360 | 
            +
            static VALUE
         | 
| 361 | 
            +
            parse_continue(VALUE parse_args)
         | 
| 362 | 
            +
            {
         | 
| 363 | 
            +
              ParseArgs *args = (ParseArgs *)parse_args;
         | 
| 364 | 
            +
              GumboOutput *output = args->output;
         | 
| 365 | 
            +
              xmlDocPtr doc;
         | 
| 366 | 
            +
              if (output->document->v.document.has_doctype) {
         | 
| 367 | 
            +
                const char *name   = output->document->v.document.name;
         | 
| 368 | 
            +
                const char *public = output->document->v.document.public_identifier;
         | 
| 369 | 
            +
                const char *system = output->document->v.document.system_identifier;
         | 
| 370 | 
            +
                public = public[0] ? public : NULL;
         | 
| 371 | 
            +
                system = system[0] ? system : NULL;
         | 
| 372 | 
            +
                doc = new_html_doc(name, system, public);
         | 
| 373 | 
            +
              } else {
         | 
| 374 | 
            +
                doc = new_html_doc(NULL, NULL, NULL);
         | 
| 375 | 
            +
              }
         | 
| 376 | 
            +
              args->doc = doc; // Make sure doc gets cleaned up if an error is thrown.
         | 
| 377 | 
            +
              build_tree(doc, (xmlNodePtr)doc, output->document);
         | 
| 378 | 
            +
              VALUE rdoc = noko_xml_document_wrap(args->klass, doc);
         | 
| 379 | 
            +
              rb_iv_set(rdoc, "@url", args->url_or_frag);
         | 
| 380 | 
            +
              rb_iv_set(rdoc, "@quirks_mode", INT2NUM(output->document->v.document.doc_type_quirks_mode));
         | 
| 381 | 
            +
              args->doc = NULL; // The Ruby runtime now owns doc so don't delete it.
         | 
| 382 | 
            +
              add_errors(output, rdoc, args->input, args->url_or_frag);
         | 
| 383 | 
            +
              return rdoc;
         | 
| 384 | 
            +
            }
         | 
| 385 | 
            +
             | 
| 386 | 
            +
            static int
         | 
| 387 | 
            +
            lookup_namespace(VALUE node, bool require_known_ns)
         | 
| 388 | 
            +
            {
         | 
| 389 | 
            +
              ID namespace, href;
         | 
| 390 | 
            +
              CONST_ID(namespace, "namespace");
         | 
| 391 | 
            +
              CONST_ID(href, "href");
         | 
| 392 | 
            +
              VALUE ns = rb_funcall(node, namespace, 0);
         | 
| 393 | 
            +
             | 
| 394 | 
            +
              if (NIL_P(ns)) {
         | 
| 395 | 
            +
                return GUMBO_NAMESPACE_HTML;
         | 
| 396 | 
            +
              }
         | 
| 397 | 
            +
              ns = rb_funcall(ns, href, 0);
         | 
| 398 | 
            +
              assert(RTEST(ns));
         | 
| 399 | 
            +
              Check_Type(ns, T_STRING);
         | 
| 400 | 
            +
             | 
| 401 | 
            +
              const char *href_ptr = RSTRING_PTR(ns);
         | 
| 402 | 
            +
              size_t href_len = (size_t)RSTRING_LEN(ns);
         | 
| 403 | 
            +
            #define NAMESPACE_P(uri) (href_len == sizeof uri - 1 && !memcmp(href_ptr, uri, href_len))
         | 
| 404 | 
            +
              if (NAMESPACE_P("http://www.w3.org/1999/xhtml")) {
         | 
| 405 | 
            +
                return GUMBO_NAMESPACE_HTML;
         | 
| 406 | 
            +
              }
         | 
| 407 | 
            +
              if (NAMESPACE_P("http://www.w3.org/1998/Math/MathML")) {
         | 
| 408 | 
            +
                return GUMBO_NAMESPACE_MATHML;
         | 
| 409 | 
            +
              }
         | 
| 410 | 
            +
              if (NAMESPACE_P("http://www.w3.org/2000/svg")) {
         | 
| 411 | 
            +
                return GUMBO_NAMESPACE_SVG;
         | 
| 412 | 
            +
              }
         | 
| 413 | 
            +
            #undef NAMESPACE_P
         | 
| 414 | 
            +
              if (require_known_ns) {
         | 
| 415 | 
            +
                rb_raise(rb_eArgError, "Unexpected namespace URI \"%*s\"", (int)href_len, href_ptr);
         | 
| 416 | 
            +
              }
         | 
| 417 | 
            +
              return -1;
         | 
| 418 | 
            +
            }
         | 
| 419 | 
            +
             | 
| 420 | 
            +
            static xmlNodePtr
         | 
| 421 | 
            +
            extract_xml_node(VALUE node)
         | 
| 422 | 
            +
            {
         | 
| 423 | 
            +
              xmlNodePtr xml_node;
         | 
| 424 | 
            +
              Noko_Node_Get_Struct(node, xmlNode, xml_node);
         | 
| 425 | 
            +
              return xml_node;
         | 
| 426 | 
            +
            }
         | 
| 427 | 
            +
             | 
| 428 | 
            +
            static VALUE fragment_continue(VALUE parse_args);
         | 
| 429 | 
            +
             | 
| 430 | 
            +
            /*
         | 
| 431 | 
            +
             *  @!visibility protected
         | 
| 432 | 
            +
             */
         | 
| 433 | 
            +
            static VALUE
         | 
| 434 | 
            +
            noko_gumbo_s_fragment(int argc, VALUE *argv, VALUE _self)
         | 
| 435 | 
            +
            {
         | 
| 436 | 
            +
              VALUE doc_fragment;
         | 
| 437 | 
            +
              VALUE tags;
         | 
| 438 | 
            +
              VALUE ctx;
         | 
| 439 | 
            +
              VALUE kwargs;
         | 
| 440 | 
            +
              ID name = rb_intern_const("name");
         | 
| 441 | 
            +
              const char *ctx_tag;
         | 
| 442 | 
            +
              GumboNamespaceEnum ctx_ns;
         | 
| 443 | 
            +
              GumboQuirksModeEnum quirks_mode;
         | 
| 444 | 
            +
              bool form = false;
         | 
| 445 | 
            +
              const char *encoding = NULL;
         | 
| 446 | 
            +
             | 
| 447 | 
            +
              rb_scan_args(argc, argv, "3:", &doc_fragment, &tags, &ctx, &kwargs);
         | 
| 448 | 
            +
              if (NIL_P(kwargs)) {
         | 
| 449 | 
            +
                kwargs = rb_hash_new();
         | 
| 450 | 
            +
              }
         | 
| 451 | 
            +
             | 
| 452 | 
            +
              GumboOptions options = common_options(kwargs);
         | 
| 453 | 
            +
             | 
| 454 | 
            +
              if (NIL_P(ctx)) {
         | 
| 455 | 
            +
                ctx_tag = "body";
         | 
| 456 | 
            +
                ctx_ns = GUMBO_NAMESPACE_HTML;
         | 
| 457 | 
            +
              } else if (TYPE(ctx) == T_STRING) {
         | 
| 458 | 
            +
                ctx_tag = StringValueCStr(ctx);
         | 
| 459 | 
            +
                ctx_ns = GUMBO_NAMESPACE_HTML;
         | 
| 460 | 
            +
                size_t len = (size_t)RSTRING_LEN(ctx);
         | 
| 461 | 
            +
                const char *colon = memchr(ctx_tag, ':', len);
         | 
| 462 | 
            +
                if (colon) {
         | 
| 463 | 
            +
                  switch (colon - ctx_tag) {
         | 
| 464 | 
            +
                    case 3:
         | 
| 465 | 
            +
                      if (st_strncasecmp(ctx_tag, "svg", 3) != 0) {
         | 
| 466 | 
            +
                        goto error;
         | 
| 467 | 
            +
                      }
         | 
| 468 | 
            +
                      ctx_ns = GUMBO_NAMESPACE_SVG;
         | 
| 469 | 
            +
                      break;
         | 
| 470 | 
            +
                    case 4:
         | 
| 471 | 
            +
                      if (st_strncasecmp(ctx_tag, "html", 4) == 0) {
         | 
| 472 | 
            +
                        ctx_ns = GUMBO_NAMESPACE_HTML;
         | 
| 473 | 
            +
                      } else if (st_strncasecmp(ctx_tag, "math", 4) == 0) {
         | 
| 474 | 
            +
                        ctx_ns = GUMBO_NAMESPACE_MATHML;
         | 
| 475 | 
            +
                      } else {
         | 
| 476 | 
            +
                        goto error;
         | 
| 477 | 
            +
                      }
         | 
| 478 | 
            +
                      break;
         | 
| 479 | 
            +
                    default:
         | 
| 480 | 
            +
            error:
         | 
| 481 | 
            +
                      rb_raise(rb_eArgError, "Invalid context namespace '%*s'", (int)(colon - ctx_tag), ctx_tag);
         | 
| 482 | 
            +
                  }
         | 
| 483 | 
            +
                  ctx_tag = colon + 1;
         | 
| 484 | 
            +
                } else {
         | 
| 485 | 
            +
                  // For convenience, put 'svg' and 'math' in their namespaces.
         | 
| 486 | 
            +
                  if (len == 3 && st_strncasecmp(ctx_tag, "svg", 3) == 0) {
         | 
| 487 | 
            +
                    ctx_ns = GUMBO_NAMESPACE_SVG;
         | 
| 488 | 
            +
                  } else if (len == 4 && st_strncasecmp(ctx_tag, "math", 4) == 0) {
         | 
| 489 | 
            +
                    ctx_ns = GUMBO_NAMESPACE_MATHML;
         | 
| 490 | 
            +
                  }
         | 
| 491 | 
            +
                }
         | 
| 492 | 
            +
             | 
| 493 | 
            +
                // Check if it's a form.
         | 
| 494 | 
            +
                form = ctx_ns == GUMBO_NAMESPACE_HTML && st_strcasecmp(ctx_tag, "form") == 0;
         | 
| 495 | 
            +
              } else {
         | 
| 496 | 
            +
                ID element_ = rb_intern_const("element?");
         | 
| 497 | 
            +
             | 
| 498 | 
            +
                // Context fragment name.
         | 
| 499 | 
            +
                VALUE tag_name = rb_funcall(ctx, name, 0);
         | 
| 500 | 
            +
                assert(RTEST(tag_name));
         | 
| 501 | 
            +
                Check_Type(tag_name, T_STRING);
         | 
| 502 | 
            +
                ctx_tag = StringValueCStr(tag_name);
         | 
| 503 | 
            +
             | 
| 504 | 
            +
                // Context fragment namespace.
         | 
| 505 | 
            +
                ctx_ns = lookup_namespace(ctx, true);
         | 
| 506 | 
            +
             | 
| 507 | 
            +
                // Check for a form ancestor, including self.
         | 
| 508 | 
            +
                for (VALUE node = ctx;
         | 
| 509 | 
            +
                     !NIL_P(node);
         | 
| 510 | 
            +
                     node = rb_respond_to(node, parent) ? rb_funcall(node, parent, 0) : Qnil) {
         | 
| 511 | 
            +
                  if (!RTEST(rb_funcall(node, element_, 0))) {
         | 
| 512 | 
            +
                    continue;
         | 
| 513 | 
            +
                  }
         | 
| 514 | 
            +
                  VALUE element_name = rb_funcall(node, name, 0);
         | 
| 515 | 
            +
                  if (RSTRING_LEN(element_name) == 4
         | 
| 516 | 
            +
                      && !st_strcasecmp(RSTRING_PTR(element_name), "form")
         | 
| 517 | 
            +
                      && lookup_namespace(node, false) == GUMBO_NAMESPACE_HTML) {
         | 
| 518 | 
            +
                    form = true;
         | 
| 519 | 
            +
                    break;
         | 
| 520 | 
            +
                  }
         | 
| 521 | 
            +
                }
         | 
| 522 | 
            +
             | 
| 523 | 
            +
                // Encoding.
         | 
| 524 | 
            +
                if (ctx_ns == GUMBO_NAMESPACE_MATHML
         | 
| 525 | 
            +
                    && RSTRING_LEN(tag_name) == 14
         | 
| 526 | 
            +
                    && !st_strcasecmp(ctx_tag, "annotation-xml")) {
         | 
| 527 | 
            +
                  VALUE enc = rb_funcall(ctx, rb_intern_const("[]"),
         | 
| 528 | 
            +
                                         1,
         | 
| 529 | 
            +
                                         rb_utf8_str_new_static("encoding", 8));
         | 
| 530 | 
            +
                  if (RTEST(enc)) {
         | 
| 531 | 
            +
                    Check_Type(enc, T_STRING);
         | 
| 532 | 
            +
                    encoding = StringValueCStr(enc);
         | 
| 533 | 
            +
                  }
         | 
| 534 | 
            +
                }
         | 
| 535 | 
            +
              }
         | 
| 536 | 
            +
             | 
| 537 | 
            +
              // Quirks mode.
         | 
| 538 | 
            +
              VALUE doc = rb_funcall(doc_fragment, rb_intern_const("document"), 0);
         | 
| 539 | 
            +
              VALUE dtd = rb_funcall(doc, internal_subset, 0);
         | 
| 540 | 
            +
              VALUE doc_quirks_mode = rb_iv_get(doc, "@quirks_mode");
         | 
| 541 | 
            +
              if (NIL_P(ctx) || (TYPE(ctx) == T_STRING) || NIL_P(doc_quirks_mode)) {
         | 
| 542 | 
            +
                quirks_mode = GUMBO_DOCTYPE_NO_QUIRKS;
         | 
| 543 | 
            +
              } else if (NIL_P(dtd)) {
         | 
| 544 | 
            +
                quirks_mode = GUMBO_DOCTYPE_QUIRKS;
         | 
| 545 | 
            +
              } else {
         | 
| 546 | 
            +
                VALUE dtd_name = rb_funcall(dtd, name, 0);
         | 
| 547 | 
            +
                VALUE pubid = rb_funcall(dtd, rb_intern_const("external_id"), 0);
         | 
| 548 | 
            +
                VALUE sysid = rb_funcall(dtd, rb_intern_const("system_id"), 0);
         | 
| 549 | 
            +
                quirks_mode = gumbo_compute_quirks_mode(
         | 
| 550 | 
            +
                                NIL_P(dtd_name) ? NULL : StringValueCStr(dtd_name),
         | 
| 551 | 
            +
                                NIL_P(pubid) ? NULL : StringValueCStr(pubid),
         | 
| 552 | 
            +
                                NIL_P(sysid) ? NULL : StringValueCStr(sysid)
         | 
| 553 | 
            +
                              );
         | 
| 554 | 
            +
              }
         | 
| 555 | 
            +
             | 
| 556 | 
            +
              // Perform a fragment parse.
         | 
| 557 | 
            +
              options.fragment_context = ctx_tag;
         | 
| 558 | 
            +
              options.fragment_namespace = ctx_ns;
         | 
| 559 | 
            +
              options.fragment_encoding = encoding;
         | 
| 560 | 
            +
              options.quirks_mode = quirks_mode;
         | 
| 561 | 
            +
              options.fragment_context_has_form_ancestor = form;
         | 
| 562 | 
            +
             | 
| 563 | 
            +
              // Add one to the max tree depth to account for the HTML element.
         | 
| 564 | 
            +
              if (options.max_tree_depth < UINT_MAX) { options.max_tree_depth++; }
         | 
| 565 | 
            +
             | 
| 566 | 
            +
              GumboOutput *output = perform_parse(&options, tags);
         | 
| 567 | 
            +
              ParseArgs args = {
         | 
| 568 | 
            +
                .output = output,
         | 
| 569 | 
            +
                .input = tags,
         | 
| 570 | 
            +
                .url_or_frag = doc_fragment,
         | 
| 571 | 
            +
                .doc = (xmlDocPtr)extract_xml_node(doc),
         | 
| 572 | 
            +
              };
         | 
| 573 | 
            +
              rb_ensure(fragment_continue, (VALUE)(&args), parse_cleanup, (VALUE)(&args));
         | 
| 574 | 
            +
              return Qnil;
         | 
| 575 | 
            +
            }
         | 
| 576 | 
            +
             | 
| 577 | 
            +
            static VALUE
         | 
| 578 | 
            +
            fragment_continue(VALUE parse_args)
         | 
| 579 | 
            +
            {
         | 
| 580 | 
            +
              ParseArgs *args = (ParseArgs *)parse_args;
         | 
| 581 | 
            +
              GumboOutput *output = args->output;
         | 
| 582 | 
            +
              VALUE doc_fragment = args->url_or_frag;
         | 
| 583 | 
            +
              xmlDocPtr xml_doc = args->doc;
         | 
| 584 | 
            +
             | 
| 585 | 
            +
              args->doc = NULL; // The Ruby runtime owns doc so make sure we don't delete it.
         | 
| 586 | 
            +
              xmlNodePtr xml_frag = extract_xml_node(doc_fragment);
         | 
| 587 | 
            +
              build_tree(xml_doc, xml_frag, output->root);
         | 
| 588 | 
            +
              rb_iv_set(doc_fragment, "@quirks_mode", INT2NUM(output->document->v.document.doc_type_quirks_mode));
         | 
| 589 | 
            +
              add_errors(output, doc_fragment, args->input, rb_utf8_str_new_static("#fragment", 9));
         | 
| 590 | 
            +
              return Qnil;
         | 
| 591 | 
            +
            }
         | 
| 592 | 
            +
             | 
| 593 | 
            +
            // Initialize the Nokogumbo class and fetch constants we will use later.
         | 
| 594 | 
            +
            void
         | 
| 595 | 
            +
            noko_init_gumbo(void)
         | 
| 596 | 
            +
            {
         | 
| 597 | 
            +
              // Class constants.
         | 
| 598 | 
            +
              cNokogiriHtml5Document = rb_define_class_under(mNokogiriHtml5, "Document", cNokogiriHtml4Document);
         | 
| 599 | 
            +
              rb_gc_register_mark_object(cNokogiriHtml5Document);
         | 
| 600 | 
            +
             | 
| 601 | 
            +
              // Interned symbols.
         | 
| 602 | 
            +
              internal_subset = rb_intern_const("internal_subset");
         | 
| 603 | 
            +
              parent = rb_intern_const("parent");
         | 
| 604 | 
            +
             | 
| 605 | 
            +
              // Define Nokogumbo module with parse and fragment methods.
         | 
| 606 | 
            +
              rb_define_singleton_method(mNokogiriGumbo, "parse", noko_gumbo_s_parse, -1);
         | 
| 607 | 
            +
              rb_define_singleton_method(mNokogiriGumbo, "fragment", noko_gumbo_s_fragment, -1);
         | 
| 608 | 
            +
            }
         | 
| 609 | 
            +
             | 
| 610 | 
            +
            // vim: set shiftwidth=2 softtabstop=2 tabstop=8 expandtab:
         |