RubyGems - nokogumbo - Versions diffs - 0.9 → 0.10 - Mend

nokogumbo 0.9 → 0.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (28) hide show

checksums.yaml +15 -0
data/README.md +15 -10
data/ext/nokogumboc/extconf.rb +34 -0
data/ext/nokogumboc/nokogumbo.c +192 -0
data/gumbo-parser/src/attribute.c +3 -3
data/gumbo-parser/src/attribute.h +2 -2
data/gumbo-parser/src/char_ref.c +8 -8
data/gumbo-parser/src/char_ref.h +3 -3
data/gumbo-parser/src/error.h +13 -13
data/gumbo-parser/src/gumbo.h +21 -22
data/gumbo-parser/src/insertion_mode.h +1 -1
data/gumbo-parser/src/parser.c +1 -1
data/gumbo-parser/src/parser.h +9 -9
data/gumbo-parser/src/string_buffer.c +8 -8
data/gumbo-parser/src/string_buffer.h +10 -11
data/gumbo-parser/src/string_piece.c +2 -2
data/gumbo-parser/src/string_piece.h +2 -2
data/gumbo-parser/src/token_type.h +1 -1
data/gumbo-parser/src/tokenizer.c +2 -2
data/gumbo-parser/src/tokenizer.h +12 -12
data/gumbo-parser/src/tokenizer_states.h +1 -1
data/gumbo-parser/src/utf8.h +7 -7
data/gumbo-parser/src/util.h +5 -4
data/gumbo-parser/src/vector.c +11 -9
data/gumbo-parser/src/vector.h +11 -8
metadata +27 -31
data/work/extconf.rb +0 -21
data/work/nokogumbo.c +0 -100

checksums.yaml ADDED Viewed

@@ -0,0 +1,15 @@
+---
+!binary "U0hBMQ==":
+  metadata.gz: !binary |-
+    MDQzNzc1N2Q4MDg5Y2Q3OTgyMTg1MGExOGViNmIxOGIzODcyYzgwOQ==
+  data.tar.gz: !binary |-
+    YTYyNmMzZTEyNjFjODYyMjY3MTRmNmU1YWIwZjMyZTZiN2ZkMGU4Yg==
+!binary "U0hBNTEy":
+  metadata.gz: !binary |-
+    NjMzYjg0YmU2NzZkZjUyN2YxZDZjOTM3ZWM5MDkxMjk0OWE5YjA3M2NjY2Y3
+    NDgwZTgxN2U0NWIyNzBkMmNlN2E4NjA1ODhmNGNiNTBhYzE3YWJkOTRiNjg3
+    YmMyZDVkYzIxZjljM2FjNDBhMmFiZTZmOWI1ODgzNmQwNzM4ZGY=
+  data.tar.gz: !binary |-
+    NjgzNTljOWNmMjY1MDBkZDc4ZmYzZGZlZDg2MDMxNzY0YTM2OWI3Y2Q1NDdl
+    M2FhNWE1M2U5YTQ0MjdlNTc0ZWM1OTg3NTkxMzdlNGU3MDdkNzBhZmNhZTUy
+    OTkzYWE0MjBiZTYyNmEyMGJiNDc2MTE4YWFiYWZkM2YwZDRlNTk=

data/README.md CHANGED Viewed

@@ -34,13 +34,16 @@ Notes
 * The `Nokogiri::HTML5.parse` function takes a string and passes it to the
 <code>gumbo_parse_with_options</code> method, using the default options.
-The resulting Gumbo parse tree is the walked, producing a
-[libxml2](http://xmlsoft.org/html/)
-[xmlDoc](http://xmlsoft.org/html/libxml-tree.html#xmlDoc).
-The original Gumbo parse tree is then destroyed, and single Nokogiri Ruby
-object is constructed to wrap the xmlDoc structure.  Nokogiri only produces
-Ruby objects as necessary, so all searching is done using the underlying
-libxml2 libraries.
+The resulting Gumbo parse tree is then walked.
+  * If the necessary Nokogiri and [libxml2](http://xmlsoft.org/html/) headers
+    can be found at installation time then an
+    [xmlDoc](http://xmlsoft.org/html/libxml-tree.html#xmlDoc) tree is produced
+    and a single Nokogiri Ruby object is constructed to wrap the xmlDoc
+    structure.  Nokogiri only produces Ruby objects as necessary, so all
+    searching is done using the underlying libxml2 libraries.
+  * If the necessary headers are not present at installation time, then
+    Nokogiri Ruby objects are created for each Gumbo node.  Other than
+    memory usage and CPU time, the results should be equivalent.
 * The `Nokogiri::HTML5.get` function takes care of following redirects,
 https, and determining the character encoding of the result, based on the
@@ -57,9 +60,11 @@ parser will be downloaded and compiled into the Gem itself.
 Installation
 ============
-* Execute `rake gem`
-* [sudo] gem install pkg/nokogumbo*.gem
+    git clone --recursive https://github.com/rubys/nokogumbo.git
+    cd nokogumbo
+    bundle install
+    rake gem
+    gem install pkg/nokogumbo*.gem
 Related efforts
 ============

data/ext/nokogumboc/extconf.rb ADDED Viewed

@@ -0,0 +1,34 @@
+require 'mkmf'
+$CFLAGS += " -std=c99"
+if have_library('xml2', 'xmlNewDoc')
+  # libxml2 libraries from http://www.xmlsoft.org/
+  pkg_config('libxml-2.0')
+  # nokogiri configuration from gem install
+  nokogiri_lib = Gem.find_files('nokogiri').
+    sort_by {|name| name[/nokogiri-([\d.]+)/,1].split('.').map(&:to_i)}.last
+  if nokogiri_lib
+    nokogiri_ext = nokogiri_lib.sub(%r(lib/nokogiri(.rb)?$), 'ext/nokogiri')
+    # if that doesn't work, try workarounds found in Nokogiri's extconf
+    unless find_header('nokogiri.h', nokogiri_ext)
+      require "#{nokogiri_ext}/extconf.rb"
+    end
+    # if found, enable direct calls to Nokogiri (and libxml2)
+    $CFLAGS += ' -DNGLIB' if find_header('nokogiri.h', nokogiri_ext)
+  end
+end
+# add in gumbo-parser source from github if not already installed
+unless have_library('gumbo', 'gumbo_parse')
+  rakehome = ENV['RAKEHOME'] || File.expand_path('../..')
+  unless File.exist? "#{rakehome}/ext/nokogumboc/gumbo.h"
+    require 'fileutils'
+    FileUtils.cp Dir["#{rakehome}/gumbo-parser/src/*"],
+      "#{rakehome}/ext/nokogumboc"
+  end
+end
+create_makefile('nokogumboc')

data/ext/nokogumboc/nokogumbo.c ADDED Viewed

@@ -0,0 +1,192 @@
+//
+// nokogumbo.c defines the following:
+//
+//   class Nokogumbo
+//     def self.parse(utf8_string) # returns Nokogiri::HTML::Document
+//   end
+//
+// Processing starts by calling gumbo_parse_with_options.  The resulting
+// document tree is then walked:
+//
+//  * if Nokogiri and libxml2 headers are available at compile time,
+//    (ifdef NGLIB) then a parallel libxml2 tree is constructed, and the
+//    final document is then wrapped using Nokogiri_wrap_xml_document.
+//    This approach reduces memory and CPU requirements as Ruby objects
+//    are only built when necessary.
+//
+//  * if the necessary headers are not available at compile time, Nokogiri
+//    methods are called instead, producing the equivalent functionality.
+//
+#include <ruby.h>
+#include <gumbo.h>
+// class constants
+static VALUE Document;
+#ifdef NGLIB
+#include <nokogiri.h>
+#include <libxml/tree.h>
+#define NIL NULL
+#define CONST_CAST (xmlChar const*)
+#else
+#define NIL 0
+#define CONST_CAST
+// more class constants
+static VALUE Element;
+static VALUE Text;
+static VALUE CDATA;
+static VALUE Comment;
+// interned method names; rb_intern returns an ID, so these are IDs rather
+// than VALUEs
+static ID new;
+static ID set_attribute;
+static ID add_child;
+static ID internal_subset;
+static ID remove_;
+static ID create_internal_subset;
+// map libxml2 types to Ruby VALUE
+#define xmlNodePtr VALUE
+#define xmlDocPtr VALUE
+// redefine libxml2 API as Ruby function calls
+#define xmlNewDocNode(doc, ns, name, content) \
+  rb_funcall(Element, new, 2, rb_str_new2(name), doc)
+#define xmlNewProp(element, name, value) \
+  rb_funcall(element, set_attribute, 2, rb_str_new2(name), rb_str_new2(value))
+#define xmlNewDocText(doc, text) \
+  rb_funcall(Text, new, 2, rb_str_new2(text), doc)
+#define xmlNewCDataBlock(doc, content, length) \
+  rb_funcall(CDATA, new, 2, rb_str_new(content, length), doc)
+#define xmlNewDocComment(doc, text) \
+  rb_funcall(Comment, new, 2, doc, rb_str_new2(text))
+#define xmlAddChild(element, node) \
+  rb_funcall(element, add_child, 1, node)
+#define xmlDocSetRootElement(doc, root) \
+  rb_funcall(doc, add_child, 1, root)
+// no trailing semicolon: the caller supplies it, so the macro can be used
+// wherever an expression is allowed.  Note that `external` and `system` are
+// each evaluated twice.
+#define xmlCreateIntSubset(doc, name, external, system) \
+  rb_funcall(doc, create_internal_subset, 3, rb_str_new2(name), \
+    ((external) ? rb_str_new2(external) : Qnil), \
+    ((system) ? rb_str_new2(system) : Qnil))
+#define Nokogiri_wrap_xml_document(klass, doc) \
+  (doc)
+// Stand-in for libxml2's xmlNewDoc, used when NGLIB is not defined: creates
+// a Document by calling `new` with no arguments, then detaches the internal
+// subset reported by the freshly created document.  `version` is unused.
+static VALUE xmlNewDoc(char* version) {
+  VALUE document = rb_funcall(Document, new, 0);
+  VALUE subset = rb_funcall(document, internal_subset, 0);
+  rb_funcall(subset, remove_, 0);
+  return document;
+}
+#endif
+// Build a Nokogiri Element for a given GumboElement (recursively).
+//
+//   document - document used to create every node (an xmlDocPtr when NGLIB
+//              is defined, otherwise a Ruby VALUE)
+//   node     - the Gumbo element to convert
+//
+// Returns the new element with its attributes set and its converted
+// children appended; attaching it to a parent is left to the caller.
+static xmlNodePtr walk_tree(xmlDocPtr document, GumboElement *node) {
+  // determine tag name for a given node
+  xmlNodePtr element;
+  if (node->tag != GUMBO_TAG_UNKNOWN) {
+    element = xmlNewDocNode(document, NIL,
+      CONST_CAST gumbo_normalized_tagname(node->tag), NIL);
+  } else {
+    // Unknown tags: take the name from the original source text.  Its
+    // length is dictated by the parsed document, so the NUL-terminated copy
+    // is heap allocated instead of living in a stack VLA.
+    GumboStringPiece tag = node->original_tag;
+    gumbo_tag_from_original_text(&tag);
+    char *name = ALLOC_N(char, tag.length + 1);
+    if (tag.length > 0) memcpy(name, tag.data, tag.length);
+    name[tag.length] = '\0';
+    // the name is copied by the node constructor (rb_str_new2 without
+    // NGLIB, libxml2 otherwise), so the buffer can be released right away.
+    // NOTE(review): without NGLIB the buffer leaks if Element.new raises.
+    element = xmlNewDocNode(document, NIL, CONST_CAST name, NIL);
+    xfree(name);
+  }
+  // add in the attributes
+  GumboVector* attrs = &node->attributes;
+  for (int i=0; i < attrs->length; i++) {
+    GumboAttribute *attr = attrs->data[i];
+    xmlNewProp(element, CONST_CAST attr->name, CONST_CAST attr->value);
+  }
+  // add in the children
+  GumboVector* children = &node->children;
+  for (int i=0; i < children->length; i++) {
+    GumboNode* child = children->data[i];
+    // named so that it no longer shadows the `node` parameter
+    xmlNodePtr child_node = NIL;
+    switch (child->type) {
+      case GUMBO_NODE_ELEMENT:
+        child_node = walk_tree(document, &child->v.element);
+        break;
+      case GUMBO_NODE_WHITESPACE:
+      case GUMBO_NODE_TEXT:
+        child_node = xmlNewDocText(document, CONST_CAST child->v.text.text);
+        break;
+      case GUMBO_NODE_CDATA:
+        child_node = xmlNewCDataBlock(document,
+          CONST_CAST child->v.text.original_text.data,
+          (int) child->v.text.original_text.length);
+        break;
+      case GUMBO_NODE_COMMENT:
+        child_node = xmlNewDocComment(document, CONST_CAST child->v.text.text);
+        break;
+      case GUMBO_NODE_DOCUMENT:
+      default:
+        break; // should never happen -- ignore
+    }
+    // child_node stays NIL for ignored node types
+    if (child_node) xmlAddChild(element, child_node);
+  }
+  return element;
+}
+// Parse a string using gumbo_parse into a Nokogiri document.
+//
+//   self   - receiver (the Nokogumbo class); unused
+//   string - the HTML source; must be a String or convertible with to_str
+//
+// Returns the document built from the Gumbo parse tree.  The Gumbo output
+// is destroyed before returning.
+static VALUE parse(VALUE self, VALUE string) {
+  // RSTRING_PTR/RSTRING_LEN are only valid on String objects; StringValue
+  // converts via to_str or raises TypeError for anything else.
+  StringValue(string);
+  GumboOutput *output = gumbo_parse_with_options(
+    &kGumboDefaultOptions, RSTRING_PTR(string),
+    (size_t) RSTRING_LEN(string)
+  );
+  xmlDocPtr doc = xmlNewDoc(CONST_CAST "1.0");
+  xmlNodePtr root = walk_tree(doc, &output->root->v.element);
+  xmlDocSetRootElement(doc, root);
+  if (output->document->v.document.has_doctype) {
+    const char *public = output->document->v.document.public_identifier;
+    const char *system = output->document->v.document.system_identifier;
+    // empty identifiers are passed on as NIL rather than as empty strings
+    xmlCreateIntSubset(doc, CONST_CAST "html",
+      (strlen(public) ? CONST_CAST public : NIL),
+      (strlen(system) ? CONST_CAST system : NIL));
+  }
+  gumbo_destroy_output(&kGumboDefaultOptions, output);
+  return Nokogiri_wrap_xml_document(Document, doc);
+}
+// Extension initializer: activates and loads the nokogiri gem, caches the
+// Nokogiri classes (and, without NGLIB, the method IDs) used while parsing,
+// and defines Nokogumbo.parse.
+void Init_nokogumboc() {
+  // make sure Nokogiri is loaded before its constants are looked up
+  rb_funcall(rb_mKernel, rb_intern("gem"), 1, rb_str_new2("nokogiri"));
+  rb_require("nokogiri");
+  // Nokogiri::HTML::Document
+  VALUE nokogiri_module = rb_const_get(rb_cObject, rb_intern("Nokogiri"));
+  VALUE html_module = rb_const_get(nokogiri_module, rb_intern("HTML"));
+  Document = rb_const_get(html_module, rb_intern("Document"));
+#ifndef NGLIB
+  // Nokogiri::XML node classes instantiated by the libxml2 stand-in macros
+  VALUE xml_module = rb_const_get(nokogiri_module, rb_intern("XML"));
+  Element = rb_const_get(xml_module, rb_intern("Element"));
+  Text = rb_const_get(xml_module, rb_intern("Text"));
+  CDATA = rb_const_get(xml_module, rb_intern("CDATA"));
+  Comment = rb_const_get(xml_module, rb_intern("Comment"));
+  // method names called through rb_funcall
+  new = rb_intern("new");
+  set_attribute = rb_intern("set_attribute");
+  add_child = rb_intern("add_child");
+  internal_subset = rb_intern("internal_subset");
+  remove_ = rb_intern("remove");
+  create_internal_subset = rb_intern("create_internal_subset");
+#endif
+  // class Nokogumbo, exposing parse as a one-argument singleton method
+  VALUE nokogumbo_class = rb_define_class("Nokogumbo", rb_cObject);
+  rb_define_singleton_method(nokogumbo_class, "parse", parse, 1);
+}

data/gumbo-parser/src/attribute.c CHANGED Viewed

@@ -23,10 +23,10 @@
 #include "util.h"
-struct _GumboParser;
+struct GumboInternalParser;
 GumboAttribute* gumbo_get_attribute(
-    const struct _GumboVector* attributes, const char* name) {
+    const GumboVector* attributes, const char* name) {
   for (int i = 0; i < attributes->length; ++i) {
     GumboAttribute* attr = attributes->data[i];
     if (!strcasecmp(attr->name, name)) {
@@ -37,7 +37,7 @@ GumboAttribute* gumbo_get_attribute(
 }
 void gumbo_destroy_attribute(
-    struct _GumboParser* parser, GumboAttribute* attribute) {
+    struct GumboInternalParser* parser, GumboAttribute* attribute) {
   gumbo_parser_deallocate(parser, (void*) attribute->name);
   gumbo_parser_deallocate(parser, (void*) attribute->value);
   gumbo_parser_deallocate(parser, (void*) attribute);

data/gumbo-parser/src/attribute.h CHANGED Viewed

@@ -23,12 +23,12 @@
 extern "C" {
 #endif
-struct _GumboParser;
+struct GumboInternalParser;
 // Release the memory used for a GumboAttribute, including the attribute
 // itself.
 void gumbo_destroy_attribute(
-    struct _GumboParser* parser, GumboAttribute* attribute);
+    struct GumboInternalParser* parser, GumboAttribute* attribute);
 #ifdef __cplusplus
 }

data/gumbo-parser/src/char_ref.c CHANGED Viewed

@@ -26,7 +26,7 @@
 #include "utf8.h"
 #include "util.h"
-struct _GumboParser;
+struct GumboInternalParser;
 const int kGumboNoChar = -1;
@@ -2351,7 +2351,7 @@ static int parse_digit(int c, bool allow_hex) {
 }
 static void add_no_digit_error(
-    struct _GumboParser* parser, Utf8Iterator* input) {
+    struct GumboInternalParser* parser, Utf8Iterator* input) {
   GumboError* error = gumbo_add_error(parser);
   if (!error) {
     return;
@@ -2361,7 +2361,7 @@ static void add_no_digit_error(
 }
 static void add_codepoint_error(
-    struct _GumboParser* parser, Utf8Iterator* input,
+    struct GumboInternalParser* parser, Utf8Iterator* input,
     GumboErrorType type, int codepoint) {
   GumboError* error = gumbo_add_error(parser);
   if (!error) {
@@ -2373,7 +2373,7 @@ static void add_codepoint_error(
 }
 static void add_named_reference_error(
-    struct _GumboParser* parser, Utf8Iterator* input,
+    struct GumboInternalParser* parser, Utf8Iterator* input,
     GumboErrorType type, GumboStringPiece text) {
   GumboError* error = gumbo_add_error(parser);
   if (!error) {
@@ -2394,7 +2394,7 @@ static int maybe_replace_codepoint(int codepoint) {
 }
 static bool consume_numeric_ref(
-    struct _GumboParser* parser, Utf8Iterator* input, int* output) {
+    struct GumboInternalParser* parser, Utf8Iterator* input, int* output) {
   utf8iterator_next(input);
   bool is_hex = false;
   int c = utf8iterator_current(input);
@@ -2475,7 +2475,7 @@ static bool is_legal_attribute_char_next(Utf8Iterator* input) {
 }
 static bool maybe_add_invalid_named_reference(
-    struct _GumboParser* parser, Utf8Iterator* input) {
+    struct GumboInternalParser* parser, Utf8Iterator* input) {
   // The iterator will always be reset in this code path, so we don't need to
   // worry about consuming characters.
   const char* start = utf8iterator_get_char_pointer(input);
@@ -2498,7 +2498,7 @@ static bool maybe_add_invalid_named_reference(
 }
 static bool consume_named_ref(
-    struct _GumboParser* parser, Utf8Iterator* input, bool is_in_attribute,
+    struct GumboInternalParser* parser, Utf8Iterator* input, bool is_in_attribute,
     OneOrTwoCodepoints* output) {
   assert(output->first == kGumboNoChar);
   const NamedCharRef* char_ref = find_named_char_ref(input);
@@ -2530,7 +2530,7 @@ static bool consume_named_ref(
 }
 bool consume_char_ref(
-    struct _GumboParser* parser, struct _Utf8Iterator* input,
+    struct GumboInternalParser* parser, struct GumboInternalUtf8Iterator* input,
     int additional_allowed_char, bool is_in_attribute,
     OneOrTwoCodepoints* output) {
   utf8iterator_mark(input);

data/gumbo-parser/src/char_ref.h CHANGED Viewed

@@ -27,8 +27,8 @@
 extern "C" {
 #endif
-struct _GumboParser;
-struct _Utf8Iterator;
+struct GumboInternalParser;
+struct GumboInternalUtf8Iterator;
 // Value that indicates no character was produced.
 extern const int kGumboNoChar;
@@ -50,7 +50,7 @@ typedef struct {
 // space for the "additional allowed char" when the spec says "with no
 // additional allowed char".  Returns false on parse error, true otherwise.
 bool consume_char_ref(
-    struct _GumboParser* parser, struct _Utf8Iterator* input,
+    struct GumboInternalParser* parser, struct GumboInternalUtf8Iterator* input,
     int additional_allowed_char, bool is_in_attribute,
     OneOrTwoCodepoints* output);

data/gumbo-parser/src/error.h CHANGED Viewed

@@ -30,7 +30,7 @@
 extern "C" {
 #endif
-struct _GumboParser;
+struct GumboInternalParser;
 typedef enum {
   GUMBO_ERR_UTF8_INVALID,
@@ -78,7 +78,7 @@ typedef enum {
 } GumboErrorType;
 // Additional data for duplicated attributes.
-typedef struct _GumboDuplicateAttrError {
+typedef struct GumboInternalDuplicateAttrError {
   // The name of the attribute.  Owned by this struct.
   const char* name;
@@ -114,7 +114,7 @@ typedef enum {
 // Additional data for tokenizer errors.
 // This records the current state and codepoint encountered - this is usually
 // enough to reconstruct what went wrong and provide a friendly error message.
-typedef struct _GumboTokenizerError {
+typedef struct GumboInternalTokenizerError {
   // The bad codepoint encountered.
   int codepoint;
@@ -123,7 +123,7 @@ typedef struct _GumboTokenizerError {
 } GumboTokenizerError;
 // Additional data for parse errors.
-typedef struct _GumboParserError {
+typedef struct GumboInternalParserError {
   // The type of input token that resulted in this error.
   GumboTokenType input_type;
@@ -142,7 +142,7 @@ typedef struct _GumboParserError {
 // The overall error struct representing an error in decoding/tokenizing/parsing
 // the HTML.  This contains an enumerated type flag, a source position, and then
 // a union of fields containing data specific to the error.
-typedef struct _GumboError {
+typedef struct GumboInternalError {
   // The type of error.
   GumboErrorType type;
@@ -176,23 +176,23 @@ typedef struct _GumboError {
     // Parser state, for GUMBO_ERR_PARSER and
     // GUMBO_ERR_UNACKNOWLEDGE_SELF_CLOSING_TAG.
-    struct _GumboParserError parser;
+    struct GumboInternalParserError parser;
   } v;
 } GumboError;
 // Adds a new error to the parser's error list, and returns a pointer to it so
 // that clients can fill out the rest of its fields.  May return NULL if we're
 // already over the max_errors field specified in GumboOptions.
-GumboError* gumbo_add_error(struct _GumboParser* parser);
+GumboError* gumbo_add_error(struct GumboInternalParser* parser);
 // Initializes the errors vector in the parser.
-void gumbo_init_errors(struct _GumboParser* errors);
+void gumbo_init_errors(struct GumboInternalParser* errors);
 // Frees all the errors in the 'errors_' field of the parser.
-void gumbo_destroy_errors(struct _GumboParser* errors);
+void gumbo_destroy_errors(struct GumboInternalParser* errors);
 // Frees the memory used for a single GumboError.
-void gumbo_error_destroy(struct _GumboParser* parser, GumboError* error);
+void gumbo_error_destroy(struct GumboInternalParser* parser, GumboError* error);
 // Prints an error to a string.  This fills an empty GumboStringBuffer with a
 // freshly-allocated buffer containing the error message text.  The caller is
@@ -200,7 +200,7 @@ void gumbo_error_destroy(struct _GumboParser* parser, GumboError* error);
 // the allocator specified in the GumboParser config and hence should be freed
 // by gumbo_parser_deallocate().)
 void gumbo_error_to_string(
-    struct _GumboParser* parser, const GumboError* error,
+    struct GumboInternalParser* parser, const GumboError* error,
     GumboStringBuffer* output);
 // Prints a caret diagnostic to a string.  This fills an empty GumboStringBuffer
@@ -209,13 +209,13 @@ void gumbo_error_to_string(
 // allocated with the allocator specified in the GumboParser config and hence
 // should be freed by gumbo_parser_deallocate().)
 void gumbo_caret_diagnostic_to_string(
-    struct _GumboParser* parser, const GumboError* error,
+    struct GumboInternalParser* parser, const GumboError* error,
     const char* source_text, GumboStringBuffer* output);
 // Like gumbo_caret_diagnostic_to_string, but prints the text to stdout instead
 // of writing to a string.
 void gumbo_print_caret_diagnostic(
-    struct _GumboParser* parser, const GumboError* error,
+    struct GumboInternalParser* parser, const GumboError* error,
     const char* source_text);
 #ifdef __cplusplus

data/gumbo-parser/src/gumbo.h CHANGED Viewed

@@ -59,7 +59,7 @@ extern "C" {
  * buffer of bytes), while the column field is often used to reference a
  * particular column on a printable display, which nowadays is usually UTF-8.
  */
-typedef struct _GumboSourcePosition {
+typedef struct {
   unsigned int line;
   unsigned int column;
   unsigned int offset;
@@ -81,7 +81,7 @@ extern const GumboSourcePosition kGumboEmptySourcePosition;
  * Clients should assume that it is not NUL-terminated, and should always use
  * explicit lengths when manipulating them.
  */
-typedef struct _GumboStringPiece {
+typedef struct {
   /** A pointer to the beginning of the string.  NULL iff length == 0. */
   const char* data;
@@ -116,7 +116,7 @@ bool gumbo_string_equals_ignore_case(
  * library.  Iteration can be done through inspecting the structure directly in
  * a for-loop.
  */
-typedef struct _GumboVector {
+typedef struct {
   /** Data elements.  This points to a dynamically-allocated array of capacity
    * elements, each a void* to the element itself.
    */
@@ -151,7 +151,7 @@ int gumbo_vector_index_of(GumboVector* vector, void* element);
  * efficiency benefits, by letting the parser work with enums instead of
  * strings.
  */
-typedef enum _GumboTag {
+typedef enum {
   // http://www.whatwg.org/specs/web-apps/current-work/multipage/semantics.html#the-root-element
   GUMBO_TAG_HTML,
   // http://www.whatwg.org/specs/web-apps/current-work/multipage/semantics.html#document-metadata
@@ -365,7 +365,7 @@ GumboTag gumbo_tag_enum(const char* tagname);
  * HTML includes special handling for XLink, XML, and XMLNS namespaces on
  * attributes.  Everything else goes in the generic "NONE" namespace.
  */
-typedef enum _GumboAttributeNamespaceEnum {
+typedef enum {
   GUMBO_ATTR_NAMESPACE_NONE,
   GUMBO_ATTR_NAMESPACE_XLINK,
   GUMBO_ATTR_NAMESPACE_XML,
@@ -377,7 +377,7 @@ typedef enum _GumboAttributeNamespaceEnum {
  * name-value pair, but also includes information about source locations and
  * original source text.
  */
-typedef struct _GumboAttribute {
+typedef struct {
   /**
    * The namespace for the attribute.  This will usually be
    * GUMBO_ATTR_NAMESPACE_NONE, but some XLink/XMLNS/XML attributes take special
@@ -438,14 +438,13 @@ typedef struct _GumboAttribute {
  * and return it, or NULL if no such attribute exists.  This uses a
  * case-insensitive match, as HTML is case-insensitive.
  */
-GumboAttribute* gumbo_get_attribute(
-    const struct _GumboVector* attrs, const char* name);
+GumboAttribute* gumbo_get_attribute(const GumboVector* attrs, const char* name);
 /**
  * Enum denoting the type of node.  This determines the type of the node.v
  * union.
  */
-typedef enum _GumboNodeType {
+typedef enum {
   /** Document node.  v will be a GumboDocument. */
   GUMBO_NODE_DOCUMENT,
   /** Element node.  v will be a GumboElement. */
@@ -464,10 +463,10 @@ typedef enum _GumboNodeType {
  * Forward declaration of GumboNode so it can be used recursively in
  * GumboNode.parent.
  */
-typedef struct _GumboNode GumboNode;
+typedef struct GumboInternalNode GumboNode;
 /** http://www.whatwg.org/specs/web-apps/current-work/complete/dom.html#quirks-mode */
-typedef enum _GumboQuirksModeEnum {
+typedef enum {
   GUMBO_DOCTYPE_NO_QUIRKS,
   GUMBO_DOCTYPE_QUIRKS,
   GUMBO_DOCTYPE_LIMITED_QUIRKS
@@ -480,7 +479,7 @@ typedef enum _GumboQuirksModeEnum {
  * <math> tag is in the MathML namespace, and anything else is inside the HTML
  * namespace.  No other namespaces are supported, so this can be an enum only.
  */
-typedef enum _GumboNamespaceEnum {
+typedef enum {
   GUMBO_NAMESPACE_HTML,
   GUMBO_NAMESPACE_SVG,
   GUMBO_NAMESPACE_MATHML
@@ -494,7 +493,7 @@ typedef enum _GumboNamespaceEnum {
  * may not be allowed by a style guide, or track the prevalence of incorrect or
  * tricky HTML code.
  */
-typedef enum _GumboParseFlags {
+typedef enum {
   /**
    * A normal node - both start and end tags appear in the source, nothing has
    * been reparented.
@@ -568,7 +567,7 @@ typedef enum _GumboParseFlags {
 /**
  * Information specific to document nodes.
  */
-typedef struct _GumboDocument {
+typedef struct {
   /**
    * An array of GumboNodes, containing the children of this element.  This will
    * normally consist of the <html> element and any comment nodes found.
@@ -595,7 +594,7 @@ typedef struct _GumboDocument {
  * The struct used to represent TEXT, CDATA, COMMENT, and WHITESPACE elements.
  * This contains just a block of text and its position.
  */
-typedef struct _GumboText {
+typedef struct {
   /**
    * The text of this node, after entities have been parsed and decoded.  For
    * comment/cdata nodes, this does not include the comment delimiters.
@@ -619,7 +618,7 @@ typedef struct _GumboText {
  * The struct used to represent all HTML elements.  This contains information
  * about the tag, attributes, and child nodes.
  */
-typedef struct _GumboElement {
+typedef struct {
   /**
    * An array of GumboNodes, containing the children of this element.  Pointers
    * are owned.
@@ -664,7 +663,7 @@ typedef struct _GumboElement {
  * A supertype for GumboElement and GumboText, so that we can include one
  * generic type in lists of children and cast as necessary to subtypes.
  */
-struct _GumboNode {
+struct GumboInternalNode {
   /** The type of node that this is. */
   GumboNodeType type;
@@ -710,7 +709,7 @@ typedef void (*GumboDeallocatorFunction)(void* userdata, void* ptr);
  * handling, etc.
  * Use kGumboDefaultOptions for sensible defaults, and only set what you need.
  */
-typedef struct _GumboOptions {
+typedef struct GumboInternalOptions {
   /** A memory allocator function.  Default: malloc. */
   GumboAllocatorFunction allocator;
@@ -749,7 +748,7 @@ typedef struct _GumboOptions {
 extern const GumboOptions kGumboDefaultOptions;
 /** The output struct containing the results of the parse. */
-typedef struct _GumboOutput {
+typedef struct GumboInternalOutput {
   /**
    * Pointer to the document node.  This is a GumboNode of type NODE_DOCUMENT
    * that contains the entire document as its child.
@@ -779,18 +778,18 @@ typedef struct _GumboOutput {
  *
  * This doesn't support buffers longer than 4 gigabytes.
  */
-struct _GumboOutput* gumbo_parse(const char* buffer);
+GumboOutput* gumbo_parse(const char* buffer);
 /**
  * Extended version of gumbo_parse that takes an explicit options structure,
  * buffer, and length.
  */
-struct _GumboOutput* gumbo_parse_with_options(
+GumboOutput* gumbo_parse_with_options(
     const GumboOptions* options, const char* buffer, size_t buffer_length);
 /** Release the memory used for the parse tree & parse errors. */
 void gumbo_destroy_output(
-    const struct _GumboOptions* options, GumboOutput* output);
+    const GumboOptions* options, GumboOutput* output);
 #ifdef __cplusplus