RubyGems - nokogumbo - Versions diffs - 1.1.14 → 1.2.0 - Mend

nokogumbo 1.1.14 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (4) hide show

data/README.md CHANGED Viewed

@@ -16,6 +16,14 @@ require 'nokogumbo'
 doc = Nokogiri::HTML5(string)
 ```
+An experimental _fragment_ method is also provided.  While not HTML5
+compliant, it may be useful:
+```ruby
+require 'nokogumbo'
+doc = Nokogiri::HTML5.fragment(string)
+```
 Because HTML is often fetched via the web, a convenience interface to
 HTTP get is also provided:
@@ -34,6 +42,10 @@ puts Nokogiri::HTML5.get('http://nokogiri.org').at('h1 abbr')['title']
 Notes
 -----
+* The `Nokogiri::HTML5.fragment` function takes a string and parses it
+  as an HTML5 document.  The `<html>`, `<head>`, and `<body>` elements are
+  removed from this document, and any children of these elements that remain
+  are returned as a `Nokogiri::HTML::DocumentFragment`.
 * The `Nokogiri::HTML5.parse` function takes a string and passes it to the
 <code>gumbo_parse_with_options</code> method, using the default options.
 The resulting Gumbo parse tree is then walked.

data/ext/nokogumboc/nokogumbo.c CHANGED Viewed

@@ -104,10 +104,49 @@ static xmlNodePtr walk_tree(xmlDocPtr document, GumboElement *node) {
   // add in the attributes
   GumboVector* attrs = &node->attributes;
+  char *name = NULL;
+  int namelen = 0;
+  char *ns;
   for (int i=0; i < attrs->length; i++) {
     GumboAttribute *attr = attrs->data[i];
-    xmlNewProp(element, CONST_CAST attr->name, CONST_CAST attr->value);
+    switch (attr->attr_namespace) {
+      case GUMBO_ATTR_NAMESPACE_XLINK:
+        ns = "xlink:";
+        break;
+      case GUMBO_ATTR_NAMESPACE_XML:
+        ns = "xml:";
+        break;
+      case GUMBO_ATTR_NAMESPACE_XMLNS:
+        ns = "xmlns:";
+        if (!strcmp(attr->name, "xmlns")) ns = NULL;
+        break;
+      default:
+        ns = NULL;
+    }
+    if (ns) {
+      if (strlen(ns) + strlen(attr->name) + 1 > namelen) {
+        free(name);
+        name = NULL;
+      }
+      if (!name) {
+        namelen = strlen(ns) + strlen(attr->name) + 1;
+        name = malloc(namelen);
+      }
+      strcpy(name, ns);
+      strcat(name, attr->name);
+      xmlNewProp(element, CONST_CAST name, CONST_CAST attr->value);
+    } else {
+      xmlNewProp(element, CONST_CAST attr->name, CONST_CAST attr->value);
+    }
   }
+  if (name) free(name);
   // add in the children
   GumboVector* children = &node->children;

data/lib/nokogumbo.rb CHANGED Viewed

@@ -74,6 +74,45 @@ module Nokogiri
       end
     end
+    # while fragment is on the Gumbo TODO list, simulate it by doing
+    # a full document parse and ignoring the parent <html>, <head>, and <body>
+    # tags, and collecting up the children of each.
+    def self.fragment(string)
+      doc = parse(string)
+      fragment = Nokogiri::HTML::DocumentFragment.new(doc)
+      if doc.children.length != 1 or doc.children.first.name != 'html'
+        # no HTML?  Return document as is
+        fragment = doc
+      else
+        # examine children of HTML element
+        children = doc.children.first.children
+        # head is always first.  If present, take children but otherwise
+        # ignore the head element
+        if children.length > 0 and doc.children.first.name = 'head'
+          fragment << children.shift.children
+        end
+        # body may be next, or last.  If found, take children but otherwise
+        # ignore the body element.  Also take any remaining elements, taking
+        # care to preserve order.
+        if children.length > 0 and doc.children.first.name = 'body'
+          fragment << children.shift.children
+          fragment << children
+        elsif children.length > 0 and doc.children.last.name = 'body'
+          body = children.pop
+          fragment << children
+          fragment << body.children
+        else
+          fragment << children
+        end
+      end
+      # return result
+      fragment
+    end
   private
     # Charset sniffing is a complex and controversial topic that understandably

metadata CHANGED Viewed

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: nokogumbo
 version: !ruby/object:Gem::Version
-  version: 1.1.14
+  version: 1.2.0
   prerelease:
 platform: ruby
 authors: