nokogumbo 0.8 → 0.9
This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between those package versions as they appear in their respective public registries.
- data/work/extconf.rb +7 -4
- data/work/nokogumbo.c +14 -11
- metadata +1 -1
data/work/extconf.rb
CHANGED
@@ -4,10 +4,13 @@ $CFLAGS = " -std=c99"
|
|
4
4
|
# libxml2 libraries from http://www.xmlsoft.org/
|
5
5
|
pkg_config('libxml-2.0')
|
6
6
|
|
7
|
-
# nokogiri
|
8
|
-
nokogiri_lib = Gem.find_files('nokogiri').
|
9
|
-
nokogiri_ext = nokogiri_lib.sub(%r(lib/nokogiri
|
10
|
-
find_header('nokogiri.h', nokogiri_ext)
|
7
|
+
# nokogiri configuration from gem install
|
8
|
+
nokogiri_lib = Gem.find_files('nokogiri').sort.last or gem 'nokogiri'
|
9
|
+
nokogiri_ext = nokogiri_lib.sub(%r(lib/nokogiri(.rb)?$), 'ext/nokogiri')
|
10
|
+
unless find_header('nokogiri.h', nokogiri_ext)
|
11
|
+
require "#{nokogiri_ext}/extconf.rb"
|
12
|
+
find_header('nokogiri.h', nokogiri_ext)
|
13
|
+
end
|
11
14
|
|
12
15
|
# add in gumbo-parser source from github if not already installed
|
13
16
|
unless have_library('gumbo', 'gumbo_parse') or File.exist? 'work/gumbo.h'
|
data/work/nokogumbo.c
CHANGED
@@ -3,6 +3,8 @@
|
|
3
3
|
#include <nokogiri.h>
|
4
4
|
#include <libxml/tree.h>
|
5
5
|
|
6
|
+
#define CONST_CAST (xmlChar const*)
|
7
|
+
|
6
8
|
// class constants
|
7
9
|
static VALUE Document;
|
8
10
|
|
@@ -11,7 +13,7 @@ static xmlNodePtr walk_tree(xmlDocPtr document, GumboElement *node) {
|
|
11
13
|
// determine tag name for a given node
|
12
14
|
xmlNodePtr element;
|
13
15
|
if (node->tag != GUMBO_TAG_UNKNOWN) {
|
14
|
-
element = xmlNewNode(NULL,
|
16
|
+
element = xmlNewNode(NULL, CONST_CAST gumbo_normalized_tagname(node->tag));
|
15
17
|
} else {
|
16
18
|
GumboStringPiece tag = node->original_tag;
|
17
19
|
gumbo_tag_from_original_text(&tag);
|
@@ -25,7 +27,7 @@ static xmlNodePtr walk_tree(xmlDocPtr document, GumboElement *node) {
|
|
25
27
|
GumboVector* attrs = &node->attributes;
|
26
28
|
for (int i=0; i < attrs->length; i++) {
|
27
29
|
GumboAttribute *attr = attrs->data[i];
|
28
|
-
xmlNewProp(element,
|
30
|
+
xmlNewProp(element, CONST_CAST attr->name, CONST_CAST attr->value);
|
29
31
|
}
|
30
32
|
|
31
33
|
// add in the children
|
@@ -41,15 +43,15 @@ static xmlNodePtr walk_tree(xmlDocPtr document, GumboElement *node) {
|
|
41
43
|
break;
|
42
44
|
case GUMBO_NODE_WHITESPACE:
|
43
45
|
case GUMBO_NODE_TEXT:
|
44
|
-
node = xmlNewText(
|
46
|
+
node = xmlNewText(CONST_CAST child->v.text.text);
|
45
47
|
break;
|
46
48
|
case GUMBO_NODE_CDATA:
|
47
49
|
node = xmlNewCDataBlock(document,
|
48
|
-
|
49
|
-
child->v.text.original_text.length);
|
50
|
+
CONST_CAST child->v.text.original_text.data,
|
51
|
+
(int) child->v.text.original_text.length);
|
50
52
|
break;
|
51
53
|
case GUMBO_NODE_COMMENT:
|
52
|
-
node = xmlNewComment(
|
54
|
+
node = xmlNewComment(CONST_CAST child->v.text.text);
|
53
55
|
break;
|
54
56
|
case GUMBO_NODE_DOCUMENT:
|
55
57
|
break; // should never happen -- ignore
|
@@ -64,17 +66,18 @@ static xmlNodePtr walk_tree(xmlDocPtr document, GumboElement *node) {
|
|
64
66
|
// Parse a string using gumbo_parse into a Nokogiri document
|
65
67
|
static VALUE parse(VALUE self, VALUE string) {
|
66
68
|
GumboOutput *output = gumbo_parse_with_options(
|
67
|
-
&kGumboDefaultOptions, RSTRING_PTR(string),
|
69
|
+
&kGumboDefaultOptions, RSTRING_PTR(string),
|
70
|
+
(size_t) RSTRING_LEN(string)
|
68
71
|
);
|
69
|
-
xmlDocPtr doc = xmlNewDoc(
|
72
|
+
xmlDocPtr doc = xmlNewDoc(CONST_CAST "1.0");
|
70
73
|
xmlNodePtr root = walk_tree(doc, &output->root->v.element);
|
71
74
|
xmlDocSetRootElement(doc, root);
|
72
75
|
if (output->document->v.document.has_doctype) {
|
73
76
|
const char *public = output->document->v.document.public_identifier;
|
74
77
|
const char *system = output->document->v.document.system_identifier;
|
75
|
-
xmlCreateIntSubset(doc,
|
76
|
-
(strlen(public) ? public : NULL),
|
77
|
-
(strlen(system) ? system : NULL));
|
78
|
+
xmlCreateIntSubset(doc, CONST_CAST "html",
|
79
|
+
(strlen(public) ? CONST_CAST public : NULL),
|
80
|
+
(strlen(system) ? CONST_CAST system : NULL));
|
78
81
|
}
|
79
82
|
gumbo_destroy_output(&kGumboDefaultOptions, output);
|
80
83
|
|