nokogumbo 0.8 → 0.9
Sign up to get free protection for your applications and to get access to all the features.
- data/work/extconf.rb +7 -4
- data/work/nokogumbo.c +14 -11
- metadata +1 -1
data/work/extconf.rb
CHANGED
@@ -4,10 +4,13 @@ $CFLAGS = " -std=c99"
|
|
4
4
|
# libxml2 libraries from http://www.xmlsoft.org/
|
5
5
|
pkg_config('libxml-2.0')
|
6
6
|
|
7
|
-
# nokogiri
|
8
|
-
nokogiri_lib = Gem.find_files('nokogiri').[…]
|
9
|
-
nokogiri_ext = nokogiri_lib.sub(%r(lib/nokogiri[…]
|
10
|
-
find_header('nokogiri.h', nokogiri_ext)
|
7
|
+
# nokogiri configuration from gem install
|
8
|
+
nokogiri_lib = Gem.find_files('nokogiri').sort.last or gem 'nokogiri'
|
9
|
+
nokogiri_ext = nokogiri_lib.sub(%r(lib/nokogiri(.rb)?$), 'ext/nokogiri')
|
10
|
+
unless find_header('nokogiri.h', nokogiri_ext)
|
11
|
+
require "#{nokogiri_ext}/extconf.rb"
|
12
|
+
find_header('nokogiri.h', nokogiri_ext)
|
13
|
+
end
|
11
14
|
|
12
15
|
# add in gumbo-parser source from github if not already installed
|
13
16
|
unless have_library('gumbo', 'gumbo_parse') or File.exist? 'work/gumbo.h'
|
data/work/nokogumbo.c
CHANGED
@@ -3,6 +3,8 @@
|
|
3
3
|
#include <nokogiri.h>
|
4
4
|
#include <libxml/tree.h>
|
5
5
|
|
6
|
+
#define CONST_CAST (xmlChar const*)
|
7
|
+
|
6
8
|
// class constants
|
7
9
|
static VALUE Document;
|
8
10
|
|
@@ -11,7 +13,7 @@ static xmlNodePtr walk_tree(xmlDocPtr document, GumboElement *node) {
|
|
11
13
|
// determine tag name for a given node
|
12
14
|
xmlNodePtr element;
|
13
15
|
if (node->tag != GUMBO_TAG_UNKNOWN) {
|
14
|
-
element = xmlNewNode(NULL, […]
|
16
|
+
element = xmlNewNode(NULL, CONST_CAST gumbo_normalized_tagname(node->tag));
|
15
17
|
} else {
|
16
18
|
GumboStringPiece tag = node->original_tag;
|
17
19
|
gumbo_tag_from_original_text(&tag);
|
@@ -25,7 +27,7 @@ static xmlNodePtr walk_tree(xmlDocPtr document, GumboElement *node) {
|
|
25
27
|
GumboVector* attrs = &node->attributes;
|
26
28
|
for (int i=0; i < attrs->length; i++) {
|
27
29
|
GumboAttribute *attr = attrs->data[i];
|
28
|
-
xmlNewProp(element, […]
|
30
|
+
xmlNewProp(element, CONST_CAST attr->name, CONST_CAST attr->value);
|
29
31
|
}
|
30
32
|
|
31
33
|
// add in the children
|
@@ -41,15 +43,15 @@ static xmlNodePtr walk_tree(xmlDocPtr document, GumboElement *node) {
|
|
41
43
|
break;
|
42
44
|
case GUMBO_NODE_WHITESPACE:
|
43
45
|
case GUMBO_NODE_TEXT:
|
44
|
-
node = xmlNewText([…]
|
46
|
+
node = xmlNewText(CONST_CAST child->v.text.text);
|
45
47
|
break;
|
46
48
|
case GUMBO_NODE_CDATA:
|
47
49
|
node = xmlNewCDataBlock(document,
|
48
|
-
|
49
|
-
child->v.text.original_text.length);
|
50
|
+
CONST_CAST child->v.text.original_text.data,
|
51
|
+
(int) child->v.text.original_text.length);
|
50
52
|
break;
|
51
53
|
case GUMBO_NODE_COMMENT:
|
52
|
-
node = xmlNewComment([…]
|
54
|
+
node = xmlNewComment(CONST_CAST child->v.text.text);
|
53
55
|
break;
|
54
56
|
case GUMBO_NODE_DOCUMENT:
|
55
57
|
break; // should never happen -- ignore
|
@@ -64,17 +66,18 @@ static xmlNodePtr walk_tree(xmlDocPtr document, GumboElement *node) {
|
|
64
66
|
// Parse a string using gumbo_parse into a Nokogiri document
|
65
67
|
static VALUE parse(VALUE self, VALUE string) {
|
66
68
|
GumboOutput *output = gumbo_parse_with_options(
|
67
|
-
&kGumboDefaultOptions, RSTRING_PTR(string), […]
|
69
|
+
&kGumboDefaultOptions, RSTRING_PTR(string),
|
70
|
+
(size_t) RSTRING_LEN(string)
|
68
71
|
);
|
69
|
-
xmlDocPtr doc = xmlNewDoc([…]
|
72
|
+
xmlDocPtr doc = xmlNewDoc(CONST_CAST "1.0");
|
70
73
|
xmlNodePtr root = walk_tree(doc, &output->root->v.element);
|
71
74
|
xmlDocSetRootElement(doc, root);
|
72
75
|
if (output->document->v.document.has_doctype) {
|
73
76
|
const char *public = output->document->v.document.public_identifier;
|
74
77
|
const char *system = output->document->v.document.system_identifier;
|
75
|
-
xmlCreateIntSubset(doc, […]
|
76
|
-
(strlen(public) ? public : NULL),
|
77
|
-
(strlen(system) ? system : NULL));
|
78
|
+
xmlCreateIntSubset(doc, CONST_CAST "html",
|
79
|
+
(strlen(public) ? CONST_CAST public : NULL),
|
80
|
+
(strlen(system) ? CONST_CAST system : NULL));
|
78
81
|
}
|
79
82
|
gumbo_destroy_output(&kGumboDefaultOptions, output);
|
80
83
|
|