tenderlove-nokogiri 0.0.0.20081001111445

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (104) hide show
  1. data/History.txt +6 -0
  2. data/Manifest.txt +105 -0
  3. data/README.txt +51 -0
  4. data/Rakefile +70 -0
  5. data/ext/nokogiri/extconf.rb +24 -0
  6. data/ext/nokogiri/html_document.c +85 -0
  7. data/ext/nokogiri/html_document.h +10 -0
  8. data/ext/nokogiri/html_sax_parser.c +32 -0
  9. data/ext/nokogiri/html_sax_parser.h +11 -0
  10. data/ext/nokogiri/native.c +35 -0
  11. data/ext/nokogiri/native.h +32 -0
  12. data/ext/nokogiri/xml_cdata.c +36 -0
  13. data/ext/nokogiri/xml_cdata.h +9 -0
  14. data/ext/nokogiri/xml_document.c +159 -0
  15. data/ext/nokogiri/xml_document.h +10 -0
  16. data/ext/nokogiri/xml_node.c +573 -0
  17. data/ext/nokogiri/xml_node.h +13 -0
  18. data/ext/nokogiri/xml_node_set.c +90 -0
  19. data/ext/nokogiri/xml_node_set.h +9 -0
  20. data/ext/nokogiri/xml_reader.c +420 -0
  21. data/ext/nokogiri/xml_reader.h +10 -0
  22. data/ext/nokogiri/xml_sax_parser.c +161 -0
  23. data/ext/nokogiri/xml_sax_parser.h +10 -0
  24. data/ext/nokogiri/xml_text.c +25 -0
  25. data/ext/nokogiri/xml_text.h +9 -0
  26. data/ext/nokogiri/xml_xpath.c +39 -0
  27. data/ext/nokogiri/xml_xpath.h +11 -0
  28. data/ext/nokogiri/xml_xpath_context.c +69 -0
  29. data/ext/nokogiri/xml_xpath_context.h +9 -0
  30. data/ext/nokogiri/xslt_stylesheet.c +83 -0
  31. data/ext/nokogiri/xslt_stylesheet.h +9 -0
  32. data/lib/nokogiri.rb +45 -0
  33. data/lib/nokogiri/css.rb +6 -0
  34. data/lib/nokogiri/css/node.rb +95 -0
  35. data/lib/nokogiri/css/parser.rb +24 -0
  36. data/lib/nokogiri/css/parser.y +198 -0
  37. data/lib/nokogiri/css/tokenizer.rb +9 -0
  38. data/lib/nokogiri/css/tokenizer.rex +63 -0
  39. data/lib/nokogiri/css/xpath_visitor.rb +153 -0
  40. data/lib/nokogiri/decorators.rb +1 -0
  41. data/lib/nokogiri/decorators/hpricot.rb +3 -0
  42. data/lib/nokogiri/decorators/hpricot/node.rb +47 -0
  43. data/lib/nokogiri/decorators/hpricot/node_set.rb +14 -0
  44. data/lib/nokogiri/decorators/hpricot/xpath_visitor.rb +13 -0
  45. data/lib/nokogiri/hpricot.rb +46 -0
  46. data/lib/nokogiri/html.rb +64 -0
  47. data/lib/nokogiri/html/builder.rb +9 -0
  48. data/lib/nokogiri/html/document.rb +9 -0
  49. data/lib/nokogiri/html/sax/parser.rb +21 -0
  50. data/lib/nokogiri/version.rb +3 -0
  51. data/lib/nokogiri/xml.rb +29 -0
  52. data/lib/nokogiri/xml/after_handler.rb +18 -0
  53. data/lib/nokogiri/xml/before_handler.rb +32 -0
  54. data/lib/nokogiri/xml/builder.rb +79 -0
  55. data/lib/nokogiri/xml/document.rb +22 -0
  56. data/lib/nokogiri/xml/node.rb +162 -0
  57. data/lib/nokogiri/xml/node_set.rb +136 -0
  58. data/lib/nokogiri/xml/reader.rb +14 -0
  59. data/lib/nokogiri/xml/sax.rb +9 -0
  60. data/lib/nokogiri/xml/sax/document.rb +59 -0
  61. data/lib/nokogiri/xml/sax/parser.rb +33 -0
  62. data/lib/nokogiri/xml/text.rb +6 -0
  63. data/lib/nokogiri/xml/xpath.rb +6 -0
  64. data/lib/nokogiri/xslt.rb +11 -0
  65. data/lib/nokogiri/xslt/stylesheet.rb +6 -0
  66. data/nokogiri.gemspec +33 -0
  67. data/test/css/test_nthiness.rb +141 -0
  68. data/test/css/test_parser.rb +214 -0
  69. data/test/css/test_tokenizer.rb +162 -0
  70. data/test/files/staff.xml +57 -0
  71. data/test/files/staff.xslt +32 -0
  72. data/test/files/tlm.html +850 -0
  73. data/test/helper.rb +70 -0
  74. data/test/hpricot/files/basic.xhtml +17 -0
  75. data/test/hpricot/files/boingboing.html +2266 -0
  76. data/test/hpricot/files/cy0.html +3653 -0
  77. data/test/hpricot/files/immob.html +400 -0
  78. data/test/hpricot/files/pace_application.html +1320 -0
  79. data/test/hpricot/files/tenderlove.html +16 -0
  80. data/test/hpricot/files/uswebgen.html +220 -0
  81. data/test/hpricot/files/utf8.html +1054 -0
  82. data/test/hpricot/files/week9.html +1723 -0
  83. data/test/hpricot/files/why.xml +19 -0
  84. data/test/hpricot/load_files.rb +7 -0
  85. data/test/hpricot/test_alter.rb +67 -0
  86. data/test/hpricot/test_builder.rb +27 -0
  87. data/test/hpricot/test_parser.rb +412 -0
  88. data/test/hpricot/test_paths.rb +15 -0
  89. data/test/hpricot/test_preserved.rb +72 -0
  90. data/test/hpricot/test_xml.rb +26 -0
  91. data/test/html/sax/test_parser.rb +27 -0
  92. data/test/html/test_builder.rb +78 -0
  93. data/test/html/test_document.rb +22 -0
  94. data/test/test_convert_xpath.rb +173 -0
  95. data/test/test_nokogiri.rb +36 -0
  96. data/test/test_reader.rb +222 -0
  97. data/test/test_xslt_transforms.rb +29 -0
  98. data/test/xml/sax/test_parser.rb +93 -0
  99. data/test/xml/test_builder.rb +16 -0
  100. data/test/xml/test_document.rb +141 -0
  101. data/test/xml/test_node.rb +148 -0
  102. data/test/xml/test_node_set.rb +54 -0
  103. data/test/xml/test_text.rb +13 -0
  104. metadata +191 -0
@@ -0,0 +1,10 @@
1
+ #ifndef NOKOGIRI_XML_READER
2
+ #define NOKOGIRI_XML_READER
3
+
4
+ #include <native.h>
5
+
6
+ void init_xml_reader();
7
+
8
+ extern VALUE cNokogiriXmlReader;
9
+
10
+ #endif
@@ -0,0 +1,161 @@
1
+ #define _GNU_SOURCE
2
+ #include <stdio.h>
3
+ #include <xml_sax_parser.h>
4
+
5
+ /*
6
+ * call-seq:
7
+ * parse_memory(data)
8
+ *
9
+ * Parse the document stored in +data+
10
+ */
11
+ static VALUE parse_memory(VALUE self, VALUE data)
12
+ {
13
+ xmlSAXHandlerPtr handler;
14
+ Data_Get_Struct(self, xmlSAXHandler, handler);
15
+ xmlSAXUserParseMemory( handler,
16
+ (void *)self,
17
+ StringValuePtr(data),
18
+ NUM2INT(rb_funcall(data, rb_intern("length"), 0))
19
+ );
20
+ return data;
21
+ }
22
+
23
+ static VALUE native_parse_file(VALUE self, VALUE data)
24
+ {
25
+ xmlSAXHandlerPtr handler;
26
+ Data_Get_Struct(self, xmlSAXHandler, handler);
27
+ xmlSAXUserParseFile( handler,
28
+ (void *)self,
29
+ StringValuePtr(data)
30
+ );
31
+ return data;
32
+ }
33
+
34
+ static void start_document(void * ctx)
35
+ {
36
+ VALUE self = (VALUE)ctx;
37
+ VALUE doc = rb_funcall(self, rb_intern("document"), 0);
38
+ rb_funcall(doc, rb_intern("start_document"), 0);
39
+ }
40
+
41
+ static void end_document(void * ctx)
42
+ {
43
+ VALUE self = (VALUE)ctx;
44
+ VALUE doc = rb_funcall(self, rb_intern("document"), 0);
45
+ rb_funcall(doc, rb_intern("end_document"), 0);
46
+ }
47
+
48
+ static void start_element(void * ctx, const xmlChar *name, const xmlChar **atts)
49
+ {
50
+ VALUE self = (VALUE)ctx;
51
+ VALUE doc = rb_funcall(self, rb_intern("document"), 0);
52
+ VALUE attributes = rb_ary_new();
53
+ const xmlChar * attr;
54
+ int i = 0;
55
+ if(atts) {
56
+ while((attr = atts[i]) != NULL) {
57
+ rb_funcall(attributes, rb_intern("<<"), 1, rb_str_new2((const char *)attr));
58
+ i++;
59
+ }
60
+ }
61
+
62
+ rb_funcall( doc,
63
+ rb_intern("start_element"),
64
+ 2,
65
+ rb_str_new2((const char *)name),
66
+ attributes
67
+ );
68
+ }
69
+
70
+ static void end_element(void * ctx, const xmlChar *name)
71
+ {
72
+ VALUE self = (VALUE)ctx;
73
+ VALUE doc = rb_funcall(self, rb_intern("document"), 0);
74
+ rb_funcall(doc, rb_intern("end_element"), 1, rb_str_new2((const char *)name));
75
+ }
76
+
77
+ static void characters_func(void * ctx, const xmlChar * ch, int len)
78
+ {
79
+ VALUE self = (VALUE)ctx;
80
+ VALUE doc = rb_funcall(self, rb_intern("document"), 0);
81
+ VALUE str = rb_str_new((const char *)ch, (long)len);
82
+ rb_funcall(doc, rb_intern("characters"), 1, str);
83
+ }
84
+
85
+ static void comment_func(void * ctx, const xmlChar * value)
86
+ {
87
+ VALUE self = (VALUE)ctx;
88
+ VALUE doc = rb_funcall(self, rb_intern("document"), 0);
89
+ VALUE str = rb_str_new2((const char *)value);
90
+ rb_funcall(doc, rb_intern("comment"), 1, str);
91
+ }
92
+
93
+ static void warning_func(void * ctx, const char *msg, ...)
94
+ {
95
+ VALUE self = (VALUE)ctx;
96
+ VALUE doc = rb_funcall(self, rb_intern("document"), 0);
97
+ char * message;
98
+
99
+ va_list args;
100
+ va_start(args, msg);
101
+ vasprintf(&message, msg, args);
102
+ va_end(args);
103
+
104
+ rb_funcall(doc, rb_intern("warning"), 1, rb_str_new2(message));
105
+ free(message);
106
+ }
107
+
108
+ static void error_func(void * ctx, const char *msg, ...)
109
+ {
110
+ VALUE self = (VALUE)ctx;
111
+ VALUE doc = rb_funcall(self, rb_intern("document"), 0);
112
+ char * message;
113
+
114
+ va_list args;
115
+ va_start(args, msg);
116
+ vasprintf(&message, msg, args);
117
+ va_end(args);
118
+
119
+ rb_funcall(doc, rb_intern("error"), 1, rb_str_new2(message));
120
+ free(message);
121
+ }
122
+
123
+ static void cdata_block(void * ctx, const xmlChar * value, int len)
124
+ {
125
+ VALUE self = (VALUE)ctx;
126
+ VALUE doc = rb_funcall(self, rb_intern("document"), 0);
127
+ VALUE string = rb_str_new((const char *)value, (long)len);
128
+ rb_funcall(doc, rb_intern("cdata_block"), 1, string);
129
+ }
130
+
131
+ static void deallocate(xmlSAXHandlerPtr handler)
132
+ {
133
+ free(handler);
134
+ }
135
+
136
+ static VALUE allocate(VALUE klass)
137
+ {
138
+ xmlSAXHandlerPtr handler = calloc(1, sizeof(xmlSAXHandler));
139
+
140
+ handler->startDocument = start_document;
141
+ handler->endDocument = end_document;
142
+ handler->startElement = start_element;
143
+ handler->endElement = end_element;
144
+ handler->characters = characters_func;
145
+ handler->comment = comment_func;
146
+ handler->warning = warning_func;
147
+ handler->error = error_func;
148
+ handler->cdataBlock = cdata_block;
149
+
150
+ return Data_Wrap_Struct(klass, NULL, deallocate, handler);
151
+ }
152
+
153
+ VALUE cNokogiriXmlSaxParser ;
154
+ void init_xml_sax_parser()
155
+ {
156
+ VALUE klass = cNokogiriXmlSaxParser =
157
+ rb_const_get(mNokogiriXmlSax, rb_intern("Parser"));
158
+ rb_define_alloc_func(klass, allocate);
159
+ rb_define_method(klass, "parse_memory", parse_memory, 1);
160
+ rb_define_private_method(klass, "native_parse_file", native_parse_file, 1);
161
+ }
@@ -0,0 +1,10 @@
1
+ #ifndef NOKOGIRI_XML_SAX_PARSER
2
+ #define NOKOGIRI_XML_SAX_PARSER
3
+
4
+ #include <native.h>
5
+
6
+ void init_xml_sax_parser();
7
+
8
+ extern VALUE cNokogiriXmlSaxParser ;
9
+ #endif
10
+
@@ -0,0 +1,25 @@
1
+ #include <xml_text.h>
2
+
3
+ static void dealloc(xmlNodePtr node)
4
+ {
5
+ if(node->doc == NULL) xmlFreeNode(node);
6
+ }
7
+
8
+ static VALUE new(VALUE klass, VALUE string)
9
+ {
10
+ xmlNodePtr node = xmlNewText((xmlChar *)StringValuePtr(string));
11
+ VALUE rb_node = Data_Wrap_Struct(klass, NULL, dealloc, node);
12
+ node->_private = (void *)rb_node;
13
+
14
+ if(rb_block_given_p()) rb_yield(rb_node);
15
+
16
+ return rb_node;
17
+ }
18
+
19
+ VALUE cNokogiriXmlText ;
20
+ void init_xml_text()
21
+ {
22
+ VALUE klass = cNokogiriXmlText = rb_const_get(mNokogiriXml, rb_intern("Text"));
23
+
24
+ rb_define_singleton_method(klass, "new", new, 1);
25
+ }
@@ -0,0 +1,9 @@
1
+ #ifndef NOKOGIRI_XML_TEXT
2
+ #define NOKOGIRI_XML_TEXT
3
+
4
+ #include <native.h>
5
+
6
+ void init_xml_text();
7
+
8
+ extern VALUE cNokogiriXmlText ;
9
+ #endif
@@ -0,0 +1,39 @@
1
+ #include <xml_xpath.h>
2
+
3
+ static void deallocate(xmlXPathObjectPtr xpath)
4
+ {
5
+ xmlXPathFreeNodeSetList(xpath); // despite the name, this frees the xpath but not the contained node set
6
+ }
7
+
8
+ VALUE Nokogiri_wrap_xml_xpath(xmlXPathObjectPtr xpath)
9
+ {
10
+ return Data_Wrap_Struct(cNokogiriXmlXpath, 0, deallocate, xpath);
11
+ }
12
+
13
+ /*
14
+ * call-seq:
15
+ * node_set
16
+ *
17
+ * Fetch the node set associated with this xpath context.
18
+ */
19
+ static VALUE node_set(VALUE self)
20
+ {
21
+ xmlXPathObjectPtr xpath;
22
+ Data_Get_Struct(self, xmlXPathObject, xpath);
23
+
24
+ if (xpath->nodesetval)
25
+ return Nokogiri_wrap_xml_node_set(xpath->nodesetval);
26
+
27
+ return Nokogiri_wrap_xml_node_set(xmlXPathNodeSetCreate(NULL));
28
+ }
29
+
30
+ VALUE cNokogiriXmlXpath;
31
+ void init_xml_xpath(void)
32
+ {
33
+ VALUE module = rb_define_module("Nokogiri");
34
+ VALUE xml = rb_define_module_under(module, "XML");
35
+ VALUE klass = rb_define_class_under(xml, "XPath", rb_cObject);
36
+
37
+ cNokogiriXmlXpath = klass;
38
+ rb_define_method(klass, "node_set", node_set, 0);
39
+ }
@@ -0,0 +1,11 @@
1
+ #ifndef NOKOGIRI_XML_XPATH
2
+ #define NOKOGIRI_XML_XPATH
3
+
4
+ #include <native.h>
5
+
6
+ void init_xml_xpath();
7
+ VALUE Nokogiri_wrap_xml_xpath(xmlXPathObjectPtr xpath);
8
+
9
+ extern VALUE cNokogiriXmlXpath;
10
+ #endif
11
+
@@ -0,0 +1,69 @@
1
+ #include <xml_xpath_context.h>
2
+
3
+ static void deallocate(xmlXPathContextPtr ctx)
4
+ {
5
+ xmlXPathFreeContext(ctx);
6
+ }
7
+
8
+ /*
9
+ * call-seq:
10
+ * register_ns(prefix, uri)
11
+ *
12
+ * Register the namespace with +prefix+ and +uri+.
13
+ */
14
+ static VALUE register_ns(VALUE self, VALUE prefix, VALUE uri)
15
+ {
16
+ xmlXPathContextPtr ctx;
17
+ Data_Get_Struct(self, xmlXPathContext, ctx);
18
+
19
+ xmlXPathRegisterNs( ctx,
20
+ (const xmlChar *)StringValuePtr(prefix),
21
+ (const xmlChar *)StringValuePtr(uri)
22
+ );
23
+ return self;
24
+ }
25
+
26
+ /*
27
+ * call-seq:
28
+ * evaluate(search_path)
29
+ *
30
+ * Evaluate the +search_path+ returning an XML::XPath object.
31
+ */
32
+ static VALUE evaluate(VALUE self, VALUE search_path)
33
+ {
34
+ xmlXPathContextPtr ctx;
35
+ Data_Get_Struct(self, xmlXPathContext, ctx);
36
+
37
+ xmlChar* query = (xmlChar *)StringValuePtr(search_path);
38
+ xmlXPathObjectPtr xpath = xmlXPathEvalExpression(query, ctx);
39
+ if(xpath == NULL) {
40
+ rb_raise(rb_eRuntimeError, "Couldn't evaluate expression '%s'", query);
41
+ }
42
+ return Nokogiri_wrap_xml_xpath(xpath);
43
+ }
44
+
45
+ static VALUE new(VALUE klass, VALUE nodeobj)
46
+ {
47
+ xmlXPathInit();
48
+
49
+ xmlNodePtr node ;
50
+ Data_Get_Struct(nodeobj, xmlNode, node);
51
+
52
+ xmlXPathContextPtr ctx = xmlXPathNewContext(node->doc);
53
+ ctx->node = node ;
54
+ return Data_Wrap_Struct(klass, 0, deallocate, ctx);
55
+ }
56
+
57
+ VALUE cNokogiriXmlXpathContext;
58
+ void init_xml_xpath_context(void)
59
+ {
60
+ VALUE module = rb_define_module("Nokogiri");
61
+ VALUE xml = rb_define_module_under(module, "XML");
62
+ VALUE klass = rb_define_class_under(xml, "XPathContext", rb_cObject);
63
+
64
+ cNokogiriXmlXpathContext = klass;
65
+
66
+ rb_define_singleton_method(klass, "new", new, 1);
67
+ rb_define_method(klass, "evaluate", evaluate, 1);
68
+ rb_define_method(klass, "register_ns", register_ns, 2);
69
+ }
@@ -0,0 +1,9 @@
1
+ #ifndef NOKOGIRI_XML_XPATH_CONTEXT
2
+ #define NOKOGIRI_XML_XPATH_CONTEXT
3
+
4
+ #include <native.h>
5
+
6
+ void init_xml_xpath_context();
7
+
8
+ extern VALUE cNokogiriXmlXpathContext;
9
+ #endif
@@ -0,0 +1,83 @@
1
+ #include <xslt_stylesheet.h>
2
+
3
+ #include "libxslt/xsltInternals.h"
4
+ #include "libxslt/xsltutils.h"
5
+ #include "libxslt/transform.h"
6
+
7
+ static void dealloc(xsltStylesheetPtr doc)
8
+ {
9
+ xsltFreeStylesheet(doc); // commented out for now.
10
+ }
11
+
12
+ static VALUE parse_stylesheet_doc(VALUE klass, VALUE xmldocobj)
13
+ {
14
+ xmlDocPtr xml ;
15
+ xsltStylesheetPtr ss ;
16
+ Data_Get_Struct(xmldocobj, xmlDoc, xml);
17
+ ss = xsltParseStylesheetDoc(xmlCopyDoc(xml, 1)); /* 1 => recursive */
18
+ return Data_Wrap_Struct(klass, NULL, dealloc, ss);
19
+ }
20
+
21
+
22
+ static VALUE serialize(VALUE self, VALUE xmlobj)
23
+ {
24
+ xmlDocPtr xml ;
25
+ xsltStylesheetPtr ss ;
26
+ xmlChar* doc_ptr ;
27
+ int doc_len ;
28
+ VALUE rval ;
29
+
30
+ Data_Get_Struct(xmlobj, xmlDoc, xml);
31
+ Data_Get_Struct(self, xsltStylesheet, ss);
32
+ xsltSaveResultToString(&doc_ptr, &doc_len, xml, ss);
33
+ rval = rb_str_new((char*)doc_ptr, doc_len);
34
+ free(doc_ptr);
35
+ return rval ;
36
+ }
37
+
38
+
39
+ /*
40
+ * call-seq:
41
+ * apply_to(Nokogiri::XML::Document, params)
42
+ *
43
+ * Apply an XSLT stylesheet to an XML document.
44
+ * +params+ is an array of strings.
45
+ */
46
+ static VALUE apply_to(int argc, VALUE* argv, VALUE self)
47
+ {
48
+ VALUE xmldoc, paramobj ;
49
+ xmlDocPtr xml ;
50
+ xmlDocPtr result ;
51
+ xsltStylesheetPtr ss ;
52
+ const char** params ;
53
+ int param_len, j ;
54
+ VALUE resultobj ;
55
+
56
+ rb_scan_args(argc, argv, "11", &xmldoc, &paramobj);
57
+ if (paramobj == Qnil) { paramobj = rb_ary_new2(0) ; }
58
+
59
+ Data_Get_Struct(xmldoc, xmlDoc, xml);
60
+ Data_Get_Struct(self, xsltStylesheet, ss);
61
+
62
+ param_len = RARRAY_LEN(paramobj);
63
+ params = calloc((size_t)param_len+1, sizeof(char*));
64
+ for (j = 0 ; j < param_len ; j++) {
65
+ params[j] = RSTRING(rb_ary_entry(paramobj, j))->ptr ;
66
+ }
67
+ params[param_len] = 0 ;
68
+
69
+ result = xsltApplyStylesheet(ss, xml, params);
70
+ free(params);
71
+ resultobj = Nokogiri_wrap_xml_document(0, result) ;
72
+ return rb_funcall(self, rb_intern("serialize"), 1, resultobj);
73
+ }
74
+
75
+ VALUE cNokogiriXsltStylesheet ;
76
+ void init_xslt_stylesheet()
77
+ {
78
+ VALUE klass = cNokogiriXsltStylesheet = rb_const_get(mNokogiriXslt, rb_intern("Stylesheet"));
79
+
80
+ rb_define_singleton_method(klass, "parse_stylesheet_doc", parse_stylesheet_doc, 1);
81
+ rb_define_method(klass, "serialize", serialize, 1);
82
+ rb_define_method(klass, "apply_to", apply_to, -1);
83
+ }
@@ -0,0 +1,9 @@
1
+ #ifndef NOKOGIRI_XSLT_STYLESHEET
2
+ #define NOKOGIRI_XSLT_STYLESHEET
3
+
4
+ #include <native.h>
5
+
6
+ void init_xslt_stylesheet();
7
+
8
+ extern VALUE cNokogiriXsltStylesheet ;
9
+ #endif