tenderlove-nokogiri 0.0.0.20081001111445

Sign up to get free protection for your applications and to get access to all the features.
Files changed (104) hide show
  1. data/History.txt +6 -0
  2. data/Manifest.txt +105 -0
  3. data/README.txt +51 -0
  4. data/Rakefile +70 -0
  5. data/ext/nokogiri/extconf.rb +24 -0
  6. data/ext/nokogiri/html_document.c +85 -0
  7. data/ext/nokogiri/html_document.h +10 -0
  8. data/ext/nokogiri/html_sax_parser.c +32 -0
  9. data/ext/nokogiri/html_sax_parser.h +11 -0
  10. data/ext/nokogiri/native.c +35 -0
  11. data/ext/nokogiri/native.h +32 -0
  12. data/ext/nokogiri/xml_cdata.c +36 -0
  13. data/ext/nokogiri/xml_cdata.h +9 -0
  14. data/ext/nokogiri/xml_document.c +159 -0
  15. data/ext/nokogiri/xml_document.h +10 -0
  16. data/ext/nokogiri/xml_node.c +573 -0
  17. data/ext/nokogiri/xml_node.h +13 -0
  18. data/ext/nokogiri/xml_node_set.c +90 -0
  19. data/ext/nokogiri/xml_node_set.h +9 -0
  20. data/ext/nokogiri/xml_reader.c +420 -0
  21. data/ext/nokogiri/xml_reader.h +10 -0
  22. data/ext/nokogiri/xml_sax_parser.c +161 -0
  23. data/ext/nokogiri/xml_sax_parser.h +10 -0
  24. data/ext/nokogiri/xml_text.c +25 -0
  25. data/ext/nokogiri/xml_text.h +9 -0
  26. data/ext/nokogiri/xml_xpath.c +39 -0
  27. data/ext/nokogiri/xml_xpath.h +11 -0
  28. data/ext/nokogiri/xml_xpath_context.c +69 -0
  29. data/ext/nokogiri/xml_xpath_context.h +9 -0
  30. data/ext/nokogiri/xslt_stylesheet.c +83 -0
  31. data/ext/nokogiri/xslt_stylesheet.h +9 -0
  32. data/lib/nokogiri.rb +45 -0
  33. data/lib/nokogiri/css.rb +6 -0
  34. data/lib/nokogiri/css/node.rb +95 -0
  35. data/lib/nokogiri/css/parser.rb +24 -0
  36. data/lib/nokogiri/css/parser.y +198 -0
  37. data/lib/nokogiri/css/tokenizer.rb +9 -0
  38. data/lib/nokogiri/css/tokenizer.rex +63 -0
  39. data/lib/nokogiri/css/xpath_visitor.rb +153 -0
  40. data/lib/nokogiri/decorators.rb +1 -0
  41. data/lib/nokogiri/decorators/hpricot.rb +3 -0
  42. data/lib/nokogiri/decorators/hpricot/node.rb +47 -0
  43. data/lib/nokogiri/decorators/hpricot/node_set.rb +14 -0
  44. data/lib/nokogiri/decorators/hpricot/xpath_visitor.rb +13 -0
  45. data/lib/nokogiri/hpricot.rb +46 -0
  46. data/lib/nokogiri/html.rb +64 -0
  47. data/lib/nokogiri/html/builder.rb +9 -0
  48. data/lib/nokogiri/html/document.rb +9 -0
  49. data/lib/nokogiri/html/sax/parser.rb +21 -0
  50. data/lib/nokogiri/version.rb +3 -0
  51. data/lib/nokogiri/xml.rb +29 -0
  52. data/lib/nokogiri/xml/after_handler.rb +18 -0
  53. data/lib/nokogiri/xml/before_handler.rb +32 -0
  54. data/lib/nokogiri/xml/builder.rb +79 -0
  55. data/lib/nokogiri/xml/document.rb +22 -0
  56. data/lib/nokogiri/xml/node.rb +162 -0
  57. data/lib/nokogiri/xml/node_set.rb +136 -0
  58. data/lib/nokogiri/xml/reader.rb +14 -0
  59. data/lib/nokogiri/xml/sax.rb +9 -0
  60. data/lib/nokogiri/xml/sax/document.rb +59 -0
  61. data/lib/nokogiri/xml/sax/parser.rb +33 -0
  62. data/lib/nokogiri/xml/text.rb +6 -0
  63. data/lib/nokogiri/xml/xpath.rb +6 -0
  64. data/lib/nokogiri/xslt.rb +11 -0
  65. data/lib/nokogiri/xslt/stylesheet.rb +6 -0
  66. data/nokogiri.gemspec +33 -0
  67. data/test/css/test_nthiness.rb +141 -0
  68. data/test/css/test_parser.rb +214 -0
  69. data/test/css/test_tokenizer.rb +162 -0
  70. data/test/files/staff.xml +57 -0
  71. data/test/files/staff.xslt +32 -0
  72. data/test/files/tlm.html +850 -0
  73. data/test/helper.rb +70 -0
  74. data/test/hpricot/files/basic.xhtml +17 -0
  75. data/test/hpricot/files/boingboing.html +2266 -0
  76. data/test/hpricot/files/cy0.html +3653 -0
  77. data/test/hpricot/files/immob.html +400 -0
  78. data/test/hpricot/files/pace_application.html +1320 -0
  79. data/test/hpricot/files/tenderlove.html +16 -0
  80. data/test/hpricot/files/uswebgen.html +220 -0
  81. data/test/hpricot/files/utf8.html +1054 -0
  82. data/test/hpricot/files/week9.html +1723 -0
  83. data/test/hpricot/files/why.xml +19 -0
  84. data/test/hpricot/load_files.rb +7 -0
  85. data/test/hpricot/test_alter.rb +67 -0
  86. data/test/hpricot/test_builder.rb +27 -0
  87. data/test/hpricot/test_parser.rb +412 -0
  88. data/test/hpricot/test_paths.rb +15 -0
  89. data/test/hpricot/test_preserved.rb +72 -0
  90. data/test/hpricot/test_xml.rb +26 -0
  91. data/test/html/sax/test_parser.rb +27 -0
  92. data/test/html/test_builder.rb +78 -0
  93. data/test/html/test_document.rb +22 -0
  94. data/test/test_convert_xpath.rb +173 -0
  95. data/test/test_nokogiri.rb +36 -0
  96. data/test/test_reader.rb +222 -0
  97. data/test/test_xslt_transforms.rb +29 -0
  98. data/test/xml/sax/test_parser.rb +93 -0
  99. data/test/xml/test_builder.rb +16 -0
  100. data/test/xml/test_document.rb +141 -0
  101. data/test/xml/test_node.rb +148 -0
  102. data/test/xml/test_node_set.rb +54 -0
  103. data/test/xml/test_text.rb +13 -0
  104. metadata +191 -0
@@ -0,0 +1,10 @@
1
+ #ifndef NOKOGIRI_XML_READER
2
+ #define NOKOGIRI_XML_READER
3
+
4
+ #include <native.h>
5
+
6
+ void init_xml_reader();
7
+
8
+ extern VALUE cNokogiriXmlReader;
9
+
10
+ #endif
@@ -0,0 +1,161 @@
1
+ #define _GNU_SOURCE
2
+ #include <stdio.h>
3
+ #include <xml_sax_parser.h>
4
+
5
+ /*
6
+ * call-seq:
7
+ * parse_memory(data)
8
+ *
9
+ * Parse the document stored in +data+
10
+ */
11
+ static VALUE parse_memory(VALUE self, VALUE data)
12
+ {
13
+ xmlSAXHandlerPtr handler;
14
+ Data_Get_Struct(self, xmlSAXHandler, handler);
15
+ xmlSAXUserParseMemory( handler,
16
+ (void *)self,
17
+ StringValuePtr(data),
18
+ NUM2INT(rb_funcall(data, rb_intern("length"), 0))
19
+ );
20
+ return data;
21
+ }
22
+
23
+ static VALUE native_parse_file(VALUE self, VALUE data)
24
+ {
25
+ xmlSAXHandlerPtr handler;
26
+ Data_Get_Struct(self, xmlSAXHandler, handler);
27
+ xmlSAXUserParseFile( handler,
28
+ (void *)self,
29
+ StringValuePtr(data)
30
+ );
31
+ return data;
32
+ }
33
+
34
+ static void start_document(void * ctx)
35
+ {
36
+ VALUE self = (VALUE)ctx;
37
+ VALUE doc = rb_funcall(self, rb_intern("document"), 0);
38
+ rb_funcall(doc, rb_intern("start_document"), 0);
39
+ }
40
+
41
+ static void end_document(void * ctx)
42
+ {
43
+ VALUE self = (VALUE)ctx;
44
+ VALUE doc = rb_funcall(self, rb_intern("document"), 0);
45
+ rb_funcall(doc, rb_intern("end_document"), 0);
46
+ }
47
+
48
+ static void start_element(void * ctx, const xmlChar *name, const xmlChar **atts)
49
+ {
50
+ VALUE self = (VALUE)ctx;
51
+ VALUE doc = rb_funcall(self, rb_intern("document"), 0);
52
+ VALUE attributes = rb_ary_new();
53
+ const xmlChar * attr;
54
+ int i = 0;
55
+ if(atts) {
56
+ while((attr = atts[i]) != NULL) {
57
+ rb_funcall(attributes, rb_intern("<<"), 1, rb_str_new2((const char *)attr));
58
+ i++;
59
+ }
60
+ }
61
+
62
+ rb_funcall( doc,
63
+ rb_intern("start_element"),
64
+ 2,
65
+ rb_str_new2((const char *)name),
66
+ attributes
67
+ );
68
+ }
69
+
70
+ static void end_element(void * ctx, const xmlChar *name)
71
+ {
72
+ VALUE self = (VALUE)ctx;
73
+ VALUE doc = rb_funcall(self, rb_intern("document"), 0);
74
+ rb_funcall(doc, rb_intern("end_element"), 1, rb_str_new2((const char *)name));
75
+ }
76
+
77
+ static void characters_func(void * ctx, const xmlChar * ch, int len)
78
+ {
79
+ VALUE self = (VALUE)ctx;
80
+ VALUE doc = rb_funcall(self, rb_intern("document"), 0);
81
+ VALUE str = rb_str_new((const char *)ch, (long)len);
82
+ rb_funcall(doc, rb_intern("characters"), 1, str);
83
+ }
84
+
85
+ static void comment_func(void * ctx, const xmlChar * value)
86
+ {
87
+ VALUE self = (VALUE)ctx;
88
+ VALUE doc = rb_funcall(self, rb_intern("document"), 0);
89
+ VALUE str = rb_str_new2((const char *)value);
90
+ rb_funcall(doc, rb_intern("comment"), 1, str);
91
+ }
92
+
93
+ static void warning_func(void * ctx, const char *msg, ...)
94
+ {
95
+ VALUE self = (VALUE)ctx;
96
+ VALUE doc = rb_funcall(self, rb_intern("document"), 0);
97
+ char * message;
98
+
99
+ va_list args;
100
+ va_start(args, msg);
101
+ vasprintf(&message, msg, args);
102
+ va_end(args);
103
+
104
+ rb_funcall(doc, rb_intern("warning"), 1, rb_str_new2(message));
105
+ free(message);
106
+ }
107
+
108
+ static void error_func(void * ctx, const char *msg, ...)
109
+ {
110
+ VALUE self = (VALUE)ctx;
111
+ VALUE doc = rb_funcall(self, rb_intern("document"), 0);
112
+ char * message;
113
+
114
+ va_list args;
115
+ va_start(args, msg);
116
+ vasprintf(&message, msg, args);
117
+ va_end(args);
118
+
119
+ rb_funcall(doc, rb_intern("error"), 1, rb_str_new2(message));
120
+ free(message);
121
+ }
122
+
123
+ static void cdata_block(void * ctx, const xmlChar * value, int len)
124
+ {
125
+ VALUE self = (VALUE)ctx;
126
+ VALUE doc = rb_funcall(self, rb_intern("document"), 0);
127
+ VALUE string = rb_str_new((const char *)value, (long)len);
128
+ rb_funcall(doc, rb_intern("cdata_block"), 1, string);
129
+ }
130
+
131
+ static void deallocate(xmlSAXHandlerPtr handler)
132
+ {
133
+ free(handler);
134
+ }
135
+
136
+ static VALUE allocate(VALUE klass)
137
+ {
138
+ xmlSAXHandlerPtr handler = calloc(1, sizeof(xmlSAXHandler));
139
+
140
+ handler->startDocument = start_document;
141
+ handler->endDocument = end_document;
142
+ handler->startElement = start_element;
143
+ handler->endElement = end_element;
144
+ handler->characters = characters_func;
145
+ handler->comment = comment_func;
146
+ handler->warning = warning_func;
147
+ handler->error = error_func;
148
+ handler->cdataBlock = cdata_block;
149
+
150
+ return Data_Wrap_Struct(klass, NULL, deallocate, handler);
151
+ }
152
+
153
+ VALUE cNokogiriXmlSaxParser ;
154
+ void init_xml_sax_parser()
155
+ {
156
+ VALUE klass = cNokogiriXmlSaxParser =
157
+ rb_const_get(mNokogiriXmlSax, rb_intern("Parser"));
158
+ rb_define_alloc_func(klass, allocate);
159
+ rb_define_method(klass, "parse_memory", parse_memory, 1);
160
+ rb_define_private_method(klass, "native_parse_file", native_parse_file, 1);
161
+ }
@@ -0,0 +1,10 @@
1
+ #ifndef NOKOGIRI_XML_SAX_PARSER
2
+ #define NOKOGIRI_XML_SAX_PARSER
3
+
4
+ #include <native.h>
5
+
6
+ void init_xml_sax_parser();
7
+
8
+ extern VALUE cNokogiriXmlSaxParser ;
9
+ #endif
10
+
@@ -0,0 +1,25 @@
1
+ #include <xml_text.h>
2
+
3
+ static void dealloc(xmlNodePtr node)
4
+ {
5
+ if(node->doc == NULL) xmlFreeNode(node);
6
+ }
7
+
8
+ static VALUE new(VALUE klass, VALUE string)
9
+ {
10
+ xmlNodePtr node = xmlNewText((xmlChar *)StringValuePtr(string));
11
+ VALUE rb_node = Data_Wrap_Struct(klass, NULL, dealloc, node);
12
+ node->_private = (void *)rb_node;
13
+
14
+ if(rb_block_given_p()) rb_yield(rb_node);
15
+
16
+ return rb_node;
17
+ }
18
+
19
+ VALUE cNokogiriXmlText ;
20
+ void init_xml_text()
21
+ {
22
+ VALUE klass = cNokogiriXmlText = rb_const_get(mNokogiriXml, rb_intern("Text"));
23
+
24
+ rb_define_singleton_method(klass, "new", new, 1);
25
+ }
@@ -0,0 +1,9 @@
1
+ #ifndef NOKOGIRI_XML_TEXT
2
+ #define NOKOGIRI_XML_TEXT
3
+
4
+ #include <native.h>
5
+
6
+ void init_xml_text();
7
+
8
+ extern VALUE cNokogiriXmlText ;
9
+ #endif
@@ -0,0 +1,39 @@
1
+ #include <xml_xpath.h>
2
+
3
+ static void deallocate(xmlXPathObjectPtr xpath)
4
+ {
5
+ xmlXPathFreeNodeSetList(xpath); // despite the name, this frees the xpath but not the contained node set
6
+ }
7
+
8
+ VALUE Nokogiri_wrap_xml_xpath(xmlXPathObjectPtr xpath)
9
+ {
10
+ return Data_Wrap_Struct(cNokogiriXmlXpath, 0, deallocate, xpath);
11
+ }
12
+
13
+ /*
14
+ * call-seq:
15
+ * node_set
16
+ *
17
+ * Fetch the node set associated with this xpath context.
18
+ */
19
+ static VALUE node_set(VALUE self)
20
+ {
21
+ xmlXPathObjectPtr xpath;
22
+ Data_Get_Struct(self, xmlXPathObject, xpath);
23
+
24
+ if (xpath->nodesetval)
25
+ return Nokogiri_wrap_xml_node_set(xpath->nodesetval);
26
+
27
+ return Nokogiri_wrap_xml_node_set(xmlXPathNodeSetCreate(NULL));
28
+ }
29
+
30
+ VALUE cNokogiriXmlXpath;
31
+ void init_xml_xpath(void)
32
+ {
33
+ VALUE module = rb_define_module("Nokogiri");
34
+ VALUE xml = rb_define_module_under(module, "XML");
35
+ VALUE klass = rb_define_class_under(xml, "XPath", rb_cObject);
36
+
37
+ cNokogiriXmlXpath = klass;
38
+ rb_define_method(klass, "node_set", node_set, 0);
39
+ }
@@ -0,0 +1,11 @@
1
+ #ifndef NOKOGIRI_XML_XPATH
2
+ #define NOKOGIRI_XML_XPATH
3
+
4
+ #include <native.h>
5
+
6
+ void init_xml_xpath();
7
+ VALUE Nokogiri_wrap_xml_xpath(xmlXPathObjectPtr xpath);
8
+
9
+ extern VALUE cNokogiriXmlXpath;
10
+ #endif
11
+
@@ -0,0 +1,69 @@
1
+ #include <xml_xpath_context.h>
2
+
3
+ static void deallocate(xmlXPathContextPtr ctx)
4
+ {
5
+ xmlXPathFreeContext(ctx);
6
+ }
7
+
8
+ /*
9
+ * call-seq:
10
+ * register_ns(prefix, uri)
11
+ *
12
+ * Register the namespace with +prefix+ and +uri+.
13
+ */
14
+ static VALUE register_ns(VALUE self, VALUE prefix, VALUE uri)
15
+ {
16
+ xmlXPathContextPtr ctx;
17
+ Data_Get_Struct(self, xmlXPathContext, ctx);
18
+
19
+ xmlXPathRegisterNs( ctx,
20
+ (const xmlChar *)StringValuePtr(prefix),
21
+ (const xmlChar *)StringValuePtr(uri)
22
+ );
23
+ return self;
24
+ }
25
+
26
+ /*
27
+ * call-seq:
28
+ * evaluate(search_path)
29
+ *
30
+ * Evaluate the +search_path+ returning an XML::XPath object.
31
+ */
32
+ static VALUE evaluate(VALUE self, VALUE search_path)
33
+ {
34
+ xmlXPathContextPtr ctx;
35
+ Data_Get_Struct(self, xmlXPathContext, ctx);
36
+
37
+ xmlChar* query = (xmlChar *)StringValuePtr(search_path);
38
+ xmlXPathObjectPtr xpath = xmlXPathEvalExpression(query, ctx);
39
+ if(xpath == NULL) {
40
+ rb_raise(rb_eRuntimeError, "Couldn't evaluate expression '%s'", query);
41
+ }
42
+ return Nokogiri_wrap_xml_xpath(xpath);
43
+ }
44
+
45
+ static VALUE new(VALUE klass, VALUE nodeobj)
46
+ {
47
+ xmlXPathInit();
48
+
49
+ xmlNodePtr node ;
50
+ Data_Get_Struct(nodeobj, xmlNode, node);
51
+
52
+ xmlXPathContextPtr ctx = xmlXPathNewContext(node->doc);
53
+ ctx->node = node ;
54
+ return Data_Wrap_Struct(klass, 0, deallocate, ctx);
55
+ }
56
+
57
+ VALUE cNokogiriXmlXpathContext;
58
+ void init_xml_xpath_context(void)
59
+ {
60
+ VALUE module = rb_define_module("Nokogiri");
61
+ VALUE xml = rb_define_module_under(module, "XML");
62
+ VALUE klass = rb_define_class_under(xml, "XPathContext", rb_cObject);
63
+
64
+ cNokogiriXmlXpathContext = klass;
65
+
66
+ rb_define_singleton_method(klass, "new", new, 1);
67
+ rb_define_method(klass, "evaluate", evaluate, 1);
68
+ rb_define_method(klass, "register_ns", register_ns, 2);
69
+ }
@@ -0,0 +1,9 @@
1
+ #ifndef NOKOGIRI_XML_XPATH_CONTEXT
2
+ #define NOKOGIRI_XML_XPATH_CONTEXT
3
+
4
+ #include <native.h>
5
+
6
+ void init_xml_xpath_context();
7
+
8
+ extern VALUE cNokogiriXmlXpathContext;
9
+ #endif
@@ -0,0 +1,83 @@
1
+ #include <xslt_stylesheet.h>
2
+
3
+ #include "libxslt/xsltInternals.h"
4
+ #include "libxslt/xsltutils.h"
5
+ #include "libxslt/transform.h"
6
+
7
+ static void dealloc(xsltStylesheetPtr doc)
8
+ {
9
+ xsltFreeStylesheet(doc); // commented out for now.
10
+ }
11
+
12
+ static VALUE parse_stylesheet_doc(VALUE klass, VALUE xmldocobj)
13
+ {
14
+ xmlDocPtr xml ;
15
+ xsltStylesheetPtr ss ;
16
+ Data_Get_Struct(xmldocobj, xmlDoc, xml);
17
+ ss = xsltParseStylesheetDoc(xmlCopyDoc(xml, 1)); /* 1 => recursive */
18
+ return Data_Wrap_Struct(klass, NULL, dealloc, ss);
19
+ }
20
+
21
+
22
+ static VALUE serialize(VALUE self, VALUE xmlobj)
23
+ {
24
+ xmlDocPtr xml ;
25
+ xsltStylesheetPtr ss ;
26
+ xmlChar* doc_ptr ;
27
+ int doc_len ;
28
+ VALUE rval ;
29
+
30
+ Data_Get_Struct(xmlobj, xmlDoc, xml);
31
+ Data_Get_Struct(self, xsltStylesheet, ss);
32
+ xsltSaveResultToString(&doc_ptr, &doc_len, xml, ss);
33
+ rval = rb_str_new((char*)doc_ptr, doc_len);
34
+ free(doc_ptr);
35
+ return rval ;
36
+ }
37
+
38
+
39
+ /*
40
+ * call-seq:
41
+ * apply_to(Nokogiri::XML::Document, params)
42
+ *
43
+ * Apply an XSLT stylesheet to an XML document.
44
+ * +params+ is an array of strings.
45
+ */
46
+ static VALUE apply_to(int argc, VALUE* argv, VALUE self)
47
+ {
48
+ VALUE xmldoc, paramobj ;
49
+ xmlDocPtr xml ;
50
+ xmlDocPtr result ;
51
+ xsltStylesheetPtr ss ;
52
+ const char** params ;
53
+ int param_len, j ;
54
+ VALUE resultobj ;
55
+
56
+ rb_scan_args(argc, argv, "11", &xmldoc, &paramobj);
57
+ if (paramobj == Qnil) { paramobj = rb_ary_new2(0) ; }
58
+
59
+ Data_Get_Struct(xmldoc, xmlDoc, xml);
60
+ Data_Get_Struct(self, xsltStylesheet, ss);
61
+
62
+ param_len = RARRAY_LEN(paramobj);
63
+ params = calloc((size_t)param_len+1, sizeof(char*));
64
+ for (j = 0 ; j < param_len ; j++) {
65
+ params[j] = RSTRING(rb_ary_entry(paramobj, j))->ptr ;
66
+ }
67
+ params[param_len] = 0 ;
68
+
69
+ result = xsltApplyStylesheet(ss, xml, params);
70
+ free(params);
71
+ resultobj = Nokogiri_wrap_xml_document(0, result) ;
72
+ return rb_funcall(self, rb_intern("serialize"), 1, resultobj);
73
+ }
74
+
75
+ VALUE cNokogiriXsltStylesheet ;
76
+ void init_xslt_stylesheet()
77
+ {
78
+ VALUE klass = cNokogiriXsltStylesheet = rb_const_get(mNokogiriXslt, rb_intern("Stylesheet"));
79
+
80
+ rb_define_singleton_method(klass, "parse_stylesheet_doc", parse_stylesheet_doc, 1);
81
+ rb_define_method(klass, "serialize", serialize, 1);
82
+ rb_define_method(klass, "apply_to", apply_to, -1);
83
+ }
@@ -0,0 +1,9 @@
1
+ #ifndef NOKOGIRI_XSLT_STYLESHEET
2
+ #define NOKOGIRI_XSLT_STYLESHEET
3
+
4
+ #include <native.h>
5
+
6
+ void init_xslt_stylesheet();
7
+
8
+ extern VALUE cNokogiriXsltStylesheet ;
9
+ #endif