nokogiri 1.1.1 → 1.2.0

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (88)
  1. data/History.ja.txt +34 -0
  2. data/History.txt +36 -0
  3. data/Manifest.txt +21 -0
  4. data/README.ja.txt +1 -1
  5. data/README.txt +1 -1
  6. data/Rakefile +27 -89
  7. data/ext/nokogiri/extconf.rb +48 -63
  8. data/ext/nokogiri/html_document.c +90 -29
  9. data/ext/nokogiri/html_sax_parser.c +23 -2
  10. data/ext/nokogiri/native.c +18 -8
  11. data/ext/nokogiri/native.h +22 -0
  12. data/ext/nokogiri/xml_attr.c +83 -0
  13. data/ext/nokogiri/xml_attr.h +9 -0
  14. data/ext/nokogiri/xml_cdata.c +1 -1
  15. data/ext/nokogiri/xml_document.c +84 -18
  16. data/ext/nokogiri/xml_document_fragment.c +38 -0
  17. data/ext/nokogiri/xml_document_fragment.h +10 -0
  18. data/ext/nokogiri/xml_dtd.c +2 -22
  19. data/ext/nokogiri/xml_entity_reference.c +41 -0
  20. data/ext/nokogiri/xml_entity_reference.h +9 -0
  21. data/ext/nokogiri/xml_io.c +10 -3
  22. data/ext/nokogiri/xml_io.h +1 -0
  23. data/ext/nokogiri/xml_node.c +116 -66
  24. data/ext/nokogiri/xml_node_set.c +5 -1
  25. data/ext/nokogiri/xml_processing_instruction.c +44 -0
  26. data/ext/nokogiri/xml_processing_instruction.h +9 -0
  27. data/ext/nokogiri/xml_reader.c +20 -4
  28. data/ext/nokogiri/xml_sax_parser.c +51 -15
  29. data/ext/nokogiri/xml_sax_push_parser.c +85 -0
  30. data/ext/nokogiri/xml_sax_push_parser.h +9 -0
  31. data/ext/nokogiri/xml_syntax_error.c +12 -8
  32. data/ext/nokogiri/xml_syntax_error.h +2 -1
  33. data/ext/nokogiri/xml_xpath_context.c +11 -2
  34. data/ext/nokogiri/xslt_stylesheet.c +1 -6
  35. data/lib/nokogiri.rb +10 -13
  36. data/lib/nokogiri/css.rb +1 -1
  37. data/lib/nokogiri/css/generated_parser.rb +287 -295
  38. data/lib/nokogiri/css/generated_tokenizer.rb +36 -51
  39. data/lib/nokogiri/css/node.rb +1 -3
  40. data/lib/nokogiri/css/parser.rb +21 -12
  41. data/lib/nokogiri/css/parser.y +55 -44
  42. data/lib/nokogiri/css/syntax_error.rb +2 -1
  43. data/lib/nokogiri/css/tokenizer.rex +23 -32
  44. data/lib/nokogiri/decorators/hpricot/node_set.rb +1 -1
  45. data/lib/nokogiri/html.rb +10 -4
  46. data/lib/nokogiri/html/document.rb +6 -2
  47. data/lib/nokogiri/syntax_error.rb +4 -0
  48. data/lib/nokogiri/version.rb +2 -1
  49. data/lib/nokogiri/xml.rb +3 -1
  50. data/lib/nokogiri/xml/attr.rb +3 -4
  51. data/lib/nokogiri/xml/cdata.rb +1 -1
  52. data/lib/nokogiri/xml/document.rb +4 -7
  53. data/lib/nokogiri/xml/document_fragment.rb +9 -0
  54. data/lib/nokogiri/xml/dtd.rb +3 -0
  55. data/lib/nokogiri/xml/node.rb +144 -40
  56. data/lib/nokogiri/xml/node/save_options.rb +32 -0
  57. data/lib/nokogiri/xml/node_set.rb +11 -20
  58. data/lib/nokogiri/xml/processing_instruction.rb +6 -0
  59. data/lib/nokogiri/xml/reader.rb +5 -0
  60. data/lib/nokogiri/xml/sax.rb +1 -0
  61. data/lib/nokogiri/xml/sax/push_parser.rb +47 -0
  62. data/lib/nokogiri/xml/syntax_error.rb +3 -1
  63. data/lib/nokogiri/xml/xpath/syntax_error.rb +1 -1
  64. data/tasks/test.rb +136 -0
  65. data/test/css/test_parser.rb +4 -0
  66. data/test/css/test_tokenizer.rb +30 -17
  67. data/test/css/test_xpath_visitor.rb +11 -0
  68. data/test/helper.rb +11 -0
  69. data/test/hpricot/test_builder.rb +2 -9
  70. data/test/hpricot/test_parser.rb +4 -4
  71. data/test/html/test_builder.rb +7 -7
  72. data/test/html/test_document.rb +90 -4
  73. data/test/html/test_node.rb +1 -0
  74. data/test/test_css_cache.rb +1 -3
  75. data/test/test_reader.rb +19 -1
  76. data/test/test_xslt_transforms.rb +1 -1
  77. data/test/xml/node/test_save_options.rb +20 -0
  78. data/test/xml/sax/test_parser.rb +17 -0
  79. data/test/xml/sax/test_push_parser.rb +67 -0
  80. data/test/xml/test_attr.rb +16 -0
  81. data/test/xml/test_cdata.rb +1 -1
  82. data/test/xml/test_document.rb +45 -0
  83. data/test/xml/test_document_fragment.rb +18 -0
  84. data/test/xml/test_dtd.rb +2 -4
  85. data/test/xml/test_entity_reference.rb +16 -0
  86. data/test/xml/test_node.rb +149 -80
  87. data/test/xml/test_processing_instruction.rb +24 -0
  88. metadata +28 -2
@@ -2,21 +2,71 @@
2
2
 
3
3
  /*
4
4
  * call-seq:
5
- * serialize
5
+ * new
6
6
  *
7
- * Serialize this document
7
+ * Create a new document
8
8
  */
9
- static VALUE serialize(VALUE self)
9
+ static VALUE new(int argc, VALUE *argv, VALUE klass)
10
10
  {
11
- xmlDocPtr doc;
12
- xmlChar *buf;
13
- int size;
14
- Data_Get_Struct(self, xmlDoc, doc);
11
+ VALUE uri, external_id;
12
+
13
+ rb_scan_args(argc, argv, "02", &uri, &external_id);
14
+
15
+ htmlDocPtr doc = htmlNewDoc(
16
+ RTEST(uri) ? (const xmlChar *)StringValuePtr(uri) : NULL,
17
+ RTEST(external_id) ? (const xmlChar *)StringValuePtr(external_id) : NULL
18
+ );
19
+ return Nokogiri_wrap_xml_document(klass, doc);
20
+ }
21
+
22
+ /*
23
+ * call-seq:
24
+ * read_io(io, url, encoding, options)
25
+ *
26
+ * Read the HTML document from +io+ with given +url+, +encoding+,
27
+ * and +options+. See Nokogiri::HTML.parse
28
+ */
29
+ static VALUE read_io( VALUE klass,
30
+ VALUE io,
31
+ VALUE url,
32
+ VALUE encoding,
33
+ VALUE options )
34
+ {
35
+ const char * c_url = (url == Qnil) ? NULL : StringValuePtr(url);
36
+ const char * c_enc = (encoding == Qnil) ? NULL : StringValuePtr(encoding);
37
+ VALUE error_list = rb_ary_new();
15
38
 
16
- htmlDocDumpMemory(doc, &buf, &size);
17
- VALUE rb_str = rb_str_new((char *)buf, (long)size);
18
- xmlFree(buf);
19
- return rb_str;
39
+ xmlInitParser();
40
+ xmlResetLastError();
41
+ xmlSetStructuredErrorFunc((void *)error_list, Nokogiri_error_array_pusher);
42
+
43
+ htmlDocPtr doc = htmlReadIO(
44
+ io_read_callback,
45
+ io_close_callback,
46
+ (void *)io,
47
+ c_url,
48
+ c_enc,
49
+ NUM2INT(options)
50
+ );
51
+ xmlSetStructuredErrorFunc(NULL, NULL);
52
+
53
+ if(doc == NULL) {
54
+ xmlFreeDoc(doc);
55
+
56
+ xmlErrorPtr error = xmlGetLastError();
57
+ if(error)
58
+ rb_funcall(rb_mKernel, rb_intern("raise"), 1,
59
+ Nokogiri_wrap_xml_syntax_error((VALUE)NULL, error)
60
+ );
61
+ else
62
+ rb_raise(rb_eRuntimeError, "Could not parse document");
63
+
64
+ return Qnil;
65
+ }
66
+
67
+ VALUE document = Nokogiri_wrap_xml_document(klass, doc);
68
+ rb_funcall(document, rb_intern("errors="), 1, error_list);
69
+ return document;
20
70
  }
21
71
 
22
72
  /*
@@ -35,17 +85,33 @@ static VALUE read_memory( VALUE klass,
35
85
  const char * c_buffer = StringValuePtr(string);
36
86
  const char * c_url = (url == Qnil) ? NULL : StringValuePtr(url);
37
87
  const char * c_enc = (encoding == Qnil) ? NULL : StringValuePtr(encoding);
38
- int len = NUM2INT(rb_funcall(string, rb_intern("length"), 0));
88
+ int len = RSTRING_LEN(string);
89
+ VALUE error_list = rb_ary_new();
90
+
91
+ xmlInitParser();
92
+ xmlResetLastError();
93
+ xmlSetStructuredErrorFunc((void *)error_list, Nokogiri_error_array_pusher);
39
94
 
40
95
  htmlDocPtr doc = htmlReadMemory(c_buffer, len, c_url, c_enc, NUM2INT(options));
96
+ xmlSetStructuredErrorFunc(NULL, NULL);
41
97
 
42
98
  if(doc == NULL) {
43
99
  xmlFreeDoc(doc);
44
- rb_raise(rb_eRuntimeError, "Couldn't create a document");
100
+
101
+ xmlErrorPtr error = xmlGetLastError();
102
+ if(error)
103
+ rb_funcall(rb_mKernel, rb_intern("raise"), 1,
104
+ Nokogiri_wrap_xml_syntax_error((VALUE)NULL, error)
105
+ );
106
+ else
107
+ rb_raise(rb_eRuntimeError, "Could not parse document");
108
+
45
109
  return Qnil;
46
110
  }
47
111
 
48
- return Nokogiri_wrap_xml_document(klass, doc);
112
+ VALUE document = Nokogiri_wrap_xml_document(klass, doc);
113
+ rb_funcall(document, rb_intern("errors="), 1, error_list);
114
+ return document;
49
115
  }
50
116
 
51
117
  /*
@@ -64,23 +130,18 @@ static VALUE type(VALUE self)
64
130
  VALUE cNokogiriHtmlDocument ;
65
131
  void init_html_document()
66
132
  {
67
- /*
68
- * HACK. This is so that rdoc will work with this C file.
69
- */
70
- /*
71
- VALUE nokogiri = rb_define_module("Nokogiri");
72
- VALUE html = rb_define_module_under(nokogiri, "HTML");
73
- VALUE xml = rb_define_module_under(nokogiri, "XML");
74
- VALUE node = rb_define_class_under(xml, "Node", rb_cObject);
75
- VALUE xml_doc = rb_define_class_under(xml, "Document", node);
76
- VALUE klass = rb_define_class_under(html, "Document", xml_doc);
77
- */
78
-
79
- VALUE klass ;
80
- klass = cNokogiriHtmlDocument = rb_const_get(mNokogiriHtml, rb_intern("Document"));
133
+ VALUE nokogiri = rb_define_module("Nokogiri");
134
+ VALUE html = rb_define_module_under(nokogiri, "HTML");
135
+ VALUE xml = rb_define_module_under(nokogiri, "XML");
136
+ VALUE node = rb_define_class_under(xml, "Node", rb_cObject);
137
+ VALUE xml_doc = rb_define_class_under(xml, "Document", node);
138
+ VALUE klass = rb_define_class_under(html, "Document", xml_doc);
139
+
140
+ cNokogiriHtmlDocument = klass;
81
141
 
82
142
  rb_define_singleton_method(klass, "read_memory", read_memory, 4);
143
+ rb_define_singleton_method(klass, "read_io", read_io, 4);
144
+ rb_define_singleton_method(klass, "new", new, -1);
83
145
 
84
146
  rb_define_method(klass, "type", type, 0);
85
- rb_define_method(klass, "serialize", serialize, 0);
86
147
  }
@@ -1,5 +1,11 @@
1
1
  #include <html_sax_parser.h>
2
2
 
3
+ /*
4
+ * call-seq:
5
+ * native_parse_file(data, encoding)
6
+ *
7
+ * Parse +data+ with +encoding+
8
+ */
3
9
  static VALUE native_parse_file(VALUE self, VALUE data, VALUE encoding)
4
10
  {
5
11
  xmlSAXHandlerPtr handler;
@@ -13,6 +19,12 @@ static VALUE native_parse_file(VALUE self, VALUE data, VALUE encoding)
13
19
  return data;
14
20
  }
15
21
 
22
+ /*
23
+ * call-seq:
24
+ * native_parse_memory(data, encoding)
25
+ *
26
+ * Parse +data+ with +encoding+
27
+ */
16
28
  static VALUE native_parse_memory(VALUE self, VALUE data, VALUE encoding)
17
29
  {
18
30
  xmlSAXHandlerPtr handler;
@@ -29,8 +41,17 @@ static VALUE native_parse_memory(VALUE self, VALUE data, VALUE encoding)
29
41
  VALUE cNokogiriHtmlSaxParser ;
30
42
  void init_html_sax_parser()
31
43
  {
32
- VALUE klass = cNokogiriHtmlSaxParser =
33
- rb_const_get(mNokogiriHtmlSax, rb_intern("Parser"));
44
+ VALUE nokogiri = rb_define_module("Nokogiri");
45
+ VALUE html = rb_define_module_under(nokogiri, "HTML");
46
+ VALUE sax = rb_define_module_under(html, "SAX");
47
+ /*
48
+ * Nokogiri::HTML::SAX::Parser is used for parsing HTML with SAX
49
+ * callbacks.
50
+ */
51
+ VALUE klass = rb_define_class_under(sax, "Parser", cNokogiriXmlSaxParser);
52
+
53
+ cNokogiriHtmlSaxParser = klass;
54
+
34
55
  rb_define_private_method(klass, "native_parse_memory", native_parse_memory, 2);
35
56
  rb_define_private_method(klass, "native_parse_file", native_parse_file, 2);
36
57
  }
@@ -9,30 +9,40 @@ VALUE mNokogiriHtmlSax ;
9
9
 
10
10
  void Init_native()
11
11
  {
12
- mNokogiri = rb_const_get(rb_cObject, rb_intern("Nokogiri"));
13
- mNokogiriXml = rb_const_get(mNokogiri, rb_intern("XML"));
14
- mNokogiriHtml = rb_const_get(mNokogiri, rb_intern("HTML"));
15
- mNokogiriXslt = rb_const_get(mNokogiri, rb_intern("XSLT"));
16
- mNokogiriXmlSax = rb_const_get(mNokogiriXml, rb_intern("SAX"));
17
- mNokogiriHtmlSax = rb_const_get(mNokogiriHtml, rb_intern("SAX"));
12
+ xmlMemSetup(
13
+ (xmlFreeFunc)ruby_xfree,
14
+ (xmlMallocFunc)ruby_xmalloc,
15
+ (xmlReallocFunc)ruby_xrealloc,
16
+ strdup
17
+ );
18
+
19
+ mNokogiri = rb_define_module("Nokogiri");
20
+ mNokogiriXml = rb_define_module_under(mNokogiri, "XML");
21
+ mNokogiriHtml = rb_define_module_under(mNokogiri, "HTML");
22
+ mNokogiriXslt = rb_define_module_under(mNokogiri, "XSLT");
23
+ mNokogiriXmlSax = rb_define_module_under(mNokogiriXml, "SAX");
24
+ mNokogiriHtmlSax = rb_define_module_under(mNokogiriHtml, "SAX");
18
25
 
19
26
  rb_const_set( mNokogiri,
20
27
  rb_intern("LIBXML_VERSION"),
21
28
  rb_str_new2(LIBXML_DOTTED_VERSION)
22
29
  );
23
30
 
24
- xmlSetStructuredErrorFunc(NULL, Nokogiri_error_handler);
25
-
26
31
  init_xml_document();
27
32
  init_html_document();
28
33
  init_xml_node();
34
+ init_xml_document_fragment();
29
35
  init_xml_text();
30
36
  init_xml_cdata();
37
+ init_xml_processing_instruction();
38
+ init_xml_attr();
39
+ init_xml_entity_reference();
31
40
  init_xml_comment();
32
41
  init_xml_node_set();
33
42
  init_xml_xpath_context();
34
43
  init_xml_xpath();
35
44
  init_xml_sax_parser();
45
+ init_xml_sax_push_parser();
36
46
  init_xml_reader();
37
47
  init_xml_dtd();
38
48
  init_html_sax_parser();
@@ -8,6 +8,7 @@
8
8
  #include <libxml/xpath.h>
9
9
  #include <libxml/xpathInternals.h>
10
10
  #include <libxml/xmlreader.h>
11
+ #include <libxml/xmlsave.h>
11
12
  #include <libxml/HTMLparser.h>
12
13
  #include <libxml/HTMLtree.h>
13
14
 
@@ -17,12 +18,17 @@
17
18
  #include <xml_node.h>
18
19
  #include <xml_text.h>
19
20
  #include <xml_cdata.h>
21
+ #include <xml_attr.h>
22
+ #include <xml_processing_instruction.h>
23
+ #include <xml_entity_reference.h>
24
+ #include <xml_document_fragment.h>
20
25
  #include <xml_comment.h>
21
26
  #include <xml_node_set.h>
22
27
  #include <xml_xpath.h>
23
28
  #include <xml_dtd.h>
24
29
  #include <xml_xpath_context.h>
25
30
  #include <xml_sax_parser.h>
31
+ #include <xml_sax_push_parser.h>
26
32
  #include <xml_reader.h>
27
33
  #include <html_sax_parser.h>
28
34
  #include <xslt_stylesheet.h>
@@ -45,6 +51,22 @@ extern VALUE mNokogiriXslt ;
45
51
  #define NOKOGIRI_DEBUG_START(p)
46
52
  #define NOKOGIRI_DEBUG_END(p)
47
53
 
54
+ #ifndef RSTRING_PTR
55
+ #define RSTRING_PTR(s) (RSTRING(s)->ptr)
56
+ #endif
57
+
58
+ #ifndef RSTRING_LEN
59
+ #define RSTRING_LEN(s) (RSTRING(s)->len)
60
+ #endif
61
+
62
+ #ifndef RARRAY_PTR
63
+ #define RARRAY_PTR(a) RARRAY(a)->ptr
64
+ #endif
65
+
66
+ #ifndef RARRAY_LEN
67
+ #define RARRAY_LEN(a) RARRAY(a)->len
68
+ #endif
69
+
48
70
  #endif
49
71
 
50
72
  #endif
@@ -0,0 +1,83 @@
1
+ #include <xml_attr.h>
2
+
3
+ /*
4
+ * call-seq:
5
+ * value=(content)
6
+ *
7
+ * Set the value for this Attr to +content+
8
+ */
9
+ static VALUE set_value(VALUE self, VALUE content)
10
+ {
11
+ xmlAttrPtr attr;
12
+ Data_Get_Struct(self, xmlAttr, attr);
13
+
14
+ if(attr->children) xmlFreeNodeList(attr->children);
15
+
16
+ attr->children = attr->last = NULL;
17
+
18
+ if(content) {
19
+ xmlChar *buffer;
20
+ xmlNode *tmp;
21
+
22
+ // Encode our content
23
+ buffer = xmlEncodeEntitiesReentrant(attr->doc, (unsigned char *)StringValuePtr(content));
24
+
25
+ attr->children = xmlStringGetNodeList(attr->doc, buffer);
26
+ attr->last = NULL;
27
+ tmp = attr->children;
28
+
29
+ // Loop through the children
30
+ for(tmp = attr->children; tmp; tmp = tmp->next) {
31
+ tmp->parent = (xmlNode *)attr;
32
+ tmp->doc = attr->doc;
33
+ if(tmp->next == NULL) attr->last = tmp;
34
+ }
35
+
36
+ // Free up memory
37
+ xmlFree(buffer);
38
+ }
39
+
40
+ return content;
41
+ }
42
+
43
+ /*
44
+ * call-seq:
45
+ * new(document, name)
46
+ *
47
+ * Create a new Attr element on the +document+ with +name+
48
+ */
49
+ static VALUE new(VALUE klass, VALUE doc, VALUE name)
50
+ {
51
+ xmlDocPtr xml_doc;
52
+ Data_Get_Struct(doc, xmlDoc, xml_doc);
53
+
54
+ xmlAttrPtr node = xmlNewDocProp(
55
+ xml_doc,
56
+ (const xmlChar *)StringValuePtr(name),
57
+ NULL
58
+ );
59
+
60
+ VALUE rb_node = Nokogiri_wrap_xml_node((xmlNodePtr)node);
61
+
62
+ if(rb_block_given_p()) rb_yield(rb_node);
63
+
64
+ return rb_node;
65
+ }
66
+
67
+ VALUE cNokogiriXmlAttr;
68
+ void init_xml_attr()
69
+ {
70
+ VALUE nokogiri = rb_define_module("Nokogiri");
71
+ VALUE xml = rb_define_module_under(nokogiri, "XML");
72
+ VALUE node = rb_define_class_under(xml, "Node", rb_cObject);
73
+
74
+ /*
75
+ * Attr represents an Attr node in an xml document.
76
+ */
77
+ VALUE klass = rb_define_class_under(xml, "Attr", node);
78
+
79
+ cNokogiriXmlAttr = klass;
80
+
81
+ rb_define_singleton_method(klass, "new", new, 2);
82
+ rb_define_method(klass, "value=", set_value, 1);
83
+ }
@@ -0,0 +1,9 @@
1
+ #ifndef NOKOGIRI_XML_ATTR
2
+ #define NOKOGIRI_XML_ATTR
3
+
4
+ #include <native.h>
5
+
6
+ void init_xml_attr();
7
+
8
+ extern VALUE cNokogiriXmlAttr;
9
+ #endif
@@ -14,7 +14,7 @@ static VALUE new(VALUE klass, VALUE doc, VALUE content)
14
14
  xmlNodePtr node = xmlNewCDataBlock(
15
15
  xml_doc,
16
16
  (const xmlChar *)StringValuePtr(content),
17
- NUM2INT(rb_funcall(content, rb_intern("length"), 0))
17
+ RSTRING_LEN(content)
18
18
  );
19
19
 
20
20
  VALUE rb_node = Nokogiri_wrap_xml_node(node);
@@ -10,21 +10,19 @@ static void dealloc(xmlDocPtr doc)
10
10
 
11
11
  /*
12
12
  * call-seq:
13
- * serialize
13
+ * url
14
14
  *
15
- * Serialize this document
15
+ * Get the url name for this document.
16
16
  */
17
- static VALUE serialize(VALUE self)
17
+ static VALUE url(VALUE self)
18
18
  {
19
19
  xmlDocPtr doc;
20
- xmlChar *buf;
21
- int size;
22
20
  Data_Get_Struct(self, xmlDoc, doc);
23
21
 
24
- xmlDocDumpMemory(doc, &buf, &size);
25
- VALUE rb_str = rb_str_new((char *)buf, (long)size);
26
- xmlFree(buf);
27
- return rb_str;
22
+ if(doc->URL)
23
+ return rb_str_new2((const char *)doc->URL);
24
+
25
+ return Qnil;
28
26
  }
29
27
 
30
28
  /*
@@ -62,6 +60,21 @@ static VALUE root(VALUE self)
62
60
  return Nokogiri_wrap_xml_node(root) ;
63
61
  }
64
62
 
63
+ /*
64
+ * call-seq:
65
+ * encoding
66
+ *
67
+ * Get the encoding for this Document
68
+ */
69
+ static VALUE encoding(VALUE self)
70
+ {
71
+ xmlDocPtr doc;
72
+ Data_Get_Struct(self, xmlDoc, doc);
73
+
74
+ if(!doc->encoding) return Qnil;
75
+ return rb_str_new2((const char *)doc->encoding);
76
+ }
77
+
65
78
  /*
66
79
  * call-seq:
67
80
  * read_io(io, url, encoding, options)
@@ -76,8 +89,11 @@ static VALUE read_io( VALUE klass,
76
89
  {
77
90
  const char * c_url = (url == Qnil) ? NULL : StringValuePtr(url);
78
91
  const char * c_enc = (encoding == Qnil) ? NULL : StringValuePtr(encoding);
92
+ VALUE error_list = rb_ary_new();
79
93
 
80
94
  xmlInitParser();
95
+ xmlResetLastError();
96
+ xmlSetStructuredErrorFunc((void *)error_list, Nokogiri_error_array_pusher);
81
97
 
82
98
  xmlDocPtr doc = xmlReadIO(
83
99
  (xmlInputReadCallback)io_read_callback,
@@ -87,14 +103,25 @@ static VALUE read_io( VALUE klass,
87
103
  c_enc,
88
104
  NUM2INT(options)
89
105
  );
106
+ xmlSetStructuredErrorFunc(NULL, NULL);
90
107
 
91
108
  if(doc == NULL) {
92
109
  xmlFreeDoc(doc);
93
- rb_raise(rb_eRuntimeError, "Couldn't create a document");
110
+
111
+ xmlErrorPtr error = xmlGetLastError();
112
+ if(error)
113
+ rb_funcall(rb_mKernel, rb_intern("raise"), 1,
114
+ Nokogiri_wrap_xml_syntax_error((VALUE)NULL, error)
115
+ );
116
+ else
117
+ rb_raise(rb_eRuntimeError, "Could not parse document");
118
+
94
119
  return Qnil;
95
120
  }
96
121
 
97
- return Nokogiri_wrap_xml_document(klass, doc);
122
+ VALUE document = Nokogiri_wrap_xml_document(klass, doc);
123
+ rb_funcall(document, rb_intern("errors="), 1, error_list);
124
+ return document;
98
125
  }
99
126
 
100
127
  /*
@@ -112,18 +139,55 @@ static VALUE read_memory( VALUE klass,
112
139
  const char * c_buffer = StringValuePtr(string);
113
140
  const char * c_url = (url == Qnil) ? NULL : StringValuePtr(url);
114
141
  const char * c_enc = (encoding == Qnil) ? NULL : StringValuePtr(encoding);
115
- int len = NUM2INT(rb_funcall(string, rb_intern("length"), 0));
142
+ int len = RSTRING_LEN(string);
143
+ VALUE error_list = rb_ary_new();
116
144
 
117
145
  xmlInitParser();
146
+ xmlResetLastError();
147
+ xmlSetStructuredErrorFunc((void *)error_list, Nokogiri_error_array_pusher);
118
148
  xmlDocPtr doc = xmlReadMemory(c_buffer, len, c_url, c_enc, NUM2INT(options));
149
+ xmlSetStructuredErrorFunc(NULL, NULL);
119
150
 
120
151
  if(doc == NULL) {
121
152
  xmlFreeDoc(doc);
122
- rb_raise(rb_eRuntimeError, "Couldn't create a document");
153
+
154
+ xmlErrorPtr error = xmlGetLastError();
155
+ if(error)
156
+ rb_funcall(rb_mKernel, rb_intern("raise"), 1,
157
+ Nokogiri_wrap_xml_syntax_error((VALUE)NULL, error)
158
+ );
159
+ else
160
+ rb_raise(rb_eRuntimeError, "Could not parse document");
161
+
123
162
  return Qnil;
124
163
  }
125
164
 
126
- return Nokogiri_wrap_xml_document(klass, doc);
165
+ VALUE document = Nokogiri_wrap_xml_document(klass, doc);
166
+ rb_funcall(document, rb_intern("errors="), 1, error_list);
167
+ return document;
168
+ }
169
+
170
+ /*
171
+ * call-seq:
172
+ * dup
173
+ *
174
+ * Copy this Document. An optional depth may be passed in, but it defaults
175
+ * to a deep copy. 0 is a shallow copy, 1 is a deep copy.
176
+ */
177
+ static VALUE duplicate_node(int argc, VALUE *argv, VALUE self)
178
+ {
179
+ VALUE level;
180
+
181
+ if(rb_scan_args(argc, argv, "01", &level) == 0)
182
+ level = INT2NUM(1);
183
+
184
+ xmlDocPtr doc, dup;
185
+ Data_Get_Struct(self, xmlDoc, doc);
186
+
187
+ dup = xmlCopyDoc(doc, NUM2INT(level));
188
+ if(dup == NULL) return Qnil;
189
+
190
+ return Nokogiri_wrap_xml_document(cNokogiriXmlDocument, dup);
127
191
  }
128
192
 
129
193
  /*
@@ -169,9 +233,9 @@ static VALUE load_external_subsets_set(VALUE klass, VALUE value)
169
233
  VALUE cNokogiriXmlDocument ;
170
234
  void init_xml_document()
171
235
  {
172
- VALUE nokogiri = rb_define_module("Nokogiri");
173
- VALUE xml = rb_define_module_under(nokogiri, "XML");
174
- VALUE node = rb_define_class_under(xml, "Node", rb_cObject);
236
+ VALUE nokogiri = rb_define_module("Nokogiri");
237
+ VALUE xml = rb_define_module_under(nokogiri, "XML");
238
+ VALUE node = rb_define_class_under(xml, "Node", rb_cObject);
175
239
 
176
240
  /*
177
241
  * Nokogiri::XML::Document wraps an xml document.
@@ -188,7 +252,9 @@ void init_xml_document()
188
252
 
189
253
  rb_define_method(klass, "root", root, 0);
190
254
  rb_define_method(klass, "root=", set_root, 1);
191
- rb_define_method(klass, "serialize", serialize, 0);
255
+ rb_define_method(klass, "encoding", encoding, 0);
256
+ rb_define_method(klass, "dup", duplicate_node, -1);
257
+ rb_define_method(klass, "url", url, 0);
192
258
  rb_undef_method(klass, "parent");
193
259
  }
194
260