libxml-ruby 0.9.7 → 0.9.8

Sign up to get free protection for your applications and to get access to all the features.
Files changed (97) hide show
  1. data/CHANGES +53 -0
  2. data/Rakefile +1 -0
  3. data/ext/libxml/build.log +4 -0
  4. data/ext/libxml/cbg.c +86 -86
  5. data/ext/libxml/libxml.c +878 -876
  6. data/ext/libxml/ruby_libxml.h +8 -4
  7. data/ext/libxml/ruby_xml_attr.c +36 -168
  8. data/ext/libxml/ruby_xml_attr.h +2 -4
  9. data/ext/libxml/ruby_xml_attr_decl.c +177 -0
  10. data/ext/libxml/ruby_xml_attr_decl.h +13 -0
  11. data/ext/libxml/ruby_xml_attributes.c +29 -20
  12. data/ext/libxml/ruby_xml_document.c +895 -898
  13. data/ext/libxml/ruby_xml_dtd.c +18 -1
  14. data/ext/libxml/ruby_xml_dtd.h +1 -0
  15. data/ext/libxml/ruby_xml_encoding.c +116 -0
  16. data/ext/libxml/ruby_xml_encoding.h +12 -0
  17. data/ext/libxml/ruby_xml_error.c +8 -2
  18. data/ext/libxml/ruby_xml_html_parser.c +53 -74
  19. data/ext/libxml/ruby_xml_html_parser.h +2 -3
  20. data/ext/libxml/ruby_xml_html_parser_context.c +145 -0
  21. data/ext/libxml/ruby_xml_html_parser_context.h +12 -0
  22. data/ext/libxml/ruby_xml_html_parser_options.c +48 -0
  23. data/ext/libxml/ruby_xml_html_parser_options.h +12 -0
  24. data/ext/libxml/ruby_xml_input_cbg.c +1 -1
  25. data/ext/libxml/ruby_xml_io.c +30 -0
  26. data/ext/libxml/ruby_xml_io.h +9 -0
  27. data/ext/libxml/ruby_xml_namespace.c +34 -16
  28. data/ext/libxml/ruby_xml_namespace.h +2 -2
  29. data/ext/libxml/ruby_xml_namespaces.c +6 -6
  30. data/ext/libxml/ruby_xml_node.c +1367 -1324
  31. data/ext/libxml/ruby_xml_node.h +2 -2
  32. data/ext/libxml/ruby_xml_parser.c +26 -78
  33. data/ext/libxml/ruby_xml_parser.h +1 -1
  34. data/ext/libxml/ruby_xml_parser_context.c +284 -13
  35. data/ext/libxml/ruby_xml_parser_context.h +1 -2
  36. data/ext/libxml/ruby_xml_parser_options.c +75 -0
  37. data/ext/libxml/ruby_xml_parser_options.h +14 -0
  38. data/ext/libxml/ruby_xml_reader.c +277 -183
  39. data/ext/libxml/ruby_xml_sax_parser.c +60 -57
  40. data/ext/libxml/ruby_xml_xpath_context.c +43 -8
  41. data/ext/libxml/ruby_xml_xpath_expression.c +6 -0
  42. data/ext/libxml/ruby_xml_xpath_object.c +107 -95
  43. data/ext/libxml/ruby_xml_xpath_object.h +9 -1
  44. data/ext/libxml/ruby_xml_xpointer.c +107 -107
  45. data/ext/libxml/version.h +2 -2
  46. data/ext/vc/libxml_ruby.vcproj +43 -3
  47. data/lib/libxml.rb +2 -3
  48. data/lib/libxml/attr.rb +71 -2
  49. data/lib/libxml/attr_decl.rb +81 -0
  50. data/lib/libxml/document.rb +78 -14
  51. data/lib/libxml/html_parser.rb +75 -42
  52. data/lib/libxml/node.rb +11 -0
  53. data/lib/libxml/parser.rb +106 -62
  54. data/lib/libxml/reader.rb +12 -0
  55. data/lib/libxml/sax_parser.rb +42 -52
  56. data/lib/libxml/xpath_object.rb +15 -0
  57. data/test/model/atom.xml +12 -12
  58. data/test/model/bands.xml +4 -4
  59. data/test/model/books.xml +146 -147
  60. data/test/model/merge_bug_data.xml +1 -1
  61. data/test/model/rubynet.xml +1 -0
  62. data/test/model/shiporder.rng +1 -1
  63. data/test/model/shiporder.xml +22 -22
  64. data/test/model/shiporder.xsd +30 -30
  65. data/test/model/xinclude.xml +1 -1
  66. data/test/{tc_node_attr.rb → tc_attr.rb} +1 -1
  67. data/test/tc_attr_decl.rb +131 -0
  68. data/test/tc_deprecated_require.rb +1 -3
  69. data/test/tc_document.rb +13 -3
  70. data/test/tc_document_write.rb +5 -5
  71. data/test/tc_dtd.rb +13 -5
  72. data/test/tc_html_parser.rb +14 -26
  73. data/test/tc_node_cdata.rb +1 -3
  74. data/test/tc_node_comment.rb +2 -4
  75. data/test/tc_node_edit.rb +2 -3
  76. data/test/tc_node_text.rb +35 -1
  77. data/test/tc_node_write.rb +3 -3
  78. data/test/tc_node_xlink.rb +2 -4
  79. data/test/tc_parser.rb +163 -70
  80. data/test/tc_parser_context.rb +103 -42
  81. data/test/tc_reader.rb +173 -45
  82. data/test/tc_relaxng.rb +2 -2
  83. data/test/tc_sax_parser.rb +48 -52
  84. data/test/tc_schema.rb +2 -2
  85. data/test/tc_xpath.rb +37 -6
  86. data/test/tc_xpath_context.rb +7 -1
  87. data/test/tc_xpath_expression.rb +1 -3
  88. data/test/tc_xpointer.rb +1 -3
  89. data/test/test_suite.rb +2 -3
  90. metadata +20 -13
  91. data/ext/libxml/ruby_xml_input.c +0 -329
  92. data/ext/libxml/ruby_xml_input.h +0 -20
  93. data/lib/libxml/parser_context.rb +0 -17
  94. data/lib/libxml/parser_options.rb +0 -25
  95. data/test/model/simple.xml +0 -7
  96. data/test/tc_input.rb +0 -13
  97. data/test/tc_well_formed.rb +0 -11
@@ -1,4 +1,4 @@
1
- /* $Id: ruby_xml_parser_context.h 666 2008-12-07 00:16:50Z cfis $ */
1
+ /* $Id: ruby_xml_parser_context.h 711 2009-01-20 07:17:15Z cfis $ */
2
2
 
3
3
  /* Please see the LICENSE file for copyright and distribution information */
4
4
 
@@ -8,6 +8,5 @@
8
8
  extern VALUE cXMLParserContext;
9
9
 
10
10
  void ruby_init_xml_parser_context(void);
11
- VALUE rxml_parser_context_wrap(xmlParserCtxtPtr ctxt);
12
11
 
13
12
  #endif
@@ -0,0 +1,75 @@
1
+ /* $Id: ruby_xml_parser.c 710 2009-01-20 05:30:51Z cfis $ */
2
+
3
+ /* Please see the LICENSE file for copyright and distribution information */
4
+
5
+ #include <stdarg.h>
6
+ #include "ruby_libxml.h"
7
+
8
+ /* Document-class: LibXML::XML::ParserOptions
9
+ *
10
+ * Options that control the operation of the XML parser. The easiest
11
+ * way to set a parser's options is to use the methods
12
+ * XML::Parser.file, XML::Parser.io or XML::Parser.string.
13
+ * For additional control, see XML::Parser::Context#options=.
14
+ */
15
+
16
+ VALUE mXMLParserOptions;
17
+
18
+
19
+ // Rdoc needs to know
20
+ #ifdef RDOC_NEVER_DEFINED
21
+ mLibXML = rb_define_module("LibXML");
22
+ mXML = rb_define_module_under(mLibXML, "XML");
23
+ #endif
24
+
25
+ void ruby_init_parser_options(void)
26
+ {
27
+ mXMLParserOptions = rb_define_module_under(cXMLParser, "Options");
28
+
29
+ /* recover on errors */
30
+ rb_define_const(mXMLParserOptions, "RECOVER", INT2NUM(XML_PARSE_RECOVER));
31
+ /* substitute entities */
32
+ rb_define_const(mXMLParserOptions, "NOENT", INT2NUM(XML_PARSE_NOENT));
33
+ /* load the external subset */
34
+ rb_define_const(mXMLParserOptions, "DTDLOAD", INT2NUM(XML_PARSE_DTDLOAD));
35
+ /* default DTD attributes */
36
+ rb_define_const(mXMLParserOptions, "DTDATTR", INT2NUM(XML_PARSE_DTDATTR));
37
+ /* validate with the DTD */
38
+ rb_define_const(mXMLParserOptions, "DTDVALID", INT2NUM(XML_PARSE_DTDVALID));
39
+ /* suppress error reports */
40
+ rb_define_const(mXMLParserOptions, "NOERROR", INT2NUM(XML_PARSE_NOERROR));
41
+ /* suppress warning reports */
42
+ rb_define_const(mXMLParserOptions, "NOWARNING", INT2NUM(XML_PARSE_NOWARNING));
43
+ /* pedantic error reporting */
44
+ rb_define_const(mXMLParserOptions, "PEDANTIC", INT2NUM(XML_PARSE_PEDANTIC));
45
+ /* remove blank nodes */
46
+ rb_define_const(mXMLParserOptions, "NOBLANKS", INT2NUM(XML_PARSE_NOBLANKS));
47
+ /* use the SAX1 interface internally */
48
+ rb_define_const(mXMLParserOptions, "SAX1", INT2NUM(XML_PARSE_SAX1));
49
+ /* Implement XInclude substitution */
50
+ rb_define_const(mXMLParserOptions, "XINCLUDE", INT2NUM(XML_PARSE_XINCLUDE));
51
+ /* Forbid network access */
52
+ rb_define_const(mXMLParserOptions, "NONET", INT2NUM(XML_PARSE_NONET));
53
+ /* Do not reuse the context dictionary */
54
+ rb_define_const(mXMLParserOptions, "NODICT", INT2NUM(XML_PARSE_NODICT));
55
+ /* remove redundant namespaces declarations */
56
+ rb_define_const(mXMLParserOptions, "NSCLEAN", INT2NUM(XML_PARSE_NSCLEAN));
57
+ /* merge CDATA as text nodes */
58
+ rb_define_const(mXMLParserOptions, "NOCDATA", INT2NUM(XML_PARSE_NOCDATA));
59
+ #if LIBXML_VERSION >= 20621
60
+ /* do not generate XINCLUDE START/END nodes */
61
+ rb_define_const(mXMLParserOptions, "NOXINCNODE", INT2NUM(XML_PARSE_NOXINCNODE));
62
+ #endif
63
+ #if LIBXML_VERSION >= 20700
64
+ /* compact small text nodes */
65
+ rb_define_const(mXMLParserOptions, "COMPACT", INT2NUM(XML_PARSE_COMPACT));
66
+ /* parse using XML-1.0 before update 5 */
67
+ rb_define_const(mXMLParserOptions, "PARSE_OLD10", INT2NUM(XML_PARSE_OLD10));
68
+ /* do not fixup XINCLUDE xml:base uris */
69
+ rb_define_const(mXMLParserOptions, "NOBASEFIX", INT2NUM(XML_PARSE_NOBASEFIX));
70
+ #endif
71
+ #if LIBXML_VERSION >= 20703
72
+ /* relax any hardcoded limit from the parser */
73
+ rb_define_const(mXMLParserOptions, "HUGE", INT2NUM(XML_PARSE_HUGE));
74
+ #endif
75
+ }
@@ -0,0 +1,14 @@
1
+ /* $Id: ruby_xml_parser.h 710 2009-01-20 05:30:51Z cfis $ */
2
+
3
+ /* Please see the LICENSE file for copyright and distribution information */
4
+
5
+ #ifndef __RXML_PARSER_OPTIONS__
6
+ #define __RXML_PARSER_OPTIONS__
7
+
8
+ #define MAX_LIBXML_FEATURES_LEN 50
9
+
10
+ extern VALUE mXMLParserOptions;
11
+
12
+ void ruby_init_parser_options();
13
+
14
+ #endif
@@ -4,8 +4,6 @@
4
4
  #include "ruby_libxml.h"
5
5
  #include "ruby_xml_reader.h"
6
6
 
7
- VALUE cXMLReader;
8
-
9
7
  /*
10
8
  * Document-class: LibXML::XML::Reader
11
9
  *
@@ -22,27 +20,37 @@ VALUE cXMLReader;
22
20
  *
23
21
  * Example:
24
22
  *
25
- * parser = XML::Reader.new("<foo><bar>1</bar><bar>2</bar><bar>3</bar></foo>")
26
- * parser.read
27
- * assert_equal('foo', parser.name)
28
- * assert_equal(nil, parser.value)
23
+ * reader = XML::Reader.string("<foo><bar>1</bar><bar>2</bar><bar>3</bar></foo>")
24
+ * reader.read
25
+ * assert_equal('foo', reader.name)
26
+ * assert_equal(nil, reader.value)
29
27
  *
30
28
  * 3.times do |i|
31
- * parser.read
32
- * assert_equal(XML::Reader::TYPE_ELEMENT, parser.node_type)
33
- * assert_equal('bar', parser.name)
34
- * parser.read
35
- * assert_equal(XML::Reader::TYPE_TEXT, parser.node_type)
36
- * assert_equal((i + 1).to_s, parser.value)
37
- * parser.read
38
- * assert_equal(XML::Reader::TYPE_END_ELEMENT, parser.node_type)
29
+ * reader.read
30
+ * assert_equal(XML::Reader::TYPE_ELEMENT, reader.node_type)
31
+ * assert_equal('bar', reader.name)
32
+ * reader.read
33
+ * assert_equal(XML::Reader::TYPE_TEXT, reader.node_type)
34
+ * assert_equal((i + 1).to_s, reader.value)
35
+ * reader.read
36
+ * assert_equal(XML::Reader::TYPE_END_ELEMENT, reader.node_type)
39
37
  * end
40
38
  *
39
+ * You can also parse documents (see XML::Reader.document),
40
+ * strings (see XML::Reader.string) and io objects (see
41
+ * XML::Reader.io).
42
+ *
41
43
  * For a more in depth tutorial, albeit in C, see http://xmlsoft.org/xmlreader.html.*/
42
44
 
43
- static VALUE rxml_reader_new(VALUE class, xmlTextReaderPtr reader)
45
+ VALUE cXMLReader;
46
+
47
+ ID base_uri_SYMBOL;
48
+ ID ENCODING_SYMBOL;
49
+ ID OPTIONS_SYMBOL;
50
+
51
+ static VALUE rxml_reader_wrap(xmlTextReaderPtr reader)
44
52
  {
45
- return Data_Wrap_Struct(class, NULL, xmlFreeTextReader, reader);
53
+ return Data_Wrap_Struct(cXMLReader, NULL, xmlFreeTextReader, reader);
46
54
  }
47
55
 
48
56
  static xmlTextReaderPtr rxml_text_reader_get(VALUE obj)
@@ -54,120 +62,193 @@ static xmlTextReaderPtr rxml_text_reader_get(VALUE obj)
54
62
 
55
63
  /*
56
64
  * call-seq:
57
- * XML::Reader.file(path, encoding=nil, options=0) -> reader
65
+ * XML::Reader.document(doc) -> XML::Reader
58
66
  *
59
- * Parse an XML file from the filesystem or the network. The parsing flags
60
- * options are a combination of xmlParserOption.
67
+ * Create a new reader for the specified document.
61
68
  */
62
- static VALUE rxml_reader_new_file(int argc, VALUE *argv, VALUE self)
69
+ VALUE rxml_reader_document(VALUE klass, VALUE doc)
63
70
  {
71
+ xmlDocPtr xdoc;
64
72
  xmlTextReaderPtr xreader;
65
- VALUE rpath, rencoding, roptions;
66
- char *xpath;
67
- char *xencoding;
68
- int options;
69
-
70
- rb_scan_args(argc, argv, "12", &rpath, &rencoding, &roptions);
71
73
 
72
- xpath = NIL_P(rpath) ? NULL : StringValueCStr(rpath);
73
- xencoding = NIL_P(rencoding) ? NULL : StringValueCStr(rencoding);
74
- options = NIL_P(roptions) ? 0 : FIX2INT(roptions);
74
+ Data_Get_Struct(doc, xmlDoc, xdoc);
75
75
 
76
- xreader = xmlReaderForFile(xpath, xencoding, options);
76
+ xreader = xmlReaderWalker(xdoc);
77
77
 
78
78
  if (xreader == NULL)
79
79
  rxml_raise(&xmlLastError);
80
80
 
81
- return rxml_reader_new(self, xreader);
81
+ return rxml_reader_wrap(xreader);
82
82
  }
83
83
 
84
- /*
85
- * call-seq:
86
- * XML::Reader.io(io, url=nil, encoding=nil, options=0) -> reader
84
+ /* call-seq:
85
+ * XML::Reader.file(path) -> XML::Reader
86
+ * XML::Reader.file(path, :encoding => XML::Encoding::UTF_8,
87
+ * :options => XML::Parser::Options::NOENT) -> XML::Reader
87
88
  *
88
- * Parse an XML file from a file handle. The parsing flags options are
89
- * a combination of xmlParserOption.
89
+ * Creates a new reader by parsing the specified file or uri.
90
+ *
91
+ * You may provide an optional hash table to control how the
92
+ * parsing is performed. Valid options are:
93
+ *
94
+ * encoding - The document encoding, defaults to nil. Valid values
95
+ * are the encoding constants defined on XML::Encoding.
96
+ * options - Controls the execution of the parser, defaults to 0.
97
+ * Valid values are the constants defined on
98
+ * XML::Parser::Options. Multiple options can be combined
99
+ * by using Bitwise OR (|).
90
100
  */
91
- static VALUE rxml_reader_new_io(int argc, VALUE *argv, VALUE self)
101
+ static VALUE rxml_reader_file(int argc, VALUE *argv, VALUE klass)
92
102
  {
93
103
  xmlTextReaderPtr xreader;
94
- VALUE rio, rurl, rencoding, roptions;
95
- char *xurl;
96
- char *xencoding;
97
- int options;
104
+ VALUE path;
105
+ VALUE options;
98
106
 
99
- rb_scan_args(argc, argv, "13", &rio, &rurl, &rencoding, &roptions);
107
+ const char *xencoding = NULL;
108
+ int xoptions = 0;
100
109
 
101
- xurl = NIL_P(rurl) ? NULL : StringValueCStr(rurl);
102
- xencoding = NIL_P(rencoding) ? NULL : StringValueCStr(rencoding);
103
- options = NIL_P(roptions) ? 0 : FIX2INT(roptions);
110
+ rb_scan_args(argc, argv, "11", &path, &options);
111
+ Check_Type(path, T_STRING);
104
112
 
105
- xreader = xmlReaderForIO((xmlInputReadCallback) rxml_read_callback, NULL,
106
- (void *) rio, xurl, xencoding, options);
113
+ if (!NIL_P(options))
114
+ {
115
+ VALUE encoding = Qnil;
116
+ VALUE parserOptions = Qnil;
117
+
118
+ Check_Type(options, T_HASH);
119
+
120
+ encoding = rb_hash_aref(options, base_uri_SYMBOL);
121
+ xencoding = NIL_P(encoding) ? NULL : xmlGetCharEncodingName(NUM2INT(encoding));
122
+
123
+ parserOptions = rb_hash_aref(options, OPTIONS_SYMBOL);
124
+ xoptions = NIL_P(parserOptions) ? 0 : NUM2INT(parserOptions);
125
+ }
126
+
127
+ xreader = xmlReaderForFile(StringValueCStr(path), xencoding, xoptions);
107
128
 
108
129
  if (xreader == NULL)
109
130
  rxml_raise(&xmlLastError);
110
131
 
111
- return rxml_reader_new(self, xreader);
132
+ return rxml_reader_wrap(xreader);
112
133
  }
113
134
 
114
- /*
115
- * call-seq:
116
- * XML::Reader.walker(doc) -> reader
117
- * XML::Reader.document(doc) -> reader
135
+ /* call-seq:
136
+ * XML::Reader.io(io) -> XML::Reader
137
+ * XML::Reader.io(io, :encoding => XML::Encoding::UTF_8,
138
+ * :options => XML::Parser::Options::NOENT) -> XML::Reader
139
+ *
140
+ * Creates a new reader by parsing the specified io object.
118
141
  *
119
- * Create an XML text reader for a preparsed document.
142
+ * You may provide an optional hash table to control how the
143
+ * parsing is performed. Valid options are:
144
+ *
145
+ * base_uri - The base url for the parsed document.
146
+ * encoding - The document encoding, defaults to nil. Valid values
147
+ * are the encoding constants defined on XML::Encoding.
148
+ * options - Controls the execution of the parser, defaults to 0.
149
+ * Valid values are the constants defined on
150
+ * XML::Parser::Options. Multiple options can be combined
151
+ * by using Bitwise OR (|).
120
152
  */
121
- VALUE rxml_reader_new_walker(VALUE self, VALUE doc)
153
+ static VALUE rxml_reader_io(int argc, VALUE *argv, VALUE klass)
122
154
  {
123
- xmlDocPtr xdoc;
124
155
  xmlTextReaderPtr xreader;
156
+ VALUE io;
157
+ VALUE options;
158
+ char *xbaseurl = NULL;
159
+ const char *xencoding = NULL;
160
+ int xoptions = 0;
125
161
 
126
- Data_Get_Struct(doc, xmlDoc, xdoc);
162
+ rb_scan_args(argc, argv, "11", &io, &options);
127
163
 
128
- xreader = xmlReaderWalker(xdoc);
164
+ if (!NIL_P(options))
165
+ {
166
+ VALUE baseurl = Qnil;
167
+ VALUE encoding = Qnil;
168
+ VALUE parserOptions = Qnil;
169
+
170
+ Check_Type(options, T_HASH);
171
+
172
+ baseurl = rb_hash_aref(options, base_uri_SYMBOL);
173
+ xbaseurl = NIL_P(baseurl) ? NULL : StringValueCStr(baseurl);
174
+
175
+ encoding = rb_hash_aref(options, ENCODING_SYMBOL);
176
+ xencoding = NIL_P(encoding) ? NULL : xmlGetCharEncodingName(NUM2INT(encoding));
177
+
178
+ parserOptions = rb_hash_aref(options, OPTIONS_SYMBOL);
179
+ xoptions = NIL_P(parserOptions) ? 0 : NUM2INT(parserOptions);
180
+ }
181
+
182
+ xreader = xmlReaderForIO((xmlInputReadCallback) rxml_read_callback, NULL,
183
+ (void *) io,
184
+ xbaseurl, xencoding, xoptions);
129
185
 
130
186
  if (xreader == NULL)
131
187
  rxml_raise(&xmlLastError);
132
188
 
133
- return rxml_reader_new(self, xreader);
189
+ return rxml_reader_wrap(xreader);
134
190
  }
135
191
 
136
- /*
137
- * call-seq:
138
- * XML::Reader.new(data, url=nil, encoding=nil, options=0) -> reader
139
- * XML::Reader.string(data, url=nil, encoding=nil, options=0) -> reader
192
+ /* call-seq:
193
+ * XML::Reader.string(string) -> XML::Reader
194
+ * XML::Reader.string(string, :encoding => XML::Encoding::UTF_8,
195
+ * :options => XML::Parser::Options::NOENT) -> XML::Reader
140
196
  *
141
- * Create an XML text reader for an XML in-memory document. The parsing flags
142
- * options are a combination of xmlParserOption.
197
+ * Creates a new reader by parsing the specified string.
198
+ *
199
+ * You may provide an optional hash table to control how the
200
+ * parsing is performed. Valid options are:
201
+ *
202
+ * base_uri - The base url for the parsed document.
203
+ * encoding - The document encoding, defaults to nil. Valid values
204
+ * are the encoding constants defined on XML::Encoding.
205
+ * options - Controls the execution of the parser, defaults to 0.
206
+ * Valid values are the constants defined on
207
+ * XML::Parser::Options. Multiple options can be combined
208
+ * by using Bitwise OR (|).
143
209
  */
144
- static VALUE rxml_reader_new_data(int argc, VALUE *argv, VALUE self)
210
+ static VALUE rxml_reader_string(int argc, VALUE *argv, VALUE klass)
145
211
  {
146
212
  xmlTextReaderPtr xreader;
147
- VALUE rdata, rurl, rencoding, roptions;
148
- char *xdata;
149
- char *xurl;
150
- char *xencoding;
151
- int options;
213
+ VALUE string;
214
+ VALUE options;
215
+ char *xbaseurl = NULL;
216
+ const char *xencoding = NULL;
217
+ int xoptions = 0;
152
218
 
153
- rb_scan_args(argc, argv, "13", &rdata, &rurl, &rencoding, &roptions);
219
+ rb_scan_args(argc, argv, "11", &string, &options);
220
+ Check_Type(string, T_STRING);
221
+
222
+ if (!NIL_P(options))
223
+ {
224
+ VALUE baseurl = Qnil;
225
+ VALUE encoding = Qnil;
226
+ VALUE parserOptions = Qnil;
154
227
 
155
- xdata = NIL_P(rdata) ? NULL : StringValueCStr(rdata);
156
- xurl = NIL_P(rurl) ? NULL : StringValueCStr(rurl);
157
- xencoding = NIL_P(rencoding) ? NULL : StringValueCStr(rencoding);
158
- options = NIL_P(roptions) ? 0 : FIX2INT(roptions);
228
+ Check_Type(options, T_HASH);
159
229
 
160
- xreader = xmlReaderForMemory(xdata, strlen(xdata), xurl, xencoding, options);
230
+ baseurl = rb_hash_aref(options, base_uri_SYMBOL);
231
+ xbaseurl = NIL_P(baseurl) ? NULL : StringValueCStr(baseurl);
232
+
233
+ encoding = rb_hash_aref(options, ENCODING_SYMBOL);
234
+ xencoding = NIL_P(encoding) ? NULL : xmlGetCharEncodingName(NUM2INT(encoding));
235
+
236
+ parserOptions = rb_hash_aref(options, OPTIONS_SYMBOL);
237
+ xoptions = NIL_P(parserOptions) ? 0 : NUM2INT(parserOptions);
238
+ }
239
+
240
+ xreader = xmlReaderForMemory(StringValueCStr(string), RSTRING_LEN(string),
241
+ xbaseurl, xencoding, xoptions);
161
242
 
162
243
  if (xreader == NULL)
163
244
  rxml_raise(&xmlLastError);
164
245
 
165
- return rxml_reader_new(self, xreader);
246
+ return rxml_reader_wrap(xreader);
166
247
  }
167
248
 
168
249
  /*
169
250
  * call-seq:
170
- * parser.close -> code
251
+ * reader.close -> code
171
252
  *
172
253
  * This method releases any resources allocated by the current instance
173
254
  * changes the state to Closed and close any underlying input.
@@ -179,7 +260,7 @@ static VALUE rxml_reader_close(VALUE self)
179
260
 
180
261
  /*
181
262
  * call-seq:
182
- * parser.move_to_attribute(val) -> code
263
+ * reader.move_to_attribute(val) -> code
183
264
  *
184
265
  * Move the position of the current instance to the attribute with the
185
266
  * specified index (if +val+ is an integer) or name (if +val+ is a string)
@@ -266,6 +347,21 @@ static VALUE rxml_reader_next_sibling(VALUE self)
266
347
  return INT2FIX(xmlTextReaderNextSibling(rxml_text_reader_get(self)));
267
348
  }
268
349
 
350
+ /*
351
+ * call-seq:
352
+ * reader.node -> XML::Node
353
+ *
354
+ * Returns the reader's current node.
355
+ * WARNING - Using this method is dangerous because the
356
+ * node may be destroyed on the next #read.
357
+ */
358
+ static VALUE rxml_reader_node(VALUE self)
359
+ {
360
+ xmlTextReaderPtr xreader = rxml_text_reader_get(self);
361
+ xmlNodePtr xnode = xmlTextReaderCurrentNode(xreader);
362
+ return rxml_node_wrap(xnode);
363
+ }
364
+
269
365
  /*
270
366
  * call-seq:
271
367
  * reader.node_type -> type
@@ -299,15 +395,27 @@ static VALUE rxml_reader_normalization(VALUE self)
299
395
  * call-seq:
300
396
  * reader.read -> code
301
397
  *
302
- * Move the position of the current instance to the next node in the stream,
303
- * exposing its properties.
398
+ * Causes the reader to move to the next node in the stream, exposing its properties.
304
399
  *
305
- * Return 1 if the node was read successfully, 0 if there is no more nodes to
306
- * read, or -1 in case of error.
307
- */
400
+ * Returns true if a node was successfully read or false if there are no more
401
+ * nodes to read. On errors, an exception is raised.*/
308
402
  static VALUE rxml_reader_read(VALUE self)
309
403
  {
310
- return INT2FIX(xmlTextReaderRead(rxml_text_reader_get(self)));
404
+ int result = xmlTextReaderRead(rxml_text_reader_get(self));
405
+ switch(result)
406
+ {
407
+ case -1:
408
+ rxml_raise(&xmlLastError);
409
+ return Qnil;
410
+ break;
411
+ case 0:
412
+ return Qfalse;
413
+ case 1:
414
+ return Qtrue;
415
+ default:
416
+ rb_raise(rb_eRuntimeError,
417
+ "xmlTextReaderRead did not return -1, 0 or 1. Return value was: %d", result);
418
+ }
311
419
  }
312
420
 
313
421
  /*
@@ -456,14 +564,28 @@ static VALUE rxml_reader_attr_count(VALUE self)
456
564
 
457
565
  /*
458
566
  * call-seq:
459
- * reader.encoding -> encoding
567
+ * reader.encoding -> XML::Encoding::UTF_8
568
+ *
569
+ * Returns the encoding of the document being read. Note you
570
+ * first have to read data from the reader for encoding
571
+ * to return a value
572
+ *
573
+ * reader = XML::Reader.file(XML_FILE)
574
+ * assert_nil(reader.encoding)
575
+ * reader.read
576
+ * assert_equal(XML::Encoding::UTF_8, reader.encoding)
460
577
  *
461
- * Determine the encoding of the document being read.
578
+ * In addition, libxml always appears to return nil for the encoding
579
+ * when parsing strings.
462
580
  */
463
581
  static VALUE rxml_reader_encoding(VALUE self)
464
582
  {
465
- const xmlChar *result = xmlTextReaderConstEncoding(rxml_text_reader_get(self));
466
- return (result == NULL ? Qnil : rb_str_new2((const char*)result));
583
+ xmlTextReaderPtr xreader = rxml_text_reader_get(self);
584
+ const xmlChar *xencoding = xmlTextReaderConstEncoding(xreader);
585
+ if (xencoding)
586
+ return INT2NUM(xmlParseCharEncoding(xencoding));
587
+ else
588
+ return INT2NUM(XML_CHAR_ENCODING_NONE);
467
589
  }
468
590
 
469
591
  /*
@@ -668,7 +790,7 @@ static VALUE rxml_reader_expand(VALUE self)
668
790
  doc = xmlTextReaderCurrentDoc(reader);
669
791
  rxml_document_wrap(doc);
670
792
 
671
- return rxml_node_wrap(cXMLNode, node);
793
+ return rxml_node_wrap(node);
672
794
  }
673
795
 
674
796
  #if LIBXML_VERSION >= 20618
@@ -768,132 +890,104 @@ mXML = rb_define_module_under(mLibXML, "XML");
768
890
 
769
891
  void ruby_init_xml_reader(void)
770
892
  {
893
+ base_uri_SYMBOL = ID2SYM(rb_intern("base_uri"));
894
+ ENCODING_SYMBOL = ID2SYM(rb_intern("encoding"));
895
+ OPTIONS_SYMBOL = ID2SYM(rb_intern("options"));
896
+
771
897
  cXMLReader = rb_define_class_under(mXML, "Reader", rb_cObject);
772
898
 
773
- rb_define_singleton_method(cXMLReader, "file", rxml_reader_new_file, -1);
774
- rb_define_singleton_method(cXMLReader, "io", rxml_reader_new_io, -1);
775
- rb_define_singleton_method(cXMLReader, "walker", rxml_reader_new_walker, 1);
776
- rb_define_alias(CLASS_OF(cXMLReader), "document", "walker");
777
- rb_define_singleton_method(cXMLReader, "new", rxml_reader_new_data, -1);
778
- rb_define_alias(CLASS_OF(cXMLReader), "string", "new");
899
+ rb_define_singleton_method(cXMLReader, "document", rxml_reader_document, 1);
900
+ rb_define_singleton_method(cXMLReader, "file", rxml_reader_file, -1);
901
+ rb_define_singleton_method(cXMLReader, "io", rxml_reader_io, -1);
902
+ rb_define_singleton_method(cXMLReader, "string", rxml_reader_string, -1);
779
903
 
904
+ rb_define_method(cXMLReader, "[]", rxml_reader_attribute, 1);
905
+ rb_define_method(cXMLReader, "attribute_count", rxml_reader_attr_count, 0);
906
+ rb_define_method(cXMLReader, "base_uri", rxml_reader_base_uri, 0);
907
+ #if LIBXML_VERSION >= 20618
908
+ rb_define_method(cXMLReader, "byte_consumed", rxml_reader_byte_consumed, 0);
909
+ #endif
780
910
  rb_define_method(cXMLReader, "close", rxml_reader_close, 0);
781
-
911
+ #if LIBXML_VERSION >= 20617
912
+ rb_define_method(cXMLReader, "column_number", rxml_reader_column_number, 0);
913
+ #endif
914
+ rb_define_method(cXMLReader, "depth", rxml_reader_depth, 0);
915
+ rb_define_method(cXMLReader, "encoding", rxml_reader_encoding, 0);
916
+ rb_define_method(cXMLReader, "expand", rxml_reader_expand, 0);
917
+ rb_define_method(cXMLReader, "has_attributes?", rxml_reader_has_attributes, 0);
918
+ rb_define_method(cXMLReader, "has_value?", rxml_reader_has_value, 0);
919
+ #if LIBXML_VERSION >= 20617
920
+ rb_define_method(cXMLReader, "line_number", rxml_reader_line_number, 0);
921
+ #endif
922
+ rb_define_method(cXMLReader, "local_name", rxml_reader_local_name, 0);
923
+ rb_define_method(cXMLReader, "lookup_namespace", rxml_reader_lookup_namespace, 1);
782
924
  rb_define_method(cXMLReader, "move_to_attribute", rxml_reader_move_to_attr, 1);
783
- rb_define_method(cXMLReader, "move_to_first_attribute",
784
- rxml_reader_move_to_first_attr, 0);
785
- rb_define_method(cXMLReader, "move_to_next_attribute",
786
- rxml_reader_move_to_next_attr, 0);
787
- rb_define_method(cXMLReader, "move_to_element", rxml_reader_move_to_element,
788
- 0);
925
+ rb_define_method(cXMLReader, "move_to_first_attribute", rxml_reader_move_to_first_attr, 0);
926
+ rb_define_method(cXMLReader, "move_to_next_attribute", rxml_reader_move_to_next_attr, 0);
927
+ rb_define_method(cXMLReader, "move_to_element", rxml_reader_move_to_element, 0);
928
+ rb_define_method(cXMLReader, "name", rxml_reader_name, 0);
929
+ rb_define_method(cXMLReader, "namespace_uri", rxml_reader_namespace_uri, 0);
789
930
  rb_define_method(cXMLReader, "next", rxml_reader_next, 0);
790
931
  rb_define_method(cXMLReader, "next_sibling", rxml_reader_next_sibling, 0);
932
+ rb_define_method(cXMLReader, "node", rxml_reader_node, 0);
933
+ rb_define_method(cXMLReader, "node_type", rxml_reader_node_type, 0);
934
+ rb_define_method(cXMLReader, "normalization", rxml_reader_normalization, 0);
935
+ rb_define_method(cXMLReader, "prefix", rxml_reader_prefix, 0);
936
+ rb_define_method(cXMLReader, "quote_char", rxml_reader_quote_char, 0);
791
937
  rb_define_method(cXMLReader, "read", rxml_reader_read, 0);
792
- rb_define_method(cXMLReader, "read_attribute_value",
793
- rxml_reader_read_attr_value, 0);
938
+ rb_define_method(cXMLReader, "read_attribute_value", rxml_reader_read_attr_value, 0);
794
939
  rb_define_method(cXMLReader, "read_inner_xml", rxml_reader_read_inner_xml, 0);
795
940
  rb_define_method(cXMLReader, "read_outer_xml", rxml_reader_read_outer_xml, 0);
796
941
  rb_define_method(cXMLReader, "read_state", rxml_reader_read_state, 0);
797
942
  rb_define_method(cXMLReader, "read_string", rxml_reader_read_string, 0);
798
-
799
- rb_define_method(cXMLReader, "relax_ng_validate",
800
- rxml_reader_relax_ng_validate, 1);
943
+ rb_define_method(cXMLReader, "relax_ng_validate", rxml_reader_relax_ng_validate, 1);
944
+ rb_define_method(cXMLReader, "standalone", rxml_reader_standalone, 0);
801
945
  #if LIBXML_VERSION >= 20620
802
946
  rb_define_method(cXMLReader, "schema_validate", rxml_reader_schema_validate, 1);
803
947
  #endif
804
-
805
- rb_define_method(cXMLReader, "node_type", rxml_reader_node_type, 0);
806
- rb_define_method(cXMLReader, "normalization", rxml_reader_normalization, 0);
807
- rb_define_method(cXMLReader, "attribute_count", rxml_reader_attr_count, 0);
808
- rb_define_method(cXMLReader, "name", rxml_reader_name, 0);
809
- rb_define_method(cXMLReader, "local_name", rxml_reader_local_name, 0);
810
- rb_define_method(cXMLReader, "encoding", rxml_reader_encoding, 0);
811
- rb_define_method(cXMLReader, "base_uri", rxml_reader_base_uri, 0);
812
- rb_define_method(cXMLReader, "namespace_uri", rxml_reader_namespace_uri, 0);
948
+ rb_define_method(cXMLReader, "value", rxml_reader_value, 0);
813
949
  rb_define_method(cXMLReader, "xml_lang", rxml_reader_xml_lang, 0);
814
950
  rb_define_method(cXMLReader, "xml_version", rxml_reader_xml_version, 0);
815
- rb_define_method(cXMLReader, "prefix", rxml_reader_prefix, 0);
816
- rb_define_method(cXMLReader, "depth", rxml_reader_depth, 0);
817
- rb_define_method(cXMLReader, "quote_char", rxml_reader_quote_char, 0);
818
- rb_define_method(cXMLReader, "standalone", rxml_reader_standalone, 0);
819
-
820
- rb_define_method(cXMLReader, "has_attributes?", rxml_reader_has_attributes, 0);
821
- rb_define_method(cXMLReader, "[]", rxml_reader_attribute, 1);
822
- rb_define_method(cXMLReader, "has_value?", rxml_reader_has_value, 0);
823
- rb_define_method(cXMLReader, "value", rxml_reader_value, 0);
824
-
825
- rb_define_method(cXMLReader, "lookup_namespace",
826
- rxml_reader_lookup_namespace, 1);
827
- rb_define_method(cXMLReader, "expand", rxml_reader_expand, 0);
828
-
829
- #if LIBXML_VERSION >= 20618
830
- rb_define_method(cXMLReader, "byte_consumed", rxml_reader_byte_consumed, 0);
831
- #endif
832
- #if LIBXML_VERSION >= 20617
833
- rb_define_method(cXMLReader, "column_number", rxml_reader_column_number, 0);
834
- rb_define_method(cXMLReader, "line_number", rxml_reader_line_number, 0);
835
- #endif
836
951
  rb_define_method(cXMLReader, "default?", rxml_reader_default, 0);
837
952
  rb_define_method(cXMLReader, "empty_element?", rxml_reader_empty_element, 0);
838
- rb_define_method(cXMLReader, "namespace_declaration?",
839
- rxml_reader_namespace_declaration, 0);
953
+ rb_define_method(cXMLReader, "namespace_declaration?", rxml_reader_namespace_declaration, 0);
840
954
  rb_define_method(cXMLReader, "valid?", rxml_reader_valid, 0);
841
955
 
956
+ /* Constants */
842
957
  rb_define_const(cXMLReader, "LOADDTD", INT2FIX(XML_PARSER_LOADDTD));
843
958
  rb_define_const(cXMLReader, "DEFAULTATTRS", INT2FIX(XML_PARSER_DEFAULTATTRS));
844
959
  rb_define_const(cXMLReader, "VALIDATE", INT2FIX(XML_PARSER_VALIDATE));
845
- rb_define_const(cXMLReader, "SUBST_ENTITIES", INT2FIX(
846
- XML_PARSER_SUBST_ENTITIES));
847
-
848
- rb_define_const(cXMLReader, "SEVERITY_VALIDITY_WARNING", INT2FIX(
849
- XML_PARSER_SEVERITY_VALIDITY_WARNING));
850
- rb_define_const(cXMLReader, "SEVERITY_VALIDITY_ERROR", INT2FIX(
851
- XML_PARSER_SEVERITY_VALIDITY_ERROR));
852
- rb_define_const(cXMLReader, "SEVERITY_WARNING", INT2FIX(
853
- XML_PARSER_SEVERITY_WARNING));
854
- rb_define_const(cXMLReader, "SEVERITY_ERROR", INT2FIX(
855
- XML_PARSER_SEVERITY_ERROR));
960
+ rb_define_const(cXMLReader, "SUBST_ENTITIES", INT2FIX(XML_PARSER_SUBST_ENTITIES));
961
+
962
+ rb_define_const(cXMLReader, "SEVERITY_VALIDITY_WARNING", INT2FIX(XML_PARSER_SEVERITY_VALIDITY_WARNING));
963
+ rb_define_const(cXMLReader, "SEVERITY_VALIDITY_ERROR", INT2FIX(XML_PARSER_SEVERITY_VALIDITY_ERROR));
964
+ rb_define_const(cXMLReader, "SEVERITY_WARNING", INT2FIX(XML_PARSER_SEVERITY_WARNING));
965
+ rb_define_const(cXMLReader, "SEVERITY_ERROR", INT2FIX(XML_PARSER_SEVERITY_ERROR));
856
966
 
857
967
  rb_define_const(cXMLReader, "TYPE_NONE", INT2FIX(XML_READER_TYPE_NONE));
858
968
  rb_define_const(cXMLReader, "TYPE_ELEMENT", INT2FIX(XML_READER_TYPE_ELEMENT));
859
- rb_define_const(cXMLReader, "TYPE_ATTRIBUTE", INT2FIX(
860
- XML_READER_TYPE_ATTRIBUTE));
969
+ rb_define_const(cXMLReader, "TYPE_ATTRIBUTE", INT2FIX(XML_READER_TYPE_ATTRIBUTE));
861
970
  rb_define_const(cXMLReader, "TYPE_TEXT", INT2FIX(XML_READER_TYPE_TEXT));
862
971
  rb_define_const(cXMLReader, "TYPE_CDATA", INT2FIX(XML_READER_TYPE_CDATA));
863
- rb_define_const(cXMLReader, "TYPE_ENTITY_REFERENCE", INT2FIX(
864
- XML_READER_TYPE_ENTITY_REFERENCE));
972
+ rb_define_const(cXMLReader, "TYPE_ENTITY_REFERENCE", INT2FIX(XML_READER_TYPE_ENTITY_REFERENCE));
865
973
  rb_define_const(cXMLReader, "TYPE_ENTITY", INT2FIX(XML_READER_TYPE_ENTITY));
866
- rb_define_const(cXMLReader, "TYPE_PROCESSING_INSTRUCTION", INT2FIX(
867
- XML_READER_TYPE_PROCESSING_INSTRUCTION));
974
+ rb_define_const(cXMLReader, "TYPE_PROCESSING_INSTRUCTION", INT2FIX(XML_READER_TYPE_PROCESSING_INSTRUCTION));
868
975
  rb_define_const(cXMLReader, "TYPE_COMMENT", INT2FIX(XML_READER_TYPE_COMMENT));
869
- rb_define_const(cXMLReader, "TYPE_DOCUMENT",
870
- INT2FIX(XML_READER_TYPE_DOCUMENT));
871
- rb_define_const(cXMLReader, "TYPE_DOCUMENT_TYPE", INT2FIX(
872
- XML_READER_TYPE_DOCUMENT_TYPE));
873
- rb_define_const(cXMLReader, "TYPE_DOCUMENT_FRAGMENT", INT2FIX(
874
- XML_READER_TYPE_DOCUMENT_FRAGMENT));
875
- rb_define_const(cXMLReader, "TYPE_NOTATION",
876
- INT2FIX(XML_READER_TYPE_NOTATION));
877
- rb_define_const(cXMLReader, "TYPE_WHITESPACE", INT2FIX(
878
- XML_READER_TYPE_WHITESPACE));
879
- rb_define_const(cXMLReader, "TYPE_SIGNIFICANT_WHITESPACE", INT2FIX(
880
- XML_READER_TYPE_SIGNIFICANT_WHITESPACE));
881
- rb_define_const(cXMLReader, "TYPE_END_ELEMENT", INT2FIX(
882
- XML_READER_TYPE_END_ELEMENT));
883
- rb_define_const(cXMLReader, "TYPE_END_ENTITY", INT2FIX(
884
- XML_READER_TYPE_END_ENTITY));
885
- rb_define_const(cXMLReader, "TYPE_XML_DECLARATION", INT2FIX(
886
- XML_READER_TYPE_XML_DECLARATION));
976
+ rb_define_const(cXMLReader, "TYPE_DOCUMENT", INT2FIX(XML_READER_TYPE_DOCUMENT));
977
+ rb_define_const(cXMLReader, "TYPE_DOCUMENT_TYPE", INT2FIX(XML_READER_TYPE_DOCUMENT_TYPE));
978
+ rb_define_const(cXMLReader, "TYPE_DOCUMENT_FRAGMENT", INT2FIX(XML_READER_TYPE_DOCUMENT_FRAGMENT));
979
+ rb_define_const(cXMLReader, "TYPE_NOTATION", INT2FIX(XML_READER_TYPE_NOTATION));
980
+ rb_define_const(cXMLReader, "TYPE_WHITESPACE", INT2FIX(XML_READER_TYPE_WHITESPACE));
981
+ rb_define_const(cXMLReader, "TYPE_SIGNIFICANT_WHITESPACE", INT2FIX(XML_READER_TYPE_SIGNIFICANT_WHITESPACE));
982
+ rb_define_const(cXMLReader, "TYPE_END_ELEMENT", INT2FIX(XML_READER_TYPE_END_ELEMENT));
983
+ rb_define_const(cXMLReader, "TYPE_END_ENTITY", INT2FIX(XML_READER_TYPE_END_ENTITY));
984
+ rb_define_const(cXMLReader, "TYPE_XML_DECLARATION", INT2FIX(XML_READER_TYPE_XML_DECLARATION));
887
985
 
888
986
  /* Read states */
889
- rb_define_const(cXMLReader, "MODE_INITIAL", INT2FIX(
890
- XML_TEXTREADER_MODE_INITIAL));
891
- rb_define_const(cXMLReader, "MODE_INTERACTIVE", INT2FIX(
892
- XML_TEXTREADER_MODE_INTERACTIVE));
987
+ rb_define_const(cXMLReader, "MODE_INITIAL", INT2FIX(XML_TEXTREADER_MODE_INITIAL));
988
+ rb_define_const(cXMLReader, "MODE_INTERACTIVE", INT2FIX(XML_TEXTREADER_MODE_INTERACTIVE));
893
989
  rb_define_const(cXMLReader, "MODE_ERROR", INT2FIX(XML_TEXTREADER_MODE_ERROR));
894
990
  rb_define_const(cXMLReader, "MODE_EOF", INT2FIX(XML_TEXTREADER_MODE_EOF));
895
- rb_define_const(cXMLReader, "MODE_CLOSED",
896
- INT2FIX(XML_TEXTREADER_MODE_CLOSED));
897
- rb_define_const(cXMLReader, "MODE_READING", INT2FIX(
898
- XML_TEXTREADER_MODE_READING));
991
+ rb_define_const(cXMLReader, "MODE_CLOSED", INT2FIX(XML_TEXTREADER_MODE_CLOSED));
992
+ rb_define_const(cXMLReader, "MODE_READING", INT2FIX(XML_TEXTREADER_MODE_READING));
899
993
  }