libxml-ruby 0.9.7-x86-mswin32-60 → 0.9.8-x86-mswin32-60

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (99) hide show
  1. data/CHANGES +53 -0
  2. data/Rakefile +1 -0
  3. data/ext/libxml/build.log +4 -0
  4. data/ext/libxml/cbg.c +86 -86
  5. data/ext/libxml/libxml.c +878 -876
  6. data/ext/libxml/ruby_libxml.h +8 -4
  7. data/ext/libxml/ruby_xml_attr.c +36 -168
  8. data/ext/libxml/ruby_xml_attr.h +2 -4
  9. data/ext/libxml/ruby_xml_attr_decl.c +177 -0
  10. data/ext/libxml/ruby_xml_attr_decl.h +13 -0
  11. data/ext/libxml/ruby_xml_attributes.c +29 -20
  12. data/ext/libxml/ruby_xml_document.c +895 -898
  13. data/ext/libxml/ruby_xml_dtd.c +18 -1
  14. data/ext/libxml/ruby_xml_dtd.h +1 -0
  15. data/ext/libxml/ruby_xml_encoding.c +116 -0
  16. data/ext/libxml/ruby_xml_encoding.h +12 -0
  17. data/ext/libxml/ruby_xml_error.c +8 -2
  18. data/ext/libxml/ruby_xml_html_parser.c +53 -74
  19. data/ext/libxml/ruby_xml_html_parser.h +2 -3
  20. data/ext/libxml/ruby_xml_html_parser_context.c +145 -0
  21. data/ext/libxml/ruby_xml_html_parser_context.h +12 -0
  22. data/ext/libxml/ruby_xml_html_parser_options.c +48 -0
  23. data/ext/libxml/ruby_xml_html_parser_options.h +12 -0
  24. data/ext/libxml/ruby_xml_input_cbg.c +1 -1
  25. data/ext/libxml/ruby_xml_io.c +30 -0
  26. data/ext/libxml/ruby_xml_io.h +9 -0
  27. data/ext/libxml/ruby_xml_namespace.c +34 -16
  28. data/ext/libxml/ruby_xml_namespace.h +2 -2
  29. data/ext/libxml/ruby_xml_namespaces.c +6 -6
  30. data/ext/libxml/ruby_xml_node.c +1367 -1324
  31. data/ext/libxml/ruby_xml_node.h +2 -2
  32. data/ext/libxml/ruby_xml_parser.c +26 -78
  33. data/ext/libxml/ruby_xml_parser.h +1 -1
  34. data/ext/libxml/ruby_xml_parser_context.c +284 -13
  35. data/ext/libxml/ruby_xml_parser_context.h +1 -2
  36. data/ext/libxml/ruby_xml_parser_options.c +75 -0
  37. data/ext/libxml/ruby_xml_parser_options.h +14 -0
  38. data/ext/libxml/ruby_xml_reader.c +277 -183
  39. data/ext/libxml/ruby_xml_sax_parser.c +60 -57
  40. data/ext/libxml/ruby_xml_xpath_context.c +43 -8
  41. data/ext/libxml/ruby_xml_xpath_expression.c +6 -0
  42. data/ext/libxml/ruby_xml_xpath_object.c +107 -95
  43. data/ext/libxml/ruby_xml_xpath_object.h +9 -1
  44. data/ext/libxml/ruby_xml_xpointer.c +107 -107
  45. data/ext/libxml/version.h +2 -2
  46. data/ext/mingw/libxml_ruby.dll.a +0 -0
  47. data/ext/mingw/libxml_ruby.so +0 -0
  48. data/ext/vc/libxml_ruby.vcproj +43 -3
  49. data/lib/libxml.rb +2 -3
  50. data/lib/libxml/attr.rb +71 -2
  51. data/lib/libxml/attr_decl.rb +81 -0
  52. data/lib/libxml/document.rb +78 -14
  53. data/lib/libxml/html_parser.rb +75 -42
  54. data/lib/libxml/node.rb +11 -0
  55. data/lib/libxml/parser.rb +106 -62
  56. data/lib/libxml/reader.rb +12 -0
  57. data/lib/libxml/sax_parser.rb +42 -52
  58. data/lib/libxml/xpath_object.rb +15 -0
  59. data/test/model/atom.xml +12 -12
  60. data/test/model/bands.xml +4 -4
  61. data/test/model/books.xml +146 -147
  62. data/test/model/merge_bug_data.xml +1 -1
  63. data/test/model/rubynet.xml +1 -0
  64. data/test/model/shiporder.rng +1 -1
  65. data/test/model/shiporder.xml +22 -22
  66. data/test/model/shiporder.xsd +30 -30
  67. data/test/model/xinclude.xml +1 -1
  68. data/test/{tc_node_attr.rb → tc_attr.rb} +1 -1
  69. data/test/tc_attr_decl.rb +131 -0
  70. data/test/tc_deprecated_require.rb +1 -3
  71. data/test/tc_document.rb +13 -3
  72. data/test/tc_document_write.rb +5 -5
  73. data/test/tc_dtd.rb +13 -5
  74. data/test/tc_html_parser.rb +14 -26
  75. data/test/tc_node_cdata.rb +1 -3
  76. data/test/tc_node_comment.rb +2 -4
  77. data/test/tc_node_edit.rb +2 -3
  78. data/test/tc_node_text.rb +35 -1
  79. data/test/tc_node_write.rb +3 -3
  80. data/test/tc_node_xlink.rb +2 -4
  81. data/test/tc_parser.rb +163 -70
  82. data/test/tc_parser_context.rb +103 -42
  83. data/test/tc_reader.rb +173 -45
  84. data/test/tc_relaxng.rb +2 -2
  85. data/test/tc_sax_parser.rb +48 -52
  86. data/test/tc_schema.rb +2 -2
  87. data/test/tc_xpath.rb +37 -6
  88. data/test/tc_xpath_context.rb +7 -1
  89. data/test/tc_xpath_expression.rb +1 -3
  90. data/test/tc_xpointer.rb +1 -3
  91. data/test/test_suite.rb +2 -3
  92. metadata +20 -13
  93. data/ext/libxml/ruby_xml_input.c +0 -329
  94. data/ext/libxml/ruby_xml_input.h +0 -20
  95. data/lib/libxml/parser_context.rb +0 -17
  96. data/lib/libxml/parser_options.rb +0 -25
  97. data/test/model/simple.xml +0 -7
  98. data/test/tc_input.rb +0 -13
  99. data/test/tc_well_formed.rb +0 -11
@@ -1,4 +1,4 @@
1
- /* $Id: ruby_xml_parser_context.h 666 2008-12-07 00:16:50Z cfis $ */
1
+ /* $Id: ruby_xml_parser_context.h 711 2009-01-20 07:17:15Z cfis $ */
2
2
 
3
3
  /* Please see the LICENSE file for copyright and distribution information */
4
4
 
@@ -8,6 +8,5 @@
8
8
  extern VALUE cXMLParserContext;
9
9
 
10
10
  void ruby_init_xml_parser_context(void);
11
- VALUE rxml_parser_context_wrap(xmlParserCtxtPtr ctxt);
12
11
 
13
12
  #endif
@@ -0,0 +1,75 @@
1
+ /* $Id: ruby_xml_parser.c 710 2009-01-20 05:30:51Z cfis $ */
2
+
3
+ /* Please see the LICENSE file for copyright and distribution information */
4
+
5
+ #include <stdarg.h>
6
+ #include "ruby_libxml.h"
7
+
8
+ /* Document-class: LibXML::XML::ParserOptions
9
+ *
10
+ * Options that control the operation of the XML::Parser. The easiest
11
+ * way to set a parser's options is to use the methods
12
+ * XML::Parser.file, XML::Parser.io or XML::Parser.string.
13
+ * For additional control, see XML::Parser::Context#options=.
14
+ */
15
+
16
+ VALUE mXMLParserOptions;
17
+
18
+
19
+ // Rdoc needs to know
20
+ #ifdef RDOC_NEVER_DEFINED
21
+ mLibXML = rb_define_module("LibXML");
22
+ mXML = rb_define_module_under(mLibXML, "XML");
23
+ #endif
24
+
25
+ void ruby_init_parser_options(void)
26
+ {
27
+ mXMLParserOptions = rb_define_module_under(cXMLParser, "Options");
28
+
29
+ /* recover on errors */
30
+ rb_define_const(mXMLParserOptions, "RECOVER", INT2NUM(XML_PARSE_RECOVER));
31
+ /* substitute entities */
32
+ rb_define_const(mXMLParserOptions, "NOENT", INT2NUM(XML_PARSE_NOENT));
33
+ /* load the external subset */
34
+ rb_define_const(mXMLParserOptions, "DTDLOAD", INT2NUM(XML_PARSE_DTDLOAD));
35
+ /* default DTD attributes */
36
+ rb_define_const(mXMLParserOptions, "DTDATTR", INT2NUM(XML_PARSE_DTDATTR));
37
+ /* validate with the DTD */
38
+ rb_define_const(mXMLParserOptions, "DTDVALID", INT2NUM(XML_PARSE_DTDVALID));
39
+ /* suppress error reports */
40
+ rb_define_const(mXMLParserOptions, "NOERROR", INT2NUM(XML_PARSE_NOERROR));
41
+ /* suppress warning reports */
42
+ rb_define_const(mXMLParserOptions, "NOWARNING", INT2NUM(XML_PARSE_NOWARNING));
43
+ /* pedantic error reporting */
44
+ rb_define_const(mXMLParserOptions, "PEDANTIC", INT2NUM(XML_PARSE_PEDANTIC));
45
+ /* remove blank nodes */
46
+ rb_define_const(mXMLParserOptions, "NOBLANKS", INT2NUM(XML_PARSE_NOBLANKS));
47
+ /* use the SAX1 interface internally */
48
+ rb_define_const(mXMLParserOptions, "SAX1", INT2NUM(XML_PARSE_SAX1));
49
+ /* Implement XInclude substitution */
50
+ rb_define_const(mXMLParserOptions, "XINCLUDE", INT2NUM(XML_PARSE_XINCLUDE));
51
+ /* Forbid network access */
52
+ rb_define_const(mXMLParserOptions, "NONET", INT2NUM(XML_PARSE_NONET));
53
+ /* Do not reuse the context dictionary */
54
+ rb_define_const(mXMLParserOptions, "NODICT", INT2NUM(XML_PARSE_NODICT));
55
+ /* remove redundant namespaces declarations */
56
+ rb_define_const(mXMLParserOptions, "NSCLEAN", INT2NUM(XML_PARSE_NSCLEAN));
57
+ /* merge CDATA as text nodes */
58
+ rb_define_const(mXMLParserOptions, "NOCDATA", INT2NUM(XML_PARSE_NOCDATA));
59
+ #if LIBXML_VERSION >= 20621
60
+ /* do not generate XINCLUDE START/END nodes */
61
+ rb_define_const(mXMLParserOptions, "NOXINCNODE", INT2NUM(XML_PARSE_NOXINCNODE));
62
+ #endif
63
+ #if LIBXML_VERSION >= 20700
64
+ /* compact small text nodes */
65
+ rb_define_const(mXMLParserOptions, "COMPACT", INT2NUM(XML_PARSE_COMPACT));
66
+ /* parse using XML-1.0 before update 5 */
67
+ rb_define_const(mXMLParserOptions, "PARSE_OLD10", INT2NUM(XML_PARSE_OLD10));
68
+ /* do not fixup XINCLUDE xml:base uris */
69
+ rb_define_const(mXMLParserOptions, "NOBASEFIX", INT2NUM(XML_PARSE_NOBASEFIX));
70
+ #endif
71
+ #if LIBXML_VERSION >= 20703
72
+ /* relax any hardcoded limit from the parser */
73
+ rb_define_const(mXMLParserOptions, "HUGE", INT2NUM(XML_PARSE_HUGE));
74
+ #endif
75
+ }
@@ -0,0 +1,14 @@
1
+ /* $Id: ruby_xml_parser.h 710 2009-01-20 05:30:51Z cfis $ */
2
+
3
+ /* Please see the LICENSE file for copyright and distribution information */
4
+
5
+ #ifndef __RXML_PARSER_OPTIONS__
6
+ #define __RXML_PARSER_OPTIONS__
7
+
8
+ #define MAX_LIBXML_FEATURES_LEN 50
9
+
10
+ extern VALUE mXMLParserOptions;
11
+
12
+ void ruby_init_parser_options();
13
+
14
+ #endif
@@ -4,8 +4,6 @@
4
4
  #include "ruby_libxml.h"
5
5
  #include "ruby_xml_reader.h"
6
6
 
7
- VALUE cXMLReader;
8
-
9
7
  /*
10
8
  * Document-class: LibXML::XML::Reader
11
9
  *
@@ -22,27 +20,37 @@ VALUE cXMLReader;
22
20
  *
23
21
  * Example:
24
22
  *
25
- * parser = XML::Reader.new("<foo><bar>1</bar><bar>2</bar><bar>3</bar></foo>")
26
- * parser.read
27
- * assert_equal('foo', parser.name)
28
- * assert_equal(nil, parser.value)
23
+ * reader = XML::Reader.string("<foo><bar>1</bar><bar>2</bar><bar>3</bar></foo>")
24
+ * reader.read
25
+ * assert_equal('foo', reader.name)
26
+ * assert_equal(nil, reader.value)
29
27
  *
30
28
  * 3.times do |i|
31
- * parser.read
32
- * assert_equal(XML::Reader::TYPE_ELEMENT, parser.node_type)
33
- * assert_equal('bar', parser.name)
34
- * parser.read
35
- * assert_equal(XML::Reader::TYPE_TEXT, parser.node_type)
36
- * assert_equal((i + 1).to_s, parser.value)
37
- * parser.read
38
- * assert_equal(XML::Reader::TYPE_END_ELEMENT, parser.node_type)
29
+ * reader.read
30
+ * assert_equal(XML::Reader::TYPE_ELEMENT, reader.node_type)
31
+ * assert_equal('bar', reader.name)
32
+ * reader.read
33
+ * assert_equal(XML::Reader::TYPE_TEXT, reader.node_type)
34
+ * assert_equal((i + 1).to_s, reader.value)
35
+ * reader.read
36
+ * assert_equal(XML::Reader::TYPE_END_ELEMENT, reader.node_type)
39
37
  * end
40
38
  *
39
+ * You can also parse documents (see XML::Reader.document),
40
+ * strings (see XML::Reader.string) and io objects (see
41
+ * XML::Reader.io).
42
+ *
41
43
  * For a more in depth tutorial, albeit in C, see http://xmlsoft.org/xmlreader.html.*/
42
44
 
43
- static VALUE rxml_reader_new(VALUE class, xmlTextReaderPtr reader)
45
+ VALUE cXMLReader;
46
+
47
+ ID base_uri_SYMBOL;
48
+ ID ENCODING_SYMBOL;
49
+ ID OPTIONS_SYMBOL;
50
+
51
+ static VALUE rxml_reader_wrap(xmlTextReaderPtr reader)
44
52
  {
45
- return Data_Wrap_Struct(class, NULL, xmlFreeTextReader, reader);
53
+ return Data_Wrap_Struct(cXMLReader, NULL, xmlFreeTextReader, reader);
46
54
  }
47
55
 
48
56
  static xmlTextReaderPtr rxml_text_reader_get(VALUE obj)
@@ -54,120 +62,193 @@ static xmlTextReaderPtr rxml_text_reader_get(VALUE obj)
54
62
 
55
63
  /*
56
64
  * call-seq:
57
- * XML::Reader.file(path, encoding=nil, options=0) -> reader
65
+ * XML::Reader.document(doc) -> XML::Reader
58
66
  *
59
- * Parse an XML file from the filesystem or the network. The parsing flags
60
- * options are a combination of xmlParserOption.
67
+ * Create a new reader for the specified document.
61
68
  */
62
- static VALUE rxml_reader_new_file(int argc, VALUE *argv, VALUE self)
69
+ VALUE rxml_reader_document(VALUE klass, VALUE doc)
63
70
  {
71
+ xmlDocPtr xdoc;
64
72
  xmlTextReaderPtr xreader;
65
- VALUE rpath, rencoding, roptions;
66
- char *xpath;
67
- char *xencoding;
68
- int options;
69
-
70
- rb_scan_args(argc, argv, "12", &rpath, &rencoding, &roptions);
71
73
 
72
- xpath = NIL_P(rpath) ? NULL : StringValueCStr(rpath);
73
- xencoding = NIL_P(rencoding) ? NULL : StringValueCStr(rencoding);
74
- options = NIL_P(roptions) ? 0 : FIX2INT(roptions);
74
+ Data_Get_Struct(doc, xmlDoc, xdoc);
75
75
 
76
- xreader = xmlReaderForFile(xpath, xencoding, options);
76
+ xreader = xmlReaderWalker(xdoc);
77
77
 
78
78
  if (xreader == NULL)
79
79
  rxml_raise(&xmlLastError);
80
80
 
81
- return rxml_reader_new(self, xreader);
81
+ return rxml_reader_wrap(xreader);
82
82
  }
83
83
 
84
- /*
85
- * call-seq:
86
- * XML::Reader.io(io, url=nil, encoding=nil, options=0) -> reader
84
+ /* call-seq:
85
+ * XML::Reader.file(path) -> XML::Reader
86
+ * XML::Reader.file(path, :encoding => XML::Encoding::UTF_8,
87
+ * :options => XML::Parser::Options::NOENT) -> XML::Reader
87
88
  *
88
- * Parse an XML file from a file handle. The parsing flags options are
89
- * a combination of xmlParserOption.
89
+ * Creates a new reader by parsing the specified file or uri.
90
+ *
91
+ * You may provide an optional hash table to control how the
92
+ * parsing is performed. Valid options are:
93
+ *
94
+ * encoding - The document encoding, defaults to nil. Valid values
95
+ * are the encoding constants defined on XML::Encoding.
96
+ * options - Controls the execution of the parser, defaults to 0.
97
+ * Valid values are the constants defined on
98
+ * XML::Parser::Options. Multiple options can be combined
99
+ * by using Bitwise OR (|).
90
100
  */
91
- static VALUE rxml_reader_new_io(int argc, VALUE *argv, VALUE self)
101
+ static VALUE rxml_reader_file(int argc, VALUE *argv, VALUE klass)
92
102
  {
93
103
  xmlTextReaderPtr xreader;
94
- VALUE rio, rurl, rencoding, roptions;
95
- char *xurl;
96
- char *xencoding;
97
- int options;
104
+ VALUE path;
105
+ VALUE options;
98
106
 
99
- rb_scan_args(argc, argv, "13", &rio, &rurl, &rencoding, &roptions);
107
+ const char *xencoding = NULL;
108
+ int xoptions = 0;
100
109
 
101
- xurl = NIL_P(rurl) ? NULL : StringValueCStr(rurl);
102
- xencoding = NIL_P(rencoding) ? NULL : StringValueCStr(rencoding);
103
- options = NIL_P(roptions) ? 0 : FIX2INT(roptions);
110
+ rb_scan_args(argc, argv, "11", &path, &options);
111
+ Check_Type(path, T_STRING);
104
112
 
105
- xreader = xmlReaderForIO((xmlInputReadCallback) rxml_read_callback, NULL,
106
- (void *) rio, xurl, xencoding, options);
113
+ if (!NIL_P(options))
114
+ {
115
+ VALUE encoding = Qnil;
116
+ VALUE parserOptions = Qnil;
117
+
118
+ Check_Type(options, T_HASH);
119
+
120
+ encoding = rb_hash_aref(options, base_uri_SYMBOL);
121
+ xencoding = NIL_P(encoding) ? NULL : xmlGetCharEncodingName(NUM2INT(encoding));
122
+
123
+ parserOptions = rb_hash_aref(options, OPTIONS_SYMBOL);
124
+ xoptions = NIL_P(parserOptions) ? 0 : NUM2INT(parserOptions);
125
+ }
126
+
127
+ xreader = xmlReaderForFile(StringValueCStr(path), xencoding, xoptions);
107
128
 
108
129
  if (xreader == NULL)
109
130
  rxml_raise(&xmlLastError);
110
131
 
111
- return rxml_reader_new(self, xreader);
132
+ return rxml_reader_wrap(xreader);
112
133
  }
113
134
 
114
- /*
115
- * call-seq:
116
- * XML::Reader.walker(doc) -> reader
117
- * XML::Reader.document(doc) -> reader
135
+ /* call-seq:
136
+ * XML::Reader.io(io) -> XML::Reader
137
+ * XML::Reader.io(io, :encoding => XML::Encoding::UTF_8,
138
+ * :options => XML::Parser::Options::NOENT) -> XML::Reader
139
+ *
140
+ * Creates a new reader by parsing the specified io object.
118
141
  *
119
- * Create an XML text reader for a preparsed document.
142
+ * You may provide an optional hash table to control how the
143
+ * parsing is performed. Valid options are:
144
+ *
145
+ * base_uri - The base url for the parsed document.
146
+ * encoding - The document encoding, defaults to nil. Valid values
147
+ * are the encoding constants defined on XML::Encoding.
148
+ * options - Controls the execution of the parser, defaults to 0.
149
+ * Valid values are the constants defined on
150
+ * XML::Parser::Options. Multiple options can be combined
151
+ * by using Bitwise OR (|).
120
152
  */
121
- VALUE rxml_reader_new_walker(VALUE self, VALUE doc)
153
+ static VALUE rxml_reader_io(int argc, VALUE *argv, VALUE klass)
122
154
  {
123
- xmlDocPtr xdoc;
124
155
  xmlTextReaderPtr xreader;
156
+ VALUE io;
157
+ VALUE options;
158
+ char *xbaseurl = NULL;
159
+ const char *xencoding = NULL;
160
+ int xoptions = 0;
125
161
 
126
- Data_Get_Struct(doc, xmlDoc, xdoc);
162
+ rb_scan_args(argc, argv, "11", &io, &options);
127
163
 
128
- xreader = xmlReaderWalker(xdoc);
164
+ if (!NIL_P(options))
165
+ {
166
+ VALUE baseurl = Qnil;
167
+ VALUE encoding = Qnil;
168
+ VALUE parserOptions = Qnil;
169
+
170
+ Check_Type(options, T_HASH);
171
+
172
+ baseurl = rb_hash_aref(options, base_uri_SYMBOL);
173
+ xbaseurl = NIL_P(baseurl) ? NULL : StringValueCStr(baseurl);
174
+
175
+ encoding = rb_hash_aref(options, ENCODING_SYMBOL);
176
+ xencoding = NIL_P(encoding) ? NULL : xmlGetCharEncodingName(NUM2INT(encoding));
177
+
178
+ parserOptions = rb_hash_aref(options, OPTIONS_SYMBOL);
179
+ xoptions = NIL_P(parserOptions) ? 0 : NUM2INT(parserOptions);
180
+ }
181
+
182
+ xreader = xmlReaderForIO((xmlInputReadCallback) rxml_read_callback, NULL,
183
+ (void *) io,
184
+ xbaseurl, xencoding, xoptions);
129
185
 
130
186
  if (xreader == NULL)
131
187
  rxml_raise(&xmlLastError);
132
188
 
133
- return rxml_reader_new(self, xreader);
189
+ return rxml_reader_wrap(xreader);
134
190
  }
135
191
 
136
- /*
137
- * call-seq:
138
- * XML::Reader.new(data, url=nil, encoding=nil, options=0) -> reader
139
- * XML::Reader.string(data, url=nil, encoding=nil, options=0) -> reader
192
+ /* call-seq:
193
+ * XML::Reader.string(string) -> XML::Reader
194
+ * XML::Reader.string(string, :encoding => XML::Encoding::UTF_8,
195
+ * :options => XML::Parser::Options::NOENT) -> XML::Reader
140
196
  *
141
- * Create an XML text reader for an XML in-memory document. The parsing flags
142
- * options are a combination of xmlParserOption.
197
+ * Creates a new reader by parsing the specified string.
198
+ *
199
+ * You may provide an optional hash table to control how the
200
+ * parsing is performed. Valid options are:
201
+ *
202
+ * base_uri - The base url for the parsed document.
203
+ * encoding - The document encoding, defaults to nil. Valid values
204
+ * are the encoding constants defined on XML::Encoding.
205
+ * options - Controls the execution of the parser, defaults to 0.
206
+ * Valid values are the constants defined on
207
+ * XML::Parser::Options. Multiple options can be combined
208
+ * by using Bitwise OR (|).
143
209
  */
144
- static VALUE rxml_reader_new_data(int argc, VALUE *argv, VALUE self)
210
+ static VALUE rxml_reader_string(int argc, VALUE *argv, VALUE klass)
145
211
  {
146
212
  xmlTextReaderPtr xreader;
147
- VALUE rdata, rurl, rencoding, roptions;
148
- char *xdata;
149
- char *xurl;
150
- char *xencoding;
151
- int options;
213
+ VALUE string;
214
+ VALUE options;
215
+ char *xbaseurl = NULL;
216
+ const char *xencoding = NULL;
217
+ int xoptions = 0;
152
218
 
153
- rb_scan_args(argc, argv, "13", &rdata, &rurl, &rencoding, &roptions);
219
+ rb_scan_args(argc, argv, "11", &string, &options);
220
+ Check_Type(string, T_STRING);
221
+
222
+ if (!NIL_P(options))
223
+ {
224
+ VALUE baseurl = Qnil;
225
+ VALUE encoding = Qnil;
226
+ VALUE parserOptions = Qnil;
154
227
 
155
- xdata = NIL_P(rdata) ? NULL : StringValueCStr(rdata);
156
- xurl = NIL_P(rurl) ? NULL : StringValueCStr(rurl);
157
- xencoding = NIL_P(rencoding) ? NULL : StringValueCStr(rencoding);
158
- options = NIL_P(roptions) ? 0 : FIX2INT(roptions);
228
+ Check_Type(options, T_HASH);
159
229
 
160
- xreader = xmlReaderForMemory(xdata, strlen(xdata), xurl, xencoding, options);
230
+ baseurl = rb_hash_aref(options, base_uri_SYMBOL);
231
+ xbaseurl = NIL_P(baseurl) ? NULL : StringValueCStr(baseurl);
232
+
233
+ encoding = rb_hash_aref(options, ENCODING_SYMBOL);
234
+ xencoding = NIL_P(encoding) ? NULL : xmlGetCharEncodingName(NUM2INT(encoding));
235
+
236
+ parserOptions = rb_hash_aref(options, OPTIONS_SYMBOL);
237
+ xoptions = NIL_P(parserOptions) ? 0 : NUM2INT(parserOptions);
238
+ }
239
+
240
+ xreader = xmlReaderForMemory(StringValueCStr(string), RSTRING_LEN(string),
241
+ xbaseurl, xencoding, xoptions);
161
242
 
162
243
  if (xreader == NULL)
163
244
  rxml_raise(&xmlLastError);
164
245
 
165
- return rxml_reader_new(self, xreader);
246
+ return rxml_reader_wrap(xreader);
166
247
  }
167
248
 
168
249
  /*
169
250
  * call-seq:
170
- * parser.close -> code
251
+ * reader.close -> code
171
252
  *
172
253
  * This method releases any resources allocated by the current instance
173
254
  * changes the state to Closed and close any underlying input.
@@ -179,7 +260,7 @@ static VALUE rxml_reader_close(VALUE self)
179
260
 
180
261
  /*
181
262
  * call-seq:
182
- * parser.move_to_attribute(val) -> code
263
+ * reader.move_to_attribute(val) -> code
183
264
  *
184
265
  * Move the position of the current instance to the attribute with the
185
266
  * specified index (if +val+ is an integer) or name (if +val+ is a string)
@@ -266,6 +347,21 @@ static VALUE rxml_reader_next_sibling(VALUE self)
266
347
  return INT2FIX(xmlTextReaderNextSibling(rxml_text_reader_get(self)));
267
348
  }
268
349
 
350
+ /*
351
+ * call-seq:
352
+ * reader.node -> XML::Node
353
+ *
354
+ * Returns the reader's current node.
355
+ * WARNING - Using this method is dangerous because the
356
+ * node may be destroyed on the next #read.
357
+ */
358
+ static VALUE rxml_reader_node(VALUE self)
359
+ {
360
+ xmlTextReaderPtr xreader = rxml_text_reader_get(self);
361
+ xmlNodePtr xnode = xmlTextReaderCurrentNode(xreader);
362
+ return rxml_node_wrap(xnode);
363
+ }
364
+
269
365
  /*
270
366
  * call-seq:
271
367
  * reader.node_type -> type
@@ -299,15 +395,27 @@ static VALUE rxml_reader_normalization(VALUE self)
299
395
  * call-seq:
300
396
  * reader.read -> code
301
397
  *
302
- * Move the position of the current instance to the next node in the stream,
303
- * exposing its properties.
398
+ * Causes the reader to move to the next node in the stream, exposing its properties.
304
399
  *
305
- * Return 1 if the node was read successfully, 0 if there is no more nodes to
306
- * read, or -1 in case of error.
307
- */
400
+ * Returns true if a node was successfully read or false if there are no more
401
+ * nodes to read. On errors, an exception is raised.*/
308
402
  static VALUE rxml_reader_read(VALUE self)
309
403
  {
310
- return INT2FIX(xmlTextReaderRead(rxml_text_reader_get(self)));
404
+ int result = xmlTextReaderRead(rxml_text_reader_get(self));
405
+ switch(result)
406
+ {
407
+ case -1:
408
+ rxml_raise(&xmlLastError);
409
+ return Qnil;
410
+ break;
411
+ case 0:
412
+ return Qfalse;
413
+ case 1:
414
+ return Qtrue;
415
+ default:
416
+ rb_raise(rb_eRuntimeError,
417
+ "xmlTextReaderRead did not return -1, 0 or 1. Return value was: %d", result);
418
+ }
311
419
  }
312
420
 
313
421
  /*
@@ -456,14 +564,28 @@ static VALUE rxml_reader_attr_count(VALUE self)
456
564
 
457
565
  /*
458
566
  * call-seq:
459
- * reader.encoding -> encoding
567
+ * reader.encoding -> XML::Encoding::UTF_8
568
+ *
569
+ * Returns the encoding of the document being read. Note you
570
+ * first have to read data from the reader for encoding
571
+ * to return a value
572
+ *
573
+ * reader = XML::Reader.file(XML_FILE)
574
+ * assert_nil(reader.encoding)
575
+ * reader.read
576
+ * assert_equal(XML::Encoding::UTF_8, reader.encoding)
460
577
  *
461
- * Determine the encoding of the document being read.
578
+ * In addition, libxml always appears to return nil for the encoding
579
+ * when parsing strings.
462
580
  */
463
581
  static VALUE rxml_reader_encoding(VALUE self)
464
582
  {
465
- const xmlChar *result = xmlTextReaderConstEncoding(rxml_text_reader_get(self));
466
- return (result == NULL ? Qnil : rb_str_new2((const char*)result));
583
+ xmlTextReaderPtr xreader = rxml_text_reader_get(self);
584
+ const xmlChar *xencoding = xmlTextReaderConstEncoding(xreader);
585
+ if (xencoding)
586
+ return INT2NUM(xmlParseCharEncoding(xencoding));
587
+ else
588
+ return INT2NUM(XML_CHAR_ENCODING_NONE);
467
589
  }
468
590
 
469
591
  /*
@@ -668,7 +790,7 @@ static VALUE rxml_reader_expand(VALUE self)
668
790
  doc = xmlTextReaderCurrentDoc(reader);
669
791
  rxml_document_wrap(doc);
670
792
 
671
- return rxml_node_wrap(cXMLNode, node);
793
+ return rxml_node_wrap(node);
672
794
  }
673
795
 
674
796
  #if LIBXML_VERSION >= 20618
@@ -768,132 +890,104 @@ mXML = rb_define_module_under(mLibXML, "XML");
768
890
 
769
891
  void ruby_init_xml_reader(void)
770
892
  {
893
+ base_uri_SYMBOL = ID2SYM(rb_intern("base_uri"));
894
+ ENCODING_SYMBOL = ID2SYM(rb_intern("encoding"));
895
+ OPTIONS_SYMBOL = ID2SYM(rb_intern("options"));
896
+
771
897
  cXMLReader = rb_define_class_under(mXML, "Reader", rb_cObject);
772
898
 
773
- rb_define_singleton_method(cXMLReader, "file", rxml_reader_new_file, -1);
774
- rb_define_singleton_method(cXMLReader, "io", rxml_reader_new_io, -1);
775
- rb_define_singleton_method(cXMLReader, "walker", rxml_reader_new_walker, 1);
776
- rb_define_alias(CLASS_OF(cXMLReader), "document", "walker");
777
- rb_define_singleton_method(cXMLReader, "new", rxml_reader_new_data, -1);
778
- rb_define_alias(CLASS_OF(cXMLReader), "string", "new");
899
+ rb_define_singleton_method(cXMLReader, "document", rxml_reader_document, 1);
900
+ rb_define_singleton_method(cXMLReader, "file", rxml_reader_file, -1);
901
+ rb_define_singleton_method(cXMLReader, "io", rxml_reader_io, -1);
902
+ rb_define_singleton_method(cXMLReader, "string", rxml_reader_string, -1);
779
903
 
904
+ rb_define_method(cXMLReader, "[]", rxml_reader_attribute, 1);
905
+ rb_define_method(cXMLReader, "attribute_count", rxml_reader_attr_count, 0);
906
+ rb_define_method(cXMLReader, "base_uri", rxml_reader_base_uri, 0);
907
+ #if LIBXML_VERSION >= 20618
908
+ rb_define_method(cXMLReader, "byte_consumed", rxml_reader_byte_consumed, 0);
909
+ #endif
780
910
  rb_define_method(cXMLReader, "close", rxml_reader_close, 0);
781
-
911
+ #if LIBXML_VERSION >= 20617
912
+ rb_define_method(cXMLReader, "column_number", rxml_reader_column_number, 0);
913
+ #endif
914
+ rb_define_method(cXMLReader, "depth", rxml_reader_depth, 0);
915
+ rb_define_method(cXMLReader, "encoding", rxml_reader_encoding, 0);
916
+ rb_define_method(cXMLReader, "expand", rxml_reader_expand, 0);
917
+ rb_define_method(cXMLReader, "has_attributes?", rxml_reader_has_attributes, 0);
918
+ rb_define_method(cXMLReader, "has_value?", rxml_reader_has_value, 0);
919
+ #if LIBXML_VERSION >= 20617
920
+ rb_define_method(cXMLReader, "line_number", rxml_reader_line_number, 0);
921
+ #endif
922
+ rb_define_method(cXMLReader, "local_name", rxml_reader_local_name, 0);
923
+ rb_define_method(cXMLReader, "lookup_namespace", rxml_reader_lookup_namespace, 1);
782
924
  rb_define_method(cXMLReader, "move_to_attribute", rxml_reader_move_to_attr, 1);
783
- rb_define_method(cXMLReader, "move_to_first_attribute",
784
- rxml_reader_move_to_first_attr, 0);
785
- rb_define_method(cXMLReader, "move_to_next_attribute",
786
- rxml_reader_move_to_next_attr, 0);
787
- rb_define_method(cXMLReader, "move_to_element", rxml_reader_move_to_element,
788
- 0);
925
+ rb_define_method(cXMLReader, "move_to_first_attribute", rxml_reader_move_to_first_attr, 0);
926
+ rb_define_method(cXMLReader, "move_to_next_attribute", rxml_reader_move_to_next_attr, 0);
927
+ rb_define_method(cXMLReader, "move_to_element", rxml_reader_move_to_element, 0);
928
+ rb_define_method(cXMLReader, "name", rxml_reader_name, 0);
929
+ rb_define_method(cXMLReader, "namespace_uri", rxml_reader_namespace_uri, 0);
789
930
  rb_define_method(cXMLReader, "next", rxml_reader_next, 0);
790
931
  rb_define_method(cXMLReader, "next_sibling", rxml_reader_next_sibling, 0);
932
+ rb_define_method(cXMLReader, "node", rxml_reader_node, 0);
933
+ rb_define_method(cXMLReader, "node_type", rxml_reader_node_type, 0);
934
+ rb_define_method(cXMLReader, "normalization", rxml_reader_normalization, 0);
935
+ rb_define_method(cXMLReader, "prefix", rxml_reader_prefix, 0);
936
+ rb_define_method(cXMLReader, "quote_char", rxml_reader_quote_char, 0);
791
937
  rb_define_method(cXMLReader, "read", rxml_reader_read, 0);
792
- rb_define_method(cXMLReader, "read_attribute_value",
793
- rxml_reader_read_attr_value, 0);
938
+ rb_define_method(cXMLReader, "read_attribute_value", rxml_reader_read_attr_value, 0);
794
939
  rb_define_method(cXMLReader, "read_inner_xml", rxml_reader_read_inner_xml, 0);
795
940
  rb_define_method(cXMLReader, "read_outer_xml", rxml_reader_read_outer_xml, 0);
796
941
  rb_define_method(cXMLReader, "read_state", rxml_reader_read_state, 0);
797
942
  rb_define_method(cXMLReader, "read_string", rxml_reader_read_string, 0);
798
-
799
- rb_define_method(cXMLReader, "relax_ng_validate",
800
- rxml_reader_relax_ng_validate, 1);
943
+ rb_define_method(cXMLReader, "relax_ng_validate", rxml_reader_relax_ng_validate, 1);
944
+ rb_define_method(cXMLReader, "standalone", rxml_reader_standalone, 0);
801
945
  #if LIBXML_VERSION >= 20620
802
946
  rb_define_method(cXMLReader, "schema_validate", rxml_reader_schema_validate, 1);
803
947
  #endif
804
-
805
- rb_define_method(cXMLReader, "node_type", rxml_reader_node_type, 0);
806
- rb_define_method(cXMLReader, "normalization", rxml_reader_normalization, 0);
807
- rb_define_method(cXMLReader, "attribute_count", rxml_reader_attr_count, 0);
808
- rb_define_method(cXMLReader, "name", rxml_reader_name, 0);
809
- rb_define_method(cXMLReader, "local_name", rxml_reader_local_name, 0);
810
- rb_define_method(cXMLReader, "encoding", rxml_reader_encoding, 0);
811
- rb_define_method(cXMLReader, "base_uri", rxml_reader_base_uri, 0);
812
- rb_define_method(cXMLReader, "namespace_uri", rxml_reader_namespace_uri, 0);
948
+ rb_define_method(cXMLReader, "value", rxml_reader_value, 0);
813
949
  rb_define_method(cXMLReader, "xml_lang", rxml_reader_xml_lang, 0);
814
950
  rb_define_method(cXMLReader, "xml_version", rxml_reader_xml_version, 0);
815
- rb_define_method(cXMLReader, "prefix", rxml_reader_prefix, 0);
816
- rb_define_method(cXMLReader, "depth", rxml_reader_depth, 0);
817
- rb_define_method(cXMLReader, "quote_char", rxml_reader_quote_char, 0);
818
- rb_define_method(cXMLReader, "standalone", rxml_reader_standalone, 0);
819
-
820
- rb_define_method(cXMLReader, "has_attributes?", rxml_reader_has_attributes, 0);
821
- rb_define_method(cXMLReader, "[]", rxml_reader_attribute, 1);
822
- rb_define_method(cXMLReader, "has_value?", rxml_reader_has_value, 0);
823
- rb_define_method(cXMLReader, "value", rxml_reader_value, 0);
824
-
825
- rb_define_method(cXMLReader, "lookup_namespace",
826
- rxml_reader_lookup_namespace, 1);
827
- rb_define_method(cXMLReader, "expand", rxml_reader_expand, 0);
828
-
829
- #if LIBXML_VERSION >= 20618
830
- rb_define_method(cXMLReader, "byte_consumed", rxml_reader_byte_consumed, 0);
831
- #endif
832
- #if LIBXML_VERSION >= 20617
833
- rb_define_method(cXMLReader, "column_number", rxml_reader_column_number, 0);
834
- rb_define_method(cXMLReader, "line_number", rxml_reader_line_number, 0);
835
- #endif
836
951
  rb_define_method(cXMLReader, "default?", rxml_reader_default, 0);
837
952
  rb_define_method(cXMLReader, "empty_element?", rxml_reader_empty_element, 0);
838
- rb_define_method(cXMLReader, "namespace_declaration?",
839
- rxml_reader_namespace_declaration, 0);
953
+ rb_define_method(cXMLReader, "namespace_declaration?", rxml_reader_namespace_declaration, 0);
840
954
  rb_define_method(cXMLReader, "valid?", rxml_reader_valid, 0);
841
955
 
956
+ /* Constants */
842
957
  rb_define_const(cXMLReader, "LOADDTD", INT2FIX(XML_PARSER_LOADDTD));
843
958
  rb_define_const(cXMLReader, "DEFAULTATTRS", INT2FIX(XML_PARSER_DEFAULTATTRS));
844
959
  rb_define_const(cXMLReader, "VALIDATE", INT2FIX(XML_PARSER_VALIDATE));
845
- rb_define_const(cXMLReader, "SUBST_ENTITIES", INT2FIX(
846
- XML_PARSER_SUBST_ENTITIES));
847
-
848
- rb_define_const(cXMLReader, "SEVERITY_VALIDITY_WARNING", INT2FIX(
849
- XML_PARSER_SEVERITY_VALIDITY_WARNING));
850
- rb_define_const(cXMLReader, "SEVERITY_VALIDITY_ERROR", INT2FIX(
851
- XML_PARSER_SEVERITY_VALIDITY_ERROR));
852
- rb_define_const(cXMLReader, "SEVERITY_WARNING", INT2FIX(
853
- XML_PARSER_SEVERITY_WARNING));
854
- rb_define_const(cXMLReader, "SEVERITY_ERROR", INT2FIX(
855
- XML_PARSER_SEVERITY_ERROR));
960
+ rb_define_const(cXMLReader, "SUBST_ENTITIES", INT2FIX(XML_PARSER_SUBST_ENTITIES));
961
+
962
+ rb_define_const(cXMLReader, "SEVERITY_VALIDITY_WARNING", INT2FIX(XML_PARSER_SEVERITY_VALIDITY_WARNING));
963
+ rb_define_const(cXMLReader, "SEVERITY_VALIDITY_ERROR", INT2FIX(XML_PARSER_SEVERITY_VALIDITY_ERROR));
964
+ rb_define_const(cXMLReader, "SEVERITY_WARNING", INT2FIX(XML_PARSER_SEVERITY_WARNING));
965
+ rb_define_const(cXMLReader, "SEVERITY_ERROR", INT2FIX(XML_PARSER_SEVERITY_ERROR));
856
966
 
857
967
  rb_define_const(cXMLReader, "TYPE_NONE", INT2FIX(XML_READER_TYPE_NONE));
858
968
  rb_define_const(cXMLReader, "TYPE_ELEMENT", INT2FIX(XML_READER_TYPE_ELEMENT));
859
- rb_define_const(cXMLReader, "TYPE_ATTRIBUTE", INT2FIX(
860
- XML_READER_TYPE_ATTRIBUTE));
969
+ rb_define_const(cXMLReader, "TYPE_ATTRIBUTE", INT2FIX(XML_READER_TYPE_ATTRIBUTE));
861
970
  rb_define_const(cXMLReader, "TYPE_TEXT", INT2FIX(XML_READER_TYPE_TEXT));
862
971
  rb_define_const(cXMLReader, "TYPE_CDATA", INT2FIX(XML_READER_TYPE_CDATA));
863
- rb_define_const(cXMLReader, "TYPE_ENTITY_REFERENCE", INT2FIX(
864
- XML_READER_TYPE_ENTITY_REFERENCE));
972
+ rb_define_const(cXMLReader, "TYPE_ENTITY_REFERENCE", INT2FIX(XML_READER_TYPE_ENTITY_REFERENCE));
865
973
  rb_define_const(cXMLReader, "TYPE_ENTITY", INT2FIX(XML_READER_TYPE_ENTITY));
866
- rb_define_const(cXMLReader, "TYPE_PROCESSING_INSTRUCTION", INT2FIX(
867
- XML_READER_TYPE_PROCESSING_INSTRUCTION));
974
+ rb_define_const(cXMLReader, "TYPE_PROCESSING_INSTRUCTION", INT2FIX(XML_READER_TYPE_PROCESSING_INSTRUCTION));
868
975
  rb_define_const(cXMLReader, "TYPE_COMMENT", INT2FIX(XML_READER_TYPE_COMMENT));
869
- rb_define_const(cXMLReader, "TYPE_DOCUMENT",
870
- INT2FIX(XML_READER_TYPE_DOCUMENT));
871
- rb_define_const(cXMLReader, "TYPE_DOCUMENT_TYPE", INT2FIX(
872
- XML_READER_TYPE_DOCUMENT_TYPE));
873
- rb_define_const(cXMLReader, "TYPE_DOCUMENT_FRAGMENT", INT2FIX(
874
- XML_READER_TYPE_DOCUMENT_FRAGMENT));
875
- rb_define_const(cXMLReader, "TYPE_NOTATION",
876
- INT2FIX(XML_READER_TYPE_NOTATION));
877
- rb_define_const(cXMLReader, "TYPE_WHITESPACE", INT2FIX(
878
- XML_READER_TYPE_WHITESPACE));
879
- rb_define_const(cXMLReader, "TYPE_SIGNIFICANT_WHITESPACE", INT2FIX(
880
- XML_READER_TYPE_SIGNIFICANT_WHITESPACE));
881
- rb_define_const(cXMLReader, "TYPE_END_ELEMENT", INT2FIX(
882
- XML_READER_TYPE_END_ELEMENT));
883
- rb_define_const(cXMLReader, "TYPE_END_ENTITY", INT2FIX(
884
- XML_READER_TYPE_END_ENTITY));
885
- rb_define_const(cXMLReader, "TYPE_XML_DECLARATION", INT2FIX(
886
- XML_READER_TYPE_XML_DECLARATION));
976
+ rb_define_const(cXMLReader, "TYPE_DOCUMENT", INT2FIX(XML_READER_TYPE_DOCUMENT));
977
+ rb_define_const(cXMLReader, "TYPE_DOCUMENT_TYPE", INT2FIX(XML_READER_TYPE_DOCUMENT_TYPE));
978
+ rb_define_const(cXMLReader, "TYPE_DOCUMENT_FRAGMENT", INT2FIX(XML_READER_TYPE_DOCUMENT_FRAGMENT));
979
+ rb_define_const(cXMLReader, "TYPE_NOTATION", INT2FIX(XML_READER_TYPE_NOTATION));
980
+ rb_define_const(cXMLReader, "TYPE_WHITESPACE", INT2FIX(XML_READER_TYPE_WHITESPACE));
981
+ rb_define_const(cXMLReader, "TYPE_SIGNIFICANT_WHITESPACE", INT2FIX(XML_READER_TYPE_SIGNIFICANT_WHITESPACE));
982
+ rb_define_const(cXMLReader, "TYPE_END_ELEMENT", INT2FIX(XML_READER_TYPE_END_ELEMENT));
983
+ rb_define_const(cXMLReader, "TYPE_END_ENTITY", INT2FIX(XML_READER_TYPE_END_ENTITY));
984
+ rb_define_const(cXMLReader, "TYPE_XML_DECLARATION", INT2FIX(XML_READER_TYPE_XML_DECLARATION));
887
985
 
888
986
  /* Read states */
889
- rb_define_const(cXMLReader, "MODE_INITIAL", INT2FIX(
890
- XML_TEXTREADER_MODE_INITIAL));
891
- rb_define_const(cXMLReader, "MODE_INTERACTIVE", INT2FIX(
892
- XML_TEXTREADER_MODE_INTERACTIVE));
987
+ rb_define_const(cXMLReader, "MODE_INITIAL", INT2FIX(XML_TEXTREADER_MODE_INITIAL));
988
+ rb_define_const(cXMLReader, "MODE_INTERACTIVE", INT2FIX(XML_TEXTREADER_MODE_INTERACTIVE));
893
989
  rb_define_const(cXMLReader, "MODE_ERROR", INT2FIX(XML_TEXTREADER_MODE_ERROR));
894
990
  rb_define_const(cXMLReader, "MODE_EOF", INT2FIX(XML_TEXTREADER_MODE_EOF));
895
- rb_define_const(cXMLReader, "MODE_CLOSED",
896
- INT2FIX(XML_TEXTREADER_MODE_CLOSED));
897
- rb_define_const(cXMLReader, "MODE_READING", INT2FIX(
898
- XML_TEXTREADER_MODE_READING));
991
+ rb_define_const(cXMLReader, "MODE_CLOSED", INT2FIX(XML_TEXTREADER_MODE_CLOSED));
992
+ rb_define_const(cXMLReader, "MODE_READING", INT2FIX(XML_TEXTREADER_MODE_READING));
899
993
  }