libxml-ruby 0.9.8 → 0.9.9

Sign up to get free protection for your applications and to get access to all the features.
Files changed (96) hide show
  1. data/CHANGES +41 -1
  2. data/LICENSE +3 -4
  3. data/README +37 -24
  4. data/Rakefile +2 -2
  5. data/ext/libxml/extconf.rb +31 -12
  6. data/ext/libxml/libxml.c +56 -858
  7. data/ext/libxml/ruby_libxml.h +93 -96
  8. data/ext/libxml/ruby_xml.c +855 -0
  9. data/ext/libxml/ruby_xml.h +9 -0
  10. data/ext/libxml/ruby_xml_attr.c +3 -9
  11. data/ext/libxml/ruby_xml_attr.h +2 -2
  12. data/ext/libxml/ruby_xml_attr_decl.c +2 -8
  13. data/ext/libxml/ruby_xml_attr_decl.h +1 -1
  14. data/ext/libxml/ruby_xml_attributes.c +6 -8
  15. data/ext/libxml/ruby_xml_attributes.h +1 -1
  16. data/ext/libxml/ruby_xml_document.c +915 -895
  17. data/ext/libxml/ruby_xml_document.h +2 -2
  18. data/ext/libxml/ruby_xml_dtd.c +257 -136
  19. data/ext/libxml/ruby_xml_dtd.h +1 -1
  20. data/ext/libxml/ruby_xml_encoding.c +55 -37
  21. data/ext/libxml/ruby_xml_encoding.h +1 -1
  22. data/ext/libxml/ruby_xml_error.c +526 -1058
  23. data/ext/libxml/ruby_xml_error.h +1 -1
  24. data/ext/libxml/ruby_xml_html_parser.c +2 -8
  25. data/ext/libxml/ruby_xml_html_parser.h +2 -2
  26. data/ext/libxml/ruby_xml_html_parser_context.c +175 -145
  27. data/ext/libxml/ruby_xml_html_parser_context.h +1 -1
  28. data/ext/libxml/ruby_xml_html_parser_options.c +12 -20
  29. data/ext/libxml/ruby_xml_html_parser_options.h +1 -1
  30. data/ext/libxml/ruby_xml_input_cbg.c +2 -8
  31. data/ext/libxml/ruby_xml_input_cbg.h +1 -1
  32. data/ext/libxml/ruby_xml_namespace.c +2 -8
  33. data/ext/libxml/ruby_xml_namespace.h +2 -2
  34. data/ext/libxml/ruby_xml_namespaces.c +1 -9
  35. data/ext/libxml/ruby_xml_namespaces.h +1 -1
  36. data/ext/libxml/ruby_xml_node.c +182 -121
  37. data/ext/libxml/ruby_xml_node.h +2 -2
  38. data/ext/libxml/ruby_xml_parser.c +2 -8
  39. data/ext/libxml/ruby_xml_parser.h +2 -2
  40. data/ext/libxml/ruby_xml_parser_context.c +952 -901
  41. data/ext/libxml/ruby_xml_parser_context.h +2 -2
  42. data/ext/libxml/ruby_xml_parser_options.c +2 -9
  43. data/ext/libxml/ruby_xml_parser_options.h +1 -1
  44. data/ext/libxml/ruby_xml_reader.c +1002 -993
  45. data/ext/libxml/ruby_xml_reader.h +1 -1
  46. data/ext/libxml/ruby_xml_relaxng.c +1 -7
  47. data/ext/libxml/ruby_xml_relaxng.h +1 -1
  48. data/ext/libxml/ruby_xml_sax2_handler.c +2 -2
  49. data/ext/libxml/ruby_xml_sax2_handler.h +1 -1
  50. data/ext/libxml/ruby_xml_sax_parser.c +2 -8
  51. data/ext/libxml/ruby_xml_sax_parser.h +2 -2
  52. data/ext/libxml/ruby_xml_schema.c +1 -7
  53. data/ext/libxml/ruby_xml_schema.h +1 -1
  54. data/ext/libxml/{version.h → ruby_xml_version.h} +2 -2
  55. data/ext/libxml/ruby_xml_xinclude.c +2 -8
  56. data/ext/libxml/ruby_xml_xinclude.h +2 -2
  57. data/ext/libxml/ruby_xml_xpath.c +17 -18
  58. data/ext/libxml/ruby_xml_xpath.h +2 -2
  59. data/ext/libxml/ruby_xml_xpath_context.c +387 -389
  60. data/ext/libxml/ruby_xml_xpath_context.h +2 -2
  61. data/ext/libxml/ruby_xml_xpath_expression.c +18 -8
  62. data/ext/libxml/ruby_xml_xpath_expression.h +1 -1
  63. data/ext/libxml/ruby_xml_xpath_object.c +19 -8
  64. data/ext/libxml/ruby_xml_xpath_object.h +1 -1
  65. data/ext/libxml/ruby_xml_xpointer.c +2 -8
  66. data/ext/libxml/ruby_xml_xpointer.h +2 -2
  67. data/ext/vc/libxml_ruby.sln +7 -1
  68. data/lib/libxml.rb +1 -12
  69. data/lib/libxml/attr.rb +0 -3
  70. data/lib/libxml/attr_decl.rb +0 -3
  71. data/lib/libxml/attributes.rb +0 -3
  72. data/lib/libxml/document.rb +31 -5
  73. data/lib/libxml/error.rb +8 -4
  74. data/lib/libxml/properties.rb +0 -5
  75. data/lib/libxml/sax_callbacks.rb +30 -19
  76. data/lib/libxml/tree.rb +0 -1
  77. data/lib/libxml/xpath_object.rb +0 -13
  78. data/test/model/definition.dtd +8 -0
  79. data/test/tc_attributes.rb +4 -1
  80. data/test/tc_document.rb +16 -0
  81. data/test/tc_dtd.rb +30 -2
  82. data/test/tc_html_parser.rb +55 -10
  83. data/test/tc_node.rb +67 -1
  84. data/test/tc_node_edit.rb +26 -6
  85. data/test/tc_node_text.rb +41 -23
  86. data/test/tc_parser.rb +50 -0
  87. data/test/tc_reader.rb +15 -0
  88. data/test/tc_relaxng.rb +1 -1
  89. data/test/tc_sax_parser.rb +37 -5
  90. data/test/tc_schema.rb +1 -1
  91. data/test/tc_xpath.rb +1 -0
  92. data/test/tc_xpath_expression.rb +4 -2
  93. metadata +6 -6
  94. data/ext/libxml/ruby_xml_state.c +0 -51
  95. data/ext/libxml/ruby_xml_state.h +0 -11
  96. data/ext/vc/libxml_ruby.vcproj +0 -460
@@ -1,4 +1,4 @@
1
- /* $Id: ruby_xml_parser_context.h 711 2009-01-20 07:17:15Z cfis $ */
1
+ /* $Id: ruby_xml_parser_context.h 758 2009-01-25 20:36:03Z cfis $ */
2
2
 
3
3
  /* Please see the LICENSE file for copyright and distribution information */
4
4
 
@@ -7,6 +7,6 @@
7
7
 
8
8
  extern VALUE cXMLParserContext;
9
9
 
10
- void ruby_init_xml_parser_context(void);
10
+ void rxml_init_parser_context(void);
11
11
 
12
12
  #endif
@@ -5,7 +5,7 @@
5
5
  #include <stdarg.h>
6
6
  #include "ruby_libxml.h"
7
7
 
8
- /* Document-class: LibXML::XML::ParserOptions
8
+ /* Document-class: LibXML::XML::Parser::Options
9
9
  *
10
10
  * Options that control the operation of the HTMLParser. The easiest
11
11
  * way to set a parser's options is to use the methods
@@ -15,14 +15,7 @@
15
15
 
16
16
  VALUE mXMLParserOptions;
17
17
 
18
-
19
- // Rdoc needs to know
20
- #ifdef RDOC_NEVER_DEFINED
21
- mLibXML = rb_define_module("LibXML");
22
- mXML = rb_define_module_under(mLibXML, "XML");
23
- #endif
24
-
25
- void ruby_init_parser_options(void)
18
+ void rxml_init_parser_options(void)
26
19
  {
27
20
  mXMLParserOptions = rb_define_module_under(cXMLParser, "Options");
28
21
 
@@ -9,6 +9,6 @@
9
9
 
10
10
  extern VALUE mXMLParserOptions;
11
11
 
12
- void ruby_init_parser_options();
12
+ void rxml_init_parser_options();
13
13
 
14
14
  #endif
@@ -1,993 +1,1002 @@
1
- /* Copyright (c) 2006-2007 Apple Inc.
2
- * Please see the LICENSE file for copyright and distribution information. */
3
-
4
- #include "ruby_libxml.h"
5
- #include "ruby_xml_reader.h"
6
-
7
- /*
8
- * Document-class: LibXML::XML::Reader
9
- *
10
- * The XML::Reader class provides a simpler, alternative way of parsing an XML
11
- * document in contrast to XML::Parser or XML::SaxParser. A XML::Reader instance
12
- * acts like a cursor going forward in a document stream, stopping at each node
13
- * it encounters. To advance to the next node, simply cadd XML::Reader#read.
14
- *
15
- * The XML::Reader API closely matches the DOM Core specification and supports
16
- * namespaces, xml:base, entity handling and DTDs.
17
- *
18
- * To summarize, XML::Reader provides a far simpler API to use versus XML::SaxParser
19
- * and is more memory efficient than using XML::Parser to create a DOM tree.
20
- *
21
- * Example:
22
- *
23
- * parser = XML::Reader.string("<foo><bar>1</bar><bar>2</bar><bar>3</bar></foo>")
24
- * reader.read
25
- * assert_equal('foo', reader.name)
26
- * assert_equal(nil, reader.value)
27
- *
28
- * 3.times do |i|
29
- * reader.read
30
- * assert_equal(XML::Reader::TYPE_ELEMENT, reader.node_type)
31
- * assert_equal('bar', reader.name)
32
- * reader.read
33
- * assert_equal(XML::Reader::TYPE_TEXT, reader.node_type)
34
- * assert_equal((i + 1).to_s, reader.value)
35
- * reader.read
36
- * assert_equal(XML::Reader::TYPE_END_ELEMENT, reader.node_type)
37
- * end
38
- *
39
- * You can also parse documents (see XML::Reader.document),
40
- * strings (see XML::Parser.string) and io objects (see
41
- * XML::Parser.io).
42
- *
43
- * For a more in depth tutorial, albeit in C, see http://xmlsoft.org/xmlreader.html.*/
44
-
45
- VALUE cXMLReader;
46
-
47
- ID base_uri_SYMBOL;
48
- ID ENCODING_SYMBOL;
49
- ID OPTIONS_SYMBOL;
50
-
51
- static VALUE rxml_reader_wrap(xmlTextReaderPtr reader)
52
- {
53
- return Data_Wrap_Struct(cXMLReader, NULL, xmlFreeTextReader, reader);
54
- }
55
-
56
- static xmlTextReaderPtr rxml_text_reader_get(VALUE obj)
57
- {
58
- xmlTextReaderPtr xreader;
59
- Data_Get_Struct(obj, xmlTextReader, xreader);
60
- return xreader;
61
- }
62
-
63
- /*
64
- * call-seq:
65
- * XML::Reader.document(doc) -> XML::Reader
66
- *
67
- * Create an new reader for the specified document.
68
- */
69
- VALUE rxml_reader_document(VALUE klass, VALUE doc)
70
- {
71
- xmlDocPtr xdoc;
72
- xmlTextReaderPtr xreader;
73
-
74
- Data_Get_Struct(doc, xmlDoc, xdoc);
75
-
76
- xreader = xmlReaderWalker(xdoc);
77
-
78
- if (xreader == NULL)
79
- rxml_raise(&xmlLastError);
80
-
81
- return rxml_reader_wrap(xreader);
82
- }
83
-
84
- /* call-seq:
85
- * XML::Reader.file(path) -> XML::Reader
86
- * XML::Reader.file(path, :encoding => XML::Encoding::UTF_8,
87
- * :options => XML::Parser::Options::NOENT) -> XML::Parser
88
- *
89
- * Creates a new reader by parsing the specified file or uri.
90
- *
91
- * You may provide an optional hash table to control how the
92
- * parsing is performed. Valid options are:
93
- *
94
- * encoding - The document encoding, defaults to nil. Valid values
95
- * are the encoding constants defined on XML::Encoding.
96
- * options - Controls the execution of the parser, defaults to 0.
97
- * Valid values are the constants defined on
98
- * XML::Parser::Options. Mutliple options can be combined
99
- * by using Bitwise OR (|).
100
- */
101
- static VALUE rxml_reader_file(int argc, VALUE *argv, VALUE klass)
102
- {
103
- xmlTextReaderPtr xreader;
104
- VALUE path;
105
- VALUE options;
106
-
107
- const char *xencoding = NULL;
108
- int xoptions = 0;
109
-
110
- rb_scan_args(argc, argv, "11", &path, &options);
111
- Check_Type(path, T_STRING);
112
-
113
- if (!NIL_P(options))
114
- {
115
- VALUE encoding = Qnil;
116
- VALUE parserOptions = Qnil;
117
-
118
- Check_Type(options, T_HASH);
119
-
120
- encoding = rb_hash_aref(options, base_uri_SYMBOL);
121
- xencoding = NIL_P(encoding) ? NULL : xmlGetCharEncodingName(NUM2INT(encoding));
122
-
123
- parserOptions = rb_hash_aref(options, OPTIONS_SYMBOL);
124
- xoptions = NIL_P(parserOptions) ? 0 : NUM2INT(parserOptions);
125
- }
126
-
127
- xreader = xmlReaderForFile(StringValueCStr(path), xencoding, xoptions);
128
-
129
- if (xreader == NULL)
130
- rxml_raise(&xmlLastError);
131
-
132
- return rxml_reader_wrap(xreader);
133
- }
134
-
135
- /* call-seq:
136
- * XML::Reader.io(io) -> XML::Reader
137
- * XML::Reader.io(io, :encoding => XML::Encoding::UTF_8,
138
- * :options => XML::Parser::Options::NOENT) -> XML::Parser
139
- *
140
- * Creates a new reader by parsing the specified io object.
141
- *
142
- * You may provide an optional hash table to control how the
143
- * parsing is performed. Valid options are:
144
- *
145
- * base_uri - The base url for the parsed document.
146
- * encoding - The document encoding, defaults to nil. Valid values
147
- * are the encoding constants defined on XML::Encoding.
148
- * options - Controls the execution of the parser, defaults to 0.
149
- * Valid values are the constants defined on
150
- * XML::Parser::Options. Mutliple options can be combined
151
- * by using Bitwise OR (|).
152
- */
153
- static VALUE rxml_reader_io(int argc, VALUE *argv, VALUE klass)
154
- {
155
- xmlTextReaderPtr xreader;
156
- VALUE io;
157
- VALUE options;
158
- char *xbaseurl = NULL;
159
- const char *xencoding = NULL;
160
- int xoptions = 0;
161
-
162
- rb_scan_args(argc, argv, "11", &io, &options);
163
-
164
- if (!NIL_P(options))
165
- {
166
- VALUE baseurl = Qnil;
167
- VALUE encoding = Qnil;
168
- VALUE parserOptions = Qnil;
169
-
170
- Check_Type(options, T_HASH);
171
-
172
- baseurl = rb_hash_aref(options, base_uri_SYMBOL);
173
- xbaseurl = NIL_P(baseurl) ? NULL : StringValueCStr(baseurl);
174
-
175
- encoding = rb_hash_aref(options, ENCODING_SYMBOL);
176
- xencoding = NIL_P(encoding) ? NULL : xmlGetCharEncodingName(NUM2INT(encoding));
177
-
178
- parserOptions = rb_hash_aref(options, OPTIONS_SYMBOL);
179
- xoptions = NIL_P(parserOptions) ? 0 : NUM2INT(parserOptions);
180
- }
181
-
182
- xreader = xmlReaderForIO((xmlInputReadCallback) rxml_read_callback, NULL,
183
- (void *) io,
184
- xbaseurl, xencoding, xoptions);
185
-
186
- if (xreader == NULL)
187
- rxml_raise(&xmlLastError);
188
-
189
- return rxml_reader_wrap(xreader);
190
- }
191
-
192
- /* call-seq:
193
- * XML::Reader.string(io) -> XML::Reader
194
- * XML::Reader.string(io, :encoding => XML::Encoding::UTF_8,
195
- * :options => XML::Parser::Options::NOENT) -> XML::Parser
196
- *
197
- * Creates a new reader by parsing the specified string.
198
- *
199
- * You may provide an optional hash table to control how the
200
- * parsing is performed. Valid options are:
201
- *
202
- * base_uri - The base url for the parsed document.
203
- * encoding - The document encoding, defaults to nil. Valid values
204
- * are the encoding constants defined on XML::Encoding.
205
- * options - Controls the execution of the parser, defaults to 0.
206
- * Valid values are the constants defined on
207
- * XML::Parser::Options. Mutliple options can be combined
208
- * by using Bitwise OR (|).
209
- */
210
- static VALUE rxml_reader_string(int argc, VALUE *argv, VALUE klass)
211
- {
212
- xmlTextReaderPtr xreader;
213
- VALUE string;
214
- VALUE options;
215
- char *xbaseurl = NULL;
216
- const char *xencoding = NULL;
217
- int xoptions = 0;
218
-
219
- rb_scan_args(argc, argv, "11", &string, &options);
220
- Check_Type(string, T_STRING);
221
-
222
- if (!NIL_P(options))
223
- {
224
- VALUE baseurl = Qnil;
225
- VALUE encoding = Qnil;
226
- VALUE parserOptions = Qnil;
227
-
228
- Check_Type(options, T_HASH);
229
-
230
- baseurl = rb_hash_aref(options, base_uri_SYMBOL);
231
- xbaseurl = NIL_P(baseurl) ? NULL : StringValueCStr(baseurl);
232
-
233
- encoding = rb_hash_aref(options, ENCODING_SYMBOL);
234
- xencoding = NIL_P(encoding) ? NULL : xmlGetCharEncodingName(NUM2INT(encoding));
235
-
236
- parserOptions = rb_hash_aref(options, OPTIONS_SYMBOL);
237
- xoptions = NIL_P(parserOptions) ? 0 : NUM2INT(parserOptions);
238
- }
239
-
240
- xreader = xmlReaderForMemory(StringValueCStr(string), RSTRING_LEN(string),
241
- xbaseurl, xencoding, xoptions);
242
-
243
- if (xreader == NULL)
244
- rxml_raise(&xmlLastError);
245
-
246
- return rxml_reader_wrap(xreader);
247
- }
248
-
249
- /*
250
- * call-seq:
251
- * reader.close -> code
252
- *
253
- * This method releases any resources allocated by the current instance
254
- * changes the state to Closed and close any underlying input.
255
- */
256
- static VALUE rxml_reader_close(VALUE self)
257
- {
258
- return INT2FIX(xmlTextReaderClose(rxml_text_reader_get(self)));
259
- }
260
-
261
- /*
262
- * call-seq:
263
- * reader.move_to_attribute(val) -> code
264
- *
265
- * Move the position of the current instance to the attribute with the
266
- * specified index (if +val+ is an integer) or name (if +val+ is a string)
267
- * relative to the containing element.
268
- */
269
- static VALUE rxml_reader_move_to_attr(VALUE self, VALUE val)
270
- {
271
- xmlTextReaderPtr xreader;
272
- int ret;
273
-
274
- xreader = rxml_text_reader_get(self);
275
-
276
- if (TYPE(val) == T_FIXNUM)
277
- {
278
- ret = xmlTextReaderMoveToAttributeNo(xreader, FIX2INT(val));
279
- }
280
- else
281
- {
282
- ret = xmlTextReaderMoveToAttribute(xreader,
283
- (const xmlChar *) StringValueCStr(val));
284
- }
285
-
286
- return INT2FIX(ret);
287
- }
288
-
289
- /*
290
- * call-seq:
291
- * reader.move_to_first_attribute -> code
292
- *
293
- * Move the position of the current instance to the first attribute associated
294
- * with the current node.
295
- */
296
- static VALUE rxml_reader_move_to_first_attr(VALUE self)
297
- {
298
- return INT2FIX(xmlTextReaderMoveToFirstAttribute(rxml_text_reader_get(self)));
299
- }
300
-
301
- /*
302
- * call-seq:
303
- * reader.move_to_next_attribute -> code
304
- *
305
- * Move the position of the current instance to the next attribute associated
306
- * with the current node.
307
- */
308
- static VALUE rxml_reader_move_to_next_attr(VALUE self)
309
- {
310
- return INT2FIX(xmlTextReaderMoveToNextAttribute(rxml_text_reader_get(self)));
311
- }
312
-
313
- /*
314
- * call-seq:
315
- * reader.move_to_element -> code
316
- *
317
- * Move the position of the current instance to the node that contains the
318
- * current attribute node.
319
- */
320
- static VALUE rxml_reader_move_to_element(VALUE self)
321
- {
322
- return INT2FIX(xmlTextReaderMoveToElement(rxml_text_reader_get(self)));
323
- }
324
-
325
- /*
326
- * call-seq:
327
- * reader.next -> code
328
- *
329
- * Skip to the node following the current one in document order while avoiding
330
- * the subtree if any.
331
- */
332
- static VALUE rxml_reader_next(VALUE self)
333
- {
334
- return INT2FIX(xmlTextReaderNext(rxml_text_reader_get(self)));
335
- }
336
-
337
- /*
338
- * call-seq:
339
- * reader.next_sibling -> code
340
- *
341
- * Skip to the node following the current one in document order while avoiding
342
- * the subtree if any. Currently implemented only for Readers built on a
343
- * document.
344
- */
345
- static VALUE rxml_reader_next_sibling(VALUE self)
346
- {
347
- return INT2FIX(xmlTextReaderNextSibling(rxml_text_reader_get(self)));
348
- }
349
-
350
- /*
351
- * call-seq:
352
- * reader.node -> XML::Node
353
- *
354
- * Returns the reader's current node.
355
- * WARNING - Using this method is dangerous because the
356
- * the node may be destroyed on the next #read.
357
- */
358
- static VALUE rxml_reader_node(VALUE self)
359
- {
360
- xmlTextReaderPtr xreader = rxml_text_reader_get(self);
361
- xmlNodePtr xnode = xmlTextReaderCurrentNode(xreader);
362
- return rxml_node_wrap(xnode);
363
- }
364
-
365
- /*
366
- * call-seq:
367
- * reader.node_type -> type
368
- *
369
- * Get the node type of the current node. Reference:
370
- * http://dotgnu.org/pnetlib-doc/System/Xml/XmlNodeType.html
371
- */
372
- static VALUE rxml_reader_node_type(VALUE self)
373
- {
374
- return INT2FIX(xmlTextReaderNodeType(rxml_text_reader_get(self)));
375
- }
376
-
377
- /*
378
- * call-seq:
379
- * reader.normalization -> value
380
- *
381
- * The value indicating whether to normalize white space and attribute values.
382
- * Since attribute value and end of line normalizations are a MUST in the XML
383
- * specification only the value true is accepted. The broken bahaviour of
384
- * accepting out of range character entities like &#0; is of course not
385
- * supported either.
386
- *
387
- * Return 1 or -1 in case of error.
388
- */
389
- static VALUE rxml_reader_normalization(VALUE self)
390
- {
391
- return INT2FIX(xmlTextReaderNormalization(rxml_text_reader_get(self)));
392
- }
393
-
394
- /*
395
- * call-seq:
396
- * reader.read -> code
397
- *
398
- * Causes the reader to move to the next node in the stream, exposing its properties.
399
- *
400
- * Returns true if a node was successfully read or false if there are no more
401
- * nodes to read. On errors, an exception is raised.*/
402
- static VALUE rxml_reader_read(VALUE self)
403
- {
404
- int result = xmlTextReaderRead(rxml_text_reader_get(self));
405
- switch(result)
406
- {
407
- case -1:
408
- rxml_raise(&xmlLastError);
409
- return Qnil;
410
- break;
411
- case 0:
412
- return Qfalse;
413
- case 1:
414
- return Qtrue;
415
- default:
416
- rb_raise(rb_eRuntimeError,
417
- "xmlTextReaderRead did not return -1, 0 or 1. Return value was: %d", result);
418
- }
419
- }
420
-
421
- /*
422
- * call-seq:
423
- * reader.read_attribute_value -> code
424
- *
425
- * Parse an attribute value into one or more Text and EntityReference nodes.
426
- *
427
- * Return 1 in case of success, 0 if the reader was not positionned on an
428
- * attribute node or all the attribute values have been read, or -1 in case of
429
- * error.
430
- */
431
- static VALUE rxml_reader_read_attr_value(VALUE self)
432
- {
433
- return INT2FIX(xmlTextReaderReadAttributeValue(rxml_text_reader_get(self)));
434
- }
435
-
436
- /*
437
- * call-seq:
438
- * reader.read_inner_xml -> data
439
- *
440
- * Read the contents of the current node, including child nodes and markup.
441
- *
442
- * Return a string containing the XML content, or nil if the current node is
443
- * neither an element nor attribute, or has no child nodes.
444
- */
445
- static VALUE rxml_reader_read_inner_xml(VALUE self)
446
- {
447
- const xmlChar *result = xmlTextReaderReadInnerXml(rxml_text_reader_get(self));
448
- return (result == NULL ? Qnil : rb_str_new2((const char*)result));
449
- }
450
-
451
- /*
452
- * call-seq:
453
- * reader.read_outer_xml -> data
454
- *
455
- * Read the contents of the current node, including child nodes and markup.
456
- *
457
- * Return a string containing the XML content, or nil if the current node is
458
- * neither an element nor attribute, or has no child nodes.
459
- */
460
- static VALUE rxml_reader_read_outer_xml(VALUE self)
461
- {
462
- const xmlChar *result = xmlTextReaderReadOuterXml(rxml_text_reader_get(self));
463
- return (result == NULL ? Qnil : rb_str_new2((const char*)result));
464
- }
465
-
466
- /*
467
- * call-seq:
468
- * reader.read_state -> state
469
- *
470
- * Get the read state of the reader.
471
- */
472
- static VALUE rxml_reader_read_state(VALUE self)
473
- {
474
- return INT2FIX(xmlTextReaderReadState(rxml_text_reader_get(self)));
475
- }
476
-
477
- /*
478
- * call-seq:
479
- * reader.read_string -> string
480
- *
481
- * Read the contents of an element or a text node as a string.
482
- *
483
- * Return a string containing the contents of the Element or Text node, or nil
484
- * if the reader is positioned on any other type of node.
485
- */
486
- static VALUE rxml_reader_read_string(VALUE self)
487
- {
488
- const xmlChar *result = xmlTextReaderReadString(rxml_text_reader_get(self));
489
- return (result == NULL ? Qnil : rb_str_new2((const char*)result));
490
- }
491
-
492
- /*
493
- * call-seq:
494
- * reader.relax_ng_validate(rng) -> code
495
- *
496
- * Use RelaxNG to validate the document as it is processed. Activation is only
497
- * possible before the first read. If +rng+ is nil, the RelaxNG validation is
498
- * desactivated.
499
- *
500
- * Return 0 in case the RelaxNG validation could be (des)activated and -1 in
501
- * case of error.
502
- */
503
- static VALUE rxml_reader_relax_ng_validate(VALUE self, VALUE rng)
504
- {
505
- char *xrng = NIL_P(rng) ? NULL : StringValueCStr(rng);
506
- return INT2FIX(xmlTextReaderRelaxNGValidate(rxml_text_reader_get(self), xrng));
507
- }
508
-
509
- #if LIBXML_VERSION >= 20620
510
- /*
511
- * call-seq:
512
- * reader.schema_validate(schema) -> code
513
- *
514
- * Use W3C XSD schema to validate the document as it is processed. Activation
515
- * is only possible before the first read. If +schema+ is nil, then XML Schema
516
- * validation is desactivated.
517
- *
518
- * Return 0 in case the schemas validation could be (de)activated and -1 in
519
- * case of error.
520
- */
521
- static VALUE
522
- rxml_reader_schema_validate(VALUE self, VALUE xsd)
523
- {
524
- char *xxsd = NIL_P(xsd) ? NULL : StringValueCStr(xsd);
525
- int status = xmlTextReaderSchemaValidate(rxml_text_reader_get(self), xxsd);
526
- return INT2FIX(status);
527
- }
528
- #endif
529
-
530
- /*
531
- * call-seq:
532
- * reader.name -> name
533
- *
534
- * Return the qualified name of the node.
535
- */
536
- static VALUE rxml_reader_name(VALUE self)
537
- {
538
- const xmlChar *result = xmlTextReaderConstName(rxml_text_reader_get(self));
539
- return (result == NULL ? Qnil : rb_str_new2((const char*)result));
540
- }
541
-
542
- /*
543
- * call-seq:
544
- * reader.local_name -> name
545
- *
546
- * Return the local name of the node.
547
- */
548
- static VALUE rxml_reader_local_name(VALUE self)
549
- {
550
- const xmlChar *result = xmlTextReaderConstLocalName(rxml_text_reader_get(self));
551
- return (result == NULL ? Qnil : rb_str_new2((const char*)result));
552
- }
553
-
554
- /*
555
- * call-seq:
556
- * reader.attribute_count -> count
557
- *
558
- * Provide the number of attributes of the current node.
559
- */
560
- static VALUE rxml_reader_attr_count(VALUE self)
561
- {
562
- return INT2FIX(xmlTextReaderAttributeCount(rxml_text_reader_get(self)));
563
- }
564
-
565
- /*
566
- * call-seq:
567
- * reader.encoding -> XML::Encoding::UTF_8
568
- *
569
- * Returns the encoding of the document being read. Note you
570
- * first have to read data from the reader for encoding
571
- * to return a value
572
- *
573
- * reader = XML::Reader.file(XML_FILE)
574
- * assert_nil(reader.encoding)
575
- * reader.read
576
- * assert_equal(XML::Encoding::UTF_8, reader.encoding)
577
- *
578
- * In addition, libxml always appears to return nil for the encoding
579
- * when parsing strings.
580
- */
581
- static VALUE rxml_reader_encoding(VALUE self)
582
- {
583
- xmlTextReaderPtr xreader = rxml_text_reader_get(self);
584
- const xmlChar *xencoding = xmlTextReaderConstEncoding(xreader);
585
- if (xencoding)
586
- return INT2NUM(xmlParseCharEncoding(xencoding));
587
- else
588
- return INT2NUM(XML_CHAR_ENCODING_NONE);
589
- }
590
-
591
- /*
592
- * call-seq:
593
- * reader.base_uri -> URI
594
- *
595
- * Determine the base URI of the node.
596
- */
597
- static VALUE rxml_reader_base_uri(VALUE self)
598
- {
599
- const xmlChar *result = xmlTextReaderConstBaseUri(rxml_text_reader_get(self));
600
- return (result == NULL ? Qnil : rb_str_new2((const char*)result));
601
- }
602
-
603
- /*
604
- * call-seq:
605
- * reader.namespace_uri -> URI
606
- *
607
- * Determine the namespace URI of the node.
608
- */
609
- static VALUE rxml_reader_namespace_uri(VALUE self)
610
- {
611
- const xmlChar *result = xmlTextReaderConstNamespaceUri(rxml_text_reader_get(self));
612
- return (result == NULL ? Qnil : rb_str_new2((const char*)result));
613
- }
614
-
615
- /*
616
- * call-seq:
617
- * reader.value -> text
618
- *
619
- * Provide the text value of the node if present.
620
- */
621
- static VALUE rxml_reader_value(VALUE self)
622
- {
623
- const xmlChar *result = xmlTextReaderConstValue(rxml_text_reader_get(self));
624
- return (result == NULL ? Qnil : rb_str_new2((const char*)result));
625
- }
626
-
627
- /*
628
- * call-seq:
629
- * reader.prefix -> prefix
630
- *
631
- * Get a shorthand reference to the namespace associated with the node.
632
- */
633
- static VALUE rxml_reader_prefix(VALUE self)
634
- {
635
- const xmlChar *result = xmlTextReaderConstPrefix(rxml_text_reader_get(self));
636
- return (result == NULL ? Qnil : rb_str_new2((const char*)result));
637
- }
638
-
639
- /*
640
- * call-seq:
641
- * reader.depth -> depth
642
- *
643
- * Get the depth of the node in the tree.
644
- */
645
- static VALUE rxml_reader_depth(VALUE self)
646
- {
647
- return INT2FIX(xmlTextReaderDepth(rxml_text_reader_get(self)));
648
- }
649
-
650
- /*
651
- * call-seq:
652
- * reader.quote_char -> char
653
- *
654
- * Get the quotation mark character used to enclose the value of an attribute,
655
- * as an integer value (and -1 in case of error).
656
- */
657
- static VALUE rxml_reader_quote_char(VALUE self)
658
- {
659
- return INT2FIX(xmlTextReaderQuoteChar(rxml_text_reader_get(self)));
660
- }
661
-
662
- /*
663
- * call-seq:
664
- * reader.standalone -> code
665
- *
666
- * Determine the standalone status of the document being read.
667
- *
668
- * Return 1 if the document was declared to be standalone, 0 if it was
669
- * declared to be not standalone, or -1 if the document did not specify its
670
- * standalone status or in case of error.
671
- */
672
- static VALUE rxml_reader_standalone(VALUE self)
673
- {
674
- return INT2FIX(xmlTextReaderStandalone(rxml_text_reader_get(self)));
675
- }
676
-
677
- /*
678
- * call-seq:
679
- * reader.xml_lang -> value
680
- *
681
- * Get the xml:lang scope within which the node resides.
682
- */
683
- static VALUE rxml_reader_xml_lang(VALUE self)
684
- {
685
- const xmlChar *result = xmlTextReaderConstXmlLang(rxml_text_reader_get(self));
686
- return (result == NULL ? Qnil : rb_str_new2((const char*)result));
687
- }
688
-
689
- /*
690
- * call-seq:
691
- * reader.xml_version -> version
692
- *
693
- * Determine the XML version of the document being read.
694
- */
695
- static VALUE rxml_reader_xml_version(VALUE self)
696
- {
697
- const xmlChar *result = xmlTextReaderConstXmlVersion(rxml_text_reader_get(self));
698
- return (result == NULL ? Qnil : rb_str_new2((const char*)result));
699
- }
700
-
701
- /*
702
- * call-seq:
703
- * reader.has_attributes? -> bool
704
- *
705
- * Get whether the node has attributes.
706
- */
707
- static VALUE rxml_reader_has_attributes(VALUE self)
708
- {
709
- return xmlTextReaderHasAttributes(rxml_text_reader_get(self)) ? Qtrue
710
- : Qfalse;
711
- }
712
-
713
- /*
714
- * call-seq:
715
- * reader.has_value? -> bool
716
- *
717
- * Get whether the node can have a text value.
718
- */
719
- static VALUE rxml_reader_has_value(VALUE self)
720
- {
721
- return xmlTextReaderHasValue(rxml_text_reader_get(self)) ? Qtrue : Qfalse;
722
- }
723
-
724
- /*
725
- * call-seq:
726
- * reader[key] -> value
727
- *
728
- * Provide the value of the attribute with the specified index (if +key+ is an
729
- * integer) or with the specified name (if +key+ is a string) relative to the
730
- * containing element, as a string.
731
- */
732
- static VALUE rxml_reader_attribute(VALUE self, VALUE key)
733
- {
734
- xmlTextReaderPtr reader;
735
- xmlChar *attr;
736
-
737
- reader = rxml_text_reader_get(self);
738
-
739
- if (TYPE(key) == T_FIXNUM)
740
- {
741
- attr = xmlTextReaderGetAttributeNo(reader, FIX2INT(key));
742
- }
743
- else
744
- {
745
- attr = xmlTextReaderGetAttribute(reader, (const xmlChar *) StringValueCStr(key));
746
- }
747
- return (attr == NULL ? Qnil : rb_str_new2((const char*)attr));
748
- }
749
-
750
- /*
751
- * call-seq:
752
- * reader.lookup_namespace(prefix) -> value
753
- *
754
- * Resolve a namespace prefix in the scope of the current element.
755
- * To return the default namespace, specify nil as +prefix+.
756
- */
757
- static VALUE rxml_reader_lookup_namespace(VALUE self, VALUE prefix)
758
- {
759
- const xmlChar *result = xmlTextReaderLookupNamespace(rxml_text_reader_get(
760
- self), (const xmlChar *) StringValueCStr(prefix));
761
- return (result == NULL ? Qnil : rb_str_new2((const char*)result));
762
- }
763
-
764
- /*
765
- * call-seq:
766
- * reader.expand -> node
767
- *
768
- * Read the contents of the current node and the full subtree. It then makes
769
- * the subtree available until the next read call.
770
- *
771
- * Return an XML::Node object, or nil in case of error.
772
- */
773
- static VALUE rxml_reader_expand(VALUE self)
774
- {
775
- xmlNodePtr node;
776
- xmlDocPtr doc;
777
- xmlTextReaderPtr reader = rxml_text_reader_get(self);
778
- node = xmlTextReaderExpand(reader);
779
-
780
- if (!node)
781
- return Qnil;
782
-
783
- /* Okay this is tricky. By accessing the returned node, we
784
- take ownership of the reader's document. Thus we need to
785
- tell the reader to not free it. Otherwise it will be
786
- freed twice - once when the Ruby document wrapper goes
787
- out of scope and once when the reader goes out of scope. */
788
-
789
- xmlTextReaderPreserve(reader);
790
- doc = xmlTextReaderCurrentDoc(reader);
791
- rxml_document_wrap(doc);
792
-
793
- return rxml_node_wrap(node);
794
- }
795
-
796
- #if LIBXML_VERSION >= 20618
797
- /*
798
- * call-seq:
799
- * reader.byte_consumed -> value
800
- *
801
- * This method provides the current index of the parser used by the reader,
802
- * relative to the start of the current entity.
803
- */
804
- static VALUE
805
- rxml_reader_byte_consumed(VALUE self)
806
- {
807
- return INT2NUM(xmlTextReaderByteConsumed(rxml_text_reader_get(self)));
808
- }
809
- #endif
810
-
811
- #if LIBXML_VERSION >= 20617
812
- /*
813
- * call-seq:
814
- * reader.column_number -> number
815
- *
816
- * Provide the column number of the current parsing point.
817
- */
818
- static VALUE
819
- rxml_reader_column_number(VALUE self)
820
- {
821
- return INT2NUM(xmlTextReaderGetParserColumnNumber(rxml_text_reader_get(self)));
822
- }
823
-
824
- /*
825
- * call-seq:
826
- * reader.line_number -> number
827
- *
828
- * Provide the line number of the current parsing point.
829
- */
830
- static VALUE
831
- rxml_reader_line_number(VALUE self)
832
- {
833
- return INT2NUM(xmlTextReaderGetParserLineNumber(rxml_text_reader_get(self)));
834
- }
835
- #endif
836
-
837
- /*
838
- * call-seq:
839
- * reader.default? -> bool
840
- *
841
- * Return whether an Attribute node was generated from the default value
842
- * defined in the DTD or schema.
843
- */
844
- static VALUE rxml_reader_default(VALUE self)
845
- {
846
- return xmlTextReaderIsDefault(rxml_text_reader_get(self)) ? Qtrue : Qfalse;
847
- }
848
-
849
- /*
850
- * call-seq:
851
- * reader.namespace_declaration? -> bool
852
- *
853
- * Determine whether the current node is a namespace declaration rather than a
854
- * regular attribute.
855
- */
856
- static VALUE rxml_reader_namespace_declaration(VALUE self)
857
- {
858
- return xmlTextReaderIsNamespaceDecl(rxml_text_reader_get(self)) ? Qtrue
859
- : Qfalse;
860
- }
861
-
862
- /*
863
- * call-seq:
864
- * reader.empty_element? -> bool
865
- *
866
- * Check if the current node is empty.
867
- */
868
- static VALUE rxml_reader_empty_element(VALUE self)
869
- {
870
- return xmlTextReaderIsEmptyElement(rxml_text_reader_get(self)) ? Qtrue
871
- : Qfalse;
872
- }
873
-
874
- /*
875
- * call-seq:
876
- * reader.valid? -> bool
877
- *
878
- * Retrieve the validity status from the parser context.
879
- */
880
- static VALUE rxml_reader_valid(VALUE self)
881
- {
882
- return xmlTextReaderIsValid(rxml_text_reader_get(self)) ? Qtrue : Qfalse;
883
- }
884
-
885
- /* Rdoc needs to know. */
886
- #ifdef RDOC_NEVER_DEFINED
887
- mLibXML = rb_define_module("LibXML");
888
- mXML = rb_define_module_under(mLibXML, "XML");
889
- #endif
890
-
891
- void ruby_init_xml_reader(void)
892
- {
893
- base_uri_SYMBOL = ID2SYM(rb_intern("base_uri"));
894
- ENCODING_SYMBOL = ID2SYM(rb_intern("encoding"));
895
- OPTIONS_SYMBOL = ID2SYM(rb_intern("options"));
896
-
897
- cXMLReader = rb_define_class_under(mXML, "Reader", rb_cObject);
898
-
899
- rb_define_singleton_method(cXMLReader, "document", rxml_reader_document, 1);
900
- rb_define_singleton_method(cXMLReader, "file", rxml_reader_file, -1);
901
- rb_define_singleton_method(cXMLReader, "io", rxml_reader_io, -1);
902
- rb_define_singleton_method(cXMLReader, "string", rxml_reader_string, -1);
903
-
904
- rb_define_method(cXMLReader, "[]", rxml_reader_attribute, 1);
905
- rb_define_method(cXMLReader, "attribute_count", rxml_reader_attr_count, 0);
906
- rb_define_method(cXMLReader, "base_uri", rxml_reader_base_uri, 0);
907
- #if LIBXML_VERSION >= 20618
908
- rb_define_method(cXMLReader, "byte_consumed", rxml_reader_byte_consumed, 0);
909
- #endif
910
- rb_define_method(cXMLReader, "close", rxml_reader_close, 0);
911
- #if LIBXML_VERSION >= 20617
912
- rb_define_method(cXMLReader, "column_number", rxml_reader_column_number, 0);
913
- #endif
914
- rb_define_method(cXMLReader, "depth", rxml_reader_depth, 0);
915
- rb_define_method(cXMLReader, "encoding", rxml_reader_encoding, 0);
916
- rb_define_method(cXMLReader, "expand", rxml_reader_expand, 0);
917
- rb_define_method(cXMLReader, "has_attributes?", rxml_reader_has_attributes, 0);
918
- rb_define_method(cXMLReader, "has_value?", rxml_reader_has_value, 0);
919
- #if LIBXML_VERSION >= 20617
920
- rb_define_method(cXMLReader, "line_number", rxml_reader_line_number, 0);
921
- #endif
922
- rb_define_method(cXMLReader, "local_name", rxml_reader_local_name, 0);
923
- rb_define_method(cXMLReader, "lookup_namespace", rxml_reader_lookup_namespace, 1);
924
- rb_define_method(cXMLReader, "move_to_attribute", rxml_reader_move_to_attr, 1);
925
- rb_define_method(cXMLReader, "move_to_first_attribute", rxml_reader_move_to_first_attr, 0);
926
- rb_define_method(cXMLReader, "move_to_next_attribute", rxml_reader_move_to_next_attr, 0);
927
- rb_define_method(cXMLReader, "move_to_element", rxml_reader_move_to_element, 0);
928
- rb_define_method(cXMLReader, "name", rxml_reader_name, 0);
929
- rb_define_method(cXMLReader, "namespace_uri", rxml_reader_namespace_uri, 0);
930
- rb_define_method(cXMLReader, "next", rxml_reader_next, 0);
931
- rb_define_method(cXMLReader, "next_sibling", rxml_reader_next_sibling, 0);
932
- rb_define_method(cXMLReader, "node", rxml_reader_node, 0);
933
- rb_define_method(cXMLReader, "node_type", rxml_reader_node_type, 0);
934
- rb_define_method(cXMLReader, "normalization", rxml_reader_normalization, 0);
935
- rb_define_method(cXMLReader, "prefix", rxml_reader_prefix, 0);
936
- rb_define_method(cXMLReader, "quote_char", rxml_reader_quote_char, 0);
937
- rb_define_method(cXMLReader, "read", rxml_reader_read, 0);
938
- rb_define_method(cXMLReader, "read_attribute_value", rxml_reader_read_attr_value, 0);
939
- rb_define_method(cXMLReader, "read_inner_xml", rxml_reader_read_inner_xml, 0);
940
- rb_define_method(cXMLReader, "read_outer_xml", rxml_reader_read_outer_xml, 0);
941
- rb_define_method(cXMLReader, "read_state", rxml_reader_read_state, 0);
942
- rb_define_method(cXMLReader, "read_string", rxml_reader_read_string, 0);
943
- rb_define_method(cXMLReader, "relax_ng_validate", rxml_reader_relax_ng_validate, 1);
944
- rb_define_method(cXMLReader, "standalone", rxml_reader_standalone, 0);
945
- #if LIBXML_VERSION >= 20620
946
- rb_define_method(cXMLReader, "schema_validate", rxml_reader_schema_validate, 1);
947
- #endif
948
- rb_define_method(cXMLReader, "value", rxml_reader_value, 0);
949
- rb_define_method(cXMLReader, "xml_lang", rxml_reader_xml_lang, 0);
950
- rb_define_method(cXMLReader, "xml_version", rxml_reader_xml_version, 0);
951
- rb_define_method(cXMLReader, "default?", rxml_reader_default, 0);
952
- rb_define_method(cXMLReader, "empty_element?", rxml_reader_empty_element, 0);
953
- rb_define_method(cXMLReader, "namespace_declaration?", rxml_reader_namespace_declaration, 0);
954
- rb_define_method(cXMLReader, "valid?", rxml_reader_valid, 0);
955
-
956
- /* Constants */
957
- rb_define_const(cXMLReader, "LOADDTD", INT2FIX(XML_PARSER_LOADDTD));
958
- rb_define_const(cXMLReader, "DEFAULTATTRS", INT2FIX(XML_PARSER_DEFAULTATTRS));
959
- rb_define_const(cXMLReader, "VALIDATE", INT2FIX(XML_PARSER_VALIDATE));
960
- rb_define_const(cXMLReader, "SUBST_ENTITIES", INT2FIX(XML_PARSER_SUBST_ENTITIES));
961
-
962
- rb_define_const(cXMLReader, "SEVERITY_VALIDITY_WARNING", INT2FIX(XML_PARSER_SEVERITY_VALIDITY_WARNING));
963
- rb_define_const(cXMLReader, "SEVERITY_VALIDITY_ERROR", INT2FIX(XML_PARSER_SEVERITY_VALIDITY_ERROR));
964
- rb_define_const(cXMLReader, "SEVERITY_WARNING", INT2FIX(XML_PARSER_SEVERITY_WARNING));
965
- rb_define_const(cXMLReader, "SEVERITY_ERROR", INT2FIX(XML_PARSER_SEVERITY_ERROR));
966
-
967
- rb_define_const(cXMLReader, "TYPE_NONE", INT2FIX(XML_READER_TYPE_NONE));
968
- rb_define_const(cXMLReader, "TYPE_ELEMENT", INT2FIX(XML_READER_TYPE_ELEMENT));
969
- rb_define_const(cXMLReader, "TYPE_ATTRIBUTE", INT2FIX(XML_READER_TYPE_ATTRIBUTE));
970
- rb_define_const(cXMLReader, "TYPE_TEXT", INT2FIX(XML_READER_TYPE_TEXT));
971
- rb_define_const(cXMLReader, "TYPE_CDATA", INT2FIX(XML_READER_TYPE_CDATA));
972
- rb_define_const(cXMLReader, "TYPE_ENTITY_REFERENCE", INT2FIX(XML_READER_TYPE_ENTITY_REFERENCE));
973
- rb_define_const(cXMLReader, "TYPE_ENTITY", INT2FIX(XML_READER_TYPE_ENTITY));
974
- rb_define_const(cXMLReader, "TYPE_PROCESSING_INSTRUCTION", INT2FIX(XML_READER_TYPE_PROCESSING_INSTRUCTION));
975
- rb_define_const(cXMLReader, "TYPE_COMMENT", INT2FIX(XML_READER_TYPE_COMMENT));
976
- rb_define_const(cXMLReader, "TYPE_DOCUMENT", INT2FIX(XML_READER_TYPE_DOCUMENT));
977
- rb_define_const(cXMLReader, "TYPE_DOCUMENT_TYPE", INT2FIX(XML_READER_TYPE_DOCUMENT_TYPE));
978
- rb_define_const(cXMLReader, "TYPE_DOCUMENT_FRAGMENT", INT2FIX(XML_READER_TYPE_DOCUMENT_FRAGMENT));
979
- rb_define_const(cXMLReader, "TYPE_NOTATION", INT2FIX(XML_READER_TYPE_NOTATION));
980
- rb_define_const(cXMLReader, "TYPE_WHITESPACE", INT2FIX(XML_READER_TYPE_WHITESPACE));
981
- rb_define_const(cXMLReader, "TYPE_SIGNIFICANT_WHITESPACE", INT2FIX(XML_READER_TYPE_SIGNIFICANT_WHITESPACE));
982
- rb_define_const(cXMLReader, "TYPE_END_ELEMENT", INT2FIX(XML_READER_TYPE_END_ELEMENT));
983
- rb_define_const(cXMLReader, "TYPE_END_ENTITY", INT2FIX(XML_READER_TYPE_END_ENTITY));
984
- rb_define_const(cXMLReader, "TYPE_XML_DECLARATION", INT2FIX(XML_READER_TYPE_XML_DECLARATION));
985
-
986
- /* Read states */
987
- rb_define_const(cXMLReader, "MODE_INITIAL", INT2FIX(XML_TEXTREADER_MODE_INITIAL));
988
- rb_define_const(cXMLReader, "MODE_INTERACTIVE", INT2FIX(XML_TEXTREADER_MODE_INTERACTIVE));
989
- rb_define_const(cXMLReader, "MODE_ERROR", INT2FIX(XML_TEXTREADER_MODE_ERROR));
990
- rb_define_const(cXMLReader, "MODE_EOF", INT2FIX(XML_TEXTREADER_MODE_EOF));
991
- rb_define_const(cXMLReader, "MODE_CLOSED", INT2FIX(XML_TEXTREADER_MODE_CLOSED));
992
- rb_define_const(cXMLReader, "MODE_READING", INT2FIX(XML_TEXTREADER_MODE_READING));
993
- }
1
+ /* Copyright (c) 2006-2007 Apple Inc.
2
+ * Please see the LICENSE file for copyright and distribution information. */
3
+
4
+ #include "ruby_libxml.h"
5
+ #include "ruby_xml_reader.h"
6
+
7
+ /*
8
+ * Document-class: LibXML::XML::Reader
9
+ *
10
+ * The XML::Reader class provides a simpler, alternative way of parsing an XML
11
+ * document in contrast to XML::Parser or XML::SaxParser. A XML::Reader instance
12
+ * acts like a cursor going forward in a document stream, stopping at each node
13
+ * it encounters. To advance to the next node, simply call XML::Reader#read.
14
+ *
15
+ * The XML::Reader API closely matches the DOM Core specification and supports
16
+ * namespaces, xml:base, entity handling and DTDs.
17
+ *
18
+ * To summarize, XML::Reader provides a far simpler API to use versus XML::SaxParser
19
+ * and is more memory efficient than using XML::Parser to create a DOM tree.
20
+ *
21
+ * Example:
22
+ *
23
+ * reader = XML::Reader.string("<foo><bar>1</bar><bar>2</bar><bar>3</bar></foo>")
24
+ * reader.read
25
+ * assert_equal('foo', reader.name)
26
+ * assert_equal(nil, reader.value)
27
+ *
28
+ * 3.times do |i|
29
+ * reader.read
30
+ * assert_equal(XML::Reader::TYPE_ELEMENT, reader.node_type)
31
+ * assert_equal('bar', reader.name)
32
+ * reader.read
33
+ * assert_equal(XML::Reader::TYPE_TEXT, reader.node_type)
34
+ * assert_equal((i + 1).to_s, reader.value)
35
+ * reader.read
36
+ * assert_equal(XML::Reader::TYPE_END_ELEMENT, reader.node_type)
37
+ * end
38
+ *
39
+ * You can also parse documents (see XML::Reader.document),
40
+ * strings (see XML::Reader.string) and io objects (see
41
+ * XML::Reader.io).
42
+ *
43
+ * For a more in depth tutorial, albeit in C, see http://xmlsoft.org/xmlreader.html.*/
44
+
45
+ VALUE cXMLReader;
46
+
47
+ ID BASE_URI_SYMBOL;
48
+ ID ENCODING_SYMBOL;
49
+ ID IO_ATTR;
50
+ ID OPTIONS_SYMBOL;
51
+
52
+
53
+ static void rxml_reader_free(xmlTextReaderPtr reader)
54
+ {
55
+ xmlFreeTextReader(reader);
56
+ }
57
+
58
+ static VALUE rxml_reader_wrap(xmlTextReaderPtr reader)
59
+ {
60
+ return Data_Wrap_Struct(cXMLReader, NULL, rxml_reader_free, reader);
61
+ }
62
+
63
+ static xmlTextReaderPtr rxml_text_reader_get(VALUE obj)
64
+ {
65
+ xmlTextReaderPtr xreader;
66
+ Data_Get_Struct(obj, xmlTextReader, xreader);
67
+ return xreader;
68
+ }
69
+
70
+ /*
71
+ * call-seq:
72
+ * XML::Reader.document(doc) -> XML::Reader
73
+ *
74
+ * Create a new reader for the specified document.
75
+ */
76
+ VALUE rxml_reader_document(VALUE klass, VALUE doc)
77
+ {
78
+ xmlDocPtr xdoc;
79
+ xmlTextReaderPtr xreader;
80
+
81
+ Data_Get_Struct(doc, xmlDoc, xdoc);
82
+
83
+ xreader = xmlReaderWalker(xdoc);
84
+
85
+ if (xreader == NULL)
86
+ rxml_raise(&xmlLastError);
87
+
88
+ return rxml_reader_wrap(xreader);
89
+ }
90
+
91
+ /* call-seq:
92
+ * XML::Reader.file(path) -> XML::Reader
93
+ * XML::Reader.file(path, :encoding => XML::Encoding::UTF_8,
94
+ * :options => XML::Parser::Options::NOENT) -> XML::Parser
95
+ *
96
+ * Creates a new reader by parsing the specified file or uri.
97
+ *
98
+ * You may provide an optional hash table to control how the
99
+ * parsing is performed. Valid options are:
100
+ *
101
+ * encoding - The document encoding, defaults to nil. Valid values
102
+ * are the encoding constants defined on XML::Encoding.
103
+ * options - Controls the execution of the parser, defaults to 0.
104
+ * Valid values are the constants defined on
105
+ * XML::Parser::Options. Mutliple options can be combined
106
+ * by using Bitwise OR (|).
107
+ */
108
+ static VALUE rxml_reader_file(int argc, VALUE *argv, VALUE klass)
109
+ {
110
+ xmlTextReaderPtr xreader;
111
+ VALUE path;
112
+ VALUE options;
113
+
114
+ const char *xencoding = NULL;
115
+ int xoptions = 0;
116
+
117
+ rb_scan_args(argc, argv, "11", &path, &options);
118
+ Check_Type(path, T_STRING);
119
+
120
+ if (!NIL_P(options))
121
+ {
122
+ VALUE encoding = Qnil;
123
+ VALUE parserOptions = Qnil;
124
+
125
+ Check_Type(options, T_HASH);
126
+
127
+ encoding = rb_hash_aref(options, BASE_URI_SYMBOL);
128
+ xencoding = NIL_P(encoding) ? NULL : xmlGetCharEncodingName(NUM2INT(encoding));
129
+
130
+ parserOptions = rb_hash_aref(options, OPTIONS_SYMBOL);
131
+ xoptions = NIL_P(parserOptions) ? 0 : NUM2INT(parserOptions);
132
+ }
133
+
134
+ xreader = xmlReaderForFile(StringValueCStr(path), xencoding, xoptions);
135
+
136
+ if (xreader == NULL)
137
+ rxml_raise(&xmlLastError);
138
+
139
+ return rxml_reader_wrap(xreader);
140
+ }
141
+
142
+ /* call-seq:
143
+ * XML::Reader.io(io) -> XML::Reader
144
+ * XML::Reader.io(io, :encoding => XML::Encoding::UTF_8,
145
+ * :options => XML::Parser::Options::NOENT) -> XML::Reader
146
+ *
147
+ * Creates a new reader by parsing the specified io object.
148
+ *
149
+ * You may provide an optional hash table to control how the
150
+ * parsing is performed. Valid options are:
151
+ *
152
+ * base_uri - The base url for the parsed document.
153
+ * encoding - The document encoding, defaults to nil. Valid values
154
+ * are the encoding constants defined on XML::Encoding.
155
+ * options - Controls the execution of the parser, defaults to 0.
156
+ * Valid values are the constants defined on
157
+ * XML::Parser::Options. Multiple options can be combined
158
+ * by using Bitwise OR (|).
159
+ */
160
+ static VALUE rxml_reader_io(int argc, VALUE *argv, VALUE klass)
161
+ {
162
+ xmlTextReaderPtr xreader;
163
+ VALUE result;
164
+ VALUE io;
165
+ VALUE options;
166
+ char *xbaseurl = NULL;
167
+ const char *xencoding = NULL;
168
+ int xoptions = 0;
169
+
170
+ rb_scan_args(argc, argv, "11", &io, &options);
171
+
172
+ if (!NIL_P(options))
173
+ {
174
+ VALUE baseurl = Qnil;
175
+ VALUE encoding = Qnil;
176
+ VALUE parserOptions = Qnil;
177
+
178
+ Check_Type(options, T_HASH);
179
+
180
+ baseurl = rb_hash_aref(options, BASE_URI_SYMBOL);
181
+ xbaseurl = NIL_P(baseurl) ? NULL : StringValueCStr(baseurl);
182
+
183
+ encoding = rb_hash_aref(options, ENCODING_SYMBOL);
184
+ xencoding = NIL_P(encoding) ? NULL : xmlGetCharEncodingName(NUM2INT(encoding));
185
+
186
+ parserOptions = rb_hash_aref(options, OPTIONS_SYMBOL);
187
+ xoptions = NIL_P(parserOptions) ? 0 : NUM2INT(parserOptions);
188
+ }
189
+
190
+ xreader = xmlReaderForIO((xmlInputReadCallback) rxml_read_callback, NULL,
191
+ (void *) io,
192
+ xbaseurl, xencoding, xoptions);
193
+
194
+ if (xreader == NULL)
195
+ rxml_raise(&xmlLastError);
196
+
197
+ result = rxml_reader_wrap(xreader);
198
+
199
+ /* Attach io object to parser so it won't get freed.*/
200
+ rb_ivar_set(result, IO_ATTR, io);
201
+
202
+ return result;
203
+ }
204
+
205
+ /* call-seq:
206
+ * XML::Reader.string(io) -> XML::Reader
207
+ * XML::Reader.string(io, :encoding => XML::Encoding::UTF_8,
208
+ * :options => XML::Parser::Options::NOENT) -> XML::Parser
209
+ *
210
+ * Creates a new reader by parsing the specified string.
211
+ *
212
+ * You may provide an optional hash table to control how the
213
+ * parsing is performed. Valid options are:
214
+ *
215
+ * base_uri - The base url for the parsed document.
216
+ * encoding - The document encoding, defaults to nil. Valid values
217
+ * are the encoding constants defined on XML::Encoding.
218
+ * options - Controls the execution of the parser, defaults to 0.
219
+ * Valid values are the constants defined on
220
+ * XML::Parser::Options. Mutliple options can be combined
221
+ * by using Bitwise OR (|).
222
+ */
223
+ static VALUE rxml_reader_string(int argc, VALUE *argv, VALUE klass)
224
+ {
225
+ xmlTextReaderPtr xreader;
226
+ VALUE string;
227
+ VALUE options;
228
+ char *xbaseurl = NULL;
229
+ const char *xencoding = NULL;
230
+ int xoptions = 0;
231
+
232
+ rb_scan_args(argc, argv, "11", &string, &options);
233
+ Check_Type(string, T_STRING);
234
+
235
+ if (!NIL_P(options))
236
+ {
237
+ VALUE baseurl = Qnil;
238
+ VALUE encoding = Qnil;
239
+ VALUE parserOptions = Qnil;
240
+
241
+ Check_Type(options, T_HASH);
242
+
243
+ baseurl = rb_hash_aref(options, BASE_URI_SYMBOL);
244
+ xbaseurl = NIL_P(baseurl) ? NULL : StringValueCStr(baseurl);
245
+
246
+ encoding = rb_hash_aref(options, ENCODING_SYMBOL);
247
+ xencoding = NIL_P(encoding) ? NULL : xmlGetCharEncodingName(NUM2INT(encoding));
248
+
249
+ parserOptions = rb_hash_aref(options, OPTIONS_SYMBOL);
250
+ xoptions = NIL_P(parserOptions) ? 0 : NUM2INT(parserOptions);
251
+ }
252
+
253
+ xreader = xmlReaderForMemory(StringValueCStr(string), RSTRING_LEN(string),
254
+ xbaseurl, xencoding, xoptions);
255
+
256
+ if (xreader == NULL)
257
+ rxml_raise(&xmlLastError);
258
+
259
+ return rxml_reader_wrap(xreader);
260
+ }
261
+
262
+ /*
263
+ * call-seq:
264
+ * reader.close -> code
265
+ *
266
+ * This method releases any resources allocated by the current instance,
267
+ * changes the state to Closed and closes any underlying input.
268
+ */
269
+ static VALUE rxml_reader_close(VALUE self)
270
+ {
271
+ return INT2FIX(xmlTextReaderClose(rxml_text_reader_get(self)));
272
+ }
273
+
274
+ /*
275
+ * call-seq:
276
+ * reader.move_to_attribute(val) -> code
277
+ *
278
+ * Move the position of the current instance to the attribute with the
279
+ * specified index (if +val+ is an integer) or name (if +val+ is a string)
280
+ * relative to the containing element.
281
+ */
282
+ static VALUE rxml_reader_move_to_attr(VALUE self, VALUE val)
283
+ {
284
+ xmlTextReaderPtr xreader;
285
+ int ret;
286
+
287
+ xreader = rxml_text_reader_get(self);
288
+
289
+ if (TYPE(val) == T_FIXNUM)
290
+ {
291
+ ret = xmlTextReaderMoveToAttributeNo(xreader, FIX2INT(val));
292
+ }
293
+ else
294
+ {
295
+ ret = xmlTextReaderMoveToAttribute(xreader,
296
+ (const xmlChar *) StringValueCStr(val));
297
+ }
298
+
299
+ return INT2FIX(ret);
300
+ }
301
+
302
+ /*
303
+ * call-seq:
304
+ * reader.move_to_first_attribute -> code
305
+ *
306
+ * Move the position of the current instance to the first attribute associated
307
+ * with the current node.
308
+ */
309
+ static VALUE rxml_reader_move_to_first_attr(VALUE self)
310
+ {
311
+ return INT2FIX(xmlTextReaderMoveToFirstAttribute(rxml_text_reader_get(self)));
312
+ }
313
+
314
+ /*
315
+ * call-seq:
316
+ * reader.move_to_next_attribute -> code
317
+ *
318
+ * Move the position of the current instance to the next attribute associated
319
+ * with the current node.
320
+ */
321
+ static VALUE rxml_reader_move_to_next_attr(VALUE self)
322
+ {
323
+ return INT2FIX(xmlTextReaderMoveToNextAttribute(rxml_text_reader_get(self)));
324
+ }
325
+
326
+ /*
327
+ * call-seq:
328
+ * reader.move_to_element -> code
329
+ *
330
+ * Move the position of the current instance to the node that contains the
331
+ * current attribute node.
332
+ */
333
+ static VALUE rxml_reader_move_to_element(VALUE self)
334
+ {
335
+ return INT2FIX(xmlTextReaderMoveToElement(rxml_text_reader_get(self)));
336
+ }
337
+
338
+ /*
339
+ * call-seq:
340
+ * reader.next -> code
341
+ *
342
+ * Skip to the node following the current one in document order while avoiding
343
+ * the subtree if any.
344
+ */
345
+ static VALUE rxml_reader_next(VALUE self)
346
+ {
347
+ return INT2FIX(xmlTextReaderNext(rxml_text_reader_get(self)));
348
+ }
349
+
350
+ /*
351
+ * call-seq:
352
+ * reader.next_sibling -> code
353
+ *
354
+ * Skip to the node following the current one in document order while avoiding
355
+ * the subtree if any. Currently implemented only for Readers built on a
356
+ * document.
357
+ */
358
+ static VALUE rxml_reader_next_sibling(VALUE self)
359
+ {
360
+ return INT2FIX(xmlTextReaderNextSibling(rxml_text_reader_get(self)));
361
+ }
362
+
363
+ /*
364
+ * call-seq:
365
+ * reader.node -> XML::Node
366
+ *
367
+ * Returns the reader's current node. It will return
368
+ * nil if Reader#read has not yet been called.
369
+ * WARNING - Using this method is dangerous because the
370
+ * node may be destroyed on the next #read.
371
+ */
372
+ static VALUE rxml_reader_node(VALUE self)
373
+ {
374
+ xmlTextReaderPtr xreader = rxml_text_reader_get(self);
375
+ xmlNodePtr xnode = xmlTextReaderCurrentNode(xreader);
376
+ return xnode ? rxml_node_wrap(xnode) : Qnil;
377
+ }
378
+
379
+ /*
380
+ * call-seq:
381
+ * reader.node_type -> type
382
+ *
383
+ * Get the node type of the current node. Reference:
384
+ * http://dotgnu.org/pnetlib-doc/System/Xml/XmlNodeType.html
385
+ */
386
+ static VALUE rxml_reader_node_type(VALUE self)
387
+ {
388
+ return INT2FIX(xmlTextReaderNodeType(rxml_text_reader_get(self)));
389
+ }
390
+
391
+ /*
392
+ * call-seq:
393
+ * reader.normalization -> value
394
+ *
395
+ * The value indicating whether to normalize white space and attribute values.
396
+ * Since attribute value and end of line normalizations are a MUST in the XML
397
+ * specification only the value true is accepted. The broken behaviour of
398
+ * accepting out of range character entities like &#0; is of course not
399
+ * supported either.
400
+ *
401
+ * Return 1 or -1 in case of error.
402
+ */
403
+ static VALUE rxml_reader_normalization(VALUE self)
404
+ {
405
+ return INT2FIX(xmlTextReaderNormalization(rxml_text_reader_get(self)));
406
+ }
407
+
408
+ /*
409
+ * call-seq:
410
+ * reader.read -> code
411
+ *
412
+ * Causes the reader to move to the next node in the stream, exposing its properties.
413
+ *
414
+ * Returns true if a node was successfully read or false if there are no more
415
+ * nodes to read. On errors, an exception is raised.*/
416
+ static VALUE rxml_reader_read(VALUE self)
417
+ {
418
+ int result = xmlTextReaderRead(rxml_text_reader_get(self));
419
+ switch(result)
420
+ {
421
+ case -1:
422
+ rxml_raise(&xmlLastError);
423
+ return Qnil;
424
+ break;
425
+ case 0:
426
+ return Qfalse;
427
+ case 1:
428
+ return Qtrue;
429
+ default:
430
+ rb_raise(rb_eRuntimeError,
431
+ "xmlTextReaderRead did not return -1, 0 or 1. Return value was: %d", result);
432
+ }
433
+ }
434
+
435
+ /*
436
+ * call-seq:
437
+ * reader.read_attribute_value -> code
438
+ *
439
+ * Parse an attribute value into one or more Text and EntityReference nodes.
440
+ *
441
+ * Return 1 in case of success, 0 if the reader was not positioned on an
442
+ * attribute node or all the attribute values have been read, or -1 in case of
443
+ * error.
444
+ */
445
+ static VALUE rxml_reader_read_attr_value(VALUE self)
446
+ {
447
+ return INT2FIX(xmlTextReaderReadAttributeValue(rxml_text_reader_get(self)));
448
+ }
449
+
450
+ /*
451
+ * call-seq:
452
+ * reader.read_inner_xml -> data
453
+ *
454
+ * Read the contents of the current node, including child nodes and markup.
455
+ *
456
+ * Return a string containing the XML content, or nil if the current node is
457
+ * neither an element nor attribute, or has no child nodes.
458
+ */
459
+ static VALUE rxml_reader_read_inner_xml(VALUE self)
460
+ {
461
+ const xmlChar *result = xmlTextReaderReadInnerXml(rxml_text_reader_get(self));
462
+ return (result == NULL ? Qnil : rb_str_new2((const char*)result));
463
+ }
464
+
465
+ /*
466
+ * call-seq:
467
+ * reader.read_outer_xml -> data
468
+ *
469
+ * Read the contents of the current node, including child nodes and markup.
470
+ *
471
+ * Return a string containing the XML content, or nil if the current node is
472
+ * neither an element nor attribute, or has no child nodes.
473
+ */
474
+ static VALUE rxml_reader_read_outer_xml(VALUE self)
475
+ {
476
+ const xmlChar *result = xmlTextReaderReadOuterXml(rxml_text_reader_get(self));
477
+ return (result == NULL ? Qnil : rb_str_new2((const char*)result));
478
+ }
479
+
480
+ /*
481
+ * call-seq:
482
+ * reader.read_state -> state
483
+ *
484
+ * Get the read state of the reader.
485
+ */
486
+ static VALUE rxml_reader_read_state(VALUE self)
487
+ {
488
+ return INT2FIX(xmlTextReaderReadState(rxml_text_reader_get(self)));
489
+ }
490
+
491
+ /*
492
+ * call-seq:
493
+ * reader.read_string -> string
494
+ *
495
+ * Read the contents of an element or a text node as a string.
496
+ *
497
+ * Return a string containing the contents of the Element or Text node, or nil
498
+ * if the reader is positioned on any other type of node.
499
+ */
500
+ static VALUE rxml_reader_read_string(VALUE self)
501
+ {
502
+ const xmlChar *result = xmlTextReaderReadString(rxml_text_reader_get(self));
503
+ return (result == NULL ? Qnil : rb_str_new2((const char*)result));
504
+ }
505
+
506
+ /*
507
+ * call-seq:
508
+ * reader.relax_ng_validate(rng) -> code
509
+ *
510
+ * Use RelaxNG to validate the document as it is processed. Activation is only
511
+ * possible before the first read. If +rng+ is nil, the RelaxNG validation is
512
+ * deactivated.
513
+ *
514
+ * Return 0 in case the RelaxNG validation could be (de)activated and -1 in
515
+ * case of error.
516
+ */
517
+ static VALUE rxml_reader_relax_ng_validate(VALUE self, VALUE rng)
518
+ {
519
+ char *xrng = NIL_P(rng) ? NULL : StringValueCStr(rng);
520
+ return INT2FIX(xmlTextReaderRelaxNGValidate(rxml_text_reader_get(self), xrng));
521
+ }
522
+
523
+ #if LIBXML_VERSION >= 20620
524
+ /*
525
+ * call-seq:
526
+ * reader.schema_validate(schema) -> code
527
+ *
528
+ * Use W3C XSD schema to validate the document as it is processed. Activation
529
+ * is only possible before the first read. If +schema+ is nil, then XML Schema
530
+ * validation is deactivated.
531
+ *
532
+ * Return 0 in case the schemas validation could be (de)activated and -1 in
533
+ * case of error.
534
+ */
535
+ static VALUE
536
+ rxml_reader_schema_validate(VALUE self, VALUE xsd)
537
+ {
538
+ char *xxsd = NIL_P(xsd) ? NULL : StringValueCStr(xsd);
539
+ int status = xmlTextReaderSchemaValidate(rxml_text_reader_get(self), xxsd);
540
+ return INT2FIX(status);
541
+ }
542
+ #endif
543
+
544
+ /*
545
+ * call-seq:
546
+ * reader.name -> name
547
+ *
548
+ * Return the qualified name of the node.
549
+ */
550
+ static VALUE rxml_reader_name(VALUE self)
551
+ {
552
+ const xmlChar *result = xmlTextReaderConstName(rxml_text_reader_get(self));
553
+ return (result == NULL ? Qnil : rb_str_new2((const char*)result));
554
+ }
555
+
556
+ /*
557
+ * call-seq:
558
+ * reader.local_name -> name
559
+ *
560
+ * Return the local name of the node.
561
+ */
562
+ static VALUE rxml_reader_local_name(VALUE self)
563
+ {
564
+ const xmlChar *result = xmlTextReaderConstLocalName(rxml_text_reader_get(self));
565
+ return (result == NULL ? Qnil : rb_str_new2((const char*)result));
566
+ }
567
+
568
+ /*
569
+ * call-seq:
570
+ * reader.attribute_count -> count
571
+ *
572
+ * Provide the number of attributes of the current node.
573
+ */
574
+ static VALUE rxml_reader_attr_count(VALUE self)
575
+ {
576
+ return INT2FIX(xmlTextReaderAttributeCount(rxml_text_reader_get(self)));
577
+ }
578
+
579
+ /*
580
+ * call-seq:
581
+ * reader.encoding -> XML::Encoding::UTF_8
582
+ *
583
+ * Returns the encoding of the document being read. Note you
584
+ * first have to read data from the reader for encoding
585
+ * to return a value
586
+ *
587
+ * reader = XML::Reader.file(XML_FILE)
588
+ * assert_nil(reader.encoding)
589
+ * reader.read
590
+ * assert_equal(XML::Encoding::UTF_8, reader.encoding)
591
+ *
592
+ * In addition, libxml always appears to return nil for the encoding
593
+ * when parsing strings.
594
+ */
595
+ static VALUE rxml_reader_encoding(VALUE self)
596
+ {
597
+ xmlTextReaderPtr xreader = rxml_text_reader_get(self);
598
+ const xmlChar *xencoding = xmlTextReaderConstEncoding(xreader);
599
+ if (xencoding)
600
+ return INT2NUM(xmlParseCharEncoding(xencoding));
601
+ else
602
+ return INT2NUM(XML_CHAR_ENCODING_NONE);
603
+ }
604
+
605
+ /*
606
+ * call-seq:
607
+ * reader.base_uri -> URI
608
+ *
609
+ * Determine the base URI of the node.
610
+ */
611
+ static VALUE rxml_reader_base_uri(VALUE self)
612
+ {
613
+ const xmlChar *result = xmlTextReaderConstBaseUri(rxml_text_reader_get(self));
614
+ return (result == NULL ? Qnil : rb_str_new2((const char*)result));
615
+ }
616
+
617
+ /*
618
+ * call-seq:
619
+ * reader.namespace_uri -> URI
620
+ *
621
+ * Determine the namespace URI of the node.
622
+ */
623
+ static VALUE rxml_reader_namespace_uri(VALUE self)
624
+ {
625
+ const xmlChar *result = xmlTextReaderConstNamespaceUri(rxml_text_reader_get(self));
626
+ return (result == NULL ? Qnil : rb_str_new2((const char*)result));
627
+ }
628
+
629
+ /*
630
+ * call-seq:
631
+ * reader.value -> text
632
+ *
633
+ * Provide the text value of the node if present.
634
+ */
635
+ static VALUE rxml_reader_value(VALUE self)
636
+ {
637
+ const xmlChar *result = xmlTextReaderConstValue(rxml_text_reader_get(self));
638
+ return (result == NULL ? Qnil : rb_str_new2((const char*)result));
639
+ }
640
+
641
+ /*
642
+ * call-seq:
643
+ * reader.prefix -> prefix
644
+ *
645
+ * Get a shorthand reference to the namespace associated with the node.
646
+ */
647
+ static VALUE rxml_reader_prefix(VALUE self)
648
+ {
649
+ const xmlChar *result = xmlTextReaderConstPrefix(rxml_text_reader_get(self));
650
+ return (result == NULL ? Qnil : rb_str_new2((const char*)result));
651
+ }
652
+
653
+ /*
654
+ * call-seq:
655
+ * reader.depth -> depth
656
+ *
657
+ * Get the depth of the node in the tree.
658
+ */
659
+ static VALUE rxml_reader_depth(VALUE self)
660
+ {
661
+ return INT2FIX(xmlTextReaderDepth(rxml_text_reader_get(self)));
662
+ }
663
+
664
+ /*
665
+ * call-seq:
666
+ * reader.quote_char -> char
667
+ *
668
+ * Get the quotation mark character used to enclose the value of an attribute,
669
+ * as an integer value (and -1 in case of error).
670
+ */
671
+ static VALUE rxml_reader_quote_char(VALUE self)
672
+ {
673
+ return INT2FIX(xmlTextReaderQuoteChar(rxml_text_reader_get(self)));
674
+ }
675
+
676
+ /*
677
+ * call-seq:
678
+ * reader.standalone -> code
679
+ *
680
+ * Determine the standalone status of the document being read.
681
+ *
682
+ * Return 1 if the document was declared to be standalone, 0 if it was
683
+ * declared to be not standalone, or -1 if the document did not specify its
684
+ * standalone status or in case of error.
685
+ */
686
+ static VALUE rxml_reader_standalone(VALUE self)
687
+ {
688
+ return INT2FIX(xmlTextReaderStandalone(rxml_text_reader_get(self)));
689
+ }
690
+
691
+ /*
692
+ * call-seq:
693
+ * reader.xml_lang -> value
694
+ *
695
+ * Get the xml:lang scope within which the node resides.
696
+ */
697
+ static VALUE rxml_reader_xml_lang(VALUE self)
698
+ {
699
+ const xmlChar *result = xmlTextReaderConstXmlLang(rxml_text_reader_get(self));
700
+ return (result == NULL ? Qnil : rb_str_new2((const char*)result));
701
+ }
702
+
703
+ /*
704
+ * call-seq:
705
+ * reader.xml_version -> version
706
+ *
707
+ * Determine the XML version of the document being read.
708
+ */
709
+ static VALUE rxml_reader_xml_version(VALUE self)
710
+ {
711
+ const xmlChar *result = xmlTextReaderConstXmlVersion(rxml_text_reader_get(self));
712
+ return (result == NULL ? Qnil : rb_str_new2((const char*)result));
713
+ }
714
+
715
+ /*
716
+ * call-seq:
717
+ * reader.has_attributes? -> bool
718
+ *
719
+ * Get whether the node has attributes.
720
+ */
721
+ static VALUE rxml_reader_has_attributes(VALUE self)
722
+ {
723
+ return xmlTextReaderHasAttributes(rxml_text_reader_get(self)) ? Qtrue
724
+ : Qfalse;
725
+ }
726
+
727
+ /*
728
+ * call-seq:
729
+ * reader.has_value? -> bool
730
+ *
731
+ * Get whether the node can have a text value.
732
+ */
733
+ static VALUE rxml_reader_has_value(VALUE self)
734
+ {
735
+ return xmlTextReaderHasValue(rxml_text_reader_get(self)) ? Qtrue : Qfalse;
736
+ }
737
+
738
+ /*
739
+ * call-seq:
740
+ * reader[key] -> value
741
+ *
742
+ * Provide the value of the attribute with the specified index (if +key+ is an
743
+ * integer) or with the specified name (if +key+ is a string) relative to the
744
+ * containing element, as a string.
745
+ */
746
+ static VALUE rxml_reader_attribute(VALUE self, VALUE key)
747
+ {
748
+ xmlTextReaderPtr reader;
749
+ xmlChar *attr;
750
+
751
+ reader = rxml_text_reader_get(self);
752
+
753
+ if (TYPE(key) == T_FIXNUM)
754
+ {
755
+ attr = xmlTextReaderGetAttributeNo(reader, FIX2INT(key));
756
+ }
757
+ else
758
+ {
759
+ attr = xmlTextReaderGetAttribute(reader, (const xmlChar *) StringValueCStr(key));
760
+ }
761
+ return (attr == NULL ? Qnil : rb_str_new2((const char*)attr));
762
+ }
763
+
764
+ /*
765
+ * call-seq:
766
+ * reader.lookup_namespace(prefix) -> value
767
+ *
768
+ * Resolve a namespace prefix in the scope of the current element.
769
+ * To return the default namespace, specify nil as +prefix+.
770
+ */
771
+ static VALUE rxml_reader_lookup_namespace(VALUE self, VALUE prefix)
772
+ {
773
+ const xmlChar *result = xmlTextReaderLookupNamespace(rxml_text_reader_get(
774
+ self), (const xmlChar *) StringValueCStr(prefix));
775
+ return (result == NULL ? Qnil : rb_str_new2((const char*)result));
776
+ }
777
+
778
+ /*
779
+ * call-seq:
780
+ * reader.expand -> node
781
+ *
782
+ * Read the contents of the current node and the full subtree. It then makes
783
+ * the subtree available until the next read call.
784
+ *
785
+ * Return an XML::Node object, or nil in case of error.
786
+ */
787
+ static VALUE rxml_reader_expand(VALUE self)
788
+ {
789
+ xmlNodePtr node;
790
+ xmlDocPtr doc;
791
+ xmlTextReaderPtr reader = rxml_text_reader_get(self);
792
+ node = xmlTextReaderExpand(reader);
793
+
794
+ if (!node)
795
+ return Qnil;
796
+
797
+ /* Okay this is tricky. By accessing the returned node, we
798
+ take ownership of the reader's document. Thus we need to
799
+ tell the reader to not free it. Otherwise it will be
800
+ freed twice - once when the Ruby document wrapper goes
801
+ out of scope and once when the reader goes out of scope. */
802
+
803
+ xmlTextReaderPreserve(reader);
804
+ doc = xmlTextReaderCurrentDoc(reader);
805
+ rxml_document_wrap(doc);
806
+
807
+ return rxml_node_wrap(node);
808
+ }
809
+
810
+ #if LIBXML_VERSION >= 20618
811
+ /*
812
+ * call-seq:
813
+ * reader.byte_consumed -> value
814
+ *
815
+ * This method provides the current index of the parser used by the reader,
816
+ * relative to the start of the current entity.
817
+ */
818
+ static VALUE
819
+ rxml_reader_byte_consumed(VALUE self)
820
+ {
821
+ return INT2NUM(xmlTextReaderByteConsumed(rxml_text_reader_get(self)));
822
+ }
823
+ #endif
824
+
825
+ #if LIBXML_VERSION >= 20617
826
+ /*
827
+ * call-seq:
828
+ * reader.column_number -> number
829
+ *
830
+ * Provide the column number of the current parsing point.
831
+ */
832
+ static VALUE
833
+ rxml_reader_column_number(VALUE self)
834
+ {
835
+ return INT2NUM(xmlTextReaderGetParserColumnNumber(rxml_text_reader_get(self)));
836
+ }
837
+
838
+ /*
839
+ * call-seq:
840
+ * reader.line_number -> number
841
+ *
842
+ * Provide the line number of the current parsing point.
843
+ */
844
+ static VALUE
845
+ rxml_reader_line_number(VALUE self)
846
+ {
847
+ return INT2NUM(xmlTextReaderGetParserLineNumber(rxml_text_reader_get(self)));
848
+ }
849
+ #endif
850
+
851
+ /*
852
+ * call-seq:
853
+ * reader.default? -> bool
854
+ *
855
+ * Return whether an Attribute node was generated from the default value
856
+ * defined in the DTD or schema.
857
+ */
858
+ static VALUE rxml_reader_default(VALUE self)
859
+ {
860
+ return xmlTextReaderIsDefault(rxml_text_reader_get(self)) ? Qtrue : Qfalse;
861
+ }
862
+
863
+ /*
864
+ * call-seq:
865
+ * reader.namespace_declaration? -> bool
866
+ *
867
+ * Determine whether the current node is a namespace declaration rather than a
868
+ * regular attribute.
869
+ */
870
+ static VALUE rxml_reader_namespace_declaration(VALUE self)
871
+ {
872
+ return xmlTextReaderIsNamespaceDecl(rxml_text_reader_get(self)) ? Qtrue
873
+ : Qfalse;
874
+ }
875
+
876
+ /*
877
+ * call-seq:
878
+ * reader.empty_element? -> bool
879
+ *
880
+ * Check if the current node is empty.
881
+ */
882
+ static VALUE rxml_reader_empty_element(VALUE self)
883
+ {
884
+ return xmlTextReaderIsEmptyElement(rxml_text_reader_get(self)) ? Qtrue
885
+ : Qfalse;
886
+ }
887
+
888
+ /*
889
+ * call-seq:
890
+ * reader.valid? -> bool
891
+ *
892
+ * Retrieve the validity status from the parser context.
893
+ */
894
+ static VALUE rxml_reader_valid(VALUE self)
895
+ {
896
+ return xmlTextReaderIsValid(rxml_text_reader_get(self)) ? Qtrue : Qfalse;
897
+ }
898
+
899
+ void rxml_init_reader(void)
900
+ {
901
+ BASE_URI_SYMBOL = ID2SYM(rb_intern("base_uri"));
902
+ ENCODING_SYMBOL = ID2SYM(rb_intern("encoding"));
903
+ IO_ATTR = ID2SYM(rb_intern("@io"));
904
+ OPTIONS_SYMBOL = ID2SYM(rb_intern("options"));
905
+
906
+ cXMLReader = rb_define_class_under(mXML, "Reader", rb_cObject);
907
+
908
+ rb_define_singleton_method(cXMLReader, "document", rxml_reader_document, 1);
909
+ rb_define_singleton_method(cXMLReader, "file", rxml_reader_file, -1);
910
+ rb_define_singleton_method(cXMLReader, "io", rxml_reader_io, -1);
911
+ rb_define_singleton_method(cXMLReader, "string", rxml_reader_string, -1);
912
+
913
+ rb_define_method(cXMLReader, "[]", rxml_reader_attribute, 1);
914
+ rb_define_method(cXMLReader, "attribute_count", rxml_reader_attr_count, 0);
915
+ rb_define_method(cXMLReader, "base_uri", rxml_reader_base_uri, 0);
916
+ #if LIBXML_VERSION >= 20618
917
+ rb_define_method(cXMLReader, "byte_consumed", rxml_reader_byte_consumed, 0);
918
+ #endif
919
+ rb_define_method(cXMLReader, "close", rxml_reader_close, 0);
920
+ #if LIBXML_VERSION >= 20617
921
+ rb_define_method(cXMLReader, "column_number", rxml_reader_column_number, 0);
922
+ #endif
923
+ rb_define_method(cXMLReader, "depth", rxml_reader_depth, 0);
924
+ rb_define_method(cXMLReader, "encoding", rxml_reader_encoding, 0);
925
+ rb_define_method(cXMLReader, "expand", rxml_reader_expand, 0);
926
+ rb_define_method(cXMLReader, "has_attributes?", rxml_reader_has_attributes, 0);
927
+ rb_define_method(cXMLReader, "has_value?", rxml_reader_has_value, 0);
928
+ #if LIBXML_VERSION >= 20617
929
+ rb_define_method(cXMLReader, "line_number", rxml_reader_line_number, 0);
930
+ #endif
931
+ rb_define_method(cXMLReader, "local_name", rxml_reader_local_name, 0);
932
+ rb_define_method(cXMLReader, "lookup_namespace", rxml_reader_lookup_namespace, 1);
933
+ rb_define_method(cXMLReader, "move_to_attribute", rxml_reader_move_to_attr, 1);
934
+ rb_define_method(cXMLReader, "move_to_first_attribute", rxml_reader_move_to_first_attr, 0);
935
+ rb_define_method(cXMLReader, "move_to_next_attribute", rxml_reader_move_to_next_attr, 0);
936
+ rb_define_method(cXMLReader, "move_to_element", rxml_reader_move_to_element, 0);
937
+ rb_define_method(cXMLReader, "name", rxml_reader_name, 0);
938
+ rb_define_method(cXMLReader, "namespace_uri", rxml_reader_namespace_uri, 0);
939
+ rb_define_method(cXMLReader, "next", rxml_reader_next, 0);
940
+ rb_define_method(cXMLReader, "next_sibling", rxml_reader_next_sibling, 0);
941
+ rb_define_method(cXMLReader, "node", rxml_reader_node, 0);
942
+ rb_define_method(cXMLReader, "node_type", rxml_reader_node_type, 0);
943
+ rb_define_method(cXMLReader, "normalization", rxml_reader_normalization, 0);
944
+ rb_define_method(cXMLReader, "prefix", rxml_reader_prefix, 0);
945
+ rb_define_method(cXMLReader, "quote_char", rxml_reader_quote_char, 0);
946
+ rb_define_method(cXMLReader, "read", rxml_reader_read, 0);
947
+ rb_define_method(cXMLReader, "read_attribute_value", rxml_reader_read_attr_value, 0);
948
+ rb_define_method(cXMLReader, "read_inner_xml", rxml_reader_read_inner_xml, 0);
949
+ rb_define_method(cXMLReader, "read_outer_xml", rxml_reader_read_outer_xml, 0);
950
+ rb_define_method(cXMLReader, "read_state", rxml_reader_read_state, 0);
951
+ rb_define_method(cXMLReader, "read_string", rxml_reader_read_string, 0);
952
+ rb_define_method(cXMLReader, "relax_ng_validate", rxml_reader_relax_ng_validate, 1);
953
+ rb_define_method(cXMLReader, "standalone", rxml_reader_standalone, 0);
954
+ #if LIBXML_VERSION >= 20620
955
+ rb_define_method(cXMLReader, "schema_validate", rxml_reader_schema_validate, 1);
956
+ #endif
957
+ rb_define_method(cXMLReader, "value", rxml_reader_value, 0);
958
+ rb_define_method(cXMLReader, "xml_lang", rxml_reader_xml_lang, 0);
959
+ rb_define_method(cXMLReader, "xml_version", rxml_reader_xml_version, 0);
960
+ rb_define_method(cXMLReader, "default?", rxml_reader_default, 0);
961
+ rb_define_method(cXMLReader, "empty_element?", rxml_reader_empty_element, 0);
962
+ rb_define_method(cXMLReader, "namespace_declaration?", rxml_reader_namespace_declaration, 0);
963
+ rb_define_method(cXMLReader, "valid?", rxml_reader_valid, 0);
964
+
965
+ /* Constants */
966
+ rb_define_const(cXMLReader, "LOADDTD", INT2FIX(XML_PARSER_LOADDTD));
967
+ rb_define_const(cXMLReader, "DEFAULTATTRS", INT2FIX(XML_PARSER_DEFAULTATTRS));
968
+ rb_define_const(cXMLReader, "VALIDATE", INT2FIX(XML_PARSER_VALIDATE));
969
+ rb_define_const(cXMLReader, "SUBST_ENTITIES", INT2FIX(XML_PARSER_SUBST_ENTITIES));
970
+
971
+ rb_define_const(cXMLReader, "SEVERITY_VALIDITY_WARNING", INT2FIX(XML_PARSER_SEVERITY_VALIDITY_WARNING));
972
+ rb_define_const(cXMLReader, "SEVERITY_VALIDITY_ERROR", INT2FIX(XML_PARSER_SEVERITY_VALIDITY_ERROR));
973
+ rb_define_const(cXMLReader, "SEVERITY_WARNING", INT2FIX(XML_PARSER_SEVERITY_WARNING));
974
+ rb_define_const(cXMLReader, "SEVERITY_ERROR", INT2FIX(XML_PARSER_SEVERITY_ERROR));
975
+
976
+ rb_define_const(cXMLReader, "TYPE_NONE", INT2FIX(XML_READER_TYPE_NONE));
977
+ rb_define_const(cXMLReader, "TYPE_ELEMENT", INT2FIX(XML_READER_TYPE_ELEMENT));
978
+ rb_define_const(cXMLReader, "TYPE_ATTRIBUTE", INT2FIX(XML_READER_TYPE_ATTRIBUTE));
979
+ rb_define_const(cXMLReader, "TYPE_TEXT", INT2FIX(XML_READER_TYPE_TEXT));
980
+ rb_define_const(cXMLReader, "TYPE_CDATA", INT2FIX(XML_READER_TYPE_CDATA));
981
+ rb_define_const(cXMLReader, "TYPE_ENTITY_REFERENCE", INT2FIX(XML_READER_TYPE_ENTITY_REFERENCE));
982
+ rb_define_const(cXMLReader, "TYPE_ENTITY", INT2FIX(XML_READER_TYPE_ENTITY));
983
+ rb_define_const(cXMLReader, "TYPE_PROCESSING_INSTRUCTION", INT2FIX(XML_READER_TYPE_PROCESSING_INSTRUCTION));
984
+ rb_define_const(cXMLReader, "TYPE_COMMENT", INT2FIX(XML_READER_TYPE_COMMENT));
985
+ rb_define_const(cXMLReader, "TYPE_DOCUMENT", INT2FIX(XML_READER_TYPE_DOCUMENT));
986
+ rb_define_const(cXMLReader, "TYPE_DOCUMENT_TYPE", INT2FIX(XML_READER_TYPE_DOCUMENT_TYPE));
987
+ rb_define_const(cXMLReader, "TYPE_DOCUMENT_FRAGMENT", INT2FIX(XML_READER_TYPE_DOCUMENT_FRAGMENT));
988
+ rb_define_const(cXMLReader, "TYPE_NOTATION", INT2FIX(XML_READER_TYPE_NOTATION));
989
+ rb_define_const(cXMLReader, "TYPE_WHITESPACE", INT2FIX(XML_READER_TYPE_WHITESPACE));
990
+ rb_define_const(cXMLReader, "TYPE_SIGNIFICANT_WHITESPACE", INT2FIX(XML_READER_TYPE_SIGNIFICANT_WHITESPACE));
991
+ rb_define_const(cXMLReader, "TYPE_END_ELEMENT", INT2FIX(XML_READER_TYPE_END_ELEMENT));
992
+ rb_define_const(cXMLReader, "TYPE_END_ENTITY", INT2FIX(XML_READER_TYPE_END_ENTITY));
993
+ rb_define_const(cXMLReader, "TYPE_XML_DECLARATION", INT2FIX(XML_READER_TYPE_XML_DECLARATION));
994
+
995
+ /* Read states */
996
+ rb_define_const(cXMLReader, "MODE_INITIAL", INT2FIX(XML_TEXTREADER_MODE_INITIAL));
997
+ rb_define_const(cXMLReader, "MODE_INTERACTIVE", INT2FIX(XML_TEXTREADER_MODE_INTERACTIVE));
998
+ rb_define_const(cXMLReader, "MODE_ERROR", INT2FIX(XML_TEXTREADER_MODE_ERROR));
999
+ rb_define_const(cXMLReader, "MODE_EOF", INT2FIX(XML_TEXTREADER_MODE_EOF));
1000
+ rb_define_const(cXMLReader, "MODE_CLOSED", INT2FIX(XML_TEXTREADER_MODE_CLOSED));
1001
+ rb_define_const(cXMLReader, "MODE_READING", INT2FIX(XML_TEXTREADER_MODE_READING));
1002
+ }