libxml-ruby 2.0.4-x86-mingw32 → 2.0.5-x86-mingw32

Sign up to get free protection for your applications and to get access to all the features.
data/HISTORY CHANGED
@@ -1,5 +1,10 @@
1
1
  = Release History
2
2
 
3
+ == 2.0.5 / 2011-05-05 Charlie Savage
4
+
5
+ * Document#validate_dtd would sometimes cause segmentation faults due to
6
+ an improperly initialized data structure (Charlie Savage)
7
+
3
8
  == 2.0.4 / 2011-05-02 Charlie Savage
4
9
 
5
10
  * Fix compile issues on platforms using older versions of libxml2.
data/ext/libxml/extconf.h CHANGED
@@ -1,5 +1,5 @@
1
- #ifndef EXTCONF_H
2
- #define EXTCONF_H
3
- #define HAVE_ZLIB_H 1
4
- #define HAVE_LIBXML_XMLVERSION_H 1
5
- #endif
1
+ #ifndef EXTCONF_H
2
+ #define EXTCONF_H
3
+ #define HAVE_ZLIB_H 1
4
+ #define HAVE_LIBXML_XMLVERSION_H 1
5
+ #endif
@@ -902,13 +902,10 @@ static VALUE rxml_document_validate_dtd(VALUE self, VALUE dtd)
902
902
  Data_Get_Struct(self, xmlDoc, xdoc);
903
903
  Data_Get_Struct(dtd, xmlDtd, xdtd);
904
904
 
905
+ /* Setup context */
906
+ memset(&ctxt, 0, sizeof(xmlValidCtxt));
905
907
  ctxt.userData = &error;
906
908
 
907
- ctxt.nodeNr = 0;
908
- ctxt.nodeTab = NULL;
909
- ctxt.vstateNr = 0;
910
- ctxt.vstateTab = NULL;
911
-
912
909
  if (xmlValidateDtd(&ctxt, xdoc, xdtd))
913
910
  {
914
911
  return (Qtrue);
@@ -509,7 +509,7 @@ static VALUE rxml_node_doc(VALUE self)
509
509
  return (VALUE) xdoc->_private;
510
510
  else
511
511
  /* This can happen by calling Reader#expand.doc */
512
- rb_raise(eXMLError, "Document is not accessible to Ruby (hint - did you call Reader#expand?)");
512
+ rb_raise(eXMLError, "Document is not accessible to Ruby (hint - did you call Reader#expand?)");
513
513
  }
514
514
 
515
515
  /*
@@ -1,1085 +1,1085 @@
1
- /* Copyright (c) 2006-2007 Apple Inc.
2
- * Please see the LICENSE file for copyright and distribution information. */
3
-
4
- #include "ruby_libxml.h"
5
- #include "ruby_xml_reader.h"
6
-
7
- /*
8
- * Document-class: LibXML::XML::Reader
9
- *
10
- * The XML::Reader class provides a simpler, alternative way of parsing an XML
11
- * document in contrast to XML::Parser or XML::SaxParser. A XML::Reader instance
12
- * acts like a cursor going forward in a document stream, stopping at each node
13
- * it encounters. To advance to the next node, simply call XML::Reader#read.
14
- *
15
- * The XML::Reader API closely matches the DOM Core specification and supports
16
- * namespaces, xml:base, entity handling and DTDs.
17
- *
18
- * To summarize, XML::Reader provides a far simpler API to use versus XML::SaxParser
19
- * and is more memory efficient than using XML::Parser to create a DOM tree.
20
- *
21
- * Example:
22
- *
23
- * reader = XML::Reader.string("<foo><bar>1</bar><bar>2</bar><bar>3</bar></foo>")
24
- * reader.read
25
- * assert_equal('foo', reader.name)
26
- * assert_equal(nil, reader.value)
27
- *
28
- * 3.times do |i|
29
- * reader.read
30
- * assert_equal(XML::Reader::TYPE_ELEMENT, reader.node_type)
31
- * assert_equal('bar', reader.name)
32
- * reader.read
33
- * assert_equal(XML::Reader::TYPE_TEXT, reader.node_type)
34
- * assert_equal((i + 1).to_s, reader.value)
35
- * reader.read
36
- * assert_equal(XML::Reader::TYPE_END_ELEMENT, reader.node_type)
37
- * end
38
- *
39
- * You can also parse documents (see XML::Reader.document),
40
- * strings (see XML::Reader.string) and io objects (see
41
- * XML::Reader.io).
42
- *
43
- * For a more in depth tutorial, albeit in C, see http://xmlsoft.org/xmlreader.html.*/
44
-
45
- VALUE cXMLReader;
46
-
47
- static ID BASE_URI_SYMBOL;
48
- static ID ENCODING_SYMBOL;
49
- static ID IO_ATTR;
50
- static ID OPTIONS_SYMBOL;
51
-
52
- static void rxml_reader_free(xmlTextReaderPtr xreader)
53
- {
54
- xmlFreeTextReader(xreader);
55
- }
56
-
57
- static VALUE rxml_reader_wrap(xmlTextReaderPtr xreader)
58
- {
59
- return Data_Wrap_Struct(cXMLReader, NULL, rxml_reader_free, xreader);
60
- }
61
-
62
- static xmlTextReaderPtr rxml_text_reader_get(VALUE obj)
63
- {
64
- xmlTextReaderPtr xreader;
65
- Data_Get_Struct(obj, xmlTextReader, xreader);
66
- return xreader;
67
- }
68
-
69
- /*
70
- * call-seq:
71
- * XML::Reader.document(doc) -> XML::Reader
72
- *
73
- * Create a new reader for the specified document.
74
- */
75
- VALUE rxml_reader_document(VALUE klass, VALUE doc)
76
- {
77
- xmlDocPtr xdoc;
78
- xmlTextReaderPtr xreader;
79
-
80
- Data_Get_Struct(doc, xmlDoc, xdoc);
81
-
82
- xreader = xmlReaderWalker(xdoc);
83
-
84
- if (xreader == NULL)
85
- rxml_raise(&xmlLastError);
86
-
87
- return rxml_reader_wrap(xreader);
88
- }
89
-
90
- /* call-seq:
91
- * XML::Reader.file(path) -> XML::Reader
92
- * XML::Reader.file(path, :encoding => XML::Encoding::UTF_8,
93
- * :options => XML::Parser::Options::NOENT) -> XML::Reader
94
- *
95
- * Creates a new reader by parsing the specified file or uri.
96
- *
97
- * You may provide an optional hash table to control how the
98
- * parsing is performed. Valid options are:
99
- *
100
- * encoding - The document encoding, defaults to nil. Valid values
101
- * are the encoding constants defined on XML::Encoding.
102
- * options - Controls the execution of the parser, defaults to 0.
103
- * Valid values are the constants defined on
104
- * XML::Parser::Options. Multiple options can be combined
105
- * by using Bitwise OR (|).
106
- */
107
- static VALUE rxml_reader_file(int argc, VALUE *argv, VALUE klass)
108
- {
109
- xmlTextReaderPtr xreader;
110
- VALUE path;
111
- VALUE options;
112
-
113
- const char *xencoding = NULL;
114
- int xoptions = 0;
115
-
116
- rb_scan_args(argc, argv, "11", &path, &options);
117
- Check_Type(path, T_STRING);
118
-
119
- if (!NIL_P(options))
120
- {
121
- VALUE encoding = Qnil;
122
- VALUE parserOptions = Qnil;
123
-
124
- Check_Type(options, T_HASH);
125
-
126
- encoding = rb_hash_aref(options, BASE_URI_SYMBOL);
127
- xencoding = NIL_P(encoding) ? NULL : xmlGetCharEncodingName(NUM2INT(encoding));
128
-
129
- parserOptions = rb_hash_aref(options, OPTIONS_SYMBOL);
130
- xoptions = NIL_P(parserOptions) ? 0 : NUM2INT(parserOptions);
131
- }
132
-
133
- xreader = xmlReaderForFile(StringValueCStr(path), xencoding, xoptions);
134
-
135
- if (xreader == NULL)
136
- rxml_raise(&xmlLastError);
137
-
138
- return rxml_reader_wrap(xreader);
139
- }
140
-
141
- /* call-seq:
142
- * XML::Reader.io(io) -> XML::Reader
143
- * XML::Reader.io(io, :encoding => XML::Encoding::UTF_8,
144
- * :options => XML::Parser::Options::NOENT) -> XML::Reader
145
- *
146
- * Creates a new reader by parsing the specified io object.
147
- *
148
- * You may provide an optional hash table to control how the
149
- * parsing is performed. Valid options are:
150
- *
151
- * base_uri - The base url for the parsed document.
152
- * encoding - The document encoding, defaults to nil. Valid values
153
- * are the encoding constants defined on XML::Encoding.
154
- * options - Controls the execution of the parser, defaults to 0.
155
- * Valid values are the constants defined on
156
- * XML::Parser::Options. Multiple options can be combined
157
- * by using Bitwise OR (|).
158
- */
159
- static VALUE rxml_reader_io(int argc, VALUE *argv, VALUE klass)
160
- {
161
- xmlTextReaderPtr xreader;
162
- VALUE result;
163
- VALUE io;
164
- VALUE options;
165
- char *xbaseurl = NULL;
166
- const char *xencoding = NULL;
167
- int xoptions = 0;
168
-
169
- rb_scan_args(argc, argv, "11", &io, &options);
170
-
171
- if (!NIL_P(options))
172
- {
173
- VALUE baseurl = Qnil;
174
- VALUE encoding = Qnil;
175
- VALUE parserOptions = Qnil;
176
-
177
- Check_Type(options, T_HASH);
178
-
179
- baseurl = rb_hash_aref(options, BASE_URI_SYMBOL);
180
- xbaseurl = NIL_P(baseurl) ? NULL : StringValueCStr(baseurl);
181
-
182
- encoding = rb_hash_aref(options, ENCODING_SYMBOL);
183
- xencoding = NIL_P(encoding) ? NULL : xmlGetCharEncodingName(NUM2INT(encoding));
184
-
185
- parserOptions = rb_hash_aref(options, OPTIONS_SYMBOL);
186
- xoptions = NIL_P(parserOptions) ? 0 : NUM2INT(parserOptions);
187
- }
188
-
189
- xreader = xmlReaderForIO((xmlInputReadCallback) rxml_read_callback, NULL,
190
- (void *) io,
191
- xbaseurl, xencoding, xoptions);
192
-
193
- if (xreader == NULL)
194
- rxml_raise(&xmlLastError);
195
-
196
- result = rxml_reader_wrap(xreader);
197
-
198
- /* Attach io object to parser so it won't get freed.*/
199
- rb_ivar_set(result, IO_ATTR, io);
200
-
201
- return result;
202
- }
203
-
204
- /* call-seq:
205
- * XML::Reader.string(string) -> XML::Reader
206
- * XML::Reader.string(string, :encoding => XML::Encoding::UTF_8,
207
- * :options => XML::Parser::Options::NOENT) -> XML::Reader
208
- *
209
- * Creates a new reader by parsing the specified string.
210
- *
211
- * You may provide an optional hash table to control how the
212
- * parsing is performed. Valid options are:
213
- *
214
- * base_uri - The base url for the parsed document.
215
- * encoding - The document encoding, defaults to nil. Valid values
216
- * are the encoding constants defined on XML::Encoding.
217
- * options - Controls the execution of the parser, defaults to 0.
218
- * Valid values are the constants defined on
219
- * XML::Parser::Options. Multiple options can be combined
220
- * by using Bitwise OR (|).
221
- */
222
- static VALUE rxml_reader_string(int argc, VALUE *argv, VALUE klass)
223
- {
224
- xmlTextReaderPtr xreader;
225
- VALUE string;
226
- VALUE options;
227
- char *xbaseurl = NULL;
228
- const char *xencoding = NULL;
229
- int xoptions = 0;
230
-
231
- rb_scan_args(argc, argv, "11", &string, &options);
232
- Check_Type(string, T_STRING);
233
-
234
- if (!NIL_P(options))
235
- {
236
- VALUE baseurl = Qnil;
237
- VALUE encoding = Qnil;
238
- VALUE parserOptions = Qnil;
239
-
240
- Check_Type(options, T_HASH);
241
-
242
- baseurl = rb_hash_aref(options, BASE_URI_SYMBOL);
243
- xbaseurl = NIL_P(baseurl) ? NULL : StringValueCStr(baseurl);
244
-
245
- encoding = rb_hash_aref(options, ENCODING_SYMBOL);
246
- xencoding = NIL_P(encoding) ? NULL : xmlGetCharEncodingName(NUM2INT(encoding));
247
-
248
- parserOptions = rb_hash_aref(options, OPTIONS_SYMBOL);
249
- xoptions = NIL_P(parserOptions) ? 0 : NUM2INT(parserOptions);
250
- }
251
-
252
- xreader = xmlReaderForMemory(StringValueCStr(string), RSTRING_LEN(string),
253
- xbaseurl, xencoding, xoptions);
254
-
255
- if (xreader == NULL)
256
- rxml_raise(&xmlLastError);
257
-
258
- return rxml_reader_wrap(xreader);
259
- }
260
-
261
- /*
262
- * call-seq:
263
- * reader.close -> code
264
- *
265
- * This method releases any resources allocated by the current instance,
266
- * changes the state to Closed and closes any underlying input.
267
- */
268
- static VALUE rxml_reader_close(VALUE self)
269
- {
270
- xmlTextReaderPtr xreader = rxml_text_reader_get(self);
271
- return INT2FIX(xmlTextReaderClose(xreader));
272
- }
273
-
274
- /*
275
- * call-seq:
276
- * reader.move_to_attribute(val) -> code
277
- *
278
- * Move the position of the current instance to the attribute with the
279
- * specified index (if +val+ is an integer) or name (if +val+ is a string)
280
- * relative to the containing element.
281
- */
282
- static VALUE rxml_reader_move_to_attr(VALUE self, VALUE val)
283
- {
284
- xmlTextReaderPtr xreader;
285
- int ret;
286
-
287
- xreader = rxml_text_reader_get(self);
288
-
289
- if (TYPE(val) == T_FIXNUM)
290
- {
291
- ret = xmlTextReaderMoveToAttributeNo(xreader, FIX2INT(val));
292
- }
293
- else
294
- {
295
- ret = xmlTextReaderMoveToAttribute(xreader,
296
- (const xmlChar *) StringValueCStr(val));
297
- }
298
-
299
- return INT2FIX(ret);
300
- }
301
-
302
- /*
303
- * call-seq:
304
- * reader.move_to_first_attribute -> code
305
- *
306
- * Move the position of the current instance to the first attribute associated
307
- * with the current node.
308
- */
309
- static VALUE rxml_reader_move_to_first_attr(VALUE self)
310
- {
311
- xmlTextReaderPtr xreader = rxml_text_reader_get(self);
312
- return INT2FIX(xmlTextReaderMoveToFirstAttribute(xreader));
313
- }
314
-
315
- /*
316
- * call-seq:
317
- * reader.move_to_next_attribute -> code
318
- *
319
- * Move the position of the current instance to the next attribute associated
320
- * with the current node.
321
- */
322
- static VALUE rxml_reader_move_to_next_attr(VALUE self)
323
- {
324
- xmlTextReaderPtr xreader = rxml_text_reader_get(self);
325
- return INT2FIX(xmlTextReaderMoveToNextAttribute(xreader));
326
- }
327
-
328
- /*
329
- * call-seq:
330
- * reader.move_to_element -> code
331
- *
332
- * Move the position of the current instance to the node that contains the
333
- * current attribute node.
334
- */
335
- static VALUE rxml_reader_move_to_element(VALUE self)
336
- {
337
- xmlTextReaderPtr xreader = rxml_text_reader_get(self);
338
- return INT2FIX(xmlTextReaderMoveToElement(xreader));
339
- }
340
-
341
- /*
342
- * call-seq:
343
- * reader.next -> code
344
- *
345
- * Skip to the node following the current one in document order while avoiding
346
- * the subtree if any.
347
- */
348
- static VALUE rxml_reader_next(VALUE self)
349
- {
350
- xmlTextReaderPtr xreader = rxml_text_reader_get(self);
351
- return INT2FIX(xmlTextReaderNext(xreader));
352
- }
353
-
354
- /*
355
- * call-seq:
356
- * reader.next_sibling -> code
357
- *
358
- * Skip to the node following the current one in document order while avoiding
359
- * the subtree if any. Currently implemented only for Readers built on a
360
- * document.
361
- */
362
- static VALUE rxml_reader_next_sibling(VALUE self)
363
- {
364
- xmlTextReaderPtr xreader = rxml_text_reader_get(self);
365
- return INT2FIX(xmlTextReaderNextSibling(xreader));
366
- }
367
-
368
- /*
369
- * call-seq:
370
- * reader.node -> XML::Node
371
- *
372
- * Returns the reader's current node. It will return
373
- * nil if Reader#read has not yet been called.
374
- * WARNING - Using this method is dangerous because the
375
- * node may be destroyed on the next #read.
376
- */
377
- static VALUE rxml_reader_node(VALUE self)
378
- {
379
- xmlTextReaderPtr xreader = rxml_text_reader_get(self);
380
- xmlNodePtr xnode = xmlTextReaderCurrentNode(xreader);
381
- return xnode ? rxml_node_wrap(xnode) : Qnil;
382
- }
383
-
384
- /*
385
- * call-seq:
386
- * reader.node_type -> type
387
- *
388
- * Get the node type of the current node. Reference:
389
- * http://dotgnu.org/pnetlib-doc/System/Xml/XmlNodeType.html
390
- */
391
- static VALUE rxml_reader_node_type(VALUE self)
392
- {
393
- xmlTextReaderPtr xreader = rxml_text_reader_get(self);
394
- return INT2FIX(xmlTextReaderNodeType(xreader));
395
- }
396
-
397
- /*
398
- * call-seq:
399
- * reader.normalization -> value
400
- *
401
- * The value indicating whether to normalize white space and attribute values.
402
- * Since attribute value and end of line normalizations are a MUST in the XML
403
- * specification only the value true is accepted. The broken behaviour of
404
- * accepting out of range character entities like &#0; is of course not
405
- * supported either.
406
- *
407
- * Return 1 or -1 in case of error.
408
- */
409
- static VALUE rxml_reader_normalization(VALUE self)
410
- {
411
- xmlTextReaderPtr xreader = rxml_text_reader_get(self);
412
- return INT2FIX(xmlTextReaderNormalization(xreader));
413
- }
414
-
415
- /*
416
- * call-seq:
417
- * reader.read -> code
418
- *
419
- * Causes the reader to move to the next node in the stream, exposing its properties.
420
- *
421
- * Returns true if a node was successfully read or false if there are no more
422
- * nodes to read. On errors, an exception is raised.*/
423
- static VALUE rxml_reader_read(VALUE self)
424
- {
425
- xmlTextReaderPtr xreader = rxml_text_reader_get(self);
426
- int result = xmlTextReaderRead(xreader);
427
- switch(result)
428
- {
429
- case -1:
430
- rxml_raise(&xmlLastError);
431
- return Qnil;
432
- break;
433
- case 0:
434
- return Qfalse;
435
- case 1:
436
- return Qtrue;
437
- default:
438
- rb_raise(rb_eRuntimeError,
439
- "xmlTextReaderRead did not return -1, 0 or 1. Return value was: %d", result);
440
- }
441
- }
442
-
443
- /*
444
- * call-seq:
445
- * reader.read_attribute_value -> code
446
- *
447
- * Parse an attribute value into one or more Text and EntityReference nodes.
448
- *
449
- * Return 1 in case of success, 0 if the reader was not positioned on an
450
- * attribute node or all the attribute values have been read, or -1 in case of
451
- * error.
452
- */
453
- static VALUE rxml_reader_read_attr_value(VALUE self)
454
- {
455
- xmlTextReaderPtr xreader = rxml_text_reader_get(self);
456
- return INT2FIX(xmlTextReaderReadAttributeValue(xreader));
457
- }
458
-
459
- /*
460
- * call-seq:
461
- * reader.read_inner_xml -> data
462
- *
463
- * Read the contents of the current node, including child nodes and markup.
464
- *
465
- * Return a string containing the XML content, or nil if the current node is
466
- * neither an element nor attribute, or has no child nodes.
467
- */
468
- static VALUE rxml_reader_read_inner_xml(VALUE self)
469
- {
470
- VALUE result = Qnil;
471
- xmlTextReaderPtr xReader = rxml_text_reader_get(self);
472
-
473
- xmlChar *xml = xmlTextReaderReadInnerXml(xReader);
474
-
475
- if (xml)
476
- {
477
- const xmlChar *xencoding = xmlTextReaderConstEncoding(xReader);
478
- result = rxml_str_new2((const char*) xml, xencoding);
479
- xmlFree(xml);
480
- }
481
-
482
- return result;
483
- }
484
-
485
- /*
486
- * call-seq:
487
- * reader.read_outer_xml -> data
488
- *
489
- * Read the contents of the current node, including child nodes and markup.
490
- *
491
- * Return a string containing the XML content, or nil if the current node is
492
- * neither an element nor attribute, or has no child nodes.
493
- */
494
- static VALUE rxml_reader_read_outer_xml(VALUE self)
495
- {
496
- VALUE result = Qnil;
497
- xmlTextReaderPtr xReader = rxml_text_reader_get(self);
498
-
499
- xmlChar *xml = xmlTextReaderReadOuterXml(xReader);
500
-
501
- if (xml)
502
- {
503
- const xmlChar *xencoding = xmlTextReaderConstEncoding(xReader);
504
- result = rxml_str_new2((const char*) xml, xencoding);
505
- xmlFree(xml);
506
- }
507
-
508
- return result;
509
- }
510
-
511
- /*
512
- * call-seq:
513
- * reader.read_state -> state
514
- *
515
- * Get the read state of the reader.
516
- */
517
- static VALUE rxml_reader_read_state(VALUE self)
518
- {
519
- xmlTextReaderPtr xreader = rxml_text_reader_get(self);
520
- return INT2FIX(xmlTextReaderReadState(xreader));
521
- }
522
-
523
- /*
524
- * call-seq:
525
- * reader.read_string -> string
526
- *
527
- * Read the contents of an element or a text node as a string.
528
- *
529
- * Return a string containing the contents of the Element or Text node, or nil
530
- * if the reader is positioned on any other type of node.
531
- */
532
- static VALUE rxml_reader_read_string(VALUE self)
533
- {
534
- VALUE result = Qnil;
535
- xmlTextReaderPtr xReader = rxml_text_reader_get(self);
536
-
537
- xmlChar *xml = xmlTextReaderReadString(xReader);
538
-
539
- if (xml)
540
- {
541
- const xmlChar *xencoding = xmlTextReaderConstEncoding(xReader);
542
- result = rxml_str_new2((const char*) xml, xencoding);
543
- xmlFree(xml);
544
- }
545
-
546
- return result;
547
- }
548
-
549
- /*
550
- * call-seq:
551
- * reader.relax_ng_validate(rng) -> code
552
- *
553
- * Use RelaxNG to validate the document as it is processed. Activation is only
554
- * possible before the first read. If +rng+ is nil, the RelaxNG validation is
555
- * deactivated.
556
- *
557
- * Return 0 in case the RelaxNG validation could be (de)activated and -1 in
558
- * case of error.
559
- */
560
- static VALUE rxml_reader_relax_ng_validate(VALUE self, VALUE rng)
561
- {
562
- char *xrng = NIL_P(rng) ? NULL : StringValueCStr(rng);
563
- xmlTextReaderPtr xreader = rxml_text_reader_get(self);
564
- return INT2FIX(xmlTextReaderRelaxNGValidate(xreader, xrng));
565
- }
566
-
567
- #if LIBXML_VERSION >= 20620
568
- /*
569
- * call-seq:
570
- * reader.schema_validate(schema) -> code
571
- *
572
- * Use W3C XSD schema to validate the document as it is processed. Activation
573
- * is only possible before the first read. If +schema+ is nil, then XML Schema
574
- * validation is deactivated.
575
- *
576
- * Return 0 in case the schemas validation could be (de)activated and -1 in
577
- * case of error.
578
- */
579
- static VALUE
580
- rxml_reader_schema_validate(VALUE self, VALUE xsd)
581
- {
582
- char *xxsd = NIL_P(xsd) ? NULL : StringValueCStr(xsd);
583
- xmlTextReaderPtr xreader = rxml_text_reader_get(self);
584
- int status = xmlTextReaderSchemaValidate(xreader, xxsd);
585
- return INT2FIX(status);
586
- }
587
- #endif
588
-
589
- /*
590
- * call-seq:
591
- * reader.name -> name
592
- *
593
- * Return the qualified name of the node.
594
- */
595
- static VALUE rxml_reader_name(VALUE self)
596
- {
597
- xmlTextReaderPtr xReader = rxml_text_reader_get(self);
598
- const xmlChar *result = xmlTextReaderConstName(xReader);
599
- const xmlChar *xencoding = xmlTextReaderConstEncoding(xReader);
600
-
601
- return (result == NULL ? Qnil : rxml_str_new2(result, xencoding));
602
- }
603
-
604
- /*
605
- * call-seq:
606
- * reader.local_name -> name
607
- *
608
- * Return the local name of the node.
609
- */
610
- static VALUE rxml_reader_local_name(VALUE self)
611
- {
612
- xmlTextReaderPtr xReader = rxml_text_reader_get(self);
613
- const xmlChar *result = xmlTextReaderConstLocalName(xReader);
614
- const xmlChar *xencoding = xmlTextReaderConstEncoding(xReader);
615
-
616
- return (result == NULL ? Qnil : rxml_str_new2(result, xencoding));
617
- }
618
-
619
- /*
620
- * call-seq:
621
- * reader.attribute_count -> count
622
- *
623
- * Provide the number of attributes of the current node.
624
- */
625
- static VALUE rxml_reader_attr_count(VALUE self)
626
- {
627
- xmlTextReaderPtr xreader = rxml_text_reader_get(self);
628
- return INT2FIX(xmlTextReaderAttributeCount(xreader));
629
- }
630
-
631
- /*
632
- * call-seq:
633
- * reader.encoding -> XML::Encoding::UTF_8
634
- *
635
- * Returns the encoding of the document being read. Note you
636
- * first have to read data from the reader for encoding
637
- * to return a value
638
- *
639
- * reader = XML::Reader.file(XML_FILE)
640
- * assert_nil(reader.encoding)
641
- * reader.read
642
- * assert_equal(XML::Encoding::UTF_8, reader.encoding)
643
- *
644
- * In addition, libxml always appears to return nil for the encoding
645
- * when parsing strings.
646
- */
647
- static VALUE rxml_reader_encoding(VALUE self)
648
- {
649
- xmlTextReaderPtr xreader = rxml_text_reader_get(self);
650
- const xmlChar *xencoding = xmlTextReaderConstEncoding(xreader);
651
- if (xencoding)
652
- return INT2NUM(xmlParseCharEncoding(xencoding));
653
- else
654
- return INT2NUM(XML_CHAR_ENCODING_NONE);
655
- }
656
-
657
- /*
658
- * call-seq:
659
- * reader.base_uri -> URI
660
- *
661
- * Determine the base URI of the node.
662
- */
663
- static VALUE rxml_reader_base_uri(VALUE self)
664
- {
665
- xmlTextReaderPtr xReader = rxml_text_reader_get(self);
666
- const xmlChar *result = xmlTextReaderConstBaseUri(xReader);
667
- const xmlChar *xencoding = xmlTextReaderConstEncoding(xReader);
668
-
669
- return (result == NULL ? Qnil : rxml_str_new2(result, xencoding));
670
- }
671
-
672
- /*
673
- * call-seq:
674
- * reader.namespace_uri -> URI
675
- *
676
- * Determine the namespace URI of the node.
677
- */
678
- static VALUE rxml_reader_namespace_uri(VALUE self)
679
- {
680
- xmlTextReaderPtr xReader = rxml_text_reader_get(self);
681
- const xmlChar *result = xmlTextReaderConstNamespaceUri(xReader);
682
- const xmlChar *xencoding = xmlTextReaderConstEncoding(xReader);
683
-
684
- return (result == NULL ? Qnil : rxml_str_new2(result, xencoding));
685
- }
686
-
687
- /*
688
- * call-seq:
689
- * reader.value -> text
690
- *
691
- * Provide the text value of the node if present.
692
- */
693
- static VALUE rxml_reader_value(VALUE self)
694
- {
695
- xmlTextReaderPtr xReader = rxml_text_reader_get(self);
696
- const xmlChar *result = xmlTextReaderConstValue(xReader);
697
- const xmlChar *xencoding = xmlTextReaderConstEncoding(xReader);
698
-
699
- return (result == NULL ? Qnil : rxml_str_new2(result, xencoding));
700
- }
701
-
702
- /*
703
- * call-seq:
704
- * reader.prefix -> prefix
705
- *
706
- * Get a shorthand reference to the namespace associated with the node.
707
- */
708
- static VALUE rxml_reader_prefix(VALUE self)
709
- {
710
- xmlTextReaderPtr xReader = rxml_text_reader_get(self);
711
- const xmlChar *result = xmlTextReaderConstPrefix(xReader);
712
- const xmlChar *xencoding = xmlTextReaderConstEncoding(xReader);
713
-
714
- return (result == NULL ? Qnil : rxml_str_new2(result, xencoding));
715
- }
716
-
717
- /*
718
- * call-seq:
719
- * reader.depth -> depth
720
- *
721
- * Get the depth of the node in the tree.
722
- */
723
- static VALUE rxml_reader_depth(VALUE self)
724
- {
725
- xmlTextReaderPtr xreader = rxml_text_reader_get(self);
726
- return INT2FIX(xmlTextReaderDepth(xreader));
727
- }
728
-
729
- /*
730
- * call-seq:
731
- * reader.quote_char -> char
732
- *
733
- * Get the quotation mark character used to enclose the value of an attribute,
734
- * as an integer value (and -1 in case of error).
735
- */
736
- static VALUE rxml_reader_quote_char(VALUE self)
737
- {
738
- xmlTextReaderPtr xreader = rxml_text_reader_get(self);
739
- return INT2FIX(xmlTextReaderQuoteChar(xreader));
740
- }
741
-
742
- /*
743
- * call-seq:
744
- * reader.standalone -> code
745
- *
746
- * Determine the standalone status of the document being read.
747
- *
748
- * Return 1 if the document was declared to be standalone, 0 if it was
749
- * declared to be not standalone, or -1 if the document did not specify its
750
- * standalone status or in case of error.
751
- */
752
- static VALUE rxml_reader_standalone(VALUE self)
753
- {
754
- xmlTextReaderPtr xreader = rxml_text_reader_get(self);
755
- return INT2FIX(xmlTextReaderStandalone(xreader));
756
- }
757
-
758
- /*
759
- * call-seq:
760
- * reader.xml_lang -> value
761
- *
762
- * Get the xml:lang scope within which the node resides.
763
- */
764
- static VALUE rxml_reader_xml_lang(VALUE self)
765
- {
766
- xmlTextReaderPtr xReader = rxml_text_reader_get(self);
767
- const xmlChar *result = xmlTextReaderConstXmlLang(xReader);
768
- const xmlChar *xencoding = xmlTextReaderConstEncoding(xReader);
769
-
770
- return (result == NULL ? Qnil : rxml_str_new2(result, xencoding));
771
- }
772
-
773
- /*
774
- * call-seq:
775
- * reader.xml_version -> version
776
- *
777
- * Determine the XML version of the document being read.
778
- */
779
- static VALUE rxml_reader_xml_version(VALUE self)
780
- {
781
- xmlTextReaderPtr xReader = rxml_text_reader_get(self);
782
- const xmlChar *result = xmlTextReaderConstXmlVersion(xReader);
783
- const xmlChar *xencoding = xmlTextReaderConstEncoding(xReader);
784
-
785
- return (result == NULL ? Qnil : rxml_str_new2(result, xencoding));
786
- }
787
-
788
- /*
789
- * call-seq:
790
- * reader.has_attributes? -> bool
791
- *
792
- * Get whether the node has attributes.
793
- */
794
- static VALUE rxml_reader_has_attributes(VALUE self)
795
- {
796
- xmlTextReaderPtr xreader = rxml_text_reader_get(self);
797
- return xmlTextReaderHasAttributes(xreader) ? Qtrue : Qfalse;
798
- }
799
-
800
- /*
801
- * call-seq:
802
- * reader.has_value? -> bool
803
- *
804
- * Get whether the node can have a text value.
805
- */
806
- static VALUE rxml_reader_has_value(VALUE self)
807
- {
808
- xmlTextReaderPtr xreader = rxml_text_reader_get(self);
809
- return xmlTextReaderHasValue(xreader) ? Qtrue : Qfalse;
810
- }
811
-
812
- /*
813
- * call-seq:
814
- * reader[key] -> value
815
- *
816
- * Provide the value of the attribute with the specified index (if +key+ is an
817
- * integer) or with the specified name (if +key+ is a string) relative to the
818
- * containing element, as a string.
819
- */
820
- static VALUE rxml_reader_attribute(VALUE self, VALUE key)
821
- {
822
- VALUE result = Qnil;
823
- xmlChar *xattr;
824
- xmlTextReaderPtr xReader = rxml_text_reader_get(self);
825
- const xmlChar *xencoding = xmlTextReaderConstEncoding(xReader);
826
-
827
- if (TYPE(key) == T_FIXNUM)
828
- {
829
- xattr = xmlTextReaderGetAttributeNo(xReader, FIX2INT(key));
830
- }
831
- else
832
- {
833
- xattr = xmlTextReaderGetAttribute(xReader, (const xmlChar *) StringValueCStr(key));
834
- }
835
-
836
- if (xattr)
837
- {
838
- result = rxml_str_new2(xattr, xencoding);
839
- xmlFree(xattr);
840
- }
841
- return result;
842
- }
843
-
844
- /*
845
- * call-seq:
846
- * reader.lookup_namespace(prefix) -> value
847
- *
848
- * Resolve a namespace prefix in the scope of the current element.
849
- * To return the default namespace, specify nil as +prefix+.
850
- */
851
- static VALUE rxml_reader_lookup_namespace(VALUE self, VALUE prefix)
852
- {
853
- VALUE result = Qnil;
854
- xmlTextReaderPtr xReader = rxml_text_reader_get(self);
855
- const xmlChar *xnamespace = xmlTextReaderLookupNamespace(xReader, (const xmlChar *) StringValueCStr(prefix));
856
- const xmlChar *xencoding = xmlTextReaderConstEncoding(xReader);
857
-
858
- if (xnamespace)
859
- {
860
- result = rxml_str_new2((const char*)xnamespace, (const char*)xencoding);
861
- xmlFree((void *)xnamespace);
862
- }
863
- return result;
864
- }
865
-
866
- /*
867
- * call-seq:
868
- * reader.expand -> node
869
- *
870
- * Returns the current node and its full subtree. Note the returned node
871
- * is valid ONLY until the next read call.
872
- */
873
- static VALUE rxml_reader_expand(VALUE self)
874
- {
875
- xmlTextReaderPtr xreader = rxml_text_reader_get(self);
876
- xmlNodePtr xnode = xmlTextReaderExpand(xreader);
877
-
878
- if (!xnode)
879
- {
880
- return Qnil;
881
- }
882
- else
883
- {
884
- return rxml_node_wrap(xnode);
885
- }
886
- }
887
-
888
- #if LIBXML_VERSION >= 20618
889
- /*
890
- * call-seq:
891
- * reader.byte_consumed -> value
892
- *
893
- * This method provides the current index of the parser used by the reader,
894
- * relative to the start of the current entity.
895
- */
896
- static VALUE
897
- rxml_reader_byte_consumed(VALUE self)
898
- {
899
- xmlTextReaderPtr xreader = rxml_text_reader_get(self);
900
- return INT2NUM(xmlTextReaderByteConsumed(xreader));
901
- }
902
- #endif
903
-
904
- #if LIBXML_VERSION >= 20617
905
- /*
906
- * call-seq:
907
- * reader.column_number -> number
908
- *
909
- * Provide the column number of the current parsing point.
910
- */
911
- static VALUE
912
- rxml_reader_column_number(VALUE self)
913
- {
914
- xmlTextReaderPtr xreader = rxml_text_reader_get(self);
915
- return INT2NUM(xmlTextReaderGetParserColumnNumber(xreader));
916
- }
917
-
918
- /*
919
- * call-seq:
920
- * reader.line_number -> number
921
- *
922
- * Provide the line number of the current parsing point.
923
- */
924
- static VALUE
925
- rxml_reader_line_number(VALUE self)
926
- {
927
- xmlTextReaderPtr xreader = rxml_text_reader_get(self);
928
- return INT2NUM(xmlTextReaderGetParserLineNumber(xreader));
929
- }
930
- #endif
931
-
932
- /*
933
- * call-seq:
934
- * reader.default? -> bool
935
- *
936
- * Return whether an Attribute node was generated from the default value
937
- * defined in the DTD or schema.
938
- */
939
- static VALUE rxml_reader_default(VALUE self)
940
- {
941
- xmlTextReaderPtr xreader = rxml_text_reader_get(self);
942
- return xmlTextReaderIsDefault(xreader) ? Qtrue : Qfalse;
943
- }
944
-
945
- /*
946
- * call-seq:
947
- * reader.namespace_declaration? -> bool
948
- *
949
- * Determine whether the current node is a namespace declaration rather than a
950
- * regular attribute.
951
- */
952
- static VALUE rxml_reader_namespace_declaration(VALUE self)
953
- {
954
- xmlTextReaderPtr xreader = rxml_text_reader_get(self);
955
- return xmlTextReaderIsNamespaceDecl(xreader) ? Qtrue : Qfalse;
956
- }
957
-
958
- /*
959
- * call-seq:
960
- * reader.empty_element? -> bool
961
- *
962
- * Check if the current node is empty.
963
- */
964
- static VALUE rxml_reader_empty_element(VALUE self)
965
- {
966
- xmlTextReaderPtr xreader = rxml_text_reader_get(self);
967
- return xmlTextReaderIsEmptyElement(xreader) ? Qtrue : Qfalse;
968
- }
969
-
970
- /*
971
- * call-seq:
972
- * reader.valid? -> bool
973
- *
974
- * Retrieve the validity status from the parser context.
975
- */
976
- static VALUE rxml_reader_valid(VALUE self)
977
- {
978
- xmlTextReaderPtr xreader = rxml_text_reader_get(self);
979
- return xmlTextReaderIsValid(xreader) ? Qtrue : Qfalse;
980
- }
981
-
982
- void rxml_init_reader(void)
983
- {
984
- BASE_URI_SYMBOL = ID2SYM(rb_intern("base_uri"));
985
- ENCODING_SYMBOL = ID2SYM(rb_intern("encoding"));
986
- IO_ATTR = rb_intern("@io");
987
- OPTIONS_SYMBOL = ID2SYM(rb_intern("options"));
988
-
989
- cXMLReader = rb_define_class_under(mXML, "Reader", rb_cObject);
990
-
991
- rb_define_singleton_method(cXMLReader, "document", rxml_reader_document, 1);
992
- rb_define_singleton_method(cXMLReader, "file", rxml_reader_file, -1);
993
- rb_define_singleton_method(cXMLReader, "io", rxml_reader_io, -1);
994
- rb_define_singleton_method(cXMLReader, "string", rxml_reader_string, -1);
995
-
996
- rb_define_method(cXMLReader, "[]", rxml_reader_attribute, 1);
997
- rb_define_method(cXMLReader, "attribute_count", rxml_reader_attr_count, 0);
998
- rb_define_method(cXMLReader, "base_uri", rxml_reader_base_uri, 0);
999
- #if LIBXML_VERSION >= 20618
1000
- rb_define_method(cXMLReader, "byte_consumed", rxml_reader_byte_consumed, 0);
1001
- #endif
1002
- rb_define_method(cXMLReader, "close", rxml_reader_close, 0);
1003
- #if LIBXML_VERSION >= 20617
1004
- rb_define_method(cXMLReader, "column_number", rxml_reader_column_number, 0);
1005
- #endif
1006
- rb_define_method(cXMLReader, "depth", rxml_reader_depth, 0);
1007
- rb_define_method(cXMLReader, "encoding", rxml_reader_encoding, 0);
1008
- rb_define_method(cXMLReader, "expand", rxml_reader_expand, 0);
1009
- rb_define_method(cXMLReader, "has_attributes?", rxml_reader_has_attributes, 0);
1010
- rb_define_method(cXMLReader, "has_value?", rxml_reader_has_value, 0);
1011
- #if LIBXML_VERSION >= 20617
1012
- rb_define_method(cXMLReader, "line_number", rxml_reader_line_number, 0);
1013
- #endif
1014
- rb_define_method(cXMLReader, "local_name", rxml_reader_local_name, 0);
1015
- rb_define_method(cXMLReader, "lookup_namespace", rxml_reader_lookup_namespace, 1);
1016
- rb_define_method(cXMLReader, "move_to_attribute", rxml_reader_move_to_attr, 1);
1017
- rb_define_method(cXMLReader, "move_to_first_attribute", rxml_reader_move_to_first_attr, 0);
1018
- rb_define_method(cXMLReader, "move_to_next_attribute", rxml_reader_move_to_next_attr, 0);
1019
- rb_define_method(cXMLReader, "move_to_element", rxml_reader_move_to_element, 0);
1020
- rb_define_method(cXMLReader, "name", rxml_reader_name, 0);
1021
- rb_define_method(cXMLReader, "namespace_uri", rxml_reader_namespace_uri, 0);
1022
- rb_define_method(cXMLReader, "next", rxml_reader_next, 0);
1023
- rb_define_method(cXMLReader, "next_sibling", rxml_reader_next_sibling, 0);
1024
- rb_define_method(cXMLReader, "node", rxml_reader_node, 0);
1025
- rb_define_method(cXMLReader, "node_type", rxml_reader_node_type, 0);
1026
- rb_define_method(cXMLReader, "normalization", rxml_reader_normalization, 0);
1027
- rb_define_method(cXMLReader, "prefix", rxml_reader_prefix, 0);
1028
- rb_define_method(cXMLReader, "quote_char", rxml_reader_quote_char, 0);
1029
- rb_define_method(cXMLReader, "read", rxml_reader_read, 0);
1030
- rb_define_method(cXMLReader, "read_attribute_value", rxml_reader_read_attr_value, 0);
1031
- rb_define_method(cXMLReader, "read_inner_xml", rxml_reader_read_inner_xml, 0);
1032
- rb_define_method(cXMLReader, "read_outer_xml", rxml_reader_read_outer_xml, 0);
1033
- rb_define_method(cXMLReader, "read_state", rxml_reader_read_state, 0);
1034
- rb_define_method(cXMLReader, "read_string", rxml_reader_read_string, 0);
1035
- rb_define_method(cXMLReader, "relax_ng_validate", rxml_reader_relax_ng_validate, 1);
1036
- rb_define_method(cXMLReader, "standalone", rxml_reader_standalone, 0);
1037
- #if LIBXML_VERSION >= 20620
1038
- rb_define_method(cXMLReader, "schema_validate", rxml_reader_schema_validate, 1);
1039
- #endif
1040
- rb_define_method(cXMLReader, "value", rxml_reader_value, 0);
1041
- rb_define_method(cXMLReader, "xml_lang", rxml_reader_xml_lang, 0);
1042
- rb_define_method(cXMLReader, "xml_version", rxml_reader_xml_version, 0);
1043
- rb_define_method(cXMLReader, "default?", rxml_reader_default, 0);
1044
- rb_define_method(cXMLReader, "empty_element?", rxml_reader_empty_element, 0);
1045
- rb_define_method(cXMLReader, "namespace_declaration?", rxml_reader_namespace_declaration, 0);
1046
- rb_define_method(cXMLReader, "valid?", rxml_reader_valid, 0);
1047
-
1048
- /* Constants */
1049
- rb_define_const(cXMLReader, "LOADDTD", INT2FIX(XML_PARSER_LOADDTD));
1050
- rb_define_const(cXMLReader, "DEFAULTATTRS", INT2FIX(XML_PARSER_DEFAULTATTRS));
1051
- rb_define_const(cXMLReader, "VALIDATE", INT2FIX(XML_PARSER_VALIDATE));
1052
- rb_define_const(cXMLReader, "SUBST_ENTITIES", INT2FIX(XML_PARSER_SUBST_ENTITIES));
1053
-
1054
- rb_define_const(cXMLReader, "SEVERITY_VALIDITY_WARNING", INT2FIX(XML_PARSER_SEVERITY_VALIDITY_WARNING));
1055
- rb_define_const(cXMLReader, "SEVERITY_VALIDITY_ERROR", INT2FIX(XML_PARSER_SEVERITY_VALIDITY_ERROR));
1056
- rb_define_const(cXMLReader, "SEVERITY_WARNING", INT2FIX(XML_PARSER_SEVERITY_WARNING));
1057
- rb_define_const(cXMLReader, "SEVERITY_ERROR", INT2FIX(XML_PARSER_SEVERITY_ERROR));
1058
-
1059
- rb_define_const(cXMLReader, "TYPE_NONE", INT2FIX(XML_READER_TYPE_NONE));
1060
- rb_define_const(cXMLReader, "TYPE_ELEMENT", INT2FIX(XML_READER_TYPE_ELEMENT));
1061
- rb_define_const(cXMLReader, "TYPE_ATTRIBUTE", INT2FIX(XML_READER_TYPE_ATTRIBUTE));
1062
- rb_define_const(cXMLReader, "TYPE_TEXT", INT2FIX(XML_READER_TYPE_TEXT));
1063
- rb_define_const(cXMLReader, "TYPE_CDATA", INT2FIX(XML_READER_TYPE_CDATA));
1064
- rb_define_const(cXMLReader, "TYPE_ENTITY_REFERENCE", INT2FIX(XML_READER_TYPE_ENTITY_REFERENCE));
1065
- rb_define_const(cXMLReader, "TYPE_ENTITY", INT2FIX(XML_READER_TYPE_ENTITY));
1066
- rb_define_const(cXMLReader, "TYPE_PROCESSING_INSTRUCTION", INT2FIX(XML_READER_TYPE_PROCESSING_INSTRUCTION));
1067
- rb_define_const(cXMLReader, "TYPE_COMMENT", INT2FIX(XML_READER_TYPE_COMMENT));
1068
- rb_define_const(cXMLReader, "TYPE_DOCUMENT", INT2FIX(XML_READER_TYPE_DOCUMENT));
1069
- rb_define_const(cXMLReader, "TYPE_DOCUMENT_TYPE", INT2FIX(XML_READER_TYPE_DOCUMENT_TYPE));
1070
- rb_define_const(cXMLReader, "TYPE_DOCUMENT_FRAGMENT", INT2FIX(XML_READER_TYPE_DOCUMENT_FRAGMENT));
1071
- rb_define_const(cXMLReader, "TYPE_NOTATION", INT2FIX(XML_READER_TYPE_NOTATION));
1072
- rb_define_const(cXMLReader, "TYPE_WHITESPACE", INT2FIX(XML_READER_TYPE_WHITESPACE));
1073
- rb_define_const(cXMLReader, "TYPE_SIGNIFICANT_WHITESPACE", INT2FIX(XML_READER_TYPE_SIGNIFICANT_WHITESPACE));
1074
- rb_define_const(cXMLReader, "TYPE_END_ELEMENT", INT2FIX(XML_READER_TYPE_END_ELEMENT));
1075
- rb_define_const(cXMLReader, "TYPE_END_ENTITY", INT2FIX(XML_READER_TYPE_END_ENTITY));
1076
- rb_define_const(cXMLReader, "TYPE_XML_DECLARATION", INT2FIX(XML_READER_TYPE_XML_DECLARATION));
1077
-
1078
- /* Read states */
1079
- rb_define_const(cXMLReader, "MODE_INITIAL", INT2FIX(XML_TEXTREADER_MODE_INITIAL));
1080
- rb_define_const(cXMLReader, "MODE_INTERACTIVE", INT2FIX(XML_TEXTREADER_MODE_INTERACTIVE));
1081
- rb_define_const(cXMLReader, "MODE_ERROR", INT2FIX(XML_TEXTREADER_MODE_ERROR));
1082
- rb_define_const(cXMLReader, "MODE_EOF", INT2FIX(XML_TEXTREADER_MODE_EOF));
1083
- rb_define_const(cXMLReader, "MODE_CLOSED", INT2FIX(XML_TEXTREADER_MODE_CLOSED));
1084
- rb_define_const(cXMLReader, "MODE_READING", INT2FIX(XML_TEXTREADER_MODE_READING));
1085
- }
1
+ /* Copyright (c) 2006-2007 Apple Inc.
2
+ * Please see the LICENSE file for copyright and distribution information. */
3
+
4
+ #include "ruby_libxml.h"
5
+ #include "ruby_xml_reader.h"
6
+
7
+ /*
8
+ * Document-class: LibXML::XML::Reader
9
+ *
10
+ * The XML::Reader class provides a simpler, alternative way of parsing an XML
11
+ * document in contrast to XML::Parser or XML::SaxParser. A XML::Reader instance
12
+ * acts like a cursor going forward in a document stream, stopping at each node
13
+ * it encounters. To advance to the next node, simply call XML::Reader#read.
14
+ *
15
+ * The XML::Reader API closely matches the DOM Core specification and supports
16
+ * namespaces, xml:base, entity handling and DTDs.
17
+ *
18
+ * To summarize, XML::Reader provides a far simpler API to use versus XML::SaxParser
19
+ * and is more memory efficient than using XML::Parser to create a DOM tree.
20
+ *
21
+ * Example:
22
+ *
23
+ * reader = XML::Reader.string("<foo><bar>1</bar><bar>2</bar><bar>3</bar></foo>")
24
+ * reader.read
25
+ * assert_equal('foo', reader.name)
26
+ * assert_equal(nil, reader.value)
27
+ *
28
+ * 3.times do |i|
29
+ * reader.read
30
+ * assert_equal(XML::Reader::TYPE_ELEMENT, reader.node_type)
31
+ * assert_equal('bar', reader.name)
32
+ * reader.read
33
+ * assert_equal(XML::Reader::TYPE_TEXT, reader.node_type)
34
+ * assert_equal((i + 1).to_s, reader.value)
35
+ * reader.read
36
+ * assert_equal(XML::Reader::TYPE_END_ELEMENT, reader.node_type)
37
+ * end
38
+ *
39
+ * You can also parse documents (see XML::Reader.document),
40
+ * strings (see XML::Reader.string) and io objects (see
41
+ * XML::Reader.io).
42
+ *
43
+ * For a more in depth tutorial, albeit in C, see http://xmlsoft.org/xmlreader.html.*/
44
+
45
+ VALUE cXMLReader;
46
+
47
+ static ID BASE_URI_SYMBOL;
48
+ static ID ENCODING_SYMBOL;
49
+ static ID IO_ATTR;
50
+ static ID OPTIONS_SYMBOL;
51
+
52
+ static void rxml_reader_free(xmlTextReaderPtr xreader)
53
+ {
54
+ xmlFreeTextReader(xreader);
55
+ }
56
+
57
+ static VALUE rxml_reader_wrap(xmlTextReaderPtr xreader)
58
+ {
59
+ return Data_Wrap_Struct(cXMLReader, NULL, rxml_reader_free, xreader);
60
+ }
61
+
62
+ static xmlTextReaderPtr rxml_text_reader_get(VALUE obj)
63
+ {
64
+ xmlTextReaderPtr xreader;
65
+ Data_Get_Struct(obj, xmlTextReader, xreader);
66
+ return xreader;
67
+ }
68
+
69
+ /*
70
+ * call-seq:
71
+ * XML::Reader.document(doc) -> XML::Reader
72
+ *
73
+ * Create a new reader for the specified document.
74
+ */
75
+ VALUE rxml_reader_document(VALUE klass, VALUE doc)
76
+ {
77
+ xmlDocPtr xdoc;
78
+ xmlTextReaderPtr xreader;
79
+
80
+ Data_Get_Struct(doc, xmlDoc, xdoc);
81
+
82
+ xreader = xmlReaderWalker(xdoc);
83
+
84
+ if (xreader == NULL)
85
+ rxml_raise(&xmlLastError);
86
+
87
+ return rxml_reader_wrap(xreader);
88
+ }
89
+
90
+ /* call-seq:
91
+ * XML::Reader.file(path) -> XML::Reader
92
+ * XML::Reader.file(path, :encoding => XML::Encoding::UTF_8,
93
+ * :options => XML::Parser::Options::NOENT) -> XML::Parser
94
+ *
95
+ * Creates a new reader by parsing the specified file or uri.
96
+ *
97
+ * You may provide an optional hash table to control how the
98
+ * parsing is performed. Valid options are:
99
+ *
100
+ * encoding - The document encoding, defaults to nil. Valid values
101
+ * are the encoding constants defined on XML::Encoding.
102
+ * options - Controls the execution of the parser, defaults to 0.
103
+ * Valid values are the constants defined on
104
+ * XML::Parser::Options. Mutliple options can be combined
105
+ * by using Bitwise OR (|).
106
+ */
107
+ static VALUE rxml_reader_file(int argc, VALUE *argv, VALUE klass)
108
+ {
109
+ xmlTextReaderPtr xreader;
110
+ VALUE path;
111
+ VALUE options;
112
+
113
+ const char *xencoding = NULL;
114
+ int xoptions = 0;
115
+
116
+ rb_scan_args(argc, argv, "11", &path, &options);
117
+ Check_Type(path, T_STRING);
118
+
119
+ if (!NIL_P(options))
120
+ {
121
+ VALUE encoding = Qnil;
122
+ VALUE parserOptions = Qnil;
123
+
124
+ Check_Type(options, T_HASH);
125
+
126
+ encoding = rb_hash_aref(options, BASE_URI_SYMBOL);
127
+ xencoding = NIL_P(encoding) ? NULL : xmlGetCharEncodingName(NUM2INT(encoding));
128
+
129
+ parserOptions = rb_hash_aref(options, OPTIONS_SYMBOL);
130
+ xoptions = NIL_P(parserOptions) ? 0 : NUM2INT(parserOptions);
131
+ }
132
+
133
+ xreader = xmlReaderForFile(StringValueCStr(path), xencoding, xoptions);
134
+
135
+ if (xreader == NULL)
136
+ rxml_raise(&xmlLastError);
137
+
138
+ return rxml_reader_wrap(xreader);
139
+ }
140
+
141
+ /* call-seq:
142
+ * XML::Reader.io(io) -> XML::Reader
143
+ * XML::Reader.io(io, :encoding => XML::Encoding::UTF_8,
144
+ * :options => XML::Parser::Options::NOENT) -> XML::Reader
145
+ *
146
+ * Creates a new reader by parsing the specified io object.
147
+ *
148
+ * You may provide an optional hash table to control how the
149
+ * parsing is performed. Valid options are:
150
+ *
151
+ * base_uri - The base url for the parsed document.
152
+ * encoding - The document encoding, defaults to nil. Valid values
153
+ * are the encoding constants defined on XML::Encoding.
154
+ * options - Controls the execution of the parser, defaults to 0.
155
+ * Valid values are the constants defined on
156
+ * XML::Parser::Options. Multiple options can be combined
157
+ * by using Bitwise OR (|).
158
+ */
159
+ static VALUE rxml_reader_io(int argc, VALUE *argv, VALUE klass)
160
+ {
161
+ xmlTextReaderPtr xreader;
162
+ VALUE result;
163
+ VALUE io;
164
+ VALUE options;
165
+ char *xbaseurl = NULL;
166
+ const char *xencoding = NULL;
167
+ int xoptions = 0;
168
+
169
+ rb_scan_args(argc, argv, "11", &io, &options);
170
+
171
+ if (!NIL_P(options))
172
+ {
173
+ VALUE baseurl = Qnil;
174
+ VALUE encoding = Qnil;
175
+ VALUE parserOptions = Qnil;
176
+
177
+ Check_Type(options, T_HASH);
178
+
179
+ baseurl = rb_hash_aref(options, BASE_URI_SYMBOL);
180
+ xbaseurl = NIL_P(baseurl) ? NULL : StringValueCStr(baseurl);
181
+
182
+ encoding = rb_hash_aref(options, ENCODING_SYMBOL);
183
+ xencoding = NIL_P(encoding) ? NULL : xmlGetCharEncodingName(NUM2INT(encoding));
184
+
185
+ parserOptions = rb_hash_aref(options, OPTIONS_SYMBOL);
186
+ xoptions = NIL_P(parserOptions) ? 0 : NUM2INT(parserOptions);
187
+ }
188
+
189
+ xreader = xmlReaderForIO((xmlInputReadCallback) rxml_read_callback, NULL,
190
+ (void *) io,
191
+ xbaseurl, xencoding, xoptions);
192
+
193
+ if (xreader == NULL)
194
+ rxml_raise(&xmlLastError);
195
+
196
+ result = rxml_reader_wrap(xreader);
197
+
198
+ /* Attach io object to parser so it won't get freed.*/
199
+ rb_ivar_set(result, IO_ATTR, io);
200
+
201
+ return result;
202
+ }
203
+
204
+ /* call-seq:
205
+ * XML::Reader.string(string) -> XML::Reader
206
+ * XML::Reader.string(string, :encoding => XML::Encoding::UTF_8,
207
+ * :options => XML::Parser::Options::NOENT) -> XML::Reader
208
+ *
209
+ * Creates a new reader by parsing the specified string.
210
+ *
211
+ * You may provide an optional hash table to control how the
212
+ * parsing is performed. Valid options are:
213
+ *
214
+ * base_uri - The base url for the parsed document.
215
+ * encoding - The document encoding, defaults to nil. Valid values
216
+ * are the encoding constants defined on XML::Encoding.
217
+ * options - Controls the execution of the parser, defaults to 0.
218
+ * Valid values are the constants defined on
219
+ * XML::Parser::Options. Multiple options can be combined
220
+ * by using Bitwise OR (|).
221
+ */
222
+ static VALUE rxml_reader_string(int argc, VALUE *argv, VALUE klass)
223
+ {
224
+ xmlTextReaderPtr xreader;
225
+ VALUE string;
226
+ VALUE options;
227
+ char *xbaseurl = NULL;
228
+ const char *xencoding = NULL;
229
+ int xoptions = 0;
230
+
231
+ rb_scan_args(argc, argv, "11", &string, &options);
232
+ Check_Type(string, T_STRING);
233
+
234
+ if (!NIL_P(options))
235
+ {
236
+ VALUE baseurl = Qnil;
237
+ VALUE encoding = Qnil;
238
+ VALUE parserOptions = Qnil;
239
+
240
+ Check_Type(options, T_HASH);
241
+
242
+ baseurl = rb_hash_aref(options, BASE_URI_SYMBOL);
243
+ xbaseurl = NIL_P(baseurl) ? NULL : StringValueCStr(baseurl);
244
+
245
+ encoding = rb_hash_aref(options, ENCODING_SYMBOL);
246
+ xencoding = NIL_P(encoding) ? NULL : xmlGetCharEncodingName(NUM2INT(encoding));
247
+
248
+ parserOptions = rb_hash_aref(options, OPTIONS_SYMBOL);
249
+ xoptions = NIL_P(parserOptions) ? 0 : NUM2INT(parserOptions);
250
+ }
251
+
252
+ xreader = xmlReaderForMemory(StringValueCStr(string), RSTRING_LEN(string),
253
+ xbaseurl, xencoding, xoptions);
254
+
255
+ if (xreader == NULL)
256
+ rxml_raise(&xmlLastError);
257
+
258
+ return rxml_reader_wrap(xreader);
259
+ }
260
+
261
+ /*
262
+ * call-seq:
263
+ * reader.close -> code
264
+ *
265
+ * This method releases any resources allocated by the current instance,
265
+ * changes the state to Closed and closes any underlying input.
267
+ */
268
+ static VALUE rxml_reader_close(VALUE self)
269
+ {
270
+ xmlTextReaderPtr xreader = rxml_text_reader_get(self);
271
+ return INT2FIX(xmlTextReaderClose(xreader));
272
+ }
273
+
274
+ /*
275
+ * call-seq:
276
+ * reader.move_to_attribute(val) -> code
277
+ *
278
+ * Move the position of the current instance to the attribute with the
279
+ * specified index (if +val+ is an integer) or name (if +val+ is a string)
280
+ * relative to the containing element.
281
+ */
282
+ static VALUE rxml_reader_move_to_attr(VALUE self, VALUE val)
283
+ {
284
+ xmlTextReaderPtr xreader;
285
+ int ret;
286
+
287
+ xreader = rxml_text_reader_get(self);
288
+
289
+ if (TYPE(val) == T_FIXNUM)
290
+ {
291
+ ret = xmlTextReaderMoveToAttributeNo(xreader, FIX2INT(val));
292
+ }
293
+ else
294
+ {
295
+ ret = xmlTextReaderMoveToAttribute(xreader,
296
+ (const xmlChar *) StringValueCStr(val));
297
+ }
298
+
299
+ return INT2FIX(ret);
300
+ }
301
+
302
+ /*
303
+ * call-seq:
304
+ * reader.move_to_first_attribute -> code
305
+ *
306
+ * Move the position of the current instance to the first attribute associated
307
+ * with the current node.
308
+ */
309
+ static VALUE rxml_reader_move_to_first_attr(VALUE self)
310
+ {
311
+ xmlTextReaderPtr xreader = rxml_text_reader_get(self);
312
+ return INT2FIX(xmlTextReaderMoveToFirstAttribute(xreader));
313
+ }
314
+
315
+ /*
316
+ * call-seq:
317
+ * reader.move_to_next_attribute -> code
318
+ *
319
+ * Move the position of the current instance to the next attribute associated
320
+ * with the current node.
321
+ */
322
+ static VALUE rxml_reader_move_to_next_attr(VALUE self)
323
+ {
324
+ xmlTextReaderPtr xreader = rxml_text_reader_get(self);
325
+ return INT2FIX(xmlTextReaderMoveToNextAttribute(xreader));
326
+ }
327
+
328
+ /*
329
+ * call-seq:
330
+ * reader.move_to_element -> code
331
+ *
332
+ * Move the position of the current instance to the node that contains the
333
+ * current attribute node.
334
+ */
335
+ static VALUE rxml_reader_move_to_element(VALUE self)
336
+ {
337
+ xmlTextReaderPtr xreader = rxml_text_reader_get(self);
338
+ return INT2FIX(xmlTextReaderMoveToElement(xreader));
339
+ }
340
+
341
+ /*
342
+ * call-seq:
343
+ * reader.next -> code
344
+ *
345
+ * Skip to the node following the current one in document order while avoiding
346
+ * the subtree if any.
347
+ */
348
+ static VALUE rxml_reader_next(VALUE self)
349
+ {
350
+ xmlTextReaderPtr xreader = rxml_text_reader_get(self);
351
+ return INT2FIX(xmlTextReaderNext(xreader));
352
+ }
353
+
354
+ /*
355
+ * call-seq:
356
+ * reader.next_sibling -> code
357
+ *
358
+ * Skip to the node following the current one in document order while avoiding
359
+ * the subtree if any. Currently implemented only for Readers built on a
360
+ * document.
361
+ */
362
+ static VALUE rxml_reader_next_sibling(VALUE self)
363
+ {
364
+ xmlTextReaderPtr xreader = rxml_text_reader_get(self);
365
+ return INT2FIX(xmlTextReaderNextSibling(xreader));
366
+ }
367
+
368
+ /*
369
+ * call-seq:
370
+ * reader.node -> XML::Node
371
+ *
372
+ * Returns the reader's current node. It will return
373
+ * nil if Reader#read has not yet been called.
374
+ * WARNING - Using this method is dangerous because the
375
+ * node may be destroyed on the next #read.
376
+ */
377
+ static VALUE rxml_reader_node(VALUE self)
378
+ {
379
+ xmlTextReaderPtr xreader = rxml_text_reader_get(self);
380
+ xmlNodePtr xnode = xmlTextReaderCurrentNode(xreader);
381
+ return xnode ? rxml_node_wrap(xnode) : Qnil;
382
+ }
383
+
384
+ /*
385
+ * call-seq:
386
+ * reader.node_type -> type
387
+ *
388
+ * Get the node type of the current node. Reference:
389
+ * http://dotgnu.org/pnetlib-doc/System/Xml/XmlNodeType.html
390
+ */
391
+ static VALUE rxml_reader_node_type(VALUE self)
392
+ {
393
+ xmlTextReaderPtr xreader = rxml_text_reader_get(self);
394
+ return INT2FIX(xmlTextReaderNodeType(xreader));
395
+ }
396
+
397
+ /*
398
+ * call-seq:
399
+ * reader.normalization -> value
400
+ *
401
+ * The value indicating whether to normalize white space and attribute values.
402
+ * Since attribute value and end of line normalizations are a MUST in the XML
403
+ * specification only the value true is accepted. The broken behaviour of
404
+ * accepting out of range character entities like &#0; is of course not
405
+ * supported either.
406
+ *
407
+ * Return 1 or -1 in case of error.
408
+ */
409
+ static VALUE rxml_reader_normalization(VALUE self)
410
+ {
411
+ xmlTextReaderPtr xreader = rxml_text_reader_get(self);
412
+ return INT2FIX(xmlTextReaderNormalization(xreader));
413
+ }
414
+
415
+ /*
416
+ * call-seq:
417
+ * reader.read -> bool
418
+ *
419
+ * Causes the reader to move to the next node in the stream, exposing its properties.
420
+ *
421
+ * Returns true if a node was successfully read or false if there are no more
422
+ * nodes to read. On errors, an exception is raised.*/
423
+ static VALUE rxml_reader_read(VALUE self)
424
+ {
425
+ xmlTextReaderPtr xreader = rxml_text_reader_get(self);
426
+ int result = xmlTextReaderRead(xreader);
427
+ switch(result)
428
+ {
429
+ case -1:
430
+ rxml_raise(&xmlLastError);
431
+ return Qnil;
432
+ break;
433
+ case 0:
434
+ return Qfalse;
435
+ case 1:
436
+ return Qtrue;
437
+ default:
438
+ rb_raise(rb_eRuntimeError,
439
+ "xmlTextReaderRead did not return -1, 0 or 1. Return value was: %d", result);
440
+ }
441
+ }
442
+
443
+ /*
444
+ * call-seq:
445
+ * reader.read_attribute_value -> code
446
+ *
447
+ * Parse an attribute value into one or more Text and EntityReference nodes.
448
+ *
449
+ * Return 1 in case of success, 0 if the reader was not positioned on an
450
+ * attribute node or all the attribute values have been read, or -1 in case of
451
+ * error.
452
+ */
453
+ static VALUE rxml_reader_read_attr_value(VALUE self)
454
+ {
455
+ xmlTextReaderPtr xreader = rxml_text_reader_get(self);
456
+ return INT2FIX(xmlTextReaderReadAttributeValue(xreader));
457
+ }
458
+
459
+ /*
460
+ * call-seq:
461
+ * reader.read_inner_xml -> data
462
+ *
463
+ * Read the contents of the current node, including child nodes and markup.
464
+ *
465
+ * Return a string containing the XML content, or nil if the current node is
466
+ * neither an element nor attribute, or has no child nodes.
467
+ */
468
+ static VALUE rxml_reader_read_inner_xml(VALUE self)
469
+ {
470
+ VALUE result = Qnil;
471
+ xmlTextReaderPtr xReader = rxml_text_reader_get(self);
472
+
473
+ xmlChar *xml = xmlTextReaderReadInnerXml(xReader);
474
+
475
+ if (xml)
476
+ {
477
+ const xmlChar *xencoding = xmlTextReaderConstEncoding(xReader);
478
+ result = rxml_str_new2((const char*) xml, xencoding);
479
+ xmlFree(xml);
480
+ }
481
+
482
+ return result;
483
+ }
484
+
485
+ /*
486
+ * call-seq:
487
+ * reader.read_outer_xml -> data
488
+ *
489
+ * Read the contents of the current node, including child nodes and markup.
490
+ *
491
+ * Return a string containing the XML content, or nil if the current node is
492
+ * neither an element nor attribute, or has no child nodes.
493
+ */
494
+ static VALUE rxml_reader_read_outer_xml(VALUE self)
495
+ {
496
+ VALUE result = Qnil;
497
+ xmlTextReaderPtr xReader = rxml_text_reader_get(self);
498
+
499
+ xmlChar *xml = xmlTextReaderReadOuterXml(xReader);
500
+
501
+ if (xml)
502
+ {
503
+ const xmlChar *xencoding = xmlTextReaderConstEncoding(xReader);
504
+ result = rxml_str_new2((const char*) xml, xencoding);
505
+ xmlFree(xml);
506
+ }
507
+
508
+ return result;
509
+ }
510
+
511
+ /*
512
+ * call-seq:
513
+ * reader.read_state -> state
514
+ *
515
+ * Get the read state of the reader.
516
+ */
517
+ static VALUE rxml_reader_read_state(VALUE self)
518
+ {
519
+ xmlTextReaderPtr xreader = rxml_text_reader_get(self);
520
+ return INT2FIX(xmlTextReaderReadState(xreader));
521
+ }
522
+
523
+ /*
524
+ * call-seq:
525
+ * reader.read_string -> string
526
+ *
527
+ * Read the contents of an element or a text node as a string.
528
+ *
529
+ * Return a string containing the contents of the Element or Text node, or nil
530
+ * if the reader is positioned on any other type of node.
531
+ */
532
+ static VALUE rxml_reader_read_string(VALUE self)
533
+ {
534
+ VALUE result = Qnil;
535
+ xmlTextReaderPtr xReader = rxml_text_reader_get(self);
536
+
537
+ xmlChar *xml = xmlTextReaderReadString(xReader);
538
+
539
+ if (xml)
540
+ {
541
+ const xmlChar *xencoding = xmlTextReaderConstEncoding(xReader);
542
+ result = rxml_str_new2((const char*) xml, xencoding);
543
+ xmlFree(xml);
544
+ }
545
+
546
+ return result;
547
+ }
548
+
549
+ /*
550
+ * call-seq:
551
+ * reader.relax_ng_validate(rng) -> code
552
+ *
553
+ * Use RelaxNG to validate the document as it is processed. Activation is only
554
+ * possible before the first read. If +rng+ is nil, the RelaxNG validation is
555
+ * deactivated.
556
+ *
557
+ * Return 0 in case the RelaxNG validation could be (de)activated and -1 in
558
+ * case of error.
559
+ */
560
+ static VALUE rxml_reader_relax_ng_validate(VALUE self, VALUE rng)
561
+ {
562
+ char *xrng = NIL_P(rng) ? NULL : StringValueCStr(rng);
563
+ xmlTextReaderPtr xreader = rxml_text_reader_get(self);
564
+ return INT2FIX(xmlTextReaderRelaxNGValidate(xreader, xrng));
565
+ }
566
+
567
+ #if LIBXML_VERSION >= 20620
568
+ /*
569
+ * call-seq:
570
+ * reader.schema_validate(schema) -> code
571
+ *
572
+ * Use W3C XSD schema to validate the document as it is processed. Activation
573
+ * is only possible before the first read. If +schema+ is nil, then XML Schema
574
+ * validation is deactivated.
575
+ *
576
+ * Return 0 in case the schemas validation could be (de)activated and -1 in
577
+ * case of error.
578
+ */
579
+ static VALUE
580
+ rxml_reader_schema_validate(VALUE self, VALUE xsd)
581
+ {
582
+ char *xxsd = NIL_P(xsd) ? NULL : StringValueCStr(xsd);
583
+ xmlTextReaderPtr xreader = rxml_text_reader_get(self);
584
+ int status = xmlTextReaderSchemaValidate(xreader, xxsd);
585
+ return INT2FIX(status);
586
+ }
587
+ #endif
588
+
589
+ /*
590
+ * call-seq:
591
+ * reader.name -> name
592
+ *
593
+ * Return the qualified name of the node.
594
+ */
595
+ static VALUE rxml_reader_name(VALUE self)
596
+ {
597
+ xmlTextReaderPtr xReader = rxml_text_reader_get(self);
598
+ const xmlChar *result = xmlTextReaderConstName(xReader);
599
+ const xmlChar *xencoding = xmlTextReaderConstEncoding(xReader);
600
+
601
+ return (result == NULL ? Qnil : rxml_str_new2(result, xencoding));
602
+ }
603
+
604
+ /*
605
+ * call-seq:
606
+ * reader.local_name -> name
607
+ *
608
+ * Return the local name of the node.
609
+ */
610
+ static VALUE rxml_reader_local_name(VALUE self)
611
+ {
612
+ xmlTextReaderPtr xReader = rxml_text_reader_get(self);
613
+ const xmlChar *result = xmlTextReaderConstLocalName(xReader);
614
+ const xmlChar *xencoding = xmlTextReaderConstEncoding(xReader);
615
+
616
+ return (result == NULL ? Qnil : rxml_str_new2(result, xencoding));
617
+ }
618
+
619
+ /*
620
+ * call-seq:
621
+ * reader.attribute_count -> count
622
+ *
623
+ * Provide the number of attributes of the current node.
624
+ */
625
+ static VALUE rxml_reader_attr_count(VALUE self)
626
+ {
627
+ xmlTextReaderPtr xreader = rxml_text_reader_get(self);
628
+ return INT2FIX(xmlTextReaderAttributeCount(xreader));
629
+ }
630
+
631
+ /*
632
+ * call-seq:
633
+ * reader.encoding -> XML::Encoding::UTF_8
634
+ *
635
+ * Returns the encoding of the document being read. Note you
636
+ * first have to read data from the reader for encoding
637
+ * to return a value
638
+ *
639
+ * reader = XML::Reader.file(XML_FILE)
640
+ * assert_nil(reader.encoding)
641
+ * reader.read
642
+ * assert_equal(XML::Encoding::UTF_8, reader.encoding)
643
+ *
644
+ * In addition, libxml always appears to return nil for the encoding
645
+ * when parsing strings.
646
+ */
647
+ static VALUE rxml_reader_encoding(VALUE self)
648
+ {
649
+ xmlTextReaderPtr xreader = rxml_text_reader_get(self);
650
+ const xmlChar *xencoding = xmlTextReaderConstEncoding(xreader);
651
+ if (xencoding)
652
+ return INT2NUM(xmlParseCharEncoding(xencoding));
653
+ else
654
+ return INT2NUM(XML_CHAR_ENCODING_NONE);
655
+ }
656
+
657
+ /*
658
+ * call-seq:
659
+ * reader.base_uri -> URI
660
+ *
661
+ * Determine the base URI of the node.
662
+ */
663
+ static VALUE rxml_reader_base_uri(VALUE self)
664
+ {
665
+ xmlTextReaderPtr xReader = rxml_text_reader_get(self);
666
+ const xmlChar *result = xmlTextReaderConstBaseUri(xReader);
667
+ const xmlChar *xencoding = xmlTextReaderConstEncoding(xReader);
668
+
669
+ return (result == NULL ? Qnil : rxml_str_new2(result, xencoding));
670
+ }
671
+
672
+ /*
673
+ * call-seq:
674
+ * reader.namespace_uri -> URI
675
+ *
676
+ * Determine the namespace URI of the node.
677
+ */
678
+ static VALUE rxml_reader_namespace_uri(VALUE self)
679
+ {
680
+ xmlTextReaderPtr xReader = rxml_text_reader_get(self);
681
+ const xmlChar *result = xmlTextReaderConstNamespaceUri(xReader);
682
+ const xmlChar *xencoding = xmlTextReaderConstEncoding(xReader);
683
+
684
+ return (result == NULL ? Qnil : rxml_str_new2(result, xencoding));
685
+ }
686
+
687
+ /*
688
+ * call-seq:
689
+ * reader.value -> text
690
+ *
691
+ * Provide the text value of the node if present.
692
+ */
693
+ static VALUE rxml_reader_value(VALUE self)
694
+ {
695
+ xmlTextReaderPtr xReader = rxml_text_reader_get(self);
696
+ const xmlChar *result = xmlTextReaderConstValue(xReader);
697
+ const xmlChar *xencoding = xmlTextReaderConstEncoding(xReader);
698
+
699
+ return (result == NULL ? Qnil : rxml_str_new2(result, xencoding));
700
+ }
701
+
702
+ /*
703
+ * call-seq:
704
+ * reader.prefix -> prefix
705
+ *
706
+ * Get a shorthand reference to the namespace associated with the node.
707
+ */
708
+ static VALUE rxml_reader_prefix(VALUE self)
709
+ {
710
+ xmlTextReaderPtr xReader = rxml_text_reader_get(self);
711
+ const xmlChar *result = xmlTextReaderConstPrefix(xReader);
712
+ const xmlChar *xencoding = xmlTextReaderConstEncoding(xReader);
713
+
714
+ return (result == NULL ? Qnil : rxml_str_new2(result, xencoding));
715
+ }
716
+
717
+ /*
718
+ * call-seq:
719
+ * reader.depth -> depth
720
+ *
721
+ * Get the depth of the node in the tree.
722
+ */
723
+ static VALUE rxml_reader_depth(VALUE self)
724
+ {
725
+ xmlTextReaderPtr xreader = rxml_text_reader_get(self);
726
+ return INT2FIX(xmlTextReaderDepth(xreader));
727
+ }
728
+
729
+ /*
730
+ * call-seq:
731
+ * reader.quote_char -> char
732
+ *
733
+ * Get the quotation mark character used to enclose the value of an attribute,
734
+ * as an integer value (and -1 in case of error).
735
+ */
736
+ static VALUE rxml_reader_quote_char(VALUE self)
737
+ {
738
+ xmlTextReaderPtr xreader = rxml_text_reader_get(self);
739
+ return INT2FIX(xmlTextReaderQuoteChar(xreader));
740
+ }
741
+
742
+ /*
743
+ * call-seq:
744
+ * reader.standalone -> code
745
+ *
746
+ * Determine the standalone status of the document being read.
747
+ *
748
+ * Return 1 if the document was declared to be standalone, 0 if it was
749
+ * declared to be not standalone, or -1 if the document did not specify its
750
+ * standalone status or in case of error.
751
+ */
752
+ static VALUE rxml_reader_standalone(VALUE self)
753
+ {
754
+ xmlTextReaderPtr xreader = rxml_text_reader_get(self);
755
+ return INT2FIX(xmlTextReaderStandalone(xreader));
756
+ }
757
+
758
+ /*
759
+ * call-seq:
760
+ * reader.xml_lang -> value
761
+ *
762
+ * Get the xml:lang scope within which the node resides.
763
+ */
764
+ static VALUE rxml_reader_xml_lang(VALUE self)
765
+ {
766
+ xmlTextReaderPtr xReader = rxml_text_reader_get(self);
767
+ const xmlChar *result = xmlTextReaderConstXmlLang(xReader);
768
+ const xmlChar *xencoding = xmlTextReaderConstEncoding(xReader);
769
+
770
+ return (result == NULL ? Qnil : rxml_str_new2(result, xencoding));
771
+ }
772
+
773
+ /*
774
+ * call-seq:
775
+ * reader.xml_version -> version
776
+ *
777
+ * Determine the XML version of the document being read.
778
+ */
779
+ static VALUE rxml_reader_xml_version(VALUE self)
780
+ {
781
+ xmlTextReaderPtr xReader = rxml_text_reader_get(self);
782
+ const xmlChar *result = xmlTextReaderConstXmlVersion(xReader);
783
+ const xmlChar *xencoding = xmlTextReaderConstEncoding(xReader);
784
+
785
+ return (result == NULL ? Qnil : rxml_str_new2(result, xencoding));
786
+ }
787
+
788
+ /*
789
+ * call-seq:
790
+ * reader.has_attributes? -> bool
791
+ *
792
+ * Get whether the node has attributes.
793
+ */
794
+ static VALUE rxml_reader_has_attributes(VALUE self)
795
+ {
796
+ xmlTextReaderPtr xreader = rxml_text_reader_get(self);
797
+ return xmlTextReaderHasAttributes(xreader) ? Qtrue : Qfalse;
798
+ }
799
+
800
+ /*
801
+ * call-seq:
802
+ * reader.has_value? -> bool
803
+ *
804
+ * Get whether the node can have a text value.
805
+ */
806
+ static VALUE rxml_reader_has_value(VALUE self)
807
+ {
808
+ xmlTextReaderPtr xreader = rxml_text_reader_get(self);
809
+ return xmlTextReaderHasValue(xreader) ? Qtrue : Qfalse;
810
+ }
811
+
812
+ /*
813
+ * call-seq:
814
+ * reader[key] -> value
815
+ *
816
+ * Provide the value of the attribute with the specified index (if +key+ is an
817
+ * integer) or with the specified name (if +key+ is a string) relative to the
818
+ * containing element, as a string.
819
+ */
820
+ static VALUE rxml_reader_attribute(VALUE self, VALUE key)
821
+ {
822
+ VALUE result = Qnil;
823
+ xmlChar *xattr;
824
+ xmlTextReaderPtr xReader = rxml_text_reader_get(self);
825
+ const xmlChar *xencoding = xmlTextReaderConstEncoding(xReader);
826
+
827
+ if (TYPE(key) == T_FIXNUM)
828
+ {
829
+ xattr = xmlTextReaderGetAttributeNo(xReader, FIX2INT(key));
830
+ }
831
+ else
832
+ {
833
+ xattr = xmlTextReaderGetAttribute(xReader, (const xmlChar *) StringValueCStr(key));
834
+ }
835
+
836
+ if (xattr)
837
+ {
838
+ result = rxml_str_new2(xattr, xencoding);
839
+ xmlFree(xattr);
840
+ }
841
+ return result;
842
+ }
843
+
844
+ /*
845
+ * call-seq:
846
+ * reader.lookup_namespace(prefix) -> value
847
+ *
848
+ * Resolve a namespace prefix in the scope of the current element.
849
+ * To return the default namespace, specify nil as +prefix+.
850
+ */
851
+ static VALUE rxml_reader_lookup_namespace(VALUE self, VALUE prefix)
852
+ {
853
+ VALUE result = Qnil;
854
+ xmlTextReaderPtr xReader = rxml_text_reader_get(self);
855
+ const xmlChar *xnamespace = xmlTextReaderLookupNamespace(xReader, (const xmlChar *) StringValueCStr(prefix));
856
+ const xmlChar *xencoding = xmlTextReaderConstEncoding(xReader);
857
+
858
+ if (xnamespace)
859
+ {
860
+ result = rxml_str_new2((const char*)xnamespace, (const char*)xencoding);
861
+ xmlFree((void *)xnamespace);
862
+ }
863
+ return result;
864
+ }
865
+
866
+ /*
867
+ * call-seq:
868
+ * reader.expand -> node
869
+ *
870
+ * Returns the current node and its full subtree. Note the returned node
871
+ * is valid ONLY until the next read call.
872
+ */
873
+ static VALUE rxml_reader_expand(VALUE self)
874
+ {
875
+ xmlTextReaderPtr xreader = rxml_text_reader_get(self);
876
+ xmlNodePtr xnode = xmlTextReaderExpand(xreader);
877
+
878
+ if (!xnode)
879
+ {
880
+ return Qnil;
881
+ }
882
+ else
883
+ {
884
+ return rxml_node_wrap(xnode);
885
+ }
886
+ }
887
+
888
+ #if LIBXML_VERSION >= 20618
889
+ /*
890
+ * call-seq:
891
+ * reader.byte_consumed -> value
892
+ *
893
+ * This method provides the current index of the parser used by the reader,
894
+ * relative to the start of the current entity.
895
+ */
896
+ static VALUE
897
+ rxml_reader_byte_consumed(VALUE self)
898
+ {
899
+ xmlTextReaderPtr xreader = rxml_text_reader_get(self);
900
+ return INT2NUM(xmlTextReaderByteConsumed(xreader));
901
+ }
902
+ #endif
903
+
904
+ #if LIBXML_VERSION >= 20617
905
+ /*
906
+ * call-seq:
907
+ * reader.column_number -> number
908
+ *
909
+ * Provide the column number of the current parsing point.
910
+ */
911
+ static VALUE
912
+ rxml_reader_column_number(VALUE self)
913
+ {
914
+ xmlTextReaderPtr xreader = rxml_text_reader_get(self);
915
+ return INT2NUM(xmlTextReaderGetParserColumnNumber(xreader));
916
+ }
917
+
918
+ /*
919
+ * call-seq:
920
+ * reader.line_number -> number
921
+ *
922
+ * Provide the line number of the current parsing point.
923
+ */
924
+ static VALUE
925
+ rxml_reader_line_number(VALUE self)
926
+ {
927
+ xmlTextReaderPtr xreader = rxml_text_reader_get(self);
928
+ return INT2NUM(xmlTextReaderGetParserLineNumber(xreader));
929
+ }
930
+ #endif
931
+
932
+ /*
933
+ * call-seq:
934
+ * reader.default? -> bool
935
+ *
936
+ * Return whether an Attribute node was generated from the default value
937
+ * defined in the DTD or schema.
938
+ */
939
+ static VALUE rxml_reader_default(VALUE self)
940
+ {
941
+ xmlTextReaderPtr xreader = rxml_text_reader_get(self);
942
+ return xmlTextReaderIsDefault(xreader) ? Qtrue : Qfalse;
943
+ }
944
+
945
+ /*
946
+ * call-seq:
947
+ * reader.namespace_declaration? -> bool
948
+ *
949
+ * Determine whether the current node is a namespace declaration rather than a
950
+ * regular attribute.
951
+ */
952
+ static VALUE rxml_reader_namespace_declaration(VALUE self)
953
+ {
954
+ xmlTextReaderPtr xreader = rxml_text_reader_get(self);
955
+ return xmlTextReaderIsNamespaceDecl(xreader) ? Qtrue : Qfalse;
956
+ }
957
+
958
+ /*
959
+ * call-seq:
960
+ * reader.empty_element? -> bool
961
+ *
962
+ * Check if the current node is empty.
963
+ */
964
+ static VALUE rxml_reader_empty_element(VALUE self)
965
+ {
966
+ xmlTextReaderPtr xreader = rxml_text_reader_get(self);
967
+ return xmlTextReaderIsEmptyElement(xreader) ? Qtrue : Qfalse;
968
+ }
969
+
970
+ /*
971
+ * call-seq:
972
+ * reader.valid? -> bool
973
+ *
974
+ * Retrieve the validity status from the parser context.
975
+ */
976
+ static VALUE rxml_reader_valid(VALUE self)
977
+ {
978
+ xmlTextReaderPtr xreader = rxml_text_reader_get(self);
979
+ return xmlTextReaderIsValid(xreader) ? Qtrue : Qfalse;
980
+ }
981
+
982
+ void rxml_init_reader(void)
983
+ {
984
+ BASE_URI_SYMBOL = ID2SYM(rb_intern("base_uri"));
985
+ ENCODING_SYMBOL = ID2SYM(rb_intern("encoding"));
986
+ IO_ATTR = rb_intern("@io");
987
+ OPTIONS_SYMBOL = ID2SYM(rb_intern("options"));
988
+
989
+ cXMLReader = rb_define_class_under(mXML, "Reader", rb_cObject);
990
+
991
+ rb_define_singleton_method(cXMLReader, "document", rxml_reader_document, 1);
992
+ rb_define_singleton_method(cXMLReader, "file", rxml_reader_file, -1);
993
+ rb_define_singleton_method(cXMLReader, "io", rxml_reader_io, -1);
994
+ rb_define_singleton_method(cXMLReader, "string", rxml_reader_string, -1);
995
+
996
+ rb_define_method(cXMLReader, "[]", rxml_reader_attribute, 1);
997
+ rb_define_method(cXMLReader, "attribute_count", rxml_reader_attr_count, 0);
998
+ rb_define_method(cXMLReader, "base_uri", rxml_reader_base_uri, 0);
999
+ #if LIBXML_VERSION >= 20618
1000
+ rb_define_method(cXMLReader, "byte_consumed", rxml_reader_byte_consumed, 0);
1001
+ #endif
1002
+ rb_define_method(cXMLReader, "close", rxml_reader_close, 0);
1003
+ #if LIBXML_VERSION >= 20617
1004
+ rb_define_method(cXMLReader, "column_number", rxml_reader_column_number, 0);
1005
+ #endif
1006
+ rb_define_method(cXMLReader, "depth", rxml_reader_depth, 0);
1007
+ rb_define_method(cXMLReader, "encoding", rxml_reader_encoding, 0);
1008
+ rb_define_method(cXMLReader, "expand", rxml_reader_expand, 0);
1009
+ rb_define_method(cXMLReader, "has_attributes?", rxml_reader_has_attributes, 0);
1010
+ rb_define_method(cXMLReader, "has_value?", rxml_reader_has_value, 0);
1011
+ #if LIBXML_VERSION >= 20617
1012
+ rb_define_method(cXMLReader, "line_number", rxml_reader_line_number, 0);
1013
+ #endif
1014
+ rb_define_method(cXMLReader, "local_name", rxml_reader_local_name, 0);
1015
+ rb_define_method(cXMLReader, "lookup_namespace", rxml_reader_lookup_namespace, 1);
1016
+ rb_define_method(cXMLReader, "move_to_attribute", rxml_reader_move_to_attr, 1);
1017
+ rb_define_method(cXMLReader, "move_to_first_attribute", rxml_reader_move_to_first_attr, 0);
1018
+ rb_define_method(cXMLReader, "move_to_next_attribute", rxml_reader_move_to_next_attr, 0);
1019
+ rb_define_method(cXMLReader, "move_to_element", rxml_reader_move_to_element, 0);
1020
+ rb_define_method(cXMLReader, "name", rxml_reader_name, 0);
1021
+ rb_define_method(cXMLReader, "namespace_uri", rxml_reader_namespace_uri, 0);
1022
+ rb_define_method(cXMLReader, "next", rxml_reader_next, 0);
1023
+ rb_define_method(cXMLReader, "next_sibling", rxml_reader_next_sibling, 0);
1024
+ rb_define_method(cXMLReader, "node", rxml_reader_node, 0);
1025
+ rb_define_method(cXMLReader, "node_type", rxml_reader_node_type, 0);
1026
+ rb_define_method(cXMLReader, "normalization", rxml_reader_normalization, 0);
1027
+ rb_define_method(cXMLReader, "prefix", rxml_reader_prefix, 0);
1028
+ rb_define_method(cXMLReader, "quote_char", rxml_reader_quote_char, 0);
1029
+ rb_define_method(cXMLReader, "read", rxml_reader_read, 0);
1030
+ rb_define_method(cXMLReader, "read_attribute_value", rxml_reader_read_attr_value, 0);
1031
+ rb_define_method(cXMLReader, "read_inner_xml", rxml_reader_read_inner_xml, 0);
1032
+ rb_define_method(cXMLReader, "read_outer_xml", rxml_reader_read_outer_xml, 0);
1033
+ rb_define_method(cXMLReader, "read_state", rxml_reader_read_state, 0);
1034
+ rb_define_method(cXMLReader, "read_string", rxml_reader_read_string, 0);
1035
+ rb_define_method(cXMLReader, "relax_ng_validate", rxml_reader_relax_ng_validate, 1);
1036
+ rb_define_method(cXMLReader, "standalone", rxml_reader_standalone, 0);
1037
+ #if LIBXML_VERSION >= 20620
1038
+ rb_define_method(cXMLReader, "schema_validate", rxml_reader_schema_validate, 1);
1039
+ #endif
1040
+ rb_define_method(cXMLReader, "value", rxml_reader_value, 0);
1041
+ rb_define_method(cXMLReader, "xml_lang", rxml_reader_xml_lang, 0);
1042
+ rb_define_method(cXMLReader, "xml_version", rxml_reader_xml_version, 0);
1043
+ rb_define_method(cXMLReader, "default?", rxml_reader_default, 0);
1044
+ rb_define_method(cXMLReader, "empty_element?", rxml_reader_empty_element, 0);
1045
+ rb_define_method(cXMLReader, "namespace_declaration?", rxml_reader_namespace_declaration, 0);
1046
+ rb_define_method(cXMLReader, "valid?", rxml_reader_valid, 0);
1047
+
1048
+ /* Constants */
1049
+ rb_define_const(cXMLReader, "LOADDTD", INT2FIX(XML_PARSER_LOADDTD));
1050
+ rb_define_const(cXMLReader, "DEFAULTATTRS", INT2FIX(XML_PARSER_DEFAULTATTRS));
1051
+ rb_define_const(cXMLReader, "VALIDATE", INT2FIX(XML_PARSER_VALIDATE));
1052
+ rb_define_const(cXMLReader, "SUBST_ENTITIES", INT2FIX(XML_PARSER_SUBST_ENTITIES));
1053
+
1054
+ rb_define_const(cXMLReader, "SEVERITY_VALIDITY_WARNING", INT2FIX(XML_PARSER_SEVERITY_VALIDITY_WARNING));
1055
+ rb_define_const(cXMLReader, "SEVERITY_VALIDITY_ERROR", INT2FIX(XML_PARSER_SEVERITY_VALIDITY_ERROR));
1056
+ rb_define_const(cXMLReader, "SEVERITY_WARNING", INT2FIX(XML_PARSER_SEVERITY_WARNING));
1057
+ rb_define_const(cXMLReader, "SEVERITY_ERROR", INT2FIX(XML_PARSER_SEVERITY_ERROR));
1058
+
1059
+ rb_define_const(cXMLReader, "TYPE_NONE", INT2FIX(XML_READER_TYPE_NONE));
1060
+ rb_define_const(cXMLReader, "TYPE_ELEMENT", INT2FIX(XML_READER_TYPE_ELEMENT));
1061
+ rb_define_const(cXMLReader, "TYPE_ATTRIBUTE", INT2FIX(XML_READER_TYPE_ATTRIBUTE));
1062
+ rb_define_const(cXMLReader, "TYPE_TEXT", INT2FIX(XML_READER_TYPE_TEXT));
1063
+ rb_define_const(cXMLReader, "TYPE_CDATA", INT2FIX(XML_READER_TYPE_CDATA));
1064
+ rb_define_const(cXMLReader, "TYPE_ENTITY_REFERENCE", INT2FIX(XML_READER_TYPE_ENTITY_REFERENCE));
1065
+ rb_define_const(cXMLReader, "TYPE_ENTITY", INT2FIX(XML_READER_TYPE_ENTITY));
1066
+ rb_define_const(cXMLReader, "TYPE_PROCESSING_INSTRUCTION", INT2FIX(XML_READER_TYPE_PROCESSING_INSTRUCTION));
1067
+ rb_define_const(cXMLReader, "TYPE_COMMENT", INT2FIX(XML_READER_TYPE_COMMENT));
1068
+ rb_define_const(cXMLReader, "TYPE_DOCUMENT", INT2FIX(XML_READER_TYPE_DOCUMENT));
1069
+ rb_define_const(cXMLReader, "TYPE_DOCUMENT_TYPE", INT2FIX(XML_READER_TYPE_DOCUMENT_TYPE));
1070
+ rb_define_const(cXMLReader, "TYPE_DOCUMENT_FRAGMENT", INT2FIX(XML_READER_TYPE_DOCUMENT_FRAGMENT));
1071
+ rb_define_const(cXMLReader, "TYPE_NOTATION", INT2FIX(XML_READER_TYPE_NOTATION));
1072
+ rb_define_const(cXMLReader, "TYPE_WHITESPACE", INT2FIX(XML_READER_TYPE_WHITESPACE));
1073
+ rb_define_const(cXMLReader, "TYPE_SIGNIFICANT_WHITESPACE", INT2FIX(XML_READER_TYPE_SIGNIFICANT_WHITESPACE));
1074
+ rb_define_const(cXMLReader, "TYPE_END_ELEMENT", INT2FIX(XML_READER_TYPE_END_ELEMENT));
1075
+ rb_define_const(cXMLReader, "TYPE_END_ENTITY", INT2FIX(XML_READER_TYPE_END_ENTITY));
1076
+ rb_define_const(cXMLReader, "TYPE_XML_DECLARATION", INT2FIX(XML_READER_TYPE_XML_DECLARATION));
1077
+
1078
+ /* Read states */
1079
+ rb_define_const(cXMLReader, "MODE_INITIAL", INT2FIX(XML_TEXTREADER_MODE_INITIAL));
1080
+ rb_define_const(cXMLReader, "MODE_INTERACTIVE", INT2FIX(XML_TEXTREADER_MODE_INTERACTIVE));
1081
+ rb_define_const(cXMLReader, "MODE_ERROR", INT2FIX(XML_TEXTREADER_MODE_ERROR));
1082
+ rb_define_const(cXMLReader, "MODE_EOF", INT2FIX(XML_TEXTREADER_MODE_EOF));
1083
+ rb_define_const(cXMLReader, "MODE_CLOSED", INT2FIX(XML_TEXTREADER_MODE_CLOSED));
1084
+ rb_define_const(cXMLReader, "MODE_READING", INT2FIX(XML_TEXTREADER_MODE_READING));
1085
+ }