libxml-ruby 4.1.0 → 4.1.2

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,1242 +1,1245 @@
1
- /* Copyright (c) 2006-2007 Apple Inc.
2
- * Please see the LICENSE file for copyright and distribution information. */
3
-
4
- #include "ruby_libxml.h"
5
- #include "ruby_xml_reader.h"
6
-
7
- #include <libxml/xmlreader.h>
8
- #include <libxml/xmlschemas.h>
9
-
10
- /*
11
- * Document-class: LibXML::XML::Reader
12
- *
13
- * The XML::Reader class provides a simpler, alternative way of parsing an XML
14
- * document in contrast to XML::Parser or XML::SaxParser. A XML::Reader instance
15
- * acts like a cursor going forward in a document stream, stopping at each node
16
- * it encounters. To advance to the next node, simply call XML::Reader#read.
17
- *
18
- * The XML::Reader API closely matches the DOM Core specification and supports
19
- * namespaces, xml:base, entity handling and DTDs.
20
- *
21
- * To summarize, XML::Reader provides a far simpler API to use versus XML::SaxParser
22
- * and is more memory efficient than using XML::Parser to create a DOM tree.
23
- *
24
- * Example:
25
- *
26
- * reader = XML::Reader.string("<foo><bar>1</bar><bar>2</bar><bar>3</bar></foo>")
27
- * reader.read
28
- * assert_equal('foo', reader.name)
29
- * assert_nil(reader.value)
30
- *
31
- * 3.times do |i|
32
- * reader.read
33
- * assert_equal(XML::Reader::TYPE_ELEMENT, reader.node_type)
34
- * assert_equal('bar', reader.name)
35
- * reader.read
36
- * assert_equal(XML::Reader::TYPE_TEXT, reader.node_type)
37
- * assert_equal((i + 1).to_s, reader.value)
38
- * reader.read
39
- * assert_equal(XML::Reader::TYPE_END_ELEMENT, reader.node_type)
40
- * end
41
- *
42
- * You can also parse documents (see XML::Reader.document),
43
- * strings (see XML::Reader.string) and io objects (see
44
- * XML::Reader.io).
45
- *
46
- * For a more in depth tutorial, albeit in C, see http://xmlsoft.org/xmlreader.html.*/
47
-
48
-
49
- /* NOTE - We need to wrap the readers document to support Reader.read.node.find('/').
50
- To do this we need to use xmlTextReaderCurrentDoc which means we have to free the
51
- document ourselves. Annoying... */
52
-
53
- VALUE cXMLReader;
54
-
55
- static ID BASE_URI_SYMBOL;
56
- static ID ENCODING_SYMBOL;
57
- static ID IO_ATTR;
58
- static ID OPTIONS_SYMBOL;
59
-
60
- static void rxml_reader_free(xmlTextReaderPtr xreader)
61
- {
62
- xmlFreeTextReader(xreader);
63
- }
64
-
65
- static void rxml_reader_mark(xmlTextReaderPtr xreader)
66
- {
67
- xmlDocPtr xdoc = xmlTextReaderCurrentDoc(xreader);
68
- VALUE doc = (VALUE)xdoc->_private;
69
- rb_gc_mark(doc);
70
- }
71
-
72
- static VALUE rxml_reader_wrap(xmlTextReaderPtr xreader)
73
- {
74
- return Data_Wrap_Struct(cXMLReader, NULL, rxml_reader_free, xreader);
75
- }
76
-
77
-
78
- static xmlTextReaderPtr rxml_text_reader_get(VALUE obj)
79
- {
80
- xmlTextReaderPtr xreader;
81
- Data_Get_Struct(obj, xmlTextReader, xreader);
82
- return xreader;
83
- }
84
-
85
- /*
86
- * call-seq:
87
- * XML::Reader.document(doc) -> XML::Reader
88
- *
89
- * Create an new reader for the specified document.
90
- */
91
- VALUE rxml_reader_document(VALUE klass, VALUE doc)
92
- {
93
- xmlDocPtr xdoc;
94
- xmlTextReaderPtr xreader;
95
-
96
- Data_Get_Struct(doc, xmlDoc, xdoc);
97
-
98
- xreader = xmlReaderWalker(xdoc);
99
-
100
- if (xreader == NULL)
101
- rxml_raise(&xmlLastError);
102
-
103
- return rxml_reader_wrap(xreader);
104
- }
105
-
106
- /* call-seq:
107
- * XML::Reader.file(path) -> XML::Reader
108
- * XML::Reader.file(path, :encoding => XML::Encoding::UTF_8,
109
- * :options => XML::Parser::Options::NOENT) -> XML::Parser
110
- *
111
- * Creates a new reader by parsing the specified file or uri.
112
- *
113
- * You may provide an optional hash table to control how the
114
- * parsing is performed. Valid options are:
115
- *
116
- * encoding - The document encoding, defaults to nil. Valid values
117
- * are the encoding constants defined on XML::Encoding.
118
- * options - Controls the execution of the parser, defaults to 0.
119
- * Valid values are the constants defined on
120
- * XML::Parser::Options. Mutliple options can be combined
121
- * by using Bitwise OR (|).
122
- */
123
- static VALUE rxml_reader_file(int argc, VALUE *argv, VALUE klass)
124
- {
125
- xmlTextReaderPtr xreader;
126
- VALUE path;
127
- VALUE options;
128
-
129
- const char *xencoding = NULL;
130
- int xoptions = 0;
131
-
132
- rb_scan_args(argc, argv, "11", &path, &options);
133
- Check_Type(path, T_STRING);
134
-
135
- if (!NIL_P(options))
136
- {
137
- VALUE encoding = Qnil;
138
- VALUE parserOptions = Qnil;
139
-
140
- Check_Type(options, T_HASH);
141
-
142
- encoding = rb_hash_aref(options, BASE_URI_SYMBOL);
143
- xencoding = NIL_P(encoding) ? NULL : xmlGetCharEncodingName(NUM2INT(encoding));
144
-
145
- parserOptions = rb_hash_aref(options, OPTIONS_SYMBOL);
146
- xoptions = NIL_P(parserOptions) ? 0 : NUM2INT(parserOptions);
147
- }
148
-
149
- xreader = xmlReaderForFile(StringValueCStr(path), xencoding, xoptions);
150
-
151
- if (xreader == NULL)
152
- rxml_raise(&xmlLastError);
153
-
154
- return rxml_reader_wrap(xreader);
155
- }
156
-
157
- /* call-seq:
158
- * XML::Reader.io(io) -> XML::Reader
159
- * XML::Reader.io(io, :encoding => XML::Encoding::UTF_8,
160
- * :options => XML::Parser::Options::NOENT) -> XML::Parser
161
- *
162
- * Creates a new reader by parsing the specified io object.
163
- *
164
- * You may provide an optional hash table to control how the
165
- * parsing is performed. Valid options are:
166
- *
167
- * base_uri - The base url for the parsed document.
168
- * encoding - The document encoding, defaults to nil. Valid values
169
- * are the encoding constants defined on XML::Encoding.
170
- * options - Controls the execution of the parser, defaults to 0.
171
- * Valid values are the constants defined on
172
- * XML::Parser::Options. Mutliple options can be combined
173
- * by using Bitwise OR (|).
174
- */
175
- static VALUE rxml_reader_io(int argc, VALUE *argv, VALUE klass)
176
- {
177
- xmlTextReaderPtr xreader;
178
- VALUE result;
179
- VALUE io;
180
- VALUE options;
181
- char *xbaseurl = NULL;
182
- const char *xencoding = NULL;
183
- int xoptions = 0;
184
-
185
- rb_scan_args(argc, argv, "11", &io, &options);
186
-
187
- if (!NIL_P(options))
188
- {
189
- VALUE baseurl = Qnil;
190
- VALUE encoding = Qnil;
191
- VALUE parserOptions = Qnil;
192
-
193
- Check_Type(options, T_HASH);
194
-
195
- baseurl = rb_hash_aref(options, BASE_URI_SYMBOL);
196
- xbaseurl = NIL_P(baseurl) ? NULL : StringValueCStr(baseurl);
197
-
198
- encoding = rb_hash_aref(options, ENCODING_SYMBOL);
199
- xencoding = NIL_P(encoding) ? NULL : xmlGetCharEncodingName(NUM2INT(encoding));
200
-
201
- parserOptions = rb_hash_aref(options, OPTIONS_SYMBOL);
202
- xoptions = NIL_P(parserOptions) ? 0 : NUM2INT(parserOptions);
203
- }
204
-
205
- xreader = xmlReaderForIO((xmlInputReadCallback) rxml_read_callback, NULL,
206
- (void *) io,
207
- xbaseurl, xencoding, xoptions);
208
-
209
- if (xreader == NULL)
210
- rxml_raise(&xmlLastError);
211
-
212
- result = rxml_reader_wrap(xreader);
213
-
214
- /* Attach io object to parser so it won't get freed.*/
215
- rb_ivar_set(result, IO_ATTR, io);
216
-
217
- return result;
218
- }
219
-
220
- /* call-seq:
221
- * XML::Reader.string(io) -> XML::Reader
222
- * XML::Reader.string(io, :encoding => XML::Encoding::UTF_8,
223
- * :options => XML::Parser::Options::NOENT) -> XML::Parser
224
- *
225
- * Creates a new reader by parsing the specified string.
226
- *
227
- * You may provide an optional hash table to control how the
228
- * parsing is performed. Valid options are:
229
- *
230
- * base_uri - The base url for the parsed document.
231
- * encoding - The document encoding, defaults to nil. Valid values
232
- * are the encoding constants defined on XML::Encoding.
233
- * options - Controls the execution of the parser, defaults to 0.
234
- * Valid values are the constants defined on
235
- * XML::Parser::Options. Mutliple options can be combined
236
- * by using Bitwise OR (|).
237
- */
238
- static VALUE rxml_reader_string(int argc, VALUE *argv, VALUE klass)
239
- {
240
- xmlTextReaderPtr xreader;
241
- VALUE string;
242
- VALUE options;
243
- char *xbaseurl = NULL;
244
- const char *xencoding = NULL;
245
- int xoptions = 0;
246
-
247
- rb_scan_args(argc, argv, "11", &string, &options);
248
- Check_Type(string, T_STRING);
249
-
250
- if (!NIL_P(options))
251
- {
252
- VALUE baseurl = Qnil;
253
- VALUE encoding = Qnil;
254
- VALUE parserOptions = Qnil;
255
-
256
- Check_Type(options, T_HASH);
257
-
258
- baseurl = rb_hash_aref(options, BASE_URI_SYMBOL);
259
- xbaseurl = NIL_P(baseurl) ? NULL : StringValueCStr(baseurl);
260
-
261
- encoding = rb_hash_aref(options, ENCODING_SYMBOL);
262
- xencoding = NIL_P(encoding) ? NULL : xmlGetCharEncodingName(NUM2INT(encoding));
263
-
264
- parserOptions = rb_hash_aref(options, OPTIONS_SYMBOL);
265
- xoptions = NIL_P(parserOptions) ? 0 : NUM2INT(parserOptions);
266
- }
267
-
268
- xreader = xmlReaderForMemory(StringValueCStr(string), (int)RSTRING_LEN(string),
269
- xbaseurl, xencoding, xoptions);
270
-
271
- if (xreader == NULL)
272
- rxml_raise(&xmlLastError);
273
-
274
- return rxml_reader_wrap(xreader);
275
- }
276
-
277
- /*
278
- * call-seq:
279
- * reader.close -> code
280
- *
281
- * This method releases any resources allocated by the current instance
282
- * changes the state to Closed and close any underlying input.
283
- */
284
- static VALUE rxml_reader_close(VALUE self)
285
- {
286
- xmlTextReaderPtr xreader = rxml_text_reader_get(self);
287
- return INT2FIX(xmlTextReaderClose(xreader));
288
- }
289
-
290
- /*
291
- * call-seq:
292
- * reader.move_to_attribute_no(index) -> code
293
- *
294
- * Move the position of the current instance to the attribute with the
295
- * specified index relative to the containing element.
296
- */
297
- static VALUE rxml_reader_move_to_attr_no(VALUE self, VALUE index)
298
- {
299
- int ret;
300
- xmlTextReaderPtr xreader;
301
-
302
- xreader = rxml_text_reader_get(self);
303
- ret = xmlTextReaderMoveToAttributeNo(xreader, FIX2INT(index));
304
-
305
- return INT2FIX(ret);
306
- }
307
-
308
- /*
309
- * call-seq:
310
- * reader.move_to_attribute(localName) -> code
311
- *
312
- * Move the position of the current instance to the attribute with the
313
- * specified name relative to the containing element.
314
- */
315
- static VALUE rxml_reader_move_to_attr(VALUE self, VALUE val)
316
- {
317
- int ret;
318
- xmlTextReaderPtr xreader;
319
-
320
- xreader = rxml_text_reader_get(self);
321
- ret = xmlTextReaderMoveToAttribute(xreader,
322
- (const xmlChar *) StringValueCStr(val));
323
-
324
- return INT2FIX(ret);
325
- }
326
-
327
- /*
328
- * call-seq:
329
- * reader.move_to_attribute_ns(localName, namespaceURI) -> code
330
- *
331
- * Move the position of the current instance to the attribute with the
332
- * specified name and namespace relative to the containing element.
333
- */
334
- static VALUE rxml_reader_move_to_attr_ns(VALUE self, VALUE name, VALUE ns)
335
- {
336
- int ret;
337
- xmlTextReaderPtr xreader;
338
-
339
- xreader = rxml_text_reader_get(self);
340
- ret = xmlTextReaderMoveToAttributeNs(xreader,
341
- (const xmlChar *) StringValueCStr(name),
342
- (const xmlChar *) StringValueCStr(ns));
343
-
344
- return INT2FIX(ret);
345
- }
346
-
347
- /*
348
- * call-seq:
349
- * reader.move_to_first_attribute -> code
350
- *
351
- * Move the position of the current instance to the first attribute associated
352
- * with the current node.
353
- */
354
- static VALUE rxml_reader_move_to_first_attr(VALUE self)
355
- {
356
- xmlTextReaderPtr xreader = rxml_text_reader_get(self);
357
- return INT2FIX(xmlTextReaderMoveToFirstAttribute(xreader));
358
- }
359
-
360
- /*
361
- * call-seq:
362
- * reader.move_to_next_attribute -> code
363
- *
364
- * Move the position of the current instance to the next attribute associated
365
- * with the current node.
366
- */
367
- static VALUE rxml_reader_move_to_next_attr(VALUE self)
368
- {
369
- xmlTextReaderPtr xreader = rxml_text_reader_get(self);
370
- return INT2FIX(xmlTextReaderMoveToNextAttribute(xreader));
371
- }
372
-
373
- /*
374
- * call-seq:
375
- * reader.move_to_element -> code
376
- *
377
- * Move the position of the current instance to the node that contains the
378
- * current attribute node.
379
- */
380
- static VALUE rxml_reader_move_to_element(VALUE self)
381
- {
382
- xmlTextReaderPtr xreader = rxml_text_reader_get(self);
383
- return INT2FIX(xmlTextReaderMoveToElement(xreader));
384
- }
385
-
386
- /*
387
- * call-seq:
388
- * reader.next -> code
389
- *
390
- * Skip to the node following the current one in document order while avoiding
391
- * the subtree if any.
392
- */
393
- static VALUE rxml_reader_next(VALUE self)
394
- {
395
- xmlTextReaderPtr xreader = rxml_text_reader_get(self);
396
- return INT2FIX(xmlTextReaderNext(xreader));
397
- }
398
-
399
- /*
400
- * call-seq:
401
- * reader.next_sibling -> code
402
- *
403
- * Skip to the node following the current one in document order while avoiding
404
- * the subtree if any. Currently implemented only for Readers built on a
405
- * document.
406
- */
407
- static VALUE rxml_reader_next_sibling(VALUE self)
408
- {
409
- xmlTextReaderPtr xreader = rxml_text_reader_get(self);
410
- return INT2FIX(xmlTextReaderNextSibling(xreader));
411
- }
412
-
413
- /*
414
- * call-seq:
415
- * reader.node -> XML::Node
416
- *
417
- * Returns the reader's current node. It will return
418
- * nil if Reader#read has not yet been called.
419
- * WARNING - Using this method is dangerous because the
420
- * the node may be destroyed on the next #read.
421
- */
422
- static VALUE rxml_reader_node(VALUE self)
423
- {
424
- xmlTextReaderPtr xreader = rxml_text_reader_get(self);
425
- xmlNodePtr xnode = xmlTextReaderCurrentNode(xreader);
426
- return xnode ? rxml_node_wrap(xnode) : Qnil;
427
- }
428
-
429
- /*
430
- * call-seq:
431
- * reader.node_type -> type
432
- *
433
- * Get the node type of the current node. Reference:
434
- * http://dotgnu.org/pnetlib-doc/System/Xml/XmlNodeType.html
435
- */
436
- static VALUE rxml_reader_node_type(VALUE self)
437
- {
438
- xmlTextReaderPtr xreader = rxml_text_reader_get(self);
439
- return INT2FIX(xmlTextReaderNodeType(xreader));
440
- }
441
-
442
- /*
443
- * call-seq:
444
- * reader.normalization -> value
445
- *
446
- * The value indicating whether to normalize white space and attribute values.
447
- * Since attribute value and end of line normalizations are a MUST in the XML
448
- * specification only the value true is accepted. The broken bahaviour of
449
- * accepting out of range character entities like &#0; is of course not
450
- * supported either.
451
- *
452
- * Return 1 or -1 in case of error.
453
- */
454
- static VALUE rxml_reader_normalization(VALUE self)
455
- {
456
- xmlTextReaderPtr xreader = rxml_text_reader_get(self);
457
- return INT2FIX(xmlTextReaderNormalization(xreader));
458
- }
459
-
460
- /*
461
- * call-seq:
462
- * reader.read -> nil|true|false
463
- *
464
- * Causes the reader to move to the next node in the stream, exposing its properties.
465
- *
466
- * Returns true if a node was successfully read or false if there are no more
467
- * nodes to read. On errors, an exception is raised.*/
468
- static VALUE rxml_reader_read(VALUE self)
469
- {
470
- xmlTextReaderPtr xreader = rxml_text_reader_get(self);
471
- int result = xmlTextReaderRead(xreader);
472
- switch(result)
473
- {
474
- case -1:
475
- rxml_raise(&xmlLastError);
476
- return Qnil;
477
- break;
478
- case 0:
479
- return Qfalse;
480
- case 1:
481
- return Qtrue;
482
- default:
483
- rb_raise(rb_eRuntimeError,
484
- "xmlTextReaderRead did not return -1, 0 or 1. Return value was: %d", result);
485
- }
486
- }
487
-
488
- /*
489
- * call-seq:
490
- * reader.read_attribute_value -> code
491
- *
492
- * Parse an attribute value into one or more Text and EntityReference nodes.
493
- *
494
- * Return 1 in case of success, 0 if the reader was not positionned on an
495
- * attribute node or all the attribute values have been read, or -1 in case of
496
- * error.
497
- */
498
- static VALUE rxml_reader_read_attr_value(VALUE self)
499
- {
500
- xmlTextReaderPtr xreader = rxml_text_reader_get(self);
501
- return INT2FIX(xmlTextReaderReadAttributeValue(xreader));
502
- }
503
-
504
- /*
505
- * call-seq:
506
- * reader.read_inner_xml -> data
507
- *
508
- * Read the contents of the current node, including child nodes and markup.
509
- *
510
- * Return a string containing the XML content, or nil if the current node is
511
- * neither an element nor attribute, or has no child nodes.
512
- */
513
- static VALUE rxml_reader_read_inner_xml(VALUE self)
514
- {
515
- VALUE result = Qnil;
516
- xmlTextReaderPtr xReader = rxml_text_reader_get(self);
517
-
518
- xmlChar *xml = xmlTextReaderReadInnerXml(xReader);
519
-
520
- if (xml)
521
- {
522
- const xmlChar *xencoding = xmlTextReaderConstEncoding(xReader);
523
- result = rxml_new_cstr( xml, xencoding);
524
- xmlFree(xml);
525
- }
526
-
527
- return result;
528
- }
529
-
530
- /*
531
- * call-seq:
532
- * reader.read_outer_xml -> data
533
- *
534
- * Read the contents of the current node, including child nodes and markup.
535
- *
536
- * Return a string containing the XML content, or nil if the current node is
537
- * neither an element nor attribute, or has no child nodes.
538
- */
539
- static VALUE rxml_reader_read_outer_xml(VALUE self)
540
- {
541
- VALUE result = Qnil;
542
- xmlTextReaderPtr xReader = rxml_text_reader_get(self);
543
-
544
- xmlChar *xml = xmlTextReaderReadOuterXml(xReader);
545
-
546
- if (xml)
547
- {
548
- const xmlChar *xencoding = xmlTextReaderConstEncoding(xReader);
549
- result = rxml_new_cstr( xml, xencoding);
550
- xmlFree(xml);
551
- }
552
-
553
- return result;
554
- }
555
-
556
- /*
557
- * call-seq:
558
- * reader.read_state -> state
559
- *
560
- * Get the read state of the reader.
561
- */
562
- static VALUE rxml_reader_read_state(VALUE self)
563
- {
564
- xmlTextReaderPtr xreader = rxml_text_reader_get(self);
565
- return INT2FIX(xmlTextReaderReadState(xreader));
566
- }
567
-
568
- /*
569
- * call-seq:
570
- * reader.read_string -> string
571
- *
572
- * Read the contents of an element or a text node as a string.
573
- *
574
- * Return a string containing the contents of the Element or Text node, or nil
575
- * if the reader is positioned on any other type of node.
576
- */
577
- static VALUE rxml_reader_read_string(VALUE self)
578
- {
579
- VALUE result = Qnil;
580
- xmlTextReaderPtr xReader = rxml_text_reader_get(self);
581
-
582
- xmlChar *xml = xmlTextReaderReadString(xReader);
583
-
584
- if (xml)
585
- {
586
- const xmlChar *xencoding = xmlTextReaderConstEncoding(xReader);
587
- result = rxml_new_cstr( xml, xencoding);
588
- xmlFree(xml);
589
- }
590
-
591
- return result;
592
- }
593
-
594
- /*
595
- * call-seq:
596
- * reader.relax_ng_validate(rng) -> boolean
597
- *
598
- * Use RelaxNG to validate the document as it is processed. Activation is only
599
- * possible before the first read. If +rng+ is nil, the RelaxNG validation is
600
- * desactivated.
601
- *
602
- * Return true in case the RelaxNG validation could be (des)activated and false in
603
- * case of error.
604
- */
605
- static VALUE rxml_reader_relax_ng_validate(VALUE self, VALUE rng)
606
- {
607
- xmlTextReaderPtr xreader = rxml_text_reader_get(self);
608
- xmlRelaxNGPtr xrelax;
609
- int status;
610
- Data_Get_Struct(rng, xmlRelaxNG, xrelax);
611
-
612
- status = xmlTextReaderRelaxNGSetSchema(xreader, xrelax);
613
- return (status == 0 ? Qtrue : Qfalse);
614
- }
615
-
616
#if LIBXML_VERSION >= 20620
/*
 * call-seq:
 *    reader.schema_validate(schema) -> boolean
 *
 * Use W3C XSD schema to validate the document as it is processed. Activation
 * is only possible before the first read. If +schema+ is nil, then XML Schema
 * validation is deactivated.
 *
 * Return true if the schema validation could be (de)activated and false
 * otherwise.
 */
static VALUE
rxml_reader_schema_validate(VALUE self, VALUE xsd)
{
  xmlTextReaderPtr xreader = rxml_text_reader_get(self);
  xmlSchemaPtr xschema = NULL;
  int status;

  /* nil means "deactivate": pass a NULL schema to libxml instead of trying
     to unwrap nil as a data object. */
  if (!NIL_P(xsd))
  {
    Data_Get_Struct(xsd, xmlSchema, xschema);
  }

  status = xmlTextReaderSetSchema(xreader, xschema);
  return (status == 0 ? Qtrue : Qfalse);
}
#endif
640
-
641
- /*
642
- * call-seq:
643
- * reader.name -> name
644
- *
645
- * Return the qualified name of the node.
646
- */
647
- static VALUE rxml_reader_name(VALUE self)
648
- {
649
- xmlTextReaderPtr xReader = rxml_text_reader_get(self);
650
- const xmlChar *result = xmlTextReaderConstName(xReader);
651
- const xmlChar *xencoding = xmlTextReaderConstEncoding(xReader);
652
-
653
- return (result == NULL ? Qnil : rxml_new_cstr(result, xencoding));
654
- }
655
-
656
- /*
657
- * call-seq:
658
- * reader.local_name -> name
659
- *
660
- * Return the local name of the node.
661
- */
662
- static VALUE rxml_reader_local_name(VALUE self)
663
- {
664
- xmlTextReaderPtr xReader = rxml_text_reader_get(self);
665
- const xmlChar *result = xmlTextReaderConstLocalName(xReader);
666
- const xmlChar *xencoding = xmlTextReaderConstEncoding(xReader);
667
-
668
- return (result == NULL ? Qnil : rxml_new_cstr(result, xencoding));
669
- }
670
-
671
- /*
672
- * call-seq:
673
- * reader.attribute_count -> count
674
- *
675
- * Provide the number of attributes of the current node.
676
- */
677
- static VALUE rxml_reader_attr_count(VALUE self)
678
- {
679
- xmlTextReaderPtr xreader = rxml_text_reader_get(self);
680
- return INT2FIX(xmlTextReaderAttributeCount(xreader));
681
- }
682
-
683
- /*
684
- * call-seq:
685
- * reader.encoding -> XML::Encoding::UTF_8
686
- *
687
- * Returns the encoding of the document being read. Note you
688
- * first have to read data from the reader for encoding
689
- * to return a value
690
- *
691
- * reader = XML::Reader.file(XML_FILE)
692
- * assert_nil(reader.encoding)
693
- * reader.read
694
- * assert_equal(XML::Encoding::UTF_8, reader.encoding)
695
- *
696
- * In addition, libxml always appears to return nil for the encoding
697
- * when parsing strings.
698
- */
699
- static VALUE rxml_reader_encoding(VALUE self)
700
- {
701
- xmlTextReaderPtr xreader = rxml_text_reader_get(self);
702
- const xmlChar *xencoding = xmlTextReaderConstEncoding(xreader);
703
- if (xencoding)
704
- return INT2NUM(xmlParseCharEncoding((const char*)xencoding));
705
- else
706
- return INT2NUM(XML_CHAR_ENCODING_NONE);
707
- }
708
-
709
- /*
710
- * call-seq:
711
- * reader.base_uri -> URI
712
- *
713
- * Determine the base URI of the node.
714
- */
715
- static VALUE rxml_reader_base_uri(VALUE self)
716
- {
717
- xmlTextReaderPtr xReader = rxml_text_reader_get(self);
718
- const xmlChar *result = xmlTextReaderConstBaseUri(xReader);
719
- const xmlChar *xencoding = xmlTextReaderConstEncoding(xReader);
720
-
721
- return (result == NULL ? Qnil : rxml_new_cstr(result, xencoding));
722
- }
723
-
724
- /*
725
- * call-seq:
726
- * reader.namespace_uri -> URI
727
- *
728
- * Determine the namespace URI of the node.
729
- */
730
- static VALUE rxml_reader_namespace_uri(VALUE self)
731
- {
732
- xmlTextReaderPtr xReader = rxml_text_reader_get(self);
733
- const xmlChar *result = xmlTextReaderConstNamespaceUri(xReader);
734
- const xmlChar *xencoding = xmlTextReaderConstEncoding(xReader);
735
-
736
- return (result == NULL ? Qnil : rxml_new_cstr(result, xencoding));
737
- }
738
-
739
- /*
740
- * call-seq:
741
- * reader.value -> text
742
- *
743
- * Provide the text value of the node if present.
744
- */
745
- static VALUE rxml_reader_value(VALUE self)
746
- {
747
- xmlTextReaderPtr xReader = rxml_text_reader_get(self);
748
- const xmlChar *result = xmlTextReaderConstValue(xReader);
749
- const xmlChar *xencoding = xmlTextReaderConstEncoding(xReader);
750
-
751
- return (result == NULL ? Qnil : rxml_new_cstr(result, xencoding));
752
- }
753
-
754
- /*
755
- * call-seq:
756
- * reader.prefix -> prefix
757
- *
758
- * Get a shorthand reference to the namespace associated with the node.
759
- */
760
- static VALUE rxml_reader_prefix(VALUE self)
761
- {
762
- xmlTextReaderPtr xReader = rxml_text_reader_get(self);
763
- const xmlChar *result = xmlTextReaderConstPrefix(xReader);
764
- const xmlChar *xencoding = xmlTextReaderConstEncoding(xReader);
765
-
766
- return (result == NULL ? Qnil : rxml_new_cstr(result, xencoding));
767
- }
768
-
769
- /*
770
- * call-seq:
771
- * reader.depth -> depth
772
- *
773
- * Get the depth of the node in the tree.
774
- */
775
- static VALUE rxml_reader_depth(VALUE self)
776
- {
777
- xmlTextReaderPtr xreader = rxml_text_reader_get(self);
778
- return INT2FIX(xmlTextReaderDepth(xreader));
779
- }
780
-
781
- /*
782
- * call-seq:
783
- * reader.quote_char -> char
784
- *
785
- * Get the quotation mark character used to enclose the value of an attribute,
786
- * as an integer value (and -1 in case of error).
787
- */
788
- static VALUE rxml_reader_quote_char(VALUE self)
789
- {
790
- xmlTextReaderPtr xreader = rxml_text_reader_get(self);
791
- return INT2FIX(xmlTextReaderQuoteChar(xreader));
792
- }
793
-
794
- /*
795
- * call-seq:
796
- * reader.standalone -> code
797
- *
798
- * Determine the standalone status of the document being read.
799
- *
800
- * Return 1 if the document was declared to be standalone, 0 if it was
801
- * declared to be not standalone, or -1 if the document did not specify its
802
- * standalone status or in case of error.
803
- */
804
- static VALUE rxml_reader_standalone(VALUE self)
805
- {
806
- xmlTextReaderPtr xreader = rxml_text_reader_get(self);
807
- return INT2FIX(xmlTextReaderStandalone(xreader));
808
- }
809
-
810
- /*
811
- * call-seq:
812
- * reader.xml_lang -> value
813
- *
814
- * Get the xml:lang scope within which the node resides.
815
- */
816
- static VALUE rxml_reader_xml_lang(VALUE self)
817
- {
818
- xmlTextReaderPtr xReader = rxml_text_reader_get(self);
819
- const xmlChar *result = xmlTextReaderConstXmlLang(xReader);
820
- const xmlChar *xencoding = xmlTextReaderConstEncoding(xReader);
821
-
822
- return (result == NULL ? Qnil : rxml_new_cstr(result, xencoding));
823
- }
824
-
825
- /*
826
- * call-seq:
827
- * reader.xml_version -> version
828
- *
829
- * Determine the XML version of the document being read.
830
- */
831
- static VALUE rxml_reader_xml_version(VALUE self)
832
- {
833
- xmlTextReaderPtr xReader = rxml_text_reader_get(self);
834
- const xmlChar *result = xmlTextReaderConstXmlVersion(xReader);
835
- const xmlChar *xencoding = xmlTextReaderConstEncoding(xReader);
836
-
837
- return (result == NULL ? Qnil : rxml_new_cstr(result, xencoding));
838
- }
839
-
840
- /*
841
- * call-seq:
842
- * reader.has_attributes? -> bool
843
- *
844
- * Get whether the node has attributes.
845
- */
846
- static VALUE rxml_reader_has_attributes(VALUE self)
847
- {
848
- xmlTextReaderPtr xreader = rxml_text_reader_get(self);
849
- return xmlTextReaderHasAttributes(xreader) ? Qtrue : Qfalse;
850
- }
851
-
852
- /*
853
- * call-seq:
854
- * reader.has_value? -> bool
855
- *
856
- * Get whether the node can have a text value.
857
- */
858
- static VALUE rxml_reader_has_value(VALUE self)
859
- {
860
- xmlTextReaderPtr xreader = rxml_text_reader_get(self);
861
- return xmlTextReaderHasValue(xreader) ? Qtrue : Qfalse;
862
- }
863
-
864
- /*
865
- * call-seq:
866
- * reader[key] -> value
867
- *
868
- * Provide the value of the attribute with the specified index (if +key+ is an
869
- * integer) or with the specified name (if +key+ is a string) relative to the
870
- * containing element, as a string.
871
- */
872
- static VALUE rxml_reader_attribute(VALUE self, VALUE key)
873
- {
874
- VALUE result = Qnil;
875
- xmlChar *xattr;
876
- xmlTextReaderPtr xReader = rxml_text_reader_get(self);
877
- const xmlChar *xencoding = xmlTextReaderConstEncoding(xReader);
878
-
879
- if (TYPE(key) == T_FIXNUM)
880
- {
881
- xattr = xmlTextReaderGetAttributeNo(xReader, FIX2INT(key));
882
- }
883
- else
884
- {
885
- xattr = xmlTextReaderGetAttribute(xReader, (const xmlChar *) StringValueCStr(key));
886
- }
887
-
888
- if (xattr)
889
- {
890
- result = rxml_new_cstr(xattr, xencoding);
891
- xmlFree(xattr);
892
- }
893
- return result;
894
- }
895
-
896
- /*
897
- * call-seq:
898
- * reader.get_attribute(localName) -> value
899
- *
900
- * Provide the value of the attribute with the specified name
901
- * relative to the containing element.
902
- */
903
- static VALUE rxml_reader_get_attribute(VALUE self, VALUE name)
904
- {
905
- VALUE result = Qnil;
906
- xmlChar *xattr;
907
- xmlTextReaderPtr xReader = rxml_text_reader_get(self);
908
- const xmlChar *xencoding = xmlTextReaderConstEncoding(xReader);
909
-
910
- xattr = xmlTextReaderGetAttribute(xReader, (const xmlChar *) StringValueCStr(name));
911
- if (xattr)
912
- {
913
- result = rxml_new_cstr(xattr, xencoding);
914
- xmlFree(xattr);
915
- }
916
- return result;
917
- }
918
-
919
- /*
920
- * call-seq:
921
- * reader.get_attribute_no(index) -> value
922
- *
923
- * Provide the value of the attribute with the specified index
924
- * relative to the containing element.
925
- */
926
- static VALUE rxml_reader_get_attribute_no(VALUE self, VALUE index)
927
- {
928
- VALUE result = Qnil;
929
- xmlChar *xattr;
930
- xmlTextReaderPtr xReader = rxml_text_reader_get(self);
931
- const xmlChar *xencoding = xmlTextReaderConstEncoding(xReader);
932
-
933
- xattr = xmlTextReaderGetAttributeNo(xReader, FIX2INT(index));
934
- if (xattr)
935
- {
936
- result = rxml_new_cstr(xattr, xencoding);
937
- xmlFree(xattr);
938
- }
939
- return result;
940
- }
941
-
942
- static VALUE rxml_reader_get_attribute_ns(VALUE self, VALUE name, VALUE ns)
943
- {
944
- VALUE result = Qnil;
945
- xmlChar *xattr;
946
- xmlTextReaderPtr xReader = rxml_text_reader_get(self);
947
- const xmlChar *xencoding = xmlTextReaderConstEncoding(xReader);
948
-
949
- xattr = xmlTextReaderGetAttributeNs(xReader,
950
- (const xmlChar *) StringValueCStr(name),
951
- (const xmlChar *) StringValueCStr(ns));
952
- if (xattr)
953
- {
954
- result = rxml_new_cstr(xattr, xencoding);
955
- xmlFree(xattr);
956
- }
957
- return result;
958
- }
959
-
960
- /*
961
- * call-seq:
962
- * reader.lookup_namespace(prefix) -> value
963
- *
964
- * Resolve a namespace prefix in the scope of the current element.
965
- * To return the default namespace, specify nil as +prefix+.
966
- */
967
- static VALUE rxml_reader_lookup_namespace(VALUE self, VALUE prefix)
968
- {
969
- VALUE result = Qnil;
970
- xmlTextReaderPtr xReader = rxml_text_reader_get(self);
971
- const xmlChar *xnamespace = xmlTextReaderLookupNamespace(xReader, (const xmlChar *) StringValueCStr(prefix));
972
- const xmlChar *xencoding = xmlTextReaderConstEncoding(xReader);
973
-
974
- if (xnamespace)
975
- {
976
- result = rxml_new_cstr(xnamespace, xencoding);
977
- xmlFree((void *)xnamespace);
978
- }
979
- return result;
980
- }
981
-
982
- /*
983
- * call-seq:
984
- * reader.expand -> node
985
- *
986
- * Returns the current node and its full subtree. Note the returned node
987
- * is valid ONLY until the next read call. If you would like to preserve
988
- * the node, or search it via xpath, call reader.doc first.
989
- */
990
- static VALUE rxml_reader_expand(VALUE self)
991
- {
992
- xmlTextReaderPtr xreader = rxml_text_reader_get(self);
993
- xmlNodePtr xnode = xmlTextReaderExpand(xreader);
994
-
995
- if (!xnode)
996
- {
997
- return Qnil;
998
- }
999
- else
1000
- {
1001
- /* We cannot call rxml_node_wrap here because it sets up a mark function
1002
- for the node. But according to the libxml docs (http://xmlsoft.org/html/libxml-xmlreader.html#xmlTextReaderExpand)
1003
- this is only valid until the next xmlTextReaderRead call. At that point the node is freed (from reading
1004
- the libxml2 source code). So don't set a mark or free function, because they will get called in the next
1005
- garbage collection run and cause a segfault.*/
1006
- return Data_Wrap_Struct(cXMLNode, NULL, NULL, xnode);
1007
- }
1008
- }
1009
-
1010
- /*
1011
- * call-seq:
1012
- * reader.document -> doc
1013
- *
1014
- * Hacking interface that provides access to the current document being accessed by the
1015
- * reader. NOTE: as a result of this call, the reader will not destroy the associated XML
1016
- * document. Instead, it will be destroyed when the returned document goes out of scope.
1017
- *
1018
- * Returns: document
1019
- */
1020
- static VALUE rxml_reader_doc(VALUE self)
1021
- {
1022
- VALUE result = Qnil;
1023
- xmlTextReaderPtr xreader = rxml_text_reader_get(self);
1024
- xmlDocPtr xdoc = xmlTextReaderCurrentDoc(xreader);
1025
-
1026
- if (!xdoc)
1027
- rb_raise(rb_eRuntimeError, "The reader does not have a document. Did you forget to call read?");
1028
-
1029
- result = rxml_document_wrap(xdoc);
1030
-
1031
- // And now hook in a mark function to keep the document alive as long as the reader is valid
1032
- RDATA(self)->dmark = (RUBY_DATA_FUNC)rxml_reader_mark;
1033
-
1034
- return result;
1035
- }
1036
-
1037
-
1038
-
1039
- #if LIBXML_VERSION >= 20618
1040
- /*
1041
- * call-seq:
1042
- * reader.byte_consumed -> value
1043
- *
1044
- * This method provides the current index of the parser used by the reader,
1045
- * relative to the start of the current entity.
1046
- */
1047
- static VALUE
1048
- rxml_reader_byte_consumed(VALUE self)
1049
- {
1050
- xmlTextReaderPtr xreader = rxml_text_reader_get(self);
1051
- return INT2NUM(xmlTextReaderByteConsumed(xreader));
1052
- }
1053
- #endif
1054
-
1055
- #if LIBXML_VERSION >= 20617
1056
- /*
1057
- * call-seq:
1058
- * reader.column_number -> number
1059
- *
1060
- * Provide the column number of the current parsing point.
1061
- */
1062
- static VALUE
1063
- rxml_reader_column_number(VALUE self)
1064
- {
1065
- xmlTextReaderPtr xreader = rxml_text_reader_get(self);
1066
- return INT2NUM(xmlTextReaderGetParserColumnNumber(xreader));
1067
- }
1068
-
1069
- /*
1070
- * call-seq:
1071
- * reader.line_number -> number
1072
- *
1073
- * Provide the line number of the current parsing point.
1074
- */
1075
- static VALUE
1076
- rxml_reader_line_number(VALUE self)
1077
- {
1078
- xmlTextReaderPtr xreader = rxml_text_reader_get(self);
1079
- return INT2NUM(xmlTextReaderGetParserLineNumber(xreader));
1080
- }
1081
- #endif
1082
-
1083
- /*
1084
- * call-seq:
1085
- * reader.default? -> bool
1086
- *
1087
- * Return whether an Attribute node was generated from the default value
1088
- * defined in the DTD or schema.
1089
- */
1090
- static VALUE rxml_reader_default(VALUE self)
1091
- {
1092
- xmlTextReaderPtr xreader = rxml_text_reader_get(self);
1093
- return xmlTextReaderIsDefault(xreader) ? Qtrue : Qfalse;
1094
- }
1095
-
1096
- /*
1097
- * call-seq:
1098
- * reader.namespace_declaration? -> bool
1099
- *
1100
- * Determine whether the current node is a namespace declaration rather than a
1101
- * regular attribute.
1102
- */
1103
- static VALUE rxml_reader_namespace_declaration(VALUE self)
1104
- {
1105
- xmlTextReaderPtr xreader = rxml_text_reader_get(self);
1106
- return xmlTextReaderIsNamespaceDecl(xreader) ? Qtrue : Qfalse;
1107
- }
1108
-
1109
- /*
1110
- * call-seq:
1111
- * reader.empty_element? -> bool
1112
- *
1113
- * Check if the current node is empty.
1114
- */
1115
- static VALUE rxml_reader_empty_element(VALUE self)
1116
- {
1117
- xmlTextReaderPtr xreader = rxml_text_reader_get(self);
1118
- return xmlTextReaderIsEmptyElement(xreader) ? Qtrue : Qfalse;
1119
- }
1120
-
1121
- /*
1122
- * call-seq:
1123
- * reader.valid? -> bool
1124
- *
1125
- * Retrieve the validity status from the parser context.
1126
- */
1127
- static VALUE rxml_reader_valid(VALUE self)
1128
- {
1129
- xmlTextReaderPtr xreader = rxml_text_reader_get(self);
1130
- return xmlTextReaderIsValid(xreader) ? Qtrue : Qfalse;
1131
- }
1132
-
1133
- void rxml_init_reader(void)
1134
- {
1135
- BASE_URI_SYMBOL = ID2SYM(rb_intern("base_uri"));
1136
- ENCODING_SYMBOL = ID2SYM(rb_intern("encoding"));
1137
- IO_ATTR = rb_intern("@io");
1138
- OPTIONS_SYMBOL = ID2SYM(rb_intern("options"));
1139
-
1140
- cXMLReader = rb_define_class_under(mXML, "Reader", rb_cObject);
1141
-
1142
- rb_define_singleton_method(cXMLReader, "document", rxml_reader_document, 1);
1143
- rb_define_singleton_method(cXMLReader, "file", rxml_reader_file, -1);
1144
- rb_define_singleton_method(cXMLReader, "io", rxml_reader_io, -1);
1145
- rb_define_singleton_method(cXMLReader, "string", rxml_reader_string, -1);
1146
-
1147
- rb_define_method(cXMLReader, "[]", rxml_reader_attribute, 1);
1148
- rb_define_method(cXMLReader, "attribute_count", rxml_reader_attr_count, 0);
1149
- rb_define_method(cXMLReader, "base_uri", rxml_reader_base_uri, 0);
1150
- #if LIBXML_VERSION >= 20618
1151
- rb_define_method(cXMLReader, "byte_consumed", rxml_reader_byte_consumed, 0);
1152
- #endif
1153
- rb_define_method(cXMLReader, "close", rxml_reader_close, 0);
1154
- #if LIBXML_VERSION >= 20617
1155
- rb_define_method(cXMLReader, "column_number", rxml_reader_column_number, 0);
1156
- #endif
1157
- rb_define_method(cXMLReader, "depth", rxml_reader_depth, 0);
1158
- rb_define_method(cXMLReader, "doc", rxml_reader_doc, 0);
1159
- rb_define_method(cXMLReader, "encoding", rxml_reader_encoding, 0);
1160
- rb_define_method(cXMLReader, "expand", rxml_reader_expand, 0);
1161
- rb_define_method(cXMLReader, "get_attribute", rxml_reader_get_attribute, 1);
1162
- rb_define_method(cXMLReader, "get_attribute_no", rxml_reader_get_attribute_no, 1);
1163
- rb_define_method(cXMLReader, "get_attribute_ns", rxml_reader_get_attribute_ns, 2);
1164
- rb_define_method(cXMLReader, "has_attributes?", rxml_reader_has_attributes, 0);
1165
- rb_define_method(cXMLReader, "has_value?", rxml_reader_has_value, 0);
1166
- #if LIBXML_VERSION >= 20617
1167
- rb_define_method(cXMLReader, "line_number", rxml_reader_line_number, 0);
1168
- #endif
1169
- rb_define_method(cXMLReader, "local_name", rxml_reader_local_name, 0);
1170
- rb_define_method(cXMLReader, "lookup_namespace", rxml_reader_lookup_namespace, 1);
1171
- rb_define_method(cXMLReader, "move_to_attribute", rxml_reader_move_to_attr, 1);
1172
- rb_define_method(cXMLReader, "move_to_attribute_no", rxml_reader_move_to_attr_no, 1);
1173
- rb_define_method(cXMLReader, "move_to_attribute_ns", rxml_reader_move_to_attr_ns, 2);
1174
- rb_define_method(cXMLReader, "move_to_first_attribute", rxml_reader_move_to_first_attr, 0);
1175
- rb_define_method(cXMLReader, "move_to_next_attribute", rxml_reader_move_to_next_attr, 0);
1176
- rb_define_method(cXMLReader, "move_to_element", rxml_reader_move_to_element, 0);
1177
- rb_define_method(cXMLReader, "name", rxml_reader_name, 0);
1178
- rb_define_method(cXMLReader, "namespace_uri", rxml_reader_namespace_uri, 0);
1179
- rb_define_method(cXMLReader, "next", rxml_reader_next, 0);
1180
- rb_define_method(cXMLReader, "next_sibling", rxml_reader_next_sibling, 0);
1181
- rb_define_method(cXMLReader, "node", rxml_reader_node, 0);
1182
- rb_define_method(cXMLReader, "node_type", rxml_reader_node_type, 0);
1183
- rb_define_method(cXMLReader, "normalization", rxml_reader_normalization, 0);
1184
- rb_define_method(cXMLReader, "prefix", rxml_reader_prefix, 0);
1185
- rb_define_method(cXMLReader, "quote_char", rxml_reader_quote_char, 0);
1186
- rb_define_method(cXMLReader, "read", rxml_reader_read, 0);
1187
- rb_define_method(cXMLReader, "read_attribute_value", rxml_reader_read_attr_value, 0);
1188
- rb_define_method(cXMLReader, "read_inner_xml", rxml_reader_read_inner_xml, 0);
1189
- rb_define_method(cXMLReader, "read_outer_xml", rxml_reader_read_outer_xml, 0);
1190
- rb_define_method(cXMLReader, "read_state", rxml_reader_read_state, 0);
1191
- rb_define_method(cXMLReader, "read_string", rxml_reader_read_string, 0);
1192
- rb_define_method(cXMLReader, "relax_ng_validate", rxml_reader_relax_ng_validate, 1);
1193
- rb_define_method(cXMLReader, "standalone", rxml_reader_standalone, 0);
1194
- #if LIBXML_VERSION >= 20620
1195
- rb_define_method(cXMLReader, "schema_validate", rxml_reader_schema_validate, 1);
1196
- #endif
1197
- rb_define_method(cXMLReader, "value", rxml_reader_value, 0);
1198
- rb_define_method(cXMLReader, "xml_lang", rxml_reader_xml_lang, 0);
1199
- rb_define_method(cXMLReader, "xml_version", rxml_reader_xml_version, 0);
1200
- rb_define_method(cXMLReader, "default?", rxml_reader_default, 0);
1201
- rb_define_method(cXMLReader, "empty_element?", rxml_reader_empty_element, 0);
1202
- rb_define_method(cXMLReader, "namespace_declaration?", rxml_reader_namespace_declaration, 0);
1203
- rb_define_method(cXMLReader, "valid?", rxml_reader_valid, 0);
1204
-
1205
- /* Constants */
1206
- rb_define_const(cXMLReader, "LOADDTD", INT2FIX(XML_PARSER_LOADDTD));
1207
- rb_define_const(cXMLReader, "DEFAULTATTRS", INT2FIX(XML_PARSER_DEFAULTATTRS));
1208
- rb_define_const(cXMLReader, "VALIDATE", INT2FIX(XML_PARSER_VALIDATE));
1209
- rb_define_const(cXMLReader, "SUBST_ENTITIES", INT2FIX(XML_PARSER_SUBST_ENTITIES));
1210
-
1211
- rb_define_const(cXMLReader, "SEVERITY_VALIDITY_WARNING", INT2FIX(XML_PARSER_SEVERITY_VALIDITY_WARNING));
1212
- rb_define_const(cXMLReader, "SEVERITY_VALIDITY_ERROR", INT2FIX(XML_PARSER_SEVERITY_VALIDITY_ERROR));
1213
- rb_define_const(cXMLReader, "SEVERITY_WARNING", INT2FIX(XML_PARSER_SEVERITY_WARNING));
1214
- rb_define_const(cXMLReader, "SEVERITY_ERROR", INT2FIX(XML_PARSER_SEVERITY_ERROR));
1215
-
1216
- rb_define_const(cXMLReader, "TYPE_NONE", INT2FIX(XML_READER_TYPE_NONE));
1217
- rb_define_const(cXMLReader, "TYPE_ELEMENT", INT2FIX(XML_READER_TYPE_ELEMENT));
1218
- rb_define_const(cXMLReader, "TYPE_ATTRIBUTE", INT2FIX(XML_READER_TYPE_ATTRIBUTE));
1219
- rb_define_const(cXMLReader, "TYPE_TEXT", INT2FIX(XML_READER_TYPE_TEXT));
1220
- rb_define_const(cXMLReader, "TYPE_CDATA", INT2FIX(XML_READER_TYPE_CDATA));
1221
- rb_define_const(cXMLReader, "TYPE_ENTITY_REFERENCE", INT2FIX(XML_READER_TYPE_ENTITY_REFERENCE));
1222
- rb_define_const(cXMLReader, "TYPE_ENTITY", INT2FIX(XML_READER_TYPE_ENTITY));
1223
- rb_define_const(cXMLReader, "TYPE_PROCESSING_INSTRUCTION", INT2FIX(XML_READER_TYPE_PROCESSING_INSTRUCTION));
1224
- rb_define_const(cXMLReader, "TYPE_COMMENT", INT2FIX(XML_READER_TYPE_COMMENT));
1225
- rb_define_const(cXMLReader, "TYPE_DOCUMENT", INT2FIX(XML_READER_TYPE_DOCUMENT));
1226
- rb_define_const(cXMLReader, "TYPE_DOCUMENT_TYPE", INT2FIX(XML_READER_TYPE_DOCUMENT_TYPE));
1227
- rb_define_const(cXMLReader, "TYPE_DOCUMENT_FRAGMENT", INT2FIX(XML_READER_TYPE_DOCUMENT_FRAGMENT));
1228
- rb_define_const(cXMLReader, "TYPE_NOTATION", INT2FIX(XML_READER_TYPE_NOTATION));
1229
- rb_define_const(cXMLReader, "TYPE_WHITESPACE", INT2FIX(XML_READER_TYPE_WHITESPACE));
1230
- rb_define_const(cXMLReader, "TYPE_SIGNIFICANT_WHITESPACE", INT2FIX(XML_READER_TYPE_SIGNIFICANT_WHITESPACE));
1231
- rb_define_const(cXMLReader, "TYPE_END_ELEMENT", INT2FIX(XML_READER_TYPE_END_ELEMENT));
1232
- rb_define_const(cXMLReader, "TYPE_END_ENTITY", INT2FIX(XML_READER_TYPE_END_ENTITY));
1233
- rb_define_const(cXMLReader, "TYPE_XML_DECLARATION", INT2FIX(XML_READER_TYPE_XML_DECLARATION));
1234
-
1235
- /* Read states */
1236
- rb_define_const(cXMLReader, "MODE_INITIAL", INT2FIX(XML_TEXTREADER_MODE_INITIAL));
1237
- rb_define_const(cXMLReader, "MODE_INTERACTIVE", INT2FIX(XML_TEXTREADER_MODE_INTERACTIVE));
1238
- rb_define_const(cXMLReader, "MODE_ERROR", INT2FIX(XML_TEXTREADER_MODE_ERROR));
1239
- rb_define_const(cXMLReader, "MODE_EOF", INT2FIX(XML_TEXTREADER_MODE_EOF));
1240
- rb_define_const(cXMLReader, "MODE_CLOSED", INT2FIX(XML_TEXTREADER_MODE_CLOSED));
1241
- rb_define_const(cXMLReader, "MODE_READING", INT2FIX(XML_TEXTREADER_MODE_READING));
1242
- }
1
+ /* Copyright (c) 2006-2007 Apple Inc.
2
+ * Please see the LICENSE file for copyright and distribution information. */
3
+
4
+ #include "ruby_libxml.h"
5
+ #include "ruby_xml_reader.h"
6
+
7
+ #include <libxml/xmlreader.h>
8
+ #include <libxml/xmlschemas.h>
9
+
10
+ /*
11
+ * Document-class: LibXML::XML::Reader
12
+ *
13
+ * The XML::Reader class provides a simpler, alternative way of parsing an XML
14
+ * document in contrast to XML::Parser or XML::SaxParser. A XML::Reader instance
15
+ * acts like a cursor going forward in a document stream, stopping at each node
16
+ * it encounters. To advance to the next node, simply call XML::Reader#read.
17
+ *
18
+ * The XML::Reader API closely matches the DOM Core specification and supports
19
+ * namespaces, xml:base, entity handling and DTDs.
20
+ *
21
+ * To summarize, XML::Reader provides a far simpler API to use versus XML::SaxParser
22
+ * and is more memory efficient than using XML::Parser to create a DOM tree.
23
+ *
24
+ * Example:
25
+ *
26
+ * reader = XML::Reader.string("<foo><bar>1</bar><bar>2</bar><bar>3</bar></foo>")
27
+ * reader.read
28
+ * assert_equal('foo', reader.name)
29
+ * assert_nil(reader.value)
30
+ *
31
+ * 3.times do |i|
32
+ * reader.read
33
+ * assert_equal(XML::Reader::TYPE_ELEMENT, reader.node_type)
34
+ * assert_equal('bar', reader.name)
35
+ * reader.read
36
+ * assert_equal(XML::Reader::TYPE_TEXT, reader.node_type)
37
+ * assert_equal((i + 1).to_s, reader.value)
38
+ * reader.read
39
+ * assert_equal(XML::Reader::TYPE_END_ELEMENT, reader.node_type)
40
+ * end
41
+ *
42
+ * You can also parse documents (see XML::Reader.document),
43
+ * strings (see XML::Reader.string) and io objects (see
44
+ * XML::Reader.io).
45
+ *
46
+ * For a more in depth tutorial, albeit in C, see http://xmlsoft.org/xmlreader.html.*/
47
+
48
+
49
+ /* NOTE - We need to wrap the reader's document to support Reader.read.node.find('/').
50
+ To do this we need to use xmlTextReaderCurrentDoc which means we have to free the
51
+ document ourselves. Annoying... */
52
+
53
+ VALUE cXMLReader;
54
+
55
+ static ID BASE_URI_SYMBOL;
56
+ static ID ENCODING_SYMBOL;
57
+ static ID IO_ATTR;
58
+ static ID OPTIONS_SYMBOL;
59
+
60
+ static void rxml_reader_free(xmlTextReaderPtr xreader)
61
+ {
62
+ xmlFreeTextReader(xreader);
63
+ }
64
+
65
+ static void rxml_reader_mark(xmlTextReaderPtr xreader)
66
+ {
67
+ xmlDocPtr xdoc = xmlTextReaderCurrentDoc(xreader);
68
+ VALUE doc = (VALUE)xdoc->_private;
69
+ rb_gc_mark(doc);
70
+ }
71
+
72
+ static VALUE rxml_reader_wrap(xmlTextReaderPtr xreader)
73
+ {
74
+ return Data_Wrap_Struct(cXMLReader, NULL, rxml_reader_free, xreader);
75
+ }
76
+
77
+
78
+ static xmlTextReaderPtr rxml_text_reader_get(VALUE obj)
79
+ {
80
+ xmlTextReaderPtr xreader;
81
+ Data_Get_Struct(obj, xmlTextReader, xreader);
82
+ return xreader;
83
+ }
84
+
85
+ /*
86
+ * call-seq:
87
+ * XML::Reader.document(doc) -> XML::Reader
88
+ *
89
+ * Create a new reader for the specified document.
90
+ */
91
+ VALUE rxml_reader_document(VALUE klass, VALUE doc)
92
+ {
93
+ xmlDocPtr xdoc;
94
+ xmlTextReaderPtr xreader;
95
+
96
+ Data_Get_Struct(doc, xmlDoc, xdoc);
97
+
98
+ xreader = xmlReaderWalker(xdoc);
99
+
100
+ if (xreader == NULL)
101
+ rxml_raise(&xmlLastError);
102
+
103
+ return rxml_reader_wrap(xreader);
104
+ }
105
+
106
+ /* call-seq:
107
+ * XML::Reader.file(path) -> XML::Reader
108
+ * XML::Reader.file(path, :encoding => XML::Encoding::UTF_8,
109
+ * :options => XML::Parser::Options::NOENT) -> XML::Reader
110
+ *
111
+ * Creates a new reader by parsing the specified file or uri.
112
+ *
113
+ * You may provide an optional hash table to control how the
114
+ * parsing is performed. Valid options are:
115
+ *
116
+ * encoding - The document encoding, defaults to nil. Valid values
117
+ * are the encoding constants defined on XML::Encoding.
118
+ * options - Controls the execution of the parser, defaults to 0.
119
+ * Valid values are the constants defined on
120
+ * XML::Parser::Options. Multiple options can be combined
121
+ * by using Bitwise OR (|).
122
+ */
123
+ static VALUE rxml_reader_file(int argc, VALUE *argv, VALUE klass)
124
+ {
125
+ xmlTextReaderPtr xreader;
126
+ VALUE path;
127
+ VALUE options;
128
+
129
+ const char *xencoding = NULL;
130
+ int xoptions = 0;
131
+
132
+ rb_scan_args(argc, argv, "11", &path, &options);
133
+ Check_Type(path, T_STRING);
134
+
135
+ if (!NIL_P(options))
136
+ {
137
+ VALUE encoding = Qnil;
138
+ VALUE parserOptions = Qnil;
139
+
140
+ Check_Type(options, T_HASH);
141
+
142
+ encoding = rb_hash_aref(options, BASE_URI_SYMBOL);
143
+ xencoding = NIL_P(encoding) ? NULL : xmlGetCharEncodingName(NUM2INT(encoding));
144
+
145
+ parserOptions = rb_hash_aref(options, OPTIONS_SYMBOL);
146
+ xoptions = NIL_P(parserOptions) ? 0 : NUM2INT(parserOptions);
147
+ }
148
+
149
+ xreader = xmlReaderForFile(StringValueCStr(path), xencoding, xoptions);
150
+
151
+ if (xreader == NULL)
152
+ rxml_raise(&xmlLastError);
153
+
154
+ return rxml_reader_wrap(xreader);
155
+ }
156
+
157
+ /* call-seq:
158
+ * XML::Reader.io(io) -> XML::Reader
159
+ * XML::Reader.io(io, :encoding => XML::Encoding::UTF_8,
160
+ * :options => XML::Parser::Options::NOENT) -> XML::Reader
161
+ *
162
+ * Creates a new reader by parsing the specified io object.
163
+ *
164
+ * You may provide an optional hash table to control how the
165
+ * parsing is performed. Valid options are:
166
+ *
167
+ * base_uri - The base url for the parsed document.
168
+ * encoding - The document encoding, defaults to nil. Valid values
169
+ * are the encoding constants defined on XML::Encoding.
170
+ * options - Controls the execution of the parser, defaults to 0.
171
+ * Valid values are the constants defined on
172
+ * XML::Parser::Options. Multiple options can be combined
173
+ * by using Bitwise OR (|).
174
+ */
175
+ static VALUE rxml_reader_io(int argc, VALUE *argv, VALUE klass)
176
+ {
177
+ xmlTextReaderPtr xreader;
178
+ VALUE result;
179
+ VALUE io;
180
+ VALUE options;
181
+ char *xbaseurl = NULL;
182
+ const char *xencoding = NULL;
183
+ int xoptions = 0;
184
+
185
+ rb_scan_args(argc, argv, "11", &io, &options);
186
+
187
+ if (!NIL_P(options))
188
+ {
189
+ VALUE baseurl = Qnil;
190
+ VALUE encoding = Qnil;
191
+ VALUE parserOptions = Qnil;
192
+
193
+ Check_Type(options, T_HASH);
194
+
195
+ baseurl = rb_hash_aref(options, BASE_URI_SYMBOL);
196
+ xbaseurl = NIL_P(baseurl) ? NULL : StringValueCStr(baseurl);
197
+
198
+ encoding = rb_hash_aref(options, ENCODING_SYMBOL);
199
+ xencoding = NIL_P(encoding) ? NULL : xmlGetCharEncodingName(NUM2INT(encoding));
200
+
201
+ parserOptions = rb_hash_aref(options, OPTIONS_SYMBOL);
202
+ xoptions = NIL_P(parserOptions) ? 0 : NUM2INT(parserOptions);
203
+ }
204
+
205
+ xreader = xmlReaderForIO((xmlInputReadCallback) rxml_read_callback, NULL,
206
+ (void *) io,
207
+ xbaseurl, xencoding, xoptions);
208
+
209
+ if (xreader == NULL)
210
+ rxml_raise(&xmlLastError);
211
+
212
+ result = rxml_reader_wrap(xreader);
213
+
214
+ /* Attach io object to parser so it won't get freed.*/
215
+ rb_ivar_set(result, IO_ATTR, io);
216
+
217
+ return result;
218
+ }
219
+
220
+ /* call-seq:
221
+ * XML::Reader.string(string) -> XML::Reader
222
+ * XML::Reader.string(string, :encoding => XML::Encoding::UTF_8,
223
+ * :options => XML::Parser::Options::NOENT) -> XML::Reader
224
+ *
225
+ * Creates a new reader by parsing the specified string.
226
+ *
227
+ * You may provide an optional hash table to control how the
228
+ * parsing is performed. Valid options are:
229
+ *
230
+ * base_uri - The base url for the parsed document.
231
+ * encoding - The document encoding, defaults to nil. Valid values
232
+ * are the encoding constants defined on XML::Encoding.
233
+ * options - Controls the execution of the parser, defaults to 0.
234
+ * Valid values are the constants defined on
235
+ * XML::Parser::Options. Multiple options can be combined
236
+ * by using Bitwise OR (|).
237
+ */
238
+ static VALUE rxml_reader_string(int argc, VALUE *argv, VALUE klass)
239
+ {
240
+ xmlTextReaderPtr xreader;
241
+ VALUE string;
242
+ VALUE options;
243
+ char *xbaseurl = NULL;
244
+ const char *xencoding = NULL;
245
+ int xoptions = 0;
246
+
247
+ rb_scan_args(argc, argv, "11", &string, &options);
248
+ Check_Type(string, T_STRING);
249
+
250
+ if (!NIL_P(options))
251
+ {
252
+ VALUE baseurl = Qnil;
253
+ VALUE encoding = Qnil;
254
+ VALUE parserOptions = Qnil;
255
+
256
+ Check_Type(options, T_HASH);
257
+
258
+ baseurl = rb_hash_aref(options, BASE_URI_SYMBOL);
259
+ xbaseurl = NIL_P(baseurl) ? NULL : StringValueCStr(baseurl);
260
+
261
+ encoding = rb_hash_aref(options, ENCODING_SYMBOL);
262
+ xencoding = NIL_P(encoding) ? NULL : xmlGetCharEncodingName(NUM2INT(encoding));
263
+
264
+ parserOptions = rb_hash_aref(options, OPTIONS_SYMBOL);
265
+ xoptions = NIL_P(parserOptions) ? 0 : NUM2INT(parserOptions);
266
+ }
267
+
268
+ xreader = xmlReaderForMemory(StringValueCStr(string), (int)RSTRING_LEN(string),
269
+ xbaseurl, xencoding, xoptions);
270
+
271
+ if (xreader == NULL)
272
+ rxml_raise(&xmlLastError);
273
+
274
+ return rxml_reader_wrap(xreader);
275
+ }
276
+
277
+ /*
278
+ * call-seq:
279
+ * reader.close -> code
280
+ *
281
+ * This method releases any resources allocated by the current instance,
282
+ * changes the state to Closed and closes any underlying input.
283
+ */
284
+ static VALUE rxml_reader_close(VALUE self)
285
+ {
286
+ xmlTextReaderPtr xreader = rxml_text_reader_get(self);
287
+ return INT2FIX(xmlTextReaderClose(xreader));
288
+ }
289
+
290
+ /*
291
+ * call-seq:
292
+ * reader.move_to_attribute_no(index) -> code
293
+ *
294
+ * Move the position of the current instance to the attribute with the
295
+ * specified index relative to the containing element.
296
+ */
297
+ static VALUE rxml_reader_move_to_attr_no(VALUE self, VALUE index)
298
+ {
299
+ int ret;
300
+ xmlTextReaderPtr xreader;
301
+
302
+ xreader = rxml_text_reader_get(self);
303
+ ret = xmlTextReaderMoveToAttributeNo(xreader, FIX2INT(index));
304
+
305
+ return INT2FIX(ret);
306
+ }
307
+
308
+ /*
309
+ * call-seq:
310
+ * reader.move_to_attribute(localName) -> code
311
+ *
312
+ * Move the position of the current instance to the attribute with the
313
+ * specified name relative to the containing element.
314
+ */
315
+ static VALUE rxml_reader_move_to_attr(VALUE self, VALUE val)
316
+ {
317
+ int ret;
318
+ xmlTextReaderPtr xreader;
319
+
320
+ xreader = rxml_text_reader_get(self);
321
+ ret = xmlTextReaderMoveToAttribute(xreader,
322
+ (const xmlChar *) StringValueCStr(val));
323
+
324
+ return INT2FIX(ret);
325
+ }
326
+
327
+ /*
328
+ * call-seq:
329
+ * reader.move_to_attribute_ns(localName, namespaceURI) -> code
330
+ *
331
+ * Move the position of the current instance to the attribute with the
332
+ * specified name and namespace relative to the containing element.
333
+ */
334
+ static VALUE rxml_reader_move_to_attr_ns(VALUE self, VALUE name, VALUE ns)
335
+ {
336
+ int ret;
337
+ xmlTextReaderPtr xreader;
338
+
339
+ xreader = rxml_text_reader_get(self);
340
+ ret = xmlTextReaderMoveToAttributeNs(xreader,
341
+ (const xmlChar *) StringValueCStr(name),
342
+ (const xmlChar *) StringValueCStr(ns));
343
+
344
+ return INT2FIX(ret);
345
+ }
346
+
347
+ /*
348
+ * call-seq:
349
+ * reader.move_to_first_attribute -> code
350
+ *
351
+ * Move the position of the current instance to the first attribute associated
352
+ * with the current node.
353
+ */
354
+ static VALUE rxml_reader_move_to_first_attr(VALUE self)
355
+ {
356
+ xmlTextReaderPtr xreader = rxml_text_reader_get(self);
357
+ return INT2FIX(xmlTextReaderMoveToFirstAttribute(xreader));
358
+ }
359
+
360
+ /*
361
+ * call-seq:
362
+ * reader.move_to_next_attribute -> code
363
+ *
364
+ * Move the position of the current instance to the next attribute associated
365
+ * with the current node.
366
+ */
367
+ static VALUE rxml_reader_move_to_next_attr(VALUE self)
368
+ {
369
+ xmlTextReaderPtr xreader = rxml_text_reader_get(self);
370
+ return INT2FIX(xmlTextReaderMoveToNextAttribute(xreader));
371
+ }
372
+
373
+ /*
374
+ * call-seq:
375
+ * reader.move_to_element -> code
376
+ *
377
+ * Move the position of the current instance to the node that contains the
378
+ * current attribute node.
379
+ */
380
+ static VALUE rxml_reader_move_to_element(VALUE self)
381
+ {
382
+ xmlTextReaderPtr xreader = rxml_text_reader_get(self);
383
+ return INT2FIX(xmlTextReaderMoveToElement(xreader));
384
+ }
385
+
386
+ /*
387
+ * call-seq:
388
+ * reader.next -> code
389
+ *
390
+ * Skip to the node following the current one in document order while avoiding
391
+ * the subtree if any.
392
+ */
393
+ static VALUE rxml_reader_next(VALUE self)
394
+ {
395
+ xmlTextReaderPtr xreader = rxml_text_reader_get(self);
396
+ return INT2FIX(xmlTextReaderNext(xreader));
397
+ }
398
+
399
+ /*
400
+ * call-seq:
401
+ * reader.next_sibling -> code
402
+ *
403
+ * Skip to the node following the current one in document order while avoiding
404
+ * the subtree if any. Currently implemented only for Readers built on a
405
+ * document.
406
+ */
407
+ static VALUE rxml_reader_next_sibling(VALUE self)
408
+ {
409
+ xmlTextReaderPtr xreader = rxml_text_reader_get(self);
410
+ return INT2FIX(xmlTextReaderNextSibling(xreader));
411
+ }
412
+
413
+ /*
414
+ * call-seq:
415
+ * reader.node -> XML::Node
416
+ *
417
+ * Returns the reader's current node. It will return
418
+ * nil if Reader#read has not yet been called.
419
+ * WARNING - Using this method is dangerous because the
420
+ * node may be destroyed on the next #read.
421
+ */
422
+ static VALUE rxml_reader_node(VALUE self)
423
+ {
424
+ xmlTextReaderPtr xreader = rxml_text_reader_get(self);
425
+ xmlNodePtr xnode = xmlTextReaderCurrentNode(xreader);
426
+ return xnode ? rxml_node_wrap(xnode) : Qnil;
427
+ }
428
+
429
+ /*
430
+ * call-seq:
431
+ * reader.node_type -> type
432
+ *
433
+ * Get the node type of the current node. Reference:
434
+ * http://dotgnu.org/pnetlib-doc/System/Xml/XmlNodeType.html
435
+ */
436
+ static VALUE rxml_reader_node_type(VALUE self)
437
+ {
438
+ xmlTextReaderPtr xreader = rxml_text_reader_get(self);
439
+ return INT2FIX(xmlTextReaderNodeType(xreader));
440
+ }
441
+
442
+ /*
443
+ * call-seq:
444
+ * reader.normalization -> value
445
+ *
446
+ * The value indicating whether to normalize white space and attribute values.
447
+ * Since attribute value and end of line normalizations are a MUST in the XML
448
+ * specification only the value true is accepted. The broken behaviour of
449
+ * accepting out of range character entities like &#0; is of course not
450
+ * supported either.
451
+ *
452
+ * Return 1 or -1 in case of error.
453
+ */
454
+ static VALUE rxml_reader_normalization(VALUE self)
455
+ {
456
+ xmlTextReaderPtr xreader = rxml_text_reader_get(self);
457
+ return INT2FIX(xmlTextReaderNormalization(xreader));
458
+ }
459
+
460
+ /*
461
+ * call-seq:
462
+ * reader.read -> nil|true|false
463
+ *
464
+ * Causes the reader to move to the next node in the stream, exposing its properties.
465
+ *
466
+ * Returns true if a node was successfully read or false if there are no more
467
+ * nodes to read. On errors, an exception is raised.*/
468
+ static VALUE rxml_reader_read(VALUE self)
469
+ {
470
+ xmlTextReaderPtr xreader = rxml_text_reader_get(self);
471
+ int result = xmlTextReaderRead(xreader);
472
+ switch(result)
473
+ {
474
+ case -1:
475
+ rxml_raise(&xmlLastError);
476
+ return Qnil;
477
+ break;
478
+ case 0:
479
+ return Qfalse;
480
+ case 1:
481
+ return Qtrue;
482
+ default:
483
+ rb_raise(rb_eRuntimeError,
484
+ "xmlTextReaderRead did not return -1, 0 or 1. Return value was: %d", result);
485
+ }
486
+ }
487
+
488
+ /*
489
+ * call-seq:
490
+ * reader.read_attribute_value -> code
491
+ *
492
+ * Parse an attribute value into one or more Text and EntityReference nodes.
493
+ *
494
+ * Return 1 in case of success, 0 if the reader was not positioned on an
495
+ * attribute node or all the attribute values have been read, or -1 in case of
496
+ * error.
497
+ */
498
+ static VALUE rxml_reader_read_attr_value(VALUE self)
499
+ {
500
+ xmlTextReaderPtr xreader = rxml_text_reader_get(self);
501
+ return INT2FIX(xmlTextReaderReadAttributeValue(xreader));
502
+ }
503
+
504
+ /*
505
+ * call-seq:
506
+ * reader.read_inner_xml -> data
507
+ *
508
+ * Read the contents of the current node, including child nodes and markup.
509
+ *
510
+ * Return a string containing the XML content, or nil if the current node is
511
+ * neither an element nor attribute, or has no child nodes.
512
+ */
513
+ static VALUE rxml_reader_read_inner_xml(VALUE self)
514
+ {
515
+ VALUE result = Qnil;
516
+ xmlTextReaderPtr xReader = rxml_text_reader_get(self);
517
+
518
+ xmlChar *xml = xmlTextReaderReadInnerXml(xReader);
519
+
520
+ if (xml)
521
+ {
522
+ const xmlChar *xencoding = xmlTextReaderConstEncoding(xReader);
523
+ result = rxml_new_cstr( xml, xencoding);
524
+ xmlFree(xml);
525
+ }
526
+
527
+ return result;
528
+ }
529
+
530
+ /*
531
+ * call-seq:
532
+ * reader.read_outer_xml -> data
533
+ *
534
+ * Read the contents of the current node, including child nodes and markup.
535
+ *
536
+ * Return a string containing the XML content, or nil if the current node is
537
+ * neither an element nor attribute, or has no child nodes.
538
+ */
539
+ static VALUE rxml_reader_read_outer_xml(VALUE self)
540
+ {
541
+ VALUE result = Qnil;
542
+ xmlTextReaderPtr xReader = rxml_text_reader_get(self);
543
+
544
+ xmlChar *xml = xmlTextReaderReadOuterXml(xReader);
545
+
546
+ if (xml)
547
+ {
548
+ const xmlChar *xencoding = xmlTextReaderConstEncoding(xReader);
549
+ result = rxml_new_cstr( xml, xencoding);
550
+ xmlFree(xml);
551
+ }
552
+
553
+ return result;
554
+ }
555
+
556
+ /*
557
+ * call-seq:
558
+ * reader.read_state -> state
559
+ *
560
+ * Get the read state of the reader.
561
+ */
562
+ static VALUE rxml_reader_read_state(VALUE self)
563
+ {
564
+ xmlTextReaderPtr xreader = rxml_text_reader_get(self);
565
+ return INT2FIX(xmlTextReaderReadState(xreader));
566
+ }
567
+
568
+ /*
569
+ * call-seq:
570
+ * reader.read_string -> string
571
+ *
572
+ * Read the contents of an element or a text node as a string.
573
+ *
574
+ * Return a string containing the contents of the Element or Text node, or nil
575
+ * if the reader is positioned on any other type of node.
576
+ */
577
+ static VALUE rxml_reader_read_string(VALUE self)
578
+ {
579
+ VALUE result = Qnil;
580
+ xmlTextReaderPtr xReader = rxml_text_reader_get(self);
581
+
582
+ xmlChar *xml = xmlTextReaderReadString(xReader);
583
+
584
+ if (xml)
585
+ {
586
+ const xmlChar *xencoding = xmlTextReaderConstEncoding(xReader);
587
+ result = rxml_new_cstr( xml, xencoding);
588
+ xmlFree(xml);
589
+ }
590
+
591
+ return result;
592
+ }
593
+
594
+ /*
595
+ * call-seq:
596
+ * reader.relax_ng_validate(rng) -> boolean
597
+ *
598
+ * Use RelaxNG to validate the document as it is processed. Activation is only
599
+ * possible before the first read. If +rng+ is nil, the RelaxNG validation is
600
+ * deactivated.
601
+ *
602
+ * Return true in case the RelaxNG validation could be (de)activated and false in
603
+ * case of error.
604
+ */
605
+ static VALUE rxml_reader_relax_ng_validate(VALUE self, VALUE rng)
606
+ {
607
+ xmlTextReaderPtr xreader = rxml_text_reader_get(self);
608
+ xmlRelaxNGPtr xrelax;
609
+ int status;
610
+ Data_Get_Struct(rng, xmlRelaxNG, xrelax);
611
+
612
+ status = xmlTextReaderRelaxNGSetSchema(xreader, xrelax);
613
+ return (status == 0 ? Qtrue : Qfalse);
614
+ }
615
+
616
+ #if LIBXML_VERSION >= 20620
617
+ /*
618
+ * call-seq:
619
+ * reader.schema_validate(schema) -> boolean
620
+ *
621
+ * Use W3C XSD schema to validate the document as it is processed. Activation
622
+ * is only possible before the first read. If +schema+ is nil, then XML Schema
623
+ * validation is deactivated.
624
+ *
625
+ * Return true if the schema's validation could be (de)activated and false
626
+ * otherwise.
627
+ */
628
+ static VALUE
629
+ rxml_reader_schema_validate(VALUE self, VALUE xsd)
630
+ {
631
+ xmlTextReaderPtr xreader = rxml_text_reader_get(self);
632
+ xmlSchemaPtr xschema;
633
+ int status;
634
+
635
+ Data_Get_Struct(xsd, xmlSchema, xschema);
636
+ status = xmlTextReaderSetSchema(xreader, xschema);
637
+ return (status == 0 ? Qtrue : Qfalse);
638
+ }
639
+ #endif
640
+
641
+ /*
642
+ * call-seq:
643
+ * reader.name -> name
644
+ *
645
+ * Return the qualified name of the node.
646
+ */
647
+ static VALUE rxml_reader_name(VALUE self)
648
+ {
649
+ xmlTextReaderPtr xReader = rxml_text_reader_get(self);
650
+ const xmlChar *result = xmlTextReaderConstName(xReader);
651
+ const xmlChar *xencoding = xmlTextReaderConstEncoding(xReader);
652
+
653
+ return (result == NULL ? Qnil : rxml_new_cstr(result, xencoding));
654
+ }
655
+
656
+ /*
657
+ * call-seq:
658
+ * reader.local_name -> name
659
+ *
660
+ * Return the local name of the node.
661
+ */
662
+ static VALUE rxml_reader_local_name(VALUE self)
663
+ {
664
+ xmlTextReaderPtr xReader = rxml_text_reader_get(self);
665
+ const xmlChar *result = xmlTextReaderConstLocalName(xReader);
666
+ const xmlChar *xencoding = xmlTextReaderConstEncoding(xReader);
667
+
668
+ return (result == NULL ? Qnil : rxml_new_cstr(result, xencoding));
669
+ }
670
+
671
+ /*
672
+ * call-seq:
673
+ * reader.attribute_count -> count
674
+ *
675
+ * Provide the number of attributes of the current node.
676
+ */
677
+ static VALUE rxml_reader_attr_count(VALUE self)
678
+ {
679
+ xmlTextReaderPtr xreader = rxml_text_reader_get(self);
680
+ return INT2FIX(xmlTextReaderAttributeCount(xreader));
681
+ }
682
+
683
+ /*
684
+ * call-seq:
685
+ * reader.encoding -> XML::Encoding::UTF_8
686
+ *
687
+ * Returns the encoding of the document being read. Note you
688
+ * first have to read data from the reader for encoding
689
+ * to return a value
690
+ *
691
+ * reader = XML::Reader.file(XML_FILE)
692
+ * assert_nil(reader.encoding)
693
+ * reader.read
694
+ * assert_equal(XML::Encoding::UTF_8, reader.encoding)
695
+ *
696
+ * In addition, libxml always appears to return nil for the encoding
697
+ * when parsing strings.
698
+ */
699
+ static VALUE rxml_reader_encoding(VALUE self)
700
+ {
701
+ xmlTextReaderPtr xreader = rxml_text_reader_get(self);
702
+ const xmlChar *xencoding = xmlTextReaderConstEncoding(xreader);
703
+ if (xencoding)
704
+ return INT2NUM(xmlParseCharEncoding((const char*)xencoding));
705
+ else
706
+ return INT2NUM(XML_CHAR_ENCODING_NONE);
707
+ }
708
+
709
+ /*
710
+ * call-seq:
711
+ * reader.base_uri -> URI
712
+ *
713
+ * Determine the base URI of the node.
714
+ */
715
+ static VALUE rxml_reader_base_uri(VALUE self)
716
+ {
717
+ xmlTextReaderPtr xReader = rxml_text_reader_get(self);
718
+ const xmlChar *result = xmlTextReaderConstBaseUri(xReader);
719
+ const xmlChar *xencoding = xmlTextReaderConstEncoding(xReader);
720
+
721
+ return (result == NULL ? Qnil : rxml_new_cstr(result, xencoding));
722
+ }
723
+
724
+ /*
725
+ * call-seq:
726
+ * reader.namespace_uri -> URI
727
+ *
728
+ * Determine the namespace URI of the node.
729
+ */
730
+ static VALUE rxml_reader_namespace_uri(VALUE self)
731
+ {
732
+ xmlTextReaderPtr xReader = rxml_text_reader_get(self);
733
+ const xmlChar *result = xmlTextReaderConstNamespaceUri(xReader);
734
+ const xmlChar *xencoding = xmlTextReaderConstEncoding(xReader);
735
+
736
+ return (result == NULL ? Qnil : rxml_new_cstr(result, xencoding));
737
+ }
738
+
739
+ /*
740
+ * call-seq:
741
+ * reader.value -> text
742
+ *
743
+ * Provide the text value of the node if present.
744
+ */
745
+ static VALUE rxml_reader_value(VALUE self)
746
+ {
747
+ xmlTextReaderPtr xReader = rxml_text_reader_get(self);
748
+ const xmlChar *result = xmlTextReaderConstValue(xReader);
749
+ const xmlChar *xencoding = xmlTextReaderConstEncoding(xReader);
750
+
751
+ return (result == NULL ? Qnil : rxml_new_cstr(result, xencoding));
752
+ }
753
+
754
+ /*
755
+ * call-seq:
756
+ * reader.prefix -> prefix
757
+ *
758
+ * Get a shorthand reference to the namespace associated with the node.
759
+ */
760
+ static VALUE rxml_reader_prefix(VALUE self)
761
+ {
762
+ xmlTextReaderPtr xReader = rxml_text_reader_get(self);
763
+ const xmlChar *result = xmlTextReaderConstPrefix(xReader);
764
+ const xmlChar *xencoding = xmlTextReaderConstEncoding(xReader);
765
+
766
+ return (result == NULL ? Qnil : rxml_new_cstr(result, xencoding));
767
+ }
768
+
769
+ /*
770
+ * call-seq:
771
+ * reader.depth -> depth
772
+ *
773
+ * Get the depth of the node in the tree.
774
+ */
775
+ static VALUE rxml_reader_depth(VALUE self)
776
+ {
777
+ xmlTextReaderPtr xreader = rxml_text_reader_get(self);
778
+ return INT2FIX(xmlTextReaderDepth(xreader));
779
+ }
780
+
781
+ /*
782
+ * call-seq:
783
+ * reader.quote_char -> char
784
+ *
785
+ * Get the quotation mark character used to enclose the value of an attribute,
786
+ * as an integer value (and -1 in case of error).
787
+ */
788
+ static VALUE rxml_reader_quote_char(VALUE self)
789
+ {
790
+ xmlTextReaderPtr xreader = rxml_text_reader_get(self);
791
+ return INT2FIX(xmlTextReaderQuoteChar(xreader));
792
+ }
793
+
794
+ /*
795
+ * call-seq:
796
+ * reader.standalone -> code
797
+ *
798
+ * Determine the standalone status of the document being read.
799
+ *
800
+ * Return 1 if the document was declared to be standalone, 0 if it was
801
+ * declared to be not standalone, or -1 if the document did not specify its
802
+ * standalone status or in case of error.
803
+ */
804
+ static VALUE rxml_reader_standalone(VALUE self)
805
+ {
806
+ xmlTextReaderPtr xreader = rxml_text_reader_get(self);
807
+ return INT2FIX(xmlTextReaderStandalone(xreader));
808
+ }
809
+
810
+ /*
811
+ * call-seq:
812
+ * reader.xml_lang -> value
813
+ *
814
+ * Get the xml:lang scope within which the node resides.
815
+ */
816
+ static VALUE rxml_reader_xml_lang(VALUE self)
817
+ {
818
+ xmlTextReaderPtr xReader = rxml_text_reader_get(self);
819
+ const xmlChar *result = xmlTextReaderConstXmlLang(xReader);
820
+ const xmlChar *xencoding = xmlTextReaderConstEncoding(xReader);
821
+
822
+ return (result == NULL ? Qnil : rxml_new_cstr(result, xencoding));
823
+ }
824
+
825
+ /*
826
+ * call-seq:
827
+ * reader.xml_version -> version
828
+ *
829
+ * Determine the XML version of the document being read.
830
+ */
831
+ static VALUE rxml_reader_xml_version(VALUE self)
832
+ {
833
+ xmlTextReaderPtr xReader = rxml_text_reader_get(self);
834
+ const xmlChar *result = xmlTextReaderConstXmlVersion(xReader);
835
+ const xmlChar *xencoding = xmlTextReaderConstEncoding(xReader);
836
+
837
+ return (result == NULL ? Qnil : rxml_new_cstr(result, xencoding));
838
+ }
839
+
840
+ /*
841
+ * call-seq:
842
+ * reader.has_attributes? -> bool
843
+ *
844
+ * Get whether the node has attributes.
845
+ */
846
+ static VALUE rxml_reader_has_attributes(VALUE self)
847
+ {
848
+ xmlTextReaderPtr xreader = rxml_text_reader_get(self);
849
+ return xmlTextReaderHasAttributes(xreader) ? Qtrue : Qfalse;
850
+ }
851
+
852
+ /*
853
+ * call-seq:
854
+ * reader.has_value? -> bool
855
+ *
856
+ * Get whether the node can have a text value.
857
+ */
858
+ static VALUE rxml_reader_has_value(VALUE self)
859
+ {
860
+ xmlTextReaderPtr xreader = rxml_text_reader_get(self);
861
+ return xmlTextReaderHasValue(xreader) ? Qtrue : Qfalse;
862
+ }
863
+
864
+ /*
865
+ * call-seq:
866
+ * reader[key] -> value
867
+ *
868
+ * Provide the value of the attribute with the specified index (if +key+ is an
869
+ * integer) or with the specified name (if +key+ is a string) relative to the
870
+ * containing element, as a string.
871
+ */
872
+ static VALUE rxml_reader_attribute(VALUE self, VALUE key)
873
+ {
874
+ VALUE result = Qnil;
875
+ xmlChar *xattr;
876
+ xmlTextReaderPtr xReader = rxml_text_reader_get(self);
877
+ const xmlChar *xencoding = xmlTextReaderConstEncoding(xReader);
878
+
879
+ if (TYPE(key) == T_FIXNUM)
880
+ {
881
+ xattr = xmlTextReaderGetAttributeNo(xReader, FIX2INT(key));
882
+ }
883
+ else
884
+ {
885
+ xattr = xmlTextReaderGetAttribute(xReader, (const xmlChar *) StringValueCStr(key));
886
+ }
887
+
888
+ if (xattr)
889
+ {
890
+ result = rxml_new_cstr(xattr, xencoding);
891
+ xmlFree(xattr);
892
+ }
893
+ return result;
894
+ }
895
+
896
+ /*
897
+ * call-seq:
898
+ * reader.get_attribute(localName) -> value
899
+ *
900
+ * Provide the value of the attribute with the specified name
901
+ * relative to the containing element.
902
+ */
903
+ static VALUE rxml_reader_get_attribute(VALUE self, VALUE name)
904
+ {
905
+ VALUE result = Qnil;
906
+ xmlChar *xattr;
907
+ xmlTextReaderPtr xReader = rxml_text_reader_get(self);
908
+ const xmlChar *xencoding = xmlTextReaderConstEncoding(xReader);
909
+
910
+ xattr = xmlTextReaderGetAttribute(xReader, (const xmlChar *) StringValueCStr(name));
911
+ if (xattr)
912
+ {
913
+ result = rxml_new_cstr(xattr, xencoding);
914
+ xmlFree(xattr);
915
+ }
916
+ return result;
917
+ }
918
+
919
+ /*
920
+ * call-seq:
921
+ * reader.get_attribute_no(index) -> value
922
+ *
923
+ * Provide the value of the attribute with the specified index
924
+ * relative to the containing element.
925
+ */
926
+ static VALUE rxml_reader_get_attribute_no(VALUE self, VALUE index)
927
+ {
928
+ VALUE result = Qnil;
929
+ xmlChar *xattr;
930
+ xmlTextReaderPtr xReader = rxml_text_reader_get(self);
931
+ const xmlChar *xencoding = xmlTextReaderConstEncoding(xReader);
932
+
933
+ xattr = xmlTextReaderGetAttributeNo(xReader, FIX2INT(index));
934
+ if (xattr)
935
+ {
936
+ result = rxml_new_cstr(xattr, xencoding);
937
+ xmlFree(xattr);
938
+ }
939
+ return result;
940
+ }
941
+
942
+ static VALUE rxml_reader_get_attribute_ns(VALUE self, VALUE name, VALUE ns)
943
+ {
944
+ VALUE result = Qnil;
945
+ xmlChar *xattr;
946
+ xmlTextReaderPtr xReader = rxml_text_reader_get(self);
947
+ const xmlChar *xencoding = xmlTextReaderConstEncoding(xReader);
948
+
949
+ xattr = xmlTextReaderGetAttributeNs(xReader,
950
+ (const xmlChar *) StringValueCStr(name),
951
+ (const xmlChar *) StringValueCStr(ns));
952
+ if (xattr)
953
+ {
954
+ result = rxml_new_cstr(xattr, xencoding);
955
+ xmlFree(xattr);
956
+ }
957
+ return result;
958
+ }
959
+
960
+ /*
961
+ * call-seq:
962
+ * reader.lookup_namespace(prefix) -> value
963
+ *
964
+ * Resolve a namespace prefix in the scope of the current element.
965
+ * To return the default namespace, specify nil as +prefix+.
966
+ */
967
+ static VALUE rxml_reader_lookup_namespace(VALUE self, VALUE prefix)
968
+ {
969
+ VALUE result = Qnil;
970
+ xmlTextReaderPtr xReader = rxml_text_reader_get(self);
971
+ const xmlChar *xnamespace = xmlTextReaderLookupNamespace(xReader, (const xmlChar *) StringValueCStr(prefix));
972
+ const xmlChar *xencoding = xmlTextReaderConstEncoding(xReader);
973
+
974
+ if (xnamespace)
975
+ {
976
+ result = rxml_new_cstr(xnamespace, xencoding);
977
+ xmlFree((void *)xnamespace);
978
+ }
979
+ return result;
980
+ }
981
+
982
+ /*
983
+ * call-seq:
984
+ * reader.expand -> node
985
+ *
986
+ * Returns the current node and its full subtree. Note the returned node
987
+ * is valid ONLY until the next read call. If you would like to preserve
988
+ * the node, or search it via xpath, call reader.doc first.
989
+ */
990
+ static VALUE rxml_reader_expand(VALUE self)
991
+ {
992
+ xmlTextReaderPtr xreader = rxml_text_reader_get(self);
993
+ xmlNodePtr xnode = xmlTextReaderExpand(xreader);
994
+
995
+ if (!xnode)
996
+ {
997
+ return Qnil;
998
+ }
999
+ else
1000
+ {
1001
+ /* We cannot call rxml_node_wrap here because its sets up a mark function
1002
+ for the node. But according to the libxml docs (http://xmlsoft.org/html/libxml-xmlreader.html#xmlTextReaderExpand)
1003
+ this is only valid until the next xmlTextReaderRead call. At that point the node is freed (from reading
1004
+ the libxml2 source code. So don't set a mark or free function, because they will get called in the next
1005
+ garbage collection run and cause a segfault.*/
1006
+ return Data_Wrap_Struct(cXMLNode, NULL, NULL, xnode);
1007
+ }
1008
+ }
1009
+
1010
+ /*
1011
+ * call-seq:
1012
+ * reader.document -> doc
1013
+ *
1014
+ * Hacking interface that provides access to the current document being accessed by the
1015
+ * reader. NOTE: as a result of this call, the reader will not destroy the associated XML
1016
+ * document. Instead, it will be destroyed when the returned document goes out of scope.
1017
+ *
1018
+ * Returns: document
1019
+ */
1020
+ static VALUE rxml_reader_doc(VALUE self)
1021
+ {
1022
+ VALUE result = Qnil;
1023
+ xmlTextReaderPtr xreader = rxml_text_reader_get(self);
1024
+ xmlDocPtr xdoc = xmlTextReaderCurrentDoc(xreader);
1025
+
1026
+ if (!xdoc)
1027
+ rb_raise(rb_eRuntimeError, "The reader does not have a document. Did you forget to call read?");
1028
+
1029
+ result = rxml_document_wrap(xdoc);
1030
+
1031
+ // And now hook in a mark function to keep the document alive as long as the reader is valid
1032
+ RDATA(self)->dmark = (RUBY_DATA_FUNC)rxml_reader_mark;
1033
+
1034
+ return result;
1035
+ }
1036
+
1037
+
1038
+
#if LIBXML_VERSION >= 20618
/*
 * call-seq:
 *    reader.byte_consumed -> value
 *
 * This method provides the current index of the parser used by the reader,
 * relative to the start of the current entity.
 */
static VALUE
rxml_reader_byte_consumed(VALUE self)
{
  xmlTextReaderPtr xreader = rxml_text_reader_get(self);

  /* xmlTextReaderByteConsumed returns a long; LONG2NUM keeps the full value
     where INT2NUM would narrow it to int first. */
  return LONG2NUM(xmlTextReaderByteConsumed(xreader));
}
#endif
1054
+
#if LIBXML_VERSION >= 20617
/*
 * call-seq:
 *    reader.column_number -> number
 *
 * Column number of the current parsing point, as reported by
 * xmlTextReaderGetParserColumnNumber.
 */
static VALUE
rxml_reader_column_number(VALUE self)
{
  const int column = xmlTextReaderGetParserColumnNumber(rxml_text_reader_get(self));

  return INT2NUM(column);
}

/*
 * call-seq:
 *    reader.line_number -> number
 *
 * Line number of the current parsing point, as reported by
 * xmlTextReaderGetParserLineNumber.
 */
static VALUE
rxml_reader_line_number(VALUE self)
{
  const int line = xmlTextReaderGetParserLineNumber(rxml_text_reader_get(self));

  return INT2NUM(line);
}
#endif
1082
+
1083
+ /*
1084
+ * call-seq:
1085
+ * reader.default? -> bool
1086
+ *
1087
+ * Return whether an Attribute node was generated from the default value
1088
+ * defined in the DTD or schema.
1089
+ */
1090
+ static VALUE rxml_reader_default(VALUE self)
1091
+ {
1092
+ xmlTextReaderPtr xreader = rxml_text_reader_get(self);
1093
+ return xmlTextReaderIsDefault(xreader) ? Qtrue : Qfalse;
1094
+ }
1095
+
1096
+ /*
1097
+ * call-seq:
1098
+ * reader.namespace_declaration? -> bool
1099
+ *
1100
+ * Determine whether the current node is a namespace declaration rather than a
1101
+ * regular attribute.
1102
+ */
1103
+ static VALUE rxml_reader_namespace_declaration(VALUE self)
1104
+ {
1105
+ xmlTextReaderPtr xreader = rxml_text_reader_get(self);
1106
+ return xmlTextReaderIsNamespaceDecl(xreader) ? Qtrue : Qfalse;
1107
+ }
1108
+
1109
+ /*
1110
+ * call-seq:
1111
+ * reader.empty_element? -> bool
1112
+ *
1113
+ * Check if the current node is empty.
1114
+ */
1115
+ static VALUE rxml_reader_empty_element(VALUE self)
1116
+ {
1117
+ xmlTextReaderPtr xreader = rxml_text_reader_get(self);
1118
+ return xmlTextReaderIsEmptyElement(xreader) ? Qtrue : Qfalse;
1119
+ }
1120
+
1121
+ /*
1122
+ * call-seq:
1123
+ * reader.valid? -> bool
1124
+ *
1125
+ * Retrieve the validity status from the parser context.
1126
+ */
1127
+ static VALUE rxml_reader_valid(VALUE self)
1128
+ {
1129
+ xmlTextReaderPtr xreader = rxml_text_reader_get(self);
1130
+ return xmlTextReaderIsValid(xreader) ? Qtrue : Qfalse;
1131
+ }
1132
+
1133
+ void rxml_init_reader(void)
1134
+ {
1135
+ BASE_URI_SYMBOL = ID2SYM(rb_intern("base_uri"));
1136
+ ENCODING_SYMBOL = ID2SYM(rb_intern("encoding"));
1137
+ IO_ATTR = rb_intern("@io");
1138
+ OPTIONS_SYMBOL = ID2SYM(rb_intern("options"));
1139
+
1140
+ cXMLReader = rb_define_class_under(mXML, "Reader", rb_cObject);
1141
+ rb_undef_alloc_func(cXMLReader);
1142
+
1143
+ rb_define_singleton_method(cXMLReader, "document", rxml_reader_document, 1);
1144
+ rb_define_singleton_method(cXMLReader, "file", rxml_reader_file, -1);
1145
+ rb_define_singleton_method(cXMLReader, "io", rxml_reader_io, -1);
1146
+ rb_define_singleton_method(cXMLReader, "string", rxml_reader_string, -1);
1147
+
1148
+ rb_define_method(cXMLReader, "[]", rxml_reader_attribute, 1);
1149
+ rb_define_method(cXMLReader, "attribute_count", rxml_reader_attr_count, 0);
1150
+ rb_define_method(cXMLReader, "base_uri", rxml_reader_base_uri, 0);
1151
+ #if LIBXML_VERSION >= 20618
1152
+ rb_define_method(cXMLReader, "byte_consumed", rxml_reader_byte_consumed, 0);
1153
+ #endif
1154
+ rb_define_method(cXMLReader, "close", rxml_reader_close, 0);
1155
+ #if LIBXML_VERSION >= 20617
1156
+ rb_define_method(cXMLReader, "column_number", rxml_reader_column_number, 0);
1157
+ #endif
1158
+ rb_define_method(cXMLReader, "depth", rxml_reader_depth, 0);
1159
+ rb_define_method(cXMLReader, "doc", rxml_reader_doc, 0);
1160
+ rb_define_method(cXMLReader, "encoding", rxml_reader_encoding, 0);
1161
+ rb_define_method(cXMLReader, "expand", rxml_reader_expand, 0);
1162
+ rb_define_method(cXMLReader, "get_attribute", rxml_reader_get_attribute, 1);
1163
+ rb_define_method(cXMLReader, "get_attribute_no", rxml_reader_get_attribute_no, 1);
1164
+ rb_define_method(cXMLReader, "get_attribute_ns", rxml_reader_get_attribute_ns, 2);
1165
+ rb_define_method(cXMLReader, "has_attributes?", rxml_reader_has_attributes, 0);
1166
+ rb_define_method(cXMLReader, "has_value?", rxml_reader_has_value, 0);
1167
+ #if LIBXML_VERSION >= 20617
1168
+ rb_define_method(cXMLReader, "line_number", rxml_reader_line_number, 0);
1169
+ #endif
1170
+ rb_define_method(cXMLReader, "local_name", rxml_reader_local_name, 0);
1171
+ rb_define_method(cXMLReader, "lookup_namespace", rxml_reader_lookup_namespace, 1);
1172
+ rb_define_method(cXMLReader, "move_to_attribute", rxml_reader_move_to_attr, 1);
1173
+ rb_define_method(cXMLReader, "move_to_attribute_no", rxml_reader_move_to_attr_no, 1);
1174
+ rb_define_method(cXMLReader, "move_to_attribute_ns", rxml_reader_move_to_attr_ns, 2);
1175
+ rb_define_method(cXMLReader, "move_to_first_attribute", rxml_reader_move_to_first_attr, 0);
1176
+ rb_define_method(cXMLReader, "move_to_next_attribute", rxml_reader_move_to_next_attr, 0);
1177
+ rb_define_method(cXMLReader, "move_to_element", rxml_reader_move_to_element, 0);
1178
+ rb_define_method(cXMLReader, "name", rxml_reader_name, 0);
1179
+ rb_define_method(cXMLReader, "namespace_uri", rxml_reader_namespace_uri, 0);
1180
+ rb_define_method(cXMLReader, "next", rxml_reader_next, 0);
1181
+ rb_define_method(cXMLReader, "next_sibling", rxml_reader_next_sibling, 0);
1182
+ rb_define_method(cXMLReader, "node", rxml_reader_node, 0);
1183
+ rb_define_method(cXMLReader, "node_type", rxml_reader_node_type, 0);
1184
+ rb_define_method(cXMLReader, "normalization", rxml_reader_normalization, 0);
1185
+ rb_define_method(cXMLReader, "prefix", rxml_reader_prefix, 0);
1186
+ rb_define_method(cXMLReader, "quote_char", rxml_reader_quote_char, 0);
1187
+ rb_define_method(cXMLReader, "read", rxml_reader_read, 0);
1188
+ rb_define_method(cXMLReader, "read_attribute_value", rxml_reader_read_attr_value, 0);
1189
+ rb_define_method(cXMLReader, "read_inner_xml", rxml_reader_read_inner_xml, 0);
1190
+ rb_define_method(cXMLReader, "read_outer_xml", rxml_reader_read_outer_xml, 0);
1191
+ rb_define_method(cXMLReader, "read_state", rxml_reader_read_state, 0);
1192
+ rb_define_method(cXMLReader, "read_string", rxml_reader_read_string, 0);
1193
+ rb_define_method(cXMLReader, "relax_ng_validate", rxml_reader_relax_ng_validate, 1);
1194
+ rb_define_method(cXMLReader, "standalone", rxml_reader_standalone, 0);
1195
+ #if LIBXML_VERSION >= 20620
1196
+ rb_define_method(cXMLReader, "schema_validate", rxml_reader_schema_validate, 1);
1197
+ #endif
1198
+ rb_define_method(cXMLReader, "value", rxml_reader_value, 0);
1199
+ rb_define_method(cXMLReader, "xml_lang", rxml_reader_xml_lang, 0);
1200
+ rb_define_method(cXMLReader, "xml_version", rxml_reader_xml_version, 0);
1201
+ rb_define_method(cXMLReader, "default?", rxml_reader_default, 0);
1202
+ rb_define_method(cXMLReader, "empty_element?", rxml_reader_empty_element, 0);
1203
+ rb_define_method(cXMLReader, "namespace_declaration?", rxml_reader_namespace_declaration, 0);
1204
+ rb_define_method(cXMLReader, "valid?", rxml_reader_valid, 0);
1205
+
1206
+ /* Constants */
1207
+ rb_define_const(cXMLReader, "LOADDTD", INT2FIX(XML_PARSER_LOADDTD));
1208
+ rb_define_const(cXMLReader, "DEFAULTATTRS", INT2FIX(XML_PARSER_DEFAULTATTRS));
1209
+ rb_define_const(cXMLReader, "VALIDATE", INT2FIX(XML_PARSER_VALIDATE));
1210
+ rb_define_const(cXMLReader, "SUBST_ENTITIES", INT2FIX(XML_PARSER_SUBST_ENTITIES));
1211
+
1212
+ rb_define_const(cXMLReader, "SEVERITY_VALIDITY_WARNING", INT2FIX(XML_PARSER_SEVERITY_VALIDITY_WARNING));
1213
+ rb_define_const(cXMLReader, "SEVERITY_VALIDITY_ERROR", INT2FIX(XML_PARSER_SEVERITY_VALIDITY_ERROR));
1214
+ rb_define_const(cXMLReader, "SEVERITY_WARNING", INT2FIX(XML_PARSER_SEVERITY_WARNING));
1215
+ rb_define_const(cXMLReader, "SEVERITY_ERROR", INT2FIX(XML_PARSER_SEVERITY_ERROR));
1216
+
1217
+ rb_define_const(cXMLReader, "TYPE_NONE", INT2FIX(XML_READER_TYPE_NONE));
1218
+ rb_define_const(cXMLReader, "TYPE_ELEMENT", INT2FIX(XML_READER_TYPE_ELEMENT));
1219
+ rb_define_const(cXMLReader, "TYPE_ATTRIBUTE", INT2FIX(XML_READER_TYPE_ATTRIBUTE));
1220
+ rb_define_const(cXMLReader, "TYPE_TEXT", INT2FIX(XML_READER_TYPE_TEXT));
1221
+ rb_define_const(cXMLReader, "TYPE_CDATA", INT2FIX(XML_READER_TYPE_CDATA));
1222
+ rb_define_const(cXMLReader, "TYPE_ENTITY_REFERENCE", INT2FIX(XML_READER_TYPE_ENTITY_REFERENCE));
1223
+ rb_define_const(cXMLReader, "TYPE_ENTITY", INT2FIX(XML_READER_TYPE_ENTITY));
1224
+ rb_define_const(cXMLReader, "TYPE_PROCESSING_INSTRUCTION", INT2FIX(XML_READER_TYPE_PROCESSING_INSTRUCTION));
1225
+ rb_define_const(cXMLReader, "TYPE_COMMENT", INT2FIX(XML_READER_TYPE_COMMENT));
1226
+ rb_define_const(cXMLReader, "TYPE_DOCUMENT", INT2FIX(XML_READER_TYPE_DOCUMENT));
1227
+ rb_define_const(cXMLReader, "TYPE_DOCUMENT_TYPE", INT2FIX(XML_READER_TYPE_DOCUMENT_TYPE));
1228
+ rb_define_const(cXMLReader, "TYPE_DOCUMENT_FRAGMENT", INT2FIX(XML_READER_TYPE_DOCUMENT_FRAGMENT));
1229
+ rb_define_const(cXMLReader, "TYPE_NOTATION", INT2FIX(XML_READER_TYPE_NOTATION));
1230
+ rb_define_const(cXMLReader, "TYPE_WHITESPACE", INT2FIX(XML_READER_TYPE_WHITESPACE));
1231
+ rb_define_const(cXMLReader, "TYPE_SIGNIFICANT_WHITESPACE", INT2FIX(XML_READER_TYPE_SIGNIFICANT_WHITESPACE));
1232
+ rb_define_const(cXMLReader, "TYPE_END_ELEMENT", INT2FIX(XML_READER_TYPE_END_ELEMENT));
1233
+ rb_define_const(cXMLReader, "TYPE_END_ENTITY", INT2FIX(XML_READER_TYPE_END_ENTITY));
1234
+ rb_define_const(cXMLReader, "TYPE_XML_DECLARATION", INT2FIX(XML_READER_TYPE_XML_DECLARATION));
1235
+
1236
+ /* Read states */
1237
+ rb_define_const(cXMLReader, "MODE_INITIAL", INT2FIX(XML_TEXTREADER_MODE_INITIAL));
1238
+ rb_define_const(cXMLReader, "MODE_INTERACTIVE", INT2FIX(XML_TEXTREADER_MODE_INTERACTIVE));
1239
+ rb_define_const(cXMLReader, "MODE_ERROR", INT2FIX(XML_TEXTREADER_MODE_ERROR));
1240
+ rb_define_const(cXMLReader, "MODE_EOF", INT2FIX(XML_TEXTREADER_MODE_EOF));
1241
+ rb_define_const(cXMLReader, "MODE_CLOSED", INT2FIX(XML_TEXTREADER_MODE_CLOSED));
1242
+ rb_define_const(cXMLReader, "MODE_READING", INT2FIX(XML_TEXTREADER_MODE_READING));
1243
+
1244
+ rb_undef_method(CLASS_OF(cXMLReader), "new");
1245
+ }