libxml-ruby 0.9.9 → 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,1002 +1,1002 @@
1
- /* Copyright (c) 2006-2007 Apple Inc.
2
- * Please see the LICENSE file for copyright and distribution information. */
3
-
4
- #include "ruby_libxml.h"
5
- #include "ruby_xml_reader.h"
6
-
7
- /*
8
- * Document-class: LibXML::XML::Reader
9
- *
10
- * The XML::Reader class provides a simpler, alternative way of parsing an XML
11
- * document in contrast to XML::Parser or XML::SaxParser. A XML::Reader instance
12
- * acts like a cursor going forward in a document stream, stopping at each node
13
- * it encounters. To advance to the next node, simply call XML::Reader#read.
14
- *
15
- * The XML::Reader API closely matches the DOM Core specification and supports
16
- * namespaces, xml:base, entity handling and DTDs.
17
- *
18
- * To summarize, XML::Reader provides a far simpler API to use versus XML::SaxParser
19
- * and is more memory efficient than using XML::Parser to create a DOM tree.
20
- *
21
- * Example:
22
- *
23
- * reader = XML::Reader.string("<foo><bar>1</bar><bar>2</bar><bar>3</bar></foo>")
24
- * reader.read
25
- * assert_equal('foo', reader.name)
26
- * assert_equal(nil, reader.value)
27
- *
28
- * 3.times do |i|
29
- * reader.read
30
- * assert_equal(XML::Reader::TYPE_ELEMENT, reader.node_type)
31
- * assert_equal('bar', reader.name)
32
- * reader.read
33
- * assert_equal(XML::Reader::TYPE_TEXT, reader.node_type)
34
- * assert_equal((i + 1).to_s, reader.value)
35
- * reader.read
36
- * assert_equal(XML::Reader::TYPE_END_ELEMENT, reader.node_type)
37
- * end
38
- *
39
- * You can also parse documents (see XML::Reader.document),
40
- * strings (see XML::Reader.string) and io objects (see
41
- * XML::Reader.io).
42
- *
43
- * For a more in depth tutorial, albeit in C, see http://xmlsoft.org/xmlreader.html.*/
44
-
45
- VALUE cXMLReader;
46
-
47
- ID BASE_URI_SYMBOL;
48
- ID ENCODING_SYMBOL;
49
- ID IO_ATTR;
50
- ID OPTIONS_SYMBOL;
51
-
52
-
53
- static void rxml_reader_free(xmlTextReaderPtr reader)
54
- {
55
- xmlFreeTextReader(reader);
56
- }
57
-
58
- static VALUE rxml_reader_wrap(xmlTextReaderPtr reader)
59
- {
60
- return Data_Wrap_Struct(cXMLReader, NULL, rxml_reader_free, reader);
61
- }
62
-
63
- static xmlTextReaderPtr rxml_text_reader_get(VALUE obj)
64
- {
65
- xmlTextReaderPtr xreader;
66
- Data_Get_Struct(obj, xmlTextReader, xreader);
67
- return xreader;
68
- }
69
-
70
- /*
71
- * call-seq:
72
- * XML::Reader.document(doc) -> XML::Reader
73
- *
74
- * Create an new reader for the specified document.
75
- */
76
- VALUE rxml_reader_document(VALUE klass, VALUE doc)
77
- {
78
- xmlDocPtr xdoc;
79
- xmlTextReaderPtr xreader;
80
-
81
- Data_Get_Struct(doc, xmlDoc, xdoc);
82
-
83
- xreader = xmlReaderWalker(xdoc);
84
-
85
- if (xreader == NULL)
86
- rxml_raise(&xmlLastError);
87
-
88
- return rxml_reader_wrap(xreader);
89
- }
90
-
91
- /* call-seq:
92
- * XML::Reader.file(path) -> XML::Reader
93
- * XML::Reader.file(path, :encoding => XML::Encoding::UTF_8,
94
- * :options => XML::Parser::Options::NOENT) -> XML::Parser
95
- *
96
- * Creates a new reader by parsing the specified file or uri.
97
- *
98
- * You may provide an optional hash table to control how the
99
- * parsing is performed. Valid options are:
100
- *
101
- * encoding - The document encoding, defaults to nil. Valid values
102
- * are the encoding constants defined on XML::Encoding.
103
- * options - Controls the execution of the parser, defaults to 0.
104
- * Valid values are the constants defined on
105
- * XML::Parser::Options. Mutliple options can be combined
106
- * by using Bitwise OR (|).
107
- */
108
- static VALUE rxml_reader_file(int argc, VALUE *argv, VALUE klass)
109
- {
110
- xmlTextReaderPtr xreader;
111
- VALUE path;
112
- VALUE options;
113
-
114
- const char *xencoding = NULL;
115
- int xoptions = 0;
116
-
117
- rb_scan_args(argc, argv, "11", &path, &options);
118
- Check_Type(path, T_STRING);
119
-
120
- if (!NIL_P(options))
121
- {
122
- VALUE encoding = Qnil;
123
- VALUE parserOptions = Qnil;
124
-
125
- Check_Type(options, T_HASH);
126
-
127
- encoding = rb_hash_aref(options, BASE_URI_SYMBOL);
128
- xencoding = NIL_P(encoding) ? NULL : xmlGetCharEncodingName(NUM2INT(encoding));
129
-
130
- parserOptions = rb_hash_aref(options, OPTIONS_SYMBOL);
131
- xoptions = NIL_P(parserOptions) ? 0 : NUM2INT(parserOptions);
132
- }
133
-
134
- xreader = xmlReaderForFile(StringValueCStr(path), xencoding, xoptions);
135
-
136
- if (xreader == NULL)
137
- rxml_raise(&xmlLastError);
138
-
139
- return rxml_reader_wrap(xreader);
140
- }
141
-
142
- /* call-seq:
143
- * XML::Reader.io(io) -> XML::Reader
144
- * XML::Reader.io(io, :encoding => XML::Encoding::UTF_8,
145
- * :options => XML::Parser::Options::NOENT) -> XML::Parser
146
- *
147
- * Creates a new reader by parsing the specified io object.
148
- *
149
- * You may provide an optional hash table to control how the
150
- * parsing is performed. Valid options are:
151
- *
152
- * base_uri - The base url for the parsed document.
153
- * encoding - The document encoding, defaults to nil. Valid values
154
- * are the encoding constants defined on XML::Encoding.
155
- * options - Controls the execution of the parser, defaults to 0.
156
- * Valid values are the constants defined on
157
- * XML::Parser::Options. Mutliple options can be combined
158
- * by using Bitwise OR (|).
159
- */
160
- static VALUE rxml_reader_io(int argc, VALUE *argv, VALUE klass)
161
- {
162
- xmlTextReaderPtr xreader;
163
- VALUE result;
164
- VALUE io;
165
- VALUE options;
166
- char *xbaseurl = NULL;
167
- const char *xencoding = NULL;
168
- int xoptions = 0;
169
-
170
- rb_scan_args(argc, argv, "11", &io, &options);
171
-
172
- if (!NIL_P(options))
173
- {
174
- VALUE baseurl = Qnil;
175
- VALUE encoding = Qnil;
176
- VALUE parserOptions = Qnil;
177
-
178
- Check_Type(options, T_HASH);
179
-
180
- baseurl = rb_hash_aref(options, BASE_URI_SYMBOL);
181
- xbaseurl = NIL_P(baseurl) ? NULL : StringValueCStr(baseurl);
182
-
183
- encoding = rb_hash_aref(options, ENCODING_SYMBOL);
184
- xencoding = NIL_P(encoding) ? NULL : xmlGetCharEncodingName(NUM2INT(encoding));
185
-
186
- parserOptions = rb_hash_aref(options, OPTIONS_SYMBOL);
187
- xoptions = NIL_P(parserOptions) ? 0 : NUM2INT(parserOptions);
188
- }
189
-
190
- xreader = xmlReaderForIO((xmlInputReadCallback) rxml_read_callback, NULL,
191
- (void *) io,
192
- xbaseurl, xencoding, xoptions);
193
-
194
- if (xreader == NULL)
195
- rxml_raise(&xmlLastError);
196
-
197
- result = rxml_reader_wrap(xreader);
198
-
199
- /* Attach io object to parser so it won't get freed.*/
200
- rb_ivar_set(result, IO_ATTR, io);
201
-
202
- return result;
203
- }
204
-
205
- /* call-seq:
206
- * XML::Reader.string(io) -> XML::Reader
207
- * XML::Reader.string(io, :encoding => XML::Encoding::UTF_8,
208
- * :options => XML::Parser::Options::NOENT) -> XML::Parser
209
- *
210
- * Creates a new reader by parsing the specified string.
211
- *
212
- * You may provide an optional hash table to control how the
213
- * parsing is performed. Valid options are:
214
- *
215
- * base_uri - The base url for the parsed document.
216
- * encoding - The document encoding, defaults to nil. Valid values
217
- * are the encoding constants defined on XML::Encoding.
218
- * options - Controls the execution of the parser, defaults to 0.
219
- * Valid values are the constants defined on
220
- * XML::Parser::Options. Mutliple options can be combined
221
- * by using Bitwise OR (|).
222
- */
223
- static VALUE rxml_reader_string(int argc, VALUE *argv, VALUE klass)
224
- {
225
- xmlTextReaderPtr xreader;
226
- VALUE string;
227
- VALUE options;
228
- char *xbaseurl = NULL;
229
- const char *xencoding = NULL;
230
- int xoptions = 0;
231
-
232
- rb_scan_args(argc, argv, "11", &string, &options);
233
- Check_Type(string, T_STRING);
234
-
235
- if (!NIL_P(options))
236
- {
237
- VALUE baseurl = Qnil;
238
- VALUE encoding = Qnil;
239
- VALUE parserOptions = Qnil;
240
-
241
- Check_Type(options, T_HASH);
242
-
243
- baseurl = rb_hash_aref(options, BASE_URI_SYMBOL);
244
- xbaseurl = NIL_P(baseurl) ? NULL : StringValueCStr(baseurl);
245
-
246
- encoding = rb_hash_aref(options, ENCODING_SYMBOL);
247
- xencoding = NIL_P(encoding) ? NULL : xmlGetCharEncodingName(NUM2INT(encoding));
248
-
249
- parserOptions = rb_hash_aref(options, OPTIONS_SYMBOL);
250
- xoptions = NIL_P(parserOptions) ? 0 : NUM2INT(parserOptions);
251
- }
252
-
253
- xreader = xmlReaderForMemory(StringValueCStr(string), RSTRING_LEN(string),
254
- xbaseurl, xencoding, xoptions);
255
-
256
- if (xreader == NULL)
257
- rxml_raise(&xmlLastError);
258
-
259
- return rxml_reader_wrap(xreader);
260
- }
261
-
262
- /*
263
- * call-seq:
264
- * reader.close -> code
265
- *
266
- * This method releases any resources allocated by the current instance
267
- * changes the state to Closed and close any underlying input.
268
- */
269
- static VALUE rxml_reader_close(VALUE self)
270
- {
271
- return INT2FIX(xmlTextReaderClose(rxml_text_reader_get(self)));
272
- }
273
-
274
- /*
275
- * call-seq:
276
- * reader.move_to_attribute(val) -> code
277
- *
278
- * Move the position of the current instance to the attribute with the
279
- * specified index (if +val+ is an integer) or name (if +val+ is a string)
280
- * relative to the containing element.
281
- */
282
- static VALUE rxml_reader_move_to_attr(VALUE self, VALUE val)
283
- {
284
- xmlTextReaderPtr xreader;
285
- int ret;
286
-
287
- xreader = rxml_text_reader_get(self);
288
-
289
- if (TYPE(val) == T_FIXNUM)
290
- {
291
- ret = xmlTextReaderMoveToAttributeNo(xreader, FIX2INT(val));
292
- }
293
- else
294
- {
295
- ret = xmlTextReaderMoveToAttribute(xreader,
296
- (const xmlChar *) StringValueCStr(val));
297
- }
298
-
299
- return INT2FIX(ret);
300
- }
301
-
302
- /*
303
- * call-seq:
304
- * reader.move_to_first_attribute -> code
305
- *
306
- * Move the position of the current instance to the first attribute associated
307
- * with the current node.
308
- */
309
- static VALUE rxml_reader_move_to_first_attr(VALUE self)
310
- {
311
- return INT2FIX(xmlTextReaderMoveToFirstAttribute(rxml_text_reader_get(self)));
312
- }
313
-
314
- /*
315
- * call-seq:
316
- * reader.move_to_next_attribute -> code
317
- *
318
- * Move the position of the current instance to the next attribute associated
319
- * with the current node.
320
- */
321
- static VALUE rxml_reader_move_to_next_attr(VALUE self)
322
- {
323
- return INT2FIX(xmlTextReaderMoveToNextAttribute(rxml_text_reader_get(self)));
324
- }
325
-
326
- /*
327
- * call-seq:
328
- * reader.move_to_element -> code
329
- *
330
- * Move the position of the current instance to the node that contains the
331
- * current attribute node.
332
- */
333
- static VALUE rxml_reader_move_to_element(VALUE self)
334
- {
335
- return INT2FIX(xmlTextReaderMoveToElement(rxml_text_reader_get(self)));
336
- }
337
-
338
- /*
339
- * call-seq:
340
- * reader.next -> code
341
- *
342
- * Skip to the node following the current one in document order while avoiding
343
- * the subtree if any.
344
- */
345
- static VALUE rxml_reader_next(VALUE self)
346
- {
347
- return INT2FIX(xmlTextReaderNext(rxml_text_reader_get(self)));
348
- }
349
-
350
- /*
351
- * call-seq:
352
- * reader.next_sibling -> code
353
- *
354
- * Skip to the node following the current one in document order while avoiding
355
- * the subtree if any. Currently implemented only for Readers built on a
356
- * document.
357
- */
358
- static VALUE rxml_reader_next_sibling(VALUE self)
359
- {
360
- return INT2FIX(xmlTextReaderNextSibling(rxml_text_reader_get(self)));
361
- }
362
-
363
- /*
364
- * call-seq:
365
- * reader.node -> XML::Node
366
- *
367
- * Returns the reader's current node. It will return
368
- * nil if Reader#read has not yet been called.
369
- * WARNING - Using this method is dangerous because the
370
- * the node may be destroyed on the next #read.
371
- */
372
- static VALUE rxml_reader_node(VALUE self)
373
- {
374
- xmlTextReaderPtr xreader = rxml_text_reader_get(self);
375
- xmlNodePtr xnode = xmlTextReaderCurrentNode(xreader);
376
- return xnode ? rxml_node_wrap(xnode) : Qnil;
377
- }
378
-
379
- /*
380
- * call-seq:
381
- * reader.node_type -> type
382
- *
383
- * Get the node type of the current node. Reference:
384
- * http://dotgnu.org/pnetlib-doc/System/Xml/XmlNodeType.html
385
- */
386
- static VALUE rxml_reader_node_type(VALUE self)
387
- {
388
- return INT2FIX(xmlTextReaderNodeType(rxml_text_reader_get(self)));
389
- }
390
-
391
- /*
392
- * call-seq:
393
- * reader.normalization -> value
394
- *
395
- * The value indicating whether to normalize white space and attribute values.
396
- * Since attribute value and end of line normalizations are a MUST in the XML
397
- * specification only the value true is accepted. The broken bahaviour of
398
- * accepting out of range character entities like &#0; is of course not
399
- * supported either.
400
- *
401
- * Return 1 or -1 in case of error.
402
- */
403
- static VALUE rxml_reader_normalization(VALUE self)
404
- {
405
- return INT2FIX(xmlTextReaderNormalization(rxml_text_reader_get(self)));
406
- }
407
-
408
- /*
409
- * call-seq:
410
- * reader.read -> code
411
- *
412
- * Causes the reader to move to the next node in the stream, exposing its properties.
413
- *
414
- * Returns true if a node was successfully read or false if there are no more
415
- * nodes to read. On errors, an exception is raised.*/
416
- static VALUE rxml_reader_read(VALUE self)
417
- {
418
- int result = xmlTextReaderRead(rxml_text_reader_get(self));
419
- switch(result)
420
- {
421
- case -1:
422
- rxml_raise(&xmlLastError);
423
- return Qnil;
424
- break;
425
- case 0:
426
- return Qfalse;
427
- case 1:
428
- return Qtrue;
429
- default:
430
- rb_raise(rb_eRuntimeError,
431
- "xmlTextReaderRead did not return -1, 0 or 1. Return value was: %d", result);
432
- }
433
- }
434
-
435
- /*
436
- * call-seq:
437
- * reader.read_attribute_value -> code
438
- *
439
- * Parse an attribute value into one or more Text and EntityReference nodes.
440
- *
441
- * Return 1 in case of success, 0 if the reader was not positionned on an
442
- * attribute node or all the attribute values have been read, or -1 in case of
443
- * error.
444
- */
445
- static VALUE rxml_reader_read_attr_value(VALUE self)
446
- {
447
- return INT2FIX(xmlTextReaderReadAttributeValue(rxml_text_reader_get(self)));
448
- }
449
-
450
- /*
451
- * call-seq:
452
- * reader.read_inner_xml -> data
453
- *
454
- * Read the contents of the current node, including child nodes and markup.
455
- *
456
- * Return a string containing the XML content, or nil if the current node is
457
- * neither an element nor attribute, or has no child nodes.
458
- */
459
- static VALUE rxml_reader_read_inner_xml(VALUE self)
460
- {
461
- const xmlChar *result = xmlTextReaderReadInnerXml(rxml_text_reader_get(self));
462
- return (result == NULL ? Qnil : rb_str_new2((const char*)result));
463
- }
464
-
465
- /*
466
- * call-seq:
467
- * reader.read_outer_xml -> data
468
- *
469
- * Read the contents of the current node, including child nodes and markup.
470
- *
471
- * Return a string containing the XML content, or nil if the current node is
472
- * neither an element nor attribute, or has no child nodes.
473
- */
474
- static VALUE rxml_reader_read_outer_xml(VALUE self)
475
- {
476
- const xmlChar *result = xmlTextReaderReadOuterXml(rxml_text_reader_get(self));
477
- return (result == NULL ? Qnil : rb_str_new2((const char*)result));
478
- }
479
-
480
- /*
481
- * call-seq:
482
- * reader.read_state -> state
483
- *
484
- * Get the read state of the reader.
485
- */
486
- static VALUE rxml_reader_read_state(VALUE self)
487
- {
488
- return INT2FIX(xmlTextReaderReadState(rxml_text_reader_get(self)));
489
- }
490
-
491
- /*
492
- * call-seq:
493
- * reader.read_string -> string
494
- *
495
- * Read the contents of an element or a text node as a string.
496
- *
497
- * Return a string containing the contents of the Element or Text node, or nil
498
- * if the reader is positioned on any other type of node.
499
- */
500
- static VALUE rxml_reader_read_string(VALUE self)
501
- {
502
- const xmlChar *result = xmlTextReaderReadString(rxml_text_reader_get(self));
503
- return (result == NULL ? Qnil : rb_str_new2((const char*)result));
504
- }
505
-
506
- /*
507
- * call-seq:
508
- * reader.relax_ng_validate(rng) -> code
509
- *
510
- * Use RelaxNG to validate the document as it is processed. Activation is only
511
- * possible before the first read. If +rng+ is nil, the RelaxNG validation is
512
- * desactivated.
513
- *
514
- * Return 0 in case the RelaxNG validation could be (des)activated and -1 in
515
- * case of error.
516
- */
517
- static VALUE rxml_reader_relax_ng_validate(VALUE self, VALUE rng)
518
- {
519
- char *xrng = NIL_P(rng) ? NULL : StringValueCStr(rng);
520
- return INT2FIX(xmlTextReaderRelaxNGValidate(rxml_text_reader_get(self), xrng));
521
- }
522
-
523
#if LIBXML_VERSION >= 20620
/*
 * call-seq:
 *    reader.schema_validate(schema) -> code
 *
 * Use a W3C XSD schema to validate the document as it is processed.
 * Activation is only possible before the first read.  If +schema+ is nil,
 * XML Schema validation is deactivated.
 *
 * Returns 0 if schema validation could be (de)activated and -1 in case of
 * error.
 */
static VALUE
rxml_reader_schema_validate(VALUE self, VALUE xsd)
{
  char *xxsd = NULL;

  if (!NIL_P(xsd))
    xxsd = StringValueCStr(xsd);

  return INT2FIX(xmlTextReaderSchemaValidate(rxml_text_reader_get(self), xxsd));
}
#endif
543
-
544
- /*
545
- * call-seq:
546
- * reader.name -> name
547
- *
548
- * Return the qualified name of the node.
549
- */
550
- static VALUE rxml_reader_name(VALUE self)
551
- {
552
- const xmlChar *result = xmlTextReaderConstName(rxml_text_reader_get(self));
553
- return (result == NULL ? Qnil : rb_str_new2((const char*)result));
554
- }
555
-
556
- /*
557
- * call-seq:
558
- * reader.local_name -> name
559
- *
560
- * Return the local name of the node.
561
- */
562
- static VALUE rxml_reader_local_name(VALUE self)
563
- {
564
- const xmlChar *result = xmlTextReaderConstLocalName(rxml_text_reader_get(self));
565
- return (result == NULL ? Qnil : rb_str_new2((const char*)result));
566
- }
567
-
568
- /*
569
- * call-seq:
570
- * reader.attribute_count -> count
571
- *
572
- * Provide the number of attributes of the current node.
573
- */
574
- static VALUE rxml_reader_attr_count(VALUE self)
575
- {
576
- return INT2FIX(xmlTextReaderAttributeCount(rxml_text_reader_get(self)));
577
- }
578
-
579
- /*
580
- * call-seq:
581
- * reader.encoding -> XML::Encoding::UTF_8
582
- *
583
- * Returns the encoding of the document being read. Note you
584
- * first have to read data from the reader for encoding
585
- * to return a value
586
- *
587
- * reader = XML::Reader.file(XML_FILE)
588
- * assert_nil(reader.encoding)
589
- * reader.read
590
- * assert_equal(XML::Encoding::UTF_8, reader.encoding)
591
- *
592
- * In addition, libxml always appears to return nil for the encoding
593
- * when parsing strings.
594
- */
595
- static VALUE rxml_reader_encoding(VALUE self)
596
- {
597
- xmlTextReaderPtr xreader = rxml_text_reader_get(self);
598
- const xmlChar *xencoding = xmlTextReaderConstEncoding(xreader);
599
- if (xencoding)
600
- return INT2NUM(xmlParseCharEncoding(xencoding));
601
- else
602
- return INT2NUM(XML_CHAR_ENCODING_NONE);
603
- }
604
-
605
- /*
606
- * call-seq:
607
- * reader.base_uri -> URI
608
- *
609
- * Determine the base URI of the node.
610
- */
611
- static VALUE rxml_reader_base_uri(VALUE self)
612
- {
613
- const xmlChar *result = xmlTextReaderConstBaseUri(rxml_text_reader_get(self));
614
- return (result == NULL ? Qnil : rb_str_new2((const char*)result));
615
- }
616
-
617
- /*
618
- * call-seq:
619
- * reader.namespace_uri -> URI
620
- *
621
- * Determine the namespace URI of the node.
622
- */
623
- static VALUE rxml_reader_namespace_uri(VALUE self)
624
- {
625
- const xmlChar *result = xmlTextReaderConstNamespaceUri(rxml_text_reader_get(self));
626
- return (result == NULL ? Qnil : rb_str_new2((const char*)result));
627
- }
628
-
629
- /*
630
- * call-seq:
631
- * reader.value -> text
632
- *
633
- * Provide the text value of the node if present.
634
- */
635
- static VALUE rxml_reader_value(VALUE self)
636
- {
637
- const xmlChar *result = xmlTextReaderConstValue(rxml_text_reader_get(self));
638
- return (result == NULL ? Qnil : rb_str_new2((const char*)result));
639
- }
640
-
641
- /*
642
- * call-seq:
643
- * reader.prefix -> prefix
644
- *
645
- * Get a shorthand reference to the namespace associated with the node.
646
- */
647
- static VALUE rxml_reader_prefix(VALUE self)
648
- {
649
- const xmlChar *result = xmlTextReaderConstPrefix(rxml_text_reader_get(self));
650
- return (result == NULL ? Qnil : rb_str_new2((const char*)result));
651
- }
652
-
653
- /*
654
- * call-seq:
655
- * reader.depth -> depth
656
- *
657
- * Get the depth of the node in the tree.
658
- */
659
- static VALUE rxml_reader_depth(VALUE self)
660
- {
661
- return INT2FIX(xmlTextReaderDepth(rxml_text_reader_get(self)));
662
- }
663
-
664
- /*
665
- * call-seq:
666
- * reader.quote_char -> char
667
- *
668
- * Get the quotation mark character used to enclose the value of an attribute,
669
- * as an integer value (and -1 in case of error).
670
- */
671
- static VALUE rxml_reader_quote_char(VALUE self)
672
- {
673
- return INT2FIX(xmlTextReaderQuoteChar(rxml_text_reader_get(self)));
674
- }
675
-
676
- /*
677
- * call-seq:
678
- * reader.standalone -> code
679
- *
680
- * Determine the standalone status of the document being read.
681
- *
682
- * Return 1 if the document was declared to be standalone, 0 if it was
683
- * declared to be not standalone, or -1 if the document did not specify its
684
- * standalone status or in case of error.
685
- */
686
- static VALUE rxml_reader_standalone(VALUE self)
687
- {
688
- return INT2FIX(xmlTextReaderStandalone(rxml_text_reader_get(self)));
689
- }
690
-
691
- /*
692
- * call-seq:
693
- * reader.xml_lang -> value
694
- *
695
- * Get the xml:lang scope within which the node resides.
696
- */
697
- static VALUE rxml_reader_xml_lang(VALUE self)
698
- {
699
- const xmlChar *result = xmlTextReaderConstXmlLang(rxml_text_reader_get(self));
700
- return (result == NULL ? Qnil : rb_str_new2((const char*)result));
701
- }
702
-
703
- /*
704
- * call-seq:
705
- * reader.xml_version -> version
706
- *
707
- * Determine the XML version of the document being read.
708
- */
709
- static VALUE rxml_reader_xml_version(VALUE self)
710
- {
711
- const xmlChar *result = xmlTextReaderConstXmlVersion(rxml_text_reader_get(self));
712
- return (result == NULL ? Qnil : rb_str_new2((const char*)result));
713
- }
714
-
715
- /*
716
- * call-seq:
717
- * reader.has_attributes? -> bool
718
- *
719
- * Get whether the node has attributes.
720
- */
721
- static VALUE rxml_reader_has_attributes(VALUE self)
722
- {
723
- return xmlTextReaderHasAttributes(rxml_text_reader_get(self)) ? Qtrue
724
- : Qfalse;
725
- }
726
-
727
- /*
728
- * call-seq:
729
- * reader.has_value? -> bool
730
- *
731
- * Get whether the node can have a text value.
732
- */
733
- static VALUE rxml_reader_has_value(VALUE self)
734
- {
735
- return xmlTextReaderHasValue(rxml_text_reader_get(self)) ? Qtrue : Qfalse;
736
- }
737
-
738
- /*
739
- * call-seq:
740
- * reader[key] -> value
741
- *
742
- * Provide the value of the attribute with the specified index (if +key+ is an
743
- * integer) or with the specified name (if +key+ is a string) relative to the
744
- * containing element, as a string.
745
- */
746
- static VALUE rxml_reader_attribute(VALUE self, VALUE key)
747
- {
748
- xmlTextReaderPtr reader;
749
- xmlChar *attr;
750
-
751
- reader = rxml_text_reader_get(self);
752
-
753
- if (TYPE(key) == T_FIXNUM)
754
- {
755
- attr = xmlTextReaderGetAttributeNo(reader, FIX2INT(key));
756
- }
757
- else
758
- {
759
- attr = xmlTextReaderGetAttribute(reader, (const xmlChar *) StringValueCStr(key));
760
- }
761
- return (attr == NULL ? Qnil : rb_str_new2((const char*)attr));
762
- }
763
-
764
- /*
765
- * call-seq:
766
- * reader.lookup_namespace(prefix) -> value
767
- *
768
- * Resolve a namespace prefix in the scope of the current element.
769
- * To return the default namespace, specify nil as +prefix+.
770
- */
771
- static VALUE rxml_reader_lookup_namespace(VALUE self, VALUE prefix)
772
- {
773
- const xmlChar *result = xmlTextReaderLookupNamespace(rxml_text_reader_get(
774
- self), (const xmlChar *) StringValueCStr(prefix));
775
- return (result == NULL ? Qnil : rb_str_new2((const char*)result));
776
- }
777
-
778
- /*
779
- * call-seq:
780
- * reader.expand -> node
781
- *
782
- * Read the contents of the current node and the full subtree. It then makes
783
- * the subtree available until the next read call.
784
- *
785
- * Return an XML::Node object, or nil in case of error.
786
- */
787
- static VALUE rxml_reader_expand(VALUE self)
788
- {
789
- xmlNodePtr node;
790
- xmlDocPtr doc;
791
- xmlTextReaderPtr reader = rxml_text_reader_get(self);
792
- node = xmlTextReaderExpand(reader);
793
-
794
- if (!node)
795
- return Qnil;
796
-
797
- /* Okay this is tricky. By accessing the returned node, we
798
- take ownership of the reader's document. Thus we need to
799
- tell the reader to not free it. Otherwise it will be
800
- freed twice - once when the Ruby document wrapper goes
801
- out of scope and once when the reader goes out of scope. */
802
-
803
- xmlTextReaderPreserve(reader);
804
- doc = xmlTextReaderCurrentDoc(reader);
805
- rxml_document_wrap(doc);
806
-
807
- return rxml_node_wrap(node);
808
- }
809
-
810
#if LIBXML_VERSION >= 20618
/*
 * call-seq:
 *    reader.byte_consumed -> value
 *
 * This method provides the current index of the parser used by the reader,
 * relative to the start of the current entity.
 */
static VALUE
rxml_reader_byte_consumed(VALUE self)
{
  /* xmlTextReaderByteConsumed returns a long.  Converting it with INT2NUM
     narrowed it to int first, which truncates large offsets on platforms
     where long is wider than int. */
  long consumed = xmlTextReaderByteConsumed(rxml_text_reader_get(self));

  return LONG2NUM(consumed);
}
#endif
824
-
825
#if LIBXML_VERSION >= 20617
/*
 * call-seq:
 *    reader.column_number -> number
 *
 * Returns the column number of the current parsing point.
 */
static VALUE
rxml_reader_column_number(VALUE self)
{
  xmlTextReaderPtr xreader = rxml_text_reader_get(self);

  return INT2NUM(xmlTextReaderGetParserColumnNumber(xreader));
}

/*
 * call-seq:
 *    reader.line_number -> number
 *
 * Returns the line number of the current parsing point.
 */
static VALUE
rxml_reader_line_number(VALUE self)
{
  xmlTextReaderPtr xreader = rxml_text_reader_get(self);

  return INT2NUM(xmlTextReaderGetParserLineNumber(xreader));
}
#endif
850
-
851
- /*
852
- * call-seq:
853
- * reader.default? -> bool
854
- *
855
- * Return whether an Attribute node was generated from the default value
856
- * defined in the DTD or schema.
857
- */
858
- static VALUE rxml_reader_default(VALUE self)
859
- {
860
- return xmlTextReaderIsDefault(rxml_text_reader_get(self)) ? Qtrue : Qfalse;
861
- }
862
-
863
- /*
864
- * call-seq:
865
- * reader.namespace_declaration? -> bool
866
- *
867
- * Determine whether the current node is a namespace declaration rather than a
868
- * regular attribute.
869
- */
870
- static VALUE rxml_reader_namespace_declaration(VALUE self)
871
- {
872
- return xmlTextReaderIsNamespaceDecl(rxml_text_reader_get(self)) ? Qtrue
873
- : Qfalse;
874
- }
875
-
876
- /*
877
- * call-seq:
878
- * reader.empty_element? -> bool
879
- *
880
- * Check if the current node is empty.
881
- */
882
- static VALUE rxml_reader_empty_element(VALUE self)
883
- {
884
- return xmlTextReaderIsEmptyElement(rxml_text_reader_get(self)) ? Qtrue
885
- : Qfalse;
886
- }
887
-
888
- /*
889
- * call-seq:
890
- * reader.valid? -> bool
891
- *
892
- * Retrieve the validity status from the parser context.
893
- */
894
- static VALUE rxml_reader_valid(VALUE self)
895
- {
896
- return xmlTextReaderIsValid(rxml_text_reader_get(self)) ? Qtrue : Qfalse;
897
- }
898
-
899
- void rxml_init_reader(void)
900
- {
901
- BASE_URI_SYMBOL = ID2SYM(rb_intern("base_uri"));
902
- ENCODING_SYMBOL = ID2SYM(rb_intern("encoding"));
903
- IO_ATTR = ID2SYM(rb_intern("@io"));
904
- OPTIONS_SYMBOL = ID2SYM(rb_intern("options"));
905
-
906
- cXMLReader = rb_define_class_under(mXML, "Reader", rb_cObject);
907
-
908
- rb_define_singleton_method(cXMLReader, "document", rxml_reader_document, 1);
909
- rb_define_singleton_method(cXMLReader, "file", rxml_reader_file, -1);
910
- rb_define_singleton_method(cXMLReader, "io", rxml_reader_io, -1);
911
- rb_define_singleton_method(cXMLReader, "string", rxml_reader_string, -1);
912
-
913
- rb_define_method(cXMLReader, "[]", rxml_reader_attribute, 1);
914
- rb_define_method(cXMLReader, "attribute_count", rxml_reader_attr_count, 0);
915
- rb_define_method(cXMLReader, "base_uri", rxml_reader_base_uri, 0);
916
- #if LIBXML_VERSION >= 20618
917
- rb_define_method(cXMLReader, "byte_consumed", rxml_reader_byte_consumed, 0);
918
- #endif
919
- rb_define_method(cXMLReader, "close", rxml_reader_close, 0);
920
- #if LIBXML_VERSION >= 20617
921
- rb_define_method(cXMLReader, "column_number", rxml_reader_column_number, 0);
922
- #endif
923
- rb_define_method(cXMLReader, "depth", rxml_reader_depth, 0);
924
- rb_define_method(cXMLReader, "encoding", rxml_reader_encoding, 0);
925
- rb_define_method(cXMLReader, "expand", rxml_reader_expand, 0);
926
- rb_define_method(cXMLReader, "has_attributes?", rxml_reader_has_attributes, 0);
927
- rb_define_method(cXMLReader, "has_value?", rxml_reader_has_value, 0);
928
- #if LIBXML_VERSION >= 20617
929
- rb_define_method(cXMLReader, "line_number", rxml_reader_line_number, 0);
930
- #endif
931
- rb_define_method(cXMLReader, "local_name", rxml_reader_local_name, 0);
932
- rb_define_method(cXMLReader, "lookup_namespace", rxml_reader_lookup_namespace, 1);
933
- rb_define_method(cXMLReader, "move_to_attribute", rxml_reader_move_to_attr, 1);
934
- rb_define_method(cXMLReader, "move_to_first_attribute", rxml_reader_move_to_first_attr, 0);
935
- rb_define_method(cXMLReader, "move_to_next_attribute", rxml_reader_move_to_next_attr, 0);
936
- rb_define_method(cXMLReader, "move_to_element", rxml_reader_move_to_element, 0);
937
- rb_define_method(cXMLReader, "name", rxml_reader_name, 0);
938
- rb_define_method(cXMLReader, "namespace_uri", rxml_reader_namespace_uri, 0);
939
- rb_define_method(cXMLReader, "next", rxml_reader_next, 0);
940
- rb_define_method(cXMLReader, "next_sibling", rxml_reader_next_sibling, 0);
941
- rb_define_method(cXMLReader, "node", rxml_reader_node, 0);
942
- rb_define_method(cXMLReader, "node_type", rxml_reader_node_type, 0);
943
- rb_define_method(cXMLReader, "normalization", rxml_reader_normalization, 0);
944
- rb_define_method(cXMLReader, "prefix", rxml_reader_prefix, 0);
945
- rb_define_method(cXMLReader, "quote_char", rxml_reader_quote_char, 0);
946
- rb_define_method(cXMLReader, "read", rxml_reader_read, 0);
947
- rb_define_method(cXMLReader, "read_attribute_value", rxml_reader_read_attr_value, 0);
948
- rb_define_method(cXMLReader, "read_inner_xml", rxml_reader_read_inner_xml, 0);
949
- rb_define_method(cXMLReader, "read_outer_xml", rxml_reader_read_outer_xml, 0);
950
- rb_define_method(cXMLReader, "read_state", rxml_reader_read_state, 0);
951
- rb_define_method(cXMLReader, "read_string", rxml_reader_read_string, 0);
952
- rb_define_method(cXMLReader, "relax_ng_validate", rxml_reader_relax_ng_validate, 1);
953
- rb_define_method(cXMLReader, "standalone", rxml_reader_standalone, 0);
954
- #if LIBXML_VERSION >= 20620
955
- rb_define_method(cXMLReader, "schema_validate", rxml_reader_schema_validate, 1);
956
- #endif
957
- rb_define_method(cXMLReader, "value", rxml_reader_value, 0);
958
- rb_define_method(cXMLReader, "xml_lang", rxml_reader_xml_lang, 0);
959
- rb_define_method(cXMLReader, "xml_version", rxml_reader_xml_version, 0);
960
- rb_define_method(cXMLReader, "default?", rxml_reader_default, 0);
961
- rb_define_method(cXMLReader, "empty_element?", rxml_reader_empty_element, 0);
962
- rb_define_method(cXMLReader, "namespace_declaration?", rxml_reader_namespace_declaration, 0);
963
- rb_define_method(cXMLReader, "valid?", rxml_reader_valid, 0);
964
-
965
- /* Constants */
966
- rb_define_const(cXMLReader, "LOADDTD", INT2FIX(XML_PARSER_LOADDTD));
967
- rb_define_const(cXMLReader, "DEFAULTATTRS", INT2FIX(XML_PARSER_DEFAULTATTRS));
968
- rb_define_const(cXMLReader, "VALIDATE", INT2FIX(XML_PARSER_VALIDATE));
969
- rb_define_const(cXMLReader, "SUBST_ENTITIES", INT2FIX(XML_PARSER_SUBST_ENTITIES));
970
-
971
- rb_define_const(cXMLReader, "SEVERITY_VALIDITY_WARNING", INT2FIX(XML_PARSER_SEVERITY_VALIDITY_WARNING));
972
- rb_define_const(cXMLReader, "SEVERITY_VALIDITY_ERROR", INT2FIX(XML_PARSER_SEVERITY_VALIDITY_ERROR));
973
- rb_define_const(cXMLReader, "SEVERITY_WARNING", INT2FIX(XML_PARSER_SEVERITY_WARNING));
974
- rb_define_const(cXMLReader, "SEVERITY_ERROR", INT2FIX(XML_PARSER_SEVERITY_ERROR));
975
-
976
- rb_define_const(cXMLReader, "TYPE_NONE", INT2FIX(XML_READER_TYPE_NONE));
977
- rb_define_const(cXMLReader, "TYPE_ELEMENT", INT2FIX(XML_READER_TYPE_ELEMENT));
978
- rb_define_const(cXMLReader, "TYPE_ATTRIBUTE", INT2FIX(XML_READER_TYPE_ATTRIBUTE));
979
- rb_define_const(cXMLReader, "TYPE_TEXT", INT2FIX(XML_READER_TYPE_TEXT));
980
- rb_define_const(cXMLReader, "TYPE_CDATA", INT2FIX(XML_READER_TYPE_CDATA));
981
- rb_define_const(cXMLReader, "TYPE_ENTITY_REFERENCE", INT2FIX(XML_READER_TYPE_ENTITY_REFERENCE));
982
- rb_define_const(cXMLReader, "TYPE_ENTITY", INT2FIX(XML_READER_TYPE_ENTITY));
983
- rb_define_const(cXMLReader, "TYPE_PROCESSING_INSTRUCTION", INT2FIX(XML_READER_TYPE_PROCESSING_INSTRUCTION));
984
- rb_define_const(cXMLReader, "TYPE_COMMENT", INT2FIX(XML_READER_TYPE_COMMENT));
985
- rb_define_const(cXMLReader, "TYPE_DOCUMENT", INT2FIX(XML_READER_TYPE_DOCUMENT));
986
- rb_define_const(cXMLReader, "TYPE_DOCUMENT_TYPE", INT2FIX(XML_READER_TYPE_DOCUMENT_TYPE));
987
- rb_define_const(cXMLReader, "TYPE_DOCUMENT_FRAGMENT", INT2FIX(XML_READER_TYPE_DOCUMENT_FRAGMENT));
988
- rb_define_const(cXMLReader, "TYPE_NOTATION", INT2FIX(XML_READER_TYPE_NOTATION));
989
- rb_define_const(cXMLReader, "TYPE_WHITESPACE", INT2FIX(XML_READER_TYPE_WHITESPACE));
990
- rb_define_const(cXMLReader, "TYPE_SIGNIFICANT_WHITESPACE", INT2FIX(XML_READER_TYPE_SIGNIFICANT_WHITESPACE));
991
- rb_define_const(cXMLReader, "TYPE_END_ELEMENT", INT2FIX(XML_READER_TYPE_END_ELEMENT));
992
- rb_define_const(cXMLReader, "TYPE_END_ENTITY", INT2FIX(XML_READER_TYPE_END_ENTITY));
993
- rb_define_const(cXMLReader, "TYPE_XML_DECLARATION", INT2FIX(XML_READER_TYPE_XML_DECLARATION));
994
-
995
- /* Read states */
996
- rb_define_const(cXMLReader, "MODE_INITIAL", INT2FIX(XML_TEXTREADER_MODE_INITIAL));
997
- rb_define_const(cXMLReader, "MODE_INTERACTIVE", INT2FIX(XML_TEXTREADER_MODE_INTERACTIVE));
998
- rb_define_const(cXMLReader, "MODE_ERROR", INT2FIX(XML_TEXTREADER_MODE_ERROR));
999
- rb_define_const(cXMLReader, "MODE_EOF", INT2FIX(XML_TEXTREADER_MODE_EOF));
1000
- rb_define_const(cXMLReader, "MODE_CLOSED", INT2FIX(XML_TEXTREADER_MODE_CLOSED));
1001
- rb_define_const(cXMLReader, "MODE_READING", INT2FIX(XML_TEXTREADER_MODE_READING));
1002
- }
1
+ /* Copyright (c) 2006-2007 Apple Inc.
2
+ * Please see the LICENSE file for copyright and distribution information. */
3
+
4
+ #include "ruby_libxml.h"
5
+ #include "ruby_xml_reader.h"
6
+
7
+ /*
8
+ * Document-class: LibXML::XML::Reader
9
+ *
10
+ * The XML::Reader class provides a simpler, alternative way of parsing an XML
11
+ * document in contrast to XML::Parser or XML::SaxParser. A XML::Reader instance
12
+ * acts like a cursor going forward in a document stream, stopping at each node
13
+ * it encounters. To advance to the next node, simply call XML::Reader#read.
14
+ *
15
+ * The XML::Reader API closely matches the DOM Core specification and supports
16
+ * namespaces, xml:base, entity handling and DTDs.
17
+ *
18
+ * To summarize, XML::Reader provides a far simpler API to use versus XML::SaxParser
19
+ * and is more memory efficient than using XML::Parser to create a DOM tree.
20
+ *
21
+ * Example:
22
+ *
23
+ * reader = XML::Reader.string("<foo><bar>1</bar><bar>2</bar><bar>3</bar></foo>")
24
+ * reader.read
25
+ * assert_equal('foo', reader.name)
26
+ * assert_equal(nil, reader.value)
27
+ *
28
+ * 3.times do |i|
29
+ * reader.read
30
+ * assert_equal(XML::Reader::TYPE_ELEMENT, reader.node_type)
31
+ * assert_equal('bar', reader.name)
32
+ * reader.read
33
+ * assert_equal(XML::Reader::TYPE_TEXT, reader.node_type)
34
+ * assert_equal((i + 1).to_s, reader.value)
35
+ * reader.read
36
+ * assert_equal(XML::Reader::TYPE_END_ELEMENT, reader.node_type)
37
+ * end
38
+ *
39
+ * You can also parse documents (see XML::Reader.document),
40
+ * strings (see XML::Reader.string) and io objects (see
41
+ * XML::Reader.io).
42
+ *
43
+ * For a more in depth tutorial, albeit in C, see http://xmlsoft.org/xmlreader.html.*/
44
+
45
+ VALUE cXMLReader;
46
+
47
+ static ID BASE_URI_SYMBOL;
48
+ static ID ENCODING_SYMBOL;
49
+ static ID IO_ATTR;
50
+ static ID OPTIONS_SYMBOL;
51
+
52
+
53
+ static void rxml_reader_free(xmlTextReaderPtr reader)
54
+ {
55
+ xmlFreeTextReader(reader);
56
+ }
57
+
58
+ static VALUE rxml_reader_wrap(xmlTextReaderPtr reader)
59
+ {
60
+ return Data_Wrap_Struct(cXMLReader, NULL, rxml_reader_free, reader);
61
+ }
62
+
63
+ static xmlTextReaderPtr rxml_text_reader_get(VALUE obj)
64
+ {
65
+ xmlTextReaderPtr xreader;
66
+ Data_Get_Struct(obj, xmlTextReader, xreader);
67
+ return xreader;
68
+ }
69
+
70
+ /*
71
+ * call-seq:
72
+ * XML::Reader.document(doc) -> XML::Reader
73
+ *
74
+ * Create an new reader for the specified document.
75
+ */
76
+ VALUE rxml_reader_document(VALUE klass, VALUE doc)
77
+ {
78
+ xmlDocPtr xdoc;
79
+ xmlTextReaderPtr xreader;
80
+
81
+ Data_Get_Struct(doc, xmlDoc, xdoc);
82
+
83
+ xreader = xmlReaderWalker(xdoc);
84
+
85
+ if (xreader == NULL)
86
+ rxml_raise(&xmlLastError);
87
+
88
+ return rxml_reader_wrap(xreader);
89
+ }
90
+
91
+ /* call-seq:
92
+ * XML::Reader.file(path) -> XML::Reader
93
+ * XML::Reader.file(path, :encoding => XML::Encoding::UTF_8,
94
+ * :options => XML::Parser::Options::NOENT) -> XML::Parser
95
+ *
96
+ * Creates a new reader by parsing the specified file or uri.
97
+ *
98
+ * You may provide an optional hash table to control how the
99
+ * parsing is performed. Valid options are:
100
+ *
101
+ * encoding - The document encoding, defaults to nil. Valid values
102
+ * are the encoding constants defined on XML::Encoding.
103
+ * options - Controls the execution of the parser, defaults to 0.
104
+ * Valid values are the constants defined on
105
+ * XML::Parser::Options. Mutliple options can be combined
106
+ * by using Bitwise OR (|).
107
+ */
108
+ static VALUE rxml_reader_file(int argc, VALUE *argv, VALUE klass)
109
+ {
110
+ xmlTextReaderPtr xreader;
111
+ VALUE path;
112
+ VALUE options;
113
+
114
+ const char *xencoding = NULL;
115
+ int xoptions = 0;
116
+
117
+ rb_scan_args(argc, argv, "11", &path, &options);
118
+ Check_Type(path, T_STRING);
119
+
120
+ if (!NIL_P(options))
121
+ {
122
+ VALUE encoding = Qnil;
123
+ VALUE parserOptions = Qnil;
124
+
125
+ Check_Type(options, T_HASH);
126
+
127
+ encoding = rb_hash_aref(options, BASE_URI_SYMBOL);
128
+ xencoding = NIL_P(encoding) ? NULL : xmlGetCharEncodingName(NUM2INT(encoding));
129
+
130
+ parserOptions = rb_hash_aref(options, OPTIONS_SYMBOL);
131
+ xoptions = NIL_P(parserOptions) ? 0 : NUM2INT(parserOptions);
132
+ }
133
+
134
+ xreader = xmlReaderForFile(StringValueCStr(path), xencoding, xoptions);
135
+
136
+ if (xreader == NULL)
137
+ rxml_raise(&xmlLastError);
138
+
139
+ return rxml_reader_wrap(xreader);
140
+ }
141
+
142
+ /* call-seq:
143
+ * XML::Reader.io(io) -> XML::Reader
144
+ * XML::Reader.io(io, :encoding => XML::Encoding::UTF_8,
145
+ * :options => XML::Parser::Options::NOENT) -> XML::Parser
146
+ *
147
+ * Creates a new reader by parsing the specified io object.
148
+ *
149
+ * You may provide an optional hash table to control how the
150
+ * parsing is performed. Valid options are:
151
+ *
152
+ * base_uri - The base url for the parsed document.
153
+ * encoding - The document encoding, defaults to nil. Valid values
154
+ * are the encoding constants defined on XML::Encoding.
155
+ * options - Controls the execution of the parser, defaults to 0.
156
+ * Valid values are the constants defined on
157
+ * XML::Parser::Options. Mutliple options can be combined
158
+ * by using Bitwise OR (|).
159
+ */
160
+ static VALUE rxml_reader_io(int argc, VALUE *argv, VALUE klass)
161
+ {
162
+ xmlTextReaderPtr xreader;
163
+ VALUE result;
164
+ VALUE io;
165
+ VALUE options;
166
+ char *xbaseurl = NULL;
167
+ const char *xencoding = NULL;
168
+ int xoptions = 0;
169
+
170
+ rb_scan_args(argc, argv, "11", &io, &options);
171
+
172
+ if (!NIL_P(options))
173
+ {
174
+ VALUE baseurl = Qnil;
175
+ VALUE encoding = Qnil;
176
+ VALUE parserOptions = Qnil;
177
+
178
+ Check_Type(options, T_HASH);
179
+
180
+ baseurl = rb_hash_aref(options, BASE_URI_SYMBOL);
181
+ xbaseurl = NIL_P(baseurl) ? NULL : StringValueCStr(baseurl);
182
+
183
+ encoding = rb_hash_aref(options, ENCODING_SYMBOL);
184
+ xencoding = NIL_P(encoding) ? NULL : xmlGetCharEncodingName(NUM2INT(encoding));
185
+
186
+ parserOptions = rb_hash_aref(options, OPTIONS_SYMBOL);
187
+ xoptions = NIL_P(parserOptions) ? 0 : NUM2INT(parserOptions);
188
+ }
189
+
190
+ xreader = xmlReaderForIO((xmlInputReadCallback) rxml_read_callback, NULL,
191
+ (void *) io,
192
+ xbaseurl, xencoding, xoptions);
193
+
194
+ if (xreader == NULL)
195
+ rxml_raise(&xmlLastError);
196
+
197
+ result = rxml_reader_wrap(xreader);
198
+
199
+ /* Attach io object to parser so it won't get freed.*/
200
+ rb_ivar_set(result, IO_ATTR, io);
201
+
202
+ return result;
203
+ }
204
+
205
+ /* call-seq:
206
+ * XML::Reader.string(io) -> XML::Reader
207
+ * XML::Reader.string(io, :encoding => XML::Encoding::UTF_8,
208
+ * :options => XML::Parser::Options::NOENT) -> XML::Parser
209
+ *
210
+ * Creates a new reader by parsing the specified string.
211
+ *
212
+ * You may provide an optional hash table to control how the
213
+ * parsing is performed. Valid options are:
214
+ *
215
+ * base_uri - The base url for the parsed document.
216
+ * encoding - The document encoding, defaults to nil. Valid values
217
+ * are the encoding constants defined on XML::Encoding.
218
+ * options - Controls the execution of the parser, defaults to 0.
219
+ * Valid values are the constants defined on
220
+ * XML::Parser::Options. Mutliple options can be combined
221
+ * by using Bitwise OR (|).
222
+ */
223
+ static VALUE rxml_reader_string(int argc, VALUE *argv, VALUE klass)
224
+ {
225
+ xmlTextReaderPtr xreader;
226
+ VALUE string;
227
+ VALUE options;
228
+ char *xbaseurl = NULL;
229
+ const char *xencoding = NULL;
230
+ int xoptions = 0;
231
+
232
+ rb_scan_args(argc, argv, "11", &string, &options);
233
+ Check_Type(string, T_STRING);
234
+
235
+ if (!NIL_P(options))
236
+ {
237
+ VALUE baseurl = Qnil;
238
+ VALUE encoding = Qnil;
239
+ VALUE parserOptions = Qnil;
240
+
241
+ Check_Type(options, T_HASH);
242
+
243
+ baseurl = rb_hash_aref(options, BASE_URI_SYMBOL);
244
+ xbaseurl = NIL_P(baseurl) ? NULL : StringValueCStr(baseurl);
245
+
246
+ encoding = rb_hash_aref(options, ENCODING_SYMBOL);
247
+ xencoding = NIL_P(encoding) ? NULL : xmlGetCharEncodingName(NUM2INT(encoding));
248
+
249
+ parserOptions = rb_hash_aref(options, OPTIONS_SYMBOL);
250
+ xoptions = NIL_P(parserOptions) ? 0 : NUM2INT(parserOptions);
251
+ }
252
+
253
+ xreader = xmlReaderForMemory(StringValueCStr(string), RSTRING_LEN(string),
254
+ xbaseurl, xencoding, xoptions);
255
+
256
+ if (xreader == NULL)
257
+ rxml_raise(&xmlLastError);
258
+
259
+ return rxml_reader_wrap(xreader);
260
+ }
261
+
262
+ /*
263
+ * call-seq:
264
+ * reader.close -> code
265
+ *
266
+ * This method releases any resources allocated by the current instance
267
+ * changes the state to Closed and close any underlying input.
268
+ */
269
+ static VALUE rxml_reader_close(VALUE self)
270
+ {
271
+ return INT2FIX(xmlTextReaderClose(rxml_text_reader_get(self)));
272
+ }
273
+
274
+ /*
275
+ * call-seq:
276
+ * reader.move_to_attribute(val) -> code
277
+ *
278
+ * Move the position of the current instance to the attribute with the
279
+ * specified index (if +val+ is an integer) or name (if +val+ is a string)
280
+ * relative to the containing element.
281
+ */
282
+ static VALUE rxml_reader_move_to_attr(VALUE self, VALUE val)
283
+ {
284
+ xmlTextReaderPtr xreader;
285
+ int ret;
286
+
287
+ xreader = rxml_text_reader_get(self);
288
+
289
+ if (TYPE(val) == T_FIXNUM)
290
+ {
291
+ ret = xmlTextReaderMoveToAttributeNo(xreader, FIX2INT(val));
292
+ }
293
+ else
294
+ {
295
+ ret = xmlTextReaderMoveToAttribute(xreader,
296
+ (const xmlChar *) StringValueCStr(val));
297
+ }
298
+
299
+ return INT2FIX(ret);
300
+ }
301
+
302
+ /*
303
+ * call-seq:
304
+ * reader.move_to_first_attribute -> code
305
+ *
306
+ * Move the position of the current instance to the first attribute associated
307
+ * with the current node.
308
+ */
309
+ static VALUE rxml_reader_move_to_first_attr(VALUE self)
310
+ {
311
+ return INT2FIX(xmlTextReaderMoveToFirstAttribute(rxml_text_reader_get(self)));
312
+ }
313
+
314
+ /*
315
+ * call-seq:
316
+ * reader.move_to_next_attribute -> code
317
+ *
318
+ * Move the position of the current instance to the next attribute associated
319
+ * with the current node.
320
+ */
321
+ static VALUE rxml_reader_move_to_next_attr(VALUE self)
322
+ {
323
+ return INT2FIX(xmlTextReaderMoveToNextAttribute(rxml_text_reader_get(self)));
324
+ }
325
+
326
+ /*
327
+ * call-seq:
328
+ * reader.move_to_element -> code
329
+ *
330
+ * Move the position of the current instance to the node that contains the
331
+ * current attribute node.
332
+ */
333
+ static VALUE rxml_reader_move_to_element(VALUE self)
334
+ {
335
+ return INT2FIX(xmlTextReaderMoveToElement(rxml_text_reader_get(self)));
336
+ }
337
+
338
+ /*
339
+ * call-seq:
340
+ * reader.next -> code
341
+ *
342
+ * Skip to the node following the current one in document order while avoiding
343
+ * the subtree if any.
344
+ */
345
+ static VALUE rxml_reader_next(VALUE self)
346
+ {
347
+ return INT2FIX(xmlTextReaderNext(rxml_text_reader_get(self)));
348
+ }
349
+
350
+ /*
351
+ * call-seq:
352
+ * reader.next_sibling -> code
353
+ *
354
+ * Skip to the node following the current one in document order while avoiding
355
+ * the subtree if any. Currently implemented only for Readers built on a
356
+ * document.
357
+ */
358
+ static VALUE rxml_reader_next_sibling(VALUE self)
359
+ {
360
+ return INT2FIX(xmlTextReaderNextSibling(rxml_text_reader_get(self)));
361
+ }
362
+
363
+ /*
364
+ * call-seq:
365
+ * reader.node -> XML::Node
366
+ *
367
+ * Returns the reader's current node. It will return
368
+ * nil if Reader#read has not yet been called.
369
+ * WARNING - Using this method is dangerous because the
370
+ * the node may be destroyed on the next #read.
371
+ */
372
+ static VALUE rxml_reader_node(VALUE self)
373
+ {
374
+ xmlTextReaderPtr xreader = rxml_text_reader_get(self);
375
+ xmlNodePtr xnode = xmlTextReaderCurrentNode(xreader);
376
+ return xnode ? rxml_node_wrap(xnode) : Qnil;
377
+ }
378
+
379
+ /*
380
+ * call-seq:
381
+ * reader.node_type -> type
382
+ *
383
+ * Get the node type of the current node. Reference:
384
+ * http://dotgnu.org/pnetlib-doc/System/Xml/XmlNodeType.html
385
+ */
386
+ static VALUE rxml_reader_node_type(VALUE self)
387
+ {
388
+ return INT2FIX(xmlTextReaderNodeType(rxml_text_reader_get(self)));
389
+ }
390
+
391
+ /*
392
+ * call-seq:
393
+ * reader.normalization -> value
394
+ *
395
+ * The value indicating whether to normalize white space and attribute values.
396
+ * Since attribute value and end of line normalizations are a MUST in the XML
397
+ * specification only the value true is accepted. The broken bahaviour of
398
+ * accepting out of range character entities like &#0; is of course not
399
+ * supported either.
400
+ *
401
+ * Return 1 or -1 in case of error.
402
+ */
403
+ static VALUE rxml_reader_normalization(VALUE self)
404
+ {
405
+ return INT2FIX(xmlTextReaderNormalization(rxml_text_reader_get(self)));
406
+ }
407
+
408
+ /*
409
+ * call-seq:
410
+ * reader.read -> code
411
+ *
412
+ * Causes the reader to move to the next node in the stream, exposing its properties.
413
+ *
414
+ * Returns true if a node was successfully read or false if there are no more
415
+ * nodes to read. On errors, an exception is raised.*/
416
+ static VALUE rxml_reader_read(VALUE self)
417
+ {
418
+ int result = xmlTextReaderRead(rxml_text_reader_get(self));
419
+ switch(result)
420
+ {
421
+ case -1:
422
+ rxml_raise(&xmlLastError);
423
+ return Qnil;
424
+ break;
425
+ case 0:
426
+ return Qfalse;
427
+ case 1:
428
+ return Qtrue;
429
+ default:
430
+ rb_raise(rb_eRuntimeError,
431
+ "xmlTextReaderRead did not return -1, 0 or 1. Return value was: %d", result);
432
+ }
433
+ }
434
+
435
+ /*
436
+ * call-seq:
437
+ * reader.read_attribute_value -> code
438
+ *
439
+ * Parse an attribute value into one or more Text and EntityReference nodes.
440
+ *
441
+ * Return 1 in case of success, 0 if the reader was not positionned on an
442
+ * attribute node or all the attribute values have been read, or -1 in case of
443
+ * error.
444
+ */
445
+ static VALUE rxml_reader_read_attr_value(VALUE self)
446
+ {
447
+ return INT2FIX(xmlTextReaderReadAttributeValue(rxml_text_reader_get(self)));
448
+ }
449
+
450
+ /*
451
+ * call-seq:
452
+ * reader.read_inner_xml -> data
453
+ *
454
+ * Read the contents of the current node, including child nodes and markup.
455
+ *
456
+ * Return a string containing the XML content, or nil if the current node is
457
+ * neither an element nor attribute, or has no child nodes.
458
+ */
459
+ static VALUE rxml_reader_read_inner_xml(VALUE self)
460
+ {
461
+ const xmlChar *result = xmlTextReaderReadInnerXml(rxml_text_reader_get(self));
462
+ return (result == NULL ? Qnil : rb_str_new2((const char*)result));
463
+ }
464
+
465
+ /*
466
+ * call-seq:
467
+ * reader.read_outer_xml -> data
468
+ *
469
+ * Read the contents of the current node, including child nodes and markup.
470
+ *
471
+ * Return a string containing the XML content, or nil if the current node is
472
+ * neither an element nor attribute, or has no child nodes.
473
+ */
474
+ static VALUE rxml_reader_read_outer_xml(VALUE self)
475
+ {
476
+ const xmlChar *result = xmlTextReaderReadOuterXml(rxml_text_reader_get(self));
477
+ return (result == NULL ? Qnil : rb_str_new2((const char*)result));
478
+ }
479
+
480
+ /*
481
+ * call-seq:
482
+ * reader.read_state -> state
483
+ *
484
+ * Get the read state of the reader.
485
+ */
486
+ static VALUE rxml_reader_read_state(VALUE self)
487
+ {
488
+ return INT2FIX(xmlTextReaderReadState(rxml_text_reader_get(self)));
489
+ }
490
+
491
+ /*
492
+ * call-seq:
493
+ * reader.read_string -> string
494
+ *
495
+ * Read the contents of an element or a text node as a string.
496
+ *
497
+ * Return a string containing the contents of the Element or Text node, or nil
498
+ * if the reader is positioned on any other type of node.
499
+ */
500
+ static VALUE rxml_reader_read_string(VALUE self)
501
+ {
502
+ const xmlChar *result = xmlTextReaderReadString(rxml_text_reader_get(self));
503
+ return (result == NULL ? Qnil : rb_str_new2((const char*)result));
504
+ }
505
+
506
+ /*
507
+ * call-seq:
508
+ * reader.relax_ng_validate(rng) -> code
509
+ *
510
+ * Use RelaxNG to validate the document as it is processed. Activation is only
511
+ * possible before the first read. If +rng+ is nil, the RelaxNG validation is
512
+ * desactivated.
513
+ *
514
+ * Return 0 in case the RelaxNG validation could be (des)activated and -1 in
515
+ * case of error.
516
+ */
517
+ static VALUE rxml_reader_relax_ng_validate(VALUE self, VALUE rng)
518
+ {
519
+ char *xrng = NIL_P(rng) ? NULL : StringValueCStr(rng);
520
+ return INT2FIX(xmlTextReaderRelaxNGValidate(rxml_text_reader_get(self), xrng));
521
+ }
522
+
523
+ #if LIBXML_VERSION >= 20620
524
+ /*
525
+ * call-seq:
526
+ * reader.schema_validate(schema) -> code
527
+ *
528
+ * Use W3C XSD schema to validate the document as it is processed. Activation
529
+ * is only possible before the first read. If +schema+ is nil, then XML Schema
530
+ * validation is desactivated.
531
+ *
532
+ * Return 0 in case the schemas validation could be (de)activated and -1 in
533
+ * case of error.
534
+ */
535
+ static VALUE
536
+ rxml_reader_schema_validate(VALUE self, VALUE xsd)
537
+ {
538
+ char *xxsd = NIL_P(xsd) ? NULL : StringValueCStr(xsd);
539
+ int status = xmlTextReaderSchemaValidate(rxml_text_reader_get(self), xxsd);
540
+ return INT2FIX(status);
541
+ }
542
+ #endif
543
+
544
+ /*
545
+ * call-seq:
546
+ * reader.name -> name
547
+ *
548
+ * Return the qualified name of the node.
549
+ */
550
+ static VALUE rxml_reader_name(VALUE self)
551
+ {
552
+ const xmlChar *result = xmlTextReaderConstName(rxml_text_reader_get(self));
553
+ return (result == NULL ? Qnil : rb_str_new2((const char*)result));
554
+ }
555
+
556
+ /*
557
+ * call-seq:
558
+ * reader.local_name -> name
559
+ *
560
+ * Return the local name of the node.
561
+ */
562
+ static VALUE rxml_reader_local_name(VALUE self)
563
+ {
564
+ const xmlChar *result = xmlTextReaderConstLocalName(rxml_text_reader_get(self));
565
+ return (result == NULL ? Qnil : rb_str_new2((const char*)result));
566
+ }
567
+
568
+ /*
569
+ * call-seq:
570
+ * reader.attribute_count -> count
571
+ *
572
+ * Provide the number of attributes of the current node.
573
+ */
574
+ static VALUE rxml_reader_attr_count(VALUE self)
575
+ {
576
+ return INT2FIX(xmlTextReaderAttributeCount(rxml_text_reader_get(self)));
577
+ }
578
+
579
+ /*
580
+ * call-seq:
581
+ * reader.encoding -> XML::Encoding::UTF_8
582
+ *
583
+ * Returns the encoding of the document being read. Note you
584
+ * first have to read data from the reader for encoding
585
+ * to return a value
586
+ *
587
+ * reader = XML::Reader.file(XML_FILE)
588
+ * assert_nil(reader.encoding)
589
+ * reader.read
590
+ * assert_equal(XML::Encoding::UTF_8, reader.encoding)
591
+ *
592
+ * In addition, libxml always appears to return nil for the encoding
593
+ * when parsing strings.
594
+ */
595
+ static VALUE rxml_reader_encoding(VALUE self)
596
+ {
597
+ xmlTextReaderPtr xreader = rxml_text_reader_get(self);
598
+ const xmlChar *xencoding = xmlTextReaderConstEncoding(xreader);
599
+ if (xencoding)
600
+ return INT2NUM(xmlParseCharEncoding(xencoding));
601
+ else
602
+ return INT2NUM(XML_CHAR_ENCODING_NONE);
603
+ }
604
+
605
+ /*
606
+ * call-seq:
607
+ * reader.base_uri -> URI
608
+ *
609
+ * Determine the base URI of the node.
610
+ */
611
+ static VALUE rxml_reader_base_uri(VALUE self)
612
+ {
613
+ const xmlChar *result = xmlTextReaderConstBaseUri(rxml_text_reader_get(self));
614
+ return (result == NULL ? Qnil : rb_str_new2((const char*)result));
615
+ }
616
+
617
+ /*
618
+ * call-seq:
619
+ * reader.namespace_uri -> URI
620
+ *
621
+ * Determine the namespace URI of the node.
622
+ */
623
+ static VALUE rxml_reader_namespace_uri(VALUE self)
624
+ {
625
+ const xmlChar *result = xmlTextReaderConstNamespaceUri(rxml_text_reader_get(self));
626
+ return (result == NULL ? Qnil : rb_str_new2((const char*)result));
627
+ }
628
+
629
+ /*
630
+ * call-seq:
631
+ * reader.value -> text
632
+ *
633
+ * Provide the text value of the node if present.
634
+ */
635
+ static VALUE rxml_reader_value(VALUE self)
636
+ {
637
+ const xmlChar *result = xmlTextReaderConstValue(rxml_text_reader_get(self));
638
+ return (result == NULL ? Qnil : rb_str_new2((const char*)result));
639
+ }
640
+
641
+ /*
642
+ * call-seq:
643
+ * reader.prefix -> prefix
644
+ *
645
+ * Get a shorthand reference to the namespace associated with the node.
646
+ */
647
+ static VALUE rxml_reader_prefix(VALUE self)
648
+ {
649
+ const xmlChar *result = xmlTextReaderConstPrefix(rxml_text_reader_get(self));
650
+ return (result == NULL ? Qnil : rb_str_new2((const char*)result));
651
+ }
652
+
653
+ /*
654
+ * call-seq:
655
+ * reader.depth -> depth
656
+ *
657
+ * Get the depth of the node in the tree.
658
+ */
659
+ static VALUE rxml_reader_depth(VALUE self)
660
+ {
661
+ return INT2FIX(xmlTextReaderDepth(rxml_text_reader_get(self)));
662
+ }
663
+
664
+ /*
665
+ * call-seq:
666
+ * reader.quote_char -> char
667
+ *
668
+ * Get the quotation mark character used to enclose the value of an attribute,
669
+ * as an integer value (and -1 in case of error).
670
+ */
671
+ static VALUE rxml_reader_quote_char(VALUE self)
672
+ {
673
+ return INT2FIX(xmlTextReaderQuoteChar(rxml_text_reader_get(self)));
674
+ }
675
+
676
+ /*
677
+ * call-seq:
678
+ * reader.standalone -> code
679
+ *
680
+ * Determine the standalone status of the document being read.
681
+ *
682
+ * Return 1 if the document was declared to be standalone, 0 if it was
683
+ * declared to be not standalone, or -1 if the document did not specify its
684
+ * standalone status or in case of error.
685
+ */
686
+ static VALUE rxml_reader_standalone(VALUE self)
687
+ {
688
+ return INT2FIX(xmlTextReaderStandalone(rxml_text_reader_get(self)));
689
+ }
690
+
691
+ /*
692
+ * call-seq:
693
+ * reader.xml_lang -> value
694
+ *
695
+ * Get the xml:lang scope within which the node resides.
696
+ */
697
+ static VALUE rxml_reader_xml_lang(VALUE self)
698
+ {
699
+ const xmlChar *result = xmlTextReaderConstXmlLang(rxml_text_reader_get(self));
700
+ return (result == NULL ? Qnil : rb_str_new2((const char*)result));
701
+ }
702
+
703
+ /*
704
+ * call-seq:
705
+ * reader.xml_version -> version
706
+ *
707
+ * Determine the XML version of the document being read.
708
+ */
709
+ static VALUE rxml_reader_xml_version(VALUE self)
710
+ {
711
+ const xmlChar *result = xmlTextReaderConstXmlVersion(rxml_text_reader_get(self));
712
+ return (result == NULL ? Qnil : rb_str_new2((const char*)result));
713
+ }
714
+
715
+ /*
716
+ * call-seq:
717
+ * reader.has_attributes? -> bool
718
+ *
719
+ * Get whether the node has attributes.
720
+ */
721
+ static VALUE rxml_reader_has_attributes(VALUE self)
722
+ {
723
+ return xmlTextReaderHasAttributes(rxml_text_reader_get(self)) ? Qtrue
724
+ : Qfalse;
725
+ }
726
+
727
+ /*
728
+ * call-seq:
729
+ * reader.has_value? -> bool
730
+ *
731
+ * Get whether the node can have a text value.
732
+ */
733
+ static VALUE rxml_reader_has_value(VALUE self)
734
+ {
735
+ return xmlTextReaderHasValue(rxml_text_reader_get(self)) ? Qtrue : Qfalse;
736
+ }
737
+
738
+ /*
739
+ * call-seq:
740
+ * reader[key] -> value
741
+ *
742
+ * Provide the value of the attribute with the specified index (if +key+ is an
743
+ * integer) or with the specified name (if +key+ is a string) relative to the
744
+ * containing element, as a string.
745
+ */
746
+ static VALUE rxml_reader_attribute(VALUE self, VALUE key)
747
+ {
748
+ xmlTextReaderPtr reader;
749
+ xmlChar *attr;
750
+
751
+ reader = rxml_text_reader_get(self);
752
+
753
+ if (TYPE(key) == T_FIXNUM)
754
+ {
755
+ attr = xmlTextReaderGetAttributeNo(reader, FIX2INT(key));
756
+ }
757
+ else
758
+ {
759
+ attr = xmlTextReaderGetAttribute(reader, (const xmlChar *) StringValueCStr(key));
760
+ }
761
+ return (attr == NULL ? Qnil : rb_str_new2((const char*)attr));
762
+ }
763
+
764
+ /*
765
+ * call-seq:
766
+ * reader.lookup_namespace(prefix) -> value
767
+ *
768
+ * Resolve a namespace prefix in the scope of the current element.
769
+ * To return the default namespace, specify nil as +prefix+.
770
+ */
771
+ static VALUE rxml_reader_lookup_namespace(VALUE self, VALUE prefix)
772
+ {
773
+ const xmlChar *result = xmlTextReaderLookupNamespace(rxml_text_reader_get(
774
+ self), (const xmlChar *) StringValueCStr(prefix));
775
+ return (result == NULL ? Qnil : rb_str_new2((const char*)result));
776
+ }
777
+
778
+ /*
779
+ * call-seq:
780
+ * reader.expand -> node
781
+ *
782
+ * Read the contents of the current node and the full subtree. It then makes
783
+ * the subtree available until the next read call.
784
+ *
785
+ * Return an XML::Node object, or nil in case of error.
786
+ */
787
+ static VALUE rxml_reader_expand(VALUE self)
788
+ {
789
+ xmlNodePtr node;
790
+ xmlDocPtr doc;
791
+ xmlTextReaderPtr reader = rxml_text_reader_get(self);
792
+ node = xmlTextReaderExpand(reader);
793
+
794
+ if (!node)
795
+ return Qnil;
796
+
797
+ /* Okay this is tricky. By accessing the returned node, we
798
+ take ownership of the reader's document. Thus we need to
799
+ tell the reader to not free it. Otherwise it will be
800
+ freed twice - once when the Ruby document wrapper goes
801
+ out of scope and once when the reader goes out of scope. */
802
+
803
+ xmlTextReaderPreserve(reader);
804
+ doc = xmlTextReaderCurrentDoc(reader);
805
+ rxml_document_wrap(doc);
806
+
807
+ return rxml_node_wrap(node);
808
+ }
809
+
810
+ #if LIBXML_VERSION >= 20618
811
+ /*
812
+ * call-seq:
813
+ * reader.byte_consumed -> value
814
+ *
815
+ * This method provides the current index of the parser used by the reader,
816
+ * relative to the start of the current entity.
817
+ */
818
+ static VALUE
819
+ rxml_reader_byte_consumed(VALUE self)
820
+ {
821
+ return INT2NUM(xmlTextReaderByteConsumed(rxml_text_reader_get(self)));
822
+ }
823
+ #endif
824
+
825
+ #if LIBXML_VERSION >= 20617
826
+ /*
827
+ * call-seq:
828
+ * reader.column_number -> number
829
+ *
830
+ * Provide the column number of the current parsing point.
831
+ */
832
+ static VALUE
833
+ rxml_reader_column_number(VALUE self)
834
+ {
835
+ return INT2NUM(xmlTextReaderGetParserColumnNumber(rxml_text_reader_get(self)));
836
+ }
837
+
838
+ /*
839
+ * call-seq:
840
+ * reader.line_number -> number
841
+ *
842
+ * Provide the line number of the current parsing point.
843
+ */
844
+ static VALUE
845
+ rxml_reader_line_number(VALUE self)
846
+ {
847
+ return INT2NUM(xmlTextReaderGetParserLineNumber(rxml_text_reader_get(self)));
848
+ }
849
+ #endif
850
+
851
+ /*
852
+ * call-seq:
853
+ * reader.default? -> bool
854
+ *
855
+ * Return whether an Attribute node was generated from the default value
856
+ * defined in the DTD or schema.
857
+ */
858
+ static VALUE rxml_reader_default(VALUE self)
859
+ {
860
+ return xmlTextReaderIsDefault(rxml_text_reader_get(self)) ? Qtrue : Qfalse;
861
+ }
862
+
863
+ /*
864
+ * call-seq:
865
+ * reader.namespace_declaration? -> bool
866
+ *
867
+ * Determine whether the current node is a namespace declaration rather than a
868
+ * regular attribute.
869
+ */
870
+ static VALUE rxml_reader_namespace_declaration(VALUE self)
871
+ {
872
+ return xmlTextReaderIsNamespaceDecl(rxml_text_reader_get(self)) ? Qtrue
873
+ : Qfalse;
874
+ }
875
+
876
+ /*
877
+ * call-seq:
878
+ * reader.empty_element? -> bool
879
+ *
880
+ * Check if the current node is empty.
881
+ */
882
+ static VALUE rxml_reader_empty_element(VALUE self)
883
+ {
884
+ return xmlTextReaderIsEmptyElement(rxml_text_reader_get(self)) ? Qtrue
885
+ : Qfalse;
886
+ }
887
+
888
+ /*
889
+ * call-seq:
890
+ * reader.valid? -> bool
891
+ *
892
+ * Retrieve the validity status from the parser context.
893
+ */
894
+ static VALUE rxml_reader_valid(VALUE self)
895
+ {
896
+ return xmlTextReaderIsValid(rxml_text_reader_get(self)) ? Qtrue : Qfalse;
897
+ }
898
+
899
+ void rxml_init_reader(void)
900
+ {
901
+ BASE_URI_SYMBOL = ID2SYM(rb_intern("base_uri"));
902
+ ENCODING_SYMBOL = ID2SYM(rb_intern("encoding"));
903
+ IO_ATTR = ID2SYM(rb_intern("@io"));
904
+ OPTIONS_SYMBOL = ID2SYM(rb_intern("options"));
905
+
906
+ cXMLReader = rb_define_class_under(mXML, "Reader", rb_cObject);
907
+
908
+ rb_define_singleton_method(cXMLReader, "document", rxml_reader_document, 1);
909
+ rb_define_singleton_method(cXMLReader, "file", rxml_reader_file, -1);
910
+ rb_define_singleton_method(cXMLReader, "io", rxml_reader_io, -1);
911
+ rb_define_singleton_method(cXMLReader, "string", rxml_reader_string, -1);
912
+
913
+ rb_define_method(cXMLReader, "[]", rxml_reader_attribute, 1);
914
+ rb_define_method(cXMLReader, "attribute_count", rxml_reader_attr_count, 0);
915
+ rb_define_method(cXMLReader, "base_uri", rxml_reader_base_uri, 0);
916
+ #if LIBXML_VERSION >= 20618
917
+ rb_define_method(cXMLReader, "byte_consumed", rxml_reader_byte_consumed, 0);
918
+ #endif
919
+ rb_define_method(cXMLReader, "close", rxml_reader_close, 0);
920
+ #if LIBXML_VERSION >= 20617
921
+ rb_define_method(cXMLReader, "column_number", rxml_reader_column_number, 0);
922
+ #endif
923
+ rb_define_method(cXMLReader, "depth", rxml_reader_depth, 0);
924
+ rb_define_method(cXMLReader, "encoding", rxml_reader_encoding, 0);
925
+ rb_define_method(cXMLReader, "expand", rxml_reader_expand, 0);
926
+ rb_define_method(cXMLReader, "has_attributes?", rxml_reader_has_attributes, 0);
927
+ rb_define_method(cXMLReader, "has_value?", rxml_reader_has_value, 0);
928
+ #if LIBXML_VERSION >= 20617
929
+ rb_define_method(cXMLReader, "line_number", rxml_reader_line_number, 0);
930
+ #endif
931
+ rb_define_method(cXMLReader, "local_name", rxml_reader_local_name, 0);
932
+ rb_define_method(cXMLReader, "lookup_namespace", rxml_reader_lookup_namespace, 1);
933
+ rb_define_method(cXMLReader, "move_to_attribute", rxml_reader_move_to_attr, 1);
934
+ rb_define_method(cXMLReader, "move_to_first_attribute", rxml_reader_move_to_first_attr, 0);
935
+ rb_define_method(cXMLReader, "move_to_next_attribute", rxml_reader_move_to_next_attr, 0);
936
+ rb_define_method(cXMLReader, "move_to_element", rxml_reader_move_to_element, 0);
937
+ rb_define_method(cXMLReader, "name", rxml_reader_name, 0);
938
+ rb_define_method(cXMLReader, "namespace_uri", rxml_reader_namespace_uri, 0);
939
+ rb_define_method(cXMLReader, "next", rxml_reader_next, 0);
940
+ rb_define_method(cXMLReader, "next_sibling", rxml_reader_next_sibling, 0);
941
+ rb_define_method(cXMLReader, "node", rxml_reader_node, 0);
942
+ rb_define_method(cXMLReader, "node_type", rxml_reader_node_type, 0);
943
+ rb_define_method(cXMLReader, "normalization", rxml_reader_normalization, 0);
944
+ rb_define_method(cXMLReader, "prefix", rxml_reader_prefix, 0);
945
+ rb_define_method(cXMLReader, "quote_char", rxml_reader_quote_char, 0);
946
+ rb_define_method(cXMLReader, "read", rxml_reader_read, 0);
947
+ rb_define_method(cXMLReader, "read_attribute_value", rxml_reader_read_attr_value, 0);
948
+ rb_define_method(cXMLReader, "read_inner_xml", rxml_reader_read_inner_xml, 0);
949
+ rb_define_method(cXMLReader, "read_outer_xml", rxml_reader_read_outer_xml, 0);
950
+ rb_define_method(cXMLReader, "read_state", rxml_reader_read_state, 0);
951
+ rb_define_method(cXMLReader, "read_string", rxml_reader_read_string, 0);
952
+ rb_define_method(cXMLReader, "relax_ng_validate", rxml_reader_relax_ng_validate, 1);
953
+ rb_define_method(cXMLReader, "standalone", rxml_reader_standalone, 0);
954
+ #if LIBXML_VERSION >= 20620
955
+ rb_define_method(cXMLReader, "schema_validate", rxml_reader_schema_validate, 1);
956
+ #endif
957
+ rb_define_method(cXMLReader, "value", rxml_reader_value, 0);
958
+ rb_define_method(cXMLReader, "xml_lang", rxml_reader_xml_lang, 0);
959
+ rb_define_method(cXMLReader, "xml_version", rxml_reader_xml_version, 0);
960
+ rb_define_method(cXMLReader, "default?", rxml_reader_default, 0);
961
+ rb_define_method(cXMLReader, "empty_element?", rxml_reader_empty_element, 0);
962
+ rb_define_method(cXMLReader, "namespace_declaration?", rxml_reader_namespace_declaration, 0);
963
+ rb_define_method(cXMLReader, "valid?", rxml_reader_valid, 0);
964
+
965
+ /* Constants */
966
+ rb_define_const(cXMLReader, "LOADDTD", INT2FIX(XML_PARSER_LOADDTD));
967
+ rb_define_const(cXMLReader, "DEFAULTATTRS", INT2FIX(XML_PARSER_DEFAULTATTRS));
968
+ rb_define_const(cXMLReader, "VALIDATE", INT2FIX(XML_PARSER_VALIDATE));
969
+ rb_define_const(cXMLReader, "SUBST_ENTITIES", INT2FIX(XML_PARSER_SUBST_ENTITIES));
970
+
971
+ rb_define_const(cXMLReader, "SEVERITY_VALIDITY_WARNING", INT2FIX(XML_PARSER_SEVERITY_VALIDITY_WARNING));
972
+ rb_define_const(cXMLReader, "SEVERITY_VALIDITY_ERROR", INT2FIX(XML_PARSER_SEVERITY_VALIDITY_ERROR));
973
+ rb_define_const(cXMLReader, "SEVERITY_WARNING", INT2FIX(XML_PARSER_SEVERITY_WARNING));
974
+ rb_define_const(cXMLReader, "SEVERITY_ERROR", INT2FIX(XML_PARSER_SEVERITY_ERROR));
975
+
976
+ rb_define_const(cXMLReader, "TYPE_NONE", INT2FIX(XML_READER_TYPE_NONE));
977
+ rb_define_const(cXMLReader, "TYPE_ELEMENT", INT2FIX(XML_READER_TYPE_ELEMENT));
978
+ rb_define_const(cXMLReader, "TYPE_ATTRIBUTE", INT2FIX(XML_READER_TYPE_ATTRIBUTE));
979
+ rb_define_const(cXMLReader, "TYPE_TEXT", INT2FIX(XML_READER_TYPE_TEXT));
980
+ rb_define_const(cXMLReader, "TYPE_CDATA", INT2FIX(XML_READER_TYPE_CDATA));
981
+ rb_define_const(cXMLReader, "TYPE_ENTITY_REFERENCE", INT2FIX(XML_READER_TYPE_ENTITY_REFERENCE));
982
+ rb_define_const(cXMLReader, "TYPE_ENTITY", INT2FIX(XML_READER_TYPE_ENTITY));
983
+ rb_define_const(cXMLReader, "TYPE_PROCESSING_INSTRUCTION", INT2FIX(XML_READER_TYPE_PROCESSING_INSTRUCTION));
984
+ rb_define_const(cXMLReader, "TYPE_COMMENT", INT2FIX(XML_READER_TYPE_COMMENT));
985
+ rb_define_const(cXMLReader, "TYPE_DOCUMENT", INT2FIX(XML_READER_TYPE_DOCUMENT));
986
+ rb_define_const(cXMLReader, "TYPE_DOCUMENT_TYPE", INT2FIX(XML_READER_TYPE_DOCUMENT_TYPE));
987
+ rb_define_const(cXMLReader, "TYPE_DOCUMENT_FRAGMENT", INT2FIX(XML_READER_TYPE_DOCUMENT_FRAGMENT));
988
+ rb_define_const(cXMLReader, "TYPE_NOTATION", INT2FIX(XML_READER_TYPE_NOTATION));
989
+ rb_define_const(cXMLReader, "TYPE_WHITESPACE", INT2FIX(XML_READER_TYPE_WHITESPACE));
990
+ rb_define_const(cXMLReader, "TYPE_SIGNIFICANT_WHITESPACE", INT2FIX(XML_READER_TYPE_SIGNIFICANT_WHITESPACE));
991
+ rb_define_const(cXMLReader, "TYPE_END_ELEMENT", INT2FIX(XML_READER_TYPE_END_ELEMENT));
992
+ rb_define_const(cXMLReader, "TYPE_END_ENTITY", INT2FIX(XML_READER_TYPE_END_ENTITY));
993
+ rb_define_const(cXMLReader, "TYPE_XML_DECLARATION", INT2FIX(XML_READER_TYPE_XML_DECLARATION));
994
+
995
+ /* Read states */
996
+ rb_define_const(cXMLReader, "MODE_INITIAL", INT2FIX(XML_TEXTREADER_MODE_INITIAL));
997
+ rb_define_const(cXMLReader, "MODE_INTERACTIVE", INT2FIX(XML_TEXTREADER_MODE_INTERACTIVE));
998
+ rb_define_const(cXMLReader, "MODE_ERROR", INT2FIX(XML_TEXTREADER_MODE_ERROR));
999
+ rb_define_const(cXMLReader, "MODE_EOF", INT2FIX(XML_TEXTREADER_MODE_EOF));
1000
+ rb_define_const(cXMLReader, "MODE_CLOSED", INT2FIX(XML_TEXTREADER_MODE_CLOSED));
1001
+ rb_define_const(cXMLReader, "MODE_READING", INT2FIX(XML_TEXTREADER_MODE_READING));
1002
+ }