libxml-ruby 2.0.2-x86-mingw32 → 2.0.3-x86-mingw32

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,1057 +1,1085 @@
1
- /* Copyright (c) 2006-2007 Apple Inc.
2
- * Please see the LICENSE file for copyright and distribution information. */
3
-
4
- #include "ruby_libxml.h"
5
- #include "ruby_xml_reader.h"
6
-
7
- /*
8
- * Document-class: LibXML::XML::Reader
9
- *
10
- * The XML::Reader class provides a simpler, alternative way of parsing an XML
11
- * document in contrast to XML::Parser or XML::SaxParser. A XML::Reader instance
12
- * acts like a cursor going forward in a document stream, stopping at each node
13
- * it encounters. To advance to the next node, simply call XML::Reader#read.
14
- *
15
- * The XML::Reader API closely matches the DOM Core specification and supports
16
- * namespaces, xml:base, entity handling and DTDs.
17
- *
18
- * To summarize, XML::Reader provides a far simpler API to use versus XML::SaxParser
19
- * and is more memory efficient than using XML::Parser to create a DOM tree.
20
- *
21
- * Example:
22
- *
23
- * reader = XML::Reader.string("<foo><bar>1</bar><bar>2</bar><bar>3</bar></foo>")
24
- * reader.read
25
- * assert_equal('foo', reader.name)
26
- * assert_equal(nil, reader.value)
27
- *
28
- * 3.times do |i|
29
- * reader.read
30
- * assert_equal(XML::Reader::TYPE_ELEMENT, reader.node_type)
31
- * assert_equal('bar', reader.name)
32
- * reader.read
33
- * assert_equal(XML::Reader::TYPE_TEXT, reader.node_type)
34
- * assert_equal((i + 1).to_s, reader.value)
35
- * reader.read
36
- * assert_equal(XML::Reader::TYPE_END_ELEMENT, reader.node_type)
37
- * end
38
- *
39
- * You can also parse documents (see XML::Reader.document),
40
- * strings (see XML::Reader.string) and io objects (see
41
- * XML::Reader.io).
42
- *
43
- * For a more in depth tutorial, albeit in C, see http://xmlsoft.org/xmlreader.html.*/
44
-
45
- VALUE cXMLReader;
46
-
47
- static ID BASE_URI_SYMBOL;
48
- static ID ENCODING_SYMBOL;
49
- static ID IO_ATTR;
50
- static ID OPTIONS_SYMBOL;
51
-
52
-
53
- static void rxml_reader_free(xmlTextReaderPtr reader)
54
- {
55
- xmlFreeTextReader(reader);
56
- }
57
-
58
- static VALUE rxml_reader_wrap(xmlTextReaderPtr reader)
59
- {
60
- return Data_Wrap_Struct(cXMLReader, NULL, rxml_reader_free, reader);
61
- }
62
-
63
- static xmlTextReaderPtr rxml_text_reader_get(VALUE obj)
64
- {
65
- xmlTextReaderPtr xreader;
66
- Data_Get_Struct(obj, xmlTextReader, xreader);
67
- return xreader;
68
- }
69
-
70
- /*
71
- * call-seq:
72
- * XML::Reader.document(doc) -> XML::Reader
73
- *
74
- * Create an new reader for the specified document.
75
- */
76
- VALUE rxml_reader_document(VALUE klass, VALUE doc)
77
- {
78
- xmlDocPtr xdoc;
79
- xmlTextReaderPtr xreader;
80
-
81
- Data_Get_Struct(doc, xmlDoc, xdoc);
82
-
83
- xreader = xmlReaderWalker(xdoc);
84
-
85
- if (xreader == NULL)
86
- rxml_raise(&xmlLastError);
87
-
88
- return rxml_reader_wrap(xreader);
89
- }
90
-
91
- /* call-seq:
92
- * XML::Reader.file(path) -> XML::Reader
93
- * XML::Reader.file(path, :encoding => XML::Encoding::UTF_8,
94
- * :options => XML::Parser::Options::NOENT) -> XML::Parser
95
- *
96
- * Creates a new reader by parsing the specified file or uri.
97
- *
98
- * You may provide an optional hash table to control how the
99
- * parsing is performed. Valid options are:
100
- *
101
- * encoding - The document encoding, defaults to nil. Valid values
102
- * are the encoding constants defined on XML::Encoding.
103
- * options - Controls the execution of the parser, defaults to 0.
104
- * Valid values are the constants defined on
105
- * XML::Parser::Options. Mutliple options can be combined
106
- * by using Bitwise OR (|).
107
- */
108
- static VALUE rxml_reader_file(int argc, VALUE *argv, VALUE klass)
109
- {
110
- xmlTextReaderPtr xreader;
111
- VALUE path;
112
- VALUE options;
113
-
114
- const char *xencoding = NULL;
115
- int xoptions = 0;
116
-
117
- rb_scan_args(argc, argv, "11", &path, &options);
118
- Check_Type(path, T_STRING);
119
-
120
- if (!NIL_P(options))
121
- {
122
- VALUE encoding = Qnil;
123
- VALUE parserOptions = Qnil;
124
-
125
- Check_Type(options, T_HASH);
126
-
127
- encoding = rb_hash_aref(options, BASE_URI_SYMBOL);
128
- xencoding = NIL_P(encoding) ? NULL : xmlGetCharEncodingName(NUM2INT(encoding));
129
-
130
- parserOptions = rb_hash_aref(options, OPTIONS_SYMBOL);
131
- xoptions = NIL_P(parserOptions) ? 0 : NUM2INT(parserOptions);
132
- }
133
-
134
- xreader = xmlReaderForFile(StringValueCStr(path), xencoding, xoptions);
135
-
136
- if (xreader == NULL)
137
- rxml_raise(&xmlLastError);
138
-
139
- return rxml_reader_wrap(xreader);
140
- }
141
-
142
- /* call-seq:
143
- * XML::Reader.io(io) -> XML::Reader
144
- * XML::Reader.io(io, :encoding => XML::Encoding::UTF_8,
145
- * :options => XML::Parser::Options::NOENT) -> XML::Parser
146
- *
147
- * Creates a new reader by parsing the specified io object.
148
- *
149
- * You may provide an optional hash table to control how the
150
- * parsing is performed. Valid options are:
151
- *
152
- * base_uri - The base url for the parsed document.
153
- * encoding - The document encoding, defaults to nil. Valid values
154
- * are the encoding constants defined on XML::Encoding.
155
- * options - Controls the execution of the parser, defaults to 0.
156
- * Valid values are the constants defined on
157
- * XML::Parser::Options. Mutliple options can be combined
158
- * by using Bitwise OR (|).
159
- */
160
- static VALUE rxml_reader_io(int argc, VALUE *argv, VALUE klass)
161
- {
162
- xmlTextReaderPtr xreader;
163
- VALUE result;
164
- VALUE io;
165
- VALUE options;
166
- char *xbaseurl = NULL;
167
- const char *xencoding = NULL;
168
- int xoptions = 0;
169
-
170
- rb_scan_args(argc, argv, "11", &io, &options);
171
-
172
- if (!NIL_P(options))
173
- {
174
- VALUE baseurl = Qnil;
175
- VALUE encoding = Qnil;
176
- VALUE parserOptions = Qnil;
177
-
178
- Check_Type(options, T_HASH);
179
-
180
- baseurl = rb_hash_aref(options, BASE_URI_SYMBOL);
181
- xbaseurl = NIL_P(baseurl) ? NULL : StringValueCStr(baseurl);
182
-
183
- encoding = rb_hash_aref(options, ENCODING_SYMBOL);
184
- xencoding = NIL_P(encoding) ? NULL : xmlGetCharEncodingName(NUM2INT(encoding));
185
-
186
- parserOptions = rb_hash_aref(options, OPTIONS_SYMBOL);
187
- xoptions = NIL_P(parserOptions) ? 0 : NUM2INT(parserOptions);
188
- }
189
-
190
- xreader = xmlReaderForIO((xmlInputReadCallback) rxml_read_callback, NULL,
191
- (void *) io,
192
- xbaseurl, xencoding, xoptions);
193
-
194
- if (xreader == NULL)
195
- rxml_raise(&xmlLastError);
196
-
197
- result = rxml_reader_wrap(xreader);
198
-
199
- /* Attach io object to parser so it won't get freed.*/
200
- rb_ivar_set(result, IO_ATTR, io);
201
-
202
- return result;
203
- }
204
-
205
- /* call-seq:
206
- * XML::Reader.string(io) -> XML::Reader
207
- * XML::Reader.string(io, :encoding => XML::Encoding::UTF_8,
208
- * :options => XML::Parser::Options::NOENT) -> XML::Parser
209
- *
210
- * Creates a new reader by parsing the specified string.
211
- *
212
- * You may provide an optional hash table to control how the
213
- * parsing is performed. Valid options are:
214
- *
215
- * base_uri - The base url for the parsed document.
216
- * encoding - The document encoding, defaults to nil. Valid values
217
- * are the encoding constants defined on XML::Encoding.
218
- * options - Controls the execution of the parser, defaults to 0.
219
- * Valid values are the constants defined on
220
- * XML::Parser::Options. Mutliple options can be combined
221
- * by using Bitwise OR (|).
222
- */
223
- static VALUE rxml_reader_string(int argc, VALUE *argv, VALUE klass)
224
- {
225
- xmlTextReaderPtr xreader;
226
- VALUE string;
227
- VALUE options;
228
- char *xbaseurl = NULL;
229
- const char *xencoding = NULL;
230
- int xoptions = 0;
231
-
232
- rb_scan_args(argc, argv, "11", &string, &options);
233
- Check_Type(string, T_STRING);
234
-
235
- if (!NIL_P(options))
236
- {
237
- VALUE baseurl = Qnil;
238
- VALUE encoding = Qnil;
239
- VALUE parserOptions = Qnil;
240
-
241
- int foo;
242
- Check_Type(options, T_HASH);
243
-
244
- baseurl = rb_hash_aref(options, BASE_URI_SYMBOL);
245
- xbaseurl = NIL_P(baseurl) ? NULL : StringValueCStr(baseurl);
246
-
247
- encoding = rb_hash_aref(options, ENCODING_SYMBOL);
248
- xencoding = NIL_P(encoding) ? NULL : xmlGetCharEncodingName(NUM2INT(encoding));
249
-
250
- parserOptions = rb_hash_aref(options, OPTIONS_SYMBOL);
251
- xoptions = NIL_P(parserOptions) ? 0 : NUM2INT(parserOptions);
252
- }
253
-
254
- xreader = xmlReaderForMemory(StringValueCStr(string), RSTRING_LEN(string),
255
- xbaseurl, xencoding, xoptions);
256
-
257
- if (xreader == NULL)
258
- rxml_raise(&xmlLastError);
259
-
260
- return rxml_reader_wrap(xreader);
261
- }
262
-
263
- /*
264
- * call-seq:
265
- * reader.close -> code
266
- *
267
- * This method releases any resources allocated by the current instance
268
- * changes the state to Closed and close any underlying input.
269
- */
270
- static VALUE rxml_reader_close(VALUE self)
271
- {
272
- return INT2FIX(xmlTextReaderClose(rxml_text_reader_get(self)));
273
- }
274
-
275
- /*
276
- * call-seq:
277
- * reader.move_to_attribute(val) -> code
278
- *
279
- * Move the position of the current instance to the attribute with the
280
- * specified index (if +val+ is an integer) or name (if +val+ is a string)
281
- * relative to the containing element.
282
- */
283
- static VALUE rxml_reader_move_to_attr(VALUE self, VALUE val)
284
- {
285
- xmlTextReaderPtr xreader;
286
- int ret;
287
-
288
- xreader = rxml_text_reader_get(self);
289
-
290
- if (TYPE(val) == T_FIXNUM)
291
- {
292
- ret = xmlTextReaderMoveToAttributeNo(xreader, FIX2INT(val));
293
- }
294
- else
295
- {
296
- ret = xmlTextReaderMoveToAttribute(xreader,
297
- (const xmlChar *) StringValueCStr(val));
298
- }
299
-
300
- return INT2FIX(ret);
301
- }
302
-
303
- /*
304
- * call-seq:
305
- * reader.move_to_first_attribute -> code
306
- *
307
- * Move the position of the current instance to the first attribute associated
308
- * with the current node.
309
- */
310
- static VALUE rxml_reader_move_to_first_attr(VALUE self)
311
- {
312
- return INT2FIX(xmlTextReaderMoveToFirstAttribute(rxml_text_reader_get(self)));
313
- }
314
-
315
- /*
316
- * call-seq:
317
- * reader.move_to_next_attribute -> code
318
- *
319
- * Move the position of the current instance to the next attribute associated
320
- * with the current node.
321
- */
322
- static VALUE rxml_reader_move_to_next_attr(VALUE self)
323
- {
324
- return INT2FIX(xmlTextReaderMoveToNextAttribute(rxml_text_reader_get(self)));
325
- }
326
-
327
- /*
328
- * call-seq:
329
- * reader.move_to_element -> code
330
- *
331
- * Move the position of the current instance to the node that contains the
332
- * current attribute node.
333
- */
334
- static VALUE rxml_reader_move_to_element(VALUE self)
335
- {
336
- return INT2FIX(xmlTextReaderMoveToElement(rxml_text_reader_get(self)));
337
- }
338
-
339
- /*
340
- * call-seq:
341
- * reader.next -> code
342
- *
343
- * Skip to the node following the current one in document order while avoiding
344
- * the subtree if any.
345
- */
346
- static VALUE rxml_reader_next(VALUE self)
347
- {
348
- return INT2FIX(xmlTextReaderNext(rxml_text_reader_get(self)));
349
- }
350
-
351
- /*
352
- * call-seq:
353
- * reader.next_sibling -> code
354
- *
355
- * Skip to the node following the current one in document order while avoiding
356
- * the subtree if any. Currently implemented only for Readers built on a
357
- * document.
358
- */
359
- static VALUE rxml_reader_next_sibling(VALUE self)
360
- {
361
- return INT2FIX(xmlTextReaderNextSibling(rxml_text_reader_get(self)));
362
- }
363
-
364
- /*
365
- * call-seq:
366
- * reader.node -> XML::Node
367
- *
368
- * Returns the reader's current node. It will return
369
- * nil if Reader#read has not yet been called.
370
- * WARNING - Using this method is dangerous because the
371
- * the node may be destroyed on the next #read.
372
- */
373
- static VALUE rxml_reader_node(VALUE self)
374
- {
375
- xmlTextReaderPtr xreader = rxml_text_reader_get(self);
376
- xmlNodePtr xnode = xmlTextReaderCurrentNode(xreader);
377
- return xnode ? rxml_node_wrap(xnode) : Qnil;
378
- }
379
-
380
- /*
381
- * call-seq:
382
- * reader.node_type -> type
383
- *
384
- * Get the node type of the current node. Reference:
385
- * http://dotgnu.org/pnetlib-doc/System/Xml/XmlNodeType.html
386
- */
387
- static VALUE rxml_reader_node_type(VALUE self)
388
- {
389
- return INT2FIX(xmlTextReaderNodeType(rxml_text_reader_get(self)));
390
- }
391
-
392
- /*
393
- * call-seq:
394
- * reader.normalization -> value
395
- *
396
- * The value indicating whether to normalize white space and attribute values.
397
- * Since attribute value and end of line normalizations are a MUST in the XML
398
- * specification only the value true is accepted. The broken bahaviour of
399
- * accepting out of range character entities like &#0; is of course not
400
- * supported either.
401
- *
402
- * Return 1 or -1 in case of error.
403
- */
404
- static VALUE rxml_reader_normalization(VALUE self)
405
- {
406
- return INT2FIX(xmlTextReaderNormalization(rxml_text_reader_get(self)));
407
- }
408
-
409
- /*
410
- * call-seq:
411
- * reader.read -> code
412
- *
413
- * Causes the reader to move to the next node in the stream, exposing its properties.
414
- *
415
- * Returns true if a node was successfully read or false if there are no more
416
- * nodes to read. On errors, an exception is raised.*/
417
- static VALUE rxml_reader_read(VALUE self)
418
- {
419
- int result = xmlTextReaderRead(rxml_text_reader_get(self));
420
- switch(result)
421
- {
422
- case -1:
423
- rxml_raise(&xmlLastError);
424
- return Qnil;
425
- break;
426
- case 0:
427
- return Qfalse;
428
- case 1:
429
- return Qtrue;
430
- default:
431
- rb_raise(rb_eRuntimeError,
432
- "xmlTextReaderRead did not return -1, 0 or 1. Return value was: %d", result);
433
- }
434
- }
435
-
436
- /*
437
- * call-seq:
438
- * reader.read_attribute_value -> code
439
- *
440
- * Parse an attribute value into one or more Text and EntityReference nodes.
441
- *
442
- * Return 1 in case of success, 0 if the reader was not positionned on an
443
- * attribute node or all the attribute values have been read, or -1 in case of
444
- * error.
445
- */
446
- static VALUE rxml_reader_read_attr_value(VALUE self)
447
- {
448
- return INT2FIX(xmlTextReaderReadAttributeValue(rxml_text_reader_get(self)));
449
- }
450
-
451
- /*
452
- * call-seq:
453
- * reader.read_inner_xml -> data
454
- *
455
- * Read the contents of the current node, including child nodes and markup.
456
- *
457
- * Return a string containing the XML content, or nil if the current node is
458
- * neither an element nor attribute, or has no child nodes.
459
- */
460
- static VALUE rxml_reader_read_inner_xml(VALUE self)
461
- {
462
- VALUE result = Qnil;
463
- xmlTextReaderPtr xReader = rxml_text_reader_get(self);
464
-
465
- xmlChar *xml = xmlTextReaderReadInnerXml(xReader);
466
-
467
- if (xml != NULL)
468
- {
469
- const xmlChar *xencoding = xmlTextReaderConstEncoding(xReader);
470
- result = rxml_str_new2((const char*) xml, xencoding);
471
- xmlFree(xml);
472
- }
473
-
474
- return result;
475
- }
476
-
477
- /*
478
- * call-seq:
479
- * reader.read_outer_xml -> data
480
- *
481
- * Read the contents of the current node, including child nodes and markup.
482
- *
483
- * Return a string containing the XML content, or nil if the current node is
484
- * neither an element nor attribute, or has no child nodes.
485
- */
486
- static VALUE rxml_reader_read_outer_xml(VALUE self)
487
- {
488
- VALUE result = Qnil;
489
- xmlTextReaderPtr xReader = rxml_text_reader_get(self);
490
-
491
- xmlChar *xml = xmlTextReaderReadOuterXml(xReader);
492
-
493
- if (xml != NULL)
494
- {
495
- const xmlChar *xencoding = xmlTextReaderConstEncoding(xReader);
496
- result = rxml_str_new2((const char*) xml, xencoding);
497
- xmlFree(xml);
498
- }
499
-
500
- return result;
501
- }
502
-
503
- /*
504
- * call-seq:
505
- * reader.read_state -> state
506
- *
507
- * Get the read state of the reader.
508
- */
509
- static VALUE rxml_reader_read_state(VALUE self)
510
- {
511
- return INT2FIX(xmlTextReaderReadState(rxml_text_reader_get(self)));
512
- }
513
-
514
- /*
515
- * call-seq:
516
- * reader.read_string -> string
517
- *
518
- * Read the contents of an element or a text node as a string.
519
- *
520
- * Return a string containing the contents of the Element or Text node, or nil
521
- * if the reader is positioned on any other type of node.
522
- */
523
- static VALUE rxml_reader_read_string(VALUE self)
524
- {
525
- VALUE result = Qnil;
526
- xmlTextReaderPtr xReader = rxml_text_reader_get(self);
527
-
528
- xmlChar *xml = xmlTextReaderReadString(xReader);
529
-
530
- if (xml != NULL)
531
- {
532
- const xmlChar *xencoding = xmlTextReaderConstEncoding(xReader);
533
- result = rxml_str_new2((const char*) xml, xencoding);
534
- xmlFree(xml);
535
- }
536
-
537
- return result;
538
- }
539
-
540
- /*
541
- * call-seq:
542
- * reader.relax_ng_validate(rng) -> code
543
- *
544
- * Use RelaxNG to validate the document as it is processed. Activation is only
545
- * possible before the first read. If +rng+ is nil, the RelaxNG validation is
546
- * desactivated.
547
- *
548
- * Return 0 in case the RelaxNG validation could be (des)activated and -1 in
549
- * case of error.
550
- */
551
- static VALUE rxml_reader_relax_ng_validate(VALUE self, VALUE rng)
552
- {
553
- char *xrng = NIL_P(rng) ? NULL : StringValueCStr(rng);
554
- return INT2FIX(xmlTextReaderRelaxNGValidate(rxml_text_reader_get(self), xrng));
555
- }
556
-
557
- #if LIBXML_VERSION >= 20620
558
- /*
559
- * call-seq:
560
- * reader.schema_validate(schema) -> code
561
- *
562
- * Use W3C XSD schema to validate the document as it is processed. Activation
563
- * is only possible before the first read. If +schema+ is nil, then XML Schema
564
- * validation is desactivated.
565
- *
566
- * Return 0 in case the schemas validation could be (de)activated and -1 in
567
- * case of error.
568
- */
569
- static VALUE
570
- rxml_reader_schema_validate(VALUE self, VALUE xsd)
571
- {
572
- char *xxsd = NIL_P(xsd) ? NULL : StringValueCStr(xsd);
573
- int status = xmlTextReaderSchemaValidate(rxml_text_reader_get(self), xxsd);
574
- return INT2FIX(status);
575
- }
576
- #endif
577
-
578
- /*
579
- * call-seq:
580
- * reader.name -> name
581
- *
582
- * Return the qualified name of the node.
583
- */
584
- static VALUE rxml_reader_name(VALUE self)
585
- {
586
- xmlTextReaderPtr xReader = rxml_text_reader_get(self);
587
- const xmlChar *result = xmlTextReaderConstName(xReader);
588
- const xmlChar *xencoding = xmlTextReaderConstEncoding(xReader);
589
-
590
- return (result == NULL ? Qnil : rxml_str_new2(result, xencoding));
591
- }
592
-
593
- /*
594
- * call-seq:
595
- * reader.local_name -> name
596
- *
597
- * Return the local name of the node.
598
- */
599
- static VALUE rxml_reader_local_name(VALUE self)
600
- {
601
- xmlTextReaderPtr xReader = rxml_text_reader_get(self);
602
- const xmlChar *result = xmlTextReaderConstLocalName(xReader);
603
- const xmlChar *xencoding = xmlTextReaderConstEncoding(xReader);
604
-
605
- return (result == NULL ? Qnil : rxml_str_new2(result, xencoding));
606
- }
607
-
608
- /*
609
- * call-seq:
610
- * reader.attribute_count -> count
611
- *
612
- * Provide the number of attributes of the current node.
613
- */
614
- static VALUE rxml_reader_attr_count(VALUE self)
615
- {
616
- return INT2FIX(xmlTextReaderAttributeCount(rxml_text_reader_get(self)));
617
- }
618
-
619
- /*
620
- * call-seq:
621
- * reader.encoding -> XML::Encoding::UTF_8
622
- *
623
- * Returns the encoding of the document being read. Note you
624
- * first have to read data from the reader for encoding
625
- * to return a value
626
- *
627
- * reader = XML::Reader.file(XML_FILE)
628
- * assert_nil(reader.encoding)
629
- * reader.read
630
- * assert_equal(XML::Encoding::UTF_8, reader.encoding)
631
- *
632
- * In addition, libxml always appears to return nil for the encoding
633
- * when parsing strings.
634
- */
635
- static VALUE rxml_reader_encoding(VALUE self)
636
- {
637
- xmlTextReaderPtr xreader = rxml_text_reader_get(self);
638
- const xmlChar *xencoding = xmlTextReaderConstEncoding(xreader);
639
- if (xencoding)
640
- return INT2NUM(xmlParseCharEncoding(xencoding));
641
- else
642
- return INT2NUM(XML_CHAR_ENCODING_NONE);
643
- }
644
-
645
- /*
646
- * call-seq:
647
- * reader.base_uri -> URI
648
- *
649
- * Determine the base URI of the node.
650
- */
651
- static VALUE rxml_reader_base_uri(VALUE self)
652
- {
653
- xmlTextReaderPtr xReader = rxml_text_reader_get(self);
654
- const xmlChar *result = xmlTextReaderConstBaseUri(xReader);
655
- const xmlChar *xencoding = xmlTextReaderConstEncoding(xReader);
656
-
657
- return (result == NULL ? Qnil : rxml_str_new2(result, xencoding));
658
- }
659
-
660
- /*
661
- * call-seq:
662
- * reader.namespace_uri -> URI
663
- *
664
- * Determine the namespace URI of the node.
665
- */
666
- static VALUE rxml_reader_namespace_uri(VALUE self)
667
- {
668
- xmlTextReaderPtr xReader = rxml_text_reader_get(self);
669
- const xmlChar *result = xmlTextReaderConstNamespaceUri(xReader);
670
- const xmlChar *xencoding = xmlTextReaderConstEncoding(xReader);
671
-
672
- return (result == NULL ? Qnil : rxml_str_new2(result, xencoding));
673
- }
674
-
675
- /*
676
- * call-seq:
677
- * reader.value -> text
678
- *
679
- * Provide the text value of the node if present.
680
- */
681
- static VALUE rxml_reader_value(VALUE self)
682
- {
683
- xmlTextReaderPtr xReader = rxml_text_reader_get(self);
684
- const xmlChar *result = xmlTextReaderConstValue(xReader);
685
- const xmlChar *xencoding = xmlTextReaderConstEncoding(xReader);
686
-
687
- return (result == NULL ? Qnil : rxml_str_new2(result, xencoding));
688
- }
689
-
690
- /*
691
- * call-seq:
692
- * reader.prefix -> prefix
693
- *
694
- * Get a shorthand reference to the namespace associated with the node.
695
- */
696
- static VALUE rxml_reader_prefix(VALUE self)
697
- {
698
- xmlTextReaderPtr xReader = rxml_text_reader_get(self);
699
- const xmlChar *result = xmlTextReaderConstPrefix(xReader);
700
- const xmlChar *xencoding = xmlTextReaderConstEncoding(xReader);
701
-
702
- return (result == NULL ? Qnil : rxml_str_new2(result, xencoding));
703
- }
704
-
705
- /*
706
- * call-seq:
707
- * reader.depth -> depth
708
- *
709
- * Get the depth of the node in the tree.
710
- */
711
- static VALUE rxml_reader_depth(VALUE self)
712
- {
713
- return INT2FIX(xmlTextReaderDepth(rxml_text_reader_get(self)));
714
- }
715
-
716
- /*
717
- * call-seq:
718
- * reader.quote_char -> char
719
- *
720
- * Get the quotation mark character used to enclose the value of an attribute,
721
- * as an integer value (and -1 in case of error).
722
- */
723
- static VALUE rxml_reader_quote_char(VALUE self)
724
- {
725
- return INT2FIX(xmlTextReaderQuoteChar(rxml_text_reader_get(self)));
726
- }
727
-
728
- /*
729
- * call-seq:
730
- * reader.standalone -> code
731
- *
732
- * Determine the standalone status of the document being read.
733
- *
734
- * Return 1 if the document was declared to be standalone, 0 if it was
735
- * declared to be not standalone, or -1 if the document did not specify its
736
- * standalone status or in case of error.
737
- */
738
- static VALUE rxml_reader_standalone(VALUE self)
739
- {
740
- return INT2FIX(xmlTextReaderStandalone(rxml_text_reader_get(self)));
741
- }
742
-
743
- /*
744
- * call-seq:
745
- * reader.xml_lang -> value
746
- *
747
- * Get the xml:lang scope within which the node resides.
748
- */
749
- static VALUE rxml_reader_xml_lang(VALUE self)
750
- {
751
- xmlTextReaderPtr xReader = rxml_text_reader_get(self);
752
- const xmlChar *result = xmlTextReaderConstXmlLang(xReader);
753
- const xmlChar *xencoding = xmlTextReaderConstEncoding(xReader);
754
-
755
- return (result == NULL ? Qnil : rxml_str_new2(result, xencoding));
756
- }
757
-
758
- /*
759
- * call-seq:
760
- * reader.xml_version -> version
761
- *
762
- * Determine the XML version of the document being read.
763
- */
764
- static VALUE rxml_reader_xml_version(VALUE self)
765
- {
766
- xmlTextReaderPtr xReader = rxml_text_reader_get(self);
767
- const xmlChar *result = xmlTextReaderConstXmlVersion(xReader);
768
- const xmlChar *xencoding = xmlTextReaderConstEncoding(xReader);
769
-
770
- return (result == NULL ? Qnil : rxml_str_new2(result, xencoding));
771
- }
772
-
773
- /*
774
- * call-seq:
775
- * reader.has_attributes? -> bool
776
- *
777
- * Get whether the node has attributes.
778
- */
779
- static VALUE rxml_reader_has_attributes(VALUE self)
780
- {
781
- return xmlTextReaderHasAttributes(rxml_text_reader_get(self)) ? Qtrue
782
- : Qfalse;
783
- }
784
-
785
- /*
786
- * call-seq:
787
- * reader.has_value? -> bool
788
- *
789
- * Get whether the node can have a text value.
790
- */
791
- static VALUE rxml_reader_has_value(VALUE self)
792
- {
793
- return xmlTextReaderHasValue(rxml_text_reader_get(self)) ? Qtrue : Qfalse;
794
- }
795
-
796
- /*
797
- * call-seq:
798
- * reader[key] -> value
799
- *
800
- * Provide the value of the attribute with the specified index (if +key+ is an
801
- * integer) or with the specified name (if +key+ is a string) relative to the
802
- * containing element, as a string.
803
- */
804
- static VALUE rxml_reader_attribute(VALUE self, VALUE key)
805
- {
806
- xmlTextReaderPtr reader;
807
- xmlChar *attr;
808
- xmlTextReaderPtr xReader = rxml_text_reader_get(self);
809
- const xmlChar *xencoding = xmlTextReaderConstEncoding(xReader);
810
-
811
-
812
- if (TYPE(key) == T_FIXNUM)
813
- {
814
- attr = xmlTextReaderGetAttributeNo(xReader, FIX2INT(key));
815
- }
816
- else
817
- {
818
- attr = xmlTextReaderGetAttribute(xReader, (const xmlChar *) StringValueCStr(key));
819
- }
820
- return (attr == NULL ? Qnil : rxml_str_new2(attr, xencoding));
821
- }
822
-
823
- /*
824
- * call-seq:
825
- * reader.lookup_namespace(prefix) -> value
826
- *
827
- * Resolve a namespace prefix in the scope of the current element.
828
- * To return the default namespace, specify nil as +prefix+.
829
- */
830
- static VALUE rxml_reader_lookup_namespace(VALUE self, VALUE prefix)
831
- {
832
- xmlTextReaderPtr xReader = rxml_text_reader_get(self);
833
- const xmlChar *result = xmlTextReaderLookupNamespace(xReader, (const xmlChar *) StringValueCStr(prefix));
834
- const xmlChar *xencoding = xmlTextReaderConstEncoding(xReader);
835
-
836
- return (result == NULL ? Qnil : rxml_str_new2(result, xencoding));
837
- }
838
-
839
- /*
840
- * call-seq:
841
- * reader.expand -> node
842
- *
843
- * Read the contents of the current node and the full subtree. It then makes
844
- * the subtree available until the next read call.
845
- *
846
- * Return an XML::Node object, or nil in case of error.
847
- */
848
- static VALUE rxml_reader_expand(VALUE self)
849
- {
850
- xmlNodePtr node, nodec;
851
- xmlTextReaderPtr reader = rxml_text_reader_get(self);
852
- node = xmlTextReaderExpand(reader);
853
-
854
- if (!node)
855
- return Qnil;
856
- else {
857
- nodec = xmlCopyNode(node, 1);
858
- if (!nodec)
859
- return Qnil;
860
- else
861
- return rxml_node_wrap(nodec);
862
- }
863
- }
864
-
865
- #if LIBXML_VERSION >= 20618
866
- /*
867
- * call-seq:
868
- * reader.byte_consumed -> value
869
- *
870
- * This method provides the current index of the parser used by the reader,
871
- * relative to the start of the current entity.
872
- */
873
- static VALUE
874
- rxml_reader_byte_consumed(VALUE self)
875
- {
876
- return INT2NUM(xmlTextReaderByteConsumed(rxml_text_reader_get(self)));
877
- }
878
- #endif
879
-
880
- #if LIBXML_VERSION >= 20617
881
- /*
882
- * call-seq:
883
- * reader.column_number -> number
884
- *
885
- * Provide the column number of the current parsing point.
886
- */
887
- static VALUE
888
- rxml_reader_column_number(VALUE self)
889
- {
890
- return INT2NUM(xmlTextReaderGetParserColumnNumber(rxml_text_reader_get(self)));
891
- }
892
-
893
- /*
894
- * call-seq:
895
- * reader.line_number -> number
896
- *
897
- * Provide the line number of the current parsing point.
898
- */
899
- static VALUE
900
- rxml_reader_line_number(VALUE self)
901
- {
902
- return INT2NUM(xmlTextReaderGetParserLineNumber(rxml_text_reader_get(self)));
903
- }
904
- #endif
905
-
906
- /*
907
- * call-seq:
908
- * reader.default? -> bool
909
- *
910
- * Return whether an Attribute node was generated from the default value
911
- * defined in the DTD or schema.
912
- */
913
- static VALUE rxml_reader_default(VALUE self)
914
- {
915
- return xmlTextReaderIsDefault(rxml_text_reader_get(self)) ? Qtrue : Qfalse;
916
- }
917
-
918
- /*
919
- * call-seq:
920
- * reader.namespace_declaration? -> bool
921
- *
922
- * Determine whether the current node is a namespace declaration rather than a
923
- * regular attribute.
924
- */
925
- static VALUE rxml_reader_namespace_declaration(VALUE self)
926
- {
927
- return xmlTextReaderIsNamespaceDecl(rxml_text_reader_get(self)) ? Qtrue
928
- : Qfalse;
929
- }
930
-
931
- /*
932
- * call-seq:
933
- * reader.empty_element? -> bool
934
- *
935
- * Check if the current node is empty.
936
- */
937
- static VALUE rxml_reader_empty_element(VALUE self)
938
- {
939
- return xmlTextReaderIsEmptyElement(rxml_text_reader_get(self)) ? Qtrue
940
- : Qfalse;
941
- }
942
-
943
- /*
944
- * call-seq:
945
- * reader.valid? -> bool
946
- *
947
- * Retrieve the validity status from the parser context.
948
- */
949
- static VALUE rxml_reader_valid(VALUE self)
950
- {
951
- return xmlTextReaderIsValid(rxml_text_reader_get(self)) ? Qtrue : Qfalse;
952
- }
953
-
954
- void rxml_init_reader(void)
955
- {
956
- BASE_URI_SYMBOL = ID2SYM(rb_intern("base_uri"));
957
- ENCODING_SYMBOL = ID2SYM(rb_intern("encoding"));
958
- IO_ATTR = ID2SYM(rb_intern("@io"));
959
- OPTIONS_SYMBOL = ID2SYM(rb_intern("options"));
960
-
961
- cXMLReader = rb_define_class_under(mXML, "Reader", rb_cObject);
962
-
963
- rb_define_singleton_method(cXMLReader, "document", rxml_reader_document, 1);
964
- rb_define_singleton_method(cXMLReader, "file", rxml_reader_file, -1);
965
- rb_define_singleton_method(cXMLReader, "io", rxml_reader_io, -1);
966
- rb_define_singleton_method(cXMLReader, "string", rxml_reader_string, -1);
967
-
968
- rb_define_method(cXMLReader, "[]", rxml_reader_attribute, 1);
969
- rb_define_method(cXMLReader, "attribute_count", rxml_reader_attr_count, 0);
970
- rb_define_method(cXMLReader, "base_uri", rxml_reader_base_uri, 0);
971
- #if LIBXML_VERSION >= 20618
972
- rb_define_method(cXMLReader, "byte_consumed", rxml_reader_byte_consumed, 0);
973
- #endif
974
- rb_define_method(cXMLReader, "close", rxml_reader_close, 0);
975
- #if LIBXML_VERSION >= 20617
976
- rb_define_method(cXMLReader, "column_number", rxml_reader_column_number, 0);
977
- #endif
978
- rb_define_method(cXMLReader, "depth", rxml_reader_depth, 0);
979
- rb_define_method(cXMLReader, "encoding", rxml_reader_encoding, 0);
980
- rb_define_method(cXMLReader, "expand", rxml_reader_expand, 0);
981
- rb_define_method(cXMLReader, "has_attributes?", rxml_reader_has_attributes, 0);
982
- rb_define_method(cXMLReader, "has_value?", rxml_reader_has_value, 0);
983
- #if LIBXML_VERSION >= 20617
984
- rb_define_method(cXMLReader, "line_number", rxml_reader_line_number, 0);
985
- #endif
986
- rb_define_method(cXMLReader, "local_name", rxml_reader_local_name, 0);
987
- rb_define_method(cXMLReader, "lookup_namespace", rxml_reader_lookup_namespace, 1);
988
- rb_define_method(cXMLReader, "move_to_attribute", rxml_reader_move_to_attr, 1);
989
- rb_define_method(cXMLReader, "move_to_first_attribute", rxml_reader_move_to_first_attr, 0);
990
- rb_define_method(cXMLReader, "move_to_next_attribute", rxml_reader_move_to_next_attr, 0);
991
- rb_define_method(cXMLReader, "move_to_element", rxml_reader_move_to_element, 0);
992
- rb_define_method(cXMLReader, "name", rxml_reader_name, 0);
993
- rb_define_method(cXMLReader, "namespace_uri", rxml_reader_namespace_uri, 0);
994
- rb_define_method(cXMLReader, "next", rxml_reader_next, 0);
995
- rb_define_method(cXMLReader, "next_sibling", rxml_reader_next_sibling, 0);
996
- rb_define_method(cXMLReader, "node", rxml_reader_node, 0);
997
- rb_define_method(cXMLReader, "node_type", rxml_reader_node_type, 0);
998
- rb_define_method(cXMLReader, "normalization", rxml_reader_normalization, 0);
999
- rb_define_method(cXMLReader, "prefix", rxml_reader_prefix, 0);
1000
- rb_define_method(cXMLReader, "quote_char", rxml_reader_quote_char, 0);
1001
- rb_define_method(cXMLReader, "read", rxml_reader_read, 0);
1002
- rb_define_method(cXMLReader, "read_attribute_value", rxml_reader_read_attr_value, 0);
1003
- rb_define_method(cXMLReader, "read_inner_xml", rxml_reader_read_inner_xml, 0);
1004
- rb_define_method(cXMLReader, "read_outer_xml", rxml_reader_read_outer_xml, 0);
1005
- rb_define_method(cXMLReader, "read_state", rxml_reader_read_state, 0);
1006
- rb_define_method(cXMLReader, "read_string", rxml_reader_read_string, 0);
1007
- rb_define_method(cXMLReader, "relax_ng_validate", rxml_reader_relax_ng_validate, 1);
1008
- rb_define_method(cXMLReader, "standalone", rxml_reader_standalone, 0);
1009
- #if LIBXML_VERSION >= 20620
1010
- rb_define_method(cXMLReader, "schema_validate", rxml_reader_schema_validate, 1);
1011
- #endif
1012
- rb_define_method(cXMLReader, "value", rxml_reader_value, 0);
1013
- rb_define_method(cXMLReader, "xml_lang", rxml_reader_xml_lang, 0);
1014
- rb_define_method(cXMLReader, "xml_version", rxml_reader_xml_version, 0);
1015
- rb_define_method(cXMLReader, "default?", rxml_reader_default, 0);
1016
- rb_define_method(cXMLReader, "empty_element?", rxml_reader_empty_element, 0);
1017
- rb_define_method(cXMLReader, "namespace_declaration?", rxml_reader_namespace_declaration, 0);
1018
- rb_define_method(cXMLReader, "valid?", rxml_reader_valid, 0);
1019
-
1020
- /* Constants */
1021
- rb_define_const(cXMLReader, "LOADDTD", INT2FIX(XML_PARSER_LOADDTD));
1022
- rb_define_const(cXMLReader, "DEFAULTATTRS", INT2FIX(XML_PARSER_DEFAULTATTRS));
1023
- rb_define_const(cXMLReader, "VALIDATE", INT2FIX(XML_PARSER_VALIDATE));
1024
- rb_define_const(cXMLReader, "SUBST_ENTITIES", INT2FIX(XML_PARSER_SUBST_ENTITIES));
1025
-
1026
- rb_define_const(cXMLReader, "SEVERITY_VALIDITY_WARNING", INT2FIX(XML_PARSER_SEVERITY_VALIDITY_WARNING));
1027
- rb_define_const(cXMLReader, "SEVERITY_VALIDITY_ERROR", INT2FIX(XML_PARSER_SEVERITY_VALIDITY_ERROR));
1028
- rb_define_const(cXMLReader, "SEVERITY_WARNING", INT2FIX(XML_PARSER_SEVERITY_WARNING));
1029
- rb_define_const(cXMLReader, "SEVERITY_ERROR", INT2FIX(XML_PARSER_SEVERITY_ERROR));
1030
-
1031
- rb_define_const(cXMLReader, "TYPE_NONE", INT2FIX(XML_READER_TYPE_NONE));
1032
- rb_define_const(cXMLReader, "TYPE_ELEMENT", INT2FIX(XML_READER_TYPE_ELEMENT));
1033
- rb_define_const(cXMLReader, "TYPE_ATTRIBUTE", INT2FIX(XML_READER_TYPE_ATTRIBUTE));
1034
- rb_define_const(cXMLReader, "TYPE_TEXT", INT2FIX(XML_READER_TYPE_TEXT));
1035
- rb_define_const(cXMLReader, "TYPE_CDATA", INT2FIX(XML_READER_TYPE_CDATA));
1036
- rb_define_const(cXMLReader, "TYPE_ENTITY_REFERENCE", INT2FIX(XML_READER_TYPE_ENTITY_REFERENCE));
1037
- rb_define_const(cXMLReader, "TYPE_ENTITY", INT2FIX(XML_READER_TYPE_ENTITY));
1038
- rb_define_const(cXMLReader, "TYPE_PROCESSING_INSTRUCTION", INT2FIX(XML_READER_TYPE_PROCESSING_INSTRUCTION));
1039
- rb_define_const(cXMLReader, "TYPE_COMMENT", INT2FIX(XML_READER_TYPE_COMMENT));
1040
- rb_define_const(cXMLReader, "TYPE_DOCUMENT", INT2FIX(XML_READER_TYPE_DOCUMENT));
1041
- rb_define_const(cXMLReader, "TYPE_DOCUMENT_TYPE", INT2FIX(XML_READER_TYPE_DOCUMENT_TYPE));
1042
- rb_define_const(cXMLReader, "TYPE_DOCUMENT_FRAGMENT", INT2FIX(XML_READER_TYPE_DOCUMENT_FRAGMENT));
1043
- rb_define_const(cXMLReader, "TYPE_NOTATION", INT2FIX(XML_READER_TYPE_NOTATION));
1044
- rb_define_const(cXMLReader, "TYPE_WHITESPACE", INT2FIX(XML_READER_TYPE_WHITESPACE));
1045
- rb_define_const(cXMLReader, "TYPE_SIGNIFICANT_WHITESPACE", INT2FIX(XML_READER_TYPE_SIGNIFICANT_WHITESPACE));
1046
- rb_define_const(cXMLReader, "TYPE_END_ELEMENT", INT2FIX(XML_READER_TYPE_END_ELEMENT));
1047
- rb_define_const(cXMLReader, "TYPE_END_ENTITY", INT2FIX(XML_READER_TYPE_END_ENTITY));
1048
- rb_define_const(cXMLReader, "TYPE_XML_DECLARATION", INT2FIX(XML_READER_TYPE_XML_DECLARATION));
1049
-
1050
- /* Read states */
1051
- rb_define_const(cXMLReader, "MODE_INITIAL", INT2FIX(XML_TEXTREADER_MODE_INITIAL));
1052
- rb_define_const(cXMLReader, "MODE_INTERACTIVE", INT2FIX(XML_TEXTREADER_MODE_INTERACTIVE));
1053
- rb_define_const(cXMLReader, "MODE_ERROR", INT2FIX(XML_TEXTREADER_MODE_ERROR));
1054
- rb_define_const(cXMLReader, "MODE_EOF", INT2FIX(XML_TEXTREADER_MODE_EOF));
1055
- rb_define_const(cXMLReader, "MODE_CLOSED", INT2FIX(XML_TEXTREADER_MODE_CLOSED));
1056
- rb_define_const(cXMLReader, "MODE_READING", INT2FIX(XML_TEXTREADER_MODE_READING));
1057
- }
1
+ /* Copyright (c) 2006-2007 Apple Inc.
2
+ * Please see the LICENSE file for copyright and distribution information. */
3
+
4
+ #include "ruby_libxml.h"
5
+ #include "ruby_xml_reader.h"
6
+
7
+ /*
8
+ * Document-class: LibXML::XML::Reader
9
+ *
10
+ * The XML::Reader class provides a simpler, alternative way of parsing an XML
11
+ * document in contrast to XML::Parser or XML::SaxParser. A XML::Reader instance
12
+ * acts like a cursor going forward in a document stream, stopping at each node
13
+ * it encounters. To advance to the next node, simply call XML::Reader#read.
14
+ *
15
+ * The XML::Reader API closely matches the DOM Core specification and supports
16
+ * namespaces, xml:base, entity handling and DTDs.
17
+ *
18
+ * To summarize, XML::Reader provides a far simpler API to use versus XML::SaxParser
19
+ * and is more memory efficient than using XML::Parser to create a DOM tree.
20
+ *
21
+ * Example:
22
+ *
23
+ * reader = XML::Reader.string("<foo><bar>1</bar><bar>2</bar><bar>3</bar></foo>")
24
+ * reader.read
25
+ * assert_equal('foo', reader.name)
26
+ * assert_equal(nil, reader.value)
27
+ *
28
+ * 3.times do |i|
29
+ * reader.read
30
+ * assert_equal(XML::Reader::TYPE_ELEMENT, reader.node_type)
31
+ * assert_equal('bar', reader.name)
32
+ * reader.read
33
+ * assert_equal(XML::Reader::TYPE_TEXT, reader.node_type)
34
+ * assert_equal((i + 1).to_s, reader.value)
35
+ * reader.read
36
+ * assert_equal(XML::Reader::TYPE_END_ELEMENT, reader.node_type)
37
+ * end
38
+ *
39
+ * You can also parse documents (see XML::Reader.document),
40
+ * strings (see XML::Reader.string) and io objects (see
41
+ * XML::Reader.io).
42
+ *
43
+ * For a more in depth tutorial, albeit in C, see http://xmlsoft.org/xmlreader.html.*/
44
+
45
+ VALUE cXMLReader;
46
+
47
+ static ID BASE_URI_SYMBOL;
48
+ static ID ENCODING_SYMBOL;
49
+ static ID IO_ATTR;
50
+ static ID OPTIONS_SYMBOL;
51
+
52
+ static void rxml_reader_free(xmlTextReaderPtr xreader)
53
+ {
54
+ xmlFreeTextReader(xreader);
55
+ }
56
+
57
+ static VALUE rxml_reader_wrap(xmlTextReaderPtr xreader)
58
+ {
59
+ return Data_Wrap_Struct(cXMLReader, NULL, rxml_reader_free, xreader);
60
+ }
61
+
62
+ static xmlTextReaderPtr rxml_text_reader_get(VALUE obj)
63
+ {
64
+ xmlTextReaderPtr xreader;
65
+ Data_Get_Struct(obj, xmlTextReader, xreader);
66
+ return xreader;
67
+ }
68
+
69
+ /*
70
+ * call-seq:
71
+ * XML::Reader.document(doc) -> XML::Reader
72
+ *
73
+ * Create a new reader for the specified document.
74
+ */
75
+ VALUE rxml_reader_document(VALUE klass, VALUE doc)
76
+ {
77
+ xmlDocPtr xdoc;
78
+ xmlTextReaderPtr xreader;
79
+
80
+ Data_Get_Struct(doc, xmlDoc, xdoc);
81
+
82
+ xreader = xmlReaderWalker(xdoc);
83
+
84
+ if (xreader == NULL)
85
+ rxml_raise(&xmlLastError);
86
+
87
+ return rxml_reader_wrap(xreader);
88
+ }
89
+
90
+ /* call-seq:
91
+ * XML::Reader.file(path) -> XML::Reader
92
+ * XML::Reader.file(path, :encoding => XML::Encoding::UTF_8,
93
+ * :options => XML::Parser::Options::NOENT) -> XML::Reader
94
+ *
95
+ * Creates a new reader by parsing the specified file or uri.
96
+ *
97
+ * You may provide an optional hash table to control how the
98
+ * parsing is performed. Valid options are:
99
+ *
100
+ * encoding - The document encoding, defaults to nil. Valid values
101
+ * are the encoding constants defined on XML::Encoding.
102
+ * options - Controls the execution of the parser, defaults to 0.
103
+ * Valid values are the constants defined on
104
+ * XML::Parser::Options. Multiple options can be combined
105
+ * by using Bitwise OR (|).
106
+ */
107
+ static VALUE rxml_reader_file(int argc, VALUE *argv, VALUE klass)
108
+ {
109
+ xmlTextReaderPtr xreader;
110
+ VALUE path;
111
+ VALUE options;
112
+
113
+ const char *xencoding = NULL;
114
+ int xoptions = 0;
115
+
116
+ rb_scan_args(argc, argv, "11", &path, &options);
117
+ Check_Type(path, T_STRING);
118
+
119
+ if (!NIL_P(options))
120
+ {
121
+ VALUE encoding = Qnil;
122
+ VALUE parserOptions = Qnil;
123
+
124
+ Check_Type(options, T_HASH);
125
+
126
+ encoding = rb_hash_aref(options, BASE_URI_SYMBOL);
127
+ xencoding = NIL_P(encoding) ? NULL : xmlGetCharEncodingName(NUM2INT(encoding));
128
+
129
+ parserOptions = rb_hash_aref(options, OPTIONS_SYMBOL);
130
+ xoptions = NIL_P(parserOptions) ? 0 : NUM2INT(parserOptions);
131
+ }
132
+
133
+ xreader = xmlReaderForFile(StringValueCStr(path), xencoding, xoptions);
134
+
135
+ if (xreader == NULL)
136
+ rxml_raise(&xmlLastError);
137
+
138
+ return rxml_reader_wrap(xreader);
139
+ }
140
+
141
+ /* call-seq:
142
+ * XML::Reader.io(io) -> XML::Reader
143
+ * XML::Reader.io(io, :encoding => XML::Encoding::UTF_8,
144
+ * :options => XML::Parser::Options::NOENT) -> XML::Reader
145
+ *
146
+ * Creates a new reader by parsing the specified io object.
147
+ *
148
+ * You may provide an optional hash table to control how the
149
+ * parsing is performed. Valid options are:
150
+ *
151
+ * base_uri - The base url for the parsed document.
152
+ * encoding - The document encoding, defaults to nil. Valid values
153
+ * are the encoding constants defined on XML::Encoding.
154
+ * options - Controls the execution of the parser, defaults to 0.
155
+ * Valid values are the constants defined on
156
+ * XML::Parser::Options. Multiple options can be combined
157
+ * by using Bitwise OR (|).
158
+ */
159
+ static VALUE rxml_reader_io(int argc, VALUE *argv, VALUE klass)
160
+ {
161
+ xmlTextReaderPtr xreader;
162
+ VALUE result;
163
+ VALUE io;
164
+ VALUE options;
165
+ char *xbaseurl = NULL;
166
+ const char *xencoding = NULL;
167
+ int xoptions = 0;
168
+
169
+ rb_scan_args(argc, argv, "11", &io, &options);
170
+
171
+ if (!NIL_P(options))
172
+ {
173
+ VALUE baseurl = Qnil;
174
+ VALUE encoding = Qnil;
175
+ VALUE parserOptions = Qnil;
176
+
177
+ Check_Type(options, T_HASH);
178
+
179
+ baseurl = rb_hash_aref(options, BASE_URI_SYMBOL);
180
+ xbaseurl = NIL_P(baseurl) ? NULL : StringValueCStr(baseurl);
181
+
182
+ encoding = rb_hash_aref(options, ENCODING_SYMBOL);
183
+ xencoding = NIL_P(encoding) ? NULL : xmlGetCharEncodingName(NUM2INT(encoding));
184
+
185
+ parserOptions = rb_hash_aref(options, OPTIONS_SYMBOL);
186
+ xoptions = NIL_P(parserOptions) ? 0 : NUM2INT(parserOptions);
187
+ }
188
+
189
+ xreader = xmlReaderForIO((xmlInputReadCallback) rxml_read_callback, NULL,
190
+ (void *) io,
191
+ xbaseurl, xencoding, xoptions);
192
+
193
+ if (xreader == NULL)
194
+ rxml_raise(&xmlLastError);
195
+
196
+ result = rxml_reader_wrap(xreader);
197
+
198
+ /* Attach io object to parser so it won't get freed.*/
199
+ rb_ivar_set(result, IO_ATTR, io);
200
+
201
+ return result;
202
+ }
203
+
204
+ /* call-seq:
205
+ * XML::Reader.string(string) -> XML::Reader
206
+ * XML::Reader.string(string, :encoding => XML::Encoding::UTF_8,
207
+ * :options => XML::Parser::Options::NOENT) -> XML::Reader
208
+ *
209
+ * Creates a new reader by parsing the specified string.
210
+ *
211
+ * You may provide an optional hash table to control how the
212
+ * parsing is performed. Valid options are:
213
+ *
214
+ * base_uri - The base url for the parsed document.
215
+ * encoding - The document encoding, defaults to nil. Valid values
216
+ * are the encoding constants defined on XML::Encoding.
217
+ * options - Controls the execution of the parser, defaults to 0.
218
+ * Valid values are the constants defined on
219
+ * XML::Parser::Options. Multiple options can be combined
220
+ * by using Bitwise OR (|).
221
+ */
222
+ static VALUE rxml_reader_string(int argc, VALUE *argv, VALUE klass)
223
+ {
224
+ xmlTextReaderPtr xreader;
225
+ VALUE string;
226
+ VALUE options;
227
+ char *xbaseurl = NULL;
228
+ const char *xencoding = NULL;
229
+ int xoptions = 0;
230
+
231
+ rb_scan_args(argc, argv, "11", &string, &options);
232
+ Check_Type(string, T_STRING);
233
+
234
+ if (!NIL_P(options))
235
+ {
236
+ VALUE baseurl = Qnil;
237
+ VALUE encoding = Qnil;
238
+ VALUE parserOptions = Qnil;
239
+
240
+ Check_Type(options, T_HASH);
241
+
242
+ baseurl = rb_hash_aref(options, BASE_URI_SYMBOL);
243
+ xbaseurl = NIL_P(baseurl) ? NULL : StringValueCStr(baseurl);
244
+
245
+ encoding = rb_hash_aref(options, ENCODING_SYMBOL);
246
+ xencoding = NIL_P(encoding) ? NULL : xmlGetCharEncodingName(NUM2INT(encoding));
247
+
248
+ parserOptions = rb_hash_aref(options, OPTIONS_SYMBOL);
249
+ xoptions = NIL_P(parserOptions) ? 0 : NUM2INT(parserOptions);
250
+ }
251
+
252
+ xreader = xmlReaderForMemory(StringValueCStr(string), RSTRING_LEN(string),
253
+ xbaseurl, xencoding, xoptions);
254
+
255
+ if (xreader == NULL)
256
+ rxml_raise(&xmlLastError);
257
+
258
+ return rxml_reader_wrap(xreader);
259
+ }
260
+
261
+ /*
262
+ * call-seq:
263
+ * reader.close -> code
264
+ *
265
+ * This method releases any resources allocated by the current instance
266
+ * changes the state to Closed, and closes any underlying input.
267
+ */
268
+ static VALUE rxml_reader_close(VALUE self)
269
+ {
270
+ xmlTextReaderPtr xreader = rxml_text_reader_get(self);
271
+ return INT2FIX(xmlTextReaderClose(xreader));
272
+ }
273
+
274
+ /*
275
+ * call-seq:
276
+ * reader.move_to_attribute(val) -> code
277
+ *
278
+ * Move the position of the current instance to the attribute with the
279
+ * specified index (if +val+ is an integer) or name (if +val+ is a string)
280
+ * relative to the containing element.
281
+ */
282
+ static VALUE rxml_reader_move_to_attr(VALUE self, VALUE val)
283
+ {
284
+ xmlTextReaderPtr xreader;
285
+ int ret;
286
+
287
+ xreader = rxml_text_reader_get(self);
288
+
289
+ if (TYPE(val) == T_FIXNUM)
290
+ {
291
+ ret = xmlTextReaderMoveToAttributeNo(xreader, FIX2INT(val));
292
+ }
293
+ else
294
+ {
295
+ ret = xmlTextReaderMoveToAttribute(xreader,
296
+ (const xmlChar *) StringValueCStr(val));
297
+ }
298
+
299
+ return INT2FIX(ret);
300
+ }
301
+
302
+ /*
303
+ * call-seq:
304
+ * reader.move_to_first_attribute -> code
305
+ *
306
+ * Move the position of the current instance to the first attribute associated
307
+ * with the current node.
308
+ */
309
+ static VALUE rxml_reader_move_to_first_attr(VALUE self)
310
+ {
311
+ xmlTextReaderPtr xreader = rxml_text_reader_get(self);
312
+ return INT2FIX(xmlTextReaderMoveToFirstAttribute(xreader));
313
+ }
314
+
315
+ /*
316
+ * call-seq:
317
+ * reader.move_to_next_attribute -> code
318
+ *
319
+ * Move the position of the current instance to the next attribute associated
320
+ * with the current node.
321
+ */
322
+ static VALUE rxml_reader_move_to_next_attr(VALUE self)
323
+ {
324
+ xmlTextReaderPtr xreader = rxml_text_reader_get(self);
325
+ return INT2FIX(xmlTextReaderMoveToNextAttribute(xreader));
326
+ }
327
+
328
+ /*
329
+ * call-seq:
330
+ * reader.move_to_element -> code
331
+ *
332
+ * Move the position of the current instance to the node that contains the
333
+ * current attribute node.
334
+ */
335
+ static VALUE rxml_reader_move_to_element(VALUE self)
336
+ {
337
+ xmlTextReaderPtr xreader = rxml_text_reader_get(self);
338
+ return INT2FIX(xmlTextReaderMoveToElement(xreader));
339
+ }
340
+
341
+ /*
342
+ * call-seq:
343
+ * reader.next -> code
344
+ *
345
+ * Skip to the node following the current one in document order while avoiding
346
+ * the subtree if any.
347
+ */
348
+ static VALUE rxml_reader_next(VALUE self)
349
+ {
350
+ xmlTextReaderPtr xreader = rxml_text_reader_get(self);
351
+ return INT2FIX(xmlTextReaderNext(xreader));
352
+ }
353
+
354
+ /*
355
+ * call-seq:
356
+ * reader.next_sibling -> code
357
+ *
358
+ * Skip to the node following the current one in document order while avoiding
359
+ * the subtree if any. Currently implemented only for Readers built on a
360
+ * document.
361
+ */
362
+ static VALUE rxml_reader_next_sibling(VALUE self)
363
+ {
364
+ xmlTextReaderPtr xreader = rxml_text_reader_get(self);
365
+ return INT2FIX(xmlTextReaderNextSibling(xreader));
366
+ }
367
+
368
+ /*
369
+ * call-seq:
370
+ * reader.node -> XML::Node
371
+ *
372
+ * Returns the reader's current node. It will return
373
+ * nil if Reader#read has not yet been called.
374
+ * WARNING - Using this method is dangerous because the
375
+ * node may be destroyed on the next #read.
376
+ */
377
+ static VALUE rxml_reader_node(VALUE self)
378
+ {
379
+ xmlTextReaderPtr xreader = rxml_text_reader_get(self);
380
+ xmlNodePtr xnode = xmlTextReaderCurrentNode(xreader);
381
+ return xnode ? rxml_node_wrap(xnode) : Qnil;
382
+ }
383
+
384
+ /*
385
+ * call-seq:
386
+ * reader.node_type -> type
387
+ *
388
+ * Get the node type of the current node. Reference:
389
+ * http://dotgnu.org/pnetlib-doc/System/Xml/XmlNodeType.html
390
+ */
391
+ static VALUE rxml_reader_node_type(VALUE self)
392
+ {
393
+ xmlTextReaderPtr xreader = rxml_text_reader_get(self);
394
+ return INT2FIX(xmlTextReaderNodeType(xreader));
395
+ }
396
+
397
+ /*
398
+ * call-seq:
399
+ * reader.normalization -> value
400
+ *
401
+ * The value indicating whether to normalize white space and attribute values.
402
+ * Since attribute value and end of line normalizations are a MUST in the XML
403
+ * specification only the value true is accepted. The broken behaviour of
404
+ * accepting out of range character entities like &#0; is of course not
405
+ * supported either.
406
+ *
407
+ * Return 1 or -1 in case of error.
408
+ */
409
+ static VALUE rxml_reader_normalization(VALUE self)
410
+ {
411
+ xmlTextReaderPtr xreader = rxml_text_reader_get(self);
412
+ return INT2FIX(xmlTextReaderNormalization(xreader));
413
+ }
414
+
415
+ /*
416
+ * call-seq:
417
+ * reader.read -> bool
418
+ *
419
+ * Causes the reader to move to the next node in the stream, exposing its properties.
420
+ *
421
+ * Returns true if a node was successfully read or false if there are no more
422
+ * nodes to read. On errors, an exception is raised.*/
423
+ static VALUE rxml_reader_read(VALUE self)
424
+ {
425
+ xmlTextReaderPtr xreader = rxml_text_reader_get(self);
426
+ int result = xmlTextReaderRead(xreader);
427
+ switch(result)
428
+ {
429
+ case -1:
430
+ rxml_raise(&xmlLastError);
431
+ return Qnil;
432
+ break;
433
+ case 0:
434
+ return Qfalse;
435
+ case 1:
436
+ return Qtrue;
437
+ default:
438
+ rb_raise(rb_eRuntimeError,
439
+ "xmlTextReaderRead did not return -1, 0 or 1. Return value was: %d", result);
440
+ }
441
+ }
442
+
443
+ /*
444
+ * call-seq:
445
+ * reader.read_attribute_value -> code
446
+ *
447
+ * Parse an attribute value into one or more Text and EntityReference nodes.
448
+ *
449
+ * Return 1 in case of success, 0 if the reader was not positioned on an
450
+ * attribute node or all the attribute values have been read, or -1 in case of
451
+ * error.
452
+ */
453
+ static VALUE rxml_reader_read_attr_value(VALUE self)
454
+ {
455
+ xmlTextReaderPtr xreader = rxml_text_reader_get(self);
456
+ return INT2FIX(xmlTextReaderReadAttributeValue(xreader));
457
+ }
458
+
459
+ /*
460
+ * call-seq:
461
+ * reader.read_inner_xml -> data
462
+ *
463
+ * Read the contents of the current node, including child nodes and markup.
464
+ *
465
+ * Return a string containing the XML content, or nil if the current node is
466
+ * neither an element nor attribute, or has no child nodes.
467
+ */
468
+ static VALUE rxml_reader_read_inner_xml(VALUE self)
469
+ {
470
+ VALUE result = Qnil;
471
+ xmlTextReaderPtr xReader = rxml_text_reader_get(self);
472
+
473
+ xmlChar *xml = xmlTextReaderReadInnerXml(xReader);
474
+
475
+ if (xml)
476
+ {
477
+ const xmlChar *xencoding = xmlTextReaderConstEncoding(xReader);
478
+ result = rxml_str_new2((const char*) xml, xencoding);
479
+ xmlFree(xml);
480
+ }
481
+
482
+ return result;
483
+ }
484
+
485
+ /*
486
+ * call-seq:
487
+ * reader.read_outer_xml -> data
488
+ *
489
+ * Read the contents of the current node, including child nodes and markup.
490
+ *
491
+ * Return a string containing the XML content, or nil if the current node is
492
+ * neither an element nor attribute, or has no child nodes.
493
+ */
494
+ static VALUE rxml_reader_read_outer_xml(VALUE self)
495
+ {
496
+ VALUE result = Qnil;
497
+ xmlTextReaderPtr xReader = rxml_text_reader_get(self);
498
+
499
+ xmlChar *xml = xmlTextReaderReadOuterXml(xReader);
500
+
501
+ if (xml)
502
+ {
503
+ const xmlChar *xencoding = xmlTextReaderConstEncoding(xReader);
504
+ result = rxml_str_new2((const char*) xml, xencoding);
505
+ xmlFree(xml);
506
+ }
507
+
508
+ return result;
509
+ }
510
+
511
+ /*
512
+ * call-seq:
513
+ * reader.read_state -> state
514
+ *
515
+ * Get the read state of the reader.
516
+ */
517
+ static VALUE rxml_reader_read_state(VALUE self)
518
+ {
519
+ xmlTextReaderPtr xreader = rxml_text_reader_get(self);
520
+ return INT2FIX(xmlTextReaderReadState(xreader));
521
+ }
522
+
523
+ /*
524
+ * call-seq:
525
+ * reader.read_string -> string
526
+ *
527
+ * Read the contents of an element or a text node as a string.
528
+ *
529
+ * Return a string containing the contents of the Element or Text node, or nil
530
+ * if the reader is positioned on any other type of node.
531
+ */
532
+ static VALUE rxml_reader_read_string(VALUE self)
533
+ {
534
+ VALUE result = Qnil;
535
+ xmlTextReaderPtr xReader = rxml_text_reader_get(self);
536
+
537
+ xmlChar *xml = xmlTextReaderReadString(xReader);
538
+
539
+ if (xml)
540
+ {
541
+ const xmlChar *xencoding = xmlTextReaderConstEncoding(xReader);
542
+ result = rxml_str_new2((const char*) xml, xencoding);
543
+ xmlFree(xml);
544
+ }
545
+
546
+ return result;
547
+ }
548
+
549
+ /*
550
+ * call-seq:
551
+ * reader.relax_ng_validate(rng) -> code
552
+ *
553
+ * Use RelaxNG to validate the document as it is processed. Activation is only
554
+ * possible before the first read. If +rng+ is nil, the RelaxNG validation is
555
+ * deactivated.
556
+ *
557
+ * Return 0 in case the RelaxNG validation could be (de)activated and -1 in
558
+ * case of error.
559
+ */
560
+ static VALUE rxml_reader_relax_ng_validate(VALUE self, VALUE rng)
561
+ {
562
+ char *xrng = NIL_P(rng) ? NULL : StringValueCStr(rng);
563
+ xmlTextReaderPtr xreader = rxml_text_reader_get(self);
564
+ return INT2FIX(xmlTextReaderRelaxNGValidate(xreader, xrng));
565
+ }
566
+
567
+ #if LIBXML_VERSION >= 20620
568
+ /*
569
+ * call-seq:
570
+ * reader.schema_validate(schema) -> code
571
+ *
572
+ * Use W3C XSD schema to validate the document as it is processed. Activation
573
+ * is only possible before the first read. If +schema+ is nil, then XML Schema
574
+ * validation is deactivated.
575
+ *
576
+ * Return 0 in case the schemas validation could be (de)activated and -1 in
577
+ * case of error.
578
+ */
579
+ static VALUE
580
+ rxml_reader_schema_validate(VALUE self, VALUE xsd)
581
+ {
582
+ char *xxsd = NIL_P(xsd) ? NULL : StringValueCStr(xsd);
583
+ xmlTextReaderPtr xreader = rxml_text_reader_get(self);
584
+ int status = xmlTextReaderSchemaValidate(xreader, xxsd);
585
+ return INT2FIX(status);
586
+ }
587
+ #endif
588
+
589
+ /*
590
+ * call-seq:
591
+ * reader.name -> name
592
+ *
593
+ * Return the qualified name of the node.
594
+ */
595
+ static VALUE rxml_reader_name(VALUE self)
596
+ {
597
+ xmlTextReaderPtr xReader = rxml_text_reader_get(self);
598
+ const xmlChar *result = xmlTextReaderConstName(xReader);
599
+ const xmlChar *xencoding = xmlTextReaderConstEncoding(xReader);
600
+
601
+ return (result == NULL ? Qnil : rxml_str_new2(result, xencoding));
602
+ }
603
+
604
+ /*
605
+ * call-seq:
606
+ * reader.local_name -> name
607
+ *
608
+ * Return the local name of the node.
609
+ */
610
+ static VALUE rxml_reader_local_name(VALUE self)
611
+ {
612
+ xmlTextReaderPtr xReader = rxml_text_reader_get(self);
613
+ const xmlChar *result = xmlTextReaderConstLocalName(xReader);
614
+ const xmlChar *xencoding = xmlTextReaderConstEncoding(xReader);
615
+
616
+ return (result == NULL ? Qnil : rxml_str_new2(result, xencoding));
617
+ }
618
+
619
+ /*
620
+ * call-seq:
621
+ * reader.attribute_count -> count
622
+ *
623
+ * Provide the number of attributes of the current node.
624
+ */
625
+ static VALUE rxml_reader_attr_count(VALUE self)
626
+ {
627
+ xmlTextReaderPtr xreader = rxml_text_reader_get(self);
628
+ return INT2FIX(xmlTextReaderAttributeCount(xreader));
629
+ }
630
+
631
+ /*
632
+ * call-seq:
633
+ * reader.encoding -> XML::Encoding::UTF_8
634
+ *
635
+ * Returns the encoding of the document being read. Note you
636
+ * first have to read data from the reader for encoding
637
+ * to return a value
638
+ *
639
+ * reader = XML::Reader.file(XML_FILE)
640
+ * assert_nil(reader.encoding)
641
+ * reader.read
642
+ * assert_equal(XML::Encoding::UTF_8, reader.encoding)
643
+ *
644
+ * In addition, libxml always appears to return nil for the encoding
645
+ * when parsing strings.
646
+ */
647
+ static VALUE rxml_reader_encoding(VALUE self)
648
+ {
649
+ xmlTextReaderPtr xreader = rxml_text_reader_get(self);
650
+ const xmlChar *xencoding = xmlTextReaderConstEncoding(xreader);
651
+ if (xencoding)
652
+ return INT2NUM(xmlParseCharEncoding(xencoding));
653
+ else
654
+ return INT2NUM(XML_CHAR_ENCODING_NONE);
655
+ }
656
+
657
+ /*
658
+ * call-seq:
659
+ * reader.base_uri -> URI
660
+ *
661
+ * Determine the base URI of the node.
662
+ */
663
+ static VALUE rxml_reader_base_uri(VALUE self)
664
+ {
665
+ xmlTextReaderPtr xReader = rxml_text_reader_get(self);
666
+ const xmlChar *result = xmlTextReaderConstBaseUri(xReader);
667
+ const xmlChar *xencoding = xmlTextReaderConstEncoding(xReader);
668
+
669
+ return (result == NULL ? Qnil : rxml_str_new2(result, xencoding));
670
+ }
671
+
672
+ /*
673
+ * call-seq:
674
+ * reader.namespace_uri -> URI
675
+ *
676
+ * Determine the namespace URI of the node.
677
+ */
678
+ static VALUE rxml_reader_namespace_uri(VALUE self)
679
+ {
680
+ xmlTextReaderPtr xReader = rxml_text_reader_get(self);
681
+ const xmlChar *result = xmlTextReaderConstNamespaceUri(xReader);
682
+ const xmlChar *xencoding = xmlTextReaderConstEncoding(xReader);
683
+
684
+ return (result == NULL ? Qnil : rxml_str_new2(result, xencoding));
685
+ }
686
+
687
+ /*
688
+ * call-seq:
689
+ * reader.value -> text
690
+ *
691
+ * Provide the text value of the node if present.
692
+ */
693
+ static VALUE rxml_reader_value(VALUE self)
694
+ {
695
+ xmlTextReaderPtr xReader = rxml_text_reader_get(self);
696
+ const xmlChar *result = xmlTextReaderConstValue(xReader);
697
+ const xmlChar *xencoding = xmlTextReaderConstEncoding(xReader);
698
+
699
+ return (result == NULL ? Qnil : rxml_str_new2(result, xencoding));
700
+ }
701
+
702
+ /*
703
+ * call-seq:
704
+ * reader.prefix -> prefix
705
+ *
706
+ * Get a shorthand reference to the namespace associated with the node.
707
+ */
708
+ static VALUE rxml_reader_prefix(VALUE self)
709
+ {
710
+ xmlTextReaderPtr xReader = rxml_text_reader_get(self);
711
+ const xmlChar *result = xmlTextReaderConstPrefix(xReader);
712
+ const xmlChar *xencoding = xmlTextReaderConstEncoding(xReader);
713
+
714
+ return (result == NULL ? Qnil : rxml_str_new2(result, xencoding));
715
+ }
716
+
717
+ /*
718
+ * call-seq:
719
+ * reader.depth -> depth
720
+ *
721
+ * Get the depth of the node in the tree.
722
+ */
723
+ static VALUE rxml_reader_depth(VALUE self)
724
+ {
725
+ xmlTextReaderPtr xreader = rxml_text_reader_get(self);
726
+ return INT2FIX(xmlTextReaderDepth(xreader));
727
+ }
728
+
729
+ /*
730
+ * call-seq:
731
+ * reader.quote_char -> char
732
+ *
733
+ * Get the quotation mark character used to enclose the value of an attribute,
734
+ * as an integer value (and -1 in case of error).
735
+ */
736
+ static VALUE rxml_reader_quote_char(VALUE self)
737
+ {
738
+ xmlTextReaderPtr xreader = rxml_text_reader_get(self);
739
+ return INT2FIX(xmlTextReaderQuoteChar(xreader));
740
+ }
741
+
742
+ /*
743
+ * call-seq:
744
+ * reader.standalone -> code
745
+ *
746
+ * Determine the standalone status of the document being read.
747
+ *
748
+ * Return 1 if the document was declared to be standalone, 0 if it was
749
+ * declared to be not standalone, or -1 if the document did not specify its
750
+ * standalone status or in case of error.
751
+ */
752
+ static VALUE rxml_reader_standalone(VALUE self)
753
+ {
754
+ xmlTextReaderPtr xreader = rxml_text_reader_get(self);
755
+ return INT2FIX(xmlTextReaderStandalone(xreader));
756
+ }
757
+
758
+ /*
759
+ * call-seq:
760
+ * reader.xml_lang -> value
761
+ *
762
+ * Get the xml:lang scope within which the node resides.
763
+ */
764
+ static VALUE rxml_reader_xml_lang(VALUE self)
765
+ {
766
+ xmlTextReaderPtr xReader = rxml_text_reader_get(self);
767
+ const xmlChar *result = xmlTextReaderConstXmlLang(xReader);
768
+ const xmlChar *xencoding = xmlTextReaderConstEncoding(xReader);
769
+
770
+ return (result == NULL ? Qnil : rxml_str_new2(result, xencoding));
771
+ }
772
+
773
+ /*
774
+ * call-seq:
775
+ * reader.xml_version -> version
776
+ *
777
+ * Determine the XML version of the document being read.
778
+ */
779
+ static VALUE rxml_reader_xml_version(VALUE self)
780
+ {
781
+ xmlTextReaderPtr xReader = rxml_text_reader_get(self);
782
+ const xmlChar *result = xmlTextReaderConstXmlVersion(xReader);
783
+ const xmlChar *xencoding = xmlTextReaderConstEncoding(xReader);
784
+
785
+ return (result == NULL ? Qnil : rxml_str_new2(result, xencoding));
786
+ }
787
+
788
+ /*
789
+ * call-seq:
790
+ * reader.has_attributes? -> bool
791
+ *
792
+ * Get whether the node has attributes.
793
+ */
794
+ static VALUE rxml_reader_has_attributes(VALUE self)
795
+ {
796
+ xmlTextReaderPtr xreader = rxml_text_reader_get(self);
797
+ return xmlTextReaderHasAttributes(xreader) ? Qtrue : Qfalse;
798
+ }
799
+
800
+ /*
801
+ * call-seq:
802
+ * reader.has_value? -> bool
803
+ *
804
+ * Get whether the node can have a text value.
805
+ */
806
+ static VALUE rxml_reader_has_value(VALUE self)
807
+ {
808
+ xmlTextReaderPtr xreader = rxml_text_reader_get(self);
809
+ return xmlTextReaderHasValue(xreader) ? Qtrue : Qfalse;
810
+ }
811
+
812
+ /*
813
+ * call-seq:
814
+ * reader[key] -> value
815
+ *
816
+ * Provide the value of the attribute with the specified index (if +key+ is an
817
+ * integer) or with the specified name (if +key+ is a string) relative to the
818
+ * containing element, as a string.
819
+ */
820
+ static VALUE rxml_reader_attribute(VALUE self, VALUE key)
821
+ {
822
+ VALUE result = Qnil;
823
+ xmlChar *xattr;
824
+ xmlTextReaderPtr xReader = rxml_text_reader_get(self);
825
+ const xmlChar *xencoding = xmlTextReaderConstEncoding(xReader);
826
+
827
+ if (TYPE(key) == T_FIXNUM)
828
+ {
829
+ xattr = xmlTextReaderGetAttributeNo(xReader, FIX2INT(key));
830
+ }
831
+ else
832
+ {
833
+ xattr = xmlTextReaderGetAttribute(xReader, (const xmlChar *) StringValueCStr(key));
834
+ }
835
+
836
+ if (xattr)
837
+ {
838
+ result = rxml_str_new2(xattr, xencoding);
839
+ xmlFree(xattr);
840
+ }
841
+ return result;
842
+ }
843
+
844
+ /*
845
+ * call-seq:
846
+ * reader.lookup_namespace(prefix) -> value
847
+ *
848
+ * Resolve a namespace prefix in the scope of the current element.
849
+ * To return the default namespace, specify nil as +prefix+.
850
+ */
851
+ static VALUE rxml_reader_lookup_namespace(VALUE self, VALUE prefix)
852
+ {
853
+ VALUE result = Qnil;
854
+ xmlTextReaderPtr xReader = rxml_text_reader_get(self);
855
+ const xmlChar *xnamespace = xmlTextReaderLookupNamespace(xReader, (const xmlChar *) StringValueCStr(prefix));
856
+ const xmlChar *xencoding = xmlTextReaderConstEncoding(xReader);
857
+
858
+ if (xnamespace)
859
+ {
860
+ result = rxml_str_new2((const char*)xnamespace, (const char*)xencoding);
861
+ xmlFree((void *)xnamespace);
862
+ }
863
+ return result;
864
+ }
865
+
866
+ /*
867
+ * call-seq:
868
+ * reader.expand -> node
869
+ *
870
+ * Returns the current node and its full subtree. Note the returned node
871
+ * is valid ONLY until the next read call.
872
+ */
873
+ static VALUE rxml_reader_expand(VALUE self)
874
+ {
875
+ xmlTextReaderPtr xreader = rxml_text_reader_get(self);
876
+ xmlNodePtr xnode = xmlTextReaderExpand(xreader);
877
+
878
+ if (!xnode)
879
+ {
880
+ return Qnil;
881
+ }
882
+ else
883
+ {
884
+ return rxml_node_wrap(xnode);
885
+ }
886
+ }
887
+
888
#if LIBXML_VERSION >= 20618
/*
 * call-seq:
 *    reader.byte_consumed -> value
 *
 * This method provides the current index of the parser used by the reader,
 * relative to the start of the current entity.
 *
 * The value reported by libxml2 is returned unchanged as a Ruby integer.
 */
static VALUE
rxml_reader_byte_consumed(VALUE self)
{
  xmlTextReaderPtr xreader = rxml_text_reader_get(self);

  /* xmlTextReaderByteConsumed returns a C long; LONG2NUM converts it
   * without narrowing to int first, as INT2NUM would. */
  return LONG2NUM(xmlTextReaderByteConsumed(xreader));
}
#endif
903
+
904
#if LIBXML_VERSION >= 20617
/*
 * call-seq:
 *    reader.column_number -> number
 *
 * Report the column number of the current parsing point, as given by
 * xmlTextReaderGetParserColumnNumber.
 */
static VALUE
rxml_reader_column_number(VALUE self)
{
  int column = xmlTextReaderGetParserColumnNumber(rxml_text_reader_get(self));

  return INT2NUM(column);
}

/*
 * call-seq:
 *    reader.line_number -> number
 *
 * Report the line number of the current parsing point, as given by
 * xmlTextReaderGetParserLineNumber.
 */
static VALUE
rxml_reader_line_number(VALUE self)
{
  int line = xmlTextReaderGetParserLineNumber(rxml_text_reader_get(self));

  return INT2NUM(line);
}
#endif
931
+
932
+ /*
933
+ * call-seq:
934
+ * reader.default? -> bool
935
+ *
936
+ * Return whether an Attribute node was generated from the default value
937
+ * defined in the DTD or schema.
938
+ */
939
+ static VALUE rxml_reader_default(VALUE self)
940
+ {
941
+ xmlTextReaderPtr xreader = rxml_text_reader_get(self);
942
+ return xmlTextReaderIsDefault(xreader) ? Qtrue : Qfalse;
943
+ }
944
+
945
+ /*
946
+ * call-seq:
947
+ * reader.namespace_declaration? -> bool
948
+ *
949
+ * Determine whether the current node is a namespace declaration rather than a
950
+ * regular attribute.
951
+ */
952
+ static VALUE rxml_reader_namespace_declaration(VALUE self)
953
+ {
954
+ xmlTextReaderPtr xreader = rxml_text_reader_get(self);
955
+ return xmlTextReaderIsNamespaceDecl(xreader) ? Qtrue : Qfalse;
956
+ }
957
+
958
+ /*
959
+ * call-seq:
960
+ * reader.empty_element? -> bool
961
+ *
962
+ * Check if the current node is empty.
963
+ */
964
+ static VALUE rxml_reader_empty_element(VALUE self)
965
+ {
966
+ xmlTextReaderPtr xreader = rxml_text_reader_get(self);
967
+ return xmlTextReaderIsEmptyElement(xreader) ? Qtrue : Qfalse;
968
+ }
969
+
970
+ /*
971
+ * call-seq:
972
+ * reader.valid? -> bool
973
+ *
974
+ * Retrieve the validity status from the parser context.
975
+ */
976
+ static VALUE rxml_reader_valid(VALUE self)
977
+ {
978
+ xmlTextReaderPtr xreader = rxml_text_reader_get(self);
979
+ return xmlTextReaderIsValid(xreader) ? Qtrue : Qfalse;
980
+ }
981
+
982
+ void rxml_init_reader(void)
983
+ {
984
+ BASE_URI_SYMBOL = ID2SYM(rb_intern("base_uri"));
985
+ ENCODING_SYMBOL = ID2SYM(rb_intern("encoding"));
986
+ IO_ATTR = rb_intern("@io");
987
+ OPTIONS_SYMBOL = ID2SYM(rb_intern("options"));
988
+
989
+ cXMLReader = rb_define_class_under(mXML, "Reader", rb_cObject);
990
+
991
+ rb_define_singleton_method(cXMLReader, "document", rxml_reader_document, 1);
992
+ rb_define_singleton_method(cXMLReader, "file", rxml_reader_file, -1);
993
+ rb_define_singleton_method(cXMLReader, "io", rxml_reader_io, -1);
994
+ rb_define_singleton_method(cXMLReader, "string", rxml_reader_string, -1);
995
+
996
+ rb_define_method(cXMLReader, "[]", rxml_reader_attribute, 1);
997
+ rb_define_method(cXMLReader, "attribute_count", rxml_reader_attr_count, 0);
998
+ rb_define_method(cXMLReader, "base_uri", rxml_reader_base_uri, 0);
999
+ #if LIBXML_VERSION >= 20618
1000
+ rb_define_method(cXMLReader, "byte_consumed", rxml_reader_byte_consumed, 0);
1001
+ #endif
1002
+ rb_define_method(cXMLReader, "close", rxml_reader_close, 0);
1003
+ #if LIBXML_VERSION >= 20617
1004
+ rb_define_method(cXMLReader, "column_number", rxml_reader_column_number, 0);
1005
+ #endif
1006
+ rb_define_method(cXMLReader, "depth", rxml_reader_depth, 0);
1007
+ rb_define_method(cXMLReader, "encoding", rxml_reader_encoding, 0);
1008
+ rb_define_method(cXMLReader, "expand", rxml_reader_expand, 0);
1009
+ rb_define_method(cXMLReader, "has_attributes?", rxml_reader_has_attributes, 0);
1010
+ rb_define_method(cXMLReader, "has_value?", rxml_reader_has_value, 0);
1011
+ #if LIBXML_VERSION >= 20617
1012
+ rb_define_method(cXMLReader, "line_number", rxml_reader_line_number, 0);
1013
+ #endif
1014
+ rb_define_method(cXMLReader, "local_name", rxml_reader_local_name, 0);
1015
+ rb_define_method(cXMLReader, "lookup_namespace", rxml_reader_lookup_namespace, 1);
1016
+ rb_define_method(cXMLReader, "move_to_attribute", rxml_reader_move_to_attr, 1);
1017
+ rb_define_method(cXMLReader, "move_to_first_attribute", rxml_reader_move_to_first_attr, 0);
1018
+ rb_define_method(cXMLReader, "move_to_next_attribute", rxml_reader_move_to_next_attr, 0);
1019
+ rb_define_method(cXMLReader, "move_to_element", rxml_reader_move_to_element, 0);
1020
+ rb_define_method(cXMLReader, "name", rxml_reader_name, 0);
1021
+ rb_define_method(cXMLReader, "namespace_uri", rxml_reader_namespace_uri, 0);
1022
+ rb_define_method(cXMLReader, "next", rxml_reader_next, 0);
1023
+ rb_define_method(cXMLReader, "next_sibling", rxml_reader_next_sibling, 0);
1024
+ rb_define_method(cXMLReader, "node", rxml_reader_node, 0);
1025
+ rb_define_method(cXMLReader, "node_type", rxml_reader_node_type, 0);
1026
+ rb_define_method(cXMLReader, "normalization", rxml_reader_normalization, 0);
1027
+ rb_define_method(cXMLReader, "prefix", rxml_reader_prefix, 0);
1028
+ rb_define_method(cXMLReader, "quote_char", rxml_reader_quote_char, 0);
1029
+ rb_define_method(cXMLReader, "read", rxml_reader_read, 0);
1030
+ rb_define_method(cXMLReader, "read_attribute_value", rxml_reader_read_attr_value, 0);
1031
+ rb_define_method(cXMLReader, "read_inner_xml", rxml_reader_read_inner_xml, 0);
1032
+ rb_define_method(cXMLReader, "read_outer_xml", rxml_reader_read_outer_xml, 0);
1033
+ rb_define_method(cXMLReader, "read_state", rxml_reader_read_state, 0);
1034
+ rb_define_method(cXMLReader, "read_string", rxml_reader_read_string, 0);
1035
+ rb_define_method(cXMLReader, "relax_ng_validate", rxml_reader_relax_ng_validate, 1);
1036
+ rb_define_method(cXMLReader, "standalone", rxml_reader_standalone, 0);
1037
+ #if LIBXML_VERSION >= 20620
1038
+ rb_define_method(cXMLReader, "schema_validate", rxml_reader_schema_validate, 1);
1039
+ #endif
1040
+ rb_define_method(cXMLReader, "value", rxml_reader_value, 0);
1041
+ rb_define_method(cXMLReader, "xml_lang", rxml_reader_xml_lang, 0);
1042
+ rb_define_method(cXMLReader, "xml_version", rxml_reader_xml_version, 0);
1043
+ rb_define_method(cXMLReader, "default?", rxml_reader_default, 0);
1044
+ rb_define_method(cXMLReader, "empty_element?", rxml_reader_empty_element, 0);
1045
+ rb_define_method(cXMLReader, "namespace_declaration?", rxml_reader_namespace_declaration, 0);
1046
+ rb_define_method(cXMLReader, "valid?", rxml_reader_valid, 0);
1047
+
1048
+ /* Constants */
1049
+ rb_define_const(cXMLReader, "LOADDTD", INT2FIX(XML_PARSER_LOADDTD));
1050
+ rb_define_const(cXMLReader, "DEFAULTATTRS", INT2FIX(XML_PARSER_DEFAULTATTRS));
1051
+ rb_define_const(cXMLReader, "VALIDATE", INT2FIX(XML_PARSER_VALIDATE));
1052
+ rb_define_const(cXMLReader, "SUBST_ENTITIES", INT2FIX(XML_PARSER_SUBST_ENTITIES));
1053
+
1054
+ rb_define_const(cXMLReader, "SEVERITY_VALIDITY_WARNING", INT2FIX(XML_PARSER_SEVERITY_VALIDITY_WARNING));
1055
+ rb_define_const(cXMLReader, "SEVERITY_VALIDITY_ERROR", INT2FIX(XML_PARSER_SEVERITY_VALIDITY_ERROR));
1056
+ rb_define_const(cXMLReader, "SEVERITY_WARNING", INT2FIX(XML_PARSER_SEVERITY_WARNING));
1057
+ rb_define_const(cXMLReader, "SEVERITY_ERROR", INT2FIX(XML_PARSER_SEVERITY_ERROR));
1058
+
1059
+ rb_define_const(cXMLReader, "TYPE_NONE", INT2FIX(XML_READER_TYPE_NONE));
1060
+ rb_define_const(cXMLReader, "TYPE_ELEMENT", INT2FIX(XML_READER_TYPE_ELEMENT));
1061
+ rb_define_const(cXMLReader, "TYPE_ATTRIBUTE", INT2FIX(XML_READER_TYPE_ATTRIBUTE));
1062
+ rb_define_const(cXMLReader, "TYPE_TEXT", INT2FIX(XML_READER_TYPE_TEXT));
1063
+ rb_define_const(cXMLReader, "TYPE_CDATA", INT2FIX(XML_READER_TYPE_CDATA));
1064
+ rb_define_const(cXMLReader, "TYPE_ENTITY_REFERENCE", INT2FIX(XML_READER_TYPE_ENTITY_REFERENCE));
1065
+ rb_define_const(cXMLReader, "TYPE_ENTITY", INT2FIX(XML_READER_TYPE_ENTITY));
1066
+ rb_define_const(cXMLReader, "TYPE_PROCESSING_INSTRUCTION", INT2FIX(XML_READER_TYPE_PROCESSING_INSTRUCTION));
1067
+ rb_define_const(cXMLReader, "TYPE_COMMENT", INT2FIX(XML_READER_TYPE_COMMENT));
1068
+ rb_define_const(cXMLReader, "TYPE_DOCUMENT", INT2FIX(XML_READER_TYPE_DOCUMENT));
1069
+ rb_define_const(cXMLReader, "TYPE_DOCUMENT_TYPE", INT2FIX(XML_READER_TYPE_DOCUMENT_TYPE));
1070
+ rb_define_const(cXMLReader, "TYPE_DOCUMENT_FRAGMENT", INT2FIX(XML_READER_TYPE_DOCUMENT_FRAGMENT));
1071
+ rb_define_const(cXMLReader, "TYPE_NOTATION", INT2FIX(XML_READER_TYPE_NOTATION));
1072
+ rb_define_const(cXMLReader, "TYPE_WHITESPACE", INT2FIX(XML_READER_TYPE_WHITESPACE));
1073
+ rb_define_const(cXMLReader, "TYPE_SIGNIFICANT_WHITESPACE", INT2FIX(XML_READER_TYPE_SIGNIFICANT_WHITESPACE));
1074
+ rb_define_const(cXMLReader, "TYPE_END_ELEMENT", INT2FIX(XML_READER_TYPE_END_ELEMENT));
1075
+ rb_define_const(cXMLReader, "TYPE_END_ENTITY", INT2FIX(XML_READER_TYPE_END_ENTITY));
1076
+ rb_define_const(cXMLReader, "TYPE_XML_DECLARATION", INT2FIX(XML_READER_TYPE_XML_DECLARATION));
1077
+
1078
+ /* Read states */
1079
+ rb_define_const(cXMLReader, "MODE_INITIAL", INT2FIX(XML_TEXTREADER_MODE_INITIAL));
1080
+ rb_define_const(cXMLReader, "MODE_INTERACTIVE", INT2FIX(XML_TEXTREADER_MODE_INTERACTIVE));
1081
+ rb_define_const(cXMLReader, "MODE_ERROR", INT2FIX(XML_TEXTREADER_MODE_ERROR));
1082
+ rb_define_const(cXMLReader, "MODE_EOF", INT2FIX(XML_TEXTREADER_MODE_EOF));
1083
+ rb_define_const(cXMLReader, "MODE_CLOSED", INT2FIX(XML_TEXTREADER_MODE_CLOSED));
1084
+ rb_define_const(cXMLReader, "MODE_READING", INT2FIX(XML_TEXTREADER_MODE_READING));
1085
+ }