mame-xmlparser 0.6.81.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (119) hide show
  1. data/Encodings/README.ja +18 -0
  2. data/Encodings/euc-jp.enc +0 -0
  3. data/Encodings/shift_jis.enc +0 -0
  4. data/History.txt +5 -0
  5. data/Manifest.txt +118 -0
  6. data/README +697 -0
  7. data/README.ja +789 -0
  8. data/README.txt +49 -0
  9. data/Rakefile +30 -0
  10. data/ext/encoding.h +91 -0
  11. data/ext/extconf.rb +58 -0
  12. data/ext/xmlparser.c +2231 -0
  13. data/lib/sax.rb +1 -0
  14. data/lib/saxdriver.rb +1 -0
  15. data/lib/wget.rb +47 -0
  16. data/lib/xml/dom/builder-ja.rb +58 -0
  17. data/lib/xml/dom/builder.rb +310 -0
  18. data/lib/xml/dom/core.rb +3276 -0
  19. data/lib/xml/dom/digest.rb +94 -0
  20. data/lib/xml/dom/visitor.rb +182 -0
  21. data/lib/xml/dom2/attr.rb +213 -0
  22. data/lib/xml/dom2/cdatasection.rb +76 -0
  23. data/lib/xml/dom2/characterdata.rb +177 -0
  24. data/lib/xml/dom2/comment.rb +81 -0
  25. data/lib/xml/dom2/core.rb +19 -0
  26. data/lib/xml/dom2/document.rb +317 -0
  27. data/lib/xml/dom2/documentfragment.rb +82 -0
  28. data/lib/xml/dom2/documenttype.rb +102 -0
  29. data/lib/xml/dom2/dombuilder.rb +277 -0
  30. data/lib/xml/dom2/dombuilderfilter.rb +12 -0
  31. data/lib/xml/dom2/domentityresolver.rb +13 -0
  32. data/lib/xml/dom2/domentityresolverimpl.rb +37 -0
  33. data/lib/xml/dom2/domexception.rb +95 -0
  34. data/lib/xml/dom2/domimplementation.rb +61 -0
  35. data/lib/xml/dom2/dominputsource.rb +29 -0
  36. data/lib/xml/dom2/element.rb +533 -0
  37. data/lib/xml/dom2/entity.rb +110 -0
  38. data/lib/xml/dom2/entityreference.rb +107 -0
  39. data/lib/xml/dom2/namednodemap.rb +138 -0
  40. data/lib/xml/dom2/node.rb +587 -0
  41. data/lib/xml/dom2/nodelist.rb +231 -0
  42. data/lib/xml/dom2/notation.rb +86 -0
  43. data/lib/xml/dom2/processinginstruction.rb +155 -0
  44. data/lib/xml/dom2/text.rb +128 -0
  45. data/lib/xml/dom2/xpath.rb +398 -0
  46. data/lib/xml/encoding-ja.rb +42 -0
  47. data/lib/xml/parser.rb +13 -0
  48. data/lib/xml/parserns.rb +236 -0
  49. data/lib/xml/sax.rb +353 -0
  50. data/lib/xml/saxdriver.rb +370 -0
  51. data/lib/xml/xpath.rb +3284 -0
  52. data/lib/xml/xpath.ry +2352 -0
  53. data/lib/xmldigest.rb +1 -0
  54. data/lib/xmlencoding-ja.rb +11 -0
  55. data/lib/xmltree.rb +1 -0
  56. data/lib/xmltreebuilder-ja.rb +9 -0
  57. data/lib/xmltreebuilder.rb +1 -0
  58. data/lib/xmltreevisitor.rb +1 -0
  59. data/samples/buildertest.rb +47 -0
  60. data/samples/buildertest2.rb +50 -0
  61. data/samples/digesttest.rb +26 -0
  62. data/samples/digesttest2.rb +192 -0
  63. data/samples/doctype.rb +40 -0
  64. data/samples/doctype.xml +21 -0
  65. data/samples/doctypei.rb +19 -0
  66. data/samples/document.dtd +77 -0
  67. data/samples/dom2/dom2test1.rb +7 -0
  68. data/samples/dom2/dom2test2.rb +10 -0
  69. data/samples/dom2/gtkxpath.rb +259 -0
  70. data/samples/dom2/test1.xml +16 -0
  71. data/samples/dom2/test2.xml +7 -0
  72. data/samples/dtd/ext1.dtd +4 -0
  73. data/samples/dtd/ext2.dtd +1 -0
  74. data/samples/dtd/extdtd.rb +34 -0
  75. data/samples/dtd/extdtd.xml +5 -0
  76. data/samples/expat-1.2/CVS/Entries +8 -0
  77. data/samples/expat-1.2/CVS/Repository +1 -0
  78. data/samples/expat-1.2/CVS/Root +1 -0
  79. data/samples/expat-1.2/ext.ent +2 -0
  80. data/samples/expat-1.2/exttest.rb +82 -0
  81. data/samples/expat-1.2/exttesti.rb +81 -0
  82. data/samples/expat-1.2/hoge.dtd +7 -0
  83. data/samples/expat-1.2/idattr.xml +8 -0
  84. data/samples/expat-1.2/idtest.rb +21 -0
  85. data/samples/expat-1.2/idtest.xml +12 -0
  86. data/samples/expat-1.2/xmlextparser.rb +39 -0
  87. data/samples/gtktree.rb +146 -0
  88. data/samples/idattrtest.rb +28 -0
  89. data/samples/index_euc.xml +72 -0
  90. data/samples/index_jis.xml +72 -0
  91. data/samples/index_noenc.xml +71 -0
  92. data/samples/index_sjis.xml +72 -0
  93. data/samples/index_u16.xml +0 -0
  94. data/samples/index_u8.xml +72 -0
  95. data/samples/my-html.rb +65 -0
  96. data/samples/namespaces/CVS/Entries +5 -0
  97. data/samples/namespaces/CVS/Repository +1 -0
  98. data/samples/namespaces/CVS/Root +1 -0
  99. data/samples/namespaces/namespace1.rb +29 -0
  100. data/samples/namespaces/namespace1.xml +10 -0
  101. data/samples/namespaces/namespace2.rb +41 -0
  102. data/samples/namespaces/namespace2.xml +12 -0
  103. data/samples/nstest.rb +21 -0
  104. data/samples/saxtest.rb +70 -0
  105. data/samples/test/featurelist.rb +11 -0
  106. data/samples/test/skippedentity.rb +48 -0
  107. data/samples/test/useforeigndtd.rb +42 -0
  108. data/samples/treetest.rb +14 -0
  109. data/samples/visitor.rb +29 -0
  110. data/samples/visitortest.rb +36 -0
  111. data/samples/writer.rb +43 -0
  112. data/samples/xmlcheck.rb +40 -0
  113. data/samples/xmlcomments.rb +30 -0
  114. data/samples/xmlevent.rb +76 -0
  115. data/samples/xmliter.rb +57 -0
  116. data/samples/xmlstats.rb +167 -0
  117. data/samples/xpointer.rb +233 -0
  118. data/samples/xpointertest.rb +23 -0
  119. metadata +185 -0
@@ -0,0 +1,49 @@
1
+ = xmlparser
2
+
3
+ * http://www.yoshidam.net/Ruby.html#xmlparser
4
+ * http://rubyforge.org/projects/xmlparser/
5
+
6
+ == DESCRIPTION:
7
+
8
+ Expat (XML Parser Toolkit) Module for Ruby
9
+
10
+ This is a module for accessing James Clark's XML Parser
11
+ Toolkit "expat" (http://www.jclark.com/xml/expat.html) from
12
+ Ruby.
13
+
14
+ This is a modified version of xmlparser:
15
+
16
+ - works on Ruby 1.9
17
+ - depends on hoe instead of mkrf
18
+
19
+ == FEATURES/PROBLEMS:
20
+
21
+ see README or README.ja.
22
+
23
+ == SYNOPSIS:
24
+
25
+ see README or README.ja.
26
+
27
+ == REQUIREMENTS:
28
+
29
+ None
30
+
31
+ == INSTALL:
32
+
33
+ * gem install mame-xmlparser
34
+
35
+ == LICENSE:
36
+
37
+ This extension module is copyrighted free software by
38
+ Yoshida Masato.
39
+
40
+ You can redistribute it and/or modify it under the same terms as
41
+ Ruby or expat.
42
+
43
+ encoding.h and the functions of encoding map are part of
44
+ XML::Parser for Perl.
45
+
46
+ Copyright (c) 1998 Larry Wall and Clark Cooper.
47
+ All rights reserved.
48
+ This program is free software; you can redistribute it and/or modify it
49
+ under the same terms as Perl itself.
@@ -0,0 +1,30 @@
1
+ # -*- ruby -*-
2
+
3
+ require 'rubygems'
4
+ require 'rbconfig'
5
+ require 'hoe'
6
+
7
+ EXT = "ext/xmlparser.#{RbConfig::CONFIG["DLEXT"]}"
8
+
9
+ class XMLParser
10
+ VERSION = "0.6.81.1"
11
+ end
12
+
13
+ Hoe.spec('xmlparser') do
14
+ self.version = XMLParser::VERSION
15
+ self.rubyforge_name = 'xmlparser' # if different than lowercase project name
16
+ self.developer('Yoshida Mataso with Jeff Hodges with Yusuke Endoh', 'mame@tsg.ne.jp')
17
+ self.spec_extras[:extensions] = "ext/extconf.rb"
18
+ self.clean_globs << EXT << "ext/*.o" << "ext/Makefile" << "ext/mkmf.log"
19
+ end
20
+
21
+ task :test => EXT
22
+
23
+ file EXT => ["ext/extconf.rb", "ext/xmlparser.c"] do
24
+ Dir.chdir "ext" do
25
+ ruby "extconf.rb"
26
+ sh "make"
27
+ end
28
+ end
29
+
30
+ # vim: syntax=ruby
@@ -0,0 +1,91 @@
1
+ /*****************************************************************
2
+ ** encoding.h
3
+ **
4
+ ** Copyright 1998 Clark Cooper
5
+ ** All rights reserved.
6
+ **
7
+ ** This program is free software; you can redistribute it and/or
8
+ ** modify it under the same terms as Perl itself.
9
+ */
10
+
11
+ #ifndef ENCODING_H
12
+ #define ENCODING_H 1
13
+
14
+ #define ENCMAP_MAGIC 0xfeebface
15
+
16
+ typedef struct prefixmap {
17
+ unsigned char min;
18
+ unsigned char len; /* 0 => 256 */
19
+ unsigned short bmap_start;
20
+ unsigned char ispfx[32];
21
+ unsigned char ischar[32];
22
+ } PrefixMap;
23
+
24
+ typedef struct encinf
25
+ {
26
+ unsigned short prefixes_size;
27
+ unsigned short bytemap_size;
28
+ int firstmap[256];
29
+ PrefixMap *prefixes;
30
+ unsigned short *bytemap;
31
+ } Encinfo;
32
+
33
+ typedef struct encmaphdr
34
+ {
35
+ unsigned int magic;
36
+ char name[40];
37
+ unsigned short pfsize;
38
+ unsigned short bmsize;
39
+ int map[256];
40
+ } Encmap_Header;
41
+
42
+ /*================================================================
43
+ ** Structure of Encoding map binary encoding
44
+ **
45
+ ** Note that all shorts and ints are in network order,
46
+ ** so when packing or unpacking with perl, use 'n' and 'N' respectively.
47
+ ** In C, use the htonl family of functions.
48
+ **
49
+ ** The basic structure is:
50
+ **
51
+ ** _______________________
52
+ ** |Header (including map expat needs for 1st byte)
53
+ ** |PrefixMap * pfsize
54
+ ** | This section isn't included for single-byte encodings.
55
+ ** | For multiple byte encodings, when a byte represents a prefix
56
+ ** | then it indexes into this vector instead of mapping to a
57
+ ** | Unicode character. The PrefixMap type is declared above. The
58
+ ** | ispfx and ischar fields are bitvectors indicating whether
59
+ ** | the byte being mapped is a prefix or character respectively.
60
+ ** | If neither is set, then the character is not mapped to Unicode.
61
+ ** |
62
+ ** | The min field is the 1st byte mapped for this prefix; the
63
+ ** | len field is the number of bytes mapped; and bmap_start is
64
+ ** | the starting index of the map for this prefix in the overall
65
+ ** | map (next section).
66
+ ** |unsigned short * bmsize
67
+ ** | This section also is omitted for single-byte encodings.
68
+ ** | Each short is either a Unicode scalar or an index into the
69
+ ** | PrefixMap vector.
70
+ **
71
+ ** The header for these files is declared above as the Encmap_Header type.
72
+ ** The magic field is a magic number which should match the ENCMAP_MAGIC
73
+ ** macro above. The next 40 bytes store the IANA registered name for the
74
+ ** encoding. The pfsize field holds the number of PrefixMaps, which should
75
+ ** be zero for single byte encodings. The bmsize field holds the number of
76
+ ** shorts used for the overall map.
77
+ **
78
+ ** The map field contains either the Unicode scalar encoded by the 1st byte
79
+ ** or -n where n is the number of bytes that such a 1st byte implies (Expat
80
+ ** requires that the number of bytes to encode a character is indicated by
81
+ ** the 1st byte) or -1 if the byte doesn't map to any Unicode character.
82
+ **
83
+ ** If the encoding is a multiple byte encoding, then there will be PrefixMap
84
+ ** and character map sections. The 1st PrefixMap (index 0), covers a range
85
+ ** of bytes that includes all 1st byte prefixes.
86
+ **
87
+ ** Look at convert_to_unicode in Expat.xs to see how this data structure
88
+ ** is used.
89
+ */
90
+
91
+ #endif /* ndef ENCODING_H */
@@ -0,0 +1,58 @@
1
+ #
2
+ # ruby extconf.rb
3
+ # --with-perl-enc-map[=/path/to/enc-map]
4
+ # --with-expat-dir=/path/to/expat
5
+ # --with-expat-lib=/path/to/expat/lib
6
+ # --with-expat-include=/path/to/expat/include
7
+ #
8
+ require 'mkmf'
9
+
10
+ cwd=`pwd`.chomp!
11
+ perl= ENV['PERL'] || 'perl'
12
+
13
+ ## Encoding maps may be stored in $perl_archlib/XML/Parser/Encodings/
14
+ #perl_archlib = '/usr/lib/perl5/site_perl/5.005/i586-linux'
15
+ #perl_archlib = '/usr/local/lib'
16
+ perl_archlib = `#{perl} -e 'use Config; print $Config{"archlib"}'`
17
+ xml_enc_path = with_config("perl-enc-map")
18
+ if xml_enc_path == true
19
+ xml_enc_path = perl_archlib + "/XML/Parser/Encodings"
20
+ end
21
+
22
+ ##$CFLAGS="-I#{cwd}/expat/xmlparse -I#{cwd}/expat/xmltok" +
23
+ ## ' -DXML_ENC_PATH=getenv\(\"XML_ENC_PATH\"\)' +
24
+ ## " -DNEW_EXPAT"
25
+ #$CFLAGS = "-I#{cwd}/expat/xmlparse -I#{cwd}/expat/xmltok"
26
+ #$LDFLAGS = "-L#{cwd}/expat/xmlparse -Wl,-rpath,/usr/local/lib"
27
+ #$LDFLAGS = "-L#{cwd}/expat/xmlparse"
28
+ dir_config("expat")
29
+ #dir_config("xmltok")
30
+ #dir_config("xmlparse")
31
+ if xml_enc_path
32
+ $CFLAGS += " -DXML_ENC_PATH=\\\"#{xml_enc_path}\\\""
33
+ end
34
+
35
+ #if have_header("xmlparse.h") || have_header("expat.h")
36
+ if have_header("expat.h") || have_header("xmlparse.h")
37
+ if have_library("expat", "XML_ParserCreate") ||
38
+ have_library("xmltok", "XML_ParserCreate")
39
+ if have_func("XML_SetNotStandaloneHandler")
40
+ $CFLAGS += " -DNEW_EXPAT"
41
+ end
42
+ if have_func("XML_SetParamEntityParsing")
43
+ $CFLAGS += " -DXML_DTD"
44
+ end
45
+ # if have_func("XML_SetExternalParsedEntityDeclHandler")
46
+ # $CFLAGS += " -DEXPAT_1_2"
47
+ # end
48
+ have_func("XML_SetDoctypeDeclHandler")
49
+ have_func("XML_ParserReset")
50
+ have_func("XML_SetSkippedEntityHandler")
51
+ have_func("XML_GetFeatureList")
52
+ have_func("XML_UseForeignDTD")
53
+ have_func("XML_GetIdAttributeIndex")
54
+ have_library("socket", "ntohl")
55
+ have_library("wsock32") if RUBY_PLATFORM =~ /mswin32|mingw/
56
+ create_makefile("xmlparser")
57
+ end
58
+ end
@@ -0,0 +1,2231 @@
1
+ /*
2
+ * Expat (XML Parser Toolkit) wrapper for Ruby
3
+ * Feb 16, 2004 yoshidam version 0.6.8 taint output string
4
+ * Feb 16, 2004 yoshidam version 0.6.7 fix buffer overflow
5
+ * Mar 11, 2003 yoshidam version 0.6.6 fix skippedEntity handler
6
+ * Sep 20, 2002 yoshidam version 0.6.5 fix reset method
7
+ * Apr 4, 2002 yoshidam version 0.6.3 change event code values
8
+ * Oct 10, 2000 yoshidam version 0.6.1 support expat-1.2
9
+ * Oct 6, 2000 yoshidam version 0.6.0 support expat-1.95.0
10
+ * Jun 28, 1999 yoshidam version 0.5.18 define initialize for Ruby 1.5
11
+ * Jun 28, 1999 yoshidam version 0.5.15 support start/endDoctypeDecl
12
+ * Jun 28, 1999 yoshidam version 0.5.14 support setParamEntityParsing
13
+ * Apr 28, 1999 yoshidam version 0.5.11 support notStandalone
14
+ * Mar 29, 1998 yoshidam version 0.5.9 optimize for Ruby 1.3
15
+ * Mar 8, 1998 yoshidam version 0.5.7 support start/endNamespaceDecl
16
+ * Jan 14, 1998 yoshidam version 0.5.4 support start/endCdataSection
17
+ * Jan 10, 1998 yoshidam version 0.5.3 support encoding map
18
+ * Nov 24, 1998 yoshidam version 0.5.0 support TEST version of expat
19
+ * Nov 5, 1998 yoshidam version 0.4.18 mIDs are initialized in Init_xmlparser
20
+ * Oct 28, 1998 yoshidam version 0.4.17 mIDs are stored into static vars
21
+ * Oct 13, 1998 yoshidam version 0.4.12 debug and speed up myEncodingConv
22
+ * Oct 7, 1998 yoshidam version 0.4.11 hold internal object into ivar
23
+ * Sep 18, 1998 yoshidam version 0.4.6
24
+ * Sep 8, 1998 yoshidam version 0.4.4
25
+ * Sep 3, 1998 yoshidam version 0.4.3
26
+ * Sep 1, 1998 yoshidam version 0.4.2
27
+ * Aug 28, 1998 yoshidam version 0.4.1
28
+ * Aug 22, 1998 yoshidam version 0.4.0
29
+ * Jul 6, 1998 yoshidam version 0.2
30
+ * Jun 30, 1998 yoshidam version 0.1
31
+ *
32
+ * XML_ENC_PATH: path of encoding map for Perl
33
+ * HAVE_XML_USEFOREIGNDTD: expat 1.95.5
34
+ * HAVE_XML_GETFEATURELIST: expat 1.95.5
35
+ * HAVE_XML_SETSKIPPEDENTITYHANDLER: expat 1.95.4
36
+ * HAVE_XML_PARSERRESET: expat 1.95.3
37
+ * HAVE_EXPAT_H: expat 1.95.0
38
+ * HAVE_XML_SETDOCTYPEDECLHANDLER: expat 19990728
39
+ * XML_DTD: expat 19990626
40
+ * NEW_EXPAT: expat 1.1
41
+ */
42
+
43
+ #include "ruby.h"
44
+ #ifdef HAVE_RUBY_IO_H
45
+ #include "ruby/io.h"
46
+ #else
47
+ #include "rubyio.h"
48
+ #endif
49
+ #include <stdio.h>
50
+ #include <ctype.h>
51
+ #ifdef HAVE_EXPAT_H
52
+ # include "expat.h"
53
+ #else
54
+ # include "xmlparse.h"
55
+ #endif
56
+ #ifdef XML_ENC_PATH
57
+ # include <limits.h>
58
+ # include <sys/stat.h>
59
+ # include "encoding.h"
60
+ # ifndef PATH_MAX
61
+ # define PATH_MAX 256
62
+ # endif
63
+ #endif
64
+
65
+ static VALUE eXMLParserError;
66
+ static VALUE cXMLParser;
67
+ static VALUE cXMLEncoding;
68
+ static ID id_map;
69
+ static ID id_startElementHandler;
70
+ static ID id_endElementHandler;
71
+ static ID id_characterDataHandler;
72
+ static ID id_processingInstructionHandler;
73
+ static ID id_defaultHandler;
74
+ static ID id_defaultExpandHandler;
75
+ static ID id_unparsedEntityDeclHandler;
76
+ static ID id_notationDeclHandler;
77
+ static ID id_externalEntityRefHandler;
78
+ static ID id_unknownEncoding;
79
+ static ID id_convert;
80
+ #ifdef NEW_EXPAT
81
+ static ID id_commentHandler;
82
+ static ID id_startCdataSectionHandler;
83
+ static ID id_endCdataSectionHandler;
84
+ static ID id_startNamespaceDeclHandler;
85
+ static ID id_endNamespaceDeclHandler;
86
+ static ID id_notStandaloneHandler;
87
+ #endif
88
+ #ifdef HAVE_XML_SETDOCTYPEDECLHANDLER
89
+ static ID id_startDoctypeDeclHandler;
90
+ static ID id_endDoctypeDeclHandler;
91
+ #endif
92
+ #ifdef HAVE_EXPAT_H
93
+ static ID id_elementDeclHandler;
94
+ static ID id_attlistDeclHandler;
95
+ static ID id_xmlDeclHandler;
96
+ static ID id_entityDeclHandler;
97
+ #endif
98
+ #if 0
99
+ static ID id_externalParsedEntityDeclHandler;
100
+ static ID id_internalParsedEntityDeclHandler;
101
+ #endif
102
+ #ifdef HAVE_XML_SETSKIPPEDENTITYHANDLER
103
+ static ID id_skippedEntityHandler;
104
+ #endif
105
+
106
+ #define GET_PARSER(obj, parser) \
107
+ Data_Get_Struct(obj, XMLParser, parser)
108
+
109
+ typedef struct _XMLParser {
110
+ XML_Parser parser;
111
+ int iterator;
112
+ int defaultCurrent;
113
+ #ifdef NEW_EXPAT
114
+ const XML_Char** lastAttrs;
115
+ #endif
116
+ int tainted;
117
+ VALUE parent;
118
+ char* context;
119
+ } XMLParser;
120
+
121
+ static VALUE symDEFAULT;
122
+ static VALUE symSTART_ELEM;
123
+ static VALUE symEND_ELEM;
124
+ static VALUE symCDATA;
125
+ static VALUE symPI;
126
+ static VALUE symUNPARSED_ENTITY_DECL;
127
+ static VALUE symNOTATION_DECL;
128
+ static VALUE symEXTERNAL_ENTITY_REF;
129
+ #ifdef NEW_EXPAT
130
+ static VALUE symCOMMENT;
131
+ static VALUE symSTART_CDATA;
132
+ static VALUE symEND_CDATA;
133
+ static VALUE symSTART_NAMESPACE_DECL;
134
+ static VALUE symEND_NAMESPACE_DECL;
135
+ #endif
136
+ #ifdef HAVE_XML_SETDOCTYPEDECLHANDLER
137
+ static VALUE symSTART_DOCTYPE_DECL;
138
+ static VALUE symEND_DOCTYPE_DECL;
139
+ #endif
140
+ #ifdef HAVE_EXPAT_H
141
+ static VALUE symELEMENT_DECL;
142
+ static VALUE symATTLIST_DECL;
143
+ static VALUE symXML_DECL;
144
+ static VALUE symENTITY_DECL;
145
+ #endif
146
+ #if 0
147
+ static VALUE symEXTERNAL_PARSED_ENTITY_DECL;
148
+ static VALUE symINTERNAL_PARSED_ENTITY_DECL;
149
+ #endif
150
+ #if 0
151
+ static VALUE symUNKNOWN_ENCODING;
152
+ #endif
153
+ #ifdef HAVE_XML_SETSKIPPEDENTITYHANDLER
154
+ static VALUE symSKIPPED_ENTITY;
155
+ #endif
156
+
157
+ /* destructor */
158
+ static void
159
+ XMLParser_free(XMLParser* parser)
160
+ {
161
+ /* fprintf(stderr, "Delete XMLParser: %p->%p\n", parser, parser->parser);*/
162
+ if (parser->parser) {
163
+ XML_ParserFree(parser->parser);
164
+ parser->parser = NULL;
165
+ }
166
+ free(parser);
167
+ }
168
+
169
+ static void
170
+ XMLParser_mark(XMLParser* parser)
171
+ {
172
+ /* GC mark hook: marks the parent parser VALUE (when set) so it is not collected. */
173
+ if (!NIL_P(parser->parent)) {
174
+ /* Only the VALUE itself needs marking. The unwrapped parent struct was never */
175
+ /* used, and GET_PARSER type-checks its argument, which is unwanted during GC. */
176
+ rb_gc_mark(parser->parent);
177
+ }
178
+ }
179
+
180
+ static void
181
+ taintParser(XMLParser* parser) {
182
+ parser->tainted |= 1;
183
+ if (!NIL_P(parser->parent) && !parser->context) {
184
+ XMLParser* parent;
185
+ GET_PARSER(parser->parent, parent);
186
+ taintParser(parent);
187
+ }
188
+ }
189
+
190
+ inline static VALUE
191
+ taintObject(XMLParser* parser, VALUE obj) {
192
+ if (parser->tainted)
193
+ OBJ_TAINT(obj);
194
+ return obj;
195
+ }
196
+ #define TO_(o) (taintObject(parser, o))
197
+
198
+ inline static VALUE
199
+ freezeObject(VALUE obj) {
200
+ OBJ_FREEZE(obj);
201
+ return obj;
202
+ }
203
+ #define FO_(o) (freezeObject(o))
204
+
205
+
206
+ /* Event handlers for iterator */
207
+ static void
208
+ iterStartElementHandler(void *recv,
209
+ const XML_Char *name, const XML_Char **atts)
210
+ {
211
+ XMLParser* parser;
212
+ VALUE attrhash;
213
+
214
+ GET_PARSER(recv, parser);
215
+ #ifdef NEW_EXPAT
216
+ parser->lastAttrs = atts;
217
+ #endif
218
+ attrhash = rb_hash_new();
219
+ while (*atts) {
220
+ const char* key = *atts++;
221
+ const char* val = *atts++;
222
+ rb_hash_aset(attrhash,
223
+ FO_(TO_(rb_str_new2((char*)key))),
224
+ TO_(rb_str_new2((char*)val)));
225
+ }
226
+
227
+ rb_yield(rb_ary_new3(4, symSTART_ELEM,
228
+ TO_(rb_str_new2((char*)name)), attrhash, recv));
229
+ if (parser->defaultCurrent) {
230
+ parser->defaultCurrent = 0;
231
+ XML_DefaultCurrent(parser->parser);
232
+ }
233
+ }
234
+
235
+ static void
236
+ iterEndElementHandler(void *recv,
237
+ const XML_Char *name)
238
+ {
239
+ XMLParser* parser;
240
+ GET_PARSER(recv, parser);
241
+ rb_yield(rb_ary_new3(4, symEND_ELEM,
242
+ TO_(rb_str_new2((char*)name)), Qnil, recv));
243
+ if (parser->defaultCurrent) {
244
+ parser->defaultCurrent = 0;
245
+ XML_DefaultCurrent(parser->parser);
246
+ }
247
+ }
248
+
249
+ static void
250
+ iterCharacterDataHandler(void *recv,
251
+ const XML_Char *s,
252
+ int len)
253
+ {
254
+ XMLParser* parser;
255
+ GET_PARSER(recv, parser);
256
+ rb_yield(rb_ary_new3(4, symCDATA,
257
+ Qnil, TO_(rb_str_new((char*)s, len)), recv));
258
+ if (parser->defaultCurrent) {
259
+ parser->defaultCurrent = 0;
260
+ XML_DefaultCurrent(parser->parser);
261
+ }
262
+ }
263
+
264
+ static void
265
+ iterProcessingInstructionHandler(void *recv,
266
+ const XML_Char *target,
267
+ const XML_Char *data)
268
+ {
269
+ XMLParser* parser;
270
+ GET_PARSER(recv, parser);
271
+ rb_yield(rb_ary_new3(4, symPI,
272
+ TO_(rb_str_new2((char*)target)),
273
+ TO_(rb_str_new2((char*)data)), recv));
274
+ if (parser->defaultCurrent) {
275
+ parser->defaultCurrent = 0;
276
+ XML_DefaultCurrent(parser->parser);
277
+ }
278
+ }
279
+
280
+ static void
281
+ iterDefaultHandler(void *recv,
282
+ const XML_Char *s,
283
+ int len)
284
+ { /* Iterator-mode default handler: yields [symDEFAULT, nil, text, recv] to the block. */
285
+ XMLParser* parser;
286
+ GET_PARSER(recv, parser);
287
+ rb_yield(rb_ary_new3(4, symDEFAULT,
288
+ Qnil, TO_(rb_str_new((char*)s, len)), recv)); /* text is the len bytes at s, tainted when the parser is */
289
+ if (parser->defaultCurrent) {
290
+ parser->defaultCurrent = 0; /* clear the pending request without forwarding it */
291
+ /* XML_DefaultCurrent should not be called from within the default handler */
292
+ /* XML_DefaultCurrent(parser->parser); */
293
+ }
294
+ }
295
+
296
+ void
297
+ iterUnparsedEntityDeclHandler(void *recv,
298
+ const XML_Char *entityName,
299
+ const XML_Char *base,
300
+ const XML_Char *systemId,
301
+ const XML_Char *publicId,
302
+ const XML_Char *notationName)
303
+ {
304
+ XMLParser* parser;
305
+ VALUE valary;
306
+
307
+ GET_PARSER(recv, parser);
308
+ valary = rb_ary_new3(4, (base ? TO_(rb_str_new2((char*)base)) : Qnil),
309
+ TO_(rb_str_new2((char*)systemId)),
310
+ (publicId ? TO_(rb_str_new2((char*)publicId)) : Qnil),
311
+ TO_(rb_str_new2((char*)notationName)));
312
+ rb_yield(rb_ary_new3(4, symUNPARSED_ENTITY_DECL,
313
+ TO_(rb_str_new2((char*)entityName)),
314
+ valary, recv));
315
+ if (parser->defaultCurrent) {
316
+ parser->defaultCurrent = 0;
317
+ XML_DefaultCurrent(parser->parser);
318
+ }
319
+ }
320
+
321
+ void
322
+ iterNotationDeclHandler(void *recv,
323
+ const XML_Char *notationName,
324
+ const XML_Char *base,
325
+ const XML_Char *systemId,
326
+ const XML_Char *publicId)
327
+ {
328
+ XMLParser* parser;
329
+ VALUE valary;
330
+
331
+ GET_PARSER(recv, parser);
332
+ valary = rb_ary_new3(3,
333
+ (base ? TO_(rb_str_new2((char*)base)) : Qnil),
334
+ (systemId ? TO_(rb_str_new2((char*)systemId)) : Qnil),
335
+ (publicId ? TO_(rb_str_new2((char*)publicId)) : Qnil));
336
+ rb_yield(rb_ary_new3(4, symNOTATION_DECL,
337
+ TO_(rb_str_new2((char*)notationName)),
338
+ valary, recv));
339
+ if (parser->defaultCurrent) {
340
+ parser->defaultCurrent = 0;
341
+ XML_DefaultCurrent(parser->parser);
342
+ }
343
+ }
344
+
345
+ int
346
+ iterExternalEntityRefHandler(XML_Parser xmlparser,
347
+ const XML_Char *context,
348
+ const XML_Char *base,
349
+ const XML_Char *systemId,
350
+ const XML_Char *publicId)
351
+ {
352
+ XMLParser* parser;
353
+ VALUE recv;
354
+ VALUE valary;
355
+ VALUE ret;
356
+
357
+ recv = (VALUE)XML_GetUserData(xmlparser);
358
+ GET_PARSER(recv, parser);
359
+ valary = rb_ary_new3(3,
360
+ (base ? TO_(rb_str_new2((char*)base)) : Qnil),
361
+ (systemId ? TO_(rb_str_new2((char*)systemId)) : Qnil),
362
+ (publicId ? TO_(rb_str_new2((char*)publicId)) : Qnil));
363
+ ret = rb_yield(rb_ary_new3(4, symEXTERNAL_ENTITY_REF,
364
+ (context ? TO_(rb_str_new2((char*)context)) : Qnil),
365
+ valary, recv));
366
+ if (parser->defaultCurrent) {
367
+ parser->defaultCurrent = 0;
368
+ XML_DefaultCurrent(parser->parser);
369
+ }
370
+ /* The error status in this iterator block should be returned
371
+ by the exception. */
372
+ return 1;
373
+ }
374
+
375
+ #ifdef NEW_EXPAT
376
+ static void
377
+ iterCommentHandler(void *recv,
378
+ const XML_Char *s)
379
+ {
380
+ XMLParser* parser;
381
+ GET_PARSER(recv, parser);
382
+ rb_yield(rb_ary_new3(4, symCOMMENT,
383
+ Qnil, TO_(rb_str_new2((char*)s)), recv));
384
+ if (parser->defaultCurrent) {
385
+ parser->defaultCurrent = 0;
386
+ XML_DefaultCurrent(parser->parser);
387
+ }
388
+ }
389
+
390
+ static void
391
+ iterStartCdataSectionHandler(void *recv)
392
+ {
393
+ XMLParser* parser;
394
+ GET_PARSER(recv, parser);
395
+ rb_yield(rb_ary_new3(4, symSTART_CDATA, Qnil, Qnil, recv));
396
+ if (parser->defaultCurrent) {
397
+ parser->defaultCurrent = 0;
398
+ XML_DefaultCurrent(parser->parser);
399
+ }
400
+ }
401
+
402
+ static void
403
+ iterEndCdataSectionHandler(void *recv)
404
+ {
405
+ XMLParser* parser;
406
+ GET_PARSER(recv, parser);
407
+ rb_yield(rb_ary_new3(4, symEND_CDATA, Qnil, Qnil, recv));
408
+ if (parser->defaultCurrent) {
409
+ parser->defaultCurrent = 0;
410
+ XML_DefaultCurrent(parser->parser);
411
+ }
412
+ }
413
+
414
+ static void
415
+ iterStartNamespaceDeclHandler(void *recv,
416
+ const XML_Char *prefix,
417
+ const XML_Char *uri)
418
+ {
419
+ XMLParser* parser;
420
+ GET_PARSER(recv, parser);
421
+ rb_yield(rb_ary_new3(4, symSTART_NAMESPACE_DECL,
422
+ (prefix ? TO_(rb_str_new2((char*)prefix)) : Qnil),
423
+ (uri ? TO_(rb_str_new2((char*)uri)) : Qnil), recv));
424
+ if (parser->defaultCurrent) {
425
+ parser->defaultCurrent = 0;
426
+ XML_DefaultCurrent(parser->parser);
427
+ }
428
+ }
429
+
430
+ static void
431
+ iterEndNamespaceDeclHandler(void *recv,
432
+ const XML_Char *prefix)
433
+ {
434
+ XMLParser* parser;
435
+ GET_PARSER(recv, parser);
436
+ rb_yield(rb_ary_new3(4, symEND_NAMESPACE_DECL,
437
+ (prefix ? TO_(rb_str_new2((char*)prefix)) : Qnil),
438
+ Qnil, recv));
439
+ if (parser->defaultCurrent) {
440
+ parser->defaultCurrent = 0;
441
+ XML_DefaultCurrent(parser->parser);
442
+ }
443
+ }
444
+ #endif
445
+
446
+ #ifdef HAVE_XML_SETDOCTYPEDECLHANDLER /* same guard as symSTART_DOCTYPE_DECL / symEND_DOCTYPE_DECL used below */
447
+ static void
448
+ #ifdef HAVE_EXPAT_H
449
+ iterStartDoctypeDeclHandler(void *recv,
450
+ const XML_Char *doctypeName,
451
+ const XML_Char *sysid,
452
+ const XML_Char *pubid,
453
+ int has_internal_subset)
454
+ #else
455
+ iterStartDoctypeDeclHandler(void *recv,
456
+ const XML_Char *doctypeName)
457
+ #endif
458
+ { /* Iterator-mode handler: yields [symSTART_DOCTYPE_DECL, doctypeName, valary, recv]. */
459
+ XMLParser* parser;
460
+ VALUE valary = Qnil; /* stays nil when built without HAVE_EXPAT_H */
461
+ /* valary is [sysid or nil, pubid or nil, true/false for has_internal_subset] with expat.h. */
462
+ GET_PARSER(recv, parser);
463
+ #ifdef HAVE_EXPAT_H
464
+ valary = rb_ary_new3(3,
465
+ (sysid ? TO_(rb_str_new2((char*)sysid)) : Qnil),
466
+ (pubid ? TO_(rb_str_new2((char*)pubid)) : Qnil),
467
+ (has_internal_subset ? Qtrue : Qfalse));
468
+ #endif
469
+ rb_yield(rb_ary_new3(4, symSTART_DOCTYPE_DECL,
470
+ TO_(rb_str_new2((char*)doctypeName)),
471
+ valary, recv));
472
+ if (parser->defaultCurrent) { /* the block asked for this event to go to the default handler */
473
+ parser->defaultCurrent = 0;
474
+ XML_DefaultCurrent(parser->parser);
475
+ }
476
+ }
477
+ /* Iterator-mode handler: yields [symEND_DOCTYPE_DECL, nil, nil, recv]. */
478
+ static void
479
+ iterEndDoctypeDeclHandler(void *recv)
480
+ {
481
+ XMLParser* parser;
482
+ GET_PARSER(recv, parser);
483
+ rb_yield(rb_ary_new3(4, symEND_DOCTYPE_DECL,
484
+ Qnil,
485
+ Qnil, recv));
486
+ if (parser->defaultCurrent) { /* the block asked for this event to go to the default handler */
487
+ parser->defaultCurrent = 0;
488
+ XML_DefaultCurrent(parser->parser);
489
+ }
490
+ }
491
+ #endif
492
+
493
+
494
+ #ifdef HAVE_EXPAT_H
495
+
496
+ static VALUE
497
+ makeContentArray(XMLParser* parser, XML_Content* model)
498
+ {
499
+ static const char* content_type_name[] = {
500
+ NULL, "EMPTY", "ANY", "MIXED", "NAME", "CHOICE", "SEQ"
501
+ };
502
+ static const char* content_quant_name[] = {
503
+ "", "?", "*", "+"
504
+ };
505
+ int i;
506
+ VALUE children = Qnil;
507
+ const char* type_name = content_type_name[model->type];
508
+ const char* quant_name = content_quant_name[model->quant];
509
+ VALUE ret = rb_ary_new3(3,
510
+ TO_(rb_str_new2((char*)type_name)),
511
+ TO_(rb_str_new2((char*)quant_name)),
512
+ (model->name ? TO_(rb_str_new2((char*)model->name)) :
513
+ Qnil));
514
+ if (model->numchildren > 0) {
515
+ children = rb_ary_new();
516
+ for (i =0; i < model->numchildren; i++) {
517
+ VALUE child = makeContentArray(parser, model->children + i);
518
+ rb_ary_push(children, child);
519
+ }
520
+ }
521
+ rb_ary_push(ret, children);
522
+ return ret;
523
+ }
524
+
525
+
526
+
527
+ static void
528
+ iterElementDeclHandler(void *recv,
529
+ const XML_Char *name,
530
+ XML_Content *model)
531
+ {
532
+ XMLParser* parser;
533
+ VALUE content;
534
+ GET_PARSER(recv, parser);
535
+ content = makeContentArray(parser, model);
536
+ rb_yield(rb_ary_new3(4, symELEMENT_DECL,
537
+ TO_(rb_str_new2(name)),
538
+ content, recv));
539
+ if (parser->defaultCurrent) {
540
+ parser->defaultCurrent = 0;
541
+ XML_DefaultCurrent(parser->parser);
542
+ }
543
+ }
544
+
545
+ static void
546
+ iterAttlistDeclHandler (void *recv,
547
+ const XML_Char *elname,
548
+ const XML_Char *attname,
549
+ const XML_Char *att_type,
550
+ const XML_Char *dflt,
551
+ int isrequired)
552
+ {
553
+ XMLParser* parser;
554
+ VALUE valary;
555
+
556
+ GET_PARSER(recv, parser);
557
+ valary = rb_ary_new3(4,
558
+ TO_(rb_str_new2((char*)attname)),
559
+ TO_(rb_str_new2((char*)att_type)),
560
+ (dflt ? TO_(rb_str_new2((char*)dflt)) : Qnil),
561
+ (isrequired ? Qtrue : Qfalse));
562
+ rb_yield(rb_ary_new3(4, symATTLIST_DECL,
563
+ TO_(rb_str_new2(elname)),
564
+ valary, recv));
565
+ if (parser->defaultCurrent) {
566
+ parser->defaultCurrent = 0;
567
+ XML_DefaultCurrent(parser->parser);
568
+ }
569
+ }
570
+
571
+ static void
572
+ iterXmlDeclHandler (void *recv,
573
+ const XML_Char *version,
574
+ const XML_Char *encoding,
575
+ int standalone)
576
+ {
577
+ XMLParser* parser;
578
+ VALUE valary;
579
+
580
+ GET_PARSER(recv, parser);
581
+ valary = rb_ary_new3(3,
582
+ (version ? TO_(rb_str_new2(version)) : Qnil),
583
+ (encoding ? TO_(rb_str_new2((char*)encoding)) : Qnil),
584
+ INT2FIX(standalone));
585
+ rb_yield(rb_ary_new3(4, symXML_DECL,
586
+ Qnil,
587
+ valary, recv));
588
+ if (parser->defaultCurrent) {
589
+ parser->defaultCurrent = 0;
590
+ XML_DefaultCurrent(parser->parser);
591
+ }
592
+ }
593
+
594
+ static void
595
+ iterEntityDeclHandler (void *recv,
596
+ const XML_Char *entityName,
597
+ int is_parameter_entity,
598
+ const XML_Char *value,
599
+ int value_length,
600
+ const XML_Char *base,
601
+ const XML_Char *systemId,
602
+ const XML_Char *publicId,
603
+ const XML_Char *notationName)
604
+ {
605
+ XMLParser* parser;
606
+ VALUE valary;
607
+
608
+ GET_PARSER(recv, parser);
609
+ valary = rb_ary_new3(6,
610
+ (is_parameter_entity ? Qtrue : Qfalse),
611
+ TO_(rb_str_new((char*)value, value_length)),
612
+ (base ? TO_(rb_str_new2((char*)base)) : Qnil),
613
+ (systemId ? TO_(rb_str_new2((char*)systemId)) : Qnil),
614
+ (publicId ? TO_(rb_str_new2((char*)publicId)) : Qnil),
615
+ (notationName ? TO_(rb_str_new2((char*)notationName))
616
+ : Qnil));
617
+ rb_yield(rb_ary_new3(4, symENTITY_DECL,
618
+ TO_(rb_str_new2(entityName)),
619
+ valary, recv));
620
+ if (parser->defaultCurrent) {
621
+ parser->defaultCurrent = 0;
622
+ XML_DefaultCurrent(parser->parser);
623
+ }
624
+ }
625
+
626
+ #endif
627
+
628
+ #if 0
629
+ static void
630
+ iterExternalParsedEntityDeclHandler(void *recv,
631
+ const XML_Char *entityName,
632
+ const XML_Char *base,
633
+ const XML_Char *systemId,
634
+ const XML_Char *publicId)
635
+ {
636
+ XMLParser* parser;
637
+ VALUE valary;
638
+
639
+ GET_PARSER(recv, parser);
640
+ valary = rb_ary_new3(3, (base ? TO_(rb_str_new2((char*)base)) : Qnil),
641
+ TO_(rb_str_new2((char*)systemId)),
642
+ (publicId ? TO_(rb_str_new2((char*)publicId)) : Qnil));
643
+ rb_yield(rb_ary_new3(4, symEXTERNAL_PARSED_ENTITY_DECL,
644
+ TO_(rb_str_new2((char*)entityName)),
645
+ valary, recv));
646
+ if (parser->defaultCurrent) {
647
+ parser->defaultCurrent = 0;
648
+ XML_DefaultCurrent(parser->parser);
649
+ }
650
+ }
651
+
652
+ static void
653
+ iterInternalParsedEntityDeclHandler(void *recv,
654
+ const XML_Char *entityName,
655
+ const XML_Char *replacementText,
656
+ int replacementTextLength)
657
+ {
658
+ XMLParser* parser;
659
+ GET_PARSER(recv, parser);
660
+ rb_yield(rb_ary_new3(4, symINTERNAL_PARSED_ENTITY_DECL,
661
+ TO_(rb_str_new2((char*)entityName)),
662
+ TO_(rb_str_new((char*)replacementText,
663
+ replacementTextLength)), recv));
664
+ if (parser->defaultCurrent) {
665
+ parser->defaultCurrent = 0;
666
+ XML_DefaultCurrent(parser->parser);
667
+ }
668
+ }
669
+ #endif
670
+
671
+ #ifdef HAVE_XML_SETSKIPPEDENTITYHANDLER
672
+ static void
673
+ iterSkippedEntityHandler(void *recv,
674
+ const XML_Char *entityName,
675
+ int is_parameter_entity)
676
+ {
677
+ XMLParser* parser;
678
+ GET_PARSER(recv, parser);
679
+ rb_yield(rb_ary_new3(4, symSKIPPED_ENTITY,
680
+ TO_(rb_str_new2((char*)entityName)),
681
+ INT2FIX(is_parameter_entity), recv));
682
+ if (parser->defaultCurrent) {
683
+ parser->defaultCurrent = 0;
684
+ XML_DefaultCurrent(parser->parser);
685
+ }
686
+ }
687
+ #endif
688
+
689
+
690
+
691
+ /* Event handlers for instance method */
692
+ static void
693
+ myStartElementHandler(void *recv,
694
+ const XML_Char *name, const XML_Char **atts)
695
+ {
696
+ XMLParser* parser;
697
+ VALUE attrhash;
698
+
699
+ GET_PARSER(recv, parser);
700
+ #ifdef NEW_EXPAT
701
+ parser->lastAttrs = atts;
702
+ #endif
703
+ attrhash = rb_hash_new();
704
+ while (*atts) {
705
+ const char* key = *atts++;
706
+ const char* val = *atts++;
707
+ rb_hash_aset(attrhash,
708
+ FO_(TO_(rb_str_new2((char*)key))),
709
+ TO_(rb_str_new2((char*)val)));
710
+ }
711
+ rb_funcall((VALUE)recv, id_startElementHandler, 2,
712
+ TO_(rb_str_new2((char*)name)), attrhash);
713
+ }
714
+
715
+ static void
716
+ myEndElementHandler(void *recv,
717
+ const XML_Char *name)
718
+ {
719
+ XMLParser* parser;
720
+ GET_PARSER(recv, parser);
721
+ rb_funcall((VALUE)recv, id_endElementHandler, 1,
722
+ TO_(rb_str_new2((char*)name)));
723
+ }
724
+
725
+ static void
726
+ myCharacterDataHandler(void *recv,
727
+ const XML_Char *s,
728
+ int len)
729
+ {
730
+ XMLParser* parser;
731
+ GET_PARSER(recv, parser);
732
+ rb_funcall((VALUE)recv, id_characterDataHandler, 1,
733
+ TO_(rb_str_new((char*)s, len)));
734
+ }
735
+
736
+ static void
737
+ myProcessingInstructionHandler(void *recv,
738
+ const XML_Char *target,
739
+ const XML_Char *data)
740
+ {
741
+ XMLParser* parser;
742
+ GET_PARSER(recv, parser);
743
+ rb_funcall((VALUE)recv, id_processingInstructionHandler, 2,
744
+ TO_(rb_str_new2((char*)target)),
745
+ TO_(rb_str_new2((char*)data)));
746
+ }
747
+
748
+ static void
749
+ myDefaultHandler(void *recv,
750
+ const XML_Char *s,
751
+ int len)
752
+ {
753
+ XMLParser* parser;
754
+ GET_PARSER(recv, parser);
755
+ rb_funcall((VALUE)recv, id_defaultHandler, 1,
756
+ TO_(rb_str_new((char*)s, len)));
757
+ }
758
+
759
+ #ifdef NEW_EXPAT
760
+ static void
761
+ myDefaultExpandHandler(void *recv,
762
+ const XML_Char *s,
763
+ int len)
764
+ {
765
+ XMLParser* parser;
766
+ GET_PARSER(recv, parser);
767
+ rb_funcall((VALUE)recv, id_defaultExpandHandler, 1,
768
+ TO_(rb_str_new((char*)s, len)));
769
+ }
770
+ #endif
771
+
772
+ void
773
+ myUnparsedEntityDeclHandler(void *recv,
774
+ const XML_Char *entityName,
775
+ const XML_Char *base,
776
+ const XML_Char *systemId,
777
+ const XML_Char *publicId,
778
+ const XML_Char *notationName)
779
+ {
780
+ XMLParser* parser;
781
+ GET_PARSER(recv, parser);
782
+ rb_funcall((VALUE)recv, id_unparsedEntityDeclHandler, 5,
783
+ TO_(rb_str_new2((char*)entityName)),
784
+ (base ? TO_(rb_str_new2((char*)base)) : Qnil),
785
+ TO_(rb_str_new2((char*)systemId)),
786
+ (publicId ? TO_(rb_str_new2((char*)publicId)) : Qnil),
787
+ TO_(rb_str_new2((char*)notationName)));
788
+ }
789
+
790
+ void
791
+ myNotationDeclHandler(void *recv,
792
+ const XML_Char *notationName,
793
+ const XML_Char *base,
794
+ const XML_Char *systemId,
795
+ const XML_Char *publicId)
796
+ {
797
+ XMLParser* parser;
798
+ GET_PARSER(recv, parser);
799
+ rb_funcall((VALUE)recv, id_notationDeclHandler, 4,
800
+ TO_(rb_str_new2((char*)notationName)),
801
+ (base ? TO_(rb_str_new2((char*)base)) : Qnil),
802
+ (systemId ? TO_(rb_str_new2((char*)systemId)) : Qnil),
803
+ (publicId ? TO_(rb_str_new2((char*)publicId)) : Qnil));
804
+ }
805
+
806
+ int
807
+ myExternalEntityRefHandler(XML_Parser xmlparser,
808
+ const XML_Char *context,
809
+ const XML_Char *base,
810
+ const XML_Char *systemId,
811
+ const XML_Char *publicId)
812
+ {
813
+ XMLParser* parser;
814
+ VALUE recv;
815
+ VALUE ret;
816
+
817
+ recv = (VALUE)XML_GetUserData(xmlparser);
818
+ GET_PARSER(recv, parser);
819
+ ret = rb_funcall(recv, id_externalEntityRefHandler, 4,
820
+ (context ? TO_(rb_str_new2((char*)context)): Qnil),
821
+ (base ? TO_(rb_str_new2((char*)base)) : Qnil),
822
+ (systemId ? TO_(rb_str_new2((char*)systemId)) : Qnil),
823
+ (publicId ? TO_(rb_str_new2((char*)publicId)) : Qnil));
824
+ /* The error status in this handler should be returned
825
+ by the exception. */
826
+ return Qnil;
827
+ }
828
+
829
+ #ifdef NEW_EXPAT
830
+ static void
831
+ myCommentHandler(void *recv,
832
+ const XML_Char *s)
833
+ {
834
+ XMLParser* parser;
835
+ GET_PARSER(recv, parser);
836
+ rb_funcall((VALUE)recv, id_commentHandler, 1,
837
+ TO_(rb_str_new2((char*)s)));
838
+ }
839
+
840
+ static void
841
+ myStartCdataSectionHandler(void *recv)
842
+ {
843
+ XMLParser* parser;
844
+ GET_PARSER(recv, parser);
845
+ rb_funcall((VALUE)recv, id_startCdataSectionHandler, 0);
846
+ }
847
+
848
+ static void
849
+ myEndCdataSectionHandler(void *recv)
850
+ {
851
+ XMLParser* parser;
852
+ GET_PARSER(recv, parser);
853
+ rb_funcall((VALUE)recv, id_endCdataSectionHandler, 0);
854
+ }
855
+
856
+ static void
857
+ myStartNamespaceDeclHandler(void *recv,
858
+ const XML_Char *prefix,
859
+ const XML_Char *uri)
860
+ {
861
+ XMLParser* parser;
862
+ GET_PARSER(recv, parser);
863
+ rb_funcall((VALUE)recv, id_startNamespaceDeclHandler, 2,
864
+ (prefix ? TO_(rb_str_new2((char*)prefix)) : Qnil),
865
+ (uri ? TO_(rb_str_new2((char*)uri)) : Qnil));
866
+ }
867
+
868
+ static void
869
+ myEndNamespaceDeclHandler(void *recv,
870
+ const XML_Char *prefix)
871
+ {
872
+ XMLParser* parser;
873
+ GET_PARSER(recv, parser);
874
+ rb_funcall((VALUE)recv, id_endNamespaceDeclHandler, 1,
875
+ (prefix ? TO_(rb_str_new2((char*)prefix)) : Qnil));
876
+ }
877
+
878
+ static int
879
+ myNotStandaloneHandler(void *recv)
880
+ {
881
+ XMLParser* parser;
882
+ VALUE v;
883
+
884
+ GET_PARSER(recv, parser);
885
+ v = rb_funcall((VALUE)recv, id_notStandaloneHandler, 0);
886
+ Check_Type(v, T_FIXNUM);
887
+ return FIX2INT(v);
888
+ }
889
+ #endif
890
+
891
+ #ifdef HAVE_XML_SETPARAMENTITYPARSING
892
+ static void
893
+ #ifdef HAVE_EXPAT_H
894
+ myStartDoctypeDeclHandler(void *recv,
895
+ const XML_Char *doctypeName,
896
+ const XML_Char *sysid,
897
+ const XML_Char *pubid,
898
+ int has_internal_subset)
899
+ #else
900
+ myStartDoctypeDeclHandler(void *recv,
901
+ const XML_Char *doctypeName)
902
+ #endif
903
+ {
904
+ XMLParser* parser;
905
+ GET_PARSER(recv, parser);
906
+ #ifdef HAVE_EXPAT_H
907
+ rb_funcall((VALUE)recv, id_startDoctypeDeclHandler, 4,
908
+ TO_(rb_str_new2((char*)doctypeName)),
909
+ (sysid ? TO_(rb_str_new2((char*)sysid)) : Qnil),
910
+ (pubid ? TO_(rb_str_new2((char*)pubid)) : Qnil),
911
+ (has_internal_subset ? Qtrue : Qfalse));
912
+ #else
913
+ rb_funcall((VALUE)recv, id_startDoctypeDeclHandler, 4,
914
+ TO_(rb_str_new2((char*)doctypeName)),
915
+ Qnil, Qnil, Qfalse);
916
+ #endif
917
+ }
918
+
919
+ static void
920
+ myEndDoctypeDeclHandler(void *recv)
921
+ {
922
+ XMLParser* parser;
923
+ GET_PARSER(recv, parser);
924
+ rb_funcall((VALUE)recv, id_endDoctypeDeclHandler, 0);
925
+ }
926
+ #endif
927
+
928
+
929
+ #ifdef HAVE_EXPAT_H
930
+
931
+ static void
932
+ myElementDeclHandler(void *recv,
933
+ const XML_Char *name,
934
+ XML_Content *model)
935
+ {
936
+ XMLParser* parser;
937
+ VALUE content;
938
+ GET_PARSER(recv, parser);
939
+ content = makeContentArray(parser, model);
940
+ rb_funcall((VALUE)recv, id_elementDeclHandler, 2,
941
+ TO_(rb_str_new2(name)), content);
942
+ }
943
+
944
+ static void
945
+ myAttlistDeclHandler (void *recv,
946
+ const XML_Char *elname,
947
+ const XML_Char *attname,
948
+ const XML_Char *att_type,
949
+ const XML_Char *dflt,
950
+ int isrequired)
951
+ {
952
+ XMLParser* parser;
953
+ GET_PARSER(recv, parser);
954
+ rb_funcall((VALUE)recv, id_attlistDeclHandler, 5,
955
+ TO_(rb_str_new2(elname)),
956
+ TO_(rb_str_new2((char*)attname)),
957
+ TO_(rb_str_new2((char*)att_type)),
958
+ (dflt ? TO_(rb_str_new2((char*)dflt)) : Qnil),
959
+ (isrequired ? Qtrue : Qfalse));
960
+ }
961
+
962
+ static void
963
+ myXmlDeclHandler (void *recv,
964
+ const XML_Char *version,
965
+ const XML_Char *encoding,
966
+ int standalone)
967
+ {
968
+ XMLParser* parser;
969
+ GET_PARSER(recv, parser);
970
+ rb_funcall((VALUE)recv, id_xmlDeclHandler, 3,
971
+ (version ? TO_(rb_str_new2(version)) : Qnil),
972
+ (encoding ? TO_(rb_str_new2((char*)encoding)) : Qnil),
973
+ INT2FIX(standalone));
974
+ }
975
+
976
+ static void
977
+ myEntityDeclHandler (void *recv,
978
+ const XML_Char *entityName,
979
+ int is_parameter_entity,
980
+ const XML_Char *value,
981
+ int value_length,
982
+ const XML_Char *base,
983
+ const XML_Char *systemId,
984
+ const XML_Char *publicId,
985
+ const XML_Char *notationName)
986
+ {
987
+ XMLParser* parser;
988
+ GET_PARSER(recv, parser);
989
+ rb_funcall((VALUE)recv, id_entityDeclHandler, 7,
990
+ TO_(rb_str_new2(entityName)),
991
+ (is_parameter_entity ? Qtrue : Qfalse),
992
+ TO_(rb_str_new((char*)value, value_length)),
993
+ (base ? TO_(rb_str_new2((char*)base)) : Qnil),
994
+ (systemId ? TO_(rb_str_new2((char*)systemId)) : Qnil),
995
+ (publicId ? TO_(rb_str_new2((char*)publicId)) : Qnil),
996
+ (notationName ? TO_(rb_str_new2((char*)notationName))
997
+ : Qnil));
998
+ }
999
+
1000
+ #endif
1001
+
1002
+ #if 0
1003
+ static void
1004
+ myExternalParsedEntityDeclHandler(void *recv,
1005
+ const XML_Char *entityName,
1006
+ const XML_Char *base,
1007
+ const XML_Char *systemId,
1008
+ const XML_Char *publicId)
1009
+ {
1010
+ XMLParser* parser;
1011
+ GET_PARSER(recv, parser);
1012
+ rb_funcall((VALUE)recv, id_externalParsedEntityDeclHandler, 4,
1013
+ TO_(rb_str_new2((char*)entityName)),
1014
+ (base ? TO_(rb_str_new2((char*)base)) : Qnil),
1015
+ TO_(rb_str_new2((char*)systemId)),
1016
+ (publicId ? TO_(rb_str_new2((char*)publicId)) : Qnil));
1017
+ }
1018
+
1019
+ static void
1020
+ myInternalParsedEntityDeclHandler(void *recv,
1021
+ const XML_Char *entityName,
1022
+ const XML_Char *replacementText,
1023
+ int replacementTextLength)
1024
+ {
1025
+ XMLParser* parser;
1026
+ GET_PARSER(recv, parser);
1027
+ rb_funcall((VALUE)recv, id_internalParsedEntityDeclHandler, 2,
1028
+ TO_(rb_str_new2((char*)entityName)),
1029
+ TO_(rb_str_new((char*)replacementText,
1030
+ replacementTextLength)));
1031
+ }
1032
+ #endif
1033
+
1034
+
1035
+ static VALUE
1036
+ XMLEncoding_map(VALUE obj, VALUE i)
1037
+ {
1038
+ return i;
1039
+ }
1040
+
1041
+ static VALUE
1042
+ XMLEncoding_convert(VALUE obj, VALUE str)
1043
+ {
1044
+ return INT2FIX('?');
1045
+ }
1046
+
1047
+ static int
1048
+ myEncodingConv(void *data, const char *s)
1049
+ {
1050
+ VALUE v;
1051
+ int len;
1052
+ int slen;
1053
+
1054
+ v = rb_ivar_get((VALUE)data, id_map);
1055
+ slen = RSTRING_PTR(v)[*(unsigned char*)s];
1056
+
1057
+ v = rb_funcall((VALUE)data, id_convert, 1, rb_str_new((char*)s, -slen));
1058
+ switch (TYPE(v)) {
1059
+ case T_FIXNUM:
1060
+ return FIX2INT(v);
1061
+ case T_STRING:
1062
+ len = RSTRING_LEN(v);
1063
+ if (len == 1) {
1064
+ return (unsigned char)*(RSTRING_PTR(v));
1065
+ }
1066
+ else if (len >= 2) {
1067
+ return (unsigned char)*(RSTRING_PTR(v)) |
1068
+ (unsigned char)*(RSTRING_PTR(v) + 1) << 8;
1069
+ }
1070
+ }
1071
+ return 0;
1072
+ }
1073
+
1074
+ #if 0
1075
+ static int
1076
+ iterUnknownEncodingHandler(void *recv,
1077
+ const XML_Char *name,
1078
+ XML_Encoding *info)
1079
+ {
1080
+ XMLParser* parser;
1081
+ VALUE ret;
1082
+
1083
+ if (!rb_method_boundp(CLASS_OF((VALUE)recv), id_unknownEncoding, 0))
1084
+ return 0;
1085
+
1086
+ GET_PARSER(recv, parser);
1087
+ ret = rb_yield(rb_ary_new3(4, symUNKNOWN_ENCODING,
1088
+ TO_(rb_str_new2((char*)name)), Qnil, recv));
1089
+ if (TYPE(ret) == T_OBJECT && rb_obj_is_kind_of(ret, cXMLEncoding)) {
1090
+ int i;
1091
+ ID mid = rb_intern("map");
1092
+ VALUE cmap = rb_str_new(NULL, 256);
1093
+ rb_ivar_set(ret, id_map, cmap);
1094
+
1095
+ for (i = 0; i < 256; i++) {
1096
+ VALUE m = rb_funcall(ret, mid, 1, INT2FIX(i));
1097
+ RSTRING(cmap)->ptr[i] = info->map[i] = FIX2INT(m);
1098
+ }
1099
+ /* protect object form GC */
1100
+ rb_ivar_set(recv, rb_intern("_encoding"), ret);
1101
+ info->data = (void*)ret;
1102
+ info->convert = myEncodingConv;
1103
+ return 1;
1104
+ }
1105
+
1106
+ return 0;
1107
+ }
1108
+ #endif
1109
+
1110
+ #ifdef XML_ENC_PATH
1111
+ /*
1112
+ * Encoding map functions come from XML::Parser Version 2.19
1113
+ *
1114
+ * Copyright 1998 Larry Wall and Clark Cooper
1115
+ * All rights reserved.
1116
+ *
1117
+ * This program is free software; you can redistribute it and/or
1118
+ * modify it under the same terms as Perl itself.
1119
+ */
1120
+ static Encinfo*
1121
+ getEncinfo(char* data, int size)
1122
+ {
1123
+ Encmap_Header* header = (Encmap_Header*)data;
1124
+ unsigned short prefixes_size;
1125
+ unsigned short bytemap_size;
1126
+ Encinfo* ret;
1127
+ int i;
1128
+ PrefixMap* prefixes;
1129
+ unsigned short *bytemap;
1130
+
1131
+ if (size < sizeof(Encmap_Header) || ntohl(header->magic) != ENCMAP_MAGIC)
1132
+ return NULL;
1133
+ prefixes_size = ntohs(header->pfsize);
1134
+ bytemap_size = ntohs(header->bmsize);
1135
+ if (size != (sizeof(Encmap_Header) +
1136
+ prefixes_size * sizeof(PrefixMap) +
1137
+ bytemap_size * sizeof(unsigned short)))
1138
+ return NULL;
1139
+ if ((ret = (Encinfo*)malloc(sizeof(Encinfo))) == NULL) {
1140
+ return NULL;
1141
+ }
1142
+ ret->prefixes_size = prefixes_size;
1143
+ ret->bytemap_size = bytemap_size;
1144
+ for (i = 0; i < 256; i++)
1145
+ ret->firstmap[i] = ntohl(header->map[i]);
1146
+ prefixes = (PrefixMap*)(data + sizeof(Encmap_Header));
1147
+ bytemap = (unsigned short*)(data + sizeof(Encmap_Header)
1148
+ + sizeof(PrefixMap)*prefixes_size);
1149
+ if ((ret->prefixes =
1150
+ (PrefixMap*)malloc(sizeof(PrefixMap)*prefixes_size)) == NULL) {
1151
+ free(ret);
1152
+ return NULL;
1153
+ }
1154
+ if ((ret->bytemap =
1155
+ (unsigned short*)malloc(sizeof(unsigned short)*bytemap_size)) == NULL) {
1156
+ free(ret->prefixes);
1157
+ free(ret);
1158
+ return NULL;
1159
+ }
1160
+ for (i = 0; i < prefixes_size; i++, prefixes++) {
1161
+ ret->prefixes[i].min = prefixes->min;
1162
+ ret->prefixes[i].len = prefixes->len;
1163
+ ret->prefixes[i].bmap_start = ntohs(prefixes->bmap_start);
1164
+ memcpy(ret->prefixes[i].ispfx, prefixes->ispfx,
1165
+ sizeof(prefixes->ispfx) + sizeof(prefixes->ischar));
1166
+ }
1167
+ for (i = 0; i < bytemap_size; i++)
1168
+ ret->bytemap[i] = ntohs(bytemap[i]);
1169
+
1170
+ return ret;
1171
+ }
1172
+
1173
+ static int
1174
+ convertEncoding(Encinfo* enc, const char* seq)
1175
+ {
1176
+ PrefixMap* curpfx;
1177
+ int count;
1178
+ int index = 0;
1179
+
1180
+ for (count = 0; count < 4; count++) {
1181
+ unsigned char byte = (unsigned char)seq[count];
1182
+ unsigned char bndx;
1183
+ unsigned char bmsk;
1184
+ int offset;
1185
+
1186
+ curpfx = &enc->prefixes[index];
1187
+ offset = ((int)byte) - curpfx->min;
1188
+ if (offset < 0)
1189
+ break;
1190
+ if (offset >= curpfx->len && curpfx->len != 0)
1191
+ break;
1192
+
1193
+ bndx = byte >> 3;
1194
+ bmsk = 1 << (byte & 0x7);
1195
+
1196
+ if (curpfx->ispfx[bndx] & bmsk) {
1197
+ index = enc->bytemap[curpfx->bmap_start + offset];
1198
+ }
1199
+ else if (curpfx->ischar[bndx] & bmsk) {
1200
+ return enc->bytemap[curpfx->bmap_start + offset];
1201
+ }
1202
+ else
1203
+ break;
1204
+ }
1205
+
1206
+ return -1;
1207
+ }
1208
+
1209
+ static void
1210
+ releaseEncoding(Encinfo* enc)
1211
+ {
1212
+ if (enc) {
1213
+ if (enc->prefixes)
1214
+ free(enc->prefixes);
1215
+ if (enc->bytemap)
1216
+ free(enc->bytemap);
1217
+ free(enc);
1218
+ }
1219
+ }
1220
+
1221
+ static Encinfo*
1222
+ findEncoding(const char* encname)
1223
+ {
1224
+ FILE* fp;
1225
+ Encinfo* enc;
1226
+ struct stat st;
1227
+ int size;
1228
+ int len;
1229
+ char file[PATH_MAX] = "\0";
1230
+ const char* p;
1231
+ char* buf;
1232
+ #ifdef DOSISH
1233
+ const char sepchar = '\\';
1234
+ #else
1235
+ const char sepchar = '/';
1236
+ #endif
1237
+ const char* const encext = ".enc";
1238
+
1239
+ rb_secure(2);
1240
+ /* make map file path */
1241
+ if (XML_ENC_PATH != NULL) {
1242
+ strncpy(file, XML_ENC_PATH, PATH_MAX - 1);
1243
+ file[PATH_MAX - 1] = '\0';
1244
+ }
1245
+ len = strlen(file);
1246
+ if (len > 0 && len < PATH_MAX - 1 && file[len - 1] != sepchar)
1247
+ file[len++] = sepchar;
1248
+ for (p = encname; *p && len < PATH_MAX - 1; p++, len++) {
1249
+ file[len] = tolower(*p);
1250
+ }
1251
+ file[len] = '\0';
1252
+ strncat(file, encext, PATH_MAX - len -1);
1253
+
1254
+ if ((fp = fopen(file, "rb")) == NULL) {
1255
+ return NULL;
1256
+ }
1257
+
1258
+ /* get file length */
1259
+ fstat(fileno(fp), &st);
1260
+ size = st.st_size;
1261
+
1262
+ if ((buf = (char*)malloc(size)) == NULL) {
1263
+ fclose(fp);
1264
+ return NULL;
1265
+ }
1266
+
1267
+ fread(buf, 1, size, fp);
1268
+ fclose(fp);
1269
+ enc = getEncinfo(buf, size);
1270
+ free(buf);
1271
+ return enc;
1272
+ }
1273
+
1274
+ #endif
1275
+
1276
+ static int
1277
+ myUnknownEncodingHandler(void *recv,
1278
+ const XML_Char *name,
1279
+ XML_Encoding *info)
1280
+ {
1281
+ XMLParser* parser;
1282
+ VALUE ret;
1283
+ if (!rb_method_boundp(CLASS_OF((VALUE)recv), id_unknownEncoding, 0))
1284
+ #ifndef XML_ENC_PATH
1285
+ return 0;
1286
+ #else
1287
+ {
1288
+ Encinfo* enc;
1289
+
1290
+ if ((enc = findEncoding(name)) != NULL) {
1291
+ memcpy(info->map, enc->firstmap, sizeof(int)*256);
1292
+ info->data = enc;
1293
+ info->convert = (int(*)(void*,const char*))convertEncoding;
1294
+ info->release = (void(*)(void*))releaseEncoding;
1295
+ return 1;
1296
+ }
1297
+ else
1298
+ return 0;
1299
+ }
1300
+ #endif
1301
+
1302
+ GET_PARSER(recv, parser);
1303
+ ret = rb_funcall((VALUE)recv, id_unknownEncoding, 1,
1304
+ TO_(rb_str_new2((char*)name)));
1305
+ if (TYPE(ret) == T_OBJECT && rb_obj_is_kind_of(ret, cXMLEncoding)) {
1306
+ int i;
1307
+ ID mid = rb_intern("map");
1308
+ VALUE cmap = rb_str_new(NULL, 256);
1309
+ rb_ivar_set(ret, id_map, cmap);
1310
+
1311
+ if (OBJ_TAINTED(ret))
1312
+ taintParser(parser);
1313
+ TO_(cmap);
1314
+
1315
+ for (i = 0; i < 256; i++) {
1316
+ VALUE m = rb_funcall(ret, mid, 1, INT2FIX(i));
1317
+ RSTRING_PTR(cmap)[i] = info->map[i] = FIX2INT(m);
1318
+ }
1319
+ /* protect object form GC */
1320
+ rb_ivar_set((VALUE)recv, rb_intern("_encoding"), ret);
1321
+ info->data = (void*)ret;
1322
+ info->convert = myEncodingConv;
1323
+
1324
+ return 1;
1325
+ }
1326
+
1327
+ return 0;
1328
+ }
1329
+
1330
+ #ifdef HAVE_XML_SETSKIPPEDENTITYHANDLER
1331
+ static void
1332
+ mySkippedEntityHandler(void *recv,
1333
+ const XML_Char *entityName,
1334
+ int is_parameter_entity)
1335
+ {
1336
+ XMLParser* parser;
1337
+ GET_PARSER(recv, parser);
1338
+ rb_funcall((VALUE)recv, id_skippedEntityHandler, 2,
1339
+ TO_(rb_str_new2((char*)entityName)),
1340
+ INT2FIX(is_parameter_entity));
1341
+ }
1342
+ #endif
1343
+
1344
+
1345
+ /* constructor */
1346
+ static VALUE
1347
+ XMLParser_new(int argc, VALUE* argv, VALUE klass)
1348
+ {
1349
+ XMLParser* parser;
1350
+ VALUE obj;
1351
+ VALUE arg1;
1352
+ VALUE arg2;
1353
+ VALUE arg3;
1354
+ int count;
1355
+ char* encoding = NULL;
1356
+ #ifdef NEW_EXPAT
1357
+ char* nssep = NULL;
1358
+ #endif
1359
+ char* context = NULL;
1360
+ XMLParser* rootparser = NULL;
1361
+ VALUE parent = Qnil;
1362
+
1363
+ count = rb_scan_args(argc, argv, "03", &arg1, &arg2, &arg3);
1364
+ if (count == 1) {
1365
+ /* new(encoding) */
1366
+ if (TYPE(arg1) != T_NIL) {
1367
+ Check_Type(arg1, T_STRING); /* encoding */
1368
+ encoding = RSTRING_PTR(arg1);
1369
+ }
1370
+ }
1371
+ else if (count == 2) {
1372
+ /* new(encoding, nschar) */
1373
+ /* new(parser, context) */
1374
+ #ifdef NEW_EXPAT
1375
+ if (TYPE(arg1) != T_DATA) {
1376
+ if (TYPE(arg1) != T_NIL) {
1377
+ Check_Type(arg1, T_STRING); /* encoding */
1378
+ encoding = RSTRING_PTR(arg1);
1379
+ }
1380
+ Check_Type(arg2, T_STRING); /* nschar */
1381
+ nssep = RSTRING_PTR(arg2);
1382
+ }
1383
+ else {
1384
+ #endif
1385
+ Check_Type(arg1, T_DATA); /* parser */
1386
+ GET_PARSER(arg1, rootparser);
1387
+ if (!NIL_P(arg2)) {
1388
+ Check_Type(arg2, T_STRING); /* context */
1389
+ context = RSTRING_PTR(arg2);
1390
+ }
1391
+ parent = arg1;
1392
+ #ifdef NEW_EXPAT
1393
+ }
1394
+ #endif
1395
+ }
1396
+ else if (count == 3) {
1397
+ /* new(parser, context, encoding) */
1398
+ Check_Type(arg1, T_DATA); /* parser */
1399
+ GET_PARSER(arg1, rootparser);
1400
+ if (!NIL_P(arg2)) {
1401
+ Check_Type(arg2, T_STRING); /* context */
1402
+ context = RSTRING_PTR(arg2);
1403
+ }
1404
+ Check_Type(arg3, T_STRING); /* encoding */
1405
+ encoding = RSTRING_PTR(arg3);
1406
+ parent = arg1;
1407
+ }
1408
+
1409
+ /* create object */
1410
+ obj = Data_Make_Struct(klass, XMLParser,
1411
+ XMLParser_mark, XMLParser_free, parser);
1412
+ /* create parser */
1413
+ if (rootparser == NULL) {
1414
+ #ifdef NEW_EXPAT
1415
+ if (nssep == NULL)
1416
+ parser->parser = XML_ParserCreate(encoding);
1417
+ else
1418
+ parser->parser = XML_ParserCreateNS(encoding, nssep[0]);
1419
+ #else
1420
+ parser->parser = XML_ParserCreate(encoding);
1421
+ #endif
1422
+ parser->tainted = 0;
1423
+ parser->context = NULL;
1424
+ }
1425
+ else {
1426
+ parser->parser = XML_ExternalEntityParserCreate(rootparser->parser,
1427
+ context, encoding);
1428
+ /* clear all inhrited handlers,
1429
+ because handlers should be set in "parse" method */
1430
+ XML_SetElementHandler(parser->parser, NULL, NULL);
1431
+ XML_SetCharacterDataHandler(parser->parser, NULL);
1432
+ XML_SetProcessingInstructionHandler(parser->parser, NULL);
1433
+ XML_SetDefaultHandler(parser->parser, NULL);
1434
+ XML_SetUnparsedEntityDeclHandler(parser->parser, NULL);
1435
+ XML_SetNotationDeclHandler(parser->parser, NULL);
1436
+ XML_SetExternalEntityRefHandler(parser->parser, NULL);
1437
+ #ifdef NEW_EXPAT
1438
+ XML_SetCommentHandler(parser->parser, NULL);
1439
+ XML_SetCdataSectionHandler(parser->parser, NULL, NULL);
1440
+ XML_SetNamespaceDeclHandler(parser->parser, NULL, NULL);
1441
+ XML_SetNotStandaloneHandler(parser->parser, NULL);
1442
+ #endif
1443
+ #ifdef HAVE_XML_SETDOCTYPEDECLHANDLER
1444
+ XML_SetDoctypeDeclHandler(parser->parser, NULL, NULL);
1445
+ #endif
1446
+ #ifdef HAVE_EXPAT_H
1447
+ XML_SetElementDeclHandler(parser->parser, NULL);
1448
+ XML_SetAttlistDeclHandler(parser->parser, NULL);
1449
+ XML_SetXmlDeclHandler(parser->parser, NULL);
1450
+ XML_SetEntityDeclHandler(parser->parser, NULL);
1451
+ #endif
1452
+ #if 0
1453
+ XML_SetExternalParsedEntityDeclHandler(parser->parser, NULL);
1454
+ XML_SetInternalParsedEntityDeclHandler(parser->parser, NULL);
1455
+ #endif
1456
+ #ifdef HAVE_XML_SETSKIPPEDENTITYHANDLER
1457
+ XML_SetSkippedEntityHandler(parser->parser, NULL);
1458
+ #endif
1459
+ if (rootparser->tainted)
1460
+ parser->tainted |= 1;
1461
+ parser->context = context;
1462
+ }
1463
+ if (!parser->parser)
1464
+ rb_raise(eXMLParserError, "cannot create parser");
1465
+
1466
+ /* setting up internal data */
1467
+ XML_SetUserData(parser->parser, (void*)obj);
1468
+ parser->iterator = 0;
1469
+ parser->defaultCurrent = 0;
1470
+ #ifdef NEW_EXPAT
1471
+ parser->lastAttrs = NULL;
1472
+ #endif
1473
+ parser->parent = parent;
1474
+
1475
+ rb_obj_call_init(obj, argc, argv);
1476
+
1477
+ return obj;
1478
+ }
1479
+
1480
+ static VALUE
1481
+ XMLParser_initialize(VALUE obj)
1482
+ {
1483
+ return Qnil;
1484
+ }
1485
+
1486
+ #ifdef HAVE_XML_PARSERRESET
1487
+ static VALUE
1488
+ XMLParser_reset(int argc, VALUE* argv, VALUE obj)
1489
+ {
1490
+ XMLParser* parser;
1491
+ VALUE vencoding = Qnil;
1492
+ char* encoding = NULL;
1493
+ int count;
1494
+
1495
+ count = rb_scan_args(argc, argv, "01", &vencoding);
1496
+
1497
+ GET_PARSER(obj, parser);
1498
+ if (count > 0 && TYPE(vencoding) != T_NIL) {
1499
+ Check_Type(vencoding, T_STRING);
1500
+ encoding = RSTRING_PTR(vencoding);
1501
+ }
1502
+ XML_ParserReset(parser->parser, encoding);
1503
+ /* setting up internal data */
1504
+ XML_SetUserData(parser->parser, (void*)obj);
1505
+ parser->iterator = 0;
1506
+ parser->defaultCurrent = 0;
1507
+ #ifdef NEW_EXPAT
1508
+ parser->lastAttrs = NULL;
1509
+ #endif
1510
+ parser->tainted = 0;
1511
+
1512
+ return obj;
1513
+ }
1514
+ #endif
1515
+
1516
+ /* parse method */
1517
+ static VALUE
1518
+ XMLParser_parse(int argc, VALUE* argv, VALUE obj)
1519
+ {
1520
+ XMLParser* parser;
1521
+ int ret;
1522
+ XML_StartElementHandler start = NULL;
1523
+ XML_EndElementHandler end = NULL;
1524
+ #ifdef NEW_EXPAT
1525
+ XML_StartCdataSectionHandler startC = NULL;
1526
+ XML_EndCdataSectionHandler endC = NULL;
1527
+ XML_StartNamespaceDeclHandler startNS = NULL;
1528
+ XML_EndNamespaceDeclHandler endNS = NULL;
1529
+ #endif
1530
+ #ifdef HAVE_XML_SETDOCTYPEDECLHANDLER
1531
+ XML_StartDoctypeDeclHandler startDoctype = NULL;
1532
+ XML_EndDoctypeDeclHandler endDoctype = NULL;
1533
+ #endif
1534
+ VALUE str;
1535
+ VALUE isFinal;
1536
+ int final = 1;
1537
+ int count;
1538
+ int fromStream = 0;
1539
+ ID mid = rb_intern("gets");
1540
+ ID linebuf = rb_intern("_linebuf");
1541
+
1542
+ count = rb_scan_args(argc, argv, "02", &str, &isFinal);
1543
+ /* If "str" has public "gets" method, it will be considered *stream* */
1544
+ if (!rb_obj_is_kind_of(str, rb_cString) &&
1545
+ rb_method_boundp(CLASS_OF(str), mid, 1)) {
1546
+ fromStream = 1;
1547
+ }
1548
+ else if (!NIL_P(str)) {
1549
+ Check_Type(str, T_STRING);
1550
+ }
1551
+ if (count >= 2) {
1552
+ if (isFinal == Qtrue)
1553
+ final = 1;
1554
+ else if (isFinal == Qfalse)
1555
+ final = 0;
1556
+ else
1557
+ rb_raise(rb_eTypeError, "not valid value");
1558
+ }
1559
+
1560
+ GET_PARSER(obj, parser);
1561
+
1562
+ parser->iterator = rb_block_given_p();
1563
+
1564
+ /* Setup event handlers */
1565
+
1566
+ /* Call as iterator */
1567
+ if (parser->iterator) {
1568
+ XML_SetElementHandler(parser->parser,
1569
+ iterStartElementHandler, iterEndElementHandler);
1570
+ XML_SetCharacterDataHandler(parser->parser,
1571
+ iterCharacterDataHandler);
1572
+ XML_SetProcessingInstructionHandler(parser->parser,
1573
+ iterProcessingInstructionHandler);
1574
+ /* check dummy default handler */
1575
+ #ifdef NEW_EXPAT
1576
+ if (rb_method_boundp(CLASS_OF(obj), id_defaultExpandHandler, 0))
1577
+ XML_SetDefaultHandlerExpand(parser->parser, iterDefaultHandler);
1578
+ else
1579
+ #endif
1580
+ if (rb_method_boundp(CLASS_OF(obj), id_defaultHandler, 0))
1581
+ XML_SetDefaultHandler(parser->parser, iterDefaultHandler);
1582
+
1583
+ if (rb_method_boundp(CLASS_OF(obj), id_unparsedEntityDeclHandler, 0))
1584
+ XML_SetUnparsedEntityDeclHandler(parser->parser,
1585
+ iterUnparsedEntityDeclHandler);
1586
+ if (rb_method_boundp(CLASS_OF(obj), id_notationDeclHandler, 0))
1587
+ XML_SetNotationDeclHandler(parser->parser,
1588
+ iterNotationDeclHandler);
1589
+ if (rb_method_boundp(CLASS_OF(obj), id_externalEntityRefHandler, 0))
1590
+ XML_SetExternalEntityRefHandler(parser->parser,
1591
+ iterExternalEntityRefHandler);
1592
+ #ifdef NEW_EXPAT
1593
+ if (rb_method_boundp(CLASS_OF(obj), id_commentHandler, 0))
1594
+ XML_SetCommentHandler(parser->parser, iterCommentHandler);
1595
+
1596
+ if (rb_method_boundp(CLASS_OF(obj), id_startCdataSectionHandler, 0))
1597
+ startC = iterStartCdataSectionHandler;
1598
+ if (rb_method_boundp(CLASS_OF(obj), id_endCdataSectionHandler, 0))
1599
+ endC = iterEndCdataSectionHandler;
1600
+ if (startC || endC)
1601
+ XML_SetCdataSectionHandler(parser->parser, startC, endC);
1602
+
1603
+ if (rb_method_boundp(CLASS_OF(obj), id_startNamespaceDeclHandler, 0))
1604
+ startNS = iterStartNamespaceDeclHandler;
1605
+ if (rb_method_boundp(CLASS_OF(obj), id_endNamespaceDeclHandler, 0))
1606
+ endNS = iterEndNamespaceDeclHandler;
1607
+ if (startNS || endNS)
1608
+ XML_SetNamespaceDeclHandler(parser->parser, startNS, endNS);
1609
+ if (rb_method_boundp(CLASS_OF(obj), id_notStandaloneHandler, 0))
1610
+ XML_SetNotStandaloneHandler(parser->parser, myNotStandaloneHandler);
1611
+ #endif
1612
+ #ifdef HAVE_XML_SETDOCTYPEDECLHANDLER
1613
+ if (rb_method_boundp(CLASS_OF(obj), id_startDoctypeDeclHandler, 0))
1614
+ startDoctype = iterStartDoctypeDeclHandler;
1615
+ if (rb_method_boundp(CLASS_OF(obj), id_endDoctypeDeclHandler, 0))
1616
+ endDoctype = iterEndDoctypeDeclHandler;
1617
+ if (startDoctype || endDoctype)
1618
+ XML_SetDoctypeDeclHandler(parser->parser, startDoctype, endDoctype);
1619
+ #endif
1620
+ #ifdef HAVE_EXPAT_H
1621
+ if (rb_method_boundp(CLASS_OF(obj), id_elementDeclHandler, 0))
1622
+ XML_SetElementDeclHandler(parser->parser, iterElementDeclHandler);
1623
+ if (rb_method_boundp(CLASS_OF(obj), id_attlistDeclHandler, 0))
1624
+ XML_SetAttlistDeclHandler(parser->parser, iterAttlistDeclHandler);
1625
+ if (rb_method_boundp(CLASS_OF(obj), id_xmlDeclHandler, 0))
1626
+ XML_SetXmlDeclHandler(parser->parser, iterXmlDeclHandler);
1627
+ if (rb_method_boundp(CLASS_OF(obj), id_entityDeclHandler, 0))
1628
+ XML_SetEntityDeclHandler(parser->parser, iterEntityDeclHandler);
1629
+ #endif
1630
+ #if 0
1631
+ if (rb_method_boundp(CLASS_OF(obj), id_externalParsedEntityDeclHandler, 0))
1632
+ XML_SetExternalParsedEntityDeclHandler(parser->parser,
1633
+ iterExternalParsedEntityDeclHandler);
1634
+ if (rb_method_boundp(CLASS_OF(obj), id_internalParsedEntityDeclHandler, 0))
1635
+ XML_SetInternalParsedEntityDeclHandler(parser->parser,
1636
+ iterInternalParsedEntityDeclHandler);
1637
+ #endif
1638
+ /* Call non-iterator version of UnknownEncoding handler,
1639
+ because the procedure block often returns an unexpected value. */
1640
+ XML_SetUnknownEncodingHandler(parser->parser,
1641
+ myUnknownEncodingHandler,
1642
+ (void*)obj);
1643
+ #ifdef HAVE_XML_SETSKIPPEDENTITYHANDLER
1644
+ if (rb_method_boundp(CLASS_OF(obj), id_skippedEntityHandler, 0))
1645
+ XML_SetSkippedEntityHandler(parser->parser, iterSkippedEntityHandler);
1646
+ #endif
1647
+ }
1648
+ /* Call as not iterator */
1649
+ else {
1650
+ if (rb_method_boundp(CLASS_OF(obj), id_startElementHandler, 0))
1651
+ start = myStartElementHandler;
1652
+ if (rb_method_boundp(CLASS_OF(obj), id_endElementHandler, 0))
1653
+ end = myEndElementHandler;
1654
+ if (start || end)
1655
+ XML_SetElementHandler(parser->parser, start, end);
1656
+ if (rb_method_boundp(CLASS_OF(obj), id_characterDataHandler, 0))
1657
+ XML_SetCharacterDataHandler(parser->parser,
1658
+ myCharacterDataHandler);
1659
+ if (rb_method_boundp(CLASS_OF(obj),
1660
+ id_processingInstructionHandler, 0))
1661
+ XML_SetProcessingInstructionHandler(parser->parser,
1662
+ myProcessingInstructionHandler);
1663
+ #ifdef NEW_EXPAT
1664
+ if (rb_method_boundp(CLASS_OF(obj), id_defaultExpandHandler, 0))
1665
+ XML_SetDefaultHandlerExpand(parser->parser, myDefaultExpandHandler);
1666
+ else
1667
+ #endif
1668
+ if (rb_method_boundp(CLASS_OF(obj), id_defaultHandler, 0)) {
1669
+ XML_SetDefaultHandler(parser->parser, myDefaultHandler);
1670
+ }
1671
+ if (rb_method_boundp(CLASS_OF(obj), id_unparsedEntityDeclHandler, 0))
1672
+ XML_SetUnparsedEntityDeclHandler(parser->parser,
1673
+ myUnparsedEntityDeclHandler);
1674
+ if (rb_method_boundp(CLASS_OF(obj), id_notationDeclHandler, 0))
1675
+ XML_SetNotationDeclHandler(parser->parser,
1676
+ myNotationDeclHandler);
1677
+ if (rb_method_boundp(CLASS_OF(obj), id_externalEntityRefHandler, 0))
1678
+ XML_SetExternalEntityRefHandler(parser->parser,
1679
+ myExternalEntityRefHandler);
1680
+ #ifdef NEW_EXPAT
1681
+ if (rb_method_boundp(CLASS_OF(obj), id_commentHandler, 0))
1682
+ XML_SetCommentHandler(parser->parser, myCommentHandler);
1683
+
1684
+ if (rb_method_boundp(CLASS_OF(obj), id_startCdataSectionHandler, 0))
1685
+ startC = myStartCdataSectionHandler;
1686
+ if (rb_method_boundp(CLASS_OF(obj), id_endCdataSectionHandler, 0))
1687
+ endC = myEndCdataSectionHandler;
1688
+ if (startC || endC)
1689
+ XML_SetCdataSectionHandler(parser->parser, startC, endC);
1690
+
1691
+ if (rb_method_boundp(CLASS_OF(obj), id_startNamespaceDeclHandler, 0))
1692
+ startNS = myStartNamespaceDeclHandler;
1693
+ if (rb_method_boundp(CLASS_OF(obj), id_endNamespaceDeclHandler, 0))
1694
+ endNS = myEndNamespaceDeclHandler;
1695
+ if (startNS || endNS)
1696
+ XML_SetNamespaceDeclHandler(parser->parser, startNS, endNS);
1697
+ if (rb_method_boundp(CLASS_OF(obj), id_notStandaloneHandler, 0))
1698
+ XML_SetNotStandaloneHandler(parser->parser, myNotStandaloneHandler);
1699
+ #endif
1700
+ #ifdef HAVE_XML_SETDOCTYPEDECLHANDLER
1701
+ if (rb_method_boundp(CLASS_OF(obj), id_startDoctypeDeclHandler, 0))
1702
+ startDoctype = myStartDoctypeDeclHandler;
1703
+ if (rb_method_boundp(CLASS_OF(obj), id_endDoctypeDeclHandler, 0))
1704
+ endDoctype = myEndDoctypeDeclHandler;
1705
+ if (startDoctype || endDoctype)
1706
+ XML_SetDoctypeDeclHandler(parser->parser, startDoctype, endDoctype);
1707
+ #endif
1708
+ #ifdef HAVE_EXPAT_H
1709
+ if (rb_method_boundp(CLASS_OF(obj), id_elementDeclHandler, 0))
1710
+ XML_SetElementDeclHandler(parser->parser, myElementDeclHandler);
1711
+ if (rb_method_boundp(CLASS_OF(obj), id_attlistDeclHandler, 0))
1712
+ XML_SetAttlistDeclHandler(parser->parser, myAttlistDeclHandler);
1713
+ if (rb_method_boundp(CLASS_OF(obj), id_xmlDeclHandler, 0))
1714
+ XML_SetXmlDeclHandler(parser->parser, myXmlDeclHandler);
1715
+ if (rb_method_boundp(CLASS_OF(obj), id_entityDeclHandler, 0))
1716
+ XML_SetEntityDeclHandler(parser->parser, myEntityDeclHandler);
1717
+ #endif
1718
+ #if 0
1719
+ if (rb_method_boundp(CLASS_OF(obj), id_externalParsedEntityDeclHandler, 0))
1720
+ XML_SetExternalParsedEntityDeclHandler(parser->parser,
1721
+ myExternalParsedEntityDeclHandler);
1722
+ if (rb_method_boundp(CLASS_OF(obj), id_internalParsedEntityDeclHandler, 0))
1723
+ XML_SetInternalParsedEntityDeclHandler(parser->parser,
1724
+ myInternalParsedEntityDeclHandler);
1725
+ #endif
1726
+ XML_SetUnknownEncodingHandler(parser->parser,
1727
+ myUnknownEncodingHandler,
1728
+ (void*)obj);
1729
+ #ifdef HAVE_XML_SETSKIPPEDENTITYHANDLER
1730
+ if (rb_method_boundp(CLASS_OF(obj), id_skippedEntityHandler, 0))
1731
+ XML_SetSkippedEntityHandler(parser->parser, mySkippedEntityHandler);
1732
+ #endif
1733
+ }
1734
+
1735
+ /* Parse from stream (probably slightly slow) */
1736
+ if (fromStream) {
1737
+ VALUE buf;
1738
+
1739
+ if (OBJ_TAINTED(str))
1740
+ taintParser(parser);
1741
+ do {
1742
+ buf = rb_funcall(str, mid, 0);
1743
+ if (!NIL_P(buf)) {
1744
+ Check_Type(buf, T_STRING);
1745
+ if (OBJ_TAINTED(buf))
1746
+ taintParser(parser);
1747
+ rb_ivar_set(obj, linebuf, buf); /* protect buf from GC (reasonable?)*/
1748
+ ret = XML_Parse(parser->parser,
1749
+ RSTRING_PTR(buf), RSTRING_LEN(buf), 0);
1750
+ }
1751
+ else {
1752
+ ret = XML_Parse(parser->parser, NULL, 0, 1);
1753
+ }
1754
+ if (!ret) {
1755
+ int err = XML_GetErrorCode(parser->parser);
1756
+ const char* errStr = XML_ErrorString(err);
1757
+ rb_raise(eXMLParserError, (char*)errStr);
1758
+ }
1759
+ } while (!NIL_P(buf));
1760
+ return Qnil;
1761
+ }
1762
+
1763
+ /* Parse string */
1764
+ if (!NIL_P(str)) {
1765
+ if (OBJ_TAINTED(str))
1766
+ taintParser(parser);
1767
+ ret = XML_Parse(parser->parser,
1768
+ RSTRING_PTR(str), RSTRING_LEN(str), final);
1769
+ }
1770
+ else
1771
+ ret = XML_Parse(parser->parser, NULL, 0, final);
1772
+ if (!ret) {
1773
+ int err = XML_GetErrorCode(parser->parser);
1774
+ const char* errStr = XML_ErrorString(err);
1775
+ rb_raise(eXMLParserError, (char*)errStr);
1776
+ }
1777
+
1778
+ return Qnil;
1779
+ }
1780
+
1781
+ /* done method */
1782
+ static VALUE
1783
+ XMLParser_done(VALUE obj)
1784
+ {
1785
+ XMLParser* parser;
1786
+
1787
+ GET_PARSER(obj, parser);
1788
+ if (parser->parser) {
1789
+ XML_ParserFree(parser->parser);
1790
+ parser->parser = NULL;
1791
+ }
1792
+ return Qnil;
1793
+ }
1794
+
1795
+ /* defaultCurrent method */
1796
+ static VALUE
1797
+ XMLParser_defaultCurrent(VALUE obj)
1798
+ {
1799
+ XMLParser* parser;
1800
+
1801
+ GET_PARSER(obj, parser);
1802
+ if (!(parser->iterator)) {
1803
+ XML_DefaultCurrent(parser->parser);
1804
+ }
1805
+ else {
1806
+ parser->defaultCurrent = 1;
1807
+ }
1808
+ return Qnil;
1809
+ }
1810
+
1811
+ /* line method */
1812
+ static VALUE
1813
+ XMLParser_getCurrentLineNumber(VALUE obj)
1814
+ {
1815
+ XMLParser* parser;
1816
+ int line;
1817
+
1818
+ GET_PARSER(obj, parser);
1819
+ line = XML_GetCurrentLineNumber(parser->parser);
1820
+
1821
+ return INT2FIX(line);
1822
+ }
1823
+
1824
+ /* column method */
1825
+ static VALUE
1826
+ XMLParser_getCurrentColumnNumber(VALUE obj)
1827
+ {
1828
+ XMLParser* parser;
1829
+ int column;
1830
+
1831
+ GET_PARSER(obj, parser);
1832
+ column = XML_GetCurrentColumnNumber(parser->parser);
1833
+
1834
+ return INT2FIX(column);
1835
+ }
1836
+
1837
+ /* byte index method */
1838
+ static VALUE
1839
+ XMLParser_getCurrentByteIndex(VALUE obj)
1840
+ {
1841
+ XMLParser* parser;
1842
+ long pos;
1843
+
1844
+ GET_PARSER(obj, parser);
1845
+ pos = XML_GetCurrentByteIndex(parser->parser);
1846
+
1847
+ return INT2FIX(pos);
1848
+ }
1849
+
1850
+ /* set URI base */
1851
+ static VALUE
1852
+ XMLParser_setBase(VALUE obj, VALUE base)
1853
+ {
1854
+ XMLParser* parser;
1855
+ int ret;
1856
+
1857
+ Check_Type(base, T_STRING);
1858
+ GET_PARSER(obj, parser);
1859
+ if (OBJ_TAINTED(base))
1860
+ taintParser(parser);
1861
+ ret = XML_SetBase(parser->parser, RSTRING_PTR(base));
1862
+
1863
+ return INT2FIX(ret);
1864
+ }
1865
+
1866
+ /* get URI base */
1867
+ static VALUE
1868
+ XMLParser_getBase(VALUE obj)
1869
+ {
1870
+ XMLParser* parser;
1871
+ const XML_Char* ret;
1872
+
1873
+ GET_PARSER(obj, parser);
1874
+ ret = XML_GetBase(parser->parser);
1875
+ if (!ret)
1876
+ return Qnil;
1877
+
1878
+ return TO_(rb_str_new2((char*)ret));
1879
+ }
1880
+
1881
+ #ifdef NEW_EXPAT
1882
+ #if 0
1883
+ static VALUE
1884
+ XMLParser_getSpecifiedAttributes(VALUE obj)
1885
+ {
1886
+ XMLParser* parser;
1887
+ int count;
1888
+ const XML_Char** atts;
1889
+ VALUE attrhash;
1890
+
1891
+ GET_PARSER(obj, parser);
1892
+ atts = parser->lastAttrs;
1893
+ if (!atts)
1894
+ return Qnil;
1895
+ count = XML_GetSpecifiedAttributeCount(parser->parser)/2;
1896
+ attrhash = rb_hash_new();
1897
+ while (*atts) {
1898
+ const char* key = *atts++;
1899
+ atts++;
1900
+ rb_hash_aset(attrhash, FO_(TO_(rb_str_new2((char*)key))),
1901
+ (count-- > 0) ? Qtrue: Qfalse);
1902
+ }
1903
+
1904
+ return attrhash;
1905
+ }
1906
+ #else
1907
+ static VALUE
1908
+ XMLParser_getSpecifiedAttributes(VALUE obj)
1909
+ {
1910
+ XMLParser* parser;
1911
+ int i, count;
1912
+ const XML_Char** atts;
1913
+ VALUE attrarray;
1914
+
1915
+ GET_PARSER(obj, parser);
1916
+ atts = parser->lastAttrs;
1917
+ if (!atts)
1918
+ return Qnil;
1919
+ count = XML_GetSpecifiedAttributeCount(parser->parser)/2;
1920
+ attrarray = rb_ary_new2(count);
1921
+ for (i = 0; i < count; i++, atts+=2) {
1922
+ const char* key = *atts;
1923
+ rb_ary_push(attrarray, TO_(rb_str_new2((char*)key)));
1924
+ }
1925
+
1926
+ return attrarray;
1927
+ }
1928
+ #endif
1929
+
1930
+ static VALUE
1931
+ XMLParser_getCurrentByteCount(VALUE obj)
1932
+ {
1933
+ XMLParser* parser;
1934
+
1935
+ GET_PARSER(obj, parser);
1936
+ return INT2FIX(XML_GetCurrentByteCount(parser->parser));
1937
+ }
1938
+ #endif
1939
+
1940
+ #ifdef XML_DTD
1941
+ static VALUE
1942
+ XMLParser_setParamEntityParsing(VALUE obj, VALUE parsing)
1943
+ {
1944
+ XMLParser* parser;
1945
+ int ret;
1946
+
1947
+ Check_Type(parsing, T_FIXNUM);
1948
+ GET_PARSER(obj, parser);
1949
+ ret = XML_SetParamEntityParsing(parser->parser, FIX2INT(parsing));
1950
+
1951
+ return INT2FIX(ret);
1952
+ }
1953
+ #endif
1954
+
1955
+ static VALUE
1956
+ XMLParser_s_expatVersion(VALUE obj)
1957
+ {
1958
+ #if defined(HAVE_EXPAT_H)
1959
+ return rb_str_new2(XML_ExpatVersion());
1960
+ #elif defined(EXPAT_1_2)
1961
+ return rb_str_new2("1.2");
1962
+ #elif defined(NEW_EXPAT)
1963
+ return rb_str_new2("1.1");
1964
+ #else
1965
+ return rb_str_new2("1.0");
1966
+ #endif
1967
+ }
1968
+
1969
+ #ifdef HAVE_EXPAT_H
1970
+ static VALUE
1971
+ XMLParser_setReturnNSTriplet(VALUE obj, VALUE do_nst)
1972
+ {
1973
+ XMLParser* parser;
1974
+ int nst;
1975
+
1976
+ GET_PARSER(obj, parser);
1977
+ switch (TYPE(do_nst)) {
1978
+ case T_TRUE:
1979
+ nst = 1;
1980
+ break;
1981
+ case T_FALSE:
1982
+ nst = 0;
1983
+ break;
1984
+ case T_FIXNUM:
1985
+ nst = FIX2INT(do_nst);
1986
+ break;
1987
+ default:
1988
+ rb_raise(rb_eTypeError, "not valid value");
1989
+ }
1990
+ XML_SetReturnNSTriplet(parser->parser, nst);
1991
+
1992
+ return Qnil;
1993
+ }
1994
+
1995
+
1996
+ static VALUE
1997
+ XMLParser_getInputContext(VALUE obj)
1998
+ {
1999
+ XMLParser* parser;
2000
+ const char* buffer;
2001
+ int offset;
2002
+ int size;
2003
+ VALUE ret = Qnil;
2004
+
2005
+ GET_PARSER(obj, parser);
2006
+ buffer = XML_GetInputContext(parser->parser,
2007
+ &offset,
2008
+ &size);
2009
+ if (buffer && size > 0) {
2010
+ ret = rb_ary_new3(2,
2011
+ TO_(rb_str_new(buffer, size)),
2012
+ INT2FIX(offset));
2013
+ }
2014
+
2015
+ return ret;
2016
+ }
2017
+
2018
+
2019
+ static VALUE
2020
+ XMLParser_getIdAttrribute(VALUE obj)
2021
+ {
2022
+ XMLParser* parser;
2023
+ int idattr;
2024
+ const XML_Char** atts;
2025
+
2026
+ GET_PARSER(obj, parser);
2027
+ atts = parser->lastAttrs;
2028
+ if (!atts)
2029
+ return Qnil;
2030
+ idattr = XML_GetIdAttributeIndex(parser->parser);
2031
+ if (idattr < 0)
2032
+ return Qnil;
2033
+ return TO_(rb_str_new2((char*)atts[idattr]));
2034
+ }
2035
+ #endif
2036
+
2037
+ #ifdef HAVE_XML_USEFOREIGNDTD
2038
+ static VALUE
2039
+ XMLParser_useForeignDTD(VALUE obj, VALUE useDTD)
2040
+ {
2041
+ XMLParser* parser;
2042
+ int dtd;
2043
+ int ret;
2044
+
2045
+ GET_PARSER(obj, parser);
2046
+ switch (TYPE(useDTD)) {
2047
+ case T_TRUE:
2048
+ dtd = 1;
2049
+ break;
2050
+ case T_FALSE:
2051
+ dtd = 0;
2052
+ break;
2053
+ case T_FIXNUM:
2054
+ dtd = FIX2INT(useDTD);
2055
+ break;
2056
+ default:
2057
+ rb_raise(rb_eTypeError, "not valid value");
2058
+ }
2059
+ ret = XML_UseForeignDTD(parser->parser, dtd);
2060
+
2061
+ return INT2FIX(ret);
2062
+ }
2063
+ #endif
2064
+
2065
+ #ifdef HAVE_XML_GETFEATURELIST
2066
+ static VALUE
2067
+ XMLParser_s_getFeatureList(VALUE obj)
2068
+ {
2069
+ const XML_Feature* list;
2070
+ VALUE ret = rb_hash_new();
2071
+
2072
+ list = XML_GetFeatureList();
2073
+ while (list && list->feature) {
2074
+ rb_hash_aset(ret, FO_(rb_str_new2(list->name)), INT2NUM(list->value));
2075
+ list++;
2076
+ }
2077
+
2078
+ return ret;
2079
+ }
2080
+ #endif
2081
+
2082
+ void
2083
+ Init_xmlparser()
2084
+ {
2085
+ VALUE mXML;
2086
+
2087
+ eXMLParserError = rb_define_class("XMLParserError", rb_eStandardError);
2088
+ cXMLParser = rb_define_class("XMLParser", rb_cObject);
2089
+ cXMLEncoding = rb_define_class("XMLEncoding", rb_cObject);
2090
+
2091
+ /* Class name aliases */
2092
+ if (rb_const_defined(rb_cObject, rb_intern("XML")) == Qtrue)
2093
+ mXML = rb_const_get(rb_cObject, rb_intern("XML"));
2094
+ else
2095
+ mXML = rb_define_module("XML");
2096
+ rb_define_const(mXML, "ParserError", eXMLParserError);
2097
+ rb_define_const(cXMLParser, "Error", eXMLParserError);
2098
+ rb_define_const(mXML, "Parser", cXMLParser);
2099
+ rb_define_const(mXML, "Encoding", cXMLEncoding);
2100
+
2101
+ rb_define_singleton_method(cXMLParser, "new", XMLParser_new, -1);
2102
+ rb_define_singleton_method(cXMLParser, "expatVersion",
2103
+ XMLParser_s_expatVersion, 0);
2104
+ rb_define_method(cXMLParser, "initialize", XMLParser_initialize, -1);
2105
+ rb_define_method(cXMLParser, "parse", XMLParser_parse, -1);
2106
+ rb_define_method(cXMLParser, "done", XMLParser_done, 0);
2107
+ rb_define_method(cXMLParser, "defaultCurrent", XMLParser_defaultCurrent, 0);
2108
+ rb_define_method(cXMLParser, "line", XMLParser_getCurrentLineNumber, 0);
2109
+ rb_define_method(cXMLParser, "column", XMLParser_getCurrentColumnNumber, 0);
2110
+ rb_define_method(cXMLParser, "byteIndex", XMLParser_getCurrentByteIndex, 0);
2111
+ rb_define_method(cXMLParser, "setBase", XMLParser_setBase, 1);
2112
+ rb_define_method(cXMLParser, "getBase", XMLParser_getBase, 0);
2113
+ #ifdef NEW_EXPAT
2114
+ rb_define_method(cXMLParser, "getSpecifiedAttributes",
2115
+ XMLParser_getSpecifiedAttributes, 0);
2116
+ rb_define_method(cXMLParser, "byteCount", XMLParser_getCurrentByteCount, 0);
2117
+ #endif
2118
+ #ifdef XML_DTD
2119
+ rb_define_method(cXMLParser, "setParamEntityParsing",
2120
+ XMLParser_setParamEntityParsing, 1);
2121
+ #endif
2122
+ #ifdef HAVE_EXPAT_H
2123
+ rb_define_method(cXMLParser, "setReturnNSTriplet",
2124
+ XMLParser_setReturnNSTriplet, 1);
2125
+ rb_define_method(cXMLParser, "getInputContext",
2126
+ XMLParser_getInputContext, 0);
2127
+ rb_define_method(cXMLParser, "getIdAttribute",
2128
+ XMLParser_getIdAttrribute, 0);
2129
+ #endif
2130
+
2131
+ #ifdef HAVE_XML_PARSERRESET
2132
+ rb_define_method(cXMLParser, "reset", XMLParser_reset, -1);
2133
+ #endif
2134
+
2135
+ rb_define_method(cXMLEncoding, "map", XMLEncoding_map, 1);
2136
+ rb_define_method(cXMLEncoding, "convert", XMLEncoding_convert, 1);
2137
+
2138
+ #ifdef HAVE_XML_USEFOREIGNDTD
2139
+ rb_define_method(cXMLParser, "useForeignDTD",
2140
+ XMLParser_useForeignDTD, 1);
2141
+ #endif
2142
+ #ifdef HAVE_XML_GETFEATURELIST
2143
+ rb_define_singleton_method(cXMLParser, "getFeatureList",
2144
+ XMLParser_s_getFeatureList, 0);
2145
+ #endif
2146
+
2147
+ #define DEFINE_EVENT_CODE(klass, name) \
2148
+ rb_define_const(klass, #name, sym##name = ID2SYM(rb_intern(#name)))
2149
+
2150
+ DEFINE_EVENT_CODE(cXMLParser, START_ELEM);
2151
+ DEFINE_EVENT_CODE(cXMLParser, END_ELEM);
2152
+ DEFINE_EVENT_CODE(cXMLParser, CDATA);
2153
+ DEFINE_EVENT_CODE(cXMLParser, PI);
2154
+ DEFINE_EVENT_CODE(cXMLParser, DEFAULT);
2155
+ DEFINE_EVENT_CODE(cXMLParser, UNPARSED_ENTITY_DECL);
2156
+ DEFINE_EVENT_CODE(cXMLParser, NOTATION_DECL);
2157
+ DEFINE_EVENT_CODE(cXMLParser, EXTERNAL_ENTITY_REF);
2158
+ #ifdef NEW_EXPAT
2159
+ DEFINE_EVENT_CODE(cXMLParser, COMMENT);
2160
+ DEFINE_EVENT_CODE(cXMLParser, START_CDATA);
2161
+ DEFINE_EVENT_CODE(cXMLParser, END_CDATA);
2162
+ DEFINE_EVENT_CODE(cXMLParser, START_NAMESPACE_DECL);
2163
+ DEFINE_EVENT_CODE(cXMLParser, END_NAMESPACE_DECL);
2164
+ #endif
2165
+ #ifdef HAVE_XML_SETSKIPPEDENTITYHANDLER
2166
+ DEFINE_EVENT_CODE(cXMLParser, SKIPPED_ENTITY);
2167
+ #endif
2168
+ #ifdef XML_DTD
2169
+ rb_define_const(cXMLParser, "PARAM_ENTITY_PARSING_NEVER",
2170
+ XML_PARAM_ENTITY_PARSING_NEVER);
2171
+ rb_define_const(cXMLParser, "PARAM_ENTITY_PARSING_UNLESS_STANDALONE",
2172
+ XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE);
2173
+ rb_define_const(cXMLParser, "PARAM_ENTITY_PARSING_ALWAYS",
2174
+ XML_PARAM_ENTITY_PARSING_ALWAYS);
2175
+ #endif
2176
+ #ifdef HAVE_XML_SETDOCTYPEDECLHANDLER
2177
+ DEFINE_EVENT_CODE(cXMLParser, START_DOCTYPE_DECL);
2178
+ DEFINE_EVENT_CODE(cXMLParser, END_DOCTYPE_DECL);
2179
+ #endif
2180
+ #ifdef HAVE_EXPAT_H
2181
+ DEFINE_EVENT_CODE(cXMLParser, ELEMENT_DECL);
2182
+ DEFINE_EVENT_CODE(cXMLParser, ATTLIST_DECL);
2183
+ DEFINE_EVENT_CODE(cXMLParser, XML_DECL);
2184
+ DEFINE_EVENT_CODE(cXMLParser, ENTITY_DECL);
2185
+ #endif
2186
+ #if 0
2187
+ DEFINE_EVENT_CODE(cXMLParser, EXTERNAL_PARSED_ENTITY_DECL);
2188
+ DEFINE_EVENT_CODE(cXMLParser, INTERNAL_PARSED_ENTITY_DECL);
2189
+ #endif
2190
+ #if 0
2191
+ DEFINE_EVENT_CODE(cXMLParser, UNKNOWN_ENCODING);
2192
+ #endif
2193
+
2194
+ id_map = rb_intern("_map");
2195
+ id_startElementHandler = rb_intern("startElement");
2196
+ id_endElementHandler = rb_intern("endElement");
2197
+ id_characterDataHandler = rb_intern("character");
2198
+ id_processingInstructionHandler = rb_intern("processingInstruction");
2199
+ id_defaultHandler = rb_intern("default");
2200
+ id_unparsedEntityDeclHandler = rb_intern("unparsedEntityDecl");
2201
+ id_notationDeclHandler = rb_intern("notationDecl");
2202
+ id_externalEntityRefHandler = rb_intern("externalEntityRef");
2203
+ #ifdef NEW_EXPAT
2204
+ id_defaultExpandHandler = rb_intern("defaultExpand");
2205
+ id_commentHandler = rb_intern("comment");
2206
+ id_startCdataSectionHandler = rb_intern("startCdata");
2207
+ id_endCdataSectionHandler = rb_intern("endCdata");
2208
+ id_startNamespaceDeclHandler = rb_intern("startNamespaceDecl");
2209
+ id_endNamespaceDeclHandler = rb_intern("endNamespaceDecl");
2210
+ id_notStandaloneHandler = rb_intern("notStandalone");
2211
+ #endif
2212
+ #ifdef HAVE_XML_SETDOCTYPEDECLHANDLER
2213
+ id_startDoctypeDeclHandler = rb_intern("startDoctypeDecl");
2214
+ id_endDoctypeDeclHandler = rb_intern("endDoctypeDecl");
2215
+ #endif
2216
+ id_unknownEncoding = rb_intern("unknownEncoding");
2217
+ id_convert = rb_intern("convert");
2218
+ #ifdef HAVE_EXPAT_H
2219
+ id_elementDeclHandler = rb_intern("elementDecl");
2220
+ id_attlistDeclHandler = rb_intern("attlistDecl");
2221
+ id_xmlDeclHandler = rb_intern("xmlDecl");
2222
+ id_entityDeclHandler = rb_intern("entityDecl");
2223
+ #endif
2224
+ #if 0
2225
+ id_externalParsedEntityDeclHandler = rb_intern("externalParsedEntityDecl");
2226
+ id_internalParsedEntityDeclHandler = rb_intern("internalParsedEntityDecl");
2227
+ #endif
2228
+ #ifdef HAVE_XML_SETSKIPPEDENTITYHANDLER
2229
+ id_skippedEntityHandler = rb_intern("skippedEntity");
2230
+ #endif
2231
+ }