xmlparser 0.6.81

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52) hide show
  1. data/MANIFEST +112 -0
  2. data/README +697 -0
  3. data/README.ja +789 -0
  4. data/Rakefile +34 -0
  5. data/ext/encoding.h +91 -0
  6. data/ext/xmlparser/mkrf_conf.rb +28 -0
  7. data/ext/xmlparser/xmlparser.c +2226 -0
  8. data/lib/sax.rb +1 -0
  9. data/lib/saxdriver.rb +1 -0
  10. data/lib/wget.rb +47 -0
  11. data/lib/xml/dom/builder-ja.rb +58 -0
  12. data/lib/xml/dom/builder.rb +310 -0
  13. data/lib/xml/dom/core.rb +3276 -0
  14. data/lib/xml/dom/digest.rb +94 -0
  15. data/lib/xml/dom/visitor.rb +182 -0
  16. data/lib/xml/dom2/attr.rb +213 -0
  17. data/lib/xml/dom2/cdatasection.rb +76 -0
  18. data/lib/xml/dom2/characterdata.rb +177 -0
  19. data/lib/xml/dom2/comment.rb +81 -0
  20. data/lib/xml/dom2/core.rb +19 -0
  21. data/lib/xml/dom2/document.rb +317 -0
  22. data/lib/xml/dom2/documentfragment.rb +82 -0
  23. data/lib/xml/dom2/documenttype.rb +102 -0
  24. data/lib/xml/dom2/dombuilder.rb +277 -0
  25. data/lib/xml/dom2/dombuilderfilter.rb +12 -0
  26. data/lib/xml/dom2/domentityresolver.rb +13 -0
  27. data/lib/xml/dom2/domentityresolverimpl.rb +37 -0
  28. data/lib/xml/dom2/domexception.rb +95 -0
  29. data/lib/xml/dom2/domimplementation.rb +61 -0
  30. data/lib/xml/dom2/dominputsource.rb +29 -0
  31. data/lib/xml/dom2/element.rb +533 -0
  32. data/lib/xml/dom2/entity.rb +110 -0
  33. data/lib/xml/dom2/entityreference.rb +107 -0
  34. data/lib/xml/dom2/namednodemap.rb +138 -0
  35. data/lib/xml/dom2/node.rb +587 -0
  36. data/lib/xml/dom2/nodelist.rb +231 -0
  37. data/lib/xml/dom2/notation.rb +86 -0
  38. data/lib/xml/dom2/processinginstruction.rb +155 -0
  39. data/lib/xml/dom2/text.rb +128 -0
  40. data/lib/xml/dom2/xpath.rb +398 -0
  41. data/lib/xml/encoding-ja.rb +42 -0
  42. data/lib/xml/parser.rb +13 -0
  43. data/lib/xml/parserns.rb +236 -0
  44. data/lib/xml/sax.rb +353 -0
  45. data/lib/xml/saxdriver.rb +370 -0
  46. data/lib/xml/xpath.rb +3284 -0
  47. data/lib/xml/xpath.ry +2352 -0
  48. data/lib/xmldigest.rb +1 -0
  49. data/lib/xmltree.rb +1 -0
  50. data/lib/xmltreebuilder.rb +1 -0
  51. data/lib/xmltreevisitor.rb +1 -0
  52. metadata +111 -0
data/Rakefile ADDED
@@ -0,0 +1,34 @@
1
+ #!/usr/bin/ruby
2
+ require 'rubygems'
3
+ require 'rake'
4
+ require 'rake/testtask'
5
+ require 'rake/clean'
6
+ require 'rake/gempackagetask'
7
+ require 'rake/rdoctask'
8
+ require 'mkrf/rakehelper'
9
+ require 'fileutils'
10
+ include FileUtils
11
+
12
+ setup_clean ["pkg", "lib/*.bundle", "*.gem", ".config", "ext/**/Rakefile"] # paths handed to the clean helper (helper presumably comes from mkrf/rakehelper -- confirm)
13
+
14
+ setup_extension('xmlparser','xmlparser') # NOTE(review): presumably (extension directory, extension name) -- confirm against mkrf/rakehelper
15
+
16
+ desc "Does a full compile"
17
+ task :default => [:xmlparser] # the default task depends on the :xmlparser task
18
+
19
+ task 'extension' => :default # 'extension' simply depends on :default
20
+ version = "0.6.81"
21
+ name = "xmlparser"
22
+
23
+ setup_gem(name, version) do |spec| # the block fills in the gem specification object
24
+ spec.summary = "Ruby bindings to the Expat XML parsing library"
25
+ spec.description = spec.summary # description reuses the summary text
26
+ spec.author = "Yoshida Mataso with Jeff Hodges"
27
+ spec.add_dependency("mkrf", ">=0.2.1")
28
+ spec.extensions << "Rakefile" # this Rakefile itself is registered as the extension build script
29
+ spec.files = FileList["lib/**/*","ext/**/*"].exclude("rdoc").to_a # everything under lib/ and ext/ except entries matching "rdoc"
30
+ spec.has_rdoc = false
31
+ spec.extra_rdoc_files=['README', 'README.ja', 'MANIFEST', 'samples']
32
+ spec.rubyforge_project = 'xmlparser'
33
+ end
34
+
data/ext/encoding.h ADDED
@@ -0,0 +1,91 @@
1
+ /*****************************************************************
2
+ ** encoding.h
3
+ **
4
+ ** Copyright 1998 Clark Cooper
5
+ ** All rights reserved.
6
+ **
7
+ ** This program is free software; you can redistribute it and/or
8
+ ** modify it under the same terms as Perl itself.
9
+ */
10
+
11
+ #ifndef ENCODING_H
12
+ #define ENCODING_H 1
13
+
14
+ #define ENCMAP_MAGIC 0xfeebface
15
+
16
+ typedef struct prefixmap { /* one prefix entry of a multi-byte encoding map */
17
+ unsigned char min; /* first byte mapped for this prefix */
18
+ unsigned char len; /* number of bytes mapped; 0 => 256 */
19
+ unsigned short bmap_start; /* starting index of this prefix's map within the overall byte map */
20
+ unsigned char ispfx[32]; /* bit vector: the mapped byte is itself a prefix */
21
+ unsigned char ischar[32]; /* bit vector: the mapped byte is a character */
22
+ } PrefixMap;
23
+
24
+ typedef struct encinf /* in-memory encoding description (NOTE(review): presumably built from an Encmap_Header file -- confirm) */
25
+ {
26
+ unsigned short prefixes_size; /* presumably the number of entries in prefixes -- confirm */
27
+ unsigned short bytemap_size; /* presumably the number of entries in bytemap -- confirm */
28
+ int firstmap[256]; /* per-first-byte map; same shape as Encmap_Header.map */
29
+ PrefixMap *prefixes; /* PrefixMap vector */
30
+ unsigned short *bytemap; /* overall map of shorts */
31
+ } Encinfo;
32
+
33
+ typedef struct encmaphdr /* header of an encoding map file; shorts and ints are stored in network order */
34
+ {
35
+ unsigned int magic; /* should match ENCMAP_MAGIC */
36
+ char name[40]; /* IANA registered name of the encoding */
37
+ unsigned short pfsize; /* number of PrefixMaps; zero for single-byte encodings */
38
+ unsigned short bmsize; /* number of shorts used for the overall map */
39
+ int map[256]; /* per first byte: Unicode scalar, -n for an n-byte sequence, or -1 if unmapped */
40
+ } Encmap_Header;
41
+
42
+ /*================================================================
43
+ ** Structure of Encoding map binary encoding
44
+ **
45
+ ** Note that all shorts and ints are in network order,
46
+ ** so when packing or unpacking with perl, use 'n' and 'N' respectively.
47
+ ** In C, use the htonl family of functions.
48
+ **
49
+ ** The basic structure is:
50
+ **
51
+ ** _______________________
52
+ ** |Header (including map expat needs for 1st byte)
53
+ ** |PrefixMap * pfsize
54
+ ** | This section isn't included for single-byte encodings.
55
+ ** | For multiple byte encodings, when a byte represents a prefix
56
+ ** | then it indexes into this vector instead of mapping to a
57
+ ** | Unicode character. The PrefixMap type is declared above. The
58
+ ** | ispfx and ischar fields are bitvectors indicating whether
59
+ ** | the byte being mapped is a prefix or character respectively.
60
+ ** | If neither is set, then the character is not mapped to Unicode.
61
+ ** |
62
+ ** | The min field is the 1st byte mapped for this prefix; the
63
+ ** | len field is the number of bytes mapped; and bmap_start is
64
+ ** | the starting index of the map for this prefix in the overall
65
+ ** | map (next section).
66
+ ** |unsigned short * bmsize
67
+ ** | This section also is omitted for single-byte encodings.
68
+ ** | Each short is either a Unicode scalar or an index into the
69
+ ** | PrefixMap vector.
70
+ **
71
+ ** The header for these files is declared above as the Encmap_Header type.
72
+ ** The magic field is a magic number which should match the ENCMAP_MAGIC
73
+ ** macro above. The next 40 bytes store the IANA registered name for the
74
+ ** encoding. The pfsize field holds the number of PrefixMaps, which should
75
+ ** be zero for single byte encodings. The bmsize field holds the number of
76
+ ** shorts used for the overall map.
77
+ **
78
+ ** The map field contains either the Unicode scalar encoded by the 1st byte
79
+ ** or -n where n is the number of bytes that such a 1st byte implies (Expat
80
+ ** requires that the number of bytes to encode a character is indicated by
81
+ ** the 1st byte) or -1 if the byte doesn't map to any Unicode character.
82
+ **
83
+ ** If the encoding is a multiple byte encoding, then there will be PrefixMap
84
+ ** and character map sections. The 1st PrefixMap (index 0), covers a range
85
+ ** of bytes that includes all 1st byte prefixes.
86
+ **
87
+ ** Look at convert_to_unicode in Expat.xs to see how this data structure
88
+ ** is used.
89
+ */
90
+
91
+ #endif /* ndef ENCODING_H */
@@ -0,0 +1,28 @@
1
+ #!/usr/bin/ruby
2
+
3
+ require 'rubygems'
4
+ require 'mkrf'
5
+
6
# Report an extconf failure on standard output and abort with exit status 1.
def crash(str)
  $stdout.printf(" extconf failure: %s\n", str)
  exit(1)
end
10
+
11
+ Mkrf::Generator.new('xmlparser') do |g| # configure the build of the 'xmlparser' extension
12
+ if g.include_header('expat.h', '/opt/include','/usr/local/include','/usr/include') # first choice: expat.h in one of these directories
13
+ g.include_library('expat',function='XML_ParserCreate', '/opt/lib', '/usr/local/lib', '/usr/lib') # `function=...` is an assignment expression; its string value is passed positionally
14
+ elsif g.include_header('libxmltok', '/opt/include','/usr/local/include','/usr/include') # NOTE(review): 'libxmltok' is passed as a header name -- confirm this is intended
15
+ g.include_library('xmltok', function='XML_ParserCreate', '/opt/lib', '/usr/local/lib', '/usr/lib')
16
+ end
17
+ if g.has_function?("XML_SetNotStandaloneHandler") # defines NEW_EXPAT when this function is found
18
+ #g.cflags << ' -DNEW_EXPAT'
19
+ g.add_define('NEW_EXPAT')
20
+ end
21
+ if g.has_function?("XML_SetParamEntityParsing") # defines XML_DTD when this function is found
22
+ #g.cflags << ' -DXML_DTD'
23
+ g.add_define('XML_DTD')
24
+ end
25
+
26
+ #g.include_library("socket", function="ntohl")
27
+ g.include_library("wsock32") if RUBY_PLATFORM =~ /mswin32|mingw/ # only when RUBY_PLATFORM matches mswin32 or mingw
28
+ end
@@ -0,0 +1,2226 @@
1
+ /*
2
+ * Expat (XML Parser Toolkit) wrapper for Ruby
3
+ * Feb 16, 2004 yoshidam version 0.6.8 taint output string
4
+ * Feb 16, 2004 yoshidam version 0.6.7 fix buffer overflow
5
+ * Mar 11, 2003 yoshidam version 0.6.6 fix skippedEntity handler
6
+ * Sep 20, 2002 yoshidam version 0.6.5 fix reset method
7
+ * Apr 4, 2002 yoshidam version 0.6.3 change event code values
8
+ * Oct 10, 2000 yoshidam version 0.6.1 support expat-1.2
9
+ * Oct 6, 2000 yoshidam version 0.6.0 support expat-1.95.0
10
+ * Jun 28, 1999 yoshidam version 0.5.18 define initialize for Ruby 1.5
11
+ * Jun 28, 1999 yoshidam version 0.5.15 support start/endDoctypeDecl
12
+ * Jun 28, 1999 yoshidam version 0.5.14 support setParamEntityParsing
13
+ * Apr 28, 1999 yoshidam version 0.5.11 support notStandalone
14
+ * Mar 29, 1998 yoshidam version 0.5.9 optimize for Ruby 1.3
15
+ * Mar 8, 1998 yoshidam version 0.5.7 support start/endNamespaceDecl
16
+ * Jan 14, 1998 yoshidam version 0.5.4 support start/endCdataSection
17
+ * Jan 10, 1998 yoshidam version 0.5.3 support encoding map
18
+ * Nov 24, 1998 yoshidam version 0.5.0 support TEST version of expat
19
+ * Nov 5, 1998 yoshidam version 0.4.18 mIDs are initialized in Init_xmlparser
20
+ * Oct 28, 1998 yoshidam version 0.4.17 mIDs are stored into static vars
21
+ * Oct 13, 1998 yoshidam version 0.4.12 debug and speed up myEncodingConv
22
+ * Oct 7, 1998 yoshidam version 0.4.11 hold internal object into ivar
23
+ * Sep 18, 1998 yoshidam version 0.4.6
24
+ * Sep 8, 1998 yoshidam version 0.4.4
25
+ * Sep 3, 1998 yoshidam version 0.4.3
26
+ * Sep 1, 1998 yoshidam version 0.4.2
27
+ * Aug 28, 1998 yoshidam version 0.4.1
28
+ * Aug 22, 1998 yoshidam version 0.4.0
29
+ * Jul 6, 1998 yoshidam version 0.2
30
+ * Jun 30, 1998 yoshidam version 0.1
31
+ *
32
+ * XML_ENC_PATH: path of encoding map for Perl
33
+ * HAVE_XML_USEFOREIGNDTD: expat 1.95.5
34
+ * HAVE_XML_GETFEATURELIST: expat 1.95.5
35
+ * HAVE_XML_SETSKIPPEDENTITYHANDLER: expat 1.95.4
36
+ * HAVE_XML_PARSERRESET: expat 1.95.3
37
+ * HAVE_EXPAT_H: expat 1.95.0
38
+ * HAVE_XML_SETDOCTYPEDECLHANDLER: expat 19990728
39
+ * XML_DTD: expat 19990626
40
+ * NEW_EXPAT: expat 1.1
41
+ */
42
+
43
+ #include "ruby.h"
44
+ #include "rubyio.h"
45
+ #include <stdio.h>
46
+ #include <ctype.h>
47
+ #ifdef HAVE_EXPAT_H
48
+ # include "expat.h"
49
+ #else
50
+ # include "xmlparse.h"
51
+ #endif
52
+ #ifdef XML_ENC_PATH
53
+ # include <limits.h>
54
+ # include <sys/stat.h>
55
+ # include "encoding.h"
56
+ # ifndef PATH_MAX
57
+ # define PATH_MAX 256
58
+ # endif
59
+ #endif
60
+
61
+ static VALUE eXMLParserError;
62
+ static VALUE cXMLParser;
63
+ static VALUE cXMLEncoding;
64
+ static ID id_map;
65
+ static ID id_startElementHandler;
66
+ static ID id_endElementHandler;
67
+ static ID id_characterDataHandler;
68
+ static ID id_processingInstructionHandler;
69
+ static ID id_defaultHandler;
70
+ static ID id_defaultExpandHandler;
71
+ static ID id_unparsedEntityDeclHandler;
72
+ static ID id_notationDeclHandler;
73
+ static ID id_externalEntityRefHandler;
74
+ static ID id_unknownEncoding;
75
+ static ID id_convert;
76
+ #ifdef NEW_EXPAT
77
+ static ID id_commentHandler;
78
+ static ID id_startCdataSectionHandler;
79
+ static ID id_endCdataSectionHandler;
80
+ static ID id_startNamespaceDeclHandler;
81
+ static ID id_endNamespaceDeclHandler;
82
+ static ID id_notStandaloneHandler;
83
+ #endif
84
+ #ifdef HAVE_XML_SETDOCTYPEDECLHANDLER
85
+ static ID id_startDoctypeDeclHandler;
86
+ static ID id_endDoctypeDeclHandler;
87
+ #endif
88
+ #ifdef HAVE_EXPAT_H
89
+ static ID id_elementDeclHandler;
90
+ static ID id_attlistDeclHandler;
91
+ static ID id_xmlDeclHandler;
92
+ static ID id_entityDeclHandler;
93
+ #endif
94
+ #if 0
95
+ static ID id_externalParsedEntityDeclHandler;
96
+ static ID id_internalParsedEntityDeclHandler;
97
+ #endif
98
+ #ifdef HAVE_XML_SETSKIPPEDENTITYHANDLER
99
+ static ID id_skippedEntityHandler;
100
+ #endif
101
+
102
+ #define GET_PARSER(obj, parser) \
103
+ Data_Get_Struct(obj, XMLParser, parser)
104
+
105
+ typedef struct _XMLParser { /* per-object parser state wrapped by the Ruby data object */
106
+ XML_Parser parser; /* Expat parser handle; released with XML_ParserFree in XMLParser_free */
107
+ int iterator; /* NOTE(review): presumably non-zero while parsing in iterator (block) mode -- confirm */
108
+ int defaultCurrent; /* when non-zero, iterator handlers clear it and call XML_DefaultCurrent after yielding */
109
+ #ifdef NEW_EXPAT
110
+ const XML_Char** lastAttrs; /* attribute array received by the most recent start-element callback */
111
+ #endif
112
+ int tainted; /* non-zero: strings handed to Ruby are tainted (see taintObject) */
113
+ VALUE parent; /* parent parser object or nil; marked for the GC in XMLParser_mark */
114
+ char* context; /* when non-NULL, taintParser does not propagate taint to the parent */
115
+ } XMLParser;
116
+
117
+ static VALUE symDEFAULT;
118
+ static VALUE symSTART_ELEM;
119
+ static VALUE symEND_ELEM;
120
+ static VALUE symCDATA;
121
+ static VALUE symPI;
122
+ static VALUE symUNPARSED_ENTITY_DECL;
123
+ static VALUE symNOTATION_DECL;
124
+ static VALUE symEXTERNAL_ENTITY_REF;
125
+ #ifdef NEW_EXPAT
126
+ static VALUE symCOMMENT;
127
+ static VALUE symSTART_CDATA;
128
+ static VALUE symEND_CDATA;
129
+ static VALUE symSTART_NAMESPACE_DECL;
130
+ static VALUE symEND_NAMESPACE_DECL;
131
+ #endif
132
+ #ifdef HAVE_XML_SETDOCTYPEDECLHANDLER
133
+ static VALUE symSTART_DOCTYPE_DECL;
134
+ static VALUE symEND_DOCTYPE_DECL;
135
+ #endif
136
+ #ifdef HAVE_EXPAT_H
137
+ static VALUE symELEMENT_DECL;
138
+ static VALUE symATTLIST_DECL;
139
+ static VALUE symXML_DECL;
140
+ static VALUE symENTITY_DECL;
141
+ #endif
142
+ #if 0
143
+ static VALUE symEXTERNAL_PARSED_ENTITY_DECL;
144
+ static VALUE symINTERNAL_PARSED_ENTITY_DECL;
145
+ #endif
146
+ #if 0
147
+ static VALUE symUNKNOWN_ENCODING;
148
+ #endif
149
+ #ifdef HAVE_XML_SETSKIPPEDENTITYHANDLER
150
+ static VALUE symSKIPPED_ENTITY;
151
+ #endif
152
+
153
+ /* destructor */
154
+ static void
155
+ XMLParser_free(XMLParser* parser)
156
+ {
157
+ /* fprintf(stderr, "Delete XMLParser: %p->%p\n", parser, parser->parser);*/
158
+ if (parser->parser) {
159
+ XML_ParserFree(parser->parser);
160
+ parser->parser = NULL;
161
+ }
162
+ free(parser);
163
+ }
164
+
165
+ static void
166
+ XMLParser_mark(XMLParser* parser)
167
+ {
168
+ /* fprintf(stderr, "Mark XMLParser: %p->%p\n", parser, parser->parser);*/
169
+ if (!NIL_P(parser->parent)) {
170
+ XMLParser* parent;
171
+ GET_PARSER(parser->parent, parent);
172
+ rb_gc_mark(parser->parent);
173
+ }
174
+ }
175
+
176
+ static void
177
+ taintParser(XMLParser* parser) {
178
+ parser->tainted |= 1;
179
+ if (!NIL_P(parser->parent) && !parser->context) {
180
+ XMLParser* parent;
181
+ GET_PARSER(parser->parent, parent);
182
+ taintParser(parent);
183
+ }
184
+ }
185
+
186
+ inline static VALUE
187
+ taintObject(XMLParser* parser, VALUE obj) {
188
+ if (parser->tainted)
189
+ OBJ_TAINT(obj);
190
+ return obj;
191
+ }
192
+ #define TO_(o) (taintObject(parser, o))
193
+
194
+ inline static VALUE
195
+ freezeObject(VALUE obj) {
196
+ OBJ_FREEZE(obj);
197
+ return obj;
198
+ }
199
+ #define FO_(o) (freezeObject(o))
200
+
201
+
202
+ /* Event handlers for iterator */
203
+ static void
204
+ iterStartElementHandler(void *recv,
205
+ const XML_Char *name, const XML_Char **atts)
206
+ {
207
+ XMLParser* parser;
208
+ VALUE attrhash;
209
+
210
+ GET_PARSER(recv, parser);
211
+ #ifdef NEW_EXPAT
212
+ parser->lastAttrs = atts;
213
+ #endif
214
+ attrhash = rb_hash_new();
215
+ while (*atts) {
216
+ const char* key = *atts++;
217
+ const char* val = *atts++;
218
+ rb_hash_aset(attrhash,
219
+ FO_(TO_(rb_str_new2((char*)key))),
220
+ TO_(rb_str_new2((char*)val)));
221
+ }
222
+
223
+ rb_yield(rb_ary_new3(4, symSTART_ELEM,
224
+ TO_(rb_str_new2((char*)name)), attrhash, recv));
225
+ if (parser->defaultCurrent) {
226
+ parser->defaultCurrent = 0;
227
+ XML_DefaultCurrent(parser->parser);
228
+ }
229
+ }
230
+
231
+ static void
232
+ iterEndElementHandler(void *recv,
233
+ const XML_Char *name)
234
+ {
235
+ XMLParser* parser;
236
+ GET_PARSER(recv, parser);
237
+ rb_yield(rb_ary_new3(4, symEND_ELEM,
238
+ TO_(rb_str_new2((char*)name)), Qnil, recv));
239
+ if (parser->defaultCurrent) {
240
+ parser->defaultCurrent = 0;
241
+ XML_DefaultCurrent(parser->parser);
242
+ }
243
+ }
244
+
245
+ static void
246
+ iterCharacterDataHandler(void *recv,
247
+ const XML_Char *s,
248
+ int len)
249
+ {
250
+ XMLParser* parser;
251
+ GET_PARSER(recv, parser);
252
+ rb_yield(rb_ary_new3(4, symCDATA,
253
+ Qnil, TO_(rb_str_new((char*)s, len)), recv));
254
+ if (parser->defaultCurrent) {
255
+ parser->defaultCurrent = 0;
256
+ XML_DefaultCurrent(parser->parser);
257
+ }
258
+ }
259
+
260
+ static void
261
+ iterProcessingInstructionHandler(void *recv,
262
+ const XML_Char *target,
263
+ const XML_Char *data)
264
+ {
265
+ XMLParser* parser;
266
+ GET_PARSER(recv, parser);
267
+ rb_yield(rb_ary_new3(4, symPI,
268
+ TO_(rb_str_new2((char*)target)),
269
+ TO_(rb_str_new2((char*)data)), recv));
270
+ if (parser->defaultCurrent) {
271
+ parser->defaultCurrent = 0;
272
+ XML_DefaultCurrent(parser->parser);
273
+ }
274
+ }
275
+
276
+ static void
277
+ iterDefaultHandler(void *recv,
278
+ const XML_Char *s,
279
+ int len)
280
+ {
281
+ XMLParser* parser;
282
+ GET_PARSER(recv, parser);
283
+ rb_yield(rb_ary_new3(4, symDEFAULT,
284
+ Qnil, TO_(rb_str_new((char*)s, len)), recv));
285
+ if (parser->defaultCurrent) {
286
+ parser->defaultCurrent = 0;
287
+ /* XML_DefaultCurrent shoould not call in defaultHandler */
288
+ /* XML_DefaultCurrent(parser->parser); */
289
+ }
290
+ }
291
+
292
+ void
293
+ iterUnparsedEntityDeclHandler(void *recv,
294
+ const XML_Char *entityName,
295
+ const XML_Char *base,
296
+ const XML_Char *systemId,
297
+ const XML_Char *publicId,
298
+ const XML_Char *notationName)
299
+ {
300
+ XMLParser* parser;
301
+ VALUE valary;
302
+
303
+ GET_PARSER(recv, parser);
304
+ valary = rb_ary_new3(4, (base ? TO_(rb_str_new2((char*)base)) : Qnil),
305
+ TO_(rb_str_new2((char*)systemId)),
306
+ (publicId ? TO_(rb_str_new2((char*)publicId)) : Qnil),
307
+ TO_(rb_str_new2((char*)notationName)));
308
+ rb_yield(rb_ary_new3(4, symUNPARSED_ENTITY_DECL,
309
+ TO_(rb_str_new2((char*)entityName)),
310
+ valary, recv));
311
+ if (parser->defaultCurrent) {
312
+ parser->defaultCurrent = 0;
313
+ XML_DefaultCurrent(parser->parser);
314
+ }
315
+ }
316
+
317
+ void
318
+ iterNotationDeclHandler(void *recv,
319
+ const XML_Char *notationName,
320
+ const XML_Char *base,
321
+ const XML_Char *systemId,
322
+ const XML_Char *publicId)
323
+ {
324
+ XMLParser* parser;
325
+ VALUE valary;
326
+
327
+ GET_PARSER(recv, parser);
328
+ valary = rb_ary_new3(3,
329
+ (base ? TO_(rb_str_new2((char*)base)) : Qnil),
330
+ (systemId ? TO_(rb_str_new2((char*)systemId)) : Qnil),
331
+ (publicId ? TO_(rb_str_new2((char*)publicId)) : Qnil));
332
+ rb_yield(rb_ary_new3(4, symNOTATION_DECL,
333
+ TO_(rb_str_new2((char*)notationName)),
334
+ valary, recv));
335
+ if (parser->defaultCurrent) {
336
+ parser->defaultCurrent = 0;
337
+ XML_DefaultCurrent(parser->parser);
338
+ }
339
+ }
340
+
341
+ int
342
+ iterExternalEntityRefHandler(XML_Parser xmlparser,
343
+ const XML_Char *context,
344
+ const XML_Char *base,
345
+ const XML_Char *systemId,
346
+ const XML_Char *publicId)
347
+ {
348
+ XMLParser* parser;
349
+ VALUE recv;
350
+ VALUE valary;
351
+ VALUE ret;
352
+
353
+ recv = (VALUE)XML_GetUserData(xmlparser);
354
+ GET_PARSER(recv, parser);
355
+ valary = rb_ary_new3(3,
356
+ (base ? TO_(rb_str_new2((char*)base)) : Qnil),
357
+ (systemId ? TO_(rb_str_new2((char*)systemId)) : Qnil),
358
+ (publicId ? TO_(rb_str_new2((char*)publicId)) : Qnil));
359
+ ret = rb_yield(rb_ary_new3(4, symEXTERNAL_ENTITY_REF,
360
+ (context ? TO_(rb_str_new2((char*)context)) : Qnil),
361
+ valary, recv));
362
+ if (parser->defaultCurrent) {
363
+ parser->defaultCurrent = 0;
364
+ XML_DefaultCurrent(parser->parser);
365
+ }
366
+ /* The error status in this iterator block should be returned
367
+ by the exception. */
368
+ return 1;
369
+ }
370
+
371
+ #ifdef NEW_EXPAT
372
+ static void
373
+ iterCommentHandler(void *recv,
374
+ const XML_Char *s)
375
+ {
376
+ XMLParser* parser;
377
+ GET_PARSER(recv, parser);
378
+ rb_yield(rb_ary_new3(4, symCOMMENT,
379
+ Qnil, TO_(rb_str_new2((char*)s)), recv));
380
+ if (parser->defaultCurrent) {
381
+ parser->defaultCurrent = 0;
382
+ XML_DefaultCurrent(parser->parser);
383
+ }
384
+ }
385
+
386
+ static void
387
+ iterStartCdataSectionHandler(void *recv)
388
+ {
389
+ XMLParser* parser;
390
+ GET_PARSER(recv, parser);
391
+ rb_yield(rb_ary_new3(4, symSTART_CDATA, Qnil, Qnil, recv));
392
+ if (parser->defaultCurrent) {
393
+ parser->defaultCurrent = 0;
394
+ XML_DefaultCurrent(parser->parser);
395
+ }
396
+ }
397
+
398
+ static void
399
+ iterEndCdataSectionHandler(void *recv)
400
+ {
401
+ XMLParser* parser;
402
+ GET_PARSER(recv, parser);
403
+ rb_yield(rb_ary_new3(4, symEND_CDATA, Qnil, Qnil, recv));
404
+ if (parser->defaultCurrent) {
405
+ parser->defaultCurrent = 0;
406
+ XML_DefaultCurrent(parser->parser);
407
+ }
408
+ }
409
+
410
+ static void
411
+ iterStartNamespaceDeclHandler(void *recv,
412
+ const XML_Char *prefix,
413
+ const XML_Char *uri)
414
+ {
415
+ XMLParser* parser;
416
+ GET_PARSER(recv, parser);
417
+ rb_yield(rb_ary_new3(4, symSTART_NAMESPACE_DECL,
418
+ (prefix ? TO_(rb_str_new2((char*)prefix)) : Qnil),
419
+ (uri ? TO_(rb_str_new2((char*)uri)) : Qnil), recv));
420
+ if (parser->defaultCurrent) {
421
+ parser->defaultCurrent = 0;
422
+ XML_DefaultCurrent(parser->parser);
423
+ }
424
+ }
425
+
426
+ static void
427
+ iterEndNamespaceDeclHandler(void *recv,
428
+ const XML_Char *prefix)
429
+ {
430
+ XMLParser* parser;
431
+ GET_PARSER(recv, parser);
432
+ rb_yield(rb_ary_new3(4, symEND_NAMESPACE_DECL,
433
+ (prefix ? TO_(rb_str_new2((char*)prefix)) : Qnil),
434
+ Qnil, recv));
435
+ if (parser->defaultCurrent) {
436
+ parser->defaultCurrent = 0;
437
+ XML_DefaultCurrent(parser->parser);
438
+ }
439
+ }
440
+ #endif
441
+
442
/*
 * The doctype iterator handlers use symSTART_DOCTYPE_DECL and
 * symEND_DOCTYPE_DECL, which are declared only when
 * HAVE_XML_SETDOCTYPEDECLHANDLER is defined, so the handlers are
 * guarded by that same macro.
 */
#ifdef HAVE_XML_SETDOCTYPEDECLHANDLER
/*
 * Doctype-declaration-start callback for iterator mode.
 *
 * With HAVE_EXPAT_H the callback also receives the system id, public id
 * and internal-subset flag and yields
 * [symSTART_DOCTYPE_DECL, doctypeName,
 *  [sysid-or-nil, pubid-or-nil, true/false], recv];
 * otherwise the third element of the yielded array is nil.
 * Clears defaultCurrent and calls XML_DefaultCurrent if the flag was set.
 */
static void
#ifdef HAVE_EXPAT_H
iterStartDoctypeDeclHandler(void *recv,
                            const XML_Char *doctypeName,
                            const XML_Char *sysid,
                            const XML_Char *pubid,
                            int has_internal_subset)
#else
iterStartDoctypeDeclHandler(void *recv,
                            const XML_Char *doctypeName)
#endif
{
  XMLParser* parser;
  VALUE valary = Qnil;

  GET_PARSER(recv, parser);
#ifdef HAVE_EXPAT_H
  valary = rb_ary_new3(3,
                       (sysid ? TO_(rb_str_new2((char*)sysid)) : Qnil),
                       (pubid ? TO_(rb_str_new2((char*)pubid)) : Qnil),
                       (has_internal_subset ? Qtrue : Qfalse));
#endif
  rb_yield(rb_ary_new3(4, symSTART_DOCTYPE_DECL,
                       TO_(rb_str_new2((char*)doctypeName)),
                       valary, recv));
  if (parser->defaultCurrent) {
    parser->defaultCurrent = 0;
    XML_DefaultCurrent(parser->parser);
  }
}

/*
 * Doctype-declaration-end callback for iterator mode: yields
 * [symEND_DOCTYPE_DECL, nil, nil, recv].  Clears defaultCurrent and
 * calls XML_DefaultCurrent if the flag was set.
 */
static void
iterEndDoctypeDeclHandler(void *recv)
{
  XMLParser* parser;

  GET_PARSER(recv, parser);
  rb_yield(rb_ary_new3(4, symEND_DOCTYPE_DECL,
                       Qnil,
                       Qnil, recv));
  if (parser->defaultCurrent) {
    parser->defaultCurrent = 0;
    XML_DefaultCurrent(parser->parser);
  }
}
#endif
488
+
489
+
490
+ #ifdef HAVE_EXPAT_H
491
+
492
+ static VALUE
493
+ makeContentArray(XMLParser* parser, XML_Content* model)
494
+ {
495
+ static const char* content_type_name[] = {
496
+ NULL, "EMPTY", "ANY", "MIXED", "NAME", "CHOICE", "SEQ"
497
+ };
498
+ static const char* content_quant_name[] = {
499
+ "", "?", "*", "+"
500
+ };
501
+ int i;
502
+ VALUE children = Qnil;
503
+ const char* type_name = content_type_name[model->type];
504
+ const char* quant_name = content_quant_name[model->quant];
505
+ VALUE ret = rb_ary_new3(3,
506
+ TO_(rb_str_new2((char*)type_name)),
507
+ TO_(rb_str_new2((char*)quant_name)),
508
+ (model->name ? TO_(rb_str_new2((char*)model->name)) :
509
+ Qnil));
510
+ if (model->numchildren > 0) {
511
+ children = rb_ary_new();
512
+ for (i =0; i < model->numchildren; i++) {
513
+ VALUE child = makeContentArray(parser, model->children + i);
514
+ rb_ary_push(children, child);
515
+ }
516
+ }
517
+ rb_ary_push(ret, children);
518
+ return ret;
519
+ }
520
+
521
+
522
+
523
+ static void
524
+ iterElementDeclHandler(void *recv,
525
+ const XML_Char *name,
526
+ XML_Content *model)
527
+ {
528
+ XMLParser* parser;
529
+ GET_PARSER(recv, parser);
530
+ VALUE content = makeContentArray(parser, model);
531
+ rb_yield(rb_ary_new3(4, symELEMENT_DECL,
532
+ TO_(rb_str_new2(name)),
533
+ content, recv));
534
+ if (parser->defaultCurrent) {
535
+ parser->defaultCurrent = 0;
536
+ XML_DefaultCurrent(parser->parser);
537
+ }
538
+ }
539
+
540
+ static void
541
+ iterAttlistDeclHandler (void *recv,
542
+ const XML_Char *elname,
543
+ const XML_Char *attname,
544
+ const XML_Char *att_type,
545
+ const XML_Char *dflt,
546
+ int isrequired)
547
+ {
548
+ XMLParser* parser;
549
+ VALUE valary;
550
+
551
+ GET_PARSER(recv, parser);
552
+ valary = rb_ary_new3(4,
553
+ TO_(rb_str_new2((char*)attname)),
554
+ TO_(rb_str_new2((char*)att_type)),
555
+ (dflt ? TO_(rb_str_new2((char*)dflt)) : Qnil),
556
+ (isrequired ? Qtrue : Qfalse));
557
+ rb_yield(rb_ary_new3(4, symATTLIST_DECL,
558
+ TO_(rb_str_new2(elname)),
559
+ valary, recv));
560
+ if (parser->defaultCurrent) {
561
+ parser->defaultCurrent = 0;
562
+ XML_DefaultCurrent(parser->parser);
563
+ }
564
+ }
565
+
566
+ static void
567
+ iterXmlDeclHandler (void *recv,
568
+ const XML_Char *version,
569
+ const XML_Char *encoding,
570
+ int standalone)
571
+ {
572
+ XMLParser* parser;
573
+ VALUE valary;
574
+
575
+ GET_PARSER(recv, parser);
576
+ valary = rb_ary_new3(3,
577
+ (version ? TO_(rb_str_new2(version)) : Qnil),
578
+ (encoding ? TO_(rb_str_new2((char*)encoding)) : Qnil),
579
+ INT2FIX(standalone));
580
+ rb_yield(rb_ary_new3(4, symXML_DECL,
581
+ Qnil,
582
+ valary, recv));
583
+ if (parser->defaultCurrent) {
584
+ parser->defaultCurrent = 0;
585
+ XML_DefaultCurrent(parser->parser);
586
+ }
587
+ }
588
+
589
+ static void
590
+ iterEntityDeclHandler (void *recv,
591
+ const XML_Char *entityName,
592
+ int is_parameter_entity,
593
+ const XML_Char *value,
594
+ int value_length,
595
+ const XML_Char *base,
596
+ const XML_Char *systemId,
597
+ const XML_Char *publicId,
598
+ const XML_Char *notationName)
599
+ {
600
+ XMLParser* parser;
601
+ VALUE valary;
602
+
603
+ GET_PARSER(recv, parser);
604
+ valary = rb_ary_new3(6,
605
+ (is_parameter_entity ? Qtrue : Qfalse),
606
+ TO_(rb_str_new((char*)value, value_length)),
607
+ (base ? TO_(rb_str_new2((char*)base)) : Qnil),
608
+ (systemId ? TO_(rb_str_new2((char*)systemId)) : Qnil),
609
+ (publicId ? TO_(rb_str_new2((char*)publicId)) : Qnil),
610
+ (notationName ? TO_(rb_str_new2((char*)notationName))
611
+ : Qnil));
612
+ rb_yield(rb_ary_new3(4, symENTITY_DECL,
613
+ TO_(rb_str_new2(entityName)),
614
+ valary, recv));
615
+ if (parser->defaultCurrent) {
616
+ parser->defaultCurrent = 0;
617
+ XML_DefaultCurrent(parser->parser);
618
+ }
619
+ }
620
+
621
+ #endif
622
+
623
+ #if 0
624
+ static void
625
+ iterExternalParsedEntityDeclHandler(void *recv,
626
+ const XML_Char *entityName,
627
+ const XML_Char *base,
628
+ const XML_Char *systemId,
629
+ const XML_Char *publicId)
630
+ {
631
+ XMLParser* parser;
632
+ VALUE valary;
633
+
634
+ GET_PARSER(recv, parser);
635
+ valary = rb_ary_new3(3, (base ? TO_(rb_str_new2((char*)base)) : Qnil),
636
+ TO_(rb_str_new2((char*)systemId)),
637
+ (publicId ? TO_(rb_str_new2((char*)publicId)) : Qnil));
638
+ rb_yield(rb_ary_new3(4, symEXTERNAL_PARSED_ENTITY_DECL,
639
+ TO_(rb_str_new2((char*)entityName)),
640
+ valary, recv));
641
+ if (parser->defaultCurrent) {
642
+ parser->defaultCurrent = 0;
643
+ XML_DefaultCurrent(parser->parser);
644
+ }
645
+ }
646
+
647
+ static void
648
+ iterInternalParsedEntityDeclHandler(void *recv,
649
+ const XML_Char *entityName,
650
+ const XML_Char *replacementText,
651
+ int replacementTextLength)
652
+ {
653
+ XMLParser* parser;
654
+ GET_PARSER(recv, parser);
655
+ rb_yield(rb_ary_new3(4, symINTERNAL_PARSED_ENTITY_DECL,
656
+ TO_(rb_str_new2((char*)entityName)),
657
+ TO_(rb_str_new((char*)replacementText,
658
+ replacementTextLength)), recv));
659
+ if (parser->defaultCurrent) {
660
+ parser->defaultCurrent = 0;
661
+ XML_DefaultCurrent(parser->parser);
662
+ }
663
+ }
664
+ #endif
665
+
666
+ #ifdef HAVE_XML_SETSKIPPEDENTITYHANDLER
667
+ static void
668
+ iterSkippedEntityHandler(void *recv,
669
+ const XML_Char *entityName,
670
+ int is_parameter_entity)
671
+ {
672
+ XMLParser* parser;
673
+ GET_PARSER(recv, parser);
674
+ rb_yield(rb_ary_new3(4, symSKIPPED_ENTITY,
675
+ TO_(rb_str_new2((char*)entityName)),
676
+ INT2FIX(is_parameter_entity), recv));
677
+ if (parser->defaultCurrent) {
678
+ parser->defaultCurrent = 0;
679
+ XML_DefaultCurrent(parser->parser);
680
+ }
681
+ }
682
+ #endif
683
+
684
+
685
+
686
+ /* Event handlers for instance method */
687
+ static void
688
+ myStartElementHandler(void *recv,
689
+ const XML_Char *name, const XML_Char **atts)
690
+ {
691
+ XMLParser* parser;
692
+ VALUE attrhash;
693
+
694
+ GET_PARSER(recv, parser);
695
+ #ifdef NEW_EXPAT
696
+ parser->lastAttrs = atts;
697
+ #endif
698
+ attrhash = rb_hash_new();
699
+ while (*atts) {
700
+ const char* key = *atts++;
701
+ const char* val = *atts++;
702
+ rb_hash_aset(attrhash,
703
+ FO_(TO_(rb_str_new2((char*)key))),
704
+ TO_(rb_str_new2((char*)val)));
705
+ }
706
+ rb_funcall((VALUE)recv, id_startElementHandler, 2,
707
+ TO_(rb_str_new2((char*)name)), attrhash);
708
+ }
709
+
710
+ static void
711
+ myEndElementHandler(void *recv,
712
+ const XML_Char *name)
713
+ {
714
+ XMLParser* parser;
715
+ GET_PARSER(recv, parser);
716
+ rb_funcall((VALUE)recv, id_endElementHandler, 1,
717
+ TO_(rb_str_new2((char*)name)));
718
+ }
719
+
720
+ static void
721
+ myCharacterDataHandler(void *recv,
722
+ const XML_Char *s,
723
+ int len)
724
+ {
725
+ XMLParser* parser;
726
+ GET_PARSER(recv, parser);
727
+ rb_funcall((VALUE)recv, id_characterDataHandler, 1,
728
+ TO_(rb_str_new((char*)s, len)));
729
+ }
730
+
731
+ static void
732
+ myProcessingInstructionHandler(void *recv,
733
+ const XML_Char *target,
734
+ const XML_Char *data)
735
+ {
736
+ XMLParser* parser;
737
+ GET_PARSER(recv, parser);
738
+ rb_funcall((VALUE)recv, id_processingInstructionHandler, 2,
739
+ TO_(rb_str_new2((char*)target)),
740
+ TO_(rb_str_new2((char*)data)));
741
+ }
742
+
743
+ static void
744
+ myDefaultHandler(void *recv,
745
+ const XML_Char *s,
746
+ int len)
747
+ {
748
+ XMLParser* parser;
749
+ GET_PARSER(recv, parser);
750
+ rb_funcall((VALUE)recv, id_defaultHandler, 1,
751
+ TO_(rb_str_new((char*)s, len)));
752
+ }
753
+
754
+ #ifdef NEW_EXPAT
755
+ static void
756
+ myDefaultExpandHandler(void *recv,
757
+ const XML_Char *s,
758
+ int len)
759
+ {
760
+ XMLParser* parser;
761
+ GET_PARSER(recv, parser);
762
+ rb_funcall((VALUE)recv, id_defaultExpandHandler, 1,
763
+ TO_(rb_str_new((char*)s, len)));
764
+ }
765
+ #endif
766
+
767
+ void
768
+ myUnparsedEntityDeclHandler(void *recv,
769
+ const XML_Char *entityName,
770
+ const XML_Char *base,
771
+ const XML_Char *systemId,
772
+ const XML_Char *publicId,
773
+ const XML_Char *notationName)
774
+ {
775
+ XMLParser* parser;
776
+ GET_PARSER(recv, parser);
777
+ rb_funcall((VALUE)recv, id_unparsedEntityDeclHandler, 5,
778
+ TO_(rb_str_new2((char*)entityName)),
779
+ (base ? TO_(rb_str_new2((char*)base)) : Qnil),
780
+ TO_(rb_str_new2((char*)systemId)),
781
+ (publicId ? TO_(rb_str_new2((char*)publicId)) : Qnil),
782
+ TO_(rb_str_new2((char*)notationName)));
783
+ }
784
+
785
+ void
786
+ myNotationDeclHandler(void *recv,
787
+ const XML_Char *notationName,
788
+ const XML_Char *base,
789
+ const XML_Char *systemId,
790
+ const XML_Char *publicId)
791
+ {
792
+ XMLParser* parser;
793
+ GET_PARSER(recv, parser);
794
+ rb_funcall((VALUE)recv, id_notationDeclHandler, 4,
795
+ TO_(rb_str_new2((char*)notationName)),
796
+ (base ? TO_(rb_str_new2((char*)base)) : Qnil),
797
+ (systemId ? TO_(rb_str_new2((char*)systemId)) : Qnil),
798
+ (publicId ? TO_(rb_str_new2((char*)publicId)) : Qnil));
799
+ }
800
+
801
+ int
802
+ myExternalEntityRefHandler(XML_Parser xmlparser,
803
+ const XML_Char *context,
804
+ const XML_Char *base,
805
+ const XML_Char *systemId,
806
+ const XML_Char *publicId)
807
+ {
808
+ XMLParser* parser;
809
+ VALUE recv;
810
+ VALUE ret;
811
+
812
+ recv = (VALUE)XML_GetUserData(xmlparser);
813
+ GET_PARSER(recv, parser);
814
+ ret = rb_funcall(recv, id_externalEntityRefHandler, 4,
815
+ (context ? TO_(rb_str_new2((char*)context)): Qnil),
816
+ (base ? TO_(rb_str_new2((char*)base)) : Qnil),
817
+ (systemId ? TO_(rb_str_new2((char*)systemId)) : Qnil),
818
+ (publicId ? TO_(rb_str_new2((char*)publicId)) : Qnil));
819
+ /* The error status in this handler should be returned
820
+ by the exception. */
821
+ return Qnil;
822
+ }
823
+
824
+ #ifdef NEW_EXPAT
825
+ static void
826
+ myCommentHandler(void *recv,
827
+ const XML_Char *s)
828
+ {
829
+ XMLParser* parser;
830
+ GET_PARSER(recv, parser);
831
+ rb_funcall((VALUE)recv, id_commentHandler, 1,
832
+ TO_(rb_str_new2((char*)s)));
833
+ }
834
+
835
+ static void
836
+ myStartCdataSectionHandler(void *recv)
837
+ {
838
+ XMLParser* parser;
839
+ GET_PARSER(recv, parser);
840
+ rb_funcall((VALUE)recv, id_startCdataSectionHandler, 0);
841
+ }
842
+
843
+ static void
844
+ myEndCdataSectionHandler(void *recv)
845
+ {
846
+ XMLParser* parser;
847
+ GET_PARSER(recv, parser);
848
+ rb_funcall((VALUE)recv, id_endCdataSectionHandler, 0);
849
+ }
850
+
851
+ static void
852
+ myStartNamespaceDeclHandler(void *recv,
853
+ const XML_Char *prefix,
854
+ const XML_Char *uri)
855
+ {
856
+ XMLParser* parser;
857
+ GET_PARSER(recv, parser);
858
+ rb_funcall((VALUE)recv, id_startNamespaceDeclHandler, 2,
859
+ (prefix ? TO_(rb_str_new2((char*)prefix)) : Qnil),
860
+ (uri ? TO_(rb_str_new2((char*)uri)) : Qnil));
861
+ }
862
+
863
+ static void
864
+ myEndNamespaceDeclHandler(void *recv,
865
+ const XML_Char *prefix)
866
+ {
867
+ XMLParser* parser;
868
+ GET_PARSER(recv, parser);
869
+ rb_funcall((VALUE)recv, id_endNamespaceDeclHandler, 1,
870
+ (prefix ? TO_(rb_str_new2((char*)prefix)) : Qnil));
871
+ }
872
+
873
+ static int
874
+ myNotStandaloneHandler(void *recv)
875
+ {
876
+ XMLParser* parser;
877
+ VALUE v;
878
+
879
+ GET_PARSER(recv, parser);
880
+ v = rb_funcall((VALUE)recv, id_notStandaloneHandler, 0);
881
+ Check_Type(v, T_FIXNUM);
882
+ return FIX2INT(v);
883
+ }
884
+ #endif
885
+
886
+ #ifdef HAVE_XML_SETPARAMENTITYPARSING
887
+ static void
888
+ #ifdef HAVE_EXPAT_H
889
+ myStartDoctypeDeclHandler(void *recv,
890
+ const XML_Char *doctypeName,
891
+ const XML_Char *sysid,
892
+ const XML_Char *pubid,
893
+ int has_internal_subset)
894
+ #else
895
+ myStartDoctypeDeclHandler(void *recv,
896
+ const XML_Char *doctypeName)
897
+ #endif
898
+ {
899
+ XMLParser* parser;
900
+ GET_PARSER(recv, parser);
901
+ #ifdef HAVE_EXPAT_H
902
+ rb_funcall((VALUE)recv, id_startDoctypeDeclHandler, 4,
903
+ TO_(rb_str_new2((char*)doctypeName)),
904
+ (sysid ? TO_(rb_str_new2((char*)sysid)) : Qnil),
905
+ (pubid ? TO_(rb_str_new2((char*)pubid)) : Qnil),
906
+ (has_internal_subset ? Qtrue : Qfalse));
907
+ #else
908
+ rb_funcall((VALUE)recv, id_startDoctypeDeclHandler, 4,
909
+ TO_(rb_str_new2((char*)doctypeName)),
910
+ Qnil, Qnil, Qfalse);
911
+ #endif
912
+ }
913
+
914
+ static void
915
+ myEndDoctypeDeclHandler(void *recv)
916
+ {
917
+ XMLParser* parser;
918
+ GET_PARSER(recv, parser);
919
+ rb_funcall((VALUE)recv, id_endDoctypeDeclHandler, 0);
920
+ }
921
+ #endif
922
+
923
+
924
+ #ifdef HAVE_EXPAT_H
925
+
926
+ static void
927
+ myElementDeclHandler(void *recv,
928
+ const XML_Char *name,
929
+ XML_Content *model)
930
+ {
931
+ XMLParser* parser;
932
+ GET_PARSER(recv, parser);
933
+ VALUE content = makeContentArray(parser, model);
934
+ rb_funcall((VALUE)recv, id_elementDeclHandler, 2,
935
+ TO_(rb_str_new2(name)), content);
936
+ }
937
+
938
+ static void
939
+ myAttlistDeclHandler (void *recv,
940
+ const XML_Char *elname,
941
+ const XML_Char *attname,
942
+ const XML_Char *att_type,
943
+ const XML_Char *dflt,
944
+ int isrequired)
945
+ {
946
+ XMLParser* parser;
947
+ GET_PARSER(recv, parser);
948
+ rb_funcall((VALUE)recv, id_attlistDeclHandler, 5,
949
+ TO_(rb_str_new2(elname)),
950
+ TO_(rb_str_new2((char*)attname)),
951
+ TO_(rb_str_new2((char*)att_type)),
952
+ (dflt ? TO_(rb_str_new2((char*)dflt)) : Qnil),
953
+ (isrequired ? Qtrue : Qfalse));
954
+ }
955
+
956
+ static void
957
+ myXmlDeclHandler (void *recv,
958
+ const XML_Char *version,
959
+ const XML_Char *encoding,
960
+ int standalone)
961
+ {
962
+ XMLParser* parser;
963
+ GET_PARSER(recv, parser);
964
+ rb_funcall((VALUE)recv, id_xmlDeclHandler, 3,
965
+ (version ? TO_(rb_str_new2(version)) : Qnil),
966
+ (encoding ? TO_(rb_str_new2((char*)encoding)) : Qnil),
967
+ INT2FIX(standalone));
968
+ }
969
+
970
+ static void
971
+ myEntityDeclHandler (void *recv,
972
+ const XML_Char *entityName,
973
+ int is_parameter_entity,
974
+ const XML_Char *value,
975
+ int value_length,
976
+ const XML_Char *base,
977
+ const XML_Char *systemId,
978
+ const XML_Char *publicId,
979
+ const XML_Char *notationName)
980
+ {
981
+ XMLParser* parser;
982
+ GET_PARSER(recv, parser);
983
+ rb_funcall((VALUE)recv, id_entityDeclHandler, 7,
984
+ TO_(rb_str_new2(entityName)),
985
+ (is_parameter_entity ? Qtrue : Qfalse),
986
+ TO_(rb_str_new((char*)value, value_length)),
987
+ (base ? TO_(rb_str_new2((char*)base)) : Qnil),
988
+ (systemId ? TO_(rb_str_new2((char*)systemId)) : Qnil),
989
+ (publicId ? TO_(rb_str_new2((char*)publicId)) : Qnil),
990
+ (notationName ? TO_(rb_str_new2((char*)notationName))
991
+ : Qnil));
992
+ }
993
+
994
+ #endif
995
+
996
+ #if 0
997
+ static void
998
+ myExternalParsedEntityDeclHandler(void *recv,
999
+ const XML_Char *entityName,
1000
+ const XML_Char *base,
1001
+ const XML_Char *systemId,
1002
+ const XML_Char *publicId)
1003
+ {
1004
+ XMLParser* parser;
1005
+ GET_PARSER(recv, parser);
1006
+ rb_funcall((VALUE)recv, id_externalParsedEntityDeclHandler, 4,
1007
+ TO_(rb_str_new2((char*)entityName)),
1008
+ (base ? TO_(rb_str_new2((char*)base)) : Qnil),
1009
+ TO_(rb_str_new2((char*)systemId)),
1010
+ (publicId ? TO_(rb_str_new2((char*)publicId)) : Qnil));
1011
+ }
1012
+
1013
+ static void
1014
+ myInternalParsedEntityDeclHandler(void *recv,
1015
+ const XML_Char *entityName,
1016
+ const XML_Char *replacementText,
1017
+ int replacementTextLength)
1018
+ {
1019
+ XMLParser* parser;
1020
+ GET_PARSER(recv, parser);
1021
+ rb_funcall((VALUE)recv, id_internalParsedEntityDeclHandler, 2,
1022
+ TO_(rb_str_new2((char*)entityName)),
1023
+ TO_(rb_str_new((char*)replacementText,
1024
+ replacementTextLength)));
1025
+ }
1026
+ #endif
1027
+
1028
+
1029
+ static VALUE
1030
+ XMLEncoding_map(VALUE obj, VALUE i)
1031
+ {
1032
+ return i;
1033
+ }
1034
+
1035
+ static VALUE
1036
+ XMLEncoding_convert(VALUE obj, VALUE str)
1037
+ {
1038
+ return INT2FIX('?');
1039
+ }
1040
+
1041
+ static int
1042
+ myEncodingConv(void *data, const char *s)
1043
+ {
1044
+ VALUE v;
1045
+ int len;
1046
+ int slen = RSTRING(rb_ivar_get((VALUE)data,
1047
+ id_map))->ptr[*(unsigned char*)s];
1048
+
1049
+ v = rb_funcall((VALUE)data, id_convert, 1, rb_str_new((char*)s, -slen));
1050
+ switch (TYPE(v)) {
1051
+ case T_FIXNUM:
1052
+ return FIX2INT(v);
1053
+ case T_STRING:
1054
+ len = RSTRING(v)->len;
1055
+ if (len == 1) {
1056
+ return (unsigned char)*(RSTRING(v)->ptr);
1057
+ }
1058
+ else if (len >= 2) {
1059
+ return (unsigned char)*(RSTRING(v)->ptr) |
1060
+ (unsigned char)*(RSTRING(v)->ptr + 1) << 8;
1061
+ }
1062
+ }
1063
+ return 0;
1064
+ }
1065
+
1066
+ #if 0
1067
+ static int
1068
+ iterUnknownEncodingHandler(void *recv,
1069
+ const XML_Char *name,
1070
+ XML_Encoding *info)
1071
+ {
1072
+ XMLParser* parser;
1073
+ VALUE ret;
1074
+
1075
+ if (!rb_method_boundp(CLASS_OF((VALUE)recv), id_unknownEncoding, 0))
1076
+ return 0;
1077
+
1078
+ GET_PARSER(recv, parser);
1079
+ ret = rb_yield(rb_ary_new3(4, symUNKNOWN_ENCODING,
1080
+ TO_(rb_str_new2((char*)name)), Qnil, recv));
1081
+ if (TYPE(ret) == T_OBJECT && rb_obj_is_kind_of(ret, cXMLEncoding)) {
1082
+ int i;
1083
+ ID mid = rb_intern("map");
1084
+ VALUE cmap = rb_str_new(NULL, 256);
1085
+ rb_ivar_set(ret, id_map, cmap);
1086
+
1087
+ for (i = 0; i < 256; i++) {
1088
+ VALUE m = rb_funcall(ret, mid, 1, INT2FIX(i));
1089
+ RSTRING(cmap)->ptr[i] = info->map[i] = FIX2INT(m);
1090
+ }
1091
+ /* protect object form GC */
1092
+ rb_ivar_set(recv, rb_intern("_encoding"), ret);
1093
+ info->data = (void*)ret;
1094
+ info->convert = myEncodingConv;
1095
+ return 1;
1096
+ }
1097
+
1098
+ return 0;
1099
+ }
1100
+ #endif
1101
+
1102
+ #ifdef XML_ENC_PATH
1103
+ /*
1104
+ * Encoding map functions come from XML::Parser Version 2.19
1105
+ *
1106
+ * Copyright 1998 Larry Wall and Clark Cooper
1107
+ * All rights reserved.
1108
+ *
1109
+ * This program is free software; you can redistribute it and/or
1110
+ * modify it under the same terms as Perl itself.
1111
+ */
1112
+ static Encinfo*
1113
+ getEncinfo(char* data, int size)
1114
+ {
1115
+ Encmap_Header* header = (Encmap_Header*)data;
1116
+ unsigned short prefixes_size;
1117
+ unsigned short bytemap_size;
1118
+ Encinfo* ret;
1119
+ int i;
1120
+ PrefixMap* prefixes;
1121
+ unsigned short *bytemap;
1122
+
1123
+ if (size < sizeof(Encmap_Header) || ntohl(header->magic) != ENCMAP_MAGIC)
1124
+ return NULL;
1125
+ prefixes_size = ntohs(header->pfsize);
1126
+ bytemap_size = ntohs(header->bmsize);
1127
+ if (size != (sizeof(Encmap_Header) +
1128
+ prefixes_size * sizeof(PrefixMap) +
1129
+ bytemap_size * sizeof(unsigned short)))
1130
+ return NULL;
1131
+ if ((ret = (Encinfo*)malloc(sizeof(Encinfo))) == NULL) {
1132
+ return NULL;
1133
+ }
1134
+ ret->prefixes_size = prefixes_size;
1135
+ ret->bytemap_size = bytemap_size;
1136
+ for (i = 0; i < 256; i++)
1137
+ ret->firstmap[i] = ntohl(header->map[i]);
1138
+ prefixes = (PrefixMap*)(data + sizeof(Encmap_Header));
1139
+ bytemap = (unsigned short*)(data + sizeof(Encmap_Header)
1140
+ + sizeof(PrefixMap)*prefixes_size);
1141
+ if ((ret->prefixes =
1142
+ (PrefixMap*)malloc(sizeof(PrefixMap)*prefixes_size)) == NULL) {
1143
+ free(ret);
1144
+ return NULL;
1145
+ }
1146
+ if ((ret->bytemap =
1147
+ (unsigned short*)malloc(sizeof(unsigned short)*bytemap_size)) == NULL) {
1148
+ free(ret->prefixes);
1149
+ free(ret);
1150
+ return NULL;
1151
+ }
1152
+ for (i = 0; i < prefixes_size; i++, prefixes++) {
1153
+ ret->prefixes[i].min = prefixes->min;
1154
+ ret->prefixes[i].len = prefixes->len;
1155
+ ret->prefixes[i].bmap_start = ntohs(prefixes->bmap_start);
1156
+ memcpy(ret->prefixes[i].ispfx, prefixes->ispfx,
1157
+ sizeof(prefixes->ispfx) + sizeof(prefixes->ischar));
1158
+ }
1159
+ for (i = 0; i < bytemap_size; i++)
1160
+ ret->bytemap[i] = ntohs(bytemap[i]);
1161
+
1162
+ return ret;
1163
+ }
1164
+
1165
+ static int
1166
+ convertEncoding(Encinfo* enc, const char* seq)
1167
+ {
1168
+ PrefixMap* curpfx;
1169
+ int count;
1170
+ int index = 0;
1171
+
1172
+ for (count = 0; count < 4; count++) {
1173
+ unsigned char byte = (unsigned char)seq[count];
1174
+ unsigned char bndx;
1175
+ unsigned char bmsk;
1176
+ int offset;
1177
+
1178
+ curpfx = &enc->prefixes[index];
1179
+ offset = ((int)byte) - curpfx->min;
1180
+ if (offset < 0)
1181
+ break;
1182
+ if (offset >= curpfx->len && curpfx->len != 0)
1183
+ break;
1184
+
1185
+ bndx = byte >> 3;
1186
+ bmsk = 1 << (byte & 0x7);
1187
+
1188
+ if (curpfx->ispfx[bndx] & bmsk) {
1189
+ index = enc->bytemap[curpfx->bmap_start + offset];
1190
+ }
1191
+ else if (curpfx->ischar[bndx] & bmsk) {
1192
+ return enc->bytemap[curpfx->bmap_start + offset];
1193
+ }
1194
+ else
1195
+ break;
1196
+ }
1197
+
1198
+ return -1;
1199
+ }
1200
+
1201
+ static void
1202
+ releaseEncoding(Encinfo* enc)
1203
+ {
1204
+ if (enc) {
1205
+ if (enc->prefixes)
1206
+ free(enc->prefixes);
1207
+ if (enc->bytemap)
1208
+ free(enc->bytemap);
1209
+ free(enc);
1210
+ }
1211
+ }
1212
+
1213
+ static Encinfo*
1214
+ findEncoding(const char* encname)
1215
+ {
1216
+ FILE* fp;
1217
+ Encinfo* enc;
1218
+ struct stat st;
1219
+ int size;
1220
+ int len;
1221
+ char file[PATH_MAX] = "\0";
1222
+ const char* p;
1223
+ char* buf;
1224
+ #ifdef DOSISH
1225
+ const char sepchar = '\\';
1226
+ #else
1227
+ const char sepchar = '/';
1228
+ #endif
1229
+ const char* const encext = ".enc";
1230
+
1231
+ rb_secure(2);
1232
+ /* make map file path */
1233
+ if (XML_ENC_PATH != NULL) {
1234
+ strncpy(file, XML_ENC_PATH, PATH_MAX - 1);
1235
+ file[PATH_MAX - 1] = '\0';
1236
+ }
1237
+ len = strlen(file);
1238
+ if (len > 0 && len < PATH_MAX - 1 && file[len - 1] != sepchar)
1239
+ file[len++] = sepchar;
1240
+ for (p = encname; *p && len < PATH_MAX - 1; p++, len++) {
1241
+ file[len] = tolower(*p);
1242
+ }
1243
+ file[len] = '\0';
1244
+ // if (len < PATH_MAX - sizeof(encext))
1245
+ // strcat(file, encext);
1246
+ strncat(file, encext, PATH_MAX - len -1);
1247
+
1248
+ if ((fp = fopen(file, "rb")) == NULL) {
1249
+ return NULL;
1250
+ }
1251
+
1252
+ /* get file length */
1253
+ fstat(fileno(fp), &st);
1254
+ size = st.st_size;
1255
+
1256
+ if ((buf = (char*)malloc(size)) == NULL) {
1257
+ fclose(fp);
1258
+ return NULL;
1259
+ }
1260
+
1261
+ fread(buf, 1, size, fp);
1262
+ fclose(fp);
1263
+ enc = getEncinfo(buf, size);
1264
+ free(buf);
1265
+ return enc;
1266
+ }
1267
+
1268
+ #endif
1269
+
1270
+ static int
1271
+ myUnknownEncodingHandler(void *recv,
1272
+ const XML_Char *name,
1273
+ XML_Encoding *info)
1274
+ {
1275
+ XMLParser* parser;
1276
+ VALUE ret;
1277
+ if (!rb_method_boundp(CLASS_OF((VALUE)recv), id_unknownEncoding, 0))
1278
+ #ifndef XML_ENC_PATH
1279
+ return 0;
1280
+ #else
1281
+ {
1282
+ Encinfo* enc;
1283
+
1284
+ if ((enc = findEncoding(name)) != NULL) {
1285
+ memcpy(info->map, enc->firstmap, sizeof(int)*256);
1286
+ info->data = enc;
1287
+ info->convert = (int(*)(void*,const char*))convertEncoding;
1288
+ info->release = (void(*)(void*))releaseEncoding;
1289
+ return 1;
1290
+ }
1291
+ else
1292
+ return 0;
1293
+ }
1294
+ #endif
1295
+
1296
+ GET_PARSER(recv, parser);
1297
+ ret = rb_funcall((VALUE)recv, id_unknownEncoding, 1,
1298
+ TO_(rb_str_new2((char*)name)));
1299
+ if (TYPE(ret) == T_OBJECT && rb_obj_is_kind_of(ret, cXMLEncoding)) {
1300
+ int i;
1301
+ ID mid = rb_intern("map");
1302
+ VALUE cmap = rb_str_new(NULL, 256);
1303
+ rb_ivar_set(ret, id_map, cmap);
1304
+
1305
+ if (OBJ_TAINTED(ret))
1306
+ taintParser(parser);
1307
+ TO_(cmap);
1308
+
1309
+ for (i = 0; i < 256; i++) {
1310
+ VALUE m = rb_funcall(ret, mid, 1, INT2FIX(i));
1311
+ RSTRING(cmap)->ptr[i] = info->map[i] = FIX2INT(m);
1312
+ }
1313
+ /* protect object form GC */
1314
+ rb_ivar_set((VALUE)recv, rb_intern("_encoding"), ret);
1315
+ info->data = (void*)ret;
1316
+ info->convert = myEncodingConv;
1317
+
1318
+ return 1;
1319
+ }
1320
+
1321
+ return 0;
1322
+ }
1323
+
1324
+ #ifdef HAVE_XML_SETSKIPPEDENTITYHANDLER
1325
+ static void
1326
+ mySkippedEntityHandler(void *recv,
1327
+ const XML_Char *entityName,
1328
+ int is_parameter_entity)
1329
+ {
1330
+ XMLParser* parser;
1331
+ GET_PARSER(recv, parser);
1332
+ rb_funcall((VALUE)recv, id_skippedEntityHandler, 2,
1333
+ TO_(rb_str_new2((char*)entityName)),
1334
+ INT2FIX(is_parameter_entity));
1335
+ }
1336
+ #endif
1337
+
1338
+
1339
+ /* constructor */
1340
+ static VALUE
1341
+ XMLParser_new(int argc, VALUE* argv, VALUE klass)
1342
+ {
1343
+ XMLParser* parser;
1344
+ VALUE obj;
1345
+ VALUE arg1;
1346
+ VALUE arg2;
1347
+ VALUE arg3;
1348
+ int count;
1349
+ char* encoding = NULL;
1350
+ #ifdef NEW_EXPAT
1351
+ char* nssep = NULL;
1352
+ #endif
1353
+ char* context = NULL;
1354
+ XMLParser* rootparser = NULL;
1355
+ VALUE parent = Qnil;
1356
+
1357
+ count = rb_scan_args(argc, argv, "03", &arg1, &arg2, &arg3);
1358
+ if (count == 1) {
1359
+ /* new(encoding) */
1360
+ if (TYPE(arg1) != T_NIL) {
1361
+ Check_Type(arg1, T_STRING); /* encoding */
1362
+ encoding = RSTRING(arg1)->ptr;
1363
+ }
1364
+ }
1365
+ else if (count == 2) {
1366
+ /* new(encoding, nschar) */
1367
+ /* new(parser, context) */
1368
+ #ifdef NEW_EXPAT
1369
+ if (TYPE(arg1) != T_DATA) {
1370
+ if (TYPE(arg1) != T_NIL) {
1371
+ Check_Type(arg1, T_STRING); /* encoding */
1372
+ encoding = RSTRING(arg1)->ptr;
1373
+ }
1374
+ Check_Type(arg2, T_STRING); /* nschar */
1375
+ nssep = RSTRING(arg2)->ptr;
1376
+ }
1377
+ else {
1378
+ #endif
1379
+ Check_Type(arg1, T_DATA); /* parser */
1380
+ GET_PARSER(arg1, rootparser);
1381
+ if (!NIL_P(arg2)) {
1382
+ Check_Type(arg2, T_STRING); /* context */
1383
+ context = RSTRING(arg2)->ptr;
1384
+ }
1385
+ parent = arg1;
1386
+ #ifdef NEW_EXPAT
1387
+ }
1388
+ #endif
1389
+ }
1390
+ else if (count == 3) {
1391
+ /* new(parser, context, encoding) */
1392
+ Check_Type(arg1, T_DATA); /* parser */
1393
+ GET_PARSER(arg1, rootparser);
1394
+ if (!NIL_P(arg2)) {
1395
+ Check_Type(arg2, T_STRING); /* context */
1396
+ context = RSTRING(arg2)->ptr;
1397
+ }
1398
+ Check_Type(arg3, T_STRING); /* encoding */
1399
+ encoding = RSTRING(arg3)->ptr;
1400
+ parent = arg1;
1401
+ }
1402
+
1403
+ /* create object */
1404
+ obj = Data_Make_Struct(klass, XMLParser,
1405
+ XMLParser_mark, XMLParser_free, parser);
1406
+ /* create parser */
1407
+ if (rootparser == NULL) {
1408
+ #ifdef NEW_EXPAT
1409
+ if (nssep == NULL)
1410
+ parser->parser = XML_ParserCreate(encoding);
1411
+ else
1412
+ parser->parser = XML_ParserCreateNS(encoding, nssep[0]);
1413
+ #else
1414
+ parser->parser = XML_ParserCreate(encoding);
1415
+ #endif
1416
+ parser->tainted = 0;
1417
+ parser->context = NULL;
1418
+ }
1419
+ else {
1420
+ parser->parser = XML_ExternalEntityParserCreate(rootparser->parser,
1421
+ context, encoding);
1422
+ /* clear all inhrited handlers,
1423
+ because handlers should be set in "parse" method */
1424
+ XML_SetElementHandler(parser->parser, NULL, NULL);
1425
+ XML_SetCharacterDataHandler(parser->parser, NULL);
1426
+ XML_SetProcessingInstructionHandler(parser->parser, NULL);
1427
+ XML_SetDefaultHandler(parser->parser, NULL);
1428
+ XML_SetUnparsedEntityDeclHandler(parser->parser, NULL);
1429
+ XML_SetNotationDeclHandler(parser->parser, NULL);
1430
+ XML_SetExternalEntityRefHandler(parser->parser, NULL);
1431
+ #ifdef NEW_EXPAT
1432
+ XML_SetCommentHandler(parser->parser, NULL);
1433
+ XML_SetCdataSectionHandler(parser->parser, NULL, NULL);
1434
+ XML_SetNamespaceDeclHandler(parser->parser, NULL, NULL);
1435
+ XML_SetNotStandaloneHandler(parser->parser, NULL);
1436
+ #endif
1437
+ #ifdef HAVE_XML_SETDOCTYPEDECLHANDLER
1438
+ XML_SetDoctypeDeclHandler(parser->parser, NULL, NULL);
1439
+ #endif
1440
+ #ifdef HAVE_EXPAT_H
1441
+ XML_SetElementDeclHandler(parser->parser, NULL);
1442
+ XML_SetAttlistDeclHandler(parser->parser, NULL);
1443
+ XML_SetXmlDeclHandler(parser->parser, NULL);
1444
+ XML_SetEntityDeclHandler(parser->parser, NULL);
1445
+ #endif
1446
+ #if 0
1447
+ XML_SetExternalParsedEntityDeclHandler(parser->parser, NULL);
1448
+ XML_SetInternalParsedEntityDeclHandler(parser->parser, NULL);
1449
+ #endif
1450
+ #ifdef HAVE_XML_SETSKIPPEDENTITYHANDLER
1451
+ XML_SetSkippedEntityHandler(parser->parser, NULL);
1452
+ #endif
1453
+ if (rootparser->tainted)
1454
+ parser->tainted |= 1;
1455
+ parser->context = context;
1456
+ }
1457
+ if (!parser->parser)
1458
+ rb_raise(eXMLParserError, "cannot create parser");
1459
+
1460
+ /* setting up internal data */
1461
+ XML_SetUserData(parser->parser, (void*)obj);
1462
+ parser->iterator = 0;
1463
+ parser->defaultCurrent = 0;
1464
+ #ifdef NEW_EXPAT
1465
+ parser->lastAttrs = NULL;
1466
+ #endif
1467
+ parser->parent = parent;
1468
+
1469
+ rb_obj_call_init(obj, argc, argv);
1470
+
1471
+ return obj;
1472
+ }
1473
+
1474
+ static VALUE
1475
+ XMLParser_initialize(VALUE obj)
1476
+ {
1477
+ return Qnil;
1478
+ }
1479
+
1480
+ #ifdef HAVE_XML_PARSERRESET
1481
+ static VALUE
1482
+ XMLParser_reset(int argc, VALUE* argv, VALUE obj)
1483
+ {
1484
+ XMLParser* parser;
1485
+ VALUE vencoding = Qnil;
1486
+ char* encoding = NULL;
1487
+ int count;
1488
+
1489
+ count = rb_scan_args(argc, argv, "01", &vencoding);
1490
+
1491
+ GET_PARSER(obj, parser);
1492
+ if (count > 0 && TYPE(vencoding) != T_NIL) {
1493
+ Check_Type(vencoding, T_STRING);
1494
+ encoding = RSTRING(vencoding)->ptr;
1495
+ }
1496
+ XML_ParserReset(parser->parser, encoding);
1497
+ /* setting up internal data */
1498
+ XML_SetUserData(parser->parser, (void*)obj);
1499
+ parser->iterator = 0;
1500
+ parser->defaultCurrent = 0;
1501
+ #ifdef NEW_EXPAT
1502
+ parser->lastAttrs = NULL;
1503
+ #endif
1504
+ parser->tainted = 0;
1505
+
1506
+ return obj;
1507
+ }
1508
+ #endif
1509
+
1510
+ /* parse method */
1511
+ static VALUE
1512
+ XMLParser_parse(int argc, VALUE* argv, VALUE obj)
1513
+ {
1514
+ XMLParser* parser;
1515
+ int ret;
1516
+ XML_StartElementHandler start = NULL;
1517
+ XML_EndElementHandler end = NULL;
1518
+ #ifdef NEW_EXPAT
1519
+ XML_StartCdataSectionHandler startC = NULL;
1520
+ XML_EndCdataSectionHandler endC = NULL;
1521
+ XML_StartNamespaceDeclHandler startNS = NULL;
1522
+ XML_EndNamespaceDeclHandler endNS = NULL;
1523
+ #endif
1524
+ #ifdef HAVE_XML_SETDOCTYPEDECLHANDLER
1525
+ XML_StartDoctypeDeclHandler startDoctype = NULL;
1526
+ XML_EndDoctypeDeclHandler endDoctype = NULL;
1527
+ #endif
1528
+ VALUE str;
1529
+ VALUE isFinal;
1530
+ int final = 1;
1531
+ int count;
1532
+ int fromStream = 0;
1533
+ ID mid = rb_intern("gets");
1534
+ ID linebuf = rb_intern("_linebuf");
1535
+
1536
+ count = rb_scan_args(argc, argv, "02", &str, &isFinal);
1537
+ /* If "str" has public "gets" method, it will be considered *stream* */
1538
+ if (!rb_obj_is_kind_of(str, rb_cString) &&
1539
+ rb_method_boundp(CLASS_OF(str), mid, 1)) {
1540
+ fromStream = 1;
1541
+ }
1542
+ else if (!NIL_P(str)) {
1543
+ Check_Type(str, T_STRING);
1544
+ }
1545
+ if (count >= 2) {
1546
+ if (isFinal == Qtrue)
1547
+ final = 1;
1548
+ else if (isFinal == Qfalse)
1549
+ final = 0;
1550
+ else
1551
+ rb_raise(rb_eTypeError, "not valid value");
1552
+ }
1553
+
1554
+ GET_PARSER(obj, parser);
1555
+
1556
+ // parser->iterator = rb_iterator_p();
1557
+ parser->iterator = rb_block_given_p();
1558
+
1559
+ /* Setup event handlers */
1560
+
1561
+ /* Call as iterator */
1562
+ if (parser->iterator) {
1563
+ XML_SetElementHandler(parser->parser,
1564
+ iterStartElementHandler, iterEndElementHandler);
1565
+ XML_SetCharacterDataHandler(parser->parser,
1566
+ iterCharacterDataHandler);
1567
+ XML_SetProcessingInstructionHandler(parser->parser,
1568
+ iterProcessingInstructionHandler);
1569
+ /* check dummy default handler */
1570
+ #ifdef NEW_EXPAT
1571
+ if (rb_method_boundp(CLASS_OF(obj), id_defaultExpandHandler, 0))
1572
+ XML_SetDefaultHandlerExpand(parser->parser, iterDefaultHandler);
1573
+ else
1574
+ #endif
1575
+ if (rb_method_boundp(CLASS_OF(obj), id_defaultHandler, 0))
1576
+ XML_SetDefaultHandler(parser->parser, iterDefaultHandler);
1577
+
1578
+ if (rb_method_boundp(CLASS_OF(obj), id_unparsedEntityDeclHandler, 0))
1579
+ XML_SetUnparsedEntityDeclHandler(parser->parser,
1580
+ iterUnparsedEntityDeclHandler);
1581
+ if (rb_method_boundp(CLASS_OF(obj), id_notationDeclHandler, 0))
1582
+ XML_SetNotationDeclHandler(parser->parser,
1583
+ iterNotationDeclHandler);
1584
+ if (rb_method_boundp(CLASS_OF(obj), id_externalEntityRefHandler, 0))
1585
+ XML_SetExternalEntityRefHandler(parser->parser,
1586
+ iterExternalEntityRefHandler);
1587
+ #ifdef NEW_EXPAT
1588
+ if (rb_method_boundp(CLASS_OF(obj), id_commentHandler, 0))
1589
+ XML_SetCommentHandler(parser->parser, iterCommentHandler);
1590
+
1591
+ if (rb_method_boundp(CLASS_OF(obj), id_startCdataSectionHandler, 0))
1592
+ startC = iterStartCdataSectionHandler;
1593
+ if (rb_method_boundp(CLASS_OF(obj), id_endCdataSectionHandler, 0))
1594
+ endC = iterEndCdataSectionHandler;
1595
+ if (startC || endC)
1596
+ XML_SetCdataSectionHandler(parser->parser, startC, endC);
1597
+
1598
+ if (rb_method_boundp(CLASS_OF(obj), id_startNamespaceDeclHandler, 0))
1599
+ startNS = iterStartNamespaceDeclHandler;
1600
+ if (rb_method_boundp(CLASS_OF(obj), id_endNamespaceDeclHandler, 0))
1601
+ endNS = iterEndNamespaceDeclHandler;
1602
+ if (startNS || endNS)
1603
+ XML_SetNamespaceDeclHandler(parser->parser, startNS, endNS);
1604
+ if (rb_method_boundp(CLASS_OF(obj), id_notStandaloneHandler, 0))
1605
+ XML_SetNotStandaloneHandler(parser->parser, myNotStandaloneHandler);
1606
+ #endif
1607
+ #ifdef HAVE_XML_SETDOCTYPEDECLHANDLER
1608
+ if (rb_method_boundp(CLASS_OF(obj), id_startDoctypeDeclHandler, 0))
1609
+ startDoctype = iterStartDoctypeDeclHandler;
1610
+ if (rb_method_boundp(CLASS_OF(obj), id_endDoctypeDeclHandler, 0))
1611
+ endDoctype = iterEndDoctypeDeclHandler;
1612
+ if (startDoctype || endDoctype)
1613
+ XML_SetDoctypeDeclHandler(parser->parser, startDoctype, endDoctype);
1614
+ #endif
1615
+ #ifdef HAVE_EXPAT_H
1616
+ if (rb_method_boundp(CLASS_OF(obj), id_elementDeclHandler, 0))
1617
+ XML_SetElementDeclHandler(parser->parser, iterElementDeclHandler);
1618
+ if (rb_method_boundp(CLASS_OF(obj), id_attlistDeclHandler, 0))
1619
+ XML_SetAttlistDeclHandler(parser->parser, iterAttlistDeclHandler);
1620
+ if (rb_method_boundp(CLASS_OF(obj), id_xmlDeclHandler, 0))
1621
+ XML_SetXmlDeclHandler(parser->parser, iterXmlDeclHandler);
1622
+ if (rb_method_boundp(CLASS_OF(obj), id_entityDeclHandler, 0))
1623
+ XML_SetEntityDeclHandler(parser->parser, iterEntityDeclHandler);
1624
+ #endif
1625
+ #if 0
1626
+ if (rb_method_boundp(CLASS_OF(obj), id_externalParsedEntityDeclHandler, 0))
1627
+ XML_SetExternalParsedEntityDeclHandler(parser->parser,
1628
+ iterExternalParsedEntityDeclHandler);
1629
+ if (rb_method_boundp(CLASS_OF(obj), id_internalParsedEntityDeclHandler, 0))
1630
+ XML_SetInternalParsedEntityDeclHandler(parser->parser,
1631
+ iterInternalParsedEntityDeclHandler);
1632
+ #endif
1633
+ /* Call non-iterator version of UnknownEncoding handler,
1634
+ because the porcedure block often returns the unexpected value. */
1635
+ XML_SetUnknownEncodingHandler(parser->parser,
1636
+ myUnknownEncodingHandler,
1637
+ (void*)obj);
1638
+ #ifdef HAVE_XML_SETSKIPPEDENTITYHANDLER
1639
+ if (rb_method_boundp(CLASS_OF(obj), id_skippedEntityHandler, 0))
1640
+ XML_SetSkippedEntityHandler(parser->parser, iterSkippedEntityHandler);
1641
+ #endif
1642
+ }
1643
+ /* Call as not iterator */
1644
+ else {
1645
+ if (rb_method_boundp(CLASS_OF(obj), id_startElementHandler, 0))
1646
+ start = myStartElementHandler;
1647
+ if (rb_method_boundp(CLASS_OF(obj), id_endElementHandler, 0))
1648
+ end = myEndElementHandler;
1649
+ if (start || end)
1650
+ XML_SetElementHandler(parser->parser, start, end);
1651
+ if (rb_method_boundp(CLASS_OF(obj), id_characterDataHandler, 0))
1652
+ XML_SetCharacterDataHandler(parser->parser,
1653
+ myCharacterDataHandler);
1654
+ if (rb_method_boundp(CLASS_OF(obj),
1655
+ id_processingInstructionHandler, 0))
1656
+ XML_SetProcessingInstructionHandler(parser->parser,
1657
+ myProcessingInstructionHandler);
1658
+ #ifdef NEW_EXPAT
1659
+ if (rb_method_boundp(CLASS_OF(obj), id_defaultExpandHandler, 0))
1660
+ XML_SetDefaultHandlerExpand(parser->parser, myDefaultExpandHandler);
1661
+ else
1662
+ #endif
1663
+ if (rb_method_boundp(CLASS_OF(obj), id_defaultHandler, 0)) {
1664
+ XML_SetDefaultHandler(parser->parser, myDefaultHandler);
1665
+ }
1666
+ if (rb_method_boundp(CLASS_OF(obj), id_unparsedEntityDeclHandler, 0))
1667
+ XML_SetUnparsedEntityDeclHandler(parser->parser,
1668
+ myUnparsedEntityDeclHandler);
1669
+ if (rb_method_boundp(CLASS_OF(obj), id_notationDeclHandler, 0))
1670
+ XML_SetNotationDeclHandler(parser->parser,
1671
+ myNotationDeclHandler);
1672
+ if (rb_method_boundp(CLASS_OF(obj), id_externalEntityRefHandler, 0))
1673
+ XML_SetExternalEntityRefHandler(parser->parser,
1674
+ myExternalEntityRefHandler);
1675
+ #ifdef NEW_EXPAT
1676
+ if (rb_method_boundp(CLASS_OF(obj), id_commentHandler, 0))
1677
+ XML_SetCommentHandler(parser->parser, myCommentHandler);
1678
+
1679
+ if (rb_method_boundp(CLASS_OF(obj), id_startCdataSectionHandler, 0))
1680
+ startC = myStartCdataSectionHandler;
1681
+ if (rb_method_boundp(CLASS_OF(obj), id_endCdataSectionHandler, 0))
1682
+ endC = myEndCdataSectionHandler;
1683
+ if (startC || endC)
1684
+ XML_SetCdataSectionHandler(parser->parser, startC, endC);
1685
+
1686
+ if (rb_method_boundp(CLASS_OF(obj), id_startNamespaceDeclHandler, 0))
1687
+ startNS = myStartNamespaceDeclHandler;
1688
+ if (rb_method_boundp(CLASS_OF(obj), id_endNamespaceDeclHandler, 0))
1689
+ endNS = myEndNamespaceDeclHandler;
1690
+ if (startNS || endNS)
1691
+ XML_SetNamespaceDeclHandler(parser->parser, startNS, endNS);
1692
+ if (rb_method_boundp(CLASS_OF(obj), id_notStandaloneHandler, 0))
1693
+ XML_SetNotStandaloneHandler(parser->parser, myNotStandaloneHandler);
1694
+ #endif
1695
+ #ifdef HAVE_XML_SETDOCTYPEDECLHANDLER
1696
+ if (rb_method_boundp(CLASS_OF(obj), id_startDoctypeDeclHandler, 0))
1697
+ startDoctype = myStartDoctypeDeclHandler;
1698
+ if (rb_method_boundp(CLASS_OF(obj), id_endDoctypeDeclHandler, 0))
1699
+ endDoctype = myEndDoctypeDeclHandler;
1700
+ if (startDoctype || endDoctype)
1701
+ XML_SetDoctypeDeclHandler(parser->parser, startDoctype, endDoctype);
1702
+ #endif
1703
+ #ifdef HAVE_EXPAT_H
1704
+ if (rb_method_boundp(CLASS_OF(obj), id_elementDeclHandler, 0))
1705
+ XML_SetElementDeclHandler(parser->parser, myElementDeclHandler);
1706
+ if (rb_method_boundp(CLASS_OF(obj), id_attlistDeclHandler, 0))
1707
+ XML_SetAttlistDeclHandler(parser->parser, myAttlistDeclHandler);
1708
+ if (rb_method_boundp(CLASS_OF(obj), id_xmlDeclHandler, 0))
1709
+ XML_SetXmlDeclHandler(parser->parser, myXmlDeclHandler);
1710
+ if (rb_method_boundp(CLASS_OF(obj), id_entityDeclHandler, 0))
1711
+ XML_SetEntityDeclHandler(parser->parser, myEntityDeclHandler);
1712
+ #endif
1713
+ #if 0
1714
+ if (rb_method_boundp(CLASS_OF(obj), id_externalParsedEntityDeclHandler, 0))
1715
+ XML_SetExternalParsedEntityDeclHandler(parser->parser,
1716
+ myExternalParsedEntityDeclHandler);
1717
+ if (rb_method_boundp(CLASS_OF(obj), id_internalParsedEntityDeclHandler, 0))
1718
+ XML_SetInternalParsedEntityDeclHandler(parser->parser,
1719
+ myInternalParsedEntityDeclHandler);
1720
+ #endif
1721
+ XML_SetUnknownEncodingHandler(parser->parser,
1722
+ myUnknownEncodingHandler,
1723
+ (void*)obj);
1724
+ #ifdef HAVE_XML_SETSKIPPEDENTITYHANDLER
1725
+ if (rb_method_boundp(CLASS_OF(obj), id_skippedEntityHandler, 0))
1726
+ XML_SetSkippedEntityHandler(parser->parser, mySkippedEntityHandler);
1727
+ #endif
1728
+ }
1729
+
1730
+ /* Parse from stream (probably slightly slow) */
1731
+ if (fromStream) {
1732
+ VALUE buf;
1733
+
1734
+ if (OBJ_TAINTED(str))
1735
+ taintParser(parser);
1736
+ do {
1737
+ buf = rb_funcall(str, mid, 0);
1738
+ if (!NIL_P(buf)) {
1739
+ Check_Type(buf, T_STRING);
1740
+ if (OBJ_TAINTED(buf))
1741
+ taintParser(parser);
1742
+ rb_ivar_set(obj, linebuf, buf); /* protect buf from GC (reasonable?)*/
1743
+ ret = XML_Parse(parser->parser,
1744
+ RSTRING(buf)->ptr, RSTRING(buf)->len, 0);
1745
+ }
1746
+ else {
1747
+ ret = XML_Parse(parser->parser, NULL, 0, 1);
1748
+ }
1749
+ if (!ret) {
1750
+ int err = XML_GetErrorCode(parser->parser);
1751
+ const char* errStr = XML_ErrorString(err);
1752
+ rb_raise(eXMLParserError, (char*)errStr);
1753
+ }
1754
+ } while (!NIL_P(buf));
1755
+ return Qnil;
1756
+ }
1757
+
1758
+ /* Parse string */
1759
+ if (!NIL_P(str)) {
1760
+ if (OBJ_TAINTED(str))
1761
+ taintParser(parser);
1762
+ ret = XML_Parse(parser->parser,
1763
+ RSTRING(str)->ptr, RSTRING(str)->len, final);
1764
+ }
1765
+ else
1766
+ ret = XML_Parse(parser->parser, NULL, 0, final);
1767
+ if (!ret) {
1768
+ int err = XML_GetErrorCode(parser->parser);
1769
+ const char* errStr = XML_ErrorString(err);
1770
+ rb_raise(eXMLParserError, (char*)errStr);
1771
+ }
1772
+
1773
+ return Qnil;
1774
+ }
1775
+
1776
+ /* done method */
1777
+ static VALUE
1778
+ XMLParser_done(VALUE obj)
1779
+ {
1780
+ XMLParser* parser;
1781
+
1782
+ GET_PARSER(obj, parser);
1783
+ if (parser->parser) {
1784
+ XML_ParserFree(parser->parser);
1785
+ parser->parser = NULL;
1786
+ }
1787
+ return Qnil;
1788
+ }
1789
+
1790
+ /* defaultCurrent method */
1791
+ static VALUE
1792
+ XMLParser_defaultCurrent(VALUE obj)
1793
+ {
1794
+ XMLParser* parser;
1795
+
1796
+ GET_PARSER(obj, parser);
1797
+ if (!(parser->iterator)) {
1798
+ XML_DefaultCurrent(parser->parser);
1799
+ }
1800
+ else {
1801
+ parser->defaultCurrent = 1;
1802
+ }
1803
+ return Qnil;
1804
+ }
1805
+
1806
+ /* line method */
1807
+ static VALUE
1808
+ XMLParser_getCurrentLineNumber(VALUE obj)
1809
+ {
1810
+ XMLParser* parser;
1811
+ int line;
1812
+
1813
+ GET_PARSER(obj, parser);
1814
+ line = XML_GetCurrentLineNumber(parser->parser);
1815
+
1816
+ return INT2FIX(line);
1817
+ }
1818
+
1819
+ /* column method */
1820
+ static VALUE
1821
+ XMLParser_getCurrentColumnNumber(VALUE obj)
1822
+ {
1823
+ XMLParser* parser;
1824
+ int column;
1825
+
1826
+ GET_PARSER(obj, parser);
1827
+ column = XML_GetCurrentColumnNumber(parser->parser);
1828
+
1829
+ return INT2FIX(column);
1830
+ }
1831
+
1832
+ /* byte index method */
1833
+ static VALUE
1834
+ XMLParser_getCurrentByteIndex(VALUE obj)
1835
+ {
1836
+ XMLParser* parser;
1837
+ long pos;
1838
+
1839
+ GET_PARSER(obj, parser);
1840
+ pos = XML_GetCurrentByteIndex(parser->parser);
1841
+
1842
+ return INT2FIX(pos);
1843
+ }
1844
+
1845
+ /* set URI base */
1846
+ static VALUE
1847
+ XMLParser_setBase(VALUE obj, VALUE base)
1848
+ {
1849
+ XMLParser* parser;
1850
+ int ret;
1851
+
1852
+ Check_Type(base, T_STRING);
1853
+ GET_PARSER(obj, parser);
1854
+ if (OBJ_TAINTED(base))
1855
+ taintParser(parser);
1856
+ ret = XML_SetBase(parser->parser, RSTRING(base)->ptr);
1857
+
1858
+ return INT2FIX(ret);
1859
+ }
1860
+
1861
+ /* get URI base */
1862
+ static VALUE
1863
+ XMLParser_getBase(VALUE obj)
1864
+ {
1865
+ XMLParser* parser;
1866
+ const XML_Char* ret;
1867
+
1868
+ GET_PARSER(obj, parser);
1869
+ ret = XML_GetBase(parser->parser);
1870
+ if (!ret)
1871
+ return Qnil;
1872
+
1873
+ return TO_(rb_str_new2((char*)ret));
1874
+ }
1875
+
1876
+ #ifdef NEW_EXPAT
1877
+ #if 0
1878
+ static VALUE
1879
+ XMLParser_getSpecifiedAttributes(VALUE obj)
1880
+ {
1881
+ XMLParser* parser;
1882
+ int count;
1883
+ const XML_Char** atts;
1884
+ VALUE attrhash;
1885
+
1886
+ GET_PARSER(obj, parser);
1887
+ atts = parser->lastAttrs;
1888
+ if (!atts)
1889
+ return Qnil;
1890
+ count = XML_GetSpecifiedAttributeCount(parser->parser)/2;
1891
+ attrhash = rb_hash_new();
1892
+ while (*atts) {
1893
+ const char* key = *atts++;
1894
+ atts++;
1895
+ rb_hash_aset(attrhash, FO_(TO_(rb_str_new2((char*)key))),
1896
+ (count-- > 0) ? Qtrue: Qfalse);
1897
+ }
1898
+
1899
+ return attrhash;
1900
+ }
1901
+ #else
1902
+ static VALUE
1903
+ XMLParser_getSpecifiedAttributes(VALUE obj)
1904
+ {
1905
+ XMLParser* parser;
1906
+ int i, count;
1907
+ const XML_Char** atts;
1908
+ VALUE attrarray;
1909
+
1910
+ GET_PARSER(obj, parser);
1911
+ atts = parser->lastAttrs;
1912
+ if (!atts)
1913
+ return Qnil;
1914
+ count = XML_GetSpecifiedAttributeCount(parser->parser)/2;
1915
+ attrarray = rb_ary_new2(count);
1916
+ for (i = 0; i < count; i++, atts+=2) {
1917
+ const char* key = *atts;
1918
+ rb_ary_push(attrarray, TO_(rb_str_new2((char*)key)));
1919
+ }
1920
+
1921
+ return attrarray;
1922
+ }
1923
+ #endif
1924
+
1925
+ static VALUE
1926
+ XMLParser_getCurrentByteCount(VALUE obj)
1927
+ {
1928
+ XMLParser* parser;
1929
+
1930
+ GET_PARSER(obj, parser);
1931
+ return INT2FIX(XML_GetCurrentByteCount(parser->parser));
1932
+ }
1933
+ #endif
1934
+
1935
+ #ifdef XML_DTD
1936
+ static VALUE
1937
+ XMLParser_setParamEntityParsing(VALUE obj, VALUE parsing)
1938
+ {
1939
+ XMLParser* parser;
1940
+ int ret;
1941
+
1942
+ Check_Type(parsing, T_FIXNUM);
1943
+ GET_PARSER(obj, parser);
1944
+ ret = XML_SetParamEntityParsing(parser->parser, FIX2INT(parsing));
1945
+
1946
+ return INT2FIX(ret);
1947
+ }
1948
+ #endif
1949
+
1950
+ static VALUE
1951
+ XMLParser_s_expatVersion(VALUE obj)
1952
+ {
1953
+ #if defined(HAVE_EXPAT_H)
1954
+ return rb_str_new2(XML_ExpatVersion());
1955
+ #elif defined(EXPAT_1_2)
1956
+ return rb_str_new2("1.2");
1957
+ #elif defined(NEW_EXPAT)
1958
+ return rb_str_new2("1.1");
1959
+ #else
1960
+ return rb_str_new2("1.0");
1961
+ #endif
1962
+ }
1963
+
1964
+ #ifdef HAVE_EXPAT_H
1965
+ static VALUE
1966
+ XMLParser_setReturnNSTriplet(VALUE obj, VALUE do_nst)
1967
+ {
1968
+ XMLParser* parser;
1969
+ int nst;
1970
+
1971
+ GET_PARSER(obj, parser);
1972
+ switch (TYPE(do_nst)) {
1973
+ case T_TRUE:
1974
+ nst = 1;
1975
+ break;
1976
+ case T_FALSE:
1977
+ nst = 0;
1978
+ break;
1979
+ case T_FIXNUM:
1980
+ nst = FIX2INT(do_nst);
1981
+ break;
1982
+ default:
1983
+ rb_raise(rb_eTypeError, "not valid value");
1984
+ }
1985
+ XML_SetReturnNSTriplet(parser->parser, nst);
1986
+
1987
+ return Qnil;
1988
+ }
1989
+
1990
+
1991
+ static VALUE
1992
+ XMLParser_getInputContext(VALUE obj)
1993
+ {
1994
+ XMLParser* parser;
1995
+ const char* buffer;
1996
+ int offset;
1997
+ int size;
1998
+ VALUE ret = Qnil;
1999
+
2000
+ GET_PARSER(obj, parser);
2001
+ buffer = XML_GetInputContext(parser->parser,
2002
+ &offset,
2003
+ &size);
2004
+ if (buffer && size > 0) {
2005
+ ret = rb_ary_new3(2,
2006
+ TO_(rb_str_new(buffer, size)),
2007
+ INT2FIX(offset));
2008
+ }
2009
+
2010
+ return ret;
2011
+ }
2012
+
2013
+
2014
+ static VALUE
2015
+ XMLParser_getIdAttrribute(VALUE obj)
2016
+ {
2017
+ XMLParser* parser;
2018
+ int idattr;
2019
+ const XML_Char** atts;
2020
+
2021
+ GET_PARSER(obj, parser);
2022
+ atts = parser->lastAttrs;
2023
+ if (!atts)
2024
+ return Qnil;
2025
+ idattr = XML_GetIdAttributeIndex(parser->parser);
2026
+ if (idattr < 0)
2027
+ return Qnil;
2028
+ return TO_(rb_str_new2((char*)atts[idattr]));
2029
+ }
2030
+ #endif
2031
+
2032
+ #ifdef HAVE_XML_USEFOREIGNDTD
2033
+ static VALUE
2034
+ XMLParser_useForeignDTD(VALUE obj, VALUE useDTD)
2035
+ {
2036
+ XMLParser* parser;
2037
+ int dtd;
2038
+ int ret;
2039
+
2040
+ GET_PARSER(obj, parser);
2041
+ switch (TYPE(useDTD)) {
2042
+ case T_TRUE:
2043
+ dtd = 1;
2044
+ break;
2045
+ case T_FALSE:
2046
+ dtd = 0;
2047
+ break;
2048
+ case T_FIXNUM:
2049
+ dtd = FIX2INT(useDTD);
2050
+ break;
2051
+ default:
2052
+ rb_raise(rb_eTypeError, "not valid value");
2053
+ }
2054
+ ret = XML_UseForeignDTD(parser->parser, dtd);
2055
+
2056
+ return INT2FIX(ret);
2057
+ }
2058
+ #endif
2059
+
2060
+ #ifdef HAVE_XML_GETFEATURELIST
2061
+ static VALUE
2062
+ XMLParser_s_getFeatureList(VALUE obj)
2063
+ {
2064
+ const XML_Feature* list;
2065
+ VALUE ret = rb_hash_new();
2066
+
2067
+ list = XML_GetFeatureList();
2068
+ while (list && list->feature) {
2069
+ rb_hash_aset(ret, FO_(rb_str_new2(list->name)), INT2NUM(list->value));
2070
+ list++;
2071
+ }
2072
+
2073
+ return ret;
2074
+ }
2075
+ #endif
2076
+
2077
+ void
2078
+ Init_xmlparser()
2079
+ {
2080
+ VALUE mXML;
2081
+
2082
+ eXMLParserError = rb_define_class("XMLParserError", rb_eStandardError);
2083
+ cXMLParser = rb_define_class("XMLParser", rb_cObject);
2084
+ cXMLEncoding = rb_define_class("XMLEncoding", rb_cObject);
2085
+
2086
+ /* Class name aliases */
2087
+ if (rb_const_defined(rb_cObject, rb_intern("XML")) == Qtrue)
2088
+ mXML = rb_const_get(rb_cObject, rb_intern("XML"));
2089
+ else
2090
+ mXML = rb_define_module("XML");
2091
+ rb_define_const(mXML, "ParserError", eXMLParserError);
2092
+ rb_define_const(cXMLParser, "Error", eXMLParserError);
2093
+ rb_define_const(mXML, "Parser", cXMLParser);
2094
+ rb_define_const(mXML, "Encoding", cXMLEncoding);
2095
+
2096
+ rb_define_singleton_method(cXMLParser, "new", XMLParser_new, -1);
2097
+ rb_define_singleton_method(cXMLParser, "expatVersion",
2098
+ XMLParser_s_expatVersion, 0);
2099
+ rb_define_method(cXMLParser, "initialize", XMLParser_initialize, -1);
2100
+ rb_define_method(cXMLParser, "parse", XMLParser_parse, -1);
2101
+ rb_define_method(cXMLParser, "done", XMLParser_done, 0);
2102
+ rb_define_method(cXMLParser, "defaultCurrent", XMLParser_defaultCurrent, 0);
2103
+ rb_define_method(cXMLParser, "line", XMLParser_getCurrentLineNumber, 0);
2104
+ rb_define_method(cXMLParser, "column", XMLParser_getCurrentColumnNumber, 0);
2105
+ rb_define_method(cXMLParser, "byteIndex", XMLParser_getCurrentByteIndex, 0);
2106
+ rb_define_method(cXMLParser, "setBase", XMLParser_setBase, 1);
2107
+ rb_define_method(cXMLParser, "getBase", XMLParser_getBase, 0);
2108
+ #ifdef NEW_EXPAT
2109
+ rb_define_method(cXMLParser, "getSpecifiedAttributes",
2110
+ XMLParser_getSpecifiedAttributes, 0);
2111
+ rb_define_method(cXMLParser, "byteCount", XMLParser_getCurrentByteCount, 0);
2112
+ #endif
2113
+ #ifdef XML_DTD
2114
+ rb_define_method(cXMLParser, "setParamEntityParsing",
2115
+ XMLParser_setParamEntityParsing, 1);
2116
+ #endif
2117
+ #ifdef HAVE_EXPAT_H
2118
+ rb_define_method(cXMLParser, "setReturnNSTriplet",
2119
+ XMLParser_setReturnNSTriplet, 1);
2120
+ rb_define_method(cXMLParser, "getInputContext",
2121
+ XMLParser_getInputContext, 0);
2122
+ rb_define_method(cXMLParser, "getIdAttribute",
2123
+ XMLParser_getIdAttrribute, 0);
2124
+ #endif
2125
+
2126
+ #ifdef HAVE_XML_PARSERRESET
2127
+ rb_define_method(cXMLParser, "reset", XMLParser_reset, -1);
2128
+ #endif
2129
+
2130
+ rb_define_method(cXMLEncoding, "map", XMLEncoding_map, 1);
2131
+ rb_define_method(cXMLEncoding, "convert", XMLEncoding_convert, 1);
2132
+
2133
+ #ifdef HAVE_XML_USEFOREIGNDTD
2134
+ rb_define_method(cXMLParser, "useForeignDTD",
2135
+ XMLParser_useForeignDTD, 1);
2136
+ #endif
2137
+ #ifdef HAVE_XML_GETFEATURELIST
2138
+ rb_define_singleton_method(cXMLParser, "getFeatureList",
2139
+ XMLParser_s_getFeatureList, 0);
2140
+ #endif
2141
+
2142
+ #define DEFINE_EVENT_CODE(klass, name) \
2143
+ rb_define_const(klass, #name, sym##name = ID2SYM(rb_intern(#name)))
2144
+
2145
+ DEFINE_EVENT_CODE(cXMLParser, START_ELEM);
2146
+ DEFINE_EVENT_CODE(cXMLParser, END_ELEM);
2147
+ DEFINE_EVENT_CODE(cXMLParser, CDATA);
2148
+ DEFINE_EVENT_CODE(cXMLParser, PI);
2149
+ DEFINE_EVENT_CODE(cXMLParser, DEFAULT);
2150
+ DEFINE_EVENT_CODE(cXMLParser, UNPARSED_ENTITY_DECL);
2151
+ DEFINE_EVENT_CODE(cXMLParser, NOTATION_DECL);
2152
+ DEFINE_EVENT_CODE(cXMLParser, EXTERNAL_ENTITY_REF);
2153
+ #ifdef NEW_EXPAT
2154
+ DEFINE_EVENT_CODE(cXMLParser, COMMENT);
2155
+ DEFINE_EVENT_CODE(cXMLParser, START_CDATA);
2156
+ DEFINE_EVENT_CODE(cXMLParser, END_CDATA);
2157
+ DEFINE_EVENT_CODE(cXMLParser, START_NAMESPACE_DECL);
2158
+ DEFINE_EVENT_CODE(cXMLParser, END_NAMESPACE_DECL);
2159
+ #endif
2160
+ #ifdef HAVE_XML_SETSKIPPEDENTITYHANDLER
2161
+ DEFINE_EVENT_CODE(cXMLParser, SKIPPED_ENTITY);
2162
+ #endif
2163
+ #ifdef XML_DTD
2164
+ rb_define_const(cXMLParser, "PARAM_ENTITY_PARSING_NEVER",
2165
+ XML_PARAM_ENTITY_PARSING_NEVER);
2166
+ rb_define_const(cXMLParser, "PARAM_ENTITY_PARSING_UNLESS_STANDALONE",
2167
+ XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE);
2168
+ rb_define_const(cXMLParser, "PARAM_ENTITY_PARSING_ALWAYS",
2169
+ XML_PARAM_ENTITY_PARSING_ALWAYS);
2170
+ #endif
2171
+ #ifdef HAVE_XML_SETDOCTYPEDECLHANDLER
2172
+ DEFINE_EVENT_CODE(cXMLParser, START_DOCTYPE_DECL);
2173
+ DEFINE_EVENT_CODE(cXMLParser, END_DOCTYPE_DECL);
2174
+ #endif
2175
+ #ifdef HAVE_EXPAT_H
2176
+ DEFINE_EVENT_CODE(cXMLParser, ELEMENT_DECL);
2177
+ DEFINE_EVENT_CODE(cXMLParser, ATTLIST_DECL);
2178
+ DEFINE_EVENT_CODE(cXMLParser, XML_DECL);
2179
+ DEFINE_EVENT_CODE(cXMLParser, ENTITY_DECL);
2180
+ #endif
2181
+ #if 0
2182
+ DEFINE_EVENT_CODE(cXMLParser, EXTERNAL_PARSED_ENTITY_DECL);
2183
+ DEFINE_EVENT_CODE(cXMLParser, INTERNAL_PARSED_ENTITY_DECL);
2184
+ #endif
2185
+ #if 0
2186
+ DEFINE_EVENT_CODE(cXMLParser, UNKNOWN_ENCODING);
2187
+ #endif
2188
+
2189
+ id_map = rb_intern("_map");
2190
+ id_startElementHandler = rb_intern("startElement");
2191
+ id_endElementHandler = rb_intern("endElement");
2192
+ id_characterDataHandler = rb_intern("character");
2193
+ id_processingInstructionHandler = rb_intern("processingInstruction");
2194
+ id_defaultHandler = rb_intern("default");
2195
+ id_unparsedEntityDeclHandler = rb_intern("unparsedEntityDecl");
2196
+ id_notationDeclHandler = rb_intern("notationDecl");
2197
+ id_externalEntityRefHandler = rb_intern("externalEntityRef");
2198
+ #ifdef NEW_EXPAT
2199
+ id_defaultExpandHandler = rb_intern("defaultExpand");
2200
+ id_commentHandler = rb_intern("comment");
2201
+ id_startCdataSectionHandler = rb_intern("startCdata");
2202
+ id_endCdataSectionHandler = rb_intern("endCdata");
2203
+ id_startNamespaceDeclHandler = rb_intern("startNamespaceDecl");
2204
+ id_endNamespaceDeclHandler = rb_intern("endNamespaceDecl");
2205
+ id_notStandaloneHandler = rb_intern("notStandalone");
2206
+ #endif
2207
+ #ifdef HAVE_XML_SETDOCTYPEDECLHANDLER
2208
+ id_startDoctypeDeclHandler = rb_intern("startDoctypeDecl");
2209
+ id_endDoctypeDeclHandler = rb_intern("endDoctypeDecl");
2210
+ #endif
2211
+ id_unknownEncoding = rb_intern("unknownEncoding");
2212
+ id_convert = rb_intern("convert");
2213
+ #ifdef HAVE_EXPAT_H
2214
+ id_elementDeclHandler = rb_intern("elementDecl");
2215
+ id_attlistDeclHandler = rb_intern("attlistDecl");
2216
+ id_xmlDeclHandler = rb_intern("xmlDecl");
2217
+ id_entityDeclHandler = rb_intern("entityDecl");
2218
+ #endif
2219
+ #if 0
2220
+ id_externalParsedEntityDeclHandler = rb_intern("externalParsedEntityDecl");
2221
+ id_internalParsedEntityDeclHandler = rb_intern("internalParsedEntityDecl");
2222
+ #endif
2223
+ #ifdef HAVE_XML_SETSKIPPEDENTITYHANDLER
2224
+ id_skippedEntityHandler = rb_intern("skippedEntity");
2225
+ #endif
2226
+ }