xmlparser 0.6.81

Sign up to get free protection for your applications and to get access to all the features.
Files changed (52) hide show
  1. data/MANIFEST +112 -0
  2. data/README +697 -0
  3. data/README.ja +789 -0
  4. data/Rakefile +34 -0
  5. data/ext/encoding.h +91 -0
  6. data/ext/xmlparser/mkrf_conf.rb +28 -0
  7. data/ext/xmlparser/xmlparser.c +2226 -0
  8. data/lib/sax.rb +1 -0
  9. data/lib/saxdriver.rb +1 -0
  10. data/lib/wget.rb +47 -0
  11. data/lib/xml/dom/builder-ja.rb +58 -0
  12. data/lib/xml/dom/builder.rb +310 -0
  13. data/lib/xml/dom/core.rb +3276 -0
  14. data/lib/xml/dom/digest.rb +94 -0
  15. data/lib/xml/dom/visitor.rb +182 -0
  16. data/lib/xml/dom2/attr.rb +213 -0
  17. data/lib/xml/dom2/cdatasection.rb +76 -0
  18. data/lib/xml/dom2/characterdata.rb +177 -0
  19. data/lib/xml/dom2/comment.rb +81 -0
  20. data/lib/xml/dom2/core.rb +19 -0
  21. data/lib/xml/dom2/document.rb +317 -0
  22. data/lib/xml/dom2/documentfragment.rb +82 -0
  23. data/lib/xml/dom2/documenttype.rb +102 -0
  24. data/lib/xml/dom2/dombuilder.rb +277 -0
  25. data/lib/xml/dom2/dombuilderfilter.rb +12 -0
  26. data/lib/xml/dom2/domentityresolver.rb +13 -0
  27. data/lib/xml/dom2/domentityresolverimpl.rb +37 -0
  28. data/lib/xml/dom2/domexception.rb +95 -0
  29. data/lib/xml/dom2/domimplementation.rb +61 -0
  30. data/lib/xml/dom2/dominputsource.rb +29 -0
  31. data/lib/xml/dom2/element.rb +533 -0
  32. data/lib/xml/dom2/entity.rb +110 -0
  33. data/lib/xml/dom2/entityreference.rb +107 -0
  34. data/lib/xml/dom2/namednodemap.rb +138 -0
  35. data/lib/xml/dom2/node.rb +587 -0
  36. data/lib/xml/dom2/nodelist.rb +231 -0
  37. data/lib/xml/dom2/notation.rb +86 -0
  38. data/lib/xml/dom2/processinginstruction.rb +155 -0
  39. data/lib/xml/dom2/text.rb +128 -0
  40. data/lib/xml/dom2/xpath.rb +398 -0
  41. data/lib/xml/encoding-ja.rb +42 -0
  42. data/lib/xml/parser.rb +13 -0
  43. data/lib/xml/parserns.rb +236 -0
  44. data/lib/xml/sax.rb +353 -0
  45. data/lib/xml/saxdriver.rb +370 -0
  46. data/lib/xml/xpath.rb +3284 -0
  47. data/lib/xml/xpath.ry +2352 -0
  48. data/lib/xmldigest.rb +1 -0
  49. data/lib/xmltree.rb +1 -0
  50. data/lib/xmltreebuilder.rb +1 -0
  51. data/lib/xmltreevisitor.rb +1 -0
  52. metadata +111 -0
data/Rakefile ADDED
@@ -0,0 +1,34 @@
#!/usr/bin/ruby
# Rake build script for the xmlparser gem: compiles the extension through
# mkrf's rake helpers and describes the gem package.
require 'rubygems'
require 'rake'
require 'rake/testtask'
require 'rake/clean'
require 'rake/gempackagetask'
require 'rake/rdoctask'
require 'mkrf/rakehelper'
require 'fileutils'
include FileUtils

# Artifacts removed by the clean tasks.
setup_clean ["pkg", "lib/*.bundle", "*.gem", ".config", "ext/**/Rakefile"]

setup_extension('xmlparser', 'xmlparser')

desc "Does a full compile"
task :default => [:xmlparser]

task 'extension' => :default

gem_version = "0.6.81"
gem_name = "xmlparser"

setup_gem(gem_name, gem_version) do |spec|
  spec.summary = "Ruby bindings to the Expat XML parsing library"
  spec.description = spec.summary
  spec.author = "Yoshida Mataso with Jeff Hodges"
  spec.add_dependency("mkrf", ">=0.2.1")
  spec.extensions << "Rakefile"
  spec.files = FileList["lib/**/*", "ext/**/*"].exclude("rdoc").to_a
  spec.has_rdoc = false
  spec.extra_rdoc_files = ['README', 'README.ja', 'MANIFEST', 'samples']
  spec.rubyforge_project = 'xmlparser'
end
34
+
data/ext/encoding.h ADDED
@@ -0,0 +1,91 @@
1
+ /*****************************************************************
2
+ ** encoding.h
3
+ **
4
+ ** Copyright 1998 Clark Cooper
5
+ ** All rights reserved.
6
+ **
7
+ ** This program is free software; you can redistribute it and/or
8
+ ** modify it under the same terms as Perl itself.
9
+ */
10
+
11
+ #ifndef ENCODING_H
12
+ #define ENCODING_H 1
13
+
14
+ #define ENCMAP_MAGIC 0xfeebface
15
+
/*
 * One prefix entry of a multi-byte encoding map (see the format
 * description further down in this header).
 */
typedef struct prefixmap {
    unsigned char  min;         /* 1st byte mapped for this prefix */
    unsigned char  len;         /* number of bytes mapped; 0 => 256 */
    unsigned short bmap_start;  /* starting index in the overall byte map */
    unsigned char  ispfx[32];   /* bitvector: byte is a prefix */
    unsigned char  ischar[32];  /* bitvector: byte is a character */
} PrefixMap;
23
+
24
+ typedef struct encinf
25
+ {
26
+ unsigned short prefixes_size;
27
+ unsigned short bytemap_size;
28
+ int firstmap[256];
29
+ PrefixMap *prefixes;
30
+ unsigned short *bytemap;
31
+ } Encinfo;
32
+
/*
 * Header at the start of an encoding map file; the layout is explained
 * in the format description below.
 */
typedef struct encmaphdr
{
    unsigned int   magic;     /* should match ENCMAP_MAGIC */
    char           name[40];  /* encoding name */
    unsigned short pfsize;    /* number of PrefixMaps */
    unsigned short bmsize;    /* number of shorts in the overall map */
    int            map[256];  /* map for the 1st byte */
} Encmap_Header;
41
+
42
+ /*================================================================
43
+ ** Structure of Encoding map binary encoding
44
+ **
45
+ ** Note that all shorts and ints are in network order,
46
+ ** so when packing or unpacking with perl, use 'n' and 'N' respectively.
47
+ ** In C, use the htonl family of functions.
48
+ **
49
+ ** The basic structure is:
50
+ **
51
+ ** _______________________
52
+ ** |Header (including map expat needs for 1st byte)
53
+ ** |PrefixMap * pfsize
54
+ ** | This section isn't included for single-byte encodings.
55
+ ** | For multiple byte encodings, when a byte represents a prefix
56
+ ** | then it indexes into this vector instead of mapping to a
57
+ ** | Unicode character. The PrefixMap type is declared above. The
58
+ ** | ispfx and ischar fields are bitvectors indicating whether
59
+ ** | the byte being mapped is a prefix or character respectively.
60
+ ** | If neither is set, then the character is not mapped to Unicode.
61
+ ** |
62
+ ** | The min field is the 1st byte mapped for this prefix; the
63
+ ** | len field is the number of bytes mapped; and bmap_start is
64
+ ** | the starting index of the map for this prefix in the overall
65
+ ** | map (next section).
66
+ ** |unsigned short * bmsize
67
+ ** | This section also is omitted for single-byte encodings.
68
+ ** | Each short is either a Unicode scalar or an index into the
69
+ ** | PrefixMap vector.
70
+ **
71
+ ** The header for these files is declared above as the Encmap_Header type.
72
+ ** The magic field is a magic number which should match the ENCMAP_MAGIC
73
+ ** macro above. The next 40 bytes store the IANA registered name for the
74
+ ** encoding. The pfsize field holds the number of PrefixMaps, which should
75
+ ** be zero for single byte encodings. The bmsize field holds the number of
76
+ ** shorts used for the overall map.
77
+ **
78
+ ** The map field contains either the Unicode scalar encoded by the 1st byte
79
+ ** or -n where n is the number of bytes that such a 1st byte implies (Expat
80
+ ** requires that the number of bytes to encode a character is indicated by
81
+ ** the 1st byte) or -1 if the byte doesn't map to any Unicode character.
82
+ **
83
+ ** If the encoding is a multiple byte encoding, then there will be PrefixMap
84
+ ** and character map sections. The 1st PrefixMap (index 0), covers a range
85
+ ** of bytes that includes all 1st byte prefixes.
86
+ **
87
+ ** Look at convert_to_unicode in Expat.xs to see how this data structure
88
+ ** is used.
89
+ */
90
+
91
+ #endif /* ndef ENCODING_H */
@@ -0,0 +1,28 @@
1
+ #!/usr/bin/ruby
2
+
3
+ require 'rubygems'
4
+ require 'mkrf'
5
+
# Print an extconf failure message and terminate with exit status 1.
def crash(str)
  print format(" extconf failure: %s\n", str)
  exit 1
end
10
+
# Generate the extension Rakefile: locate expat (or the older xmltok
# library) and add defines for the optional expat entry points found.
Mkrf::Generator.new('xmlparser') do |g|
  header_dirs  = ['/opt/include', '/usr/local/include', '/usr/include']
  library_dirs = ['/opt/lib', '/usr/local/lib', '/usr/lib']

  if g.include_header('expat.h', *header_dirs)
    g.include_library('expat', 'XML_ParserCreate', *library_dirs)
  elsif g.include_header('libxmltok', *header_dirs)
    g.include_library('xmltok', 'XML_ParserCreate', *library_dirs)
  end

  g.add_define('NEW_EXPAT') if g.has_function?("XML_SetNotStandaloneHandler")
  g.add_define('XML_DTD') if g.has_function?("XML_SetParamEntityParsing")

  g.include_library("wsock32") if RUBY_PLATFORM =~ /mswin32|mingw/
end
@@ -0,0 +1,2226 @@
1
+ /*
2
+ * Expat (XML Parser Toolkit) wrapper for Ruby
3
+ * Feb 16, 2004 yoshidam version 0.6.8 taint output string
4
+ * Feb 16, 2004 yoshidam version 0.6.7 fix buffer overflow
5
+ * Mar 11, 2003 yoshidam version 0.6.6 fix skippedEntity handler
6
+ * Sep 20, 2002 yoshidam version 0.6.5 fix reset method
7
+ * Apr 4, 2002 yoshidam version 0.6.3 change event code values
8
+ * Oct 10, 2000 yoshidam version 0.6.1 support expat-1.2
9
+ * Oct 6, 2000 yoshidam version 0.6.0 support expat-1.95.0
10
+ * Jun 28, 1999 yoshidam version 0.5.18 define initialize for Ruby 1.5
11
+ * Jun 28, 1999 yoshidam version 0.5.15 support start/endDoctypeDecl
12
+ * Jun 28, 1999 yoshidam version 0.5.14 support setParamEntityParsing
13
+ * Apr 28, 1999 yoshidam version 0.5.11 support notStandalone
14
+ * Mar 29, 1998 yoshidam version 0.5.9 optimize for Ruby 1.3
15
+ * Mar 8, 1998 yoshidam version 0.5.7 support start/endNamespaceDecl
16
+ * Jan 14, 1998 yoshidam version 0.5.4 support start/endCdataSection
17
+ * Jan 10, 1998 yoshidam version 0.5.3 support encoding map
18
+ * Nov 24, 1998 yoshidam version 0.5.0 support TEST version of expat
19
+ * Nov 5, 1998 yoshidam version 0.4.18 mIDs are initialized in Init_xmlparser
20
+ * Oct 28, 1998 yoshidam version 0.4.17 mIDs are stored into static vars
21
+ * Oct 13, 1998 yoshidam version 0.4.12 debug and speed up myEncodingConv
22
+ * Oct 7, 1998 yoshidam version 0.4.11 hold internal object into ivar
23
+ * Sep 18, 1998 yoshidam version 0.4.6
24
+ * Sep 8, 1998 yoshidam version 0.4.4
25
+ * Sep 3, 1998 yoshidam version 0.4.3
26
+ * Sep 1, 1998 yoshidam version 0.4.2
27
+ * Aug 28, 1998 yoshidam version 0.4.1
28
+ * Aug 22, 1998 yoshidam version 0.4.0
29
+ * Jul 6, 1998 yoshidam version 0.2
30
+ * Jun 30, 1998 yoshidam version 0.1
31
+ *
32
+ * XML_ENC_PATH: path of encoding map for Perl
33
+ * HAVE_XML_USEFOREIGNDTD: expat 1.95.5
34
+ * HAVE_XML_GETFEATURELIST: expat 1.95.5
35
+ * HAVE_XML_SETSKIPPEDENTITYHANDLER: expat 1.95.4
36
+ * HAVE_XML_PARSERRESET: expat 1.95.3
37
+ * HAVE_EXPAT_H: expat 1.95.0
38
+ * HAVE_XML_SETDOCTYPEDECLHANDLER: expat 19990728
39
+ * XML_DTD: expat 19990626
40
+ * NEW_EXPAT: expat 1.1
41
+ */
42
+
43
+ #include "ruby.h"
44
+ #include "rubyio.h"
45
+ #include <stdio.h>
46
+ #include <ctype.h>
47
+ #ifdef HAVE_EXPAT_H
48
+ # include "expat.h"
49
+ #else
50
+ # include "xmlparse.h"
51
+ #endif
52
+ #ifdef XML_ENC_PATH
53
+ # include <limits.h>
54
+ # include <sys/stat.h>
55
+ # include "encoding.h"
56
+ # ifndef PATH_MAX
57
+ # define PATH_MAX 256
58
+ # endif
59
+ #endif
60
+
61
+ static VALUE eXMLParserError;
62
+ static VALUE cXMLParser;
63
+ static VALUE cXMLEncoding;
64
+ static ID id_map;
65
+ static ID id_startElementHandler;
66
+ static ID id_endElementHandler;
67
+ static ID id_characterDataHandler;
68
+ static ID id_processingInstructionHandler;
69
+ static ID id_defaultHandler;
70
+ static ID id_defaultExpandHandler;
71
+ static ID id_unparsedEntityDeclHandler;
72
+ static ID id_notationDeclHandler;
73
+ static ID id_externalEntityRefHandler;
74
+ static ID id_unknownEncoding;
75
+ static ID id_convert;
76
+ #ifdef NEW_EXPAT
77
+ static ID id_commentHandler;
78
+ static ID id_startCdataSectionHandler;
79
+ static ID id_endCdataSectionHandler;
80
+ static ID id_startNamespaceDeclHandler;
81
+ static ID id_endNamespaceDeclHandler;
82
+ static ID id_notStandaloneHandler;
83
+ #endif
84
+ #ifdef HAVE_XML_SETDOCTYPEDECLHANDLER
85
+ static ID id_startDoctypeDeclHandler;
86
+ static ID id_endDoctypeDeclHandler;
87
+ #endif
88
+ #ifdef HAVE_EXPAT_H
89
+ static ID id_elementDeclHandler;
90
+ static ID id_attlistDeclHandler;
91
+ static ID id_xmlDeclHandler;
92
+ static ID id_entityDeclHandler;
93
+ #endif
94
+ #if 0
95
+ static ID id_externalParsedEntityDeclHandler;
96
+ static ID id_internalParsedEntityDeclHandler;
97
+ #endif
98
+ #ifdef HAVE_XML_SETSKIPPEDENTITYHANDLER
99
+ static ID id_skippedEntityHandler;
100
+ #endif
101
+
102
+ #define GET_PARSER(obj, parser) \
103
+ Data_Get_Struct(obj, XMLParser, parser)
104
+
105
+ typedef struct _XMLParser {
106
+ XML_Parser parser;
107
+ int iterator;
108
+ int defaultCurrent;
109
+ #ifdef NEW_EXPAT
110
+ const XML_Char** lastAttrs;
111
+ #endif
112
+ int tainted;
113
+ VALUE parent;
114
+ char* context;
115
+ } XMLParser;
116
+
117
+ static VALUE symDEFAULT;
118
+ static VALUE symSTART_ELEM;
119
+ static VALUE symEND_ELEM;
120
+ static VALUE symCDATA;
121
+ static VALUE symPI;
122
+ static VALUE symUNPARSED_ENTITY_DECL;
123
+ static VALUE symNOTATION_DECL;
124
+ static VALUE symEXTERNAL_ENTITY_REF;
125
+ #ifdef NEW_EXPAT
126
+ static VALUE symCOMMENT;
127
+ static VALUE symSTART_CDATA;
128
+ static VALUE symEND_CDATA;
129
+ static VALUE symSTART_NAMESPACE_DECL;
130
+ static VALUE symEND_NAMESPACE_DECL;
131
+ #endif
132
+ #ifdef HAVE_XML_SETDOCTYPEDECLHANDLER
133
+ static VALUE symSTART_DOCTYPE_DECL;
134
+ static VALUE symEND_DOCTYPE_DECL;
135
+ #endif
136
+ #ifdef HAVE_EXPAT_H
137
+ static VALUE symELEMENT_DECL;
138
+ static VALUE symATTLIST_DECL;
139
+ static VALUE symXML_DECL;
140
+ static VALUE symENTITY_DECL;
141
+ #endif
142
+ #if 0
143
+ static VALUE symEXTERNAL_PARSED_ENTITY_DECL;
144
+ static VALUE symINTERNAL_PARSED_ENTITY_DECL;
145
+ #endif
146
+ #if 0
147
+ static VALUE symUNKNOWN_ENCODING;
148
+ #endif
149
+ #ifdef HAVE_XML_SETSKIPPEDENTITYHANDLER
150
+ static VALUE symSKIPPED_ENTITY;
151
+ #endif
152
+
153
+ /* destructor */
154
+ static void
155
+ XMLParser_free(XMLParser* parser)
156
+ {
157
+ /* fprintf(stderr, "Delete XMLParser: %p->%p\n", parser, parser->parser);*/
158
+ if (parser->parser) {
159
+ XML_ParserFree(parser->parser);
160
+ parser->parser = NULL;
161
+ }
162
+ free(parser);
163
+ }
164
+
165
+ static void
166
+ XMLParser_mark(XMLParser* parser)
167
+ {
168
+ /* fprintf(stderr, "Mark XMLParser: %p->%p\n", parser, parser->parser);*/
169
+ if (!NIL_P(parser->parent)) {
170
+ XMLParser* parent;
171
+ GET_PARSER(parser->parent, parent);
172
+ rb_gc_mark(parser->parent);
173
+ }
174
+ }
175
+
176
+ static void
177
+ taintParser(XMLParser* parser) {
178
+ parser->tainted |= 1;
179
+ if (!NIL_P(parser->parent) && !parser->context) {
180
+ XMLParser* parent;
181
+ GET_PARSER(parser->parent, parent);
182
+ taintParser(parent);
183
+ }
184
+ }
185
+
186
+ inline static VALUE
187
+ taintObject(XMLParser* parser, VALUE obj) {
188
+ if (parser->tainted)
189
+ OBJ_TAINT(obj);
190
+ return obj;
191
+ }
192
+ #define TO_(o) (taintObject(parser, o))
193
+
194
+ inline static VALUE
195
+ freezeObject(VALUE obj) {
196
+ OBJ_FREEZE(obj);
197
+ return obj;
198
+ }
199
+ #define FO_(o) (freezeObject(o))
200
+
201
+
202
+ /* Event handlers for iterator */
203
+ static void
204
+ iterStartElementHandler(void *recv,
205
+ const XML_Char *name, const XML_Char **atts)
206
+ {
207
+ XMLParser* parser;
208
+ VALUE attrhash;
209
+
210
+ GET_PARSER(recv, parser);
211
+ #ifdef NEW_EXPAT
212
+ parser->lastAttrs = atts;
213
+ #endif
214
+ attrhash = rb_hash_new();
215
+ while (*atts) {
216
+ const char* key = *atts++;
217
+ const char* val = *atts++;
218
+ rb_hash_aset(attrhash,
219
+ FO_(TO_(rb_str_new2((char*)key))),
220
+ TO_(rb_str_new2((char*)val)));
221
+ }
222
+
223
+ rb_yield(rb_ary_new3(4, symSTART_ELEM,
224
+ TO_(rb_str_new2((char*)name)), attrhash, recv));
225
+ if (parser->defaultCurrent) {
226
+ parser->defaultCurrent = 0;
227
+ XML_DefaultCurrent(parser->parser);
228
+ }
229
+ }
230
+
231
+ static void
232
+ iterEndElementHandler(void *recv,
233
+ const XML_Char *name)
234
+ {
235
+ XMLParser* parser;
236
+ GET_PARSER(recv, parser);
237
+ rb_yield(rb_ary_new3(4, symEND_ELEM,
238
+ TO_(rb_str_new2((char*)name)), Qnil, recv));
239
+ if (parser->defaultCurrent) {
240
+ parser->defaultCurrent = 0;
241
+ XML_DefaultCurrent(parser->parser);
242
+ }
243
+ }
244
+
245
+ static void
246
+ iterCharacterDataHandler(void *recv,
247
+ const XML_Char *s,
248
+ int len)
249
+ {
250
+ XMLParser* parser;
251
+ GET_PARSER(recv, parser);
252
+ rb_yield(rb_ary_new3(4, symCDATA,
253
+ Qnil, TO_(rb_str_new((char*)s, len)), recv));
254
+ if (parser->defaultCurrent) {
255
+ parser->defaultCurrent = 0;
256
+ XML_DefaultCurrent(parser->parser);
257
+ }
258
+ }
259
+
260
+ static void
261
+ iterProcessingInstructionHandler(void *recv,
262
+ const XML_Char *target,
263
+ const XML_Char *data)
264
+ {
265
+ XMLParser* parser;
266
+ GET_PARSER(recv, parser);
267
+ rb_yield(rb_ary_new3(4, symPI,
268
+ TO_(rb_str_new2((char*)target)),
269
+ TO_(rb_str_new2((char*)data)), recv));
270
+ if (parser->defaultCurrent) {
271
+ parser->defaultCurrent = 0;
272
+ XML_DefaultCurrent(parser->parser);
273
+ }
274
+ }
275
+
276
+ static void
277
+ iterDefaultHandler(void *recv,
278
+ const XML_Char *s,
279
+ int len)
280
+ {
281
+ XMLParser* parser;
282
+ GET_PARSER(recv, parser);
283
+ rb_yield(rb_ary_new3(4, symDEFAULT,
284
+ Qnil, TO_(rb_str_new((char*)s, len)), recv));
285
+ if (parser->defaultCurrent) {
286
+ parser->defaultCurrent = 0;
287
+ /* XML_DefaultCurrent shoould not call in defaultHandler */
288
+ /* XML_DefaultCurrent(parser->parser); */
289
+ }
290
+ }
291
+
292
+ void
293
+ iterUnparsedEntityDeclHandler(void *recv,
294
+ const XML_Char *entityName,
295
+ const XML_Char *base,
296
+ const XML_Char *systemId,
297
+ const XML_Char *publicId,
298
+ const XML_Char *notationName)
299
+ {
300
+ XMLParser* parser;
301
+ VALUE valary;
302
+
303
+ GET_PARSER(recv, parser);
304
+ valary = rb_ary_new3(4, (base ? TO_(rb_str_new2((char*)base)) : Qnil),
305
+ TO_(rb_str_new2((char*)systemId)),
306
+ (publicId ? TO_(rb_str_new2((char*)publicId)) : Qnil),
307
+ TO_(rb_str_new2((char*)notationName)));
308
+ rb_yield(rb_ary_new3(4, symUNPARSED_ENTITY_DECL,
309
+ TO_(rb_str_new2((char*)entityName)),
310
+ valary, recv));
311
+ if (parser->defaultCurrent) {
312
+ parser->defaultCurrent = 0;
313
+ XML_DefaultCurrent(parser->parser);
314
+ }
315
+ }
316
+
317
+ void
318
+ iterNotationDeclHandler(void *recv,
319
+ const XML_Char *notationName,
320
+ const XML_Char *base,
321
+ const XML_Char *systemId,
322
+ const XML_Char *publicId)
323
+ {
324
+ XMLParser* parser;
325
+ VALUE valary;
326
+
327
+ GET_PARSER(recv, parser);
328
+ valary = rb_ary_new3(3,
329
+ (base ? TO_(rb_str_new2((char*)base)) : Qnil),
330
+ (systemId ? TO_(rb_str_new2((char*)systemId)) : Qnil),
331
+ (publicId ? TO_(rb_str_new2((char*)publicId)) : Qnil));
332
+ rb_yield(rb_ary_new3(4, symNOTATION_DECL,
333
+ TO_(rb_str_new2((char*)notationName)),
334
+ valary, recv));
335
+ if (parser->defaultCurrent) {
336
+ parser->defaultCurrent = 0;
337
+ XML_DefaultCurrent(parser->parser);
338
+ }
339
+ }
340
+
341
+ int
342
+ iterExternalEntityRefHandler(XML_Parser xmlparser,
343
+ const XML_Char *context,
344
+ const XML_Char *base,
345
+ const XML_Char *systemId,
346
+ const XML_Char *publicId)
347
+ {
348
+ XMLParser* parser;
349
+ VALUE recv;
350
+ VALUE valary;
351
+ VALUE ret;
352
+
353
+ recv = (VALUE)XML_GetUserData(xmlparser);
354
+ GET_PARSER(recv, parser);
355
+ valary = rb_ary_new3(3,
356
+ (base ? TO_(rb_str_new2((char*)base)) : Qnil),
357
+ (systemId ? TO_(rb_str_new2((char*)systemId)) : Qnil),
358
+ (publicId ? TO_(rb_str_new2((char*)publicId)) : Qnil));
359
+ ret = rb_yield(rb_ary_new3(4, symEXTERNAL_ENTITY_REF,
360
+ (context ? TO_(rb_str_new2((char*)context)) : Qnil),
361
+ valary, recv));
362
+ if (parser->defaultCurrent) {
363
+ parser->defaultCurrent = 0;
364
+ XML_DefaultCurrent(parser->parser);
365
+ }
366
+ /* The error status in this iterator block should be returned
367
+ by the exception. */
368
+ return 1;
369
+ }
370
+
371
+ #ifdef NEW_EXPAT
372
+ static void
373
+ iterCommentHandler(void *recv,
374
+ const XML_Char *s)
375
+ {
376
+ XMLParser* parser;
377
+ GET_PARSER(recv, parser);
378
+ rb_yield(rb_ary_new3(4, symCOMMENT,
379
+ Qnil, TO_(rb_str_new2((char*)s)), recv));
380
+ if (parser->defaultCurrent) {
381
+ parser->defaultCurrent = 0;
382
+ XML_DefaultCurrent(parser->parser);
383
+ }
384
+ }
385
+
386
+ static void
387
+ iterStartCdataSectionHandler(void *recv)
388
+ {
389
+ XMLParser* parser;
390
+ GET_PARSER(recv, parser);
391
+ rb_yield(rb_ary_new3(4, symSTART_CDATA, Qnil, Qnil, recv));
392
+ if (parser->defaultCurrent) {
393
+ parser->defaultCurrent = 0;
394
+ XML_DefaultCurrent(parser->parser);
395
+ }
396
+ }
397
+
398
+ static void
399
+ iterEndCdataSectionHandler(void *recv)
400
+ {
401
+ XMLParser* parser;
402
+ GET_PARSER(recv, parser);
403
+ rb_yield(rb_ary_new3(4, symEND_CDATA, Qnil, Qnil, recv));
404
+ if (parser->defaultCurrent) {
405
+ parser->defaultCurrent = 0;
406
+ XML_DefaultCurrent(parser->parser);
407
+ }
408
+ }
409
+
410
+ static void
411
+ iterStartNamespaceDeclHandler(void *recv,
412
+ const XML_Char *prefix,
413
+ const XML_Char *uri)
414
+ {
415
+ XMLParser* parser;
416
+ GET_PARSER(recv, parser);
417
+ rb_yield(rb_ary_new3(4, symSTART_NAMESPACE_DECL,
418
+ (prefix ? TO_(rb_str_new2((char*)prefix)) : Qnil),
419
+ (uri ? TO_(rb_str_new2((char*)uri)) : Qnil), recv));
420
+ if (parser->defaultCurrent) {
421
+ parser->defaultCurrent = 0;
422
+ XML_DefaultCurrent(parser->parser);
423
+ }
424
+ }
425
+
426
+ static void
427
+ iterEndNamespaceDeclHandler(void *recv,
428
+ const XML_Char *prefix)
429
+ {
430
+ XMLParser* parser;
431
+ GET_PARSER(recv, parser);
432
+ rb_yield(rb_ary_new3(4, symEND_NAMESPACE_DECL,
433
+ (prefix ? TO_(rb_str_new2((char*)prefix)) : Qnil),
434
+ Qnil, recv));
435
+ if (parser->defaultCurrent) {
436
+ parser->defaultCurrent = 0;
437
+ XML_DefaultCurrent(parser->parser);
438
+ }
439
+ }
440
+ #endif
441
+
/*
 * Doctype declaration callbacks for iterator mode.
 *
 * Guarded by HAVE_XML_SETDOCTYPEDECLHANDLER, the same macro that guards
 * the declarations of symSTART_DOCTYPE_DECL / symEND_DOCTYPE_DECL used
 * below, so the handlers and their symbols are always compiled together.
 */
#ifdef HAVE_XML_SETDOCTYPEDECLHANDLER
/*
 * Yields [symSTART_DOCTYPE_DECL, doctypeName, info, recv].  With
 * HAVE_EXPAT_H, info is [sysid, pubid, has_internal_subset] (NULL ids
 * become nil, the flag becomes true/false); otherwise info is nil.
 */
static void
#ifdef HAVE_EXPAT_H
iterStartDoctypeDeclHandler(void *recv,
                            const XML_Char *doctypeName,
                            const XML_Char *sysid,
                            const XML_Char *pubid,
                            int has_internal_subset)
#else
iterStartDoctypeDeclHandler(void *recv,
                            const XML_Char *doctypeName)
#endif
{
    XMLParser* parser;
    VALUE valary = Qnil;

    GET_PARSER(recv, parser);
#ifdef HAVE_EXPAT_H
    valary = rb_ary_new3(3,
                         (sysid ? TO_(rb_str_new2((char*)sysid)) : Qnil),
                         (pubid ? TO_(rb_str_new2((char*)pubid)) : Qnil),
                         (has_internal_subset ? Qtrue : Qfalse));
#endif
    rb_yield(rb_ary_new3(4, symSTART_DOCTYPE_DECL,
                         TO_(rb_str_new2((char*)doctypeName)),
                         valary, recv));
    if (parser->defaultCurrent) {
        parser->defaultCurrent = 0;
        XML_DefaultCurrent(parser->parser);
    }
}

/* Yields [symEND_DOCTYPE_DECL, nil, nil, recv]. */
static void
iterEndDoctypeDeclHandler(void *recv)
{
    XMLParser* parser;

    GET_PARSER(recv, parser);
    rb_yield(rb_ary_new3(4, symEND_DOCTYPE_DECL,
                         Qnil,
                         Qnil, recv));
    if (parser->defaultCurrent) {
        parser->defaultCurrent = 0;
        XML_DefaultCurrent(parser->parser);
    }
}
#endif
488
+
489
+
490
+ #ifdef HAVE_EXPAT_H
491
+
492
+ static VALUE
493
+ makeContentArray(XMLParser* parser, XML_Content* model)
494
+ {
495
+ static const char* content_type_name[] = {
496
+ NULL, "EMPTY", "ANY", "MIXED", "NAME", "CHOICE", "SEQ"
497
+ };
498
+ static const char* content_quant_name[] = {
499
+ "", "?", "*", "+"
500
+ };
501
+ int i;
502
+ VALUE children = Qnil;
503
+ const char* type_name = content_type_name[model->type];
504
+ const char* quant_name = content_quant_name[model->quant];
505
+ VALUE ret = rb_ary_new3(3,
506
+ TO_(rb_str_new2((char*)type_name)),
507
+ TO_(rb_str_new2((char*)quant_name)),
508
+ (model->name ? TO_(rb_str_new2((char*)model->name)) :
509
+ Qnil));
510
+ if (model->numchildren > 0) {
511
+ children = rb_ary_new();
512
+ for (i =0; i < model->numchildren; i++) {
513
+ VALUE child = makeContentArray(parser, model->children + i);
514
+ rb_ary_push(children, child);
515
+ }
516
+ }
517
+ rb_ary_push(ret, children);
518
+ return ret;
519
+ }
520
+
521
+
522
+
523
+ static void
524
+ iterElementDeclHandler(void *recv,
525
+ const XML_Char *name,
526
+ XML_Content *model)
527
+ {
528
+ XMLParser* parser;
529
+ GET_PARSER(recv, parser);
530
+ VALUE content = makeContentArray(parser, model);
531
+ rb_yield(rb_ary_new3(4, symELEMENT_DECL,
532
+ TO_(rb_str_new2(name)),
533
+ content, recv));
534
+ if (parser->defaultCurrent) {
535
+ parser->defaultCurrent = 0;
536
+ XML_DefaultCurrent(parser->parser);
537
+ }
538
+ }
539
+
540
+ static void
541
+ iterAttlistDeclHandler (void *recv,
542
+ const XML_Char *elname,
543
+ const XML_Char *attname,
544
+ const XML_Char *att_type,
545
+ const XML_Char *dflt,
546
+ int isrequired)
547
+ {
548
+ XMLParser* parser;
549
+ VALUE valary;
550
+
551
+ GET_PARSER(recv, parser);
552
+ valary = rb_ary_new3(4,
553
+ TO_(rb_str_new2((char*)attname)),
554
+ TO_(rb_str_new2((char*)att_type)),
555
+ (dflt ? TO_(rb_str_new2((char*)dflt)) : Qnil),
556
+ (isrequired ? Qtrue : Qfalse));
557
+ rb_yield(rb_ary_new3(4, symATTLIST_DECL,
558
+ TO_(rb_str_new2(elname)),
559
+ valary, recv));
560
+ if (parser->defaultCurrent) {
561
+ parser->defaultCurrent = 0;
562
+ XML_DefaultCurrent(parser->parser);
563
+ }
564
+ }
565
+
566
+ static void
567
+ iterXmlDeclHandler (void *recv,
568
+ const XML_Char *version,
569
+ const XML_Char *encoding,
570
+ int standalone)
571
+ {
572
+ XMLParser* parser;
573
+ VALUE valary;
574
+
575
+ GET_PARSER(recv, parser);
576
+ valary = rb_ary_new3(3,
577
+ (version ? TO_(rb_str_new2(version)) : Qnil),
578
+ (encoding ? TO_(rb_str_new2((char*)encoding)) : Qnil),
579
+ INT2FIX(standalone));
580
+ rb_yield(rb_ary_new3(4, symXML_DECL,
581
+ Qnil,
582
+ valary, recv));
583
+ if (parser->defaultCurrent) {
584
+ parser->defaultCurrent = 0;
585
+ XML_DefaultCurrent(parser->parser);
586
+ }
587
+ }
588
+
589
+ static void
590
+ iterEntityDeclHandler (void *recv,
591
+ const XML_Char *entityName,
592
+ int is_parameter_entity,
593
+ const XML_Char *value,
594
+ int value_length,
595
+ const XML_Char *base,
596
+ const XML_Char *systemId,
597
+ const XML_Char *publicId,
598
+ const XML_Char *notationName)
599
+ {
600
+ XMLParser* parser;
601
+ VALUE valary;
602
+
603
+ GET_PARSER(recv, parser);
604
+ valary = rb_ary_new3(6,
605
+ (is_parameter_entity ? Qtrue : Qfalse),
606
+ TO_(rb_str_new((char*)value, value_length)),
607
+ (base ? TO_(rb_str_new2((char*)base)) : Qnil),
608
+ (systemId ? TO_(rb_str_new2((char*)systemId)) : Qnil),
609
+ (publicId ? TO_(rb_str_new2((char*)publicId)) : Qnil),
610
+ (notationName ? TO_(rb_str_new2((char*)notationName))
611
+ : Qnil));
612
+ rb_yield(rb_ary_new3(4, symENTITY_DECL,
613
+ TO_(rb_str_new2(entityName)),
614
+ valary, recv));
615
+ if (parser->defaultCurrent) {
616
+ parser->defaultCurrent = 0;
617
+ XML_DefaultCurrent(parser->parser);
618
+ }
619
+ }
620
+
621
+ #endif
622
+
#if 0
/*
 * Disabled: parsed-entity declaration callbacks for iterator mode.
 * Kept out of the build by the surrounding "#if 0".
 */

/* Yields [symEXTERNAL_PARSED_ENTITY_DECL, entityName, [base, systemId, publicId], recv]. */
static void
iterExternalParsedEntityDeclHandler(void *recv,
                                    const XML_Char *entityName,
                                    const XML_Char *base,
                                    const XML_Char *systemId,
                                    const XML_Char *publicId)
{
    XMLParser* p;
    VALUE baseStr = Qnil;
    VALUE pubStr = Qnil;
    VALUE sysStr;
    VALUE info;
    VALUE entityStr;

    GET_PARSER(recv, p);
    if (base)
        baseStr = taintObject(p, rb_str_new2((char*)base));
    sysStr = taintObject(p, rb_str_new2((char*)systemId));
    if (publicId)
        pubStr = taintObject(p, rb_str_new2((char*)publicId));
    info = rb_ary_new3(3, baseStr, sysStr, pubStr);

    entityStr = taintObject(p, rb_str_new2((char*)entityName));
    rb_yield(rb_ary_new3(4, symEXTERNAL_PARSED_ENTITY_DECL, entityStr,
                         info, recv));
    if (!p->defaultCurrent)
        return;
    p->defaultCurrent = 0;
    XML_DefaultCurrent(p->parser);
}

/* Yields [symINTERNAL_PARSED_ENTITY_DECL, entityName, replacementText, recv]. */
static void
iterInternalParsedEntityDeclHandler(void *recv,
                                    const XML_Char *entityName,
                                    const XML_Char *replacementText,
                                    int replacementTextLength)
{
    XMLParser* p;
    VALUE entityStr;
    VALUE textStr;

    GET_PARSER(recv, p);
    entityStr = taintObject(p, rb_str_new2((char*)entityName));
    textStr = taintObject(p, rb_str_new((char*)replacementText,
                                        replacementTextLength));
    rb_yield(rb_ary_new3(4, symINTERNAL_PARSED_ENTITY_DECL, entityStr,
                         textStr, recv));
    if (!p->defaultCurrent)
        return;
    p->defaultCurrent = 0;
    XML_DefaultCurrent(p->parser);
}
#endif
665
+
666
+ #ifdef HAVE_XML_SETSKIPPEDENTITYHANDLER
667
+ static void
668
+ iterSkippedEntityHandler(void *recv,
669
+ const XML_Char *entityName,
670
+ int is_parameter_entity)
671
+ {
672
+ XMLParser* parser;
673
+ GET_PARSER(recv, parser);
674
+ rb_yield(rb_ary_new3(4, symSKIPPED_ENTITY,
675
+ TO_(rb_str_new2((char*)entityName)),
676
+ INT2FIX(is_parameter_entity), recv));
677
+ if (parser->defaultCurrent) {
678
+ parser->defaultCurrent = 0;
679
+ XML_DefaultCurrent(parser->parser);
680
+ }
681
+ }
682
+ #endif
683
+
684
+
685
+
686
+ /* Event handlers for instance method */
687
+ static void
688
+ myStartElementHandler(void *recv,
689
+ const XML_Char *name, const XML_Char **atts)
690
+ {
691
+ XMLParser* parser;
692
+ VALUE attrhash;
693
+
694
+ GET_PARSER(recv, parser);
695
+ #ifdef NEW_EXPAT
696
+ parser->lastAttrs = atts;
697
+ #endif
698
+ attrhash = rb_hash_new();
699
+ while (*atts) {
700
+ const char* key = *atts++;
701
+ const char* val = *atts++;
702
+ rb_hash_aset(attrhash,
703
+ FO_(TO_(rb_str_new2((char*)key))),
704
+ TO_(rb_str_new2((char*)val)));
705
+ }
706
+ rb_funcall((VALUE)recv, id_startElementHandler, 2,
707
+ TO_(rb_str_new2((char*)name)), attrhash);
708
+ }
709
+
710
+ static void
711
+ myEndElementHandler(void *recv,
712
+ const XML_Char *name)
713
+ {
714
+ XMLParser* parser;
715
+ GET_PARSER(recv, parser);
716
+ rb_funcall((VALUE)recv, id_endElementHandler, 1,
717
+ TO_(rb_str_new2((char*)name)));
718
+ }
719
+
720
+ static void
721
+ myCharacterDataHandler(void *recv,
722
+ const XML_Char *s,
723
+ int len)
724
+ {
725
+ XMLParser* parser;
726
+ GET_PARSER(recv, parser);
727
+ rb_funcall((VALUE)recv, id_characterDataHandler, 1,
728
+ TO_(rb_str_new((char*)s, len)));
729
+ }
730
+
731
+ static void
732
+ myProcessingInstructionHandler(void *recv,
733
+ const XML_Char *target,
734
+ const XML_Char *data)
735
+ {
736
+ XMLParser* parser;
737
+ GET_PARSER(recv, parser);
738
+ rb_funcall((VALUE)recv, id_processingInstructionHandler, 2,
739
+ TO_(rb_str_new2((char*)target)),
740
+ TO_(rb_str_new2((char*)data)));
741
+ }
742
+
743
+ static void
744
+ myDefaultHandler(void *recv,
745
+ const XML_Char *s,
746
+ int len)
747
+ {
748
+ XMLParser* parser;
749
+ GET_PARSER(recv, parser);
750
+ rb_funcall((VALUE)recv, id_defaultHandler, 1,
751
+ TO_(rb_str_new((char*)s, len)));
752
+ }
753
+
754
+ #ifdef NEW_EXPAT
755
+ static void
756
+ myDefaultExpandHandler(void *recv,
757
+ const XML_Char *s,
758
+ int len)
759
+ {
760
+ XMLParser* parser;
761
+ GET_PARSER(recv, parser);
762
+ rb_funcall((VALUE)recv, id_defaultExpandHandler, 1,
763
+ TO_(rb_str_new((char*)s, len)));
764
+ }
765
+ #endif
766
+
767
+ void
768
+ myUnparsedEntityDeclHandler(void *recv,
769
+ const XML_Char *entityName,
770
+ const XML_Char *base,
771
+ const XML_Char *systemId,
772
+ const XML_Char *publicId,
773
+ const XML_Char *notationName)
774
+ {
775
+ XMLParser* parser;
776
+ GET_PARSER(recv, parser);
777
+ rb_funcall((VALUE)recv, id_unparsedEntityDeclHandler, 5,
778
+ TO_(rb_str_new2((char*)entityName)),
779
+ (base ? TO_(rb_str_new2((char*)base)) : Qnil),
780
+ TO_(rb_str_new2((char*)systemId)),
781
+ (publicId ? TO_(rb_str_new2((char*)publicId)) : Qnil),
782
+ TO_(rb_str_new2((char*)notationName)));
783
+ }
784
+
785
+ void
786
+ myNotationDeclHandler(void *recv,
787
+ const XML_Char *notationName,
788
+ const XML_Char *base,
789
+ const XML_Char *systemId,
790
+ const XML_Char *publicId)
791
+ {
792
+ XMLParser* parser;
793
+ GET_PARSER(recv, parser);
794
+ rb_funcall((VALUE)recv, id_notationDeclHandler, 4,
795
+ TO_(rb_str_new2((char*)notationName)),
796
+ (base ? TO_(rb_str_new2((char*)base)) : Qnil),
797
+ (systemId ? TO_(rb_str_new2((char*)systemId)) : Qnil),
798
+ (publicId ? TO_(rb_str_new2((char*)publicId)) : Qnil));
799
+ }
800
+
801
+ int
802
+ myExternalEntityRefHandler(XML_Parser xmlparser,
803
+ const XML_Char *context,
804
+ const XML_Char *base,
805
+ const XML_Char *systemId,
806
+ const XML_Char *publicId)
807
+ {
808
+ XMLParser* parser;
809
+ VALUE recv;
810
+ VALUE ret;
811
+
812
+ recv = (VALUE)XML_GetUserData(xmlparser);
813
+ GET_PARSER(recv, parser);
814
+ ret = rb_funcall(recv, id_externalEntityRefHandler, 4,
815
+ (context ? TO_(rb_str_new2((char*)context)): Qnil),
816
+ (base ? TO_(rb_str_new2((char*)base)) : Qnil),
817
+ (systemId ? TO_(rb_str_new2((char*)systemId)) : Qnil),
818
+ (publicId ? TO_(rb_str_new2((char*)publicId)) : Qnil));
819
+ /* The error status in this handler should be returned
820
+ by the exception. */
821
+ return Qnil;
822
+ }
823
+
824
+ #ifdef NEW_EXPAT
825
+ static void
826
+ myCommentHandler(void *recv,
827
+ const XML_Char *s)
828
+ {
829
+ XMLParser* parser;
830
+ GET_PARSER(recv, parser);
831
+ rb_funcall((VALUE)recv, id_commentHandler, 1,
832
+ TO_(rb_str_new2((char*)s)));
833
+ }
834
+
835
+ static void
836
+ myStartCdataSectionHandler(void *recv)
837
+ {
838
+ XMLParser* parser;
839
+ GET_PARSER(recv, parser);
840
+ rb_funcall((VALUE)recv, id_startCdataSectionHandler, 0);
841
+ }
842
+
843
+ static void
844
+ myEndCdataSectionHandler(void *recv)
845
+ {
846
+ XMLParser* parser;
847
+ GET_PARSER(recv, parser);
848
+ rb_funcall((VALUE)recv, id_endCdataSectionHandler, 0);
849
+ }
850
+
851
+ static void
852
+ myStartNamespaceDeclHandler(void *recv,
853
+ const XML_Char *prefix,
854
+ const XML_Char *uri)
855
+ {
856
+ XMLParser* parser;
857
+ GET_PARSER(recv, parser);
858
+ rb_funcall((VALUE)recv, id_startNamespaceDeclHandler, 2,
859
+ (prefix ? TO_(rb_str_new2((char*)prefix)) : Qnil),
860
+ (uri ? TO_(rb_str_new2((char*)uri)) : Qnil));
861
+ }
862
+
863
+ static void
864
+ myEndNamespaceDeclHandler(void *recv,
865
+ const XML_Char *prefix)
866
+ {
867
+ XMLParser* parser;
868
+ GET_PARSER(recv, parser);
869
+ rb_funcall((VALUE)recv, id_endNamespaceDeclHandler, 1,
870
+ (prefix ? TO_(rb_str_new2((char*)prefix)) : Qnil));
871
+ }
872
+
873
+ static int
874
+ myNotStandaloneHandler(void *recv)
875
+ {
876
+ XMLParser* parser;
877
+ VALUE v;
878
+
879
+ GET_PARSER(recv, parser);
880
+ v = rb_funcall((VALUE)recv, id_notStandaloneHandler, 0);
881
+ Check_Type(v, T_FIXNUM);
882
+ return FIX2INT(v);
883
+ }
884
+ #endif
885
+
886
+ #ifdef HAVE_XML_SETPARAMENTITYPARSING
887
+ static void
888
+ #ifdef HAVE_EXPAT_H
889
+ myStartDoctypeDeclHandler(void *recv,
890
+ const XML_Char *doctypeName,
891
+ const XML_Char *sysid,
892
+ const XML_Char *pubid,
893
+ int has_internal_subset)
894
+ #else
895
+ myStartDoctypeDeclHandler(void *recv,
896
+ const XML_Char *doctypeName)
897
+ #endif
898
+ {
899
+ XMLParser* parser;
900
+ GET_PARSER(recv, parser);
901
+ #ifdef HAVE_EXPAT_H
902
+ rb_funcall((VALUE)recv, id_startDoctypeDeclHandler, 4,
903
+ TO_(rb_str_new2((char*)doctypeName)),
904
+ (sysid ? TO_(rb_str_new2((char*)sysid)) : Qnil),
905
+ (pubid ? TO_(rb_str_new2((char*)pubid)) : Qnil),
906
+ (has_internal_subset ? Qtrue : Qfalse));
907
+ #else
908
+ rb_funcall((VALUE)recv, id_startDoctypeDeclHandler, 4,
909
+ TO_(rb_str_new2((char*)doctypeName)),
910
+ Qnil, Qnil, Qfalse);
911
+ #endif
912
+ }
913
+
914
+ static void
915
+ myEndDoctypeDeclHandler(void *recv)
916
+ {
917
+ XMLParser* parser;
918
+ GET_PARSER(recv, parser);
919
+ rb_funcall((VALUE)recv, id_endDoctypeDeclHandler, 0);
920
+ }
921
+ #endif
922
+
923
+
924
+ #ifdef HAVE_EXPAT_H
925
+
926
+ static void
927
+ myElementDeclHandler(void *recv,
928
+ const XML_Char *name,
929
+ XML_Content *model)
930
+ {
931
+ XMLParser* parser;
932
+ GET_PARSER(recv, parser);
933
+ VALUE content = makeContentArray(parser, model);
934
+ rb_funcall((VALUE)recv, id_elementDeclHandler, 2,
935
+ TO_(rb_str_new2(name)), content);
936
+ }
937
+
938
+ static void
939
+ myAttlistDeclHandler (void *recv,
940
+ const XML_Char *elname,
941
+ const XML_Char *attname,
942
+ const XML_Char *att_type,
943
+ const XML_Char *dflt,
944
+ int isrequired)
945
+ {
946
+ XMLParser* parser;
947
+ GET_PARSER(recv, parser);
948
+ rb_funcall((VALUE)recv, id_attlistDeclHandler, 5,
949
+ TO_(rb_str_new2(elname)),
950
+ TO_(rb_str_new2((char*)attname)),
951
+ TO_(rb_str_new2((char*)att_type)),
952
+ (dflt ? TO_(rb_str_new2((char*)dflt)) : Qnil),
953
+ (isrequired ? Qtrue : Qfalse));
954
+ }
955
+
956
+ static void
957
+ myXmlDeclHandler (void *recv,
958
+ const XML_Char *version,
959
+ const XML_Char *encoding,
960
+ int standalone)
961
+ {
962
+ XMLParser* parser;
963
+ GET_PARSER(recv, parser);
964
+ rb_funcall((VALUE)recv, id_xmlDeclHandler, 3,
965
+ (version ? TO_(rb_str_new2(version)) : Qnil),
966
+ (encoding ? TO_(rb_str_new2((char*)encoding)) : Qnil),
967
+ INT2FIX(standalone));
968
+ }
969
+
970
+ static void
971
+ myEntityDeclHandler (void *recv,
972
+ const XML_Char *entityName,
973
+ int is_parameter_entity,
974
+ const XML_Char *value,
975
+ int value_length,
976
+ const XML_Char *base,
977
+ const XML_Char *systemId,
978
+ const XML_Char *publicId,
979
+ const XML_Char *notationName)
980
+ {
981
+ XMLParser* parser;
982
+ GET_PARSER(recv, parser);
983
+ rb_funcall((VALUE)recv, id_entityDeclHandler, 7,
984
+ TO_(rb_str_new2(entityName)),
985
+ (is_parameter_entity ? Qtrue : Qfalse),
986
+ TO_(rb_str_new((char*)value, value_length)),
987
+ (base ? TO_(rb_str_new2((char*)base)) : Qnil),
988
+ (systemId ? TO_(rb_str_new2((char*)systemId)) : Qnil),
989
+ (publicId ? TO_(rb_str_new2((char*)publicId)) : Qnil),
990
+ (notationName ? TO_(rb_str_new2((char*)notationName))
991
+ : Qnil));
992
+ }
993
+
994
+ #endif
995
+
996
+ #if 0
997
+ static void
998
+ myExternalParsedEntityDeclHandler(void *recv,
999
+ const XML_Char *entityName,
1000
+ const XML_Char *base,
1001
+ const XML_Char *systemId,
1002
+ const XML_Char *publicId)
1003
+ {
1004
+ XMLParser* parser;
1005
+ GET_PARSER(recv, parser);
1006
+ rb_funcall((VALUE)recv, id_externalParsedEntityDeclHandler, 4,
1007
+ TO_(rb_str_new2((char*)entityName)),
1008
+ (base ? TO_(rb_str_new2((char*)base)) : Qnil),
1009
+ TO_(rb_str_new2((char*)systemId)),
1010
+ (publicId ? TO_(rb_str_new2((char*)publicId)) : Qnil));
1011
+ }
1012
+
1013
+ static void
1014
+ myInternalParsedEntityDeclHandler(void *recv,
1015
+ const XML_Char *entityName,
1016
+ const XML_Char *replacementText,
1017
+ int replacementTextLength)
1018
+ {
1019
+ XMLParser* parser;
1020
+ GET_PARSER(recv, parser);
1021
+ rb_funcall((VALUE)recv, id_internalParsedEntityDeclHandler, 2,
1022
+ TO_(rb_str_new2((char*)entityName)),
1023
+ TO_(rb_str_new((char*)replacementText,
1024
+ replacementTextLength)));
1025
+ }
1026
+ #endif
1027
+
1028
+
1029
+ static VALUE
1030
+ XMLEncoding_map(VALUE obj, VALUE i)
1031
+ {
1032
+ return i;
1033
+ }
1034
+
1035
+ static VALUE
1036
+ XMLEncoding_convert(VALUE obj, VALUE str)
1037
+ {
1038
+ return INT2FIX('?');
1039
+ }
1040
+
1041
+ static int
1042
+ myEncodingConv(void *data, const char *s)
1043
+ {
1044
+ VALUE v;
1045
+ int len;
1046
+ int slen = RSTRING(rb_ivar_get((VALUE)data,
1047
+ id_map))->ptr[*(unsigned char*)s];
1048
+
1049
+ v = rb_funcall((VALUE)data, id_convert, 1, rb_str_new((char*)s, -slen));
1050
+ switch (TYPE(v)) {
1051
+ case T_FIXNUM:
1052
+ return FIX2INT(v);
1053
+ case T_STRING:
1054
+ len = RSTRING(v)->len;
1055
+ if (len == 1) {
1056
+ return (unsigned char)*(RSTRING(v)->ptr);
1057
+ }
1058
+ else if (len >= 2) {
1059
+ return (unsigned char)*(RSTRING(v)->ptr) |
1060
+ (unsigned char)*(RSTRING(v)->ptr + 1) << 8;
1061
+ }
1062
+ }
1063
+ return 0;
1064
+ }
1065
+
1066
+ #if 0
1067
+ static int
1068
+ iterUnknownEncodingHandler(void *recv,
1069
+ const XML_Char *name,
1070
+ XML_Encoding *info)
1071
+ {
1072
+ XMLParser* parser;
1073
+ VALUE ret;
1074
+
1075
+ if (!rb_method_boundp(CLASS_OF((VALUE)recv), id_unknownEncoding, 0))
1076
+ return 0;
1077
+
1078
+ GET_PARSER(recv, parser);
1079
+ ret = rb_yield(rb_ary_new3(4, symUNKNOWN_ENCODING,
1080
+ TO_(rb_str_new2((char*)name)), Qnil, recv));
1081
+ if (TYPE(ret) == T_OBJECT && rb_obj_is_kind_of(ret, cXMLEncoding)) {
1082
+ int i;
1083
+ ID mid = rb_intern("map");
1084
+ VALUE cmap = rb_str_new(NULL, 256);
1085
+ rb_ivar_set(ret, id_map, cmap);
1086
+
1087
+ for (i = 0; i < 256; i++) {
1088
+ VALUE m = rb_funcall(ret, mid, 1, INT2FIX(i));
1089
+ RSTRING(cmap)->ptr[i] = info->map[i] = FIX2INT(m);
1090
+ }
1091
+ /* protect object form GC */
1092
+ rb_ivar_set(recv, rb_intern("_encoding"), ret);
1093
+ info->data = (void*)ret;
1094
+ info->convert = myEncodingConv;
1095
+ return 1;
1096
+ }
1097
+
1098
+ return 0;
1099
+ }
1100
+ #endif
1101
+
1102
+ #ifdef XML_ENC_PATH
1103
+ /*
1104
+ * Encoding map functions come from XML::Parser Version 2.19
1105
+ *
1106
+ * Copyright 1998 Larry Wall and Clark Cooper
1107
+ * All rights reserved.
1108
+ *
1109
+ * This program is free software; you can redistribute it and/or
1110
+ * modify it under the same terms as Perl itself.
1111
+ */
1112
+ static Encinfo*
1113
+ getEncinfo(char* data, int size)
1114
+ {
1115
+ Encmap_Header* header = (Encmap_Header*)data;
1116
+ unsigned short prefixes_size;
1117
+ unsigned short bytemap_size;
1118
+ Encinfo* ret;
1119
+ int i;
1120
+ PrefixMap* prefixes;
1121
+ unsigned short *bytemap;
1122
+
1123
+ if (size < sizeof(Encmap_Header) || ntohl(header->magic) != ENCMAP_MAGIC)
1124
+ return NULL;
1125
+ prefixes_size = ntohs(header->pfsize);
1126
+ bytemap_size = ntohs(header->bmsize);
1127
+ if (size != (sizeof(Encmap_Header) +
1128
+ prefixes_size * sizeof(PrefixMap) +
1129
+ bytemap_size * sizeof(unsigned short)))
1130
+ return NULL;
1131
+ if ((ret = (Encinfo*)malloc(sizeof(Encinfo))) == NULL) {
1132
+ return NULL;
1133
+ }
1134
+ ret->prefixes_size = prefixes_size;
1135
+ ret->bytemap_size = bytemap_size;
1136
+ for (i = 0; i < 256; i++)
1137
+ ret->firstmap[i] = ntohl(header->map[i]);
1138
+ prefixes = (PrefixMap*)(data + sizeof(Encmap_Header));
1139
+ bytemap = (unsigned short*)(data + sizeof(Encmap_Header)
1140
+ + sizeof(PrefixMap)*prefixes_size);
1141
+ if ((ret->prefixes =
1142
+ (PrefixMap*)malloc(sizeof(PrefixMap)*prefixes_size)) == NULL) {
1143
+ free(ret);
1144
+ return NULL;
1145
+ }
1146
+ if ((ret->bytemap =
1147
+ (unsigned short*)malloc(sizeof(unsigned short)*bytemap_size)) == NULL) {
1148
+ free(ret->prefixes);
1149
+ free(ret);
1150
+ return NULL;
1151
+ }
1152
+ for (i = 0; i < prefixes_size; i++, prefixes++) {
1153
+ ret->prefixes[i].min = prefixes->min;
1154
+ ret->prefixes[i].len = prefixes->len;
1155
+ ret->prefixes[i].bmap_start = ntohs(prefixes->bmap_start);
1156
+ memcpy(ret->prefixes[i].ispfx, prefixes->ispfx,
1157
+ sizeof(prefixes->ispfx) + sizeof(prefixes->ischar));
1158
+ }
1159
+ for (i = 0; i < bytemap_size; i++)
1160
+ ret->bytemap[i] = ntohs(bytemap[i]);
1161
+
1162
+ return ret;
1163
+ }
1164
+
1165
+ static int
1166
+ convertEncoding(Encinfo* enc, const char* seq)
1167
+ {
1168
+ PrefixMap* curpfx;
1169
+ int count;
1170
+ int index = 0;
1171
+
1172
+ for (count = 0; count < 4; count++) {
1173
+ unsigned char byte = (unsigned char)seq[count];
1174
+ unsigned char bndx;
1175
+ unsigned char bmsk;
1176
+ int offset;
1177
+
1178
+ curpfx = &enc->prefixes[index];
1179
+ offset = ((int)byte) - curpfx->min;
1180
+ if (offset < 0)
1181
+ break;
1182
+ if (offset >= curpfx->len && curpfx->len != 0)
1183
+ break;
1184
+
1185
+ bndx = byte >> 3;
1186
+ bmsk = 1 << (byte & 0x7);
1187
+
1188
+ if (curpfx->ispfx[bndx] & bmsk) {
1189
+ index = enc->bytemap[curpfx->bmap_start + offset];
1190
+ }
1191
+ else if (curpfx->ischar[bndx] & bmsk) {
1192
+ return enc->bytemap[curpfx->bmap_start + offset];
1193
+ }
1194
+ else
1195
+ break;
1196
+ }
1197
+
1198
+ return -1;
1199
+ }
1200
+
1201
+ static void
1202
+ releaseEncoding(Encinfo* enc)
1203
+ {
1204
+ if (enc) {
1205
+ if (enc->prefixes)
1206
+ free(enc->prefixes);
1207
+ if (enc->bytemap)
1208
+ free(enc->bytemap);
1209
+ free(enc);
1210
+ }
1211
+ }
1212
+
1213
+ static Encinfo*
1214
+ findEncoding(const char* encname)
1215
+ {
1216
+ FILE* fp;
1217
+ Encinfo* enc;
1218
+ struct stat st;
1219
+ int size;
1220
+ int len;
1221
+ char file[PATH_MAX] = "\0";
1222
+ const char* p;
1223
+ char* buf;
1224
+ #ifdef DOSISH
1225
+ const char sepchar = '\\';
1226
+ #else
1227
+ const char sepchar = '/';
1228
+ #endif
1229
+ const char* const encext = ".enc";
1230
+
1231
+ rb_secure(2);
1232
+ /* make map file path */
1233
+ if (XML_ENC_PATH != NULL) {
1234
+ strncpy(file, XML_ENC_PATH, PATH_MAX - 1);
1235
+ file[PATH_MAX - 1] = '\0';
1236
+ }
1237
+ len = strlen(file);
1238
+ if (len > 0 && len < PATH_MAX - 1 && file[len - 1] != sepchar)
1239
+ file[len++] = sepchar;
1240
+ for (p = encname; *p && len < PATH_MAX - 1; p++, len++) {
1241
+ file[len] = tolower(*p);
1242
+ }
1243
+ file[len] = '\0';
1244
+ // if (len < PATH_MAX - sizeof(encext))
1245
+ // strcat(file, encext);
1246
+ strncat(file, encext, PATH_MAX - len -1);
1247
+
1248
+ if ((fp = fopen(file, "rb")) == NULL) {
1249
+ return NULL;
1250
+ }
1251
+
1252
+ /* get file length */
1253
+ fstat(fileno(fp), &st);
1254
+ size = st.st_size;
1255
+
1256
+ if ((buf = (char*)malloc(size)) == NULL) {
1257
+ fclose(fp);
1258
+ return NULL;
1259
+ }
1260
+
1261
+ fread(buf, 1, size, fp);
1262
+ fclose(fp);
1263
+ enc = getEncinfo(buf, size);
1264
+ free(buf);
1265
+ return enc;
1266
+ }
1267
+
1268
+ #endif
1269
+
1270
+ static int
1271
+ myUnknownEncodingHandler(void *recv,
1272
+ const XML_Char *name,
1273
+ XML_Encoding *info)
1274
+ {
1275
+ XMLParser* parser;
1276
+ VALUE ret;
1277
+ if (!rb_method_boundp(CLASS_OF((VALUE)recv), id_unknownEncoding, 0))
1278
+ #ifndef XML_ENC_PATH
1279
+ return 0;
1280
+ #else
1281
+ {
1282
+ Encinfo* enc;
1283
+
1284
+ if ((enc = findEncoding(name)) != NULL) {
1285
+ memcpy(info->map, enc->firstmap, sizeof(int)*256);
1286
+ info->data = enc;
1287
+ info->convert = (int(*)(void*,const char*))convertEncoding;
1288
+ info->release = (void(*)(void*))releaseEncoding;
1289
+ return 1;
1290
+ }
1291
+ else
1292
+ return 0;
1293
+ }
1294
+ #endif
1295
+
1296
+ GET_PARSER(recv, parser);
1297
+ ret = rb_funcall((VALUE)recv, id_unknownEncoding, 1,
1298
+ TO_(rb_str_new2((char*)name)));
1299
+ if (TYPE(ret) == T_OBJECT && rb_obj_is_kind_of(ret, cXMLEncoding)) {
1300
+ int i;
1301
+ ID mid = rb_intern("map");
1302
+ VALUE cmap = rb_str_new(NULL, 256);
1303
+ rb_ivar_set(ret, id_map, cmap);
1304
+
1305
+ if (OBJ_TAINTED(ret))
1306
+ taintParser(parser);
1307
+ TO_(cmap);
1308
+
1309
+ for (i = 0; i < 256; i++) {
1310
+ VALUE m = rb_funcall(ret, mid, 1, INT2FIX(i));
1311
+ RSTRING(cmap)->ptr[i] = info->map[i] = FIX2INT(m);
1312
+ }
1313
+ /* protect object form GC */
1314
+ rb_ivar_set((VALUE)recv, rb_intern("_encoding"), ret);
1315
+ info->data = (void*)ret;
1316
+ info->convert = myEncodingConv;
1317
+
1318
+ return 1;
1319
+ }
1320
+
1321
+ return 0;
1322
+ }
1323
+
1324
+ #ifdef HAVE_XML_SETSKIPPEDENTITYHANDLER
1325
+ static void
1326
+ mySkippedEntityHandler(void *recv,
1327
+ const XML_Char *entityName,
1328
+ int is_parameter_entity)
1329
+ {
1330
+ XMLParser* parser;
1331
+ GET_PARSER(recv, parser);
1332
+ rb_funcall((VALUE)recv, id_skippedEntityHandler, 2,
1333
+ TO_(rb_str_new2((char*)entityName)),
1334
+ INT2FIX(is_parameter_entity));
1335
+ }
1336
+ #endif
1337
+
1338
+
1339
+ /* constructor */
1340
+ static VALUE
1341
+ XMLParser_new(int argc, VALUE* argv, VALUE klass)
1342
+ {
1343
+ XMLParser* parser;
1344
+ VALUE obj;
1345
+ VALUE arg1;
1346
+ VALUE arg2;
1347
+ VALUE arg3;
1348
+ int count;
1349
+ char* encoding = NULL;
1350
+ #ifdef NEW_EXPAT
1351
+ char* nssep = NULL;
1352
+ #endif
1353
+ char* context = NULL;
1354
+ XMLParser* rootparser = NULL;
1355
+ VALUE parent = Qnil;
1356
+
1357
+ count = rb_scan_args(argc, argv, "03", &arg1, &arg2, &arg3);
1358
+ if (count == 1) {
1359
+ /* new(encoding) */
1360
+ if (TYPE(arg1) != T_NIL) {
1361
+ Check_Type(arg1, T_STRING); /* encoding */
1362
+ encoding = RSTRING(arg1)->ptr;
1363
+ }
1364
+ }
1365
+ else if (count == 2) {
1366
+ /* new(encoding, nschar) */
1367
+ /* new(parser, context) */
1368
+ #ifdef NEW_EXPAT
1369
+ if (TYPE(arg1) != T_DATA) {
1370
+ if (TYPE(arg1) != T_NIL) {
1371
+ Check_Type(arg1, T_STRING); /* encoding */
1372
+ encoding = RSTRING(arg1)->ptr;
1373
+ }
1374
+ Check_Type(arg2, T_STRING); /* nschar */
1375
+ nssep = RSTRING(arg2)->ptr;
1376
+ }
1377
+ else {
1378
+ #endif
1379
+ Check_Type(arg1, T_DATA); /* parser */
1380
+ GET_PARSER(arg1, rootparser);
1381
+ if (!NIL_P(arg2)) {
1382
+ Check_Type(arg2, T_STRING); /* context */
1383
+ context = RSTRING(arg2)->ptr;
1384
+ }
1385
+ parent = arg1;
1386
+ #ifdef NEW_EXPAT
1387
+ }
1388
+ #endif
1389
+ }
1390
+ else if (count == 3) {
1391
+ /* new(parser, context, encoding) */
1392
+ Check_Type(arg1, T_DATA); /* parser */
1393
+ GET_PARSER(arg1, rootparser);
1394
+ if (!NIL_P(arg2)) {
1395
+ Check_Type(arg2, T_STRING); /* context */
1396
+ context = RSTRING(arg2)->ptr;
1397
+ }
1398
+ Check_Type(arg3, T_STRING); /* encoding */
1399
+ encoding = RSTRING(arg3)->ptr;
1400
+ parent = arg1;
1401
+ }
1402
+
1403
+ /* create object */
1404
+ obj = Data_Make_Struct(klass, XMLParser,
1405
+ XMLParser_mark, XMLParser_free, parser);
1406
+ /* create parser */
1407
+ if (rootparser == NULL) {
1408
+ #ifdef NEW_EXPAT
1409
+ if (nssep == NULL)
1410
+ parser->parser = XML_ParserCreate(encoding);
1411
+ else
1412
+ parser->parser = XML_ParserCreateNS(encoding, nssep[0]);
1413
+ #else
1414
+ parser->parser = XML_ParserCreate(encoding);
1415
+ #endif
1416
+ parser->tainted = 0;
1417
+ parser->context = NULL;
1418
+ }
1419
+ else {
1420
+ parser->parser = XML_ExternalEntityParserCreate(rootparser->parser,
1421
+ context, encoding);
1422
+ /* clear all inhrited handlers,
1423
+ because handlers should be set in "parse" method */
1424
+ XML_SetElementHandler(parser->parser, NULL, NULL);
1425
+ XML_SetCharacterDataHandler(parser->parser, NULL);
1426
+ XML_SetProcessingInstructionHandler(parser->parser, NULL);
1427
+ XML_SetDefaultHandler(parser->parser, NULL);
1428
+ XML_SetUnparsedEntityDeclHandler(parser->parser, NULL);
1429
+ XML_SetNotationDeclHandler(parser->parser, NULL);
1430
+ XML_SetExternalEntityRefHandler(parser->parser, NULL);
1431
+ #ifdef NEW_EXPAT
1432
+ XML_SetCommentHandler(parser->parser, NULL);
1433
+ XML_SetCdataSectionHandler(parser->parser, NULL, NULL);
1434
+ XML_SetNamespaceDeclHandler(parser->parser, NULL, NULL);
1435
+ XML_SetNotStandaloneHandler(parser->parser, NULL);
1436
+ #endif
1437
+ #ifdef HAVE_XML_SETDOCTYPEDECLHANDLER
1438
+ XML_SetDoctypeDeclHandler(parser->parser, NULL, NULL);
1439
+ #endif
1440
+ #ifdef HAVE_EXPAT_H
1441
+ XML_SetElementDeclHandler(parser->parser, NULL);
1442
+ XML_SetAttlistDeclHandler(parser->parser, NULL);
1443
+ XML_SetXmlDeclHandler(parser->parser, NULL);
1444
+ XML_SetEntityDeclHandler(parser->parser, NULL);
1445
+ #endif
1446
+ #if 0
1447
+ XML_SetExternalParsedEntityDeclHandler(parser->parser, NULL);
1448
+ XML_SetInternalParsedEntityDeclHandler(parser->parser, NULL);
1449
+ #endif
1450
+ #ifdef HAVE_XML_SETSKIPPEDENTITYHANDLER
1451
+ XML_SetSkippedEntityHandler(parser->parser, NULL);
1452
+ #endif
1453
+ if (rootparser->tainted)
1454
+ parser->tainted |= 1;
1455
+ parser->context = context;
1456
+ }
1457
+ if (!parser->parser)
1458
+ rb_raise(eXMLParserError, "cannot create parser");
1459
+
1460
+ /* setting up internal data */
1461
+ XML_SetUserData(parser->parser, (void*)obj);
1462
+ parser->iterator = 0;
1463
+ parser->defaultCurrent = 0;
1464
+ #ifdef NEW_EXPAT
1465
+ parser->lastAttrs = NULL;
1466
+ #endif
1467
+ parser->parent = parent;
1468
+
1469
+ rb_obj_call_init(obj, argc, argv);
1470
+
1471
+ return obj;
1472
+ }
1473
+
1474
+ static VALUE
1475
+ XMLParser_initialize(VALUE obj)
1476
+ {
1477
+ return Qnil;
1478
+ }
1479
+
1480
+ #ifdef HAVE_XML_PARSERRESET
1481
+ static VALUE
1482
+ XMLParser_reset(int argc, VALUE* argv, VALUE obj)
1483
+ {
1484
+ XMLParser* parser;
1485
+ VALUE vencoding = Qnil;
1486
+ char* encoding = NULL;
1487
+ int count;
1488
+
1489
+ count = rb_scan_args(argc, argv, "01", &vencoding);
1490
+
1491
+ GET_PARSER(obj, parser);
1492
+ if (count > 0 && TYPE(vencoding) != T_NIL) {
1493
+ Check_Type(vencoding, T_STRING);
1494
+ encoding = RSTRING(vencoding)->ptr;
1495
+ }
1496
+ XML_ParserReset(parser->parser, encoding);
1497
+ /* setting up internal data */
1498
+ XML_SetUserData(parser->parser, (void*)obj);
1499
+ parser->iterator = 0;
1500
+ parser->defaultCurrent = 0;
1501
+ #ifdef NEW_EXPAT
1502
+ parser->lastAttrs = NULL;
1503
+ #endif
1504
+ parser->tainted = 0;
1505
+
1506
+ return obj;
1507
+ }
1508
+ #endif
1509
+
1510
+ /* parse method */
1511
+ static VALUE
1512
+ XMLParser_parse(int argc, VALUE* argv, VALUE obj)
1513
+ {
1514
+ XMLParser* parser;
1515
+ int ret;
1516
+ XML_StartElementHandler start = NULL;
1517
+ XML_EndElementHandler end = NULL;
1518
+ #ifdef NEW_EXPAT
1519
+ XML_StartCdataSectionHandler startC = NULL;
1520
+ XML_EndCdataSectionHandler endC = NULL;
1521
+ XML_StartNamespaceDeclHandler startNS = NULL;
1522
+ XML_EndNamespaceDeclHandler endNS = NULL;
1523
+ #endif
1524
+ #ifdef HAVE_XML_SETDOCTYPEDECLHANDLER
1525
+ XML_StartDoctypeDeclHandler startDoctype = NULL;
1526
+ XML_EndDoctypeDeclHandler endDoctype = NULL;
1527
+ #endif
1528
+ VALUE str;
1529
+ VALUE isFinal;
1530
+ int final = 1;
1531
+ int count;
1532
+ int fromStream = 0;
1533
+ ID mid = rb_intern("gets");
1534
+ ID linebuf = rb_intern("_linebuf");
1535
+
1536
+ count = rb_scan_args(argc, argv, "02", &str, &isFinal);
1537
+ /* If "str" has public "gets" method, it will be considered *stream* */
1538
+ if (!rb_obj_is_kind_of(str, rb_cString) &&
1539
+ rb_method_boundp(CLASS_OF(str), mid, 1)) {
1540
+ fromStream = 1;
1541
+ }
1542
+ else if (!NIL_P(str)) {
1543
+ Check_Type(str, T_STRING);
1544
+ }
1545
+ if (count >= 2) {
1546
+ if (isFinal == Qtrue)
1547
+ final = 1;
1548
+ else if (isFinal == Qfalse)
1549
+ final = 0;
1550
+ else
1551
+ rb_raise(rb_eTypeError, "not valid value");
1552
+ }
1553
+
1554
+ GET_PARSER(obj, parser);
1555
+
1556
+ // parser->iterator = rb_iterator_p();
1557
+ parser->iterator = rb_block_given_p();
1558
+
1559
+ /* Setup event handlers */
1560
+
1561
+ /* Call as iterator */
1562
+ if (parser->iterator) {
1563
+ XML_SetElementHandler(parser->parser,
1564
+ iterStartElementHandler, iterEndElementHandler);
1565
+ XML_SetCharacterDataHandler(parser->parser,
1566
+ iterCharacterDataHandler);
1567
+ XML_SetProcessingInstructionHandler(parser->parser,
1568
+ iterProcessingInstructionHandler);
1569
+ /* check dummy default handler */
1570
+ #ifdef NEW_EXPAT
1571
+ if (rb_method_boundp(CLASS_OF(obj), id_defaultExpandHandler, 0))
1572
+ XML_SetDefaultHandlerExpand(parser->parser, iterDefaultHandler);
1573
+ else
1574
+ #endif
1575
+ if (rb_method_boundp(CLASS_OF(obj), id_defaultHandler, 0))
1576
+ XML_SetDefaultHandler(parser->parser, iterDefaultHandler);
1577
+
1578
+ if (rb_method_boundp(CLASS_OF(obj), id_unparsedEntityDeclHandler, 0))
1579
+ XML_SetUnparsedEntityDeclHandler(parser->parser,
1580
+ iterUnparsedEntityDeclHandler);
1581
+ if (rb_method_boundp(CLASS_OF(obj), id_notationDeclHandler, 0))
1582
+ XML_SetNotationDeclHandler(parser->parser,
1583
+ iterNotationDeclHandler);
1584
+ if (rb_method_boundp(CLASS_OF(obj), id_externalEntityRefHandler, 0))
1585
+ XML_SetExternalEntityRefHandler(parser->parser,
1586
+ iterExternalEntityRefHandler);
1587
+ #ifdef NEW_EXPAT
1588
+ if (rb_method_boundp(CLASS_OF(obj), id_commentHandler, 0))
1589
+ XML_SetCommentHandler(parser->parser, iterCommentHandler);
1590
+
1591
+ if (rb_method_boundp(CLASS_OF(obj), id_startCdataSectionHandler, 0))
1592
+ startC = iterStartCdataSectionHandler;
1593
+ if (rb_method_boundp(CLASS_OF(obj), id_endCdataSectionHandler, 0))
1594
+ endC = iterEndCdataSectionHandler;
1595
+ if (startC || endC)
1596
+ XML_SetCdataSectionHandler(parser->parser, startC, endC);
1597
+
1598
+ if (rb_method_boundp(CLASS_OF(obj), id_startNamespaceDeclHandler, 0))
1599
+ startNS = iterStartNamespaceDeclHandler;
1600
+ if (rb_method_boundp(CLASS_OF(obj), id_endNamespaceDeclHandler, 0))
1601
+ endNS = iterEndNamespaceDeclHandler;
1602
+ if (startNS || endNS)
1603
+ XML_SetNamespaceDeclHandler(parser->parser, startNS, endNS);
1604
+ if (rb_method_boundp(CLASS_OF(obj), id_notStandaloneHandler, 0))
1605
+ XML_SetNotStandaloneHandler(parser->parser, myNotStandaloneHandler);
1606
+ #endif
1607
+ #ifdef HAVE_XML_SETDOCTYPEDECLHANDLER
1608
+ if (rb_method_boundp(CLASS_OF(obj), id_startDoctypeDeclHandler, 0))
1609
+ startDoctype = iterStartDoctypeDeclHandler;
1610
+ if (rb_method_boundp(CLASS_OF(obj), id_endDoctypeDeclHandler, 0))
1611
+ endDoctype = iterEndDoctypeDeclHandler;
1612
+ if (startDoctype || endDoctype)
1613
+ XML_SetDoctypeDeclHandler(parser->parser, startDoctype, endDoctype);
1614
+ #endif
1615
+ #ifdef HAVE_EXPAT_H
1616
+ if (rb_method_boundp(CLASS_OF(obj), id_elementDeclHandler, 0))
1617
+ XML_SetElementDeclHandler(parser->parser, iterElementDeclHandler);
1618
+ if (rb_method_boundp(CLASS_OF(obj), id_attlistDeclHandler, 0))
1619
+ XML_SetAttlistDeclHandler(parser->parser, iterAttlistDeclHandler);
1620
+ if (rb_method_boundp(CLASS_OF(obj), id_xmlDeclHandler, 0))
1621
+ XML_SetXmlDeclHandler(parser->parser, iterXmlDeclHandler);
1622
+ if (rb_method_boundp(CLASS_OF(obj), id_entityDeclHandler, 0))
1623
+ XML_SetEntityDeclHandler(parser->parser, iterEntityDeclHandler);
1624
+ #endif
1625
+ #if 0
1626
+ if (rb_method_boundp(CLASS_OF(obj), id_externalParsedEntityDeclHandler, 0))
1627
+ XML_SetExternalParsedEntityDeclHandler(parser->parser,
1628
+ iterExternalParsedEntityDeclHandler);
1629
+ if (rb_method_boundp(CLASS_OF(obj), id_internalParsedEntityDeclHandler, 0))
1630
+ XML_SetInternalParsedEntityDeclHandler(parser->parser,
1631
+ iterInternalParsedEntityDeclHandler);
1632
+ #endif
1633
+ /* Call non-iterator version of UnknownEncoding handler,
1634
+ because the porcedure block often returns the unexpected value. */
1635
+ XML_SetUnknownEncodingHandler(parser->parser,
1636
+ myUnknownEncodingHandler,
1637
+ (void*)obj);
1638
+ #ifdef HAVE_XML_SETSKIPPEDENTITYHANDLER
1639
+ if (rb_method_boundp(CLASS_OF(obj), id_skippedEntityHandler, 0))
1640
+ XML_SetSkippedEntityHandler(parser->parser, iterSkippedEntityHandler);
1641
+ #endif
1642
+ }
1643
+ /* Call as not iterator */
1644
+ else {
1645
+ if (rb_method_boundp(CLASS_OF(obj), id_startElementHandler, 0))
1646
+ start = myStartElementHandler;
1647
+ if (rb_method_boundp(CLASS_OF(obj), id_endElementHandler, 0))
1648
+ end = myEndElementHandler;
1649
+ if (start || end)
1650
+ XML_SetElementHandler(parser->parser, start, end);
1651
+ if (rb_method_boundp(CLASS_OF(obj), id_characterDataHandler, 0))
1652
+ XML_SetCharacterDataHandler(parser->parser,
1653
+ myCharacterDataHandler);
1654
+ if (rb_method_boundp(CLASS_OF(obj),
1655
+ id_processingInstructionHandler, 0))
1656
+ XML_SetProcessingInstructionHandler(parser->parser,
1657
+ myProcessingInstructionHandler);
1658
+ #ifdef NEW_EXPAT
1659
+ if (rb_method_boundp(CLASS_OF(obj), id_defaultExpandHandler, 0))
1660
+ XML_SetDefaultHandlerExpand(parser->parser, myDefaultExpandHandler);
1661
+ else
1662
+ #endif
1663
+ if (rb_method_boundp(CLASS_OF(obj), id_defaultHandler, 0)) {
1664
+ XML_SetDefaultHandler(parser->parser, myDefaultHandler);
1665
+ }
1666
+ if (rb_method_boundp(CLASS_OF(obj), id_unparsedEntityDeclHandler, 0))
1667
+ XML_SetUnparsedEntityDeclHandler(parser->parser,
1668
+ myUnparsedEntityDeclHandler);
1669
+ if (rb_method_boundp(CLASS_OF(obj), id_notationDeclHandler, 0))
1670
+ XML_SetNotationDeclHandler(parser->parser,
1671
+ myNotationDeclHandler);
1672
+ if (rb_method_boundp(CLASS_OF(obj), id_externalEntityRefHandler, 0))
1673
+ XML_SetExternalEntityRefHandler(parser->parser,
1674
+ myExternalEntityRefHandler);
1675
+ #ifdef NEW_EXPAT
1676
+ if (rb_method_boundp(CLASS_OF(obj), id_commentHandler, 0))
1677
+ XML_SetCommentHandler(parser->parser, myCommentHandler);
1678
+
1679
+ if (rb_method_boundp(CLASS_OF(obj), id_startCdataSectionHandler, 0))
1680
+ startC = myStartCdataSectionHandler;
1681
+ if (rb_method_boundp(CLASS_OF(obj), id_endCdataSectionHandler, 0))
1682
+ endC = myEndCdataSectionHandler;
1683
+ if (startC || endC)
1684
+ XML_SetCdataSectionHandler(parser->parser, startC, endC);
1685
+
1686
+ if (rb_method_boundp(CLASS_OF(obj), id_startNamespaceDeclHandler, 0))
1687
+ startNS = myStartNamespaceDeclHandler;
1688
+ if (rb_method_boundp(CLASS_OF(obj), id_endNamespaceDeclHandler, 0))
1689
+ endNS = myEndNamespaceDeclHandler;
1690
+ if (startNS || endNS)
1691
+ XML_SetNamespaceDeclHandler(parser->parser, startNS, endNS);
1692
+ if (rb_method_boundp(CLASS_OF(obj), id_notStandaloneHandler, 0))
1693
+ XML_SetNotStandaloneHandler(parser->parser, myNotStandaloneHandler);
1694
+ #endif
1695
+ #ifdef HAVE_XML_SETDOCTYPEDECLHANDLER
1696
+ if (rb_method_boundp(CLASS_OF(obj), id_startDoctypeDeclHandler, 0))
1697
+ startDoctype = myStartDoctypeDeclHandler;
1698
+ if (rb_method_boundp(CLASS_OF(obj), id_endDoctypeDeclHandler, 0))
1699
+ endDoctype = myEndDoctypeDeclHandler;
1700
+ if (startDoctype || endDoctype)
1701
+ XML_SetDoctypeDeclHandler(parser->parser, startDoctype, endDoctype);
1702
+ #endif
1703
+ #ifdef HAVE_EXPAT_H
1704
+ if (rb_method_boundp(CLASS_OF(obj), id_elementDeclHandler, 0))
1705
+ XML_SetElementDeclHandler(parser->parser, myElementDeclHandler);
1706
+ if (rb_method_boundp(CLASS_OF(obj), id_attlistDeclHandler, 0))
1707
+ XML_SetAttlistDeclHandler(parser->parser, myAttlistDeclHandler);
1708
+ if (rb_method_boundp(CLASS_OF(obj), id_xmlDeclHandler, 0))
1709
+ XML_SetXmlDeclHandler(parser->parser, myXmlDeclHandler);
1710
+ if (rb_method_boundp(CLASS_OF(obj), id_entityDeclHandler, 0))
1711
+ XML_SetEntityDeclHandler(parser->parser, myEntityDeclHandler);
1712
+ #endif
1713
+ #if 0
1714
+ if (rb_method_boundp(CLASS_OF(obj), id_externalParsedEntityDeclHandler, 0))
1715
+ XML_SetExternalParsedEntityDeclHandler(parser->parser,
1716
+ myExternalParsedEntityDeclHandler);
1717
+ if (rb_method_boundp(CLASS_OF(obj), id_internalParsedEntityDeclHandler, 0))
1718
+ XML_SetInternalParsedEntityDeclHandler(parser->parser,
1719
+ myInternalParsedEntityDeclHandler);
1720
+ #endif
1721
+ XML_SetUnknownEncodingHandler(parser->parser,
1722
+ myUnknownEncodingHandler,
1723
+ (void*)obj);
1724
+ #ifdef HAVE_XML_SETSKIPPEDENTITYHANDLER
1725
+ if (rb_method_boundp(CLASS_OF(obj), id_skippedEntityHandler, 0))
1726
+ XML_SetSkippedEntityHandler(parser->parser, mySkippedEntityHandler);
1727
+ #endif
1728
+ }
1729
+
1730
+ /* Parse from stream (probably slightly slow) */
1731
+ if (fromStream) {
1732
+ VALUE buf;
1733
+
1734
+ if (OBJ_TAINTED(str))
1735
+ taintParser(parser);
1736
+ do {
1737
+ buf = rb_funcall(str, mid, 0);
1738
+ if (!NIL_P(buf)) {
1739
+ Check_Type(buf, T_STRING);
1740
+ if (OBJ_TAINTED(buf))
1741
+ taintParser(parser);
1742
+ rb_ivar_set(obj, linebuf, buf); /* protect buf from GC (reasonable?)*/
1743
+ ret = XML_Parse(parser->parser,
1744
+ RSTRING(buf)->ptr, RSTRING(buf)->len, 0);
1745
+ }
1746
+ else {
1747
+ ret = XML_Parse(parser->parser, NULL, 0, 1);
1748
+ }
1749
+ if (!ret) {
1750
+ int err = XML_GetErrorCode(parser->parser);
1751
+ const char* errStr = XML_ErrorString(err);
1752
+ rb_raise(eXMLParserError, (char*)errStr);
1753
+ }
1754
+ } while (!NIL_P(buf));
1755
+ return Qnil;
1756
+ }
1757
+
1758
+ /* Parse string */
1759
+ if (!NIL_P(str)) {
1760
+ if (OBJ_TAINTED(str))
1761
+ taintParser(parser);
1762
+ ret = XML_Parse(parser->parser,
1763
+ RSTRING(str)->ptr, RSTRING(str)->len, final);
1764
+ }
1765
+ else
1766
+ ret = XML_Parse(parser->parser, NULL, 0, final);
1767
+ if (!ret) {
1768
+ int err = XML_GetErrorCode(parser->parser);
1769
+ const char* errStr = XML_ErrorString(err);
1770
+ rb_raise(eXMLParserError, (char*)errStr);
1771
+ }
1772
+
1773
+ return Qnil;
1774
+ }
1775
+
1776
+ /* done method */
1777
+ static VALUE
1778
+ XMLParser_done(VALUE obj)
1779
+ {
1780
+ XMLParser* parser;
1781
+
1782
+ GET_PARSER(obj, parser);
1783
+ if (parser->parser) {
1784
+ XML_ParserFree(parser->parser);
1785
+ parser->parser = NULL;
1786
+ }
1787
+ return Qnil;
1788
+ }
1789
+
1790
+ /* defaultCurrent method */
1791
+ static VALUE
1792
+ XMLParser_defaultCurrent(VALUE obj)
1793
+ {
1794
+ XMLParser* parser;
1795
+
1796
+ GET_PARSER(obj, parser);
1797
+ if (!(parser->iterator)) {
1798
+ XML_DefaultCurrent(parser->parser);
1799
+ }
1800
+ else {
1801
+ parser->defaultCurrent = 1;
1802
+ }
1803
+ return Qnil;
1804
+ }
1805
+
1806
+ /* line method */
1807
+ static VALUE
1808
+ XMLParser_getCurrentLineNumber(VALUE obj)
1809
+ {
1810
+ XMLParser* parser;
1811
+ int line;
1812
+
1813
+ GET_PARSER(obj, parser);
1814
+ line = XML_GetCurrentLineNumber(parser->parser);
1815
+
1816
+ return INT2FIX(line);
1817
+ }
1818
+
1819
+ /* column method */
1820
+ static VALUE
1821
+ XMLParser_getCurrentColumnNumber(VALUE obj)
1822
+ {
1823
+ XMLParser* parser;
1824
+ int column;
1825
+
1826
+ GET_PARSER(obj, parser);
1827
+ column = XML_GetCurrentColumnNumber(parser->parser);
1828
+
1829
+ return INT2FIX(column);
1830
+ }
1831
+
1832
+ /* byte index method */
1833
+ static VALUE
1834
+ XMLParser_getCurrentByteIndex(VALUE obj)
1835
+ {
1836
+ XMLParser* parser;
1837
+ long pos;
1838
+
1839
+ GET_PARSER(obj, parser);
1840
+ pos = XML_GetCurrentByteIndex(parser->parser);
1841
+
1842
+ return INT2FIX(pos);
1843
+ }
1844
+
1845
+ /* set URI base */
1846
+ static VALUE
1847
+ XMLParser_setBase(VALUE obj, VALUE base)
1848
+ {
1849
+ XMLParser* parser;
1850
+ int ret;
1851
+
1852
+ Check_Type(base, T_STRING);
1853
+ GET_PARSER(obj, parser);
1854
+ if (OBJ_TAINTED(base))
1855
+ taintParser(parser);
1856
+ ret = XML_SetBase(parser->parser, RSTRING(base)->ptr);
1857
+
1858
+ return INT2FIX(ret);
1859
+ }
1860
+
1861
+ /* get URI base */
1862
+ static VALUE
1863
+ XMLParser_getBase(VALUE obj)
1864
+ {
1865
+ XMLParser* parser;
1866
+ const XML_Char* ret;
1867
+
1868
+ GET_PARSER(obj, parser);
1869
+ ret = XML_GetBase(parser->parser);
1870
+ if (!ret)
1871
+ return Qnil;
1872
+
1873
+ return TO_(rb_str_new2((char*)ret));
1874
+ }
1875
+
1876
+ #ifdef NEW_EXPAT
1877
+ #if 0
1878
+ static VALUE
1879
+ XMLParser_getSpecifiedAttributes(VALUE obj)
1880
+ {
1881
+ XMLParser* parser;
1882
+ int count;
1883
+ const XML_Char** atts;
1884
+ VALUE attrhash;
1885
+
1886
+ GET_PARSER(obj, parser);
1887
+ atts = parser->lastAttrs;
1888
+ if (!atts)
1889
+ return Qnil;
1890
+ count = XML_GetSpecifiedAttributeCount(parser->parser)/2;
1891
+ attrhash = rb_hash_new();
1892
+ while (*atts) {
1893
+ const char* key = *atts++;
1894
+ atts++;
1895
+ rb_hash_aset(attrhash, FO_(TO_(rb_str_new2((char*)key))),
1896
+ (count-- > 0) ? Qtrue: Qfalse);
1897
+ }
1898
+
1899
+ return attrhash;
1900
+ }
1901
+ #else
1902
+ static VALUE
1903
+ XMLParser_getSpecifiedAttributes(VALUE obj)
1904
+ {
1905
+ XMLParser* parser;
1906
+ int i, count;
1907
+ const XML_Char** atts;
1908
+ VALUE attrarray;
1909
+
1910
+ GET_PARSER(obj, parser);
1911
+ atts = parser->lastAttrs;
1912
+ if (!atts)
1913
+ return Qnil;
1914
+ count = XML_GetSpecifiedAttributeCount(parser->parser)/2;
1915
+ attrarray = rb_ary_new2(count);
1916
+ for (i = 0; i < count; i++, atts+=2) {
1917
+ const char* key = *atts;
1918
+ rb_ary_push(attrarray, TO_(rb_str_new2((char*)key)));
1919
+ }
1920
+
1921
+ return attrarray;
1922
+ }
1923
+ #endif
1924
+
1925
+ static VALUE
1926
+ XMLParser_getCurrentByteCount(VALUE obj)
1927
+ {
1928
+ XMLParser* parser;
1929
+
1930
+ GET_PARSER(obj, parser);
1931
+ return INT2FIX(XML_GetCurrentByteCount(parser->parser));
1932
+ }
1933
+ #endif
1934
+
1935
+ #ifdef XML_DTD
1936
+ static VALUE
1937
+ XMLParser_setParamEntityParsing(VALUE obj, VALUE parsing)
1938
+ {
1939
+ XMLParser* parser;
1940
+ int ret;
1941
+
1942
+ Check_Type(parsing, T_FIXNUM);
1943
+ GET_PARSER(obj, parser);
1944
+ ret = XML_SetParamEntityParsing(parser->parser, FIX2INT(parsing));
1945
+
1946
+ return INT2FIX(ret);
1947
+ }
1948
+ #endif
1949
+
1950
+ static VALUE
1951
+ XMLParser_s_expatVersion(VALUE obj)
1952
+ {
1953
+ #if defined(HAVE_EXPAT_H)
1954
+ return rb_str_new2(XML_ExpatVersion());
1955
+ #elif defined(EXPAT_1_2)
1956
+ return rb_str_new2("1.2");
1957
+ #elif defined(NEW_EXPAT)
1958
+ return rb_str_new2("1.1");
1959
+ #else
1960
+ return rb_str_new2("1.0");
1961
+ #endif
1962
+ }
1963
+
1964
+ #ifdef HAVE_EXPAT_H
1965
+ static VALUE
1966
+ XMLParser_setReturnNSTriplet(VALUE obj, VALUE do_nst)
1967
+ {
1968
+ XMLParser* parser;
1969
+ int nst;
1970
+
1971
+ GET_PARSER(obj, parser);
1972
+ switch (TYPE(do_nst)) {
1973
+ case T_TRUE:
1974
+ nst = 1;
1975
+ break;
1976
+ case T_FALSE:
1977
+ nst = 0;
1978
+ break;
1979
+ case T_FIXNUM:
1980
+ nst = FIX2INT(do_nst);
1981
+ break;
1982
+ default:
1983
+ rb_raise(rb_eTypeError, "not valid value");
1984
+ }
1985
+ XML_SetReturnNSTriplet(parser->parser, nst);
1986
+
1987
+ return Qnil;
1988
+ }
1989
+
1990
+
1991
+ static VALUE
1992
+ XMLParser_getInputContext(VALUE obj)
1993
+ {
1994
+ XMLParser* parser;
1995
+ const char* buffer;
1996
+ int offset;
1997
+ int size;
1998
+ VALUE ret = Qnil;
1999
+
2000
+ GET_PARSER(obj, parser);
2001
+ buffer = XML_GetInputContext(parser->parser,
2002
+ &offset,
2003
+ &size);
2004
+ if (buffer && size > 0) {
2005
+ ret = rb_ary_new3(2,
2006
+ TO_(rb_str_new(buffer, size)),
2007
+ INT2FIX(offset));
2008
+ }
2009
+
2010
+ return ret;
2011
+ }
2012
+
2013
+
2014
+ static VALUE
2015
+ XMLParser_getIdAttrribute(VALUE obj)
2016
+ {
2017
+ XMLParser* parser;
2018
+ int idattr;
2019
+ const XML_Char** atts;
2020
+
2021
+ GET_PARSER(obj, parser);
2022
+ atts = parser->lastAttrs;
2023
+ if (!atts)
2024
+ return Qnil;
2025
+ idattr = XML_GetIdAttributeIndex(parser->parser);
2026
+ if (idattr < 0)
2027
+ return Qnil;
2028
+ return TO_(rb_str_new2((char*)atts[idattr]));
2029
+ }
2030
+ #endif
2031
+
2032
+ #ifdef HAVE_XML_USEFOREIGNDTD
2033
+ static VALUE
2034
+ XMLParser_useForeignDTD(VALUE obj, VALUE useDTD)
2035
+ {
2036
+ XMLParser* parser;
2037
+ int dtd;
2038
+ int ret;
2039
+
2040
+ GET_PARSER(obj, parser);
2041
+ switch (TYPE(useDTD)) {
2042
+ case T_TRUE:
2043
+ dtd = 1;
2044
+ break;
2045
+ case T_FALSE:
2046
+ dtd = 0;
2047
+ break;
2048
+ case T_FIXNUM:
2049
+ dtd = FIX2INT(useDTD);
2050
+ break;
2051
+ default:
2052
+ rb_raise(rb_eTypeError, "not valid value");
2053
+ }
2054
+ ret = XML_UseForeignDTD(parser->parser, dtd);
2055
+
2056
+ return INT2FIX(ret);
2057
+ }
2058
+ #endif
2059
+
2060
+ #ifdef HAVE_XML_GETFEATURELIST
2061
+ static VALUE
2062
+ XMLParser_s_getFeatureList(VALUE obj)
2063
+ {
2064
+ const XML_Feature* list;
2065
+ VALUE ret = rb_hash_new();
2066
+
2067
+ list = XML_GetFeatureList();
2068
+ while (list && list->feature) {
2069
+ rb_hash_aset(ret, FO_(rb_str_new2(list->name)), INT2NUM(list->value));
2070
+ list++;
2071
+ }
2072
+
2073
+ return ret;
2074
+ }
2075
+ #endif
2076
+
2077
+ void
2078
+ Init_xmlparser()
2079
+ {
2080
+ VALUE mXML;
2081
+
2082
+ eXMLParserError = rb_define_class("XMLParserError", rb_eStandardError);
2083
+ cXMLParser = rb_define_class("XMLParser", rb_cObject);
2084
+ cXMLEncoding = rb_define_class("XMLEncoding", rb_cObject);
2085
+
2086
+ /* Class name aliases */
2087
+ if (rb_const_defined(rb_cObject, rb_intern("XML")) == Qtrue)
2088
+ mXML = rb_const_get(rb_cObject, rb_intern("XML"));
2089
+ else
2090
+ mXML = rb_define_module("XML");
2091
+ rb_define_const(mXML, "ParserError", eXMLParserError);
2092
+ rb_define_const(cXMLParser, "Error", eXMLParserError);
2093
+ rb_define_const(mXML, "Parser", cXMLParser);
2094
+ rb_define_const(mXML, "Encoding", cXMLEncoding);
2095
+
2096
+ rb_define_singleton_method(cXMLParser, "new", XMLParser_new, -1);
2097
+ rb_define_singleton_method(cXMLParser, "expatVersion",
2098
+ XMLParser_s_expatVersion, 0);
2099
+ rb_define_method(cXMLParser, "initialize", XMLParser_initialize, -1);
2100
+ rb_define_method(cXMLParser, "parse", XMLParser_parse, -1);
2101
+ rb_define_method(cXMLParser, "done", XMLParser_done, 0);
2102
+ rb_define_method(cXMLParser, "defaultCurrent", XMLParser_defaultCurrent, 0);
2103
+ rb_define_method(cXMLParser, "line", XMLParser_getCurrentLineNumber, 0);
2104
+ rb_define_method(cXMLParser, "column", XMLParser_getCurrentColumnNumber, 0);
2105
+ rb_define_method(cXMLParser, "byteIndex", XMLParser_getCurrentByteIndex, 0);
2106
+ rb_define_method(cXMLParser, "setBase", XMLParser_setBase, 1);
2107
+ rb_define_method(cXMLParser, "getBase", XMLParser_getBase, 0);
2108
+ #ifdef NEW_EXPAT
2109
+ rb_define_method(cXMLParser, "getSpecifiedAttributes",
2110
+ XMLParser_getSpecifiedAttributes, 0);
2111
+ rb_define_method(cXMLParser, "byteCount", XMLParser_getCurrentByteCount, 0);
2112
+ #endif
2113
+ #ifdef XML_DTD
2114
+ rb_define_method(cXMLParser, "setParamEntityParsing",
2115
+ XMLParser_setParamEntityParsing, 1);
2116
+ #endif
2117
+ #ifdef HAVE_EXPAT_H
2118
+ rb_define_method(cXMLParser, "setReturnNSTriplet",
2119
+ XMLParser_setReturnNSTriplet, 1);
2120
+ rb_define_method(cXMLParser, "getInputContext",
2121
+ XMLParser_getInputContext, 0);
2122
+ rb_define_method(cXMLParser, "getIdAttribute",
2123
+ XMLParser_getIdAttrribute, 0);
2124
+ #endif
2125
+
2126
+ #ifdef HAVE_XML_PARSERRESET
2127
+ rb_define_method(cXMLParser, "reset", XMLParser_reset, -1);
2128
+ #endif
2129
+
2130
+ rb_define_method(cXMLEncoding, "map", XMLEncoding_map, 1);
2131
+ rb_define_method(cXMLEncoding, "convert", XMLEncoding_convert, 1);
2132
+
2133
+ #ifdef HAVE_XML_USEFOREIGNDTD
2134
+ rb_define_method(cXMLParser, "useForeignDTD",
2135
+ XMLParser_useForeignDTD, 1);
2136
+ #endif
2137
+ #ifdef HAVE_XML_GETFEATURELIST
2138
+ rb_define_singleton_method(cXMLParser, "getFeatureList",
2139
+ XMLParser_s_getFeatureList, 0);
2140
+ #endif
2141
+
2142
+ #define DEFINE_EVENT_CODE(klass, name) \
2143
+ rb_define_const(klass, #name, sym##name = ID2SYM(rb_intern(#name)))
2144
+
2145
+ DEFINE_EVENT_CODE(cXMLParser, START_ELEM);
2146
+ DEFINE_EVENT_CODE(cXMLParser, END_ELEM);
2147
+ DEFINE_EVENT_CODE(cXMLParser, CDATA);
2148
+ DEFINE_EVENT_CODE(cXMLParser, PI);
2149
+ DEFINE_EVENT_CODE(cXMLParser, DEFAULT);
2150
+ DEFINE_EVENT_CODE(cXMLParser, UNPARSED_ENTITY_DECL);
2151
+ DEFINE_EVENT_CODE(cXMLParser, NOTATION_DECL);
2152
+ DEFINE_EVENT_CODE(cXMLParser, EXTERNAL_ENTITY_REF);
2153
+ #ifdef NEW_EXPAT
2154
+ DEFINE_EVENT_CODE(cXMLParser, COMMENT);
2155
+ DEFINE_EVENT_CODE(cXMLParser, START_CDATA);
2156
+ DEFINE_EVENT_CODE(cXMLParser, END_CDATA);
2157
+ DEFINE_EVENT_CODE(cXMLParser, START_NAMESPACE_DECL);
2158
+ DEFINE_EVENT_CODE(cXMLParser, END_NAMESPACE_DECL);
2159
+ #endif
2160
+ #ifdef HAVE_XML_SETSKIPPEDENTITYHANDLER
2161
+ DEFINE_EVENT_CODE(cXMLParser, SKIPPED_ENTITY);
2162
+ #endif
2163
+ #ifdef XML_DTD
2164
+ rb_define_const(cXMLParser, "PARAM_ENTITY_PARSING_NEVER",
2165
+ XML_PARAM_ENTITY_PARSING_NEVER);
2166
+ rb_define_const(cXMLParser, "PARAM_ENTITY_PARSING_UNLESS_STANDALONE",
2167
+ XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE);
2168
+ rb_define_const(cXMLParser, "PARAM_ENTITY_PARSING_ALWAYS",
2169
+ XML_PARAM_ENTITY_PARSING_ALWAYS);
2170
+ #endif
2171
+ #ifdef HAVE_XML_SETDOCTYPEDECLHANDLER
2172
+ DEFINE_EVENT_CODE(cXMLParser, START_DOCTYPE_DECL);
2173
+ DEFINE_EVENT_CODE(cXMLParser, END_DOCTYPE_DECL);
2174
+ #endif
2175
+ #ifdef HAVE_EXPAT_H
2176
+ DEFINE_EVENT_CODE(cXMLParser, ELEMENT_DECL);
2177
+ DEFINE_EVENT_CODE(cXMLParser, ATTLIST_DECL);
2178
+ DEFINE_EVENT_CODE(cXMLParser, XML_DECL);
2179
+ DEFINE_EVENT_CODE(cXMLParser, ENTITY_DECL);
2180
+ #endif
2181
+ #if 0
2182
+ DEFINE_EVENT_CODE(cXMLParser, EXTERNAL_PARSED_ENTITY_DECL);
2183
+ DEFINE_EVENT_CODE(cXMLParser, INTERNAL_PARSED_ENTITY_DECL);
2184
+ #endif
2185
+ #if 0
2186
+ DEFINE_EVENT_CODE(cXMLParser, UNKNOWN_ENCODING);
2187
+ #endif
2188
+
2189
+ id_map = rb_intern("_map");
2190
+ id_startElementHandler = rb_intern("startElement");
2191
+ id_endElementHandler = rb_intern("endElement");
2192
+ id_characterDataHandler = rb_intern("character");
2193
+ id_processingInstructionHandler = rb_intern("processingInstruction");
2194
+ id_defaultHandler = rb_intern("default");
2195
+ id_unparsedEntityDeclHandler = rb_intern("unparsedEntityDecl");
2196
+ id_notationDeclHandler = rb_intern("notationDecl");
2197
+ id_externalEntityRefHandler = rb_intern("externalEntityRef");
2198
+ #ifdef NEW_EXPAT
2199
+ id_defaultExpandHandler = rb_intern("defaultExpand");
2200
+ id_commentHandler = rb_intern("comment");
2201
+ id_startCdataSectionHandler = rb_intern("startCdata");
2202
+ id_endCdataSectionHandler = rb_intern("endCdata");
2203
+ id_startNamespaceDeclHandler = rb_intern("startNamespaceDecl");
2204
+ id_endNamespaceDeclHandler = rb_intern("endNamespaceDecl");
2205
+ id_notStandaloneHandler = rb_intern("notStandalone");
2206
+ #endif
2207
+ #ifdef HAVE_XML_SETDOCTYPEDECLHANDLER
2208
+ id_startDoctypeDeclHandler = rb_intern("startDoctypeDecl");
2209
+ id_endDoctypeDeclHandler = rb_intern("endDoctypeDecl");
2210
+ #endif
2211
+ id_unknownEncoding = rb_intern("unknownEncoding");
2212
+ id_convert = rb_intern("convert");
2213
+ #ifdef HAVE_EXPAT_H
2214
+ id_elementDeclHandler = rb_intern("elementDecl");
2215
+ id_attlistDeclHandler = rb_intern("attlistDecl");
2216
+ id_xmlDeclHandler = rb_intern("xmlDecl");
2217
+ id_entityDeclHandler = rb_intern("entityDecl");
2218
+ #endif
2219
+ #if 0
2220
+ id_externalParsedEntityDeclHandler = rb_intern("externalParsedEntityDecl");
2221
+ id_internalParsedEntityDeclHandler = rb_intern("internalParsedEntityDecl");
2222
+ #endif
2223
+ #ifdef HAVE_XML_SETSKIPPEDENTITYHANDLER
2224
+ id_skippedEntityHandler = rb_intern("skippedEntity");
2225
+ #endif
2226
+ }