xmlparser 0.6.81 → 0.7.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/MANIFEST +7 -68
- data/README +42 -39
- data/README.ja +54 -57
- data/ext/extconf.rb +58 -0
- data/ext/{xmlparser/xmlparser.c → xmlparser.c} +246 -180
- data/lib/xml/dom/digest.rb +37 -25
- data/lib/xml/dom2/domentityresolverimpl.rb +2 -13
- metadata +79 -93
- data/Rakefile +0 -34
- data/ext/xmlparser/mkrf_conf.rb +0 -28
- data/lib/xml/dom/builder-ja.rb +0 -58
- data/lib/xml/encoding-ja.rb +0 -42
data/ext/extconf.rb
ADDED
@@ -0,0 +1,58 @@
|
|
1
|
+
#
|
2
|
+
# ruby extconf.rb
|
3
|
+
# --with-perl-enc-map[=/path/to/enc-map]
|
4
|
+
# --with-expat-dir=/path/to/expat
|
5
|
+
# --with-expat-lib=/path/to/expat/lib
|
6
|
+
# --with-expat-include=/path/to/expat/include
|
7
|
+
#
|
8
|
+
require 'mkmf'
|
9
|
+
|
10
|
+
cwd=`pwd`.chomp!
|
11
|
+
perl= ENV['PERL'] || 'perl'
|
12
|
+
|
13
|
+
## Encoding maps may be stored in $perl_archlib/XML/Parser/Encodins/
|
14
|
+
#perl_archlib = '/usr/lib/perl5/site_perl/5.005/i586-linux'
|
15
|
+
#perl_archlib = '/usr/local/lib'
|
16
|
+
perl_archlib = `#{perl} -e 'use Config; print $Config{"archlib"}'`
|
17
|
+
xml_enc_path = with_config("perl-enc-map")
|
18
|
+
if xml_enc_path == true
|
19
|
+
xml_enc_path = perl_archlib + "/XML/Parser/Encodings"
|
20
|
+
end
|
21
|
+
|
22
|
+
##$CFLAGS="-I#{cwd}/expat/xmlparse -I#{cwd}/expat/xmltok" +
|
23
|
+
## ' -DXML_ENC_PATH=getenv\(\"XML_ENC_PATH\"\)' +
|
24
|
+
## " -DNEW_EXPAT"
|
25
|
+
#$CFLAGS = "-I#{cwd}/expat/xmlparse -I#{cwd}/expat/xmltok"
|
26
|
+
#$LDFLAGS = "-L#{cwd}/expat/xmlparse -Wl,-rpath,/usr/local/lib"
|
27
|
+
#$LDFLAGS = "-L#{cwd}/expat/xmlparse"
|
28
|
+
dir_config("expat")
|
29
|
+
#dir_config("xmltok")
|
30
|
+
#dir_config("xmlparse")
|
31
|
+
if xml_enc_path
|
32
|
+
$CFLAGS += " -DXML_ENC_PATH=\\\"#{xml_enc_path}\\\""
|
33
|
+
end
|
34
|
+
|
35
|
+
#if have_header("xmlparse.h") || have_header("expat.h")
|
36
|
+
if have_header("expat.h") || have_header("xmlparse.h")
|
37
|
+
if have_library("expat", "XML_ParserCreate") ||
|
38
|
+
have_library("xmltok", "XML_ParserCreate")
|
39
|
+
if have_func("XML_SetNotStandaloneHandler")
|
40
|
+
$CFLAGS += " -DNEW_EXPAT"
|
41
|
+
end
|
42
|
+
if have_func("XML_SetParamEntityParsing")
|
43
|
+
$CFLAGS += " -DXML_DTD"
|
44
|
+
end
|
45
|
+
# if have_func("XML_SetExternalParsedEntityDeclHandler")
|
46
|
+
# $CFLAGS += " -DEXPAT_1_2"
|
47
|
+
# end
|
48
|
+
have_func("XML_SetDoctypeDeclHandler")
|
49
|
+
have_func("XML_ParserReset")
|
50
|
+
have_func("XML_SetSkippedEntityHandler")
|
51
|
+
have_func("XML_GetFeatureList")
|
52
|
+
have_func("XML_UseForeignDTD")
|
53
|
+
have_func("XML_GetIdAttributeIndex")
|
54
|
+
have_library("socket", "ntohl")
|
55
|
+
have_library("wsock32") if RUBY_PLATFORM =~ /mswin32|mingw/
|
56
|
+
create_makefile("xmlparser")
|
57
|
+
end
|
58
|
+
end
|
@@ -1,5 +1,6 @@
|
|
1
1
|
/*
|
2
2
|
* Expat (XML Parser Toolkit) wrapper for Ruby
|
3
|
+
* Dec 15, 2009 yoshidam version 0.7.0 support Ruby 1.9.1
|
3
4
|
* Feb 16, 2004 yoshidam version 0.6.8 taint output string
|
4
5
|
* Feb 16, 2004 yoshidam version 0.6.7 fix buffer overflow
|
5
6
|
* Mar 11, 2003 yoshidam version 0.6.6 fix skippedEntity handler
|
@@ -41,7 +42,11 @@
|
|
41
42
|
*/
|
42
43
|
|
43
44
|
#include "ruby.h"
|
44
|
-
#
|
45
|
+
#ifdef HAVE_RUBY_IO_H
|
46
|
+
# include "ruby/io.h"
|
47
|
+
#else
|
48
|
+
# include "rubyio.h"
|
49
|
+
#endif
|
45
50
|
#include <stdio.h>
|
46
51
|
#include <ctype.h>
|
47
52
|
#ifdef HAVE_EXPAT_H
|
@@ -58,6 +63,15 @@
|
|
58
63
|
# endif
|
59
64
|
#endif
|
60
65
|
|
66
|
+
#ifndef RSTRING_PTR
|
67
|
+
# define RSTRING_PTR(s) (RSTRING(s)->ptr)
|
68
|
+
# define RSTRING_LEN(s) (RSTRING(s)->len)
|
69
|
+
#endif
|
70
|
+
|
71
|
+
#ifdef HAVE_RUBY_ENCODING_H
|
72
|
+
static rb_encoding* enc_xml;
|
73
|
+
#endif
|
74
|
+
|
61
75
|
static VALUE eXMLParserError;
|
62
76
|
static VALUE cXMLParser;
|
63
77
|
static VALUE cXMLEncoding;
|
@@ -112,6 +126,7 @@ typedef struct _XMLParser {
|
|
112
126
|
int tainted;
|
113
127
|
VALUE parent;
|
114
128
|
char* context;
|
129
|
+
const XML_Char *detectedEncoding;
|
115
130
|
} XMLParser;
|
116
131
|
|
117
132
|
static VALUE symDEFAULT;
|
@@ -198,6 +213,12 @@ freezeObject(VALUE obj) {
|
|
198
213
|
}
|
199
214
|
#define FO_(o) (freezeObject(o))
|
200
215
|
|
216
|
+
#ifdef HAVE_RUBY_ENCODING_H
|
217
|
+
# define ENC_(o) (rb_enc_associate(o, enc_xml))
|
218
|
+
#else
|
219
|
+
# define ENC_(o) (o)
|
220
|
+
#endif
|
221
|
+
|
201
222
|
|
202
223
|
/* Event handlers for iterator */
|
203
224
|
static void
|
@@ -216,12 +237,12 @@ iterStartElementHandler(void *recv,
|
|
216
237
|
const char* key = *atts++;
|
217
238
|
const char* val = *atts++;
|
218
239
|
rb_hash_aset(attrhash,
|
219
|
-
FO_(TO_(rb_str_new2((char*)key))),
|
220
|
-
TO_(rb_str_new2((char*)val)));
|
240
|
+
FO_(TO_(ENC_(rb_str_new2((char*)key)))),
|
241
|
+
TO_(ENC_(rb_str_new2((char*)val))));
|
221
242
|
}
|
222
243
|
|
223
244
|
rb_yield(rb_ary_new3(4, symSTART_ELEM,
|
224
|
-
TO_(rb_str_new2((char*)name)), attrhash, recv));
|
245
|
+
TO_(ENC_(rb_str_new2((char*)name))), attrhash, recv));
|
225
246
|
if (parser->defaultCurrent) {
|
226
247
|
parser->defaultCurrent = 0;
|
227
248
|
XML_DefaultCurrent(parser->parser);
|
@@ -235,7 +256,7 @@ iterEndElementHandler(void *recv,
|
|
235
256
|
XMLParser* parser;
|
236
257
|
GET_PARSER(recv, parser);
|
237
258
|
rb_yield(rb_ary_new3(4, symEND_ELEM,
|
238
|
-
TO_(rb_str_new2((char*)name)), Qnil, recv));
|
259
|
+
TO_(ENC_(rb_str_new2((char*)name))), Qnil, recv));
|
239
260
|
if (parser->defaultCurrent) {
|
240
261
|
parser->defaultCurrent = 0;
|
241
262
|
XML_DefaultCurrent(parser->parser);
|
@@ -250,7 +271,7 @@ iterCharacterDataHandler(void *recv,
|
|
250
271
|
XMLParser* parser;
|
251
272
|
GET_PARSER(recv, parser);
|
252
273
|
rb_yield(rb_ary_new3(4, symCDATA,
|
253
|
-
Qnil, TO_(rb_str_new((char*)s, len)), recv));
|
274
|
+
Qnil, TO_(ENC_(rb_str_new((char*)s, len))), recv));
|
254
275
|
if (parser->defaultCurrent) {
|
255
276
|
parser->defaultCurrent = 0;
|
256
277
|
XML_DefaultCurrent(parser->parser);
|
@@ -265,8 +286,8 @@ iterProcessingInstructionHandler(void *recv,
|
|
265
286
|
XMLParser* parser;
|
266
287
|
GET_PARSER(recv, parser);
|
267
288
|
rb_yield(rb_ary_new3(4, symPI,
|
268
|
-
TO_(rb_str_new2((char*)target)),
|
269
|
-
TO_(rb_str_new2((char*)data)), recv));
|
289
|
+
TO_(ENC_(rb_str_new2((char*)target))),
|
290
|
+
TO_(ENC_(rb_str_new2((char*)data))), recv));
|
270
291
|
if (parser->defaultCurrent) {
|
271
292
|
parser->defaultCurrent = 0;
|
272
293
|
XML_DefaultCurrent(parser->parser);
|
@@ -281,7 +302,7 @@ iterDefaultHandler(void *recv,
|
|
281
302
|
XMLParser* parser;
|
282
303
|
GET_PARSER(recv, parser);
|
283
304
|
rb_yield(rb_ary_new3(4, symDEFAULT,
|
284
|
-
Qnil, TO_(rb_str_new((char*)s, len)), recv));
|
305
|
+
Qnil, TO_(ENC_(rb_str_new((char*)s, len))), recv));
|
285
306
|
if (parser->defaultCurrent) {
|
286
307
|
parser->defaultCurrent = 0;
|
287
308
|
/* XML_DefaultCurrent shoould not call in defaultHandler */
|
@@ -301,12 +322,12 @@ iterUnparsedEntityDeclHandler(void *recv,
|
|
301
322
|
VALUE valary;
|
302
323
|
|
303
324
|
GET_PARSER(recv, parser);
|
304
|
-
valary = rb_ary_new3(4, (base ? TO_(rb_str_new2((char*)base)) : Qnil),
|
305
|
-
TO_(rb_str_new2((char*)systemId)),
|
306
|
-
(publicId ? TO_(rb_str_new2((char*)publicId)) : Qnil),
|
307
|
-
TO_(rb_str_new2((char*)notationName)));
|
325
|
+
valary = rb_ary_new3(4, (base ? TO_(ENC_(rb_str_new2((char*)base))) : Qnil),
|
326
|
+
TO_(ENC_(rb_str_new2((char*)systemId))),
|
327
|
+
(publicId ? TO_(ENC_(rb_str_new2((char*)publicId))) : Qnil),
|
328
|
+
TO_(ENC_(rb_str_new2((char*)notationName))));
|
308
329
|
rb_yield(rb_ary_new3(4, symUNPARSED_ENTITY_DECL,
|
309
|
-
TO_(rb_str_new2((char*)entityName)),
|
330
|
+
TO_(ENC_(rb_str_new2((char*)entityName))),
|
310
331
|
valary, recv));
|
311
332
|
if (parser->defaultCurrent) {
|
312
333
|
parser->defaultCurrent = 0;
|
@@ -326,11 +347,11 @@ iterNotationDeclHandler(void *recv,
|
|
326
347
|
|
327
348
|
GET_PARSER(recv, parser);
|
328
349
|
valary = rb_ary_new3(3,
|
329
|
-
(base ? TO_(rb_str_new2((char*)base)) : Qnil),
|
330
|
-
(systemId ? TO_(rb_str_new2((char*)systemId)) : Qnil),
|
331
|
-
(publicId ? TO_(rb_str_new2((char*)publicId)) : Qnil));
|
350
|
+
(base ? TO_(ENC_(rb_str_new2((char*)base))) : Qnil),
|
351
|
+
(systemId ? TO_(ENC_(rb_str_new2((char*)systemId))) : Qnil),
|
352
|
+
(publicId ? TO_(ENC_(rb_str_new2((char*)publicId))) : Qnil));
|
332
353
|
rb_yield(rb_ary_new3(4, symNOTATION_DECL,
|
333
|
-
TO_(rb_str_new2((char*)notationName)),
|
354
|
+
TO_(ENC_(rb_str_new2((char*)notationName))),
|
334
355
|
valary, recv));
|
335
356
|
if (parser->defaultCurrent) {
|
336
357
|
parser->defaultCurrent = 0;
|
@@ -353,11 +374,11 @@ iterExternalEntityRefHandler(XML_Parser xmlparser,
|
|
353
374
|
recv = (VALUE)XML_GetUserData(xmlparser);
|
354
375
|
GET_PARSER(recv, parser);
|
355
376
|
valary = rb_ary_new3(3,
|
356
|
-
(base ? TO_(rb_str_new2((char*)base)) : Qnil),
|
357
|
-
(systemId ? TO_(rb_str_new2((char*)systemId)) : Qnil),
|
358
|
-
(publicId ? TO_(rb_str_new2((char*)publicId)) : Qnil));
|
377
|
+
(base ? TO_(ENC_(rb_str_new2((char*)base))) : Qnil),
|
378
|
+
(systemId ? TO_(ENC_(rb_str_new2((char*)systemId))) : Qnil),
|
379
|
+
(publicId ? TO_(ENC_(rb_str_new2((char*)publicId))) : Qnil));
|
359
380
|
ret = rb_yield(rb_ary_new3(4, symEXTERNAL_ENTITY_REF,
|
360
|
-
(context ? TO_(rb_str_new2((char*)context)) : Qnil),
|
381
|
+
(context ? TO_(ENC_(rb_str_new2((char*)context))) : Qnil),
|
361
382
|
valary, recv));
|
362
383
|
if (parser->defaultCurrent) {
|
363
384
|
parser->defaultCurrent = 0;
|
@@ -376,7 +397,7 @@ iterCommentHandler(void *recv,
|
|
376
397
|
XMLParser* parser;
|
377
398
|
GET_PARSER(recv, parser);
|
378
399
|
rb_yield(rb_ary_new3(4, symCOMMENT,
|
379
|
-
Qnil, TO_(rb_str_new2((char*)s)), recv));
|
400
|
+
Qnil, TO_(ENC_(rb_str_new2((char*)s))), recv));
|
380
401
|
if (parser->defaultCurrent) {
|
381
402
|
parser->defaultCurrent = 0;
|
382
403
|
XML_DefaultCurrent(parser->parser);
|
@@ -415,8 +436,8 @@ iterStartNamespaceDeclHandler(void *recv,
|
|
415
436
|
XMLParser* parser;
|
416
437
|
GET_PARSER(recv, parser);
|
417
438
|
rb_yield(rb_ary_new3(4, symSTART_NAMESPACE_DECL,
|
418
|
-
(prefix ? TO_(rb_str_new2((char*)prefix)) : Qnil),
|
419
|
-
(uri ? TO_(rb_str_new2((char*)uri)) : Qnil), recv));
|
439
|
+
(prefix ? TO_(ENC_(rb_str_new2((char*)prefix))) : Qnil),
|
440
|
+
(uri ? TO_(ENC_(rb_str_new2((char*)uri))) : Qnil), recv));
|
420
441
|
if (parser->defaultCurrent) {
|
421
442
|
parser->defaultCurrent = 0;
|
422
443
|
XML_DefaultCurrent(parser->parser);
|
@@ -430,7 +451,7 @@ iterEndNamespaceDeclHandler(void *recv,
|
|
430
451
|
XMLParser* parser;
|
431
452
|
GET_PARSER(recv, parser);
|
432
453
|
rb_yield(rb_ary_new3(4, symEND_NAMESPACE_DECL,
|
433
|
-
(prefix ? TO_(rb_str_new2((char*)prefix)) : Qnil),
|
454
|
+
(prefix ? TO_(ENC_(rb_str_new2((char*)prefix))) : Qnil),
|
434
455
|
Qnil, recv));
|
435
456
|
if (parser->defaultCurrent) {
|
436
457
|
parser->defaultCurrent = 0;
|
@@ -458,12 +479,12 @@ iterStartDoctypeDeclHandler(void *recv,
|
|
458
479
|
GET_PARSER(recv, parser);
|
459
480
|
#ifdef HAVE_EXPAT_H
|
460
481
|
valary = rb_ary_new3(3,
|
461
|
-
(sysid ? TO_(rb_str_new2((char*)sysid)) : Qnil),
|
462
|
-
(pubid ? TO_(rb_str_new2((char*)pubid)) : Qnil),
|
482
|
+
(sysid ? TO_(ENC_(rb_str_new2((char*)sysid))) : Qnil),
|
483
|
+
(pubid ? TO_(ENC_(rb_str_new2((char*)pubid))) : Qnil),
|
463
484
|
(has_internal_subset ? Qtrue : Qfalse));
|
464
485
|
#endif
|
465
486
|
rb_yield(rb_ary_new3(4, symSTART_DOCTYPE_DECL,
|
466
|
-
TO_(rb_str_new2((char*)doctypeName)),
|
487
|
+
TO_(ENC_(rb_str_new2((char*)doctypeName))),
|
467
488
|
valary, recv));
|
468
489
|
if (parser->defaultCurrent) {
|
469
490
|
parser->defaultCurrent = 0;
|
@@ -498,18 +519,18 @@ makeContentArray(XMLParser* parser, XML_Content* model)
|
|
498
519
|
static const char* content_quant_name[] = {
|
499
520
|
"", "?", "*", "+"
|
500
521
|
};
|
501
|
-
int i;
|
522
|
+
unsigned int i;
|
502
523
|
VALUE children = Qnil;
|
503
524
|
const char* type_name = content_type_name[model->type];
|
504
525
|
const char* quant_name = content_quant_name[model->quant];
|
505
526
|
VALUE ret = rb_ary_new3(3,
|
506
|
-
TO_(rb_str_new2((char*)type_name)),
|
507
|
-
TO_(rb_str_new2((char*)quant_name)),
|
508
|
-
(model->name ? TO_(rb_str_new2((char*)model->name)) :
|
527
|
+
TO_(ENC_(rb_str_new2((char*)type_name))),
|
528
|
+
TO_(ENC_(rb_str_new2((char*)quant_name))),
|
529
|
+
(model->name ? TO_(ENC_(rb_str_new2((char*)model->name))) :
|
509
530
|
Qnil));
|
510
531
|
if (model->numchildren > 0) {
|
511
532
|
children = rb_ary_new();
|
512
|
-
for (i =0; i < model->numchildren; i++) {
|
533
|
+
for (i = 0; i < model->numchildren; i++) {
|
513
534
|
VALUE child = makeContentArray(parser, model->children + i);
|
514
535
|
rb_ary_push(children, child);
|
515
536
|
}
|
@@ -526,10 +547,11 @@ iterElementDeclHandler(void *recv,
|
|
526
547
|
XML_Content *model)
|
527
548
|
{
|
528
549
|
XMLParser* parser;
|
550
|
+
VALUE content;
|
529
551
|
GET_PARSER(recv, parser);
|
530
|
-
|
552
|
+
content = makeContentArray(parser, model);
|
531
553
|
rb_yield(rb_ary_new3(4, symELEMENT_DECL,
|
532
|
-
TO_(rb_str_new2(name)),
|
554
|
+
TO_(ENC_(rb_str_new2(name))),
|
533
555
|
content, recv));
|
534
556
|
if (parser->defaultCurrent) {
|
535
557
|
parser->defaultCurrent = 0;
|
@@ -550,12 +572,12 @@ iterAttlistDeclHandler (void *recv,
|
|
550
572
|
|
551
573
|
GET_PARSER(recv, parser);
|
552
574
|
valary = rb_ary_new3(4,
|
553
|
-
TO_(rb_str_new2((char*)attname)),
|
554
|
-
TO_(rb_str_new2((char*)att_type)),
|
555
|
-
(dflt ? TO_(rb_str_new2((char*)dflt)) : Qnil),
|
575
|
+
TO_(ENC_(rb_str_new2((char*)attname))),
|
576
|
+
TO_(ENC_(rb_str_new2((char*)att_type))),
|
577
|
+
(dflt ? TO_(ENC_(rb_str_new2((char*)dflt))) : Qnil),
|
556
578
|
(isrequired ? Qtrue : Qfalse));
|
557
579
|
rb_yield(rb_ary_new3(4, symATTLIST_DECL,
|
558
|
-
TO_(rb_str_new2(elname)),
|
580
|
+
TO_(ENC_(rb_str_new2(elname))),
|
559
581
|
valary, recv));
|
560
582
|
if (parser->defaultCurrent) {
|
561
583
|
parser->defaultCurrent = 0;
|
@@ -574,8 +596,8 @@ iterXmlDeclHandler (void *recv,
|
|
574
596
|
|
575
597
|
GET_PARSER(recv, parser);
|
576
598
|
valary = rb_ary_new3(3,
|
577
|
-
(version ? TO_(rb_str_new2(version)) : Qnil),
|
578
|
-
(encoding ? TO_(rb_str_new2((char*)encoding)) : Qnil),
|
599
|
+
(version ? TO_(ENC_(rb_str_new2(version))) : Qnil),
|
600
|
+
(encoding ? TO_(ENC_(rb_str_new2((char*)encoding))) : Qnil),
|
579
601
|
INT2FIX(standalone));
|
580
602
|
rb_yield(rb_ary_new3(4, symXML_DECL,
|
581
603
|
Qnil,
|
@@ -603,14 +625,14 @@ iterEntityDeclHandler (void *recv,
|
|
603
625
|
GET_PARSER(recv, parser);
|
604
626
|
valary = rb_ary_new3(6,
|
605
627
|
(is_parameter_entity ? Qtrue : Qfalse),
|
606
|
-
TO_(rb_str_new((char*)value, value_length)),
|
607
|
-
(base ? TO_(rb_str_new2((char*)base)) : Qnil),
|
608
|
-
(systemId ? TO_(rb_str_new2((char*)systemId)) : Qnil),
|
609
|
-
(publicId ? TO_(rb_str_new2((char*)publicId)) : Qnil),
|
610
|
-
(notationName ? TO_(rb_str_new2((char*)notationName))
|
628
|
+
TO_(ENC_(rb_str_new((char*)value, value_length))),
|
629
|
+
(base ? TO_(ENC_(rb_str_new2((char*)base))) : Qnil),
|
630
|
+
(systemId ? TO_(ENC_(rb_str_new2((char*)systemId))) : Qnil),
|
631
|
+
(publicId ? TO_(ENC_(rb_str_new2((char*)publicId))) : Qnil),
|
632
|
+
(notationName ? TO_(ENC_(rb_str_new2((char*)notationName)))
|
611
633
|
: Qnil));
|
612
634
|
rb_yield(rb_ary_new3(4, symENTITY_DECL,
|
613
|
-
TO_(rb_str_new2(entityName)),
|
635
|
+
TO_(ENC_(rb_str_new2(entityName))),
|
614
636
|
valary, recv));
|
615
637
|
if (parser->defaultCurrent) {
|
616
638
|
parser->defaultCurrent = 0;
|
@@ -632,11 +654,11 @@ iterExternalParsedEntityDeclHandler(void *recv,
|
|
632
654
|
VALUE valary;
|
633
655
|
|
634
656
|
GET_PARSER(recv, parser);
|
635
|
-
valary = rb_ary_new3(3, (base ? TO_(rb_str_new2((char*)base)) : Qnil),
|
636
|
-
TO_(rb_str_new2((char*)systemId)),
|
637
|
-
(publicId ? TO_(rb_str_new2((char*)publicId)) : Qnil));
|
657
|
+
valary = rb_ary_new3(3, (base ? TO_(ENC_(rb_str_new2((char*)base))) : Qnil),
|
658
|
+
TO_(ENC_(rb_str_new2((char*)systemId))),
|
659
|
+
(publicId ? TO_(ENC_(rb_str_new2((char*)publicId))) : Qnil));
|
638
660
|
rb_yield(rb_ary_new3(4, symEXTERNAL_PARSED_ENTITY_DECL,
|
639
|
-
TO_(rb_str_new2((char*)entityName)),
|
661
|
+
TO_(ENC_(rb_str_new2((char*)entityName))),
|
640
662
|
valary, recv));
|
641
663
|
if (parser->defaultCurrent) {
|
642
664
|
parser->defaultCurrent = 0;
|
@@ -653,9 +675,9 @@ iterInternalParsedEntityDeclHandler(void *recv,
|
|
653
675
|
XMLParser* parser;
|
654
676
|
GET_PARSER(recv, parser);
|
655
677
|
rb_yield(rb_ary_new3(4, symINTERNAL_PARSED_ENTITY_DECL,
|
656
|
-
TO_(rb_str_new2((char*)entityName)),
|
657
|
-
TO_(rb_str_new((char*)replacementText,
|
658
|
-
|
678
|
+
TO_(ENC_(rb_str_new2((char*)entityName))),
|
679
|
+
TO_(ENC_(rb_str_new((char*)replacementText,
|
680
|
+
replacementTextLength))), recv));
|
659
681
|
if (parser->defaultCurrent) {
|
660
682
|
parser->defaultCurrent = 0;
|
661
683
|
XML_DefaultCurrent(parser->parser);
|
@@ -672,7 +694,7 @@ iterSkippedEntityHandler(void *recv,
|
|
672
694
|
XMLParser* parser;
|
673
695
|
GET_PARSER(recv, parser);
|
674
696
|
rb_yield(rb_ary_new3(4, symSKIPPED_ENTITY,
|
675
|
-
TO_(rb_str_new2((char*)entityName)),
|
697
|
+
TO_(ENC_(rb_str_new2((char*)entityName))),
|
676
698
|
INT2FIX(is_parameter_entity), recv));
|
677
699
|
if (parser->defaultCurrent) {
|
678
700
|
parser->defaultCurrent = 0;
|
@@ -700,11 +722,11 @@ myStartElementHandler(void *recv,
|
|
700
722
|
const char* key = *atts++;
|
701
723
|
const char* val = *atts++;
|
702
724
|
rb_hash_aset(attrhash,
|
703
|
-
FO_(TO_(rb_str_new2((char*)key))),
|
704
|
-
TO_(rb_str_new2((char*)val)));
|
725
|
+
FO_(TO_(ENC_(rb_str_new2((char*)key)))),
|
726
|
+
TO_(ENC_(rb_str_new2((char*)val))));
|
705
727
|
}
|
706
728
|
rb_funcall((VALUE)recv, id_startElementHandler, 2,
|
707
|
-
TO_(rb_str_new2((char*)name)), attrhash);
|
729
|
+
TO_(ENC_(rb_str_new2((char*)name))), attrhash);
|
708
730
|
}
|
709
731
|
|
710
732
|
static void
|
@@ -714,7 +736,7 @@ myEndElementHandler(void *recv,
|
|
714
736
|
XMLParser* parser;
|
715
737
|
GET_PARSER(recv, parser);
|
716
738
|
rb_funcall((VALUE)recv, id_endElementHandler, 1,
|
717
|
-
TO_(rb_str_new2((char*)name)));
|
739
|
+
TO_(ENC_(rb_str_new2((char*)name))));
|
718
740
|
}
|
719
741
|
|
720
742
|
static void
|
@@ -725,7 +747,7 @@ myCharacterDataHandler(void *recv,
|
|
725
747
|
XMLParser* parser;
|
726
748
|
GET_PARSER(recv, parser);
|
727
749
|
rb_funcall((VALUE)recv, id_characterDataHandler, 1,
|
728
|
-
TO_(rb_str_new((char*)s, len)));
|
750
|
+
TO_(ENC_(rb_str_new((char*)s, len))));
|
729
751
|
}
|
730
752
|
|
731
753
|
static void
|
@@ -736,8 +758,8 @@ myProcessingInstructionHandler(void *recv,
|
|
736
758
|
XMLParser* parser;
|
737
759
|
GET_PARSER(recv, parser);
|
738
760
|
rb_funcall((VALUE)recv, id_processingInstructionHandler, 2,
|
739
|
-
TO_(rb_str_new2((char*)target)),
|
740
|
-
TO_(rb_str_new2((char*)data)));
|
761
|
+
TO_(ENC_(rb_str_new2((char*)target))),
|
762
|
+
TO_(ENC_(rb_str_new2((char*)data))));
|
741
763
|
}
|
742
764
|
|
743
765
|
static void
|
@@ -748,7 +770,7 @@ myDefaultHandler(void *recv,
|
|
748
770
|
XMLParser* parser;
|
749
771
|
GET_PARSER(recv, parser);
|
750
772
|
rb_funcall((VALUE)recv, id_defaultHandler, 1,
|
751
|
-
TO_(rb_str_new((char*)s, len)));
|
773
|
+
TO_(ENC_(rb_str_new((char*)s, len))));
|
752
774
|
}
|
753
775
|
|
754
776
|
#ifdef NEW_EXPAT
|
@@ -760,7 +782,7 @@ myDefaultExpandHandler(void *recv,
|
|
760
782
|
XMLParser* parser;
|
761
783
|
GET_PARSER(recv, parser);
|
762
784
|
rb_funcall((VALUE)recv, id_defaultExpandHandler, 1,
|
763
|
-
TO_(rb_str_new((char*)s, len)));
|
785
|
+
TO_(ENC_(rb_str_new((char*)s, len))));
|
764
786
|
}
|
765
787
|
#endif
|
766
788
|
|
@@ -775,11 +797,11 @@ myUnparsedEntityDeclHandler(void *recv,
|
|
775
797
|
XMLParser* parser;
|
776
798
|
GET_PARSER(recv, parser);
|
777
799
|
rb_funcall((VALUE)recv, id_unparsedEntityDeclHandler, 5,
|
778
|
-
TO_(rb_str_new2((char*)entityName)),
|
779
|
-
(base ? TO_(rb_str_new2((char*)base)) : Qnil),
|
780
|
-
TO_(rb_str_new2((char*)systemId)),
|
781
|
-
(publicId ? TO_(rb_str_new2((char*)publicId)) : Qnil),
|
782
|
-
TO_(rb_str_new2((char*)notationName)));
|
800
|
+
TO_(ENC_(rb_str_new2((char*)entityName))),
|
801
|
+
(base ? TO_(ENC_(rb_str_new2((char*)base))) : Qnil),
|
802
|
+
TO_(ENC_(rb_str_new2((char*)systemId))),
|
803
|
+
(publicId ? TO_(ENC_(rb_str_new2((char*)publicId))) : Qnil),
|
804
|
+
TO_(ENC_(rb_str_new2((char*)notationName))));
|
783
805
|
}
|
784
806
|
|
785
807
|
void
|
@@ -792,10 +814,10 @@ myNotationDeclHandler(void *recv,
|
|
792
814
|
XMLParser* parser;
|
793
815
|
GET_PARSER(recv, parser);
|
794
816
|
rb_funcall((VALUE)recv, id_notationDeclHandler, 4,
|
795
|
-
TO_(rb_str_new2((char*)notationName)),
|
796
|
-
(base ? TO_(rb_str_new2((char*)base)) : Qnil),
|
797
|
-
(systemId ? TO_(rb_str_new2((char*)systemId)) : Qnil),
|
798
|
-
(publicId ? TO_(rb_str_new2((char*)publicId)) : Qnil));
|
817
|
+
TO_(ENC_(rb_str_new2((char*)notationName))),
|
818
|
+
(base ? TO_(ENC_(rb_str_new2((char*)base))) : Qnil),
|
819
|
+
(systemId ? TO_(ENC_(rb_str_new2((char*)systemId))) : Qnil),
|
820
|
+
(publicId ? TO_(ENC_(rb_str_new2((char*)publicId))) : Qnil));
|
799
821
|
}
|
800
822
|
|
801
823
|
int
|
@@ -812,10 +834,10 @@ myExternalEntityRefHandler(XML_Parser xmlparser,
|
|
812
834
|
recv = (VALUE)XML_GetUserData(xmlparser);
|
813
835
|
GET_PARSER(recv, parser);
|
814
836
|
ret = rb_funcall(recv, id_externalEntityRefHandler, 4,
|
815
|
-
(context ? TO_(rb_str_new2((char*)context)): Qnil),
|
816
|
-
(base ? TO_(rb_str_new2((char*)base)) : Qnil),
|
817
|
-
(systemId ? TO_(rb_str_new2((char*)systemId)) : Qnil),
|
818
|
-
(publicId ? TO_(rb_str_new2((char*)publicId)) : Qnil));
|
837
|
+
(context ? TO_(ENC_(rb_str_new2((char*)context))): Qnil),
|
838
|
+
(base ? TO_(ENC_(rb_str_new2((char*)base))) : Qnil),
|
839
|
+
(systemId ? TO_(ENC_(rb_str_new2((char*)systemId))) : Qnil),
|
840
|
+
(publicId ? TO_(ENC_(rb_str_new2((char*)publicId))) : Qnil));
|
819
841
|
/* The error status in this handler should be returned
|
820
842
|
by the exception. */
|
821
843
|
return Qnil;
|
@@ -829,7 +851,7 @@ myCommentHandler(void *recv,
|
|
829
851
|
XMLParser* parser;
|
830
852
|
GET_PARSER(recv, parser);
|
831
853
|
rb_funcall((VALUE)recv, id_commentHandler, 1,
|
832
|
-
TO_(rb_str_new2((char*)s)));
|
854
|
+
TO_(ENC_(rb_str_new2((char*)s))));
|
833
855
|
}
|
834
856
|
|
835
857
|
static void
|
@@ -856,8 +878,8 @@ myStartNamespaceDeclHandler(void *recv,
|
|
856
878
|
XMLParser* parser;
|
857
879
|
GET_PARSER(recv, parser);
|
858
880
|
rb_funcall((VALUE)recv, id_startNamespaceDeclHandler, 2,
|
859
|
-
(prefix ? TO_(rb_str_new2((char*)prefix)) : Qnil),
|
860
|
-
(uri ? TO_(rb_str_new2((char*)uri)) : Qnil));
|
881
|
+
(prefix ? TO_(ENC_(rb_str_new2((char*)prefix))) : Qnil),
|
882
|
+
(uri ? TO_(ENC_(rb_str_new2((char*)uri))) : Qnil));
|
861
883
|
}
|
862
884
|
|
863
885
|
static void
|
@@ -867,7 +889,7 @@ myEndNamespaceDeclHandler(void *recv,
|
|
867
889
|
XMLParser* parser;
|
868
890
|
GET_PARSER(recv, parser);
|
869
891
|
rb_funcall((VALUE)recv, id_endNamespaceDeclHandler, 1,
|
870
|
-
(prefix ? TO_(rb_str_new2((char*)prefix)) : Qnil));
|
892
|
+
(prefix ? TO_(ENC_(rb_str_new2((char*)prefix))) : Qnil));
|
871
893
|
}
|
872
894
|
|
873
895
|
static int
|
@@ -900,13 +922,13 @@ myStartDoctypeDeclHandler(void *recv,
|
|
900
922
|
GET_PARSER(recv, parser);
|
901
923
|
#ifdef HAVE_EXPAT_H
|
902
924
|
rb_funcall((VALUE)recv, id_startDoctypeDeclHandler, 4,
|
903
|
-
TO_(rb_str_new2((char*)doctypeName)),
|
904
|
-
(sysid ? TO_(rb_str_new2((char*)sysid)) : Qnil),
|
905
|
-
(pubid ? TO_(rb_str_new2((char*)pubid)) : Qnil),
|
925
|
+
TO_(ENC_(rb_str_new2((char*)doctypeName))),
|
926
|
+
(sysid ? TO_(ENC_(rb_str_new2((char*)sysid))) : Qnil),
|
927
|
+
(pubid ? TO_(ENC_(rb_str_new2((char*)pubid))) : Qnil),
|
906
928
|
(has_internal_subset ? Qtrue : Qfalse));
|
907
929
|
#else
|
908
930
|
rb_funcall((VALUE)recv, id_startDoctypeDeclHandler, 4,
|
909
|
-
TO_(rb_str_new2((char*)doctypeName)),
|
931
|
+
TO_(ENC_(rb_str_new2((char*)doctypeName))),
|
910
932
|
Qnil, Qnil, Qfalse);
|
911
933
|
#endif
|
912
934
|
}
|
@@ -929,10 +951,11 @@ myElementDeclHandler(void *recv,
|
|
929
951
|
XML_Content *model)
|
930
952
|
{
|
931
953
|
XMLParser* parser;
|
954
|
+
VALUE content;
|
932
955
|
GET_PARSER(recv, parser);
|
933
|
-
|
956
|
+
content = makeContentArray(parser, model);
|
934
957
|
rb_funcall((VALUE)recv, id_elementDeclHandler, 2,
|
935
|
-
TO_(rb_str_new2(name)), content);
|
958
|
+
TO_(ENC_(rb_str_new2(name))), content);
|
936
959
|
}
|
937
960
|
|
938
961
|
static void
|
@@ -946,10 +969,10 @@ myAttlistDeclHandler (void *recv,
|
|
946
969
|
XMLParser* parser;
|
947
970
|
GET_PARSER(recv, parser);
|
948
971
|
rb_funcall((VALUE)recv, id_attlistDeclHandler, 5,
|
949
|
-
TO_(rb_str_new2(elname)),
|
950
|
-
TO_(rb_str_new2((char*)attname)),
|
951
|
-
TO_(rb_str_new2((char*)att_type)),
|
952
|
-
(dflt ? TO_(rb_str_new2((char*)dflt)) : Qnil),
|
972
|
+
TO_(ENC_(rb_str_new2(elname))),
|
973
|
+
TO_(ENC_(rb_str_new2((char*)attname))),
|
974
|
+
TO_(ENC_(rb_str_new2((char*)att_type))),
|
975
|
+
(dflt ? TO_(ENC_(rb_str_new2((char*)dflt))) : Qnil),
|
953
976
|
(isrequired ? Qtrue : Qfalse));
|
954
977
|
}
|
955
978
|
|
@@ -962,8 +985,8 @@ myXmlDeclHandler (void *recv,
|
|
962
985
|
XMLParser* parser;
|
963
986
|
GET_PARSER(recv, parser);
|
964
987
|
rb_funcall((VALUE)recv, id_xmlDeclHandler, 3,
|
965
|
-
(version ? TO_(rb_str_new2(version)) : Qnil),
|
966
|
-
(encoding ? TO_(rb_str_new2((char*)encoding)) : Qnil),
|
988
|
+
(version ? TO_(ENC_(rb_str_new2(version))) : Qnil),
|
989
|
+
(encoding ? TO_(ENC_(rb_str_new2((char*)encoding))) : Qnil),
|
967
990
|
INT2FIX(standalone));
|
968
991
|
}
|
969
992
|
|
@@ -981,13 +1004,13 @@ myEntityDeclHandler (void *recv,
|
|
981
1004
|
XMLParser* parser;
|
982
1005
|
GET_PARSER(recv, parser);
|
983
1006
|
rb_funcall((VALUE)recv, id_entityDeclHandler, 7,
|
984
|
-
TO_(rb_str_new2(entityName)),
|
1007
|
+
TO_(ENC_(rb_str_new2(entityName))),
|
985
1008
|
(is_parameter_entity ? Qtrue : Qfalse),
|
986
|
-
TO_(rb_str_new((char*)value, value_length)),
|
987
|
-
(base ? TO_(rb_str_new2((char*)base)) : Qnil),
|
988
|
-
(systemId ? TO_(rb_str_new2((char*)systemId)) : Qnil),
|
989
|
-
(publicId ? TO_(rb_str_new2((char*)publicId)) : Qnil),
|
990
|
-
(notationName ? TO_(rb_str_new2((char*)notationName))
|
1009
|
+
TO_(ENC_(rb_str_new((char*)value, value_length))),
|
1010
|
+
(base ? TO_(ENC_(rb_str_new2((char*)base))) : Qnil),
|
1011
|
+
(systemId ? TO_(ENC_(rb_str_new2((char*)systemId))) : Qnil),
|
1012
|
+
(publicId ? TO_(ENC_(rb_str_new2((char*)publicId))) : Qnil),
|
1013
|
+
(notationName ? TO_(ENC_(rb_str_new2((char*)notationName)))
|
991
1014
|
: Qnil));
|
992
1015
|
}
|
993
1016
|
|
@@ -1004,10 +1027,10 @@ myExternalParsedEntityDeclHandler(void *recv,
|
|
1004
1027
|
XMLParser* parser;
|
1005
1028
|
GET_PARSER(recv, parser);
|
1006
1029
|
rb_funcall((VALUE)recv, id_externalParsedEntityDeclHandler, 4,
|
1007
|
-
TO_(rb_str_new2((char*)entityName)),
|
1008
|
-
(base ? TO_(rb_str_new2((char*)base)) : Qnil),
|
1009
|
-
TO_(rb_str_new2((char*)systemId)),
|
1010
|
-
(publicId ? TO_(rb_str_new2((char*)publicId)) : Qnil));
|
1030
|
+
TO_(ENC_(rb_str_new2((char*)entityName))),
|
1031
|
+
(base ? TO_(ENC_(rb_str_new2((char*)base))) : Qnil),
|
1032
|
+
TO_(ENC_(rb_str_new2((char*)systemId))),
|
1033
|
+
(publicId ? TO_(ENC_(rb_str_new2((char*)publicId))) : Qnil));
|
1011
1034
|
}
|
1012
1035
|
|
1013
1036
|
static void
|
@@ -1019,9 +1042,9 @@ myInternalParsedEntityDeclHandler(void *recv,
|
|
1019
1042
|
XMLParser* parser;
|
1020
1043
|
GET_PARSER(recv, parser);
|
1021
1044
|
rb_funcall((VALUE)recv, id_internalParsedEntityDeclHandler, 2,
|
1022
|
-
TO_(rb_str_new2((char*)entityName)),
|
1023
|
-
TO_(rb_str_new((char*)replacementText,
|
1024
|
-
|
1045
|
+
TO_(ENC_(rb_str_new2((char*)entityName))),
|
1046
|
+
TO_(ENC_(rb_str_new((char*)replacementText,
|
1047
|
+
replacementTextLength))));
|
1025
1048
|
}
|
1026
1049
|
#endif
|
1027
1050
|
|
@@ -1043,21 +1066,21 @@ myEncodingConv(void *data, const char *s)
|
|
1043
1066
|
{
|
1044
1067
|
VALUE v;
|
1045
1068
|
int len;
|
1046
|
-
int slen =
|
1047
|
-
|
1069
|
+
int slen = RSTRING_PTR(rb_ivar_get((VALUE)data,
|
1070
|
+
id_map))[*(unsigned char*)s];
|
1048
1071
|
|
1049
|
-
v = rb_funcall((VALUE)data, id_convert, 1, rb_str_new((char*)s, -slen));
|
1072
|
+
v = rb_funcall((VALUE)data, id_convert, 1, ENC_(rb_str_new((char*)s, -slen)));
|
1050
1073
|
switch (TYPE(v)) {
|
1051
1074
|
case T_FIXNUM:
|
1052
1075
|
return FIX2INT(v);
|
1053
1076
|
case T_STRING:
|
1054
|
-
len =
|
1077
|
+
len = RSTRING_LEN(v);
|
1055
1078
|
if (len == 1) {
|
1056
|
-
return (unsigned char)*(
|
1079
|
+
return (unsigned char)*RSTRING_PTR(v);
|
1057
1080
|
}
|
1058
1081
|
else if (len >= 2) {
|
1059
|
-
return (unsigned char)*(
|
1060
|
-
(unsigned char)*(
|
1082
|
+
return (unsigned char)*RSTRING_PTR(v) |
|
1083
|
+
(unsigned char)*(RSTRING_PTR(v) + 1) << 8;
|
1061
1084
|
}
|
1062
1085
|
}
|
1063
1086
|
return 0;
|
@@ -1077,7 +1100,7 @@ iterUnknownEncodingHandler(void *recv,
|
|
1077
1100
|
|
1078
1101
|
GET_PARSER(recv, parser);
|
1079
1102
|
ret = rb_yield(rb_ary_new3(4, symUNKNOWN_ENCODING,
|
1080
|
-
TO_(rb_str_new2((char*)name)), Qnil, recv));
|
1103
|
+
TO_(ENC_(rb_str_new2((char*)name))), Qnil, recv));
|
1081
1104
|
if (TYPE(ret) == T_OBJECT && rb_obj_is_kind_of(ret, cXMLEncoding)) {
|
1082
1105
|
int i;
|
1083
1106
|
ID mid = rb_intern("map");
|
@@ -1086,7 +1109,7 @@ iterUnknownEncodingHandler(void *recv,
|
|
1086
1109
|
|
1087
1110
|
for (i = 0; i < 256; i++) {
|
1088
1111
|
VALUE m = rb_funcall(ret, mid, 1, INT2FIX(i));
|
1089
|
-
|
1112
|
+
RSTRING_PTR(cmap)[i] = info->map[i] = FIX2INT(m);
|
1090
1113
|
}
|
1091
1114
|
/* protect object form GC */
|
1092
1115
|
rb_ivar_set(recv, rb_intern("_encoding"), ret);
|
@@ -1241,8 +1264,6 @@ findEncoding(const char* encname)
|
|
1241
1264
|
file[len] = tolower(*p);
|
1242
1265
|
}
|
1243
1266
|
file[len] = '\0';
|
1244
|
-
// if (len < PATH_MAX - sizeof(encext))
|
1245
|
-
// strcat(file, encext);
|
1246
1267
|
strncat(file, encext, PATH_MAX - len -1);
|
1247
1268
|
|
1248
1269
|
if ((fp = fopen(file, "rb")) == NULL) {
|
@@ -1274,6 +1295,10 @@ myUnknownEncodingHandler(void *recv,
|
|
1274
1295
|
{
|
1275
1296
|
XMLParser* parser;
|
1276
1297
|
VALUE ret;
|
1298
|
+
|
1299
|
+
GET_PARSER(recv, parser);
|
1300
|
+
parser->detectedEncoding = name;
|
1301
|
+
|
1277
1302
|
if (!rb_method_boundp(CLASS_OF((VALUE)recv), id_unknownEncoding, 0))
|
1278
1303
|
#ifndef XML_ENC_PATH
|
1279
1304
|
return 0;
|
@@ -1293,9 +1318,8 @@ myUnknownEncodingHandler(void *recv,
|
|
1293
1318
|
}
|
1294
1319
|
#endif
|
1295
1320
|
|
1296
|
-
GET_PARSER(recv, parser);
|
1297
1321
|
ret = rb_funcall((VALUE)recv, id_unknownEncoding, 1,
|
1298
|
-
TO_(rb_str_new2((char*)name)));
|
1322
|
+
TO_(ENC_(rb_str_new2((char*)name))));
|
1299
1323
|
if (TYPE(ret) == T_OBJECT && rb_obj_is_kind_of(ret, cXMLEncoding)) {
|
1300
1324
|
int i;
|
1301
1325
|
ID mid = rb_intern("map");
|
@@ -1308,7 +1332,7 @@ myUnknownEncodingHandler(void *recv,
|
|
1308
1332
|
|
1309
1333
|
for (i = 0; i < 256; i++) {
|
1310
1334
|
VALUE m = rb_funcall(ret, mid, 1, INT2FIX(i));
|
1311
|
-
|
1335
|
+
RSTRING_PTR(cmap)[i] = info->map[i] = FIX2INT(m);
|
1312
1336
|
}
|
1313
1337
|
/* protect object form GC */
|
1314
1338
|
rb_ivar_set((VALUE)recv, rb_intern("_encoding"), ret);
|
@@ -1330,7 +1354,7 @@ mySkippedEntityHandler(void *recv,
|
|
1330
1354
|
XMLParser* parser;
|
1331
1355
|
GET_PARSER(recv, parser);
|
1332
1356
|
rb_funcall((VALUE)recv, id_skippedEntityHandler, 2,
|
1333
|
-
TO_(rb_str_new2((char*)entityName)),
|
1357
|
+
TO_(ENC_(rb_str_new2((char*)entityName))),
|
1334
1358
|
INT2FIX(is_parameter_entity));
|
1335
1359
|
}
|
1336
1360
|
#endif
|
@@ -1359,7 +1383,7 @@ XMLParser_new(int argc, VALUE* argv, VALUE klass)
|
|
1359
1383
|
/* new(encoding) */
|
1360
1384
|
if (TYPE(arg1) != T_NIL) {
|
1361
1385
|
Check_Type(arg1, T_STRING); /* encoding */
|
1362
|
-
encoding =
|
1386
|
+
encoding = RSTRING_PTR(arg1);
|
1363
1387
|
}
|
1364
1388
|
}
|
1365
1389
|
else if (count == 2) {
|
@@ -1369,10 +1393,10 @@ XMLParser_new(int argc, VALUE* argv, VALUE klass)
|
|
1369
1393
|
if (TYPE(arg1) != T_DATA) {
|
1370
1394
|
if (TYPE(arg1) != T_NIL) {
|
1371
1395
|
Check_Type(arg1, T_STRING); /* encoding */
|
1372
|
-
encoding =
|
1396
|
+
encoding = RSTRING_PTR(arg1);
|
1373
1397
|
}
|
1374
1398
|
Check_Type(arg2, T_STRING); /* nschar */
|
1375
|
-
nssep =
|
1399
|
+
nssep = RSTRING_PTR(arg2);
|
1376
1400
|
}
|
1377
1401
|
else {
|
1378
1402
|
#endif
|
@@ -1380,7 +1404,7 @@ XMLParser_new(int argc, VALUE* argv, VALUE klass)
|
|
1380
1404
|
GET_PARSER(arg1, rootparser);
|
1381
1405
|
if (!NIL_P(arg2)) {
|
1382
1406
|
Check_Type(arg2, T_STRING); /* context */
|
1383
|
-
context =
|
1407
|
+
context = RSTRING_PTR(arg2);
|
1384
1408
|
}
|
1385
1409
|
parent = arg1;
|
1386
1410
|
#ifdef NEW_EXPAT
|
@@ -1393,10 +1417,10 @@ XMLParser_new(int argc, VALUE* argv, VALUE klass)
|
|
1393
1417
|
GET_PARSER(arg1, rootparser);
|
1394
1418
|
if (!NIL_P(arg2)) {
|
1395
1419
|
Check_Type(arg2, T_STRING); /* context */
|
1396
|
-
context =
|
1420
|
+
context = RSTRING_PTR(arg2);
|
1397
1421
|
}
|
1398
1422
|
Check_Type(arg3, T_STRING); /* encoding */
|
1399
|
-
encoding =
|
1423
|
+
encoding = RSTRING_PTR(arg3);
|
1400
1424
|
parent = arg1;
|
1401
1425
|
}
|
1402
1426
|
|
@@ -1465,6 +1489,7 @@ XMLParser_new(int argc, VALUE* argv, VALUE klass)
|
|
1465
1489
|
parser->lastAttrs = NULL;
|
1466
1490
|
#endif
|
1467
1491
|
parser->parent = parent;
|
1492
|
+
parser->detectedEncoding = NULL;
|
1468
1493
|
|
1469
1494
|
rb_obj_call_init(obj, argc, argv);
|
1470
1495
|
|
@@ -1491,7 +1516,7 @@ XMLParser_reset(int argc, VALUE* argv, VALUE obj)
|
|
1491
1516
|
GET_PARSER(obj, parser);
|
1492
1517
|
if (count > 0 && TYPE(vencoding) != T_NIL) {
|
1493
1518
|
Check_Type(vencoding, T_STRING);
|
1494
|
-
encoding =
|
1519
|
+
encoding = RSTRING_PTR(vencoding);
|
1495
1520
|
}
|
1496
1521
|
XML_ParserReset(parser->parser, encoding);
|
1497
1522
|
/* setting up internal data */
|
@@ -1502,17 +1527,14 @@ XMLParser_reset(int argc, VALUE* argv, VALUE obj)
|
|
1502
1527
|
parser->lastAttrs = NULL;
|
1503
1528
|
#endif
|
1504
1529
|
parser->tainted = 0;
|
1530
|
+
parser->detectedEncoding = NULL;
|
1505
1531
|
|
1506
1532
|
return obj;
|
1507
1533
|
}
|
1508
1534
|
#endif
|
1509
1535
|
|
1510
|
-
|
1511
|
-
|
1512
|
-
XMLParser_parse(int argc, VALUE* argv, VALUE obj)
|
1513
|
-
{
|
1514
|
-
XMLParser* parser;
|
1515
|
-
int ret;
|
1536
|
+
static void
|
1537
|
+
setup_evnet_handlers(XMLParser* parser, VALUE obj) {
|
1516
1538
|
XML_StartElementHandler start = NULL;
|
1517
1539
|
XML_EndElementHandler end = NULL;
|
1518
1540
|
#ifdef NEW_EXPAT
|
@@ -1525,38 +1547,6 @@ XMLParser_parse(int argc, VALUE* argv, VALUE obj)
|
|
1525
1547
|
XML_StartDoctypeDeclHandler startDoctype = NULL;
|
1526
1548
|
XML_EndDoctypeDeclHandler endDoctype = NULL;
|
1527
1549
|
#endif
|
1528
|
-
VALUE str;
|
1529
|
-
VALUE isFinal;
|
1530
|
-
int final = 1;
|
1531
|
-
int count;
|
1532
|
-
int fromStream = 0;
|
1533
|
-
ID mid = rb_intern("gets");
|
1534
|
-
ID linebuf = rb_intern("_linebuf");
|
1535
|
-
|
1536
|
-
count = rb_scan_args(argc, argv, "02", &str, &isFinal);
|
1537
|
-
/* If "str" has public "gets" method, it will be considered *stream* */
|
1538
|
-
if (!rb_obj_is_kind_of(str, rb_cString) &&
|
1539
|
-
rb_method_boundp(CLASS_OF(str), mid, 1)) {
|
1540
|
-
fromStream = 1;
|
1541
|
-
}
|
1542
|
-
else if (!NIL_P(str)) {
|
1543
|
-
Check_Type(str, T_STRING);
|
1544
|
-
}
|
1545
|
-
if (count >= 2) {
|
1546
|
-
if (isFinal == Qtrue)
|
1547
|
-
final = 1;
|
1548
|
-
else if (isFinal == Qfalse)
|
1549
|
-
final = 0;
|
1550
|
-
else
|
1551
|
-
rb_raise(rb_eTypeError, "not valid value");
|
1552
|
-
}
|
1553
|
-
|
1554
|
-
GET_PARSER(obj, parser);
|
1555
|
-
|
1556
|
-
// parser->iterator = rb_iterator_p();
|
1557
|
-
parser->iterator = rb_block_given_p();
|
1558
|
-
|
1559
|
-
/* Setup event handlers */
|
1560
1550
|
|
1561
1551
|
/* Call as iterator */
|
1562
1552
|
if (parser->iterator) {
|
@@ -1726,6 +1716,47 @@ XMLParser_parse(int argc, VALUE* argv, VALUE obj)
|
|
1726
1716
|
XML_SetSkippedEntityHandler(parser->parser, mySkippedEntityHandler);
|
1727
1717
|
#endif
|
1728
1718
|
}
|
1719
|
+
}
|
1720
|
+
|
1721
|
+
|
1722
|
+
/* parse method */
|
1723
|
+
static VALUE
|
1724
|
+
XMLParser_parse(int argc, VALUE* argv, VALUE obj)
|
1725
|
+
{
|
1726
|
+
XMLParser* parser;
|
1727
|
+
int ret;
|
1728
|
+
VALUE str;
|
1729
|
+
VALUE isFinal;
|
1730
|
+
int final = 1;
|
1731
|
+
int count;
|
1732
|
+
int fromStream = 0;
|
1733
|
+
ID mid = rb_intern("gets");
|
1734
|
+
ID linebuf = rb_intern("_linebuf");
|
1735
|
+
|
1736
|
+
count = rb_scan_args(argc, argv, "02", &str, &isFinal);
|
1737
|
+
/* If "str" has public "gets" method, it will be considered *stream* */
|
1738
|
+
if (!rb_obj_is_kind_of(str, rb_cString) &&
|
1739
|
+
rb_method_boundp(CLASS_OF(str), mid, 1)) {
|
1740
|
+
fromStream = 1;
|
1741
|
+
}
|
1742
|
+
else if (!NIL_P(str)) {
|
1743
|
+
Check_Type(str, T_STRING);
|
1744
|
+
}
|
1745
|
+
if (count >= 2) {
|
1746
|
+
if (isFinal == Qtrue)
|
1747
|
+
final = 1;
|
1748
|
+
else if (isFinal == Qfalse)
|
1749
|
+
final = 0;
|
1750
|
+
else
|
1751
|
+
rb_raise(rb_eTypeError, "not valid value");
|
1752
|
+
}
|
1753
|
+
|
1754
|
+
GET_PARSER(obj, parser);
|
1755
|
+
|
1756
|
+
parser->iterator = rb_block_given_p();
|
1757
|
+
|
1758
|
+
/* Setup event handlers */
|
1759
|
+
setup_evnet_handlers(parser, obj);
|
1729
1760
|
|
1730
1761
|
/* Parse from stream (probably slightly slow) */
|
1731
1762
|
if (fromStream) {
|
@@ -1741,7 +1772,7 @@ XMLParser_parse(int argc, VALUE* argv, VALUE obj)
|
|
1741
1772
|
taintParser(parser);
|
1742
1773
|
rb_ivar_set(obj, linebuf, buf); /* protect buf from GC (reasonable?)*/
|
1743
1774
|
ret = XML_Parse(parser->parser,
|
1744
|
-
|
1775
|
+
RSTRING_PTR(buf), RSTRING_LEN(buf), 0);
|
1745
1776
|
}
|
1746
1777
|
else {
|
1747
1778
|
ret = XML_Parse(parser->parser, NULL, 0, 1);
|
@@ -1757,10 +1788,41 @@ XMLParser_parse(int argc, VALUE* argv, VALUE obj)
|
|
1757
1788
|
|
1758
1789
|
/* Parse string */
|
1759
1790
|
if (!NIL_P(str)) {
|
1791
|
+
#if defined(HAVE_RUBY_ENCODING_H) && defined(HAVE_XML_PARSERRESET)
|
1792
|
+
int err;
|
1793
|
+
#endif
|
1760
1794
|
if (OBJ_TAINTED(str))
|
1761
1795
|
taintParser(parser);
|
1762
1796
|
ret = XML_Parse(parser->parser,
|
1763
|
-
|
1797
|
+
RSTRING_PTR(str), RSTRING_LEN(str), final);
|
1798
|
+
#if defined(HAVE_RUBY_ENCODING_H) && defined(HAVE_XML_PARSERRESET)
|
1799
|
+
/* Ruby 1.9.1 Encoding conversion */
|
1800
|
+
err = XML_GetErrorCode(parser->parser);
|
1801
|
+
if (final && err == XML_ERROR_UNKNOWN_ENCODING) {
|
1802
|
+
rb_encoding* enc;
|
1803
|
+
volatile VALUE encobj;
|
1804
|
+
volatile VALUE ustr;
|
1805
|
+
enc = rb_enc_find(parser->detectedEncoding);
|
1806
|
+
if ((int)ENC_TO_ENCINDEX(enc) != rb_ascii8bit_encindex()) {
|
1807
|
+
rb_enc_associate(str, enc);
|
1808
|
+
encobj = rb_enc_from_encoding(enc_xml);
|
1809
|
+
/* rb_str_encode may raise an exception */
|
1810
|
+
ustr = rb_str_encode(str, encobj, 0, Qnil);
|
1811
|
+
if (!NIL_P(ustr)) {
|
1812
|
+
XML_ParserReset(parser->parser, "utf-8");
|
1813
|
+
XML_SetUserData(parser->parser, (void*)obj);
|
1814
|
+
parser->defaultCurrent = 0;
|
1815
|
+
#ifdef NEW_EXPAT
|
1816
|
+
parser->lastAttrs = NULL;
|
1817
|
+
#endif
|
1818
|
+
parser->detectedEncoding = NULL;
|
1819
|
+
setup_evnet_handlers(parser, obj);
|
1820
|
+
ret = XML_Parse(parser->parser,
|
1821
|
+
RSTRING_PTR(ustr), RSTRING_LEN(ustr), final);
|
1822
|
+
}
|
1823
|
+
}
|
1824
|
+
}
|
1825
|
+
#endif
|
1764
1826
|
}
|
1765
1827
|
else
|
1766
1828
|
ret = XML_Parse(parser->parser, NULL, 0, final);
|
@@ -1853,7 +1915,7 @@ XMLParser_setBase(VALUE obj, VALUE base)
|
|
1853
1915
|
GET_PARSER(obj, parser);
|
1854
1916
|
if (OBJ_TAINTED(base))
|
1855
1917
|
taintParser(parser);
|
1856
|
-
ret = XML_SetBase(parser->parser,
|
1918
|
+
ret = XML_SetBase(parser->parser, RSTRING_PTR(base));
|
1857
1919
|
|
1858
1920
|
return INT2FIX(ret);
|
1859
1921
|
}
|
@@ -1870,7 +1932,7 @@ XMLParser_getBase(VALUE obj)
|
|
1870
1932
|
if (!ret)
|
1871
1933
|
return Qnil;
|
1872
1934
|
|
1873
|
-
return TO_(rb_str_new2((char*)ret));
|
1935
|
+
return TO_(ENC_(rb_str_new2((char*)ret)));
|
1874
1936
|
}
|
1875
1937
|
|
1876
1938
|
#ifdef NEW_EXPAT
|
@@ -1892,7 +1954,7 @@ XMLParser_getSpecifiedAttributes(VALUE obj)
|
|
1892
1954
|
while (*atts) {
|
1893
1955
|
const char* key = *atts++;
|
1894
1956
|
atts++;
|
1895
|
-
rb_hash_aset(attrhash, FO_(TO_(rb_str_new2((char*)key))),
|
1957
|
+
rb_hash_aset(attrhash, FO_(TO_(ENC_(rb_str_new2((char*)key)))),
|
1896
1958
|
(count-- > 0) ? Qtrue: Qfalse);
|
1897
1959
|
}
|
1898
1960
|
|
@@ -1915,7 +1977,7 @@ XMLParser_getSpecifiedAttributes(VALUE obj)
|
|
1915
1977
|
attrarray = rb_ary_new2(count);
|
1916
1978
|
for (i = 0; i < count; i++, atts+=2) {
|
1917
1979
|
const char* key = *atts;
|
1918
|
-
rb_ary_push(attrarray, TO_(rb_str_new2((char*)key)));
|
1980
|
+
rb_ary_push(attrarray, TO_(ENC_(rb_str_new2((char*)key))));
|
1919
1981
|
}
|
1920
1982
|
|
1921
1983
|
return attrarray;
|
@@ -1951,13 +2013,13 @@ static VALUE
|
|
1951
2013
|
XMLParser_s_expatVersion(VALUE obj)
|
1952
2014
|
{
|
1953
2015
|
#if defined(HAVE_EXPAT_H)
|
1954
|
-
return rb_str_new2(XML_ExpatVersion());
|
2016
|
+
return ENC_(rb_str_new2(XML_ExpatVersion()));
|
1955
2017
|
#elif defined(EXPAT_1_2)
|
1956
|
-
return rb_str_new2("1.2");
|
2018
|
+
return ENC_(rb_str_new2("1.2"));
|
1957
2019
|
#elif defined(NEW_EXPAT)
|
1958
|
-
return rb_str_new2("1.1");
|
2020
|
+
return ENC_(rb_str_new2("1.1"));
|
1959
2021
|
#else
|
1960
|
-
return rb_str_new2("1.0");
|
2022
|
+
return ENC_(rb_str_new2("1.0"));
|
1961
2023
|
#endif
|
1962
2024
|
}
|
1963
2025
|
|
@@ -2003,7 +2065,7 @@ XMLParser_getInputContext(VALUE obj)
|
|
2003
2065
|
&size);
|
2004
2066
|
if (buffer && size > 0) {
|
2005
2067
|
ret = rb_ary_new3(2,
|
2006
|
-
TO_(rb_str_new(buffer, size)),
|
2068
|
+
TO_(ENC_(rb_str_new(buffer, size))),
|
2007
2069
|
INT2FIX(offset));
|
2008
2070
|
}
|
2009
2071
|
|
@@ -2025,7 +2087,7 @@ XMLParser_getIdAttrribute(VALUE obj)
|
|
2025
2087
|
idattr = XML_GetIdAttributeIndex(parser->parser);
|
2026
2088
|
if (idattr < 0)
|
2027
2089
|
return Qnil;
|
2028
|
-
return TO_(rb_str_new2((char*)atts[idattr]));
|
2090
|
+
return TO_(ENC_(rb_str_new2((char*)atts[idattr])));
|
2029
2091
|
}
|
2030
2092
|
#endif
|
2031
2093
|
|
@@ -2066,7 +2128,7 @@ XMLParser_s_getFeatureList(VALUE obj)
|
|
2066
2128
|
|
2067
2129
|
list = XML_GetFeatureList();
|
2068
2130
|
while (list && list->feature) {
|
2069
|
-
rb_hash_aset(ret, FO_(rb_str_new2(list->name)), INT2NUM(list->value));
|
2131
|
+
rb_hash_aset(ret, FO_(ENC_(rb_str_new2(list->name))), INT2NUM(list->value));
|
2070
2132
|
list++;
|
2071
2133
|
}
|
2072
2134
|
|
@@ -2079,6 +2141,10 @@ Init_xmlparser()
|
|
2079
2141
|
{
|
2080
2142
|
VALUE mXML;
|
2081
2143
|
|
2144
|
+
#ifdef HAVE_RUBY_ENCODING_H
|
2145
|
+
enc_xml = rb_utf8_encoding();
|
2146
|
+
#endif
|
2147
|
+
|
2082
2148
|
eXMLParserError = rb_define_class("XMLParserError", rb_eStandardError);
|
2083
2149
|
cXMLParser = rb_define_class("XMLParser", rb_cObject);
|
2084
2150
|
cXMLEncoding = rb_define_class("XMLEncoding", rb_cObject);
|