xmlparser 0.6.81 → 0.7.2.1
Sign up to get free protection for your applications and to get access to all the features.
- data/MANIFEST +7 -68
- data/README +42 -39
- data/README.ja +54 -57
- data/ext/extconf.rb +58 -0
- data/ext/{xmlparser/xmlparser.c → xmlparser.c} +246 -180
- data/lib/xml/dom/digest.rb +37 -25
- data/lib/xml/dom2/domentityresolverimpl.rb +2 -13
- metadata +79 -93
- data/Rakefile +0 -34
- data/ext/xmlparser/mkrf_conf.rb +0 -28
- data/lib/xml/dom/builder-ja.rb +0 -58
- data/lib/xml/encoding-ja.rb +0 -42
data/ext/extconf.rb
ADDED
@@ -0,0 +1,58 @@
|
|
1
|
+
#
|
2
|
+
# ruby extconf.rb
|
3
|
+
# --with-perl-enc-map[=/path/to/enc-map]
|
4
|
+
# --with-expat-dir=/path/to/expat
|
5
|
+
# --with-expat-lib=/path/to/expat/lib
|
6
|
+
# --with-expat-include=/path/to/expat/include
|
7
|
+
#
|
8
|
+
require 'mkmf'
|
9
|
+
|
10
|
+
cwd=`pwd`.chomp!
|
11
|
+
perl= ENV['PERL'] || 'perl'
|
12
|
+
|
13
|
+
## Encoding maps may be stored in $perl_archlib/XML/Parser/Encodings/
|
14
|
+
#perl_archlib = '/usr/lib/perl5/site_perl/5.005/i586-linux'
|
15
|
+
#perl_archlib = '/usr/local/lib'
|
16
|
+
perl_archlib = `#{perl} -e 'use Config; print $Config{"archlib"}'`
|
17
|
+
xml_enc_path = with_config("perl-enc-map")
|
18
|
+
if xml_enc_path == true
|
19
|
+
xml_enc_path = perl_archlib + "/XML/Parser/Encodings"
|
20
|
+
end
|
21
|
+
|
22
|
+
##$CFLAGS="-I#{cwd}/expat/xmlparse -I#{cwd}/expat/xmltok" +
|
23
|
+
## ' -DXML_ENC_PATH=getenv\(\"XML_ENC_PATH\"\)' +
|
24
|
+
## " -DNEW_EXPAT"
|
25
|
+
#$CFLAGS = "-I#{cwd}/expat/xmlparse -I#{cwd}/expat/xmltok"
|
26
|
+
#$LDFLAGS = "-L#{cwd}/expat/xmlparse -Wl,-rpath,/usr/local/lib"
|
27
|
+
#$LDFLAGS = "-L#{cwd}/expat/xmlparse"
|
28
|
+
dir_config("expat")
|
29
|
+
#dir_config("xmltok")
|
30
|
+
#dir_config("xmlparse")
|
31
|
+
if xml_enc_path
|
32
|
+
$CFLAGS += " -DXML_ENC_PATH=\\\"#{xml_enc_path}\\\""
|
33
|
+
end
|
34
|
+
|
35
|
+
#if have_header("xmlparse.h") || have_header("expat.h")
|
36
|
+
if have_header("expat.h") || have_header("xmlparse.h")
|
37
|
+
if have_library("expat", "XML_ParserCreate") ||
|
38
|
+
have_library("xmltok", "XML_ParserCreate")
|
39
|
+
if have_func("XML_SetNotStandaloneHandler")
|
40
|
+
$CFLAGS += " -DNEW_EXPAT"
|
41
|
+
end
|
42
|
+
if have_func("XML_SetParamEntityParsing")
|
43
|
+
$CFLAGS += " -DXML_DTD"
|
44
|
+
end
|
45
|
+
# if have_func("XML_SetExternalParsedEntityDeclHandler")
|
46
|
+
# $CFLAGS += " -DEXPAT_1_2"
|
47
|
+
# end
|
48
|
+
have_func("XML_SetDoctypeDeclHandler")
|
49
|
+
have_func("XML_ParserReset")
|
50
|
+
have_func("XML_SetSkippedEntityHandler")
|
51
|
+
have_func("XML_GetFeatureList")
|
52
|
+
have_func("XML_UseForeignDTD")
|
53
|
+
have_func("XML_GetIdAttributeIndex")
|
54
|
+
have_library("socket", "ntohl")
|
55
|
+
have_library("wsock32") if RUBY_PLATFORM =~ /mswin32|mingw/
|
56
|
+
create_makefile("xmlparser")
|
57
|
+
end
|
58
|
+
end
|
@@ -1,5 +1,6 @@
|
|
1
1
|
/*
|
2
2
|
* Expat (XML Parser Toolkit) wrapper for Ruby
|
3
|
+
* Dec 15, 2009 yoshidam version 0.7.0 support Ruby 1.9.1
|
3
4
|
* Feb 16, 2004 yoshidam version 0.6.8 taint output string
|
4
5
|
* Feb 16, 2004 yoshidam version 0.6.7 fix buffer overflow
|
5
6
|
* Mar 11, 2003 yoshidam version 0.6.6 fix skippedEntity handler
|
@@ -41,7 +42,11 @@
|
|
41
42
|
*/
|
42
43
|
|
43
44
|
#include "ruby.h"
|
44
|
-
#
|
45
|
+
#ifdef HAVE_RUBY_IO_H
|
46
|
+
# include "ruby/io.h"
|
47
|
+
#else
|
48
|
+
# include "rubyio.h"
|
49
|
+
#endif
|
45
50
|
#include <stdio.h>
|
46
51
|
#include <ctype.h>
|
47
52
|
#ifdef HAVE_EXPAT_H
|
@@ -58,6 +63,15 @@
|
|
58
63
|
# endif
|
59
64
|
#endif
|
60
65
|
|
66
|
+
#ifndef RSTRING_PTR
|
67
|
+
# define RSTRING_PTR(s) (RSTRING(s)->ptr)
|
68
|
+
# define RSTRING_LEN(s) (RSTRING(s)->len)
|
69
|
+
#endif
|
70
|
+
|
71
|
+
#ifdef HAVE_RUBY_ENCODING_H
|
72
|
+
static rb_encoding* enc_xml;
|
73
|
+
#endif
|
74
|
+
|
61
75
|
static VALUE eXMLParserError;
|
62
76
|
static VALUE cXMLParser;
|
63
77
|
static VALUE cXMLEncoding;
|
@@ -112,6 +126,7 @@ typedef struct _XMLParser {
|
|
112
126
|
int tainted;
|
113
127
|
VALUE parent;
|
114
128
|
char* context;
|
129
|
+
const XML_Char *detectedEncoding;
|
115
130
|
} XMLParser;
|
116
131
|
|
117
132
|
static VALUE symDEFAULT;
|
@@ -198,6 +213,12 @@ freezeObject(VALUE obj) {
|
|
198
213
|
}
|
199
214
|
#define FO_(o) (freezeObject(o))
|
200
215
|
|
216
|
+
#ifdef HAVE_RUBY_ENCODING_H
|
217
|
+
# define ENC_(o) (rb_enc_associate(o, enc_xml))
|
218
|
+
#else
|
219
|
+
# define ENC_(o) (o)
|
220
|
+
#endif
|
221
|
+
|
201
222
|
|
202
223
|
/* Event handlers for iterator */
|
203
224
|
static void
|
@@ -216,12 +237,12 @@ iterStartElementHandler(void *recv,
|
|
216
237
|
const char* key = *atts++;
|
217
238
|
const char* val = *atts++;
|
218
239
|
rb_hash_aset(attrhash,
|
219
|
-
FO_(TO_(rb_str_new2((char*)key))),
|
220
|
-
TO_(rb_str_new2((char*)val)));
|
240
|
+
FO_(TO_(ENC_(rb_str_new2((char*)key)))),
|
241
|
+
TO_(ENC_(rb_str_new2((char*)val))));
|
221
242
|
}
|
222
243
|
|
223
244
|
rb_yield(rb_ary_new3(4, symSTART_ELEM,
|
224
|
-
TO_(rb_str_new2((char*)name)), attrhash, recv));
|
245
|
+
TO_(ENC_(rb_str_new2((char*)name))), attrhash, recv));
|
225
246
|
if (parser->defaultCurrent) {
|
226
247
|
parser->defaultCurrent = 0;
|
227
248
|
XML_DefaultCurrent(parser->parser);
|
@@ -235,7 +256,7 @@ iterEndElementHandler(void *recv,
|
|
235
256
|
XMLParser* parser;
|
236
257
|
GET_PARSER(recv, parser);
|
237
258
|
rb_yield(rb_ary_new3(4, symEND_ELEM,
|
238
|
-
TO_(rb_str_new2((char*)name)), Qnil, recv));
|
259
|
+
TO_(ENC_(rb_str_new2((char*)name))), Qnil, recv));
|
239
260
|
if (parser->defaultCurrent) {
|
240
261
|
parser->defaultCurrent = 0;
|
241
262
|
XML_DefaultCurrent(parser->parser);
|
@@ -250,7 +271,7 @@ iterCharacterDataHandler(void *recv,
|
|
250
271
|
XMLParser* parser;
|
251
272
|
GET_PARSER(recv, parser);
|
252
273
|
rb_yield(rb_ary_new3(4, symCDATA,
|
253
|
-
Qnil, TO_(rb_str_new((char*)s, len)), recv));
|
274
|
+
Qnil, TO_(ENC_(rb_str_new((char*)s, len))), recv));
|
254
275
|
if (parser->defaultCurrent) {
|
255
276
|
parser->defaultCurrent = 0;
|
256
277
|
XML_DefaultCurrent(parser->parser);
|
@@ -265,8 +286,8 @@ iterProcessingInstructionHandler(void *recv,
|
|
265
286
|
XMLParser* parser;
|
266
287
|
GET_PARSER(recv, parser);
|
267
288
|
rb_yield(rb_ary_new3(4, symPI,
|
268
|
-
TO_(rb_str_new2((char*)target)),
|
269
|
-
TO_(rb_str_new2((char*)data)), recv));
|
289
|
+
TO_(ENC_(rb_str_new2((char*)target))),
|
290
|
+
TO_(ENC_(rb_str_new2((char*)data))), recv));
|
270
291
|
if (parser->defaultCurrent) {
|
271
292
|
parser->defaultCurrent = 0;
|
272
293
|
XML_DefaultCurrent(parser->parser);
|
@@ -281,7 +302,7 @@ iterDefaultHandler(void *recv,
|
|
281
302
|
XMLParser* parser;
|
282
303
|
GET_PARSER(recv, parser);
|
283
304
|
rb_yield(rb_ary_new3(4, symDEFAULT,
|
284
|
-
Qnil, TO_(rb_str_new((char*)s, len)), recv));
|
305
|
+
Qnil, TO_(ENC_(rb_str_new((char*)s, len))), recv));
|
285
306
|
if (parser->defaultCurrent) {
|
286
307
|
parser->defaultCurrent = 0;
|
287
308
|
/* XML_DefaultCurrent should not be called in defaultHandler */
|
@@ -301,12 +322,12 @@ iterUnparsedEntityDeclHandler(void *recv,
|
|
301
322
|
VALUE valary;
|
302
323
|
|
303
324
|
GET_PARSER(recv, parser);
|
304
|
-
valary = rb_ary_new3(4, (base ? TO_(rb_str_new2((char*)base)) : Qnil),
|
305
|
-
TO_(rb_str_new2((char*)systemId)),
|
306
|
-
(publicId ? TO_(rb_str_new2((char*)publicId)) : Qnil),
|
307
|
-
TO_(rb_str_new2((char*)notationName)));
|
325
|
+
valary = rb_ary_new3(4, (base ? TO_(ENC_(rb_str_new2((char*)base))) : Qnil),
|
326
|
+
TO_(ENC_(rb_str_new2((char*)systemId))),
|
327
|
+
(publicId ? TO_(ENC_(rb_str_new2((char*)publicId))) : Qnil),
|
328
|
+
TO_(ENC_(rb_str_new2((char*)notationName))));
|
308
329
|
rb_yield(rb_ary_new3(4, symUNPARSED_ENTITY_DECL,
|
309
|
-
TO_(rb_str_new2((char*)entityName)),
|
330
|
+
TO_(ENC_(rb_str_new2((char*)entityName))),
|
310
331
|
valary, recv));
|
311
332
|
if (parser->defaultCurrent) {
|
312
333
|
parser->defaultCurrent = 0;
|
@@ -326,11 +347,11 @@ iterNotationDeclHandler(void *recv,
|
|
326
347
|
|
327
348
|
GET_PARSER(recv, parser);
|
328
349
|
valary = rb_ary_new3(3,
|
329
|
-
(base ? TO_(rb_str_new2((char*)base)) : Qnil),
|
330
|
-
(systemId ? TO_(rb_str_new2((char*)systemId)) : Qnil),
|
331
|
-
(publicId ? TO_(rb_str_new2((char*)publicId)) : Qnil));
|
350
|
+
(base ? TO_(ENC_(rb_str_new2((char*)base))) : Qnil),
|
351
|
+
(systemId ? TO_(ENC_(rb_str_new2((char*)systemId))) : Qnil),
|
352
|
+
(publicId ? TO_(ENC_(rb_str_new2((char*)publicId))) : Qnil));
|
332
353
|
rb_yield(rb_ary_new3(4, symNOTATION_DECL,
|
333
|
-
TO_(rb_str_new2((char*)notationName)),
|
354
|
+
TO_(ENC_(rb_str_new2((char*)notationName))),
|
334
355
|
valary, recv));
|
335
356
|
if (parser->defaultCurrent) {
|
336
357
|
parser->defaultCurrent = 0;
|
@@ -353,11 +374,11 @@ iterExternalEntityRefHandler(XML_Parser xmlparser,
|
|
353
374
|
recv = (VALUE)XML_GetUserData(xmlparser);
|
354
375
|
GET_PARSER(recv, parser);
|
355
376
|
valary = rb_ary_new3(3,
|
356
|
-
(base ? TO_(rb_str_new2((char*)base)) : Qnil),
|
357
|
-
(systemId ? TO_(rb_str_new2((char*)systemId)) : Qnil),
|
358
|
-
(publicId ? TO_(rb_str_new2((char*)publicId)) : Qnil));
|
377
|
+
(base ? TO_(ENC_(rb_str_new2((char*)base))) : Qnil),
|
378
|
+
(systemId ? TO_(ENC_(rb_str_new2((char*)systemId))) : Qnil),
|
379
|
+
(publicId ? TO_(ENC_(rb_str_new2((char*)publicId))) : Qnil));
|
359
380
|
ret = rb_yield(rb_ary_new3(4, symEXTERNAL_ENTITY_REF,
|
360
|
-
(context ? TO_(rb_str_new2((char*)context)) : Qnil),
|
381
|
+
(context ? TO_(ENC_(rb_str_new2((char*)context))) : Qnil),
|
361
382
|
valary, recv));
|
362
383
|
if (parser->defaultCurrent) {
|
363
384
|
parser->defaultCurrent = 0;
|
@@ -376,7 +397,7 @@ iterCommentHandler(void *recv,
|
|
376
397
|
XMLParser* parser;
|
377
398
|
GET_PARSER(recv, parser);
|
378
399
|
rb_yield(rb_ary_new3(4, symCOMMENT,
|
379
|
-
Qnil, TO_(rb_str_new2((char*)s)), recv));
|
400
|
+
Qnil, TO_(ENC_(rb_str_new2((char*)s))), recv));
|
380
401
|
if (parser->defaultCurrent) {
|
381
402
|
parser->defaultCurrent = 0;
|
382
403
|
XML_DefaultCurrent(parser->parser);
|
@@ -415,8 +436,8 @@ iterStartNamespaceDeclHandler(void *recv,
|
|
415
436
|
XMLParser* parser;
|
416
437
|
GET_PARSER(recv, parser);
|
417
438
|
rb_yield(rb_ary_new3(4, symSTART_NAMESPACE_DECL,
|
418
|
-
(prefix ? TO_(rb_str_new2((char*)prefix)) : Qnil),
|
419
|
-
(uri ? TO_(rb_str_new2((char*)uri)) : Qnil), recv));
|
439
|
+
(prefix ? TO_(ENC_(rb_str_new2((char*)prefix))) : Qnil),
|
440
|
+
(uri ? TO_(ENC_(rb_str_new2((char*)uri))) : Qnil), recv));
|
420
441
|
if (parser->defaultCurrent) {
|
421
442
|
parser->defaultCurrent = 0;
|
422
443
|
XML_DefaultCurrent(parser->parser);
|
@@ -430,7 +451,7 @@ iterEndNamespaceDeclHandler(void *recv,
|
|
430
451
|
XMLParser* parser;
|
431
452
|
GET_PARSER(recv, parser);
|
432
453
|
rb_yield(rb_ary_new3(4, symEND_NAMESPACE_DECL,
|
433
|
-
(prefix ? TO_(rb_str_new2((char*)prefix)) : Qnil),
|
454
|
+
(prefix ? TO_(ENC_(rb_str_new2((char*)prefix))) : Qnil),
|
434
455
|
Qnil, recv));
|
435
456
|
if (parser->defaultCurrent) {
|
436
457
|
parser->defaultCurrent = 0;
|
@@ -458,12 +479,12 @@ iterStartDoctypeDeclHandler(void *recv,
|
|
458
479
|
GET_PARSER(recv, parser);
|
459
480
|
#ifdef HAVE_EXPAT_H
|
460
481
|
valary = rb_ary_new3(3,
|
461
|
-
(sysid ? TO_(rb_str_new2((char*)sysid)) : Qnil),
|
462
|
-
(pubid ? TO_(rb_str_new2((char*)pubid)) : Qnil),
|
482
|
+
(sysid ? TO_(ENC_(rb_str_new2((char*)sysid))) : Qnil),
|
483
|
+
(pubid ? TO_(ENC_(rb_str_new2((char*)pubid))) : Qnil),
|
463
484
|
(has_internal_subset ? Qtrue : Qfalse));
|
464
485
|
#endif
|
465
486
|
rb_yield(rb_ary_new3(4, symSTART_DOCTYPE_DECL,
|
466
|
-
TO_(rb_str_new2((char*)doctypeName)),
|
487
|
+
TO_(ENC_(rb_str_new2((char*)doctypeName))),
|
467
488
|
valary, recv));
|
468
489
|
if (parser->defaultCurrent) {
|
469
490
|
parser->defaultCurrent = 0;
|
@@ -498,18 +519,18 @@ makeContentArray(XMLParser* parser, XML_Content* model)
|
|
498
519
|
static const char* content_quant_name[] = {
|
499
520
|
"", "?", "*", "+"
|
500
521
|
};
|
501
|
-
int i;
|
522
|
+
unsigned int i;
|
502
523
|
VALUE children = Qnil;
|
503
524
|
const char* type_name = content_type_name[model->type];
|
504
525
|
const char* quant_name = content_quant_name[model->quant];
|
505
526
|
VALUE ret = rb_ary_new3(3,
|
506
|
-
TO_(rb_str_new2((char*)type_name)),
|
507
|
-
TO_(rb_str_new2((char*)quant_name)),
|
508
|
-
(model->name ? TO_(rb_str_new2((char*)model->name)) :
|
527
|
+
TO_(ENC_(rb_str_new2((char*)type_name))),
|
528
|
+
TO_(ENC_(rb_str_new2((char*)quant_name))),
|
529
|
+
(model->name ? TO_(ENC_(rb_str_new2((char*)model->name))) :
|
509
530
|
Qnil));
|
510
531
|
if (model->numchildren > 0) {
|
511
532
|
children = rb_ary_new();
|
512
|
-
for (i =0; i < model->numchildren; i++) {
|
533
|
+
for (i = 0; i < model->numchildren; i++) {
|
513
534
|
VALUE child = makeContentArray(parser, model->children + i);
|
514
535
|
rb_ary_push(children, child);
|
515
536
|
}
|
@@ -526,10 +547,11 @@ iterElementDeclHandler(void *recv,
|
|
526
547
|
XML_Content *model)
|
527
548
|
{
|
528
549
|
XMLParser* parser;
|
550
|
+
VALUE content;
|
529
551
|
GET_PARSER(recv, parser);
|
530
|
-
|
552
|
+
content = makeContentArray(parser, model);
|
531
553
|
rb_yield(rb_ary_new3(4, symELEMENT_DECL,
|
532
|
-
TO_(rb_str_new2(name)),
|
554
|
+
TO_(ENC_(rb_str_new2(name))),
|
533
555
|
content, recv));
|
534
556
|
if (parser->defaultCurrent) {
|
535
557
|
parser->defaultCurrent = 0;
|
@@ -550,12 +572,12 @@ iterAttlistDeclHandler (void *recv,
|
|
550
572
|
|
551
573
|
GET_PARSER(recv, parser);
|
552
574
|
valary = rb_ary_new3(4,
|
553
|
-
TO_(rb_str_new2((char*)attname)),
|
554
|
-
TO_(rb_str_new2((char*)att_type)),
|
555
|
-
(dflt ? TO_(rb_str_new2((char*)dflt)) : Qnil),
|
575
|
+
TO_(ENC_(rb_str_new2((char*)attname))),
|
576
|
+
TO_(ENC_(rb_str_new2((char*)att_type))),
|
577
|
+
(dflt ? TO_(ENC_(rb_str_new2((char*)dflt))) : Qnil),
|
556
578
|
(isrequired ? Qtrue : Qfalse));
|
557
579
|
rb_yield(rb_ary_new3(4, symATTLIST_DECL,
|
558
|
-
TO_(rb_str_new2(elname)),
|
580
|
+
TO_(ENC_(rb_str_new2(elname))),
|
559
581
|
valary, recv));
|
560
582
|
if (parser->defaultCurrent) {
|
561
583
|
parser->defaultCurrent = 0;
|
@@ -574,8 +596,8 @@ iterXmlDeclHandler (void *recv,
|
|
574
596
|
|
575
597
|
GET_PARSER(recv, parser);
|
576
598
|
valary = rb_ary_new3(3,
|
577
|
-
(version ? TO_(rb_str_new2(version)) : Qnil),
|
578
|
-
(encoding ? TO_(rb_str_new2((char*)encoding)) : Qnil),
|
599
|
+
(version ? TO_(ENC_(rb_str_new2(version))) : Qnil),
|
600
|
+
(encoding ? TO_(ENC_(rb_str_new2((char*)encoding))) : Qnil),
|
579
601
|
INT2FIX(standalone));
|
580
602
|
rb_yield(rb_ary_new3(4, symXML_DECL,
|
581
603
|
Qnil,
|
@@ -603,14 +625,14 @@ iterEntityDeclHandler (void *recv,
|
|
603
625
|
GET_PARSER(recv, parser);
|
604
626
|
valary = rb_ary_new3(6,
|
605
627
|
(is_parameter_entity ? Qtrue : Qfalse),
|
606
|
-
TO_(rb_str_new((char*)value, value_length)),
|
607
|
-
(base ? TO_(rb_str_new2((char*)base)) : Qnil),
|
608
|
-
(systemId ? TO_(rb_str_new2((char*)systemId)) : Qnil),
|
609
|
-
(publicId ? TO_(rb_str_new2((char*)publicId)) : Qnil),
|
610
|
-
(notationName ? TO_(rb_str_new2((char*)notationName))
|
628
|
+
TO_(ENC_(rb_str_new((char*)value, value_length))),
|
629
|
+
(base ? TO_(ENC_(rb_str_new2((char*)base))) : Qnil),
|
630
|
+
(systemId ? TO_(ENC_(rb_str_new2((char*)systemId))) : Qnil),
|
631
|
+
(publicId ? TO_(ENC_(rb_str_new2((char*)publicId))) : Qnil),
|
632
|
+
(notationName ? TO_(ENC_(rb_str_new2((char*)notationName)))
|
611
633
|
: Qnil));
|
612
634
|
rb_yield(rb_ary_new3(4, symENTITY_DECL,
|
613
|
-
TO_(rb_str_new2(entityName)),
|
635
|
+
TO_(ENC_(rb_str_new2(entityName))),
|
614
636
|
valary, recv));
|
615
637
|
if (parser->defaultCurrent) {
|
616
638
|
parser->defaultCurrent = 0;
|
@@ -632,11 +654,11 @@ iterExternalParsedEntityDeclHandler(void *recv,
|
|
632
654
|
VALUE valary;
|
633
655
|
|
634
656
|
GET_PARSER(recv, parser);
|
635
|
-
valary = rb_ary_new3(3, (base ? TO_(rb_str_new2((char*)base)) : Qnil),
|
636
|
-
TO_(rb_str_new2((char*)systemId)),
|
637
|
-
(publicId ? TO_(rb_str_new2((char*)publicId)) : Qnil));
|
657
|
+
valary = rb_ary_new3(3, (base ? TO_(ENC_(rb_str_new2((char*)base))) : Qnil),
|
658
|
+
TO_(ENC_(rb_str_new2((char*)systemId))),
|
659
|
+
(publicId ? TO_(ENC_(rb_str_new2((char*)publicId))) : Qnil));
|
638
660
|
rb_yield(rb_ary_new3(4, symEXTERNAL_PARSED_ENTITY_DECL,
|
639
|
-
TO_(rb_str_new2((char*)entityName)),
|
661
|
+
TO_(ENC_(rb_str_new2((char*)entityName))),
|
640
662
|
valary, recv));
|
641
663
|
if (parser->defaultCurrent) {
|
642
664
|
parser->defaultCurrent = 0;
|
@@ -653,9 +675,9 @@ iterInternalParsedEntityDeclHandler(void *recv,
|
|
653
675
|
XMLParser* parser;
|
654
676
|
GET_PARSER(recv, parser);
|
655
677
|
rb_yield(rb_ary_new3(4, symINTERNAL_PARSED_ENTITY_DECL,
|
656
|
-
TO_(rb_str_new2((char*)entityName)),
|
657
|
-
TO_(rb_str_new((char*)replacementText,
|
658
|
-
|
678
|
+
TO_(ENC_(rb_str_new2((char*)entityName))),
|
679
|
+
TO_(ENC_(rb_str_new((char*)replacementText,
|
680
|
+
replacementTextLength))), recv));
|
659
681
|
if (parser->defaultCurrent) {
|
660
682
|
parser->defaultCurrent = 0;
|
661
683
|
XML_DefaultCurrent(parser->parser);
|
@@ -672,7 +694,7 @@ iterSkippedEntityHandler(void *recv,
|
|
672
694
|
XMLParser* parser;
|
673
695
|
GET_PARSER(recv, parser);
|
674
696
|
rb_yield(rb_ary_new3(4, symSKIPPED_ENTITY,
|
675
|
-
TO_(rb_str_new2((char*)entityName)),
|
697
|
+
TO_(ENC_(rb_str_new2((char*)entityName))),
|
676
698
|
INT2FIX(is_parameter_entity), recv));
|
677
699
|
if (parser->defaultCurrent) {
|
678
700
|
parser->defaultCurrent = 0;
|
@@ -700,11 +722,11 @@ myStartElementHandler(void *recv,
|
|
700
722
|
const char* key = *atts++;
|
701
723
|
const char* val = *atts++;
|
702
724
|
rb_hash_aset(attrhash,
|
703
|
-
FO_(TO_(rb_str_new2((char*)key))),
|
704
|
-
TO_(rb_str_new2((char*)val)));
|
725
|
+
FO_(TO_(ENC_(rb_str_new2((char*)key)))),
|
726
|
+
TO_(ENC_(rb_str_new2((char*)val))));
|
705
727
|
}
|
706
728
|
rb_funcall((VALUE)recv, id_startElementHandler, 2,
|
707
|
-
TO_(rb_str_new2((char*)name)), attrhash);
|
729
|
+
TO_(ENC_(rb_str_new2((char*)name))), attrhash);
|
708
730
|
}
|
709
731
|
|
710
732
|
static void
|
@@ -714,7 +736,7 @@ myEndElementHandler(void *recv,
|
|
714
736
|
XMLParser* parser;
|
715
737
|
GET_PARSER(recv, parser);
|
716
738
|
rb_funcall((VALUE)recv, id_endElementHandler, 1,
|
717
|
-
TO_(rb_str_new2((char*)name)));
|
739
|
+
TO_(ENC_(rb_str_new2((char*)name))));
|
718
740
|
}
|
719
741
|
|
720
742
|
static void
|
@@ -725,7 +747,7 @@ myCharacterDataHandler(void *recv,
|
|
725
747
|
XMLParser* parser;
|
726
748
|
GET_PARSER(recv, parser);
|
727
749
|
rb_funcall((VALUE)recv, id_characterDataHandler, 1,
|
728
|
-
TO_(rb_str_new((char*)s, len)));
|
750
|
+
TO_(ENC_(rb_str_new((char*)s, len))));
|
729
751
|
}
|
730
752
|
|
731
753
|
static void
|
@@ -736,8 +758,8 @@ myProcessingInstructionHandler(void *recv,
|
|
736
758
|
XMLParser* parser;
|
737
759
|
GET_PARSER(recv, parser);
|
738
760
|
rb_funcall((VALUE)recv, id_processingInstructionHandler, 2,
|
739
|
-
TO_(rb_str_new2((char*)target)),
|
740
|
-
TO_(rb_str_new2((char*)data)));
|
761
|
+
TO_(ENC_(rb_str_new2((char*)target))),
|
762
|
+
TO_(ENC_(rb_str_new2((char*)data))));
|
741
763
|
}
|
742
764
|
|
743
765
|
static void
|
@@ -748,7 +770,7 @@ myDefaultHandler(void *recv,
|
|
748
770
|
XMLParser* parser;
|
749
771
|
GET_PARSER(recv, parser);
|
750
772
|
rb_funcall((VALUE)recv, id_defaultHandler, 1,
|
751
|
-
TO_(rb_str_new((char*)s, len)));
|
773
|
+
TO_(ENC_(rb_str_new((char*)s, len))));
|
752
774
|
}
|
753
775
|
|
754
776
|
#ifdef NEW_EXPAT
|
@@ -760,7 +782,7 @@ myDefaultExpandHandler(void *recv,
|
|
760
782
|
XMLParser* parser;
|
761
783
|
GET_PARSER(recv, parser);
|
762
784
|
rb_funcall((VALUE)recv, id_defaultExpandHandler, 1,
|
763
|
-
TO_(rb_str_new((char*)s, len)));
|
785
|
+
TO_(ENC_(rb_str_new((char*)s, len))));
|
764
786
|
}
|
765
787
|
#endif
|
766
788
|
|
@@ -775,11 +797,11 @@ myUnparsedEntityDeclHandler(void *recv,
|
|
775
797
|
XMLParser* parser;
|
776
798
|
GET_PARSER(recv, parser);
|
777
799
|
rb_funcall((VALUE)recv, id_unparsedEntityDeclHandler, 5,
|
778
|
-
TO_(rb_str_new2((char*)entityName)),
|
779
|
-
(base ? TO_(rb_str_new2((char*)base)) : Qnil),
|
780
|
-
TO_(rb_str_new2((char*)systemId)),
|
781
|
-
(publicId ? TO_(rb_str_new2((char*)publicId)) : Qnil),
|
782
|
-
TO_(rb_str_new2((char*)notationName)));
|
800
|
+
TO_(ENC_(rb_str_new2((char*)entityName))),
|
801
|
+
(base ? TO_(ENC_(rb_str_new2((char*)base))) : Qnil),
|
802
|
+
TO_(ENC_(rb_str_new2((char*)systemId))),
|
803
|
+
(publicId ? TO_(ENC_(rb_str_new2((char*)publicId))) : Qnil),
|
804
|
+
TO_(ENC_(rb_str_new2((char*)notationName))));
|
783
805
|
}
|
784
806
|
|
785
807
|
void
|
@@ -792,10 +814,10 @@ myNotationDeclHandler(void *recv,
|
|
792
814
|
XMLParser* parser;
|
793
815
|
GET_PARSER(recv, parser);
|
794
816
|
rb_funcall((VALUE)recv, id_notationDeclHandler, 4,
|
795
|
-
TO_(rb_str_new2((char*)notationName)),
|
796
|
-
(base ? TO_(rb_str_new2((char*)base)) : Qnil),
|
797
|
-
(systemId ? TO_(rb_str_new2((char*)systemId)) : Qnil),
|
798
|
-
(publicId ? TO_(rb_str_new2((char*)publicId)) : Qnil));
|
817
|
+
TO_(ENC_(rb_str_new2((char*)notationName))),
|
818
|
+
(base ? TO_(ENC_(rb_str_new2((char*)base))) : Qnil),
|
819
|
+
(systemId ? TO_(ENC_(rb_str_new2((char*)systemId))) : Qnil),
|
820
|
+
(publicId ? TO_(ENC_(rb_str_new2((char*)publicId))) : Qnil));
|
799
821
|
}
|
800
822
|
|
801
823
|
int
|
@@ -812,10 +834,10 @@ myExternalEntityRefHandler(XML_Parser xmlparser,
|
|
812
834
|
recv = (VALUE)XML_GetUserData(xmlparser);
|
813
835
|
GET_PARSER(recv, parser);
|
814
836
|
ret = rb_funcall(recv, id_externalEntityRefHandler, 4,
|
815
|
-
(context ? TO_(rb_str_new2((char*)context)): Qnil),
|
816
|
-
(base ? TO_(rb_str_new2((char*)base)) : Qnil),
|
817
|
-
(systemId ? TO_(rb_str_new2((char*)systemId)) : Qnil),
|
818
|
-
(publicId ? TO_(rb_str_new2((char*)publicId)) : Qnil));
|
837
|
+
(context ? TO_(ENC_(rb_str_new2((char*)context))): Qnil),
|
838
|
+
(base ? TO_(ENC_(rb_str_new2((char*)base))) : Qnil),
|
839
|
+
(systemId ? TO_(ENC_(rb_str_new2((char*)systemId))) : Qnil),
|
840
|
+
(publicId ? TO_(ENC_(rb_str_new2((char*)publicId))) : Qnil));
|
819
841
|
/* The error status in this handler should be returned
|
820
842
|
by the exception. */
|
821
843
|
return Qnil;
|
@@ -829,7 +851,7 @@ myCommentHandler(void *recv,
|
|
829
851
|
XMLParser* parser;
|
830
852
|
GET_PARSER(recv, parser);
|
831
853
|
rb_funcall((VALUE)recv, id_commentHandler, 1,
|
832
|
-
TO_(rb_str_new2((char*)s)));
|
854
|
+
TO_(ENC_(rb_str_new2((char*)s))));
|
833
855
|
}
|
834
856
|
|
835
857
|
static void
|
@@ -856,8 +878,8 @@ myStartNamespaceDeclHandler(void *recv,
|
|
856
878
|
XMLParser* parser;
|
857
879
|
GET_PARSER(recv, parser);
|
858
880
|
rb_funcall((VALUE)recv, id_startNamespaceDeclHandler, 2,
|
859
|
-
(prefix ? TO_(rb_str_new2((char*)prefix)) : Qnil),
|
860
|
-
(uri ? TO_(rb_str_new2((char*)uri)) : Qnil));
|
881
|
+
(prefix ? TO_(ENC_(rb_str_new2((char*)prefix))) : Qnil),
|
882
|
+
(uri ? TO_(ENC_(rb_str_new2((char*)uri))) : Qnil));
|
861
883
|
}
|
862
884
|
|
863
885
|
static void
|
@@ -867,7 +889,7 @@ myEndNamespaceDeclHandler(void *recv,
|
|
867
889
|
XMLParser* parser;
|
868
890
|
GET_PARSER(recv, parser);
|
869
891
|
rb_funcall((VALUE)recv, id_endNamespaceDeclHandler, 1,
|
870
|
-
(prefix ? TO_(rb_str_new2((char*)prefix)) : Qnil));
|
892
|
+
(prefix ? TO_(ENC_(rb_str_new2((char*)prefix))) : Qnil));
|
871
893
|
}
|
872
894
|
|
873
895
|
static int
|
@@ -900,13 +922,13 @@ myStartDoctypeDeclHandler(void *recv,
|
|
900
922
|
GET_PARSER(recv, parser);
|
901
923
|
#ifdef HAVE_EXPAT_H
|
902
924
|
rb_funcall((VALUE)recv, id_startDoctypeDeclHandler, 4,
|
903
|
-
TO_(rb_str_new2((char*)doctypeName)),
|
904
|
-
(sysid ? TO_(rb_str_new2((char*)sysid)) : Qnil),
|
905
|
-
(pubid ? TO_(rb_str_new2((char*)pubid)) : Qnil),
|
925
|
+
TO_(ENC_(rb_str_new2((char*)doctypeName))),
|
926
|
+
(sysid ? TO_(ENC_(rb_str_new2((char*)sysid))) : Qnil),
|
927
|
+
(pubid ? TO_(ENC_(rb_str_new2((char*)pubid))) : Qnil),
|
906
928
|
(has_internal_subset ? Qtrue : Qfalse));
|
907
929
|
#else
|
908
930
|
rb_funcall((VALUE)recv, id_startDoctypeDeclHandler, 4,
|
909
|
-
TO_(rb_str_new2((char*)doctypeName)),
|
931
|
+
TO_(ENC_(rb_str_new2((char*)doctypeName))),
|
910
932
|
Qnil, Qnil, Qfalse);
|
911
933
|
#endif
|
912
934
|
}
|
@@ -929,10 +951,11 @@ myElementDeclHandler(void *recv,
|
|
929
951
|
XML_Content *model)
|
930
952
|
{
|
931
953
|
XMLParser* parser;
|
954
|
+
VALUE content;
|
932
955
|
GET_PARSER(recv, parser);
|
933
|
-
|
956
|
+
content = makeContentArray(parser, model);
|
934
957
|
rb_funcall((VALUE)recv, id_elementDeclHandler, 2,
|
935
|
-
TO_(rb_str_new2(name)), content);
|
958
|
+
TO_(ENC_(rb_str_new2(name))), content);
|
936
959
|
}
|
937
960
|
|
938
961
|
static void
|
@@ -946,10 +969,10 @@ myAttlistDeclHandler (void *recv,
|
|
946
969
|
XMLParser* parser;
|
947
970
|
GET_PARSER(recv, parser);
|
948
971
|
rb_funcall((VALUE)recv, id_attlistDeclHandler, 5,
|
949
|
-
TO_(rb_str_new2(elname)),
|
950
|
-
TO_(rb_str_new2((char*)attname)),
|
951
|
-
TO_(rb_str_new2((char*)att_type)),
|
952
|
-
(dflt ? TO_(rb_str_new2((char*)dflt)) : Qnil),
|
972
|
+
TO_(ENC_(rb_str_new2(elname))),
|
973
|
+
TO_(ENC_(rb_str_new2((char*)attname))),
|
974
|
+
TO_(ENC_(rb_str_new2((char*)att_type))),
|
975
|
+
(dflt ? TO_(ENC_(rb_str_new2((char*)dflt))) : Qnil),
|
953
976
|
(isrequired ? Qtrue : Qfalse));
|
954
977
|
}
|
955
978
|
|
@@ -962,8 +985,8 @@ myXmlDeclHandler (void *recv,
|
|
962
985
|
XMLParser* parser;
|
963
986
|
GET_PARSER(recv, parser);
|
964
987
|
rb_funcall((VALUE)recv, id_xmlDeclHandler, 3,
|
965
|
-
(version ? TO_(rb_str_new2(version)) : Qnil),
|
966
|
-
(encoding ? TO_(rb_str_new2((char*)encoding)) : Qnil),
|
988
|
+
(version ? TO_(ENC_(rb_str_new2(version))) : Qnil),
|
989
|
+
(encoding ? TO_(ENC_(rb_str_new2((char*)encoding))) : Qnil),
|
967
990
|
INT2FIX(standalone));
|
968
991
|
}
|
969
992
|
|
@@ -981,13 +1004,13 @@ myEntityDeclHandler (void *recv,
|
|
981
1004
|
XMLParser* parser;
|
982
1005
|
GET_PARSER(recv, parser);
|
983
1006
|
rb_funcall((VALUE)recv, id_entityDeclHandler, 7,
|
984
|
-
TO_(rb_str_new2(entityName)),
|
1007
|
+
TO_(ENC_(rb_str_new2(entityName))),
|
985
1008
|
(is_parameter_entity ? Qtrue : Qfalse),
|
986
|
-
TO_(rb_str_new((char*)value, value_length)),
|
987
|
-
(base ? TO_(rb_str_new2((char*)base)) : Qnil),
|
988
|
-
(systemId ? TO_(rb_str_new2((char*)systemId)) : Qnil),
|
989
|
-
(publicId ? TO_(rb_str_new2((char*)publicId)) : Qnil),
|
990
|
-
(notationName ? TO_(rb_str_new2((char*)notationName))
|
1009
|
+
TO_(ENC_(rb_str_new((char*)value, value_length))),
|
1010
|
+
(base ? TO_(ENC_(rb_str_new2((char*)base))) : Qnil),
|
1011
|
+
(systemId ? TO_(ENC_(rb_str_new2((char*)systemId))) : Qnil),
|
1012
|
+
(publicId ? TO_(ENC_(rb_str_new2((char*)publicId))) : Qnil),
|
1013
|
+
(notationName ? TO_(ENC_(rb_str_new2((char*)notationName)))
|
991
1014
|
: Qnil));
|
992
1015
|
}
|
993
1016
|
|
@@ -1004,10 +1027,10 @@ myExternalParsedEntityDeclHandler(void *recv,
|
|
1004
1027
|
XMLParser* parser;
|
1005
1028
|
GET_PARSER(recv, parser);
|
1006
1029
|
rb_funcall((VALUE)recv, id_externalParsedEntityDeclHandler, 4,
|
1007
|
-
TO_(rb_str_new2((char*)entityName)),
|
1008
|
-
(base ? TO_(rb_str_new2((char*)base)) : Qnil),
|
1009
|
-
TO_(rb_str_new2((char*)systemId)),
|
1010
|
-
(publicId ? TO_(rb_str_new2((char*)publicId)) : Qnil));
|
1030
|
+
TO_(ENC_(rb_str_new2((char*)entityName))),
|
1031
|
+
(base ? TO_(ENC_(rb_str_new2((char*)base))) : Qnil),
|
1032
|
+
TO_(ENC_(rb_str_new2((char*)systemId))),
|
1033
|
+
(publicId ? TO_(ENC_(rb_str_new2((char*)publicId))) : Qnil));
|
1011
1034
|
}
|
1012
1035
|
|
1013
1036
|
static void
|
@@ -1019,9 +1042,9 @@ myInternalParsedEntityDeclHandler(void *recv,
|
|
1019
1042
|
XMLParser* parser;
|
1020
1043
|
GET_PARSER(recv, parser);
|
1021
1044
|
rb_funcall((VALUE)recv, id_internalParsedEntityDeclHandler, 2,
|
1022
|
-
TO_(rb_str_new2((char*)entityName)),
|
1023
|
-
TO_(rb_str_new((char*)replacementText,
|
1024
|
-
|
1045
|
+
TO_(ENC_(rb_str_new2((char*)entityName))),
|
1046
|
+
TO_(ENC_(rb_str_new((char*)replacementText,
|
1047
|
+
replacementTextLength))));
|
1025
1048
|
}
|
1026
1049
|
#endif
|
1027
1050
|
|
@@ -1043,21 +1066,21 @@ myEncodingConv(void *data, const char *s)
|
|
1043
1066
|
{
|
1044
1067
|
VALUE v;
|
1045
1068
|
int len;
|
1046
|
-
int slen =
|
1047
|
-
|
1069
|
+
int slen = RSTRING_PTR(rb_ivar_get((VALUE)data,
|
1070
|
+
id_map))[*(unsigned char*)s];
|
1048
1071
|
|
1049
|
-
v = rb_funcall((VALUE)data, id_convert, 1, rb_str_new((char*)s, -slen));
|
1072
|
+
v = rb_funcall((VALUE)data, id_convert, 1, ENC_(rb_str_new((char*)s, -slen)));
|
1050
1073
|
switch (TYPE(v)) {
|
1051
1074
|
case T_FIXNUM:
|
1052
1075
|
return FIX2INT(v);
|
1053
1076
|
case T_STRING:
|
1054
|
-
len =
|
1077
|
+
len = RSTRING_LEN(v);
|
1055
1078
|
if (len == 1) {
|
1056
|
-
return (unsigned char)*(
|
1079
|
+
return (unsigned char)*RSTRING_PTR(v);
|
1057
1080
|
}
|
1058
1081
|
else if (len >= 2) {
|
1059
|
-
return (unsigned char)*(
|
1060
|
-
(unsigned char)*(
|
1082
|
+
return (unsigned char)*RSTRING_PTR(v) |
|
1083
|
+
(unsigned char)*(RSTRING_PTR(v) + 1) << 8;
|
1061
1084
|
}
|
1062
1085
|
}
|
1063
1086
|
return 0;
|
@@ -1077,7 +1100,7 @@ iterUnknownEncodingHandler(void *recv,
|
|
1077
1100
|
|
1078
1101
|
GET_PARSER(recv, parser);
|
1079
1102
|
ret = rb_yield(rb_ary_new3(4, symUNKNOWN_ENCODING,
|
1080
|
-
TO_(rb_str_new2((char*)name)), Qnil, recv));
|
1103
|
+
TO_(ENC_(rb_str_new2((char*)name))), Qnil, recv));
|
1081
1104
|
if (TYPE(ret) == T_OBJECT && rb_obj_is_kind_of(ret, cXMLEncoding)) {
|
1082
1105
|
int i;
|
1083
1106
|
ID mid = rb_intern("map");
|
@@ -1086,7 +1109,7 @@ iterUnknownEncodingHandler(void *recv,
|
|
1086
1109
|
|
1087
1110
|
for (i = 0; i < 256; i++) {
|
1088
1111
|
VALUE m = rb_funcall(ret, mid, 1, INT2FIX(i));
|
1089
|
-
|
1112
|
+
RSTRING_PTR(cmap)[i] = info->map[i] = FIX2INT(m);
|
1090
1113
|
}
|
1091
1114
|
/* protect object form GC */
|
1092
1115
|
rb_ivar_set(recv, rb_intern("_encoding"), ret);
|
@@ -1241,8 +1264,6 @@ findEncoding(const char* encname)
|
|
1241
1264
|
file[len] = tolower(*p);
|
1242
1265
|
}
|
1243
1266
|
file[len] = '\0';
|
1244
|
-
// if (len < PATH_MAX - sizeof(encext))
|
1245
|
-
// strcat(file, encext);
|
1246
1267
|
strncat(file, encext, PATH_MAX - len -1);
|
1247
1268
|
|
1248
1269
|
if ((fp = fopen(file, "rb")) == NULL) {
|
@@ -1274,6 +1295,10 @@ myUnknownEncodingHandler(void *recv,
|
|
1274
1295
|
{
|
1275
1296
|
XMLParser* parser;
|
1276
1297
|
VALUE ret;
|
1298
|
+
|
1299
|
+
GET_PARSER(recv, parser);
|
1300
|
+
parser->detectedEncoding = name;
|
1301
|
+
|
1277
1302
|
if (!rb_method_boundp(CLASS_OF((VALUE)recv), id_unknownEncoding, 0))
|
1278
1303
|
#ifndef XML_ENC_PATH
|
1279
1304
|
return 0;
|
@@ -1293,9 +1318,8 @@ myUnknownEncodingHandler(void *recv,
|
|
1293
1318
|
}
|
1294
1319
|
#endif
|
1295
1320
|
|
1296
|
-
GET_PARSER(recv, parser);
|
1297
1321
|
ret = rb_funcall((VALUE)recv, id_unknownEncoding, 1,
|
1298
|
-
TO_(rb_str_new2((char*)name)));
|
1322
|
+
TO_(ENC_(rb_str_new2((char*)name))));
|
1299
1323
|
if (TYPE(ret) == T_OBJECT && rb_obj_is_kind_of(ret, cXMLEncoding)) {
|
1300
1324
|
int i;
|
1301
1325
|
ID mid = rb_intern("map");
|
@@ -1308,7 +1332,7 @@ myUnknownEncodingHandler(void *recv,
|
|
1308
1332
|
|
1309
1333
|
for (i = 0; i < 256; i++) {
|
1310
1334
|
VALUE m = rb_funcall(ret, mid, 1, INT2FIX(i));
|
1311
|
-
|
1335
|
+
RSTRING_PTR(cmap)[i] = info->map[i] = FIX2INT(m);
|
1312
1336
|
}
|
1313
1337
|
/* protect object form GC */
|
1314
1338
|
rb_ivar_set((VALUE)recv, rb_intern("_encoding"), ret);
|
@@ -1330,7 +1354,7 @@ mySkippedEntityHandler(void *recv,
|
|
1330
1354
|
XMLParser* parser;
|
1331
1355
|
GET_PARSER(recv, parser);
|
1332
1356
|
rb_funcall((VALUE)recv, id_skippedEntityHandler, 2,
|
1333
|
-
TO_(rb_str_new2((char*)entityName)),
|
1357
|
+
TO_(ENC_(rb_str_new2((char*)entityName))),
|
1334
1358
|
INT2FIX(is_parameter_entity));
|
1335
1359
|
}
|
1336
1360
|
#endif
|
@@ -1359,7 +1383,7 @@ XMLParser_new(int argc, VALUE* argv, VALUE klass)
|
|
1359
1383
|
/* new(encoding) */
|
1360
1384
|
if (TYPE(arg1) != T_NIL) {
|
1361
1385
|
Check_Type(arg1, T_STRING); /* encoding */
|
1362
|
-
encoding =
|
1386
|
+
encoding = RSTRING_PTR(arg1);
|
1363
1387
|
}
|
1364
1388
|
}
|
1365
1389
|
else if (count == 2) {
|
@@ -1369,10 +1393,10 @@ XMLParser_new(int argc, VALUE* argv, VALUE klass)
|
|
1369
1393
|
if (TYPE(arg1) != T_DATA) {
|
1370
1394
|
if (TYPE(arg1) != T_NIL) {
|
1371
1395
|
Check_Type(arg1, T_STRING); /* encoding */
|
1372
|
-
encoding =
|
1396
|
+
encoding = RSTRING_PTR(arg1);
|
1373
1397
|
}
|
1374
1398
|
Check_Type(arg2, T_STRING); /* nschar */
|
1375
|
-
nssep =
|
1399
|
+
nssep = RSTRING_PTR(arg2);
|
1376
1400
|
}
|
1377
1401
|
else {
|
1378
1402
|
#endif
|
@@ -1380,7 +1404,7 @@ XMLParser_new(int argc, VALUE* argv, VALUE klass)
|
|
1380
1404
|
GET_PARSER(arg1, rootparser);
|
1381
1405
|
if (!NIL_P(arg2)) {
|
1382
1406
|
Check_Type(arg2, T_STRING); /* context */
|
1383
|
-
context =
|
1407
|
+
context = RSTRING_PTR(arg2);
|
1384
1408
|
}
|
1385
1409
|
parent = arg1;
|
1386
1410
|
#ifdef NEW_EXPAT
|
@@ -1393,10 +1417,10 @@ XMLParser_new(int argc, VALUE* argv, VALUE klass)
|
|
1393
1417
|
GET_PARSER(arg1, rootparser);
|
1394
1418
|
if (!NIL_P(arg2)) {
|
1395
1419
|
Check_Type(arg2, T_STRING); /* context */
|
1396
|
-
context =
|
1420
|
+
context = RSTRING_PTR(arg2);
|
1397
1421
|
}
|
1398
1422
|
Check_Type(arg3, T_STRING); /* encoding */
|
1399
|
-
encoding =
|
1423
|
+
encoding = RSTRING_PTR(arg3);
|
1400
1424
|
parent = arg1;
|
1401
1425
|
}
|
1402
1426
|
|
@@ -1465,6 +1489,7 @@ XMLParser_new(int argc, VALUE* argv, VALUE klass)
|
|
1465
1489
|
parser->lastAttrs = NULL;
|
1466
1490
|
#endif
|
1467
1491
|
parser->parent = parent;
|
1492
|
+
parser->detectedEncoding = NULL;
|
1468
1493
|
|
1469
1494
|
rb_obj_call_init(obj, argc, argv);
|
1470
1495
|
|
@@ -1491,7 +1516,7 @@ XMLParser_reset(int argc, VALUE* argv, VALUE obj)
|
|
1491
1516
|
GET_PARSER(obj, parser);
|
1492
1517
|
if (count > 0 && TYPE(vencoding) != T_NIL) {
|
1493
1518
|
Check_Type(vencoding, T_STRING);
|
1494
|
-
encoding =
|
1519
|
+
encoding = RSTRING_PTR(vencoding);
|
1495
1520
|
}
|
1496
1521
|
XML_ParserReset(parser->parser, encoding);
|
1497
1522
|
/* setting up internal data */
|
@@ -1502,17 +1527,14 @@ XMLParser_reset(int argc, VALUE* argv, VALUE obj)
|
|
1502
1527
|
parser->lastAttrs = NULL;
|
1503
1528
|
#endif
|
1504
1529
|
parser->tainted = 0;
|
1530
|
+
parser->detectedEncoding = NULL;
|
1505
1531
|
|
1506
1532
|
return obj;
|
1507
1533
|
}
|
1508
1534
|
#endif
|
1509
1535
|
|
1510
|
-
|
1511
|
-
|
1512
|
-
XMLParser_parse(int argc, VALUE* argv, VALUE obj)
|
1513
|
-
{
|
1514
|
-
XMLParser* parser;
|
1515
|
-
int ret;
|
1536
|
+
static void
|
1537
|
+
setup_evnet_handlers(XMLParser* parser, VALUE obj) {
|
1516
1538
|
XML_StartElementHandler start = NULL;
|
1517
1539
|
XML_EndElementHandler end = NULL;
|
1518
1540
|
#ifdef NEW_EXPAT
|
@@ -1525,38 +1547,6 @@ XMLParser_parse(int argc, VALUE* argv, VALUE obj)
|
|
1525
1547
|
XML_StartDoctypeDeclHandler startDoctype = NULL;
|
1526
1548
|
XML_EndDoctypeDeclHandler endDoctype = NULL;
|
1527
1549
|
#endif
|
1528
|
-
VALUE str;
|
1529
|
-
VALUE isFinal;
|
1530
|
-
int final = 1;
|
1531
|
-
int count;
|
1532
|
-
int fromStream = 0;
|
1533
|
-
ID mid = rb_intern("gets");
|
1534
|
-
ID linebuf = rb_intern("_linebuf");
|
1535
|
-
|
1536
|
-
count = rb_scan_args(argc, argv, "02", &str, &isFinal);
|
1537
|
-
/* If "str" has public "gets" method, it will be considered *stream* */
|
1538
|
-
if (!rb_obj_is_kind_of(str, rb_cString) &&
|
1539
|
-
rb_method_boundp(CLASS_OF(str), mid, 1)) {
|
1540
|
-
fromStream = 1;
|
1541
|
-
}
|
1542
|
-
else if (!NIL_P(str)) {
|
1543
|
-
Check_Type(str, T_STRING);
|
1544
|
-
}
|
1545
|
-
if (count >= 2) {
|
1546
|
-
if (isFinal == Qtrue)
|
1547
|
-
final = 1;
|
1548
|
-
else if (isFinal == Qfalse)
|
1549
|
-
final = 0;
|
1550
|
-
else
|
1551
|
-
rb_raise(rb_eTypeError, "not valid value");
|
1552
|
-
}
|
1553
|
-
|
1554
|
-
GET_PARSER(obj, parser);
|
1555
|
-
|
1556
|
-
// parser->iterator = rb_iterator_p();
|
1557
|
-
parser->iterator = rb_block_given_p();
|
1558
|
-
|
1559
|
-
/* Setup event handlers */
|
1560
1550
|
|
1561
1551
|
/* Call as iterator */
|
1562
1552
|
if (parser->iterator) {
|
@@ -1726,6 +1716,47 @@ XMLParser_parse(int argc, VALUE* argv, VALUE obj)
|
|
1726
1716
|
XML_SetSkippedEntityHandler(parser->parser, mySkippedEntityHandler);
|
1727
1717
|
#endif
|
1728
1718
|
}
|
1719
|
+
}
|
1720
|
+
|
1721
|
+
|
1722
|
+
/* parse method */
|
1723
|
+
static VALUE
|
1724
|
+
XMLParser_parse(int argc, VALUE* argv, VALUE obj)
|
1725
|
+
{
|
1726
|
+
XMLParser* parser;
|
1727
|
+
int ret;
|
1728
|
+
VALUE str;
|
1729
|
+
VALUE isFinal;
|
1730
|
+
int final = 1;
|
1731
|
+
int count;
|
1732
|
+
int fromStream = 0;
|
1733
|
+
ID mid = rb_intern("gets");
|
1734
|
+
ID linebuf = rb_intern("_linebuf");
|
1735
|
+
|
1736
|
+
count = rb_scan_args(argc, argv, "02", &str, &isFinal);
|
1737
|
+
/* If "str" has public "gets" method, it will be considered *stream* */
|
1738
|
+
if (!rb_obj_is_kind_of(str, rb_cString) &&
|
1739
|
+
rb_method_boundp(CLASS_OF(str), mid, 1)) {
|
1740
|
+
fromStream = 1;
|
1741
|
+
}
|
1742
|
+
else if (!NIL_P(str)) {
|
1743
|
+
Check_Type(str, T_STRING);
|
1744
|
+
}
|
1745
|
+
if (count >= 2) {
|
1746
|
+
if (isFinal == Qtrue)
|
1747
|
+
final = 1;
|
1748
|
+
else if (isFinal == Qfalse)
|
1749
|
+
final = 0;
|
1750
|
+
else
|
1751
|
+
rb_raise(rb_eTypeError, "not valid value");
|
1752
|
+
}
|
1753
|
+
|
1754
|
+
GET_PARSER(obj, parser);
|
1755
|
+
|
1756
|
+
parser->iterator = rb_block_given_p();
|
1757
|
+
|
1758
|
+
/* Setup event handlers */
|
1759
|
+
setup_evnet_handlers(parser, obj);
|
1729
1760
|
|
1730
1761
|
/* Parse from stream (probably slightly slow) */
|
1731
1762
|
if (fromStream) {
|
@@ -1741,7 +1772,7 @@ XMLParser_parse(int argc, VALUE* argv, VALUE obj)
|
|
1741
1772
|
taintParser(parser);
|
1742
1773
|
rb_ivar_set(obj, linebuf, buf); /* protect buf from GC (reasonable?)*/
|
1743
1774
|
ret = XML_Parse(parser->parser,
|
1744
|
-
|
1775
|
+
RSTRING_PTR(buf), RSTRING_LEN(buf), 0);
|
1745
1776
|
}
|
1746
1777
|
else {
|
1747
1778
|
ret = XML_Parse(parser->parser, NULL, 0, 1);
|
@@ -1757,10 +1788,41 @@ XMLParser_parse(int argc, VALUE* argv, VALUE obj)
|
|
1757
1788
|
|
1758
1789
|
/* Parse string */
|
1759
1790
|
if (!NIL_P(str)) {
|
1791
|
+
#if defined(HAVE_RUBY_ENCODING_H) && defined(HAVE_XML_PARSERRESET)
|
1792
|
+
int err;
|
1793
|
+
#endif
|
1760
1794
|
if (OBJ_TAINTED(str))
|
1761
1795
|
taintParser(parser);
|
1762
1796
|
ret = XML_Parse(parser->parser,
|
1763
|
-
|
1797
|
+
RSTRING_PTR(str), RSTRING_LEN(str), final);
|
1798
|
+
#if defined(HAVE_RUBY_ENCODING_H) && defined(HAVE_XML_PARSERRESET)
|
1799
|
+
/* Ruby 1.9.1 Encoding conversion */
|
1800
|
+
err = XML_GetErrorCode(parser->parser);
|
1801
|
+
if (final && err == XML_ERROR_UNKNOWN_ENCODING) {
|
1802
|
+
rb_encoding* enc;
|
1803
|
+
volatile VALUE encobj;
|
1804
|
+
volatile VALUE ustr;
|
1805
|
+
enc = rb_enc_find(parser->detectedEncoding);
|
1806
|
+
if ((int)ENC_TO_ENCINDEX(enc) != rb_ascii8bit_encindex()) {
|
1807
|
+
rb_enc_associate(str, enc);
|
1808
|
+
encobj = rb_enc_from_encoding(enc_xml);
|
1809
|
+
/* rb_str_encode may raise an exception */
|
1810
|
+
ustr = rb_str_encode(str, encobj, 0, Qnil);
|
1811
|
+
if (!NIL_P(ustr)) {
|
1812
|
+
XML_ParserReset(parser->parser, "utf-8");
|
1813
|
+
XML_SetUserData(parser->parser, (void*)obj);
|
1814
|
+
parser->defaultCurrent = 0;
|
1815
|
+
#ifdef NEW_EXPAT
|
1816
|
+
parser->lastAttrs = NULL;
|
1817
|
+
#endif
|
1818
|
+
parser->detectedEncoding = NULL;
|
1819
|
+
setup_evnet_handlers(parser, obj);
|
1820
|
+
ret = XML_Parse(parser->parser,
|
1821
|
+
RSTRING_PTR(ustr), RSTRING_LEN(ustr), final);
|
1822
|
+
}
|
1823
|
+
}
|
1824
|
+
}
|
1825
|
+
#endif
|
1764
1826
|
}
|
1765
1827
|
else
|
1766
1828
|
ret = XML_Parse(parser->parser, NULL, 0, final);
|
@@ -1853,7 +1915,7 @@ XMLParser_setBase(VALUE obj, VALUE base)
|
|
1853
1915
|
GET_PARSER(obj, parser);
|
1854
1916
|
if (OBJ_TAINTED(base))
|
1855
1917
|
taintParser(parser);
|
1856
|
-
ret = XML_SetBase(parser->parser,
|
1918
|
+
ret = XML_SetBase(parser->parser, RSTRING_PTR(base));
|
1857
1919
|
|
1858
1920
|
return INT2FIX(ret);
|
1859
1921
|
}
|
@@ -1870,7 +1932,7 @@ XMLParser_getBase(VALUE obj)
|
|
1870
1932
|
if (!ret)
|
1871
1933
|
return Qnil;
|
1872
1934
|
|
1873
|
-
return TO_(rb_str_new2((char*)ret));
|
1935
|
+
return TO_(ENC_(rb_str_new2((char*)ret)));
|
1874
1936
|
}
|
1875
1937
|
|
1876
1938
|
#ifdef NEW_EXPAT
|
@@ -1892,7 +1954,7 @@ XMLParser_getSpecifiedAttributes(VALUE obj)
|
|
1892
1954
|
while (*atts) {
|
1893
1955
|
const char* key = *atts++;
|
1894
1956
|
atts++;
|
1895
|
-
rb_hash_aset(attrhash, FO_(TO_(rb_str_new2((char*)key))),
|
1957
|
+
rb_hash_aset(attrhash, FO_(TO_(ENC_(rb_str_new2((char*)key)))),
|
1896
1958
|
(count-- > 0) ? Qtrue: Qfalse);
|
1897
1959
|
}
|
1898
1960
|
|
@@ -1915,7 +1977,7 @@ XMLParser_getSpecifiedAttributes(VALUE obj)
|
|
1915
1977
|
attrarray = rb_ary_new2(count);
|
1916
1978
|
for (i = 0; i < count; i++, atts+=2) {
|
1917
1979
|
const char* key = *atts;
|
1918
|
-
rb_ary_push(attrarray, TO_(rb_str_new2((char*)key)));
|
1980
|
+
rb_ary_push(attrarray, TO_(ENC_(rb_str_new2((char*)key))));
|
1919
1981
|
}
|
1920
1982
|
|
1921
1983
|
return attrarray;
|
@@ -1951,13 +2013,13 @@ static VALUE
|
|
1951
2013
|
XMLParser_s_expatVersion(VALUE obj)
|
1952
2014
|
{
|
1953
2015
|
#if defined(HAVE_EXPAT_H)
|
1954
|
-
return rb_str_new2(XML_ExpatVersion());
|
2016
|
+
return ENC_(rb_str_new2(XML_ExpatVersion()));
|
1955
2017
|
#elif defined(EXPAT_1_2)
|
1956
|
-
return rb_str_new2("1.2");
|
2018
|
+
return ENC_(rb_str_new2("1.2"));
|
1957
2019
|
#elif defined(NEW_EXPAT)
|
1958
|
-
return rb_str_new2("1.1");
|
2020
|
+
return ENC_(rb_str_new2("1.1"));
|
1959
2021
|
#else
|
1960
|
-
return rb_str_new2("1.0");
|
2022
|
+
return ENC_(rb_str_new2("1.0"));
|
1961
2023
|
#endif
|
1962
2024
|
}
|
1963
2025
|
|
@@ -2003,7 +2065,7 @@ XMLParser_getInputContext(VALUE obj)
|
|
2003
2065
|
&size);
|
2004
2066
|
if (buffer && size > 0) {
|
2005
2067
|
ret = rb_ary_new3(2,
|
2006
|
-
TO_(rb_str_new(buffer, size)),
|
2068
|
+
TO_(ENC_(rb_str_new(buffer, size))),
|
2007
2069
|
INT2FIX(offset));
|
2008
2070
|
}
|
2009
2071
|
|
@@ -2025,7 +2087,7 @@ XMLParser_getIdAttrribute(VALUE obj)
|
|
2025
2087
|
idattr = XML_GetIdAttributeIndex(parser->parser);
|
2026
2088
|
if (idattr < 0)
|
2027
2089
|
return Qnil;
|
2028
|
-
return TO_(rb_str_new2((char*)atts[idattr]));
|
2090
|
+
return TO_(ENC_(rb_str_new2((char*)atts[idattr])));
|
2029
2091
|
}
|
2030
2092
|
#endif
|
2031
2093
|
|
@@ -2066,7 +2128,7 @@ XMLParser_s_getFeatureList(VALUE obj)
|
|
2066
2128
|
|
2067
2129
|
list = XML_GetFeatureList();
|
2068
2130
|
while (list && list->feature) {
|
2069
|
-
rb_hash_aset(ret, FO_(rb_str_new2(list->name)), INT2NUM(list->value));
|
2131
|
+
rb_hash_aset(ret, FO_(ENC_(rb_str_new2(list->name))), INT2NUM(list->value));
|
2070
2132
|
list++;
|
2071
2133
|
}
|
2072
2134
|
|
@@ -2079,6 +2141,10 @@ Init_xmlparser()
|
|
2079
2141
|
{
|
2080
2142
|
VALUE mXML;
|
2081
2143
|
|
2144
|
+
#ifdef HAVE_RUBY_ENCODING_H
|
2145
|
+
enc_xml = rb_utf8_encoding();
|
2146
|
+
#endif
|
2147
|
+
|
2082
2148
|
eXMLParserError = rb_define_class("XMLParserError", rb_eStandardError);
|
2083
2149
|
cXMLParser = rb_define_class("XMLParser", rb_cObject);
|
2084
2150
|
cXMLEncoding = rb_define_class("XMLEncoding", rb_cObject);
|