xmlparser 0.6.81 → 0.7.2.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,58 @@
1
+ #
2
+ # ruby extconf.rb
3
+ # --with-perl-enc-map[=/path/to/enc-map]
4
+ # --with-expat-dir=/path/to/expat
5
+ # --with-expat-lib=/path/to/expat/lib
6
+ # --with-expat-include=/path/to/expat/include
7
+ #
8
+ require 'mkmf'
9
+
10
+ cwd=`pwd`.chomp!
11
+ perl= ENV['PERL'] || 'perl'
12
+
13
+ ## Encoding maps may be stored in $perl_archlib/XML/Parser/Encodings/
14
+ #perl_archlib = '/usr/lib/perl5/site_perl/5.005/i586-linux'
15
+ #perl_archlib = '/usr/local/lib'
16
+ perl_archlib = `#{perl} -e 'use Config; print $Config{"archlib"}'`
17
+ xml_enc_path = with_config("perl-enc-map")
18
+ if xml_enc_path == true
19
+ xml_enc_path = perl_archlib + "/XML/Parser/Encodings"
20
+ end
21
+
22
+ ##$CFLAGS="-I#{cwd}/expat/xmlparse -I#{cwd}/expat/xmltok" +
23
+ ## ' -DXML_ENC_PATH=getenv\(\"XML_ENC_PATH\"\)' +
24
+ ## " -DNEW_EXPAT"
25
+ #$CFLAGS = "-I#{cwd}/expat/xmlparse -I#{cwd}/expat/xmltok"
26
+ #$LDFLAGS = "-L#{cwd}/expat/xmlparse -Wl,-rpath,/usr/local/lib"
27
+ #$LDFLAGS = "-L#{cwd}/expat/xmlparse"
28
+ dir_config("expat")
29
+ #dir_config("xmltok")
30
+ #dir_config("xmlparse")
31
+ if xml_enc_path
32
+ $CFLAGS += " -DXML_ENC_PATH=\\\"#{xml_enc_path}\\\""
33
+ end
34
+
35
+ #if have_header("xmlparse.h") || have_header("expat.h")
36
+ if have_header("expat.h") || have_header("xmlparse.h")
37
+ if have_library("expat", "XML_ParserCreate") ||
38
+ have_library("xmltok", "XML_ParserCreate")
39
+ if have_func("XML_SetNotStandaloneHandler")
40
+ $CFLAGS += " -DNEW_EXPAT"
41
+ end
42
+ if have_func("XML_SetParamEntityParsing")
43
+ $CFLAGS += " -DXML_DTD"
44
+ end
45
+ # if have_func("XML_SetExternalParsedEntityDeclHandler")
46
+ # $CFLAGS += " -DEXPAT_1_2"
47
+ # end
48
+ have_func("XML_SetDoctypeDeclHandler")
49
+ have_func("XML_ParserReset")
50
+ have_func("XML_SetSkippedEntityHandler")
51
+ have_func("XML_GetFeatureList")
52
+ have_func("XML_UseForeignDTD")
53
+ have_func("XML_GetIdAttributeIndex")
54
+ have_library("socket", "ntohl")
55
+ have_library("wsock32") if RUBY_PLATFORM =~ /mswin32|mingw/
56
+ create_makefile("xmlparser")
57
+ end
58
+ end
@@ -1,5 +1,6 @@
1
1
  /*
2
2
  * Expat (XML Parser Toolkit) wrapper for Ruby
3
+ * Dec 15, 2009 yoshidam version 0.7.0 support Ruby 1.9.1
3
4
  * Feb 16, 2004 yoshidam version 0.6.8 taint output string
4
5
  * Feb 16, 2004 yoshidam version 0.6.7 fix buffer overflow
5
6
  * Mar 11, 2003 yoshidam version 0.6.6 fix skippedEntity handler
@@ -41,7 +42,11 @@
41
42
  */
42
43
 
43
44
  #include "ruby.h"
44
- #include "rubyio.h"
45
+ #ifdef HAVE_RUBY_IO_H
46
+ # include "ruby/io.h"
47
+ #else
48
+ # include "rubyio.h"
49
+ #endif
45
50
  #include <stdio.h>
46
51
  #include <ctype.h>
47
52
  #ifdef HAVE_EXPAT_H
@@ -58,6 +63,15 @@
58
63
  # endif
59
64
  #endif
60
65
 
66
+ #ifndef RSTRING_PTR
67
+ # define RSTRING_PTR(s) (RSTRING(s)->ptr)
68
+ # define RSTRING_LEN(s) (RSTRING(s)->len)
69
+ #endif
70
+
71
+ #ifdef HAVE_RUBY_ENCODING_H
72
+ static rb_encoding* enc_xml;
73
+ #endif
74
+
61
75
  static VALUE eXMLParserError;
62
76
  static VALUE cXMLParser;
63
77
  static VALUE cXMLEncoding;
@@ -112,6 +126,7 @@ typedef struct _XMLParser {
112
126
  int tainted;
113
127
  VALUE parent;
114
128
  char* context;
129
+ const XML_Char *detectedEncoding;
115
130
  } XMLParser;
116
131
 
117
132
  static VALUE symDEFAULT;
@@ -198,6 +213,12 @@ freezeObject(VALUE obj) {
198
213
  }
199
214
  #define FO_(o) (freezeObject(o))
200
215
 
216
+ #ifdef HAVE_RUBY_ENCODING_H
217
+ # define ENC_(o) (rb_enc_associate(o, enc_xml))
218
+ #else
219
+ # define ENC_(o) (o)
220
+ #endif
221
+
201
222
 
202
223
  /* Event handlers for iterator */
203
224
  static void
@@ -216,12 +237,12 @@ iterStartElementHandler(void *recv,
216
237
  const char* key = *atts++;
217
238
  const char* val = *atts++;
218
239
  rb_hash_aset(attrhash,
219
- FO_(TO_(rb_str_new2((char*)key))),
220
- TO_(rb_str_new2((char*)val)));
240
+ FO_(TO_(ENC_(rb_str_new2((char*)key)))),
241
+ TO_(ENC_(rb_str_new2((char*)val))));
221
242
  }
222
243
 
223
244
  rb_yield(rb_ary_new3(4, symSTART_ELEM,
224
- TO_(rb_str_new2((char*)name)), attrhash, recv));
245
+ TO_(ENC_(rb_str_new2((char*)name))), attrhash, recv));
225
246
  if (parser->defaultCurrent) {
226
247
  parser->defaultCurrent = 0;
227
248
  XML_DefaultCurrent(parser->parser);
@@ -235,7 +256,7 @@ iterEndElementHandler(void *recv,
235
256
  XMLParser* parser;
236
257
  GET_PARSER(recv, parser);
237
258
  rb_yield(rb_ary_new3(4, symEND_ELEM,
238
- TO_(rb_str_new2((char*)name)), Qnil, recv));
259
+ TO_(ENC_(rb_str_new2((char*)name))), Qnil, recv));
239
260
  if (parser->defaultCurrent) {
240
261
  parser->defaultCurrent = 0;
241
262
  XML_DefaultCurrent(parser->parser);
@@ -250,7 +271,7 @@ iterCharacterDataHandler(void *recv,
250
271
  XMLParser* parser;
251
272
  GET_PARSER(recv, parser);
252
273
  rb_yield(rb_ary_new3(4, symCDATA,
253
- Qnil, TO_(rb_str_new((char*)s, len)), recv));
274
+ Qnil, TO_(ENC_(rb_str_new((char*)s, len))), recv));
254
275
  if (parser->defaultCurrent) {
255
276
  parser->defaultCurrent = 0;
256
277
  XML_DefaultCurrent(parser->parser);
@@ -265,8 +286,8 @@ iterProcessingInstructionHandler(void *recv,
265
286
  XMLParser* parser;
266
287
  GET_PARSER(recv, parser);
267
288
  rb_yield(rb_ary_new3(4, symPI,
268
- TO_(rb_str_new2((char*)target)),
269
- TO_(rb_str_new2((char*)data)), recv));
289
+ TO_(ENC_(rb_str_new2((char*)target))),
290
+ TO_(ENC_(rb_str_new2((char*)data))), recv));
270
291
  if (parser->defaultCurrent) {
271
292
  parser->defaultCurrent = 0;
272
293
  XML_DefaultCurrent(parser->parser);
@@ -281,7 +302,7 @@ iterDefaultHandler(void *recv,
281
302
  XMLParser* parser;
282
303
  GET_PARSER(recv, parser);
283
304
  rb_yield(rb_ary_new3(4, symDEFAULT,
284
- Qnil, TO_(rb_str_new((char*)s, len)), recv));
305
+ Qnil, TO_(ENC_(rb_str_new((char*)s, len))), recv));
285
306
  if (parser->defaultCurrent) {
286
307
  parser->defaultCurrent = 0;
287
308
  /* XML_DefaultCurrent should not be called in defaultHandler */
@@ -301,12 +322,12 @@ iterUnparsedEntityDeclHandler(void *recv,
301
322
  VALUE valary;
302
323
 
303
324
  GET_PARSER(recv, parser);
304
- valary = rb_ary_new3(4, (base ? TO_(rb_str_new2((char*)base)) : Qnil),
305
- TO_(rb_str_new2((char*)systemId)),
306
- (publicId ? TO_(rb_str_new2((char*)publicId)) : Qnil),
307
- TO_(rb_str_new2((char*)notationName)));
325
+ valary = rb_ary_new3(4, (base ? TO_(ENC_(rb_str_new2((char*)base))) : Qnil),
326
+ TO_(ENC_(rb_str_new2((char*)systemId))),
327
+ (publicId ? TO_(ENC_(rb_str_new2((char*)publicId))) : Qnil),
328
+ TO_(ENC_(rb_str_new2((char*)notationName))));
308
329
  rb_yield(rb_ary_new3(4, symUNPARSED_ENTITY_DECL,
309
- TO_(rb_str_new2((char*)entityName)),
330
+ TO_(ENC_(rb_str_new2((char*)entityName))),
310
331
  valary, recv));
311
332
  if (parser->defaultCurrent) {
312
333
  parser->defaultCurrent = 0;
@@ -326,11 +347,11 @@ iterNotationDeclHandler(void *recv,
326
347
 
327
348
  GET_PARSER(recv, parser);
328
349
  valary = rb_ary_new3(3,
329
- (base ? TO_(rb_str_new2((char*)base)) : Qnil),
330
- (systemId ? TO_(rb_str_new2((char*)systemId)) : Qnil),
331
- (publicId ? TO_(rb_str_new2((char*)publicId)) : Qnil));
350
+ (base ? TO_(ENC_(rb_str_new2((char*)base))) : Qnil),
351
+ (systemId ? TO_(ENC_(rb_str_new2((char*)systemId))) : Qnil),
352
+ (publicId ? TO_(ENC_(rb_str_new2((char*)publicId))) : Qnil));
332
353
  rb_yield(rb_ary_new3(4, symNOTATION_DECL,
333
- TO_(rb_str_new2((char*)notationName)),
354
+ TO_(ENC_(rb_str_new2((char*)notationName))),
334
355
  valary, recv));
335
356
  if (parser->defaultCurrent) {
336
357
  parser->defaultCurrent = 0;
@@ -353,11 +374,11 @@ iterExternalEntityRefHandler(XML_Parser xmlparser,
353
374
  recv = (VALUE)XML_GetUserData(xmlparser);
354
375
  GET_PARSER(recv, parser);
355
376
  valary = rb_ary_new3(3,
356
- (base ? TO_(rb_str_new2((char*)base)) : Qnil),
357
- (systemId ? TO_(rb_str_new2((char*)systemId)) : Qnil),
358
- (publicId ? TO_(rb_str_new2((char*)publicId)) : Qnil));
377
+ (base ? TO_(ENC_(rb_str_new2((char*)base))) : Qnil),
378
+ (systemId ? TO_(ENC_(rb_str_new2((char*)systemId))) : Qnil),
379
+ (publicId ? TO_(ENC_(rb_str_new2((char*)publicId))) : Qnil));
359
380
  ret = rb_yield(rb_ary_new3(4, symEXTERNAL_ENTITY_REF,
360
- (context ? TO_(rb_str_new2((char*)context)) : Qnil),
381
+ (context ? TO_(ENC_(rb_str_new2((char*)context))) : Qnil),
361
382
  valary, recv));
362
383
  if (parser->defaultCurrent) {
363
384
  parser->defaultCurrent = 0;
@@ -376,7 +397,7 @@ iterCommentHandler(void *recv,
376
397
  XMLParser* parser;
377
398
  GET_PARSER(recv, parser);
378
399
  rb_yield(rb_ary_new3(4, symCOMMENT,
379
- Qnil, TO_(rb_str_new2((char*)s)), recv));
400
+ Qnil, TO_(ENC_(rb_str_new2((char*)s))), recv));
380
401
  if (parser->defaultCurrent) {
381
402
  parser->defaultCurrent = 0;
382
403
  XML_DefaultCurrent(parser->parser);
@@ -415,8 +436,8 @@ iterStartNamespaceDeclHandler(void *recv,
415
436
  XMLParser* parser;
416
437
  GET_PARSER(recv, parser);
417
438
  rb_yield(rb_ary_new3(4, symSTART_NAMESPACE_DECL,
418
- (prefix ? TO_(rb_str_new2((char*)prefix)) : Qnil),
419
- (uri ? TO_(rb_str_new2((char*)uri)) : Qnil), recv));
439
+ (prefix ? TO_(ENC_(rb_str_new2((char*)prefix))) : Qnil),
440
+ (uri ? TO_(ENC_(rb_str_new2((char*)uri))) : Qnil), recv));
420
441
  if (parser->defaultCurrent) {
421
442
  parser->defaultCurrent = 0;
422
443
  XML_DefaultCurrent(parser->parser);
@@ -430,7 +451,7 @@ iterEndNamespaceDeclHandler(void *recv,
430
451
  XMLParser* parser;
431
452
  GET_PARSER(recv, parser);
432
453
  rb_yield(rb_ary_new3(4, symEND_NAMESPACE_DECL,
433
- (prefix ? TO_(rb_str_new2((char*)prefix)) : Qnil),
454
+ (prefix ? TO_(ENC_(rb_str_new2((char*)prefix))) : Qnil),
434
455
  Qnil, recv));
435
456
  if (parser->defaultCurrent) {
436
457
  parser->defaultCurrent = 0;
@@ -458,12 +479,12 @@ iterStartDoctypeDeclHandler(void *recv,
458
479
  GET_PARSER(recv, parser);
459
480
  #ifdef HAVE_EXPAT_H
460
481
  valary = rb_ary_new3(3,
461
- (sysid ? TO_(rb_str_new2((char*)sysid)) : Qnil),
462
- (pubid ? TO_(rb_str_new2((char*)pubid)) : Qnil),
482
+ (sysid ? TO_(ENC_(rb_str_new2((char*)sysid))) : Qnil),
483
+ (pubid ? TO_(ENC_(rb_str_new2((char*)pubid))) : Qnil),
463
484
  (has_internal_subset ? Qtrue : Qfalse));
464
485
  #endif
465
486
  rb_yield(rb_ary_new3(4, symSTART_DOCTYPE_DECL,
466
- TO_(rb_str_new2((char*)doctypeName)),
487
+ TO_(ENC_(rb_str_new2((char*)doctypeName))),
467
488
  valary, recv));
468
489
  if (parser->defaultCurrent) {
469
490
  parser->defaultCurrent = 0;
@@ -498,18 +519,18 @@ makeContentArray(XMLParser* parser, XML_Content* model)
498
519
  static const char* content_quant_name[] = {
499
520
  "", "?", "*", "+"
500
521
  };
501
- int i;
522
+ unsigned int i;
502
523
  VALUE children = Qnil;
503
524
  const char* type_name = content_type_name[model->type];
504
525
  const char* quant_name = content_quant_name[model->quant];
505
526
  VALUE ret = rb_ary_new3(3,
506
- TO_(rb_str_new2((char*)type_name)),
507
- TO_(rb_str_new2((char*)quant_name)),
508
- (model->name ? TO_(rb_str_new2((char*)model->name)) :
527
+ TO_(ENC_(rb_str_new2((char*)type_name))),
528
+ TO_(ENC_(rb_str_new2((char*)quant_name))),
529
+ (model->name ? TO_(ENC_(rb_str_new2((char*)model->name))) :
509
530
  Qnil));
510
531
  if (model->numchildren > 0) {
511
532
  children = rb_ary_new();
512
- for (i =0; i < model->numchildren; i++) {
533
+ for (i = 0; i < model->numchildren; i++) {
513
534
  VALUE child = makeContentArray(parser, model->children + i);
514
535
  rb_ary_push(children, child);
515
536
  }
@@ -526,10 +547,11 @@ iterElementDeclHandler(void *recv,
526
547
  XML_Content *model)
527
548
  {
528
549
  XMLParser* parser;
550
+ VALUE content;
529
551
  GET_PARSER(recv, parser);
530
- VALUE content = makeContentArray(parser, model);
552
+ content = makeContentArray(parser, model);
531
553
  rb_yield(rb_ary_new3(4, symELEMENT_DECL,
532
- TO_(rb_str_new2(name)),
554
+ TO_(ENC_(rb_str_new2(name))),
533
555
  content, recv));
534
556
  if (parser->defaultCurrent) {
535
557
  parser->defaultCurrent = 0;
@@ -550,12 +572,12 @@ iterAttlistDeclHandler (void *recv,
550
572
 
551
573
  GET_PARSER(recv, parser);
552
574
  valary = rb_ary_new3(4,
553
- TO_(rb_str_new2((char*)attname)),
554
- TO_(rb_str_new2((char*)att_type)),
555
- (dflt ? TO_(rb_str_new2((char*)dflt)) : Qnil),
575
+ TO_(ENC_(rb_str_new2((char*)attname))),
576
+ TO_(ENC_(rb_str_new2((char*)att_type))),
577
+ (dflt ? TO_(ENC_(rb_str_new2((char*)dflt))) : Qnil),
556
578
  (isrequired ? Qtrue : Qfalse));
557
579
  rb_yield(rb_ary_new3(4, symATTLIST_DECL,
558
- TO_(rb_str_new2(elname)),
580
+ TO_(ENC_(rb_str_new2(elname))),
559
581
  valary, recv));
560
582
  if (parser->defaultCurrent) {
561
583
  parser->defaultCurrent = 0;
@@ -574,8 +596,8 @@ iterXmlDeclHandler (void *recv,
574
596
 
575
597
  GET_PARSER(recv, parser);
576
598
  valary = rb_ary_new3(3,
577
- (version ? TO_(rb_str_new2(version)) : Qnil),
578
- (encoding ? TO_(rb_str_new2((char*)encoding)) : Qnil),
599
+ (version ? TO_(ENC_(rb_str_new2(version))) : Qnil),
600
+ (encoding ? TO_(ENC_(rb_str_new2((char*)encoding))) : Qnil),
579
601
  INT2FIX(standalone));
580
602
  rb_yield(rb_ary_new3(4, symXML_DECL,
581
603
  Qnil,
@@ -603,14 +625,14 @@ iterEntityDeclHandler (void *recv,
603
625
  GET_PARSER(recv, parser);
604
626
  valary = rb_ary_new3(6,
605
627
  (is_parameter_entity ? Qtrue : Qfalse),
606
- TO_(rb_str_new((char*)value, value_length)),
607
- (base ? TO_(rb_str_new2((char*)base)) : Qnil),
608
- (systemId ? TO_(rb_str_new2((char*)systemId)) : Qnil),
609
- (publicId ? TO_(rb_str_new2((char*)publicId)) : Qnil),
610
- (notationName ? TO_(rb_str_new2((char*)notationName))
628
+ TO_(ENC_(rb_str_new((char*)value, value_length))),
629
+ (base ? TO_(ENC_(rb_str_new2((char*)base))) : Qnil),
630
+ (systemId ? TO_(ENC_(rb_str_new2((char*)systemId))) : Qnil),
631
+ (publicId ? TO_(ENC_(rb_str_new2((char*)publicId))) : Qnil),
632
+ (notationName ? TO_(ENC_(rb_str_new2((char*)notationName)))
611
633
  : Qnil));
612
634
  rb_yield(rb_ary_new3(4, symENTITY_DECL,
613
- TO_(rb_str_new2(entityName)),
635
+ TO_(ENC_(rb_str_new2(entityName))),
614
636
  valary, recv));
615
637
  if (parser->defaultCurrent) {
616
638
  parser->defaultCurrent = 0;
@@ -632,11 +654,11 @@ iterExternalParsedEntityDeclHandler(void *recv,
632
654
  VALUE valary;
633
655
 
634
656
  GET_PARSER(recv, parser);
635
- valary = rb_ary_new3(3, (base ? TO_(rb_str_new2((char*)base)) : Qnil),
636
- TO_(rb_str_new2((char*)systemId)),
637
- (publicId ? TO_(rb_str_new2((char*)publicId)) : Qnil));
657
+ valary = rb_ary_new3(3, (base ? TO_(ENC_(rb_str_new2((char*)base))) : Qnil),
658
+ TO_(ENC_(rb_str_new2((char*)systemId))),
659
+ (publicId ? TO_(ENC_(rb_str_new2((char*)publicId))) : Qnil));
638
660
  rb_yield(rb_ary_new3(4, symEXTERNAL_PARSED_ENTITY_DECL,
639
- TO_(rb_str_new2((char*)entityName)),
661
+ TO_(ENC_(rb_str_new2((char*)entityName))),
640
662
  valary, recv));
641
663
  if (parser->defaultCurrent) {
642
664
  parser->defaultCurrent = 0;
@@ -653,9 +675,9 @@ iterInternalParsedEntityDeclHandler(void *recv,
653
675
  XMLParser* parser;
654
676
  GET_PARSER(recv, parser);
655
677
  rb_yield(rb_ary_new3(4, symINTERNAL_PARSED_ENTITY_DECL,
656
- TO_(rb_str_new2((char*)entityName)),
657
- TO_(rb_str_new((char*)replacementText,
658
- replacementTextLength)), recv));
678
+ TO_(ENC_(rb_str_new2((char*)entityName))),
679
+ TO_(ENC_(rb_str_new((char*)replacementText,
680
+ replacementTextLength))), recv));
659
681
  if (parser->defaultCurrent) {
660
682
  parser->defaultCurrent = 0;
661
683
  XML_DefaultCurrent(parser->parser);
@@ -672,7 +694,7 @@ iterSkippedEntityHandler(void *recv,
672
694
  XMLParser* parser;
673
695
  GET_PARSER(recv, parser);
674
696
  rb_yield(rb_ary_new3(4, symSKIPPED_ENTITY,
675
- TO_(rb_str_new2((char*)entityName)),
697
+ TO_(ENC_(rb_str_new2((char*)entityName))),
676
698
  INT2FIX(is_parameter_entity), recv));
677
699
  if (parser->defaultCurrent) {
678
700
  parser->defaultCurrent = 0;
@@ -700,11 +722,11 @@ myStartElementHandler(void *recv,
700
722
  const char* key = *atts++;
701
723
  const char* val = *atts++;
702
724
  rb_hash_aset(attrhash,
703
- FO_(TO_(rb_str_new2((char*)key))),
704
- TO_(rb_str_new2((char*)val)));
725
+ FO_(TO_(ENC_(rb_str_new2((char*)key)))),
726
+ TO_(ENC_(rb_str_new2((char*)val))));
705
727
  }
706
728
  rb_funcall((VALUE)recv, id_startElementHandler, 2,
707
- TO_(rb_str_new2((char*)name)), attrhash);
729
+ TO_(ENC_(rb_str_new2((char*)name))), attrhash);
708
730
  }
709
731
 
710
732
  static void
@@ -714,7 +736,7 @@ myEndElementHandler(void *recv,
714
736
  XMLParser* parser;
715
737
  GET_PARSER(recv, parser);
716
738
  rb_funcall((VALUE)recv, id_endElementHandler, 1,
717
- TO_(rb_str_new2((char*)name)));
739
+ TO_(ENC_(rb_str_new2((char*)name))));
718
740
  }
719
741
 
720
742
  static void
@@ -725,7 +747,7 @@ myCharacterDataHandler(void *recv,
725
747
  XMLParser* parser;
726
748
  GET_PARSER(recv, parser);
727
749
  rb_funcall((VALUE)recv, id_characterDataHandler, 1,
728
- TO_(rb_str_new((char*)s, len)));
750
+ TO_(ENC_(rb_str_new((char*)s, len))));
729
751
  }
730
752
 
731
753
  static void
@@ -736,8 +758,8 @@ myProcessingInstructionHandler(void *recv,
736
758
  XMLParser* parser;
737
759
  GET_PARSER(recv, parser);
738
760
  rb_funcall((VALUE)recv, id_processingInstructionHandler, 2,
739
- TO_(rb_str_new2((char*)target)),
740
- TO_(rb_str_new2((char*)data)));
761
+ TO_(ENC_(rb_str_new2((char*)target))),
762
+ TO_(ENC_(rb_str_new2((char*)data))));
741
763
  }
742
764
 
743
765
  static void
@@ -748,7 +770,7 @@ myDefaultHandler(void *recv,
748
770
  XMLParser* parser;
749
771
  GET_PARSER(recv, parser);
750
772
  rb_funcall((VALUE)recv, id_defaultHandler, 1,
751
- TO_(rb_str_new((char*)s, len)));
773
+ TO_(ENC_(rb_str_new((char*)s, len))));
752
774
  }
753
775
 
754
776
  #ifdef NEW_EXPAT
@@ -760,7 +782,7 @@ myDefaultExpandHandler(void *recv,
760
782
  XMLParser* parser;
761
783
  GET_PARSER(recv, parser);
762
784
  rb_funcall((VALUE)recv, id_defaultExpandHandler, 1,
763
- TO_(rb_str_new((char*)s, len)));
785
+ TO_(ENC_(rb_str_new((char*)s, len))));
764
786
  }
765
787
  #endif
766
788
 
@@ -775,11 +797,11 @@ myUnparsedEntityDeclHandler(void *recv,
775
797
  XMLParser* parser;
776
798
  GET_PARSER(recv, parser);
777
799
  rb_funcall((VALUE)recv, id_unparsedEntityDeclHandler, 5,
778
- TO_(rb_str_new2((char*)entityName)),
779
- (base ? TO_(rb_str_new2((char*)base)) : Qnil),
780
- TO_(rb_str_new2((char*)systemId)),
781
- (publicId ? TO_(rb_str_new2((char*)publicId)) : Qnil),
782
- TO_(rb_str_new2((char*)notationName)));
800
+ TO_(ENC_(rb_str_new2((char*)entityName))),
801
+ (base ? TO_(ENC_(rb_str_new2((char*)base))) : Qnil),
802
+ TO_(ENC_(rb_str_new2((char*)systemId))),
803
+ (publicId ? TO_(ENC_(rb_str_new2((char*)publicId))) : Qnil),
804
+ TO_(ENC_(rb_str_new2((char*)notationName))));
783
805
  }
784
806
 
785
807
  void
@@ -792,10 +814,10 @@ myNotationDeclHandler(void *recv,
792
814
  XMLParser* parser;
793
815
  GET_PARSER(recv, parser);
794
816
  rb_funcall((VALUE)recv, id_notationDeclHandler, 4,
795
- TO_(rb_str_new2((char*)notationName)),
796
- (base ? TO_(rb_str_new2((char*)base)) : Qnil),
797
- (systemId ? TO_(rb_str_new2((char*)systemId)) : Qnil),
798
- (publicId ? TO_(rb_str_new2((char*)publicId)) : Qnil));
817
+ TO_(ENC_(rb_str_new2((char*)notationName))),
818
+ (base ? TO_(ENC_(rb_str_new2((char*)base))) : Qnil),
819
+ (systemId ? TO_(ENC_(rb_str_new2((char*)systemId))) : Qnil),
820
+ (publicId ? TO_(ENC_(rb_str_new2((char*)publicId))) : Qnil));
799
821
  }
800
822
 
801
823
  int
@@ -812,10 +834,10 @@ myExternalEntityRefHandler(XML_Parser xmlparser,
812
834
  recv = (VALUE)XML_GetUserData(xmlparser);
813
835
  GET_PARSER(recv, parser);
814
836
  ret = rb_funcall(recv, id_externalEntityRefHandler, 4,
815
- (context ? TO_(rb_str_new2((char*)context)): Qnil),
816
- (base ? TO_(rb_str_new2((char*)base)) : Qnil),
817
- (systemId ? TO_(rb_str_new2((char*)systemId)) : Qnil),
818
- (publicId ? TO_(rb_str_new2((char*)publicId)) : Qnil));
837
+ (context ? TO_(ENC_(rb_str_new2((char*)context))): Qnil),
838
+ (base ? TO_(ENC_(rb_str_new2((char*)base))) : Qnil),
839
+ (systemId ? TO_(ENC_(rb_str_new2((char*)systemId))) : Qnil),
840
+ (publicId ? TO_(ENC_(rb_str_new2((char*)publicId))) : Qnil));
819
841
  /* The error status in this handler should be returned
820
842
  by the exception. */
821
843
  return Qnil;
@@ -829,7 +851,7 @@ myCommentHandler(void *recv,
829
851
  XMLParser* parser;
830
852
  GET_PARSER(recv, parser);
831
853
  rb_funcall((VALUE)recv, id_commentHandler, 1,
832
- TO_(rb_str_new2((char*)s)));
854
+ TO_(ENC_(rb_str_new2((char*)s))));
833
855
  }
834
856
 
835
857
  static void
@@ -856,8 +878,8 @@ myStartNamespaceDeclHandler(void *recv,
856
878
  XMLParser* parser;
857
879
  GET_PARSER(recv, parser);
858
880
  rb_funcall((VALUE)recv, id_startNamespaceDeclHandler, 2,
859
- (prefix ? TO_(rb_str_new2((char*)prefix)) : Qnil),
860
- (uri ? TO_(rb_str_new2((char*)uri)) : Qnil));
881
+ (prefix ? TO_(ENC_(rb_str_new2((char*)prefix))) : Qnil),
882
+ (uri ? TO_(ENC_(rb_str_new2((char*)uri))) : Qnil));
861
883
  }
862
884
 
863
885
  static void
@@ -867,7 +889,7 @@ myEndNamespaceDeclHandler(void *recv,
867
889
  XMLParser* parser;
868
890
  GET_PARSER(recv, parser);
869
891
  rb_funcall((VALUE)recv, id_endNamespaceDeclHandler, 1,
870
- (prefix ? TO_(rb_str_new2((char*)prefix)) : Qnil));
892
+ (prefix ? TO_(ENC_(rb_str_new2((char*)prefix))) : Qnil));
871
893
  }
872
894
 
873
895
  static int
@@ -900,13 +922,13 @@ myStartDoctypeDeclHandler(void *recv,
900
922
  GET_PARSER(recv, parser);
901
923
  #ifdef HAVE_EXPAT_H
902
924
  rb_funcall((VALUE)recv, id_startDoctypeDeclHandler, 4,
903
- TO_(rb_str_new2((char*)doctypeName)),
904
- (sysid ? TO_(rb_str_new2((char*)sysid)) : Qnil),
905
- (pubid ? TO_(rb_str_new2((char*)pubid)) : Qnil),
925
+ TO_(ENC_(rb_str_new2((char*)doctypeName))),
926
+ (sysid ? TO_(ENC_(rb_str_new2((char*)sysid))) : Qnil),
927
+ (pubid ? TO_(ENC_(rb_str_new2((char*)pubid))) : Qnil),
906
928
  (has_internal_subset ? Qtrue : Qfalse));
907
929
  #else
908
930
  rb_funcall((VALUE)recv, id_startDoctypeDeclHandler, 4,
909
- TO_(rb_str_new2((char*)doctypeName)),
931
+ TO_(ENC_(rb_str_new2((char*)doctypeName))),
910
932
  Qnil, Qnil, Qfalse);
911
933
  #endif
912
934
  }
@@ -929,10 +951,11 @@ myElementDeclHandler(void *recv,
929
951
  XML_Content *model)
930
952
  {
931
953
  XMLParser* parser;
954
+ VALUE content;
932
955
  GET_PARSER(recv, parser);
933
- VALUE content = makeContentArray(parser, model);
956
+ content = makeContentArray(parser, model);
934
957
  rb_funcall((VALUE)recv, id_elementDeclHandler, 2,
935
- TO_(rb_str_new2(name)), content);
958
+ TO_(ENC_(rb_str_new2(name))), content);
936
959
  }
937
960
 
938
961
  static void
@@ -946,10 +969,10 @@ myAttlistDeclHandler (void *recv,
946
969
  XMLParser* parser;
947
970
  GET_PARSER(recv, parser);
948
971
  rb_funcall((VALUE)recv, id_attlistDeclHandler, 5,
949
- TO_(rb_str_new2(elname)),
950
- TO_(rb_str_new2((char*)attname)),
951
- TO_(rb_str_new2((char*)att_type)),
952
- (dflt ? TO_(rb_str_new2((char*)dflt)) : Qnil),
972
+ TO_(ENC_(rb_str_new2(elname))),
973
+ TO_(ENC_(rb_str_new2((char*)attname))),
974
+ TO_(ENC_(rb_str_new2((char*)att_type))),
975
+ (dflt ? TO_(ENC_(rb_str_new2((char*)dflt))) : Qnil),
953
976
  (isrequired ? Qtrue : Qfalse));
954
977
  }
955
978
 
@@ -962,8 +985,8 @@ myXmlDeclHandler (void *recv,
962
985
  XMLParser* parser;
963
986
  GET_PARSER(recv, parser);
964
987
  rb_funcall((VALUE)recv, id_xmlDeclHandler, 3,
965
- (version ? TO_(rb_str_new2(version)) : Qnil),
966
- (encoding ? TO_(rb_str_new2((char*)encoding)) : Qnil),
988
+ (version ? TO_(ENC_(rb_str_new2(version))) : Qnil),
989
+ (encoding ? TO_(ENC_(rb_str_new2((char*)encoding))) : Qnil),
967
990
  INT2FIX(standalone));
968
991
  }
969
992
 
@@ -981,13 +1004,13 @@ myEntityDeclHandler (void *recv,
981
1004
  XMLParser* parser;
982
1005
  GET_PARSER(recv, parser);
983
1006
  rb_funcall((VALUE)recv, id_entityDeclHandler, 7,
984
- TO_(rb_str_new2(entityName)),
1007
+ TO_(ENC_(rb_str_new2(entityName))),
985
1008
  (is_parameter_entity ? Qtrue : Qfalse),
986
- TO_(rb_str_new((char*)value, value_length)),
987
- (base ? TO_(rb_str_new2((char*)base)) : Qnil),
988
- (systemId ? TO_(rb_str_new2((char*)systemId)) : Qnil),
989
- (publicId ? TO_(rb_str_new2((char*)publicId)) : Qnil),
990
- (notationName ? TO_(rb_str_new2((char*)notationName))
1009
+ TO_(ENC_(rb_str_new((char*)value, value_length))),
1010
+ (base ? TO_(ENC_(rb_str_new2((char*)base))) : Qnil),
1011
+ (systemId ? TO_(ENC_(rb_str_new2((char*)systemId))) : Qnil),
1012
+ (publicId ? TO_(ENC_(rb_str_new2((char*)publicId))) : Qnil),
1013
+ (notationName ? TO_(ENC_(rb_str_new2((char*)notationName)))
991
1014
  : Qnil));
992
1015
  }
993
1016
 
@@ -1004,10 +1027,10 @@ myExternalParsedEntityDeclHandler(void *recv,
1004
1027
  XMLParser* parser;
1005
1028
  GET_PARSER(recv, parser);
1006
1029
  rb_funcall((VALUE)recv, id_externalParsedEntityDeclHandler, 4,
1007
- TO_(rb_str_new2((char*)entityName)),
1008
- (base ? TO_(rb_str_new2((char*)base)) : Qnil),
1009
- TO_(rb_str_new2((char*)systemId)),
1010
- (publicId ? TO_(rb_str_new2((char*)publicId)) : Qnil));
1030
+ TO_(ENC_(rb_str_new2((char*)entityName))),
1031
+ (base ? TO_(ENC_(rb_str_new2((char*)base))) : Qnil),
1032
+ TO_(ENC_(rb_str_new2((char*)systemId))),
1033
+ (publicId ? TO_(ENC_(rb_str_new2((char*)publicId))) : Qnil));
1011
1034
  }
1012
1035
 
1013
1036
  static void
@@ -1019,9 +1042,9 @@ myInternalParsedEntityDeclHandler(void *recv,
1019
1042
  XMLParser* parser;
1020
1043
  GET_PARSER(recv, parser);
1021
1044
  rb_funcall((VALUE)recv, id_internalParsedEntityDeclHandler, 2,
1022
- TO_(rb_str_new2((char*)entityName)),
1023
- TO_(rb_str_new((char*)replacementText,
1024
- replacementTextLength)));
1045
+ TO_(ENC_(rb_str_new2((char*)entityName))),
1046
+ TO_(ENC_(rb_str_new((char*)replacementText,
1047
+ replacementTextLength))));
1025
1048
  }
1026
1049
  #endif
1027
1050
 
@@ -1043,21 +1066,21 @@ myEncodingConv(void *data, const char *s)
1043
1066
  {
1044
1067
  VALUE v;
1045
1068
  int len;
1046
- int slen = RSTRING(rb_ivar_get((VALUE)data,
1047
- id_map))->ptr[*(unsigned char*)s];
1069
+ int slen = RSTRING_PTR(rb_ivar_get((VALUE)data,
1070
+ id_map))[*(unsigned char*)s];
1048
1071
 
1049
- v = rb_funcall((VALUE)data, id_convert, 1, rb_str_new((char*)s, -slen));
1072
+ v = rb_funcall((VALUE)data, id_convert, 1, ENC_(rb_str_new((char*)s, -slen)));
1050
1073
  switch (TYPE(v)) {
1051
1074
  case T_FIXNUM:
1052
1075
  return FIX2INT(v);
1053
1076
  case T_STRING:
1054
- len = RSTRING(v)->len;
1077
+ len = RSTRING_LEN(v);
1055
1078
  if (len == 1) {
1056
- return (unsigned char)*(RSTRING(v)->ptr);
1079
+ return (unsigned char)*RSTRING_PTR(v);
1057
1080
  }
1058
1081
  else if (len >= 2) {
1059
- return (unsigned char)*(RSTRING(v)->ptr) |
1060
- (unsigned char)*(RSTRING(v)->ptr + 1) << 8;
1082
+ return (unsigned char)*RSTRING_PTR(v) |
1083
+ (unsigned char)*(RSTRING_PTR(v) + 1) << 8;
1061
1084
  }
1062
1085
  }
1063
1086
  return 0;
@@ -1077,7 +1100,7 @@ iterUnknownEncodingHandler(void *recv,
1077
1100
 
1078
1101
  GET_PARSER(recv, parser);
1079
1102
  ret = rb_yield(rb_ary_new3(4, symUNKNOWN_ENCODING,
1080
- TO_(rb_str_new2((char*)name)), Qnil, recv));
1103
+ TO_(ENC_(rb_str_new2((char*)name))), Qnil, recv));
1081
1104
  if (TYPE(ret) == T_OBJECT && rb_obj_is_kind_of(ret, cXMLEncoding)) {
1082
1105
  int i;
1083
1106
  ID mid = rb_intern("map");
@@ -1086,7 +1109,7 @@ iterUnknownEncodingHandler(void *recv,
1086
1109
 
1087
1110
  for (i = 0; i < 256; i++) {
1088
1111
  VALUE m = rb_funcall(ret, mid, 1, INT2FIX(i));
1089
- RSTRING(cmap)->ptr[i] = info->map[i] = FIX2INT(m);
1112
+ RSTRING_PTR(cmap)[i] = info->map[i] = FIX2INT(m);
1090
1113
  }
1091
1114
  /* protect object form GC */
1092
1115
  rb_ivar_set(recv, rb_intern("_encoding"), ret);
@@ -1241,8 +1264,6 @@ findEncoding(const char* encname)
1241
1264
  file[len] = tolower(*p);
1242
1265
  }
1243
1266
  file[len] = '\0';
1244
- // if (len < PATH_MAX - sizeof(encext))
1245
- // strcat(file, encext);
1246
1267
  strncat(file, encext, PATH_MAX - len -1);
1247
1268
 
1248
1269
  if ((fp = fopen(file, "rb")) == NULL) {
@@ -1274,6 +1295,10 @@ myUnknownEncodingHandler(void *recv,
1274
1295
  {
1275
1296
  XMLParser* parser;
1276
1297
  VALUE ret;
1298
+
1299
+ GET_PARSER(recv, parser);
1300
+ parser->detectedEncoding = name;
1301
+
1277
1302
  if (!rb_method_boundp(CLASS_OF((VALUE)recv), id_unknownEncoding, 0))
1278
1303
  #ifndef XML_ENC_PATH
1279
1304
  return 0;
@@ -1293,9 +1318,8 @@ myUnknownEncodingHandler(void *recv,
1293
1318
  }
1294
1319
  #endif
1295
1320
 
1296
- GET_PARSER(recv, parser);
1297
1321
  ret = rb_funcall((VALUE)recv, id_unknownEncoding, 1,
1298
- TO_(rb_str_new2((char*)name)));
1322
+ TO_(ENC_(rb_str_new2((char*)name))));
1299
1323
  if (TYPE(ret) == T_OBJECT && rb_obj_is_kind_of(ret, cXMLEncoding)) {
1300
1324
  int i;
1301
1325
  ID mid = rb_intern("map");
@@ -1308,7 +1332,7 @@ myUnknownEncodingHandler(void *recv,
1308
1332
 
1309
1333
  for (i = 0; i < 256; i++) {
1310
1334
  VALUE m = rb_funcall(ret, mid, 1, INT2FIX(i));
1311
- RSTRING(cmap)->ptr[i] = info->map[i] = FIX2INT(m);
1335
+ RSTRING_PTR(cmap)[i] = info->map[i] = FIX2INT(m);
1312
1336
  }
1313
1337
  /* protect object form GC */
1314
1338
  rb_ivar_set((VALUE)recv, rb_intern("_encoding"), ret);
@@ -1330,7 +1354,7 @@ mySkippedEntityHandler(void *recv,
1330
1354
  XMLParser* parser;
1331
1355
  GET_PARSER(recv, parser);
1332
1356
  rb_funcall((VALUE)recv, id_skippedEntityHandler, 2,
1333
- TO_(rb_str_new2((char*)entityName)),
1357
+ TO_(ENC_(rb_str_new2((char*)entityName))),
1334
1358
  INT2FIX(is_parameter_entity));
1335
1359
  }
1336
1360
  #endif
@@ -1359,7 +1383,7 @@ XMLParser_new(int argc, VALUE* argv, VALUE klass)
1359
1383
  /* new(encoding) */
1360
1384
  if (TYPE(arg1) != T_NIL) {
1361
1385
  Check_Type(arg1, T_STRING); /* encoding */
1362
- encoding = RSTRING(arg1)->ptr;
1386
+ encoding = RSTRING_PTR(arg1);
1363
1387
  }
1364
1388
  }
1365
1389
  else if (count == 2) {
@@ -1369,10 +1393,10 @@ XMLParser_new(int argc, VALUE* argv, VALUE klass)
1369
1393
  if (TYPE(arg1) != T_DATA) {
1370
1394
  if (TYPE(arg1) != T_NIL) {
1371
1395
  Check_Type(arg1, T_STRING); /* encoding */
1372
- encoding = RSTRING(arg1)->ptr;
1396
+ encoding = RSTRING_PTR(arg1);
1373
1397
  }
1374
1398
  Check_Type(arg2, T_STRING); /* nschar */
1375
- nssep = RSTRING(arg2)->ptr;
1399
+ nssep = RSTRING_PTR(arg2);
1376
1400
  }
1377
1401
  else {
1378
1402
  #endif
@@ -1380,7 +1404,7 @@ XMLParser_new(int argc, VALUE* argv, VALUE klass)
1380
1404
  GET_PARSER(arg1, rootparser);
1381
1405
  if (!NIL_P(arg2)) {
1382
1406
  Check_Type(arg2, T_STRING); /* context */
1383
- context = RSTRING(arg2)->ptr;
1407
+ context = RSTRING_PTR(arg2);
1384
1408
  }
1385
1409
  parent = arg1;
1386
1410
  #ifdef NEW_EXPAT
@@ -1393,10 +1417,10 @@ XMLParser_new(int argc, VALUE* argv, VALUE klass)
1393
1417
  GET_PARSER(arg1, rootparser);
1394
1418
  if (!NIL_P(arg2)) {
1395
1419
  Check_Type(arg2, T_STRING); /* context */
1396
- context = RSTRING(arg2)->ptr;
1420
+ context = RSTRING_PTR(arg2);
1397
1421
  }
1398
1422
  Check_Type(arg3, T_STRING); /* encoding */
1399
- encoding = RSTRING(arg3)->ptr;
1423
+ encoding = RSTRING_PTR(arg3);
1400
1424
  parent = arg1;
1401
1425
  }
1402
1426
 
@@ -1465,6 +1489,7 @@ XMLParser_new(int argc, VALUE* argv, VALUE klass)
1465
1489
  parser->lastAttrs = NULL;
1466
1490
  #endif
1467
1491
  parser->parent = parent;
1492
+ parser->detectedEncoding = NULL;
1468
1493
 
1469
1494
  rb_obj_call_init(obj, argc, argv);
1470
1495
 
@@ -1491,7 +1516,7 @@ XMLParser_reset(int argc, VALUE* argv, VALUE obj)
1491
1516
  GET_PARSER(obj, parser);
1492
1517
  if (count > 0 && TYPE(vencoding) != T_NIL) {
1493
1518
  Check_Type(vencoding, T_STRING);
1494
- encoding = RSTRING(vencoding)->ptr;
1519
+ encoding = RSTRING_PTR(vencoding);
1495
1520
  }
1496
1521
  XML_ParserReset(parser->parser, encoding);
1497
1522
  /* setting up internal data */
@@ -1502,17 +1527,14 @@ XMLParser_reset(int argc, VALUE* argv, VALUE obj)
1502
1527
  parser->lastAttrs = NULL;
1503
1528
  #endif
1504
1529
  parser->tainted = 0;
1530
+ parser->detectedEncoding = NULL;
1505
1531
 
1506
1532
  return obj;
1507
1533
  }
1508
1534
  #endif
1509
1535
 
1510
- /* parse method */
1511
- static VALUE
1512
- XMLParser_parse(int argc, VALUE* argv, VALUE obj)
1513
- {
1514
- XMLParser* parser;
1515
- int ret;
1536
+ static void
1537
+ setup_evnet_handlers(XMLParser* parser, VALUE obj) {
1516
1538
  XML_StartElementHandler start = NULL;
1517
1539
  XML_EndElementHandler end = NULL;
1518
1540
  #ifdef NEW_EXPAT
@@ -1525,38 +1547,6 @@ XMLParser_parse(int argc, VALUE* argv, VALUE obj)
1525
1547
  XML_StartDoctypeDeclHandler startDoctype = NULL;
1526
1548
  XML_EndDoctypeDeclHandler endDoctype = NULL;
1527
1549
  #endif
1528
- VALUE str;
1529
- VALUE isFinal;
1530
- int final = 1;
1531
- int count;
1532
- int fromStream = 0;
1533
- ID mid = rb_intern("gets");
1534
- ID linebuf = rb_intern("_linebuf");
1535
-
1536
- count = rb_scan_args(argc, argv, "02", &str, &isFinal);
1537
- /* If "str" has public "gets" method, it will be considered *stream* */
1538
- if (!rb_obj_is_kind_of(str, rb_cString) &&
1539
- rb_method_boundp(CLASS_OF(str), mid, 1)) {
1540
- fromStream = 1;
1541
- }
1542
- else if (!NIL_P(str)) {
1543
- Check_Type(str, T_STRING);
1544
- }
1545
- if (count >= 2) {
1546
- if (isFinal == Qtrue)
1547
- final = 1;
1548
- else if (isFinal == Qfalse)
1549
- final = 0;
1550
- else
1551
- rb_raise(rb_eTypeError, "not valid value");
1552
- }
1553
-
1554
- GET_PARSER(obj, parser);
1555
-
1556
- // parser->iterator = rb_iterator_p();
1557
- parser->iterator = rb_block_given_p();
1558
-
1559
- /* Setup event handlers */
1560
1550
 
1561
1551
  /* Call as iterator */
1562
1552
  if (parser->iterator) {
@@ -1726,6 +1716,47 @@ XMLParser_parse(int argc, VALUE* argv, VALUE obj)
1726
1716
  XML_SetSkippedEntityHandler(parser->parser, mySkippedEntityHandler);
1727
1717
  #endif
1728
1718
  }
1719
+ }
1720
+
1721
+
1722
+ /* parse method */
1723
+ static VALUE
1724
+ XMLParser_parse(int argc, VALUE* argv, VALUE obj)
1725
+ {
1726
+ XMLParser* parser;
1727
+ int ret;
1728
+ VALUE str;
1729
+ VALUE isFinal;
1730
+ int final = 1;
1731
+ int count;
1732
+ int fromStream = 0;
1733
+ ID mid = rb_intern("gets");
1734
+ ID linebuf = rb_intern("_linebuf");
1735
+
1736
+ count = rb_scan_args(argc, argv, "02", &str, &isFinal);
1737
+ /* If "str" has public "gets" method, it will be considered *stream* */
1738
+ if (!rb_obj_is_kind_of(str, rb_cString) &&
1739
+ rb_method_boundp(CLASS_OF(str), mid, 1)) {
1740
+ fromStream = 1;
1741
+ }
1742
+ else if (!NIL_P(str)) {
1743
+ Check_Type(str, T_STRING);
1744
+ }
1745
+ if (count >= 2) {
1746
+ if (isFinal == Qtrue)
1747
+ final = 1;
1748
+ else if (isFinal == Qfalse)
1749
+ final = 0;
1750
+ else
1751
+ rb_raise(rb_eTypeError, "not valid value");
1752
+ }
1753
+
1754
+ GET_PARSER(obj, parser);
1755
+
1756
+ parser->iterator = rb_block_given_p();
1757
+
1758
+ /* Setup event handlers */
1759
+ setup_evnet_handlers(parser, obj);
1729
1760
 
1730
1761
  /* Parse from stream (probably slightly slow) */
1731
1762
  if (fromStream) {
@@ -1741,7 +1772,7 @@ XMLParser_parse(int argc, VALUE* argv, VALUE obj)
1741
1772
  taintParser(parser);
1742
1773
  rb_ivar_set(obj, linebuf, buf); /* protect buf from GC (reasonable?)*/
1743
1774
  ret = XML_Parse(parser->parser,
1744
- RSTRING(buf)->ptr, RSTRING(buf)->len, 0);
1775
+ RSTRING_PTR(buf), RSTRING_LEN(buf), 0);
1745
1776
  }
1746
1777
  else {
1747
1778
  ret = XML_Parse(parser->parser, NULL, 0, 1);
@@ -1757,10 +1788,41 @@ XMLParser_parse(int argc, VALUE* argv, VALUE obj)
1757
1788
 
1758
1789
  /* Parse string */
1759
1790
  if (!NIL_P(str)) {
1791
+ #if defined(HAVE_RUBY_ENCODING_H) && defined(HAVE_XML_PARSERRESET)
1792
+ int err;
1793
+ #endif
1760
1794
  if (OBJ_TAINTED(str))
1761
1795
  taintParser(parser);
1762
1796
  ret = XML_Parse(parser->parser,
1763
- RSTRING(str)->ptr, RSTRING(str)->len, final);
1797
+ RSTRING_PTR(str), RSTRING_LEN(str), final);
1798
+ #if defined(HAVE_RUBY_ENCODING_H) && defined(HAVE_XML_PARSERRESET)
1799
+ /* Ruby 1.9.1 Encoding conversion */
1800
+ err = XML_GetErrorCode(parser->parser);
1801
+ if (final && err == XML_ERROR_UNKNOWN_ENCODING) {
1802
+ rb_encoding* enc;
1803
+ volatile VALUE encobj;
1804
+ volatile VALUE ustr;
1805
+ enc = rb_enc_find(parser->detectedEncoding);
1806
+ if ((int)ENC_TO_ENCINDEX(enc) != rb_ascii8bit_encindex()) {
1807
+ rb_enc_associate(str, enc);
1808
+ encobj = rb_enc_from_encoding(enc_xml);
1809
+ /* rb_str_encode may raise an exception */
1810
+ ustr = rb_str_encode(str, encobj, 0, Qnil);
1811
+ if (!NIL_P(ustr)) {
1812
+ XML_ParserReset(parser->parser, "utf-8");
1813
+ XML_SetUserData(parser->parser, (void*)obj);
1814
+ parser->defaultCurrent = 0;
1815
+ #ifdef NEW_EXPAT
1816
+ parser->lastAttrs = NULL;
1817
+ #endif
1818
+ parser->detectedEncoding = NULL;
1819
+ setup_evnet_handlers(parser, obj);
1820
+ ret = XML_Parse(parser->parser,
1821
+ RSTRING_PTR(ustr), RSTRING_LEN(ustr), final);
1822
+ }
1823
+ }
1824
+ }
1825
+ #endif
1764
1826
  }
1765
1827
  else
1766
1828
  ret = XML_Parse(parser->parser, NULL, 0, final);
@@ -1853,7 +1915,7 @@ XMLParser_setBase(VALUE obj, VALUE base)
1853
1915
  GET_PARSER(obj, parser);
1854
1916
  if (OBJ_TAINTED(base))
1855
1917
  taintParser(parser);
1856
- ret = XML_SetBase(parser->parser, RSTRING(base)->ptr);
1918
+ ret = XML_SetBase(parser->parser, RSTRING_PTR(base));
1857
1919
 
1858
1920
  return INT2FIX(ret);
1859
1921
  }
@@ -1870,7 +1932,7 @@ XMLParser_getBase(VALUE obj)
1870
1932
  if (!ret)
1871
1933
  return Qnil;
1872
1934
 
1873
- return TO_(rb_str_new2((char*)ret));
1935
+ return TO_(ENC_(rb_str_new2((char*)ret)));
1874
1936
  }
1875
1937
 
1876
1938
  #ifdef NEW_EXPAT
@@ -1892,7 +1954,7 @@ XMLParser_getSpecifiedAttributes(VALUE obj)
1892
1954
  while (*atts) {
1893
1955
  const char* key = *atts++;
1894
1956
  atts++;
1895
- rb_hash_aset(attrhash, FO_(TO_(rb_str_new2((char*)key))),
1957
+ rb_hash_aset(attrhash, FO_(TO_(ENC_(rb_str_new2((char*)key)))),
1896
1958
  (count-- > 0) ? Qtrue: Qfalse);
1897
1959
  }
1898
1960
 
@@ -1915,7 +1977,7 @@ XMLParser_getSpecifiedAttributes(VALUE obj)
1915
1977
  attrarray = rb_ary_new2(count);
1916
1978
  for (i = 0; i < count; i++, atts+=2) {
1917
1979
  const char* key = *atts;
1918
- rb_ary_push(attrarray, TO_(rb_str_new2((char*)key)));
1980
+ rb_ary_push(attrarray, TO_(ENC_(rb_str_new2((char*)key))));
1919
1981
  }
1920
1982
 
1921
1983
  return attrarray;
@@ -1951,13 +2013,13 @@ static VALUE
1951
2013
  XMLParser_s_expatVersion(VALUE obj)
1952
2014
  {
1953
2015
  #if defined(HAVE_EXPAT_H)
1954
- return rb_str_new2(XML_ExpatVersion());
2016
+ return ENC_(rb_str_new2(XML_ExpatVersion()));
1955
2017
  #elif defined(EXPAT_1_2)
1956
- return rb_str_new2("1.2");
2018
+ return ENC_(rb_str_new2("1.2"));
1957
2019
  #elif defined(NEW_EXPAT)
1958
- return rb_str_new2("1.1");
2020
+ return ENC_(rb_str_new2("1.1"));
1959
2021
  #else
1960
- return rb_str_new2("1.0");
2022
+ return ENC_(rb_str_new2("1.0"));
1961
2023
  #endif
1962
2024
  }
1963
2025
 
@@ -2003,7 +2065,7 @@ XMLParser_getInputContext(VALUE obj)
2003
2065
  &size);
2004
2066
  if (buffer && size > 0) {
2005
2067
  ret = rb_ary_new3(2,
2006
- TO_(rb_str_new(buffer, size)),
2068
+ TO_(ENC_(rb_str_new(buffer, size))),
2007
2069
  INT2FIX(offset));
2008
2070
  }
2009
2071
 
@@ -2025,7 +2087,7 @@ XMLParser_getIdAttrribute(VALUE obj)
2025
2087
  idattr = XML_GetIdAttributeIndex(parser->parser);
2026
2088
  if (idattr < 0)
2027
2089
  return Qnil;
2028
- return TO_(rb_str_new2((char*)atts[idattr]));
2090
+ return TO_(ENC_(rb_str_new2((char*)atts[idattr])));
2029
2091
  }
2030
2092
  #endif
2031
2093
 
@@ -2066,7 +2128,7 @@ XMLParser_s_getFeatureList(VALUE obj)
2066
2128
 
2067
2129
  list = XML_GetFeatureList();
2068
2130
  while (list && list->feature) {
2069
- rb_hash_aset(ret, FO_(rb_str_new2(list->name)), INT2NUM(list->value));
2131
+ rb_hash_aset(ret, FO_(ENC_(rb_str_new2(list->name))), INT2NUM(list->value));
2070
2132
  list++;
2071
2133
  }
2072
2134
 
@@ -2079,6 +2141,10 @@ Init_xmlparser()
2079
2141
  {
2080
2142
  VALUE mXML;
2081
2143
 
2144
+ #ifdef HAVE_RUBY_ENCODING_H
2145
+ enc_xml = rb_utf8_encoding();
2146
+ #endif
2147
+
2082
2148
  eXMLParserError = rb_define_class("XMLParserError", rb_eStandardError);
2083
2149
  cXMLParser = rb_define_class("XMLParser", rb_cObject);
2084
2150
  cXMLEncoding = rb_define_class("XMLEncoding", rb_cObject);