xmlparser 0.6.81 → 0.7.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,58 @@
1
+ #
2
+ # ruby extconf.rb
3
+ # --with-perl-enc-map[=/path/to/enc-map]
4
+ # --with-expat-dir=/path/to/expat
5
+ # --with-expat-lib=/path/to/expat/lib
6
+ # --with-expat-include=/path/to/expat/include
7
+ #
8
+ require 'mkmf'
9
+
10
+ cwd=`pwd`.chomp!
11
+ perl= ENV['PERL'] || 'perl'
12
+
13
+ ## Encoding maps may be stored in $perl_archlib/XML/Parser/Encodings/
14
+ #perl_archlib = '/usr/lib/perl5/site_perl/5.005/i586-linux'
15
+ #perl_archlib = '/usr/local/lib'
16
+ perl_archlib = `#{perl} -e 'use Config; print $Config{"archlib"}'`
17
+ xml_enc_path = with_config("perl-enc-map")
18
+ if xml_enc_path == true
19
+ xml_enc_path = perl_archlib + "/XML/Parser/Encodings"
20
+ end
21
+
22
+ ##$CFLAGS="-I#{cwd}/expat/xmlparse -I#{cwd}/expat/xmltok" +
23
+ ## ' -DXML_ENC_PATH=getenv\(\"XML_ENC_PATH\"\)' +
24
+ ## " -DNEW_EXPAT"
25
+ #$CFLAGS = "-I#{cwd}/expat/xmlparse -I#{cwd}/expat/xmltok"
26
+ #$LDFLAGS = "-L#{cwd}/expat/xmlparse -Wl,-rpath,/usr/local/lib"
27
+ #$LDFLAGS = "-L#{cwd}/expat/xmlparse"
28
+ dir_config("expat")
29
+ #dir_config("xmltok")
30
+ #dir_config("xmlparse")
31
+ if xml_enc_path
32
+ $CFLAGS += " -DXML_ENC_PATH=\\\"#{xml_enc_path}\\\""
33
+ end
34
+
35
+ #if have_header("xmlparse.h") || have_header("expat.h")
36
+ if have_header("expat.h") || have_header("xmlparse.h")
37
+ if have_library("expat", "XML_ParserCreate") ||
38
+ have_library("xmltok", "XML_ParserCreate")
39
+ if have_func("XML_SetNotStandaloneHandler")
40
+ $CFLAGS += " -DNEW_EXPAT"
41
+ end
42
+ if have_func("XML_SetParamEntityParsing")
43
+ $CFLAGS += " -DXML_DTD"
44
+ end
45
+ # if have_func("XML_SetExternalParsedEntityDeclHandler")
46
+ # $CFLAGS += " -DEXPAT_1_2"
47
+ # end
48
+ have_func("XML_SetDoctypeDeclHandler")
49
+ have_func("XML_ParserReset")
50
+ have_func("XML_SetSkippedEntityHandler")
51
+ have_func("XML_GetFeatureList")
52
+ have_func("XML_UseForeignDTD")
53
+ have_func("XML_GetIdAttributeIndex")
54
+ have_library("socket", "ntohl")
55
+ have_library("wsock32") if RUBY_PLATFORM =~ /mswin32|mingw/
56
+ create_makefile("xmlparser")
57
+ end
58
+ end
@@ -1,5 +1,6 @@
1
1
  /*
2
2
  * Expat (XML Parser Toolkit) wrapper for Ruby
3
+ * Dec 15, 2009 yoshidam version 0.7.0 support Ruby 1.9.1
3
4
  * Feb 16, 2004 yoshidam version 0.6.8 taint output string
4
5
  * Feb 16, 2004 yoshidam version 0.6.7 fix buffer overflow
5
6
  * Mar 11, 2003 yoshidam version 0.6.6 fix skippedEntity handler
@@ -41,7 +42,11 @@
41
42
  */
42
43
 
43
44
  #include "ruby.h"
44
- #include "rubyio.h"
45
+ #ifdef HAVE_RUBY_IO_H
46
+ # include "ruby/io.h"
47
+ #else
48
+ # include "rubyio.h"
49
+ #endif
45
50
  #include <stdio.h>
46
51
  #include <ctype.h>
47
52
  #ifdef HAVE_EXPAT_H
@@ -58,6 +63,15 @@
58
63
  # endif
59
64
  #endif
60
65
 
66
+ #ifndef RSTRING_PTR
67
+ # define RSTRING_PTR(s) (RSTRING(s)->ptr)
68
+ # define RSTRING_LEN(s) (RSTRING(s)->len)
69
+ #endif
70
+
71
+ #ifdef HAVE_RUBY_ENCODING_H
72
+ static rb_encoding* enc_xml;
73
+ #endif
74
+
61
75
  static VALUE eXMLParserError;
62
76
  static VALUE cXMLParser;
63
77
  static VALUE cXMLEncoding;
@@ -112,6 +126,7 @@ typedef struct _XMLParser {
112
126
  int tainted;
113
127
  VALUE parent;
114
128
  char* context;
129
+ const XML_Char *detectedEncoding;
115
130
  } XMLParser;
116
131
 
117
132
  static VALUE symDEFAULT;
@@ -198,6 +213,12 @@ freezeObject(VALUE obj) {
198
213
  }
199
214
  #define FO_(o) (freezeObject(o))
200
215
 
216
+ #ifdef HAVE_RUBY_ENCODING_H
217
+ # define ENC_(o) (rb_enc_associate(o, enc_xml))
218
+ #else
219
+ # define ENC_(o) (o)
220
+ #endif
221
+
201
222
 
202
223
  /* Event handlers for iterator */
203
224
  static void
@@ -216,12 +237,12 @@ iterStartElementHandler(void *recv,
216
237
  const char* key = *atts++;
217
238
  const char* val = *atts++;
218
239
  rb_hash_aset(attrhash,
219
- FO_(TO_(rb_str_new2((char*)key))),
220
- TO_(rb_str_new2((char*)val)));
240
+ FO_(TO_(ENC_(rb_str_new2((char*)key)))),
241
+ TO_(ENC_(rb_str_new2((char*)val))));
221
242
  }
222
243
 
223
244
  rb_yield(rb_ary_new3(4, symSTART_ELEM,
224
- TO_(rb_str_new2((char*)name)), attrhash, recv));
245
+ TO_(ENC_(rb_str_new2((char*)name))), attrhash, recv));
225
246
  if (parser->defaultCurrent) {
226
247
  parser->defaultCurrent = 0;
227
248
  XML_DefaultCurrent(parser->parser);
@@ -235,7 +256,7 @@ iterEndElementHandler(void *recv,
235
256
  XMLParser* parser;
236
257
  GET_PARSER(recv, parser);
237
258
  rb_yield(rb_ary_new3(4, symEND_ELEM,
238
- TO_(rb_str_new2((char*)name)), Qnil, recv));
259
+ TO_(ENC_(rb_str_new2((char*)name))), Qnil, recv));
239
260
  if (parser->defaultCurrent) {
240
261
  parser->defaultCurrent = 0;
241
262
  XML_DefaultCurrent(parser->parser);
@@ -250,7 +271,7 @@ iterCharacterDataHandler(void *recv,
250
271
  XMLParser* parser;
251
272
  GET_PARSER(recv, parser);
252
273
  rb_yield(rb_ary_new3(4, symCDATA,
253
- Qnil, TO_(rb_str_new((char*)s, len)), recv));
274
+ Qnil, TO_(ENC_(rb_str_new((char*)s, len))), recv));
254
275
  if (parser->defaultCurrent) {
255
276
  parser->defaultCurrent = 0;
256
277
  XML_DefaultCurrent(parser->parser);
@@ -265,8 +286,8 @@ iterProcessingInstructionHandler(void *recv,
265
286
  XMLParser* parser;
266
287
  GET_PARSER(recv, parser);
267
288
  rb_yield(rb_ary_new3(4, symPI,
268
- TO_(rb_str_new2((char*)target)),
269
- TO_(rb_str_new2((char*)data)), recv));
289
+ TO_(ENC_(rb_str_new2((char*)target))),
290
+ TO_(ENC_(rb_str_new2((char*)data))), recv));
270
291
  if (parser->defaultCurrent) {
271
292
  parser->defaultCurrent = 0;
272
293
  XML_DefaultCurrent(parser->parser);
@@ -281,7 +302,7 @@ iterDefaultHandler(void *recv,
281
302
  XMLParser* parser;
282
303
  GET_PARSER(recv, parser);
283
304
  rb_yield(rb_ary_new3(4, symDEFAULT,
284
- Qnil, TO_(rb_str_new((char*)s, len)), recv));
305
+ Qnil, TO_(ENC_(rb_str_new((char*)s, len))), recv));
285
306
  if (parser->defaultCurrent) {
286
307
  parser->defaultCurrent = 0;
287
308
  /* XML_DefaultCurrent should not be called in defaultHandler */
@@ -301,12 +322,12 @@ iterUnparsedEntityDeclHandler(void *recv,
301
322
  VALUE valary;
302
323
 
303
324
  GET_PARSER(recv, parser);
304
- valary = rb_ary_new3(4, (base ? TO_(rb_str_new2((char*)base)) : Qnil),
305
- TO_(rb_str_new2((char*)systemId)),
306
- (publicId ? TO_(rb_str_new2((char*)publicId)) : Qnil),
307
- TO_(rb_str_new2((char*)notationName)));
325
+ valary = rb_ary_new3(4, (base ? TO_(ENC_(rb_str_new2((char*)base))) : Qnil),
326
+ TO_(ENC_(rb_str_new2((char*)systemId))),
327
+ (publicId ? TO_(ENC_(rb_str_new2((char*)publicId))) : Qnil),
328
+ TO_(ENC_(rb_str_new2((char*)notationName))));
308
329
  rb_yield(rb_ary_new3(4, symUNPARSED_ENTITY_DECL,
309
- TO_(rb_str_new2((char*)entityName)),
330
+ TO_(ENC_(rb_str_new2((char*)entityName))),
310
331
  valary, recv));
311
332
  if (parser->defaultCurrent) {
312
333
  parser->defaultCurrent = 0;
@@ -326,11 +347,11 @@ iterNotationDeclHandler(void *recv,
326
347
 
327
348
  GET_PARSER(recv, parser);
328
349
  valary = rb_ary_new3(3,
329
- (base ? TO_(rb_str_new2((char*)base)) : Qnil),
330
- (systemId ? TO_(rb_str_new2((char*)systemId)) : Qnil),
331
- (publicId ? TO_(rb_str_new2((char*)publicId)) : Qnil));
350
+ (base ? TO_(ENC_(rb_str_new2((char*)base))) : Qnil),
351
+ (systemId ? TO_(ENC_(rb_str_new2((char*)systemId))) : Qnil),
352
+ (publicId ? TO_(ENC_(rb_str_new2((char*)publicId))) : Qnil));
332
353
  rb_yield(rb_ary_new3(4, symNOTATION_DECL,
333
- TO_(rb_str_new2((char*)notationName)),
354
+ TO_(ENC_(rb_str_new2((char*)notationName))),
334
355
  valary, recv));
335
356
  if (parser->defaultCurrent) {
336
357
  parser->defaultCurrent = 0;
@@ -353,11 +374,11 @@ iterExternalEntityRefHandler(XML_Parser xmlparser,
353
374
  recv = (VALUE)XML_GetUserData(xmlparser);
354
375
  GET_PARSER(recv, parser);
355
376
  valary = rb_ary_new3(3,
356
- (base ? TO_(rb_str_new2((char*)base)) : Qnil),
357
- (systemId ? TO_(rb_str_new2((char*)systemId)) : Qnil),
358
- (publicId ? TO_(rb_str_new2((char*)publicId)) : Qnil));
377
+ (base ? TO_(ENC_(rb_str_new2((char*)base))) : Qnil),
378
+ (systemId ? TO_(ENC_(rb_str_new2((char*)systemId))) : Qnil),
379
+ (publicId ? TO_(ENC_(rb_str_new2((char*)publicId))) : Qnil));
359
380
  ret = rb_yield(rb_ary_new3(4, symEXTERNAL_ENTITY_REF,
360
- (context ? TO_(rb_str_new2((char*)context)) : Qnil),
381
+ (context ? TO_(ENC_(rb_str_new2((char*)context))) : Qnil),
361
382
  valary, recv));
362
383
  if (parser->defaultCurrent) {
363
384
  parser->defaultCurrent = 0;
@@ -376,7 +397,7 @@ iterCommentHandler(void *recv,
376
397
  XMLParser* parser;
377
398
  GET_PARSER(recv, parser);
378
399
  rb_yield(rb_ary_new3(4, symCOMMENT,
379
- Qnil, TO_(rb_str_new2((char*)s)), recv));
400
+ Qnil, TO_(ENC_(rb_str_new2((char*)s))), recv));
380
401
  if (parser->defaultCurrent) {
381
402
  parser->defaultCurrent = 0;
382
403
  XML_DefaultCurrent(parser->parser);
@@ -415,8 +436,8 @@ iterStartNamespaceDeclHandler(void *recv,
415
436
  XMLParser* parser;
416
437
  GET_PARSER(recv, parser);
417
438
  rb_yield(rb_ary_new3(4, symSTART_NAMESPACE_DECL,
418
- (prefix ? TO_(rb_str_new2((char*)prefix)) : Qnil),
419
- (uri ? TO_(rb_str_new2((char*)uri)) : Qnil), recv));
439
+ (prefix ? TO_(ENC_(rb_str_new2((char*)prefix))) : Qnil),
440
+ (uri ? TO_(ENC_(rb_str_new2((char*)uri))) : Qnil), recv));
420
441
  if (parser->defaultCurrent) {
421
442
  parser->defaultCurrent = 0;
422
443
  XML_DefaultCurrent(parser->parser);
@@ -430,7 +451,7 @@ iterEndNamespaceDeclHandler(void *recv,
430
451
  XMLParser* parser;
431
452
  GET_PARSER(recv, parser);
432
453
  rb_yield(rb_ary_new3(4, symEND_NAMESPACE_DECL,
433
- (prefix ? TO_(rb_str_new2((char*)prefix)) : Qnil),
454
+ (prefix ? TO_(ENC_(rb_str_new2((char*)prefix))) : Qnil),
434
455
  Qnil, recv));
435
456
  if (parser->defaultCurrent) {
436
457
  parser->defaultCurrent = 0;
@@ -458,12 +479,12 @@ iterStartDoctypeDeclHandler(void *recv,
458
479
  GET_PARSER(recv, parser);
459
480
  #ifdef HAVE_EXPAT_H
460
481
  valary = rb_ary_new3(3,
461
- (sysid ? TO_(rb_str_new2((char*)sysid)) : Qnil),
462
- (pubid ? TO_(rb_str_new2((char*)pubid)) : Qnil),
482
+ (sysid ? TO_(ENC_(rb_str_new2((char*)sysid))) : Qnil),
483
+ (pubid ? TO_(ENC_(rb_str_new2((char*)pubid))) : Qnil),
463
484
  (has_internal_subset ? Qtrue : Qfalse));
464
485
  #endif
465
486
  rb_yield(rb_ary_new3(4, symSTART_DOCTYPE_DECL,
466
- TO_(rb_str_new2((char*)doctypeName)),
487
+ TO_(ENC_(rb_str_new2((char*)doctypeName))),
467
488
  valary, recv));
468
489
  if (parser->defaultCurrent) {
469
490
  parser->defaultCurrent = 0;
@@ -498,18 +519,18 @@ makeContentArray(XMLParser* parser, XML_Content* model)
498
519
  static const char* content_quant_name[] = {
499
520
  "", "?", "*", "+"
500
521
  };
501
- int i;
522
+ unsigned int i;
502
523
  VALUE children = Qnil;
503
524
  const char* type_name = content_type_name[model->type];
504
525
  const char* quant_name = content_quant_name[model->quant];
505
526
  VALUE ret = rb_ary_new3(3,
506
- TO_(rb_str_new2((char*)type_name)),
507
- TO_(rb_str_new2((char*)quant_name)),
508
- (model->name ? TO_(rb_str_new2((char*)model->name)) :
527
+ TO_(ENC_(rb_str_new2((char*)type_name))),
528
+ TO_(ENC_(rb_str_new2((char*)quant_name))),
529
+ (model->name ? TO_(ENC_(rb_str_new2((char*)model->name))) :
509
530
  Qnil));
510
531
  if (model->numchildren > 0) {
511
532
  children = rb_ary_new();
512
- for (i =0; i < model->numchildren; i++) {
533
+ for (i = 0; i < model->numchildren; i++) {
513
534
  VALUE child = makeContentArray(parser, model->children + i);
514
535
  rb_ary_push(children, child);
515
536
  }
@@ -526,10 +547,11 @@ iterElementDeclHandler(void *recv,
526
547
  XML_Content *model)
527
548
  {
528
549
  XMLParser* parser;
550
+ VALUE content;
529
551
  GET_PARSER(recv, parser);
530
- VALUE content = makeContentArray(parser, model);
552
+ content = makeContentArray(parser, model);
531
553
  rb_yield(rb_ary_new3(4, symELEMENT_DECL,
532
- TO_(rb_str_new2(name)),
554
+ TO_(ENC_(rb_str_new2(name))),
533
555
  content, recv));
534
556
  if (parser->defaultCurrent) {
535
557
  parser->defaultCurrent = 0;
@@ -550,12 +572,12 @@ iterAttlistDeclHandler (void *recv,
550
572
 
551
573
  GET_PARSER(recv, parser);
552
574
  valary = rb_ary_new3(4,
553
- TO_(rb_str_new2((char*)attname)),
554
- TO_(rb_str_new2((char*)att_type)),
555
- (dflt ? TO_(rb_str_new2((char*)dflt)) : Qnil),
575
+ TO_(ENC_(rb_str_new2((char*)attname))),
576
+ TO_(ENC_(rb_str_new2((char*)att_type))),
577
+ (dflt ? TO_(ENC_(rb_str_new2((char*)dflt))) : Qnil),
556
578
  (isrequired ? Qtrue : Qfalse));
557
579
  rb_yield(rb_ary_new3(4, symATTLIST_DECL,
558
- TO_(rb_str_new2(elname)),
580
+ TO_(ENC_(rb_str_new2(elname))),
559
581
  valary, recv));
560
582
  if (parser->defaultCurrent) {
561
583
  parser->defaultCurrent = 0;
@@ -574,8 +596,8 @@ iterXmlDeclHandler (void *recv,
574
596
 
575
597
  GET_PARSER(recv, parser);
576
598
  valary = rb_ary_new3(3,
577
- (version ? TO_(rb_str_new2(version)) : Qnil),
578
- (encoding ? TO_(rb_str_new2((char*)encoding)) : Qnil),
599
+ (version ? TO_(ENC_(rb_str_new2(version))) : Qnil),
600
+ (encoding ? TO_(ENC_(rb_str_new2((char*)encoding))) : Qnil),
579
601
  INT2FIX(standalone));
580
602
  rb_yield(rb_ary_new3(4, symXML_DECL,
581
603
  Qnil,
@@ -603,14 +625,14 @@ iterEntityDeclHandler (void *recv,
603
625
  GET_PARSER(recv, parser);
604
626
  valary = rb_ary_new3(6,
605
627
  (is_parameter_entity ? Qtrue : Qfalse),
606
- TO_(rb_str_new((char*)value, value_length)),
607
- (base ? TO_(rb_str_new2((char*)base)) : Qnil),
608
- (systemId ? TO_(rb_str_new2((char*)systemId)) : Qnil),
609
- (publicId ? TO_(rb_str_new2((char*)publicId)) : Qnil),
610
- (notationName ? TO_(rb_str_new2((char*)notationName))
628
+ TO_(ENC_(rb_str_new((char*)value, value_length))),
629
+ (base ? TO_(ENC_(rb_str_new2((char*)base))) : Qnil),
630
+ (systemId ? TO_(ENC_(rb_str_new2((char*)systemId))) : Qnil),
631
+ (publicId ? TO_(ENC_(rb_str_new2((char*)publicId))) : Qnil),
632
+ (notationName ? TO_(ENC_(rb_str_new2((char*)notationName)))
611
633
  : Qnil));
612
634
  rb_yield(rb_ary_new3(4, symENTITY_DECL,
613
- TO_(rb_str_new2(entityName)),
635
+ TO_(ENC_(rb_str_new2(entityName))),
614
636
  valary, recv));
615
637
  if (parser->defaultCurrent) {
616
638
  parser->defaultCurrent = 0;
@@ -632,11 +654,11 @@ iterExternalParsedEntityDeclHandler(void *recv,
632
654
  VALUE valary;
633
655
 
634
656
  GET_PARSER(recv, parser);
635
- valary = rb_ary_new3(3, (base ? TO_(rb_str_new2((char*)base)) : Qnil),
636
- TO_(rb_str_new2((char*)systemId)),
637
- (publicId ? TO_(rb_str_new2((char*)publicId)) : Qnil));
657
+ valary = rb_ary_new3(3, (base ? TO_(ENC_(rb_str_new2((char*)base))) : Qnil),
658
+ TO_(ENC_(rb_str_new2((char*)systemId))),
659
+ (publicId ? TO_(ENC_(rb_str_new2((char*)publicId))) : Qnil));
638
660
  rb_yield(rb_ary_new3(4, symEXTERNAL_PARSED_ENTITY_DECL,
639
- TO_(rb_str_new2((char*)entityName)),
661
+ TO_(ENC_(rb_str_new2((char*)entityName))),
640
662
  valary, recv));
641
663
  if (parser->defaultCurrent) {
642
664
  parser->defaultCurrent = 0;
@@ -653,9 +675,9 @@ iterInternalParsedEntityDeclHandler(void *recv,
653
675
  XMLParser* parser;
654
676
  GET_PARSER(recv, parser);
655
677
  rb_yield(rb_ary_new3(4, symINTERNAL_PARSED_ENTITY_DECL,
656
- TO_(rb_str_new2((char*)entityName)),
657
- TO_(rb_str_new((char*)replacementText,
658
- replacementTextLength)), recv));
678
+ TO_(ENC_(rb_str_new2((char*)entityName))),
679
+ TO_(ENC_(rb_str_new((char*)replacementText,
680
+ replacementTextLength))), recv));
659
681
  if (parser->defaultCurrent) {
660
682
  parser->defaultCurrent = 0;
661
683
  XML_DefaultCurrent(parser->parser);
@@ -672,7 +694,7 @@ iterSkippedEntityHandler(void *recv,
672
694
  XMLParser* parser;
673
695
  GET_PARSER(recv, parser);
674
696
  rb_yield(rb_ary_new3(4, symSKIPPED_ENTITY,
675
- TO_(rb_str_new2((char*)entityName)),
697
+ TO_(ENC_(rb_str_new2((char*)entityName))),
676
698
  INT2FIX(is_parameter_entity), recv));
677
699
  if (parser->defaultCurrent) {
678
700
  parser->defaultCurrent = 0;
@@ -700,11 +722,11 @@ myStartElementHandler(void *recv,
700
722
  const char* key = *atts++;
701
723
  const char* val = *atts++;
702
724
  rb_hash_aset(attrhash,
703
- FO_(TO_(rb_str_new2((char*)key))),
704
- TO_(rb_str_new2((char*)val)));
725
+ FO_(TO_(ENC_(rb_str_new2((char*)key)))),
726
+ TO_(ENC_(rb_str_new2((char*)val))));
705
727
  }
706
728
  rb_funcall((VALUE)recv, id_startElementHandler, 2,
707
- TO_(rb_str_new2((char*)name)), attrhash);
729
+ TO_(ENC_(rb_str_new2((char*)name))), attrhash);
708
730
  }
709
731
 
710
732
  static void
@@ -714,7 +736,7 @@ myEndElementHandler(void *recv,
714
736
  XMLParser* parser;
715
737
  GET_PARSER(recv, parser);
716
738
  rb_funcall((VALUE)recv, id_endElementHandler, 1,
717
- TO_(rb_str_new2((char*)name)));
739
+ TO_(ENC_(rb_str_new2((char*)name))));
718
740
  }
719
741
 
720
742
  static void
@@ -725,7 +747,7 @@ myCharacterDataHandler(void *recv,
725
747
  XMLParser* parser;
726
748
  GET_PARSER(recv, parser);
727
749
  rb_funcall((VALUE)recv, id_characterDataHandler, 1,
728
- TO_(rb_str_new((char*)s, len)));
750
+ TO_(ENC_(rb_str_new((char*)s, len))));
729
751
  }
730
752
 
731
753
  static void
@@ -736,8 +758,8 @@ myProcessingInstructionHandler(void *recv,
736
758
  XMLParser* parser;
737
759
  GET_PARSER(recv, parser);
738
760
  rb_funcall((VALUE)recv, id_processingInstructionHandler, 2,
739
- TO_(rb_str_new2((char*)target)),
740
- TO_(rb_str_new2((char*)data)));
761
+ TO_(ENC_(rb_str_new2((char*)target))),
762
+ TO_(ENC_(rb_str_new2((char*)data))));
741
763
  }
742
764
 
743
765
  static void
@@ -748,7 +770,7 @@ myDefaultHandler(void *recv,
748
770
  XMLParser* parser;
749
771
  GET_PARSER(recv, parser);
750
772
  rb_funcall((VALUE)recv, id_defaultHandler, 1,
751
- TO_(rb_str_new((char*)s, len)));
773
+ TO_(ENC_(rb_str_new((char*)s, len))));
752
774
  }
753
775
 
754
776
  #ifdef NEW_EXPAT
@@ -760,7 +782,7 @@ myDefaultExpandHandler(void *recv,
760
782
  XMLParser* parser;
761
783
  GET_PARSER(recv, parser);
762
784
  rb_funcall((VALUE)recv, id_defaultExpandHandler, 1,
763
- TO_(rb_str_new((char*)s, len)));
785
+ TO_(ENC_(rb_str_new((char*)s, len))));
764
786
  }
765
787
  #endif
766
788
 
@@ -775,11 +797,11 @@ myUnparsedEntityDeclHandler(void *recv,
775
797
  XMLParser* parser;
776
798
  GET_PARSER(recv, parser);
777
799
  rb_funcall((VALUE)recv, id_unparsedEntityDeclHandler, 5,
778
- TO_(rb_str_new2((char*)entityName)),
779
- (base ? TO_(rb_str_new2((char*)base)) : Qnil),
780
- TO_(rb_str_new2((char*)systemId)),
781
- (publicId ? TO_(rb_str_new2((char*)publicId)) : Qnil),
782
- TO_(rb_str_new2((char*)notationName)));
800
+ TO_(ENC_(rb_str_new2((char*)entityName))),
801
+ (base ? TO_(ENC_(rb_str_new2((char*)base))) : Qnil),
802
+ TO_(ENC_(rb_str_new2((char*)systemId))),
803
+ (publicId ? TO_(ENC_(rb_str_new2((char*)publicId))) : Qnil),
804
+ TO_(ENC_(rb_str_new2((char*)notationName))));
783
805
  }
784
806
 
785
807
  void
@@ -792,10 +814,10 @@ myNotationDeclHandler(void *recv,
792
814
  XMLParser* parser;
793
815
  GET_PARSER(recv, parser);
794
816
  rb_funcall((VALUE)recv, id_notationDeclHandler, 4,
795
- TO_(rb_str_new2((char*)notationName)),
796
- (base ? TO_(rb_str_new2((char*)base)) : Qnil),
797
- (systemId ? TO_(rb_str_new2((char*)systemId)) : Qnil),
798
- (publicId ? TO_(rb_str_new2((char*)publicId)) : Qnil));
817
+ TO_(ENC_(rb_str_new2((char*)notationName))),
818
+ (base ? TO_(ENC_(rb_str_new2((char*)base))) : Qnil),
819
+ (systemId ? TO_(ENC_(rb_str_new2((char*)systemId))) : Qnil),
820
+ (publicId ? TO_(ENC_(rb_str_new2((char*)publicId))) : Qnil));
799
821
  }
800
822
 
801
823
  int
@@ -812,10 +834,10 @@ myExternalEntityRefHandler(XML_Parser xmlparser,
812
834
  recv = (VALUE)XML_GetUserData(xmlparser);
813
835
  GET_PARSER(recv, parser);
814
836
  ret = rb_funcall(recv, id_externalEntityRefHandler, 4,
815
- (context ? TO_(rb_str_new2((char*)context)): Qnil),
816
- (base ? TO_(rb_str_new2((char*)base)) : Qnil),
817
- (systemId ? TO_(rb_str_new2((char*)systemId)) : Qnil),
818
- (publicId ? TO_(rb_str_new2((char*)publicId)) : Qnil));
837
+ (context ? TO_(ENC_(rb_str_new2((char*)context))): Qnil),
838
+ (base ? TO_(ENC_(rb_str_new2((char*)base))) : Qnil),
839
+ (systemId ? TO_(ENC_(rb_str_new2((char*)systemId))) : Qnil),
840
+ (publicId ? TO_(ENC_(rb_str_new2((char*)publicId))) : Qnil));
819
841
  /* The error status in this handler should be returned
820
842
  by the exception. */
821
843
  return Qnil;
@@ -829,7 +851,7 @@ myCommentHandler(void *recv,
829
851
  XMLParser* parser;
830
852
  GET_PARSER(recv, parser);
831
853
  rb_funcall((VALUE)recv, id_commentHandler, 1,
832
- TO_(rb_str_new2((char*)s)));
854
+ TO_(ENC_(rb_str_new2((char*)s))));
833
855
  }
834
856
 
835
857
  static void
@@ -856,8 +878,8 @@ myStartNamespaceDeclHandler(void *recv,
856
878
  XMLParser* parser;
857
879
  GET_PARSER(recv, parser);
858
880
  rb_funcall((VALUE)recv, id_startNamespaceDeclHandler, 2,
859
- (prefix ? TO_(rb_str_new2((char*)prefix)) : Qnil),
860
- (uri ? TO_(rb_str_new2((char*)uri)) : Qnil));
881
+ (prefix ? TO_(ENC_(rb_str_new2((char*)prefix))) : Qnil),
882
+ (uri ? TO_(ENC_(rb_str_new2((char*)uri))) : Qnil));
861
883
  }
862
884
 
863
885
  static void
@@ -867,7 +889,7 @@ myEndNamespaceDeclHandler(void *recv,
867
889
  XMLParser* parser;
868
890
  GET_PARSER(recv, parser);
869
891
  rb_funcall((VALUE)recv, id_endNamespaceDeclHandler, 1,
870
- (prefix ? TO_(rb_str_new2((char*)prefix)) : Qnil));
892
+ (prefix ? TO_(ENC_(rb_str_new2((char*)prefix))) : Qnil));
871
893
  }
872
894
 
873
895
  static int
@@ -900,13 +922,13 @@ myStartDoctypeDeclHandler(void *recv,
900
922
  GET_PARSER(recv, parser);
901
923
  #ifdef HAVE_EXPAT_H
902
924
  rb_funcall((VALUE)recv, id_startDoctypeDeclHandler, 4,
903
- TO_(rb_str_new2((char*)doctypeName)),
904
- (sysid ? TO_(rb_str_new2((char*)sysid)) : Qnil),
905
- (pubid ? TO_(rb_str_new2((char*)pubid)) : Qnil),
925
+ TO_(ENC_(rb_str_new2((char*)doctypeName))),
926
+ (sysid ? TO_(ENC_(rb_str_new2((char*)sysid))) : Qnil),
927
+ (pubid ? TO_(ENC_(rb_str_new2((char*)pubid))) : Qnil),
906
928
  (has_internal_subset ? Qtrue : Qfalse));
907
929
  #else
908
930
  rb_funcall((VALUE)recv, id_startDoctypeDeclHandler, 4,
909
- TO_(rb_str_new2((char*)doctypeName)),
931
+ TO_(ENC_(rb_str_new2((char*)doctypeName))),
910
932
  Qnil, Qnil, Qfalse);
911
933
  #endif
912
934
  }
@@ -929,10 +951,11 @@ myElementDeclHandler(void *recv,
929
951
  XML_Content *model)
930
952
  {
931
953
  XMLParser* parser;
954
+ VALUE content;
932
955
  GET_PARSER(recv, parser);
933
- VALUE content = makeContentArray(parser, model);
956
+ content = makeContentArray(parser, model);
934
957
  rb_funcall((VALUE)recv, id_elementDeclHandler, 2,
935
- TO_(rb_str_new2(name)), content);
958
+ TO_(ENC_(rb_str_new2(name))), content);
936
959
  }
937
960
 
938
961
  static void
@@ -946,10 +969,10 @@ myAttlistDeclHandler (void *recv,
946
969
  XMLParser* parser;
947
970
  GET_PARSER(recv, parser);
948
971
  rb_funcall((VALUE)recv, id_attlistDeclHandler, 5,
949
- TO_(rb_str_new2(elname)),
950
- TO_(rb_str_new2((char*)attname)),
951
- TO_(rb_str_new2((char*)att_type)),
952
- (dflt ? TO_(rb_str_new2((char*)dflt)) : Qnil),
972
+ TO_(ENC_(rb_str_new2(elname))),
973
+ TO_(ENC_(rb_str_new2((char*)attname))),
974
+ TO_(ENC_(rb_str_new2((char*)att_type))),
975
+ (dflt ? TO_(ENC_(rb_str_new2((char*)dflt))) : Qnil),
953
976
  (isrequired ? Qtrue : Qfalse));
954
977
  }
955
978
 
@@ -962,8 +985,8 @@ myXmlDeclHandler (void *recv,
962
985
  XMLParser* parser;
963
986
  GET_PARSER(recv, parser);
964
987
  rb_funcall((VALUE)recv, id_xmlDeclHandler, 3,
965
- (version ? TO_(rb_str_new2(version)) : Qnil),
966
- (encoding ? TO_(rb_str_new2((char*)encoding)) : Qnil),
988
+ (version ? TO_(ENC_(rb_str_new2(version))) : Qnil),
989
+ (encoding ? TO_(ENC_(rb_str_new2((char*)encoding))) : Qnil),
967
990
  INT2FIX(standalone));
968
991
  }
969
992
 
@@ -981,13 +1004,13 @@ myEntityDeclHandler (void *recv,
981
1004
  XMLParser* parser;
982
1005
  GET_PARSER(recv, parser);
983
1006
  rb_funcall((VALUE)recv, id_entityDeclHandler, 7,
984
- TO_(rb_str_new2(entityName)),
1007
+ TO_(ENC_(rb_str_new2(entityName))),
985
1008
  (is_parameter_entity ? Qtrue : Qfalse),
986
- TO_(rb_str_new((char*)value, value_length)),
987
- (base ? TO_(rb_str_new2((char*)base)) : Qnil),
988
- (systemId ? TO_(rb_str_new2((char*)systemId)) : Qnil),
989
- (publicId ? TO_(rb_str_new2((char*)publicId)) : Qnil),
990
- (notationName ? TO_(rb_str_new2((char*)notationName))
1009
+ TO_(ENC_(rb_str_new((char*)value, value_length))),
1010
+ (base ? TO_(ENC_(rb_str_new2((char*)base))) : Qnil),
1011
+ (systemId ? TO_(ENC_(rb_str_new2((char*)systemId))) : Qnil),
1012
+ (publicId ? TO_(ENC_(rb_str_new2((char*)publicId))) : Qnil),
1013
+ (notationName ? TO_(ENC_(rb_str_new2((char*)notationName)))
991
1014
  : Qnil));
992
1015
  }
993
1016
 
@@ -1004,10 +1027,10 @@ myExternalParsedEntityDeclHandler(void *recv,
1004
1027
  XMLParser* parser;
1005
1028
  GET_PARSER(recv, parser);
1006
1029
  rb_funcall((VALUE)recv, id_externalParsedEntityDeclHandler, 4,
1007
- TO_(rb_str_new2((char*)entityName)),
1008
- (base ? TO_(rb_str_new2((char*)base)) : Qnil),
1009
- TO_(rb_str_new2((char*)systemId)),
1010
- (publicId ? TO_(rb_str_new2((char*)publicId)) : Qnil));
1030
+ TO_(ENC_(rb_str_new2((char*)entityName))),
1031
+ (base ? TO_(ENC_(rb_str_new2((char*)base))) : Qnil),
1032
+ TO_(ENC_(rb_str_new2((char*)systemId))),
1033
+ (publicId ? TO_(ENC_(rb_str_new2((char*)publicId))) : Qnil));
1011
1034
  }
1012
1035
 
1013
1036
  static void
@@ -1019,9 +1042,9 @@ myInternalParsedEntityDeclHandler(void *recv,
1019
1042
  XMLParser* parser;
1020
1043
  GET_PARSER(recv, parser);
1021
1044
  rb_funcall((VALUE)recv, id_internalParsedEntityDeclHandler, 2,
1022
- TO_(rb_str_new2((char*)entityName)),
1023
- TO_(rb_str_new((char*)replacementText,
1024
- replacementTextLength)));
1045
+ TO_(ENC_(rb_str_new2((char*)entityName))),
1046
+ TO_(ENC_(rb_str_new((char*)replacementText,
1047
+ replacementTextLength))));
1025
1048
  }
1026
1049
  #endif
1027
1050
 
@@ -1043,21 +1066,21 @@ myEncodingConv(void *data, const char *s)
1043
1066
  {
1044
1067
  VALUE v;
1045
1068
  int len;
1046
- int slen = RSTRING(rb_ivar_get((VALUE)data,
1047
- id_map))->ptr[*(unsigned char*)s];
1069
+ int slen = RSTRING_PTR(rb_ivar_get((VALUE)data,
1070
+ id_map))[*(unsigned char*)s];
1048
1071
 
1049
- v = rb_funcall((VALUE)data, id_convert, 1, rb_str_new((char*)s, -slen));
1072
+ v = rb_funcall((VALUE)data, id_convert, 1, ENC_(rb_str_new((char*)s, -slen)));
1050
1073
  switch (TYPE(v)) {
1051
1074
  case T_FIXNUM:
1052
1075
  return FIX2INT(v);
1053
1076
  case T_STRING:
1054
- len = RSTRING(v)->len;
1077
+ len = RSTRING_LEN(v);
1055
1078
  if (len == 1) {
1056
- return (unsigned char)*(RSTRING(v)->ptr);
1079
+ return (unsigned char)*RSTRING_PTR(v);
1057
1080
  }
1058
1081
  else if (len >= 2) {
1059
- return (unsigned char)*(RSTRING(v)->ptr) |
1060
- (unsigned char)*(RSTRING(v)->ptr + 1) << 8;
1082
+ return (unsigned char)*RSTRING_PTR(v) |
1083
+ (unsigned char)*(RSTRING_PTR(v) + 1) << 8;
1061
1084
  }
1062
1085
  }
1063
1086
  return 0;
@@ -1077,7 +1100,7 @@ iterUnknownEncodingHandler(void *recv,
1077
1100
 
1078
1101
  GET_PARSER(recv, parser);
1079
1102
  ret = rb_yield(rb_ary_new3(4, symUNKNOWN_ENCODING,
1080
- TO_(rb_str_new2((char*)name)), Qnil, recv));
1103
+ TO_(ENC_(rb_str_new2((char*)name))), Qnil, recv));
1081
1104
  if (TYPE(ret) == T_OBJECT && rb_obj_is_kind_of(ret, cXMLEncoding)) {
1082
1105
  int i;
1083
1106
  ID mid = rb_intern("map");
@@ -1086,7 +1109,7 @@ iterUnknownEncodingHandler(void *recv,
1086
1109
 
1087
1110
  for (i = 0; i < 256; i++) {
1088
1111
  VALUE m = rb_funcall(ret, mid, 1, INT2FIX(i));
1089
- RSTRING(cmap)->ptr[i] = info->map[i] = FIX2INT(m);
1112
+ RSTRING_PTR(cmap)[i] = info->map[i] = FIX2INT(m);
1090
1113
  }
1091
1114
  /* protect object from GC */
1092
1115
  rb_ivar_set(recv, rb_intern("_encoding"), ret);
@@ -1241,8 +1264,6 @@ findEncoding(const char* encname)
1241
1264
  file[len] = tolower(*p);
1242
1265
  }
1243
1266
  file[len] = '\0';
1244
- // if (len < PATH_MAX - sizeof(encext))
1245
- // strcat(file, encext);
1246
1267
  strncat(file, encext, PATH_MAX - len -1);
1247
1268
 
1248
1269
  if ((fp = fopen(file, "rb")) == NULL) {
@@ -1274,6 +1295,10 @@ myUnknownEncodingHandler(void *recv,
1274
1295
  {
1275
1296
  XMLParser* parser;
1276
1297
  VALUE ret;
1298
+
1299
+ GET_PARSER(recv, parser);
1300
+ parser->detectedEncoding = name;
1301
+
1277
1302
  if (!rb_method_boundp(CLASS_OF((VALUE)recv), id_unknownEncoding, 0))
1278
1303
  #ifndef XML_ENC_PATH
1279
1304
  return 0;
@@ -1293,9 +1318,8 @@ myUnknownEncodingHandler(void *recv,
1293
1318
  }
1294
1319
  #endif
1295
1320
 
1296
- GET_PARSER(recv, parser);
1297
1321
  ret = rb_funcall((VALUE)recv, id_unknownEncoding, 1,
1298
- TO_(rb_str_new2((char*)name)));
1322
+ TO_(ENC_(rb_str_new2((char*)name))));
1299
1323
  if (TYPE(ret) == T_OBJECT && rb_obj_is_kind_of(ret, cXMLEncoding)) {
1300
1324
  int i;
1301
1325
  ID mid = rb_intern("map");
@@ -1308,7 +1332,7 @@ myUnknownEncodingHandler(void *recv,
1308
1332
 
1309
1333
  for (i = 0; i < 256; i++) {
1310
1334
  VALUE m = rb_funcall(ret, mid, 1, INT2FIX(i));
1311
- RSTRING(cmap)->ptr[i] = info->map[i] = FIX2INT(m);
1335
+ RSTRING_PTR(cmap)[i] = info->map[i] = FIX2INT(m);
1312
1336
  }
1313
1337
  /* protect object from GC */
1314
1338
  rb_ivar_set((VALUE)recv, rb_intern("_encoding"), ret);
@@ -1330,7 +1354,7 @@ mySkippedEntityHandler(void *recv,
1330
1354
  XMLParser* parser;
1331
1355
  GET_PARSER(recv, parser);
1332
1356
  rb_funcall((VALUE)recv, id_skippedEntityHandler, 2,
1333
- TO_(rb_str_new2((char*)entityName)),
1357
+ TO_(ENC_(rb_str_new2((char*)entityName))),
1334
1358
  INT2FIX(is_parameter_entity));
1335
1359
  }
1336
1360
  #endif
@@ -1359,7 +1383,7 @@ XMLParser_new(int argc, VALUE* argv, VALUE klass)
1359
1383
  /* new(encoding) */
1360
1384
  if (TYPE(arg1) != T_NIL) {
1361
1385
  Check_Type(arg1, T_STRING); /* encoding */
1362
- encoding = RSTRING(arg1)->ptr;
1386
+ encoding = RSTRING_PTR(arg1);
1363
1387
  }
1364
1388
  }
1365
1389
  else if (count == 2) {
@@ -1369,10 +1393,10 @@ XMLParser_new(int argc, VALUE* argv, VALUE klass)
1369
1393
  if (TYPE(arg1) != T_DATA) {
1370
1394
  if (TYPE(arg1) != T_NIL) {
1371
1395
  Check_Type(arg1, T_STRING); /* encoding */
1372
- encoding = RSTRING(arg1)->ptr;
1396
+ encoding = RSTRING_PTR(arg1);
1373
1397
  }
1374
1398
  Check_Type(arg2, T_STRING); /* nschar */
1375
- nssep = RSTRING(arg2)->ptr;
1399
+ nssep = RSTRING_PTR(arg2);
1376
1400
  }
1377
1401
  else {
1378
1402
  #endif
@@ -1380,7 +1404,7 @@ XMLParser_new(int argc, VALUE* argv, VALUE klass)
1380
1404
  GET_PARSER(arg1, rootparser);
1381
1405
  if (!NIL_P(arg2)) {
1382
1406
  Check_Type(arg2, T_STRING); /* context */
1383
- context = RSTRING(arg2)->ptr;
1407
+ context = RSTRING_PTR(arg2);
1384
1408
  }
1385
1409
  parent = arg1;
1386
1410
  #ifdef NEW_EXPAT
@@ -1393,10 +1417,10 @@ XMLParser_new(int argc, VALUE* argv, VALUE klass)
1393
1417
  GET_PARSER(arg1, rootparser);
1394
1418
  if (!NIL_P(arg2)) {
1395
1419
  Check_Type(arg2, T_STRING); /* context */
1396
- context = RSTRING(arg2)->ptr;
1420
+ context = RSTRING_PTR(arg2);
1397
1421
  }
1398
1422
  Check_Type(arg3, T_STRING); /* encoding */
1399
- encoding = RSTRING(arg3)->ptr;
1423
+ encoding = RSTRING_PTR(arg3);
1400
1424
  parent = arg1;
1401
1425
  }
1402
1426
 
@@ -1465,6 +1489,7 @@ XMLParser_new(int argc, VALUE* argv, VALUE klass)
1465
1489
  parser->lastAttrs = NULL;
1466
1490
  #endif
1467
1491
  parser->parent = parent;
1492
+ parser->detectedEncoding = NULL;
1468
1493
 
1469
1494
  rb_obj_call_init(obj, argc, argv);
1470
1495
 
@@ -1491,7 +1516,7 @@ XMLParser_reset(int argc, VALUE* argv, VALUE obj)
1491
1516
  GET_PARSER(obj, parser);
1492
1517
  if (count > 0 && TYPE(vencoding) != T_NIL) {
1493
1518
  Check_Type(vencoding, T_STRING);
1494
- encoding = RSTRING(vencoding)->ptr;
1519
+ encoding = RSTRING_PTR(vencoding);
1495
1520
  }
1496
1521
  XML_ParserReset(parser->parser, encoding);
1497
1522
  /* setting up internal data */
@@ -1502,17 +1527,14 @@ XMLParser_reset(int argc, VALUE* argv, VALUE obj)
1502
1527
  parser->lastAttrs = NULL;
1503
1528
  #endif
1504
1529
  parser->tainted = 0;
1530
+ parser->detectedEncoding = NULL;
1505
1531
 
1506
1532
  return obj;
1507
1533
  }
1508
1534
  #endif
1509
1535
 
1510
- /* parse method */
1511
- static VALUE
1512
- XMLParser_parse(int argc, VALUE* argv, VALUE obj)
1513
- {
1514
- XMLParser* parser;
1515
- int ret;
1536
+ static void
1537
+ setup_evnet_handlers(XMLParser* parser, VALUE obj) {
1516
1538
  XML_StartElementHandler start = NULL;
1517
1539
  XML_EndElementHandler end = NULL;
1518
1540
  #ifdef NEW_EXPAT
@@ -1525,38 +1547,6 @@ XMLParser_parse(int argc, VALUE* argv, VALUE obj)
1525
1547
  XML_StartDoctypeDeclHandler startDoctype = NULL;
1526
1548
  XML_EndDoctypeDeclHandler endDoctype = NULL;
1527
1549
  #endif
1528
- VALUE str;
1529
- VALUE isFinal;
1530
- int final = 1;
1531
- int count;
1532
- int fromStream = 0;
1533
- ID mid = rb_intern("gets");
1534
- ID linebuf = rb_intern("_linebuf");
1535
-
1536
- count = rb_scan_args(argc, argv, "02", &str, &isFinal);
1537
- /* If "str" has public "gets" method, it will be considered *stream* */
1538
- if (!rb_obj_is_kind_of(str, rb_cString) &&
1539
- rb_method_boundp(CLASS_OF(str), mid, 1)) {
1540
- fromStream = 1;
1541
- }
1542
- else if (!NIL_P(str)) {
1543
- Check_Type(str, T_STRING);
1544
- }
1545
- if (count >= 2) {
1546
- if (isFinal == Qtrue)
1547
- final = 1;
1548
- else if (isFinal == Qfalse)
1549
- final = 0;
1550
- else
1551
- rb_raise(rb_eTypeError, "not valid value");
1552
- }
1553
-
1554
- GET_PARSER(obj, parser);
1555
-
1556
- // parser->iterator = rb_iterator_p();
1557
- parser->iterator = rb_block_given_p();
1558
-
1559
- /* Setup event handlers */
1560
1550
 
1561
1551
  /* Call as iterator */
1562
1552
  if (parser->iterator) {
@@ -1726,6 +1716,47 @@ XMLParser_parse(int argc, VALUE* argv, VALUE obj)
1726
1716
  XML_SetSkippedEntityHandler(parser->parser, mySkippedEntityHandler);
1727
1717
  #endif
1728
1718
  }
1719
+ }
1720
+
1721
+
1722
+ /* parse method */
1723
+ static VALUE
1724
+ XMLParser_parse(int argc, VALUE* argv, VALUE obj)
1725
+ {
1726
+ XMLParser* parser;
1727
+ int ret;
1728
+ VALUE str;
1729
+ VALUE isFinal;
1730
+ int final = 1;
1731
+ int count;
1732
+ int fromStream = 0;
1733
+ ID mid = rb_intern("gets");
1734
+ ID linebuf = rb_intern("_linebuf");
1735
+
1736
+ count = rb_scan_args(argc, argv, "02", &str, &isFinal);
1737
+ /* If "str" has public "gets" method, it will be considered *stream* */
1738
+ if (!rb_obj_is_kind_of(str, rb_cString) &&
1739
+ rb_method_boundp(CLASS_OF(str), mid, 1)) {
1740
+ fromStream = 1;
1741
+ }
1742
+ else if (!NIL_P(str)) {
1743
+ Check_Type(str, T_STRING);
1744
+ }
1745
+ if (count >= 2) {
1746
+ if (isFinal == Qtrue)
1747
+ final = 1;
1748
+ else if (isFinal == Qfalse)
1749
+ final = 0;
1750
+ else
1751
+ rb_raise(rb_eTypeError, "not valid value");
1752
+ }
1753
+
1754
+ GET_PARSER(obj, parser);
1755
+
1756
+ parser->iterator = rb_block_given_p();
1757
+
1758
+ /* Setup event handlers */
1759
+ setup_evnet_handlers(parser, obj);
1729
1760
 
1730
1761
  /* Parse from stream (probably slightly slow) */
1731
1762
  if (fromStream) {
@@ -1741,7 +1772,7 @@ XMLParser_parse(int argc, VALUE* argv, VALUE obj)
1741
1772
  taintParser(parser);
1742
1773
  rb_ivar_set(obj, linebuf, buf); /* protect buf from GC (reasonable?)*/
1743
1774
  ret = XML_Parse(parser->parser,
1744
- RSTRING(buf)->ptr, RSTRING(buf)->len, 0);
1775
+ RSTRING_PTR(buf), RSTRING_LEN(buf), 0);
1745
1776
  }
1746
1777
  else {
1747
1778
  ret = XML_Parse(parser->parser, NULL, 0, 1);
@@ -1757,10 +1788,41 @@ XMLParser_parse(int argc, VALUE* argv, VALUE obj)
1757
1788
 
1758
1789
  /* Parse string */
1759
1790
  if (!NIL_P(str)) {
1791
+ #if defined(HAVE_RUBY_ENCODING_H) && defined(HAVE_XML_PARSERRESET)
1792
+ int err;
1793
+ #endif
1760
1794
  if (OBJ_TAINTED(str))
1761
1795
  taintParser(parser);
1762
1796
  ret = XML_Parse(parser->parser,
1763
- RSTRING(str)->ptr, RSTRING(str)->len, final);
1797
+ RSTRING_PTR(str), RSTRING_LEN(str), final);
1798
+ #if defined(HAVE_RUBY_ENCODING_H) && defined(HAVE_XML_PARSERRESET)
1799
+ /* Ruby 1.9.1 Encoding conversion */
1800
+ err = XML_GetErrorCode(parser->parser);
1801
+ if (final && err == XML_ERROR_UNKNOWN_ENCODING) {
1802
+ rb_encoding* enc;
1803
+ volatile VALUE encobj;
1804
+ volatile VALUE ustr;
1805
+ enc = rb_enc_find(parser->detectedEncoding);
1806
+ if ((int)ENC_TO_ENCINDEX(enc) != rb_ascii8bit_encindex()) {
1807
+ rb_enc_associate(str, enc);
1808
+ encobj = rb_enc_from_encoding(enc_xml);
1809
+ /* rb_str_encode may raise an exception */
1810
+ ustr = rb_str_encode(str, encobj, 0, Qnil);
1811
+ if (!NIL_P(ustr)) {
1812
+ XML_ParserReset(parser->parser, "utf-8");
1813
+ XML_SetUserData(parser->parser, (void*)obj);
1814
+ parser->defaultCurrent = 0;
1815
+ #ifdef NEW_EXPAT
1816
+ parser->lastAttrs = NULL;
1817
+ #endif
1818
+ parser->detectedEncoding = NULL;
1819
+ setup_evnet_handlers(parser, obj);
1820
+ ret = XML_Parse(parser->parser,
1821
+ RSTRING_PTR(ustr), RSTRING_LEN(ustr), final);
1822
+ }
1823
+ }
1824
+ }
1825
+ #endif
1764
1826
  }
1765
1827
  else
1766
1828
  ret = XML_Parse(parser->parser, NULL, 0, final);
@@ -1853,7 +1915,7 @@ XMLParser_setBase(VALUE obj, VALUE base)
1853
1915
  GET_PARSER(obj, parser);
1854
1916
  if (OBJ_TAINTED(base))
1855
1917
  taintParser(parser);
1856
- ret = XML_SetBase(parser->parser, RSTRING(base)->ptr);
1918
+ ret = XML_SetBase(parser->parser, RSTRING_PTR(base));
1857
1919
 
1858
1920
  return INT2FIX(ret);
1859
1921
  }
@@ -1870,7 +1932,7 @@ XMLParser_getBase(VALUE obj)
1870
1932
  if (!ret)
1871
1933
  return Qnil;
1872
1934
 
1873
- return TO_(rb_str_new2((char*)ret));
1935
+ return TO_(ENC_(rb_str_new2((char*)ret)));
1874
1936
  }
1875
1937
 
1876
1938
  #ifdef NEW_EXPAT
@@ -1892,7 +1954,7 @@ XMLParser_getSpecifiedAttributes(VALUE obj)
1892
1954
  while (*atts) {
1893
1955
  const char* key = *atts++;
1894
1956
  atts++;
1895
- rb_hash_aset(attrhash, FO_(TO_(rb_str_new2((char*)key))),
1957
+ rb_hash_aset(attrhash, FO_(TO_(ENC_(rb_str_new2((char*)key)))),
1896
1958
  (count-- > 0) ? Qtrue: Qfalse);
1897
1959
  }
1898
1960
 
@@ -1915,7 +1977,7 @@ XMLParser_getSpecifiedAttributes(VALUE obj)
1915
1977
  attrarray = rb_ary_new2(count);
1916
1978
  for (i = 0; i < count; i++, atts+=2) {
1917
1979
  const char* key = *atts;
1918
- rb_ary_push(attrarray, TO_(rb_str_new2((char*)key)));
1980
+ rb_ary_push(attrarray, TO_(ENC_(rb_str_new2((char*)key))));
1919
1981
  }
1920
1982
 
1921
1983
  return attrarray;
@@ -1951,13 +2013,13 @@ static VALUE
1951
2013
  XMLParser_s_expatVersion(VALUE obj)
1952
2014
  {
1953
2015
  #if defined(HAVE_EXPAT_H)
1954
- return rb_str_new2(XML_ExpatVersion());
2016
+ return ENC_(rb_str_new2(XML_ExpatVersion()));
1955
2017
  #elif defined(EXPAT_1_2)
1956
- return rb_str_new2("1.2");
2018
+ return ENC_(rb_str_new2("1.2"));
1957
2019
  #elif defined(NEW_EXPAT)
1958
- return rb_str_new2("1.1");
2020
+ return ENC_(rb_str_new2("1.1"));
1959
2021
  #else
1960
- return rb_str_new2("1.0");
2022
+ return ENC_(rb_str_new2("1.0"));
1961
2023
  #endif
1962
2024
  }
1963
2025
 
@@ -2003,7 +2065,7 @@ XMLParser_getInputContext(VALUE obj)
2003
2065
  &size);
2004
2066
  if (buffer && size > 0) {
2005
2067
  ret = rb_ary_new3(2,
2006
- TO_(rb_str_new(buffer, size)),
2068
+ TO_(ENC_(rb_str_new(buffer, size))),
2007
2069
  INT2FIX(offset));
2008
2070
  }
2009
2071
 
@@ -2025,7 +2087,7 @@ XMLParser_getIdAttrribute(VALUE obj)
2025
2087
  idattr = XML_GetIdAttributeIndex(parser->parser);
2026
2088
  if (idattr < 0)
2027
2089
  return Qnil;
2028
- return TO_(rb_str_new2((char*)atts[idattr]));
2090
+ return TO_(ENC_(rb_str_new2((char*)atts[idattr])));
2029
2091
  }
2030
2092
  #endif
2031
2093
 
@@ -2066,7 +2128,7 @@ XMLParser_s_getFeatureList(VALUE obj)
2066
2128
 
2067
2129
  list = XML_GetFeatureList();
2068
2130
  while (list && list->feature) {
2069
- rb_hash_aset(ret, FO_(rb_str_new2(list->name)), INT2NUM(list->value));
2131
+ rb_hash_aset(ret, FO_(ENC_(rb_str_new2(list->name))), INT2NUM(list->value));
2070
2132
  list++;
2071
2133
  }
2072
2134
 
@@ -2079,6 +2141,10 @@ Init_xmlparser()
2079
2141
  {
2080
2142
  VALUE mXML;
2081
2143
 
2144
+ #ifdef HAVE_RUBY_ENCODING_H
2145
+ enc_xml = rb_utf8_encoding();
2146
+ #endif
2147
+
2082
2148
  eXMLParserError = rb_define_class("XMLParserError", rb_eStandardError);
2083
2149
  cXMLParser = rb_define_class("XMLParser", rb_cObject);
2084
2150
  cXMLEncoding = rb_define_class("XMLEncoding", rb_cObject);