nokogiri 1.3.3 → 1.4.0

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (201)
  1. data/CHANGELOG.ja.rdoc +48 -3
  2. data/CHANGELOG.rdoc +42 -0
  3. data/Manifest.txt +44 -29
  4. data/README.ja.rdoc +0 -2
  5. data/README.rdoc +4 -7
  6. data/Rakefile +42 -6
  7. data/bin/nokogiri +7 -5
  8. data/ext/nokogiri/extconf.rb +5 -21
  9. data/ext/nokogiri/html_document.c +14 -50
  10. data/ext/nokogiri/html_element_description.c +7 -7
  11. data/ext/nokogiri/html_entity_lookup.c +6 -4
  12. data/ext/nokogiri/html_sax_parser_context.c +92 -0
  13. data/ext/nokogiri/html_sax_parser_context.h +11 -0
  14. data/ext/nokogiri/nokogiri.c +9 -3
  15. data/ext/nokogiri/nokogiri.h +16 -20
  16. data/ext/nokogiri/xml_attr.c +1 -1
  17. data/ext/nokogiri/xml_attribute_decl.c +67 -0
  18. data/ext/nokogiri/xml_attribute_decl.h +9 -0
  19. data/ext/nokogiri/xml_cdata.c +6 -5
  20. data/ext/nokogiri/xml_comment.c +3 -2
  21. data/ext/nokogiri/xml_document.c +93 -23
  22. data/ext/nokogiri/xml_document_fragment.c +1 -3
  23. data/ext/nokogiri/xml_dtd.c +63 -6
  24. data/ext/nokogiri/xml_element_content.c +123 -0
  25. data/ext/nokogiri/xml_element_content.h +10 -0
  26. data/ext/nokogiri/xml_element_decl.c +69 -0
  27. data/ext/nokogiri/xml_element_decl.h +9 -0
  28. data/ext/nokogiri/xml_entity_decl.c +97 -0
  29. data/ext/nokogiri/xml_entity_decl.h +10 -0
  30. data/ext/nokogiri/xml_entity_reference.c +1 -1
  31. data/ext/nokogiri/xml_io.c +10 -3
  32. data/ext/nokogiri/xml_io.h +1 -0
  33. data/ext/nokogiri/xml_namespace.c +2 -2
  34. data/ext/nokogiri/xml_node.c +139 -34
  35. data/ext/nokogiri/xml_node.h +0 -1
  36. data/ext/nokogiri/xml_node_set.c +23 -16
  37. data/ext/nokogiri/xml_processing_instruction.c +1 -1
  38. data/ext/nokogiri/xml_reader.c +78 -50
  39. data/ext/nokogiri/xml_sax_parser.c +109 -168
  40. data/ext/nokogiri/xml_sax_parser.h +33 -0
  41. data/ext/nokogiri/xml_sax_parser_context.c +155 -0
  42. data/ext/nokogiri/xml_sax_parser_context.h +10 -0
  43. data/ext/nokogiri/xml_sax_push_parser.c +11 -6
  44. data/ext/nokogiri/xml_syntax_error.c +63 -12
  45. data/ext/nokogiri/xml_text.c +4 -3
  46. data/ext/nokogiri/xml_xpath.c +1 -1
  47. data/ext/nokogiri/xml_xpath_context.c +12 -25
  48. data/ext/nokogiri/xslt_stylesheet.c +3 -3
  49. data/lib/nokogiri.rb +4 -4
  50. data/lib/nokogiri/css/generated_tokenizer.rb +1 -0
  51. data/lib/nokogiri/css/node.rb +1 -9
  52. data/lib/nokogiri/css/xpath_visitor.rb +11 -21
  53. data/lib/nokogiri/ffi/html/document.rb +0 -9
  54. data/lib/nokogiri/ffi/html/sax/parser_context.rb +38 -0
  55. data/lib/nokogiri/ffi/io_callbacks.rb +4 -2
  56. data/lib/nokogiri/ffi/libxml.rb +44 -10
  57. data/lib/nokogiri/ffi/structs/common_node.rb +1 -1
  58. data/lib/nokogiri/ffi/structs/xml_attribute.rb +27 -0
  59. data/lib/nokogiri/ffi/structs/xml_dtd.rb +3 -1
  60. data/lib/nokogiri/ffi/structs/xml_element.rb +26 -0
  61. data/lib/nokogiri/ffi/structs/xml_element_content.rb +17 -0
  62. data/lib/nokogiri/ffi/structs/xml_entity.rb +32 -0
  63. data/lib/nokogiri/ffi/structs/xml_enumeration.rb +12 -0
  64. data/lib/nokogiri/ffi/structs/xml_parser_context.rb +19 -0
  65. data/lib/nokogiri/ffi/structs/xml_sax_push_parser_context.rb +4 -3
  66. data/lib/nokogiri/ffi/structs/xml_syntax_error.rb +1 -1
  67. data/lib/nokogiri/ffi/xml/attribute_decl.rb +27 -0
  68. data/lib/nokogiri/ffi/xml/comment.rb +2 -2
  69. data/lib/nokogiri/ffi/xml/document.rb +29 -12
  70. data/lib/nokogiri/ffi/xml/document_fragment.rb +0 -5
  71. data/lib/nokogiri/ffi/xml/dtd.rb +14 -3
  72. data/lib/nokogiri/ffi/xml/element_content.rb +43 -0
  73. data/lib/nokogiri/ffi/xml/element_decl.rb +19 -0
  74. data/lib/nokogiri/ffi/xml/entity_decl.rb +27 -0
  75. data/lib/nokogiri/ffi/xml/node.rb +45 -5
  76. data/lib/nokogiri/ffi/xml/node_set.rb +1 -1
  77. data/lib/nokogiri/ffi/xml/reader.rb +45 -24
  78. data/lib/nokogiri/ffi/xml/sax/parser.rb +27 -34
  79. data/lib/nokogiri/ffi/xml/sax/parser_context.rb +67 -0
  80. data/lib/nokogiri/ffi/xml/sax/push_parser.rb +5 -4
  81. data/lib/nokogiri/ffi/xml/syntax_error.rb +31 -16
  82. data/lib/nokogiri/ffi/xml/text.rb +2 -2
  83. data/lib/nokogiri/html.rb +1 -0
  84. data/lib/nokogiri/html/document.rb +39 -24
  85. data/lib/nokogiri/html/sax/parser.rb +2 -2
  86. data/lib/nokogiri/html/sax/parser_context.rb +16 -0
  87. data/lib/nokogiri/version.rb +1 -1
  88. data/lib/nokogiri/xml.rb +6 -1
  89. data/lib/nokogiri/xml/attr.rb +5 -0
  90. data/lib/nokogiri/xml/attribute_decl.rb +18 -0
  91. data/lib/nokogiri/xml/builder.rb +121 -13
  92. data/lib/nokogiri/xml/character_data.rb +7 -0
  93. data/lib/nokogiri/xml/document.rb +43 -29
  94. data/lib/nokogiri/xml/document_fragment.rb +26 -6
  95. data/lib/nokogiri/xml/dtd.rb +5 -5
  96. data/lib/nokogiri/xml/element_content.rb +36 -0
  97. data/lib/nokogiri/xml/element_decl.rb +13 -0
  98. data/lib/nokogiri/xml/entity_decl.rb +15 -0
  99. data/lib/nokogiri/xml/fragment_handler.rb +22 -11
  100. data/lib/nokogiri/xml/namespace.rb +6 -0
  101. data/lib/nokogiri/xml/node.rb +33 -15
  102. data/lib/nokogiri/xml/node_set.rb +66 -44
  103. data/lib/nokogiri/xml/pp.rb +2 -0
  104. data/lib/nokogiri/xml/pp/character_data.rb +18 -0
  105. data/lib/nokogiri/xml/pp/node.rb +56 -0
  106. data/lib/nokogiri/xml/reader.rb +8 -0
  107. data/lib/nokogiri/xml/sax.rb +1 -1
  108. data/lib/nokogiri/xml/sax/document.rb +18 -1
  109. data/lib/nokogiri/xml/sax/parser.rb +15 -8
  110. data/lib/nokogiri/xml/sax/parser_context.rb +16 -0
  111. data/lib/nokogiri/xml/sax/push_parser.rb +0 -3
  112. data/lib/nokogiri/xml/syntax_error.rb +4 -0
  113. data/lib/nokogiri/xslt/stylesheet.rb +1 -1
  114. data/test/css/test_nthiness.rb +1 -1
  115. data/test/css/test_parser.rb +1 -1
  116. data/test/css/test_tokenizer.rb +1 -1
  117. data/test/css/test_xpath_visitor.rb +1 -1
  118. data/test/ffi/test_document.rb +1 -1
  119. data/test/files/shift_jis.html +10 -0
  120. data/test/files/staff.dtd +10 -0
  121. data/test/helper.rb +12 -3
  122. data/test/html/sax/test_parser.rb +1 -1
  123. data/test/html/sax/test_parser_context.rb +48 -0
  124. data/test/html/test_builder.rb +8 -2
  125. data/test/html/test_document.rb +23 -1
  126. data/test/html/test_document_encoding.rb +15 -1
  127. data/test/html/test_document_fragment.rb +10 -1
  128. data/test/html/test_element_description.rb +1 -2
  129. data/test/html/test_named_characters.rb +1 -1
  130. data/test/html/test_node.rb +61 -1
  131. data/test/html/test_node_encoding.rb +27 -0
  132. data/test/test_convert_xpath.rb +1 -3
  133. data/test/test_css_cache.rb +1 -1
  134. data/test/test_gc.rb +1 -1
  135. data/test/test_memory_leak.rb +1 -1
  136. data/test/test_nokogiri.rb +3 -3
  137. data/test/test_reader.rb +29 -1
  138. data/test/test_xslt_transforms.rb +1 -1
  139. data/test/xml/node/test_save_options.rb +1 -1
  140. data/test/xml/node/test_subclass.rb +1 -1
  141. data/test/xml/sax/test_parser.rb +64 -3
  142. data/test/xml/sax/test_parser_context.rb +56 -0
  143. data/test/xml/sax/test_push_parser.rb +11 -1
  144. data/test/xml/test_attr.rb +1 -1
  145. data/test/xml/test_attribute_decl.rb +82 -0
  146. data/test/xml/test_builder.rb +95 -1
  147. data/test/xml/test_cdata.rb +1 -1
  148. data/test/xml/test_comment.rb +7 -1
  149. data/test/xml/test_document.rb +147 -6
  150. data/test/xml/test_document_encoding.rb +1 -1
  151. data/test/xml/test_document_fragment.rb +55 -5
  152. data/test/xml/test_dtd.rb +40 -5
  153. data/test/xml/test_dtd_encoding.rb +3 -1
  154. data/test/xml/test_element_content.rb +56 -0
  155. data/test/xml/test_element_decl.rb +73 -0
  156. data/test/xml/test_entity_decl.rb +83 -0
  157. data/test/xml/test_entity_reference.rb +1 -1
  158. data/test/xml/test_namespace.rb +21 -1
  159. data/test/xml/test_node.rb +70 -4
  160. data/test/xml/test_node_attributes.rb +1 -1
  161. data/test/xml/test_node_encoding.rb +1 -1
  162. data/test/xml/test_node_set.rb +136 -2
  163. data/test/xml/test_parse_options.rb +1 -1
  164. data/test/xml/test_processing_instruction.rb +1 -1
  165. data/test/xml/test_reader_encoding.rb +1 -1
  166. data/test/xml/test_relax_ng.rb +1 -1
  167. data/test/xml/test_schema.rb +1 -1
  168. data/test/xml/test_syntax_error.rb +27 -0
  169. data/test/xml/test_text.rb +13 -1
  170. data/test/xml/test_unparented_node.rb +1 -1
  171. data/test/xml/test_xpath.rb +1 -1
  172. metadata +57 -40
  173. data/ext/nokogiri/html_sax_parser.c +0 -57
  174. data/ext/nokogiri/html_sax_parser.h +0 -11
  175. data/lib/action-nokogiri.rb +0 -38
  176. data/lib/nokogiri/decorators.rb +0 -2
  177. data/lib/nokogiri/decorators/hpricot.rb +0 -3
  178. data/lib/nokogiri/decorators/hpricot/node.rb +0 -56
  179. data/lib/nokogiri/decorators/hpricot/node_set.rb +0 -54
  180. data/lib/nokogiri/decorators/hpricot/xpath_visitor.rb +0 -30
  181. data/lib/nokogiri/ffi/html/sax/parser.rb +0 -21
  182. data/lib/nokogiri/hpricot.rb +0 -92
  183. data/lib/nokogiri/xml/entity_declaration.rb +0 -11
  184. data/lib/nokogiri/xml/sax/legacy_handlers.rb +0 -65
  185. data/test/hpricot/files/basic.xhtml +0 -17
  186. data/test/hpricot/files/boingboing.html +0 -2266
  187. data/test/hpricot/files/cy0.html +0 -3653
  188. data/test/hpricot/files/immob.html +0 -400
  189. data/test/hpricot/files/pace_application.html +0 -1320
  190. data/test/hpricot/files/tenderlove.html +0 -16
  191. data/test/hpricot/files/uswebgen.html +0 -220
  192. data/test/hpricot/files/utf8.html +0 -1054
  193. data/test/hpricot/files/week9.html +0 -1723
  194. data/test/hpricot/files/why.xml +0 -19
  195. data/test/hpricot/load_files.rb +0 -11
  196. data/test/hpricot/test_alter.rb +0 -68
  197. data/test/hpricot/test_builder.rb +0 -20
  198. data/test/hpricot/test_parser.rb +0 -350
  199. data/test/hpricot/test_paths.rb +0 -15
  200. data/test/hpricot/test_preserved.rb +0 -77
  201. data/test/hpricot/test_xml.rb +0 -30
@@ -7,7 +7,6 @@ void init_xml_node();
7
7
 
8
8
  extern VALUE cNokogiriXmlNode ;
9
9
  extern VALUE cNokogiriXmlElement ;
10
- extern VALUE cNokogiriXmlEntityDeclaration ;
11
10
 
12
11
  VALUE Nokogiri_wrap_xml_node(VALUE klass, xmlNodePtr node) ;
13
12
  void Nokogiri_xml_node_properties(xmlNodePtr node, VALUE attr_hash) ;
@@ -45,7 +45,7 @@ static VALUE push(VALUE self, VALUE rb_node)
45
45
  xmlNodeSetPtr node_set;
46
46
  xmlNodePtr node;
47
47
 
48
- if(! rb_funcall(rb_node, rb_intern("is_a?"), 1, cNokogiriXmlNode))
48
+ if(!rb_obj_is_kind_of(rb_node, cNokogiriXmlNode))
49
49
  rb_raise(rb_eArgError, "node must be a Nokogiri::XML::Node");
50
50
 
51
51
  Data_Get_Struct(self, xmlNodeSet, node_set);
@@ -66,7 +66,7 @@ static VALUE delete(VALUE self, VALUE rb_node)
66
66
  xmlNodeSetPtr node_set ;
67
67
  xmlNodePtr node ;
68
68
 
69
- if(! rb_funcall(rb_node, rb_intern("is_a?"), 1, cNokogiriXmlNode))
69
+ if(!rb_obj_is_kind_of(rb_node, cNokogiriXmlNode))
70
70
  rb_raise(rb_eArgError, "node must be a Nokogiri::XML::Node");
71
71
 
72
72
  Data_Get_Struct(self, xmlNodeSet, node_set);
@@ -92,7 +92,7 @@ static VALUE intersection(VALUE self, VALUE rb_other)
92
92
  xmlNodeSetPtr node_set;
93
93
  xmlNodeSetPtr other;
94
94
 
95
- if(! rb_funcall(rb_other, rb_intern("is_a?"), 1, cNokogiriXmlNodeSet))
95
+ if(!rb_obj_is_kind_of(rb_other, cNokogiriXmlNodeSet))
96
96
  rb_raise(rb_eArgError, "node_set must be a Nokogiri::XML::NodeSet");
97
97
 
98
98
  Data_Get_Struct(self, xmlNodeSet, node_set);
@@ -113,7 +113,7 @@ static VALUE include_eh(VALUE self, VALUE rb_node)
113
113
  xmlNodeSetPtr node_set;
114
114
  xmlNodePtr node;
115
115
 
116
- if(! rb_funcall(rb_node, rb_intern("is_a?"), 1, cNokogiriXmlNode))
116
+ if(!rb_obj_is_kind_of(rb_node, cNokogiriXmlNode))
117
117
  rb_raise(rb_eArgError, "node must be a Nokogiri::XML::Node");
118
118
 
119
119
  Data_Get_Struct(self, xmlNodeSet, node_set);
@@ -125,18 +125,18 @@ static VALUE include_eh(VALUE self, VALUE rb_node)
125
125
 
126
126
  /*
127
127
  * call-seq:
128
- * +(node_set)
128
+ * |(node_set)
129
129
  *
130
- * Concatenation - returns a new NodeSet built by concatenating the node set
131
- * with +node_set+ to produce a third NodeSet
130
+ * Returns a new set built by merging the set and the elements of the given
131
+ * set.
132
132
  */
133
- static VALUE plus(VALUE self, VALUE rb_other)
133
+ static VALUE set_union(VALUE self, VALUE rb_other)
134
134
  {
135
135
  xmlNodeSetPtr node_set;
136
136
  xmlNodeSetPtr other;
137
137
  xmlNodeSetPtr new;
138
138
 
139
- if(! rb_funcall(rb_other, rb_intern("is_a?"), 1, cNokogiriXmlNodeSet))
139
+ if(!rb_obj_is_kind_of(rb_other, cNokogiriXmlNodeSet))
140
140
  rb_raise(rb_eArgError, "node_set must be a Nokogiri::XML::NodeSet");
141
141
 
142
142
  Data_Get_Struct(self, xmlNodeSet, node_set);
@@ -166,7 +166,7 @@ static VALUE minus(VALUE self, VALUE rb_other)
166
166
  xmlNodeSetPtr new;
167
167
  int j ;
168
168
 
169
- if(! rb_funcall(rb_other, rb_intern("is_a?"), 1, cNokogiriXmlNodeSet))
169
+ if(!rb_obj_is_kind_of(rb_other, cNokogiriXmlNodeSet))
170
170
  rb_raise(rb_eArgError, "node_set must be a Nokogiri::XML::NodeSet");
171
171
 
172
172
  Data_Get_Struct(self, xmlNodeSet, node_set);
@@ -238,6 +238,8 @@ static VALUE slice(int argc, VALUE *argv, VALUE self)
238
238
  if (beg < 0) {
239
239
  beg += node_set->nodeNr ;
240
240
  }
241
+ if (len > node_set->nodeNr) len = node_set->nodeNr;
242
+
241
243
  return subseq(self, beg, len);
242
244
  }
243
245
 
@@ -278,16 +280,21 @@ static VALUE to_array(VALUE self, VALUE rb_node)
278
280
  VALUE *elts = calloc((size_t)set->nodeNr, sizeof(VALUE *));
279
281
  int i;
280
282
  for(i = 0; i < set->nodeNr; i++) {
281
- if(set->nodeTab[i]->_private) {
282
- elts[i] = (VALUE)set->nodeTab[i]->_private;
283
+ xmlNodePtr node = set->nodeTab[i];
284
+
285
+ if(node->_private) {
286
+ if(node->type == XML_DOCUMENT_NODE || node->type == XML_HTML_DOCUMENT_NODE)
287
+ elts[i] = DOC_RUBY_OBJECT(node->doc);
288
+ else
289
+ elts[i] = (VALUE)node->_private;
283
290
  } else {
284
- elts[i] = Nokogiri_wrap_xml_node(Qnil, set->nodeTab[i]);
291
+ elts[i] = Nokogiri_wrap_xml_node(Qnil, node);
285
292
  }
286
293
  }
287
294
 
288
- VALUE list = rb_ary_new4(set->nodeNr, elts);
295
+ VALUE list = rb_ary_new4((long)set->nodeNr, elts);
289
296
 
290
- free(elts);
297
+ //free(elts);
291
298
 
292
299
  return list;
293
300
  }
@@ -379,7 +386,7 @@ void init_xml_node_set(void)
379
386
  rb_define_method(klass, "[]", slice, -1);
380
387
  rb_define_method(klass, "slice", slice, -1);
381
388
  rb_define_method(klass, "push", push, 1);
382
- rb_define_method(klass, "+", plus, 1);
389
+ rb_define_method(klass, "|", set_union, 1);
383
390
  rb_define_method(klass, "-", minus, 1);
384
391
  rb_define_method(klass, "unlink", unlink_nodeset, 0);
385
392
  rb_define_method(klass, "to_a", to_array, 0);
@@ -28,7 +28,7 @@ static VALUE new(int argc, VALUE *argv, VALUE klass)
28
28
  NOKOGIRI_ROOT_NODE(node);
29
29
 
30
30
  VALUE rb_node = Nokogiri_wrap_xml_node(klass, node);
31
- rb_funcall2(rb_node, rb_intern("initialize"), argc, argv);
31
+ rb_obj_call_init(rb_node, argc, argv);
32
32
 
33
33
  if(rb_block_given_p()) rb_yield(rb_node);
34
34
 
@@ -54,8 +54,8 @@ static void Nokogiri_xml_node_namespaces(xmlNodePtr node, VALUE attr_hash)
54
54
  }
55
55
 
56
56
  rb_hash_aset(attr_hash,
57
- NOKOGIRI_STR_NEW2(key, node->doc->encoding),
58
- (ns->href ? NOKOGIRI_STR_NEW2(ns->href, node->doc->encoding) : Qnil)
57
+ NOKOGIRI_STR_NEW2(key),
58
+ (ns->href ? NOKOGIRI_STR_NEW2(ns->href) : Qnil)
59
59
  );
60
60
  if (key != buffer) {
61
61
  free(key);
@@ -163,17 +163,6 @@ static VALUE attribute_nodes(VALUE self)
163
163
  xmlNodePtr ptr = xmlTextReaderExpand(reader);
164
164
  if(ptr == NULL) return Qnil;
165
165
 
166
- VALUE enc = rb_iv_get(self, "@encoding");
167
-
168
- if(enc != Qnil && NULL == ptr->doc->encoding) {
169
- ptr->doc->encoding = calloc((size_t)RSTRING_LEN(enc), sizeof(char));
170
- strncpy(
171
- (char *)ptr->doc->encoding,
172
- StringValuePtr(enc),
173
- (size_t)RSTRING_LEN(enc)
174
- );
175
- }
176
-
177
166
  Nokogiri_xml_node_properties(ptr, attr);
178
167
 
179
168
  return attr ;
@@ -190,8 +179,8 @@ static VALUE attribute_at(VALUE self, VALUE index)
190
179
  xmlTextReaderPtr reader;
191
180
  Data_Get_Struct(self, xmlTextReader, reader);
192
181
 
193
- if(index == Qnil) return Qnil;
194
- index = rb_funcall(index, rb_intern("to_i"), 0);
182
+ if(NIL_P(index)) return Qnil;
183
+ index = rb_Integer(index);
195
184
 
196
185
  xmlChar * value = xmlTextReaderGetAttributeNo(
197
186
  reader,
@@ -199,9 +188,7 @@ static VALUE attribute_at(VALUE self, VALUE index)
199
188
  );
200
189
  if(value == NULL) return Qnil;
201
190
 
202
- VALUE MAYBE_UNUSED(enc) = rb_iv_get(self, "@encoding");
203
- VALUE rb_value = NOKOGIRI_STR_NEW2(value,
204
- RTEST(enc) ? StringValuePtr(enc) : NULL);
191
+ VALUE rb_value = NOKOGIRI_STR_NEW2(value);
205
192
  xmlFree(value);
206
193
  return rb_value;
207
194
  }
@@ -218,7 +205,7 @@ static VALUE reader_attribute(VALUE self, VALUE name)
218
205
  xmlChar *value ;
219
206
  Data_Get_Struct(self, xmlTextReader, reader);
220
207
 
221
- if(name == Qnil) return Qnil;
208
+ if(NIL_P(name)) return Qnil;
222
209
  name = StringValue(name) ;
223
210
 
224
211
  value = xmlTextReaderGetAttribute(reader, (xmlChar*)StringValuePtr(name));
@@ -237,9 +224,7 @@ static VALUE reader_attribute(VALUE self, VALUE name)
237
224
  }
238
225
  if(value == NULL) return Qnil;
239
226
 
240
- VALUE MAYBE_UNUSED(enc) = rb_iv_get(self, "@encoding");
241
- VALUE rb_value = NOKOGIRI_STR_NEW2(value,
242
- RTEST(enc) ? StringValuePtr(enc) : NULL);
227
+ VALUE rb_value = NOKOGIRI_STR_NEW2(value);
243
228
  xmlFree(value);
244
229
  return rb_value;
245
230
  }
@@ -257,7 +242,7 @@ static VALUE attribute_count(VALUE self)
257
242
  int count = xmlTextReaderAttributeCount(reader);
258
243
  if(count == -1) return Qnil;
259
244
 
260
- return INT2NUM(count);
245
+ return INT2NUM((long)count);
261
246
  }
262
247
 
263
248
  /*
@@ -273,7 +258,7 @@ static VALUE depth(VALUE self)
273
258
  int depth = xmlTextReaderDepth(reader);
274
259
  if(depth == -1) return Qnil;
275
260
 
276
- return INT2NUM(depth);
261
+ return INT2NUM((long)depth);
277
262
  }
278
263
 
279
264
  /*
@@ -289,7 +274,7 @@ static VALUE xml_version(VALUE self)
289
274
  const char * version = (const char *)xmlTextReaderConstXmlVersion(reader);
290
275
  if(version == NULL) return Qnil;
291
276
 
292
- return NOKOGIRI_STR_NEW2(version, "UTF-8");
277
+ return NOKOGIRI_STR_NEW2(version);
293
278
  }
294
279
 
295
280
  /*
@@ -305,9 +290,7 @@ static VALUE lang(VALUE self)
305
290
  const char * lang = (const char *)xmlTextReaderConstXmlLang(reader);
306
291
  if(lang == NULL) return Qnil;
307
292
 
308
- VALUE MAYBE_UNUSED(enc) = rb_iv_get(self, "@encoding");
309
- return NOKOGIRI_STR_NEW2(lang,
310
- RTEST(enc) ? StringValuePtr(enc) : NULL);
293
+ return NOKOGIRI_STR_NEW2(lang);
311
294
  }
312
295
 
313
296
  /*
@@ -323,9 +306,7 @@ static VALUE value(VALUE self)
323
306
  const char * value = (const char *)xmlTextReaderConstValue(reader);
324
307
  if(value == NULL) return Qnil;
325
308
 
326
- VALUE MAYBE_UNUSED(enc) = rb_iv_get(self, "@encoding");
327
- return NOKOGIRI_STR_NEW2(value,
328
- RTEST(enc) ? StringValuePtr(enc) : NULL);
309
+ return NOKOGIRI_STR_NEW2(value);
329
310
  }
330
311
 
331
312
  /*
@@ -341,9 +322,7 @@ static VALUE prefix(VALUE self)
341
322
  const char * prefix = (const char *)xmlTextReaderConstPrefix(reader);
342
323
  if(prefix == NULL) return Qnil;
343
324
 
344
- VALUE MAYBE_UNUSED(enc) = rb_iv_get(self, "@encoding");
345
- return NOKOGIRI_STR_NEW2(prefix,
346
- RTEST(enc) ? StringValuePtr(enc) : NULL);
325
+ return NOKOGIRI_STR_NEW2(prefix);
347
326
  }
348
327
 
349
328
  /*
@@ -359,9 +338,7 @@ static VALUE namespace_uri(VALUE self)
359
338
  const char * uri = (const char *)xmlTextReaderConstNamespaceUri(reader);
360
339
  if(uri == NULL) return Qnil;
361
340
 
362
- VALUE MAYBE_UNUSED(enc) = rb_iv_get(self, "@encoding");
363
- return NOKOGIRI_STR_NEW2(uri,
364
- RTEST(enc) ? StringValuePtr(enc) : NULL);
341
+ return NOKOGIRI_STR_NEW2(uri);
365
342
  }
366
343
 
367
344
  /*
@@ -377,9 +354,7 @@ static VALUE local_name(VALUE self)
377
354
  const char * name = (const char *)xmlTextReaderConstLocalName(reader);
378
355
  if(name == NULL) return Qnil;
379
356
 
380
- VALUE MAYBE_UNUSED(enc) = rb_iv_get(self, "@encoding");
381
- return NOKOGIRI_STR_NEW2(name,
382
- RTEST(enc) ? StringValuePtr(enc) : NULL);
357
+ return NOKOGIRI_STR_NEW2(name);
383
358
  }
384
359
 
385
360
  /*
@@ -395,9 +370,7 @@ static VALUE name(VALUE self)
395
370
  const char * name = (const char *)xmlTextReaderConstName(reader);
396
371
  if(name == NULL) return Qnil;
397
372
 
398
- VALUE MAYBE_UNUSED(enc) = rb_iv_get(self, "@encoding");
399
- return NOKOGIRI_STR_NEW2(name,
400
- RTEST(enc) ? StringValuePtr(enc) : NULL);
373
+ return NOKOGIRI_STR_NEW2(name);
401
374
  }
402
375
 
403
376
  /*
@@ -410,7 +383,20 @@ static VALUE state(VALUE self)
410
383
  {
411
384
  xmlTextReaderPtr reader;
412
385
  Data_Get_Struct(self, xmlTextReader, reader);
413
- return INT2NUM(xmlTextReaderReadState(reader));
386
+ return INT2NUM((long)xmlTextReaderReadState(reader));
387
+ }
388
+
389
+ /*
390
+ * call-seq:
391
+ * node_type
392
+ *
393
+ * Get the type of the reader's current node
394
+ */
395
+ static VALUE node_type(VALUE self)
396
+ {
397
+ xmlTextReaderPtr reader;
398
+ Data_Get_Struct(self, xmlTextReader, reader);
399
+ return INT2NUM((long)xmlTextReaderNodeType(reader));
414
400
  }
415
401
 
416
402
  /*
@@ -435,15 +421,51 @@ static VALUE read_more(VALUE self)
435
421
 
436
422
  xmlErrorPtr error = xmlGetLastError();
437
423
  if(error)
438
- rb_funcall(rb_mKernel, rb_intern("raise"), 1,
439
- Nokogiri_wrap_xml_syntax_error((VALUE)NULL, error)
440
- );
424
+ rb_exc_raise(Nokogiri_wrap_xml_syntax_error((VALUE)NULL, error));
441
425
  else
442
426
  rb_raise(rb_eRuntimeError, "Error pulling: %d", ret);
443
427
 
444
428
  return Qnil;
445
429
  }
446
430
 
431
+ /*
432
+ * call-seq:
433
+ * inner_xml
434
+ *
435
+ * Read the contents of the current node, including child nodes and markup.
436
+ */
437
+ static VALUE inner_xml(VALUE self)
438
+ {
439
+ xmlTextReaderPtr reader;
440
+ Data_Get_Struct(self, xmlTextReader, reader);
441
+
442
+ const char * value = (const char *)xmlTextReaderReadInnerXml(reader);
443
+
444
+ if(value == NULL)
445
+ return Qnil;
446
+ else
447
+ return NOKOGIRI_STR_NEW2(value);
448
+ }
449
+
450
+ /*
451
+ * call-seq:
452
+ * outer_xml
453
+ *
454
+ * Read the current node and its contents, including child nodes and markup.
455
+ */
456
+ static VALUE outer_xml(VALUE self)
457
+ {
458
+ xmlTextReaderPtr reader;
459
+ Data_Get_Struct(self, xmlTextReader, reader);
460
+
461
+ const char * value = (const char *)xmlTextReaderReadOuterXml(reader);
462
+
463
+ if(value == NULL)
464
+ return Qnil;
465
+ else
466
+ return NOKOGIRI_STR_NEW2(value);
467
+ }
468
+
447
469
  /*
448
470
  * call-seq:
449
471
  * from_memory(string, url = nil, encoding = nil, options = 0)
@@ -479,7 +501,8 @@ static VALUE from_memory(int argc, VALUE *argv, VALUE klass)
479
501
  }
480
502
 
481
503
  VALUE rb_reader = Data_Wrap_Struct(klass, NULL, dealloc, reader);
482
- rb_funcall(rb_reader, rb_intern("initialize"), 3, rb_buffer, rb_url, encoding);
504
+ VALUE args[3] = {rb_buffer, rb_url, encoding};
505
+ rb_obj_call_init(rb_reader, 3, args);
483
506
 
484
507
  return rb_reader;
485
508
  }
@@ -520,7 +543,8 @@ static VALUE from_io(int argc, VALUE *argv, VALUE klass)
520
543
  }
521
544
 
522
545
  VALUE rb_reader = Data_Wrap_Struct(klass, NULL, dealloc, reader);
523
- rb_funcall(rb_reader, rb_intern("initialize"), 3, rb_io, rb_url, encoding);
546
+ VALUE args[3] = {rb_io, rb_url, encoding};
547
+ rb_obj_call_init(rb_reader, 3, args);
524
548
 
525
549
  return rb_reader;
526
550
  }
@@ -545,7 +569,10 @@ void init_xml_reader()
545
569
  rb_define_singleton_method(klass, "from_io", from_io, -1);
546
570
 
547
571
  rb_define_method(klass, "read", read_more, 0);
572
+ rb_define_method(klass, "inner_xml", inner_xml, 0);
573
+ rb_define_method(klass, "outer_xml", outer_xml, 0);
548
574
  rb_define_method(klass, "state", state, 0);
575
+ rb_define_method(klass, "node_type", node_type, 0);
549
576
  rb_define_method(klass, "name", name, 0);
550
577
  rb_define_method(klass, "local_name", local_name, 0);
551
578
  rb_define_method(klass, "namespace_uri", namespace_uri, 0);
@@ -558,8 +585,9 @@ void init_xml_reader()
558
585
  rb_define_method(klass, "attribute", reader_attribute, 1);
559
586
  rb_define_method(klass, "namespaces", namespaces, 0);
560
587
  rb_define_method(klass, "attribute_at", attribute_at, 1);
561
- rb_define_method(klass, "attribute_nodes", attribute_nodes, 0);
562
588
  rb_define_method(klass, "attributes?", attributes_eh, 0);
563
589
  rb_define_method(klass, "value?", value_eh, 0);
564
590
  rb_define_method(klass, "default?", default_eh, 0);
591
+
592
+ rb_define_private_method(klass, "attr_nodes", attribute_nodes, 0);
565
593
  }
@@ -1,122 +1,86 @@
1
- #include <nokogiri.h>
1
+ #include <xml_sax_parser.h>
2
+
3
+ static ID id_start_document, id_end_document, id_start_element, id_end_element;
4
+ static ID id_start_element_namespace, id_end_element_namespace;
5
+ static ID id_comment, id_characters, id_xmldecl, id_error, id_warning;
6
+ static ID id_cdata_block, id_cAttribute;
2
7
 
3
8
  #define STRING_OR_NULL(str) \
4
9
  (RTEST(str) ? StringValuePtr(str) : NULL)
5
10
 
6
- #define RBSTR_OR_QNIL(_str, rb_enc) \
7
- (_str ? NOKOGIRI_STR_NEW2(_str, STRING_OR_NULL(rb_enc)) : Qnil)
8
-
9
- /*
10
- * call-seq:
11
- * parse_memory(data)
12
- *
13
- * Parse the document stored in +data+
14
- */
15
- static VALUE parse_memory(VALUE self, VALUE data)
16
- {
17
- xmlSAXHandlerPtr handler;
18
- Data_Get_Struct(self, xmlSAXHandler, handler);
19
-
20
- if(Qnil == data) rb_raise(rb_eArgError, "data cannot be nil");
21
-
22
- xmlSAXUserParseMemory( handler,
23
- (void *)self,
24
- StringValuePtr(data),
25
- RSTRING_LEN(data)
26
- );
27
- return data;
28
- }
29
-
30
- /*
31
- * call-seq:
32
- * native_parse_io(data, encoding)
33
- *
34
- * Parse the document accessable via +io+
35
- */
36
- static VALUE native_parse_io(VALUE self, VALUE io, VALUE encoding)
37
- {
38
- xmlSAXHandlerPtr handler;
39
- Data_Get_Struct(self, xmlSAXHandler, handler);
40
-
41
- xmlCharEncoding enc = (xmlCharEncoding)NUM2INT(encoding);
42
-
43
- xmlParserCtxtPtr sax_ctx = xmlCreateIOParserCtxt(
44
- handler,
45
- (void *)self,
46
- (xmlInputReadCallback)io_read_callback,
47
- (xmlInputCloseCallback)io_close_callback,
48
- (void *)io,
49
- enc
50
- );
51
- xmlParseDocument(sax_ctx);
52
- xmlFreeParserCtxt(sax_ctx);
53
- return io;
54
- }
55
-
56
- /*
57
- * call-seq:
58
- * native_parse_file(data)
59
- *
60
- * Parse the document stored in +data+
61
- */
62
- static VALUE native_parse_file(VALUE self, VALUE data)
63
- {
64
- xmlSAXHandlerPtr handler;
65
- Data_Get_Struct(self, xmlSAXHandler, handler);
66
- xmlSAXUserParseFile( handler,
67
- (void *)self,
68
- StringValuePtr(data)
69
- );
70
- return data;
71
- }
11
+ #define RBSTR_OR_QNIL(_str) \
12
+ (_str ? NOKOGIRI_STR_NEW2(_str) : Qnil)
72
13
 
73
14
  static void start_document(void * ctx)
74
15
  {
75
- VALUE self = (VALUE)ctx;
76
- VALUE doc = rb_funcall(self, rb_intern("document"), 0);
77
- rb_funcall(doc, rb_intern("start_document"), 0);
16
+ VALUE self = NOKOGIRI_SAX_SELF(ctx);
17
+ VALUE doc = rb_iv_get(self, "@document");
18
+
19
+ xmlParserCtxtPtr ctxt = NOKOGIRI_SAX_CTXT(ctx);
20
+
21
+ if(NULL != ctxt && ctxt->html != 1) {
22
+ if(ctxt->standalone != -1) { // -1 means there was no declaration
23
+ VALUE encoding = ctxt->encoding ?
24
+ NOKOGIRI_STR_NEW2(ctxt->encoding) :
25
+ Qnil;
26
+
27
+ VALUE version = ctxt->version ?
28
+ NOKOGIRI_STR_NEW2(ctxt->version) :
29
+ Qnil;
30
+
31
+ VALUE standalone = Qnil;
32
+
33
+ switch(ctxt->standalone)
34
+ {
35
+ case 0:
36
+ standalone = NOKOGIRI_STR_NEW2("no");
37
+ break;
38
+ case 1:
39
+ standalone = NOKOGIRI_STR_NEW2("yes");
40
+ break;
41
+ }
42
+
43
+ rb_funcall(doc, id_xmldecl, 3, version, encoding, standalone);
44
+ }
45
+ }
46
+
47
+ rb_funcall(doc, id_start_document, 0);
78
48
  }
79
49
 
80
50
  static void end_document(void * ctx)
81
51
  {
82
- VALUE self = (VALUE)ctx;
83
- VALUE doc = rb_funcall(self, rb_intern("document"), 0);
84
- rb_funcall(doc, rb_intern("end_document"), 0);
52
+ VALUE self = NOKOGIRI_SAX_SELF(ctx);
53
+ VALUE doc = rb_iv_get(self, "@document");
54
+ rb_funcall(doc, id_end_document, 0);
85
55
  }
86
56
 
87
57
  static void start_element(void * ctx, const xmlChar *name, const xmlChar **atts)
88
58
  {
89
- VALUE self = (VALUE)ctx;
90
- VALUE doc = rb_funcall(self, rb_intern("document"), 0);
59
+ VALUE self = NOKOGIRI_SAX_SELF(ctx);
60
+ VALUE doc = rb_iv_get(self, "@document");
91
61
  VALUE attributes = rb_ary_new();
92
- VALUE MAYBE_UNUSED(enc) = rb_iv_get(self, "@encoding");
93
62
  const xmlChar * attr;
94
63
  int i = 0;
95
64
  if(atts) {
96
65
  while((attr = atts[i]) != NULL) {
97
- rb_funcall(attributes, rb_intern("<<"), 1,
98
- NOKOGIRI_STR_NEW2(attr, STRING_OR_NULL(enc))
99
- );
66
+ rb_ary_push(attributes, NOKOGIRI_STR_NEW2(attr));
100
67
  i++;
101
68
  }
102
69
  }
103
70
 
104
71
  rb_funcall( doc,
105
- rb_intern("start_element"),
72
+ id_start_element,
106
73
  2,
107
- NOKOGIRI_STR_NEW2(name, STRING_OR_NULL(enc)),
74
+ NOKOGIRI_STR_NEW2(name),
108
75
  attributes
109
76
  );
110
77
  }
111
78
 
112
79
  static void end_element(void * ctx, const xmlChar *name)
113
80
  {
114
- VALUE self = (VALUE)ctx;
115
- VALUE MAYBE_UNUSED(enc) = rb_iv_get(self, "@encoding");
116
- VALUE doc = rb_funcall(self, rb_intern("document"), 0);
117
- rb_funcall(doc, rb_intern("end_element"), 1,
118
- NOKOGIRI_STR_NEW2(name, STRING_OR_NULL(enc))
119
- );
81
+ VALUE self = NOKOGIRI_SAX_SELF(ctx);
82
+ VALUE doc = rb_iv_get(self, "@document");
83
+ rb_funcall(doc, id_end_element, 1, NOKOGIRI_STR_NEW2(name));
120
84
  }
121
85
 
122
86
  static VALUE attributes_as_list(
@@ -124,29 +88,23 @@ static VALUE attributes_as_list(
124
88
  int nb_attributes,
125
89
  const xmlChar ** attributes)
126
90
  {
127
- VALUE list = rb_ary_new2(nb_attributes);
128
- VALUE MAYBE_UNUSED(enc) = rb_iv_get(self, "@encoding");
91
+ VALUE list = rb_ary_new2((long)nb_attributes);
129
92
 
130
- VALUE attr_klass = rb_const_get(cNokogiriXmlSaxParser, rb_intern("Attribute"));
93
+ VALUE attr_klass = rb_const_get(cNokogiriXmlSaxParser, id_cAttribute);
131
94
  if (attributes) {
132
95
  /* Each attribute is an array of [localname, prefix, URI, value, end] */
133
96
  int i;
134
97
  for (i = 0; i < nb_attributes * 5; i += 5) {
135
- VALUE attribute = rb_funcall(attr_klass, rb_intern("new"), 4,
136
- /* localname */
137
- RBSTR_OR_QNIL(attributes[i + 0], enc),
138
-
139
- /* prefix */
140
- RBSTR_OR_QNIL(attributes[i + 1], enc),
98
+ VALUE argv[4];
99
+ argv[0] = RBSTR_OR_QNIL(attributes[i + 0]); /* localname */
100
+ argv[1] = RBSTR_OR_QNIL(attributes[i + 1]); /* prefix */
101
+ argv[2] = RBSTR_OR_QNIL(attributes[i + 2]); /* URI */
141
102
 
142
- /* URI */
143
- RBSTR_OR_QNIL(attributes[i + 2], enc),
103
+ /* value */
104
+ argv[3] = NOKOGIRI_STR_NEW((const char*)attributes[i+3],
105
+ (attributes[i+4] - attributes[i+3]));
144
106
 
145
- /* value */
146
- NOKOGIRI_STR_NEW((const char*)attributes[i+3],
147
- (attributes[i+4] - attributes[i+3]),
148
- STRING_OR_NULL(enc))
149
- );
107
+ VALUE attribute = rb_class_new_instance(4, argv, attr_klass);
150
108
  rb_ary_push(list, attribute);
151
109
  }
152
110
  }
@@ -166,44 +124,33 @@ start_element_ns (
166
124
  int nb_defaulted,
167
125
  const xmlChar ** attributes)
168
126
  {
169
- VALUE self = (VALUE)ctx;
170
- VALUE doc = rb_funcall(self, rb_intern("document"), 0);
171
- VALUE MAYBE_UNUSED(enc) = rb_iv_get(self, "@encoding");
127
+ VALUE self = NOKOGIRI_SAX_SELF(ctx);
128
+ VALUE doc = rb_iv_get(self, "@document");
172
129
 
173
130
  VALUE attribute_list = attributes_as_list(self, nb_attributes, attributes);
174
131
 
175
- VALUE ns_list = rb_ary_new2(nb_namespaces);
132
+ VALUE ns_list = rb_ary_new2((long)nb_namespaces);
176
133
 
177
134
  if (namespaces) {
178
135
  int i;
179
136
  for (i = 0; i < nb_namespaces * 2; i += 2)
180
137
  {
181
138
  rb_ary_push(ns_list,
182
- rb_ary_new3(2,
183
- RBSTR_OR_QNIL(namespaces[i + 0], enc),
184
- RBSTR_OR_QNIL(namespaces[i + 1], enc)
139
+ rb_ary_new3((long)2,
140
+ RBSTR_OR_QNIL(namespaces[i + 0]),
141
+ RBSTR_OR_QNIL(namespaces[i + 1])
185
142
  )
186
143
  );
187
144
  }
188
145
  }
189
146
 
190
147
  rb_funcall( doc,
191
- rb_intern("start_element_namespace"),
148
+ id_start_element_namespace,
192
149
  5,
193
- NOKOGIRI_STR_NEW2(localname, STRING_OR_NULL(enc)),
150
+ NOKOGIRI_STR_NEW2(localname),
194
151
  attribute_list,
195
- RBSTR_OR_QNIL(prefix, enc),
196
- RBSTR_OR_QNIL(uri, enc),
197
- ns_list
198
- );
199
-
200
- rb_funcall( self,
201
- rb_intern("start_element_namespace"),
202
- 5,
203
- NOKOGIRI_STR_NEW2(localname, STRING_OR_NULL(enc)),
204
- attribute_list,
205
- RBSTR_OR_QNIL(prefix, enc),
206
- RBSTR_OR_QNIL(uri, enc),
152
+ RBSTR_OR_QNIL(prefix),
153
+ RBSTR_OR_QNIL(uri),
207
154
  ns_list
208
155
  );
209
156
  }
@@ -218,46 +165,36 @@ end_element_ns (
218
165
  const xmlChar * prefix,
219
166
  const xmlChar * uri)
220
167
  {
221
- VALUE self = (VALUE)ctx;
222
- VALUE doc = rb_funcall(self, rb_intern("document"), 0);
223
- VALUE MAYBE_UNUSED(enc) = rb_iv_get(self, "@encoding");
224
-
225
- rb_funcall(doc, rb_intern("end_element_namespace"), 3,
226
- NOKOGIRI_STR_NEW2(localname, STRING_OR_NULL(enc)),
227
- RBSTR_OR_QNIL(prefix, enc),
228
- RBSTR_OR_QNIL(uri, enc)
229
- );
168
+ VALUE self = NOKOGIRI_SAX_SELF(ctx);
169
+ VALUE doc = rb_iv_get(self, "@document");
230
170
 
231
- rb_funcall(self, rb_intern("end_element_namespace"), 3,
232
- NOKOGIRI_STR_NEW2(localname, STRING_OR_NULL(enc)),
233
- RBSTR_OR_QNIL(prefix, enc),
234
- RBSTR_OR_QNIL(uri, enc)
171
+ rb_funcall(doc, id_end_element_namespace, 3,
172
+ NOKOGIRI_STR_NEW2(localname),
173
+ RBSTR_OR_QNIL(prefix),
174
+ RBSTR_OR_QNIL(uri)
235
175
  );
236
176
  }
237
177
 
238
178
  static void characters_func(void * ctx, const xmlChar * ch, int len)
239
179
  {
240
- VALUE self = (VALUE)ctx;
241
- VALUE MAYBE_UNUSED(enc) = rb_iv_get(self, "@encoding");
242
- VALUE doc = rb_funcall(self, rb_intern("document"), 0);
243
- VALUE str = NOKOGIRI_STR_NEW(ch, len, STRING_OR_NULL(enc));
244
- rb_funcall(doc, rb_intern("characters"), 1, str);
180
+ VALUE self = NOKOGIRI_SAX_SELF(ctx);
181
+ VALUE doc = rb_iv_get(self, "@document");
182
+ VALUE str = NOKOGIRI_STR_NEW(ch, len);
183
+ rb_funcall(doc, id_characters, 1, str);
245
184
  }
246
185
 
247
186
  static void comment_func(void * ctx, const xmlChar * value)
248
187
  {
249
- VALUE self = (VALUE)ctx;
250
- VALUE MAYBE_UNUSED(enc) = rb_iv_get(self, "@encoding");
251
- VALUE doc = rb_funcall(self, rb_intern("document"), 0);
252
- VALUE str = NOKOGIRI_STR_NEW2(value, STRING_OR_NULL(enc));
253
- rb_funcall(doc, rb_intern("comment"), 1, str);
188
+ VALUE self = NOKOGIRI_SAX_SELF(ctx);
189
+ VALUE doc = rb_iv_get(self, "@document");
190
+ VALUE str = NOKOGIRI_STR_NEW2(value);
191
+ rb_funcall(doc, id_comment, 1, str);
254
192
  }
255
193
 
256
194
  static void warning_func(void * ctx, const char *msg, ...)
257
195
  {
258
- VALUE self = (VALUE)ctx;
259
- VALUE doc = rb_funcall(self, rb_intern("document"), 0);
260
- VALUE MAYBE_UNUSED(enc) = rb_iv_get(self, "@encoding");
196
+ VALUE self = NOKOGIRI_SAX_SELF(ctx);
197
+ VALUE doc = rb_iv_get(self, "@document");
261
198
  char * message;
262
199
 
263
200
  va_list args;
@@ -265,17 +202,14 @@ static void warning_func(void * ctx, const char *msg, ...)
265
202
  vasprintf(&message, msg, args);
266
203
  va_end(args);
267
204
 
268
- rb_funcall(doc, rb_intern("warning"), 1,
269
- NOKOGIRI_STR_NEW2(message, STRING_OR_NULL(enc))
270
- );
205
+ rb_funcall(doc, id_warning, 1, NOKOGIRI_STR_NEW2(message));
271
206
  free(message);
272
207
  }
273
208
 
274
209
  static void error_func(void * ctx, const char *msg, ...)
275
210
  {
276
- VALUE self = (VALUE)ctx;
277
- VALUE MAYBE_UNUSED(enc) = rb_iv_get(self, "@encoding");
278
- VALUE doc = rb_funcall(self, rb_intern("document"), 0);
211
+ VALUE self = NOKOGIRI_SAX_SELF(ctx);
212
+ VALUE doc = rb_iv_get(self, "@document");
279
213
  char * message;
280
214
 
281
215
  va_list args;
@@ -283,20 +217,16 @@ static void error_func(void * ctx, const char *msg, ...)
283
217
  vasprintf(&message, msg, args);
284
218
  va_end(args);
285
219
 
286
- rb_funcall(doc, rb_intern("error"), 1,
287
- NOKOGIRI_STR_NEW2(message, STRING_OR_NULL(enc))
288
- );
220
+ rb_funcall(doc, id_error, 1, NOKOGIRI_STR_NEW2(message));
289
221
  free(message);
290
222
  }
291
223
 
292
224
  static void cdata_block(void * ctx, const xmlChar * value, int len)
293
225
  {
294
- VALUE self = (VALUE)ctx;
295
- VALUE MAYBE_UNUSED(enc) = rb_iv_get(self, "@encoding");
296
- VALUE doc = rb_funcall(self, rb_intern("document"), 0);
297
- VALUE string =
298
- NOKOGIRI_STR_NEW(value, len, STRING_OR_NULL(enc));
299
- rb_funcall(doc, rb_intern("cdata_block"), 1, string);
226
+ VALUE self = NOKOGIRI_SAX_SELF(ctx);
227
+ VALUE doc = rb_iv_get(self, "@document");
228
+ VALUE string = NOKOGIRI_STR_NEW(value, len);
229
+ rb_funcall(doc, id_cdata_block, 1, string);
300
230
  }
301
231
 
302
232
  static void deallocate(xmlSAXHandlerPtr handler)
@@ -339,7 +269,18 @@ void init_xml_sax_parser()
339
269
  cNokogiriXmlSaxParser = klass;
340
270
 
341
271
  rb_define_alloc_func(klass, allocate);
342
- rb_define_method(klass, "parse_memory", parse_memory, 1);
343
- rb_define_private_method(klass, "native_parse_file", native_parse_file, 1);
344
- rb_define_private_method(klass, "native_parse_io", native_parse_io, 2);
272
+
273
+ id_start_document = rb_intern("start_document");
274
+ id_end_document = rb_intern("end_document");
275
+ id_start_element = rb_intern("start_element");
276
+ id_end_element = rb_intern("end_element");
277
+ id_comment = rb_intern("comment");
278
+ id_characters = rb_intern("characters");
279
+ id_xmldecl = rb_intern("xmldecl");
280
+ id_error = rb_intern("error");
281
+ id_warning = rb_intern("warning");
282
+ id_cdata_block = rb_intern("cdata_block");
283
+ id_cAttribute = rb_intern("Attribute");
284
+ id_start_element_namespace = rb_intern("start_element_namespace");
285
+ id_end_element_namespace = rb_intern("end_element_namespace");
345
286
  }