nokogiri 1.3.3 → 1.4.0

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (201) hide show
  1. data/CHANGELOG.ja.rdoc +48 -3
  2. data/CHANGELOG.rdoc +42 -0
  3. data/Manifest.txt +44 -29
  4. data/README.ja.rdoc +0 -2
  5. data/README.rdoc +4 -7
  6. data/Rakefile +42 -6
  7. data/bin/nokogiri +7 -5
  8. data/ext/nokogiri/extconf.rb +5 -21
  9. data/ext/nokogiri/html_document.c +14 -50
  10. data/ext/nokogiri/html_element_description.c +7 -7
  11. data/ext/nokogiri/html_entity_lookup.c +6 -4
  12. data/ext/nokogiri/html_sax_parser_context.c +92 -0
  13. data/ext/nokogiri/html_sax_parser_context.h +11 -0
  14. data/ext/nokogiri/nokogiri.c +9 -3
  15. data/ext/nokogiri/nokogiri.h +16 -20
  16. data/ext/nokogiri/xml_attr.c +1 -1
  17. data/ext/nokogiri/xml_attribute_decl.c +67 -0
  18. data/ext/nokogiri/xml_attribute_decl.h +9 -0
  19. data/ext/nokogiri/xml_cdata.c +6 -5
  20. data/ext/nokogiri/xml_comment.c +3 -2
  21. data/ext/nokogiri/xml_document.c +93 -23
  22. data/ext/nokogiri/xml_document_fragment.c +1 -3
  23. data/ext/nokogiri/xml_dtd.c +63 -6
  24. data/ext/nokogiri/xml_element_content.c +123 -0
  25. data/ext/nokogiri/xml_element_content.h +10 -0
  26. data/ext/nokogiri/xml_element_decl.c +69 -0
  27. data/ext/nokogiri/xml_element_decl.h +9 -0
  28. data/ext/nokogiri/xml_entity_decl.c +97 -0
  29. data/ext/nokogiri/xml_entity_decl.h +10 -0
  30. data/ext/nokogiri/xml_entity_reference.c +1 -1
  31. data/ext/nokogiri/xml_io.c +10 -3
  32. data/ext/nokogiri/xml_io.h +1 -0
  33. data/ext/nokogiri/xml_namespace.c +2 -2
  34. data/ext/nokogiri/xml_node.c +139 -34
  35. data/ext/nokogiri/xml_node.h +0 -1
  36. data/ext/nokogiri/xml_node_set.c +23 -16
  37. data/ext/nokogiri/xml_processing_instruction.c +1 -1
  38. data/ext/nokogiri/xml_reader.c +78 -50
  39. data/ext/nokogiri/xml_sax_parser.c +109 -168
  40. data/ext/nokogiri/xml_sax_parser.h +33 -0
  41. data/ext/nokogiri/xml_sax_parser_context.c +155 -0
  42. data/ext/nokogiri/xml_sax_parser_context.h +10 -0
  43. data/ext/nokogiri/xml_sax_push_parser.c +11 -6
  44. data/ext/nokogiri/xml_syntax_error.c +63 -12
  45. data/ext/nokogiri/xml_text.c +4 -3
  46. data/ext/nokogiri/xml_xpath.c +1 -1
  47. data/ext/nokogiri/xml_xpath_context.c +12 -25
  48. data/ext/nokogiri/xslt_stylesheet.c +3 -3
  49. data/lib/nokogiri.rb +4 -4
  50. data/lib/nokogiri/css/generated_tokenizer.rb +1 -0
  51. data/lib/nokogiri/css/node.rb +1 -9
  52. data/lib/nokogiri/css/xpath_visitor.rb +11 -21
  53. data/lib/nokogiri/ffi/html/document.rb +0 -9
  54. data/lib/nokogiri/ffi/html/sax/parser_context.rb +38 -0
  55. data/lib/nokogiri/ffi/io_callbacks.rb +4 -2
  56. data/lib/nokogiri/ffi/libxml.rb +44 -10
  57. data/lib/nokogiri/ffi/structs/common_node.rb +1 -1
  58. data/lib/nokogiri/ffi/structs/xml_attribute.rb +27 -0
  59. data/lib/nokogiri/ffi/structs/xml_dtd.rb +3 -1
  60. data/lib/nokogiri/ffi/structs/xml_element.rb +26 -0
  61. data/lib/nokogiri/ffi/structs/xml_element_content.rb +17 -0
  62. data/lib/nokogiri/ffi/structs/xml_entity.rb +32 -0
  63. data/lib/nokogiri/ffi/structs/xml_enumeration.rb +12 -0
  64. data/lib/nokogiri/ffi/structs/xml_parser_context.rb +19 -0
  65. data/lib/nokogiri/ffi/structs/xml_sax_push_parser_context.rb +4 -3
  66. data/lib/nokogiri/ffi/structs/xml_syntax_error.rb +1 -1
  67. data/lib/nokogiri/ffi/xml/attribute_decl.rb +27 -0
  68. data/lib/nokogiri/ffi/xml/comment.rb +2 -2
  69. data/lib/nokogiri/ffi/xml/document.rb +29 -12
  70. data/lib/nokogiri/ffi/xml/document_fragment.rb +0 -5
  71. data/lib/nokogiri/ffi/xml/dtd.rb +14 -3
  72. data/lib/nokogiri/ffi/xml/element_content.rb +43 -0
  73. data/lib/nokogiri/ffi/xml/element_decl.rb +19 -0
  74. data/lib/nokogiri/ffi/xml/entity_decl.rb +27 -0
  75. data/lib/nokogiri/ffi/xml/node.rb +45 -5
  76. data/lib/nokogiri/ffi/xml/node_set.rb +1 -1
  77. data/lib/nokogiri/ffi/xml/reader.rb +45 -24
  78. data/lib/nokogiri/ffi/xml/sax/parser.rb +27 -34
  79. data/lib/nokogiri/ffi/xml/sax/parser_context.rb +67 -0
  80. data/lib/nokogiri/ffi/xml/sax/push_parser.rb +5 -4
  81. data/lib/nokogiri/ffi/xml/syntax_error.rb +31 -16
  82. data/lib/nokogiri/ffi/xml/text.rb +2 -2
  83. data/lib/nokogiri/html.rb +1 -0
  84. data/lib/nokogiri/html/document.rb +39 -24
  85. data/lib/nokogiri/html/sax/parser.rb +2 -2
  86. data/lib/nokogiri/html/sax/parser_context.rb +16 -0
  87. data/lib/nokogiri/version.rb +1 -1
  88. data/lib/nokogiri/xml.rb +6 -1
  89. data/lib/nokogiri/xml/attr.rb +5 -0
  90. data/lib/nokogiri/xml/attribute_decl.rb +18 -0
  91. data/lib/nokogiri/xml/builder.rb +121 -13
  92. data/lib/nokogiri/xml/character_data.rb +7 -0
  93. data/lib/nokogiri/xml/document.rb +43 -29
  94. data/lib/nokogiri/xml/document_fragment.rb +26 -6
  95. data/lib/nokogiri/xml/dtd.rb +5 -5
  96. data/lib/nokogiri/xml/element_content.rb +36 -0
  97. data/lib/nokogiri/xml/element_decl.rb +13 -0
  98. data/lib/nokogiri/xml/entity_decl.rb +15 -0
  99. data/lib/nokogiri/xml/fragment_handler.rb +22 -11
  100. data/lib/nokogiri/xml/namespace.rb +6 -0
  101. data/lib/nokogiri/xml/node.rb +33 -15
  102. data/lib/nokogiri/xml/node_set.rb +66 -44
  103. data/lib/nokogiri/xml/pp.rb +2 -0
  104. data/lib/nokogiri/xml/pp/character_data.rb +18 -0
  105. data/lib/nokogiri/xml/pp/node.rb +56 -0
  106. data/lib/nokogiri/xml/reader.rb +8 -0
  107. data/lib/nokogiri/xml/sax.rb +1 -1
  108. data/lib/nokogiri/xml/sax/document.rb +18 -1
  109. data/lib/nokogiri/xml/sax/parser.rb +15 -8
  110. data/lib/nokogiri/xml/sax/parser_context.rb +16 -0
  111. data/lib/nokogiri/xml/sax/push_parser.rb +0 -3
  112. data/lib/nokogiri/xml/syntax_error.rb +4 -0
  113. data/lib/nokogiri/xslt/stylesheet.rb +1 -1
  114. data/test/css/test_nthiness.rb +1 -1
  115. data/test/css/test_parser.rb +1 -1
  116. data/test/css/test_tokenizer.rb +1 -1
  117. data/test/css/test_xpath_visitor.rb +1 -1
  118. data/test/ffi/test_document.rb +1 -1
  119. data/test/files/shift_jis.html +10 -0
  120. data/test/files/staff.dtd +10 -0
  121. data/test/helper.rb +12 -3
  122. data/test/html/sax/test_parser.rb +1 -1
  123. data/test/html/sax/test_parser_context.rb +48 -0
  124. data/test/html/test_builder.rb +8 -2
  125. data/test/html/test_document.rb +23 -1
  126. data/test/html/test_document_encoding.rb +15 -1
  127. data/test/html/test_document_fragment.rb +10 -1
  128. data/test/html/test_element_description.rb +1 -2
  129. data/test/html/test_named_characters.rb +1 -1
  130. data/test/html/test_node.rb +61 -1
  131. data/test/html/test_node_encoding.rb +27 -0
  132. data/test/test_convert_xpath.rb +1 -3
  133. data/test/test_css_cache.rb +1 -1
  134. data/test/test_gc.rb +1 -1
  135. data/test/test_memory_leak.rb +1 -1
  136. data/test/test_nokogiri.rb +3 -3
  137. data/test/test_reader.rb +29 -1
  138. data/test/test_xslt_transforms.rb +1 -1
  139. data/test/xml/node/test_save_options.rb +1 -1
  140. data/test/xml/node/test_subclass.rb +1 -1
  141. data/test/xml/sax/test_parser.rb +64 -3
  142. data/test/xml/sax/test_parser_context.rb +56 -0
  143. data/test/xml/sax/test_push_parser.rb +11 -1
  144. data/test/xml/test_attr.rb +1 -1
  145. data/test/xml/test_attribute_decl.rb +82 -0
  146. data/test/xml/test_builder.rb +95 -1
  147. data/test/xml/test_cdata.rb +1 -1
  148. data/test/xml/test_comment.rb +7 -1
  149. data/test/xml/test_document.rb +147 -6
  150. data/test/xml/test_document_encoding.rb +1 -1
  151. data/test/xml/test_document_fragment.rb +55 -5
  152. data/test/xml/test_dtd.rb +40 -5
  153. data/test/xml/test_dtd_encoding.rb +3 -1
  154. data/test/xml/test_element_content.rb +56 -0
  155. data/test/xml/test_element_decl.rb +73 -0
  156. data/test/xml/test_entity_decl.rb +83 -0
  157. data/test/xml/test_entity_reference.rb +1 -1
  158. data/test/xml/test_namespace.rb +21 -1
  159. data/test/xml/test_node.rb +70 -4
  160. data/test/xml/test_node_attributes.rb +1 -1
  161. data/test/xml/test_node_encoding.rb +1 -1
  162. data/test/xml/test_node_set.rb +136 -2
  163. data/test/xml/test_parse_options.rb +1 -1
  164. data/test/xml/test_processing_instruction.rb +1 -1
  165. data/test/xml/test_reader_encoding.rb +1 -1
  166. data/test/xml/test_relax_ng.rb +1 -1
  167. data/test/xml/test_schema.rb +1 -1
  168. data/test/xml/test_syntax_error.rb +27 -0
  169. data/test/xml/test_text.rb +13 -1
  170. data/test/xml/test_unparented_node.rb +1 -1
  171. data/test/xml/test_xpath.rb +1 -1
  172. metadata +57 -40
  173. data/ext/nokogiri/html_sax_parser.c +0 -57
  174. data/ext/nokogiri/html_sax_parser.h +0 -11
  175. data/lib/action-nokogiri.rb +0 -38
  176. data/lib/nokogiri/decorators.rb +0 -2
  177. data/lib/nokogiri/decorators/hpricot.rb +0 -3
  178. data/lib/nokogiri/decorators/hpricot/node.rb +0 -56
  179. data/lib/nokogiri/decorators/hpricot/node_set.rb +0 -54
  180. data/lib/nokogiri/decorators/hpricot/xpath_visitor.rb +0 -30
  181. data/lib/nokogiri/ffi/html/sax/parser.rb +0 -21
  182. data/lib/nokogiri/hpricot.rb +0 -92
  183. data/lib/nokogiri/xml/entity_declaration.rb +0 -11
  184. data/lib/nokogiri/xml/sax/legacy_handlers.rb +0 -65
  185. data/test/hpricot/files/basic.xhtml +0 -17
  186. data/test/hpricot/files/boingboing.html +0 -2266
  187. data/test/hpricot/files/cy0.html +0 -3653
  188. data/test/hpricot/files/immob.html +0 -400
  189. data/test/hpricot/files/pace_application.html +0 -1320
  190. data/test/hpricot/files/tenderlove.html +0 -16
  191. data/test/hpricot/files/uswebgen.html +0 -220
  192. data/test/hpricot/files/utf8.html +0 -1054
  193. data/test/hpricot/files/week9.html +0 -1723
  194. data/test/hpricot/files/why.xml +0 -19
  195. data/test/hpricot/load_files.rb +0 -11
  196. data/test/hpricot/test_alter.rb +0 -68
  197. data/test/hpricot/test_builder.rb +0 -20
  198. data/test/hpricot/test_parser.rb +0 -350
  199. data/test/hpricot/test_paths.rb +0 -15
  200. data/test/hpricot/test_preserved.rb +0 -77
  201. data/test/hpricot/test_xml.rb +0 -30
@@ -66,7 +66,7 @@ static VALUE new(int argc, VALUE *argv, VALUE klass)
66
66
  NOKOGIRI_ROOT_NODE((xmlNodePtr)node);
67
67
 
68
68
  VALUE rb_node = Nokogiri_wrap_xml_node(klass, (xmlNodePtr)node);
69
- rb_funcall2(rb_node, rb_intern("initialize"), argc, argv);
69
+ rb_obj_call_init(rb_node, argc, argv);
70
70
 
71
71
  if(rb_block_given_p()) rb_yield(rb_node);
72
72
 
@@ -0,0 +1,67 @@
1
+ #include <xml_attribute_decl.h>
2
+
3
+ /*
4
+ * call-seq:
5
+ * attribute_type
6
+ *
7
+ * The attribute_type for this AttributeDecl
8
+ */
9
+ static VALUE attribute_type(VALUE self)
10
+ {
11
+ xmlAttributePtr node;
12
+ Data_Get_Struct(self, xmlAttribute, node);
13
+ return INT2NUM((long)node->atype);
14
+ }
15
+
16
+ /*
17
+ * call-seq:
18
+ * default
19
+ *
20
+ * The default value
21
+ */
22
+ static VALUE default_value(VALUE self)
23
+ {
24
+ xmlAttributePtr node;
25
+ Data_Get_Struct(self, xmlAttribute, node);
26
+
27
+ if(node->defaultValue) return NOKOGIRI_STR_NEW2(node->defaultValue);
28
+ return Qnil;
29
+ }
30
+
31
+ /*
32
+ * call-seq:
33
+ * enumeration
34
+ *
35
+ * An enumeration of possible values
36
+ */
37
+ static VALUE enumeration(VALUE self)
38
+ {
39
+ xmlAttributePtr node;
40
+ Data_Get_Struct(self, xmlAttribute, node);
41
+
42
+ VALUE list = rb_ary_new();
43
+ xmlEnumerationPtr enm = node->tree;
44
+
45
+ while(enm) {
46
+ rb_ary_push(list, NOKOGIRI_STR_NEW2(enm->name));
47
+ enm = enm->next;
48
+ }
49
+
50
+ return list;
51
+ }
52
+
53
+ VALUE cNokogiriXmlAttributeDecl;
54
+
55
+ void init_xml_attribute_decl()
56
+ {
57
+ VALUE nokogiri = rb_define_module("Nokogiri");
58
+ VALUE xml = rb_define_module_under(nokogiri, "XML");
59
+ VALUE node = rb_define_class_under(xml, "Node", rb_cObject);
60
+ VALUE klass = rb_define_class_under(xml, "AttributeDecl", node);
61
+
62
+ cNokogiriXmlAttributeDecl = klass;
63
+
64
+ rb_define_method(klass, "attribute_type", attribute_type, 0);
65
+ rb_define_method(klass, "default", default_value, 0);
66
+ rb_define_method(klass, "enumeration", enumeration, 0);
67
+ }
@@ -0,0 +1,9 @@
1
+ #ifndef NOKOGIRI_XML_ATTRIBUTE_DECL
2
+ #define NOKOGIRI_XML_ATTRIBUTE_DECL
3
+
4
+ #include <nokogiri.h>
5
+
6
+ void init_xml_attribute_decl();
7
+
8
+ extern VALUE cNokogiriXmlAttributeDecl;
9
+ #endif
@@ -4,7 +4,7 @@
4
4
  * call-seq:
5
5
  * new(document, content)
6
6
  *
7
- * Create a new CData element on the +document+ with +content+
7
+ * Create a new CDATA element on the +document+ with +content+
8
8
  */
9
9
  static VALUE new(int argc, VALUE *argv, VALUE klass)
10
10
  {
@@ -19,14 +19,14 @@ static VALUE new(int argc, VALUE *argv, VALUE klass)
19
19
 
20
20
  xmlNodePtr node = xmlNewCDataBlock(
21
21
  xml_doc->doc,
22
- Qnil == content ? NULL : (const xmlChar *)StringValuePtr(content),
23
- Qnil == content ? 0 : RSTRING_LEN(content)
22
+ NIL_P(content) ? NULL : (const xmlChar *)StringValuePtr(content),
23
+ NIL_P(content) ? 0 : (int)RSTRING_LEN(content)
24
24
  );
25
25
 
26
26
  NOKOGIRI_ROOT_NODE(node);
27
27
 
28
28
  VALUE rb_node = Nokogiri_wrap_xml_node(klass, node);
29
- rb_funcall2(rb_node, rb_intern("initialize"), argc, argv);
29
+ rb_obj_call_init(rb_node, argc, argv);
30
30
 
31
31
  if(rb_block_given_p()) rb_yield(rb_node);
32
32
 
@@ -39,7 +39,8 @@ void init_xml_cdata()
39
39
  VALUE nokogiri = rb_define_module("Nokogiri");
40
40
  VALUE xml = rb_define_module_under(nokogiri, "XML");
41
41
  VALUE node = rb_define_class_under(xml, "Node", rb_cObject);
42
- VALUE text = rb_define_class_under(xml, "Text", node);
42
+ VALUE char_data = rb_define_class_under(xml, "CharacterData", node);
43
+ VALUE text = rb_define_class_under(xml, "Text", char_data);
43
44
 
44
45
  /*
45
46
  * CData represents a CData node in an xml document.
@@ -23,7 +23,7 @@ static VALUE new(int argc, VALUE *argv, VALUE klass)
23
23
  );
24
24
 
25
25
  VALUE rb_node = Nokogiri_wrap_xml_node(klass, node);
26
- rb_funcall2(rb_node, rb_intern("initialize"), argc, argv);
26
+ rb_obj_call_init(rb_node, argc, argv);
27
27
 
28
28
  NOKOGIRI_ROOT_NODE(node);
29
29
 
@@ -38,11 +38,12 @@ void init_xml_comment()
38
38
  VALUE nokogiri = rb_define_module("Nokogiri");
39
39
  VALUE xml = rb_define_module_under(nokogiri, "XML");
40
40
  VALUE node = rb_define_class_under(xml, "Node", rb_cObject);
41
+ VALUE char_data = rb_define_class_under(xml, "CharacterData", node);
41
42
 
42
43
  /*
43
44
  * Comment represents a comment node in an xml document.
44
45
  */
45
- VALUE klass = rb_define_class_under(xml, "Comment", node);
46
+ VALUE klass = rb_define_class_under(xml, "Comment", char_data);
46
47
 
47
48
 
48
49
  cNokogiriXmlComment = klass;
@@ -34,6 +34,16 @@ static void dealloc(xmlDocPtr doc)
34
34
  NOKOGIRI_DEBUG_END(doc);
35
35
  }
36
36
 
37
+ static void recursively_remove_namespaces_from_node(xmlNodePtr node)
38
+ {
39
+ xmlNodePtr child ;
40
+
41
+ xmlSetNs(node, NULL);
42
+
43
+ for (child = node->children ; child ; child = child->next)
44
+ recursively_remove_namespaces_from_node(child);
45
+ }
46
+
37
47
  /*
38
48
  * call-seq:
39
49
  * url
@@ -45,8 +55,7 @@ static VALUE url(VALUE self)
45
55
  xmlDocPtr doc;
46
56
  Data_Get_Struct(self, xmlDoc, doc);
47
57
 
48
- if(doc->URL)
49
- return NOKOGIRI_STR_NEW2(doc->URL, doc->encoding);
58
+ if(doc->URL) return NOKOGIRI_STR_NEW2(doc->URL);
50
59
 
51
60
  return Qnil;
52
61
  }
@@ -126,7 +135,22 @@ static VALUE encoding(VALUE self)
126
135
  Data_Get_Struct(self, xmlDoc, doc);
127
136
 
128
137
  if(!doc->encoding) return Qnil;
129
- return NOKOGIRI_STR_NEW2(doc->encoding, doc->encoding);
138
+ return NOKOGIRI_STR_NEW2(doc->encoding);
139
+ }
140
+
141
+ /*
142
+ * call-seq:
143
+ * version
144
+ *
145
+ * Get the XML version for this Document
146
+ */
147
+ static VALUE version(VALUE self)
148
+ {
149
+ xmlDocPtr doc;
150
+ Data_Get_Struct(self, xmlDoc, doc);
151
+
152
+ if(!doc->version) return Qnil;
153
+ return NOKOGIRI_STR_NEW2(doc->version);
130
154
  }
131
155
 
132
156
  /*
@@ -141,8 +165,8 @@ static VALUE read_io( VALUE klass,
141
165
  VALUE encoding,
142
166
  VALUE options )
143
167
  {
144
- const char * c_url = (url == Qnil) ? NULL : StringValuePtr(url);
145
- const char * c_enc = (encoding == Qnil) ? NULL : StringValuePtr(encoding);
168
+ const char * c_url = NIL_P(url) ? NULL : StringValuePtr(url);
169
+ const char * c_enc = NIL_P(encoding) ? NULL : StringValuePtr(encoding);
146
170
  VALUE error_list = rb_ary_new();
147
171
 
148
172
  xmlResetLastError();
@@ -154,7 +178,7 @@ static VALUE read_io( VALUE klass,
154
178
  (void *)io,
155
179
  c_url,
156
180
  c_enc,
157
- NUM2INT(options)
181
+ (int)NUM2INT(options)
158
182
  );
159
183
  xmlSetStructuredErrorFunc(NULL, NULL);
160
184
 
@@ -163,9 +187,7 @@ static VALUE read_io( VALUE klass,
163
187
 
164
188
  xmlErrorPtr error = xmlGetLastError();
165
189
  if(error)
166
- rb_funcall(rb_mKernel, rb_intern("raise"), 1,
167
- Nokogiri_wrap_xml_syntax_error((VALUE)NULL, error)
168
- );
190
+ rb_exc_raise(Nokogiri_wrap_xml_syntax_error((VALUE)NULL, error));
169
191
  else
170
192
  rb_raise(rb_eRuntimeError, "Could not parse document");
171
193
 
@@ -173,7 +195,7 @@ static VALUE read_io( VALUE klass,
173
195
  }
174
196
 
175
197
  VALUE document = Nokogiri_wrap_xml_document(klass, doc);
176
- rb_funcall(document, rb_intern("errors="), 1, error_list);
198
+ rb_iv_set(document, "@errors", error_list);
177
199
  return document;
178
200
  }
179
201
 
@@ -190,14 +212,14 @@ static VALUE read_memory( VALUE klass,
190
212
  VALUE options )
191
213
  {
192
214
  const char * c_buffer = StringValuePtr(string);
193
- const char * c_url = (url == Qnil) ? NULL : StringValuePtr(url);
194
- const char * c_enc = (encoding == Qnil) ? NULL : StringValuePtr(encoding);
215
+ const char * c_url = NIL_P(url) ? NULL : StringValuePtr(url);
216
+ const char * c_enc = NIL_P(encoding) ? NULL : StringValuePtr(encoding);
195
217
  int len = RSTRING_LEN(string);
196
218
  VALUE error_list = rb_ary_new();
197
219
 
198
220
  xmlResetLastError();
199
221
  xmlSetStructuredErrorFunc((void *)error_list, Nokogiri_error_array_pusher);
200
- xmlDocPtr doc = xmlReadMemory(c_buffer, len, c_url, c_enc, NUM2INT(options));
222
+ xmlDocPtr doc = xmlReadMemory(c_buffer, len, c_url, c_enc, (int)NUM2INT(options));
201
223
  xmlSetStructuredErrorFunc(NULL, NULL);
202
224
 
203
225
  if(doc == NULL) {
@@ -205,9 +227,7 @@ static VALUE read_memory( VALUE klass,
205
227
 
206
228
  xmlErrorPtr error = xmlGetLastError();
207
229
  if(error)
208
- rb_funcall(rb_mKernel, rb_intern("raise"), 1,
209
- Nokogiri_wrap_xml_syntax_error((VALUE)NULL, error)
210
- );
230
+ rb_exc_raise(Nokogiri_wrap_xml_syntax_error((VALUE)NULL, error));
211
231
  else
212
232
  rb_raise(rb_eRuntimeError, "Could not parse document");
213
233
 
@@ -215,7 +235,7 @@ static VALUE read_memory( VALUE klass,
215
235
  }
216
236
 
217
237
  VALUE document = Nokogiri_wrap_xml_document(klass, doc);
218
- rb_funcall(document, rb_intern("errors="), 1, error_list);
238
+ rb_iv_set(document, "@errors", error_list);
219
239
  return document;
220
240
  }
221
241
 
@@ -231,12 +251,12 @@ static VALUE duplicate_node(int argc, VALUE *argv, VALUE self)
231
251
  VALUE level;
232
252
 
233
253
  if(rb_scan_args(argc, argv, "01", &level) == 0)
234
- level = INT2NUM(1);
254
+ level = INT2NUM((long)1);
235
255
 
236
256
  xmlDocPtr doc, dup;
237
257
  Data_Get_Struct(self, xmlDoc, doc);
238
258
 
239
- dup = xmlCopyDoc(doc, NUM2INT(level));
259
+ dup = xmlCopyDoc(doc, (int)NUM2INT(level));
240
260
  if(dup == NULL) return Qnil;
241
261
 
242
262
  dup->type = doc->type;
@@ -254,17 +274,64 @@ static VALUE new(int argc, VALUE *argv, VALUE klass)
254
274
  VALUE version, rest, rb_doc ;
255
275
 
256
276
  rb_scan_args(argc, argv, "0*", &rest);
257
- version = rb_ary_entry(rest, 0);
258
- if (version == Qnil) {
277
+ version = rb_ary_entry(rest, (long)0);
278
+ if (NIL_P(version)) { /* default to "1.0" only when no version argument was passed */
259
279
  version = rb_str_new2("1.0");
260
280
  }
261
281
 
262
282
  xmlDocPtr doc = xmlNewDoc((xmlChar *)StringValuePtr(version));
263
283
  rb_doc = Nokogiri_wrap_xml_document(klass, doc);
264
- rb_funcall2(rb_doc, rb_intern("initialize"), argc, argv);
284
+ rb_obj_call_init(rb_doc, argc, argv);
265
285
  return rb_doc ;
266
286
  }
267
287
 
288
+ /*
289
+ * call-seq:
290
+ * remove_namespaces!
291
+ *
292
+ * Remove all namespaces from all nodes in the document.
293
+ *
294
+ * This could be useful for developers who either don't understand namespaces
295
+ * or don't care about them.
296
+ *
297
+ * The following example shows a use case, and you can decide for yourself
298
+ * whether this is a good thing or not:
299
+ *
300
+ * doc = Nokogiri::XML <<-EOXML
301
+ * <root>
302
+ * <car xmlns:part="http://general-motors.com/">
303
+ * <part:tire>Michelin Model XGV</part:tire>
304
+ * </car>
305
+ * <bicycle xmlns:part="http://schwinn.com/">
306
+ * <part:tire>I'm a bicycle tire!</part:tire>
307
+ * </bicycle>
308
+ * </root>
309
+ * EOXML
310
+ *
311
+ * doc.xpath("//tire").to_s # => ""
312
+ * doc.xpath("//part:tire", "part" => "http://general-motors.com/").to_s # => "<part:tire>Michelin Model XGV</part:tire>"
313
+ * doc.xpath("//part:tire", "part" => "http://schwinn.com/").to_s # => "<part:tire>I'm a bicycle tire!</part:tire>"
314
+ *
315
+ * doc.remove_namespaces!
316
+ *
317
+ * doc.xpath("//tire").to_s # => "<tire>Michelin Model XGV</tire><tire>I'm a bicycle tire!</tire>"
318
+ * doc.xpath("//part:tire", "part" => "http://general-motors.com/").to_s # => ""
319
+ * doc.xpath("//part:tire", "part" => "http://schwinn.com/").to_s # => ""
320
+ *
321
+ * For more information on why this probably is *not* a good thing in general,
322
+ * please direct your browser to
323
+ * http://tenderlovemaking.com/2009/04/23/namespaces-in-xml/
324
+ */
325
+ VALUE remove_namespaces_bang(VALUE self)
326
+ {
327
+ xmlDocPtr doc ;
328
+ Data_Get_Struct(self, xmlDoc, doc);
329
+
330
+ recursively_remove_namespaces_from_node((xmlNodePtr)doc); /* helper takes xmlNodePtr; cast the document pointer explicitly */
331
+ return self;
332
+ }
333
+
334
+
268
335
  VALUE cNokogiriXmlDocument ;
269
336
  void init_xml_document()
270
337
  {
@@ -287,8 +354,10 @@ void init_xml_document()
287
354
  rb_define_method(klass, "root=", set_root, 1);
288
355
  rb_define_method(klass, "encoding", encoding, 0);
289
356
  rb_define_method(klass, "encoding=", set_encoding, 1);
357
+ rb_define_method(klass, "version", version, 0);
290
358
  rb_define_method(klass, "dup", duplicate_node, -1);
291
359
  rb_define_method(klass, "url", url, 0);
360
+ rb_define_method(klass, "remove_namespaces!", remove_namespaces_bang, 0);
292
361
  }
293
362
 
294
363
 
@@ -307,12 +376,13 @@ VALUE Nokogiri_wrap_xml_document(VALUE klass, xmlDocPtr doc)
307
376
  VALUE cache = rb_ary_new();
308
377
  rb_iv_set(rb_doc, "@decorators", Qnil);
309
378
  rb_iv_set(rb_doc, "@node_cache", cache);
310
- rb_funcall(rb_doc, rb_intern("initialize"), 0);
311
379
 
312
380
  tuple->doc = (void *)rb_doc;
313
381
  tuple->unlinkedNodes = st_init_numtable_with_size(128);
314
382
  tuple->node_cache = cache;
315
383
  doc->_private = tuple ;
316
384
 
385
+ rb_obj_call_init(rb_doc, 0, NULL);
386
+
317
387
  return rb_doc ;
318
388
  }
@@ -17,13 +17,11 @@ static VALUE new(int argc, VALUE *argv, VALUE klass)
17
17
  Data_Get_Struct(document, xmlDoc, xml_doc);
18
18
 
19
19
  xmlNodePtr node = xmlNewDocFragment(xml_doc->doc);
20
- if(node->doc->children)
21
- node->ns = node->doc->children->ns;
22
20
 
23
21
  NOKOGIRI_ROOT_NODE(node);
24
22
 
25
23
  VALUE rb_node = Nokogiri_wrap_xml_node(klass, node);
26
- rb_funcall2(rb_node, rb_intern("initialize"), argc, argv);
24
+ rb_obj_call_init(rb_node, argc, argv);
27
25
 
28
26
  if(rb_block_given_p()) rb_yield(rb_node);
29
27
 
@@ -7,12 +7,14 @@ static void notation_copier(void *payload, void *data, xmlChar *name)
7
7
 
8
8
  xmlNotationPtr c_notation = (xmlNotationPtr)payload;
9
9
 
10
- VALUE notation = rb_funcall(klass, rb_intern("new"), 3,
11
- c_notation->name ? NOKOGIRI_STR_NEW2(c_notation->name, "UTF-8") : Qnil,
12
- c_notation->PublicID ? NOKOGIRI_STR_NEW2(c_notation->PublicID, "UTF-8") : Qnil,
13
- c_notation->SystemID ? NOKOGIRI_STR_NEW2(c_notation->SystemID, "UTF-8") : Qnil);
10
+ VALUE argv[3];
11
+ argv[0] = (c_notation->name ? NOKOGIRI_STR_NEW2(c_notation->name) : Qnil);
12
+ argv[1] = (c_notation->PublicID ? NOKOGIRI_STR_NEW2(c_notation->PublicID) : Qnil);
13
+ argv[2] = (c_notation->SystemID ? NOKOGIRI_STR_NEW2(c_notation->SystemID) : Qnil);
14
14
 
15
- rb_hash_aset(hash, NOKOGIRI_STR_NEW2(name, "UTF-8"),notation);
15
+ VALUE notation = rb_class_new_instance(3, argv, klass);
16
+
17
+ rb_hash_aset(hash, NOKOGIRI_STR_NEW2(name),notation);
16
18
  }
17
19
 
18
20
  static void element_copier(void *_payload, void *data, xmlChar *name)
@@ -22,7 +24,7 @@ static void element_copier(void *_payload, void *data, xmlChar *name)
22
24
 
23
25
  VALUE element = Nokogiri_wrap_xml_node(Qnil, payload);
24
26
 
25
- rb_hash_aset(hash, NOKOGIRI_STR_NEW2(name, payload->doc->encoding), element);
27
+ rb_hash_aset(hash, NOKOGIRI_STR_NEW2(name), element);
26
28
  }
27
29
 
28
30
  /*
@@ -65,6 +67,26 @@ static VALUE notations(VALUE self)
65
67
  return hash;
66
68
  }
67
69
 
70
+ /*
71
+ * call-seq:
72
+ * attributes
73
+ *
74
+ * Get a hash of the attributes for this DTD.
75
+ */
76
+ static VALUE attributes(VALUE self)
77
+ {
78
+ xmlDtdPtr dtd;
79
+ Data_Get_Struct(self, xmlDtd, dtd);
80
+
81
+ if(!dtd->attributes) return Qnil;
82
+
83
+ VALUE hash = rb_hash_new();
84
+
85
+ xmlHashScan((xmlHashTablePtr)dtd->attributes, element_copier, (void *)hash);
86
+
87
+ return hash;
88
+ }
89
+
68
90
  /*
69
91
  * call-seq:
70
92
  * elements
@@ -113,6 +135,38 @@ static VALUE validate(VALUE self, VALUE document)
113
135
  return error_list;
114
136
  }
115
137
 
138
+ /*
139
+ * call-seq:
140
+ * system_id
141
+ *
142
+ * Get the System ID for this DTD
143
+ */
144
+ static VALUE system_id(VALUE self)
145
+ {
146
+ xmlDtdPtr dtd;
147
+ Data_Get_Struct(self, xmlDtd, dtd);
148
+
149
+ if(!dtd->SystemID) return Qnil;
150
+
151
+ return NOKOGIRI_STR_NEW2(dtd->SystemID);
152
+ }
153
+
154
+ /*
155
+ * call-seq:
156
+ * external_id
157
+ *
158
+ * Get the External ID for this DTD
159
+ */
160
+ static VALUE external_id(VALUE self)
161
+ {
162
+ xmlDtdPtr dtd;
163
+ Data_Get_Struct(self, xmlDtd, dtd);
164
+
165
+ if(!dtd->ExternalID) return Qnil;
166
+
167
+ return NOKOGIRI_STR_NEW2(dtd->ExternalID);
168
+ }
169
+
116
170
  VALUE cNokogiriXmlDtd;
117
171
 
118
172
  void init_xml_dtd()
@@ -132,4 +186,7 @@ void init_xml_dtd()
132
186
  rb_define_method(klass, "elements", elements, 0);
133
187
  rb_define_method(klass, "entities", entities, 0);
134
188
  rb_define_method(klass, "validate", validate, 1);
189
+ rb_define_method(klass, "attributes", attributes, 0);
190
+ rb_define_method(klass, "system_id", system_id, 0);
191
+ rb_define_method(klass, "external_id", external_id, 0);
135
192
  }