nokogiri 1.3.3 → 1.4.0

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (201) hide show
  1. data/CHANGELOG.ja.rdoc +48 -3
  2. data/CHANGELOG.rdoc +42 -0
  3. data/Manifest.txt +44 -29
  4. data/README.ja.rdoc +0 -2
  5. data/README.rdoc +4 -7
  6. data/Rakefile +42 -6
  7. data/bin/nokogiri +7 -5
  8. data/ext/nokogiri/extconf.rb +5 -21
  9. data/ext/nokogiri/html_document.c +14 -50
  10. data/ext/nokogiri/html_element_description.c +7 -7
  11. data/ext/nokogiri/html_entity_lookup.c +6 -4
  12. data/ext/nokogiri/html_sax_parser_context.c +92 -0
  13. data/ext/nokogiri/html_sax_parser_context.h +11 -0
  14. data/ext/nokogiri/nokogiri.c +9 -3
  15. data/ext/nokogiri/nokogiri.h +16 -20
  16. data/ext/nokogiri/xml_attr.c +1 -1
  17. data/ext/nokogiri/xml_attribute_decl.c +67 -0
  18. data/ext/nokogiri/xml_attribute_decl.h +9 -0
  19. data/ext/nokogiri/xml_cdata.c +6 -5
  20. data/ext/nokogiri/xml_comment.c +3 -2
  21. data/ext/nokogiri/xml_document.c +93 -23
  22. data/ext/nokogiri/xml_document_fragment.c +1 -3
  23. data/ext/nokogiri/xml_dtd.c +63 -6
  24. data/ext/nokogiri/xml_element_content.c +123 -0
  25. data/ext/nokogiri/xml_element_content.h +10 -0
  26. data/ext/nokogiri/xml_element_decl.c +69 -0
  27. data/ext/nokogiri/xml_element_decl.h +9 -0
  28. data/ext/nokogiri/xml_entity_decl.c +97 -0
  29. data/ext/nokogiri/xml_entity_decl.h +10 -0
  30. data/ext/nokogiri/xml_entity_reference.c +1 -1
  31. data/ext/nokogiri/xml_io.c +10 -3
  32. data/ext/nokogiri/xml_io.h +1 -0
  33. data/ext/nokogiri/xml_namespace.c +2 -2
  34. data/ext/nokogiri/xml_node.c +139 -34
  35. data/ext/nokogiri/xml_node.h +0 -1
  36. data/ext/nokogiri/xml_node_set.c +23 -16
  37. data/ext/nokogiri/xml_processing_instruction.c +1 -1
  38. data/ext/nokogiri/xml_reader.c +78 -50
  39. data/ext/nokogiri/xml_sax_parser.c +109 -168
  40. data/ext/nokogiri/xml_sax_parser.h +33 -0
  41. data/ext/nokogiri/xml_sax_parser_context.c +155 -0
  42. data/ext/nokogiri/xml_sax_parser_context.h +10 -0
  43. data/ext/nokogiri/xml_sax_push_parser.c +11 -6
  44. data/ext/nokogiri/xml_syntax_error.c +63 -12
  45. data/ext/nokogiri/xml_text.c +4 -3
  46. data/ext/nokogiri/xml_xpath.c +1 -1
  47. data/ext/nokogiri/xml_xpath_context.c +12 -25
  48. data/ext/nokogiri/xslt_stylesheet.c +3 -3
  49. data/lib/nokogiri.rb +4 -4
  50. data/lib/nokogiri/css/generated_tokenizer.rb +1 -0
  51. data/lib/nokogiri/css/node.rb +1 -9
  52. data/lib/nokogiri/css/xpath_visitor.rb +11 -21
  53. data/lib/nokogiri/ffi/html/document.rb +0 -9
  54. data/lib/nokogiri/ffi/html/sax/parser_context.rb +38 -0
  55. data/lib/nokogiri/ffi/io_callbacks.rb +4 -2
  56. data/lib/nokogiri/ffi/libxml.rb +44 -10
  57. data/lib/nokogiri/ffi/structs/common_node.rb +1 -1
  58. data/lib/nokogiri/ffi/structs/xml_attribute.rb +27 -0
  59. data/lib/nokogiri/ffi/structs/xml_dtd.rb +3 -1
  60. data/lib/nokogiri/ffi/structs/xml_element.rb +26 -0
  61. data/lib/nokogiri/ffi/structs/xml_element_content.rb +17 -0
  62. data/lib/nokogiri/ffi/structs/xml_entity.rb +32 -0
  63. data/lib/nokogiri/ffi/structs/xml_enumeration.rb +12 -0
  64. data/lib/nokogiri/ffi/structs/xml_parser_context.rb +19 -0
  65. data/lib/nokogiri/ffi/structs/xml_sax_push_parser_context.rb +4 -3
  66. data/lib/nokogiri/ffi/structs/xml_syntax_error.rb +1 -1
  67. data/lib/nokogiri/ffi/xml/attribute_decl.rb +27 -0
  68. data/lib/nokogiri/ffi/xml/comment.rb +2 -2
  69. data/lib/nokogiri/ffi/xml/document.rb +29 -12
  70. data/lib/nokogiri/ffi/xml/document_fragment.rb +0 -5
  71. data/lib/nokogiri/ffi/xml/dtd.rb +14 -3
  72. data/lib/nokogiri/ffi/xml/element_content.rb +43 -0
  73. data/lib/nokogiri/ffi/xml/element_decl.rb +19 -0
  74. data/lib/nokogiri/ffi/xml/entity_decl.rb +27 -0
  75. data/lib/nokogiri/ffi/xml/node.rb +45 -5
  76. data/lib/nokogiri/ffi/xml/node_set.rb +1 -1
  77. data/lib/nokogiri/ffi/xml/reader.rb +45 -24
  78. data/lib/nokogiri/ffi/xml/sax/parser.rb +27 -34
  79. data/lib/nokogiri/ffi/xml/sax/parser_context.rb +67 -0
  80. data/lib/nokogiri/ffi/xml/sax/push_parser.rb +5 -4
  81. data/lib/nokogiri/ffi/xml/syntax_error.rb +31 -16
  82. data/lib/nokogiri/ffi/xml/text.rb +2 -2
  83. data/lib/nokogiri/html.rb +1 -0
  84. data/lib/nokogiri/html/document.rb +39 -24
  85. data/lib/nokogiri/html/sax/parser.rb +2 -2
  86. data/lib/nokogiri/html/sax/parser_context.rb +16 -0
  87. data/lib/nokogiri/version.rb +1 -1
  88. data/lib/nokogiri/xml.rb +6 -1
  89. data/lib/nokogiri/xml/attr.rb +5 -0
  90. data/lib/nokogiri/xml/attribute_decl.rb +18 -0
  91. data/lib/nokogiri/xml/builder.rb +121 -13
  92. data/lib/nokogiri/xml/character_data.rb +7 -0
  93. data/lib/nokogiri/xml/document.rb +43 -29
  94. data/lib/nokogiri/xml/document_fragment.rb +26 -6
  95. data/lib/nokogiri/xml/dtd.rb +5 -5
  96. data/lib/nokogiri/xml/element_content.rb +36 -0
  97. data/lib/nokogiri/xml/element_decl.rb +13 -0
  98. data/lib/nokogiri/xml/entity_decl.rb +15 -0
  99. data/lib/nokogiri/xml/fragment_handler.rb +22 -11
  100. data/lib/nokogiri/xml/namespace.rb +6 -0
  101. data/lib/nokogiri/xml/node.rb +33 -15
  102. data/lib/nokogiri/xml/node_set.rb +66 -44
  103. data/lib/nokogiri/xml/pp.rb +2 -0
  104. data/lib/nokogiri/xml/pp/character_data.rb +18 -0
  105. data/lib/nokogiri/xml/pp/node.rb +56 -0
  106. data/lib/nokogiri/xml/reader.rb +8 -0
  107. data/lib/nokogiri/xml/sax.rb +1 -1
  108. data/lib/nokogiri/xml/sax/document.rb +18 -1
  109. data/lib/nokogiri/xml/sax/parser.rb +15 -8
  110. data/lib/nokogiri/xml/sax/parser_context.rb +16 -0
  111. data/lib/nokogiri/xml/sax/push_parser.rb +0 -3
  112. data/lib/nokogiri/xml/syntax_error.rb +4 -0
  113. data/lib/nokogiri/xslt/stylesheet.rb +1 -1
  114. data/test/css/test_nthiness.rb +1 -1
  115. data/test/css/test_parser.rb +1 -1
  116. data/test/css/test_tokenizer.rb +1 -1
  117. data/test/css/test_xpath_visitor.rb +1 -1
  118. data/test/ffi/test_document.rb +1 -1
  119. data/test/files/shift_jis.html +10 -0
  120. data/test/files/staff.dtd +10 -0
  121. data/test/helper.rb +12 -3
  122. data/test/html/sax/test_parser.rb +1 -1
  123. data/test/html/sax/test_parser_context.rb +48 -0
  124. data/test/html/test_builder.rb +8 -2
  125. data/test/html/test_document.rb +23 -1
  126. data/test/html/test_document_encoding.rb +15 -1
  127. data/test/html/test_document_fragment.rb +10 -1
  128. data/test/html/test_element_description.rb +1 -2
  129. data/test/html/test_named_characters.rb +1 -1
  130. data/test/html/test_node.rb +61 -1
  131. data/test/html/test_node_encoding.rb +27 -0
  132. data/test/test_convert_xpath.rb +1 -3
  133. data/test/test_css_cache.rb +1 -1
  134. data/test/test_gc.rb +1 -1
  135. data/test/test_memory_leak.rb +1 -1
  136. data/test/test_nokogiri.rb +3 -3
  137. data/test/test_reader.rb +29 -1
  138. data/test/test_xslt_transforms.rb +1 -1
  139. data/test/xml/node/test_save_options.rb +1 -1
  140. data/test/xml/node/test_subclass.rb +1 -1
  141. data/test/xml/sax/test_parser.rb +64 -3
  142. data/test/xml/sax/test_parser_context.rb +56 -0
  143. data/test/xml/sax/test_push_parser.rb +11 -1
  144. data/test/xml/test_attr.rb +1 -1
  145. data/test/xml/test_attribute_decl.rb +82 -0
  146. data/test/xml/test_builder.rb +95 -1
  147. data/test/xml/test_cdata.rb +1 -1
  148. data/test/xml/test_comment.rb +7 -1
  149. data/test/xml/test_document.rb +147 -6
  150. data/test/xml/test_document_encoding.rb +1 -1
  151. data/test/xml/test_document_fragment.rb +55 -5
  152. data/test/xml/test_dtd.rb +40 -5
  153. data/test/xml/test_dtd_encoding.rb +3 -1
  154. data/test/xml/test_element_content.rb +56 -0
  155. data/test/xml/test_element_decl.rb +73 -0
  156. data/test/xml/test_entity_decl.rb +83 -0
  157. data/test/xml/test_entity_reference.rb +1 -1
  158. data/test/xml/test_namespace.rb +21 -1
  159. data/test/xml/test_node.rb +70 -4
  160. data/test/xml/test_node_attributes.rb +1 -1
  161. data/test/xml/test_node_encoding.rb +1 -1
  162. data/test/xml/test_node_set.rb +136 -2
  163. data/test/xml/test_parse_options.rb +1 -1
  164. data/test/xml/test_processing_instruction.rb +1 -1
  165. data/test/xml/test_reader_encoding.rb +1 -1
  166. data/test/xml/test_relax_ng.rb +1 -1
  167. data/test/xml/test_schema.rb +1 -1
  168. data/test/xml/test_syntax_error.rb +27 -0
  169. data/test/xml/test_text.rb +13 -1
  170. data/test/xml/test_unparented_node.rb +1 -1
  171. data/test/xml/test_xpath.rb +1 -1
  172. metadata +57 -40
  173. data/ext/nokogiri/html_sax_parser.c +0 -57
  174. data/ext/nokogiri/html_sax_parser.h +0 -11
  175. data/lib/action-nokogiri.rb +0 -38
  176. data/lib/nokogiri/decorators.rb +0 -2
  177. data/lib/nokogiri/decorators/hpricot.rb +0 -3
  178. data/lib/nokogiri/decorators/hpricot/node.rb +0 -56
  179. data/lib/nokogiri/decorators/hpricot/node_set.rb +0 -54
  180. data/lib/nokogiri/decorators/hpricot/xpath_visitor.rb +0 -30
  181. data/lib/nokogiri/ffi/html/sax/parser.rb +0 -21
  182. data/lib/nokogiri/hpricot.rb +0 -92
  183. data/lib/nokogiri/xml/entity_declaration.rb +0 -11
  184. data/lib/nokogiri/xml/sax/legacy_handlers.rb +0 -65
  185. data/test/hpricot/files/basic.xhtml +0 -17
  186. data/test/hpricot/files/boingboing.html +0 -2266
  187. data/test/hpricot/files/cy0.html +0 -3653
  188. data/test/hpricot/files/immob.html +0 -400
  189. data/test/hpricot/files/pace_application.html +0 -1320
  190. data/test/hpricot/files/tenderlove.html +0 -16
  191. data/test/hpricot/files/uswebgen.html +0 -220
  192. data/test/hpricot/files/utf8.html +0 -1054
  193. data/test/hpricot/files/week9.html +0 -1723
  194. data/test/hpricot/files/why.xml +0 -19
  195. data/test/hpricot/load_files.rb +0 -11
  196. data/test/hpricot/test_alter.rb +0 -68
  197. data/test/hpricot/test_builder.rb +0 -20
  198. data/test/hpricot/test_parser.rb +0 -350
  199. data/test/hpricot/test_paths.rb +0 -15
  200. data/test/hpricot/test_preserved.rb +0 -77
  201. data/test/hpricot/test_xml.rb +0 -30
@@ -0,0 +1,123 @@
1
+ #include <xml_element_content.h>
2
+
3
+ VALUE cNokogiriXmlElementContent;
4
+
5
+ /*
6
+ * call-seq:
7
+ * name
8
+ *
9
+ * Get the required element +name+
10
+ */
11
+ static VALUE get_name(VALUE self)
12
+ {
13
+ xmlElementContentPtr elem;
14
+ Data_Get_Struct(self, xmlElementContent, elem);
15
+
16
+ if(!elem->name) return Qnil;
17
+ return NOKOGIRI_STR_NEW2(elem->name);
18
+ }
19
+
20
+ /*
21
+ * call-seq:
22
+ * type
23
+ *
24
+ * Get the element content +type+. Possible values are PCDATA, ELEMENT, SEQ,
25
+ * or OR.
26
+ */
27
+ static VALUE get_type(VALUE self)
28
+ {
29
+ xmlElementContentPtr elem;
30
+ Data_Get_Struct(self, xmlElementContent, elem);
31
+
32
+ return INT2NUM((long)elem->type);
33
+ }
34
+
35
+ /*
36
+ * call-seq:
37
+ * c1
38
+ *
39
+ * Get the first child.
40
+ */
41
+ static VALUE get_c1(VALUE self)
42
+ {
43
+ xmlElementContentPtr elem;
44
+ Data_Get_Struct(self, xmlElementContent, elem);
45
+
46
+ if(!elem->c1) return Qnil;
47
+ return Nokogiri_wrap_element_content(rb_iv_get(self, "@document"), elem->c1);
48
+ }
49
+
50
+ /*
51
+ * call-seq:
52
+ * c2
53
+ *
54
+ * Get the second child.
55
+ */
56
+ static VALUE get_c2(VALUE self)
57
+ {
58
+ xmlElementContentPtr elem;
59
+ Data_Get_Struct(self, xmlElementContent, elem);
60
+
61
+ if(!elem->c2) return Qnil;
62
+ return Nokogiri_wrap_element_content(rb_iv_get(self, "@document"), elem->c2);
63
+ }
64
+
65
+ /*
66
+ * call-seq:
67
+ * occur
68
+ *
69
+ * Get the element content +occur+ flag. Possible values are ONCE, OPT, MULT
70
+ * or PLUS.
71
+ */
72
+ static VALUE get_occur(VALUE self)
73
+ {
74
+ xmlElementContentPtr elem;
75
+ Data_Get_Struct(self, xmlElementContent, elem);
76
+
77
+ return INT2NUM((long)elem->ocur);
78
+ }
79
+
80
+ /*
81
+ * call-seq:
82
+ * prefix
83
+ *
84
+ * Get the element content namespace +prefix+.
85
+ */
86
+ static VALUE get_prefix(VALUE self)
87
+ {
88
+ xmlElementContentPtr elem;
89
+ Data_Get_Struct(self, xmlElementContent, elem);
90
+
91
+ if(!elem->prefix) return Qnil;
92
+
93
+ return NOKOGIRI_STR_NEW2(elem->prefix);
94
+ }
95
+
96
+ VALUE Nokogiri_wrap_element_content(VALUE doc, xmlElementContentPtr element)
97
+ {
98
+ VALUE elem = Data_Wrap_Struct(cNokogiriXmlElementContent, 0, 0, element);
99
+
100
+ // Setting the document is necessary so that this does not get GC'd until
101
+ // the document is GC'd
102
+ rb_iv_set(elem, "@document", doc);
103
+
104
+ return elem;
105
+ }
106
+
107
+ void init_xml_element_content()
108
+ {
109
+ VALUE nokogiri = rb_define_module("Nokogiri");
110
+ VALUE xml = rb_define_module_under(nokogiri, "XML");
111
+
112
+ VALUE klass = rb_define_class_under(xml, "ElementContent", rb_cObject);
113
+
114
+ cNokogiriXmlElementContent = klass;
115
+
116
+ rb_define_method(klass, "name", get_name, 0);
117
+ rb_define_method(klass, "type", get_type, 0);
118
+ rb_define_method(klass, "occur", get_occur, 0);
119
+ rb_define_method(klass, "prefix", get_prefix, 0);
120
+
121
+ rb_define_private_method(klass, "c1", get_c1, 0);
122
+ rb_define_private_method(klass, "c2", get_c2, 0);
123
+ }
@@ -0,0 +1,10 @@
1
+ #ifndef NOKOGIRI_XML_ELEMENT_CONTENT
2
+ #define NOKOGIRI_XML_ELEMENT_CONTENT
3
+
4
+ #include <nokogiri.h>
5
+
6
+
7
+ VALUE Nokogiri_wrap_element_content(VALUE doc, xmlElementContentPtr element);
8
+ void init_xml_element_content();
9
+
10
+ #endif
@@ -0,0 +1,69 @@
1
+ #include <xml_element_decl.h>
2
+
3
+ static ID id_document;
4
+
5
+ /*
6
+ * call-seq:
7
+ * element_type
8
+ *
9
+ * The element_type
10
+ */
11
+ static VALUE element_type(VALUE self)
12
+ {
13
+ xmlElementPtr node;
14
+ Data_Get_Struct(self, xmlElement, node);
15
+ return INT2NUM((long)node->etype);
16
+ }
17
+
18
+ /*
19
+ * call-seq:
20
+ * content
21
+ *
22
+ * The allowed content for this ElementDecl
23
+ */
24
+ static VALUE content(VALUE self)
25
+ {
26
+ xmlElementPtr node;
27
+ Data_Get_Struct(self, xmlElement, node);
28
+
29
+ if(!node->content) return Qnil;
30
+
31
+ return Nokogiri_wrap_element_content(
32
+ rb_funcall(self, id_document, 0),
33
+ node->content
34
+ );
35
+ }
36
+
37
+ /*
38
+ * call-seq:
39
+ * prefix
40
+ *
41
+ * The namespace prefix for this ElementDecl
42
+ */
43
+ static VALUE prefix(VALUE self)
44
+ {
45
+ xmlElementPtr node;
46
+ Data_Get_Struct(self, xmlElement, node);
47
+
48
+ if(!node->prefix) return Qnil;
49
+
50
+ return NOKOGIRI_STR_NEW2(node->prefix);
51
+ }
52
+
53
+ VALUE cNokogiriXmlElementDecl;
54
+
55
+ void init_xml_element_decl()
56
+ {
57
+ VALUE nokogiri = rb_define_module("Nokogiri");
58
+ VALUE xml = rb_define_module_under(nokogiri, "XML");
59
+ VALUE node = rb_define_class_under(xml, "Node", rb_cObject);
60
+ VALUE klass = rb_define_class_under(xml, "ElementDecl", node);
61
+
62
+ cNokogiriXmlElementDecl = klass;
63
+
64
+ rb_define_method(klass, "element_type", element_type, 0);
65
+ rb_define_method(klass, "content", content, 0);
66
+ rb_define_method(klass, "prefix", prefix, 0);
67
+
68
+ id_document = rb_intern("document");
69
+ }
@@ -0,0 +1,9 @@
1
+ #ifndef NOKOGIRI_XML_ELEMENT_DECL
2
+ #define NOKOGIRI_XML_ELEMENT_DECL
3
+
4
+ #include <nokogiri.h>
5
+
6
+ void init_xml_element_decl();
7
+
8
+ extern VALUE cNokogiriXmlElementDecl;
9
+ #endif
@@ -0,0 +1,97 @@
1
+ #include <xml_entity_decl.h>
2
+
3
+ /*
4
+ * call-seq:
5
+ * original_content
6
+ *
7
+ * Get the original_content before ref substitution
8
+ */
9
+ static VALUE original_content(VALUE self)
10
+ {
11
+ xmlEntityPtr node;
12
+ Data_Get_Struct(self, xmlEntity, node);
13
+
14
+ if(!node->orig) return Qnil;
15
+
16
+ return NOKOGIRI_STR_NEW2(node->orig);
17
+ }
18
+
19
+ /*
20
+ * call-seq:
21
+ * content
22
+ *
23
+ * Get the content
24
+ */
25
+ static VALUE get_content(VALUE self)
26
+ {
27
+ xmlEntityPtr node;
28
+ Data_Get_Struct(self, xmlEntity, node);
29
+
30
+ if(!node->content) return Qnil;
31
+
32
+ return NOKOGIRI_STR_NEW(node->content, node->length);
33
+ }
34
+
35
+ /*
36
+ * call-seq:
37
+ * entity_type
38
+ *
39
+ * Get the entity type
40
+ */
41
+ static VALUE entity_type(VALUE self)
42
+ {
43
+ xmlEntityPtr node;
44
+ Data_Get_Struct(self, xmlEntity, node);
45
+
46
+ return INT2NUM((int)node->etype);
47
+ }
48
+
49
+ /*
50
+ * call-seq:
51
+ * external_id
52
+ *
53
+ * Get the external identifier for a PUBLIC Entity
54
+ */
55
+ static VALUE external_id(VALUE self)
56
+ {
57
+ xmlEntityPtr node;
58
+ Data_Get_Struct(self, xmlEntity, node);
59
+
60
+ if(!node->ExternalID) return Qnil;
61
+
62
+ return NOKOGIRI_STR_NEW2(node->ExternalID);
63
+ }
64
+
65
+ /*
66
+ * call-seq:
67
+ * system_id
68
+ *
69
+ * Get the URI for a SYSTEM or PUBLIC Entity
70
+ */
71
+ static VALUE system_id(VALUE self)
72
+ {
73
+ xmlEntityPtr node;
74
+ Data_Get_Struct(self, xmlEntity, node);
75
+
76
+ if(!node->SystemID) return Qnil;
77
+
78
+ return NOKOGIRI_STR_NEW2(node->SystemID);
79
+ }
80
+
81
+ VALUE cNokogiriXmlEntityDecl;
82
+
83
+ void init_xml_entity_decl()
84
+ {
85
+ VALUE nokogiri = rb_define_module("Nokogiri");
86
+ VALUE xml = rb_define_module_under(nokogiri, "XML");
87
+ VALUE node = rb_define_class_under(xml, "Node", rb_cObject);
88
+ VALUE klass = rb_define_class_under(xml, "EntityDecl", node);
89
+
90
+ cNokogiriXmlEntityDecl = klass;
91
+
92
+ rb_define_method(klass, "original_content", original_content, 0);
93
+ rb_define_method(klass, "content", get_content, 0);
94
+ rb_define_method(klass, "entity_type", entity_type, 0);
95
+ rb_define_method(klass, "external_id", external_id, 0);
96
+ rb_define_method(klass, "system_id", system_id, 0);
97
+ }
@@ -0,0 +1,10 @@
1
+ #ifndef NOKOGIRI_XML_ENTITY_DECL
2
+ #define NOKOGIRI_XML_ENTITY_DECL
3
+
4
+ #include <nokogiri.h>
5
+
6
+ void init_xml_entity_decl();
7
+
8
+ extern VALUE cNokogiriXmlEntityDecl;
9
+ #endif
10
+
@@ -25,7 +25,7 @@ static VALUE new(int argc, VALUE *argv, VALUE klass)
25
25
  NOKOGIRI_ROOT_NODE(node);
26
26
 
27
27
  VALUE rb_node = Nokogiri_wrap_xml_node(klass, node);
28
- rb_funcall2(rb_node, rb_intern("initialize"), argc, argv);
28
+ rb_obj_call_init(rb_node, argc, argv);
29
29
 
30
30
  if(rb_block_given_p()) rb_yield(rb_node);
31
31
 
@@ -1,10 +1,12 @@
1
1
  #include <xml_io.h>
2
2
 
3
+ static ID id_read, id_write;
4
+
3
5
  int io_read_callback(void * ctx, char * buffer, int len) {
4
6
  VALUE io = (VALUE)ctx;
5
- VALUE string = rb_funcall(io, rb_intern("read"), 1, INT2NUM(len));
7
+ VALUE string = rb_funcall(io, id_read, 1, INT2NUM(len));
6
8
 
7
- if(Qnil == string) return 0;
9
+ if(NIL_P(string)) return 0;
8
10
 
9
11
  memcpy(buffer, StringValuePtr(string), (unsigned int)RSTRING_LEN(string));
10
12
 
@@ -15,10 +17,15 @@ int io_write_callback(void * ctx, char * buffer, int len) {
15
17
  VALUE io = (VALUE)ctx;
16
18
  VALUE string = rb_str_new(buffer, len);
17
19
 
18
- rb_funcall(io, rb_intern("write"), 1, string);
20
+ rb_funcall(io, id_write, 1, string);
19
21
  return len;
20
22
  }
21
23
 
22
24
  int io_close_callback(void * ctx) {
23
25
  return 0;
24
26
  }
27
+
28
+ void init_nokogiri_io() {
29
+ id_read = rb_intern("read");
30
+ id_write = rb_intern("write");
31
+ }
@@ -6,5 +6,6 @@
6
6
  int io_read_callback(void * ctx, char * buffer, int len);
7
7
  int io_write_callback(void * ctx, char * buffer, int len);
8
8
  int io_close_callback(void * ctx);
9
+ void init_nokogiri_io();
9
10
 
10
11
  #endif
@@ -18,7 +18,7 @@ static VALUE prefix(VALUE self)
18
18
 
19
19
  Data_Get_Struct(rb_iv_get(self, "@document"), xmlDoc, doc);
20
20
 
21
- return NOKOGIRI_STR_NEW2(ns->prefix, doc->encoding);
21
+ return NOKOGIRI_STR_NEW2(ns->prefix);
22
22
  }
23
23
 
24
24
  /*
@@ -37,7 +37,7 @@ static VALUE href(VALUE self)
37
37
 
38
38
  Data_Get_Struct(rb_iv_get(self, "@document"), xmlDoc, doc);
39
39
 
40
- return NOKOGIRI_STR_NEW2(ns->href, doc->encoding);
40
+ return NOKOGIRI_STR_NEW2(ns->href);
41
41
  }
42
42
 
43
43
  VALUE Nokogiri_wrap_xml_namespace(xmlDocPtr doc, xmlNsPtr node)
@@ -1,5 +1,7 @@
1
1
  #include <xml_node.h>
2
2
 
3
+ static ID decorate, decorate_bang;
4
+
3
5
  #ifdef DEBUG
4
6
  static void debug_node_dealloc(xmlNodePtr x)
5
7
  {
@@ -24,8 +26,11 @@ typedef xmlNodePtr (*node_other_func)(xmlNodePtr, xmlNodePtr);
24
26
  /* :nodoc: */
25
27
  static void relink_namespace(xmlNodePtr reparented)
26
28
  {
29
+ // Avoid segv when relinking against unlinked nodes.
30
+ if(!reparented->parent) return;
31
+
27
32
  // Make sure that our reparented node has the correct namespaces
28
- if(reparented->doc != (xmlDocPtr)reparented->parent)
33
+ if(!reparented->ns && reparented->doc != (xmlDocPtr)reparented->parent)
29
34
  xmlSetNs(reparented, reparented->parent->ns);
30
35
 
31
36
  // Search our parents for an existing definition
@@ -57,12 +62,15 @@ static VALUE reparent_node_with(VALUE node_obj, VALUE other_obj, node_other_func
57
62
  VALUE reparented_obj ;
58
63
  xmlNodePtr node, other, reparented ;
59
64
 
60
- if(! rb_funcall(node_obj, rb_intern("is_a?"), 1, cNokogiriXmlNode))
65
+ if(!rb_obj_is_kind_of(node_obj, cNokogiriXmlNode))
61
66
  rb_raise(rb_eArgError, "node must be a Nokogiri::XML::Node");
62
67
 
63
68
  Data_Get_Struct(node_obj, xmlNode, node);
64
69
  Data_Get_Struct(other_obj, xmlNode, other);
65
70
 
71
+ if(XML_DOCUMENT_NODE == node->type || XML_HTML_DOCUMENT_NODE == node->type)
72
+ rb_raise(rb_eArgError, "cannot reparent a document node");
73
+
66
74
  // If a document fragment is added, we need to reparent all of its children
67
75
  if(node->type == XML_DOCUMENT_FRAG_NODE)
68
76
  {
@@ -74,18 +82,27 @@ static VALUE reparent_node_with(VALUE node_obj, VALUE other_obj, node_other_func
74
82
  return node_obj;
75
83
  }
76
84
 
85
+ if(node->type == XML_TEXT_NODE) {
86
+ NOKOGIRI_ROOT_NODE(node);
87
+ node = xmlDocCopyNode(node, other->doc, 1);
88
+ }
89
+
77
90
  if (node->doc == other->doc) {
78
91
  xmlUnlinkNode(node) ;
92
+
93
+ // TODO: I really want to remove this. We shouldn't support 2.6.16 anymore
79
94
  if ( node->type == XML_TEXT_NODE
80
95
  && other->type == XML_TEXT_NODE
81
96
  && is_2_6_16() ) {
82
- other->content = xmlStrdup(other->content); // we'd rather leak than segfault.
97
+
98
+ // we'd rather leak than segfault.
99
+ other->content = xmlStrdup(other->content);
100
+
83
101
  }
84
102
 
85
103
  if(!(reparented = (*func)(other, node))) {
86
104
  rb_raise(rb_eRuntimeError, "Could not reparent node (1)");
87
105
  }
88
-
89
106
  } else {
90
107
  xmlNodePtr duped_node ;
91
108
  // recursively copy to the new document
@@ -110,7 +127,7 @@ static VALUE reparent_node_with(VALUE node_obj, VALUE other_obj, node_other_func
110
127
 
111
128
  reparented_obj = Nokogiri_wrap_xml_node(Qnil, reparented);
112
129
 
113
- rb_funcall(reparented_obj, rb_intern("decorate!"), 0);
130
+ rb_funcall(reparented_obj, decorate_bang, 0);
114
131
 
115
132
  return reparented_obj ;
116
133
  }
@@ -140,7 +157,7 @@ static VALUE pointer_id(VALUE self)
140
157
  xmlNodePtr node;
141
158
  Data_Get_Struct(self, xmlNode, node);
142
159
 
143
- return INT2NUM((int)(node));
160
+ return INT2NUM((long)(node));
144
161
  }
145
162
 
146
163
  /*
@@ -158,12 +175,92 @@ static VALUE encode_special_chars(VALUE self, VALUE string)
158
175
  (const xmlChar *)StringValuePtr(string)
159
176
  );
160
177
 
161
- VALUE encoded_str = NOKOGIRI_STR_NEW2(encoded, node->doc->encoding);
178
+ VALUE encoded_str = NOKOGIRI_STR_NEW2(encoded);
162
179
  xmlFree(encoded);
163
180
 
164
181
  return encoded_str;
165
182
  }
166
183
 
184
+ /*
185
+ * call-seq:
186
+ * create_internal_subset(name, external_id, system_id)
187
+ *
188
+ * Create an internal subset
189
+ */
190
+ static VALUE create_internal_subset(VALUE self, VALUE name, VALUE external_id, VALUE system_id)
191
+ {
192
+ xmlNodePtr node;
193
+ xmlDocPtr doc;
194
+ Data_Get_Struct(self, xmlNode, node);
195
+
196
+ doc = node->doc;
197
+
198
+ if(xmlGetIntSubset(doc))
199
+ rb_raise(rb_eRuntimeError, "Document already has an internal subset");
200
+
201
+ xmlDtdPtr dtd = xmlCreateIntSubset(
202
+ doc,
203
+ NIL_P(name) ? NULL : (const xmlChar *)StringValuePtr(name),
204
+ NIL_P(external_id) ? NULL : (const xmlChar *)StringValuePtr(external_id),
205
+ NIL_P(system_id) ? NULL : (const xmlChar *)StringValuePtr(system_id)
206
+ );
207
+
208
+ if(!dtd) return Qnil;
209
+
210
+ return Nokogiri_wrap_xml_node(Qnil, (xmlNodePtr)dtd);
211
+ }
212
+
213
+ /*
214
+ * call-seq:
215
+ * create_external_subset(name, external_id, system_id)
216
+ *
217
+ * Create an external subset
218
+ */
219
+ static VALUE create_external_subset(VALUE self, VALUE name, VALUE external_id, VALUE system_id)
220
+ {
221
+ xmlNodePtr node;
222
+ xmlDocPtr doc;
223
+ Data_Get_Struct(self, xmlNode, node);
224
+
225
+ doc = node->doc;
226
+
227
+ if(doc->extSubset)
228
+ rb_raise(rb_eRuntimeError, "Document already has an external subset");
229
+
230
+ xmlDtdPtr dtd = xmlNewDtd(
231
+ doc,
232
+ NIL_P(name) ? NULL : (const xmlChar *)StringValuePtr(name),
233
+ NIL_P(external_id) ? NULL : (const xmlChar *)StringValuePtr(external_id),
234
+ NIL_P(system_id) ? NULL : (const xmlChar *)StringValuePtr(system_id)
235
+ );
236
+
237
+ if(!dtd) return Qnil;
238
+
239
+ return Nokogiri_wrap_xml_node(Qnil, (xmlNodePtr)dtd);
240
+ }
241
+
242
+ /*
243
+ * call-seq:
244
+ * external_subset
245
+ *
246
+ * Get the external subset
247
+ */
248
+ static VALUE external_subset(VALUE self)
249
+ {
250
+ xmlNodePtr node;
251
+ xmlDocPtr doc;
252
+ Data_Get_Struct(self, xmlNode, node);
253
+
254
+ if(!node->doc) return Qnil;
255
+
256
+ doc = node->doc;
257
+ xmlDtdPtr dtd = doc->extSubset;
258
+
259
+ if(!dtd) return Qnil;
260
+
261
+ return Nokogiri_wrap_xml_node(Qnil, (xmlNodePtr)dtd);
262
+ }
263
+
167
264
  /*
168
265
  * call-seq:
169
266
  * internal_subset
@@ -198,12 +295,12 @@ static VALUE duplicate_node(int argc, VALUE *argv, VALUE self)
198
295
  VALUE level;
199
296
 
200
297
  if(rb_scan_args(argc, argv, "01", &level) == 0)
201
- level = INT2NUM(1);
298
+ level = INT2NUM((long)1);
202
299
 
203
300
  xmlNodePtr node, dup;
204
301
  Data_Get_Struct(self, xmlNode, node);
205
302
 
206
- dup = xmlDocCopyNode(node, node->doc, NUM2INT(level));
303
+ dup = xmlDocCopyNode(node, node->doc, (int)NUM2INT(level));
207
304
  if(dup == NULL) return Qnil;
208
305
 
209
306
  return Nokogiri_wrap_xml_node(rb_obj_class(self), dup);
@@ -358,7 +455,7 @@ static VALUE namespaced_key_eh(VALUE self, VALUE attribute, VALUE namespace)
358
455
  xmlNodePtr node;
359
456
  Data_Get_Struct(self, xmlNode, node);
360
457
  if(xmlHasNsProp(node, (xmlChar *)StringValuePtr(attribute),
361
- Qnil == namespace ? NULL : (xmlChar *)StringValuePtr(namespace)))
458
+ NIL_P(namespace) ? NULL : (xmlChar *)StringValuePtr(namespace)))
362
459
  return Qtrue;
363
460
  return Qfalse;
364
461
  }
@@ -393,13 +490,13 @@ static VALUE get(VALUE self, VALUE attribute)
393
490
  VALUE rval ;
394
491
  Data_Get_Struct(self, xmlNode, node);
395
492
 
396
- if(attribute == Qnil) return Qnil;
493
+ if(NIL_P(attribute)) return Qnil;
397
494
 
398
495
  propstr = xmlGetProp(node, (xmlChar *)StringValuePtr(attribute));
399
496
 
400
- if(NULL == propstr) return Qnil;
497
+ if(!propstr) return Qnil;
401
498
 
402
- rval = NOKOGIRI_STR_NEW2(propstr, node->doc->encoding);
499
+ rval = NOKOGIRI_STR_NEW2(propstr);
403
500
 
404
501
  xmlFree(propstr);
405
502
  return rval ;
@@ -453,7 +550,7 @@ static VALUE attribute_with_ns(VALUE self, VALUE name, VALUE namespace)
453
550
  xmlAttrPtr prop;
454
551
  Data_Get_Struct(self, xmlNode, node);
455
552
  prop = xmlHasNsProp(node, (xmlChar *)StringValuePtr(name),
456
- Qnil == namespace ? NULL : (xmlChar *)StringValuePtr(namespace));
553
+ NIL_P(namespace) ? NULL : (xmlChar *)StringValuePtr(namespace));
457
554
 
458
555
  if(! prop) return Qnil;
459
556
  return Nokogiri_wrap_xml_node(Qnil, (xmlNodePtr)prop);
@@ -468,12 +565,11 @@ static VALUE attribute_with_ns(VALUE self, VALUE name, VALUE namespace)
468
565
  static VALUE attribute_nodes(VALUE self)
469
566
  {
470
567
  /* this code in the mode of xmlHasProp() */
471
- xmlNodePtr node ;
472
- VALUE attr ;
568
+ xmlNodePtr node;
473
569
 
474
- attr = rb_ary_new() ;
475
570
  Data_Get_Struct(self, xmlNode, node);
476
571
 
572
+ VALUE attr = rb_ary_new();
477
573
  Nokogiri_xml_node_properties(node, attr);
478
574
 
479
575
  return attr ;
@@ -534,7 +630,7 @@ static VALUE node_type(VALUE self)
534
630
  {
535
631
  xmlNodePtr node;
536
632
  Data_Get_Struct(self, xmlNode, node);
537
- return INT2NUM((int)node->type);
633
+ return INT2NUM((long)node->type);
538
634
  }
539
635
 
540
636
  /*
@@ -564,7 +660,7 @@ static VALUE get_content(VALUE self)
564
660
 
565
661
  xmlChar * content = xmlNodeGetContent(node);
566
662
  if(content) {
567
- VALUE rval = NOKOGIRI_STR_NEW2(content, node->doc->encoding);
663
+ VALUE rval = NOKOGIRI_STR_NEW2(content);
568
664
  xmlFree(content);
569
665
  return rval;
570
666
  }
@@ -624,7 +720,7 @@ static VALUE get_name(VALUE self)
624
720
  xmlNodePtr node;
625
721
  Data_Get_Struct(self, xmlNode, node);
626
722
  if(node->name)
627
- return NOKOGIRI_STR_NEW2(node->name, node->doc->encoding);
723
+ return NOKOGIRI_STR_NEW2(node->name);
628
724
  return Qnil;
629
725
  }
630
726
 
@@ -641,7 +737,7 @@ static VALUE path(VALUE self)
641
737
  Data_Get_Struct(self, xmlNode, node);
642
738
 
643
739
  path = xmlGetNodePath(node);
644
- VALUE rval = NOKOGIRI_STR_NEW2(path, node->doc->encoding);
740
+ VALUE rval = NOKOGIRI_STR_NEW2(path);
645
741
  xmlFree(path);
646
742
  return rval ;
647
743
  }
@@ -696,7 +792,7 @@ static VALUE native_write_to(
696
792
  (xmlOutputCloseCallback)io_close_callback,
697
793
  (void *)io,
698
794
  RTEST(encoding) ? StringValuePtr(encoding) : NULL,
699
- NUM2INT(options)
795
+ (int)NUM2INT(options)
700
796
  );
701
797
 
702
798
  xmlSaveTree(savectx, node);
@@ -717,7 +813,7 @@ static VALUE line(VALUE self)
717
813
  xmlNodePtr node;
718
814
  Data_Get_Struct(self, xmlNode, node);
719
815
 
720
- return INT2NUM(node->line);
816
+ return INT2NUM(xmlGetLineNo(node));
721
817
  }
722
818
 
723
819
  /*
@@ -735,18 +831,18 @@ static VALUE add_namespace_definition(VALUE self, VALUE prefix, VALUE href)
735
831
  xmlNsPtr ns = xmlNewNs(
736
832
  node,
737
833
  (const xmlChar *)StringValuePtr(href),
738
- (const xmlChar *)(prefix == Qnil ? NULL : StringValuePtr(prefix))
834
+ (const xmlChar *)(NIL_P(prefix) ? NULL : StringValuePtr(prefix))
739
835
  );
740
836
 
741
837
  if(!ns) {
742
838
  ns = xmlSearchNs(
743
839
  node->doc,
744
840
  node,
745
- (const xmlChar *)(prefix == Qnil ? NULL : StringValuePtr(prefix))
841
+ (const xmlChar *)(NIL_P(prefix) ? NULL : StringValuePtr(prefix))
746
842
  );
747
843
  }
748
844
 
749
- if(Qnil == prefix) xmlSetNs(node, ns);
845
+ if(NIL_P(prefix)) xmlSetNs(node, ns);
750
846
 
751
847
  return Nokogiri_wrap_xml_namespace(node->doc, ns);
752
848
  }
@@ -776,7 +872,7 @@ static VALUE new(int argc, VALUE *argv, VALUE klass)
776
872
  klass == cNokogiriXmlNode ? (VALUE)NULL : klass,
777
873
  node
778
874
  );
779
- rb_funcall2(rb_node, rb_intern("initialize"), argc, argv);
875
+ rb_obj_call_init(rb_node, argc, argv);
780
876
 
781
877
  if(rb_block_given_p()) rb_yield(rb_node);
782
878
 
@@ -797,7 +893,7 @@ static VALUE dump_html(VALUE self)
797
893
 
798
894
  buf = xmlBufferCreate() ;
799
895
  htmlNodeDump(buf, node->doc, node);
800
- VALUE html = NOKOGIRI_STR_NEW2(buf->content, node->doc->encoding);
896
+ VALUE html = NOKOGIRI_STR_NEW2(buf->content);
801
897
  xmlBufferFree(buf);
802
898
  return html ;
803
899
  }
@@ -814,7 +910,7 @@ static VALUE compare(VALUE self, VALUE _other)
814
910
  Data_Get_Struct(self, xmlNode, node);
815
911
  Data_Get_Struct(_other, xmlNode, other);
816
912
 
817
- return INT2NUM(xmlXPathCmpNodes(other, node));
913
+ return INT2NUM((long)xmlXPathCmpNodes(other, node));
818
914
  }
819
915
 
820
916
  VALUE Nokogiri_wrap_xml_node(VALUE klass, xmlNodePtr node)
@@ -857,7 +953,7 @@ VALUE Nokogiri_wrap_xml_node(VALUE klass, xmlNodePtr node)
857
953
  klass = cNokogiriXmlProcessingInstruction;
858
954
  break;
859
955
  case XML_ENTITY_DECL:
860
- klass = cNokogiriXmlEntityDeclaration;
956
+ klass = cNokogiriXmlEntityDecl;
861
957
  break;
862
958
  case XML_CDATA_SECTION_NODE:
863
959
  klass = cNokogiriXmlCData;
@@ -865,6 +961,12 @@ VALUE Nokogiri_wrap_xml_node(VALUE klass, xmlNodePtr node)
865
961
  case XML_DTD_NODE:
866
962
  klass = cNokogiriXmlDtd;
867
963
  break;
964
+ case XML_ATTRIBUTE_DECL:
965
+ klass = cNokogiriXmlAttributeDecl;
966
+ break;
967
+ case XML_ELEMENT_DECL:
968
+ klass = cNokogiriXmlElementDecl;
969
+ break;
868
970
  default:
869
971
  klass = cNokogiriXmlNode;
870
972
  }
@@ -879,7 +981,7 @@ VALUE Nokogiri_wrap_xml_node(VALUE klass, xmlNodePtr node)
879
981
  document = DOC_RUBY_OBJECT(node->doc);
880
982
  node_cache = DOC_NODE_CACHE(node->doc);
881
983
  rb_ary_push(node_cache, rb_node);
882
- rb_funcall(document, rb_intern("decorate"), 1, rb_node);
984
+ rb_funcall(document, decorate, 1, rb_node);
883
985
  }
884
986
 
885
987
  return rb_node ;
@@ -898,7 +1000,6 @@ void Nokogiri_xml_node_properties(xmlNodePtr node, VALUE attr_list)
898
1000
 
899
1001
  VALUE cNokogiriXmlNode ;
900
1002
  VALUE cNokogiriXmlElement ;
901
- VALUE cNokogiriXmlEntityDeclaration ;
902
1003
 
903
1004
  void init_xml_node()
904
1005
  {
@@ -909,8 +1010,6 @@ void init_xml_node()
909
1010
  cNokogiriXmlNode = klass;
910
1011
 
911
1012
  cNokogiriXmlElement = rb_define_class_under(xml, "Element", klass);
912
- cNokogiriXmlEntityDeclaration =
913
- rb_define_class_under(xml, "EntityDeclaration", klass);
914
1013
 
915
1014
  rb_define_singleton_method(klass, "new", new, -1);
916
1015
 
@@ -942,6 +1041,9 @@ void init_xml_node()
942
1041
  rb_define_method(klass, "dup", duplicate_node, -1);
943
1042
  rb_define_method(klass, "unlink", unlink_node, 0);
944
1043
  rb_define_method(klass, "internal_subset", internal_subset, 0);
1044
+ rb_define_method(klass, "external_subset", external_subset, 0);
1045
+ rb_define_method(klass, "create_internal_subset", create_internal_subset, 3);
1046
+ rb_define_method(klass, "create_external_subset", create_external_subset, 3);
945
1047
  rb_define_method(klass, "pointer_id", pointer_id, 0);
946
1048
  rb_define_method(klass, "line", line, 0);
947
1049
 
@@ -952,4 +1054,7 @@ void init_xml_node()
952
1054
  rb_define_private_method(klass, "get", get, 1);
953
1055
  rb_define_private_method(klass, "set_namespace", set_namespace, 1);
954
1056
  rb_define_private_method(klass, "compare", compare, 1);
1057
+
1058
+ decorate = rb_intern("decorate");
1059
+ decorate_bang = rb_intern("decorate!");
955
1060
  }