nokogiri-fitzsimmons 1.5.5

Sign up to get free protection for your applications and to get access to all the features.
Files changed (237) hide show
  1. data/.autotest +26 -0
  2. data/.gemtest +0 -0
  3. data/CHANGELOG.ja.rdoc +666 -0
  4. data/CHANGELOG.rdoc +659 -0
  5. data/C_CODING_STYLE.rdoc +33 -0
  6. data/Manifest.txt +295 -0
  7. data/README.ja.rdoc +106 -0
  8. data/README.rdoc +178 -0
  9. data/ROADMAP.md +86 -0
  10. data/Rakefile +194 -0
  11. data/STANDARD_RESPONSES.md +47 -0
  12. data/Y_U_NO_GEMSPEC.md +155 -0
  13. data/bin/nokogiri +63 -0
  14. data/build_all +58 -0
  15. data/ext/nokogiri/depend +358 -0
  16. data/ext/nokogiri/extconf.rb +142 -0
  17. data/ext/nokogiri/html_document.c +170 -0
  18. data/ext/nokogiri/html_document.h +10 -0
  19. data/ext/nokogiri/html_element_description.c +276 -0
  20. data/ext/nokogiri/html_element_description.h +10 -0
  21. data/ext/nokogiri/html_entity_lookup.c +32 -0
  22. data/ext/nokogiri/html_entity_lookup.h +8 -0
  23. data/ext/nokogiri/html_sax_parser_context.c +116 -0
  24. data/ext/nokogiri/html_sax_parser_context.h +11 -0
  25. data/ext/nokogiri/html_sax_push_parser.c +87 -0
  26. data/ext/nokogiri/html_sax_push_parser.h +9 -0
  27. data/ext/nokogiri/nokogiri.c +133 -0
  28. data/ext/nokogiri/nokogiri.h +160 -0
  29. data/ext/nokogiri/xml_attr.c +94 -0
  30. data/ext/nokogiri/xml_attr.h +9 -0
  31. data/ext/nokogiri/xml_attribute_decl.c +70 -0
  32. data/ext/nokogiri/xml_attribute_decl.h +9 -0
  33. data/ext/nokogiri/xml_cdata.c +56 -0
  34. data/ext/nokogiri/xml_cdata.h +9 -0
  35. data/ext/nokogiri/xml_comment.c +54 -0
  36. data/ext/nokogiri/xml_comment.h +9 -0
  37. data/ext/nokogiri/xml_document.c +576 -0
  38. data/ext/nokogiri/xml_document.h +23 -0
  39. data/ext/nokogiri/xml_document_fragment.c +48 -0
  40. data/ext/nokogiri/xml_document_fragment.h +10 -0
  41. data/ext/nokogiri/xml_dtd.c +202 -0
  42. data/ext/nokogiri/xml_dtd.h +10 -0
  43. data/ext/nokogiri/xml_element_content.c +123 -0
  44. data/ext/nokogiri/xml_element_content.h +10 -0
  45. data/ext/nokogiri/xml_element_decl.c +69 -0
  46. data/ext/nokogiri/xml_element_decl.h +9 -0
  47. data/ext/nokogiri/xml_encoding_handler.c +79 -0
  48. data/ext/nokogiri/xml_encoding_handler.h +8 -0
  49. data/ext/nokogiri/xml_entity_decl.c +110 -0
  50. data/ext/nokogiri/xml_entity_decl.h +10 -0
  51. data/ext/nokogiri/xml_entity_reference.c +52 -0
  52. data/ext/nokogiri/xml_entity_reference.h +9 -0
  53. data/ext/nokogiri/xml_io.c +56 -0
  54. data/ext/nokogiri/xml_io.h +11 -0
  55. data/ext/nokogiri/xml_libxml2_hacks.c +112 -0
  56. data/ext/nokogiri/xml_libxml2_hacks.h +12 -0
  57. data/ext/nokogiri/xml_namespace.c +78 -0
  58. data/ext/nokogiri/xml_namespace.h +13 -0
  59. data/ext/nokogiri/xml_node.c +1480 -0
  60. data/ext/nokogiri/xml_node.h +13 -0
  61. data/ext/nokogiri/xml_node_set.c +467 -0
  62. data/ext/nokogiri/xml_node_set.h +14 -0
  63. data/ext/nokogiri/xml_processing_instruction.c +56 -0
  64. data/ext/nokogiri/xml_processing_instruction.h +9 -0
  65. data/ext/nokogiri/xml_reader.c +684 -0
  66. data/ext/nokogiri/xml_reader.h +10 -0
  67. data/ext/nokogiri/xml_relax_ng.c +161 -0
  68. data/ext/nokogiri/xml_relax_ng.h +9 -0
  69. data/ext/nokogiri/xml_sax_parser.c +293 -0
  70. data/ext/nokogiri/xml_sax_parser.h +39 -0
  71. data/ext/nokogiri/xml_sax_parser_context.c +222 -0
  72. data/ext/nokogiri/xml_sax_parser_context.h +10 -0
  73. data/ext/nokogiri/xml_sax_push_parser.c +115 -0
  74. data/ext/nokogiri/xml_sax_push_parser.h +9 -0
  75. data/ext/nokogiri/xml_schema.c +205 -0
  76. data/ext/nokogiri/xml_schema.h +9 -0
  77. data/ext/nokogiri/xml_syntax_error.c +58 -0
  78. data/ext/nokogiri/xml_syntax_error.h +13 -0
  79. data/ext/nokogiri/xml_text.c +52 -0
  80. data/ext/nokogiri/xml_text.h +9 -0
  81. data/ext/nokogiri/xml_xpath_context.c +319 -0
  82. data/ext/nokogiri/xml_xpath_context.h +10 -0
  83. data/ext/nokogiri/xslt_stylesheet.c +270 -0
  84. data/ext/nokogiri/xslt_stylesheet.h +14 -0
  85. data/lib/nokogiri.rb +127 -0
  86. data/lib/nokogiri/css.rb +27 -0
  87. data/lib/nokogiri/css/node.rb +102 -0
  88. data/lib/nokogiri/css/parser.rb +720 -0
  89. data/lib/nokogiri/css/parser.y +258 -0
  90. data/lib/nokogiri/css/parser_extras.rb +91 -0
  91. data/lib/nokogiri/css/syntax_error.rb +7 -0
  92. data/lib/nokogiri/css/tokenizer.rb +152 -0
  93. data/lib/nokogiri/css/tokenizer.rex +55 -0
  94. data/lib/nokogiri/css/xpath_visitor.rb +171 -0
  95. data/lib/nokogiri/decorators/slop.rb +35 -0
  96. data/lib/nokogiri/html.rb +37 -0
  97. data/lib/nokogiri/html/builder.rb +35 -0
  98. data/lib/nokogiri/html/document.rb +254 -0
  99. data/lib/nokogiri/html/document_fragment.rb +41 -0
  100. data/lib/nokogiri/html/element_description.rb +23 -0
  101. data/lib/nokogiri/html/element_description_defaults.rb +671 -0
  102. data/lib/nokogiri/html/entity_lookup.rb +13 -0
  103. data/lib/nokogiri/html/sax/parser.rb +52 -0
  104. data/lib/nokogiri/html/sax/parser_context.rb +16 -0
  105. data/lib/nokogiri/html/sax/push_parser.rb +16 -0
  106. data/lib/nokogiri/syntax_error.rb +4 -0
  107. data/lib/nokogiri/version.rb +88 -0
  108. data/lib/nokogiri/xml.rb +73 -0
  109. data/lib/nokogiri/xml/attr.rb +14 -0
  110. data/lib/nokogiri/xml/attribute_decl.rb +18 -0
  111. data/lib/nokogiri/xml/builder.rb +431 -0
  112. data/lib/nokogiri/xml/cdata.rb +11 -0
  113. data/lib/nokogiri/xml/character_data.rb +7 -0
  114. data/lib/nokogiri/xml/document.rb +267 -0
  115. data/lib/nokogiri/xml/document_fragment.rb +103 -0
  116. data/lib/nokogiri/xml/dtd.rb +22 -0
  117. data/lib/nokogiri/xml/element_content.rb +36 -0
  118. data/lib/nokogiri/xml/element_decl.rb +13 -0
  119. data/lib/nokogiri/xml/entity_decl.rb +19 -0
  120. data/lib/nokogiri/xml/namespace.rb +13 -0
  121. data/lib/nokogiri/xml/node.rb +946 -0
  122. data/lib/nokogiri/xml/node/save_options.rb +61 -0
  123. data/lib/nokogiri/xml/node_set.rb +357 -0
  124. data/lib/nokogiri/xml/notation.rb +6 -0
  125. data/lib/nokogiri/xml/parse_options.rb +98 -0
  126. data/lib/nokogiri/xml/pp.rb +2 -0
  127. data/lib/nokogiri/xml/pp/character_data.rb +18 -0
  128. data/lib/nokogiri/xml/pp/node.rb +56 -0
  129. data/lib/nokogiri/xml/processing_instruction.rb +8 -0
  130. data/lib/nokogiri/xml/reader.rb +112 -0
  131. data/lib/nokogiri/xml/relax_ng.rb +32 -0
  132. data/lib/nokogiri/xml/sax.rb +4 -0
  133. data/lib/nokogiri/xml/sax/document.rb +164 -0
  134. data/lib/nokogiri/xml/sax/parser.rb +115 -0
  135. data/lib/nokogiri/xml/sax/parser_context.rb +16 -0
  136. data/lib/nokogiri/xml/sax/push_parser.rb +60 -0
  137. data/lib/nokogiri/xml/schema.rb +63 -0
  138. data/lib/nokogiri/xml/syntax_error.rb +47 -0
  139. data/lib/nokogiri/xml/text.rb +9 -0
  140. data/lib/nokogiri/xml/xpath.rb +10 -0
  141. data/lib/nokogiri/xml/xpath/syntax_error.rb +11 -0
  142. data/lib/nokogiri/xml/xpath_context.rb +16 -0
  143. data/lib/nokogiri/xslt.rb +56 -0
  144. data/lib/nokogiri/xslt/stylesheet.rb +25 -0
  145. data/lib/xsd/xmlparser/nokogiri.rb +90 -0
  146. data/tasks/cross_compile.rb +153 -0
  147. data/tasks/nokogiri.org.rb +24 -0
  148. data/tasks/test.rb +95 -0
  149. data/test/css/test_nthiness.rb +159 -0
  150. data/test/css/test_parser.rb +341 -0
  151. data/test/css/test_tokenizer.rb +198 -0
  152. data/test/css/test_xpath_visitor.rb +91 -0
  153. data/test/decorators/test_slop.rb +16 -0
  154. data/test/files/2ch.html +108 -0
  155. data/test/files/address_book.rlx +12 -0
  156. data/test/files/address_book.xml +10 -0
  157. data/test/files/bar/bar.xsd +4 -0
  158. data/test/files/dont_hurt_em_why.xml +422 -0
  159. data/test/files/encoding.html +82 -0
  160. data/test/files/encoding.xhtml +84 -0
  161. data/test/files/exslt.xml +8 -0
  162. data/test/files/exslt.xslt +35 -0
  163. data/test/files/foo/foo.xsd +4 -0
  164. data/test/files/metacharset.html +10 -0
  165. data/test/files/noencoding.html +47 -0
  166. data/test/files/po.xml +32 -0
  167. data/test/files/po.xsd +66 -0
  168. data/test/files/shift_jis.html +10 -0
  169. data/test/files/shift_jis.xml +5 -0
  170. data/test/files/snuggles.xml +3 -0
  171. data/test/files/staff.dtd +10 -0
  172. data/test/files/staff.xml +59 -0
  173. data/test/files/staff.xslt +32 -0
  174. data/test/files/tlm.html +850 -0
  175. data/test/files/to_be_xincluded.xml +2 -0
  176. data/test/files/valid_bar.xml +2 -0
  177. data/test/files/xinclude.xml +4 -0
  178. data/test/helper.rb +147 -0
  179. data/test/html/sax/test_parser.rb +138 -0
  180. data/test/html/sax/test_parser_context.rb +46 -0
  181. data/test/html/test_builder.rb +164 -0
  182. data/test/html/test_document.rb +529 -0
  183. data/test/html/test_document_encoding.rb +138 -0
  184. data/test/html/test_document_fragment.rb +254 -0
  185. data/test/html/test_element_description.rb +100 -0
  186. data/test/html/test_named_characters.rb +14 -0
  187. data/test/html/test_node.rb +188 -0
  188. data/test/html/test_node_encoding.rb +27 -0
  189. data/test/test_convert_xpath.rb +135 -0
  190. data/test/test_css_cache.rb +45 -0
  191. data/test/test_encoding_handler.rb +46 -0
  192. data/test/test_memory_leak.rb +152 -0
  193. data/test/test_nokogiri.rb +132 -0
  194. data/test/test_reader.rb +488 -0
  195. data/test/test_soap4r_sax.rb +52 -0
  196. data/test/test_xslt_transforms.rb +254 -0
  197. data/test/xml/node/test_save_options.rb +28 -0
  198. data/test/xml/node/test_subclass.rb +44 -0
  199. data/test/xml/sax/test_parser.rb +338 -0
  200. data/test/xml/sax/test_parser_context.rb +106 -0
  201. data/test/xml/sax/test_push_parser.rb +157 -0
  202. data/test/xml/test_attr.rb +64 -0
  203. data/test/xml/test_attribute_decl.rb +86 -0
  204. data/test/xml/test_builder.rb +248 -0
  205. data/test/xml/test_c14n.rb +151 -0
  206. data/test/xml/test_cdata.rb +48 -0
  207. data/test/xml/test_comment.rb +29 -0
  208. data/test/xml/test_document.rb +742 -0
  209. data/test/xml/test_document_encoding.rb +28 -0
  210. data/test/xml/test_document_fragment.rb +216 -0
  211. data/test/xml/test_dtd.rb +103 -0
  212. data/test/xml/test_dtd_encoding.rb +33 -0
  213. data/test/xml/test_element_content.rb +56 -0
  214. data/test/xml/test_element_decl.rb +73 -0
  215. data/test/xml/test_entity_decl.rb +122 -0
  216. data/test/xml/test_entity_reference.rb +235 -0
  217. data/test/xml/test_namespace.rb +75 -0
  218. data/test/xml/test_node.rb +1029 -0
  219. data/test/xml/test_node_attributes.rb +53 -0
  220. data/test/xml/test_node_encoding.rb +107 -0
  221. data/test/xml/test_node_inheritance.rb +32 -0
  222. data/test/xml/test_node_reparenting.rb +374 -0
  223. data/test/xml/test_node_set.rb +755 -0
  224. data/test/xml/test_parse_options.rb +64 -0
  225. data/test/xml/test_processing_instruction.rb +30 -0
  226. data/test/xml/test_reader_encoding.rb +142 -0
  227. data/test/xml/test_relax_ng.rb +60 -0
  228. data/test/xml/test_schema.rb +94 -0
  229. data/test/xml/test_syntax_error.rb +12 -0
  230. data/test/xml/test_text.rb +45 -0
  231. data/test/xml/test_unparented_node.rb +413 -0
  232. data/test/xml/test_xinclude.rb +83 -0
  233. data/test/xml/test_xpath.rb +295 -0
  234. data/test/xslt/test_custom_functions.rb +129 -0
  235. data/test/xslt/test_exception_handling.rb +37 -0
  236. data/test_all +84 -0
  237. metadata +534 -0
@@ -0,0 +1,94 @@
1
+ #include <xml_attr.h>
2
+
3
+ /*
4
+ * call-seq:
5
+ * value=(content)
6
+ *
7
+ * Set the value for this Attr to +content+
8
+ */
9
+ static VALUE set_value(VALUE self, VALUE content)
10
+ {
11
+ xmlAttrPtr attr;
12
+ Data_Get_Struct(self, xmlAttr, attr);
13
+
14
+ if(attr->children) xmlFreeNodeList(attr->children);
15
+
16
+ attr->children = attr->last = NULL;
17
+
18
+ if(content) {
19
+ xmlChar *buffer;
20
+ xmlNode *tmp;
21
+
22
+ /* Encode our content */
23
+ buffer = xmlEncodeEntitiesReentrant(attr->doc, (unsigned char *)StringValuePtr(content));
24
+
25
+ attr->children = xmlStringGetNodeList(attr->doc, buffer);
26
+ attr->last = NULL;
27
+ tmp = attr->children;
28
+
29
+ /* Loop through the children */
30
+ for(tmp = attr->children; tmp; tmp = tmp->next) {
31
+ tmp->parent = (xmlNode *)attr;
32
+ tmp->doc = attr->doc;
33
+ if(tmp->next == NULL) attr->last = tmp;
34
+ }
35
+
36
+ /* Free up memory */
37
+ xmlFree(buffer);
38
+ }
39
+
40
+ return content;
41
+ }
42
+
43
+ /*
44
+ * call-seq:
45
+ * new(document, name)
46
+ *
47
+ * Create a new Attr element on the +document+ with +name+
48
+ */
49
+ static VALUE new(int argc, VALUE *argv, VALUE klass)
50
+ {
51
+ xmlDocPtr xml_doc;
52
+ VALUE document;
53
+ VALUE name;
54
+ VALUE rest;
55
+ xmlAttrPtr node;
56
+ VALUE rb_node;
57
+
58
+ rb_scan_args(argc, argv, "2*", &document, &name, &rest);
59
+
60
+ Data_Get_Struct(document, xmlDoc, xml_doc);
61
+
62
+ node = xmlNewDocProp(
63
+ xml_doc,
64
+ (const xmlChar *)StringValuePtr(name),
65
+ NULL
66
+ );
67
+
68
+ nokogiri_root_node((xmlNodePtr)node);
69
+
70
+ rb_node = Nokogiri_wrap_xml_node(klass, (xmlNodePtr)node);
71
+ rb_obj_call_init(rb_node, argc, argv);
72
+
73
+ if(rb_block_given_p()) rb_yield(rb_node);
74
+
75
+ return rb_node;
76
+ }
77
+
78
+ VALUE cNokogiriXmlAttr;
79
+ void init_xml_attr()
80
+ {
81
+ VALUE nokogiri = rb_define_module("Nokogiri");
82
+ VALUE xml = rb_define_module_under(nokogiri, "XML");
83
+ VALUE node = rb_define_class_under(xml, "Node", rb_cObject);
84
+
85
+ /*
86
+ * Attr represents a Attr node in an xml document.
87
+ */
88
+ VALUE klass = rb_define_class_under(xml, "Attr", node);
89
+
90
+ cNokogiriXmlAttr = klass;
91
+
92
+ rb_define_singleton_method(klass, "new", new, -1);
93
+ rb_define_method(klass, "value=", set_value, 1);
94
+ }
@@ -0,0 +1,9 @@
1
+ #ifndef NOKOGIRI_XML_ATTR
2
+ #define NOKOGIRI_XML_ATTR
3
+
4
+ #include <nokogiri.h>
5
+
6
+ void init_xml_attr();
7
+
8
+ extern VALUE cNokogiriXmlAttr;
9
+ #endif
@@ -0,0 +1,70 @@
1
+ #include <xml_attribute_decl.h>
2
+
3
+ /*
4
+ * call-seq:
5
+ * attribute_type
6
+ *
7
+ * The attribute_type for this AttributeDecl
8
+ */
9
+ static VALUE attribute_type(VALUE self)
10
+ {
11
+ xmlAttributePtr node;
12
+ Data_Get_Struct(self, xmlAttribute, node);
13
+ return INT2NUM((long)node->atype);
14
+ }
15
+
16
+ /*
17
+ * call-seq:
18
+ * default
19
+ *
20
+ * The default value
21
+ */
22
+ static VALUE default_value(VALUE self)
23
+ {
24
+ xmlAttributePtr node;
25
+ Data_Get_Struct(self, xmlAttribute, node);
26
+
27
+ if(node->defaultValue) return NOKOGIRI_STR_NEW2(node->defaultValue);
28
+ return Qnil;
29
+ }
30
+
31
+ /*
32
+ * call-seq:
33
+ * enumeration
34
+ *
35
+ * An enumeration of possible values
36
+ */
37
+ static VALUE enumeration(VALUE self)
38
+ {
39
+ xmlAttributePtr node;
40
+ xmlEnumerationPtr enm;
41
+ VALUE list;
42
+
43
+ Data_Get_Struct(self, xmlAttribute, node);
44
+
45
+ list = rb_ary_new();
46
+ enm = node->tree;
47
+
48
+ while(enm) {
49
+ rb_ary_push(list, NOKOGIRI_STR_NEW2(enm->name));
50
+ enm = enm->next;
51
+ }
52
+
53
+ return list;
54
+ }
55
+
56
+ VALUE cNokogiriXmlAttributeDecl;
57
+
58
+ void init_xml_attribute_decl()
59
+ {
60
+ VALUE nokogiri = rb_define_module("Nokogiri");
61
+ VALUE xml = rb_define_module_under(nokogiri, "XML");
62
+ VALUE node = rb_define_class_under(xml, "Node", rb_cObject);
63
+ VALUE klass = rb_define_class_under(xml, "AttributeDecl", node);
64
+
65
+ cNokogiriXmlAttributeDecl = klass;
66
+
67
+ rb_define_method(klass, "attribute_type", attribute_type, 0);
68
+ rb_define_method(klass, "default", default_value, 0);
69
+ rb_define_method(klass, "enumeration", enumeration, 0);
70
+ }
@@ -0,0 +1,9 @@
1
+ #ifndef NOKOGIRI_XML_ATTRIBUTE_DECL
2
+ #define NOKOGIRI_XML_ATTRIBUTE_DECL
3
+
4
+ #include <nokogiri.h>
5
+
6
+ void init_xml_attribute_decl();
7
+
8
+ extern VALUE cNokogiriXmlAttributeDecl;
9
+ #endif
@@ -0,0 +1,56 @@
1
+ #include <xml_cdata.h>
2
+
3
+ /*
4
+ * call-seq:
5
+ * new(document, content)
6
+ *
7
+ * Create a new CDATA element on the +document+ with +content+
8
+ */
9
+ static VALUE new(int argc, VALUE *argv, VALUE klass)
10
+ {
11
+ xmlDocPtr xml_doc;
12
+ xmlNodePtr node;
13
+ VALUE doc;
14
+ VALUE content;
15
+ VALUE rest;
16
+ VALUE rb_node;
17
+
18
+ rb_scan_args(argc, argv, "2*", &doc, &content, &rest);
19
+
20
+ Data_Get_Struct(doc, xmlDoc, xml_doc);
21
+
22
+ node = xmlNewCDataBlock(
23
+ xml_doc->doc,
24
+ NIL_P(content) ? NULL : (const xmlChar *)StringValuePtr(content),
25
+ NIL_P(content) ? 0 : (int)RSTRING_LEN(content)
26
+ );
27
+
28
+ nokogiri_root_node(node);
29
+
30
+ rb_node = Nokogiri_wrap_xml_node(klass, node);
31
+ rb_obj_call_init(rb_node, argc, argv);
32
+
33
+ if(rb_block_given_p()) rb_yield(rb_node);
34
+
35
+ return rb_node;
36
+ }
37
+
38
+ VALUE cNokogiriXmlCData;
39
+ void init_xml_cdata()
40
+ {
41
+ VALUE nokogiri = rb_define_module("Nokogiri");
42
+ VALUE xml = rb_define_module_under(nokogiri, "XML");
43
+ VALUE node = rb_define_class_under(xml, "Node", rb_cObject);
44
+ VALUE char_data = rb_define_class_under(xml, "CharacterData", node);
45
+ VALUE text = rb_define_class_under(xml, "Text", char_data);
46
+
47
+ /*
48
+ * CData represents a CData node in an xml document.
49
+ */
50
+ VALUE klass = rb_define_class_under(xml, "CDATA", text);
51
+
52
+
53
+ cNokogiriXmlCData = klass;
54
+
55
+ rb_define_singleton_method(klass, "new", new, -1);
56
+ }
@@ -0,0 +1,9 @@
1
+ #ifndef NOKOGIRI_XML_CDATA
2
+ #define NOKOGIRI_XML_CDATA
3
+
4
+ #include <nokogiri.h>
5
+
6
+ void init_xml_cdata();
7
+
8
+ extern VALUE cNokogiriXmlCData;
9
+ #endif
@@ -0,0 +1,54 @@
1
+ #include <xml_comment.h>
2
+
3
+ /*
4
+ * call-seq:
5
+ * new(document, content)
6
+ *
7
+ * Create a new Comment element on the +document+ with +content+
8
+ */
9
+ static VALUE new(int argc, VALUE *argv, VALUE klass)
10
+ {
11
+ xmlDocPtr xml_doc;
12
+ xmlNodePtr node;
13
+ VALUE document;
14
+ VALUE content;
15
+ VALUE rest;
16
+ VALUE rb_node;
17
+
18
+ rb_scan_args(argc, argv, "2*", &document, &content, &rest);
19
+
20
+ Data_Get_Struct(document, xmlDoc, xml_doc);
21
+
22
+ node = xmlNewDocComment(
23
+ xml_doc,
24
+ (const xmlChar *)StringValuePtr(content)
25
+ );
26
+
27
+ rb_node = Nokogiri_wrap_xml_node(klass, node);
28
+ rb_obj_call_init(rb_node, argc, argv);
29
+
30
+ nokogiri_root_node(node);
31
+
32
+ if(rb_block_given_p()) rb_yield(rb_node);
33
+
34
+ return rb_node;
35
+ }
36
+
37
+ VALUE cNokogiriXmlComment;
38
+ void init_xml_comment()
39
+ {
40
+ VALUE nokogiri = rb_define_module("Nokogiri");
41
+ VALUE xml = rb_define_module_under(nokogiri, "XML");
42
+ VALUE node = rb_define_class_under(xml, "Node", rb_cObject);
43
+ VALUE char_data = rb_define_class_under(xml, "CharacterData", node);
44
+
45
+ /*
46
+ * Comment represents a comment node in an xml document.
47
+ */
48
+ VALUE klass = rb_define_class_under(xml, "Comment", char_data);
49
+
50
+
51
+ cNokogiriXmlComment = klass;
52
+
53
+ rb_define_singleton_method(klass, "new", new, -1);
54
+ }
@@ -0,0 +1,9 @@
1
+ #ifndef NOKOGIRI_XML_COMMENT
2
+ #define NOKOGIRI_XML_COMMENT
3
+
4
+ #include <nokogiri.h>
5
+
6
+ void init_xml_comment();
7
+
8
+ extern VALUE cNokogiriXmlComment;
9
+ #endif
@@ -0,0 +1,576 @@
1
+ #include <xml_document.h>
2
+
3
+ static int dealloc_node_i(xmlNodePtr key, xmlNodePtr node, xmlDocPtr doc)
4
+ {
5
+ switch(node->type) {
6
+ case XML_ATTRIBUTE_NODE:
7
+ xmlFreePropList((xmlAttrPtr)node);
8
+ break;
9
+ case XML_NAMESPACE_DECL:
10
+ xmlFree(node);
11
+ break;
12
+ default:
13
+ if(node->parent == NULL) {
14
+ xmlAddChild((xmlNodePtr)doc, node);
15
+ }
16
+ }
17
+ return ST_CONTINUE;
18
+ }
19
+
20
+ static void dealloc(xmlDocPtr doc)
21
+ {
22
+ xmlDeregisterNodeFunc func;
23
+ st_table *node_hash;
24
+
25
+ NOKOGIRI_DEBUG_START(doc);
26
+ func = xmlDeregisterNodeDefault(NULL);
27
+
28
+ node_hash = DOC_UNLINKED_NODE_HASH(doc);
29
+
30
+ st_foreach(node_hash, dealloc_node_i, (st_data_t)doc);
31
+ st_free_table(node_hash);
32
+
33
+ free(doc->_private);
34
+ doc->_private = NULL;
35
+ xmlFreeDoc(doc);
36
+
37
+ xmlDeregisterNodeDefault(func);
38
+ NOKOGIRI_DEBUG_END(doc);
39
+ }
40
+
41
+ static void recursively_remove_namespaces_from_node(xmlNodePtr node)
42
+ {
43
+ xmlNodePtr child ;
44
+ xmlAttrPtr property ;
45
+
46
+ xmlSetNs(node, NULL);
47
+
48
+ for (child = node->children ; child ; child = child->next)
49
+ recursively_remove_namespaces_from_node(child);
50
+
51
+ if (((node->type == XML_ELEMENT_NODE) ||
52
+ (node->type == XML_XINCLUDE_START) ||
53
+ (node->type == XML_XINCLUDE_END)) &&
54
+ node->nsDef) {
55
+ xmlFreeNsList(node->nsDef);
56
+ node->nsDef = NULL;
57
+ }
58
+
59
+ if (node->type == XML_ELEMENT_NODE && node->properties != NULL) {
60
+ property = node->properties ;
61
+ while (property != NULL) {
62
+ if (property->ns) property->ns = NULL ;
63
+ property = property->next ;
64
+ }
65
+ }
66
+ }
67
+
68
+ /*
69
+ * call-seq:
70
+ * url
71
+ *
72
+ * Get the url name for this document.
73
+ */
74
+ static VALUE url(VALUE self)
75
+ {
76
+ xmlDocPtr doc;
77
+ Data_Get_Struct(self, xmlDoc, doc);
78
+
79
+ if(doc->URL) return NOKOGIRI_STR_NEW2(doc->URL);
80
+
81
+ return Qnil;
82
+ }
83
+
84
+ /*
85
+ * call-seq:
86
+ * root=
87
+ *
88
+ * Set the root element on this document
89
+ */
90
+ static VALUE set_root(VALUE self, VALUE root)
91
+ {
92
+ xmlDocPtr doc;
93
+ xmlNodePtr new_root;
94
+ xmlNodePtr old_root;
95
+
96
+ Data_Get_Struct(self, xmlDoc, doc);
97
+
98
+ old_root = NULL;
99
+
100
+ if(NIL_P(root)) {
101
+ old_root = xmlDocGetRootElement(doc);
102
+
103
+ if(old_root) {
104
+ xmlUnlinkNode(old_root);
105
+ nokogiri_root_node(old_root);
106
+ }
107
+
108
+ return root;
109
+ }
110
+
111
+ Data_Get_Struct(root, xmlNode, new_root);
112
+
113
+
114
+ /* If the new root's document is not the same as the current document,
115
+ * then we need to dup the node in to this document. */
116
+ if(new_root->doc != doc) {
117
+ old_root = xmlDocGetRootElement(doc);
118
+ if (!(new_root = xmlDocCopyNode(new_root, doc, 1))) {
119
+ rb_raise(rb_eRuntimeError, "Could not reparent node (xmlDocCopyNode)");
120
+ }
121
+ }
122
+
123
+ xmlDocSetRootElement(doc, new_root);
124
+ if(old_root) nokogiri_root_node(old_root);
125
+ return root;
126
+ }
127
+
128
+ /*
129
+ * call-seq:
130
+ * root
131
+ *
132
+ * Get the root node for this document.
133
+ */
134
+ static VALUE root(VALUE self)
135
+ {
136
+ xmlDocPtr doc;
137
+ xmlNodePtr root;
138
+
139
+ Data_Get_Struct(self, xmlDoc, doc);
140
+
141
+ root = xmlDocGetRootElement(doc);
142
+
143
+ if(!root) return Qnil;
144
+ return Nokogiri_wrap_xml_node(Qnil, root) ;
145
+ }
146
+
147
+ /*
148
+ * call-seq:
149
+ * encoding= encoding
150
+ *
151
+ * Set the encoding string for this Document
152
+ */
153
+ static VALUE set_encoding(VALUE self, VALUE encoding)
154
+ {
155
+ xmlDocPtr doc;
156
+ Data_Get_Struct(self, xmlDoc, doc);
157
+
158
+ if (doc->encoding)
159
+ free((char *) doc->encoding); // this may produce a gcc cast warning
160
+
161
+ doc->encoding = xmlStrdup((xmlChar *)StringValuePtr(encoding));
162
+
163
+ return encoding;
164
+ }
165
+
166
+ /*
167
+ * call-seq:
168
+ * encoding
169
+ *
170
+ * Get the encoding for this Document
171
+ */
172
+ static VALUE encoding(VALUE self)
173
+ {
174
+ xmlDocPtr doc;
175
+ Data_Get_Struct(self, xmlDoc, doc);
176
+
177
+ if(!doc->encoding) return Qnil;
178
+ return NOKOGIRI_STR_NEW2(doc->encoding);
179
+ }
180
+
181
+ /*
182
+ * call-seq:
183
+ * version
184
+ *
185
+ * Get the XML version for this Document
186
+ */
187
+ static VALUE version(VALUE self)
188
+ {
189
+ xmlDocPtr doc;
190
+ Data_Get_Struct(self, xmlDoc, doc);
191
+
192
+ if(!doc->version) return Qnil;
193
+ return NOKOGIRI_STR_NEW2(doc->version);
194
+ }
195
+
196
+ /*
197
+ * call-seq:
198
+ * read_io(io, url, encoding, options)
199
+ *
200
+ * Create a new document from an IO object
201
+ */
202
+ static VALUE read_io( VALUE klass,
203
+ VALUE io,
204
+ VALUE url,
205
+ VALUE encoding,
206
+ VALUE options )
207
+ {
208
+ const char * c_url = NIL_P(url) ? NULL : StringValuePtr(url);
209
+ const char * c_enc = NIL_P(encoding) ? NULL : StringValuePtr(encoding);
210
+ VALUE error_list = rb_ary_new();
211
+ VALUE document;
212
+ xmlDocPtr doc;
213
+
214
+ xmlResetLastError();
215
+ xmlSetStructuredErrorFunc((void *)error_list, Nokogiri_error_array_pusher);
216
+
217
+ doc = xmlReadIO(
218
+ (xmlInputReadCallback)io_read_callback,
219
+ (xmlInputCloseCallback)io_close_callback,
220
+ (void *)io,
221
+ c_url,
222
+ c_enc,
223
+ (int)NUM2INT(options)
224
+ );
225
+ xmlSetStructuredErrorFunc(NULL, NULL);
226
+
227
+ if(doc == NULL) {
228
+ xmlErrorPtr error;
229
+
230
+ xmlFreeDoc(doc);
231
+
232
+ error = xmlGetLastError();
233
+ if(error)
234
+ rb_exc_raise(Nokogiri_wrap_xml_syntax_error((VALUE)NULL, error));
235
+ else
236
+ rb_raise(rb_eRuntimeError, "Could not parse document");
237
+
238
+ return Qnil;
239
+ }
240
+
241
+ document = Nokogiri_wrap_xml_document(klass, doc);
242
+ rb_iv_set(document, "@errors", error_list);
243
+ return document;
244
+ }
245
+
246
+ /*
247
+ * call-seq:
248
+ * read_memory(string, url, encoding, options)
249
+ *
250
+ * Create a new document from a String
251
+ */
252
+ static VALUE read_memory( VALUE klass,
253
+ VALUE string,
254
+ VALUE url,
255
+ VALUE encoding,
256
+ VALUE options )
257
+ {
258
+ const char * c_buffer = StringValuePtr(string);
259
+ const char * c_url = NIL_P(url) ? NULL : StringValuePtr(url);
260
+ const char * c_enc = NIL_P(encoding) ? NULL : StringValuePtr(encoding);
261
+ int len = (int)RSTRING_LEN(string);
262
+ VALUE error_list = rb_ary_new();
263
+ VALUE document;
264
+ xmlDocPtr doc;
265
+
266
+ xmlResetLastError();
267
+ xmlSetStructuredErrorFunc((void *)error_list, Nokogiri_error_array_pusher);
268
+ doc = xmlReadMemory(c_buffer, len, c_url, c_enc, (int)NUM2INT(options));
269
+ xmlSetStructuredErrorFunc(NULL, NULL);
270
+
271
+ if(doc == NULL) {
272
+ xmlErrorPtr error;
273
+
274
+ xmlFreeDoc(doc);
275
+
276
+ error = xmlGetLastError();
277
+ if(error)
278
+ rb_exc_raise(Nokogiri_wrap_xml_syntax_error((VALUE)NULL, error));
279
+ else
280
+ rb_raise(rb_eRuntimeError, "Could not parse document");
281
+
282
+ return Qnil;
283
+ }
284
+
285
+ document = Nokogiri_wrap_xml_document(klass, doc);
286
+ rb_iv_set(document, "@errors", error_list);
287
+ return document;
288
+ }
289
+
290
+ /*
291
+ * call-seq:
292
+ * dup
293
+ *
294
+ * Copy this Document. An optional depth may be passed in, but it defaults
295
+ * to a deep copy. 0 is a shallow copy, 1 is a deep copy.
296
+ */
297
+ static VALUE duplicate_node(int argc, VALUE *argv, VALUE self)
298
+ {
299
+ xmlDocPtr doc, dup;
300
+ VALUE level;
301
+
302
+ if(rb_scan_args(argc, argv, "01", &level) == 0)
303
+ level = INT2NUM((long)1);
304
+
305
+ Data_Get_Struct(self, xmlDoc, doc);
306
+
307
+ dup = xmlCopyDoc(doc, (int)NUM2INT(level));
308
+ if(dup == NULL) return Qnil;
309
+
310
+ dup->type = doc->type;
311
+ return Nokogiri_wrap_xml_document(rb_obj_class(self), dup);
312
+ }
313
+
314
+ /*
315
+ * call-seq:
316
+ * new(version = default)
317
+ *
318
+ * Create a new document with +version+ (defaults to "1.0")
319
+ */
320
+ static VALUE new(int argc, VALUE *argv, VALUE klass)
321
+ {
322
+ xmlDocPtr doc;
323
+ VALUE version, rest, rb_doc ;
324
+
325
+ rb_scan_args(argc, argv, "0*", &rest);
326
+ version = rb_ary_entry(rest, (long)0);
327
+ if (NIL_P(version)) version = rb_str_new2("1.0");
328
+
329
+ doc = xmlNewDoc((xmlChar *)StringValuePtr(version));
330
+ rb_doc = Nokogiri_wrap_xml_document(klass, doc);
331
+ rb_obj_call_init(rb_doc, argc, argv);
332
+ return rb_doc ;
333
+ }
334
+
335
+ /*
336
+ * call-seq:
337
+ * remove_namespaces!
338
+ *
339
+ * Remove all namespaces from all nodes in the document.
340
+ *
341
+ * This could be useful for developers who either don't understand namespaces
342
+ * or don't care about them.
343
+ *
344
+ * The following example shows a use case, and you can decide for yourself
345
+ * whether this is a good thing or not:
346
+ *
347
+ * doc = Nokogiri::XML <<-EOXML
348
+ * <root>
349
+ * <car xmlns:part="http://general-motors.com/">
350
+ * <part:tire>Michelin Model XGV</part:tire>
351
+ * </car>
352
+ * <bicycle xmlns:part="http://schwinn.com/">
353
+ * <part:tire>I'm a bicycle tire!</part:tire>
354
+ * </bicycle>
355
+ * </root>
356
+ * EOXML
357
+ *
358
+ * doc.xpath("//tire").to_s # => ""
359
+ * doc.xpath("//part:tire", "part" => "http://general-motors.com/").to_s # => "<part:tire>Michelin Model XGV</part:tire>"
360
+ * doc.xpath("//part:tire", "part" => "http://schwinn.com/").to_s # => "<part:tire>I'm a bicycle tire!</part:tire>"
361
+ *
362
+ * doc.remove_namespaces!
363
+ *
364
+ * doc.xpath("//tire").to_s # => "<tire>Michelin Model XGV</tire><tire>I'm a bicycle tire!</tire>"
365
+ * doc.xpath("//part:tire", "part" => "http://general-motors.com/").to_s # => ""
366
+ * doc.xpath("//part:tire", "part" => "http://schwinn.com/").to_s # => ""
367
+ *
368
+ * For more information on why this probably is *not* a good thing in general,
369
+ * please direct your browser to
370
+ * http://tenderlovemaking.com/2009/04/23/namespaces-in-xml/
371
+ */
372
+ VALUE remove_namespaces_bang(VALUE self)
373
+ {
374
+ xmlDocPtr doc ;
375
+ Data_Get_Struct(self, xmlDoc, doc);
376
+
377
+ recursively_remove_namespaces_from_node((xmlNodePtr)doc);
378
+ return self;
379
+ }
380
+
381
+ /* call-seq: doc.create_entity(name, type, external_id, system_id, content)
382
+ *
383
+ * Create a new entity named +name+.
384
+ *
385
+ * +type+ is an integer representing the type of entity to be created, and it
386
+ * defaults to Nokogiri::XML::EntityDecl::INTERNAL_GENERAL. See
387
+ * the constants on Nokogiri::XML::EntityDecl for more information.
388
+ *
389
+ * +external_id+, +system_id+, and +content+ set the External ID, System ID,
390
+ * and content respectively. All of these parameters are optional.
391
+ */
392
+ static VALUE create_entity(int argc, VALUE *argv, VALUE self)
393
+ {
394
+ VALUE name;
395
+ VALUE type;
396
+ VALUE external_id;
397
+ VALUE system_id;
398
+ VALUE content;
399
+ xmlEntityPtr ptr;
400
+ xmlDocPtr doc ;
401
+
402
+ Data_Get_Struct(self, xmlDoc, doc);
403
+
404
+ rb_scan_args(argc, argv, "14", &name, &type, &external_id, &system_id,
405
+ &content);
406
+
407
+ xmlResetLastError();
408
+ ptr = xmlAddDocEntity(
409
+ doc,
410
+ (xmlChar *)(NIL_P(name) ? NULL : StringValuePtr(name)),
411
+ (int) (NIL_P(type) ? XML_INTERNAL_GENERAL_ENTITY : NUM2INT(type)),
412
+ (xmlChar *)(NIL_P(external_id) ? NULL : StringValuePtr(external_id)),
413
+ (xmlChar *)(NIL_P(system_id) ? NULL : StringValuePtr(system_id)),
414
+ (xmlChar *)(NIL_P(content) ? NULL : StringValuePtr(content))
415
+ );
416
+
417
+ if(NULL == ptr) {
418
+ xmlErrorPtr error = xmlGetLastError();
419
+ if(error)
420
+ rb_exc_raise(Nokogiri_wrap_xml_syntax_error((VALUE)NULL, error));
421
+ else
422
+ rb_raise(rb_eRuntimeError, "Could not create entity");
423
+
424
+ return Qnil;
425
+ }
426
+
427
+ return Nokogiri_wrap_xml_node(cNokogiriXmlEntityDecl, (xmlNodePtr)ptr);
428
+ }
429
+
430
+ static int block_caller(void * ctx, xmlNodePtr _node, xmlNodePtr _parent)
431
+ {
432
+ VALUE block;
433
+ VALUE node;
434
+ VALUE parent;
435
+ VALUE ret;
436
+
437
+ if(_node->type == XML_NAMESPACE_DECL){
438
+ node = Nokogiri_wrap_xml_namespace(_parent->doc, (xmlNsPtr) _node);
439
+ }
440
+ else{
441
+ node = Nokogiri_wrap_xml_node(Qnil, _node);
442
+ }
443
+ parent = _parent ? Nokogiri_wrap_xml_node(Qnil, _parent) : Qnil;
444
+ block = (VALUE)ctx;
445
+
446
+ ret = rb_funcall(block, rb_intern("call"), 2, node, parent);
447
+
448
+ if(Qfalse == ret || Qnil == ret) return 0;
449
+
450
+ return 1;
451
+ }
452
+
453
+ /* call-seq:
454
+ * doc.canonicalize(mode=XML_C14N_1_0,inclusive_namespaces=nil,with_comments=false)
455
+ * doc.canonicalize { |obj, parent| ... }
456
+ *
457
+ * Canonicalize a document and return the results. Takes an optional block
458
+ * that takes two parameters: the +obj+ and that node's +parent+.
459
+ * The +obj+ will be either a Nokogiri::XML::Node, or a Nokogiri::XML::Namespace
460
+ * The block must return a non-nil, non-false value if the +obj+ passed in
461
+ * should be included in the canonicalized document.
462
+ */
463
+ static VALUE canonicalize(int argc, VALUE* argv, VALUE self)
464
+ {
465
+ VALUE mode;
466
+ VALUE incl_ns;
467
+ VALUE with_comments;
468
+ xmlChar **ns;
469
+ long ns_len, i;
470
+
471
+ xmlDocPtr doc;
472
+ xmlOutputBufferPtr buf;
473
+ xmlC14NIsVisibleCallback cb = NULL;
474
+ void * ctx = NULL;
475
+
476
+ VALUE rb_cStringIO;
477
+ VALUE io;
478
+
479
+ rb_scan_args(argc, argv, "03", &mode, &incl_ns, &with_comments);
480
+
481
+ Data_Get_Struct(self, xmlDoc, doc);
482
+
483
+ rb_cStringIO = rb_const_get_at(rb_cObject, rb_intern("StringIO"));
484
+ io = rb_class_new_instance(0, 0, rb_cStringIO);
485
+ buf = xmlAllocOutputBuffer(NULL);
486
+
487
+ buf->writecallback = (xmlOutputWriteCallback)io_write_callback;
488
+ buf->closecallback = (xmlOutputCloseCallback)io_close_callback;
489
+ buf->context = (void *)io;
490
+
491
+ if(rb_block_given_p()) {
492
+ cb = block_caller;
493
+ ctx = (void *)rb_block_proc();
494
+ }
495
+
496
+ if(NIL_P(incl_ns)){
497
+ ns = NULL;
498
+ }
499
+ else{
500
+ ns_len = RARRAY_LEN(incl_ns);
501
+ ns = calloc((size_t)ns_len+1, sizeof(xmlChar *));
502
+ for (i = 0 ; i < ns_len ; i++) {
503
+ VALUE entry = rb_ary_entry(incl_ns, i);
504
+ const char * ptr = StringValuePtr(entry);
505
+ ns[i] = (xmlChar*) ptr;
506
+ }
507
+ }
508
+
509
+
510
+ xmlC14NExecute(doc, cb, ctx,
511
+ (int) (NIL_P(mode) ? 0 : NUM2INT(mode)),
512
+ ns,
513
+ (int) (NIL_P(with_comments) ? 0 : 1),
514
+ buf);
515
+
516
+ xmlOutputBufferClose(buf);
517
+
518
+ return rb_funcall(io, rb_intern("string"), 0);
519
+ }
520
+
521
+ VALUE cNokogiriXmlDocument ;
522
+ void init_xml_document()
523
+ {
524
+ VALUE nokogiri = rb_define_module("Nokogiri");
525
+ VALUE xml = rb_define_module_under(nokogiri, "XML");
526
+ VALUE node = rb_define_class_under(xml, "Node", rb_cObject);
527
+
528
+ /*
529
+ * Nokogiri::XML::Document wraps an xml document.
530
+ */
531
+ VALUE klass = rb_define_class_under(xml, "Document", node);
532
+
533
+ cNokogiriXmlDocument = klass;
534
+
535
+ rb_define_singleton_method(klass, "read_memory", read_memory, 4);
536
+ rb_define_singleton_method(klass, "read_io", read_io, 4);
537
+ rb_define_singleton_method(klass, "new", new, -1);
538
+
539
+ rb_define_method(klass, "root", root, 0);
540
+ rb_define_method(klass, "root=", set_root, 1);
541
+ rb_define_method(klass, "encoding", encoding, 0);
542
+ rb_define_method(klass, "encoding=", set_encoding, 1);
543
+ rb_define_method(klass, "version", version, 0);
544
+ rb_define_method(klass, "canonicalize", canonicalize, -1);
545
+ rb_define_method(klass, "dup", duplicate_node, -1);
546
+ rb_define_method(klass, "url", url, 0);
547
+ rb_define_method(klass, "create_entity", create_entity, -1);
548
+ rb_define_method(klass, "remove_namespaces!", remove_namespaces_bang, 0);
549
+ }
550
+
551
+
552
+ /* this takes klass as a param because it's used for HtmlDocument, too. */
553
+ VALUE Nokogiri_wrap_xml_document(VALUE klass, xmlDocPtr doc)
554
+ {
555
+ nokogiriTuplePtr tuple = (nokogiriTuplePtr)malloc(sizeof(nokogiriTuple));
556
+
557
+ VALUE rb_doc = Data_Wrap_Struct(
558
+ klass ? klass : cNokogiriXmlDocument,
559
+ 0,
560
+ dealloc,
561
+ doc
562
+ );
563
+
564
+ VALUE cache = rb_ary_new();
565
+ rb_iv_set(rb_doc, "@decorators", Qnil);
566
+ rb_iv_set(rb_doc, "@node_cache", cache);
567
+
568
+ tuple->doc = rb_doc;
569
+ tuple->unlinkedNodes = st_init_numtable_with_size(128);
570
+ tuple->node_cache = cache;
571
+ doc->_private = tuple ;
572
+
573
+ rb_obj_call_init(rb_doc, 0, NULL);
574
+
575
+ return rb_doc ;
576
+ }