nokogiri 1.11.0.rc1-x86-linux

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (145) hide show
  1. checksums.yaml +7 -0
  2. data/LICENSE-DEPENDENCIES.md +1614 -0
  3. data/LICENSE.md +9 -0
  4. data/README.md +200 -0
  5. data/bin/nokogiri +118 -0
  6. data/dependencies.yml +74 -0
  7. data/ext/nokogiri/depend +358 -0
  8. data/ext/nokogiri/extconf.rb +695 -0
  9. data/ext/nokogiri/html_document.c +170 -0
  10. data/ext/nokogiri/html_document.h +10 -0
  11. data/ext/nokogiri/html_element_description.c +279 -0
  12. data/ext/nokogiri/html_element_description.h +10 -0
  13. data/ext/nokogiri/html_entity_lookup.c +32 -0
  14. data/ext/nokogiri/html_entity_lookup.h +8 -0
  15. data/ext/nokogiri/html_sax_parser_context.c +116 -0
  16. data/ext/nokogiri/html_sax_parser_context.h +11 -0
  17. data/ext/nokogiri/html_sax_push_parser.c +87 -0
  18. data/ext/nokogiri/html_sax_push_parser.h +9 -0
  19. data/ext/nokogiri/nokogiri.c +147 -0
  20. data/ext/nokogiri/nokogiri.h +122 -0
  21. data/ext/nokogiri/xml_attr.c +103 -0
  22. data/ext/nokogiri/xml_attr.h +9 -0
  23. data/ext/nokogiri/xml_attribute_decl.c +70 -0
  24. data/ext/nokogiri/xml_attribute_decl.h +9 -0
  25. data/ext/nokogiri/xml_cdata.c +62 -0
  26. data/ext/nokogiri/xml_cdata.h +9 -0
  27. data/ext/nokogiri/xml_comment.c +69 -0
  28. data/ext/nokogiri/xml_comment.h +9 -0
  29. data/ext/nokogiri/xml_document.c +617 -0
  30. data/ext/nokogiri/xml_document.h +23 -0
  31. data/ext/nokogiri/xml_document_fragment.c +48 -0
  32. data/ext/nokogiri/xml_document_fragment.h +10 -0
  33. data/ext/nokogiri/xml_dtd.c +202 -0
  34. data/ext/nokogiri/xml_dtd.h +10 -0
  35. data/ext/nokogiri/xml_element_content.c +123 -0
  36. data/ext/nokogiri/xml_element_content.h +10 -0
  37. data/ext/nokogiri/xml_element_decl.c +69 -0
  38. data/ext/nokogiri/xml_element_decl.h +9 -0
  39. data/ext/nokogiri/xml_encoding_handler.c +79 -0
  40. data/ext/nokogiri/xml_encoding_handler.h +8 -0
  41. data/ext/nokogiri/xml_entity_decl.c +110 -0
  42. data/ext/nokogiri/xml_entity_decl.h +10 -0
  43. data/ext/nokogiri/xml_entity_reference.c +52 -0
  44. data/ext/nokogiri/xml_entity_reference.h +9 -0
  45. data/ext/nokogiri/xml_io.c +61 -0
  46. data/ext/nokogiri/xml_io.h +11 -0
  47. data/ext/nokogiri/xml_libxml2_hacks.c +112 -0
  48. data/ext/nokogiri/xml_libxml2_hacks.h +12 -0
  49. data/ext/nokogiri/xml_namespace.c +111 -0
  50. data/ext/nokogiri/xml_namespace.h +14 -0
  51. data/ext/nokogiri/xml_node.c +1773 -0
  52. data/ext/nokogiri/xml_node.h +13 -0
  53. data/ext/nokogiri/xml_node_set.c +486 -0
  54. data/ext/nokogiri/xml_node_set.h +12 -0
  55. data/ext/nokogiri/xml_processing_instruction.c +56 -0
  56. data/ext/nokogiri/xml_processing_instruction.h +9 -0
  57. data/ext/nokogiri/xml_reader.c +668 -0
  58. data/ext/nokogiri/xml_reader.h +10 -0
  59. data/ext/nokogiri/xml_relax_ng.c +161 -0
  60. data/ext/nokogiri/xml_relax_ng.h +9 -0
  61. data/ext/nokogiri/xml_sax_parser.c +310 -0
  62. data/ext/nokogiri/xml_sax_parser.h +39 -0
  63. data/ext/nokogiri/xml_sax_parser_context.c +262 -0
  64. data/ext/nokogiri/xml_sax_parser_context.h +10 -0
  65. data/ext/nokogiri/xml_sax_push_parser.c +159 -0
  66. data/ext/nokogiri/xml_sax_push_parser.h +9 -0
  67. data/ext/nokogiri/xml_schema.c +205 -0
  68. data/ext/nokogiri/xml_schema.h +9 -0
  69. data/ext/nokogiri/xml_syntax_error.c +64 -0
  70. data/ext/nokogiri/xml_syntax_error.h +13 -0
  71. data/ext/nokogiri/xml_text.c +52 -0
  72. data/ext/nokogiri/xml_text.h +9 -0
  73. data/ext/nokogiri/xml_xpath_context.c +298 -0
  74. data/ext/nokogiri/xml_xpath_context.h +10 -0
  75. data/ext/nokogiri/xslt_stylesheet.c +266 -0
  76. data/ext/nokogiri/xslt_stylesheet.h +14 -0
  77. data/lib/nokogiri.rb +127 -0
  78. data/lib/nokogiri/2.4/nokogiri.so +0 -0
  79. data/lib/nokogiri/2.5/nokogiri.so +0 -0
  80. data/lib/nokogiri/2.6/nokogiri.so +0 -0
  81. data/lib/nokogiri/2.7/nokogiri.so +0 -0
  82. data/lib/nokogiri/css.rb +28 -0
  83. data/lib/nokogiri/css/node.rb +53 -0
  84. data/lib/nokogiri/css/parser.rb +751 -0
  85. data/lib/nokogiri/css/parser.y +272 -0
  86. data/lib/nokogiri/css/parser_extras.rb +92 -0
  87. data/lib/nokogiri/css/syntax_error.rb +8 -0
  88. data/lib/nokogiri/css/tokenizer.rb +154 -0
  89. data/lib/nokogiri/css/tokenizer.rex +55 -0
  90. data/lib/nokogiri/css/xpath_visitor.rb +232 -0
  91. data/lib/nokogiri/decorators/slop.rb +43 -0
  92. data/lib/nokogiri/html.rb +38 -0
  93. data/lib/nokogiri/html/builder.rb +36 -0
  94. data/lib/nokogiri/html/document.rb +336 -0
  95. data/lib/nokogiri/html/document_fragment.rb +50 -0
  96. data/lib/nokogiri/html/element_description.rb +24 -0
  97. data/lib/nokogiri/html/element_description_defaults.rb +672 -0
  98. data/lib/nokogiri/html/entity_lookup.rb +14 -0
  99. data/lib/nokogiri/html/sax/parser.rb +63 -0
  100. data/lib/nokogiri/html/sax/parser_context.rb +17 -0
  101. data/lib/nokogiri/html/sax/push_parser.rb +37 -0
  102. data/lib/nokogiri/jruby/dependencies.rb +20 -0
  103. data/lib/nokogiri/syntax_error.rb +5 -0
  104. data/lib/nokogiri/version.rb +149 -0
  105. data/lib/nokogiri/xml.rb +76 -0
  106. data/lib/nokogiri/xml/attr.rb +15 -0
  107. data/lib/nokogiri/xml/attribute_decl.rb +19 -0
  108. data/lib/nokogiri/xml/builder.rb +447 -0
  109. data/lib/nokogiri/xml/cdata.rb +12 -0
  110. data/lib/nokogiri/xml/character_data.rb +8 -0
  111. data/lib/nokogiri/xml/document.rb +280 -0
  112. data/lib/nokogiri/xml/document_fragment.rb +161 -0
  113. data/lib/nokogiri/xml/dtd.rb +33 -0
  114. data/lib/nokogiri/xml/element_content.rb +37 -0
  115. data/lib/nokogiri/xml/element_decl.rb +14 -0
  116. data/lib/nokogiri/xml/entity_decl.rb +20 -0
  117. data/lib/nokogiri/xml/entity_reference.rb +19 -0
  118. data/lib/nokogiri/xml/namespace.rb +14 -0
  119. data/lib/nokogiri/xml/node.rb +916 -0
  120. data/lib/nokogiri/xml/node/save_options.rb +62 -0
  121. data/lib/nokogiri/xml/node_set.rb +372 -0
  122. data/lib/nokogiri/xml/notation.rb +7 -0
  123. data/lib/nokogiri/xml/parse_options.rb +121 -0
  124. data/lib/nokogiri/xml/pp.rb +3 -0
  125. data/lib/nokogiri/xml/pp/character_data.rb +19 -0
  126. data/lib/nokogiri/xml/pp/node.rb +57 -0
  127. data/lib/nokogiri/xml/processing_instruction.rb +9 -0
  128. data/lib/nokogiri/xml/reader.rb +116 -0
  129. data/lib/nokogiri/xml/relax_ng.rb +33 -0
  130. data/lib/nokogiri/xml/sax.rb +5 -0
  131. data/lib/nokogiri/xml/sax/document.rb +172 -0
  132. data/lib/nokogiri/xml/sax/parser.rb +123 -0
  133. data/lib/nokogiri/xml/sax/parser_context.rb +17 -0
  134. data/lib/nokogiri/xml/sax/push_parser.rb +61 -0
  135. data/lib/nokogiri/xml/schema.rb +64 -0
  136. data/lib/nokogiri/xml/searchable.rb +231 -0
  137. data/lib/nokogiri/xml/syntax_error.rb +71 -0
  138. data/lib/nokogiri/xml/text.rb +10 -0
  139. data/lib/nokogiri/xml/xpath.rb +11 -0
  140. data/lib/nokogiri/xml/xpath/syntax_error.rb +12 -0
  141. data/lib/nokogiri/xml/xpath_context.rb +17 -0
  142. data/lib/nokogiri/xslt.rb +57 -0
  143. data/lib/nokogiri/xslt/stylesheet.rb +26 -0
  144. data/lib/xsd/xmlparser/nokogiri.rb +103 -0
  145. metadata +482 -0
@@ -0,0 +1,103 @@
1
+ #include <xml_attr.h>
2
+
3
+ /*
4
+ * call-seq:
5
+ * value=(content)
6
+ *
7
+ * Set the value for this Attr to +content+. Use `nil` to remove the value
8
+ * (e.g., a HTML boolean attribute).
9
+ */
10
+ static VALUE set_value(VALUE self, VALUE content)
11
+ {
12
+ xmlAttrPtr attr;
13
+ xmlChar *value;
14
+ xmlNode *cur;
15
+
16
+ Data_Get_Struct(self, xmlAttr, attr);
17
+
18
+ if (attr->children) {
19
+ xmlFreeNodeList(attr->children);
20
+ }
21
+ attr->children = attr->last = NULL;
22
+
23
+ if (content == Qnil) {
24
+ return content;
25
+ }
26
+
27
+ value = xmlEncodeEntitiesReentrant(attr->doc, (unsigned char *)StringValueCStr(content));
28
+ if (xmlStrlen(value) == 0) {
29
+ attr->children = xmlNewDocText(attr->doc, value);
30
+ } else {
31
+ attr->children = xmlStringGetNodeList(attr->doc, value);
32
+ }
33
+ xmlFree(value);
34
+
35
+ for (cur = attr->children; cur; cur = cur->next) {
36
+ cur->parent = (xmlNode *)attr;
37
+ cur->doc = attr->doc;
38
+ if (cur->next == NULL) {
39
+ attr->last = cur;
40
+ }
41
+ }
42
+
43
+ return content;
44
+ }
45
+
46
+ /*
47
+ * call-seq:
48
+ * new(document, name)
49
+ *
50
+ * Create a new Attr element on the +document+ with +name+
51
+ */
52
+ static VALUE new(int argc, VALUE *argv, VALUE klass)
53
+ {
54
+ xmlDocPtr xml_doc;
55
+ VALUE document;
56
+ VALUE name;
57
+ VALUE rest;
58
+ xmlAttrPtr node;
59
+ VALUE rb_node;
60
+
61
+ rb_scan_args(argc, argv, "2*", &document, &name, &rest);
62
+
63
+ if (! rb_obj_is_kind_of(document, cNokogiriXmlDocument)) {
64
+ rb_raise(rb_eArgError, "parameter must be a Nokogiri::XML::Document");
65
+ }
66
+
67
+ Data_Get_Struct(document, xmlDoc, xml_doc);
68
+
69
+ node = xmlNewDocProp(
70
+ xml_doc,
71
+ (const xmlChar *)StringValueCStr(name),
72
+ NULL
73
+ );
74
+
75
+ nokogiri_root_node((xmlNodePtr)node);
76
+
77
+ rb_node = Nokogiri_wrap_xml_node(klass, (xmlNodePtr)node);
78
+ rb_obj_call_init(rb_node, argc, argv);
79
+
80
+ if (rb_block_given_p()) {
81
+ rb_yield(rb_node);
82
+ }
83
+
84
+ return rb_node;
85
+ }
86
+
87
+ VALUE cNokogiriXmlAttr;
88
+ void init_xml_attr()
89
+ {
90
+ VALUE nokogiri = rb_define_module("Nokogiri");
91
+ VALUE xml = rb_define_module_under(nokogiri, "XML");
92
+ VALUE node = rb_define_class_under(xml, "Node", rb_cObject);
93
+
94
+ /*
95
+ * Attr represents a Attr node in an xml document.
96
+ */
97
+ VALUE klass = rb_define_class_under(xml, "Attr", node);
98
+
99
+ cNokogiriXmlAttr = klass;
100
+
101
+ rb_define_singleton_method(klass, "new", new, -1);
102
+ rb_define_method(klass, "value=", set_value, 1);
103
+ }
@@ -0,0 +1,9 @@
1
+ #ifndef NOKOGIRI_XML_ATTR
2
+ #define NOKOGIRI_XML_ATTR
3
+
4
+ #include <nokogiri.h>
5
+
6
+ void init_xml_attr();
7
+
8
+ extern VALUE cNokogiriXmlAttr;
9
+ #endif
@@ -0,0 +1,70 @@
1
+ #include <xml_attribute_decl.h>
2
+
3
+ /*
4
+ * call-seq:
5
+ * attribute_type
6
+ *
7
+ * The attribute_type for this AttributeDecl
8
+ */
9
+ static VALUE attribute_type(VALUE self)
10
+ {
11
+ xmlAttributePtr node;
12
+ Data_Get_Struct(self, xmlAttribute, node);
13
+ return INT2NUM((long)node->atype);
14
+ }
15
+
16
+ /*
17
+ * call-seq:
18
+ * default
19
+ *
20
+ * The default value
21
+ */
22
+ static VALUE default_value(VALUE self)
23
+ {
24
+ xmlAttributePtr node;
25
+ Data_Get_Struct(self, xmlAttribute, node);
26
+
27
+ if(node->defaultValue) return NOKOGIRI_STR_NEW2(node->defaultValue);
28
+ return Qnil;
29
+ }
30
+
31
+ /*
32
+ * call-seq:
33
+ * enumeration
34
+ *
35
+ * An enumeration of possible values
36
+ */
37
+ static VALUE enumeration(VALUE self)
38
+ {
39
+ xmlAttributePtr node;
40
+ xmlEnumerationPtr enm;
41
+ VALUE list;
42
+
43
+ Data_Get_Struct(self, xmlAttribute, node);
44
+
45
+ list = rb_ary_new();
46
+ enm = node->tree;
47
+
48
+ while(enm) {
49
+ rb_ary_push(list, NOKOGIRI_STR_NEW2(enm->name));
50
+ enm = enm->next;
51
+ }
52
+
53
+ return list;
54
+ }
55
+
56
+ VALUE cNokogiriXmlAttributeDecl;
57
+
58
+ void init_xml_attribute_decl()
59
+ {
60
+ VALUE nokogiri = rb_define_module("Nokogiri");
61
+ VALUE xml = rb_define_module_under(nokogiri, "XML");
62
+ VALUE node = rb_define_class_under(xml, "Node", rb_cObject);
63
+ VALUE klass = rb_define_class_under(xml, "AttributeDecl", node);
64
+
65
+ cNokogiriXmlAttributeDecl = klass;
66
+
67
+ rb_define_method(klass, "attribute_type", attribute_type, 0);
68
+ rb_define_method(klass, "default", default_value, 0);
69
+ rb_define_method(klass, "enumeration", enumeration, 0);
70
+ }
@@ -0,0 +1,9 @@
1
+ #ifndef NOKOGIRI_XML_ATTRIBUTE_DECL
2
+ #define NOKOGIRI_XML_ATTRIBUTE_DECL
3
+
4
+ #include <nokogiri.h>
5
+
6
+ void init_xml_attribute_decl();
7
+
8
+ extern VALUE cNokogiriXmlAttributeDecl;
9
+ #endif
@@ -0,0 +1,62 @@
1
+ #include <xml_cdata.h>
2
+
3
+ /*
4
+ * call-seq:
5
+ * new(document, content)
6
+ *
7
+ * Create a new CDATA element on the +document+ with +content+
8
+ *
9
+ * If +content+ cannot be implicitly converted to a string, this method will
10
+ * raise a TypeError exception.
11
+ */
12
+ static VALUE new(int argc, VALUE *argv, VALUE klass)
13
+ {
14
+ xmlDocPtr xml_doc;
15
+ xmlNodePtr node;
16
+ VALUE doc;
17
+ VALUE content;
18
+ VALUE rest;
19
+ VALUE rb_node;
20
+ xmlChar *content_str = NULL;
21
+ int content_str_len = 0;
22
+
23
+ rb_scan_args(argc, argv, "2*", &doc, &content, &rest);
24
+
25
+ Data_Get_Struct(doc, xmlDoc, xml_doc);
26
+
27
+ if (!NIL_P(content)) {
28
+ content_str = (xmlChar *)StringValuePtr(content);
29
+ content_str_len = RSTRING_LEN(content);
30
+ }
31
+
32
+ node = xmlNewCDataBlock(xml_doc->doc, content_str, content_str_len);
33
+
34
+ nokogiri_root_node(node);
35
+
36
+ rb_node = Nokogiri_wrap_xml_node(klass, node);
37
+ rb_obj_call_init(rb_node, argc, argv);
38
+
39
+ if(rb_block_given_p()) { rb_yield(rb_node); }
40
+
41
+ return rb_node;
42
+ }
43
+
44
+ VALUE cNokogiriXmlCData;
45
+ void init_xml_cdata()
46
+ {
47
+ VALUE nokogiri = rb_define_module("Nokogiri");
48
+ VALUE xml = rb_define_module_under(nokogiri, "XML");
49
+ VALUE node = rb_define_class_under(xml, "Node", rb_cObject);
50
+ VALUE char_data = rb_define_class_under(xml, "CharacterData", node);
51
+ VALUE text = rb_define_class_under(xml, "Text", char_data);
52
+
53
+ /*
54
+ * CData represents a CData node in an xml document.
55
+ */
56
+ VALUE klass = rb_define_class_under(xml, "CDATA", text);
57
+
58
+
59
+ cNokogiriXmlCData = klass;
60
+
61
+ rb_define_singleton_method(klass, "new", new, -1);
62
+ }
@@ -0,0 +1,9 @@
1
+ #ifndef NOKOGIRI_XML_CDATA
2
+ #define NOKOGIRI_XML_CDATA
3
+
4
+ #include <nokogiri.h>
5
+
6
+ void init_xml_cdata();
7
+
8
+ extern VALUE cNokogiriXmlCData;
9
+ #endif
@@ -0,0 +1,69 @@
1
+ #include <xml_comment.h>
2
+
3
+ static ID document_id ;
4
+
5
+ /*
6
+ * call-seq:
7
+ * new(document_or_node, content)
8
+ *
9
+ * Create a new Comment element on the +document+ with +content+.
10
+ * Alternatively, if a +node+ is passed, the +node+'s document is used.
11
+ */
12
+ static VALUE new(int argc, VALUE *argv, VALUE klass)
13
+ {
14
+ xmlDocPtr xml_doc;
15
+ xmlNodePtr node;
16
+ VALUE document;
17
+ VALUE content;
18
+ VALUE rest;
19
+ VALUE rb_node;
20
+
21
+ rb_scan_args(argc, argv, "2*", &document, &content, &rest);
22
+
23
+ if (rb_obj_is_kind_of(document, cNokogiriXmlNode))
24
+ {
25
+ document = rb_funcall(document, document_id, 0);
26
+ }
27
+ else if ( !rb_obj_is_kind_of(document, cNokogiriXmlDocument)
28
+ && !rb_obj_is_kind_of(document, cNokogiriXmlDocumentFragment))
29
+ {
30
+ rb_raise(rb_eArgError, "first argument must be a XML::Document or XML::Node");
31
+ }
32
+
33
+ Data_Get_Struct(document, xmlDoc, xml_doc);
34
+
35
+ node = xmlNewDocComment(
36
+ xml_doc,
37
+ (const xmlChar *)StringValueCStr(content)
38
+ );
39
+
40
+ rb_node = Nokogiri_wrap_xml_node(klass, node);
41
+ rb_obj_call_init(rb_node, argc, argv);
42
+
43
+ nokogiri_root_node(node);
44
+
45
+ if(rb_block_given_p()) rb_yield(rb_node);
46
+
47
+ return rb_node;
48
+ }
49
+
50
+ VALUE cNokogiriXmlComment;
51
+ void init_xml_comment()
52
+ {
53
+ VALUE nokogiri = rb_define_module("Nokogiri");
54
+ VALUE xml = rb_define_module_under(nokogiri, "XML");
55
+ VALUE node = rb_define_class_under(xml, "Node", rb_cObject);
56
+ VALUE char_data = rb_define_class_under(xml, "CharacterData", node);
57
+
58
+ /*
59
+ * Comment represents a comment node in an xml document.
60
+ */
61
+ VALUE klass = rb_define_class_under(xml, "Comment", char_data);
62
+
63
+
64
+ cNokogiriXmlComment = klass;
65
+
66
+ rb_define_singleton_method(klass, "new", new, -1);
67
+
68
+ document_id = rb_intern("document");
69
+ }
@@ -0,0 +1,9 @@
1
+ #ifndef NOKOGIRI_XML_COMMENT
2
+ #define NOKOGIRI_XML_COMMENT
3
+
4
+ #include <nokogiri.h>
5
+
6
+ void init_xml_comment();
7
+
8
+ extern VALUE cNokogiriXmlComment;
9
+ #endif
@@ -0,0 +1,617 @@
1
+ #include <xml_document.h>
2
+
3
+ static int dealloc_node_i(xmlNodePtr key, xmlNodePtr node, xmlDocPtr doc)
4
+ {
5
+ switch(node->type) {
6
+ case XML_ATTRIBUTE_NODE:
7
+ xmlFreePropList((xmlAttrPtr)node);
8
+ break;
9
+ case XML_NAMESPACE_DECL:
10
+ xmlFreeNs((xmlNsPtr)node);
11
+ break;
12
+ case XML_DTD_NODE:
13
+ xmlFreeDtd((xmlDtdPtr)node);
14
+ break;
15
+ default:
16
+ if(node->parent == NULL) {
17
+ xmlAddChild((xmlNodePtr)doc, node);
18
+ }
19
+ }
20
+ return ST_CONTINUE;
21
+ }
22
+
23
+ static void remove_private(xmlNodePtr node)
24
+ {
25
+ xmlNodePtr child;
26
+
27
+ for (child = node->children; child; child = child->next)
28
+ remove_private(child);
29
+
30
+ if ((node->type == XML_ELEMENT_NODE ||
31
+ node->type == XML_XINCLUDE_START ||
32
+ node->type == XML_XINCLUDE_END) &&
33
+ node->properties) {
34
+ for (child = (xmlNodePtr)node->properties; child; child = child->next)
35
+ remove_private(child);
36
+ }
37
+
38
+ node->_private = NULL;
39
+ }
40
+
41
+ static void mark(xmlDocPtr doc)
42
+ {
43
+ nokogiriTuplePtr tuple = (nokogiriTuplePtr)doc->_private;
44
+ if(tuple) {
45
+ rb_gc_mark(tuple->doc);
46
+ rb_gc_mark(tuple->node_cache);
47
+ }
48
+ }
49
+
50
+ static void dealloc(xmlDocPtr doc)
51
+ {
52
+ st_table *node_hash;
53
+
54
+ NOKOGIRI_DEBUG_START(doc);
55
+
56
+ node_hash = DOC_UNLINKED_NODE_HASH(doc);
57
+
58
+ st_foreach(node_hash, dealloc_node_i, (st_data_t)doc);
59
+ st_free_table(node_hash);
60
+
61
+ free(doc->_private);
62
+
63
+ /* When both Nokogiri and libxml-ruby are loaded, make sure that all nodes
64
+ * have their _private pointers cleared. This is to avoid libxml-ruby's
65
+ * xmlDeregisterNode callback from accessing VALUE pointers from ruby's GC
66
+ * free context, which can result in segfaults.
67
+ */
68
+ if (xmlDeregisterNodeDefaultValue)
69
+ remove_private((xmlNodePtr)doc);
70
+
71
+ xmlFreeDoc(doc);
72
+
73
+ NOKOGIRI_DEBUG_END(doc);
74
+ }
75
+
76
+ static void recursively_remove_namespaces_from_node(xmlNodePtr node)
77
+ {
78
+ xmlNodePtr child ;
79
+ xmlAttrPtr property ;
80
+
81
+ xmlSetNs(node, NULL);
82
+
83
+ for (child = node->children ; child ; child = child->next)
84
+ recursively_remove_namespaces_from_node(child);
85
+
86
+ if (((node->type == XML_ELEMENT_NODE) ||
87
+ (node->type == XML_XINCLUDE_START) ||
88
+ (node->type == XML_XINCLUDE_END)) &&
89
+ node->nsDef) {
90
+ xmlFreeNsList(node->nsDef);
91
+ node->nsDef = NULL;
92
+ }
93
+
94
+ if (node->type == XML_ELEMENT_NODE && node->properties != NULL) {
95
+ property = node->properties ;
96
+ while (property != NULL) {
97
+ if (property->ns) property->ns = NULL ;
98
+ property = property->next ;
99
+ }
100
+ }
101
+ }
102
+
103
+ /*
104
+ * call-seq:
105
+ * url
106
+ *
107
+ * Get the url name for this document.
108
+ */
109
+ static VALUE url(VALUE self)
110
+ {
111
+ xmlDocPtr doc;
112
+ Data_Get_Struct(self, xmlDoc, doc);
113
+
114
+ if(doc->URL) return NOKOGIRI_STR_NEW2(doc->URL);
115
+
116
+ return Qnil;
117
+ }
118
+
119
+ /*
120
+ * call-seq:
121
+ * root=
122
+ *
123
+ * Set the root element on this document
124
+ */
125
+ static VALUE set_root(VALUE self, VALUE root)
126
+ {
127
+ xmlDocPtr doc;
128
+ xmlNodePtr new_root;
129
+ xmlNodePtr old_root;
130
+
131
+ Data_Get_Struct(self, xmlDoc, doc);
132
+
133
+ old_root = NULL;
134
+
135
+ if(NIL_P(root)) {
136
+ old_root = xmlDocGetRootElement(doc);
137
+
138
+ if(old_root) {
139
+ xmlUnlinkNode(old_root);
140
+ nokogiri_root_node(old_root);
141
+ }
142
+
143
+ return root;
144
+ }
145
+
146
+ Data_Get_Struct(root, xmlNode, new_root);
147
+
148
+
149
+ /* If the new root's document is not the same as the current document,
150
+ * then we need to dup the node in to this document. */
151
+ if(new_root->doc != doc) {
152
+ old_root = xmlDocGetRootElement(doc);
153
+ if (!(new_root = xmlDocCopyNode(new_root, doc, 1))) {
154
+ rb_raise(rb_eRuntimeError, "Could not reparent node (xmlDocCopyNode)");
155
+ }
156
+ }
157
+
158
+ xmlDocSetRootElement(doc, new_root);
159
+ if(old_root) nokogiri_root_node(old_root);
160
+ return root;
161
+ }
162
+
163
+ /*
164
+ * call-seq:
165
+ * root
166
+ *
167
+ * Get the root node for this document.
168
+ */
169
+ static VALUE root(VALUE self)
170
+ {
171
+ xmlDocPtr doc;
172
+ xmlNodePtr root;
173
+
174
+ Data_Get_Struct(self, xmlDoc, doc);
175
+
176
+ root = xmlDocGetRootElement(doc);
177
+
178
+ if(!root) return Qnil;
179
+ return Nokogiri_wrap_xml_node(Qnil, root) ;
180
+ }
181
+
182
+ /*
183
+ * call-seq:
184
+ * encoding= encoding
185
+ *
186
+ * Set the encoding string for this Document
187
+ */
188
+ static VALUE set_encoding(VALUE self, VALUE encoding)
189
+ {
190
+ xmlDocPtr doc;
191
+ Data_Get_Struct(self, xmlDoc, doc);
192
+
193
+ if (doc->encoding)
194
+ free((char *)(uintptr_t) doc->encoding); /* avoid gcc cast warning */
195
+
196
+ doc->encoding = xmlStrdup((xmlChar *)StringValueCStr(encoding));
197
+
198
+ return encoding;
199
+ }
200
+
201
+ /*
202
+ * call-seq:
203
+ * encoding
204
+ *
205
+ * Get the encoding for this Document
206
+ */
207
+ static VALUE encoding(VALUE self)
208
+ {
209
+ xmlDocPtr doc;
210
+ Data_Get_Struct(self, xmlDoc, doc);
211
+
212
+ if(!doc->encoding) return Qnil;
213
+ return NOKOGIRI_STR_NEW2(doc->encoding);
214
+ }
215
+
216
+ /*
217
+ * call-seq:
218
+ * version
219
+ *
220
+ * Get the XML version for this Document
221
+ */
222
+ static VALUE version(VALUE self)
223
+ {
224
+ xmlDocPtr doc;
225
+ Data_Get_Struct(self, xmlDoc, doc);
226
+
227
+ if(!doc->version) return Qnil;
228
+ return NOKOGIRI_STR_NEW2(doc->version);
229
+ }
230
+
231
+ /*
232
+ * call-seq:
233
+ * read_io(io, url, encoding, options)
234
+ *
235
+ * Create a new document from an IO object
236
+ */
237
+ static VALUE read_io( VALUE klass,
238
+ VALUE io,
239
+ VALUE url,
240
+ VALUE encoding,
241
+ VALUE options )
242
+ {
243
+ const char * c_url = NIL_P(url) ? NULL : StringValueCStr(url);
244
+ const char * c_enc = NIL_P(encoding) ? NULL : StringValueCStr(encoding);
245
+ VALUE error_list = rb_ary_new();
246
+ VALUE document;
247
+ xmlDocPtr doc;
248
+
249
+ xmlResetLastError();
250
+ xmlSetStructuredErrorFunc((void *)error_list, Nokogiri_error_array_pusher);
251
+
252
+ doc = xmlReadIO(
253
+ (xmlInputReadCallback)io_read_callback,
254
+ (xmlInputCloseCallback)io_close_callback,
255
+ (void *)io,
256
+ c_url,
257
+ c_enc,
258
+ (int)NUM2INT(options)
259
+ );
260
+ xmlSetStructuredErrorFunc(NULL, NULL);
261
+
262
+ if(doc == NULL) {
263
+ xmlErrorPtr error;
264
+
265
+ xmlFreeDoc(doc);
266
+
267
+ error = xmlGetLastError();
268
+ if(error)
269
+ rb_exc_raise(Nokogiri_wrap_xml_syntax_error(error));
270
+ else
271
+ rb_raise(rb_eRuntimeError, "Could not parse document");
272
+
273
+ return Qnil;
274
+ }
275
+
276
+ document = Nokogiri_wrap_xml_document(klass, doc);
277
+ rb_iv_set(document, "@errors", error_list);
278
+ return document;
279
+ }
280
+
281
+ /*
282
+ * call-seq:
283
+ * read_memory(string, url, encoding, options)
284
+ *
285
+ * Create a new document from a String
286
+ */
287
+ static VALUE read_memory( VALUE klass,
288
+ VALUE string,
289
+ VALUE url,
290
+ VALUE encoding,
291
+ VALUE options )
292
+ {
293
+ const char * c_buffer = StringValuePtr(string);
294
+ const char * c_url = NIL_P(url) ? NULL : StringValueCStr(url);
295
+ const char * c_enc = NIL_P(encoding) ? NULL : StringValueCStr(encoding);
296
+ int len = (int)RSTRING_LEN(string);
297
+ VALUE error_list = rb_ary_new();
298
+ VALUE document;
299
+ xmlDocPtr doc;
300
+
301
+ xmlResetLastError();
302
+ xmlSetStructuredErrorFunc((void *)error_list, Nokogiri_error_array_pusher);
303
+ doc = xmlReadMemory(c_buffer, len, c_url, c_enc, (int)NUM2INT(options));
304
+ xmlSetStructuredErrorFunc(NULL, NULL);
305
+
306
+ if(doc == NULL) {
307
+ xmlErrorPtr error;
308
+
309
+ xmlFreeDoc(doc);
310
+
311
+ error = xmlGetLastError();
312
+ if(error)
313
+ rb_exc_raise(Nokogiri_wrap_xml_syntax_error(error));
314
+ else
315
+ rb_raise(rb_eRuntimeError, "Could not parse document");
316
+
317
+ return Qnil;
318
+ }
319
+
320
+ document = Nokogiri_wrap_xml_document(klass, doc);
321
+ rb_iv_set(document, "@errors", error_list);
322
+ return document;
323
+ }
324
+
325
+ /*
326
+ * call-seq:
327
+ * dup
328
+ *
329
+ * Copy this Document. An optional depth may be passed in, but it defaults
330
+ * to a deep copy. 0 is a shallow copy, 1 is a deep copy.
331
+ */
332
+ static VALUE duplicate_document(int argc, VALUE *argv, VALUE self)
333
+ {
334
+ xmlDocPtr doc, dup;
335
+ VALUE copy;
336
+ VALUE level;
337
+ VALUE error_list;
338
+
339
+ if(rb_scan_args(argc, argv, "01", &level) == 0)
340
+ level = INT2NUM((long)1);
341
+
342
+ Data_Get_Struct(self, xmlDoc, doc);
343
+
344
+ dup = xmlCopyDoc(doc, (int)NUM2INT(level));
345
+
346
+ if(dup == NULL) return Qnil;
347
+
348
+ dup->type = doc->type;
349
+ copy = Nokogiri_wrap_xml_document(rb_obj_class(self), dup);
350
+ error_list = rb_iv_get(self, "@errors");
351
+ rb_iv_set(copy, "@errors", error_list);
352
+ return copy ;
353
+ }
354
+
355
+ /*
356
+ * call-seq:
357
+ * new(version = default)
358
+ *
359
+ * Create a new document with +version+ (defaults to "1.0")
360
+ */
361
+ static VALUE new(int argc, VALUE *argv, VALUE klass)
362
+ {
363
+ xmlDocPtr doc;
364
+ VALUE version, rest, rb_doc ;
365
+
366
+ rb_scan_args(argc, argv, "0*", &rest);
367
+ version = rb_ary_entry(rest, (long)0);
368
+ if (NIL_P(version)) version = rb_str_new2("1.0");
369
+
370
+ doc = xmlNewDoc((xmlChar *)StringValueCStr(version));
371
+ rb_doc = Nokogiri_wrap_xml_document(klass, doc);
372
+ rb_obj_call_init(rb_doc, argc, argv);
373
+ return rb_doc ;
374
+ }
375
+
376
+ /*
377
+ * call-seq:
378
+ * remove_namespaces!
379
+ *
380
+ * Remove all namespaces from all nodes in the document.
381
+ *
382
+ * This could be useful for developers who either don't understand namespaces
383
+ * or don't care about them.
384
+ *
385
+ * The following example shows a use case, and you can decide for yourself
386
+ * whether this is a good thing or not:
387
+ *
388
+ * doc = Nokogiri::XML <<-EOXML
389
+ * <root>
390
+ * <car xmlns:part="http://general-motors.com/">
391
+ * <part:tire>Michelin Model XGV</part:tire>
392
+ * </car>
393
+ * <bicycle xmlns:part="http://schwinn.com/">
394
+ * <part:tire>I'm a bicycle tire!</part:tire>
395
+ * </bicycle>
396
+ * </root>
397
+ * EOXML
398
+ *
399
+ * doc.xpath("//tire").to_s # => ""
400
+ * doc.xpath("//part:tire", "part" => "http://general-motors.com/").to_s # => "<part:tire>Michelin Model XGV</part:tire>"
401
+ * doc.xpath("//part:tire", "part" => "http://schwinn.com/").to_s # => "<part:tire>I'm a bicycle tire!</part:tire>"
402
+ *
403
+ * doc.remove_namespaces!
404
+ *
405
+ * doc.xpath("//tire").to_s # => "<tire>Michelin Model XGV</tire><tire>I'm a bicycle tire!</tire>"
406
+ * doc.xpath("//part:tire", "part" => "http://general-motors.com/").to_s # => ""
407
+ * doc.xpath("//part:tire", "part" => "http://schwinn.com/").to_s # => ""
408
+ *
409
+ * For more information on why this probably is *not* a good thing in general,
410
+ * please direct your browser to
411
+ * http://tenderlovemaking.com/2009/04/23/namespaces-in-xml.html
412
+ */
413
+ VALUE remove_namespaces_bang(VALUE self)
414
+ {
415
+ xmlDocPtr doc ;
416
+ Data_Get_Struct(self, xmlDoc, doc);
417
+
418
+ recursively_remove_namespaces_from_node((xmlNodePtr)doc);
419
+ return self;
420
+ }
421
+
422
+ /* call-seq: doc.create_entity(name, type, external_id, system_id, content)
423
+ *
424
+ * Create a new entity named +name+.
425
+ *
426
+ * +type+ is an integer representing the type of entity to be created, and it
427
+ * defaults to Nokogiri::XML::EntityDecl::INTERNAL_GENERAL. See
428
+ * the constants on Nokogiri::XML::EntityDecl for more information.
429
+ *
430
+ * +external_id+, +system_id+, and +content+ set the External ID, System ID,
431
+ * and content respectively. All of these parameters are optional.
432
+ */
433
+ static VALUE create_entity(int argc, VALUE *argv, VALUE self)
434
+ {
435
+ VALUE name;
436
+ VALUE type;
437
+ VALUE external_id;
438
+ VALUE system_id;
439
+ VALUE content;
440
+ xmlEntityPtr ptr;
441
+ xmlDocPtr doc ;
442
+
443
+ Data_Get_Struct(self, xmlDoc, doc);
444
+
445
+ rb_scan_args(argc, argv, "14", &name, &type, &external_id, &system_id,
446
+ &content);
447
+
448
+ xmlResetLastError();
449
+ ptr = xmlAddDocEntity(
450
+ doc,
451
+ (xmlChar *)(NIL_P(name) ? NULL : StringValueCStr(name)),
452
+ (int) (NIL_P(type) ? XML_INTERNAL_GENERAL_ENTITY : NUM2INT(type)),
453
+ (xmlChar *)(NIL_P(external_id) ? NULL : StringValueCStr(external_id)),
454
+ (xmlChar *)(NIL_P(system_id) ? NULL : StringValueCStr(system_id)),
455
+ (xmlChar *)(NIL_P(content) ? NULL : StringValueCStr(content))
456
+ );
457
+
458
+ if(NULL == ptr) {
459
+ xmlErrorPtr error = xmlGetLastError();
460
+ if(error)
461
+ rb_exc_raise(Nokogiri_wrap_xml_syntax_error(error));
462
+ else
463
+ rb_raise(rb_eRuntimeError, "Could not create entity");
464
+
465
+ return Qnil;
466
+ }
467
+
468
+ return Nokogiri_wrap_xml_node(cNokogiriXmlEntityDecl, (xmlNodePtr)ptr);
469
+ }
470
+
471
+ static int block_caller(void * ctx, xmlNodePtr _node, xmlNodePtr _parent)
472
+ {
473
+ VALUE block;
474
+ VALUE node;
475
+ VALUE parent;
476
+ VALUE ret;
477
+
478
+ if(_node->type == XML_NAMESPACE_DECL){
479
+ node = Nokogiri_wrap_xml_namespace(_parent->doc, (xmlNsPtr) _node);
480
+ }
481
+ else{
482
+ node = Nokogiri_wrap_xml_node(Qnil, _node);
483
+ }
484
+ parent = _parent ? Nokogiri_wrap_xml_node(Qnil, _parent) : Qnil;
485
+ block = (VALUE)ctx;
486
+
487
+ ret = rb_funcall(block, rb_intern("call"), 2, node, parent);
488
+
489
+ if(Qfalse == ret || Qnil == ret) return 0;
490
+
491
+ return 1;
492
+ }
493
+
494
+ /* call-seq:
495
+ * doc.canonicalize(mode=XML_C14N_1_0,inclusive_namespaces=nil,with_comments=false)
496
+ * doc.canonicalize { |obj, parent| ... }
497
+ *
498
+ * Canonicalize a document and return the results. Takes an optional block
499
+ * that takes two parameters: the +obj+ and that node's +parent+.
500
+ * The +obj+ will be either a Nokogiri::XML::Node, or a Nokogiri::XML::Namespace
501
+ * The block must return a non-nil, non-false value if the +obj+ passed in
502
+ * should be included in the canonicalized document.
503
+ */
504
+ static VALUE canonicalize(int argc, VALUE* argv, VALUE self)
505
+ {
506
+ VALUE mode;
507
+ VALUE incl_ns;
508
+ VALUE with_comments;
509
+ xmlChar **ns;
510
+ long ns_len, i;
511
+
512
+ xmlDocPtr doc;
513
+ xmlOutputBufferPtr buf;
514
+ xmlC14NIsVisibleCallback cb = NULL;
515
+ void * ctx = NULL;
516
+
517
+ VALUE rb_cStringIO;
518
+ VALUE io;
519
+
520
+ rb_scan_args(argc, argv, "03", &mode, &incl_ns, &with_comments);
521
+
522
+ Data_Get_Struct(self, xmlDoc, doc);
523
+
524
+ rb_cStringIO = rb_const_get_at(rb_cObject, rb_intern("StringIO"));
525
+ io = rb_class_new_instance(0, 0, rb_cStringIO);
526
+ buf = xmlAllocOutputBuffer(NULL);
527
+
528
+ buf->writecallback = (xmlOutputWriteCallback)io_write_callback;
529
+ buf->closecallback = (xmlOutputCloseCallback)io_close_callback;
530
+ buf->context = (void *)io;
531
+
532
+ if(rb_block_given_p()) {
533
+ cb = block_caller;
534
+ ctx = (void *)rb_block_proc();
535
+ }
536
+
537
+ if(NIL_P(incl_ns)){
538
+ ns = NULL;
539
+ }
540
+ else{
541
+ Check_Type(incl_ns, T_ARRAY);
542
+ ns_len = RARRAY_LEN(incl_ns);
543
+ ns = calloc((size_t)ns_len+1, sizeof(xmlChar *));
544
+ for (i = 0 ; i < ns_len ; i++) {
545
+ VALUE entry = rb_ary_entry(incl_ns, i);
546
+ ns[i] = (xmlChar*)StringValueCStr(entry);
547
+ }
548
+ }
549
+
550
+
551
+ xmlC14NExecute(doc, cb, ctx,
552
+ (int) (NIL_P(mode) ? 0 : NUM2INT(mode)),
553
+ ns,
554
+ (int) RTEST(with_comments),
555
+ buf);
556
+
557
+ xmlOutputBufferClose(buf);
558
+
559
+ return rb_funcall(io, rb_intern("string"), 0);
560
+ }
561
+
562
+ VALUE cNokogiriXmlDocument ;
563
+ void init_xml_document()
564
+ {
565
+ VALUE nokogiri = rb_define_module("Nokogiri");
566
+ VALUE xml = rb_define_module_under(nokogiri, "XML");
567
+ VALUE node = rb_define_class_under(xml, "Node", rb_cObject);
568
+
569
+ /*
570
+ * Nokogiri::XML::Document wraps an xml document.
571
+ */
572
+ VALUE klass = rb_define_class_under(xml, "Document", node);
573
+
574
+ cNokogiriXmlDocument = klass;
575
+
576
+ rb_define_singleton_method(klass, "read_memory", read_memory, 4);
577
+ rb_define_singleton_method(klass, "read_io", read_io, 4);
578
+ rb_define_singleton_method(klass, "new", new, -1);
579
+
580
+ rb_define_method(klass, "root", root, 0);
581
+ rb_define_method(klass, "root=", set_root, 1);
582
+ rb_define_method(klass, "encoding", encoding, 0);
583
+ rb_define_method(klass, "encoding=", set_encoding, 1);
584
+ rb_define_method(klass, "version", version, 0);
585
+ rb_define_method(klass, "canonicalize", canonicalize, -1);
586
+ rb_define_method(klass, "dup", duplicate_document, -1);
587
+ rb_define_method(klass, "url", url, 0);
588
+ rb_define_method(klass, "create_entity", create_entity, -1);
589
+ rb_define_method(klass, "remove_namespaces!", remove_namespaces_bang, 0);
590
+ }
591
+
592
+
593
+ /* this takes klass as a param because it's used for HtmlDocument, too. */
594
+ VALUE Nokogiri_wrap_xml_document(VALUE klass, xmlDocPtr doc)
595
+ {
596
+ nokogiriTuplePtr tuple = (nokogiriTuplePtr)malloc(sizeof(nokogiriTuple));
597
+
598
+ VALUE rb_doc = Data_Wrap_Struct(
599
+ klass ? klass : cNokogiriXmlDocument,
600
+ mark,
601
+ dealloc,
602
+ doc
603
+ );
604
+
605
+ VALUE cache = rb_ary_new();
606
+ rb_iv_set(rb_doc, "@decorators", Qnil);
607
+ rb_iv_set(rb_doc, "@node_cache", cache);
608
+
609
+ tuple->doc = rb_doc;
610
+ tuple->unlinkedNodes = st_init_numtable_with_size(128);
611
+ tuple->node_cache = cache;
612
+ doc->_private = tuple ;
613
+
614
+ rb_obj_call_init(rb_doc, 0, NULL);
615
+
616
+ return rb_doc ;
617
+ }