nokogiri-backport 1.11.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (239) hide show
  1. checksums.yaml +7 -0
  2. data/Gemfile +3 -0
  3. data/LICENSE-DEPENDENCIES.md +1682 -0
  4. data/LICENSE.md +9 -0
  5. data/README.md +272 -0
  6. data/bin/nokogiri +118 -0
  7. data/dependencies.yml +74 -0
  8. data/ext/java/nokogiri/EncodingHandler.java +124 -0
  9. data/ext/java/nokogiri/HtmlDocument.java +178 -0
  10. data/ext/java/nokogiri/HtmlElementDescription.java +148 -0
  11. data/ext/java/nokogiri/HtmlEntityLookup.java +79 -0
  12. data/ext/java/nokogiri/HtmlSaxParserContext.java +282 -0
  13. data/ext/java/nokogiri/HtmlSaxPushParser.java +222 -0
  14. data/ext/java/nokogiri/NokogiriService.java +597 -0
  15. data/ext/java/nokogiri/XmlAttr.java +162 -0
  16. data/ext/java/nokogiri/XmlAttributeDecl.java +129 -0
  17. data/ext/java/nokogiri/XmlCdata.java +82 -0
  18. data/ext/java/nokogiri/XmlComment.java +97 -0
  19. data/ext/java/nokogiri/XmlDocument.java +633 -0
  20. data/ext/java/nokogiri/XmlDocumentFragment.java +185 -0
  21. data/ext/java/nokogiri/XmlDtd.java +481 -0
  22. data/ext/java/nokogiri/XmlElement.java +68 -0
  23. data/ext/java/nokogiri/XmlElementContent.java +382 -0
  24. data/ext/java/nokogiri/XmlElementDecl.java +147 -0
  25. data/ext/java/nokogiri/XmlEntityDecl.java +157 -0
  26. data/ext/java/nokogiri/XmlEntityReference.java +101 -0
  27. data/ext/java/nokogiri/XmlNamespace.java +199 -0
  28. data/ext/java/nokogiri/XmlNode.java +1684 -0
  29. data/ext/java/nokogiri/XmlNodeSet.java +434 -0
  30. data/ext/java/nokogiri/XmlProcessingInstruction.java +100 -0
  31. data/ext/java/nokogiri/XmlReader.java +531 -0
  32. data/ext/java/nokogiri/XmlRelaxng.java +151 -0
  33. data/ext/java/nokogiri/XmlSaxParserContext.java +374 -0
  34. data/ext/java/nokogiri/XmlSaxPushParser.java +286 -0
  35. data/ext/java/nokogiri/XmlSchema.java +388 -0
  36. data/ext/java/nokogiri/XmlSyntaxError.java +138 -0
  37. data/ext/java/nokogiri/XmlText.java +110 -0
  38. data/ext/java/nokogiri/XmlXpathContext.java +301 -0
  39. data/ext/java/nokogiri/XsltStylesheet.java +347 -0
  40. data/ext/java/nokogiri/internals/ClosedStreamException.java +10 -0
  41. data/ext/java/nokogiri/internals/HtmlDomParserContext.java +252 -0
  42. data/ext/java/nokogiri/internals/IgnoreSchemaErrorsErrorHandler.java +20 -0
  43. data/ext/java/nokogiri/internals/NokogiriBlockingQueueInputStream.java +151 -0
  44. data/ext/java/nokogiri/internals/NokogiriDomParser.java +116 -0
  45. data/ext/java/nokogiri/internals/NokogiriEntityResolver.java +121 -0
  46. data/ext/java/nokogiri/internals/NokogiriErrorHandler.java +69 -0
  47. data/ext/java/nokogiri/internals/NokogiriHandler.java +327 -0
  48. data/ext/java/nokogiri/internals/NokogiriHelpers.java +734 -0
  49. data/ext/java/nokogiri/internals/NokogiriNamespaceCache.java +217 -0
  50. data/ext/java/nokogiri/internals/NokogiriNamespaceContext.java +127 -0
  51. data/ext/java/nokogiri/internals/NokogiriNonStrictErrorHandler.java +100 -0
  52. data/ext/java/nokogiri/internals/NokogiriNonStrictErrorHandler4NekoHtml.java +121 -0
  53. data/ext/java/nokogiri/internals/NokogiriStrictErrorHandler.java +78 -0
  54. data/ext/java/nokogiri/internals/NokogiriXPathFunction.java +180 -0
  55. data/ext/java/nokogiri/internals/NokogiriXPathFunctionResolver.java +72 -0
  56. data/ext/java/nokogiri/internals/NokogiriXPathVariableResolver.java +60 -0
  57. data/ext/java/nokogiri/internals/NokogiriXsltErrorListener.java +87 -0
  58. data/ext/java/nokogiri/internals/ParserContext.java +259 -0
  59. data/ext/java/nokogiri/internals/ReaderNode.java +488 -0
  60. data/ext/java/nokogiri/internals/SaveContextVisitor.java +778 -0
  61. data/ext/java/nokogiri/internals/SchemaErrorHandler.java +73 -0
  62. data/ext/java/nokogiri/internals/XalanDTMManagerPatch.java +168 -0
  63. data/ext/java/nokogiri/internals/XmlDeclHandler.java +42 -0
  64. data/ext/java/nokogiri/internals/XmlDomParserContext.java +274 -0
  65. data/ext/java/nokogiri/internals/XmlSaxParser.java +65 -0
  66. data/ext/java/nokogiri/internals/c14n/AttrCompare.java +119 -0
  67. data/ext/java/nokogiri/internals/c14n/C14nHelper.java +159 -0
  68. data/ext/java/nokogiri/internals/c14n/CanonicalFilter.java +37 -0
  69. data/ext/java/nokogiri/internals/c14n/CanonicalizationException.java +93 -0
  70. data/ext/java/nokogiri/internals/c14n/Canonicalizer.java +252 -0
  71. data/ext/java/nokogiri/internals/c14n/Canonicalizer11.java +639 -0
  72. data/ext/java/nokogiri/internals/c14n/Canonicalizer11_OmitComments.java +38 -0
  73. data/ext/java/nokogiri/internals/c14n/Canonicalizer11_WithComments.java +38 -0
  74. data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315.java +367 -0
  75. data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315Excl.java +295 -0
  76. data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315ExclOmitComments.java +40 -0
  77. data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315ExclWithComments.java +44 -0
  78. data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315OmitComments.java +44 -0
  79. data/ext/java/nokogiri/internals/c14n/Canonicalizer20010315WithComments.java +43 -0
  80. data/ext/java/nokogiri/internals/c14n/CanonicalizerBase.java +630 -0
  81. data/ext/java/nokogiri/internals/c14n/CanonicalizerPhysical.java +173 -0
  82. data/ext/java/nokogiri/internals/c14n/CanonicalizerSpi.java +76 -0
  83. data/ext/java/nokogiri/internals/c14n/Constants.java +42 -0
  84. data/ext/java/nokogiri/internals/c14n/ElementProxy.java +293 -0
  85. data/ext/java/nokogiri/internals/c14n/HelperNodeList.java +93 -0
  86. data/ext/java/nokogiri/internals/c14n/IgnoreAllErrorHandler.java +79 -0
  87. data/ext/java/nokogiri/internals/c14n/InclusiveNamespaces.java +166 -0
  88. data/ext/java/nokogiri/internals/c14n/InvalidCanonicalizerException.java +76 -0
  89. data/ext/java/nokogiri/internals/c14n/NameSpaceSymbTable.java +402 -0
  90. data/ext/java/nokogiri/internals/c14n/NodeFilter.java +51 -0
  91. data/ext/java/nokogiri/internals/c14n/UtfHelpper.java +179 -0
  92. data/ext/java/nokogiri/internals/c14n/XMLUtils.java +507 -0
  93. data/ext/java/nokogiri/internals/dom2dtm/DOM2DTM.java +1745 -0
  94. data/ext/java/nokogiri/internals/dom2dtm/DOM2DTMdefaultNamespaceDeclarationNode.java +685 -0
  95. data/ext/nokogiri/depend +477 -0
  96. data/ext/nokogiri/extconf.rb +836 -0
  97. data/ext/nokogiri/html_document.c +171 -0
  98. data/ext/nokogiri/html_document.h +10 -0
  99. data/ext/nokogiri/html_element_description.c +279 -0
  100. data/ext/nokogiri/html_element_description.h +10 -0
  101. data/ext/nokogiri/html_entity_lookup.c +32 -0
  102. data/ext/nokogiri/html_entity_lookup.h +8 -0
  103. data/ext/nokogiri/html_sax_parser_context.c +116 -0
  104. data/ext/nokogiri/html_sax_parser_context.h +11 -0
  105. data/ext/nokogiri/html_sax_push_parser.c +87 -0
  106. data/ext/nokogiri/html_sax_push_parser.h +9 -0
  107. data/ext/nokogiri/nokogiri.c +135 -0
  108. data/ext/nokogiri/nokogiri.h +130 -0
  109. data/ext/nokogiri/xml_attr.c +103 -0
  110. data/ext/nokogiri/xml_attr.h +9 -0
  111. data/ext/nokogiri/xml_attribute_decl.c +70 -0
  112. data/ext/nokogiri/xml_attribute_decl.h +9 -0
  113. data/ext/nokogiri/xml_cdata.c +62 -0
  114. data/ext/nokogiri/xml_cdata.h +9 -0
  115. data/ext/nokogiri/xml_comment.c +69 -0
  116. data/ext/nokogiri/xml_comment.h +9 -0
  117. data/ext/nokogiri/xml_document.c +622 -0
  118. data/ext/nokogiri/xml_document.h +23 -0
  119. data/ext/nokogiri/xml_document_fragment.c +48 -0
  120. data/ext/nokogiri/xml_document_fragment.h +10 -0
  121. data/ext/nokogiri/xml_dtd.c +202 -0
  122. data/ext/nokogiri/xml_dtd.h +10 -0
  123. data/ext/nokogiri/xml_element_content.c +123 -0
  124. data/ext/nokogiri/xml_element_content.h +10 -0
  125. data/ext/nokogiri/xml_element_decl.c +69 -0
  126. data/ext/nokogiri/xml_element_decl.h +9 -0
  127. data/ext/nokogiri/xml_encoding_handler.c +79 -0
  128. data/ext/nokogiri/xml_encoding_handler.h +8 -0
  129. data/ext/nokogiri/xml_entity_decl.c +110 -0
  130. data/ext/nokogiri/xml_entity_decl.h +10 -0
  131. data/ext/nokogiri/xml_entity_reference.c +52 -0
  132. data/ext/nokogiri/xml_entity_reference.h +9 -0
  133. data/ext/nokogiri/xml_io.c +63 -0
  134. data/ext/nokogiri/xml_io.h +11 -0
  135. data/ext/nokogiri/xml_libxml2_hacks.c +112 -0
  136. data/ext/nokogiri/xml_libxml2_hacks.h +12 -0
  137. data/ext/nokogiri/xml_namespace.c +111 -0
  138. data/ext/nokogiri/xml_namespace.h +14 -0
  139. data/ext/nokogiri/xml_node.c +1773 -0
  140. data/ext/nokogiri/xml_node.h +13 -0
  141. data/ext/nokogiri/xml_node_set.c +486 -0
  142. data/ext/nokogiri/xml_node_set.h +12 -0
  143. data/ext/nokogiri/xml_processing_instruction.c +56 -0
  144. data/ext/nokogiri/xml_processing_instruction.h +9 -0
  145. data/ext/nokogiri/xml_reader.c +657 -0
  146. data/ext/nokogiri/xml_reader.h +10 -0
  147. data/ext/nokogiri/xml_relax_ng.c +179 -0
  148. data/ext/nokogiri/xml_relax_ng.h +9 -0
  149. data/ext/nokogiri/xml_sax_parser.c +305 -0
  150. data/ext/nokogiri/xml_sax_parser.h +39 -0
  151. data/ext/nokogiri/xml_sax_parser_context.c +262 -0
  152. data/ext/nokogiri/xml_sax_parser_context.h +10 -0
  153. data/ext/nokogiri/xml_sax_push_parser.c +159 -0
  154. data/ext/nokogiri/xml_sax_push_parser.h +9 -0
  155. data/ext/nokogiri/xml_schema.c +276 -0
  156. data/ext/nokogiri/xml_schema.h +9 -0
  157. data/ext/nokogiri/xml_syntax_error.c +64 -0
  158. data/ext/nokogiri/xml_syntax_error.h +13 -0
  159. data/ext/nokogiri/xml_text.c +52 -0
  160. data/ext/nokogiri/xml_text.h +9 -0
  161. data/ext/nokogiri/xml_xpath_context.c +374 -0
  162. data/ext/nokogiri/xml_xpath_context.h +10 -0
  163. data/ext/nokogiri/xslt_stylesheet.c +263 -0
  164. data/ext/nokogiri/xslt_stylesheet.h +14 -0
  165. data/lib/isorelax.jar +0 -0
  166. data/lib/jing.jar +0 -0
  167. data/lib/nekodtd.jar +0 -0
  168. data/lib/nekohtml.jar +0 -0
  169. data/lib/nokogiri/css/node.rb +53 -0
  170. data/lib/nokogiri/css/parser.rb +751 -0
  171. data/lib/nokogiri/css/parser.y +272 -0
  172. data/lib/nokogiri/css/parser_extras.rb +94 -0
  173. data/lib/nokogiri/css/syntax_error.rb +8 -0
  174. data/lib/nokogiri/css/tokenizer.rb +154 -0
  175. data/lib/nokogiri/css/tokenizer.rex +55 -0
  176. data/lib/nokogiri/css/xpath_visitor.rb +260 -0
  177. data/lib/nokogiri/css.rb +28 -0
  178. data/lib/nokogiri/decorators/slop.rb +43 -0
  179. data/lib/nokogiri/html/builder.rb +36 -0
  180. data/lib/nokogiri/html/document.rb +322 -0
  181. data/lib/nokogiri/html/document_fragment.rb +50 -0
  182. data/lib/nokogiri/html/element_description.rb +24 -0
  183. data/lib/nokogiri/html/element_description_defaults.rb +672 -0
  184. data/lib/nokogiri/html/entity_lookup.rb +14 -0
  185. data/lib/nokogiri/html/sax/parser.rb +63 -0
  186. data/lib/nokogiri/html/sax/parser_context.rb +17 -0
  187. data/lib/nokogiri/html/sax/push_parser.rb +37 -0
  188. data/lib/nokogiri/html.rb +38 -0
  189. data/lib/nokogiri/jruby/dependencies.rb +20 -0
  190. data/lib/nokogiri/syntax_error.rb +5 -0
  191. data/lib/nokogiri/version/constant.rb +5 -0
  192. data/lib/nokogiri/version/info.rb +182 -0
  193. data/lib/nokogiri/version.rb +3 -0
  194. data/lib/nokogiri/xml/attr.rb +15 -0
  195. data/lib/nokogiri/xml/attribute_decl.rb +19 -0
  196. data/lib/nokogiri/xml/builder.rb +447 -0
  197. data/lib/nokogiri/xml/cdata.rb +12 -0
  198. data/lib/nokogiri/xml/character_data.rb +8 -0
  199. data/lib/nokogiri/xml/document.rb +290 -0
  200. data/lib/nokogiri/xml/document_fragment.rb +159 -0
  201. data/lib/nokogiri/xml/dtd.rb +33 -0
  202. data/lib/nokogiri/xml/element_content.rb +37 -0
  203. data/lib/nokogiri/xml/element_decl.rb +14 -0
  204. data/lib/nokogiri/xml/entity_decl.rb +20 -0
  205. data/lib/nokogiri/xml/entity_reference.rb +19 -0
  206. data/lib/nokogiri/xml/namespace.rb +14 -0
  207. data/lib/nokogiri/xml/node/save_options.rb +62 -0
  208. data/lib/nokogiri/xml/node.rb +1240 -0
  209. data/lib/nokogiri/xml/node_set.rb +372 -0
  210. data/lib/nokogiri/xml/notation.rb +7 -0
  211. data/lib/nokogiri/xml/parse_options.rb +127 -0
  212. data/lib/nokogiri/xml/pp/character_data.rb +19 -0
  213. data/lib/nokogiri/xml/pp/node.rb +57 -0
  214. data/lib/nokogiri/xml/pp.rb +3 -0
  215. data/lib/nokogiri/xml/processing_instruction.rb +9 -0
  216. data/lib/nokogiri/xml/reader.rb +116 -0
  217. data/lib/nokogiri/xml/relax_ng.rb +37 -0
  218. data/lib/nokogiri/xml/sax/document.rb +172 -0
  219. data/lib/nokogiri/xml/sax/parser.rb +123 -0
  220. data/lib/nokogiri/xml/sax/parser_context.rb +17 -0
  221. data/lib/nokogiri/xml/sax/push_parser.rb +61 -0
  222. data/lib/nokogiri/xml/sax.rb +5 -0
  223. data/lib/nokogiri/xml/schema.rb +72 -0
  224. data/lib/nokogiri/xml/searchable.rb +239 -0
  225. data/lib/nokogiri/xml/syntax_error.rb +71 -0
  226. data/lib/nokogiri/xml/text.rb +10 -0
  227. data/lib/nokogiri/xml/xpath/syntax_error.rb +12 -0
  228. data/lib/nokogiri/xml/xpath.rb +11 -0
  229. data/lib/nokogiri/xml/xpath_context.rb +17 -0
  230. data/lib/nokogiri/xml.rb +76 -0
  231. data/lib/nokogiri/xslt/stylesheet.rb +26 -0
  232. data/lib/nokogiri/xslt.rb +57 -0
  233. data/lib/nokogiri.rb +144 -0
  234. data/lib/serializer.jar +0 -0
  235. data/lib/xalan.jar +0 -0
  236. data/lib/xercesImpl.jar +0 -0
  237. data/lib/xml-apis.jar +0 -0
  238. data/lib/xsd/xmlparser/nokogiri.rb +103 -0
  239. metadata +531 -0
@@ -0,0 +1,622 @@
1
+ #include <xml_document.h>
2
+
3
+ static int dealloc_node_i2(xmlNodePtr key, xmlNodePtr node, xmlDocPtr doc)
4
+ {
5
+ switch(node->type) {
6
+ case XML_ATTRIBUTE_NODE:
7
+ xmlFreePropList((xmlAttrPtr)node);
8
+ break;
9
+ case XML_NAMESPACE_DECL:
10
+ xmlFreeNs((xmlNsPtr)node);
11
+ break;
12
+ case XML_DTD_NODE:
13
+ xmlFreeDtd((xmlDtdPtr)node);
14
+ break;
15
+ default:
16
+ if(node->parent == NULL) {
17
+ xmlAddChild((xmlNodePtr)doc, node);
18
+ }
19
+ }
20
+ return ST_CONTINUE;
21
+ }
22
+
23
+ static int dealloc_node_i(st_data_t key, st_data_t node, st_data_t doc)
24
+ {
25
+ return dealloc_node_i2((xmlNodePtr)key, (xmlNodePtr)node, (xmlDocPtr)doc);
26
+ }
27
+
28
+ static void remove_private(xmlNodePtr node)
29
+ {
30
+ xmlNodePtr child;
31
+
32
+ for (child = node->children; child; child = child->next)
33
+ remove_private(child);
34
+
35
+ if ((node->type == XML_ELEMENT_NODE ||
36
+ node->type == XML_XINCLUDE_START ||
37
+ node->type == XML_XINCLUDE_END) &&
38
+ node->properties) {
39
+ for (child = (xmlNodePtr)node->properties; child; child = child->next)
40
+ remove_private(child);
41
+ }
42
+
43
+ node->_private = NULL;
44
+ }
45
+
46
+ static void mark(xmlDocPtr doc)
47
+ {
48
+ nokogiriTuplePtr tuple = (nokogiriTuplePtr)doc->_private;
49
+ if(tuple) {
50
+ rb_gc_mark(tuple->doc);
51
+ rb_gc_mark(tuple->node_cache);
52
+ }
53
+ }
54
+
55
+ static void dealloc(xmlDocPtr doc)
56
+ {
57
+ st_table *node_hash;
58
+
59
+ NOKOGIRI_DEBUG_START(doc);
60
+
61
+ node_hash = DOC_UNLINKED_NODE_HASH(doc);
62
+
63
+ st_foreach(node_hash, dealloc_node_i, (st_data_t)doc);
64
+ st_free_table(node_hash);
65
+
66
+ free(doc->_private);
67
+
68
+ /* When both Nokogiri and libxml-ruby are loaded, make sure that all nodes
69
+ * have their _private pointers cleared. This is to avoid libxml-ruby's
70
+ * xmlDeregisterNode callback from accessing VALUE pointers from ruby's GC
71
+ * free context, which can result in segfaults.
72
+ */
73
+ if (xmlDeregisterNodeDefaultValue)
74
+ remove_private((xmlNodePtr)doc);
75
+
76
+ xmlFreeDoc(doc);
77
+
78
+ NOKOGIRI_DEBUG_END(doc);
79
+ }
80
+
81
+ static void recursively_remove_namespaces_from_node(xmlNodePtr node)
82
+ {
83
+ xmlNodePtr child ;
84
+ xmlAttrPtr property ;
85
+
86
+ xmlSetNs(node, NULL);
87
+
88
+ for (child = node->children ; child ; child = child->next)
89
+ recursively_remove_namespaces_from_node(child);
90
+
91
+ if (((node->type == XML_ELEMENT_NODE) ||
92
+ (node->type == XML_XINCLUDE_START) ||
93
+ (node->type == XML_XINCLUDE_END)) &&
94
+ node->nsDef) {
95
+ xmlFreeNsList(node->nsDef);
96
+ node->nsDef = NULL;
97
+ }
98
+
99
+ if (node->type == XML_ELEMENT_NODE && node->properties != NULL) {
100
+ property = node->properties ;
101
+ while (property != NULL) {
102
+ if (property->ns) property->ns = NULL ;
103
+ property = property->next ;
104
+ }
105
+ }
106
+ }
107
+
108
+ /*
109
+ * call-seq:
110
+ * url
111
+ *
112
+ * Get the url name for this document.
113
+ */
114
+ static VALUE url(VALUE self)
115
+ {
116
+ xmlDocPtr doc;
117
+ Data_Get_Struct(self, xmlDoc, doc);
118
+
119
+ if(doc->URL) return NOKOGIRI_STR_NEW2(doc->URL);
120
+
121
+ return Qnil;
122
+ }
123
+
124
+ /*
125
+ * call-seq:
126
+ * root=
127
+ *
128
+ * Set the root element on this document
129
+ */
130
+ static VALUE set_root(VALUE self, VALUE root)
131
+ {
132
+ xmlDocPtr doc;
133
+ xmlNodePtr new_root;
134
+ xmlNodePtr old_root;
135
+
136
+ Data_Get_Struct(self, xmlDoc, doc);
137
+
138
+ old_root = NULL;
139
+
140
+ if(NIL_P(root)) {
141
+ old_root = xmlDocGetRootElement(doc);
142
+
143
+ if(old_root) {
144
+ xmlUnlinkNode(old_root);
145
+ nokogiri_root_node(old_root);
146
+ }
147
+
148
+ return root;
149
+ }
150
+
151
+ Data_Get_Struct(root, xmlNode, new_root);
152
+
153
+
154
+ /* If the new root's document is not the same as the current document,
155
+ * then we need to dup the node in to this document. */
156
+ if(new_root->doc != doc) {
157
+ old_root = xmlDocGetRootElement(doc);
158
+ if (!(new_root = xmlDocCopyNode(new_root, doc, 1))) {
159
+ rb_raise(rb_eRuntimeError, "Could not reparent node (xmlDocCopyNode)");
160
+ }
161
+ }
162
+
163
+ xmlDocSetRootElement(doc, new_root);
164
+ if(old_root) nokogiri_root_node(old_root);
165
+ return root;
166
+ }
167
+
168
+ /*
169
+ * call-seq:
170
+ * root
171
+ *
172
+ * Get the root node for this document.
173
+ */
174
+ static VALUE root(VALUE self)
175
+ {
176
+ xmlDocPtr doc;
177
+ xmlNodePtr root;
178
+
179
+ Data_Get_Struct(self, xmlDoc, doc);
180
+
181
+ root = xmlDocGetRootElement(doc);
182
+
183
+ if(!root) return Qnil;
184
+ return Nokogiri_wrap_xml_node(Qnil, root) ;
185
+ }
186
+
187
+ /*
188
+ * call-seq:
189
+ * encoding= encoding
190
+ *
191
+ * Set the encoding string for this Document
192
+ */
193
+ static VALUE set_encoding(VALUE self, VALUE encoding)
194
+ {
195
+ xmlDocPtr doc;
196
+ Data_Get_Struct(self, xmlDoc, doc);
197
+
198
+ if (doc->encoding)
199
+ free((char *)(uintptr_t) doc->encoding); /* avoid gcc cast warning */
200
+
201
+ doc->encoding = xmlStrdup((xmlChar *)StringValueCStr(encoding));
202
+
203
+ return encoding;
204
+ }
205
+
206
+ /*
207
+ * call-seq:
208
+ * encoding
209
+ *
210
+ * Get the encoding for this Document
211
+ */
212
+ static VALUE encoding(VALUE self)
213
+ {
214
+ xmlDocPtr doc;
215
+ Data_Get_Struct(self, xmlDoc, doc);
216
+
217
+ if(!doc->encoding) return Qnil;
218
+ return NOKOGIRI_STR_NEW2(doc->encoding);
219
+ }
220
+
221
+ /*
222
+ * call-seq:
223
+ * version
224
+ *
225
+ * Get the XML version for this Document
226
+ */
227
+ static VALUE version(VALUE self)
228
+ {
229
+ xmlDocPtr doc;
230
+ Data_Get_Struct(self, xmlDoc, doc);
231
+
232
+ if(!doc->version) return Qnil;
233
+ return NOKOGIRI_STR_NEW2(doc->version);
234
+ }
235
+
236
+ /*
237
+ * call-seq:
238
+ * read_io(io, url, encoding, options)
239
+ *
240
+ * Create a new document from an IO object
241
+ */
242
+ static VALUE read_io( VALUE klass,
243
+ VALUE io,
244
+ VALUE url,
245
+ VALUE encoding,
246
+ VALUE options )
247
+ {
248
+ const char * c_url = NIL_P(url) ? NULL : StringValueCStr(url);
249
+ const char * c_enc = NIL_P(encoding) ? NULL : StringValueCStr(encoding);
250
+ VALUE error_list = rb_ary_new();
251
+ VALUE document;
252
+ xmlDocPtr doc;
253
+
254
+ xmlResetLastError();
255
+ xmlSetStructuredErrorFunc((void *)error_list, Nokogiri_error_array_pusher);
256
+
257
+ doc = xmlReadIO(
258
+ (xmlInputReadCallback)io_read_callback,
259
+ (xmlInputCloseCallback)io_close_callback,
260
+ (void *)io,
261
+ c_url,
262
+ c_enc,
263
+ (int)NUM2INT(options)
264
+ );
265
+ xmlSetStructuredErrorFunc(NULL, NULL);
266
+
267
+ if(doc == NULL) {
268
+ xmlErrorPtr error;
269
+
270
+ xmlFreeDoc(doc);
271
+
272
+ error = xmlGetLastError();
273
+ if(error)
274
+ rb_exc_raise(Nokogiri_wrap_xml_syntax_error(error));
275
+ else
276
+ rb_raise(rb_eRuntimeError, "Could not parse document");
277
+
278
+ return Qnil;
279
+ }
280
+
281
+ document = Nokogiri_wrap_xml_document(klass, doc);
282
+ rb_iv_set(document, "@errors", error_list);
283
+ return document;
284
+ }
285
+
286
+ /*
287
+ * call-seq:
288
+ * read_memory(string, url, encoding, options)
289
+ *
290
+ * Create a new document from a String
291
+ */
292
+ static VALUE read_memory( VALUE klass,
293
+ VALUE string,
294
+ VALUE url,
295
+ VALUE encoding,
296
+ VALUE options )
297
+ {
298
+ const char * c_buffer = StringValuePtr(string);
299
+ const char * c_url = NIL_P(url) ? NULL : StringValueCStr(url);
300
+ const char * c_enc = NIL_P(encoding) ? NULL : StringValueCStr(encoding);
301
+ int len = (int)RSTRING_LEN(string);
302
+ VALUE error_list = rb_ary_new();
303
+ VALUE document;
304
+ xmlDocPtr doc;
305
+
306
+ xmlResetLastError();
307
+ xmlSetStructuredErrorFunc((void *)error_list, Nokogiri_error_array_pusher);
308
+ doc = xmlReadMemory(c_buffer, len, c_url, c_enc, (int)NUM2INT(options));
309
+ xmlSetStructuredErrorFunc(NULL, NULL);
310
+
311
+ if(doc == NULL) {
312
+ xmlErrorPtr error;
313
+
314
+ xmlFreeDoc(doc);
315
+
316
+ error = xmlGetLastError();
317
+ if(error)
318
+ rb_exc_raise(Nokogiri_wrap_xml_syntax_error(error));
319
+ else
320
+ rb_raise(rb_eRuntimeError, "Could not parse document");
321
+
322
+ return Qnil;
323
+ }
324
+
325
+ document = Nokogiri_wrap_xml_document(klass, doc);
326
+ rb_iv_set(document, "@errors", error_list);
327
+ return document;
328
+ }
329
+
330
+ /*
331
+ * call-seq:
332
+ * dup
333
+ *
334
+ * Copy this Document. An optional depth may be passed in, but it defaults
335
+ * to a deep copy. 0 is a shallow copy, 1 is a deep copy.
336
+ */
337
+ static VALUE duplicate_document(int argc, VALUE *argv, VALUE self)
338
+ {
339
+ xmlDocPtr doc, dup;
340
+ VALUE copy;
341
+ VALUE level;
342
+ VALUE error_list;
343
+
344
+ if(rb_scan_args(argc, argv, "01", &level) == 0)
345
+ level = INT2NUM((long)1);
346
+
347
+ Data_Get_Struct(self, xmlDoc, doc);
348
+
349
+ dup = xmlCopyDoc(doc, (int)NUM2INT(level));
350
+
351
+ if(dup == NULL) return Qnil;
352
+
353
+ dup->type = doc->type;
354
+ copy = Nokogiri_wrap_xml_document(rb_obj_class(self), dup);
355
+ error_list = rb_iv_get(self, "@errors");
356
+ rb_iv_set(copy, "@errors", error_list);
357
+ return copy ;
358
+ }
359
+
360
+ /*
361
+ * call-seq:
362
+ * new(version = default)
363
+ *
364
+ * Create a new document with +version+ (defaults to "1.0")
365
+ */
366
+ static VALUE new(int argc, VALUE *argv, VALUE klass)
367
+ {
368
+ xmlDocPtr doc;
369
+ VALUE version, rest, rb_doc ;
370
+
371
+ rb_scan_args(argc, argv, "0*", &rest);
372
+ version = rb_ary_entry(rest, (long)0);
373
+ if (NIL_P(version)) version = rb_str_new2("1.0");
374
+
375
+ doc = xmlNewDoc((xmlChar *)StringValueCStr(version));
376
+ rb_doc = Nokogiri_wrap_xml_document(klass, doc);
377
+ rb_obj_call_init(rb_doc, argc, argv);
378
+ return rb_doc ;
379
+ }
380
+
381
+ /*
382
+ * call-seq:
383
+ * remove_namespaces!
384
+ *
385
+ * Remove all namespaces from all nodes in the document.
386
+ *
387
+ * This could be useful for developers who either don't understand namespaces
388
+ * or don't care about them.
389
+ *
390
+ * The following example shows a use case, and you can decide for yourself
391
+ * whether this is a good thing or not:
392
+ *
393
+ * doc = Nokogiri::XML <<-EOXML
394
+ * <root>
395
+ * <car xmlns:part="http://general-motors.com/">
396
+ * <part:tire>Michelin Model XGV</part:tire>
397
+ * </car>
398
+ * <bicycle xmlns:part="http://schwinn.com/">
399
+ * <part:tire>I'm a bicycle tire!</part:tire>
400
+ * </bicycle>
401
+ * </root>
402
+ * EOXML
403
+ *
404
+ * doc.xpath("//tire").to_s # => ""
405
+ * doc.xpath("//part:tire", "part" => "http://general-motors.com/").to_s # => "<part:tire>Michelin Model XGV</part:tire>"
406
+ * doc.xpath("//part:tire", "part" => "http://schwinn.com/").to_s # => "<part:tire>I'm a bicycle tire!</part:tire>"
407
+ *
408
+ * doc.remove_namespaces!
409
+ *
410
+ * doc.xpath("//tire").to_s # => "<tire>Michelin Model XGV</tire><tire>I'm a bicycle tire!</tire>"
411
+ * doc.xpath("//part:tire", "part" => "http://general-motors.com/").to_s # => ""
412
+ * doc.xpath("//part:tire", "part" => "http://schwinn.com/").to_s # => ""
413
+ *
414
+ * For more information on why this probably is *not* a good thing in general,
415
+ * please direct your browser to
416
+ * http://tenderlovemaking.com/2009/04/23/namespaces-in-xml.html
417
+ */
418
+ VALUE remove_namespaces_bang(VALUE self)
419
+ {
420
+ xmlDocPtr doc ;
421
+ Data_Get_Struct(self, xmlDoc, doc);
422
+
423
+ recursively_remove_namespaces_from_node((xmlNodePtr)doc);
424
+ return self;
425
+ }
426
+
427
+ /* call-seq: doc.create_entity(name, type, external_id, system_id, content)
428
+ *
429
+ * Create a new entity named +name+.
430
+ *
431
+ * +type+ is an integer representing the type of entity to be created, and it
432
+ * defaults to Nokogiri::XML::EntityDecl::INTERNAL_GENERAL. See
433
+ * the constants on Nokogiri::XML::EntityDecl for more information.
434
+ *
435
+ * +external_id+, +system_id+, and +content+ set the External ID, System ID,
436
+ * and content respectively. All of these parameters are optional.
437
+ */
438
+ static VALUE create_entity(int argc, VALUE *argv, VALUE self)
439
+ {
440
+ VALUE name;
441
+ VALUE type;
442
+ VALUE external_id;
443
+ VALUE system_id;
444
+ VALUE content;
445
+ xmlEntityPtr ptr;
446
+ xmlDocPtr doc ;
447
+
448
+ Data_Get_Struct(self, xmlDoc, doc);
449
+
450
+ rb_scan_args(argc, argv, "14", &name, &type, &external_id, &system_id,
451
+ &content);
452
+
453
+ xmlResetLastError();
454
+ ptr = xmlAddDocEntity(
455
+ doc,
456
+ (xmlChar *)(NIL_P(name) ? NULL : StringValueCStr(name)),
457
+ (int) (NIL_P(type) ? XML_INTERNAL_GENERAL_ENTITY : NUM2INT(type)),
458
+ (xmlChar *)(NIL_P(external_id) ? NULL : StringValueCStr(external_id)),
459
+ (xmlChar *)(NIL_P(system_id) ? NULL : StringValueCStr(system_id)),
460
+ (xmlChar *)(NIL_P(content) ? NULL : StringValueCStr(content))
461
+ );
462
+
463
+ if(NULL == ptr) {
464
+ xmlErrorPtr error = xmlGetLastError();
465
+ if(error)
466
+ rb_exc_raise(Nokogiri_wrap_xml_syntax_error(error));
467
+ else
468
+ rb_raise(rb_eRuntimeError, "Could not create entity");
469
+
470
+ return Qnil;
471
+ }
472
+
473
+ return Nokogiri_wrap_xml_node(cNokogiriXmlEntityDecl, (xmlNodePtr)ptr);
474
+ }
475
+
476
+ static int block_caller(void * ctx, xmlNodePtr _node, xmlNodePtr _parent)
477
+ {
478
+ VALUE block;
479
+ VALUE node;
480
+ VALUE parent;
481
+ VALUE ret;
482
+
483
+ if(_node->type == XML_NAMESPACE_DECL){
484
+ node = Nokogiri_wrap_xml_namespace(_parent->doc, (xmlNsPtr) _node);
485
+ }
486
+ else{
487
+ node = Nokogiri_wrap_xml_node(Qnil, _node);
488
+ }
489
+ parent = _parent ? Nokogiri_wrap_xml_node(Qnil, _parent) : Qnil;
490
+ block = (VALUE)ctx;
491
+
492
+ ret = rb_funcall(block, rb_intern("call"), 2, node, parent);
493
+
494
+ if(Qfalse == ret || Qnil == ret) return 0;
495
+
496
+ return 1;
497
+ }
498
+
499
+ /* call-seq:
500
+ * doc.canonicalize(mode=XML_C14N_1_0,inclusive_namespaces=nil,with_comments=false)
501
+ * doc.canonicalize { |obj, parent| ... }
502
+ *
503
+ * Canonicalize a document and return the results. Takes an optional block
504
+ * that takes two parameters: the +obj+ and that node's +parent+.
505
+ * The +obj+ will be either a Nokogiri::XML::Node, or a Nokogiri::XML::Namespace
506
+ * The block must return a non-nil, non-false value if the +obj+ passed in
507
+ * should be included in the canonicalized document.
508
+ */
509
+ static VALUE nokogiri_xml_document_canonicalize(int argc, VALUE* argv, VALUE self)
510
+ {
511
+ VALUE mode;
512
+ VALUE incl_ns;
513
+ VALUE with_comments;
514
+ xmlChar **ns;
515
+ long ns_len, i;
516
+
517
+ xmlDocPtr doc;
518
+ xmlOutputBufferPtr buf;
519
+ xmlC14NIsVisibleCallback cb = NULL;
520
+ void * ctx = NULL;
521
+
522
+ VALUE rb_cStringIO;
523
+ VALUE io;
524
+
525
+ rb_scan_args(argc, argv, "03", &mode, &incl_ns, &with_comments);
526
+
527
+ Data_Get_Struct(self, xmlDoc, doc);
528
+
529
+ rb_cStringIO = rb_const_get_at(rb_cObject, rb_intern("StringIO"));
530
+ io = rb_class_new_instance(0, 0, rb_cStringIO);
531
+ buf = xmlAllocOutputBuffer(NULL);
532
+
533
+ buf->writecallback = (xmlOutputWriteCallback)io_write_callback;
534
+ buf->closecallback = (xmlOutputCloseCallback)io_close_callback;
535
+ buf->context = (void *)io;
536
+
537
+ if(rb_block_given_p()) {
538
+ cb = block_caller;
539
+ ctx = (void *)rb_block_proc();
540
+ }
541
+
542
+ if(NIL_P(incl_ns)){
543
+ ns = NULL;
544
+ }
545
+ else{
546
+ Check_Type(incl_ns, T_ARRAY);
547
+ ns_len = RARRAY_LEN(incl_ns);
548
+ ns = calloc((size_t)ns_len+1, sizeof(xmlChar *));
549
+ for (i = 0 ; i < ns_len ; i++) {
550
+ VALUE entry = rb_ary_entry(incl_ns, i);
551
+ ns[i] = (xmlChar*)StringValueCStr(entry);
552
+ }
553
+ }
554
+
555
+
556
+ xmlC14NExecute(doc, cb, ctx,
557
+ (int) (NIL_P(mode) ? 0 : NUM2INT(mode)),
558
+ ns,
559
+ (int) RTEST(with_comments),
560
+ buf);
561
+
562
+ xmlOutputBufferClose(buf);
563
+
564
+ return rb_funcall(io, rb_intern("string"), 0);
565
+ }
566
+
567
+ VALUE cNokogiriXmlDocument ;
568
+ void init_xml_document()
569
+ {
570
+ VALUE nokogiri = rb_define_module("Nokogiri");
571
+ VALUE xml = rb_define_module_under(nokogiri, "XML");
572
+ VALUE node = rb_define_class_under(xml, "Node", rb_cObject);
573
+
574
+ /*
575
+ * Nokogiri::XML::Document wraps an xml document.
576
+ */
577
+ VALUE klass = rb_define_class_under(xml, "Document", node);
578
+
579
+ cNokogiriXmlDocument = klass;
580
+
581
+ rb_define_singleton_method(klass, "read_memory", read_memory, 4);
582
+ rb_define_singleton_method(klass, "read_io", read_io, 4);
583
+ rb_define_singleton_method(klass, "new", new, -1);
584
+
585
+ rb_define_method(klass, "root", root, 0);
586
+ rb_define_method(klass, "root=", set_root, 1);
587
+ rb_define_method(klass, "encoding", encoding, 0);
588
+ rb_define_method(klass, "encoding=", set_encoding, 1);
589
+ rb_define_method(klass, "version", version, 0);
590
+ rb_define_method(klass, "canonicalize", nokogiri_xml_document_canonicalize, -1);
591
+ rb_define_method(klass, "dup", duplicate_document, -1);
592
+ rb_define_method(klass, "url", url, 0);
593
+ rb_define_method(klass, "create_entity", create_entity, -1);
594
+ rb_define_method(klass, "remove_namespaces!", remove_namespaces_bang, 0);
595
+ }
596
+
597
+
598
+ /* this takes klass as a param because it's used for HtmlDocument, too. */
599
+ VALUE Nokogiri_wrap_xml_document(VALUE klass, xmlDocPtr doc)
600
+ {
601
+ nokogiriTuplePtr tuple = (nokogiriTuplePtr)malloc(sizeof(nokogiriTuple));
602
+
603
+ VALUE rb_doc = Data_Wrap_Struct(
604
+ klass ? klass : cNokogiriXmlDocument,
605
+ mark,
606
+ dealloc,
607
+ doc
608
+ );
609
+
610
+ VALUE cache = rb_ary_new();
611
+ rb_iv_set(rb_doc, "@decorators", Qnil);
612
+ rb_iv_set(rb_doc, "@node_cache", cache);
613
+
614
+ tuple->doc = rb_doc;
615
+ tuple->unlinkedNodes = st_init_numtable_with_size(128);
616
+ tuple->node_cache = cache;
617
+ doc->_private = tuple ;
618
+
619
+ rb_obj_call_init(rb_doc, 0, NULL);
620
+
621
+ return rb_doc ;
622
+ }
@@ -0,0 +1,23 @@
1
+ #ifndef NOKOGIRI_XML_DOCUMENT
2
+ #define NOKOGIRI_XML_DOCUMENT
3
+
4
+ #include <nokogiri.h>
5
+
6
+ struct _nokogiriTuple {
7
+ VALUE doc;
8
+ st_table *unlinkedNodes;
9
+ VALUE node_cache;
10
+ };
11
+ typedef struct _nokogiriTuple nokogiriTuple;
12
+ typedef nokogiriTuple * nokogiriTuplePtr;
13
+
14
+ void init_xml_document();
15
+ VALUE Nokogiri_wrap_xml_document(VALUE klass, xmlDocPtr doc);
16
+
17
+ #define DOC_RUBY_OBJECT_TEST(x) ((nokogiriTuplePtr)(x->_private))
18
+ #define DOC_RUBY_OBJECT(x) (((nokogiriTuplePtr)(x->_private))->doc)
19
+ #define DOC_UNLINKED_NODE_HASH(x) (((nokogiriTuplePtr)(x->_private))->unlinkedNodes)
20
+ #define DOC_NODE_CACHE(x) (((nokogiriTuplePtr)(x->_private))->node_cache)
21
+
22
+ extern VALUE cNokogiriXmlDocument ;
23
+ #endif
@@ -0,0 +1,48 @@
1
+ #include <xml_document_fragment.h>
2
+
3
+ /*
4
+ * call-seq:
5
+ * new(document)
6
+ *
7
+ * Create a new DocumentFragment element on the +document+
8
+ */
9
+ static VALUE new(int argc, VALUE *argv, VALUE klass)
10
+ {
11
+ xmlDocPtr xml_doc;
12
+ xmlNodePtr node;
13
+ VALUE document;
14
+ VALUE rest;
15
+ VALUE rb_node;
16
+
17
+ rb_scan_args(argc, argv, "1*", &document, &rest);
18
+
19
+ Data_Get_Struct(document, xmlDoc, xml_doc);
20
+
21
+ node = xmlNewDocFragment(xml_doc->doc);
22
+
23
+ nokogiri_root_node(node);
24
+
25
+ rb_node = Nokogiri_wrap_xml_node(klass, node);
26
+ rb_obj_call_init(rb_node, argc, argv);
27
+
28
+ if(rb_block_given_p()) rb_yield(rb_node);
29
+
30
+ return rb_node;
31
+ }
32
+
33
+ VALUE cNokogiriXmlDocumentFragment;
34
+ void init_xml_document_fragment()
35
+ {
36
+ VALUE nokogiri = rb_define_module("Nokogiri");
37
+ VALUE xml = rb_define_module_under(nokogiri, "XML");
38
+ VALUE node = rb_define_class_under(xml, "Node", rb_cObject);
39
+
40
+ /*
41
+ * DocumentFragment represents a DocumentFragment node in an xml document.
42
+ */
43
+ VALUE klass = rb_define_class_under(xml, "DocumentFragment", node);
44
+
45
+ cNokogiriXmlDocumentFragment = klass;
46
+
47
+ rb_define_singleton_method(klass, "new", new, -1);
48
+ }
@@ -0,0 +1,10 @@
1
+ #ifndef NOKOGIRI_XML_DOCUMENT_FRAGMENT
2
+ #define NOKOGIRI_XML_DOCUMENT_FRAGMENT
3
+
4
+ #include <nokogiri.h>
5
+
6
+ void init_xml_document_fragment();
7
+
8
+ extern VALUE cNokogiriXmlDocumentFragment;
9
+ #endif
10
+