nokogiri 1.9.1 → 1.15.3

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (226) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile +45 -0
  3. data/LICENSE-DEPENDENCIES.md +1636 -1024
  4. data/LICENSE.md +5 -28
  5. data/README.md +203 -89
  6. data/bin/nokogiri +63 -50
  7. data/dependencies.yml +33 -61
  8. data/ext/nokogiri/depend +38 -358
  9. data/ext/nokogiri/extconf.rb +864 -418
  10. data/ext/nokogiri/gumbo.c +594 -0
  11. data/ext/nokogiri/html4_document.c +165 -0
  12. data/ext/nokogiri/html4_element_description.c +299 -0
  13. data/ext/nokogiri/html4_entity_lookup.c +37 -0
  14. data/ext/nokogiri/html4_sax_parser_context.c +108 -0
  15. data/ext/nokogiri/html4_sax_push_parser.c +95 -0
  16. data/ext/nokogiri/libxml2_backwards_compat.c +121 -0
  17. data/ext/nokogiri/nokogiri.c +251 -105
  18. data/ext/nokogiri/nokogiri.h +215 -90
  19. data/ext/nokogiri/test_global_handlers.c +40 -0
  20. data/ext/nokogiri/xml_attr.c +17 -17
  21. data/ext/nokogiri/xml_attribute_decl.c +22 -22
  22. data/ext/nokogiri/xml_cdata.c +40 -31
  23. data/ext/nokogiri/xml_comment.c +20 -27
  24. data/ext/nokogiri/xml_document.c +401 -240
  25. data/ext/nokogiri/xml_document_fragment.c +13 -17
  26. data/ext/nokogiri/xml_dtd.c +64 -58
  27. data/ext/nokogiri/xml_element_content.c +63 -55
  28. data/ext/nokogiri/xml_element_decl.c +31 -31
  29. data/ext/nokogiri/xml_encoding_handler.c +54 -21
  30. data/ext/nokogiri/xml_entity_decl.c +37 -35
  31. data/ext/nokogiri/xml_entity_reference.c +17 -19
  32. data/ext/nokogiri/xml_namespace.c +135 -61
  33. data/ext/nokogiri/xml_node.c +1346 -677
  34. data/ext/nokogiri/xml_node_set.c +246 -216
  35. data/ext/nokogiri/xml_processing_instruction.c +18 -20
  36. data/ext/nokogiri/xml_reader.c +347 -212
  37. data/ext/nokogiri/xml_relax_ng.c +86 -77
  38. data/ext/nokogiri/xml_sax_parser.c +149 -124
  39. data/ext/nokogiri/xml_sax_parser_context.c +145 -103
  40. data/ext/nokogiri/xml_sax_push_parser.c +64 -36
  41. data/ext/nokogiri/xml_schema.c +138 -81
  42. data/ext/nokogiri/xml_syntax_error.c +42 -21
  43. data/ext/nokogiri/xml_text.c +36 -26
  44. data/ext/nokogiri/xml_xpath_context.c +366 -178
  45. data/ext/nokogiri/xslt_stylesheet.c +335 -189
  46. data/gumbo-parser/CHANGES.md +63 -0
  47. data/gumbo-parser/Makefile +111 -0
  48. data/gumbo-parser/THANKS +27 -0
  49. data/gumbo-parser/src/Makefile +34 -0
  50. data/gumbo-parser/src/README.md +41 -0
  51. data/gumbo-parser/src/ascii.c +75 -0
  52. data/gumbo-parser/src/ascii.h +115 -0
  53. data/gumbo-parser/src/attribute.c +42 -0
  54. data/gumbo-parser/src/attribute.h +17 -0
  55. data/gumbo-parser/src/char_ref.c +22225 -0
  56. data/gumbo-parser/src/char_ref.h +29 -0
  57. data/gumbo-parser/src/char_ref.rl +2154 -0
  58. data/gumbo-parser/src/error.c +630 -0
  59. data/gumbo-parser/src/error.h +148 -0
  60. data/gumbo-parser/src/foreign_attrs.c +103 -0
  61. data/gumbo-parser/src/foreign_attrs.gperf +27 -0
  62. data/gumbo-parser/src/insertion_mode.h +33 -0
  63. data/gumbo-parser/src/macros.h +91 -0
  64. data/gumbo-parser/src/nokogiri_gumbo.h +944 -0
  65. data/gumbo-parser/src/parser.c +4891 -0
  66. data/gumbo-parser/src/parser.h +41 -0
  67. data/gumbo-parser/src/replacement.h +33 -0
  68. data/gumbo-parser/src/string_buffer.c +103 -0
  69. data/gumbo-parser/src/string_buffer.h +68 -0
  70. data/gumbo-parser/src/string_piece.c +48 -0
  71. data/gumbo-parser/src/svg_attrs.c +174 -0
  72. data/gumbo-parser/src/svg_attrs.gperf +77 -0
  73. data/gumbo-parser/src/svg_tags.c +137 -0
  74. data/gumbo-parser/src/svg_tags.gperf +55 -0
  75. data/gumbo-parser/src/tag.c +223 -0
  76. data/gumbo-parser/src/tag_lookup.c +382 -0
  77. data/gumbo-parser/src/tag_lookup.gperf +170 -0
  78. data/gumbo-parser/src/tag_lookup.h +13 -0
  79. data/gumbo-parser/src/token_buffer.c +79 -0
  80. data/gumbo-parser/src/token_buffer.h +71 -0
  81. data/gumbo-parser/src/token_type.h +17 -0
  82. data/gumbo-parser/src/tokenizer.c +3463 -0
  83. data/gumbo-parser/src/tokenizer.h +112 -0
  84. data/gumbo-parser/src/tokenizer_states.h +339 -0
  85. data/gumbo-parser/src/utf8.c +245 -0
  86. data/gumbo-parser/src/utf8.h +164 -0
  87. data/gumbo-parser/src/util.c +66 -0
  88. data/gumbo-parser/src/util.h +34 -0
  89. data/gumbo-parser/src/vector.c +111 -0
  90. data/gumbo-parser/src/vector.h +45 -0
  91. data/lib/nokogiri/class_resolver.rb +67 -0
  92. data/lib/nokogiri/css/node.rb +10 -8
  93. data/lib/nokogiri/css/parser.rb +397 -377
  94. data/lib/nokogiri/css/parser.y +250 -245
  95. data/lib/nokogiri/css/parser_extras.rb +54 -49
  96. data/lib/nokogiri/css/syntax_error.rb +3 -1
  97. data/lib/nokogiri/css/tokenizer.rb +107 -104
  98. data/lib/nokogiri/css/tokenizer.rex +3 -2
  99. data/lib/nokogiri/css/xpath_visitor.rb +224 -95
  100. data/lib/nokogiri/css.rb +56 -17
  101. data/lib/nokogiri/decorators/slop.rb +9 -7
  102. data/lib/nokogiri/encoding_handler.rb +57 -0
  103. data/lib/nokogiri/extension.rb +32 -0
  104. data/lib/nokogiri/gumbo.rb +15 -0
  105. data/lib/nokogiri/html.rb +38 -27
  106. data/lib/nokogiri/{html → html4}/builder.rb +4 -2
  107. data/lib/nokogiri/html4/document.rb +214 -0
  108. data/lib/nokogiri/html4/document_fragment.rb +54 -0
  109. data/lib/nokogiri/{html → html4}/element_description.rb +3 -1
  110. data/lib/nokogiri/html4/element_description_defaults.rb +2040 -0
  111. data/lib/nokogiri/html4/encoding_reader.rb +121 -0
  112. data/lib/nokogiri/{html → html4}/entity_lookup.rb +4 -2
  113. data/lib/nokogiri/{html → html4}/sax/parser.rb +17 -16
  114. data/lib/nokogiri/html4/sax/parser_context.rb +20 -0
  115. data/lib/nokogiri/{html → html4}/sax/push_parser.rb +12 -11
  116. data/lib/nokogiri/html4.rb +47 -0
  117. data/lib/nokogiri/html5/document.rb +168 -0
  118. data/lib/nokogiri/html5/document_fragment.rb +90 -0
  119. data/lib/nokogiri/html5/node.rb +103 -0
  120. data/lib/nokogiri/html5.rb +392 -0
  121. data/lib/nokogiri/jruby/dependencies.rb +3 -0
  122. data/lib/nokogiri/jruby/nokogiri_jars.rb +43 -0
  123. data/lib/nokogiri/syntax_error.rb +2 -0
  124. data/lib/nokogiri/version/constant.rb +6 -0
  125. data/lib/nokogiri/version/info.rb +223 -0
  126. data/lib/nokogiri/version.rb +3 -108
  127. data/lib/nokogiri/xml/attr.rb +55 -3
  128. data/lib/nokogiri/xml/attribute_decl.rb +6 -2
  129. data/lib/nokogiri/xml/builder.rb +98 -54
  130. data/lib/nokogiri/xml/cdata.rb +3 -1
  131. data/lib/nokogiri/xml/character_data.rb +2 -0
  132. data/lib/nokogiri/xml/document.rb +312 -126
  133. data/lib/nokogiri/xml/document_fragment.rb +93 -48
  134. data/lib/nokogiri/xml/dtd.rb +4 -2
  135. data/lib/nokogiri/xml/element_content.rb +12 -2
  136. data/lib/nokogiri/xml/element_decl.rb +6 -2
  137. data/lib/nokogiri/xml/entity_decl.rb +7 -3
  138. data/lib/nokogiri/xml/entity_reference.rb +2 -0
  139. data/lib/nokogiri/xml/namespace.rb +45 -0
  140. data/lib/nokogiri/xml/node/save_options.rb +23 -8
  141. data/lib/nokogiri/xml/node.rb +1088 -418
  142. data/lib/nokogiri/xml/node_set.rb +173 -63
  143. data/lib/nokogiri/xml/notation.rb +13 -0
  144. data/lib/nokogiri/xml/parse_options.rb +145 -52
  145. data/lib/nokogiri/xml/pp/character_data.rb +9 -6
  146. data/lib/nokogiri/xml/pp/node.rb +42 -30
  147. data/lib/nokogiri/xml/pp.rb +4 -2
  148. data/lib/nokogiri/xml/processing_instruction.rb +4 -1
  149. data/lib/nokogiri/xml/reader.rb +21 -28
  150. data/lib/nokogiri/xml/relax_ng.rb +8 -2
  151. data/lib/nokogiri/xml/sax/document.rb +45 -49
  152. data/lib/nokogiri/xml/sax/parser.rb +39 -36
  153. data/lib/nokogiri/xml/sax/parser_context.rb +8 -3
  154. data/lib/nokogiri/xml/sax/push_parser.rb +6 -5
  155. data/lib/nokogiri/xml/sax.rb +6 -4
  156. data/lib/nokogiri/xml/schema.rb +19 -9
  157. data/lib/nokogiri/xml/searchable.rb +120 -72
  158. data/lib/nokogiri/xml/syntax_error.rb +6 -4
  159. data/lib/nokogiri/xml/text.rb +2 -0
  160. data/lib/nokogiri/xml/xpath/syntax_error.rb +4 -2
  161. data/lib/nokogiri/xml/xpath.rb +15 -4
  162. data/lib/nokogiri/xml/xpath_context.rb +3 -3
  163. data/lib/nokogiri/xml.rb +38 -37
  164. data/lib/nokogiri/xslt/stylesheet.rb +3 -1
  165. data/lib/nokogiri/xslt.rb +101 -22
  166. data/lib/nokogiri.rb +59 -75
  167. data/lib/xsd/xmlparser/nokogiri.rb +29 -25
  168. data/patches/libxml2/0001-Remove-script-macro-support.patch +40 -0
  169. data/patches/libxml2/0002-Update-entities-to-remove-handling-of-ssi.patch +44 -0
  170. data/patches/libxml2/0003-libxml2.la-is-in-top_builddir.patch +25 -0
  171. data/patches/libxml2/0009-allow-wildcard-namespaces.patch +77 -0
  172. data/patches/libxml2/0010-update-config.guess-and-config.sub-for-libxml2.patch +224 -0
  173. data/patches/libxml2/0011-rip-out-libxml2-s-libc_single_threaded-support.patch +30 -0
  174. data/patches/libxslt/0001-update-config.guess-and-config.sub-for-libxslt.patch +224 -0
  175. data/ports/archives/libxml2-2.11.4.tar.xz +0 -0
  176. data/ports/archives/libxslt-1.1.38.tar.xz +0 -0
  177. metadata +128 -265
  178. data/ext/nokogiri/html_document.c +0 -170
  179. data/ext/nokogiri/html_document.h +0 -10
  180. data/ext/nokogiri/html_element_description.c +0 -279
  181. data/ext/nokogiri/html_element_description.h +0 -10
  182. data/ext/nokogiri/html_entity_lookup.c +0 -32
  183. data/ext/nokogiri/html_entity_lookup.h +0 -8
  184. data/ext/nokogiri/html_sax_parser_context.c +0 -116
  185. data/ext/nokogiri/html_sax_parser_context.h +0 -11
  186. data/ext/nokogiri/html_sax_push_parser.c +0 -87
  187. data/ext/nokogiri/html_sax_push_parser.h +0 -9
  188. data/ext/nokogiri/xml_attr.h +0 -9
  189. data/ext/nokogiri/xml_attribute_decl.h +0 -9
  190. data/ext/nokogiri/xml_cdata.h +0 -9
  191. data/ext/nokogiri/xml_comment.h +0 -9
  192. data/ext/nokogiri/xml_document.h +0 -23
  193. data/ext/nokogiri/xml_document_fragment.h +0 -10
  194. data/ext/nokogiri/xml_dtd.h +0 -10
  195. data/ext/nokogiri/xml_element_content.h +0 -10
  196. data/ext/nokogiri/xml_element_decl.h +0 -9
  197. data/ext/nokogiri/xml_encoding_handler.h +0 -8
  198. data/ext/nokogiri/xml_entity_decl.h +0 -10
  199. data/ext/nokogiri/xml_entity_reference.h +0 -9
  200. data/ext/nokogiri/xml_io.c +0 -61
  201. data/ext/nokogiri/xml_io.h +0 -11
  202. data/ext/nokogiri/xml_libxml2_hacks.c +0 -112
  203. data/ext/nokogiri/xml_libxml2_hacks.h +0 -12
  204. data/ext/nokogiri/xml_namespace.h +0 -14
  205. data/ext/nokogiri/xml_node.h +0 -13
  206. data/ext/nokogiri/xml_node_set.h +0 -12
  207. data/ext/nokogiri/xml_processing_instruction.h +0 -9
  208. data/ext/nokogiri/xml_reader.h +0 -10
  209. data/ext/nokogiri/xml_relax_ng.h +0 -9
  210. data/ext/nokogiri/xml_sax_parser.h +0 -39
  211. data/ext/nokogiri/xml_sax_parser_context.h +0 -10
  212. data/ext/nokogiri/xml_sax_push_parser.h +0 -9
  213. data/ext/nokogiri/xml_schema.h +0 -9
  214. data/ext/nokogiri/xml_syntax_error.h +0 -13
  215. data/ext/nokogiri/xml_text.h +0 -9
  216. data/ext/nokogiri/xml_xpath_context.h +0 -10
  217. data/ext/nokogiri/xslt_stylesheet.h +0 -14
  218. data/lib/nokogiri/html/document.rb +0 -335
  219. data/lib/nokogiri/html/document_fragment.rb +0 -49
  220. data/lib/nokogiri/html/element_description_defaults.rb +0 -671
  221. data/lib/nokogiri/html/sax/parser_context.rb +0 -16
  222. data/patches/libxml2/0001-Revert-Do-not-URI-escape-in-server-side-includes.patch +0 -78
  223. data/patches/libxml2/0002-Fix-nullptr-deref-with-XPath-logic-ops.patch +0 -54
  224. data/patches/libxml2/0003-Fix-infinite-loop-in-LZMA-decompression.patch +0 -50
  225. data/ports/archives/libxml2-2.9.8.tar.gz +0 -0
  226. data/ports/archives/libxslt-1.1.32.tar.gz +0 -0
@@ -1,91 +1,168 @@
1
- #include <xml_document.h>
1
+ #include <nokogiri.h>
2
2
 
3
- static int dealloc_node_i(xmlNodePtr key, xmlNodePtr node, xmlDocPtr doc)
3
+ VALUE cNokogiriXmlDocument ;
4
+
5
+ static int
6
+ dealloc_node_i2(xmlNodePtr key, xmlNodePtr node, xmlDocPtr doc)
4
7
  {
5
- switch(node->type) {
6
- case XML_ATTRIBUTE_NODE:
7
- xmlFreePropList((xmlAttrPtr)node);
8
- break;
9
- case XML_NAMESPACE_DECL:
10
- xmlFreeNs((xmlNsPtr)node);
11
- break;
12
- case XML_DTD_NODE:
13
- xmlFreeDtd((xmlDtdPtr)node);
14
- break;
15
- default:
16
- if(node->parent == NULL) {
17
- xmlAddChild((xmlNodePtr)doc, node);
18
- }
8
+ switch (node->type) {
9
+ case XML_ATTRIBUTE_NODE:
10
+ xmlFreePropList((xmlAttrPtr)node);
11
+ break;
12
+ case XML_NAMESPACE_DECL:
13
+ xmlFreeNs((xmlNsPtr)node);
14
+ break;
15
+ case XML_DTD_NODE:
16
+ xmlFreeDtd((xmlDtdPtr)node);
17
+ break;
18
+ default:
19
+ if (node->parent == NULL) {
20
+ xmlAddChild((xmlNodePtr)doc, node);
21
+ }
19
22
  }
20
23
  return ST_CONTINUE;
21
24
  }
22
25
 
23
- static void remove_private(xmlNodePtr node)
26
+ static int
27
+ dealloc_node_i(st_data_t key, st_data_t node, st_data_t doc)
28
+ {
29
+ return dealloc_node_i2((xmlNodePtr)key, (xmlNodePtr)node, (xmlDocPtr)doc);
30
+ }
31
+
32
+ static void
33
+ remove_private(xmlNodePtr node)
24
34
  {
25
35
  xmlNodePtr child;
26
36
 
27
- for (child = node->children; child; child = child->next)
37
+ for (child = node->children; child; child = child->next) {
28
38
  remove_private(child);
39
+ }
29
40
 
30
41
  if ((node->type == XML_ELEMENT_NODE ||
31
42
  node->type == XML_XINCLUDE_START ||
32
43
  node->type == XML_XINCLUDE_END) &&
33
44
  node->properties) {
34
- for (child = (xmlNodePtr)node->properties; child; child = child->next)
45
+ for (child = (xmlNodePtr)node->properties; child; child = child->next) {
35
46
  remove_private(child);
47
+ }
36
48
  }
37
49
 
38
50
  node->_private = NULL;
39
51
  }
40
52
 
41
- static void dealloc(xmlDocPtr doc)
53
+ static void
54
+ mark(void *data)
42
55
  {
43
- st_table *node_hash;
56
+ xmlDocPtr doc = (xmlDocPtr)data;
57
+ nokogiriTuplePtr tuple = (nokogiriTuplePtr)doc->_private;
58
+ if (tuple) {
59
+ rb_gc_mark(tuple->doc);
60
+ rb_gc_mark(tuple->node_cache);
61
+ }
62
+ }
44
63
 
45
- NOKOGIRI_DEBUG_START(doc);
64
+ static void
65
+ dealloc(void *data)
66
+ {
67
+ xmlDocPtr doc = (xmlDocPtr)data;
68
+ st_table *node_hash;
46
69
 
47
70
  node_hash = DOC_UNLINKED_NODE_HASH(doc);
48
71
 
49
72
  st_foreach(node_hash, dealloc_node_i, (st_data_t)doc);
50
73
  st_free_table(node_hash);
51
74
 
52
- free(doc->_private);
75
+ ruby_xfree(doc->_private);
53
76
 
54
- /* When both Nokogiri and libxml-ruby are loaded, make sure that all nodes
55
- * have their _private pointers cleared. This is to avoid libxml-ruby's
56
- * xmlDeregisterNode callback from accessing VALUE pointers from ruby's GC
57
- * free context, which can result in segfaults.
77
+ #pragma GCC diagnostic push
78
+ #pragma GCC diagnostic ignored "-Wdeprecated-declarations" // xmlDeregisterNodeDefault is deprecated as of libxml2 2.11.0
79
+ /*
80
+ * libxml-ruby < 3.0.0 uses xmlDeregisterNodeDefault. If the user is using one of those older
81
+ * versions, the registered callback from libxml-ruby will access the _private pointers set by
82
+ * nokogiri, which will result in segfaults.
83
+ *
84
+ * To avoid this, we need to clear the _private pointers from all nodes in this document tree
85
+ * before that callback gets invoked.
86
+ *
87
+ * libxml-ruby 3.0.0 was released in 2017-02, so at some point we can probably safely remove this
88
+ * safeguard (though probably pairing with a runtime check on the libxml-ruby version).
58
89
  */
59
- if (xmlDeregisterNodeDefaultValue)
90
+ if (xmlDeregisterNodeDefaultValue) {
60
91
  remove_private((xmlNodePtr)doc);
92
+ }
93
+ #pragma GCC diagnostic pop
61
94
 
62
95
  xmlFreeDoc(doc);
96
+ }
97
+
98
+ static size_t
99
+ memsize_node(const xmlNodePtr node)
100
+ {
101
+ /* note we don't count namespace definitions, just going for a good-enough number here */
102
+ xmlNodePtr child;
103
+ size_t memsize = 0;
104
+
105
+ memsize += xmlStrlen(node->name);
106
+ for (child = (xmlNodePtr)node->properties; child; child = child->next) {
107
+ memsize += sizeof(xmlAttr) + memsize_node(child);
108
+ }
109
+ if (node->type == XML_TEXT_NODE) {
110
+ memsize += xmlStrlen(node->content);
111
+ }
112
+ for (child = node->children; child; child = child->next) {
113
+ memsize += sizeof(xmlNode) + memsize_node(child);
114
+ }
115
+ return memsize;
116
+ }
63
117
 
64
- NOKOGIRI_DEBUG_END(doc);
118
+ static size_t
119
+ memsize(const void *data)
120
+ {
121
+ xmlDocPtr doc = (const xmlDocPtr)data;
122
+ size_t memsize = sizeof(xmlDoc);
123
+ /* This may not account for all memory use */
124
+ memsize += memsize_node((xmlNodePtr)doc);
125
+ return memsize;
65
126
  }
66
127
 
67
- static void recursively_remove_namespaces_from_node(xmlNodePtr node)
128
+ static const rb_data_type_t noko_xml_document_data_type = {
129
+ .wrap_struct_name = "Nokogiri::XML::Document",
130
+ .function = {
131
+ .dmark = mark,
132
+ .dfree = dealloc,
133
+ .dsize = memsize,
134
+ },
135
+ // .flags = RUBY_TYPED_FREE_IMMEDIATELY, // TODO see https://github.com/sparklemotion/nokogiri/issues/2822
136
+ };
137
+
138
+ static void
139
+ recursively_remove_namespaces_from_node(xmlNodePtr node)
68
140
  {
69
141
  xmlNodePtr child ;
70
142
  xmlAttrPtr property ;
71
143
 
72
144
  xmlSetNs(node, NULL);
73
145
 
74
- for (child = node->children ; child ; child = child->next)
146
+ for (child = node->children ; child ; child = child->next) {
75
147
  recursively_remove_namespaces_from_node(child);
148
+ }
76
149
 
77
150
  if (((node->type == XML_ELEMENT_NODE) ||
78
151
  (node->type == XML_XINCLUDE_START) ||
79
152
  (node->type == XML_XINCLUDE_END)) &&
80
153
  node->nsDef) {
81
- xmlFreeNsList(node->nsDef);
154
+ xmlNsPtr curr = node->nsDef;
155
+ while (curr) {
156
+ noko_xml_document_pin_namespace(curr, node->doc);
157
+ curr = curr->next;
158
+ }
82
159
  node->nsDef = NULL;
83
160
  }
84
161
 
85
162
  if (node->type == XML_ELEMENT_NODE && node->properties != NULL) {
86
163
  property = node->properties ;
87
164
  while (property != NULL) {
88
- if (property->ns) property->ns = NULL ;
165
+ if (property->ns) { property->ns = NULL ; }
89
166
  property = property->next ;
90
167
  }
91
168
  }
@@ -97,12 +174,12 @@ static void recursively_remove_namespaces_from_node(xmlNodePtr node)
97
174
  *
98
175
  * Get the url name for this document.
99
176
  */
100
- static VALUE url(VALUE self)
177
+ static VALUE
178
+ url(VALUE self)
101
179
  {
102
- xmlDocPtr doc;
103
- Data_Get_Struct(self, xmlDoc, doc);
180
+ xmlDocPtr doc = noko_xml_document_unwrap(self);
104
181
 
105
- if(doc->URL) return NOKOGIRI_STR_NEW2(doc->URL);
182
+ if (doc->URL) { return NOKOGIRI_STR_NEW2(doc->URL); }
106
183
 
107
184
  return Qnil;
108
185
  }
@@ -113,42 +190,42 @@ static VALUE url(VALUE self)
113
190
  *
114
191
  * Set the root element on this document
115
192
  */
116
- static VALUE set_root(VALUE self, VALUE root)
193
+ static VALUE
194
+ rb_xml_document_root_set(VALUE self, VALUE rb_new_root)
117
195
  {
118
- xmlDocPtr doc;
119
- xmlNodePtr new_root;
120
- xmlNodePtr old_root;
121
-
122
- Data_Get_Struct(self, xmlDoc, doc);
196
+ xmlDocPtr c_document;
197
+ xmlNodePtr c_new_root = NULL, c_current_root;
123
198
 
124
- old_root = NULL;
199
+ c_document = noko_xml_document_unwrap(self);
125
200
 
126
- if(NIL_P(root)) {
127
- old_root = xmlDocGetRootElement(doc);
128
-
129
- if(old_root) {
130
- xmlUnlinkNode(old_root);
131
- nokogiri_root_node(old_root);
132
- }
133
-
134
- return root;
201
+ c_current_root = xmlDocGetRootElement(c_document);
202
+ if (c_current_root) {
203
+ xmlUnlinkNode(c_current_root);
204
+ noko_xml_document_pin_node(c_current_root);
135
205
  }
136
206
 
137
- Data_Get_Struct(root, xmlNode, new_root);
207
+ if (!NIL_P(rb_new_root)) {
208
+ if (!rb_obj_is_kind_of(rb_new_root, cNokogiriXmlNode)) {
209
+ rb_raise(rb_eArgError,
210
+ "expected Nokogiri::XML::Node but received %"PRIsVALUE,
211
+ rb_obj_class(rb_new_root));
212
+ }
138
213
 
214
+ Noko_Node_Get_Struct(rb_new_root, xmlNode, c_new_root);
139
215
 
140
- /* If the new root's document is not the same as the current document,
141
- * then we need to dup the node in to this document. */
142
- if(new_root->doc != doc) {
143
- old_root = xmlDocGetRootElement(doc);
144
- if (!(new_root = xmlDocCopyNode(new_root, doc, 1))) {
145
- rb_raise(rb_eRuntimeError, "Could not reparent node (xmlDocCopyNode)");
216
+ /* If the new root's document is not the same as the current document,
217
+ * then we need to dup the node in to this document. */
218
+ if (c_new_root->doc != c_document) {
219
+ c_new_root = xmlDocCopyNode(c_new_root, c_document, 1);
220
+ if (!c_new_root) {
221
+ rb_raise(rb_eRuntimeError, "Could not reparent node (xmlDocCopyNode)");
222
+ }
146
223
  }
147
224
  }
148
225
 
149
- xmlDocSetRootElement(doc, new_root);
150
- if(old_root) nokogiri_root_node(old_root);
151
- return root;
226
+ xmlDocSetRootElement(c_document, c_new_root);
227
+
228
+ return rb_new_root;
152
229
  }
153
230
 
154
231
  /*
@@ -157,17 +234,20 @@ static VALUE set_root(VALUE self, VALUE root)
157
234
  *
158
235
  * Get the root node for this document.
159
236
  */
160
- static VALUE root(VALUE self)
237
+ static VALUE
238
+ rb_xml_document_root(VALUE self)
161
239
  {
162
- xmlDocPtr doc;
163
- xmlNodePtr root;
240
+ xmlDocPtr c_document;
241
+ xmlNodePtr c_root;
164
242
 
165
- Data_Get_Struct(self, xmlDoc, doc);
243
+ c_document = noko_xml_document_unwrap(self);
166
244
 
167
- root = xmlDocGetRootElement(doc);
245
+ c_root = xmlDocGetRootElement(c_document);
246
+ if (!c_root) {
247
+ return Qnil;
248
+ }
168
249
 
169
- if(!root) return Qnil;
170
- return Nokogiri_wrap_xml_node(Qnil, root) ;
250
+ return noko_xml_node_wrap(Qnil, c_root) ;
171
251
  }
172
252
 
173
253
  /*
@@ -176,13 +256,14 @@ static VALUE root(VALUE self)
176
256
  *
177
257
  * Set the encoding string for this Document
178
258
  */
179
- static VALUE set_encoding(VALUE self, VALUE encoding)
259
+ static VALUE
260
+ set_encoding(VALUE self, VALUE encoding)
180
261
  {
181
- xmlDocPtr doc;
182
- Data_Get_Struct(self, xmlDoc, doc);
262
+ xmlDocPtr doc = noko_xml_document_unwrap(self);
183
263
 
184
- if (doc->encoding)
185
- free((char *)(uintptr_t) doc->encoding); /* avoid gcc cast warning */
264
+ if (doc->encoding) {
265
+ xmlFree(DISCARD_CONST_QUAL_XMLCHAR(doc->encoding));
266
+ }
186
267
 
187
268
  doc->encoding = xmlStrdup((xmlChar *)StringValueCStr(encoding));
188
269
 
@@ -195,12 +276,12 @@ static VALUE set_encoding(VALUE self, VALUE encoding)
195
276
  *
196
277
  * Get the encoding for this Document
197
278
  */
198
- static VALUE encoding(VALUE self)
279
+ static VALUE
280
+ encoding(VALUE self)
199
281
  {
200
- xmlDocPtr doc;
201
- Data_Get_Struct(self, xmlDoc, doc);
282
+ xmlDocPtr doc = noko_xml_document_unwrap(self);
202
283
 
203
- if(!doc->encoding) return Qnil;
284
+ if (!doc->encoding) { return Qnil; }
204
285
  return NOKOGIRI_STR_NEW2(doc->encoding);
205
286
  }
206
287
 
@@ -210,12 +291,12 @@ static VALUE encoding(VALUE self)
210
291
  *
211
292
  * Get the XML version for this Document
212
293
  */
213
- static VALUE version(VALUE self)
294
+ static VALUE
295
+ version(VALUE self)
214
296
  {
215
- xmlDocPtr doc;
216
- Data_Get_Struct(self, xmlDoc, doc);
297
+ xmlDocPtr doc = noko_xml_document_unwrap(self);
217
298
 
218
- if(!doc->version) return Qnil;
299
+ if (!doc->version) { return Qnil; }
219
300
  return NOKOGIRI_STR_NEW2(doc->version);
220
301
  }
221
302
 
@@ -225,14 +306,15 @@ static VALUE version(VALUE self)
225
306
  *
226
307
  * Create a new document from an IO object
227
308
  */
228
- static VALUE read_io( VALUE klass,
229
- VALUE io,
230
- VALUE url,
231
- VALUE encoding,
232
- VALUE options )
309
+ static VALUE
310
+ read_io(VALUE klass,
311
+ VALUE io,
312
+ VALUE url,
313
+ VALUE encoding,
314
+ VALUE options)
233
315
  {
234
- const char * c_url = NIL_P(url) ? NULL : StringValueCStr(url);
235
- const char * c_enc = NIL_P(encoding) ? NULL : StringValueCStr(encoding);
316
+ const char *c_url = NIL_P(url) ? NULL : StringValueCStr(url);
317
+ const char *c_enc = NIL_P(encoding) ? NULL : StringValueCStr(encoding);
236
318
  VALUE error_list = rb_ary_new();
237
319
  VALUE document;
238
320
  xmlDocPtr doc;
@@ -241,30 +323,31 @@ static VALUE read_io( VALUE klass,
241
323
  xmlSetStructuredErrorFunc((void *)error_list, Nokogiri_error_array_pusher);
242
324
 
243
325
  doc = xmlReadIO(
244
- (xmlInputReadCallback)io_read_callback,
245
- (xmlInputCloseCallback)io_close_callback,
246
- (void *)io,
247
- c_url,
248
- c_enc,
249
- (int)NUM2INT(options)
250
- );
326
+ (xmlInputReadCallback)noko_io_read,
327
+ (xmlInputCloseCallback)noko_io_close,
328
+ (void *)io,
329
+ c_url,
330
+ c_enc,
331
+ (int)NUM2INT(options)
332
+ );
251
333
  xmlSetStructuredErrorFunc(NULL, NULL);
252
334
 
253
- if(doc == NULL) {
335
+ if (doc == NULL) {
254
336
  xmlErrorPtr error;
255
337
 
256
338
  xmlFreeDoc(doc);
257
339
 
258
340
  error = xmlGetLastError();
259
- if(error)
341
+ if (error) {
260
342
  rb_exc_raise(Nokogiri_wrap_xml_syntax_error(error));
261
- else
343
+ } else {
262
344
  rb_raise(rb_eRuntimeError, "Could not parse document");
345
+ }
263
346
 
264
347
  return Qnil;
265
348
  }
266
349
 
267
- document = Nokogiri_wrap_xml_document(klass, doc);
350
+ document = noko_xml_document_wrap(klass, doc);
268
351
  rb_iv_set(document, "@errors", error_list);
269
352
  return document;
270
353
  }
@@ -275,15 +358,16 @@ static VALUE read_io( VALUE klass,
275
358
  *
276
359
  * Create a new document from a String
277
360
  */
278
- static VALUE read_memory( VALUE klass,
279
- VALUE string,
280
- VALUE url,
281
- VALUE encoding,
282
- VALUE options )
361
+ static VALUE
362
+ read_memory(VALUE klass,
363
+ VALUE string,
364
+ VALUE url,
365
+ VALUE encoding,
366
+ VALUE options)
283
367
  {
284
- const char * c_buffer = StringValuePtr(string);
285
- const char * c_url = NIL_P(url) ? NULL : StringValueCStr(url);
286
- const char * c_enc = NIL_P(encoding) ? NULL : StringValueCStr(encoding);
368
+ const char *c_buffer = StringValuePtr(string);
369
+ const char *c_url = NIL_P(url) ? NULL : StringValueCStr(url);
370
+ const char *c_enc = NIL_P(encoding) ? NULL : StringValueCStr(encoding);
287
371
  int len = (int)RSTRING_LEN(string);
288
372
  VALUE error_list = rb_ary_new();
289
373
  VALUE document;
@@ -294,21 +378,22 @@ static VALUE read_memory( VALUE klass,
294
378
  doc = xmlReadMemory(c_buffer, len, c_url, c_enc, (int)NUM2INT(options));
295
379
  xmlSetStructuredErrorFunc(NULL, NULL);
296
380
 
297
- if(doc == NULL) {
381
+ if (doc == NULL) {
298
382
  xmlErrorPtr error;
299
383
 
300
384
  xmlFreeDoc(doc);
301
385
 
302
386
  error = xmlGetLastError();
303
- if(error)
387
+ if (error) {
304
388
  rb_exc_raise(Nokogiri_wrap_xml_syntax_error(error));
305
- else
389
+ } else {
306
390
  rb_raise(rb_eRuntimeError, "Could not parse document");
391
+ }
307
392
 
308
393
  return Qnil;
309
394
  }
310
395
 
311
- document = Nokogiri_wrap_xml_document(klass, doc);
396
+ document = noko_xml_document_wrap(klass, doc);
312
397
  rb_iv_set(document, "@errors", error_list);
313
398
  return document;
314
399
  }
@@ -320,26 +405,26 @@ static VALUE read_memory( VALUE klass,
320
405
  * Copy this Document. An optional depth may be passed in, but it defaults
321
406
  * to a deep copy. 0 is a shallow copy, 1 is a deep copy.
322
407
  */
323
- static VALUE duplicate_document(int argc, VALUE *argv, VALUE self)
408
+ static VALUE
409
+ duplicate_document(int argc, VALUE *argv, VALUE self)
324
410
  {
325
411
  xmlDocPtr doc, dup;
326
412
  VALUE copy;
327
413
  VALUE level;
328
- VALUE error_list;
329
414
 
330
- if(rb_scan_args(argc, argv, "01", &level) == 0)
415
+ if (rb_scan_args(argc, argv, "01", &level) == 0) {
331
416
  level = INT2NUM((long)1);
417
+ }
332
418
 
333
- Data_Get_Struct(self, xmlDoc, doc);
419
+ doc = noko_xml_document_unwrap(self);
334
420
 
335
421
  dup = xmlCopyDoc(doc, (int)NUM2INT(level));
336
422
 
337
- if(dup == NULL) return Qnil;
423
+ if (dup == NULL) { return Qnil; }
338
424
 
339
425
  dup->type = doc->type;
340
- copy = Nokogiri_wrap_xml_document(rb_obj_class(self), dup);
341
- error_list = rb_iv_get(self, "@errors");
342
- rb_iv_set(copy, "@errors", error_list);
426
+ copy = noko_xml_document_wrap(rb_obj_class(self), dup);
427
+ rb_iv_set(copy, "@errors", rb_iv_get(self, "@errors"));
343
428
  return copy ;
344
429
  }
345
430
 
@@ -349,18 +434,18 @@ static VALUE duplicate_document(int argc, VALUE *argv, VALUE self)
349
434
  *
350
435
  * Create a new document with +version+ (defaults to "1.0")
351
436
  */
352
- static VALUE new(int argc, VALUE *argv, VALUE klass)
437
+ static VALUE
438
+ new (int argc, VALUE *argv, VALUE klass)
353
439
  {
354
440
  xmlDocPtr doc;
355
441
  VALUE version, rest, rb_doc ;
356
442
 
357
443
  rb_scan_args(argc, argv, "0*", &rest);
358
444
  version = rb_ary_entry(rest, (long)0);
359
- if (NIL_P(version)) version = rb_str_new2("1.0");
445
+ if (NIL_P(version)) { version = rb_str_new2("1.0"); }
360
446
 
361
447
  doc = xmlNewDoc((xmlChar *)StringValueCStr(version));
362
- rb_doc = Nokogiri_wrap_xml_document(klass, doc);
363
- rb_obj_call_init(rb_doc, argc, argv);
448
+ rb_doc = noko_xml_document_wrap_with_init_args(klass, doc, argc, argv);
364
449
  return rb_doc ;
365
450
  }
366
451
 
@@ -401,10 +486,10 @@ static VALUE new(int argc, VALUE *argv, VALUE klass)
401
486
  * please direct your browser to
402
487
  * http://tenderlovemaking.com/2009/04/23/namespaces-in-xml.html
403
488
  */
404
- VALUE remove_namespaces_bang(VALUE self)
489
+ static VALUE
490
+ remove_namespaces_bang(VALUE self)
405
491
  {
406
- xmlDocPtr doc ;
407
- Data_Get_Struct(self, xmlDoc, doc);
492
+ xmlDocPtr doc = noko_xml_document_unwrap(self);
408
493
 
409
494
  recursively_remove_namespaces_from_node((xmlNodePtr)doc);
410
495
  return self;
@@ -421,7 +506,8 @@ VALUE remove_namespaces_bang(VALUE self)
421
506
  * +external_id+, +system_id+, and +content+ set the External ID, System ID,
422
507
  * and content respectively. All of these parameters are optional.
423
508
  */
424
- static VALUE create_entity(int argc, VALUE *argv, VALUE self)
509
+ static VALUE
510
+ create_entity(int argc, VALUE *argv, VALUE self)
425
511
  {
426
512
  VALUE name;
427
513
  VALUE type;
@@ -431,55 +517,53 @@ static VALUE create_entity(int argc, VALUE *argv, VALUE self)
431
517
  xmlEntityPtr ptr;
432
518
  xmlDocPtr doc ;
433
519
 
434
- Data_Get_Struct(self, xmlDoc, doc);
520
+ doc = noko_xml_document_unwrap(self);
435
521
 
436
522
  rb_scan_args(argc, argv, "14", &name, &type, &external_id, &system_id,
437
- &content);
523
+ &content);
438
524
 
439
525
  xmlResetLastError();
440
526
  ptr = xmlAddDocEntity(
441
- doc,
442
- (xmlChar *)(NIL_P(name) ? NULL : StringValueCStr(name)),
443
- (int) (NIL_P(type) ? XML_INTERNAL_GENERAL_ENTITY : NUM2INT(type)),
444
- (xmlChar *)(NIL_P(external_id) ? NULL : StringValueCStr(external_id)),
445
- (xmlChar *)(NIL_P(system_id) ? NULL : StringValueCStr(system_id)),
446
- (xmlChar *)(NIL_P(content) ? NULL : StringValueCStr(content))
447
- );
448
-
449
- if(NULL == ptr) {
527
+ doc,
528
+ (xmlChar *)(NIL_P(name) ? NULL : StringValueCStr(name)),
529
+ (int)(NIL_P(type) ? XML_INTERNAL_GENERAL_ENTITY : NUM2INT(type)),
530
+ (xmlChar *)(NIL_P(external_id) ? NULL : StringValueCStr(external_id)),
531
+ (xmlChar *)(NIL_P(system_id) ? NULL : StringValueCStr(system_id)),
532
+ (xmlChar *)(NIL_P(content) ? NULL : StringValueCStr(content))
533
+ );
534
+
535
+ if (NULL == ptr) {
450
536
  xmlErrorPtr error = xmlGetLastError();
451
- if(error)
537
+ if (error) {
452
538
  rb_exc_raise(Nokogiri_wrap_xml_syntax_error(error));
453
- else
539
+ } else {
454
540
  rb_raise(rb_eRuntimeError, "Could not create entity");
541
+ }
455
542
 
456
543
  return Qnil;
457
544
  }
458
545
 
459
- return Nokogiri_wrap_xml_node(cNokogiriXmlEntityDecl, (xmlNodePtr)ptr);
546
+ return noko_xml_node_wrap(cNokogiriXmlEntityDecl, (xmlNodePtr)ptr);
460
547
  }
461
548
 
462
- static int block_caller(void * ctx, xmlNodePtr _node, xmlNodePtr _parent)
549
+ static int
550
+ block_caller(void *ctx, xmlNodePtr c_node, xmlNodePtr c_parent_node)
463
551
  {
464
- VALUE block;
465
- VALUE node;
466
- VALUE parent;
552
+ VALUE block = (VALUE)ctx;
553
+ VALUE rb_node;
554
+ VALUE rb_parent_node;
467
555
  VALUE ret;
468
556
 
469
- if(_node->type == XML_NAMESPACE_DECL){
470
- node = Nokogiri_wrap_xml_namespace(_parent->doc, (xmlNsPtr) _node);
471
- }
472
- else{
473
- node = Nokogiri_wrap_xml_node(Qnil, _node);
557
+ if (c_node->type == XML_NAMESPACE_DECL) {
558
+ rb_node = noko_xml_namespace_wrap((xmlNsPtr)c_node, c_parent_node->doc);
559
+ } else {
560
+ rb_node = noko_xml_node_wrap(Qnil, c_node);
474
561
  }
475
- parent = _parent ? Nokogiri_wrap_xml_node(Qnil, _parent) : Qnil;
476
- block = (VALUE)ctx;
562
+ rb_parent_node = c_parent_node ? noko_xml_node_wrap(Qnil, c_parent_node) : Qnil;
477
563
 
478
- ret = rb_funcall(block, rb_intern("call"), 2, node, parent);
564
+ ret = rb_funcall(block, rb_intern("call"), 2, rb_node, rb_parent_node);
479
565
 
480
- if(Qfalse == ret || Qnil == ret) return 0;
481
-
482
- return 1;
566
+ return (Qfalse == ret || Qnil == ret) ? 0 : 1;
483
567
  }
484
568
 
485
569
  /* call-seq:
@@ -492,117 +576,194 @@ static int block_caller(void * ctx, xmlNodePtr _node, xmlNodePtr _parent)
492
576
  * The block must return a non-nil, non-false value if the +obj+ passed in
493
577
  * should be included in the canonicalized document.
494
578
  */
495
- static VALUE canonicalize(int argc, VALUE* argv, VALUE self)
579
+ static VALUE
580
+ rb_xml_document_canonicalize(int argc, VALUE *argv, VALUE self)
496
581
  {
497
- VALUE mode;
498
- VALUE incl_ns;
499
- VALUE with_comments;
500
- xmlChar **ns;
501
- long ns_len, i;
582
+ VALUE rb_mode;
583
+ VALUE rb_namespaces;
584
+ VALUE rb_comments_p;
585
+ int c_mode = 0;
586
+ xmlChar **c_namespaces;
502
587
 
503
- xmlDocPtr doc;
504
- xmlOutputBufferPtr buf;
505
- xmlC14NIsVisibleCallback cb = NULL;
506
- void * ctx = NULL;
588
+ xmlDocPtr c_doc;
589
+ xmlOutputBufferPtr c_obuf;
590
+ xmlC14NIsVisibleCallback c_callback_wrapper = NULL;
591
+ void *rb_callback = NULL;
507
592
 
508
593
  VALUE rb_cStringIO;
509
- VALUE io;
594
+ VALUE rb_io;
510
595
 
511
- rb_scan_args(argc, argv, "03", &mode, &incl_ns, &with_comments);
596
+ rb_scan_args(argc, argv, "03", &rb_mode, &rb_namespaces, &rb_comments_p);
597
+ if (!NIL_P(rb_mode)) {
598
+ Check_Type(rb_mode, T_FIXNUM);
599
+ c_mode = NUM2INT(rb_mode);
600
+ }
601
+ if (!NIL_P(rb_namespaces)) {
602
+ Check_Type(rb_namespaces, T_ARRAY);
603
+ if (c_mode == XML_C14N_1_0 || c_mode == XML_C14N_1_1) {
604
+ rb_raise(rb_eRuntimeError, "This canonicalizer does not support this operation");
605
+ }
606
+ }
512
607
 
513
- Data_Get_Struct(self, xmlDoc, doc);
608
+ c_doc = noko_xml_document_unwrap(self);
514
609
 
515
610
  rb_cStringIO = rb_const_get_at(rb_cObject, rb_intern("StringIO"));
516
- io = rb_class_new_instance(0, 0, rb_cStringIO);
517
- buf = xmlAllocOutputBuffer(NULL);
611
+ rb_io = rb_class_new_instance(0, 0, rb_cStringIO);
612
+ c_obuf = xmlAllocOutputBuffer(NULL);
518
613
 
519
- buf->writecallback = (xmlOutputWriteCallback)io_write_callback;
520
- buf->closecallback = (xmlOutputCloseCallback)io_close_callback;
521
- buf->context = (void *)io;
614
+ c_obuf->writecallback = (xmlOutputWriteCallback)noko_io_write;
615
+ c_obuf->closecallback = (xmlOutputCloseCallback)noko_io_close;
616
+ c_obuf->context = (void *)rb_io;
522
617
 
523
- if(rb_block_given_p()) {
524
- cb = block_caller;
525
- ctx = (void *)rb_block_proc();
618
+ if (rb_block_given_p()) {
619
+ c_callback_wrapper = block_caller;
620
+ rb_callback = (void *)rb_block_proc();
526
621
  }
527
622
 
528
- if(NIL_P(incl_ns)){
529
- ns = NULL;
530
- }
531
- else{
532
- Check_Type(incl_ns, T_ARRAY);
533
- ns_len = RARRAY_LEN(incl_ns);
534
- ns = calloc((size_t)ns_len+1, sizeof(xmlChar *));
535
- for (i = 0 ; i < ns_len ; i++) {
536
- VALUE entry = rb_ary_entry(incl_ns, i);
537
- ns[i] = (xmlChar*)StringValueCStr(entry);
623
+ if (NIL_P(rb_namespaces)) {
624
+ c_namespaces = NULL;
625
+ } else {
626
+ long ns_len = RARRAY_LEN(rb_namespaces);
627
+ c_namespaces = ruby_xcalloc((size_t)ns_len + 1, sizeof(xmlChar *));
628
+ for (int j = 0 ; j < ns_len ; j++) {
629
+ VALUE entry = rb_ary_entry(rb_namespaces, j);
630
+ c_namespaces[j] = (xmlChar *)StringValueCStr(entry);
538
631
  }
539
632
  }
540
633
 
634
+ xmlC14NExecute(c_doc, c_callback_wrapper, rb_callback,
635
+ c_mode,
636
+ c_namespaces,
637
+ (int)RTEST(rb_comments_p),
638
+ c_obuf);
639
+
640
+ ruby_xfree(c_namespaces);
641
+ xmlOutputBufferClose(c_obuf);
642
+
643
+ return rb_funcall(rb_io, rb_intern("string"), 0);
644
+ }
645
+
646
+ VALUE
647
+ noko_xml_document_wrap_with_init_args(VALUE klass, xmlDocPtr c_document, int argc, VALUE *argv)
648
+ {
649
+ VALUE rb_document;
650
+ nokogiriTuplePtr tuple;
651
+
652
+ if (!klass) {
653
+ klass = cNokogiriXmlDocument;
654
+ }
655
+
656
+ rb_document = TypedData_Wrap_Struct(klass, &noko_xml_document_data_type, c_document);
657
+
658
+ tuple = (nokogiriTuplePtr)ruby_xmalloc(sizeof(nokogiriTuple));
659
+ tuple->doc = rb_document;
660
+ tuple->unlinkedNodes = st_init_numtable_with_size(128);
661
+ tuple->node_cache = rb_ary_new();
662
+
663
+ c_document->_private = tuple ;
541
664
 
542
- xmlC14NExecute(doc, cb, ctx,
543
- (int) (NIL_P(mode) ? 0 : NUM2INT(mode)),
544
- ns,
545
- (int) RTEST(with_comments),
546
- buf);
665
+ rb_iv_set(rb_document, "@decorators", Qnil);
666
+ rb_iv_set(rb_document, "@errors", Qnil);
667
+ rb_iv_set(rb_document, "@node_cache", tuple->node_cache);
547
668
 
548
- xmlOutputBufferClose(buf);
669
+ rb_obj_call_init(rb_document, argc, argv);
549
670
 
550
- return rb_funcall(io, rb_intern("string"), 0);
671
+ return rb_document ;
551
672
  }
552
673
 
553
- VALUE cNokogiriXmlDocument ;
554
- void init_xml_document()
674
+
675
+ /* deprecated. use noko_xml_document_wrap() instead. */
676
+ VALUE
677
+ Nokogiri_wrap_xml_document(VALUE klass, xmlDocPtr doc)
555
678
  {
556
- VALUE nokogiri = rb_define_module("Nokogiri");
557
- VALUE xml = rb_define_module_under(nokogiri, "XML");
558
- VALUE node = rb_define_class_under(xml, "Node", rb_cObject);
679
+ /* TODO: deprecate this method in v2.0 */
680
+ return noko_xml_document_wrap_with_init_args(klass, doc, 0, NULL);
681
+ }
559
682
 
560
- /*
561
- * Nokogiri::XML::Document wraps an xml document.
562
- */
563
- VALUE klass = rb_define_class_under(xml, "Document", node);
564
-
565
- cNokogiriXmlDocument = klass;
566
-
567
- rb_define_singleton_method(klass, "read_memory", read_memory, 4);
568
- rb_define_singleton_method(klass, "read_io", read_io, 4);
569
- rb_define_singleton_method(klass, "new", new, -1);
570
-
571
- rb_define_method(klass, "root", root, 0);
572
- rb_define_method(klass, "root=", set_root, 1);
573
- rb_define_method(klass, "encoding", encoding, 0);
574
- rb_define_method(klass, "encoding=", set_encoding, 1);
575
- rb_define_method(klass, "version", version, 0);
576
- rb_define_method(klass, "canonicalize", canonicalize, -1);
577
- rb_define_method(klass, "dup", duplicate_document, -1);
578
- rb_define_method(klass, "url", url, 0);
579
- rb_define_method(klass, "create_entity", create_entity, -1);
580
- rb_define_method(klass, "remove_namespaces!", remove_namespaces_bang, 0);
683
+ VALUE
684
+ noko_xml_document_wrap(VALUE klass, xmlDocPtr doc)
685
+ {
686
+ return noko_xml_document_wrap_with_init_args(klass, doc, 0, NULL);
581
687
  }
582
688
 
689
+ xmlDocPtr
690
+ noko_xml_document_unwrap(VALUE rb_document)
691
+ {
692
+ xmlDocPtr c_document;
693
+ TypedData_Get_Struct(rb_document, xmlDoc, &noko_xml_document_data_type, c_document);
694
+ return c_document;
695
+ }
583
696
 
584
- /* this takes klass as a param because it's used for HtmlDocument, too. */
585
- VALUE Nokogiri_wrap_xml_document(VALUE klass, xmlDocPtr doc)
697
+ /* Schema creation will remove and deallocate "blank" nodes.
698
+ * If those blank nodes have been exposed to Ruby, they could get freed
699
+ * out from under the VALUE pointer. This function checks to see if any of
700
+ * those nodes have been exposed to Ruby, and if so we should raise an exception.
701
+ */
702
+ int
703
+ noko_xml_document_has_wrapped_blank_nodes_p(xmlDocPtr c_document)
586
704
  {
587
- nokogiriTuplePtr tuple = (nokogiriTuplePtr)malloc(sizeof(nokogiriTuple));
705
+ VALUE cache = DOC_NODE_CACHE(c_document);
588
706
 
589
- VALUE rb_doc = Data_Wrap_Struct(
590
- klass ? klass : cNokogiriXmlDocument,
591
- 0,
592
- dealloc,
593
- doc
594
- );
707
+ if (NIL_P(cache)) {
708
+ return 0;
709
+ }
595
710
 
596
- VALUE cache = rb_ary_new();
597
- rb_iv_set(rb_doc, "@decorators", Qnil);
598
- rb_iv_set(rb_doc, "@node_cache", cache);
711
+ for (long jnode = 0; jnode < RARRAY_LEN(cache); jnode++) {
712
+ xmlNodePtr node;
713
+ VALUE element = rb_ary_entry(cache, jnode);
599
714
 
600
- tuple->doc = rb_doc;
601
- tuple->unlinkedNodes = st_init_numtable_with_size(128);
602
- tuple->node_cache = cache;
603
- doc->_private = tuple ;
715
+ Noko_Node_Get_Struct(element, xmlNode, node);
716
+ if (xmlIsBlankNode(node)) {
717
+ return 1;
718
+ }
719
+ }
604
720
 
605
- rb_obj_call_init(rb_doc, 0, NULL);
721
+ return 0;
722
+ }
606
723
 
607
- return rb_doc ;
724
+ void
725
+ noko_xml_document_pin_node(xmlNodePtr node)
726
+ {
727
+ xmlDocPtr doc;
728
+ nokogiriTuplePtr tuple;
729
+
730
+ doc = node->doc;
731
+ tuple = (nokogiriTuplePtr)doc->_private;
732
+ st_insert(tuple->unlinkedNodes, (st_data_t)node, (st_data_t)node);
733
+ }
734
+
735
+
736
+ void
737
+ noko_xml_document_pin_namespace(xmlNsPtr ns, xmlDocPtr doc)
738
+ {
739
+ nokogiriTuplePtr tuple;
740
+
741
+ tuple = (nokogiriTuplePtr)doc->_private;
742
+ st_insert(tuple->unlinkedNodes, (st_data_t)ns, (st_data_t)ns);
743
+ }
744
+
745
+
746
+ void
747
+ noko_init_xml_document(void)
748
+ {
749
+ assert(cNokogiriXmlNode);
750
+ /*
751
+ * Nokogiri::XML::Document wraps an xml document.
752
+ */
753
+ cNokogiriXmlDocument = rb_define_class_under(mNokogiriXml, "Document", cNokogiriXmlNode);
754
+
755
+ rb_define_singleton_method(cNokogiriXmlDocument, "read_memory", read_memory, 4);
756
+ rb_define_singleton_method(cNokogiriXmlDocument, "read_io", read_io, 4);
757
+ rb_define_singleton_method(cNokogiriXmlDocument, "new", new, -1);
758
+
759
+ rb_define_method(cNokogiriXmlDocument, "root", rb_xml_document_root, 0);
760
+ rb_define_method(cNokogiriXmlDocument, "root=", rb_xml_document_root_set, 1);
761
+ rb_define_method(cNokogiriXmlDocument, "encoding", encoding, 0);
762
+ rb_define_method(cNokogiriXmlDocument, "encoding=", set_encoding, 1);
763
+ rb_define_method(cNokogiriXmlDocument, "version", version, 0);
764
+ rb_define_method(cNokogiriXmlDocument, "canonicalize", rb_xml_document_canonicalize, -1);
765
+ rb_define_method(cNokogiriXmlDocument, "dup", duplicate_document, -1);
766
+ rb_define_method(cNokogiriXmlDocument, "url", url, 0);
767
+ rb_define_method(cNokogiriXmlDocument, "create_entity", create_entity, -1);
768
+ rb_define_method(cNokogiriXmlDocument, "remove_namespaces!", remove_namespaces_bang, 0);
608
769
  }