nokogiri 1.10.9 → 1.18.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (230)
  1. checksums.yaml +4 -4
  2. data/Gemfile +38 -0
  3. data/LICENSE-DEPENDENCIES.md +1632 -1022
  4. data/LICENSE.md +1 -1
  5. data/README.md +190 -95
  6. data/bin/nokogiri +63 -50
  7. data/dependencies.yml +34 -66
  8. data/ext/nokogiri/depend +38 -358
  9. data/ext/nokogiri/extconf.rb +909 -422
  10. data/ext/nokogiri/gumbo.c +610 -0
  11. data/ext/nokogiri/html4_document.c +171 -0
  12. data/ext/nokogiri/html4_element_description.c +299 -0
  13. data/ext/nokogiri/html4_entity_lookup.c +37 -0
  14. data/ext/nokogiri/html4_sax_parser.c +40 -0
  15. data/ext/nokogiri/html4_sax_parser_context.c +98 -0
  16. data/ext/nokogiri/html4_sax_push_parser.c +96 -0
  17. data/ext/nokogiri/libxml2_polyfill.c +114 -0
  18. data/ext/nokogiri/nokogiri.c +258 -105
  19. data/ext/nokogiri/nokogiri.h +207 -90
  20. data/ext/nokogiri/test_global_handlers.c +40 -0
  21. data/ext/nokogiri/xml_attr.c +18 -18
  22. data/ext/nokogiri/xml_attribute_decl.c +22 -22
  23. data/ext/nokogiri/xml_cdata.c +33 -33
  24. data/ext/nokogiri/xml_comment.c +19 -31
  25. data/ext/nokogiri/xml_document.c +499 -323
  26. data/ext/nokogiri/xml_document_fragment.c +17 -36
  27. data/ext/nokogiri/xml_dtd.c +65 -59
  28. data/ext/nokogiri/xml_element_content.c +63 -55
  29. data/ext/nokogiri/xml_element_decl.c +31 -31
  30. data/ext/nokogiri/xml_encoding_handler.c +54 -21
  31. data/ext/nokogiri/xml_entity_decl.c +37 -35
  32. data/ext/nokogiri/xml_entity_reference.c +17 -19
  33. data/ext/nokogiri/xml_namespace.c +131 -61
  34. data/ext/nokogiri/xml_node.c +1429 -723
  35. data/ext/nokogiri/xml_node_set.c +257 -225
  36. data/ext/nokogiri/xml_processing_instruction.c +18 -20
  37. data/ext/nokogiri/xml_reader.c +340 -231
  38. data/ext/nokogiri/xml_relax_ng.c +87 -99
  39. data/ext/nokogiri/xml_sax_parser.c +269 -176
  40. data/ext/nokogiri/xml_sax_parser_context.c +286 -152
  41. data/ext/nokogiri/xml_sax_push_parser.c +111 -64
  42. data/ext/nokogiri/xml_schema.c +132 -140
  43. data/ext/nokogiri/xml_syntax_error.c +52 -23
  44. data/ext/nokogiri/xml_text.c +37 -30
  45. data/ext/nokogiri/xml_xpath_context.c +373 -185
  46. data/ext/nokogiri/xslt_stylesheet.c +342 -191
  47. data/gumbo-parser/CHANGES.md +63 -0
  48. data/gumbo-parser/Makefile +129 -0
  49. data/gumbo-parser/THANKS +27 -0
  50. data/gumbo-parser/src/Makefile +34 -0
  51. data/gumbo-parser/src/README.md +41 -0
  52. data/gumbo-parser/src/ascii.c +75 -0
  53. data/gumbo-parser/src/ascii.h +115 -0
  54. data/gumbo-parser/src/attribute.c +42 -0
  55. data/gumbo-parser/src/attribute.h +17 -0
  56. data/gumbo-parser/src/char_ref.c +22225 -0
  57. data/gumbo-parser/src/char_ref.h +29 -0
  58. data/gumbo-parser/src/char_ref.rl +2154 -0
  59. data/gumbo-parser/src/error.c +658 -0
  60. data/gumbo-parser/src/error.h +152 -0
  61. data/gumbo-parser/src/foreign_attrs.c +103 -0
  62. data/gumbo-parser/src/foreign_attrs.gperf +27 -0
  63. data/gumbo-parser/src/insertion_mode.h +33 -0
  64. data/gumbo-parser/src/macros.h +91 -0
  65. data/gumbo-parser/src/nokogiri_gumbo.h +953 -0
  66. data/gumbo-parser/src/parser.c +4932 -0
  67. data/gumbo-parser/src/parser.h +41 -0
  68. data/gumbo-parser/src/replacement.h +33 -0
  69. data/gumbo-parser/src/string_buffer.c +103 -0
  70. data/gumbo-parser/src/string_buffer.h +68 -0
  71. data/gumbo-parser/src/string_piece.c +48 -0
  72. data/gumbo-parser/src/svg_attrs.c +174 -0
  73. data/gumbo-parser/src/svg_attrs.gperf +77 -0
  74. data/gumbo-parser/src/svg_tags.c +137 -0
  75. data/gumbo-parser/src/svg_tags.gperf +55 -0
  76. data/gumbo-parser/src/tag.c +223 -0
  77. data/gumbo-parser/src/tag_lookup.c +382 -0
  78. data/gumbo-parser/src/tag_lookup.gperf +170 -0
  79. data/gumbo-parser/src/tag_lookup.h +13 -0
  80. data/gumbo-parser/src/token_buffer.c +79 -0
  81. data/gumbo-parser/src/token_buffer.h +71 -0
  82. data/gumbo-parser/src/token_type.h +17 -0
  83. data/gumbo-parser/src/tokenizer.c +3464 -0
  84. data/gumbo-parser/src/tokenizer.h +112 -0
  85. data/gumbo-parser/src/tokenizer_states.h +339 -0
  86. data/gumbo-parser/src/utf8.c +245 -0
  87. data/gumbo-parser/src/utf8.h +164 -0
  88. data/gumbo-parser/src/util.c +66 -0
  89. data/gumbo-parser/src/util.h +34 -0
  90. data/gumbo-parser/src/vector.c +111 -0
  91. data/gumbo-parser/src/vector.h +45 -0
  92. data/lib/nokogiri/class_resolver.rb +67 -0
  93. data/lib/nokogiri/css/node.rb +14 -8
  94. data/lib/nokogiri/css/parser.rb +399 -377
  95. data/lib/nokogiri/css/parser.y +250 -245
  96. data/lib/nokogiri/css/parser_extras.rb +16 -71
  97. data/lib/nokogiri/css/selector_cache.rb +38 -0
  98. data/lib/nokogiri/css/syntax_error.rb +3 -1
  99. data/lib/nokogiri/css/tokenizer.rb +7 -5
  100. data/lib/nokogiri/css/tokenizer.rex +11 -9
  101. data/lib/nokogiri/css/xpath_visitor.rb +242 -96
  102. data/lib/nokogiri/css.rb +122 -17
  103. data/lib/nokogiri/decorators/slop.rb +11 -11
  104. data/lib/nokogiri/encoding_handler.rb +57 -0
  105. data/lib/nokogiri/extension.rb +32 -0
  106. data/lib/nokogiri/gumbo.rb +15 -0
  107. data/lib/nokogiri/html.rb +38 -27
  108. data/lib/nokogiri/{html → html4}/builder.rb +4 -2
  109. data/lib/nokogiri/html4/document.rb +235 -0
  110. data/lib/nokogiri/html4/document_fragment.rb +166 -0
  111. data/lib/nokogiri/{html → html4}/element_description.rb +3 -1
  112. data/lib/nokogiri/html4/element_description_defaults.rb +2040 -0
  113. data/lib/nokogiri/html4/encoding_reader.rb +121 -0
  114. data/lib/nokogiri/{html → html4}/entity_lookup.rb +4 -2
  115. data/lib/nokogiri/html4/sax/parser.rb +48 -0
  116. data/lib/nokogiri/html4/sax/parser_context.rb +15 -0
  117. data/lib/nokogiri/{html → html4}/sax/push_parser.rb +12 -11
  118. data/lib/nokogiri/html4.rb +42 -0
  119. data/lib/nokogiri/html5/builder.rb +40 -0
  120. data/lib/nokogiri/html5/document.rb +199 -0
  121. data/lib/nokogiri/html5/document_fragment.rb +200 -0
  122. data/lib/nokogiri/html5/node.rb +103 -0
  123. data/lib/nokogiri/html5.rb +368 -0
  124. data/lib/nokogiri/jruby/dependencies.rb +3 -0
  125. data/lib/nokogiri/jruby/nokogiri_jars.rb +43 -0
  126. data/lib/nokogiri/syntax_error.rb +2 -0
  127. data/lib/nokogiri/version/constant.rb +6 -0
  128. data/lib/nokogiri/version/info.rb +224 -0
  129. data/lib/nokogiri/version.rb +3 -108
  130. data/lib/nokogiri/xml/attr.rb +55 -3
  131. data/lib/nokogiri/xml/attribute_decl.rb +6 -2
  132. data/lib/nokogiri/xml/builder.rb +83 -35
  133. data/lib/nokogiri/xml/cdata.rb +3 -1
  134. data/lib/nokogiri/xml/character_data.rb +2 -0
  135. data/lib/nokogiri/xml/document.rb +359 -130
  136. data/lib/nokogiri/xml/document_fragment.rb +170 -54
  137. data/lib/nokogiri/xml/dtd.rb +4 -2
  138. data/lib/nokogiri/xml/element_content.rb +12 -2
  139. data/lib/nokogiri/xml/element_decl.rb +6 -2
  140. data/lib/nokogiri/xml/entity_decl.rb +7 -3
  141. data/lib/nokogiri/xml/entity_reference.rb +2 -0
  142. data/lib/nokogiri/xml/namespace.rb +44 -0
  143. data/lib/nokogiri/xml/node/save_options.rb +23 -8
  144. data/lib/nokogiri/xml/node.rb +1168 -420
  145. data/lib/nokogiri/xml/node_set.rb +145 -67
  146. data/lib/nokogiri/xml/notation.rb +13 -0
  147. data/lib/nokogiri/xml/parse_options.rb +145 -52
  148. data/lib/nokogiri/xml/pp/character_data.rb +9 -6
  149. data/lib/nokogiri/xml/pp/node.rb +47 -30
  150. data/lib/nokogiri/xml/pp.rb +4 -2
  151. data/lib/nokogiri/xml/processing_instruction.rb +4 -1
  152. data/lib/nokogiri/xml/reader.rb +68 -41
  153. data/lib/nokogiri/xml/relax_ng.rb +60 -17
  154. data/lib/nokogiri/xml/sax/document.rb +198 -111
  155. data/lib/nokogiri/xml/sax/parser.rb +144 -67
  156. data/lib/nokogiri/xml/sax/parser_context.rb +119 -6
  157. data/lib/nokogiri/xml/sax/push_parser.rb +9 -5
  158. data/lib/nokogiri/xml/sax.rb +54 -4
  159. data/lib/nokogiri/xml/schema.rb +116 -39
  160. data/lib/nokogiri/xml/searchable.rb +139 -95
  161. data/lib/nokogiri/xml/syntax_error.rb +29 -5
  162. data/lib/nokogiri/xml/text.rb +2 -0
  163. data/lib/nokogiri/xml/xpath/syntax_error.rb +4 -2
  164. data/lib/nokogiri/xml/xpath.rb +15 -4
  165. data/lib/nokogiri/xml/xpath_context.rb +15 -4
  166. data/lib/nokogiri/xml.rb +45 -55
  167. data/lib/nokogiri/xslt/stylesheet.rb +32 -8
  168. data/lib/nokogiri/xslt.rb +103 -30
  169. data/lib/nokogiri.rb +59 -75
  170. data/lib/xsd/xmlparser/nokogiri.rb +32 -29
  171. data/patches/libxml2/0009-allow-wildcard-namespaces.patch +77 -0
  172. data/patches/libxml2/0010-update-config.guess-and-config.sub-for-libxml2.patch +224 -0
  173. data/patches/libxml2/0011-rip-out-libxml2-s-libc_single_threaded-support.patch +30 -0
  174. data/patches/libxml2/0019-xpath-Use-separate-static-hash-table-for-standard-fu.patch +244 -0
  175. data/patches/libxslt/0001-update-config.guess-and-config.sub-for-libxslt.patch +224 -0
  176. data/ports/archives/libxml2-2.13.6.tar.xz +0 -0
  177. data/ports/archives/libxslt-1.1.42.tar.xz +0 -0
  178. metadata +123 -295
  179. data/ext/nokogiri/html_document.c +0 -170
  180. data/ext/nokogiri/html_document.h +0 -10
  181. data/ext/nokogiri/html_element_description.c +0 -279
  182. data/ext/nokogiri/html_element_description.h +0 -10
  183. data/ext/nokogiri/html_entity_lookup.c +0 -32
  184. data/ext/nokogiri/html_entity_lookup.h +0 -8
  185. data/ext/nokogiri/html_sax_parser_context.c +0 -116
  186. data/ext/nokogiri/html_sax_parser_context.h +0 -11
  187. data/ext/nokogiri/html_sax_push_parser.c +0 -87
  188. data/ext/nokogiri/html_sax_push_parser.h +0 -9
  189. data/ext/nokogiri/xml_attr.h +0 -9
  190. data/ext/nokogiri/xml_attribute_decl.h +0 -9
  191. data/ext/nokogiri/xml_cdata.h +0 -9
  192. data/ext/nokogiri/xml_comment.h +0 -9
  193. data/ext/nokogiri/xml_document.h +0 -23
  194. data/ext/nokogiri/xml_document_fragment.h +0 -10
  195. data/ext/nokogiri/xml_dtd.h +0 -10
  196. data/ext/nokogiri/xml_element_content.h +0 -10
  197. data/ext/nokogiri/xml_element_decl.h +0 -9
  198. data/ext/nokogiri/xml_encoding_handler.h +0 -8
  199. data/ext/nokogiri/xml_entity_decl.h +0 -10
  200. data/ext/nokogiri/xml_entity_reference.h +0 -9
  201. data/ext/nokogiri/xml_io.c +0 -61
  202. data/ext/nokogiri/xml_io.h +0 -11
  203. data/ext/nokogiri/xml_libxml2_hacks.c +0 -112
  204. data/ext/nokogiri/xml_libxml2_hacks.h +0 -12
  205. data/ext/nokogiri/xml_namespace.h +0 -14
  206. data/ext/nokogiri/xml_node.h +0 -13
  207. data/ext/nokogiri/xml_node_set.h +0 -12
  208. data/ext/nokogiri/xml_processing_instruction.h +0 -9
  209. data/ext/nokogiri/xml_reader.h +0 -10
  210. data/ext/nokogiri/xml_relax_ng.h +0 -9
  211. data/ext/nokogiri/xml_sax_parser.h +0 -39
  212. data/ext/nokogiri/xml_sax_parser_context.h +0 -10
  213. data/ext/nokogiri/xml_sax_push_parser.h +0 -9
  214. data/ext/nokogiri/xml_schema.h +0 -9
  215. data/ext/nokogiri/xml_syntax_error.h +0 -13
  216. data/ext/nokogiri/xml_text.h +0 -9
  217. data/ext/nokogiri/xml_xpath_context.h +0 -10
  218. data/ext/nokogiri/xslt_stylesheet.h +0 -14
  219. data/lib/nokogiri/html/document.rb +0 -335
  220. data/lib/nokogiri/html/document_fragment.rb +0 -49
  221. data/lib/nokogiri/html/element_description_defaults.rb +0 -671
  222. data/lib/nokogiri/html/sax/parser.rb +0 -62
  223. data/lib/nokogiri/html/sax/parser_context.rb +0 -16
  224. data/patches/libxml2/0001-Revert-Do-not-URI-escape-in-server-side-includes.patch +0 -78
  225. data/patches/libxml2/0004-libxml2.la-is-in-top_builddir.patch +0 -25
  226. data/patches/libxml2/0005-Fix-infinite-loop-in-xmlStringLenDecodeEntities.patch +0 -32
  227. data/ports/archives/libxml2-2.9.10.tar.gz +0 -0
  228. data/ports/archives/libxslt-1.1.34.tar.gz +0 -0
  229. data/patches/libxml2/{0002-Remove-script-macro-support.patch → 0001-Remove-script-macro-support.patch} +0 -0
  230. data/patches/libxml2/{0003-Update-entities-to-remove-handling-of-ssi.patch → 0002-Update-entities-to-remove-handling-of-ssi.patch} +0 -0
@@ -1,91 +1,225 @@
1
- #include <xml_document.h>
2
-
3
- static int dealloc_node_i(xmlNodePtr key, xmlNodePtr node, xmlDocPtr doc)
4
- {
5
- switch(node->type) {
6
- case XML_ATTRIBUTE_NODE:
7
- xmlFreePropList((xmlAttrPtr)node);
8
- break;
9
- case XML_NAMESPACE_DECL:
10
- xmlFreeNs((xmlNsPtr)node);
11
- break;
12
- case XML_DTD_NODE:
13
- xmlFreeDtd((xmlDtdPtr)node);
14
- break;
15
- default:
16
- if(node->parent == NULL) {
17
- xmlAddChild((xmlNodePtr)doc, node);
18
- }
1
+ #include <nokogiri.h>
2
+
3
+ VALUE cNokogiriXmlDocument ;
4
+
5
+ static int
6
+ dealloc_node_i2(xmlNodePtr key, xmlNodePtr node, xmlDocPtr doc)
7
+ {
8
+ switch (node->type) {
9
+ case XML_ATTRIBUTE_NODE:
10
+ xmlFreePropList((xmlAttrPtr)node);
11
+ break;
12
+ case XML_NAMESPACE_DECL:
13
+ xmlFreeNs((xmlNsPtr)node);
14
+ break;
15
+ case XML_DTD_NODE:
16
+ xmlFreeDtd((xmlDtdPtr)node);
17
+ break;
18
+ default:
19
+ if (node->parent == NULL) {
20
+ node->next = NULL;
21
+ node->prev = NULL;
22
+ xmlAddChild((xmlNodePtr)doc, node);
23
+ }
19
24
  }
20
25
  return ST_CONTINUE;
21
26
  }
22
27
 
23
- static void remove_private(xmlNodePtr node)
28
+ static int
29
+ dealloc_node_i(st_data_t key, st_data_t node, st_data_t doc)
30
+ {
31
+ return dealloc_node_i2((xmlNodePtr)key, (xmlNodePtr)node, (xmlDocPtr)doc);
32
+ }
33
+
34
+ static void
35
+ remove_private(xmlNodePtr node)
24
36
  {
25
37
  xmlNodePtr child;
26
38
 
27
- for (child = node->children; child; child = child->next)
39
+ for (child = node->children; child; child = child->next) {
28
40
  remove_private(child);
41
+ }
29
42
 
30
43
  if ((node->type == XML_ELEMENT_NODE ||
31
44
  node->type == XML_XINCLUDE_START ||
32
45
  node->type == XML_XINCLUDE_END) &&
33
46
  node->properties) {
34
- for (child = (xmlNodePtr)node->properties; child; child = child->next)
47
+ for (child = (xmlNodePtr)node->properties; child; child = child->next) {
35
48
  remove_private(child);
49
+ }
36
50
  }
37
51
 
38
52
  node->_private = NULL;
39
53
  }
40
54
 
41
- static void dealloc(xmlDocPtr doc)
55
+ static void
56
+ mark(void *data)
42
57
  {
43
- st_table *node_hash;
58
+ xmlDocPtr doc = (xmlDocPtr)data;
59
+ nokogiriTuplePtr tuple = (nokogiriTuplePtr)doc->_private;
60
+ if (tuple) {
61
+ rb_gc_mark(tuple->doc);
62
+ rb_gc_mark(tuple->node_cache);
63
+ }
64
+ }
44
65
 
45
- NOKOGIRI_DEBUG_START(doc);
66
+ static void
67
+ dealloc(void *data)
68
+ {
69
+ xmlDocPtr doc = (xmlDocPtr)data;
70
+ st_table *node_hash;
46
71
 
47
72
  node_hash = DOC_UNLINKED_NODE_HASH(doc);
48
73
 
49
74
  st_foreach(node_hash, dealloc_node_i, (st_data_t)doc);
50
75
  st_free_table(node_hash);
51
76
 
52
- free(doc->_private);
77
+ ruby_xfree(doc->_private);
53
78
 
54
- /* When both Nokogiri and libxml-ruby are loaded, make sure that all nodes
55
- * have their _private pointers cleared. This is to avoid libxml-ruby's
56
- * xmlDeregisterNode callback from accessing VALUE pointers from ruby's GC
57
- * free context, which can result in segfaults.
79
+ #if defined(__GNUC__) && __GNUC__ >= 5
80
+ #pragma GCC diagnostic push
81
+ #pragma GCC diagnostic ignored "-Wdeprecated-declarations" // xmlDeregisterNodeDefault is deprecated as of libxml2 2.11.0
82
+ #endif
83
+ /*
84
+ * libxml-ruby < 3.0.0 uses xmlDeregisterNodeDefault. If the user is using one of those older
85
+ * versions, the registered callback from libxml-ruby will access the _private pointers set by
86
+ * nokogiri, which will result in segfaults.
87
+ *
88
+ * To avoid this, we need to clear the _private pointers from all nodes in this document tree
89
+ * before that callback gets invoked.
90
+ *
91
+ * libxml-ruby 3.0.0 was released in 2017-02, so at some point we can probably safely remove this
92
+ * safeguard (though probably pairing with a runtime check on the libxml-ruby version).
58
93
  */
59
- if (xmlDeregisterNodeDefaultValue)
94
+ if (xmlDeregisterNodeDefaultValue) {
60
95
  remove_private((xmlNodePtr)doc);
96
+ }
97
+ #if defined(__GNUC__) && __GNUC__ >= 5
98
+ #pragma GCC diagnostic pop
99
+ #endif
61
100
 
62
101
  xmlFreeDoc(doc);
102
+ }
63
103
 
64
- NOKOGIRI_DEBUG_END(doc);
104
+ static size_t
105
+ memsize_node(const xmlNodePtr node)
106
+ {
107
+ /* note we don't count namespace definitions, just going for a good-enough number here */
108
+ xmlNodePtr child;
109
+ xmlAttrPtr property;
110
+ size_t memsize = 0;
111
+
112
+ memsize += (size_t)xmlStrlen(node->name);
113
+
114
+ if (node->type == XML_ELEMENT_NODE) {
115
+ for (property = node->properties; property; property = property->next) {
116
+ memsize += sizeof(xmlAttr) + memsize_node((xmlNodePtr)property);
117
+ }
118
+ }
119
+ if (node->type == XML_TEXT_NODE) {
120
+ memsize += (size_t)xmlStrlen(node->content);
121
+ }
122
+ for (child = node->children; child; child = child->next) {
123
+ memsize += sizeof(xmlNode) + memsize_node(child);
124
+ }
125
+ return memsize;
126
+ }
127
+
128
+ static size_t
129
+ memsize(const void *data)
130
+ {
131
+ xmlDocPtr doc = (const xmlDocPtr)data;
132
+ size_t memsize = sizeof(xmlDoc);
133
+ /* This may not account for all memory use */
134
+ memsize += memsize_node((xmlNodePtr)doc);
135
+ return memsize;
65
136
  }
66
137
 
67
- static void recursively_remove_namespaces_from_node(xmlNodePtr node)
138
+ static const rb_data_type_t xml_doc_type = {
139
+ .wrap_struct_name = "xmlDoc",
140
+ .function = {
141
+ .dmark = mark,
142
+ .dfree = dealloc,
143
+ .dsize = memsize,
144
+ },
145
+ // .flags = RUBY_TYPED_FREE_IMMEDIATELY, // TODO see https://github.com/sparklemotion/nokogiri/issues/2822
146
+ };
147
+
148
+ static VALUE
149
+ _xml_document_alloc(VALUE klass)
150
+ {
151
+ return TypedData_Wrap_Struct(klass, &xml_doc_type, NULL);
152
+ }
153
+
154
+ static void
155
+ _xml_document_data_ptr_set(VALUE rb_document, xmlDocPtr c_document)
156
+ {
157
+ nokogiriTuplePtr tuple;
158
+
159
+ assert(DATA_PTR(rb_document) == NULL);
160
+ assert(c_document->_private == NULL);
161
+
162
+ DATA_PTR(rb_document) = c_document;
163
+
164
+ tuple = (nokogiriTuplePtr)ruby_xmalloc(sizeof(nokogiriTuple));
165
+ tuple->doc = rb_document;
166
+ tuple->unlinkedNodes = st_init_numtable_with_size(128);
167
+ tuple->node_cache = rb_ary_new();
168
+
169
+ c_document->_private = tuple ;
170
+
171
+ rb_iv_set(rb_document, "@node_cache", tuple->node_cache);
172
+
173
+ return;
174
+ }
175
+
176
+ /* :nodoc: */
177
+ static VALUE
178
+ rb_xml_document_initialize_copy_with_args(VALUE rb_self, VALUE rb_other, VALUE rb_level)
179
+ {
180
+ xmlDocPtr c_other, c_self;
181
+ int c_level;
182
+
183
+ c_other = noko_xml_document_unwrap(rb_other);
184
+ c_level = (int)NUM2INT(rb_level);
185
+
186
+ c_self = xmlCopyDoc(c_other, c_level);
187
+ if (c_self == NULL) { return Qnil; }
188
+
189
+ c_self->type = c_other->type;
190
+ _xml_document_data_ptr_set(rb_self, c_self);
191
+
192
+ return rb_self ;
193
+ }
194
+
195
+ static void
196
+ recursively_remove_namespaces_from_node(xmlNodePtr node)
68
197
  {
69
198
  xmlNodePtr child ;
70
199
  xmlAttrPtr property ;
71
200
 
72
201
  xmlSetNs(node, NULL);
73
202
 
74
- for (child = node->children ; child ; child = child->next)
203
+ for (child = node->children ; child ; child = child->next) {
75
204
  recursively_remove_namespaces_from_node(child);
205
+ }
76
206
 
77
207
  if (((node->type == XML_ELEMENT_NODE) ||
78
208
  (node->type == XML_XINCLUDE_START) ||
79
209
  (node->type == XML_XINCLUDE_END)) &&
80
210
  node->nsDef) {
81
- xmlFreeNsList(node->nsDef);
211
+ xmlNsPtr curr = node->nsDef;
212
+ while (curr) {
213
+ noko_xml_document_pin_namespace(curr, node->doc);
214
+ curr = curr->next;
215
+ }
82
216
  node->nsDef = NULL;
83
217
  }
84
218
 
85
219
  if (node->type == XML_ELEMENT_NODE && node->properties != NULL) {
86
220
  property = node->properties ;
87
221
  while (property != NULL) {
88
- if (property->ns) property->ns = NULL ;
222
+ if (property->ns) { property->ns = NULL ; }
89
223
  property = property->next ;
90
224
  }
91
225
  }
@@ -97,12 +231,12 @@ static void recursively_remove_namespaces_from_node(xmlNodePtr node)
97
231
  *
98
232
  * Get the url name for this document.
99
233
  */
100
- static VALUE url(VALUE self)
234
+ static VALUE
235
+ url(VALUE self)
101
236
  {
102
- xmlDocPtr doc;
103
- Data_Get_Struct(self, xmlDoc, doc);
237
+ xmlDocPtr doc = noko_xml_document_unwrap(self);
104
238
 
105
- if(doc->URL) return NOKOGIRI_STR_NEW2(doc->URL);
239
+ if (doc->URL) { return NOKOGIRI_STR_NEW2(doc->URL); }
106
240
 
107
241
  return Qnil;
108
242
  }
@@ -113,42 +247,42 @@ static VALUE url(VALUE self)
113
247
  *
114
248
  * Set the root element on this document
115
249
  */
116
- static VALUE set_root(VALUE self, VALUE root)
250
+ static VALUE
251
+ rb_xml_document_root_set(VALUE self, VALUE rb_new_root)
117
252
  {
118
- xmlDocPtr doc;
119
- xmlNodePtr new_root;
120
- xmlNodePtr old_root;
121
-
122
- Data_Get_Struct(self, xmlDoc, doc);
123
-
124
- old_root = NULL;
253
+ xmlDocPtr c_document;
254
+ xmlNodePtr c_new_root = NULL, c_current_root;
125
255
 
126
- if(NIL_P(root)) {
127
- old_root = xmlDocGetRootElement(doc);
256
+ c_document = noko_xml_document_unwrap(self);
128
257
 
129
- if(old_root) {
130
- xmlUnlinkNode(old_root);
131
- nokogiri_root_node(old_root);
132
- }
133
-
134
- return root;
258
+ c_current_root = xmlDocGetRootElement(c_document);
259
+ if (c_current_root) {
260
+ xmlUnlinkNode(c_current_root);
261
+ noko_xml_document_pin_node(c_current_root);
135
262
  }
136
263
 
137
- Data_Get_Struct(root, xmlNode, new_root);
264
+ if (!NIL_P(rb_new_root)) {
265
+ if (!rb_obj_is_kind_of(rb_new_root, cNokogiriXmlNode)) {
266
+ rb_raise(rb_eArgError,
267
+ "expected Nokogiri::XML::Node but received %"PRIsVALUE,
268
+ rb_obj_class(rb_new_root));
269
+ }
138
270
 
271
+ Noko_Node_Get_Struct(rb_new_root, xmlNode, c_new_root);
139
272
 
140
- /* If the new root's document is not the same as the current document,
141
- * then we need to dup the node in to this document. */
142
- if(new_root->doc != doc) {
143
- old_root = xmlDocGetRootElement(doc);
144
- if (!(new_root = xmlDocCopyNode(new_root, doc, 1))) {
145
- rb_raise(rb_eRuntimeError, "Could not reparent node (xmlDocCopyNode)");
273
+ /* If the new root's document is not the same as the current document,
274
+ * then we need to dup the node in to this document. */
275
+ if (c_new_root->doc != c_document) {
276
+ c_new_root = xmlDocCopyNode(c_new_root, c_document, 1);
277
+ if (!c_new_root) {
278
+ rb_raise(rb_eRuntimeError, "Could not reparent node (xmlDocCopyNode)");
279
+ }
146
280
  }
147
281
  }
148
282
 
149
- xmlDocSetRootElement(doc, new_root);
150
- if(old_root) nokogiri_root_node(old_root);
151
- return root;
283
+ xmlDocSetRootElement(c_document, c_new_root);
284
+
285
+ return rb_new_root;
152
286
  }
153
287
 
154
288
  /*
@@ -157,17 +291,20 @@ static VALUE set_root(VALUE self, VALUE root)
157
291
  *
158
292
  * Get the root node for this document.
159
293
  */
160
- static VALUE root(VALUE self)
294
+ static VALUE
295
+ rb_xml_document_root(VALUE self)
161
296
  {
162
- xmlDocPtr doc;
163
- xmlNodePtr root;
297
+ xmlDocPtr c_document;
298
+ xmlNodePtr c_root;
164
299
 
165
- Data_Get_Struct(self, xmlDoc, doc);
300
+ c_document = noko_xml_document_unwrap(self);
166
301
 
167
- root = xmlDocGetRootElement(doc);
302
+ c_root = xmlDocGetRootElement(c_document);
303
+ if (!c_root) {
304
+ return Qnil;
305
+ }
168
306
 
169
- if(!root) return Qnil;
170
- return Nokogiri_wrap_xml_node(Qnil, root) ;
307
+ return noko_xml_node_wrap(Qnil, c_root) ;
171
308
  }
172
309
 
173
310
  /*
@@ -176,13 +313,14 @@ static VALUE root(VALUE self)
176
313
  *
177
314
  * Set the encoding string for this Document
178
315
  */
179
- static VALUE set_encoding(VALUE self, VALUE encoding)
316
+ static VALUE
317
+ set_encoding(VALUE self, VALUE encoding)
180
318
  {
181
- xmlDocPtr doc;
182
- Data_Get_Struct(self, xmlDoc, doc);
319
+ xmlDocPtr doc = noko_xml_document_unwrap(self);
183
320
 
184
- if (doc->encoding)
185
- free((char *)(uintptr_t) doc->encoding); /* avoid gcc cast warning */
321
+ if (doc->encoding) {
322
+ xmlFree(DISCARD_CONST_QUAL_XMLCHAR(doc->encoding));
323
+ }
186
324
 
187
325
  doc->encoding = xmlStrdup((xmlChar *)StringValueCStr(encoding));
188
326
 
@@ -195,12 +333,12 @@ static VALUE set_encoding(VALUE self, VALUE encoding)
195
333
  *
196
334
  * Get the encoding for this Document
197
335
  */
198
- static VALUE encoding(VALUE self)
336
+ static VALUE
337
+ encoding(VALUE self)
199
338
  {
200
- xmlDocPtr doc;
201
- Data_Get_Struct(self, xmlDoc, doc);
339
+ xmlDocPtr doc = noko_xml_document_unwrap(self);
202
340
 
203
- if(!doc->encoding) return Qnil;
341
+ if (!doc->encoding) { return Qnil; }
204
342
  return NOKOGIRI_STR_NEW2(doc->encoding);
205
343
  }
206
344
 
@@ -210,12 +348,12 @@ static VALUE encoding(VALUE self)
210
348
  *
211
349
  * Get the XML version for this Document
212
350
  */
213
- static VALUE version(VALUE self)
351
+ static VALUE
352
+ version(VALUE self)
214
353
  {
215
- xmlDocPtr doc;
216
- Data_Get_Struct(self, xmlDoc, doc);
354
+ xmlDocPtr doc = noko_xml_document_unwrap(self);
217
355
 
218
- if(!doc->version) return Qnil;
356
+ if (!doc->version) { return Qnil; }
219
357
  return NOKOGIRI_STR_NEW2(doc->version);
220
358
  }
221
359
 
@@ -225,48 +363,47 @@ static VALUE version(VALUE self)
225
363
  *
226
364
  * Create a new document from an IO object
227
365
  */
228
- static VALUE read_io( VALUE klass,
229
- VALUE io,
230
- VALUE url,
231
- VALUE encoding,
232
- VALUE options )
233
- {
234
- const char * c_url = NIL_P(url) ? NULL : StringValueCStr(url);
235
- const char * c_enc = NIL_P(encoding) ? NULL : StringValueCStr(encoding);
236
- VALUE error_list = rb_ary_new();
237
- VALUE document;
238
- xmlDocPtr doc;
239
-
240
- xmlResetLastError();
241
- xmlSetStructuredErrorFunc((void *)error_list, Nokogiri_error_array_pusher);
242
-
243
- doc = xmlReadIO(
244
- (xmlInputReadCallback)io_read_callback,
245
- (xmlInputCloseCallback)io_close_callback,
246
- (void *)io,
247
- c_url,
248
- c_enc,
249
- (int)NUM2INT(options)
250
- );
251
- xmlSetStructuredErrorFunc(NULL, NULL);
252
-
253
- if(doc == NULL) {
254
- xmlErrorPtr error;
255
-
256
- xmlFreeDoc(doc);
257
-
258
- error = xmlGetLastError();
259
- if(error)
260
- rb_exc_raise(Nokogiri_wrap_xml_syntax_error(error));
261
- else
366
+ static VALUE
367
+ noko_xml_document_s_read_io(VALUE rb_class,
368
+ VALUE rb_io,
369
+ VALUE rb_url,
370
+ VALUE rb_encoding,
371
+ VALUE rb_options)
372
+ {
373
+ /* TODO: deprecate this method, parse should be the preferred entry point. then we can make this
374
+ private. */
375
+ libxmlStructuredErrorHandlerState handler_state;
376
+ VALUE rb_errors = rb_ary_new();
377
+
378
+ noko__structured_error_func_save_and_set(&handler_state, (void *)rb_errors, noko__error_array_pusher);
379
+
380
+ const char *c_url = NIL_P(rb_url) ? NULL : StringValueCStr(rb_url);
381
+ const char *c_enc = NIL_P(rb_encoding) ? NULL : StringValueCStr(rb_encoding);
382
+ xmlDocPtr c_document = xmlReadIO(
383
+ (xmlInputReadCallback)noko_io_read,
384
+ (xmlInputCloseCallback)noko_io_close,
385
+ (void *)rb_io,
386
+ c_url,
387
+ c_enc,
388
+ (int)NUM2INT(rb_options)
389
+ );
390
+
391
+ noko__structured_error_func_restore(&handler_state);
392
+
393
+ if (c_document == NULL) {
394
+ xmlFreeDoc(c_document);
395
+
396
+ VALUE exception = rb_funcall(cNokogiriXmlSyntaxError, rb_intern("aggregate"), 1, rb_errors);
397
+ if (RB_TEST(exception)) {
398
+ rb_exc_raise(exception);
399
+ } else {
262
400
  rb_raise(rb_eRuntimeError, "Could not parse document");
263
-
264
- return Qnil;
401
+ }
265
402
  }
266
403
 
267
- document = Nokogiri_wrap_xml_document(klass, doc);
268
- rb_iv_set(document, "@errors", error_list);
269
- return document;
404
+ VALUE rb_document = noko_xml_document_wrap(rb_class, c_document);
405
+ rb_iv_set(rb_document, "@errors", rb_errors);
406
+ return rb_document;
270
407
  }
271
408
 
272
409
  /*
@@ -275,92 +412,58 @@ static VALUE read_io( VALUE klass,
275
412
  *
276
413
  * Create a new document from a String
277
414
  */
278
- static VALUE read_memory( VALUE klass,
279
- VALUE string,
280
- VALUE url,
281
- VALUE encoding,
282
- VALUE options )
283
- {
284
- const char * c_buffer = StringValuePtr(string);
285
- const char * c_url = NIL_P(url) ? NULL : StringValueCStr(url);
286
- const char * c_enc = NIL_P(encoding) ? NULL : StringValueCStr(encoding);
287
- int len = (int)RSTRING_LEN(string);
288
- VALUE error_list = rb_ary_new();
289
- VALUE document;
290
- xmlDocPtr doc;
291
-
292
- xmlResetLastError();
293
- xmlSetStructuredErrorFunc((void *)error_list, Nokogiri_error_array_pusher);
294
- doc = xmlReadMemory(c_buffer, len, c_url, c_enc, (int)NUM2INT(options));
295
- xmlSetStructuredErrorFunc(NULL, NULL);
415
+ static VALUE
416
+ noko_xml_document_s_read_memory(VALUE rb_class,
417
+ VALUE rb_input,
418
+ VALUE rb_url,
419
+ VALUE rb_encoding,
420
+ VALUE rb_options)
421
+ {
422
+ /* TODO: deprecate this method, parse should be the preferred entry point. then we can make this
423
+ private. */
424
+ VALUE rb_errors = rb_ary_new();
425
+ xmlSetStructuredErrorFunc((void *)rb_errors, noko__error_array_pusher);
296
426
 
297
- if(doc == NULL) {
298
- xmlErrorPtr error;
427
+ const char *c_buffer = StringValuePtr(rb_input);
428
+ const char *c_url = NIL_P(rb_url) ? NULL : StringValueCStr(rb_url);
429
+ const char *c_enc = NIL_P(rb_encoding) ? NULL : StringValueCStr(rb_encoding);
430
+ int c_buffer_len = (int)RSTRING_LEN(rb_input);
431
+ xmlDocPtr c_document = xmlReadMemory(c_buffer, c_buffer_len, c_url, c_enc, (int)NUM2INT(rb_options));
299
432
 
300
- xmlFreeDoc(doc);
433
+ xmlSetStructuredErrorFunc(NULL, NULL);
301
434
 
302
- error = xmlGetLastError();
303
- if(error)
304
- rb_exc_raise(Nokogiri_wrap_xml_syntax_error(error));
305
- else
435
+ if (c_document == NULL) {
436
+ VALUE exception = rb_funcall(cNokogiriXmlSyntaxError, rb_intern("aggregate"), 1, rb_errors);
437
+ if (RB_TEST(exception)) {
438
+ rb_exc_raise(exception);
439
+ } else {
306
440
  rb_raise(rb_eRuntimeError, "Could not parse document");
307
-
308
- return Qnil;
441
+ }
309
442
  }
310
443
 
311
- document = Nokogiri_wrap_xml_document(klass, doc);
312
- rb_iv_set(document, "@errors", error_list);
444
+ VALUE document = noko_xml_document_wrap(rb_class, c_document);
445
+ rb_iv_set(document, "@errors", rb_errors);
313
446
  return document;
314
447
  }
315
448
 
316
449
  /*
317
450
  * call-seq:
318
- * dup
451
+ * new(version = "1.0")
319
452
  *
320
- * Copy this Document. An optional depth may be passed in, but it defaults
321
- * to a deep copy. 0 is a shallow copy, 1 is a deep copy.
453
+ * Create a new empty document declaring XML version +version+.
322
454
  */
323
- static VALUE duplicate_document(int argc, VALUE *argv, VALUE self)
324
- {
325
- xmlDocPtr doc, dup;
326
- VALUE copy;
327
- VALUE level;
328
- VALUE error_list;
329
-
330
- if(rb_scan_args(argc, argv, "01", &level) == 0)
331
- level = INT2NUM((long)1);
332
-
333
- Data_Get_Struct(self, xmlDoc, doc);
334
-
335
- dup = xmlCopyDoc(doc, (int)NUM2INT(level));
336
-
337
- if(dup == NULL) return Qnil;
338
-
339
- dup->type = doc->type;
340
- copy = Nokogiri_wrap_xml_document(rb_obj_class(self), dup);
341
- error_list = rb_iv_get(self, "@errors");
342
- rb_iv_set(copy, "@errors", error_list);
343
- return copy ;
344
- }
345
-
346
- /*
347
- * call-seq:
348
- * new(version = default)
349
- *
350
- * Create a new document with +version+ (defaults to "1.0")
351
- */
352
- static VALUE new(int argc, VALUE *argv, VALUE klass)
455
+ static VALUE
456
+ new (int argc, VALUE *argv, VALUE klass)
353
457
  {
354
458
  xmlDocPtr doc;
355
459
  VALUE version, rest, rb_doc ;
356
460
 
357
461
  rb_scan_args(argc, argv, "0*", &rest);
358
462
  version = rb_ary_entry(rest, (long)0);
359
- if (NIL_P(version)) version = rb_str_new2("1.0");
463
+ if (NIL_P(version)) { version = rb_str_new2("1.0"); }
360
464
 
361
465
  doc = xmlNewDoc((xmlChar *)StringValueCStr(version));
362
- rb_doc = Nokogiri_wrap_xml_document(klass, doc);
363
- rb_obj_call_init(rb_doc, argc, argv);
466
+ rb_doc = noko_xml_document_wrap_with_init_args(klass, doc, argc, argv);
364
467
  return rb_doc ;
365
468
  }
366
469
 
@@ -401,85 +504,87 @@ static VALUE new(int argc, VALUE *argv, VALUE klass)
401
504
  * please direct your browser to
402
505
  * http://tenderlovemaking.com/2009/04/23/namespaces-in-xml.html
403
506
  */
404
- VALUE remove_namespaces_bang(VALUE self)
507
+ static VALUE
508
+ remove_namespaces_bang(VALUE self)
405
509
  {
406
- xmlDocPtr doc ;
407
- Data_Get_Struct(self, xmlDoc, doc);
510
+ xmlDocPtr doc = noko_xml_document_unwrap(self);
408
511
 
409
512
  recursively_remove_namespaces_from_node((xmlNodePtr)doc);
410
513
  return self;
411
514
  }
412
515
 
413
- /* call-seq: doc.create_entity(name, type, external_id, system_id, content)
516
+ /* call-seq:
517
+ * doc.create_entity(name, type, external_id, system_id, content)
414
518
  *
415
519
  * Create a new entity named +name+.
416
520
  *
417
- * +type+ is an integer representing the type of entity to be created, and it
418
- * defaults to Nokogiri::XML::EntityDecl::INTERNAL_GENERAL. See
419
- * the constants on Nokogiri::XML::EntityDecl for more information.
521
+ * +type+ is an integer representing the type of entity to be created, and it defaults to
522
+ * +Nokogiri::XML::EntityDecl::INTERNAL_GENERAL+. See the constants on Nokogiri::XML::EntityDecl for
523
+ * more information.
420
524
  *
421
525
  * +external_id+, +system_id+, and +content+ set the External ID, System ID,
422
526
  * and content respectively. All of these parameters are optional.
423
527
  */
424
- static VALUE create_entity(int argc, VALUE *argv, VALUE self)
425
- {
426
- VALUE name;
427
- VALUE type;
428
- VALUE external_id;
429
- VALUE system_id;
430
- VALUE content;
431
- xmlEntityPtr ptr;
432
- xmlDocPtr doc ;
433
-
434
- Data_Get_Struct(self, xmlDoc, doc);
435
-
436
- rb_scan_args(argc, argv, "14", &name, &type, &external_id, &system_id,
437
- &content);
438
-
439
- xmlResetLastError();
440
- ptr = xmlAddDocEntity(
441
- doc,
442
- (xmlChar *)(NIL_P(name) ? NULL : StringValueCStr(name)),
443
- (int) (NIL_P(type) ? XML_INTERNAL_GENERAL_ENTITY : NUM2INT(type)),
444
- (xmlChar *)(NIL_P(external_id) ? NULL : StringValueCStr(external_id)),
445
- (xmlChar *)(NIL_P(system_id) ? NULL : StringValueCStr(system_id)),
446
- (xmlChar *)(NIL_P(content) ? NULL : StringValueCStr(content))
447
- );
448
-
449
- if(NULL == ptr) {
450
- xmlErrorPtr error = xmlGetLastError();
451
- if(error)
452
- rb_exc_raise(Nokogiri_wrap_xml_syntax_error(error));
453
- else
528
+ static VALUE
529
+ noko_xml_document__create_entity(int argc, VALUE *argv, VALUE rb_document)
530
+ {
531
+ VALUE rb_name;
532
+ VALUE rb_type;
533
+ VALUE rb_ext_id;
534
+ VALUE rb_sys_id;
535
+ VALUE rb_content;
536
+
537
+ rb_scan_args(argc, argv, "14",
538
+ &rb_name,
539
+ &rb_type, &rb_ext_id, &rb_sys_id, &rb_content);
540
+
541
+ xmlDocPtr c_document = noko_xml_document_unwrap(rb_document);
542
+
543
+ libxmlStructuredErrorHandlerState handler_state;
544
+ VALUE rb_errors = rb_ary_new();
545
+ noko__structured_error_func_save_and_set(&handler_state, (void *)rb_errors, noko__error_array_pusher);
546
+
547
+ xmlEntityPtr c_entity = xmlAddDocEntity(
548
+ c_document,
549
+ (xmlChar *)(NIL_P(rb_name) ? NULL : StringValueCStr(rb_name)),
550
+ (int)(NIL_P(rb_type) ? XML_INTERNAL_GENERAL_ENTITY : NUM2INT(rb_type)),
551
+ (xmlChar *)(NIL_P(rb_ext_id) ? NULL : StringValueCStr(rb_ext_id)),
552
+ (xmlChar *)(NIL_P(rb_sys_id) ? NULL : StringValueCStr(rb_sys_id)),
553
+ (xmlChar *)(NIL_P(rb_content) ? NULL : StringValueCStr(rb_content))
554
+ );
555
+
556
+ noko__structured_error_func_restore(&handler_state);
557
+
558
+ if (NULL == c_entity) {
559
+ VALUE exception = rb_funcall(cNokogiriXmlSyntaxError, rb_intern("aggregate"), 1, rb_errors);
560
+ if (RB_TEST(exception)) {
561
+ rb_exc_raise(exception);
562
+ } else {
454
563
  rb_raise(rb_eRuntimeError, "Could not create entity");
455
-
456
- return Qnil;
564
+ }
457
565
  }
458
566
 
459
- return Nokogiri_wrap_xml_node(cNokogiriXmlEntityDecl, (xmlNodePtr)ptr);
567
+ return noko_xml_node_wrap(cNokogiriXmlEntityDecl, (xmlNodePtr)c_entity);
460
568
  }
461
569
 
462
- static int block_caller(void * ctx, xmlNodePtr _node, xmlNodePtr _parent)
570
+ static int
571
+ block_caller(void *ctx, xmlNodePtr c_node, xmlNodePtr c_parent_node)
463
572
  {
464
- VALUE block;
465
- VALUE node;
466
- VALUE parent;
573
+ VALUE block = (VALUE)ctx;
574
+ VALUE rb_node;
575
+ VALUE rb_parent_node;
467
576
  VALUE ret;
468
577
 
469
- if(_node->type == XML_NAMESPACE_DECL){
470
- node = Nokogiri_wrap_xml_namespace(_parent->doc, (xmlNsPtr) _node);
471
- }
472
- else{
473
- node = Nokogiri_wrap_xml_node(Qnil, _node);
578
+ if (c_node->type == XML_NAMESPACE_DECL) {
579
+ rb_node = noko_xml_namespace_wrap((xmlNsPtr)c_node, c_parent_node->doc);
580
+ } else {
581
+ rb_node = noko_xml_node_wrap(Qnil, c_node);
474
582
  }
475
- parent = _parent ? Nokogiri_wrap_xml_node(Qnil, _parent) : Qnil;
476
- block = (VALUE)ctx;
477
-
478
- ret = rb_funcall(block, rb_intern("call"), 2, node, parent);
583
+ rb_parent_node = c_parent_node ? noko_xml_node_wrap(Qnil, c_parent_node) : Qnil;
479
584
 
480
- if(Qfalse == ret || Qnil == ret) return 0;
585
+ ret = rb_funcall(block, rb_intern("call"), 2, rb_node, rb_parent_node);
481
586
 
482
- return 1;
587
+ return (Qfalse == ret || Qnil == ret) ? 0 : 1;
483
588
  }
484
589
 
485
590
  /* call-seq:
@@ -492,117 +597,188 @@ static int block_caller(void * ctx, xmlNodePtr _node, xmlNodePtr _parent)
492
597
  * The block must return a non-nil, non-false value if the +obj+ passed in
493
598
  * should be included in the canonicalized document.
494
599
  */
495
- static VALUE canonicalize(int argc, VALUE* argv, VALUE self)
600
+ static VALUE
601
+ rb_xml_document_canonicalize(int argc, VALUE *argv, VALUE self)
496
602
  {
497
- VALUE mode;
498
- VALUE incl_ns;
499
- VALUE with_comments;
500
- xmlChar **ns;
501
- long ns_len, i;
603
+ VALUE rb_mode;
604
+ VALUE rb_namespaces;
605
+ VALUE rb_comments_p;
606
+ int c_mode = 0;
607
+ xmlChar **c_namespaces;
502
608
 
503
- xmlDocPtr doc;
504
- xmlOutputBufferPtr buf;
505
- xmlC14NIsVisibleCallback cb = NULL;
506
- void * ctx = NULL;
609
+ xmlDocPtr c_doc;
610
+ xmlOutputBufferPtr c_obuf;
611
+ xmlC14NIsVisibleCallback c_callback_wrapper = NULL;
612
+ void *rb_callback = NULL;
507
613
 
508
614
  VALUE rb_cStringIO;
509
- VALUE io;
615
+ VALUE rb_io;
510
616
 
511
- rb_scan_args(argc, argv, "03", &mode, &incl_ns, &with_comments);
617
+ rb_scan_args(argc, argv, "03", &rb_mode, &rb_namespaces, &rb_comments_p);
618
+ if (!NIL_P(rb_mode)) {
619
+ Check_Type(rb_mode, T_FIXNUM);
620
+ c_mode = NUM2INT(rb_mode);
621
+ }
622
+ if (!NIL_P(rb_namespaces)) {
623
+ Check_Type(rb_namespaces, T_ARRAY);
624
+ if (c_mode == XML_C14N_1_0 || c_mode == XML_C14N_1_1) {
625
+ rb_raise(rb_eRuntimeError, "This canonicalizer does not support this operation");
626
+ }
627
+ }
512
628
 
513
- Data_Get_Struct(self, xmlDoc, doc);
629
+ c_doc = noko_xml_document_unwrap(self);
514
630
 
515
631
  rb_cStringIO = rb_const_get_at(rb_cObject, rb_intern("StringIO"));
516
- io = rb_class_new_instance(0, 0, rb_cStringIO);
517
- buf = xmlAllocOutputBuffer(NULL);
632
+ rb_io = rb_class_new_instance(0, 0, rb_cStringIO);
633
+ c_obuf = xmlAllocOutputBuffer(NULL);
518
634
 
519
- buf->writecallback = (xmlOutputWriteCallback)io_write_callback;
520
- buf->closecallback = (xmlOutputCloseCallback)io_close_callback;
521
- buf->context = (void *)io;
635
+ c_obuf->writecallback = (xmlOutputWriteCallback)noko_io_write;
636
+ c_obuf->closecallback = (xmlOutputCloseCallback)noko_io_close;
637
+ c_obuf->context = (void *)rb_io;
522
638
 
523
- if(rb_block_given_p()) {
524
- cb = block_caller;
525
- ctx = (void *)rb_block_proc();
639
+ if (rb_block_given_p()) {
640
+ c_callback_wrapper = block_caller;
641
+ rb_callback = (void *)rb_block_proc();
526
642
  }
527
643
 
528
- if(NIL_P(incl_ns)){
529
- ns = NULL;
530
- }
531
- else{
532
- Check_Type(incl_ns, T_ARRAY);
533
- ns_len = RARRAY_LEN(incl_ns);
534
- ns = calloc((size_t)ns_len+1, sizeof(xmlChar *));
535
- for (i = 0 ; i < ns_len ; i++) {
536
- VALUE entry = rb_ary_entry(incl_ns, i);
537
- ns[i] = (xmlChar*)StringValueCStr(entry);
644
+ if (NIL_P(rb_namespaces)) {
645
+ c_namespaces = NULL;
646
+ } else {
647
+ long ns_len = RARRAY_LEN(rb_namespaces);
648
+ c_namespaces = ruby_xcalloc((size_t)ns_len + 1, sizeof(xmlChar *));
649
+ for (int j = 0 ; j < ns_len ; j++) {
650
+ VALUE entry = rb_ary_entry(rb_namespaces, j);
651
+ c_namespaces[j] = (xmlChar *)StringValueCStr(entry);
538
652
  }
539
653
  }
540
654
 
655
+ xmlC14NExecute(c_doc, c_callback_wrapper, rb_callback,
656
+ c_mode,
657
+ c_namespaces,
658
+ (int)RTEST(rb_comments_p),
659
+ c_obuf);
541
660
 
542
- xmlC14NExecute(doc, cb, ctx,
543
- (int) (NIL_P(mode) ? 0 : NUM2INT(mode)),
544
- ns,
545
- (int) RTEST(with_comments),
546
- buf);
661
+ ruby_xfree(c_namespaces);
662
+ xmlOutputBufferClose(c_obuf);
547
663
 
548
- xmlOutputBufferClose(buf);
664
+ return rb_funcall(rb_io, rb_intern("string"), 0);
665
+ }
549
666
 
550
- return rb_funcall(io, rb_intern("string"), 0);
667
+ VALUE
668
+ noko_xml_document_wrap_with_init_args(VALUE klass, xmlDocPtr c_document, int argc, VALUE *argv)
669
+ {
670
+ VALUE rb_document;
671
+
672
+ if (!klass) {
673
+ klass = cNokogiriXmlDocument;
674
+ }
675
+
676
+ rb_document = _xml_document_alloc(klass);
677
+ _xml_document_data_ptr_set(rb_document, c_document);
678
+
679
+ rb_iv_set(rb_document, "@decorators", Qnil);
680
+ rb_iv_set(rb_document, "@errors", Qnil);
681
+
682
+ rb_obj_call_init(rb_document, argc, argv);
683
+
684
+ return rb_document ;
551
685
  }
552
686
 
553
- VALUE cNokogiriXmlDocument ;
554
- void init_xml_document()
687
+
688
+ /* deprecated. use noko_xml_document_wrap() instead. */
689
+ VALUE
690
+ Nokogiri_wrap_xml_document(VALUE klass, xmlDocPtr doc)
555
691
  {
556
- VALUE nokogiri = rb_define_module("Nokogiri");
557
- VALUE xml = rb_define_module_under(nokogiri, "XML");
558
- VALUE node = rb_define_class_under(xml, "Node", rb_cObject);
692
+ /* TODO: deprecate this method in v2.0 */
693
+ return noko_xml_document_wrap_with_init_args(klass, doc, 0, NULL);
694
+ }
559
695
 
560
- /*
561
- * Nokogiri::XML::Document wraps an xml document.
562
- */
563
- VALUE klass = rb_define_class_under(xml, "Document", node);
696
+ VALUE
697
+ noko_xml_document_wrap(VALUE klass, xmlDocPtr doc)
698
+ {
699
+ return noko_xml_document_wrap_with_init_args(klass, doc, 0, NULL);
700
+ }
701
+
702
+ xmlDocPtr
703
+ noko_xml_document_unwrap(VALUE rb_document)
704
+ {
705
+ xmlDocPtr c_document;
706
+ TypedData_Get_Struct(rb_document, xmlDoc, &xml_doc_type, c_document);
707
+ return c_document;
708
+ }
709
+
710
+ /* Schema creation will remove and deallocate "blank" nodes.
711
+ * If those blank nodes have been exposed to Ruby, they could get freed
712
+ * out from under the VALUE pointer. This function checks to see if any of
713
+ * those nodes have been exposed to Ruby, and if so we should raise an exception.
714
+ */
715
+ int
716
+ noko_xml_document_has_wrapped_blank_nodes_p(xmlDocPtr c_document)
717
+ {
718
+ VALUE cache = DOC_NODE_CACHE(c_document);
719
+
720
+ if (NIL_P(cache)) {
721
+ return 0;
722
+ }
723
+
724
+ for (long jnode = 0; jnode < RARRAY_LEN(cache); jnode++) {
725
+ xmlNodePtr node;
726
+ VALUE element = rb_ary_entry(cache, jnode);
727
+
728
+ Noko_Node_Get_Struct(element, xmlNode, node);
729
+ if (xmlIsBlankNode(node)) {
730
+ return 1;
731
+ }
732
+ }
733
+
734
+ return 0;
735
+ }
564
736
 
565
- cNokogiriXmlDocument = klass;
737
+ void
738
+ noko_xml_document_pin_node(xmlNodePtr node)
739
+ {
740
+ xmlDocPtr doc;
741
+ nokogiriTuplePtr tuple;
742
+
743
+ doc = node->doc;
744
+ tuple = (nokogiriTuplePtr)doc->_private;
745
+ st_insert(tuple->unlinkedNodes, (st_data_t)node, (st_data_t)node);
746
+ }
566
747
 
567
- rb_define_singleton_method(klass, "read_memory", read_memory, 4);
568
- rb_define_singleton_method(klass, "read_io", read_io, 4);
569
- rb_define_singleton_method(klass, "new", new, -1);
570
748
 
571
- rb_define_method(klass, "root", root, 0);
572
- rb_define_method(klass, "root=", set_root, 1);
573
- rb_define_method(klass, "encoding", encoding, 0);
574
- rb_define_method(klass, "encoding=", set_encoding, 1);
575
- rb_define_method(klass, "version", version, 0);
576
- rb_define_method(klass, "canonicalize", canonicalize, -1);
577
- rb_define_method(klass, "dup", duplicate_document, -1);
578
- rb_define_method(klass, "url", url, 0);
579
- rb_define_method(klass, "create_entity", create_entity, -1);
580
- rb_define_method(klass, "remove_namespaces!", remove_namespaces_bang, 0);
749
+ void
750
+ noko_xml_document_pin_namespace(xmlNsPtr ns, xmlDocPtr doc)
751
+ {
752
+ nokogiriTuplePtr tuple;
753
+
754
+ tuple = (nokogiriTuplePtr)doc->_private;
755
+ st_insert(tuple->unlinkedNodes, (st_data_t)ns, (st_data_t)ns);
581
756
  }
582
757
 
583
758
 
584
- /* this takes klass as a param because it's used for HtmlDocument, too. */
585
- VALUE Nokogiri_wrap_xml_document(VALUE klass, xmlDocPtr doc)
759
+ void
760
+ noko_init_xml_document(void)
586
761
  {
587
- nokogiriTuplePtr tuple = (nokogiriTuplePtr)malloc(sizeof(nokogiriTuple));
762
+ assert(cNokogiriXmlNode);
588
763
 
589
- VALUE rb_doc = Data_Wrap_Struct(
590
- klass ? klass : cNokogiriXmlDocument,
591
- 0,
592
- dealloc,
593
- doc
594
- );
764
+ cNokogiriXmlDocument = rb_define_class_under(mNokogiriXml, "Document", cNokogiriXmlNode);
595
765
 
596
- VALUE cache = rb_ary_new();
597
- rb_iv_set(rb_doc, "@decorators", Qnil);
598
- rb_iv_set(rb_doc, "@node_cache", cache);
766
+ rb_define_alloc_func(cNokogiriXmlDocument, _xml_document_alloc);
599
767
 
600
- tuple->doc = rb_doc;
601
- tuple->unlinkedNodes = st_init_numtable_with_size(128);
602
- tuple->node_cache = cache;
603
- doc->_private = tuple ;
768
+ rb_define_singleton_method(cNokogiriXmlDocument, "read_memory", noko_xml_document_s_read_memory, 4);
769
+ rb_define_singleton_method(cNokogiriXmlDocument, "read_io", noko_xml_document_s_read_io, 4);
770
+ rb_define_singleton_method(cNokogiriXmlDocument, "new", new, -1);
604
771
 
605
- rb_obj_call_init(rb_doc, 0, NULL);
772
+ rb_define_method(cNokogiriXmlDocument, "root", rb_xml_document_root, 0);
773
+ rb_define_method(cNokogiriXmlDocument, "root=", rb_xml_document_root_set, 1);
774
+ rb_define_method(cNokogiriXmlDocument, "encoding", encoding, 0);
775
+ rb_define_method(cNokogiriXmlDocument, "encoding=", set_encoding, 1);
776
+ rb_define_method(cNokogiriXmlDocument, "version", version, 0);
777
+ rb_define_method(cNokogiriXmlDocument, "canonicalize", rb_xml_document_canonicalize, -1);
778
+ rb_define_method(cNokogiriXmlDocument, "url", url, 0);
779
+ rb_define_method(cNokogiriXmlDocument, "create_entity", noko_xml_document__create_entity, -1);
780
+ rb_define_method(cNokogiriXmlDocument, "remove_namespaces!", remove_namespaces_bang, 0);
606
781
 
607
- return rb_doc ;
782
+ rb_define_protected_method(cNokogiriXmlDocument, "initialize_copy_with_args", rb_xml_document_initialize_copy_with_args,
783
+ 2);
608
784
  }