nokogiri 1.0.0 → 1.6.8.1

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (309) hide show
  1. checksums.yaml +7 -0
  2. data/.autotest +26 -0
  3. data/.cross_rubies +9 -0
  4. data/.editorconfig +17 -0
  5. data/.gemtest +0 -0
  6. data/.travis.yml +51 -0
  7. data/CHANGELOG.rdoc +1160 -0
  8. data/CONTRIBUTING.md +42 -0
  9. data/C_CODING_STYLE.rdoc +33 -0
  10. data/Gemfile +22 -0
  11. data/LICENSE.txt +31 -0
  12. data/Manifest.txt +284 -40
  13. data/README.md +166 -0
  14. data/ROADMAP.md +111 -0
  15. data/Rakefile +310 -199
  16. data/STANDARD_RESPONSES.md +47 -0
  17. data/Y_U_NO_GEMSPEC.md +155 -0
  18. data/appveyor.yml +22 -0
  19. data/bin/nokogiri +118 -0
  20. data/build_all +45 -0
  21. data/dependencies.yml +29 -0
  22. data/ext/nokogiri/depend +358 -0
  23. data/ext/nokogiri/extconf.rb +664 -34
  24. data/ext/nokogiri/html_document.c +120 -33
  25. data/ext/nokogiri/html_document.h +1 -1
  26. data/ext/nokogiri/html_element_description.c +279 -0
  27. data/ext/nokogiri/html_element_description.h +10 -0
  28. data/ext/nokogiri/html_entity_lookup.c +32 -0
  29. data/ext/nokogiri/html_entity_lookup.h +8 -0
  30. data/ext/nokogiri/html_sax_parser_context.c +116 -0
  31. data/ext/nokogiri/html_sax_parser_context.h +11 -0
  32. data/ext/nokogiri/html_sax_push_parser.c +87 -0
  33. data/ext/nokogiri/html_sax_push_parser.h +9 -0
  34. data/ext/nokogiri/nokogiri.c +145 -0
  35. data/ext/nokogiri/nokogiri.h +131 -0
  36. data/ext/nokogiri/xml_attr.c +94 -0
  37. data/ext/nokogiri/xml_attr.h +9 -0
  38. data/ext/nokogiri/xml_attribute_decl.c +70 -0
  39. data/ext/nokogiri/xml_attribute_decl.h +9 -0
  40. data/ext/nokogiri/xml_cdata.c +23 -19
  41. data/ext/nokogiri/xml_cdata.h +1 -1
  42. data/ext/nokogiri/xml_comment.c +69 -0
  43. data/ext/nokogiri/xml_comment.h +9 -0
  44. data/ext/nokogiri/xml_document.c +501 -54
  45. data/ext/nokogiri/xml_document.h +14 -1
  46. data/ext/nokogiri/xml_document_fragment.c +48 -0
  47. data/ext/nokogiri/xml_document_fragment.h +10 -0
  48. data/ext/nokogiri/xml_dtd.c +109 -24
  49. data/ext/nokogiri/xml_dtd.h +3 -1
  50. data/ext/nokogiri/xml_element_content.c +123 -0
  51. data/ext/nokogiri/xml_element_content.h +10 -0
  52. data/ext/nokogiri/xml_element_decl.c +69 -0
  53. data/ext/nokogiri/xml_element_decl.h +9 -0
  54. data/ext/nokogiri/xml_encoding_handler.c +79 -0
  55. data/ext/nokogiri/xml_encoding_handler.h +8 -0
  56. data/ext/nokogiri/xml_entity_decl.c +110 -0
  57. data/ext/nokogiri/xml_entity_decl.h +10 -0
  58. data/ext/nokogiri/xml_entity_reference.c +52 -0
  59. data/ext/nokogiri/xml_entity_reference.h +9 -0
  60. data/ext/nokogiri/xml_io.c +60 -0
  61. data/ext/nokogiri/xml_io.h +11 -0
  62. data/ext/nokogiri/xml_libxml2_hacks.c +112 -0
  63. data/ext/nokogiri/xml_libxml2_hacks.h +12 -0
  64. data/ext/nokogiri/xml_namespace.c +117 -0
  65. data/ext/nokogiri/xml_namespace.h +13 -0
  66. data/ext/nokogiri/xml_node.c +1285 -315
  67. data/ext/nokogiri/xml_node.h +4 -6
  68. data/ext/nokogiri/xml_node_set.c +415 -54
  69. data/ext/nokogiri/xml_node_set.h +6 -2
  70. data/ext/nokogiri/xml_processing_instruction.c +56 -0
  71. data/ext/nokogiri/xml_processing_instruction.h +9 -0
  72. data/ext/nokogiri/xml_reader.c +316 -77
  73. data/ext/nokogiri/xml_reader.h +1 -1
  74. data/ext/nokogiri/xml_relax_ng.c +161 -0
  75. data/ext/nokogiri/xml_relax_ng.h +9 -0
  76. data/ext/nokogiri/xml_sax_parser.c +215 -80
  77. data/ext/nokogiri/xml_sax_parser.h +30 -1
  78. data/ext/nokogiri/xml_sax_parser_context.c +262 -0
  79. data/ext/nokogiri/xml_sax_parser_context.h +10 -0
  80. data/ext/nokogiri/xml_sax_push_parser.c +115 -0
  81. data/ext/nokogiri/xml_sax_push_parser.h +9 -0
  82. data/ext/nokogiri/xml_schema.c +205 -0
  83. data/ext/nokogiri/xml_schema.h +9 -0
  84. data/ext/nokogiri/xml_syntax_error.c +45 -175
  85. data/ext/nokogiri/xml_syntax_error.h +4 -2
  86. data/ext/nokogiri/xml_text.c +37 -14
  87. data/ext/nokogiri/xml_text.h +1 -1
  88. data/ext/nokogiri/xml_xpath_context.c +230 -13
  89. data/ext/nokogiri/xml_xpath_context.h +2 -1
  90. data/ext/nokogiri/xslt_stylesheet.c +196 -34
  91. data/ext/nokogiri/xslt_stylesheet.h +6 -1
  92. data/lib/nokogiri/css/node.rb +18 -61
  93. data/lib/nokogiri/css/parser.rb +725 -17
  94. data/lib/nokogiri/css/parser.y +126 -63
  95. data/lib/nokogiri/css/parser_extras.rb +91 -0
  96. data/lib/nokogiri/css/syntax_error.rb +7 -0
  97. data/lib/nokogiri/css/tokenizer.rb +148 -5
  98. data/lib/nokogiri/css/tokenizer.rex +31 -39
  99. data/lib/nokogiri/css/xpath_visitor.rb +109 -51
  100. data/lib/nokogiri/css.rb +24 -3
  101. data/lib/nokogiri/decorators/slop.rb +42 -0
  102. data/lib/nokogiri/html/builder.rb +27 -1
  103. data/lib/nokogiri/html/document.rb +329 -3
  104. data/lib/nokogiri/html/document_fragment.rb +39 -0
  105. data/lib/nokogiri/html/element_description.rb +23 -0
  106. data/lib/nokogiri/html/element_description_defaults.rb +671 -0
  107. data/lib/nokogiri/html/entity_lookup.rb +13 -0
  108. data/lib/nokogiri/html/sax/parser.rb +35 -4
  109. data/lib/nokogiri/html/sax/parser_context.rb +16 -0
  110. data/lib/nokogiri/html/sax/push_parser.rb +36 -0
  111. data/lib/nokogiri/html.rb +18 -76
  112. data/lib/nokogiri/syntax_error.rb +4 -0
  113. data/lib/nokogiri/version.rb +106 -1
  114. data/lib/nokogiri/xml/attr.rb +14 -0
  115. data/lib/nokogiri/xml/attribute_decl.rb +18 -0
  116. data/lib/nokogiri/xml/builder.rb +395 -31
  117. data/lib/nokogiri/xml/cdata.rb +4 -2
  118. data/lib/nokogiri/xml/character_data.rb +7 -0
  119. data/lib/nokogiri/xml/document.rb +267 -12
  120. data/lib/nokogiri/xml/document_fragment.rb +149 -0
  121. data/lib/nokogiri/xml/dtd.rb +27 -1
  122. data/lib/nokogiri/xml/element_content.rb +36 -0
  123. data/lib/nokogiri/xml/element_decl.rb +13 -0
  124. data/lib/nokogiri/xml/entity_decl.rb +19 -0
  125. data/lib/nokogiri/xml/namespace.rb +13 -0
  126. data/lib/nokogiri/xml/node/save_options.rb +61 -0
  127. data/lib/nokogiri/xml/node.rb +748 -109
  128. data/lib/nokogiri/xml/node_set.rb +200 -72
  129. data/lib/nokogiri/xml/parse_options.rb +120 -0
  130. data/lib/nokogiri/xml/pp/character_data.rb +18 -0
  131. data/lib/nokogiri/xml/pp/node.rb +56 -0
  132. data/lib/nokogiri/xml/pp.rb +2 -0
  133. data/lib/nokogiri/xml/processing_instruction.rb +8 -0
  134. data/lib/nokogiri/xml/reader.rb +102 -4
  135. data/lib/nokogiri/xml/relax_ng.rb +32 -0
  136. data/lib/nokogiri/xml/sax/document.rb +114 -2
  137. data/lib/nokogiri/xml/sax/parser.rb +97 -7
  138. data/lib/nokogiri/xml/sax/parser_context.rb +16 -0
  139. data/lib/nokogiri/xml/sax/push_parser.rb +60 -0
  140. data/lib/nokogiri/xml/sax.rb +2 -7
  141. data/lib/nokogiri/xml/schema.rb +63 -0
  142. data/lib/nokogiri/xml/searchable.rb +221 -0
  143. data/lib/nokogiri/xml/syntax_error.rb +27 -1
  144. data/lib/nokogiri/xml/text.rb +4 -1
  145. data/lib/nokogiri/xml/xpath/syntax_error.rb +11 -0
  146. data/lib/nokogiri/xml/xpath.rb +4 -0
  147. data/lib/nokogiri/xml/xpath_context.rb +3 -1
  148. data/lib/nokogiri/xml.rb +45 -38
  149. data/lib/nokogiri/xslt/stylesheet.rb +19 -0
  150. data/lib/nokogiri/xslt.rb +47 -2
  151. data/lib/nokogiri.rb +117 -24
  152. data/lib/xsd/xmlparser/nokogiri.rb +102 -0
  153. data/patches/sort-patches-by-date +25 -0
  154. data/ports/archives/libxml2-2.9.4.tar.gz +0 -0
  155. data/ports/archives/libxslt-1.1.29.tar.gz +0 -0
  156. data/suppressions/README.txt +1 -0
  157. data/suppressions/nokogiri_ree-1.8.7.358.supp +61 -0
  158. data/suppressions/nokogiri_ruby-1.8.7.370.supp +0 -0
  159. data/suppressions/nokogiri_ruby-1.9.2.320.supp +28 -0
  160. data/suppressions/nokogiri_ruby-1.9.3.327.supp +28 -0
  161. data/tasks/test.rb +100 -0
  162. data/test/css/test_nthiness.rb +73 -6
  163. data/test/css/test_parser.rb +184 -39
  164. data/test/css/test_tokenizer.rb +72 -19
  165. data/test/css/test_xpath_visitor.rb +44 -2
  166. data/test/decorators/test_slop.rb +20 -0
  167. data/test/files/2ch.html +108 -0
  168. data/test/files/GH_1042.html +18 -0
  169. data/test/files/address_book.rlx +12 -0
  170. data/test/files/address_book.xml +10 -0
  171. data/test/files/atom.xml +344 -0
  172. data/test/files/bar/bar.xsd +4 -0
  173. data/test/files/bogus.xml +0 -0
  174. data/test/files/dont_hurt_em_why.xml +422 -0
  175. data/test/files/encoding.html +82 -0
  176. data/test/files/encoding.xhtml +84 -0
  177. data/test/files/exslt.xml +8 -0
  178. data/test/files/exslt.xslt +35 -0
  179. data/test/files/foo/foo.xsd +4 -0
  180. data/test/files/metacharset.html +10 -0
  181. data/test/files/namespace_pressure_test.xml +1684 -0
  182. data/test/files/noencoding.html +47 -0
  183. data/test/files/po.xml +32 -0
  184. data/test/files/po.xsd +66 -0
  185. data/test/files/saml/saml20assertion_schema.xsd +283 -0
  186. data/test/files/saml/saml20protocol_schema.xsd +302 -0
  187. data/test/files/saml/xenc_schema.xsd +146 -0
  188. data/test/files/saml/xmldsig_schema.xsd +318 -0
  189. data/test/files/shift_jis.html +10 -0
  190. data/test/files/shift_jis.xml +5 -0
  191. data/test/files/shift_jis_no_charset.html +9 -0
  192. data/test/files/slow-xpath.xml +25509 -0
  193. data/test/files/snuggles.xml +3 -0
  194. data/test/files/staff.dtd +10 -0
  195. data/test/files/test_document_url/bar.xml +2 -0
  196. data/test/files/test_document_url/document.dtd +4 -0
  197. data/test/files/test_document_url/document.xml +6 -0
  198. data/test/files/tlm.html +2 -1
  199. data/test/files/to_be_xincluded.xml +2 -0
  200. data/test/files/valid_bar.xml +2 -0
  201. data/test/files/xinclude.xml +4 -0
  202. data/test/helper.rb +124 -13
  203. data/test/html/sax/test_parser.rb +118 -4
  204. data/test/html/sax/test_parser_context.rb +46 -0
  205. data/test/html/sax/test_push_parser.rb +87 -0
  206. data/test/html/test_builder.rb +94 -8
  207. data/test/html/test_document.rb +626 -11
  208. data/test/html/test_document_encoding.rb +145 -0
  209. data/test/html/test_document_fragment.rb +301 -0
  210. data/test/html/test_element_description.rb +105 -0
  211. data/test/html/test_named_characters.rb +14 -0
  212. data/test/html/test_node.rb +212 -0
  213. data/test/html/test_node_encoding.rb +85 -0
  214. data/test/namespaces/test_additional_namespaces_in_builder_doc.rb +14 -0
  215. data/test/namespaces/test_namespaces_aliased_default.rb +24 -0
  216. data/test/namespaces/test_namespaces_in_builder_doc.rb +75 -0
  217. data/test/namespaces/test_namespaces_in_cloned_doc.rb +31 -0
  218. data/test/namespaces/test_namespaces_in_created_doc.rb +75 -0
  219. data/test/namespaces/test_namespaces_in_parsed_doc.rb +80 -0
  220. data/test/namespaces/test_namespaces_preservation.rb +31 -0
  221. data/test/test_convert_xpath.rb +2 -47
  222. data/test/test_css_cache.rb +45 -0
  223. data/test/test_encoding_handler.rb +48 -0
  224. data/test/test_memory_leak.rb +156 -0
  225. data/test/test_nokogiri.rb +103 -1
  226. data/test/test_soap4r_sax.rb +52 -0
  227. data/test/test_xslt_transforms.rb +293 -8
  228. data/test/xml/node/test_save_options.rb +28 -0
  229. data/test/xml/node/test_subclass.rb +44 -0
  230. data/test/xml/sax/test_parser.rb +309 -8
  231. data/test/xml/sax/test_parser_context.rb +115 -0
  232. data/test/xml/sax/test_push_parser.rb +157 -0
  233. data/test/xml/test_attr.rb +67 -0
  234. data/test/xml/test_attribute_decl.rb +86 -0
  235. data/test/xml/test_builder.rb +327 -2
  236. data/test/xml/test_c14n.rb +180 -0
  237. data/test/xml/test_cdata.rb +32 -2
  238. data/test/xml/test_comment.rb +40 -0
  239. data/test/xml/test_document.rb +846 -35
  240. data/test/xml/test_document_encoding.rb +31 -0
  241. data/test/xml/test_document_fragment.rb +271 -0
  242. data/test/xml/test_dtd.rb +153 -9
  243. data/test/xml/test_dtd_encoding.rb +31 -0
  244. data/test/xml/test_element_content.rb +56 -0
  245. data/test/xml/test_element_decl.rb +73 -0
  246. data/test/xml/test_entity_decl.rb +122 -0
  247. data/test/xml/test_entity_reference.rb +251 -0
  248. data/test/xml/test_namespace.rb +96 -0
  249. data/test/xml/test_node.rb +1126 -105
  250. data/test/xml/test_node_attributes.rb +115 -0
  251. data/test/xml/test_node_encoding.rb +69 -0
  252. data/test/xml/test_node_inheritance.rb +32 -0
  253. data/test/xml/test_node_reparenting.rb +549 -0
  254. data/test/xml/test_node_set.rb +668 -9
  255. data/test/xml/test_parse_options.rb +64 -0
  256. data/test/xml/test_processing_instruction.rb +30 -0
  257. data/test/xml/test_reader.rb +589 -0
  258. data/test/xml/test_reader_encoding.rb +134 -0
  259. data/test/xml/test_relax_ng.rb +60 -0
  260. data/test/xml/test_schema.rb +142 -0
  261. data/test/xml/test_syntax_error.rb +30 -0
  262. data/test/xml/test_text.rb +49 -2
  263. data/test/xml/test_unparented_node.rb +440 -0
  264. data/test/xml/test_xinclude.rb +83 -0
  265. data/test/xml/test_xpath.rb +445 -0
  266. data/test/xslt/test_custom_functions.rb +133 -0
  267. data/test/xslt/test_exception_handling.rb +37 -0
  268. data/test_all +107 -0
  269. metadata +459 -115
  270. data/History.txt +0 -6
  271. data/README.ja.txt +0 -86
  272. data/README.txt +0 -87
  273. data/ext/nokogiri/html_sax_parser.c +0 -32
  274. data/ext/nokogiri/html_sax_parser.h +0 -11
  275. data/ext/nokogiri/native.c +0 -40
  276. data/ext/nokogiri/native.h +0 -51
  277. data/ext/nokogiri/xml_xpath.c +0 -46
  278. data/ext/nokogiri/xml_xpath.h +0 -11
  279. data/lib/nokogiri/css/generated_parser.rb +0 -653
  280. data/lib/nokogiri/css/generated_tokenizer.rb +0 -159
  281. data/lib/nokogiri/decorators/hpricot/node.rb +0 -58
  282. data/lib/nokogiri/decorators/hpricot/node_set.rb +0 -14
  283. data/lib/nokogiri/decorators/hpricot/xpath_visitor.rb +0 -17
  284. data/lib/nokogiri/decorators/hpricot.rb +0 -3
  285. data/lib/nokogiri/decorators.rb +0 -1
  286. data/lib/nokogiri/hpricot.rb +0 -47
  287. data/lib/nokogiri/xml/after_handler.rb +0 -18
  288. data/lib/nokogiri/xml/before_handler.rb +0 -32
  289. data/lib/nokogiri/xml/element.rb +0 -6
  290. data/lib/nokogiri/xml/entity_declaration.rb +0 -9
  291. data/nokogiri.gemspec +0 -34
  292. data/test/hpricot/files/basic.xhtml +0 -17
  293. data/test/hpricot/files/boingboing.html +0 -2266
  294. data/test/hpricot/files/cy0.html +0 -3653
  295. data/test/hpricot/files/immob.html +0 -400
  296. data/test/hpricot/files/pace_application.html +0 -1320
  297. data/test/hpricot/files/tenderlove.html +0 -16
  298. data/test/hpricot/files/uswebgen.html +0 -220
  299. data/test/hpricot/files/utf8.html +0 -1054
  300. data/test/hpricot/files/week9.html +0 -1723
  301. data/test/hpricot/files/why.xml +0 -19
  302. data/test/hpricot/load_files.rb +0 -7
  303. data/test/hpricot/test_alter.rb +0 -67
  304. data/test/hpricot/test_builder.rb +0 -27
  305. data/test/hpricot/test_parser.rb +0 -423
  306. data/test/hpricot/test_paths.rb +0 -15
  307. data/test/hpricot/test_preserved.rb +0 -78
  308. data/test/hpricot/test_xml.rb +0 -30
  309. data/test/test_reader.rb +0 -222
@@ -0,0 +1,9 @@
1
+ #ifndef NOKOGIRI_XML_COMMENT
2
+ #define NOKOGIRI_XML_COMMENT
3
+
4
+ #include <nokogiri.h>
5
+
6
+ void init_xml_comment();
7
+
8
+ extern VALUE cNokogiriXmlComment;
9
+ #endif
@@ -1,43 +1,107 @@
1
1
  #include <xml_document.h>
2
2
 
3
- /*
4
- * note that xmlDocPtr is being cast as an xmlNodePtr, which is legal for the
5
- * "common part" struct header which contains only node pointers.
6
- */
7
- static void gc_mark(xmlNodePtr node)
3
+ static int dealloc_node_i(xmlNodePtr key, xmlNodePtr node, xmlDocPtr doc)
8
4
  {
9
- xmlNodePtr child ;
10
- /* mark children nodes */
11
- for (child = node->children ; child ; child = child->next) {
12
- if (child->_private)
13
- rb_gc_mark((VALUE)child->_private);
5
+ switch(node->type) {
6
+ case XML_ATTRIBUTE_NODE:
7
+ xmlFreePropList((xmlAttrPtr)node);
8
+ break;
9
+ case XML_NAMESPACE_DECL:
10
+ xmlFree(node);
11
+ break;
12
+ default:
13
+ if(node->parent == NULL) {
14
+ xmlAddChild((xmlNodePtr)doc, node);
15
+ }
14
16
  }
17
+ return ST_CONTINUE;
18
+ }
19
+
20
+ static void remove_private(xmlNodePtr node)
21
+ {
22
+ xmlNodePtr child;
23
+
24
+ for (child = node->children; child; child = child->next)
25
+ remove_private(child);
26
+
27
+ if ((node->type == XML_ELEMENT_NODE ||
28
+ node->type == XML_XINCLUDE_START ||
29
+ node->type == XML_XINCLUDE_END) &&
30
+ node->properties) {
31
+ for (child = (xmlNodePtr)node->properties; child; child = child->next)
32
+ remove_private(child);
33
+ }
34
+
35
+ node->_private = NULL;
15
36
  }
16
37
 
17
38
  static void dealloc(xmlDocPtr doc)
18
39
  {
40
+ st_table *node_hash;
41
+
19
42
  NOKOGIRI_DEBUG_START(doc);
43
+
44
+ node_hash = DOC_UNLINKED_NODE_HASH(doc);
45
+
46
+ st_foreach(node_hash, dealloc_node_i, (st_data_t)doc);
47
+ st_free_table(node_hash);
48
+
49
+ free(doc->_private);
50
+
51
+ /* When both Nokogiri and libxml-ruby are loaded, make sure that all nodes
52
+ * have their _private pointers cleared. This is to avoid libxml-ruby's
53
+ * xmlDeregisterNode callback from accessing VALUE pointers from ruby's GC
54
+ * free context, which can result in segfaults.
55
+ */
56
+ if (xmlDeregisterNodeDefaultValue)
57
+ remove_private((xmlNodePtr)doc);
58
+
20
59
  xmlFreeDoc(doc);
60
+
21
61
  NOKOGIRI_DEBUG_END(doc);
22
62
  }
23
63
 
64
+ static void recursively_remove_namespaces_from_node(xmlNodePtr node)
65
+ {
66
+ xmlNodePtr child ;
67
+ xmlAttrPtr property ;
68
+
69
+ xmlSetNs(node, NULL);
70
+
71
+ for (child = node->children ; child ; child = child->next)
72
+ recursively_remove_namespaces_from_node(child);
73
+
74
+ if (((node->type == XML_ELEMENT_NODE) ||
75
+ (node->type == XML_XINCLUDE_START) ||
76
+ (node->type == XML_XINCLUDE_END)) &&
77
+ node->nsDef) {
78
+ xmlFreeNsList(node->nsDef);
79
+ node->nsDef = NULL;
80
+ }
81
+
82
+ if (node->type == XML_ELEMENT_NODE && node->properties != NULL) {
83
+ property = node->properties ;
84
+ while (property != NULL) {
85
+ if (property->ns) property->ns = NULL ;
86
+ property = property->next ;
87
+ }
88
+ }
89
+ }
90
+
24
91
  /*
25
92
  * call-seq:
26
- * serialize
93
+ * url
27
94
  *
28
- * Serialize this document
95
+ * Get the url name for this document.
29
96
  */
30
- static VALUE serialize(VALUE self)
97
+ static VALUE url(VALUE self)
31
98
  {
32
99
  xmlDocPtr doc;
33
- xmlChar *buf;
34
- int size;
35
100
  Data_Get_Struct(self, xmlDoc, doc);
36
101
 
37
- xmlDocDumpMemory(doc, &buf, &size);
38
- VALUE rb_str = rb_str_new((char *)buf, (long)size);
39
- free(buf);
40
- return rb_str;
102
+ if(doc->URL) return NOKOGIRI_STR_NEW2(doc->URL);
103
+
104
+ return Qnil;
41
105
  }
42
106
 
43
107
  /*
@@ -50,12 +114,37 @@ static VALUE set_root(VALUE self, VALUE root)
50
114
  {
51
115
  xmlDocPtr doc;
52
116
  xmlNodePtr new_root;
117
+ xmlNodePtr old_root;
53
118
 
54
119
  Data_Get_Struct(self, xmlDoc, doc);
120
+
121
+ old_root = NULL;
122
+
123
+ if(NIL_P(root)) {
124
+ old_root = xmlDocGetRootElement(doc);
125
+
126
+ if(old_root) {
127
+ xmlUnlinkNode(old_root);
128
+ nokogiri_root_node(old_root);
129
+ }
130
+
131
+ return root;
132
+ }
133
+
55
134
  Data_Get_Struct(root, xmlNode, new_root);
56
135
 
136
+
137
+ /* If the new root's document is not the same as the current document,
138
+ * then we need to dup the node in to this document. */
139
+ if(new_root->doc != doc) {
140
+ old_root = xmlDocGetRootElement(doc);
141
+ if (!(new_root = xmlDocCopyNode(new_root, doc, 1))) {
142
+ rb_raise(rb_eRuntimeError, "Could not reparent node (xmlDocCopyNode)");
143
+ }
144
+ }
145
+
57
146
  xmlDocSetRootElement(doc, new_root);
58
- Nokogiri_xml_node_owned_set(new_root);
147
+ if(old_root) nokogiri_root_node(old_root);
59
148
  return root;
60
149
  }
61
150
 
@@ -68,14 +157,121 @@ static VALUE set_root(VALUE self, VALUE root)
68
157
  static VALUE root(VALUE self)
69
158
  {
70
159
  xmlDocPtr doc;
160
+ xmlNodePtr root;
161
+
71
162
  Data_Get_Struct(self, xmlDoc, doc);
72
163
 
73
- xmlNodePtr root = xmlDocGetRootElement(doc);
164
+ root = xmlDocGetRootElement(doc);
74
165
 
75
166
  if(!root) return Qnil;
76
- return Nokogiri_wrap_xml_node(root) ;
167
+ return Nokogiri_wrap_xml_node(Qnil, root) ;
168
+ }
169
+
170
+ /*
171
+ * call-seq:
172
+ * encoding= encoding
173
+ *
174
+ * Set the encoding string for this Document
175
+ */
176
+ static VALUE set_encoding(VALUE self, VALUE encoding)
177
+ {
178
+ xmlDocPtr doc;
179
+ Data_Get_Struct(self, xmlDoc, doc);
180
+
181
+ if (doc->encoding)
182
+ free((char *) doc->encoding); /* this may produce a gcc cast warning */
183
+
184
+ doc->encoding = xmlStrdup((xmlChar *)StringValueCStr(encoding));
185
+
186
+ return encoding;
187
+ }
188
+
189
+ /*
190
+ * call-seq:
191
+ * encoding
192
+ *
193
+ * Get the encoding for this Document
194
+ */
195
+ static VALUE encoding(VALUE self)
196
+ {
197
+ xmlDocPtr doc;
198
+ Data_Get_Struct(self, xmlDoc, doc);
199
+
200
+ if(!doc->encoding) return Qnil;
201
+ return NOKOGIRI_STR_NEW2(doc->encoding);
202
+ }
203
+
204
+ /*
205
+ * call-seq:
206
+ * version
207
+ *
208
+ * Get the XML version for this Document
209
+ */
210
+ static VALUE version(VALUE self)
211
+ {
212
+ xmlDocPtr doc;
213
+ Data_Get_Struct(self, xmlDoc, doc);
214
+
215
+ if(!doc->version) return Qnil;
216
+ return NOKOGIRI_STR_NEW2(doc->version);
217
+ }
218
+
219
+ /*
220
+ * call-seq:
221
+ * read_io(io, url, encoding, options)
222
+ *
223
+ * Create a new document from an IO object
224
+ */
225
+ static VALUE read_io( VALUE klass,
226
+ VALUE io,
227
+ VALUE url,
228
+ VALUE encoding,
229
+ VALUE options )
230
+ {
231
+ const char * c_url = NIL_P(url) ? NULL : StringValueCStr(url);
232
+ const char * c_enc = NIL_P(encoding) ? NULL : StringValueCStr(encoding);
233
+ VALUE error_list = rb_ary_new();
234
+ VALUE document;
235
+ xmlDocPtr doc;
236
+
237
+ xmlResetLastError();
238
+ xmlSetStructuredErrorFunc((void *)error_list, Nokogiri_error_array_pusher);
239
+
240
+ doc = xmlReadIO(
241
+ (xmlInputReadCallback)io_read_callback,
242
+ (xmlInputCloseCallback)io_close_callback,
243
+ (void *)io,
244
+ c_url,
245
+ c_enc,
246
+ (int)NUM2INT(options)
247
+ );
248
+ xmlSetStructuredErrorFunc(NULL, NULL);
249
+
250
+ if(doc == NULL) {
251
+ xmlErrorPtr error;
252
+
253
+ xmlFreeDoc(doc);
254
+
255
+ error = xmlGetLastError();
256
+ if(error)
257
+ rb_exc_raise(Nokogiri_wrap_xml_syntax_error(error));
258
+ else
259
+ rb_raise(rb_eRuntimeError, "Could not parse document");
260
+
261
+ return Qnil;
262
+ }
263
+
264
+ document = Nokogiri_wrap_xml_document(klass, doc);
265
+ rb_iv_set(document, "@errors", error_list);
266
+ return document;
77
267
  }
78
268
 
269
+ /*
270
+ * call-seq:
271
+ * read_memory(string, url, encoding, options)
272
+ *
273
+ * Create a new document from a String
274
+ */
79
275
  static VALUE read_memory( VALUE klass,
80
276
  VALUE string,
81
277
  VALUE url,
@@ -83,77 +279,328 @@ static VALUE read_memory( VALUE klass,
83
279
  VALUE options )
84
280
  {
85
281
  const char * c_buffer = StringValuePtr(string);
86
- const char * c_url = (url == Qnil) ? NULL : StringValuePtr(url);
87
- const char * c_enc = (encoding == Qnil) ? NULL : StringValuePtr(encoding);
88
- int len = NUM2INT(rb_funcall(string, rb_intern("length"), 0));
282
+ const char * c_url = NIL_P(url) ? NULL : StringValueCStr(url);
283
+ const char * c_enc = NIL_P(encoding) ? NULL : StringValueCStr(encoding);
284
+ int len = (int)RSTRING_LEN(string);
285
+ VALUE error_list = rb_ary_new();
286
+ VALUE document;
287
+ xmlDocPtr doc;
89
288
 
90
- xmlInitParser();
91
- xmlDocPtr doc = xmlReadMemory(c_buffer, len, c_url, c_enc, NUM2INT(options));
289
+ xmlResetLastError();
290
+ xmlSetStructuredErrorFunc((void *)error_list, Nokogiri_error_array_pusher);
291
+ doc = xmlReadMemory(c_buffer, len, c_url, c_enc, (int)NUM2INT(options));
292
+ xmlSetStructuredErrorFunc(NULL, NULL);
92
293
 
93
294
  if(doc == NULL) {
295
+ xmlErrorPtr error;
296
+
94
297
  xmlFreeDoc(doc);
95
- rb_raise(rb_eRuntimeError, "Couldn't create a document");
298
+
299
+ error = xmlGetLastError();
300
+ if(error)
301
+ rb_exc_raise(Nokogiri_wrap_xml_syntax_error(error));
302
+ else
303
+ rb_raise(rb_eRuntimeError, "Could not parse document");
304
+
305
+ return Qnil;
96
306
  }
97
307
 
98
- return Nokogiri_wrap_xml_document(klass, doc);
308
+ document = Nokogiri_wrap_xml_document(klass, doc);
309
+ rb_iv_set(document, "@errors", error_list);
310
+ return document;
99
311
  }
100
312
 
101
- static VALUE new(int argc, VALUE *argv, VALUE klass)
313
+ /*
314
+ * call-seq:
315
+ * dup
316
+ *
317
+ * Copy this Document. An optional depth may be passed in, but it defaults
318
+ * to a deep copy. 0 is a shallow copy, 1 is a deep copy.
319
+ */
320
+ static VALUE duplicate_document(int argc, VALUE *argv, VALUE self)
102
321
  {
103
- VALUE version;
104
- if(rb_scan_args(argc, argv, "01", &version) == 0)
105
- version = rb_str_new2("1.0");
322
+ xmlDocPtr doc, dup;
323
+ VALUE copy;
324
+ VALUE level;
325
+ VALUE error_list;
326
+
327
+ if(rb_scan_args(argc, argv, "01", &level) == 0)
328
+ level = INT2NUM((long)1);
329
+
330
+ Data_Get_Struct(self, xmlDoc, doc);
331
+
332
+ dup = xmlCopyDoc(doc, (int)NUM2INT(level));
106
333
 
107
- xmlDocPtr doc = xmlNewDoc((xmlChar *)StringValuePtr(version));
108
- return Nokogiri_wrap_xml_document(klass, doc);
334
+ if(dup == NULL) return Qnil;
335
+
336
+ dup->type = doc->type;
337
+ copy = Nokogiri_wrap_xml_document(rb_obj_class(self), dup);
338
+ error_list = rb_iv_get(self, "@errors");
339
+ rb_iv_set(copy, "@errors", error_list);
340
+ return copy ;
109
341
  }
110
342
 
111
343
  /*
112
- * call-seq:
113
- * substitute_entities_set bool)
344
+ * call-seq:
345
+ * new(version = default)
114
346
  *
115
- * Set the global XML default for substitute entities.
347
+ * Create a new document with +version+ (defaults to "1.0")
116
348
  */
117
- static VALUE substitute_entities_set(VALUE klass, VALUE value)
349
+ static VALUE new(int argc, VALUE *argv, VALUE klass)
118
350
  {
119
- xmlSubstituteEntitiesDefault(NUM2INT(value));
120
- return Qnil ;
351
+ xmlDocPtr doc;
352
+ VALUE version, rest, rb_doc ;
353
+
354
+ rb_scan_args(argc, argv, "0*", &rest);
355
+ version = rb_ary_entry(rest, (long)0);
356
+ if (NIL_P(version)) version = rb_str_new2("1.0");
357
+
358
+ doc = xmlNewDoc((xmlChar *)StringValueCStr(version));
359
+ rb_doc = Nokogiri_wrap_xml_document(klass, doc);
360
+ rb_obj_call_init(rb_doc, argc, argv);
361
+ return rb_doc ;
121
362
  }
122
363
 
123
364
  /*
124
365
  * call-seq:
125
- * substitute_entities_set bool)
366
+ * remove_namespaces!
367
+ *
368
+ * Remove all namespaces from all nodes in the document.
369
+ *
370
+ * This could be useful for developers who either don't understand namespaces
371
+ * or don't care about them.
372
+ *
373
+ * The following example shows a use case, and you can decide for yourself
374
+ * whether this is a good thing or not:
126
375
  *
127
- * Set the global XML default for load external subsets.
376
+ * doc = Nokogiri::XML <<-EOXML
377
+ * <root>
378
+ * <car xmlns:part="http://general-motors.com/">
379
+ * <part:tire>Michelin Model XGV</part:tire>
380
+ * </car>
381
+ * <bicycle xmlns:part="http://schwinn.com/">
382
+ * <part:tire>I'm a bicycle tire!</part:tire>
383
+ * </bicycle>
384
+ * </root>
385
+ * EOXML
386
+ *
387
+ * doc.xpath("//tire").to_s # => ""
388
+ * doc.xpath("//part:tire", "part" => "http://general-motors.com/").to_s # => "<part:tire>Michelin Model XGV</part:tire>"
389
+ * doc.xpath("//part:tire", "part" => "http://schwinn.com/").to_s # => "<part:tire>I'm a bicycle tire!</part:tire>"
390
+ *
391
+ * doc.remove_namespaces!
392
+ *
393
+ * doc.xpath("//tire").to_s # => "<tire>Michelin Model XGV</tire><tire>I'm a bicycle tire!</tire>"
394
+ * doc.xpath("//part:tire", "part" => "http://general-motors.com/").to_s # => ""
395
+ * doc.xpath("//part:tire", "part" => "http://schwinn.com/").to_s # => ""
396
+ *
397
+ * For more information on why this probably is *not* a good thing in general,
398
+ * please direct your browser to
399
+ * http://tenderlovemaking.com/2009/04/23/namespaces-in-xml.html
128
400
  */
129
- static VALUE load_external_subsets_set(VALUE klass, VALUE value)
401
+ VALUE remove_namespaces_bang(VALUE self)
130
402
  {
131
- xmlLoadExtDtdDefaultValue = NUM2INT(value);
132
- return Qnil ;
403
+ xmlDocPtr doc ;
404
+ Data_Get_Struct(self, xmlDoc, doc);
405
+
406
+ recursively_remove_namespaces_from_node((xmlNodePtr)doc);
407
+ return self;
408
+ }
409
+
410
+ /* call-seq: doc.create_entity(name, type, external_id, system_id, content)
411
+ *
412
+ * Create a new entity named +name+.
413
+ *
414
+ * +type+ is an integer representing the type of entity to be created, and it
415
+ * defaults to Nokogiri::XML::EntityDecl::INTERNAL_GENERAL. See
416
+ * the constants on Nokogiri::XML::EntityDecl for more information.
417
+ *
418
+ * +external_id+, +system_id+, and +content+ set the External ID, System ID,
419
+ * and content respectively. All of these parameters are optional.
420
+ */
421
+ static VALUE create_entity(int argc, VALUE *argv, VALUE self)
422
+ {
423
+ VALUE name;
424
+ VALUE type;
425
+ VALUE external_id;
426
+ VALUE system_id;
427
+ VALUE content;
428
+ xmlEntityPtr ptr;
429
+ xmlDocPtr doc ;
430
+
431
+ Data_Get_Struct(self, xmlDoc, doc);
432
+
433
+ rb_scan_args(argc, argv, "14", &name, &type, &external_id, &system_id,
434
+ &content);
435
+
436
+ xmlResetLastError();
437
+ ptr = xmlAddDocEntity(
438
+ doc,
439
+ (xmlChar *)(NIL_P(name) ? NULL : StringValueCStr(name)),
440
+ (int) (NIL_P(type) ? XML_INTERNAL_GENERAL_ENTITY : NUM2INT(type)),
441
+ (xmlChar *)(NIL_P(external_id) ? NULL : StringValueCStr(external_id)),
442
+ (xmlChar *)(NIL_P(system_id) ? NULL : StringValueCStr(system_id)),
443
+ (xmlChar *)(NIL_P(content) ? NULL : StringValueCStr(content))
444
+ );
445
+
446
+ if(NULL == ptr) {
447
+ xmlErrorPtr error = xmlGetLastError();
448
+ if(error)
449
+ rb_exc_raise(Nokogiri_wrap_xml_syntax_error(error));
450
+ else
451
+ rb_raise(rb_eRuntimeError, "Could not create entity");
452
+
453
+ return Qnil;
454
+ }
455
+
456
+ return Nokogiri_wrap_xml_node(cNokogiriXmlEntityDecl, (xmlNodePtr)ptr);
457
+ }
458
+
459
+ static int block_caller(void * ctx, xmlNodePtr _node, xmlNodePtr _parent)
460
+ {
461
+ VALUE block;
462
+ VALUE node;
463
+ VALUE parent;
464
+ VALUE ret;
465
+
466
+ if(_node->type == XML_NAMESPACE_DECL){
467
+ node = Nokogiri_wrap_xml_namespace(_parent->doc, (xmlNsPtr) _node);
468
+ }
469
+ else{
470
+ node = Nokogiri_wrap_xml_node(Qnil, _node);
471
+ }
472
+ parent = _parent ? Nokogiri_wrap_xml_node(Qnil, _parent) : Qnil;
473
+ block = (VALUE)ctx;
474
+
475
+ ret = rb_funcall(block, rb_intern("call"), 2, node, parent);
476
+
477
+ if(Qfalse == ret || Qnil == ret) return 0;
478
+
479
+ return 1;
480
+ }
481
+
482
+ /* call-seq:
483
+ * doc.canonicalize(mode=XML_C14N_1_0,inclusive_namespaces=nil,with_comments=false)
484
+ * doc.canonicalize { |obj, parent| ... }
485
+ *
486
+ * Canonicalize a document and return the results. Takes an optional block
487
+ * that takes two parameters: the +obj+ and that node's +parent+.
488
+ * The +obj+ will be either a Nokogiri::XML::Node, or a Nokogiri::XML::Namespace
489
+ * The block must return a non-nil, non-false value if the +obj+ passed in
490
+ * should be included in the canonicalized document.
491
+ */
492
+ static VALUE canonicalize(int argc, VALUE* argv, VALUE self)
493
+ {
494
+ VALUE mode;
495
+ VALUE incl_ns;
496
+ VALUE with_comments;
497
+ xmlChar **ns;
498
+ long ns_len, i;
499
+
500
+ xmlDocPtr doc;
501
+ xmlOutputBufferPtr buf;
502
+ xmlC14NIsVisibleCallback cb = NULL;
503
+ void * ctx = NULL;
504
+
505
+ VALUE rb_cStringIO;
506
+ VALUE io;
507
+
508
+ rb_scan_args(argc, argv, "03", &mode, &incl_ns, &with_comments);
509
+
510
+ Data_Get_Struct(self, xmlDoc, doc);
511
+
512
+ rb_cStringIO = rb_const_get_at(rb_cObject, rb_intern("StringIO"));
513
+ io = rb_class_new_instance(0, 0, rb_cStringIO);
514
+ buf = xmlAllocOutputBuffer(NULL);
515
+
516
+ buf->writecallback = (xmlOutputWriteCallback)io_write_callback;
517
+ buf->closecallback = (xmlOutputCloseCallback)io_close_callback;
518
+ buf->context = (void *)io;
519
+
520
+ if(rb_block_given_p()) {
521
+ cb = block_caller;
522
+ ctx = (void *)rb_block_proc();
523
+ }
524
+
525
+ if(NIL_P(incl_ns)){
526
+ ns = NULL;
527
+ }
528
+ else{
529
+ Check_Type(incl_ns, T_ARRAY);
530
+ ns_len = RARRAY_LEN(incl_ns);
531
+ ns = calloc((size_t)ns_len+1, sizeof(xmlChar *));
532
+ for (i = 0 ; i < ns_len ; i++) {
533
+ VALUE entry = rb_ary_entry(incl_ns, i);
534
+ const char * ptr = StringValueCStr(entry);
535
+ ns[i] = (xmlChar*) ptr;
536
+ }
537
+ }
538
+
539
+
540
+ xmlC14NExecute(doc, cb, ctx,
541
+ (int) (NIL_P(mode) ? 0 : NUM2INT(mode)),
542
+ ns,
543
+ (int) RTEST(with_comments),
544
+ buf);
545
+
546
+ xmlOutputBufferClose(buf);
547
+
548
+ return rb_funcall(io, rb_intern("string"), 0);
133
549
  }
134
550
 
135
551
  VALUE cNokogiriXmlDocument ;
136
552
  void init_xml_document()
137
553
  {
138
- VALUE klass = cNokogiriXmlDocument = rb_const_get(mNokogiriXml, rb_intern("Document"));
554
+ VALUE nokogiri = rb_define_module("Nokogiri");
555
+ VALUE xml = rb_define_module_under(nokogiri, "XML");
556
+ VALUE node = rb_define_class_under(xml, "Node", rb_cObject);
557
+
558
+ /*
559
+ * Nokogiri::XML::Document wraps an xml document.
560
+ */
561
+ VALUE klass = rb_define_class_under(xml, "Document", node);
562
+
563
+ cNokogiriXmlDocument = klass;
139
564
 
140
565
  rb_define_singleton_method(klass, "read_memory", read_memory, 4);
566
+ rb_define_singleton_method(klass, "read_io", read_io, 4);
141
567
  rb_define_singleton_method(klass, "new", new, -1);
142
- rb_define_singleton_method(klass, "substitute_entities=", substitute_entities_set, 1);
143
- rb_define_singleton_method(klass, "load_external_subsets=", load_external_subsets_set, 1);
144
568
 
145
569
  rb_define_method(klass, "root", root, 0);
146
570
  rb_define_method(klass, "root=", set_root, 1);
147
- rb_define_method(klass, "serialize", serialize, 0);
571
+ rb_define_method(klass, "encoding", encoding, 0);
572
+ rb_define_method(klass, "encoding=", set_encoding, 1);
573
+ rb_define_method(klass, "version", version, 0);
574
+ rb_define_method(klass, "canonicalize", canonicalize, -1);
575
+ rb_define_method(klass, "dup", duplicate_document, -1);
576
+ rb_define_method(klass, "url", url, 0);
577
+ rb_define_method(klass, "create_entity", create_entity, -1);
578
+ rb_define_method(klass, "remove_namespaces!", remove_namespaces_bang, 0);
148
579
  }
149
580
 
150
581
 
151
582
  /* this takes klass as a param because it's used for HtmlDocument, too. */
152
583
  VALUE Nokogiri_wrap_xml_document(VALUE klass, xmlDocPtr doc)
153
584
  {
154
- if (doc->_private)
155
- return (VALUE)doc->_private ;
156
- VALUE rb_doc = Data_Wrap_Struct(klass ? klass : cNokogiriXmlDocument, gc_mark, dealloc, doc) ;
157
- doc->_private = (void*)rb_doc ;
585
+ nokogiriTuplePtr tuple = (nokogiriTuplePtr)malloc(sizeof(nokogiriTuple));
586
+
587
+ VALUE rb_doc = Data_Wrap_Struct(
588
+ klass ? klass : cNokogiriXmlDocument,
589
+ 0,
590
+ dealloc,
591
+ doc
592
+ );
593
+
594
+ VALUE cache = rb_ary_new();
595
+ rb_iv_set(rb_doc, "@decorators", Qnil);
596
+ rb_iv_set(rb_doc, "@node_cache", cache);
597
+
598
+ tuple->doc = rb_doc;
599
+ tuple->unlinkedNodes = st_init_numtable_with_size(128);
600
+ tuple->node_cache = cache;
601
+ doc->_private = tuple ;
602
+
603
+ rb_obj_call_init(rb_doc, 0, NULL);
604
+
158
605
  return rb_doc ;
159
606
  }
@@ -1,10 +1,23 @@
1
1
  #ifndef NOKOGIRI_XML_DOCUMENT
2
2
  #define NOKOGIRI_XML_DOCUMENT
3
3
 
4
- #include <native.h>
4
+ #include <nokogiri.h>
5
+
6
+ struct _nokogiriTuple {
7
+ VALUE doc;
8
+ st_table *unlinkedNodes;
9
+ VALUE node_cache;
10
+ };
11
+ typedef struct _nokogiriTuple nokogiriTuple;
12
+ typedef nokogiriTuple * nokogiriTuplePtr;
5
13
 
6
14
  void init_xml_document();
7
15
  VALUE Nokogiri_wrap_xml_document(VALUE klass, xmlDocPtr doc);
8
16
 
17
+ #define DOC_RUBY_OBJECT_TEST(x) ((nokogiriTuplePtr)(x->_private))
18
+ #define DOC_RUBY_OBJECT(x) (((nokogiriTuplePtr)(x->_private))->doc)
19
+ #define DOC_UNLINKED_NODE_HASH(x) (((nokogiriTuplePtr)(x->_private))->unlinkedNodes)
20
+ #define DOC_NODE_CACHE(x) (((nokogiriTuplePtr)(x->_private))->node_cache)
21
+
9
22
  extern VALUE cNokogiriXmlDocument ;
10
23
  #endif