nokogiri 1.0.0 → 1.6.8.1

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (309) hide show
  1. checksums.yaml +7 -0
  2. data/.autotest +26 -0
  3. data/.cross_rubies +9 -0
  4. data/.editorconfig +17 -0
  5. data/.gemtest +0 -0
  6. data/.travis.yml +51 -0
  7. data/CHANGELOG.rdoc +1160 -0
  8. data/CONTRIBUTING.md +42 -0
  9. data/C_CODING_STYLE.rdoc +33 -0
  10. data/Gemfile +22 -0
  11. data/LICENSE.txt +31 -0
  12. data/Manifest.txt +284 -40
  13. data/README.md +166 -0
  14. data/ROADMAP.md +111 -0
  15. data/Rakefile +310 -199
  16. data/STANDARD_RESPONSES.md +47 -0
  17. data/Y_U_NO_GEMSPEC.md +155 -0
  18. data/appveyor.yml +22 -0
  19. data/bin/nokogiri +118 -0
  20. data/build_all +45 -0
  21. data/dependencies.yml +29 -0
  22. data/ext/nokogiri/depend +358 -0
  23. data/ext/nokogiri/extconf.rb +664 -34
  24. data/ext/nokogiri/html_document.c +120 -33
  25. data/ext/nokogiri/html_document.h +1 -1
  26. data/ext/nokogiri/html_element_description.c +279 -0
  27. data/ext/nokogiri/html_element_description.h +10 -0
  28. data/ext/nokogiri/html_entity_lookup.c +32 -0
  29. data/ext/nokogiri/html_entity_lookup.h +8 -0
  30. data/ext/nokogiri/html_sax_parser_context.c +116 -0
  31. data/ext/nokogiri/html_sax_parser_context.h +11 -0
  32. data/ext/nokogiri/html_sax_push_parser.c +87 -0
  33. data/ext/nokogiri/html_sax_push_parser.h +9 -0
  34. data/ext/nokogiri/nokogiri.c +145 -0
  35. data/ext/nokogiri/nokogiri.h +131 -0
  36. data/ext/nokogiri/xml_attr.c +94 -0
  37. data/ext/nokogiri/xml_attr.h +9 -0
  38. data/ext/nokogiri/xml_attribute_decl.c +70 -0
  39. data/ext/nokogiri/xml_attribute_decl.h +9 -0
  40. data/ext/nokogiri/xml_cdata.c +23 -19
  41. data/ext/nokogiri/xml_cdata.h +1 -1
  42. data/ext/nokogiri/xml_comment.c +69 -0
  43. data/ext/nokogiri/xml_comment.h +9 -0
  44. data/ext/nokogiri/xml_document.c +501 -54
  45. data/ext/nokogiri/xml_document.h +14 -1
  46. data/ext/nokogiri/xml_document_fragment.c +48 -0
  47. data/ext/nokogiri/xml_document_fragment.h +10 -0
  48. data/ext/nokogiri/xml_dtd.c +109 -24
  49. data/ext/nokogiri/xml_dtd.h +3 -1
  50. data/ext/nokogiri/xml_element_content.c +123 -0
  51. data/ext/nokogiri/xml_element_content.h +10 -0
  52. data/ext/nokogiri/xml_element_decl.c +69 -0
  53. data/ext/nokogiri/xml_element_decl.h +9 -0
  54. data/ext/nokogiri/xml_encoding_handler.c +79 -0
  55. data/ext/nokogiri/xml_encoding_handler.h +8 -0
  56. data/ext/nokogiri/xml_entity_decl.c +110 -0
  57. data/ext/nokogiri/xml_entity_decl.h +10 -0
  58. data/ext/nokogiri/xml_entity_reference.c +52 -0
  59. data/ext/nokogiri/xml_entity_reference.h +9 -0
  60. data/ext/nokogiri/xml_io.c +60 -0
  61. data/ext/nokogiri/xml_io.h +11 -0
  62. data/ext/nokogiri/xml_libxml2_hacks.c +112 -0
  63. data/ext/nokogiri/xml_libxml2_hacks.h +12 -0
  64. data/ext/nokogiri/xml_namespace.c +117 -0
  65. data/ext/nokogiri/xml_namespace.h +13 -0
  66. data/ext/nokogiri/xml_node.c +1285 -315
  67. data/ext/nokogiri/xml_node.h +4 -6
  68. data/ext/nokogiri/xml_node_set.c +415 -54
  69. data/ext/nokogiri/xml_node_set.h +6 -2
  70. data/ext/nokogiri/xml_processing_instruction.c +56 -0
  71. data/ext/nokogiri/xml_processing_instruction.h +9 -0
  72. data/ext/nokogiri/xml_reader.c +316 -77
  73. data/ext/nokogiri/xml_reader.h +1 -1
  74. data/ext/nokogiri/xml_relax_ng.c +161 -0
  75. data/ext/nokogiri/xml_relax_ng.h +9 -0
  76. data/ext/nokogiri/xml_sax_parser.c +215 -80
  77. data/ext/nokogiri/xml_sax_parser.h +30 -1
  78. data/ext/nokogiri/xml_sax_parser_context.c +262 -0
  79. data/ext/nokogiri/xml_sax_parser_context.h +10 -0
  80. data/ext/nokogiri/xml_sax_push_parser.c +115 -0
  81. data/ext/nokogiri/xml_sax_push_parser.h +9 -0
  82. data/ext/nokogiri/xml_schema.c +205 -0
  83. data/ext/nokogiri/xml_schema.h +9 -0
  84. data/ext/nokogiri/xml_syntax_error.c +45 -175
  85. data/ext/nokogiri/xml_syntax_error.h +4 -2
  86. data/ext/nokogiri/xml_text.c +37 -14
  87. data/ext/nokogiri/xml_text.h +1 -1
  88. data/ext/nokogiri/xml_xpath_context.c +230 -13
  89. data/ext/nokogiri/xml_xpath_context.h +2 -1
  90. data/ext/nokogiri/xslt_stylesheet.c +196 -34
  91. data/ext/nokogiri/xslt_stylesheet.h +6 -1
  92. data/lib/nokogiri/css/node.rb +18 -61
  93. data/lib/nokogiri/css/parser.rb +725 -17
  94. data/lib/nokogiri/css/parser.y +126 -63
  95. data/lib/nokogiri/css/parser_extras.rb +91 -0
  96. data/lib/nokogiri/css/syntax_error.rb +7 -0
  97. data/lib/nokogiri/css/tokenizer.rb +148 -5
  98. data/lib/nokogiri/css/tokenizer.rex +31 -39
  99. data/lib/nokogiri/css/xpath_visitor.rb +109 -51
  100. data/lib/nokogiri/css.rb +24 -3
  101. data/lib/nokogiri/decorators/slop.rb +42 -0
  102. data/lib/nokogiri/html/builder.rb +27 -1
  103. data/lib/nokogiri/html/document.rb +329 -3
  104. data/lib/nokogiri/html/document_fragment.rb +39 -0
  105. data/lib/nokogiri/html/element_description.rb +23 -0
  106. data/lib/nokogiri/html/element_description_defaults.rb +671 -0
  107. data/lib/nokogiri/html/entity_lookup.rb +13 -0
  108. data/lib/nokogiri/html/sax/parser.rb +35 -4
  109. data/lib/nokogiri/html/sax/parser_context.rb +16 -0
  110. data/lib/nokogiri/html/sax/push_parser.rb +36 -0
  111. data/lib/nokogiri/html.rb +18 -76
  112. data/lib/nokogiri/syntax_error.rb +4 -0
  113. data/lib/nokogiri/version.rb +106 -1
  114. data/lib/nokogiri/xml/attr.rb +14 -0
  115. data/lib/nokogiri/xml/attribute_decl.rb +18 -0
  116. data/lib/nokogiri/xml/builder.rb +395 -31
  117. data/lib/nokogiri/xml/cdata.rb +4 -2
  118. data/lib/nokogiri/xml/character_data.rb +7 -0
  119. data/lib/nokogiri/xml/document.rb +267 -12
  120. data/lib/nokogiri/xml/document_fragment.rb +149 -0
  121. data/lib/nokogiri/xml/dtd.rb +27 -1
  122. data/lib/nokogiri/xml/element_content.rb +36 -0
  123. data/lib/nokogiri/xml/element_decl.rb +13 -0
  124. data/lib/nokogiri/xml/entity_decl.rb +19 -0
  125. data/lib/nokogiri/xml/namespace.rb +13 -0
  126. data/lib/nokogiri/xml/node/save_options.rb +61 -0
  127. data/lib/nokogiri/xml/node.rb +748 -109
  128. data/lib/nokogiri/xml/node_set.rb +200 -72
  129. data/lib/nokogiri/xml/parse_options.rb +120 -0
  130. data/lib/nokogiri/xml/pp/character_data.rb +18 -0
  131. data/lib/nokogiri/xml/pp/node.rb +56 -0
  132. data/lib/nokogiri/xml/pp.rb +2 -0
  133. data/lib/nokogiri/xml/processing_instruction.rb +8 -0
  134. data/lib/nokogiri/xml/reader.rb +102 -4
  135. data/lib/nokogiri/xml/relax_ng.rb +32 -0
  136. data/lib/nokogiri/xml/sax/document.rb +114 -2
  137. data/lib/nokogiri/xml/sax/parser.rb +97 -7
  138. data/lib/nokogiri/xml/sax/parser_context.rb +16 -0
  139. data/lib/nokogiri/xml/sax/push_parser.rb +60 -0
  140. data/lib/nokogiri/xml/sax.rb +2 -7
  141. data/lib/nokogiri/xml/schema.rb +63 -0
  142. data/lib/nokogiri/xml/searchable.rb +221 -0
  143. data/lib/nokogiri/xml/syntax_error.rb +27 -1
  144. data/lib/nokogiri/xml/text.rb +4 -1
  145. data/lib/nokogiri/xml/xpath/syntax_error.rb +11 -0
  146. data/lib/nokogiri/xml/xpath.rb +4 -0
  147. data/lib/nokogiri/xml/xpath_context.rb +3 -1
  148. data/lib/nokogiri/xml.rb +45 -38
  149. data/lib/nokogiri/xslt/stylesheet.rb +19 -0
  150. data/lib/nokogiri/xslt.rb +47 -2
  151. data/lib/nokogiri.rb +117 -24
  152. data/lib/xsd/xmlparser/nokogiri.rb +102 -0
  153. data/patches/sort-patches-by-date +25 -0
  154. data/ports/archives/libxml2-2.9.4.tar.gz +0 -0
  155. data/ports/archives/libxslt-1.1.29.tar.gz +0 -0
  156. data/suppressions/README.txt +1 -0
  157. data/suppressions/nokogiri_ree-1.8.7.358.supp +61 -0
  158. data/suppressions/nokogiri_ruby-1.8.7.370.supp +0 -0
  159. data/suppressions/nokogiri_ruby-1.9.2.320.supp +28 -0
  160. data/suppressions/nokogiri_ruby-1.9.3.327.supp +28 -0
  161. data/tasks/test.rb +100 -0
  162. data/test/css/test_nthiness.rb +73 -6
  163. data/test/css/test_parser.rb +184 -39
  164. data/test/css/test_tokenizer.rb +72 -19
  165. data/test/css/test_xpath_visitor.rb +44 -2
  166. data/test/decorators/test_slop.rb +20 -0
  167. data/test/files/2ch.html +108 -0
  168. data/test/files/GH_1042.html +18 -0
  169. data/test/files/address_book.rlx +12 -0
  170. data/test/files/address_book.xml +10 -0
  171. data/test/files/atom.xml +344 -0
  172. data/test/files/bar/bar.xsd +4 -0
  173. data/test/files/bogus.xml +0 -0
  174. data/test/files/dont_hurt_em_why.xml +422 -0
  175. data/test/files/encoding.html +82 -0
  176. data/test/files/encoding.xhtml +84 -0
  177. data/test/files/exslt.xml +8 -0
  178. data/test/files/exslt.xslt +35 -0
  179. data/test/files/foo/foo.xsd +4 -0
  180. data/test/files/metacharset.html +10 -0
  181. data/test/files/namespace_pressure_test.xml +1684 -0
  182. data/test/files/noencoding.html +47 -0
  183. data/test/files/po.xml +32 -0
  184. data/test/files/po.xsd +66 -0
  185. data/test/files/saml/saml20assertion_schema.xsd +283 -0
  186. data/test/files/saml/saml20protocol_schema.xsd +302 -0
  187. data/test/files/saml/xenc_schema.xsd +146 -0
  188. data/test/files/saml/xmldsig_schema.xsd +318 -0
  189. data/test/files/shift_jis.html +10 -0
  190. data/test/files/shift_jis.xml +5 -0
  191. data/test/files/shift_jis_no_charset.html +9 -0
  192. data/test/files/slow-xpath.xml +25509 -0
  193. data/test/files/snuggles.xml +3 -0
  194. data/test/files/staff.dtd +10 -0
  195. data/test/files/test_document_url/bar.xml +2 -0
  196. data/test/files/test_document_url/document.dtd +4 -0
  197. data/test/files/test_document_url/document.xml +6 -0
  198. data/test/files/tlm.html +2 -1
  199. data/test/files/to_be_xincluded.xml +2 -0
  200. data/test/files/valid_bar.xml +2 -0
  201. data/test/files/xinclude.xml +4 -0
  202. data/test/helper.rb +124 -13
  203. data/test/html/sax/test_parser.rb +118 -4
  204. data/test/html/sax/test_parser_context.rb +46 -0
  205. data/test/html/sax/test_push_parser.rb +87 -0
  206. data/test/html/test_builder.rb +94 -8
  207. data/test/html/test_document.rb +626 -11
  208. data/test/html/test_document_encoding.rb +145 -0
  209. data/test/html/test_document_fragment.rb +301 -0
  210. data/test/html/test_element_description.rb +105 -0
  211. data/test/html/test_named_characters.rb +14 -0
  212. data/test/html/test_node.rb +212 -0
  213. data/test/html/test_node_encoding.rb +85 -0
  214. data/test/namespaces/test_additional_namespaces_in_builder_doc.rb +14 -0
  215. data/test/namespaces/test_namespaces_aliased_default.rb +24 -0
  216. data/test/namespaces/test_namespaces_in_builder_doc.rb +75 -0
  217. data/test/namespaces/test_namespaces_in_cloned_doc.rb +31 -0
  218. data/test/namespaces/test_namespaces_in_created_doc.rb +75 -0
  219. data/test/namespaces/test_namespaces_in_parsed_doc.rb +80 -0
  220. data/test/namespaces/test_namespaces_preservation.rb +31 -0
  221. data/test/test_convert_xpath.rb +2 -47
  222. data/test/test_css_cache.rb +45 -0
  223. data/test/test_encoding_handler.rb +48 -0
  224. data/test/test_memory_leak.rb +156 -0
  225. data/test/test_nokogiri.rb +103 -1
  226. data/test/test_soap4r_sax.rb +52 -0
  227. data/test/test_xslt_transforms.rb +293 -8
  228. data/test/xml/node/test_save_options.rb +28 -0
  229. data/test/xml/node/test_subclass.rb +44 -0
  230. data/test/xml/sax/test_parser.rb +309 -8
  231. data/test/xml/sax/test_parser_context.rb +115 -0
  232. data/test/xml/sax/test_push_parser.rb +157 -0
  233. data/test/xml/test_attr.rb +67 -0
  234. data/test/xml/test_attribute_decl.rb +86 -0
  235. data/test/xml/test_builder.rb +327 -2
  236. data/test/xml/test_c14n.rb +180 -0
  237. data/test/xml/test_cdata.rb +32 -2
  238. data/test/xml/test_comment.rb +40 -0
  239. data/test/xml/test_document.rb +846 -35
  240. data/test/xml/test_document_encoding.rb +31 -0
  241. data/test/xml/test_document_fragment.rb +271 -0
  242. data/test/xml/test_dtd.rb +153 -9
  243. data/test/xml/test_dtd_encoding.rb +31 -0
  244. data/test/xml/test_element_content.rb +56 -0
  245. data/test/xml/test_element_decl.rb +73 -0
  246. data/test/xml/test_entity_decl.rb +122 -0
  247. data/test/xml/test_entity_reference.rb +251 -0
  248. data/test/xml/test_namespace.rb +96 -0
  249. data/test/xml/test_node.rb +1126 -105
  250. data/test/xml/test_node_attributes.rb +115 -0
  251. data/test/xml/test_node_encoding.rb +69 -0
  252. data/test/xml/test_node_inheritance.rb +32 -0
  253. data/test/xml/test_node_reparenting.rb +549 -0
  254. data/test/xml/test_node_set.rb +668 -9
  255. data/test/xml/test_parse_options.rb +64 -0
  256. data/test/xml/test_processing_instruction.rb +30 -0
  257. data/test/xml/test_reader.rb +589 -0
  258. data/test/xml/test_reader_encoding.rb +134 -0
  259. data/test/xml/test_relax_ng.rb +60 -0
  260. data/test/xml/test_schema.rb +142 -0
  261. data/test/xml/test_syntax_error.rb +30 -0
  262. data/test/xml/test_text.rb +49 -2
  263. data/test/xml/test_unparented_node.rb +440 -0
  264. data/test/xml/test_xinclude.rb +83 -0
  265. data/test/xml/test_xpath.rb +445 -0
  266. data/test/xslt/test_custom_functions.rb +133 -0
  267. data/test/xslt/test_exception_handling.rb +37 -0
  268. data/test_all +107 -0
  269. metadata +459 -115
  270. data/History.txt +0 -6
  271. data/README.ja.txt +0 -86
  272. data/README.txt +0 -87
  273. data/ext/nokogiri/html_sax_parser.c +0 -32
  274. data/ext/nokogiri/html_sax_parser.h +0 -11
  275. data/ext/nokogiri/native.c +0 -40
  276. data/ext/nokogiri/native.h +0 -51
  277. data/ext/nokogiri/xml_xpath.c +0 -46
  278. data/ext/nokogiri/xml_xpath.h +0 -11
  279. data/lib/nokogiri/css/generated_parser.rb +0 -653
  280. data/lib/nokogiri/css/generated_tokenizer.rb +0 -159
  281. data/lib/nokogiri/decorators/hpricot/node.rb +0 -58
  282. data/lib/nokogiri/decorators/hpricot/node_set.rb +0 -14
  283. data/lib/nokogiri/decorators/hpricot/xpath_visitor.rb +0 -17
  284. data/lib/nokogiri/decorators/hpricot.rb +0 -3
  285. data/lib/nokogiri/decorators.rb +0 -1
  286. data/lib/nokogiri/hpricot.rb +0 -47
  287. data/lib/nokogiri/xml/after_handler.rb +0 -18
  288. data/lib/nokogiri/xml/before_handler.rb +0 -32
  289. data/lib/nokogiri/xml/element.rb +0 -6
  290. data/lib/nokogiri/xml/entity_declaration.rb +0 -9
  291. data/nokogiri.gemspec +0 -34
  292. data/test/hpricot/files/basic.xhtml +0 -17
  293. data/test/hpricot/files/boingboing.html +0 -2266
  294. data/test/hpricot/files/cy0.html +0 -3653
  295. data/test/hpricot/files/immob.html +0 -400
  296. data/test/hpricot/files/pace_application.html +0 -1320
  297. data/test/hpricot/files/tenderlove.html +0 -16
  298. data/test/hpricot/files/uswebgen.html +0 -220
  299. data/test/hpricot/files/utf8.html +0 -1054
  300. data/test/hpricot/files/week9.html +0 -1723
  301. data/test/hpricot/files/why.xml +0 -19
  302. data/test/hpricot/load_files.rb +0 -7
  303. data/test/hpricot/test_alter.rb +0 -67
  304. data/test/hpricot/test_builder.rb +0 -27
  305. data/test/hpricot/test_parser.rb +0 -423
  306. data/test/hpricot/test_paths.rb +0 -15
  307. data/test/hpricot/test_preserved.rb +0 -78
  308. data/test/hpricot/test_xml.rb +0 -30
  309. data/test/test_reader.rb +0 -222
@@ -1,15 +1,13 @@
1
1
  #ifndef NOKOGIRI_XML_NODE
2
2
  #define NOKOGIRI_XML_NODE
3
3
 
4
- #include <native.h>
4
+ #include <nokogiri.h>
5
5
 
6
6
  void init_xml_node();
7
- VALUE Nokogiri_wrap_xml_node(xmlNodePtr root);
8
7
 
9
8
  extern VALUE cNokogiriXmlNode ;
10
- VALUE Nokogiri_wrap_xml_node(xmlNodePtr node) ;
9
+ extern VALUE cNokogiriXmlElement ;
10
+
11
+ VALUE Nokogiri_wrap_xml_node(VALUE klass, xmlNodePtr node) ;
11
12
  void Nokogiri_xml_node_properties(xmlNodePtr node, VALUE attr_hash) ;
12
- void Nokogiri_xml_node_namespaces(xmlNodePtr node, VALUE attr_hash) ;
13
- int Nokogiri_xml_node_owned_get(xmlNodePtr node) ;
14
- void Nokogiri_xml_node_owned_set(xmlNodePtr node) ;
15
13
  #endif
@@ -1,5 +1,69 @@
1
1
  #include <xml_node_set.h>
2
+ #include <xml_namespace.h>
2
3
  #include <libxml/xpathInternals.h>
4
+
5
+ static ID decorate ;
6
+ static void xpath_node_set_del(xmlNodeSetPtr cur, xmlNodePtr val);
7
+
8
+
9
+ static void Check_Node_Set_Node_Type(VALUE node)
10
+ {
11
+ if (!(rb_obj_is_kind_of(node, cNokogiriXmlNode) ||
12
+ rb_obj_is_kind_of(node, cNokogiriXmlNamespace))) {
13
+ rb_raise(rb_eArgError, "node must be a Nokogiri::XML::Node or Nokogiri::XML::Namespace");
14
+ }
15
+ }
16
+
17
+
18
+ static void deallocate(xmlNodeSetPtr node_set)
19
+ {
20
+ /*
21
+ *
22
+ * since xpath queries return copies of the xmlNs structs,
23
+ * xmlXPathFreeNodeSet() frees those xmlNs structs that are in the
24
+ * NodeSet.
25
+ *
26
+ * this is bad if someone is still trying to use the Namespace object wrapped
27
+ * around the xmlNs, so we need to avoid that.
28
+ *
29
+ * here we reproduce xmlXPathFreeNodeSet() without the xmlNs logic.
30
+ *
31
+ * this doesn't cause a leak because Namespace objects that are in an XPath
32
+ * query NodeSet are given their own lifecycle in
33
+ * Nokogiri_wrap_xml_namespace().
34
+ */
35
+ NOKOGIRI_DEBUG_START(node_set) ;
36
+ if (node_set->nodeTab != NULL)
37
+ xmlFree(node_set->nodeTab);
38
+
39
+ xmlFree(node_set);
40
+ NOKOGIRI_DEBUG_END(node_set) ;
41
+ }
42
+
43
+ static VALUE allocate(VALUE klass)
44
+ {
45
+ return Nokogiri_wrap_xml_node_set(xmlXPathNodeSetCreate(NULL), Qnil);
46
+ }
47
+
48
+
49
+ /*
50
+ * call-seq:
51
+ * dup
52
+ *
53
+ * Duplicate this node set
54
+ */
55
+ static VALUE duplicate(VALUE self)
56
+ {
57
+ xmlNodeSetPtr node_set;
58
+ xmlNodeSetPtr dupl;
59
+
60
+ Data_Get_Struct(self, xmlNodeSet, node_set);
61
+
62
+ dupl = xmlXPathNodeSetMerge(NULL, node_set);
63
+
64
+ return Nokogiri_wrap_xml_node_set(dupl, rb_iv_get(self, "@document"));
65
+ }
66
+
3
67
  /*
4
68
  * call-seq:
5
69
  * length
@@ -9,12 +73,10 @@
9
73
  static VALUE length(VALUE self)
10
74
  {
11
75
  xmlNodeSetPtr node_set;
12
- Data_Get_Struct(self, xmlNodeSet, node_set);
13
76
 
14
- if(node_set)
15
- return INT2NUM(node_set->nodeNr);
77
+ Data_Get_Struct(self, xmlNodeSet, node_set);
16
78
 
17
- return INT2NUM(0);
79
+ return node_set ? INT2NUM(node_set->nodeNr) : INT2NUM(0);
18
80
  }
19
81
 
20
82
  /*
@@ -28,97 +90,396 @@ static VALUE push(VALUE self, VALUE rb_node)
28
90
  xmlNodeSetPtr node_set;
29
91
  xmlNodePtr node;
30
92
 
93
+ Check_Node_Set_Node_Type(rb_node);
94
+
31
95
  Data_Get_Struct(self, xmlNodeSet, node_set);
32
96
  Data_Get_Struct(rb_node, xmlNode, node);
97
+
33
98
  xmlXPathNodeSetAdd(node_set, node);
99
+
34
100
  return self;
35
101
  }
36
102
 
103
+ /*
104
+ * call-seq:
105
+ * delete(node)
106
+ *
107
+ * Delete +node+ from the NodeSet, if it is a member. Returns the deleted node
108
+ * if found, otherwise returns nil.
109
+ */
110
+ static VALUE
111
+ delete(VALUE self, VALUE rb_node)
112
+ {
113
+ xmlNodeSetPtr node_set;
114
+ xmlNodePtr node;
115
+
116
+ Check_Node_Set_Node_Type(rb_node);
117
+
118
+ Data_Get_Struct(self, xmlNodeSet, node_set);
119
+ Data_Get_Struct(rb_node, xmlNode, node);
120
+
121
+ if (xmlXPathNodeSetContains(node_set, node)) {
122
+ xpath_node_set_del(node_set, node);
123
+ return rb_node;
124
+ }
125
+ return Qnil ;
126
+ }
127
+
128
+
129
+ /*
130
+ * call-seq:
131
+ * &(node_set)
132
+ *
133
+ * Set Intersection — Returns a new NodeSet containing nodes common to the two NodeSets.
134
+ */
135
+ static VALUE intersection(VALUE self, VALUE rb_other)
136
+ {
137
+ xmlNodeSetPtr node_set, other ;
138
+ xmlNodeSetPtr intersection;
139
+
140
+ if(!rb_obj_is_kind_of(rb_other, cNokogiriXmlNodeSet))
141
+ rb_raise(rb_eArgError, "node_set must be a Nokogiri::XML::NodeSet");
142
+
143
+ Data_Get_Struct(self, xmlNodeSet, node_set);
144
+ Data_Get_Struct(rb_other, xmlNodeSet, other);
145
+
146
+ intersection = xmlXPathIntersection(node_set, other);
147
+ return Nokogiri_wrap_xml_node_set(intersection, rb_iv_get(self, "@document"));
148
+ }
149
+
150
+
37
151
  /*
38
152
  * call-seq:
39
- * [](i)
153
+ * include?(node)
40
154
  *
41
- * Get the node at index +i+
155
+ * Returns true if any member of node set equals +node+.
42
156
  */
43
- static VALUE index_at(VALUE self, VALUE number)
157
+ static VALUE include_eh(VALUE self, VALUE rb_node)
44
158
  {
45
- int i = NUM2INT(number);
46
159
  xmlNodeSetPtr node_set;
160
+ xmlNodePtr node;
161
+
162
+ Check_Node_Set_Node_Type(rb_node);
163
+
164
+ Data_Get_Struct(self, xmlNodeSet, node_set);
165
+ Data_Get_Struct(rb_node, xmlNode, node);
166
+
167
+ return (xmlXPathNodeSetContains(node_set, node) ? Qtrue : Qfalse);
168
+ }
169
+
170
+
171
+ /*
172
+ * call-seq:
173
+ * |(node_set)
174
+ *
175
+ * Returns a new set built by merging the set and the elements of the given
176
+ * set.
177
+ */
178
+ static VALUE set_union(VALUE self, VALUE rb_other)
179
+ {
180
+ xmlNodeSetPtr node_set, other;
181
+ xmlNodeSetPtr new;
182
+
183
+ if(!rb_obj_is_kind_of(rb_other, cNokogiriXmlNodeSet))
184
+ rb_raise(rb_eArgError, "node_set must be a Nokogiri::XML::NodeSet");
185
+
47
186
  Data_Get_Struct(self, xmlNodeSet, node_set);
187
+ Data_Get_Struct(rb_other, xmlNodeSet, other);
188
+
189
+ new = xmlXPathNodeSetMerge(NULL, node_set);
190
+ new = xmlXPathNodeSetMerge(new, other);
191
+
192
+ return Nokogiri_wrap_xml_node_set(new, rb_iv_get(self, "@document"));
193
+ }
194
+
195
+ /*
196
+ * call-seq:
197
+ * -(node_set)
198
+ *
199
+ * Difference - returns a new NodeSet that is a copy of this NodeSet, removing
200
+ * each item that also appears in +node_set+
201
+ */
202
+ static VALUE minus(VALUE self, VALUE rb_other)
203
+ {
204
+ xmlNodeSetPtr node_set, other;
205
+ xmlNodeSetPtr new;
206
+ int j ;
48
207
 
49
- if(i >= node_set->nodeNr || abs(i) > node_set->nodeNr)
208
+ if(!rb_obj_is_kind_of(rb_other, cNokogiriXmlNodeSet))
209
+ rb_raise(rb_eArgError, "node_set must be a Nokogiri::XML::NodeSet");
210
+
211
+ Data_Get_Struct(self, xmlNodeSet, node_set);
212
+ Data_Get_Struct(rb_other, xmlNodeSet, other);
213
+
214
+ new = xmlXPathNodeSetMerge(NULL, node_set);
215
+ for (j = 0 ; j < other->nodeNr ; ++j) {
216
+ xpath_node_set_del(new, other->nodeTab[j]);
217
+ }
218
+
219
+ return Nokogiri_wrap_xml_node_set(new, rb_iv_get(self, "@document"));
220
+ }
221
+
222
+
223
+ static VALUE index_at(VALUE self, long offset)
224
+ {
225
+ xmlNodeSetPtr node_set;
226
+
227
+ Data_Get_Struct(self, xmlNodeSet, node_set);
228
+
229
+ if (offset >= node_set->nodeNr || abs((int)offset) > node_set->nodeNr) {
50
230
  return Qnil;
231
+ }
51
232
 
52
- if(i < 0)
53
- i = i + node_set->nodeNr;
233
+ if (offset < 0) { offset += node_set->nodeNr ; }
54
234
 
55
- return Nokogiri_wrap_xml_node(node_set->nodeTab[i]);
235
+ return Nokogiri_wrap_xml_node_set_node(node_set->nodeTab[offset], self);
56
236
  }
57
237
 
58
- static void gc_mark(xmlNodeSetPtr node_set)
238
+ static VALUE subseq(VALUE self, long beg, long len)
239
+ {
240
+ long j;
241
+ xmlNodeSetPtr node_set;
242
+ xmlNodeSetPtr new_set ;
243
+
244
+ Data_Get_Struct(self, xmlNodeSet, node_set);
245
+
246
+ if (beg > node_set->nodeNr) return Qnil ;
247
+ if (beg < 0 || len < 0) return Qnil ;
248
+
249
+ if ((beg + len) > node_set->nodeNr) {
250
+ len = node_set->nodeNr - beg ;
251
+ }
252
+
253
+ new_set = xmlXPathNodeSetCreate(NULL);
254
+ for (j = beg ; j < beg+len ; ++j) {
255
+ xmlXPathNodeSetAddUnique(new_set, node_set->nodeTab[j]);
256
+ }
257
+ return Nokogiri_wrap_xml_node_set(new_set, rb_iv_get(self, "@document"));
258
+ }
259
+
260
+ /*
261
+ * call-seq:
262
+ * [index] -> Node or nil
263
+ * [start, length] -> NodeSet or nil
264
+ * [range] -> NodeSet or nil
265
+ * slice(index) -> Node or nil
266
+ * slice(start, length) -> NodeSet or nil
267
+ * slice(range) -> NodeSet or nil
268
+ *
269
+ * Element reference - returns the node at +index+, or returns a NodeSet
270
+ * containing nodes starting at +start+ and continuing for +length+ elements, or
271
+ * returns a NodeSet containing nodes specified by +range+. Negative +indices+
272
+ * count backward from the end of the +node_set+ (-1 is the last node). Returns
273
+ * nil if the +index+ (or +start+) is out of range.
274
+ */
275
+ static VALUE slice(int argc, VALUE *argv, VALUE self)
59
276
  {
60
- int j ;
61
- for (j = 0 ; j < node_set->nodeNr ; ++j) {
62
- if (node_set->nodeTab[j]->_private)
63
- rb_gc_mark((VALUE)node_set->nodeTab[j]->_private);
277
+ VALUE arg ;
278
+ long beg, len ;
279
+ xmlNodeSetPtr node_set;
280
+
281
+ Data_Get_Struct(self, xmlNodeSet, node_set);
282
+
283
+ if (argc == 2) {
284
+ beg = NUM2LONG(argv[0]);
285
+ len = NUM2LONG(argv[1]);
286
+ if (beg < 0) {
287
+ beg += node_set->nodeNr ;
64
288
  }
289
+ return subseq(self, beg, len);
290
+ }
291
+
292
+ if (argc != 1) {
293
+ rb_scan_args(argc, argv, "11", NULL, NULL);
294
+ }
295
+ arg = argv[0];
296
+
297
+ if (FIXNUM_P(arg)) {
298
+ return index_at(self, FIX2LONG(arg));
299
+ }
300
+
301
+ /* if arg is Range */
302
+ switch (rb_range_beg_len(arg, &beg, &len, (long)node_set->nodeNr, 0)) {
303
+ case Qfalse:
304
+ break;
305
+ case Qnil:
306
+ return Qnil;
307
+ default:
308
+ return subseq(self, beg, len);
309
+ }
310
+
311
+ return index_at(self, NUM2LONG(arg));
65
312
  }
66
313
 
67
- static void deallocate(xmlNodeSetPtr node_set)
314
+
315
+ /*
316
+ * call-seq:
317
+ * to_a
318
+ *
319
+ * Return this list as an Array
320
+ */
321
+ static VALUE to_array(VALUE self, VALUE rb_node)
322
+ {
323
+ xmlNodeSetPtr node_set ;
324
+ VALUE list;
325
+ int i;
326
+
327
+ Data_Get_Struct(self, xmlNodeSet, node_set);
328
+
329
+ list = rb_ary_new2(node_set->nodeNr);
330
+ for(i = 0; i < node_set->nodeNr; i++) {
331
+ VALUE elt = Nokogiri_wrap_xml_node_set_node(node_set->nodeTab[i], self);
332
+ rb_ary_push( list, elt );
333
+ }
334
+
335
+ return list;
336
+ }
337
+
338
+ /*
339
+ * call-seq:
340
+ * unlink
341
+ *
342
+ * Unlink this NodeSet and all Node objects it contains from their current context.
343
+ */
344
+ static VALUE unlink_nodeset(VALUE self)
345
+ {
346
+ xmlNodeSetPtr node_set;
347
+ int j, nodeNr ;
348
+
349
+ Data_Get_Struct(self, xmlNodeSet, node_set);
350
+
351
+ nodeNr = node_set->nodeNr ;
352
+ for (j = 0 ; j < nodeNr ; j++) {
353
+ if (! Nokogiri_namespace_eh(node_set->nodeTab[j])) {
354
+ VALUE node ;
355
+ xmlNodePtr node_ptr;
356
+ node = Nokogiri_wrap_xml_node(Qnil, node_set->nodeTab[j]);
357
+ rb_funcall(node, rb_intern("unlink"), 0); /* modifies the C struct out from under the object */
358
+ Data_Get_Struct(node, xmlNode, node_ptr);
359
+ node_set->nodeTab[j] = node_ptr ;
360
+ }
361
+ }
362
+ return self ;
363
+ }
364
+
365
+
366
+ static void reify_node_set_namespaces(VALUE self)
68
367
  {
69
368
  /*
70
- * xmlXPathFreeNodeSet() contains an implicit assumption that it is being
71
- * called before any of its pointed-to nodes have been free()d. this
72
- * assumption lies in the operation where it dereferences nodeTab pointers
73
- * while searching for namespace nodes to free.
74
- *
75
- * however, since Ruby's GC mechanism cannot guarantee the strict order in
76
- * which ruby objects will be GC'd, nodes may be garbage collected before a
77
- * nodeset containing pointers to those nodes. (this is true regardless of
78
- * how we declare dependencies between objects with rb_gc_mark().)
369
+ * as mentioned in deallocate() above, xmlNs structs returned in an XPath
370
+ * NodeSet are duplicates, and we don't clean them up at deallocate() time.
79
371
  *
80
- * as a result, xmlXPathFreeNodeSet() will perform unsafe memory operations,
81
- * and calling it would be evil.
372
+ * as a result, we need to make sure the Ruby manages this memory. we do this
373
+ * by forcing the creation of a Ruby object wrapped around the xmlNs.
82
374
  *
83
- * on the bright side, though, Nokogiri's API currently does not cause
84
- * namespace nodes to be included in node sets, ever.
375
+ * we also have to make sure that the NodeSet has a reference to the
376
+ * Namespace object, otherwise GC will kick in and the Namespace won't be
377
+ * marked.
85
378
  *
86
- * armed with that fact, we examined xmlXPathFreeNodeSet() and related libxml
87
- * code and determined that, within the Nokogiri abstraction, we will not
88
- * leak memory if we simply free the node set's memory directly. that's only
89
- * quasi-evil!
90
- *
91
- * there's probably a lesson in here somewhere about intermingling, within a
92
- * single array, structs with different memory-ownership semantics. or more
93
- * generally, a lesson about building an API in C/C++ that does not contain
94
- * assumptions about the strict order in which memory will be released. hey,
95
- * that sounds like a great idea for a blog post! get to it!
96
- *
97
- * "In Valgrind We Trust." seriously.
379
+ * we *could* do this safely with *all* the nodes in the NodeSet, but we only
380
+ * *need* to do it for xmlNs structs, and so you get the code we have here.
98
381
  */
99
- NOKOGIRI_DEBUG_START(node_set) ;
100
- if (node_set->nodeTab != NULL)
101
- xmlFree(node_set->nodeTab);
102
- xmlFree(node_set);
103
- NOKOGIRI_DEBUG_END(node_set) ;
382
+ int j ;
383
+ xmlNodeSetPtr node_set ;
384
+ VALUE namespace_cache ;
385
+
386
+ Data_Get_Struct(self, xmlNodeSet, node_set);
387
+
388
+ namespace_cache = rb_iv_get(self, "@namespace_cache");
389
+
390
+ for (j = 0 ; j < node_set->nodeNr ; j++) {
391
+ if (Nokogiri_namespace_eh(node_set->nodeTab[j])) {
392
+ rb_ary_push(namespace_cache, Nokogiri_wrap_xml_node_set_node(node_set->nodeTab[j], self));
393
+ }
394
+ }
104
395
  }
105
396
 
106
- static VALUE allocate(VALUE klass)
397
+
398
+ VALUE Nokogiri_wrap_xml_node_set(xmlNodeSetPtr node_set, VALUE document)
107
399
  {
108
- return Nokogiri_wrap_xml_node_set(xmlXPathNodeSetCreate(NULL));
400
+ VALUE new_set ;
401
+
402
+ if (node_set == NULL) {
403
+ node_set = xmlXPathNodeSetCreate(NULL);
404
+ }
405
+
406
+ new_set = Data_Wrap_Struct(cNokogiriXmlNodeSet, 0, deallocate, node_set);
407
+
408
+ if (!NIL_P(document)) {
409
+ rb_iv_set(new_set, "@document", document);
410
+ rb_funcall(document, decorate, 1, new_set);
411
+ }
412
+
413
+ rb_iv_set(new_set, "@namespace_cache", rb_ary_new());
414
+ reify_node_set_namespaces(new_set);
415
+
416
+ return new_set ;
109
417
  }
110
418
 
111
- VALUE Nokogiri_wrap_xml_node_set(xmlNodeSetPtr node_set)
419
+ VALUE Nokogiri_wrap_xml_node_set_node(xmlNodePtr node, VALUE node_set)
112
420
  {
113
- return Data_Wrap_Struct(cNokogiriXmlNodeSet, gc_mark, deallocate, node_set);
421
+ xmlDocPtr document ;
422
+
423
+ if (Nokogiri_namespace_eh(node)) {
424
+ Data_Get_Struct(rb_iv_get(node_set, "@document"), xmlDoc, document);
425
+ return Nokogiri_wrap_xml_namespace(document, (xmlNsPtr)node);
426
+ } else {
427
+ return Nokogiri_wrap_xml_node(Qnil, node);
428
+ }
114
429
  }
115
430
 
431
+
432
+ static void xpath_node_set_del(xmlNodeSetPtr cur, xmlNodePtr val)
433
+ {
434
+ /*
435
+ * as mentioned a few times above, we do not want to free xmlNs structs
436
+ * outside of the Namespace lifecycle.
437
+ *
438
+ * xmlXPathNodeSetDel() frees xmlNs structs, and so here we reproduce that
439
+ * function with the xmlNs logic.
440
+ */
441
+ int i;
442
+
443
+ if (cur == NULL) return;
444
+ if (val == NULL) return;
445
+
446
+ /*
447
+ * find node in nodeTab
448
+ */
449
+ for (i = 0;i < cur->nodeNr;i++)
450
+ if (cur->nodeTab[i] == val) break;
451
+
452
+ if (i >= cur->nodeNr) { /* not found */
453
+ return;
454
+ }
455
+ cur->nodeNr--;
456
+ for (;i < cur->nodeNr;i++)
457
+ cur->nodeTab[i] = cur->nodeTab[i + 1];
458
+ cur->nodeTab[cur->nodeNr] = NULL;
459
+ }
460
+
461
+
116
462
  VALUE cNokogiriXmlNodeSet ;
117
463
  void init_xml_node_set(void)
118
464
  {
119
- VALUE klass = cNokogiriXmlNodeSet = rb_eval_string("Nokogiri::XML::NodeSet");
465
+ VALUE nokogiri = rb_define_module("Nokogiri");
466
+ VALUE xml = rb_define_module_under(nokogiri, "XML");
467
+ VALUE klass = rb_define_class_under(xml, "NodeSet", rb_cObject);
468
+ cNokogiriXmlNodeSet = klass;
469
+
120
470
  rb_define_alloc_func(klass, allocate);
121
471
  rb_define_method(klass, "length", length, 0);
122
- rb_define_method(klass, "[]", index_at, 1);
472
+ rb_define_method(klass, "[]", slice, -1);
473
+ rb_define_method(klass, "slice", slice, -1);
123
474
  rb_define_method(klass, "push", push, 1);
475
+ rb_define_method(klass, "|", set_union, 1);
476
+ rb_define_method(klass, "-", minus, 1);
477
+ rb_define_method(klass, "unlink", unlink_nodeset, 0);
478
+ rb_define_method(klass, "to_a", to_array, 0);
479
+ rb_define_method(klass, "dup", duplicate, 0);
480
+ rb_define_method(klass, "delete", delete, 1);
481
+ rb_define_method(klass, "&", intersection, 1);
482
+ rb_define_method(klass, "include?", include_eh, 1);
483
+
484
+ decorate = rb_intern("decorate");
124
485
  }
@@ -1,9 +1,13 @@
1
1
  #ifndef NOKOGIRI_XML_NODE_SET
2
2
  #define NOKOGIRI_XML_NODE_SET
3
3
 
4
- #include <native.h>
4
+ #include <nokogiri.h>
5
5
  void init_xml_node_set();
6
6
 
7
7
  extern VALUE cNokogiriXmlNodeSet ;
8
- VALUE Nokogiri_wrap_xml_node_set(xmlNodeSetPtr node_set) ;
8
+ VALUE Nokogiri_wrap_xml_node_set(xmlNodeSetPtr node_set, VALUE document) ;
9
+ VALUE Nokogiri_wrap_xml_node_set_node(xmlNodePtr node, VALUE node_set) ;
10
+ VALUE Nokogiri_wrap_xml_node_set_namespace(xmlNsPtr node, VALUE node_set) ;
11
+ int Nokogiri_namespace_eh(xmlNodePtr node) ;
12
+
9
13
  #endif
@@ -0,0 +1,56 @@
1
+ #include <xml_processing_instruction.h>
2
+
3
+ /*
4
+ * call-seq:
5
+ * new(document, name, content)
6
+ *
7
+ * Create a new ProcessingInstruction element on the +document+ with +name+
8
+ * and +content+
9
+ */
10
+ static VALUE new(int argc, VALUE *argv, VALUE klass)
11
+ {
12
+ xmlDocPtr xml_doc;
13
+ xmlNodePtr node;
14
+ VALUE document;
15
+ VALUE name;
16
+ VALUE content;
17
+ VALUE rest;
18
+ VALUE rb_node;
19
+
20
+ rb_scan_args(argc, argv, "3*", &document, &name, &content, &rest);
21
+
22
+ Data_Get_Struct(document, xmlDoc, xml_doc);
23
+
24
+ node = xmlNewDocPI(
25
+ xml_doc,
26
+ (const xmlChar *)StringValueCStr(name),
27
+ (const xmlChar *)StringValueCStr(content)
28
+ );
29
+
30
+ nokogiri_root_node(node);
31
+
32
+ rb_node = Nokogiri_wrap_xml_node(klass, node);
33
+ rb_obj_call_init(rb_node, argc, argv);
34
+
35
+ if(rb_block_given_p()) rb_yield(rb_node);
36
+
37
+ return rb_node;
38
+ }
39
+
40
+ VALUE cNokogiriXmlProcessingInstruction;
41
+ void init_xml_processing_instruction()
42
+ {
43
+ VALUE nokogiri = rb_define_module("Nokogiri");
44
+ VALUE xml = rb_define_module_under(nokogiri, "XML");
45
+ VALUE node = rb_define_class_under(xml, "Node", rb_cObject);
46
+
47
+ /*
48
+ * ProcessingInstruction represents a ProcessingInstruction node in an xml
49
+ * document.
50
+ */
51
+ VALUE klass = rb_define_class_under(xml, "ProcessingInstruction", node);
52
+
53
+ cNokogiriXmlProcessingInstruction = klass;
54
+
55
+ rb_define_singleton_method(klass, "new", new, -1);
56
+ }
@@ -0,0 +1,9 @@
1
+ #ifndef NOKOGIRI_XML_PROCESSING_INSTRUCTION
2
+ #define NOKOGIRI_XML_PROCESSING_INSTRUCTION
3
+
4
+ #include <nokogiri.h>
5
+
6
+ void init_xml_processing_instruction();
7
+
8
+ extern VALUE cNokogiriXmlProcessingInstruction;
9
+ #endif