nokogiri 1.0.0 → 1.6.8.1

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (309) hide show
  1. checksums.yaml +7 -0
  2. data/.autotest +26 -0
  3. data/.cross_rubies +9 -0
  4. data/.editorconfig +17 -0
  5. data/.gemtest +0 -0
  6. data/.travis.yml +51 -0
  7. data/CHANGELOG.rdoc +1160 -0
  8. data/CONTRIBUTING.md +42 -0
  9. data/C_CODING_STYLE.rdoc +33 -0
  10. data/Gemfile +22 -0
  11. data/LICENSE.txt +31 -0
  12. data/Manifest.txt +284 -40
  13. data/README.md +166 -0
  14. data/ROADMAP.md +111 -0
  15. data/Rakefile +310 -199
  16. data/STANDARD_RESPONSES.md +47 -0
  17. data/Y_U_NO_GEMSPEC.md +155 -0
  18. data/appveyor.yml +22 -0
  19. data/bin/nokogiri +118 -0
  20. data/build_all +45 -0
  21. data/dependencies.yml +29 -0
  22. data/ext/nokogiri/depend +358 -0
  23. data/ext/nokogiri/extconf.rb +664 -34
  24. data/ext/nokogiri/html_document.c +120 -33
  25. data/ext/nokogiri/html_document.h +1 -1
  26. data/ext/nokogiri/html_element_description.c +279 -0
  27. data/ext/nokogiri/html_element_description.h +10 -0
  28. data/ext/nokogiri/html_entity_lookup.c +32 -0
  29. data/ext/nokogiri/html_entity_lookup.h +8 -0
  30. data/ext/nokogiri/html_sax_parser_context.c +116 -0
  31. data/ext/nokogiri/html_sax_parser_context.h +11 -0
  32. data/ext/nokogiri/html_sax_push_parser.c +87 -0
  33. data/ext/nokogiri/html_sax_push_parser.h +9 -0
  34. data/ext/nokogiri/nokogiri.c +145 -0
  35. data/ext/nokogiri/nokogiri.h +131 -0
  36. data/ext/nokogiri/xml_attr.c +94 -0
  37. data/ext/nokogiri/xml_attr.h +9 -0
  38. data/ext/nokogiri/xml_attribute_decl.c +70 -0
  39. data/ext/nokogiri/xml_attribute_decl.h +9 -0
  40. data/ext/nokogiri/xml_cdata.c +23 -19
  41. data/ext/nokogiri/xml_cdata.h +1 -1
  42. data/ext/nokogiri/xml_comment.c +69 -0
  43. data/ext/nokogiri/xml_comment.h +9 -0
  44. data/ext/nokogiri/xml_document.c +501 -54
  45. data/ext/nokogiri/xml_document.h +14 -1
  46. data/ext/nokogiri/xml_document_fragment.c +48 -0
  47. data/ext/nokogiri/xml_document_fragment.h +10 -0
  48. data/ext/nokogiri/xml_dtd.c +109 -24
  49. data/ext/nokogiri/xml_dtd.h +3 -1
  50. data/ext/nokogiri/xml_element_content.c +123 -0
  51. data/ext/nokogiri/xml_element_content.h +10 -0
  52. data/ext/nokogiri/xml_element_decl.c +69 -0
  53. data/ext/nokogiri/xml_element_decl.h +9 -0
  54. data/ext/nokogiri/xml_encoding_handler.c +79 -0
  55. data/ext/nokogiri/xml_encoding_handler.h +8 -0
  56. data/ext/nokogiri/xml_entity_decl.c +110 -0
  57. data/ext/nokogiri/xml_entity_decl.h +10 -0
  58. data/ext/nokogiri/xml_entity_reference.c +52 -0
  59. data/ext/nokogiri/xml_entity_reference.h +9 -0
  60. data/ext/nokogiri/xml_io.c +60 -0
  61. data/ext/nokogiri/xml_io.h +11 -0
  62. data/ext/nokogiri/xml_libxml2_hacks.c +112 -0
  63. data/ext/nokogiri/xml_libxml2_hacks.h +12 -0
  64. data/ext/nokogiri/xml_namespace.c +117 -0
  65. data/ext/nokogiri/xml_namespace.h +13 -0
  66. data/ext/nokogiri/xml_node.c +1285 -315
  67. data/ext/nokogiri/xml_node.h +4 -6
  68. data/ext/nokogiri/xml_node_set.c +415 -54
  69. data/ext/nokogiri/xml_node_set.h +6 -2
  70. data/ext/nokogiri/xml_processing_instruction.c +56 -0
  71. data/ext/nokogiri/xml_processing_instruction.h +9 -0
  72. data/ext/nokogiri/xml_reader.c +316 -77
  73. data/ext/nokogiri/xml_reader.h +1 -1
  74. data/ext/nokogiri/xml_relax_ng.c +161 -0
  75. data/ext/nokogiri/xml_relax_ng.h +9 -0
  76. data/ext/nokogiri/xml_sax_parser.c +215 -80
  77. data/ext/nokogiri/xml_sax_parser.h +30 -1
  78. data/ext/nokogiri/xml_sax_parser_context.c +262 -0
  79. data/ext/nokogiri/xml_sax_parser_context.h +10 -0
  80. data/ext/nokogiri/xml_sax_push_parser.c +115 -0
  81. data/ext/nokogiri/xml_sax_push_parser.h +9 -0
  82. data/ext/nokogiri/xml_schema.c +205 -0
  83. data/ext/nokogiri/xml_schema.h +9 -0
  84. data/ext/nokogiri/xml_syntax_error.c +45 -175
  85. data/ext/nokogiri/xml_syntax_error.h +4 -2
  86. data/ext/nokogiri/xml_text.c +37 -14
  87. data/ext/nokogiri/xml_text.h +1 -1
  88. data/ext/nokogiri/xml_xpath_context.c +230 -13
  89. data/ext/nokogiri/xml_xpath_context.h +2 -1
  90. data/ext/nokogiri/xslt_stylesheet.c +196 -34
  91. data/ext/nokogiri/xslt_stylesheet.h +6 -1
  92. data/lib/nokogiri/css/node.rb +18 -61
  93. data/lib/nokogiri/css/parser.rb +725 -17
  94. data/lib/nokogiri/css/parser.y +126 -63
  95. data/lib/nokogiri/css/parser_extras.rb +91 -0
  96. data/lib/nokogiri/css/syntax_error.rb +7 -0
  97. data/lib/nokogiri/css/tokenizer.rb +148 -5
  98. data/lib/nokogiri/css/tokenizer.rex +31 -39
  99. data/lib/nokogiri/css/xpath_visitor.rb +109 -51
  100. data/lib/nokogiri/css.rb +24 -3
  101. data/lib/nokogiri/decorators/slop.rb +42 -0
  102. data/lib/nokogiri/html/builder.rb +27 -1
  103. data/lib/nokogiri/html/document.rb +329 -3
  104. data/lib/nokogiri/html/document_fragment.rb +39 -0
  105. data/lib/nokogiri/html/element_description.rb +23 -0
  106. data/lib/nokogiri/html/element_description_defaults.rb +671 -0
  107. data/lib/nokogiri/html/entity_lookup.rb +13 -0
  108. data/lib/nokogiri/html/sax/parser.rb +35 -4
  109. data/lib/nokogiri/html/sax/parser_context.rb +16 -0
  110. data/lib/nokogiri/html/sax/push_parser.rb +36 -0
  111. data/lib/nokogiri/html.rb +18 -76
  112. data/lib/nokogiri/syntax_error.rb +4 -0
  113. data/lib/nokogiri/version.rb +106 -1
  114. data/lib/nokogiri/xml/attr.rb +14 -0
  115. data/lib/nokogiri/xml/attribute_decl.rb +18 -0
  116. data/lib/nokogiri/xml/builder.rb +395 -31
  117. data/lib/nokogiri/xml/cdata.rb +4 -2
  118. data/lib/nokogiri/xml/character_data.rb +7 -0
  119. data/lib/nokogiri/xml/document.rb +267 -12
  120. data/lib/nokogiri/xml/document_fragment.rb +149 -0
  121. data/lib/nokogiri/xml/dtd.rb +27 -1
  122. data/lib/nokogiri/xml/element_content.rb +36 -0
  123. data/lib/nokogiri/xml/element_decl.rb +13 -0
  124. data/lib/nokogiri/xml/entity_decl.rb +19 -0
  125. data/lib/nokogiri/xml/namespace.rb +13 -0
  126. data/lib/nokogiri/xml/node/save_options.rb +61 -0
  127. data/lib/nokogiri/xml/node.rb +748 -109
  128. data/lib/nokogiri/xml/node_set.rb +200 -72
  129. data/lib/nokogiri/xml/parse_options.rb +120 -0
  130. data/lib/nokogiri/xml/pp/character_data.rb +18 -0
  131. data/lib/nokogiri/xml/pp/node.rb +56 -0
  132. data/lib/nokogiri/xml/pp.rb +2 -0
  133. data/lib/nokogiri/xml/processing_instruction.rb +8 -0
  134. data/lib/nokogiri/xml/reader.rb +102 -4
  135. data/lib/nokogiri/xml/relax_ng.rb +32 -0
  136. data/lib/nokogiri/xml/sax/document.rb +114 -2
  137. data/lib/nokogiri/xml/sax/parser.rb +97 -7
  138. data/lib/nokogiri/xml/sax/parser_context.rb +16 -0
  139. data/lib/nokogiri/xml/sax/push_parser.rb +60 -0
  140. data/lib/nokogiri/xml/sax.rb +2 -7
  141. data/lib/nokogiri/xml/schema.rb +63 -0
  142. data/lib/nokogiri/xml/searchable.rb +221 -0
  143. data/lib/nokogiri/xml/syntax_error.rb +27 -1
  144. data/lib/nokogiri/xml/text.rb +4 -1
  145. data/lib/nokogiri/xml/xpath/syntax_error.rb +11 -0
  146. data/lib/nokogiri/xml/xpath.rb +4 -0
  147. data/lib/nokogiri/xml/xpath_context.rb +3 -1
  148. data/lib/nokogiri/xml.rb +45 -38
  149. data/lib/nokogiri/xslt/stylesheet.rb +19 -0
  150. data/lib/nokogiri/xslt.rb +47 -2
  151. data/lib/nokogiri.rb +117 -24
  152. data/lib/xsd/xmlparser/nokogiri.rb +102 -0
  153. data/patches/sort-patches-by-date +25 -0
  154. data/ports/archives/libxml2-2.9.4.tar.gz +0 -0
  155. data/ports/archives/libxslt-1.1.29.tar.gz +0 -0
  156. data/suppressions/README.txt +1 -0
  157. data/suppressions/nokogiri_ree-1.8.7.358.supp +61 -0
  158. data/suppressions/nokogiri_ruby-1.8.7.370.supp +0 -0
  159. data/suppressions/nokogiri_ruby-1.9.2.320.supp +28 -0
  160. data/suppressions/nokogiri_ruby-1.9.3.327.supp +28 -0
  161. data/tasks/test.rb +100 -0
  162. data/test/css/test_nthiness.rb +73 -6
  163. data/test/css/test_parser.rb +184 -39
  164. data/test/css/test_tokenizer.rb +72 -19
  165. data/test/css/test_xpath_visitor.rb +44 -2
  166. data/test/decorators/test_slop.rb +20 -0
  167. data/test/files/2ch.html +108 -0
  168. data/test/files/GH_1042.html +18 -0
  169. data/test/files/address_book.rlx +12 -0
  170. data/test/files/address_book.xml +10 -0
  171. data/test/files/atom.xml +344 -0
  172. data/test/files/bar/bar.xsd +4 -0
  173. data/test/files/bogus.xml +0 -0
  174. data/test/files/dont_hurt_em_why.xml +422 -0
  175. data/test/files/encoding.html +82 -0
  176. data/test/files/encoding.xhtml +84 -0
  177. data/test/files/exslt.xml +8 -0
  178. data/test/files/exslt.xslt +35 -0
  179. data/test/files/foo/foo.xsd +4 -0
  180. data/test/files/metacharset.html +10 -0
  181. data/test/files/namespace_pressure_test.xml +1684 -0
  182. data/test/files/noencoding.html +47 -0
  183. data/test/files/po.xml +32 -0
  184. data/test/files/po.xsd +66 -0
  185. data/test/files/saml/saml20assertion_schema.xsd +283 -0
  186. data/test/files/saml/saml20protocol_schema.xsd +302 -0
  187. data/test/files/saml/xenc_schema.xsd +146 -0
  188. data/test/files/saml/xmldsig_schema.xsd +318 -0
  189. data/test/files/shift_jis.html +10 -0
  190. data/test/files/shift_jis.xml +5 -0
  191. data/test/files/shift_jis_no_charset.html +9 -0
  192. data/test/files/slow-xpath.xml +25509 -0
  193. data/test/files/snuggles.xml +3 -0
  194. data/test/files/staff.dtd +10 -0
  195. data/test/files/test_document_url/bar.xml +2 -0
  196. data/test/files/test_document_url/document.dtd +4 -0
  197. data/test/files/test_document_url/document.xml +6 -0
  198. data/test/files/tlm.html +2 -1
  199. data/test/files/to_be_xincluded.xml +2 -0
  200. data/test/files/valid_bar.xml +2 -0
  201. data/test/files/xinclude.xml +4 -0
  202. data/test/helper.rb +124 -13
  203. data/test/html/sax/test_parser.rb +118 -4
  204. data/test/html/sax/test_parser_context.rb +46 -0
  205. data/test/html/sax/test_push_parser.rb +87 -0
  206. data/test/html/test_builder.rb +94 -8
  207. data/test/html/test_document.rb +626 -11
  208. data/test/html/test_document_encoding.rb +145 -0
  209. data/test/html/test_document_fragment.rb +301 -0
  210. data/test/html/test_element_description.rb +105 -0
  211. data/test/html/test_named_characters.rb +14 -0
  212. data/test/html/test_node.rb +212 -0
  213. data/test/html/test_node_encoding.rb +85 -0
  214. data/test/namespaces/test_additional_namespaces_in_builder_doc.rb +14 -0
  215. data/test/namespaces/test_namespaces_aliased_default.rb +24 -0
  216. data/test/namespaces/test_namespaces_in_builder_doc.rb +75 -0
  217. data/test/namespaces/test_namespaces_in_cloned_doc.rb +31 -0
  218. data/test/namespaces/test_namespaces_in_created_doc.rb +75 -0
  219. data/test/namespaces/test_namespaces_in_parsed_doc.rb +80 -0
  220. data/test/namespaces/test_namespaces_preservation.rb +31 -0
  221. data/test/test_convert_xpath.rb +2 -47
  222. data/test/test_css_cache.rb +45 -0
  223. data/test/test_encoding_handler.rb +48 -0
  224. data/test/test_memory_leak.rb +156 -0
  225. data/test/test_nokogiri.rb +103 -1
  226. data/test/test_soap4r_sax.rb +52 -0
  227. data/test/test_xslt_transforms.rb +293 -8
  228. data/test/xml/node/test_save_options.rb +28 -0
  229. data/test/xml/node/test_subclass.rb +44 -0
  230. data/test/xml/sax/test_parser.rb +309 -8
  231. data/test/xml/sax/test_parser_context.rb +115 -0
  232. data/test/xml/sax/test_push_parser.rb +157 -0
  233. data/test/xml/test_attr.rb +67 -0
  234. data/test/xml/test_attribute_decl.rb +86 -0
  235. data/test/xml/test_builder.rb +327 -2
  236. data/test/xml/test_c14n.rb +180 -0
  237. data/test/xml/test_cdata.rb +32 -2
  238. data/test/xml/test_comment.rb +40 -0
  239. data/test/xml/test_document.rb +846 -35
  240. data/test/xml/test_document_encoding.rb +31 -0
  241. data/test/xml/test_document_fragment.rb +271 -0
  242. data/test/xml/test_dtd.rb +153 -9
  243. data/test/xml/test_dtd_encoding.rb +31 -0
  244. data/test/xml/test_element_content.rb +56 -0
  245. data/test/xml/test_element_decl.rb +73 -0
  246. data/test/xml/test_entity_decl.rb +122 -0
  247. data/test/xml/test_entity_reference.rb +251 -0
  248. data/test/xml/test_namespace.rb +96 -0
  249. data/test/xml/test_node.rb +1126 -105
  250. data/test/xml/test_node_attributes.rb +115 -0
  251. data/test/xml/test_node_encoding.rb +69 -0
  252. data/test/xml/test_node_inheritance.rb +32 -0
  253. data/test/xml/test_node_reparenting.rb +549 -0
  254. data/test/xml/test_node_set.rb +668 -9
  255. data/test/xml/test_parse_options.rb +64 -0
  256. data/test/xml/test_processing_instruction.rb +30 -0
  257. data/test/xml/test_reader.rb +589 -0
  258. data/test/xml/test_reader_encoding.rb +134 -0
  259. data/test/xml/test_relax_ng.rb +60 -0
  260. data/test/xml/test_schema.rb +142 -0
  261. data/test/xml/test_syntax_error.rb +30 -0
  262. data/test/xml/test_text.rb +49 -2
  263. data/test/xml/test_unparented_node.rb +440 -0
  264. data/test/xml/test_xinclude.rb +83 -0
  265. data/test/xml/test_xpath.rb +445 -0
  266. data/test/xslt/test_custom_functions.rb +133 -0
  267. data/test/xslt/test_exception_handling.rb +37 -0
  268. data/test_all +107 -0
  269. metadata +459 -115
  270. data/History.txt +0 -6
  271. data/README.ja.txt +0 -86
  272. data/README.txt +0 -87
  273. data/ext/nokogiri/html_sax_parser.c +0 -32
  274. data/ext/nokogiri/html_sax_parser.h +0 -11
  275. data/ext/nokogiri/native.c +0 -40
  276. data/ext/nokogiri/native.h +0 -51
  277. data/ext/nokogiri/xml_xpath.c +0 -46
  278. data/ext/nokogiri/xml_xpath.h +0 -11
  279. data/lib/nokogiri/css/generated_parser.rb +0 -653
  280. data/lib/nokogiri/css/generated_tokenizer.rb +0 -159
  281. data/lib/nokogiri/decorators/hpricot/node.rb +0 -58
  282. data/lib/nokogiri/decorators/hpricot/node_set.rb +0 -14
  283. data/lib/nokogiri/decorators/hpricot/xpath_visitor.rb +0 -17
  284. data/lib/nokogiri/decorators/hpricot.rb +0 -3
  285. data/lib/nokogiri/decorators.rb +0 -1
  286. data/lib/nokogiri/hpricot.rb +0 -47
  287. data/lib/nokogiri/xml/after_handler.rb +0 -18
  288. data/lib/nokogiri/xml/before_handler.rb +0 -32
  289. data/lib/nokogiri/xml/element.rb +0 -6
  290. data/lib/nokogiri/xml/entity_declaration.rb +0 -9
  291. data/nokogiri.gemspec +0 -34
  292. data/test/hpricot/files/basic.xhtml +0 -17
  293. data/test/hpricot/files/boingboing.html +0 -2266
  294. data/test/hpricot/files/cy0.html +0 -3653
  295. data/test/hpricot/files/immob.html +0 -400
  296. data/test/hpricot/files/pace_application.html +0 -1320
  297. data/test/hpricot/files/tenderlove.html +0 -16
  298. data/test/hpricot/files/uswebgen.html +0 -220
  299. data/test/hpricot/files/utf8.html +0 -1054
  300. data/test/hpricot/files/week9.html +0 -1723
  301. data/test/hpricot/files/why.xml +0 -19
  302. data/test/hpricot/load_files.rb +0 -7
  303. data/test/hpricot/test_alter.rb +0 -67
  304. data/test/hpricot/test_builder.rb +0 -27
  305. data/test/hpricot/test_parser.rb +0 -423
  306. data/test/hpricot/test_paths.rb +0 -15
  307. data/test/hpricot/test_preserved.rb +0 -78
  308. data/test/hpricot/test_xml.rb +0 -30
  309. data/test/test_reader.rb +0 -222
@@ -1,24 +1,37 @@
1
1
  module Nokogiri
2
2
  module XML
3
+ ####
4
+ # A NodeSet contains a list of Nokogiri::XML::Node objects. Typically
5
+ # a NodeSet is returned as a result of searching a Document via
6
+ # Nokogiri::XML::Searchable#css or Nokogiri::XML::Searchable#xpath
3
7
  class NodeSet
8
+ include Nokogiri::XML::Searchable
4
9
  include Enumerable
5
10
 
11
+ # The Document this NodeSet is associated with
6
12
  attr_accessor :document
7
13
 
8
- def initialize
14
+ # Create a NodeSet associated with +document+, populated with the nodes in +list+ (empty by default)
15
+ def initialize document, list = []
16
+ @document = document
17
+ document.decorate(self)
18
+ list.each { |x| self << x }
9
19
  yield self if block_given?
10
20
  end
11
21
 
12
22
  ###
13
23
  # Get the first element of the NodeSet.
14
- def first
15
- self[0]
24
+ def first n = nil
25
+ return self[0] unless n
26
+ list = []
27
+ n.times { |i| list << self[i] }
28
+ list
16
29
  end
17
30
 
18
31
  ###
19
32
  # Get the last element of the NodeSet.
20
33
  def last
21
- self[length - 1]
34
+ self[-1]
22
35
  end
23
36
 
24
37
  ###
@@ -27,6 +40,13 @@ module Nokogiri
27
40
  length == 0
28
41
  end
29
42
 
43
+ ###
44
+ # Returns the index of the first node in self that is == to +node+. Returns nil if no match is found.
45
+ def index(node)
46
+ each_with_index { |member, j| return j if member == node }
47
+ nil
48
+ end
49
+
30
50
  ###
31
51
  # Insert +datum+ before the first Node in this NodeSet
32
52
  def before datum
@@ -39,68 +59,98 @@ module Nokogiri
39
59
  last.after datum
40
60
  end
41
61
 
62
+ alias :<< :push
63
+ alias :remove :unlink
64
+
42
65
  ###
43
- # Append +node+ to the NodeSet.
44
- def << node
45
- push(node)
66
+ # call-seq: css *rules, [namespace-bindings, custom-pseudo-class]
67
+ #
68
+ # Search this node set for CSS +rules+. +rules+ must be one or more CSS
69
+ # selectors.
70
+ #
71
+ # For more information see Nokogiri::XML::Searchable#css
72
+ def css *args
73
+ rules, handler, ns, _ = extract_params(args)
74
+
75
+ inject(NodeSet.new(document)) do |set, node|
76
+ set += css_internal node, rules, handler, ns
77
+ end
46
78
  end
47
79
 
48
80
  ###
49
- # Unlink this NodeSet and all Node objects it contains from their
50
- # current context.
51
- def unlink
52
- each { |node| node.unlink }
53
- self.document = nil
54
- self
81
+ # call-seq: xpath *paths, [namespace-bindings, variable-bindings, custom-handler-class]
82
+ #
83
+ # Search this node set for XPath +paths+. +paths+ must be one or more XPath
84
+ # queries.
85
+ #
86
+ # For more information see Nokogiri::XML::Searchable#xpath
87
+ def xpath *args
88
+ paths, handler, ns, binds = extract_params(args)
89
+
90
+ inject(NodeSet.new(document)) do |set, node|
91
+ set += node.xpath(*(paths + [ns, handler, binds].compact))
92
+ end
55
93
  end
56
- alias :remove :unlink
57
94
 
58
95
  ###
59
- # Search this document for +paths+
60
- def search *paths
61
- sub_set = NodeSet.new
62
- document.decorate(sub_set)
63
- each do |node|
64
- node.search(*paths).each do |sub_node|
65
- sub_set << sub_node
66
- end
96
+ # Search this NodeSet's nodes' immediate children using CSS selector +selector+
97
+ def > selector
98
+ ns = document.root.namespaces
99
+ xpath CSS.xpath_for(selector, :prefix => "./", :ns => ns).first
100
+ end
101
+
102
+ ###
103
+ # call-seq: at *paths, [namespace-bindings, xpath-variable-bindings, custom-handler-class]
104
+ #
105
+ # Search this object for +paths+, and return only the first
106
+ # result. +paths+ must be one or more XPath or CSS queries.
107
+ #
108
+ # See Searchable#search for more information.
109
+ #
110
+ # Or, if passed an integer, index into the NodeSet:
111
+ #
112
+ # node_set.at(3) # same as node_set[3]
113
+ #
114
+ def at *args
115
+ if args.length == 1 && args.first.is_a?(Numeric)
116
+ return self[args.first]
67
117
  end
68
- sub_set.document = document
69
- sub_set
118
+
119
+ super(*args)
70
120
  end
71
- alias :/ :search
72
- alias :xpath :search
73
- alias :css :search
121
+ alias :% :at
74
122
 
75
123
  ###
76
- # If path is a string, search this document for +path+ returning the
77
- # first Node. Otherwise, index in to the array with +path+.
78
- def at path, ns = {}
79
- return self[path] if path.is_a?(Numeric)
80
- search(path, ns).first
124
+ # Filter this list for nodes that match +expr+
125
+ def filter expr
126
+ find_all { |node| node.matches?(expr) }
81
127
  end
82
128
 
83
129
  ###
84
130
  # Append the class attribute +name+ to all Node objects in the NodeSet.
85
131
  def add_class name
86
132
  each do |el|
87
- next unless el.respond_to? :get_attribute
88
- classes = el.get_attribute('class').to_s.split(" ")
89
- el.set_attribute('class', classes.push(name).uniq.join(" "))
133
+ classes = el['class'].to_s.split(/\s+/)
134
+ el['class'] = classes.push(name).uniq.join " "
90
135
  end
91
136
  self
92
137
  end
93
138
 
94
139
  ###
95
140
  # Remove the class attribute +name+ from all Node objects in the NodeSet.
141
+ # If +name+ is nil, remove the class attribute from all Nodes in the
142
+ # NodeSet.
96
143
  def remove_class name = nil
97
144
  each do |el|
98
- next unless el.respond_to? :get_attribute
99
145
  if name
100
- classes = el.get_attribute('class').to_s.split(" ")
101
- el.set_attribute('class', (classes - [name]).uniq.join(" "))
146
+ classes = el['class'].to_s.split(/\s+/)
147
+ if classes.empty?
148
+ el.delete 'class'
149
+ else
150
+ el['class'] = (classes - [name]).uniq.join " "
151
+ end
102
152
  else
103
- el.remove_attribute("class")
153
+ el.delete "class"
104
154
  end
105
155
  end
106
156
  self
@@ -110,73 +160,151 @@ module Nokogiri
110
160
  # Set the attribute +key+ to +value+ or the return value of +blk+
111
161
  # on all Node objects in the NodeSet.
112
162
  def attr key, value = nil, &blk
113
- if value or blk
114
- each do |el|
115
- el.set_attribute(key, value || blk[el])
116
- end
117
- return self
118
- end
119
- if key.is_a? Hash
120
- key.each { |k,v| self.attr(k,v) }
121
- return self
122
- else
123
- return self[0].get_attribute(key)
163
+ unless Hash === key || key && (value || blk)
164
+ return first.attribute(key)
124
165
  end
166
+
167
+ hash = key.is_a?(Hash) ? key : { key => value }
168
+
169
+ hash.each { |k,v| each { |el| el[k] = v || blk[el] } }
170
+
171
+ self
125
172
  end
126
- alias_method :set, :attr
173
+ alias :set :attr
174
+ alias :attribute :attr
127
175
 
128
176
  ###
129
177
  # Remove the attribute named +name+ from all Node objects in the NodeSet
130
178
  def remove_attr name
131
- each do |el|
132
- next unless el.respond_to? :remove_attribute
133
- el.remove_attribute(name)
134
- end
135
- self
179
+ each { |el| el.delete name }
180
+ self
136
181
  end
137
182
 
138
183
  ###
139
184
  # Iterate over each node, yielding to +block+
140
185
  def each(&block)
141
- x = 0
142
- while x < length
186
+ 0.upto(length - 1) do |x|
143
187
  yield self[x]
144
- x += 1
145
188
  end
146
189
  end
147
190
 
148
191
  ###
149
192
  # Get the inner text of all contained Node objects
150
193
  def inner_text
151
- collect{|j| j.inner_text}.join('')
194
+ collect(&:inner_text).join('')
152
195
  end
153
196
  alias :text :inner_text
154
197
 
198
+ ###
199
+ # Get the inner html of all contained Node objects
200
+ def inner_html *args
201
+ collect{|j| j.inner_html(*args) }.join('')
202
+ end
203
+
155
204
  ###
156
205
  # Wrap this NodeSet with +html+ or the results of the builder in +blk+
157
206
  def wrap(html, &blk)
158
207
  each do |j|
159
- new_parent = Nokogiri.make(html, &blk)
160
- j.replace(new_parent)
161
- nest = new_parent
162
- if nest.child
163
- nest = nest.child until nest.child.nil?
164
- end
165
- j.parent = nest
208
+ new_parent = document.parse(html).first
209
+ j.add_next_sibling(new_parent)
210
+ new_parent.add_child(j)
166
211
  end
167
212
  self
168
213
  end
169
214
 
215
+ ###
216
+ # Convert this NodeSet to a string.
170
217
  def to_s
171
- map { |x| x.to_s }.join
218
+ map(&:to_s).join
172
219
  end
173
220
 
174
- def to_html
175
- map { |x| x.to_html }.join('')
221
+ ###
222
+ # Convert this NodeSet to HTML
223
+ def to_html *args
224
+ if Nokogiri.jruby?
225
+ options = args.first.is_a?(Hash) ? args.shift : {}
226
+ if !options[:save_with]
227
+ options[:save_with] = Node::SaveOptions::NO_DECLARATION | Node::SaveOptions::NO_EMPTY_TAGS | Node::SaveOptions::AS_HTML
228
+ end
229
+ args.insert(0, options)
230
+ end
231
+ map { |x| x.to_html(*args) }.join
232
+ end
233
+
234
+ ###
235
+ # Convert this NodeSet to XHTML
236
+ def to_xhtml *args
237
+ map { |x| x.to_xhtml(*args) }.join
238
+ end
239
+
240
+ ###
241
+ # Convert this NodeSet to XML
242
+ def to_xml *args
243
+ map { |x| x.to_xml(*args) }.join
244
+ end
245
+
246
+ alias :size :length
247
+ alias :to_ary :to_a
248
+
249
+ ###
250
+ # Removes the last element from set and returns it, or +nil+ if
251
+ # the set is empty
252
+ def pop
253
+ return nil if length == 0
254
+ delete last
255
+ end
256
+
257
+ ###
258
+ # Returns the first element of the NodeSet and removes it. Returns
259
+ # +nil+ if the set is empty.
260
+ def shift
261
+ return nil if length == 0
262
+ delete first
263
+ end
264
+
265
+ ###
266
+ # Equality -- Two NodeSets are equal if the contain the same number
267
+ # of elements and if each element is equal to the corresponding
268
+ # element in the other NodeSet
269
+ def == other
270
+ return false unless other.is_a?(Nokogiri::XML::NodeSet)
271
+ return false unless length == other.length
272
+ each_with_index do |node, i|
273
+ return false unless node == other[i]
274
+ end
275
+ true
276
+ end
277
+
278
+ ###
279
+ # Returns a new NodeSet containing all the children of all the nodes in
280
+ # the NodeSet
281
+ def children
282
+ inject(NodeSet.new(document)) { |set, node| set += node.children }
176
283
  end
177
284
 
178
- def size
179
- length
285
+ ###
286
+ # Returns a new NodeSet containing all the nodes in the NodeSet
287
+ # in reverse order
288
+ def reverse
289
+ node_set = NodeSet.new(document)
290
+ (length - 1).downto(0) do |x|
291
+ node_set.push self[x]
292
+ end
293
+ node_set
294
+ end
295
+
296
+ ###
297
+ # Return a nicely formatted string representation
298
+ def inspect
299
+ "[#{map(&:inspect).join ', '}]"
300
+ end
301
+
302
+ alias :+ :|
303
+
304
+ private
305
+
306
+ def implied_xpath_contexts # :nodoc:
307
+ [".//", "self::"]
180
308
  end
181
309
  end
182
310
  end
@@ -0,0 +1,120 @@
1
+ module Nokogiri
2
+ module XML
3
+ ###
4
+ # Parse options for passing to Nokogiri.XML or Nokogiri.HTML
5
+ #
6
+ # == Building combinations of parse options
7
+ # You can build your own combinations of these parse options by using any of the following methods:
8
+ # *Note*: All examples attempt to set the +RECOVER+ & +NOENT+ options. All examples use Ruby 2 optional parameter syntax.
9
+ # [Ruby's bitwise operators] You can use the Ruby bitwise operators to set various combinations.
10
+ # <code>Nokogiri.XML('<content>Chapter 1</content>', options: Nokogiri::XML::ParseOptions.new((1 << 0) | (1 << 1)))</code>
11
+ # [Method chaining] Every option has an equivalent method in lowercase. You can chain these methods together to set various combinations.
12
+ # <code>Nokogiri.XML('<content>Chapter 1</content>', options: Nokogiri::XML::ParseOptions.new.recover.noent)</code>
13
+ # [Using Ruby Blocks] You can also setup parse combinations in the block passed to Nokogiri.XML or Nokogiri.HTML
14
+ # <code>Nokogiri.XML('<content>Chapter 1</content>') {|config| config.recover.noent}</code>
15
+ #
16
+ # == Removing particular parse options
17
+ # You can also remove options from an instance of +ParseOptions+ dynamically.
18
+ # Every option has an equivalent <code>no{option}</code> method in lowercase. You can call these methods on an instance of +ParseOptions+ to remove the option.
19
+ # Note that this is not available for +STRICT+.
20
+ #
21
+ # # Setting the RECOVER & NOENT options...
22
+ # options = Nokogiri::XML::ParseOptions.new.recover.noent
23
+ # # later...
24
+ # options.norecover # Removes the Nokogiri::XML::ParseOptions::RECOVER option
25
+ # options.nonoent # Removes the Nokogiri::XML::ParseOptions::NOENT option
26
+ #
27
+ class ParseOptions
28
+ # Strict parsing
29
+ STRICT = 0
30
+ # Recover from errors
31
+ RECOVER = 1 << 0
32
+ # Substitute entities
33
+ NOENT = 1 << 1
34
+ # Load external subsets
35
+ DTDLOAD = 1 << 2
36
+ # Default DTD attributes
37
+ DTDATTR = 1 << 3
38
+ # validate with the DTD
39
+ DTDVALID = 1 << 4
40
+ # suppress error reports
41
+ NOERROR = 1 << 5
42
+ # suppress warning reports
43
+ NOWARNING = 1 << 6
44
+ # pedantic error reporting
45
+ PEDANTIC = 1 << 7
46
+ # remove blank nodes
47
+ NOBLANKS = 1 << 8
48
+ # use the SAX1 interface internally
49
+ SAX1 = 1 << 9
50
+ # Implement XInclude substitution
51
+ XINCLUDE = 1 << 10
52
+ # Forbid network access. Recommended for dealing with untrusted documents.
53
+ NONET = 1 << 11
54
+ # Do not reuse the context dictionary
55
+ NODICT = 1 << 12
56
+ # remove redundant namespaces declarations
57
+ NSCLEAN = 1 << 13
58
+ # merge CDATA as text nodes
59
+ NOCDATA = 1 << 14
60
+ # do not generate XINCLUDE START/END nodes
61
+ NOXINCNODE = 1 << 15
62
+ # compact small text nodes; no modification of the tree allowed afterwards (will possibly crash if you try to modify the tree)
63
+ COMPACT = 1 << 16
64
+ # parse using XML-1.0 before update 5
65
+ OLD10 = 1 << 17
66
+ # do not fixup XINCLUDE xml:base uris
67
+ NOBASEFIX = 1 << 18
68
+ # relax any hardcoded limit from the parser
69
+ HUGE = 1 << 19
70
+
71
+ # the default options used for parsing XML documents
72
+ DEFAULT_XML = RECOVER | NONET
73
+ # the default options used for parsing HTML documents
74
+ DEFAULT_HTML = RECOVER | NOERROR | NOWARNING | NONET
75
+
76
+ attr_accessor :options
77
+ def initialize options = STRICT
78
+ @options = options
79
+ end
80
+
81
+ constants.each do |constant|
82
+ next if constant.to_sym == :STRICT
83
+ class_eval %{
84
+ def #{constant.downcase}
85
+ @options |= #{constant}
86
+ self
87
+ end
88
+
89
+ def no#{constant.downcase}
90
+ @options &= ~#{constant}
91
+ self
92
+ end
93
+
94
+ def #{constant.downcase}?
95
+ #{constant} & @options == #{constant}
96
+ end
97
+ }
98
+ end
99
+
100
+ def strict
101
+ @options &= ~RECOVER
102
+ self
103
+ end
104
+
105
+ def strict?
106
+ @options & RECOVER == STRICT
107
+ end
108
+
109
+ alias :to_i :options
110
+
111
+ def inspect
112
+ options = []
113
+ self.class.constants.each do |k|
114
+ options << k.downcase if send(:"#{k.downcase}?")
115
+ end
116
+ super.sub(/>$/, " " + options.join(', ') + ">")
117
+ end
118
+ end
119
+ end
120
+ end
@@ -0,0 +1,18 @@
1
+ module Nokogiri
2
+ module XML
3
+ module PP
4
+ module CharacterData
5
+ def pretty_print pp # :nodoc:
6
+ nice_name = self.class.name.split('::').last
7
+ pp.group(2, "#(#{nice_name} ", ')') do
8
+ pp.pp text
9
+ end
10
+ end
11
+
12
+ def inspect # :nodoc:
13
+ "#<#{self.class.name}:#{sprintf("0x%x",object_id)} #{text.inspect}>"
14
+ end
15
+ end
16
+ end
17
+ end
18
+ end
@@ -0,0 +1,56 @@
1
+ module Nokogiri
2
+ module XML
3
+ module PP
4
+ module Node
5
+ def inspect # :nodoc:
6
+ attributes = inspect_attributes.reject { |x|
7
+ begin
8
+ attribute = send x
9
+ !attribute || (attribute.respond_to?(:empty?) && attribute.empty?)
10
+ rescue NoMethodError
11
+ true
12
+ end
13
+ }.map { |attribute|
14
+ "#{attribute.to_s.sub(/_\w+/, 's')}=#{send(attribute).inspect}"
15
+ }.join ' '
16
+ "#<#{self.class.name}:#{sprintf("0x%x", object_id)} #{attributes}>"
17
+ end
18
+
19
+ def pretty_print pp # :nodoc:
20
+ nice_name = self.class.name.split('::').last
21
+ pp.group(2, "#(#{nice_name}:#{sprintf("0x%x", object_id)} {", '})') do
22
+
23
+ pp.breakable
24
+ attrs = inspect_attributes.map { |t|
25
+ [t, send(t)] if respond_to?(t)
26
+ }.compact.find_all { |x|
27
+ if x.last
28
+ if [:attribute_nodes, :children].include? x.first
29
+ !x.last.empty?
30
+ else
31
+ true
32
+ end
33
+ end
34
+ }
35
+
36
+ pp.seplist(attrs) do |v|
37
+ if [:attribute_nodes, :children].include? v.first
38
+ pp.group(2, "#{v.first.to_s.sub(/_\w+$/, 's')} = [", "]") do
39
+ pp.breakable
40
+ pp.seplist(v.last) do |item|
41
+ pp.pp item
42
+ end
43
+ end
44
+ else
45
+ pp.text "#{v.first} = "
46
+ pp.pp v.last
47
+ end
48
+ end
49
+ pp.breakable
50
+
51
+ end
52
+ end
53
+ end
54
+ end
55
+ end
56
+ end
@@ -0,0 +1,2 @@
1
+ require 'nokogiri/xml/pp/node'
2
+ require 'nokogiri/xml/pp/character_data'
@@ -0,0 +1,8 @@
1
+ module Nokogiri
2
+ module XML
3
+ class ProcessingInstruction < Node
4
+ def initialize document, name, content
5
+ end
6
+ end
7
+ end
8
+ end
@@ -1,14 +1,112 @@
1
1
  module Nokogiri
2
2
  module XML
3
+ ###
4
+ # Nokogiri::XML::Reader parses an XML document similar to the way a cursor
5
+ # would move. The Reader is given an XML document, and yields nodes
6
+ # to an each block.
7
+ #
8
+ # Here is an example of usage:
9
+ #
10
+ # reader = Nokogiri::XML::Reader(<<-eoxml)
11
+ # <x xmlns:tenderlove='http://tenderlovemaking.com/'>
12
+ # <tenderlove:foo awesome='true'>snuggles!</tenderlove:foo>
13
+ # </x>
14
+ # eoxml
15
+ #
16
+ # reader.each do |node|
17
+ #
18
+ # # node is an instance of Nokogiri::XML::Reader
19
+ # puts node.name
20
+ #
21
+ # end
22
+ #
23
+ # Note that Nokogiri::XML::Reader#each can only be called once!! Once
24
+ # the cursor moves through the entire document, you must parse the
25
+ # document again. So make sure that you capture any information you
26
+ # need during the first iteration.
27
+ #
28
+ # The Reader parser is good for when you need the speed of a SAX parser,
29
+ # but do not want to write a Document handler.
3
30
  class Reader
4
31
  include Enumerable
5
32
 
6
- def each(&block)
7
- while node = self.read
8
- block.call(node)
9
- end
33
+ TYPE_NONE = 0
34
+ # Element node type
35
+ TYPE_ELEMENT = 1
36
+ # Attribute node type
37
+ TYPE_ATTRIBUTE = 2
38
+ # Text node type
39
+ TYPE_TEXT = 3
40
+ # CDATA node type
41
+ TYPE_CDATA = 4
42
+ # Entity Reference node type
43
+ TYPE_ENTITY_REFERENCE = 5
44
+ # Entity node type
45
+ TYPE_ENTITY = 6
46
+ # PI node type
47
+ TYPE_PROCESSING_INSTRUCTION = 7
48
+ # Comment node type
49
+ TYPE_COMMENT = 8
50
+ # Document node type
51
+ TYPE_DOCUMENT = 9
52
+ # Document Type node type
53
+ TYPE_DOCUMENT_TYPE = 10
54
+ # Document Fragment node type
55
+ TYPE_DOCUMENT_FRAGMENT = 11
56
+ # Notation node type
57
+ TYPE_NOTATION = 12
58
+ # Whitespace node type
59
+ TYPE_WHITESPACE = 13
60
+ # Significant Whitespace node type
61
+ TYPE_SIGNIFICANT_WHITESPACE = 14
62
+ # Element end node type
63
+ TYPE_END_ELEMENT = 15
64
+ # Entity end node type
65
+ TYPE_END_ENTITY = 16
66
+ # XML Declaration node type
67
+ TYPE_XML_DECLARATION = 17
68
+
69
+ # A list of errors encountered while parsing
70
+ attr_accessor :errors
71
+
72
+ # The encoding for the document
73
+ attr_reader :encoding
74
+
75
+ # The XML source
76
+ attr_reader :source
77
+
78
+ alias :self_closing? :empty_element?
79
+
80
+ def initialize source, url = nil, encoding = nil # :nodoc:
81
+ @source = source
82
+ @errors = []
83
+ @encoding = encoding
10
84
  end
11
85
  private :initialize
86
+
87
+ ###
88
+ # Get a Hash of attribute names to values for the current node, merged with its namespaces.
89
+ def attributes
90
+ Hash[attribute_nodes.map { |node|
91
+ [node.name, node.to_s]
92
+ }].merge(namespaces || {})
93
+ end
94
+
95
+ ###
96
+ # Get a list of the attribute nodes for the current node
97
+ def attribute_nodes
98
+ nodes = attr_nodes
99
+ nodes.each { |v| v.instance_variable_set(:@_r, self) }
100
+ nodes
101
+ end
102
+
103
+ ###
104
+ # Move the cursor through the document yielding the cursor to the block
105
+ def each
106
+ while cursor = self.read
107
+ yield cursor
108
+ end
109
+ end
12
110
  end
13
111
  end
14
112
  end