nokogiri 1.6.5 → 1.6.6.1

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (45) hide show
  1. checksums.yaml +4 -4
  2. data/.cross_rubies +5 -0
  3. data/.travis.yml +10 -20
  4. data/CHANGELOG.ja.rdoc +28 -1
  5. data/CHANGELOG.rdoc +28 -1
  6. data/Gemfile +1 -1
  7. data/Manifest.txt +5 -1
  8. data/README.ja.rdoc +10 -9
  9. data/README.rdoc +6 -9
  10. data/ROADMAP.md +15 -3
  11. data/Rakefile +1 -3
  12. data/bin/nokogiri +48 -8
  13. data/ext/nokogiri/extconf.rb +18 -3
  14. data/ext/nokogiri/xml_comment.c +17 -2
  15. data/ext/nokogiri/xml_node.c +66 -6
  16. data/ext/nokogiri/xml_syntax_error.c +4 -0
  17. data/ext/nokogiri/xml_syntax_error.h +1 -0
  18. data/lib/nokogiri.rb +2 -2
  19. data/lib/nokogiri/decorators/slop.rb +7 -8
  20. data/lib/nokogiri/html/document_fragment.rb +0 -2
  21. data/lib/nokogiri/html/sax/push_parser.rb +22 -2
  22. data/lib/nokogiri/version.rb +1 -1
  23. data/lib/nokogiri/xml.rb +1 -0
  24. data/lib/nokogiri/xml/document.rb +4 -4
  25. data/lib/nokogiri/xml/document_fragment.rb +39 -2
  26. data/lib/nokogiri/xml/node.rb +11 -181
  27. data/lib/nokogiri/xml/node_set.rb +41 -85
  28. data/lib/nokogiri/xml/searchable.rb +221 -0
  29. data/ports/patches/sort-patches-by-date +25 -0
  30. data/test/css/test_nthiness.rb +1 -1
  31. data/test/html/sax/test_push_parser.rb +87 -0
  32. data/test/html/test_document.rb +20 -5
  33. data/test/html/test_document_fragment.rb +25 -0
  34. data/test/xml/test_attr.rb +5 -2
  35. data/test/xml/test_builder.rb +27 -1
  36. data/test/xml/test_comment.rb +11 -0
  37. data/test/xml/test_document.rb +34 -0
  38. data/test/xml/test_document_fragment.rb +40 -9
  39. data/test/xml/test_namespace.rb +1 -0
  40. data/test/xml/test_node.rb +37 -1
  41. data/test/xml/test_node_set.rb +56 -36
  42. data/test/xml/test_xpath.rb +65 -19
  43. data/test_all +11 -1
  44. metadata +11 -7
  45. data/tasks/nokogiri.org.rb +0 -24
@@ -3,8 +3,9 @@ module Nokogiri
3
3
  ####
4
4
  # A NodeSet contains a list of Nokogiri::XML::Node objects. Typically
5
5
  # a NodeSet is returned as a result of searching a Document via
6
- # Nokogiri::XML::Node#css or Nokogiri::XML::Node#xpath
6
+ # Nokogiri::XML::Searchable#css or Nokogiri::XML::Searchable#xpath
7
7
  class NodeSet
8
+ include Nokogiri::XML::Searchable
8
9
  include Enumerable
9
10
 
10
11
  # The Document this NodeSet is associated with
@@ -62,78 +63,33 @@ module Nokogiri
62
63
  alias :remove :unlink
63
64
 
64
65
  ###
65
- # Search this document for +paths+
66
+ # call-seq: css *rules, [namespace-bindings, custom-pseudo-class]
66
67
  #
67
- # For more information see Nokogiri::XML::Node#css and
68
- # Nokogiri::XML::Node#xpath
69
- def search *paths
70
- handler = ![
71
- Hash, String, Symbol
72
- ].include?(paths.last.class) ? paths.pop : nil
73
-
74
- ns = paths.last.is_a?(Hash) ? paths.pop : nil
75
-
76
- sub_set = NodeSet.new(document)
77
-
78
- paths.each do |path|
79
- sub_set += send(
80
- path =~ /^(\.\/|\/|\.\.|\.$)/ ? :xpath : :css,
81
- *(paths + [ns, handler]).compact
82
- )
83
- end
84
-
85
- document.decorate(sub_set)
86
- sub_set
87
- end
88
- alias :/ :search
89
-
90
- ###
91
- # Search this NodeSet for css +paths+
68
+ # Search this node set for CSS +rules+. +rules+ must be one or more CSS
69
+ # selectors. For example:
92
70
  #
93
- # For more information see Nokogiri::XML::Node#css
94
- def css *paths
95
- handler = ![
96
- Hash, String, Symbol
97
- ].include?(paths.last.class) ? paths.pop : nil
98
-
99
- ns = paths.last.is_a?(Hash) ? paths.pop : nil
100
-
101
- sub_set = NodeSet.new(document)
71
+ # For more information see Nokogiri::XML::Searchable#css
72
+ def css *args
73
+ rules, handler, ns, _ = extract_params(args)
102
74
 
103
- each do |node|
104
- doc = node.document
105
- search_ns = ns || (doc.root ? doc.root.namespaces : {})
106
-
107
- xpaths = paths.map { |rule|
108
- [
109
- CSS.xpath_for(rule.to_s, :prefix => ".//", :ns => search_ns),
110
- CSS.xpath_for(rule.to_s, :prefix => "self::", :ns => search_ns)
111
- ].join(' | ')
112
- }
113
-
114
- sub_set += node.xpath(*(xpaths + [search_ns, handler].compact))
75
+ inject(NodeSet.new(document)) do |set, node|
76
+ set += css_internal node, rules, handler, ns
115
77
  end
116
- document.decorate(sub_set)
117
- sub_set
118
78
  end
119
79
 
120
80
  ###
121
- # Search this NodeSet for XPath +paths+
81
+ # call-seq: xpath *paths, [namespace-bindings, variable-bindings, custom-handler-class]
122
82
  #
123
- # For more information see Nokogiri::XML::Node#xpath
124
- def xpath *paths
125
- handler = ![
126
- Hash, String, Symbol
127
- ].include?(paths.last.class) ? paths.pop : nil
128
-
129
- ns = paths.last.is_a?(Hash) ? paths.pop : nil
83
+ # Search this node set for XPath +paths+. +paths+ must be one or more XPath
84
+ # queries.
85
+ #
86
+ # For more information see Nokogiri::XML::Searchable#xpath
87
+ def xpath *args
88
+ paths, handler, ns, binds = extract_params(args)
130
89
 
131
- sub_set = NodeSet.new(document)
132
- each do |node|
133
- sub_set += node.xpath(*(paths + [ns, handler].compact))
90
+ inject(NodeSet.new(document)) do |set, node|
91
+ set += node.xpath(*(paths + [ns, handler, binds].compact))
134
92
  end
135
- document.decorate(sub_set)
136
- sub_set
137
93
  end
138
94
 
139
95
  ###
@@ -144,31 +100,25 @@ module Nokogiri
144
100
  end
145
101
 
146
102
  ###
147
- # If path is a string, search this document for +path+ returning the
148
- # first Node. Otherwise, index in to the array with +path+.
149
- def at path, ns = document.root ? document.root.namespaces : {}
150
- return self[path] if path.is_a?(Numeric)
151
- search(path, ns).first
152
- end
153
- alias :% :at
154
-
155
- ##
156
- # Search this NodeSet for the first occurrence of XPath +paths+.
157
- # Equivalent to <tt>xpath(paths).first</tt>
158
- # See NodeSet#xpath for more information.
103
+ # call-seq: at *paths, [namespace-bindings, xpath-variable-bindings, custom-handler-class]
159
104
  #
160
- def at_xpath *paths
161
- xpath(*paths).first
162
- end
163
-
164
- ##
165
- # Search this NodeSet for the first occurrence of CSS +rules+.
166
- # Equivalent to <tt>css(rules).first</tt>
167
- # See NodeSet#css for more information.
105
+ # Search this object for +paths+, and return only the first
106
+ # result. +paths+ must be one or more XPath or CSS queries.
107
+ #
108
+ # See Searchable#search for more information.
109
+ #
110
+ # Or, if passed an integer, index into the NodeSet:
111
+ #
112
+ # node_set.at(3) # same as node_set[3]
168
113
  #
169
- def at_css *rules
170
- css(*rules).first
114
+ def at *args
115
+ if args.length == 1 && args.first.is_a?(Numeric)
116
+ return self[args.first]
117
+ end
118
+
119
+ super(*args)
171
120
  end
121
+ alias :% :at
172
122
 
173
123
  ###
174
124
  # Filter this list for nodes that match +expr+
@@ -350,6 +300,12 @@ module Nokogiri
350
300
  end
351
301
 
352
302
  alias :+ :|
303
+
304
+ private
305
+
306
+ def implied_xpath_contexts # :nodoc:
307
+ [".//", "self::"]
308
+ end
353
309
  end
354
310
  end
355
311
  end
@@ -0,0 +1,221 @@
1
+ module Nokogiri
2
+ module XML
3
+ #
4
+ # The Searchable module declares the interface used for searching your DOM.
5
+ #
6
+ # It implements the public methods `search`, `css`, and `xpath`,
7
+ # as well as allowing specific implementations to specialize some
8
+ # of the important behaviors.
9
+ #
10
+ module Searchable
11
+ # Regular expression used by Searchable#search to determine if a query
12
+ # string is CSS or XPath
13
+ LOOKS_LIKE_XPATH = /^(\.\/|\/|\.\.|\.$)/
14
+
15
+ ###
16
+ # call-seq: search *paths, [namespace-bindings, xpath-variable-bindings, custom-handler-class]
17
+ #
18
+ # Search this object for +paths+. +paths+ must be one or more XPath or CSS queries:
19
+ #
20
+ # node.search("div.employee", ".//title")
21
+ #
22
+ # A hash of namespace bindings may be appended:
23
+ #
24
+ # node.search('.//bike:tire', {'bike' => 'http://schwinn.com/'})
25
+ # node.search('bike|tire', {'bike' => 'http://schwinn.com/'})
26
+ #
27
+ # For XPath queries, a hash of variable bindings may also be
28
+ # appended to the namespace bindings. For example:
29
+ #
30
+ # node.search('.//address[@domestic=$value]', nil, {:value => 'Yes'})
31
+ #
32
+ # Custom XPath functions and CSS pseudo-selectors may also be
33
+ # defined. To define custom functions create a class and
34
+ # implement the function you want to define. The first argument
35
+ # to the method will be the current matching NodeSet. Any other
36
+ # arguments are ones that you pass in. Note that this class may
37
+ # appear anywhere in the argument list. For example:
38
+ #
39
+ # node.search('.//title[regex(., "\w+")]', 'div.employee:regex("[0-9]+")',
40
+ # Class.new {
41
+ # def regex node_set, regex
42
+ # node_set.find_all { |node| node['some_attribute'] =~ /#{regex}/ }
43
+ # end
44
+ # }.new
45
+ # )
46
+ #
47
+ # See Searchable#xpath and Searchable#css for further usage help.
48
+ def search *args
49
+ paths, handler, ns, binds = extract_params(args)
50
+
51
+ xpaths = paths.map(&:to_s).map do |path|
52
+ (path =~ LOOKS_LIKE_XPATH) ? path : xpath_query_from_css_rule(path, ns)
53
+ end.flatten.uniq
54
+
55
+ xpath(*(xpaths + [ns, handler, binds].compact))
56
+ end
57
+ alias :/ :search
58
+
59
+ ###
60
+ # call-seq: at *paths, [namespace-bindings, xpath-variable-bindings, custom-handler-class]
61
+ #
62
+ # Search this object for +paths+, and return only the first
63
+ # result. +paths+ must be one or more XPath or CSS queries.
64
+ #
65
+ # See Searchable#search for more information.
66
+ def at *args
67
+ search(*args).first
68
+ end
69
+ alias :% :at
70
+
71
+ ###
72
+ # call-seq: css *rules, [namespace-bindings, custom-pseudo-class]
73
+ #
74
+ # Search this object for CSS +rules+. +rules+ must be one or more CSS
75
+ # selectors. For example:
76
+ #
77
+ # node.css('title')
78
+ # node.css('body h1.bold')
79
+ # node.css('div + p.green', 'div#one')
80
+ #
81
+ # A hash of namespace bindings may be appended. For example:
82
+ #
83
+ # node.css('bike|tire', {'bike' => 'http://schwinn.com/'})
84
+ #
85
+ # Custom CSS pseudo classes may also be defined. To define
86
+ # custom pseudo classes, create a class and implement the custom
87
+ # pseudo class you want defined. The first argument to the
88
+ # method will be the current matching NodeSet. Any other
89
+ # arguments are ones that you pass in. For example:
90
+ #
91
+ # node.css('title:regex("\w+")', Class.new {
92
+ # def regex node_set, regex
93
+ # node_set.find_all { |node| node['some_attribute'] =~ /#{regex}/ }
94
+ # end
95
+ # }.new)
96
+ #
97
+ # Note that the CSS query string is case-sensitive with regard
98
+ # to your document type. That is, if you're looking for "H1" in
99
+ # an HTML document, you'll never find anything, since HTML tags
100
+ # will match only lowercase CSS queries. However, "H1" might be
101
+ # found in an XML document, where tag names are case-sensitive
102
+ # (e.g., "H1" is distinct from "h1").
103
+ #
104
+ def css *args
105
+ rules, handler, ns, _ = extract_params(args)
106
+
107
+ css_internal self, rules, handler, ns
108
+ end
109
+
110
+ ##
111
+ # call-seq: at_css *rules, [namespace-bindings, custom-pseudo-class]
112
+ #
113
+ # Search this object for CSS +rules+, and return only the first
114
+ # match. +rules+ must be one or more CSS selectors.
115
+ #
116
+ # See Searchable#css for more information.
117
+ def at_css *args
118
+ css(*args).first
119
+ end
120
+
121
+ ###
122
+ # call-seq: xpath *paths, [namespace-bindings, variable-bindings, custom-handler-class]
123
+ #
124
+ # Search this node for XPath +paths+. +paths+ must be one or more XPath
125
+ # queries.
126
+ #
127
+ # node.xpath('.//title')
128
+ #
129
+ # A hash of namespace bindings may be appended. For example:
130
+ #
131
+ # node.xpath('.//foo:name', {'foo' => 'http://example.org/'})
132
+ # node.xpath('.//xmlns:name', node.root.namespaces)
133
+ #
134
+ # A hash of variable bindings may also be appended to the namespace bindings. For example:
135
+ #
136
+ # node.xpath('.//address[@domestic=$value]', nil, {:value => 'Yes'})
137
+ #
138
+ # Custom XPath functions may also be defined. To define custom
139
+ # functions create a class and implement the function you want
140
+ # to define. The first argument to the method will be the
141
+ # current matching NodeSet. Any other arguments are ones that
142
+ # you pass in. Note that this class may appear anywhere in the
143
+ # argument list. For example:
144
+ #
145
+ # node.xpath('.//title[regex(., "\w+")]', Class.new {
146
+ # def regex node_set, regex
147
+ # node_set.find_all { |node| node['some_attribute'] =~ /#{regex}/ }
148
+ # end
149
+ # }.new)
150
+ #
151
+ def xpath *args
152
+ return NodeSet.new(document) unless document
153
+
154
+ paths, handler, ns, binds = extract_params(args)
155
+
156
+ sets = paths.map do |path|
157
+ ctx = XPathContext.new(self)
158
+ ctx.register_namespaces(ns)
159
+ path = path.gsub(/xmlns:/, ' :') unless Nokogiri.uses_libxml?
160
+
161
+ binds.each do |key,value|
162
+ ctx.register_variable key.to_s, value
163
+ end if binds
164
+
165
+ ctx.evaluate(path, handler)
166
+ end
167
+ return sets.first if sets.length == 1
168
+
169
+ NodeSet.new(document) do |combined|
170
+ sets.each do |set|
171
+ set.each do |node|
172
+ combined << node
173
+ end
174
+ end
175
+ end
176
+ end
177
+
178
+ ##
179
+ # call-seq: at_xpath *paths, [namespace-bindings, variable-bindings, custom-handler-class]
180
+ #
181
+ # Search this node for XPath +paths+, and return only the first
182
+ # match. +paths+ must be one or more XPath queries.
183
+ #
184
+ # See Searchable#xpath for more information.
185
+ def at_xpath *args
186
+ xpath(*args).first
187
+ end
188
+
189
+ private
190
+
191
+ def css_internal node, rules, handler, ns
192
+ xpaths = rules.map { |rule| xpath_query_from_css_rule(rule, ns) }
193
+ node.xpath(*(xpaths + [ns, handler].compact))
194
+ end
195
+
196
+ def xpath_query_from_css_rule rule, ns
197
+ implied_xpath_contexts.map do |implied_xpath_context|
198
+ CSS.xpath_for(rule.to_s, :prefix => implied_xpath_context, :ns => ns)
199
+ end.join(' | ')
200
+ end
201
+
202
+ def extract_params params # :nodoc:
203
+ handler = params.find do |param|
204
+ ![Hash, String, Symbol].include?(param.class)
205
+ end
206
+ params -= [handler] if handler
207
+
208
+ hashes = []
209
+ while Hash === params.last || params.last.nil?
210
+ hashes << params.pop
211
+ break if params.empty?
212
+ end
213
+ ns, binds = hashes.reverse
214
+
215
+ ns ||= document.root ? document.root.namespaces : {}
216
+
217
+ [params, handler, ns, binds]
218
+ end
219
+ end
220
+ end
221
+ end
@@ -0,0 +1,25 @@
1
+ #! /usr/bin/env ruby
2
+
3
+ require "date"
4
+
5
+ dir = ARGV[0] || raise("ERROR: arg1 must be dir")
6
+
7
+ Dir.chdir dir
8
+
9
+ files_and_times = {}
10
+ Dir["*.patch"].sort.each do |filename|
11
+ dateline = `cat #{filename} | grep "Date:"`
12
+ datestr = dateline.split("Date:").last.strip
13
+ time = DateTime.parse datestr
14
+ files_and_times[filename] = time
15
+ end
16
+
17
+ count = 0
18
+ files_and_times.sort_by {|k,v| v}.each do |filename, time|
19
+ count += 1
20
+ _, patch_name = filename.split("-", 2)
21
+ new_filename = sprintf("%4.4d-%s", count, patch_name)
22
+ printf "mv -f %s %s # %s\n", filename, new_filename, time
23
+ end
24
+
25
+ STDERR.print "\n**\n** REMEMBER TO UPDATE THE Manifest.txt FILE\n**\n"
@@ -79,7 +79,7 @@ EOF
79
79
  end
80
80
 
81
81
  def test_n
82
- assert_result_rows (1..14).to_a, @parser.search("table/tr:nth(n)")
82
+ assert_result_rows((1..14).to_a, @parser.search("table/tr:nth(n)"))
83
83
  end
84
84
 
85
85
  def test_2n
@@ -0,0 +1,87 @@
1
+ # -*- coding: utf-8 -*-
2
+
3
+ require "helper"
4
+
5
+ module Nokogiri
6
+ module HTML
7
+ module SAX
8
+ class TestPushParser < Nokogiri::SAX::TestCase
9
+ def setup
10
+ super
11
+ @parser = HTML::SAX::PushParser.new(Doc.new)
12
+ end
13
+
14
+ def test_end_document_called
15
+ @parser.<<(<<-eoxml)
16
+ <p id="asdfasdf">
17
+ <!-- This is a comment -->
18
+ Paragraph 1
19
+ </p>
20
+ eoxml
21
+ assert ! @parser.document.end_document_called
22
+ @parser.finish
23
+ assert @parser.document.end_document_called
24
+ end
25
+
26
+ def test_start_element
27
+ @parser.<<(<<-eoxml)
28
+ <!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" "http://www.w3.org/TR/REC-html40/loose.dtd">
29
+ <html><head><body><p id="asdfasdf">
30
+ eoxml
31
+
32
+ assert_equal [["html", []], ["head", []], ["body", []], ["p", [["id", "asdfasdf"]]]],
33
+ @parser.document.start_elements
34
+
35
+ @parser.<<(<<-eoxml)
36
+ <!-- This is a comment -->
37
+ Paragraph 1
38
+ </p></body></html>
39
+ eoxml
40
+ assert_equal [' This is a comment '], @parser.document.comments
41
+ @parser.finish
42
+ end
43
+
44
+
45
+ def test_chevron_partial_html
46
+ @parser.<<(<<-eoxml)
47
+ <p id="asdfasdf">
48
+ eoxml
49
+
50
+ @parser.<<(<<-eoxml)
51
+ <!-- This is a comment -->
52
+ Paragraph 1
53
+ </p>
54
+ eoxml
55
+ assert_equal [' This is a comment '], @parser.document.comments
56
+ @parser.finish
57
+ end
58
+
59
+ def test_chevron
60
+ @parser.<<(<<-eoxml)
61
+ <p id="asdfasdf">
62
+ <!-- This is a comment -->
63
+ Paragraph 1
64
+ </p>
65
+ eoxml
66
+ @parser.finish
67
+ assert_equal [' This is a comment '], @parser.document.comments
68
+ end
69
+
70
+ def test_default_options
71
+ assert_equal 0, @parser.options
72
+ end
73
+
74
+ def test_broken_encoding
75
+ skip("ultra hard to fix for pure Java version") if Nokogiri.jruby?
76
+ @parser.options |= XML::ParseOptions::RECOVER
77
+ # This is ISO_8859-1:
78
+ @parser.<< "<?xml version='1.0' encoding='UTF-8'?><r>Gau\337</r>"
79
+ @parser.finish
80
+ assert(@parser.document.errors.size >= 1)
81
+ assert_equal "Gau\337", @parser.document.data.join
82
+ assert_equal [["r"], ["body"], ["html"]], @parser.document.end_elements
83
+ end
84
+ end
85
+ end
86
+ end
87
+ end