nokogiri 1.6.5-java → 1.6.6.1-java
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of nokogiri might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/.cross_rubies +5 -0
- data/.travis.yml +10 -20
- data/CHANGELOG.ja.rdoc +28 -1
- data/CHANGELOG.rdoc +28 -1
- data/Gemfile +1 -1
- data/Manifest.txt +5 -1
- data/README.ja.rdoc +10 -9
- data/README.rdoc +6 -9
- data/ROADMAP.md +15 -3
- data/Rakefile +1 -3
- data/bin/nokogiri +48 -8
- data/ext/java/nokogiri/HtmlSaxParserContext.java +1 -1
- data/ext/java/nokogiri/HtmlSaxPushParser.java +244 -0
- data/ext/java/nokogiri/NokogiriService.java +9 -0
- data/ext/java/nokogiri/XmlComment.java +2 -0
- data/ext/java/nokogiri/XmlNode.java +57 -30
- data/ext/java/nokogiri/XmlSyntaxError.java +11 -9
- data/ext/nokogiri/extconf.rb +18 -3
- data/ext/nokogiri/xml_comment.c +17 -2
- data/ext/nokogiri/xml_node.c +66 -6
- data/ext/nokogiri/xml_syntax_error.c +4 -0
- data/ext/nokogiri/xml_syntax_error.h +1 -0
- data/lib/nokogiri.rb +2 -2
- data/lib/nokogiri/decorators/slop.rb +7 -8
- data/lib/nokogiri/html/document_fragment.rb +0 -2
- data/lib/nokogiri/html/sax/push_parser.rb +22 -2
- data/lib/nokogiri/nokogiri.jar +0 -0
- data/lib/nokogiri/version.rb +1 -1
- data/lib/nokogiri/xml.rb +1 -0
- data/lib/nokogiri/xml/document.rb +4 -4
- data/lib/nokogiri/xml/document_fragment.rb +39 -2
- data/lib/nokogiri/xml/node.rb +11 -181
- data/lib/nokogiri/xml/node_set.rb +41 -85
- data/lib/nokogiri/xml/searchable.rb +221 -0
- data/ports/patches/sort-patches-by-date +25 -0
- data/test/css/test_nthiness.rb +1 -1
- data/test/html/sax/test_push_parser.rb +87 -0
- data/test/html/test_document.rb +20 -5
- data/test/html/test_document_fragment.rb +25 -0
- data/test/xml/test_attr.rb +5 -2
- data/test/xml/test_builder.rb +27 -1
- data/test/xml/test_comment.rb +11 -0
- data/test/xml/test_document.rb +34 -0
- data/test/xml/test_document_fragment.rb +40 -9
- data/test/xml/test_namespace.rb +1 -0
- data/test/xml/test_node.rb +37 -1
- data/test/xml/test_node_set.rb +56 -36
- data/test/xml/test_xpath.rb +65 -19
- data/test_all +11 -1
- metadata +12 -7
- data/tasks/nokogiri.org.rb +0 -24
data/lib/nokogiri.rb
CHANGED
@@ -61,8 +61,8 @@ require 'nokogiri/html/builder'
|
|
61
61
|
# puts link.content
|
62
62
|
# end
|
63
63
|
#
|
64
|
-
# See Nokogiri::XML::
|
65
|
-
# See Nokogiri::XML::
|
64
|
+
# See Nokogiri::XML::Searchable#css for more information about CSS searching.
|
65
|
+
# See Nokogiri::XML::Searchable#xpath for more information about XPath searching.
|
66
66
|
module Nokogiri
|
67
67
|
class << self
|
68
68
|
###
|
@@ -4,25 +4,26 @@ module Nokogiri
|
|
4
4
|
# The Slop decorator implements method missing such that a methods may be
|
5
5
|
# used instead of XPath or CSS. See Nokogiri.Slop
|
6
6
|
module Slop
|
7
|
+
# The default XPath search context for Slop
|
8
|
+
XPATH_PREFIX = "./"
|
9
|
+
|
7
10
|
###
|
8
11
|
# look for node with +name+. See Nokogiri.Slop
|
9
12
|
def method_missing name, *args, &block
|
10
|
-
prefix = implied_xpath_context
|
11
|
-
|
12
13
|
if args.empty?
|
13
|
-
list = xpath("#{
|
14
|
+
list = xpath("#{XPATH_PREFIX}#{name.to_s.sub(/^_/, '')}")
|
14
15
|
elsif args.first.is_a? Hash
|
15
16
|
hash = args.first
|
16
17
|
if hash[:css]
|
17
18
|
list = css("#{name}#{hash[:css]}")
|
18
19
|
elsif hash[:xpath]
|
19
20
|
conds = Array(hash[:xpath]).join(' and ')
|
20
|
-
list = xpath("#{
|
21
|
+
list = xpath("#{XPATH_PREFIX}#{name}[#{conds}]")
|
21
22
|
end
|
22
23
|
else
|
23
24
|
CSS::Parser.without_cache do
|
24
25
|
list = xpath(
|
25
|
-
*CSS.xpath_for("#{name}#{args.first}", :prefix =>
|
26
|
+
*CSS.xpath_for("#{name}#{args.first}", :prefix => XPATH_PREFIX)
|
26
27
|
)
|
27
28
|
end
|
28
29
|
end
|
@@ -32,9 +33,7 @@ module Nokogiri
|
|
32
33
|
end
|
33
34
|
|
34
35
|
def respond_to_missing? name, include_private = false
|
35
|
-
|
36
|
-
|
37
|
-
list = xpath("#{prefix}#{name.to_s.sub(/^_/, '')}")
|
36
|
+
list = xpath("#{XPATH_PREFIX}#{name.to_s.sub(/^_/, '')}")
|
38
37
|
|
39
38
|
!list.empty?
|
40
39
|
end
|
@@ -2,13 +2,33 @@ module Nokogiri
|
|
2
2
|
module HTML
|
3
3
|
module SAX
|
4
4
|
class PushParser
|
5
|
-
|
5
|
+
|
6
|
+
# The Nokogiri::HTML::SAX::Document on which the PushParser will be
|
7
|
+
# operating
|
8
|
+
attr_accessor :document
|
9
|
+
|
10
|
+
def initialize(doc = HTML::SAX::Document.new, file_name = nil, encoding = 'UTF-8')
|
6
11
|
@document = doc
|
7
12
|
@encoding = encoding
|
8
13
|
@sax_parser = HTML::SAX::Parser.new(doc, @encoding)
|
9
14
|
|
10
15
|
## Create our push parser context
|
11
|
-
initialize_native(@sax_parser, file_name,
|
16
|
+
initialize_native(@sax_parser, file_name, encoding)
|
17
|
+
end
|
18
|
+
|
19
|
+
###
|
20
|
+
# Write a +chunk+ of HTML to the PushParser. Any callback methods
|
21
|
+
# that can be called will be called immediately.
|
22
|
+
def write chunk, last_chunk = false
|
23
|
+
native_write(chunk, last_chunk)
|
24
|
+
end
|
25
|
+
alias :<< :write
|
26
|
+
|
27
|
+
###
|
28
|
+
# Finish the parsing. This method is only necessary for
|
29
|
+
# Nokogiri::HTML::SAX::Document#end_document to be called.
|
30
|
+
def finish
|
31
|
+
write '', true
|
12
32
|
end
|
13
33
|
end
|
14
34
|
end
|
data/lib/nokogiri/nokogiri.jar
CHANGED
Binary file
|
data/lib/nokogiri/version.rb
CHANGED
data/lib/nokogiri/xml.rb
CHANGED
@@ -5,8 +5,8 @@ module Nokogiri
|
|
5
5
|
# XML documents. The Document is created by parsing an XML document.
|
6
6
|
# See Nokogiri::XML::Document.parse() for more information on parsing.
|
7
7
|
#
|
8
|
-
# For searching a Document, see Nokogiri::XML::
|
9
|
-
# Nokogiri::XML::
|
8
|
+
# For searching a Document, see Nokogiri::XML::Searchable#css and
|
9
|
+
# Nokogiri::XML::Searchable#xpath
|
10
10
|
#
|
11
11
|
class Document < Nokogiri::XML::Node
|
12
12
|
# I'm ignoring unicode characters here.
|
@@ -267,8 +267,8 @@ module Nokogiri
|
|
267
267
|
(string_or_io.respond_to?(:eof?) && string_or_io.eof?)
|
268
268
|
end
|
269
269
|
|
270
|
-
def
|
271
|
-
"
|
270
|
+
def implied_xpath_contexts # :nodoc:
|
271
|
+
["//"]
|
272
272
|
end
|
273
273
|
|
274
274
|
def inspect_attributes
|
@@ -73,15 +73,43 @@ module Nokogiri
|
|
73
73
|
end
|
74
74
|
|
75
75
|
###
|
76
|
-
#
|
76
|
+
# call-seq: css *rules, [namespace-bindings, custom-pseudo-class]
|
77
|
+
#
|
78
|
+
# Search this fragment for CSS +rules+. +rules+ must be one or more CSS
|
79
|
+
# selectors. For example:
|
80
|
+
#
|
81
|
+
# For more information see Nokogiri::XML::Searchable#css
|
77
82
|
def css *args
|
78
83
|
if children.any?
|
79
|
-
children.css(*args)
|
84
|
+
children.css(*args) # 'children' is a smell here
|
80
85
|
else
|
81
86
|
NodeSet.new(document)
|
82
87
|
end
|
83
88
|
end
|
84
89
|
|
90
|
+
#
|
91
|
+
# NOTE that we don't delegate #xpath to children ... another smell.
|
92
|
+
# def xpath ; end
|
93
|
+
#
|
94
|
+
|
95
|
+
###
|
96
|
+
# call-seq: search *paths, [namespace-bindings, xpath-variable-bindings, custom-handler-class]
|
97
|
+
#
|
98
|
+
# Search this fragment for +paths+. +paths+ must be one or more XPath or CSS queries.
|
99
|
+
#
|
100
|
+
# For more information see Nokogiri::XML::Searchable#search
|
101
|
+
def search *rules
|
102
|
+
rules, handler, ns, binds = extract_params(rules)
|
103
|
+
|
104
|
+
rules.inject(NodeSet.new(document)) do |set, rule|
|
105
|
+
set += if rule =~ Searchable::LOOKS_LIKE_XPATH
|
106
|
+
xpath(*([rule, ns, handler, binds].compact))
|
107
|
+
else
|
108
|
+
children.css(*([rule, ns, handler].compact)) # 'children' is a smell here
|
109
|
+
end
|
110
|
+
end
|
111
|
+
end
|
112
|
+
|
85
113
|
alias :serialize :to_s
|
86
114
|
|
87
115
|
class << self
|
@@ -92,6 +120,15 @@ module Nokogiri
|
|
92
120
|
end
|
93
121
|
end
|
94
122
|
|
123
|
+
# A list of Nokogiri::XML::SyntaxError found when parsing a document
|
124
|
+
def errors
|
125
|
+
document.errors
|
126
|
+
end
|
127
|
+
|
128
|
+
def errors= things # :nodoc:
|
129
|
+
document.errors = things
|
130
|
+
end
|
131
|
+
|
95
132
|
private
|
96
133
|
|
97
134
|
# fix for issue 770
|
data/lib/nokogiri/xml/node.rb
CHANGED
@@ -32,9 +32,10 @@ module Nokogiri
|
|
32
32
|
# * Nokogiri::XML::Node#next
|
33
33
|
# * Nokogiri::XML::Node#previous
|
34
34
|
#
|
35
|
-
# You may search this node's subtree using
|
35
|
+
# You may search this node's subtree using Searchable#xpath and Searchable#css
|
36
36
|
class Node
|
37
37
|
include Nokogiri::XML::PP::Node
|
38
|
+
include Nokogiri::XML::Searchable
|
38
39
|
include Enumerable
|
39
40
|
|
40
41
|
# Element node type, see Nokogiri::XML::Node#element?
|
@@ -90,130 +91,6 @@ module Nokogiri
|
|
90
91
|
document.decorate(self)
|
91
92
|
end
|
92
93
|
|
93
|
-
###
|
94
|
-
# Search this node for +paths+. +paths+ can be XPath or CSS, and an
|
95
|
-
# optional hash of namespaces may be appended.
|
96
|
-
# See Node#xpath and Node#css.
|
97
|
-
def search *paths
|
98
|
-
# TODO use paths, handler, ns, binds = extract_params(paths)
|
99
|
-
ns = paths.last.is_a?(Hash) ? paths.pop :
|
100
|
-
(document.root ? document.root.namespaces : {})
|
101
|
-
|
102
|
-
prefix = "#{implied_xpath_context}/"
|
103
|
-
|
104
|
-
xpath(*(paths.map { |path|
|
105
|
-
path = path.to_s
|
106
|
-
path =~ /^(\.\/|\/|\.\.|\.$)/ ? path : CSS.xpath_for(
|
107
|
-
path,
|
108
|
-
:prefix => prefix,
|
109
|
-
:ns => ns
|
110
|
-
)
|
111
|
-
}.flatten.uniq) + [ns])
|
112
|
-
end
|
113
|
-
alias :/ :search
|
114
|
-
|
115
|
-
###
|
116
|
-
# call-seq: xpath *paths, [namespace-bindings, variable-bindings, custom-handler-class]
|
117
|
-
#
|
118
|
-
# Search this node for XPath +paths+. +paths+ must be one or more XPath
|
119
|
-
# queries.
|
120
|
-
#
|
121
|
-
# node.xpath('.//title')
|
122
|
-
#
|
123
|
-
# A hash of namespace bindings may be appended. For example:
|
124
|
-
#
|
125
|
-
# node.xpath('.//foo:name', {'foo' => 'http://example.org/'})
|
126
|
-
# node.xpath('.//xmlns:name', node.root.namespaces)
|
127
|
-
#
|
128
|
-
# A hash of variable bindings may also be appended to the namespace bindings. For example:
|
129
|
-
#
|
130
|
-
# node.xpath('.//address[@domestic=$value]', nil, {:value => 'Yes'})
|
131
|
-
#
|
132
|
-
# Custom XPath functions may also be defined. To define custom
|
133
|
-
# functions create a class and implement the function you want
|
134
|
-
# to define. The first argument to the method will be the
|
135
|
-
# current matching NodeSet. Any other arguments are ones that
|
136
|
-
# you pass in. Note that this class may appear anywhere in the
|
137
|
-
# argument list. For example:
|
138
|
-
#
|
139
|
-
# node.xpath('.//title[regex(., "\w+")]', Class.new {
|
140
|
-
# def regex node_set, regex
|
141
|
-
# node_set.find_all { |node| node['some_attribute'] =~ /#{regex}/ }
|
142
|
-
# end
|
143
|
-
# }.new)
|
144
|
-
#
|
145
|
-
def xpath *paths
|
146
|
-
return NodeSet.new(document) unless document
|
147
|
-
|
148
|
-
paths, handler, ns, binds = extract_params(paths)
|
149
|
-
|
150
|
-
sets = paths.map { |path|
|
151
|
-
ctx = XPathContext.new(self)
|
152
|
-
ctx.register_namespaces(ns)
|
153
|
-
path = path.gsub(/xmlns:/, ' :') unless Nokogiri.uses_libxml?
|
154
|
-
|
155
|
-
binds.each do |key,value|
|
156
|
-
ctx.register_variable key.to_s, value
|
157
|
-
end if binds
|
158
|
-
|
159
|
-
ctx.evaluate(path, handler)
|
160
|
-
}
|
161
|
-
return sets.first if sets.length == 1
|
162
|
-
|
163
|
-
NodeSet.new(document) do |combined|
|
164
|
-
sets.each do |set|
|
165
|
-
set.each do |node|
|
166
|
-
combined << node
|
167
|
-
end
|
168
|
-
end
|
169
|
-
end
|
170
|
-
end
|
171
|
-
|
172
|
-
###
|
173
|
-
# call-seq: css *rules, [namespace-bindings, custom-pseudo-class]
|
174
|
-
#
|
175
|
-
# Search this node for CSS +rules+. +rules+ must be one or more CSS
|
176
|
-
# selectors. For example:
|
177
|
-
#
|
178
|
-
# node.css('title')
|
179
|
-
# node.css('body h1.bold')
|
180
|
-
# node.css('div + p.green', 'div#one')
|
181
|
-
#
|
182
|
-
# A hash of namespace bindings may be appended. For example:
|
183
|
-
#
|
184
|
-
# node.css('bike|tire', {'bike' => 'http://schwinn.com/'})
|
185
|
-
#
|
186
|
-
# Custom CSS pseudo classes may also be defined. To define
|
187
|
-
# custom pseudo classes, create a class and implement the custom
|
188
|
-
# pseudo class you want defined. The first argument to the
|
189
|
-
# method will be the current matching NodeSet. Any other
|
190
|
-
# arguments are ones that you pass in. For example:
|
191
|
-
#
|
192
|
-
# node.css('title:regex("\w+")', Class.new {
|
193
|
-
# def regex node_set, regex
|
194
|
-
# node_set.find_all { |node| node['some_attribute'] =~ /#{regex}/ }
|
195
|
-
# end
|
196
|
-
# }.new)
|
197
|
-
#
|
198
|
-
# Note that the CSS query string is case-sensitive with regards
|
199
|
-
# to your document type. That is, if you're looking for "H1" in
|
200
|
-
# an HTML document, you'll never find anything, since HTML tags
|
201
|
-
# will match only lowercase CSS queries. However, "H1" might be
|
202
|
-
# found in an XML document, where tags names are case-sensitive
|
203
|
-
# (e.g., "H1" is distinct from "h1").
|
204
|
-
#
|
205
|
-
def css *rules
|
206
|
-
rules, handler, ns, binds = extract_params(rules)
|
207
|
-
|
208
|
-
prefix = "#{implied_xpath_context}/"
|
209
|
-
|
210
|
-
rules = rules.map { |rule|
|
211
|
-
CSS.xpath_for(rule, :prefix => prefix, :ns => ns)
|
212
|
-
}.flatten.uniq + [ns, handler, binds].compact
|
213
|
-
|
214
|
-
xpath(*rules)
|
215
|
-
end
|
216
|
-
|
217
94
|
###
|
218
95
|
# Search this node's immediate children using CSS selector +selector+
|
219
96
|
def > selector
|
@@ -221,33 +98,6 @@ module Nokogiri
|
|
221
98
|
xpath CSS.xpath_for(selector, :prefix => "./", :ns => ns).first
|
222
99
|
end
|
223
100
|
|
224
|
-
###
|
225
|
-
# Search for the first occurrence of +path+.
|
226
|
-
#
|
227
|
-
# Returns nil if nothing is found, otherwise a Node.
|
228
|
-
def at path, ns = document.root ? document.root.namespaces : {}
|
229
|
-
search(path, ns).first
|
230
|
-
end
|
231
|
-
alias :% :at
|
232
|
-
|
233
|
-
##
|
234
|
-
# Search this node for the first occurrence of XPath +paths+.
|
235
|
-
# Equivalent to <tt>xpath(paths).first</tt>
|
236
|
-
# See Node#xpath for more information.
|
237
|
-
#
|
238
|
-
def at_xpath *paths
|
239
|
-
xpath(*paths).first
|
240
|
-
end
|
241
|
-
|
242
|
-
##
|
243
|
-
# Search this node for the first occurrence of CSS +rules+.
|
244
|
-
# Equivalent to <tt>css(rules).first</tt>
|
245
|
-
# See Node#css for more information.
|
246
|
-
#
|
247
|
-
def at_css *rules
|
248
|
-
css(*rules).first
|
249
|
-
end
|
250
|
-
|
251
101
|
###
|
252
102
|
# Get the attribute value for the attribute +name+
|
253
103
|
def [] name
|
@@ -327,7 +177,7 @@ module Nokogiri
|
|
327
177
|
# Also see related method +after+.
|
328
178
|
def add_next_sibling node_or_tags
|
329
179
|
raise ArgumentError.new("A document may not have multiple root nodes.") if (parent && parent.document?) && !node_or_tags.processing_instruction?
|
330
|
-
|
180
|
+
|
331
181
|
add_sibling :next, node_or_tags
|
332
182
|
end
|
333
183
|
|
@@ -485,7 +335,9 @@ module Nokogiri
|
|
485
335
|
###
|
486
336
|
# Remove the attribute named +name+
|
487
337
|
def remove_attribute name
|
488
|
-
attributes[name].remove if key? name
|
338
|
+
attr = attributes[name].remove if key? name
|
339
|
+
clear_xpath_context if Nokogiri.jruby?
|
340
|
+
attr
|
489
341
|
end
|
490
342
|
alias :delete :remove_attribute
|
491
343
|
|
@@ -930,25 +782,8 @@ module Nokogiri
|
|
930
782
|
write_to io, options
|
931
783
|
end
|
932
784
|
|
933
|
-
def
|
934
|
-
|
935
|
-
handler = params.find { |param|
|
936
|
-
![Hash, String, Symbol].include?(param.class)
|
937
|
-
}
|
938
|
-
|
939
|
-
params -= [handler] if handler
|
940
|
-
|
941
|
-
hashes = []
|
942
|
-
while Hash === params.last || params.last.nil?
|
943
|
-
hashes << params.pop
|
944
|
-
break if params.empty?
|
945
|
-
end
|
946
|
-
|
947
|
-
ns, binds = hashes.reverse
|
948
|
-
|
949
|
-
ns ||= document.root ? document.root.namespaces : {}
|
950
|
-
|
951
|
-
[params, handler, ns, binds]
|
785
|
+
def inspect_attributes
|
786
|
+
[:name, :namespace, :attribute_nodes, :children]
|
952
787
|
end
|
953
788
|
|
954
789
|
def coerce data # :nodoc:
|
@@ -971,22 +806,17 @@ Requires a Node, NodeSet or String argument, and cannot accept a #{data.class}.
|
|
971
806
|
EOERR
|
972
807
|
end
|
973
808
|
|
974
|
-
def
|
975
|
-
"
|
976
|
-
end
|
977
|
-
|
978
|
-
def inspect_attributes
|
979
|
-
[:name, :namespace, :attribute_nodes, :children]
|
809
|
+
def implied_xpath_contexts # :nodoc:
|
810
|
+
[".//"]
|
980
811
|
end
|
981
812
|
|
982
|
-
def add_child_node_and_reparent_attrs node
|
813
|
+
def add_child_node_and_reparent_attrs node # :nodoc:
|
983
814
|
add_child_node node
|
984
815
|
node.attribute_nodes.find_all { |a| a.name =~ /:/ }.each do |attr_node|
|
985
816
|
attr_node.remove
|
986
817
|
node[attr_node.name] = attr_node.value
|
987
818
|
end
|
988
819
|
end
|
989
|
-
|
990
820
|
end
|
991
821
|
end
|
992
822
|
end
|