nokogiri 1.4.3.1-java → 1.4.4-java
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of nokogiri might be problematic. Click here for more details.
- data/CHANGELOG.ja.rdoc +26 -0
- data/CHANGELOG.rdoc +26 -0
- data/Manifest.txt +3 -0
- data/README.ja.rdoc +0 -4
- data/README.rdoc +0 -4
- data/Rakefile +1 -0
- data/bin/nokogiri +6 -1
- data/ext/nokogiri/depend +358 -32
- data/ext/nokogiri/extconf.rb +1 -3
- data/ext/nokogiri/nokogiri.c +2 -0
- data/ext/nokogiri/nokogiri.h +7 -0
- data/ext/nokogiri/xml_dtd.c +2 -2
- data/ext/nokogiri/xml_io.c +2 -2
- data/ext/nokogiri/xml_node.c +31 -6
- data/ext/nokogiri/xml_node_set.c +1 -1
- data/ext/nokogiri/xml_sax_parser.c +1 -1
- data/ext/nokogiri/xml_sax_parser_context.c +40 -0
- data/ext/nokogiri/xml_xpath_context.c +33 -2
- data/ext/nokogiri/xslt_stylesheet.c +116 -4
- data/lib/nokogiri/css/generated_tokenizer.rb +1 -2
- data/lib/nokogiri/css/xpath_visitor.rb +15 -7
- data/lib/nokogiri/decorators/slop.rb +5 -3
- data/lib/nokogiri/ffi/libxml.rb +9 -0
- data/lib/nokogiri/ffi/structs/xml_parser_context.rb +2 -1
- data/lib/nokogiri/ffi/structs/xml_parser_input.rb +19 -0
- data/lib/nokogiri/ffi/xml/dtd.rb +2 -2
- data/lib/nokogiri/ffi/xml/node.rb +9 -4
- data/lib/nokogiri/ffi/xml/sax/parser_context.rb +12 -0
- data/lib/nokogiri/ffi/xml/xpath_context.rb +5 -0
- data/lib/nokogiri/ffi/xslt/stylesheet.rb +21 -1
- data/lib/nokogiri/html/document.rb +3 -3
- data/lib/nokogiri/html/document_fragment.rb +19 -17
- data/lib/nokogiri/version.rb +1 -1
- data/lib/nokogiri/xml/document.rb +26 -1
- data/lib/nokogiri/xml/document_fragment.rb +2 -2
- data/lib/nokogiri/xml/dtd.rb +11 -0
- data/lib/nokogiri/xml/node.rb +156 -45
- data/lib/nokogiri/xml/node_set.rb +2 -2
- data/lib/nokogiri/xml/reader.rb +36 -0
- data/lib/nokogiri/xml/sax/document.rb +4 -2
- data/lib/nokogiri/xslt.rb +9 -5
- data/tasks/cross_compile.rb +24 -2
- data/test/css/test_parser.rb +29 -18
- data/test/decorators/test_slop.rb +16 -0
- data/test/html/test_document_fragment.rb +46 -3
- data/test/html/test_node.rb +9 -0
- data/test/xml/sax/test_parser.rb +11 -3
- data/test/xml/sax/test_parser_context.rb +50 -0
- data/test/xml/sax/test_push_parser.rb +18 -1
- data/test/xml/test_document_fragment.rb +14 -2
- data/test/xml/test_dtd.rb +15 -0
- data/test/xml/test_node.rb +31 -2
- data/test/xml/test_node_reparenting.rb +59 -31
- data/test/xml/test_node_set.rb +13 -0
- data/test/xml/test_xpath.rb +32 -0
- data/test/xslt/test_custom_functions.rb +94 -0
- metadata +495 -516
- data/ext/nokogiri/libcharset-1.dll +0 -0
- data/ext/nokogiri/libexslt.dll +0 -0
- data/ext/nokogiri/libiconv-2.dll +0 -0
- data/ext/nokogiri/libxml2.dll +0 -0
- data/ext/nokogiri/libxslt.dll +0 -0
- data/ext/nokogiri/zlib1.dll +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
#--
|
2
2
|
# DO NOT MODIFY!!!!
|
3
|
-
# This file is automatically generated by rex 1.0.
|
3
|
+
# This file is automatically generated by rex 1.0.4
|
4
4
|
# from lexical definition file "lib/nokogiri/css/tokenizer.rex".
|
5
5
|
#++
|
6
6
|
|
@@ -29,7 +29,6 @@ class GeneratedTokenizer < GeneratedParser
|
|
29
29
|
scan_setup(str)
|
30
30
|
do_parse
|
31
31
|
end
|
32
|
-
alias :scan :scan_str
|
33
32
|
|
34
33
|
def load_file( filename )
|
35
34
|
@filename = filename
|
@@ -11,18 +11,25 @@ module Nokogiri
|
|
11
11
|
'child::text()'
|
12
12
|
when /^self\(/
|
13
13
|
"self::#{node.value[1]}"
|
14
|
-
when /^
|
14
|
+
when /^eq\(/
|
15
|
+
"position() = #{node.value[1]}"
|
16
|
+
when /^(nth|nth-of-type|nth-child)\(/
|
15
17
|
if node.value[1].is_a?(Nokogiri::CSS::Node) and node.value[1].type == :AN_PLUS_B
|
16
18
|
an_plus_b(node.value[1])
|
17
19
|
else
|
18
|
-
"position() =
|
20
|
+
"position() = #{node.value[1]}"
|
21
|
+
end
|
22
|
+
when /^(nth-last-child|nth-last-of-type)\(/
|
23
|
+
if node.value[1].is_a?(Nokogiri::CSS::Node) and node.value[1].type == :AN_PLUS_B
|
24
|
+
an_plus_b(node.value[1], :last => true)
|
25
|
+
else
|
26
|
+
index = node.value[1].to_i - 1
|
27
|
+
index == 0 ? "position() = last()" : "position() = last() - #{index}"
|
19
28
|
end
|
20
29
|
when /^(first|first-of-type)\(/
|
21
30
|
"position() = 1"
|
22
31
|
when /^(last|last-of-type)\(/
|
23
32
|
"position() = last()"
|
24
|
-
when /^(nth-last-child|nth-last-of-type)\(/
|
25
|
-
"position() = last() - #{node.value[1]}"
|
26
33
|
when /^contains\(/
|
27
34
|
"contains(., #{node.value[1]})"
|
28
35
|
when /^gt\(/
|
@@ -144,17 +151,18 @@ module Nokogiri
|
|
144
151
|
end
|
145
152
|
|
146
153
|
private
|
147
|
-
def an_plus_b node
|
154
|
+
def an_plus_b node, options={}
|
148
155
|
raise ArgumentError, "expected an+b node to contain 4 tokens, but is #{node.value.inspect}" unless node.value.size == 4
|
149
156
|
|
150
157
|
a = node.value[0].to_i
|
151
158
|
b = node.value[3].to_i
|
159
|
+
position = options[:last] ? "(last()-position()+1)" : "position()"
|
152
160
|
|
153
161
|
if (b == 0)
|
154
|
-
return "(position
|
162
|
+
return "(#{position} mod #{a}) = 0"
|
155
163
|
else
|
156
164
|
compare = (a < 0) ? "<=" : ">="
|
157
|
-
return "(position
|
165
|
+
return "(#{position} #{compare} #{b}) and (((#{position}-#{b}) mod #{a.abs}) = 0)"
|
158
166
|
end
|
159
167
|
end
|
160
168
|
|
@@ -7,20 +7,22 @@ module Nokogiri
|
|
7
7
|
###
|
8
8
|
# look for node with +name+. See Nokogiri.Slop
|
9
9
|
def method_missing name, *args, &block
|
10
|
+
prefix = implied_xpath_context
|
11
|
+
|
10
12
|
if args.empty?
|
11
|
-
list = xpath("
|
13
|
+
list = xpath("#{prefix}#{name.to_s.sub(/^_/, '')}")
|
12
14
|
elsif args.first.is_a? Hash
|
13
15
|
hash = args.first
|
14
16
|
if hash[:css]
|
15
17
|
list = css("#{name}#{hash[:css]}")
|
16
18
|
elsif hash[:xpath]
|
17
19
|
conds = Array(hash[:xpath]).join(' and ')
|
18
|
-
list = xpath("
|
20
|
+
list = xpath("#{prefix}#{name}[#{conds}]")
|
19
21
|
end
|
20
22
|
else
|
21
23
|
CSS::Parser.without_cache do
|
22
24
|
list = xpath(
|
23
|
-
*CSS.xpath_for("#{name}#{args.first}", :prefix =>
|
25
|
+
*CSS.xpath_for("#{name}#{args.first}", :prefix => prefix)
|
24
26
|
)
|
25
27
|
end
|
26
28
|
end
|
data/lib/nokogiri/ffi/libxml.rb
CHANGED
@@ -62,6 +62,9 @@ module Nokogiri
|
|
62
62
|
callback :cdata_block_sax_func, [:pointer, :string, :int], :void
|
63
63
|
callback :start_element_ns_sax2_func, [:pointer, :pointer, :pointer, :pointer, :int, :pointer, :int, :int, :pointer], :void
|
64
64
|
callback :end_element_ns_sax2_func, [:pointer, :pointer, :pointer, :pointer], :void
|
65
|
+
callback :xslt_function_init, [:pointer, :string], :pointer
|
66
|
+
callback :xslt_function_shutdown, [:pointer, :string, :pointer], :void
|
67
|
+
callback :xslt_function_caller, [:pointer, :int], :void
|
65
68
|
|
66
69
|
# encoding.c
|
67
70
|
attach_function :xmlFindCharEncodingHandler, [:string], :pointer
|
@@ -200,6 +203,8 @@ module Nokogiri
|
|
200
203
|
attach_function :xmlXPathWrapString, [:pointer], :pointer # should take a :string, but we optimize
|
201
204
|
attach_function :xmlXPathNewBoolean, [:int], :pointer
|
202
205
|
attach_function :xmlXPathNewFloat, [:double], :pointer
|
206
|
+
attach_function :xmlXPathNewCString, [:string], :pointer
|
207
|
+
attach_function :xmlXPathRegisterVariable, [:pointer, :string, :pointer], :int
|
203
208
|
|
204
209
|
class << self
|
205
210
|
# these functions are implemented as C macros
|
@@ -272,6 +277,9 @@ module Nokogiri
|
|
272
277
|
attach_function :xsltApplyStylesheet, [:pointer, :pointer, :pointer], :pointer
|
273
278
|
attach_function :xsltSaveResultToString, [:buffer_out, :buffer_out, :pointer, :pointer], :int
|
274
279
|
attach_function :xsltSetGenericErrorFunc, [:pointer, :generic_error_handler], :void
|
280
|
+
attach_function :xsltRegisterExtModule, [:string, :xslt_function_init, :xslt_function_shutdown], :int
|
281
|
+
attach_function :xsltRegisterExtFunction, [:pointer, :string, :string, :xslt_function_caller], :int
|
282
|
+
attach_function :xsltXPathGetTransformContext, [:pointer], :pointer
|
275
283
|
|
276
284
|
# exslt.c
|
277
285
|
attach_function :exsltRegisterAll, [], :void
|
@@ -376,6 +384,7 @@ require 'nokogiri/xml/syntax_error'
|
|
376
384
|
"structs/xml_element",
|
377
385
|
"structs/xml_entity",
|
378
386
|
"structs/xml_element_content",
|
387
|
+
"structs/xml_parser_input",
|
379
388
|
"xml/node",
|
380
389
|
"xml/namespace",
|
381
390
|
"xml/dtd",
|
@@ -0,0 +1,19 @@
|
|
1
|
+
module Nokogiri
|
2
|
+
# :stopdoc:
|
3
|
+
module LibXML
|
4
|
+
class XmlParserInput < FFI::Struct
|
5
|
+
layout(
|
6
|
+
:buf, :pointer,
|
7
|
+
:filename, :pointer,
|
8
|
+
:directory, :pointer,
|
9
|
+
:base, :pointer,
|
10
|
+
:cur, :pointer,
|
11
|
+
:end, :pointer,
|
12
|
+
:length, :int,
|
13
|
+
:line, :int,
|
14
|
+
:col, :int
|
15
|
+
)
|
16
|
+
end
|
17
|
+
end
|
18
|
+
# :startdoc:
|
19
|
+
end
|
data/lib/nokogiri/ffi/xml/dtd.rb
CHANGED
@@ -53,9 +53,9 @@ module Nokogiri
|
|
53
53
|
|
54
54
|
def internal_attributes attr_name
|
55
55
|
attr_ptr = cstruct[attr_name.to_sym]
|
56
|
-
return nil if attr_ptr.null?
|
57
|
-
|
58
56
|
ahash = {}
|
57
|
+
return ahash if attr_ptr.null?
|
58
|
+
|
59
59
|
LibXML.xmlHashScan(attr_ptr, nil) do |payload, data, name|
|
60
60
|
ahash[name] = Node.wrap(payload)
|
61
61
|
end
|
@@ -405,17 +405,22 @@ module Nokogiri
|
|
405
405
|
end
|
406
406
|
|
407
407
|
def in_context(string, options)
|
408
|
-
|
409
|
-
|
410
|
-
@errors = []
|
411
|
-
LibXML.xmlSetStructuredErrorFunc(nil, SyntaxError.error_array_pusher(@errors))
|
408
|
+
errors = []
|
409
|
+
LibXML.xmlSetStructuredErrorFunc(nil, SyntaxError.error_array_pusher(errors))
|
412
410
|
LibXML.htmlHandleOmittedElem(0)
|
413
411
|
|
414
412
|
list_memory = FFI::MemoryPointer.new :pointer
|
415
413
|
LibXML.xmlParseInNodeContext(cstruct, string, string.length, options, list_memory)
|
416
414
|
|
415
|
+
self.document.children.each do |child|
|
416
|
+
if child.cstruct[:parent] != cstruct[:doc]
|
417
|
+
child.cstruct[:parent] = cstruct[:doc]
|
418
|
+
end
|
419
|
+
end
|
420
|
+
|
417
421
|
LibXML.htmlHandleOmittedElem(1)
|
418
422
|
LibXML.xmlSetStructuredErrorFunc(nil, nil)
|
423
|
+
self.document.errors = errors
|
419
424
|
|
420
425
|
set = NodeSet.wrap(LibXML.xmlXPathNodeSetCreate(nil), document)
|
421
426
|
list_ptr = list_memory.get_pointer(0)
|
@@ -40,6 +40,18 @@ module Nokogiri
|
|
40
40
|
pc
|
41
41
|
end
|
42
42
|
|
43
|
+
def line
|
44
|
+
return nil if cstruct[:input].null?
|
45
|
+
input = LibXML::XmlParserInput.new cstruct[:input]
|
46
|
+
input[:line]
|
47
|
+
end
|
48
|
+
|
49
|
+
def column
|
50
|
+
return nil if cstruct[:input].null?
|
51
|
+
input = LibXML::XmlParserInput.new cstruct[:input]
|
52
|
+
input[:col]
|
53
|
+
end
|
54
|
+
|
43
55
|
def parse_with sax_handler, type = :xml
|
44
56
|
raise ArgumentError unless XML::SAX::Parser === sax_handler
|
45
57
|
sax = sax_handler.cstruct
|
@@ -8,6 +8,11 @@ module Nokogiri
|
|
8
8
|
LibXML.xmlXPathRegisterNs(cstruct, prefix, uri)
|
9
9
|
end
|
10
10
|
|
11
|
+
def register_variable(name, value) # :nodoc:
|
12
|
+
xml_value = LibXML.xmlXPathNewCString(value);
|
13
|
+
LibXML.xmlXPathRegisterVariable(cstruct, name, xml_value);
|
14
|
+
end
|
15
|
+
|
11
16
|
def evaluate(search_path, xpath_handler=nil) # :nodoc:
|
12
17
|
lookup = nil # to keep lambda in scope long enough to avoid a possible GC tragedy
|
13
18
|
query = search_path.to_s
|
@@ -1,5 +1,26 @@
|
|
1
1
|
module Nokogiri
|
2
2
|
module XSLT
|
3
|
+
@modules = {}
|
4
|
+
|
5
|
+
@method_caller = lambda do |context, nargs|
|
6
|
+
# TODO
|
7
|
+
end
|
8
|
+
|
9
|
+
@init_func = lambda do |context, uri|
|
10
|
+
klass = @modules[uri]
|
11
|
+
klass.instance_methods(false).each do |method_name|
|
12
|
+
LibXML.xsltRegisterExtFunction(context, method_name, uri, @method_caller)
|
13
|
+
end
|
14
|
+
klass.new
|
15
|
+
end
|
16
|
+
|
17
|
+
@shutdown_func = lambda do |context, uri, data|
|
18
|
+
end
|
19
|
+
|
20
|
+
def self.register(uri, klass) # :nodoc:
|
21
|
+
raise NotImplementedError.new("sorry, you should implement me.")
|
22
|
+
end
|
23
|
+
|
3
24
|
class Stylesheet
|
4
25
|
|
5
26
|
attr_accessor :cstruct # :nodoc:
|
@@ -47,7 +68,6 @@ module Nokogiri
|
|
47
68
|
|
48
69
|
XML::Document.wrap(ptr)
|
49
70
|
end
|
50
|
-
|
51
71
|
end
|
52
72
|
end
|
53
73
|
end
|
@@ -13,7 +13,7 @@ module Nokogiri
|
|
13
13
|
end
|
14
14
|
|
15
15
|
###
|
16
|
-
# Set the meta tag encoding for this document. If there is no meta
|
16
|
+
# Set the meta tag encoding for this document. If there is no meta
|
17
17
|
# content tag, nil is returned and the encoding is not set.
|
18
18
|
def meta_encoding= encoding
|
19
19
|
return nil unless meta = css('meta').find { |node|
|
@@ -38,7 +38,7 @@ module Nokogiri
|
|
38
38
|
# config.format.as_xml
|
39
39
|
# end
|
40
40
|
#
|
41
|
-
def serialize options = {}
|
41
|
+
def serialize options = {}
|
42
42
|
options[:save_with] ||= XML::Node::SaveOptions::FORMAT |
|
43
43
|
XML::Node::SaveOptions::AS_HTML |
|
44
44
|
XML::Node::SaveOptions::NO_DECLARATION |
|
@@ -61,7 +61,7 @@ module Nokogiri
|
|
61
61
|
# is a number that sets options in the parser, such as
|
62
62
|
# Nokogiri::XML::ParseOptions::RECOVER. See the constants in
|
63
63
|
# Nokogiri::XML::ParseOptions.
|
64
|
-
def parse string_or_io, url = nil, encoding = nil, options = XML::ParseOptions::DEFAULT_HTML
|
64
|
+
def parse string_or_io, url = nil, encoding = nil, options = XML::ParseOptions::DEFAULT_HTML
|
65
65
|
|
66
66
|
options = Nokogiri::XML::ParseOptions.new(options) if Fixnum === options
|
67
67
|
# Give the options to the user
|
@@ -1,6 +1,8 @@
|
|
1
1
|
module Nokogiri
|
2
2
|
module HTML
|
3
3
|
class DocumentFragment < Nokogiri::XML::DocumentFragment
|
4
|
+
attr_accessor :errors
|
5
|
+
|
4
6
|
####
|
5
7
|
# Create a Nokogiri::XML::DocumentFragment from +tags+, using +encoding+
|
6
8
|
def self.parse tags, encoding = nil
|
@@ -15,24 +17,24 @@ module Nokogiri
|
|
15
17
|
def initialize document, tags = nil, ctx = nil
|
16
18
|
return self unless tags
|
17
19
|
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
20
|
+
if ctx
|
21
|
+
preexisting_errors = document.errors.dup
|
22
|
+
node_set = ctx.parse("<div>#{tags}</div>")
|
23
|
+
node_set.first.children.each { |child| child.parent = self } unless node_set.empty?
|
24
|
+
self.errors = document.errors - preexisting_errors
|
25
|
+
else
|
26
|
+
# This is a horrible hack, but I don't care
|
27
|
+
if tags.strip =~ /^<body/i
|
28
|
+
path = "/html/body"
|
29
|
+
else
|
30
|
+
path = "/html/body/node()"
|
31
|
+
end
|
28
32
|
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
end
|
35
|
-
children.each { |child| child.parent = self }
|
33
|
+
temp_doc = HTML::Document.parse "<html><body>#{tags}", nil, document.encoding
|
34
|
+
temp_doc.xpath(path).each { |child| child.parent = self }
|
35
|
+
self.errors = temp_doc.errors
|
36
|
+
end
|
37
|
+
children
|
36
38
|
end
|
37
39
|
end
|
38
40
|
end
|
data/lib/nokogiri/version.rb
CHANGED
@@ -113,6 +113,13 @@ module Nokogiri
|
|
113
113
|
# </root>
|
114
114
|
#
|
115
115
|
# The hash returned will look like this: { 'xmlns:foo' => 'bar' }
|
116
|
+
#
|
117
|
+
# Non-prefixed default namespaces (as in "xmlns=") are not included
|
118
|
+
# in the hash.
|
119
|
+
#
|
120
|
+
# Note this is a very expensive operation in current implementation, as it
|
121
|
+
# traverses the entire graph, and also has to bring each node across the
|
122
|
+
# libxml bridge into a ruby object.
|
116
123
|
def collect_namespaces
|
117
124
|
ns = {}
|
118
125
|
traverse { |j| ns.merge!(j.namespaces) }
|
@@ -134,7 +141,21 @@ module Nokogiri
|
|
134
141
|
end
|
135
142
|
|
136
143
|
##
|
137
|
-
# Explore a document with shortcut methods.
|
144
|
+
# Explore a document with shortcut methods. See Nokogiri::Slop for details.
|
145
|
+
#
|
146
|
+
# Note that any nodes that have been instantiated before #slop!
|
147
|
+
# is called will not be decorated with sloppy behavior. So, if you're in
|
148
|
+
# irb, the preferred idiom is:
|
149
|
+
#
|
150
|
+
# irb> doc = Nokogiri::Slop my_markup
|
151
|
+
#
|
152
|
+
# and not
|
153
|
+
#
|
154
|
+
# irb> doc = Nokogiri::HTML my_markup
|
155
|
+
# ... followed by irb's implicit inspect (and therefore instantiation of every node) ...
|
156
|
+
# irb> doc.slop!
|
157
|
+
# ... which does absolutely nothing.
|
158
|
+
#
|
138
159
|
def slop!
|
139
160
|
unless decorators(XML::Node).include? Nokogiri::Decorators::Slop
|
140
161
|
decorators(XML::Node) << Nokogiri::Decorators::Slop
|
@@ -185,6 +206,10 @@ module Nokogiri
|
|
185
206
|
alias :<< :add_child
|
186
207
|
|
187
208
|
private
|
209
|
+
def implied_xpath_context
|
210
|
+
"/"
|
211
|
+
end
|
212
|
+
|
188
213
|
def inspect_attributes
|
189
214
|
[:name, :children]
|
190
215
|
end
|
@@ -11,9 +11,9 @@ module Nokogiri
|
|
11
11
|
return self unless tags
|
12
12
|
|
13
13
|
children = if ctx
|
14
|
-
ctx.parse(tags
|
14
|
+
ctx.parse(tags)
|
15
15
|
else
|
16
|
-
XML::Document.parse("<root>#{tags
|
16
|
+
XML::Document.parse("<root>#{tags}</root>") \
|
17
17
|
.xpath("/root/node()")
|
18
18
|
end
|
19
19
|
children.each { |child| child.parent = self }
|
data/lib/nokogiri/xml/dtd.rb
CHANGED
@@ -2,10 +2,21 @@ module Nokogiri
|
|
2
2
|
module XML
|
3
3
|
class DTD < Nokogiri::XML::Node
|
4
4
|
undef_method :attribute_nodes
|
5
|
+
undef_method :values
|
5
6
|
undef_method :content
|
6
7
|
undef_method :namespace
|
7
8
|
undef_method :namespace_definitions
|
8
9
|
undef_method :line
|
10
|
+
|
11
|
+
def keys
|
12
|
+
attributes.keys
|
13
|
+
end
|
14
|
+
|
15
|
+
def each &block
|
16
|
+
attributes.each { |key, value|
|
17
|
+
block.call([key, value])
|
18
|
+
}
|
19
|
+
end
|
9
20
|
end
|
10
21
|
end
|
11
22
|
end
|
data/lib/nokogiri/xml/node.rb
CHANGED
@@ -95,13 +95,17 @@ module Nokogiri
|
|
95
95
|
# optional hash of namespaces may be appended.
|
96
96
|
# See Node#xpath and Node#css.
|
97
97
|
def search *paths
|
98
|
+
# TODO use paths, handler, ns, binds = extract_params(paths)
|
98
99
|
ns = paths.last.is_a?(Hash) ? paths.pop :
|
99
100
|
(document.root ? document.root.namespaces : {})
|
101
|
+
|
102
|
+
prefix = "#{implied_xpath_context}/"
|
103
|
+
|
100
104
|
xpath(*(paths.map { |path|
|
101
105
|
path = path.to_s
|
102
106
|
path =~ /^(\.\/|\/)/ ? path : CSS.xpath_for(
|
103
107
|
path,
|
104
|
-
:prefix =>
|
108
|
+
:prefix => prefix,
|
105
109
|
:ns => ns
|
106
110
|
)
|
107
111
|
}.flatten.uniq) + [ns])
|
@@ -109,16 +113,28 @@ module Nokogiri
|
|
109
113
|
alias :/ :search
|
110
114
|
|
111
115
|
###
|
116
|
+
# call-seq: xpath *paths, [namespace-bindings, variable-bindings, custom-handler-class]
|
117
|
+
#
|
112
118
|
# Search this node for XPath +paths+. +paths+ must be one or more XPath
|
113
|
-
# queries.
|
119
|
+
# queries.
|
114
120
|
#
|
115
121
|
# node.xpath('.//title')
|
116
|
-
#
|
122
|
+
#
|
123
|
+
# A hash of namespace bindings may be appended. For example:
|
124
|
+
#
|
125
|
+
# node.xpath('.//foo:name', {'foo' => 'http://example.org/'})
|
117
126
|
# node.xpath('.//xmlns:name', node.root.namespaces)
|
118
127
|
#
|
119
|
-
#
|
120
|
-
#
|
121
|
-
#
|
128
|
+
# A hash of variable bindings may also be appended to the namespace bindings. For example:
|
129
|
+
#
|
130
|
+
# node.xpath('.//address[@domestic=$value]', nil, {:value => 'Yes'})
|
131
|
+
#
|
132
|
+
# Custom XPath functions may also be defined. To define custom
|
133
|
+
# functions create a class and implement the function you want
|
134
|
+
# to define. The first argument to the method will be the
|
135
|
+
# current matching NodeSet. Any other arguments are ones that
|
136
|
+
# you pass in. Note that this class may appear anywhere in the
|
137
|
+
# argument list. For example:
|
122
138
|
#
|
123
139
|
# node.xpath('.//title[regex(., "\w+")]', Class.new {
|
124
140
|
# def regex node_set, regex
|
@@ -127,19 +143,18 @@ module Nokogiri
|
|
127
143
|
# }.new)
|
128
144
|
#
|
129
145
|
def xpath *paths
|
130
|
-
# Pop off our custom function handler if it exists
|
131
|
-
handler = ![
|
132
|
-
Hash, String, Symbol
|
133
|
-
].include?(paths.last.class) ? paths.pop : nil
|
134
|
-
|
135
|
-
ns = paths.last.is_a?(Hash) ? paths.pop :
|
136
|
-
(document.root ? document.root.namespaces : {})
|
137
|
-
|
138
146
|
return NodeSet.new(document) unless document
|
139
147
|
|
148
|
+
paths, handler, ns, binds = extract_params(paths)
|
149
|
+
|
140
150
|
sets = paths.map { |path|
|
141
151
|
ctx = XPathContext.new(self)
|
142
152
|
ctx.register_namespaces(ns)
|
153
|
+
|
154
|
+
binds.each do |key,value|
|
155
|
+
ctx.register_variable key.to_s, value
|
156
|
+
end if binds
|
157
|
+
|
143
158
|
ctx.evaluate(path, handler)
|
144
159
|
}
|
145
160
|
return sets.first if sets.length == 1
|
@@ -154,18 +169,24 @@ module Nokogiri
|
|
154
169
|
end
|
155
170
|
|
156
171
|
###
|
172
|
+
# call-seq: css *rules, [namespace-bindings, custom-pseudo-class]
|
173
|
+
#
|
157
174
|
# Search this node for CSS +rules+. +rules+ must be one or more CSS
|
158
|
-
# selectors.
|
175
|
+
# selectors. For example:
|
159
176
|
#
|
160
177
|
# node.css('title')
|
161
178
|
# node.css('body h1.bold')
|
162
179
|
# node.css('div + p.green', 'div#one')
|
163
180
|
#
|
164
|
-
#
|
165
|
-
#
|
166
|
-
#
|
167
|
-
#
|
168
|
-
#
|
181
|
+
# A hash of namespace bindings may be appended. For example:
|
182
|
+
#
|
183
|
+
# node.css('bike|tire', {'bike' => 'http://schwinn.com/'})
|
184
|
+
#
|
185
|
+
# Custom CSS pseudo classes may also be defined. To define
|
186
|
+
# custom pseudo classes, create a class and implement the custom
|
187
|
+
# pseudo class you want defined. The first argument to the
|
188
|
+
# method will be the current matching NodeSet. Any other
|
189
|
+
# arguments are ones that you pass in. For example:
|
169
190
|
#
|
170
191
|
# node.css('title:regex("\w+")', Class.new {
|
171
192
|
# def regex node_set, regex
|
@@ -173,18 +194,21 @@ module Nokogiri
|
|
173
194
|
# end
|
174
195
|
# }.new)
|
175
196
|
#
|
197
|
+
# Note that the CSS query string is case-sensitive with regards
|
198
|
+
# to your document type. That is, if you're looking for "H1" in
|
199
|
+
# an HTML document, you'll never find anything, since HTML tags
|
200
|
+
# will match only lowercase CSS queries. However, "H1" might be
|
201
|
+
# found in an XML document, where tag names are case-sensitive
|
202
|
+
# (e.g., "H1" is distinct from "h1").
|
203
|
+
#
|
176
204
|
def css *rules
|
177
|
-
|
178
|
-
handler = ![
|
179
|
-
Hash, String, Symbol
|
180
|
-
].include?(rules.last.class) ? rules.pop : nil
|
205
|
+
rules, handler, ns, binds = extract_params(rules)
|
181
206
|
|
182
|
-
|
183
|
-
(document.root ? document.root.namespaces : {})
|
207
|
+
prefix = "#{implied_xpath_context}/"
|
184
208
|
|
185
209
|
rules = rules.map { |rule|
|
186
|
-
CSS.xpath_for(rule, :prefix =>
|
187
|
-
}.flatten.uniq + [ns, handler].compact
|
210
|
+
CSS.xpath_for(rule, :prefix => prefix, :ns => ns)
|
211
|
+
}.flatten.uniq + [ns, handler, binds].compact
|
188
212
|
|
189
213
|
xpath(*rules)
|
190
214
|
end
|
@@ -234,7 +258,7 @@ module Nokogiri
|
|
234
258
|
# Add +node_or_tags+ as a child of this Node.
|
235
259
|
# +node_or_tags+ can be a Nokogiri::XML::Node, a ::DocumentFragment, a ::NodeSet, or a string containing markup.
|
236
260
|
#
|
237
|
-
# Returns the
|
261
|
+
# Returns the reparented node (if +node_or_tags+ is a Node), or NodeSet (if +node_or_tags+ is a DocumentFragment, NodeSet, or string).
|
238
262
|
def add_child node_or_tags
|
239
263
|
node_or_tags = coerce(node_or_tags)
|
240
264
|
if node_or_tags.is_a?(XML::NodeSet)
|
@@ -242,42 +266,55 @@ module Nokogiri
|
|
242
266
|
else
|
243
267
|
add_child_node node_or_tags
|
244
268
|
end
|
269
|
+
node_or_tags
|
245
270
|
end
|
246
271
|
|
247
272
|
###
|
248
273
|
# Insert +node_or_tags+ before this Node (as a sibling).
|
249
274
|
# +node_or_tags+ can be a Nokogiri::XML::Node, a ::DocumentFragment, a ::NodeSet, or a string containing markup.
|
250
275
|
#
|
251
|
-
# Returns the
|
276
|
+
# Returns the reparented node (if +node_or_tags+ is a Node), or NodeSet (if +node_or_tags+ is a DocumentFragment, NodeSet, or string).
|
252
277
|
#
|
253
278
|
# Also see related method +before+.
|
254
279
|
def add_previous_sibling node_or_tags
|
255
280
|
node_or_tags = coerce(node_or_tags)
|
256
281
|
if node_or_tags.is_a?(XML::NodeSet)
|
257
|
-
|
282
|
+
if text?
|
283
|
+
pivot = Nokogiri::XML::Node.new 'dummy', document
|
284
|
+
add_previous_sibling_node pivot
|
285
|
+
else
|
286
|
+
pivot = self
|
287
|
+
end
|
288
|
+
node_or_tags.each { |n| pivot.send :add_previous_sibling_node, n }
|
289
|
+
pivot.unlink if text?
|
258
290
|
else
|
259
291
|
add_previous_sibling_node node_or_tags
|
260
292
|
end
|
293
|
+
node_or_tags
|
261
294
|
end
|
262
295
|
|
263
296
|
###
|
264
297
|
# Insert +node_or_tags+ after this Node (as a sibling).
|
265
298
|
# +node_or_tags+ can be a Nokogiri::XML::Node, a ::DocumentFragment, a ::NodeSet, or a string containing markup.
|
266
299
|
#
|
267
|
-
# Returns the
|
300
|
+
# Returns the reparented node (if +node_or_tags+ is a Node), or NodeSet (if +node_or_tags+ is a DocumentFragment, NodeSet, or string).
|
268
301
|
#
|
269
302
|
# Also see related method +after+.
|
270
303
|
def add_next_sibling node_or_tags
|
271
304
|
node_or_tags = coerce(node_or_tags)
|
272
305
|
if node_or_tags.is_a?(XML::NodeSet)
|
273
|
-
if
|
274
|
-
|
306
|
+
if text?
|
307
|
+
pivot = Nokogiri::XML::Node.new 'dummy', document
|
308
|
+
add_next_sibling_node pivot
|
275
309
|
else
|
276
|
-
|
310
|
+
pivot = self
|
277
311
|
end
|
312
|
+
node_or_tags.reverse.each { |n| pivot.send :add_next_sibling_node, n }
|
313
|
+
pivot.unlink if text?
|
278
314
|
else
|
279
315
|
add_next_sibling_node node_or_tags
|
280
316
|
end
|
317
|
+
node_or_tags
|
281
318
|
end
|
282
319
|
|
283
320
|
####
|
@@ -305,11 +342,25 @@ module Nokogiri
|
|
305
342
|
end
|
306
343
|
|
307
344
|
####
|
308
|
-
# Set the
|
345
|
+
# Set the inner html for this Node to +node_or_tags+
|
309
346
|
# +node_or_tags+ can be a Nokogiri::XML::Node, a Nokogiri::XML::DocumentFragment, or a string containing markup.
|
310
347
|
#
|
311
348
|
# Returns self.
|
349
|
+
#
|
350
|
+
# Also see related method +children=+
|
312
351
|
def inner_html= node_or_tags
|
352
|
+
self.children = node_or_tags
|
353
|
+
self
|
354
|
+
end
|
355
|
+
|
356
|
+
####
|
357
|
+
# Set the inner html for this Node to +node_or_tags+
|
358
|
+
# +node_or_tags+ can be a Nokogiri::XML::Node, a Nokogiri::XML::DocumentFragment, or a string containing markup.
|
359
|
+
#
|
360
|
+
# Returns the reparented node (if +node_or_tags+ is a Node), or NodeSet (if +node_or_tags+ is a DocumentFragment, NodeSet, or string).
|
361
|
+
#
|
362
|
+
# Also see related method +inner_html=+
|
363
|
+
def children= node_or_tags
|
313
364
|
node_or_tags = coerce(node_or_tags)
|
314
365
|
children.unlink
|
315
366
|
if node_or_tags.is_a?(XML::NodeSet)
|
@@ -317,24 +368,32 @@ module Nokogiri
|
|
317
368
|
else
|
318
369
|
add_child node_or_tags
|
319
370
|
end
|
320
|
-
|
371
|
+
node_or_tags
|
321
372
|
end
|
322
373
|
|
323
374
|
####
|
324
375
|
# Replace this Node with +node_or_tags+.
|
325
376
|
# +node_or_tags+ can be a Nokogiri::XML::Node, a ::DocumentFragment, a ::NodeSet, or a string containing markup.
|
326
377
|
#
|
327
|
-
# Returns the
|
378
|
+
# Returns the reparented node (if +node_or_tags+ is a Node), or NodeSet (if +node_or_tags+ is a DocumentFragment, NodeSet, or string).
|
328
379
|
#
|
329
380
|
# Also see related method +swap+.
|
330
381
|
def replace node_or_tags
|
331
382
|
node_or_tags = coerce(node_or_tags)
|
332
383
|
if node_or_tags.is_a?(XML::NodeSet)
|
333
|
-
|
334
|
-
|
384
|
+
if text?
|
385
|
+
replacee = Nokogiri::XML::Node.new 'dummy', document
|
386
|
+
add_previous_sibling_node replacee
|
387
|
+
unlink
|
388
|
+
else
|
389
|
+
replacee = self
|
390
|
+
end
|
391
|
+
node_or_tags.each { |n| replacee.add_previous_sibling n }
|
392
|
+
replacee.unlink
|
335
393
|
else
|
336
394
|
replace_node node_or_tags
|
337
395
|
end
|
396
|
+
node_or_tags
|
338
397
|
end
|
339
398
|
|
340
399
|
####
|
@@ -430,7 +489,8 @@ module Nokogiri
|
|
430
489
|
# Parse +string_or_io+ as a document fragment within the context of
|
431
490
|
# *this* node. Returns a XML::NodeSet containing the nodes parsed from
|
432
491
|
# +string_or_io+.
|
433
|
-
def parse string_or_io, options =
|
492
|
+
def parse string_or_io, options = nil
|
493
|
+
options ||= (document.html? ? ParseOptions::DEFAULT_HTML : ParseOptions::DEFAULT_XML)
|
434
494
|
if Fixnum === options
|
435
495
|
options = Nokogiri::XML::ParseOptions.new(options)
|
436
496
|
end
|
@@ -442,7 +502,16 @@ module Nokogiri
|
|
442
502
|
string_or_io
|
443
503
|
|
444
504
|
return Nokogiri::XML::NodeSet.new(document) if contents.empty?
|
445
|
-
|
505
|
+
|
506
|
+
##
|
507
|
+
# This is a horrible hack, but I don't care. See #313 for background.
|
508
|
+
error_count = document.errors.length
|
509
|
+
node_set = in_context(contents, options.to_i)
|
510
|
+
if node_set.empty? and document.errors.length > error_count and options.recover?
|
511
|
+
fragment = Nokogiri::HTML::DocumentFragment.parse contents
|
512
|
+
node_set = fragment.children
|
513
|
+
end
|
514
|
+
node_set
|
446
515
|
end
|
447
516
|
|
448
517
|
####
|
@@ -459,7 +528,19 @@ module Nokogiri
|
|
459
528
|
end
|
460
529
|
|
461
530
|
###
|
462
|
-
#
|
531
|
+
# Returns a Hash of {prefix => value} for all namespaces on this
|
532
|
+
# node and its ancestors.
|
533
|
+
#
|
534
|
+
# This method returns the same namespaces as #namespace_scopes.
|
535
|
+
#
|
536
|
+
# Returns namespaces in scope for self -- those defined on self
|
537
|
+
# element directly or any ancestor node -- as a Hash of
|
538
|
+
# attribute-name/value pairs. Note that the keys in this hash
|
539
|
+
# are XML attributes that would be used to define this namespace,
|
540
|
+
# such as "xmlns:prefix", not just the prefix. Default namespace
|
541
|
+
# set on self will be included with key "xmlns". However,
|
542
|
+
# default namespaces set on ancestor will NOT be, even if self
|
543
|
+
# has no explicit default namespace.
|
463
544
|
def namespaces
|
464
545
|
Hash[*namespace_scopes.map { |nd|
|
465
546
|
key = ['xmlns', nd.prefix].compact.join(':')
|
@@ -567,14 +648,22 @@ module Nokogiri
|
|
567
648
|
end
|
568
649
|
|
569
650
|
###
|
570
|
-
#
|
651
|
+
# Adds a default namespace supplied as a string +url+ href, to self.
|
652
|
+
# The consequence is as if an xmlns attribute with the supplied argument were
|
653
|
+
# present in parsed XML. A default namespace set with this method will
|
654
|
+
# not show up in #attributes, but when this node is serialized to XML an
|
655
|
+
# "xmlns" attribute will appear. See also #namespace and #namespace=
|
571
656
|
def default_namespace= url
|
572
657
|
add_namespace_definition(nil, url)
|
573
658
|
end
|
574
659
|
alias :add_namespace :add_namespace_definition
|
575
660
|
|
576
661
|
###
|
577
|
-
# Set the namespace
|
662
|
+
# Set the default namespace on this node (as would be defined with an
|
663
|
+
# "xmlns=" attribute in XML source), as a Namespace object +ns+. Note that
|
664
|
+
# a Namespace added this way will NOT be serialized as an xmlns attribute
|
665
|
+
# for this node. You probably want #default_namespace= instead, or perhaps
|
666
|
+
# #add_namespace_definition with a nil prefix argument.
|
578
667
|
def namespace= ns
|
579
668
|
return set_namespace(ns) unless ns
|
580
669
|
|
@@ -772,6 +861,24 @@ module Nokogiri
|
|
772
861
|
|
773
862
|
private
|
774
863
|
|
864
|
+
def extract_params params # :nodoc:
|
865
|
+
# Pop off our custom function handler if it exists
|
866
|
+
handler = params.find { |param|
|
867
|
+
![Hash, String, Symbol].include?(param.class)
|
868
|
+
}
|
869
|
+
|
870
|
+
params -= [handler] if handler
|
871
|
+
|
872
|
+
hashes = []
|
873
|
+
hashes << params.pop while Hash === params.last || params.last.nil?
|
874
|
+
|
875
|
+
ns, binds = hashes.reverse
|
876
|
+
|
877
|
+
ns ||= document.root ? document.root.namespaces : {}
|
878
|
+
|
879
|
+
[params, handler, ns, binds]
|
880
|
+
end
|
881
|
+
|
775
882
|
def coerce data # :nodoc:
|
776
883
|
return data if data.is_a?(XML::NodeSet)
|
777
884
|
return data.children if data.is_a?(XML::DocumentFragment)
|
@@ -787,6 +894,10 @@ Requires a Node, NodeSet or String argument, and cannot accept a #{data.class}.
|
|
787
894
|
data
|
788
895
|
end
|
789
896
|
|
897
|
+
def implied_xpath_context
|
898
|
+
"./"
|
899
|
+
end
|
900
|
+
|
790
901
|
def inspect_attributes
|
791
902
|
[:name, :namespace, :attribute_nodes, :children]
|
792
903
|
end
|