nokogiri 1.4.3.1-java → 1.4.4-java

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (63) hide show
  1. data/CHANGELOG.ja.rdoc +26 -0
  2. data/CHANGELOG.rdoc +26 -0
  3. data/Manifest.txt +3 -0
  4. data/README.ja.rdoc +0 -4
  5. data/README.rdoc +0 -4
  6. data/Rakefile +1 -0
  7. data/bin/nokogiri +6 -1
  8. data/ext/nokogiri/depend +358 -32
  9. data/ext/nokogiri/extconf.rb +1 -3
  10. data/ext/nokogiri/nokogiri.c +2 -0
  11. data/ext/nokogiri/nokogiri.h +7 -0
  12. data/ext/nokogiri/xml_dtd.c +2 -2
  13. data/ext/nokogiri/xml_io.c +2 -2
  14. data/ext/nokogiri/xml_node.c +31 -6
  15. data/ext/nokogiri/xml_node_set.c +1 -1
  16. data/ext/nokogiri/xml_sax_parser.c +1 -1
  17. data/ext/nokogiri/xml_sax_parser_context.c +40 -0
  18. data/ext/nokogiri/xml_xpath_context.c +33 -2
  19. data/ext/nokogiri/xslt_stylesheet.c +116 -4
  20. data/lib/nokogiri/css/generated_tokenizer.rb +1 -2
  21. data/lib/nokogiri/css/xpath_visitor.rb +15 -7
  22. data/lib/nokogiri/decorators/slop.rb +5 -3
  23. data/lib/nokogiri/ffi/libxml.rb +9 -0
  24. data/lib/nokogiri/ffi/structs/xml_parser_context.rb +2 -1
  25. data/lib/nokogiri/ffi/structs/xml_parser_input.rb +19 -0
  26. data/lib/nokogiri/ffi/xml/dtd.rb +2 -2
  27. data/lib/nokogiri/ffi/xml/node.rb +9 -4
  28. data/lib/nokogiri/ffi/xml/sax/parser_context.rb +12 -0
  29. data/lib/nokogiri/ffi/xml/xpath_context.rb +5 -0
  30. data/lib/nokogiri/ffi/xslt/stylesheet.rb +21 -1
  31. data/lib/nokogiri/html/document.rb +3 -3
  32. data/lib/nokogiri/html/document_fragment.rb +19 -17
  33. data/lib/nokogiri/version.rb +1 -1
  34. data/lib/nokogiri/xml/document.rb +26 -1
  35. data/lib/nokogiri/xml/document_fragment.rb +2 -2
  36. data/lib/nokogiri/xml/dtd.rb +11 -0
  37. data/lib/nokogiri/xml/node.rb +156 -45
  38. data/lib/nokogiri/xml/node_set.rb +2 -2
  39. data/lib/nokogiri/xml/reader.rb +36 -0
  40. data/lib/nokogiri/xml/sax/document.rb +4 -2
  41. data/lib/nokogiri/xslt.rb +9 -5
  42. data/tasks/cross_compile.rb +24 -2
  43. data/test/css/test_parser.rb +29 -18
  44. data/test/decorators/test_slop.rb +16 -0
  45. data/test/html/test_document_fragment.rb +46 -3
  46. data/test/html/test_node.rb +9 -0
  47. data/test/xml/sax/test_parser.rb +11 -3
  48. data/test/xml/sax/test_parser_context.rb +50 -0
  49. data/test/xml/sax/test_push_parser.rb +18 -1
  50. data/test/xml/test_document_fragment.rb +14 -2
  51. data/test/xml/test_dtd.rb +15 -0
  52. data/test/xml/test_node.rb +31 -2
  53. data/test/xml/test_node_reparenting.rb +59 -31
  54. data/test/xml/test_node_set.rb +13 -0
  55. data/test/xml/test_xpath.rb +32 -0
  56. data/test/xslt/test_custom_functions.rb +94 -0
  57. metadata +495 -516
  58. data/ext/nokogiri/libcharset-1.dll +0 -0
  59. data/ext/nokogiri/libexslt.dll +0 -0
  60. data/ext/nokogiri/libiconv-2.dll +0 -0
  61. data/ext/nokogiri/libxml2.dll +0 -0
  62. data/ext/nokogiri/libxslt.dll +0 -0
  63. data/ext/nokogiri/zlib1.dll +0 -0
@@ -1,6 +1,6 @@
1
1
  #--
2
2
  # DO NOT MODIFY!!!!
3
- # This file is automatically generated by rex 1.0.5.beta1
3
+ # This file is automatically generated by rex 1.0.4
4
4
  # from lexical definition file "lib/nokogiri/css/tokenizer.rex".
5
5
  #++
6
6
 
@@ -29,7 +29,6 @@ class GeneratedTokenizer < GeneratedParser
29
29
  scan_setup(str)
30
30
  do_parse
31
31
  end
32
- alias :scan :scan_str
33
32
 
34
33
  def load_file( filename )
35
34
  @filename = filename
@@ -11,18 +11,25 @@ module Nokogiri
11
11
  'child::text()'
12
12
  when /^self\(/
13
13
  "self::#{node.value[1]}"
14
- when /^(eq|nth|nth-of-type|nth-child)\(/
14
+ when /^eq\(/
15
+ "position() = #{node.value[1]}"
16
+ when /^(nth|nth-of-type|nth-child)\(/
15
17
  if node.value[1].is_a?(Nokogiri::CSS::Node) and node.value[1].type == :AN_PLUS_B
16
18
  an_plus_b(node.value[1])
17
19
  else
18
- "position() = " + node.value[1]
20
+ "position() = #{node.value[1]}"
21
+ end
22
+ when /^(nth-last-child|nth-last-of-type)\(/
23
+ if node.value[1].is_a?(Nokogiri::CSS::Node) and node.value[1].type == :AN_PLUS_B
24
+ an_plus_b(node.value[1], :last => true)
25
+ else
26
+ index = node.value[1].to_i - 1
27
+ index == 0 ? "position() = last()" : "position() = last() - #{index}"
19
28
  end
20
29
  when /^(first|first-of-type)\(/
21
30
  "position() = 1"
22
31
  when /^(last|last-of-type)\(/
23
32
  "position() = last()"
24
- when /^(nth-last-child|nth-last-of-type)\(/
25
- "position() = last() - #{node.value[1]}"
26
33
  when /^contains\(/
27
34
  "contains(., #{node.value[1]})"
28
35
  when /^gt\(/
@@ -144,17 +151,18 @@ module Nokogiri
144
151
  end
145
152
 
146
153
  private
147
- def an_plus_b node
154
+ def an_plus_b node, options={}
148
155
  raise ArgumentError, "expected an+b node to contain 4 tokens, but is #{node.value.inspect}" unless node.value.size == 4
149
156
 
150
157
  a = node.value[0].to_i
151
158
  b = node.value[3].to_i
159
+ position = options[:last] ? "(last()-position()+1)" : "position()"
152
160
 
153
161
  if (b == 0)
154
- return "(position() mod #{a}) = 0"
162
+ return "(#{position} mod #{a}) = 0"
155
163
  else
156
164
  compare = (a < 0) ? "<=" : ">="
157
- return "(position() #{compare} #{b}) and (((position()-#{b}) mod #{a.abs}) = 0)"
165
+ return "(#{position} #{compare} #{b}) and (((#{position}-#{b}) mod #{a.abs}) = 0)"
158
166
  end
159
167
  end
160
168
 
@@ -7,20 +7,22 @@ module Nokogiri
7
7
  ###
8
8
  # look for node with +name+. See Nokogiri.Slop
9
9
  def method_missing name, *args, &block
10
+ prefix = implied_xpath_context
11
+
10
12
  if args.empty?
11
- list = xpath("./#{name}")
13
+ list = xpath("#{prefix}#{name.to_s.sub(/^_/, '')}")
12
14
  elsif args.first.is_a? Hash
13
15
  hash = args.first
14
16
  if hash[:css]
15
17
  list = css("#{name}#{hash[:css]}")
16
18
  elsif hash[:xpath]
17
19
  conds = Array(hash[:xpath]).join(' and ')
18
- list = xpath("./#{name}[#{conds}]")
20
+ list = xpath("#{prefix}#{name}[#{conds}]")
19
21
  end
20
22
  else
21
23
  CSS::Parser.without_cache do
22
24
  list = xpath(
23
- *CSS.xpath_for("#{name}#{args.first}", :prefix => "./")
25
+ *CSS.xpath_for("#{name}#{args.first}", :prefix => prefix)
24
26
  )
25
27
  end
26
28
  end
@@ -62,6 +62,9 @@ module Nokogiri
62
62
  callback :cdata_block_sax_func, [:pointer, :string, :int], :void
63
63
  callback :start_element_ns_sax2_func, [:pointer, :pointer, :pointer, :pointer, :int, :pointer, :int, :int, :pointer], :void
64
64
  callback :end_element_ns_sax2_func, [:pointer, :pointer, :pointer, :pointer], :void
65
+ callback :xslt_function_init, [:pointer, :string], :pointer
66
+ callback :xslt_function_shutdown, [:pointer, :string, :pointer], :void
67
+ callback :xslt_function_caller, [:pointer, :int], :void
65
68
 
66
69
  # encoding.c
67
70
  attach_function :xmlFindCharEncodingHandler, [:string], :pointer
@@ -200,6 +203,8 @@ module Nokogiri
200
203
  attach_function :xmlXPathWrapString, [:pointer], :pointer # should take a :string, but we optimize
201
204
  attach_function :xmlXPathNewBoolean, [:int], :pointer
202
205
  attach_function :xmlXPathNewFloat, [:double], :pointer
206
+ attach_function :xmlXPathNewCString, [:string], :pointer
207
+ attach_function :xmlXPathRegisterVariable, [:pointer, :string, :pointer], :int
203
208
 
204
209
  class << self
205
210
  # these functions are implemented as C macros
@@ -272,6 +277,9 @@ module Nokogiri
272
277
  attach_function :xsltApplyStylesheet, [:pointer, :pointer, :pointer], :pointer
273
278
  attach_function :xsltSaveResultToString, [:buffer_out, :buffer_out, :pointer, :pointer], :int
274
279
  attach_function :xsltSetGenericErrorFunc, [:pointer, :generic_error_handler], :void
280
+ attach_function :xsltRegisterExtModule, [:string, :xslt_function_init, :xslt_function_shutdown], :int
281
+ attach_function :xsltRegisterExtFunction, [:pointer, :string, :string, :xslt_function_caller], :int
282
+ attach_function :xsltXPathGetTransformContext, [:pointer], :pointer
275
283
 
276
284
  # exslt.c
277
285
  attach_function :exsltRegisterAll, [], :void
@@ -376,6 +384,7 @@ require 'nokogiri/xml/syntax_error'
376
384
  "structs/xml_element",
377
385
  "structs/xml_entity",
378
386
  "structs/xml_element_content",
387
+ "structs/xml_parser_input",
379
388
  "xml/node",
380
389
  "xml/namespace",
381
390
  "xml/dtd",
@@ -11,7 +11,8 @@ module Nokogiri
11
11
  :version, :string,
12
12
  :encoding, :string,
13
13
  :standalone, :int,
14
- :html, :int
14
+ :html, :int,
15
+ :input, :pointer
15
16
  )
16
17
  end
17
18
  end
@@ -0,0 +1,19 @@
1
+ module Nokogiri
2
+ # :stopdoc:
3
+ module LibXML
4
+ class XmlParserInput < FFI::Struct
5
+ layout(
6
+ :buf, :pointer,
7
+ :filename, :pointer,
8
+ :directory, :pointer,
9
+ :base, :pointer,
10
+ :cur, :pointer,
11
+ :end, :pointer,
12
+ :length, :int,
13
+ :line, :int,
14
+ :col, :int
15
+ )
16
+ end
17
+ end
18
+ # :startdoc:
19
+ end
@@ -53,9 +53,9 @@ module Nokogiri
53
53
 
54
54
  def internal_attributes attr_name
55
55
  attr_ptr = cstruct[attr_name.to_sym]
56
- return nil if attr_ptr.null?
57
-
58
56
  ahash = {}
57
+ return ahash if attr_ptr.null?
58
+
59
59
  LibXML.xmlHashScan(attr_ptr, nil) do |payload, data, name|
60
60
  ahash[name] = Node.wrap(payload)
61
61
  end
@@ -405,17 +405,22 @@ module Nokogiri
405
405
  end
406
406
 
407
407
  def in_context(string, options)
408
- raise RuntimeError, "no contextual parsing on unlinked nodes" if parent.nil?
409
-
410
- @errors = []
411
- LibXML.xmlSetStructuredErrorFunc(nil, SyntaxError.error_array_pusher(@errors))
408
+ errors = []
409
+ LibXML.xmlSetStructuredErrorFunc(nil, SyntaxError.error_array_pusher(errors))
412
410
  LibXML.htmlHandleOmittedElem(0)
413
411
 
414
412
  list_memory = FFI::MemoryPointer.new :pointer
415
413
  LibXML.xmlParseInNodeContext(cstruct, string, string.length, options, list_memory)
416
414
 
415
+ self.document.children.each do |child|
416
+ if child.cstruct[:parent] != cstruct[:doc]
417
+ child.cstruct[:parent] = cstruct[:doc]
418
+ end
419
+ end
420
+
417
421
  LibXML.htmlHandleOmittedElem(1)
418
422
  LibXML.xmlSetStructuredErrorFunc(nil, nil)
423
+ self.document.errors = errors
419
424
 
420
425
  set = NodeSet.wrap(LibXML.xmlXPathNodeSetCreate(nil), document)
421
426
  list_ptr = list_memory.get_pointer(0)
@@ -40,6 +40,18 @@ module Nokogiri
40
40
  pc
41
41
  end
42
42
 
43
+ def line
44
+ return nil if cstruct[:input].null?
45
+ input = LibXML::XmlParserInput.new cstruct[:input]
46
+ input[:line]
47
+ end
48
+
49
+ def column
50
+ return nil if cstruct[:input].null?
51
+ input = LibXML::XmlParserInput.new cstruct[:input]
52
+ input[:col]
53
+ end
54
+
43
55
  def parse_with sax_handler, type = :xml
44
56
  raise ArgumentError unless XML::SAX::Parser === sax_handler
45
57
  sax = sax_handler.cstruct
@@ -8,6 +8,11 @@ module Nokogiri
8
8
  LibXML.xmlXPathRegisterNs(cstruct, prefix, uri)
9
9
  end
10
10
 
11
+ def register_variable(name, value) # :nodoc:
12
+ xml_value = LibXML.xmlXPathNewCString(value);
13
+ LibXML.xmlXPathRegisterVariable(cstruct, name, xml_value);
14
+ end
15
+
11
16
  def evaluate(search_path, xpath_handler=nil) # :nodoc:
12
17
  lookup = nil # to keep lambda in scope long enough to avoid a possible GC tragedy
13
18
  query = search_path.to_s
@@ -1,5 +1,26 @@
1
1
  module Nokogiri
2
2
  module XSLT
3
+ @modules = {}
4
+
5
+ @method_caller = lambda do |context, nargs|
6
+ # TODO
7
+ end
8
+
9
+ @init_func = lambda do |context, uri|
10
+ klass = @modules[uri]
11
+ klass.instance_methods(false).each do |method_name|
12
+ LibXML.xsltRegisterExtFunction(context, method_name, uri, @method_caller)
13
+ end
14
+ klass.new
15
+ end
16
+
17
+ @shutdown_func = lambda do |context, uri, data|
18
+ end
19
+
20
+ def self.register(uri, klass) # :nodoc:
21
+ raise NotImplementedError.new("sorry, you should implement me.")
22
+ end
23
+
3
24
  class Stylesheet
4
25
 
5
26
  attr_accessor :cstruct # :nodoc:
@@ -47,7 +68,6 @@ module Nokogiri
47
68
 
48
69
  XML::Document.wrap(ptr)
49
70
  end
50
-
51
71
  end
52
72
  end
53
73
  end
@@ -13,7 +13,7 @@ module Nokogiri
13
13
  end
14
14
 
15
15
  ###
16
- # Set the meta tag encoding for this document. If there is no meta
16
+ # Set the meta tag encoding for this document. If there is no meta
17
17
  # content tag, nil is returned and the encoding is not set.
18
18
  def meta_encoding= encoding
19
19
  return nil unless meta = css('meta').find { |node|
@@ -38,7 +38,7 @@ module Nokogiri
38
38
  # config.format.as_xml
39
39
  # end
40
40
  #
41
- def serialize options = {}, &block
41
+ def serialize options = {}
42
42
  options[:save_with] ||= XML::Node::SaveOptions::FORMAT |
43
43
  XML::Node::SaveOptions::AS_HTML |
44
44
  XML::Node::SaveOptions::NO_DECLARATION |
@@ -61,7 +61,7 @@ module Nokogiri
61
61
  # is a number that sets options in the parser, such as
62
62
  # Nokogiri::XML::ParseOptions::RECOVER. See the constants in
63
63
  # Nokogiri::XML::ParseOptions.
64
- def parse string_or_io, url = nil, encoding = nil, options = XML::ParseOptions::DEFAULT_HTML, &block
64
+ def parse string_or_io, url = nil, encoding = nil, options = XML::ParseOptions::DEFAULT_HTML
65
65
 
66
66
  options = Nokogiri::XML::ParseOptions.new(options) if Fixnum === options
67
67
  # Give the options to the user
@@ -1,6 +1,8 @@
1
1
  module Nokogiri
2
2
  module HTML
3
3
  class DocumentFragment < Nokogiri::XML::DocumentFragment
4
+ attr_accessor :errors
5
+
4
6
  ####
5
7
  # Create a Nokogiri::XML::DocumentFragment from +tags+, using +encoding+
6
8
  def self.parse tags, encoding = nil
@@ -15,24 +17,24 @@ module Nokogiri
15
17
  def initialize document, tags = nil, ctx = nil
16
18
  return self unless tags
17
19
 
18
- children = if ctx
19
- ctx.parse("<div>#{tags.strip}</div>").first.children
20
- else
21
- ###
22
- # This is a horrible hack, but I don't care
23
- if tags.strip =~ /^<body/i
24
- path = "/html/body"
25
- else
26
- path = "/html/body/node()"
27
- end
20
+ if ctx
21
+ preexisting_errors = document.errors.dup
22
+ node_set = ctx.parse("<div>#{tags}</div>")
23
+ node_set.first.children.each { |child| child.parent = self } unless node_set.empty?
24
+ self.errors = document.errors - preexisting_errors
25
+ else
26
+ # This is a horrible hack, but I don't care
27
+ if tags.strip =~ /^<body/i
28
+ path = "/html/body"
29
+ else
30
+ path = "/html/body/node()"
31
+ end
28
32
 
29
- HTML::Document.parse(
30
- "<html><body>#{tags.strip}</body></html>",
31
- nil,
32
- document.encoding
33
- ).xpath(path)
34
- end
35
- children.each { |child| child.parent = self }
33
+ temp_doc = HTML::Document.parse "<html><body>#{tags}", nil, document.encoding
34
+ temp_doc.xpath(path).each { |child| child.parent = self }
35
+ self.errors = temp_doc.errors
36
+ end
37
+ children
36
38
  end
37
39
  end
38
40
  end
@@ -1,6 +1,6 @@
1
1
  module Nokogiri
2
2
  # The version of Nokogiri you are using
3
- VERSION = '1.4.3.1'
3
+ VERSION = '1.4.4'
4
4
 
5
5
  # More complete version information about libxml
6
6
  VERSION_INFO = {}
@@ -113,6 +113,13 @@ module Nokogiri
113
113
  # </root>
114
114
  #
115
115
  # The hash returned will look like this: { 'xmlns:foo' => 'bar' }
116
+ #
117
+ # Non-prefixed default namespaces (as in "xmlns=") are not included
118
+ # in the hash.
119
+ #
120
+ # Note this is a very expensive operation in current implementation, as it
121
+ # traverses the entire graph, and also has to bring each node across the
122
+ # libxml bridge into a ruby object.
116
123
  def collect_namespaces
117
124
  ns = {}
118
125
  traverse { |j| ns.merge!(j.namespaces) }
@@ -134,7 +141,21 @@ module Nokogiri
134
141
  end
135
142
 
136
143
  ##
137
- # Explore a document with shortcut methods.
144
+ # Explore a document with shortcut methods. See Nokogiri::Slop for details.
145
+ #
146
+ # Note that any nodes that have been instantiated before #slop!
147
+ # is called will not be decorated with sloppy behavior. So, if you're in
148
+ # irb, the preferred idiom is:
149
+ #
150
+ # irb> doc = Nokogiri::Slop my_markup
151
+ #
152
+ # and not
153
+ #
154
+ # irb> doc = Nokogiri::HTML my_markup
155
+ # ... followed by irb's implicit inspect (and therefore instantiation of every node) ...
156
+ # irb> doc.slop!
157
+ # ... which does absolutely nothing.
158
+ #
138
159
  def slop!
139
160
  unless decorators(XML::Node).include? Nokogiri::Decorators::Slop
140
161
  decorators(XML::Node) << Nokogiri::Decorators::Slop
@@ -185,6 +206,10 @@ module Nokogiri
185
206
  alias :<< :add_child
186
207
 
187
208
  private
209
+ def implied_xpath_context
210
+ "/"
211
+ end
212
+
188
213
  def inspect_attributes
189
214
  [:name, :children]
190
215
  end
@@ -11,9 +11,9 @@ module Nokogiri
11
11
  return self unless tags
12
12
 
13
13
  children = if ctx
14
- ctx.parse(tags.strip)
14
+ ctx.parse(tags)
15
15
  else
16
- XML::Document.parse("<root>#{tags.strip}</root>") \
16
+ XML::Document.parse("<root>#{tags}</root>") \
17
17
  .xpath("/root/node()")
18
18
  end
19
19
  children.each { |child| child.parent = self }
@@ -2,10 +2,21 @@ module Nokogiri
2
2
  module XML
3
3
  class DTD < Nokogiri::XML::Node
4
4
  undef_method :attribute_nodes
5
+ undef_method :values
5
6
  undef_method :content
6
7
  undef_method :namespace
7
8
  undef_method :namespace_definitions
8
9
  undef_method :line
10
+
11
+ def keys
12
+ attributes.keys
13
+ end
14
+
15
+ def each &block
16
+ attributes.each { |key, value|
17
+ block.call([key, value])
18
+ }
19
+ end
9
20
  end
10
21
  end
11
22
  end
@@ -95,13 +95,17 @@ module Nokogiri
95
95
  # optional hash of namespaces may be appended.
96
96
  # See Node#xpath and Node#css.
97
97
  def search *paths
98
+ # TODO use paths, handler, ns, binds = extract_params(paths)
98
99
  ns = paths.last.is_a?(Hash) ? paths.pop :
99
100
  (document.root ? document.root.namespaces : {})
101
+
102
+ prefix = "#{implied_xpath_context}/"
103
+
100
104
  xpath(*(paths.map { |path|
101
105
  path = path.to_s
102
106
  path =~ /^(\.\/|\/)/ ? path : CSS.xpath_for(
103
107
  path,
104
- :prefix => ".//",
108
+ :prefix => prefix,
105
109
  :ns => ns
106
110
  )
107
111
  }.flatten.uniq) + [ns])
@@ -109,16 +113,28 @@ module Nokogiri
109
113
  alias :/ :search
110
114
 
111
115
  ###
116
+ # call-seq: xpath *paths, [namespace-bindings, variable-bindings, custom-handler-class]
117
+ #
112
118
  # Search this node for XPath +paths+. +paths+ must be one or more XPath
113
- # queries. A hash of namespaces may be appended. For example:
119
+ # queries.
114
120
  #
115
121
  # node.xpath('.//title')
116
- # node.xpath('.//foo:name', { 'foo' => 'http://example.org/' })
122
+ #
123
+ # A hash of namespace bindings may be appended. For example:
124
+ #
125
+ # node.xpath('.//foo:name', {'foo' => 'http://example.org/'})
117
126
  # node.xpath('.//xmlns:name', node.root.namespaces)
118
127
  #
119
- # Custom XPath functions may also be defined. To define custom functions
120
- # create a class and implement the # function you want to define.
121
- # For example:
128
+ # A hash of variable bindings may also be appended to the namespace bindings. For example:
129
+ #
130
+ # node.xpath('.//address[@domestic=$value]', nil, {:value => 'Yes'})
131
+ #
132
+ # Custom XPath functions may also be defined. To define custom
133
+ # functions create a class and implement the function you want
134
+ # to define. The first argument to the method will be the
135
+ # current matching NodeSet. Any other arguments are ones that
136
+ # you pass in. Note that this class may appear anywhere in the
137
+ # argument list. For example:
122
138
  #
123
139
  # node.xpath('.//title[regex(., "\w+")]', Class.new {
124
140
  # def regex node_set, regex
@@ -127,19 +143,18 @@ module Nokogiri
127
143
  # }.new)
128
144
  #
129
145
  def xpath *paths
130
- # Pop off our custom function handler if it exists
131
- handler = ![
132
- Hash, String, Symbol
133
- ].include?(paths.last.class) ? paths.pop : nil
134
-
135
- ns = paths.last.is_a?(Hash) ? paths.pop :
136
- (document.root ? document.root.namespaces : {})
137
-
138
146
  return NodeSet.new(document) unless document
139
147
 
148
+ paths, handler, ns, binds = extract_params(paths)
149
+
140
150
  sets = paths.map { |path|
141
151
  ctx = XPathContext.new(self)
142
152
  ctx.register_namespaces(ns)
153
+
154
+ binds.each do |key,value|
155
+ ctx.register_variable key.to_s, value
156
+ end if binds
157
+
143
158
  ctx.evaluate(path, handler)
144
159
  }
145
160
  return sets.first if sets.length == 1
@@ -154,18 +169,24 @@ module Nokogiri
154
169
  end
155
170
 
156
171
  ###
172
+ # call-seq: css *rules, [namespace-bindings, custom-pseudo-class]
173
+ #
157
174
  # Search this node for CSS +rules+. +rules+ must be one or more CSS
158
- # selectors. For example:
175
+ # selectors. For example:
159
176
  #
160
177
  # node.css('title')
161
178
  # node.css('body h1.bold')
162
179
  # node.css('div + p.green', 'div#one')
163
180
  #
164
- # Custom CSS pseudo classes may also be defined. To define custom pseudo
165
- # classes, create a class and implement the custom pseudo class you
166
- # want defined. The first argument to the method will be the current
167
- # matching NodeSet. Any other arguments are ones that you pass in.
168
- # For example:
181
+ # A hash of namespace bindings may be appended. For example:
182
+ #
183
+ # node.css('bike|tire', {'bike' => 'http://schwinn.com/'})
184
+ #
185
+ # Custom CSS pseudo classes may also be defined. To define
186
+ # custom pseudo classes, create a class and implement the custom
187
+ # pseudo class you want defined. The first argument to the
188
+ # method will be the current matching NodeSet. Any other
189
+ # arguments are ones that you pass in. For example:
169
190
  #
170
191
  # node.css('title:regex("\w+")', Class.new {
171
192
  # def regex node_set, regex
@@ -173,18 +194,21 @@ module Nokogiri
173
194
  # end
174
195
  # }.new)
175
196
  #
197
+ # Note that the CSS query string is case-sensitive with regards
198
+ # to your document type. That is, if you're looking for "H1" in
199
+ # an HTML document, you'll never find anything, since HTML tags
200
+ # will match only lowercase CSS queries. However, "H1" might be
201
+ # found in an XML document, where tag names are case-sensitive
202
+ # (e.g., "H1" is distinct from "h1").
203
+ #
176
204
  def css *rules
177
- # Pop off our custom function handler if it exists
178
- handler = ![
179
- Hash, String, Symbol
180
- ].include?(rules.last.class) ? rules.pop : nil
205
+ rules, handler, ns, binds = extract_params(rules)
181
206
 
182
- ns = rules.last.is_a?(Hash) ? rules.pop :
183
- (document.root ? document.root.namespaces : {})
207
+ prefix = "#{implied_xpath_context}/"
184
208
 
185
209
  rules = rules.map { |rule|
186
- CSS.xpath_for(rule, :prefix => ".//", :ns => ns)
187
- }.flatten.uniq + [ns, handler].compact
210
+ CSS.xpath_for(rule, :prefix => prefix, :ns => ns)
211
+ }.flatten.uniq + [ns, handler, binds].compact
188
212
 
189
213
  xpath(*rules)
190
214
  end
@@ -234,7 +258,7 @@ module Nokogiri
234
258
  # Add +node_or_tags+ as a child of this Node.
235
259
  # +node_or_tags+ can be a Nokogiri::XML::Node, a ::DocumentFragment, a ::NodeSet, or a string containing markup.
236
260
  #
237
- # Returns the new child node.
261
+ # Returns the reparented node (if +node_or_tags+ is a Node), or NodeSet (if +node_or_tags+ is a DocumentFragment, NodeSet, or string).
238
262
  def add_child node_or_tags
239
263
  node_or_tags = coerce(node_or_tags)
240
264
  if node_or_tags.is_a?(XML::NodeSet)
@@ -242,42 +266,55 @@ module Nokogiri
242
266
  else
243
267
  add_child_node node_or_tags
244
268
  end
269
+ node_or_tags
245
270
  end
246
271
 
247
272
  ###
248
273
  # Insert +node_or_tags+ before this Node (as a sibling).
249
274
  # +node_or_tags+ can be a Nokogiri::XML::Node, a ::DocumentFragment, a ::NodeSet, or a string containing markup.
250
275
  #
251
- # Returns the new sibling node.
276
+ # Returns the reparented node (if +node_or_tags+ is a Node), or NodeSet (if +node_or_tags+ is a DocumentFragment, NodeSet, or string).
252
277
  #
253
278
  # Also see related method +before+.
254
279
  def add_previous_sibling node_or_tags
255
280
  node_or_tags = coerce(node_or_tags)
256
281
  if node_or_tags.is_a?(XML::NodeSet)
257
- node_or_tags.each { |n| add_previous_sibling_node n }
282
+ if text?
283
+ pivot = Nokogiri::XML::Node.new 'dummy', document
284
+ add_previous_sibling_node pivot
285
+ else
286
+ pivot = self
287
+ end
288
+ node_or_tags.each { |n| pivot.send :add_previous_sibling_node, n }
289
+ pivot.unlink if text?
258
290
  else
259
291
  add_previous_sibling_node node_or_tags
260
292
  end
293
+ node_or_tags
261
294
  end
262
295
 
263
296
  ###
264
297
  # Insert +node_or_tags+ after this Node (as a sibling).
265
298
  # +node_or_tags+ can be a Nokogiri::XML::Node, a ::DocumentFragment, a ::NodeSet, or a string containing markup.
266
299
  #
267
- # Returns the new sibling node.
300
+ # Returns the reparented node (if +node_or_tags+ is a Node), or NodeSet (if +node_or_tags+ is a DocumentFragment, NodeSet, or string).
268
301
  #
269
302
  # Also see related method +after+.
270
303
  def add_next_sibling node_or_tags
271
304
  node_or_tags = coerce(node_or_tags)
272
305
  if node_or_tags.is_a?(XML::NodeSet)
273
- if '1.8.6' == RUBY_VERSION
274
- node_or_tags.reverse.each { |n| add_next_sibling_node n }
306
+ if text?
307
+ pivot = Nokogiri::XML::Node.new 'dummy', document
308
+ add_next_sibling_node pivot
275
309
  else
276
- node_or_tags.reverse_each { |n| add_next_sibling_node n }
310
+ pivot = self
277
311
  end
312
+ node_or_tags.reverse.each { |n| pivot.send :add_next_sibling_node, n }
313
+ pivot.unlink if text?
278
314
  else
279
315
  add_next_sibling_node node_or_tags
280
316
  end
317
+ node_or_tags
281
318
  end
282
319
 
283
320
  ####
@@ -305,11 +342,25 @@ module Nokogiri
305
342
  end
306
343
 
307
344
  ####
308
- # Set the inner_html for this Node to +node_or_tags+
345
+ # Set the inner html for this Node to +node_or_tags+
309
346
  # +node_or_tags+ can be a Nokogiri::XML::Node, a Nokogiri::XML::DocumentFragment, or a string containing markup.
310
347
  #
311
348
  # Returns self.
349
+ #
350
+ # Also see related method +children=+
312
351
  def inner_html= node_or_tags
352
+ self.children = node_or_tags
353
+ self
354
+ end
355
+
356
+ ####
357
+ # Set the inner html for this Node to +node_or_tags+
358
+ # +node_or_tags+ can be a Nokogiri::XML::Node, a Nokogiri::XML::DocumentFragment, or a string containing markup.
359
+ #
360
+ # Returns the reparented node (if +node_or_tags+ is a Node), or NodeSet (if +node_or_tags+ is a DocumentFragment, NodeSet, or string).
361
+ #
362
+ # Also see related method +inner_html=+
363
+ def children= node_or_tags
313
364
  node_or_tags = coerce(node_or_tags)
314
365
  children.unlink
315
366
  if node_or_tags.is_a?(XML::NodeSet)
@@ -317,24 +368,32 @@ module Nokogiri
317
368
  else
318
369
  add_child node_or_tags
319
370
  end
320
- self
371
+ node_or_tags
321
372
  end
322
373
 
323
374
  ####
324
375
  # Replace this Node with +node_or_tags+.
325
376
  # +node_or_tags+ can be a Nokogiri::XML::Node, a ::DocumentFragment, a ::NodeSet, or a string containing markup.
326
377
  #
327
- # Returns the new child node.
378
+ # Returns the reparented node (if +node_or_tags+ is a Node), or NodeSet (if +node_or_tags+ is a DocumentFragment, NodeSet, or string).
328
379
  #
329
380
  # Also see related method +swap+.
330
381
  def replace node_or_tags
331
382
  node_or_tags = coerce(node_or_tags)
332
383
  if node_or_tags.is_a?(XML::NodeSet)
333
- node_or_tags.each { |n| add_previous_sibling n }
334
- unlink
384
+ if text?
385
+ replacee = Nokogiri::XML::Node.new 'dummy', document
386
+ add_previous_sibling_node replacee
387
+ unlink
388
+ else
389
+ replacee = self
390
+ end
391
+ node_or_tags.each { |n| replacee.add_previous_sibling n }
392
+ replacee.unlink
335
393
  else
336
394
  replace_node node_or_tags
337
395
  end
396
+ node_or_tags
338
397
  end
339
398
 
340
399
  ####
@@ -430,7 +489,8 @@ module Nokogiri
430
489
  # Parse +string_or_io+ as a document fragment within the context of
431
490
  # *this* node. Returns a XML::NodeSet containing the nodes parsed from
432
491
  # +string_or_io+.
433
- def parse string_or_io, options = ParseOptions::DEFAULT_XML
492
+ def parse string_or_io, options = nil
493
+ options ||= (document.html? ? ParseOptions::DEFAULT_HTML : ParseOptions::DEFAULT_XML)
434
494
  if Fixnum === options
435
495
  options = Nokogiri::XML::ParseOptions.new(options)
436
496
  end
@@ -442,7 +502,16 @@ module Nokogiri
442
502
  string_or_io
443
503
 
444
504
  return Nokogiri::XML::NodeSet.new(document) if contents.empty?
445
- in_context(contents, options.to_i)
505
+
506
+ ##
507
+ # This is a horrible hack, but I don't care. See #313 for background.
508
+ error_count = document.errors.length
509
+ node_set = in_context(contents, options.to_i)
510
+ if node_set.empty? and document.errors.length > error_count and options.recover?
511
+ fragment = Nokogiri::HTML::DocumentFragment.parse contents
512
+ node_set = fragment.children
513
+ end
514
+ node_set
446
515
  end
447
516
 
448
517
  ####
@@ -459,7 +528,19 @@ module Nokogiri
459
528
  end
460
529
 
461
530
  ###
462
- # Get a hash containing the Namespace definitions for this Node
531
+ # Returns a Hash of {prefix => value} for all namespaces on this
532
+ # node and its ancestors.
533
+ #
534
+ # This method returns the same namespaces as #namespace_scopes.
535
+ #
536
+ # Returns namespaces in scope for self -- those defined on self
537
+ # element directly or any ancestor node -- as a Hash of
538
+ # attribute-name/value pairs. Note that the keys in this hash
539
+ # are the XML attributes that would be used to define this namespace,
540
+ # such as "xmlns:prefix", not just the prefix. Default namespace
541
+ # set on self will be included with key "xmlns". However,
542
+ # default namespaces set on ancestor will NOT be, even if self
543
+ # has no explicit default namespace.
463
544
  def namespaces
464
545
  Hash[*namespace_scopes.map { |nd|
465
546
  key = ['xmlns', nd.prefix].compact.join(':')
@@ -567,14 +648,22 @@ module Nokogiri
567
648
  end
568
649
 
569
650
  ###
570
- # Set the default namespace for this node to +url+
651
+ # Adds a default namespace supplied as a string +url+ href, to self.
652
+ # The consequence is as if an xmlns attribute with the supplied argument were
653
+ # present in parsed XML. A default namespace set with this method will
654
+ # not show up in #attributes, but when this node is serialized to XML an
655
+ # "xmlns" attribute will appear. See also #namespace and #namespace=
571
656
  def default_namespace= url
572
657
  add_namespace_definition(nil, url)
573
658
  end
574
659
  alias :add_namespace :add_namespace_definition
575
660
 
576
661
  ###
577
- # Set the namespace for this node to +ns+
662
+ # Set the default namespace on this node (as would be defined with an
663
+ # "xmlns=" attribute in XML source), as a Namespace object +ns+. Note that
664
+ # a Namespace added this way will NOT be serialized as an xmlns attribute
665
+ # for this node. You probably want #default_namespace= instead, or perhaps
666
+ # #add_namespace_definition with a nil prefix argument.
578
667
  def namespace= ns
579
668
  return set_namespace(ns) unless ns
580
669
 
@@ -772,6 +861,24 @@ module Nokogiri
772
861
 
773
862
  private
774
863
 
864
+ def extract_params params # :nodoc:
865
+ # Pop off our custom function handler if it exists
866
+ handler = params.find { |param|
867
+ ![Hash, String, Symbol].include?(param.class)
868
+ }
869
+
870
+ params -= [handler] if handler
871
+
872
+ hashes = []
873
+ hashes << params.pop while Hash === params.last || params.last.nil?
874
+
875
+ ns, binds = hashes.reverse
876
+
877
+ ns ||= document.root ? document.root.namespaces : {}
878
+
879
+ [params, handler, ns, binds]
880
+ end
881
+
775
882
  def coerce data # :nodoc:
776
883
  return data if data.is_a?(XML::NodeSet)
777
884
  return data.children if data.is_a?(XML::DocumentFragment)
@@ -787,6 +894,10 @@ Requires a Node, NodeSet or String argument, and cannot accept a #{data.class}.
787
894
  data
788
895
  end
789
896
 
897
+ def implied_xpath_context
898
+ "./"
899
+ end
900
+
790
901
  def inspect_attributes
791
902
  [:name, :namespace, :attribute_nodes, :children]
792
903
  end