nokogiri 1.5.0.beta.2 → 1.5.0.beta.3

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (107) hide show
  1. data/CHANGELOG.ja.rdoc +63 -0
  2. data/CHANGELOG.rdoc +44 -1
  3. data/Manifest.txt +3 -3
  4. data/README.ja.rdoc +4 -4
  5. data/README.rdoc +4 -4
  6. data/Rakefile +3 -0
  7. data/bin/nokogiri +6 -1
  8. data/ext/java/nokogiri/EncodingHandler.java +32 -0
  9. data/ext/java/nokogiri/HtmlDocument.java +36 -0
  10. data/ext/java/nokogiri/HtmlElementDescription.java +34 -0
  11. data/ext/java/nokogiri/HtmlEntityLookup.java +34 -0
  12. data/ext/java/nokogiri/HtmlSaxParserContext.java +41 -3
  13. data/ext/java/nokogiri/NokogiriService.java +109 -13
  14. data/ext/java/nokogiri/XmlAttr.java +40 -4
  15. data/ext/java/nokogiri/XmlAttributeDecl.java +32 -0
  16. data/ext/java/nokogiri/XmlCdata.java +41 -2
  17. data/ext/java/nokogiri/XmlComment.java +38 -1
  18. data/ext/java/nokogiri/XmlDocument.java +56 -11
  19. data/ext/java/nokogiri/XmlDocumentFragment.java +39 -30
  20. data/ext/java/nokogiri/XmlDtd.java +37 -0
  21. data/ext/java/nokogiri/XmlElement.java +51 -2
  22. data/ext/java/nokogiri/XmlElementContent.java +32 -0
  23. data/ext/java/nokogiri/XmlElementDecl.java +32 -0
  24. data/ext/java/nokogiri/XmlEntityDecl.java +32 -0
  25. data/ext/java/nokogiri/XmlEntityReference.java +35 -2
  26. data/ext/java/nokogiri/XmlNamespace.java +55 -5
  27. data/ext/java/nokogiri/XmlNode.java +129 -136
  28. data/ext/java/nokogiri/XmlNodeSet.java +36 -0
  29. data/ext/java/nokogiri/XmlProcessingInstruction.java +34 -1
  30. data/ext/java/nokogiri/XmlReader.java +36 -0
  31. data/ext/java/nokogiri/XmlRelaxng.java +34 -1
  32. data/ext/java/nokogiri/XmlSaxParserContext.java +52 -7
  33. data/ext/java/nokogiri/XmlSaxPushParser.java +36 -0
  34. data/ext/java/nokogiri/XmlSchema.java +34 -1
  35. data/ext/java/nokogiri/XmlSyntaxError.java +48 -18
  36. data/ext/java/nokogiri/XmlText.java +45 -6
  37. data/ext/java/nokogiri/XmlXpathContext.java +45 -0
  38. data/ext/java/nokogiri/XsltStylesheet.java +58 -3
  39. data/ext/java/nokogiri/internals/HtmlDomParserContext.java +50 -26
  40. data/ext/java/nokogiri/internals/NokogiriDocumentCache.java +35 -1
  41. data/ext/java/nokogiri/internals/NokogiriErrorHandler.java +51 -13
  42. data/ext/java/nokogiri/internals/NokogiriHandler.java +70 -21
  43. data/ext/java/nokogiri/internals/NokogiriHelpers.java +95 -38
  44. data/ext/java/nokogiri/internals/NokogiriNamespaceCache.java +37 -3
  45. data/ext/java/nokogiri/internals/NokogiriNamespaceContext.java +39 -1
  46. data/ext/java/nokogiri/internals/NokogiriNonStrictErrorHandler.java +43 -7
  47. data/ext/java/nokogiri/internals/NokogiriNonStrictErrorHandler4NekoHtml.java +77 -10
  48. data/ext/java/nokogiri/internals/NokogiriStrictErrorHandler.java +49 -20
  49. data/ext/java/nokogiri/internals/NokogiriXPathFunction.java +34 -2
  50. data/ext/java/nokogiri/internals/NokogiriXPathFunctionResolver.java +34 -1
  51. data/ext/java/nokogiri/internals/ParserContext.java +32 -0
  52. data/ext/java/nokogiri/internals/PushInputStream.java +33 -3
  53. data/ext/java/nokogiri/internals/ReaderNode.java +50 -8
  54. data/ext/java/nokogiri/internals/SaveContext.java +35 -2
  55. data/ext/java/nokogiri/internals/SchemaErrorHandler.java +34 -1
  56. data/ext/java/nokogiri/internals/XmlDeclHandler.java +32 -0
  57. data/ext/java/nokogiri/internals/XmlDomParser.java +32 -0
  58. data/ext/java/nokogiri/internals/XmlDomParserContext.java +43 -11
  59. data/ext/java/nokogiri/internals/XmlSaxParser.java +32 -0
  60. data/ext/java/nokogiri/internals/XsltExtensionFunction.java +72 -0
  61. data/ext/nokogiri/depend +358 -32
  62. data/ext/nokogiri/extconf.rb +1 -3
  63. data/ext/nokogiri/nokogiri.c +2 -0
  64. data/ext/nokogiri/nokogiri.h +7 -0
  65. data/ext/nokogiri/xml_dtd.c +2 -2
  66. data/ext/nokogiri/xml_io.c +2 -2
  67. data/ext/nokogiri/xml_node.c +31 -6
  68. data/ext/nokogiri/xml_node_set.c +1 -1
  69. data/ext/nokogiri/xml_sax_parser.c +1 -1
  70. data/ext/nokogiri/xml_sax_parser_context.c +40 -0
  71. data/ext/nokogiri/xml_xpath_context.c +33 -2
  72. data/ext/nokogiri/xslt_stylesheet.c +122 -6
  73. data/lib/nokogiri.rb +12 -5
  74. data/lib/nokogiri/css/generated_tokenizer.rb +1 -2
  75. data/lib/nokogiri/css/xpath_visitor.rb +15 -7
  76. data/lib/nokogiri/decorators/slop.rb +5 -3
  77. data/lib/nokogiri/html/document.rb +3 -3
  78. data/lib/nokogiri/html/document_fragment.rb +19 -17
  79. data/lib/nokogiri/version.rb +1 -1
  80. data/lib/nokogiri/xml/document.rb +26 -1
  81. data/lib/nokogiri/xml/document_fragment.rb +2 -2
  82. data/lib/nokogiri/xml/dtd.rb +11 -0
  83. data/lib/nokogiri/xml/node.rb +156 -45
  84. data/lib/nokogiri/xml/node_set.rb +2 -2
  85. data/lib/nokogiri/xml/reader.rb +36 -0
  86. data/lib/nokogiri/xml/sax/document.rb +4 -2
  87. data/lib/nokogiri/xslt.rb +9 -5
  88. data/lib/nokogiri/xslt/stylesheet.rb +1 -1
  89. data/tasks/cross_compile.rb +27 -8
  90. data/test/css/test_parser.rb +29 -18
  91. data/test/decorators/test_slop.rb +16 -0
  92. data/test/html/test_document_fragment.rb +46 -3
  93. data/test/html/test_node.rb +9 -0
  94. data/test/xml/sax/test_parser.rb +11 -3
  95. data/test/xml/sax/test_parser_context.rb +50 -0
  96. data/test/xml/sax/test_push_parser.rb +18 -1
  97. data/test/xml/test_document_fragment.rb +15 -8
  98. data/test/xml/test_dtd.rb +15 -0
  99. data/test/xml/test_node.rb +31 -2
  100. data/test/xml/test_node_reparenting.rb +59 -31
  101. data/test/xml/test_node_set.rb +13 -0
  102. data/test/xml/test_xpath.rb +32 -0
  103. data/test/xslt/test_custom_functions.rb +94 -0
  104. metadata +83 -81
  105. data/lib/nokogiri/nokogiri.jar +0 -0
  106. data/spec/helper.rb +0 -3
  107. data/spec/xml/reader_spec.rb +0 -307
@@ -7,11 +7,18 @@ ENV['PATH'] = [File.expand_path(
7
7
  ), ENV['PATH']].compact.join(';') if RbConfig::CONFIG['host_os'] =~ /(mswin|mingw)/i
8
8
 
9
9
  if defined?(RUBY_ENGINE) && RUBY_ENGINE == "jruby"
10
- require 'isorelax.jar'
11
- require 'jing.jar'
12
- require 'nekohtml.jar'
13
- require 'nekodtd.jar'
14
- require 'xercesImpl.jar'
10
+ # If JRuby::Rack::VERSION is defined, Nokogiri is in a servlet.
11
+ # If AppEngine::ApiProxy is defined, Nokogiri is on Google App Engine.
12
+ # These two cases don't need to require jar archives because those
13
+ # should be in WEB-INF/lib and already set in the classpath by
14
+ # a servlet container.
15
+ unless defined?(JRuby::Rack::VERSION) || defined?(AppEngine::ApiProxy)
16
+ require 'isorelax.jar'
17
+ require 'jing.jar'
18
+ require 'nekohtml.jar'
19
+ require 'nekodtd.jar'
20
+ require 'xercesImpl.jar'
21
+ end
15
22
  require 'nokogiri/nokogiri'
16
23
  else
17
24
  require 'nokogiri/nokogiri'
@@ -1,6 +1,6 @@
1
1
  #--
2
2
  # DO NOT MODIFY!!!!
3
- # This file is automatically generated by rex 1.0.5.beta1
3
+ # This file is automatically generated by rex 1.0.4
4
4
  # from lexical definition file "lib/nokogiri/css/tokenizer.rex".
5
5
  #++
6
6
 
@@ -29,7 +29,6 @@ class GeneratedTokenizer < GeneratedParser
29
29
  scan_setup(str)
30
30
  do_parse
31
31
  end
32
- alias :scan :scan_str
33
32
 
34
33
  def load_file( filename )
35
34
  @filename = filename
@@ -11,18 +11,25 @@ module Nokogiri
11
11
  'child::text()'
12
12
  when /^self\(/
13
13
  "self::#{node.value[1]}"
14
- when /^(eq|nth|nth-of-type|nth-child)\(/
14
+ when /^eq\(/
15
+ "position() = #{node.value[1]}"
16
+ when /^(nth|nth-of-type|nth-child)\(/
15
17
  if node.value[1].is_a?(Nokogiri::CSS::Node) and node.value[1].type == :AN_PLUS_B
16
18
  an_plus_b(node.value[1])
17
19
  else
18
- "position() = " + node.value[1]
20
+ "position() = #{node.value[1]}"
21
+ end
22
+ when /^(nth-last-child|nth-last-of-type)\(/
23
+ if node.value[1].is_a?(Nokogiri::CSS::Node) and node.value[1].type == :AN_PLUS_B
24
+ an_plus_b(node.value[1], :last => true)
25
+ else
26
+ index = node.value[1].to_i - 1
27
+ index == 0 ? "position() = last()" : "position() = last() - #{index}"
19
28
  end
20
29
  when /^(first|first-of-type)\(/
21
30
  "position() = 1"
22
31
  when /^(last|last-of-type)\(/
23
32
  "position() = last()"
24
- when /^(nth-last-child|nth-last-of-type)\(/
25
- "position() = last() - #{node.value[1]}"
26
33
  when /^contains\(/
27
34
  "contains(., #{node.value[1]})"
28
35
  when /^gt\(/
@@ -144,17 +151,18 @@ module Nokogiri
144
151
  end
145
152
 
146
153
  private
147
- def an_plus_b node
154
+ def an_plus_b node, options={}
148
155
  raise ArgumentError, "expected an+b node to contain 4 tokens, but is #{node.value.inspect}" unless node.value.size == 4
149
156
 
150
157
  a = node.value[0].to_i
151
158
  b = node.value[3].to_i
159
+ position = options[:last] ? "(last()-position()+1)" : "position()"
152
160
 
153
161
  if (b == 0)
154
- return "(position() mod #{a}) = 0"
162
+ return "(#{position} mod #{a}) = 0"
155
163
  else
156
164
  compare = (a < 0) ? "<=" : ">="
157
- return "(position() #{compare} #{b}) and (((position()-#{b}) mod #{a.abs}) = 0)"
165
+ return "(#{position} #{compare} #{b}) and (((#{position}-#{b}) mod #{a.abs}) = 0)"
158
166
  end
159
167
  end
160
168
 
@@ -7,20 +7,22 @@ module Nokogiri
7
7
  ###
8
8
  # look for node with +name+. See Nokogiri.Slop
9
9
  def method_missing name, *args, &block
10
+ prefix = implied_xpath_context
11
+
10
12
  if args.empty?
11
- list = xpath("./#{name}")
13
+ list = xpath("#{prefix}#{name.to_s.sub(/^_/, '')}")
12
14
  elsif args.first.is_a? Hash
13
15
  hash = args.first
14
16
  if hash[:css]
15
17
  list = css("#{name}#{hash[:css]}")
16
18
  elsif hash[:xpath]
17
19
  conds = Array(hash[:xpath]).join(' and ')
18
- list = xpath("./#{name}[#{conds}]")
20
+ list = xpath("#{prefix}#{name}[#{conds}]")
19
21
  end
20
22
  else
21
23
  CSS::Parser.without_cache do
22
24
  list = xpath(
23
- *CSS.xpath_for("#{name}#{args.first}", :prefix => "./")
25
+ *CSS.xpath_for("#{name}#{args.first}", :prefix => prefix)
24
26
  )
25
27
  end
26
28
  end
@@ -13,7 +13,7 @@ module Nokogiri
13
13
  end
14
14
 
15
15
  ###
16
- # Set the meta tag encoding for this document. If there is no meta
16
+ # Set the meta tag encoding for this document. If there is no meta
17
17
  # content tag, nil is returned and the encoding is not set.
18
18
  def meta_encoding= encoding
19
19
  return nil unless meta = css('meta').find { |node|
@@ -38,7 +38,7 @@ module Nokogiri
38
38
  # config.format.as_xml
39
39
  # end
40
40
  #
41
- def serialize options = {}, &block
41
+ def serialize options = {}
42
42
  options[:save_with] ||= XML::Node::SaveOptions::FORMAT |
43
43
  XML::Node::SaveOptions::AS_HTML |
44
44
  XML::Node::SaveOptions::NO_DECLARATION |
@@ -61,7 +61,7 @@ module Nokogiri
61
61
  # is a number that sets options in the parser, such as
62
62
  # Nokogiri::XML::ParseOptions::RECOVER. See the constants in
63
63
  # Nokogiri::XML::ParseOptions.
64
- def parse string_or_io, url = nil, encoding = nil, options = XML::ParseOptions::DEFAULT_HTML, &block
64
+ def parse string_or_io, url = nil, encoding = nil, options = XML::ParseOptions::DEFAULT_HTML
65
65
 
66
66
  options = Nokogiri::XML::ParseOptions.new(options) if Fixnum === options
67
67
  # Give the options to the user
@@ -1,6 +1,8 @@
1
1
  module Nokogiri
2
2
  module HTML
3
3
  class DocumentFragment < Nokogiri::XML::DocumentFragment
4
+ attr_accessor :errors
5
+
4
6
  ####
5
7
  # Create a Nokogiri::XML::DocumentFragment from +tags+, using +encoding+
6
8
  def self.parse tags, encoding = nil
@@ -15,24 +17,24 @@ module Nokogiri
15
17
  def initialize document, tags = nil, ctx = nil
16
18
  return self unless tags
17
19
 
18
- children = if ctx
19
- ctx.parse("<div>#{tags.strip}</div>").first.children
20
- else
21
- ###
22
- # This is a horrible hack, but I don't care
23
- if tags.strip =~ /^<body/i
24
- path = "/html/body"
25
- else
26
- path = "/html/body/node()"
27
- end
20
+ if ctx
21
+ preexisting_errors = document.errors.dup
22
+ node_set = ctx.parse("<div>#{tags}</div>")
23
+ node_set.first.children.each { |child| child.parent = self } unless node_set.empty?
24
+ self.errors = document.errors - preexisting_errors
25
+ else
26
+ # This is a horrible hack, but I don't care
27
+ if tags.strip =~ /^<body/i
28
+ path = "/html/body"
29
+ else
30
+ path = "/html/body/node()"
31
+ end
28
32
 
29
- HTML::Document.parse(
30
- "<html><body>#{tags.strip}</body></html>",
31
- nil,
32
- document.encoding
33
- ).xpath(path)
34
- end
35
- children.each { |child| child.parent = self }
33
+ temp_doc = HTML::Document.parse "<html><body>#{tags}", nil, document.encoding
34
+ temp_doc.xpath(path).each { |child| child.parent = self }
35
+ self.errors = temp_doc.errors
36
+ end
37
+ children
36
38
  end
37
39
  end
38
40
  end
@@ -1,6 +1,6 @@
1
1
  module Nokogiri
2
2
  # The version of Nokogiri you are using
3
- VERSION = '1.5.0.beta.2'
3
+ VERSION = '1.5.0.beta.3'
4
4
 
5
5
  # More complete version information about libxml
6
6
  VERSION_INFO = {}
@@ -113,6 +113,13 @@ module Nokogiri
113
113
  # </root>
114
114
  #
115
115
  # The hash returned will look like this: { 'xmlns:foo' => 'bar' }
116
+ #
117
+ # Non-prefixed default namespaces (as in "xmlns=") are not included
118
+ # in the hash.
119
+ #
120
+ # Note this is a very expensive operation in current implementation, as it
121
+ # traverses the entire graph, and also has to bring each node across the
122
+ # libxml bridge into a ruby object.
116
123
  def collect_namespaces
117
124
  ns = {}
118
125
  traverse { |j| ns.merge!(j.namespaces) }
@@ -134,7 +141,21 @@ module Nokogiri
134
141
  end
135
142
 
136
143
  ##
137
- # Explore a document with shortcut methods.
144
+ # Explore a document with shortcut methods. See Nokogiri::Slop for details.
145
+ #
146
+ # Note that any nodes that have been instantiated before #slop!
147
+ # is called will not be decorated with sloppy behavior. So, if you're in
148
+ # irb, the preferred idiom is:
149
+ #
150
+ # irb> doc = Nokogiri::Slop my_markup
151
+ #
152
+ # and not
153
+ #
154
+ # irb> doc = Nokogiri::HTML my_markup
155
+ # ... followed by irb's implicit inspect (and therefore instantiation of every node) ...
156
+ # irb> doc.slop!
157
+ # ... which does absolutely nothing.
158
+ #
138
159
  def slop!
139
160
  unless decorators(XML::Node).include? Nokogiri::Decorators::Slop
140
161
  decorators(XML::Node) << Nokogiri::Decorators::Slop
@@ -185,6 +206,10 @@ module Nokogiri
185
206
  alias :<< :add_child
186
207
 
187
208
  private
209
+ def implied_xpath_context
210
+ "/"
211
+ end
212
+
188
213
  def inspect_attributes
189
214
  [:name, :children]
190
215
  end
@@ -11,9 +11,9 @@ module Nokogiri
11
11
  return self unless tags
12
12
 
13
13
  children = if ctx
14
- ctx.parse(tags.strip)
14
+ ctx.parse(tags)
15
15
  else
16
- XML::Document.parse("<root>#{tags.strip}</root>") \
16
+ XML::Document.parse("<root>#{tags}</root>") \
17
17
  .xpath("/root/node()")
18
18
  end
19
19
  children.each { |child| child.parent = self }
@@ -2,10 +2,21 @@ module Nokogiri
2
2
  module XML
3
3
  class DTD < Nokogiri::XML::Node
4
4
  undef_method :attribute_nodes
5
+ undef_method :values
5
6
  undef_method :content
6
7
  undef_method :namespace
7
8
  undef_method :namespace_definitions
8
9
  undef_method :line if method_defined?(:line)
10
+
11
+ def keys
12
+ attributes.keys
13
+ end
14
+
15
+ def each &block
16
+ attributes.each { |key, value|
17
+ block.call([key, value])
18
+ }
19
+ end
9
20
  end
10
21
  end
11
22
  end
@@ -95,13 +95,17 @@ module Nokogiri
95
95
  # optional hash of namespaces may be appended.
96
96
  # See Node#xpath and Node#css.
97
97
  def search *paths
98
+ # TODO use paths, handler, ns, binds = extract_params(paths)
98
99
  ns = paths.last.is_a?(Hash) ? paths.pop :
99
100
  (document.root ? document.root.namespaces : {})
101
+
102
+ prefix = "#{implied_xpath_context}/"
103
+
100
104
  xpath(*(paths.map { |path|
101
105
  path = path.to_s
102
106
  path =~ /^(\.\/|\/)/ ? path : CSS.xpath_for(
103
107
  path,
104
- :prefix => ".//",
108
+ :prefix => prefix,
105
109
  :ns => ns
106
110
  )
107
111
  }.flatten.uniq) + [ns])
@@ -109,16 +113,28 @@ module Nokogiri
109
113
  alias :/ :search
110
114
 
111
115
  ###
116
+ # call-seq: xpath *paths, [namespace-bindings, variable-bindings, custom-handler-class]
117
+ #
112
118
  # Search this node for XPath +paths+. +paths+ must be one or more XPath
113
- # queries. A hash of namespaces may be appended. For example:
119
+ # queries.
114
120
  #
115
121
  # node.xpath('.//title')
116
- # node.xpath('.//foo:name', { 'foo' => 'http://example.org/' })
122
+ #
123
+ # A hash of namespace bindings may be appended. For example:
124
+ #
125
+ # node.xpath('.//foo:name', {'foo' => 'http://example.org/'})
117
126
  # node.xpath('.//xmlns:name', node.root.namespaces)
118
127
  #
119
- # Custom XPath functions may also be defined. To define custom functions
120
- # create a class and implement the # function you want to define.
121
- # For example:
128
+ # A hash of variable bindings may also be appended to the namespace bindings. For example:
129
+ #
130
+ # node.xpath('.//address[@domestic=$value]', nil, {:value => 'Yes'})
131
+ #
132
+ # Custom XPath functions may also be defined. To define custom
133
+ # functions create a class and implement the function you want
134
+ # to define. The first argument to the method will be the
135
+ # current matching NodeSet. Any other arguments are ones that
136
+ # you pass in. Note that this class may appear anywhere in the
137
+ # argument list. For example:
122
138
  #
123
139
  # node.xpath('.//title[regex(., "\w+")]', Class.new {
124
140
  # def regex node_set, regex
@@ -127,20 +143,19 @@ module Nokogiri
127
143
  # }.new)
128
144
  #
129
145
  def xpath *paths
130
- # Pop off our custom function handler if it exists
131
- handler = ![
132
- Hash, String, Symbol
133
- ].include?(paths.last.class) ? paths.pop : nil
134
-
135
- ns = paths.last.is_a?(Hash) ? paths.pop :
136
- (document.root ? document.root.namespaces : {})
137
-
138
146
  return NodeSet.new(document) unless document
139
147
 
148
+ paths, handler, ns, binds = extract_params(paths)
149
+
140
150
  sets = paths.map { |path|
141
151
  ctx = XPathContext.new(self)
142
152
  ctx.register_namespaces(ns)
143
153
  path = path.gsub(/\/xmlns:/,'/:') unless Nokogiri.uses_libxml?
154
+
155
+ binds.each do |key,value|
156
+ ctx.register_variable key.to_s, value
157
+ end if binds
158
+
144
159
  ctx.evaluate(path, handler)
145
160
  }
146
161
  return sets.first if sets.length == 1
@@ -155,18 +170,24 @@ module Nokogiri
155
170
  end
156
171
 
157
172
  ###
173
+ # call-seq: css *rules, [namespace-bindings, custom-pseudo-class]
174
+ #
158
175
  # Search this node for CSS +rules+. +rules+ must be one or more CSS
159
- # selectors. For example:
176
+ # selectors. For example:
160
177
  #
161
178
  # node.css('title')
162
179
  # node.css('body h1.bold')
163
180
  # node.css('div + p.green', 'div#one')
164
181
  #
165
- # Custom CSS pseudo classes may also be defined. To define custom pseudo
166
- # classes, create a class and implement the custom pseudo class you
167
- # want defined. The first argument to the method will be the current
168
- # matching NodeSet. Any other arguments are ones that you pass in.
169
- # For example:
182
+ # A hash of namespace bindings may be appended. For example:
183
+ #
184
+ # node.css('bike|tire', {'bike' => 'http://schwinn.com/'})
185
+ #
186
+ # Custom CSS pseudo classes may also be defined. To define
187
+ # custom pseudo classes, create a class and implement the custom
188
+ # pseudo class you want defined. The first argument to the
189
+ # method will be the current matching NodeSet. Any other
190
+ # arguments are ones that you pass in. For example:
170
191
  #
171
192
  # node.css('title:regex("\w+")', Class.new {
172
193
  # def regex node_set, regex
@@ -174,18 +195,21 @@ module Nokogiri
174
195
  # end
175
196
  # }.new)
176
197
  #
198
+ # Note that the CSS query string is case-sensitive with regards
199
+ # to your document type. That is, if you're looking for "H1" in
200
+ # an HTML document, you'll never find anything, since HTML tags
201
+ # will match only lowercase CSS queries. However, "H1" might be
202
+ # found in an XML document, where tag names are case-sensitive
203
+ # (e.g., "H1" is distinct from "h1").
204
+ #
177
205
  def css *rules
178
- # Pop off our custom function handler if it exists
179
- handler = ![
180
- Hash, String, Symbol
181
- ].include?(rules.last.class) ? rules.pop : nil
206
+ rules, handler, ns, binds = extract_params(rules)
182
207
 
183
- ns = rules.last.is_a?(Hash) ? rules.pop :
184
- (document.root ? document.root.namespaces : {})
208
+ prefix = "#{implied_xpath_context}/"
185
209
 
186
210
  rules = rules.map { |rule|
187
- xpath_rule = CSS.xpath_for(rule, :prefix => ".//", :ns => ns)
188
- }.flatten.uniq + [ns, handler].compact
211
+ CSS.xpath_for(rule, :prefix => prefix, :ns => ns)
212
+ }.flatten.uniq + [ns, handler, binds].compact
189
213
 
190
214
  xpath(*rules)
191
215
  end
@@ -235,7 +259,7 @@ module Nokogiri
235
259
  # Add +node_or_tags+ as a child of this Node.
236
260
  # +node_or_tags+ can be a Nokogiri::XML::Node, a ::DocumentFragment, a ::NodeSet, or a string containing markup.
237
261
  #
238
- # Returns the new child node.
262
+ # Returns the reparented node (if +node_or_tags+ is a Node), or NodeSet (if +node_or_tags+ is a DocumentFragment, NodeSet, or string).
239
263
  def add_child node_or_tags
240
264
  node_or_tags = coerce(node_or_tags)
241
265
  if node_or_tags.is_a?(XML::NodeSet)
@@ -243,42 +267,55 @@ module Nokogiri
243
267
  else
244
268
  add_child_node node_or_tags
245
269
  end
270
+ node_or_tags
246
271
  end
247
272
 
248
273
  ###
249
274
  # Insert +node_or_tags+ before this Node (as a sibling).
250
275
  # +node_or_tags+ can be a Nokogiri::XML::Node, a ::DocumentFragment, a ::NodeSet, or a string containing markup.
251
276
  #
252
- # Returns the new sibling node.
277
+ # Returns the reparented node (if +node_or_tags+ is a Node), or NodeSet (if +node_or_tags+ is a DocumentFragment, NodeSet, or string).
253
278
  #
254
279
  # Also see related method +before+.
255
280
  def add_previous_sibling node_or_tags
256
281
  node_or_tags = coerce(node_or_tags)
257
282
  if node_or_tags.is_a?(XML::NodeSet)
258
- node_or_tags.each { |n| add_previous_sibling_node n }
283
+ if text?
284
+ pivot = Nokogiri::XML::Node.new 'dummy', document
285
+ add_previous_sibling_node pivot
286
+ else
287
+ pivot = self
288
+ end
289
+ node_or_tags.each { |n| pivot.send :add_previous_sibling_node, n }
290
+ pivot.unlink if text?
259
291
  else
260
292
  add_previous_sibling_node node_or_tags
261
293
  end
294
+ node_or_tags
262
295
  end
263
296
 
264
297
  ###
265
298
  # Insert +node_or_tags+ after this Node (as a sibling).
266
299
  # +node_or_tags+ can be a Nokogiri::XML::Node, a ::DocumentFragment, a ::NodeSet, or a string containing markup.
267
300
  #
268
- # Returns the new sibling node.
301
+ # Returns the reparented node (if +node_or_tags+ is a Node), or NodeSet (if +node_or_tags+ is a DocumentFragment, NodeSet, or string).
269
302
  #
270
303
  # Also see related method +after+.
271
304
  def add_next_sibling node_or_tags
272
305
  node_or_tags = coerce(node_or_tags)
273
306
  if node_or_tags.is_a?(XML::NodeSet)
274
- if '1.8.6' == RUBY_VERSION
275
- node_or_tags.reverse.each { |n| add_next_sibling_node n }
307
+ if text?
308
+ pivot = Nokogiri::XML::Node.new 'dummy', document
309
+ add_next_sibling_node pivot
276
310
  else
277
- node_or_tags.reverse_each { |n| add_next_sibling_node n }
311
+ pivot = self
278
312
  end
313
+ node_or_tags.reverse.each { |n| pivot.send :add_next_sibling_node, n }
314
+ pivot.unlink if text?
279
315
  else
280
316
  add_next_sibling_node node_or_tags
281
317
  end
318
+ node_or_tags
282
319
  end
283
320
 
284
321
  ####
@@ -306,11 +343,25 @@ module Nokogiri
306
343
  end
307
344
 
308
345
  ####
309
- # Set the inner_html for this Node to +node_or_tags+
346
+ # Set the inner html for this Node to +node_or_tags+
310
347
  # +node_or_tags+ can be a Nokogiri::XML::Node, a Nokogiri::XML::DocumentFragment, or a string containing markup.
311
348
  #
312
349
  # Returns self.
350
+ #
351
+ # Also see related method +children=+
313
352
  def inner_html= node_or_tags
353
+ self.children = node_or_tags
354
+ self
355
+ end
356
+
357
+ ####
358
+ # Set the inner html for this Node to +node_or_tags+
359
+ # +node_or_tags+ can be a Nokogiri::XML::Node, a Nokogiri::XML::DocumentFragment, or a string containing markup.
360
+ #
361
+ # Returns the reparented node (if +node_or_tags+ is a Node), or NodeSet (if +node_or_tags+ is a DocumentFragment, NodeSet, or string).
362
+ #
363
+ # Also see related method +inner_html=+
364
+ def children= node_or_tags
314
365
  node_or_tags = coerce(node_or_tags)
315
366
  children.unlink
316
367
  if node_or_tags.is_a?(XML::NodeSet)
@@ -318,24 +369,32 @@ module Nokogiri
318
369
  else
319
370
  add_child_node node_or_tags
320
371
  end
321
- self
372
+ node_or_tags
322
373
  end
323
374
 
324
375
  ####
325
376
  # Replace this Node with +node_or_tags+.
326
377
  # +node_or_tags+ can be a Nokogiri::XML::Node, a ::DocumentFragment, a ::NodeSet, or a string containing markup.
327
378
  #
328
- # Returns the new child node.
379
+ # Returns the reparented node (if +node_or_tags+ is a Node), or NodeSet (if +node_or_tags+ is a DocumentFragment, NodeSet, or string).
329
380
  #
330
381
  # Also see related method +swap+.
331
382
  def replace node_or_tags
332
383
  node_or_tags = coerce(node_or_tags)
333
384
  if node_or_tags.is_a?(XML::NodeSet)
334
- node_or_tags.each { |n| add_previous_sibling n }
335
- unlink
385
+ if text?
386
+ replacee = Nokogiri::XML::Node.new 'dummy', document
387
+ add_previous_sibling_node replacee
388
+ unlink
389
+ else
390
+ replacee = self
391
+ end
392
+ node_or_tags.each { |n| replacee.add_previous_sibling n }
393
+ replacee.unlink
336
394
  else
337
395
  replace_node node_or_tags
338
396
  end
397
+ node_or_tags
339
398
  end
340
399
 
341
400
  ####
@@ -431,7 +490,8 @@ module Nokogiri
431
490
  # Parse +string_or_io+ as a document fragment within the context of
432
491
  # *this* node. Returns a XML::NodeSet containing the nodes parsed from
433
492
  # +string_or_io+.
434
- def parse string_or_io, options = ParseOptions::DEFAULT_XML
493
+ def parse string_or_io, options = nil
494
+ options ||= (document.html? ? ParseOptions::DEFAULT_HTML : ParseOptions::DEFAULT_XML)
435
495
  if Fixnum === options
436
496
  options = Nokogiri::XML::ParseOptions.new(options)
437
497
  end
@@ -443,7 +503,16 @@ module Nokogiri
443
503
  string_or_io
444
504
 
445
505
  return Nokogiri::XML::NodeSet.new(document) if contents.empty?
446
- in_context(contents, options.to_i)
506
+
507
+ ##
508
+ # This is a horrible hack, but I don't care. See #313 for background.
509
+ error_count = document.errors.length
510
+ node_set = in_context(contents, options.to_i)
511
+ if node_set.empty? and document.errors.length > error_count and options.recover?
512
+ fragment = Nokogiri::HTML::DocumentFragment.parse contents
513
+ node_set = fragment.children
514
+ end
515
+ node_set
447
516
  end
448
517
 
449
518
  ####
@@ -460,7 +529,19 @@ module Nokogiri
460
529
  end
461
530
 
462
531
  ###
463
- # Get a hash containing the Namespace definitions for this Node
532
+ # Returns a Hash of {prefix => value} for all namespaces on this
533
+ # node and its ancestors.
534
+ #
535
+ # This method returns the same namespaces as #namespace_scopes.
536
+ #
537
+ # Returns namespaces in scope for self -- those defined on self
538
+ # element directly or any ancestor node -- as a Hash of
539
+ # attribute-name/value pairs. Note that the keys in this hash are
540
+ # XML attributes that would be used to define this namespace,
541
+ # such as "xmlns:prefix", not just the prefix. Default namespace
542
+ # set on self will be included with key "xmlns". However,
543
+ # default namespaces set on ancestor will NOT be, even if self
544
+ # has no explicit default namespace.
464
545
  def namespaces
465
546
  Hash[*namespace_scopes.map { |nd|
466
547
  key = ['xmlns', nd.prefix].compact.join(':')
@@ -568,14 +649,22 @@ module Nokogiri
568
649
  end
569
650
 
570
651
  ###
571
- # Set the default namespace for this node to +url+
652
+ # Adds a default namespace supplied as a string +url+ href, to self.
653
+ # The consequence is as if an xmlns attribute with the supplied argument were
654
+ # present in parsed XML. A default namespace set with this method will
655
+ # not show up in #attributes, but when this node is serialized to XML an
656
+ # "xmlns" attribute will appear. See also #namespace and #namespace=
572
657
  def default_namespace= url
573
658
  add_namespace_definition(nil, url)
574
659
  end
575
660
  alias :add_namespace :add_namespace_definition
576
661
 
577
662
  ###
578
- # Set the namespace for this node to +ns+
663
+ # Set the default namespace on this node (as would be defined with an
664
+ # "xmlns=" attribute in XML source), as a Namespace object +ns+. Note that
665
+ # a Namespace added this way will NOT be serialized as an xmlns attribute
666
+ # for this node. You probably want #default_namespace= instead, or perhaps
667
+ # #add_namespace_definition with a nil prefix argument.
579
668
  def namespace= ns
580
669
  return set_namespace(ns) unless ns
581
670
 
@@ -773,6 +862,24 @@ module Nokogiri
773
862
 
774
863
  private
775
864
 
865
+ def extract_params params # :nodoc:
866
+ # Pop off our custom function handler if it exists
867
+ handler = params.find { |param|
868
+ ![Hash, String, Symbol].include?(param.class)
869
+ }
870
+
871
+ params -= [handler] if handler
872
+
873
+ hashes = []
874
+ hashes << params.pop while Hash === params.last || params.last.nil?
875
+
876
+ ns, binds = hashes.reverse
877
+
878
+ ns ||= document.root ? document.root.namespaces : {}
879
+
880
+ [params, handler, ns, binds]
881
+ end
882
+
776
883
  def coerce data # :nodoc:
777
884
  return data if data.is_a?(XML::NodeSet)
778
885
  return data.children if data.is_a?(XML::DocumentFragment)
@@ -788,6 +895,10 @@ Requires a Node, NodeSet or String argument, and cannot accept a #{data.class}.
788
895
  data
789
896
  end
790
897
 
898
+ def implied_xpath_context
899
+ "./"
900
+ end
901
+
791
902
  def inspect_attributes
792
903
  [:name, :namespace, :attribute_nodes, :children]
793
904
  end