nokogiri 1.4.7 → 1.5.0.beta.1
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of nokogiri might be problematic. Click here for more details.
- data/CHANGELOG.ja.rdoc +8 -83
- data/CHANGELOG.rdoc +6 -80
- data/Manifest.txt +4 -74
- data/README.ja.rdoc +5 -1
- data/README.rdoc +8 -22
- data/Rakefile +79 -60
- data/bin/nokogiri +1 -6
- data/deps.rip +5 -0
- data/ext/nokogiri/extconf.rb +32 -53
- data/ext/nokogiri/nokogiri.c +0 -2
- data/ext/nokogiri/nokogiri.h +0 -9
- data/ext/nokogiri/xml_document.c +0 -14
- data/ext/nokogiri/xml_dtd.c +2 -2
- data/ext/nokogiri/xml_io.c +7 -32
- data/ext/nokogiri/xml_node.c +31 -103
- data/ext/nokogiri/xml_node_set.c +8 -8
- data/ext/nokogiri/xml_reader.c +1 -20
- data/ext/nokogiri/xml_sax_parser.c +3 -5
- data/ext/nokogiri/xml_sax_parser_context.c +0 -40
- data/ext/nokogiri/xml_xpath_context.c +2 -35
- data/ext/nokogiri/xslt_stylesheet.c +6 -124
- data/lib/nokogiri.rb +7 -3
- data/lib/nokogiri/css.rb +3 -6
- data/lib/nokogiri/css/generated_parser.rb +669 -0
- data/lib/nokogiri/css/generated_tokenizer.rb +145 -0
- data/lib/nokogiri/css/parser.rb +70 -665
- data/lib/nokogiri/css/parser.y +1 -6
- data/lib/nokogiri/css/tokenizer.rb +3 -148
- data/lib/nokogiri/css/tokenizer.rex +1 -1
- data/lib/nokogiri/css/xpath_visitor.rb +14 -16
- data/lib/nokogiri/decorators/slop.rb +3 -5
- data/lib/nokogiri/html.rb +3 -2
- data/lib/nokogiri/html/document.rb +18 -134
- data/lib/nokogiri/html/document_fragment.rb +21 -26
- data/lib/nokogiri/html/element_description_defaults.rb +671 -0
- data/lib/nokogiri/html/sax/parser.rb +2 -6
- data/lib/nokogiri/version.rb +4 -9
- data/lib/nokogiri/xml/attribute_decl.rb +1 -1
- data/lib/nokogiri/xml/builder.rb +1 -1
- data/lib/nokogiri/xml/document.rb +3 -27
- data/lib/nokogiri/xml/document_fragment.rb +2 -9
- data/lib/nokogiri/xml/dtd.rb +1 -12
- data/lib/nokogiri/xml/element_decl.rb +1 -1
- data/lib/nokogiri/xml/entity_decl.rb +1 -1
- data/lib/nokogiri/xml/node.rb +75 -172
- data/lib/nokogiri/xml/node/save_options.rb +0 -10
- data/lib/nokogiri/xml/node_set.rb +3 -28
- data/lib/nokogiri/xml/parse_options.rb +0 -8
- data/lib/nokogiri/xml/reader.rb +6 -44
- data/lib/nokogiri/xml/sax/document.rb +5 -9
- data/lib/nokogiri/xml/schema.rb +1 -7
- data/lib/nokogiri/xslt.rb +5 -9
- data/tasks/cross_compile.rb +12 -27
- data/tasks/test.rb +0 -0
- data/test/css/test_parser.rb +19 -40
- data/test/css/test_tokenizer.rb +0 -8
- data/test/helper.rb +1 -4
- data/test/html/sax/test_parser.rb +21 -47
- data/test/html/sax/test_parser_context.rb +2 -2
- data/test/html/test_document.rb +3 -58
- data/test/html/test_document_encoding.rb +0 -53
- data/test/html/test_document_fragment.rb +13 -82
- data/test/html/test_element_description.rb +4 -2
- data/test/html/test_node.rb +0 -9
- data/test/test_memory_leak.rb +2 -57
- data/test/test_nokogiri.rb +14 -20
- data/test/test_reader.rb +7 -47
- data/test/test_xslt_transforms.rb +5 -8
- data/test/xml/sax/test_parser.rb +17 -34
- data/test/xml/sax/test_parser_context.rb +0 -50
- data/test/xml/sax/test_push_parser.rb +1 -18
- data/test/xml/test_attr.rb +4 -31
- data/test/xml/test_attribute_decl.rb +7 -3
- data/test/xml/test_builder.rb +5 -5
- data/test/xml/test_cdata.rb +3 -3
- data/test/xml/test_document.rb +18 -15
- data/test/xml/test_document_fragment.rb +20 -19
- data/test/xml/test_dtd.rb +13 -18
- data/test/xml/test_element_content.rb +1 -1
- data/test/xml/test_element_decl.rb +1 -1
- data/test/xml/test_entity_decl.rb +12 -10
- data/test/xml/test_namespace.rb +7 -5
- data/test/xml/test_node.rb +15 -54
- data/test/xml/test_node_reparenting.rb +42 -85
- data/test/xml/test_node_set.rb +2 -61
- data/test/xml/test_schema.rb +0 -5
- data/test/xml/test_text.rb +2 -11
- data/test/xml/test_unparented_node.rb +1 -1
- data/test/xml/test_xpath.rb +7 -43
- metadata +131 -155
- data/.gemtest +0 -0
- data/ext/nokogiri/depend +0 -358
- data/ext/nokogiri/xml_libxml2_hacks.c +0 -112
- data/ext/nokogiri/xml_libxml2_hacks.h +0 -12
- data/lib/nokogiri/css/parser_extras.rb +0 -91
- data/lib/nokogiri/ffi/encoding_handler.rb +0 -42
- data/lib/nokogiri/ffi/html/document.rb +0 -28
- data/lib/nokogiri/ffi/html/element_description.rb +0 -81
- data/lib/nokogiri/ffi/html/entity_lookup.rb +0 -16
- data/lib/nokogiri/ffi/html/sax/parser_context.rb +0 -38
- data/lib/nokogiri/ffi/io_callbacks.rb +0 -42
- data/lib/nokogiri/ffi/libxml.rb +0 -420
- data/lib/nokogiri/ffi/structs/common_node.rb +0 -38
- data/lib/nokogiri/ffi/structs/html_elem_desc.rb +0 -24
- data/lib/nokogiri/ffi/structs/html_entity_desc.rb +0 -13
- data/lib/nokogiri/ffi/structs/xml_alloc.rb +0 -16
- data/lib/nokogiri/ffi/structs/xml_attr.rb +0 -20
- data/lib/nokogiri/ffi/structs/xml_attribute.rb +0 -27
- data/lib/nokogiri/ffi/structs/xml_buffer.rb +0 -16
- data/lib/nokogiri/ffi/structs/xml_char_encoding_handler.rb +0 -11
- data/lib/nokogiri/ffi/structs/xml_document.rb +0 -117
- data/lib/nokogiri/ffi/structs/xml_dtd.rb +0 -28
- data/lib/nokogiri/ffi/structs/xml_element.rb +0 -26
- data/lib/nokogiri/ffi/structs/xml_element_content.rb +0 -17
- data/lib/nokogiri/ffi/structs/xml_entity.rb +0 -32
- data/lib/nokogiri/ffi/structs/xml_enumeration.rb +0 -12
- data/lib/nokogiri/ffi/structs/xml_node.rb +0 -28
- data/lib/nokogiri/ffi/structs/xml_node_set.rb +0 -53
- data/lib/nokogiri/ffi/structs/xml_notation.rb +0 -11
- data/lib/nokogiri/ffi/structs/xml_ns.rb +0 -15
- data/lib/nokogiri/ffi/structs/xml_parser_context.rb +0 -20
- data/lib/nokogiri/ffi/structs/xml_parser_input.rb +0 -19
- data/lib/nokogiri/ffi/structs/xml_relax_ng.rb +0 -14
- data/lib/nokogiri/ffi/structs/xml_sax_handler.rb +0 -51
- data/lib/nokogiri/ffi/structs/xml_sax_push_parser_context.rb +0 -124
- data/lib/nokogiri/ffi/structs/xml_schema.rb +0 -13
- data/lib/nokogiri/ffi/structs/xml_syntax_error.rb +0 -31
- data/lib/nokogiri/ffi/structs/xml_text_reader.rb +0 -12
- data/lib/nokogiri/ffi/structs/xml_xpath_context.rb +0 -38
- data/lib/nokogiri/ffi/structs/xml_xpath_object.rb +0 -35
- data/lib/nokogiri/ffi/structs/xml_xpath_parser_context.rb +0 -20
- data/lib/nokogiri/ffi/structs/xslt_stylesheet.rb +0 -13
- data/lib/nokogiri/ffi/weak_bucket.rb +0 -40
- data/lib/nokogiri/ffi/xml/attr.rb +0 -41
- data/lib/nokogiri/ffi/xml/attribute_decl.rb +0 -27
- data/lib/nokogiri/ffi/xml/cdata.rb +0 -19
- data/lib/nokogiri/ffi/xml/comment.rb +0 -18
- data/lib/nokogiri/ffi/xml/document.rb +0 -174
- data/lib/nokogiri/ffi/xml/document_fragment.rb +0 -21
- data/lib/nokogiri/ffi/xml/dtd.rb +0 -67
- data/lib/nokogiri/ffi/xml/element_content.rb +0 -43
- data/lib/nokogiri/ffi/xml/element_decl.rb +0 -19
- data/lib/nokogiri/ffi/xml/entity_decl.rb +0 -36
- data/lib/nokogiri/ffi/xml/entity_reference.rb +0 -19
- data/lib/nokogiri/ffi/xml/namespace.rb +0 -44
- data/lib/nokogiri/ffi/xml/node.rb +0 -559
- data/lib/nokogiri/ffi/xml/node_set.rb +0 -150
- data/lib/nokogiri/ffi/xml/processing_instruction.rb +0 -20
- data/lib/nokogiri/ffi/xml/reader.rb +0 -236
- data/lib/nokogiri/ffi/xml/relax_ng.rb +0 -85
- data/lib/nokogiri/ffi/xml/sax/parser.rb +0 -143
- data/lib/nokogiri/ffi/xml/sax/parser_context.rb +0 -79
- data/lib/nokogiri/ffi/xml/sax/push_parser.rb +0 -51
- data/lib/nokogiri/ffi/xml/schema.rb +0 -109
- data/lib/nokogiri/ffi/xml/syntax_error.rb +0 -98
- data/lib/nokogiri/ffi/xml/text.rb +0 -18
- data/lib/nokogiri/ffi/xml/xpath.rb +0 -9
- data/lib/nokogiri/ffi/xml/xpath_context.rb +0 -153
- data/lib/nokogiri/ffi/xslt/stylesheet.rb +0 -77
- data/test/decorators/test_slop.rb +0 -16
- data/test/ffi/test_document.rb +0 -35
- data/test/files/encoding.html +0 -82
- data/test/files/encoding.xhtml +0 -84
- data/test/xslt/test_custom_functions.rb +0 -94
@@ -31,9 +31,7 @@ module Nokogiri
|
|
31
31
|
def parse_memory data, encoding = 'UTF-8'
|
32
32
|
raise ArgumentError unless data
|
33
33
|
return unless data.length > 0
|
34
|
-
|
35
|
-
yield ctx if block_given?
|
36
|
-
ctx.parse_with self
|
34
|
+
ParserContext.memory(data, encoding).parse_with self
|
37
35
|
end
|
38
36
|
|
39
37
|
###
|
@@ -42,9 +40,7 @@ module Nokogiri
|
|
42
40
|
raise ArgumentError unless filename
|
43
41
|
raise Errno::ENOENT unless File.exists?(filename)
|
44
42
|
raise Errno::EISDIR if File.directory?(filename)
|
45
|
-
|
46
|
-
yield ctx if block_given?
|
47
|
-
ctx.parse_with self
|
43
|
+
ParserContext.file(filename, encoding).parse_with self
|
48
44
|
end
|
49
45
|
end
|
50
46
|
end
|
data/lib/nokogiri/version.rb
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
module Nokogiri
|
2
2
|
# The version of Nokogiri you are using
|
3
|
-
VERSION = '1.4.7'
|
3
|
+
VERSION = '1.5.0.beta.1'
|
4
4
|
|
5
5
|
# More complete version information about libxml
|
6
6
|
VERSION_INFO = {}
|
@@ -9,8 +9,7 @@ module Nokogiri
|
|
9
9
|
VERSION_INFO['ruby'] = {}
|
10
10
|
VERSION_INFO['ruby']['version'] = ::RUBY_VERSION
|
11
11
|
VERSION_INFO['ruby']['platform'] = ::RUBY_PLATFORM
|
12
|
-
|
13
|
-
VERSION_INFO['ruby']['engine'] = defined?(RUBY_ENGINE) ? RUBY_ENGINE : 'mri'
|
12
|
+
|
14
13
|
VERSION_INFO['ruby']['jruby'] = ::JRUBY_VERSION if RUBY_PLATFORM == "java"
|
15
14
|
if defined?(LIBXML_VERSION)
|
16
15
|
VERSION_INFO['libxml'] = {}
|
@@ -29,15 +28,11 @@ module Nokogiri
|
|
29
28
|
!Nokogiri::VERSION_INFO['libxml'].nil?
|
30
29
|
end
|
31
30
|
|
32
|
-
def self.ffi? # :nodoc:
|
33
|
-
uses_libxml? && Nokogiri::VERSION_INFO['libxml']['binding'] == 'ffi'
|
34
|
-
end
|
35
|
-
|
36
31
|
def self.jruby?
|
37
|
-
|
32
|
+
!Nokogiri::VERSION_INFO['ruby']['jruby'].nil?
|
38
33
|
end
|
39
34
|
|
40
35
|
def self.is_2_6_16? # :nodoc:
|
41
|
-
Nokogiri::VERSION_INFO['libxml']['loaded'] <= '2.6.16'
|
36
|
+
Nokogiri::VERSION_INFO['ruby']['platform'] !~ /java/ && Nokogiri::VERSION_INFO['libxml']['loaded'] <= '2.6.16'
|
42
37
|
end
|
43
38
|
end
|
@@ -8,7 +8,7 @@ module Nokogiri
|
|
8
8
|
undef_method :content
|
9
9
|
undef_method :namespace
|
10
10
|
undef_method :namespace_definitions
|
11
|
-
undef_method :line
|
11
|
+
undef_method :line if method_defined?(:line)
|
12
12
|
|
13
13
|
def inspect
|
14
14
|
"#<#{self.class.name}:#{sprintf("0x%x", object_id)} #{to_s.inspect}>"
|
data/lib/nokogiri/xml/builder.rb
CHANGED
@@ -7,7 +7,7 @@ module Nokogiri
|
|
7
7
|
#
|
8
8
|
# For searching a Document, see Nokogiri::XML::Node#css and
|
9
9
|
# Nokogiri::XML::Node#xpath
|
10
|
-
class Document <
|
10
|
+
class Document < Node
|
11
11
|
##
|
12
12
|
# Parse an XML file. +thing+ may be a String, or any object that
|
13
13
|
# responds to _read_ and _close_ such as an IO, or StringIO.
|
@@ -113,13 +113,6 @@ module Nokogiri
|
|
113
113
|
# </root>
|
114
114
|
#
|
115
115
|
# The hash returned will look like this: { 'xmlns:foo' => 'bar' }
|
116
|
-
#
|
117
|
-
# Non-prefixed default namespaces (as in "xmlns=") are not included
|
118
|
-
# in the hash.
|
119
|
-
#
|
120
|
-
# Note this is a very expensive operation in current implementation, as it
|
121
|
-
# traverses the entire graph, and also has to bring each node across the
|
122
|
-
# libxml bridge into a ruby object.
|
123
116
|
def collect_namespaces
|
124
117
|
ns = {}
|
125
118
|
traverse { |j| ns.merge!(j.namespaces) }
|
@@ -141,21 +134,7 @@ module Nokogiri
|
|
141
134
|
end
|
142
135
|
|
143
136
|
##
|
144
|
-
# Explore a document with shortcut methods.
|
145
|
-
#
|
146
|
-
# Note that any nodes that have been instantiated before #slop!
|
147
|
-
# is called will not be decorated with sloppy behavior. So, if you're in
|
148
|
-
# irb, the preferred idiom is:
|
149
|
-
#
|
150
|
-
# irb> doc = Nokogiri::Slop my_markup
|
151
|
-
#
|
152
|
-
# and not
|
153
|
-
#
|
154
|
-
# irb> doc = Nokogiri::HTML my_markup
|
155
|
-
# ... followed by irb's implicit inspect (and therefore instantiation of every node) ...
|
156
|
-
# irb> doc.slop!
|
157
|
-
# ... which does absolutely nothing.
|
158
|
-
#
|
137
|
+
# Explore a document with shortcut methods.
|
159
138
|
def slop!
|
160
139
|
unless decorators(XML::Node).include? Nokogiri::Decorators::Slop
|
161
140
|
decorators(XML::Node) << Nokogiri::Decorators::Slop
|
@@ -193,6 +172,7 @@ module Nokogiri
|
|
193
172
|
undef_method :swap, :parent, :namespace, :default_namespace=
|
194
173
|
undef_method :add_namespace_definition, :attributes
|
195
174
|
undef_method :namespace_definitions, :line, :add_namespace
|
175
|
+
undef_method :parse, :in_context
|
196
176
|
|
197
177
|
def add_child child
|
198
178
|
raise "Document already has a root node" if root
|
@@ -206,10 +186,6 @@ module Nokogiri
|
|
206
186
|
alias :<< :add_child
|
207
187
|
|
208
188
|
private
|
209
|
-
def implied_xpath_context
|
210
|
-
"/"
|
211
|
-
end
|
212
|
-
|
213
189
|
def inspect_attributes
|
214
190
|
[:name, :children]
|
215
191
|
end
|
@@ -11,9 +11,9 @@ module Nokogiri
|
|
11
11
|
return self unless tags
|
12
12
|
|
13
13
|
children = if ctx
|
14
|
-
ctx.parse(tags)
|
14
|
+
ctx.parse(tags.strip)
|
15
15
|
else
|
16
|
-
XML::Document.parse("<root>#{tags}</root>") \
|
16
|
+
XML::Document.parse("<root>#{tags.strip}</root>") \
|
17
17
|
.xpath("/root/node()")
|
18
18
|
end
|
19
19
|
children.each { |child| child.parent = self }
|
@@ -72,13 +72,6 @@ module Nokogiri
|
|
72
72
|
end
|
73
73
|
end
|
74
74
|
|
75
|
-
private
|
76
|
-
|
77
|
-
def coerce data
|
78
|
-
return super unless String === data
|
79
|
-
|
80
|
-
document.fragment(data).children
|
81
|
-
end
|
82
75
|
end
|
83
76
|
end
|
84
77
|
end
|
data/lib/nokogiri/xml/dtd.rb
CHANGED
@@ -2,21 +2,10 @@ module Nokogiri
|
|
2
2
|
module XML
|
3
3
|
class DTD < Nokogiri::XML::Node
|
4
4
|
undef_method :attribute_nodes
|
5
|
-
undef_method :values
|
6
5
|
undef_method :content
|
7
6
|
undef_method :namespace
|
8
7
|
undef_method :namespace_definitions
|
9
|
-
undef_method :line
|
10
|
-
|
11
|
-
def keys
|
12
|
-
attributes.keys
|
13
|
-
end
|
14
|
-
|
15
|
-
def each &block
|
16
|
-
attributes.each { |key, value|
|
17
|
-
block.call([key, value])
|
18
|
-
}
|
19
|
-
end
|
8
|
+
undef_method :line if method_defined?(:line)
|
20
9
|
end
|
21
10
|
end
|
22
11
|
end
|
@@ -3,7 +3,7 @@ module Nokogiri
|
|
3
3
|
class ElementDecl < Nokogiri::XML::Node
|
4
4
|
undef_method :namespace
|
5
5
|
undef_method :namespace_definitions
|
6
|
-
undef_method :line
|
6
|
+
undef_method :line if method_defined?(:line)
|
7
7
|
|
8
8
|
def inspect
|
9
9
|
"#<#{self.class.name}:#{sprintf("0x%x", object_id)} #{to_s.inspect}>"
|
data/lib/nokogiri/xml/node.rb
CHANGED
@@ -95,17 +95,13 @@ module Nokogiri
|
|
95
95
|
# optional hash of namespaces may be appended.
|
96
96
|
# See Node#xpath and Node#css.
|
97
97
|
def search *paths
|
98
|
-
# TODO use paths, handler, ns, binds = extract_params(paths)
|
99
98
|
ns = paths.last.is_a?(Hash) ? paths.pop :
|
100
99
|
(document.root ? document.root.namespaces : {})
|
101
|
-
|
102
|
-
prefix = "#{implied_xpath_context}/"
|
103
|
-
|
104
100
|
xpath(*(paths.map { |path|
|
105
101
|
path = path.to_s
|
106
|
-
path =~ /^(
|
102
|
+
path =~ /^(\.\/|\/)/ ? path : CSS.xpath_for(
|
107
103
|
path,
|
108
|
-
:prefix =>
|
104
|
+
:prefix => ".//",
|
109
105
|
:ns => ns
|
110
106
|
)
|
111
107
|
}.flatten.uniq) + [ns])
|
@@ -113,28 +109,16 @@ module Nokogiri
|
|
113
109
|
alias :/ :search
|
114
110
|
|
115
111
|
###
|
116
|
-
# call-seq: xpath *paths, [namespace-bindings, variable-bindings, custom-handler-class]
|
117
|
-
#
|
118
112
|
# Search this node for XPath +paths+. +paths+ must be one or more XPath
|
119
|
-
# queries.
|
113
|
+
# queries. A hash of namespaces may be appended. For example:
|
120
114
|
#
|
121
115
|
# node.xpath('.//title')
|
122
|
-
#
|
123
|
-
# A hash of namespace bindings may be appended. For example:
|
124
|
-
#
|
125
|
-
# node.xpath('.//foo:name', {'foo' => 'http://example.org/'})
|
116
|
+
# node.xpath('.//foo:name', { 'foo' => 'http://example.org/' })
|
126
117
|
# node.xpath('.//xmlns:name', node.root.namespaces)
|
127
118
|
#
|
128
|
-
#
|
129
|
-
#
|
130
|
-
#
|
131
|
-
#
|
132
|
-
# Custom XPath functions may also be defined. To define custom
|
133
|
-
# functions create a class and implement the function you want
|
134
|
-
# to define. The first argument to the method will be the
|
135
|
-
# current matching NodeSet. Any other arguments are ones that
|
136
|
-
# you pass in. Note that this class may appear anywhere in the
|
137
|
-
# argument list. For example:
|
119
|
+
# Custom XPath functions may also be defined. To define custom functions
|
120
|
+
# create a class and implement the function you want to define.
|
121
|
+
# For example:
|
138
122
|
#
|
139
123
|
# node.xpath('.//title[regex(., "\w+")]', Class.new {
|
140
124
|
# def regex node_set, regex
|
@@ -143,18 +127,20 @@ module Nokogiri
|
|
143
127
|
# }.new)
|
144
128
|
#
|
145
129
|
def xpath *paths
|
146
|
-
|
130
|
+
# Pop off our custom function handler if it exists
|
131
|
+
handler = ![
|
132
|
+
Hash, String, Symbol
|
133
|
+
].include?(paths.last.class) ? paths.pop : nil
|
134
|
+
|
135
|
+
ns = paths.last.is_a?(Hash) ? paths.pop :
|
136
|
+
(document.root ? document.root.namespaces : {})
|
147
137
|
|
148
|
-
|
138
|
+
return NodeSet.new(document) unless document
|
149
139
|
|
150
140
|
sets = paths.map { |path|
|
151
141
|
ctx = XPathContext.new(self)
|
152
142
|
ctx.register_namespaces(ns)
|
153
|
-
|
154
|
-
binds.each do |key,value|
|
155
|
-
ctx.register_variable key.to_s, value
|
156
|
-
end if binds
|
157
|
-
|
143
|
+
path = path.gsub(/\/xmlns:/,'/:') unless Nokogiri.uses_libxml?
|
158
144
|
ctx.evaluate(path, handler)
|
159
145
|
}
|
160
146
|
return sets.first if sets.length == 1
|
@@ -169,24 +155,18 @@ module Nokogiri
|
|
169
155
|
end
|
170
156
|
|
171
157
|
###
|
172
|
-
# call-seq: css *rules, [namespace-bindings, custom-pseudo-class]
|
173
|
-
#
|
174
158
|
# Search this node for CSS +rules+. +rules+ must be one or more CSS
|
175
|
-
# selectors.
|
159
|
+
# selectors. For example:
|
176
160
|
#
|
177
161
|
# node.css('title')
|
178
162
|
# node.css('body h1.bold')
|
179
163
|
# node.css('div + p.green', 'div#one')
|
180
164
|
#
|
181
|
-
#
|
182
|
-
#
|
183
|
-
#
|
184
|
-
#
|
185
|
-
#
|
186
|
-
# custom pseudo classes, create a class and implement the custom
|
187
|
-
# pseudo class you want defined. The first argument to the
|
188
|
-
# method will be the current matching NodeSet. Any other
|
189
|
-
# arguments are ones that you pass in. For example:
|
165
|
+
# Custom CSS pseudo classes may also be defined. To define custom pseudo
|
166
|
+
# classes, create a class and implement the custom pseudo class you
|
167
|
+
# want defined. The first argument to the method will be the current
|
168
|
+
# matching NodeSet. Any other arguments are ones that you pass in.
|
169
|
+
# For example:
|
190
170
|
#
|
191
171
|
# node.css('title:regex("\w+")', Class.new {
|
192
172
|
# def regex node_set, regex
|
@@ -194,21 +174,18 @@ module Nokogiri
|
|
194
174
|
# end
|
195
175
|
# }.new)
|
196
176
|
#
|
197
|
-
# Note that the CSS query string is case-sensitive with regards
|
198
|
-
# to your document type. That is, if you're looking for "H1" in
|
199
|
-
# an HTML document, you'll never find anything, since HTML tags
|
200
|
-
# will match only lowercase CSS queries. However, "H1" might be
|
201
|
-
# found in an XML document, where tags names are case-sensitive
|
202
|
-
# (e.g., "H1" is distinct from "h1").
|
203
|
-
#
|
204
177
|
def css *rules
|
205
|
-
|
178
|
+
# Pop off our custom function handler if it exists
|
179
|
+
handler = ![
|
180
|
+
Hash, String, Symbol
|
181
|
+
].include?(rules.last.class) ? rules.pop : nil
|
206
182
|
|
207
|
-
|
183
|
+
ns = rules.last.is_a?(Hash) ? rules.pop :
|
184
|
+
(document.root ? document.root.namespaces : {})
|
208
185
|
|
209
186
|
rules = rules.map { |rule|
|
210
|
-
CSS.xpath_for(rule, :prefix =>
|
211
|
-
}.flatten.uniq + [ns, handler
|
187
|
+
xpath_rule = CSS.xpath_for(rule, :prefix => ".//", :ns => ns)
|
188
|
+
}.flatten.uniq + [ns, handler].compact
|
212
189
|
|
213
190
|
xpath(*rules)
|
214
191
|
end
|
@@ -258,7 +235,7 @@ module Nokogiri
|
|
258
235
|
# Add +node_or_tags+ as a child of this Node.
|
259
236
|
# +node_or_tags+ can be a Nokogiri::XML::Node, a ::DocumentFragment, a ::NodeSet, or a string containing markup.
|
260
237
|
#
|
261
|
-
# Returns the
|
238
|
+
# Returns the new child node.
|
262
239
|
def add_child node_or_tags
|
263
240
|
node_or_tags = coerce(node_or_tags)
|
264
241
|
if node_or_tags.is_a?(XML::NodeSet)
|
@@ -266,55 +243,42 @@ module Nokogiri
|
|
266
243
|
else
|
267
244
|
add_child_node node_or_tags
|
268
245
|
end
|
269
|
-
node_or_tags
|
270
246
|
end
|
271
247
|
|
272
248
|
###
|
273
249
|
# Insert +node_or_tags+ before this Node (as a sibling).
|
274
250
|
# +node_or_tags+ can be a Nokogiri::XML::Node, a ::DocumentFragment, a ::NodeSet, or a string containing markup.
|
275
251
|
#
|
276
|
-
# Returns the
|
252
|
+
# Returns the new sibling node.
|
277
253
|
#
|
278
254
|
# Also see related method +before+.
|
279
255
|
def add_previous_sibling node_or_tags
|
280
256
|
node_or_tags = coerce(node_or_tags)
|
281
257
|
if node_or_tags.is_a?(XML::NodeSet)
|
282
|
-
|
283
|
-
pivot = Nokogiri::XML::Node.new 'dummy', document
|
284
|
-
add_previous_sibling_node pivot
|
285
|
-
else
|
286
|
-
pivot = self
|
287
|
-
end
|
288
|
-
node_or_tags.each { |n| pivot.send :add_previous_sibling_node, n }
|
289
|
-
pivot.unlink if text?
|
258
|
+
node_or_tags.each { |n| add_previous_sibling_node n }
|
290
259
|
else
|
291
260
|
add_previous_sibling_node node_or_tags
|
292
261
|
end
|
293
|
-
node_or_tags
|
294
262
|
end
|
295
263
|
|
296
264
|
###
|
297
265
|
# Insert +node_or_tags+ after this Node (as a sibling).
|
298
266
|
# +node_or_tags+ can be a Nokogiri::XML::Node, a ::DocumentFragment, a ::NodeSet, or a string containing markup.
|
299
267
|
#
|
300
|
-
# Returns the
|
268
|
+
# Returns the new sibling node.
|
301
269
|
#
|
302
270
|
# Also see related method +after+.
|
303
271
|
def add_next_sibling node_or_tags
|
304
272
|
node_or_tags = coerce(node_or_tags)
|
305
273
|
if node_or_tags.is_a?(XML::NodeSet)
|
306
|
-
if
|
307
|
-
|
308
|
-
add_next_sibling_node pivot
|
274
|
+
if '1.8.6' == RUBY_VERSION
|
275
|
+
node_or_tags.reverse.each { |n| add_next_sibling_node n }
|
309
276
|
else
|
310
|
-
|
277
|
+
node_or_tags.reverse_each { |n| add_next_sibling_node n }
|
311
278
|
end
|
312
|
-
node_or_tags.reverse.each { |n| pivot.send :add_next_sibling_node, n }
|
313
|
-
pivot.unlink if text?
|
314
279
|
else
|
315
280
|
add_next_sibling_node node_or_tags
|
316
281
|
end
|
317
|
-
node_or_tags
|
318
282
|
end
|
319
283
|
|
320
284
|
####
|
@@ -342,25 +306,11 @@ module Nokogiri
|
|
342
306
|
end
|
343
307
|
|
344
308
|
####
|
345
|
-
# Set the
|
309
|
+
# Set the inner_html for this Node to +node_or_tags+
|
346
310
|
# +node_or_tags+ can be a Nokogiri::XML::Node, a Nokogiri::XML::DocumentFragment, or a string containing markup.
|
347
311
|
#
|
348
312
|
# Returns self.
|
349
|
-
#
|
350
|
-
# Also see related method +children=+
|
351
313
|
def inner_html= node_or_tags
|
352
|
-
self.children = node_or_tags
|
353
|
-
self
|
354
|
-
end
|
355
|
-
|
356
|
-
####
|
357
|
-
# Set the inner html for this Node +node_or_tags+
|
358
|
-
# +node_or_tags+ can be a Nokogiri::XML::Node, a Nokogiri::XML::DocumentFragment, or a string containing markup.
|
359
|
-
#
|
360
|
-
# Returns the reparented node (if +node_or_tags+ is a Node), or NodeSet (if +node_or_tags+ is a DocumentFragment, NodeSet, or string).
|
361
|
-
#
|
362
|
-
# Also see related method +inner_html=+
|
363
|
-
def children= node_or_tags
|
364
314
|
node_or_tags = coerce(node_or_tags)
|
365
315
|
children.unlink
|
366
316
|
if node_or_tags.is_a?(XML::NodeSet)
|
@@ -368,32 +318,24 @@ module Nokogiri
|
|
368
318
|
else
|
369
319
|
add_child node_or_tags
|
370
320
|
end
|
371
|
-
|
321
|
+
self
|
372
322
|
end
|
373
323
|
|
374
324
|
####
|
375
325
|
# Replace this Node with +node_or_tags+.
|
376
326
|
# +node_or_tags+ can be a Nokogiri::XML::Node, a ::DocumentFragment, a ::NodeSet, or a string containing markup.
|
377
327
|
#
|
378
|
-
# Returns the
|
328
|
+
# Returns the new child node.
|
379
329
|
#
|
380
330
|
# Also see related method +swap+.
|
381
331
|
def replace node_or_tags
|
382
332
|
node_or_tags = coerce(node_or_tags)
|
383
333
|
if node_or_tags.is_a?(XML::NodeSet)
|
384
|
-
|
385
|
-
|
386
|
-
add_previous_sibling_node replacee
|
387
|
-
unlink
|
388
|
-
else
|
389
|
-
replacee = self
|
390
|
-
end
|
391
|
-
node_or_tags.each { |n| replacee.add_previous_sibling n }
|
392
|
-
replacee.unlink
|
334
|
+
node_or_tags.each { |n| add_previous_sibling n }
|
335
|
+
unlink
|
393
336
|
else
|
394
337
|
replace_node node_or_tags
|
395
338
|
end
|
396
|
-
node_or_tags
|
397
339
|
end
|
398
340
|
|
399
341
|
####
|
@@ -434,10 +376,8 @@ module Nokogiri
|
|
434
376
|
|
435
377
|
####
|
436
378
|
# Returns a hash containing the node's attributes. The key is
|
437
|
-
# the attribute name
|
379
|
+
# the attribute name, the value is a Nokogiri::XML::Attr
|
438
380
|
# representing the attribute.
|
439
|
-
# If you need to distinguish attributes with the same name, with different namespaces
|
440
|
-
# use #attribute_nodes instead.
|
441
381
|
def attributes
|
442
382
|
Hash[*(attribute_nodes.map { |node|
|
443
383
|
[node.node_name, node]
|
@@ -489,8 +429,7 @@ module Nokogiri
|
|
489
429
|
# Parse +string_or_io+ as a document fragment within the context of
|
490
430
|
# *this* node. Returns a XML::NodeSet containing the nodes parsed from
|
491
431
|
# +string_or_io+.
|
492
|
-
def parse string_or_io, options =
|
493
|
-
options ||= (document.html? ? ParseOptions::DEFAULT_HTML : ParseOptions::DEFAULT_XML)
|
432
|
+
def parse string_or_io, options = ParseOptions::DEFAULT_XML
|
494
433
|
if Fixnum === options
|
495
434
|
options = Nokogiri::XML::ParseOptions.new(options)
|
496
435
|
end
|
@@ -502,16 +441,7 @@ module Nokogiri
|
|
502
441
|
string_or_io
|
503
442
|
|
504
443
|
return Nokogiri::XML::NodeSet.new(document) if contents.empty?
|
505
|
-
|
506
|
-
##
|
507
|
-
# This is a horrible hack, but I don't care. See #313 for background.
|
508
|
-
error_count = document.errors.length
|
509
|
-
node_set = in_context(contents, options.to_i)
|
510
|
-
if node_set.empty? and document.errors.length > error_count and options.recover?
|
511
|
-
fragment = Nokogiri::HTML::DocumentFragment.parse contents
|
512
|
-
node_set = fragment.children
|
513
|
-
end
|
514
|
-
node_set
|
444
|
+
in_context(contents, options.to_i)
|
515
445
|
end
|
516
446
|
|
517
447
|
####
|
@@ -528,19 +458,7 @@ module Nokogiri
|
|
528
458
|
end
|
529
459
|
|
530
460
|
###
|
531
|
-
#
|
532
|
-
# node and its ancestors.
|
533
|
-
#
|
534
|
-
# This method returns the same namespaces as #namespace_scopes.
|
535
|
-
#
|
536
|
-
# Returns namespaces in scope for self -- those defined on self
|
537
|
-
# element directly or any ancestor node -- as a Hash of
|
538
|
-
# attribute-name/value pairs. Note that the keys in this hash
|
539
|
-
# XML attributes that would be used to define this namespace,
|
540
|
-
# such as "xmlns:prefix", not just the prefix. Default namespace
|
541
|
-
# set on self will be included with key "xmlns". However,
|
542
|
-
# default namespaces set on ancestor will NOT be, even if self
|
543
|
-
# has no explicit default namespace.
|
461
|
+
# Get a hash containing the Namespace definitions for this Node
|
544
462
|
def namespaces
|
545
463
|
Hash[*namespace_scopes.map { |nd|
|
546
464
|
key = ['xmlns', nd.prefix].compact.join(':')
|
@@ -648,22 +566,14 @@ module Nokogiri
|
|
648
566
|
end
|
649
567
|
|
650
568
|
###
|
651
|
-
#
|
652
|
-
# The consequence is as an xmlns attribute with supplied argument were
|
653
|
-
# present in parsed XML. A default namespace set with this method will
|
654
|
-
# now show up in #attributes, but when this node is serialized to XML an
|
655
|
-
# "xmlns" attribute will appear. See also #namespace and #namespace=
|
569
|
+
# Set the default namespace for this node to +url+
|
656
570
|
def default_namespace= url
|
657
571
|
add_namespace_definition(nil, url)
|
658
572
|
end
|
659
573
|
alias :add_namespace :add_namespace_definition
|
660
574
|
|
661
575
|
###
|
662
|
-
# Set the
|
663
|
-
# "xmlns=" attribute in XML source), as a Namespace object +ns+. Note that
|
664
|
-
# a Namespace added this way will NOT be serialized as an xmlns attribute
|
665
|
-
# for this node. You probably want #default_namespace= instead, or perhaps
|
666
|
-
# #add_namespace_definition with a nil prefix argument.
|
576
|
+
# Set the namespace for this node to +ns+
|
667
577
|
def namespace= ns
|
668
578
|
return set_namespace(ns) unless ns
|
669
579
|
|
@@ -738,9 +648,13 @@ module Nokogiri
|
|
738
648
|
# use Node#to_xhtml instead.
|
739
649
|
def to_html options = {}
|
740
650
|
# FIXME: this is a hack around broken libxml versions
|
741
|
-
return dump_html if %w[2 6] === LIBXML_VERSION.split('.')[0..1]
|
651
|
+
return dump_html if Nokogiri.uses_libxml? && %w[2 6] === LIBXML_VERSION.split('.')[0..1]
|
652
|
+
|
653
|
+
options[:save_with] ||= SaveOptions::FORMAT |
|
654
|
+
SaveOptions::NO_DECLARATION |
|
655
|
+
SaveOptions::NO_EMPTY_TAGS |
|
656
|
+
SaveOptions::AS_HTML
|
742
657
|
|
743
|
-
options[:save_with] ||= SaveOptions::DEFAULT_HTML
|
744
658
|
serialize(options)
|
745
659
|
end
|
746
660
|
|
@@ -751,7 +665,8 @@ module Nokogiri
|
|
751
665
|
#
|
752
666
|
# See Node#write_to for a list of +options+
|
753
667
|
def to_xml options = {}
|
754
|
-
options[:save_with] ||= SaveOptions::
|
668
|
+
options[:save_with] ||= SaveOptions::FORMAT | SaveOptions::AS_XML
|
669
|
+
|
755
670
|
serialize(options)
|
756
671
|
end
|
757
672
|
|
@@ -763,9 +678,13 @@ module Nokogiri
|
|
763
678
|
# See Node#write_to for a list of +options+
|
764
679
|
def to_xhtml options = {}
|
765
680
|
# FIXME: this is a hack around broken libxml versions
|
766
|
-
return dump_html if %w[2 6] === LIBXML_VERSION.split('.')[0..1]
|
681
|
+
return dump_html if Nokogiri.uses_libxml? && %w[2 6] === LIBXML_VERSION.split('.')[0..1]
|
682
|
+
|
683
|
+
options[:save_with] ||= SaveOptions::FORMAT |
|
684
|
+
SaveOptions::NO_DECLARATION |
|
685
|
+
SaveOptions::NO_EMPTY_TAGS |
|
686
|
+
SaveOptions::AS_XHTML
|
767
687
|
|
768
|
-
options[:save_with] ||= SaveOptions::DEFAULT_XHTML
|
769
688
|
serialize(options)
|
770
689
|
end
|
771
690
|
|
@@ -794,7 +713,7 @@ module Nokogiri
|
|
794
713
|
indent_times = options[:indent] || 2
|
795
714
|
|
796
715
|
|
797
|
-
config = SaveOptions.new(save_options
|
716
|
+
config = SaveOptions.new(save_options)
|
798
717
|
yield config if block_given?
|
799
718
|
|
800
719
|
native_write_to(io, encoding, indent_text * indent_times, config.options)
|
@@ -806,9 +725,12 @@ module Nokogiri
|
|
806
725
|
# See Node#write_to for a list of +options+
|
807
726
|
def write_html_to io, options = {}
|
808
727
|
# FIXME: this is a hack around broken libxml versions
|
809
|
-
return (io << dump_html) if %w[2 6] === LIBXML_VERSION.split('.')[0..1]
|
728
|
+
return (io << dump_html) if Nokogiri.uses_libxml? && %w[2 6] === LIBXML_VERSION.split('.')[0..1]
|
810
729
|
|
811
|
-
options[:save_with] ||= SaveOptions::
|
730
|
+
options[:save_with] ||= SaveOptions::FORMAT |
|
731
|
+
SaveOptions::NO_DECLARATION |
|
732
|
+
SaveOptions::NO_EMPTY_TAGS |
|
733
|
+
SaveOptions::AS_HTML
|
812
734
|
write_to io, options
|
813
735
|
end
|
814
736
|
|
@@ -818,9 +740,12 @@ module Nokogiri
|
|
818
740
|
# See Node#write_to for a list of +options+
|
819
741
|
def write_xhtml_to io, options = {}
|
820
742
|
# FIXME: this is a hack around broken libxml versions
|
821
|
-
return (io << dump_html) if %w[2 6] === LIBXML_VERSION.split('.')[0..1]
|
743
|
+
return (io << dump_html) if Nokogiri.uses_libxml? && %w[2 6] === LIBXML_VERSION.split('.')[0..1]
|
822
744
|
|
823
|
-
options[:save_with] ||= SaveOptions::
|
745
|
+
options[:save_with] ||= SaveOptions::FORMAT |
|
746
|
+
SaveOptions::NO_DECLARATION |
|
747
|
+
SaveOptions::NO_EMPTY_TAGS |
|
748
|
+
SaveOptions::AS_XHTML
|
824
749
|
write_to io, options
|
825
750
|
end
|
826
751
|
|
@@ -831,7 +756,7 @@ module Nokogiri
|
|
831
756
|
#
|
832
757
|
# See Node#write_to for a list of options
|
833
758
|
def write_xml_to io, options = {}
|
834
|
-
options[:save_with] ||= SaveOptions::
|
759
|
+
options[:save_with] ||= SaveOptions::FORMAT | SaveOptions::AS_XML
|
835
760
|
write_to io, options
|
836
761
|
end
|
837
762
|
|
@@ -846,25 +771,7 @@ module Nokogiri
|
|
846
771
|
|
847
772
|
private
|
848
773
|
|
849
|
-
def
|
850
|
-
# Pop off our custom function handler if it exists
|
851
|
-
handler = params.find { |param|
|
852
|
-
![Hash, String, Symbol].include?(param.class)
|
853
|
-
}
|
854
|
-
|
855
|
-
params -= [handler] if handler
|
856
|
-
|
857
|
-
hashes = []
|
858
|
-
hashes << params.pop while Hash === params.last || params.last.nil?
|
859
|
-
|
860
|
-
ns, binds = hashes.reverse
|
861
|
-
|
862
|
-
ns ||= document.root ? document.root.namespaces : {}
|
863
|
-
|
864
|
-
[params, handler, ns, binds]
|
865
|
-
end
|
866
|
-
|
867
|
-
def coerce data # :nodoc:
|
774
|
+
def coerce(data) # :nodoc:
|
868
775
|
return data if data.is_a?(XML::NodeSet)
|
869
776
|
return data.children if data.is_a?(XML::DocumentFragment)
|
870
777
|
return fragment(data).children if data.is_a?(String)
|
@@ -879,10 +786,6 @@ Requires a Node, NodeSet or String argument, and cannot accept a #{data.class}.
|
|
879
786
|
data
|
880
787
|
end
|
881
788
|
|
882
|
-
def implied_xpath_context
|
883
|
-
"./"
|
884
|
-
end
|
885
|
-
|
886
789
|
def inspect_attributes
|
887
790
|
[:name, :namespace, :attribute_nodes, :children]
|
888
791
|
end
|