nokogiri 1.0.7 → 1.1.0
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of nokogiri might be problematic. Click here for more details.
- data/History.ja.txt +14 -0
- data/History.txt +16 -0
- data/Manifest.txt +5 -0
- data/README.ja.txt +1 -1
- data/README.txt +1 -1
- data/Rakefile +5 -8
- data/ext/nokogiri/extconf.rb +46 -7
- data/ext/nokogiri/html_sax_parser.c +12 -8
- data/ext/nokogiri/xml_node.c +41 -26
- data/ext/nokogiri/xml_node_set.c +3 -0
- data/ext/nokogiri/xml_sax_parser.c +1 -0
- data/ext/nokogiri/xml_xpath_context.c +120 -4
- data/ext/nokogiri/xslt_stylesheet.c +22 -7
- data/lib/nokogiri/css.rb +1 -0
- data/lib/nokogiri/css/generated_parser.rb +142 -122
- data/lib/nokogiri/css/parser.rb +23 -19
- data/lib/nokogiri/css/parser.y +3 -1
- data/lib/nokogiri/css/selector_handler.rb +6 -0
- data/lib/nokogiri/css/xpath_visitor.rb +5 -2
- data/lib/nokogiri/html.rb +10 -0
- data/lib/nokogiri/version.rb +1 -1
- data/lib/nokogiri/xml.rb +1 -0
- data/lib/nokogiri/xml/node.rb +103 -11
- data/lib/nokogiri/xml/node_set.rb +2 -1
- data/lib/nokogiri/xml/xpath_handler.rb +6 -0
- data/lib/nokogiri/xslt.rb +17 -0
- data/test/css/test_parser.rb +6 -6
- data/test/css/test_xpath_visitor.rb +10 -0
- data/test/files/exslt.xml +8 -0
- data/test/files/exslt.xslt +35 -0
- data/test/helper.rb +2 -0
- data/test/hpricot/test_parser.rb +7 -5
- data/test/test_memory_leak.rb +2 -2
- data/test/test_xslt_transforms.rb +64 -0
- data/test/xml/test_node.rb +47 -0
- data/test/xml/test_xpath.rb +98 -0
- data/vendor/hoe.rb +22 -18
- metadata +8 -2
data/lib/nokogiri/css/parser.rb
CHANGED
@@ -1,35 +1,37 @@
|
|
1
|
+
require 'thread'
|
2
|
+
|
1
3
|
module Nokogiri
|
2
4
|
module CSS
|
3
5
|
class Parser < GeneratedTokenizer
|
6
|
+
@cache_on = true
|
7
|
+
@cache = {}
|
8
|
+
@mutex = Mutex.new
|
9
|
+
|
4
10
|
class << self
|
11
|
+
attr_accessor :cache_on
|
12
|
+
alias :cache_on? :cache_on
|
13
|
+
alias :set_cache :cache_on=
|
14
|
+
|
5
15
|
def parse string
|
6
16
|
new.parse(string)
|
7
17
|
end
|
18
|
+
|
8
19
|
def xpath_for string, options={}
|
9
20
|
new.xpath_for(string, options)
|
10
21
|
end
|
11
22
|
|
12
|
-
def
|
13
|
-
@cache_on
|
14
|
-
|
15
|
-
|
16
|
-
def cache_on?
|
17
|
-
@cache ||= {}
|
18
|
-
instance_variable_defined?('@cache_on') ? @cache_on : true
|
19
|
-
end
|
20
|
-
|
21
|
-
def check_cache string
|
22
|
-
return unless cache_on?
|
23
|
-
@cache[string]
|
23
|
+
def [] string
|
24
|
+
return unless @cache_on
|
25
|
+
@mutex.synchronize { @cache[string] }
|
24
26
|
end
|
25
27
|
|
26
|
-
def
|
27
|
-
return value unless cache_on
|
28
|
-
@cache[string] = value
|
28
|
+
def []= string, value
|
29
|
+
return value unless @cache_on
|
30
|
+
@mutex.synchronize { @cache[string] = value }
|
29
31
|
end
|
30
32
|
|
31
33
|
def clear_cache
|
32
|
-
@cache = {}
|
34
|
+
@mutex.synchronize { @cache = {} }
|
33
35
|
end
|
34
36
|
|
35
37
|
def without_cache &block
|
@@ -42,13 +44,15 @@ module Nokogiri
|
|
42
44
|
alias :parse :scan_str
|
43
45
|
|
44
46
|
def xpath_for string, options={}
|
45
|
-
v = self.class
|
46
|
-
return v
|
47
|
+
v = self.class[string]
|
48
|
+
return v if v
|
47
49
|
|
48
50
|
prefix = options[:prefix] || nil
|
49
51
|
visitor = options[:visitor] || nil
|
50
52
|
args = [prefix, visitor]
|
51
|
-
self.class
|
53
|
+
self.class[string] = parse(string).map { |ast|
|
54
|
+
ast.to_xpath(prefix, visitor)
|
55
|
+
}
|
52
56
|
end
|
53
57
|
|
54
58
|
def on_error error_token_id, error_value, value_stack
|
data/lib/nokogiri/css/parser.y
CHANGED
@@ -29,8 +29,11 @@ module Nokogiri
|
|
29
29
|
"position() > #{node.value[1]}"
|
30
30
|
when /^only-child\(/
|
31
31
|
"last() = 1"
|
32
|
+
when /^comment\(/
|
33
|
+
"comment()"
|
32
34
|
else
|
33
|
-
|
35
|
+
args = ['.'] + node.value[1..-1]
|
36
|
+
"#{node.value.first}#{args.join(', ')})"
|
34
37
|
end
|
35
38
|
end
|
36
39
|
|
@@ -106,7 +109,7 @@ module Nokogiri
|
|
106
109
|
when "parent" then "node()"
|
107
110
|
when "root" then "not(parent::*)"
|
108
111
|
else
|
109
|
-
|
112
|
+
node.value.first + "(.)"
|
110
113
|
end
|
111
114
|
end
|
112
115
|
end
|
data/lib/nokogiri/html.rb
CHANGED
@@ -3,6 +3,14 @@ require 'nokogiri/html/sax/parser'
|
|
3
3
|
|
4
4
|
module Nokogiri
|
5
5
|
class << self
|
6
|
+
###
|
7
|
+
# Parse HTML. +thing+ may be a String, or any object that
|
8
|
+
# responds to _read_ and _close_ such as an IO, or StringIO.
|
9
|
+
# +url+ is the resource where this document is located. +encoding+ is the
|
10
|
+
# encoding that should be used when processing the document. +options+
|
11
|
+
# is a number that sets options in the parser, such as
|
12
|
+
# Nokogiri::XML::PARSE_RECOVER. See the constants in
|
13
|
+
# Nokogiri::XML.
|
6
14
|
def HTML thing, url = nil, encoding = nil, options = 2145
|
7
15
|
Nokogiri::HTML.parse(thing, url, encoding, options)
|
8
16
|
end
|
@@ -17,6 +25,8 @@ module Nokogiri
|
|
17
25
|
PARSE_NONET = 1 << 11 # No network access
|
18
26
|
|
19
27
|
class << self
|
28
|
+
###
|
29
|
+
# Parse HTML. See Nokogiri.HTML.
|
20
30
|
def parse string_or_io, url = nil, encoding = nil, options = 2145
|
21
31
|
if string_or_io.respond_to?(:read)
|
22
32
|
url ||= string_or_io.respond_to?(:path) ? string_or_io.path : nil
|
data/lib/nokogiri/version.rb
CHANGED
data/lib/nokogiri/xml.rb
CHANGED
data/lib/nokogiri/xml/node.rb
CHANGED
@@ -1,17 +1,26 @@
|
|
1
1
|
module Nokogiri
|
2
2
|
module XML
|
3
3
|
class Node
|
4
|
+
ELEMENT_NODE = 1
|
5
|
+
ATTRIBUTE_NODE = 2
|
6
|
+
TEXT_NODE = 3
|
4
7
|
CDATA_SECTION_NODE = 4
|
5
|
-
|
6
|
-
|
8
|
+
ENTITY_REF_NODE = 5
|
9
|
+
ENTITY_NODE = 6
|
10
|
+
PI_NODE = 7
|
11
|
+
COMMENT_NODE = 8
|
12
|
+
DOCUMENT_NODE = 9
|
13
|
+
DOCUMENT_TYPE_NODE = 10
|
14
|
+
DOCUMENT_FRAG_NODE = 11
|
15
|
+
NOTATION_NODE = 12
|
7
16
|
HTML_DOCUMENT_NODE = 13
|
8
|
-
DTD_NODE =
|
9
|
-
ELEMENT_DECL =
|
10
|
-
ATTRIBUTE_DECL =
|
11
|
-
ENTITY_DECL =
|
12
|
-
NAMESPACE_DECL =
|
13
|
-
XINCLUDE_START =
|
14
|
-
XINCLUDE_END =
|
17
|
+
DTD_NODE = 14
|
18
|
+
ELEMENT_DECL = 15
|
19
|
+
ATTRIBUTE_DECL = 16
|
20
|
+
ENTITY_DECL = 17
|
21
|
+
NAMESPACE_DECL = 18
|
22
|
+
XINCLUDE_START = 19
|
23
|
+
XINCLUDE_END = 20
|
15
24
|
DOCB_DOCUMENT_NODE = 21
|
16
25
|
|
17
26
|
attr_accessor :document
|
@@ -50,7 +59,26 @@ module Nokogiri
|
|
50
59
|
end
|
51
60
|
alias :/ :search
|
52
61
|
|
62
|
+
###
|
63
|
+
# Search this node for XPath +paths+. +paths+ must be one or more XPath
|
64
|
+
# queries. A hash of namespaces may be appended. For example:
|
65
|
+
#
|
66
|
+
# node.xpath('.//title')
|
67
|
+
# node.xpath('.//foo:name', { 'foo' => 'http://example.org/' })
|
68
|
+
# node.xpath('.//xmlns:name', node.root.namespaces)
|
69
|
+
#
|
70
|
+
# Custom XPath functions may also be defined. To define custom functions
|
71
|
+
# create a class which subclasses XPathHandler and implement the
|
72
|
+
# function you want to define. For example:
|
73
|
+
#
|
74
|
+
# node.xpath('.//title[regex(., "\w+")]', Class.new(XPathHandler) {
|
75
|
+
# def regex node_set, regex
|
76
|
+
# node_set.find_all { |node| node['some_attribute'] =~ /#{regex}/ }
|
77
|
+
# end
|
78
|
+
# })
|
79
|
+
#
|
53
80
|
def xpath *paths
|
81
|
+
handler = paths.last.is_a?(XPathHandler) ? paths.pop : nil
|
54
82
|
ns = paths.last.is_a?(Hash) ? paths.pop : {}
|
55
83
|
|
56
84
|
return NodeSet.new(document) unless document.root
|
@@ -58,7 +86,7 @@ module Nokogiri
|
|
58
86
|
sets = paths.map { |path|
|
59
87
|
ctx = XPathContext.new(self)
|
60
88
|
ctx.register_namespaces(ns)
|
61
|
-
set = ctx.evaluate(path).node_set
|
89
|
+
set = ctx.evaluate(path, handler).node_set
|
62
90
|
set.document = document
|
63
91
|
document.decorate(set)
|
64
92
|
set
|
@@ -75,8 +103,35 @@ module Nokogiri
|
|
75
103
|
end
|
76
104
|
end
|
77
105
|
|
106
|
+
###
|
107
|
+
# Search this node for CSS +rules+. +rules+ must be one or more CSS
|
108
|
+
# selectors. For example:
|
109
|
+
#
|
110
|
+
# node.css('title')
|
111
|
+
# node.css('body h1.bold')
|
112
|
+
# node.css('div + p.green', 'div#one')
|
113
|
+
#
|
114
|
+
# Custom CSS pseudo classes may also be defined. To define custom pseudo
|
115
|
+
# classes, create a class which subclasses SelectorHandler and implement
|
116
|
+
# the custom pseudo class you want defined. The first argument to
|
117
|
+
# the method will be the current matching NodeSet. Any other arguments
|
118
|
+
# are ones that you pass in. For example:
|
119
|
+
#
|
120
|
+
# node.css('title:regex("\w+")', Class.new(SelectorHandler) {
|
121
|
+
# def regex node_set, regex
|
122
|
+
# node_set.find_all { |node| node['some_attribute'] =~ /#{regex}/ }
|
123
|
+
# end
|
124
|
+
# })
|
125
|
+
#
|
78
126
|
def css *rules
|
79
|
-
|
127
|
+
handler = rules.last.is_a?(XPathHandler) ? rules.pop : nil
|
128
|
+
ns = rules.last.is_a?(Hash) ? rules.pop : {}
|
129
|
+
|
130
|
+
rules = rules.map { |rule|
|
131
|
+
CSS.xpath_for(rule, :prefix => ".//")
|
132
|
+
}.flatten.uniq + [ns, handler].compact
|
133
|
+
|
134
|
+
xpath(*rules)
|
80
135
|
end
|
81
136
|
|
82
137
|
def at path, ns = {}
|
@@ -143,6 +198,17 @@ module Nokogiri
|
|
143
198
|
self.native_content = encode_special_chars(string)
|
144
199
|
end
|
145
200
|
|
201
|
+
###
|
202
|
+
# Set the parent Node for this Node
|
203
|
+
def parent= parent_node
|
204
|
+
parent_node.add_child(self)
|
205
|
+
parent_node
|
206
|
+
end
|
207
|
+
|
208
|
+
def << child
|
209
|
+
add_child child
|
210
|
+
end
|
211
|
+
|
146
212
|
def comment?
|
147
213
|
type == COMMENT_NODE
|
148
214
|
end
|
@@ -182,12 +248,38 @@ module Nokogiri
|
|
182
248
|
ns
|
183
249
|
end
|
184
250
|
|
251
|
+
###
|
252
|
+
# Get a list of ancestor Nodes for this Node
|
253
|
+
def ancestors
|
254
|
+
parents = []
|
255
|
+
|
256
|
+
this_parent = self.parent
|
257
|
+
|
258
|
+
while this_parent != nil
|
259
|
+
parents << this_parent
|
260
|
+
this_parent = this_parent.parent
|
261
|
+
end
|
262
|
+
parents
|
263
|
+
end
|
264
|
+
|
185
265
|
####
|
186
266
|
# Yields self and all children to +block+ recursively.
|
187
267
|
def traverse(&block)
|
188
268
|
children.each{|j| j.traverse(&block) }
|
189
269
|
block.call(self)
|
190
270
|
end
|
271
|
+
|
272
|
+
####
|
273
|
+
# Replace this node with the new node in the document.
|
274
|
+
def replace(new_node)
|
275
|
+
if new_node.is_a?(Document)
|
276
|
+
raise ArgumentError, <<-EOERR
|
277
|
+
Node.replace requires a Node argument, and cannot accept a Document.
|
278
|
+
(You probably want to select a node from the Document with at() or search(), or create a new Node via Node.new().)
|
279
|
+
EOERR
|
280
|
+
end
|
281
|
+
replace_with_node new_node
|
282
|
+
end
|
191
283
|
end
|
192
284
|
end
|
193
285
|
end
|
data/lib/nokogiri/xslt.rb
CHANGED
@@ -6,6 +6,23 @@ module Nokogiri
|
|
6
6
|
def parse(string)
|
7
7
|
Stylesheet.parse_stylesheet_doc(XML.parse(string))
|
8
8
|
end
|
9
|
+
|
10
|
+
def quote_params params
|
11
|
+
parray = (params.instance_of?(Hash) ? params.to_a.flatten : params).dup
|
12
|
+
parray.each_with_index do |v,i|
|
13
|
+
if i % 2 > 0
|
14
|
+
parray[i]=
|
15
|
+
if v =~ /'/
|
16
|
+
"concat('#{ v.gsub(/'/, %q{', "'", '}) }')"
|
17
|
+
else
|
18
|
+
"'#{v}'";
|
19
|
+
end
|
20
|
+
else
|
21
|
+
parray[i] = v.to_s
|
22
|
+
end
|
23
|
+
end
|
24
|
+
parray.flatten
|
25
|
+
end
|
9
26
|
end
|
10
27
|
end
|
11
28
|
end
|
data/test/css/test_parser.rb
CHANGED
@@ -158,15 +158,15 @@ module Nokogiri
|
|
158
158
|
end
|
159
159
|
|
160
160
|
def test_pseudo_class_no_ident
|
161
|
-
assert_xpath "//*[
|
161
|
+
assert_xpath "//*[link(.)]", @parser.parse(':link')
|
162
162
|
end
|
163
163
|
|
164
164
|
def test_pseudo_class
|
165
|
-
assert_xpath "//a[
|
166
|
-
assert_xpath "//a[
|
167
|
-
assert_xpath "//a[
|
168
|
-
assert_xpath "//a[
|
169
|
-
assert_xpath "//a[
|
165
|
+
assert_xpath "//a[link(.)]", @parser.parse('a:link')
|
166
|
+
assert_xpath "//a[visited(.)]", @parser.parse('a:visited')
|
167
|
+
assert_xpath "//a[hover(.)]", @parser.parse('a:hover')
|
168
|
+
assert_xpath "//a[active(.)]", @parser.parse('a:active')
|
169
|
+
assert_xpath "//a[active(.) and contains(concat(' ', @class, ' '), ' foo ')]",
|
170
170
|
@parser.parse('a:active.foo')
|
171
171
|
end
|
172
172
|
|
@@ -7,6 +7,16 @@ module Nokogiri
|
|
7
7
|
@parser = Nokogiri::CSS::Parser.new
|
8
8
|
end
|
9
9
|
|
10
|
+
def test_unknown_psuedo_classes_get_pushed_down
|
11
|
+
assert_xpath("//a[aaron(.)]", @parser.parse('a:aaron'))
|
12
|
+
end
|
13
|
+
|
14
|
+
def test_unknown_functions_get_dot_plus_args
|
15
|
+
assert_xpath("//a[aaron(.)]", @parser.parse('a:aaron()'))
|
16
|
+
assert_xpath("//a[aaron(., 12)]", @parser.parse('a:aaron(12)'))
|
17
|
+
assert_xpath("//a[aaron(., 12, 1)]", @parser.parse('a:aaron(12, 1)'))
|
18
|
+
end
|
19
|
+
|
10
20
|
def test_class_selectors
|
11
21
|
assert_xpath "//*[contains(concat(' ', @class, ' '), ' red ')]",
|
12
22
|
@parser.parse(".red")
|
@@ -0,0 +1,35 @@
|
|
1
|
+
<?xml version="1.0" encoding="ISO-8859-1"?>
|
2
|
+
|
3
|
+
<xsl:stylesheet version="1.0"
|
4
|
+
xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
|
5
|
+
xmlns:func="http://exslt.org/functions"
|
6
|
+
xmlns:my="urn:my-functions"
|
7
|
+
xmlns:date="http://exslt.org/dates-and-times"
|
8
|
+
xmlns:math="http://exslt.org/math"
|
9
|
+
extension-element-prefixes="func date"
|
10
|
+
>
|
11
|
+
|
12
|
+
<xsl:param name="p1"/>
|
13
|
+
<xsl:param name="p2"/>
|
14
|
+
<xsl:param name="p3"/>
|
15
|
+
<xsl:param name="p4"/>
|
16
|
+
|
17
|
+
<xsl:template match="/">
|
18
|
+
<root>
|
19
|
+
<function><xsl:value-of select="my:func()"/></function>
|
20
|
+
<date><xsl:value-of select="date:date()"/></date>
|
21
|
+
<max><xsl:value-of select="math:max(//max/value)"/></max>
|
22
|
+
<params>
|
23
|
+
<p1><xsl:value-of select="$p1"/></p1>
|
24
|
+
<p2><xsl:value-of select="$p2"/></p2>
|
25
|
+
<p3><xsl:value-of select="$p3"/></p3>
|
26
|
+
<p4><xsl:value-of select="$p4"/></p4>
|
27
|
+
</params>
|
28
|
+
</root>
|
29
|
+
</xsl:template>
|
30
|
+
|
31
|
+
<func:function name="my:func">
|
32
|
+
<func:result select="'func-result'"/>
|
33
|
+
</func:function>
|
34
|
+
|
35
|
+
</xsl:stylesheet>
|
data/test/helper.rb
CHANGED
@@ -12,6 +12,8 @@ module Nokogiri
|
|
12
12
|
ASSETS_DIR = File.join(File.dirname(__FILE__), 'files')
|
13
13
|
XML_FILE = File.join(ASSETS_DIR, 'staff.xml')
|
14
14
|
XSLT_FILE = File.join(ASSETS_DIR, 'staff.xslt')
|
15
|
+
EXSLT_FILE = File.join(ASSETS_DIR, 'exslt.xslt')
|
16
|
+
EXML_FILE = File.join(ASSETS_DIR, 'exslt.xml')
|
15
17
|
HTML_FILE = File.join(ASSETS_DIR, 'tlm.html')
|
16
18
|
|
17
19
|
unless RUBY_VERSION >= '1.9'
|