nokogiri 1.0.6 → 1.0.7
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of nokogiri might be problematic. Click here for more details.
- data/History.ja.txt +11 -0
- data/History.txt +11 -0
- data/Manifest.txt +7 -0
- data/README.txt +7 -0
- data/Rakefile +43 -2
- data/ext/nokogiri/native.c +1 -0
- data/ext/nokogiri/native.h +1 -2
- data/ext/nokogiri/xml_cdata.c +3 -1
- data/ext/nokogiri/xml_comment.c +42 -0
- data/ext/nokogiri/xml_comment.h +9 -0
- data/ext/nokogiri/xml_document.c +1 -0
- data/ext/nokogiri/xml_node.c +21 -7
- data/ext/nokogiri/xml_sax_parser.c +26 -0
- data/ext/nokogiri/xml_syntax_error.c +4 -0
- data/ext/nokogiri/xml_text.c +16 -1
- data/ext/nokogiri/xml_xpath_context.c +4 -0
- data/ext/nokogiri/xslt_stylesheet.c +1 -1
- data/lib/action-nokogiri.rb +30 -0
- data/lib/nokogiri.rb +25 -4
- data/lib/nokogiri/css.rb +9 -4
- data/lib/nokogiri/css/generated_parser.rb +199 -171
- data/lib/nokogiri/css/parser.rb +11 -0
- data/lib/nokogiri/css/parser.y +16 -0
- data/lib/nokogiri/decorators.rb +1 -0
- data/lib/nokogiri/decorators/slop.rb +31 -0
- data/lib/nokogiri/hpricot.rb +2 -4
- data/lib/nokogiri/version.rb +1 -1
- data/lib/nokogiri/xml.rb +15 -0
- data/lib/nokogiri/xml/builder.rb +1 -1
- data/lib/nokogiri/xml/comment.rb +6 -0
- data/lib/nokogiri/xml/document.rb +22 -8
- data/lib/nokogiri/xml/sax/parser.rb +7 -3
- data/test/css/test_parser.rb +9 -0
- data/test/helper.rb +4 -1
- data/test/hpricot/load_files.rb +8 -4
- data/test/test_css_cache.rb +17 -10
- data/test/test_memory_leak.rb +38 -0
- data/test/test_nokogiri.rb +61 -0
- data/test/xml/sax/test_parser.rb +2 -0
- data/test/xml/test_comment.rb +16 -0
- data/test/xml/test_document.rb +2 -4
- data/test/xml/test_node.rb +13 -0
- data/vendor/hoe.rb +8 -22
- metadata +12 -3
data/lib/nokogiri/css/parser.rb
CHANGED
@@ -12,21 +12,32 @@ module Nokogiri
|
|
12
12
|
def set_cache setting
|
13
13
|
@cache_on = setting ? true : false
|
14
14
|
end
|
15
|
+
|
15
16
|
def cache_on?
|
16
17
|
@cache ||= {}
|
17
18
|
instance_variable_defined?('@cache_on') ? @cache_on : true
|
18
19
|
end
|
20
|
+
|
19
21
|
def check_cache string
|
20
22
|
return unless cache_on?
|
21
23
|
@cache[string]
|
22
24
|
end
|
25
|
+
|
23
26
|
def add_cache string, value
|
24
27
|
return value unless cache_on?
|
25
28
|
@cache[string] = value
|
26
29
|
end
|
30
|
+
|
27
31
|
def clear_cache
|
28
32
|
@cache = {}
|
29
33
|
end
|
34
|
+
|
35
|
+
def without_cache &block
|
36
|
+
tmp = @cache_on
|
37
|
+
@cache_on = false
|
38
|
+
block.call
|
39
|
+
@cache_on = tmp
|
40
|
+
end
|
30
41
|
end
|
31
42
|
alias :parse :scan_str
|
32
43
|
|
data/lib/nokogiri/css/parser.y
CHANGED
@@ -29,6 +29,14 @@ rule
|
|
29
29
|
Node.new(:CONDITIONAL_SELECTOR, [val.first, val[1]])
|
30
30
|
end
|
31
31
|
}
|
32
|
+
| element_name hcap_1toN negation {
|
33
|
+
result = Node.new(:CONDITIONAL_SELECTOR,
|
34
|
+
[
|
35
|
+
val.first,
|
36
|
+
Node.new(:COMBINATOR, [val[1], val.last])
|
37
|
+
]
|
38
|
+
)
|
39
|
+
}
|
32
40
|
| element_name negation {
|
33
41
|
result = Node.new(:CONDITIONAL_SELECTOR, val)
|
34
42
|
}
|
@@ -36,6 +44,14 @@ rule
|
|
36
44
|
| function attrib {
|
37
45
|
result = Node.new(:CONDITIONAL_SELECTOR, val)
|
38
46
|
}
|
47
|
+
| hcap_1toN negation {
|
48
|
+
result = Node.new(:CONDITIONAL_SELECTOR,
|
49
|
+
[
|
50
|
+
Node.new(:ELEMENT_NAME, ['*']),
|
51
|
+
Node.new(:COMBINATOR, val)
|
52
|
+
]
|
53
|
+
)
|
54
|
+
}
|
39
55
|
| hcap_1toN {
|
40
56
|
result = Node.new(:CONDITIONAL_SELECTOR,
|
41
57
|
[Node.new(:ELEMENT_NAME, ['*']), val.first]
|
data/lib/nokogiri/decorators.rb
CHANGED
data/lib/nokogiri/decorators/slop.rb
ADDED
@@ -0,0 +1,31 @@
|
|
1
|
+
module Nokogiri
|
2
|
+
module Decorators
|
3
|
+
###
|
4
|
+
# The Slop decorator implements method_missing so that methods may be
|
5
|
+
# used instead of XPath or CSS. See Nokogiri.Slop
|
6
|
+
module Slop
|
7
|
+
def method_missing name, *args, &block
|
8
|
+
if args.empty?
|
9
|
+
list = xpath("./#{name}")
|
10
|
+
elsif args.first.is_a? Hash
|
11
|
+
hash = args.first
|
12
|
+
if hash[:css]
|
13
|
+
list = css("#{name}#{hash[:css]}")
|
14
|
+
elsif hash[:xpath]
|
15
|
+
conds = Array(hash[:xpath]).collect{|j| "[#{j}]"}
|
16
|
+
list = xpath("./#{name}#{conds}")
|
17
|
+
end
|
18
|
+
else
|
19
|
+
CSS::Parser.without_cache do
|
20
|
+
list = xpath(
|
21
|
+
*CSS.xpath_for("#{name}#{args.first}", :prefix => "./")
|
22
|
+
)
|
23
|
+
end
|
24
|
+
end
|
25
|
+
|
26
|
+
super if list.empty?
|
27
|
+
list.length == 1 ? list.first : list
|
28
|
+
end
|
29
|
+
end
|
30
|
+
end
|
31
|
+
end
|
data/lib/nokogiri/hpricot.rb
CHANGED
@@ -29,10 +29,8 @@ module Nokogiri
|
|
29
29
|
end
|
30
30
|
|
31
31
|
def add_decorators(doc)
|
32
|
-
doc.decorators
|
33
|
-
doc.decorators
|
34
|
-
doc.decorators['document'] << Decorators::Hpricot::Node
|
35
|
-
doc.decorators['nodeset'] << Decorators::Hpricot::NodeSet
|
32
|
+
doc.decorators(XML::Node) << Decorators::Hpricot::Node
|
33
|
+
doc.decorators(XML::NodeSet) << Decorators::Hpricot::NodeSet
|
36
34
|
doc.decorate!
|
37
35
|
doc
|
38
36
|
end
|
data/lib/nokogiri/version.rb
CHANGED
data/lib/nokogiri/xml.rb
CHANGED
@@ -5,6 +5,7 @@ require 'nokogiri/xml/node'
|
|
5
5
|
require 'nokogiri/xml/dtd'
|
6
6
|
require 'nokogiri/xml/text'
|
7
7
|
require 'nokogiri/xml/cdata'
|
8
|
+
require 'nokogiri/xml/comment'
|
8
9
|
require 'nokogiri/xml/document'
|
9
10
|
require 'nokogiri/xml/node_set'
|
10
11
|
require 'nokogiri/xml/xpath'
|
@@ -18,6 +19,14 @@ require 'nokogiri/xml/entity_declaration'
|
|
18
19
|
|
19
20
|
module Nokogiri
|
20
21
|
class << self
|
22
|
+
###
|
23
|
+
# Parse an XML file. +thing+ may be a String, or any object that
|
24
|
+
# responds to _read_ and _close_ such as an IO, or StringIO.
|
25
|
+
# +url+ is the resource where this document is located. +encoding+ is the
|
26
|
+
# encoding that should be used when processing the document. +options+
|
27
|
+
# is a number that sets options in the parser, such as
|
28
|
+
# Nokogiri::XML::PARSE_RECOVER. See the constants in
|
29
|
+
# Nokogiri::XML.
|
21
30
|
def XML thing, url = nil, encoding = nil, options = 1
|
22
31
|
Nokogiri::XML.parse(thing, url, encoding, options)
|
23
32
|
end
|
@@ -43,6 +52,8 @@ module Nokogiri
|
|
43
52
|
PARSE_NOXINCNODE = 1 << 15 # do not generate XINCLUDE START/END nodes
|
44
53
|
|
45
54
|
class << self
|
55
|
+
###
|
56
|
+
# Parse an XML document. See Nokogiri.XML.
|
46
57
|
def parse string_or_io, url = nil, encoding = nil, options = 2159
|
47
58
|
if string_or_io.respond_to?(:read)
|
48
59
|
url ||= string_or_io.respond_to?(:path) ? string_or_io.path : nil
|
@@ -55,10 +66,14 @@ module Nokogiri
|
|
55
66
|
Document.read_memory(string_or_io, url, encoding, options)
|
56
67
|
end
|
57
68
|
|
69
|
+
###
|
70
|
+
# Sets whether or not entities should be substituted.
|
58
71
|
def substitute_entities=(value = true)
|
59
72
|
Document.substitute_entities = value
|
60
73
|
end
|
61
74
|
|
75
|
+
###
|
76
|
+
# Sets whether or not external subsets should be loaded
|
62
77
|
def load_external_subsets=(value = true)
|
63
78
|
Document.load_external_subsets = value
|
64
79
|
end
|
data/lib/nokogiri/xml/builder.rb
CHANGED
data/lib/nokogiri/xml/document.rb
CHANGED
@@ -1,10 +1,6 @@
|
|
1
1
|
module Nokogiri
|
2
2
|
module XML
|
3
3
|
class Document < Node
|
4
|
-
def decorators
|
5
|
-
@decorators ||= Hash.new { |h,k| h[k] = [] }
|
6
|
-
end
|
7
|
-
|
8
4
|
def name
|
9
5
|
'document'
|
10
6
|
end
|
@@ -13,13 +9,31 @@ module Nokogiri
|
|
13
9
|
self
|
14
10
|
end
|
15
11
|
|
12
|
+
def decorators(key)
|
13
|
+
@decorators ||= Hash.new
|
14
|
+
@decorators[key] ||= []
|
15
|
+
end
|
16
|
+
|
17
|
+
###
|
18
|
+
# Explore a document with shortcut methods.
|
19
|
+
|
20
|
+
def slop!
|
21
|
+
unless decorators(XML::Node).include? Nokogiri::Decorators::Slop
|
22
|
+
decorators(XML::Node) << Nokogiri::Decorators::Slop
|
23
|
+
decorate!
|
24
|
+
end
|
25
|
+
|
26
|
+
self
|
27
|
+
end
|
28
|
+
|
16
29
|
###
|
17
30
|
# Apply any decorators to +node+
|
18
31
|
def decorate(node)
|
19
|
-
|
20
|
-
decorators
|
21
|
-
node.
|
22
|
-
|
32
|
+
return unless @decorators
|
33
|
+
@decorators.each { |klass,list|
|
34
|
+
next unless node.is_a?(klass)
|
35
|
+
list.each { |moodule| node.extend(moodule) }
|
36
|
+
}
|
23
37
|
end
|
24
38
|
|
25
39
|
def node_cache
|
data/lib/nokogiri/xml/sax/parser.rb
CHANGED
@@ -11,13 +11,17 @@ module Nokogiri
|
|
11
11
|
# Parse given +thing+ which may be a string containing xml, or an
|
12
12
|
# IO object.
|
13
13
|
def parse thing
|
14
|
-
|
14
|
+
if thing.respond_to?(:read) && thing.respond_to?(:close)
|
15
|
+
parse_io(thing)
|
16
|
+
else
|
17
|
+
parse_memory(thing)
|
18
|
+
end
|
15
19
|
end
|
16
20
|
|
17
21
|
###
|
18
22
|
# Parse given +io+
|
19
|
-
def parse_io io
|
20
|
-
|
23
|
+
def parse_io io, encoding = 0
|
24
|
+
native_parse_io io, encoding
|
21
25
|
end
|
22
26
|
|
23
27
|
###
|
data/test/css/test_parser.rb
CHANGED
@@ -187,6 +187,15 @@ module Nokogiri
|
|
187
187
|
@parser.parse('foo .awesome')
|
188
188
|
end
|
189
189
|
|
190
|
+
def test_not_so_simple_not
|
191
|
+
assert_xpath "//*[@id = 'p' and not(contains(concat(' ', @class, ' '), ' a '))]",
|
192
|
+
@parser.parse('#p:not(.a)')
|
193
|
+
assert_xpath "//p[contains(concat(' ', @class, ' '), ' a ') and not(contains(concat(' ', @class, ' '), ' b '))]",
|
194
|
+
@parser.parse('p.a:not(.b)')
|
195
|
+
assert_xpath "//p[@a = 'foo' and not(contains(concat(' ', @class, ' '), ' b '))]",
|
196
|
+
@parser.parse("p[a='foo']:not(.b)")
|
197
|
+
end
|
198
|
+
|
190
199
|
def test_ident
|
191
200
|
assert_xpath '//x', @parser.parse('x')
|
192
201
|
end
|
data/test/helper.rb
CHANGED
@@ -1,3 +1,4 @@
|
|
1
|
+
$VERBOSE = true
|
1
2
|
require 'test/unit'
|
2
3
|
|
3
4
|
%w(../lib ../ext).each do |path|
|
@@ -13,7 +14,9 @@ module Nokogiri
|
|
13
14
|
XSLT_FILE = File.join(ASSETS_DIR, 'staff.xslt')
|
14
15
|
HTML_FILE = File.join(ASSETS_DIR, 'tlm.html')
|
15
16
|
|
16
|
-
|
17
|
+
unless RUBY_VERSION >= '1.9'
|
18
|
+
undef :default_test
|
19
|
+
end
|
17
20
|
|
18
21
|
def teardown
|
19
22
|
if ENV['NOKOGIRI_GC']
|
data/test/hpricot/load_files.rb
CHANGED
@@ -1,7 +1,11 @@
|
|
1
1
|
module TestFiles
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
2
|
+
Dir.chdir(File.dirname(__FILE__)) do
|
3
|
+
Dir['files/*.{html,xhtml,xml}'].each do |fname|
|
4
|
+
if RUBY_VERSION >= '1.9'
|
5
|
+
const_set fname[%r!/(\w+)\.\w+$!, 1].upcase, File.open(fname, 'r:ascii-8bit') { |f| f.read }
|
6
|
+
else
|
7
|
+
const_set fname[%r!/(\w+)\.\w+$!, 1].upcase, File.read(fname)
|
8
|
+
end
|
6
9
|
end
|
10
|
+
end
|
7
11
|
end
|
data/test/test_css_cache.rb
CHANGED
@@ -1,14 +1,22 @@
|
|
1
1
|
require File.expand_path(File.join(File.dirname(__FILE__), "helper"))
|
2
2
|
|
3
|
-
require 'rubygems'
|
4
|
-
require 'mocha'
|
5
|
-
|
6
3
|
class TestCssCache < Nokogiri::TestCase
|
7
4
|
|
8
5
|
def setup
|
9
6
|
@css = "a1 > b2 > c3"
|
10
7
|
@parse_result = Nokogiri::CSS.parse(@css)
|
11
8
|
@to_xpath_result = @parse_result.map {|ast| ast.to_xpath}
|
9
|
+
Nokogiri::CSS::Parser.class_eval do
|
10
|
+
class << @cache
|
11
|
+
alias :old_bracket :[]
|
12
|
+
attr_reader :count
|
13
|
+
def [](key)
|
14
|
+
@count ||= 0
|
15
|
+
@count += 1
|
16
|
+
old_bracket(key)
|
17
|
+
end
|
18
|
+
end
|
19
|
+
end
|
12
20
|
assert Nokogiri::CSS::Parser.cache_on?
|
13
21
|
end
|
14
22
|
|
@@ -19,30 +27,29 @@ class TestCssCache < Nokogiri::TestCase
|
|
19
27
|
|
20
28
|
[ false, true ].each do |cache_setting|
|
21
29
|
define_method "test_css_cache_#{cache_setting ? "true" : "false"}" do
|
22
|
-
times = cache_setting ?
|
30
|
+
times = cache_setting ? 6 : nil
|
31
|
+
|
23
32
|
Nokogiri::CSS::Parser.set_cache cache_setting
|
24
33
|
|
25
|
-
Nokogiri::CSS::Parser.any_instance.expects(:parse).with(@css).returns(@parse_result).times(times)
|
26
|
-
Nokogiri::CSS::Node.any_instance.expects(:to_xpath).returns(@to_xpath_result).times(times)
|
27
|
-
|
28
34
|
Nokogiri::CSS.xpath_for(@css)
|
29
35
|
Nokogiri::CSS.xpath_for(@css)
|
30
36
|
Nokogiri::CSS::Parser.xpath_for(@css)
|
31
37
|
Nokogiri::CSS::Parser.xpath_for(@css)
|
32
38
|
Nokogiri::CSS::Parser.new.xpath_for(@css)
|
33
39
|
Nokogiri::CSS::Parser.new.xpath_for(@css)
|
40
|
+
|
41
|
+
assert_equal(times, Nokogiri::CSS::Parser.class_eval { @cache.count })
|
34
42
|
end
|
35
43
|
|
36
44
|
define_method "test_hpricot_cache_#{cache_setting ? "true" : "false"}" do
|
37
|
-
times = cache_setting ?
|
45
|
+
times = cache_setting ? 2 : nil
|
38
46
|
Nokogiri::CSS::Parser.set_cache cache_setting
|
39
47
|
|
40
48
|
nh = Nokogiri.Hpricot("<html></html>")
|
41
|
-
Nokogiri::CSS::Parser.any_instance.expects(:parse).with(@css).returns(@parse_result).times(times)
|
42
|
-
Nokogiri::CSS::Node.any_instance.expects(:to_xpath).returns(@to_xpath_result).times(times)
|
43
49
|
|
44
50
|
nh.convert_to_xpath(@css)
|
45
51
|
nh.convert_to_xpath(@css)
|
52
|
+
assert_equal(times, Nokogiri::CSS::Parser.class_eval { @cache.count })
|
46
53
|
end
|
47
54
|
end
|
48
55
|
|
data/test/test_memory_leak.rb
ADDED
@@ -0,0 +1,38 @@
|
|
1
|
+
require File.expand_path(File.join(File.dirname(__FILE__), "helper"))
|
2
|
+
|
3
|
+
class TestMemoryLeak < Nokogiri::TestCase
|
4
|
+
def test_for_memory_leak
|
5
|
+
begin
|
6
|
+
# we don't use Dike in any tests, but requiring it has side effects
|
7
|
+
# that can create memory leaks, and that's what we're testing for.
|
8
|
+
require 'rubygems'
|
9
|
+
require 'dike' # do not remove!
|
10
|
+
|
11
|
+
count_start = count_object_space_documents
|
12
|
+
xml_data = <<-EOS
|
13
|
+
<test>
|
14
|
+
<items>
|
15
|
+
<item>abc</item>
|
16
|
+
<item>1234</item>
|
17
|
+
<item>Zzz</item>
|
18
|
+
<items>
|
19
|
+
</test>
|
20
|
+
EOS
|
21
|
+
10.times do
|
22
|
+
doc = Nokogiri::XML(xml_data)
|
23
|
+
doc.xpath("//item")
|
24
|
+
end
|
25
|
+
2.times { GC.start }
|
26
|
+
count_end = count_object_space_documents
|
27
|
+
assert((count_end - count_start) == 0, "memory leak detected")
|
28
|
+
rescue LoadError
|
29
|
+
puts "\ndike is not installed, skipping memory leak test"
|
30
|
+
end
|
31
|
+
end
|
32
|
+
|
33
|
+
def count_object_space_documents
|
34
|
+
count = 0
|
35
|
+
ObjectSpace.each_object {|j| count += 1 if j.is_a?(Nokogiri::XML::Document) }
|
36
|
+
count
|
37
|
+
end
|
38
|
+
end
|
data/test/test_nokogiri.rb
CHANGED
@@ -33,4 +33,65 @@ class TestNokogiri < Nokogiri::TestCase
|
|
33
33
|
doc = Nokogiri.make { b "bold tag" }
|
34
34
|
assert_equal('<b>bold tag</b>', doc.to_html.chomp)
|
35
35
|
end
|
36
|
+
|
37
|
+
SLOP_HTML = <<-END
|
38
|
+
<html>
|
39
|
+
<body>
|
40
|
+
<ul>
|
41
|
+
<li class='red'>one</li>
|
42
|
+
<li class='blue'>two</li>
|
43
|
+
</ul>
|
44
|
+
<div>
|
45
|
+
one
|
46
|
+
<div>div two</div>
|
47
|
+
</div>
|
48
|
+
</body>
|
49
|
+
</html>
|
50
|
+
END
|
51
|
+
|
52
|
+
def test_slop_css
|
53
|
+
doc = Nokogiri::Slop(<<-eohtml)
|
54
|
+
<html>
|
55
|
+
<body>
|
56
|
+
<div>
|
57
|
+
one
|
58
|
+
<div class='foo'>
|
59
|
+
div two
|
60
|
+
<div class='foo'>
|
61
|
+
div three
|
62
|
+
</div>
|
63
|
+
</div>
|
64
|
+
</div>
|
65
|
+
</body>
|
66
|
+
</html>
|
67
|
+
eohtml
|
68
|
+
assert_equal "div", doc.html.body.div.div('.foo').name
|
69
|
+
end
|
70
|
+
|
71
|
+
def test_slop
|
72
|
+
doc = Nokogiri::Slop(SLOP_HTML)
|
73
|
+
|
74
|
+
assert_equal "one", doc.html.body.ul.li.first.text
|
75
|
+
assert_equal "two", doc.html.body.ul.li(".blue").text
|
76
|
+
assert_equal "div two", doc.html.body.div.div.text
|
77
|
+
|
78
|
+
assert_equal "two", doc.html.body.ul.li(:css => ".blue").text
|
79
|
+
|
80
|
+
assert_equal "two", doc.html.body.ul.li(:xpath => "position()=2").text
|
81
|
+
assert_equal "one", doc.html.body.ul.li(:xpath => ["contains(text(),'o')"]).first.text
|
82
|
+
assert_equal "two", doc.html.body.ul.li(:xpath => ["contains(text(),'o')","contains(text(),'t')"]).text
|
83
|
+
|
84
|
+
assert_raise(NoMethodError) { doc.nonexistent }
|
85
|
+
end
|
86
|
+
|
87
|
+
def test_slop_decorator
|
88
|
+
doc = Nokogiri(SLOP_HTML)
|
89
|
+
assert !doc.decorators(Nokogiri::XML::Node).include?(Nokogiri::Decorators::Slop)
|
90
|
+
|
91
|
+
doc.slop!
|
92
|
+
assert doc.decorators(Nokogiri::XML::Node).include?(Nokogiri::Decorators::Slop)
|
93
|
+
|
94
|
+
doc.slop!
|
95
|
+
assert_equal 1, doc.decorators(Nokogiri::XML::Node).select { |d| d == Nokogiri::Decorators::Slop }.size
|
96
|
+
end
|
36
97
|
end
|