nokogiri 1.0.6 → 1.0.7

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (44) hide show
  1. data/History.ja.txt +11 -0
  2. data/History.txt +11 -0
  3. data/Manifest.txt +7 -0
  4. data/README.txt +7 -0
  5. data/Rakefile +43 -2
  6. data/ext/nokogiri/native.c +1 -0
  7. data/ext/nokogiri/native.h +1 -2
  8. data/ext/nokogiri/xml_cdata.c +3 -1
  9. data/ext/nokogiri/xml_comment.c +42 -0
  10. data/ext/nokogiri/xml_comment.h +9 -0
  11. data/ext/nokogiri/xml_document.c +1 -0
  12. data/ext/nokogiri/xml_node.c +21 -7
  13. data/ext/nokogiri/xml_sax_parser.c +26 -0
  14. data/ext/nokogiri/xml_syntax_error.c +4 -0
  15. data/ext/nokogiri/xml_text.c +16 -1
  16. data/ext/nokogiri/xml_xpath_context.c +4 -0
  17. data/ext/nokogiri/xslt_stylesheet.c +1 -1
  18. data/lib/action-nokogiri.rb +30 -0
  19. data/lib/nokogiri.rb +25 -4
  20. data/lib/nokogiri/css.rb +9 -4
  21. data/lib/nokogiri/css/generated_parser.rb +199 -171
  22. data/lib/nokogiri/css/parser.rb +11 -0
  23. data/lib/nokogiri/css/parser.y +16 -0
  24. data/lib/nokogiri/decorators.rb +1 -0
  25. data/lib/nokogiri/decorators/slop.rb +31 -0
  26. data/lib/nokogiri/hpricot.rb +2 -4
  27. data/lib/nokogiri/version.rb +1 -1
  28. data/lib/nokogiri/xml.rb +15 -0
  29. data/lib/nokogiri/xml/builder.rb +1 -1
  30. data/lib/nokogiri/xml/comment.rb +6 -0
  31. data/lib/nokogiri/xml/document.rb +22 -8
  32. data/lib/nokogiri/xml/sax/parser.rb +7 -3
  33. data/test/css/test_parser.rb +9 -0
  34. data/test/helper.rb +4 -1
  35. data/test/hpricot/load_files.rb +8 -4
  36. data/test/test_css_cache.rb +17 -10
  37. data/test/test_memory_leak.rb +38 -0
  38. data/test/test_nokogiri.rb +61 -0
  39. data/test/xml/sax/test_parser.rb +2 -0
  40. data/test/xml/test_comment.rb +16 -0
  41. data/test/xml/test_document.rb +2 -4
  42. data/test/xml/test_node.rb +13 -0
  43. data/vendor/hoe.rb +8 -22
  44. metadata +12 -3
@@ -12,21 +12,32 @@ module Nokogiri
12
12
  def set_cache setting
13
13
  @cache_on = setting ? true : false
14
14
  end
15
+
15
16
  def cache_on?
16
17
  @cache ||= {}
17
18
  instance_variable_defined?('@cache_on') ? @cache_on : true
18
19
  end
20
+
19
21
  def check_cache string
20
22
  return unless cache_on?
21
23
  @cache[string]
22
24
  end
25
+
23
26
  def add_cache string, value
24
27
  return value unless cache_on?
25
28
  @cache[string] = value
26
29
  end
30
+
27
31
  def clear_cache
28
32
  @cache = {}
29
33
  end
34
+
35
+ def without_cache &block
36
+ tmp = @cache_on
37
+ @cache_on = false
38
+ block.call
39
+ @cache_on = tmp
40
+ end
30
41
  end
31
42
  alias :parse :scan_str
32
43
 
@@ -29,6 +29,14 @@ rule
29
29
  Node.new(:CONDITIONAL_SELECTOR, [val.first, val[1]])
30
30
  end
31
31
  }
32
+ | element_name hcap_1toN negation {
33
+ result = Node.new(:CONDITIONAL_SELECTOR,
34
+ [
35
+ val.first,
36
+ Node.new(:COMBINATOR, [val[1], val.last])
37
+ ]
38
+ )
39
+ }
32
40
  | element_name negation {
33
41
  result = Node.new(:CONDITIONAL_SELECTOR, val)
34
42
  }
@@ -36,6 +44,14 @@ rule
36
44
  | function attrib {
37
45
  result = Node.new(:CONDITIONAL_SELECTOR, val)
38
46
  }
47
+ | hcap_1toN negation {
48
+ result = Node.new(:CONDITIONAL_SELECTOR,
49
+ [
50
+ Node.new(:ELEMENT_NAME, ['*']),
51
+ Node.new(:COMBINATOR, val)
52
+ ]
53
+ )
54
+ }
39
55
  | hcap_1toN {
40
56
  result = Node.new(:CONDITIONAL_SELECTOR,
41
57
  [Node.new(:ELEMENT_NAME, ['*']), val.first]
@@ -1 +1,2 @@
1
1
  require 'nokogiri/decorators/hpricot'
2
+ require 'nokogiri/decorators/slop'
@@ -0,0 +1,31 @@
1
+ module Nokogiri
2
+ module Decorators
3
+ ###
4
+ # The Slop decorator implements method_missing such that methods may be
5
+ # used instead of XPath or CSS. See Nokogiri.Slop
6
+ module Slop
7
+ def method_missing name, *args, &block
8
+ if args.empty?
9
+ list = xpath("./#{name}")
10
+ elsif args.first.is_a? Hash
11
+ hash = args.first
12
+ if hash[:css]
13
+ list = css("#{name}#{hash[:css]}")
14
+ elsif hash[:xpath]
15
+ conds = Array(hash[:xpath]).collect{|j| "[#{j}]"}
16
+ list = xpath("./#{name}#{conds}")
17
+ end
18
+ else
19
+ CSS::Parser.without_cache do
20
+ list = xpath(
21
+ *CSS.xpath_for("#{name}#{args.first}", :prefix => "./")
22
+ )
23
+ end
24
+ end
25
+
26
+ super if list.empty?
27
+ list.length == 1 ? list.first : list
28
+ end
29
+ end
30
+ end
31
+ end
@@ -29,10 +29,8 @@ module Nokogiri
29
29
  end
30
30
 
31
31
  def add_decorators(doc)
32
- doc.decorators['node'] << Decorators::Hpricot::Node
33
- doc.decorators['element'] << Decorators::Hpricot::Node
34
- doc.decorators['document'] << Decorators::Hpricot::Node
35
- doc.decorators['nodeset'] << Decorators::Hpricot::NodeSet
32
+ doc.decorators(XML::Node) << Decorators::Hpricot::Node
33
+ doc.decorators(XML::NodeSet) << Decorators::Hpricot::NodeSet
36
34
  doc.decorate!
37
35
  doc
38
36
  end
@@ -1,3 +1,3 @@
1
1
  module Nokogiri
2
- VERSION = '1.0.6'
2
+ VERSION = '1.0.7'
3
3
  end
@@ -5,6 +5,7 @@ require 'nokogiri/xml/node'
5
5
  require 'nokogiri/xml/dtd'
6
6
  require 'nokogiri/xml/text'
7
7
  require 'nokogiri/xml/cdata'
8
+ require 'nokogiri/xml/comment'
8
9
  require 'nokogiri/xml/document'
9
10
  require 'nokogiri/xml/node_set'
10
11
  require 'nokogiri/xml/xpath'
@@ -18,6 +19,14 @@ require 'nokogiri/xml/entity_declaration'
18
19
 
19
20
  module Nokogiri
20
21
  class << self
22
+ ###
23
+ # Parse an XML file. +thing+ may be a String, or any object that
24
+ # responds to _read_ and _close_ such as an IO, or StringIO.
25
+ # +url+ is the resource where this document is located. +encoding+ is the
26
+ # encoding that should be used when processing the document. +options+
27
+ # is a number that sets options in the parser, such as
28
+ # Nokogiri::XML::PARSE_RECOVER. See the constants in
29
+ # Nokogiri::XML.
21
30
  def XML thing, url = nil, encoding = nil, options = 1
22
31
  Nokogiri::XML.parse(thing, url, encoding, options)
23
32
  end
@@ -43,6 +52,8 @@ module Nokogiri
43
52
  PARSE_NOXINCNODE = 1 << 15 # do not generate XINCLUDE START/END nodes
44
53
 
45
54
  class << self
55
+ ###
56
+ # Parse an XML document. See Nokogiri.XML.
46
57
  def parse string_or_io, url = nil, encoding = nil, options = 2159
47
58
  if string_or_io.respond_to?(:read)
48
59
  url ||= string_or_io.respond_to?(:path) ? string_or_io.path : nil
@@ -55,10 +66,14 @@ module Nokogiri
55
66
  Document.read_memory(string_or_io, url, encoding, options)
56
67
  end
57
68
 
69
+ ###
70
+ # Sets whether or not entities should be substituted.
58
71
  def substitute_entities=(value = true)
59
72
  Document.substitute_entities = value
60
73
  end
61
74
 
75
+ ###
76
+ # Sets whether or not external subsets should be loaded.
62
77
  def load_external_subsets=(value = true)
63
78
  Document.load_external_subsets = value
64
79
  end
@@ -18,7 +18,7 @@ module Nokogiri
18
18
  end
19
19
 
20
20
  def cdata(string)
21
- node = Nokogiri::XML::CData.new(@doc, string)
21
+ node = Nokogiri::XML::CDATA.new(@doc, string)
22
22
  insert(node)
23
23
  end
24
24
 
@@ -0,0 +1,6 @@
1
+ module Nokogiri
2
+ module XML
3
+ class Comment < Node
4
+ end
5
+ end
6
+ end
@@ -1,10 +1,6 @@
1
1
  module Nokogiri
2
2
  module XML
3
3
  class Document < Node
4
- def decorators
5
- @decorators ||= Hash.new { |h,k| h[k] = [] }
6
- end
7
-
8
4
  def name
9
5
  'document'
10
6
  end
@@ -13,13 +9,31 @@ module Nokogiri
13
9
  self
14
10
  end
15
11
 
12
+ def decorators(key)
13
+ @decorators ||= Hash.new
14
+ @decorators[key] ||= []
15
+ end
16
+
17
+ ###
18
+ # Explore a document with shortcut methods.
19
+
20
+ def slop!
21
+ unless decorators(XML::Node).include? Nokogiri::Decorators::Slop
22
+ decorators(XML::Node) << Nokogiri::Decorators::Slop
23
+ decorate!
24
+ end
25
+
26
+ self
27
+ end
28
+
16
29
  ###
17
30
  # Apply any decorators to +node+
18
31
  def decorate(node)
19
- key = node.class.name.split('::').last.downcase
20
- decorators[key].each do |klass|
21
- node.extend(klass)
22
- end
32
+ return unless @decorators
33
+ @decorators.each { |klass,list|
34
+ next unless node.is_a?(klass)
35
+ list.each { |moodule| node.extend(moodule) }
36
+ }
23
37
  end
24
38
 
25
39
  def node_cache
@@ -11,13 +11,17 @@ module Nokogiri
11
11
  # Parse given +thing+ which may be a string containing xml, or an
12
12
  # IO object.
13
13
  def parse thing
14
- parse_memory(thing.is_a?(IO) ? thing.read : thing)
14
+ if thing.respond_to?(:read) && thing.respond_to?(:close)
15
+ parse_io(thing)
16
+ else
17
+ parse_memory(thing)
18
+ end
15
19
  end
16
20
 
17
21
  ###
18
22
  # Parse given +io+
19
- def parse_io io
20
- parse_memory io.read
23
+ def parse_io io, encoding = 0
24
+ native_parse_io io, encoding
21
25
  end
22
26
 
23
27
  ###
@@ -187,6 +187,15 @@ module Nokogiri
187
187
  @parser.parse('foo .awesome')
188
188
  end
189
189
 
190
+ def test_not_so_simple_not
191
+ assert_xpath "//*[@id = 'p' and not(contains(concat(' ', @class, ' '), ' a '))]",
192
+ @parser.parse('#p:not(.a)')
193
+ assert_xpath "//p[contains(concat(' ', @class, ' '), ' a ') and not(contains(concat(' ', @class, ' '), ' b '))]",
194
+ @parser.parse('p.a:not(.b)')
195
+ assert_xpath "//p[@a = 'foo' and not(contains(concat(' ', @class, ' '), ' b '))]",
196
+ @parser.parse("p[a='foo']:not(.b)")
197
+ end
198
+
190
199
  def test_ident
191
200
  assert_xpath '//x', @parser.parse('x')
192
201
  end
@@ -1,3 +1,4 @@
1
+ $VERBOSE = true
1
2
  require 'test/unit'
2
3
 
3
4
  %w(../lib ../ext).each do |path|
@@ -13,7 +14,9 @@ module Nokogiri
13
14
  XSLT_FILE = File.join(ASSETS_DIR, 'staff.xslt')
14
15
  HTML_FILE = File.join(ASSETS_DIR, 'tlm.html')
15
16
 
16
- undef :default_test
17
+ unless RUBY_VERSION >= '1.9'
18
+ undef :default_test
19
+ end
17
20
 
18
21
  def teardown
19
22
  if ENV['NOKOGIRI_GC']
@@ -1,7 +1,11 @@
1
1
  module TestFiles
2
- Dir.chdir(File.dirname(__FILE__)) do
3
- Dir['files/*.{html,xhtml,xml}'].each do |fname|
4
- const_set fname[%r!/(\w+)\.\w+$!, 1].upcase, IO.read(fname)
5
- end
2
+ Dir.chdir(File.dirname(__FILE__)) do
3
+ Dir['files/*.{html,xhtml,xml}'].each do |fname|
4
+ if RUBY_VERSION >= '1.9'
5
+ const_set fname[%r!/(\w+)\.\w+$!, 1].upcase, File.open(fname, 'r:ascii-8bit') { |f| f.read }
6
+ else
7
+ const_set fname[%r!/(\w+)\.\w+$!, 1].upcase, File.read(fname)
8
+ end
6
9
  end
10
+ end
7
11
  end
@@ -1,14 +1,22 @@
1
1
  require File.expand_path(File.join(File.dirname(__FILE__), "helper"))
2
2
 
3
- require 'rubygems'
4
- require 'mocha'
5
-
6
3
  class TestCssCache < Nokogiri::TestCase
7
4
 
8
5
  def setup
9
6
  @css = "a1 > b2 > c3"
10
7
  @parse_result = Nokogiri::CSS.parse(@css)
11
8
  @to_xpath_result = @parse_result.map {|ast| ast.to_xpath}
9
+ Nokogiri::CSS::Parser.class_eval do
10
+ class << @cache
11
+ alias :old_bracket :[]
12
+ attr_reader :count
13
+ def [](key)
14
+ @count ||= 0
15
+ @count += 1
16
+ old_bracket(key)
17
+ end
18
+ end
19
+ end
12
20
  assert Nokogiri::CSS::Parser.cache_on?
13
21
  end
14
22
 
@@ -19,30 +27,29 @@ class TestCssCache < Nokogiri::TestCase
19
27
 
20
28
  [ false, true ].each do |cache_setting|
21
29
  define_method "test_css_cache_#{cache_setting ? "true" : "false"}" do
22
- times = cache_setting ? 1 : 6
30
+ times = cache_setting ? 6 : nil
31
+
23
32
  Nokogiri::CSS::Parser.set_cache cache_setting
24
33
 
25
- Nokogiri::CSS::Parser.any_instance.expects(:parse).with(@css).returns(@parse_result).times(times)
26
- Nokogiri::CSS::Node.any_instance.expects(:to_xpath).returns(@to_xpath_result).times(times)
27
-
28
34
  Nokogiri::CSS.xpath_for(@css)
29
35
  Nokogiri::CSS.xpath_for(@css)
30
36
  Nokogiri::CSS::Parser.xpath_for(@css)
31
37
  Nokogiri::CSS::Parser.xpath_for(@css)
32
38
  Nokogiri::CSS::Parser.new.xpath_for(@css)
33
39
  Nokogiri::CSS::Parser.new.xpath_for(@css)
40
+
41
+ assert_equal(times, Nokogiri::CSS::Parser.class_eval { @cache.count })
34
42
  end
35
43
 
36
44
  define_method "test_hpricot_cache_#{cache_setting ? "true" : "false"}" do
37
- times = cache_setting ? 1 : 2
45
+ times = cache_setting ? 2 : nil
38
46
  Nokogiri::CSS::Parser.set_cache cache_setting
39
47
 
40
48
  nh = Nokogiri.Hpricot("<html></html>")
41
- Nokogiri::CSS::Parser.any_instance.expects(:parse).with(@css).returns(@parse_result).times(times)
42
- Nokogiri::CSS::Node.any_instance.expects(:to_xpath).returns(@to_xpath_result).times(times)
43
49
 
44
50
  nh.convert_to_xpath(@css)
45
51
  nh.convert_to_xpath(@css)
52
+ assert_equal(times, Nokogiri::CSS::Parser.class_eval { @cache.count })
46
53
  end
47
54
  end
48
55
 
@@ -0,0 +1,38 @@
1
+ require File.expand_path(File.join(File.dirname(__FILE__), "helper"))
2
+
3
+ class TestMemoryLeak < Nokogiri::TestCase
4
+ def test_for_memory_leak
5
+ begin
6
+ # we don't use Dike in any tests, but requiring it has side effects
7
+ # that can create memory leaks, and that's what we're testing for.
8
+ require 'rubygems'
9
+ require 'dike' # do not remove!
10
+
11
+ count_start = count_object_space_documents
12
+ xml_data = <<-EOS
13
+ <test>
14
+ <items>
15
+ <item>abc</item>
16
+ <item>1234</item>
17
+ <item>Zzz</item>
18
+ <items>
19
+ </test>
20
+ EOS
21
+ 10.times do
22
+ doc = Nokogiri::XML(xml_data)
23
+ doc.xpath("//item")
24
+ end
25
+ 2.times { GC.start }
26
+ count_end = count_object_space_documents
27
+ assert((count_end - count_start) == 0, "memory leak detected")
28
+ rescue LoadError
29
+ puts "\ndike is not installed, skipping memory leak test"
30
+ end
31
+ end
32
+
33
+ def count_object_space_documents
34
+ count = 0
35
+ ObjectSpace.each_object {|j| count += 1 if j.is_a?(Nokogiri::XML::Document) }
36
+ count
37
+ end
38
+ end
@@ -33,4 +33,65 @@ class TestNokogiri < Nokogiri::TestCase
33
33
  doc = Nokogiri.make { b "bold tag" }
34
34
  assert_equal('<b>bold tag</b>', doc.to_html.chomp)
35
35
  end
36
+
37
+ SLOP_HTML = <<-END
38
+ <html>
39
+ <body>
40
+ <ul>
41
+ <li class='red'>one</li>
42
+ <li class='blue'>two</li>
43
+ </ul>
44
+ <div>
45
+ one
46
+ <div>div two</div>
47
+ </div>
48
+ </body>
49
+ </html>
50
+ END
51
+
52
+ def test_slop_css
53
+ doc = Nokogiri::Slop(<<-eohtml)
54
+ <html>
55
+ <body>
56
+ <div>
57
+ one
58
+ <div class='foo'>
59
+ div two
60
+ <div class='foo'>
61
+ div three
62
+ </div>
63
+ </div>
64
+ </div>
65
+ </body>
66
+ </html>
67
+ eohtml
68
+ assert_equal "div", doc.html.body.div.div('.foo').name
69
+ end
70
+
71
+ def test_slop
72
+ doc = Nokogiri::Slop(SLOP_HTML)
73
+
74
+ assert_equal "one", doc.html.body.ul.li.first.text
75
+ assert_equal "two", doc.html.body.ul.li(".blue").text
76
+ assert_equal "div two", doc.html.body.div.div.text
77
+
78
+ assert_equal "two", doc.html.body.ul.li(:css => ".blue").text
79
+
80
+ assert_equal "two", doc.html.body.ul.li(:xpath => "position()=2").text
81
+ assert_equal "one", doc.html.body.ul.li(:xpath => ["contains(text(),'o')"]).first.text
82
+ assert_equal "two", doc.html.body.ul.li(:xpath => ["contains(text(),'o')","contains(text(),'t')"]).text
83
+
84
+ assert_raise(NoMethodError) { doc.nonexistent }
85
+ end
86
+
87
+ def test_slop_decorator
88
+ doc = Nokogiri(SLOP_HTML)
89
+ assert !doc.decorators(Nokogiri::XML::Node).include?(Nokogiri::Decorators::Slop)
90
+
91
+ doc.slop!
92
+ assert doc.decorators(Nokogiri::XML::Node).include?(Nokogiri::Decorators::Slop)
93
+
94
+ doc.slop!
95
+ assert_equal 1, doc.decorators(Nokogiri::XML::Node).select { |d| d == Nokogiri::Decorators::Slop }.size
96
+ end
36
97
  end