nokogiri 1.1.1 → 1.2.0

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (88)
  1. data/History.ja.txt +34 -0
  2. data/History.txt +36 -0
  3. data/Manifest.txt +21 -0
  4. data/README.ja.txt +1 -1
  5. data/README.txt +1 -1
  6. data/Rakefile +27 -89
  7. data/ext/nokogiri/extconf.rb +48 -63
  8. data/ext/nokogiri/html_document.c +90 -29
  9. data/ext/nokogiri/html_sax_parser.c +23 -2
  10. data/ext/nokogiri/native.c +18 -8
  11. data/ext/nokogiri/native.h +22 -0
  12. data/ext/nokogiri/xml_attr.c +83 -0
  13. data/ext/nokogiri/xml_attr.h +9 -0
  14. data/ext/nokogiri/xml_cdata.c +1 -1
  15. data/ext/nokogiri/xml_document.c +84 -18
  16. data/ext/nokogiri/xml_document_fragment.c +38 -0
  17. data/ext/nokogiri/xml_document_fragment.h +10 -0
  18. data/ext/nokogiri/xml_dtd.c +2 -22
  19. data/ext/nokogiri/xml_entity_reference.c +41 -0
  20. data/ext/nokogiri/xml_entity_reference.h +9 -0
  21. data/ext/nokogiri/xml_io.c +10 -3
  22. data/ext/nokogiri/xml_io.h +1 -0
  23. data/ext/nokogiri/xml_node.c +116 -66
  24. data/ext/nokogiri/xml_node_set.c +5 -1
  25. data/ext/nokogiri/xml_processing_instruction.c +44 -0
  26. data/ext/nokogiri/xml_processing_instruction.h +9 -0
  27. data/ext/nokogiri/xml_reader.c +20 -4
  28. data/ext/nokogiri/xml_sax_parser.c +51 -15
  29. data/ext/nokogiri/xml_sax_push_parser.c +85 -0
  30. data/ext/nokogiri/xml_sax_push_parser.h +9 -0
  31. data/ext/nokogiri/xml_syntax_error.c +12 -8
  32. data/ext/nokogiri/xml_syntax_error.h +2 -1
  33. data/ext/nokogiri/xml_xpath_context.c +11 -2
  34. data/ext/nokogiri/xslt_stylesheet.c +1 -6
  35. data/lib/nokogiri.rb +10 -13
  36. data/lib/nokogiri/css.rb +1 -1
  37. data/lib/nokogiri/css/generated_parser.rb +287 -295
  38. data/lib/nokogiri/css/generated_tokenizer.rb +36 -51
  39. data/lib/nokogiri/css/node.rb +1 -3
  40. data/lib/nokogiri/css/parser.rb +21 -12
  41. data/lib/nokogiri/css/parser.y +55 -44
  42. data/lib/nokogiri/css/syntax_error.rb +2 -1
  43. data/lib/nokogiri/css/tokenizer.rex +23 -32
  44. data/lib/nokogiri/decorators/hpricot/node_set.rb +1 -1
  45. data/lib/nokogiri/html.rb +10 -4
  46. data/lib/nokogiri/html/document.rb +6 -2
  47. data/lib/nokogiri/syntax_error.rb +4 -0
  48. data/lib/nokogiri/version.rb +2 -1
  49. data/lib/nokogiri/xml.rb +3 -1
  50. data/lib/nokogiri/xml/attr.rb +3 -4
  51. data/lib/nokogiri/xml/cdata.rb +1 -1
  52. data/lib/nokogiri/xml/document.rb +4 -7
  53. data/lib/nokogiri/xml/document_fragment.rb +9 -0
  54. data/lib/nokogiri/xml/dtd.rb +3 -0
  55. data/lib/nokogiri/xml/node.rb +144 -40
  56. data/lib/nokogiri/xml/node/save_options.rb +32 -0
  57. data/lib/nokogiri/xml/node_set.rb +11 -20
  58. data/lib/nokogiri/xml/processing_instruction.rb +6 -0
  59. data/lib/nokogiri/xml/reader.rb +5 -0
  60. data/lib/nokogiri/xml/sax.rb +1 -0
  61. data/lib/nokogiri/xml/sax/push_parser.rb +47 -0
  62. data/lib/nokogiri/xml/syntax_error.rb +3 -1
  63. data/lib/nokogiri/xml/xpath/syntax_error.rb +1 -1
  64. data/tasks/test.rb +136 -0
  65. data/test/css/test_parser.rb +4 -0
  66. data/test/css/test_tokenizer.rb +30 -17
  67. data/test/css/test_xpath_visitor.rb +11 -0
  68. data/test/helper.rb +11 -0
  69. data/test/hpricot/test_builder.rb +2 -9
  70. data/test/hpricot/test_parser.rb +4 -4
  71. data/test/html/test_builder.rb +7 -7
  72. data/test/html/test_document.rb +90 -4
  73. data/test/html/test_node.rb +1 -0
  74. data/test/test_css_cache.rb +1 -3
  75. data/test/test_reader.rb +19 -1
  76. data/test/test_xslt_transforms.rb +1 -1
  77. data/test/xml/node/test_save_options.rb +20 -0
  78. data/test/xml/sax/test_parser.rb +17 -0
  79. data/test/xml/sax/test_push_parser.rb +67 -0
  80. data/test/xml/test_attr.rb +16 -0
  81. data/test/xml/test_cdata.rb +1 -1
  82. data/test/xml/test_document.rb +45 -0
  83. data/test/xml/test_document_fragment.rb +18 -0
  84. data/test/xml/test_dtd.rb +2 -4
  85. data/test/xml/test_entity_reference.rb +16 -0
  86. data/test/xml/test_node.rb +149 -80
  87. data/test/xml/test_processing_instruction.rb +24 -0
  88. metadata +28 -2
@@ -41,11 +41,7 @@ module Nokogiri
41
41
  last.after datum
42
42
  end
43
43
 
44
- ###
45
- # Append +node+ to the NodeSet.
46
- def << node
47
- push(node)
48
- end
44
+ alias :<< :push
49
45
 
50
46
  ###
51
47
  # Unlink this NodeSet and all Node objects it contains from their
@@ -163,12 +159,8 @@ module Nokogiri
163
159
  def wrap(html, &blk)
164
160
  each do |j|
165
161
  new_parent = Nokogiri.make(html, &blk)
166
- j.replace(new_parent)
167
- nest = new_parent
168
- if nest.child
169
- nest = nest.child until nest.child.nil?
170
- end
171
- j.parent = nest
162
+ j.parent.add_child(new_parent)
163
+ new_parent.add_child(j)
172
164
  end
173
165
  self
174
166
  end
@@ -177,21 +169,20 @@ module Nokogiri
177
169
  map { |x| x.to_s }.join
178
170
  end
179
171
 
180
- def to_html
181
- map { |x| x.to_html }.join('')
172
+ def to_html *args
173
+ map { |x| x.to_html(*args) }.join('')
182
174
  end
183
175
 
184
- def to_xml *args
185
- map { |x| x.to_xml(*args) }.join('')
176
+ def to_xhtml *args
177
+ map { |x| x.to_xhtml(*args) }.join('')
186
178
  end
187
179
 
188
- def size
189
- length
180
+ def to_xml *args
181
+ map { |x| x.to_xml(*args) }.join('')
190
182
  end
191
183
 
192
- def to_ary
193
- to_a
194
- end
184
+ alias :size :length
185
+ alias :to_ary :to_a
195
186
  end
196
187
  end
197
188
  end
@@ -0,0 +1,6 @@
1
+ module Nokogiri
2
+ module XML
3
+ class ProcessingInstruction < Node
4
+ end
5
+ end
6
+ end
@@ -2,6 +2,11 @@ module Nokogiri
2
2
  module XML
3
3
  class Reader
4
4
  include Enumerable
5
+ attr_accessor :errors
6
+
7
+ def initialize
8
+ @errors = []
9
+ end
5
10
 
6
11
  def attributes
7
12
  Hash[*(attribute_nodes.map { |node|
@@ -1,5 +1,6 @@
1
1
  require 'nokogiri/xml/sax/document'
2
2
  require 'nokogiri/xml/sax/parser'
3
+ require 'nokogiri/xml/sax/push_parser'
3
4
 
4
5
  module Nokogiri
5
6
  module XML
@@ -0,0 +1,47 @@
1
+ module Nokogiri
2
+ module XML
3
+ module SAX
4
+ ###
5
+ # PushParser can parse a document that is fed to it manually. It
6
+ # must be given a SAX::Document object which will be called with
7
+ # SAX events as the document is being parsed.
8
+ #
9
+ # Calling PushParser#<< writes XML to the parser, calling any SAX
10
+ # callbacks it can.
11
+ #
12
+ # PushParser#finish tells the parser that the document is finished
13
+ # and calls the end_document SAX method.
14
+ #
15
+ # Example:
16
+ #
17
+ # parser = PushParser.new(Class.new(XML::SAX::Document) {
18
+ # def start_document
19
+ # puts "start document called"
20
+ # end
21
+ # }.new)
22
+ # parser << "<div>hello<"
23
+ # parser << "/div>"
24
+ # parser.finish
25
+ class PushParser
26
+ attr_accessor :document
27
+
28
+ def initialize(doc = XML::SAX::Document.new, file_name = nil)
29
+ @document = doc
30
+ @sax_parser = XML::SAX::Parser.new(doc)
31
+
32
+ ## Create our push parser context
33
+ initialize_native(@sax_parser, file_name)
34
+ end
35
+
36
+ def write chunk, last_chunk = false
37
+ native_write(chunk, last_chunk)
38
+ end
39
+ alias :<< :write
40
+
41
+ def finish
42
+ write '', true
43
+ end
44
+ end
45
+ end
46
+ end
47
+ end
@@ -1,6 +1,6 @@
1
1
  module Nokogiri
2
2
  module XML
3
- class SyntaxError < SyntaxError
3
+ class SyntaxError < ::Nokogiri::SyntaxError
4
4
  def none?
5
5
  level == 0
6
6
  end
@@ -16,6 +16,8 @@ module Nokogiri
16
16
  def fatal?
17
17
  level == 3
18
18
  end
19
+
20
+ alias :to_s :message
19
21
  end
20
22
  end
21
23
  end
@@ -1,7 +1,7 @@
1
1
  module Nokogiri
2
2
  module XML
3
3
  class XPath
4
- class SyntaxError < ::SyntaxError
4
+ class SyntaxError < XML::SyntaxError
5
5
  end
6
6
  end
7
7
  end
@@ -0,0 +1,136 @@
1
+ # partial-loads-ok and undef-value-errors are necessary to ignore
2
+ # spurious (and eminently ignorable) warnings from the ruby
3
+ # interpreter
4
+ VALGRIND_BASIC_OPTS = "--num-callers=50 --error-limit=no --partial-loads-ok=yes --undef-value-errors=no"
5
+
6
+ class NokogiriTestTask < Rake::TestTask
7
+ def initialize *args
8
+ super
9
+ %w[ ext lib bin test ].each do |dir|
10
+ self.libs << dir
11
+ end
12
+ self.test_files = FileList['test/**/test_*.rb'] +
13
+ FileList['test/**/*_test.rb']
14
+ self.verbose = true
15
+ self.warning = true
16
+ end
17
+ end
18
+
19
+ desc "run test suite under valgrind with basic ruby options"
20
+ NokogiriTestTask.new('test:valgrind').extend(Module.new {
21
+ def ruby *args
22
+ cmd = "valgrind #{VALGRIND_BASIC_OPTS} #{RUBY} #{args.join(' ')}"
23
+ puts cmd
24
+ system cmd
25
+ end
26
+ })
27
+
28
+ desc "run test suite under valgrind with memory-fill ruby options"
29
+ NokogiriTestTask.new('test:valgrind_mem').extend(Module.new {
30
+ def ruby *args
31
+ cmd = "valgrind #{VALGRIND_BASIC_OPTS} --freelist-vol=100000000 --malloc-fill=6D --free-fill=66 #{RUBY} #{args.join(' ')}"
32
+ puts cmd
33
+ system cmd
34
+ end
35
+ })
36
+
37
+ desc "run test suite under valgrind with memory-zero ruby options"
38
+ NokogiriTestTask.new('test:valgrind_mem0').extend(Module.new {
39
+ def ruby *args
40
+ cmd = "valgrind #{VALGRIND_BASIC_OPTS} --freelist-vol=100000000 --malloc-fill=00 --free-fill=00 #{RUBY} #{args.join(' ')}"
41
+ puts cmd
42
+ system cmd
43
+ end
44
+ })
45
+
46
+ desc "run test suite under gdb"
47
+ NokogiriTestTask.new('test:gdb').extend(Module.new {
48
+ def ruby *args
49
+ cmd = "gdb --args #{RUBY} #{args.join(' ')}"
50
+ puts cmd
51
+ system cmd
52
+ end
53
+ })
54
+
55
+ desc "test coverage"
56
+ NokogiriTestTask.new('test:coverage').extend(Module.new {
57
+ def ruby *args
58
+ rm_rf "coverage"
59
+ cmd = "rcov -x Library -I lib:ext:test #{args.join(' ')}"
60
+ puts cmd
61
+ system cmd
62
+ end
63
+ })
64
+
65
+ namespace :test do
66
+ desc "run test suite with aggressive GC"
67
+ task :gc => :build do
68
+ ENV['NOKOGIRI_GC'] = "true"
69
+ Rake::Task["test"].invoke
70
+ end
71
+
72
+ desc "find call-seq in the rdoc"
73
+ task :rdoc => 'docs' do
74
+ Dir['doc/**/*.html'].each { |docfile|
75
+ next if docfile =~ /\.src/
76
+ puts "FAIL: #{docfile}" if File.read(docfile) =~ /call-seq/
77
+ }
78
+ end
79
+
80
+ desc "Test against multiple versions of libxml2"
81
+ task :multixml2 do
82
+ MULTI_XML = File.join(ENV['HOME'], '.multixml2')
83
+ unless File.exists?(MULTI_XML)
84
+ %w{ versions install build }.each { |x|
85
+ FileUtils.mkdir_p(File.join(MULTI_XML, x))
86
+ }
87
+ Dir.chdir File.join(MULTI_XML, 'versions') do
88
+ require 'net/ftp'
89
+ ftp = Net::FTP.new('xmlsoft.org')
90
+ ftp.login('anonymous', 'anonymous')
91
+ ftp.chdir('libxml2')
92
+ ftp.list('libxml2-2.*.tar.gz').each do |x|
93
+ file = x[/[^\s]*$/]
94
+ puts "Downloading #{file}"
95
+ ftp.getbinaryfile(file)
96
+ end
97
+ end
98
+ end
99
+
100
+ # Build any libxml2 versions in $HOME/.multixml2/versions that
101
+ # haven't been built yet
102
+ Dir[File.join(MULTI_XML, 'versions','*.tar.gz')].each do |f|
103
+ filename = File.basename(f, '.tar.gz')
104
+
105
+ install_dir = File.join(MULTI_XML, 'install', filename)
106
+ next if File.exists?(install_dir)
107
+
108
+ Dir.chdir File.join(MULTI_XML, 'versions') do
109
+ system "tar zxvf #{f} -C #{File.join(MULTI_XML, 'build')}"
110
+ end
111
+
112
+ Dir.chdir File.join(MULTI_XML, 'build', filename) do
113
+ system "./configure --prefix=#{install_dir}"
114
+ system "make && make install"
115
+ end
116
+ end
117
+
118
+ test_results = {}
119
+ Dir[File.join(MULTI_XML, 'install', '*')].each do |xml2_version|
120
+ extopts = "--with-xml2-include=#{xml2_version}/include/libxml2 --with-xml2-lib=#{xml2_version}/lib --with-xslt-dir=/usr/local"
121
+ cmd = "#{$0} clean test EXTOPTS='#{extopts}'"
122
+
123
+ version = File.basename(xml2_version)
124
+ result = system(cmd)
125
+ test_results[version] = {
126
+ :result => result,
127
+ :cmd => cmd
128
+ }
129
+ end
130
+ test_results.sort_by { |k,v| k }.each do |k,v|
131
+ passed = v[:result]
132
+ puts "#{k}: #{passed ? 'PASS' : 'FAIL'}"
133
+ puts "repro: #{v[:cmd]}" unless passed
134
+ end
135
+ end
136
+ end
@@ -7,6 +7,10 @@ module Nokogiri
7
7
  @parser = Nokogiri::CSS::Parser.new
8
8
  end
9
9
 
10
+ def test_extra_single_quote
11
+ assert_raises(CSS::SyntaxError) { @parser.parse("'") }
12
+ end
13
+
10
14
  def test_syntax_error_raised
11
15
  assert_raises(CSS::SyntaxError) { @parser.parse("a[x=]") }
12
16
  end
@@ -7,14 +7,29 @@ module Nokogiri
7
7
  @scanner = Nokogiri::CSS::Tokenizer.new
8
8
  end
9
9
 
10
+ def test_tokenize_bad_single_quote
11
+ @scanner.scan("'")
12
+ assert_tokens([["'", "'"]], @scanner)
13
+ end
14
+
10
15
  def test_not_equal
11
16
  @scanner.scan("h1[a!='Tender Lovemaking']")
12
17
  assert_tokens([ [:IDENT, 'h1'],
13
- ['[', '['],
18
+ [:LSQUARE, '['],
14
19
  [:IDENT, 'a'],
15
20
  [:NOT_EQUAL, '!='],
16
21
  [:STRING, "'Tender Lovemaking'"],
17
- [']', ']'],
22
+ [:RSQUARE, ']'],
23
+ ], @scanner)
24
+ end
25
+
26
+ def test_negation
27
+ @scanner.scan("p:not(.a)")
28
+ assert_tokens([ [:IDENT, 'p'],
29
+ [:NOT, ':not('],
30
+ ['.', '.'],
31
+ [:IDENT, 'a'],
32
+ [:RPAREN, ')'],
18
33
  ], @scanner)
19
34
  end
20
35
 
@@ -23,15 +38,14 @@ module Nokogiri
23
38
  assert_tokens([ [:IDENT, 'script'],
24
39
  [:S, ' '],
25
40
  [:FUNCTION, 'comment('],
26
- [')', ')'],
41
+ [:RPAREN, ')'],
27
42
  ], @scanner)
28
43
  end
29
44
 
30
45
  def test_preceding_selector
31
46
  @scanner.scan("E ~ F")
32
47
  assert_tokens([ [:IDENT, 'E'],
33
- [:TILDE, ' ~'],
34
- [:S, ' '],
48
+ [:TILDE, ' ~ '],
35
49
  [:IDENT, 'F'],
36
50
  ], @scanner)
37
51
  end
@@ -39,19 +53,19 @@ module Nokogiri
39
53
  def test_scan_attribute_string
40
54
  @scanner.scan("h1[a='Tender Lovemaking']")
41
55
  assert_tokens([ [:IDENT, 'h1'],
42
- ['[', '['],
56
+ [:LSQUARE, '['],
43
57
  [:IDENT, 'a'],
44
- ['=', '='],
58
+ [:EQUAL, '='],
45
59
  [:STRING, "'Tender Lovemaking'"],
46
- [']', ']'],
60
+ [:RSQUARE, ']'],
47
61
  ], @scanner)
48
62
  @scanner.scan('h1[a="Tender Lovemaking"]')
49
63
  assert_tokens([ [:IDENT, 'h1'],
50
- ['[', '['],
64
+ [:LSQUARE, '['],
51
65
  [:IDENT, 'a'],
52
- ['=', '='],
66
+ [:EQUAL, '='],
53
67
  [:STRING, '"Tender Lovemaking"'],
54
- [']', ']'],
68
+ [:RSQUARE, ']'],
55
69
  ], @scanner)
56
70
  end
57
71
 
@@ -84,8 +98,7 @@ module Nokogiri
84
98
  def test_scan_greater
85
99
  @scanner.scan('x > y')
86
100
  assert_tokens([ [:IDENT, 'x'],
87
- [:GREATER, ' >'],
88
- [:S, ' '],
101
+ [:GREATER, ' > '],
89
102
  [:IDENT, 'y']
90
103
  ], @scanner)
91
104
  end
@@ -112,7 +125,7 @@ module Nokogiri
112
125
  [':', ':'],
113
126
  [:FUNCTION, 'eq('],
114
127
  [:NUMBER, "0"],
115
- [")", ")"]
128
+ [:RPAREN, ')'],
116
129
  ], @scanner)
117
130
  end
118
131
 
@@ -125,7 +138,7 @@ module Nokogiri
125
138
  [:IDENT, 'n'],
126
139
  [:PLUS, '+'],
127
140
  [:NUMBER, '3'],
128
- [")", ")"]
141
+ [:RPAREN, ')'],
129
142
  ], @scanner)
130
143
 
131
144
  @scanner.scan('x:nth-child(-1n+3)')
@@ -136,7 +149,7 @@ module Nokogiri
136
149
  [:IDENT, 'n'],
137
150
  [:PLUS, '+'],
138
151
  [:NUMBER, '3'],
139
- [")", ")"]
152
+ [:RPAREN, ')'],
140
153
  ], @scanner)
141
154
 
142
155
  @scanner.scan('x:nth-child(-n+3)')
@@ -146,7 +159,7 @@ module Nokogiri
146
159
  [:IDENT, '-n'],
147
160
  [:PLUS, '+'],
148
161
  [:NUMBER, '3'],
149
- [")", ")"]
162
+ [:RPAREN, ')'],
150
163
  ], @scanner)
151
164
  end
152
165
 
@@ -7,6 +7,17 @@ module Nokogiri
7
7
  @parser = Nokogiri::CSS::Parser.new
8
8
  end
9
9
 
10
+ def test_function_calls_allow_at_params
11
+ assert_xpath("//a[foo(., @href)]", @parser.parse('a:foo(@href)'))
12
+ assert_xpath("//a[foo(., @a, b)]", @parser.parse('a:foo(@a, b)'))
13
+ assert_xpath("//a[foo(., a, 10)]", @parser.parse('a:foo(a, 10)'))
14
+ end
15
+
16
+ def test_namespace_conversion
17
+ assert_xpath("//aaron:a", @parser.parse('aaron|a'))
18
+ assert_xpath("//a", @parser.parse('|a'))
19
+ end
20
+
10
21
  def test_unknown_psuedo_classes_get_pushed_down
11
22
  assert_xpath("//a[aaron(.)]", @parser.parse('a:aaron'))
12
23
  end
@@ -34,6 +34,7 @@ module Nokogiri
34
34
  attr_reader :start_elements, :start_document_called
35
35
  attr_reader :end_elements, :end_document_called
36
36
  attr_reader :data, :comments, :cdata_blocks
37
+ attr_reader :errors, :warnings
37
38
 
38
39
  def start_document
39
40
  @start_document_called = true
@@ -45,6 +46,16 @@ module Nokogiri
45
46
  super
46
47
  end
47
48
 
49
+ def error error
50
+ (@errors ||= []) << error
51
+ super
52
+ end
53
+
54
+ def warning warning
55
+ (@warning ||= []) << warning
56
+ super
57
+ end
58
+
48
59
  def start_element *args
49
60
  (@start_elements ||= []) << args
50
61
  super