nokogiri 1.8.2 → 1.8.3

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (52)
  1. checksums.yaml +4 -4
  2. data/.travis.yml +14 -14
  3. data/CHANGELOG.md +43 -1
  4. data/LICENSE.md +2 -1
  5. data/Manifest.txt +3 -0
  6. data/README.md +20 -21
  7. data/Rakefile +2 -8
  8. data/SECURITY.md +19 -0
  9. data/build_all +2 -2
  10. data/dependencies.yml +11 -11
  11. data/ext/nokogiri/extconf.rb +1 -1
  12. data/ext/nokogiri/html_element_description.c +14 -14
  13. data/ext/nokogiri/xml_cdata.c +6 -4
  14. data/ext/nokogiri/xml_document.c +2 -3
  15. data/ext/nokogiri/xml_dtd.c +2 -2
  16. data/ext/nokogiri/xml_io.c +1 -0
  17. data/ext/nokogiri/xml_namespace.c +3 -9
  18. data/ext/nokogiri/xml_namespace.h +2 -0
  19. data/ext/nokogiri/xml_node.c +23 -15
  20. data/ext/nokogiri/xml_node_set.c +5 -4
  21. data/ext/nokogiri/xml_node_set.h +0 -1
  22. data/ext/nokogiri/xslt_stylesheet.c +2 -2
  23. data/lib/nokogiri/css/parser.rb +108 -90
  24. data/lib/nokogiri/css/parser.y +13 -2
  25. data/lib/nokogiri/css/tokenizer.rb +1 -1
  26. data/lib/nokogiri/css/tokenizer.rex +4 -4
  27. data/lib/nokogiri/css/xpath_visitor.rb +10 -3
  28. data/lib/nokogiri/html/document_fragment.rb +11 -1
  29. data/lib/nokogiri/version.rb +1 -1
  30. data/lib/nokogiri/xml/node.rb +58 -0
  31. data/lib/nokogiri/xml/node_set.rb +32 -18
  32. data/patches/libxml2/0001-Revert-Do-not-URI-escape-in-server-side-includes.patch +78 -0
  33. data/ports/archives/libxml2-2.9.8.tar.gz +0 -0
  34. data/test/css/test_nthiness.rb +21 -21
  35. data/test/css/test_parser.rb +17 -0
  36. data/test/html/test_attributes.rb +85 -0
  37. data/test/html/test_document_fragment.rb +7 -1
  38. data/test/test_css_cache.rb +5 -3
  39. data/test/xml/sax/test_parser.rb +9 -1
  40. data/test/xml/sax/test_push_parser.rb +60 -0
  41. data/test/xml/test_cdata.rb +1 -1
  42. data/test/xml/test_document.rb +5 -5
  43. data/test/xml/test_dtd.rb +4 -4
  44. data/test/xml/test_node.rb +89 -6
  45. data/test/xml/test_node_attributes.rb +3 -3
  46. data/test/xml/test_node_reparenting.rb +18 -0
  47. data/test/xml/test_node_set.rb +31 -4
  48. data/test/xml/test_reader.rb +13 -1
  49. data/test/xml/test_syntax_error.rb +3 -3
  50. data/test/xml/test_xpath.rb +8 -0
  51. metadata +25 -4
  52. data/ports/archives/libxml2-2.9.7.tar.gz +0 -0
@@ -255,6 +255,23 @@ module Nokogiri
255
255
  def test_attribute
256
256
  assert_xpath "//h1[@a = 'Tender Lovemaking']",
257
257
  @parser.parse("h1[a='Tender Lovemaking']")
258
+ assert_xpath "//h1[@a]",
259
+ @parser.parse("h1[a]")
260
+ assert_xpath %q{//h1[@a = 'gnewline\n']},
261
+ @parser.parse("h1[a='\\gnew\\\nline\\\\n']")
262
+ assert_xpath "//h1[@a = 'test']",
263
+ @parser.parse(%q{h1[a=\te\st]})
264
+ assert_xpath %q{//h1[@a = "'"]},
265
+ @parser.parse(%q{h1[a="'"]})
266
+ assert_xpath %q{//h1[@a = concat("'", "")]},
267
+ @parser.parse(%q{h1[a='\\'']})
268
+ assert_xpath %q{//h1[@a = concat("", '"', "'", "")]},
269
+ @parser.parse(%q{h1[a='"\'']})
270
+ end
271
+
272
+ def test_attribute_with_number_or_string
273
+ assert_xpath "//img[@width = '200']", @parser.parse("img[width='200']")
274
+ assert_xpath "//img[@width = '200']", @parser.parse("img[width=200]")
258
275
  end
259
276
 
260
277
  def test_id
@@ -0,0 +1,85 @@
1
+ require "helper"
2
+
3
+ module Nokogiri
4
+ module HTML
5
+ class TestAttr < Nokogiri::TestCase
6
+ unless Nokogiri::VersionInfo.instance.libxml2? && Nokogiri::VersionInfo.instance.libxml2_using_system?
7
+ #
8
+ # libxml2 >= 2.9.2 fails to escape comments within some attributes. It
9
+ # wants to ensure these comments can be treated as "server-side includes",
10
+ # but as a result fails to ensure that serialization is well-formed,
11
+ # resulting in an opportunity for XSS injection of code into a final
12
+ # re-parsed document (presumably in a browser).
13
+ #
14
+ # the offending commit is:
15
+ #
16
+ # https://github.com/GNOME/libxml2/commit/960f0e2
17
+ #
18
+ # we'll test this by parsing the HTML, serializing it, then
19
+ # re-parsing it to ensure there isn't any ambiguity in the output
20
+ # that might allow code injection into a browser consuming
21
+ # "sanitized" output.
22
+ #
23
+ # complaints have been made upstream about this behavior, notably at
24
+ #
25
+ # https://bugzilla.gnome.org/show_bug.cgi?id=769760
26
+ #
27
+ # and multiple CVEs have been declared and fixed in downstream
28
+ # libraries as a result, a list is being kept up to date here:
29
+ #
30
+ # https://github.com/flavorjones/loofah/issues/144
31
+ #
32
+ [
33
+ #
34
+ # these tags and attributes are determined by the code at:
35
+ #
36
+ # https://git.gnome.org/browse/libxml2/tree/HTMLtree.c?h=v2.9.2#n714
37
+ #
38
+ {tag: "a", attr: "href"},
39
+ {tag: "div", attr: "href"},
40
+ {tag: "a", attr: "action"},
41
+ {tag: "div", attr: "action"},
42
+ {tag: "a", attr: "src"},
43
+ {tag: "div", attr: "src"},
44
+ {tag: "a", attr: "name"},
45
+ #
46
+ # note that div+name is _not_ affected by the libxml2 issue.
47
+ # but we test it anyway to ensure our logic isn't modifying
48
+ # attributes that don't need modifying.
49
+ #
50
+ {tag: "div", attr: "name", unescaped: true},
51
+ ].each do |config|
52
+
53
+ define_method "test_uri_escaping_of_#{config[:attr]}_attr_in_#{config[:tag]}_tag" do
54
+ html = %{<#{config[:tag]} #{config[:attr]}='examp<!--" unsafeattr=unsafevalue()>-->le.com'>test</#{config[:tag]}>}
55
+
56
+ reparsed = HTML.fragment(HTML.fragment(html).to_html)
57
+ attributes = reparsed.at_css(config[:tag]).attribute_nodes
58
+
59
+ assert_equal [config[:attr]], attributes.collect(&:name)
60
+ if Nokogiri::VersionInfo.instance.libxml2?
61
+ if config[:unescaped]
62
+ #
63
+ # this attribute was emitted wrapped in single-quotes, so a double quote is A-OK.
64
+ # assert that this attribute's serialization is unaffected.
65
+ #
66
+ assert_equal %{examp<!--" unsafeattr=unsafevalue()>-->le.com}, attributes.first.value
67
+ else
68
+ #
69
+ # let's match the behavior in libxml < 2.9.2.
70
+ # test that this attribute's serialization is well-formed and sanitized.
71
+ #
72
+ assert_equal %{examp<!--%22%20unsafeattr=unsafevalue()>-->le.com}, attributes.first.value
73
+ end
74
+ else
75
+ #
76
+ # yay for consistency in javaland. move along, nothing to see here.
77
+ #
78
+ assert_equal %{examp<!--%22 unsafeattr=unsafevalue()>-->le.com}, attributes.first.value
79
+ end
80
+ end
81
+ end
82
+ end
83
+ end
84
+ end
85
+ end
@@ -9,6 +9,12 @@ module Nokogiri
9
9
  @html = Nokogiri::HTML.parse(File.read(HTML_FILE), HTML_FILE)
10
10
  end
11
11
 
12
+ def test_ascii_8bit_encoding
13
+ s = String.new 'hello'
14
+ s.force_encoding ::Encoding::ASCII_8BIT
15
+ assert_equal "hello", Nokogiri::HTML::DocumentFragment.parse(s).to_html
16
+ end
17
+
12
18
  def test_inspect_encoding
13
19
  fragment = "<div>こんにちは!</div>".encode('EUC-JP')
14
20
  f = Nokogiri::HTML::DocumentFragment.parse fragment
@@ -21,7 +27,7 @@ module Nokogiri
21
27
  assert_equal 'EUC-JP', f.document.encoding
22
28
  assert_equal "こんにちは!", f.content
23
29
  end
24
-
30
+
25
31
  def test_unlink_empty_document
26
32
  frag = Nokogiri::HTML::DocumentFragment.parse('').unlink # must_not_raise
27
33
  assert_nil frag.parent
@@ -28,8 +28,6 @@ class TestCssCache < Nokogiri::TestCase
28
28
 
29
29
  [ false, true ].each do |cache_setting|
30
30
  define_method "test_css_cache_#{cache_setting ? "true" : "false"}" do
31
- times = cache_setting ? 4 : nil
32
-
33
31
  Nokogiri::CSS::Parser.set_cache cache_setting
34
32
 
35
33
  Nokogiri::CSS.xpath_for(@css)
@@ -37,7 +35,11 @@ class TestCssCache < Nokogiri::TestCase
37
35
  Nokogiri::CSS::Parser.new.xpath_for(@css)
38
36
  Nokogiri::CSS::Parser.new.xpath_for(@css)
39
37
 
40
- assert_equal(times, Nokogiri::CSS::Parser.class_eval { @cache.count })
38
+ if cache_setting
39
+ assert_equal(4, Nokogiri::CSS::Parser.class_eval { @cache.count })
40
+ else
41
+ assert_nil(Nokogiri::CSS::Parser.class_eval { @cache.count })
42
+ end
41
43
  end
42
44
  end
43
45
 
@@ -43,9 +43,17 @@ module Nokogiri
43
43
  assert_equal [['foo', [['a', '&b']]]], doc.start_elements
44
44
  end
45
45
 
46
+ def test_empty_decl
47
+ parser = XML::SAX::Parser.new(Doc.new)
48
+
49
+ xml = "<root />"
50
+ parser.parse xml
51
+ assert parser.document.start_document_called, xml
52
+ assert_nil parser.document.xmldecls, xml
53
+ end
54
+
46
55
  def test_xml_decl
47
56
  [
48
- ['', nil],
49
57
  ['<?xml version="1.0" ?>',
50
58
  ['1.0']],
51
59
  ['<?xml version="1.0" encoding="UTF-8" ?>',
@@ -21,6 +21,66 @@ module Nokogiri
21
21
  end
22
22
  end
23
23
 
24
+ def test_early_finish
25
+ @parser << "<foo>"
26
+ assert_raises(SyntaxError) do
27
+ @parser.finish
28
+ end
29
+ end
30
+
31
+ def test_write_last_chunk
32
+ @parser << "<foo>"
33
+ @parser.write "</foo>", true
34
+ assert_equal [["foo", []]], @parser.document.start_elements
35
+ assert_equal [["foo"]], @parser.document.end_elements
36
+ end
37
+
38
+ def test_empty_doc
39
+ @parser.options |= XML::ParseOptions::RECOVER
40
+ @parser.write "", true
41
+ assert_nil @parser.document.start_elements
42
+ assert_nil @parser.document.end_elements
43
+ end
44
+
45
+
46
+ def test_finish_should_rethrow_last_error
47
+ begin
48
+ @parser << "</foo>"
49
+ rescue => e
50
+ expected = e
51
+ end
52
+
53
+ begin
54
+ @parser.finish
55
+ rescue => e
56
+ actual = e
57
+ end
58
+
59
+ assert_equal actual.message, expected.message
60
+ end
61
+
62
+ def test_should_throw_error_returned_by_document
63
+ doc = Doc.new
64
+ class << doc
65
+ def error msg
66
+ raise "parse error"
67
+ end
68
+ end
69
+
70
+ @parser = XML::SAX::PushParser.new(doc)
71
+ begin
72
+ @parser << "</foo>"
73
+ rescue => e
74
+ actual = e
75
+ end
76
+
77
+ assert_equal actual.message, "parse error"
78
+ end
79
+
80
+ def test_writing_nil
81
+ assert_equal @parser.write(nil), @parser
82
+ end
83
+
24
84
  def test_end_document_called
25
85
  @parser.<<(<<-eoxml)
26
86
  <p id="asdfasdf">
@@ -25,7 +25,7 @@ module Nokogiri
25
25
 
26
26
  def test_new_with_nil
27
27
  node = CDATA.new(@xml, nil)
28
- assert_equal nil, node.content
28
+ assert_nil node.content
29
29
  end
30
30
 
31
31
  def test_new_with_non_string
@@ -69,7 +69,7 @@ module Nokogiri
69
69
 
70
70
  def test_root_set_to_nil
71
71
  @xml.root = nil
72
- assert_equal nil, @xml.root
72
+ assert_nil @xml.root
73
73
  end
74
74
 
75
75
  def test_million_laugh_attach
@@ -869,9 +869,9 @@ module Nokogiri
869
869
  assert_equal 1, doc.xpath("//x:foo", "x" => "http://c.flavorjon.es/").length
870
870
  assert_match %r{foo c:attr}, doc.to_xml
871
871
  doc.at_xpath("//x:foo", "x" => "http://c.flavorjon.es/").tap do |node|
872
- assert_equal nil, node["attr"]
872
+ assert_nil node["attr"]
873
873
  assert_equal "attr-value", node["c:attr"]
874
- assert_equal nil, node.attribute_with_ns("attr", nil)
874
+ assert_nil node.attribute_with_ns("attr", nil)
875
875
  assert_equal "attr-value", node.attribute_with_ns("attr", "http://c.flavorjon.es/").value
876
876
  assert_equal "attr-value", node.attributes["attr"].value
877
877
  end
@@ -887,9 +887,9 @@ module Nokogiri
887
887
  assert_match %r{foo attr}, doc.to_xml
888
888
  doc.at_xpath("//container/foo").tap do |node|
889
889
  assert_equal "attr-value", node["attr"]
890
- assert_equal nil, node["c:attr"]
890
+ assert_nil node["c:attr"]
891
891
  assert_equal "attr-value", node.attribute_with_ns("attr", nil).value
892
- assert_equal nil, node.attribute_with_ns("attr", "http://c.flavorjon.es/")
892
+ assert_nil node.attribute_with_ns("attr", "http://c.flavorjon.es/")
893
893
  assert_equal "attr-value", node.attributes["attr"].value # doesn't change!
894
894
  end
895
895
  end
@@ -91,14 +91,14 @@ module Nokogiri
91
91
  dtd = doc.internal_subset
92
92
  assert_instance_of Nokogiri::XML::DTD, dtd, name
93
93
  if html_p
94
- assert_send [dtd, :html_dtd?], name
94
+ assert dtd.html_dtd?, name
95
95
  else
96
- assert_not_send [dtd, :html_dtd?], name
96
+ refute dtd.html_dtd?, name
97
97
  end
98
98
  if html5_p
99
- assert_send [dtd, :html5_dtd?], name
99
+ assert dtd.html5_dtd?, name
100
100
  else
101
- assert_not_send [dtd, :html5_dtd?], name
101
+ refute dtd.html5_dtd?, name
102
102
  end
103
103
  }
104
104
  end
@@ -680,6 +680,89 @@ module Nokogiri
680
680
  assert_nil address['domestic']
681
681
  end
682
682
 
683
+ def test_classes
684
+ xml = Nokogiri::XML(<<-eoxml)
685
+ <div>
686
+ <p class=" foo bar foo ">test</p>
687
+ <p class="">test</p>
688
+ </div>
689
+ eoxml
690
+ div = xml.at_xpath('//div')
691
+ p1, p2 = xml.xpath('//p')
692
+
693
+ assert_equal [], div.classes
694
+ assert_equal %w[foo bar foo], p1.classes
695
+ assert_equal [], p2.classes
696
+ end
697
+
698
+ def test_add_class
699
+ xml = Nokogiri::XML(<<-eoxml)
700
+ <div>
701
+ <p class=" foo bar foo ">test</p>
702
+ <p class="">test</p>
703
+ </div>
704
+ eoxml
705
+ div = xml.at_xpath('//div')
706
+ p1, p2 = xml.xpath('//p')
707
+
708
+ assert_same div, div.add_class('main')
709
+ assert_equal 'main', div['class']
710
+
711
+ assert_same p1, p1.add_class('baz foo')
712
+ assert_equal 'foo bar foo baz', p1['class']
713
+
714
+ assert_same p2, p2.add_class('foo baz foo')
715
+ assert_equal 'foo baz foo', p2['class']
716
+ end
717
+
718
+ def test_append_class
719
+ xml = Nokogiri::XML(<<-eoxml)
720
+ <div>
721
+ <p class=" foo bar foo ">test</p>
722
+ <p class="">test</p>
723
+ </div>
724
+ eoxml
725
+ div = xml.at_xpath('//div')
726
+ p1, p2 = xml.xpath('//p')
727
+
728
+ assert_same div, div.append_class('main')
729
+ assert_equal 'main', div['class']
730
+
731
+ assert_same p1, p1.append_class('baz foo')
732
+ assert_equal 'foo bar foo baz foo', p1['class']
733
+
734
+ assert_same p2, p2.append_class('foo baz foo')
735
+ assert_equal 'foo baz foo', p2['class']
736
+ end
737
+
738
+ def test_remove_class
739
+ xml = Nokogiri::XML(<<-eoxml)
740
+ <div>
741
+ <p class=" foo bar baz foo ">test</p>
742
+ <p class=" foo bar baz foo ">test</p>
743
+ <p class="foo foo">test</p>
744
+ <p class="">test</p>
745
+ </div>
746
+ eoxml
747
+ div = xml.at_xpath('//div')
748
+ p1, p2, p3, p4 = xml.xpath('//p')
749
+
750
+ assert_same div, div.remove_class('main')
751
+ assert_nil div['class']
752
+
753
+ assert_same p1, p1.remove_class('bar baz')
754
+ assert_equal 'foo foo', p1['class']
755
+
756
+ assert_same p2, p2.remove_class()
757
+ assert_nil p2['class']
758
+
759
+ assert_same p3, p3.remove_class('foo')
760
+ assert_nil p3['class']
761
+
762
+ assert_same p4, p4.remove_class('foo')
763
+ assert_nil p4['class']
764
+ end
765
+
683
766
  def test_set_content_with_symbol
684
767
  node = @xml.at('//name')
685
768
  node.content = :foo
@@ -1054,15 +1137,15 @@ EOXML
1054
1137
  assert_equal 'http://bar.com/', set[1].namespace.href
1055
1138
  assert_equal "c", set[2].namespace.prefix
1056
1139
  assert_equal 'http://bazz.com/', set[2].namespace.href
1057
- assert_equal nil, set[3].namespace.prefix # default namespace
1140
+ assert_nil set[3].namespace.prefix # default namespace
1058
1141
  assert_equal 'http://ns.example.com/d', set[3].namespace.href
1059
- assert_equal nil, set[4].namespace # no namespace
1142
+ assert_nil set[4].namespace # no namespace
1060
1143
 
1061
1144
  assert_equal 'b', set[2].attributes['y'].namespace.prefix
1062
1145
  assert_equal 'http://bar.com/', set[2].attributes['y'].namespace.href
1063
- assert_equal nil, set[2].attributes['x'].namespace
1064
- assert_equal nil, set[3].attributes['x'].namespace
1065
- assert_equal nil, set[4].attributes['x'].namespace
1146
+ assert_nil set[2].attributes['x'].namespace
1147
+ assert_nil set[3].attributes['x'].namespace
1148
+ assert_nil set[4].attributes['x'].namespace
1066
1149
  end
1067
1150
 
1068
1151
  if Nokogiri.uses_libxml?
@@ -1076,7 +1159,7 @@ EOXML
1076
1159
 
1077
1160
  assert_equal 1, node.namespaces.keys.size
1078
1161
  assert node.namespaces.has_key?('xmlns:o')
1079
- assert_equal nil, node.namespaces['xmlns:o']
1162
+ assert_nil node.namespaces['xmlns:o']
1080
1163
  end
1081
1164
  end
1082
1165
 
@@ -23,7 +23,7 @@ module Nokogiri
23
23
 
24
24
  assert_equal 'en-GB', node['xml:lang']
25
25
  assert_equal 'en-GB', node.attributes['lang'].value
26
- assert_equal nil, node['lang']
26
+ assert_nil node['lang']
27
27
  end
28
28
 
29
29
  def test_unknown_namespace_prefix_should_not_be_removed
@@ -42,12 +42,12 @@ module Nokogiri
42
42
 
43
43
  assert_equal 'en-GB', node['xml:lang']
44
44
  assert_equal 'en-GB', node.attributes['lang'].value
45
- assert_equal nil, node['lang']
45
+ assert_nil node['lang']
46
46
  assert_equal 'http://www.w3.org/XML/1998/namespace', node.attributes['lang'].namespace.href
47
47
 
48
48
  assert_equal 'bazz', node['foo:bar']
49
49
  assert_equal 'bazz', node.attributes['bar'].value
50
- assert_equal nil, node['bar']
50
+ assert_nil node['bar']
51
51
  assert_equal 'x', node.attributes['bar'].namespace.href
52
52
  end
53
53
 
@@ -459,6 +459,24 @@ module Nokogiri
459
459
  end
460
460
  end
461
461
 
462
+ it "can replace with a comment node" do
463
+ doc = Nokogiri::XML %Q{<parent><child>text}
464
+ replacee = doc.at_css("child")
465
+ replacer = doc.create_comment("<b>text</b>")
466
+ replacee.replace replacer
467
+ assert_equal 1, doc.root.children.length
468
+ assert_equal replacer, doc.root.children.first
469
+ end
470
+
471
+ it "can replace with a CDATA node" do
472
+ doc = Nokogiri::XML %Q{<parent><child>text}
473
+ replacee = doc.at_css("child")
474
+ replacer = doc.create_cdata("<b>text</b>")
475
+ replacee.replace replacer
476
+ assert_equal 1, doc.root.children.length
477
+ assert_equal replacer, doc.root.children.first
478
+ end
479
+
462
480
  describe "when a document has a default namespace" do
463
481
  before do
464
482
  @fruits = Nokogiri::XML(<<-eoxml)