nokogiri 1.3.2-x86-mswin32 → 1.3.3-x86-mswin32

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (68) hide show
  1. data/CHANGELOG.ja.rdoc +25 -4
  2. data/CHANGELOG.rdoc +20 -0
  3. data/Manifest.txt +2 -0
  4. data/Rakefile +67 -24
  5. data/ext/nokogiri/extconf.rb +16 -9
  6. data/ext/nokogiri/html_document.c +0 -2
  7. data/ext/nokogiri/nokogiri.c +2 -0
  8. data/ext/nokogiri/nokogiri.h +3 -4
  9. data/ext/nokogiri/xml_document.c +30 -23
  10. data/ext/nokogiri/xml_document.h +3 -2
  11. data/ext/nokogiri/xml_dtd.c +4 -0
  12. data/ext/nokogiri/xml_dtd.h +2 -0
  13. data/ext/nokogiri/xml_node.c +28 -9
  14. data/ext/nokogiri/xml_reader.c +0 -7
  15. data/ext/nokogiri/xml_relax_ng.c +7 -1
  16. data/ext/nokogiri/xml_sax_parser.c +2 -0
  17. data/lib/action-nokogiri.rb +2 -0
  18. data/lib/nokogiri.rb +9 -3
  19. data/lib/nokogiri/1.8/nokogiri.so +0 -0
  20. data/lib/nokogiri/1.9/nokogiri.so +0 -0
  21. data/lib/nokogiri/css/generated_tokenizer.rb +80 -82
  22. data/lib/nokogiri/css/tokenizer.rb +1 -5
  23. data/lib/nokogiri/decorators/hpricot/node_set.rb +1 -1
  24. data/lib/nokogiri/ffi/structs/common_node.rb +1 -1
  25. data/lib/nokogiri/ffi/structs/xml_document.rb +1 -1
  26. data/lib/nokogiri/ffi/xml/document.rb +15 -4
  27. data/lib/nokogiri/ffi/xml/node.rb +85 -63
  28. data/lib/nokogiri/ffi/xml/reader.rb +4 -15
  29. data/lib/nokogiri/ffi/xml/relax_ng.rb +3 -1
  30. data/lib/nokogiri/hpricot.rb +30 -0
  31. data/lib/nokogiri/html/document.rb +3 -1
  32. data/lib/nokogiri/html/document_fragment.rb +1 -1
  33. data/lib/nokogiri/html/sax/parser.rb +2 -1
  34. data/lib/nokogiri/version.rb +1 -1
  35. data/lib/nokogiri/xml/builder.rb +44 -1
  36. data/lib/nokogiri/xml/document.rb +8 -1
  37. data/lib/nokogiri/xml/document_fragment.rb +1 -1
  38. data/lib/nokogiri/xml/fragment_handler.rb +4 -7
  39. data/lib/nokogiri/xml/node.rb +9 -6
  40. data/lib/nokogiri/xml/node_set.rb +7 -0
  41. data/lib/nokogiri/xml/parse_options.rb +1 -1
  42. data/test/css/test_nthiness.rb +2 -3
  43. data/test/ffi/test_document.rb +6 -6
  44. data/test/files/2ch.html +108 -0
  45. data/test/files/shift_jis.xml +5 -0
  46. data/test/helper.rb +3 -0
  47. data/test/hpricot/test_alter.rb +9 -9
  48. data/test/hpricot/test_builder.rb +2 -2
  49. data/test/hpricot/test_parser.rb +70 -146
  50. data/test/hpricot/test_paths.rb +2 -2
  51. data/test/hpricot/test_preserved.rb +2 -2
  52. data/test/hpricot/test_xml.rb +3 -3
  53. data/test/html/sax/test_parser.rb +12 -0
  54. data/test/html/test_builder.rb +6 -4
  55. data/test/html/test_document.rb +7 -0
  56. data/test/html/test_document_encoding.rb +17 -0
  57. data/test/html/test_document_fragment.rb +12 -0
  58. data/test/html/test_node.rb +5 -2
  59. data/test/test_convert_xpath.rb +1 -50
  60. data/test/test_css_cache.rb +1 -12
  61. data/test/test_nokogiri.rb +7 -0
  62. data/test/test_reader.rb +14 -0
  63. data/test/xml/test_document.rb +44 -0
  64. data/test/xml/test_document_fragment.rb +12 -0
  65. data/test/xml/test_node.rb +10 -2
  66. data/test/xml/test_node_encoding.rb +23 -0
  67. data/test/xml/test_node_set.rb +10 -0
  68. metadata +48 -46
@@ -61,7 +61,7 @@ module Nokogiri
61
61
  schema_ptr = LibXML.xmlRelaxNGParse(ctx)
62
62
 
63
63
  LibXML.xmlSetStructuredErrorFunc(nil, nil)
64
- LibXML.xmlRelaxNGFreeParserCtxt(ctx)
64
+ LibXML.xmlRelaxNGFreeParserCtxt(ctx) unless Nokogiri.is_2_6_16?
65
65
 
66
66
  if schema_ptr.null?
67
67
  error = LibXML.xmlGetLastError
@@ -72,6 +72,8 @@ module Nokogiri
72
72
  end
73
73
  end
74
74
 
75
+ LibXML.xmlRelaxNGFreeParserCtxt(ctx) if Nokogiri.is_2_6_16?
76
+
75
77
  schema = allocate
76
78
  schema.cstruct = LibXML::XmlRelaxNG.new schema_ptr
77
79
  schema.errors = errors
@@ -11,24 +11,48 @@ module Nokogiri
11
11
  class << self
12
12
  # parse proxy
13
13
  def parse(*args)
14
+ warn <<-eomsg
15
+ Nokogiri::Hpricot.parse is deprecated and will be extracted to it's own gem
16
+ when Nokogiri 1.4.0 is released. Please switch to Nokogiri(), or be prepared
17
+ to install the compatibility layer.
18
+ #{caller.first}
19
+ eomsg
14
20
  doc = Nokogiri.parse(*args)
15
21
  add_decorators(doc)
16
22
  end
17
23
 
18
24
  # XML proxy
19
25
  def XML(string)
26
+ warn <<-eomsg
27
+ Nokogiri::Hpricot.parse is deprecated and will be extracted to it's own gem
28
+ when Nokogiri 1.4.0 is released. Please switch to Nokogiri::XML(), or be
29
+ prepared to install the compatibility layer.
30
+ #{caller.first}
31
+ eomsg
20
32
  doc = Nokogiri::XML::Document.parse(string)
21
33
  add_decorators(doc)
22
34
  end
23
35
 
24
36
  # HTML proxy
25
37
  def HTML(string)
38
+ warn <<-eomsg
39
+ Nokogiri::Hpricot.parse is deprecated and will be extracted to it's own gem
40
+ when Nokogiri 1.4.0 is released. Please switch to Nokogiri::HTML(), or be
41
+ prepared to install the compatibility layer.
42
+ #{caller.first}
43
+ eomsg
26
44
  doc = Nokogiri::HTML::Document.parse(string)
27
45
  add_decorators(doc)
28
46
  end
29
47
 
30
48
  # make proxy
31
49
  def make string
50
+ warn <<-eomsg
51
+ Nokogiri::Hpricot.parse is deprecated and will be extracted to it's own gem
52
+ when Nokogiri 1.4.0 is released. Please switch to Nokogiri::HTML.make(), or be
53
+ prepared to install the compatibility layer.
54
+ #{caller.first}
55
+ eomsg
32
56
  doc = XML::Document.new
33
57
  ns = XML::NodeSet.new(doc)
34
58
  ns << XML::Text.new(string, doc)
@@ -50,6 +74,12 @@ module Nokogiri
50
74
  # Parse a document and apply the Hpricot decorators for Hpricot
51
75
  # compatibility mode.
52
76
  def Hpricot(*args, &block)
77
+ warn <<-eomsg
78
+ Nokogiri::Hpricot.parse is deprecated and will be extracted to it's own gem
79
+ when Nokogiri 1.4.0 is released. Please switch to Nokogiri(), or be
80
+ prepared to install the compatibility layer.
81
+ #{caller.first}
82
+ eomsg
53
83
  if block_given?
54
84
  builder = Nokogiri::HTML::Builder.new(&block)
55
85
  Nokogiri::Hpricot.add_decorators(builder.doc)
@@ -61,7 +61,9 @@ Please change to #{self.class}#serialize(:encoding => enc, :save_with => opts)
61
61
  return self.read_io(string_or_io, url, encoding, options.to_i)
62
62
  end
63
63
 
64
- return self.new if(string_or_io.length == 0)
64
+ # read_memory pukes on empty docs
65
+ return self.new if string_or_io.nil? or string_or_io.empty?
66
+
65
67
  self.read_memory(string_or_io, url, encoding, options.to_i)
66
68
  end
67
69
  end
@@ -6,7 +6,7 @@ module Nokogiri
6
6
  ####
7
7
  # Create a Nokogiri::XML::DocumentFragment from +tags+
8
8
  def parse tags
9
- HTML::DocumentFragment.new(HTML::Document.new, tags)
9
+ self.new(HTML::Document.new, tags)
10
10
  end
11
11
  end
12
12
 
@@ -30,7 +30,8 @@ module Nokogiri
30
30
  # Parse html stored in +data+ using +encoding+
31
31
  def parse_memory data, encoding = 'UTF-8'
32
32
  raise ArgumentError unless data
33
- native_parse_memory(data, encoding)
33
+ return unless data.length > 0
34
+ native_parse_memory data, encoding
34
35
  end
35
36
 
36
37
  ###
@@ -1,6 +1,6 @@
1
1
  module Nokogiri
2
2
  # The version of Nokogiri you are using
3
- VERSION = '1.3.2'
3
+ VERSION = '1.3.3'
4
4
 
5
5
  # More complete version information about libxml
6
6
  VERSION_INFO = {}
@@ -17,6 +17,19 @@ module Nokogiri
17
17
  # end
18
18
  # puts builder.to_xml
19
19
  #
20
+ # Will output:
21
+ #
22
+ # <?xml version="1.0"?>
23
+ # <root>
24
+ # <products>
25
+ # <widget>
26
+ # <id>10</id>
27
+ # <name>Awesome widget</name>
28
+ # </widget>
29
+ # </products>
30
+ # </root>
31
+ #
32
+ #
20
33
  # === Builder scope
21
34
  #
22
35
  # The builder allows two forms. When the builder is supplied with a block
@@ -65,7 +78,28 @@ module Nokogiri
65
78
  # puts builder.to_xml
66
79
  #
67
80
  # The underscore may be used with any tag name, and the last underscore
68
- # will just be removed.
81
+ # will just be removed. This code will output the following XML:
82
+ #
83
+ # <?xml version="1.0"?>
84
+ # <root>
85
+ # <objects>
86
+ # <object>
87
+ # <type>Object</type>
88
+ # <class>Object</class>
89
+ # <id>48390</id>
90
+ # </object>
91
+ # <object>
92
+ # <type>Object</type>
93
+ # <class>Object</class>
94
+ # <id>48380</id>
95
+ # </object>
96
+ # <object>
97
+ # <type>Object</type>
98
+ # <class>Object</class>
99
+ # <id>48370</id>
100
+ # </object>
101
+ # </objects>
102
+ # </root>
69
103
  #
70
104
  # == Tag Attributes
71
105
  #
@@ -102,6 +136,15 @@ module Nokogiri
102
136
  # end
103
137
  # puts builder.to_xml
104
138
  #
139
+ # Which will output:
140
+ #
141
+ # <?xml version="1.0"?>
142
+ # <root>
143
+ # <objects>
144
+ # <object class="classy" id="thing"/>
145
+ # </objects>
146
+ # </root>
147
+ #
105
148
  # All other options are still supported with this syntax, including
106
149
  # blocks and extra tag attributes.
107
150
  class Builder
@@ -61,7 +61,6 @@ module Nokogiri
61
61
  end
62
62
 
63
63
  alias :to_xml :serialize
64
- alias :inner_html :serialize
65
64
  alias :clone :dup
66
65
 
67
66
  # Get the hash of namespaces on the root Nokogiri::XML::Node
@@ -78,6 +77,14 @@ module Nokogiri
78
77
  undef_method :swap, :parent, :namespace, :default_namespace=
79
78
  undef_method :add_namespace_definition
80
79
 
80
+ def add_child child
81
+ if [Node::ELEMENT_NODE, Node::DOCUMENT_FRAG_NODE].include? child.type
82
+ raise "Document already has a root node" if root
83
+ end
84
+ super
85
+ end
86
+ alias :<< :add_child
87
+
81
88
  class << self
82
89
  ###
83
90
  # Parse an XML file. +thing+ may be a String, or any object that
@@ -40,7 +40,7 @@ module Nokogiri
40
40
  ####
41
41
  # Create a Nokogiri::XML::DocumentFragment from +tags+
42
42
  def parse tags
43
- XML::DocumentFragment.new(XML::Document.new, tags)
43
+ self.new(XML::Document.new, tags)
44
44
  end
45
45
  end
46
46
 
@@ -18,12 +18,9 @@ module Nokogiri
18
18
  # this implementation choice was the result of some benchmarks, if
19
19
  # you're curious: http://gist.github.com/115936
20
20
  #
21
- newline_index = original_html.index("\n")
22
- @original_html = if newline_index
23
- original_html[0,newline_index]
24
- else
25
- original_html
26
- end
21
+ @original_html = original_html.lstrip
22
+ newline_index = @original_html.index("\n")
23
+ @original_html = @original_html[0,newline_index] if newline_index
27
24
  end
28
25
 
29
26
  def start_element name, attrs = []
@@ -32,7 +29,7 @@ module Nokogiri
32
29
  @doc_started = true if @original_html =~ regex
33
30
  return unless @doc_started
34
31
 
35
- node = Node.new(name, @document)
32
+ node = Element.new(name, @document)
36
33
  attrs << "" unless (attrs.length % 2) == 0
37
34
  Hash[*attrs].each do |k,v|
38
35
  node[k] = v
@@ -295,7 +295,7 @@ module Nokogiri
295
295
  def inner_html= tags
296
296
  children.each { |x| x.remove}
297
297
 
298
- fragment(tags).children.to_a.reverse.each do |node|
298
+ fragment(tags).children.to_a.each do |node|
299
299
  add_child node
300
300
  end
301
301
  self
@@ -481,8 +481,8 @@ Node.replace requires a Node argument, and cannot accept a Document.
481
481
  end
482
482
 
483
483
  ###
484
- # Serialize Node using +options+. Save options
485
- # can also be set using a block. See SaveOptions.
484
+ # Serialize Node using +options+. Save options can also be set using a
485
+ # block. See SaveOptions.
486
486
  #
487
487
  # These two statements are equivalent:
488
488
  #
@@ -500,10 +500,13 @@ Node.replace requires a Node argument, and cannot accept a Document.
500
500
  :save_with => args[1] || SaveOptions::FORMAT
501
501
  }
502
502
 
503
- io = StringIO.new
503
+ outstring = ""
504
+ if document.encoding && outstring.respond_to?(:force_encoding)
505
+ outstring.force_encoding(Encoding.find(document.encoding))
506
+ end
507
+ io = StringIO.new(outstring)
504
508
  write_to io, options, &block
505
- io.rewind
506
- io.read
509
+ io.string
507
510
  end
508
511
 
509
512
  ###
@@ -273,6 +273,13 @@ module Nokogiri
273
273
  end
274
274
  true
275
275
  end
276
+
277
+ ###
278
+ # Returns a new NodeSet containing all the children of all the nodes in the NodeSet
279
+ def children
280
+ inject(NodeSet.new(document)) { |set, node| set += node.children }
281
+ end
282
+
276
283
  end
277
284
  end
278
285
  end
@@ -63,7 +63,7 @@ module Nokogiri
63
63
  end
64
64
 
65
65
  def strict
66
- @options |= STRICT
66
+ @options &= ~RECOVER
67
67
  self
68
68
  end
69
69
 
@@ -42,7 +42,7 @@ module Nokogiri
42
42
  <p class='not-empty'><b></b></p>
43
43
  </html>
44
44
  EOF
45
- @parser = Nokogiri.Hpricot doc
45
+ @parser = Nokogiri.HTML doc
46
46
  end
47
47
 
48
48
 
@@ -140,8 +140,7 @@ EOF
140
140
  <p id="4">p4 </p>
141
141
  <p id="5">p5 </p>
142
142
  EOF
143
- parser = Nokogiri.Hpricot doc
144
-
143
+ parser = Nokogiri.HTML doc
145
144
  assert_equal 2, parser.search("#3 ~ p").size
146
145
  assert_equal "p4 p5 ", parser.search("#3 ~ p").inner_text
147
146
  assert_equal 0, parser.search("#5 ~ p").size
@@ -16,18 +16,18 @@ if defined?(Nokogiri::LibXML)
16
16
  assert_equal foo, doc.cstruct.ruby_doc
17
17
  end
18
18
 
19
- def test_node_set
19
+ def test_unlinked_nodes
20
20
  doc = Nokogiri::XML("<root><foo>foo</foo></root>")
21
- assert_instance_of Nokogiri::LibXML::XmlNodeSetCast, doc.cstruct.node_set
21
+ assert_instance_of Nokogiri::LibXML::XmlNodeSetCast, doc.cstruct.unlinked_nodes
22
22
  end
23
23
 
24
- def test_node_set_contains_unlinked_nodes
24
+ def test_unlinked_nodes_contains_unlinked_nodes
25
25
  doc = Nokogiri::XML("<root><foo>foo</foo></root>")
26
26
  node = doc.xpath('//foo').first
27
- assert_equal 0, doc.cstruct.node_set[:nodeNr]
27
+ assert_equal 0, doc.cstruct.unlinked_nodes[:nodeNr]
28
28
  node.unlink
29
- assert_equal 1, doc.cstruct.node_set[:nodeNr]
30
- assert_equal node.cstruct.pointer, doc.cstruct.node_set[:nodeTab].get_pointer(0)
29
+ assert_equal 1, doc.cstruct.unlinked_nodes[:nodeNr]
30
+ assert_equal node.cstruct.pointer, doc.cstruct.unlinked_nodes[:nodeTab].get_pointer(0)
31
31
  end
32
32
 
33
33
  end
@@ -0,0 +1,108 @@
1
+ <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
2
+ <html xmlns="http://www.w3.org/1999/xhtml">
3
+ <head>
4
+ <meta http-equiv="Content-Type" content="text/html; charset=Shift_JIS" />
5
+ <meta http-equiv="Content-Script-Type" content="text/javascript" />
6
+ <meta http-equiv="Content-Style-Type" content="text/css" />
7
+ <meta name="Description" CONTENT="「ハッキング」から「今晩のおかず」までを手広くカバーする巨大掲示板群『２ちゃんねる』へようこそ！">
8
+ <meta name="KeyWords" CONTENT="２ちゃんねる, 掲示板, 2ch, BBS">
9
+ <meta name="Author" CONTENT="２ちゃんねる">
10
+ <meta name="verify-v1" content="hMevVuWJIPYrUj9ItfRJByoLNIpyhnrWaywiH+IFocU=" />
11
+ <title>２ちゃんねる掲示板へようこそ</title>
12
+ <style type="text/css" media="all">
13
+ @import url(2ch_top.css);
14
+ </style>
15
+ <script>
16
+ <!--
17
+ function fcs(){document.f.STR.focus();}
18
+ -->
19
+ </script>
20
+ </head>
21
+
22
+ <body onLoad="document.f.STR.focus()">
23
+
24
+ <div id="wrapper">
25
+ <div id="header">
26
+ <div id="header_inside">
27
+ <div class="header_left">
28
+ <a href="http://www2.2ch.net/2ch.html"><img src="images/2ch_logo.gif" width="151" height="40" alt="２ちゃんねる" align="middle" border="0" /><a href="http://www2.2ch.net/2ch.html">掲示板</a>｜
29
+ <a href="http://c.2ch.net/">imode</a>｜<a href="http://orz.2ch.io/top.html">携帯orz</a>｜<a href="http://p2.2ch.net/">ビューアp2</a>
30
+ </div>
31
+
32
+ <div class="senna_banner">
33
+
34
+ <a href="http://razil.jp/product/senna/"><img src="images/senna88x31.gif" width="88" height="31" alt="Senna" align="middle" border="0" /></a>
35
+ </div>
36
+ <div class="header_right">
37
+ <!---find 2ch form --->
38
+ <form method=GET name=f action="http://find.2ch.net/" style="margin:0px">
39
+ <input size=25 name=STR value="" class="form_input">
40
+ <select name="TYPE" class="form_select">
41
+ <option value="BODY">本文</option>
42
+
43
+ <option value="TITLE" selected>スレッドタイトル</option>
44
+ <option value="POSTERS">投稿者</option>
45
+ </select>
46
+ <input type="image" src="images/search_button.gif" alt="検索" class="form_button"><br />
47
+ <input type=hidden name=BBS value=ALL>
48
+
49
+ <input type=hidden name=ENCODING value=SJIS>
50
+ <input type=hidden name=COUNT value=50>
51
+ <div class="caption">本文検索や検索設定は<a href="http://find.2ch.net/moritapo/notlogin.php">ログイン</a><a href="http://find.2ch.net/moritapo/welcome/">&raquo;詳細</a></div>
52
+
53
+ </form>
54
+ </div>
55
+ </div><!--- header_inside --->
56
+ </div><!--- end of header--->
57
+
58
+ <div id="under_header">
59
+ <a href="http://newsnavi.2ch.net/">ニュース</a>｜<a href="http://headline.2ch.net/bbynews/">ヘッドライン</a>｜<a href="http://epg.2ch.net/">テレビ欄</a>｜<a href="http://www.2ch.net/kakolog.html">過去ログ倉庫</a>
60
+
61
+ </div><!--- end of under header--->
62
+
63
+ <div id="main">
64
+ <iframe src="http://cast.texpo.jp/2chtop/main_frame.html" width="100%" height="550" scrolling="no" border="0" frameborder="0"></iframe>
65
+ </div><!--- end of main--->
66
+
67
+ <div id="footer_menu">
68
+ <div class="guide">
69
+ <a href="http://info.2ch.net/guide/">使い方＆注意</a>｜<a href="http://info.2ch.net/guide/adv.html#saku_guide">削除ガイドライン</a>｜<a href="http://www2.2ch.net/2ch2.html">携帯版メニュー</a>｜<a href="http://info.2ch.net/blog.html">ひろゆき日記</a>｜<a href="http://www.2ch.net/ad.html">広告のご案内</a>
70
+
71
+ </div>
72
+
73
+ <div class="service">
74
+ <iframe src="http://cast.texpo.jp/2chtop/moritapo_frame.html" width="100%" height="50" scrolling="no" border="0" frameborder="0"></iframe>
75
+ </div>
76
+
77
+ <div class="banner">
78
+ <a href=http://livede55.com/ target="_blank"><img
79
+ src="http://www2.livede55.com/2ch_468_60_13.gif"
80
+ width="468" height="60" border="0" alt=ライブチャット></a><br>
81
+ <br>
82
+ <A href="http://www.bb-chat.tv/" target=_blank><IMG height=60 src="http://img.bbchat.tv/images/bannar/46860-3.gif" width=468 border=0></A><br>
83
+ </div>
84
+
85
+ <div class="condition">
86
+ ２ちゃんねるのご利用は利用者各位のご判断にお任せしています｜<a href="precautions.html">2chのデータ利用について</a>
87
+
88
+ </div>
89
+ </div><!--- end of footer_menu--->
90
+
91
+
92
+ <div id="footer">
93
+ <div class="footer_left">
94
+ <a href="http://www.bunka.go.jp/jiyuriyo/" target="_blank">
95
+ <img src="http://www.dd.iij4u.or.jp/~cap/y_3copyok.jpg" width="184" height="31" border="0" alt="自由利用マーク" /></a>
96
+ </div>
97
+
98
+ <div class="footer_right">
99
+ <a href="http://count.2ch.net/?index" target="_blank">
100
+ <img src="http://count.2ch.net/ct.php/index" width="88" height="31" border="0" alt="���������J�E���^�["></a>
101
+
102
+
103
+ </div>
104
+ </div><!---end of footer--->
105
+
106
+ </div><!--- end of wrapper--->
107
+ </body>
108
+ </html>