nokogiri 1.5.10 → 1.10.4

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (182) hide show
  1. checksums.yaml +7 -0
  2. data/LICENSE-DEPENDENCIES.md +1614 -0
  3. data/LICENSE.md +9 -0
  4. data/README.md +198 -0
  5. data/bin/nokogiri +50 -10
  6. data/dependencies.yml +72 -0
  7. data/ext/nokogiri/extconf.rb +634 -92
  8. data/ext/nokogiri/html_document.c +8 -8
  9. data/ext/nokogiri/html_element_description.c +15 -15
  10. data/ext/nokogiri/html_entity_lookup.c +1 -1
  11. data/ext/nokogiri/html_sax_parser_context.c +4 -4
  12. data/ext/nokogiri/html_sax_push_parser.c +2 -2
  13. data/ext/nokogiri/nokogiri.c +20 -12
  14. data/ext/nokogiri/nokogiri.h +1 -44
  15. data/ext/nokogiri/xml_attr.c +34 -25
  16. data/ext/nokogiri/xml_cdata.c +12 -6
  17. data/ext/nokogiri/xml_comment.c +18 -3
  18. data/ext/nokogiri/xml_document.c +64 -32
  19. data/ext/nokogiri/xml_dtd.c +2 -2
  20. data/ext/nokogiri/xml_encoding_handler.c +3 -3
  21. data/ext/nokogiri/xml_entity_reference.c +1 -1
  22. data/ext/nokogiri/xml_io.c +11 -6
  23. data/ext/nokogiri/xml_namespace.c +50 -17
  24. data/ext/nokogiri/xml_namespace.h +3 -2
  25. data/ext/nokogiri/xml_node.c +459 -240
  26. data/ext/nokogiri/xml_node_set.c +166 -147
  27. data/ext/nokogiri/xml_node_set.h +2 -4
  28. data/ext/nokogiri/xml_processing_instruction.c +2 -2
  29. data/ext/nokogiri/xml_reader.c +6 -19
  30. data/ext/nokogiri/xml_sax_parser.c +11 -13
  31. data/ext/nokogiri/xml_sax_parser_context.c +41 -1
  32. data/ext/nokogiri/xml_sax_push_parser.c +56 -12
  33. data/ext/nokogiri/xml_schema.c +1 -1
  34. data/ext/nokogiri/xml_syntax_error.c +11 -5
  35. data/ext/nokogiri/xml_syntax_error.h +1 -1
  36. data/ext/nokogiri/xml_text.c +1 -1
  37. data/ext/nokogiri/xml_xpath_context.c +17 -38
  38. data/ext/nokogiri/xslt_stylesheet.c +10 -10
  39. data/lib/nokogiri/css/node.rb +0 -50
  40. data/lib/nokogiri/css/parser.rb +263 -233
  41. data/lib/nokogiri/css/parser.y +54 -40
  42. data/lib/nokogiri/css/tokenizer.rb +104 -103
  43. data/lib/nokogiri/css/tokenizer.rex +5 -5
  44. data/lib/nokogiri/css/xpath_visitor.rb +78 -19
  45. data/lib/nokogiri/decorators/slop.rb +12 -5
  46. data/lib/nokogiri/html/document.rb +102 -21
  47. data/lib/nokogiri/html/document_fragment.rb +11 -3
  48. data/lib/nokogiri/html/sax/parser.rb +12 -2
  49. data/lib/nokogiri/html/sax/push_parser.rb +22 -2
  50. data/lib/nokogiri/version.rb +40 -22
  51. data/lib/nokogiri/xml/builder.rb +34 -31
  52. data/lib/nokogiri/xml/document.rb +20 -14
  53. data/lib/nokogiri/xml/document_fragment.rb +50 -2
  54. data/lib/nokogiri/xml/dtd.rb +14 -4
  55. data/lib/nokogiri/xml/entity_reference.rb +18 -0
  56. data/lib/nokogiri/xml/node.rb +148 -203
  57. data/lib/nokogiri/xml/node_set.rb +139 -123
  58. data/lib/nokogiri/xml/parse_options.rb +22 -0
  59. data/lib/nokogiri/xml/sax/document.rb +1 -1
  60. data/lib/nokogiri/xml/sax/parser.rb +7 -8
  61. data/lib/nokogiri/xml/searchable.rb +230 -0
  62. data/lib/nokogiri/xml/syntax_error.rb +24 -1
  63. data/lib/nokogiri/xml.rb +3 -1
  64. data/lib/nokogiri.rb +40 -24
  65. data/patches/libxml2/0001-Revert-Do-not-URI-escape-in-server-side-includes.patch +78 -0
  66. data/patches/libxml2/0002-Remove-script-macro-support.patch +40 -0
  67. data/patches/libxml2/0003-Update-entities-to-remove-handling-of-ssi.patch +44 -0
  68. data/patches/libxslt/0001-Fix-security-framework-bypass.patch +120 -0
  69. data/ports/archives/libxml2-2.9.9.tar.gz +0 -0
  70. data/ports/archives/libxslt-1.1.33.tar.gz +0 -0
  71. metadata +252 -388
  72. data/.autotest +0 -26
  73. data/.gemtest +0 -0
  74. data/CHANGELOG.ja.rdoc +0 -785
  75. data/CHANGELOG.rdoc +0 -783
  76. data/C_CODING_STYLE.rdoc +0 -33
  77. data/Manifest.txt +0 -303
  78. data/README.ja.rdoc +0 -106
  79. data/README.rdoc +0 -175
  80. data/ROADMAP.md +0 -90
  81. data/Rakefile +0 -228
  82. data/STANDARD_RESPONSES.md +0 -47
  83. data/Y_U_NO_GEMSPEC.md +0 -155
  84. data/build_all +0 -105
  85. data/tasks/cross_compile.rb +0 -150
  86. data/tasks/nokogiri.org.rb +0 -24
  87. data/tasks/test.rb +0 -95
  88. data/test/css/test_nthiness.rb +0 -159
  89. data/test/css/test_parser.rb +0 -341
  90. data/test/css/test_tokenizer.rb +0 -198
  91. data/test/css/test_xpath_visitor.rb +0 -91
  92. data/test/decorators/test_slop.rb +0 -16
  93. data/test/files/2ch.html +0 -108
  94. data/test/files/address_book.rlx +0 -12
  95. data/test/files/address_book.xml +0 -10
  96. data/test/files/bar/bar.xsd +0 -4
  97. data/test/files/dont_hurt_em_why.xml +0 -422
  98. data/test/files/encoding.html +0 -82
  99. data/test/files/encoding.xhtml +0 -84
  100. data/test/files/exslt.xml +0 -8
  101. data/test/files/exslt.xslt +0 -35
  102. data/test/files/foo/foo.xsd +0 -4
  103. data/test/files/metacharset.html +0 -10
  104. data/test/files/noencoding.html +0 -47
  105. data/test/files/po.xml +0 -32
  106. data/test/files/po.xsd +0 -66
  107. data/test/files/shift_jis.html +0 -10
  108. data/test/files/shift_jis.xml +0 -5
  109. data/test/files/snuggles.xml +0 -3
  110. data/test/files/staff.dtd +0 -10
  111. data/test/files/staff.xml +0 -59
  112. data/test/files/staff.xslt +0 -32
  113. data/test/files/test_document_url/bar.xml +0 -2
  114. data/test/files/test_document_url/document.dtd +0 -4
  115. data/test/files/test_document_url/document.xml +0 -6
  116. data/test/files/tlm.html +0 -850
  117. data/test/files/to_be_xincluded.xml +0 -2
  118. data/test/files/valid_bar.xml +0 -2
  119. data/test/files/xinclude.xml +0 -4
  120. data/test/helper.rb +0 -154
  121. data/test/html/sax/test_parser.rb +0 -141
  122. data/test/html/sax/test_parser_context.rb +0 -46
  123. data/test/html/test_builder.rb +0 -164
  124. data/test/html/test_document.rb +0 -552
  125. data/test/html/test_document_encoding.rb +0 -138
  126. data/test/html/test_document_fragment.rb +0 -261
  127. data/test/html/test_element_description.rb +0 -105
  128. data/test/html/test_named_characters.rb +0 -14
  129. data/test/html/test_node.rb +0 -196
  130. data/test/html/test_node_encoding.rb +0 -27
  131. data/test/namespaces/test_additional_namespaces_in_builder_doc.rb +0 -14
  132. data/test/namespaces/test_namespaces_in_builder_doc.rb +0 -75
  133. data/test/namespaces/test_namespaces_in_created_doc.rb +0 -75
  134. data/test/namespaces/test_namespaces_in_parsed_doc.rb +0 -66
  135. data/test/test_convert_xpath.rb +0 -135
  136. data/test/test_css_cache.rb +0 -45
  137. data/test/test_encoding_handler.rb +0 -46
  138. data/test/test_memory_leak.rb +0 -156
  139. data/test/test_nokogiri.rb +0 -132
  140. data/test/test_reader.rb +0 -555
  141. data/test/test_soap4r_sax.rb +0 -52
  142. data/test/test_xslt_transforms.rb +0 -254
  143. data/test/xml/node/test_save_options.rb +0 -28
  144. data/test/xml/node/test_subclass.rb +0 -44
  145. data/test/xml/sax/test_parser.rb +0 -366
  146. data/test/xml/sax/test_parser_context.rb +0 -106
  147. data/test/xml/sax/test_push_parser.rb +0 -157
  148. data/test/xml/test_attr.rb +0 -64
  149. data/test/xml/test_attribute_decl.rb +0 -86
  150. data/test/xml/test_builder.rb +0 -306
  151. data/test/xml/test_c14n.rb +0 -151
  152. data/test/xml/test_cdata.rb +0 -48
  153. data/test/xml/test_comment.rb +0 -29
  154. data/test/xml/test_document.rb +0 -828
  155. data/test/xml/test_document_encoding.rb +0 -28
  156. data/test/xml/test_document_fragment.rb +0 -223
  157. data/test/xml/test_dtd.rb +0 -103
  158. data/test/xml/test_dtd_encoding.rb +0 -33
  159. data/test/xml/test_element_content.rb +0 -56
  160. data/test/xml/test_element_decl.rb +0 -73
  161. data/test/xml/test_entity_decl.rb +0 -122
  162. data/test/xml/test_entity_reference.rb +0 -245
  163. data/test/xml/test_namespace.rb +0 -95
  164. data/test/xml/test_node.rb +0 -1137
  165. data/test/xml/test_node_attributes.rb +0 -96
  166. data/test/xml/test_node_encoding.rb +0 -107
  167. data/test/xml/test_node_inheritance.rb +0 -32
  168. data/test/xml/test_node_reparenting.rb +0 -374
  169. data/test/xml/test_node_set.rb +0 -755
  170. data/test/xml/test_parse_options.rb +0 -64
  171. data/test/xml/test_processing_instruction.rb +0 -30
  172. data/test/xml/test_reader_encoding.rb +0 -142
  173. data/test/xml/test_relax_ng.rb +0 -60
  174. data/test/xml/test_schema.rb +0 -103
  175. data/test/xml/test_syntax_error.rb +0 -12
  176. data/test/xml/test_text.rb +0 -45
  177. data/test/xml/test_unparented_node.rb +0 -422
  178. data/test/xml/test_xinclude.rb +0 -83
  179. data/test/xml/test_xpath.rb +0 -295
  180. data/test/xslt/test_custom_functions.rb +0 -133
  181. data/test/xslt/test_exception_handling.rb +0 -37
  182. data/test_all +0 -81
@@ -4,25 +4,26 @@ module Nokogiri
4
4
  # The Slop decorator implements method missing such that a methods may be
5
5
  # used instead of XPath or CSS. See Nokogiri.Slop
6
6
  module Slop
7
+ # The default XPath search context for Slop
8
+ XPATH_PREFIX = "./"
9
+
7
10
  ###
8
11
  # look for node with +name+. See Nokogiri.Slop
9
12
  def method_missing name, *args, &block
10
- prefix = implied_xpath_context
11
-
12
13
  if args.empty?
13
- list = xpath("#{prefix}#{name.to_s.sub(/^_/, '')}")
14
+ list = xpath("#{XPATH_PREFIX}#{name.to_s.sub(/^_/, '')}")
14
15
  elsif args.first.is_a? Hash
15
16
  hash = args.first
16
17
  if hash[:css]
17
18
  list = css("#{name}#{hash[:css]}")
18
19
  elsif hash[:xpath]
19
20
  conds = Array(hash[:xpath]).join(' and ')
20
- list = xpath("#{prefix}#{name}[#{conds}]")
21
+ list = xpath("#{XPATH_PREFIX}#{name}[#{conds}]")
21
22
  end
22
23
  else
23
24
  CSS::Parser.without_cache do
24
25
  list = xpath(
25
- *CSS.xpath_for("#{name}#{args.first}", :prefix => prefix)
26
+ *CSS.xpath_for("#{name}#{args.first}", :prefix => XPATH_PREFIX)
26
27
  )
27
28
  end
28
29
  end
@@ -30,6 +31,12 @@ module Nokogiri
30
31
  super if list.empty?
31
32
  list.length == 1 ? list.first : list
32
33
  end
34
+
35
+ def respond_to_missing? name, include_private = false
36
+ list = xpath("#{XPATH_PREFIX}#{name.to_s.sub(/^_/, '')}")
37
+
38
+ !list.empty?
39
+ end
33
40
  end
34
41
  end
35
42
  end
@@ -5,24 +5,59 @@ module Nokogiri
5
5
  # Get the meta tag encoding for this document. If there is no meta tag,
6
6
  # then nil is returned.
7
7
  def meta_encoding
8
- meta = meta_content_type and
9
- match = /charset\s*=\s*([\w-]+)/i.match(meta['content']) and
10
- match[1]
8
+ case
9
+ when meta = at('//meta[@charset]')
10
+ meta[:charset]
11
+ when meta = meta_content_type
12
+ meta['content'][/charset\s*=\s*([\w-]+)/i, 1]
13
+ end
11
14
  end
12
15
 
13
16
  ###
14
- # Set the meta tag encoding for this document. If there is no meta
15
- # content tag, the encoding is not set.
17
+ # Set the meta tag encoding for this document.
18
+ #
19
+ # If an meta encoding tag is already present, its content is
20
+ # replaced with the given text.
21
+ #
22
+ # Otherwise, this method tries to create one at an appropriate
23
+ # place supplying head and/or html elements as necessary, which
24
+ # is inside a head element if any, and before any text node or
25
+ # content element (typically <body>) if any.
26
+ #
27
+ # The result when trying to set an encoding that is different
28
+ # from the document encoding is undefined.
29
+ #
30
+ # Beware in CRuby, that libxml2 automatically inserts a meta tag
31
+ # into a head element.
16
32
  def meta_encoding= encoding
17
- meta = meta_content_type and
18
- meta['content'] = "text/html; charset=%s" % encoding
33
+ case
34
+ when meta = meta_content_type
35
+ meta['content'] = 'text/html; charset=%s' % encoding
36
+ encoding
37
+ when meta = at('//meta[@charset]')
38
+ meta['charset'] = encoding
39
+ else
40
+ meta = XML::Node.new('meta', self)
41
+ if dtd = internal_subset and dtd.html5_dtd?
42
+ meta['charset'] = encoding
43
+ else
44
+ meta['http-equiv'] = 'Content-Type'
45
+ meta['content'] = 'text/html; charset=%s' % encoding
46
+ end
47
+
48
+ case
49
+ when head = at('//head')
50
+ head.prepend_child(meta)
51
+ else
52
+ set_metadata_element(meta)
53
+ end
54
+ encoding
55
+ end
19
56
  end
20
57
 
21
58
  def meta_content_type
22
- css('meta[@http-equiv]').find { |node|
23
- node['http-equiv'] =~ /\AContent-Type\z/i and
24
- !node['content'].nil? and
25
- !node['content'].empty?
59
+ xpath('//meta[@http-equiv and boolean(@content)]').find { |node|
60
+ node['http-equiv'] =~ /\AContent-Type\z/i
26
61
  }
27
62
  end
28
63
  private :meta_content_type
@@ -31,20 +66,64 @@ module Nokogiri
31
66
  # Get the title string of this document. Return nil if there is
32
67
  # no title tag.
33
68
  def title
34
- title = at('title') and title.inner_text
69
+ title = at('//title') and title.inner_text
35
70
  end
36
71
 
37
72
  ###
38
- # Set the title string of this document. If there is no head
39
- # element, the title is not set.
73
+ # Set the title string of this document.
74
+ #
75
+ # If a title element is already present, its content is replaced
76
+ # with the given text.
77
+ #
78
+ # Otherwise, this method tries to create one at an appropriate
79
+ # place supplying head and/or html elements as necessary, which
80
+ # is inside a head element if any, right after a meta
81
+ # encoding/charset tag if any, and before any text node or
82
+ # content element (typically <body>) if any.
40
83
  def title=(text)
41
- unless title = at('title')
42
- head = at('head') or return nil
43
- title = Nokogiri::XML::Node.new('title', self)
84
+ tnode = XML::Text.new(text, self)
85
+ if title = at('//title')
86
+ title.children = tnode
87
+ return text
88
+ end
89
+
90
+ title = XML::Node.new('title', self) << tnode
91
+ case
92
+ when head = at('//head')
44
93
  head << title
94
+ when meta = at('//meta[@charset]') || meta_content_type
95
+ # better put after charset declaration
96
+ meta.add_next_sibling(title)
97
+ else
98
+ set_metadata_element(title)
99
+ end
100
+ text
101
+ end
102
+
103
+ def set_metadata_element(element)
104
+ case
105
+ when head = at('//head')
106
+ head << element
107
+ when html = at('//html')
108
+ head = html.prepend_child(XML::Node.new('head', self))
109
+ head.prepend_child(element)
110
+ when first = children.find { |node|
111
+ case node
112
+ when XML::Element, XML::Text
113
+ true
114
+ end
115
+ }
116
+ # We reach here only if the underlying document model
117
+ # allows <html>/<head> elements to be omitted and does not
118
+ # automatically supply them.
119
+ first.add_previous_sibling(element)
120
+ else
121
+ html = add_child(XML::Node.new('html', self))
122
+ head = html.add_child(XML::Node.new('head', self))
123
+ head.prepend_child(element)
45
124
  end
46
- title.children = XML::Text.new(text, self)
47
125
  end
126
+ private :set_metadata_element
48
127
 
49
128
  ####
50
129
  # Serialize Node using +options+. Save options can also be set using a
@@ -82,7 +161,7 @@ module Nokogiri
82
161
  # Nokogiri::XML::ParseOptions.
83
162
  def parse string_or_io, url = nil, encoding = nil, options = XML::ParseOptions::DEFAULT_HTML
84
163
 
85
- options = Nokogiri::XML::ParseOptions.new(options) if Fixnum === options
164
+ options = Nokogiri::XML::ParseOptions.new(options) if Integer === options
86
165
  # Give the options to the user
87
166
  yield options if block_given?
88
167
 
@@ -94,7 +173,7 @@ module Nokogiri
94
173
 
95
174
  if string_or_io.respond_to?(:read)
96
175
  url ||= string_or_io.respond_to?(:path) ? string_or_io.path : nil
97
- if !encoding
176
+ unless encoding
98
177
  # Libxml2's parser has poor support for encoding
99
178
  # detection. First, it does not recognize the HTML5
100
179
  # style meta charset declaration. Secondly, even if it
@@ -117,7 +196,9 @@ module Nokogiri
117
196
  end
118
197
 
119
198
  # read_memory pukes on empty docs
120
- return new if string_or_io.nil? or string_or_io.empty?
199
+ if string_or_io.nil? or string_or_io.empty?
200
+ return encoding ? new.tap { |i| i.encoding = encoding } : new
201
+ end
121
202
 
122
203
  encoding ||= EncodingReader.detect_encoding(string_or_io)
123
204
 
@@ -1,14 +1,22 @@
1
1
  module Nokogiri
2
2
  module HTML
3
3
  class DocumentFragment < Nokogiri::XML::DocumentFragment
4
- attr_accessor :errors
5
-
6
4
  ####
7
5
  # Create a Nokogiri::XML::DocumentFragment from +tags+, using +encoding+
8
6
  def self.parse tags, encoding = nil
9
7
  doc = HTML::Document.new
10
8
 
11
- encoding ||= tags.respond_to?(:encoding) ? tags.encoding.name : 'UTF-8'
9
+ encoding ||= if tags.respond_to?(:encoding)
10
+ encoding = tags.encoding
11
+ if encoding == ::Encoding::ASCII_8BIT
12
+ 'UTF-8'
13
+ else
14
+ encoding.name
15
+ end
16
+ else
17
+ 'UTF-8'
18
+ end
19
+
12
20
  doc.encoding = encoding
13
21
 
14
22
  new(doc, tags)
@@ -22,7 +22,7 @@ module Nokogiri
22
22
  # end
23
23
  #
24
24
  # parser = Nokogiri::HTML::SAX::Parser.new(MyDoc.new)
25
- # parser.parse(File.read(ARGV[0], 'rb'))
25
+ # parser.parse(File.read(ARGV[0], mode: 'rb'))
26
26
  #
27
27
  # For more information on SAX parsers, see Nokogiri::XML::SAX
28
28
  class Parser < Nokogiri::XML::SAX::Parser
@@ -36,11 +36,21 @@ module Nokogiri
36
36
  ctx.parse_with self
37
37
  end
38
38
 
39
+ ###
40
+ # Parse given +io+
41
+ def parse_io io, encoding = 'UTF-8'
42
+ check_encoding(encoding)
43
+ @encoding = encoding
44
+ ctx = ParserContext.io(io, ENCODINGS[encoding])
45
+ yield ctx if block_given?
46
+ ctx.parse_with self
47
+ end
48
+
39
49
  ###
40
50
  # Parse a file with +filename+
41
51
  def parse_file filename, encoding = 'UTF-8'
42
52
  raise ArgumentError unless filename
43
- raise Errno::ENOENT unless File.exists?(filename)
53
+ raise Errno::ENOENT unless File.exist?(filename)
44
54
  raise Errno::EISDIR if File.directory?(filename)
45
55
  ctx = ParserContext.file(filename, encoding)
46
56
  yield ctx if block_given?
@@ -2,13 +2,33 @@ module Nokogiri
2
2
  module HTML
3
3
  module SAX
4
4
  class PushParser
5
- def initialize(doc = XML::SAX::Document.new, file_name = nil, encoding = 'UTF-8')
5
+
6
+ # The Nokogiri::HTML::SAX::Document on which the PushParser will be
7
+ # operating
8
+ attr_accessor :document
9
+
10
+ def initialize(doc = HTML::SAX::Document.new, file_name = nil, encoding = 'UTF-8')
6
11
  @document = doc
7
12
  @encoding = encoding
8
13
  @sax_parser = HTML::SAX::Parser.new(doc, @encoding)
9
14
 
10
15
  ## Create our push parser context
11
- initialize_native(@sax_parser, file_name, @encoding)
16
+ initialize_native(@sax_parser, file_name, encoding)
17
+ end
18
+
19
+ ###
20
+ # Write a +chunk+ of HTML to the PushParser. Any callback methods
21
+ # that can be called will be called immediately.
22
+ def write chunk, last_chunk = false
23
+ native_write(chunk, last_chunk)
24
+ end
25
+ alias :<< :write
26
+
27
+ ###
28
+ # Finish the parsing. This method is only necessary for
29
+ # Nokogiri::HTML::SAX::Document#end_document to be called.
30
+ def finish
31
+ write '', true
12
32
  end
13
33
  end
14
34
  end
@@ -1,6 +1,6 @@
1
1
  module Nokogiri
2
2
  # The version of Nokogiri you are using
3
- VERSION = '1.5.10'
3
+ VERSION = "1.10.4"
4
4
 
5
5
  class VersionInfo # :nodoc:
6
6
  def jruby?
@@ -8,13 +8,14 @@ module Nokogiri
8
8
  end
9
9
 
10
10
  def engine
11
- defined?(RUBY_ENGINE) ? RUBY_ENGINE : 'mri'
11
+ defined?(RUBY_ENGINE) ? RUBY_ENGINE : "mri"
12
12
  end
13
13
 
14
14
  def loaded_parser_version
15
- LIBXML_PARSER_VERSION.scan(/^(.*)(..)(..)$/).first.collect{ |j|
16
- j.to_i
17
- }.join(".")
15
+ LIBXML_PARSER_VERSION.
16
+ scan(/^(\d+)(\d\d)(\d\d)(?!\d)/).first.
17
+ collect(&:to_i).
18
+ join(".")
18
19
  end
19
20
 
20
21
  def compiled_parser_version
@@ -25,6 +26,14 @@ module Nokogiri
25
26
  defined?(LIBXML_VERSION)
26
27
  end
27
28
 
29
+ def libxml2_using_system?
30
+ !libxml2_using_packaged?
31
+ end
32
+
33
+ def libxml2_using_packaged?
34
+ NOKOGIRI_USE_PACKAGED_LIBRARIES
35
+ end
36
+
28
37
  def warnings
29
38
  return [] unless libxml2?
30
39
 
@@ -37,24 +46,33 @@ module Nokogiri
37
46
 
38
47
  def to_hash
39
48
  hash_info = {}
40
- hash_info['warnings'] = []
41
- hash_info['nokogiri'] = Nokogiri::VERSION
42
- hash_info['ruby'] = {}
43
- hash_info['ruby']['version'] = ::RUBY_VERSION
44
- hash_info['ruby']['platform'] = ::RUBY_PLATFORM
45
- hash_info['ruby']['description'] = ::RUBY_DESCRIPTION
46
- hash_info['ruby']['engine'] = engine
47
- hash_info['ruby']['jruby'] = jruby? if jruby?
49
+ hash_info["warnings"] = []
50
+ hash_info["nokogiri"] = Nokogiri::VERSION
51
+ hash_info["ruby"] = {}
52
+ hash_info["ruby"]["version"] = ::RUBY_VERSION
53
+ hash_info["ruby"]["platform"] = ::RUBY_PLATFORM
54
+ hash_info["ruby"]["description"] = ::RUBY_DESCRIPTION
55
+ hash_info["ruby"]["engine"] = engine
56
+ hash_info["ruby"]["jruby"] = jruby? if jruby?
48
57
 
49
58
  if libxml2?
50
- hash_info['libxml'] = {}
51
- hash_info['libxml']['binding'] = 'extension'
52
- hash_info['libxml']['compiled'] = compiled_parser_version
53
- hash_info['libxml']['loaded'] = loaded_parser_version
54
- hash_info['warnings'] = warnings
59
+ hash_info["libxml"] = {}
60
+ hash_info["libxml"]["binding"] = "extension"
61
+ if libxml2_using_packaged?
62
+ hash_info["libxml"]["source"] = "packaged"
63
+ hash_info["libxml"]["libxml2_path"] = NOKOGIRI_LIBXML2_PATH
64
+ hash_info["libxml"]["libxslt_path"] = NOKOGIRI_LIBXSLT_PATH
65
+ hash_info["libxml"]["libxml2_patches"] = NOKOGIRI_LIBXML2_PATCHES
66
+ hash_info["libxml"]["libxslt_patches"] = NOKOGIRI_LIBXSLT_PATCHES
67
+ else
68
+ hash_info["libxml"]["source"] = "system"
69
+ end
70
+ hash_info["libxml"]["compiled"] = compiled_parser_version
71
+ hash_info["libxml"]["loaded"] = loaded_parser_version
72
+ hash_info["warnings"] = warnings
55
73
  elsif jruby?
56
- hash_info['xerces'] = Nokogiri::XERCES_VERSION
57
- hash_info['nekohtml'] = Nokogiri::NEKO_VERSION
74
+ hash_info["xerces"] = Nokogiri::XERCES_VERSION
75
+ hash_info["nekohtml"] = Nokogiri::NEKO_VERSION
58
76
  end
59
77
 
60
78
  hash_info
@@ -62,10 +80,10 @@ module Nokogiri
62
80
 
63
81
  def to_markdown
64
82
  begin
65
- require 'psych'
83
+ require "psych"
66
84
  rescue LoadError
67
85
  end
68
- require 'yaml'
86
+ require "yaml"
69
87
  "# Nokogiri (#{Nokogiri::VERSION})\n" +
70
88
  YAML.dump(to_hash).each_line.map { |line| " #{line}" }.join
71
89
  end
@@ -213,7 +213,7 @@ module Nokogiri
213
213
  # xml.foo
214
214
  # end
215
215
  # end
216
- #
216
+ #
217
217
  # puts builder.to_xml
218
218
  #
219
219
  # Will output this xml:
@@ -250,7 +250,7 @@ module Nokogiri
250
250
  # xml.awesome # add the "awesome" tag below "some_tag"
251
251
  # end
252
252
  #
253
- def self.with root, &block
253
+ def self.with(root, &block)
254
254
  new({}, root, &block)
255
255
  end
256
256
 
@@ -263,23 +263,25 @@ module Nokogiri
263
263
  # Nokogiri::XML::Builder.new(:encoding => 'UTF-8') do |xml|
264
264
  # ...
265
265
  # end
266
- def initialize options = {}, root = nil, &block
267
-
266
+ def initialize(options = {}, root = nil, &block)
268
267
  if root
269
- @doc = root.document
268
+ @doc = root.document
270
269
  @parent = root
271
270
  else
272
- namespace = self.class.name.split('::')
273
- namespace[-1] = 'Document'
274
- @doc = eval(namespace.join('::')).new
275
- @parent = @doc
271
+ klassname = "::" + (self.class.name.split("::")[0..-2] + ["Document"]).join("::")
272
+ klass = begin
273
+ Object.const_get(klassname)
274
+ rescue NameError
275
+ Nokogiri::XML::Document
276
+ end
277
+ @parent = @doc = klass.new
276
278
  end
277
279
 
278
- @context = nil
279
- @arity = nil
280
- @ns = nil
280
+ @context = nil
281
+ @arity = nil
282
+ @ns = nil
281
283
 
282
- options.each do |k,v|
284
+ options.each do |k, v|
283
285
  @doc.send(:"#{k}=", v)
284
286
  end
285
287
 
@@ -287,7 +289,7 @@ module Nokogiri
287
289
 
288
290
  @arity = block.arity
289
291
  if @arity <= 0
290
- @context = eval('self', block.binding)
292
+ @context = eval("self", block.binding)
291
293
  instance_eval(&block)
292
294
  else
293
295
  yield self
@@ -298,26 +300,26 @@ module Nokogiri
298
300
 
299
301
  ###
300
302
  # Create a Text Node with content of +string+
301
- def text string
303
+ def text(string)
302
304
  insert @doc.create_text_node(string)
303
305
  end
304
306
 
305
307
  ###
306
308
  # Create a CDATA Node with content of +string+
307
- def cdata string
309
+ def cdata(string)
308
310
  insert doc.create_cdata(string)
309
311
  end
310
312
 
311
313
  ###
312
314
  # Create a Comment Node with content of +string+
313
- def comment string
315
+ def comment(string)
314
316
  insert doc.create_comment(string)
315
317
  end
316
318
 
317
319
  ###
318
320
  # Build a tag that is associated with namespace +ns+. Raises an
319
321
  # ArgumentError if +ns+ has not been defined higher in the tree.
320
- def [] ns
322
+ def [](ns)
321
323
  if @parent != @doc
322
324
  @ns = @parent.namespace_definitions.find { |x| x.prefix == ns.to_s }
323
325
  end
@@ -348,15 +350,15 @@ module Nokogiri
348
350
 
349
351
  ###
350
352
  # Append the given raw XML +string+ to the document
351
- def << string
353
+ def <<(string)
352
354
  @doc.fragment(string).children.each { |x| insert(x) }
353
355
  end
354
356
 
355
- def method_missing method, *args, &block # :nodoc:
357
+ def method_missing(method, *args, &block) # :nodoc:
356
358
  if @context && @context.respond_to?(method)
357
359
  @context.send(method, *args, &block)
358
360
  else
359
- node = @doc.create_element(method.to_s.sub(/[_!]$/, ''),*args) { |n|
361
+ node = @doc.create_element(method.to_s.sub(/[_!]$/, ""), *args) { |n|
360
362
  # Set up the namespace
361
363
  if @ns.is_a? Nokogiri::XML::Namespace
362
364
  n.namespace = @ns
@@ -377,13 +379,14 @@ module Nokogiri
377
379
  end
378
380
 
379
381
  private
382
+
380
383
  ###
381
384
  # Insert +node+ as a child of the current Node
382
385
  def insert(node, &block)
383
- node.parent = @parent
386
+ node = @parent.add_child(node)
384
387
  if block_given?
385
388
  old_parent = @parent
386
- @parent = node
389
+ @parent = node
387
390
  @arity ||= block.arity
388
391
  if @arity <= 0
389
392
  instance_eval(&block)
@@ -396,16 +399,16 @@ module Nokogiri
396
399
  end
397
400
 
398
401
  class NodeBuilder # :nodoc:
399
- def initialize node, doc_builder
402
+ def initialize(node, doc_builder)
400
403
  @node = node
401
404
  @doc_builder = doc_builder
402
405
  end
403
406
 
404
- def []= k, v
407
+ def []=(k, v)
405
408
  @node[k] = v
406
409
  end
407
410
 
408
- def [] k
411
+ def [](k)
409
412
  @node[k]
410
413
  end
411
414
 
@@ -413,19 +416,19 @@ module Nokogiri
413
416
  opts = args.last.is_a?(Hash) ? args.pop : {}
414
417
  case method.to_s
415
418
  when /^(.*)!$/
416
- @node['id'] = $1
419
+ @node["id"] = $1
417
420
  @node.content = args.first if args.first
418
421
  when /^(.*)=/
419
422
  @node[$1] = args.first
420
423
  else
421
- @node['class'] =
422
- ((@node['class'] || '').split(/\s/) + [method.to_s]).join(' ')
424
+ @node["class"] =
425
+ ((@node["class"] || "").split(/\s/) + [method.to_s]).join(" ")
423
426
  @node.content = args.first if args.first
424
427
  end
425
428
 
426
429
  # Assign any extra options
427
- opts.each do |k,v|
428
- @node[k.to_s] = ((@node[k.to_s] || '').split(/\s/) + [v]).join(' ')
430
+ opts.each do |k, v|
431
+ @node[k.to_s] = ((@node[k.to_s] || "").split(/\s/) + [v]).join(" ")
429
432
  end
430
433
 
431
434
  if block_given?
@@ -5,8 +5,8 @@ module Nokogiri
5
5
  # XML documents. The Document is created by parsing an XML document.
6
6
  # See Nokogiri::XML::Document.parse() for more information on parsing.
7
7
  #
8
- # For searching a Document, see Nokogiri::XML::Node#css and
9
- # Nokogiri::XML::Node#xpath
8
+ # For searching a Document, see Nokogiri::XML::Searchable#css and
9
+ # Nokogiri::XML::Searchable#xpath
10
10
  #
11
11
  class Document < Nokogiri::XML::Node
12
12
  # I'm ignoring unicode characters here.
@@ -33,19 +33,26 @@ module Nokogiri
33
33
  # +block+ (optional) is passed a configuration object on which
34
34
  # parse options may be set.
35
35
  #
36
- # When parsing untrusted documents, it's recommended that the
37
- # +nonet+ option be used, as shown in this example code:
38
- #
39
- # Nokogiri::XML::Document.parse(xml_string) { |config| config.nonet }
36
+ # By default, Nokogiri treats documents as untrusted, and so
37
+ # does not attempt to load DTDs or access the network. See
38
+ # Nokogiri::XML::ParseOptions for a complete list of options;
39
+ # and that module's DEFAULT_XML constant for what's set (and not
40
+ # set) by default.
40
41
  #
41
42
  # Nokogiri.XML() is a convenience method which will call this method.
42
43
  #
43
- def self.parse string_or_io, url = nil, encoding = nil, options = ParseOptions::DEFAULT_XML, &block
44
- options = Nokogiri::XML::ParseOptions.new(options) if Fixnum === options
44
+ def self.parse string_or_io, url = nil, encoding = nil, options = ParseOptions::DEFAULT_XML
45
+ options = Nokogiri::XML::ParseOptions.new(options) if Integer === options
45
46
  # Give the options to the user
46
47
  yield options if block_given?
47
48
 
48
- return new if empty_doc?(string_or_io)
49
+ if empty_doc?(string_or_io)
50
+ if options.strict?
51
+ raise Nokogiri::XML::SyntaxError.new("Empty document")
52
+ else
53
+ return encoding ? new.tap { |i| i.encoding = encoding } : new
54
+ end
55
+ end
49
56
 
50
57
  doc = if string_or_io.respond_to?(:read)
51
58
  url ||= string_or_io.respond_to?(:path) ? string_or_io.path : nil
@@ -233,10 +240,10 @@ module Nokogiri
233
240
  undef_method :namespace_definitions, :line, :add_namespace
234
241
 
235
242
  def add_child node_or_tags
236
- raise "Document already has a root node" if root
243
+ raise "A document may not have multiple root nodes." if (root && root.name != 'nokogiri_text_wrapper') && !(node_or_tags.comment? || node_or_tags.processing_instruction?)
237
244
  node_or_tags = coerce(node_or_tags)
238
245
  if node_or_tags.is_a?(XML::NodeSet)
239
- raise "Document cannot have multiple root nodes" if node_or_tags.size > 1
246
+ raise "A document may not have multiple root nodes." if node_or_tags.size > 1
240
247
  super(node_or_tags.first)
241
248
  else
242
249
  super
@@ -267,9 +274,8 @@ module Nokogiri
267
274
  (string_or_io.respond_to?(:eof?) && string_or_io.eof?)
268
275
  end
269
276
 
270
- def implied_xpath_context
271
- "/"
272
- end
277
+ # @private
278
+ IMPLIED_XPATH_CONTEXTS = [ '//'.freeze ].freeze # :nodoc:
273
279
 
274
280
  def inspect_attributes
275
281
  [:name, :children]