nokogiri 1.10.3 → 1.11.4

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (159)
  1. checksums.yaml +4 -4
  2. data/Gemfile +3 -0
  3. data/LICENSE-DEPENDENCIES.md +1015 -947
  4. data/LICENSE.md +1 -1
  5. data/README.md +173 -94
  6. data/dependencies.yml +28 -26
  7. data/ext/nokogiri/depend +37 -358
  8. data/ext/nokogiri/extconf.rb +611 -391
  9. data/ext/nokogiri/html_document.c +78 -82
  10. data/ext/nokogiri/html_element_description.c +84 -71
  11. data/ext/nokogiri/html_entity_lookup.c +21 -16
  12. data/ext/nokogiri/html_sax_parser_context.c +69 -66
  13. data/ext/nokogiri/html_sax_push_parser.c +42 -34
  14. data/ext/nokogiri/libxml2_backwards_compat.c +121 -0
  15. data/ext/nokogiri/nokogiri.c +192 -87
  16. data/ext/nokogiri/nokogiri.h +181 -89
  17. data/ext/nokogiri/test_global_handlers.c +40 -0
  18. data/ext/nokogiri/xml_attr.c +15 -15
  19. data/ext/nokogiri/xml_attribute_decl.c +18 -18
  20. data/ext/nokogiri/xml_cdata.c +13 -18
  21. data/ext/nokogiri/xml_comment.c +19 -26
  22. data/ext/nokogiri/xml_document.c +255 -183
  23. data/ext/nokogiri/xml_document_fragment.c +13 -15
  24. data/ext/nokogiri/xml_dtd.c +54 -48
  25. data/ext/nokogiri/xml_element_content.c +30 -27
  26. data/ext/nokogiri/xml_element_decl.c +22 -22
  27. data/ext/nokogiri/xml_encoding_handler.c +17 -11
  28. data/ext/nokogiri/xml_entity_decl.c +32 -30
  29. data/ext/nokogiri/xml_entity_reference.c +16 -18
  30. data/ext/nokogiri/xml_namespace.c +56 -49
  31. data/ext/nokogiri/xml_node.c +387 -316
  32. data/ext/nokogiri/xml_node_set.c +168 -156
  33. data/ext/nokogiri/xml_processing_instruction.c +17 -19
  34. data/ext/nokogiri/xml_reader.c +195 -172
  35. data/ext/nokogiri/xml_relax_ng.c +52 -28
  36. data/ext/nokogiri/xml_sax_parser.c +118 -118
  37. data/ext/nokogiri/xml_sax_parser_context.c +103 -86
  38. data/ext/nokogiri/xml_sax_push_parser.c +36 -27
  39. data/ext/nokogiri/xml_schema.c +111 -34
  40. data/ext/nokogiri/xml_syntax_error.c +42 -21
  41. data/ext/nokogiri/xml_text.c +13 -17
  42. data/ext/nokogiri/xml_xpath_context.c +206 -123
  43. data/ext/nokogiri/xslt_stylesheet.c +158 -165
  44. data/lib/nokogiri/css/node.rb +1 -0
  45. data/lib/nokogiri/css/parser.rb +63 -62
  46. data/lib/nokogiri/css/parser.y +2 -2
  47. data/lib/nokogiri/css/parser_extras.rb +39 -36
  48. data/lib/nokogiri/css/syntax_error.rb +1 -0
  49. data/lib/nokogiri/css/tokenizer.rb +105 -103
  50. data/lib/nokogiri/css/xpath_visitor.rb +73 -43
  51. data/lib/nokogiri/css.rb +1 -0
  52. data/lib/nokogiri/decorators/slop.rb +1 -0
  53. data/lib/nokogiri/extension.rb +26 -0
  54. data/lib/nokogiri/html/builder.rb +1 -0
  55. data/lib/nokogiri/html/document.rb +13 -26
  56. data/lib/nokogiri/html/document_fragment.rb +16 -15
  57. data/lib/nokogiri/html/element_description.rb +1 -0
  58. data/lib/nokogiri/html/element_description_defaults.rb +1 -0
  59. data/lib/nokogiri/html/entity_lookup.rb +1 -0
  60. data/lib/nokogiri/html/sax/parser.rb +1 -0
  61. data/lib/nokogiri/html/sax/parser_context.rb +1 -0
  62. data/lib/nokogiri/html/sax/push_parser.rb +1 -0
  63. data/lib/nokogiri/html.rb +1 -0
  64. data/lib/nokogiri/jruby/dependencies.rb +20 -0
  65. data/lib/nokogiri/syntax_error.rb +1 -0
  66. data/lib/nokogiri/version/constant.rb +5 -0
  67. data/lib/nokogiri/version/info.rb +205 -0
  68. data/lib/nokogiri/version.rb +3 -109
  69. data/lib/nokogiri/xml/attr.rb +1 -0
  70. data/lib/nokogiri/xml/attribute_decl.rb +1 -0
  71. data/lib/nokogiri/xml/builder.rb +36 -32
  72. data/lib/nokogiri/xml/cdata.rb +1 -0
  73. data/lib/nokogiri/xml/character_data.rb +1 -0
  74. data/lib/nokogiri/xml/document.rb +92 -41
  75. data/lib/nokogiri/xml/document_fragment.rb +5 -6
  76. data/lib/nokogiri/xml/dtd.rb +1 -0
  77. data/lib/nokogiri/xml/element_content.rb +1 -0
  78. data/lib/nokogiri/xml/element_decl.rb +1 -0
  79. data/lib/nokogiri/xml/entity_decl.rb +1 -0
  80. data/lib/nokogiri/xml/entity_reference.rb +1 -0
  81. data/lib/nokogiri/xml/namespace.rb +1 -0
  82. data/lib/nokogiri/xml/node/save_options.rb +1 -0
  83. data/lib/nokogiri/xml/node.rb +625 -290
  84. data/lib/nokogiri/xml/node_set.rb +1 -0
  85. data/lib/nokogiri/xml/notation.rb +1 -0
  86. data/lib/nokogiri/xml/parse_options.rb +10 -3
  87. data/lib/nokogiri/xml/pp/character_data.rb +1 -0
  88. data/lib/nokogiri/xml/pp/node.rb +1 -0
  89. data/lib/nokogiri/xml/pp.rb +1 -0
  90. data/lib/nokogiri/xml/processing_instruction.rb +1 -0
  91. data/lib/nokogiri/xml/reader.rb +9 -12
  92. data/lib/nokogiri/xml/relax_ng.rb +7 -2
  93. data/lib/nokogiri/xml/sax/document.rb +1 -0
  94. data/lib/nokogiri/xml/sax/parser.rb +1 -0
  95. data/lib/nokogiri/xml/sax/parser_context.rb +1 -0
  96. data/lib/nokogiri/xml/sax/push_parser.rb +1 -0
  97. data/lib/nokogiri/xml/sax.rb +1 -0
  98. data/lib/nokogiri/xml/schema.rb +13 -4
  99. data/lib/nokogiri/xml/searchable.rb +25 -16
  100. data/lib/nokogiri/xml/syntax_error.rb +1 -0
  101. data/lib/nokogiri/xml/text.rb +1 -0
  102. data/lib/nokogiri/xml/xpath/syntax_error.rb +2 -1
  103. data/lib/nokogiri/xml/xpath.rb +2 -3
  104. data/lib/nokogiri/xml/xpath_context.rb +1 -0
  105. data/lib/nokogiri/xml.rb +1 -0
  106. data/lib/nokogiri/xslt/stylesheet.rb +1 -0
  107. data/lib/nokogiri/xslt.rb +1 -0
  108. data/lib/nokogiri.rb +6 -27
  109. data/lib/xsd/xmlparser/nokogiri.rb +1 -0
  110. data/patches/libxml2/{0002-Remove-script-macro-support.patch → 0001-Remove-script-macro-support.patch} +0 -0
  111. data/patches/libxml2/{0003-Update-entities-to-remove-handling-of-ssi.patch → 0002-Update-entities-to-remove-handling-of-ssi.patch} +0 -0
  112. data/patches/libxml2/0003-libxml2.la-is-in-top_builddir.patch +25 -0
  113. data/patches/libxml2/0004-use-glibc-strlen.patch +53 -0
  114. data/patches/libxml2/0005-avoid-isnan-isinf.patch +81 -0
  115. data/patches/libxml2/0006-update-automake-files-for-arm64.patch +2511 -0
  116. data/patches/libxslt/0001-update-automake-files-for-arm64.patch +2511 -0
  117. data/patches/libxslt/0002-Fix-xml2-config-check-in-configure-script.patch +19 -0
  118. data/ports/archives/libxml2-2.9.12.tar.gz +0 -0
  119. data/ports/archives/libxslt-1.1.34.tar.gz +0 -0
  120. metadata +102 -147
  121. data/ext/nokogiri/html_document.h +0 -10
  122. data/ext/nokogiri/html_element_description.h +0 -10
  123. data/ext/nokogiri/html_entity_lookup.h +0 -8
  124. data/ext/nokogiri/html_sax_parser_context.h +0 -11
  125. data/ext/nokogiri/html_sax_push_parser.h +0 -9
  126. data/ext/nokogiri/xml_attr.h +0 -9
  127. data/ext/nokogiri/xml_attribute_decl.h +0 -9
  128. data/ext/nokogiri/xml_cdata.h +0 -9
  129. data/ext/nokogiri/xml_comment.h +0 -9
  130. data/ext/nokogiri/xml_document.h +0 -23
  131. data/ext/nokogiri/xml_document_fragment.h +0 -10
  132. data/ext/nokogiri/xml_dtd.h +0 -10
  133. data/ext/nokogiri/xml_element_content.h +0 -10
  134. data/ext/nokogiri/xml_element_decl.h +0 -9
  135. data/ext/nokogiri/xml_encoding_handler.h +0 -8
  136. data/ext/nokogiri/xml_entity_decl.h +0 -10
  137. data/ext/nokogiri/xml_entity_reference.h +0 -9
  138. data/ext/nokogiri/xml_io.c +0 -61
  139. data/ext/nokogiri/xml_io.h +0 -11
  140. data/ext/nokogiri/xml_libxml2_hacks.c +0 -112
  141. data/ext/nokogiri/xml_libxml2_hacks.h +0 -12
  142. data/ext/nokogiri/xml_namespace.h +0 -14
  143. data/ext/nokogiri/xml_node.h +0 -13
  144. data/ext/nokogiri/xml_node_set.h +0 -12
  145. data/ext/nokogiri/xml_processing_instruction.h +0 -9
  146. data/ext/nokogiri/xml_reader.h +0 -10
  147. data/ext/nokogiri/xml_relax_ng.h +0 -9
  148. data/ext/nokogiri/xml_sax_parser.h +0 -39
  149. data/ext/nokogiri/xml_sax_parser_context.h +0 -10
  150. data/ext/nokogiri/xml_sax_push_parser.h +0 -9
  151. data/ext/nokogiri/xml_schema.h +0 -9
  152. data/ext/nokogiri/xml_syntax_error.h +0 -13
  153. data/ext/nokogiri/xml_text.h +0 -9
  154. data/ext/nokogiri/xml_xpath_context.h +0 -10
  155. data/ext/nokogiri/xslt_stylesheet.h +0 -14
  156. data/patches/libxml2/0001-Revert-Do-not-URI-escape-in-server-side-includes.patch +0 -78
  157. data/patches/libxslt/0001-Fix-security-framework-bypass.patch +0 -120
  158. data/ports/archives/libxml2-2.9.9.tar.gz +0 -0
  159. data/ports/archives/libxslt-1.1.33.tar.gz +0 -0
@@ -1,3 +1,8 @@
1
+ # coding: utf-8
2
+ # frozen_string_literal: true
3
+
4
+ require 'pathname'
5
+
1
6
  module Nokogiri
2
7
  module XML
3
8
  ##
@@ -9,11 +14,12 @@ module Nokogiri
9
14
  # Nokogiri::XML::Searchable#xpath
10
15
  #
11
16
  class Document < Nokogiri::XML::Node
12
- # I'm ignoring unicode characters here.
13
- # See http://www.w3.org/TR/REC-xml-names/#ns-decl for more details.
17
+ # See http://www.w3.org/TR/REC-xml-names/#ns-decl for more details. Note that we're not
18
+ # attempting to handle unicode characters partly because libxml2 doesn't handle unicode
19
+ # characters in NCNAMEs.
14
20
  NCNAME_START_CHAR = "A-Za-z_"
15
- NCNAME_CHAR = NCNAME_START_CHAR + "\\-.0-9"
16
- NCNAME_RE = /^xmlns(:[#{NCNAME_START_CHAR}][#{NCNAME_CHAR}]*)?$/
21
+ NCNAME_CHAR = NCNAME_START_CHAR + "\\-\\.0-9"
22
+ NCNAME_RE = /^xmlns(?::([#{NCNAME_START_CHAR}][#{NCNAME_CHAR}]*))?$/
17
23
 
18
24
  ##
19
25
  # Parse an XML file.
@@ -43,9 +49,11 @@ module Nokogiri
43
49
  #
44
50
  def self.parse string_or_io, url = nil, encoding = nil, options = ParseOptions::DEFAULT_XML
45
51
  options = Nokogiri::XML::ParseOptions.new(options) if Integer === options
46
- # Give the options to the user
52
+
47
53
  yield options if block_given?
48
54
 
55
+ url ||= string_or_io.respond_to?(:path) ? string_or_io.path : nil
56
+
49
57
  if empty_doc?(string_or_io)
50
58
  if options.strict?
51
59
  raise Nokogiri::XML::SyntaxError.new("Empty document")
@@ -55,12 +63,17 @@ module Nokogiri
55
63
  end
56
64
 
57
65
  doc = if string_or_io.respond_to?(:read)
58
- url ||= string_or_io.respond_to?(:path) ? string_or_io.path : nil
59
- read_io(string_or_io, url, encoding, options.to_i)
60
- else
61
- # read_memory pukes on empty docs
62
- read_memory(string_or_io, url, encoding, options.to_i)
63
- end
66
+ if string_or_io.is_a?(Pathname)
67
+ # resolve the Pathname to the file and open it as an IO object, see #2110
68
+ string_or_io = string_or_io.expand_path.open
69
+ url ||= string_or_io.path
70
+ end
71
+
72
+ read_io(string_or_io, url, encoding, options.to_i)
73
+ else
74
+ # read_memory pukes on empty docs
75
+ read_memory(string_or_io, url, encoding, options.to_i)
76
+ end
64
77
 
65
78
  # do xinclude processing
66
79
  doc.do_xinclude(options) if options.xinclude?
@@ -68,6 +81,35 @@ module Nokogiri
68
81
  return doc
69
82
  end
70
83
 
84
+ ##
85
+ # @!method wrap(java_document)
86
+ # @!scope class
87
+ #
88
+ # Create a {Document} using an existing Java DOM document object.
89
+ #
90
+ # The returned {Document} shares the same underlying data structure as the Java object, so
91
+ # changes in one are reflected in the other.
92
+ #
93
+ # @param java_document [Java::OrgW3cDom::Document]
94
+ # @return [Nokogiri::XML::Document]
95
+ # @note This method is only available when running JRuby.
96
+ # @note The class +Java::OrgW3cDom::Document+ is also accessible as +org.w3c.dom.Document+.
97
+ # @see #to_java
98
+
99
+ ##
100
+ # @!method to_java()
101
+ #
102
+ # Returns the underlying Java DOM document object for the {Document}.
103
+ #
104
+ # The returned Java object shares the same underlying data structure as the {Document}, so
105
+ # changes in one are reflected in the other.
106
+ #
107
+ # @return [Java::OrgW3cDom::Document]
108
+ # @note This method is only available when running JRuby.
109
+ # @note The class +Java::OrgW3cDom::Document+ is also accessible as +org.w3c.dom.Document+.
110
+ # @see .wrap
111
+
112
+
71
113
  # A list of Nokogiri::XML::SyntaxError found when parsing a document
72
114
  attr_accessor :errors
73
115
 
@@ -77,33 +119,58 @@ module Nokogiri
77
119
  end
78
120
 
79
121
  ##
80
- # Create an element with +name+, and optionally setting the content and attributes.
122
+ # Create a new +Element+ with +name+ sharing GC lifecycle with the document, optionally
123
+ # setting contents or attributes.
124
+ #
125
+ # Arguments may be passed to initialize the element:
126
+ # - a +Hash+ argument will be used to set attributes
127
+ # - a non-Hash object that responds to +#to_s+ will be used to set the new node's contents
128
+ #
129
+ # A block may be passed to mutate the node.
130
+ #
131
+ # @param name [String]
132
+ # @param contents_or_attrs [#to_s,Hash]
133
+ # @yieldparam node [Nokogiri::XML::Element]
134
+ # @return [Nokogiri::XML::Element]
135
+ #
136
+ # @example An empty element without attributes
137
+ # doc.create_element("div")
138
+ # # => <div></div>
139
+ #
140
+ # @example An element with contents
141
+ # doc.create_element("div", "contents")
142
+ # # => <div>contents</div>
143
+ #
144
+ # @example An element with attributes
145
+ # doc.create_element("div", {"class" => "container"})
146
+ # # => <div class='container'></div>
81
147
  #
82
- # doc.create_element "div" # <div></div>
83
- # doc.create_element "div", :class => "container" # <div class='container'></div>
84
- # doc.create_element "div", "contents" # <div>contents</div>
85
- # doc.create_element "div", "contents", :class => "container" # <div class='container'>contents</div>
86
- # doc.create_element "div" { |node| node['class'] = "container" } # <div class='container'></div>
148
+ # @example An element with contents and attributes
149
+ # doc.create_element("div", "contents", {"class" => "container"})
150
+ # # => <div class='container'>contents</div>
87
151
  #
88
- def create_element name, *args, &block
152
+ # @example Passing a block to mutate the element
153
+ # doc.create_element("div") { |node| node["class"] = "blue" if before_noon? }
154
+ #
155
+ def create_element(name, *contents_or_attrs, &block)
89
156
  elm = Nokogiri::XML::Element.new(name, self, &block)
90
- args.each do |arg|
157
+ contents_or_attrs.each do |arg|
91
158
  case arg
92
159
  when Hash
93
- arg.each { |k,v|
160
+ arg.each do |k, v|
94
161
  key = k.to_s
95
162
  if key =~ NCNAME_RE
96
- ns_name = key.split(":", 2)[1]
97
- elm.add_namespace_definition ns_name, v
163
+ ns_name = Regexp.last_match(1)
164
+ elm.add_namespace_definition(ns_name, v)
98
165
  else
99
166
  elm[k.to_s] = v.to_s
100
167
  end
101
- }
168
+ end
102
169
  else
103
170
  elm.content = arg
104
171
  end
105
172
  end
106
- if ns = elm.namespace_definitions.find { |n| n.prefix.nil? or n.prefix == '' }
173
+ if ns = elm.namespace_definitions.find { |n| n.prefix.nil? || (n.prefix == '') }
107
174
  elm.namespace = ns
108
175
  end
109
176
  elm
@@ -251,30 +318,14 @@ module Nokogiri
251
318
  end
252
319
  alias :<< :add_child
253
320
 
254
- ##
255
- # +JRuby+
256
- # Wraps Java's org.w3c.dom.document and returns Nokogiri::XML::Document
257
- def self.wrap document
258
- raise "JRuby only method" unless Nokogiri.jruby?
259
- return wrapJavaDocument(document)
260
- end
261
-
262
- ##
263
- # +JRuby+
264
- # Returns Java's org.w3c.dom.document of this Document.
265
- def to_java
266
- raise "JRuby only method" unless Nokogiri.jruby?
267
- return toJavaDocument()
268
- end
269
-
270
321
  private
322
+
271
323
  def self.empty_doc? string_or_io
272
324
  string_or_io.nil? ||
273
325
  (string_or_io.respond_to?(:empty?) && string_or_io.empty?) ||
274
326
  (string_or_io.respond_to?(:eof?) && string_or_io.eof?)
275
327
  end
276
328
 
277
- # @private
278
329
  IMPLIED_XPATH_CONTEXTS = [ '//'.freeze ].freeze # :nodoc:
279
330
 
280
331
  def inspect_attributes
@@ -1,3 +1,4 @@
1
+ # frozen_string_literal: true
1
2
  module Nokogiri
2
3
  module XML
3
4
  class DocumentFragment < Nokogiri::XML::Node
@@ -140,6 +141,10 @@ module Nokogiri
140
141
  document.errors = things
141
142
  end
142
143
 
144
+ def fragment(data)
145
+ document.fragment(data)
146
+ end
147
+
143
148
  private
144
149
 
145
150
  # fix for issue 770
@@ -149,12 +154,6 @@ module Nokogiri
149
154
  %Q{xmlns#{prefix}="#{namespace.href}"}
150
155
  end.join ' '
151
156
  end
152
-
153
- def coerce data
154
- return super unless String === data
155
-
156
- document.fragment(data).children
157
- end
158
157
  end
159
158
  end
160
159
  end
@@ -1,3 +1,4 @@
1
+ # frozen_string_literal: true
1
2
  module Nokogiri
2
3
  module XML
3
4
  class DTD < Nokogiri::XML::Node
@@ -1,3 +1,4 @@
1
+ # frozen_string_literal: true
1
2
  module Nokogiri
2
3
  module XML
3
4
  ###
@@ -1,3 +1,4 @@
1
+ # frozen_string_literal: true
1
2
  module Nokogiri
2
3
  module XML
3
4
  class ElementDecl < Nokogiri::XML::Node
@@ -1,3 +1,4 @@
1
+ # frozen_string_literal: true
1
2
  module Nokogiri
2
3
  module XML
3
4
  class EntityDecl < Nokogiri::XML::Node
@@ -1,3 +1,4 @@
1
+ # frozen_string_literal: true
1
2
  module Nokogiri
2
3
  module XML
3
4
  class EntityReference < Nokogiri::XML::Node
@@ -1,3 +1,4 @@
1
+ # frozen_string_literal: true
1
2
  module Nokogiri
2
3
  module XML
3
4
  class Namespace
@@ -1,3 +1,4 @@
1
+ # frozen_string_literal: true
1
2
  module Nokogiri
2
3
  module XML
3
4
  class Node