nokogiri 1.10.10 → 1.12.0

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (216) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile +3 -0
  3. data/LICENSE-DEPENDENCIES.md +1173 -884
  4. data/LICENSE.md +1 -1
  5. data/README.md +176 -96
  6. data/dependencies.yml +12 -12
  7. data/ext/nokogiri/depend +38 -358
  8. data/ext/nokogiri/extconf.rb +712 -414
  9. data/ext/nokogiri/gumbo.c +584 -0
  10. data/ext/nokogiri/html4_document.c +166 -0
  11. data/ext/nokogiri/html4_element_description.c +294 -0
  12. data/ext/nokogiri/html4_entity_lookup.c +37 -0
  13. data/ext/nokogiri/html4_sax_parser_context.c +119 -0
  14. data/ext/nokogiri/html4_sax_push_parser.c +95 -0
  15. data/ext/nokogiri/libxml2_backwards_compat.c +121 -0
  16. data/ext/nokogiri/nokogiri.c +228 -91
  17. data/ext/nokogiri/nokogiri.h +188 -89
  18. data/ext/nokogiri/test_global_handlers.c +40 -0
  19. data/ext/nokogiri/xml_attr.c +15 -15
  20. data/ext/nokogiri/xml_attribute_decl.c +18 -18
  21. data/ext/nokogiri/xml_cdata.c +13 -18
  22. data/ext/nokogiri/xml_comment.c +19 -26
  23. data/ext/nokogiri/xml_document.c +267 -195
  24. data/ext/nokogiri/xml_document_fragment.c +13 -15
  25. data/ext/nokogiri/xml_dtd.c +54 -48
  26. data/ext/nokogiri/xml_element_content.c +31 -26
  27. data/ext/nokogiri/xml_element_decl.c +22 -22
  28. data/ext/nokogiri/xml_encoding_handler.c +28 -17
  29. data/ext/nokogiri/xml_entity_decl.c +32 -30
  30. data/ext/nokogiri/xml_entity_reference.c +16 -18
  31. data/ext/nokogiri/xml_namespace.c +58 -49
  32. data/ext/nokogiri/xml_node.c +489 -410
  33. data/ext/nokogiri/xml_node_set.c +174 -162
  34. data/ext/nokogiri/xml_processing_instruction.c +17 -19
  35. data/ext/nokogiri/xml_reader.c +197 -172
  36. data/ext/nokogiri/xml_relax_ng.c +52 -28
  37. data/ext/nokogiri/xml_sax_parser.c +112 -112
  38. data/ext/nokogiri/xml_sax_parser_context.c +105 -86
  39. data/ext/nokogiri/xml_sax_push_parser.c +36 -27
  40. data/ext/nokogiri/xml_schema.c +96 -46
  41. data/ext/nokogiri/xml_syntax_error.c +42 -21
  42. data/ext/nokogiri/xml_text.c +13 -17
  43. data/ext/nokogiri/xml_xpath_context.c +158 -73
  44. data/ext/nokogiri/xslt_stylesheet.c +158 -164
  45. data/gumbo-parser/CHANGES.md +63 -0
  46. data/gumbo-parser/Makefile +101 -0
  47. data/gumbo-parser/THANKS +27 -0
  48. data/gumbo-parser/src/Makefile +17 -0
  49. data/gumbo-parser/src/README.md +41 -0
  50. data/gumbo-parser/src/ascii.c +75 -0
  51. data/gumbo-parser/src/ascii.h +115 -0
  52. data/gumbo-parser/src/attribute.c +42 -0
  53. data/gumbo-parser/src/attribute.h +17 -0
  54. data/gumbo-parser/src/char_ref.c +22225 -0
  55. data/gumbo-parser/src/char_ref.h +29 -0
  56. data/gumbo-parser/src/char_ref.rl +2154 -0
  57. data/gumbo-parser/src/error.c +626 -0
  58. data/gumbo-parser/src/error.h +148 -0
  59. data/gumbo-parser/src/foreign_attrs.c +104 -0
  60. data/gumbo-parser/src/foreign_attrs.gperf +27 -0
  61. data/gumbo-parser/src/gumbo.h +943 -0
  62. data/gumbo-parser/src/insertion_mode.h +33 -0
  63. data/gumbo-parser/src/macros.h +91 -0
  64. data/gumbo-parser/src/parser.c +4886 -0
  65. data/gumbo-parser/src/parser.h +41 -0
  66. data/gumbo-parser/src/replacement.h +33 -0
  67. data/gumbo-parser/src/string_buffer.c +103 -0
  68. data/gumbo-parser/src/string_buffer.h +68 -0
  69. data/gumbo-parser/src/string_piece.c +48 -0
  70. data/gumbo-parser/src/svg_attrs.c +174 -0
  71. data/gumbo-parser/src/svg_attrs.gperf +77 -0
  72. data/gumbo-parser/src/svg_tags.c +137 -0
  73. data/gumbo-parser/src/svg_tags.gperf +55 -0
  74. data/gumbo-parser/src/tag.c +222 -0
  75. data/gumbo-parser/src/tag_lookup.c +382 -0
  76. data/gumbo-parser/src/tag_lookup.gperf +169 -0
  77. data/gumbo-parser/src/tag_lookup.h +13 -0
  78. data/gumbo-parser/src/token_buffer.c +79 -0
  79. data/gumbo-parser/src/token_buffer.h +71 -0
  80. data/gumbo-parser/src/token_type.h +17 -0
  81. data/gumbo-parser/src/tokenizer.c +3463 -0
  82. data/gumbo-parser/src/tokenizer.h +112 -0
  83. data/gumbo-parser/src/tokenizer_states.h +339 -0
  84. data/gumbo-parser/src/utf8.c +245 -0
  85. data/gumbo-parser/src/utf8.h +164 -0
  86. data/gumbo-parser/src/util.c +68 -0
  87. data/gumbo-parser/src/util.h +30 -0
  88. data/gumbo-parser/src/vector.c +111 -0
  89. data/gumbo-parser/src/vector.h +45 -0
  90. data/lib/nokogiri.rb +32 -51
  91. data/lib/nokogiri/css.rb +15 -14
  92. data/lib/nokogiri/css/node.rb +1 -0
  93. data/lib/nokogiri/css/parser.rb +64 -63
  94. data/lib/nokogiri/css/parser.y +3 -3
  95. data/lib/nokogiri/css/parser_extras.rb +39 -36
  96. data/lib/nokogiri/css/syntax_error.rb +2 -1
  97. data/lib/nokogiri/css/tokenizer.rb +1 -0
  98. data/lib/nokogiri/css/xpath_visitor.rb +73 -43
  99. data/lib/nokogiri/decorators/slop.rb +1 -0
  100. data/lib/nokogiri/extension.rb +26 -0
  101. data/lib/nokogiri/gumbo.rb +14 -0
  102. data/lib/nokogiri/html.rb +32 -27
  103. data/lib/nokogiri/html4.rb +40 -0
  104. data/lib/nokogiri/{html → html4}/builder.rb +3 -2
  105. data/lib/nokogiri/{html → html4}/document.rb +17 -30
  106. data/lib/nokogiri/{html → html4}/document_fragment.rb +18 -17
  107. data/lib/nokogiri/{html → html4}/element_description.rb +2 -1
  108. data/lib/nokogiri/{html → html4}/element_description_defaults.rb +2 -1
  109. data/lib/nokogiri/{html → html4}/entity_lookup.rb +2 -1
  110. data/lib/nokogiri/{html → html4}/sax/parser.rb +12 -14
  111. data/lib/nokogiri/html4/sax/parser_context.rb +19 -0
  112. data/lib/nokogiri/{html → html4}/sax/push_parser.rb +6 -5
  113. data/lib/nokogiri/html5.rb +473 -0
  114. data/lib/nokogiri/html5/document.rb +74 -0
  115. data/lib/nokogiri/html5/document_fragment.rb +80 -0
  116. data/lib/nokogiri/html5/node.rb +93 -0
  117. data/lib/nokogiri/jruby/dependencies.rb +20 -0
  118. data/lib/nokogiri/syntax_error.rb +1 -0
  119. data/lib/nokogiri/version.rb +3 -109
  120. data/lib/nokogiri/version/constant.rb +5 -0
  121. data/lib/nokogiri/version/info.rb +215 -0
  122. data/lib/nokogiri/xml.rb +36 -36
  123. data/lib/nokogiri/xml/attr.rb +1 -0
  124. data/lib/nokogiri/xml/attribute_decl.rb +1 -0
  125. data/lib/nokogiri/xml/builder.rb +3 -2
  126. data/lib/nokogiri/xml/cdata.rb +1 -0
  127. data/lib/nokogiri/xml/character_data.rb +1 -0
  128. data/lib/nokogiri/xml/document.rb +92 -41
  129. data/lib/nokogiri/xml/document_fragment.rb +5 -6
  130. data/lib/nokogiri/xml/dtd.rb +1 -0
  131. data/lib/nokogiri/xml/element_content.rb +1 -0
  132. data/lib/nokogiri/xml/element_decl.rb +1 -0
  133. data/lib/nokogiri/xml/entity_decl.rb +1 -0
  134. data/lib/nokogiri/xml/entity_reference.rb +1 -0
  135. data/lib/nokogiri/xml/namespace.rb +1 -0
  136. data/lib/nokogiri/xml/node.rb +629 -293
  137. data/lib/nokogiri/xml/node/save_options.rb +1 -0
  138. data/lib/nokogiri/xml/node_set.rb +1 -0
  139. data/lib/nokogiri/xml/notation.rb +1 -0
  140. data/lib/nokogiri/xml/parse_options.rb +12 -3
  141. data/lib/nokogiri/xml/pp.rb +3 -2
  142. data/lib/nokogiri/xml/pp/character_data.rb +1 -0
  143. data/lib/nokogiri/xml/pp/node.rb +1 -0
  144. data/lib/nokogiri/xml/processing_instruction.rb +1 -0
  145. data/lib/nokogiri/xml/reader.rb +9 -12
  146. data/lib/nokogiri/xml/relax_ng.rb +7 -2
  147. data/lib/nokogiri/xml/sax.rb +5 -4
  148. data/lib/nokogiri/xml/sax/document.rb +25 -30
  149. data/lib/nokogiri/xml/sax/parser.rb +1 -0
  150. data/lib/nokogiri/xml/sax/parser_context.rb +1 -0
  151. data/lib/nokogiri/xml/sax/push_parser.rb +1 -0
  152. data/lib/nokogiri/xml/schema.rb +13 -4
  153. data/lib/nokogiri/xml/searchable.rb +25 -16
  154. data/lib/nokogiri/xml/syntax_error.rb +1 -0
  155. data/lib/nokogiri/xml/text.rb +1 -0
  156. data/lib/nokogiri/xml/xpath.rb +4 -5
  157. data/lib/nokogiri/xml/xpath/syntax_error.rb +2 -1
  158. data/lib/nokogiri/xml/xpath_context.rb +1 -0
  159. data/lib/nokogiri/xslt.rb +17 -16
  160. data/lib/nokogiri/xslt/stylesheet.rb +2 -1
  161. data/lib/xsd/xmlparser/nokogiri.rb +1 -0
  162. data/patches/libxml2/{0002-Remove-script-macro-support.patch → 0001-Remove-script-macro-support.patch} +0 -0
  163. data/patches/libxml2/{0003-Update-entities-to-remove-handling-of-ssi.patch → 0002-Update-entities-to-remove-handling-of-ssi.patch} +0 -0
  164. data/patches/libxml2/{0004-libxml2.la-is-in-top_builddir.patch → 0003-libxml2.la-is-in-top_builddir.patch} +1 -1
  165. data/patches/libxml2/0004-use-glibc-strlen.patch +53 -0
  166. data/patches/libxml2/0005-avoid-isnan-isinf.patch +81 -0
  167. data/patches/libxml2/0006-update-automake-files-for-arm64.patch +2511 -0
  168. data/patches/libxml2/0007-Fix-XPath-recursion-limit.patch +31 -0
  169. data/patches/libxslt/0001-update-automake-files-for-arm64.patch +2511 -0
  170. data/patches/libxslt/0002-Fix-xml2-config-check-in-configure-script.patch +19 -0
  171. data/ports/archives/libxml2-2.9.12.tar.gz +0 -0
  172. metadata +139 -161
  173. data/ext/nokogiri/html_document.c +0 -170
  174. data/ext/nokogiri/html_document.h +0 -10
  175. data/ext/nokogiri/html_element_description.c +0 -279
  176. data/ext/nokogiri/html_element_description.h +0 -10
  177. data/ext/nokogiri/html_entity_lookup.c +0 -32
  178. data/ext/nokogiri/html_entity_lookup.h +0 -8
  179. data/ext/nokogiri/html_sax_parser_context.c +0 -116
  180. data/ext/nokogiri/html_sax_parser_context.h +0 -11
  181. data/ext/nokogiri/html_sax_push_parser.c +0 -87
  182. data/ext/nokogiri/html_sax_push_parser.h +0 -9
  183. data/ext/nokogiri/xml_attr.h +0 -9
  184. data/ext/nokogiri/xml_attribute_decl.h +0 -9
  185. data/ext/nokogiri/xml_cdata.h +0 -9
  186. data/ext/nokogiri/xml_comment.h +0 -9
  187. data/ext/nokogiri/xml_document.h +0 -23
  188. data/ext/nokogiri/xml_document_fragment.h +0 -10
  189. data/ext/nokogiri/xml_dtd.h +0 -10
  190. data/ext/nokogiri/xml_element_content.h +0 -10
  191. data/ext/nokogiri/xml_element_decl.h +0 -9
  192. data/ext/nokogiri/xml_encoding_handler.h +0 -8
  193. data/ext/nokogiri/xml_entity_decl.h +0 -10
  194. data/ext/nokogiri/xml_entity_reference.h +0 -9
  195. data/ext/nokogiri/xml_io.c +0 -61
  196. data/ext/nokogiri/xml_io.h +0 -11
  197. data/ext/nokogiri/xml_libxml2_hacks.c +0 -112
  198. data/ext/nokogiri/xml_libxml2_hacks.h +0 -12
  199. data/ext/nokogiri/xml_namespace.h +0 -14
  200. data/ext/nokogiri/xml_node.h +0 -13
  201. data/ext/nokogiri/xml_node_set.h +0 -12
  202. data/ext/nokogiri/xml_processing_instruction.h +0 -9
  203. data/ext/nokogiri/xml_reader.h +0 -10
  204. data/ext/nokogiri/xml_relax_ng.h +0 -9
  205. data/ext/nokogiri/xml_sax_parser.h +0 -39
  206. data/ext/nokogiri/xml_sax_parser_context.h +0 -10
  207. data/ext/nokogiri/xml_sax_push_parser.h +0 -9
  208. data/ext/nokogiri/xml_schema.h +0 -9
  209. data/ext/nokogiri/xml_syntax_error.h +0 -13
  210. data/ext/nokogiri/xml_text.h +0 -9
  211. data/ext/nokogiri/xml_xpath_context.h +0 -10
  212. data/ext/nokogiri/xslt_stylesheet.h +0 -14
  213. data/lib/nokogiri/html/sax/parser_context.rb +0 -16
  214. data/patches/libxml2/0001-Revert-Do-not-URI-escape-in-server-side-includes.patch +0 -78
  215. data/patches/libxml2/0005-Fix-infinite-loop-in-xmlStringLenDecodeEntities.patch +0 -32
  216. data/ports/archives/libxml2-2.9.10.tar.gz +0 -0
@@ -0,0 +1,74 @@
1
+ # frozen_string_literal: true
2
+ #
3
+ # Copyright 2013-2021 Sam Ruby, Stephen Checkoway
4
+ #
5
+ # Licensed under the Apache License, Version 2.0 (the "License");
6
+ # you may not use this file except in compliance with the License.
7
+ # You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing, software
12
+ # distributed under the License is distributed on an "AS IS" BASIS,
13
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ # See the License for the specific language governing permissions and
15
+ # limitations under the License.
16
+ #
17
+
18
+ require_relative "../html4/document"
19
+
20
module Nokogiri
  module HTML5
    # A document built by Nokogiri::Gumbo.parse.
    #
    # @since v1.12.0
    # @note HTML5 functionality is not available when running JRuby.
    class Document < Nokogiri::HTML4::Document
      # Parse HTML5 markup from a String or an IO-like object.
      #
      # @param string_or_io [#to_str, #read, nil] the markup; +nil+ is treated
      #   as an empty string
      # @param url [String, nil] document URL; when omitted and the input
      #   responds to both +read+ and +path+, the input's path is used
      # @param encoding [String, nil] input encoding; when omitted, the input's
      #   own encoding name is used unless that encoding is ASCII-8BIT
      # @param options [Hash] parser limits: +:max_attributes+, +:max_errors+
      #   (alias +:max_parse_errors+) and +:max_tree_depth+
      # @yield [options] the options Hash, before any parsing happens, so the
      #   caller can adjust it
      # @return the document returned by Nokogiri::Gumbo.parse
      # @raise [ArgumentError] if the input responds to neither +read+ nor +to_str+
      def self.parse(string_or_io, url = nil, encoding = nil, **options, &block)
        yield options if block_given?
        string_or_io = '' unless string_or_io

        if string_or_io.respond_to?(:encoding) && string_or_io.encoding.name != 'ASCII-8BIT'
          encoding ||= string_or_io.encoding.name
        end

        if string_or_io.respond_to?(:read) && string_or_io.respond_to?(:path)
          url ||= string_or_io.path
        end
        unless string_or_io.respond_to?(:read) || string_or_io.respond_to?(:to_str)
          raise ArgumentError, "not a string or IO object"
        end
        do_parse(string_or_io, url, encoding, options)
      end

      # Parse HTML5 markup from an object that responds to +read+.
      #
      # @raise [ArgumentError] if +io+ does not respond to +read+
      def self.read_io(io, url = nil, encoding = nil, **options)
        raise ArgumentError, "io object doesn't respond to :read" unless io.respond_to?(:read)
        do_parse(io, url, encoding, options)
      end

      # Parse HTML5 markup from an object that responds to +to_str+.
      #
      # @raise [ArgumentError] if +string+ does not respond to +to_str+
      def self.read_memory(string, url = nil, encoding = nil, **options)
        raise ArgumentError, "string object doesn't respond to :to_str" unless string.respond_to?(:to_str)
        do_parse(string, url, encoding, options)
      end

      # Build a DocumentFragment for this document, using the document's root
      # as the parsing context.
      def fragment(tags = nil)
        DocumentFragment.new(self, tags, self.root)
      end

      def to_xml(options = {}, &block)
        # Bypass XML::Document#to_xml which doesn't add
        # XML::Node::SaveOptions::AS_XML like XML::Node#to_xml does.
        XML::Node.instance_method(:to_xml).bind(self).call(options, &block)
      end

      # Internal helper shared by the public parsing entry points: reads and
      # transcodes the input, resolves the parser limits (falling back to the
      # Nokogiri::Gumbo defaults) and marks the resulting document as UTF-8.
      #
      # A bare +private+ does not apply to methods defined with +def self.+,
      # so this method is publicly callable; its visibility is deliberately
      # left unchanged. Treat it as internal API.
      def self.do_parse(string_or_io, url, encoding, options)
        string = HTML5.read_and_encode(string_or_io, encoding)
        max_attributes = options[:max_attributes] || Nokogiri::Gumbo::DEFAULT_MAX_ATTRIBUTES
        max_errors = options[:max_errors] || options[:max_parse_errors] || Nokogiri::Gumbo::DEFAULT_MAX_ERRORS
        max_depth = options[:max_tree_depth] || Nokogiri::Gumbo::DEFAULT_MAX_TREE_DEPTH
        doc = Nokogiri::Gumbo.parse(string, url, max_attributes, max_errors, max_depth)
        doc.encoding = 'UTF-8'
        doc
      end
    end
  end
end
@@ -0,0 +1,80 @@
1
+ # frozen_string_literal: true
2
+ #
3
+ # Copyright 2013-2021 Sam Ruby, Stephen Checkoway
4
+ #
5
+ # Licensed under the Apache License, Version 2.0 (the "License");
6
+ # you may not use this file except in compliance with the License.
7
+ # You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing, software
12
+ # distributed under the License is distributed on an "AS IS" BASIS,
13
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ # See the License for the specific language governing permissions and
15
+ # limitations under the License.
16
+ #
17
+
18
+ require_relative "../html4/document_fragment"
19
+
20
module Nokogiri
  module HTML5
    # @since v1.12.0
    # @note HTML5 functionality is not available when running JRuby.
    class DocumentFragment < Nokogiri::HTML4::DocumentFragment
      attr_accessor :document
      attr_accessor :errors

      # Create a document fragment.
      #
      # @param doc the document this fragment belongs to
      # @param tags [#to_str, #read, nil] markup to parse; when +nil+ the
      #   fragment is left empty and nothing is parsed
      # @param ctx the context passed through to Nokogiri::Gumbo.fragment
      # @param options [Hash] parser limits: +:max_attributes+, +:max_errors+
      #   (alias +:max_parse_errors+, matching HTML5::Document) and
      #   +:max_tree_depth+; each falls back to the Nokogiri::Gumbo default
      def initialize(doc, tags = nil, ctx = nil, options = {})
        self.document = doc
        self.errors = []
        return self unless tags

        max_attributes = options[:max_attributes] || Nokogiri::Gumbo::DEFAULT_MAX_ATTRIBUTES
        max_errors = options[:max_errors] || options[:max_parse_errors] || Nokogiri::Gumbo::DEFAULT_MAX_ERRORS
        max_depth = options[:max_tree_depth] || Nokogiri::Gumbo::DEFAULT_MAX_TREE_DEPTH
        tags = Nokogiri::HTML5.read_and_encode(tags, nil)
        Nokogiri::Gumbo.fragment(self, tags, ctx, max_attributes, max_errors, max_depth)
      end

      def serialize(options = {}, &block)
        # Bypass XML::Document.serialize which doesn't support options even
        # though XML::Node.serialize does!
        XML::Node.instance_method(:serialize).bind(self).call(options, &block)
      end

      # Parse a document fragment from +tags+ inside a fresh, UTF-8
      # HTML5::Document and return the new fragment.
      #
      # @param tags [#to_str, #read] markup to parse
      # @param encoding [String, nil] encoding used when reading +tags+
      # @param options [Hash] forwarded to #initialize
      def self.parse(tags, encoding = nil, options = {})
        doc = HTML5::Document.new
        tags = HTML5.read_and_encode(tags, encoding)
        doc.encoding = "UTF-8"
        new(doc, tags, nil, options)
      end

      # Split search arguments into query strings, an optional handler object,
      # a namespace Hash and a bindings Hash.
      #
      # The handler is the first argument that is not a Hash, String or
      # Symbol. Trailing Hash/nil arguments are taken as namespaces and
      # bindings, in that order. When no namespaces are given they are
      # collected from this fragment's children.
      def extract_params(params) # :nodoc:
        handler = params.find do |param|
          ![Hash, String, Symbol].include?(param.class)
        end
        params -= [handler] if handler

        hashes = []
        while Hash === params.last || params.last.nil?
          hashes << params.pop
          break if params.empty?
        end
        ns, binds = hashes.reverse

        ns ||= children.each_with_object({}) { |child, found| found.merge!(child.namespaces) }

        [params, handler, ns, binds]
      end
    end
  end
end
80
+ # vim: set shiftwidth=2 softtabstop=2 tabstop=8 expandtab:
@@ -0,0 +1,93 @@
1
+ # frozen_string_literal: true
2
+ #
3
+ # Copyright 2013-2021 Sam Ruby, Stephen Checkoway
4
+ #
5
+ # Licensed under the Apache License, Version 2.0 (the "License");
6
+ # you may not use this file except in compliance with the License.
7
+ # You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing, software
12
+ # distributed under the License is distributed on an "AS IS" BASIS,
13
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ # See the License for the specific language governing permissions and
15
+ # limitations under the License.
16
+ #
17
+
18
+ require_relative "../xml/node"
19
+
20
module Nokogiri
  module HTML5
    # Overrides prepended onto XML::Node. Every method defers to the original
    # implementation unless the node belongs to an HTML5::Document.
    #
    # @since v1.12.0
    # @note HTML5 functionality is not available when running JRuby.
    module Node
      # Serialize this node's children with +to_html+ and join the results.
      # When +options[:preserve_newline]+ is set and
      # HTML5.prepend_newline?(self) is true, the output starts with "\n".
      def inner_html(options = {})
        return super(options) unless document.is_a?(HTML5::Document)
        leading_newline = options[:preserve_newline] && HTML5.prepend_newline?(self)
        result = leading_newline ? String.new("\n") : String.new
        result << children.map { |child| child.to_html(options) }.join
        result
      end

      # Write this node to +io+.
      #
      # Options may be given as a leading Hash (+:encoding+, +:save_with+,
      # +:indent+, +:indent_text+, +:preserve_newline+) and/or positionally as
      # (encoding, save_options). The Hash and the positional arguments are
      # kept in separate variables; rebinding the splat to the Hash would make
      # the positional lookups index into a Hash and always yield nil.
      #
      # @yield [config] the XML::Node::SaveOptions object, for adjustment
      def write_to(io, *options)
        return super(io, *options) unless document.is_a?(HTML5::Document)
        named = options.first.is_a?(Hash) ? options.shift : {}
        encoding = named[:encoding] || options[0]
        if Nokogiri.jruby?
          save_options = named[:save_with] || options[1]
          indent_times = named[:indent] || 0
        else
          save_options = named[:save_with] || options[1] || XML::Node::SaveOptions::FORMAT
          indent_times = named[:indent] || 2
        end
        indent_string = (named[:indent_text] || " ") * indent_times

        config = XML::Node::SaveOptions.new(save_options.to_i)
        yield config if block_given?

        config_options = config.options
        if config_options & (XML::Node::SaveOptions::AS_XML | XML::Node::SaveOptions::AS_XHTML) != 0
          # Use Nokogiri's serializing code.
          native_write_to(io, encoding, indent_string, config_options)
        else
          # Serialize including the current node.
          encoding ||= document.encoding || Encoding::UTF_8
          internal_ops = {
            preserve_newline: named[:preserve_newline] || false,
          }
          HTML5.serialize_node_internal(self, io, encoding, internal_ops)
        end
      end

      # Build an HTML5 DocumentFragment from +tags+ with this node as context.
      def fragment(tags)
        return super(tags) unless document.is_a?(HTML5::Document)
        DocumentFragment.new(document, tags, self)
      end

      private

      # Add +node+ as a child without re-creating its namespaced attributes.
      #
      # HTML elements can have attributes that contain colons.
      # Nokogiri::XML::Node#[]= treats names with colons as a prefixed QName
      # and tries to create an attribute in a namespace. This is especially
      # annoying with attribute names like xml:lang since libxml2 will
      # actually create the xml namespace if it doesn't exist already.
      def add_child_node_and_reparent_attrs(node)
        return super(node) unless document.is_a?(HTML5::Document)
        # I'm not sure what this method is supposed to do. Reparenting
        # namespaces is handled by libxml2, including child namespaces which
        # this method wouldn't handle.
        # https://github.com/sparklemotion/nokogiri/issues/1790
        add_child_node(node)
        # node.attribute_nodes.find_all { |a| a.namespace }.each do |attr|
        #  attr.remove
        #  ns = attr.namespace
        #  a["#{ns.prefix}:#{attr.name}"] = attr.value
        # end
      end
    end
    # Monkey patch
    XML::Node.prepend(HTML5::Node)
  end
end
92
+
93
+ # vim: set shiftwidth=2 softtabstop=2 tabstop=8 expandtab:
@@ -0,0 +1,20 @@
1
+ # frozen_string_literal: true
2
+ # The line below caused a problem on non-GAE rack environment.
3
+ # unless defined?(JRuby::Rack::VERSION) || defined?(AppEngine::ApiProxy)
4
+ #
5
+ # However, simply cutting defined?(JRuby::Rack::VERSION) off resulted in
6
+ # an unable-to-load-nokogiri problem. Thus, now, Nokogiri checks the presence
7
+ # of appengine-rack.jar in $LOAD_PATH. If Nokogiri is on GAE, Nokogiri
8
+ # should skip loading xml jars. This is because those are in WEB-INF/lib and
9
+ # already set in the classpath.
10
# Load stringio and the bundled XML jars, unless "appengine-rack" appears
# anywhere in $LOAD_PATH. The jars are required in the order listed.
unless $LOAD_PATH.to_s.include?("appengine-rack")
  require 'stringio'
  %w[
    isorelax
    jing
    nekohtml
    nekodtd
    xercesImpl
    serializer
    xalan
    xml-apis
  ].each do |jar_name|
    require "#{jar_name}.jar"
  end
end
@@ -1,3 +1,4 @@
1
+ # frozen_string_literal: true
1
2
  module Nokogiri
2
3
  class SyntaxError < ::StandardError
3
4
  end
@@ -1,109 +1,3 @@
1
- module Nokogiri
2
- # The version of Nokogiri you are using
3
- VERSION = "1.10.10"
4
-
5
- class VersionInfo # :nodoc:
6
- def jruby?
7
- ::JRUBY_VERSION if RUBY_PLATFORM == "java"
8
- end
9
-
10
- def engine
11
- defined?(RUBY_ENGINE) ? RUBY_ENGINE : "mri"
12
- end
13
-
14
- def loaded_parser_version
15
- LIBXML_PARSER_VERSION.
16
- scan(/^(\d+)(\d\d)(\d\d)(?!\d)/).first.
17
- collect(&:to_i).
18
- join(".")
19
- end
20
-
21
- def compiled_parser_version
22
- LIBXML_VERSION
23
- end
24
-
25
- def libxml2?
26
- defined?(LIBXML_VERSION)
27
- end
28
-
29
- def libxml2_using_system?
30
- !libxml2_using_packaged?
31
- end
32
-
33
- def libxml2_using_packaged?
34
- NOKOGIRI_USE_PACKAGED_LIBRARIES
35
- end
36
-
37
- def warnings
38
- return [] unless libxml2?
39
-
40
- if compiled_parser_version != loaded_parser_version
41
- ["Nokogiri was built against LibXML version #{compiled_parser_version}, but has dynamically loaded #{loaded_parser_version}"]
42
- else
43
- []
44
- end
45
- end
46
-
47
- def to_hash
48
- hash_info = {}
49
- hash_info["warnings"] = []
50
- hash_info["nokogiri"] = Nokogiri::VERSION
51
- hash_info["ruby"] = {}
52
- hash_info["ruby"]["version"] = ::RUBY_VERSION
53
- hash_info["ruby"]["platform"] = ::RUBY_PLATFORM
54
- hash_info["ruby"]["description"] = ::RUBY_DESCRIPTION
55
- hash_info["ruby"]["engine"] = engine
56
- hash_info["ruby"]["jruby"] = jruby? if jruby?
57
-
58
- if libxml2?
59
- hash_info["libxml"] = {}
60
- hash_info["libxml"]["binding"] = "extension"
61
- if libxml2_using_packaged?
62
- hash_info["libxml"]["source"] = "packaged"
63
- hash_info["libxml"]["libxml2_path"] = NOKOGIRI_LIBXML2_PATH
64
- hash_info["libxml"]["libxslt_path"] = NOKOGIRI_LIBXSLT_PATH
65
- hash_info["libxml"]["libxml2_patches"] = NOKOGIRI_LIBXML2_PATCHES
66
- hash_info["libxml"]["libxslt_patches"] = NOKOGIRI_LIBXSLT_PATCHES
67
- else
68
- hash_info["libxml"]["source"] = "system"
69
- end
70
- hash_info["libxml"]["compiled"] = compiled_parser_version
71
- hash_info["libxml"]["loaded"] = loaded_parser_version
72
- hash_info["warnings"] = warnings
73
- elsif jruby?
74
- hash_info["xerces"] = Nokogiri::XERCES_VERSION
75
- hash_info["nekohtml"] = Nokogiri::NEKO_VERSION
76
- end
77
-
78
- hash_info
79
- end
80
-
81
- def to_markdown
82
- begin
83
- require "psych"
84
- rescue LoadError
85
- end
86
- require "yaml"
87
- "# Nokogiri (#{Nokogiri::VERSION})\n" +
88
- YAML.dump(to_hash).each_line.map { |line| " #{line}" }.join
89
- end
90
-
91
- # FIXME: maybe switch to singleton?
92
- @@instance = new
93
- @@instance.warnings.each do |warning|
94
- warn "WARNING: #{warning}"
95
- end
96
- def self.instance; @@instance; end
97
- end
98
-
99
- # More complete version information about libxml
100
- VERSION_INFO = VersionInfo.instance.to_hash
101
-
102
- def self.uses_libxml? # :nodoc:
103
- VersionInfo.instance.libxml2?
104
- end
105
-
106
- def self.jruby? # :nodoc:
107
- VersionInfo.instance.jruby?
108
- end
109
- end
1
+ # frozen_string_literal: true
2
+ require_relative "version/constant"
3
+ require_relative "version/info"
@@ -0,0 +1,5 @@
1
+ # frozen_string_literal: true
2
module Nokogiri
  # The version of Nokogiri you are using, as a String.
  VERSION = "1.12.0"
end
@@ -0,0 +1,215 @@
1
+ # frozen_string_literal: true
2
+ require "singleton"
3
+ require "shellwords"
4
+
5
+ module Nokogiri
6
class VersionInfo # :nodoc:
  include Singleton

  # @return [String, nil] ::JRUBY_VERSION when RUBY_PLATFORM is "java", else nil
  def jruby?
    ::JRUBY_VERSION if ::RUBY_PLATFORM == "java"
  end

  # @return [Integer, nil] truthy (the match offset) when RUBY_PLATFORM
  #   contains "mingw" or "mswin"
  def windows?
    ::RUBY_PLATFORM =~ /mingw|mswin/
  end

  # @return [String] the first two segments of RUBY_VERSION, e.g. "2.7"
  def ruby_minor
    Gem::Version.new(::RUBY_VERSION).segments[0..1].join(".")
  end

  # @return [String] RUBY_ENGINE, or "mri" when that constant is undefined
  def engine
    defined?(::RUBY_ENGINE) ? ::RUBY_ENGINE : "mri"
  end

  # @return [Gem::Version] decoded from Nokogiri::LIBXML_LOADED_VERSION
  def loaded_libxml_version
    unpack_version(Nokogiri::LIBXML_LOADED_VERSION)
  end

  # @return [Gem::Version] built from Nokogiri::LIBXML_COMPILED_VERSION
  def compiled_libxml_version
    Gem::Version.new(Nokogiri::LIBXML_COMPILED_VERSION)
  end

  # @return [Gem::Version] decoded from Nokogiri::LIBXSLT_LOADED_VERSION
  def loaded_libxslt_version
    unpack_version(Nokogiri::LIBXSLT_LOADED_VERSION)
  end

  # @return [Gem::Version] built from Nokogiri::LIBXSLT_COMPILED_VERSION
  def compiled_libxslt_version
    Gem::Version.new(Nokogiri::LIBXSLT_COMPILED_VERSION)
  end

  # Truthy when Nokogiri::LIBXML_COMPILED_VERSION is defined.
  def libxml2?
    defined?(Nokogiri::LIBXML_COMPILED_VERSION)
  end

  def libxml2_has_iconv?
    defined?(Nokogiri::LIBXML_ICONV_ENABLED) && Nokogiri::LIBXML_ICONV_ENABLED
  end

  def libxslt_has_datetime?
    defined?(Nokogiri::LIBXSLT_DATETIME_ENABLED) && Nokogiri::LIBXSLT_DATETIME_ENABLED
  end

  def libxml2_using_packaged?
    libxml2? && Nokogiri::PACKAGED_LIBRARIES
  end

  def libxml2_using_system?
    libxml2? && !libxml2_using_packaged?
  end

  def libxml2_precompiled?
    libxml2_using_packaged? && Nokogiri::PRECOMPILED_LIBRARIES
  end

  # Messages describing mismatches between the library versions Nokogiri was
  # compiled against and the versions loaded at runtime.
  #
  # @return [Array<String>] empty when libxml2 is not in use or versions agree
  def warnings
    messages = []

    if libxml2?
      if compiled_libxml_version != loaded_libxml_version
        messages << "Nokogiri was built against libxml version #{compiled_libxml_version}, but has dynamically loaded #{loaded_libxml_version}"
      end

      if compiled_libxslt_version != loaded_libxslt_version
        messages << "Nokogiri was built against libxslt version #{compiled_libxslt_version}, but has dynamically loaded #{loaded_libxslt_version}"
      end
    end

    messages
  end

  # Assemble the version report.
  #
  # Keys are inserted in a fixed order ("warnings", "nokogiri", "ruby", then
  # "libxml"/"libxslt" when libxml2 is in use, then "other_libraries" when
  # available) so dumped output stays stable.
  #
  # @return [Hash{String => Object}]
  def to_hash
    header_directory = File.expand_path(File.join(File.dirname(__FILE__), "../../../ext/nokogiri"))

    {}.tap do |vi|
      vi["warnings"] = []
      vi["nokogiri"] = nokogiri_section(header_directory)
      vi["ruby"] = ruby_section

      if libxml2?
        vi["libxml"] = libxml_section(header_directory)
        vi["libxslt"] = libxslt_section
        vi["warnings"] = warnings
      end

      other_libraries = other_libraries_section
      vi["other_libraries"] = other_libraries if other_libraries
    end
  end

  # @return [String] a Markdown heading followed by the YAML dump of #to_hash,
  #   each line indented by one space
  def to_markdown
    begin
      require "psych"
    rescue LoadError
    end
    require "yaml"
    "# Nokogiri (#{Nokogiri::VERSION})\n" +
      YAML.dump(to_hash).each_line.map { |line| " #{line}" }.join
  end

  private

  # Decode a packed version string of the form M+NNPP (optionally followed by
  # a non-digit suffix) into a Gem::Version, e.g. "20912" -> 2.9.12.
  def unpack_version(packed)
    Gem::Version.new(packed
      .scan(/^(\d+)(\d\d)(\d\d)(?!\d)/).first
      .collect(&:to_i)
      .join("."))
  end

  # The "nokogiri" section: the gem version plus, outside JRuby, the compiler
  # and linker flags other gems can use to build against Nokogiri.
  def nokogiri_section(header_directory)
    section = { "version" => Nokogiri::VERSION }
    return section if jruby?

    # enable gems like nokogumbo to build with the following in their extconf.rb:
    #
    #   append_cflags(Nokogiri::VERSION_INFO["nokogiri"]["cppflags"])
    #   append_ldflags(Nokogiri::VERSION_INFO["nokogiri"]["ldflags"])
    #
    cppflags = ["-I#{header_directory.shellescape}"]
    ldflags = []

    if libxml2_using_packaged?
      cppflags << "-I#{File.join(header_directory, 'include').shellescape}"
      cppflags << "-I#{File.join(header_directory, 'include/libxml2').shellescape}"

      if windows?
        # on windows, nokogumbo needs to link against nokogiri.so to resolve symbols. see #2167
        lib_directory = File.expand_path(File.join(File.dirname(__FILE__), "../#{ruby_minor}"))
        unless File.exist?(lib_directory)
          lib_directory = File.expand_path(File.join(File.dirname(__FILE__), ".."))
        end
        ldflags << "-L#{lib_directory.shellescape}"
        ldflags << "-l:nokogiri.so"
      end
    end

    section["cppflags"] = cppflags
    section["ldflags"] = ldflags
    section
  end

  # The "ruby" section: interpreter version, platform and engine details.
  def ruby_section
    {}.tap do |ruby|
      ruby["version"] = ::RUBY_VERSION
      ruby["platform"] = ::RUBY_PLATFORM
      ruby["gem_platform"] = ::Gem::Platform.local.to_s
      ruby["description"] = ::RUBY_DESCRIPTION
      ruby["engine"] = engine
      ruby["jruby"] = jruby? if jruby?
    end
  end

  # The "libxml" section; only built when libxml2 is in use.
  def libxml_section(header_directory)
    {}.tap do |libxml|
      if libxml2_using_packaged?
        libxml["source"] = "packaged"
        libxml["precompiled"] = libxml2_precompiled?
        libxml["patches"] = Nokogiri::LIBXML2_PATCHES

        # this is for nokogumbo and shouldn't be forever
        libxml["libxml2_path"] = header_directory
      else
        libxml["source"] = "system"
      end
      libxml["memory_management"] = Nokogiri::LIBXML_MEMORY_MANAGEMENT
      libxml["iconv_enabled"] = libxml2_has_iconv?
      libxml["compiled"] = compiled_libxml_version.to_s
      libxml["loaded"] = loaded_libxml_version.to_s
    end
  end

  # The "libxslt" section; only built when libxml2 is in use.
  def libxslt_section
    {}.tap do |libxslt|
      if libxml2_using_packaged?
        libxslt["source"] = "packaged"
        libxslt["precompiled"] = libxml2_precompiled?
        libxslt["patches"] = Nokogiri::LIBXSLT_PATCHES
      else
        libxslt["source"] = "system"
      end
      libxslt["datetime_enabled"] = libxslt_has_datetime?
      libxslt["compiled"] = compiled_libxslt_version.to_s
      libxslt["loaded"] = loaded_libxslt_version.to_s
    end
  end

  # The "other_libraries" section, or nil when neither source is available.
  def other_libraries_section
    if defined?(Nokogiri::OTHER_LIBRARY_VERSIONS)
      # see extconf for how this string is assembled: "lib1name:lib1version,lib2name:lib2version"
      # Each entry is split on its first colon only, so a version that itself
      # contains ":" stays intact instead of shifting every later pair.
      Nokogiri::OTHER_LIBRARY_VERSIONS.split(",").map { |entry| entry.split(":", 2) }.to_h
    elsif jruby?
      {}.tap do |ol|
        ol["xerces"] = Nokogiri::XERCES_VERSION
        ol["nekohtml"] = Nokogiri::NEKO_VERSION
      end
    end
  end

  # Runs while the class body is evaluated, i.e. when this file is loaded:
  # emit a warning for every compiled/loaded version mismatch.
  instance.warnings.each do |warning|
    warn "WARNING: #{warning}"
  end
end
192
+
193
# Report whether libxml2 is in use and, optionally, whether the loaded
# libxml2 version satisfies +requirement+ (anything Gem::Requirement.new
# accepts). Without a requirement, the answer is simply whether libxml2 is
# in use.
def self.uses_libxml?(requirement = nil) # :nodoc:
  if !VersionInfo.instance.libxml2?
    false
  elsif !requirement
    true
  else
    loaded_version = VersionInfo.instance.loaded_libxml_version
    Gem::Requirement.new(requirement).satisfied_by?(loaded_version)
  end
end
198
+
199
# Report whether the Gumbo parser is available. For now this simply
# returns whatever uses_libxml? returns (see the TODO below).
def self.uses_gumbo?
  uses_libxml? # TODO: replace with Gumbo functionality
end
202
+
203
# Module-level shortcut for VersionInfo#jruby?; returns that method's
# result unchanged.
def self.jruby? # :nodoc:
  VersionInfo.instance.jruby?
end
206
+
207
# Ensure constants used in this file are loaded - see #1896
# The JRuby dependencies are loaded only when running on JRuby; the extension
# is loaded unconditionally. Both requires precede the VERSION_INFO
# assignment, which calls VersionInfo#to_hash.
if Nokogiri.jruby?
  require_relative "../jruby/dependencies"
end
require_relative "../extension"

# More complete version information about libxml
# Computed once, when this file is loaded.
VERSION_INFO = VersionInfo.instance.to_hash
215
+ end