nokogiri 1.10.10 → 1.12.1
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of nokogiri might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/Gemfile +3 -0
- data/LICENSE-DEPENDENCIES.md +1173 -884
- data/LICENSE.md +1 -1
- data/README.md +176 -96
- data/dependencies.yml +12 -12
- data/ext/nokogiri/depend +38 -358
- data/ext/nokogiri/extconf.rb +712 -414
- data/ext/nokogiri/gumbo.c +584 -0
- data/ext/nokogiri/html4_document.c +166 -0
- data/ext/nokogiri/html4_element_description.c +294 -0
- data/ext/nokogiri/html4_entity_lookup.c +37 -0
- data/ext/nokogiri/html4_sax_parser_context.c +119 -0
- data/ext/nokogiri/html4_sax_push_parser.c +95 -0
- data/ext/nokogiri/libxml2_backwards_compat.c +121 -0
- data/ext/nokogiri/nokogiri.c +228 -91
- data/ext/nokogiri/nokogiri.h +188 -89
- data/ext/nokogiri/test_global_handlers.c +40 -0
- data/ext/nokogiri/xml_attr.c +15 -15
- data/ext/nokogiri/xml_attribute_decl.c +18 -18
- data/ext/nokogiri/xml_cdata.c +13 -18
- data/ext/nokogiri/xml_comment.c +19 -26
- data/ext/nokogiri/xml_document.c +267 -195
- data/ext/nokogiri/xml_document_fragment.c +13 -15
- data/ext/nokogiri/xml_dtd.c +54 -48
- data/ext/nokogiri/xml_element_content.c +31 -26
- data/ext/nokogiri/xml_element_decl.c +22 -22
- data/ext/nokogiri/xml_encoding_handler.c +28 -17
- data/ext/nokogiri/xml_entity_decl.c +32 -30
- data/ext/nokogiri/xml_entity_reference.c +16 -18
- data/ext/nokogiri/xml_namespace.c +58 -49
- data/ext/nokogiri/xml_node.c +489 -410
- data/ext/nokogiri/xml_node_set.c +174 -162
- data/ext/nokogiri/xml_processing_instruction.c +17 -19
- data/ext/nokogiri/xml_reader.c +197 -172
- data/ext/nokogiri/xml_relax_ng.c +52 -28
- data/ext/nokogiri/xml_sax_parser.c +112 -112
- data/ext/nokogiri/xml_sax_parser_context.c +105 -86
- data/ext/nokogiri/xml_sax_push_parser.c +36 -27
- data/ext/nokogiri/xml_schema.c +96 -46
- data/ext/nokogiri/xml_syntax_error.c +42 -21
- data/ext/nokogiri/xml_text.c +13 -17
- data/ext/nokogiri/xml_xpath_context.c +158 -73
- data/ext/nokogiri/xslt_stylesheet.c +158 -164
- data/gumbo-parser/CHANGES.md +63 -0
- data/gumbo-parser/Makefile +101 -0
- data/gumbo-parser/THANKS +27 -0
- data/gumbo-parser/src/Makefile +34 -0
- data/gumbo-parser/src/README.md +41 -0
- data/gumbo-parser/src/ascii.c +75 -0
- data/gumbo-parser/src/ascii.h +115 -0
- data/gumbo-parser/src/attribute.c +42 -0
- data/gumbo-parser/src/attribute.h +17 -0
- data/gumbo-parser/src/char_ref.c +22225 -0
- data/gumbo-parser/src/char_ref.h +29 -0
- data/gumbo-parser/src/char_ref.rl +2154 -0
- data/gumbo-parser/src/error.c +626 -0
- data/gumbo-parser/src/error.h +148 -0
- data/gumbo-parser/src/foreign_attrs.c +104 -0
- data/gumbo-parser/src/foreign_attrs.gperf +27 -0
- data/gumbo-parser/src/gumbo.h +943 -0
- data/gumbo-parser/src/insertion_mode.h +33 -0
- data/gumbo-parser/src/macros.h +91 -0
- data/gumbo-parser/src/parser.c +4886 -0
- data/gumbo-parser/src/parser.h +41 -0
- data/gumbo-parser/src/replacement.h +33 -0
- data/gumbo-parser/src/string_buffer.c +103 -0
- data/gumbo-parser/src/string_buffer.h +68 -0
- data/gumbo-parser/src/string_piece.c +48 -0
- data/gumbo-parser/src/svg_attrs.c +174 -0
- data/gumbo-parser/src/svg_attrs.gperf +77 -0
- data/gumbo-parser/src/svg_tags.c +137 -0
- data/gumbo-parser/src/svg_tags.gperf +55 -0
- data/gumbo-parser/src/tag.c +222 -0
- data/gumbo-parser/src/tag_lookup.c +382 -0
- data/gumbo-parser/src/tag_lookup.gperf +169 -0
- data/gumbo-parser/src/tag_lookup.h +13 -0
- data/gumbo-parser/src/token_buffer.c +79 -0
- data/gumbo-parser/src/token_buffer.h +71 -0
- data/gumbo-parser/src/token_type.h +17 -0
- data/gumbo-parser/src/tokenizer.c +3463 -0
- data/gumbo-parser/src/tokenizer.h +112 -0
- data/gumbo-parser/src/tokenizer_states.h +339 -0
- data/gumbo-parser/src/utf8.c +245 -0
- data/gumbo-parser/src/utf8.h +164 -0
- data/gumbo-parser/src/util.c +68 -0
- data/gumbo-parser/src/util.h +30 -0
- data/gumbo-parser/src/vector.c +111 -0
- data/gumbo-parser/src/vector.h +45 -0
- data/lib/nokogiri.rb +32 -51
- data/lib/nokogiri/css.rb +15 -14
- data/lib/nokogiri/css/node.rb +1 -0
- data/lib/nokogiri/css/parser.rb +64 -63
- data/lib/nokogiri/css/parser.y +3 -3
- data/lib/nokogiri/css/parser_extras.rb +39 -36
- data/lib/nokogiri/css/syntax_error.rb +2 -1
- data/lib/nokogiri/css/tokenizer.rb +1 -0
- data/lib/nokogiri/css/xpath_visitor.rb +73 -43
- data/lib/nokogiri/decorators/slop.rb +1 -0
- data/lib/nokogiri/extension.rb +26 -0
- data/lib/nokogiri/gumbo.rb +14 -0
- data/lib/nokogiri/html.rb +32 -27
- data/lib/nokogiri/html4.rb +40 -0
- data/lib/nokogiri/{html → html4}/builder.rb +3 -2
- data/lib/nokogiri/{html → html4}/document.rb +17 -30
- data/lib/nokogiri/{html → html4}/document_fragment.rb +18 -17
- data/lib/nokogiri/{html → html4}/element_description.rb +2 -1
- data/lib/nokogiri/{html → html4}/element_description_defaults.rb +2 -1
- data/lib/nokogiri/{html → html4}/entity_lookup.rb +2 -1
- data/lib/nokogiri/{html → html4}/sax/parser.rb +12 -14
- data/lib/nokogiri/html4/sax/parser_context.rb +19 -0
- data/lib/nokogiri/{html → html4}/sax/push_parser.rb +6 -5
- data/lib/nokogiri/html5.rb +473 -0
- data/lib/nokogiri/html5/document.rb +74 -0
- data/lib/nokogiri/html5/document_fragment.rb +80 -0
- data/lib/nokogiri/html5/node.rb +93 -0
- data/lib/nokogiri/jruby/dependencies.rb +20 -0
- data/lib/nokogiri/syntax_error.rb +1 -0
- data/lib/nokogiri/version.rb +3 -109
- data/lib/nokogiri/version/constant.rb +5 -0
- data/lib/nokogiri/version/info.rb +215 -0
- data/lib/nokogiri/xml.rb +36 -36
- data/lib/nokogiri/xml/attr.rb +1 -0
- data/lib/nokogiri/xml/attribute_decl.rb +1 -0
- data/lib/nokogiri/xml/builder.rb +3 -2
- data/lib/nokogiri/xml/cdata.rb +1 -0
- data/lib/nokogiri/xml/character_data.rb +1 -0
- data/lib/nokogiri/xml/document.rb +92 -41
- data/lib/nokogiri/xml/document_fragment.rb +5 -6
- data/lib/nokogiri/xml/dtd.rb +1 -0
- data/lib/nokogiri/xml/element_content.rb +1 -0
- data/lib/nokogiri/xml/element_decl.rb +1 -0
- data/lib/nokogiri/xml/entity_decl.rb +1 -0
- data/lib/nokogiri/xml/entity_reference.rb +1 -0
- data/lib/nokogiri/xml/namespace.rb +1 -0
- data/lib/nokogiri/xml/node.rb +629 -293
- data/lib/nokogiri/xml/node/save_options.rb +1 -0
- data/lib/nokogiri/xml/node_set.rb +1 -0
- data/lib/nokogiri/xml/notation.rb +1 -0
- data/lib/nokogiri/xml/parse_options.rb +12 -3
- data/lib/nokogiri/xml/pp.rb +3 -2
- data/lib/nokogiri/xml/pp/character_data.rb +1 -0
- data/lib/nokogiri/xml/pp/node.rb +1 -0
- data/lib/nokogiri/xml/processing_instruction.rb +1 -0
- data/lib/nokogiri/xml/reader.rb +9 -12
- data/lib/nokogiri/xml/relax_ng.rb +7 -2
- data/lib/nokogiri/xml/sax.rb +5 -4
- data/lib/nokogiri/xml/sax/document.rb +25 -30
- data/lib/nokogiri/xml/sax/parser.rb +1 -0
- data/lib/nokogiri/xml/sax/parser_context.rb +1 -0
- data/lib/nokogiri/xml/sax/push_parser.rb +1 -0
- data/lib/nokogiri/xml/schema.rb +13 -4
- data/lib/nokogiri/xml/searchable.rb +25 -16
- data/lib/nokogiri/xml/syntax_error.rb +1 -0
- data/lib/nokogiri/xml/text.rb +1 -0
- data/lib/nokogiri/xml/xpath.rb +4 -5
- data/lib/nokogiri/xml/xpath/syntax_error.rb +2 -1
- data/lib/nokogiri/xml/xpath_context.rb +1 -0
- data/lib/nokogiri/xslt.rb +17 -16
- data/lib/nokogiri/xslt/stylesheet.rb +2 -1
- data/lib/xsd/xmlparser/nokogiri.rb +1 -0
- data/patches/libxml2/{0002-Remove-script-macro-support.patch → 0001-Remove-script-macro-support.patch} +0 -0
- data/patches/libxml2/{0003-Update-entities-to-remove-handling-of-ssi.patch → 0002-Update-entities-to-remove-handling-of-ssi.patch} +0 -0
- data/patches/libxml2/{0004-libxml2.la-is-in-top_builddir.patch → 0003-libxml2.la-is-in-top_builddir.patch} +1 -1
- data/patches/libxml2/0004-use-glibc-strlen.patch +53 -0
- data/patches/libxml2/0005-avoid-isnan-isinf.patch +81 -0
- data/patches/libxml2/0006-update-automake-files-for-arm64.patch +2511 -0
- data/patches/libxml2/0007-Fix-XPath-recursion-limit.patch +31 -0
- data/patches/libxslt/0001-update-automake-files-for-arm64.patch +2511 -0
- data/patches/libxslt/0002-Fix-xml2-config-check-in-configure-script.patch +19 -0
- data/ports/archives/libxml2-2.9.12.tar.gz +0 -0
- metadata +139 -161
- data/ext/nokogiri/html_document.c +0 -170
- data/ext/nokogiri/html_document.h +0 -10
- data/ext/nokogiri/html_element_description.c +0 -279
- data/ext/nokogiri/html_element_description.h +0 -10
- data/ext/nokogiri/html_entity_lookup.c +0 -32
- data/ext/nokogiri/html_entity_lookup.h +0 -8
- data/ext/nokogiri/html_sax_parser_context.c +0 -116
- data/ext/nokogiri/html_sax_parser_context.h +0 -11
- data/ext/nokogiri/html_sax_push_parser.c +0 -87
- data/ext/nokogiri/html_sax_push_parser.h +0 -9
- data/ext/nokogiri/xml_attr.h +0 -9
- data/ext/nokogiri/xml_attribute_decl.h +0 -9
- data/ext/nokogiri/xml_cdata.h +0 -9
- data/ext/nokogiri/xml_comment.h +0 -9
- data/ext/nokogiri/xml_document.h +0 -23
- data/ext/nokogiri/xml_document_fragment.h +0 -10
- data/ext/nokogiri/xml_dtd.h +0 -10
- data/ext/nokogiri/xml_element_content.h +0 -10
- data/ext/nokogiri/xml_element_decl.h +0 -9
- data/ext/nokogiri/xml_encoding_handler.h +0 -8
- data/ext/nokogiri/xml_entity_decl.h +0 -10
- data/ext/nokogiri/xml_entity_reference.h +0 -9
- data/ext/nokogiri/xml_io.c +0 -61
- data/ext/nokogiri/xml_io.h +0 -11
- data/ext/nokogiri/xml_libxml2_hacks.c +0 -112
- data/ext/nokogiri/xml_libxml2_hacks.h +0 -12
- data/ext/nokogiri/xml_namespace.h +0 -14
- data/ext/nokogiri/xml_node.h +0 -13
- data/ext/nokogiri/xml_node_set.h +0 -12
- data/ext/nokogiri/xml_processing_instruction.h +0 -9
- data/ext/nokogiri/xml_reader.h +0 -10
- data/ext/nokogiri/xml_relax_ng.h +0 -9
- data/ext/nokogiri/xml_sax_parser.h +0 -39
- data/ext/nokogiri/xml_sax_parser_context.h +0 -10
- data/ext/nokogiri/xml_sax_push_parser.h +0 -9
- data/ext/nokogiri/xml_schema.h +0 -9
- data/ext/nokogiri/xml_syntax_error.h +0 -13
- data/ext/nokogiri/xml_text.h +0 -9
- data/ext/nokogiri/xml_xpath_context.h +0 -10
- data/ext/nokogiri/xslt_stylesheet.h +0 -14
- data/lib/nokogiri/html/sax/parser_context.rb +0 -16
- data/patches/libxml2/0001-Revert-Do-not-URI-escape-in-server-side-includes.patch +0 -78
- data/patches/libxml2/0005-Fix-infinite-loop-in-xmlStringLenDecodeEntities.patch +0 -32
- data/ports/archives/libxml2-2.9.10.tar.gz +0 -0
@@ -0,0 +1,74 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
#
|
3
|
+
# Copyright 2013-2021 Sam Ruby, Stephen Checkoway
|
4
|
+
#
|
5
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
6
|
+
# you may not use this file except in compliance with the License.
|
7
|
+
# You may obtain a copy of the License at
|
8
|
+
#
|
9
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
10
|
+
#
|
11
|
+
# Unless required by applicable law or agreed to in writing, software
|
12
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
13
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
14
|
+
# See the License for the specific language governing permissions and
|
15
|
+
# limitations under the License.
|
16
|
+
#
|
17
|
+
|
18
|
+
require_relative "../html4/document"
|
19
|
+
|
20
|
+
module Nokogiri
|
21
|
+
module HTML5
|
22
|
+
# @since v1.12.0
|
23
|
+
# @note HTML5 functionality is not available when running JRuby.
|
24
|
+
class Document < Nokogiri::HTML4::Document
  # Parse HTML5 markup into a document.
  #
  # @param string_or_io [String, #read, nil] the markup, or an object that
  #   responds to +read+; a falsy value is treated as an empty string.
  # @param url [String, nil] passed on to the parser; when omitted and the
  #   input responds to both +read+ and +path+, the input's path is used.
  # @param encoding [String, nil] encoding of the input; when omitted, the
  #   input's own encoding name is used unless that is ASCII-8BIT.
  # @param options [Hash] parser limits: +:max_attributes+, +:max_errors+
  #   (or +:max_parse_errors+) and +:max_tree_depth+.
  # @yield [options] the options hash, before parsing, so it can be adjusted.
  # @return the document produced by Nokogiri::Gumbo.parse
  # @raise [ArgumentError] if the input responds to neither +read+ nor +to_str+
  def self.parse(string_or_io, url = nil, encoding = nil, **options, &block)
    yield options if block_given?
    string_or_io ||= ''

    # Binary (ASCII-8BIT) input carries no usable encoding hint.
    if string_or_io.respond_to?(:encoding) && string_or_io.encoding.name != 'ASCII-8BIT'
      encoding ||= string_or_io.encoding.name
    end

    if string_or_io.respond_to?(:read) && string_or_io.respond_to?(:path)
      url ||= string_or_io.path
    end

    unless string_or_io.respond_to?(:read) || string_or_io.respond_to?(:to_str)
      raise ArgumentError, "not a string or IO object"
    end

    do_parse(string_or_io, url, encoding, options)
  end

  # Parse a document from an IO-like object.
  #
  # @raise [ArgumentError] if +io+ does not respond to +read+
  def self.read_io(io, url = nil, encoding = nil, **options)
    raise ArgumentError, "io object doesn't respond to :read" unless io.respond_to?(:read)

    do_parse(io, url, encoding, options)
  end

  # Parse a document from a string-like object.
  #
  # @raise [ArgumentError] if +string+ does not respond to +to_str+
  def self.read_memory(string, url = nil, encoding = nil, **options)
    raise ArgumentError, "string object doesn't respond to :to_str" unless string.respond_to?(:to_str)

    do_parse(string, url, encoding, options)
  end

  # Build a DocumentFragment owned by this document, using the document's
  # root as the parsing context.
  def fragment(tags = nil)
    DocumentFragment.new(self, tags, self.root)
  end

  def to_xml(options = {}, &block)
    # Bypass XML::Document#to_xml which doesn't add
    # XML::Node::SaveOptions::AS_XML like XML::Node#to_xml does.
    XML::Node.instance_method(:to_xml).bind(self).call(options, &block)
  end

  # Shared implementation behind parse, read_io and read_memory: encodes the
  # input, resolves the parser limits (falling back to the Gumbo defaults),
  # parses, and marks the resulting document as UTF-8.
  def self.do_parse(string_or_io, url, encoding, options)
    string = HTML5.read_and_encode(string_or_io, encoding)
    max_attributes = options[:max_attributes] || Nokogiri::Gumbo::DEFAULT_MAX_ATTRIBUTES
    max_errors = options[:max_errors] || options[:max_parse_errors] || Nokogiri::Gumbo::DEFAULT_MAX_ERRORS
    max_depth = options[:max_tree_depth] || Nokogiri::Gumbo::DEFAULT_MAX_TREE_DEPTH
    doc = Nokogiri::Gumbo.parse(string, url, max_attributes, max_errors, max_depth)
    doc.encoding = 'UTF-8'
    doc
  end
  # A bare `private` does not apply to `def self.` methods, so the helper is
  # hidden explicitly.
  private_class_method :do_parse
end
|
73
|
+
end
|
74
|
+
end
|
@@ -0,0 +1,80 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
#
|
3
|
+
# Copyright 2013-2021 Sam Ruby, Stephen Checkoway
|
4
|
+
#
|
5
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
6
|
+
# you may not use this file except in compliance with the License.
|
7
|
+
# You may obtain a copy of the License at
|
8
|
+
#
|
9
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
10
|
+
#
|
11
|
+
# Unless required by applicable law or agreed to in writing, software
|
12
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
13
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
14
|
+
# See the License for the specific language governing permissions and
|
15
|
+
# limitations under the License.
|
16
|
+
#
|
17
|
+
|
18
|
+
require_relative "../html4/document_fragment"
|
19
|
+
|
20
|
+
module Nokogiri
|
21
|
+
module HTML5
|
22
|
+
# @since v1.12.0
|
23
|
+
# @note HTML5 functionality is not available when running JRuby.
|
24
|
+
class DocumentFragment < Nokogiri::HTML4::DocumentFragment
  attr_accessor :document
  attr_accessor :errors

  # Create a document fragment.
  #
  # @param doc the owning document
  # @param tags [String, nil] markup to parse into the fragment; when nil the
  #   fragment is left empty
  # @param ctx context node handed to Nokogiri::Gumbo.fragment
  # @param options [Hash] parser limits: +:max_attributes+, +:max_errors+
  #   (or +:max_parse_errors+, as accepted when parsing whole documents) and
  #   +:max_tree_depth+
  def initialize(doc, tags = nil, ctx = nil, options = {})
    self.document = doc
    self.errors = []
    return unless tags

    max_attributes = options[:max_attributes] || Nokogiri::Gumbo::DEFAULT_MAX_ATTRIBUTES
    max_errors = options[:max_errors] || options[:max_parse_errors] || Nokogiri::Gumbo::DEFAULT_MAX_ERRORS
    max_depth = options[:max_tree_depth] || Nokogiri::Gumbo::DEFAULT_MAX_TREE_DEPTH
    tags = Nokogiri::HTML5.read_and_encode(tags, nil)
    Nokogiri::Gumbo.fragment(self, tags, ctx, max_attributes, max_errors, max_depth)
  end

  def serialize(options = {}, &block)
    # Bypass XML::Document.serialize which doesn't support options even
    # though XML::Node.serialize does!
    XML::Node.instance_method(:serialize).bind(self).call(options, &block)
  end

  # Parse a document fragment from +tags+ inside a fresh HTML5::Document and
  # return the new fragment.
  def self.parse(tags, encoding = nil, options = {})
    doc = HTML5::Document.new
    tags = HTML5.read_and_encode(tags, encoding)
    doc.encoding = "UTF-8"
    new(doc, tags, nil, options)
  end

  # Split search arguments into their parts.
  #
  # The first argument that is not a Hash, String or Symbol is taken as the
  # handler. Trailing Hash/nil arguments are then peeled off; in their
  # original order they are the namespaces and the variable bindings. When no
  # namespaces were supplied, those of the fragment's children are merged.
  #
  # @return [Array] +[params, handler, ns, binds]+
  def extract_params(params) # :nodoc:
    handler = params.find do |param|
      ![Hash, String, Symbol].include?(param.class)
    end
    params -= [handler] if handler

    hashes = []
    while Hash === params.last || params.last.nil?
      hashes << params.pop
      # An empty array's +last+ is nil, so stop explicitly once drained.
      break if params.empty?
    end
    ns, binds = hashes.reverse

    ns ||= children.each_with_object({}) { |child, merged| merged.merge!(child.namespaces) }

    [params, handler, ns, binds]
  end
end
|
78
|
+
end
|
79
|
+
end
|
80
|
+
# vim: set shiftwidth=2 softtabstop=2 tabstop=8 expandtab:
|
@@ -0,0 +1,93 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
#
|
3
|
+
# Copyright 2013-2021 Sam Ruby, Stephen Checkoway
|
4
|
+
#
|
5
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
6
|
+
# you may not use this file except in compliance with the License.
|
7
|
+
# You may obtain a copy of the License at
|
8
|
+
#
|
9
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
10
|
+
#
|
11
|
+
# Unless required by applicable law or agreed to in writing, software
|
12
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
13
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
14
|
+
# See the License for the specific language governing permissions and
|
15
|
+
# limitations under the License.
|
16
|
+
#
|
17
|
+
|
18
|
+
require_relative "../xml/node"
|
19
|
+
|
20
|
+
module Nokogiri
|
21
|
+
module HTML5
|
22
|
+
# @since v1.12.0
|
23
|
+
# @note HTML5 functionality is not available when running JRuby.
|
24
|
+
module Node
  # Serialize the children of this node as HTML.
  #
  # Falls through to the regular implementation unless the node belongs to an
  # HTML5::Document. With +options[:preserve_newline]+ set, a leading newline
  # is emitted when HTML5.prepend_newline? says so for this node.
  def inner_html(options = {})
    return super(options) unless document.is_a?(HTML5::Document)

    result = options[:preserve_newline] && HTML5.prepend_newline?(self) ? String.new("\n") : String.new
    result << children.map { |child| child.to_html(options) }.join
    result
  end

  # Write this node to +io+.
  #
  # Accepts either a single options Hash (+:encoding+, +:save_with+,
  # +:indent+, +:indent_text+, +:preserve_newline+) or positional
  # +encoding, save_with+ arguments. Falls through to the regular
  # implementation unless the node belongs to an HTML5::Document.
  #
  # @yield [config] the XML::Node::SaveOptions built from the save options
  def write_to(io, *options)
    return super(io, *options) unless document.is_a?(HTML5::Document)

    # Keep the positional arguments separate from the named ones. Rebinding
    # +options+ to a Hash and then reading options[0] / options[1] would look
    # up the Hash keys 0 and 1, silently dropping positional values.
    if options.first.is_a?(Hash)
      named = options.first
      positional = []
    else
      named = {}
      positional = options
    end

    encoding = named[:encoding] || positional[0]
    if Nokogiri.jruby?
      save_options = named[:save_with] || positional[1]
      indent_times = named[:indent] || 0
    else
      save_options = named[:save_with] || positional[1] || XML::Node::SaveOptions::FORMAT
      indent_times = named[:indent] || 2
    end
    indent_string = (named[:indent_text] || " ") * indent_times

    config = XML::Node::SaveOptions.new(save_options.to_i)
    yield config if block_given?

    config_options = config.options
    if config_options & (XML::Node::SaveOptions::AS_XML | XML::Node::SaveOptions::AS_XHTML) != 0
      # Use Nokogiri's serializing code.
      native_write_to(io, encoding, indent_string, config_options)
    else
      # Serialize including the current node.
      encoding ||= document.encoding || Encoding::UTF_8
      internal_ops = {
        preserve_newline: named[:preserve_newline] || false,
      }
      HTML5.serialize_node_internal(self, io, encoding, internal_ops)
    end
  end

  # Build an HTML5 DocumentFragment from +tags+ with this node as context.
  # Falls through to the regular implementation for other document types.
  def fragment(tags)
    return super(tags) unless document.is_a?(HTML5::Document)

    DocumentFragment.new(document, tags, self)
  end

  private

  # HTML elements can have attributes that contain colons.
  # Nokogiri::XML::Node#[]= treats names with colons as a prefixed QName
  # and tries to create an attribute in a namespace. This is especially
  # annoying with attribute names like xml:lang since libxml2 will
  # actually create the xml namespace if it doesn't exist already.
  def add_child_node_and_reparent_attrs(node)
    return super(node) unless document.is_a?(HTML5::Document)

    # I'm not sure what this method is supposed to do. Reparenting
    # namespaces is handled by libxml2, including child namespaces which
    # this method wouldn't handle.
    # https://github.com/sparklemotion/nokogiri/issues/1790
    add_child_node(node)
    # node.attribute_nodes.find_all { |a| a.namespace }.each do |attr|
    #   attr.remove
    #   ns = attr.namespace
    #   a["#{ns.prefix}:#{attr.name}"] = attr.value
    # end
  end
end
|
88
|
+
# Monkey patch
|
89
|
+
XML::Node.prepend(HTML5::Node)
|
90
|
+
end
|
91
|
+
end
|
92
|
+
|
93
|
+
# vim: set shiftwidth=2 softtabstop=2 tabstop=8 expandtab:
|
@@ -0,0 +1,20 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
# The line below caused a problem on non-GAE rack environment.
|
3
|
+
# unless defined?(JRuby::Rack::VERSION) || defined?(AppEngine::ApiProxy)
|
4
|
+
#
|
5
|
+
# However, simply cutting defined?(JRuby::Rack::VERSION) off resulted in
|
6
|
+
# an unable-to-load-nokogiri problem. Thus, now, Nokogiri checks the presence
|
7
|
+
# of appengine-rack.jar in $LOAD_PATH. If Nokogiri is on GAE, Nokogiri
|
8
|
+
# should skip loading xml jars. This is because those are in WEB-INF/lib and
|
9
|
+
# already set in the classpath.
|
10
|
+
# Load stringio and the XML jars unless "appengine-rack" appears somewhere
# in the load path.
running_on_app_engine = $LOAD_PATH.to_s.include?("appengine-rack")

unless running_on_app_engine
  require 'stringio'
  require 'isorelax.jar'
  require 'jing.jar'
  require 'nekohtml.jar'
  require 'nekodtd.jar'
  require 'xercesImpl.jar'
  require 'serializer.jar'
  require 'xalan.jar'
  require 'xml-apis.jar'
end
|
data/lib/nokogiri/version.rb
CHANGED
@@ -1,109 +1,3 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
class VersionInfo # :nodoc:
|
6
|
-
def jruby?
|
7
|
-
::JRUBY_VERSION if RUBY_PLATFORM == "java"
|
8
|
-
end
|
9
|
-
|
10
|
-
def engine
|
11
|
-
defined?(RUBY_ENGINE) ? RUBY_ENGINE : "mri"
|
12
|
-
end
|
13
|
-
|
14
|
-
def loaded_parser_version
|
15
|
-
LIBXML_PARSER_VERSION.
|
16
|
-
scan(/^(\d+)(\d\d)(\d\d)(?!\d)/).first.
|
17
|
-
collect(&:to_i).
|
18
|
-
join(".")
|
19
|
-
end
|
20
|
-
|
21
|
-
def compiled_parser_version
|
22
|
-
LIBXML_VERSION
|
23
|
-
end
|
24
|
-
|
25
|
-
def libxml2?
|
26
|
-
defined?(LIBXML_VERSION)
|
27
|
-
end
|
28
|
-
|
29
|
-
def libxml2_using_system?
|
30
|
-
!libxml2_using_packaged?
|
31
|
-
end
|
32
|
-
|
33
|
-
def libxml2_using_packaged?
|
34
|
-
NOKOGIRI_USE_PACKAGED_LIBRARIES
|
35
|
-
end
|
36
|
-
|
37
|
-
def warnings
|
38
|
-
return [] unless libxml2?
|
39
|
-
|
40
|
-
if compiled_parser_version != loaded_parser_version
|
41
|
-
["Nokogiri was built against LibXML version #{compiled_parser_version}, but has dynamically loaded #{loaded_parser_version}"]
|
42
|
-
else
|
43
|
-
[]
|
44
|
-
end
|
45
|
-
end
|
46
|
-
|
47
|
-
def to_hash
|
48
|
-
hash_info = {}
|
49
|
-
hash_info["warnings"] = []
|
50
|
-
hash_info["nokogiri"] = Nokogiri::VERSION
|
51
|
-
hash_info["ruby"] = {}
|
52
|
-
hash_info["ruby"]["version"] = ::RUBY_VERSION
|
53
|
-
hash_info["ruby"]["platform"] = ::RUBY_PLATFORM
|
54
|
-
hash_info["ruby"]["description"] = ::RUBY_DESCRIPTION
|
55
|
-
hash_info["ruby"]["engine"] = engine
|
56
|
-
hash_info["ruby"]["jruby"] = jruby? if jruby?
|
57
|
-
|
58
|
-
if libxml2?
|
59
|
-
hash_info["libxml"] = {}
|
60
|
-
hash_info["libxml"]["binding"] = "extension"
|
61
|
-
if libxml2_using_packaged?
|
62
|
-
hash_info["libxml"]["source"] = "packaged"
|
63
|
-
hash_info["libxml"]["libxml2_path"] = NOKOGIRI_LIBXML2_PATH
|
64
|
-
hash_info["libxml"]["libxslt_path"] = NOKOGIRI_LIBXSLT_PATH
|
65
|
-
hash_info["libxml"]["libxml2_patches"] = NOKOGIRI_LIBXML2_PATCHES
|
66
|
-
hash_info["libxml"]["libxslt_patches"] = NOKOGIRI_LIBXSLT_PATCHES
|
67
|
-
else
|
68
|
-
hash_info["libxml"]["source"] = "system"
|
69
|
-
end
|
70
|
-
hash_info["libxml"]["compiled"] = compiled_parser_version
|
71
|
-
hash_info["libxml"]["loaded"] = loaded_parser_version
|
72
|
-
hash_info["warnings"] = warnings
|
73
|
-
elsif jruby?
|
74
|
-
hash_info["xerces"] = Nokogiri::XERCES_VERSION
|
75
|
-
hash_info["nekohtml"] = Nokogiri::NEKO_VERSION
|
76
|
-
end
|
77
|
-
|
78
|
-
hash_info
|
79
|
-
end
|
80
|
-
|
81
|
-
def to_markdown
|
82
|
-
begin
|
83
|
-
require "psych"
|
84
|
-
rescue LoadError
|
85
|
-
end
|
86
|
-
require "yaml"
|
87
|
-
"# Nokogiri (#{Nokogiri::VERSION})\n" +
|
88
|
-
YAML.dump(to_hash).each_line.map { |line| " #{line}" }.join
|
89
|
-
end
|
90
|
-
|
91
|
-
# FIXME: maybe switch to singleton?
|
92
|
-
@@instance = new
|
93
|
-
@@instance.warnings.each do |warning|
|
94
|
-
warn "WARNING: #{warning}"
|
95
|
-
end
|
96
|
-
def self.instance; @@instance; end
|
97
|
-
end
|
98
|
-
|
99
|
-
# More complete version information about libxml
|
100
|
-
VERSION_INFO = VersionInfo.instance.to_hash
|
101
|
-
|
102
|
-
def self.uses_libxml? # :nodoc:
|
103
|
-
VersionInfo.instance.libxml2?
|
104
|
-
end
|
105
|
-
|
106
|
-
def self.jruby? # :nodoc:
|
107
|
-
VersionInfo.instance.jruby?
|
108
|
-
end
|
109
|
-
end
|
1
|
+
# frozen_string_literal: true
|
2
|
+
require_relative "version/constant"
|
3
|
+
require_relative "version/info"
|
@@ -0,0 +1,215 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
require "singleton"
|
3
|
+
require "shellwords"
|
4
|
+
|
5
|
+
module Nokogiri
|
6
|
+
class VersionInfo # :nodoc:
  include Singleton

  # The JRuby version string when RUBY_PLATFORM is "java", otherwise nil.
  def jruby?
    ::JRUBY_VERSION if ::RUBY_PLATFORM == "java"
  end

  # Truthy (the match offset) when RUBY_PLATFORM names mingw or mswin.
  def windows?
    ::RUBY_PLATFORM =~ /mingw|mswin/
  end

  # The first two segments of RUBY_VERSION joined with a dot, e.g. "2.7".
  def ruby_minor
    Gem::Version.new(::RUBY_VERSION).segments[0..1].join(".")
  end

  # RUBY_ENGINE when that constant is defined, otherwise "mri".
  def engine
    defined?(::RUBY_ENGINE) ? ::RUBY_ENGINE : "mri"
  end

  # Gem::Version parsed from the packed Nokogiri::LIBXML_LOADED_VERSION.
  def loaded_libxml_version
    unpack_version(Nokogiri::LIBXML_LOADED_VERSION)
  end

  # Gem::Version built from Nokogiri::LIBXML_COMPILED_VERSION.
  def compiled_libxml_version
    Gem::Version.new(Nokogiri::LIBXML_COMPILED_VERSION)
  end

  # Gem::Version parsed from the packed Nokogiri::LIBXSLT_LOADED_VERSION.
  def loaded_libxslt_version
    unpack_version(Nokogiri::LIBXSLT_LOADED_VERSION)
  end

  # Gem::Version built from Nokogiri::LIBXSLT_COMPILED_VERSION.
  def compiled_libxslt_version
    Gem::Version.new(Nokogiri::LIBXSLT_COMPILED_VERSION)
  end

  # Truthy when Nokogiri::LIBXML_COMPILED_VERSION is defined.
  def libxml2?
    defined?(Nokogiri::LIBXML_COMPILED_VERSION)
  end

  def libxml2_has_iconv?
    defined?(Nokogiri::LIBXML_ICONV_ENABLED) && Nokogiri::LIBXML_ICONV_ENABLED
  end

  def libxslt_has_datetime?
    defined?(Nokogiri::LIBXSLT_DATETIME_ENABLED) && Nokogiri::LIBXSLT_DATETIME_ENABLED
  end

  def libxml2_using_packaged?
    libxml2? && Nokogiri::PACKAGED_LIBRARIES
  end

  def libxml2_using_system?
    libxml2? && !libxml2_using_packaged?
  end

  def libxml2_precompiled?
    libxml2_using_packaged? && Nokogiri::PRECOMPILED_LIBRARIES
  end

  # Messages describing mismatches between the compiled-against and the
  # loaded libxml2 / libxslt versions. Empty when libxml2 is not in use.
  #
  # @return [Array<String>]
  def warnings
    return [] unless libxml2?

    messages = []

    if compiled_libxml_version != loaded_libxml_version
      messages << "Nokogiri was built against libxml version #{compiled_libxml_version}, but has dynamically loaded #{loaded_libxml_version}"
    end

    if compiled_libxslt_version != loaded_libxslt_version
      messages << "Nokogiri was built against libxslt version #{compiled_libxslt_version}, but has dynamically loaded #{loaded_libxslt_version}"
    end

    messages
  end

  # Assemble the version report. Keys are inserted in a fixed order:
  # "warnings", "nokogiri", "ruby", then "libxml" and "libxslt" when libxml2
  # is in use, then "other_libraries" when that information is available.
  #
  # @return [Hash]
  def to_hash
    header_directory = File.expand_path(File.join(File.dirname(__FILE__), "../../../ext/nokogiri"))

    {}.tap do |vi|
      vi["warnings"] = []
      vi["nokogiri"] = nokogiri_section(header_directory)
      vi["ruby"] = ruby_section

      if libxml2?
        vi["libxml"] = libxml_section(header_directory)
        vi["libxslt"] = libxslt_section
        vi["warnings"] = warnings
      end

      if defined?(Nokogiri::OTHER_LIBRARY_VERSIONS)
        # see extconf for how this string is assembled: "lib1name:lib1version,lib2name:lib2version"
        vi["other_libraries"] = Hash[*Nokogiri::OTHER_LIBRARY_VERSIONS.split(/[,:]/)]
      elsif jruby?
        vi["other_libraries"] = {}.tap do |ol|
          ol["xerces"] = Nokogiri::XERCES_VERSION
          ol["nekohtml"] = Nokogiri::NEKO_VERSION
        end
      end
    end
  end

  # The report from #to_hash as YAML, indented under a markdown heading.
  def to_markdown
    begin
      require "psych"
    rescue LoadError
    end
    require "yaml"
    "# Nokogiri (#{Nokogiri::VERSION})\n" +
      YAML.dump(to_hash).each_line.map { |line| " #{line}" }.join
  end

  private

  # Convert a packed version string such as "20912" (major, then two digits
  # each for minor and patch) into a Gem::Version such as 2.9.12.
  def unpack_version(packed)
    Gem::Version.new(packed.scan(/^(\d+)(\d\d)(\d\d)(?!\d)/).first.collect(&:to_i).join("."))
  end

  # The "nokogiri" section: the gem version plus, off JRuby, compiler and
  # linker flags.
  def nokogiri_section(header_directory)
    {}.tap do |nokogiri|
      nokogiri["version"] = Nokogiri::VERSION

      unless jruby?
        # enable gems like nokogumbo to build with the following in their extconf.rb:
        #
        #   append_cflags(Nokogiri::VERSION_INFO["nokogiri"]["cppflags"])
        #   append_ldflags(Nokogiri::VERSION_INFO["nokogiri"]["ldflags"])
        #
        cppflags = ["-I#{header_directory.shellescape}"]
        ldflags = []

        if libxml2_using_packaged?
          cppflags << "-I#{File.join(header_directory, 'include').shellescape}"
          cppflags << "-I#{File.join(header_directory, 'include/libxml2').shellescape}"
          ldflags.concat(windows_ldflags) if windows?
        end

        nokogiri["cppflags"] = cppflags
        nokogiri["ldflags"] = ldflags
      end
    end
  end

  # Linker flags pointing at nokogiri.so.
  # on windows, nokogumbo needs to link against nokogiri.so to resolve symbols. see #2167
  def windows_ldflags
    lib_directory = File.expand_path(File.join(File.dirname(__FILE__), "../#{ruby_minor}"))
    unless File.exist?(lib_directory)
      lib_directory = File.expand_path(File.join(File.dirname(__FILE__), ".."))
    end
    ["-L#{lib_directory.shellescape}", "-l:nokogiri.so"]
  end

  # The "ruby" section: interpreter version, platform and engine details.
  def ruby_section
    {}.tap do |ruby|
      ruby["version"] = ::RUBY_VERSION
      ruby["platform"] = ::RUBY_PLATFORM
      ruby["gem_platform"] = ::Gem::Platform.local.to_s
      ruby["description"] = ::RUBY_DESCRIPTION
      ruby["engine"] = engine
      ruby["jruby"] = jruby? if jruby?
    end
  end

  # The "libxml" section; only built when libxml2 is in use.
  def libxml_section(header_directory)
    {}.tap do |libxml|
      if libxml2_using_packaged?
        libxml["source"] = "packaged"
        libxml["precompiled"] = libxml2_precompiled?
        libxml["patches"] = Nokogiri::LIBXML2_PATCHES

        # this is for nokogumbo and shouldn't be forever
        libxml["libxml2_path"] = header_directory
      else
        libxml["source"] = "system"
      end
      libxml["memory_management"] = Nokogiri::LIBXML_MEMORY_MANAGEMENT
      libxml["iconv_enabled"] = libxml2_has_iconv?
      libxml["compiled"] = compiled_libxml_version.to_s
      libxml["loaded"] = loaded_libxml_version.to_s
    end
  end

  # The "libxslt" section; only built when libxml2 is in use.
  def libxslt_section
    {}.tap do |libxslt|
      if libxml2_using_packaged?
        libxslt["source"] = "packaged"
        libxslt["precompiled"] = libxml2_precompiled?
        libxslt["patches"] = Nokogiri::LIBXSLT_PATCHES
      else
        libxslt["source"] = "system"
      end
      libxslt["datetime_enabled"] = libxslt_has_datetime?
      libxslt["compiled"] = compiled_libxslt_version.to_s
      libxslt["loaded"] = loaded_libxslt_version.to_s
    end
  end

  # Emit any version-mismatch warnings as soon as the class is defined.
  instance.warnings.each do |warning|
    warn "WARNING: #{warning}"
  end
end
|
192
|
+
|
193
|
+
# Whether libxml2 is in use and, when +requirement+ is given, whether the
# loaded libxml2 version satisfies that Gem::Requirement string.
def self.uses_libxml?(requirement = nil) # :nodoc:
  version_info = VersionInfo.instance

  if !version_info.libxml2?
    false
  elsif !requirement
    true
  else
    Gem::Requirement.new(requirement).satisfied_by?(version_info.loaded_libxml_version)
  end
end
|
198
|
+
|
199
|
+
# Currently mirrors uses_libxml? called with no requirement.
def self.uses_gumbo?
  # TODO: replace with Gumbo functionality
  uses_libxml?
end
|
202
|
+
|
203
|
+
# Delegates to VersionInfo#jruby? on the singleton instance.
def self.jruby? # :nodoc:
  version_info = VersionInfo.instance
  version_info.jruby?
end
|
206
|
+
|
207
|
+
# Ensure constants used in this file are loaded - see #1896
|
208
|
+
if Nokogiri.jruby?
|
209
|
+
require_relative "../jruby/dependencies"
|
210
|
+
end
|
211
|
+
require_relative "../extension"
|
212
|
+
|
213
|
+
# More complete version information about libxml
|
214
|
+
VERSION_INFO = VersionInfo.instance.to_hash
|
215
|
+
end
|