nokogiri 1.10.9 → 1.18.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of nokogiri might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/Gemfile +38 -0
- data/LICENSE-DEPENDENCIES.md +1632 -1022
- data/LICENSE.md +1 -1
- data/README.md +190 -95
- data/bin/nokogiri +63 -50
- data/dependencies.yml +34 -66
- data/ext/nokogiri/depend +38 -358
- data/ext/nokogiri/extconf.rb +909 -422
- data/ext/nokogiri/gumbo.c +610 -0
- data/ext/nokogiri/html4_document.c +171 -0
- data/ext/nokogiri/html4_element_description.c +299 -0
- data/ext/nokogiri/html4_entity_lookup.c +37 -0
- data/ext/nokogiri/html4_sax_parser.c +40 -0
- data/ext/nokogiri/html4_sax_parser_context.c +98 -0
- data/ext/nokogiri/html4_sax_push_parser.c +96 -0
- data/ext/nokogiri/libxml2_polyfill.c +114 -0
- data/ext/nokogiri/nokogiri.c +258 -105
- data/ext/nokogiri/nokogiri.h +207 -90
- data/ext/nokogiri/test_global_handlers.c +40 -0
- data/ext/nokogiri/xml_attr.c +18 -18
- data/ext/nokogiri/xml_attribute_decl.c +22 -22
- data/ext/nokogiri/xml_cdata.c +33 -33
- data/ext/nokogiri/xml_comment.c +19 -31
- data/ext/nokogiri/xml_document.c +499 -323
- data/ext/nokogiri/xml_document_fragment.c +17 -36
- data/ext/nokogiri/xml_dtd.c +65 -59
- data/ext/nokogiri/xml_element_content.c +63 -55
- data/ext/nokogiri/xml_element_decl.c +31 -31
- data/ext/nokogiri/xml_encoding_handler.c +54 -21
- data/ext/nokogiri/xml_entity_decl.c +37 -35
- data/ext/nokogiri/xml_entity_reference.c +17 -19
- data/ext/nokogiri/xml_namespace.c +131 -61
- data/ext/nokogiri/xml_node.c +1429 -723
- data/ext/nokogiri/xml_node_set.c +257 -225
- data/ext/nokogiri/xml_processing_instruction.c +18 -20
- data/ext/nokogiri/xml_reader.c +340 -231
- data/ext/nokogiri/xml_relax_ng.c +87 -99
- data/ext/nokogiri/xml_sax_parser.c +269 -176
- data/ext/nokogiri/xml_sax_parser_context.c +286 -152
- data/ext/nokogiri/xml_sax_push_parser.c +111 -64
- data/ext/nokogiri/xml_schema.c +132 -140
- data/ext/nokogiri/xml_syntax_error.c +52 -23
- data/ext/nokogiri/xml_text.c +37 -30
- data/ext/nokogiri/xml_xpath_context.c +373 -185
- data/ext/nokogiri/xslt_stylesheet.c +342 -191
- data/gumbo-parser/CHANGES.md +63 -0
- data/gumbo-parser/Makefile +129 -0
- data/gumbo-parser/THANKS +27 -0
- data/gumbo-parser/src/Makefile +34 -0
- data/gumbo-parser/src/README.md +41 -0
- data/gumbo-parser/src/ascii.c +75 -0
- data/gumbo-parser/src/ascii.h +115 -0
- data/gumbo-parser/src/attribute.c +42 -0
- data/gumbo-parser/src/attribute.h +17 -0
- data/gumbo-parser/src/char_ref.c +22225 -0
- data/gumbo-parser/src/char_ref.h +29 -0
- data/gumbo-parser/src/char_ref.rl +2154 -0
- data/gumbo-parser/src/error.c +658 -0
- data/gumbo-parser/src/error.h +152 -0
- data/gumbo-parser/src/foreign_attrs.c +103 -0
- data/gumbo-parser/src/foreign_attrs.gperf +27 -0
- data/gumbo-parser/src/insertion_mode.h +33 -0
- data/gumbo-parser/src/macros.h +91 -0
- data/gumbo-parser/src/nokogiri_gumbo.h +953 -0
- data/gumbo-parser/src/parser.c +4932 -0
- data/gumbo-parser/src/parser.h +41 -0
- data/gumbo-parser/src/replacement.h +33 -0
- data/gumbo-parser/src/string_buffer.c +103 -0
- data/gumbo-parser/src/string_buffer.h +68 -0
- data/gumbo-parser/src/string_piece.c +48 -0
- data/gumbo-parser/src/svg_attrs.c +174 -0
- data/gumbo-parser/src/svg_attrs.gperf +77 -0
- data/gumbo-parser/src/svg_tags.c +137 -0
- data/gumbo-parser/src/svg_tags.gperf +55 -0
- data/gumbo-parser/src/tag.c +223 -0
- data/gumbo-parser/src/tag_lookup.c +382 -0
- data/gumbo-parser/src/tag_lookup.gperf +170 -0
- data/gumbo-parser/src/tag_lookup.h +13 -0
- data/gumbo-parser/src/token_buffer.c +79 -0
- data/gumbo-parser/src/token_buffer.h +71 -0
- data/gumbo-parser/src/token_type.h +17 -0
- data/gumbo-parser/src/tokenizer.c +3464 -0
- data/gumbo-parser/src/tokenizer.h +112 -0
- data/gumbo-parser/src/tokenizer_states.h +339 -0
- data/gumbo-parser/src/utf8.c +245 -0
- data/gumbo-parser/src/utf8.h +164 -0
- data/gumbo-parser/src/util.c +66 -0
- data/gumbo-parser/src/util.h +34 -0
- data/gumbo-parser/src/vector.c +111 -0
- data/gumbo-parser/src/vector.h +45 -0
- data/lib/nokogiri/class_resolver.rb +67 -0
- data/lib/nokogiri/css/node.rb +14 -8
- data/lib/nokogiri/css/parser.rb +399 -377
- data/lib/nokogiri/css/parser.y +250 -245
- data/lib/nokogiri/css/parser_extras.rb +16 -71
- data/lib/nokogiri/css/selector_cache.rb +38 -0
- data/lib/nokogiri/css/syntax_error.rb +3 -1
- data/lib/nokogiri/css/tokenizer.rb +7 -5
- data/lib/nokogiri/css/tokenizer.rex +11 -9
- data/lib/nokogiri/css/xpath_visitor.rb +242 -96
- data/lib/nokogiri/css.rb +122 -17
- data/lib/nokogiri/decorators/slop.rb +11 -11
- data/lib/nokogiri/encoding_handler.rb +57 -0
- data/lib/nokogiri/extension.rb +32 -0
- data/lib/nokogiri/gumbo.rb +15 -0
- data/lib/nokogiri/html.rb +38 -27
- data/lib/nokogiri/{html → html4}/builder.rb +4 -2
- data/lib/nokogiri/html4/document.rb +235 -0
- data/lib/nokogiri/html4/document_fragment.rb +166 -0
- data/lib/nokogiri/{html → html4}/element_description.rb +3 -1
- data/lib/nokogiri/html4/element_description_defaults.rb +2040 -0
- data/lib/nokogiri/html4/encoding_reader.rb +121 -0
- data/lib/nokogiri/{html → html4}/entity_lookup.rb +4 -2
- data/lib/nokogiri/html4/sax/parser.rb +48 -0
- data/lib/nokogiri/html4/sax/parser_context.rb +15 -0
- data/lib/nokogiri/{html → html4}/sax/push_parser.rb +12 -11
- data/lib/nokogiri/html4.rb +42 -0
- data/lib/nokogiri/html5/builder.rb +40 -0
- data/lib/nokogiri/html5/document.rb +199 -0
- data/lib/nokogiri/html5/document_fragment.rb +200 -0
- data/lib/nokogiri/html5/node.rb +103 -0
- data/lib/nokogiri/html5.rb +368 -0
- data/lib/nokogiri/jruby/dependencies.rb +3 -0
- data/lib/nokogiri/jruby/nokogiri_jars.rb +43 -0
- data/lib/nokogiri/syntax_error.rb +2 -0
- data/lib/nokogiri/version/constant.rb +6 -0
- data/lib/nokogiri/version/info.rb +224 -0
- data/lib/nokogiri/version.rb +3 -108
- data/lib/nokogiri/xml/attr.rb +55 -3
- data/lib/nokogiri/xml/attribute_decl.rb +6 -2
- data/lib/nokogiri/xml/builder.rb +83 -35
- data/lib/nokogiri/xml/cdata.rb +3 -1
- data/lib/nokogiri/xml/character_data.rb +2 -0
- data/lib/nokogiri/xml/document.rb +359 -130
- data/lib/nokogiri/xml/document_fragment.rb +170 -54
- data/lib/nokogiri/xml/dtd.rb +4 -2
- data/lib/nokogiri/xml/element_content.rb +12 -2
- data/lib/nokogiri/xml/element_decl.rb +6 -2
- data/lib/nokogiri/xml/entity_decl.rb +7 -3
- data/lib/nokogiri/xml/entity_reference.rb +2 -0
- data/lib/nokogiri/xml/namespace.rb +44 -0
- data/lib/nokogiri/xml/node/save_options.rb +23 -8
- data/lib/nokogiri/xml/node.rb +1168 -420
- data/lib/nokogiri/xml/node_set.rb +145 -67
- data/lib/nokogiri/xml/notation.rb +13 -0
- data/lib/nokogiri/xml/parse_options.rb +145 -52
- data/lib/nokogiri/xml/pp/character_data.rb +9 -6
- data/lib/nokogiri/xml/pp/node.rb +47 -30
- data/lib/nokogiri/xml/pp.rb +4 -2
- data/lib/nokogiri/xml/processing_instruction.rb +4 -1
- data/lib/nokogiri/xml/reader.rb +68 -41
- data/lib/nokogiri/xml/relax_ng.rb +60 -17
- data/lib/nokogiri/xml/sax/document.rb +198 -111
- data/lib/nokogiri/xml/sax/parser.rb +144 -67
- data/lib/nokogiri/xml/sax/parser_context.rb +119 -6
- data/lib/nokogiri/xml/sax/push_parser.rb +9 -5
- data/lib/nokogiri/xml/sax.rb +54 -4
- data/lib/nokogiri/xml/schema.rb +116 -39
- data/lib/nokogiri/xml/searchable.rb +139 -95
- data/lib/nokogiri/xml/syntax_error.rb +29 -5
- data/lib/nokogiri/xml/text.rb +2 -0
- data/lib/nokogiri/xml/xpath/syntax_error.rb +4 -2
- data/lib/nokogiri/xml/xpath.rb +15 -4
- data/lib/nokogiri/xml/xpath_context.rb +15 -4
- data/lib/nokogiri/xml.rb +45 -55
- data/lib/nokogiri/xslt/stylesheet.rb +32 -8
- data/lib/nokogiri/xslt.rb +103 -30
- data/lib/nokogiri.rb +59 -75
- data/lib/xsd/xmlparser/nokogiri.rb +32 -29
- data/patches/libxml2/0009-allow-wildcard-namespaces.patch +77 -0
- data/patches/libxml2/0010-update-config.guess-and-config.sub-for-libxml2.patch +224 -0
- data/patches/libxml2/0011-rip-out-libxml2-s-libc_single_threaded-support.patch +30 -0
- data/patches/libxml2/0019-xpath-Use-separate-static-hash-table-for-standard-fu.patch +244 -0
- data/patches/libxslt/0001-update-config.guess-and-config.sub-for-libxslt.patch +224 -0
- data/ports/archives/libxml2-2.13.6.tar.xz +0 -0
- data/ports/archives/libxslt-1.1.42.tar.xz +0 -0
- metadata +123 -295
- data/ext/nokogiri/html_document.c +0 -170
- data/ext/nokogiri/html_document.h +0 -10
- data/ext/nokogiri/html_element_description.c +0 -279
- data/ext/nokogiri/html_element_description.h +0 -10
- data/ext/nokogiri/html_entity_lookup.c +0 -32
- data/ext/nokogiri/html_entity_lookup.h +0 -8
- data/ext/nokogiri/html_sax_parser_context.c +0 -116
- data/ext/nokogiri/html_sax_parser_context.h +0 -11
- data/ext/nokogiri/html_sax_push_parser.c +0 -87
- data/ext/nokogiri/html_sax_push_parser.h +0 -9
- data/ext/nokogiri/xml_attr.h +0 -9
- data/ext/nokogiri/xml_attribute_decl.h +0 -9
- data/ext/nokogiri/xml_cdata.h +0 -9
- data/ext/nokogiri/xml_comment.h +0 -9
- data/ext/nokogiri/xml_document.h +0 -23
- data/ext/nokogiri/xml_document_fragment.h +0 -10
- data/ext/nokogiri/xml_dtd.h +0 -10
- data/ext/nokogiri/xml_element_content.h +0 -10
- data/ext/nokogiri/xml_element_decl.h +0 -9
- data/ext/nokogiri/xml_encoding_handler.h +0 -8
- data/ext/nokogiri/xml_entity_decl.h +0 -10
- data/ext/nokogiri/xml_entity_reference.h +0 -9
- data/ext/nokogiri/xml_io.c +0 -61
- data/ext/nokogiri/xml_io.h +0 -11
- data/ext/nokogiri/xml_libxml2_hacks.c +0 -112
- data/ext/nokogiri/xml_libxml2_hacks.h +0 -12
- data/ext/nokogiri/xml_namespace.h +0 -14
- data/ext/nokogiri/xml_node.h +0 -13
- data/ext/nokogiri/xml_node_set.h +0 -12
- data/ext/nokogiri/xml_processing_instruction.h +0 -9
- data/ext/nokogiri/xml_reader.h +0 -10
- data/ext/nokogiri/xml_relax_ng.h +0 -9
- data/ext/nokogiri/xml_sax_parser.h +0 -39
- data/ext/nokogiri/xml_sax_parser_context.h +0 -10
- data/ext/nokogiri/xml_sax_push_parser.h +0 -9
- data/ext/nokogiri/xml_schema.h +0 -9
- data/ext/nokogiri/xml_syntax_error.h +0 -13
- data/ext/nokogiri/xml_text.h +0 -9
- data/ext/nokogiri/xml_xpath_context.h +0 -10
- data/ext/nokogiri/xslt_stylesheet.h +0 -14
- data/lib/nokogiri/html/document.rb +0 -335
- data/lib/nokogiri/html/document_fragment.rb +0 -49
- data/lib/nokogiri/html/element_description_defaults.rb +0 -671
- data/lib/nokogiri/html/sax/parser.rb +0 -62
- data/lib/nokogiri/html/sax/parser_context.rb +0 -16
- data/patches/libxml2/0001-Revert-Do-not-URI-escape-in-server-side-includes.patch +0 -78
- data/patches/libxml2/0004-libxml2.la-is-in-top_builddir.patch +0 -25
- data/patches/libxml2/0005-Fix-infinite-loop-in-xmlStringLenDecodeEntities.patch +0 -32
- data/ports/archives/libxml2-2.9.10.tar.gz +0 -0
- data/ports/archives/libxslt-1.1.34.tar.gz +0 -0
- /data/patches/libxml2/{0002-Remove-script-macro-support.patch → 0001-Remove-script-macro-support.patch} +0 -0
- /data/patches/libxml2/{0003-Update-entities-to-remove-handling-of-ssi.patch → 0002-Update-entities-to-remove-handling-of-ssi.patch} +0 -0
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module Nokogiri
|
2
4
|
module XSLT
|
3
5
|
###
|
@@ -8,16 +10,38 @@ module Nokogiri
|
|
8
10
|
# doc = Nokogiri::XML(File.read('some_file.xml'))
|
9
11
|
# xslt = Nokogiri::XSLT(File.read('some_transformer.xslt'))
|
10
12
|
#
|
11
|
-
#
|
13
|
+
# xslt.transform(doc) # => Nokogiri::XML::Document
|
12
14
|
#
|
13
|
-
#
|
14
|
-
#
|
15
|
+
# Many XSLT transformations include serialization behavior to emit a non-XML document. For these
|
16
|
+
# cases, please take care to invoke the #serialize method on the result of the transformation:
|
17
|
+
#
|
18
|
+
# doc = Nokogiri::XML(File.read('some_file.xml'))
|
19
|
+
# xslt = Nokogiri::XSLT(File.read('some_transformer.xslt'))
|
20
|
+
# xslt.serialize(xslt.transform(doc)) # => String
|
21
|
+
#
|
22
|
+
# or use the #apply_to method, which is a shortcut for `serialize(transform(document))`:
|
23
|
+
#
|
24
|
+
# doc = Nokogiri::XML(File.read('some_file.xml'))
|
25
|
+
# xslt = Nokogiri::XSLT(File.read('some_transformer.xslt'))
|
26
|
+
# xslt.apply_to(doc) # => String
|
27
|
+
#
|
28
|
+
# See Nokogiri::XSLT::Stylesheet#transform for more information and examples.
|
15
29
|
class Stylesheet
|
16
|
-
|
17
|
-
#
|
18
|
-
#
|
19
|
-
#
|
20
|
-
|
30
|
+
# :call-seq:
|
31
|
+
# apply_to(document, params = []) -> String
|
32
|
+
#
|
33
|
+
# Apply an XSLT stylesheet to an XML::Document and serialize it properly. This method is
|
34
|
+
# equivalent to calling #serialize on the result of #transform.
|
35
|
+
#
|
36
|
+
# [Parameters]
|
37
|
+
# - +document+ is an instance of XML::Document to transform
|
38
|
+
# - +params+ is an array of strings used as XSLT parameters, passed into #transform
|
39
|
+
#
|
40
|
+
# [Returns]
|
41
|
+
# A string containing the serialized result of the transformation.
|
42
|
+
#
|
43
|
+
# See Nokogiri::XSLT::Stylesheet#transform for more information and examples.
|
44
|
+
def apply_to(document, params = [])
|
21
45
|
serialize(transform(document, params))
|
22
46
|
end
|
23
47
|
end
|
data/lib/nokogiri/xslt.rb
CHANGED
@@ -1,16 +1,11 @@
|
|
1
|
-
|
1
|
+
# coding: utf-8
|
2
|
+
# frozen_string_literal: true
|
2
3
|
|
3
4
|
module Nokogiri
|
4
5
|
class << self
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
# Example:
|
9
|
-
#
|
10
|
-
# xslt = Nokogiri::XSLT(File.read(ARGV[0]))
|
11
|
-
#
|
12
|
-
def XSLT stylesheet, modules = {}
|
13
|
-
XSLT.parse(stylesheet, modules)
|
6
|
+
# Convenience method for Nokogiri::XSLT.parse
|
7
|
+
def XSLT(...)
|
8
|
+
XSLT.parse(...)
|
14
9
|
end
|
15
10
|
end
|
16
11
|
|
@@ -19,38 +14,116 @@ module Nokogiri
|
|
19
14
|
# Stylesheet object.
|
20
15
|
module XSLT
|
21
16
|
class << self
|
22
|
-
|
23
|
-
#
|
24
|
-
|
17
|
+
# :call-seq:
|
18
|
+
# parse(xsl) → Nokogiri::XSLT::Stylesheet
|
19
|
+
# parse(xsl, modules) → Nokogiri::XSLT::Stylesheet
|
20
|
+
#
|
21
|
+
# Parse the stylesheet in +xsl+, registering optional +modules+ as custom class handlers.
|
22
|
+
#
|
23
|
+
# [Parameters]
|
24
|
+
# - +xsl+ (String) XSL content to be parsed into a stylesheet
|
25
|
+
# - +modules+ (Hash<String ⇒ Class>) A hash of URI-to-handler relations for linking a
|
26
|
+
# namespace to a custom function handler.
|
27
|
+
#
|
28
|
+
# ⚠ The XSLT handler classes are registered *globally*.
|
29
|
+
#
|
30
|
+
# Also see Nokogiri::XSLT.register
|
31
|
+
#
|
32
|
+
# *Example*
|
33
|
+
#
|
34
|
+
# xml = Nokogiri.XML(<<~XML)
|
35
|
+
# <nodes>
|
36
|
+
# <node>Foo</node>
|
37
|
+
# <node>Bar</node>
|
38
|
+
# </nodes>
|
39
|
+
# XML
|
40
|
+
#
|
41
|
+
# handler = Class.new do
|
42
|
+
# def reverse(node)
|
43
|
+
# node.text.reverse
|
44
|
+
# end
|
45
|
+
# end
|
46
|
+
#
|
47
|
+
# xsl = <<~XSL
|
48
|
+
# <xsl:stylesheet version="1.0"
|
49
|
+
# xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
|
50
|
+
# xmlns:myfuncs="http://nokogiri.org/xslt/myfuncs"
|
51
|
+
# extension-element-prefixes="myfuncs">
|
52
|
+
# <xsl:template match="/">
|
53
|
+
# <reversed>
|
54
|
+
# <xsl:for-each select="nodes/node">
|
55
|
+
# <reverse><xsl:copy-of select="myfuncs:reverse(.)"/></reverse>
|
56
|
+
# </xsl:for-each>
|
57
|
+
# </reversed>
|
58
|
+
# </xsl:template>
|
59
|
+
# </xsl:stylesheet>
|
60
|
+
# XSL
|
61
|
+
#
|
62
|
+
# xsl = Nokogiri.XSLT(xsl, "http://nokogiri.org/xslt/myfuncs" => handler)
|
63
|
+
# xsl.transform(xml).to_xml
|
64
|
+
# # => "<?xml version=\"1.0\"?>\n" +
|
65
|
+
# # "<reversed>\n" +
|
66
|
+
# # " <reverse>ooF</reverse>\n" +
|
67
|
+
# # " <reverse>raB</reverse>\n" +
|
68
|
+
# # "</reversed>\n"
|
69
|
+
#
|
70
|
+
def parse(string, modules = {})
|
25
71
|
modules.each do |url, klass|
|
26
|
-
XSLT.register
|
72
|
+
XSLT.register(url, klass)
|
27
73
|
end
|
28
74
|
|
75
|
+
doc = XML::Document.parse(string, nil, nil, XML::ParseOptions::DEFAULT_XSLT)
|
29
76
|
if Nokogiri.jruby?
|
30
|
-
Stylesheet.parse_stylesheet_doc(
|
77
|
+
Stylesheet.parse_stylesheet_doc(doc, string)
|
31
78
|
else
|
32
|
-
Stylesheet.parse_stylesheet_doc(
|
79
|
+
Stylesheet.parse_stylesheet_doc(doc)
|
33
80
|
end
|
34
81
|
end
|
35
82
|
|
36
|
-
|
37
|
-
#
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
83
|
+
# :call-seq:
|
84
|
+
# quote_params(params) → Array
|
85
|
+
#
|
86
|
+
# Quote parameters in +params+ for stylesheet safety.
|
87
|
+
# See Nokogiri::XSLT::Stylesheet#transform for example usage.
|
88
|
+
#
|
89
|
+
# [Parameters]
|
90
|
+
# - +params+ (Hash, Array) XSLT parameters (key->value, or tuples of [key, value])
|
91
|
+
#
|
92
|
+
# [Returns] Array of string parameters, with quotes correctly escaped for use with XSLT::Stylesheet#transform
|
93
|
+
#
|
94
|
+
def quote_params(params)
|
95
|
+
params.flatten.each_slice(2).with_object([]) do |kv, quoted_params|
|
96
|
+
key, value = kv.map(&:to_s)
|
97
|
+
value = if value.include?("'")
|
98
|
+
"concat('#{value.gsub("'", %q{', "'", '})}')"
|
48
99
|
else
|
49
|
-
|
100
|
+
"'#{value}'"
|
50
101
|
end
|
102
|
+
quoted_params << key
|
103
|
+
quoted_params << value
|
51
104
|
end
|
52
|
-
parray.flatten
|
53
105
|
end
|
106
|
+
|
107
|
+
# call-seq:
|
108
|
+
# register(uri, custom_handler_class)
|
109
|
+
#
|
110
|
+
# Register a class that implements custom XSLT transformation functions.
|
111
|
+
#
|
112
|
+
# ⚠ The XSLT handler classes are registered *globally*.
|
113
|
+
#
|
114
|
+
# [Parameters]
|
115
|
+
# - +uri+ (String) The namespace for the custom handlers
|
116
|
+
# - +custom_handler_class+ (Class) A class with ruby methods that can be called during
|
117
|
+
# transformation
|
118
|
+
#
|
119
|
+
# See Nokogiri::XSLT.parse for usage.
|
120
|
+
#
|
121
|
+
def register(uri, custom_handler_class)
|
122
|
+
# NOTE: this is implemented in the C extension, see ext/nokogiri/xslt_stylesheet.c
|
123
|
+
raise NotImplementedError, "Nokogiri::XSLT.register is not implemented on JRuby"
|
124
|
+
end if Nokogiri.jruby?
|
54
125
|
end
|
55
126
|
end
|
56
127
|
end
|
128
|
+
|
129
|
+
require_relative "xslt/stylesheet"
|
data/lib/nokogiri.rb
CHANGED
@@ -1,96 +1,73 @@
|
|
1
|
-
#
|
2
|
-
#
|
3
|
-
|
4
|
-
require 'rbconfig'
|
1
|
+
# coding: utf-8
|
2
|
+
# frozen_string_literal: true
|
5
3
|
|
6
4
|
if defined?(RUBY_ENGINE) && RUBY_ENGINE == "jruby"
|
7
|
-
|
8
|
-
# unless defined?(JRuby::Rack::VERSION) || defined?(AppEngine::ApiProxy)
|
9
|
-
#
|
10
|
-
# However, simply cutting defined?(JRuby::Rack::VERSION) off resulted in
|
11
|
-
# an unable-to-load-nokogiri problem. Thus, now, Nokogiri checks the presence
|
12
|
-
# of appengine-rack.jar in $LOAD_PATH. If Nokogiri is on GAE, Nokogiri
|
13
|
-
# should skip loading xml jars. This is because those are in WEB-INF/lib and
|
14
|
-
# already set in the classpath.
|
15
|
-
unless $LOAD_PATH.to_s.include?("appengine-rack")
|
16
|
-
require 'stringio'
|
17
|
-
require 'isorelax.jar'
|
18
|
-
require 'jing.jar'
|
19
|
-
require 'nekohtml.jar'
|
20
|
-
require 'nekodtd.jar'
|
21
|
-
require 'xercesImpl.jar'
|
22
|
-
require 'serializer.jar'
|
23
|
-
require 'xalan.jar'
|
24
|
-
require 'xml-apis.jar'
|
25
|
-
end
|
5
|
+
require_relative "nokogiri/jruby/dependencies"
|
26
6
|
end
|
27
7
|
|
28
|
-
|
29
|
-
RUBY_VERSION =~ /(\d+\.\d+)/
|
30
|
-
require "nokogiri/#{$1}/nokogiri"
|
31
|
-
rescue LoadError
|
32
|
-
require 'nokogiri/nokogiri'
|
33
|
-
end
|
34
|
-
require 'nokogiri/version'
|
35
|
-
require 'nokogiri/syntax_error'
|
36
|
-
require 'nokogiri/xml'
|
37
|
-
require 'nokogiri/xslt'
|
38
|
-
require 'nokogiri/html'
|
39
|
-
require 'nokogiri/decorators/slop'
|
40
|
-
require 'nokogiri/css'
|
41
|
-
require 'nokogiri/html/builder'
|
8
|
+
require_relative "nokogiri/extension"
|
42
9
|
|
43
10
|
# Nokogiri parses and searches XML/HTML very quickly, and also has
|
44
11
|
# correctly implemented CSS3 selector support as well as XPath 1.0
|
45
12
|
# support.
|
46
13
|
#
|
47
14
|
# Parsing a document returns either a Nokogiri::XML::Document, or a
|
48
|
-
# Nokogiri::
|
15
|
+
# Nokogiri::HTML4::Document depending on the kind of document you parse.
|
49
16
|
#
|
50
17
|
# Here is an example:
|
51
18
|
#
|
52
|
-
#
|
53
|
-
#
|
19
|
+
# require 'nokogiri'
|
20
|
+
# require 'open-uri'
|
21
|
+
#
|
22
|
+
# # Get a Nokogiri::HTML4::Document for the page we’re interested in...
|
54
23
|
#
|
55
|
-
#
|
24
|
+
# doc = Nokogiri::HTML4(URI.open('http://www.google.com/search?q=tenderlove'))
|
56
25
|
#
|
57
|
-
#
|
26
|
+
# # Do funky things with it using Nokogiri::XML::Node methods...
|
58
27
|
#
|
59
|
-
#
|
28
|
+
# ####
|
29
|
+
# # Search for nodes by css
|
30
|
+
# doc.css('h3.r a.l').each do |link|
|
31
|
+
# puts link.content
|
32
|
+
# end
|
60
33
|
#
|
61
|
-
#
|
62
|
-
# # Search for nodes by css
|
63
|
-
# doc.css('h3.r a.l').each do |link|
|
64
|
-
# puts link.content
|
65
|
-
# end
|
34
|
+
# See also:
|
66
35
|
#
|
67
|
-
#
|
68
|
-
#
|
36
|
+
# - Nokogiri::XML::Searchable#css for more information about CSS searching
|
37
|
+
# - Nokogiri::XML::Searchable#xpath for more information about XPath searching
|
69
38
|
module Nokogiri
|
70
39
|
class << self
|
71
40
|
###
|
72
41
|
# Parse an HTML or XML document. +string+ contains the document.
|
73
|
-
def parse
|
42
|
+
def parse(string, url = nil, encoding = nil, options = nil)
|
74
43
|
if string.respond_to?(:read) ||
|
75
|
-
/^\s*<(?:!DOCTYPE\s+)?html[\s>]/i
|
44
|
+
/^\s*<(?:!DOCTYPE\s+)?html[\s>]/i.match?(string[0, 512])
|
76
45
|
# Expect an HTML indicator to appear within the first 512
|
77
46
|
# characters of a document. (<?xml ?> + <?xml-stylesheet ?>
|
78
47
|
# shouldn't be that long)
|
79
|
-
Nokogiri.
|
80
|
-
|
48
|
+
Nokogiri.HTML4(
|
49
|
+
string,
|
50
|
+
url,
|
51
|
+
encoding,
|
52
|
+
options || XML::ParseOptions::DEFAULT_HTML,
|
53
|
+
)
|
81
54
|
else
|
82
|
-
Nokogiri.XML(
|
83
|
-
|
84
|
-
|
55
|
+
Nokogiri.XML(
|
56
|
+
string,
|
57
|
+
url,
|
58
|
+
encoding,
|
59
|
+
options || XML::ParseOptions::DEFAULT_XML,
|
60
|
+
)
|
61
|
+
end.tap do |doc|
|
85
62
|
yield doc if block_given?
|
86
|
-
|
63
|
+
end
|
87
64
|
end
|
88
65
|
|
89
66
|
###
|
90
67
|
# Create a new Nokogiri::XML::DocumentFragment
|
91
|
-
def make
|
68
|
+
def make(input = nil, opts = {}, &blk)
|
92
69
|
if input
|
93
|
-
Nokogiri::
|
70
|
+
Nokogiri::HTML4.fragment(input).children.first
|
94
71
|
else
|
95
72
|
Nokogiri(&blk)
|
96
73
|
end
|
@@ -115,30 +92,37 @@ module Nokogiri
|
|
115
92
|
Nokogiri(*args, &block).slop!
|
116
93
|
end
|
117
94
|
|
95
|
+
# :nodoc:
|
118
96
|
def install_default_aliases
|
119
|
-
|
120
|
-
|
121
|
-
{
|
122
|
-
'Windows-31J' => 'CP932', # Windows-31J is the IANA registered name of CP932.
|
123
|
-
}.each { |alias_name, name|
|
124
|
-
EncodingHandler.alias(name, alias_name) if EncodingHandler[alias_name].nil?
|
125
|
-
}
|
97
|
+
warn("Nokogiri.install_default_aliases is deprecated. Please call Nokogiri::EncodingHandler.install_default_aliases instead. This will become an error in Nokogiri v1.17.0.", uplevel: 1, category: :deprecated) # deprecated in v1.14.0, remove in v1.17.0
|
98
|
+
Nokogiri::EncodingHandler.install_default_aliases
|
126
99
|
end
|
127
100
|
end
|
128
|
-
|
129
|
-
Nokogiri.install_default_aliases
|
130
101
|
end
|
131
102
|
|
132
103
|
###
|
133
|
-
#
|
134
|
-
#
|
135
|
-
# Nokogiri.parse
|
104
|
+
# Parse a document contained in +args+. Nokogiri will try to guess what type of document you are
|
105
|
+
# attempting to parse. For more information, see Nokogiri.parse
|
136
106
|
#
|
137
|
-
# To specify the type of document, use Nokogiri.XML or Nokogiri.
|
107
|
+
# To specify the type of document, use {Nokogiri.XML}, {Nokogiri.HTML4}, or {Nokogiri.HTML5}.
|
138
108
|
def Nokogiri(*args, &block)
|
139
|
-
if
|
140
|
-
Nokogiri::
|
109
|
+
if block
|
110
|
+
Nokogiri::HTML4::Builder.new(&block).doc.root
|
141
111
|
else
|
142
112
|
Nokogiri.parse(*args)
|
143
113
|
end
|
144
114
|
end
|
115
|
+
|
116
|
+
require_relative "nokogiri/version"
|
117
|
+
require_relative "nokogiri/class_resolver"
|
118
|
+
require_relative "nokogiri/syntax_error"
|
119
|
+
require_relative "nokogiri/xml"
|
120
|
+
require_relative "nokogiri/xslt"
|
121
|
+
require_relative "nokogiri/html4"
|
122
|
+
require_relative "nokogiri/html"
|
123
|
+
require_relative "nokogiri/decorators/slop"
|
124
|
+
require_relative "nokogiri/css"
|
125
|
+
require_relative "nokogiri/html4/builder"
|
126
|
+
require_relative "nokogiri/encoding_handler"
|
127
|
+
|
128
|
+
require_relative "nokogiri/html5" if Nokogiri.uses_gumbo?
|
@@ -1,14 +1,15 @@
|
|
1
|
-
|
1
|
+
# frozen_string_literal: true
|
2
2
|
|
3
|
-
|
4
|
-
|
3
|
+
require "nokogiri"
|
4
|
+
|
5
|
+
module XSD
|
6
|
+
module XMLParser
|
5
7
|
###
|
6
8
|
# Nokogiri XML parser for soap4r.
|
7
9
|
#
|
8
|
-
# Nokogiri may be used as the XML parser in soap4r.
|
9
|
-
#
|
10
|
-
#
|
11
|
-
# required to use Nokogiri as the XML parser.
|
10
|
+
# Nokogiri may be used as the XML parser in soap4r. Require 'xsd/xmlparser/nokogiri' in your
|
11
|
+
# soap4r applications, and soap4r will use Nokogiri as its XML parser. No other changes should
|
12
|
+
# be required to use Nokogiri as the XML parser.
|
12
13
|
#
|
13
14
|
# Example (using UW ITS Web Services):
|
14
15
|
#
|
@@ -26,40 +27,40 @@ module XSD # :nodoc:
|
|
26
27
|
class Nokogiri < XSD::XMLParser::Parser
|
27
28
|
###
|
28
29
|
# Create a new XSD parser with +host+ and +opt+
|
29
|
-
def initialize
|
30
|
+
def initialize(host, opt = {})
|
30
31
|
super
|
31
|
-
@parser = ::Nokogiri::XML::SAX::Parser.new(self, @charset ||
|
32
|
+
@parser = ::Nokogiri::XML::SAX::Parser.new(self, @charset || "UTF-8")
|
32
33
|
end
|
33
34
|
|
34
35
|
###
|
35
36
|
# Start parsing +string_or_readable+
|
36
|
-
def do_parse
|
37
|
+
def do_parse(string_or_readable)
|
37
38
|
@parser.parse(string_or_readable)
|
38
39
|
end
|
39
40
|
|
40
41
|
###
|
41
42
|
# Handle the start_element event with +name+ and +attrs+
|
42
|
-
def start_element
|
43
|
+
def start_element(name, attrs = [])
|
43
44
|
super(name, Hash[*attrs.flatten])
|
44
45
|
end
|
45
46
|
|
46
47
|
###
|
47
48
|
# Handle the end_element event with +name+
|
48
|
-
def end_element
|
49
|
+
def end_element(name)
|
49
50
|
super
|
50
51
|
end
|
51
52
|
|
52
53
|
###
|
53
54
|
# Handle errors with message +msg+
|
54
|
-
def error
|
55
|
-
raise ParseError
|
55
|
+
def error(msg)
|
56
|
+
raise ParseError, msg
|
56
57
|
end
|
57
|
-
|
58
|
+
alias_method :warning, :error
|
58
59
|
|
59
60
|
###
|
60
61
|
# Handle cdata_blocks containing +string+
|
61
|
-
def cdata_block
|
62
|
-
characters
|
62
|
+
def cdata_block(string)
|
63
|
+
characters(string)
|
63
64
|
end
|
64
65
|
|
65
66
|
###
|
@@ -69,16 +70,16 @@ module XSD # :nodoc:
|
|
69
70
|
# +prefix+ is the namespace prefix for the element
|
70
71
|
# +uri+ is the associated namespace URI
|
71
72
|
# +ns+ is a hash of namespace prefix:urls associated with the element
|
72
|
-
def start_element_namespace
|
73
|
+
def start_element_namespace(name, attrs = [], prefix = nil, uri = nil, ns = []) # rubocop:disable Metrics/ParameterLists
|
73
74
|
###
|
74
75
|
# Deal with SAX v1 interface
|
75
|
-
name = [prefix, name].compact.join(
|
76
|
-
attributes = ns.map
|
77
|
-
[[
|
78
|
-
|
79
|
-
[[attr.prefix, attr.localname].compact.join(
|
80
|
-
|
81
|
-
start_element
|
76
|
+
name = [prefix, name].compact.join(":")
|
77
|
+
attributes = ns.map do |ns_prefix, ns_uri|
|
78
|
+
[["xmlns", ns_prefix].compact.join(":"), ns_uri]
|
79
|
+
end + attrs.map do |attr|
|
80
|
+
[[attr.prefix, attr.localname].compact.join(":"), attr.value]
|
81
|
+
end.flatten
|
82
|
+
start_element(name, attributes)
|
82
83
|
end
|
83
84
|
|
84
85
|
###
|
@@ -86,14 +87,16 @@ module XSD # :nodoc:
|
|
86
87
|
# +name+ is the element's name
|
87
88
|
# +prefix+ is the namespace prefix associated with the element
|
88
89
|
# +uri+ is the associated namespace URI
|
89
|
-
def end_element_namespace
|
90
|
+
def end_element_namespace(name, prefix = nil, uri = nil)
|
90
91
|
###
|
91
92
|
# Deal with SAX v1 interface
|
92
|
-
end_element
|
93
|
+
end_element([prefix, name].compact.join(":"))
|
93
94
|
end
|
94
95
|
|
95
|
-
|
96
|
-
class_eval
|
96
|
+
["xmldecl", "start_document", "end_document", "comment"].each do |name|
|
97
|
+
class_eval <<~RUBY, __FILE__, __LINE__ + 1
|
98
|
+
def #{name}(*args); end
|
99
|
+
RUBY
|
97
100
|
end
|
98
101
|
|
99
102
|
add_factory(self)
|
@@ -0,0 +1,77 @@
|
|
1
|
+
From 74c95ec5932c737d4fcb06b8646b0017364ada14 Mon Sep 17 00:00:00 2001
|
2
|
+
From: Mike Dalessio <mike.dalessio@gmail.com>
|
3
|
+
Date: Fri, 24 Dec 2021 19:08:01 -0500
|
4
|
+
Subject: [PATCH] attempt to hack in wildcard namespaces to xpath
|
5
|
+
|
6
|
+
I'm not confident this is a bulletproof patch.
|
7
|
+
---
|
8
|
+
xpath.c | 24 ++++++++++++++++++------
|
9
|
+
1 file changed, 18 insertions(+), 6 deletions(-)
|
10
|
+
|
11
|
+
diff --git a/xpath.c b/xpath.c
|
12
|
+
index 1aa2f1a..c7f0885 100644
|
13
|
+
--- a/xpath.c
|
14
|
+
+++ b/xpath.c
|
15
|
+
@@ -146,6 +146,9 @@
|
16
|
+
#define XPATH_MAX_RECURSION_DEPTH 5000
|
17
|
+
#endif
|
18
|
+
|
19
|
+
+#define WILDCARD_PREFIX "*"
|
20
|
+
+#define IS_WILDCARD_PREFIX(p) xmlStrEqual((xmlChar*)WILDCARD_PREFIX, p)
|
21
|
+
+
|
22
|
+
/*
|
23
|
+
* TODO:
|
24
|
+
* There are a few spots where some tests are done which depend upon ascii
|
25
|
+
@@ -11073,12 +11076,15 @@ xmlXPathCompNodeTest(xmlXPathParserContextPtr ctxt, xmlXPathTestVal *test,
|
26
|
+
SKIP_BLANKS;
|
27
|
+
|
28
|
+
if ((name == NULL) && (CUR == '*')) {
|
29
|
+
- /*
|
30
|
+
- * All elements
|
31
|
+
- */
|
32
|
+
NEXT;
|
33
|
+
- *test = NODE_TEST_ALL;
|
34
|
+
- return(NULL);
|
35
|
+
+ if (CUR != ':') {
|
36
|
+
+ /*
|
37
|
+
+ * All elements
|
38
|
+
+ */
|
39
|
+
+ *test = NODE_TEST_ALL;
|
40
|
+
+ return(NULL);
|
41
|
+
+ }
|
42
|
+
+ name = xmlCharStrdup(WILDCARD_PREFIX);
|
43
|
+
}
|
44
|
+
|
45
|
+
if (name == NULL)
|
46
|
+
@@ -11327,6 +11333,10 @@ xmlXPathCompStep(xmlXPathParserContextPtr ctxt) {
|
47
|
+
}
|
48
|
+
#endif
|
49
|
+
if (CUR == '*') {
|
50
|
+
+ if (NXT(1) == ':') {
|
51
|
+
+ NEXT;
|
52
|
+
+ name = xmlCharStrdup(WILDCARD_PREFIX);
|
53
|
+
+ }
|
54
|
+
axis = AXIS_CHILD;
|
55
|
+
} else {
|
56
|
+
if (name == NULL)
|
57
|
+
@@ -12030,7 +12040,7 @@ xmlXPathNodeCollectAndTest(xmlXPathParserContextPtr ctxt,
|
58
|
+
/*
|
59
|
+
* Setup namespaces.
|
60
|
+
*/
|
61
|
+
- if (prefix != NULL) {
|
62
|
+
+ if (prefix != NULL && !IS_WILDCARD_PREFIX(prefix)) {
|
63
|
+
URI = xmlXPathNsLookup(xpctxt, prefix);
|
64
|
+
if (URI == NULL) {
|
65
|
+
xmlXPathReleaseObject(xpctxt, obj);
|
66
|
+
@@ -12369,6 +12379,8 @@ xmlXPathNodeCollectAndTest(xmlXPathParserContextPtr ctxt,
|
67
|
+
{
|
68
|
+
XP_TEST_HIT
|
69
|
+
}
|
70
|
+
+ } else if (IS_WILDCARD_PREFIX(prefix)) {
|
71
|
+
+ XP_TEST_HIT
|
72
|
+
} else {
|
73
|
+
if ((cur->ns != NULL) &&
|
74
|
+
(xmlStrEqual(URI, cur->ns->href)))
|
75
|
+
--
|
76
|
+
2.31.0
|
77
|
+
|