nokogiri 1.13.8 → 1.15.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (120) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile +40 -0
  3. data/LICENSE-DEPENDENCIES.md +830 -509
  4. data/LICENSE.md +1 -1
  5. data/README.md +18 -11
  6. data/dependencies.yml +33 -15
  7. data/ext/nokogiri/extconf.rb +164 -46
  8. data/ext/nokogiri/gumbo.c +20 -10
  9. data/ext/nokogiri/html4_document.c +3 -4
  10. data/ext/nokogiri/html4_element_description.c +20 -15
  11. data/ext/nokogiri/html4_entity_lookup.c +2 -2
  12. data/ext/nokogiri/html4_sax_parser_context.c +11 -22
  13. data/ext/nokogiri/html4_sax_push_parser.c +3 -3
  14. data/ext/nokogiri/nokogiri.c +84 -75
  15. data/ext/nokogiri/nokogiri.h +31 -16
  16. data/ext/nokogiri/test_global_handlers.c +1 -1
  17. data/ext/nokogiri/xml_attr.c +2 -2
  18. data/ext/nokogiri/xml_attribute_decl.c +2 -2
  19. data/ext/nokogiri/xml_cdata.c +32 -18
  20. data/ext/nokogiri/xml_comment.c +2 -2
  21. data/ext/nokogiri/xml_document.c +127 -34
  22. data/ext/nokogiri/xml_document_fragment.c +2 -2
  23. data/ext/nokogiri/xml_dtd.c +2 -2
  24. data/ext/nokogiri/xml_element_content.c +34 -31
  25. data/ext/nokogiri/xml_element_decl.c +7 -7
  26. data/ext/nokogiri/xml_encoding_handler.c +15 -7
  27. data/ext/nokogiri/xml_entity_decl.c +1 -1
  28. data/ext/nokogiri/xml_entity_reference.c +2 -2
  29. data/ext/nokogiri/xml_namespace.c +79 -14
  30. data/ext/nokogiri/xml_node.c +300 -34
  31. data/ext/nokogiri/xml_node_set.c +125 -107
  32. data/ext/nokogiri/xml_processing_instruction.c +2 -2
  33. data/ext/nokogiri/xml_reader.c +81 -48
  34. data/ext/nokogiri/xml_relax_ng.c +66 -81
  35. data/ext/nokogiri/xml_sax_parser.c +45 -20
  36. data/ext/nokogiri/xml_sax_parser_context.c +46 -30
  37. data/ext/nokogiri/xml_sax_push_parser.c +30 -11
  38. data/ext/nokogiri/xml_schema.c +95 -117
  39. data/ext/nokogiri/xml_syntax_error.c +1 -1
  40. data/ext/nokogiri/xml_text.c +28 -14
  41. data/ext/nokogiri/xml_xpath_context.c +216 -136
  42. data/ext/nokogiri/xslt_stylesheet.c +118 -64
  43. data/gumbo-parser/Makefile +10 -0
  44. data/gumbo-parser/src/attribute.h +1 -1
  45. data/gumbo-parser/src/error.c +10 -6
  46. data/gumbo-parser/src/error.h +1 -1
  47. data/gumbo-parser/src/foreign_attrs.c +15 -16
  48. data/gumbo-parser/src/foreign_attrs.gperf +1 -1
  49. data/gumbo-parser/src/{gumbo.h → nokogiri_gumbo.h} +1 -0
  50. data/gumbo-parser/src/parser.c +21 -5
  51. data/gumbo-parser/src/replacement.h +1 -1
  52. data/gumbo-parser/src/string_buffer.h +1 -1
  53. data/gumbo-parser/src/string_piece.c +1 -1
  54. data/gumbo-parser/src/svg_attrs.c +2 -2
  55. data/gumbo-parser/src/svg_tags.c +2 -2
  56. data/gumbo-parser/src/tag.c +2 -1
  57. data/gumbo-parser/src/tag_lookup.c +7 -7
  58. data/gumbo-parser/src/tag_lookup.gperf +1 -0
  59. data/gumbo-parser/src/tag_lookup.h +1 -1
  60. data/gumbo-parser/src/token_buffer.h +1 -1
  61. data/gumbo-parser/src/tokenizer.c +1 -1
  62. data/gumbo-parser/src/tokenizer.h +1 -1
  63. data/gumbo-parser/src/utf8.c +1 -1
  64. data/gumbo-parser/src/utf8.h +1 -1
  65. data/gumbo-parser/src/util.c +1 -3
  66. data/gumbo-parser/src/util.h +4 -0
  67. data/gumbo-parser/src/vector.h +1 -1
  68. data/lib/nokogiri/css/node.rb +2 -2
  69. data/lib/nokogiri/css/xpath_visitor.rb +7 -5
  70. data/lib/nokogiri/css.rb +6 -0
  71. data/lib/nokogiri/decorators/slop.rb +1 -1
  72. data/lib/nokogiri/encoding_handler.rb +57 -0
  73. data/lib/nokogiri/extension.rb +4 -3
  74. data/lib/nokogiri/html4/document.rb +2 -121
  75. data/lib/nokogiri/html4/document_fragment.rb +1 -1
  76. data/lib/nokogiri/html4/element_description_defaults.rb +1827 -365
  77. data/lib/nokogiri/html4/encoding_reader.rb +121 -0
  78. data/lib/nokogiri/html4.rb +1 -0
  79. data/lib/nokogiri/html5/document.rb +113 -36
  80. data/lib/nokogiri/html5/document_fragment.rb +10 -3
  81. data/lib/nokogiri/html5/node.rb +8 -5
  82. data/lib/nokogiri/html5.rb +130 -216
  83. data/lib/nokogiri/jruby/dependencies.rb +1 -19
  84. data/lib/nokogiri/jruby/nokogiri_jars.rb +43 -0
  85. data/lib/nokogiri/version/constant.rb +1 -1
  86. data/lib/nokogiri/version/info.rb +11 -10
  87. data/lib/nokogiri/xml/attr.rb +49 -0
  88. data/lib/nokogiri/xml/attribute_decl.rb +4 -2
  89. data/lib/nokogiri/xml/builder.rb +1 -1
  90. data/lib/nokogiri/xml/document.rb +102 -55
  91. data/lib/nokogiri/xml/document_fragment.rb +50 -7
  92. data/lib/nokogiri/xml/element_content.rb +10 -2
  93. data/lib/nokogiri/xml/element_decl.rb +4 -2
  94. data/lib/nokogiri/xml/entity_decl.rb +4 -2
  95. data/lib/nokogiri/xml/namespace.rb +42 -0
  96. data/lib/nokogiri/xml/node/save_options.rb +14 -4
  97. data/lib/nokogiri/xml/node.rb +212 -48
  98. data/lib/nokogiri/xml/node_set.rb +88 -9
  99. data/lib/nokogiri/xml/parse_options.rb +129 -50
  100. data/lib/nokogiri/xml/pp/node.rb +28 -15
  101. data/lib/nokogiri/xml/processing_instruction.rb +2 -1
  102. data/lib/nokogiri/xml/sax/document.rb +1 -1
  103. data/lib/nokogiri/xml/sax/parser.rb +2 -3
  104. data/lib/nokogiri/xml/searchable.rb +18 -10
  105. data/lib/nokogiri/xslt.rb +74 -4
  106. data/lib/nokogiri.rb +15 -15
  107. data/lib/xsd/xmlparser/nokogiri.rb +4 -2
  108. data/patches/libxml2/0010-update-config.guess-and-config.sub-for-libxml2.patch +224 -0
  109. data/patches/libxml2/0011-rip-out-libxml2-s-libc_single_threaded-support.patch +30 -0
  110. data/patches/libxslt/0001-update-config.guess-and-config.sub-for-libxslt.patch +224 -0
  111. data/ports/archives/libxml2-2.11.7.tar.xz +0 -0
  112. data/ports/archives/libxslt-1.1.39.tar.xz +0 -0
  113. metadata +19 -242
  114. data/patches/libxml2/0004-use-glibc-strlen.patch +0 -53
  115. data/patches/libxml2/0005-avoid-isnan-isinf.patch +0 -81
  116. data/patches/libxml2/0006-update-automake-files-for-arm64.patch +0 -3040
  117. data/patches/libxml2/0008-htmlParseComment-handle-abruptly-closed-comments.patch +0 -61
  118. data/patches/libxslt/0001-update-automake-files-for-arm64.patch +0 -3037
  119. data/ports/archives/libxml2-2.9.14.tar.xz +0 -0
  120. data/ports/archives/libxslt-1.1.35.tar.xz +0 -0
@@ -1,84 +1,163 @@
1
+ # coding: utf-8
1
2
  # frozen_string_literal: true
2
3
 
3
4
  module Nokogiri
4
5
  module XML
5
- ###
6
- # Parse options for passing to Nokogiri.XML or Nokogiri.HTML
7
- #
8
- # == Building combinations of parse options
9
- # You can build your own combinations of these parse options by using any of the following methods:
10
- # *Note*: All examples attempt to set the +RECOVER+ & +NOENT+ options.
11
- # [Ruby's bitwise operators] You can use the Ruby bitwise operators to set various combinations.
12
- # Nokogiri.XML('<content>Chapter 1</content', nil, nil, Nokogiri::XML::ParseOptions.new((1 << 0) | (1 << 1)))
13
- # [Method chaining] Every option has an equivalent method in lowercase. You can chain these methods together to set various combinations.
14
- # Nokogiri.XML('<content>Chapter 1</content', nil, nil, Nokogiri::XML::ParseOptions.new.recover.noent)
15
- # [Using Ruby Blocks] You can also setup parse combinations in the block passed to Nokogiri.XML or Nokogiri.HTML
16
- # Nokogiri.XML('<content>Chapter 1</content') {|config| config.recover.noent}
17
- #
18
- # == Removing particular parse options
19
- # You can also remove options from an instance of +ParseOptions+ dynamically.
20
- # Every option has an equivalent <code>no{option}</code> method in lowercase. You can call these methods on an instance of +ParseOptions+ to remove the option.
21
- # Note that this is not available for +STRICT+.
22
- #
23
- # # Setting the RECOVER & NOENT options...
24
- # options = Nokogiri::XML::ParseOptions.new.recover.noent
25
- # # later...
26
- # options.norecover # Removes the Nokogiri::XML::ParseOptions::RECOVER option
27
- # options.nonoent # Removes the Nokogiri::XML::ParseOptions::NOENT option
6
+ # Options that control the parsing behavior for XML::Document, XML::DocumentFragment,
7
+ # HTML4::Document, HTML4::DocumentFragment, XSLT::Stylesheet, and XML::Schema.
8
+ #
9
+ # These options directly expose libxml2's parse options, which are all boolean in the sense that
10
+ # an option is "on" or "off".
11
+ #
12
+ # 💡 Note that HTML5 parsing has a separate, orthogonal set of options due to the nature of the
13
+ # HTML5 specification. See Nokogiri::HTML5.
14
+ #
15
+ # ⚠ Not all parse options are supported on JRuby. Nokogiri will attempt to invoke the equivalent
16
+ # behavior in Xerces/NekoHTML on JRuby when it's possible.
17
+ #
18
+ # == Setting and unsetting parse options
19
+ #
20
+ # You can build your own combinations of parse options by using any of the following methods:
21
+ #
22
+ # [ParseOptions method chaining]
23
+ #
24
+ # Every option has an equivalent method in lowercase. You can chain these methods together to
25
+ # set various combinations.
26
+ #
27
+ # # Set the HUGE & PEDANTIC options
28
+ # po = Nokogiri::XML::ParseOptions.new.huge.pedantic
29
+ # doc = Nokogiri::XML::Document.parse(xml, nil, nil, po)
30
+ #
31
+ # Every option has an equivalent <code>no{option}</code> method in lowercase. You can call these
32
+ # methods on an instance of ParseOptions to unset the option.
33
+ #
34
+ # # Set the HUGE & PEDANTIC options
35
+ # po = Nokogiri::XML::ParseOptions.new.huge.pedantic
36
+ #
37
+ # # later we want to modify the options
38
+ # po.nohuge # Unset the HUGE option
39
+ # po.nopedantic # Unset the PEDANTIC option
40
+ #
41
+ # 💡 Note that some options begin with "no" leading to the logical but perhaps unintuitive
42
+ # double negative:
43
+ #
44
+ # po.nocdata # Set the NOCDATA parse option
45
+ # po.nonocdata # Unset the NOCDATA parse option
46
+ #
47
+ # 💡 Note that negation is not available for STRICT, which is itself a negation of all other
48
+ # features.
49
+ #
50
+ #
51
+ # [Using Ruby Blocks]
52
+ #
53
+ # Most parsing methods will accept a block for configuration of parse options, and we
54
+ # recommend chaining the setter methods:
55
+ #
56
+ # doc = Nokogiri::XML::Document.parse(xml) { |config| config.huge.pedantic }
57
+ #
58
+ #
59
+ # [ParseOptions constants]
60
+ #
61
+ # You can also use the constants declared under Nokogiri::XML::ParseOptions to set various
62
+ # combinations. They are bits in a bitmask, and so can be combined with bitwise operators:
63
+ #
64
+ # po = Nokogiri::XML::ParseOptions.new(Nokogiri::XML::ParseOptions::HUGE | Nokogiri::XML::ParseOptions::PEDANTIC)
65
+ # doc = Nokogiri::XML::Document.parse(xml, nil, nil, po)
28
66
  #
29
67
  class ParseOptions
30
68
  # Strict parsing
31
69
  STRICT = 0
32
- # Recover from errors
70
+
71
+ # Recover from errors. On by default for XML::Document, XML::DocumentFragment,
72
+ # HTML4::Document, HTML4::DocumentFragment, XSLT::Stylesheet, and XML::Schema.
33
73
  RECOVER = 1 << 0
34
- # Substitute entities
74
+
75
+ # Substitute entities. Off by default.
76
+ #
77
+ # ⚠ This option enables entity substitution, contrary to what the name implies.
78
+ #
79
+ # ⚠ <b>It is UNSAFE to set this option</b> when parsing untrusted documents.
35
80
  NOENT = 1 << 1
36
- # Load external subsets
81
+
82
+ # Load external subsets. On by default for XSLT::Stylesheet.
83
+ #
84
+ # ⚠ <b>It is UNSAFE to set this option</b> when parsing untrusted documents.
37
85
  DTDLOAD = 1 << 2
38
- # Default DTD attributes
86
+
87
+ # Default DTD attributes. On by default for XSLT::Stylesheet.
39
88
  DTDATTR = 1 << 3
40
- # validate with the DTD
89
+
90
+ # Validate with the DTD. Off by default.
41
91
  DTDVALID = 1 << 4
42
- # suppress error reports
92
+
93
+ # Suppress error reports. On by default for HTML4::Document and HTML4::DocumentFragment.
43
94
  NOERROR = 1 << 5
44
- # suppress warning reports
95
+
96
+ # Suppress warning reports. On by default for HTML4::Document and HTML4::DocumentFragment.
45
97
  NOWARNING = 1 << 6
46
- # pedantic error reporting
98
+
99
+ # Enable pedantic error reporting. Off by default.
47
100
  PEDANTIC = 1 << 7
48
- # remove blank nodes
101
+
102
+ # Remove blank nodes. Off by default.
49
103
  NOBLANKS = 1 << 8
50
- # use the SAX1 interface internally
104
+
105
+ # Use the SAX1 interface internally. Off by default.
51
106
  SAX1 = 1 << 9
52
- # Implement XInclude substitution
107
+
108
+ # Implement XInclude substitution. Off by default.
53
109
  XINCLUDE = 1 << 10
54
- # Forbid network access. Recommended for dealing with untrusted documents.
110
+
111
+ # Forbid network access. On by default for XML::Document, XML::DocumentFragment,
112
+ # HTML4::Document, HTML4::DocumentFragment, XSLT::Stylesheet, and XML::Schema.
113
+ #
114
+ # ⚠ <b>It is UNSAFE to unset this option</b> when parsing untrusted documents.
55
115
  NONET = 1 << 11
56
- # Do not reuse the context dictionary
116
+
117
+ # Do not reuse the context dictionary. Off by default.
57
118
  NODICT = 1 << 12
58
- # remove redundant namespaces declarations
119
+
120
+ # Remove redundant namespace declarations. Off by default.
59
121
  NSCLEAN = 1 << 13
60
- # merge CDATA as text nodes
122
+
123
+ # Merge CDATA as text nodes. On by default for XSLT::Stylesheet.
61
124
  NOCDATA = 1 << 14
62
- # do not generate XINCLUDE START/END nodes
125
+
126
+ # Do not generate XInclude START/END nodes. Off by default.
63
127
  NOXINCNODE = 1 << 15
64
- # compact small text nodes; no modification of the tree allowed afterwards (will possibly crash if you try to modify the tree)
128
+
129
+ # Compact small text nodes. Off by default.
130
+ #
131
+ # ⚠ No modification of the DOM tree is allowed after parsing. libxml2 may crash if you try to
132
+ # modify the tree.
65
133
  COMPACT = 1 << 16
66
- # parse using XML-1.0 before update 5
134
+
135
+ # Parse using XML-1.0 before update 5. Off by default.
67
136
  OLD10 = 1 << 17
68
- # do not fixup XINCLUDE xml:base uris
137
+
138
+ # Do not fixup XInclude xml:base uris. Off by default.
69
139
  NOBASEFIX = 1 << 18
70
- # relax any hardcoded limit from the parser
140
+
141
+ # Relax any hardcoded limit from the parser. Off by default.
142
+ #
143
+ # ⚠ There may be a performance penalty when this option is set.
71
144
  HUGE = 1 << 19
72
- # line numbers stored as long int (instead of a short int)
145
+
146
+ # Support line numbers up to <code>long int</code> (default is a <code>short int</code>). On
147
+ # by default for XML::Document, XML::DocumentFragment, HTML4::Document,
148
+ # HTML4::DocumentFragment, XSLT::Stylesheet, and XML::Schema.
73
149
  BIG_LINES = 1 << 22
74
150
 
75
- # the default options used for parsing XML documents
151
+ # The options mask used by default for parsing XML::Document and XML::DocumentFragment
76
152
  DEFAULT_XML = RECOVER | NONET | BIG_LINES
77
- # the default options used for parsing XSLT stylesheets
153
+
154
+ # The options mask used by default for parsing XSLT::Stylesheet
78
155
  DEFAULT_XSLT = RECOVER | NONET | NOENT | DTDLOAD | DTDATTR | NOCDATA | BIG_LINES
79
- # the default options used for parsing HTML documents
156
+
157
+ # The options mask used by default for parsing HTML4::Document and HTML4::DocumentFragment
80
158
  DEFAULT_HTML = RECOVER | NOERROR | NOWARNING | NONET | BIG_LINES
81
- # the default options used for parsing XML schemas
159
+
160
+ # The options mask used by default for parsing XML::Schema
82
161
  DEFAULT_SCHEMA = NONET | BIG_LINES
83
162
 
84
163
  attr_accessor :options
@@ -90,7 +169,7 @@ module Nokogiri
90
169
  constants.each do |constant|
91
170
  next if constant.to_sym == :STRICT
92
171
 
93
- class_eval %{
172
+ class_eval <<~RUBY, __FILE__, __LINE__ + 1
94
173
  def #{constant.downcase}
95
174
  @options |= #{constant}
96
175
  self
@@ -104,7 +183,7 @@ module Nokogiri
104
183
  def #{constant.downcase}?
105
184
  #{constant} & @options == #{constant}
106
185
  end
107
- }
186
+ RUBY
108
187
  end
109
188
 
110
189
  def strict
@@ -5,15 +5,22 @@ module Nokogiri
5
5
  # :nodoc: all
6
6
  module PP
7
7
  module Node
8
+ COLLECTIONS = [:attribute_nodes, :children]
9
+
8
10
  def inspect
9
11
  attributes = inspect_attributes.reject do |x|
10
12
  attribute = send(x)
11
13
  !attribute || (attribute.respond_to?(:empty?) && attribute.empty?)
12
14
  rescue NoMethodError
13
15
  true
14
- end.map do |attribute|
15
- "#{attribute.to_s.sub(/_\w+/, "s")}=#{send(attribute).inspect}"
16
- end.join(" ")
16
+ end
17
+ attributes = if inspect_attributes.length == 1
18
+ send(attributes.first).inspect
19
+ else
20
+ attributes.map do |attribute|
21
+ "#{attribute}=#{send(attribute).inspect}"
22
+ end.join(" ")
23
+ end
17
24
  "#<#{self.class.name}:#{format("0x%x", object_id)} #{attributes}>"
18
25
  end
19
26
 
@@ -21,11 +28,12 @@ module Nokogiri
21
28
  nice_name = self.class.name.split("::").last
22
29
  pp.group(2, "#(#{nice_name}:#{format("0x%x", object_id)} {", "})") do
23
30
  pp.breakable
24
- attrs = inspect_attributes.map do |t|
31
+
32
+ attrs = inspect_attributes.filter_map do |t|
25
33
  [t, send(t)] if respond_to?(t)
26
- end.compact.find_all do |x|
34
+ end.find_all do |x|
27
35
  if x.last
28
- if [:attribute_nodes, :children].include?(x.first)
36
+ if COLLECTIONS.include?(x.first)
29
37
  !x.last.empty?
30
38
  else
31
39
  true
@@ -33,19 +41,24 @@ module Nokogiri
33
41
  end
34
42
  end
35
43
 
36
- pp.seplist(attrs) do |v|
37
- if [:attribute_nodes, :children].include?(v.first)
38
- pp.group(2, "#{v.first.to_s.sub(/_\w+$/, "s")} = [", "]") do
39
- pp.breakable
40
- pp.seplist(v.last) do |item|
41
- pp.pp(item)
44
+ if inspect_attributes.length == 1
45
+ pp.pp(attrs.first.last)
46
+ else
47
+ pp.seplist(attrs) do |v|
48
+ if COLLECTIONS.include?(v.first)
49
+ pp.group(2, "#{v.first} = [", "]") do
50
+ pp.breakable
51
+ pp.seplist(v.last) do |item|
52
+ pp.pp(item)
53
+ end
42
54
  end
55
+ else
56
+ pp.text("#{v.first} = ")
57
+ pp.pp(v.last)
43
58
  end
44
- else
45
- pp.text("#{v.first} = ")
46
- pp.pp(v.last)
47
59
  end
48
60
  end
61
+
49
62
  pp.breakable
50
63
  end
51
64
  end
@@ -3,7 +3,8 @@
3
3
  module Nokogiri
4
4
  module XML
5
5
  class ProcessingInstruction < Node
6
- def initialize(document, name, content) # rubocop:disable Style/RedundantInitialize
6
+ def initialize(document, name, content)
7
+ super(document, name)
7
8
  end
8
9
  end
9
10
  end
@@ -100,7 +100,7 @@ module Nokogiri
100
100
  # +prefix+ is the namespace prefix for the element
101
101
  # +uri+ is the associated namespace URI
102
102
  # +ns+ is a hash of namespace prefix:urls associated with the element
103
- def start_element_namespace(name, attrs = [], prefix = nil, uri = nil, ns = [])
103
+ def start_element_namespace(name, attrs = [], prefix = nil, uri = nil, ns = []) # rubocop:disable Metrics/ParameterLists
104
104
  ###
105
105
  # Deal with SAX v1 interface
106
106
  name = [prefix, name].compact.join(":")
@@ -88,9 +88,8 @@ module Nokogiri
88
88
 
89
89
  ###
90
90
  # Parse given +io+
91
- def parse_io(io, encoding = "ASCII")
92
- @encoding = check_encoding(encoding)
93
- ctx = ParserContext.io(io, ENCODINGS[@encoding])
91
+ def parse_io(io, encoding = @encoding)
92
+ ctx = ParserContext.io(io, ENCODINGS[check_encoding(encoding)])
94
93
  yield ctx if block_given?
95
94
  ctx.parse_with(self)
96
95
  end
@@ -36,16 +36,19 @@ module Nokogiri
36
36
  # node.search('.//address[@domestic=$value]', nil, {:value => 'Yes'})
37
37
  #
38
38
  # 💡 Custom XPath functions and CSS pseudo-selectors may also be defined. To define custom
39
- # functions create a class and implement the function you want to define. The first argument
40
- # to the method will be the current matching NodeSet. Any other arguments are ones that you
41
- # pass in. Note that this class may appear anywhere in the argument list. For example:
39
+ # functions create a class and implement the function you want to define, which will be in the
40
+ # `nokogiri` namespace in XPath queries.
41
+ #
42
+ # The first argument to the method will be the current matching NodeSet. Any other arguments
43
+ # are ones that you pass in. Note that this class may appear anywhere in the argument
44
+ # list. For example:
42
45
  #
43
46
  # handler = Class.new {
44
47
  # def regex node_set, regex
45
48
  # node_set.find_all { |node| node['some_attribute'] =~ /#{regex}/ }
46
49
  # end
47
50
  # }.new
48
- # node.search('.//title[regex(., "\w+")]', 'div.employee:regex("[0-9]+")', handler)
51
+ # node.search('.//title[nokogiri:regex(., "\w+")]', 'div.employee:regex("[0-9]+")', handler)
49
52
  #
50
53
  # See Searchable#xpath and Searchable#css for further usage help.
51
54
  def search(*args)
@@ -160,16 +163,18 @@ module Nokogiri
160
163
  # node.xpath('.//address[@domestic=$value]', nil, {:value => 'Yes'})
161
164
  #
162
165
  # 💡 Custom XPath functions may also be defined. To define custom functions create a class and
163
- # implement the function you want to define. The first argument to the method will be the
164
- # current matching NodeSet. Any other arguments are ones that you pass in. Note that this
165
- # class may appear anywhere in the argument list. For example:
166
+ # implement the function you want to define, which will be in the `nokogiri` namespace.
167
+ #
168
+ # The first argument to the method will be the current matching NodeSet. Any other arguments
169
+ # are ones that you pass in. Note that this class may appear anywhere in the argument
170
+ # list. For example:
166
171
  #
167
172
  # handler = Class.new {
168
173
  # def regex(node_set, regex)
169
174
  # node_set.find_all { |node| node['some_attribute'] =~ /#{regex}/ }
170
175
  # end
171
176
  # }.new
172
- # node.xpath('.//title[regex(., "\w+")]', handler)
177
+ # node.xpath('.//title[nokogiri:regex(., "\w+")]', handler)
173
178
  #
174
179
  def xpath(*args)
175
180
  paths, handler, ns, binds = extract_params(args)
@@ -243,8 +248,11 @@ module Nokogiri
243
248
  doctype: document.xpath_doctype,
244
249
  )
245
250
  self.class::IMPLIED_XPATH_CONTEXTS.map do |implied_xpath_context|
246
- CSS.xpath_for(rule.to_s, { prefix: implied_xpath_context, ns: ns,
247
- visitor: visitor, })
251
+ CSS.xpath_for(rule.to_s, {
252
+ prefix: implied_xpath_context,
253
+ ns: ns,
254
+ visitor: visitor,
255
+ })
248
256
  end.join(" | ")
249
257
  end
250
258
 
data/lib/nokogiri/xslt.rb CHANGED
@@ -20,8 +20,59 @@ module Nokogiri
20
20
  # Stylesheet object.
21
21
  module XSLT
22
22
  class << self
23
- ###
24
- # Parse the stylesheet in +string+, register any +modules+
23
+ # :call-seq:
24
+ # parse(xsl) → Nokogiri::XSLT::Stylesheet
25
+ # parse(xsl, modules) → Nokogiri::XSLT::Stylesheet
26
+ #
27
+ # Parse the stylesheet in +xsl+, registering optional +modules+ as custom class handlers.
28
+ #
29
+ # [Parameters]
30
+ # - +xsl+ (String) XSL content to be parsed into a stylesheet
31
+ # - +modules+ (Hash<String ⇒ Class>) A hash of URI-to-handler relations for linking a
32
+ # namespace to a custom function handler.
33
+ #
34
+ # ⚠ The XSLT handler classes are registered *globally*.
35
+ #
36
+ # Also see Nokogiri::XSLT.register
37
+ #
38
+ # *Example*
39
+ #
40
+ # xml = Nokogiri.XML(<<~XML)
41
+ # <nodes>
42
+ # <node>Foo</node>
43
+ # <node>Bar</node>
44
+ # </nodes>
45
+ # XML
46
+ #
47
+ # handler = Class.new do
48
+ # def reverse(node)
49
+ # node.text.reverse
50
+ # end
51
+ # end
52
+ #
53
+ # xsl = <<~XSL
54
+ # <xsl:stylesheet version="1.0"
55
+ # xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
56
+ # xmlns:myfuncs="http://nokogiri.org/xslt/myfuncs"
57
+ # extension-element-prefixes="myfuncs">
58
+ # <xsl:template match="/">
59
+ # <reversed>
60
+ # <xsl:for-each select="nodes/node">
61
+ # <reverse><xsl:copy-of select="myfuncs:reverse(.)"/></reverse>
62
+ # </xsl:for-each>
63
+ # </reversed>
64
+ # </xsl:template>
65
+ # </xsl:stylesheet>
66
+ # XSL
67
+ #
68
+ # xsl = Nokogiri.XSLT(xsl, "http://nokogiri.org/xslt/myfuncs" => handler)
69
+ # xsl.transform(xml).to_xml
70
+ # # => "<?xml version=\"1.0\"?>\n" +
71
+ # # "<reversed>\n" +
72
+ # # " <reverse>ooF</reverse>\n" +
73
+ # # " <reverse>raB</reverse>\n" +
74
+ # # "</reversed>\n"
75
+ #
25
76
  def parse(string, modules = {})
26
77
  modules.each do |url, klass|
27
78
  XSLT.register(url, klass)
@@ -47,9 +98,9 @@ module Nokogiri
47
98
  # [Returns] Array of string parameters, with quotes correctly escaped for use with XSLT::Stylesheet.transform
48
99
  #
49
100
  def quote_params(params)
50
- params.flatten.each_slice(2).each_with_object([]) do |kv, quoted_params|
101
+ params.flatten.each_slice(2).with_object([]) do |kv, quoted_params|
51
102
  key, value = kv.map(&:to_s)
52
- value = if /'/.match?(value)
103
+ value = if value.include?("'")
53
104
  "concat('#{value.gsub(/'/, %q{', "'", '})}')"
54
105
  else
55
106
  "'#{value}'"
@@ -58,6 +109,25 @@ module Nokogiri
58
109
  quoted_params << value
59
110
  end
60
111
  end
112
+
113
+ # call-seq:
114
+ # register(uri, custom_handler_class)
115
+ #
116
+ # Register a class that implements custom XSLT transformation functions.
117
+ #
118
+ # ⚠ The XSLT handler classes are registered *globally*.
119
+ #
120
+ # [Parameters]
121
+ # - +uri+ (String) The namespace for the custom handlers
122
+ # - +custom_handler_class+ (Class) A class with ruby methods that can be called during
123
+ # transformation
124
+ #
125
+ # See Nokogiri::XSLT.parse for usage.
126
+ #
127
+ def register(uri, custom_handler_class)
128
+ # NOTE: this is implemented in the C extension, see ext/nokogiri/xslt_stylesheet.c
129
+ raise NotImplementedError, "Nokogiri::XSLT.register is not implemented on JRuby"
130
+ end if Nokogiri.jruby?
61
131
  end
62
132
  end
63
133
  end
data/lib/nokogiri.rb CHANGED
@@ -1,8 +1,6 @@
1
1
  # coding: utf-8
2
2
  # frozen_string_literal: true
3
3
 
4
- require "rbconfig"
5
-
6
4
  if defined?(RUBY_ENGINE) && RUBY_ENGINE == "jruby"
7
5
  require_relative "nokogiri/jruby/dependencies"
8
6
  end
@@ -47,11 +45,19 @@ module Nokogiri
47
45
  # Expect an HTML indicator to appear within the first 512
48
46
  # characters of a document. (<?xml ?> + <?xml-stylesheet ?>
49
47
  # shouldn't be that long)
50
- Nokogiri.HTML4(string, url, encoding,
51
- options || XML::ParseOptions::DEFAULT_HTML)
48
+ Nokogiri.HTML4(
49
+ string,
50
+ url,
51
+ encoding,
52
+ options || XML::ParseOptions::DEFAULT_HTML,
53
+ )
52
54
  else
53
- Nokogiri.XML(string, url, encoding,
54
- options || XML::ParseOptions::DEFAULT_XML)
55
+ Nokogiri.XML(
56
+ string,
57
+ url,
58
+ encoding,
59
+ options || XML::ParseOptions::DEFAULT_XML,
60
+ )
55
61
  end.tap do |doc|
56
62
  yield doc if block_given?
57
63
  end
@@ -88,17 +94,10 @@ module Nokogiri
88
94
 
89
95
  # :nodoc:
90
96
  def install_default_aliases
91
- # Make sure to support some popular encoding aliases not known by
92
- # all iconv implementations.
93
- {
94
- "Windows-31J" => "CP932", # Windows-31J is the IANA registered name of CP932.
95
- }.each do |alias_name, name|
96
- EncodingHandler.alias(name, alias_name) if EncodingHandler[alias_name].nil?
97
- end
97
+ warn("Nokogiri.install_default_aliases is deprecated and will be removed in a future version of Nokogiri. Please call Nokogiri::EncodingHandler.install_default_aliases instead.")
98
+ Nokogiri::EncodingHandler.install_default_aliases
98
99
  end
99
100
  end
100
-
101
- Nokogiri.install_default_aliases
102
101
  end
103
102
 
104
103
  ###
@@ -124,5 +123,6 @@ require_relative "nokogiri/html"
124
123
  require_relative "nokogiri/decorators/slop"
125
124
  require_relative "nokogiri/css"
126
125
  require_relative "nokogiri/html4/builder"
126
+ require_relative "nokogiri/encoding_handler"
127
127
 
128
128
  require_relative "nokogiri/html5" if Nokogiri.uses_gumbo?
@@ -71,7 +71,7 @@ module XSD
71
71
  # +prefix+ is the namespace prefix for the element
72
72
  # +uri+ is the associated namespace URI
73
73
  # +ns+ is a hash of namespace prefix:urls associated with the element
74
- def start_element_namespace(name, attrs = [], prefix = nil, uri = nil, ns = [])
74
+ def start_element_namespace(name, attrs = [], prefix = nil, uri = nil, ns = []) # rubocop:disable Metrics/ParameterLists
75
75
  ###
76
76
  # Deal with SAX v1 interface
77
77
  name = [prefix, name].compact.join(":")
@@ -95,7 +95,9 @@ module XSD
95
95
  end
96
96
 
97
97
  ["xmldecl", "start_document", "end_document", "comment"].each do |name|
98
- class_eval %{ def #{name}(*args); end }
98
+ class_eval <<~RUBY, __FILE__, __LINE__ + 1
99
+ def #{name}(*args); end
100
+ RUBY
99
101
  end
100
102
 
101
103
  add_factory(self)