nokogiri 1.13.6 → 1.16.7

Sign up to get free protection for your applications and to get access to all the features.
Files changed (125) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile +43 -0
  3. data/LICENSE-DEPENDENCIES.md +830 -509
  4. data/LICENSE.md +1 -1
  5. data/README.md +21 -11
  6. data/dependencies.yml +34 -15
  7. data/ext/nokogiri/extconf.rb +167 -48
  8. data/ext/nokogiri/gumbo.c +21 -11
  9. data/ext/nokogiri/html4_document.c +3 -4
  10. data/ext/nokogiri/html4_element_description.c +20 -15
  11. data/ext/nokogiri/html4_entity_lookup.c +2 -2
  12. data/ext/nokogiri/html4_sax_parser_context.c +11 -22
  13. data/ext/nokogiri/html4_sax_push_parser.c +4 -4
  14. data/ext/nokogiri/nokogiri.c +84 -75
  15. data/ext/nokogiri/nokogiri.h +46 -16
  16. data/ext/nokogiri/test_global_handlers.c +2 -2
  17. data/ext/nokogiri/xml_attr.c +3 -3
  18. data/ext/nokogiri/xml_attribute_decl.c +5 -5
  19. data/ext/nokogiri/xml_cdata.c +31 -18
  20. data/ext/nokogiri/xml_comment.c +2 -2
  21. data/ext/nokogiri/xml_document.c +135 -38
  22. data/ext/nokogiri/xml_document_fragment.c +2 -2
  23. data/ext/nokogiri/xml_dtd.c +9 -9
  24. data/ext/nokogiri/xml_element_content.c +34 -31
  25. data/ext/nokogiri/xml_element_decl.c +10 -10
  26. data/ext/nokogiri/xml_encoding_handler.c +15 -7
  27. data/ext/nokogiri/xml_entity_decl.c +6 -6
  28. data/ext/nokogiri/xml_entity_reference.c +2 -2
  29. data/ext/nokogiri/xml_namespace.c +75 -14
  30. data/ext/nokogiri/xml_node.c +365 -87
  31. data/ext/nokogiri/xml_node_set.c +129 -111
  32. data/ext/nokogiri/xml_processing_instruction.c +2 -2
  33. data/ext/nokogiri/xml_reader.c +126 -64
  34. data/ext/nokogiri/xml_relax_ng.c +67 -82
  35. data/ext/nokogiri/xml_sax_parser.c +45 -20
  36. data/ext/nokogiri/xml_sax_parser_context.c +50 -30
  37. data/ext/nokogiri/xml_sax_push_parser.c +31 -12
  38. data/ext/nokogiri/xml_schema.c +95 -118
  39. data/ext/nokogiri/xml_syntax_error.c +4 -4
  40. data/ext/nokogiri/xml_text.c +27 -14
  41. data/ext/nokogiri/xml_xpath_context.c +213 -136
  42. data/ext/nokogiri/xslt_stylesheet.c +126 -67
  43. data/gumbo-parser/Makefile +28 -0
  44. data/gumbo-parser/src/attribute.h +1 -1
  45. data/gumbo-parser/src/error.c +10 -6
  46. data/gumbo-parser/src/error.h +1 -1
  47. data/gumbo-parser/src/foreign_attrs.c +15 -16
  48. data/gumbo-parser/src/foreign_attrs.gperf +1 -1
  49. data/gumbo-parser/src/{gumbo.h → nokogiri_gumbo.h} +1 -0
  50. data/gumbo-parser/src/parser.c +29 -10
  51. data/gumbo-parser/src/replacement.h +1 -1
  52. data/gumbo-parser/src/string_buffer.h +1 -1
  53. data/gumbo-parser/src/string_piece.c +1 -1
  54. data/gumbo-parser/src/svg_attrs.c +2 -2
  55. data/gumbo-parser/src/svg_tags.c +2 -2
  56. data/gumbo-parser/src/tag.c +2 -1
  57. data/gumbo-parser/src/tag_lookup.c +7 -7
  58. data/gumbo-parser/src/tag_lookup.gperf +1 -0
  59. data/gumbo-parser/src/tag_lookup.h +1 -1
  60. data/gumbo-parser/src/token_buffer.h +1 -1
  61. data/gumbo-parser/src/tokenizer.c +2 -1
  62. data/gumbo-parser/src/tokenizer.h +1 -1
  63. data/gumbo-parser/src/utf8.c +1 -1
  64. data/gumbo-parser/src/utf8.h +1 -1
  65. data/gumbo-parser/src/util.c +1 -3
  66. data/gumbo-parser/src/util.h +4 -0
  67. data/gumbo-parser/src/vector.h +1 -1
  68. data/lib/nokogiri/css/node.rb +2 -2
  69. data/lib/nokogiri/css/parser_extras.rb +1 -1
  70. data/lib/nokogiri/css/xpath_visitor.rb +8 -26
  71. data/lib/nokogiri/css.rb +6 -0
  72. data/lib/nokogiri/decorators/slop.rb +1 -1
  73. data/lib/nokogiri/encoding_handler.rb +57 -0
  74. data/lib/nokogiri/extension.rb +4 -3
  75. data/lib/nokogiri/html4/document.rb +3 -122
  76. data/lib/nokogiri/html4/document_fragment.rb +1 -1
  77. data/lib/nokogiri/html4/element_description_defaults.rb +1827 -365
  78. data/lib/nokogiri/html4/encoding_reader.rb +121 -0
  79. data/lib/nokogiri/html4.rb +1 -0
  80. data/lib/nokogiri/html5/document.rb +113 -36
  81. data/lib/nokogiri/html5/document_fragment.rb +10 -3
  82. data/lib/nokogiri/html5/node.rb +8 -5
  83. data/lib/nokogiri/html5.rb +74 -226
  84. data/lib/nokogiri/jruby/dependencies.rb +1 -19
  85. data/lib/nokogiri/jruby/nokogiri_jars.rb +43 -0
  86. data/lib/nokogiri/version/constant.rb +1 -1
  87. data/lib/nokogiri/version/info.rb +16 -14
  88. data/lib/nokogiri/xml/attr.rb +49 -0
  89. data/lib/nokogiri/xml/attribute_decl.rb +4 -2
  90. data/lib/nokogiri/xml/builder.rb +1 -1
  91. data/lib/nokogiri/xml/document.rb +103 -56
  92. data/lib/nokogiri/xml/document_fragment.rb +50 -7
  93. data/lib/nokogiri/xml/element_content.rb +10 -2
  94. data/lib/nokogiri/xml/element_decl.rb +4 -2
  95. data/lib/nokogiri/xml/entity_decl.rb +4 -2
  96. data/lib/nokogiri/xml/namespace.rb +41 -0
  97. data/lib/nokogiri/xml/node/save_options.rb +14 -4
  98. data/lib/nokogiri/xml/node.rb +241 -70
  99. data/lib/nokogiri/xml/node_set.rb +90 -11
  100. data/lib/nokogiri/xml/parse_options.rb +129 -50
  101. data/lib/nokogiri/xml/pp/node.rb +28 -15
  102. data/lib/nokogiri/xml/processing_instruction.rb +2 -1
  103. data/lib/nokogiri/xml/reader.rb +16 -17
  104. data/lib/nokogiri/xml/sax/document.rb +1 -1
  105. data/lib/nokogiri/xml/sax/parser.rb +2 -3
  106. data/lib/nokogiri/xml/searchable.rb +21 -13
  107. data/lib/nokogiri/xml/syntax_error.rb +1 -1
  108. data/lib/nokogiri/xml.rb +1 -1
  109. data/lib/nokogiri/xslt/stylesheet.rb +29 -7
  110. data/lib/nokogiri/xslt.rb +75 -5
  111. data/lib/nokogiri.rb +15 -15
  112. data/lib/xsd/xmlparser/nokogiri.rb +4 -2
  113. data/patches/libxml2/0010-update-config.guess-and-config.sub-for-libxml2.patch +224 -0
  114. data/patches/libxml2/0011-rip-out-libxml2-s-libc_single_threaded-support.patch +30 -0
  115. data/patches/libxslt/0001-update-config.guess-and-config.sub-for-libxslt.patch +224 -0
  116. data/ports/archives/libxml2-2.12.9.tar.xz +0 -0
  117. data/ports/archives/libxslt-1.1.39.tar.xz +0 -0
  118. metadata +21 -248
  119. data/patches/libxml2/0004-use-glibc-strlen.patch +0 -53
  120. data/patches/libxml2/0005-avoid-isnan-isinf.patch +0 -81
  121. data/patches/libxml2/0006-update-automake-files-for-arm64.patch +0 -3040
  122. data/patches/libxml2/0008-htmlParseComment-handle-abruptly-closed-comments.patch +0 -61
  123. data/patches/libxslt/0001-update-automake-files-for-arm64.patch +0 -3037
  124. data/ports/archives/libxml2-2.9.14.tar.xz +0 -0
  125. data/ports/archives/libxslt-1.1.35.tar.xz +0 -0
@@ -1,84 +1,163 @@
1
+ # coding: utf-8
1
2
  # frozen_string_literal: true
2
3
 
3
4
  module Nokogiri
4
5
  module XML
5
- ###
6
- # Parse options for passing to Nokogiri.XML or Nokogiri.HTML
7
- #
8
- # == Building combinations of parse options
9
- # You can build your own combinations of these parse options by using any of the following methods:
10
- # *Note*: All examples attempt to set the +RECOVER+ & +NOENT+ options.
11
- # [Ruby's bitwise operators] You can use the Ruby bitwise operators to set various combinations.
12
- # Nokogiri.XML('<content>Chapter 1</content', nil, nil, Nokogiri::XML::ParseOptions.new((1 << 0) | (1 << 1)))
13
- # [Method chaining] Every option has an equivalent method in lowercase. You can chain these methods together to set various combinations.
14
- # Nokogiri.XML('<content>Chapter 1</content', nil, nil, Nokogiri::XML::ParseOptions.new.recover.noent)
15
- # [Using Ruby Blocks] You can also setup parse combinations in the block passed to Nokogiri.XML or Nokogiri.HTML
16
- # Nokogiri.XML('<content>Chapter 1</content') {|config| config.recover.noent}
17
- #
18
- # == Removing particular parse options
19
- # You can also remove options from an instance of +ParseOptions+ dynamically.
20
- # Every option has an equivalent <code>no{option}</code> method in lowercase. You can call these methods on an instance of +ParseOptions+ to remove the option.
21
- # Note that this is not available for +STRICT+.
22
- #
23
- # # Setting the RECOVER & NOENT options...
24
- # options = Nokogiri::XML::ParseOptions.new.recover.noent
25
- # # later...
26
- # options.norecover # Removes the Nokogiri::XML::ParseOptions::RECOVER option
27
- # options.nonoent # Removes the Nokogiri::XML::ParseOptions::NOENT option
6
+ # Options that control the parsing behavior for XML::Document, XML::DocumentFragment,
7
+ # HTML4::Document, HTML4::DocumentFragment, XSLT::Stylesheet, and XML::Schema.
8
+ #
9
+ # These options directly expose libxml2's parse options, which are all boolean in the sense that
10
+ # an option is "on" or "off".
11
+ #
12
+ # 💡 Note that HTML5 parsing has a separate, orthogonal set of options due to the nature of the
13
+ # HTML5 specification. See Nokogiri::HTML5.
14
+ #
15
+ # ⚠ Not all parse options are supported on JRuby. Nokogiri will attempt to invoke the equivalent
16
+ # behavior in Xerces/NekoHTML on JRuby when it's possible.
17
+ #
18
+ # == Setting and unsetting parse options
19
+ #
20
+ # You can build your own combinations of parse options by using any of the following methods:
21
+ #
22
+ # [ParseOptions method chaining]
23
+ #
24
+ # Every option has an equivalent method in lowercase. You can chain these methods together to
25
+ # set various combinations.
26
+ #
27
+ # # Set the HUGE & PEDANTIC options
28
+ # po = Nokogiri::XML::ParseOptions.new.huge.pedantic
29
+ # doc = Nokogiri::XML::Document.parse(xml, nil, nil, po)
30
+ #
31
+ # Every option has an equivalent <code>no{option}</code> method in lowercase. You can call these
32
+ # methods on an instance of ParseOptions to unset the option.
33
+ #
34
+ # # Set the HUGE & PEDANTIC options
35
+ # po = Nokogiri::XML::ParseOptions.new.huge.pedantic
36
+ #
37
+ # # later we want to modify the options
38
+ # po.nohuge # Unset the HUGE option
39
+ # po.nopedantic # Unset the PEDANTIC option
40
+ #
41
+ # 💡 Note that some options begin with "no" leading to the logical but perhaps unintuitive
42
+ # double negative:
43
+ #
44
+ # po.nocdata # Set the NOCDATA parse option
45
+ # po.nonocdata # Unset the NOCDATA parse option
46
+ #
47
+ # 💡 Note that negation is not available for STRICT, which is itself a negation of all other
48
+ # features.
49
+ #
50
+ #
51
+ # [Using Ruby Blocks]
52
+ #
53
+ # Most parsing methods will accept a block for configuration of parse options, and we
54
+ # recommend chaining the setter methods:
55
+ #
56
+ # doc = Nokogiri::XML::Document.parse(xml) { |config| config.huge.pedantic }
57
+ #
58
+ #
59
+ # [ParseOptions constants]
60
+ #
61
+ # You can also use the constants declared under Nokogiri::XML::ParseOptions to set various
62
+ # combinations. They are bits in a bitmask, and so can be combined with bitwise operators:
63
+ #
64
+ # po = Nokogiri::XML::ParseOptions.new(Nokogiri::XML::ParseOptions::HUGE | Nokogiri::XML::ParseOptions::PEDANTIC)
65
+ # doc = Nokogiri::XML::Document.parse(xml, nil, nil, po)
28
66
  #
29
67
  class ParseOptions
30
68
  # Strict parsing
31
69
  STRICT = 0
32
- # Recover from errors
70
+
71
+ # Recover from errors. On by default for XML::Document, XML::DocumentFragment,
72
+ # HTML4::Document, HTML4::DocumentFragment, XSLT::Stylesheet, and XML::Schema.
33
73
  RECOVER = 1 << 0
34
- # Substitute entities
74
+
75
+ # Substitute entities. Off by default.
76
+ #
77
+ # ⚠ This option enables entity substitution, contrary to what the name implies.
78
+ #
79
+ # ⚠ <b>It is UNSAFE to set this option</b> when parsing untrusted documents.
35
80
  NOENT = 1 << 1
36
- # Load external subsets
81
+
82
+ # Load external subsets. On by default for XSLT::Stylesheet.
83
+ #
84
+ # ⚠ <b>It is UNSAFE to set this option</b> when parsing untrusted documents.
37
85
  DTDLOAD = 1 << 2
38
- # Default DTD attributes
86
+
87
+ # Default DTD attributes. On by default for XSLT::Stylesheet.
39
88
  DTDATTR = 1 << 3
40
- # validate with the DTD
89
+
90
+ # Validate with the DTD. Off by default.
41
91
  DTDVALID = 1 << 4
42
- # suppress error reports
92
+
93
+ # Suppress error reports. On by default for HTML4::Document and HTML4::DocumentFragment.
43
94
  NOERROR = 1 << 5
44
- # suppress warning reports
95
+
96
+ # Suppress warning reports. On by default for HTML4::Document and HTML4::DocumentFragment.
45
97
  NOWARNING = 1 << 6
46
- # pedantic error reporting
98
+
99
+ # Enable pedantic error reporting. Off by default.
47
100
  PEDANTIC = 1 << 7
48
- # remove blank nodes
101
+
102
+ # Remove blank nodes. Off by default.
49
103
  NOBLANKS = 1 << 8
50
- # use the SAX1 interface internally
104
+
105
+ # Use the SAX1 interface internally. Off by default.
51
106
  SAX1 = 1 << 9
52
- # Implement XInclude substitution
107
+
108
+ # Implement XInclude substitution. Off by default.
53
109
  XINCLUDE = 1 << 10
54
- # Forbid network access. Recommended for dealing with untrusted documents.
110
+
111
+ # Forbid network access. On by default for XML::Document, XML::DocumentFragment,
112
+ # HTML4::Document, HTML4::DocumentFragment, XSLT::Stylesheet, and XML::Schema.
113
+ #
114
+ # ⚠ <b>It is UNSAFE to unset this option</b> when parsing untrusted documents.
55
115
  NONET = 1 << 11
56
- # Do not reuse the context dictionary
116
+
117
+ # Do not reuse the context dictionary. Off by default.
57
118
  NODICT = 1 << 12
58
- # remove redundant namespaces declarations
119
+
120
+ # Remove redundant namespaces declarations. Off by default.
59
121
  NSCLEAN = 1 << 13
60
- # merge CDATA as text nodes
122
+
123
+ # Merge CDATA as text nodes. On by default for XSLT::Stylesheet.
61
124
  NOCDATA = 1 << 14
62
- # do not generate XINCLUDE START/END nodes
125
+
126
+ # Do not generate XInclude START/END nodes. Off by default.
63
127
  NOXINCNODE = 1 << 15
64
- # compact small text nodes; no modification of the tree allowed afterwards (will possibly crash if you try to modify the tree)
128
+
129
+ # Compact small text nodes. Off by default.
130
+ #
131
+ # ⚠ No modification of the DOM tree is allowed after parsing. libxml2 may crash if you try to
132
+ # modify the tree.
65
133
  COMPACT = 1 << 16
66
- # parse using XML-1.0 before update 5
134
+
135
+ # Parse using XML-1.0 before update 5. Off by default.
67
136
  OLD10 = 1 << 17
68
- # do not fixup XINCLUDE xml:base uris
137
+
138
+ # Do not fixup XInclude xml:base uris. Off by default.
69
139
  NOBASEFIX = 1 << 18
70
- # relax any hardcoded limit from the parser
140
+
141
+ # Relax any hardcoded limit from the parser. Off by default.
142
+ #
143
+ # ⚠ There may be a performance penalty when this option is set.
71
144
  HUGE = 1 << 19
72
- # line numbers stored as long int (instead of a short int)
145
+
146
+ # Support line numbers up to <code>long int</code> (default is a <code>short int</code>). On
147
+ # by default for XML::Document, XML::DocumentFragment, HTML4::Document,
148
+ # HTML4::DocumentFragment, XSLT::Stylesheet, and XML::Schema.
73
149
  BIG_LINES = 1 << 22
74
150
 
75
- # the default options used for parsing XML documents
151
+ # The options mask used by default for parsing XML::Document and XML::DocumentFragment
76
152
  DEFAULT_XML = RECOVER | NONET | BIG_LINES
77
- # the default options used for parsing XSLT stylesheets
153
+
154
+ # The options mask used by default for parsing XSLT::Stylesheet
78
155
  DEFAULT_XSLT = RECOVER | NONET | NOENT | DTDLOAD | DTDATTR | NOCDATA | BIG_LINES
79
- # the default options used for parsing HTML documents
156
+
157
+ # The options mask used by default for parsing HTML4::Document and HTML4::DocumentFragment
80
158
  DEFAULT_HTML = RECOVER | NOERROR | NOWARNING | NONET | BIG_LINES
81
- # the default options used for parsing XML schemas
159
+
160
+ # The options mask used by default for parsing XML::Schema
82
161
  DEFAULT_SCHEMA = NONET | BIG_LINES
83
162
 
84
163
  attr_accessor :options
@@ -90,7 +169,7 @@ module Nokogiri
90
169
  constants.each do |constant|
91
170
  next if constant.to_sym == :STRICT
92
171
 
93
- class_eval %{
172
+ class_eval <<~RUBY, __FILE__, __LINE__ + 1
94
173
  def #{constant.downcase}
95
174
  @options |= #{constant}
96
175
  self
@@ -104,7 +183,7 @@ module Nokogiri
104
183
  def #{constant.downcase}?
105
184
  #{constant} & @options == #{constant}
106
185
  end
107
- }
186
+ RUBY
108
187
  end
109
188
 
110
189
  def strict
@@ -5,15 +5,22 @@ module Nokogiri
5
5
  # :nodoc: all
6
6
  module PP
7
7
  module Node
8
+ COLLECTIONS = [:attribute_nodes, :children]
9
+
8
10
  def inspect
9
11
  attributes = inspect_attributes.reject do |x|
10
12
  attribute = send(x)
11
13
  !attribute || (attribute.respond_to?(:empty?) && attribute.empty?)
12
14
  rescue NoMethodError
13
15
  true
14
- end.map do |attribute|
15
- "#{attribute.to_s.sub(/_\w+/, "s")}=#{send(attribute).inspect}"
16
- end.join(" ")
16
+ end
17
+ attributes = if inspect_attributes.length == 1
18
+ send(attributes.first).inspect
19
+ else
20
+ attributes.map do |attribute|
21
+ "#{attribute}=#{send(attribute).inspect}"
22
+ end.join(" ")
23
+ end
17
24
  "#<#{self.class.name}:#{format("0x%x", object_id)} #{attributes}>"
18
25
  end
19
26
 
@@ -21,11 +28,12 @@ module Nokogiri
21
28
  nice_name = self.class.name.split("::").last
22
29
  pp.group(2, "#(#{nice_name}:#{format("0x%x", object_id)} {", "})") do
23
30
  pp.breakable
24
- attrs = inspect_attributes.map do |t|
31
+
32
+ attrs = inspect_attributes.filter_map do |t|
25
33
  [t, send(t)] if respond_to?(t)
26
- end.compact.find_all do |x|
34
+ end.find_all do |x|
27
35
  if x.last
28
- if [:attribute_nodes, :children].include?(x.first)
36
+ if COLLECTIONS.include?(x.first)
29
37
  !x.last.empty?
30
38
  else
31
39
  true
@@ -33,19 +41,24 @@ module Nokogiri
33
41
  end
34
42
  end
35
43
 
36
- pp.seplist(attrs) do |v|
37
- if [:attribute_nodes, :children].include?(v.first)
38
- pp.group(2, "#{v.first.to_s.sub(/_\w+$/, "s")} = [", "]") do
39
- pp.breakable
40
- pp.seplist(v.last) do |item|
41
- pp.pp(item)
44
+ if inspect_attributes.length == 1
45
+ pp.pp(attrs.first.last)
46
+ else
47
+ pp.seplist(attrs) do |v|
48
+ if COLLECTIONS.include?(v.first)
49
+ pp.group(2, "#{v.first} = [", "]") do
50
+ pp.breakable
51
+ pp.seplist(v.last) do |item|
52
+ pp.pp(item)
53
+ end
42
54
  end
55
+ else
56
+ pp.text("#{v.first} = ")
57
+ pp.pp(v.last)
43
58
  end
44
- else
45
- pp.text("#{v.first} = ")
46
- pp.pp(v.last)
47
59
  end
48
60
  end
61
+
49
62
  pp.breakable
50
63
  end
51
64
  end
@@ -3,7 +3,8 @@
3
3
  module Nokogiri
4
4
  module XML
5
5
  class ProcessingInstruction < Node
6
- def initialize(document, name, content) # rubocop:disable Style/RedundantInitialize
6
+ def initialize(document, name, content)
7
+ super(document, name)
7
8
  end
8
9
  end
9
10
  end
@@ -3,9 +3,11 @@
3
3
  module Nokogiri
4
4
  module XML
5
5
  ###
6
- # Nokogiri::XML::Reader parses an XML document similar to the way a cursor
7
- # would move. The Reader is given an XML document, and yields nodes
8
- # to an each block.
6
+ # Nokogiri::XML::Reader parses an XML document similar to the way a cursor would move. The
7
+ # Reader is given an XML document, and yields nodes to an each block.
8
+ #
9
+ # The Reader parser might be good for when you need the speed and low memory usage of the SAX
10
+ # parser, but do not want to write a Document handler.
9
11
  #
10
12
  # Here is an example of usage:
11
13
  #
@@ -22,13 +24,12 @@ module Nokogiri
22
24
  #
23
25
  # end
24
26
  #
25
- # Note that Nokogiri::XML::Reader#each can only be called once!! Once
26
- # the cursor moves through the entire document, you must parse the
27
- # document again. So make sure that you capture any information you
28
- # need during the first iteration.
27
+ # Nokogiri::XML::Reader#each can only be called once! Once the cursor moves through the entire
28
+ # document, you must parse the document again. It may be better to capture all information you
29
+ # need during a single iteration.
29
30
  #
30
- # The Reader parser is good for when you need the speed of a SAX parser,
31
- # but do not want to write a Document handler.
31
+ # libxml2 does not support error recovery in the Reader parser. The `RECOVER` ParseOption is
32
+ # ignored. If a syntax error is encountered during parsing, an exception will be raised.
32
33
  class Reader
33
34
  include Enumerable
34
35
 
@@ -83,16 +84,14 @@ module Nokogiri
83
84
  end
84
85
  private :initialize
85
86
 
86
- # Get the attributes of the current node as a Hash
87
+ # Get the attributes and namespaces of the current node as a Hash.
88
+ #
89
+ # This is the union of Reader#attribute_hash and Reader#namespaces
87
90
  #
88
- # [Returns] (Hash<String, String>) Attribute names and values
91
+ # [Returns]
92
+ # (Hash<String, String>) Attribute names and values, and namespace prefixes and hrefs.
89
93
  def attributes
90
- attrs_hash = attribute_nodes.each_with_object({}) do |node, hash|
91
- hash[node.name] = node.to_s
92
- end
93
- ns = namespaces
94
- attrs_hash.merge!(ns) if ns
95
- attrs_hash
94
+ attribute_hash.merge(namespaces)
96
95
  end
97
96
 
98
97
  ###
@@ -100,7 +100,7 @@ module Nokogiri
100
100
  # +prefix+ is the namespace prefix for the element
101
101
  # +uri+ is the associated namespace URI
102
102
  # +ns+ is a hash of namespace prefix:urls associated with the element
103
- def start_element_namespace(name, attrs = [], prefix = nil, uri = nil, ns = [])
103
+ def start_element_namespace(name, attrs = [], prefix = nil, uri = nil, ns = []) # rubocop:disable Metrics/ParameterLists
104
104
  ###
105
105
  # Deal with SAX v1 interface
106
106
  name = [prefix, name].compact.join(":")
@@ -88,9 +88,8 @@ module Nokogiri
88
88
 
89
89
  ###
90
90
  # Parse given +io+
91
- def parse_io(io, encoding = "ASCII")
92
- @encoding = check_encoding(encoding)
93
- ctx = ParserContext.io(io, ENCODINGS[@encoding])
91
+ def parse_io(io, encoding = @encoding)
92
+ ctx = ParserContext.io(io, ENCODINGS[check_encoding(encoding)])
94
93
  yield ctx if block_given?
95
94
  ctx.parse_with(self)
96
95
  end
@@ -36,16 +36,19 @@ module Nokogiri
36
36
  # node.search('.//address[@domestic=$value]', nil, {:value => 'Yes'})
37
37
  #
38
38
  # 💡 Custom XPath functions and CSS pseudo-selectors may also be defined. To define custom
39
- # functions create a class and implement the function you want to define. The first argument
40
- # to the method will be the current matching NodeSet. Any other arguments are ones that you
41
- # pass in. Note that this class may appear anywhere in the argument list. For example:
39
+ # functions create a class and implement the function you want to define, which will be in the
40
+ # `nokogiri` namespace in XPath queries.
41
+ #
42
+ # The first argument to the method will be the current matching NodeSet. Any other arguments
43
+ # are ones that you pass in. Note that this class may appear anywhere in the argument
44
+ # list. For example:
42
45
  #
43
46
  # handler = Class.new {
44
47
  # def regex node_set, regex
45
48
  # node_set.find_all { |node| node['some_attribute'] =~ /#{regex}/ }
46
49
  # end
47
50
  # }.new
48
- # node.search('.//title[regex(., "\w+")]', 'div.employee:regex("[0-9]+")', handler)
51
+ # node.search('.//title[nokogiri:regex(., "\w+")]', 'div.employee:regex("[0-9]+")', handler)
49
52
  #
50
53
  # See Searchable#xpath and Searchable#css for further usage help.
51
54
  def search(*args)
@@ -160,16 +163,18 @@ module Nokogiri
160
163
  # node.xpath('.//address[@domestic=$value]', nil, {:value => 'Yes'})
161
164
  #
162
165
  # 💡 Custom XPath functions may also be defined. To define custom functions create a class and
163
- # implement the function you want to define. The first argument to the method will be the
164
- # current matching NodeSet. Any other arguments are ones that you pass in. Note that this
165
- # class may appear anywhere in the argument list. For example:
166
+ # implement the function you want to define, which will be in the `nokogiri` namespace.
167
+ #
168
+ # The first argument to the method will be the current matching NodeSet. Any other arguments
169
+ # are ones that you pass in. Note that this class may appear anywhere in the argument
170
+ # list. For example:
166
171
  #
167
172
  # handler = Class.new {
168
173
  # def regex(node_set, regex)
169
174
  # node_set.find_all { |node| node['some_attribute'] =~ /#{regex}/ }
170
175
  # end
171
176
  # }.new
172
- # node.xpath('.//title[regex(., "\w+")]', handler)
177
+ # node.xpath('.//title[nokogiri:regex(., "\w+")]', handler)
173
178
  #
174
179
  def xpath(*args)
175
180
  paths, handler, ns, binds = extract_params(args)
@@ -194,7 +199,7 @@ module Nokogiri
194
199
  #
195
200
  # Search this node's immediate children using CSS selector +selector+
196
201
  def >(selector) # rubocop:disable Naming/BinaryOperatorParameterName
197
- ns = (document.root&.namespaces || {})
202
+ ns = document.root&.namespaces || {}
198
203
  xpath(CSS.xpath_for(selector, prefix: "./", ns: ns).first)
199
204
  end
200
205
 
@@ -224,7 +229,7 @@ module Nokogiri
224
229
  def xpath_impl(node, path, handler, ns, binds)
225
230
  ctx = XPathContext.new(node)
226
231
  ctx.register_namespaces(ns)
227
- path = path.gsub(/xmlns:/, " :") unless Nokogiri.uses_libxml?
232
+ path = path.gsub("xmlns:", " :") unless Nokogiri.uses_libxml?
228
233
 
229
234
  binds&.each do |key, value|
230
235
  ctx.register_variable(key.to_s, value)
@@ -243,8 +248,11 @@ module Nokogiri
243
248
  doctype: document.xpath_doctype,
244
249
  )
245
250
  self.class::IMPLIED_XPATH_CONTEXTS.map do |implied_xpath_context|
246
- CSS.xpath_for(rule.to_s, { prefix: implied_xpath_context, ns: ns,
247
- visitor: visitor, })
251
+ CSS.xpath_for(rule.to_s, {
252
+ prefix: implied_xpath_context,
253
+ ns: ns,
254
+ visitor: visitor,
255
+ })
248
256
  end.join(" | ")
249
257
  end
250
258
 
@@ -261,7 +269,7 @@ module Nokogiri
261
269
  end
262
270
  ns, binds = hashes.reverse
263
271
 
264
- ns ||= (document.root&.namespaces || {})
272
+ ns ||= document.root&.namespaces || {}
265
273
 
266
274
  [params, handler, ns, binds]
267
275
  end
@@ -63,7 +63,7 @@ module Nokogiri
63
63
  end
64
64
 
65
65
  def location_to_s
66
- return nil if nil_or_zero?(line) && nil_or_zero?(column)
66
+ return if nil_or_zero?(line) && nil_or_zero?(column)
67
67
 
68
68
  "#{line}:#{column}"
69
69
  end
data/lib/nokogiri/xml.rb CHANGED
@@ -11,7 +11,7 @@ module Nokogiri
11
11
 
12
12
  module XML
13
13
  # Original C14N 1.0 spec canonicalization
14
- XML_C14N_1_0 = 0
14
+ XML_C14N_1_0 = 0
15
15
  # Exclusive C14N 1.0 spec canonicalization
16
16
  XML_C14N_EXCLUSIVE_1_0 = 1
17
17
  # C14N 1.1 spec canonicalization
@@ -10,15 +10,37 @@ module Nokogiri
10
10
  # doc = Nokogiri::XML(File.read('some_file.xml'))
11
11
  # xslt = Nokogiri::XSLT(File.read('some_transformer.xslt'))
12
12
  #
13
- # puts xslt.transform(doc)
13
+ # xslt.transform(doc) # => Nokogiri::XML::Document
14
14
  #
15
- # See Nokogiri::XSLT::Stylesheet#transform for more transformation
16
- # information.
15
+ # Many XSLT transformations include serialization behavior to emit a non-XML document. For these
16
+ # cases, please take care to invoke the #serialize method on the result of the transformation:
17
+ #
18
+ # doc = Nokogiri::XML(File.read('some_file.xml'))
19
+ # xslt = Nokogiri::XSLT(File.read('some_transformer.xslt'))
20
+ # xslt.serialize(xslt.transform(doc)) # => String
21
+ #
22
+ # or use the #apply_to method, which is a shortcut for `serialize(transform(document))`:
23
+ #
24
+ # doc = Nokogiri::XML(File.read('some_file.xml'))
25
+ # xslt = Nokogiri::XSLT(File.read('some_transformer.xslt'))
26
+ # xslt.apply_to(doc) # => String
27
+ #
28
+ # See Nokogiri::XSLT::Stylesheet#transform for more information and examples.
17
29
  class Stylesheet
18
- ###
19
- # Apply an XSLT stylesheet to an XML::Document.
20
- # +params+ is an array of strings used as XSLT parameters.
21
- # returns serialized document
30
+ # :call-seq:
31
+ # apply_to(document, params = []) -> String
32
+ #
33
+ # Apply an XSLT stylesheet to an XML::Document and serialize it properly. This method is
34
+ # equivalent to calling #serialize on the result of #transform.
35
+ #
36
+ # [Parameters]
37
+ # - +document+ is an instance of XML::Document to transform
38
+ # - +params+ is an array of strings used as XSLT parameters, passed into #transform
39
+ #
40
+ # [Returns]
41
+ # A string containing the serialized result of the transformation.
42
+ #
43
+ # See Nokogiri::XSLT::Stylesheet#transform for more information and examples.
22
44
  def apply_to(document, params = [])
23
45
  serialize(transform(document, params))
24
46
  end
data/lib/nokogiri/xslt.rb CHANGED
@@ -20,8 +20,59 @@ module Nokogiri
20
20
  # Stylesheet object.
21
21
  module XSLT
22
22
  class << self
23
- ###
24
- # Parse the stylesheet in +string+, register any +modules+
23
+ # :call-seq:
24
+ # parse(xsl) → Nokogiri::XSLT::Stylesheet
25
+ # parse(xsl, modules) → Nokogiri::XSLT::Stylesheet
26
+ #
27
+ # Parse the stylesheet in +xsl+, registering optional +modules+ as custom class handlers.
28
+ #
29
+ # [Parameters]
30
+ # - +xsl+ (String) XSL content to be parsed into a stylesheet
31
+ # - +modules+ (Hash<String ⇒ Class>) A hash of URI-to-handler relations for linking a
32
+ # namespace to a custom function handler.
33
+ #
34
+ # ⚠ The XSLT handler classes are registered *globally*.
35
+ #
36
+ # Also see Nokogiri::XSLT.register
37
+ #
38
+ # *Example*
39
+ #
40
+ # xml = Nokogiri.XML(<<~XML)
41
+ # <nodes>
42
+ # <node>Foo</node>
43
+ # <node>Bar</node>
44
+ # </nodes>
45
+ # XML
46
+ #
47
+ # handler = Class.new do
48
+ # def reverse(node)
49
+ # node.text.reverse
50
+ # end
51
+ # end
52
+ #
53
+ # xsl = <<~XSL
54
+ # <xsl:stylesheet version="1.0"
55
+ # xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
56
+ # xmlns:myfuncs="http://nokogiri.org/xslt/myfuncs"
57
+ # extension-element-prefixes="myfuncs">
58
+ # <xsl:template match="/">
59
+ # <reversed>
60
+ # <xsl:for-each select="nodes/node">
61
+ # <reverse><xsl:copy-of select="myfuncs:reverse(.)"/></reverse>
62
+ # </xsl:for-each>
63
+ # </reversed>
64
+ # </xsl:template>
65
+ # </xsl:stylesheet>
66
+ # XSL
67
+ #
68
+ # xsl = Nokogiri.XSLT(xsl, "http://nokogiri.org/xslt/myfuncs" => handler)
69
+ # xsl.transform(xml).to_xml
70
+ # # => "<?xml version=\"1.0\"?>\n" +
71
+ # # "<reversed>\n" +
72
+ # # " <reverse>ooF</reverse>\n" +
73
+ # # " <reverse>raB</reverse>\n" +
74
+ # # "</reversed>\n"
75
+ #
25
76
  def parse(string, modules = {})
26
77
  modules.each do |url, klass|
27
78
  XSLT.register(url, klass)
@@ -47,10 +98,10 @@ module Nokogiri
47
98
  # [Returns] Array of string parameters, with quotes correctly escaped for use with XSLT::Stylesheet.transform
48
99
  #
49
100
  def quote_params(params)
50
- params.flatten.each_slice(2).each_with_object([]) do |kv, quoted_params|
101
+ params.flatten.each_slice(2).with_object([]) do |kv, quoted_params|
51
102
  key, value = kv.map(&:to_s)
52
- value = if /'/.match?(value)
53
- "concat('#{value.gsub(/'/, %q{', "'", '})}')"
103
+ value = if value.include?("'")
104
+ "concat('#{value.gsub("'", %q{', "'", '})}')"
54
105
  else
55
106
  "'#{value}'"
56
107
  end
@@ -58,6 +109,25 @@ module Nokogiri
58
109
  quoted_params << value
59
110
  end
60
111
  end
112
+
113
+ # :call-seq:
114
+ # register(uri, custom_handler_class)
115
+ #
116
+ # Register a class that implements custom XSLT transformation functions.
117
+ #
118
+ # ⚠ The XSLT handler classes are registered *globally*.
119
+ #
120
+ # [Parameters]
121
+ # - +uri+ (String) The namespace for the custom handlers
122
+ # - +custom_handler_class+ (Class) A class with ruby methods that can be called during
123
+ # transformation
124
+ #
125
+ # See Nokogiri::XSLT.parse for usage.
126
+ #
127
+ def register(uri, custom_handler_class)
128
+ # NOTE: this is implemented in the C extension, see ext/nokogiri/xslt_stylesheet.c
129
+ raise NotImplementedError, "Nokogiri::XSLT.register is not implemented on JRuby"
130
+ end if Nokogiri.jruby?
61
131
  end
62
132
  end
63
133
  end