nokogiri 1.10.9 → 1.18.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (230) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile +38 -0
  3. data/LICENSE-DEPENDENCIES.md +1632 -1022
  4. data/LICENSE.md +1 -1
  5. data/README.md +190 -95
  6. data/bin/nokogiri +63 -50
  7. data/dependencies.yml +34 -66
  8. data/ext/nokogiri/depend +38 -358
  9. data/ext/nokogiri/extconf.rb +909 -422
  10. data/ext/nokogiri/gumbo.c +610 -0
  11. data/ext/nokogiri/html4_document.c +171 -0
  12. data/ext/nokogiri/html4_element_description.c +299 -0
  13. data/ext/nokogiri/html4_entity_lookup.c +37 -0
  14. data/ext/nokogiri/html4_sax_parser.c +40 -0
  15. data/ext/nokogiri/html4_sax_parser_context.c +98 -0
  16. data/ext/nokogiri/html4_sax_push_parser.c +96 -0
  17. data/ext/nokogiri/libxml2_polyfill.c +114 -0
  18. data/ext/nokogiri/nokogiri.c +258 -105
  19. data/ext/nokogiri/nokogiri.h +207 -90
  20. data/ext/nokogiri/test_global_handlers.c +40 -0
  21. data/ext/nokogiri/xml_attr.c +18 -18
  22. data/ext/nokogiri/xml_attribute_decl.c +22 -22
  23. data/ext/nokogiri/xml_cdata.c +33 -33
  24. data/ext/nokogiri/xml_comment.c +19 -31
  25. data/ext/nokogiri/xml_document.c +499 -323
  26. data/ext/nokogiri/xml_document_fragment.c +17 -36
  27. data/ext/nokogiri/xml_dtd.c +65 -59
  28. data/ext/nokogiri/xml_element_content.c +63 -55
  29. data/ext/nokogiri/xml_element_decl.c +31 -31
  30. data/ext/nokogiri/xml_encoding_handler.c +54 -21
  31. data/ext/nokogiri/xml_entity_decl.c +37 -35
  32. data/ext/nokogiri/xml_entity_reference.c +17 -19
  33. data/ext/nokogiri/xml_namespace.c +131 -61
  34. data/ext/nokogiri/xml_node.c +1429 -723
  35. data/ext/nokogiri/xml_node_set.c +257 -225
  36. data/ext/nokogiri/xml_processing_instruction.c +18 -20
  37. data/ext/nokogiri/xml_reader.c +340 -231
  38. data/ext/nokogiri/xml_relax_ng.c +87 -99
  39. data/ext/nokogiri/xml_sax_parser.c +269 -176
  40. data/ext/nokogiri/xml_sax_parser_context.c +286 -152
  41. data/ext/nokogiri/xml_sax_push_parser.c +111 -64
  42. data/ext/nokogiri/xml_schema.c +132 -140
  43. data/ext/nokogiri/xml_syntax_error.c +52 -23
  44. data/ext/nokogiri/xml_text.c +37 -30
  45. data/ext/nokogiri/xml_xpath_context.c +373 -185
  46. data/ext/nokogiri/xslt_stylesheet.c +342 -191
  47. data/gumbo-parser/CHANGES.md +63 -0
  48. data/gumbo-parser/Makefile +129 -0
  49. data/gumbo-parser/THANKS +27 -0
  50. data/gumbo-parser/src/Makefile +34 -0
  51. data/gumbo-parser/src/README.md +41 -0
  52. data/gumbo-parser/src/ascii.c +75 -0
  53. data/gumbo-parser/src/ascii.h +115 -0
  54. data/gumbo-parser/src/attribute.c +42 -0
  55. data/gumbo-parser/src/attribute.h +17 -0
  56. data/gumbo-parser/src/char_ref.c +22225 -0
  57. data/gumbo-parser/src/char_ref.h +29 -0
  58. data/gumbo-parser/src/char_ref.rl +2154 -0
  59. data/gumbo-parser/src/error.c +658 -0
  60. data/gumbo-parser/src/error.h +152 -0
  61. data/gumbo-parser/src/foreign_attrs.c +103 -0
  62. data/gumbo-parser/src/foreign_attrs.gperf +27 -0
  63. data/gumbo-parser/src/insertion_mode.h +33 -0
  64. data/gumbo-parser/src/macros.h +91 -0
  65. data/gumbo-parser/src/nokogiri_gumbo.h +953 -0
  66. data/gumbo-parser/src/parser.c +4932 -0
  67. data/gumbo-parser/src/parser.h +41 -0
  68. data/gumbo-parser/src/replacement.h +33 -0
  69. data/gumbo-parser/src/string_buffer.c +103 -0
  70. data/gumbo-parser/src/string_buffer.h +68 -0
  71. data/gumbo-parser/src/string_piece.c +48 -0
  72. data/gumbo-parser/src/svg_attrs.c +174 -0
  73. data/gumbo-parser/src/svg_attrs.gperf +77 -0
  74. data/gumbo-parser/src/svg_tags.c +137 -0
  75. data/gumbo-parser/src/svg_tags.gperf +55 -0
  76. data/gumbo-parser/src/tag.c +223 -0
  77. data/gumbo-parser/src/tag_lookup.c +382 -0
  78. data/gumbo-parser/src/tag_lookup.gperf +170 -0
  79. data/gumbo-parser/src/tag_lookup.h +13 -0
  80. data/gumbo-parser/src/token_buffer.c +79 -0
  81. data/gumbo-parser/src/token_buffer.h +71 -0
  82. data/gumbo-parser/src/token_type.h +17 -0
  83. data/gumbo-parser/src/tokenizer.c +3464 -0
  84. data/gumbo-parser/src/tokenizer.h +112 -0
  85. data/gumbo-parser/src/tokenizer_states.h +339 -0
  86. data/gumbo-parser/src/utf8.c +245 -0
  87. data/gumbo-parser/src/utf8.h +164 -0
  88. data/gumbo-parser/src/util.c +66 -0
  89. data/gumbo-parser/src/util.h +34 -0
  90. data/gumbo-parser/src/vector.c +111 -0
  91. data/gumbo-parser/src/vector.h +45 -0
  92. data/lib/nokogiri/class_resolver.rb +67 -0
  93. data/lib/nokogiri/css/node.rb +14 -8
  94. data/lib/nokogiri/css/parser.rb +399 -377
  95. data/lib/nokogiri/css/parser.y +250 -245
  96. data/lib/nokogiri/css/parser_extras.rb +16 -71
  97. data/lib/nokogiri/css/selector_cache.rb +38 -0
  98. data/lib/nokogiri/css/syntax_error.rb +3 -1
  99. data/lib/nokogiri/css/tokenizer.rb +7 -5
  100. data/lib/nokogiri/css/tokenizer.rex +11 -9
  101. data/lib/nokogiri/css/xpath_visitor.rb +242 -96
  102. data/lib/nokogiri/css.rb +122 -17
  103. data/lib/nokogiri/decorators/slop.rb +11 -11
  104. data/lib/nokogiri/encoding_handler.rb +57 -0
  105. data/lib/nokogiri/extension.rb +32 -0
  106. data/lib/nokogiri/gumbo.rb +15 -0
  107. data/lib/nokogiri/html.rb +38 -27
  108. data/lib/nokogiri/{html → html4}/builder.rb +4 -2
  109. data/lib/nokogiri/html4/document.rb +235 -0
  110. data/lib/nokogiri/html4/document_fragment.rb +166 -0
  111. data/lib/nokogiri/{html → html4}/element_description.rb +3 -1
  112. data/lib/nokogiri/html4/element_description_defaults.rb +2040 -0
  113. data/lib/nokogiri/html4/encoding_reader.rb +121 -0
  114. data/lib/nokogiri/{html → html4}/entity_lookup.rb +4 -2
  115. data/lib/nokogiri/html4/sax/parser.rb +48 -0
  116. data/lib/nokogiri/html4/sax/parser_context.rb +15 -0
  117. data/lib/nokogiri/{html → html4}/sax/push_parser.rb +12 -11
  118. data/lib/nokogiri/html4.rb +42 -0
  119. data/lib/nokogiri/html5/builder.rb +40 -0
  120. data/lib/nokogiri/html5/document.rb +199 -0
  121. data/lib/nokogiri/html5/document_fragment.rb +200 -0
  122. data/lib/nokogiri/html5/node.rb +103 -0
  123. data/lib/nokogiri/html5.rb +368 -0
  124. data/lib/nokogiri/jruby/dependencies.rb +3 -0
  125. data/lib/nokogiri/jruby/nokogiri_jars.rb +43 -0
  126. data/lib/nokogiri/syntax_error.rb +2 -0
  127. data/lib/nokogiri/version/constant.rb +6 -0
  128. data/lib/nokogiri/version/info.rb +224 -0
  129. data/lib/nokogiri/version.rb +3 -108
  130. data/lib/nokogiri/xml/attr.rb +55 -3
  131. data/lib/nokogiri/xml/attribute_decl.rb +6 -2
  132. data/lib/nokogiri/xml/builder.rb +83 -35
  133. data/lib/nokogiri/xml/cdata.rb +3 -1
  134. data/lib/nokogiri/xml/character_data.rb +2 -0
  135. data/lib/nokogiri/xml/document.rb +359 -130
  136. data/lib/nokogiri/xml/document_fragment.rb +170 -54
  137. data/lib/nokogiri/xml/dtd.rb +4 -2
  138. data/lib/nokogiri/xml/element_content.rb +12 -2
  139. data/lib/nokogiri/xml/element_decl.rb +6 -2
  140. data/lib/nokogiri/xml/entity_decl.rb +7 -3
  141. data/lib/nokogiri/xml/entity_reference.rb +2 -0
  142. data/lib/nokogiri/xml/namespace.rb +44 -0
  143. data/lib/nokogiri/xml/node/save_options.rb +23 -8
  144. data/lib/nokogiri/xml/node.rb +1168 -420
  145. data/lib/nokogiri/xml/node_set.rb +145 -67
  146. data/lib/nokogiri/xml/notation.rb +13 -0
  147. data/lib/nokogiri/xml/parse_options.rb +145 -52
  148. data/lib/nokogiri/xml/pp/character_data.rb +9 -6
  149. data/lib/nokogiri/xml/pp/node.rb +47 -30
  150. data/lib/nokogiri/xml/pp.rb +4 -2
  151. data/lib/nokogiri/xml/processing_instruction.rb +4 -1
  152. data/lib/nokogiri/xml/reader.rb +68 -41
  153. data/lib/nokogiri/xml/relax_ng.rb +60 -17
  154. data/lib/nokogiri/xml/sax/document.rb +198 -111
  155. data/lib/nokogiri/xml/sax/parser.rb +144 -67
  156. data/lib/nokogiri/xml/sax/parser_context.rb +119 -6
  157. data/lib/nokogiri/xml/sax/push_parser.rb +9 -5
  158. data/lib/nokogiri/xml/sax.rb +54 -4
  159. data/lib/nokogiri/xml/schema.rb +116 -39
  160. data/lib/nokogiri/xml/searchable.rb +139 -95
  161. data/lib/nokogiri/xml/syntax_error.rb +29 -5
  162. data/lib/nokogiri/xml/text.rb +2 -0
  163. data/lib/nokogiri/xml/xpath/syntax_error.rb +4 -2
  164. data/lib/nokogiri/xml/xpath.rb +15 -4
  165. data/lib/nokogiri/xml/xpath_context.rb +15 -4
  166. data/lib/nokogiri/xml.rb +45 -55
  167. data/lib/nokogiri/xslt/stylesheet.rb +32 -8
  168. data/lib/nokogiri/xslt.rb +103 -30
  169. data/lib/nokogiri.rb +59 -75
  170. data/lib/xsd/xmlparser/nokogiri.rb +32 -29
  171. data/patches/libxml2/0009-allow-wildcard-namespaces.patch +77 -0
  172. data/patches/libxml2/0010-update-config.guess-and-config.sub-for-libxml2.patch +224 -0
  173. data/patches/libxml2/0011-rip-out-libxml2-s-libc_single_threaded-support.patch +30 -0
  174. data/patches/libxml2/0019-xpath-Use-separate-static-hash-table-for-standard-fu.patch +244 -0
  175. data/patches/libxslt/0001-update-config.guess-and-config.sub-for-libxslt.patch +224 -0
  176. data/ports/archives/libxml2-2.13.6.tar.xz +0 -0
  177. data/ports/archives/libxslt-1.1.42.tar.xz +0 -0
  178. metadata +123 -295
  179. data/ext/nokogiri/html_document.c +0 -170
  180. data/ext/nokogiri/html_document.h +0 -10
  181. data/ext/nokogiri/html_element_description.c +0 -279
  182. data/ext/nokogiri/html_element_description.h +0 -10
  183. data/ext/nokogiri/html_entity_lookup.c +0 -32
  184. data/ext/nokogiri/html_entity_lookup.h +0 -8
  185. data/ext/nokogiri/html_sax_parser_context.c +0 -116
  186. data/ext/nokogiri/html_sax_parser_context.h +0 -11
  187. data/ext/nokogiri/html_sax_push_parser.c +0 -87
  188. data/ext/nokogiri/html_sax_push_parser.h +0 -9
  189. data/ext/nokogiri/xml_attr.h +0 -9
  190. data/ext/nokogiri/xml_attribute_decl.h +0 -9
  191. data/ext/nokogiri/xml_cdata.h +0 -9
  192. data/ext/nokogiri/xml_comment.h +0 -9
  193. data/ext/nokogiri/xml_document.h +0 -23
  194. data/ext/nokogiri/xml_document_fragment.h +0 -10
  195. data/ext/nokogiri/xml_dtd.h +0 -10
  196. data/ext/nokogiri/xml_element_content.h +0 -10
  197. data/ext/nokogiri/xml_element_decl.h +0 -9
  198. data/ext/nokogiri/xml_encoding_handler.h +0 -8
  199. data/ext/nokogiri/xml_entity_decl.h +0 -10
  200. data/ext/nokogiri/xml_entity_reference.h +0 -9
  201. data/ext/nokogiri/xml_io.c +0 -61
  202. data/ext/nokogiri/xml_io.h +0 -11
  203. data/ext/nokogiri/xml_libxml2_hacks.c +0 -112
  204. data/ext/nokogiri/xml_libxml2_hacks.h +0 -12
  205. data/ext/nokogiri/xml_namespace.h +0 -14
  206. data/ext/nokogiri/xml_node.h +0 -13
  207. data/ext/nokogiri/xml_node_set.h +0 -12
  208. data/ext/nokogiri/xml_processing_instruction.h +0 -9
  209. data/ext/nokogiri/xml_reader.h +0 -10
  210. data/ext/nokogiri/xml_relax_ng.h +0 -9
  211. data/ext/nokogiri/xml_sax_parser.h +0 -39
  212. data/ext/nokogiri/xml_sax_parser_context.h +0 -10
  213. data/ext/nokogiri/xml_sax_push_parser.h +0 -9
  214. data/ext/nokogiri/xml_schema.h +0 -9
  215. data/ext/nokogiri/xml_syntax_error.h +0 -13
  216. data/ext/nokogiri/xml_text.h +0 -9
  217. data/ext/nokogiri/xml_xpath_context.h +0 -10
  218. data/ext/nokogiri/xslt_stylesheet.h +0 -14
  219. data/lib/nokogiri/html/document.rb +0 -335
  220. data/lib/nokogiri/html/document_fragment.rb +0 -49
  221. data/lib/nokogiri/html/element_description_defaults.rb +0 -671
  222. data/lib/nokogiri/html/sax/parser.rb +0 -62
  223. data/lib/nokogiri/html/sax/parser_context.rb +0 -16
  224. data/patches/libxml2/0001-Revert-Do-not-URI-escape-in-server-side-includes.patch +0 -78
  225. data/patches/libxml2/0004-libxml2.la-is-in-top_builddir.patch +0 -25
  226. data/patches/libxml2/0005-Fix-infinite-loop-in-xmlStringLenDecodeEntities.patch +0 -32
  227. data/ports/archives/libxml2-2.9.10.tar.gz +0 -0
  228. data/ports/archives/libxslt-1.1.34.tar.gz +0 -0
  229. /data/patches/libxml2/{0002-Remove-script-macro-support.patch → 0001-Remove-script-macro-support.patch} +0 -0
  230. /data/patches/libxml2/{0003-Update-entities-to-remove-handling-of-ssi.patch → 0002-Update-entities-to-remove-handling-of-ssi.patch} +0 -0
@@ -1,9 +1,16 @@
1
+ # coding: utf-8
2
+ # frozen_string_literal: true
3
+
1
4
  module Nokogiri
2
5
  module XML
3
6
  ####
4
- # A NodeSet contains a list of Nokogiri::XML::Node objects. Typically
5
- # a NodeSet is return as a result of searching a Document via
6
- # Nokogiri::XML::Searchable#css or Nokogiri::XML::Searchable#xpath
7
+ # A NodeSet is an Enumerable that contains a list of Nokogiri::XML::Node objects.
8
+ #
9
+ # Typically a NodeSet is returned as a result of searching a Document via
10
+ # Nokogiri::XML::Searchable#css or Nokogiri::XML::Searchable#xpath.
11
+ #
12
+ # Note that the `#dup` and `#clone` methods perform shallow copies; these methods do not copy
13
+ # the Nodes contained in the NodeSet (similar to how Array and other Enumerable classes work).
7
14
  class NodeSet
8
15
  include Nokogiri::XML::Searchable
9
16
  include Enumerable
@@ -11,10 +18,8 @@ module Nokogiri
11
18
  # The Document this NodeSet is associated with
12
19
  attr_accessor :document
13
20
 
14
- alias :clone :dup
15
-
16
21
  # Create a NodeSet with +document+ defaulting to +list+
17
- def initialize document, list = []
22
+ def initialize(document, list = [])
18
23
  @document = document
19
24
  document.decorate(self)
20
25
  list.each { |x| self << x }
@@ -23,8 +28,9 @@ module Nokogiri
23
28
 
24
29
  ###
25
30
  # Get the first element of the NodeSet.
26
- def first n = nil
31
+ def first(n = nil)
27
32
  return self[0] unless n
33
+
28
34
  list = []
29
35
  [n, length].min.times { |i| list << self[i] }
30
36
  list
@@ -46,7 +52,7 @@ module Nokogiri
46
52
  # Returns the index of the first node in self that is == to +node+ or meets the given block. Returns nil if no match is found.
47
53
  def index(node = nil)
48
54
  if node
49
- warn "given block not used" if block_given?
55
+ warn("given block not used") if block_given?
50
56
  each_with_index { |member, j| return j if member == node }
51
57
  elsif block_given?
52
58
  each_with_index { |member, j| return j if yield(member) }
@@ -56,18 +62,18 @@ module Nokogiri
56
62
 
57
63
  ###
58
64
  # Insert +datum+ before the first Node in this NodeSet
59
- def before datum
60
- first.before datum
65
+ def before(datum)
66
+ first.before(datum)
61
67
  end
62
68
 
63
69
  ###
64
70
  # Insert +datum+ after the last Node in this NodeSet
65
- def after datum
66
- last.after datum
71
+ def after(datum)
72
+ last.after(datum)
67
73
  end
68
74
 
69
- alias :<< :push
70
- alias :remove :unlink
75
+ alias_method :<<, :push
76
+ alias_method :remove, :unlink
71
77
 
72
78
  ###
73
79
  # call-seq: css *rules, [namespace-bindings, custom-pseudo-class]
@@ -76,7 +82,7 @@ module Nokogiri
76
82
  # selectors. For example:
77
83
  #
78
84
  # For more information see Nokogiri::XML::Searchable#css
79
- def css *args
85
+ def css(*args)
80
86
  rules, handler, ns, _ = extract_params(args)
81
87
  paths = css_rules_to_xpath(rules, ns)
82
88
 
@@ -92,7 +98,7 @@ module Nokogiri
92
98
  # queries.
93
99
  #
94
100
  # For more information see Nokogiri::XML::Searchable#xpath
95
- def xpath *args
101
+ def xpath(*args)
96
102
  paths, handler, ns, binds = extract_params(args)
97
103
 
98
104
  inject(NodeSet.new(document)) do |set, node|
@@ -100,13 +106,6 @@ module Nokogiri
100
106
  end
101
107
  end
102
108
 
103
- ###
104
- # Search this NodeSet's nodes' immediate children using CSS selector +selector+
105
- def > selector
106
- ns = document.root.namespaces
107
- xpath CSS.xpath_for(selector, :prefix => "./", :ns => ns).first
108
- end
109
-
110
109
  ###
111
110
  # call-seq: search *paths, [namespace-bindings, xpath-variable-bindings, custom-handler-class]
112
111
  #
@@ -119,18 +118,18 @@ module Nokogiri
119
118
  #
120
119
  # node_set.at(3) # same as node_set[3]
121
120
  #
122
- def at *args
121
+ def at(*args)
123
122
  if args.length == 1 && args.first.is_a?(Numeric)
124
123
  return self[args.first]
125
124
  end
126
125
 
127
- super(*args)
126
+ super
128
127
  end
129
- alias :% :at
128
+ alias_method :%, :at
130
129
 
131
130
  ###
132
131
  # Filter this list for nodes that match +expr+
133
- def filter expr
132
+ def filter(expr)
134
133
  find_all { |node| node.matches?(expr) }
135
134
  end
136
135
 
@@ -139,7 +138,7 @@ module Nokogiri
139
138
  # NodeSet.
140
139
  #
141
140
  # See Nokogiri::XML::Node#add_class for more information.
142
- def add_class name
141
+ def add_class(name)
143
142
  each do |el|
144
143
  el.add_class(name)
145
144
  end
@@ -151,7 +150,7 @@ module Nokogiri
151
150
  # NodeSet.
152
151
  #
153
152
  # See Nokogiri::XML::Node#append_class for more information.
154
- def append_class name
153
+ def append_class(name)
155
154
  each do |el|
156
155
  el.append_class(name)
157
156
  end
@@ -163,7 +162,7 @@ module Nokogiri
163
162
  # NodeSet.
164
163
  #
165
164
  # See Nokogiri::XML::Node#remove_class for more information.
166
- def remove_class name = nil
165
+ def remove_class(name = nil)
167
166
  each do |el|
168
167
  el.remove_class(name)
169
168
  end
@@ -203,31 +202,31 @@ module Nokogiri
203
202
  #
204
203
  # node_set.attr("class") { |node| node.name }
205
204
  #
206
- def attr key, value = nil, &block
205
+ def attr(key, value = nil, &block)
207
206
  unless key.is_a?(Hash) || (key && (value || block))
208
- return first ? first.attribute(key) : nil
207
+ return first&.attribute(key)
209
208
  end
210
209
 
211
210
  hash = key.is_a?(Hash) ? key : { key => value }
212
211
 
213
- hash.each do |k,v|
212
+ hash.each do |k, v|
214
213
  each do |node|
215
- node[k] = v || block.call(node)
214
+ node[k] = v || yield(node)
216
215
  end
217
216
  end
218
217
 
219
218
  self
220
219
  end
221
- alias :set :attr
222
- alias :attribute :attr
220
+ alias_method :set, :attr
221
+ alias_method :attribute, :attr
223
222
 
224
223
  ###
225
224
  # Remove the attribute named +name+ from all Node objects in the NodeSet
226
- def remove_attr name
227
- each { |el| el.delete name }
225
+ def remove_attr(name)
226
+ each { |el| el.delete(name) }
228
227
  self
229
228
  end
230
- alias remove_attribute remove_attr
229
+ alias_method :remove_attribute, :remove_attr
231
230
 
232
231
  ###
233
232
  # Iterate over each node, yielding to +block+
@@ -254,20 +253,83 @@ module Nokogiri
254
253
  #
255
254
  # See Nokogiri::XML::Node#content for more information.
256
255
  def inner_text
257
- collect(&:inner_text).join('')
256
+ collect(&:inner_text).join("")
258
257
  end
259
- alias :text :inner_text
258
+ alias_method :text, :inner_text
260
259
 
261
260
  ###
262
261
  # Get the inner html of all contained Node objects
263
- def inner_html *args
264
- collect{|j| j.inner_html(*args) }.join('')
262
+ def inner_html(*args)
263
+ collect { |j| j.inner_html(*args) }.join("")
265
264
  end
266
265
 
267
- ###
268
- # Wrap this NodeSet with +html+
269
- def wrap html
270
- map { |node| node.wrap html }
266
+ # :call-seq:
267
+ # wrap(markup) -> self
268
+ # wrap(node) -> self
269
+ #
270
+ # Wrap each member of this NodeSet with the node parsed from +markup+ or a dup of the +node+.
271
+ #
272
+ # [Parameters]
273
+ # - *markup* (String)
274
+ # Markup that is parsed, once per member of the NodeSet, and used as the wrapper. Each
275
+ # node's parent, if it exists, is used as the context node for parsing; otherwise the
276
+ # associated document is used. If the parsed fragment has multiple roots, the first root
277
+ # node is used as the wrapper.
278
+ # - *node* (Nokogiri::XML::Node)
279
+ # An element that is `#dup`ed and used as the wrapper.
280
+ #
281
+ # [Returns] +self+, to support chaining.
282
+ #
283
+ # ⚠ Note that if a +String+ is passed, the markup will be parsed <b>once per node</b> in the
284
+ # NodeSet. You can avoid this overhead in cases where you know exactly the wrapper you wish to
285
+ # use by passing a +Node+ instead.
286
+ #
287
+ # Also see Node#wrap
288
+ #
289
+ # *Example* with a +String+ argument:
290
+ #
291
+ # doc = Nokogiri::HTML5(<<~HTML)
292
+ # <html><body>
293
+ # <a>a</a>
294
+ # <a>b</a>
295
+ # <a>c</a>
296
+ # <a>d</a>
297
+ # </body></html>
298
+ # HTML
299
+ # doc.css("a").wrap("<div></div>")
300
+ # doc.to_html
301
+ # # => <html><head></head><body>
302
+ # # <div><a>a</a></div>
303
+ # # <div><a>b</a></div>
304
+ # # <div><a>c</a></div>
305
+ # # <div><a>d</a></div>
306
+ # # </body></html>
307
+ #
308
+ # *Example* with a +Node+ argument
309
+ #
310
+ # 💡 Note that this is faster than the equivalent call passing a +String+ because it avoids
311
+ # having to reparse the wrapper markup for each node.
312
+ #
313
+ # doc = Nokogiri::HTML5(<<~HTML)
314
+ # <html><body>
315
+ # <a>a</a>
316
+ # <a>b</a>
317
+ # <a>c</a>
318
+ # <a>d</a>
319
+ # </body></html>
320
+ # HTML
321
+ # doc.css("a").wrap(doc.create_element("div"))
322
+ # doc.to_html
323
+ # # => <html><head></head><body>
324
+ # # <div><a>a</a></div>
325
+ # # <div><a>b</a></div>
326
+ # # <div><a>c</a></div>
327
+ # # <div><a>d</a></div>
328
+ # # </body></html>
329
+ #
330
+ def wrap(node_or_tags)
331
+ map { |node| node.wrap(node_or_tags) }
332
+ self
271
333
  end
272
334
 
273
335
  ###
@@ -278,55 +340,62 @@ module Nokogiri
278
340
 
279
341
  ###
280
342
  # Convert this NodeSet to HTML
281
- def to_html *args
343
+ def to_html(*args)
282
344
  if Nokogiri.jruby?
283
345
  options = args.first.is_a?(Hash) ? args.shift : {}
284
- if !options[:save_with]
285
- options[:save_with] = Node::SaveOptions::NO_DECLARATION | Node::SaveOptions::NO_EMPTY_TAGS | Node::SaveOptions::AS_HTML
286
- end
346
+ options[:save_with] ||= Node::SaveOptions::DEFAULT_HTML
287
347
  args.insert(0, options)
288
348
  end
289
- map { |x| x.to_html(*args) }.join
349
+ if empty?
350
+ encoding = (args.first.is_a?(Hash) ? args.first[:encoding] : nil)
351
+ encoding ||= document.encoding
352
+ encoding.nil? ? "" : "".encode(encoding)
353
+ else
354
+ map { |x| x.to_html(*args) }.join
355
+ end
290
356
  end
291
357
 
292
358
  ###
293
359
  # Convert this NodeSet to XHTML
294
- def to_xhtml *args
360
+ def to_xhtml(*args)
295
361
  map { |x| x.to_xhtml(*args) }.join
296
362
  end
297
363
 
298
364
  ###
299
365
  # Convert this NodeSet to XML
300
- def to_xml *args
366
+ def to_xml(*args)
301
367
  map { |x| x.to_xml(*args) }.join
302
368
  end
303
369
 
304
- alias :size :length
305
- alias :to_ary :to_a
370
+ alias_method :size, :length
371
+ alias_method :to_ary, :to_a
306
372
 
307
373
  ###
308
374
  # Removes the last element from set and returns it, or +nil+ if
309
375
  # the set is empty
310
376
  def pop
311
- return nil if length == 0
312
- delete last
377
+ return if length == 0
378
+
379
+ delete(last)
313
380
  end
314
381
 
315
382
  ###
316
383
  # Returns the first element of the NodeSet and removes it. Returns
317
384
  # +nil+ if the set is empty.
318
385
  def shift
319
- return nil if length == 0
320
- delete first
386
+ return if length == 0
387
+
388
+ delete(first)
321
389
  end
322
390
 
323
391
  ###
324
392
  # Equality -- Two NodeSets are equal if they contain the same number
325
393
  # of elements and if each element is equal to the corresponding
326
394
  # element in the other NodeSet
327
- def == other
395
+ def ==(other)
328
396
  return false unless other.is_a?(Nokogiri::XML::NodeSet)
329
397
  return false unless length == other.length
398
+
330
399
  each_with_index do |node, i|
331
400
  return false unless node == other[i]
332
401
  end
@@ -350,22 +419,31 @@ module Nokogiri
350
419
  def reverse
351
420
  node_set = NodeSet.new(document)
352
421
  (length - 1).downto(0) do |x|
353
- node_set.push self[x]
422
+ node_set.push(self[x])
354
423
  end
355
424
  node_set
356
425
  end
357
426
 
358
427
  ###
359
- # Return a nicely formated string representation
428
+ # Return a nicely formatted string representation
360
429
  def inspect
361
- "[#{map(&:inspect).join ', '}]"
430
+ "[#{map(&:inspect).join(", ")}]"
362
431
  end
363
432
 
364
- alias :+ :|
433
+ alias_method :+, :|
365
434
 
366
- # @private
367
- IMPLIED_XPATH_CONTEXTS = [ './/'.freeze, 'self::'.freeze ].freeze # :nodoc:
435
+ #
436
+ # :call-seq: deconstruct() -> Array
437
+ #
438
+ # Returns the members of this NodeSet as an array, to use in pattern matching.
439
+ #
440
+ # Since v1.14.0
441
+ #
442
+ def deconstruct
443
+ to_a
444
+ end
368
445
 
446
+ IMPLIED_XPATH_CONTEXTS = [".//", "self::"].freeze # :nodoc:
369
447
  end
370
448
  end
371
449
  end
@@ -1,6 +1,19 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Nokogiri
2
4
  module XML
5
+ # Struct representing an {XML Schema Notation}[https://www.w3.org/TR/xml/#Notations]
3
6
  class Notation < Struct.new(:name, :public_id, :system_id)
7
+ # dead comment to ensure rdoc processing
8
+
9
+ # :attr: name (String)
10
+ # The name for the element.
11
+
12
+ # :attr: public_id (String)
13
+ # The URI corresponding to the public identifier
14
+
15
+ # :attr: system_id (String,nil)
16
+ # The URI corresponding to the system identifier
4
17
  end
5
18
  end
6
19
  end
@@ -1,86 +1,175 @@
1
+ # coding: utf-8
2
+ # frozen_string_literal: true
3
+
1
4
  module Nokogiri
2
5
  module XML
3
- ###
4
- # Parse options for passing to Nokogiri.XML or Nokogiri.HTML
5
- #
6
- # == Building combinations of parse options
7
- # You can build your own combinations of these parse options by using any of the following methods:
8
- # *Note*: All examples attempt to set the +RECOVER+ & +NOENT+ options. All examples use Ruby 2 optional parameter syntax.
9
- # [Ruby's bitwise operators] You can use the Ruby bitwise operators to set various combinations.
10
- # Nokogiri.XML('<content>Chapter 1</content', options: Nokogiri::XML::ParseOptions.new((1 << 0) | (1 << 1)))
11
- # [Method chaining] Every option has an equivalent method in lowercase. You can chain these methods together to set various combinations.
12
- # Nokogiri.XML('<content>Chapter 1</content', options: Nokogiri::XML::ParseOptions.new.recover.noent)
13
- # [Using Ruby Blocks] You can also setup parse combinations in the block passed to Nokogiri.XML or Nokogiri.HTML
14
- # Nokogiri.XML('<content>Chapter 1</content') {|config| config.recover.noent}
15
- #
16
- # == Removing particular parse options
17
- # You can also remove options from an instance of +ParseOptions+ dynamically.
18
- # Every option has an equivalent <code>no{option}</code> method in lowercase. You can call these methods on an instance of +ParseOptions+ to remove the option.
19
- # Note that this is not available for +STRICT+.
20
- #
21
- # # Setting the RECOVER & NOENT options...
22
- # options = Nokogiri::XML::ParseOptions.new.recover.noent
23
- # # later...
24
- # options.norecover # Removes the Nokogiri::XML::ParseOptions::RECOVER option
25
- # options.nonoent # Removes the Nokogiri::XML::ParseOptions::NOENT option
6
+ # Options that control the parsing behavior for XML::Document, XML::DocumentFragment,
7
+ # HTML4::Document, HTML4::DocumentFragment, XSLT::Stylesheet, and XML::Schema.
8
+ #
9
+ # These options directly expose libxml2's parse options, which are all boolean in the sense that
10
+ # an option is "on" or "off".
11
+ #
12
+ # 💡 Note that HTML5 parsing has a separate, orthogonal set of options due to the nature of the
13
+ # HTML5 specification. See Nokogiri::HTML5.
14
+ #
15
+ # Not all parse options are supported on JRuby. Nokogiri will attempt to invoke the equivalent
16
+ # behavior in Xerces/NekoHTML on JRuby when it's possible.
17
+ #
18
+ # == Setting and unsetting parse options
19
+ #
20
+ # You can build your own combinations of parse options by using any of the following methods:
21
+ #
22
+ # [ParseOptions method chaining]
23
+ #
24
+ # Every option has an equivalent method in lowercase. You can chain these methods together to
25
+ # set various combinations.
26
+ #
27
+ # # Set the HUGE & PEDANTIC options
28
+ # po = Nokogiri::XML::ParseOptions.new.huge.pedantic
29
+ # doc = Nokogiri::XML::Document.parse(xml, nil, nil, po)
30
+ #
31
+ # Every option has an equivalent <code>no{option}</code> method in lowercase. You can call these
32
+ # methods on an instance of ParseOptions to unset the option.
33
+ #
34
+ # # Set the HUGE & PEDANTIC options
35
+ # po = Nokogiri::XML::ParseOptions.new.huge.pedantic
36
+ #
37
+ # # later we want to modify the options
38
+ # po.nohuge # Unset the HUGE option
39
+ # po.nopedantic # Unset the PEDANTIC option
40
+ #
41
+ # 💡 Note that some options begin with "no" leading to the logical but perhaps unintuitive
42
+ # double negative:
43
+ #
44
+ # po.nocdata # Set the NOCDATA parse option
45
+ # po.nonocdata # Unset the NOCDATA parse option
46
+ #
47
+ # 💡 Note that negation is not available for STRICT, which is itself a negation of all other
48
+ # features.
49
+ #
50
+ #
51
+ # [Using Ruby Blocks]
52
+ #
53
+ # Most parsing methods will accept a block for configuration of parse options, and we
54
+ # recommend chaining the setter methods:
55
+ #
56
+ # doc = Nokogiri::XML::Document.parse(xml) { |config| config.huge.pedantic }
57
+ #
58
+ #
59
+ # [ParseOptions constants]
60
+ #
61
+ # You can also use the constants declared under Nokogiri::XML::ParseOptions to set various
62
+ # combinations. They are bits in a bitmask, and so can be combined with bitwise operators:
63
+ #
64
+ # po = Nokogiri::XML::ParseOptions.new(Nokogiri::XML::ParseOptions::HUGE | Nokogiri::XML::ParseOptions::PEDANTIC)
65
+ # doc = Nokogiri::XML::Document.parse(xml, nil, nil, po)
26
66
  #
27
67
  class ParseOptions
28
68
  # Strict parsing
29
69
  STRICT = 0
30
- # Recover from errors
70
+
71
+ # Recover from errors. On by default for XML::Document, XML::DocumentFragment,
72
+ # HTML4::Document, HTML4::DocumentFragment, XSLT::Stylesheet, and XML::Schema.
31
73
  RECOVER = 1 << 0
32
- # Substitute entities
74
+
75
+ # Substitute entities. Off by default.
76
+ #
77
+ # ⚠ This option enables entity substitution, contrary to what the name implies.
78
+ #
79
+ # ⚠ <b>It is UNSAFE to set this option</b> when parsing untrusted documents.
33
80
  NOENT = 1 << 1
34
- # Load external subsets
81
+
82
+ # Load external subsets. On by default for XSLT::Stylesheet.
83
+ #
84
+ # ⚠ <b>It is UNSAFE to set this option</b> when parsing untrusted documents.
35
85
  DTDLOAD = 1 << 2
36
- # Default DTD attributes
86
+
87
+ # Default DTD attributes. On by default for XSLT::Stylesheet.
37
88
  DTDATTR = 1 << 3
38
- # validate with the DTD
89
+
90
+ # Validate with the DTD. Off by default.
39
91
  DTDVALID = 1 << 4
40
- # suppress error reports
92
+
93
+ # Suppress error reports. On by default for HTML4::Document and HTML4::DocumentFragment
41
94
  NOERROR = 1 << 5
42
- # suppress warning reports
95
+
96
+ # Suppress warning reports. On by default for HTML4::Document and HTML4::DocumentFragment
43
97
  NOWARNING = 1 << 6
44
- # pedantic error reporting
98
+
99
+ # Enable pedantic error reporting. Off by default.
45
100
  PEDANTIC = 1 << 7
46
- # remove blank nodes
101
+
102
+ # Remove blank nodes. Off by default.
47
103
  NOBLANKS = 1 << 8
48
- # use the SAX1 interface internally
104
+
105
+ # Use the SAX1 interface internally. Off by default.
49
106
  SAX1 = 1 << 9
50
- # Implement XInclude substitution
107
+
108
+ # Implement XInclude substitution. Off by default.
51
109
  XINCLUDE = 1 << 10
52
- # Forbid network access. Recommended for dealing with untrusted documents.
110
+
111
+ # Forbid network access. On by default for XML::Document, XML::DocumentFragment,
112
+ # HTML4::Document, HTML4::DocumentFragment, XSLT::Stylesheet, and XML::Schema.
113
+ #
114
+ # ⚠ <b>It is UNSAFE to unset this option</b> when parsing untrusted documents.
53
115
  NONET = 1 << 11
54
- # Do not reuse the context dictionary
116
+
117
+ # Do not reuse the context dictionary. Off by default.
55
118
  NODICT = 1 << 12
56
- # remove redundant namespaces declarations
119
+
120
+ # Remove redundant namespaces declarations. Off by default.
57
121
  NSCLEAN = 1 << 13
58
- # merge CDATA as text nodes
122
+
123
+ # Merge CDATA as text nodes. On by default for XSLT::Stylesheet.
59
124
  NOCDATA = 1 << 14
60
- # do not generate XINCLUDE START/END nodes
125
+
126
+ # Do not generate XInclude START/END nodes. Off by default.
61
127
  NOXINCNODE = 1 << 15
62
- # compact small text nodes; no modification of the tree allowed afterwards (will possibly crash if you try to modify the tree)
128
+
129
+ # Compact small text nodes. Off by default.
130
+ #
131
+ # ⚠ No modification of the DOM tree is allowed after parsing. libxml2 may crash if you try to
132
+ # modify the tree.
63
133
  COMPACT = 1 << 16
64
- # parse using XML-1.0 before update 5
134
+
135
+ # Parse using XML-1.0 before update 5. Off by default
65
136
  OLD10 = 1 << 17
66
- # do not fixup XINCLUDE xml:base uris
137
+
138
+ # Do not fixup XInclude xml:base uris. Off by default
67
139
  NOBASEFIX = 1 << 18
68
- # relax any hardcoded limit from the parser
140
+
141
+ # Relax any hardcoded limit from the parser. Off by default.
142
+ #
143
+ # ⚠ <b>It is UNSAFE to set this option</b> when parsing untrusted documents.
69
144
  HUGE = 1 << 19
70
145
 
71
- # the default options used for parsing XML documents
72
- DEFAULT_XML = RECOVER | NONET
73
- # the default options used for parsing HTML documents
74
- DEFAULT_HTML = RECOVER | NOERROR | NOWARNING | NONET
146
+ # Support line numbers up to <code>long int</code> (default is a <code>short int</code>). On
147
+ # by default for XML::Document, XML::DocumentFragment, HTML4::Document,
148
+ # HTML4::DocumentFragment, XSLT::Stylesheet, and XML::Schema.
149
+ BIG_LINES = 1 << 22
150
+
151
+ # The options mask used by default for parsing XML::Document and XML::DocumentFragment
152
+ DEFAULT_XML = RECOVER | NONET | BIG_LINES
153
+
154
+ # The options mask used by default for parsing XSLT::Stylesheet
155
+ DEFAULT_XSLT = RECOVER | NONET | NOENT | DTDLOAD | DTDATTR | NOCDATA | BIG_LINES
156
+
157
+ # The options mask used by default for parsing HTML4::Document and HTML4::DocumentFragment
158
+ DEFAULT_HTML = RECOVER | NOERROR | NOWARNING | NONET | BIG_LINES
159
+
160
+ # The options mask used by default for parsing XML::Schema
161
+ DEFAULT_SCHEMA = NONET | BIG_LINES
75
162
 
76
163
  attr_accessor :options
77
- def initialize options = STRICT
164
+
165
+ def initialize(options = STRICT)
78
166
  @options = options
79
167
  end
80
168
 
81
169
  constants.each do |constant|
82
170
  next if constant.to_sym == :STRICT
83
- class_eval %{
171
+
172
+ class_eval <<~RUBY, __FILE__, __LINE__ + 1
84
173
  def #{constant.downcase}
85
174
  @options |= #{constant}
86
175
  self
@@ -94,7 +183,7 @@ module Nokogiri
94
183
  def #{constant.downcase}?
95
184
  #{constant} & @options == #{constant}
96
185
  end
97
- }
186
+ RUBY
98
187
  end
99
188
 
100
189
  def strict
@@ -106,14 +195,18 @@ module Nokogiri
106
195
  @options & RECOVER == STRICT
107
196
  end
108
197
 
109
- alias :to_i :options
198
+ def ==(other)
199
+ other.to_i == to_i
200
+ end
201
+
202
+ alias_method :to_i, :options
110
203
 
111
204
  def inspect
112
205
  options = []
113
206
  self.class.constants.each do |k|
114
207
  options << k.downcase if send(:"#{k.downcase}?")
115
208
  end
116
- super.sub(/>$/, " " + options.join(', ') + ">")
209
+ super.sub(/>$/, " " + options.join(", ") + ">")
117
210
  end
118
211
  end
119
212
  end