nokogiri 1.10.7 → 1.16.0

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (224) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile +42 -0
  3. data/LICENSE-DEPENDENCIES.md +1632 -1022
  4. data/LICENSE.md +1 -1
  5. data/README.md +188 -96
  6. data/bin/nokogiri +63 -50
  7. data/dependencies.yml +34 -66
  8. data/ext/nokogiri/depend +38 -358
  9. data/ext/nokogiri/extconf.rb +862 -421
  10. data/ext/nokogiri/gumbo.c +594 -0
  11. data/ext/nokogiri/html4_document.c +165 -0
  12. data/ext/nokogiri/html4_element_description.c +299 -0
  13. data/ext/nokogiri/html4_entity_lookup.c +37 -0
  14. data/ext/nokogiri/html4_sax_parser_context.c +108 -0
  15. data/ext/nokogiri/html4_sax_push_parser.c +95 -0
  16. data/ext/nokogiri/libxml2_backwards_compat.c +121 -0
  17. data/ext/nokogiri/nokogiri.c +251 -105
  18. data/ext/nokogiri/nokogiri.h +222 -90
  19. data/ext/nokogiri/test_global_handlers.c +40 -0
  20. data/ext/nokogiri/xml_attr.c +17 -17
  21. data/ext/nokogiri/xml_attribute_decl.c +22 -22
  22. data/ext/nokogiri/xml_cdata.c +39 -31
  23. data/ext/nokogiri/xml_comment.c +20 -27
  24. data/ext/nokogiri/xml_document.c +408 -243
  25. data/ext/nokogiri/xml_document_fragment.c +13 -17
  26. data/ext/nokogiri/xml_dtd.c +64 -58
  27. data/ext/nokogiri/xml_element_content.c +63 -55
  28. data/ext/nokogiri/xml_element_decl.c +31 -31
  29. data/ext/nokogiri/xml_encoding_handler.c +54 -21
  30. data/ext/nokogiri/xml_entity_decl.c +37 -35
  31. data/ext/nokogiri/xml_entity_reference.c +17 -19
  32. data/ext/nokogiri/xml_namespace.c +131 -61
  33. data/ext/nokogiri/xml_node.c +1343 -674
  34. data/ext/nokogiri/xml_node_set.c +246 -216
  35. data/ext/nokogiri/xml_processing_instruction.c +18 -20
  36. data/ext/nokogiri/xml_reader.c +305 -213
  37. data/ext/nokogiri/xml_relax_ng.c +87 -78
  38. data/ext/nokogiri/xml_sax_parser.c +149 -124
  39. data/ext/nokogiri/xml_sax_parser_context.c +149 -103
  40. data/ext/nokogiri/xml_sax_push_parser.c +65 -37
  41. data/ext/nokogiri/xml_schema.c +138 -82
  42. data/ext/nokogiri/xml_syntax_error.c +42 -21
  43. data/ext/nokogiri/xml_text.c +35 -26
  44. data/ext/nokogiri/xml_xpath_context.c +363 -178
  45. data/ext/nokogiri/xslt_stylesheet.c +335 -189
  46. data/gumbo-parser/CHANGES.md +63 -0
  47. data/gumbo-parser/Makefile +126 -0
  48. data/gumbo-parser/THANKS +27 -0
  49. data/gumbo-parser/src/Makefile +34 -0
  50. data/gumbo-parser/src/README.md +41 -0
  51. data/gumbo-parser/src/ascii.c +75 -0
  52. data/gumbo-parser/src/ascii.h +115 -0
  53. data/gumbo-parser/src/attribute.c +42 -0
  54. data/gumbo-parser/src/attribute.h +17 -0
  55. data/gumbo-parser/src/char_ref.c +22225 -0
  56. data/gumbo-parser/src/char_ref.h +29 -0
  57. data/gumbo-parser/src/char_ref.rl +2154 -0
  58. data/gumbo-parser/src/error.c +630 -0
  59. data/gumbo-parser/src/error.h +148 -0
  60. data/gumbo-parser/src/foreign_attrs.c +103 -0
  61. data/gumbo-parser/src/foreign_attrs.gperf +27 -0
  62. data/gumbo-parser/src/insertion_mode.h +33 -0
  63. data/gumbo-parser/src/macros.h +91 -0
  64. data/gumbo-parser/src/nokogiri_gumbo.h +944 -0
  65. data/gumbo-parser/src/parser.c +4891 -0
  66. data/gumbo-parser/src/parser.h +41 -0
  67. data/gumbo-parser/src/replacement.h +33 -0
  68. data/gumbo-parser/src/string_buffer.c +103 -0
  69. data/gumbo-parser/src/string_buffer.h +68 -0
  70. data/gumbo-parser/src/string_piece.c +48 -0
  71. data/gumbo-parser/src/svg_attrs.c +174 -0
  72. data/gumbo-parser/src/svg_attrs.gperf +77 -0
  73. data/gumbo-parser/src/svg_tags.c +137 -0
  74. data/gumbo-parser/src/svg_tags.gperf +55 -0
  75. data/gumbo-parser/src/tag.c +223 -0
  76. data/gumbo-parser/src/tag_lookup.c +382 -0
  77. data/gumbo-parser/src/tag_lookup.gperf +170 -0
  78. data/gumbo-parser/src/tag_lookup.h +13 -0
  79. data/gumbo-parser/src/token_buffer.c +79 -0
  80. data/gumbo-parser/src/token_buffer.h +71 -0
  81. data/gumbo-parser/src/token_type.h +17 -0
  82. data/gumbo-parser/src/tokenizer.c +3464 -0
  83. data/gumbo-parser/src/tokenizer.h +112 -0
  84. data/gumbo-parser/src/tokenizer_states.h +339 -0
  85. data/gumbo-parser/src/utf8.c +245 -0
  86. data/gumbo-parser/src/utf8.h +164 -0
  87. data/gumbo-parser/src/util.c +66 -0
  88. data/gumbo-parser/src/util.h +34 -0
  89. data/gumbo-parser/src/vector.c +111 -0
  90. data/gumbo-parser/src/vector.h +45 -0
  91. data/lib/nokogiri/class_resolver.rb +67 -0
  92. data/lib/nokogiri/css/node.rb +10 -8
  93. data/lib/nokogiri/css/parser.rb +397 -377
  94. data/lib/nokogiri/css/parser.y +250 -245
  95. data/lib/nokogiri/css/parser_extras.rb +54 -49
  96. data/lib/nokogiri/css/syntax_error.rb +3 -1
  97. data/lib/nokogiri/css/tokenizer.rb +5 -3
  98. data/lib/nokogiri/css/tokenizer.rex +3 -2
  99. data/lib/nokogiri/css/xpath_visitor.rb +205 -96
  100. data/lib/nokogiri/css.rb +56 -17
  101. data/lib/nokogiri/decorators/slop.rb +9 -7
  102. data/lib/nokogiri/encoding_handler.rb +57 -0
  103. data/lib/nokogiri/extension.rb +32 -0
  104. data/lib/nokogiri/gumbo.rb +15 -0
  105. data/lib/nokogiri/html.rb +38 -27
  106. data/lib/nokogiri/{html → html4}/builder.rb +4 -2
  107. data/lib/nokogiri/html4/document.rb +214 -0
  108. data/lib/nokogiri/html4/document_fragment.rb +54 -0
  109. data/lib/nokogiri/{html → html4}/element_description.rb +3 -1
  110. data/lib/nokogiri/html4/element_description_defaults.rb +2040 -0
  111. data/lib/nokogiri/html4/encoding_reader.rb +121 -0
  112. data/lib/nokogiri/{html → html4}/entity_lookup.rb +4 -2
  113. data/lib/nokogiri/{html → html4}/sax/parser.rb +17 -16
  114. data/lib/nokogiri/html4/sax/parser_context.rb +20 -0
  115. data/lib/nokogiri/{html → html4}/sax/push_parser.rb +12 -11
  116. data/lib/nokogiri/html4.rb +47 -0
  117. data/lib/nokogiri/html5/document.rb +168 -0
  118. data/lib/nokogiri/html5/document_fragment.rb +90 -0
  119. data/lib/nokogiri/html5/node.rb +103 -0
  120. data/lib/nokogiri/html5.rb +326 -0
  121. data/lib/nokogiri/jruby/dependencies.rb +3 -0
  122. data/lib/nokogiri/jruby/nokogiri_jars.rb +43 -0
  123. data/lib/nokogiri/syntax_error.rb +2 -0
  124. data/lib/nokogiri/version/constant.rb +6 -0
  125. data/lib/nokogiri/version/info.rb +224 -0
  126. data/lib/nokogiri/version.rb +3 -108
  127. data/lib/nokogiri/xml/attr.rb +55 -3
  128. data/lib/nokogiri/xml/attribute_decl.rb +6 -2
  129. data/lib/nokogiri/xml/builder.rb +75 -34
  130. data/lib/nokogiri/xml/cdata.rb +3 -1
  131. data/lib/nokogiri/xml/character_data.rb +2 -0
  132. data/lib/nokogiri/xml/document.rb +312 -127
  133. data/lib/nokogiri/xml/document_fragment.rb +93 -48
  134. data/lib/nokogiri/xml/dtd.rb +4 -2
  135. data/lib/nokogiri/xml/element_content.rb +12 -2
  136. data/lib/nokogiri/xml/element_decl.rb +6 -2
  137. data/lib/nokogiri/xml/entity_decl.rb +7 -3
  138. data/lib/nokogiri/xml/entity_reference.rb +2 -0
  139. data/lib/nokogiri/xml/namespace.rb +44 -0
  140. data/lib/nokogiri/xml/node/save_options.rb +23 -8
  141. data/lib/nokogiri/xml/node.rb +1096 -419
  142. data/lib/nokogiri/xml/node_set.rb +137 -61
  143. data/lib/nokogiri/xml/notation.rb +13 -0
  144. data/lib/nokogiri/xml/parse_options.rb +145 -52
  145. data/lib/nokogiri/xml/pp/character_data.rb +9 -6
  146. data/lib/nokogiri/xml/pp/node.rb +42 -30
  147. data/lib/nokogiri/xml/pp.rb +4 -2
  148. data/lib/nokogiri/xml/processing_instruction.rb +4 -1
  149. data/lib/nokogiri/xml/reader.rb +21 -28
  150. data/lib/nokogiri/xml/relax_ng.rb +8 -2
  151. data/lib/nokogiri/xml/sax/document.rb +45 -49
  152. data/lib/nokogiri/xml/sax/parser.rb +39 -36
  153. data/lib/nokogiri/xml/sax/parser_context.rb +8 -3
  154. data/lib/nokogiri/xml/sax/push_parser.rb +6 -5
  155. data/lib/nokogiri/xml/sax.rb +6 -4
  156. data/lib/nokogiri/xml/schema.rb +19 -9
  157. data/lib/nokogiri/xml/searchable.rb +120 -72
  158. data/lib/nokogiri/xml/syntax_error.rb +7 -5
  159. data/lib/nokogiri/xml/text.rb +2 -0
  160. data/lib/nokogiri/xml/xpath/syntax_error.rb +4 -2
  161. data/lib/nokogiri/xml/xpath.rb +15 -4
  162. data/lib/nokogiri/xml/xpath_context.rb +3 -3
  163. data/lib/nokogiri/xml.rb +39 -38
  164. data/lib/nokogiri/xslt/stylesheet.rb +3 -1
  165. data/lib/nokogiri/xslt.rb +101 -22
  166. data/lib/nokogiri.rb +59 -75
  167. data/lib/xsd/xmlparser/nokogiri.rb +29 -25
  168. data/patches/libxml2/{0004-libxml2.la-is-in-top_builddir.patch → 0003-libxml2.la-is-in-top_builddir.patch} +1 -1
  169. data/patches/libxml2/0009-allow-wildcard-namespaces.patch +77 -0
  170. data/patches/libxml2/0010-update-config.guess-and-config.sub-for-libxml2.patch +224 -0
  171. data/patches/libxml2/0011-rip-out-libxml2-s-libc_single_threaded-support.patch +30 -0
  172. data/patches/libxslt/0001-update-config.guess-and-config.sub-for-libxslt.patch +224 -0
  173. data/ports/archives/libxml2-2.12.3.tar.xz +0 -0
  174. data/ports/archives/libxslt-1.1.39.tar.xz +0 -0
  175. metadata +121 -291
  176. data/ext/nokogiri/html_document.c +0 -170
  177. data/ext/nokogiri/html_document.h +0 -10
  178. data/ext/nokogiri/html_element_description.c +0 -279
  179. data/ext/nokogiri/html_element_description.h +0 -10
  180. data/ext/nokogiri/html_entity_lookup.c +0 -32
  181. data/ext/nokogiri/html_entity_lookup.h +0 -8
  182. data/ext/nokogiri/html_sax_parser_context.c +0 -116
  183. data/ext/nokogiri/html_sax_parser_context.h +0 -11
  184. data/ext/nokogiri/html_sax_push_parser.c +0 -87
  185. data/ext/nokogiri/html_sax_push_parser.h +0 -9
  186. data/ext/nokogiri/xml_attr.h +0 -9
  187. data/ext/nokogiri/xml_attribute_decl.h +0 -9
  188. data/ext/nokogiri/xml_cdata.h +0 -9
  189. data/ext/nokogiri/xml_comment.h +0 -9
  190. data/ext/nokogiri/xml_document.h +0 -23
  191. data/ext/nokogiri/xml_document_fragment.h +0 -10
  192. data/ext/nokogiri/xml_dtd.h +0 -10
  193. data/ext/nokogiri/xml_element_content.h +0 -10
  194. data/ext/nokogiri/xml_element_decl.h +0 -9
  195. data/ext/nokogiri/xml_encoding_handler.h +0 -8
  196. data/ext/nokogiri/xml_entity_decl.h +0 -10
  197. data/ext/nokogiri/xml_entity_reference.h +0 -9
  198. data/ext/nokogiri/xml_io.c +0 -61
  199. data/ext/nokogiri/xml_io.h +0 -11
  200. data/ext/nokogiri/xml_libxml2_hacks.c +0 -112
  201. data/ext/nokogiri/xml_libxml2_hacks.h +0 -12
  202. data/ext/nokogiri/xml_namespace.h +0 -14
  203. data/ext/nokogiri/xml_node.h +0 -13
  204. data/ext/nokogiri/xml_node_set.h +0 -12
  205. data/ext/nokogiri/xml_processing_instruction.h +0 -9
  206. data/ext/nokogiri/xml_reader.h +0 -10
  207. data/ext/nokogiri/xml_relax_ng.h +0 -9
  208. data/ext/nokogiri/xml_sax_parser.h +0 -39
  209. data/ext/nokogiri/xml_sax_parser_context.h +0 -10
  210. data/ext/nokogiri/xml_sax_push_parser.h +0 -9
  211. data/ext/nokogiri/xml_schema.h +0 -9
  212. data/ext/nokogiri/xml_syntax_error.h +0 -13
  213. data/ext/nokogiri/xml_text.h +0 -9
  214. data/ext/nokogiri/xml_xpath_context.h +0 -10
  215. data/ext/nokogiri/xslt_stylesheet.h +0 -14
  216. data/lib/nokogiri/html/document.rb +0 -335
  217. data/lib/nokogiri/html/document_fragment.rb +0 -49
  218. data/lib/nokogiri/html/element_description_defaults.rb +0 -671
  219. data/lib/nokogiri/html/sax/parser_context.rb +0 -16
  220. data/patches/libxml2/0001-Revert-Do-not-URI-escape-in-server-side-includes.patch +0 -78
  221. data/ports/archives/libxml2-2.9.10.tar.gz +0 -0
  222. data/ports/archives/libxslt-1.1.34.tar.gz +0 -0
  223. /data/patches/libxml2/{0002-Remove-script-macro-support.patch → 0001-Remove-script-macro-support.patch} +0 -0
  224. /data/patches/libxml2/{0003-Update-entities-to-remove-handling-of-ssi.patch → 0002-Update-entities-to-remove-handling-of-ssi.patch} +0 -0
@@ -1,3 +1,6 @@
1
+ # coding: utf-8
2
+ # frozen_string_literal: true
3
+
1
4
  module Nokogiri
2
5
  module XML
3
6
  ####
@@ -11,10 +14,10 @@ module Nokogiri
11
14
  # The Document this NodeSet is associated with
12
15
  attr_accessor :document
13
16
 
14
- alias :clone :dup
17
+ alias_method :clone, :dup
15
18
 
16
19
  # Create a NodeSet with +document+ defaulting to +list+
17
- def initialize document, list = []
20
+ def initialize(document, list = [])
18
21
  @document = document
19
22
  document.decorate(self)
20
23
  list.each { |x| self << x }
@@ -23,8 +26,9 @@ module Nokogiri
23
26
 
24
27
  ###
25
28
  # Get the first element of the NodeSet.
26
- def first n = nil
29
+ def first(n = nil)
27
30
  return self[0] unless n
31
+
28
32
  list = []
29
33
  [n, length].min.times { |i| list << self[i] }
30
34
  list
@@ -46,7 +50,7 @@ module Nokogiri
46
50
  # Returns the index of the first node in self that is == to +node+ or meets the given block. Returns nil if no match is found.
47
51
  def index(node = nil)
48
52
  if node
49
- warn "given block not used" if block_given?
53
+ warn("given block not used") if block_given?
50
54
  each_with_index { |member, j| return j if member == node }
51
55
  elsif block_given?
52
56
  each_with_index { |member, j| return j if yield(member) }
@@ -56,18 +60,18 @@ module Nokogiri
56
60
 
57
61
  ###
58
62
  # Insert +datum+ before the first Node in this NodeSet
59
- def before datum
60
- first.before datum
63
+ def before(datum)
64
+ first.before(datum)
61
65
  end
62
66
 
63
67
  ###
64
68
  # Insert +datum+ after the last Node in this NodeSet
65
- def after datum
66
- last.after datum
69
+ def after(datum)
70
+ last.after(datum)
67
71
  end
68
72
 
69
- alias :<< :push
70
- alias :remove :unlink
73
+ alias_method :<<, :push
74
+ alias_method :remove, :unlink
71
75
 
72
76
  ###
73
77
  # call-seq: css *rules, [namespace-bindings, custom-pseudo-class]
@@ -76,7 +80,7 @@ module Nokogiri
76
80
  # selectors. For example:
77
81
  #
78
82
  # For more information see Nokogiri::XML::Searchable#css
79
- def css *args
83
+ def css(*args)
80
84
  rules, handler, ns, _ = extract_params(args)
81
85
  paths = css_rules_to_xpath(rules, ns)
82
86
 
@@ -92,7 +96,7 @@ module Nokogiri
92
96
  # queries.
93
97
  #
94
98
  # For more information see Nokogiri::XML::Searchable#xpath
95
- def xpath *args
99
+ def xpath(*args)
96
100
  paths, handler, ns, binds = extract_params(args)
97
101
 
98
102
  inject(NodeSet.new(document)) do |set, node|
@@ -100,13 +104,6 @@ module Nokogiri
100
104
  end
101
105
  end
102
106
 
103
- ###
104
- # Search this NodeSet's nodes' immediate children using CSS selector +selector+
105
- def > selector
106
- ns = document.root.namespaces
107
- xpath CSS.xpath_for(selector, :prefix => "./", :ns => ns).first
108
- end
109
-
110
107
  ###
111
108
  # call-seq: search *paths, [namespace-bindings, xpath-variable-bindings, custom-handler-class]
112
109
  #
@@ -119,18 +116,18 @@ module Nokogiri
119
116
  #
120
117
  # node_set.at(3) # same as node_set[3]
121
118
  #
122
- def at *args
119
+ def at(*args)
123
120
  if args.length == 1 && args.first.is_a?(Numeric)
124
121
  return self[args.first]
125
122
  end
126
123
 
127
124
  super(*args)
128
125
  end
129
- alias :% :at
126
+ alias_method :%, :at
130
127
 
131
128
  ###
132
129
  # Filter this list for nodes that match +expr+
133
- def filter expr
130
+ def filter(expr)
134
131
  find_all { |node| node.matches?(expr) }
135
132
  end
136
133
 
@@ -139,7 +136,7 @@ module Nokogiri
139
136
  # NodeSet.
140
137
  #
141
138
  # See Nokogiri::XML::Node#add_class for more information.
142
- def add_class name
139
+ def add_class(name)
143
140
  each do |el|
144
141
  el.add_class(name)
145
142
  end
@@ -151,7 +148,7 @@ module Nokogiri
151
148
  # NodeSet.
152
149
  #
153
150
  # See Nokogiri::XML::Node#append_class for more information.
154
- def append_class name
151
+ def append_class(name)
155
152
  each do |el|
156
153
  el.append_class(name)
157
154
  end
@@ -163,7 +160,7 @@ module Nokogiri
163
160
  # NodeSet.
164
161
  #
165
162
  # See Nokogiri::XML::Node#remove_class for more information.
166
- def remove_class name = nil
163
+ def remove_class(name = nil)
167
164
  each do |el|
168
165
  el.remove_class(name)
169
166
  end
@@ -203,31 +200,31 @@ module Nokogiri
203
200
  #
204
201
  # node_set.attr("class") { |node| node.name }
205
202
  #
206
- def attr key, value = nil, &block
203
+ def attr(key, value = nil, &block)
207
204
  unless key.is_a?(Hash) || (key && (value || block))
208
- return first ? first.attribute(key) : nil
205
+ return first&.attribute(key)
209
206
  end
210
207
 
211
208
  hash = key.is_a?(Hash) ? key : { key => value }
212
209
 
213
- hash.each do |k,v|
210
+ hash.each do |k, v|
214
211
  each do |node|
215
- node[k] = v || block.call(node)
212
+ node[k] = v || yield(node)
216
213
  end
217
214
  end
218
215
 
219
216
  self
220
217
  end
221
- alias :set :attr
222
- alias :attribute :attr
218
+ alias_method :set, :attr
219
+ alias_method :attribute, :attr
223
220
 
224
221
  ###
225
222
  # Remove the attribute named +name+ from all Node objects in the NodeSet
226
- def remove_attr name
227
- each { |el| el.delete name }
223
+ def remove_attr(name)
224
+ each { |el| el.delete(name) }
228
225
  self
229
226
  end
230
- alias remove_attribute remove_attr
227
+ alias_method :remove_attribute, :remove_attr
231
228
 
232
229
  ###
233
230
  # Iterate over each node, yielding to +block+
@@ -254,20 +251,83 @@ module Nokogiri
254
251
  #
255
252
  # See Nokogiri::XML::Node#content for more information.
256
253
  def inner_text
257
- collect(&:inner_text).join('')
254
+ collect(&:inner_text).join("")
258
255
  end
259
- alias :text :inner_text
256
+ alias_method :text, :inner_text
260
257
 
261
258
  ###
262
259
  # Get the inner html of all contained Node objects
263
- def inner_html *args
264
- collect{|j| j.inner_html(*args) }.join('')
260
+ def inner_html(*args)
261
+ collect { |j| j.inner_html(*args) }.join("")
265
262
  end
266
263
 
267
- ###
268
- # Wrap this NodeSet with +html+
269
- def wrap html
270
- map { |node| node.wrap html }
264
+ # :call-seq:
265
+ # wrap(markup) -> self
266
+ # wrap(node) -> self
267
+ #
268
+ # Wrap each member of this NodeSet with the node parsed from +markup+ or a dup of the +node+.
269
+ #
270
+ # [Parameters]
271
+ # - *markup* (String)
272
+ # Markup that is parsed, once per member of the NodeSet, and used as the wrapper. Each
273
+ # node's parent, if it exists, is used as the context node for parsing; otherwise the
274
+ # associated document is used. If the parsed fragment has multiple roots, the first root
275
+ # node is used as the wrapper.
276
+ # - *node* (Nokogiri::XML::Node)
277
+ # An element that is `#dup`ed and used as the wrapper.
278
+ #
279
+ # [Returns] +self+, to support chaining.
280
+ #
281
+ # ⚠ Note that if a +String+ is passed, the markup will be parsed <b>once per node</b> in the
282
+ # NodeSet. You can avoid this overhead in cases where you know exactly the wrapper you wish to
283
+ # use by passing a +Node+ instead.
284
+ #
285
+ # Also see Node#wrap
286
+ #
287
+ # *Example* with a +String+ argument:
288
+ #
289
+ # doc = Nokogiri::HTML5(<<~HTML)
290
+ # <html><body>
291
+ # <a>a</a>
292
+ # <a>b</a>
293
+ # <a>c</a>
294
+ # <a>d</a>
295
+ # </body></html>
296
+ # HTML
297
+ # doc.css("a").wrap("<div></div>")
298
+ # doc.to_html
299
+ # # => <html><head></head><body>
300
+ # # <div><a>a</a></div>
301
+ # # <div><a>b</a></div>
302
+ # # <div><a>c</a></div>
303
+ # # <div><a>d</a></div>
304
+ # # </body></html>
305
+ #
306
+ # *Example* with a +Node+ argument
307
+ #
308
+ # 💡 Note that this is faster than the equivalent call passing a +String+ because it avoids
309
+ # having to reparse the wrapper markup for each node.
310
+ #
311
+ # doc = Nokogiri::HTML5(<<~HTML)
312
+ # <html><body>
313
+ # <a>a</a>
314
+ # <a>b</a>
315
+ # <a>c</a>
316
+ # <a>d</a>
317
+ # </body></html>
318
+ # HTML
319
+ # doc.css("a").wrap(doc.create_element("div"))
320
+ # doc.to_html
321
+ # # => <html><head></head><body>
322
+ # # <div><a>a</a></div>
323
+ # # <div><a>b</a></div>
324
+ # # <div><a>c</a></div>
325
+ # # <div><a>d</a></div>
326
+ # # </body></html>
327
+ #
328
+ def wrap(node_or_tags)
329
+ map { |node| node.wrap(node_or_tags) }
330
+ self
271
331
  end
272
332
 
273
333
  ###
@@ -278,55 +338,62 @@ module Nokogiri
278
338
 
279
339
  ###
280
340
  # Convert this NodeSet to HTML
281
- def to_html *args
341
+ def to_html(*args)
282
342
  if Nokogiri.jruby?
283
343
  options = args.first.is_a?(Hash) ? args.shift : {}
284
- if !options[:save_with]
285
- options[:save_with] = Node::SaveOptions::NO_DECLARATION | Node::SaveOptions::NO_EMPTY_TAGS | Node::SaveOptions::AS_HTML
286
- end
344
+ options[:save_with] ||= Node::SaveOptions::DEFAULT_HTML
287
345
  args.insert(0, options)
288
346
  end
289
- map { |x| x.to_html(*args) }.join
347
+ if empty?
348
+ encoding = (args.first.is_a?(Hash) ? args.first[:encoding] : nil)
349
+ encoding ||= document.encoding
350
+ encoding.nil? ? "" : "".encode(encoding)
351
+ else
352
+ map { |x| x.to_html(*args) }.join
353
+ end
290
354
  end
291
355
 
292
356
  ###
293
357
  # Convert this NodeSet to XHTML
294
- def to_xhtml *args
358
+ def to_xhtml(*args)
295
359
  map { |x| x.to_xhtml(*args) }.join
296
360
  end
297
361
 
298
362
  ###
299
363
  # Convert this NodeSet to XML
300
- def to_xml *args
364
+ def to_xml(*args)
301
365
  map { |x| x.to_xml(*args) }.join
302
366
  end
303
367
 
304
- alias :size :length
305
- alias :to_ary :to_a
368
+ alias_method :size, :length
369
+ alias_method :to_ary, :to_a
306
370
 
307
371
  ###
308
372
  # Removes the last element from set and returns it, or +nil+ if
309
373
  # the set is empty
310
374
  def pop
311
- return nil if length == 0
312
- delete last
375
+ return if length == 0
376
+
377
+ delete(last)
313
378
  end
314
379
 
315
380
  ###
316
381
  # Returns the first element of the NodeSet and removes it. Returns
317
382
  # +nil+ if the set is empty.
318
383
  def shift
319
- return nil if length == 0
320
- delete first
384
+ return if length == 0
385
+
386
+ delete(first)
321
387
  end
322
388
 
323
389
  ###
324
390
  # Equality -- Two NodeSets are equal if they contain the same number
325
391
  # of elements and if each element is equal to the corresponding
326
392
  # element in the other NodeSet
327
- def == other
393
+ def ==(other)
328
394
  return false unless other.is_a?(Nokogiri::XML::NodeSet)
329
395
  return false unless length == other.length
396
+
330
397
  each_with_index do |node, i|
331
398
  return false unless node == other[i]
332
399
  end
@@ -350,7 +417,7 @@ module Nokogiri
350
417
  def reverse
351
418
  node_set = NodeSet.new(document)
352
419
  (length - 1).downto(0) do |x|
353
- node_set.push self[x]
420
+ node_set.push(self[x])
354
421
  end
355
422
  node_set
356
423
  end
@@ -358,14 +425,23 @@ module Nokogiri
358
425
  ###
359
426
  # Return a nicely formatted string representation
360
427
  def inspect
361
- "[#{map(&:inspect).join ', '}]"
428
+ "[#{map(&:inspect).join(", ")}]"
362
429
  end
363
430
 
364
- alias :+ :|
431
+ alias_method :+, :|
365
432
 
366
- # @private
367
- IMPLIED_XPATH_CONTEXTS = [ './/'.freeze, 'self::'.freeze ].freeze # :nodoc:
433
+ #
434
+ # :call-seq: deconstruct() -> Array
435
+ #
436
+ # Returns the members of this NodeSet as an array, to use in pattern matching.
437
+ #
438
+ # Since v1.14.0
439
+ #
440
+ def deconstruct
441
+ to_a
442
+ end
368
443
 
444
+ IMPLIED_XPATH_CONTEXTS = [".//", "self::"].freeze # :nodoc:
369
445
  end
370
446
  end
371
447
  end
@@ -1,6 +1,19 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Nokogiri
2
4
  module XML
5
+ # Struct representing an {XML Schema Notation}[https://www.w3.org/TR/xml/#Notations]
3
6
  class Notation < Struct.new(:name, :public_id, :system_id)
7
+ # dead comment to ensure rdoc processing
8
+
9
+ # :attr: name (String)
10
+ # The name for the element.
11
+
12
+ # :attr: public_id (String)
13
+ # The URI corresponding to the public identifier
14
+
15
+ # :attr: system_id (String,nil)
16
+ # The URI corresponding to the system identifier
4
17
  end
5
18
  end
6
19
  end
@@ -1,86 +1,175 @@
1
+ # coding: utf-8
2
+ # frozen_string_literal: true
3
+
1
4
  module Nokogiri
2
5
  module XML
3
- ###
4
- # Parse options for passing to Nokogiri.XML or Nokogiri.HTML
5
- #
6
- # == Building combinations of parse options
7
- # You can build your own combinations of these parse options by using any of the following methods:
8
- # *Note*: All examples attempt to set the +RECOVER+ & +NOENT+ options. All examples use Ruby 2 optional parameter syntax.
9
- # [Ruby's bitwise operators] You can use the Ruby bitwise operators to set various combinations.
10
- # Nokogiri.XML('<content>Chapter 1</content', options: Nokogiri::XML::ParseOptions.new((1 << 0) | (1 << 1)))
11
- # [Method chaining] Every option has an equivalent method in lowercase. You can chain these methods together to set various combinations.
12
- # Nokogiri.XML('<content>Chapter 1</content', options: Nokogiri::XML::ParseOptions.new.recover.noent)
13
- # [Using Ruby Blocks] You can also setup parse combinations in the block passed to Nokogiri.XML or Nokogiri.HTML
14
- # Nokogiri.XML('<content>Chapter 1</content') {|config| config.recover.noent}
15
- #
16
- # == Removing particular parse options
17
- # You can also remove options from an instance of +ParseOptions+ dynamically.
18
- # Every option has an equivalent <code>no{option}</code> method in lowercase. You can call these methods on an instance of +ParseOptions+ to remove the option.
19
- # Note that this is not available for +STRICT+.
20
- #
21
- # # Setting the RECOVER & NOENT options...
22
- # options = Nokogiri::XML::ParseOptions.new.recover.noent
23
- # # later...
24
- # options.norecover # Removes the Nokogiri::XML::ParseOptions::RECOVER option
25
- # options.nonoent # Removes the Nokogiri::XML::ParseOptions::NOENT option
6
+ # Options that control the parsing behavior for XML::Document, XML::DocumentFragment,
7
+ # HTML4::Document, HTML4::DocumentFragment, XSLT::Stylesheet, and XML::Schema.
8
+ #
9
+ # These options directly expose libxml2's parse options, which are all boolean in the sense that
10
+ # an option is "on" or "off".
11
+ #
12
+ # 💡 Note that HTML5 parsing has a separate, orthogonal set of options due to the nature of the
13
+ # HTML5 specification. See Nokogiri::HTML5.
14
+ #
15
+ # Not all parse options are supported on JRuby. Nokogiri will attempt to invoke the equivalent
16
+ # behavior in Xerces/NekoHTML on JRuby when it's possible.
17
+ #
18
+ # == Setting and unsetting parse options
19
+ #
20
+ # You can build your own combinations of parse options by using any of the following methods:
21
+ #
22
+ # [ParseOptions method chaining]
23
+ #
24
+ # Every option has an equivalent method in lowercase. You can chain these methods together to
25
+ # set various combinations.
26
+ #
27
+ # # Set the HUGE & PEDANTIC options
28
+ # po = Nokogiri::XML::ParseOptions.new.huge.pedantic
29
+ # doc = Nokogiri::XML::Document.parse(xml, nil, nil, po)
30
+ #
31
+ # Every option has an equivalent <code>no{option}</code> method in lowercase. You can call these
32
+ # methods on an instance of ParseOptions to unset the option.
33
+ #
34
+ # # Set the HUGE & PEDANTIC options
35
+ # po = Nokogiri::XML::ParseOptions.new.huge.pedantic
36
+ #
37
+ # # later we want to modify the options
38
+ # po.nohuge # Unset the HUGE option
39
+ # po.nopedantic # Unset the PEDANTIC option
40
+ #
41
+ # 💡 Note that some options begin with "no" leading to the logical but perhaps unintuitive
42
+ # double negative:
43
+ #
44
+ # po.nocdata # Set the NOCDATA parse option
45
+ # po.nonocdata # Unset the NOCDATA parse option
46
+ #
47
+ # 💡 Note that negation is not available for STRICT, which is itself a negation of all other
48
+ # features.
49
+ #
50
+ #
51
+ # [Using Ruby Blocks]
52
+ #
53
+ # Most parsing methods will accept a block for configuration of parse options, and we
54
+ # recommend chaining the setter methods:
55
+ #
56
+ # doc = Nokogiri::XML::Document.parse(xml) { |config| config.huge.pedantic }
57
+ #
58
+ #
59
+ # [ParseOptions constants]
60
+ #
61
+ # You can also use the constants declared under Nokogiri::XML::ParseOptions to set various
62
+ # combinations. They are bits in a bitmask, and so can be combined with bitwise operators:
63
+ #
64
+ # po = Nokogiri::XML::ParseOptions.new(Nokogiri::XML::ParseOptions::HUGE | Nokogiri::XML::ParseOptions::PEDANTIC)
65
+ # doc = Nokogiri::XML::Document.parse(xml, nil, nil, po)
26
66
  #
27
67
  class ParseOptions
28
68
  # Strict parsing
29
69
  STRICT = 0
30
- # Recover from errors
70
+
71
+ # Recover from errors. On by default for XML::Document, XML::DocumentFragment,
72
+ # HTML4::Document, HTML4::DocumentFragment, XSLT::Stylesheet, and XML::Schema.
31
73
  RECOVER = 1 << 0
32
- # Substitute entities
74
+
75
+ # Substitute entities. Off by default.
76
+ #
77
+ # ⚠ This option enables entity substitution, contrary to what the name implies.
78
+ #
79
+ # ⚠ <b>It is UNSAFE to set this option</b> when parsing untrusted documents.
33
80
  NOENT = 1 << 1
34
- # Load external subsets
81
+
82
+ # Load external subsets. On by default for XSLT::Stylesheet.
83
+ #
84
+ # ⚠ <b>It is UNSAFE to set this option</b> when parsing untrusted documents.
35
85
  DTDLOAD = 1 << 2
36
- # Default DTD attributes
86
+
87
+ # Default DTD attributes. On by default for XSLT::Stylesheet.
37
88
  DTDATTR = 1 << 3
38
- # validate with the DTD
89
+
90
+ # Validate with the DTD. Off by default.
39
91
  DTDVALID = 1 << 4
40
- # suppress error reports
92
+
93
+ # Suppress error reports. On by default for HTML4::Document and HTML4::DocumentFragment
41
94
  NOERROR = 1 << 5
42
- # suppress warning reports
95
+
96
+ # Suppress warning reports. On by default for HTML4::Document and HTML4::DocumentFragment
43
97
  NOWARNING = 1 << 6
44
- # pedantic error reporting
98
+
99
+ # Enable pedantic error reporting. Off by default.
45
100
  PEDANTIC = 1 << 7
46
- # remove blank nodes
101
+
102
+ # Remove blank nodes. Off by default.
47
103
  NOBLANKS = 1 << 8
48
- # use the SAX1 interface internally
104
+
105
+ # Use the SAX1 interface internally. Off by default.
49
106
  SAX1 = 1 << 9
50
- # Implement XInclude substitution
107
+
108
+ # Implement XInclude substitution. Off by default.
51
109
  XINCLUDE = 1 << 10
52
- # Forbid network access. Recommended for dealing with untrusted documents.
110
+
111
+ # Forbid network access. On by default for XML::Document, XML::DocumentFragment,
112
+ # HTML4::Document, HTML4::DocumentFragment, XSLT::Stylesheet, and XML::Schema.
113
+ #
114
+ # ⚠ <b>It is UNSAFE to unset this option</b> when parsing untrusted documents.
53
115
  NONET = 1 << 11
54
- # Do not reuse the context dictionary
116
+
117
+ # Do not reuse the context dictionary. Off by default.
55
118
  NODICT = 1 << 12
56
- # remove redundant namespaces declarations
119
+
120
+ # Remove redundant namespace declarations. Off by default.
57
121
  NSCLEAN = 1 << 13
58
- # merge CDATA as text nodes
122
+
123
+ # Merge CDATA as text nodes. On by default for XSLT::Stylesheet.
59
124
  NOCDATA = 1 << 14
60
- # do not generate XINCLUDE START/END nodes
125
+
126
+ # Do not generate XInclude START/END nodes. Off by default.
61
127
  NOXINCNODE = 1 << 15
62
- # compact small text nodes; no modification of the tree allowed afterwards (will possibly crash if you try to modify the tree)
128
+
129
+ # Compact small text nodes. Off by default.
130
+ #
131
+ # ⚠ No modification of the DOM tree is allowed after parsing. libxml2 may crash if you try to
132
+ # modify the tree.
63
133
  COMPACT = 1 << 16
64
- # parse using XML-1.0 before update 5
134
+
135
+ # Parse using XML-1.0 before update 5. Off by default
65
136
  OLD10 = 1 << 17
66
- # do not fixup XINCLUDE xml:base uris
137
+
138
+ # Do not fixup XInclude xml:base uris. Off by default
67
139
  NOBASEFIX = 1 << 18
68
- # relax any hardcoded limit from the parser
140
+
141
+ # Relax any hardcoded limit from the parser. Off by default.
142
+ #
143
+ # ⚠ There may be a performance penalty when this option is set.
69
144
  HUGE = 1 << 19
70
145
 
71
- # the default options used for parsing XML documents
72
- DEFAULT_XML = RECOVER | NONET
73
- # the default options used for parsing HTML documents
74
- DEFAULT_HTML = RECOVER | NOERROR | NOWARNING | NONET
146
+ # Support line numbers up to <code>long int</code> (default is a <code>short int</code>). On
147
+ # by default for XML::Document, XML::DocumentFragment, HTML4::Document,
148
+ # HTML4::DocumentFragment, XSLT::Stylesheet, and XML::Schema.
149
+ BIG_LINES = 1 << 22
150
+
151
+ # The options mask used by default for parsing XML::Document and XML::DocumentFragment
152
+ DEFAULT_XML = RECOVER | NONET | BIG_LINES
153
+
154
+ # The options mask used by default for parsing XSLT::Stylesheet
155
+ DEFAULT_XSLT = RECOVER | NONET | NOENT | DTDLOAD | DTDATTR | NOCDATA | BIG_LINES
156
+
157
+ # The options mask used by default for parsing HTML4::Document and HTML4::DocumentFragment
158
+ DEFAULT_HTML = RECOVER | NOERROR | NOWARNING | NONET | BIG_LINES
159
+
160
+ # The options mask used by default for parsing XML::Schema
161
+ DEFAULT_SCHEMA = NONET | BIG_LINES
75
162
 
76
163
  attr_accessor :options
77
- def initialize options = STRICT
164
+
165
+ def initialize(options = STRICT)
78
166
  @options = options
79
167
  end
80
168
 
81
169
  constants.each do |constant|
82
170
  next if constant.to_sym == :STRICT
83
- class_eval %{
171
+
172
+ class_eval <<~RUBY, __FILE__, __LINE__ + 1
84
173
  def #{constant.downcase}
85
174
  @options |= #{constant}
86
175
  self
@@ -94,7 +183,7 @@ module Nokogiri
94
183
  def #{constant.downcase}?
95
184
  #{constant} & @options == #{constant}
96
185
  end
97
- }
186
+ RUBY
98
187
  end
99
188
 
100
189
  def strict
@@ -106,14 +195,18 @@ module Nokogiri
106
195
  @options & RECOVER == STRICT
107
196
  end
108
197
 
109
- alias :to_i :options
198
+ def ==(other)
199
+ other.to_i == to_i
200
+ end
201
+
202
+ alias_method :to_i, :options
110
203
 
111
204
  def inspect
112
205
  options = []
113
206
  self.class.constants.each do |k|
114
207
  options << k.downcase if send(:"#{k.downcase}?")
115
208
  end
116
- super.sub(/>$/, " " + options.join(', ') + ">")
209
+ super.sub(/>$/, " " + options.join(", ") + ">")
117
210
  end
118
211
  end
119
212
  end