nokogiri 1.12.5 → 1.14.3

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (156) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile +41 -0
  3. data/LICENSE-DEPENDENCIES.md +830 -509
  4. data/LICENSE.md +1 -1
  5. data/README.md +23 -14
  6. data/bin/nokogiri +63 -50
  7. data/dependencies.yml +33 -66
  8. data/ext/nokogiri/extconf.rb +159 -63
  9. data/ext/nokogiri/gumbo.c +21 -11
  10. data/ext/nokogiri/html4_document.c +2 -2
  11. data/ext/nokogiri/html4_element_description.c +1 -1
  12. data/ext/nokogiri/html4_entity_lookup.c +2 -2
  13. data/ext/nokogiri/html4_sax_parser_context.c +3 -9
  14. data/ext/nokogiri/html4_sax_push_parser.c +1 -1
  15. data/ext/nokogiri/nokogiri.c +38 -51
  16. data/ext/nokogiri/nokogiri.h +26 -14
  17. data/ext/nokogiri/test_global_handlers.c +1 -1
  18. data/ext/nokogiri/xml_attr.c +3 -3
  19. data/ext/nokogiri/xml_attribute_decl.c +5 -5
  20. data/ext/nokogiri/xml_cdata.c +3 -3
  21. data/ext/nokogiri/xml_comment.c +1 -1
  22. data/ext/nokogiri/xml_document.c +53 -44
  23. data/ext/nokogiri/xml_document_fragment.c +1 -3
  24. data/ext/nokogiri/xml_dtd.c +11 -11
  25. data/ext/nokogiri/xml_element_content.c +3 -3
  26. data/ext/nokogiri/xml_element_decl.c +5 -5
  27. data/ext/nokogiri/xml_encoding_handler.c +28 -14
  28. data/ext/nokogiri/xml_entity_decl.c +6 -6
  29. data/ext/nokogiri/xml_entity_reference.c +1 -1
  30. data/ext/nokogiri/xml_namespace.c +80 -14
  31. data/ext/nokogiri/xml_node.c +982 -396
  32. data/ext/nokogiri/xml_node_set.c +4 -6
  33. data/ext/nokogiri/xml_processing_instruction.c +1 -1
  34. data/ext/nokogiri/xml_reader.c +133 -32
  35. data/ext/nokogiri/xml_relax_ng.c +1 -3
  36. data/ext/nokogiri/xml_sax_parser.c +23 -17
  37. data/ext/nokogiri/xml_sax_parser_context.c +11 -9
  38. data/ext/nokogiri/xml_sax_push_parser.c +1 -3
  39. data/ext/nokogiri/xml_schema.c +4 -6
  40. data/ext/nokogiri/xml_syntax_error.c +1 -1
  41. data/ext/nokogiri/xml_text.c +2 -2
  42. data/ext/nokogiri/xml_xpath_context.c +144 -114
  43. data/ext/nokogiri/xslt_stylesheet.c +122 -23
  44. data/gumbo-parser/Makefile +10 -0
  45. data/gumbo-parser/src/attribute.h +1 -1
  46. data/gumbo-parser/src/error.c +2 -2
  47. data/gumbo-parser/src/error.h +1 -1
  48. data/gumbo-parser/src/foreign_attrs.c +2 -2
  49. data/gumbo-parser/src/{gumbo.h → nokogiri_gumbo.h} +1 -0
  50. data/gumbo-parser/src/parser.c +8 -16
  51. data/gumbo-parser/src/replacement.h +1 -1
  52. data/gumbo-parser/src/string_buffer.h +1 -1
  53. data/gumbo-parser/src/string_piece.c +1 -1
  54. data/gumbo-parser/src/svg_attrs.c +2 -2
  55. data/gumbo-parser/src/svg_tags.c +2 -2
  56. data/gumbo-parser/src/tag.c +2 -1
  57. data/gumbo-parser/src/tag_lookup.c +7 -7
  58. data/gumbo-parser/src/tag_lookup.gperf +1 -0
  59. data/gumbo-parser/src/tag_lookup.h +1 -1
  60. data/gumbo-parser/src/token_buffer.h +1 -1
  61. data/gumbo-parser/src/tokenizer.c +1 -1
  62. data/gumbo-parser/src/tokenizer.h +1 -1
  63. data/gumbo-parser/src/utf8.c +1 -1
  64. data/gumbo-parser/src/utf8.h +1 -1
  65. data/gumbo-parser/src/util.c +1 -3
  66. data/gumbo-parser/src/util.h +4 -0
  67. data/gumbo-parser/src/vector.h +1 -1
  68. data/lib/nokogiri/class_resolver.rb +67 -0
  69. data/lib/nokogiri/css/node.rb +9 -8
  70. data/lib/nokogiri/css/parser.rb +360 -341
  71. data/lib/nokogiri/css/parser.y +249 -244
  72. data/lib/nokogiri/css/parser_extras.rb +22 -20
  73. data/lib/nokogiri/css/syntax_error.rb +1 -0
  74. data/lib/nokogiri/css/tokenizer.rb +4 -3
  75. data/lib/nokogiri/css/tokenizer.rex +3 -2
  76. data/lib/nokogiri/css/xpath_visitor.rb +184 -85
  77. data/lib/nokogiri/css.rb +44 -6
  78. data/lib/nokogiri/decorators/slop.rb +8 -7
  79. data/lib/nokogiri/encoding_handler.rb +57 -0
  80. data/lib/nokogiri/extension.rb +4 -3
  81. data/lib/nokogiri/gumbo.rb +1 -0
  82. data/lib/nokogiri/html.rb +16 -10
  83. data/lib/nokogiri/html4/builder.rb +1 -0
  84. data/lib/nokogiri/html4/document.rb +56 -164
  85. data/lib/nokogiri/html4/document_fragment.rb +11 -7
  86. data/lib/nokogiri/html4/element_description.rb +1 -0
  87. data/lib/nokogiri/html4/element_description_defaults.rb +432 -532
  88. data/lib/nokogiri/html4/encoding_reader.rb +121 -0
  89. data/lib/nokogiri/html4/entity_lookup.rb +2 -1
  90. data/lib/nokogiri/html4/sax/parser.rb +5 -2
  91. data/lib/nokogiri/html4/sax/parser_context.rb +1 -0
  92. data/lib/nokogiri/html4/sax/push_parser.rb +7 -7
  93. data/lib/nokogiri/html4.rb +12 -5
  94. data/lib/nokogiri/html5/document.rb +126 -32
  95. data/lib/nokogiri/html5/document_fragment.rb +14 -4
  96. data/lib/nokogiri/html5/node.rb +12 -7
  97. data/lib/nokogiri/html5.rb +138 -222
  98. data/lib/nokogiri/jruby/dependencies.rb +2 -19
  99. data/lib/nokogiri/jruby/nokogiri_jars.rb +43 -0
  100. data/lib/nokogiri/syntax_error.rb +1 -0
  101. data/lib/nokogiri/version/constant.rb +2 -1
  102. data/lib/nokogiri/version/info.rb +32 -24
  103. data/lib/nokogiri/version.rb +1 -0
  104. data/lib/nokogiri/xml/attr.rb +54 -3
  105. data/lib/nokogiri/xml/attribute_decl.rb +2 -1
  106. data/lib/nokogiri/xml/builder.rb +35 -33
  107. data/lib/nokogiri/xml/cdata.rb +2 -1
  108. data/lib/nokogiri/xml/character_data.rb +1 -0
  109. data/lib/nokogiri/xml/document.rb +232 -143
  110. data/lib/nokogiri/xml/document_fragment.rb +88 -42
  111. data/lib/nokogiri/xml/dtd.rb +3 -2
  112. data/lib/nokogiri/xml/element_content.rb +1 -0
  113. data/lib/nokogiri/xml/element_decl.rb +2 -1
  114. data/lib/nokogiri/xml/entity_decl.rb +3 -2
  115. data/lib/nokogiri/xml/entity_reference.rb +1 -0
  116. data/lib/nokogiri/xml/namespace.rb +44 -0
  117. data/lib/nokogiri/xml/node/save_options.rb +14 -8
  118. data/lib/nokogiri/xml/node.rb +708 -383
  119. data/lib/nokogiri/xml/node_set.rb +134 -59
  120. data/lib/nokogiri/xml/notation.rb +12 -0
  121. data/lib/nokogiri/xml/parse_options.rb +140 -56
  122. data/lib/nokogiri/xml/pp/character_data.rb +8 -6
  123. data/lib/nokogiri/xml/pp/node.rb +26 -26
  124. data/lib/nokogiri/xml/pp.rb +1 -0
  125. data/lib/nokogiri/xml/processing_instruction.rb +3 -1
  126. data/lib/nokogiri/xml/reader.rb +20 -24
  127. data/lib/nokogiri/xml/relax_ng.rb +1 -0
  128. data/lib/nokogiri/xml/sax/document.rb +20 -19
  129. data/lib/nokogiri/xml/sax/parser.rb +38 -36
  130. data/lib/nokogiri/xml/sax/parser_context.rb +7 -3
  131. data/lib/nokogiri/xml/sax/push_parser.rb +5 -5
  132. data/lib/nokogiri/xml/sax.rb +1 -0
  133. data/lib/nokogiri/xml/schema.rb +7 -6
  134. data/lib/nokogiri/xml/searchable.rb +93 -62
  135. data/lib/nokogiri/xml/syntax_error.rb +5 -4
  136. data/lib/nokogiri/xml/text.rb +1 -0
  137. data/lib/nokogiri/xml/xpath/syntax_error.rb +2 -1
  138. data/lib/nokogiri/xml/xpath.rb +12 -0
  139. data/lib/nokogiri/xml/xpath_context.rb +2 -3
  140. data/lib/nokogiri/xml.rb +4 -3
  141. data/lib/nokogiri/xslt/stylesheet.rb +1 -0
  142. data/lib/nokogiri/xslt.rb +21 -13
  143. data/lib/nokogiri.rb +22 -27
  144. data/lib/xsd/xmlparser/nokogiri.rb +28 -25
  145. data/patches/libxml2/0009-allow-wildcard-namespaces.patch +77 -0
  146. data/patches/libxslt/0001-update-automake-files-for-arm64.patch +2445 -1919
  147. data/ports/archives/libxml2-2.10.4.tar.xz +0 -0
  148. data/ports/archives/libxslt-1.1.37.tar.xz +0 -0
  149. metadata +20 -171
  150. data/patches/libxml2/0004-use-glibc-strlen.patch +0 -53
  151. data/patches/libxml2/0005-avoid-isnan-isinf.patch +0 -81
  152. data/patches/libxml2/0006-update-automake-files-for-arm64.patch +0 -2511
  153. data/patches/libxml2/0007-Fix-XPath-recursion-limit.patch +0 -31
  154. data/patches/libxslt/0002-Fix-xml2-config-check-in-configure-script.patch +0 -19
  155. data/ports/archives/libxml2-2.9.12.tar.gz +0 -0
  156. data/ports/archives/libxslt-1.1.34.tar.gz +0 -0
@@ -1,4 +1,6 @@
1
+ # coding: utf-8
1
2
  # frozen_string_literal: true
3
+
2
4
  module Nokogiri
3
5
  module XML
4
6
  ####
@@ -12,10 +14,10 @@ module Nokogiri
12
14
  # The Document this NodeSet is associated with
13
15
  attr_accessor :document
14
16
 
15
- alias :clone :dup
17
+ alias_method :clone, :dup
16
18
 
17
19
  # Create a NodeSet with +document+ defaulting to +list+
18
- def initialize document, list = []
20
+ def initialize(document, list = [])
19
21
  @document = document
20
22
  document.decorate(self)
21
23
  list.each { |x| self << x }
@@ -24,8 +26,9 @@ module Nokogiri
24
26
 
25
27
  ###
26
28
  # Get the first element of the NodeSet.
27
- def first n = nil
29
+ def first(n = nil)
28
30
  return self[0] unless n
31
+
29
32
  list = []
30
33
  [n, length].min.times { |i| list << self[i] }
31
34
  list
@@ -47,7 +50,7 @@ module Nokogiri
47
50
  # Returns the index of the first node in self that is == to +node+ or meets the given block. Returns nil if no match is found.
48
51
  def index(node = nil)
49
52
  if node
50
- warn "given block not used" if block_given?
53
+ warn("given block not used") if block_given?
51
54
  each_with_index { |member, j| return j if member == node }
52
55
  elsif block_given?
53
56
  each_with_index { |member, j| return j if yield(member) }
@@ -57,18 +60,18 @@ module Nokogiri
57
60
 
58
61
  ###
59
62
  # Insert +datum+ before the first Node in this NodeSet
60
- def before datum
61
- first.before datum
63
+ def before(datum)
64
+ first.before(datum)
62
65
  end
63
66
 
64
67
  ###
65
68
  # Insert +datum+ after the last Node in this NodeSet
66
- def after datum
67
- last.after datum
69
+ def after(datum)
70
+ last.after(datum)
68
71
  end
69
72
 
70
- alias :<< :push
71
- alias :remove :unlink
73
+ alias_method :<<, :push
74
+ alias_method :remove, :unlink
72
75
 
73
76
  ###
74
77
  # call-seq: css *rules, [namespace-bindings, custom-pseudo-class]
@@ -77,7 +80,7 @@ module Nokogiri
77
80
  # selectors. For example:
78
81
  #
79
82
  # For more information see Nokogiri::XML::Searchable#css
80
- def css *args
83
+ def css(*args)
81
84
  rules, handler, ns, _ = extract_params(args)
82
85
  paths = css_rules_to_xpath(rules, ns)
83
86
 
@@ -93,7 +96,7 @@ module Nokogiri
93
96
  # queries.
94
97
  #
95
98
  # For more information see Nokogiri::XML::Searchable#xpath
96
- def xpath *args
99
+ def xpath(*args)
97
100
  paths, handler, ns, binds = extract_params(args)
98
101
 
99
102
  inject(NodeSet.new(document)) do |set, node|
@@ -101,13 +104,6 @@ module Nokogiri
101
104
  end
102
105
  end
103
106
 
104
- ###
105
- # Search this NodeSet's nodes' immediate children using CSS selector +selector+
106
- def > selector
107
- ns = document.root.namespaces
108
- xpath CSS.xpath_for(selector, :prefix => "./", :ns => ns).first
109
- end
110
-
111
107
  ###
112
108
  # call-seq: search *paths, [namespace-bindings, xpath-variable-bindings, custom-handler-class]
113
109
  #
@@ -120,18 +116,18 @@ module Nokogiri
120
116
  #
121
117
  # node_set.at(3) # same as node_set[3]
122
118
  #
123
- def at *args
119
+ def at(*args)
124
120
  if args.length == 1 && args.first.is_a?(Numeric)
125
121
  return self[args.first]
126
122
  end
127
123
 
128
124
  super(*args)
129
125
  end
130
- alias :% :at
126
+ alias_method :%, :at
131
127
 
132
128
  ###
133
129
  # Filter this list for nodes that match +expr+
134
- def filter expr
130
+ def filter(expr)
135
131
  find_all { |node| node.matches?(expr) }
136
132
  end
137
133
 
@@ -140,7 +136,7 @@ module Nokogiri
140
136
  # NodeSet.
141
137
  #
142
138
  # See Nokogiri::XML::Node#add_class for more information.
143
- def add_class name
139
+ def add_class(name)
144
140
  each do |el|
145
141
  el.add_class(name)
146
142
  end
@@ -152,7 +148,7 @@ module Nokogiri
152
148
  # NodeSet.
153
149
  #
154
150
  # See Nokogiri::XML::Node#append_class for more information.
155
- def append_class name
151
+ def append_class(name)
156
152
  each do |el|
157
153
  el.append_class(name)
158
154
  end
@@ -164,7 +160,7 @@ module Nokogiri
164
160
  # NodeSet.
165
161
  #
166
162
  # See Nokogiri::XML::Node#remove_class for more information.
167
- def remove_class name = nil
163
+ def remove_class(name = nil)
168
164
  each do |el|
169
165
  el.remove_class(name)
170
166
  end
@@ -204,31 +200,31 @@ module Nokogiri
204
200
  #
205
201
  # node_set.attr("class") { |node| node.name }
206
202
  #
207
- def attr key, value = nil, &block
203
+ def attr(key, value = nil, &block)
208
204
  unless key.is_a?(Hash) || (key && (value || block))
209
- return first ? first.attribute(key) : nil
205
+ return first&.attribute(key)
210
206
  end
211
207
 
212
208
  hash = key.is_a?(Hash) ? key : { key => value }
213
209
 
214
- hash.each do |k,v|
210
+ hash.each do |k, v|
215
211
  each do |node|
216
- node[k] = v || block.call(node)
212
+ node[k] = v || yield(node)
217
213
  end
218
214
  end
219
215
 
220
216
  self
221
217
  end
222
- alias :set :attr
223
- alias :attribute :attr
218
+ alias_method :set, :attr
219
+ alias_method :attribute, :attr
224
220
 
225
221
  ###
226
222
  # Remove the attribute named +name+ from all Node objects in the NodeSet
227
- def remove_attr name
228
- each { |el| el.delete name }
223
+ def remove_attr(name)
224
+ each { |el| el.delete(name) }
229
225
  self
230
226
  end
231
- alias remove_attribute remove_attr
227
+ alias_method :remove_attribute, :remove_attr
232
228
 
233
229
  ###
234
230
  # Iterate over each node, yielding to +block+
@@ -255,20 +251,83 @@ module Nokogiri
255
251
  #
256
252
  # See Nokogiri::XML::Node#content for more information.
257
253
  def inner_text
258
- collect(&:inner_text).join('')
254
+ collect(&:inner_text).join("")
259
255
  end
260
- alias :text :inner_text
256
+ alias_method :text, :inner_text
261
257
 
262
258
  ###
263
259
  # Get the inner html of all contained Node objects
264
- def inner_html *args
265
- collect{|j| j.inner_html(*args) }.join('')
260
+ def inner_html(*args)
261
+ collect { |j| j.inner_html(*args) }.join("")
266
262
  end
267
263
 
268
- ###
269
- # Wrap this NodeSet with +html+
270
- def wrap html
271
- map { |node| node.wrap html }
264
+ # :call-seq:
265
+ # wrap(markup) -> self
266
+ # wrap(node) -> self
267
+ #
268
+ # Wrap each member of this NodeSet with the node parsed from +markup+ or a dup of the +node+.
269
+ #
270
+ # [Parameters]
271
+ # - *markup* (String)
272
+ # Markup that is parsed, once per member of the NodeSet, and used as the wrapper. Each
273
+ # node's parent, if it exists, is used as the context node for parsing; otherwise the
274
+ # associated document is used. If the parsed fragment has multiple roots, the first root
275
+ # node is used as the wrapper.
276
+ # - *node* (Nokogiri::XML::Node)
277
+ # An element that is `#dup`ed and used as the wrapper.
278
+ #
279
+ # [Returns] +self+, to support chaining.
280
+ #
281
+ # ⚠ Note that if a +String+ is passed, the markup will be parsed <b>once per node</b> in the
282
+ # NodeSet. You can avoid this overhead in cases where you know exactly the wrapper you wish to
283
+ # use by passing a +Node+ instead.
284
+ #
285
+ # Also see Node#wrap
286
+ #
287
+ # *Example* with a +String+ argument:
288
+ #
289
+ # doc = Nokogiri::HTML5(<<~HTML)
290
+ # <html><body>
291
+ # <a>a</a>
292
+ # <a>b</a>
293
+ # <a>c</a>
294
+ # <a>d</a>
295
+ # </body></html>
296
+ # HTML
297
+ # doc.css("a").wrap("<div></div>")
298
+ # doc.to_html
299
+ # # => <html><head></head><body>
300
+ # # <div><a>a</a></div>
301
+ # # <div><a>b</a></div>
302
+ # # <div><a>c</a></div>
303
+ # # <div><a>d</a></div>
304
+ # # </body></html>
305
+ #
306
+ # *Example* with a +Node+ argument
307
+ #
308
+ # 💡 Note that this is faster than the equivalent call passing a +String+ because it avoids
309
+ # having to reparse the wrapper markup for each node.
310
+ #
311
+ # doc = Nokogiri::HTML5(<<~HTML)
312
+ # <html><body>
313
+ # <a>a</a>
314
+ # <a>b</a>
315
+ # <a>c</a>
316
+ # <a>d</a>
317
+ # </body></html>
318
+ # HTML
319
+ # doc.css("a").wrap(doc.create_element("div"))
320
+ # doc.to_html
321
+ # # => <html><head></head><body>
322
+ # # <div><a>a</a></div>
323
+ # # <div><a>b</a></div>
324
+ # # <div><a>c</a></div>
325
+ # # <div><a>d</a></div>
326
+ # # </body></html>
327
+ #
328
+ def wrap(node_or_tags)
329
+ map { |node| node.wrap(node_or_tags) }
330
+ self
272
331
  end
273
332
 
274
333
  ###
@@ -279,38 +338,43 @@ module Nokogiri
279
338
 
280
339
  ###
281
340
  # Convert this NodeSet to HTML
282
- def to_html *args
341
+ def to_html(*args)
283
342
  if Nokogiri.jruby?
284
343
  options = args.first.is_a?(Hash) ? args.shift : {}
285
- if !options[:save_with]
286
- options[:save_with] = Node::SaveOptions::NO_DECLARATION | Node::SaveOptions::NO_EMPTY_TAGS | Node::SaveOptions::AS_HTML
287
- end
344
+ options[:save_with] ||= Node::SaveOptions::DEFAULT_HTML
288
345
  args.insert(0, options)
289
346
  end
290
- map { |x| x.to_html(*args) }.join
347
+ if empty?
348
+ encoding = (args.first.is_a?(Hash) ? args.first[:encoding] : nil)
349
+ encoding ||= document.encoding
350
+ encoding.nil? ? "" : "".encode(encoding)
351
+ else
352
+ map { |x| x.to_html(*args) }.join
353
+ end
291
354
  end
292
355
 
293
356
  ###
294
357
  # Convert this NodeSet to XHTML
295
- def to_xhtml *args
358
+ def to_xhtml(*args)
296
359
  map { |x| x.to_xhtml(*args) }.join
297
360
  end
298
361
 
299
362
  ###
300
363
  # Convert this NodeSet to XML
301
- def to_xml *args
364
+ def to_xml(*args)
302
365
  map { |x| x.to_xml(*args) }.join
303
366
  end
304
367
 
305
- alias :size :length
306
- alias :to_ary :to_a
368
+ alias_method :size, :length
369
+ alias_method :to_ary, :to_a
307
370
 
308
371
  ###
309
372
  # Removes the last element from set and returns it, or +nil+ if
310
373
  # the set is empty
311
374
  def pop
312
375
  return nil if length == 0
313
- delete last
376
+
377
+ delete(last)
314
378
  end
315
379
 
316
380
  ###
@@ -318,16 +382,18 @@ module Nokogiri
318
382
  # +nil+ if the set is empty.
319
383
  def shift
320
384
  return nil if length == 0
321
- delete first
385
+
386
+ delete(first)
322
387
  end
323
388
 
324
389
  ###
325
390
  # Equality -- Two NodeSets are equal if they contain the same number
326
391
  # of elements and if each element is equal to the corresponding
327
392
  # element in the other NodeSet
328
- def == other
393
+ def ==(other)
329
394
  return false unless other.is_a?(Nokogiri::XML::NodeSet)
330
395
  return false unless length == other.length
396
+
331
397
  each_with_index do |node, i|
332
398
  return false unless node == other[i]
333
399
  end
@@ -351,7 +417,7 @@ module Nokogiri
351
417
  def reverse
352
418
  node_set = NodeSet.new(document)
353
419
  (length - 1).downto(0) do |x|
354
- node_set.push self[x]
420
+ node_set.push(self[x])
355
421
  end
356
422
  node_set
357
423
  end
@@ -359,14 +425,23 @@ module Nokogiri
359
425
  ###
360
426
  # Return a nicely formatted string representation
361
427
  def inspect
362
- "[#{map(&:inspect).join ', '}]"
428
+ "[#{map(&:inspect).join(", ")}]"
363
429
  end
364
430
 
365
- alias :+ :|
431
+ alias_method :+, :|
366
432
 
367
- # @private
368
- IMPLIED_XPATH_CONTEXTS = [ './/'.freeze, 'self::'.freeze ].freeze # :nodoc:
433
+ #
434
+ # :call-seq: deconstruct() Array
435
+ #
436
+ # Returns the members of this NodeSet as an array, to use in pattern matching.
437
+ #
438
+ # ⚡ This is an experimental feature, available since v1.14.0
439
+ #
440
+ def deconstruct
441
+ to_a
442
+ end
369
443
 
444
+ IMPLIED_XPATH_CONTEXTS = [".//", "self::"].freeze # :nodoc:
370
445
  end
371
446
  end
372
447
  end
@@ -1,7 +1,19 @@
1
1
  # frozen_string_literal: true
2
+
2
3
  module Nokogiri
3
4
  module XML
5
+ # Struct representing an {XML Schema Notation}[https://www.w3.org/TR/xml/#Notations]
4
6
  class Notation < Struct.new(:name, :public_id, :system_id)
7
+ # dead comment to ensure rdoc processing
8
+
9
+ # :attr: name (String)
10
+ # The name for the element.
11
+
12
+ # :attr: public_id (String)
13
+ # The URI corresponding to the public identifier
14
+
15
+ # :attr: system_id (String,nil)
16
+ # The URI corresponding to the system identifier
5
17
  end
6
18
  end
7
19
  end
@@ -1,91 +1,175 @@
1
+ # coding: utf-8
1
2
  # frozen_string_literal: true
3
+
2
4
  module Nokogiri
3
5
  module XML
4
- ###
5
- # Parse options for passing to Nokogiri.XML or Nokogiri.HTML
6
- #
7
- # == Building combinations of parse options
8
- # You can build your own combinations of these parse options by using any of the following methods:
9
- # *Note*: All examples attempt to set the +RECOVER+ & +NOENT+ options.
10
- # [Ruby's bitwise operators] You can use the Ruby bitwise operators to set various combinations.
11
- # Nokogiri.XML('<content>Chapter 1</content', nil, nil, Nokogiri::XML::ParseOptions.new((1 << 0) | (1 << 1)))
12
- # [Method chaining] Every option has an equivalent method in lowercase. You can chain these methods together to set various combinations.
13
- # Nokogiri.XML('<content>Chapter 1</content', nil, nil, Nokogiri::XML::ParseOptions.new.recover.noent)
14
- # [Using Ruby Blocks] You can also setup parse combinations in the block passed to Nokogiri.XML or Nokogiri.HTML
15
- # Nokogiri.XML('<content>Chapter 1</content') {|config| config.recover.noent}
16
- #
17
- # == Removing particular parse options
18
- # You can also remove options from an instance of +ParseOptions+ dynamically.
19
- # Every option has an equivalent <code>no{option}</code> method in lowercase. You can call these methods on an instance of +ParseOptions+ to remove the option.
20
- # Note that this is not available for +STRICT+.
21
- #
22
- # # Setting the RECOVER & NOENT options...
23
- # options = Nokogiri::XML::ParseOptions.new.recover.noent
24
- # # later...
25
- # options.norecover # Removes the Nokogiri::XML::ParseOptions::RECOVER option
26
- # options.nonoent # Removes the Nokogiri::XML::ParseOptions::NOENT option
6
+ # Options that control the parsing behavior for XML::Document, XML::DocumentFragment,
7
+ # HTML4::Document, HTML4::DocumentFragment, XSLT::Stylesheet, and XML::Schema.
8
+ #
9
+ # These options directly expose libxml2's parse options, which are all boolean in the sense that
10
+ # an option is "on" or "off".
11
+ #
12
+ # 💡 Note that HTML5 parsing has a separate, orthogonal set of options due to the nature of the
13
+ # HTML5 specification. See Nokogiri::HTML5.
14
+ #
15
+ # ⚠ Not all parse options are supported on JRuby. Nokogiri will attempt to invoke the equivalent
16
+ # behavior in Xerces/NekoHTML on JRuby when it's possible.
17
+ #
18
+ # == Setting and unsetting parse options
19
+ #
20
+ # You can build your own combinations of parse options by using any of the following methods:
21
+ #
22
+ # [ParseOptions method chaining]
23
+ #
24
+ # Every option has an equivalent method in lowercase. You can chain these methods together to
25
+ # set various combinations.
26
+ #
27
+ # # Set the HUGE & PEDANTIC options
28
+ # po = Nokogiri::XML::ParseOptions.new.huge.pedantic
29
+ # doc = Nokogiri::XML::Document.parse(xml, nil, nil, po)
30
+ #
31
+ # Every option has an equivalent <code>no{option}</code> method in lowercase. You can call these
32
+ # methods on an instance of ParseOptions to unset the option.
33
+ #
34
+ # # Set the HUGE & PEDANTIC options
35
+ # po = Nokogiri::XML::ParseOptions.new.huge.pedantic
36
+ #
37
+ # # later we want to modify the options
38
+ # po.nohuge # Unset the HUGE option
39
+ # po.nopedantic # Unset the PEDANTIC option
40
+ #
41
+ # 💡 Note that some options begin with "no" leading to the logical but perhaps unintuitive
42
+ # double negative:
43
+ #
44
+ # po.nocdata # Set the NOCDATA parse option
45
+ # po.nonocdata # Unset the NOCDATA parse option
46
+ #
47
+ # 💡 Note that negation is not available for STRICT, which is itself a negation of all other
48
+ # features.
49
+ #
50
+ #
51
+ # [Using Ruby Blocks]
52
+ #
53
+ # Most parsing methods will accept a block for configuration of parse options, and we
54
+ # recommend chaining the setter methods:
55
+ #
56
+ # doc = Nokogiri::XML::Document.parse(xml) { |config| config.huge.pedantic }
57
+ #
58
+ #
59
+ # [ParseOptions constants]
60
+ #
61
+ # You can also use the constants declared under Nokogiri::XML::ParseOptions to set various
62
+ # combinations. They are bits in a bitmask, and so can be combined with bitwise operators:
63
+ #
64
+ # po = Nokogiri::XML::ParseOptions.new(Nokogiri::XML::ParseOptions::HUGE | Nokogiri::XML::ParseOptions::PEDANTIC)
65
+ # doc = Nokogiri::XML::Document.parse(xml, nil, nil, po)
27
66
  #
28
67
  class ParseOptions
29
68
  # Strict parsing
30
69
  STRICT = 0
31
- # Recover from errors
70
+
71
+ # Recover from errors. On by default for XML::Document, XML::DocumentFragment,
72
+ # HTML4::Document, HTML4::DocumentFragment, XSLT::Stylesheet, and XML::Schema.
32
73
  RECOVER = 1 << 0
33
- # Substitute entities
74
+
75
+ # Substitute entities. Off by default.
76
+ #
77
+ # ⚠ This option enables entity substitution, contrary to what the name implies.
78
+ #
79
+ # ⚠ <b>It is UNSAFE to set this option</b> when parsing untrusted documents.
34
80
  NOENT = 1 << 1
35
- # Load external subsets
81
+
82
+ # Load external subsets. On by default for XSLT::Stylesheet.
83
+ #
84
+ # ⚠ <b>It is UNSAFE to set this option</b> when parsing untrusted documents.
36
85
  DTDLOAD = 1 << 2
37
- # Default DTD attributes
86
+
87
+ # Default DTD attributes. On by default for XSLT::Stylesheet.
38
88
  DTDATTR = 1 << 3
39
- # validate with the DTD
89
+
90
+ # Validate with the DTD. Off by default.
40
91
  DTDVALID = 1 << 4
41
- # suppress error reports
92
+
93
+ # Suppress error reports. On by default for HTML4::Document and HTML4::DocumentFragment
42
94
  NOERROR = 1 << 5
43
- # suppress warning reports
95
+
96
+ # Suppress warning reports. On by default for HTML4::Document and HTML4::DocumentFragment
44
97
  NOWARNING = 1 << 6
45
- # pedantic error reporting
98
+
99
+ # Enable pedantic error reporting. Off by default.
46
100
  PEDANTIC = 1 << 7
47
- # remove blank nodes
101
+
102
+ # Remove blank nodes. Off by default.
48
103
  NOBLANKS = 1 << 8
49
- # use the SAX1 interface internally
104
+
105
+ # Use the SAX1 interface internally. Off by default.
50
106
  SAX1 = 1 << 9
51
- # Implement XInclude substitution
107
+
108
+ # Implement XInclude substitution. Off by default.
52
109
  XINCLUDE = 1 << 10
53
- # Forbid network access. Recommended for dealing with untrusted documents.
110
+
111
+ # Forbid network access. On by default for XML::Document, XML::DocumentFragment,
112
+ # HTML4::Document, HTML4::DocumentFragment, XSLT::Stylesheet, and XML::Schema.
113
+ #
114
+ # ⚠ <b>It is UNSAFE to unset this option</b> when parsing untrusted documents.
54
115
  NONET = 1 << 11
55
- # Do not reuse the context dictionary
116
+
117
+ # Do not reuse the context dictionary. Off by default.
56
118
  NODICT = 1 << 12
57
- # remove redundant namespaces declarations
119
+
120
+ # Remove redundant namespaces declarations. Off by default.
58
121
  NSCLEAN = 1 << 13
59
- # merge CDATA as text nodes
122
+
123
+ # Merge CDATA as text nodes. On by default for XSLT::Stylesheet.
60
124
  NOCDATA = 1 << 14
61
- # do not generate XINCLUDE START/END nodes
125
+
126
+ # Do not generate XInclude START/END nodes. Off by default.
62
127
  NOXINCNODE = 1 << 15
63
- # compact small text nodes; no modification of the tree allowed afterwards (will possibly crash if you try to modify the tree)
128
+
129
+ # Compact small text nodes. Off by default.
130
+ #
131
+ # ⚠ No modification of the DOM tree is allowed after parsing. libxml2 may crash if you try to
132
+ # modify the tree.
64
133
  COMPACT = 1 << 16
65
- # parse using XML-1.0 before update 5
134
+
135
+ # Parse using XML-1.0 before update 5. Off by default.
66
136
  OLD10 = 1 << 17
67
- # do not fixup XINCLUDE xml:base uris
137
+
138
+ # Do not fixup XInclude xml:base uris. Off by default
68
139
  NOBASEFIX = 1 << 18
69
- # relax any hardcoded limit from the parser
140
+
141
+ # Relax any hardcoded limit from the parser. Off by default.
142
+ #
143
+ # ⚠ There may be a performance penalty when this option is set.
70
144
  HUGE = 1 << 19
71
145
 
72
- # the default options used for parsing XML documents
73
- DEFAULT_XML = RECOVER | NONET
74
- # the default options used for parsing XSLT stylesheets
75
- DEFAULT_XSLT = RECOVER | NONET | NOENT | DTDLOAD | DTDATTR | NOCDATA
76
- # the default options used for parsing HTML documents
77
- DEFAULT_HTML = RECOVER | NOERROR | NOWARNING | NONET
78
- # the default options used for parsing XML schemas
79
- DEFAULT_SCHEMA = NONET
146
+ # Support line numbers up to <code>long int</code> (default is a <code>short int</code>). On
147
+ # by default for XML::Document, XML::DocumentFragment, HTML4::Document,
148
+ # HTML4::DocumentFragment, XSLT::Stylesheet, and XML::Schema.
149
+ BIG_LINES = 1 << 22
150
+
151
+ # The options mask used by default for parsing XML::Document and XML::DocumentFragment
152
+ DEFAULT_XML = RECOVER | NONET | BIG_LINES
153
+
154
+ # The options mask used by default for parsing XSLT::Stylesheet
155
+ DEFAULT_XSLT = RECOVER | NONET | NOENT | DTDLOAD | DTDATTR | NOCDATA | BIG_LINES
156
+
157
+ # The options mask used by default for parsing HTML4::Document and HTML4::DocumentFragment
158
+ DEFAULT_HTML = RECOVER | NOERROR | NOWARNING | NONET | BIG_LINES
159
+
160
+ # The options mask used by default for parsing XML::Schema
161
+ DEFAULT_SCHEMA = NONET | BIG_LINES
80
162
 
81
163
  attr_accessor :options
82
- def initialize options = STRICT
164
+
165
+ def initialize(options = STRICT)
83
166
  @options = options
84
167
  end
85
168
 
86
169
  constants.each do |constant|
87
170
  next if constant.to_sym == :STRICT
88
- class_eval %{
171
+
172
+ class_eval <<~RUBY, __FILE__, __LINE__ + 1
89
173
  def #{constant.downcase}
90
174
  @options |= #{constant}
91
175
  self
@@ -99,7 +183,7 @@ module Nokogiri
99
183
  def #{constant.downcase}?
100
184
  #{constant} & @options == #{constant}
101
185
  end
102
- }
186
+ RUBY
103
187
  end
104
188
 
105
189
  def strict
@@ -115,14 +199,14 @@ module Nokogiri
115
199
  other.to_i == to_i
116
200
  end
117
201
 
118
- alias :to_i :options
202
+ alias_method :to_i, :options
119
203
 
120
204
  def inspect
121
205
  options = []
122
206
  self.class.constants.each do |k|
123
207
  options << k.downcase if send(:"#{k.downcase}?")
124
208
  end
125
- super.sub(/>$/, " " + options.join(', ') + ">")
209
+ super.sub(/>$/, " " + options.join(", ") + ">")
126
210
  end
127
211
  end
128
212
  end