nokogiri 1.18.0.rc1-x86_64-linux-gnu

Sign up to get free protection for your applications and to get access to all the features.
Files changed (203) hide show
  1. checksums.yaml +7 -0
  2. data/Gemfile +38 -0
  3. data/LICENSE-DEPENDENCIES.md +2224 -0
  4. data/LICENSE.md +9 -0
  5. data/README.md +293 -0
  6. data/bin/nokogiri +131 -0
  7. data/dependencies.yml +42 -0
  8. data/ext/nokogiri/depend +38 -0
  9. data/ext/nokogiri/extconf.rb +1173 -0
  10. data/ext/nokogiri/gumbo.c +610 -0
  11. data/ext/nokogiri/html4_document.c +171 -0
  12. data/ext/nokogiri/html4_element_description.c +299 -0
  13. data/ext/nokogiri/html4_entity_lookup.c +37 -0
  14. data/ext/nokogiri/html4_sax_parser.c +40 -0
  15. data/ext/nokogiri/html4_sax_parser_context.c +98 -0
  16. data/ext/nokogiri/html4_sax_push_parser.c +96 -0
  17. data/ext/nokogiri/include/libexslt/exslt.h +108 -0
  18. data/ext/nokogiri/include/libexslt/exsltconfig.h +70 -0
  19. data/ext/nokogiri/include/libexslt/exsltexports.h +63 -0
  20. data/ext/nokogiri/include/libxml2/libxml/HTMLparser.h +336 -0
  21. data/ext/nokogiri/include/libxml2/libxml/HTMLtree.h +147 -0
  22. data/ext/nokogiri/include/libxml2/libxml/SAX.h +202 -0
  23. data/ext/nokogiri/include/libxml2/libxml/SAX2.h +171 -0
  24. data/ext/nokogiri/include/libxml2/libxml/c14n.h +115 -0
  25. data/ext/nokogiri/include/libxml2/libxml/catalog.h +182 -0
  26. data/ext/nokogiri/include/libxml2/libxml/chvalid.h +230 -0
  27. data/ext/nokogiri/include/libxml2/libxml/debugXML.h +217 -0
  28. data/ext/nokogiri/include/libxml2/libxml/dict.h +82 -0
  29. data/ext/nokogiri/include/libxml2/libxml/encoding.h +244 -0
  30. data/ext/nokogiri/include/libxml2/libxml/entities.h +166 -0
  31. data/ext/nokogiri/include/libxml2/libxml/globals.h +41 -0
  32. data/ext/nokogiri/include/libxml2/libxml/hash.h +251 -0
  33. data/ext/nokogiri/include/libxml2/libxml/list.h +137 -0
  34. data/ext/nokogiri/include/libxml2/libxml/nanoftp.h +186 -0
  35. data/ext/nokogiri/include/libxml2/libxml/nanohttp.h +98 -0
  36. data/ext/nokogiri/include/libxml2/libxml/parser.h +1390 -0
  37. data/ext/nokogiri/include/libxml2/libxml/parserInternals.h +671 -0
  38. data/ext/nokogiri/include/libxml2/libxml/pattern.h +106 -0
  39. data/ext/nokogiri/include/libxml2/libxml/relaxng.h +219 -0
  40. data/ext/nokogiri/include/libxml2/libxml/schemasInternals.h +959 -0
  41. data/ext/nokogiri/include/libxml2/libxml/schematron.h +143 -0
  42. data/ext/nokogiri/include/libxml2/libxml/threads.h +87 -0
  43. data/ext/nokogiri/include/libxml2/libxml/tree.h +1382 -0
  44. data/ext/nokogiri/include/libxml2/libxml/uri.h +106 -0
  45. data/ext/nokogiri/include/libxml2/libxml/valid.h +477 -0
  46. data/ext/nokogiri/include/libxml2/libxml/xinclude.h +136 -0
  47. data/ext/nokogiri/include/libxml2/libxml/xlink.h +189 -0
  48. data/ext/nokogiri/include/libxml2/libxml/xmlIO.h +438 -0
  49. data/ext/nokogiri/include/libxml2/libxml/xmlautomata.h +146 -0
  50. data/ext/nokogiri/include/libxml2/libxml/xmlerror.h +962 -0
  51. data/ext/nokogiri/include/libxml2/libxml/xmlexports.h +146 -0
  52. data/ext/nokogiri/include/libxml2/libxml/xmlmemory.h +188 -0
  53. data/ext/nokogiri/include/libxml2/libxml/xmlmodule.h +57 -0
  54. data/ext/nokogiri/include/libxml2/libxml/xmlreader.h +436 -0
  55. data/ext/nokogiri/include/libxml2/libxml/xmlregexp.h +215 -0
  56. data/ext/nokogiri/include/libxml2/libxml/xmlsave.h +102 -0
  57. data/ext/nokogiri/include/libxml2/libxml/xmlschemas.h +249 -0
  58. data/ext/nokogiri/include/libxml2/libxml/xmlschemastypes.h +152 -0
  59. data/ext/nokogiri/include/libxml2/libxml/xmlstring.h +140 -0
  60. data/ext/nokogiri/include/libxml2/libxml/xmlunicode.h +366 -0
  61. data/ext/nokogiri/include/libxml2/libxml/xmlversion.h +347 -0
  62. data/ext/nokogiri/include/libxml2/libxml/xmlwriter.h +489 -0
  63. data/ext/nokogiri/include/libxml2/libxml/xpath.h +579 -0
  64. data/ext/nokogiri/include/libxml2/libxml/xpathInternals.h +633 -0
  65. data/ext/nokogiri/include/libxml2/libxml/xpointer.h +138 -0
  66. data/ext/nokogiri/include/libxslt/attributes.h +39 -0
  67. data/ext/nokogiri/include/libxslt/documents.h +93 -0
  68. data/ext/nokogiri/include/libxslt/extensions.h +262 -0
  69. data/ext/nokogiri/include/libxslt/extra.h +72 -0
  70. data/ext/nokogiri/include/libxslt/functions.h +78 -0
  71. data/ext/nokogiri/include/libxslt/imports.h +75 -0
  72. data/ext/nokogiri/include/libxslt/keys.h +53 -0
  73. data/ext/nokogiri/include/libxslt/namespaces.h +68 -0
  74. data/ext/nokogiri/include/libxslt/numbersInternals.h +73 -0
  75. data/ext/nokogiri/include/libxslt/pattern.h +84 -0
  76. data/ext/nokogiri/include/libxslt/preproc.h +43 -0
  77. data/ext/nokogiri/include/libxslt/security.h +104 -0
  78. data/ext/nokogiri/include/libxslt/templates.h +77 -0
  79. data/ext/nokogiri/include/libxslt/transform.h +207 -0
  80. data/ext/nokogiri/include/libxslt/variables.h +118 -0
  81. data/ext/nokogiri/include/libxslt/xslt.h +110 -0
  82. data/ext/nokogiri/include/libxslt/xsltInternals.h +1995 -0
  83. data/ext/nokogiri/include/libxslt/xsltconfig.h +146 -0
  84. data/ext/nokogiri/include/libxslt/xsltexports.h +64 -0
  85. data/ext/nokogiri/include/libxslt/xsltlocale.h +44 -0
  86. data/ext/nokogiri/include/libxslt/xsltutils.h +343 -0
  87. data/ext/nokogiri/libxml2_polyfill.c +114 -0
  88. data/ext/nokogiri/nokogiri.c +294 -0
  89. data/ext/nokogiri/nokogiri.h +238 -0
  90. data/ext/nokogiri/test_global_handlers.c +40 -0
  91. data/ext/nokogiri/xml_attr.c +103 -0
  92. data/ext/nokogiri/xml_attribute_decl.c +70 -0
  93. data/ext/nokogiri/xml_cdata.c +62 -0
  94. data/ext/nokogiri/xml_comment.c +57 -0
  95. data/ext/nokogiri/xml_document.c +784 -0
  96. data/ext/nokogiri/xml_document_fragment.c +29 -0
  97. data/ext/nokogiri/xml_dtd.c +208 -0
  98. data/ext/nokogiri/xml_element_content.c +131 -0
  99. data/ext/nokogiri/xml_element_decl.c +69 -0
  100. data/ext/nokogiri/xml_encoding_handler.c +112 -0
  101. data/ext/nokogiri/xml_entity_decl.c +112 -0
  102. data/ext/nokogiri/xml_entity_reference.c +50 -0
  103. data/ext/nokogiri/xml_namespace.c +181 -0
  104. data/ext/nokogiri/xml_node.c +2459 -0
  105. data/ext/nokogiri/xml_node_set.c +518 -0
  106. data/ext/nokogiri/xml_processing_instruction.c +54 -0
  107. data/ext/nokogiri/xml_reader.c +777 -0
  108. data/ext/nokogiri/xml_relax_ng.c +149 -0
  109. data/ext/nokogiri/xml_sax_parser.c +403 -0
  110. data/ext/nokogiri/xml_sax_parser_context.c +390 -0
  111. data/ext/nokogiri/xml_sax_push_parser.c +206 -0
  112. data/ext/nokogiri/xml_schema.c +226 -0
  113. data/ext/nokogiri/xml_syntax_error.c +93 -0
  114. data/ext/nokogiri/xml_text.c +59 -0
  115. data/ext/nokogiri/xml_xpath_context.c +502 -0
  116. data/ext/nokogiri/xslt_stylesheet.c +421 -0
  117. data/gumbo-parser/CHANGES.md +63 -0
  118. data/gumbo-parser/Makefile +129 -0
  119. data/gumbo-parser/THANKS +27 -0
  120. data/lib/nokogiri/3.1/nokogiri.so +0 -0
  121. data/lib/nokogiri/3.2/nokogiri.so +0 -0
  122. data/lib/nokogiri/3.3/nokogiri.so +0 -0
  123. data/lib/nokogiri/3.4/nokogiri.so +0 -0
  124. data/lib/nokogiri/class_resolver.rb +67 -0
  125. data/lib/nokogiri/css/node.rb +58 -0
  126. data/lib/nokogiri/css/parser.rb +772 -0
  127. data/lib/nokogiri/css/parser.y +277 -0
  128. data/lib/nokogiri/css/parser_extras.rb +36 -0
  129. data/lib/nokogiri/css/selector_cache.rb +38 -0
  130. data/lib/nokogiri/css/syntax_error.rb +9 -0
  131. data/lib/nokogiri/css/tokenizer.rb +155 -0
  132. data/lib/nokogiri/css/tokenizer.rex +57 -0
  133. data/lib/nokogiri/css/xpath_visitor.rb +375 -0
  134. data/lib/nokogiri/css.rb +132 -0
  135. data/lib/nokogiri/decorators/slop.rb +42 -0
  136. data/lib/nokogiri/encoding_handler.rb +57 -0
  137. data/lib/nokogiri/extension.rb +32 -0
  138. data/lib/nokogiri/gumbo.rb +15 -0
  139. data/lib/nokogiri/html.rb +48 -0
  140. data/lib/nokogiri/html4/builder.rb +37 -0
  141. data/lib/nokogiri/html4/document.rb +235 -0
  142. data/lib/nokogiri/html4/document_fragment.rb +166 -0
  143. data/lib/nokogiri/html4/element_description.rb +25 -0
  144. data/lib/nokogiri/html4/element_description_defaults.rb +2040 -0
  145. data/lib/nokogiri/html4/encoding_reader.rb +121 -0
  146. data/lib/nokogiri/html4/entity_lookup.rb +15 -0
  147. data/lib/nokogiri/html4/sax/parser.rb +48 -0
  148. data/lib/nokogiri/html4/sax/parser_context.rb +15 -0
  149. data/lib/nokogiri/html4/sax/push_parser.rb +37 -0
  150. data/lib/nokogiri/html4.rb +42 -0
  151. data/lib/nokogiri/html5/builder.rb +40 -0
  152. data/lib/nokogiri/html5/document.rb +199 -0
  153. data/lib/nokogiri/html5/document_fragment.rb +200 -0
  154. data/lib/nokogiri/html5/node.rb +103 -0
  155. data/lib/nokogiri/html5.rb +368 -0
  156. data/lib/nokogiri/jruby/dependencies.rb +3 -0
  157. data/lib/nokogiri/jruby/nokogiri_jars.rb +43 -0
  158. data/lib/nokogiri/syntax_error.rb +6 -0
  159. data/lib/nokogiri/version/constant.rb +6 -0
  160. data/lib/nokogiri/version/info.rb +224 -0
  161. data/lib/nokogiri/version.rb +4 -0
  162. data/lib/nokogiri/xml/attr.rb +66 -0
  163. data/lib/nokogiri/xml/attribute_decl.rb +22 -0
  164. data/lib/nokogiri/xml/builder.rb +494 -0
  165. data/lib/nokogiri/xml/cdata.rb +13 -0
  166. data/lib/nokogiri/xml/character_data.rb +9 -0
  167. data/lib/nokogiri/xml/document.rb +514 -0
  168. data/lib/nokogiri/xml/document_fragment.rb +276 -0
  169. data/lib/nokogiri/xml/dtd.rb +34 -0
  170. data/lib/nokogiri/xml/element_content.rb +46 -0
  171. data/lib/nokogiri/xml/element_decl.rb +17 -0
  172. data/lib/nokogiri/xml/entity_decl.rb +23 -0
  173. data/lib/nokogiri/xml/entity_reference.rb +20 -0
  174. data/lib/nokogiri/xml/namespace.rb +57 -0
  175. data/lib/nokogiri/xml/node/save_options.rb +76 -0
  176. data/lib/nokogiri/xml/node.rb +1650 -0
  177. data/lib/nokogiri/xml/node_set.rb +449 -0
  178. data/lib/nokogiri/xml/notation.rb +19 -0
  179. data/lib/nokogiri/xml/parse_options.rb +213 -0
  180. data/lib/nokogiri/xml/pp/character_data.rb +21 -0
  181. data/lib/nokogiri/xml/pp/node.rb +73 -0
  182. data/lib/nokogiri/xml/pp.rb +4 -0
  183. data/lib/nokogiri/xml/processing_instruction.rb +11 -0
  184. data/lib/nokogiri/xml/reader.rb +139 -0
  185. data/lib/nokogiri/xml/relax_ng.rb +75 -0
  186. data/lib/nokogiri/xml/sax/document.rb +258 -0
  187. data/lib/nokogiri/xml/sax/parser.rb +199 -0
  188. data/lib/nokogiri/xml/sax/parser_context.rb +129 -0
  189. data/lib/nokogiri/xml/sax/push_parser.rb +64 -0
  190. data/lib/nokogiri/xml/sax.rb +54 -0
  191. data/lib/nokogiri/xml/schema.rb +140 -0
  192. data/lib/nokogiri/xml/searchable.rb +297 -0
  193. data/lib/nokogiri/xml/syntax_error.rb +94 -0
  194. data/lib/nokogiri/xml/text.rb +11 -0
  195. data/lib/nokogiri/xml/xpath/syntax_error.rb +13 -0
  196. data/lib/nokogiri/xml/xpath.rb +21 -0
  197. data/lib/nokogiri/xml/xpath_context.rb +49 -0
  198. data/lib/nokogiri/xml.rb +65 -0
  199. data/lib/nokogiri/xslt/stylesheet.rb +49 -0
  200. data/lib/nokogiri/xslt.rb +129 -0
  201. data/lib/nokogiri.rb +128 -0
  202. data/lib/xsd/xmlparser/nokogiri.rb +105 -0
  203. metadata +324 -0
@@ -0,0 +1,449 @@
1
+ # coding: utf-8
2
+ # frozen_string_literal: true
3
+
4
+ module Nokogiri
5
+ module XML
6
+ ####
7
+ # A NodeSet is an Enumerable that contains a list of Nokogiri::XML::Node objects.
8
+ #
9
+ # Typically a NodeSet is returned as a result of searching a Document via
10
+ # Nokogiri::XML::Searchable#css or Nokogiri::XML::Searchable#xpath.
11
+ #
12
+ # Note that the `#dup` and `#clone` methods perform shallow copies; these methods do not copy
13
+ # the Nodes contained in the NodeSet (similar to how Array and other Enumerable classes work).
14
+ class NodeSet
15
+ include Nokogiri::XML::Searchable
16
+ include Enumerable
17
+
18
+ # The Document this NodeSet is associated with
19
+ attr_accessor :document
20
+
21
+ # Create a NodeSet associated with +document+, containing the nodes in +list+ (empty by default)
22
+ def initialize(document, list = [])
23
+ @document = document
24
+ document.decorate(self)
25
+ list.each { |x| self << x }
26
+ yield self if block_given?
27
+ end
28
+
29
+ ###
30
+ # Get the first element of the NodeSet, or an Array of the first +n+ elements if +n+ is given.
31
+ def first(n = nil)
32
+ return self[0] unless n
33
+
34
+ list = []
35
+ [n, length].min.times { |i| list << self[i] }
36
+ list
37
+ end
38
+
39
+ ###
40
+ # Get the last element of the NodeSet.
41
+ def last
42
+ self[-1]
43
+ end
44
+
45
+ ###
46
+ # Is this NodeSet empty?
47
+ def empty?
48
+ length == 0
49
+ end
50
+
51
+ ###
52
+ # Returns the index of the first node in self that is == to +node+ or meets the given block. Returns nil if no match is found.
53
+ def index(node = nil)
54
+ if node
55
+ warn("given block not used") if block_given?
56
+ each_with_index { |member, j| return j if member == node }
57
+ elsif block_given?
58
+ each_with_index { |member, j| return j if yield(member) }
59
+ end
60
+ nil
61
+ end
62
+
63
+ ###
64
+ # Insert +datum+ before the first Node in this NodeSet
65
+ def before(datum)
66
+ first.before(datum)
67
+ end
68
+
69
+ ###
70
+ # Insert +datum+ after the last Node in this NodeSet
71
+ def after(datum)
72
+ last.after(datum)
73
+ end
74
+
75
+ alias_method :<<, :push
76
+ alias_method :remove, :unlink
77
+
78
+ ###
79
+ # call-seq: css *rules, [namespace-bindings, custom-pseudo-class]
80
+ #
81
+ # Search this node set for CSS +rules+. +rules+ must be one or more CSS
82
+ # selectors, for example <code>node_set.css("h1", "div.note")</code>.
83
+ #
84
+ # For more information see Nokogiri::XML::Searchable#css
85
+ def css(*args)
86
+ rules, handler, ns, _ = extract_params(args)
87
+ paths = css_rules_to_xpath(rules, ns)
88
+
89
+ inject(NodeSet.new(document)) do |set, node|
90
+ set + xpath_internal(node, paths, handler, ns, nil)
91
+ end
92
+ end
93
+
94
+ ###
95
+ # call-seq: xpath *paths, [namespace-bindings, variable-bindings, custom-handler-class]
96
+ #
97
+ # Search this node set for XPath +paths+. +paths+ must be one or more XPath
98
+ # queries.
99
+ #
100
+ # For more information see Nokogiri::XML::Searchable#xpath
101
+ def xpath(*args)
102
+ paths, handler, ns, binds = extract_params(args)
103
+
104
+ inject(NodeSet.new(document)) do |set, node|
105
+ set + xpath_internal(node, paths, handler, ns, binds)
106
+ end
107
+ end
108
+
109
+ ###
110
+ # call-seq: at *paths, [namespace-bindings, xpath-variable-bindings, custom-handler-class]
111
+ #
112
+ # Search this object for +paths+, and return only the first
113
+ # result. +paths+ must be one or more XPath or CSS queries.
114
+ #
115
+ # See Searchable#search for more information.
116
+ #
117
+ # Or, if passed an integer, index into the NodeSet:
118
+ #
119
+ # node_set.at(3) # same as node_set[3]
120
+ #
121
+ def at(*args)
122
+ if args.length == 1 && args.first.is_a?(Numeric)
123
+ return self[args.first]
124
+ end
125
+
126
+ super
127
+ end
128
+ alias_method :%, :at
129
+
130
+ ###
131
+ # Filter this list for nodes that match +expr+
132
+ def filter(expr)
133
+ find_all { |node| node.matches?(expr) }
134
+ end
135
+
136
+ ###
137
+ # Add the class attribute +name+ to all Node objects in the
138
+ # NodeSet.
139
+ #
140
+ # See Nokogiri::XML::Node#add_class for more information.
141
+ def add_class(name)
142
+ each do |el|
143
+ el.add_class(name)
144
+ end
145
+ self
146
+ end
147
+
148
+ ###
149
+ # Append the class attribute +name+ to all Node objects in the
150
+ # NodeSet.
151
+ #
152
+ # See Nokogiri::XML::Node#append_class for more information.
153
+ def append_class(name)
154
+ each do |el|
155
+ el.append_class(name)
156
+ end
157
+ self
158
+ end
159
+
160
+ ###
161
+ # Remove the class attribute +name+ from all Node objects in the
162
+ # NodeSet.
163
+ #
164
+ # See Nokogiri::XML::Node#remove_class for more information.
165
+ def remove_class(name = nil)
166
+ each do |el|
167
+ el.remove_class(name)
168
+ end
169
+ self
170
+ end
171
+
172
+ ###
173
+ # Set attributes on each Node in the NodeSet, or get an
174
+ # attribute from the first Node in the NodeSet.
175
+ #
176
+ # To get an attribute from the first Node in a NodeSet:
177
+ #
178
+ # node_set.attr("href") # => "https://www.nokogiri.org"
179
+ #
180
+ # Note that an empty NodeSet will return nil when +#attr+ is called as a getter.
181
+ #
182
+ # To set an attribute on each node, +key+ can either be an
183
+ # attribute name, or a Hash of attribute names and values. When
184
+ # called as a setter, +#attr+ returns the NodeSet.
185
+ #
186
+ # If +key+ is an attribute name, then either +value+ or +block+
187
+ # must be passed.
188
+ #
189
+ # If +key+ is a Hash then attributes will be set for each
190
+ # key/value pair:
191
+ #
192
+ # node_set.attr("href" => "https://www.nokogiri.org", "class" => "member")
193
+ #
194
+ # If +value+ is passed, it will be used as the attribute value
195
+ # for all nodes:
196
+ #
197
+ # node_set.attr("href", "https://www.nokogiri.org")
198
+ #
199
+ # If +block+ is passed, it will be called on each Node object in
200
+ # the NodeSet and the return value used as the attribute value
201
+ # for that node:
202
+ #
203
+ # node_set.attr("class") { |node| node.name }
204
+ #
205
+ def attr(key, value = nil, &block)
206
+ unless key.is_a?(Hash) || (key && (value || block))
207
+ return first&.attribute(key)
208
+ end
209
+
210
+ hash = key.is_a?(Hash) ? key : { key => value }
211
+
212
+ hash.each do |k, v|
213
+ each do |node|
214
+ node[k] = v || yield(node)
215
+ end
216
+ end
217
+
218
+ self
219
+ end
220
+ alias_method :set, :attr
221
+ alias_method :attribute, :attr
222
+
223
+ ###
224
+ # Remove the attribute named +name+ from all Node objects in the NodeSet
225
+ def remove_attr(name)
226
+ each { |el| el.delete(name) }
227
+ self
228
+ end
229
+ alias_method :remove_attribute, :remove_attr
230
+
231
+ ###
232
+ # Iterate over each node, yielding to +block+
233
+ def each
234
+ return to_enum unless block_given?
235
+
236
+ 0.upto(length - 1) do |x|
237
+ yield self[x]
238
+ end
239
+ self
240
+ end
241
+
242
+ ###
243
+ # Get the inner text of all contained Node objects
244
+ #
245
+ # Note: This joins the text of all Node objects in the NodeSet:
246
+ #
247
+ # doc = Nokogiri::XML('<xml><a><d>foo</d><d>bar</d></a></xml>')
248
+ # doc.css('d').text # => "foobar"
249
+ #
250
+ # Instead, if you want the text of each node in the NodeSet as a separate Array element:
251
+ #
252
+ # doc.css('d').map(&:text) # => ["foo", "bar"]
253
+ #
254
+ # See Nokogiri::XML::Node#content for more information.
255
+ def inner_text
256
+ collect(&:inner_text).join("")
257
+ end
258
+ alias_method :text, :inner_text
259
+
260
+ ###
261
+ # Get the inner html of all contained Node objects
262
+ def inner_html(*args)
263
+ collect { |j| j.inner_html(*args) }.join("")
264
+ end
265
+
266
+ # :call-seq:
267
+ # wrap(markup) -> self
268
+ # wrap(node) -> self
269
+ #
270
+ # Wrap each member of this NodeSet with the node parsed from +markup+ or a dup of the +node+.
271
+ #
272
+ # [Parameters]
273
+ # - *markup* (String)
274
+ # Markup that is parsed, once per member of the NodeSet, and used as the wrapper. Each
275
+ # node's parent, if it exists, is used as the context node for parsing; otherwise the
276
+ # associated document is used. If the parsed fragment has multiple roots, the first root
277
+ # node is used as the wrapper.
278
+ # - *node* (Nokogiri::XML::Node)
279
+ # An element that is `#dup`ed and used as the wrapper.
280
+ #
281
+ # [Returns] +self+, to support chaining.
282
+ #
283
+ # ⚠ Note that if a +String+ is passed, the markup will be parsed <b>once per node</b> in the
284
+ # NodeSet. You can avoid this overhead in cases where you know exactly the wrapper you wish to
285
+ # use by passing a +Node+ instead.
286
+ #
287
+ # Also see Node#wrap
288
+ #
289
+ # *Example* with a +String+ argument:
290
+ #
291
+ # doc = Nokogiri::HTML5(<<~HTML)
292
+ # <html><body>
293
+ # <a>a</a>
294
+ # <a>b</a>
295
+ # <a>c</a>
296
+ # <a>d</a>
297
+ # </body></html>
298
+ # HTML
299
+ # doc.css("a").wrap("<div></div>")
300
+ # doc.to_html
301
+ # # => <html><head></head><body>
302
+ # # <div><a>a</a></div>
303
+ # # <div><a>b</a></div>
304
+ # # <div><a>c</a></div>
305
+ # # <div><a>d</a></div>
306
+ # # </body></html>
307
+ #
308
+ # *Example* with a +Node+ argument
309
+ #
310
+ # 💡 Note that this is faster than the equivalent call passing a +String+ because it avoids
311
+ # having to reparse the wrapper markup for each node.
312
+ #
313
+ # doc = Nokogiri::HTML5(<<~HTML)
314
+ # <html><body>
315
+ # <a>a</a>
316
+ # <a>b</a>
317
+ # <a>c</a>
318
+ # <a>d</a>
319
+ # </body></html>
320
+ # HTML
321
+ # doc.css("a").wrap(doc.create_element("div"))
322
+ # doc.to_html
323
+ # # => <html><head></head><body>
324
+ # # <div><a>a</a></div>
325
+ # # <div><a>b</a></div>
326
+ # # <div><a>c</a></div>
327
+ # # <div><a>d</a></div>
328
+ # # </body></html>
329
+ #
330
+ def wrap(node_or_tags)
331
+ map { |node| node.wrap(node_or_tags) }
332
+ self
333
+ end
334
+
335
+ ###
336
+ # Convert this NodeSet to a string.
337
+ def to_s
338
+ map(&:to_s).join
339
+ end
340
+
341
+ ###
342
+ # Convert this NodeSet to HTML
343
+ def to_html(*args)
344
+ if Nokogiri.jruby?
345
+ options = args.first.is_a?(Hash) ? args.shift : {}
346
+ options[:save_with] ||= Node::SaveOptions::DEFAULT_HTML
347
+ args.insert(0, options)
348
+ end
349
+ if empty?
350
+ encoding = (args.first.is_a?(Hash) ? args.first[:encoding] : nil)
351
+ encoding ||= document.encoding
352
+ encoding.nil? ? "" : "".encode(encoding)
353
+ else
354
+ map { |x| x.to_html(*args) }.join
355
+ end
356
+ end
357
+
358
+ ###
359
+ # Convert this NodeSet to XHTML
360
+ def to_xhtml(*args)
361
+ map { |x| x.to_xhtml(*args) }.join
362
+ end
363
+
364
+ ###
365
+ # Convert this NodeSet to XML
366
+ def to_xml(*args)
367
+ map { |x| x.to_xml(*args) }.join
368
+ end
369
+
370
+ alias_method :size, :length
371
+ alias_method :to_ary, :to_a
372
+
373
+ ###
374
+ # Removes the last element from set and returns it, or +nil+ if
375
+ # the set is empty
376
+ def pop
377
+ return if length == 0
378
+
379
+ delete(last)
380
+ end
381
+
382
+ ###
383
+ # Returns the first element of the NodeSet and removes it. Returns
384
+ # +nil+ if the set is empty.
385
+ def shift
386
+ return if length == 0
387
+
388
+ delete(first)
389
+ end
390
+
391
+ ###
392
+ # Equality -- Two NodeSets are equal if they contain the same number
393
+ # of elements and if each element is equal to the corresponding
394
+ # element in the other NodeSet
395
+ def ==(other)
396
+ return false unless other.is_a?(Nokogiri::XML::NodeSet)
397
+ return false unless length == other.length
398
+
399
+ each_with_index do |node, i|
400
+ return false unless node == other[i]
401
+ end
402
+ true
403
+ end
404
+
405
+ ###
406
+ # Returns a new NodeSet containing all the children of all the nodes in
407
+ # the NodeSet
408
+ def children
409
+ node_set = NodeSet.new(document)
410
+ each do |node|
411
+ node.children.each { |n| node_set.push(n) }
412
+ end
413
+ node_set
414
+ end
415
+
416
+ ###
417
+ # Returns a new NodeSet containing all the nodes in the NodeSet
418
+ # in reverse order
419
+ def reverse
420
+ node_set = NodeSet.new(document)
421
+ (length - 1).downto(0) do |x|
422
+ node_set.push(self[x])
423
+ end
424
+ node_set
425
+ end
426
+
427
+ ###
428
+ # Return a nicely formatted string representation
429
+ def inspect
430
+ "[#{map(&:inspect).join(", ")}]"
431
+ end
432
+
433
+ alias_method :+, :|
434
+
435
+ #
436
+ # :call-seq: deconstruct() → Array
437
+ #
438
+ # Returns the members of this NodeSet as an array, to use in pattern matching.
439
+ #
440
+ # Since v1.14.0
441
+ #
442
+ def deconstruct
443
+ to_a
444
+ end
445
+
446
+ IMPLIED_XPATH_CONTEXTS = [".//", "self::"].freeze # :nodoc:
447
+ end
448
+ end
449
+ end
@@ -0,0 +1,19 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Nokogiri
4
+ module XML
5
+ # Struct representing an {XML Schema Notation}[https://www.w3.org/TR/xml/#Notations]
6
+ class Notation < Struct.new(:name, :public_id, :system_id)
7
+ # dead comment to ensure rdoc processing
8
+
9
+ # :attr: name (String)
10
+ # The name of the notation.
11
+
12
+ # :attr: public_id (String)
13
+ # The URI corresponding to the public identifier
14
+
15
+ # :attr: system_id (String,nil)
16
+ # The URI corresponding to the system identifier
17
+ end
18
+ end
19
+ end
@@ -0,0 +1,213 @@
1
+ # coding: utf-8
2
+ # frozen_string_literal: true
3
+
4
+ module Nokogiri
5
+ module XML
6
+ # Options that control the parsing behavior for XML::Document, XML::DocumentFragment,
7
+ # HTML4::Document, HTML4::DocumentFragment, XSLT::Stylesheet, and XML::Schema.
8
+ #
9
+ # These options directly expose libxml2's parse options, which are all boolean in the sense that
10
+ # an option is "on" or "off".
11
+ #
12
+ # 💡 Note that HTML5 parsing has a separate, orthogonal set of options due to the nature of the
13
+ # HTML5 specification. See Nokogiri::HTML5.
14
+ #
15
+ # ⚠ Not all parse options are supported on JRuby. Nokogiri will attempt to invoke the equivalent
16
+ # behavior in Xerces/NekoHTML on JRuby when it's possible.
17
+ #
18
+ # == Setting and unsetting parse options
19
+ #
20
+ # You can build your own combinations of parse options by using any of the following methods:
21
+ #
22
+ # [ParseOptions method chaining]
23
+ #
24
+ # Every option has an equivalent method in lowercase. You can chain these methods together to
25
+ # set various combinations.
26
+ #
27
+ # # Set the HUGE & PEDANTIC options
28
+ # po = Nokogiri::XML::ParseOptions.new.huge.pedantic
29
+ # doc = Nokogiri::XML::Document.parse(xml, nil, nil, po)
30
+ #
31
+ # Every option has an equivalent <code>no{option}</code> method in lowercase. You can call these
32
+ # methods on an instance of ParseOptions to unset the option.
33
+ #
34
+ # # Set the HUGE & PEDANTIC options
35
+ # po = Nokogiri::XML::ParseOptions.new.huge.pedantic
36
+ #
37
+ # # later we want to modify the options
38
+ # po.nohuge # Unset the HUGE option
39
+ # po.nopedantic # Unset the PEDANTIC option
40
+ #
41
+ # 💡 Note that some options begin with "no" leading to the logical but perhaps unintuitive
42
+ # double negative:
43
+ #
44
+ # po.nocdata # Set the NOCDATA parse option
45
+ # po.nonocdata # Unset the NOCDATA parse option
46
+ #
47
+ # 💡 Note that negation is not available for STRICT, which is itself a negation of all other
48
+ # features.
49
+ #
50
+ #
51
+ # [Using Ruby Blocks]
52
+ #
53
+ # Most parsing methods will accept a block for configuration of parse options, and we
54
+ # recommend chaining the setter methods:
55
+ #
56
+ # doc = Nokogiri::XML::Document.parse(xml) { |config| config.huge.pedantic }
57
+ #
58
+ #
59
+ # [ParseOptions constants]
60
+ #
61
+ # You can also use the constants declared under Nokogiri::XML::ParseOptions to set various
62
+ # combinations. They are bits in a bitmask, and so can be combined with bitwise operators:
63
+ #
64
+ # po = Nokogiri::XML::ParseOptions.new(Nokogiri::XML::ParseOptions::HUGE | Nokogiri::XML::ParseOptions::PEDANTIC)
65
+ # doc = Nokogiri::XML::Document.parse(xml, nil, nil, po)
66
+ #
67
+ class ParseOptions
68
+ # Strict parsing
69
+ STRICT = 0
70
+
71
+ # Recover from errors. On by default for XML::Document, XML::DocumentFragment,
72
+ # HTML4::Document, HTML4::DocumentFragment, XSLT::Stylesheet, and XML::Schema.
73
+ RECOVER = 1 << 0
74
+
75
+ # Substitute entities. Off by default.
76
+ #
77
+ # ⚠ This option enables entity substitution, contrary to what the name implies.
78
+ #
79
+ # ⚠ <b>It is UNSAFE to set this option</b> when parsing untrusted documents.
80
+ NOENT = 1 << 1
81
+
82
+ # Load external subsets. On by default for XSLT::Stylesheet.
83
+ #
84
+ # ⚠ <b>It is UNSAFE to set this option</b> when parsing untrusted documents.
85
+ DTDLOAD = 1 << 2
86
+
87
+ # Default DTD attributes. On by default for XSLT::Stylesheet.
88
+ DTDATTR = 1 << 3
89
+
90
+ # Validate with the DTD. Off by default.
91
+ DTDVALID = 1 << 4
92
+
93
+ # Suppress error reports. On by default for HTML4::Document and HTML4::DocumentFragment
94
+ NOERROR = 1 << 5
95
+
96
+ # Suppress warning reports. On by default for HTML4::Document and HTML4::DocumentFragment
97
+ NOWARNING = 1 << 6
98
+
99
+ # Enable pedantic error reporting. Off by default.
100
+ PEDANTIC = 1 << 7
101
+
102
+ # Remove blank nodes. Off by default.
103
+ NOBLANKS = 1 << 8
104
+
105
+ # Use the SAX1 interface internally. Off by default.
106
+ SAX1 = 1 << 9
107
+
108
+ # Implement XInclude substitution. Off by default.
109
+ XINCLUDE = 1 << 10
110
+
111
+ # Forbid network access. On by default for XML::Document, XML::DocumentFragment,
112
+ # HTML4::Document, HTML4::DocumentFragment, XSLT::Stylesheet, and XML::Schema.
113
+ #
114
+ # ⚠ <b>It is UNSAFE to unset this option</b> when parsing untrusted documents.
115
+ NONET = 1 << 11
116
+
117
+ # Do not reuse the context dictionary. Off by default.
118
+ NODICT = 1 << 12
119
+
120
+ # Remove redundant namespaces declarations. Off by default.
121
+ NSCLEAN = 1 << 13
122
+
123
+ # Merge CDATA as text nodes. On by default for XSLT::Stylesheet.
124
+ NOCDATA = 1 << 14
125
+
126
+ # Do not generate XInclude START/END nodes. Off by default.
127
+ NOXINCNODE = 1 << 15
128
+
129
+ # Compact small text nodes. Off by default.
130
+ #
131
+ # ⚠ No modification of the DOM tree is allowed after parsing. libxml2 may crash if you try to
132
+ # modify the tree.
133
+ COMPACT = 1 << 16
134
+
135
+ # Parse using XML-1.0 before update 5. Off by default
136
+ OLD10 = 1 << 17
137
+
138
+ # Do not fixup XInclude xml:base uris. Off by default
139
+ NOBASEFIX = 1 << 18
140
+
141
+ # Relax any hardcoded limit from the parser. Off by default.
142
+ #
143
+ # ⚠ <b>It is UNSAFE to set this option</b> when parsing untrusted documents.
144
+ HUGE = 1 << 19
145
+
146
+ # Support line numbers up to <code>long int</code> (default is a <code>short int</code>). On
147
+ # by default for XML::Document, XML::DocumentFragment, HTML4::Document,
148
+ # HTML4::DocumentFragment, XSLT::Stylesheet, and XML::Schema.
149
+ BIG_LINES = 1 << 22
150
+
151
+ # The options mask used by default for parsing XML::Document and XML::DocumentFragment
152
+ DEFAULT_XML = RECOVER | NONET | BIG_LINES
153
+
154
+ # The options mask used by default for parsing XSLT::Stylesheet
155
+ DEFAULT_XSLT = RECOVER | NONET | NOENT | DTDLOAD | DTDATTR | NOCDATA | BIG_LINES
156
+
157
+ # The options mask used by default for parsing HTML4::Document and HTML4::DocumentFragment
158
+ DEFAULT_HTML = RECOVER | NOERROR | NOWARNING | NONET | BIG_LINES
159
+
160
+ # The options mask used by default for parsing XML::Schema
161
+ DEFAULT_SCHEMA = NONET | BIG_LINES
162
+
163
+ attr_accessor :options
164
+
165
+ def initialize(options = STRICT)
166
+ @options = options
167
+ end
168
+
169
+ constants.each do |constant|
170
+ next if constant.to_sym == :STRICT
171
+
172
+ class_eval <<~RUBY, __FILE__, __LINE__ + 1
173
+ def #{constant.downcase}
174
+ @options |= #{constant}
175
+ self
176
+ end
177
+
178
+ def no#{constant.downcase}
179
+ @options &= ~#{constant}
180
+ self
181
+ end
182
+
183
+ def #{constant.downcase}?
184
+ #{constant} & @options == #{constant}
185
+ end
186
+ RUBY
187
+ end
188
+
189
+ def strict
190
+ @options &= ~RECOVER
191
+ self
192
+ end
193
+
194
+ def strict?
195
+ @options & RECOVER == STRICT
196
+ end
197
+
198
+ def ==(other)
199
+ other.to_i == to_i
200
+ end
201
+
202
+ alias_method :to_i, :options
203
+
204
+ def inspect
205
+ options = []
206
+ self.class.constants.each do |k|
207
+ options << k.downcase if send(:"#{k.downcase}?")
208
+ end
209
+ super.sub(/>$/, " " + options.join(", ") + ">")
210
+ end
211
+ end
212
+ end
213
+ end
@@ -0,0 +1,21 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Nokogiri
4
+ module XML
5
+ # :nodoc: all
6
+ module PP
7
+ module CharacterData
8
+ def pretty_print(pp)
9
+ nice_name = self.class.name.split("::").last
10
+ pp.group(2, "#(#{nice_name} ", ")") do
11
+ pp.pp(text)
12
+ end
13
+ end
14
+
15
+ def inspect
16
+ "#<#{self.class.name}:#{format("0x%x", object_id)} #{text.inspect}>"
17
+ end
18
+ end
19
+ end
20
+ end
21
+ end