nokogiri 1.14.0.rc1-arm-linux

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (200) hide show
  1. checksums.yaml +7 -0
  2. data/Gemfile +38 -0
  3. data/LICENSE-DEPENDENCIES.md +2224 -0
  4. data/LICENSE.md +9 -0
  5. data/README.md +287 -0
  6. data/bin/nokogiri +131 -0
  7. data/dependencies.yml +41 -0
  8. data/ext/nokogiri/depend +38 -0
  9. data/ext/nokogiri/extconf.rb +1082 -0
  10. data/ext/nokogiri/gumbo.c +594 -0
  11. data/ext/nokogiri/html4_document.c +166 -0
  12. data/ext/nokogiri/html4_element_description.c +294 -0
  13. data/ext/nokogiri/html4_entity_lookup.c +37 -0
  14. data/ext/nokogiri/html4_sax_parser_context.c +114 -0
  15. data/ext/nokogiri/html4_sax_push_parser.c +95 -0
  16. data/ext/nokogiri/include/libexslt/exslt.h +108 -0
  17. data/ext/nokogiri/include/libexslt/exsltconfig.h +70 -0
  18. data/ext/nokogiri/include/libexslt/exsltexports.h +63 -0
  19. data/ext/nokogiri/include/libxml2/libxml/HTMLparser.h +306 -0
  20. data/ext/nokogiri/include/libxml2/libxml/HTMLtree.h +147 -0
  21. data/ext/nokogiri/include/libxml2/libxml/SAX.h +204 -0
  22. data/ext/nokogiri/include/libxml2/libxml/SAX2.h +172 -0
  23. data/ext/nokogiri/include/libxml2/libxml/c14n.h +128 -0
  24. data/ext/nokogiri/include/libxml2/libxml/catalog.h +182 -0
  25. data/ext/nokogiri/include/libxml2/libxml/chvalid.h +230 -0
  26. data/ext/nokogiri/include/libxml2/libxml/debugXML.h +217 -0
  27. data/ext/nokogiri/include/libxml2/libxml/dict.h +81 -0
  28. data/ext/nokogiri/include/libxml2/libxml/encoding.h +232 -0
  29. data/ext/nokogiri/include/libxml2/libxml/entities.h +153 -0
  30. data/ext/nokogiri/include/libxml2/libxml/globals.h +499 -0
  31. data/ext/nokogiri/include/libxml2/libxml/hash.h +236 -0
  32. data/ext/nokogiri/include/libxml2/libxml/list.h +137 -0
  33. data/ext/nokogiri/include/libxml2/libxml/nanoftp.h +186 -0
  34. data/ext/nokogiri/include/libxml2/libxml/nanohttp.h +81 -0
  35. data/ext/nokogiri/include/libxml2/libxml/parser.h +1244 -0
  36. data/ext/nokogiri/include/libxml2/libxml/parserInternals.h +656 -0
  37. data/ext/nokogiri/include/libxml2/libxml/pattern.h +100 -0
  38. data/ext/nokogiri/include/libxml2/libxml/relaxng.h +218 -0
  39. data/ext/nokogiri/include/libxml2/libxml/schemasInternals.h +958 -0
  40. data/ext/nokogiri/include/libxml2/libxml/schematron.h +142 -0
  41. data/ext/nokogiri/include/libxml2/libxml/threads.h +91 -0
  42. data/ext/nokogiri/include/libxml2/libxml/tree.h +1312 -0
  43. data/ext/nokogiri/include/libxml2/libxml/uri.h +94 -0
  44. data/ext/nokogiri/include/libxml2/libxml/valid.h +463 -0
  45. data/ext/nokogiri/include/libxml2/libxml/xinclude.h +129 -0
  46. data/ext/nokogiri/include/libxml2/libxml/xlink.h +189 -0
  47. data/ext/nokogiri/include/libxml2/libxml/xmlIO.h +368 -0
  48. data/ext/nokogiri/include/libxml2/libxml/xmlautomata.h +146 -0
  49. data/ext/nokogiri/include/libxml2/libxml/xmlerror.h +947 -0
  50. data/ext/nokogiri/include/libxml2/libxml/xmlexports.h +77 -0
  51. data/ext/nokogiri/include/libxml2/libxml/xmlmemory.h +226 -0
  52. data/ext/nokogiri/include/libxml2/libxml/xmlmodule.h +57 -0
  53. data/ext/nokogiri/include/libxml2/libxml/xmlreader.h +428 -0
  54. data/ext/nokogiri/include/libxml2/libxml/xmlregexp.h +222 -0
  55. data/ext/nokogiri/include/libxml2/libxml/xmlsave.h +88 -0
  56. data/ext/nokogiri/include/libxml2/libxml/xmlschemas.h +246 -0
  57. data/ext/nokogiri/include/libxml2/libxml/xmlschemastypes.h +152 -0
  58. data/ext/nokogiri/include/libxml2/libxml/xmlstring.h +140 -0
  59. data/ext/nokogiri/include/libxml2/libxml/xmlunicode.h +202 -0
  60. data/ext/nokogiri/include/libxml2/libxml/xmlversion.h +503 -0
  61. data/ext/nokogiri/include/libxml2/libxml/xmlwriter.h +488 -0
  62. data/ext/nokogiri/include/libxml2/libxml/xpath.h +575 -0
  63. data/ext/nokogiri/include/libxml2/libxml/xpathInternals.h +632 -0
  64. data/ext/nokogiri/include/libxml2/libxml/xpointer.h +137 -0
  65. data/ext/nokogiri/include/libxslt/attributes.h +38 -0
  66. data/ext/nokogiri/include/libxslt/documents.h +93 -0
  67. data/ext/nokogiri/include/libxslt/extensions.h +262 -0
  68. data/ext/nokogiri/include/libxslt/extra.h +72 -0
  69. data/ext/nokogiri/include/libxslt/functions.h +78 -0
  70. data/ext/nokogiri/include/libxslt/imports.h +75 -0
  71. data/ext/nokogiri/include/libxslt/keys.h +53 -0
  72. data/ext/nokogiri/include/libxslt/namespaces.h +68 -0
  73. data/ext/nokogiri/include/libxslt/numbersInternals.h +73 -0
  74. data/ext/nokogiri/include/libxslt/pattern.h +84 -0
  75. data/ext/nokogiri/include/libxslt/preproc.h +43 -0
  76. data/ext/nokogiri/include/libxslt/security.h +104 -0
  77. data/ext/nokogiri/include/libxslt/templates.h +77 -0
  78. data/ext/nokogiri/include/libxslt/transform.h +207 -0
  79. data/ext/nokogiri/include/libxslt/variables.h +118 -0
  80. data/ext/nokogiri/include/libxslt/xslt.h +110 -0
  81. data/ext/nokogiri/include/libxslt/xsltInternals.h +1982 -0
  82. data/ext/nokogiri/include/libxslt/xsltconfig.h +179 -0
  83. data/ext/nokogiri/include/libxslt/xsltexports.h +64 -0
  84. data/ext/nokogiri/include/libxslt/xsltlocale.h +76 -0
  85. data/ext/nokogiri/include/libxslt/xsltutils.h +310 -0
  86. data/ext/nokogiri/libxml2_backwards_compat.c +121 -0
  87. data/ext/nokogiri/nokogiri.c +259 -0
  88. data/ext/nokogiri/nokogiri.h +235 -0
  89. data/ext/nokogiri/test_global_handlers.c +40 -0
  90. data/ext/nokogiri/xml_attr.c +103 -0
  91. data/ext/nokogiri/xml_attribute_decl.c +70 -0
  92. data/ext/nokogiri/xml_cdata.c +57 -0
  93. data/ext/nokogiri/xml_comment.c +62 -0
  94. data/ext/nokogiri/xml_document.c +689 -0
  95. data/ext/nokogiri/xml_document_fragment.c +44 -0
  96. data/ext/nokogiri/xml_dtd.c +208 -0
  97. data/ext/nokogiri/xml_element_content.c +128 -0
  98. data/ext/nokogiri/xml_element_decl.c +69 -0
  99. data/ext/nokogiri/xml_encoding_handler.c +104 -0
  100. data/ext/nokogiri/xml_entity_decl.c +112 -0
  101. data/ext/nokogiri/xml_entity_reference.c +50 -0
  102. data/ext/nokogiri/xml_namespace.c +186 -0
  103. data/ext/nokogiri/xml_node.c +2425 -0
  104. data/ext/nokogiri/xml_node_set.c +496 -0
  105. data/ext/nokogiri/xml_processing_instruction.c +54 -0
  106. data/ext/nokogiri/xml_reader.c +794 -0
  107. data/ext/nokogiri/xml_relax_ng.c +183 -0
  108. data/ext/nokogiri/xml_sax_parser.c +316 -0
  109. data/ext/nokogiri/xml_sax_parser_context.c +283 -0
  110. data/ext/nokogiri/xml_sax_push_parser.c +166 -0
  111. data/ext/nokogiri/xml_schema.c +282 -0
  112. data/ext/nokogiri/xml_syntax_error.c +85 -0
  113. data/ext/nokogiri/xml_text.c +48 -0
  114. data/ext/nokogiri/xml_xpath_context.c +413 -0
  115. data/ext/nokogiri/xslt_stylesheet.c +363 -0
  116. data/gumbo-parser/CHANGES.md +63 -0
  117. data/gumbo-parser/Makefile +111 -0
  118. data/gumbo-parser/THANKS +27 -0
  119. data/lib/nokogiri/2.7/nokogiri.so +0 -0
  120. data/lib/nokogiri/3.0/nokogiri.so +0 -0
  121. data/lib/nokogiri/3.1/nokogiri.so +0 -0
  122. data/lib/nokogiri/3.2/nokogiri.so +0 -0
  123. data/lib/nokogiri/class_resolver.rb +67 -0
  124. data/lib/nokogiri/css/node.rb +54 -0
  125. data/lib/nokogiri/css/parser.rb +770 -0
  126. data/lib/nokogiri/css/parser.y +277 -0
  127. data/lib/nokogiri/css/parser_extras.rb +96 -0
  128. data/lib/nokogiri/css/syntax_error.rb +9 -0
  129. data/lib/nokogiri/css/tokenizer.rb +155 -0
  130. data/lib/nokogiri/css/tokenizer.rex +56 -0
  131. data/lib/nokogiri/css/xpath_visitor.rb +359 -0
  132. data/lib/nokogiri/css.rb +66 -0
  133. data/lib/nokogiri/decorators/slop.rb +44 -0
  134. data/lib/nokogiri/encoding_handler.rb +57 -0
  135. data/lib/nokogiri/extension.rb +32 -0
  136. data/lib/nokogiri/gumbo.rb +15 -0
  137. data/lib/nokogiri/html.rb +48 -0
  138. data/lib/nokogiri/html4/builder.rb +37 -0
  139. data/lib/nokogiri/html4/document.rb +214 -0
  140. data/lib/nokogiri/html4/document_fragment.rb +54 -0
  141. data/lib/nokogiri/html4/element_description.rb +25 -0
  142. data/lib/nokogiri/html4/element_description_defaults.rb +572 -0
  143. data/lib/nokogiri/html4/encoding_reader.rb +121 -0
  144. data/lib/nokogiri/html4/entity_lookup.rb +15 -0
  145. data/lib/nokogiri/html4/sax/parser.rb +63 -0
  146. data/lib/nokogiri/html4/sax/parser_context.rb +20 -0
  147. data/lib/nokogiri/html4/sax/push_parser.rb +37 -0
  148. data/lib/nokogiri/html4.rb +47 -0
  149. data/lib/nokogiri/html5/document.rb +168 -0
  150. data/lib/nokogiri/html5/document_fragment.rb +90 -0
  151. data/lib/nokogiri/html5/node.rb +98 -0
  152. data/lib/nokogiri/html5.rb +389 -0
  153. data/lib/nokogiri/jruby/dependencies.rb +3 -0
  154. data/lib/nokogiri/jruby/nokogiri_jars.rb +43 -0
  155. data/lib/nokogiri/syntax_error.rb +6 -0
  156. data/lib/nokogiri/version/constant.rb +6 -0
  157. data/lib/nokogiri/version/info.rb +223 -0
  158. data/lib/nokogiri/version.rb +4 -0
  159. data/lib/nokogiri/xml/attr.rb +66 -0
  160. data/lib/nokogiri/xml/attribute_decl.rb +20 -0
  161. data/lib/nokogiri/xml/builder.rb +487 -0
  162. data/lib/nokogiri/xml/cdata.rb +13 -0
  163. data/lib/nokogiri/xml/character_data.rb +9 -0
  164. data/lib/nokogiri/xml/document.rb +471 -0
  165. data/lib/nokogiri/xml/document_fragment.rb +205 -0
  166. data/lib/nokogiri/xml/dtd.rb +34 -0
  167. data/lib/nokogiri/xml/element_content.rb +38 -0
  168. data/lib/nokogiri/xml/element_decl.rb +15 -0
  169. data/lib/nokogiri/xml/entity_decl.rb +21 -0
  170. data/lib/nokogiri/xml/entity_reference.rb +20 -0
  171. data/lib/nokogiri/xml/namespace.rb +58 -0
  172. data/lib/nokogiri/xml/node/save_options.rb +68 -0
  173. data/lib/nokogiri/xml/node.rb +1563 -0
  174. data/lib/nokogiri/xml/node_set.rb +446 -0
  175. data/lib/nokogiri/xml/notation.rb +19 -0
  176. data/lib/nokogiri/xml/parse_options.rb +213 -0
  177. data/lib/nokogiri/xml/pp/character_data.rb +21 -0
  178. data/lib/nokogiri/xml/pp/node.rb +57 -0
  179. data/lib/nokogiri/xml/pp.rb +4 -0
  180. data/lib/nokogiri/xml/processing_instruction.rb +11 -0
  181. data/lib/nokogiri/xml/reader.rb +105 -0
  182. data/lib/nokogiri/xml/relax_ng.rb +38 -0
  183. data/lib/nokogiri/xml/sax/document.rb +167 -0
  184. data/lib/nokogiri/xml/sax/parser.rb +125 -0
  185. data/lib/nokogiri/xml/sax/parser_context.rb +21 -0
  186. data/lib/nokogiri/xml/sax/push_parser.rb +61 -0
  187. data/lib/nokogiri/xml/sax.rb +6 -0
  188. data/lib/nokogiri/xml/schema.rb +73 -0
  189. data/lib/nokogiri/xml/searchable.rb +270 -0
  190. data/lib/nokogiri/xml/syntax_error.rb +72 -0
  191. data/lib/nokogiri/xml/text.rb +11 -0
  192. data/lib/nokogiri/xml/xpath/syntax_error.rb +13 -0
  193. data/lib/nokogiri/xml/xpath.rb +21 -0
  194. data/lib/nokogiri/xml/xpath_context.rb +16 -0
  195. data/lib/nokogiri/xml.rb +76 -0
  196. data/lib/nokogiri/xslt/stylesheet.rb +27 -0
  197. data/lib/nokogiri/xslt.rb +65 -0
  198. data/lib/nokogiri.rb +120 -0
  199. data/lib/xsd/xmlparser/nokogiri.rb +104 -0
  200. metadata +317 -0
@@ -0,0 +1,270 @@
1
+ # coding: utf-8
2
+ # frozen_string_literal: true
3
+
4
+ module Nokogiri
5
+ module XML
6
+ #
7
+ # The Searchable module declares the interface used for searching your DOM.
8
+ #
9
+ # It implements the public methods #search, #css, and #xpath,
10
+ # as well as allowing specific implementations to specialize some
11
+ # of the important behaviors.
12
+ #
13
+ module Searchable
14
+ # Regular expression used by Searchable#search to determine if a query
15
+ # string is CSS or XPath
16
+ LOOKS_LIKE_XPATH = %r{^(\./|/|\.\.|\.$)}
17
+
18
+ # :section: Searching via XPath or CSS Queries
19
+
20
+ ###
21
+ # call-seq:
22
+ # search(*paths, [namespace-bindings, xpath-variable-bindings, custom-handler-class])
23
+ #
24
+ # Search this object for +paths+. +paths+ must be one or more XPath or CSS queries:
25
+ #
26
+ # node.search("div.employee", ".//title")
27
+ #
28
+ # A hash of namespace bindings may be appended:
29
+ #
30
+ # node.search('.//bike:tire', {'bike' => 'http://schwinn.com/'})
31
+ # node.search('bike|tire', {'bike' => 'http://schwinn.com/'})
32
+ #
33
+ # For XPath queries, a hash of variable bindings may also be appended to the namespace
34
+ # bindings. For example:
35
+ #
36
+ # node.search('.//address[@domestic=$value]', nil, {:value => 'Yes'})
37
+ #
38
+ # 💡 Custom XPath functions and CSS pseudo-selectors may also be defined. To define custom
39
+ # functions create a class and implement the function you want to define. The first argument
40
+ # to the method will be the current matching NodeSet. Any other arguments are ones that you
41
+ # pass in. Note that this class may appear anywhere in the argument list. For example:
42
+ #
43
+ # handler = Class.new {
44
+ # def regex node_set, regex
45
+ # node_set.find_all { |node| node['some_attribute'] =~ /#{regex}/ }
46
+ # end
47
+ # }.new
48
+ # node.search('.//title[regex(., "\w+")]', 'div.employee:regex("[0-9]+")', handler)
49
+ #
50
+ # See Searchable#xpath and Searchable#css for further usage help.
51
def search(*args)
  paths, handler, ns, binds = extract_params(args)

  # A query that matches LOOKS_LIKE_XPATH is passed through as-is; anything else is
  # treated as a CSS rule and translated to XPath first.
  translated = paths.map do |path|
    query = path.to_s
    next query if LOOKS_LIKE_XPATH.match?(query)

    xpath_query_from_css_rule(query, ns)
  end
  xpaths = translated.flatten.uniq

  # Namespaces, handler and bindings are only forwarded when present.
  trailing = [ns, handler, binds].compact
  xpath(*xpaths, *trailing)
end
60
+
61
+ alias_method :/, :search
62
+
63
+ ###
64
+ # call-seq:
65
+ # at(*paths, [namespace-bindings, xpath-variable-bindings, custom-handler-class])
66
+ #
67
+ # Search this object for +paths+, and return only the first
68
+ # result. +paths+ must be one or more XPath or CSS queries.
69
+ #
70
+ # See Searchable#search for more information.
71
def at(*args)
  # Run #search with the same arguments and keep only the first element of its result.
  matches = search(*args)
  matches.first
end
74
+
75
+ alias_method :%, :at
76
+
77
+ ###
78
+ # call-seq:
79
+ # css(*rules, [namespace-bindings, custom-pseudo-class])
80
+ #
81
+ # Search this object for CSS +rules+. +rules+ must be one or more CSS
82
+ # selectors. For example:
83
+ #
84
+ # node.css('title')
85
+ # node.css('body h1.bold')
86
+ # node.css('div + p.green', 'div#one')
87
+ #
88
+ # A hash of namespace bindings may be appended. For example:
89
+ #
90
+ # node.css('bike|tire', {'bike' => 'http://schwinn.com/'})
91
+ #
92
+ # 💡 Custom CSS pseudo classes may also be defined which are mapped to a custom XPath
93
+ # function. To define custom pseudo classes, create a class and implement the custom pseudo
94
+ # class you want defined. The first argument to the method will be the matching context
95
+ # NodeSet. Any other arguments are ones that you pass in. For example:
96
+ #
97
+ # handler = Class.new {
98
+ # def regex(node_set, regex)
99
+ # node_set.find_all { |node| node['some_attribute'] =~ /#{regex}/ }
100
+ # end
101
+ # }.new
102
+ # node.css('title:regex("\w+")', handler)
103
+ #
104
+ # 💡 Some XPath syntax is supported in CSS queries. For example, to query for an attribute:
105
+ #
106
+ # node.css('img > @href') # returns all +href+ attributes on an +img+ element
107
+ # node.css('img / @href') # same
108
+ #
109
+ # # ⚠ this returns +class+ attributes from all +div+ elements AND THEIR CHILDREN!
110
+ # node.css('div @class')
111
+ #
112
+ # node.css
113
+ #
114
+ # 💡 Array-like syntax is supported in CSS queries as an alternative to using +:nth-child()+.
115
+ #
116
+ # ⚠ NOTE that indices are 1-based like +:nth-child+ and not 0-based like Ruby Arrays. For
117
+ # example:
118
+ #
119
+ # # equivalent to 'li:nth-child(2)'
120
+ # node.css('li[2]') # retrieve the second li element in a list
121
+ #
122
+ # ⚠ NOTE that the CSS query string is case-sensitive with regards to your document type. HTML
123
+ # tags will match only lowercase CSS queries, so if you search for "H1" in an HTML document,
124
+ # you'll never find anything. However, "H1" might be found in an XML document, where tags
125
+ # names are case-sensitive (e.g., "H1" is distinct from "h1").
126
def css(*args)
  # extract_params also returns XPath variable bindings as a fourth value; it is not
  # used for CSS queries and is dropped here.
  rules, handler, ns = extract_params(args)
  css_internal(self, rules, handler, ns)
end
131
+
132
+ ##
133
+ # call-seq:
134
+ # at_css(*rules, [namespace-bindings, custom-pseudo-class])
135
+ #
136
+ # Search this object for CSS +rules+, and return only the first
137
+ # match. +rules+ must be one or more CSS selectors.
138
+ #
139
+ # See Searchable#css for more information.
140
def at_css(*args)
  # Run #css with the same arguments and keep only the first element of its result.
  matches = css(*args)
  matches.first
end
143
+
144
+ ###
145
+ # call-seq:
146
+ # xpath(*paths, [namespace-bindings, variable-bindings, custom-handler-class])
147
+ #
148
+ # Search this node for XPath +paths+. +paths+ must be one or more XPath
149
+ # queries.
150
+ #
151
+ # node.xpath('.//title')
152
+ #
153
+ # A hash of namespace bindings may be appended. For example:
154
+ #
155
+ # node.xpath('.//foo:name', {'foo' => 'http://example.org/'})
156
+ # node.xpath('.//xmlns:name', node.root.namespaces)
157
+ #
158
+ # A hash of variable bindings may also be appended to the namespace bindings. For example:
159
+ #
160
+ # node.xpath('.//address[@domestic=$value]', nil, {:value => 'Yes'})
161
+ #
162
+ # 💡 Custom XPath functions may also be defined. To define custom functions create a class and
163
+ # implement the function you want to define. The first argument to the method will be the
164
+ # current matching NodeSet. Any other arguments are ones that you pass in. Note that this
165
+ # class may appear anywhere in the argument list. For example:
166
+ #
167
+ # handler = Class.new {
168
+ # def regex(node_set, regex)
169
+ # node_set.find_all { |node| node['some_attribute'] =~ /#{regex}/ }
170
+ # end
171
+ # }.new
172
+ # node.xpath('.//title[regex(., "\w+")]', handler)
173
+ #
174
def xpath(*args)
  # extract_params yields [paths, handler, ns, binds], which is exactly the argument
  # order xpath_internal expects after the context node.
  xpath_internal(self, *extract_params(args))
end
179
+
180
+ ##
181
+ # call-seq:
182
+ # at_xpath(*paths, [namespace-bindings, variable-bindings, custom-handler-class])
183
+ #
184
+ # Search this node for XPath +paths+, and return only the first
185
+ # match. +paths+ must be one or more XPath queries.
186
+ #
187
+ # See Searchable#xpath for more information.
188
def at_xpath(*args)
  # Run #xpath with the same arguments and keep only the first element of its result.
  matches = xpath(*args)
  matches.first
end
191
+
192
+ # :call-seq:
193
+ # >(selector) → NodeSet
194
+ #
195
+ # Search this node's immediate children using CSS selector +selector+
196
def >(selector) # rubocop:disable Naming/BinaryOperatorParameterName
  # Use the namespaces declared on the document root, or none when there is no root.
  root = document.root
  ns = root.nil? ? {} : (root.namespaces || {})

  # The "./" prefix restricts the generated XPath to direct children of this node.
  child_queries = CSS.xpath_for(selector, prefix: "./", ns: ns)
  xpath(child_queries.first)
end
200
+
201
+ # :section:
202
+
203
+ private
204
+
205
def css_internal(node, rules, handler, ns)
  # CSS rules are translated to XPath and evaluated without variable bindings.
  xpath_queries = css_rules_to_xpath(rules, ns)
  xpath_internal(node, xpath_queries, handler, ns, nil)
end
208
+
209
def xpath_internal(node, paths, handler, ns, binds)
  owner = node.document
  return NodeSet.new(owner) if !owner

  # A single query needs no merging: return its result directly.
  return xpath_impl(node, paths.first, handler, ns, binds) if paths.length == 1

  # Several queries: evaluate each in turn and append every match to one NodeSet.
  NodeSet.new(owner) do |merged|
    paths.each do |query|
      matches = xpath_impl(node, query, handler, ns, binds)
      matches.each { |match| merged << match }
    end
  end
end
223
+
224
def xpath_impl(node, path, handler, ns, binds)
  context = XPathContext.new(node)
  context.register_namespaces(ns)

  # When libxml is not in use, the "xmlns:" prefix in the query is rewritten to " :".
  query = Nokogiri.uses_libxml? ? path : path.gsub(/xmlns:/, " :")

  # Variable names are registered as strings; binds may be nil.
  unless binds.nil?
    binds.each { |name, value| context.register_variable(name.to_s, value) }
  end

  context.evaluate(query, handler)
end
235
+
236
def css_rules_to_xpath(rules, ns)
  # Translate each CSS rule to its XPath query, preserving order.
  rules.map do |css_rule|
    xpath_query_from_css_rule(css_rule, ns)
  end
end
239
+
240
def xpath_query_from_css_rule(rule, ns)
  visitor_class = Nokogiri::CSS::XPathVisitor
  visitor = visitor_class.new(
    builtins: visitor_class::BuiltinsConfig::OPTIMAL,
    doctype: document.xpath_doctype,
  )

  # One XPath query is generated per implied context of the including class; the
  # alternatives are then combined into a single XPath union expression.
  alternatives = self.class::IMPLIED_XPATH_CONTEXTS.map do |context_prefix|
    options = { prefix: context_prefix, ns: ns, visitor: visitor }
    CSS.xpath_for(rule.to_s, options)
  end
  alternatives.join(" | ")
end
250
+
251
def extract_params(params) # :nodoc:
  # Splits a splatted argument list into
  # [queries, handler, namespace bindings, variable bindings].
  #
  # The handler is the first argument that is neither a query (String/Symbol), a bindings
  # Hash, nor a +nil+ placeholder. Subclasses of those types are matched too, consistent
  # with the <tt>Hash === params.last</tt> test below. +nil+ must be skipped explicitly:
  # otherwise +find+ stops at a +nil+ placeholder that precedes the handler, and the
  # handler is left among the queries.
  handler = params.find do |param|
    case param
    when Hash, String, Symbol, nil then false
    else true
    end
  end
  params -= [handler] if handler

  # Peel trailing hashes / nil placeholders off the end. They were supplied in the order
  # (namespaces, variable bindings), so reverse the popped values to restore that order.
  hashes = []
  while Hash === params.last || params.last.nil?
    hashes << params.pop
    break if params.empty?
  end
  ns, binds = hashes.reverse

  # With no explicit namespaces, fall back to those of the document's root element.
  ns ||= (document.root&.namespaces || {})

  [params, handler, ns, binds]
end
268
+ end
269
+ end
270
+ end
@@ -0,0 +1,72 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Nokogiri
4
+ module XML
5
+ ###
6
+ # This class provides information about XML SyntaxErrors. These
7
+ # exceptions are typically stored on Nokogiri::XML::Document#errors.
8
class SyntaxError < ::Nokogiri::SyntaxError
  attr_reader :domain, :code, :level, :file, :line
  attr_reader :str1, :str2, :str3, :int1, :column

  ###
  # true when +level+ is 0 (not an error)
  def none?
    level == 0
  end

  ###
  # true when +level+ is 1 (warning)
  def warning?
    level == 1
  end

  ###
  # true when +level+ is 2 (error)
  def error?
    level == 2
  end

  ###
  # true when +level+ is 3 (fatal)
  def fatal?
    level == 3
  end

  # Builds "line:column: LEVEL: message", leaving out the location and level parts
  # when they are unavailable. The result keeps the encoding of the original message.
  def to_s
    message = super.chomp
    parts = [location_to_s, level_to_s, message].compact
    parts.join(": ").force_encoding(message.encoding)
  end

  private

  # Label for the severity level; nil for level 0 or any unrecognized level.
  def level_to_s
    case level
    when 3
      "FATAL"
    when 2
      "ERROR"
    when 1
      "WARNING"
    end
  end

  def nil_or_zero?(attribute)
    attribute.nil? || attribute.zero?
  end

  # "line:column", or nil when neither value carries information.
  def location_to_s
    located = !(nil_or_zero?(line) && nil_or_zero?(column))
    "#{line}:#{column}" if located
  end
end
71
+ end
72
+ end
@@ -0,0 +1,11 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Nokogiri
4
+ module XML
5
class Text < Nokogiri::XML::CharacterData
  # Assigns +string+, converted with #to_s, as this node's native content.
  def content=(string)
    text = string.to_s
    self.native_content = text
  end
end
10
+ end
11
+ end
@@ -0,0 +1,13 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Nokogiri
4
+ module XML
5
+ module XPath
6
class SyntaxError < XML::SyntaxError
  # The parent class's message followed by +str1+ (when it is not nil), joined by ": ".
  def to_s
    base = super.chomp
    [base, str1].compact.join(": ")
  end
end
11
+ end
12
+ end
13
+ end
@@ -0,0 +1,21 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Nokogiri
4
+ module XML
5
+ module XPath
6
+ # The XPath search prefix to search globally, +//+
7
+ GLOBAL_SEARCH_PREFIX = "//"
8
+
9
+ # The XPath search prefix to search direct descendants of the root element, +/+
10
+ ROOT_SEARCH_PREFIX = "/"
11
+
12
+ # The XPath search prefix to search direct descendants of the current element, +./+
13
+ CURRENT_SEARCH_PREFIX = "./"
14
+
15
+ # The XPath search prefix to search anywhere in the current element's subtree, +.//+
16
+ SUBTREE_SEARCH_PREFIX = ".//"
17
+ end
18
+ end
19
+ end
20
+
21
+ require_relative "xpath/syntax_error"
@@ -0,0 +1,16 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Nokogiri
4
+ module XML
5
class XPathContext
  ###
  # Register every prefix => URI pair in +namespaces+ on this context.
  def register_namespaces(namespaces)
    namespaces.each do |name, uri|
      # Drop everything up to the last colon, e.g. the 'xmlns:' or 'xml:' part.
      prefix = name.to_s.gsub(/.*:/, "")
      register_ns(prefix, uri)
    end
  end
end
15
+ end
16
+ end
@@ -0,0 +1,76 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Nokogiri
4
+ class << self
5
+ ###
6
+ # Parse XML. Convenience method for Nokogiri::XML::Document.parse
7
def XML(thing, url = nil, encoding = nil, options = XML::ParseOptions::DEFAULT_XML, &block)
  # Straight delegation: all arguments and the block go to Document.parse unchanged.
  document_class = Nokogiri::XML::Document
  document_class.parse(thing, url, encoding, options, &block)
end
10
+ end
11
+
12
+ module XML
13
+ # Original C14N 1.0 spec canonicalization
14
+ XML_C14N_1_0 = 0
15
+ # Exclusive C14N 1.0 spec canonicalization
16
+ XML_C14N_EXCLUSIVE_1_0 = 1
17
+ # C14N 1.1 spec canonicalization
18
+ XML_C14N_1_1 = 2
19
+ class << self
20
+ ###
21
+ # Parse an XML document using the Nokogiri::XML::Reader API. See
22
+ # Nokogiri::XML::Reader for more information
23
def Reader(string_or_io, url = nil, encoding = nil, options = ParseOptions::STRICT)
  # Integer option flags are wrapped in ParseOptions so a block can adjust them.
  options = Nokogiri::XML::ParseOptions.new(options) if Integer === options
  yield options if block_given?

  # Anything that responds to #read is handled as an IO; everything else as in-memory data.
  if string_or_io.respond_to?(:read)
    Reader.from_io(string_or_io, url, encoding, options.to_i)
  else
    Reader.from_memory(string_or_io, url, encoding, options.to_i)
  end
end
33
+
34
+ ###
35
+ # Parse XML. Convenience method for Nokogiri::XML::Document.parse
36
def parse(thing, url = nil, encoding = nil, options = ParseOptions::DEFAULT_XML, &block)
  # Straight delegation: all arguments and the block go to Document.parse unchanged.
  document_class = Document
  document_class.parse(thing, url, encoding, options, &block)
end
39
+
40
+ ####
41
+ # Parse a fragment from +string+ into a NodeSet.
42
def fragment(string, options = ParseOptions::DEFAULT_XML, &block)
  # Straight delegation to DocumentFragment.parse, block included.
  fragment_class = XML::DocumentFragment
  fragment_class.parse(string, options, &block)
end
45
+ end
46
+ end
47
+ end
48
+
49
+ require_relative "xml/pp"
50
+ require_relative "xml/parse_options"
51
+ require_relative "xml/sax"
52
+ require_relative "xml/searchable"
53
+ require_relative "xml/node"
54
+ require_relative "xml/attribute_decl"
55
+ require_relative "xml/element_decl"
56
+ require_relative "xml/element_content"
57
+ require_relative "xml/character_data"
58
+ require_relative "xml/namespace"
59
+ require_relative "xml/attr"
60
+ require_relative "xml/dtd"
61
+ require_relative "xml/cdata"
62
+ require_relative "xml/text"
63
+ require_relative "xml/document"
64
+ require_relative "xml/document_fragment"
65
+ require_relative "xml/processing_instruction"
66
+ require_relative "xml/node_set"
67
+ require_relative "xml/syntax_error"
68
+ require_relative "xml/xpath"
69
+ require_relative "xml/xpath_context"
70
+ require_relative "xml/builder"
71
+ require_relative "xml/reader"
72
+ require_relative "xml/notation"
73
+ require_relative "xml/entity_decl"
74
+ require_relative "xml/entity_reference"
75
+ require_relative "xml/schema"
76
+ require_relative "xml/relax_ng"
@@ -0,0 +1,27 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Nokogiri
4
+ module XSLT
5
+ ###
6
+ # A Stylesheet represents an XSLT Stylesheet object. Stylesheet creation
7
+ # is done through Nokogiri.XSLT. Here is an example of transforming
8
+ # an XML::Document with a Stylesheet:
9
+ #
10
+ # doc = Nokogiri::XML(File.read('some_file.xml'))
11
+ # xslt = Nokogiri::XSLT(File.read('some_transformer.xslt'))
12
+ #
13
+ # puts xslt.transform(doc)
14
+ #
15
+ # See Nokogiri::XSLT::Stylesheet#transform for more transformation
16
+ # information.
17
class Stylesheet
  ###
  # Transform +document+ with this stylesheet and +params+, then return the
  # serialized form of the result.
  # +params+ is an array of strings used as XSLT parameters.
  def apply_to(document, params = [])
    transformed = transform(document, params)
    serialize(transformed)
  end
end
26
+ end
27
+ end
@@ -0,0 +1,65 @@
1
+ # coding: utf-8
2
+ # frozen_string_literal: true
3
+
4
+ module Nokogiri
5
+ class << self
6
+ ###
7
+ # Create a Nokogiri::XSLT::Stylesheet with +stylesheet+.
8
+ #
9
+ # Example:
10
+ #
11
+ # xslt = Nokogiri::XSLT(File.read(ARGV[0]))
12
+ #
13
def XSLT(stylesheet, modules = {})
  # Straight delegation to XSLT.parse.
  parser = XSLT
  parser.parse(stylesheet, modules)
end
16
+ end
17
+
18
+ ###
19
+ # See Nokogiri::XSLT::Stylesheet for creating and manipulating
20
+ # Stylesheet object.
21
+ module XSLT
22
+ class << self
23
+ ###
24
+ # Parse the stylesheet in +string+, register any +modules+
25
def parse(string, modules = {})
  # Register each extension module class under its URL before parsing.
  modules.each { |url, klass| XSLT.register(url, klass) }

  doc = XML::Document.parse(string, nil, nil, XML::ParseOptions::DEFAULT_XSLT)

  # On JRuby the original source string is passed along with the parsed document.
  stylesheet_args = Nokogiri.jruby? ? [doc, string] : [doc]
  Stylesheet.parse_stylesheet_doc(*stylesheet_args)
end
37
+
38
+ # :call-seq:
39
+ # quote_params(params) → Array
40
+ #
41
+ # Quote parameters in +params+ for stylesheet safety.
42
+ # See Nokogiri::XSLT::Stylesheet.transform for example usage.
43
+ #
44
+ # [Parameters]
45
+ # - +params+ (Hash, Array) XSLT parameters (key->value, or tuples of [key, value])
46
+ #
47
+ # [Returns] Array of string parameters, with quotes correctly escaped for use with XSLT::Stylesheet.transform
48
+ #
49
def quote_params(params)
  # Flattening turns both a Hash and an Array of [key, value] tuples into one flat
  # key/value sequence, which is then walked two items at a time.
  pairs = params.flatten.each_slice(2)

  pairs.each_with_object([]) do |pair, quoted|
    key, value = pair.map(&:to_s)

    literal =
      if /'/.match?(value)
        # A single quote cannot appear inside a single-quoted literal, so the value is
        # split at each quote and reassembled with concat(), quoting the "'" in doubles.
        pieces = value.gsub(/'/, %q{', "'", '})
        "concat('#{pieces}')"
      else
        "'#{value}'"
      end

    quoted.push(key, literal)
  end
end
61
+ end
62
+ end
63
+ end
64
+
65
+ require_relative "xslt/stylesheet"
data/lib/nokogiri.rb ADDED
@@ -0,0 +1,120 @@
1
+ # coding: utf-8
2
+ # frozen_string_literal: true
3
+
4
+ if defined?(RUBY_ENGINE) && RUBY_ENGINE == "jruby"
5
+ require_relative "nokogiri/jruby/dependencies"
6
+ end
7
+
8
+ require_relative "nokogiri/extension"
9
+
10
+ # Nokogiri parses and searches XML/HTML very quickly, and also has
11
+ # correctly implemented CSS3 selector support as well as XPath 1.0
12
+ # support.
13
+ #
14
+ # Parsing a document returns either a Nokogiri::XML::Document, or a
15
+ # Nokogiri::HTML4::Document depending on the kind of document you parse.
16
+ #
17
+ # Here is an example:
18
+ #
19
+ # require 'nokogiri'
20
+ # require 'open-uri'
21
+ #
22
+ # # Get a Nokogiri::HTML4::Document for the page we’re interested in...
23
+ #
24
+ # doc = Nokogiri::HTML4(URI.open('http://www.google.com/search?q=tenderlove'))
25
+ #
26
+ # # Do funky things with it using Nokogiri::XML::Node methods...
27
+ #
28
+ # ####
29
+ # # Search for nodes by css
30
+ # doc.css('h3.r a.l').each do |link|
31
+ # puts link.content
32
+ # end
33
+ #
34
+ # See also:
35
+ #
36
+ # - Nokogiri::XML::Searchable#css for more information about CSS searching
37
+ # - Nokogiri::XML::Searchable#xpath for more information about XPath searching
38
+ module Nokogiri
39
+ class << self
40
+ ###
41
+ # Parse an HTML or XML document. +string+ contains the document.
42
def parse(string, url = nil, encoding = nil, options = nil)
  # Anything that responds to #read is parsed as HTML. For strings, expect an HTML
  # indicator to appear within the first 512 characters of a document.
  # (<?xml ?> + <?xml-stylesheet ?> shouldn't be that long)
  looks_like_html = string.respond_to?(:read) ||
    /^\s*<(?:!DOCTYPE\s+)?html[\s>]/i.match?(string[0, 512])

  doc =
    if looks_like_html
      Nokogiri.HTML4(string, url, encoding, options || XML::ParseOptions::DEFAULT_HTML)
    else
      Nokogiri.XML(string, url, encoding, options || XML::ParseOptions::DEFAULT_XML)
    end

  # The parsed document is yielded to the block, if any, and always returned.
  yield doc if block_given?
  doc
end
57
+
58
+ ###
59
+ # Create a new Nokogiri::XML::DocumentFragment
60
def make(input = nil, opts = {}, &blk)
  # Without input, the block is handed to Nokogiri(). +opts+ is accepted but not used here.
  return Nokogiri(&blk) unless input

  # With input, parse it as an HTML4 fragment and return that fragment's first child.
  fragment = Nokogiri::HTML4.fragment(input)
  fragment.children.first
end
67
+
68
+ ###
69
+ # Parse a document and add the Slop decorator. The Slop decorator
70
+ # implements method_missing such that methods may be used instead of CSS
71
+ # or XPath. For example:
72
+ #
73
+ # doc = Nokogiri::Slop(<<-eohtml)
74
+ # <html>
75
+ # <body>
76
+ # <p>first</p>
77
+ # <p>second</p>
78
+ # </body>
79
+ # </html>
80
+ # eohtml
81
+ # assert_equal('second', doc.html.body.p[1].text)
82
+ #
83
def Slop(*args, &block)
  # Build the document through Nokogiri(), then call slop! on it.
  doc = Nokogiri(*args, &block)
  doc.slop!
end
86
+
87
+ # :nodoc:
88
def install_default_aliases
  # Deprecated entry point: emit the deprecation warning, then forward to EncodingHandler.
  deprecation = "Nokogiri.install_default_aliases is deprecated and will be removed in a future version of Nokogiri. Please call Nokogiri::EncodingHandler.install_default_aliases instead."
  warn(deprecation)
  Nokogiri::EncodingHandler.install_default_aliases
end
92
+ end
93
+ end
94
+
95
+ ###
96
+ # Parse a document contained in +args+. Nokogiri will try to guess what type of document you are
97
+ # attempting to parse. For more information, see Nokogiri.parse
98
+ #
99
+ # To specify the type of document, use {Nokogiri.XML}, {Nokogiri.HTML4}, or {Nokogiri.HTML5}.
100
def Nokogiri(*args, &block)
  # Without a block, the arguments are handed to Nokogiri.parse.
  return Nokogiri.parse(*args) unless block

  # With a block, it drives an HTML4 builder and the built document's root is returned.
  builder = Nokogiri::HTML4::Builder.new(&block)
  builder.doc.root
end
107
+
108
+ require_relative "nokogiri/version"
109
+ require_relative "nokogiri/class_resolver"
110
+ require_relative "nokogiri/syntax_error"
111
+ require_relative "nokogiri/xml"
112
+ require_relative "nokogiri/xslt"
113
+ require_relative "nokogiri/html4"
114
+ require_relative "nokogiri/html"
115
+ require_relative "nokogiri/decorators/slop"
116
+ require_relative "nokogiri/css"
117
+ require_relative "nokogiri/html4/builder"
118
+ require_relative "nokogiri/encoding_handler"
119
+
120
+ require_relative "nokogiri/html5" if Nokogiri.uses_gumbo?