nokogiri 1.14.0.rc1-arm-linux
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of nokogiri might be problematic. Click here for more details.
- checksums.yaml +7 -0
- data/Gemfile +38 -0
- data/LICENSE-DEPENDENCIES.md +2224 -0
- data/LICENSE.md +9 -0
- data/README.md +287 -0
- data/bin/nokogiri +131 -0
- data/dependencies.yml +41 -0
- data/ext/nokogiri/depend +38 -0
- data/ext/nokogiri/extconf.rb +1082 -0
- data/ext/nokogiri/gumbo.c +594 -0
- data/ext/nokogiri/html4_document.c +166 -0
- data/ext/nokogiri/html4_element_description.c +294 -0
- data/ext/nokogiri/html4_entity_lookup.c +37 -0
- data/ext/nokogiri/html4_sax_parser_context.c +114 -0
- data/ext/nokogiri/html4_sax_push_parser.c +95 -0
- data/ext/nokogiri/include/libexslt/exslt.h +108 -0
- data/ext/nokogiri/include/libexslt/exsltconfig.h +70 -0
- data/ext/nokogiri/include/libexslt/exsltexports.h +63 -0
- data/ext/nokogiri/include/libxml2/libxml/HTMLparser.h +306 -0
- data/ext/nokogiri/include/libxml2/libxml/HTMLtree.h +147 -0
- data/ext/nokogiri/include/libxml2/libxml/SAX.h +204 -0
- data/ext/nokogiri/include/libxml2/libxml/SAX2.h +172 -0
- data/ext/nokogiri/include/libxml2/libxml/c14n.h +128 -0
- data/ext/nokogiri/include/libxml2/libxml/catalog.h +182 -0
- data/ext/nokogiri/include/libxml2/libxml/chvalid.h +230 -0
- data/ext/nokogiri/include/libxml2/libxml/debugXML.h +217 -0
- data/ext/nokogiri/include/libxml2/libxml/dict.h +81 -0
- data/ext/nokogiri/include/libxml2/libxml/encoding.h +232 -0
- data/ext/nokogiri/include/libxml2/libxml/entities.h +153 -0
- data/ext/nokogiri/include/libxml2/libxml/globals.h +499 -0
- data/ext/nokogiri/include/libxml2/libxml/hash.h +236 -0
- data/ext/nokogiri/include/libxml2/libxml/list.h +137 -0
- data/ext/nokogiri/include/libxml2/libxml/nanoftp.h +186 -0
- data/ext/nokogiri/include/libxml2/libxml/nanohttp.h +81 -0
- data/ext/nokogiri/include/libxml2/libxml/parser.h +1244 -0
- data/ext/nokogiri/include/libxml2/libxml/parserInternals.h +656 -0
- data/ext/nokogiri/include/libxml2/libxml/pattern.h +100 -0
- data/ext/nokogiri/include/libxml2/libxml/relaxng.h +218 -0
- data/ext/nokogiri/include/libxml2/libxml/schemasInternals.h +958 -0
- data/ext/nokogiri/include/libxml2/libxml/schematron.h +142 -0
- data/ext/nokogiri/include/libxml2/libxml/threads.h +91 -0
- data/ext/nokogiri/include/libxml2/libxml/tree.h +1312 -0
- data/ext/nokogiri/include/libxml2/libxml/uri.h +94 -0
- data/ext/nokogiri/include/libxml2/libxml/valid.h +463 -0
- data/ext/nokogiri/include/libxml2/libxml/xinclude.h +129 -0
- data/ext/nokogiri/include/libxml2/libxml/xlink.h +189 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlIO.h +368 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlautomata.h +146 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlerror.h +947 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlexports.h +77 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlmemory.h +226 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlmodule.h +57 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlreader.h +428 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlregexp.h +222 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlsave.h +88 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlschemas.h +246 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlschemastypes.h +152 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlstring.h +140 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlunicode.h +202 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlversion.h +503 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlwriter.h +488 -0
- data/ext/nokogiri/include/libxml2/libxml/xpath.h +575 -0
- data/ext/nokogiri/include/libxml2/libxml/xpathInternals.h +632 -0
- data/ext/nokogiri/include/libxml2/libxml/xpointer.h +137 -0
- data/ext/nokogiri/include/libxslt/attributes.h +38 -0
- data/ext/nokogiri/include/libxslt/documents.h +93 -0
- data/ext/nokogiri/include/libxslt/extensions.h +262 -0
- data/ext/nokogiri/include/libxslt/extra.h +72 -0
- data/ext/nokogiri/include/libxslt/functions.h +78 -0
- data/ext/nokogiri/include/libxslt/imports.h +75 -0
- data/ext/nokogiri/include/libxslt/keys.h +53 -0
- data/ext/nokogiri/include/libxslt/namespaces.h +68 -0
- data/ext/nokogiri/include/libxslt/numbersInternals.h +73 -0
- data/ext/nokogiri/include/libxslt/pattern.h +84 -0
- data/ext/nokogiri/include/libxslt/preproc.h +43 -0
- data/ext/nokogiri/include/libxslt/security.h +104 -0
- data/ext/nokogiri/include/libxslt/templates.h +77 -0
- data/ext/nokogiri/include/libxslt/transform.h +207 -0
- data/ext/nokogiri/include/libxslt/variables.h +118 -0
- data/ext/nokogiri/include/libxslt/xslt.h +110 -0
- data/ext/nokogiri/include/libxslt/xsltInternals.h +1982 -0
- data/ext/nokogiri/include/libxslt/xsltconfig.h +179 -0
- data/ext/nokogiri/include/libxslt/xsltexports.h +64 -0
- data/ext/nokogiri/include/libxslt/xsltlocale.h +76 -0
- data/ext/nokogiri/include/libxslt/xsltutils.h +310 -0
- data/ext/nokogiri/libxml2_backwards_compat.c +121 -0
- data/ext/nokogiri/nokogiri.c +259 -0
- data/ext/nokogiri/nokogiri.h +235 -0
- data/ext/nokogiri/test_global_handlers.c +40 -0
- data/ext/nokogiri/xml_attr.c +103 -0
- data/ext/nokogiri/xml_attribute_decl.c +70 -0
- data/ext/nokogiri/xml_cdata.c +57 -0
- data/ext/nokogiri/xml_comment.c +62 -0
- data/ext/nokogiri/xml_document.c +689 -0
- data/ext/nokogiri/xml_document_fragment.c +44 -0
- data/ext/nokogiri/xml_dtd.c +208 -0
- data/ext/nokogiri/xml_element_content.c +128 -0
- data/ext/nokogiri/xml_element_decl.c +69 -0
- data/ext/nokogiri/xml_encoding_handler.c +104 -0
- data/ext/nokogiri/xml_entity_decl.c +112 -0
- data/ext/nokogiri/xml_entity_reference.c +50 -0
- data/ext/nokogiri/xml_namespace.c +186 -0
- data/ext/nokogiri/xml_node.c +2425 -0
- data/ext/nokogiri/xml_node_set.c +496 -0
- data/ext/nokogiri/xml_processing_instruction.c +54 -0
- data/ext/nokogiri/xml_reader.c +794 -0
- data/ext/nokogiri/xml_relax_ng.c +183 -0
- data/ext/nokogiri/xml_sax_parser.c +316 -0
- data/ext/nokogiri/xml_sax_parser_context.c +283 -0
- data/ext/nokogiri/xml_sax_push_parser.c +166 -0
- data/ext/nokogiri/xml_schema.c +282 -0
- data/ext/nokogiri/xml_syntax_error.c +85 -0
- data/ext/nokogiri/xml_text.c +48 -0
- data/ext/nokogiri/xml_xpath_context.c +413 -0
- data/ext/nokogiri/xslt_stylesheet.c +363 -0
- data/gumbo-parser/CHANGES.md +63 -0
- data/gumbo-parser/Makefile +111 -0
- data/gumbo-parser/THANKS +27 -0
- data/lib/nokogiri/2.7/nokogiri.so +0 -0
- data/lib/nokogiri/3.0/nokogiri.so +0 -0
- data/lib/nokogiri/3.1/nokogiri.so +0 -0
- data/lib/nokogiri/3.2/nokogiri.so +0 -0
- data/lib/nokogiri/class_resolver.rb +67 -0
- data/lib/nokogiri/css/node.rb +54 -0
- data/lib/nokogiri/css/parser.rb +770 -0
- data/lib/nokogiri/css/parser.y +277 -0
- data/lib/nokogiri/css/parser_extras.rb +96 -0
- data/lib/nokogiri/css/syntax_error.rb +9 -0
- data/lib/nokogiri/css/tokenizer.rb +155 -0
- data/lib/nokogiri/css/tokenizer.rex +56 -0
- data/lib/nokogiri/css/xpath_visitor.rb +359 -0
- data/lib/nokogiri/css.rb +66 -0
- data/lib/nokogiri/decorators/slop.rb +44 -0
- data/lib/nokogiri/encoding_handler.rb +57 -0
- data/lib/nokogiri/extension.rb +32 -0
- data/lib/nokogiri/gumbo.rb +15 -0
- data/lib/nokogiri/html.rb +48 -0
- data/lib/nokogiri/html4/builder.rb +37 -0
- data/lib/nokogiri/html4/document.rb +214 -0
- data/lib/nokogiri/html4/document_fragment.rb +54 -0
- data/lib/nokogiri/html4/element_description.rb +25 -0
- data/lib/nokogiri/html4/element_description_defaults.rb +572 -0
- data/lib/nokogiri/html4/encoding_reader.rb +121 -0
- data/lib/nokogiri/html4/entity_lookup.rb +15 -0
- data/lib/nokogiri/html4/sax/parser.rb +63 -0
- data/lib/nokogiri/html4/sax/parser_context.rb +20 -0
- data/lib/nokogiri/html4/sax/push_parser.rb +37 -0
- data/lib/nokogiri/html4.rb +47 -0
- data/lib/nokogiri/html5/document.rb +168 -0
- data/lib/nokogiri/html5/document_fragment.rb +90 -0
- data/lib/nokogiri/html5/node.rb +98 -0
- data/lib/nokogiri/html5.rb +389 -0
- data/lib/nokogiri/jruby/dependencies.rb +3 -0
- data/lib/nokogiri/jruby/nokogiri_jars.rb +43 -0
- data/lib/nokogiri/syntax_error.rb +6 -0
- data/lib/nokogiri/version/constant.rb +6 -0
- data/lib/nokogiri/version/info.rb +223 -0
- data/lib/nokogiri/version.rb +4 -0
- data/lib/nokogiri/xml/attr.rb +66 -0
- data/lib/nokogiri/xml/attribute_decl.rb +20 -0
- data/lib/nokogiri/xml/builder.rb +487 -0
- data/lib/nokogiri/xml/cdata.rb +13 -0
- data/lib/nokogiri/xml/character_data.rb +9 -0
- data/lib/nokogiri/xml/document.rb +471 -0
- data/lib/nokogiri/xml/document_fragment.rb +205 -0
- data/lib/nokogiri/xml/dtd.rb +34 -0
- data/lib/nokogiri/xml/element_content.rb +38 -0
- data/lib/nokogiri/xml/element_decl.rb +15 -0
- data/lib/nokogiri/xml/entity_decl.rb +21 -0
- data/lib/nokogiri/xml/entity_reference.rb +20 -0
- data/lib/nokogiri/xml/namespace.rb +58 -0
- data/lib/nokogiri/xml/node/save_options.rb +68 -0
- data/lib/nokogiri/xml/node.rb +1563 -0
- data/lib/nokogiri/xml/node_set.rb +446 -0
- data/lib/nokogiri/xml/notation.rb +19 -0
- data/lib/nokogiri/xml/parse_options.rb +213 -0
- data/lib/nokogiri/xml/pp/character_data.rb +21 -0
- data/lib/nokogiri/xml/pp/node.rb +57 -0
- data/lib/nokogiri/xml/pp.rb +4 -0
- data/lib/nokogiri/xml/processing_instruction.rb +11 -0
- data/lib/nokogiri/xml/reader.rb +105 -0
- data/lib/nokogiri/xml/relax_ng.rb +38 -0
- data/lib/nokogiri/xml/sax/document.rb +167 -0
- data/lib/nokogiri/xml/sax/parser.rb +125 -0
- data/lib/nokogiri/xml/sax/parser_context.rb +21 -0
- data/lib/nokogiri/xml/sax/push_parser.rb +61 -0
- data/lib/nokogiri/xml/sax.rb +6 -0
- data/lib/nokogiri/xml/schema.rb +73 -0
- data/lib/nokogiri/xml/searchable.rb +270 -0
- data/lib/nokogiri/xml/syntax_error.rb +72 -0
- data/lib/nokogiri/xml/text.rb +11 -0
- data/lib/nokogiri/xml/xpath/syntax_error.rb +13 -0
- data/lib/nokogiri/xml/xpath.rb +21 -0
- data/lib/nokogiri/xml/xpath_context.rb +16 -0
- data/lib/nokogiri/xml.rb +76 -0
- data/lib/nokogiri/xslt/stylesheet.rb +27 -0
- data/lib/nokogiri/xslt.rb +65 -0
- data/lib/nokogiri.rb +120 -0
- data/lib/xsd/xmlparser/nokogiri.rb +104 -0
- metadata +317 -0
@@ -0,0 +1,270 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
4
|
+
module Nokogiri
|
5
|
+
module XML
|
6
|
+
#
|
7
|
+
# The Searchable module declares the interface used for searching your DOM.
|
8
|
+
#
|
9
|
+
# It implements the public methods #search, #css, and #xpath,
|
10
|
+
# as well as allowing specific implementations to specialize some
|
11
|
+
# of the important behaviors.
|
12
|
+
#
|
13
|
+
module Searchable
|
14
|
+
# Regular expression used by Searchable#search to determine if a query
|
15
|
+
# string is CSS or XPath
|
16
|
+
LOOKS_LIKE_XPATH = %r{^(\./|/|\.\.|\.$)}
|
17
|
+
|
18
|
+
# :section: Searching via XPath or CSS Queries
|
19
|
+
|
20
|
+
###
|
21
|
+
# call-seq:
|
22
|
+
# search(*paths, [namespace-bindings, xpath-variable-bindings, custom-handler-class])
|
23
|
+
#
|
24
|
+
# Search this object for +paths+. +paths+ must be one or more XPath or CSS queries:
|
25
|
+
#
|
26
|
+
# node.search("div.employee", ".//title")
|
27
|
+
#
|
28
|
+
# A hash of namespace bindings may be appended:
|
29
|
+
#
|
30
|
+
# node.search('.//bike:tire', {'bike' => 'http://schwinn.com/'})
|
31
|
+
# node.search('bike|tire', {'bike' => 'http://schwinn.com/'})
|
32
|
+
#
|
33
|
+
# For XPath queries, a hash of variable bindings may also be appended to the namespace
|
34
|
+
# bindings. For example:
|
35
|
+
#
|
36
|
+
# node.search('.//address[@domestic=$value]', nil, {:value => 'Yes'})
|
37
|
+
#
|
38
|
+
# 💡 Custom XPath functions and CSS pseudo-selectors may also be defined. To define custom
|
39
|
+
# functions create a class and implement the function you want to define. The first argument
|
40
|
+
# to the method will be the current matching NodeSet. Any other arguments are ones that you
|
41
|
+
# pass in. Note that this class may appear anywhere in the argument list. For example:
|
42
|
+
#
|
43
|
+
# handler = Class.new {
|
44
|
+
# def regex node_set, regex
|
45
|
+
# node_set.find_all { |node| node['some_attribute'] =~ /#{regex}/ }
|
46
|
+
# end
|
47
|
+
# }.new
|
48
|
+
# node.search('.//title[regex(., "\w+")]', 'div.employee:regex("[0-9]+")', handler)
|
49
|
+
#
|
50
|
+
# See Searchable#xpath and Searchable#css for further usage help.
|
51
|
+
# Run a mixed list of XPath and CSS queries against this object.
#
# Each query is stringified; queries matching LOOKS_LIKE_XPATH are used as-is,
# all others are translated from CSS via xpath_query_from_css_rule. The
# resulting expressions are de-duplicated and handed to #xpath together with
# whichever of namespaces / handler / bindings were supplied.
def search(*args)
  queries, custom_handler, namespaces, bindings = extract_params(args)

  expressions = queries.map do |query|
    text = query.to_s
    if LOOKS_LIKE_XPATH.match?(text)
      text
    else
      xpath_query_from_css_rule(text, namespaces)
    end
  end
  expressions = expressions.flatten.uniq

  trailing = [namespaces, custom_handler, bindings].compact
  xpath(*(expressions + trailing))
end
|
60
|
+
|
61
|
+
alias_method :/, :search
|
62
|
+
|
63
|
+
###
|
64
|
+
# call-seq:
|
65
|
+
# at(*paths, [namespace-bindings, xpath-variable-bindings, custom-handler-class])
|
66
|
+
#
|
67
|
+
# Search this object for +paths+, and return only the first
|
68
|
+
# result. +paths+ must be one or more XPath or CSS queries.
|
69
|
+
#
|
70
|
+
# See Searchable#search for more information.
|
71
|
+
# Forward +args+ to #search and return only the first match.
def at(*args)
  matches = search(*args)
  matches.first
end
|
74
|
+
|
75
|
+
alias_method :%, :at
|
76
|
+
|
77
|
+
###
|
78
|
+
# call-seq:
|
79
|
+
# css(*rules, [namespace-bindings, custom-pseudo-class])
|
80
|
+
#
|
81
|
+
# Search this object for CSS +rules+. +rules+ must be one or more CSS
|
82
|
+
# selectors. For example:
|
83
|
+
#
|
84
|
+
# node.css('title')
|
85
|
+
# node.css('body h1.bold')
|
86
|
+
# node.css('div + p.green', 'div#one')
|
87
|
+
#
|
88
|
+
# A hash of namespace bindings may be appended. For example:
|
89
|
+
#
|
90
|
+
# node.css('bike|tire', {'bike' => 'http://schwinn.com/'})
|
91
|
+
#
|
92
|
+
# 💡 Custom CSS pseudo classes may also be defined which are mapped to a custom XPath
|
93
|
+
# function. To define custom pseudo classes, create a class and implement the custom pseudo
|
94
|
+
# class you want defined. The first argument to the method will be the matching context
|
95
|
+
# NodeSet. Any other arguments are ones that you pass in. For example:
|
96
|
+
#
|
97
|
+
# handler = Class.new {
|
98
|
+
# def regex(node_set, regex)
|
99
|
+
# node_set.find_all { |node| node['some_attribute'] =~ /#{regex}/ }
|
100
|
+
# end
|
101
|
+
# }.new
|
102
|
+
# node.css('title:regex("\w+")', handler)
|
103
|
+
#
|
104
|
+
# 💡 Some XPath syntax is supported in CSS queries. For example, to query for an attribute:
|
105
|
+
#
|
106
|
+
# node.css('img > @href') # returns all +href+ attributes on an +img+ element
|
107
|
+
# node.css('img / @href') # same
|
108
|
+
#
|
109
|
+
# # ⚠ this returns +class+ attributes from all +div+ elements AND THEIR CHILDREN!
|
110
|
+
# node.css('div @class')
|
111
|
+
#
|
112
|
+
# node.css
|
113
|
+
#
|
114
|
+
# 💡 Array-like syntax is supported in CSS queries as an alternative to using +:nth-child()+.
|
115
|
+
#
|
116
|
+
# ⚠ NOTE that indices are 1-based like +:nth-child+ and not 0-based like Ruby Arrays. For
|
117
|
+
# example:
|
118
|
+
#
|
119
|
+
# # equivalent to 'li:nth-child(2)'
|
120
|
+
# node.css('li[2]') # retrieve the second li element in a list
|
121
|
+
#
|
122
|
+
# ⚠ NOTE that the CSS query string is case-sensitive with regards to your document type. HTML
|
123
|
+
# tags will match only lowercase CSS queries, so if you search for "H1" in an HTML document,
|
124
|
+
# you'll never find anything. However, "H1" might be found in an XML document, where tags
|
125
|
+
# names are case-sensitive (e.g., "H1" is distinct from "h1").
|
126
|
+
# Split +args+ into CSS rules, handler and namespaces (variable bindings are
# ignored for CSS) and run the rules against this object via css_internal.
def css(*args)
  selectors, custom_handler, namespaces, _unused_binds = extract_params(args)
  css_internal(self, selectors, custom_handler, namespaces)
end
|
131
|
+
|
132
|
+
##
|
133
|
+
# call-seq:
|
134
|
+
# at_css(*rules, [namespace-bindings, custom-pseudo-class])
|
135
|
+
#
|
136
|
+
# Search this object for CSS +rules+, and return only the first
|
137
|
+
# match. +rules+ must be one or more CSS selectors.
|
138
|
+
#
|
139
|
+
# See Searchable#css for more information.
|
140
|
+
# Forward +args+ to #css and return only the first match.
def at_css(*args)
  matches = css(*args)
  matches.first
end
|
143
|
+
|
144
|
+
###
|
145
|
+
# call-seq:
|
146
|
+
# xpath(*paths, [namespace-bindings, variable-bindings, custom-handler-class])
|
147
|
+
#
|
148
|
+
# Search this node for XPath +paths+. +paths+ must be one or more XPath
|
149
|
+
# queries.
|
150
|
+
#
|
151
|
+
# node.xpath('.//title')
|
152
|
+
#
|
153
|
+
# A hash of namespace bindings may be appended. For example:
|
154
|
+
#
|
155
|
+
# node.xpath('.//foo:name', {'foo' => 'http://example.org/'})
|
156
|
+
# node.xpath('.//xmlns:name', node.root.namespaces)
|
157
|
+
#
|
158
|
+
# A hash of variable bindings may also be appended to the namespace bindings. For example:
|
159
|
+
#
|
160
|
+
# node.xpath('.//address[@domestic=$value]', nil, {:value => 'Yes'})
|
161
|
+
#
|
162
|
+
# 💡 Custom XPath functions may also be defined. To define custom functions create a class and
|
163
|
+
# implement the function you want to define. The first argument to the method will be the
|
164
|
+
# current matching NodeSet. Any other arguments are ones that you pass in. Note that this
|
165
|
+
# class may appear anywhere in the argument list. For example:
|
166
|
+
#
|
167
|
+
# handler = Class.new {
|
168
|
+
# def regex(node_set, regex)
|
169
|
+
# node_set.find_all { |node| node['some_attribute'] =~ /#{regex}/ }
|
170
|
+
# end
|
171
|
+
# }.new
|
172
|
+
# node.xpath('.//title[regex(., "\w+")]', handler)
|
173
|
+
#
|
174
|
+
# Split +args+ into XPath expressions, handler, namespaces and variable
# bindings, then evaluate them against this object via xpath_internal.
def xpath(*args)
  expressions, custom_handler, namespaces, bindings = extract_params(args)
  xpath_internal(self, expressions, custom_handler, namespaces, bindings)
end
|
179
|
+
|
180
|
+
##
|
181
|
+
# call-seq:
|
182
|
+
# at_xpath(*paths, [namespace-bindings, variable-bindings, custom-handler-class])
|
183
|
+
#
|
184
|
+
# Search this node for XPath +paths+, and return only the first
|
185
|
+
# match. +paths+ must be one or more XPath queries.
|
186
|
+
#
|
187
|
+
# See Searchable#xpath for more information.
|
188
|
+
# Forward +args+ to #xpath and return only the first match.
def at_xpath(*args)
  matches = xpath(*args)
  matches.first
end
|
191
|
+
|
192
|
+
# :call-seq:
|
193
|
+
# >(selector) → NodeSet
|
194
|
+
#
|
195
|
+
# Search this node's immediate children using CSS selector +selector+
|
196
|
+
# Translate CSS +selector+ with the "./" prefix, using the document root's
# namespaces (or an empty Hash when there is no root / no namespaces), and
# evaluate the first resulting expression with #xpath.
def >(selector) # rubocop:disable Naming/BinaryOperatorParameterName
  declared = document.root&.namespaces
  child_queries = CSS.xpath_for(selector, prefix: "./", ns: declared || {})
  xpath(child_queries.first)
end
|
200
|
+
|
201
|
+
# :section:
|
202
|
+
|
203
|
+
private
|
204
|
+
|
205
|
+
# Convert the CSS +rules+ to XPath and evaluate them against +node+.
# No variable bindings are passed (the last argument is always nil).
def css_internal(node, rules, handler, ns)
  translated = css_rules_to_xpath(rules, ns)
  xpath_internal(node, translated, handler, ns, nil)
end
|
208
|
+
|
209
|
+
# Evaluate +paths+ against +node+.
#
# - When +node+ has no document, an empty NodeSet is returned.
# - A single path returns whatever xpath_impl returns for it.
# - Otherwise every path is evaluated in order and each result's elements are
#   appended to one combined NodeSet.
def xpath_internal(node, paths, handler, ns, binds)
  owner = node.document
  return NodeSet.new(owner) unless owner
  return xpath_impl(node, paths.first, handler, ns, binds) if paths.length == 1

  NodeSet.new(owner) do |merged|
    paths.each do |expression|
      xpath_impl(node, expression, handler, ns, binds).each do |match|
        merged << match
      end
    end
  end
end
|
223
|
+
|
224
|
+
# Evaluate a single XPath +path+ in a fresh XPathContext for +node+.
#
# Namespaces in +ns+ are registered first; when Nokogiri is not using libxml,
# every "xmlns:" in the path is rewritten to " :". Each entry of +binds+ (if
# any) is registered as an XPath variable under its stringified key.
def xpath_impl(node, path, handler, ns, binds)
  context = XPathContext.new(node)
  context.register_namespaces(ns)
  path = path.gsub(/xmlns:/, " :") unless Nokogiri.uses_libxml?

  unless binds.nil?
    binds.each { |name, value| context.register_variable(name.to_s, value) }
  end

  context.evaluate(path, handler)
end
|
235
|
+
|
236
|
+
# Translate each CSS rule in +rules+ into an XPath query string.
def css_rules_to_xpath(rules, ns)
  rules.map do |selector|
    xpath_query_from_css_rule(selector, ns)
  end
end
|
239
|
+
|
240
|
+
# Build one XPath expression for CSS +rule+.
#
# The rule is translated once per prefix listed in the including class's
# IMPLIED_XPATH_CONTEXTS, sharing a single XPathVisitor configured with the
# OPTIMAL builtins and this document's xpath_doctype. The per-prefix results
# are joined with " | ".
def xpath_query_from_css_rule(rule, ns)
  css_visitor = Nokogiri::CSS::XPathVisitor.new(
    builtins: Nokogiri::CSS::XPathVisitor::BuiltinsConfig::OPTIMAL,
    doctype: document.xpath_doctype,
  )

  per_context = self.class::IMPLIED_XPATH_CONTEXTS.map do |context_prefix|
    CSS.xpath_for(rule.to_s, { prefix: context_prefix, ns: ns, visitor: css_visitor })
  end
  per_context.join(" | ")
end
|
250
|
+
|
251
|
+
# Split the raw argument list of a search method into its components.
#
# +params+ holds the query strings/symbols, optionally one custom handler
# object, and up to two trailing hashes (namespace bindings, then XPath
# variable bindings). +nil+ may stand in for either hash.
#
# Returns <tt>[queries, handler, ns, binds]</tt>. When no namespace hash is
# supplied, +ns+ falls back to the namespaces of the document root, or to an
# empty Hash when there is no root or it reports none.
def extract_params(params) # :nodoc:
  # The handler is the first argument that is not a query, a bindings hash or
  # a nil placeholder. nil has to be excluded explicitly: otherwise
  # search(path, nil, binds, handler) stops at the nil, #find returns nil, and
  # the handler that follows is left in the query list.
  handler = params.find do |param|
    ![Hash, String, Symbol, NilClass].include?(param.class)
  end
  params -= [handler] if handler

  # Peel trailing hashes / nil placeholders off the end of the list.
  hashes = []
  while Hash === params.last || params.last.nil?
    hashes << params.pop
    break if params.empty?
  end
  # They were popped last-to-first, so reverse to get [ns, binds].
  ns, binds = hashes.reverse

  ns ||= (document.root&.namespaces || {})

  [params, handler, ns, binds]
end
|
268
|
+
end
|
269
|
+
end
|
270
|
+
end
|
@@ -0,0 +1,72 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Nokogiri
|
4
|
+
module XML
|
5
|
+
###
|
6
|
+
# This class provides information about XML SyntaxErrors. These
|
7
|
+
# exceptions are typically stored on Nokogiri::XML::Document#errors.
|
8
|
+
###
# Describes a single XML syntax problem: its domain, code, severity level and
# location, along with auxiliary string/integer details.
class SyntaxError < ::Nokogiri::SyntaxError
  attr_reader :domain, :code, :level, :file, :line
  attr_reader :str1, :str2, :str3, :int1, :column

  ###
  # true when the level is 0, i.e. this is not an error
  def none?
    level == 0
  end

  ###
  # true when the level is 1 (warning)
  def warning?
    level == 1
  end

  ###
  # true when the level is 2 (error)
  def error?
    level == 2
  end

  ###
  # true when the level is 3 (fatal)
  def fatal?
    level == 3
  end

  # Render as "line:column: LEVEL: message", leaving out whichever of the
  # location and level parts are unavailable. The result carries the same
  # encoding as the underlying message.
  def to_s
    message = super.chomp
    parts = [location_to_s, level_to_s, message].compact
    parts.join(": ").force_encoding(message.encoding)
  end

  private

  # Upper-case label for levels 1..3; nil for anything else.
  def level_to_s
    return "FATAL" if level == 3
    return "ERROR" if level == 2
    return "WARNING" if level == 1

    nil
  end

  def nil_or_zero?(attribute)
    return true if attribute.nil?

    attribute.zero?
  end

  # "line:column", or nil when neither value carries information.
  def location_to_s
    if nil_or_zero?(line) && nil_or_zero?(column)
      nil
    else
      "#{line}:#{column}"
    end
  end
end
|
71
|
+
end
|
72
|
+
end
|
@@ -0,0 +1,21 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Nokogiri
|
4
|
+
module XML
|
5
|
+
# XPath prefixes that select where a query starts searching.
module XPath
  # Search the whole document: +//+
  GLOBAL_SEARCH_PREFIX = "//"

  # Search direct descendants of the root element: +/+
  ROOT_SEARCH_PREFIX = "/"

  # Search direct descendants of the current element: +./+
  CURRENT_SEARCH_PREFIX = "./"

  # Search anywhere in the current element's subtree: +.//+
  SUBTREE_SEARCH_PREFIX = ".//"
end
|
18
|
+
end
|
19
|
+
end
|
20
|
+
|
21
|
+
require_relative "xpath/syntax_error"
|
@@ -0,0 +1,16 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Nokogiri
|
4
|
+
module XML
|
5
|
+
class XPathContext
  ###
  # Register every prefix/URI pair in +namespaces+ on this context. Anything
  # up to and including the last colon of a prefix is dropped first, so
  # "xmlns:foo" is registered as "foo".
  def register_namespaces(namespaces)
    namespaces.each do |prefix, uri|
      local_prefix = prefix.to_s.gsub(/.*:/, "") # strip off 'xmlns:' or 'xml:'
      register_ns(local_prefix, uri)
    end
  end
end
|
15
|
+
end
|
16
|
+
end
|
data/lib/nokogiri/xml.rb
ADDED
@@ -0,0 +1,76 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Nokogiri
|
4
|
+
class << self
|
5
|
+
###
|
6
|
+
# Parse XML. Convenience method for Nokogiri::XML::Document.parse
|
7
|
+
# Shorthand for Nokogiri::XML::Document.parse; all arguments and the block
# are forwarded unchanged.
def XML(thing, url = nil, encoding = nil, options = XML::ParseOptions::DEFAULT_XML, &block)
  document_class = Nokogiri::XML::Document
  document_class.parse(thing, url, encoding, options, &block)
end
|
10
|
+
end
|
11
|
+
|
12
|
+
module XML
|
13
|
+
# Original C14N 1.0 spec canonicalization
|
14
|
+
XML_C14N_1_0 = 0
|
15
|
+
# Exclusive C14N 1.0 spec canonicalization
|
16
|
+
XML_C14N_EXCLUSIVE_1_0 = 1
|
17
|
+
# C14N 1.1 spec canonicalization
|
18
|
+
XML_C14N_1_1 = 2
|
19
|
+
class << self
|
20
|
+
###
|
21
|
+
# Parse an XML document using the Nokogiri::XML::Reader API. See
|
22
|
+
# Nokogiri::XML::Reader for more information
|
23
|
+
# Build a Reader over +string_or_io+.
#
# Integer +options+ are wrapped in a ParseOptions object, which is yielded to
# the block (if one is given) so the caller can adjust it. Inputs that respond
# to +read+ go through Reader.from_io; everything else through
# Reader.from_memory.
def Reader(string_or_io, url = nil, encoding = nil, options = ParseOptions::STRICT)
  options = Nokogiri::XML::ParseOptions.new(options) if options.is_a?(Integer)
  yield options if block_given?

  if string_or_io.respond_to?(:read)
    Reader.from_io(string_or_io, url, encoding, options.to_i)
  else
    Reader.from_memory(string_or_io, url, encoding, options.to_i)
  end
end
|
33
|
+
|
34
|
+
###
|
35
|
+
# Parse XML. Convenience method for Nokogiri::XML::Document.parse
|
36
|
+
# Shorthand for Document.parse; all arguments and the block are forwarded
# unchanged.
def parse(thing, url = nil, encoding = nil, options = ParseOptions::DEFAULT_XML, &block)
  forwarded = [thing, url, encoding, options]
  Document.parse(*forwarded, &block)
end
|
39
|
+
|
40
|
+
####
|
41
|
+
# Parse a fragment from +string+ into a NodeSet.
|
42
|
+
# Shorthand for XML::DocumentFragment.parse; +string+, +options+ and the
# block are forwarded unchanged.
def fragment(string, options = ParseOptions::DEFAULT_XML, &block)
  fragment_class = XML::DocumentFragment
  fragment_class.parse(string, options, &block)
end
|
45
|
+
end
|
46
|
+
end
|
47
|
+
end
|
48
|
+
|
49
|
+
require_relative "xml/pp"
|
50
|
+
require_relative "xml/parse_options"
|
51
|
+
require_relative "xml/sax"
|
52
|
+
require_relative "xml/searchable"
|
53
|
+
require_relative "xml/node"
|
54
|
+
require_relative "xml/attribute_decl"
|
55
|
+
require_relative "xml/element_decl"
|
56
|
+
require_relative "xml/element_content"
|
57
|
+
require_relative "xml/character_data"
|
58
|
+
require_relative "xml/namespace"
|
59
|
+
require_relative "xml/attr"
|
60
|
+
require_relative "xml/dtd"
|
61
|
+
require_relative "xml/cdata"
|
62
|
+
require_relative "xml/text"
|
63
|
+
require_relative "xml/document"
|
64
|
+
require_relative "xml/document_fragment"
|
65
|
+
require_relative "xml/processing_instruction"
|
66
|
+
require_relative "xml/node_set"
|
67
|
+
require_relative "xml/syntax_error"
|
68
|
+
require_relative "xml/xpath"
|
69
|
+
require_relative "xml/xpath_context"
|
70
|
+
require_relative "xml/builder"
|
71
|
+
require_relative "xml/reader"
|
72
|
+
require_relative "xml/notation"
|
73
|
+
require_relative "xml/entity_decl"
|
74
|
+
require_relative "xml/entity_reference"
|
75
|
+
require_relative "xml/schema"
|
76
|
+
require_relative "xml/relax_ng"
|
@@ -0,0 +1,27 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Nokogiri
|
4
|
+
module XSLT
|
5
|
+
###
|
6
|
+
# A Stylesheet represents an XSLT Stylesheet object. Stylesheet creation
|
7
|
+
# is done through Nokogiri.XSLT. Here is an example of transforming
|
8
|
+
# an XML::Document with a Stylesheet:
|
9
|
+
#
|
10
|
+
# doc = Nokogiri::XML(File.read('some_file.xml'))
|
11
|
+
# xslt = Nokogiri::XSLT(File.read('some_transformer.xslt'))
|
12
|
+
#
|
13
|
+
# puts xslt.transform(doc)
|
14
|
+
#
|
15
|
+
# See Nokogiri::XSLT::Stylesheet#transform for more transformation
|
16
|
+
# information.
|
17
|
+
class Stylesheet
  ###
  # Transform +document+ with this stylesheet, then serialize the result.
  # +params+ is an array of strings used as XSLT parameters; it is passed
  # straight through to #transform.
  # Returns whatever #serialize produces for the transformed document.
  def apply_to(document, params = [])
    transformed = transform(document, params)
    serialize(transformed)
  end
end
|
26
|
+
end
|
27
|
+
end
|
@@ -0,0 +1,65 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
4
|
+
module Nokogiri
|
5
|
+
class << self
|
6
|
+
###
|
7
|
+
# Create a Nokogiri::XSLT::Stylesheet with +stylesheet+.
|
8
|
+
#
|
9
|
+
# Example:
|
10
|
+
#
|
11
|
+
# xslt = Nokogiri::XSLT(File.read(ARGV[0]))
|
12
|
+
#
|
13
|
+
# Shorthand for XSLT.parse: +stylesheet+ and +modules+ are forwarded unchanged.
def XSLT(stylesheet, modules = {})
  xslt_module = XSLT
  xslt_module.parse(stylesheet, modules)
end
|
16
|
+
end
|
17
|
+
|
18
|
+
###
|
19
|
+
# See Nokogiri::XSLT::Stylesheet for creating and manipulating
|
20
|
+
# Stylesheet object.
|
21
|
+
module XSLT
|
22
|
+
class << self
|
23
|
+
###
|
24
|
+
# Parse the stylesheet in +string+, register any +modules+
|
25
|
+
# Register every url => class pair in +modules+ with XSLT.register, parse
# +string+ as an XML document using the DEFAULT_XSLT parse options, and turn
# that document into a Stylesheet. On JRuby the original source string is
# passed along as a second argument.
def parse(string, modules = {})
  modules.each { |url, klass| XSLT.register(url, klass) }

  doc = XML::Document.parse(string, nil, nil, XML::ParseOptions::DEFAULT_XSLT)
  return Stylesheet.parse_stylesheet_doc(doc, string) if Nokogiri.jruby?

  Stylesheet.parse_stylesheet_doc(doc)
end
|
37
|
+
|
38
|
+
# :call-seq:
|
39
|
+
# quote_params(params) → Array
|
40
|
+
#
|
41
|
+
# Quote parameters in +params+ for stylesheet safety.
|
42
|
+
# See Nokogiri::XSLT::Stylesheet.transform for example usage.
|
43
|
+
#
|
44
|
+
# [Parameters]
|
45
|
+
# - +params+ (Hash, Array) XSLT parameters (key->value, or tuples of [key, value])
|
46
|
+
#
|
47
|
+
# [Returns] Array of string parameters, with quotes correctly escaped for use with XSLT::Stylesheet.transform
|
48
|
+
#
|
49
|
+
# Flatten +params+ into consecutive key/value pairs, stringify both, and
# return a flat Array in which every value is quoted as an XPath string
# literal. Values containing a single quote are expressed with concat(...),
# because such a literal cannot be written inside single quotes.
def quote_params(params)
  quoted = []
  params.flatten.each_slice(2) do |pair|
    key, value = pair.map(&:to_s)
    literal =
      if /'/.match?(value)
        escaped = value.gsub(/'/, %q{', "'", '})
        "concat('#{escaped}')"
      else
        "'#{value}'"
      end
    quoted << key
    quoted << literal
  end
  quoted
end
|
61
|
+
end
|
62
|
+
end
|
63
|
+
end
|
64
|
+
|
65
|
+
require_relative "xslt/stylesheet"
|
data/lib/nokogiri.rb
ADDED
@@ -0,0 +1,120 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
4
|
+
if defined?(RUBY_ENGINE) && RUBY_ENGINE == "jruby"
|
5
|
+
require_relative "nokogiri/jruby/dependencies"
|
6
|
+
end
|
7
|
+
|
8
|
+
require_relative "nokogiri/extension"
|
9
|
+
|
10
|
+
# Nokogiri parses and searches XML/HTML very quickly, and also has
|
11
|
+
# correctly implemented CSS3 selector support as well as XPath 1.0
|
12
|
+
# support.
|
13
|
+
#
|
14
|
+
# Parsing a document returns either a Nokogiri::XML::Document, or a
|
15
|
+
# Nokogiri::HTML4::Document depending on the kind of document you parse.
|
16
|
+
#
|
17
|
+
# Here is an example:
|
18
|
+
#
|
19
|
+
# require 'nokogiri'
|
20
|
+
# require 'open-uri'
|
21
|
+
#
|
22
|
+
# # Get a Nokogiri::HTML4::Document for the page we’re interested in...
|
23
|
+
#
|
24
|
+
# doc = Nokogiri::HTML4(URI.open('http://www.google.com/search?q=tenderlove'))
|
25
|
+
#
|
26
|
+
# # Do funky things with it using Nokogiri::XML::Node methods...
|
27
|
+
#
|
28
|
+
# ####
|
29
|
+
# # Search for nodes by css
|
30
|
+
# doc.css('h3.r a.l').each do |link|
|
31
|
+
# puts link.content
|
32
|
+
# end
|
33
|
+
#
|
34
|
+
# See also:
|
35
|
+
#
|
36
|
+
# - Nokogiri::XML::Searchable#css for more information about CSS searching
|
37
|
+
# - Nokogiri::XML::Searchable#xpath for more information about XPath searching
|
38
|
+
module Nokogiri
|
39
|
+
class << self
|
40
|
+
###
|
41
|
+
# Parse an HTML or XML document. +string+ contains the document.
|
42
|
+
# Parse +string+ as HTML4 or XML and return the resulting document, yielding
# it first when a block is given.
#
# Anything that responds to +read+ is parsed as HTML4, as is a string whose
# first 512 characters contain an <html> or <!DOCTYPE html> indicator;
# everything else is parsed as XML. When +options+ is nil, the matching
# default parse options are used.
def parse(string, url = nil, encoding = nil, options = nil)
  # Expect an HTML indicator to appear within the first 512
  # characters of a document. (<?xml ?> + <?xml-stylesheet ?>
  # shouldn't be that long)
  looks_like_html = string.respond_to?(:read) ||
    /^\s*<(?:!DOCTYPE\s+)?html[\s>]/i.match?(string[0, 512])

  doc =
    if looks_like_html
      Nokogiri.HTML4(string, url, encoding, options || XML::ParseOptions::DEFAULT_HTML)
    else
      Nokogiri.XML(string, url, encoding, options || XML::ParseOptions::DEFAULT_XML)
    end

  yield doc if block_given?
  doc
end
|
57
|
+
|
58
|
+
###
|
59
|
+
# Create a new Nokogiri::XML::DocumentFragment
|
60
|
+
# With +input+: parse it as an HTML4 fragment and return that fragment's
# first child. Without +input+: build from the block via Nokogiri().
# +opts+ is accepted but not used.
def make(input = nil, opts = {}, &blk)
  return Nokogiri(&blk) unless input

  Nokogiri::HTML4.fragment(input).children.first
end
|
67
|
+
|
68
|
+
###
|
69
|
+
# Parse a document and add the Slop decorator. The Slop decorator
|
70
|
+
# implements method_missing such that methods may be used instead of CSS
|
71
|
+
# or XPath. For example:
|
72
|
+
#
|
73
|
+
# doc = Nokogiri::Slop(<<-eohtml)
|
74
|
+
# <html>
|
75
|
+
# <body>
|
76
|
+
# <p>first</p>
|
77
|
+
# <p>second</p>
|
78
|
+
# </body>
|
79
|
+
# </html>
|
80
|
+
# eohtml
|
81
|
+
# assert_equal('second', doc.html.body.p[1].text)
|
82
|
+
#
|
83
|
+
# Parse via Nokogiri() and call slop! on the result.
def Slop(*args, &block)
  parsed = Nokogiri(*args, &block)
  parsed.slop!
end
|
86
|
+
|
87
|
+
# :nodoc:
|
88
|
+
# Deprecated entry point: emits a deprecation warning, then delegates to
# Nokogiri::EncodingHandler.install_default_aliases.
def install_default_aliases
  deprecation = "Nokogiri.install_default_aliases is deprecated and will be removed in a future version of Nokogiri. Please call Nokogiri::EncodingHandler.install_default_aliases instead."
  warn(deprecation)
  Nokogiri::EncodingHandler.install_default_aliases
end
|
92
|
+
end
|
93
|
+
end
|
94
|
+
|
95
|
+
###
|
96
|
+
# Parse a document contained in +args+. Nokogiri will try to guess what type of document you are
|
97
|
+
# attempting to parse. For more information, see Nokogiri.parse
|
98
|
+
#
|
99
|
+
# To specify the type of document, use {Nokogiri.XML}, {Nokogiri.HTML4}, or {Nokogiri.HTML5}.
|
100
|
+
# With a block: build a document with Nokogiri::HTML4::Builder and return its
# root. Without a block: hand +args+ to Nokogiri.parse.
def Nokogiri(*args, &block)
  return Nokogiri.parse(*args) unless block

  builder = Nokogiri::HTML4::Builder.new(&block)
  builder.doc.root
end
|
107
|
+
|
108
|
+
require_relative "nokogiri/version"
|
109
|
+
require_relative "nokogiri/class_resolver"
|
110
|
+
require_relative "nokogiri/syntax_error"
|
111
|
+
require_relative "nokogiri/xml"
|
112
|
+
require_relative "nokogiri/xslt"
|
113
|
+
require_relative "nokogiri/html4"
|
114
|
+
require_relative "nokogiri/html"
|
115
|
+
require_relative "nokogiri/decorators/slop"
|
116
|
+
require_relative "nokogiri/css"
|
117
|
+
require_relative "nokogiri/html4/builder"
|
118
|
+
require_relative "nokogiri/encoding_handler"
|
119
|
+
|
120
|
+
require_relative "nokogiri/html5" if Nokogiri.uses_gumbo?
|