oga 0.1.1-java

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47) hide show
  1. checksums.yaml +7 -0
  2. data/.yardopts +13 -0
  3. data/LICENSE +19 -0
  4. data/README.md +179 -0
  5. data/doc/DCO.md +25 -0
  6. data/doc/changelog.md +20 -0
  7. data/doc/css/common.css +76 -0
  8. data/doc/migrating_from_nokogiri.md +169 -0
  9. data/ext/c/extconf.rb +13 -0
  10. data/ext/c/lexer.c +1518 -0
  11. data/ext/c/lexer.h +8 -0
  12. data/ext/c/lexer.rl +121 -0
  13. data/ext/c/liboga.c +6 -0
  14. data/ext/c/liboga.h +11 -0
  15. data/ext/java/Liboga.java +14 -0
  16. data/ext/java/org/liboga/xml/Lexer.java +829 -0
  17. data/ext/java/org/liboga/xml/Lexer.rl +151 -0
  18. data/ext/ragel/base_lexer.rl +323 -0
  19. data/lib/liboga.jar +0 -0
  20. data/lib/oga.rb +43 -0
  21. data/lib/oga/html/parser.rb +25 -0
  22. data/lib/oga/oga.rb +27 -0
  23. data/lib/oga/version.rb +3 -0
  24. data/lib/oga/xml/attribute.rb +111 -0
  25. data/lib/oga/xml/cdata.rb +17 -0
  26. data/lib/oga/xml/character_node.rb +39 -0
  27. data/lib/oga/xml/comment.rb +17 -0
  28. data/lib/oga/xml/doctype.rb +84 -0
  29. data/lib/oga/xml/document.rb +99 -0
  30. data/lib/oga/xml/element.rb +331 -0
  31. data/lib/oga/xml/lexer.rb +399 -0
  32. data/lib/oga/xml/namespace.rb +42 -0
  33. data/lib/oga/xml/node.rb +168 -0
  34. data/lib/oga/xml/node_set.rb +313 -0
  35. data/lib/oga/xml/parser.rb +556 -0
  36. data/lib/oga/xml/processing_instruction.rb +39 -0
  37. data/lib/oga/xml/pull_parser.rb +180 -0
  38. data/lib/oga/xml/querying.rb +32 -0
  39. data/lib/oga/xml/text.rb +11 -0
  40. data/lib/oga/xml/traversal.rb +48 -0
  41. data/lib/oga/xml/xml_declaration.rb +69 -0
  42. data/lib/oga/xpath/evaluator.rb +1748 -0
  43. data/lib/oga/xpath/lexer.rb +2043 -0
  44. data/lib/oga/xpath/node.rb +10 -0
  45. data/lib/oga/xpath/parser.rb +537 -0
  46. data/oga.gemspec +45 -0
  47. metadata +221 -0
@@ -0,0 +1,39 @@
1
+ module Oga
2
+ module XML
3
+ ##
4
+ # Class used for storing information about a single processing instruction.
5
+ #
6
+ # @!attribute [rw] name
7
+ # @return [String]
8
+ #
9
class ProcessingInstruction < CharacterNode
  # Name of the instruction (the part directly following "<?").
  attr_accessor :name

  ##
  # Builds the node. The options Hash is handed to the superclass
  # constructor unchanged; this class additionally reads the `:name` key.
  #
  # @param [Hash] options
  # @option options [String] :name The name of the instruction.
  #
  def initialize(options = {})
    super(options)

    @name = options[:name]
  end

  ##
  # Serialises the instruction. The name and the text are written back to
  # back; no separator is inserted between them by this method.
  #
  # @return [String]
  #
  def to_xml
    "<?#{name}#{text}?>"
  end

  ##
  # Human readable representation showing the name and the text.
  #
  # @return [String]
  #
  def inspect
    "ProcessingInstruction(name: #{name.inspect} text: #{text.inspect})"
  end
end # ProcessingInstruction
38
+ end # XML
39
+ end # Oga
@@ -0,0 +1,180 @@
1
+ module Oga
2
+ module XML
3
+ ##
4
+ # The PullParser class can be used to parse an XML document incrementally
5
+ # instead of parsing it as a whole. This results in lower memory usage and
6
+ # potentially faster parsing times. The downside is that pull parsers are
7
+ # typically more difficult to use compared to DOM parsers.
8
+ #
9
+ # Basic parsing using this class works as following:
10
+ #
11
+ # parser = Oga::XML::PullParser.new('... xml here ...')
12
+ #
13
+ # parser.parse do |node|
14
+ # if node.is_a?(Oga::XML::Element)
15
+ #
16
+ # end
17
+ # end
18
+ #
19
+ # This parser yields proper XML instances such as {Oga::XML::Element}.
20
+ # Doctypes and XML declarations are ignored by this parser.
21
+ #
22
+ # @!attribute [r] node
23
+ # The current node.
24
+ # @return [Oga::XML::Node]
25
+ #
26
+ # @!attribute [r] nesting
27
+ # Array containing the names of the currently nested elements.
28
+ # @return [Array]
29
+ #
30
class PullParser < Parser
  attr_reader :node, :nesting

  ##
  # Parser callbacks that this class replaces with methods that do nothing
  # and return nil.
  #
  # @return [Array]
  #
  DISABLED_CALLBACKS = [
    :on_document,
    :on_doctype,
    :on_xml_decl,
    :on_element_children
  ]

  ##
  # Parser callbacks whose result (as returned by the superclass
  # implementation) is stored as the current node and passed to the block
  # given to {#parse}.
  #
  # @return [Array]
  #
  BLOCK_CALLBACKS = [
    :on_cdata,
    :on_comment,
    :on_text,
    :on_proc_ins
  ]

  ##
  # Returns the shorthands that can be used for various node classes.
  #
  # @return [Hash]
  #
  NODE_SHORTHANDS = {
    :text            => XML::Text,
    :node            => XML::Node,
    :cdata           => XML::Cdata,
    :element         => XML::Element,
    :doctype         => XML::Doctype,
    :comment         => XML::Comment,
    :xml_declaration => XML::XmlDeclaration
  }

  ##
  # Resets the superclass state and clears the stored block, the nesting
  # stack and the current node.
  #
  # @see Oga::XML::Parser#reset
  #
  def reset
    super

    @block   = nil
    @nesting = []
    @node    = nil
  end

  ##
  # Parses the input and yields every node to the supplied block.
  #
  # The parser state is reset afterwards, also when parsing (or the
  # supplied block) raises, so that no stale block or nesting information
  # is left behind.
  #
  # @yieldparam [Oga::XML::Node]
  # @return [NilClass]
  #
  def parse(&block)
    @block = block

    yyparse(self, :yield_next_token)

    nil
  ensure
    reset
  end

  ##
  # Calls the supplied block if the current node type and optionally the
  # nesting match. This method allows you to write this:
  #
  #     parser.parse do |node|
  #       parser.on(:text, %w{people person name}) do
  #         puts node.text
  #       end
  #     end
  #
  # Instead of this:
  #
  #     parser.parse do |node|
  #       if node.is_a?(Oga::XML::Text) and parser.nesting == %w{people person name}
  #         puts node.text
  #       end
  #     end
  #
  # When calling this method you can specify the following node types:
  #
  # * `:cdata`
  # * `:comment`
  # * `:element`
  # * `:text`
  #
  # @example
  #  parser.on(:element, %w{people person name}) do
  #
  #  end
  #
  # @param [Symbol] type The type of node to act upon, one of the keys of
  #  {NODE_SHORTHANDS}.
  #
  # @param [Array] nesting The element name nesting to act upon. An empty
  #  Array matches any nesting.
  #
  # @raise [TypeError] When the type is not a key of {NODE_SHORTHANDS}.
  #
  def on(type, nesting = [])
    node_class = NODE_SHORTHANDS[type]

    # Without this check `is_a?(nil)` raises a TypeError that does not tell
    # the caller which argument was wrong.
    unless node_class
      raise(
        TypeError,
        "unknown node type #{type.inspect}, expected one of " \
          "#{NODE_SHORTHANDS.keys.inspect}"
      )
    end

    return unless node.is_a?(node_class)

    yield if nesting.empty? || nesting == self.nesting
  end

  # eval is a heck of a lot faster than define_method on both Rubinius and
  # JRuby.
  DISABLED_CALLBACKS.each do |method|
    eval <<-EOF, nil, __FILE__, __LINE__ + 1
    def #{method}(*args)
      return
    end
    EOF
  end

  BLOCK_CALLBACKS.each do |method|
    eval <<-EOF, nil, __FILE__, __LINE__ + 1
    def #{method}(*args)
      @node = super
      @block.call(@node)
      return
    end
    EOF
  end

  ##
  # Stores the element produced by the superclass, pushes its name onto
  # the nesting stack and passes the element to the block.
  #
  # @see Oga::XML::Parser#on_element
  #
  def on_element(*args)
    @node = super

    nesting << @node.name

    @block.call(@node)

    return
  end

  ##
  # Pops the most recently pushed element name off the nesting stack.
  #
  # NOTE(review): presumably overrides the parser's after-element callback;
  # confirm against Oga::XML::Parser.
  #
  def after_element(*args)
    nesting.pop

    return
  end
end # PullParser
179
+ end # XML
180
+ end # Oga
@@ -0,0 +1,32 @@
1
+ module Oga
2
+ module XML
3
+ ##
4
+ # The Querying module provides methods that make it easy to run XPath/CSS
5
+ # queries on XML documents/elements.
6
+ #
7
module Querying
  ##
  # Runs an XPath expression with the receiver as the evaluator's document.
  #
  # @param [String] expression The XPath expression to run.
  # @param [Hash] variables Variables handed to the evaluator.
  # @return [Mixed] Whatever the evaluator returns for the expression.
  #
  def xpath(expression, variables = {})
    evaluator = XPath::Evaluator.new(self, variables)

    evaluator.evaluate(expression)
  end

  ##
  # Runs {#xpath} with the given arguments. When the result is a node set
  # its first entry is returned, any other result is returned unchanged.
  #
  # @see [#xpath]
  #
  def at_xpath(*args)
    outcome = xpath(*args)

    return outcome unless outcome.is_a?(XML::NodeSet)

    outcome.first
  end
end # Querying
31
+ end # XML
32
+ end # Oga
@@ -0,0 +1,11 @@
1
+ module Oga
2
+ module XML
3
+ ##
4
+ # Class containing information about a single text node. Text nodes don't
5
+ # have any children, attributes and the likes; just text.
6
+ #
7
class Text < CharacterNode
  # Deliberately empty: nothing is added on top of CharacterNode.
end # Text
10
+ end # XML
11
+ end # Oga
@@ -0,0 +1,48 @@
1
+ module Oga
2
+ module XML
3
+ ##
4
+ # Module that provides methods to traverse DOM trees.
5
+ #
6
module Traversal
  ##
  # Yields every descendant of the receiver to the supplied block, in
  # document order (a node is yielded before its own children, and a node's
  # children are yielded before its next sibling).
  #
  # The block may `throw :skip_children` to prevent the children of the
  # node it was just given from being visited.
  #
  # @example
  #  document.each_node do |node|
  #    p node.class
  #  end
  #
  # @example Skipping the children of a certain node
  #  document.each_node do |node|
  #    if node.is_a?(Oga::XML::Element) and node.name == 'book'
  #      throw :skip_children
  #    end
  #  end
  #
  # @yieldparam [Oga::XML::Node] The current node.
  #
  def each_node
    # Work on a copy: the queue is consumed while walking the tree.
    pending = children.to_a.dup

    loop do
      break if pending.empty?

      current = pending.shift

      catch :skip_children do
        yield current

        # Only reached when the block did not throw; the children go to the
        # front of the queue so they are visited before any sibling.
        pending = current.children.to_a + pending
      end
    end
  end
end # Traversal
47
+ end # XML
48
+ end # Oga
@@ -0,0 +1,69 @@
1
+ module Oga
2
+ module XML
3
+ ##
4
+ # Class containing information about an XML declaration tag.
5
+ #
6
+ # @!attribute [rw] version
7
+ # The XML version.
8
+ # @return [String]
9
+ #
10
+ # @!attribute [rw] encoding
11
+ # The XML document's encoding.
12
+ # @return [String]
13
+ #
14
+ # @!attribute [rw] standalone
15
+ # Whether or not the document is a standalone document.
16
+ # @return [String]
17
+ #
18
class XmlDeclaration
  attr_accessor :version, :encoding, :standalone

  ##
  # Builds a declaration from an options Hash. A missing (or nil) version
  # falls back to "1.0" and a missing (or nil) encoding to "UTF-8"; the
  # standalone value has no default.
  #
  # @param [Hash] options
  #
  # @option options [String] :version
  # @option options [String] :encoding
  # @option options [String] :standalone
  #
  def initialize(options = {})
    @standalone = options[:standalone]
    @encoding   = options[:encoding] || 'UTF-8'
    @version    = options[:version] || '1.0'
  end

  ##
  # Serialises the declaration. Attributes are written in the order
  # version, encoding, standalone; an attribute is left out only when its
  # value is nil or false.
  #
  # @return [String]
  #
  def to_xml
    attributes = []

    [:version, :encoding, :standalone].each do |key|
      current = send(key)

      next unless current

      attributes << %Q{#{key}="#{current}"}
    end

    "<?xml #{attributes.join(' ')} ?>"
  end

  ##
  # Human readable representation. Unlike {#to_xml} this also leaves out
  # attributes whose value is empty.
  #
  # @return [String]
  #
  def inspect
    parts = []

    [:version, :encoding, :standalone].each do |key|
      current = send(key)

      next if !current || current.empty?

      parts << "#{key}: #{current.inspect}"
    end

    "XmlDeclaration(#{parts.join(' ')})"
  end
end # XmlDeclaration
68
+ end # XML
69
+ end # Oga
@@ -0,0 +1,1748 @@
1
+ module Oga
2
+ module XPath
3
+ ##
4
+ # The Evaluator class evaluates XPath expressions, either as a String or an
5
+ # AST of {Oga::XPath::Node} instances.
6
+ #
7
+ # ## Thread Safety
8
+ #
9
+ # This class is not thread-safe, you can not share the same instance between
10
+ # multiple threads. This is due to the use of an internal stack (see below
11
+ # for more information). It is however perfectly fine to use multiple
12
+ # separated instances as this class does not use a thread global state.
13
+ #
14
+ # ## Node Set Stack
15
+ #
16
+ # This class uses an internal stack of XML node sets. This stack is used for
17
+ # functions that require access to the set of nodes a predicate belongs to.
18
+ # An example of such a function is `position()`.
19
+ #
20
+ # An alternative would be to pass the node sets a predicate belongs to as an
21
+ # extra argument to the various `on_*` methods. The problematic part of
22
+ # this approach is that it requires every method to take and pass along the
23
+ # argument. It's far too easy to make mistakes in such a setup and as such
24
+ # I've chosen to use an internal stack instead.
25
+ #
26
+ # See {#with_node_set} and {#current_node_set} for more information.
27
+ #
28
+ # ## Set Indices
29
+ #
30
+ # XPath node sets start at index 1 instead of index 0. In other words, if
31
+ # you want to access the first node in a set you have to use index 1, not 0.
32
+ # Certain methods such as {#on_call_last} and {#on_call_position} take care
33
+ # of converting indices from Ruby to XPath.
34
+ #
35
+ # ## Number Types
36
+ #
37
+ # The XPath specification states that all numbers produced by an expression
38
+ # should be returned as double-precision 64bit IEEE 754 floating point
39
+ # numbers. For example, the return value of `position()` should be a float
40
+ # (e.g. "1.0", not "1").
41
+ #
42
+ # Oga takes care internally of converting numbers to integers and/or floats
43
+ # where needed. The output types however will always be floats.
44
+ #
45
+ # For more information on the specification, see
46
+ # <http://www.w3.org/TR/xpath/#numbers>.
47
+ #
48
+ # ## Variables
49
+ #
50
+ # The evaluator supports the binding of custom variables in the
51
+ # {#initialize} method. Variables can be bound by passing in a Hash with the
52
+ # keys set to the variable names (minus the `$` sign) and their values to
53
+ # the variable values. The keys of the variables Hash *must* be Strings.
54
+ #
55
+ # A basic example:
56
+ #
57
+ # evaluator = Evaluator.new(document, 'number' => 10)
58
+ #
59
+ # evaluator.evaluate('$number') # => 10
60
+ #
61
+ class Evaluator
62
+ ##
63
+ # @param [Oga::XML::Document|Oga::XML::Node] document
64
+ # @param [Hash] variables Hash containing variables to expose to the XPath
65
+ # expressions.
66
+ #
67
def initialize(document, variables = {})
  # Starts out with an empty node set stack; the document and the variable
  # bindings are stored as given.
  @node_sets = []
  @variables = variables
  @document  = document
end
72
+
73
+ ##
74
+ # Evaluates an XPath expression as a String.
75
+ #
76
+ # @example
77
+ # evaluator = Oga::XPath::Evaluator.new(document)
78
+ #
79
+ # evaluator.evaluate('//a')
80
+ #
81
+ # @param [String] string An XPath expression as a String.
82
+ # @return [Mixed]
83
+ #
84
def evaluate(string)
  # Turn the expression text into an AST and evaluate that.
  evaluate_ast(Parser.new(string).parse)
end
89
+
90
+ ##
91
+ # Evaluates a pre-parsed XPath expression.
92
+ #
93
+ # @param [Oga::XPath::Node] ast
94
+ # @return [Mixed]
95
+ #
96
def evaluate_ast(ast)
  # The initial context holds just the object this evaluator was created
  # for.
  initial_context = XML::NodeSet.new([@document])

  process(ast, initial_context)
end
101
+
102
+ ##
103
+ # Processes an XPath node by dispatching it and the given context to a
104
+ # dedicated handler method. Handler methods are called "on_X" where "X" is
105
+ # the node type.
106
+ #
107
+ # @param [Oga::XPath::Node] ast_node The XPath AST node to process.
108
+ #
109
+ # @param [Oga::XML::NodeSet] context The context (a set of nodes) to
110
+ # evaluate an expression in.
111
+ #
112
+ # @return [Oga::XML::NodeSet]
113
+ #
114
def process(ast_node, context)
  # The handler name is derived from the AST node type ("on_<type>").
  send("on_#{ast_node.type}", ast_node, context)
end
119
+
120
+ ##
121
+ # Processes an absolute XPath expression such as `/foo`.
122
+ #
123
+ # @param [Oga::XPath::Node] ast_node
124
+ # @param [Oga::XML::NodeSet] context
125
+ # @return [Oga::XML::NodeSet]
126
+ #
127
def on_absolute_path(ast_node, context)
  # The incoming context is ignored: evaluation starts at the root node
  # when the bound object exposes one, otherwise at the bound object.
  start = @document.respond_to?(:root_node) ? @document.root_node : @document

  on_path(ast_node, XML::NodeSet.new([start]))
end
136
+
137
+ ##
138
+ # Processes a relative XPath expression such as `foo`.
139
+ #
140
+ # Paths are evaluated using a "short-circuit" mechanism similar to Ruby's
141
+ # `&&` / `and` operator. Whenever a path results in an empty node set the
142
+ # evaluation is aborted immediately.
143
+ #
144
+ # @param [Oga::XPath::Node] ast_node
145
+ # @param [Oga::XML::NodeSet] context
146
+ # @return [Oga::XML::NodeSet]
147
+ #
148
def on_path(ast_node, context)
  result  = XML::NodeSet.new
  current = context

  ast_node.children.each do |step|
    result = process(step, current)

    # An empty result ends the path; later steps are not evaluated.
    break if result.empty?

    # The output of one step is the context of the next.
    current = result
  end

  result
end
163
+
164
+ ##
165
+ # Processes a node test and optionally a predicate.
166
+ #
167
+ # @param [Oga::XPath::Node] ast_node
168
+ # @param [Oga::XML::NodeSet] context
169
+ # @return [Oga::XML::NodeSet]
170
+ #
171
def on_test(ast_node, context)
  matched   = XML::NodeSet.new
  predicate = ast_node.children[2]
  position  = 1

  context.each do |candidate|
    next unless node_matches?(candidate, ast_node)

    if predicate
      # The predicate is evaluated against the single candidate while the
      # whole context is made available through the node set stack.
      outcome = with_node_set(context) do
        process(predicate, XML::NodeSet.new([candidate]))
      end

      if outcome.is_a?(Numeric)
        # A number selects the candidate at that (1-based) position.
        matched << candidate if outcome.to_i == position
      elsif outcome
        # Empty results reject the candidate. Note that this `next` also
        # skips the position increment at the bottom of the loop.
        next if outcome.respond_to?(:empty?) && outcome.empty?

        matched << candidate
      end
    else
      matched << candidate
    end

    position += 1
  end

  matched
end
205
+
206
+ ##
207
+ # Dispatches the processing of axes to dedicated methods. This works
208
+ # similar to {#process} except the handler names are "on_axis_X" with "X"
209
+ # being the axis name.
210
+ #
211
+ # @param [Oga::XPath::Node] ast_node
212
+ # @param [Oga::XML::NodeSet] context
213
+ # @return [Oga::XML::NodeSet]
214
+ #
215
def on_axis(ast_node, context)
  axis_name, node_test = *ast_node

  # Axis names may contain dashes, method names may not.
  send("on_axis_#{axis_name.tr('-', '_')}", node_test, context)
end
222
+
223
+ ##
224
+ # Processes the `ancestor` axis. This axis walks through the entire
225
+ # ancestor chain until a matching node is found.
226
+ #
227
+ # Evaluation happens using a "short-circuit" mechanism. The moment a
228
+ # matching node is found it is returned immediately.
229
+ #
230
+ # @param [Oga::XPath::Node] ast_node
231
+ # @param [Oga::XML::NodeSet] context
232
+ # @return [Oga::XML::NodeSet]
233
+ #
234
def on_axis_ancestor(ast_node, context)
  found = XML::NodeSet.new

  context.each do |start|
    cursor = start

    # Climb the parent chain; only the nearest matching ancestor of each
    # context node is collected.
    while has_parent?(cursor)
      cursor = cursor.parent

      next unless node_matches?(cursor, ast_node)

      found << cursor
      break
    end
  end

  found
end
250
+
251
+ ##
252
+ # Processes the `ancestor-or-self` axis.
253
+ #
254
+ # @see [#on_axis_ancestor]
255
+ #
256
def on_axis_ancestor_or_self(ast_node, context)
  nodes = XML::NodeSet.new

  context.each do |context_node|
    current = context_node

    # Test the node itself first and then walk up the parent chain,
    # stopping at the first match. The test runs before the parent check
    # so that a node without a parent (e.g. a detached element) is still
    # compared instead of being skipped.
    loop do
      if node_matches?(current, ast_node)
        nodes << current
        break
      end

      break unless has_parent?(current)

      current = current.parent
    end
  end

  nodes
end
272
+
273
+ ##
274
+ # Processes the `attribute` axis. The node test is performed against all
275
+ # the attributes of the nodes in the current context.
276
+ #
277
+ # Evaluation of the nodes continues until the node set has been exhausted
278
+ # (unlike some other methods which return the moment they find a matching
279
+ # node).
280
+ #
281
+ # @param [Oga::XPath::Node] ast_node
282
+ # @param [Oga::XML::NodeSet] context
283
+ # @return [Oga::XML::NodeSet]
284
+ #
285
def on_axis_attribute(ast_node, context)
  collected = XML::NodeSet.new

  context.each do |candidate|
    # Only elements are asked for their attributes.
    if candidate.is_a?(XML::Element)
      collected = collected + on_test(ast_node, candidate.attributes)
    end
  end

  collected
end
296
+
297
+ ##
298
+ # Evaluates the `child` axis. This axis simply takes all the child nodes
299
+ # of the current context nodes.
300
+ #
301
+ # @param [Oga::XPath::Node] ast_node
302
+ # @param [Oga::XML::NodeSet] context
303
+ # @return [Oga::XML::NodeSet]
304
+ #
305
def on_axis_child(ast_node, context)
  # The test is evaluated with the children of the context as new context.
  children = child_nodes(context)

  process(ast_node, children)
end
308
+
309
+ ##
310
+ # Evaluates the `descendant` axis. This method processes child nodes until
311
+ # the very end of the tree, no "short-circuiting" mechanism is used.
312
+ #
313
+ # @param [Oga::XPath::Node] ast_node
314
+ # @param [Oga::XML::NodeSet] context
315
+ # @return [Oga::XML::NodeSet]
316
+ #
317
def on_axis_descendant(ast_node, context)
  nodes = XML::NodeSet.new

  context.each do |context_node|
    # `each_node` starts with the direct children and walks the whole
    # subtree in document order, so a separate pass over the children is
    # not needed (it would visit every direct child twice).
    context_node.each_node do |node|
      nodes << node if node_matches?(node, ast_node)
    end
  end

  nodes
end
332
+
333
+ ##
334
+ # Evaluates the `descendant-or-self` axis.
335
+ #
336
+ # @param [Oga::XPath::Node] ast_node
337
+ # @param [Oga::XML::NodeSet] context
338
+ # @return [Oga::XML::NodeSet]
339
+ #
340
def on_axis_descendant_or_self(ast_node, context)
  # Matches among the context nodes themselves come first, followed by the
  # matching descendants.
  on_test(ast_node, context).tap do |combined|
    combined.concat(on_axis_descendant(ast_node, context))
  end
end
347
+
348
+ ##
349
+ # Evaluates the `following` axis.
350
+ #
351
+ # @param [Oga::XPath::Node] ast_node
352
+ # @param [Oga::XML::NodeSet] context
353
+ # @return [Oga::XML::NodeSet]
354
+ #
355
def on_axis_following(ast_node, context)
  following = XML::NodeSet.new

  context.each do |context_node|
    passed = false

    @document.each_node do |candidate|
      # Once the context node is reached its children are skipped and
      # every node yielded afterwards is tested.
      if candidate == context_node
        passed = true
        throw :skip_children
      end

      following << candidate if passed && node_matches?(candidate, ast_node)
    end
  end

  following
end
377
+
378
+ ##
379
+ # Evaluates the `following-sibling` axis.
380
+ #
381
+ # @param [Oga::XPath::Node] ast_node
382
+ # @param [Oga::XML::NodeSet] context
383
+ # @return [Oga::XML::NodeSet]
384
+ #
385
def on_axis_following_sibling(ast_node, context)
  siblings = XML::NodeSet.new

  context.each do |context_node|
    passed = false
    parent = has_parent?(context_node) ? context_node.parent : nil

    @document.each_node do |candidate|
      # Start testing after the context node, skipping its children.
      if candidate == context_node
        passed = true
        throw :skip_children
      end

      # Only nodes after the context node that share its parent qualify.
      next if !passed || parent != candidate.parent

      next unless node_matches?(candidate, ast_node)

      siblings << candidate

      # The children of a matching sibling are not visited.
      throw :skip_children
    end
  end

  siblings
end
414
+
415
+ ##
416
+ # Evaluates the `parent` axis.
417
+ #
418
+ # @param [Oga::XPath::Node] ast_node
419
+ # @param [Oga::XML::NodeSet] context
420
+ # @return [Oga::XML::NodeSet]
421
+ #
422
def on_axis_parent(ast_node, context)
  parents = XML::NodeSet.new

  context.each do |context_node|
    # Nodes without a parent contribute nothing.
    if has_parent?(context_node)
      owner = context_node.parent

      parents << owner if node_matches?(owner, ast_node)
    end
  end

  parents
end
435
+
436
+ ##
437
+ # Evaluates the `preceding` axis.
438
+ #
439
+ # @param [Oga::XPath::Node] ast_node
440
+ # @param [Oga::XML::NodeSet] context
441
+ # @return [Oga::XML::NodeSet]
442
+ #
443
def on_axis_preceding(ast_node, context)
  nodes = XML::NodeSet.new

  context.each do |context_node|
    @document.each_node do |doc_node|
      # Every node yielded before the context node is tested; the walk
      # stops as soon as the context node itself is reached.
      break if doc_node == context_node

      nodes << doc_node if node_matches?(doc_node, ast_node)
    end
  end

  nodes
end
461
+
462
+ ##
463
+ # Evaluates the `preceding-sibling` axis.
464
+ #
465
+ # @param [Oga::XPath::Node] ast_node
466
+ # @param [Oga::XML::NodeSet] context
467
+ # @return [Oga::XML::NodeSet]
468
+ #
469
def on_axis_preceding_sibling(ast_node, context)
  nodes = XML::NodeSet.new

  context.each do |context_node|
    parent = has_parent?(context_node) ? context_node.parent : nil

    @document.each_node do |doc_node|
      # Test everything *until* we hit the current context node; only
      # nodes sharing the context node's parent qualify.
      break if doc_node == context_node

      if doc_node.parent == parent && node_matches?(doc_node, ast_node)
        nodes << doc_node
      end
    end
  end

  nodes
end
488
+
489
+ ##
490
+ # Evaluates the `self` axis.
491
+ #
492
+ # @param [Oga::XPath::Node] ast_node
493
+ # @param [Oga::XML::NodeSet] context
494
+ # @return [Oga::XML::NodeSet]
495
+ #
496
def on_axis_self(ast_node, context)
  kept = XML::NodeSet.new

  # Keep only those context nodes that pass the node test themselves.
  context.each do |context_node|
    next unless node_matches?(context_node, ast_node)

    kept << context_node
  end

  kept
end
505
+
506
+ ##
507
+ # Evaluates the `namespace` axis.
508
+ #
509
+ # @param [Oga::XPath::Node] ast_node
510
+ # @param [Oga::XML::NodeSet] context
511
+ # @return [Oga::XML::NodeSet]
512
+ #
513
def on_axis_namespace(ast_node, context)
  found  = XML::NodeSet.new
  wanted = ast_node.children[1]

  context.each do |context_node|
    # Nodes that cannot list namespaces are ignored.
    next unless context_node.respond_to?(:available_namespaces)

    context_node.available_namespaces.each do |_, namespace|
      # "*" selects every available namespace.
      found << namespace if namespace.name == wanted || wanted == '*'
    end
  end

  found
end
529
+
530
+ ##
531
+ # Dispatches node type matching to dedicated handlers.
532
+ #
533
+ # @param [Oga::XPath::Node] ast_node
534
+ # @param [Oga::XML::NodeSet] context
535
+ # @return [Oga::XML::NodeSet]
536
+ #
537
def on_type_test(ast_node, context)
  type_name, node_test = *ast_node

  # Type names may contain dashes, method names may not.
  send("on_type_test_#{type_name.tr('-', '_')}", node_test, context)
end
544
+
545
+ ##
546
+ # Processes the `node` type matcher. This matcher matches all node types.
547
+ #
548
+ # @param [Oga::XPath::Node] ast_node
549
+ # @param [Oga::XML::NodeSet] context
550
+ # @return [Oga::XML::NodeSet]
551
+ #
552
def on_type_test_node(ast_node, context)
  kept = XML::NodeSet.new

  # Anything that is an XML::Node passes this test.
  context.each do |candidate|
    next unless candidate.is_a?(XML::Node)

    kept << candidate
  end

  kept
end
561
+
562
+ ##
563
+ # Processes the `text()` type test. This matches only text nodes.
564
+ #
565
+ # @param [Oga::XPath::Node] ast_node
566
+ # @param [Oga::XML::NodeSet] context
567
+ # @return [Oga::XML::NodeSet]
568
+ #
569
def on_type_test_text(ast_node, context)
  kept = XML::NodeSet.new

  # Only XML::Text instances pass this test.
  context.each do |candidate|
    next unless candidate.is_a?(XML::Text)

    kept << candidate
  end

  kept
end
578
+
579
+ ##
580
+ # Processes the `comment()` type test. This matches only comment nodes.
581
+ #
582
+ # @param [Oga::XPath::Node] ast_node
583
+ # @param [Oga::XML::NodeSet] context
584
+ # @return [Oga::XML::NodeSet]
585
+ #
586
def on_type_test_comment(ast_node, context)
  kept = XML::NodeSet.new

  # Only XML::Comment instances pass this test.
  context.each do |candidate|
    next unless candidate.is_a?(XML::Comment)

    kept << candidate
  end

  kept
end
595
+
596
+ ##
597
+ # Processes the `processing-instruction()` type test. This matches only
598
+ # processing-instruction nodes.
599
+ #
600
+ # @param [Oga::XPath::Node] ast_node
601
+ # @param [Oga::XML::NodeSet] context
602
+ # @return [Oga::XML::NodeSet]
603
+ #
604
def on_type_test_processing_instruction(ast_node, context)
  kept = XML::NodeSet.new

  # Only XML::ProcessingInstruction instances pass this test.
  context.each do |candidate|
    next unless candidate.is_a?(XML::ProcessingInstruction)

    kept << candidate
  end

  kept
end
613
+
614
+ ##
615
+ # Processes the pipe (`|`) operator. This operator creates a union of two
616
+ # sets.
617
+ #
618
+ # @param [Oga::XPath::Node] ast_node
619
+ # @param [Oga::XML::NodeSet] context
620
+ # @return [Oga::XML::NodeSet]
621
+ #
622
def on_pipe(ast_node, context)
  lhs, rhs = *ast_node

  # Both sides are evaluated against the same context, left side first.
  left_set  = process(lhs, context)
  right_set = process(rhs, context)

  left_set + right_set
end
627
+
628
+ ##
629
+ # Processes the `and` operator.
630
+ #
631
+ # This operator returns true if both the left and right expression
632
+ # evaluate to `true`. If the first expression evaluates to `false` the
633
+ # right expression is ignored.
634
+ #
635
+ # @param [Oga::XPath::Node] ast_node
636
+ # @param [Oga::XML::NodeSet] context
637
+ # @return [TrueClass|FalseClass]
638
+ #
639
def on_and(ast_node, context)
  lhs, rhs = *ast_node

  first = on_call_boolean(context, lhs)

  # The right side is only evaluated when the left side is truthy.
  first ? on_call_boolean(context, rhs) : first
end
644
+
645
+ ##
646
+ # Processes the `or` operator.
647
+ #
648
+ # This operator returns `true` if one of the expressions evaluates to
649
+ # true, otherwise false is returned. If the first expression evaluates to
650
+ # `true` the second expression is ignored.
651
+ #
652
+ # @param [Oga::XPath::Node] ast_node
653
+ # @param [Oga::XML::NodeSet] context
654
+ # @return [TrueClass|FalseClass]
655
+ #
656
def on_or(ast_node, context)
  lhs, rhs = *ast_node

  first = on_call_boolean(context, lhs)

  # The right side is only evaluated when the left side is falsy.
  first ? first : on_call_boolean(context, rhs)
end
661
+
662
+ ##
663
+ # Processes the `+` operator.
664
+ #
665
+ # This operator converts the left and right expressions to numbers and
666
+ # adds them together.
667
+ #
668
+ # @param [Oga::XPath::Node] ast_node
669
+ # @param [Oga::XML::NodeSet] context
670
+ # @return [Float]
671
+ #
672
def on_add(ast_node, context)
  lhs, rhs = *ast_node

  # Both operands go through the number conversion, left operand first.
  augend = on_call_number(context, lhs)
  addend = on_call_number(context, rhs)

  augend + addend
end
677
+
678
+ ##
679
+ # Processes the `div` operator.
680
+ #
681
+ # This operator converts the left and right expressions to numbers and
682
+ # divides the left number with the right number.
683
+ #
684
+ # @param [Oga::XPath::Node] ast_node
685
+ # @param [Oga::XML::NodeSet] context
686
+ # @return [Float]
687
+ #
688
def on_div(ast_node, context)
  lhs, rhs = *ast_node

  # Both operands go through the number conversion, left operand first.
  dividend = on_call_number(context, lhs)
  divisor  = on_call_number(context, rhs)

  dividend / divisor
end
693
+
694
+ ##
695
+ # Processes the `mod` operator.
696
+ #
697
+ # This operator converts the left and right expressions to numbers and
698
+ # returns the modulo of the two numbers.
699
+ #
700
+ # @param [Oga::XPath::Node] ast_node
701
+ # @param [Oga::XML::NodeSet] context
702
+ # @return [Float]
703
+ #
704
def on_mod(ast_node, context)
  lhs, rhs = *ast_node

  # Both operands go through the number conversion, left operand first.
  dividend = on_call_number(context, lhs)
  divisor  = on_call_number(context, rhs)

  dividend % divisor
end
709
+
710
+ ##
711
+ # Processes the `*` operator.
712
+ #
713
+ # This operator converts the left and right expressions to numbers and
714
+ # multiplies the left number with the right number.
715
+ #
716
+ # @param [Oga::XPath::Node] ast_node
717
+ # @param [Oga::XML::NodeSet] context
718
+ # @return [Float]
719
+ #
720
def on_mul(ast_node, context)
  lhs, rhs = *ast_node

  # Both operands go through the number conversion, left operand first.
  multiplicand = on_call_number(context, lhs)
  multiplier   = on_call_number(context, rhs)

  multiplicand * multiplier
end
725
+
726
+ ##
727
+ # Processes the `-` operator.
728
+ #
729
+ # This operator converts the left and right expressions to numbers and
730
+ # subtracts the right number of the left number.
731
+ #
732
+ # @param [Oga::XPath::Node] ast_node
733
+ # @param [Oga::XML::NodeSet] context
734
+ # @return [Float]
735
+ #
736
def on_sub(ast_node, context)
  lhs, rhs = *ast_node

  # Both operands go through the number conversion, left operand first.
  minuend    = on_call_number(context, lhs)
  subtrahend = on_call_number(context, rhs)

  minuend - subtrahend
end
741
+
742
+ ##
743
+ # Processes the `=` operator.
744
+ #
745
+ # This operator evaluates the expression on the left and right and returns
746
+ # `true` if they are equal. This operator can be used to compare strings,
747
+ # numbers and node sets. When using node sets the text of the set is
748
+ # compared instead of the nodes themselves. That is, nodes with different
749
+ # names but the same text are considered to be equal.
750
+ #
751
+ # @param [Oga::XPath::Node] ast_node
752
+ # @param [Oga::XML::NodeSet] context
753
+ # @return [TrueClass|FalseClass]
754
+ #
755
def on_eq(ast_node, context)
  left  = process(ast_node.children[0], context)
  right = process(ast_node.children[1], context)

  # Node sets are compared using the text of their first node.
  left  = first_node_text(left) if left.is_a?(XML::NodeSet)
  right = first_node_text(right) if right.is_a?(XML::NodeSet)

  if left.is_a?(Numeric) && !right.is_a?(Numeric)
    right = to_float(right)

  # When only the right operand is a number the left one has to be converted
  # to a number as well. Without this `"10.0" = 10` compared the strings
  # "10.0" and "10" and returned false, while `10 = "10.0"` returned true.
  elsif right.is_a?(Numeric) && !left.is_a?(Numeric)
    left = to_float(left)

  elsif left.is_a?(String) && !right.is_a?(String)
    right = to_string(right)
  end

  left == right
end
777
+
778
+ ##
779
+ # Processes the `!=` operator.
780
+ #
781
+ # This operator does the exact opposite of the `=` operator. See {#on_eq}
782
+ # for more information.
783
+ #
784
+ # @see [#on_eq]
785
+ #
786
def on_neq(ast_node, context)
  # Plain negation of the `=` operator.
  !on_eq(ast_node, context)
end
789
+
790
+ ##
791
+ # Processes the `<` operator.
792
+ #
793
+ # This operator converts the left and right expression to a number and
794
+ # returns `true` if the first number is lower than the second number.
795
+ #
796
+ # @param [Oga::XPath::Node] ast_node
797
+ # @param [Oga::XML::NodeSet] context
798
+ # @return [TrueClass|FalseClass]
799
+ #
800
def on_lt(ast_node, context)
  lhs, rhs = *ast_node

  # Both operands are coerced with number() before being compared.
  on_call_number(context, lhs) < on_call_number(context, rhs)
end
805
+
806
+ ##
807
+ # Processes the `>` operator.
808
+ #
809
+ # This operator converts the left and right expression to a number and
810
+ # returns `true` if the first number is greater than the second number.
811
+ #
812
+ # @param [Oga::XPath::Node] ast_node
813
+ # @param [Oga::XML::NodeSet] context
814
+ # @return [TrueClass|FalseClass]
815
+ #
816
def on_gt(ast_node, context)
  lhs, rhs = *ast_node

  # Both operands are coerced with number() before being compared.
  on_call_number(context, lhs) > on_call_number(context, rhs)
end
821
+
822
+ ##
823
+ # Processes the `<=` operator.
824
+ #
825
+ # This operator converts the left and right expression to a number and
826
+ # returns `true` if the first number is lower-than or equal to the second
827
+ # number.
828
+ #
829
+ # @param [Oga::XPath::Node] ast_node
830
+ # @param [Oga::XML::NodeSet] context
831
+ # @return [TrueClass|FalseClass]
832
+ #
833
def on_lte(ast_node, context)
  lhs, rhs = *ast_node

  # Both operands are coerced with number() before being compared.
  on_call_number(context, lhs) <= on_call_number(context, rhs)
end
838
+
839
+ ##
840
+ # Processes the `>=` operator.
841
+ #
842
+ # This operator converts the left and right expression to a number and
843
+ # returns `true` if the first number is greater-than or equal to the
844
+ # second number.
845
+ #
846
+ # @param [Oga::XPath::Node] ast_node
847
+ # @param [Oga::XML::NodeSet] context
848
+ # @return [TrueClass|FalseClass]
849
+ #
850
def on_gte(ast_node, context)
  lhs, rhs = *ast_node

  # Both operands are coerced with number() before being compared.
  on_call_number(context, lhs) >= on_call_number(context, rhs)
end
855
+
856
+ ##
857
+ # Delegates function calls to specific handlers.
858
+ #
859
+ # Handler functions take two arguments:
860
+ #
861
+ # 1. The context node set
862
+ # 2. A variable list of XPath function arguments, passed as individual
863
+ # Ruby method arguments.
864
+ #
865
+ # @param [Oga::XPath::Node] ast_node
866
+ # @param [Oga::XML::NodeSet] context
867
+ # @return [Mixed] Whatever the function handler returns.
868
+ #
869
def on_call(ast_node, context)
  function, *arguments = *ast_node

  # XPath function names use dashes ("starts-with") while the matching Ruby
  # handlers use underscores ("on_call_starts_with").
  handler = "on_call_#{function.tr('-', '_')}"

  send(handler, context, *arguments)
end
876
+
877
+ ##
878
+ # Processes the `last()` function call. This function call returns the
879
+ # index of the last node in the current set.
880
+ #
881
+ # @param [Oga::XML::NodeSet] context
882
+ # @return [Float]
883
+ #
884
def on_call_last(context)
  # XPath positions run from 1 to N, so the last position equals the number
  # of nodes in the current node set.
  total = current_node_set.length

  total.to_f
end
888
+
889
+ ##
890
+ # Processes the `position()` function call. This function returns the
891
+ # position of the current node in the current node set.
892
+ #
893
+ # @param [Oga::XML::NodeSet] context
894
+ # @return [Float]
895
+ #
896
def on_call_position(context)
  zero_based = current_node_set.index(context.first)

  # XPath positions start at 1 instead of 0.
  (zero_based + 1).to_f
end
901
+
902
+ ##
903
+ # Processes the `count()` function call. This function counts the amount
904
+ # of nodes in `expression` and returns the result as a float.
905
+ #
906
+ # @param [Oga::XML::NodeSet] context
907
+ # @param [Oga::XPath::Node] expression
908
+ # @return [Float]
909
+ #
910
def on_call_count(context, expression)
  counted = process(expression, context)

  if counted.is_a?(XML::NodeSet)
    counted.length.to_f
  else
    raise TypeError, 'count() can only operate on NodeSet instances'
  end
end
919
+
920
+ ##
921
+ # Processes the `id()` function call.
922
+ #
923
+ # The XPath specification states that this function's behaviour should be
924
+ # controlled by a DTD. If a DTD were to specify that the ID attribute for
925
+ # a certain element would be "foo" then this function should use said
926
+ # attribute.
927
+ #
928
+ # Oga does not support DTD parsing/evaluation and as such always uses the
929
+ # "id" attribute.
930
+ #
931
+ # This function searches the entire document for a matching node,
932
+ # regardless of the current position.
933
+ #
934
+ # @param [Oga::XML::NodeSet] context
935
+ # @param [Oga::XPath::Node] expression
936
+ # @return [Oga::XML::NodeSet]
937
+ #
938
def on_call_id(context, expression)
  value = process(expression, context)

  # Based on Nokogiri's/libxml behaviour the text of a node set is used as
  # the (whitespace separated) list of IDs.
  raw_ids = value.is_a?(XML::NodeSet) ? value.text : value.to_s
  wanted  = raw_ids.split(' ')
  matches = XML::NodeSet.new

  # The whole document is searched, regardless of the current context.
  @document.each_node do |node|
    next unless node.is_a?(XML::Element)

    attribute = node.attribute('id')

    matches << node if attribute && wanted.include?(attribute.value)
  end

  matches
end
959
+
960
+ ##
961
+ # Processes the `local-name()` function call.
962
+ #
963
+ # This function call returns the name of one of the following:
964
+ #
965
+ # * The current context node (if any)
966
+ # * The first node in the supplied node set
967
+ #
968
+ # @param [Oga::XML::NodeSet] context
969
+ # @param [Oga::XPath::Node] expression
970
+ # @return [String]
971
+ #
972
def on_call_local_name(context, expression = nil)
  target = function_node(context, expression)

  # Nodes without a name (or no node at all) yield an empty string.
  return '' unless target.respond_to?(:name)

  target.name
end
977
+
978
+ ##
979
+ # Processes the `name()` function call.
980
+ #
981
+ # This function call is similar to `local-name()` (see
982
+ # {#on_call_local_name}) except that it includes the namespace name if
983
+ # present.
984
+ #
985
+ # @param [Oga::XML::NodeSet] context
986
+ # @param [Oga::XPath::Node] expression
987
+ # @return [String]
988
+ #
989
def on_call_name(context, expression = nil)
  target = function_node(context, expression)

  # Only nodes that have both a name and a namespace slot can be qualified.
  unless target.respond_to?(:name) && target.respond_to?(:namespace)
    return ''
  end

  return target.name unless target.namespace

  "#{target.namespace.name}:#{target.name}"
end
1002
+
1003
+ ##
1004
+ # Processes the `namespace-uri()` function call.
1005
+ #
1006
+ # This function call returns the namespace URI of one of the following:
1007
+ #
1008
+ # * The current context node (if any)
1009
+ # * The first node in the supplied node set
1010
+ #
1011
+ # @param [Oga::XML::NodeSet] context
1012
+ # @param [Oga::XPath::Node] expression
1013
+ # @return [String]
1014
+ #
1015
def on_call_namespace_uri(context, expression = nil)
  target = function_node(context, expression)

  # Nodes without a namespace (or no node at all) yield an empty string.
  namespaced = target.respond_to?(:namespace) && target.namespace

  namespaced ? target.namespace.uri : ''
end
1024
+
1025
+ ##
1026
+ # Evaluates the `string()` function call.
1027
+ #
1028
+ # This function call converts the given argument *or* the current context
1029
+ # node to a string. If a node set is given then only the first node is
1030
+ # converted to a string.
1031
+ #
1032
+ # @example
1033
+ # string(10) # => "10"
1034
+ #
1035
+ # @param [Oga::XML::NodeSet] context
1036
+ # @param [Oga::XPath::Node] expression
1037
+ # @return [String]
1038
+ #
1039
def on_call_string(context, expression = nil)
  subject =
    if expression
      result = process(expression, context)

      # Of a node set only the first node is converted.
      result.is_a?(XML::NodeSet) ? result[0] : result
    else
      context.first
    end

  subject.respond_to?(:text) ? subject.text : to_string(subject)
end
1056
+
1057
+ ##
1058
+ # Evaluates the `number()` function call.
1059
+ #
1060
+ # This function call converts its first argument *or* the current context
1061
+ # node to a number, similar to the `string()` function.
1062
+ #
1063
+ # @example
1064
+ # number("10") # => 10.0
1065
+ #
1066
+ # @see [#on_call_string]
1067
+ # @param [Oga::XML::NodeSet] context
1068
+ # @param [Oga::XPath::Node] expression
1069
+ # @return [Float]
1070
+ #
1071
def on_call_number(context, expression = nil)
  # Without an argument the text of the context node is converted.
  return to_float(context.first.text) unless expression

  result = process(expression, context)

  convert =
    if result.is_a?(XML::NodeSet)
      first_node_text(result)
    elsif result == true
      1.0
    elsif result == false
      0.0
    elsif result
      result
    end

  to_float(convert)
end
1095
+
1096
+ ##
1097
+ # Processes the `concat()` function call.
1098
+ #
1099
+ # This function call converts its arguments to strings and concatenates
1100
+ # them. In case of node sets the text of the set is used.
1101
+ #
1102
+ # @param [Oga::XML::NodeSet] context
1103
+ # @param [Oga::XPath::Node] first
1104
+ # @param [Oga::XPath::Node] second
1105
+ # @param [Array<Oga::XPath::Node>] rest
1106
+ #
1107
def on_call_concat(context, first, second, *rest)
  # Every argument is converted with string() and appended in order.
  [first, second, *rest].each_with_object('') do |arg, buffer|
    buffer << on_call_string(context, arg)
  end
end
1117
+
1118
+ ##
1119
+ # Processes the `starts-with()` function call.
1120
+ #
1121
+ # This function call returns `true` if the string in the 1st argument
1122
+ # starts with the string in the 2nd argument. Node sets can also be used.
1123
+ #
1124
+ # @example
1125
+ # starts-with("hello world", "hello") # => true
1126
+ #
1127
+ # @param [Oga::XML::NodeSet] context
1128
+ # @param [Oga::XPath::Node] haystack The string to search.
1129
+ # @param [Oga::XPath::Node] needle The string to search for.
1130
+ # @return [TrueClass|FalseClass]
1131
+ #
1132
def on_call_starts_with(context, haystack, needle)
  text   = on_call_string(context, haystack)
  prefix = on_call_string(context, needle)

  # An empty prefix is handled explicitly, see
  # https://github.com/jruby/jruby/issues/1923
  return true if prefix.empty?

  text.start_with?(prefix)
end
1139
+
1140
+ ##
1141
+ # Processes the `contains()` function call.
1142
+ #
1143
+ # This function call returns `true` if the string in the 1st argument
1144
+ # contains the string in the 2nd argument. Node sets can also be used.
1145
+ #
1146
+ # @example
1147
+ # contains("hello world", "o w") # => true
1148
+ #
1149
+ # @param [Oga::XML::NodeSet] context
1150
+ # @param [Oga::XPath::Node] haystack The string to search.
1151
+ # @param [Oga::XPath::Node] needle The string to search for.
1152
+ # @return [TrueClass|FalseClass]
1153
+ #
1154
def on_call_contains(context, haystack, needle)
  text     = on_call_string(context, haystack)
  fragment = on_call_string(context, needle)

  text.include?(fragment)
end
1160
+
1161
+ ##
1162
+ # Processes the `substring-before()` function call.
1163
+ #
1164
+ # This function call returns the substring of the 1st argument that occurs
1165
+ # before the string given in the 2nd argument. For example:
1166
+ #
1167
+ # substring-before("2014-08-25", "-")
1168
+ #
1169
+ # This would return "2014" as it occurs before the first "-".
1170
+ #
1171
+ # @param [Oga::XML::NodeSet] context
1172
+ # @param [Oga::XPath::Node] haystack The string to search.
1173
+ # @param [Oga::XPath::Node] needle The string to search for.
1174
+ # @return [String]
1175
+ #
1176
def on_call_substring_before(context, haystack, needle)
  text      = on_call_string(context, haystack)
  separator = on_call_string(context, needle)
  position  = text.index(separator)

  # Everything in front of the first occurrence, or an empty string when the
  # separator does not occur at all.
  position ? text[0, position] : ''
end
1184
+
1185
+ ##
1186
+ # Processes the `substring-after()` function call.
1187
+ #
1188
+ # This function call returns the substring of the 1st argument that occurs
1189
+ # after the string given in the 2nd argument. For example:
1190
+ #
1191
+ # substring-after("2014-08-25", "-")
1192
+ #
1193
+ # This would return "08-25" as it occurs after the first "-".
1194
+ #
1195
+ # @param [Oga::XML::NodeSet] context
1196
+ # @param [Oga::XPath::Node] haystack The string to search.
1197
+ # @param [Oga::XPath::Node] needle The string to search for.
1198
+ # @return [String]
1199
+ #
1200
def on_call_substring_after(context, haystack, needle)
  haystack_str = on_call_string(context, haystack)
  needle_str   = on_call_string(context, needle)

  # Every string contains the empty string at its very start, so everything
  # comes "after" it. String#partition reports this case with an empty
  # separator, which is indistinguishable from "not found" below.
  return haystack_str if needle_str.empty?

  _, sep, after = haystack_str.partition(needle_str)

  # An empty separator means the needle does not occur in the haystack.
  sep.empty? ? sep : after
end
1208
+
1209
+ ##
1210
+ # Processes the `substring()` function call.
1211
+ #
1212
+ # This function call returns the substring of the 1st argument, starting
1213
+ # at the position given in the 2nd argument. If the third argument is
1214
+ # given it is used as the length for the substring, otherwise the string
1215
+ # is consumed until the end.
1216
+ #
1217
+ # XPath string indexes start from position 1, not position 0.
1218
+ #
1219
+ # @example Using a literal string
1220
+ # substring("foo", 2) # => "oo"
1221
+ #
1222
+ # @example Using a literal string with a custom length
1223
+ # substring("foo", 1, 2) # => "fo"
1224
+ #
1225
+ # @example Using a node set
1226
+ # substring(users/user/username, 5)
1227
+ #
1228
+ # @param [Oga::XML::NodeSet] context
1229
+ # @param [Oga::XPath::Node] haystack
1230
+ # @param [Oga::XPath::Node] start
1231
+ # @param [Oga::XPath::Node] length
1232
+ # @return [String]
1233
+ #
1234
def on_call_substring(context, haystack, start, length = nil)
  haystack_str = on_call_string(context, haystack)

  # XPath rounds both numeric arguments, with half-way values going towards
  # positive infinity. NaN and +/-Infinity can not be rounded by Ruby and are
  # passed through so the range checks below can deal with them.
  xpath_round = lambda do |number|
    if number.nan? || number.infinite?
      number
    else
      exact_half = (number - number.floor) == 0.5

      (exact_half ? number.ceil : number.round).to_f
    end
  end

  # 1-based position of the first character to return.
  first = xpath_round.call(on_call_number(context, start))

  # 1-based position just past the last character to return.
  stop = Float::INFINITY
  stop = first + xpath_round.call(on_call_number(context, length)) if length

  # A NaN start or length (or -Infinity + Infinity) selects nothing.
  return '' if first.nan? || stop.nan?

  # Clamp the requested window to the characters that actually exist. This
  # keeps a start below 1 from indexing from the end of the string and a
  # zero/negative length from selecting everything.
  size  = haystack_str.length
  first = 1.0 if first < 1
  stop  = size + 1.0 if stop > size + 1

  return '' unless first < stop

  haystack_str[first.to_i - 1, (stop - first).to_i]
end
1247
+
1248
+ ##
1249
+ # Processes the `string-length()` function.
1250
+ #
1251
+ # This function returns the length of the string given in the 1st argument
1252
+ # *or* the current context node. If the expression is not a string it's
1253
+ # converted to a string using the `string()` function.
1254
+ #
1255
+ # @see [#on_call_string]
1256
+ # @param [Oga::XML::NodeSet] context
1257
+ # @param [Oga::XPath::Node] expression
1258
+ # @return [Float]
1259
+ #
1260
def on_call_string_length(context, expression = nil)
  text = on_call_string(context, expression)

  text.length.to_f
end
1263
+
1264
+ ##
1265
+ # Processes the `normalize-space()` function call.
1266
+ #
1267
+ # This function strips the 1st argument string *or* the current context
1268
+ # node of leading/trailing whitespace as well as replacing multiple
1269
+ # whitespace sequences with single spaces.
1270
+ #
1271
+ # @example
1272
+ # normalize-space(" fo o ") # => "fo o"
1273
+ #
1274
+ # @param [Oga::XML::NodeSet] context
1275
+ # @param [Oga::XPath::Node] expression
1276
+ # @return [String]
1277
+ #
1278
def on_call_normalize_space(context, expression = nil)
  trimmed = on_call_string(context, expression).strip

  # Collapse every remaining run of whitespace into a single space.
  trimmed.gsub(/\s+/, ' ')
end
1283
+
1284
+ ##
1285
+ # Processes the `translate()` function call.
1286
+ #
1287
+ # This function takes the string of the 1st argument and replaces all
1288
+ # characters of the 2nd argument with those specified in the 3rd argument.
1289
+ #
1290
+ # @example
1291
+ # translate("bar", "abc", "ABC") # => "BAr"
1292
+ #
1293
+ # @param [Oga::XML::NodeSet] context
1294
+ # @param [Oga::XPath::Node] input
1295
+ # @param [Oga::XPath::Node] find
1296
+ # @param [Oga::XPath::Node] replace
1297
+ # @return [String]
1298
+ #
1299
def on_call_translate(context, input, find, replace)
  input_str     = on_call_string(context, input)
  find_chars    = on_call_string(context, find).chars.to_a
  replace_chars = on_call_string(context, replace).chars.to_a

  # Build the translation table up front. Characters without a counterpart
  # in the replacement string are removed, and when a character is listed
  # more than once its first occurrence wins.
  table = {}

  find_chars.each_with_index do |char, index|
    table[char] = replace_chars[index] || '' unless table.key?(char)
  end

  # Every input character is translated exactly once. Running one gsub per
  # character instead would translate already replaced characters again
  # (e.g. translate("ab", "ab", "ba") would produce "aa" instead of "ba").
  input_str.chars.map { |char| table.fetch(char, char) }.join
end
1312
+
1313
+ ##
1314
+ # Processes the `boolean()` function call.
1315
+ #
1316
+ # This function converts the 1st argument to a boolean.
1317
+ #
1318
+ # The boolean `true` is returned for the following:
1319
+ #
1320
+ # * A non empty string
1321
+ # * A non empty node set
1322
+ # * A non zero number, either positive or negative
1323
+ #
1324
+ # The boolean `false` is returned for all other cases.
1325
+ #
1326
+ # @param [Oga::XML::NodeSet] context
1327
+ # @param [Oga::XPath::Node] expression
1328
+ # @return [TrueClass|FalseClass]
1329
+ #
1330
def on_call_boolean(context, expression)
  retval = process(expression, context)

  if retval.is_a?(Numeric)
    # Only floats can be NaN. Integers (e.g. supplied through variable
    # bindings) do not respond to #nan? and would otherwise raise.
    not_a_number = retval.respond_to?(:nan?) && retval.nan?

    return !not_a_number && !retval.zero?
  end

  # nil and false are always false.
  return false unless retval

  # Strings and node sets are true when non empty, anything else is true.
  !retval.respond_to?(:empty?) || !retval.empty?
end
1342
+
1343
+ ##
1344
+ # Processes the `not()` function call.
1345
+ #
1346
+ # This function converts the 1st argument to a boolean and returns the
1347
+ # opposite boolean value. For example, if the first argument results in
1348
+ # `true` then this function returns `false` instead.
1349
+ #
1350
+ # @param [Oga::XML::NodeSet] context
1351
+ # @param [Oga::XPath::Node] expression
1352
+ # @return [TrueClass|FalseClass]
1353
+ #
1354
def on_call_not(context, expression)
  # Plain negation of boolean().
  !on_call_boolean(context, expression)
end
1357
+
1358
+ ##
1359
+ # Processes the `true()` function call.
1360
+ #
1361
+ # This function simply returns the boolean `true`.
1362
+ #
1363
+ # @param [Oga::XML::NodeSet] context
1364
+ # @return [TrueClass]
1365
+ #
1366
def on_call_true(context)
  # The context is irrelevant for true().
  true
end
1369
+
1370
+ ##
1371
+ # Processes the `false()` function call.
1372
+ #
1373
+ # This function simply returns the boolean `false`.
1374
+ #
1375
+ # @param [Oga::XML::NodeSet] context
1376
+ # @return [FalseClass]
1377
+ #
1378
def on_call_false(context)
  # The context is irrelevant for false().
  false
end
1381
+
1382
+ ##
1383
+ # Processes the `lang()` function call.
1384
+ #
1385
+ # This function returns `true` if the current context node is in the given
1386
+ # language, `false` otherwise.
1387
+ #
1388
+ # The language is based on the value of the "xml:lang" attribute of either
1389
+ # the context node or an ancestor node (in case the context node has no
1390
+ # such attribute).
1391
+ #
1392
+ # @param [Oga::XML::NodeSet] context
1393
+ # @param [Oga::XPath::Node] language
1394
+ # @return [TrueClass|FalseClass]
1395
+ #
1396
def on_call_lang(context, language)
  lang_str = on_call_string(context, language).downcase
  node     = context.first

  # Walk up from the context node until a node declares "xml:lang". The
  # closest declaration decides the outcome.
  while node.respond_to?(:attribute)
    found = node.attribute('xml:lang')

    if found
      value = found.value.to_s.downcase

      # XPath compares languages case insensitively and also accepts a
      # sub-language: lang("en") is true for xml:lang="en-US".
      return value == lang_str || value.start_with?("#{lang_str}-")
    end

    node = node.parent
  end

  false
end
1410
+
1411
+ ##
1412
+ # Processes the `sum()` function call.
1413
+ #
1414
+ # This function call takes a node set, converts each node to a number and
1415
+ # then sums the values.
1416
+ #
1417
+ # As an example, take the following XML:
1418
+ #
1419
+ # <root>
1420
+ # <a>1</a>
1421
+ # <b>2</b>
1422
+ # </root>
1423
+ #
1424
+ # Using the expression `sum(root/*)` the return value would be `3.0`.
1425
+ #
1426
+ # @param [Oga::XML::NodeSet] context
1427
+ # @param [Oga::XPath::Node] expression
1428
+ # @return [Float]
1429
+ #
1430
def on_call_sum(context, expression)
  nodes = process(expression, context)

  unless nodes.is_a?(XML::NodeSet)
    raise TypeError, 'sum() can only operate on NodeSet instances'
  end

  sum = 0.0

  nodes.each do |node|
    # Use the same conversion as number(): text that is not a number becomes
    # NaN (making the whole sum NaN) instead of silently counting as 0.0,
    # which is what String#to_f would do.
    sum += to_float(node.text)
  end

  sum
end
1444
+
1445
+ ##
1446
+ # Processes the `floor()` function call.
1447
+ #
1448
+ # This function call rounds the 1st argument down to the closest integer,
1449
+ # and then returns that number as a float.
1450
+ #
1451
+ # @param [Oga::XML::NodeSet] context
1452
+ # @param [Oga::XPath::Node] expression
1453
+ # @return [Float]
1454
+ #
1455
def on_call_floor(context, expression)
  number = on_call_number(context, expression)

  # NaN and +/-Infinity have no integer representation (Float#floor raises
  # FloatDomainError for them) and are returned unchanged.
  return number if number.nan? || number.infinite?

  number.floor.to_f
end
1460
+
1461
+ ##
1462
+ # Processes the `ceiling()` function call.
1463
+ #
1464
+ # This function call rounds the 1st argument up to the closest integer,
1465
+ # and then returns that number as a float.
1466
+ #
1467
+ # @param [Oga::XML::NodeSet] context
1468
+ # @param [Oga::XPath::Node] expression
1469
+ # @return [Float]
1470
+ #
1471
def on_call_ceiling(context, expression)
  number = on_call_number(context, expression)

  # NaN and +/-Infinity have no integer representation (Float#ceil raises
  # FloatDomainError for them) and are returned unchanged.
  return number if number.nan? || number.infinite?

  number.ceil.to_f
end
1476
+
1477
+ ##
1478
+ # Processes the `round()` function call.
1479
+ #
1480
+ # This function call rounds the 1st argument to the closest integer, and
1481
+ # then returns that number as a float.
1482
+ #
1483
+ # @param [Oga::XML::NodeSet] context
1484
+ # @param [Oga::XPath::Node] expression
1485
+ # @return [Float]
1486
+ #
1487
def on_call_round(context, expression)
  number = on_call_number(context, expression)

  # NaN and +/-Infinity have no integer representation (Float#round raises
  # FloatDomainError for them) and are returned unchanged.
  return number if number.nan? || number.infinite?

  # XPath rounds half-way values towards positive infinity (round(-1.5) is
  # -1) whereas Float#round rounds them away from zero (-2).
  exact_half = (number - number.floor) == 0.5

  (exact_half ? number.ceil : number.round).to_f
end
1492
+
1493
+ ##
1494
+ # Processes an `(int)` node.
1495
+ #
1496
+ # @param [Oga::XPath::Node] ast_node
1497
+ # @param [Oga::XML::NodeSet] context
1498
+ # @return [Float]
1499
+ #
1500
def on_int(ast_node, context)
  literal = ast_node.children[0]

  # The literal is converted with #to_f so a Float is returned.
  literal.to_f
end
1503
+
1504
+ ##
1505
+ # Processes an `(float)` node.
1506
+ #
1507
+ # @param [Oga::XPath::Node] ast_node
1508
+ # @param [Oga::XML::NodeSet] context
1509
+ # @return [Float]
1510
+ #
1511
def on_float(ast_node, context)
  # The literal is stored as the first child and is returned as is.
  literal = ast_node.children[0]

  literal
end
1514
+
1515
+ ##
1516
+ # Processes a `(string)` node.
1517
+ #
1518
+ # @param [Oga::XPath::Node] ast_node
1519
+ # @param [Oga::XML::NodeSet] context
1520
+ # @return [String]
1521
+ #
1522
def on_string(ast_node, context)
  # The literal is stored as the first child and is returned as is.
  literal = ast_node.children[0]

  literal
end
1525
+
1526
+ ##
1527
+ # Processes a variable reference. If the variable is not defined an error
1528
+ # is raised.
1529
+ #
1530
+ # @param [Oga::XPath::Node] ast_node
1531
+ # @param [Oga::XML::NodeSet] context
1532
+ # @return [Mixed]
1533
+ # @raise [RuntimeError]
1534
+ #
1535
def on_var(ast_node, context)
  name = ast_node.children[0]

  # Explicitly check for the key so variables bound to nil/false still count
  # as defined.
  raise "Undefined XPath variable: #{name}" unless @variables.key?(name)

  @variables[name]
end
1544
+
1545
+ ##
1546
+ # Returns the node for a function call. This node is either the first node
1547
+ # in the supplied node set, or the first node in the current context.
1548
+ #
1549
+ # @param [Oga::XML::NodeSet] context
1550
+ # @param [Oga::XPath::Node] expression
1551
+ # @return [Oga::XML::Node]
1552
+ #
1553
def function_node(context, expression = nil)
  # Without an argument the function operates on the context node.
  return context.first unless expression

  result = process(expression, context)

  unless result.is_a?(XML::NodeSet)
    raise TypeError, 'only node sets can be used as arguments'
  end

  result.first
end
1568
+
1569
+ ##
1570
+ # Returns the text of the first node in the node set, or an empty string
1571
+ # if the node set is empty.
1572
+ #
1573
+ # @param [Oga::XML::NodeSet] set
1574
+ # @return [String]
1575
+ #
1576
def first_node_text(set)
  head = set[0]

  # An empty set (or a first entry without text) yields an empty string.
  head.respond_to?(:text) ? head.text : ''
end
1579
+
1580
+ ##
1581
+ # Returns a node set containing all the child nodes of the given set of
1582
+ # nodes.
1583
+ #
1584
+ # @param [Oga::XML::NodeSet] nodes
1585
+ # @return [Oga::XML::NodeSet]
1586
+ #
1587
def child_nodes(nodes)
  collected = XML::NodeSet.new

  # Append the children of every node, in order, to a single set.
  nodes.each { |parent| collected.concat(parent.children) }

  collected
end
1596
+
1597
+ ##
1598
+ # Checks if a given {Oga::XML::Node} instance matches a {Oga::XPath::Node}
1599
+ # instance.
1600
+ #
1601
+ # This method can use both "test" and "type-test" nodes. In case of
1602
+ # "type-test" nodes the procedure is as following:
1603
+ #
1604
+ # 1. Evaluate the expression
1605
+ # 2. If the return value is non empty return `true`, otherwise return
1606
+ # `false`
1607
+ #
1608
+ # For "test" nodes the procedure is as following instead:
1609
+ #
1610
+ # 1. Match the name
1611
+ # 2. Match the namespace
1612
+ #
1613
+ # For both the name and namespace a wildcard (`*`) can be used.
1614
+ #
1615
+ # @param [Oga::XML::Node] xml_node
1616
+ # @param [Oga::XPath::Node] ast_node
1617
+ # @return [TrueClass|FalseClass]
1618
+ #
1619
def node_matches?(xml_node, ast_node)
  return type_matches?(xml_node, ast_node) if ast_node.type == :type_test

  ns, name = *ast_node

  # A bare wildcard name ("*") also matches any namespace.
  ns = '*' if !ns && name == '*'

  return false unless name_matches?(xml_node, name)

  # With an explicit namespace both have to match, without one the XML node
  # must not have a namespace either.
  ns ? namespace_matches?(xml_node, ns) : !xml_node.namespace
end
1644
+
1645
+ ##
1646
+ # @param [Oga::XML::Node] xml_node
1647
+ # @param [Oga::XPath::Node] ast_node
1648
+ # @return [TrueClass|FalseClass]
1649
+ #
1650
def type_matches?(xml_node, ast_node)
  # Evaluate the type test with the node as the only context node; a non
  # empty result means the node matched.
  singleton = XML::NodeSet.new([xml_node])
  matched   = process(ast_node, singleton)

  matched.length > 0
end
1655
+
1656
+ ##
1657
+ # Returns `true` if the name of the XML node matches the given name *or*
1658
+ # matches a wildcard.
1659
+ #
1660
+ # @param [Oga::XML::Node] xml_node
1661
+ # @param [String] name
1662
+ #
1663
def name_matches?(xml_node, name)
  # Nodes without a name can never match, not even a wildcard.
  xml_node.respond_to?(:name) && (xml_node.name == name || name == '*')
end
1668
+
1669
+ ##
1670
+ # Returns `true` if the namespace of the XML node matches the given
1671
+ # namespace *or* matches a wildcard.
1672
+ #
1673
+ # @param [Oga::XML::Node] xml_node
1674
+ # @param [String] ns
1675
+ #
1676
def namespace_matches?(xml_node, ns)
  # Nodes without a namespace slot can never match, not even a wildcard.
  xml_node.respond_to?(:namespace) &&
    (xml_node.namespace.to_s == ns || ns == '*')
end
1681
+
1682
+ ##
1683
+ # @param [Oga::XML::Node] ast_node
1684
+ # @return [TrueClass|FalseClass]
1685
+ #
1686
def has_parent?(ast_node)
  return false unless ast_node.respond_to?(:parent)

  # Always answer with a real boolean.
  ast_node.parent ? true : false
end
1689
+
1690
+ ##
1691
+ # Converts the given value to a float. If the value can't be converted to
1692
+ # a float NaN is returned instead.
1693
+ #
1694
+ # @param [Mixed] value
1695
+ # @return [Float]
1696
+ #
1697
def to_float(value)
  Float(value)
rescue StandardError
  # Anything Kernel#Float rejects (nil, non numeric text, ...) becomes NaN.
  Float::NAN
end
1700
+
1701
+ ##
1702
+ # Converts the given value to a string according to the XPath string
1703
+ # conversion rules.
1704
+ #
1705
+ # @param [Mixed] value
1706
+ # @return [String]
1707
+ #
1708
def to_string(value)
  # Floats without a fractional part (e.g. 10.0) are converted to an integer
  # first so the trailing ".0" is not part of the output.
  whole_float = value.is_a?(Float) && value.modulo(1).zero?

  (whole_float ? value.to_i : value).to_s
end
1718
+
1719
+ ##
1720
+ # Stores the specified node set and yields the supplied block. The return
1721
+ # value of this method is whatever the block returned.
1722
+ #
1723
+ # @example
1724
+ # retval = with_node_set(context) do
1725
+ # process(....)
1726
+ # end
1727
+ #
1728
+ # @param [Oga::XML::NodeSet] nodes
1729
+ #
1730
def with_node_set(nodes)
  @node_sets << nodes

  begin
    # The value of the block is the return value of this method.
    yield
  ensure
    # Always restore the previous node set, also when the block raises.
    # Otherwise a failed evaluation leaves a stale set on the stack.
    @node_sets.pop
  end
end
1739
+
1740
+ ##
1741
+ # @return [Oga::XML::NodeSet]
1742
+ #
1743
def current_node_set
  # The most recently stored node set sits at the end of the stack.
  @node_sets[-1]
end
1746
+ end # Evaluator
1747
+ end # XPath
1748
+ end # Oga