oga 0.1.1-java

Sign up to get free protection for your applications and to get access to all the features.
Files changed (47) hide show
  1. checksums.yaml +7 -0
  2. data/.yardopts +13 -0
  3. data/LICENSE +19 -0
  4. data/README.md +179 -0
  5. data/doc/DCO.md +25 -0
  6. data/doc/changelog.md +20 -0
  7. data/doc/css/common.css +76 -0
  8. data/doc/migrating_from_nokogiri.md +169 -0
  9. data/ext/c/extconf.rb +13 -0
  10. data/ext/c/lexer.c +1518 -0
  11. data/ext/c/lexer.h +8 -0
  12. data/ext/c/lexer.rl +121 -0
  13. data/ext/c/liboga.c +6 -0
  14. data/ext/c/liboga.h +11 -0
  15. data/ext/java/Liboga.java +14 -0
  16. data/ext/java/org/liboga/xml/Lexer.java +829 -0
  17. data/ext/java/org/liboga/xml/Lexer.rl +151 -0
  18. data/ext/ragel/base_lexer.rl +323 -0
  19. data/lib/liboga.jar +0 -0
  20. data/lib/oga.rb +43 -0
  21. data/lib/oga/html/parser.rb +25 -0
  22. data/lib/oga/oga.rb +27 -0
  23. data/lib/oga/version.rb +3 -0
  24. data/lib/oga/xml/attribute.rb +111 -0
  25. data/lib/oga/xml/cdata.rb +17 -0
  26. data/lib/oga/xml/character_node.rb +39 -0
  27. data/lib/oga/xml/comment.rb +17 -0
  28. data/lib/oga/xml/doctype.rb +84 -0
  29. data/lib/oga/xml/document.rb +99 -0
  30. data/lib/oga/xml/element.rb +331 -0
  31. data/lib/oga/xml/lexer.rb +399 -0
  32. data/lib/oga/xml/namespace.rb +42 -0
  33. data/lib/oga/xml/node.rb +168 -0
  34. data/lib/oga/xml/node_set.rb +313 -0
  35. data/lib/oga/xml/parser.rb +556 -0
  36. data/lib/oga/xml/processing_instruction.rb +39 -0
  37. data/lib/oga/xml/pull_parser.rb +180 -0
  38. data/lib/oga/xml/querying.rb +32 -0
  39. data/lib/oga/xml/text.rb +11 -0
  40. data/lib/oga/xml/traversal.rb +48 -0
  41. data/lib/oga/xml/xml_declaration.rb +69 -0
  42. data/lib/oga/xpath/evaluator.rb +1748 -0
  43. data/lib/oga/xpath/lexer.rb +2043 -0
  44. data/lib/oga/xpath/node.rb +10 -0
  45. data/lib/oga/xpath/parser.rb +537 -0
  46. data/oga.gemspec +45 -0
  47. metadata +221 -0
@@ -0,0 +1,39 @@
1
module Oga
  module XML
    ##
    # Node describing one XML processing instruction (`<?name ...?>`).
    #
    # The instruction body is exposed through `text`, which this class does
    # not define itself (presumably inherited from CharacterNode - confirm).
    #
    # @!attribute [rw] name
    #  The name of the instruction.
    #  @return [String]
    #
    class ProcessingInstruction < CharacterNode
      attr_accessor :name

      ##
      # Builds the node; the options Hash is forwarded untouched to the parent
      # class before the instruction name is stored.
      #
      # @param [Hash] options
      #
      # @option options [String] :name The name of the instruction.
      # @see Oga::XML::CharacterNode#initialize
      #
      def initialize(options = {})
        super

        instruction_name = options[:name]
        @name            = instruction_name
      end

      ##
      # Serializes the instruction as `<?` + name + text + `?>`. No separator
      # is inserted between the name and the text.
      #
      # @return [String]
      #
      def to_xml
        "<?#{name}#{text}?>"
      end

      ##
      # Debugging representation showing the name and the text.
      #
      # @return [String]
      #
      def inspect
        "ProcessingInstruction(name: #{name.inspect} text: #{text.inspect})"
      end
    end # ProcessingInstruction
  end # XML
end # Oga
@@ -0,0 +1,180 @@
1
module Oga
  module XML
    ##
    # The PullParser class can be used to parse an XML document incrementally
    # instead of parsing it as a whole. This results in lower memory usage and
    # potentially faster parsing times. The downside is that pull parsers are
    # typically more difficult to use compared to DOM parsers.
    #
    # Basic parsing using this class works as following:
    #
    #     parser = Oga::XML::PullParser.new('... xml here ...')
    #
    #     parser.parse do |node|
    #       if node.is_a?(Oga::XML::Element)
    #         # act upon the element
    #       end
    #     end
    #
    # This parser yields proper XML instances such as {Oga::XML::Element}.
    # Doctypes and XML declarations are ignored by this parser (their
    # callbacks are listed in {DISABLED_CALLBACKS}).
    #
    # @!attribute [r] node
    #  The current node.
    #  @return [Oga::XML::Node]
    #
    # @!attribute [r] nesting
    #  Array containing the names of the currently nested elements.
    #  @return [Array]
    #
    class PullParser < Parser
      attr_reader :node, :nesting

      ##
      # Parser callbacks that are replaced with no-ops so that nothing is
      # built or yielded for them.
      #
      # @return [Array]
      #
      DISABLED_CALLBACKS = [
        :on_document,
        :on_doctype,
        :on_xml_decl,
        :on_element_children
      ]

      ##
      # Parser callbacks whose return value is stored as the current node and
      # handed to the block given to {#parse}.
      #
      # @return [Array]
      #
      BLOCK_CALLBACKS = [
        :on_cdata,
        :on_comment,
        :on_text,
        :on_proc_ins
      ]

      ##
      # Returns the shorthands that can be used for various node classes.
      #
      # @return [Hash]
      #
      NODE_SHORTHANDS = {
        :text            => XML::Text,
        :node            => XML::Node,
        :cdata           => XML::Cdata,
        :element         => XML::Element,
        :doctype         => XML::Doctype,
        :comment         => XML::Comment,
        :xml_declaration => XML::XmlDeclaration
      }

      ##
      # Resets the parent parser state and clears the block, the nesting and
      # the current node.
      #
      # @see Oga::XML::Parser#reset
      #
      def reset
        super

        @block   = nil
        @nesting = []
        @node    = nil
      end

      ##
      # Parses the input and yields every node to the supplied block.
      #
      # The parser is reset once parsing finishes. This also happens when the
      # block or the parser raises, so an aborted run does not leave a stale
      # block, node or nesting behind.
      #
      # @yieldparam [Oga::XML::Node]
      # @return [NilClass]
      #
      def parse(&block)
        @block = block

        yyparse(self, :yield_next_token)

        return
      ensure
        reset
      end

      ##
      # Calls the supplied block if the current node type and optionally the
      # nesting match. This method allows you to write this:
      #
      #     parser.parse do |node|
      #       parser.on(:text, %w{people person name}) do
      #         puts node.text
      #       end
      #     end
      #
      # Instead of this:
      #
      #     parser.parse do |node|
      #       if node.is_a?(Oga::XML::Text) and parser.nesting == %w{people person name}
      #         puts node.text
      #       end
      #     end
      #
      # When calling this method you can specify the following node types:
      #
      # * `:cdata`
      # * `:comment`
      # * `:element`
      # * `:text`
      # * `:node`
      #
      # The type is looked up in {NODE_SHORTHANDS}. `:doctype` and
      # `:xml_declaration` are also keys of that Hash, but their callbacks are
      # disabled in this class so they never become the current node.
      #
      # @example
      #  parser.on(:element, %w{people person name}) do
      #
      #  end
      #
      # @param [Symbol] type The type of node to act upon, one of the keys of
      #  {NODE_SHORTHANDS}.
      #
      # @param [Array] nesting The element name nesting to act upon. An empty
      #  Array (the default) matches any nesting.
      #
      # @raise [TypeError] When `type` is not a key of {NODE_SHORTHANDS} (the
      #  lookup returns nil, which `is_a?` rejects).
      #
      def on(type, nesting = [])
        if node.is_a?(NODE_SHORTHANDS[type])
          if nesting.empty? or nesting == self.nesting
            yield
          end
        end
      end

      # eval is a heck of a lot faster than define_method on both Rubinius and
      # JRuby.
      DISABLED_CALLBACKS.each do |method|
        eval <<-EOF, nil, __FILE__, __LINE__ + 1
        def #{method}(*args)
          return
        end
        EOF
      end

      BLOCK_CALLBACKS.each do |method|
        eval <<-EOF, nil, __FILE__, __LINE__ + 1
        def #{method}(*args)
          @node = super
          @block.call(@node)
          return
        end
        EOF
      end

      ##
      # Stores the element built by the parent class as the current node,
      # pushes its name onto the nesting and yields it to the block.
      #
      # @see Oga::XML::Parser#on_element
      #
      def on_element(*args)
        @node = super

        nesting << @node.name

        @block.call(@node)

        return
      end

      ##
      # Removes the most recently pushed element name from the nesting.
      #
      # NOTE(review): presumably invoked by the parent parser once an element
      # has been processed - confirm against Oga::XML::Parser.
      #
      def after_element(*args)
        nesting.pop

        return
      end
    end # PullParser
  end # XML
end # Oga
@@ -0,0 +1,32 @@
1
module Oga
  module XML
    ##
    # Mixin adding XPath query helpers to whatever object includes it. The
    # including object itself is handed to the evaluator as the document.
    #
    module Querying
      ##
      # Runs an XPath expression against the receiver.
      #
      # @param [String] expression The XPath expression to run.
      # @param [Hash] variables Variables to bind.
      # @return The value produced by the evaluator.
      # @see Oga::XPath::Evaluator#initialize
      #
      def xpath(expression, variables = {})
        evaluator = XPath::Evaluator.new(self, variables)

        evaluator.evaluate(expression)
      end

      ##
      # Runs an XPath expression and, when the result is a node set, returns
      # only its first node. Any other kind of result is returned as-is.
      #
      # @see #xpath
      #
      def at_xpath(*args)
        outcome = xpath(*args)

        return outcome unless outcome.is_a?(XML::NodeSet)

        outcome.first
      end
    end # Querying
  end # XML
end # Oga
@@ -0,0 +1,11 @@
1
module Oga
  module XML
    ##
    # Node type for plain text. The class adds nothing of its own; all of its
    # behaviour comes from CharacterNode.
    #
    class Text < CharacterNode
    end # Text
  end # XML
end # Oga
@@ -0,0 +1,48 @@
1
module Oga
  module XML
    ##
    # Mixin for walking a DOM tree. The including object has to provide a
    # `children` method whose result responds to `to_a`.
    #
    module Traversal
      ##
      # Yields every descendant of the receiver (not the receiver itself) in
      # document order: a node is yielded first, followed by its own children,
      # before moving on to its next sibling (depth-first, pre-order).
      #
      # The block can prevent the children of the node it was just given from
      # being visited by calling `throw :skip_children`.
      #
      # @example
      #  document.each_node do |node|
      #    p node.class
      #  end
      #
      # @example Skipping the children of a certain node
      #  document.each_node do |node|
      #    if node.is_a?(Oga::XML::Element) and node.name == 'book'
      #      throw :skip_children
      #    end
      #  end
      #
      # @yieldparam [Oga::XML::Node] The current node.
      #
      def each_node
        # Pending nodes, kept in reverse so the next node to visit is always
        # the last element. `reverse` returns a new Array, the receiver's own
        # children are never modified.
        pending = children.to_a.reverse

        until pending.empty?
          current = pending.pop

          catch :skip_children do
            yield current

            # Only reached when the block did not throw :skip_children.
            current.children.to_a.reverse_each do |child|
              pending.push(child)
            end
          end
        end
      end
    end # Traversal
  end # XML
end # Oga
@@ -0,0 +1,69 @@
1
module Oga
  module XML
    ##
    # Holds the attributes of an XML declaration tag (`<?xml ... ?>`).
    #
    # @!attribute [rw] version
    #  The XML version.
    #  @return [String]
    #
    # @!attribute [rw] encoding
    #  The XML document's encoding.
    #  @return [String]
    #
    # @!attribute [rw] standalone
    #  Whether or not the document is a standalone document.
    #  @return [String]
    #
    class XmlDeclaration
      attr_accessor :version, :encoding, :standalone

      ##
      # @param [Hash] options
      #
      # @option options [String] :version Defaults to "1.0".
      # @option options [String] :encoding Defaults to "UTF-8".
      # @option options [String] :standalone No default, left nil when absent.
      #
      def initialize(options = {})
        @standalone = options[:standalone]
        @encoding   = options[:encoding] || 'UTF-8'
        @version    = options[:version] || '1.0'
      end

      ##
      # Converts the declaration tag to XML. Attributes are written in the
      # order version, encoding, standalone; attributes whose value is nil
      # or false are left out.
      #
      # @return [String]
      #
      def to_xml
        attributes = [:version, :encoding, :standalone].map do |getter|
          value = send(getter)

          %Q{#{getter}="#{value}"} if value
        end

        "<?xml #{attributes.compact.join(' ')} ?>"
      end

      ##
      # Debugging representation. Unlike {#to_xml} this also leaves out
      # attributes whose value is empty.
      #
      # @return [String]
      #
      def inspect
        shown = []

        [:version, :encoding, :standalone].each do |attr|
          value = send(attr)

          next if !value || value.empty?

          shown << "#{attr}: #{value.inspect}"
        end

        "XmlDeclaration(#{shown.join(' ')})"
      end
    end # XmlDeclaration
  end # XML
end # Oga
@@ -0,0 +1,1748 @@
1
+ module Oga
2
+ module XPath
3
+ ##
4
+ # The Evaluator class evaluates XPath expressions, either as a String or an
5
+ # AST of {Oga::XPath::Node} instances.
6
+ #
7
+ # ## Thread Safety
8
+ #
9
+ # This class is not thread-safe, you can not share the same instance between
10
+ # multiple threads. This is due to the use of an internal stack (see below
11
+ # for more information). It is however perfectly fine to use multiple
12
+ # separated instances as this class does not use a thread global state.
13
+ #
14
+ # ## Node Set Stack
15
+ #
16
+ # This class uses an internal stack of XML node sets. This stack is used for
17
+ # functions that require access to the set of nodes a predicate belongs to.
18
+ # An example of such a function is `position()`.
19
+ #
20
+ # An alternative would be to pass the node sets a predicate belongs to as an
21
+ # extra argument to the various `on_*` methods. The problematic part of
22
+ # this approach is that it requires every method to take and pass along the
23
+ # argument. It's far too easy to make mistakes in such a setup and as such
24
+ # I've chosen to use an internal stack instead.
25
+ #
26
+ # See {#with_node_set} and {#current_node_set} for more information.
27
+ #
28
+ # ## Set Indices
29
+ #
30
+ # XPath node sets start at index 1 instead of index 0. In other words, if
31
+ # you want to access the first node in a set you have to use index 1, not 0.
32
+ # Certain methods such as {#on_call_last} and {#on_call_position} take care
33
+ # of converting indices from Ruby to XPath.
34
+ #
35
+ # ## Number Types
36
+ #
37
+ # The XPath specification states that all numbers produced by an expression
38
+ # should be returned as double-precision 64bit IEEE 754 floating point
39
+ # numbers. For example, the return value of `position()` should be a float
40
+ # (e.g. "1.0", not "1").
41
+ #
42
+ # Oga takes care internally of converting numbers to integers and/or floats
43
+ # where needed. The output types however will always be floats.
44
+ #
45
+ # For more information on the specification, see
46
+ # <http://www.w3.org/TR/xpath/#numbers>.
47
+ #
48
+ # ## Variables
49
+ #
50
+ # The evaluator supports the binding of custom variables in the
51
+ # {#initialize} method. Variables can be bound by passing in a Hash with the
52
+ # keys set to the variable names (minus the `$` sign) and their values to
53
+ # the variable values. The keys of the variables Hash *must* be Strings.
54
+ #
55
+ # A basic example:
56
+ #
57
+ # evaluator = Evaluator.new(document, 'number' => 10)
58
+ #
59
+ # evaluator.evaluate('$number') # => 10
60
+ #
61
+ class Evaluator
62
##
# Stores the document and variable bindings and starts with an empty
# node set stack.
#
# @param [Oga::XML::Document|Oga::XML::Node] document
# @param [Hash] variables Hash containing variables to expose to the XPath
#  expressions.
#
def initialize(document, variables = {})
  @node_sets = []
  @variables = variables
  @document  = document
end
72
+
73
##
# Parses an XPath expression given as a String and evaluates the
# resulting AST.
#
# @example
#  evaluator = Oga::XPath::Evaluator.new(document)
#
#  evaluator.evaluate('//a')
#
# @param [String] string An XPath expression as a String.
# @return [Mixed]
#
def evaluate(string)
  evaluate_ast(Parser.new(string).parse)
end
89
+
90
##
# Evaluates an already parsed XPath expression. The initial context is
# a node set holding only the document given to the constructor.
#
# @param [Oga::XPath::Node] ast
# @return [Mixed]
#
def evaluate_ast(ast)
  initial_context = XML::NodeSet.new([@document])

  process(ast, initial_context)
end
101
+
102
##
# Dispatches an XPath AST node, together with the context, to the
# handler method named after the node's type: a node of type "X" is
# handled by "on_X".
#
# @param [Oga::XPath::Node] ast_node The XPath AST node to process.
#
# @param [Oga::XML::NodeSet] context The context (a set of nodes) to
#  evaluate an expression in.
#
# @return Whatever the handler returns.
#
def process(ast_node, context)
  send("on_#{ast_node.type}", ast_node, context)
end
119
+
120
##
# Processes an absolute XPath expression such as `/foo`.
#
# The supplied context is ignored. Evaluation starts at the document's
# root node when the document responds to `root_node`, otherwise at
# the document itself.
#
# @param [Oga::XPath::Node] ast_node
# @param [Oga::XML::NodeSet] context
# @return [Oga::XML::NodeSet]
#
def on_absolute_path(ast_node, context)
  start = @document.respond_to?(:root_node) ? @document.root_node : @document

  on_path(ast_node, XML::NodeSet.new([start]))
end
136
+
137
##
# Processes a relative XPath expression such as `foo`.
#
# Every child of the AST node is processed in order, the result of one
# step being the context of the next. As soon as a step produces an
# empty result the remaining steps are skipped and that empty result
# is returned.
#
# @param [Oga::XPath::Node] ast_node
# @param [Oga::XML::NodeSet] context
# @return [Oga::XML::NodeSet]
#
def on_path(ast_node, context)
  result = XML::NodeSet.new

  ast_node.children.each do |step|
    result = process(step, context)

    break if result.empty?

    context = result
  end

  result
end
163
+
164
##
# Processes a node test and optionally a predicate.
#
# Every context node matching the test is numbered, starting at 1. When
# the AST node has a predicate (its third child) the predicate is
# evaluated for each matching node and its result decides whether the
# node is kept:
#
# * A number keeps the node only when it is equal to the node's index.
#   Fractional numbers, NaN and Infinity therefore never select a node.
# * Any other truthy value keeps the node, unless the value responds to
#   `empty?` and is empty (e.g. an empty node set or string).
#
# @param [Oga::XPath::Node] ast_node
# @param [Oga::XML::NodeSet] context
# @return [Oga::XML::NodeSet]
#
def on_test(ast_node, context)
  nodes       = XML::NodeSet.new
  predicate   = ast_node.children[2]
  xpath_index = 1

  context.each do |xml_node|
    next unless node_matches?(xml_node, ast_node)

    if predicate
      retval = with_node_set(context) do
        process(predicate, XML::NodeSet.new([xml_node]))
      end

      # Numeric values are used as node set indexes. The comparison is done
      # on the number itself: converting it with to_i would truncate values
      # such as 1.5 and raises FloatDomainError for NaN and Infinity.
      if retval.is_a?(Numeric)
        nodes << xml_node if retval == xpath_index

      # Node sets, strings, booleans, etc
      elsif retval
        unless retval.respond_to?(:empty?) and retval.empty?
          nodes << xml_node
        end
      end
    else
      nodes << xml_node
    end

    # Every matching node advances the index, whether it was kept or not.
    xpath_index += 1
  end

  return nodes
end
205
+
206
##
# Dispatches the processing of an axis to a dedicated method. The AST
# node is split into the axis name and the test; the handler is called
# "on_axis_X" where "X" is the axis name with dashes replaced by
# underscores. The handler receives the test and the context.
#
# @param [Oga::XPath::Node] ast_node
# @param [Oga::XML::NodeSet] context
# @return [Oga::XML::NodeSet]
#
def on_axis(ast_node, context)
  axis, test = *ast_node

  send("on_axis_#{axis.gsub('-', '_')}", test, context)
end
222
+
223
##
# Processes the `ancestor` axis. For every context node the parent
# chain is walked upwards, starting at the node's parent.
#
# The walk stops at the first matching ancestor: at most one node is
# added per context node.
#
# @param [Oga::XPath::Node] ast_node
# @param [Oga::XML::NodeSet] context
# @return [Oga::XML::NodeSet]
#
def on_axis_ancestor(ast_node, context)
  matches = XML::NodeSet.new

  context.each do |current|
    while has_parent?(current)
      current = current.parent

      next unless node_matches?(current, ast_node)

      matches << current
      break
    end
  end

  matches
end
250
+
251
##
# Processes the `ancestor-or-self` axis.
#
# For every context node the node itself is tested first, followed by
# its ancestors from the nearest to the furthest. The walk stops at the
# first match, so at most one node is added per context node.
#
# Every node in the chain is tested, including the last one (the node
# without a parent). This also means a context node that has no parent
# at all is still tested against itself.
#
# @param [Oga::XPath::Node] ast_node
# @param [Oga::XML::NodeSet] context
# @return [Oga::XML::NodeSet]
# @see #on_axis_ancestor
#
def on_axis_ancestor_or_self(ast_node, context)
  nodes = XML::NodeSet.new

  context.each do |xml_node|
    loop do
      if node_matches?(xml_node, ast_node)
        nodes << xml_node
        break
      end

      # Only stop climbing *after* the current node has been tested,
      # otherwise the top of the chain is never considered.
      break unless has_parent?(xml_node)

      xml_node = xml_node.parent
    end
  end

  return nodes
end
272
+
273
##
# Processes the `attribute` axis. The node test is run, by means of
# {#on_test}, against the attributes of every element in the context.
# Context nodes that are not elements are skipped.
#
# All context nodes are processed; there is no early exit after the
# first match.
#
# @param [Oga::XPath::Node] ast_node
# @param [Oga::XML::NodeSet] context
# @return [Oga::XML::NodeSet]
#
def on_axis_attribute(ast_node, context)
  found = XML::NodeSet.new

  context.each do |xml_node|
    if xml_node.is_a?(XML::Element)
      found = found + on_test(ast_node, xml_node.attributes)
    end
  end

  found
end
296
+
297
##
# Evaluates the `child` axis: the test is processed with the child
# nodes of the context (as returned by `child_nodes`) as its context.
#
# @param [Oga::XPath::Node] ast_node
# @param [Oga::XML::NodeSet] context
# @return [Oga::XML::NodeSet]
#
def on_axis_child(ast_node, context)
  children = child_nodes(context)

  process(ast_node, children)
end
308
+
309
##
# Evaluates the `descendant` axis. Every descendant of every context
# node is tested, no "short-circuiting" mechanism is used.
#
# The descendants are obtained from `each_node` alone. Iterating over
# the direct children separately would test them a second time, since
# `each_node` already yields them, and would add matching children to
# the result twice.
#
# @param [Oga::XPath::Node] ast_node
# @param [Oga::XML::NodeSet] context
# @return [Oga::XML::NodeSet]
#
def on_axis_descendant(ast_node, context)
  nodes = XML::NodeSet.new

  context.each do |context_node|
    context_node.each_node do |node|
      nodes << node if node_matches?(node, ast_node)
    end
  end

  return nodes
end
332
+
333
##
# Evaluates the `descendant-or-self` axis: the context nodes passing
# the test (see {#on_test}) followed by the matching descendants (see
# {#on_axis_descendant}).
#
# @param [Oga::XPath::Node] ast_node
# @param [Oga::XML::NodeSet] context
# @return [Oga::XML::NodeSet]
#
def on_axis_descendant_or_self(ast_node, context)
  selves      = on_test(ast_node, context)
  descendants = on_axis_descendant(ast_node, context)

  selves.concat(descendants)

  selves
end
347
+
348
##
# Evaluates the `following` axis.
#
# For every context node the whole document is walked using
# `each_node`. Nodes yielded before the context node are ignored; the
# children of the context node are skipped; every matching node
# yielded after that is added to the result.
#
# @param [Oga::XPath::Node] ast_node
# @param [Oga::XML::NodeSet] context
# @return [Oga::XML::NodeSet]
#
def on_axis_following(ast_node, context)
  following = XML::NodeSet.new

  context.each do |context_node|
    passed = false

    @document.each_node do |candidate|
      # Reaching the context node itself: start collecting from the next
      # node on, without descending into the context node's children.
      if candidate == context_node
        passed = true
        throw :skip_children
      end

      if passed && node_matches?(candidate, ast_node)
        following << candidate
      end
    end
  end

  following
end
377
+
378
##
# Evaluates the `following-sibling` axis.
#
# For every context node the whole document is walked using
# `each_node`. Only nodes yielded after the context node that have the
# same parent as the context node are tested. The children of the
# context node and of every matching sibling are skipped.
#
# @param [Oga::XPath::Node] ast_node
# @param [Oga::XML::NodeSet] context
# @return [Oga::XML::NodeSet]
#
def on_axis_following_sibling(ast_node, context)
  siblings = XML::NodeSet.new

  context.each do |context_node|
    passed = false
    parent = nil
    parent = context_node.parent if has_parent?(context_node)

    @document.each_node do |candidate|
      # Reaching the context node itself: start collecting from the next
      # node on, without descending into the context node's children.
      if candidate == context_node
        passed = true
        throw :skip_children
      end

      next if !passed || parent != candidate.parent

      if node_matches?(candidate, ast_node)
        siblings << candidate

        throw :skip_children
      end
    end
  end

  siblings
end
414
+
415
##
# Evaluates the `parent` axis: for every context node that has a
# parent, the parent is added when it matches the test.
#
# @param [Oga::XPath::Node] ast_node
# @param [Oga::XML::NodeSet] context
# @return [Oga::XML::NodeSet]
#
def on_axis_parent(ast_node, context)
  parents = XML::NodeSet.new

  context.each do |context_node|
    if has_parent?(context_node)
      candidate = context_node.parent

      parents << candidate if node_matches?(candidate, ast_node)
    end
  end

  parents
end
435
+
436
##
# Evaluates the `preceding` axis.
#
# For every context node the document is walked using `each_node` and
# every matching node yielded before the context node is added. The
# walk stops as soon as the context node itself is reached.
#
# NOTE(review): nothing here filters out the ancestors of the context
# node, although XPath's `preceding` axis excludes them - confirm
# whether this is intended.
#
# @param [Oga::XPath::Node] ast_node
# @param [Oga::XML::NodeSet] context
# @return [Oga::XML::NodeSet]
#
def on_axis_preceding(ast_node, context)
  nodes = XML::NodeSet.new

  context.each do |context_node|
    @document.each_node do |doc_node|
      # Test everything *until* we hit the current context node.
      if doc_node == context_node
        break
      elsif node_matches?(doc_node, ast_node)
        nodes << doc_node
      end
    end
  end

  return nodes
end
461
+
462
##
# Evaluates the `preceding-sibling` axis.
#
# For every context node the document is walked using `each_node`.
# Nodes yielded before the context node are added when they have the
# same parent as the context node and match the test. The walk stops
# as soon as the context node itself is reached.
#
# @param [Oga::XPath::Node] ast_node
# @param [Oga::XML::NodeSet] context
# @return [Oga::XML::NodeSet]
#
def on_axis_preceding_sibling(ast_node, context)
  nodes = XML::NodeSet.new

  context.each do |context_node|
    parent = has_parent?(context_node) ? context_node.parent : nil

    @document.each_node do |doc_node|
      # Test everything *until* we hit the current context node.
      if doc_node == context_node
        break
      elsif doc_node.parent == parent and node_matches?(doc_node, ast_node)
        nodes << doc_node
      end
    end
  end

  return nodes
end
488
+
489
##
# Evaluates the `self` axis: keeps the context nodes that match the
# test.
#
# @param [Oga::XPath::Node] ast_node
# @param [Oga::XML::NodeSet] context
# @return [Oga::XML::NodeSet]
#
def on_axis_self(ast_node, context)
  kept = XML::NodeSet.new

  context.each do |candidate|
    next unless node_matches?(candidate, ast_node)

    kept << candidate
  end

  kept
end
505
+
506
##
# Evaluates the `namespace` axis.
#
# The name to look for is the second child of the AST node. For every
# context node responding to `available_namespaces`, the namespaces
# with that name are added; the name `*` selects all of them.
#
# @param [Oga::XPath::Node] ast_node
# @param [Oga::XML::NodeSet] context
# @return [Oga::XML::NodeSet]
#
def on_axis_namespace(ast_node, context)
  found  = XML::NodeSet.new
  wanted = ast_node.children[1]

  context.each do |context_node|
    if context_node.respond_to?(:available_namespaces)
      context_node.available_namespaces.each do |_, namespace|
        next unless namespace.name == wanted || wanted == '*'

        found << namespace
      end
    end
  end

  found
end
529
+
530
##
# Dispatches node type matching to dedicated handlers. The AST node is
# split into the type name and the test; the handler is called
# "on_type_test_X" where "X" is the type name with dashes replaced by
# underscores. The handler receives the test and the context.
#
# @param [Oga::XPath::Node] ast_node
# @param [Oga::XML::NodeSet] context
# @return [Oga::XML::NodeSet]
#
def on_type_test(ast_node, context)
  type_name, test = *ast_node

  send("on_type_test_#{type_name.gsub('-', '_')}", test, context)
end
544
+
545
##
# Processes the `node` type matcher: keeps every context entry that is
# an XML::Node.
#
# @param [Oga::XPath::Node] ast_node
# @param [Oga::XML::NodeSet] context
# @return [Oga::XML::NodeSet]
#
def on_type_test_node(ast_node, context)
  kept = XML::NodeSet.new

  context.each do |candidate|
    next unless candidate.is_a?(XML::Node)

    kept << candidate
  end

  kept
end
561
+
562
##
# Processes the `text()` type test: keeps every context entry that is
# an XML::Text.
#
# @param [Oga::XPath::Node] ast_node
# @param [Oga::XML::NodeSet] context
# @return [Oga::XML::NodeSet]
#
def on_type_test_text(ast_node, context)
  kept = XML::NodeSet.new

  context.each do |candidate|
    next unless candidate.is_a?(XML::Text)

    kept << candidate
  end

  kept
end
578
+
579
##
# Processes the `comment()` type test: keeps every context entry that
# is an XML::Comment.
#
# @param [Oga::XPath::Node] ast_node
# @param [Oga::XML::NodeSet] context
# @return [Oga::XML::NodeSet]
#
def on_type_test_comment(ast_node, context)
  kept = XML::NodeSet.new

  context.each do |candidate|
    next unless candidate.is_a?(XML::Comment)

    kept << candidate
  end

  kept
end
595
+
596
##
# Processes the `processing-instruction()` type test: keeps every
# context entry that is an XML::ProcessingInstruction.
#
# @param [Oga::XPath::Node] ast_node
# @param [Oga::XML::NodeSet] context
# @return [Oga::XML::NodeSet]
#
def on_type_test_processing_instruction(ast_node, context)
  kept = XML::NodeSet.new

  context.each do |candidate|
    next unless candidate.is_a?(XML::ProcessingInstruction)

    kept << candidate
  end

  kept
end
613
+
614
##
# Processes the pipe (`|`) operator: both operands are processed in
# the same context and the two results are combined using `+`.
#
# @param [Oga::XPath::Node] ast_node
# @param [Oga::XML::NodeSet] context
# @return [Oga::XML::NodeSet]
#
def on_pipe(ast_node, context)
  left, right = *ast_node

  left_set  = process(left, context)
  right_set = process(right, context)

  left_set + right_set
end
627
+
628
##
# Processes the `and` operator.
#
# Both operands are converted using {#on_call_boolean}. The right
# operand is only evaluated when the left one is truthy.
#
# @param [Oga::XPath::Node] ast_node
# @param [Oga::XML::NodeSet] context
# @return [TrueClass|FalseClass]
#
def on_and(ast_node, context)
  left, right = *ast_node

  lhs = on_call_boolean(context, left)

  lhs && on_call_boolean(context, right)
end
644
+
645
##
# Processes the `or` operator.
#
# Both operands are converted using {#on_call_boolean}. The right
# operand is only evaluated when the left one is falsy.
#
# @param [Oga::XPath::Node] ast_node
# @param [Oga::XML::NodeSet] context
# @return [TrueClass|FalseClass]
#
def on_or(ast_node, context)
  left, right = *ast_node

  lhs = on_call_boolean(context, left)

  lhs || on_call_boolean(context, right)
end
661
+
662
##
# Processes the `+` operator: both operands are converted using
# {#on_call_number} and added together.
#
# @param [Oga::XPath::Node] ast_node
# @param [Oga::XML::NodeSet] context
# @return [Float]
#
def on_add(ast_node, context)
  left, right = *ast_node

  augend = on_call_number(context, left)
  addend = on_call_number(context, right)

  augend + addend
end
677
+
678
##
# Processes the `div` operator: both operands are converted using
# {#on_call_number} and the left number is divided by the right one.
#
# @param [Oga::XPath::Node] ast_node
# @param [Oga::XML::NodeSet] context
# @return [Float]
#
def on_div(ast_node, context)
  left, right = *ast_node

  dividend = on_call_number(context, left)
  divisor  = on_call_number(context, right)

  dividend / divisor
end
693
+
694
##
# Processes the `mod` operator: both operands are converted using
# {#on_call_number} and Ruby's `%` is applied to them.
#
# @param [Oga::XPath::Node] ast_node
# @param [Oga::XML::NodeSet] context
# @return [Float]
#
def on_mod(ast_node, context)
  left, right = *ast_node

  dividend = on_call_number(context, left)
  divisor  = on_call_number(context, right)

  dividend % divisor
end
709
+
710
##
# Processes the `*` operator: both operands are converted using
# {#on_call_number} and multiplied.
#
# @param [Oga::XPath::Node] ast_node
# @param [Oga::XML::NodeSet] context
# @return [Float]
#
def on_mul(ast_node, context)
  left, right = *ast_node

  multiplicand = on_call_number(context, left)
  multiplier   = on_call_number(context, right)

  multiplicand * multiplier
end
725
+
726
##
# Processes the `-` operator: both operands are converted using
# {#on_call_number} and the right number is subtracted from the left
# one.
#
# @param [Oga::XPath::Node] ast_node
# @param [Oga::XML::NodeSet] context
# @return [Float]
#
def on_sub(ast_node, context)
  left, right = *ast_node

  minuend    = on_call_number(context, left)
  subtrahend = on_call_number(context, right)

  minuend - subtrahend
end
741
+
742
##
# Processes the `=` operator.
#
# Both operands are processed in the given context. A node set operand
# is replaced by the value of {#first_node_text}, so node sets are
# compared by text rather than by the nodes themselves. After that the
# right operand is converted to the type of the left one when the left
# operand is a number (using `to_float`) or a String (using
# `to_string`). The two values are then compared using `==`.
#
# @param [Oga::XPath::Node] ast_node
# @param [Oga::XML::NodeSet] context
# @return [TrueClass|FalseClass]
#
def on_eq(ast_node, context)
  lhs = process(ast_node.children[0], context)
  rhs = process(ast_node.children[1], context)

  lhs = first_node_text(lhs) if lhs.is_a?(XML::NodeSet)
  rhs = first_node_text(rhs) if rhs.is_a?(XML::NodeSet)

  # The left operand decides the type the comparison is made in.
  if lhs.is_a?(Numeric)
    rhs = to_float(rhs) unless rhs.is_a?(Numeric)
  elsif lhs.is_a?(String)
    rhs = to_string(rhs) unless rhs.is_a?(String)
  end

  lhs == rhs
end
777
+
778
##
# Evaluates the `!=` operator.
#
# The result is the negation of whatever {#on_eq} returns for the same
# arguments.
#
# @see [#on_eq]
#
def on_neq(ast_node, context)
  on_eq(ast_node, context) ? false : true
end
789
+
790
##
# Evaluates the `<` operator.
#
# Both operands are converted to numbers through {#on_call_number}. The
# result is `true` when the left number is lower than the right number.
#
# @param [Oga::XML::Node] ast_node
# @param [Oga::XML::NodeSet] context
# @return [TrueClass|FalseClass]
#
def on_lt(ast_node, context)
  lhs_ast, rhs_ast = *ast_node

  lhs = on_call_number(context, lhs_ast)
  rhs = on_call_number(context, rhs_ast)

  lhs < rhs
end
805
+
806
##
# Evaluates the `>` operator.
#
# Both operands are converted to numbers through {#on_call_number}. The
# result is `true` when the left number is greater than the right number.
#
# @param [Oga::XML::Node] ast_node
# @param [Oga::XML::NodeSet] context
# @return [TrueClass|FalseClass]
#
def on_gt(ast_node, context)
  lhs_ast, rhs_ast = *ast_node

  lhs = on_call_number(context, lhs_ast)
  rhs = on_call_number(context, rhs_ast)

  lhs > rhs
end
821
+
822
##
# Evaluates the `<=` operator.
#
# Both operands are converted to numbers through {#on_call_number}. The
# result is `true` when the left number is lower than or equal to the
# right number.
#
# @param [Oga::XML::Node] ast_node
# @param [Oga::XML::NodeSet] context
# @return [TrueClass|FalseClass]
#
def on_lte(ast_node, context)
  lhs_ast, rhs_ast = *ast_node

  lhs = on_call_number(context, lhs_ast)
  rhs = on_call_number(context, rhs_ast)

  lhs <= rhs
end
838
+
839
##
# Evaluates the `>=` operator.
#
# Both operands are converted to numbers through {#on_call_number}. The
# result is `true` when the left number is greater than or equal to the
# right number.
#
# @param [Oga::XML::Node] ast_node
# @param [Oga::XML::NodeSet] context
# @return [TrueClass|FalseClass]
#
def on_gte(ast_node, context)
  lhs_ast, rhs_ast = *ast_node

  lhs = on_call_number(context, lhs_ast)
  rhs = on_call_number(context, rhs_ast)

  lhs >= rhs
end
855
+
856
##
# Dispatches an XPath function call to its dedicated handler method.
#
# The handler name is `on_call_` followed by the function name with every
# dash replaced by an underscore (`string-length` is handled by
# `on_call_string_length`). Handlers receive:
#
# 1. The context node set
# 2. The XPath function arguments, each passed as a separate Ruby method
#    argument.
#
# @param [Oga::XPath::Node] ast_node
# @param [Oga::XML::NodeSet] context
# @return [Oga::XML::NodeSet]
#
def on_call(ast_node, context)
  function_name, *arguments = *ast_node

  method_name = "on_call_#{function_name.tr('-', '_')}"

  send(method_name, context, *arguments)
end
876
+
877
##
# Evaluates the `last()` function call, which returns the index of the
# last node in the current node set.
#
# @param [Oga::XML::NodeSet] context
# @return [Float]
#
def on_call_last(context)
  # XPath positions run from 1 to N, so the last position equals the
  # number of nodes in the set.
  node_count = current_node_set.length

  node_count.to_f
end
888
+
889
##
# Evaluates the `position()` function call, which returns the 1-based
# position of the first context node within the current node set.
#
# @param [Oga::XML::NodeSet] context
# @return [Float]
#
def on_call_position(context)
  zero_based = current_node_set.index(context.first)

  (zero_based + 1).to_f
end
901
+
902
##
# Evaluates the `count()` function call: the number of nodes produced by
# `expression`, returned as a float.
#
# @param [Oga::XML::NodeSet] context
# @param [Oga::XPath::Node] expression
# @return [Float]
# @raise [TypeError] When the expression does not evaluate to a node set.
#
def on_call_count(context, expression)
  result = process(expression, context)

  return result.length.to_f if result.is_a?(XML::NodeSet)

  raise TypeError, 'count() can only operate on NodeSet instances'
end
919
+
920
##
# Evaluates the `id()` function call.
#
# According to the XPath specification the attribute that acts as the ID
# of an element is determined by a DTD. Oga does not parse or evaluate
# DTDs, so the "id" attribute is always used.
#
# The whole document is searched, regardless of the current position. The
# argument is treated as a whitespace separated list of IDs.
#
# @param [Oga::XML::NodeSet] context
# @param [Oga::XPath::Node] expression
# @return [Oga::XML::NodeSet]
#
def on_call_id(context, expression)
  value   = process(expression, context)
  matches = XML::NodeSet.new

  # Based on Nokogiri's/libxml behaviour: when a node set is given the text
  # of the set is used as the ID list.
  id_list = value.is_a?(XML::NodeSet) ? value.text : value.to_s
  wanted  = id_list.split(' ')

  @document.each_node do |node|
    next unless node.is_a?(XML::Element)

    id_attr = node.attribute('id')

    matches << node if id_attr && wanted.include?(id_attr.value)
  end

  matches
end
959
+
960
##
# Evaluates the `local-name()` function call.
#
# The name is taken from the node selected by {#function_node}: the first
# node of the supplied node set, or the first context node when no
# argument is given. An empty string is returned when that node has no
# `name` method.
#
# @param [Oga::XML::NodeSet] context
# @param [Oga::XPath::Node] expression
# @return [String]
#
def on_call_local_name(context, expression = nil)
  target = function_node(context, expression)

  return '' unless target.respond_to?(:name)

  target.name
end
977
+
978
##
# Evaluates the `name()` function call.
#
# Works like `local-name()` (see {#on_call_local_name}) but prefixes the
# name with the namespace name and a colon when the node has a namespace.
# An empty string is returned when the node lacks a `name` or `namespace`
# method.
#
# @param [Oga::XML::NodeSet] context
# @param [Oga::XPath::Node] expression
# @return [String]
#
def on_call_name(context, expression = nil)
  target = function_node(context, expression)

  unless target.respond_to?(:name) && target.respond_to?(:namespace)
    return ''
  end

  namespace = target.namespace

  namespace ? "#{namespace.name}:#{target.name}" : target.name
end
1002
+
1003
##
# Evaluates the `namespace-uri()` function call.
#
# The URI is taken from the namespace of the node selected by
# {#function_node}: the first node of the supplied node set, or the first
# context node when no argument is given. An empty string is returned when
# that node has no namespace.
#
# @param [Oga::XML::NodeSet] context
# @param [Oga::XPath::Node] expression
# @return [String]
#
def on_call_namespace_uri(context, expression = nil)
  target = function_node(context, expression)

  return '' unless target.respond_to?(:namespace) && target.namespace

  target.namespace.uri
end
1024
+
1025
##
# Evaluates the `string()` function call.
#
# Converts the given argument *or*, when no argument is given, the first
# context node to a string. For a node set only the first node is used.
# Values that respond to `text` are converted using that method, all other
# values go through {#to_string}.
#
# @example
#  string(10) # => "10"
#
# @param [Oga::XML::NodeSet] context
# @param [Oga::XPath::Node] expression
# @return [String]
#
def on_call_string(context, expression = nil)
  subject =
    if expression
      evaluated = process(expression, context)

      evaluated.is_a?(XML::NodeSet) ? evaluated[0] : evaluated
    else
      context.first
    end

  subject.respond_to?(:text) ? subject.text : to_string(subject)
end
1056
+
1057
+ ##
1058
+ # Evaluates the `number()` function call.
1059
+ #
1060
+ # This function call converts its first argument *or* the current context
1061
+ # node to a number, similar to the `string()` function.
1062
+ #
1063
+ # @example
1064
+ # number("10") # => 10.0
1065
+ #
1066
+ # @see [#on_call_string]
1067
+ # @param [Oga::XML::NodeSet] context
1068
+ # @param [Oga::XPath::Node] expression
1069
+ # @return [Float]
1070
+ #
1071
+ def on_call_number(context, expression = nil)
1072
+ convert = nil
1073
+
1074
+ if expression
1075
+ exp_retval = process(expression, context)
1076
+
1077
+ if exp_retval.is_a?(XML::NodeSet)
1078
+ convert = first_node_text(exp_retval)
1079
+
1080
+ elsif exp_retval == true
1081
+ convert = 1.0
1082
+
1083
+ elsif exp_retval == false
1084
+ convert = 0.0
1085
+
1086
+ elsif exp_retval
1087
+ convert = exp_retval
1088
+ end
1089
+ else
1090
+ convert = context.first.text
1091
+ end
1092
+
1093
+ return to_float(convert)
1094
+ end
1095
+
1096
##
# Evaluates the `concat()` function call.
#
# Every argument is converted to a string using {#on_call_string} and the
# resulting strings are joined in argument order.
#
# @param [Oga::XML::NodeSet] context
# @param [Oga::XPath::Node] first
# @param [Oga::XPath::Node] second
# @param [Array<Oga::XPath::Node>] rest
# @return [String]
#
def on_call_concat(context, first, second, *rest)
  ([first, second] + rest).inject('') do |buffer, argument|
    buffer << on_call_string(context, argument)
  end
end
1117
+
1118
##
# Evaluates the `starts-with()` function call.
#
# Returns `true` when the string of the 1st argument starts with the
# string of the 2nd argument. Both arguments are converted using
# {#on_call_string}, so node sets can be used as well.
#
# @example
#  starts-with("hello world", "hello") # => true
#
# @param [Oga::XML::NodeSet] context
# @param [Oga::XPath::Node] haystack The string to search.
# @param [Oga::XPath::Node] needle The string to search for.
# @return [TrueClass|FalseClass]
#
def on_call_starts_with(context, haystack, needle)
  subject = on_call_string(context, haystack)
  prefix  = on_call_string(context, needle)

  # An empty prefix is handled explicitly, see
  # https://github.com/jruby/jruby/issues/1923
  return true if prefix.empty?

  subject.start_with?(prefix)
end
1139
+
1140
##
# Evaluates the `contains()` function call.
#
# Returns `true` when the string of the 1st argument contains the string
# of the 2nd argument. Both arguments are converted using
# {#on_call_string}, so node sets can be used as well.
#
# @example
#  contains("hello world", "o w") # => true
#
# @param [Oga::XML::NodeSet] context
# @param [Oga::XPath::Node] haystack The string to search.
# @param [Oga::XPath::Node] needle The string to search for.
# @return [TrueClass|FalseClass]
#
def on_call_contains(context, haystack, needle)
  subject = on_call_string(context, haystack)
  wanted  = on_call_string(context, needle)

  subject.include?(wanted)
end
1160
+
1161
##
# Evaluates the `substring-before()` function call.
#
# Returns the part of the 1st argument that precedes the first occurrence
# of the 2nd argument. For example:
#
#     substring-before("2014-08-25", "-")
#
# This returns "2014" as it occurs before the first "-". An empty string
# is returned when the 2nd argument does not occur in the 1st.
#
# @param [Oga::XML::NodeSet] context
# @param [Oga::XPath::Node] haystack The string to search.
# @param [Oga::XPath::Node] needle The string to search for.
# @return [String]
#
def on_call_substring_before(context, haystack, needle)
  subject   = on_call_string(context, haystack)
  delimiter = on_call_string(context, needle)

  head, separator, _tail = subject.partition(delimiter)

  # String#partition yields an empty separator when nothing was found; that
  # empty string doubles as the return value.
  if separator.empty?
    separator
  else
    head
  end
end
1184
+
1185
##
# Evaluates the `substring-after()` function call.
#
# Returns the part of the 1st argument that follows the first occurrence
# of the 2nd argument. For example:
#
#     substring-after("2014-08-25", "-")
#
# This returns "08-25" as it occurs after the first "-". An empty string
# is returned when the 2nd argument does not occur in the 1st.
#
# An empty 2nd argument is contained in every string (at position zero),
# so in that case the whole 1st argument is returned.
#
# @param [Oga::XML::NodeSet] context
# @param [Oga::XPath::Node] haystack The string to search.
# @param [Oga::XPath::Node] needle The string to search for.
# @return [String]
#
def on_call_substring_after(context, haystack, needle)
  haystack_str = on_call_string(context, haystack)
  needle_str   = on_call_string(context, needle)

  # String#partition reports an empty separator both for "not found" and
  # for an empty needle, so the empty needle has to be handled up front.
  return haystack_str if needle_str.empty?

  _before, sep, after = haystack_str.partition(needle_str)

  sep.empty? ? sep : after
end
1208
+
1209
##
# Evaluates the `substring()` function call.
#
# Returns the characters of the 1st argument whose position `p` satisfies
# `p >= round(start)` and, when a length is given,
# `p < round(start) + round(length)`. Without a length the string is
# consumed until the end.
#
# XPath string positions start at 1, not 0. Positions before the start of
# the string are allowed and simply select nothing, a start past the end
# of the string or a length of zero yields an empty string. NaN and
# infinite values never raise: any comparison with NaN is false, which
# results in an empty string.
#
# @example Using a literal string
#  substring("foo", 2) # => "oo"
#
# @example Using a literal string with a custom length
#  substring("foo", 1, 2) # => "fo"
#
# @example Using fractional values
#  substring("12345", 1.5, 2.6) # => "234"
#
# @example Using a node set
#  substring(users/user/username, 5)
#
# @param [Oga::XML::NodeSet] context
# @param [Oga::XPath::Node] haystack
# @param [Oga::XPath::Node] start
# @param [Oga::XPath::Node] length
# @return [String]
#
def on_call_substring(context, haystack, start, length = nil)
  haystack_str = on_call_string(context, haystack)

  # XPath rounding: nearest integer, ties go towards positive infinity.
  # NaN and infinities are passed through untouched as Float#floor raises
  # FloatDomainError for them.
  xpath_round = lambda do |number|
    next number unless number.finite?

    floored = number.floor

    (number - floored >= 0.5 ? floored + 1 : floored).to_f
  end

  first_pos = xpath_round.call(on_call_number(context, start))

  # Exclusive upper bound of the selected positions.
  if length
    end_pos = first_pos + xpath_round.call(on_call_number(context, length))
  else
    end_pos = Float::INFINITY
  end

  selected = []

  haystack_str.chars.each_with_index do |char, index|
    position = index + 1

    selected << char if position >= first_pos && position < end_pos
  end

  selected.join
end
1247
+
1248
##
# Evaluates the `string-length()` function call.
#
# Returns the length of the 1st argument *or* of the current context node
# when no argument is given. The value is converted to a string using
# {#on_call_string} first.
#
# @see [#on_call_string]
# @param [Oga::XML::NodeSet] context
# @param [Oga::XPath::Node] expression
# @return [Float]
#
def on_call_string_length(context, expression = nil)
  text = on_call_string(context, expression)

  text.length.to_f
end
1263
+
1264
##
# Evaluates the `normalize-space()` function call.
#
# The 1st argument *or* the current context node is converted to a string
# using {#on_call_string}. Leading and trailing whitespace is removed and
# every remaining run of whitespace is replaced with a single space.
#
# @example
#  normalize-space(" fo  o ") # => "fo o"
#
# @param [Oga::XML::NodeSet] context
# @param [Oga::XPath::Node] expression
# @return [String]
#
def on_call_normalize_space(context, expression = nil)
  trimmed = on_call_string(context, expression).strip

  trimmed.gsub(/\s+/, ' ')
end
1283
+
1284
##
# Evaluates the `translate()` function call.
#
# Every character of the 1st argument that occurs in the 2nd argument is
# replaced with the character at the same position in the 3rd argument.
# Characters of the 2nd argument without a counterpart in the 3rd argument
# are removed. When a character occurs more than once in the 2nd argument
# its first occurrence determines the replacement.
#
# Each input character is translated exactly once, so a replacement is
# never translated again (`translate("ab", "ab", "ba")` is "ba").
#
# @example
#  translate("bar", "abc", "ABC") # => "BAr"
#
# @param [Oga::XML::NodeSet] context
# @param [Oga::XPath::Node] input
# @param [Oga::XPath::Node] find
# @param [Oga::XPath::Node] replace
# @return [String]
#
def on_call_translate(context, input, find, replace)
  input_str     = on_call_string(context, input)
  find_chars    = on_call_string(context, find).chars.to_a
  replace_chars = on_call_string(context, replace).chars.to_a
  mapping       = {}

  find_chars.each_with_index do |char, index|
    # The first occurrence of a character wins.
    mapping[char] = replace_chars[index] || '' unless mapping.key?(char)
  end

  # A single pass over the input prevents already replaced characters from
  # being replaced a second time.
  input_str.chars.map { |char| mapping.fetch(char, char) }.join
end
1312
+
1313
##
# Evaluates the `boolean()` function call, converting the 1st argument to
# a boolean.
#
# `true` is returned for:
#
# * A non empty string
# * A non empty node set
# * A number that is neither zero nor NaN
# * Any other truthy value that has no `empty?` method
#
# `false` is returned in all other cases.
#
# @param [Oga::XML::NodeSet] context
# @param [Oga::XPath::Node] expression
# @return [TrueClass|FalseClass]
#
def on_call_boolean(context, expression)
  value = process(expression, context)

  return !(value.nan? || value.zero?) if value.is_a?(Numeric)
  return false unless value

  !(value.respond_to?(:empty?) && value.empty?)
end
1342
+
1343
##
# Evaluates the `not()` function call.
#
# The 1st argument is converted to a boolean using {#on_call_boolean} and
# the opposite boolean is returned.
#
# @param [Oga::XML::NodeSet] context
# @param [Oga::XPath::Node] expression
# @return [TrueClass|FalseClass]
#
def on_call_not(context, expression)
  on_call_boolean(context, expression) ? false : true
end
1357
+
1358
##
# Evaluates the `true()` function call, which always yields the boolean
# `true`. The context is ignored.
#
# @param [Oga::XPath::NodeSet] context
# @return [TrueClass]
#
def on_call_true(context)
  true
end
1369
+
1370
##
# Evaluates the `false()` function call, which always yields the boolean
# `false`. The context is ignored.
#
# @param [Oga::XPath::NodeSet] context
# @return [FalseClass]
#
def on_call_false(context)
  false
end
1381
+
1382
##
# Evaluates the `lang()` function call.
#
# Returns `true` when the language of the first context node is the same
# as, or a sublanguage of, the given language. The language of a node is
# the value of the "xml:lang" attribute of the node itself or, when it has
# no such attribute, of its nearest ancestor that has one.
#
# As required by the XPath specification the comparison ignores case and
# also matches when the attribute value equals the argument followed by a
# suffix starting with "-" (`lang("en")` matches `xml:lang="en-US"`).
#
# @param [Oga::XML::NodeSet] context
# @param [Oga::XPath::Node] language
# @return [TrueClass|FalseClass]
#
def on_call_lang(context, language)
  lang_str = on_call_string(context, language).downcase
  node     = context.first

  # Walk up the tree until a node declares a language. Only the nearest
  # declaration counts, hence the return inside the loop.
  while node.respond_to?(:attribute)
    found = node.attribute('xml:lang')

    if found
      value = found.value.to_s.downcase

      return value == lang_str || value.start_with?("#{lang_str}-")
    end

    node = node.parent
  end

  false
end
1410
+
1411
##
# Evaluates the `sum()` function call.
#
# Takes a node set, converts the text of every node to a number using
# {#to_float} and returns the sum of these numbers. Text that is not a
# valid number converts to NaN, which in turn makes the sum NaN (matching
# the behaviour of `number()`), instead of silently counting as zero.
#
# As an example, take the following XML:
#
#     <root>
#       <a>1</a>
#       <b>2</b>
#     </root>
#
# Using the expression `sum(root/*)` the return value would be `3.0`.
#
# @param [Oga::XML::NodeSet] context
# @param [Oga::XPath::Node] expression
# @return [Float]
# @raise [TypeError] When the expression does not evaluate to a node set.
#
def on_call_sum(context, expression)
  nodes = process(expression, context)

  unless nodes.is_a?(XML::NodeSet)
    raise TypeError, 'sum() can only operate on NodeSet instances'
  end

  sum = 0.0

  nodes.each do |node|
    sum += to_float(node.text)
  end

  sum
end
1444
+
1445
##
# Evaluates the `floor()` function call.
#
# The 1st argument is converted to a number using {#on_call_number} and
# rounded down to the closest integer, which is returned as a float. NaN
# and positive/negative infinity are returned as-is, as `Float#floor`
# raises FloatDomainError for these values.
#
# @param [Oga::XML::NodeSet] context
# @param [Oga::XPath::Node] expression
# @return [Float]
#
def on_call_floor(context, expression)
  number = on_call_number(context, expression)

  number.finite? ? number.floor.to_f : number
end
1460
+
1461
##
# Evaluates the `ceiling()` function call.
#
# The 1st argument is converted to a number using {#on_call_number} and
# rounded up to the closest integer, which is returned as a float. NaN and
# positive/negative infinity are returned as-is, as `Float#ceil` raises
# FloatDomainError for these values.
#
# @param [Oga::XML::NodeSet] context
# @param [Oga::XPath::Node] expression
# @return [Float]
#
def on_call_ceiling(context, expression)
  number = on_call_number(context, expression)

  number.finite? ? number.ceil.to_f : number
end
1476
+
1477
##
# Evaluates the `round()` function call.
#
# The 1st argument is converted to a number using {#on_call_number} and
# rounded to the closest integer, which is returned as a float. When two
# integers are equally close the one closest to positive infinity is used,
# as required by XPath (`round(-2.5)` is `-2`). NaN and positive/negative
# infinity are returned as-is, as rounding these values in Ruby raises
# FloatDomainError.
#
# @param [Oga::XML::NodeSet] context
# @param [Oga::XPath::Node] expression
# @return [Float]
#
def on_call_round(context, expression)
  number = on_call_number(context, expression)

  return number unless number.finite?

  # Float#round rounds ties away from zero. Comparing the fraction against
  # 0.5 rounds ties upwards instead, without the precision loss of
  # `(number + 0.5).floor`.
  floored = number.floor

  (number - floored >= 0.5 ? floored + 1 : floored).to_f
end
1492
+
1493
##
# Evaluates an `(int)` node: its first child converted to a float.
#
# @param [Oga::XPath::Node] ast_node
# @param [Oga::XML::NodeSet] context
# @return [Float]
#
def on_int(ast_node, context)
  literal = ast_node.children.first

  literal.to_f
end
1503
+
1504
##
# Evaluates a `(float)` node by returning its first child unchanged.
#
# @param [Oga::XPath::Node] ast_node
# @param [Oga::XML::NodeSet] context
# @return [Float]
#
def on_float(ast_node, context)
  ast_node.children.first
end
1514
+
1515
##
# Evaluates a `(string)` node by returning its first child unchanged.
#
# @param [Oga::XPath::Node] ast_node
# @param [Oga::XML::NodeSet] context
# @return [String]
#
def on_string(ast_node, context)
  ast_node.children.first
end
1525
+
1526
##
# Evaluates a variable reference by looking up the variable name (the
# first child of the node) in `@variables`.
#
# @param [Oga::XPath::Node] ast_node
# @param [Oga::XML::NodeSet] context
# @return [Mixed]
# @raise [RuntimeError] When the variable is not defined.
#
def on_var(ast_node, context)
  name = ast_node.children.first

  raise "Undefined XPath variable: #{name}" unless @variables.key?(name)

  @variables[name]
end
1544
+
1545
##
# Returns the node a function call operates on: the first node of the node
# set produced by `expression`, or the first node of the context when no
# expression is given.
#
# @param [Oga::XML::NodeSet] context
# @param [Oga::XPath::Node] expression
# @return [Oga::XML::Node]
# @raise [TypeError] When the expression does not evaluate to a node set.
#
def function_node(context, expression = nil)
  return context.first unless expression

  result = process(expression, context)

  unless result.is_a?(XML::NodeSet)
    raise TypeError, 'only node sets can be used as arguments'
  end

  result.first
end
1568
+
1569
##
# Returns the text of the first node in the given set. An empty string is
# returned when the first entry has no `text` method, which includes the
# case of an empty set.
#
# @param [Oga::XML::NodeSet] set
# @return [String]
#
def first_node_text(set)
  leading = set[0]

  return '' unless leading.respond_to?(:text)

  leading.text
end
1579
+
1580
##
# Collects the child nodes of every node in the given set into a single,
# new node set.
#
# @param [Oga::XML::NodeSet] nodes
# @return [Oga::XML::NodeSet]
#
def child_nodes(nodes)
  collected = XML::NodeSet.new

  nodes.each { |parent| collected.concat(parent.children) }

  collected
end
1596
+
1597
##
# Checks whether an {Oga::XML::Node} instance matches an
# {Oga::XPath::Node} instance.
#
# Both "test" and "type-test" nodes are supported. "type-test" nodes are
# delegated to {#type_matches?}. For "test" nodes the name and the
# namespace are matched:
#
# 1. The name has to match, see {#name_matches?}
# 2. When the test specifies a namespace it has to match, see
#    {#namespace_matches?}. When it does not, the XML node must not have a
#    namespace.
#
# A wildcard (`*`) can be used for both the name and the namespace. A lone
# wildcard name also acts as a wildcard for the namespace.
#
# @param [Oga::XML::Node] xml_node
# @param [Oga::XPath::Node] ast_node
# @return [TrueClass|FalseClass]
#
def node_matches?(xml_node, ast_node)
  ns, name = *ast_node

  return type_matches?(xml_node, ast_node) if ast_node.type == :type_test

  # A wildcard name without an explicit namespace matches any namespace.
  ns = '*' if !ns && name == '*'

  same_name = name_matches?(xml_node, name)

  same_ns =
    if ns
      namespace_matches?(xml_node, ns)
    elsif same_name && !xml_node.namespace
      true
    else
      false
    end

  same_name && same_ns
end
1644
+
1645
##
# Evaluates `ast_node` with the given XML node as the only context node
# and reports whether the result contains at least one entry.
#
# @param [Oga::XML::Node] xml_node
# @param [Oga::XPath::Node] ast_node
# @return [TrueClass|FalseClass]
#
def type_matches?(xml_node, ast_node)
  single_node_context = XML::NodeSet.new([xml_node])
  matched             = process(ast_node, single_node_context)

  matched.length > 0
end
1655
+
1656
##
# Returns `true` when the name of the XML node equals the given name *or*
# the given name is a wildcard (`*`). Nodes without a `name` method never
# match.
#
# @param [Oga::XML::Node] xml_node
# @param [String] name
# @return [TrueClass|FalseClass]
#
def name_matches?(xml_node, name)
  return false unless xml_node.respond_to?(:name)
  return true if xml_node.name == name

  name == '*'
end
1668
+
1669
##
# Returns `true` when the string form of the XML node's namespace equals
# the given namespace *or* the given namespace is a wildcard (`*`). Nodes
# without a `namespace` method never match.
#
# @param [Oga::XML::Node] xml_node
# @param [String] ns
# @return [TrueClass|FalseClass]
#
def namespace_matches?(xml_node, ns)
  return false unless xml_node.respond_to?(:namespace)
  return true if xml_node.namespace.to_s == ns

  ns == '*'
end
1681
+
1682
##
# Returns `true` when the given object has a `parent` method that returns
# a truthy value.
#
# @param [Oga::XML::Node] ast_node
# @return [TrueClass|FalseClass]
#
def has_parent?(ast_node)
  return false unless ast_node.respond_to?(:parent)

  ast_node.parent ? true : false
end
1689
+
1690
##
# Converts the given value to a float using `Kernel#Float`. NaN is
# returned when the conversion raises an error.
#
# @param [Mixed] value
# @return [Float]
#
def to_float(value)
  Float(value)
rescue StandardError
  Float::NAN
end
1700
+
1701
##
# Converts the given value to a string according to the XPath string
# conversion rules.
#
# @param [Mixed] value
# @return [String]
#
def to_string(value)
  # Floats without a fractional part (e.g. 10.0) are converted to an
  # integer first so the resulting string has no trailing ".0".
  whole_float = value.is_a?(Float) && value.modulo(1).zero?

  (whole_float ? value.to_i : value).to_s
end
1718
+
1719
##
# Pushes the given node set onto the stack of node sets, yields the
# supplied block and returns whatever the block returned. The node set is
# removed from the stack again once the block finishes, even when the
# block raises an error, so a failed evaluation does not leave a stale
# node set behind.
#
# @example
#  retval = with_node_set(context) do
#    process(....)
#  end
#
# @param [Oga::XML::NodeSet] nodes
# @return [Mixed] The return value of the block.
#
def with_node_set(nodes)
  @node_sets << nodes

  begin
    yield
  ensure
    @node_sets.pop
  end
end
1739
+
1740
##
# Returns the node set that was most recently pushed by {#with_node_set}
# and not yet removed, or `nil` when there is none.
#
# @return [Oga::XML::NodeSet]
#
def current_node_set
  @node_sets[-1]
end
1746
+ end # Evaluator
1747
+ end # XPath
1748
+ end # Oga