rubyjedi-oga 1.0.3

Sign up to get free protection for your applications and to get access to all the features.
Files changed (58) hide show
  1. checksums.yaml +7 -0
  2. data/.yardopts +13 -0
  3. data/LICENSE +362 -0
  4. data/README.md +317 -0
  5. data/doc/css/common.css +77 -0
  6. data/doc/css_selectors.md +935 -0
  7. data/doc/manually_creating_documents.md +67 -0
  8. data/doc/migrating_from_nokogiri.md +169 -0
  9. data/doc/xml_namespaces.md +63 -0
  10. data/ext/c/extconf.rb +11 -0
  11. data/ext/c/lexer.c +2595 -0
  12. data/ext/c/lexer.h +16 -0
  13. data/ext/c/lexer.rl +198 -0
  14. data/ext/c/liboga.c +6 -0
  15. data/ext/c/liboga.h +11 -0
  16. data/ext/java/Liboga.java +14 -0
  17. data/ext/java/org/liboga/xml/Lexer.java +1363 -0
  18. data/ext/java/org/liboga/xml/Lexer.rl +223 -0
  19. data/ext/ragel/base_lexer.rl +633 -0
  20. data/lib/oga.rb +57 -0
  21. data/lib/oga/blacklist.rb +40 -0
  22. data/lib/oga/css/lexer.rb +743 -0
  23. data/lib/oga/css/parser.rb +976 -0
  24. data/lib/oga/entity_decoder.rb +21 -0
  25. data/lib/oga/html/entities.rb +2150 -0
  26. data/lib/oga/html/parser.rb +25 -0
  27. data/lib/oga/html/sax_parser.rb +18 -0
  28. data/lib/oga/lru.rb +160 -0
  29. data/lib/oga/oga.rb +57 -0
  30. data/lib/oga/version.rb +3 -0
  31. data/lib/oga/whitelist.rb +20 -0
  32. data/lib/oga/xml/attribute.rb +136 -0
  33. data/lib/oga/xml/cdata.rb +17 -0
  34. data/lib/oga/xml/character_node.rb +37 -0
  35. data/lib/oga/xml/comment.rb +17 -0
  36. data/lib/oga/xml/default_namespace.rb +13 -0
  37. data/lib/oga/xml/doctype.rb +82 -0
  38. data/lib/oga/xml/document.rb +108 -0
  39. data/lib/oga/xml/element.rb +428 -0
  40. data/lib/oga/xml/entities.rb +122 -0
  41. data/lib/oga/xml/html_void_elements.rb +15 -0
  42. data/lib/oga/xml/lexer.rb +550 -0
  43. data/lib/oga/xml/namespace.rb +48 -0
  44. data/lib/oga/xml/node.rb +219 -0
  45. data/lib/oga/xml/node_set.rb +333 -0
  46. data/lib/oga/xml/parser.rb +631 -0
  47. data/lib/oga/xml/processing_instruction.rb +37 -0
  48. data/lib/oga/xml/pull_parser.rb +175 -0
  49. data/lib/oga/xml/querying.rb +56 -0
  50. data/lib/oga/xml/sax_parser.rb +192 -0
  51. data/lib/oga/xml/text.rb +66 -0
  52. data/lib/oga/xml/traversal.rb +50 -0
  53. data/lib/oga/xml/xml_declaration.rb +65 -0
  54. data/lib/oga/xpath/evaluator.rb +1798 -0
  55. data/lib/oga/xpath/lexer.rb +1958 -0
  56. data/lib/oga/xpath/parser.rb +622 -0
  57. data/oga.gemspec +45 -0
  58. metadata +227 -0
@@ -0,0 +1,66 @@
1
module Oga
  module XML
    ##
    # A text node. Text nodes are leaves: they carry character data only and
    # have no children or attributes. XML/HTML entities in the data are
    # decoded lazily, the first time {#text} is read.
    #
    class Text < CharacterNode
      def initialize(*args)
        super

        # Nothing has been decoded yet for the text handed to the constructor.
        @decoded = false
      end

      ##
      # Replaces the text and marks it as not yet decoded.
      #
      # @param [String] value
      #
      def text=(value)
        @text    = value
        @decoded = false
      end

      ##
      # Returns the text as a String, decoding XML/HTML entities on the first
      # call after the text was set.
      #
      # @return [String]
      #
      def text
        return @text unless decode_entities?

        @text    = EntityDecoder.try_decode(@text, html?)
        @decoded = true

        @text
      end

      ##
      # @see [Oga::XML::CharacterNode#to_xml]
      #
      def to_xml
        literal = inside_literal_html?
        xml     = super

        # Text inside literal HTML elements is emitted verbatim.
        literal ? xml : Entities.encode(xml)
      end

      private

      ##
      # Decoding is needed only once, and never inside literal HTML elements.
      #
      # @return [TrueClass|FalseClass]
      #
      def decode_entities?
        !(@decoded || inside_literal_html?)
      end

      ##
      # Whether this node sits directly inside an HTML element listed in
      # `Lexer::LITERAL_HTML_ELEMENTS`.
      #
      # @return [TrueClass|FalseClass]
      #
      def inside_literal_html?
        owner = parent

        owner.is_a?(Element) &&
          html? &&
          Lexer::LITERAL_HTML_ELEMENTS.allow?(owner.name)
      end
    end # Text
  end # XML
end # Oga
@@ -0,0 +1,50 @@
1
module Oga
  module XML
    ##
    # Module that provides methods to traverse DOM trees. The including
    # object only needs a `children` method whose result responds to `to_a`.
    #
    module Traversal
      ##
      # Yields every descendant of the receiver to the supplied block, in
      # document order (depth-first, pre-order: a node is yielded before its
      # children, and its children before its next sibling). The receiver
      # itself is not yielded.
      #
      # The block can prevent the children of the node it was just given from
      # being traversed by using `throw :skip_children`.
      #
      # @example
      #  document.each_node do |node|
      #    p node.class
      #  end
      #
      # @example Skipping the children of a certain node
      #  document.each_node do |node|
      #    if node.is_a?(Oga::XML::Element) and node.name == 'book'
      #      throw :skip_children
      #    end
      #  end
      #
      # @yieldparam [Oga::XML::Node] The current node.
      # @return [Enumerator] when no block is given.
      #
      def each_node
        return to_enum(:each_node) unless block_given?

        # Explicit stack whose top is the last element. Children are pushed in
        # reverse so the first child is popped (and thus visited) first.
        pending = children.to_a.reverse

        until pending.empty?
          current = pending.pop

          # A throw from the block lands here, before the children of
          # `current` have been scheduled.
          catch :skip_children do
            yield current

            pending.concat(current.children.to_a.reverse)
          end
        end
      end
    end # Traversal
  end # XML
end # Oga
@@ -0,0 +1,65 @@
1
module Oga
  module XML
    ##
    # Holds the pseudo-attributes of an XML declaration tag
    # (`<?xml version="..." encoding="..." standalone="..." ?>`).
    #
    class XmlDeclaration
      # @return [String]
      attr_accessor :version

      # @return [String]
      attr_accessor :encoding

      # Whether or not the document is a standalone document.
      # @return [String]
      attr_accessor :standalone

      ##
      # @param [Hash] options
      #
      # @option options [String] :version Defaults to "1.0".
      # @option options [String] :encoding Defaults to "UTF-8".
      # @option options [String] :standalone No default; omitted when unset.
      #
      def initialize(options = {})
        @standalone = options[:standalone]
        @encoding   = options[:encoding] || 'UTF-8'
        @version    = options[:version] || '1.0'
      end

      ##
      # Serializes the declaration. Attributes whose value is nil or false are
      # left out; the order is always version, encoding, standalone.
      #
      # @return [String]
      #
      def to_xml
        pairs = [:version, :encoding, :standalone].map do |name|
          value = send(name)

          %Q{#{name}="#{value}"} if value
        end

        "<?xml #{pairs.compact.join(' ')} ?>"
      end

      ##
      # Human readable summary listing only the attributes that are set to a
      # non-empty value.
      #
      # @return [String]
      #
      def inspect
        described = [:version, :encoding, :standalone].map do |name|
          value = send(name)

          "#{name}: #{value.inspect}" unless !value || value.empty?
        end

        "XmlDeclaration(#{described.compact.join(' ')})"
      end
    end # XmlDeclaration
  end # XML
end # Oga
@@ -0,0 +1,1798 @@
1
+ module Oga
2
+ module XPath
3
+ ##
4
+ # The Evaluator class evaluates XPath expressions, either as a String or an
5
+ # AST of `AST::Node` instances.
6
+ #
7
+ # ## Thread Safety
8
+ #
9
+ # This class is not thread-safe, you can not share the same instance between
10
+ # multiple threads. This is due to the use of an internal stack (see below
11
+ # for more information). It is however perfectly fine to use multiple
12
+ # separated instances as this class does not use a thread global state.
13
+ #
14
+ # ## Node Set Stack
15
+ #
16
+ # This class uses an internal stack of XML node sets. This stack is used for
17
+ # functions that require access to the set of nodes a predicate belongs to.
18
+ # An example of such a function is `position()`.
19
+ #
20
+ # An alternative would be to pass the node sets a predicate belongs to as an
21
+ # extra argument to the various `on_*` methods. The problematic part of
22
+ # this approach is that it requires every method to take and pass along the
23
+ # argument. It's far too easy to make mistakes in such a setup and as such
24
+ # I've chosen to use an internal stack instead.
25
+ #
26
+ # See {#with_node_set} and {#current_node_set} for more information.
27
+ #
28
+ # ## Set Indices
29
+ #
30
+ # XPath node sets start at index 1 instead of index 0. In other words, if
31
+ # you want to access the first node in a set you have to use index 1, not 0.
32
+ # Certain methods such as {#on_call_last} and {#on_call_position} take care
33
+ # of converting indices from Ruby to XPath.
34
+ #
35
+ # ## Number Types
36
+ #
37
+ # The XPath specification states that all numbers produced by an expression
38
+ # should be returned as double-precision 64bit IEEE 754 floating point
39
+ # numbers. For example, the return value of `position()` should be a float
40
+ # (e.g. "1.0", not "1").
41
+ #
42
+ # Oga takes care internally of converting numbers to integers and/or floats
43
+ # where needed. The output types however will always be floats.
44
+ #
45
+ # For more information on the specification, see
46
+ # <http://www.w3.org/TR/xpath/#numbers>.
47
+ #
48
+ # ## Variables
49
+ #
50
+ # The evaluator supports the binding of custom variables in the
51
+ # {#initialize} method. Variables can be bound by passing in a Hash with the
52
+ # keys set to the variable names (minus the `$` sign) and their values to
53
+ # the variable values. The keys of the variables Hash *must* be Strings.
54
+ #
55
+ # A basic example:
56
+ #
57
+ # evaluator = Evaluator.new(document, 'number' => 10)
58
+ #
59
+ # evaluator.evaluate('$number') # => 10
60
+ #
61
+ # @api private
62
+ #
63
+ class Evaluator
64
+ # Wildcard for node names/namespace prefixes.
65
+ STAR = '*'
66
+
67
+ ##
68
+ # @param [Oga::XML::Document|Oga::XML::Node] document
69
+ # @param [Hash] variables Hash containing variables to expose to the XPath
70
+ # expressions.
71
+ #
72
def initialize(document, variables = {})
  # @node_sets is the internal node-set stack described in the class
  # documentation; it starts out empty.
  @document, @variables, @node_sets = document, variables, []
end
77
+
78
+ ##
79
+ # Evaluates an XPath expression as a String.
80
+ #
81
+ # @example
82
+ # evaluator = Oga::XPath::Evaluator.new(document)
83
+ #
84
+ # evaluator.evaluate('//a')
85
+ #
86
+ # @param [String] string An XPath expression as a String.
87
+ # @return [Mixed]
88
+ #
89
def evaluate(string)
  # Turn the expression into an AST via Parser.parse_with_cache, then hand
  # the AST to #evaluate_ast.
  evaluate_ast(Parser.parse_with_cache(string))
end
94
+
95
+ ##
96
+ # Evaluates a pre-parsed XPath expression.
97
+ #
98
+ # @param [AST::Node] ast
99
+ # @return [Mixed]
100
+ #
101
def evaluate_ast(ast)
  # Evaluation starts from a node set holding only the bound document/node.
  process(ast, XML::NodeSet.new([@document]))
end
106
+
107
+ ##
108
+ # Processes an XPath node by dispatching it and the given context to a
109
+ # dedicated handler method. Handler methods are called "on_X" where "X" is
110
+ # the node type.
111
+ #
112
+ # @param [AST::Node] ast_node The XPath AST node to process.
113
+ #
114
+ # @param [Oga::XML::NodeSet] context The context (a set of nodes) to
115
+ # evaluate an expression in.
116
+ #
117
+ # @return [Mixed] Whatever the handler returns: a node set, boolean or number.
118
+ #
119
def process(ast_node, context)
  # The handler name is derived from the AST node type, e.g. a node of type
  # `path` is routed to #on_path.
  send("on_#{ast_node.type}", ast_node, context)
end
124
+
125
+ ##
126
+ # Processes an absolute XPath expression such as `/foo`.
127
+ #
128
+ # @param [AST::Node] ast_node
129
+ # @param [Oga::XML::NodeSet] context
130
+ # @return [Oga::XML::NodeSet]
131
+ #
132
def on_absolute_path(ast_node, context)
  # Absolute paths ignore the incoming context and restart from the top:
  # the document's root node when it has one, otherwise the document itself.
  start   = @document.respond_to?(:root_node) ? @document.root_node : @document
  context = XML::NodeSet.new([start])

  # A bare "/" has no steps, so the starting context is the result.
  return context if ast_node.children.empty?

  on_path(ast_node, context)
end
142
+
143
+ ##
144
+ # Processes a relative XPath expression such as `foo`.
145
+ #
146
+ # Paths are evaluated using a "short-circuit" mechanism similar to Ruby's
147
+ # `&&` / `and` operator. Whenever a path results in an empty node set the
148
+ # evaluation is aborted immediately.
149
+ #
150
+ # @param [AST::Node] ast_node
151
+ # @param [Oga::XML::NodeSet] context
152
+ # @return [Oga::XML::NodeSet]
153
+ #
154
def on_path(ast_node, context)
  result = XML::NodeSet.new

  ast_node.children.each do |step|
    result = process(step, context)

    # Short-circuit: once a step yields nothing the later steps can't match.
    break if result.empty?

    # The output of this step is the context of the next one.
    context = result
  end

  result
end
169
+
170
+ ##
171
+ # Processes a node test.
172
+ #
173
+ # @param [AST::Node] ast_node
174
+ # @param [Oga::XML::NodeSet] context
175
+ # @return [Oga::XML::NodeSet]
176
+ #
177
def on_test(ast_node, context)
  matched = XML::NodeSet.new

  # Keep the context nodes that satisfy the node test, in context order.
  context.each do |candidate|
    next unless node_matches?(candidate, ast_node)

    matched << candidate
  end

  matched
end
186
+
187
+ ##
188
+ # Processes a predicate.
189
+ #
190
+ # @param [AST::Node] ast_node
191
+ # @param [Oga::XML::NodeSet] context
192
+ # @return [Oga::XML::NodeSet]
193
+ #
194
def on_predicate(ast_node, context)
  test, predicate = *ast_node.children
  final_nodes     = XML::NodeSet.new

  context.each do |context_node|
    # The node test is evaluated per context node; positions are relative to
    # the set it produces.
    initial_nodes = process(test, XML::NodeSet.new([context_node]))
    xpath_index   = 0

    initial_nodes.each do |xml_node|
      # XPath positions are 1-based. Incrementing up front keeps the position
      # correct no matter which branch below is taken.
      xpath_index += 1

      retval = with_node_set(initial_nodes) do
        process(predicate, XML::NodeSet.new([xml_node]))
      end

      matched =
        if retval.is_a?(Numeric)
          # A numeric predicate matches only when it equals the position.
          # Comparing directly (instead of via #to_i) keeps 1.5 from matching
          # position 1 and avoids FloatDomainError for NaN/Infinity.
          retval == xpath_index
        elsif retval.respond_to?(:empty?)
          # Node sets and strings count as true when they are not empty.
          !retval.empty?
        else
          # Booleans (and nil) are used as-is.
          retval
        end

      final_nodes << xml_node if matched
    end
  end

  final_nodes
end
226
+
227
+ ##
228
+ # Dispatches the processing of axes to dedicated methods. This works
229
+ # similar to {#process} except the handler names are "on_axis_X" with "X"
230
+ # being the axis name.
231
+ #
232
+ # @param [AST::Node] ast_node
233
+ # @param [Oga::XML::NodeSet] context
234
+ # @return [Oga::XML::NodeSet]
235
+ #
236
def on_axis(ast_node, context)
  axis, test = *ast_node.children

  # Axis names use dashes, method names underscores:
  # "following-sibling" is handled by #on_axis_following_sibling.
  send("on_axis_#{axis.tr('-', '_')}", test, context)
end
243
+
244
+ ##
245
+ # Processes the `ancestor` axis. This axis walks through the entire
246
+ # ancestor chain until a matching node is found.
247
+ #
248
+ # Evaluation happens using a "short-circuit" mechanism. The moment a
249
+ # matching node is found it is returned immediately.
250
+ #
251
+ # @param [AST::Node] ast_node
252
+ # @param [Oga::XML::NodeSet] context
253
+ # @return [Oga::XML::NodeSet]
254
+ #
255
def on_axis_ancestor(ast_node, context)
  matches = XML::NodeSet.new

  context.each do |current|
    # Climb one parent at a time; only the nearest matching ancestor of each
    # context node is collected.
    while has_parent?(current)
      current = current.parent

      next unless node_matches?(current, ast_node)

      matches << current
      break
    end
  end

  matches
end
271
+
272
+ ##
273
+ # Processes the `ancestor-or-self` axis.
274
+ #
275
+ # @see [#on_axis_ancestor]
276
+ #
277
def on_axis_ancestor_or_self(ast_node, context)
  nodes = XML::NodeSet.new

  context.each do |xml_node|
    loop do
      # Test the node itself first ("or-self"), then each ancestor in turn.
      if node_matches?(xml_node, ast_node)
        nodes << xml_node
        break
      end

      # Only stop climbing *after* the current node has been tested, so the
      # topmost node of the chain (which has no parent) is not skipped.
      break unless has_parent?(xml_node)

      xml_node = xml_node.parent
    end
  end

  nodes
end
293
+
294
+ ##
295
+ # Processes the `attribute` axis. The node test is performed against all
296
+ # the attributes of the nodes in the current context.
297
+ #
298
+ # Evaluation of the nodes continues until the node set has been exhausted
299
+ # (unlike some other methods which return the moment they find a matching
300
+ # node).
301
+ #
302
+ # @param [AST::Node] ast_node
303
+ # @param [Oga::XML::NodeSet] context
304
+ # @return [Oga::XML::NodeSet]
305
+ #
306
def on_axis_attribute(ast_node, context)
  found = XML::NodeSet.new

  context.each do |candidate|
    # Only elements are inspected; every other node type is ignored.
    if candidate.is_a?(XML::Element)
      found += on_test(ast_node, candidate.attributes)
    end
  end

  found
end
317
+
318
+ ##
319
+ # Evaluates the `child` axis. This axis simply takes all the child nodes
320
+ # of the current context nodes.
321
+ #
322
+ # @param [AST::Node] ast_node
323
+ # @param [Oga::XML::NodeSet] context
324
+ # @return [Oga::XML::NodeSet]
325
+ #
326
def on_axis_child(ast_node, context)
  # Run the test against the children of the context nodes.
  children = child_nodes(context)

  process(ast_node, children)
end
329
+
330
+ ##
331
+ # Evaluates the `descendant` axis. This method processes child nodes until
332
+ # the very end of the tree, no "short-circuiting" mechanism is used.
333
+ #
334
+ # @param [AST::Node] ast_node
335
+ # @param [Oga::XML::NodeSet] context
336
+ # @return [Oga::XML::NodeSet]
337
+ #
338
def on_axis_descendant(ast_node, context)
  collected = XML::NodeSet.new

  context.each do |start|
    # Every node yielded by #each_node is tested on its own; nothing is
    # short-circuited.
    start.each_node do |descendant|
      single = XML::NodeSet.new([descendant])

      collected.concat(process(ast_node, single))
    end
  end

  collected
end
349
+
350
+ ##
351
+ # Evaluates the `descendant-or-self` axis.
352
+ #
353
+ # @param [AST::Node] ast_node
354
+ # @param [Oga::XML::NodeSet] context
355
+ # @return [Oga::XML::NodeSet]
356
+ #
357
def on_axis_descendant_or_self(ast_node, context)
  # "self" part first (the context nodes that match), then the matching
  # descendants are appended to that same set.
  on_test(ast_node, context).tap do |nodes|
    nodes.concat(on_axis_descendant(ast_node, context))
  end
end
364
+
365
+ ##
366
+ # Evaluates the `following` axis.
367
+ #
368
+ # @param [AST::Node] ast_node
369
+ # @param [Oga::XML::NodeSet] context
370
+ # @return [Oga::XML::NodeSet]
371
+ #
372
def on_axis_following(ast_node, context)
  matches = XML::NodeSet.new
  tree    = root_node(@document)

  context.each do |context_node|
    passed = false

    tree.each_node do |candidate|
      # Reaching the context node flips the switch; its own children are
      # skipped because they are not "following" nodes.
      if candidate == context_node
        passed = true
        throw :skip_children
      end

      matches << candidate if passed && node_matches?(candidate, ast_node)
    end
  end

  matches
end
395
+
396
+ ##
397
+ # Evaluates the `following-sibling` axis.
398
+ #
399
+ # @param [AST::Node] ast_node
400
+ # @param [Oga::XML::NodeSet] context
401
+ # @return [Oga::XML::NodeSet]
402
+ #
403
def on_axis_following_sibling(ast_node, context)
  siblings = XML::NodeSet.new
  tree     = parent_node(@document)

  context.each do |context_node|
    passed = false
    parent = has_parent?(context_node) ? context_node.parent : nil

    tree.each_node do |candidate|
      # Start collecting after the context node; its subtree is skipped.
      if candidate == context_node
        passed = true
        throw :skip_children
      end

      # Only nodes after the context node that share its parent qualify.
      next if !passed || parent != candidate.parent

      if node_matches?(candidate, ast_node)
        siblings << candidate

        # No need to descend into a sibling that was already collected.
        throw :skip_children
      end
    end
  end

  siblings
end
433
+
434
+ ##
435
+ # Evaluates the `parent` axis.
436
+ #
437
+ # @param [AST::Node] ast_node
438
+ # @param [Oga::XML::NodeSet] context
439
+ # @return [Oga::XML::NodeSet]
440
+ #
441
def on_axis_parent(ast_node, context)
  parents = XML::NodeSet.new

  context.each do |child|
    # Nodes without a parent simply contribute nothing.
    if has_parent?(child)
      candidate = child.parent

      parents << candidate if node_matches?(candidate, ast_node)
    end
  end

  parents
end
454
+
455
+ ##
456
+ # Evaluates the `preceding` axis.
457
+ #
458
+ # @param [AST::Node] ast_node
459
+ # @param [Oga::XML::NodeSet] context
460
+ # @return [Oga::XML::NodeSet]
461
+ #
462
def on_axis_preceding(ast_node, context)
  nodes = XML::NodeSet.new
  root  = root_node(@document)

  context.each do |context_node|
    root.each_node do |doc_node|
      # Test everything *until* we hit the current context node; the walk is
      # abandoned at that point.
      break if doc_node == context_node

      nodes << doc_node if node_matches?(doc_node, ast_node)
    end
  end

  nodes
end
481
+
482
+ ##
483
+ # Evaluates the `preceding-sibling` axis.
484
+ #
485
+ # @param [AST::Node] ast_node
486
+ # @param [Oga::XML::NodeSet] context
487
+ # @return [Oga::XML::NodeSet]
488
+ #
489
def on_axis_preceding_sibling(ast_node, context)
  nodes = XML::NodeSet.new
  root  = parent_node(@document)

  context.each do |context_node|
    parent = has_parent?(context_node) ? context_node.parent : nil

    root.each_node do |doc_node|
      # Test everything *until* we hit the current context node.
      break if doc_node == context_node

      # Siblings are the earlier nodes that share the context node's parent.
      if doc_node.parent == parent && node_matches?(doc_node, ast_node)
        nodes << doc_node
      end
    end
  end

  nodes
end
509
+
510
+ ##
511
+ # Evaluates the `self` axis.
512
+ #
513
+ # @param [AST::Node] ast_node
514
+ # @param [Oga::XML::NodeSet] context
515
+ # @return [Oga::XML::NodeSet]
516
+ #
517
def on_axis_self(ast_node, context)
  kept = XML::NodeSet.new

  # The axis never leaves the context: it just filters it by the node test.
  context.each do |candidate|
    next unless node_matches?(candidate, ast_node)

    kept << candidate
  end

  kept
end
526
+
527
+ ##
528
+ # Evaluates the `namespace` axis.
529
+ #
530
+ # @param [AST::Node] ast_node
531
+ # @param [Oga::XML::NodeSet] context
532
+ # @return [Oga::XML::NodeSet]
533
+ #
534
+ def on_axis_namespace(ast_node, context)
535
+ nodes = XML::NodeSet.new
536
+ name = ast_node.children[1]
537
+
538
+ context.each do |context_node|
539
+ next unless context_node.respond_to?(:available_namespaces)
540
+
541
+ context_node.available_namespaces.each do |_, namespace|
542
+ if namespace.name == name or name == STAR
543
+ nodes << namespace
544
+ end
545
+ end
546
+ end
547
+
548
+ nodes
549
+ end
550
+
551
+ ##
552
+ # Dispatches node type matching to dedicated handlers.
553
+ #
554
+ # @param [AST::Node] ast_node
555
+ # @param [Oga::XML::NodeSet] context
556
+ # @return [Oga::XML::NodeSet]
557
+ #
558
def on_type_test(ast_node, context)
  type, test = *ast_node.children

  # "processing-instruction" is handled by
  # #on_type_test_processing_instruction, hence the dash conversion.
  send("on_type_test_#{type.tr('-', '_')}", test, context)
end
565
+
566
+ ##
567
+ # Processes the `node` type matcher. This matcher matches all node types.
568
+ #
569
+ # @param [AST::Node] ast_node
570
+ # @param [Oga::XML::NodeSet] context
571
+ # @return [Oga::XML::NodeSet]
572
+ #
573
def on_type_test_node(ast_node, context)
  accepted = XML::NodeSet.new

  context.each do |candidate|
    # node() accepts any XML node as well as the document itself.
    case candidate
    when XML::Node, XML::Document
      accepted << candidate
    end
  end

  accepted
end
584
+
585
+ ##
586
+ # Processes the `text()` type test. This matches only text nodes.
587
+ #
588
+ # @param [AST::Node] ast_node
589
+ # @param [Oga::XML::NodeSet] context
590
+ # @return [Oga::XML::NodeSet]
591
+ #
592
def on_type_test_text(ast_node, context)
  texts = XML::NodeSet.new

  # text() keeps the XML::Text instances of the context and drops the rest.
  context.each do |candidate|
    next unless candidate.is_a?(XML::Text)

    texts << candidate
  end

  texts
end
601
+
602
+ ##
603
+ # Processes the `comment()` type test. This matches only comment nodes.
604
+ #
605
+ # @param [AST::Node] ast_node
606
+ # @param [Oga::XML::NodeSet] context
607
+ # @return [Oga::XML::NodeSet]
608
+ #
609
def on_type_test_comment(ast_node, context)
  comments = XML::NodeSet.new

  # comment() keeps the XML::Comment instances of the context.
  context.each do |candidate|
    next unless candidate.is_a?(XML::Comment)

    comments << candidate
  end

  comments
end
618
+
619
+ ##
620
+ # Processes the `processing-instruction()` type test. This matches only
621
+ # processing-instruction nodes.
622
+ #
623
+ # @param [AST::Node] ast_node
624
+ # @param [Oga::XML::NodeSet] context
625
+ # @return [Oga::XML::NodeSet]
626
+ #
627
def on_type_test_processing_instruction(ast_node, context)
  instructions = XML::NodeSet.new

  # processing-instruction() keeps the XML::ProcessingInstruction instances.
  context.each do |candidate|
    next unless candidate.is_a?(XML::ProcessingInstruction)

    instructions << candidate
  end

  instructions
end
636
+
637
+ ##
638
+ # Processes the pipe (`|`) operator. This operator creates a union of two
639
+ # sets.
640
+ #
641
+ # @param [AST::Node] ast_node
642
+ # @param [Oga::XML::NodeSet] context
643
+ # @return [Oga::XML::NodeSet]
644
+ #
645
def on_pipe(ast_node, context)
  left_ast, right_ast = *ast_node.children

  # Both sides are evaluated against the same context, left side first.
  left_set  = process(left_ast, context)
  right_set = process(right_ast, context)

  left_set + right_set
end
650
+
651
+ ##
652
+ # Processes the `and` operator.
653
+ #
654
+ # This operator returns true if both the left and right expression
655
+ # evaluate to `true`. If the first expression evaluates to `false` the
656
+ # right expression is ignored.
657
+ #
658
+ # @param [AST::Node] ast_node
659
+ # @param [Oga::XML::NodeSet] context
660
+ # @return [TrueClass|FalseClass]
661
+ #
662
def on_and(ast_node, context)
  left, right = *ast_node.children

  lhs = on_call_boolean(context, left)

  # The right side is only evaluated when the left side holds.
  lhs && on_call_boolean(context, right)
end
667
+
668
+ ##
669
+ # Processes the `or` operator.
670
+ #
671
+ # This operator returns `true` if one of the expressions evaluates to
672
+ # true, otherwise false is returned. If the first expression evaluates to
673
+ # `true` the second expression is ignored.
674
+ #
675
+ # @param [AST::Node] ast_node
676
+ # @param [Oga::XML::NodeSet] context
677
+ # @return [TrueClass|FalseClass]
678
+ #
679
def on_or(ast_node, context)
  left, right = *ast_node.children

  lhs = on_call_boolean(context, left)

  # The right side is only evaluated when the left side does not hold.
  lhs || on_call_boolean(context, right)
end
684
+
685
+ ##
686
+ # Processes the `+` operator.
687
+ #
688
+ # This operator converts the left and right expressions to numbers and
689
+ # adds them together.
690
+ #
691
+ # @param [AST::Node] ast_node
692
+ # @param [Oga::XML::NodeSet] context
693
+ # @return [Float]
694
+ #
695
def on_add(ast_node, context)
  left, right = *ast_node.children

  # Both operands go through the XPath number() conversion first.
  augend = on_call_number(context, left)
  addend = on_call_number(context, right)

  augend + addend
end
700
+
701
+ ##
702
+ # Processes the `div` operator.
703
+ #
704
+ # This operator converts the left and right expressions to numbers and
705
+ # divides the left number with the right number.
706
+ #
707
+ # @param [AST::Node] ast_node
708
+ # @param [Oga::XML::NodeSet] context
709
+ # @return [Float]
710
+ #
711
def on_div(ast_node, context)
  left, right = *ast_node.children

  # Both operands go through the XPath number() conversion first.
  dividend = on_call_number(context, left)
  divisor  = on_call_number(context, right)

  dividend / divisor
end
716
+
717
+ ##
718
+ # Processes the `mod` operator.
719
+ #
720
+ # This operator converts the left and right expressions to numbers and
721
+ # returns the modulo of the two numbers.
722
+ #
723
+ # @param [AST::Node] ast_node
724
+ # @param [Oga::XML::NodeSet] context
725
+ # @return [Float]
726
+ #
727
def on_mod(ast_node, context)
  left, right = *ast_node.children

  dividend = on_call_number(context, left)
  divisor  = on_call_number(context, right)

  # XPath's `mod` is the remainder of a *truncating* division, so the result
  # takes the sign of the left operand (-5 mod 2 = -1, 5 mod -2 = 1). Ruby's
  # `%` is a floored modulo that follows the right operand instead, which is
  # why #remainder is used here.
  dividend.remainder(divisor)
end
732
+
733
+ ##
734
+ # Processes the `*` operator.
735
+ #
736
+ # This operator converts the left and right expressions to numbers and
737
+ # multiplies the left number with the right number.
738
+ #
739
+ # @param [AST::Node] ast_node
740
+ # @param [Oga::XML::NodeSet] context
741
+ # @return [Float]
742
+ #
743
def on_mul(ast_node, context)
  left, right = *ast_node.children

  # Both operands go through the XPath number() conversion first.
  multiplicand = on_call_number(context, left)
  multiplier   = on_call_number(context, right)

  multiplicand * multiplier
end
748
+
749
+ ##
750
+ # Processes the `-` operator.
751
+ #
752
+ # This operator converts the left and right expressions to numbers and
753
+ # subtracts the right number from the left number.
754
+ #
755
+ # @param [AST::Node] ast_node
756
+ # @param [Oga::XML::NodeSet] context
757
+ # @return [Float]
758
+ #
759
def on_sub(ast_node, context)
  left, right = *ast_node.children

  # Both operands go through the XPath number() conversion first.
  minuend    = on_call_number(context, left)
  subtrahend = on_call_number(context, right)

  minuend - subtrahend
end
764
+
765
+ ##
766
+ # Processes the `=` operator.
767
+ #
768
+ # This operator evaluates the expression on the left and right and returns
769
+ # `true` if they are equal. This operator can be used to compare strings,
770
+ # numbers and node sets. When using node sets the text of the set is
771
+ # compared instead of the nodes themselves. That is, nodes with different
772
+ # names but the same text are considered to be equal.
773
+ #
774
+ # @param [AST::Node] ast_node
775
+ # @param [Oga::XML::NodeSet] context
776
+ # @return [TrueClass|FalseClass]
777
+ #
778
def on_eq(ast_node, context)
  lhs = process(ast_node.children[0], context)
  rhs = process(ast_node.children[1], context)

  # Node sets are compared via the text of their first node.
  lhs = first_node_text(lhs) if lhs.is_a?(XML::NodeSet)
  rhs = first_node_text(rhs) if rhs.is_a?(XML::NodeSet)

  # The type of the left operand decides how the right one is coerced.
  case lhs
  when Numeric
    rhs = to_float(rhs) unless rhs.is_a?(Numeric)
  when String
    rhs = to_string(rhs) unless rhs.is_a?(String)
  end

  lhs == rhs
end
800
+
801
+ ##
802
+ # Processes the `!=` operator.
803
+ #
804
+ # This operator does the exact opposite of the `=` operator. See {#on_eq}
805
+ # for more information.
806
+ #
807
+ # @see [#on_eq]
808
+ #
809
def on_neq(ast_node, context)
  # `!=` is defined as the plain negation of `=`.
  equal = on_eq(ast_node, context)

  !equal
end
812
+
813
+ ##
814
+ # Processes the `<` operator.
815
+ #
816
+ # This operator converts the left and right expression to a number and
817
+ # returns `true` if the first number is lower than the second number.
818
+ #
819
+ # @param [Oga::XML::Node] ast_node
820
+ # @param [Oga::XML::NodeSet] context
821
+ # @return [TrueClass|FalseClass]
822
+ #
823
def on_lt(ast_node, context)
  left, right = *ast_node.children

  # Relational operators always compare the operands as numbers.
  lhs = on_call_number(context, left)
  rhs = on_call_number(context, right)

  lhs < rhs
end
828
+
829
+ ##
830
+ # Processes the `>` operator.
831
+ #
832
+ # This operator converts the left and right expression to a number and
833
+ # returns `true` if the first number is greater than the second number.
834
+ #
835
+ # @param [Oga::XML::Node] ast_node
836
+ # @param [Oga::XML::NodeSet] context
837
+ # @return [TrueClass|FalseClass]
838
+ #
839
def on_gt(ast_node, context)
  left, right = *ast_node.children

  # Relational operators always compare the operands as numbers.
  lhs = on_call_number(context, left)
  rhs = on_call_number(context, right)

  lhs > rhs
end
844
+
845
+ ##
846
+ # Processes the `<=` operator.
847
+ #
848
+ # This operator converts the left and right expression to a number and
849
+ # returns `true` if the first number is lower-than or equal to the second
850
+ # number.
851
+ #
852
+ # @param [Oga::XML::Node] ast_node
853
+ # @param [Oga::XML::NodeSet] context
854
+ # @return [TrueClass|FalseClass]
855
+ #
856
def on_lte(ast_node, context)
  left, right = *ast_node.children

  # Relational operators always compare the operands as numbers.
  lhs = on_call_number(context, left)
  rhs = on_call_number(context, right)

  lhs <= rhs
end
861
+
862
+ ##
863
+ # Processes the `>=` operator.
864
+ #
865
+ # This operator converts the left and right expression to a number and
866
+ # returns `true` if the first number is greater-than or equal to the
867
+ # second number.
868
+ #
869
+ # @param [Oga::XML::Node] ast_node
870
+ # @param [Oga::XML::NodeSet] context
871
+ # @return [TrueClass|FalseClass]
872
+ #
873
def on_gte(ast_node, context)
  left, right = *ast_node.children

  # Relational operators always compare the operands as numbers.
  lhs = on_call_number(context, left)
  rhs = on_call_number(context, right)

  lhs >= rhs
end
878
+
879
+ ##
880
+ # Delegates function calls to specific handlers.
881
+ #
882
+ # Handler functions take two arguments:
883
+ #
884
+ # 1. The context node set
885
+ # 2. A variable list of XPath function arguments, passed as individual
886
+ # Ruby method arguments.
887
+ #
888
+ # @param [AST::Node] ast_node
889
+ # @param [Oga::XML::NodeSet] context
890
+ # @return [Oga::XML::NodeSet]
891
+ #
892
def on_call(ast_node, context)
  function, *arguments = *ast_node.children

  # XPath function names use dashes ("string-length"); the handler methods
  # use underscores (#on_call_string_length). Arguments are passed along as
  # unevaluated AST nodes.
  send("on_call_#{function.tr('-', '_')}", context, *arguments)
end
899
+
900
+ ##
901
+ # Processes the `last()` function call. This function call returns the
902
+ # index of the last node in the current set.
903
+ #
904
+ # @param [Oga::XML::NodeSet] context
905
+ # @return [Float]
906
+ #
907
def on_call_last(context)
  # XPath positions run from 1 to N, so the last position equals the length
  # of the node set currently being evaluated.
  size = current_node_set.length

  size.to_f
end
911
+
912
+ ##
913
+ # Processes the `position()` function call. This function returns the
914
+ # position of the current node in the current node set.
915
+ #
916
+ # @param [Oga::XML::NodeSet] context
917
+ # @return [Float]
918
+ #
919
def on_call_position(context)
  # Look up the first context node in the current node set and shift the
  # zero-based index to a one-based position.
  zero_based = current_node_set.index(context.first)

  (zero_based + 1).to_f
end
924
+
925
+ ##
926
+ # Processes the `count()` function call. This function counts the amount
927
+ # of nodes in `expression` and returns the result as a float.
928
+ #
929
+ # @param [Oga::XML::NodeSet] context
930
+ # @param [AST::Node] expression
931
+ # @return [Float]
932
+ #
933
def on_call_count(context, expression)
  result = process(expression, context)

  # Only node sets can be counted; the size is reported as a float.
  return result.length.to_f if result.is_a?(XML::NodeSet)

  raise TypeError, 'count() can only operate on NodeSet instances'
end
942
+
943
+ ##
944
+ # Processes the `id()` function call.
945
+ #
946
+ # The XPath specification states that this function's behaviour should be
947
+ # controlled by a DTD. If a DTD were to specify that the ID attribute for
948
+ # a certain element would be "foo" then this function should use said
949
+ # attribute.
950
+ #
951
+ # Oga does not support DTD parsing/evaluation and as such always uses the
952
+ # "id" attribute.
953
+ #
954
+ # This function searches the entire document for a matching node,
955
+ # regardless of the current position.
956
+ #
957
+ # @param [Oga::XML::NodeSet] context
958
+ # @param [AST::Node] expression
959
+ # @return [Oga::XML::NodeSet]
960
+ #
961
def on_call_id(context, expression)
  value = process(expression, context)

  # Based on Nokogiri's/libxml behaviour: when a node set is given, the text
  # of the set is used as the (space separated) list of IDs.
  id_string = value.is_a?(XML::NodeSet) ? value.text : value.to_s
  wanted = id_string.split(' ')
  matches = XML::NodeSet.new

  # The whole document is scanned, regardless of the current context.
  @document.each_node do |node|
    next unless node.is_a?(XML::Element)

    id_attr = node.attribute('id')

    matches << node if id_attr && wanted.include?(id_attr.value)
  end

  matches
end
982
+
983
+ ##
984
+ # Processes the `local-name()` function call.
985
+ #
986
+ # This function call returns the name of one of the following:
987
+ #
988
+ # * The current context node (if any)
989
+ # * The first node in the supplied node set
990
+ #
991
+ # @param [Oga::XML::NodeSet] context
992
+ # @param [AST::Node] expression
993
+ # @return [String]
994
+ #
995
def on_call_local_name(context, expression = nil)
  target = function_node(context, expression)

  # Anything without a name (including no node at all) yields ''.
  return '' unless target.respond_to?(:name)

  target.name
end
1000
+
1001
+ ##
1002
+ # Processes the `name()` function call.
1003
+ #
1004
+ # This function call is similar to `local-name()` (see
1005
+ # {#on_call_local_name}) except that it includes the namespace name if
1006
+ # present.
1007
+ #
1008
+ # @param [Oga::XML::NodeSet] context
1009
+ # @param [AST::Node] expression
1010
+ # @return [String]
1011
+ #
1012
def on_call_name(context, expression = nil)
  target = function_node(context, expression)

  # Nodes that lack a name or a namespace accessor yield ''.
  return '' unless target.respond_to?(:name) && target.respond_to?(:namespace)

  ns = target.namespace

  # Prefix the node name with the namespace name when one is set.
  ns ? "#{ns.name}:#{target.name}" : target.name
end
1025
+
1026
+ ##
1027
+ # Processes the `namespace-uri()` function call.
1028
+ #
1029
+ # This function call returns the namespace URI of one of the following:
1030
+ #
1031
+ # * The current context node (if any)
1032
+ # * The first node in the supplied node set
1033
+ #
1034
+ # @param [Oga::XML::NodeSet] context
1035
+ # @param [AST::Node] expression
1036
+ # @return [String]
1037
+ #
1038
def on_call_namespace_uri(context, expression = nil)
  target = function_node(context, expression)

  # `ns` stays nil when the node has no namespace accessor.
  ns = target.namespace if target.respond_to?(:namespace)

  ns ? ns.uri : ''
end
1047
+
1048
+ ##
1049
+ # Evaluates the `string()` function call.
1050
+ #
1051
+ # This function call converts the given argument *or* the current context
1052
+ # node to a string. If a node set is given then only the first node is
1053
+ # converted to a string.
1054
+ #
1055
+ # @example
1056
+ # string(10) # => "10"
1057
+ #
1058
+ # @param [Oga::XML::NodeSet] context
1059
+ # @param [AST::Node] expression
1060
+ # @return [String]
1061
+ #
1062
def on_call_string(context, expression = nil)
  subject =
    if expression
      evaluated = process(expression, context)

      # For node sets only the first node is converted.
      evaluated.is_a?(XML::NodeSet) ? evaluated[0] : evaluated
    else
      context.first
    end

  # Nodes expose their text directly; other values go through to_string.
  subject.respond_to?(:text) ? subject.text : to_string(subject)
end
1079
+
1080
+ ##
1081
+ # Evaluates the `number()` function call.
1082
+ #
1083
+ # This function call converts its first argument *or* the current context
1084
+ # node to a number, similar to the `string()` function.
1085
+ #
1086
+ # @example
1087
+ # number("10") # => 10.0
1088
+ #
1089
+ # @see [#on_call_string]
1090
+ # @param [Oga::XML::NodeSet] context
1091
+ # @param [AST::Node] expression
1092
+ # @return [Float]
1093
+ #
1094
def on_call_number(context, expression = nil)
  # Without an argument the text of the first context node is converted.
  return to_float(context.first.text) unless expression

  result = process(expression, context)

  raw =
    if result.is_a?(XML::NodeSet)
      first_node_text(result)
    elsif result == true
      1.0
    elsif result == false
      0.0
    else
      # Any other value (including nil) is handed to to_float unchanged.
      result
    end

  to_float(raw)
end
1118
+
1119
+ ##
1120
+ # Processes the `concat()` function call.
1121
+ #
1122
+ # This function call converts its arguments to strings and concatenates
1123
+ # them. In case of node sets the text of the set is used.
1124
+ #
1125
+ # @param [Oga::XML::NodeSet] context
1126
+ # @param [AST::Node] first
1127
+ # @param [AST::Node] second
1128
+ # @param [Array<AST::Node>] rest
1129
+ #
1130
def on_call_concat(context, first, second, *rest)
  # At least two arguments are required; every argument is converted with
  # on_call_string and appended in order.
  [first, second, *rest].each_with_object('') do |argument, buffer|
    buffer << on_call_string(context, argument)
  end
end
1140
+
1141
+ ##
1142
+ # Processes the `starts-with()` function call.
1143
+ #
1144
+ # This function call returns `true` if the string in the 1st argument
1145
+ # starts with the string in the 2nd argument. Node sets can also be used.
1146
+ #
1147
+ # @example
1148
+ # starts-with("hello world", "hello") # => true
1149
+ #
1150
+ # @param [Oga::XML::NodeSet] context
1151
+ # @param [AST::Node] haystack The string to search.
1152
+ # @param [AST::Node] needle The string to search for.
1153
+ # @return [TrueClass|FalseClass]
1154
+ #
1155
def on_call_starts_with(context, haystack, needle)
  text = on_call_string(context, haystack)
  prefix = on_call_string(context, needle)

  # The explicit empty check is kept because of
  # https://github.com/jruby/jruby/issues/1923
  return true if prefix.empty?

  text.start_with?(prefix)
end
1162
+
1163
+ ##
1164
+ # Processes the `contains()` function call.
1165
+ #
1166
+ # This function call returns `true` if the string in the 1st argument
1167
+ # contains the string in the 2nd argument. Node sets can also be used.
1168
+ #
1169
+ # @example
1170
+ # contains("hello world", "o w") # => true
1171
+ #
1172
+ # @param [Oga::XML::NodeSet] context
1173
+ # @param [AST::Node] haystack The string to search.
1174
+ # @param [AST::Node] needle The string to search for.
1175
+ # @return [TrueClass|FalseClass]
1176
+ #
1177
def on_call_contains(context, haystack, needle)
  # Both arguments are converted to strings before the substring test.
  text = on_call_string(context, haystack)
  fragment = on_call_string(context, needle)

  text.include?(fragment)
end
1183
+
1184
+ ##
1185
+ # Processes the `substring-before()` function call.
1186
+ #
1187
+ # This function call returns the substring of the 1st argument that occurs
1188
+ # before the string given in the 2nd argument. For example:
1189
+ #
1190
+ # substring-before("2014-08-25", "-")
1191
+ #
1192
+ # This would return "2014" as it occurs before the first "-".
1193
+ #
1194
+ # @param [Oga::XML::NodeSet] context
1195
+ # @param [AST::Node] haystack The string to search.
1196
+ # @param [AST::Node] needle The string to search for.
1197
+ # @return [String]
1198
+ #
1199
def on_call_substring_before(context, haystack, needle)
  text = on_call_string(context, haystack)
  delimiter = on_call_string(context, needle)

  head, match, _tail = text.partition(delimiter)

  # An empty match means the delimiter was not found (or was itself empty);
  # in that case an empty string is returned.
  match.empty? ? match : head
end
1207
+
1208
+ ##
1209
+ # Processes the `substring-after()` function call.
1210
+ #
1211
+ # This function call returns the substring of the 1st argument that occurs
1212
+ # after the string given in the 2nd argument. For example:
1213
+ #
1214
+ # substring-after("2014-08-25", "-")
1215
+ #
1216
+ # This would return "08-25" as it occurs after the first "-".
1217
+ #
1218
+ # @param [Oga::XML::NodeSet] context
1219
+ # @param [AST::Node] haystack The string to search.
1220
+ # @param [AST::Node] needle The string to search for.
1221
+ # @return [String]
1222
+ #
1223
def on_call_substring_after(context, haystack, needle)
  haystack_str = on_call_string(context, haystack)
  needle_str = on_call_string(context, needle)

  # Locate the first occurrence of the needle. An empty needle is found at
  # offset 0, so the whole haystack follows it (XPath 2.0 states this rule
  # explicitly). The previous partition based code returned '' in that case
  # because it could not tell "empty separator" from "separator not found".
  offset = haystack_str.index(needle_str)

  # Without a match an empty string is returned.
  return '' unless offset

  haystack_str[(offset + needle_str.length)..-1]
end
1231
+
1232
+ ##
1233
+ # Processes the `substring()` function call.
1234
+ #
1235
+ # This function call returns the substring of the 1st argument, starting
1236
+ # at the position given in the 2nd argument. If the third argument is
1237
+ # given it is used as the length for the substring, otherwise the string
1238
+ # is consumed until the end.
1239
+ #
1240
+ # XPath string indexes start from position 1, not position 0.
1241
+ #
1242
+ # @example Using a literal string
1243
+ # substring("foo", 2) # => "oo"
1244
+ #
1245
+ # @example Using a literal string with a custom length
1246
+ # substring("foo", 1, 2) # => "fo"
1247
+ #
1248
+ # @example Using a node set
1249
+ # substring(users/user/username, 5)
1250
+ #
1251
+ # @param [Oga::XML::NodeSet] context
1252
+ # @param [AST::Node] haystack
1253
+ # @param [AST::Node] start
1254
+ # @param [AST::Node] length
1255
+ # @return [String]
1256
+ #
1257
def on_call_substring(context, haystack, start, length = nil)
  haystack_str = on_call_string(context, haystack)
  start_num = on_call_number(context, start)

  # XPath round(): nearest integer with ties going towards positive
  # infinity. Infinities are passed through untouched because they cannot
  # be converted to an Integer.
  xpath_round = lambda do |number|
    next number if number.infinite?

    floor = number.floor

    (number - floor) >= 0.5 ? floor + 1 : floor
  end

  # A NaN start position never selects any character.
  return '' if start_num.nan?

  first = xpath_round.call(start_num)

  if length
    length_num = on_call_number(context, length)

    return '' if length_num.nan?

    # `stop` is the first (1-based) position that is NOT included. Adding
    # -Infinity and Infinity produces NaN, which selects nothing.
    stop = first + xpath_round.call(length_num)

    return '' if stop.is_a?(Float) && stop.nan?
  else
    stop = Float::INFINITY
  end

  # Clamp the requested window to the positions that exist in the string.
  # Positions are 1-based, so a start of 0 or lower begins at position 1
  # while still consuming part of the requested length.
  from = [first, 1].max
  upto = [stop, haystack_str.length + 1].min

  # Covers zero/negative lengths, windows past the end of the string and
  # the remaining infinite bounds.
  return '' if upto <= from

  haystack_str[(from - 1)...(upto - 1)]
end
1270
+
1271
+ ##
1272
+ # Processes the `string-length()` function.
1273
+ #
1274
+ # This function returns the length of the string given in the 1st argument
1275
+ # *or* the current context node. If the expression is not a string it's
1276
+ # converted to a string using the `string()` function.
1277
+ #
1278
+ # @see [#on_call_string]
1279
+ # @param [Oga::XML::NodeSet] context
1280
+ # @param [AST::Node] expression
1281
+ # @return [Float]
1282
+ #
1283
def on_call_string_length(context, expression = nil)
  # The argument (or the context node) is converted to a string first; the
  # length is reported as a float.
  text = on_call_string(context, expression)

  text.length.to_f
end
1286
+
1287
+ ##
1288
+ # Processes the `normalize-space()` function call.
1289
+ #
1290
+ # This function strips the 1st argument string *or* the current context
1291
+ # node of leading/trailing whitespace as well as replacing multiple
1292
+ # whitespace sequences with single spaces.
1293
+ #
1294
+ # @example
1295
+ # normalize-space(" fo o ") # => "fo o"
1296
+ #
1297
+ # @param [Oga::XML::NodeSet] context
1298
+ # @param [AST::Node] expression
1299
+ # @return [String]
1300
+ #
1301
def on_call_normalize_space(context, expression = nil)
  text = on_call_string(context, expression)

  # Strip the edges first, then collapse every whitespace run to one space.
  trimmed = text.strip

  trimmed.gsub(/\s+/, ' ')
end
1306
+
1307
+ ##
1308
+ # Processes the `translate()` function call.
1309
+ #
1310
+ # This function takes the string of the 1st argument and replaces all
1311
+ # characters of the 2nd argument with those specified in the 3rd argument.
1312
+ #
1313
+ # @example
1314
+ # translate("bar", "abc", "ABC") # => "BAr"
1315
+ #
1316
+ # @param [Oga::XML::NodeSet] context
1317
+ # @param [AST::Node] input
1318
+ # @param [AST::Node] find
1319
+ # @param [AST::Node] replace
1320
+ # @return [String]
1321
+ #
1322
def on_call_translate(context, input, find, replace)
  input_str = on_call_string(context, input)
  find_chars = on_call_string(context, find).chars.to_a
  replace_chars = on_call_string(context, replace).chars.to_a

  # Build a character table up front. When a character occurs more than
  # once in `find` only its first occurrence counts. Characters without a
  # counterpart in `replace` map to '' and are thereby removed.
  mapping = {}

  find_chars.each_with_index do |char, index|
    mapping[char] = replace_chars[index] || '' unless mapping.key?(char)
  end

  # Every input character is translated exactly once. Applying the
  # replacements one after another over the whole string (as done
  # previously) would re-translate characters produced by an earlier
  # replacement, e.g. translate("ab", "ab", "ba") resulted in "aa".
  input_str.chars.map { |char| mapping.fetch(char, char) }.join
end
1335
+
1336
+ ##
1337
+ # Processes the `boolean()` function call.
1338
+ #
1339
+ # This function converts the 1st argument to a boolean.
1340
+ #
1341
+ # The boolean `true` is returned for the following:
1342
+ #
1343
+ # * A non empty string
1344
+ # * A non empty node set
1345
+ # * A non zero number, either positive or negative
1346
+ #
1347
+ # The boolean `false` is returned for all other cases.
1348
+ #
1349
+ # @param [Oga::XML::NodeSet] context
1350
+ # @param [AST::Node] expression
1351
+ # @return [TrueClass|FalseClass]
1352
+ #
1353
def on_call_boolean(context, expression)
  retval = process(expression, context)

  if retval.is_a?(Numeric)
    # Not every Numeric implements #nan? (Integer does not), so check for
    # the method before calling it instead of raising NoMethodError.
    not_a_number = retval.respond_to?(:nan?) && retval.nan?

    # Numbers are true unless they are NaN or zero.
    !not_a_number && !retval.zero?
  elsif retval
    # Strings and node sets are true when non-empty; any other truthy value
    # (such as `true`) is true as-is.
    !retval.respond_to?(:empty?) || !retval.empty?
  else
    false
  end
end
1365
+
1366
+ ##
1367
+ # Processes the `not()` function call.
1368
+ #
1369
+ # This function converts the 1st argument to a boolean and returns the
1370
+ # opposite boolean value. For example, if the first argument results in
1371
+ # `true` then this function returns `false` instead.
1372
+ #
1373
+ # @param [Oga::XML::NodeSet] context
1374
+ # @param [AST::Node] expression
1375
+ # @return [TrueClass|FalseClass]
1376
+ #
1377
def on_call_not(context, expression)
  # Convert the argument with the boolean() rules, then invert the result.
  truthy = on_call_boolean(context, expression)

  truthy ? false : true
end
1380
+
1381
+ ##
1382
+ # Processes the `true()` function call.
1383
+ #
1384
+ # This function simply returns the boolean `true`.
1385
+ #
1386
+ # @param [Oga::XML::NodeSet] context
1387
+ # @return [TrueClass]
1388
+ #
1389
def on_call_true(context)
  # The context is accepted for a uniform handler signature but not used.
  true
end
1392
+
1393
+ ##
1394
+ # Processes the `false()` function call.
1395
+ #
1396
+ # This function simply returns the boolean `false`.
1397
+ #
1398
+ # @param [Oga::XML::NodeSet] context
1399
+ # @return [FalseClass]
1400
+ #
1401
def on_call_false(context)
  # The context is accepted for a uniform handler signature but not used.
  false
end
1404
+
1405
+ ##
1406
+ # Processes the `lang()` function call.
1407
+ #
1408
+ # This function returns `true` if the current context node is in the given
1409
+ # language, `false` otherwise.
1410
+ #
1411
+ # The language is based on the value of the "xml:lang" attribute of either
1412
+ # the context node or an ancestor node (in case the context node has no
1413
+ # such attribute).
1414
+ #
1415
+ # @param [Oga::XML::NodeSet] context
1416
+ # @param [AST::Node] language
1417
+ # @return [TrueClass|FalseClass]
1418
+ #
1419
def on_call_lang(context, language)
  # Language tags are compared case-insensitively.
  lang_str = on_call_string(context, language).downcase
  node = context.first

  # Walk from the context node up through its ancestors; the nearest
  # "xml:lang" attribute decides the outcome.
  while node.respond_to?(:attribute)
    found = node.attribute('xml:lang')

    if found
      value = found.value.to_s.downcase

      # Per the XPath specification the attribute matches when it equals the
      # requested language, or when it equals it followed by a suffix that
      # starts with "-" (so lang("en") matches xml:lang="en-US").
      return value == lang_str || value.start_with?("#{lang_str}-")
    end

    node = node.parent
  end

  false
end
1433
+
1434
+ ##
1435
+ # Processes the `sum()` function call.
1436
+ #
1437
+ # This function call takes a node set, converts each node to a number and
1438
+ # then sums the values.
1439
+ #
1440
+ # As an example, take the following XML:
1441
+ #
1442
+ # <root>
1443
+ # <a>1</a>
1444
+ # <b>2</b>
1445
+ # </root>
1446
+ #
1447
+ # Using the expression `sum(root/*)` the return value would be `3.0`.
1448
+ #
1449
+ # @param [Oga::XML::NodeSet] context
1450
+ # @param [AST::Node] expression
1451
+ # @return [Float]
1452
+ #
1453
def on_call_sum(context, expression)
  nodes = process(expression, context)

  # Only node sets can be summed.
  unless nodes.is_a?(XML::NodeSet)
    raise TypeError, 'sum() can only operate on NodeSet instances'
  end

  total = 0.0

  # Each node contributes the float value of its text.
  nodes.each { |node| total += node.text.to_f }

  total
end
1467
+
1468
+ ##
1469
+ # Processes the `floor()` function call.
1470
+ #
1471
+ # This function call rounds the 1st argument down to the closest integer,
1472
+ # and then returns that number as a float.
1473
+ #
1474
+ # @param [Oga::XML::NodeSet] context
1475
+ # @param [AST::Node] expression
1476
+ # @return [Float]
1477
+ #
1478
def on_call_floor(context, expression)
  number = on_call_number(context, expression)

  # NaN and +/-Infinity cannot be converted to an Integer (Float#floor
  # raises FloatDomainError for them), so they are returned unchanged.
  number.finite? ? number.floor.to_f : number
end
1483
+
1484
+ ##
1485
+ # Processes the `ceiling()` function call.
1486
+ #
1487
+ # This function call rounds the 1st argument up to the closest integer,
1488
+ # and then returns that number as a float.
1489
+ #
1490
+ # @param [Oga::XML::NodeSet] context
1491
+ # @param [AST::Node] expression
1492
+ # @return [Float]
1493
+ #
1494
def on_call_ceiling(context, expression)
  number = on_call_number(context, expression)

  # NaN and +/-Infinity cannot be converted to an Integer (Float#ceil
  # raises FloatDomainError for them), so they are returned unchanged.
  number.finite? ? number.ceil.to_f : number
end
1499
+
1500
+ ##
1501
+ # Processes the `round()` function call.
1502
+ #
1503
+ # This function call rounds the 1st argument to the closest integer, and
1504
+ # then returns that number as a float.
1505
+ #
1506
+ # @param [Oga::XML::NodeSet] context
1507
+ # @param [AST::Node] expression
1508
+ # @return [Float]
1509
+ #
1510
def on_call_round(context, expression)
  number = on_call_number(context, expression)

  # NaN and +/-Infinity cannot be converted to an Integer (Float#round
  # raises FloatDomainError for them), so they are returned unchanged.
  return number unless number.finite?

  floor = number.floor

  # XPath rounds ties towards positive infinity (round(-2.5) is -2), whereas
  # Float#round rounds ties away from zero. Comparing the fractional part
  # with 0.5 also avoids the precision loss of computing `number + 0.5`.
  ((number - floor) >= 0.5 ? floor + 1 : floor).to_f
end
1515
+
1516
+ ##
1517
+ # Processes an `(int)` node.
1518
+ #
1519
+ # @param [AST::Node] ast_node
1520
+ # @param [Oga::XML::NodeSet] context
1521
+ # @return [Float]
1522
+ #
1523
def on_int(ast_node, context)
  # The first child holds the integer; it is returned as a float.
  integer = ast_node.children[0]

  integer.to_f
end
1526
+
1527
+ ##
1528
+ # Processes an `(float)` node.
1529
+ #
1530
+ # @param [AST::Node] ast_node
1531
+ # @param [Oga::XML::NodeSet] context
1532
+ # @return [Float]
1533
+ #
1534
def on_float(ast_node, context)
  # The first child is returned as-is.
  number = ast_node.children[0]

  number
end
1537
+
1538
+ ##
1539
+ # Processes a `(string)` node.
1540
+ #
1541
+ # @param [AST::Node] ast_node
1542
+ # @param [Oga::XML::NodeSet] context
1543
+ # @return [String]
1544
+ #
1545
def on_string(ast_node, context)
  # The first child is returned as-is.
  text = ast_node.children[0]

  text
end
1548
+
1549
+ ##
1550
+ # Processes a variable reference. If the variable is not defined an error
1551
+ # is raised.
1552
+ #
1553
+ # @param [AST::Node] ast_node
1554
+ # @param [Oga::XML::NodeSet] context
1555
+ # @return [Mixed]
1556
+ # @raise [RuntimeError]
1557
+ #
1558
def on_var(ast_node, context)
  name = ast_node.children[0]

  # Referencing a variable that was never bound is an error.
  raise "Undefined XPath variable: #{name}" unless @variables.key?(name)

  @variables[name]
end
1567
+
1568
+ ##
1569
+ # Returns the node for a function call. This node is either the first node
1570
+ # in the supplied node set, or the first node in the current context.
1571
+ #
1572
+ # @param [Oga::XML::NodeSet] context
1573
+ # @param [AST::Node] expression
1574
+ # @return [Oga::XML::Node]
1575
+ #
1576
def function_node(context, expression = nil)
  # Without an expression the first context node is used.
  return context.first unless expression

  result = process(expression, context)

  unless result.is_a?(XML::NodeSet)
    raise TypeError, 'only node sets can be used as arguments'
  end

  result.first
end
1591
+
1592
+ ##
1593
+ # Returns the text of the first node in the node set, or an empty string
1594
+ # if the node set is empty.
1595
+ #
1596
+ # @param [Oga::XML::NodeSet] set
1597
+ # @return [String]
1598
+ #
1599
def first_node_text(set)
  head = set[0]

  # An empty set (or a first entry without text) yields ''.
  return '' unless head.respond_to?(:text)

  head.text
end
1602
+
1603
+ ##
1604
+ # Returns a node set containing all the child nodes of the given set of
1605
+ # nodes.
1606
+ #
1607
+ # @param [Oga::XML::NodeSet] nodes
1608
+ # @return [Oga::XML::NodeSet]
1609
+ #
1610
def child_nodes(nodes)
  collected = XML::NodeSet.new

  # Append the children of every node, in order, to a single set.
  nodes.each { |parent| collected.concat(parent.children) }

  collected
end
1619
+
1620
+ ##
1621
+ # Checks if a given {Oga::XML::Node} instance matches a `AST::Node`
1622
+ # instance.
1623
+ #
1624
+ # This method can use both "test" and "type-test" nodes. In case of
1625
+ # "type-test" nodes the procedure is as following:
1626
+ #
1627
+ # 1. Evaluate the expression
1628
+ # 2. If the return value is non empty return `true`, otherwise return
1629
+ # `false`
1630
+ #
1631
+ # For "test" nodes the procedure is as following instead:
1632
+ #
1633
+ # 1. Match the name
1634
+ # 2. Match the namespace
1635
+ #
1636
+ # For both the name and namespace a wildcard (`*`) can be used.
1637
+ #
1638
+ # @param [Oga::XML::Node] xml_node
1639
+ # @param [AST::Node] ast_node
1640
+ # @return [TrueClass|FalseClass]
1641
+ #
1642
def node_matches?(xml_node, ast_node)
  # "type-test" nodes are handled by evaluating the test itself.
  return type_matches?(xml_node, ast_node) if ast_node.type.equal?(:type_test)

  ns, name = *ast_node.children

  # A lone wildcard name also matches any namespace.
  ns = STAR if !ns && name == STAR

  name_ok = name_matches?(xml_node, name)

  ns_ok =
    if ns
      namespace_matches?(xml_node, ns)
    else
      # Without a namespace in the test the node matches when it either has
      # no namespace at all, or reports being in the default namespace.
      (name_ok && !xml_node.namespace) ||
        (xml_node.respond_to?(:default_namespace?) &&
          xml_node.default_namespace?)
    end

  name_ok && ns_ok
end
1672
+
1673
+ ##
1674
+ # @param [Oga::XML::Node] xml_node
1675
+ # @param [AST::Node] ast_node
1676
+ # @return [TrueClass|FalseClass]
1677
+ #
1678
def type_matches?(xml_node, ast_node)
  # Evaluate the test with the node as the only context entry; the node
  # matches when the evaluation returns a non-empty result.
  single = XML::NodeSet.new([xml_node])
  matched = process(ast_node, single)

  matched.length > 0
end
1683
+
1684
+ ##
1685
+ # Returns `true` if the name of the XML node matches the given name *or*
1686
+ # matches a wildcard.
1687
+ #
1688
+ # @param [Oga::XML::Node] xml_node
1689
+ # @param [String] name
1690
+ #
1691
def name_matches?(xml_node, name)
  # Nodes without a name can never match.
  return false unless xml_node.respond_to?(:name)

  # A wildcard matches every named node.
  return true if name == STAR

  xml_node.name == name
end
1696
+
1697
+ ##
1698
+ # Returns `true` if the namespace of the XML node matches the given
1699
+ # namespace *or* matches a wildcard.
1700
+ #
1701
+ # @param [Oga::XML::Node] xml_node
1702
+ # @param [String] ns
1703
+ #
1704
def namespace_matches?(xml_node, ns)
  # Nodes without a namespace accessor can never match.
  return false unless xml_node.respond_to?(:namespace)

  # A wildcard matches any namespace, including none.
  return true if ns == STAR

  namespace = xml_node.namespace

  # Evaluates to nil (not false) when the node has no namespace.
  namespace && namespace.name == ns
end
1711
+
1712
+ ##
1713
+ # @param [Oga::XML::Node] ast_node
1714
+ # @return [TrueClass|FalseClass]
1715
+ #
1716
def has_parent?(ast_node)
  # Objects without a `parent` accessor have no parent by definition.
  return false unless ast_node.respond_to?(:parent)

  ast_node.parent ? true : false
end
1719
+
1720
+ ##
1721
+ # Converts the given value to a float. If the value can't be converted to
1722
+ # a float NaN is returned instead.
1723
+ #
1724
+ # @param [Mixed] value
1725
+ # @return [Float]
1726
+ #
1727
def to_float(value)
  Float(value)
rescue StandardError
  # Anything Kernel#Float rejects (non-numeric strings, nil, ...) is NaN.
  Float::NAN
end
1730
+
1731
+ ##
1732
+ # Converts the given value to a string according to the XPath string
1733
+ # conversion rules.
1734
+ #
1735
+ # @param [Mixed] value
1736
+ # @return [String]
1737
+ #
1738
def to_string(value)
  # Floats without a fractional part (e.g. 10.0) are printed as integers
  # ("10"), so they are converted to an Integer before stringifying.
  whole_float = value.is_a?(Float) && value.modulo(1).zero?

  (whole_float ? value.to_i : value).to_s
end
1748
+
1749
+ ##
1750
+ # Stores the specified node set and yields the supplied block. The return
1751
+ # value of this method is whatever the block returned.
1752
+ #
1753
+ # @example
1754
+ # retval = with_node_set(context) do
1755
+ # process(....)
1756
+ # end
1757
+ #
1758
+ # @param [Oga::XML::NodeSet] nodes
1759
+ #
1760
def with_node_set(nodes)
  # Make `nodes` the current node set for the duration of the block.
  @node_sets << nodes

  begin
    yield
  ensure
    # Always restore the previous node set, also when the block raises;
    # otherwise a failed evaluation would leave a stale set on the stack.
    @node_sets.pop
  end
end
1769
+
1770
+ ##
1771
+ # @return [Oga::XML::NodeSet]
1772
+ #
1773
def current_node_set
  # The most recently stored node set is the current one.
  @node_sets[-1]
end
1776
+
1777
+ ##
1778
+ # Returns the root node of `node`, or `node` itself if it's a Document.
1779
+ #
1780
+ # @param [Oga::XML::Node|Oga::XML::Document] node
1781
+ # @return [Oga::XML::Node|Oga::XML::Document]
1782
+ #
1783
def root_node(node)
  # Objects without a `root_node` accessor are returned unchanged.
  return node unless node.respond_to?(:root_node)

  node.root_node
end
1786
+
1787
+ ##
1788
+ # Returns the parent node of `node`, or `node` itself if it's a Document.
1789
+ #
1790
+ # @param [Oga::XML::Node|Oga::XML::Document] node
1791
+ # @return [Oga::XML::Node|Oga::XML::Document]
1792
+ #
1793
def parent_node(node)
  # Objects without a `parent` accessor are returned unchanged.
  return node unless node.respond_to?(:parent)

  node.parent
end
1796
+ end # Evaluator
1797
+ end # XPath
1798
+ end # Oga