rexml 3.1.7.3
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of rexml might be problematic. Click here for more details.
- checksums.yaml +7 -0
- data/.gitignore +9 -0
- data/.travis.yml +10 -0
- data/Gemfile +6 -0
- data/LICENSE.txt +22 -0
- data/README.md +60 -0
- data/Rakefile +10 -0
- data/bin/console +14 -0
- data/bin/setup +8 -0
- data/lib/rexml/attlistdecl.rb +63 -0
- data/lib/rexml/attribute.rb +192 -0
- data/lib/rexml/cdata.rb +68 -0
- data/lib/rexml/child.rb +97 -0
- data/lib/rexml/comment.rb +80 -0
- data/lib/rexml/doctype.rb +270 -0
- data/lib/rexml/document.rb +291 -0
- data/lib/rexml/dtd/attlistdecl.rb +11 -0
- data/lib/rexml/dtd/dtd.rb +47 -0
- data/lib/rexml/dtd/elementdecl.rb +18 -0
- data/lib/rexml/dtd/entitydecl.rb +57 -0
- data/lib/rexml/dtd/notationdecl.rb +40 -0
- data/lib/rexml/element.rb +1267 -0
- data/lib/rexml/encoding.rb +51 -0
- data/lib/rexml/entity.rb +171 -0
- data/lib/rexml/formatters/default.rb +112 -0
- data/lib/rexml/formatters/pretty.rb +142 -0
- data/lib/rexml/formatters/transitive.rb +58 -0
- data/lib/rexml/functions.rb +447 -0
- data/lib/rexml/instruction.rb +71 -0
- data/lib/rexml/light/node.rb +196 -0
- data/lib/rexml/namespace.rb +48 -0
- data/lib/rexml/node.rb +76 -0
- data/lib/rexml/output.rb +30 -0
- data/lib/rexml/parent.rb +166 -0
- data/lib/rexml/parseexception.rb +52 -0
- data/lib/rexml/parsers/baseparser.rb +586 -0
- data/lib/rexml/parsers/lightparser.rb +59 -0
- data/lib/rexml/parsers/pullparser.rb +197 -0
- data/lib/rexml/parsers/sax2parser.rb +273 -0
- data/lib/rexml/parsers/streamparser.rb +61 -0
- data/lib/rexml/parsers/treeparser.rb +101 -0
- data/lib/rexml/parsers/ultralightparser.rb +57 -0
- data/lib/rexml/parsers/xpathparser.rb +675 -0
- data/lib/rexml/quickpath.rb +266 -0
- data/lib/rexml/rexml.rb +32 -0
- data/lib/rexml/sax2listener.rb +98 -0
- data/lib/rexml/security.rb +28 -0
- data/lib/rexml/source.rb +298 -0
- data/lib/rexml/streamlistener.rb +93 -0
- data/lib/rexml/syncenumerator.rb +33 -0
- data/lib/rexml/text.rb +424 -0
- data/lib/rexml/undefinednamespaceexception.rb +9 -0
- data/lib/rexml/validation/relaxng.rb +539 -0
- data/lib/rexml/validation/validation.rb +144 -0
- data/lib/rexml/validation/validationexception.rb +10 -0
- data/lib/rexml/xmldecl.rb +116 -0
- data/lib/rexml/xmltokens.rb +85 -0
- data/lib/rexml/xpath.rb +81 -0
- data/lib/rexml/xpath_parser.rb +934 -0
- data/rexml.gemspec +42 -0
- metadata +131 -0
@@ -0,0 +1,144 @@
|
|
1
|
+
# frozen_string_literal: false
|
2
|
+
require_relative 'validationexception'
|
3
|
+
|
4
|
+
module REXML
|
5
|
+
module Validation
|
6
|
+
# Mixin that feeds parser events, one at a time, through a validation
# state machine.
#
# The including object must provide @root: a state object responding to
# #reset, #previous=, #next(event), #expected and #inspect. States returned
# by #next must additionally respond to #matches?(event).
module Validator
  NILEVENT = [ nil ]

  # Rewinds the machine to its root state and forgets any attributes that
  # are still pending. Returns self so calls can be chained.
  def reset
    @current = @root
    @root.reset
    @root.previous = true
    @attr_stack = []
    self
  end

  # Prints the inspect form of the root state to standard output.
  def dump
    puts @root.inspect
  end

  # Advances the machine by one event.
  #
  # event:: an Array whose first item is the event type (for example
  #         :start_element or :end_element), whose second item is the event
  #         argument, and whose third item, for :start_element, is a Hash
  #         of attribute name => value. That Hash is modified in place:
  #         every attribute the grammar accepts is deleted from it.
  #
  # Raises ValidationException when the current state does not accept the
  # event, or when an element is closed while attributes that the grammar
  # did not accept remain.
  def validate( event )
    @attr_stack = [] unless defined? @attr_stack
    match = @current.next(event)
    unless match
      raise ValidationException.new( "Validation error. Expected: "+
        @current.expected.join( " or " )+" from #{@current.inspect} "+
        " but got #{Event.new( event[0], event[1] ).inspect}" )
    end
    @current = match

    # Check for attributes
    case event[0]
    when :start_element
      @attr_stack << event[2]
      # Keep consuming attributes until a full pass over the remaining ones
      # finds none that the current state accepts.
      loop do
        sattr = [:start_attribute, nil]
        eattr = [:end_attribute]
        text = [:text, nil]
        k, = event[2].find { |key,value|
          sattr[1] = key
          m = @current.next( sattr )
          if m
            # If the state has text children...
            if m.matches?( eattr )
              @current = m
            else
              text[1] = value
              m = m.next( text )
              text[1] = nil
              # NOTE(review): this returns false from #validate itself (not
              # just from the block) when the attribute value is rejected.
              return false unless m
              @current = m
            end
            m = @current.next( eattr )
            if m
              @current = m
              true
            else
              false
            end
          else
            false
          end
        }
        event[2].delete(k) if k
        break unless k
      end
    when :end_element
      # Whatever is left in the Hash was never accepted by the grammar.
      attrs = @attr_stack.pop
      raise ValidationException.new( "Validation error. Illegal "+
        " attributes: #{attrs.inspect}") if attrs.length > 0
    end
  end
end
|
68
|
+
|
69
|
+
# A single expected event in a validation grammar: an event type plus an
# optional argument (an element or attribute name, or a text value).
class Event
  attr_reader :event_type
  attr_accessor :event_arg

  # event_type:: the event kind, e.g. :start_element, :text, or nil
  # event_arg::  the name or value the event must carry; nil means "any"
  #              for :text events
  def initialize(event_type, event_arg=nil )
    @event_type = event_type
    @event_arg = event_arg
  end

  # Returns @done. This class never assigns it, so it is nil here.
  def done?
    @done
  end

  # True for every event type except the two that open a nested scope
  # (:start_element and :start_attribute).
  def single?
    @event_type != :start_element && @event_type != :start_attribute
  end

  # Tells whether the raw parser event (an Array of [type, arg, ...])
  # satisfies this expected event. Always returns true or false.
  #
  # * nil, :end_element, :end_attribute and :end_document match on type alone.
  # * :start_element and :start_attribute also require the same name.
  # * :text matches any text when this event has no argument, otherwise the
  #   text must be equal to the argument.
  # * every other event type never matches.
  def matches?( event )
    return false unless event[0] == @event_type
    case event[0]
    when nil, :end_element, :end_attribute, :end_document
      true
    when :start_element, :start_attribute
      event[1] == @event_arg
    when :text
      @event_arg.nil? || @event_arg == event[1]
    else
      false
    end
  end

  # Two events are equal when both type and argument are equal.
  def ==( other )
    return false unless other.kind_of? Event
    @event_type == other.event_type && @event_arg == other.event_arg
  end

  def to_s
    inspect
  end

  def inspect
    "#{@event_type.inspect}( #@event_arg )"
  end
end
|
143
|
+
end
|
144
|
+
end
|
@@ -0,0 +1,116 @@
|
|
1
|
+
# frozen_string_literal: false
|
2
|
+
require_relative 'encoding'
|
3
|
+
require_relative 'source'
|
4
|
+
|
5
|
+
module REXML
|
6
|
+
# Represents the XML declaration of a document (<?xml version=... ?>):
# its version, encoding and standalone flag, plus two switches that control
# whether the declaration and its encoding attribute are written out.
class XMLDecl < Child
  include Encoding

  DEFAULT_VERSION = "1.0"
  DEFAULT_ENCODING = "UTF-8"
  DEFAULT_STANDALONE = "no"
  # Regexp-source fragments; the backslash is stripped before writing.
  START = '<\?xml'
  STOP = '\?>'

  attr_accessor :version, :standalone
  attr_reader :writeencoding, :writethis

  # version::    a version String, or another XMLDecl to copy
  # encoding::   encoding name; nil selects UTF-8 and suppresses the
  #              encoding attribute on output
  # standalone:: value for the standalone attribute, or nil to omit it
  def initialize(version=DEFAULT_VERSION, encoding=nil, standalone=nil)
    @writethis = true
    @writeencoding = !encoding.nil?
    if version.kind_of? XMLDecl
      super()
      @version = version.version
      self.encoding = version.encoding
      # encoding= flips both write switches on, so restore the source's
      # settings afterwards; otherwise copying a #nowrite declaration
      # would silently turn it back into a written one.
      @writeencoding = version.writeencoding
      @standalone = version.standalone
      @writethis = version.writethis
    else
      super()
      @version = version
      self.encoding = encoding
      @standalone = standalone
    end
    @version = DEFAULT_VERSION if @version.nil?
  end

  # Returns a copy built through the copy-constructor branch of #initialize.
  def clone
    XMLDecl.new(self)
  end

  # indent::
  #   Ignored. There must be no whitespace before an XML declaration
  # transitive::
  #   Ignored
  # ie_hack::
  #   Ignored
  #
  # Writes nothing and returns nil when writing is switched off, unless
  # the writer is an Output.
  def write(writer, indent=-1, transitive=false, ie_hack=false)
    return nil unless @writethis || writer.kind_of?(Output)
    writer << START.sub(/\\/u, '')
    writer << " #{content encoding}"
    writer << STOP.sub(/\\/u, '')
  end

  # Equal when the other object is an XMLDecl with the same version,
  # encoding and standalone value. The write switches are not compared.
  def ==( other )
    other.kind_of?(XMLDecl) &&
      other.version == @version &&
      other.encoding == self.encoding &&
      other.standalone == @standalone
  end

  # Overwrites version, encoding and standalone in one call.
  def xmldecl version, encoding, standalone
    @version = version
    self.encoding = encoding
    @standalone = standalone
  end

  def node_type
    :xmldecl
  end

  alias :stand_alone? :standalone
  # Keep the mixed-in writer reachable; encoding= is redefined just below.
  alias :old_enc= :encoding=

  # Sets the encoding. nil means UTF-8 with no encoding attribute written;
  # any other value is written explicitly. Either way the declaration is
  # switched back to "written" (see #dowrite).
  def encoding=( enc )
    if enc.nil?
      self.old_enc = "UTF-8"
      @writeencoding = false
    else
      self.old_enc = enc
      @writeencoding = true
    end
    self.dowrite
  end

  # Only use this if you do not want the XML declaration to be written;
  # this object is ignored by the XML writer.  Otherwise, instantiate your
  # own XMLDecl and add it to the document.
  #
  # Note that XML 1.1 documents *must* include an XML declaration
  def XMLDecl.default
    rv = XMLDecl.new( "1.0" )
    rv.nowrite
    rv
  end

  # Switches writing of this declaration off.
  def nowrite
    @writethis = false
  end

  # Switches writing of this declaration on.
  def dowrite
    @writethis = true
  end

  def inspect
    START.sub(/\\/u, '') + " ... " + STOP.sub(/\\/u, '')
  end

  private
  # Builds the attribute list of the declaration. The encoding attribute
  # is included when it was set explicitly or when it is not UTF-8.
  def content(enc)
    rv = "version='#@version'"
    rv << " encoding='#{enc}'" if @writeencoding || enc !~ /\Autf-8\z/i
    rv << " standalone='#@standalone'" if @standalone
    rv
  end
end
|
116
|
+
end
|
@@ -0,0 +1,85 @@
|
|
1
|
+
# frozen_string_literal: false
|
2
|
+
module REXML
|
3
|
+
# Defines a number of tokens used for parsing XML. Not for general
|
4
|
+
# consumption.
|
5
|
+
module XMLTokens
  # Every constant below is a String holding regular-expression source,
  # meant to be interpolated into a Regexp by the parsers.

  # From http://www.w3.org/TR/REC-xml/#sec-common-syn
  #
  #   [4] NameStartChar ::=
  #         ":" |
  #         [A-Z] |
  #         "_" |
  #         [a-z] |
  #         [#xC0-#xD6] |
  #         [#xD8-#xF6] |
  #         [#xF8-#x2FF] |
  #         [#x370-#x37D] |
  #         [#x37F-#x1FFF] |
  #         [#x200C-#x200D] |
  #         [#x2070-#x218F] |
  #         [#x2C00-#x2FEF] |
  #         [#x3001-#xD7FF] |
  #         [#xF900-#xFDCF] |
  #         [#xFDF0-#xFFFD] |
  #         [#x10000-#xEFFFF]
  name_start_chars = [
    ":",
    "A-Z",
    "_",
    "a-z",
    "\\u00C0-\\u00D6",
    "\\u00D8-\\u00F6",
    "\\u00F8-\\u02FF",
    "\\u0370-\\u037D",
    "\\u037F-\\u1FFF",
    "\\u200C-\\u200D",
    "\\u2070-\\u218F",
    "\\u2C00-\\u2FEF",
    "\\u3001-\\uD7FF",
    "\\uF900-\\uFDCF",
    "\\uFDF0-\\uFFFD",
    "\\u{10000}-\\u{EFFFF}",
  ]
  # From http://www.w3.org/TR/REC-xml/#sec-common-syn
  #
  #   [4a] NameChar ::=
  #      NameStartChar |
  #      "-" |
  #      "." |
  #      [0-9] |
  #      #xB7 |
  #      [#x0300-#x036F] |
  #      [#x203F-#x2040]
  name_chars = name_start_chars + [
    "\\-",
    "\\.",
    "0-9",
    "\\u00B7",
    "\\u0300-\\u036F",
    "\\u203F-\\u2040",
  ]
  NAME_START_CHAR = "[#{name_start_chars.join('')}]"
  NAME_CHAR = "[#{name_chars.join('')}]"
  NAMECHAR = NAME_CHAR # deprecated. Use NAME_CHAR instead.

  # From http://www.w3.org/TR/xml-names11/#NT-NCName
  #
  #   [6] NCNameStartChar ::= NameStartChar - ':'
  ncname_start_chars = name_start_chars - [":"]
  # From http://www.w3.org/TR/xml-names11/#NT-NCName
  #
  #   [5] NCNameChar ::= NameChar - ':'
  ncname_chars = name_chars - [":"]
  NCNAME_STR = "[#{ncname_start_chars.join('')}][#{ncname_chars.join('')}]*"
  # An optional "prefix:" followed by a local name.
  NAME_STR = "(?:#{NCNAME_STR}:)?#{NCNAME_STR}"

  NAME = "(#{NAME_START_CHAR}#{NAME_CHAR}*)"
  NMTOKEN = "(?:#{NAME_CHAR})+"
  NMTOKENS = "#{NMTOKEN}(\\s+#{NMTOKEN})*"
  # Entity reference, decimal character reference or hex character reference.
  REFERENCE = "(?:&#{NAME};|&#\\d+;|&#x[0-9a-fA-F]+;)"

  #REFERENCE = "(?:#{ENTITYREF}|#{CHARREF})"
  #ENTITYREF = "&#{NAME};"
  #CHARREF = "&#\\d+;|&#x[0-9a-fA-F]+;"
end
|
85
|
+
end
|
data/lib/rexml/xpath.rb
ADDED
@@ -0,0 +1,81 @@
|
|
1
|
+
# frozen_string_literal: false
|
2
|
+
require_relative 'functions'
|
3
|
+
require_relative 'xpath_parser'
|
4
|
+
|
5
|
+
module REXML
|
6
|
+
# Wrapper class. Use this class to access the XPath functions.
|
7
|
+
class XPath
  include Functions
  # A base Hash object, supposing to be used when initializing a
  # default empty namespaces set, but is currently unused.
  # TODO: either set the namespaces=EMPTY_HASH, or deprecate this.
  EMPTY_HASH = {}

  # Finds and returns the first node that matches the supplied xpath.
  # element::
  #   The context element
  # path::
  #   The xpath to search for.  If not supplied or nil, returns the first
  #   node matching '*'.
  # namespaces::
  #   If supplied, a Hash which defines a namespace mapping.
  # variables::
  #   If supplied, a Hash which maps $variables in the query
  #   to values. This can be used to avoid XPath injection attacks
  #   or to automatically handle escaping string values.
  # options::
  #   Keyword options forwarded to XPathParser.new.
  #
  #  XPath.first( node )
  #  XPath.first( doc, "//b" )
  #  XPath.first( node, "a/x:b", { "x"=>"http://doofus" } )
  #  XPath.first( node, '/book/publisher/text()=$publisher', {}, {"publisher"=>"O'Reilly"})
  #
  # Raises a RuntimeError when namespaces or variables is not a Hash.
  def XPath::first(element, path=nil, namespaces=nil, variables={}, options={})
    raise "The namespaces argument, if supplied, must be a hash object." unless namespaces.nil? or namespaces.kind_of?(Hash)
    raise "The variables argument, if supplied, must be a hash object." unless variables.kind_of?(Hash)
    parser = XPathParser.new(**options)
    parser.namespaces = namespaces
    parser.variables = variables
    path = "*" unless path
    element = [element] unless element.kind_of? Array
    parser.parse(path, element).flatten[0]
  end

  # Iterates over nodes that match the given path, calling the supplied
  # block with the match.
  # element::
  #   The context element
  # path::
  #   The xpath to search for.  If not supplied or nil, defaults to '*'
  # namespaces::
  #   If supplied, a Hash which defines a namespace mapping
  # variables::
  #   If supplied, a Hash which maps $variables in the query
  #   to values. This can be used to avoid XPath injection attacks
  #   or to automatically handle escaping string values.
  # options::
  #   Keyword options forwarded to XPathParser.new.
  #
  #  XPath.each( node ) { |el| ... }
  #  XPath.each( node, "/*[@attr='v']" ) { |el| ... }
  #  XPath.each( node, 'ancestor::x' ) { |el| ... }
  #  XPath.each( node, '/book/publisher/text()=$publisher', {}, {"publisher"=>"O'Reilly"}) \
  #    {|el| ... }
  #
  # Raises a RuntimeError when namespaces or variables is not a Hash.
  def XPath::each(element, path=nil, namespaces=nil, variables={}, options={}, &block)
    raise "The namespaces argument, if supplied, must be a hash object." unless namespaces.nil? or namespaces.kind_of?(Hash)
    raise "The variables argument, if supplied, must be a hash object." unless variables.kind_of?(Hash)
    parser = XPathParser.new(**options)
    parser.namespaces = namespaces
    parser.variables = variables
    path = "*" unless path
    element = [element] unless element.kind_of? Array
    parser.parse(path, element).each( &block )
  end

  # Returns an array of nodes matching a given XPath.
  # Takes the same arguments as XPath.first, but does not type-check
  # namespaces or variables.
  def XPath::match(element, path=nil, namespaces=nil, variables={}, options={})
    parser = XPathParser.new(**options)
    parser.namespaces = namespaces
    parser.variables = variables
    path = "*" unless path
    element = [element] unless element.kind_of? Array
    parser.parse(path,element)
  end
end
|
81
|
+
end
|
@@ -0,0 +1,934 @@
|
|
1
|
+
# frozen_string_literal: false
|
2
|
+
require_relative 'namespace'
|
3
|
+
require_relative 'xmltokens'
|
4
|
+
require_relative 'attribute'
|
5
|
+
require_relative 'syncenumerator'
|
6
|
+
require_relative 'parsers/xpathparser'
|
7
|
+
|
8
|
+
class Object
  # provides a unified +clone+ operation, for REXML::XPathParser
  # to use across multiple Object types
  #
  # The generic fallback: simply delegates to #clone.
  def dclone
    clone
  end
end
|
15
|
+
class Symbol
  # provides a unified +clone+ operation, for REXML::XPathParser
  # to use across multiple Object types
  #
  # Returns the receiver itself rather than a copy.
  def dclone
    self
  end
end
|
20
|
+
class Integer
  # provides a unified +clone+ operation, for REXML::XPathParser
  # to use across multiple Object types
  #
  # Returns the receiver itself rather than a copy.
  def dclone
    self
  end
end
|
25
|
+
class Float
  # provides a unified +clone+ operation, for REXML::XPathParser
  # to use across multiple Object types
  #
  # Returns the receiver itself rather than a copy.
  def dclone
    self
  end
end
|
30
|
+
class Array
  # provides a unified +clone+ operation, for REXML::XPathParser
  # to use across multiple Object types
  #
  # Deep copy: the result starts as a #clone of the receiver, is emptied,
  # and is then refilled with the #dclone of every element, so nested
  # arrays are copied recursively.
  def dclone
    klone = self.clone
    klone.clear
    self.each{|v| klone << v.dclone}
    klone
  end
end
|
40
|
+
|
41
|
+
module REXML
|
42
|
+
# You don't want to use this class. Really. Use XPath, which is a wrapper
|
43
|
+
# for this class. Believe me. You don't want to poke around in here.
|
44
|
+
# There is strange, dark magic at work in this code. Beware. Go back! Go
|
45
|
+
# back while you still can!
|
46
|
+
class XPathParser
|
47
|
+
include XMLTokens
|
48
|
+
LITERAL = /^'([^']*)'|^"([^"]*)"/u
|
49
|
+
|
50
|
+
# strict:: stored in @strict and reported by #strict?; consulted by the
#          qname node test when an element name carries an empty prefix.
def initialize(strict: false)
  @parser = REXML::Parsers::XPathParser.new
  @namespaces = nil
  @variables = {}
  @nest = 0
  @strict = strict
end
|
57
|
+
|
58
|
+
# Sets the prefix => URI mapping used by this parser, and also publishes
# it to Functions (module-level state shared by the XPath function
# implementations).
def namespaces=( namespaces={} )
  Functions::namespace_context = namespaces
  @namespaces = namespaces
end
|
62
|
+
|
63
|
+
# Sets the name => value table used to resolve $variables in a query, and
# also publishes it to Functions (module-level state shared by the XPath
# function implementations).
def variables=( vars={} )
  Functions::variables = vars
  @variables = vars
end
|
67
|
+
|
68
|
+
# Parses the XPath string +path+ and evaluates it against +nodeset+ via
# #match. Returns whatever #match returns.
def parse(path, nodeset)
  path_stack = @parser.parse( path )
  match( path_stack, nodeset )
end
|
72
|
+
|
73
|
+
# Parses the XPath string +path+ and hands the result to #first together
# with +nodeset+.
# NOTE(review): #first names its second parameter +node+ and calls
# #children / #name on it, so a single node appears to be expected here
# rather than an Array - confirm against callers.
def get_first(path, nodeset)
  path_stack = @parser.parse( path )
  first( path_stack, nodeset )
end
|
77
|
+
|
78
|
+
# Parses the XPath string +path+ and evaluates it against +nodeset+ via
# #match. The body is the same as #parse.
def predicate(path, nodeset)
  path_stack = @parser.parse( path )
  match( path_stack, nodeset )
end
|
82
|
+
|
83
|
+
# Stores +value+ under +variable_name+ in this parser's variable table.
def []=( variable_name, value )
  @variables[ variable_name ] = value
end
|
86
|
+
|
87
|
+
|
88
|
+
# Performs a depth-first (document order) XPath search, and returns the
|
89
|
+
# first match. This is the fastest, lightest way to return a single result.
|
90
|
+
#
|
91
|
+
# FIXME: This method is incomplete!
|
92
|
+
# Walks +node+ depth-first and returns the first node satisfying the
# parsed path, or nil when there is none.
#
# path_stack:: parsed path, a flat Array such as
#              [:child, :qname, prefix, name, ...]. Only :document,
#              :child, :qname, :descendant_or_self, :node and :any are
#              understood; any other operator yields nil.
# node::       context node; must respond to #children for the :child and
#              :descendant_or_self steps and to #name for :qname.
#
# The path is never mutated; sub-paths are taken by slicing.
def first( path_stack, node )
  return nil if path_stack.size == 0

  case path_stack[0]
  when :document, :node, :any
    # These steps select the context node itself: just skip them.
    return first( path_stack[1..-1], node )
  when :child
    node.children.each do |c|
      r = first( path_stack[1..-1], c )
      return r if r
    end
  when :qname
    # Layout is [:qname, prefix, name]; the prefix is not checked here.
    name = path_stack[2]
    return nil unless node.name == name
    return node if path_stack.size == 3
    return first( path_stack[3..-1], node )
  when :descendant_or_self
    # Try the remainder on this node first, then retry the whole path
    # (still starting with :descendant_or_self) on each child.
    r = first( path_stack[1..-1], node )
    return r if r
    node.children.each do |c|
      r = first( path_stack, c )
      return r if r
    end
  end
  nil
end
|
126
|
+
|
127
|
+
|
128
|
+
# Evaluates an already-parsed path against +nodeset+.
#
# Each input node is wrapped in an XPathNode carrying its 1-based position,
# then the path is evaluated with #expr. An Array result is passed through
# #unnode before being returned; any other result is returned as is.
def match(path_stack, nodeset)
  nodeset = nodeset.collect.with_index do |node, i|
    XPathNode.new(node, position: i + 1)
  end
  result = expr(path_stack, nodeset)
  result.is_a?(Array) ? unnode(result) : result
end
|
141
|
+
|
142
|
+
private
|
143
|
+
# Returns the value of the strict: option given to #initialize.
def strict?
  @strict
end
|
146
|
+
|
147
|
+
# Returns a String namespace for a node, given a prefix
|
148
|
+
# The rules are:
|
149
|
+
#
|
150
|
+
# 1. Use the supplied namespace mapping first.
|
151
|
+
# 2. If no mapping was supplied, use the context node to look up the namespace
|
152
|
+
# Resolves +prefix+ to a namespace String.
#
# When an explicit mapping was supplied (@namespaces), only that mapping is
# consulted and an unknown prefix yields ''. Otherwise the lookup is
# delegated to +node+ if it is an element; other node types yield ''.
def get_namespace( node, prefix )
  return @namespaces[prefix] || '' if @namespaces
  return node.namespace( prefix ) if node.node_type == :element
  ''
end
|
160
|
+
|
161
|
+
|
162
|
+
# Expr takes a stack of path elements and a set of nodes (either a Parent
|
163
|
+
# or an Array) and returns an Array of matching nodes.
|
164
|
+
# Evaluates a parsed path against a node set.
#
# path_stack:: parsed expression; it is CONSUMED (shifted) as evaluation
#              proceeds, so callers that need it again must pass a copy.
# nodeset::    current context nodes (XPathNode wrappers)
# context::    function-call context passed down to nested evaluations;
#              nil at the outermost level
#
# Axis operators replace +nodeset+ and evaluation continues with the next
# operator. Literal, variable, comparison, arithmetic, union, negation and
# function operators return immediately with their value. Raises a
# RuntimeError for an operator that is not handled.
def expr( path_stack, nodeset, context=nil )
  # enter(:expr, path_stack, nodeset)
  return nodeset if path_stack.length == 0 || nodeset.length == 0
  while path_stack.length > 0
    # trace(:while, path_stack, nodeset)
    if nodeset.length == 0
      # Nothing left to select from: drop the rest of the path.
      path_stack.clear
      return []
    end
    op = path_stack.shift
    case op
    when :document
      first_raw_node = nodeset.first.raw_node
      nodeset = [XPathNode.new(first_raw_node.root_node, position: 1)]
    when :self
      nodeset = step(path_stack) do
        [nodeset]
      end
    when :child
      nodeset = step(path_stack) do
        child(nodeset)
      end
    when :literal
      # trace(:literal, path_stack, nodeset)
      return path_stack.shift
    when :attribute
      nodeset = step(path_stack, any_type: :attribute) do
        nodesets = []
        nodeset.each do |node|
          raw_node = node.raw_node
          next unless raw_node.node_type == :element
          attributes = raw_node.attributes
          next if attributes.empty?
          nodesets << attributes.each_attribute.collect.with_index do |attribute, i|
            XPathNode.new(attribute, position: i + 1)
          end
        end
        nodesets
      end
    when :namespace
      pre_defined_namespaces = {
        "xml" => "http://www.w3.org/XML/1998/namespace",
      }
      nodeset = step(path_stack, any_type: :namespace) do
        nodesets = []
        nodeset.each do |node|
          raw_node = node.raw_node
          case raw_node.node_type
          when :element
            if @namespaces
              nodesets << pre_defined_namespaces.merge(@namespaces)
            else
              nodesets << pre_defined_namespaces.merge(raw_node.namespaces)
            end
          when :attribute
            if @namespaces
              nodesets << pre_defined_namespaces.merge(@namespaces)
            else
              nodesets << pre_defined_namespaces.merge(raw_node.element.namespaces)
            end
          end
        end
        nodesets
      end
    when :parent
      nodeset = step(path_stack) do
        nodesets = []
        nodeset.each do |node|
          raw_node = node.raw_node
          # An attribute's parent is the element that owns it.
          if raw_node.node_type == :attribute
            parent = raw_node.element
          else
            parent = raw_node.parent
          end
          nodesets << [XPathNode.new(parent, position: 1)] if parent
        end
        nodesets
      end
    when :ancestor
      nodeset = step(path_stack) do
        nodesets = []
        # new_nodes = {}
        nodeset.each do |node|
          raw_node = node.raw_node
          new_nodeset = []
          while raw_node.parent
            raw_node = raw_node.parent
            # next if new_nodes.key?(node)
            new_nodeset << XPathNode.new(raw_node,
                                         position: new_nodeset.size + 1)
            # new_nodes[node] = true
          end
          nodesets << new_nodeset unless new_nodeset.empty?
        end
        nodesets
      end
    when :ancestor_or_self
      nodeset = step(path_stack) do
        nodesets = []
        # new_nodes = {}
        nodeset.each do |node|
          raw_node = node.raw_node
          next unless raw_node.node_type == :element
          new_nodeset = [XPathNode.new(raw_node, position: 1)]
          # new_nodes[node] = true
          while raw_node.parent
            raw_node = raw_node.parent
            # next if new_nodes.key?(node)
            new_nodeset << XPathNode.new(raw_node,
                                         position: new_nodeset.size + 1)
            # new_nodes[node] = true
          end
          nodesets << new_nodeset unless new_nodeset.empty?
        end
        nodesets
      end
    when :descendant_or_self
      nodeset = step(path_stack) do
        descendant(nodeset, true)
      end
    when :descendant
      nodeset = step(path_stack) do
        descendant(nodeset, false)
      end
    when :following_sibling
      nodeset = step(path_stack) do
        nodesets = []
        nodeset.each do |node|
          raw_node = node.raw_node
          next unless raw_node.respond_to?(:parent)
          next if raw_node.parent.nil?
          all_siblings = raw_node.parent.children
          current_index = all_siblings.index(raw_node)
          following_siblings = all_siblings[(current_index + 1)..-1]
          next if following_siblings.empty?
          nodesets << following_siblings.collect.with_index do |sibling, i|
            XPathNode.new(sibling, position: i + 1)
          end
        end
        nodesets
      end
    when :preceding_sibling
      nodeset = step(path_stack, order: :reverse) do
        nodesets = []
        nodeset.each do |node|
          raw_node = node.raw_node
          next unless raw_node.respond_to?(:parent)
          next if raw_node.parent.nil?
          all_siblings = raw_node.parent.children
          current_index = all_siblings.index(raw_node)
          # Reversed so that position 1 is the nearest preceding sibling.
          preceding_siblings = all_siblings[0, current_index].reverse
          next if preceding_siblings.empty?
          nodesets << preceding_siblings.collect.with_index do |sibling, i|
            XPathNode.new(sibling, position: i + 1)
          end
        end
        nodesets
      end
    when :preceding
      nodeset = step(path_stack, order: :reverse) do
        unnode(nodeset) do |node|
          preceding(node)
        end
      end
    when :following
      nodeset = step(path_stack) do
        unnode(nodeset) do |node|
          following(node)
        end
      end
    when :variable
      var_name = path_stack.shift
      return [@variables[var_name]]

    # :and, :or, :eq, :neq, :lt, :lteq, :gt, :gteq
    # TODO: Special case for :or and :and -- not evaluate the right
    # operand if the left alone determines result (i.e. is true for
    # :or and false for :and).
    when :eq, :neq, :lt, :lteq, :gt, :gteq, :or
      left = expr( path_stack.shift, nodeset.dup, context )
      right = expr( path_stack.shift, nodeset.dup, context )
      res = equality_relational_compare( left, op, right )
      # trace(op, left, right, res)
      return res

    when :and
      left = expr( path_stack.shift, nodeset.dup, context )
      return [] unless left
      # Short-circuit: the right operand is skipped when no element of the
      # left result is truthy.
      if left.respond_to?(:inject) and !left.inject(false) {|a,b| a | b}
        return []
      end
      right = expr( path_stack.shift, nodeset.dup, context )
      res = equality_relational_compare( left, op, right )
      return res

    when :div, :mod, :mult, :plus, :minus
      left = expr(path_stack.shift, nodeset, context)
      right = expr(path_stack.shift, nodeset, context)
      left = unnode(left) if left.is_a?(Array)
      right = unnode(right) if right.is_a?(Array)
      left = Functions::number(left)
      right = Functions::number(right)
      case op
      when :div
        return left / right
      when :mod
        return left % right
      when :mult
        return left * right
      when :plus
        return left + right
      when :minus
        return left - right
      else
        raise "[BUG] Unexpected operator: <#{op.inspect}>"
      end
    when :union
      left = expr( path_stack.shift, nodeset, context )
      right = expr( path_stack.shift, nodeset, context )
      left = unnode(left) if left.is_a?(Array)
      right = unnode(right) if right.is_a?(Array)
      return (left | right)
    when :neg
      res = expr( path_stack, nodeset, context )
      res = unnode(res) if res.is_a?(Array)
      return -Functions.number(res)
    when :not
      # NOTE(review): intentionally empty in the original - the operator is
      # consumed and evaluation simply continues with the next item.
    when :function
      func_name = path_stack.shift.tr('-','_')
      arguments = path_stack.shift
      # A fresh context is built only at the outermost function call.
      subcontext = context ? nil : { :size => nodeset.size }

      res = []
      cont = context
      nodeset.each_with_index do |node, i|
        if subcontext
          if node.is_a?(XPathNode)
            subcontext[:node] = node.raw_node
            subcontext[:index] = node.position
          else
            subcontext[:node] = node
            subcontext[:index] = i
          end
          cont = subcontext
        end
        # Evaluation consumes the argument expressions, so work on a deep
        # copy for every node.
        arg_clone = arguments.dclone
        args = arg_clone.collect do |arg|
          result = expr( arg, [node], cont )
          result = unnode(result) if result.is_a?(Array)
          result
        end
        Functions.context = cont
        res << Functions.send( func_name, *args )
      end
      return res

    else
      raise "[BUG] Unexpected path: <#{op.inspect}>: <#{path_stack.inspect}>"
    end
  end # while
  return nodeset
  # ensure
  #   leave(:expr, path_stack, nodeset)
end
|
428
|
+
|
429
|
+
# Runs one location step.
#
# The block supplies the candidate node sets for the axis (an Array of
# node sets). They are narrowed by the node test at the head of
# +path_stack+, then by every :predicate that follows it; both are shifted
# off +path_stack+. The survivors are merged into one node set and
# re-wrapped in XPathNode objects with fresh 1-based positions.
#
# any_type:: forwarded to #node_test
# order::    forwarded to #sort when several node sets must be merged
def step(path_stack, any_type: :element, order: :forward)
  nodesets = yield
  # enter(:step, path_stack, nodesets)
  nodesets = node_test(path_stack, nodesets, any_type: any_type)
  while path_stack[0] == :predicate
    path_stack.shift # :predicate
    predicate_expression = path_stack.shift.dclone
    nodesets = evaluate_predicate(predicate_expression, nodesets)
  end
  if nodesets.size == 1
    # A single node set is used as is, without sorting.
    ordered_nodeset = nodesets[0]
  else
    raw_nodes = []
    nodesets.each do |nodeset|
      nodeset.each do |node|
        if node.respond_to?(:raw_node)
          raw_nodes << node.raw_node
        else
          raw_nodes << node
        end
      end
    end
    ordered_nodeset = sort(raw_nodes, order)
  end
  new_nodeset = []
  ordered_nodeset.each do |node|
    # TODO: Remove duplicated
    new_nodeset << XPathNode.new(node, position: new_nodeset.size + 1)
  end
  new_nodeset
  # ensure
  #   leave(:step, path_stack, new_nodeset)
end
|
464
|
+
|
465
|
+
# Applies the node test at the head of +path_stack+ to each nodeset.
#
# The operator (and any operands it needs: prefix/name for +:qname+, prefix
# for +:namespace+, target for +:processing_instruction+) is shifted off
# +path_stack+. Each nodeset is then passed through #filter_nodeset with the
# matching rule, so the result has one filtered nodeset per input nodeset.
#
# +any_type+ is the node type accepted by the +:any+ test.
#
# Raises RuntimeError for an operator that is not a known node test.
#
# The enter/leave debug hooks are left disabled for this method.
def node_test(path_stack, nodesets, any_type: :element)
  operator = path_stack.shift
  matcher =
    case operator
    when :qname
      prefix = path_stack.shift
      name = path_stack.shift
      lambda do |node|
        raw_node = node.raw_node
        case raw_node.node_type
        when :element
          if prefix.nil?
            raw_node.name == name
          elsif prefix.empty? && strict?
            raw_node.name == name && raw_node.namespace == ""
          else
            # FIXME: This DOUBLES the time XPath searches take
            ns = get_namespace(raw_node, prefix)
            raw_node.name == name && raw_node.namespace == ns
          end
        when :attribute
          if prefix.nil?
            raw_node.name == name
          elsif prefix.empty?
            # FIXME: This DOUBLES the time XPath searches take
            raw_node.name == name &&
              raw_node.namespace == raw_node.element.namespace
          else
            # FIXME: This DOUBLES the time XPath searches take
            ns = get_namespace(raw_node.element, prefix)
            raw_node.name == name && raw_node.namespace == ns
          end
        else
          false
        end
      end
    when :namespace
      prefix = path_stack.shift
      lambda do |node|
        raw_node = node.raw_node
        case raw_node.node_type
        when :element
          namespaces = @namespaces || raw_node.namespaces
          raw_node.namespace == namespaces[prefix]
        when :attribute
          namespaces = @namespaces || raw_node.element.namespaces
          raw_node.namespace == namespaces[prefix]
        else
          false
        end
      end
    when :any
      lambda { |node| node.raw_node.node_type == any_type }
    when :comment
      lambda { |node| node.raw_node.node_type == :comment }
    when :text
      lambda { |node| node.raw_node.node_type == :text }
    when :processing_instruction
      target = path_stack.shift
      lambda do |node|
        raw_node = node.raw_node
        (raw_node.node_type == :processing_instruction) &&
          (target.empty? || (raw_node.target == target))
      end
    when :node
      lambda { |_node| true }
    else
      message = "[BUG] Unexpected node test: " +
                "<#{operator.inspect}>: <#{path_stack.inspect}>"
      raise message
    end

  nodesets.collect { |nodeset| filter_nodeset(nodeset, &matcher) }
end
|
571
|
+
|
572
|
+
# Returns the nodes of +nodeset+ for which the block is truthy, each wrapped
# in a fresh XPathNode whose position is its 1-based rank among the nodes
# kept so far.
def filter_nodeset(nodeset)
  nodeset.each_with_object([]) do |candidate, kept|
    next unless yield(candidate)
    kept << XPathNode.new(candidate, position: kept.size + 1)
  end
end
|
580
|
+
|
581
|
+
# Filters every nodeset in +nodesets+ by the predicate +expression+.
#
# For each node the expression is evaluated (on a deep clone) with a context
# hash carrying +:size+, +:node+ and +:index+. The outcome decides whether the
# node is kept:
#
# * Numeric   - kept when it equals the node's context position
# * Array     - kept when it contains at least one truthy element
# * otherwise - kept when the value itself is truthy
#
# A one-element Array result is unwrapped to its element before this check.
#
# The context position is XPathNode#position for wrapped nodes and the
# 1-based index within the nodeset for plain nodes. The numeric comparison
# uses this same value, so nodes that are not XPathNode instances no longer
# raise NoMethodError on +position+.
#
# Returns one array of renumbered XPathNode objects per input nodeset.
#
# The enter/leave/trace debug hooks are left disabled for this method.
def evaluate_predicate(expression, nodesets)
  nodesets.collect do |nodeset|
    new_nodeset = []
    subcontext = { :size => nodeset.size }
    nodeset.each_with_index do |node, index|
      if node.is_a?(XPathNode)
        raw_node = node.raw_node
        position = node.position
      else
        raw_node = node
        position = index + 1
      end
      subcontext[:node] = raw_node
      subcontext[:index] = position

      result = expr(expression.dclone, [node], subcontext)
      result = result[0] if result.kind_of? Array and result.length == 1

      matched =
        if result.kind_of? Numeric
          result == position
        elsif result.instance_of? Array
          result.any?
        else
          result
        end
      next unless matched

      new_nodeset << XPathNode.new(node, position: new_nodeset.size + 1)
    end
    new_nodeset
  end
end
|
619
|
+
|
620
|
+
# Debug helper: prints the inspected +args+ on one line, prefixed with one
# indent unit per current nesting level (@nest).
def trace(*args)
  padding = " " * @nest
  puts(padding + args.inspect)
end
|
624
|
+
|
625
|
+
# Debug helper: traces an :enter event for +tag+ at the current nesting
# level, then increases the nesting level by one.
def enter(tag, *args)
  trace(:enter, tag, *args)
  @nest = @nest + 1
end
|
629
|
+
|
630
|
+
# Debug helper: decreases the nesting level by one, then traces a :leave
# event for +tag+ at the restored level.
def leave(tag, *args)
  @nest = @nest - 1
  trace(:leave, tag, *args)
end
|
634
|
+
|
635
|
+
# Reorders an array of nodes so that they are in document order
|
636
|
+
# It tries to do this efficiently.
|
637
|
+
#
|
638
|
+
# FIXME: I need to get rid of this, but the issue is that most of the XPath
|
639
|
+
# interpreter functions as a filter, which means that we lose context going
|
640
|
+
# in and out of function calls. If I knew what the index of the nodes was,
|
641
|
+
# I wouldn't have to do this. Maybe add a document IDX for each node?
|
642
|
+
# Problems with mutable documents. Or, rewrite everything.
|
643
|
+
# Orders +array_of_nodes+ by their location in the tree.
#
# Each node gets a sort key: the list of child indexes leading from its
# topmost element ancestor down to the node (an attribute uses the key of its
# owning element). Keys are compared as arrays.
#
# +order+ is +:forward+ for ascending keys. Any other value sorts on the keys
# with every index negated. The previous code applied unary minus to the key
# Array itself, which Array does not define, so every non-forward sort raised
# NoMethodError.
#
# Returns a new array containing the same nodes.
def sort(array_of_nodes, order)
  keyed_nodes = array_of_nodes.map do |node|
    node_idx = []
    np = node.node_type == :attribute ? node.element : node
    while np.parent and np.parent.node_type == :element
      node_idx << np.parent.index( np )
      np = np.parent
    end
    # Indexes were collected bottom-up; the key runs top-down.
    [ node_idx.reverse, node ]
  end

  ordered = keyed_nodes.sort_by do |index, _node|
    order == :forward ? index : index.map(&:-@)
  end

  ordered.map { |_index, node| node }
end
|
665
|
+
|
666
|
+
# Collects, for every node in +nodeset+, the nodeset gathered by
# #descendant_recursive starting at its raw node. +include_self+ is passed
# through to decide whether the starting node itself is included.
#
# Returns one nodeset per input node; empty nodesets are omitted.
def descendant(nodeset, include_self)
  nodeset.each_with_object([]) do |node, collected|
    found = []
    seen = {}
    descendant_recursive(node.raw_node, found, seen, include_self)
    collected << found unless found.empty?
  end
end
|
676
|
+
|
677
|
+
# Depth-first walk used by #descendant.
#
# When +include_self+ is true, +raw_node+ is appended to +new_nodeset+ as an
# XPathNode (positions count up from 1) and recorded in +new_nodes+; a node
# already recorded there is skipped together with its subtree. Children of
# element and document nodes are always visited with +include_self+ set.
def descendant_recursive(raw_node, new_nodeset, new_nodes, include_self)
  if include_self
    return if new_nodes.key?(raw_node)
    new_nodeset << XPathNode.new(raw_node, position: new_nodeset.size + 1)
    new_nodes[raw_node] = true
  end

  kind = raw_node.node_type
  return unless kind == :element or kind == :document

  raw_node.children.each do |child|
    descendant_recursive(child, new_nodeset, new_nodes, true)
  end
end
|
691
|
+
|
692
|
+
# Builds a nodeset of all of the preceding nodes of the supplied node,
|
693
|
+
# in reverse document order
|
694
|
+
# preceding:: includes every element in the document that precedes this node,
|
695
|
+
# except for ancestors
|
696
|
+
# Walks backwards from +node+ via #preceding_node_of and returns every node
# met on the way, except the ancestors of +node+, as XPathNode objects
# numbered from 1 in the order they were encountered.
def preceding(node)
  pending_ancestors = []
  ancestor = node.parent
  while ancestor
    pending_ancestors << ancestor
    ancestor = ancestor.parent
  end

  collected = []
  cursor = preceding_node_of(node)
  while cursor
    # Array#delete returns nil when the node was not an ancestor.
    unless pending_ancestors.delete(cursor)
      collected << XPathNode.new(cursor, position: collected.size + 1)
    end
    cursor = preceding_node_of(cursor)
  end
  collected
end
|
717
|
+
|
718
|
+
# Returns the node visited just before +node+ in the backwards walk used by
# #preceding.
#
# Without a previous sibling this is the parent, or nil when there is no
# parent or the parent is a Document. With a previous sibling it is that
# sibling's deepest last descendant (following the last child of each
# Element that has children).
def preceding_node_of( node )
  candidate = node.previous_sibling_node
  if candidate.nil?
    owner = node.parent
    return nil if owner.nil? or owner.class == Document
    return owner
  end

  while candidate and candidate.kind_of? Element and candidate.children.size > 0
    candidate = candidate.children[-1]
  end
  candidate
end
|
732
|
+
|
733
|
+
# Starting at the node returned by #next_sibling_node for +node+, keeps
# stepping with #following_node_of until it returns nil. Every node visited
# is returned as an XPathNode, numbered from 1 in visiting order.
def following(node)
  collected = []
  cursor = next_sibling_node(node)
  until !cursor
    collected << XPathNode.new(cursor, position: collected.size + 1)
    cursor = following_node_of(cursor)
  end
  collected
end
|
743
|
+
|
744
|
+
# Returns the node visited right after +node+ in the forward walk used by
# #following: the first child when +node+ is an Element with children,
# otherwise whatever #next_sibling_node yields for it.
def following_node_of( node )
  has_children = (node.kind_of? Element and node.children.size > 0)
  has_children ? node.children[0] : next_sibling_node(node)
end
|
750
|
+
|
751
|
+
# Returns the next sibling of +node+, or of its closest ancestor that has
# one. Climbing stops with nil at a node that has no parent or whose parent
# is a Document.
def next_sibling_node(node)
  current = node
  loop do
    sibling = current.next_sibling_node
    return sibling unless sibling.nil?

    owner = current.parent
    return nil if owner.nil? or owner.class == Document
    current = owner
  end
end
|
762
|
+
|
763
|
+
# Builds the child nodesets for every node in +nodeset+.
#
# * element  - all children, numbered from 1; the nodeset is added even when
#              it is empty
# * document - children other than XMLDecl and Text, numbered from 1; the
#              nodeset is added only when it is not empty
# * other node types contribute nothing
#
# Returns the array of nodesets (arrays of XPathNode).
def child(nodeset)
  nodeset.each_with_object([]) do |node, result_sets|
    raw_node = node.raw_node
    case raw_node.node_type
    when :element
      kids = raw_node.children.each_with_index.map do |child_node, offset|
        XPathNode.new(child_node, position: offset + 1)
      end
      result_sets << kids
    when :document
      kept = []
      raw_node.children.each do |child_node|
        # XMLDecl and Text children of the document are ignored.
        next if XMLDecl === child_node || Text === child_node
        kept << XPathNode.new(child_node, position: kept.size + 1)
      end
      result_sets << kept unless kept.empty?
    end
  end
end
|
789
|
+
|
790
|
+
# Normalizes a value before comparison:
#
# * true / false are returned unchanged
# * the strings 'true' / 'false' go through Functions::boolean
# * Numeric values and digit strings (optionally with a fraction) go through
#   Functions::number
# * everything else goes through Functions::string
def norm(b)
  case b
  when true, false then b
  when 'true', 'false' then Functions::boolean( b )
  when /^\d+(\.\d+)?$/, Numeric then Functions::number( b )
  else Functions::string( b )
  end
end
|
802
|
+
|
803
|
+
# Compares +set1+ and +set2+ with the operator +op+ (a symbol understood by
# #compare). Array operands are first unwrapped with #unnode.
#
# * Both arrays: when either is empty, every item of the other (or of the
#   empty +set2+ when both are empty) is compared with nil; otherwise the
#   items are paired through SyncEnumerator, normalized with #norm and
#   compared pairwise. Returns an array of results.
# * Exactly one array: every item is converted according to the other
#   operand (boolean for true/false, number for Numeric or digit strings,
#   string otherwise) and compared as "item op other", whichever side the
#   array was on. Returns an array of results.
# * No array: both are compared as booleans if either string value is
#   'true'/'false'; otherwise, for :eq/:neq, as numbers if either string
#   value looks numeric and as strings if not; for any other operator as
#   numbers. Returns the single #compare result.
def equality_relational_compare( set1, op, set2 )
  set1 = unnode(set1) if set1.is_a?(Array)
  set2 = unnode(set2) if set2.is_a?(Array)
  left_is_set = set1.kind_of?(Array)
  right_is_set = set2.kind_of?(Array)

  if left_is_set and right_is_set
    if set1.size == 0 or set2.size == 0
      remaining = set1.size == 0 ? set2 : set1
      return remaining.collect { |item| compare( item, op, nil ) }
    end

    outcomes = []
    SyncEnumerator.new( set1, set2 ).each do |lhs, rhs|
      outcomes << compare( norm( lhs ), op, norm( rhs ) )
    end
    return outcomes
  end

  # If one is nodeset and other is number, compare number to each item
  # in nodeset s.t. number op number(string(item))
  # If one is nodeset and other is string, compare string to each item
  # in nodeset s.t. string op string(item)
  # If one is nodeset and other is boolean, compare boolean to each item
  # in nodeset s.t. boolean op boolean(item)
  if left_is_set or right_is_set
    members, other = left_is_set ? [set1, set2] : [set2, set1]

    case other
    when true, false
      return unnode(members) { |v| compare( Functions::boolean(v), op, other ) }
    when Numeric
      return unnode(members) { |v| compare( Functions::number(v), op, other ) }
    when /^\d+(\.\d+)?$/
      other = Functions::number( other )
      return unnode(members) { |v| compare( Functions::number(v), op, other ) }
    else
      other = Functions::string( other )
      return unnode(members) { |v| compare( Functions::string(v), op, other ) }
    end
  end

  # If neither is nodeset,
  #   If op is = or !=
  #     If either boolean, convert to boolean
  #     If either number, convert to number
  #     Else, convert to string
  #   Else
  #     Convert both to numbers and compare
  s1 = Functions.string(set1)
  s2 = Functions.string(set2)
  if s1 == 'true' or s1 == 'false' or s2 == 'true' or s2 == 'false'
    set1 = Functions::boolean( set1 )
    set2 = Functions::boolean( set2 )
  elsif op == :eq or op == :neq
    if s1 =~ /^\d+(\.\d+)?$/ or s2 =~ /^\d+(\.\d+)?$/
      set1 = Functions::number( s1 )
      set2 = Functions::number( s2 )
    else
      set1 = Functions::string( set1 )
      set2 = Functions::string( set2 )
    end
  else
    set1 = Functions::number( set1 )
    set2 = Functions::number( set2 )
  end
  compare( set1, op, set2 )
end
|
881
|
+
|
882
|
+
# Applies the operator named by +op+ to +a+ and +b+.
#
# Supported symbols are :eq, :neq, :lt, :lteq, :gt, :gteq, :and and :or.
# :and / :or return the operand selected by Ruby's short-circuit rules.
# Any other operator yields false.
def compare(a, op, b)
  case op
  when :eq   then a == b
  when :neq  then a != b
  when :lt   then a < b
  when :lteq then a <= b
  when :gt   then a > b
  when :gteq then a >= b
  when :and  then a && b
  when :or   then a || b
  else false
  end
end
|
904
|
+
|
905
|
+
# Returns a new array in which every XPathNode of +nodeset+ is replaced by
# its raw node; other items are passed through. When a block is given, each
# unwrapped item is replaced by the block's result.
def unnode(nodeset)
  nodeset.collect do |node|
    plain = node.is_a?(XPathNode) ? node.raw_node : node
    block_given? ? yield(plain) : plain
  end
end
|
916
|
+
end
|
917
|
+
|
918
|
+
# @private
|
919
|
+
# Pairs a raw node with the XPath evaluation context it was selected in.
class XPathNode
  attr_reader :raw_node, :context

  # +node+ may be a raw node or another XPathNode, in which case its raw node
  # is reused rather than nesting wrappers. +context+ is a hash (an empty one
  # when omitted); callers pass the position as <tt>position: n</tt>.
  def initialize(node, context=nil)
    @raw_node = node.is_a?(XPathNode) ? node.raw_node : node
    @context = context || {}
  end

  # The :position entry of the context, or nil when none was given.
  def position
    context[:position]
  end
end
|
934
|
+
end
|