arboretum 0.0.3 → 0.0.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/arboretum/doctree.rb +1566 -0
- data/lib/arboretum/scandent.rb +882 -0
- data/lib/arboretum/xml.rb +169 -0
- metadata +4 -1
@@ -0,0 +1,882 @@
|
|
1
|
+
module Arboretum
|
2
|
+
module Scandent
|
3
|
+
|
4
|
+
    # Raised by Tokenizer when a Scandent string cannot be broken into tokens.
    class TokenizationException < StandardError
      # msg: human-readable description of the tokenization failure.
      def initialize(msg="An error occurred while tokenizing input")
        super(msg)
      end
    end
|
9
|
+
|
10
|
+
    # Raised by Parser when a token stream cannot be interpreted
    # (e.g. an unknown step type or an unexpected token).
    class ParseException < StandardError
      # msg: human-readable description of the parse failure.
      def initialize(msg="An error occurred while parsing input")
        super(msg)
      end
    end
|
15
|
+
|
16
|
+
    # Raised when a Scandent expression is syntactically tokenizable but
    # semantically invalid (misplaced anchors, duplicate tags, bad arguments).
    class InvalidExpressionException < StandardError
      # msg: human-readable description of why the expression is invalid.
      def initialize(msg="Invalid Scandent expression")
        super(msg)
      end
    end
|
21
|
+
|
22
|
+
class ScandentRule
|
23
|
+
attr_accessor :paths
|
24
|
+
|
25
|
+
def initialize(rule_paths)
|
26
|
+
@paths = rule_paths
|
27
|
+
end
|
28
|
+
|
29
|
+
def valid_on?(element)
|
30
|
+
@paths.each {|path| return true if path.valid_on?(element)}
|
31
|
+
return false
|
32
|
+
end
|
33
|
+
alias_method :selects?, :valid_on?
|
34
|
+
alias_method :matches?, :valid_on?
|
35
|
+
|
36
|
+
def to_s
|
37
|
+
rule_str = ''
|
38
|
+
@paths.each do |path|
|
39
|
+
rule_str << ', ' if !rule_str.empty?
|
40
|
+
rule_str << path.to_s
|
41
|
+
end
|
42
|
+
rule_str
|
43
|
+
end
|
44
|
+
end
|
45
|
+
|
46
|
+
    # Marker subclass produced by Parser.parse_rule_tokens for rules parsed
    # with type :PATH_LISTENER; inherits all behavior from ScandentRule.
    class ListenerRule < ScandentRule
    end
|
48
|
+
|
49
|
+
class LocatorRule < ScandentRule
|
50
|
+
def locate(element)
|
51
|
+
all_located = []
|
52
|
+
@paths.each do |path|
|
53
|
+
path.locate(element) do |located_element|
|
54
|
+
unless all_located.include?(located_element)
|
55
|
+
yield located_element if block_given?
|
56
|
+
all_located << located_element
|
57
|
+
end
|
58
|
+
end
|
59
|
+
end
|
60
|
+
all_located
|
61
|
+
end
|
62
|
+
end
|
63
|
+
|
64
|
+
class ListenerPath
|
65
|
+
attr_accessor :steps
|
66
|
+
|
67
|
+
def initialize(steps=[])
|
68
|
+
# Make sure that the listener stems from the root element
|
69
|
+
if !steps[0].nil? and steps[0].element_ref != :ELEMENT_ROOT
|
70
|
+
if steps[0].element_ref.nil?
|
71
|
+
implied_step = [
|
72
|
+
[:T_TILDE, '~', :STATE_ROOT_PATH],
|
73
|
+
[:T_SLASH2, '//', :STATE_ROOT_PATH]
|
74
|
+
]
|
75
|
+
steps.unshift(Parser.parse_step_tokens(implied_step, :PATH_LISTENER))
|
76
|
+
else steps[0].element_ref == :ELEMENT_SELF
|
77
|
+
steps[0].element_ref = :ELEMENT_ROOT
|
78
|
+
end
|
79
|
+
end
|
80
|
+
# Make sure there are no misplaced :ELEMENT_SELF references
|
81
|
+
steps.each {|step| raise InvalidExpressionException.new if step.element_ref == :ELEMENT_SELF and !step.eql?(steps.first)}
|
82
|
+
|
83
|
+
@steps = steps
|
84
|
+
end
|
85
|
+
|
86
|
+
def valid_on?(candidate)
|
87
|
+
steps_valid_on?(@steps, candidate)
|
88
|
+
end
|
89
|
+
|
90
|
+
def steps_valid_on?(steps, candidate)
|
91
|
+
return true if steps.empty?
|
92
|
+
steps.last.match(candidate) do |element|
|
93
|
+
return true if steps_valid_on?(steps[0..-2], element)
|
94
|
+
end
|
95
|
+
return false
|
96
|
+
end
|
97
|
+
|
98
|
+
def to_s
|
99
|
+
path_string = ''
|
100
|
+
@steps.each{|step| path_string << step.to_s}
|
101
|
+
path_string
|
102
|
+
end
|
103
|
+
|
104
|
+
end
|
105
|
+
|
106
|
+
class LocatorPath
|
107
|
+
attr_accessor :steps
|
108
|
+
|
109
|
+
def initialize(steps=[])
|
110
|
+
# Make sure that the locators stems from the root or current context element
|
111
|
+
if steps.first.element_ref.nil?
|
112
|
+
steps.first.action = :ACTION_CHILD if steps.first.action == :ACTION_SELF
|
113
|
+
implied_step = [
|
114
|
+
[:T_DOT, '.', :STATE_ROOT_PATH],
|
115
|
+
]
|
116
|
+
steps.unshift(Parser.parse_step_tokens(implied_step, :PATH_LOCATOR))
|
117
|
+
end
|
118
|
+
# Make sure there are no misplaced :ELEMENT_SELF references
|
119
|
+
steps.each {|step| raise InvalidExpressionException.new if step.element_ref == :ELEMENT_SELF and !step.eql?(steps.first)}
|
120
|
+
|
121
|
+
@steps = steps
|
122
|
+
end
|
123
|
+
|
124
|
+
def locate(element)
|
125
|
+
matches = [element]
|
126
|
+
next_round = []
|
127
|
+
@steps.each do |step|
|
128
|
+
next_round = []
|
129
|
+
matches.each do |matched_element|
|
130
|
+
next_round += step.match(matched_element)
|
131
|
+
end
|
132
|
+
return next_round if next_round.empty?
|
133
|
+
matches = next_round
|
134
|
+
end
|
135
|
+
matches.each {|match| yield match if block_given?}
|
136
|
+
return matches
|
137
|
+
end
|
138
|
+
|
139
|
+
def valid_on?(candidate)
|
140
|
+
steps_valid_on?(@steps, candidate)
|
141
|
+
end
|
142
|
+
|
143
|
+
def steps_valid_on?(steps, candidate)
|
144
|
+
return true if steps.empty?
|
145
|
+
steps.first.match(candidate) do |element|
|
146
|
+
return true if steps_valid_on?(steps[1..-1], element)
|
147
|
+
end
|
148
|
+
return false
|
149
|
+
end
|
150
|
+
|
151
|
+
def to_s
|
152
|
+
path_string = ''
|
153
|
+
@steps.each{|step| path_string << step.to_s}
|
154
|
+
path_string
|
155
|
+
end
|
156
|
+
end
|
157
|
+
|
158
|
+
class PathStep
|
159
|
+
@@inverse_actions = {
|
160
|
+
:ACTION_CHILD => :ACTION_PARENT,
|
161
|
+
:ACTION_DESCENDANT => :ACTION_ANCESTOR,
|
162
|
+
:ACTION_PARENT => :ACTION_CHILD,
|
163
|
+
:ACTION_ANCESTOR => :ACTION_DESCENDANT,
|
164
|
+
:ACTION_FOLLOWING_SIBLING => :ACTION_PRECEDING_SIBLING,
|
165
|
+
:ACTION_FOLLOWING => :ACTION_PRECEDING,
|
166
|
+
:ACTION_PRECEDING_SIBLING => :ACTION_FOLLOWING_SIBLING,
|
167
|
+
:ACTION_PRECEDING => :ACTION_FOLLOWING,
|
168
|
+
:ACTION_SELF => :ACTION_SELF
|
169
|
+
}
|
170
|
+
@@action_group = {
|
171
|
+
:ACTION_CHILD => :content,
|
172
|
+
:ACTION_DESCENDANT => :descendants,
|
173
|
+
:ACTION_PARENT => :parent,
|
174
|
+
:ACTION_ANCESTOR => :ancestors,
|
175
|
+
:ACTION_FOLLOWING_SIBLING => :sibling_next,
|
176
|
+
:ACTION_FOLLOWING => :following_siblings,
|
177
|
+
:ACTION_PRECEDING_SIBLING => :sibling_prev,
|
178
|
+
:ACTION_PRECEDING => :preceding_siblings,
|
179
|
+
:ACTION_SELF => :itself
|
180
|
+
}
|
181
|
+
@@action_str = {
|
182
|
+
:ACTION_CHILD => '/'.freeze,
|
183
|
+
:ACTION_DESCENDANT => '//'.freeze,
|
184
|
+
:ACTION_PARENT => '/..'.freeze,
|
185
|
+
:ACTION_ANCESTOR => '/...'.freeze,
|
186
|
+
:ACTION_FOLLOWING_SIBLING => '/>'.freeze,
|
187
|
+
:ACTION_FOLLOWING => '/>>'.freeze,
|
188
|
+
:ACTION_PRECEDING_SIBLING => '/<'.freeze,
|
189
|
+
:ACTION_PRECEDING => '/<<'.freeze,
|
190
|
+
:ACTION_SELF => ''.freeze
|
191
|
+
}
|
192
|
+
|
193
|
+
attr_accessor :action, :element_ref, :tag, :namespace, :id, :attrs, :pseudo_exps, :valid_rules
|
194
|
+
|
195
|
+
def initialize(action, element_ref, tag, namespace, id, attrs, pseudo_exps, valid_rules)
|
196
|
+
@action = action
|
197
|
+
@element_ref = element_ref
|
198
|
+
@tag = tag.nil? ? nil : tag.to_sym
|
199
|
+
@namespace = namespace.nil? ? nil : namespace.to_sym
|
200
|
+
@id = id
|
201
|
+
@attrs = attrs
|
202
|
+
@pseudo_exps = pseudo_exps
|
203
|
+
@valid_rules = valid_rules
|
204
|
+
end
|
205
|
+
|
206
|
+
# Check if all fields match that of the given element, with no attention paid to the action
|
207
|
+
# FIXME: Search time improves by 25% when a tag is given. Reject non-tagged Elements more quickly
|
208
|
+
# to get a similar performance boost across the board
|
209
|
+
def describes?(element)
|
210
|
+
if element.kind_of?(Arboretum::DocTree::Elements::TaggedElement)
|
211
|
+
return false if !@tag.nil? and element.tag != @tag
|
212
|
+
return false if !@namespace.nil? and element.namespace != @namespace
|
213
|
+
return false if !@id.nil? and !element.equals_attr_val?(:id, [@id])
|
214
|
+
@attrs[:has].each {|attr_name| return false if !element.has_attr?(attr_name)}
|
215
|
+
@attrs[:contains].each {|name,values| values.each {|value| return false if !element.contains_attr_val?(name,value)}}
|
216
|
+
@attrs[:equals].each {|name,values| values.each {|value| return false if !element.equals_attr_val?(name,value)}}
|
217
|
+
@attrs[:matches].each {|name,values| values.each {|value| return false if !element.matches_attr_val?(name,value)}}
|
218
|
+
return false if element_ref == :ELEMENT_ROOT and !element.parent.nil?
|
219
|
+
else
|
220
|
+
return false if !@tag.nil? or
|
221
|
+
!@namespace.nil? or
|
222
|
+
!@id.nil? or
|
223
|
+
!@attrs[:has].empty? or
|
224
|
+
!@attrs[:contains].empty? or
|
225
|
+
!@attrs[:equals].empty? or
|
226
|
+
!@attrs[:matches].empty? or
|
227
|
+
(element_ref == :ELEMENT_ROOT and !element.parent.nil?)
|
228
|
+
end
|
229
|
+
@pseudo_exps.each do |psuedo_name, pseudo_arg|
|
230
|
+
return false if !PseudoElements.match(element, pseudo_name, pseudo_arg)
|
231
|
+
end
|
232
|
+
@valid_rules.each do |rule|
|
233
|
+
return false if !rule.valid_on?(element)
|
234
|
+
end
|
235
|
+
return true
|
236
|
+
end
|
237
|
+
|
238
|
+
def to_s_sans_action
|
239
|
+
step_str = ''
|
240
|
+
if !@element_ref.nil?
|
241
|
+
if @element_ref == :ELEMENT_ROOT
|
242
|
+
step_str << '~'
|
243
|
+
elsif @element_ref == :ELEMENT_SELF
|
244
|
+
step_str << '.'
|
245
|
+
end
|
246
|
+
end
|
247
|
+
step_str << "%#{@tag.to_s}" if !tag.nil?
|
248
|
+
step_str << "@#{@namespace.to_s}" if !namespace.nil?
|
249
|
+
step_str << "##{@id}" if !@id.nil?
|
250
|
+
@attrs[:has].each {|attr_name| step_str << "[#{attr_name}]"}
|
251
|
+
@attrs[:contains].each {|name,values| values.each {|value| step_str << "[#{name}=\"#{value}\"]"}}
|
252
|
+
@attrs[:equals].each {|name,values| values.each {|value| step_str << "[#{name}==\"#{value.join(' ')}\"]"}}
|
253
|
+
@attrs[:matches].each {|name,values| values.each {|value| step_str << "[#{name}~=|/#{value}/|]"}}
|
254
|
+
@valid_rules.each {|rule| step_str << "{#{rule.to_s}}"}
|
255
|
+
@pseudo_exps.each {|pseudo, arg| step_str << ":#{pseudo}(#{arg.to_s})"}
|
256
|
+
step_str
|
257
|
+
end
|
258
|
+
end
|
259
|
+
|
260
|
+
class ListenerStep < PathStep
|
261
|
+
# Take step action and check for elements matching description
|
262
|
+
def match(element)
|
263
|
+
result = []
|
264
|
+
inverse_action = @@inverse_actions[@action]
|
265
|
+
search_group = element.public_send(@@action_group[inverse_action])
|
266
|
+
search_group.listing.each do |searched_element|
|
267
|
+
yield searched_element if self.describes?(searched_element) and block_given?
|
268
|
+
result << searched_element if self.describes?(searched_element)
|
269
|
+
end
|
270
|
+
result
|
271
|
+
end
|
272
|
+
|
273
|
+
def to_s
|
274
|
+
step_str = self.to_s_sans_action
|
275
|
+
step_str << @@action_str[@action]
|
276
|
+
end
|
277
|
+
end
|
278
|
+
|
279
|
+
class LocatorStep < PathStep
|
280
|
+
# Take step action and check for elements matching description
|
281
|
+
def match(element)
|
282
|
+
result = []
|
283
|
+
search_group = element.public_send(@@action_group[@action])
|
284
|
+
search_group.listing.each do |searched_element|
|
285
|
+
yield searched_element if self.describes?(searched_element) and block_given?
|
286
|
+
result << searched_element if self.describes?(searched_element)
|
287
|
+
end
|
288
|
+
result
|
289
|
+
end
|
290
|
+
|
291
|
+
def to_s
|
292
|
+
step_str = '' # Don't copy reference to @@action_str[@action]
|
293
|
+
step_str << @@action_str[@action]
|
294
|
+
step_str << self.to_s_sans_action
|
295
|
+
end
|
296
|
+
end
|
297
|
+
|
298
|
+
class Formula
|
299
|
+
def initialize(form_tokens)
|
300
|
+
@coefficient = 0
|
301
|
+
@intercept = 0
|
302
|
+
form_token_types = form_tokens.map {|token| token[0]}
|
303
|
+
if form_token_types[0] == :T_KEY_EVEN
|
304
|
+
@coefficient = 2
|
305
|
+
@intercept = 0
|
306
|
+
elsif form_token_types[0] == :T_KEY_ODD
|
307
|
+
@coefficient = 2
|
308
|
+
@intercept = 1
|
309
|
+
else
|
310
|
+
term_negative = false
|
311
|
+
term_coef = false
|
312
|
+
value = 0
|
313
|
+
|
314
|
+
index = 0
|
315
|
+
while index < form_tokens.length
|
316
|
+
case form_token_types[index]
|
317
|
+
when :T_FORM_PLUS
|
318
|
+
# Resolve Term
|
319
|
+
if term_coef
|
320
|
+
@coefficient += value
|
321
|
+
else
|
322
|
+
@intercept += value
|
323
|
+
end
|
324
|
+
# Reset with new sign
|
325
|
+
term_negative = false
|
326
|
+
term_coef = false
|
327
|
+
value = 0
|
328
|
+
when :T_FORM_MINUS
|
329
|
+
# Resolve Term
|
330
|
+
if term_coef
|
331
|
+
@coefficient += value
|
332
|
+
else
|
333
|
+
@intercept += value
|
334
|
+
end
|
335
|
+
# Reset with new sign
|
336
|
+
term_negative = true
|
337
|
+
term_coef = false
|
338
|
+
value = 0
|
339
|
+
when :LITERAL_INT
|
340
|
+
value = form_tokens[index][1].to_i
|
341
|
+
value *= -1 if term_negative
|
342
|
+
when :T_FORM_N
|
343
|
+
term_coef = true
|
344
|
+
value = 1 if value.zero?
|
345
|
+
else
|
346
|
+
raise InvalidExpressionException.new
|
347
|
+
end
|
348
|
+
index += 1
|
349
|
+
end
|
350
|
+
# Resolve one last time
|
351
|
+
if term_coef
|
352
|
+
@coefficient += value
|
353
|
+
else
|
354
|
+
@intercept += value
|
355
|
+
end
|
356
|
+
end
|
357
|
+
end
|
358
|
+
|
359
|
+
def to_s
|
360
|
+
"#{@coefficient}n#{'+' if @intercept >= 0}#{@intercept}"
|
361
|
+
end
|
362
|
+
end
|
363
|
+
|
364
|
+
class PseudoElements
|
365
|
+
def self.match(element, pseudo_name, pseudo_arg)
|
366
|
+
PseudoElements.public_send(:pseudo_name, element, pseudo_arg)
|
367
|
+
end
|
368
|
+
end
|
369
|
+
|
370
|
+
# Parser interprets tokens formed by a Scandent string and formed ScandentRules
|
371
|
+
# that represent the interpreted form of the string
|
372
|
+
# ScandentRules can then be matched to Elements in a DocTree
|
373
|
+
class Parser
|
374
|
+
@@actions = {
|
375
|
+
:T_SLASH => :ACTION_CHILD,
|
376
|
+
:T_SLASH2 => :ACTION_DESCENDANT,
|
377
|
+
:T_SLASHDOT2 => :ACTION_PARENT,
|
378
|
+
:T_SLASHDOT3 => :ACTION_ANCESTOR,
|
379
|
+
:T_SLASHGT => :ACTION_FOLLOWING_SIBLING,
|
380
|
+
:T_SLASHGT2 => :ACTION_FOLLOWING,
|
381
|
+
:T_SLASHLT => :ACTION_PRECEDING_SIBLING,
|
382
|
+
:T_SLASHLT2 => :ACTION_PRECEDING
|
383
|
+
}
|
384
|
+
|
385
|
+
# Parse a Scandent string by giving it to the Tokenizer and then parsing the results
|
386
|
+
def self.parse_rule_string(rule_string, type)
|
387
|
+
Parser.parse_rule_tokens(Tokenizer.tokenize(rule_string), type)
|
388
|
+
end
|
389
|
+
|
390
|
+
# Directly parse Scandent string tokens
|
391
|
+
def self.parse_rule_tokens(rule_tokens, type)
|
392
|
+
# Separate rules into its comma-delimited paths and remove the delimiter
|
393
|
+
rule_paths_tokens = rule_tokens.slice_after {|token| token[0] == :T_COMMA and token[2] == :STATE_ROOT_PATH}.to_a
|
394
|
+
rule_paths_tokens.each {|path_tokens| path_tokens.pop if path_tokens.last[0] == :T_COMMA}
|
395
|
+
|
396
|
+
# Parse each path individually
|
397
|
+
rule_paths = rule_paths_tokens.map{|path| Parser.parse_path_tokens(path, type)}
|
398
|
+
|
399
|
+
if type == :PATH_LISTENER
|
400
|
+
ListenerRule.new(rule_paths)
|
401
|
+
elsif type == :PATH_LOCATOR
|
402
|
+
LocatorRule.new(rule_paths)
|
403
|
+
else
|
404
|
+
raise ParseException.new("Unknown step type")
|
405
|
+
end
|
406
|
+
end
|
407
|
+
|
408
|
+
# Parse an individual path of a rule
|
409
|
+
def self.parse_path_tokens(path_tokens, type)
|
410
|
+
# Double check that the end state of the path is valid
|
411
|
+
raise InvalidExpressionException.new("End state of path is '#{path_tokens.last[2]}' instead of :STATE_ROOT_PATH") if path_tokens.last[2] != :STATE_ROOT_PATH
|
412
|
+
|
413
|
+
step_delimiters = [:T_SLASH, :T_SLASH2, :T_SLASHDOT2, :T_SLASHDOT3, :T_SLASHGT, :T_SLASHGT2, :T_SLASHLT, :T_SLASHLT2]
|
414
|
+
|
415
|
+
# Seperate paths into its steps
|
416
|
+
if type == :PATH_LISTENER
|
417
|
+
path_steps_tokens = path_tokens.slice_after {|token| step_delimiters.include? token[0] and token[2] == :STATE_ROOT_PATH}.to_a
|
418
|
+
elsif type == :PATH_LOCATOR
|
419
|
+
path_steps_tokens = path_tokens.slice_before {|token| step_delimiters.include? token[0] and token[2] == :STATE_ROOT_PATH}.to_a
|
420
|
+
else
|
421
|
+
raise ParseException.new("Unknown step type")
|
422
|
+
end
|
423
|
+
|
424
|
+
# Parse each step individually
|
425
|
+
path_steps = path_steps_tokens.map{|step| Parser.parse_step_tokens(step, type)}
|
426
|
+
|
427
|
+
if type == :PATH_LISTENER
|
428
|
+
ListenerPath.new(path_steps)
|
429
|
+
else
|
430
|
+
LocatorPath.new(path_steps)
|
431
|
+
end
|
432
|
+
end
|
433
|
+
|
434
|
+
# Parse an individual step of a path
|
435
|
+
def self.parse_step_tokens(step_tokens, type)
|
436
|
+
# Remove whitespace tokens
|
437
|
+
step_tokens.delete_if {|token| token[0] == :T_WHITESPACE}
|
438
|
+
|
439
|
+
if type == :PATH_LISTENER
|
440
|
+
action = @@actions.has_key?(step_tokens.last[0]) ? @@actions[step_tokens.last[0]] : :ACTION_SELF
|
441
|
+
elsif type == :PATH_LOCATOR
|
442
|
+
action = @@actions.has_key?(step_tokens.first[0]) ? @@actions[step_tokens.first[0]] : :ACTION_SELF
|
443
|
+
else
|
444
|
+
raise ParseException.new("Unknown step type")
|
445
|
+
end
|
446
|
+
|
447
|
+
element_ref = []
|
448
|
+
tag = []
|
449
|
+
namespace = []
|
450
|
+
id = []
|
451
|
+
attrs = {
|
452
|
+
:has => [],
|
453
|
+
:contains => Hash.new{|k, v| k[v] = Array.new},
|
454
|
+
:equals => Hash.new{|k, v| k[v] = Array.new},
|
455
|
+
:matches => Hash.new{|k, v| k[v] = Array.new}
|
456
|
+
}
|
457
|
+
pseudo_exps = []
|
458
|
+
valid_rules = []
|
459
|
+
|
460
|
+
index = 0
|
461
|
+
state = :STATE_ROOT_PATH
|
462
|
+
while index < step_tokens.length
|
463
|
+
# Consume current token and increment
|
464
|
+
index_token = step_tokens[index]
|
465
|
+
index += 1
|
466
|
+
|
467
|
+
case index_token[0]
|
468
|
+
when :T_PCT
|
469
|
+
following_token = step_tokens[index]
|
470
|
+
index += 1
|
471
|
+
raise InvalidExpressionException.new if following_token[0] != :LITERAL_IDENT
|
472
|
+
tag << following_token[1]
|
473
|
+
when :T_AT
|
474
|
+
following_token = step_tokens[index]
|
475
|
+
index += 1
|
476
|
+
raise InvalidExpressionException.new if following_token[0] != :LITERAL_IDENT
|
477
|
+
namespace << following_token[1]
|
478
|
+
when :T_PND
|
479
|
+
following_token = step_tokens[index]
|
480
|
+
index += 1
|
481
|
+
raise InvalidExpressionException.new if following_token[0] != :LITERAL_IDENT
|
482
|
+
id << following_token[1]
|
483
|
+
when :T_COLON
|
484
|
+
following_token = step_tokens[index]
|
485
|
+
index += 1
|
486
|
+
raise InvalidExpressionException.new if following_token[0] != :LITERAL_IDENT
|
487
|
+
pseudo_name = following_token[1].to_sym
|
488
|
+
arg_tokens = []
|
489
|
+
|
490
|
+
following_token = step_tokens[index]
|
491
|
+
if following_token[0] == :T_LPAREN
|
492
|
+
index += 1 # To consume the LPAREN
|
493
|
+
following_token = step_tokens[index]
|
494
|
+
index += 1
|
495
|
+
until following_token[0] == :T_RPAREN or index > step_tokens.length
|
496
|
+
arg_tokens << following_token
|
497
|
+
following_token = step_tokens[index]
|
498
|
+
index += 1
|
499
|
+
end
|
500
|
+
raise InvalidExpressionException.new if index > step_tokens.length # Undesirable exit condition to above loop
|
501
|
+
index += 1 # To consume the RPAREN
|
502
|
+
end
|
503
|
+
pseudo_exps << [pseudo_name, Parser.parse_arg(arg_tokens)]
|
504
|
+
when :T_ASTERISK # Adds no restrictions, so do nothing
|
505
|
+
when :T_TILDE
|
506
|
+
element_ref << :ELEMENT_ROOT
|
507
|
+
when :T_DOT
|
508
|
+
element_ref << :ELEMENT_SELF
|
509
|
+
when :T_LBRAK
|
510
|
+
following_token = step_tokens[index]
|
511
|
+
index += 1
|
512
|
+
raise InvalidExpressionException.new if following_token[0] != :LITERAL_IDENT
|
513
|
+
attr_name = following_token[1].to_sym
|
514
|
+
attr_value = nil
|
515
|
+
operation = nil
|
516
|
+
|
517
|
+
following_token = step_tokens[index]
|
518
|
+
index += 1
|
519
|
+
case following_token[0]
|
520
|
+
when :T_EQL
|
521
|
+
operation = :contains
|
522
|
+
|
523
|
+
following_token = step_tokens[index]
|
524
|
+
index += 1
|
525
|
+
raise InvalidExpressionException.new("Expected a string after '='") if ![:T_DQUOTE, :T_SQUOTE].include?(following_token[0])
|
526
|
+
string_limiter = following_token[0]
|
527
|
+
|
528
|
+
following_token = step_tokens[index]
|
529
|
+
index += 1
|
530
|
+
raise InvalidExpressionException.new if not [:LITERAL_IDENT, :LITERAL_STRING, :LITERAL_INT, :LITERAL_FLOAT].include?(following_token[0])
|
531
|
+
attr_value = following_token[1]
|
532
|
+
|
533
|
+
following_token = step_tokens[index]
|
534
|
+
index += 1
|
535
|
+
raise InvalidExpressionException.new if following_token[0] != string_limiter
|
536
|
+
|
537
|
+
following_token = step_tokens[index]
|
538
|
+
index += 1
|
539
|
+
raise InvalidExpressionException.new if following_token[0] != :T_RBRAK
|
540
|
+
when :T_EQL2
|
541
|
+
operation = :equals
|
542
|
+
following_token = step_tokens[index]
|
543
|
+
index += 1
|
544
|
+
raise InvalidExpressionException.new if ![:T_DQUOTE, :T_SQUOTE].include?(following_token[0])
|
545
|
+
string_limiter = following_token[0]
|
546
|
+
|
547
|
+
following_token = step_tokens[index]
|
548
|
+
index += 1
|
549
|
+
raise InvalidExpressionException.new if not [:LITERAL_IDENT, :LITERAL_STRING, :LITERAL_INT, :LITERAL_FLOAT].include?(following_token[0])
|
550
|
+
attr_value = following_token[1].split
|
551
|
+
|
552
|
+
following_token = step_tokens[index]
|
553
|
+
index += 1
|
554
|
+
raise InvalidExpressionException.new if following_token[0] != string_limiter
|
555
|
+
|
556
|
+
following_token = step_tokens[index]
|
557
|
+
index += 1
|
558
|
+
raise InvalidExpressionException.new if following_token[0] != :T_RBRAK
|
559
|
+
when :T_TILDE_EQL
|
560
|
+
operation = :matches
|
561
|
+
|
562
|
+
following_token = step_tokens[index]
|
563
|
+
index += 1
|
564
|
+
raise InvalidExpressionException.new if following_token[0] != :T_VBARSLASH
|
565
|
+
|
566
|
+
following_token = step_tokens[index]
|
567
|
+
index += 1
|
568
|
+
raise InvalidExpressionException.new if not [:LITERAL_IDENT, :LITERAL_STRING, :LITERAL_INT, :LITERAL_FLOAT].include?(following_token[0])
|
569
|
+
attr_value = Regexp.new(following_token[1])
|
570
|
+
|
571
|
+
following_token = step_tokens[index]
|
572
|
+
index += 1
|
573
|
+
raise InvalidExpressionException.new if following_token[0] != :T_SLASHVBAR
|
574
|
+
|
575
|
+
following_token = step_tokens[index]
|
576
|
+
index += 1
|
577
|
+
raise InvalidExpressionException.new if following_token[0] != :T_RBRAK
|
578
|
+
when :T_RBRAK
|
579
|
+
operation = nil
|
580
|
+
else
|
581
|
+
raise InvalidExpressionException.new
|
582
|
+
end
|
583
|
+
if operation.nil?
|
584
|
+
attrs[:has] << attr_name
|
585
|
+
else
|
586
|
+
attrs[operation][attr_name] << attr_value
|
587
|
+
end
|
588
|
+
when :T_LBRACE
|
589
|
+
equilibrium = 1
|
590
|
+
reformed_path_string = ''
|
591
|
+
following_token = step_tokens[index]
|
592
|
+
index += 1
|
593
|
+
until (following_token[0] == :T_RBRACE and equilibrium.zero?) or index > step_tokens.length
|
594
|
+
reformed_path_string << following_token[1]
|
595
|
+
following_token = step_tokens[index]
|
596
|
+
index += 1
|
597
|
+
equilibrium += 1 if following_token[0] == :T_LBRACE
|
598
|
+
equilibrium -= 1 if following_token[0] == :T_RBRACE
|
599
|
+
end
|
600
|
+
raise InvalidExpressionException.new("Could not find matching R_BRACE in #{reformed_path_string}") if index > step_tokens.length # Undesirable exit condition to above loop
|
601
|
+
valid_rules << Parser.parse_rule_string(reformed_path_string, :PATH_LOCATOR)
|
602
|
+
when :T_SLASH, :T_SLASH2, :T_SLASHDOT2, :T_SLASHDOT3, :T_SLASHGT, :T_SLASHGT2, :T_SLASHLT, :T_SLASHLT2
|
603
|
+
else
|
604
|
+
raise ParseException.new("Consumed unexpected token: #{index_token}")
|
605
|
+
end
|
606
|
+
end # All tokens consumed
|
607
|
+
|
608
|
+
# Validate results
|
609
|
+
raise InvalidExpressionException.new if tag.length > 1
|
610
|
+
raise InvalidExpressionException.new if namespace.length > 1
|
611
|
+
raise InvalidExpressionException.new if element_ref.length > 1
|
612
|
+
|
613
|
+
if type == :PATH_LISTENER
|
614
|
+
ListenerStep.new(action, element_ref[0], tag[0], namespace[0], id[0], attrs, pseudo_exps, valid_rules)
|
615
|
+
else
|
616
|
+
LocatorStep.new(action, element_ref[0], tag[0], namespace[0], id[0], attrs, pseudo_exps, valid_rules)
|
617
|
+
end
|
618
|
+
end
|
619
|
+
|
620
|
+
# Parse an argument given to a pseudo-class
|
621
|
+
def self.parse_arg(arg_tokens)
|
622
|
+
arg_token_types = arg_tokens.map {|token| token[0]}
|
623
|
+
if ((arg_token_types[0] == :T_SQUOTE and arg_token_types[2] == :T_SQUOTE) or (arg_token_types[0] == :T_DQUOTE and arg_token_types[2] == :T_DQUOTE)) and arg_tokens.length == 3
|
624
|
+
arg_tokens[1][1]
|
625
|
+
elsif (arg_token_types[0] == :T_VBARSLASH and arg_token_types[2] == :T_SLASHVBAR) and arg_tokens.length == 3
|
626
|
+
Regexp.new(arg_tokens[1][1])
|
627
|
+
elsif (!(arg_token_types & [:T_KEY_EVEN, :T_KEY_ODD]).empty? and arg_tokens.length == 1) or arg_token_types.include? :T_FORM_N
|
628
|
+
Formula.new(arg_tokens)
|
629
|
+
elsif arg_token_types[0] == :LITERAL_INT and arg_tokens.length == 1
|
630
|
+
arg_tokens[0][1].to_i
|
631
|
+
elsif arg_token_types[0] == :LITERAL_FLOAT and arg_tokens.length == 1
|
632
|
+
arg_tokens[0][1].to_f
|
633
|
+
elsif arg_tokens.length.zero?
|
634
|
+
nil
|
635
|
+
else
|
636
|
+
arg_str = ''
|
637
|
+
arg_tokens.each {|token| arg_str << token[1]}
|
638
|
+
raise InvalidExpressionException.new("Invalid argument '#{arg_str}'")
|
639
|
+
nil
|
640
|
+
end
|
641
|
+
end
|
642
|
+
|
643
|
+
end # Parser
|
644
|
+
|
645
|
+
# A class with class methods used to tokenize a Scandent string
|
646
|
+
# Has information regarding which character patterns match which tokens
|
647
|
+
# Has information regarding which tokens trigger which state in the tokenizer
|
648
|
+
class Tokenizer
|
649
|
+
# A Hash with keys cooresponding to states, and value Hashes that match patterns in that state to tokens
|
650
|
+
@@tokens = {
|
651
|
+
:STATE_ROOT_PATH => {
|
652
|
+
',' => :T_COMMA,
|
653
|
+
'~' => :T_TILDE,
|
654
|
+
'//' => :T_SLASH2,
|
655
|
+
'/' => :T_SLASH,
|
656
|
+
'.' => :T_DOT,
|
657
|
+
'/..' => :T_SLASHDOT2,
|
658
|
+
'/...' => :T_SLASHDOT3,
|
659
|
+
'/>' => :T_SLASHGT,
|
660
|
+
'/>>' => :T_SLASHGT2,
|
661
|
+
'/<' => :T_SLASHLT,
|
662
|
+
'/<<' => :T_SLASHLT2,
|
663
|
+
'*' => :T_ASTERISK,
|
664
|
+
'{' => :T_LBRACE,
|
665
|
+
'[' => :T_LBRAK,
|
666
|
+
'(' => :T_LPAREN,
|
667
|
+
':' => :T_COLON,
|
668
|
+
'#' => :T_PND,
|
669
|
+
'%' => :T_PCT,
|
670
|
+
'@' => :T_AT,
|
671
|
+
# Whitespace
|
672
|
+
"\n" => :T_WHITESPACE,
|
673
|
+
"\t" => :T_WHITESPACE,
|
674
|
+
' ' => :T_WHITESPACE
|
675
|
+
},
|
676
|
+
:STATE_SSTRING => {
|
677
|
+
'\'' => :T_SQUOTE # Closing Token
|
678
|
+
},
|
679
|
+
:STATE_DSTRING => {
|
680
|
+
'"' => :T_DQUOTE, # Closing Token
|
681
|
+
},
|
682
|
+
:STATE_REGEX => {
|
683
|
+
'/|' => :T_SLASHVBAR # Closing Token
|
684
|
+
},
|
685
|
+
:STATE_ATTR_EXP => {
|
686
|
+
']' => :T_RBRAK, # Closing Token
|
687
|
+
'=' => :T_EQL,
|
688
|
+
'==' => :T_EQL2,
|
689
|
+
'~=' => :T_TILDE_EQL,
|
690
|
+
'"' => :T_DQUOTE,
|
691
|
+
'\'' => :T_SQUOTE,
|
692
|
+
'|/' => :T_VBARSLASH,
|
693
|
+
# Whitespace
|
694
|
+
"\n" => :T_WHITESPACE,
|
695
|
+
"\t" => :T_WHITESPACE,
|
696
|
+
' ' => :T_WHITESPACE
|
697
|
+
},
|
698
|
+
:STATE_PATH_EXP => {
|
699
|
+
'}' => :T_RBRACE, # Closing Token
|
700
|
+
# From ROOT_PATH
|
701
|
+
',' => :T_COMMA,
|
702
|
+
'~' => :T_TILDE,
|
703
|
+
'//' => :T_SLASH2,
|
704
|
+
'/' => :T_SLASH,
|
705
|
+
'.' => :T_DOT,
|
706
|
+
'/..' => :T_SLASHDOT2,
|
707
|
+
'/...' => :T_SLASHDOT3,
|
708
|
+
'/>' => :T_SLASHGT,
|
709
|
+
'/>>' => :T_SLASHGT2,
|
710
|
+
'/<' => :T_SLASHLT,
|
711
|
+
'/<<' => :T_SLASHLT2,
|
712
|
+
'*' => :T_ASTERISK,
|
713
|
+
'{' => :T_LBRACE,
|
714
|
+
'[' => :T_LBRAK,
|
715
|
+
'(' => :T_LPAREN,
|
716
|
+
':' => :T_COLON,
|
717
|
+
'#' => :T_PND,
|
718
|
+
'%' => :T_PCT,
|
719
|
+
'@' => :T_AT,
|
720
|
+
# Whitespace
|
721
|
+
"\n" => :T_WHITESPACE,
|
722
|
+
"\t" => :T_WHITESPACE,
|
723
|
+
' ' => :T_WHITESPACE
|
724
|
+
},
|
725
|
+
:STATE_ARGS => {
|
726
|
+
')' => :T_RPAREN, # Closing Token
|
727
|
+
'|/' => :T_VBARSLASH,
|
728
|
+
'n' => :T_FORM_N,
|
729
|
+
'+' => :T_FORM_PLUS,
|
730
|
+
'-' => :T_FORM_MINUS,
|
731
|
+
'even' => :T_KEY_EVEN,
|
732
|
+
'odd' => :T_KEY_ODD,
|
733
|
+
'"' => :T_DQUOTE,
|
734
|
+
'\'' => :T_SQUOTE,
|
735
|
+
# Whitespace
|
736
|
+
"\n" => :T_WHITESPACE,
|
737
|
+
"\t" => :T_WHITESPACE,
|
738
|
+
' ' => :T_WHITESPACE
|
739
|
+
},
|
740
|
+
}
|
741
|
+
# A Hash with keys cooresponding to states, and value Hashes that describe what tokens trigger the closing
|
742
|
+
# of the *current* state or the opening of a *different* state
|
743
|
+
@@triggers = {
|
744
|
+
:STATE_ROOT_PATH => {
|
745
|
+
:open => {
|
746
|
+
:T_LBRACE => :STATE_PATH_EXP,
|
747
|
+
:T_LBRAK => :STATE_ATTR_EXP,
|
748
|
+
:T_LPAREN => :STATE_ARGS
|
749
|
+
},
|
750
|
+
:close => []
|
751
|
+
},
|
752
|
+
:STATE_SSTRING => {
|
753
|
+
:open => {},
|
754
|
+
:close => [:T_SQUOTE]
|
755
|
+
},
|
756
|
+
:STATE_DSTRING => {
|
757
|
+
:open => {},
|
758
|
+
:close => [:T_DQUOTE]
|
759
|
+
},
|
760
|
+
:STATE_REGEX => {
|
761
|
+
:open => {},
|
762
|
+
:close => [:T_SLASHVBAR]
|
763
|
+
},
|
764
|
+
:STATE_ATTR_EXP => {
|
765
|
+
:open => {
|
766
|
+
:T_SQUOTE => :STATE_SSTRING,
|
767
|
+
:T_DQUOTE => :STATE_DSTRING,
|
768
|
+
:T_VBARSLASH => :STATE_REGEX
|
769
|
+
},
|
770
|
+
:close => [:T_RBRAK]
|
771
|
+
},
|
772
|
+
:STATE_PATH_EXP => {
|
773
|
+
:open => {
|
774
|
+
:T_LBRACE => :STATE_PATH_EXP,
|
775
|
+
:T_LBRAK => :STATE_ATTR_EXP,
|
776
|
+
:T_LPAREN => :STATE_ARGS
|
777
|
+
},
|
778
|
+
:close => [:T_RBRACE]
|
779
|
+
},
|
780
|
+
:STATE_ARGS => {
|
781
|
+
:open => {
|
782
|
+
:T_SQUOTE => :STATE_SSTRING,
|
783
|
+
:T_DQUOTE => :STATE_DSTRING,
|
784
|
+
:T_VBARSLASH => :STATE_REGEX
|
785
|
+
},
|
786
|
+
:close => [:T_RPAREN]
|
787
|
+
},
|
788
|
+
}
|
789
|
+
|
790
|
+
# Returns the type of a string of characters that does not match a pattern
|
791
|
+
def self.literal_type(literal)
|
792
|
+
return :LITERAL_IDENT if literal =~ /^[[:alpha:]][[:alnum:]-_]*$/
|
793
|
+
return :LITERAL_INT if literal =~ /^\d+$/
|
794
|
+
return :LITERAL_FLOAT if literal =~ /^\d*\.?\d+$/
|
795
|
+
return :LITERAL_STRING if literal =~ /^.+$/
|
796
|
+
return :LITERAL_UNKNOWN
|
797
|
+
end
|
798
|
+
|
799
|
+
# If no matches found for a letter, move to next letter, even if the possible patterns would match more
|
800
|
+
def self.tokenize(input)
|
801
|
+
state = [:STATE_ROOT_PATH] # State stack for the tokenizer, state.last will return the current state
|
802
|
+
match_start = 0
|
803
|
+
match_end = 0
|
804
|
+
unmatched_buffer = ''
|
805
|
+
largest_full_match = nil
|
806
|
+
token_list = [] # The list of tokens in the input, each item is an Array in the form:
|
807
|
+
# [Token type, Pattern that matched, State of the tokenizer after the token]
|
808
|
+
|
809
|
+
# Until we have checked and matched at or on every single character
|
810
|
+
while match_start < input.length
|
811
|
+
# Start building a substring from a single character
|
812
|
+
match_end = match_start
|
813
|
+
# Start will all patterns for current state as candidates
|
814
|
+
candidates = @@tokens[state.last].keys
|
815
|
+
# Start will no full match detected
|
816
|
+
largest_full_match = nil
|
817
|
+
|
818
|
+
# Until nothing can match substring or end of input has been reached
|
819
|
+
until candidates.empty? or match_end >= input.length
|
820
|
+
# String that candidate patterns will have to match
|
821
|
+
matched_string = input[match_start..match_end]
|
822
|
+
# Check to see if each remaining candidate pattern matches
|
823
|
+
# - If a full match, set as largest full match
|
824
|
+
# - Delete if the pattern does not match the string
|
825
|
+
candidates.delete_if do |pattern|
|
826
|
+
largest_full_match = [pattern, match_start, match_end] if pattern.eql?(matched_string)
|
827
|
+
!pattern.start_with?(matched_string) # Element deleted if true is the last statement in block
|
828
|
+
end
|
829
|
+
# Increase size of match by one if further matching is to be done
|
830
|
+
match_end += 1 if not candidates.empty?
|
831
|
+
end # Substring is now one character too large to be matched to
|
832
|
+
|
833
|
+
# If no full match found, add the last checked character as unmatched
|
834
|
+
# Otherwise:
|
835
|
+
# - Parse the unmatched_buffer as a literal and store
|
836
|
+
# - Activate state triggers associated with the largest fully matched token
|
837
|
+
# - Store the largest fully matched token
|
838
|
+
# - Start again where the matched token completes
|
839
|
+
if largest_full_match.nil?
|
840
|
+
# Add last checked character to the unmatched buffer
|
841
|
+
unmatched_buffer << input[match_start]
|
842
|
+
# Start matching again on the next letter
|
843
|
+
match_start += 1
|
844
|
+
else
|
845
|
+
|
846
|
+
# Do not activate state triggers associate with the parsed literal
|
847
|
+
# Is there a use case?
|
848
|
+
|
849
|
+
# Parse the unmatched_buffer as a literal and store (if it exists), then clear the buffer
|
850
|
+
token_list << [Tokenizer.literal_type(unmatched_buffer), unmatched_buffer, state.last] if unmatched_buffer.length > 0
|
851
|
+
unmatched_buffer = ''
|
852
|
+
|
853
|
+
# Info from the largest fully matched token
|
854
|
+
matched_pattern, pattern_start, pattern_end = largest_full_match
|
855
|
+
matched_token_type = @@tokens[state.last][matched_pattern]
|
856
|
+
|
857
|
+
# Activate state triggers associated with the largest fully matched token
|
858
|
+
current_state_triggers = @@triggers[state.last]
|
859
|
+
if current_state_triggers[:open].has_key?(matched_token_type)
|
860
|
+
state.push(current_state_triggers[:open][matched_token_type])
|
861
|
+
elsif current_state_triggers[:close].include?(matched_token_type)
|
862
|
+
state.pop
|
863
|
+
end
|
864
|
+
|
865
|
+
# Store the largest fully matched token
|
866
|
+
token_list << [matched_token_type, matched_pattern, state.last]
|
867
|
+
|
868
|
+
# Start again where the matched token completes
|
869
|
+
match_start = pattern_end + 1
|
870
|
+
|
871
|
+
end # Next token has been added to list
|
872
|
+
end # Input has been fully tokenized
|
873
|
+
|
874
|
+
# Parse and store the unmatched_buffer one last time
|
875
|
+
token_list << [Tokenizer.literal_type(unmatched_buffer), unmatched_buffer, state.last] if unmatched_buffer.length > 0
|
876
|
+
|
877
|
+
token_list
|
878
|
+
end # tokenize_path
|
879
|
+
|
880
|
+
end # Tokenizer
|
881
|
+
end # Scandent
|
882
|
+
end # Arboretum
|