arboretum 0.0.3 → 0.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,882 @@
1
+ module Arboretum
2
+ module Scandent
3
+
4
+ class TokenizationException < StandardError
5
+ def initialize(msg="An error occurred while tokenizing input")
6
+ super(msg)
7
+ end
8
+ end
9
+
10
+ class ParseException < StandardError
11
+ def initialize(msg="An error occurred while parsing input")
12
+ super(msg)
13
+ end
14
+ end
15
+
16
+ class InvalidExpressionException < StandardError
17
+ def initialize(msg="Invalid Scandent expression")
18
+ super(msg)
19
+ end
20
+ end
21
+
22
+ class ScandentRule
23
+ attr_accessor :paths
24
+
25
+ def initialize(rule_paths)
26
+ @paths = rule_paths
27
+ end
28
+
29
+ def valid_on?(element)
30
+ @paths.each {|path| return true if path.valid_on?(element)}
31
+ return false
32
+ end
33
+ alias_method :selects?, :valid_on?
34
+ alias_method :matches?, :valid_on?
35
+
36
+ def to_s
37
+ rule_str = ''
38
+ @paths.each do |path|
39
+ rule_str << ', ' if !rule_str.empty?
40
+ rule_str << path.to_s
41
+ end
42
+ rule_str
43
+ end
44
+ end
45
+
46
+ class ListenerRule < ScandentRule
47
+ end
48
+
49
+ class LocatorRule < ScandentRule
50
+ def locate(element)
51
+ all_located = []
52
+ @paths.each do |path|
53
+ path.locate(element) do |located_element|
54
+ unless all_located.include?(located_element)
55
+ yield located_element if block_given?
56
+ all_located << located_element
57
+ end
58
+ end
59
+ end
60
+ all_located
61
+ end
62
+ end
63
+
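As a usage sketch (illustrative, not part of the diff): a LocatorRule produced by Parser.parse_rule_string, defined further below, can be applied to a parsed document element. The require path and `doc_root` are assumptions; any Arboretum::DocTree element responding to the traversal methods used in this file would do.

    require 'arboretum'            # assumed require path for this gem
    include Arboretum::Scandent

    # Locator rule for every <p> descendant that carries a "lang" attribute
    rule = Parser.parse_rule_string('//%p[lang]', :PATH_LOCATOR)

    # `doc_root` is a hypothetical Arboretum::DocTree element
    rule.locate(doc_root) { |element| puts element }

    # Without a block, locate returns the de-duplicated Array of matches
    matches = rule.locate(doc_root)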
64
+ class ListenerPath
65
+ attr_accessor :steps
66
+
67
+ def initialize(steps=[])
68
+ # Make sure that the listener stems from the root element
69
+ if !steps[0].nil? and steps[0].element_ref != :ELEMENT_ROOT
70
+ if steps[0].element_ref.nil?
71
+ implied_step = [
72
+ [:T_TILDE, '~', :STATE_ROOT_PATH],
73
+ [:T_SLASH2, '//', :STATE_ROOT_PATH]
74
+ ]
75
+ steps.unshift(Parser.parse_step_tokens(implied_step, :PATH_LISTENER))
76
+ elsif steps[0].element_ref == :ELEMENT_SELF
77
+ steps[0].element_ref = :ELEMENT_ROOT
78
+ end
79
+ end
80
+ # Make sure there are no misplaced :ELEMENT_SELF references
81
+ steps.each {|step| raise InvalidExpressionException.new if step.element_ref == :ELEMENT_SELF and !step.eql?(steps.first)}
82
+
83
+ @steps = steps
84
+ end
85
+
86
+ def valid_on?(candidate)
87
+ steps_valid_on?(@steps, candidate)
88
+ end
89
+
90
+ def steps_valid_on?(steps, candidate)
91
+ return true if steps.empty?
92
+ steps.last.match(candidate) do |element|
93
+ return true if steps_valid_on?(steps[0..-2], element)
94
+ end
95
+ return false
96
+ end
97
+
98
+ def to_s
99
+ path_string = ''
100
+ @steps.each{|step| path_string << step.to_s}
101
+ path_string
102
+ end
103
+
104
+ end
105
+
106
+ class LocatorPath
107
+ attr_accessor :steps
108
+
109
+ def initialize(steps=[])
110
+ # Make sure that the locator stems from the root or current context element
111
+ if steps.first.element_ref.nil?
112
+ steps.first.action = :ACTION_CHILD if steps.first.action == :ACTION_SELF
113
+ implied_step = [
114
+ [:T_DOT, '.', :STATE_ROOT_PATH],
115
+ ]
116
+ steps.unshift(Parser.parse_step_tokens(implied_step, :PATH_LOCATOR))
117
+ end
118
+ # Make sure there are no misplaced :ELEMENT_SELF references
119
+ steps.each {|step| raise InvalidExpressionException.new if step.element_ref == :ELEMENT_SELF and !step.eql?(steps.first)}
120
+
121
+ @steps = steps
122
+ end
123
+
124
+ def locate(element)
125
+ matches = [element]
126
+ next_round = []
127
+ @steps.each do |step|
128
+ next_round = []
129
+ matches.each do |matched_element|
130
+ next_round += step.match(matched_element)
131
+ end
132
+ return next_round if next_round.empty?
133
+ matches = next_round
134
+ end
135
+ matches.each {|match| yield match if block_given?}
136
+ return matches
137
+ end
138
+
139
+ def valid_on?(candidate)
140
+ steps_valid_on?(@steps, candidate)
141
+ end
142
+
143
+ def steps_valid_on?(steps, candidate)
144
+ return true if steps.empty?
145
+ steps.first.match(candidate) do |element|
146
+ return true if steps_valid_on?(steps[1..-1], element)
147
+ end
148
+ return false
149
+ end
150
+
151
+ def to_s
152
+ path_string = ''
153
+ @steps.each{|step| path_string << step.to_s}
154
+ path_string
155
+ end
156
+ end
157
+
158
+ class PathStep
159
+ @@inverse_actions = {
160
+ :ACTION_CHILD => :ACTION_PARENT,
161
+ :ACTION_DESCENDANT => :ACTION_ANCESTOR,
162
+ :ACTION_PARENT => :ACTION_CHILD,
163
+ :ACTION_ANCESTOR => :ACTION_DESCENDANT,
164
+ :ACTION_FOLLOWING_SIBLING => :ACTION_PRECEDING_SIBLING,
165
+ :ACTION_FOLLOWING => :ACTION_PRECEDING,
166
+ :ACTION_PRECEDING_SIBLING => :ACTION_FOLLOWING_SIBLING,
167
+ :ACTION_PRECEDING => :ACTION_FOLLOWING,
168
+ :ACTION_SELF => :ACTION_SELF
169
+ }
170
+ @@action_group = {
171
+ :ACTION_CHILD => :content,
172
+ :ACTION_DESCENDANT => :descendants,
173
+ :ACTION_PARENT => :parent,
174
+ :ACTION_ANCESTOR => :ancestors,
175
+ :ACTION_FOLLOWING_SIBLING => :sibling_next,
176
+ :ACTION_FOLLOWING => :following_siblings,
177
+ :ACTION_PRECEDING_SIBLING => :sibling_prev,
178
+ :ACTION_PRECEDING => :preceding_siblings,
179
+ :ACTION_SELF => :itself
180
+ }
181
+ @@action_str = {
182
+ :ACTION_CHILD => '/'.freeze,
183
+ :ACTION_DESCENDANT => '//'.freeze,
184
+ :ACTION_PARENT => '/..'.freeze,
185
+ :ACTION_ANCESTOR => '/...'.freeze,
186
+ :ACTION_FOLLOWING_SIBLING => '/>'.freeze,
187
+ :ACTION_FOLLOWING => '/>>'.freeze,
188
+ :ACTION_PRECEDING_SIBLING => '/<'.freeze,
189
+ :ACTION_PRECEDING => '/<<'.freeze,
190
+ :ACTION_SELF => ''.freeze
191
+ }
192
+
193
+ attr_accessor :action, :element_ref, :tag, :namespace, :id, :attrs, :pseudo_exps, :valid_rules
194
+
195
+ def initialize(action, element_ref, tag, namespace, id, attrs, pseudo_exps, valid_rules)
196
+ @action = action
197
+ @element_ref = element_ref
198
+ @tag = tag.nil? ? nil : tag.to_sym
199
+ @namespace = namespace.nil? ? nil : namespace.to_sym
200
+ @id = id
201
+ @attrs = attrs
202
+ @pseudo_exps = pseudo_exps
203
+ @valid_rules = valid_rules
204
+ end
205
+
206
+ # Check if all fields match that of the given element, with no attention paid to the action
207
+ # FIXME: Search time improves by 25% when a tag is given. Reject non-tagged Elements more quickly
208
+ # to get a similar performance boost across the board
209
+ def describes?(element)
210
+ if element.kind_of?(Arboretum::DocTree::Elements::TaggedElement)
211
+ return false if !@tag.nil? and element.tag != @tag
212
+ return false if !@namespace.nil? and element.namespace != @namespace
213
+ return false if !@id.nil? and !element.equals_attr_val?(:id, [@id])
214
+ @attrs[:has].each {|attr_name| return false if !element.has_attr?(attr_name)}
215
+ @attrs[:contains].each {|name,values| values.each {|value| return false if !element.contains_attr_val?(name,value)}}
216
+ @attrs[:equals].each {|name,values| values.each {|value| return false if !element.equals_attr_val?(name,value)}}
217
+ @attrs[:matches].each {|name,values| values.each {|value| return false if !element.matches_attr_val?(name,value)}}
218
+ return false if element_ref == :ELEMENT_ROOT and !element.parent.nil?
219
+ else
220
+ return false if !@tag.nil? or
221
+ !@namespace.nil? or
222
+ !@id.nil? or
223
+ !@attrs[:has].empty? or
224
+ !@attrs[:contains].empty? or
225
+ !@attrs[:equals].empty? or
226
+ !@attrs[:matches].empty? or
227
+ (element_ref == :ELEMENT_ROOT and !element.parent.nil?)
228
+ end
229
+ @pseudo_exps.each do |pseudo_name, pseudo_arg|
230
+ return false if !PseudoElements.match(element, pseudo_name, pseudo_arg)
231
+ end
232
+ @valid_rules.each do |rule|
233
+ return false if !rule.valid_on?(element)
234
+ end
235
+ return true
236
+ end
237
+
238
+ def to_s_sans_action
239
+ step_str = ''
240
+ if !@element_ref.nil?
241
+ if @element_ref == :ELEMENT_ROOT
242
+ step_str << '~'
243
+ elsif @element_ref == :ELEMENT_SELF
244
+ step_str << '.'
245
+ end
246
+ end
247
+ step_str << "%#{@tag.to_s}" if !tag.nil?
248
+ step_str << "@#{@namespace.to_s}" if !namespace.nil?
249
+ step_str << "##{@id}" if !@id.nil?
250
+ @attrs[:has].each {|attr_name| step_str << "[#{attr_name}]"}
251
+ @attrs[:contains].each {|name,values| values.each {|value| step_str << "[#{name}=\"#{value}\"]"}}
252
+ @attrs[:equals].each {|name,values| values.each {|value| step_str << "[#{name}==\"#{value.join(' ')}\"]"}}
253
+ @attrs[:matches].each {|name,values| values.each {|value| step_str << "[#{name}~=|/#{value}/|]"}}
254
+ @valid_rules.each {|rule| step_str << "{#{rule.to_s}}"}
255
+ @pseudo_exps.each {|pseudo, arg| step_str << ":#{pseudo}(#{arg.to_s})"}
256
+ step_str
257
+ end
258
+ end
259
+
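To illustrate how a step's fields map back onto Scandent syntax, here is a sketch of the to_s round trip (same assumptions as the earlier sketch); the string follows from to_s_sans_action plus the implied context step that LocatorPath prepends.

    include Arboretum::Scandent

    rule = Parser.parse_rule_string('//%div#main[class="note"]', :PATH_LOCATOR)
    rule.to_s   # => './/%div#main[class="note"]'
                # the leading '.' is the implied current-context step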
260
+ class ListenerStep < PathStep
261
+ # Take step action and check for elements matching description
262
+ def match(element)
263
+ result = []
264
+ inverse_action = @@inverse_actions[@action]
265
+ search_group = element.public_send(@@action_group[inverse_action])
266
+ search_group.listing.each do |searched_element|
267
+ yield searched_element if self.describes?(searched_element) and block_given?
268
+ result << searched_element if self.describes?(searched_element)
269
+ end
270
+ result
271
+ end
272
+
273
+ def to_s
274
+ step_str = self.to_s_sans_action
275
+ step_str << @@action_str[@action]
276
+ end
277
+ end
278
+
279
+ class LocatorStep < PathStep
280
+ # Take step action and check for elements matching description
281
+ def match(element)
282
+ result = []
283
+ search_group = element.public_send(@@action_group[@action])
284
+ search_group.listing.each do |searched_element|
285
+ yield searched_element if self.describes?(searched_element) and block_given?
286
+ result << searched_element if self.describes?(searched_element)
287
+ end
288
+ result
289
+ end
290
+
291
+ def to_s
292
+ step_str = '' # Don't copy reference to @@action_str[@action]
293
+ step_str << @@action_str[@action]
294
+ step_str << self.to_s_sans_action
295
+ end
296
+ end
297
+
298
+ class Formula
299
+ def initialize(form_tokens)
300
+ @coefficient = 0
301
+ @intercept = 0
302
+ form_token_types = form_tokens.map {|token| token[0]}
303
+ if form_token_types[0] == :T_KEY_EVEN
304
+ @coefficient = 2
305
+ @intercept = 0
306
+ elsif form_token_types[0] == :T_KEY_ODD
307
+ @coefficient = 2
308
+ @intercept = 1
309
+ else
310
+ term_negative = false
311
+ term_coef = false
312
+ value = 0
313
+
314
+ index = 0
315
+ while index < form_tokens.length
316
+ case form_token_types[index]
317
+ when :T_FORM_PLUS
318
+ # Resolve Term
319
+ if term_coef
320
+ @coefficient += value
321
+ else
322
+ @intercept += value
323
+ end
324
+ # Reset with new sign
325
+ term_negative = false
326
+ term_coef = false
327
+ value = 0
328
+ when :T_FORM_MINUS
329
+ # Resolve Term
330
+ if term_coef
331
+ @coefficient += value
332
+ else
333
+ @intercept += value
334
+ end
335
+ # Reset with new sign
336
+ term_negative = true
337
+ term_coef = false
338
+ value = 0
339
+ when :LITERAL_INT
340
+ value = form_tokens[index][1].to_i
341
+ value *= -1 if term_negative
342
+ when :T_FORM_N
343
+ term_coef = true
344
+ value = (term_negative ? -1 : 1) if value.zero? # a bare "n" or "-n" implies a coefficient of 1 or -1
345
+ else
346
+ raise InvalidExpressionException.new
347
+ end
348
+ index += 1
349
+ end
350
+ # Resolve one last time
351
+ if term_coef
352
+ @coefficient += value
353
+ else
354
+ @intercept += value
355
+ end
356
+ end
357
+ end
358
+
359
+ def to_s
360
+ "#{@coefficient}n#{'+' if @intercept >= 0}#{@intercept}"
361
+ end
362
+ end
363
+
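Formula reduces an nth-child style expression to a coefficient and an intercept ("even" is 2n+0, "odd" is 2n+1). How those two numbers select positions is not shown in this file; the sketch below assumes the usual CSS an+b convention over 1-based positions and is illustrative only.

    # Hypothetical helper (not in the gem): 1-based positions selected by a*n + b
    def formula_positions(coefficient, intercept, limit)
      (1..limit).select do |position|
        if coefficient.zero?
          position == intercept
        else
          offset = position - intercept
          (offset % coefficient).zero? && offset / coefficient >= 0
        end
      end
    end

    formula_positions(2, 1, 10)   # => [1, 3, 5, 7, 9]   ("odd", "2n+1")
    formula_positions(2, 0, 10)   # => [2, 4, 6, 8, 10]  ("even", "2n")
    formula_positions(-1, 3, 10)  # => [1, 2, 3]         ("-n+3")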
364
+ class PseudoElements
365
+ def self.match(element, pseudo_name, pseudo_arg)
366
+ PseudoElements.public_send(pseudo_name, element, pseudo_arg)
367
+ end
368
+ end
369
+
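PseudoElements is only a dispatch stub here: match forwards to a class method named after the pseudo-class, but no handlers are defined in this file. A hypothetical handler could look like the following; the name and semantics are assumptions, not part of the released code, and element.content.listing simply mirrors the traversal calls used elsewhere in this file.

    # Hypothetical pseudo-class handler; :empty and its semantics are assumed
    class Arboretum::Scandent::PseudoElements
      def self.empty(element, _arg)
        element.content.listing.empty?   # no child content => ":empty"
      end
    end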
370
+ # Parser interprets the tokens produced from a Scandent string and forms ScandentRules
371
+ # that represent the interpreted form of the string
372
+ # ScandentRules can then be matched to Elements in a DocTree
373
+ class Parser
374
+ @@actions = {
375
+ :T_SLASH => :ACTION_CHILD,
376
+ :T_SLASH2 => :ACTION_DESCENDANT,
377
+ :T_SLASHDOT2 => :ACTION_PARENT,
378
+ :T_SLASHDOT3 => :ACTION_ANCESTOR,
379
+ :T_SLASHGT => :ACTION_FOLLOWING_SIBLING,
380
+ :T_SLASHGT2 => :ACTION_FOLLOWING,
381
+ :T_SLASHLT => :ACTION_PRECEDING_SIBLING,
382
+ :T_SLASHLT2 => :ACTION_PRECEDING
383
+ }
384
+
385
+ # Parse a Scandent string by giving it to the Tokenizer and then parsing the results
386
+ def self.parse_rule_string(rule_string, type)
387
+ Parser.parse_rule_tokens(Tokenizer.tokenize(rule_string), type)
388
+ end
389
+
390
+ # Directly parse Scandent string tokens
391
+ def self.parse_rule_tokens(rule_tokens, type)
392
+ # Separate the rule into its comma-delimited paths and remove the delimiter
393
+ rule_paths_tokens = rule_tokens.slice_after {|token| token[0] == :T_COMMA and token[2] == :STATE_ROOT_PATH}.to_a
394
+ rule_paths_tokens.each {|path_tokens| path_tokens.pop if path_tokens.last[0] == :T_COMMA}
395
+
396
+ # Parse each path individually
397
+ rule_paths = rule_paths_tokens.map{|path| Parser.parse_path_tokens(path, type)}
398
+
399
+ if type == :PATH_LISTENER
400
+ ListenerRule.new(rule_paths)
401
+ elsif type == :PATH_LOCATOR
402
+ LocatorRule.new(rule_paths)
403
+ else
404
+ raise ParseException.new("Unknown step type")
405
+ end
406
+ end
407
+
408
+ # Parse an individual path of a rule
409
+ def self.parse_path_tokens(path_tokens, type)
410
+ # Double check that the end state of the path is valid
411
+ raise InvalidExpressionException.new("End state of path is '#{path_tokens.last[2]}' instead of :STATE_ROOT_PATH") if path_tokens.last[2] != :STATE_ROOT_PATH
412
+
413
+ step_delimiters = [:T_SLASH, :T_SLASH2, :T_SLASHDOT2, :T_SLASHDOT3, :T_SLASHGT, :T_SLASHGT2, :T_SLASHLT, :T_SLASHLT2]
414
+
415
+ # Separate the path into its steps
416
+ if type == :PATH_LISTENER
417
+ path_steps_tokens = path_tokens.slice_after {|token| step_delimiters.include? token[0] and token[2] == :STATE_ROOT_PATH}.to_a
418
+ elsif type == :PATH_LOCATOR
419
+ path_steps_tokens = path_tokens.slice_before {|token| step_delimiters.include? token[0] and token[2] == :STATE_ROOT_PATH}.to_a
420
+ else
421
+ raise ParseException.new("Unknown step type")
422
+ end
423
+
424
+ # Parse each step individually
425
+ path_steps = path_steps_tokens.map{|step| Parser.parse_step_tokens(step, type)}
426
+
427
+ if type == :PATH_LISTENER
428
+ ListenerPath.new(path_steps)
429
+ else
430
+ LocatorPath.new(path_steps)
431
+ end
432
+ end
433
+
434
+ # Parse an individual step of a path
435
+ def self.parse_step_tokens(step_tokens, type)
436
+ # Remove whitespace tokens
437
+ step_tokens.delete_if {|token| token[0] == :T_WHITESPACE}
438
+
439
+ if type == :PATH_LISTENER
440
+ action = @@actions.has_key?(step_tokens.last[0]) ? @@actions[step_tokens.last[0]] : :ACTION_SELF
441
+ elsif type == :PATH_LOCATOR
442
+ action = @@actions.has_key?(step_tokens.first[0]) ? @@actions[step_tokens.first[0]] : :ACTION_SELF
443
+ else
444
+ raise ParseException.new("Unknown step type")
445
+ end
446
+
447
+ element_ref = []
448
+ tag = []
449
+ namespace = []
450
+ id = []
451
+ attrs = {
452
+ :has => [],
453
+ :contains => Hash.new{|hash, key| hash[key] = Array.new},
454
+ :equals => Hash.new{|hash, key| hash[key] = Array.new},
455
+ :matches => Hash.new{|hash, key| hash[key] = Array.new}
456
+ }
457
+ pseudo_exps = []
458
+ valid_rules = []
459
+
460
+ index = 0
461
+ state = :STATE_ROOT_PATH
462
+ while index < step_tokens.length
463
+ # Consume current token and increment
464
+ index_token = step_tokens[index]
465
+ index += 1
466
+
467
+ case index_token[0]
468
+ when :T_PCT
469
+ following_token = step_tokens[index]
470
+ index += 1
471
+ raise InvalidExpressionException.new if following_token[0] != :LITERAL_IDENT
472
+ tag << following_token[1]
473
+ when :T_AT
474
+ following_token = step_tokens[index]
475
+ index += 1
476
+ raise InvalidExpressionException.new if following_token[0] != :LITERAL_IDENT
477
+ namespace << following_token[1]
478
+ when :T_PND
479
+ following_token = step_tokens[index]
480
+ index += 1
481
+ raise InvalidExpressionException.new if following_token[0] != :LITERAL_IDENT
482
+ id << following_token[1]
483
+ when :T_COLON
484
+ following_token = step_tokens[index]
485
+ index += 1
486
+ raise InvalidExpressionException.new if following_token[0] != :LITERAL_IDENT
487
+ pseudo_name = following_token[1].to_sym
488
+ arg_tokens = []
489
+
490
+ following_token = step_tokens[index]
491
+ if following_token[0] == :T_LPAREN
492
+ index += 1 # To consume the LPAREN
493
+ following_token = step_tokens[index]
494
+ index += 1
495
+ until following_token[0] == :T_RPAREN or index > step_tokens.length
496
+ arg_tokens << following_token
497
+ following_token = step_tokens[index]
498
+ index += 1
499
+ end
500
+ raise InvalidExpressionException.new if index > step_tokens.length # Undesirable exit condition to above loop
501
+ index += 1 # To consume the RPAREN
502
+ end
503
+ pseudo_exps << [pseudo_name, Parser.parse_arg(arg_tokens)]
504
+ when :T_ASTERISK # Adds no restrictions, so do nothing
505
+ when :T_TILDE
506
+ element_ref << :ELEMENT_ROOT
507
+ when :T_DOT
508
+ element_ref << :ELEMENT_SELF
509
+ when :T_LBRAK
510
+ following_token = step_tokens[index]
511
+ index += 1
512
+ raise InvalidExpressionException.new if following_token[0] != :LITERAL_IDENT
513
+ attr_name = following_token[1].to_sym
514
+ attr_value = nil
515
+ operation = nil
516
+
517
+ following_token = step_tokens[index]
518
+ index += 1
519
+ case following_token[0]
520
+ when :T_EQL
521
+ operation = :contains
522
+
523
+ following_token = step_tokens[index]
524
+ index += 1
525
+ raise InvalidExpressionException.new("Expected a string after '='") if ![:T_DQUOTE, :T_SQUOTE].include?(following_token[0])
526
+ string_limiter = following_token[0]
527
+
528
+ following_token = step_tokens[index]
529
+ index += 1
530
+ raise InvalidExpressionException.new if not [:LITERAL_IDENT, :LITERAL_STRING, :LITERAL_INT, :LITERAL_FLOAT].include?(following_token[0])
531
+ attr_value = following_token[1]
532
+
533
+ following_token = step_tokens[index]
534
+ index += 1
535
+ raise InvalidExpressionException.new if following_token[0] != string_limiter
536
+
537
+ following_token = step_tokens[index]
538
+ index += 1
539
+ raise InvalidExpressionException.new if following_token[0] != :T_RBRAK
540
+ when :T_EQL2
541
+ operation = :equals
542
+ following_token = step_tokens[index]
543
+ index += 1
544
+ raise InvalidExpressionException.new if ![:T_DQUOTE, :T_SQUOTE].include?(following_token[0])
545
+ string_limiter = following_token[0]
546
+
547
+ following_token = step_tokens[index]
548
+ index += 1
549
+ raise InvalidExpressionException.new if not [:LITERAL_IDENT, :LITERAL_STRING, :LITERAL_INT, :LITERAL_FLOAT].include?(following_token[0])
550
+ attr_value = following_token[1].split
551
+
552
+ following_token = step_tokens[index]
553
+ index += 1
554
+ raise InvalidExpressionException.new if following_token[0] != string_limiter
555
+
556
+ following_token = step_tokens[index]
557
+ index += 1
558
+ raise InvalidExpressionException.new if following_token[0] != :T_RBRAK
559
+ when :T_TILDE_EQL
560
+ operation = :matches
561
+
562
+ following_token = step_tokens[index]
563
+ index += 1
564
+ raise InvalidExpressionException.new if following_token[0] != :T_VBARSLASH
565
+
566
+ following_token = step_tokens[index]
567
+ index += 1
568
+ raise InvalidExpressionException.new if not [:LITERAL_IDENT, :LITERAL_STRING, :LITERAL_INT, :LITERAL_FLOAT].include?(following_token[0])
569
+ attr_value = Regexp.new(following_token[1])
570
+
571
+ following_token = step_tokens[index]
572
+ index += 1
573
+ raise InvalidExpressionException.new if following_token[0] != :T_SLASHVBAR
574
+
575
+ following_token = step_tokens[index]
576
+ index += 1
577
+ raise InvalidExpressionException.new if following_token[0] != :T_RBRAK
578
+ when :T_RBRAK
579
+ operation = nil
580
+ else
581
+ raise InvalidExpressionException.new
582
+ end
583
+ if operation.nil?
584
+ attrs[:has] << attr_name
585
+ else
586
+ attrs[operation][attr_name] << attr_value
587
+ end
588
+ when :T_LBRACE
589
+ equilibrium = 1
590
+ reformed_path_string = ''
591
+ following_token = step_tokens[index]
592
+ index += 1
593
+ until (following_token[0] == :T_RBRACE and equilibrium.zero?) or index > step_tokens.length
594
+ reformed_path_string << following_token[1]
595
+ following_token = step_tokens[index]
596
+ index += 1
597
+ equilibrium += 1 if following_token[0] == :T_LBRACE
598
+ equilibrium -= 1 if following_token[0] == :T_RBRACE
599
+ end
600
+ raise InvalidExpressionException.new("Could not find matching R_BRACE in #{reformed_path_string}") if index > step_tokens.length # Undesirable exit condition to above loop
601
+ valid_rules << Parser.parse_rule_string(reformed_path_string, :PATH_LOCATOR)
602
+ when :T_SLASH, :T_SLASH2, :T_SLASHDOT2, :T_SLASHDOT3, :T_SLASHGT, :T_SLASHGT2, :T_SLASHLT, :T_SLASHLT2
603
+ else
604
+ raise ParseException.new("Consumed unexpected token: #{index_token}")
605
+ end
606
+ end # All tokens consumed
607
+
608
+ # Validate results
609
+ raise InvalidExpressionException.new if tag.length > 1
610
+ raise InvalidExpressionException.new if namespace.length > 1
611
+ raise InvalidExpressionException.new if element_ref.length > 1
612
+
613
+ if type == :PATH_LISTENER
614
+ ListenerStep.new(action, element_ref[0], tag[0], namespace[0], id[0], attrs, pseudo_exps, valid_rules)
615
+ else
616
+ LocatorStep.new(action, element_ref[0], tag[0], namespace[0], id[0], attrs, pseudo_exps, valid_rules)
617
+ end
618
+ end
619
+
620
+ # Parse an argument given to a pseudo-class
621
+ def self.parse_arg(arg_tokens)
622
+ arg_token_types = arg_tokens.map {|token| token[0]}
623
+ if ((arg_token_types[0] == :T_SQUOTE and arg_token_types[2] == :T_SQUOTE) or (arg_token_types[0] == :T_DQUOTE and arg_token_types[2] == :T_DQUOTE)) and arg_tokens.length == 3
624
+ arg_tokens[1][1]
625
+ elsif (arg_token_types[0] == :T_VBARSLASH and arg_token_types[2] == :T_SLASHVBAR) and arg_tokens.length == 3
626
+ Regexp.new(arg_tokens[1][1])
627
+ elsif (!(arg_token_types & [:T_KEY_EVEN, :T_KEY_ODD]).empty? and arg_tokens.length == 1) or arg_token_types.include? :T_FORM_N
628
+ Formula.new(arg_tokens)
629
+ elsif arg_token_types[0] == :LITERAL_INT and arg_tokens.length == 1
630
+ arg_tokens[0][1].to_i
631
+ elsif arg_token_types[0] == :LITERAL_FLOAT and arg_tokens.length == 1
632
+ arg_tokens[0][1].to_f
633
+ elsif arg_tokens.length.zero?
634
+ nil
635
+ else
636
+ arg_str = ''
637
+ arg_tokens.each {|token| arg_str << token[1]}
638
+ raise InvalidExpressionException.new("Invalid argument '#{arg_str}'")
639
+ nil
640
+ end
641
+ end
642
+
643
+ end # Parser
644
+
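Putting the Parser together, a sketch of the two rule types (same assumptions as the earlier sketches; `some_element` stands in for a DocTree element):

    include Arboretum::Scandent

    # Locator rules walk outward from a context element
    locator  = Parser.parse_rule_string('//%li[class="active"]', :PATH_LOCATOR)

    # Listener rules validate a candidate element against a path anchored at the root
    listener = Parser.parse_rule_string('~//%ul/%li', :PATH_LISTENER)

    listener.matches?(some_element)   # => true or false
    locator.locate(some_element)      # => Array of matching elements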
645
+ # A class with class methods used to tokenize a Scandent string
646
+ # Has information regarding which character patterns match which tokens
647
+ # Has information regarding which tokens trigger which state in the tokenizer
648
+ class Tokenizer
649
+ # A Hash whose keys are states and whose values are Hashes mapping patterns in that state to tokens
650
+ @@tokens = {
651
+ :STATE_ROOT_PATH => {
652
+ ',' => :T_COMMA,
653
+ '~' => :T_TILDE,
654
+ '//' => :T_SLASH2,
655
+ '/' => :T_SLASH,
656
+ '.' => :T_DOT,
657
+ '/..' => :T_SLASHDOT2,
658
+ '/...' => :T_SLASHDOT3,
659
+ '/>' => :T_SLASHGT,
660
+ '/>>' => :T_SLASHGT2,
661
+ '/<' => :T_SLASHLT,
662
+ '/<<' => :T_SLASHLT2,
663
+ '*' => :T_ASTERISK,
664
+ '{' => :T_LBRACE,
665
+ '[' => :T_LBRAK,
666
+ '(' => :T_LPAREN,
667
+ ':' => :T_COLON,
668
+ '#' => :T_PND,
669
+ '%' => :T_PCT,
670
+ '@' => :T_AT,
671
+ # Whitespace
672
+ "\n" => :T_WHITESPACE,
673
+ "\t" => :T_WHITESPACE,
674
+ ' ' => :T_WHITESPACE
675
+ },
676
+ :STATE_SSTRING => {
677
+ '\'' => :T_SQUOTE # Closing Token
678
+ },
679
+ :STATE_DSTRING => {
680
+ '"' => :T_DQUOTE, # Closing Token
681
+ },
682
+ :STATE_REGEX => {
683
+ '/|' => :T_SLASHVBAR # Closing Token
684
+ },
685
+ :STATE_ATTR_EXP => {
686
+ ']' => :T_RBRAK, # Closing Token
687
+ '=' => :T_EQL,
688
+ '==' => :T_EQL2,
689
+ '~=' => :T_TILDE_EQL,
690
+ '"' => :T_DQUOTE,
691
+ '\'' => :T_SQUOTE,
692
+ '|/' => :T_VBARSLASH,
693
+ # Whitespace
694
+ "\n" => :T_WHITESPACE,
695
+ "\t" => :T_WHITESPACE,
696
+ ' ' => :T_WHITESPACE
697
+ },
698
+ :STATE_PATH_EXP => {
699
+ '}' => :T_RBRACE, # Closing Token
700
+ # From ROOT_PATH
701
+ ',' => :T_COMMA,
702
+ '~' => :T_TILDE,
703
+ '//' => :T_SLASH2,
704
+ '/' => :T_SLASH,
705
+ '.' => :T_DOT,
706
+ '/..' => :T_SLASHDOT2,
707
+ '/...' => :T_SLASHDOT3,
708
+ '/>' => :T_SLASHGT,
709
+ '/>>' => :T_SLASHGT2,
710
+ '/<' => :T_SLASHLT,
711
+ '/<<' => :T_SLASHLT2,
712
+ '*' => :T_ASTERISK,
713
+ '{' => :T_LBRACE,
714
+ '[' => :T_LBRAK,
715
+ '(' => :T_LPAREN,
716
+ ':' => :T_COLON,
717
+ '#' => :T_PND,
718
+ '%' => :T_PCT,
719
+ '@' => :T_AT,
720
+ # Whitespace
721
+ "\n" => :T_WHITESPACE,
722
+ "\t" => :T_WHITESPACE,
723
+ ' ' => :T_WHITESPACE
724
+ },
725
+ :STATE_ARGS => {
726
+ ')' => :T_RPAREN, # Closing Token
727
+ '|/' => :T_VBARSLASH,
728
+ 'n' => :T_FORM_N,
729
+ '+' => :T_FORM_PLUS,
730
+ '-' => :T_FORM_MINUS,
731
+ 'even' => :T_KEY_EVEN,
732
+ 'odd' => :T_KEY_ODD,
733
+ '"' => :T_DQUOTE,
734
+ '\'' => :T_SQUOTE,
735
+ # Whitespace
736
+ "\n" => :T_WHITESPACE,
737
+ "\t" => :T_WHITESPACE,
738
+ ' ' => :T_WHITESPACE
739
+ },
740
+ }
741
+ # A Hash whose keys are states and whose values are Hashes describing which tokens trigger the closing
742
+ # of the *current* state or the opening of a *different* state
743
+ @@triggers = {
744
+ :STATE_ROOT_PATH => {
745
+ :open => {
746
+ :T_LBRACE => :STATE_PATH_EXP,
747
+ :T_LBRAK => :STATE_ATTR_EXP,
748
+ :T_LPAREN => :STATE_ARGS
749
+ },
750
+ :close => []
751
+ },
752
+ :STATE_SSTRING => {
753
+ :open => {},
754
+ :close => [:T_SQUOTE]
755
+ },
756
+ :STATE_DSTRING => {
757
+ :open => {},
758
+ :close => [:T_DQUOTE]
759
+ },
760
+ :STATE_REGEX => {
761
+ :open => {},
762
+ :close => [:T_SLASHVBAR]
763
+ },
764
+ :STATE_ATTR_EXP => {
765
+ :open => {
766
+ :T_SQUOTE => :STATE_SSTRING,
767
+ :T_DQUOTE => :STATE_DSTRING,
768
+ :T_VBARSLASH => :STATE_REGEX
769
+ },
770
+ :close => [:T_RBRAK]
771
+ },
772
+ :STATE_PATH_EXP => {
773
+ :open => {
774
+ :T_LBRACE => :STATE_PATH_EXP,
775
+ :T_LBRAK => :STATE_ATTR_EXP,
776
+ :T_LPAREN => :STATE_ARGS
777
+ },
778
+ :close => [:T_RBRACE]
779
+ },
780
+ :STATE_ARGS => {
781
+ :open => {
782
+ :T_SQUOTE => :STATE_SSTRING,
783
+ :T_DQUOTE => :STATE_DSTRING,
784
+ :T_VBARSLASH => :STATE_REGEX
785
+ },
786
+ :close => [:T_RPAREN]
787
+ },
788
+ }
789
+
790
+ # Returns the type of a string of characters that does not match a pattern
791
+ def self.literal_type(literal)
792
+ return :LITERAL_IDENT if literal =~ /^[[:alpha:]][[:alnum:]_-]*$/
793
+ return :LITERAL_INT if literal =~ /^\d+$/
794
+ return :LITERAL_FLOAT if literal =~ /^\d*\.?\d+$/
795
+ return :LITERAL_STRING if literal =~ /^.+$/
796
+ return :LITERAL_UNKNOWN
797
+ end
798
+
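Reading the regular expressions above, literal_type classifies unmatched character runs as follows (assuming the gem is loaded):

    tokenizer = Arboretum::Scandent::Tokenizer

    tokenizer.literal_type('div')        # => :LITERAL_IDENT
    tokenizer.literal_type('42')         # => :LITERAL_INT
    tokenizer.literal_type('3.14')       # => :LITERAL_FLOAT
    tokenizer.literal_type('two words')  # => :LITERAL_STRING
    tokenizer.literal_type('')           # => :LITERAL_UNKNOWN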
799
+ # If no pattern fully matches starting at a character, move on to the next character, even if longer input could still have matched a candidate pattern
800
+ def self.tokenize(input)
801
+ state = [:STATE_ROOT_PATH] # State stack for the tokenizer, state.last will return the current state
802
+ match_start = 0
803
+ match_end = 0
804
+ unmatched_buffer = ''
805
+ largest_full_match = nil
806
+ token_list = [] # The list of tokens in the input, each item is an Array in the form:
807
+ # [Token type, Pattern that matched, State of the tokenizer after the token]
808
+
809
+ # Until we have checked and matched at or on every single character
810
+ while match_start < input.length
811
+ # Start building a substring from a single character
812
+ match_end = match_start
813
+ # Start with all patterns for the current state as candidates
814
+ candidates = @@tokens[state.last].keys
815
+ # Start with no full match detected
816
+ largest_full_match = nil
817
+
818
+ # Until nothing can match substring or end of input has been reached
819
+ until candidates.empty? or match_end >= input.length
820
+ # String that candidate patterns will have to match
821
+ matched_string = input[match_start..match_end]
822
+ # Check to see if each remaining candidate pattern matches
823
+ # - If a full match, set as largest full match
824
+ # - Delete if the pattern does not match the string
825
+ candidates.delete_if do |pattern|
826
+ largest_full_match = [pattern, match_start, match_end] if pattern.eql?(matched_string)
827
+ !pattern.start_with?(matched_string) # Pattern is removed from candidates when this, the block's return value, is true
828
+ end
829
+ # Increase size of match by one if further matching is to be done
830
+ match_end += 1 if not candidates.empty?
831
+ end # Substring is now one character too long to match
832
+
833
+ # If no full match found, add the last checked character as unmatched
834
+ # Otherwise:
835
+ # - Parse the unmatched_buffer as a literal and store
836
+ # - Activate state triggers associated with the largest fully matched token
837
+ # - Store the largest fully matched token
838
+ # - Start again where the matched token completes
839
+ if largest_full_match.nil?
840
+ # Add last checked character to the unmatched buffer
841
+ unmatched_buffer << input[match_start]
842
+ # Start matching again on the next letter
843
+ match_start += 1
844
+ else
845
+
846
+ # Do not activate state triggers associated with the parsed literal
847
+ # Is there a use case?
848
+
849
+ # Parse the unmatched_buffer as a literal and store (if it exists), then clear the buffer
850
+ token_list << [Tokenizer.literal_type(unmatched_buffer), unmatched_buffer, state.last] if unmatched_buffer.length > 0
851
+ unmatched_buffer = ''
852
+
853
+ # Info from the largest fully matched token
854
+ matched_pattern, pattern_start, pattern_end = largest_full_match
855
+ matched_token_type = @@tokens[state.last][matched_pattern]
856
+
857
+ # Activate state triggers associated with the largest fully matched token
858
+ current_state_triggers = @@triggers[state.last]
859
+ if current_state_triggers[:open].has_key?(matched_token_type)
860
+ state.push(current_state_triggers[:open][matched_token_type])
861
+ elsif current_state_triggers[:close].include?(matched_token_type)
862
+ state.pop
863
+ end
864
+
865
+ # Store the largest fully matched token
866
+ token_list << [matched_token_type, matched_pattern, state.last]
867
+
868
+ # Start again where the matched token completes
869
+ match_start = pattern_end + 1
870
+
871
+ end # Next token has been added to list
872
+ end # Input has been fully tokenized
873
+
874
+ # Parse and store the unmatched_buffer one last time
875
+ token_list << [Tokenizer.literal_type(unmatched_buffer), unmatched_buffer, state.last] if unmatched_buffer.length > 0
876
+
877
+ token_list
878
+ end # tokenize
879
+
880
+ end # Tokenizer
881
+ end # Scandent
882
+ end # Arboretum
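Finally, a sketch of the tokenizer's output format, read off the pattern tables above (each entry is [token type, matched text, tokenizer state after the token]); the exact array is inferred from this file, not taken from the gem's documentation:

    require 'arboretum'   # assumed require path

    Arboretum::Scandent::Tokenizer.tokenize('//%div#main')
    # => [[:T_SLASH2,      '//',   :STATE_ROOT_PATH],
    #     [:T_PCT,         '%',    :STATE_ROOT_PATH],
    #     [:LITERAL_IDENT, 'div',  :STATE_ROOT_PATH],
    #     [:T_PND,         '#',    :STATE_ROOT_PATH],
    #     [:LITERAL_IDENT, 'main', :STATE_ROOT_PATH]]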