simply_stored 0.1.4
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/simply_stored/class_methods_base.rb +31 -0
- data/lib/simply_stored/couch/belongs_to.rb +117 -0
- data/lib/simply_stored/couch/ext/couch_potato.rb +16 -0
- data/lib/simply_stored/couch/has_many.rb +148 -0
- data/lib/simply_stored/couch/has_one.rb +93 -0
- data/lib/simply_stored/couch/validations.rb +74 -0
- data/lib/simply_stored/couch/views/array_property_view_spec.rb +22 -0
- data/lib/simply_stored/couch/views.rb +1 -0
- data/lib/simply_stored/couch.rb +278 -0
- data/lib/simply_stored/instance_methods.rb +143 -0
- data/lib/simply_stored/simpledb/associations.rb +196 -0
- data/lib/simply_stored/simpledb/attributes.rb +173 -0
- data/lib/simply_stored/simpledb/storag.rb +85 -0
- data/lib/simply_stored/simpledb/validations.rb +88 -0
- data/lib/simply_stored/simpledb.rb +212 -0
- data/lib/simply_stored/storage.rb +93 -0
- data/lib/simply_stored.rb +9 -0
- data/test/custom_views_test.rb +33 -0
- data/test/fixtures/couch.rb +182 -0
- data/test/fixtures/simpledb/item.rb +11 -0
- data/test/fixtures/simpledb/item_daddy.rb +8 -0
- data/test/fixtures/simpledb/log_item.rb +3 -0
- data/test/fixtures/simpledb/namespace_bar.rb +5 -0
- data/test/fixtures/simpledb/namespace_foo.rb +7 -0
- data/test/fixtures/simpledb/protected_item.rb +3 -0
- data/test/simply_stored_couch_test.rb +1684 -0
- data/test/simply_stored_simpledb_test.rb +1341 -0
- data/test/test_helper.rb +22 -0
- data/test/vendor/dhaka-2.2.1/lib/dhaka/dot/dot.rb +29 -0
- data/test/vendor/dhaka-2.2.1/lib/dhaka/evaluator/evaluator.rb +133 -0
- data/test/vendor/dhaka-2.2.1/lib/dhaka/grammar/closure_hash.rb +15 -0
- data/test/vendor/dhaka-2.2.1/lib/dhaka/grammar/grammar.rb +240 -0
- data/test/vendor/dhaka-2.2.1/lib/dhaka/grammar/grammar_symbol.rb +27 -0
- data/test/vendor/dhaka-2.2.1/lib/dhaka/grammar/precedence.rb +19 -0
- data/test/vendor/dhaka-2.2.1/lib/dhaka/grammar/production.rb +36 -0
- data/test/vendor/dhaka-2.2.1/lib/dhaka/lexer/accept_actions.rb +36 -0
- data/test/vendor/dhaka-2.2.1/lib/dhaka/lexer/alphabet.rb +21 -0
- data/test/vendor/dhaka-2.2.1/lib/dhaka/lexer/compiled_lexer.rb +46 -0
- data/test/vendor/dhaka-2.2.1/lib/dhaka/lexer/dfa.rb +121 -0
- data/test/vendor/dhaka-2.2.1/lib/dhaka/lexer/lexeme.rb +32 -0
- data/test/vendor/dhaka-2.2.1/lib/dhaka/lexer/lexer.rb +70 -0
- data/test/vendor/dhaka-2.2.1/lib/dhaka/lexer/lexer_run.rb +78 -0
- data/test/vendor/dhaka-2.2.1/lib/dhaka/lexer/regex_grammar.rb +392 -0
- data/test/vendor/dhaka-2.2.1/lib/dhaka/lexer/regex_parser.rb +2010 -0
- data/test/vendor/dhaka-2.2.1/lib/dhaka/lexer/regex_tokenizer.rb +14 -0
- data/test/vendor/dhaka-2.2.1/lib/dhaka/lexer/specification.rb +96 -0
- data/test/vendor/dhaka-2.2.1/lib/dhaka/lexer/state.rb +68 -0
- data/test/vendor/dhaka-2.2.1/lib/dhaka/lexer/state_machine.rb +37 -0
- data/test/vendor/dhaka-2.2.1/lib/dhaka/parser/action.rb +55 -0
- data/test/vendor/dhaka-2.2.1/lib/dhaka/parser/channel.rb +58 -0
- data/test/vendor/dhaka-2.2.1/lib/dhaka/parser/compiled_parser.rb +51 -0
- data/test/vendor/dhaka-2.2.1/lib/dhaka/parser/conflict.rb +54 -0
- data/test/vendor/dhaka-2.2.1/lib/dhaka/parser/item.rb +42 -0
- data/test/vendor/dhaka-2.2.1/lib/dhaka/parser/parse_result.rb +50 -0
- data/test/vendor/dhaka-2.2.1/lib/dhaka/parser/parse_tree.rb +66 -0
- data/test/vendor/dhaka-2.2.1/lib/dhaka/parser/parser.rb +165 -0
- data/test/vendor/dhaka-2.2.1/lib/dhaka/parser/parser_methods.rb +11 -0
- data/test/vendor/dhaka-2.2.1/lib/dhaka/parser/parser_run.rb +39 -0
- data/test/vendor/dhaka-2.2.1/lib/dhaka/parser/parser_state.rb +74 -0
- data/test/vendor/dhaka-2.2.1/lib/dhaka/parser/token.rb +22 -0
- data/test/vendor/dhaka-2.2.1/lib/dhaka/runtime.rb +51 -0
- data/test/vendor/dhaka-2.2.1/lib/dhaka/tokenizer/tokenizer.rb +190 -0
- data/test/vendor/dhaka-2.2.1/lib/dhaka.rb +62 -0
- data/test/vendor/dhaka-2.2.1/test/all_tests.rb +5 -0
- data/test/vendor/dhaka-2.2.1/test/arithmetic/arithmetic_evaluator.rb +64 -0
- data/test/vendor/dhaka-2.2.1/test/arithmetic/arithmetic_evaluator_test.rb +43 -0
- data/test/vendor/dhaka-2.2.1/test/arithmetic/arithmetic_grammar.rb +41 -0
- data/test/vendor/dhaka-2.2.1/test/arithmetic/arithmetic_grammar_test.rb +9 -0
- data/test/vendor/dhaka-2.2.1/test/arithmetic/arithmetic_test_methods.rb +9 -0
- data/test/vendor/dhaka-2.2.1/test/arithmetic/arithmetic_tokenizer.rb +39 -0
- data/test/vendor/dhaka-2.2.1/test/arithmetic/arithmetic_tokenizer_test.rb +38 -0
- data/test/vendor/dhaka-2.2.1/test/arithmetic_precedence/arithmetic_precedence_evaluator.rb +43 -0
- data/test/vendor/dhaka-2.2.1/test/arithmetic_precedence/arithmetic_precedence_grammar.rb +24 -0
- data/test/vendor/dhaka-2.2.1/test/arithmetic_precedence/arithmetic_precedence_grammar_test.rb +30 -0
- data/test/vendor/dhaka-2.2.1/test/arithmetic_precedence/arithmetic_precedence_lexer_specification.rb +23 -0
- data/test/vendor/dhaka-2.2.1/test/arithmetic_precedence/arithmetic_precedence_parser_test.rb +33 -0
- data/test/vendor/dhaka-2.2.1/test/brackets/bracket_grammar.rb +23 -0
- data/test/vendor/dhaka-2.2.1/test/brackets/bracket_tokenizer.rb +22 -0
- data/test/vendor/dhaka-2.2.1/test/brackets/brackets_test.rb +28 -0
- data/test/vendor/dhaka-2.2.1/test/chittagong/chittagong_driver.rb +46 -0
- data/test/vendor/dhaka-2.2.1/test/chittagong/chittagong_driver_test.rb +276 -0
- data/test/vendor/dhaka-2.2.1/test/chittagong/chittagong_evaluator.rb +284 -0
- data/test/vendor/dhaka-2.2.1/test/chittagong/chittagong_evaluator_test.rb +38 -0
- data/test/vendor/dhaka-2.2.1/test/chittagong/chittagong_grammar.rb +104 -0
- data/test/vendor/dhaka-2.2.1/test/chittagong/chittagong_lexer.rb +109 -0
- data/test/vendor/dhaka-2.2.1/test/chittagong/chittagong_lexer_specification.rb +37 -0
- data/test/vendor/dhaka-2.2.1/test/chittagong/chittagong_lexer_test.rb +58 -0
- data/test/vendor/dhaka-2.2.1/test/chittagong/chittagong_parser.rb +879 -0
- data/test/vendor/dhaka-2.2.1/test/chittagong/chittagong_parser_test.rb +55 -0
- data/test/vendor/dhaka-2.2.1/test/chittagong/chittagong_test.rb +170 -0
- data/test/vendor/dhaka-2.2.1/test/core/another_lalr_but_not_slr_grammar.rb +20 -0
- data/test/vendor/dhaka-2.2.1/test/core/compiled_parser_test.rb +44 -0
- data/test/vendor/dhaka-2.2.1/test/core/dfa_test.rb +170 -0
- data/test/vendor/dhaka-2.2.1/test/core/evaluator_test.rb +22 -0
- data/test/vendor/dhaka-2.2.1/test/core/grammar_test.rb +83 -0
- data/test/vendor/dhaka-2.2.1/test/core/lalr_but_not_slr_grammar.rb +19 -0
- data/test/vendor/dhaka-2.2.1/test/core/lexer_test.rb +139 -0
- data/test/vendor/dhaka-2.2.1/test/core/malformed_grammar.rb +7 -0
- data/test/vendor/dhaka-2.2.1/test/core/malformed_grammar_test.rb +8 -0
- data/test/vendor/dhaka-2.2.1/test/core/nullable_grammar.rb +21 -0
- data/test/vendor/dhaka-2.2.1/test/core/parse_result_test.rb +44 -0
- data/test/vendor/dhaka-2.2.1/test/core/parser_state_test.rb +24 -0
- data/test/vendor/dhaka-2.2.1/test/core/parser_test.rb +131 -0
- data/test/vendor/dhaka-2.2.1/test/core/precedence_grammar.rb +17 -0
- data/test/vendor/dhaka-2.2.1/test/core/precedence_grammar_test.rb +9 -0
- data/test/vendor/dhaka-2.2.1/test/core/rr_conflict_grammar.rb +21 -0
- data/test/vendor/dhaka-2.2.1/test/core/simple_grammar.rb +22 -0
- data/test/vendor/dhaka-2.2.1/test/core/sr_conflict_grammar.rb +16 -0
- data/test/vendor/dhaka-2.2.1/test/dhaka_test_helper.rb +17 -0
- data/test/vendor/dhaka-2.2.1/test/fake_logger.rb +17 -0
- data/test/vendor/simplerdb-0.2/lib/simplerdb/client_exception.rb +10 -0
- data/test/vendor/simplerdb-0.2/lib/simplerdb/db.rb +146 -0
- data/test/vendor/simplerdb-0.2/lib/simplerdb/query_language.rb +266 -0
- data/test/vendor/simplerdb-0.2/lib/simplerdb/server.rb +33 -0
- data/test/vendor/simplerdb-0.2/lib/simplerdb/servlet.rb +191 -0
- data/test/vendor/simplerdb-0.2/lib/simplerdb.rb +3 -0
- data/test/vendor/simplerdb-0.2/test/functional_test.rb +81 -0
- data/test/vendor/simplerdb-0.2/test/query_evaluator_test.rb +73 -0
- data/test/vendor/simplerdb-0.2/test/query_parser_test.rb +64 -0
- data/test/vendor/simplerdb-0.2/test/simplerdb_test.rb +80 -0
- metadata +182 -0
@@ -0,0 +1,121 @@
|
|
1
|
+
module Dhaka
  module LexerSupport
    # Raised when an invalid regular expression pattern is encountered
    # in a LexerSpecification.
    class InvalidRegexException < StandardError
    end
  end
end
|
8
|
+
|
9
|
+
module Dhaka
  module LexerSupport
    # State action that records the current match position as a
    # checkpoint for a lookahead pattern, so a lexer run can later
    # rewind to it when the lookahead portion is discarded.
    class CheckpointAction
      attr_reader :pattern

      def initialize(pattern)
        @pattern = pattern
      end

      # Invoked when a run enters a state carrying this action.
      def call(lexer_run)
        lexer_run.save_checkpoint(pattern)
      end

      # Ruby source fragment emitted when compiling a lexer to code.
      def compile_to_ruby_source
        "add_checkpoint(#{pattern.inspect})"
      end
    end
  end
end
|
23
|
+
|
24
|
+
|
25
|
+
module Dhaka
  module LexerSupport
    # Deterministic finite automaton built from a single regular
    # expression, using the follow-set (followpos) construction over the
    # parsed regex AST. Inherits the subset-construction driver from
    # StateMachine (defined elsewhere in the library).
    class DFA < StateMachine #:nodoc:
      # Tokenizes and parses +regex+, computes follow sets on the AST,
      # and seeds the state machine with the AST's first-set.
      # Raises InvalidRegexException if the regex cannot be tokenized
      # or parsed.
      def initialize(regex)
        @regex = regex

        tokenize_result = RegexTokenizer.tokenize(@regex)
        raise InvalidRegexException.new(tokenize_error_message(tokenize_result)) if tokenize_result.has_error?

        parse_result = RegexParser.parse(tokenize_result)
        raise InvalidRegexException.new(parse_error_message(parse_result)) if parse_result.has_error?

        # The parse result doubles as the root of the regex AST.
        ast = parse_result
        ast.calculate_follow_sets

        super(ItemSet.new(ast.first))
      end

      # Human-readable message pointing at the offending character.
      def tokenize_error_message(tokenize_result)
        index = tokenize_result.unexpected_char_index
        "Invalid character #{@regex[index].chr}: #{@regex.dup.insert(index, '>>>')}"
      end

      # Message for an unexpected token (or premature end) during parsing.
      def parse_error_message(parse_result)
        unexpected_token = parse_result.unexpected_token
        if unexpected_token.symbol_name == END_SYMBOL_NAME
          "Unexpected end of regex."
        else
          "Unexpected token #{unexpected_token.symbol_name}: #{@regex.dup.insert(unexpected_token.input_position, '>>>')}"
        end
      end

      # Union of the follow sets of every position in +key+ labelled +char+.
      def dest_key_for key, char
        destination = ItemSet.new
        key.each do |position|
          destination.merge(position.follow_set) if position.character == char
        end
        destination
      end

      # Builds the State for a new item-set key: attaches an accept
      # action when the key contains an accepting position, and a
      # checkpoint action when it contains a lookahead checkpoint.
      def new_state_for_key key
        accepting = key.detect { |position| position.accepting }
        state = accepting ? State.new(self, accepting.action(@regex)) : State.new(self)
        state.checkpoint_actions << CheckpointAction.new(@regex) if key.any? { |position| position.checkpoint }
        state
      end

      # Characters on which transitions leave the item set +key+
      # (accepting and checkpoint positions carry no character).
      def transition_characters key
        chars = Set.new
        key.each do |node|
          chars << node.character unless (node.accepting || node.checkpoint)
        end
        chars
      end

      # Runs this DFA against +input+, returning the matched prefix.
      def match(input)
        DFARun.new(self, input).match
      end
    end
  end
end
|
87
|
+
|
88
|
+
module Dhaka
  module LexerSupport
    # A single execution of a DFA against an input string. The accepted
    # prefix is tracked separately from characters that have been
    # consumed but not yet accepted, so lookahead checkpoints can rewind.
    class DFARun
      def initialize(dfa, input)
        @dfa, @input = dfa, input
        @matched = ""
        @not_yet_accepted = ""
        @curr_state = @dfa.start_state
      end

      # Walks the input byte by byte until no transition applies and
      # returns the longest accepted match.
      def match
        @input.unpack("C*").each do |byte|
          char = byte.chr
          dest_state = @curr_state.transitions[char]
          break unless dest_state
          @not_yet_accepted << char
          @curr_state = dest_state
          @curr_state.process(self)
        end
        @matched
      end

      # Remembers everything consumed so far as the checkpoint for +pattern+.
      def save_checkpoint(pattern)
        @last_saved_checkpoint = @matched + @not_yet_accepted
      end

      # Promotes all pending characters into the accepted match.
      def accept(pattern)
        @matched.concat @not_yet_accepted
        @not_yet_accepted = ""
      end

      # Rolls the accepted match back to the last saved checkpoint.
      def accept_last_saved_checkpoint(pattern)
        @matched = @last_saved_checkpoint
        @not_yet_accepted = ""
      end
    end
  end
end
|
120
|
+
end
|
121
|
+
end
|
@@ -0,0 +1,32 @@
|
|
1
|
+
module Dhaka
  # Represents a portion of the input string that has been recognized as
  # matching a given lexer pattern.
  class Lexeme
    # The pattern matched by this lexeme, and the characters consumed so far.
    attr_accessor :pattern, :characters

    # +input_position+ is the index in the input stream that this lexeme starts at.
    attr_reader :input_position

    def initialize(input_position) #:nodoc:
      @input_position = input_position
      @characters = []
    end

    # The substring of the input stream that this lexeme is comprised of.
    def value
      characters.join
    end

    # A lexeme counts as accepted once a pattern has been assigned to it.
    def accepted? #:nodoc:
      pattern
    end

    def << char #:nodoc:
      characters << char
    end

    def concat chars #:nodoc:
      characters.concat chars
    end
  end
end
|
@@ -0,0 +1,70 @@
|
|
1
|
+
module Dhaka
  # The lexer generator. To generate a lexer from a lexer specification +MyLexerSpecification+:
  #   lexer = Dhaka::Lexer.new(MyLexerSpecification)
  #
  # To compile this lexer as +MyLexer+ to a string of Ruby source:
  #   lexer.compile_to_ruby_source_as(:MyLexer)
  class Lexer < LexerSupport::StateMachine
    attr_reader :specification

    # Creates a new lexer from a given specification by building one DFA
    # per pattern and merging their start states via subset construction.
    def initialize(specification)
      dfas = {}
      @specification = specification
      specification.items.each do |pattern, item|
        dfas[pattern] = LexerSupport::DFA.new(pattern)
      end
      super(ItemSet.new(dfas.values.collect { |dfa| dfa.start_state }))
    end

    # Compiles the lexer to Ruby code that, when executed, reloads all the
    # states and actions of the lexer into a class named +lexer_class_name+.
    def compile_to_ruby_source_as lexer_class_name
      source = "class #{lexer_class_name} < Dhaka::CompiledLexer\n\n"
      source << " self.specification = #{specification.name}\n\n"
      source << " start_with #{start_state.object_id}\n\n"
      @states.each do |key, state|
        source << "#{state.compile_to_ruby_source}\n\n"
      end
      source << "end"
      source
    end

    # Returns a LexerRun that tokenizes +input+.
    def lex input
      LexerRun.new(self, input)
    end

    def action_for_pattern pattern #:nodoc:
      @specification.items[pattern].action
    end

    private

    # Builds the merged lexer state for a set of DFA states. When several
    # patterns accept here, the one whose specification item compares
    # lowest (highest precedence) wins; checkpoint actions are unioned.
    def new_state_for_key key
      accepting_states = key.select { |state| state.accepting? }
      if accepting_states.empty?
        merged = LexerSupport::State.new(self)
      else
        highest_precedence_state = accepting_states.min { |a, b| @specification.items[a.action.pattern] <=> @specification.items[b.action.pattern] }
        merged = LexerSupport::State.new(self, highest_precedence_state.action)
      end
      key.select { |state| !state.checkpoint_actions.empty? }.each do |state|
        merged.checkpoint_actions.concat state.checkpoint_actions
      end
      merged
    end

    # All characters on which any constituent DFA state can transition.
    def transition_characters states
      states.collect { |state| state.transitions.keys }.flatten.uniq
    end

    # Set of destination DFA states reachable from +states+ on +char+.
    def dest_key_for states, char
      destinations = ItemSet.new
      states.each do |state|
        dest_state = state.transitions[char]
        destinations << dest_state if dest_state
      end
      destinations
    end
  end
end
|
@@ -0,0 +1,78 @@
|
|
1
|
+
module Dhaka
  # Represents a run of a lexer on a given input string.
  class LexerRun
    include Enumerable

    attr_reader :current_lexeme

    def initialize lexer, input
      @lexer, @input = lexer, input
      @input_position = 0
      @not_yet_accepted_chars = []
      @last_saved_checkpoints = {}
    end

    # Constructs a token of type +symbol_name+ from the +current_lexeme+.
    def create_token(symbol_name, value = current_lexeme.characters.join)
      Token.new(symbol_name, value, current_lexeme.input_position)
    end

    # Yields each token as it is recognized. Returns a
    # TokenizerErrorResult if an error occurs during tokenization.
    def each
      reset_and_rewind
      loop do
        c = curr_char
        # Stop once the input is exhausted and nothing is pending.
        break if (c == "\0" && @not_yet_accepted_chars.empty? && !@current_lexeme.accepted?)
        dest_state = @curr_state.transitions[c]
        if dest_state
          @curr_state = dest_state
          @not_yet_accepted_chars << c
          @curr_state.process(self)
          advance
        else
          # Dead end: emit the lexeme accepted so far, or fail.
          return TokenizerErrorResult.new(@input_position) unless @current_lexeme.accepted?
          token = get_token
          yield token if token
          reset_and_rewind
        end
      end
      yield Token.new(END_SYMBOL_NAME, nil, nil)
    end

    def accept(pattern) #:nodoc:
      @current_lexeme.pattern = pattern
      @current_lexeme.concat @not_yet_accepted_chars
      @not_yet_accepted_chars = []
    end

    def save_checkpoint(pattern) #:nodoc:
      @last_saved_checkpoints[pattern] = (@current_lexeme.characters + @not_yet_accepted_chars)
    end

    # Accepts only up to the checkpoint saved for +pattern+; characters
    # beyond it are pushed back to be re-scanned.
    def accept_last_saved_checkpoint(pattern) #:nodoc:
      @current_lexeme.pattern = pattern
      @current_lexeme.concat @not_yet_accepted_chars
      @not_yet_accepted_chars = @current_lexeme.characters[(@last_saved_checkpoints[pattern].size)..-1]
      @current_lexeme.characters = @last_saved_checkpoints[pattern].dup
    end

    private

    # Rewinds past any pending characters and starts a fresh lexeme.
    def reset_and_rewind
      @input_position -= @not_yet_accepted_chars.size
      @current_lexeme = Lexeme.new(@input_position)
      @curr_state = @lexer.start_state
      @not_yet_accepted_chars = []
    end

    # Current input character; "\0" once past the end of input.
    # NOTE(review): relies on Ruby 1.8 String#[] returning a byte — verify
    # against the Ruby version this vendored code targets.
    def curr_char
      (@input[@input_position] || 0).chr
    end

    def advance
      @input_position += 1
    end

    # Evaluates the specification action for the accepted pattern.
    def get_token
      instance_eval(&@lexer.action_for_pattern(@current_lexeme.pattern))
    end
  end
end
|
@@ -0,0 +1,392 @@
|
|
1
|
+
module Dhaka
  module LexerSupport #:nodoc:all
    # Grammar for the regular-expression dialect understood by the lexer
    # generator. Each production's action builds a node of the regex AST.
    class RegexGrammar < Dhaka::Grammar
      # Root: a plain regex, or a regex with a trailing lookahead ("/").
      for_symbol(Dhaka::START_SYMBOL_NAME) do
        regex %w| Disjunction | do Dhaka::LexerSupport::RootNode.new(child_nodes[0], Dhaka::LexerSupport::AcceptingNode.new) end
        regex_with_lookahead %w| Disjunction / Disjunction | do Dhaka::LexerSupport::RootNode.new(Dhaka::LexerSupport::LookaheadNode.new(child_nodes[0], child_nodes[2]), Dhaka::LexerSupport::LookaheadAcceptingNode.new) end
      end

      for_symbol('Disjunction') do
        disjunction %w| Alternative \| Disjunction | do Dhaka::LexerSupport::OrNode.new(child_nodes[0], child_nodes[2]) end
        alternative %w| Alternative | do child_nodes[0] end
      end

      for_symbol('Alternative') do
        concatenation %w| Alternative Term | do Dhaka::LexerSupport::CatNode.new(child_nodes[0], child_nodes[1]) end
        term %w| Term | do child_nodes[0] end
      end

      # Quantifiers.
      for_symbol('Term') do
        zero_or_more %w| Atom * | do Dhaka::LexerSupport::ZeroOrMoreNode.new(child_nodes[0]) end
        one_or_more %w| Atom + | do Dhaka::LexerSupport::OneOrMoreNode.new(child_nodes[0]) end
        zero_or_one %w| Atom ? | do Dhaka::LexerSupport::ZeroOrOneNode.new(child_nodes[0]) end
        atom %w| Atom | do child_nodes[0] end
      end

      # Groups, literals, wildcard, character sets and escaped classes.
      for_symbol('Atom') do
        group %w| ( Disjunction ) | do child_nodes[1] end
        char %w| Character | do Dhaka::LexerSupport::LeafNode.new(child_nodes[0]) end
        anything %w| . | do Dhaka::LexerSupport::OrNode.new(*(Dhaka::LexerSupport::ALL_CHARACTERS - ["\r", "\n"]).collect {|char| Dhaka::LexerSupport::LeafNode.new(char)}) end
        positive_set %w| [ SetContents ] | do OrNode.new(*child_nodes[1].collect{|char| Dhaka::LexerSupport::LeafNode.new(char)}) end
        negative_set %w| [ ^ SetContents ] | do Dhaka::LexerSupport::OrNode.new(*(Dhaka::LexerSupport::ALL_CHARACTERS - child_nodes[2]).collect {|char| Dhaka::LexerSupport::LeafNode.new(char)}) end

        # Escaped character classes (e.g. \d, \w) expand to alternations.
        Dhaka::LexerSupport::CLASSES.each do |char, expansion|
          send("character_class_#{char}", ['\\', char]) do
            Dhaka::LexerSupport::OrNode.new(*Dhaka::LexerSupport::CLASSES[char].collect {|c| Dhaka::LexerSupport::LeafNode.new(c)})
          end
        end

        # Escaped operator characters become plain literals.
        Dhaka::LexerSupport::OPERATOR_CHARACTERS.each do |char, method_name|
          send(method_name, ['\\', char]) do
            Dhaka::LexerSupport::LeafNode.new(char)
          end
        end
      end

      for_symbol('Character') do
        letter_character %w| Letter | do child_nodes[0] end
        digit_character %w| Digit | do child_nodes[0] end
        white_space_character %w| Whitespace | do child_nodes[0] end
        symbol_character %w| Symbol | do child_nodes[0] end
      end

      for_symbol('SetContents') do
        single_item %w| SetItem | do child_nodes[0] end
        multiple_items %w| SetContents SetItem | do child_nodes[0].concat child_nodes[1] end
      end

      # Set items: single characters or a-z / A-Z / 0-9 style ranges.
      for_symbol('SetItem') do
        single_char_item %w| SetCharacter | do [child_nodes[0]] end
        lower_case_letter_range %w| LowercaseLetter - LowercaseLetter | do (child_nodes[0]..child_nodes[2]).to_a end
        upper_case_letter_range %w| UppercaseLetter - UppercaseLetter | do (child_nodes[0]..child_nodes[2]).to_a end
        digit_range %w| Digit - Digit | do (child_nodes[0]..child_nodes[2]).to_a end
      end

      for_symbol('Letter') do
        lower_case_letter %w| LowercaseLetter | do child_nodes[0] end
        upper_case_letter %w| UppercaseLetter | do child_nodes[0] end
      end

      # One production per terminal character, generated from the
      # character tables defined elsewhere in LexerSupport.
      for_symbol('LowercaseLetter') do
        Dhaka::LexerSupport::LOWERCASE_LETTERS.each do |letter|
          send("lower_char_letter_#{letter}", letter) do
            letter
          end
        end
      end

      for_symbol('UppercaseLetter') do
        Dhaka::LexerSupport::UPPERCASE_LETTERS.each do |letter|
          send("upper_case_letter_#{letter}", letter) do
            letter
          end
        end
      end

      for_symbol('Digit') do
        Dhaka::LexerSupport::DIGITS.each do |digit|
          send("digit_#{digit}", digit) do
            digit
          end
        end
      end

      for_symbol('Whitespace') do
        Dhaka::LexerSupport::WHITESPACE.each do |whitespace_char|
          send("whitespace_#{to_byte(whitespace_char)}", whitespace_char) do
            whitespace_char
          end
        end
      end

      for_symbol('Symbol') do
        Dhaka::LexerSupport::SYMBOLS.each do |symbol_char|
          send("symbol_char_#{to_byte(symbol_char)}", symbol_char) do
            symbol_char
          end
        end
      end

      # Inside a set, operator characters must be escaped to be literal.
      for_symbol('SetCharacter') do
        (Dhaka::LexerSupport::ALL_CHARACTERS - Dhaka::LexerSupport::SET_OPERATOR_CHARACTERS).each do |char|
          send("set_character_#{to_byte(char)}", char) do
            char
          end
        end
        Dhaka::LexerSupport::SET_OPERATOR_CHARACTERS.each do |char|
          send("set_operator_character_#{to_byte(char)}", ['\\', char]) do
            char
          end
        end
      end
    end
  end
end
|
128
|
+
|
129
|
+
|
130
|
+
module Dhaka
  module LexerSupport
    # Base class for regex AST nodes. By default a node is neither a
    # lookahead checkpoint nor an accepting position.
    class ASTNode
      def checkpoint
        false
      end

      def accepting
        false
      end
    end

    # AST node with exactly two children.
    class BinaryNode < ASTNode
      attr_reader :left, :right

      def initialize left, right
        @left, @right = left, right
      end

      # Emits this subtree into a DOT graph.
      def to_dot(graph)
        graph.node(self, :label => label)
        graph.edge(self, left)
        graph.edge(self, right)
        left.to_dot(graph)
        right.to_dot(graph)
      end

      def calculate_follow_sets
        left.calculate_follow_sets
        right.calculate_follow_sets
      end
    end

    # Alternation over any number of children.
    class OrNode < ASTNode
      attr_reader :children

      def initialize(*children)
        @children = children
      end

      def label
        "|"
      end

      # Nullable when any branch is nullable.
      def nullable
        children.any? { |child| child.nullable }
      end

      # First-set: union over all branches.
      def first
        children.inject(Set.new) { |acc, child| acc.merge child.first }
      end

      # Last-set: union over all branches.
      def last
        children.inject(Set.new) { |acc, child| acc.merge child.last }
      end

      def to_dot(graph)
        graph.node(self, :label => label)
        children.each do |child|
          graph.edge(self, child)
          child.to_dot(graph)
        end
      end

      def calculate_follow_sets
        children.each { |child| child.calculate_follow_sets }
      end
    end

    # Concatenation of two subexpressions.
    class CatNode < BinaryNode
      def label
        "cat"
      end

      def nullable
        left.nullable && right.nullable
      end

      def first
        left.nullable ? (left.first | right.first) : left.first
      end

      def last
        right.nullable ? (left.last | right.last) : right.last
      end

      # Positions ending the left part can be followed by positions
      # starting the right part.
      def calculate_follow_sets
        super
        left.last.each do |leaf_node|
          leaf_node.follow_set.merge right.first
        end
      end
    end

    # Concatenation whose boundary is a lookahead ("/"): a checkpoint is
    # inserted between the matched part and the lookahead part.
    class LookaheadNode < CatNode
      def label
        "/"
      end

      def calculate_follow_sets
        super
        left.last.each do |leaf_node|
          leaf_node.follow_set.merge(Set.new([CheckpointNode.new]))
        end
      end
    end

    # AST node with a single child; delegates the follow-set queries.
    class UnaryNode < ASTNode
      attr_reader :child

      def initialize child
        @child = child
      end

      def to_dot(graph)
        graph.node(self, :label => label)
        graph.edge(self, child)
        child.to_dot(graph)
      end

      def nullable
        child.nullable
      end

      def first
        child.first
      end

      def last
        child.last
      end

      def calculate_follow_sets
        child.calculate_follow_sets
      end
    end

    # Root of a complete regex AST (expression concatenated with its
    # accepting node).
    class RootNode < CatNode
      def label
        "start"
      end

      def head_node?
        true
      end
    end

    # Kleene star: matches zero or more repetitions of its child.
    class ZeroOrMoreNode < UnaryNode
      def label
        "*"
      end

      def nullable
        true
      end

      # Repetition: last positions loop back to first positions.
      def calculate_follow_sets
        super
        last.each do |leaf_node|
          leaf_node.follow_set.merge first
        end
      end
    end

    # Optional: matches zero or one occurrence of its child.
    class ZeroOrOneNode < UnaryNode
      def label
        "?"
      end

      def nullable
        true
      end
    end

    # Matches one or more repetitions of its child.
    class OneOrMoreNode < UnaryNode
      def label
        "+"
      end

      def calculate_follow_sets
        super
        last.each do |leaf_node|
          leaf_node.follow_set.merge first
        end
      end
    end

    # A single literal character position in the regex.
    class LeafNode < ASTNode
      attr_reader :character, :follow_set

      def initialize character
        @character = character
        @follow_set = Set.new
      end

      def to_dot(graph)
        graph.node(self, :label => character)
      end

      def nullable
        false
      end

      def first
        Set.new([self])
      end

      def last
        Set.new([self])
      end

      def calculate_follow_sets
      end
    end

    # Marker position inserted at a lookahead boundary.
    class CheckpointNode < ASTNode
      def to_dot(graph)
        graph.node(self, :label => "lookahead")
      end

      def character
      end

      def checkpoint
        true
      end
    end

    # Terminal position marking a successful match.
    class AcceptingNode < ASTNode
      def accepting
        true
      end

      def character
      end

      def action(pattern)
        AcceptAction.new(pattern)
      end

      def first
        Set.new([self])
      end

      def calculate_follow_sets
      end

      def to_dot(graph)
        graph.node(self, :label => '#')
      end
    end

    # Accepting position for a regex with a lookahead suffix.
    class LookaheadAcceptingNode < AcceptingNode
      def action(pattern)
        LookaheadAcceptAction.new(pattern)
      end
    end
  end
end
|
390
|
+
|
391
|
+
end
|
392
|
+
end
|