simply_stored 0.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/simply_stored/class_methods_base.rb +31 -0
- data/lib/simply_stored/couch/belongs_to.rb +117 -0
- data/lib/simply_stored/couch/ext/couch_potato.rb +16 -0
- data/lib/simply_stored/couch/has_many.rb +148 -0
- data/lib/simply_stored/couch/has_one.rb +93 -0
- data/lib/simply_stored/couch/validations.rb +74 -0
- data/lib/simply_stored/couch/views/array_property_view_spec.rb +22 -0
- data/lib/simply_stored/couch/views.rb +1 -0
- data/lib/simply_stored/couch.rb +278 -0
- data/lib/simply_stored/instance_methods.rb +143 -0
- data/lib/simply_stored/simpledb/associations.rb +196 -0
- data/lib/simply_stored/simpledb/attributes.rb +173 -0
- data/lib/simply_stored/simpledb/storag.rb +85 -0
- data/lib/simply_stored/simpledb/validations.rb +88 -0
- data/lib/simply_stored/simpledb.rb +212 -0
- data/lib/simply_stored/storage.rb +93 -0
- data/lib/simply_stored.rb +9 -0
- data/test/custom_views_test.rb +33 -0
- data/test/fixtures/couch.rb +182 -0
- data/test/fixtures/simpledb/item.rb +11 -0
- data/test/fixtures/simpledb/item_daddy.rb +8 -0
- data/test/fixtures/simpledb/log_item.rb +3 -0
- data/test/fixtures/simpledb/namespace_bar.rb +5 -0
- data/test/fixtures/simpledb/namespace_foo.rb +7 -0
- data/test/fixtures/simpledb/protected_item.rb +3 -0
- data/test/simply_stored_couch_test.rb +1684 -0
- data/test/simply_stored_simpledb_test.rb +1341 -0
- data/test/test_helper.rb +22 -0
- data/test/vendor/dhaka-2.2.1/lib/dhaka/dot/dot.rb +29 -0
- data/test/vendor/dhaka-2.2.1/lib/dhaka/evaluator/evaluator.rb +133 -0
- data/test/vendor/dhaka-2.2.1/lib/dhaka/grammar/closure_hash.rb +15 -0
- data/test/vendor/dhaka-2.2.1/lib/dhaka/grammar/grammar.rb +240 -0
- data/test/vendor/dhaka-2.2.1/lib/dhaka/grammar/grammar_symbol.rb +27 -0
- data/test/vendor/dhaka-2.2.1/lib/dhaka/grammar/precedence.rb +19 -0
- data/test/vendor/dhaka-2.2.1/lib/dhaka/grammar/production.rb +36 -0
- data/test/vendor/dhaka-2.2.1/lib/dhaka/lexer/accept_actions.rb +36 -0
- data/test/vendor/dhaka-2.2.1/lib/dhaka/lexer/alphabet.rb +21 -0
- data/test/vendor/dhaka-2.2.1/lib/dhaka/lexer/compiled_lexer.rb +46 -0
- data/test/vendor/dhaka-2.2.1/lib/dhaka/lexer/dfa.rb +121 -0
- data/test/vendor/dhaka-2.2.1/lib/dhaka/lexer/lexeme.rb +32 -0
- data/test/vendor/dhaka-2.2.1/lib/dhaka/lexer/lexer.rb +70 -0
- data/test/vendor/dhaka-2.2.1/lib/dhaka/lexer/lexer_run.rb +78 -0
- data/test/vendor/dhaka-2.2.1/lib/dhaka/lexer/regex_grammar.rb +392 -0
- data/test/vendor/dhaka-2.2.1/lib/dhaka/lexer/regex_parser.rb +2010 -0
- data/test/vendor/dhaka-2.2.1/lib/dhaka/lexer/regex_tokenizer.rb +14 -0
- data/test/vendor/dhaka-2.2.1/lib/dhaka/lexer/specification.rb +96 -0
- data/test/vendor/dhaka-2.2.1/lib/dhaka/lexer/state.rb +68 -0
- data/test/vendor/dhaka-2.2.1/lib/dhaka/lexer/state_machine.rb +37 -0
- data/test/vendor/dhaka-2.2.1/lib/dhaka/parser/action.rb +55 -0
- data/test/vendor/dhaka-2.2.1/lib/dhaka/parser/channel.rb +58 -0
- data/test/vendor/dhaka-2.2.1/lib/dhaka/parser/compiled_parser.rb +51 -0
- data/test/vendor/dhaka-2.2.1/lib/dhaka/parser/conflict.rb +54 -0
- data/test/vendor/dhaka-2.2.1/lib/dhaka/parser/item.rb +42 -0
- data/test/vendor/dhaka-2.2.1/lib/dhaka/parser/parse_result.rb +50 -0
- data/test/vendor/dhaka-2.2.1/lib/dhaka/parser/parse_tree.rb +66 -0
- data/test/vendor/dhaka-2.2.1/lib/dhaka/parser/parser.rb +165 -0
- data/test/vendor/dhaka-2.2.1/lib/dhaka/parser/parser_methods.rb +11 -0
- data/test/vendor/dhaka-2.2.1/lib/dhaka/parser/parser_run.rb +39 -0
- data/test/vendor/dhaka-2.2.1/lib/dhaka/parser/parser_state.rb +74 -0
- data/test/vendor/dhaka-2.2.1/lib/dhaka/parser/token.rb +22 -0
- data/test/vendor/dhaka-2.2.1/lib/dhaka/runtime.rb +51 -0
- data/test/vendor/dhaka-2.2.1/lib/dhaka/tokenizer/tokenizer.rb +190 -0
- data/test/vendor/dhaka-2.2.1/lib/dhaka.rb +62 -0
- data/test/vendor/dhaka-2.2.1/test/all_tests.rb +5 -0
- data/test/vendor/dhaka-2.2.1/test/arithmetic/arithmetic_evaluator.rb +64 -0
- data/test/vendor/dhaka-2.2.1/test/arithmetic/arithmetic_evaluator_test.rb +43 -0
- data/test/vendor/dhaka-2.2.1/test/arithmetic/arithmetic_grammar.rb +41 -0
- data/test/vendor/dhaka-2.2.1/test/arithmetic/arithmetic_grammar_test.rb +9 -0
- data/test/vendor/dhaka-2.2.1/test/arithmetic/arithmetic_test_methods.rb +9 -0
- data/test/vendor/dhaka-2.2.1/test/arithmetic/arithmetic_tokenizer.rb +39 -0
- data/test/vendor/dhaka-2.2.1/test/arithmetic/arithmetic_tokenizer_test.rb +38 -0
- data/test/vendor/dhaka-2.2.1/test/arithmetic_precedence/arithmetic_precedence_evaluator.rb +43 -0
- data/test/vendor/dhaka-2.2.1/test/arithmetic_precedence/arithmetic_precedence_grammar.rb +24 -0
- data/test/vendor/dhaka-2.2.1/test/arithmetic_precedence/arithmetic_precedence_grammar_test.rb +30 -0
- data/test/vendor/dhaka-2.2.1/test/arithmetic_precedence/arithmetic_precedence_lexer_specification.rb +23 -0
- data/test/vendor/dhaka-2.2.1/test/arithmetic_precedence/arithmetic_precedence_parser_test.rb +33 -0
- data/test/vendor/dhaka-2.2.1/test/brackets/bracket_grammar.rb +23 -0
- data/test/vendor/dhaka-2.2.1/test/brackets/bracket_tokenizer.rb +22 -0
- data/test/vendor/dhaka-2.2.1/test/brackets/brackets_test.rb +28 -0
- data/test/vendor/dhaka-2.2.1/test/chittagong/chittagong_driver.rb +46 -0
- data/test/vendor/dhaka-2.2.1/test/chittagong/chittagong_driver_test.rb +276 -0
- data/test/vendor/dhaka-2.2.1/test/chittagong/chittagong_evaluator.rb +284 -0
- data/test/vendor/dhaka-2.2.1/test/chittagong/chittagong_evaluator_test.rb +38 -0
- data/test/vendor/dhaka-2.2.1/test/chittagong/chittagong_grammar.rb +104 -0
- data/test/vendor/dhaka-2.2.1/test/chittagong/chittagong_lexer.rb +109 -0
- data/test/vendor/dhaka-2.2.1/test/chittagong/chittagong_lexer_specification.rb +37 -0
- data/test/vendor/dhaka-2.2.1/test/chittagong/chittagong_lexer_test.rb +58 -0
- data/test/vendor/dhaka-2.2.1/test/chittagong/chittagong_parser.rb +879 -0
- data/test/vendor/dhaka-2.2.1/test/chittagong/chittagong_parser_test.rb +55 -0
- data/test/vendor/dhaka-2.2.1/test/chittagong/chittagong_test.rb +170 -0
- data/test/vendor/dhaka-2.2.1/test/core/another_lalr_but_not_slr_grammar.rb +20 -0
- data/test/vendor/dhaka-2.2.1/test/core/compiled_parser_test.rb +44 -0
- data/test/vendor/dhaka-2.2.1/test/core/dfa_test.rb +170 -0
- data/test/vendor/dhaka-2.2.1/test/core/evaluator_test.rb +22 -0
- data/test/vendor/dhaka-2.2.1/test/core/grammar_test.rb +83 -0
- data/test/vendor/dhaka-2.2.1/test/core/lalr_but_not_slr_grammar.rb +19 -0
- data/test/vendor/dhaka-2.2.1/test/core/lexer_test.rb +139 -0
- data/test/vendor/dhaka-2.2.1/test/core/malformed_grammar.rb +7 -0
- data/test/vendor/dhaka-2.2.1/test/core/malformed_grammar_test.rb +8 -0
- data/test/vendor/dhaka-2.2.1/test/core/nullable_grammar.rb +21 -0
- data/test/vendor/dhaka-2.2.1/test/core/parse_result_test.rb +44 -0
- data/test/vendor/dhaka-2.2.1/test/core/parser_state_test.rb +24 -0
- data/test/vendor/dhaka-2.2.1/test/core/parser_test.rb +131 -0
- data/test/vendor/dhaka-2.2.1/test/core/precedence_grammar.rb +17 -0
- data/test/vendor/dhaka-2.2.1/test/core/precedence_grammar_test.rb +9 -0
- data/test/vendor/dhaka-2.2.1/test/core/rr_conflict_grammar.rb +21 -0
- data/test/vendor/dhaka-2.2.1/test/core/simple_grammar.rb +22 -0
- data/test/vendor/dhaka-2.2.1/test/core/sr_conflict_grammar.rb +16 -0
- data/test/vendor/dhaka-2.2.1/test/dhaka_test_helper.rb +17 -0
- data/test/vendor/dhaka-2.2.1/test/fake_logger.rb +17 -0
- data/test/vendor/simplerdb-0.2/lib/simplerdb/client_exception.rb +10 -0
- data/test/vendor/simplerdb-0.2/lib/simplerdb/db.rb +146 -0
- data/test/vendor/simplerdb-0.2/lib/simplerdb/query_language.rb +266 -0
- data/test/vendor/simplerdb-0.2/lib/simplerdb/server.rb +33 -0
- data/test/vendor/simplerdb-0.2/lib/simplerdb/servlet.rb +191 -0
- data/test/vendor/simplerdb-0.2/lib/simplerdb.rb +3 -0
- data/test/vendor/simplerdb-0.2/test/functional_test.rb +81 -0
- data/test/vendor/simplerdb-0.2/test/query_evaluator_test.rb +73 -0
- data/test/vendor/simplerdb-0.2/test/query_parser_test.rb +64 -0
- data/test/vendor/simplerdb-0.2/test/simplerdb_test.rb +80 -0
- metadata +182 -0
|
@@ -0,0 +1,165 @@
|
|
|
1
|
+
module Dhaka
  # The parser generator. To generate a parser from a grammar specification +ArithmeticPrecedenceGrammar+, one would
  # write:
  #   parser = Dhaka::Parser.new(ArithmeticPrecedenceGrammar)
  #
  # To compile this parser to Ruby source as +ArithmeticPrecedenceParser+:
  #   parser.compile_to_ruby_source_as(:ArithmeticPrecedenceParser)
  # which returns a string of Ruby code.
  class Parser
    include ParserMethods
    attr_reader :grammar

    # Creates a new parser from the given grammar. Messages are logged by default to STDOUT
    # and the log level is WARN. Shift-reduce conflicts are reported at WARN and reduce-reduce conflicts
    # at ERROR. You may pass in your own logger. Logging at DEBUG shows a lot of progress output.
    def initialize(grammar, logger = nil)
      # Memoized action objects: one ShiftAction per destination state, one
      # ReduceAction per production, shared across all states.
      @shift_actions = Hash.new {|hash, state| hash[state] = ShiftAction.new(state)}
      @reduce_actions = Hash.new {|hash, production| hash[production] = ReduceAction.new(production)}
      @logger = logger || default_logger
      # state -> {symbol -> destination state}
      @transitions = Hash.new {|hash, state| hash[state] = {}}
      @grammar = grammar
      # item -> list of channels out of that item (populated below and by
      # grammar.closure; pumped to a fixed point in initialize_states).
      @channels = Hash.new {|hash, start_item| hash[start_item] = []}
      # Lazily-built state table keyed by item-set kernel: looking up a kernel
      # that has no state yet closes it, creates the state, and recursively
      # creates all states reachable from it (hash[kernel] is assigned before
      # recursing so cycles terminate).
      @states = Hash.new do |hash, kernel|
        closure, channels = grammar.closure(kernel)
        channels.each do |start_item, channel_set|
          @channels[start_item].concat channel_set.to_a
        end
        new_state = ParserState.new(self, closure)
        hash[kernel] = new_state
        @logger.debug("Created #{new_state.unique_name}.")
        new_state.transition_items.each do |symbol, items|
          destination_kernel = ItemSet.new(items.collect{|item| item.next_item})
          destination_state = hash[destination_kernel]
          items.each {|item| @channels[item] << grammar.passive_channel(item, destination_state.items[item.next_item])}
          @transitions[new_state][symbol] = destination_state
        end
        new_state
      end
      initialize_states
    end

    # Returns the Ruby source of the generated parser compiled as +parser_class_name+. This can be written out to a file.
    def compile_to_ruby_source_as parser_class_name
      result = "class #{parser_class_name} < Dhaka::CompiledParser\n\n"
      result << "  self.grammar = #{grammar.name}\n\n"
      result << "  start_with #{start_state.id}\n\n"
      states.each do |state|
        result << "#{state.compile_to_ruby_source}\n\n"
      end
      result << "end"
      result
    end

    # Returns the dot representation of the parser. If <tt>:hide_lookaheads</tt> is set to true in the
    # options hash, lookaheads are not written out to the parser states, which is helpful when there are dozens
    # of lookahead symbols for every item in every state.
    def to_dot(options = {})
      Dot::Digraph.new(:fontsize => 10, :shape => :box, :size => 5) do |g|
        states.each do |state|
          g.node(state, :label => state.items.values.collect{|item| item.to_s(options)}.join("\n"))
          @transitions[state].each do |symbol, dest_state|
            g.edge(state, dest_state, :label => symbol.name)
          end
        end
      end.to_dot
    end

    def inspect
      "<Dhaka::Parser grammar : #{grammar}>"
    end

    private
      attr_reader :start_state

      def states
        @states.values
      end

      # Default STDOUT logger at WARN with the custom formatter below.
      def default_logger
        logger = Logger.new(STDOUT)
        logger.level = Logger::WARN
        logger.formatter = ParserLogOutputFormatter.new
        logger
      end

      # Seeds the start state (which transitively builds every reachable state
      # via the lazy @states hash), pumps lookahead channels to a fixed point,
      # then fills in the action tables.
      def initialize_states
        start_productions = grammar.productions_for_symbol(grammar.start_symbol)
        raise NoStartProductionsError.new(grammar) if start_productions.empty?
        start_items = ItemSet.new(start_productions.collect {|production| Item.new(production, 0)})
        # The end-of-input symbol is the initial lookahead for every start item.
        start_items.each {|start_item| start_item.lookaheadset << grammar.end_symbol}
        @start_state = @states[start_items]
        @logger.debug("Pumping #{@channels.keys.size} dirty items...")
        pump_channels @channels.keys
        @logger.debug("Generating shift actions...")
        generate_shift_actions
        @logger.debug("Generating reduce actions...")
        generate_reduce_actions
      end

      # One shift action per outgoing transition, keyed by symbol name.
      def generate_shift_actions
        @states.values.each do |state|
          @transitions[state].keys.each do |symbol|
            state.actions[symbol.name] = @shift_actions[@transitions[state][symbol]]
          end
        end
      end

      # Items with no next symbol (dot at the end) are complete and yield reductions.
      def generate_reduce_actions
        @states.values.each do |state|
          state.items.values.select{ |item| !item.next_symbol }.each do |item|
            create_reduction_actions_for_item_and_state item, state
          end
        end
      end

      # Installs a reduce action for each lookahead of +item+ in +state+.
      # Collisions with an existing action are resolved by the conflict
      # objects: reduce-reduce logged at ERROR, shift-reduce at WARN.
      def create_reduction_actions_for_item_and_state item, state
        item.lookaheadset.each do |lookahead|
          new_action = @reduce_actions[item.production]
          if existing_action = state.actions[lookahead.name]
            if ReduceAction === existing_action
              message = ReduceReduceConflict.new(state, lookahead, new_action).resolve
              @logger.error(message)
            else
              message = ShiftReduceConflict.new(state, lookahead, new_action).resolve
              @logger.warn(message)
            end
          else
            state.actions[lookahead.name] = new_action
          end
        end
      end

      # Worklist fixed-point loop: pump every channel leaving each dirty item;
      # any end item whose channel reported a change becomes dirty for the next
      # round. Terminates when a full pass produces no changes.
      def pump_channels dirty_items
        loop do
          new_dirty_items = Set.new
          dirty_items.each do |dirty_item|
            @channels[dirty_item].each do |channel|
              new_dirty_items << channel.end_item if channel.pump
            end
          end
          break if new_dirty_items.empty?
          @logger.debug("#{new_dirty_items.size} dirty items...")
          dirty_items = new_dirty_items
        end
      end
  end

  # Raised when trying to create a Parser for a grammar that has no productions for the start symbol
  class NoStartProductionsError < StandardError
    def initialize(grammar) #:nodoc:
      @grammar = grammar
    end
    def to_s #:nodoc:
      "No start productions defined for #{@grammar.name}"
    end
  end

  # Formats parser log messages on their own line; msg2str is inherited from
  # Logger::Formatter.
  class ParserLogOutputFormatter < Logger::Formatter #:nodoc:
    def call(severity, time, progname, msg)
      "\n%s -- %s: %s\n" % [ severity, progname, msg2str(msg)]
    end
  end

end
|
|
165
|
+
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
#!/usr/bin/env ruby
module Dhaka
  # Shared parsing entry point, mixed into both Parser and CompiledParser.
  # Hosts expect +grammar+ and +start_state+ to be available on the including
  # object.
  module ParserMethods
    # +token_stream+ is an Enumerable of Token-s. Returns either a ParseSuccessResult or a ParseErrorResult.
    def parse token_stream
      ParserRun.new(grammar, start_state, token_stream).run
    end
  end
end
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
module Dhaka
  # A single run of the parser over one token stream: maintains the node and
  # state stacks and drives the action table until the stream is exhausted.
  class ParserRun #:nodoc:

    def initialize(grammar, start_state, token_stream)
      @grammar = grammar
      @node_stack = []
      @state_stack = [start_state]
      @token_stream = token_stream
      @symbol_queue = []
    end

    # Consumes the token stream. Returns a ParseErrorResult as soon as a token
    # has no applicable action, propagates a TokenizerErrorResult if the
    # stream's +each+ returned one, and otherwise returns a ParseSuccessResult
    # (or nil if the final node is not a head node).
    def run
      tokenize_result = token_stream.each do |token|
        @current_token = token
        @symbol_queue << @current_token.symbol_name
        error = execute_actions
        # NOTE: this `return` exits run itself from inside the each block.
        return error if error
        node_stack << ParseTreeLeafNode.new(@current_token)
        state_stack.last
      end
      return tokenize_result if TokenizerErrorResult === tokenize_result
      ParseSuccessResult.new(node_stack.first) if node_stack.first.head_node?
    end

    private

    attr_reader :state_stack, :token_stream, :node_stack

    # Drains the symbol queue (LIFO via pop), executing the current state's
    # action for each symbol. Action code runs via instance_eval in the
    # context of this run, so it can manipulate the stacks and re-queue
    # symbols. Returns a ParseErrorResult on a missing action, nil on success.
    def execute_actions
      while symbol_name = @symbol_queue.pop
        action = state_stack.last.actions[symbol_name]
        return ParseErrorResult.new(@current_token, state_stack.last) unless action
        instance_eval(&action.action_code)
      end
      nil
    end

  end
end
|
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
module Dhaka
|
|
2
|
+
class ParserState #:nodoc:
  attr_accessor :items, :actions, :id

  # Class-wide monotonically increasing counter used to assign state ids.
  @@state_id = 0

  # Returns the next unused state id and advances the counter.
  def self.next_state_id
    result = @@state_id
    @@state_id += 1
    result
  end

  def initialize(parser, items, id=nil)
    @parser = parser
    @items = items
    @actions = {}
    # Explicit ids are supplied when reconstructing a compiled parser;
    # otherwise a fresh id is drawn from the counter.
    @id = id || ParserState.next_state_id
  end

  # Groups this state's items by the symbol after the dot: the result maps
  # each transition symbol to the ItemSet of items that shift on it. Items
  # with no next symbol (complete items) are excluded.
  def transition_items
    result = Hash.new {|h, k| h[k] = ItemSet.new()}
    items.values.each do |item|
      result[item.next_symbol] << item if item.next_symbol
    end
    result
  end

  def unique_name
    "State#{id}"
  end

  # Emits this state as an `at_state(id) { for_symbols(...) {...} }` snippet
  # of Ruby source for CompiledParser. Symbols sharing an action are grouped
  # into a single for_symbols call.
  def compile_to_ruby_source
    result = "  at_state(#{id}) {\n"

    symbol_names_by_action = Hash.new {|hash, key| hash[key] = []}
    actions.each do |symbol_name, action|
      symbol_names_by_action[action] << symbol_name
    end

    symbol_names_by_action.keys.each do |action|
      symbol_names = symbol_names_by_action[action].collect {|symbol_name| "#{symbol_name.inspect}"}.join(', ')
      result << "    for_symbols(#{symbol_names}) { #{action.compile_to_ruby_source} }\n"
    end

    result << "  }"
    result
  end

  # Counterpart of compile_to_ruby_source: installs the action produced by
  # +blk+ (evaluated in the parser's context) for each named symbol.
  # Presumably invoked by the generated `for_symbols` calls when a compiled
  # parser is loaded — TODO confirm against CompiledParser.
  def for_symbols *symbol_names, &blk
    symbol_names.each do |symbol_name|
      actions[symbol_name] = @parser.instance_eval(&blk)
    end
  end

  alias :for_symbol :for_symbols

  def to_s(options = {})
    items.values.collect{|item| item.to_s(options)}.join("\n")
  end

end
|
|
62
|
+
|
|
63
|
+
# A Set of parser items that hashes by content, so two kernels holding the
# same items are interchangeable as Hash keys (used for the parser's state
# table).
class ItemSet < Set #:nodoc:
  # Order-independent content hash: fold every member's hash into a fixed
  # seed with XOR.
  def hash
    inject(5381) { |acc, item| acc ^ item.hash }
  end

  # Hash-key equality delegates to Set value equality (same membership).
  def eql?(other)
    self == other
  end
end
|
|
74
|
+
end
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
module Dhaka
  # Represents a portion of the input character stream that is mapped by the tokenizer
  # to a symbol in the grammar. The attribute +input_position+ contains the start index position of the original
  # string input that this token came from. It can be used to report errors by indicating the specific portion
  # of the input where the error occurred.
  class Token
    attr_accessor :symbol_name, :value, :input_position

    # +symbol_name+ names the grammar symbol this token maps to, +value+ is the
    # matched text (nil for tokens with no payload), and +input_position+ is the
    # index into the original input where the match began.
    def initialize(symbol_name, value, input_position)
      @symbol_name = symbol_name
      @value = value
      @input_position = input_position
    end

    def to_s #:nodoc:
      value ? "#{symbol_name} : #{value}" : "#{symbol_name}"
    end

    # Tokens are equal when symbol name and value match; +input_position+ is
    # deliberately ignored. Guarded with respond_to? so comparing against nil
    # or an arbitrary object returns false instead of raising NoMethodError.
    def == other
      other.respond_to?(:symbol_name) && other.respond_to?(:value) &&
        symbol_name == other.symbol_name && value == other.value
    end
  end
end
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
#--
|
|
2
|
+
# Copyright (c) 2006, 2007 Mushfeq Khan
|
|
3
|
+
#
|
|
4
|
+
# Permission is hereby granted, free of charge, to any person obtaining
|
|
5
|
+
# a copy of this software and associated documentation files (the
|
|
6
|
+
# "Software"), to deal in the Software without restriction, including
|
|
7
|
+
# without limitation the rights to use, copy, modify, merge, publish,
|
|
8
|
+
# distribute, sublicense, and/or sell copies of the Software, and to
|
|
9
|
+
# permit persons to whom the Software is furnished to do so, subject to
|
|
10
|
+
# the following conditions:
|
|
11
|
+
#
|
|
12
|
+
# The above copyright notice and this permission notice shall be
|
|
13
|
+
# included in all copies or substantial portions of the Software.
|
|
14
|
+
#
|
|
15
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
|
16
|
+
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
|
17
|
+
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
|
18
|
+
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
|
19
|
+
# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
|
20
|
+
# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
|
21
|
+
# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
|
22
|
+
#++
|
|
23
|
+
|
|
24
|
+
require 'set'
|
|
25
|
+
require 'logger'
|
|
26
|
+
require 'delegate'
|
|
27
|
+
|
|
28
|
+
%w[
|
|
29
|
+
grammar/grammar_symbol
|
|
30
|
+
grammar/production
|
|
31
|
+
grammar/grammar
|
|
32
|
+
grammar/precedence
|
|
33
|
+
parser/parse_tree
|
|
34
|
+
parser/parse_result
|
|
35
|
+
parser/parser_methods
|
|
36
|
+
parser/parser_state
|
|
37
|
+
parser/token
|
|
38
|
+
parser/action
|
|
39
|
+
parser/parser_run
|
|
40
|
+
parser/compiled_parser
|
|
41
|
+
tokenizer/tokenizer
|
|
42
|
+
evaluator/evaluator
|
|
43
|
+
lexer/accept_actions
|
|
44
|
+
lexer/alphabet
|
|
45
|
+
lexer/state_machine
|
|
46
|
+
lexer/state
|
|
47
|
+
lexer/specification
|
|
48
|
+
lexer/lexeme
|
|
49
|
+
lexer/lexer_run
|
|
50
|
+
lexer/compiled_lexer
|
|
51
|
+
].each {|path| require File.join(File.dirname(__FILE__), path)}
|
|
@@ -0,0 +1,190 @@
|
|
|
1
|
+
module Dhaka
|
|
2
|
+
# Reserved constant used to identify the idle state of the tokenizer.
|
|
3
|
+
TOKENIZER_IDLE_STATE = :idle_state
|
|
4
|
+
|
|
5
|
+
# Returned on successful tokenizing of the input stream. Supports iteration by including Enumerable, so it can
|
|
6
|
+
# be passed in directly to the parser.
|
|
7
|
+
# Returned on successful tokenizing of the input stream. Supports iteration by
# including Enumerable, so it can be passed in directly to the parser.
class TokenizerSuccessResult
  include Enumerable

  def initialize(tokens)
    @tokens = tokens
  end

  # Always false: this result type represents success.
  def has_error?
    false
  end

  # Yields each token in order; without a block, delegates to the
  # underlying collection.
  def each
    return @tokens.each unless block_given?
    @tokens.each { |token| yield token }
  end
end
|
|
23
|
+
|
|
24
|
+
# Returned when tokenizing fails due to an unexpected character in the input stream.
|
|
25
|
+
# Returned when tokenizing fails due to an unexpected character in the input
# stream.
class TokenizerErrorResult
  # The index of the character that caused the error.
  attr_reader :unexpected_char_index

  def initialize(unexpected_char_index)
    @unexpected_char_index = unexpected_char_index
  end

  # Always true: this result type represents failure.
  def has_error?
    true
  end
end
|
|
38
|
+
|
|
39
|
+
# A tokenizer state encapsulates actions that should be performed upon
|
|
40
|
+
# encountering each permissible character for that state.
|
|
41
|
+
# A tokenizer state encapsulates actions that should be performed upon
# encountering each permissible character for that state.
class TokenizerState
  attr_reader :actions, :default_action

  def initialize
    @actions = {}
  end

  # Registers +blk+ as the action to run when any of +characters+ is
  # encountered in the input stream.
  def for_characters(characters, &blk)
    characters.each { |character| @actions[character] = blk }
  end

  alias_method :for_character, :for_characters

  # Registers +blk+ as the fallback action for characters that have no
  # explicit entry in +actions+.
  def for_default(&blk)
    @default_action = blk
  end

  def to_s #:nodoc:
    @actions.inspect
  end

end
|
|
67
|
+
|
|
68
|
+
# This abstract class contains a DSL for hand-coding tokenizers. Subclass it to implement tokenizers for specific grammars.
|
|
69
|
+
#
|
|
70
|
+
# Tokenizers are state machines. Each state of a tokenizer is identified
|
|
71
|
+
# by a Ruby symbol. The constant Dhaka::TOKENIZER_IDLE_STATE is reserved for the idle state of the tokenizer (the one
|
|
72
|
+
# that it starts in).
|
|
73
|
+
#
|
|
74
|
+
# The following is a tokenizer for arithmetic expressions with integer terms. The tokenizer starts in the idle state
|
|
75
|
+
# creating single-character tokens for all characters excepts digits and whitespace. It shifts to
|
|
76
|
+
# <tt>:get_integer_literal</tt> when it encounters a digit character and creates a token on the stack on which it
|
|
77
|
+
# accumulates the value of the literal. When it again encounters a non-digit character, it shifts back to idle.
|
|
78
|
+
# Whitespace is treated as a delimiter, but not shifted as a token.
|
|
79
|
+
#
|
|
80
|
+
# class ArithmeticPrecedenceTokenizer < Dhaka::Tokenizer
|
|
81
|
+
#
|
|
82
|
+
# digits = ('0'..'9').to_a
|
|
83
|
+
# parenths = ['(', ')']
|
|
84
|
+
# operators = ['-', '+', '/', '*', '^']
|
|
85
|
+
# functions = ['h', 'l']
|
|
86
|
+
# arg_separator = [',']
|
|
87
|
+
# whitespace = [' ']
|
|
88
|
+
#
|
|
89
|
+
# all_characters = digits + parenths + operators + functions + arg_separator + whitespace
|
|
90
|
+
#
|
|
91
|
+
# for_state Dhaka::TOKENIZER_IDLE_STATE do
|
|
92
|
+
# for_characters(all_characters - (digits + whitespace)) do
|
|
93
|
+
# create_token(curr_char, nil)
|
|
94
|
+
# advance
|
|
95
|
+
# end
|
|
96
|
+
# for_characters digits do
|
|
97
|
+
# create_token('n', '')
|
|
98
|
+
# switch_to :get_integer_literal
|
|
99
|
+
# end
|
|
100
|
+
# for_character whitespace do
|
|
101
|
+
# advance
|
|
102
|
+
# end
|
|
103
|
+
# end
|
|
104
|
+
#
|
|
105
|
+
# for_state :get_integer_literal do
|
|
106
|
+
# for_characters all_characters - digits do
|
|
107
|
+
# switch_to Dhaka::TOKENIZER_IDLE_STATE
|
|
108
|
+
# end
|
|
109
|
+
# for_characters digits do
|
|
110
|
+
# curr_token.value << curr_char
|
|
111
|
+
# advance
|
|
112
|
+
# end
|
|
113
|
+
# end
|
|
114
|
+
#
|
|
115
|
+
# end
|
|
116
|
+
#
|
|
117
|
+
# For languages where the lexical structure is very complicated, it may be too tedious to implement a Tokenizer by hand.
|
|
118
|
+
# In such cases, it's a lot easier to write a LexerSpecification using regular expressions and create a Lexer from that.
|
|
119
|
+
class Tokenizer
  class << self
    # Define the action for the state named +state_name+.
    def for_state(state_name, &blk)
      states[state_name].instance_eval(&blk)
    end

    # Tokenizes a string +input+ and returns a TokenizerErrorResult on failure or a TokenizerSuccessResult on success.
    def tokenize(input)
      new(input).run
    end

    private
    # Gives each concrete tokenizer subclass its own states/grammar accessors
    # and its own lazily-populated hash of TokenizerState objects, so state
    # tables are not shared between subclasses.
    def inherited(tokenizer)
      class << tokenizer
        attr_accessor :states, :grammar
      end
      tokenizer.states = Hash.new {|hash, key| hash[key] = TokenizerState.new}
    end
  end

  # The tokens shifted so far.
  attr_reader :tokens

  def initialize(input) #:nodoc:
    @input = input
    @current_state = self.class.states[TOKENIZER_IDLE_STATE]
    @curr_char_index = 0
    @tokens = []
  end

  # The character currently being processed, or nil past end of input.
  # NOTE(review): the `.chr` call suggests this was written for Ruby 1.8,
  # where String#[] returned an Integer; on 1.9+ it still yields the same
  # one-character string.
  def curr_char
    @input[@curr_char_index] and @input[@curr_char_index].chr
  end

  # Advance to the next character.
  def advance
    @curr_char_index += 1
  end

  # NOTE(review): `grammar` is only defined as a class-level accessor by the
  # inherited hook above, not on instances — confirm this method is reachable
  # without raising NameError.
  def inspect
    "<Dhaka::Tokenizer grammar : #{grammar}>"
  end

  # The token currently on top of the stack.
  def curr_token
    tokens.last
  end

  # Push a new token on to the stack with symbol corresponding to +symbol_name+ and a value of +value+.
  def create_token(symbol_name, value)
    new_token = Dhaka::Token.new(symbol_name, value, @curr_char_index)
    tokens << new_token
  end

  # Change the active state of the tokenizer to the state identified by the symbol +state_name+.
  def switch_to state_name
    @current_state = self.class.states[state_name]
  end

  # Drives the state machine: for each input character, runs the current
  # state's action (or its default action). A character with no applicable
  # action aborts with a TokenizerErrorResult carrying its index. On reaching
  # end of input, appends the end-of-input token and wraps the tokens in a
  # TokenizerSuccessResult.
  def run #:nodoc:
    while curr_char
      blk = @current_state.actions[curr_char] || @current_state.default_action
      return TokenizerErrorResult.new(@curr_char_index) unless blk
      instance_eval(&blk)
    end
    tokens << Dhaka::Token.new(Dhaka::END_SYMBOL_NAME, nil, nil)
    TokenizerSuccessResult.new(tokens)
  end
end
|
|
190
|
+
end
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
#--
|
|
2
|
+
# Copyright (c) 2006, 2007 Mushfeq Khan
|
|
3
|
+
#
|
|
4
|
+
# Permission is hereby granted, free of charge, to any person obtaining
|
|
5
|
+
# a copy of this software and associated documentation files (the
|
|
6
|
+
# "Software"), to deal in the Software without restriction, including
|
|
7
|
+
# without limitation the rights to use, copy, modify, merge, publish,
|
|
8
|
+
# distribute, sublicense, and/or sell copies of the Software, and to
|
|
9
|
+
# permit persons to whom the Software is furnished to do so, subject to
|
|
10
|
+
# the following conditions:
|
|
11
|
+
#
|
|
12
|
+
# The above copyright notice and this permission notice shall be
|
|
13
|
+
# included in all copies or substantial portions of the Software.
|
|
14
|
+
#
|
|
15
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
|
16
|
+
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
|
17
|
+
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
|
18
|
+
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
|
19
|
+
# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
|
20
|
+
# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
|
21
|
+
# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
|
22
|
+
#++
|
|
23
|
+
|
|
24
|
+
require 'set'
|
|
25
|
+
require 'logger'
|
|
26
|
+
require 'delegate'
|
|
27
|
+
|
|
28
|
+
%w[
|
|
29
|
+
dot/dot
|
|
30
|
+
grammar/grammar_symbol
|
|
31
|
+
grammar/production
|
|
32
|
+
grammar/closure_hash
|
|
33
|
+
grammar/grammar
|
|
34
|
+
grammar/precedence
|
|
35
|
+
parser/parse_tree
|
|
36
|
+
parser/parse_result
|
|
37
|
+
parser/item
|
|
38
|
+
parser/channel
|
|
39
|
+
parser/parser_methods
|
|
40
|
+
parser/parser_state
|
|
41
|
+
parser/conflict
|
|
42
|
+
parser/token
|
|
43
|
+
parser/action
|
|
44
|
+
parser/parser_run
|
|
45
|
+
parser/parser
|
|
46
|
+
parser/compiled_parser
|
|
47
|
+
tokenizer/tokenizer
|
|
48
|
+
evaluator/evaluator
|
|
49
|
+
lexer/accept_actions
|
|
50
|
+
lexer/alphabet
|
|
51
|
+
lexer/regex_grammar
|
|
52
|
+
lexer/regex_tokenizer
|
|
53
|
+
lexer/regex_parser
|
|
54
|
+
lexer/state_machine
|
|
55
|
+
lexer/dfa
|
|
56
|
+
lexer/state
|
|
57
|
+
lexer/specification
|
|
58
|
+
lexer/lexeme
|
|
59
|
+
lexer/lexer_run
|
|
60
|
+
lexer/lexer
|
|
61
|
+
lexer/compiled_lexer
|
|
62
|
+
].each {|path| require File.join(File.dirname(__FILE__), 'dhaka/' + path)}
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
require File.dirname(__FILE__) + '/arithmetic_grammar'

# Evaluator for parse trees produced by ArithmeticGrammar. Each rule below
# names a production in the grammar and computes that node's value from its
# evaluated children (child_nodes indices follow the production's symbol
# positions, e.g. [operand, operator, operand]).
class ArithmeticEvaluator < Dhaka::Evaluator

  self.grammar = ArithmeticGrammar

  define_evaluation_rules do

    for_subtraction do
      evaluate(child_nodes[0]) - evaluate(child_nodes[2])
    end

    for_addition do
      evaluate(child_nodes[0]) + evaluate(child_nodes[2])
    end

    # to_f forces float division rather than integer division.
    for_division do
      evaluate(child_nodes[0]).to_f/evaluate(child_nodes[2])
    end

    for_multiplication do
      evaluate(child_nodes[0]) * evaluate(child_nodes[2])
    end

    # A literal's value is the raw token value of the leaf node.
    for_getting_literals do
      child_nodes[0].token.value
    end

    # ( expr ) -- the value is that of the inner expression.
    for_unpacking_parenthetized_expression do
      evaluate(child_nodes[1])
    end

    for_empty_args do
      []
    end

    # function(args): child 0 evaluates to a callable, child 2 to the
    # argument list.
    for_evaluating_function do
      evaluate(child_nodes[0]).call evaluate(child_nodes[2])
    end

    # arg , args -- prepend the first argument to the rest.
    for_concatenating_args do
      [evaluate(child_nodes[0])]+evaluate(child_nodes[2])
    end

    for_single_args do
      [evaluate(child_nodes[0])]
    end

    for_min_function do
      @min_function
    end

    for_max_function do
      @max_function
    end

  end

  # +min_function+ and +max_function+ are callables injected by the caller;
  # they become the values of the grammar's min/max function symbols.
  def initialize(min_function, max_function)
    @min_function = min_function
    @max_function = max_function
  end

end
|