simply_stored 0.1.4
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/simply_stored/class_methods_base.rb +31 -0
- data/lib/simply_stored/couch/belongs_to.rb +117 -0
- data/lib/simply_stored/couch/ext/couch_potato.rb +16 -0
- data/lib/simply_stored/couch/has_many.rb +148 -0
- data/lib/simply_stored/couch/has_one.rb +93 -0
- data/lib/simply_stored/couch/validations.rb +74 -0
- data/lib/simply_stored/couch/views/array_property_view_spec.rb +22 -0
- data/lib/simply_stored/couch/views.rb +1 -0
- data/lib/simply_stored/couch.rb +278 -0
- data/lib/simply_stored/instance_methods.rb +143 -0
- data/lib/simply_stored/simpledb/associations.rb +196 -0
- data/lib/simply_stored/simpledb/attributes.rb +173 -0
- data/lib/simply_stored/simpledb/storag.rb +85 -0
- data/lib/simply_stored/simpledb/validations.rb +88 -0
- data/lib/simply_stored/simpledb.rb +212 -0
- data/lib/simply_stored/storage.rb +93 -0
- data/lib/simply_stored.rb +9 -0
- data/test/custom_views_test.rb +33 -0
- data/test/fixtures/couch.rb +182 -0
- data/test/fixtures/simpledb/item.rb +11 -0
- data/test/fixtures/simpledb/item_daddy.rb +8 -0
- data/test/fixtures/simpledb/log_item.rb +3 -0
- data/test/fixtures/simpledb/namespace_bar.rb +5 -0
- data/test/fixtures/simpledb/namespace_foo.rb +7 -0
- data/test/fixtures/simpledb/protected_item.rb +3 -0
- data/test/simply_stored_couch_test.rb +1684 -0
- data/test/simply_stored_simpledb_test.rb +1341 -0
- data/test/test_helper.rb +22 -0
- data/test/vendor/dhaka-2.2.1/lib/dhaka/dot/dot.rb +29 -0
- data/test/vendor/dhaka-2.2.1/lib/dhaka/evaluator/evaluator.rb +133 -0
- data/test/vendor/dhaka-2.2.1/lib/dhaka/grammar/closure_hash.rb +15 -0
- data/test/vendor/dhaka-2.2.1/lib/dhaka/grammar/grammar.rb +240 -0
- data/test/vendor/dhaka-2.2.1/lib/dhaka/grammar/grammar_symbol.rb +27 -0
- data/test/vendor/dhaka-2.2.1/lib/dhaka/grammar/precedence.rb +19 -0
- data/test/vendor/dhaka-2.2.1/lib/dhaka/grammar/production.rb +36 -0
- data/test/vendor/dhaka-2.2.1/lib/dhaka/lexer/accept_actions.rb +36 -0
- data/test/vendor/dhaka-2.2.1/lib/dhaka/lexer/alphabet.rb +21 -0
- data/test/vendor/dhaka-2.2.1/lib/dhaka/lexer/compiled_lexer.rb +46 -0
- data/test/vendor/dhaka-2.2.1/lib/dhaka/lexer/dfa.rb +121 -0
- data/test/vendor/dhaka-2.2.1/lib/dhaka/lexer/lexeme.rb +32 -0
- data/test/vendor/dhaka-2.2.1/lib/dhaka/lexer/lexer.rb +70 -0
- data/test/vendor/dhaka-2.2.1/lib/dhaka/lexer/lexer_run.rb +78 -0
- data/test/vendor/dhaka-2.2.1/lib/dhaka/lexer/regex_grammar.rb +392 -0
- data/test/vendor/dhaka-2.2.1/lib/dhaka/lexer/regex_parser.rb +2010 -0
- data/test/vendor/dhaka-2.2.1/lib/dhaka/lexer/regex_tokenizer.rb +14 -0
- data/test/vendor/dhaka-2.2.1/lib/dhaka/lexer/specification.rb +96 -0
- data/test/vendor/dhaka-2.2.1/lib/dhaka/lexer/state.rb +68 -0
- data/test/vendor/dhaka-2.2.1/lib/dhaka/lexer/state_machine.rb +37 -0
- data/test/vendor/dhaka-2.2.1/lib/dhaka/parser/action.rb +55 -0
- data/test/vendor/dhaka-2.2.1/lib/dhaka/parser/channel.rb +58 -0
- data/test/vendor/dhaka-2.2.1/lib/dhaka/parser/compiled_parser.rb +51 -0
- data/test/vendor/dhaka-2.2.1/lib/dhaka/parser/conflict.rb +54 -0
- data/test/vendor/dhaka-2.2.1/lib/dhaka/parser/item.rb +42 -0
- data/test/vendor/dhaka-2.2.1/lib/dhaka/parser/parse_result.rb +50 -0
- data/test/vendor/dhaka-2.2.1/lib/dhaka/parser/parse_tree.rb +66 -0
- data/test/vendor/dhaka-2.2.1/lib/dhaka/parser/parser.rb +165 -0
- data/test/vendor/dhaka-2.2.1/lib/dhaka/parser/parser_methods.rb +11 -0
- data/test/vendor/dhaka-2.2.1/lib/dhaka/parser/parser_run.rb +39 -0
- data/test/vendor/dhaka-2.2.1/lib/dhaka/parser/parser_state.rb +74 -0
- data/test/vendor/dhaka-2.2.1/lib/dhaka/parser/token.rb +22 -0
- data/test/vendor/dhaka-2.2.1/lib/dhaka/runtime.rb +51 -0
- data/test/vendor/dhaka-2.2.1/lib/dhaka/tokenizer/tokenizer.rb +190 -0
- data/test/vendor/dhaka-2.2.1/lib/dhaka.rb +62 -0
- data/test/vendor/dhaka-2.2.1/test/all_tests.rb +5 -0
- data/test/vendor/dhaka-2.2.1/test/arithmetic/arithmetic_evaluator.rb +64 -0
- data/test/vendor/dhaka-2.2.1/test/arithmetic/arithmetic_evaluator_test.rb +43 -0
- data/test/vendor/dhaka-2.2.1/test/arithmetic/arithmetic_grammar.rb +41 -0
- data/test/vendor/dhaka-2.2.1/test/arithmetic/arithmetic_grammar_test.rb +9 -0
- data/test/vendor/dhaka-2.2.1/test/arithmetic/arithmetic_test_methods.rb +9 -0
- data/test/vendor/dhaka-2.2.1/test/arithmetic/arithmetic_tokenizer.rb +39 -0
- data/test/vendor/dhaka-2.2.1/test/arithmetic/arithmetic_tokenizer_test.rb +38 -0
- data/test/vendor/dhaka-2.2.1/test/arithmetic_precedence/arithmetic_precedence_evaluator.rb +43 -0
- data/test/vendor/dhaka-2.2.1/test/arithmetic_precedence/arithmetic_precedence_grammar.rb +24 -0
- data/test/vendor/dhaka-2.2.1/test/arithmetic_precedence/arithmetic_precedence_grammar_test.rb +30 -0
- data/test/vendor/dhaka-2.2.1/test/arithmetic_precedence/arithmetic_precedence_lexer_specification.rb +23 -0
- data/test/vendor/dhaka-2.2.1/test/arithmetic_precedence/arithmetic_precedence_parser_test.rb +33 -0
- data/test/vendor/dhaka-2.2.1/test/brackets/bracket_grammar.rb +23 -0
- data/test/vendor/dhaka-2.2.1/test/brackets/bracket_tokenizer.rb +22 -0
- data/test/vendor/dhaka-2.2.1/test/brackets/brackets_test.rb +28 -0
- data/test/vendor/dhaka-2.2.1/test/chittagong/chittagong_driver.rb +46 -0
- data/test/vendor/dhaka-2.2.1/test/chittagong/chittagong_driver_test.rb +276 -0
- data/test/vendor/dhaka-2.2.1/test/chittagong/chittagong_evaluator.rb +284 -0
- data/test/vendor/dhaka-2.2.1/test/chittagong/chittagong_evaluator_test.rb +38 -0
- data/test/vendor/dhaka-2.2.1/test/chittagong/chittagong_grammar.rb +104 -0
- data/test/vendor/dhaka-2.2.1/test/chittagong/chittagong_lexer.rb +109 -0
- data/test/vendor/dhaka-2.2.1/test/chittagong/chittagong_lexer_specification.rb +37 -0
- data/test/vendor/dhaka-2.2.1/test/chittagong/chittagong_lexer_test.rb +58 -0
- data/test/vendor/dhaka-2.2.1/test/chittagong/chittagong_parser.rb +879 -0
- data/test/vendor/dhaka-2.2.1/test/chittagong/chittagong_parser_test.rb +55 -0
- data/test/vendor/dhaka-2.2.1/test/chittagong/chittagong_test.rb +170 -0
- data/test/vendor/dhaka-2.2.1/test/core/another_lalr_but_not_slr_grammar.rb +20 -0
- data/test/vendor/dhaka-2.2.1/test/core/compiled_parser_test.rb +44 -0
- data/test/vendor/dhaka-2.2.1/test/core/dfa_test.rb +170 -0
- data/test/vendor/dhaka-2.2.1/test/core/evaluator_test.rb +22 -0
- data/test/vendor/dhaka-2.2.1/test/core/grammar_test.rb +83 -0
- data/test/vendor/dhaka-2.2.1/test/core/lalr_but_not_slr_grammar.rb +19 -0
- data/test/vendor/dhaka-2.2.1/test/core/lexer_test.rb +139 -0
- data/test/vendor/dhaka-2.2.1/test/core/malformed_grammar.rb +7 -0
- data/test/vendor/dhaka-2.2.1/test/core/malformed_grammar_test.rb +8 -0
- data/test/vendor/dhaka-2.2.1/test/core/nullable_grammar.rb +21 -0
- data/test/vendor/dhaka-2.2.1/test/core/parse_result_test.rb +44 -0
- data/test/vendor/dhaka-2.2.1/test/core/parser_state_test.rb +24 -0
- data/test/vendor/dhaka-2.2.1/test/core/parser_test.rb +131 -0
- data/test/vendor/dhaka-2.2.1/test/core/precedence_grammar.rb +17 -0
- data/test/vendor/dhaka-2.2.1/test/core/precedence_grammar_test.rb +9 -0
- data/test/vendor/dhaka-2.2.1/test/core/rr_conflict_grammar.rb +21 -0
- data/test/vendor/dhaka-2.2.1/test/core/simple_grammar.rb +22 -0
- data/test/vendor/dhaka-2.2.1/test/core/sr_conflict_grammar.rb +16 -0
- data/test/vendor/dhaka-2.2.1/test/dhaka_test_helper.rb +17 -0
- data/test/vendor/dhaka-2.2.1/test/fake_logger.rb +17 -0
- data/test/vendor/simplerdb-0.2/lib/simplerdb/client_exception.rb +10 -0
- data/test/vendor/simplerdb-0.2/lib/simplerdb/db.rb +146 -0
- data/test/vendor/simplerdb-0.2/lib/simplerdb/query_language.rb +266 -0
- data/test/vendor/simplerdb-0.2/lib/simplerdb/server.rb +33 -0
- data/test/vendor/simplerdb-0.2/lib/simplerdb/servlet.rb +191 -0
- data/test/vendor/simplerdb-0.2/lib/simplerdb.rb +3 -0
- data/test/vendor/simplerdb-0.2/test/functional_test.rb +81 -0
- data/test/vendor/simplerdb-0.2/test/query_evaluator_test.rb +73 -0
- data/test/vendor/simplerdb-0.2/test/query_parser_test.rb +64 -0
- data/test/vendor/simplerdb-0.2/test/simplerdb_test.rb +80 -0
- metadata +182 -0
module Dhaka
  # The parser generator. To generate a parser from a grammar specification +ArithmeticPrecedenceGrammar+, one would
  # write:
  #   parser = Dhaka::Parser.new(ArithmeticPrecedenceGrammar)
  #
  # To compile this parser to Ruby source as +ArithmeticPrecedenceParser+:
  #   parser.compile_to_ruby_source_as(:ArithmeticPrecedenceParser)
  # which returns a string of Ruby code.
  class Parser
    include ParserMethods
    attr_reader :grammar

    # Creates a new parser from the given grammar. Messages are logged by default to STDOUT
    # and the log level is WARN. Shift-reduce conflicts are reported at WARN and reduce-reduce conflicts
    # at ERROR. You may pass in your own logger. Logging at DEBUG shows a lot of progress output.
    def initialize(grammar, logger = nil)
      # Memoizing caches: one ShiftAction per destination state and one
      # ReduceAction per production, shared across all states that use them.
      @shift_actions = Hash.new {|hash, state| hash[state] = ShiftAction.new(state)}
      @reduce_actions = Hash.new {|hash, production| hash[production] = ReduceAction.new(production)}
      @logger = logger || default_logger
      # state => {grammar symbol => destination state}
      @transitions = Hash.new {|hash, state| hash[state] = {}}
      @grammar = grammar
      # item => list of lookahead-propagation channels starting at that item.
      @channels = Hash.new {|hash, start_item| hash[start_item] = []}
      # Lazily builds the parser state for a kernel item-set the first time it
      # is looked up. The default block recurses through hash lookups to create
      # successor states, wiring up transitions and passive channels as it goes.
      @states = Hash.new do |hash, kernel|
        closure, channels = grammar.closure(kernel)
        channels.each do |start_item, channel_set|
          @channels[start_item].concat channel_set.to_a
        end
        new_state = ParserState.new(self, closure)
        # Register the state before recursing so cyclic transitions terminate.
        hash[kernel] = new_state
        @logger.debug("Created #{new_state.unique_name}.")
        new_state.transition_items.each do |symbol, items|
          destination_kernel = ItemSet.new(items.collect{|item| item.next_item})
          destination_state = hash[destination_kernel]
          items.each {|item| @channels[item] << grammar.passive_channel(item, destination_state.items[item.next_item])}
          @transitions[new_state][symbol] = destination_state
        end
        new_state
      end
      initialize_states
    end

    # Returns the Ruby source of the generated parser compiled as +parser_class_name+. This can be written out to a file.
    def compile_to_ruby_source_as parser_class_name
      result = "class #{parser_class_name} < Dhaka::CompiledParser\n\n"
      result << " self.grammar = #{grammar.name}\n\n"
      result << " start_with #{start_state.id}\n\n"
      states.each do |state|
        result << "#{state.compile_to_ruby_source}\n\n"
      end
      result << "end"
      result
    end

    # Returns the dot representation of the parser. If <tt>:hide_lookaheads</tt> is set to true in the
    # options hash, lookaheads are not written out to the parser states, which is helpful when there are dozens
    # of lookahead symbols for every item in every state.
    def to_dot(options = {})
      Dot::Digraph.new(:fontsize => 10, :shape => :box, :size => 5) do |g|
        states.each do |state|
          g.node(state, :label => state.items.values.collect{|item| item.to_s(options)}.join("\n"))
          @transitions[state].each do |symbol, dest_state|
            g.edge(state, dest_state, :label => symbol.name)
          end
        end
      end.to_dot
    end

    def inspect
      "<Dhaka::Parser grammar : #{grammar}>"
    end

    private
    attr_reader :start_state

    # All states built so far (the values of the lazily-populated state hash).
    def states
      @states.values
    end

    # Fallback logger: STDOUT at WARN with the custom multi-line formatter.
    def default_logger
      logger = Logger.new(STDOUT)
      logger.level = Logger::WARN
      logger.formatter = ParserLogOutputFormatter.new
      logger
    end

    # Seeds the start state from the grammar's start productions (end-symbol
    # lookahead on each start item), then pumps lookaheads through the channel
    # graph and fills in the shift/reduce action tables.
    def initialize_states
      start_productions = grammar.productions_for_symbol(grammar.start_symbol)
      raise NoStartProductionsError.new(grammar) if start_productions.empty?
      start_items = ItemSet.new(start_productions.collect {|production| Item.new(production, 0)})
      start_items.each {|start_item| start_item.lookaheadset << grammar.end_symbol}
      @start_state = @states[start_items]
      @logger.debug("Pumping #{@channels.keys.size} dirty items...")
      pump_channels @channels.keys
      @logger.debug("Generating shift actions...")
      generate_shift_actions
      @logger.debug("Generating reduce actions...")
      generate_reduce_actions
    end

    # One shift action per outgoing transition, keyed by the symbol's name.
    def generate_shift_actions
      @states.values.each do |state|
        @transitions[state].keys.each do |symbol|
          state.actions[symbol.name] = @shift_actions[@transitions[state][symbol]]
        end
      end
    end

    # Items with no next symbol are complete; each gets reduce actions for
    # every symbol in its lookahead set.
    def generate_reduce_actions
      @states.values.each do |state|
        state.items.values.select{ |item| !item.next_symbol }.each do |item|
          create_reduction_actions_for_item_and_state item, state
        end
      end
    end

    # Installs a reduce action per lookahead. A pre-existing action for the
    # same lookahead is a conflict: reduce-reduce is logged at ERROR,
    # shift-reduce at WARN; resolution is delegated to the conflict objects.
    def create_reduction_actions_for_item_and_state item, state
      item.lookaheadset.each do |lookahead|
        new_action = @reduce_actions[item.production]
        if existing_action = state.actions[lookahead.name]
          if ReduceAction === existing_action
            message = ReduceReduceConflict.new(state, lookahead, new_action).resolve
            @logger.error(message)
          else
            message = ShiftReduceConflict.new(state, lookahead, new_action).resolve
            @logger.warn(message)
          end
        else
          state.actions[lookahead.name] = new_action
        end
      end
    end

    # Fixed-point lookahead propagation: pump every channel leaving a dirty
    # item; items whose lookahead sets grew become the next round's dirty set.
    # Terminates when a full pass changes nothing.
    def pump_channels dirty_items
      loop do
        new_dirty_items = Set.new
        dirty_items.each do |dirty_item|
          @channels[dirty_item].each do |channel|
            new_dirty_items << channel.end_item if channel.pump
          end
        end
        break if new_dirty_items.empty?
        @logger.debug("#{new_dirty_items.size} dirty items...")
        dirty_items = new_dirty_items
      end
    end
  end

  # Raised when trying to create a Parser for a grammar that has no productions for the start symbol
  class NoStartProductionsError < StandardError
    def initialize(grammar) #:nodoc:
      @grammar = grammar
    end
    def to_s #:nodoc:
      "No start productions defined for #{@grammar.name}"
    end
  end

  # Log formatter that prints each message on its own line, prefixed by
  # severity and program name (msg2str comes from Logger::Formatter).
  class ParserLogOutputFormatter < Logger::Formatter #:nodoc:
    def call(severity, time, progname, msg)
      "\n%s -- %s: %s\n" % [ severity, progname, msg2str(msg)]
    end
  end

end
#!/usr/bin/env ruby
module Dhaka
  # Shared parsing entry point, mixed into both Parser and CompiledParser.
  module ParserMethods
    # Drives a single parse over +token_stream+, an Enumerable of Token
    # objects. Builds a fresh ParserRun from the host's +grammar+ and
    # +start_state+ and returns its result — either a ParseSuccessResult
    # or a ParseErrorResult.
    def parse(token_stream)
      ParserRun.new(grammar, start_state, token_stream).run
    end
  end
end
module Dhaka
  # A single run of an LR parse over a token stream, maintaining the two
  # parallel stacks (parse-tree nodes and parser states) that the shift and
  # reduce actions operate on.
  class ParserRun #:nodoc:

    # +grammar+ and +start_state+ come from the host Parser/CompiledParser;
    # +token_stream+ is an Enumerable of Token-s (e.g. a TokenizerSuccessResult).
    def initialize(grammar, start_state, token_stream)
      @grammar = grammar
      @node_stack = []              # parse-tree nodes built so far
      @state_stack = [start_state]  # LR automaton states; top is the current state
      @token_stream = token_stream
      @symbol_queue = []            # symbol names awaiting action lookup
    end

    # Drives the parse to completion. Returns early with a ParseErrorResult if
    # any token has no applicable action; passes a TokenizerErrorResult through
    # unchanged if the stream's +each+ yields one; otherwise returns a
    # ParseSuccessResult wrapping the root node — or nil when the input did not
    # reduce to a single head node (an incomplete parse).
    def run
      tokenize_result = token_stream.each do |token|
        @current_token = token
        @symbol_queue << @current_token.symbol_name
        error = execute_actions
        return error if error
        # Shift: the consumed token becomes a leaf node on the node stack.
        node_stack << ParseTreeLeafNode.new(@current_token)
        state_stack.last
      end
      # A lazily-tokenizing stream may surface its failure as the value of #each.
      return tokenize_result if TokenizerErrorResult === tokenize_result
      ParseSuccessResult.new(node_stack.first) if node_stack.first.head_node?
    end

    private

    attr_reader :state_stack, :token_stream, :node_stack

    # Drains the symbol queue, executing the current state's action for each
    # symbol name. The action blocks are instance_eval'd against this run and
    # presumably push further symbol names after reductions (defined in
    # action.rb — not visible from this file), which is why this loops rather
    # than handling a single symbol. Returns a ParseErrorResult when a symbol
    # has no action in the current state, nil on success.
    def execute_actions
      while symbol_name = @symbol_queue.pop
        action = state_stack.last.actions[symbol_name]
        return ParseErrorResult.new(@current_token, state_stack.last) unless action
        instance_eval(&action.action_code)
      end
      nil
    end

  end
end
module Dhaka
  # A single state of the LR automaton: the set of items it represents plus
  # the parse actions installed for it, keyed by grammar-symbol name.
  class ParserState #:nodoc:
    attr_accessor :items, :actions, :id

    @@state_id = 0

    # Hands out monotonically increasing numeric ids for freshly built states.
    def self.next_state_id
      current = @@state_id
      @@state_id += 1
      current
    end

    def initialize(parser, items, id = nil)
      @parser  = parser
      @items   = items
      @actions = {}
      @id      = id || ParserState.next_state_id
    end

    # Groups the items that still expect input by the symbol each expects
    # next. Returns a hash of symbol => ItemSet, materializing empty sets
    # on demand; completed items (no next symbol) are skipped.
    def transition_items
      grouped = Hash.new { |hash, symbol| hash[symbol] = ItemSet.new() }
      items.each_value do |item|
        expected = item.next_symbol
        grouped[expected] << item if expected
      end
      grouped
    end

    def unique_name
      "State#{id}"
    end

    # Emits the at_state(...) DSL snippet for this state, merging all symbols
    # that share an identical action object into one for_symbols call.
    def compile_to_ruby_source
      grouped = Hash.new { |hash, action| hash[action] = [] }
      actions.each { |symbol_name, action| grouped[action] << symbol_name }

      source = " at_state(#{id}) {\n"
      grouped.each_key do |action|
        name_list = grouped[action].collect { |symbol_name| symbol_name.inspect }.join(', ')
        source << " for_symbols(#{name_list}) { #{action.compile_to_ruby_source} }\n"
      end
      source << " }"
      source
    end

    # DSL hook used by compiled parsers: evaluates +blk+ against the parser
    # and binds the resulting action to each of the given symbol names.
    def for_symbols(*symbol_names, &blk)
      symbol_names.each do |symbol_name|
        actions[symbol_name] = @parser.instance_eval(&blk)
      end
    end

    alias :for_symbol :for_symbols

    def to_s(options = {})
      items.values.collect { |item| item.to_s(options) }.join("\n")
    end

  end

  # A Set of parser items with order-independent, value-based hashing so
  # that identical kernels always map to the same parser state.
  class ItemSet < Set #:nodoc:
    def hash
      inject(5381) { |digest, item| digest ^ item.hash }
    end

    def eql? other
      self == other
    end
  end
end
module Dhaka
  # A single unit of tokenizer output: the name of the grammar symbol it maps
  # to, an optional literal value, and the index into the original input
  # string where it began. +input_position+ lets error reporting point at the
  # exact portion of the input where a problem occurred.
  class Token
    attr_accessor :symbol_name, :value, :input_position

    def initialize(symbol_name, value, input_position)
      @symbol_name    = symbol_name
      @value          = value
      @input_position = input_position
    end

    # "name : value" when a value is present, otherwise just the name.
    def to_s #:nodoc:
      if value
        "#{symbol_name} : #{value}"
      else
        symbol_name.to_s
      end
    end

    # Tokens compare by symbol name and value only; the input position is
    # deliberately ignored.
    def ==(other)
      symbol_name == other.symbol_name && value == other.value
    end
  end
end
@@ -0,0 +1,51 @@
|
|
1
|
+
#--
|
2
|
+
# Copyright (c) 2006, 2007 Mushfeq Khan
|
3
|
+
#
|
4
|
+
# Permission is hereby granted, free of charge, to any person obtaining
|
5
|
+
# a copy of this software and associated documentation files (the
|
6
|
+
# "Software"), to deal in the Software without restriction, including
|
7
|
+
# without limitation the rights to use, copy, modify, merge, publish,
|
8
|
+
# distribute, sublicense, and/or sell copies of the Software, and to
|
9
|
+
# permit persons to whom the Software is furnished to do so, subject to
|
10
|
+
# the following conditions:
|
11
|
+
#
|
12
|
+
# The above copyright notice and this permission notice shall be
|
13
|
+
# included in all copies or substantial portions of the Software.
|
14
|
+
#
|
15
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
16
|
+
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
17
|
+
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
18
|
+
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
19
|
+
# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
20
|
+
# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
21
|
+
# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
22
|
+
#++
|
23
|
+
|
24
|
+
require 'set'
|
25
|
+
require 'logger'
|
26
|
+
require 'delegate'
|
27
|
+
|
28
|
+
%w[
|
29
|
+
grammar/grammar_symbol
|
30
|
+
grammar/production
|
31
|
+
grammar/grammar
|
32
|
+
grammar/precedence
|
33
|
+
parser/parse_tree
|
34
|
+
parser/parse_result
|
35
|
+
parser/parser_methods
|
36
|
+
parser/parser_state
|
37
|
+
parser/token
|
38
|
+
parser/action
|
39
|
+
parser/parser_run
|
40
|
+
parser/compiled_parser
|
41
|
+
tokenizer/tokenizer
|
42
|
+
evaluator/evaluator
|
43
|
+
lexer/accept_actions
|
44
|
+
lexer/alphabet
|
45
|
+
lexer/state_machine
|
46
|
+
lexer/state
|
47
|
+
lexer/specification
|
48
|
+
lexer/lexeme
|
49
|
+
lexer/lexer_run
|
50
|
+
lexer/compiled_lexer
|
51
|
+
].each {|path| require File.join(File.dirname(__FILE__), path)}
|
module Dhaka
  # Reserved constant used to identify the idle state of the tokenizer.
  TOKENIZER_IDLE_STATE = :idle_state

  # Returned on successful tokenizing of the input stream. Supports iteration by including Enumerable, so it can
  # be passed in directly to the parser.
  class TokenizerSuccessResult
    include Enumerable

    def initialize(tokens)
      @tokens = tokens
    end

    # Returns false.
    def has_error?
      false
    end

    # Yields each produced Token in order.
    def each(&block)
      @tokens.each(&block)
    end
  end

  # Returned when tokenizing fails due to an unexpected character in the input stream.
  class TokenizerErrorResult
    # The index of the character that caused the error.
    attr_reader :unexpected_char_index

    def initialize(unexpected_char_index)
      @unexpected_char_index = unexpected_char_index
    end

    # Returns true.
    def has_error?
      true
    end
  end

  # A tokenizer state encapsulates actions that should be performed upon
  # encountering each permissible character for that state.
  class TokenizerState
    attr_reader :actions, :default_action

    def initialize
      # character => action block; consulted before default_action.
      @actions = {}
    end

    # Define the action (+blk+) to be performed when encountering any of +characters+ in the token stream.
    def for_characters(characters, &blk)
      characters.each do |character|
        actions[character] = blk
      end
    end

    alias for_character for_characters

    # Define the action (+blk+) to be performed for any characters that don't have an action to perform.
    def for_default(&blk)
      @default_action = blk
    end

    def to_s #:nodoc:
      actions.inspect
    end

  end

  # This abstract class contains a DSL for hand-coding tokenizers. Subclass it to implement tokenizers for specific grammars.
  #
  # Tokenizers are state machines. Each state of a tokenizer is identified
  # by a Ruby symbol. The constant Dhaka::TOKENIZER_IDLE_STATE is reserved for the idle state of the tokenizer (the one
  # that it starts in).
  #
  # The following is a tokenizer for arithmetic expressions with integer terms. The tokenizer starts in the idle state
  # creating single-character tokens for all characters excepts digits and whitespace. It shifts to
  # <tt>:get_integer_literal</tt> when it encounters a digit character and creates a token on the stack on which it
  # accumulates the value of the literal. When it again encounters a non-digit character, it shifts back to idle.
  # Whitespace is treated as a delimiter, but not shifted as a token.
  #
  #   class ArithmeticPrecedenceTokenizer < Dhaka::Tokenizer
  #
  #     digits = ('0'..'9').to_a
  #     parenths = ['(', ')']
  #     operators = ['-', '+', '/', '*', '^']
  #     functions = ['h', 'l']
  #     arg_separator = [',']
  #     whitespace = [' ']
  #
  #     all_characters = digits + parenths + operators + functions + arg_separator + whitespace
  #
  #     for_state Dhaka::TOKENIZER_IDLE_STATE do
  #       for_characters(all_characters - (digits + whitespace)) do
  #         create_token(curr_char, nil)
  #         advance
  #       end
  #       for_characters digits do
  #         create_token('n', '')
  #         switch_to :get_integer_literal
  #       end
  #       for_character whitespace do
  #         advance
  #       end
  #     end
  #
  #     for_state :get_integer_literal do
  #       for_characters all_characters - digits do
  #         switch_to Dhaka::TOKENIZER_IDLE_STATE
  #       end
  #       for_characters digits do
  #         curr_token.value << curr_char
  #         advance
  #       end
  #     end
  #
  #   end
  #
  # For languages where the lexical structure is very complicated, it may be too tedious to implement a Tokenizer by hand.
  # In such cases, it's a lot easier to write a LexerSpecification using regular expressions and create a Lexer from that.
  class Tokenizer
    class << self
      # Define the action for the state named +state_name+.
      def for_state(state_name, &blk)
        states[state_name].instance_eval(&blk)
      end

      # Tokenizes a string +input+ and returns a TokenizerErrorResult on failure or a TokenizerSuccessResult on sucess.
      def tokenize(input)
        new(input).run
      end

      private
      # Each concrete subclass gets its own states hash (a fresh
      # TokenizerState is materialized per state name on first access).
      def inherited(tokenizer)
        class << tokenizer
          attr_accessor :states, :grammar
        end
        tokenizer.states = Hash.new {|hash, key| hash[key] = TokenizerState.new}
      end
    end

    # The tokens shifted so far.
    attr_reader :tokens

    def initialize(input) #:nodoc:
      @input = input
      @current_state = self.class.states[TOKENIZER_IDLE_STATE]
      @curr_char_index = 0
      @tokens = []
    end

    # The character currently being processed, or nil past end of input.
    # (The .chr call keeps this working on Ruby 1.8, where String#[] returns
    # a character code rather than a one-character string.)
    def curr_char
      @input[@curr_char_index] and @input[@curr_char_index].chr
    end

    # Advance to the next character.
    def advance
      @curr_char_index += 1
    end

    def inspect
      "<Dhaka::Tokenizer grammar : #{grammar}>"
    end

    # The token currently on top of the stack.
    def curr_token
      tokens.last
    end

    # Push a new token on to the stack with symbol corresponding to +symbol_name+ and a value of +value+.
    def create_token(symbol_name, value)
      new_token = Dhaka::Token.new(symbol_name, value, @curr_char_index)
      tokens << new_token
    end

    # Change the active state of the tokenizer to the state identified by the symbol +state_name+.
    def switch_to state_name
      @current_state = self.class.states[state_name]
    end

    # Main loop: for each input character run the current state's action for
    # it (or the state's default action). A character with no action aborts
    # with a TokenizerErrorResult; otherwise an end-of-input token is appended
    # and the whole token list is returned as a TokenizerSuccessResult.
    def run #:nodoc:
      while curr_char
        blk = @current_state.actions[curr_char] || @current_state.default_action
        return TokenizerErrorResult.new(@curr_char_index) unless blk
        instance_eval(&blk)
      end
      tokens << Dhaka::Token.new(Dhaka::END_SYMBOL_NAME, nil, nil)
      TokenizerSuccessResult.new(tokens)
    end
  end
end
@@ -0,0 +1,62 @@
|
|
1
|
+
#--
|
2
|
+
# Copyright (c) 2006, 2007 Mushfeq Khan
|
3
|
+
#
|
4
|
+
# Permission is hereby granted, free of charge, to any person obtaining
|
5
|
+
# a copy of this software and associated documentation files (the
|
6
|
+
# "Software"), to deal in the Software without restriction, including
|
7
|
+
# without limitation the rights to use, copy, modify, merge, publish,
|
8
|
+
# distribute, sublicense, and/or sell copies of the Software, and to
|
9
|
+
# permit persons to whom the Software is furnished to do so, subject to
|
10
|
+
# the following conditions:
|
11
|
+
#
|
12
|
+
# The above copyright notice and this permission notice shall be
|
13
|
+
# included in all copies or substantial portions of the Software.
|
14
|
+
#
|
15
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
16
|
+
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
17
|
+
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
18
|
+
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
19
|
+
# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
20
|
+
# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
21
|
+
# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
22
|
+
#++
|
23
|
+
|
24
|
+
require 'set'
|
25
|
+
require 'logger'
|
26
|
+
require 'delegate'
|
27
|
+
|
28
|
+
%w[
|
29
|
+
dot/dot
|
30
|
+
grammar/grammar_symbol
|
31
|
+
grammar/production
|
32
|
+
grammar/closure_hash
|
33
|
+
grammar/grammar
|
34
|
+
grammar/precedence
|
35
|
+
parser/parse_tree
|
36
|
+
parser/parse_result
|
37
|
+
parser/item
|
38
|
+
parser/channel
|
39
|
+
parser/parser_methods
|
40
|
+
parser/parser_state
|
41
|
+
parser/conflict
|
42
|
+
parser/token
|
43
|
+
parser/action
|
44
|
+
parser/parser_run
|
45
|
+
parser/parser
|
46
|
+
parser/compiled_parser
|
47
|
+
tokenizer/tokenizer
|
48
|
+
evaluator/evaluator
|
49
|
+
lexer/accept_actions
|
50
|
+
lexer/alphabet
|
51
|
+
lexer/regex_grammar
|
52
|
+
lexer/regex_tokenizer
|
53
|
+
lexer/regex_parser
|
54
|
+
lexer/state_machine
|
55
|
+
lexer/dfa
|
56
|
+
lexer/state
|
57
|
+
lexer/specification
|
58
|
+
lexer/lexeme
|
59
|
+
lexer/lexer_run
|
60
|
+
lexer/lexer
|
61
|
+
lexer/compiled_lexer
|
62
|
+
].each {|path| require File.join(File.dirname(__FILE__), 'dhaka/' + path)}
|
require File.dirname(__FILE__) + '/arithmetic_grammar'

# Test-fixture evaluator for ArithmeticGrammar parse trees. Each block below
# is registered (via the Dhaka::Evaluator DSL) for the correspondingly named
# grammar production and computes that node's value, recursing into children
# with +evaluate+. child_nodes indices follow the production's symbol order,
# e.g. [operand, operator, operand] for the binary rules.
class ArithmeticEvaluator < Dhaka::Evaluator

  self.grammar = ArithmeticGrammar

  define_evaluation_rules do

    for_subtraction do
      evaluate(child_nodes[0]) - evaluate(child_nodes[2])
    end

    for_addition do
      evaluate(child_nodes[0]) + evaluate(child_nodes[2])
    end

    for_division do
      # to_f forces float division so integer operands don't truncate.
      evaluate(child_nodes[0]).to_f/evaluate(child_nodes[2])
    end

    for_multiplication do
      evaluate(child_nodes[0]) * evaluate(child_nodes[2])
    end

    for_getting_literals do
      child_nodes[0].token.value
    end

    for_unpacking_parenthetized_expression do
      # ( expr ) — the value is that of the inner expression.
      evaluate(child_nodes[1])
    end

    for_empty_args do
      []
    end

    for_evaluating_function do
      # function(args): child 0 evaluates to a callable, child 2 to an array
      # of argument values.
      evaluate(child_nodes[0]).call evaluate(child_nodes[2])
    end

    for_concatenating_args do
      [evaluate(child_nodes[0])]+evaluate(child_nodes[2])
    end

    for_single_args do
      [evaluate(child_nodes[0])]
    end

    for_min_function do
      @min_function
    end

    for_max_function do
      @max_function
    end

  end

  # The callables to use for the grammar's min/max function symbols are
  # injected here by the tests.
  # NOTE(review): does not call super — assumes Dhaka::Evaluator needs no
  # base-class initialization; confirm against evaluator.rb.
  def initialize(min_function, max_function)
    @min_function = min_function
    @max_function = max_function
  end

end