simply_stored 0.1.4

Sign up to get free protection for your applications and to get access to all the features.
Files changed (121) hide show
  1. data/lib/simply_stored/class_methods_base.rb +31 -0
  2. data/lib/simply_stored/couch/belongs_to.rb +117 -0
  3. data/lib/simply_stored/couch/ext/couch_potato.rb +16 -0
  4. data/lib/simply_stored/couch/has_many.rb +148 -0
  5. data/lib/simply_stored/couch/has_one.rb +93 -0
  6. data/lib/simply_stored/couch/validations.rb +74 -0
  7. data/lib/simply_stored/couch/views/array_property_view_spec.rb +22 -0
  8. data/lib/simply_stored/couch/views.rb +1 -0
  9. data/lib/simply_stored/couch.rb +278 -0
  10. data/lib/simply_stored/instance_methods.rb +143 -0
  11. data/lib/simply_stored/simpledb/associations.rb +196 -0
  12. data/lib/simply_stored/simpledb/attributes.rb +173 -0
  13. data/lib/simply_stored/simpledb/storag.rb +85 -0
  14. data/lib/simply_stored/simpledb/validations.rb +88 -0
  15. data/lib/simply_stored/simpledb.rb +212 -0
  16. data/lib/simply_stored/storage.rb +93 -0
  17. data/lib/simply_stored.rb +9 -0
  18. data/test/custom_views_test.rb +33 -0
  19. data/test/fixtures/couch.rb +182 -0
  20. data/test/fixtures/simpledb/item.rb +11 -0
  21. data/test/fixtures/simpledb/item_daddy.rb +8 -0
  22. data/test/fixtures/simpledb/log_item.rb +3 -0
  23. data/test/fixtures/simpledb/namespace_bar.rb +5 -0
  24. data/test/fixtures/simpledb/namespace_foo.rb +7 -0
  25. data/test/fixtures/simpledb/protected_item.rb +3 -0
  26. data/test/simply_stored_couch_test.rb +1684 -0
  27. data/test/simply_stored_simpledb_test.rb +1341 -0
  28. data/test/test_helper.rb +22 -0
  29. data/test/vendor/dhaka-2.2.1/lib/dhaka/dot/dot.rb +29 -0
  30. data/test/vendor/dhaka-2.2.1/lib/dhaka/evaluator/evaluator.rb +133 -0
  31. data/test/vendor/dhaka-2.2.1/lib/dhaka/grammar/closure_hash.rb +15 -0
  32. data/test/vendor/dhaka-2.2.1/lib/dhaka/grammar/grammar.rb +240 -0
  33. data/test/vendor/dhaka-2.2.1/lib/dhaka/grammar/grammar_symbol.rb +27 -0
  34. data/test/vendor/dhaka-2.2.1/lib/dhaka/grammar/precedence.rb +19 -0
  35. data/test/vendor/dhaka-2.2.1/lib/dhaka/grammar/production.rb +36 -0
  36. data/test/vendor/dhaka-2.2.1/lib/dhaka/lexer/accept_actions.rb +36 -0
  37. data/test/vendor/dhaka-2.2.1/lib/dhaka/lexer/alphabet.rb +21 -0
  38. data/test/vendor/dhaka-2.2.1/lib/dhaka/lexer/compiled_lexer.rb +46 -0
  39. data/test/vendor/dhaka-2.2.1/lib/dhaka/lexer/dfa.rb +121 -0
  40. data/test/vendor/dhaka-2.2.1/lib/dhaka/lexer/lexeme.rb +32 -0
  41. data/test/vendor/dhaka-2.2.1/lib/dhaka/lexer/lexer.rb +70 -0
  42. data/test/vendor/dhaka-2.2.1/lib/dhaka/lexer/lexer_run.rb +78 -0
  43. data/test/vendor/dhaka-2.2.1/lib/dhaka/lexer/regex_grammar.rb +392 -0
  44. data/test/vendor/dhaka-2.2.1/lib/dhaka/lexer/regex_parser.rb +2010 -0
  45. data/test/vendor/dhaka-2.2.1/lib/dhaka/lexer/regex_tokenizer.rb +14 -0
  46. data/test/vendor/dhaka-2.2.1/lib/dhaka/lexer/specification.rb +96 -0
  47. data/test/vendor/dhaka-2.2.1/lib/dhaka/lexer/state.rb +68 -0
  48. data/test/vendor/dhaka-2.2.1/lib/dhaka/lexer/state_machine.rb +37 -0
  49. data/test/vendor/dhaka-2.2.1/lib/dhaka/parser/action.rb +55 -0
  50. data/test/vendor/dhaka-2.2.1/lib/dhaka/parser/channel.rb +58 -0
  51. data/test/vendor/dhaka-2.2.1/lib/dhaka/parser/compiled_parser.rb +51 -0
  52. data/test/vendor/dhaka-2.2.1/lib/dhaka/parser/conflict.rb +54 -0
  53. data/test/vendor/dhaka-2.2.1/lib/dhaka/parser/item.rb +42 -0
  54. data/test/vendor/dhaka-2.2.1/lib/dhaka/parser/parse_result.rb +50 -0
  55. data/test/vendor/dhaka-2.2.1/lib/dhaka/parser/parse_tree.rb +66 -0
  56. data/test/vendor/dhaka-2.2.1/lib/dhaka/parser/parser.rb +165 -0
  57. data/test/vendor/dhaka-2.2.1/lib/dhaka/parser/parser_methods.rb +11 -0
  58. data/test/vendor/dhaka-2.2.1/lib/dhaka/parser/parser_run.rb +39 -0
  59. data/test/vendor/dhaka-2.2.1/lib/dhaka/parser/parser_state.rb +74 -0
  60. data/test/vendor/dhaka-2.2.1/lib/dhaka/parser/token.rb +22 -0
  61. data/test/vendor/dhaka-2.2.1/lib/dhaka/runtime.rb +51 -0
  62. data/test/vendor/dhaka-2.2.1/lib/dhaka/tokenizer/tokenizer.rb +190 -0
  63. data/test/vendor/dhaka-2.2.1/lib/dhaka.rb +62 -0
  64. data/test/vendor/dhaka-2.2.1/test/all_tests.rb +5 -0
  65. data/test/vendor/dhaka-2.2.1/test/arithmetic/arithmetic_evaluator.rb +64 -0
  66. data/test/vendor/dhaka-2.2.1/test/arithmetic/arithmetic_evaluator_test.rb +43 -0
  67. data/test/vendor/dhaka-2.2.1/test/arithmetic/arithmetic_grammar.rb +41 -0
  68. data/test/vendor/dhaka-2.2.1/test/arithmetic/arithmetic_grammar_test.rb +9 -0
  69. data/test/vendor/dhaka-2.2.1/test/arithmetic/arithmetic_test_methods.rb +9 -0
  70. data/test/vendor/dhaka-2.2.1/test/arithmetic/arithmetic_tokenizer.rb +39 -0
  71. data/test/vendor/dhaka-2.2.1/test/arithmetic/arithmetic_tokenizer_test.rb +38 -0
  72. data/test/vendor/dhaka-2.2.1/test/arithmetic_precedence/arithmetic_precedence_evaluator.rb +43 -0
  73. data/test/vendor/dhaka-2.2.1/test/arithmetic_precedence/arithmetic_precedence_grammar.rb +24 -0
  74. data/test/vendor/dhaka-2.2.1/test/arithmetic_precedence/arithmetic_precedence_grammar_test.rb +30 -0
  75. data/test/vendor/dhaka-2.2.1/test/arithmetic_precedence/arithmetic_precedence_lexer_specification.rb +23 -0
  76. data/test/vendor/dhaka-2.2.1/test/arithmetic_precedence/arithmetic_precedence_parser_test.rb +33 -0
  77. data/test/vendor/dhaka-2.2.1/test/brackets/bracket_grammar.rb +23 -0
  78. data/test/vendor/dhaka-2.2.1/test/brackets/bracket_tokenizer.rb +22 -0
  79. data/test/vendor/dhaka-2.2.1/test/brackets/brackets_test.rb +28 -0
  80. data/test/vendor/dhaka-2.2.1/test/chittagong/chittagong_driver.rb +46 -0
  81. data/test/vendor/dhaka-2.2.1/test/chittagong/chittagong_driver_test.rb +276 -0
  82. data/test/vendor/dhaka-2.2.1/test/chittagong/chittagong_evaluator.rb +284 -0
  83. data/test/vendor/dhaka-2.2.1/test/chittagong/chittagong_evaluator_test.rb +38 -0
  84. data/test/vendor/dhaka-2.2.1/test/chittagong/chittagong_grammar.rb +104 -0
  85. data/test/vendor/dhaka-2.2.1/test/chittagong/chittagong_lexer.rb +109 -0
  86. data/test/vendor/dhaka-2.2.1/test/chittagong/chittagong_lexer_specification.rb +37 -0
  87. data/test/vendor/dhaka-2.2.1/test/chittagong/chittagong_lexer_test.rb +58 -0
  88. data/test/vendor/dhaka-2.2.1/test/chittagong/chittagong_parser.rb +879 -0
  89. data/test/vendor/dhaka-2.2.1/test/chittagong/chittagong_parser_test.rb +55 -0
  90. data/test/vendor/dhaka-2.2.1/test/chittagong/chittagong_test.rb +170 -0
  91. data/test/vendor/dhaka-2.2.1/test/core/another_lalr_but_not_slr_grammar.rb +20 -0
  92. data/test/vendor/dhaka-2.2.1/test/core/compiled_parser_test.rb +44 -0
  93. data/test/vendor/dhaka-2.2.1/test/core/dfa_test.rb +170 -0
  94. data/test/vendor/dhaka-2.2.1/test/core/evaluator_test.rb +22 -0
  95. data/test/vendor/dhaka-2.2.1/test/core/grammar_test.rb +83 -0
  96. data/test/vendor/dhaka-2.2.1/test/core/lalr_but_not_slr_grammar.rb +19 -0
  97. data/test/vendor/dhaka-2.2.1/test/core/lexer_test.rb +139 -0
  98. data/test/vendor/dhaka-2.2.1/test/core/malformed_grammar.rb +7 -0
  99. data/test/vendor/dhaka-2.2.1/test/core/malformed_grammar_test.rb +8 -0
  100. data/test/vendor/dhaka-2.2.1/test/core/nullable_grammar.rb +21 -0
  101. data/test/vendor/dhaka-2.2.1/test/core/parse_result_test.rb +44 -0
  102. data/test/vendor/dhaka-2.2.1/test/core/parser_state_test.rb +24 -0
  103. data/test/vendor/dhaka-2.2.1/test/core/parser_test.rb +131 -0
  104. data/test/vendor/dhaka-2.2.1/test/core/precedence_grammar.rb +17 -0
  105. data/test/vendor/dhaka-2.2.1/test/core/precedence_grammar_test.rb +9 -0
  106. data/test/vendor/dhaka-2.2.1/test/core/rr_conflict_grammar.rb +21 -0
  107. data/test/vendor/dhaka-2.2.1/test/core/simple_grammar.rb +22 -0
  108. data/test/vendor/dhaka-2.2.1/test/core/sr_conflict_grammar.rb +16 -0
  109. data/test/vendor/dhaka-2.2.1/test/dhaka_test_helper.rb +17 -0
  110. data/test/vendor/dhaka-2.2.1/test/fake_logger.rb +17 -0
  111. data/test/vendor/simplerdb-0.2/lib/simplerdb/client_exception.rb +10 -0
  112. data/test/vendor/simplerdb-0.2/lib/simplerdb/db.rb +146 -0
  113. data/test/vendor/simplerdb-0.2/lib/simplerdb/query_language.rb +266 -0
  114. data/test/vendor/simplerdb-0.2/lib/simplerdb/server.rb +33 -0
  115. data/test/vendor/simplerdb-0.2/lib/simplerdb/servlet.rb +191 -0
  116. data/test/vendor/simplerdb-0.2/lib/simplerdb.rb +3 -0
  117. data/test/vendor/simplerdb-0.2/test/functional_test.rb +81 -0
  118. data/test/vendor/simplerdb-0.2/test/query_evaluator_test.rb +73 -0
  119. data/test/vendor/simplerdb-0.2/test/query_parser_test.rb +64 -0
  120. data/test/vendor/simplerdb-0.2/test/simplerdb_test.rb +80 -0
  121. metadata +182 -0
@@ -0,0 +1,165 @@
1
+ module Dhaka
2
  # The parser generator. To generate a parser from a grammar specification +ArithmeticPrecedenceGrammar+, one would
  # write:
  #   parser = Dhaka::Parser.new(ArithmeticPrecedenceGrammar)
  #
  # To compile this parser to Ruby source as +ArithmeticPrecedenceParser+:
  #   parser.compile_to_ruby_source_as(:ArithmeticPrecedenceParser)
  # which returns a string of Ruby code.
  class Parser
    include ParserMethods
    attr_reader :grammar

    # Creates a new parser from the given grammar. Messages are logged by default to STDOUT
    # and the log level is WARN. Shift-reduce conflicts are reported at WARN and reduce-reduce conflicts
    # at ERROR. You may pass in your own logger. Logging at DEBUG shows a lot of progress output.
    def initialize(grammar, logger = nil)
      # Memoizing hashes: a shift/reduce action object is created on first
      # access for a given state/production and shared thereafter.
      @shift_actions = Hash.new {|hash, state| hash[state] = ShiftAction.new(state)}
      @reduce_actions = Hash.new {|hash, production| hash[production] = ReduceAction.new(production)}
      @logger = logger || default_logger
      # state => {grammar symbol => destination state}
      @transitions = Hash.new {|hash, state| hash[state] = {}}
      @grammar = grammar
      # item => list of channels used to propagate lookaheads between items.
      @channels = Hash.new {|hash, start_item| hash[start_item] = []}
      # States are built lazily, keyed by their kernel item sets: looking up
      # a missing kernel computes its closure via grammar.closure (which
      # appears to return [closure items, lookahead channels] — confirm in
      # grammar.rb) and recursively materializes all reachable states.
      @states = Hash.new do |hash, kernel|
        closure, channels = grammar.closure(kernel)
        channels.each do |start_item, channel_set|
          @channels[start_item].concat channel_set.to_a
        end
        new_state = ParserState.new(self, closure)
        # Register the state before walking its transitions so recursive
        # lookups of the same kernel terminate.
        hash[kernel] = new_state
        @logger.debug("Created #{new_state.unique_name}.")
        new_state.transition_items.each do |symbol, items|
          destination_kernel = ItemSet.new(items.collect{|item| item.next_item})
          destination_state = hash[destination_kernel]
          items.each {|item| @channels[item] << grammar.passive_channel(item, destination_state.items[item.next_item])}
          @transitions[new_state][symbol] = destination_state
        end
        new_state
      end
      initialize_states
    end

    # Returns the Ruby source of the generated parser compiled as +parser_class_name+. This can be written out to a file.
    def compile_to_ruby_source_as parser_class_name
      result = "class #{parser_class_name} < Dhaka::CompiledParser\n\n"
      result << " self.grammar = #{grammar.name}\n\n"
      result << " start_with #{start_state.id}\n\n"
      states.each do |state|
        result << "#{state.compile_to_ruby_source}\n\n"
      end
      result << "end"
      result
    end

    # Returns the dot representation of the parser. If <tt>:hide_lookaheads</tt> is set to true in the
    # options hash, lookaheads are not written out to the parser states, which is helpful when there are dozens
    # of lookahead symbols for every item in every state.
    def to_dot(options = {})
      Dot::Digraph.new(:fontsize => 10, :shape => :box, :size => 5) do |g|
        states.each do |state|
          g.node(state, :label => state.items.values.collect{|item| item.to_s(options)}.join("\n"))
          @transitions[state].each do |symbol, dest_state|
            g.edge(state, dest_state, :label => symbol.name)
          end
        end
      end.to_dot
    end

    def inspect
      "<Dhaka::Parser grammar : #{grammar}>"
    end

    private
    attr_reader :start_state

    # All parser states materialized so far.
    def states
      @states.values
    end

    # STDOUT logger at WARN using the parser-specific log formatter.
    def default_logger
      logger = Logger.new(STDOUT)
      logger.level = Logger::WARN
      logger.formatter = ParserLogOutputFormatter.new
      logger
    end

    # Builds the start state from the grammar's start productions, seeds the
    # end-of-input lookahead, propagates lookaheads through the channels and
    # then generates all shift/reduce actions.
    def initialize_states
      start_productions = grammar.productions_for_symbol(grammar.start_symbol)
      raise NoStartProductionsError.new(grammar) if start_productions.empty?
      start_items = ItemSet.new(start_productions.collect {|production| Item.new(production, 0)})
      start_items.each {|start_item| start_item.lookaheadset << grammar.end_symbol}
      @start_state = @states[start_items]
      @logger.debug("Pumping #{@channels.keys.size} dirty items...")
      pump_channels @channels.keys
      @logger.debug("Generating shift actions...")
      generate_shift_actions
      @logger.debug("Generating reduce actions...")
      generate_reduce_actions
    end

    # One shift action per outgoing transition, keyed by the symbol's name.
    def generate_shift_actions
      @states.values.each do |state|
        @transitions[state].keys.each do |symbol|
          state.actions[symbol.name] = @shift_actions[@transitions[state][symbol]]
        end
      end
    end

    # Items with no next symbol are complete: add reductions for each of
    # their lookaheads.
    def generate_reduce_actions
      @states.values.each do |state|
        state.items.values.select{ |item| !item.next_symbol }.each do |item|
          create_reduction_actions_for_item_and_state item, state
        end
      end
    end

    # Installs a reduce action for every lookahead of +item+ in +state+.
    # When an action already exists for a lookahead, delegates to the
    # conflict resolvers and logs the outcome (reduce-reduce at ERROR,
    # shift-reduce at WARN).
    def create_reduction_actions_for_item_and_state item, state
      item.lookaheadset.each do |lookahead|
        new_action = @reduce_actions[item.production]
        if existing_action = state.actions[lookahead.name]
          if ReduceAction === existing_action
            message = ReduceReduceConflict.new(state, lookahead, new_action).resolve
            @logger.error(message)
          else
            message = ShiftReduceConflict.new(state, lookahead, new_action).resolve
            @logger.warn(message)
          end
        else
          state.actions[lookahead.name] = new_action
        end
      end
    end

    # Worklist lookahead propagation: pump every channel out of each dirty
    # item; items whose lookahead sets changed (channel.pump returned true)
    # become dirty for the next pass. Stops when a pass changes nothing.
    def pump_channels dirty_items
      loop do
        new_dirty_items = Set.new
        dirty_items.each do |dirty_item|
          @channels[dirty_item].each do |channel|
            new_dirty_items << channel.end_item if channel.pump
          end
        end
        break if new_dirty_items.empty?
        @logger.debug("#{new_dirty_items.size} dirty items...")
        dirty_items = new_dirty_items
      end
    end
  end
147
+
148
+ # Raised when trying to create a Parser for a grammar that has no productions for the start symbol
149
+ class NoStartProductionsError < StandardError
150
+ def initialize(grammar) #:nodoc:
151
+ @grammar = grammar
152
+ end
153
+ def to_s #:nodoc:
154
+ "No start productions defined for #{@grammar.name}"
155
+ end
156
+ end
157
+
158
+ class ParserLogOutputFormatter < Logger::Formatter #:nodoc:
159
+ def call(severity, time, progname, msg)
160
+ "\n%s -- %s: %s\n" % [ severity, progname, msg2str(msg)]
161
+ end
162
+ end
163
+
164
+ end
165
+
@@ -0,0 +1,11 @@
1
#!/usr/bin/env ruby
module Dhaka
  # This module is included both in Parser and CompiledParser.
  module ParserMethods
    # +token_stream+ is an Enumerable of Token-s. Returns either a
    # ParseSuccessResult or a ParseErrorResult.
    def parse(token_stream)
      ParserRun.new(grammar, start_state, token_stream).run
    end
  end
end
@@ -0,0 +1,39 @@
1
module Dhaka
  # Drives a single parse: feeds tokens from +token_stream+ through the
  # parser's action tables, maintaining the state stack and the parse-tree
  # node stack.
  class ParserRun #:nodoc:

    def initialize(grammar, start_state, token_stream)
      @grammar = grammar
      @node_stack = []
      @state_stack = [start_state]
      @token_stream = token_stream
      @symbol_queue = []
    end

    # Returns a ParseSuccessResult when the stream reduces to a single head
    # node, a ParseErrorResult on an unexpected token, or the
    # TokenizerErrorResult itself when token_stream.each yielded one
    # (e.g. from a failed lexer run — TODO confirm against lexer_run.rb).
    def run
      tokenize_result = token_stream.each do |token|
        @current_token = token
        @symbol_queue << @current_token.symbol_name
        error = execute_actions
        return error if error
        node_stack << ParseTreeLeafNode.new(@current_token)
        state_stack.last
      end
      return tokenize_result if TokenizerErrorResult === tokenize_result
      # Implicitly nil when the parse did not end on a single head node.
      ParseSuccessResult.new(node_stack.first) if node_stack.first.head_node?
    end

    private

    attr_reader :state_stack, :token_stream, :node_stack

    # Pops queued symbol names and runs the current state's matching action.
    # Action code executes via instance_eval, so it manipulates this run's
    # stacks directly. Returns a ParseErrorResult when no action exists for
    # a symbol, nil when the queue drains cleanly.
    def execute_actions
      while symbol_name = @symbol_queue.pop
        action = state_stack.last.actions[symbol_name]
        return ParseErrorResult.new(@current_token, state_stack.last) unless action
        instance_eval(&action.action_code)
      end
      nil
    end

  end
end
@@ -0,0 +1,74 @@
1
+ module Dhaka
2
+ class ParserState #:nodoc:
3
+ attr_accessor :items, :actions, :id
4
+
5
+ @@state_id = 0
6
+
7
+ def self.next_state_id
8
+ result = @@state_id
9
+ @@state_id += 1
10
+ result
11
+ end
12
+
13
+ def initialize(parser, items, id=nil)
14
+ @parser = parser
15
+ @items = items
16
+ @actions = {}
17
+ @id = id || ParserState.next_state_id
18
+ end
19
+
20
+ def transition_items
21
+ result = Hash.new {|h, k| h[k] = ItemSet.new()}
22
+ items.values.each do |item|
23
+ result[item.next_symbol] << item if item.next_symbol
24
+ end
25
+ result
26
+ end
27
+
28
+ def unique_name
29
+ "State#{id}"
30
+ end
31
+
32
+ def compile_to_ruby_source
33
+ result = " at_state(#{id}) {\n"
34
+
35
+ symbol_names_by_action = Hash.new {|hash, key| hash[key] = []}
36
+ actions.each do |symbol_name, action|
37
+ symbol_names_by_action[action] << symbol_name
38
+ end
39
+
40
+ symbol_names_by_action.keys.each do |action|
41
+ symbol_names = symbol_names_by_action[action].collect {|symbol_name| "#{symbol_name.inspect}"}.join(', ')
42
+ result << " for_symbols(#{symbol_names}) { #{action.compile_to_ruby_source} }\n"
43
+ end
44
+
45
+ result << " }"
46
+ result
47
+ end
48
+
49
+ def for_symbols *symbol_names, &blk
50
+ symbol_names.each do |symbol_name|
51
+ actions[symbol_name] = @parser.instance_eval(&blk)
52
+ end
53
+ end
54
+
55
+ alias :for_symbol :for_symbols
56
+
57
+ def to_s(options = {})
58
+ items.values.collect{|item| item.to_s(options)}.join("\n")
59
+ end
60
+
61
+ end
62
+
63
+ class ItemSet < Set #:nodoc:
64
+ def hash
65
+ result = 5381
66
+ each { |item| result ^= item.hash }
67
+ result
68
+ end
69
+
70
+ def eql? other
71
+ self == other
72
+ end
73
+ end
74
+ end
@@ -0,0 +1,22 @@
1
module Dhaka
  # Represents a portion of the input character stream that is mapped by the tokenizer
  # to a symbol in the grammar. The attribute +input_position+ contains the start index position of the original
  # string input that this token came from. It can be used to report errors by indicating the specific portion
  # of the input where the error occurred.
  class Token
    attr_accessor :symbol_name, :value, :input_position

    def initialize(symbol_name, value, input_position)
      @symbol_name = symbol_name
      @value = value
      @input_position = input_position
    end

    def to_s #:nodoc:
      value ? "#{symbol_name} : #{value}" : "#{symbol_name}"
    end

    # Tokens are equal when their symbol names and values match.
    # Guarded with respond_to? so comparing against nil or any other
    # non-token-like object returns false instead of raising
    # NoMethodError (duck typing for token-like objects is preserved).
    def == other
      other.respond_to?(:symbol_name) && other.respond_to?(:value) &&
        symbol_name == other.symbol_name && value == other.value
    end
  end
end
@@ -0,0 +1,51 @@
1
+ #--
2
+ # Copyright (c) 2006, 2007 Mushfeq Khan
3
+ #
4
+ # Permission is hereby granted, free of charge, to any person obtaining
5
+ # a copy of this software and associated documentation files (the
6
+ # "Software"), to deal in the Software without restriction, including
7
+ # without limitation the rights to use, copy, modify, merge, publish,
8
+ # distribute, sublicense, and/or sell copies of the Software, and to
9
+ # permit persons to whom the Software is furnished to do so, subject to
10
+ # the following conditions:
11
+ #
12
+ # The above copyright notice and this permission notice shall be
13
+ # included in all copies or substantial portions of the Software.
14
+ #
15
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16
+ # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17
+ # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
18
+ # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
19
+ # LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
20
+ # OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
21
+ # WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
22
+ #++
23
+
24
+ require 'set'
25
+ require 'logger'
26
+ require 'delegate'
27
+
28
+ %w[
29
+ grammar/grammar_symbol
30
+ grammar/production
31
+ grammar/grammar
32
+ grammar/precedence
33
+ parser/parse_tree
34
+ parser/parse_result
35
+ parser/parser_methods
36
+ parser/parser_state
37
+ parser/token
38
+ parser/action
39
+ parser/parser_run
40
+ parser/compiled_parser
41
+ tokenizer/tokenizer
42
+ evaluator/evaluator
43
+ lexer/accept_actions
44
+ lexer/alphabet
45
+ lexer/state_machine
46
+ lexer/state
47
+ lexer/specification
48
+ lexer/lexeme
49
+ lexer/lexer_run
50
+ lexer/compiled_lexer
51
+ ].each {|path| require File.join(File.dirname(__FILE__), path)}
@@ -0,0 +1,190 @@
1
+ module Dhaka
2
+ # Reserved constant used to identify the idle state of the tokenizer.
3
+ TOKENIZER_IDLE_STATE = :idle_state
4
+
5
+ # Returned on successful tokenizing of the input stream. Supports iteration by including Enumerable, so it can
6
+ # be passed in directly to the parser.
7
+ class TokenizerSuccessResult
8
+ include Enumerable
9
+
10
+ def initialize(tokens)
11
+ @tokens = tokens
12
+ end
13
+
14
+ # Returns false.
15
+ def has_error?
16
+ false
17
+ end
18
+
19
+ def each(&block)
20
+ @tokens.each(&block)
21
+ end
22
+ end
23
+
24
+ # Returned when tokenizing fails due to an unexpected character in the input stream.
25
+ class TokenizerErrorResult
26
+ # The index of the character that caused the error.
27
+ attr_reader :unexpected_char_index
28
+
29
+ def initialize(unexpected_char_index)
30
+ @unexpected_char_index = unexpected_char_index
31
+ end
32
+
33
+ # Returns true.
34
+ def has_error?
35
+ true
36
+ end
37
+ end
38
+
39
+ # A tokenizer state encapsulates actions that should be performed upon
40
+ # encountering each permissible character for that state.
41
+ class TokenizerState
42
+ attr_reader :actions, :default_action
43
+
44
+ def initialize
45
+ @actions = {}
46
+ end
47
+
48
+ # Define the action (+blk+) to be performed when encountering any of +characters+ in the token stream.
49
+ def for_characters(characters, &blk)
50
+ characters.each do |character|
51
+ actions[character] = blk
52
+ end
53
+ end
54
+
55
+ alias for_character for_characters
56
+
57
+ # define the action (+blk+) to be performed for any +characters+ that don't have an action to perform.
58
+ def for_default(&blk)
59
+ @default_action = blk
60
+ end
61
+
62
+ def to_s #:nodoc:
63
+ actions.inspect
64
+ end
65
+
66
+ end
67
+
68
  # This abstract class contains a DSL for hand-coding tokenizers. Subclass it to implement tokenizers for specific grammars.
  #
  # Tokenizers are state machines. Each state of a tokenizer is identified
  # by a Ruby symbol. The constant Dhaka::TOKENIZER_IDLE_STATE is reserved for the idle state of the tokenizer (the one
  # that it starts in).
  #
  # The following is a tokenizer for arithmetic expressions with integer terms. The tokenizer starts in the idle state
  # creating single-character tokens for all characters excepts digits and whitespace. It shifts to
  # <tt>:get_integer_literal</tt> when it encounters a digit character and creates a token on the stack on which it
  # accumulates the value of the literal. When it again encounters a non-digit character, it shifts back to idle.
  # Whitespace is treated as a delimiter, but not shifted as a token.
  #
  #   class ArithmeticPrecedenceTokenizer < Dhaka::Tokenizer
  #
  #     digits = ('0'..'9').to_a
  #     parenths = ['(', ')']
  #     operators = ['-', '+', '/', '*', '^']
  #     functions = ['h', 'l']
  #     arg_separator = [',']
  #     whitespace = [' ']
  #
  #     all_characters = digits + parenths + operators + functions + arg_separator + whitespace
  #
  #     for_state Dhaka::TOKENIZER_IDLE_STATE do
  #       for_characters(all_characters - (digits + whitespace)) do
  #         create_token(curr_char, nil)
  #         advance
  #       end
  #       for_characters digits do
  #         create_token('n', '')
  #         switch_to :get_integer_literal
  #       end
  #       for_character whitespace do
  #         advance
  #       end
  #     end
  #
  #     for_state :get_integer_literal do
  #       for_characters all_characters - digits do
  #         switch_to Dhaka::TOKENIZER_IDLE_STATE
  #       end
  #       for_characters digits do
  #         curr_token.value << curr_char
  #         advance
  #       end
  #     end
  #
  #   end
  #
  # For languages where the lexical structure is very complicated, it may be too tedious to implement a Tokenizer by hand.
  # In such cases, it's a lot easier to write a LexerSpecification using regular expressions and create a Lexer from that.
  class Tokenizer
    class << self
      # Define the action for the state named +state_name+.
      def for_state(state_name, &blk)
        states[state_name].instance_eval(&blk)
      end

      # Tokenizes a string +input+ and returns a TokenizerErrorResult on failure or a TokenizerSuccessResult on success.
      def tokenize(input)
        new(input).run
      end

      private
      # Gives each concrete tokenizer subclass its own +states+ hash (and a
      # +grammar+ accessor) by defining the attributes on the subclass's
      # singleton class. States are created lazily on first access.
      def inherited(tokenizer)
        class << tokenizer
          attr_accessor :states, :grammar
        end
        tokenizer.states = Hash.new {|hash, key| hash[key] = TokenizerState.new}
      end
    end

    # The tokens shifted so far.
    attr_reader :tokens

    def initialize(input) #:nodoc:
      @input = input
      @current_state = self.class.states[TOKENIZER_IDLE_STATE]
      @curr_char_index = 0
      @tokens = []
    end

    # The character currently being processed.
    # (String#[] with .chr keeps this working whether indexing yields an
    # Integer codepoint — Ruby 1.8 — or a one-character String.)
    def curr_char
      @input[@curr_char_index] and @input[@curr_char_index].chr
    end

    # Advance to the next character.
    def advance
      @curr_char_index += 1
    end

    def inspect
      "<Dhaka::Tokenizer grammar : #{grammar}>"
    end

    # The token currently on top of the stack.
    def curr_token
      tokens.last
    end

    # Push a new token on to the stack with symbol corresponding to +symbol_name+ and a value of +value+.
    def create_token(symbol_name, value)
      new_token = Dhaka::Token.new(symbol_name, value, @curr_char_index)
      tokens << new_token
    end

    # Change the active state of the tokenizer to the state identified by the symbol +state_name+.
    def switch_to state_name
      @current_state = self.class.states[state_name]
    end

    # Main loop: dispatch each character to the current state's action for
    # that character (or the state's default action), bailing out with a
    # TokenizerErrorResult when neither exists. Appends the end-of-input
    # token before returning success.
    def run #:nodoc:
      while curr_char
        blk = @current_state.actions[curr_char] || @current_state.default_action
        return TokenizerErrorResult.new(@curr_char_index) unless blk
        instance_eval(&blk)
      end
      tokens << Dhaka::Token.new(Dhaka::END_SYMBOL_NAME, nil, nil)
      TokenizerSuccessResult.new(tokens)
    end
  end
190
+ end
@@ -0,0 +1,62 @@
1
+ #--
2
+ # Copyright (c) 2006, 2007 Mushfeq Khan
3
+ #
4
+ # Permission is hereby granted, free of charge, to any person obtaining
5
+ # a copy of this software and associated documentation files (the
6
+ # "Software"), to deal in the Software without restriction, including
7
+ # without limitation the rights to use, copy, modify, merge, publish,
8
+ # distribute, sublicense, and/or sell copies of the Software, and to
9
+ # permit persons to whom the Software is furnished to do so, subject to
10
+ # the following conditions:
11
+ #
12
+ # The above copyright notice and this permission notice shall be
13
+ # included in all copies or substantial portions of the Software.
14
+ #
15
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16
+ # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17
+ # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
18
+ # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
19
+ # LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
20
+ # OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
21
+ # WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
22
+ #++
23
+
24
+ require 'set'
25
+ require 'logger'
26
+ require 'delegate'
27
+
28
+ %w[
29
+ dot/dot
30
+ grammar/grammar_symbol
31
+ grammar/production
32
+ grammar/closure_hash
33
+ grammar/grammar
34
+ grammar/precedence
35
+ parser/parse_tree
36
+ parser/parse_result
37
+ parser/item
38
+ parser/channel
39
+ parser/parser_methods
40
+ parser/parser_state
41
+ parser/conflict
42
+ parser/token
43
+ parser/action
44
+ parser/parser_run
45
+ parser/parser
46
+ parser/compiled_parser
47
+ tokenizer/tokenizer
48
+ evaluator/evaluator
49
+ lexer/accept_actions
50
+ lexer/alphabet
51
+ lexer/regex_grammar
52
+ lexer/regex_tokenizer
53
+ lexer/regex_parser
54
+ lexer/state_machine
55
+ lexer/dfa
56
+ lexer/state
57
+ lexer/specification
58
+ lexer/lexeme
59
+ lexer/lexer_run
60
+ lexer/lexer
61
+ lexer/compiled_lexer
62
+ ].each {|path| require File.join(File.dirname(__FILE__), 'dhaka/' + path)}
@@ -0,0 +1,5 @@
1
#!/usr/bin/env ruby
# Test-suite driver: finds every file matching *test.rb under the current
# directory, prints its path, and requires it relative to this file's
# directory so the whole suite runs in one process.
Dir['**/*test.rb'].each do |test_file|
  puts test_file
  require File.join(File.dirname(__FILE__), test_file)
end
@@ -0,0 +1,64 @@
1
+ require File.dirname(__FILE__) + '/arithmetic_grammar'
2
+
3
# Evaluator for parse trees produced by ArithmeticGrammar. Each rule below is
# named after a production in the grammar; its block computes the value of a
# node from its (recursively evaluated) child nodes.
class ArithmeticEvaluator < Dhaka::Evaluator

  self.grammar = ArithmeticGrammar

  define_evaluation_rules do

    # Binary operations: child_nodes is [lhs, operator, rhs].
    for_subtraction do
      evaluate(child_nodes[0]) - evaluate(child_nodes[2])
    end

    for_addition do
      evaluate(child_nodes[0]) + evaluate(child_nodes[2])
    end

    # Promote to Float so integer division is exact.
    for_division do
      evaluate(child_nodes[0]).to_f/evaluate(child_nodes[2])
    end

    for_multiplication do
      evaluate(child_nodes[0]) * evaluate(child_nodes[2])
    end

    # A literal's value is carried on its token.
    for_getting_literals do
      child_nodes[0].token.value
    end

    # '(' expr ')' — the value is that of the middle child.
    for_unpacking_parenthetized_expression do
      evaluate(child_nodes[1])
    end

    for_empty_args do
      []
    end

    # function '(' args ')' — call the function value with the arg list.
    for_evaluating_function do
      evaluate(child_nodes[0]).call evaluate(child_nodes[2])
    end

    # arg ',' args — prepend the head arg to the evaluated tail.
    for_concatenating_args do
      [evaluate(child_nodes[0])]+evaluate(child_nodes[2])
    end

    for_single_args do
      [evaluate(child_nodes[0])]
    end

    # The function symbols resolve to the callables injected below.
    for_min_function do
      @min_function
    end

    for_max_function do
      @max_function
    end

  end

  # +min_function+ and +max_function+ are callables made available to the
  # function-evaluation rules above.
  def initialize(min_function, max_function)
    @min_function = min_function
    @max_function = max_function
  end

end