simply_stored 0.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (121)
  1. data/lib/simply_stored/class_methods_base.rb +31 -0
  2. data/lib/simply_stored/couch/belongs_to.rb +117 -0
  3. data/lib/simply_stored/couch/ext/couch_potato.rb +16 -0
  4. data/lib/simply_stored/couch/has_many.rb +148 -0
  5. data/lib/simply_stored/couch/has_one.rb +93 -0
  6. data/lib/simply_stored/couch/validations.rb +74 -0
  7. data/lib/simply_stored/couch/views/array_property_view_spec.rb +22 -0
  8. data/lib/simply_stored/couch/views.rb +1 -0
  9. data/lib/simply_stored/couch.rb +278 -0
  10. data/lib/simply_stored/instance_methods.rb +143 -0
  11. data/lib/simply_stored/simpledb/associations.rb +196 -0
  12. data/lib/simply_stored/simpledb/attributes.rb +173 -0
  13. data/lib/simply_stored/simpledb/storag.rb +85 -0
  14. data/lib/simply_stored/simpledb/validations.rb +88 -0
  15. data/lib/simply_stored/simpledb.rb +212 -0
  16. data/lib/simply_stored/storage.rb +93 -0
  17. data/lib/simply_stored.rb +9 -0
  18. data/test/custom_views_test.rb +33 -0
  19. data/test/fixtures/couch.rb +182 -0
  20. data/test/fixtures/simpledb/item.rb +11 -0
  21. data/test/fixtures/simpledb/item_daddy.rb +8 -0
  22. data/test/fixtures/simpledb/log_item.rb +3 -0
  23. data/test/fixtures/simpledb/namespace_bar.rb +5 -0
  24. data/test/fixtures/simpledb/namespace_foo.rb +7 -0
  25. data/test/fixtures/simpledb/protected_item.rb +3 -0
  26. data/test/simply_stored_couch_test.rb +1684 -0
  27. data/test/simply_stored_simpledb_test.rb +1341 -0
  28. data/test/test_helper.rb +22 -0
  29. data/test/vendor/dhaka-2.2.1/lib/dhaka/dot/dot.rb +29 -0
  30. data/test/vendor/dhaka-2.2.1/lib/dhaka/evaluator/evaluator.rb +133 -0
  31. data/test/vendor/dhaka-2.2.1/lib/dhaka/grammar/closure_hash.rb +15 -0
  32. data/test/vendor/dhaka-2.2.1/lib/dhaka/grammar/grammar.rb +240 -0
  33. data/test/vendor/dhaka-2.2.1/lib/dhaka/grammar/grammar_symbol.rb +27 -0
  34. data/test/vendor/dhaka-2.2.1/lib/dhaka/grammar/precedence.rb +19 -0
  35. data/test/vendor/dhaka-2.2.1/lib/dhaka/grammar/production.rb +36 -0
  36. data/test/vendor/dhaka-2.2.1/lib/dhaka/lexer/accept_actions.rb +36 -0
  37. data/test/vendor/dhaka-2.2.1/lib/dhaka/lexer/alphabet.rb +21 -0
  38. data/test/vendor/dhaka-2.2.1/lib/dhaka/lexer/compiled_lexer.rb +46 -0
  39. data/test/vendor/dhaka-2.2.1/lib/dhaka/lexer/dfa.rb +121 -0
  40. data/test/vendor/dhaka-2.2.1/lib/dhaka/lexer/lexeme.rb +32 -0
  41. data/test/vendor/dhaka-2.2.1/lib/dhaka/lexer/lexer.rb +70 -0
  42. data/test/vendor/dhaka-2.2.1/lib/dhaka/lexer/lexer_run.rb +78 -0
  43. data/test/vendor/dhaka-2.2.1/lib/dhaka/lexer/regex_grammar.rb +392 -0
  44. data/test/vendor/dhaka-2.2.1/lib/dhaka/lexer/regex_parser.rb +2010 -0
  45. data/test/vendor/dhaka-2.2.1/lib/dhaka/lexer/regex_tokenizer.rb +14 -0
  46. data/test/vendor/dhaka-2.2.1/lib/dhaka/lexer/specification.rb +96 -0
  47. data/test/vendor/dhaka-2.2.1/lib/dhaka/lexer/state.rb +68 -0
  48. data/test/vendor/dhaka-2.2.1/lib/dhaka/lexer/state_machine.rb +37 -0
  49. data/test/vendor/dhaka-2.2.1/lib/dhaka/parser/action.rb +55 -0
  50. data/test/vendor/dhaka-2.2.1/lib/dhaka/parser/channel.rb +58 -0
  51. data/test/vendor/dhaka-2.2.1/lib/dhaka/parser/compiled_parser.rb +51 -0
  52. data/test/vendor/dhaka-2.2.1/lib/dhaka/parser/conflict.rb +54 -0
  53. data/test/vendor/dhaka-2.2.1/lib/dhaka/parser/item.rb +42 -0
  54. data/test/vendor/dhaka-2.2.1/lib/dhaka/parser/parse_result.rb +50 -0
  55. data/test/vendor/dhaka-2.2.1/lib/dhaka/parser/parse_tree.rb +66 -0
  56. data/test/vendor/dhaka-2.2.1/lib/dhaka/parser/parser.rb +165 -0
  57. data/test/vendor/dhaka-2.2.1/lib/dhaka/parser/parser_methods.rb +11 -0
  58. data/test/vendor/dhaka-2.2.1/lib/dhaka/parser/parser_run.rb +39 -0
  59. data/test/vendor/dhaka-2.2.1/lib/dhaka/parser/parser_state.rb +74 -0
  60. data/test/vendor/dhaka-2.2.1/lib/dhaka/parser/token.rb +22 -0
  61. data/test/vendor/dhaka-2.2.1/lib/dhaka/runtime.rb +51 -0
  62. data/test/vendor/dhaka-2.2.1/lib/dhaka/tokenizer/tokenizer.rb +190 -0
  63. data/test/vendor/dhaka-2.2.1/lib/dhaka.rb +62 -0
  64. data/test/vendor/dhaka-2.2.1/test/all_tests.rb +5 -0
  65. data/test/vendor/dhaka-2.2.1/test/arithmetic/arithmetic_evaluator.rb +64 -0
  66. data/test/vendor/dhaka-2.2.1/test/arithmetic/arithmetic_evaluator_test.rb +43 -0
  67. data/test/vendor/dhaka-2.2.1/test/arithmetic/arithmetic_grammar.rb +41 -0
  68. data/test/vendor/dhaka-2.2.1/test/arithmetic/arithmetic_grammar_test.rb +9 -0
  69. data/test/vendor/dhaka-2.2.1/test/arithmetic/arithmetic_test_methods.rb +9 -0
  70. data/test/vendor/dhaka-2.2.1/test/arithmetic/arithmetic_tokenizer.rb +39 -0
  71. data/test/vendor/dhaka-2.2.1/test/arithmetic/arithmetic_tokenizer_test.rb +38 -0
  72. data/test/vendor/dhaka-2.2.1/test/arithmetic_precedence/arithmetic_precedence_evaluator.rb +43 -0
  73. data/test/vendor/dhaka-2.2.1/test/arithmetic_precedence/arithmetic_precedence_grammar.rb +24 -0
  74. data/test/vendor/dhaka-2.2.1/test/arithmetic_precedence/arithmetic_precedence_grammar_test.rb +30 -0
  75. data/test/vendor/dhaka-2.2.1/test/arithmetic_precedence/arithmetic_precedence_lexer_specification.rb +23 -0
  76. data/test/vendor/dhaka-2.2.1/test/arithmetic_precedence/arithmetic_precedence_parser_test.rb +33 -0
  77. data/test/vendor/dhaka-2.2.1/test/brackets/bracket_grammar.rb +23 -0
  78. data/test/vendor/dhaka-2.2.1/test/brackets/bracket_tokenizer.rb +22 -0
  79. data/test/vendor/dhaka-2.2.1/test/brackets/brackets_test.rb +28 -0
  80. data/test/vendor/dhaka-2.2.1/test/chittagong/chittagong_driver.rb +46 -0
  81. data/test/vendor/dhaka-2.2.1/test/chittagong/chittagong_driver_test.rb +276 -0
  82. data/test/vendor/dhaka-2.2.1/test/chittagong/chittagong_evaluator.rb +284 -0
  83. data/test/vendor/dhaka-2.2.1/test/chittagong/chittagong_evaluator_test.rb +38 -0
  84. data/test/vendor/dhaka-2.2.1/test/chittagong/chittagong_grammar.rb +104 -0
  85. data/test/vendor/dhaka-2.2.1/test/chittagong/chittagong_lexer.rb +109 -0
  86. data/test/vendor/dhaka-2.2.1/test/chittagong/chittagong_lexer_specification.rb +37 -0
  87. data/test/vendor/dhaka-2.2.1/test/chittagong/chittagong_lexer_test.rb +58 -0
  88. data/test/vendor/dhaka-2.2.1/test/chittagong/chittagong_parser.rb +879 -0
  89. data/test/vendor/dhaka-2.2.1/test/chittagong/chittagong_parser_test.rb +55 -0
  90. data/test/vendor/dhaka-2.2.1/test/chittagong/chittagong_test.rb +170 -0
  91. data/test/vendor/dhaka-2.2.1/test/core/another_lalr_but_not_slr_grammar.rb +20 -0
  92. data/test/vendor/dhaka-2.2.1/test/core/compiled_parser_test.rb +44 -0
  93. data/test/vendor/dhaka-2.2.1/test/core/dfa_test.rb +170 -0
  94. data/test/vendor/dhaka-2.2.1/test/core/evaluator_test.rb +22 -0
  95. data/test/vendor/dhaka-2.2.1/test/core/grammar_test.rb +83 -0
  96. data/test/vendor/dhaka-2.2.1/test/core/lalr_but_not_slr_grammar.rb +19 -0
  97. data/test/vendor/dhaka-2.2.1/test/core/lexer_test.rb +139 -0
  98. data/test/vendor/dhaka-2.2.1/test/core/malformed_grammar.rb +7 -0
  99. data/test/vendor/dhaka-2.2.1/test/core/malformed_grammar_test.rb +8 -0
  100. data/test/vendor/dhaka-2.2.1/test/core/nullable_grammar.rb +21 -0
  101. data/test/vendor/dhaka-2.2.1/test/core/parse_result_test.rb +44 -0
  102. data/test/vendor/dhaka-2.2.1/test/core/parser_state_test.rb +24 -0
  103. data/test/vendor/dhaka-2.2.1/test/core/parser_test.rb +131 -0
  104. data/test/vendor/dhaka-2.2.1/test/core/precedence_grammar.rb +17 -0
  105. data/test/vendor/dhaka-2.2.1/test/core/precedence_grammar_test.rb +9 -0
  106. data/test/vendor/dhaka-2.2.1/test/core/rr_conflict_grammar.rb +21 -0
  107. data/test/vendor/dhaka-2.2.1/test/core/simple_grammar.rb +22 -0
  108. data/test/vendor/dhaka-2.2.1/test/core/sr_conflict_grammar.rb +16 -0
  109. data/test/vendor/dhaka-2.2.1/test/dhaka_test_helper.rb +17 -0
  110. data/test/vendor/dhaka-2.2.1/test/fake_logger.rb +17 -0
  111. data/test/vendor/simplerdb-0.2/lib/simplerdb/client_exception.rb +10 -0
  112. data/test/vendor/simplerdb-0.2/lib/simplerdb/db.rb +146 -0
  113. data/test/vendor/simplerdb-0.2/lib/simplerdb/query_language.rb +266 -0
  114. data/test/vendor/simplerdb-0.2/lib/simplerdb/server.rb +33 -0
  115. data/test/vendor/simplerdb-0.2/lib/simplerdb/servlet.rb +191 -0
  116. data/test/vendor/simplerdb-0.2/lib/simplerdb.rb +3 -0
  117. data/test/vendor/simplerdb-0.2/test/functional_test.rb +81 -0
  118. data/test/vendor/simplerdb-0.2/test/query_evaluator_test.rb +73 -0
  119. data/test/vendor/simplerdb-0.2/test/query_parser_test.rb +64 -0
  120. data/test/vendor/simplerdb-0.2/test/simplerdb_test.rb +80 -0
  121. metadata +182 -0
@@ -0,0 +1,165 @@
1
module Dhaka
  # The parser generator. To generate a parser from a grammar specification +ArithmeticPrecedenceGrammar+, one would
  # write:
  #   parser = Dhaka::Parser.new(ArithmeticPrecedenceGrammar)
  #
  # To compile this parser to Ruby source as +ArithmeticPrecedenceParser+:
  #   parser.compile_to_ruby_source_as(:ArithmeticPrecedenceParser)
  # which returns a string of Ruby code.
  class Parser
    include ParserMethods
    attr_reader :grammar

    # Creates a new parser from the given grammar. Messages are logged by default to STDOUT
    # and the log level is WARN. Shift-reduce conflicts are reported at WARN and reduce-reduce conflicts
    # at ERROR. You may pass in your own logger. Logging at DEBUG shows a lot of progress output.
    def initialize(grammar, logger = nil)
      # Memoizing caches: one ShiftAction per destination state, one ReduceAction per production.
      @shift_actions = Hash.new {|hash, state| hash[state] = ShiftAction.new(state)}
      @reduce_actions = Hash.new {|hash, production| hash[production] = ReduceAction.new(production)}
      @logger = logger || default_logger
      @transitions = Hash.new {|hash, state| hash[state] = {}}
      @grammar = grammar
      # Channels carry lookahead symbols between items (see pump_channels).
      @channels = Hash.new {|hash, start_item| hash[start_item] = []}
      # Lazily-built state table keyed by kernel item sets: looking up an unseen
      # kernel computes its closure, creates the state, and recursively
      # creates/links every state reachable through its transition symbols.
      @states = Hash.new do |hash, kernel|
        closure, channels = grammar.closure(kernel)
        channels.each do |start_item, channel_set|
          @channels[start_item].concat channel_set.to_a
        end
        new_state = ParserState.new(self, closure)
        # Register the state before recursing so cyclic transitions terminate.
        hash[kernel] = new_state
        @logger.debug("Created #{new_state.unique_name}.")
        new_state.transition_items.each do |symbol, items|
          destination_kernel = ItemSet.new(items.collect{|item| item.next_item})
          destination_state = hash[destination_kernel]
          items.each {|item| @channels[item] << grammar.passive_channel(item, destination_state.items[item.next_item])}
          @transitions[new_state][symbol] = destination_state
        end
        new_state
      end
      initialize_states
    end

    # Returns the Ruby source of the generated parser compiled as +parser_class_name+. This can be written out to a file.
    def compile_to_ruby_source_as parser_class_name
      result = "class #{parser_class_name} < Dhaka::CompiledParser\n\n"
      result << " self.grammar = #{grammar.name}\n\n"
      result << " start_with #{start_state.id}\n\n"
      states.each do |state|
        result << "#{state.compile_to_ruby_source}\n\n"
      end
      result << "end"
      result
    end

    # Returns the dot representation of the parser. If <tt>:hide_lookaheads</tt> is set to true in the
    # options hash, lookaheads are not written out to the parser states, which is helpful when there are dozens
    # of lookahead symbols for every item in every state.
    def to_dot(options = {})
      Dot::Digraph.new(:fontsize => 10, :shape => :box, :size => 5) do |g|
        states.each do |state|
          g.node(state, :label => state.items.values.collect{|item| item.to_s(options)}.join("\n"))
          @transitions[state].each do |symbol, dest_state|
            g.edge(state, dest_state, :label => symbol.name)
          end
        end
      end.to_dot
    end

    def inspect
      "<Dhaka::Parser grammar : #{grammar}>"
    end

    private
    attr_reader :start_state

    def states
      @states.values
    end

    # STDOUT logger at WARN, using the custom formatter below.
    def default_logger
      logger = Logger.new(STDOUT)
      logger.level = Logger::WARN
      logger.formatter = ParserLogOutputFormatter.new
      logger
    end

    # Seeds the state table with the start items (end symbol as lookahead),
    # pumps the lookahead channels to a fixpoint, then fills in the shift and
    # reduce actions for every state.
    def initialize_states
      start_productions = grammar.productions_for_symbol(grammar.start_symbol)
      raise NoStartProductionsError.new(grammar) if start_productions.empty?
      start_items = ItemSet.new(start_productions.collect {|production| Item.new(production, 0)})
      start_items.each {|start_item| start_item.lookaheadset << grammar.end_symbol}
      # This lookup triggers the lazy construction of the whole state graph.
      @start_state = @states[start_items]
      @logger.debug("Pumping #{@channels.keys.size} dirty items...")
      pump_channels @channels.keys
      @logger.debug("Generating shift actions...")
      generate_shift_actions
      @logger.debug("Generating reduce actions...")
      generate_reduce_actions
    end

    # A shift action for every outgoing transition symbol of every state.
    def generate_shift_actions
      @states.values.each do |state|
        @transitions[state].keys.each do |symbol|
          state.actions[symbol.name] = @shift_actions[@transitions[state][symbol]]
        end
      end
    end

    # Items with no next symbol are completed productions and yield reductions.
    def generate_reduce_actions
      @states.values.each do |state|
        state.items.values.select{ |item| !item.next_symbol }.each do |item|
          create_reduction_actions_for_item_and_state item, state
        end
      end
    end

    # Installs a reduce action for each lookahead of a completed item. If the
    # state already has an action for that lookahead, the conflict is resolved
    # and logged: reduce-reduce at ERROR, shift-reduce at WARN.
    def create_reduction_actions_for_item_and_state item, state
      item.lookaheadset.each do |lookahead|
        new_action = @reduce_actions[item.production]
        if existing_action = state.actions[lookahead.name]
          if ReduceAction === existing_action
            message = ReduceReduceConflict.new(state, lookahead, new_action).resolve
            @logger.error(message)
          else
            message = ShiftReduceConflict.new(state, lookahead, new_action).resolve
            @logger.warn(message)
          end
        else
          state.actions[lookahead.name] = new_action
        end
      end
    end

    # Worklist loop: pumping a channel propagates lookaheads to its end item;
    # any item that gained lookaheads becomes dirty for the next pass. Stops
    # when a pass produces no new dirty items (fixpoint).
    def pump_channels dirty_items
      loop do
        new_dirty_items = Set.new
        dirty_items.each do |dirty_item|
          @channels[dirty_item].each do |channel|
            new_dirty_items << channel.end_item if channel.pump
          end
        end
        break if new_dirty_items.empty?
        @logger.debug("#{new_dirty_items.size} dirty items...")
        dirty_items = new_dirty_items
      end
    end
  end

  # Raised when trying to create a Parser for a grammar that has no productions for the start symbol
  class NoStartProductionsError < StandardError
    def initialize(grammar) #:nodoc:
      @grammar = grammar
    end
    def to_s #:nodoc:
      "No start productions defined for #{@grammar.name}"
    end
  end

  # Formats parser log messages on their own line, prefixed with the severity.
  class ParserLogOutputFormatter < Logger::Formatter #:nodoc:
    def call(severity, time, progname, msg)
      "\n%s -- %s: %s\n" % [ severity, progname, msg2str(msg)]
    end
  end

end
@@ -0,0 +1,11 @@
1
#!/usr/bin/env ruby
module Dhaka
  # This module is included both in Parser and CompiledParser.
  module ParserMethods
    # +token_stream+ is an Enumerable of Token-s. Returns either a
    # ParseSuccessResult or a ParseErrorResult.
    def parse(token_stream)
      ParserRun.new(grammar, start_state, token_stream).run
    end
  end
end
@@ -0,0 +1,39 @@
1
module Dhaka
  # Drives a single parse: keeps the node and state stacks and executes the
  # shift/reduce actions dictated by the current state for each incoming token.
  class ParserRun #:nodoc:

    def initialize(grammar, start_state, token_stream)
      @grammar = grammar
      @node_stack = []
      @state_stack = [start_state]
      @token_stream = token_stream
      @symbol_queue = []
    end

    # Returns a ParseSuccessResult when the input reduces to a single head node,
    # a ParseErrorResult on an unexpected token, or — if iterating the token
    # stream itself yielded a TokenizerErrorResult (presumably from a lazily
    # tokenizing stream; verify against the lexer) — that error result.
    def run
      tokenize_result = token_stream.each do |token|
        @current_token = token
        @symbol_queue << @current_token.symbol_name
        # Perform every pending action (reductions and the shift) for this token.
        error = execute_actions
        return error if error
        node_stack << ParseTreeLeafNode.new(@current_token)
        state_stack.last
      end
      return tokenize_result if TokenizerErrorResult === tokenize_result
      # Implicitly returns nil when the parse did not finish on a head node.
      ParseSuccessResult.new(node_stack.first) if node_stack.first.head_node?
    end

    private

    attr_reader :state_stack, :token_stream, :node_stack

    # Pops queued symbol names and runs the current state's action for each;
    # actions run via instance_eval so they can manipulate the stacks and push
    # further symbols onto @symbol_queue. Returns a ParseErrorResult when the
    # current state has no action for a symbol, nil on success.
    def execute_actions
      while symbol_name = @symbol_queue.pop
        action = state_stack.last.actions[symbol_name]
        return ParseErrorResult.new(@current_token, state_stack.last) unless action
        instance_eval(&action.action_code)
      end
      nil
    end

  end
end
@@ -0,0 +1,74 @@
1
module Dhaka
  # One state of the generated parser: a collection of items plus the action
  # (shift/reduce) to take for each grammar symbol name.
  class ParserState #:nodoc:
    attr_accessor :items, :actions, :id

    # Class-wide counter so every state receives a unique, increasing id.
    @@state_id = 0

    def self.next_state_id
      result = @@state_id
      @@state_id += 1
      result
    end

    def initialize(parser, items, id=nil)
      @parser = parser
      @items = items
      @actions = {}
      # Compiled parsers pass an explicit id; generated states number themselves.
      @id = id || ParserState.next_state_id
    end

    # Groups this state's items by the symbol after the dot; completed items
    # (no next symbol) are skipped. Used by Parser to build outgoing transitions.
    def transition_items
      result = Hash.new {|h, k| h[k] = ItemSet.new()}
      items.values.each do |item|
        result[item.next_symbol] << item if item.next_symbol
      end
      result
    end

    def unique_name
      "State#{id}"
    end

    # Emits the at_state(...) DSL snippet consumed by CompiledParser. Symbol
    # names sharing the same action are folded into one for_symbols call.
    def compile_to_ruby_source
      result = " at_state(#{id}) {\n"

      symbol_names_by_action = Hash.new {|hash, key| hash[key] = []}
      actions.each do |symbol_name, action|
        symbol_names_by_action[action] << symbol_name
      end

      symbol_names_by_action.keys.each do |action|
        symbol_names = symbol_names_by_action[action].collect {|symbol_name| "#{symbol_name.inspect}"}.join(', ')
        result << " for_symbols(#{symbol_names}) { #{action.compile_to_ruby_source} }\n"
      end

      result << " }"
      result
    end

    # DSL entry point used by compiled parsers: evaluates +blk+ in the parser's
    # context and registers the resulting action under each symbol name.
    def for_symbols *symbol_names, &blk
      symbol_names.each do |symbol_name|
        actions[symbol_name] = @parser.instance_eval(&blk)
      end
    end

    alias :for_symbol :for_symbols

    def to_s(options = {})
      items.values.collect{|item| item.to_s(options)}.join("\n")
    end

  end

  # A Set of items usable as a Hash key: kernels containing the same items hash
  # and compare equal, which is how Parser's state table shares states.
  class ItemSet < Set #:nodoc:
    def hash
      # XOR of member hashes is order-independent, matching Set equality.
      result = 5381
      each { |item| result ^= item.hash }
      result
    end

    def eql? other
      self == other
    end
  end
end
@@ -0,0 +1,22 @@
1
module Dhaka
  # Represents a portion of the input character stream that is mapped by the tokenizer
  # to a symbol in the grammar. The attribute +input_position+ contains the start index position of the original
  # string input that this token came from. It can be used to report errors by indicating the specific portion
  # of the input where the error occurred.
  class Token
    attr_accessor :symbol_name, :value, :input_position

    def initialize(symbol_name, value, input_position)
      @symbol_name = symbol_name
      @value = value
      @input_position = input_position
    end

    def to_s #:nodoc:
      if value
        "#{symbol_name} : #{value}"
      else
        "#{symbol_name}"
      end
    end

    # Tokens compare by symbol name and value; input position is ignored.
    def ==(other)
      (symbol_name == other.symbol_name) && (value == other.value)
    end
  end
end
@@ -0,0 +1,51 @@
1
#--
# Copyright (c) 2006, 2007 Mushfeq Khan
#
# Permission is hereby granted, free of charge, to any person obtaining
# a copy of this software and associated documentation files (the
# "Software"), to deal in the Software without restriction, including
# without limitation the rights to use, copy, modify, merge, publish,
# distribute, sublicense, and/or sell copies of the Software, and to
# permit persons to whom the Software is furnished to do so, subject to
# the following conditions:
#
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#++

require 'set'
require 'logger'
require 'delegate'

# Runtime subset of the library: compared to dhaka.rb this list omits the
# generator files (parser/parser, lexer/lexer, lexer/dfa, conflict handling,
# dot output), loading only what compiled parsers/lexers need. Paths are
# required in dependency order relative to this file's directory.
%w[
  grammar/grammar_symbol
  grammar/production
  grammar/grammar
  grammar/precedence
  parser/parse_tree
  parser/parse_result
  parser/parser_methods
  parser/parser_state
  parser/token
  parser/action
  parser/parser_run
  parser/compiled_parser
  tokenizer/tokenizer
  evaluator/evaluator
  lexer/accept_actions
  lexer/alphabet
  lexer/state_machine
  lexer/state
  lexer/specification
  lexer/lexeme
  lexer/lexer_run
  lexer/compiled_lexer
  ].each {|path| require File.join(File.dirname(__FILE__), path)}
@@ -0,0 +1,190 @@
1
module Dhaka
  # Reserved constant used to identify the idle state of the tokenizer.
  TOKENIZER_IDLE_STATE = :idle_state

  # Returned on successful tokenizing of the input stream. Supports iteration by including Enumerable, so it can
  # be passed in directly to the parser.
  class TokenizerSuccessResult
    include Enumerable

    def initialize(tokens)
      @tokens = tokens
    end

    # Returns false.
    def has_error?
      false
    end

    def each(&block)
      @tokens.each(&block)
    end
  end

  # Returned when tokenizing fails due to an unexpected character in the input stream.
  class TokenizerErrorResult
    # The index of the character that caused the error.
    attr_reader :unexpected_char_index

    def initialize(unexpected_char_index)
      @unexpected_char_index = unexpected_char_index
    end

    # Returns true.
    def has_error?
      true
    end
  end

  # A tokenizer state encapsulates actions that should be performed upon
  # encountering each permissible character for that state.
  class TokenizerState
    attr_reader :actions, :default_action

    def initialize
      @actions = {}
    end

    # Define the action (+blk+) to be performed when encountering any of +characters+ in the token stream.
    def for_characters(characters, &blk)
      characters.each do |character|
        actions[character] = blk
      end
    end

    alias for_character for_characters

    # Define the action (+blk+) to be performed for any characters that don't have an action to perform.
    def for_default(&blk)
      @default_action = blk
    end

    def to_s #:nodoc:
      actions.inspect
    end

  end

  # This abstract class contains a DSL for hand-coding tokenizers. Subclass it to implement tokenizers for specific grammars.
  #
  # Tokenizers are state machines. Each state of a tokenizer is identified
  # by a Ruby symbol. The constant Dhaka::TOKENIZER_IDLE_STATE is reserved for the idle state of the tokenizer (the one
  # that it starts in).
  #
  # The following is a tokenizer for arithmetic expressions with integer terms. The tokenizer starts in the idle state
  # creating single-character tokens for all characters except digits and whitespace. It shifts to
  # <tt>:get_integer_literal</tt> when it encounters a digit character and creates a token on the stack on which it
  # accumulates the value of the literal. When it again encounters a non-digit character, it shifts back to idle.
  # Whitespace is treated as a delimiter, but not shifted as a token.
  #
  #   class ArithmeticPrecedenceTokenizer < Dhaka::Tokenizer
  #
  #     digits = ('0'..'9').to_a
  #     parenths = ['(', ')']
  #     operators = ['-', '+', '/', '*', '^']
  #     functions = ['h', 'l']
  #     arg_separator = [',']
  #     whitespace = [' ']
  #
  #     all_characters = digits + parenths + operators + functions + arg_separator + whitespace
  #
  #     for_state Dhaka::TOKENIZER_IDLE_STATE do
  #       for_characters(all_characters - (digits + whitespace)) do
  #         create_token(curr_char, nil)
  #         advance
  #       end
  #       for_characters digits do
  #         create_token('n', '')
  #         switch_to :get_integer_literal
  #       end
  #       for_character whitespace do
  #         advance
  #       end
  #     end
  #
  #     for_state :get_integer_literal do
  #       for_characters all_characters - digits do
  #         switch_to Dhaka::TOKENIZER_IDLE_STATE
  #       end
  #       for_characters digits do
  #         curr_token.value << curr_char
  #         advance
  #       end
  #     end
  #
  #   end
  #
  # For languages where the lexical structure is very complicated, it may be too tedious to implement a Tokenizer by hand.
  # In such cases, it's a lot easier to write a LexerSpecification using regular expressions and create a Lexer from that.
  class Tokenizer
    class << self
      # Define the action for the state named +state_name+.
      def for_state(state_name, &blk)
        states[state_name].instance_eval(&blk)
      end

      # Tokenizes a string +input+ and returns a TokenizerErrorResult on failure or a TokenizerSuccessResult on success.
      def tokenize(input)
        new(input).run
      end

      private
      # Each subclass gets its own states table, lazily creating a
      # TokenizerState the first time a state name is referenced.
      def inherited(tokenizer)
        class << tokenizer
          attr_accessor :states, :grammar
        end
        tokenizer.states = Hash.new {|hash, key| hash[key] = TokenizerState.new}
      end
    end

    # The tokens shifted so far.
    attr_reader :tokens

    def initialize(input) #:nodoc:
      @input = input
      @current_state = self.class.states[TOKENIZER_IDLE_STATE]
      @curr_char_index = 0
      @tokens = []
    end

    # The character currently being processed; nil once past the end of input.
    def curr_char
      # NOTE(review): the .chr suggests 1.8-era String#[] returning a char code — verify on the target Ruby.
      @input[@curr_char_index] and @input[@curr_char_index].chr
    end

    # Advance to the next character.
    def advance
      @curr_char_index += 1
    end

    def inspect
      "<Dhaka::Tokenizer grammar : #{grammar}>"
    end

    # The token currently on top of the stack.
    def curr_token
      tokens.last
    end

    # Push a new token on to the stack with symbol corresponding to +symbol_name+ and a value of +value+.
    def create_token(symbol_name, value)
      new_token = Dhaka::Token.new(symbol_name, value, @curr_char_index)
      tokens << new_token
    end

    # Change the active state of the tokenizer to the state identified by the symbol +state_name+.
    def switch_to state_name
      @current_state = self.class.states[state_name]
    end

    # Main loop: run the current state's action for the current character
    # (falling back to the state's default action); a character with no action
    # aborts with a TokenizerErrorResult. On success a synthetic end-of-input
    # token is appended before wrapping the tokens in a TokenizerSuccessResult.
    def run #:nodoc:
      while curr_char
        blk = @current_state.actions[curr_char] || @current_state.default_action
        return TokenizerErrorResult.new(@curr_char_index) unless blk
        instance_eval(&blk)
      end
      tokens << Dhaka::Token.new(Dhaka::END_SYMBOL_NAME, nil, nil)
      TokenizerSuccessResult.new(tokens)
    end
  end
end
@@ -0,0 +1,62 @@
1
#--
# Copyright (c) 2006, 2007 Mushfeq Khan
#
# Permission is hereby granted, free of charge, to any person obtaining
# a copy of this software and associated documentation files (the
# "Software"), to deal in the Software without restriction, including
# without limitation the rights to use, copy, modify, merge, publish,
# distribute, sublicense, and/or sell copies of the Software, and to
# permit persons to whom the Software is furnished to do so, subject to
# the following conditions:
#
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#++

require 'set'
require 'logger'
require 'delegate'

# Loads the complete Dhaka library (grammar handling, the parser and lexer
# generators, and the runtime) from the dhaka/ subdirectory, in dependency
# order. See runtime.rb for the generator-free subset.
%w[
  dot/dot
  grammar/grammar_symbol
  grammar/production
  grammar/closure_hash
  grammar/grammar
  grammar/precedence
  parser/parse_tree
  parser/parse_result
  parser/item
  parser/channel
  parser/parser_methods
  parser/parser_state
  parser/conflict
  parser/token
  parser/action
  parser/parser_run
  parser/parser
  parser/compiled_parser
  tokenizer/tokenizer
  evaluator/evaluator
  lexer/accept_actions
  lexer/alphabet
  lexer/regex_grammar
  lexer/regex_tokenizer
  lexer/regex_parser
  lexer/state_machine
  lexer/dfa
  lexer/state
  lexer/specification
  lexer/lexeme
  lexer/lexer_run
  lexer/lexer
  lexer/compiled_lexer
  ].each {|path| require File.join(File.dirname(__FILE__), 'dhaka/' + path)}
@@ -0,0 +1,5 @@
1
#!/usr/bin/env ruby
# Discovers every *test.rb beneath the current directory, prints its path,
# and loads it relative to this file's directory.
base_dir = File.dirname(__FILE__)
Dir['**/*test.rb'].each do |test_file|
  puts test_file
  require File.join(base_dir, test_file)
end
@@ -0,0 +1,64 @@
1
require File.dirname(__FILE__) + '/arithmetic_grammar'

# Evaluates parse trees produced from ArithmeticGrammar. Each for_* block
# implements the evaluation rule for the correspondingly named grammar
# production; +child_nodes+ holds the parse-tree children of the node being
# evaluated, indexed by position in the production's right-hand side
# (e.g. [operand, operator, operand] for the binary rules).
class ArithmeticEvaluator < Dhaka::Evaluator

  self.grammar = ArithmeticGrammar

  define_evaluation_rules do

    for_subtraction do
      evaluate(child_nodes[0]) - evaluate(child_nodes[2])
    end

    for_addition do
      evaluate(child_nodes[0]) + evaluate(child_nodes[2])
    end

    for_division do
      # to_f forces float division so integer operands don't truncate.
      evaluate(child_nodes[0]).to_f/evaluate(child_nodes[2])
    end

    for_multiplication do
      evaluate(child_nodes[0]) * evaluate(child_nodes[2])
    end

    # A literal evaluates to its token's value.
    for_getting_literals do
      child_nodes[0].token.value
    end

    # ( expr ) — the value is the inner expression's.
    for_unpacking_parenthetized_expression do
      evaluate(child_nodes[1])
    end

    for_empty_args do
      []
    end

    # function(args): the function node evaluates to a callable, the args to an array.
    for_evaluating_function do
      evaluate(child_nodes[0]).call evaluate(child_nodes[2])
    end

    for_concatenating_args do
      [evaluate(child_nodes[0])]+evaluate(child_nodes[2])
    end

    for_single_args do
      [evaluate(child_nodes[0])]
    end

    # The min/max functions evaluate to the callables supplied at construction.
    for_min_function do
      @min_function
    end

    for_max_function do
      @max_function
    end

  end

  # +min_function+ and +max_function+ must respond to #call; they are invoked
  # by for_evaluating_function when the input uses the grammar's functions.
  def initialize(min_function, max_function)
    @min_function = min_function
    @max_function = max_function
  end

end