foreverman-dhaka 2.2.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (84) hide show
  1. data/Rakefile +64 -0
  2. data/lib/dhaka.rb +62 -0
  3. data/lib/dhaka/dot/dot.rb +29 -0
  4. data/lib/dhaka/evaluator/evaluator.rb +133 -0
  5. data/lib/dhaka/grammar/closure_hash.rb +15 -0
  6. data/lib/dhaka/grammar/grammar.rb +236 -0
  7. data/lib/dhaka/grammar/grammar_symbol.rb +27 -0
  8. data/lib/dhaka/grammar/precedence.rb +19 -0
  9. data/lib/dhaka/grammar/production.rb +36 -0
  10. data/lib/dhaka/lexer/accept_actions.rb +36 -0
  11. data/lib/dhaka/lexer/alphabet.rb +21 -0
  12. data/lib/dhaka/lexer/compiled_lexer.rb +46 -0
  13. data/lib/dhaka/lexer/dfa.rb +121 -0
  14. data/lib/dhaka/lexer/lexeme.rb +32 -0
  15. data/lib/dhaka/lexer/lexer.rb +70 -0
  16. data/lib/dhaka/lexer/lexer_run.rb +78 -0
  17. data/lib/dhaka/lexer/regex_grammar.rb +393 -0
  18. data/lib/dhaka/lexer/regex_parser.rb +2010 -0
  19. data/lib/dhaka/lexer/regex_tokenizer.rb +14 -0
  20. data/lib/dhaka/lexer/specification.rb +96 -0
  21. data/lib/dhaka/lexer/state.rb +68 -0
  22. data/lib/dhaka/lexer/state_machine.rb +37 -0
  23. data/lib/dhaka/parser/action.rb +55 -0
  24. data/lib/dhaka/parser/channel.rb +58 -0
  25. data/lib/dhaka/parser/compiled_parser.rb +51 -0
  26. data/lib/dhaka/parser/conflict.rb +54 -0
  27. data/lib/dhaka/parser/item.rb +43 -0
  28. data/lib/dhaka/parser/parse_result.rb +50 -0
  29. data/lib/dhaka/parser/parse_tree.rb +66 -0
  30. data/lib/dhaka/parser/parser.rb +165 -0
  31. data/lib/dhaka/parser/parser_methods.rb +11 -0
  32. data/lib/dhaka/parser/parser_run.rb +39 -0
  33. data/lib/dhaka/parser/parser_state.rb +74 -0
  34. data/lib/dhaka/parser/token.rb +22 -0
  35. data/lib/dhaka/runtime.rb +51 -0
  36. data/lib/dhaka/tokenizer/tokenizer.rb +190 -0
  37. data/test/all_tests.rb +5 -0
  38. data/test/arithmetic/arithmetic_evaluator.rb +64 -0
  39. data/test/arithmetic/arithmetic_evaluator_test.rb +43 -0
  40. data/test/arithmetic/arithmetic_grammar.rb +41 -0
  41. data/test/arithmetic/arithmetic_grammar_test.rb +9 -0
  42. data/test/arithmetic/arithmetic_test_methods.rb +9 -0
  43. data/test/arithmetic/arithmetic_tokenizer.rb +39 -0
  44. data/test/arithmetic/arithmetic_tokenizer_test.rb +38 -0
  45. data/test/arithmetic_precedence/arithmetic_precedence_evaluator.rb +43 -0
  46. data/test/arithmetic_precedence/arithmetic_precedence_grammar.rb +24 -0
  47. data/test/arithmetic_precedence/arithmetic_precedence_grammar_test.rb +30 -0
  48. data/test/arithmetic_precedence/arithmetic_precedence_lexer_specification.rb +23 -0
  49. data/test/arithmetic_precedence/arithmetic_precedence_parser_test.rb +33 -0
  50. data/test/brackets/bracket_grammar.rb +23 -0
  51. data/test/brackets/bracket_tokenizer.rb +22 -0
  52. data/test/brackets/brackets_test.rb +28 -0
  53. data/test/chittagong/chittagong_driver.rb +46 -0
  54. data/test/chittagong/chittagong_driver_test.rb +276 -0
  55. data/test/chittagong/chittagong_evaluator.rb +284 -0
  56. data/test/chittagong/chittagong_evaluator_test.rb +38 -0
  57. data/test/chittagong/chittagong_grammar.rb +104 -0
  58. data/test/chittagong/chittagong_lexer.rb +109 -0
  59. data/test/chittagong/chittagong_lexer_specification.rb +37 -0
  60. data/test/chittagong/chittagong_lexer_test.rb +58 -0
  61. data/test/chittagong/chittagong_parser.rb +879 -0
  62. data/test/chittagong/chittagong_parser_test.rb +55 -0
  63. data/test/chittagong/chittagong_test.rb +170 -0
  64. data/test/core/another_lalr_but_not_slr_grammar.rb +20 -0
  65. data/test/core/compiled_parser_test.rb +44 -0
  66. data/test/core/dfa_test.rb +170 -0
  67. data/test/core/evaluator_test.rb +22 -0
  68. data/test/core/grammar_test.rb +83 -0
  69. data/test/core/lalr_but_not_slr_grammar.rb +19 -0
  70. data/test/core/lexer_test.rb +139 -0
  71. data/test/core/malformed_grammar.rb +7 -0
  72. data/test/core/malformed_grammar_test.rb +8 -0
  73. data/test/core/nullable_grammar.rb +21 -0
  74. data/test/core/parse_result_test.rb +44 -0
  75. data/test/core/parser_state_test.rb +24 -0
  76. data/test/core/parser_test.rb +131 -0
  77. data/test/core/precedence_grammar.rb +17 -0
  78. data/test/core/precedence_grammar_test.rb +9 -0
  79. data/test/core/rr_conflict_grammar.rb +21 -0
  80. data/test/core/simple_grammar.rb +22 -0
  81. data/test/core/sr_conflict_grammar.rb +16 -0
  82. data/test/dhaka_test_helper.rb +18 -0
  83. data/test/fake_logger.rb +17 -0
  84. metadata +137 -0
module Dhaka
  # Reserved symbol identifying the idle state every tokenizer starts in.
  TOKENIZER_IDLE_STATE = :idle_state

  # Result returned when tokenizing succeeds. Includes Enumerable over the
  # token stream, so it can be handed straight to a parser.
  class TokenizerSuccessResult
    include Enumerable

    def initialize(tokens)
      @tokens = tokens
    end

    # Always false for a success result.
    def has_error?
      false
    end

    # Iterates over the shifted tokens in order.
    def each(&block)
      @tokens.each(&block)
    end
  end

  # Result returned when tokenizing stops at an unexpected character.
  class TokenizerErrorResult
    # Index of the character in the input that caused the failure.
    attr_reader :unexpected_char_index

    def initialize(unexpected_char_index)
      @unexpected_char_index = unexpected_char_index
    end

    # Always true for an error result.
    def has_error?
      true
    end
  end

  # One state of the tokenizer state machine. Holds the action to run for
  # each permissible character, plus an optional fallback action.
  class TokenizerState
    attr_reader :actions, :default_action

    def initialize
      @actions = {}
    end

    # Registers +blk+ as the action for every character in +characters+.
    def for_characters(characters, &blk)
      characters.each { |character| actions[character] = blk }
    end

    alias for_character for_characters

    # Registers +blk+ as the action for characters with no explicit entry
    # in +actions+.
    def for_default(&blk)
      @default_action = blk
    end

    def to_s #:nodoc:
      actions.inspect
    end
  end

  # Abstract superclass providing a DSL for hand-coding tokenizers.
  # Subclass it to implement a tokenizer for a specific grammar.
  #
  # A tokenizer is a state machine. Each state is identified by a Ruby
  # symbol; Dhaka::TOKENIZER_IDLE_STATE names the state the machine starts
  # in. A subclass declares its states with +for_state+; inside each state,
  # +for_characters+ (or its alias +for_character+) attaches an action to a
  # set of characters. Actions run with the Tokenizer instance as +self+,
  # so they can call +create_token+, +advance+, +switch_to+, +curr_char+
  # and +curr_token+. See ArithmeticTokenizer in the test suite for a
  # worked example: the idle state emits single-character tokens for
  # operators and parentheses, shifts to :get_integer_literal on a digit
  # (accumulating the literal's value on the token at the top of the
  # stack), shifts back to idle on the next non-digit, and consumes
  # whitespace as a delimiter without emitting a token.
  #
  # When the lexical structure is too complicated to hand-code comfortably,
  # write a LexerSpecification with regular expressions and build a Lexer
  # from that instead.
  class Tokenizer
    class << self
      # Declares the actions for the state named +state_name+ by evaluating
      # +blk+ against that state.
      def for_state(state_name, &blk)
        states[state_name].instance_eval(&blk)
      end

      # Tokenizes the string +input+, returning a TokenizerErrorResult on
      # failure and a TokenizerSuccessResult on success.
      def tokenize(input)
        new(input).run
      end

      private
      # Gives each subclass its own lazily-populated table of states.
      def inherited(tokenizer)
        class << tokenizer
          attr_accessor :states, :grammar
        end
        tokenizer.states = Hash.new { |hash, key| hash[key] = TokenizerState.new }
      end
    end

    # The tokens shifted so far.
    attr_reader :tokens

    def initialize(input) #:nodoc:
      @input = input
      @current_state = self.class.states[TOKENIZER_IDLE_STATE]
      @curr_char_index = 0
      @tokens = []
    end

    # The character currently being processed, or nil past end of input.
    def curr_char
      current = @input[@curr_char_index]
      current && current.chr
    end

    # Moves on to the next character.
    def advance
      @curr_char_index += 1
    end

    def inspect
      # NOTE(review): +grammar+ is defined as an accessor on the subclass
      # singleton (see +inherited+), not as an instance method — confirm
      # this call resolves for tokenizer instances.
      "<Dhaka::Tokenizer grammar : #{grammar}>"
    end

    # The token currently on top of the stack.
    def curr_token
      tokens.last
    end

    # Pushes a new token for +symbol_name+ carrying +value+, recording the
    # current input position.
    def create_token(symbol_name, value)
      tokens << Dhaka::Token.new(symbol_name, value, @curr_char_index)
    end

    # Makes the state identified by the symbol +state_name+ the active one.
    def switch_to state_name
      @current_state = self.class.states[state_name]
    end

    # Drives the state machine over the input. Returns a
    # TokenizerErrorResult as soon as a character has no applicable action;
    # otherwise appends the end-of-input token and returns a
    # TokenizerSuccessResult.
    def run #:nodoc:
      while curr_char
        action = @current_state.actions[curr_char] || @current_state.default_action
        return TokenizerErrorResult.new(@curr_char_index) unless action
        instance_eval(&action)
      end
      tokens << Dhaka::Token.new(Dhaka::END_SYMBOL_NAME, nil, nil)
      TokenizerSuccessResult.new(tokens)
    end
  end
end
#!/usr/bin/env ruby
# Discovers every file ending in "test.rb" beneath the working directory,
# echoes its path, and requires it relative to this script's directory.
base_dir = File.dirname(__FILE__)
Dir['**/*test.rb'].each do |test_file|
  puts test_file
  require File.join(base_dir, test_file)
end
require File.dirname(__FILE__) + '/arithmetic_grammar'

# Evaluates parse trees produced from ArithmeticGrammar. Literal terms are
# read off their tokens; the function names map to the max/min callables
# supplied to the constructor, which are applied to evaluated argument
# lists.
class ArithmeticEvaluator < Dhaka::Evaluator

  self.grammar = ArithmeticGrammar

  define_evaluation_rules do

    for_subtraction do
      left  = evaluate(child_nodes[0])
      right = evaluate(child_nodes[2])
      left - right
    end

    for_addition do
      left  = evaluate(child_nodes[0])
      right = evaluate(child_nodes[2])
      left + right
    end

    for_division do
      left  = evaluate(child_nodes[0])
      right = evaluate(child_nodes[2])
      left.to_f / right
    end

    for_multiplication do
      left  = evaluate(child_nodes[0])
      right = evaluate(child_nodes[2])
      left * right
    end

    # A literal's value comes straight from its token.
    for_getting_literals { child_nodes[0].token.value }

    # "( E )" evaluates to its inner expression.
    for_unpacking_parenthetized_expression { evaluate(child_nodes[1]) }

    # No arguments at all.
    for_empty_args { [] }

    # "FunctionName ( Args )" — apply the resolved callable to the
    # evaluated argument list.
    for_evaluating_function do
      evaluate(child_nodes[0]).call evaluate(child_nodes[2])
    end

    # "E , Args" — prepend the evaluated head onto the rest.
    for_concatenating_args do
      [evaluate(child_nodes[0])] + evaluate(child_nodes[2])
    end

    # A lone argument becomes a one-element list.
    for_single_args { [evaluate(child_nodes[0])] }

    for_min_function { @min_function }

    for_max_function { @max_function }

  end

  # +min_function+ and +max_function+ are callables taking an array of
  # evaluated arguments.
  def initialize(min_function, max_function)
    @min_function = min_function
    @max_function = max_function
  end

end
require File.dirname(__FILE__) + '/../dhaka_test_helper'
require File.dirname(__FILE__) + '/arithmetic_evaluator'
require File.dirname(__FILE__) + '/arithmetic_test_methods'
eval(Dhaka::Parser.new(ArithmeticGrammar).compile_to_ruby_source_as(:CompiledArithmeticParser))

# Drives ArithmeticEvaluator end-to-end: each token stream is parsed by
# the compiled arithmetic parser and the resulting tree is evaluated.
class TestArithmeticEvaluator < Test::Unit::TestCase
  include ArithmeticTestMethods

  def setup
    @min_func = Proc.new { |args| args.inject { |min, elem| elem < min ? elem : min } }
    @max_func = Proc.new { |args| args.inject { |max, elem| elem > max ? elem : max } }
  end

  def test_results_simple_arithmetic_given_tokens_and_parse_tree_1
    assert_evaluates(-2, [token('n', 2), token('-', nil), token('n', 4)])
  end

  def test_results_simple_arithmetic_given_tokens_and_parse_tree_2
    assert_evaluates(1.25, [token('n', 2), token('-', nil), token('(', nil), token('n', 3), token('/', nil), token('n', 4), token(')', nil)])
  end

  def test_results_simple_arithmetic_given_tokens_and_parse_tree_3
    assert_evaluates(3.5, [token('n', 2), token('+', nil), token('(', nil), token('n', 3), token('/', nil), token('(', nil), token('n', 7), token('-', nil), token('n', 5), token(')', nil), token(')', nil)])
  end

  def test_results_simple_arithmetic_given_tokens_and_parse_tree_4
    assert_evaluates(6, [token('n', 2), token('+', nil), token('h', nil), token('(', nil), token('n', 3), token(',', nil), token('n', 4), token(')', nil)])
  end

  def test_results_simple_arithmetic_given_tokens_and_parse_tree_5
    assert_evaluates(5, [token('n', 2), token('+', nil), token('l', nil), token('(', nil), token('n', 3), token(',', nil), token('n', 4), token(')', nil)])
  end

  private

  # Appends the end-of-input token, parses the stream, evaluates the tree,
  # and asserts the result equals +expected+.
  def assert_evaluates(expected, token_stream)
    token_stream << token(Dhaka::END_SYMBOL_NAME, nil)
    parse_tree = parse(token_stream)
    assert_equal expected, ArithmeticEvaluator.new(@min_func, @max_func).evaluate(parse_tree)
  end
end
# Grammar for arithmetic expressions with integer literals ('n'),
# parenthesized subexpressions, the four binary operators, and calls to
# the single-letter functions 'h' (max) and 'l' (min) taking a
# comma-separated, possibly empty, argument list.
class ArithmeticGrammar < Dhaka::Grammar

  for_symbol(Dhaka::START_SYMBOL_NAME) do
    expression %w[E]
  end

  # Additive level.
  for_symbol('E') do
    subtraction %w[E - T]
    addition %w[E + T]
    term %w[T]
  end

  # Multiplicative level.
  for_symbol('T') do
    factor %w[F]
    division %w[T / F]
    multiplication %w[T * F]
  end

  # Factors: literals, parenthesized expressions, and function calls.
  for_symbol('F') do
    getting_literals %w[n]
    unpacking_parenthetized_expression %w[( E )]
    function %w[Function]
  end

  for_symbol('Function') do
    evaluating_function %w[FunctionName ( Args )]
  end

  for_symbol('FunctionName') do
    max_function %w[h]
    min_function %w[l]
  end

  # Argument lists may be empty (Args is nullable).
  for_symbol('Args') do
    empty_args %w[]
    single_args %w[E]
    concatenating_args %w[E , Args]
  end

end
require File.dirname(__FILE__) + '/../dhaka_test_helper'
require File.dirname(__FILE__) + '/arithmetic_grammar'

# Checks FIRST-set computation on a grammar with a nullable non-terminal
# (Args can derive the empty string).
class ArithmeticGrammarTest < Test::Unit::TestCase
  def test_first_with_nullable_non_terminals
    grammar = ArithmeticGrammar
    first_names = grammar.first(grammar.symbol_for_name('Args')).map { |symbol| symbol.name }
    assert_equal(Set.new(['(', 'n', 'h', 'l']), Set.new(first_names))
  end
end
# Helpers shared by the arithmetic tests: parsing a token stream with the
# pre-compiled parser and constructing position-less tokens.
module ArithmeticTestMethods
  # Parses +token_stream+ with the compiled arithmetic parser.
  def parse(token_stream)
    CompiledArithmeticParser.parse(token_stream)
  end

  # Builds a token carrying +value+ with no input-position information.
  def token(symbol_name, value)
    Dhaka::Token.new(symbol_name, value, nil)
  end
end
require File.dirname(__FILE__) + '/arithmetic_grammar'

# Hand-coded tokenizer for arithmetic expressions. Runs of digits are
# accumulated into multi-character 'n' tokens via the :get_integer_literal
# state; every other recognized character becomes a single-character
# token, and spaces act purely as delimiters.
class ArithmeticTokenizer < Dhaka::Tokenizer

  digit_chars     = ('0'..'9').to_a
  bracket_chars   = %w[( )]
  operator_chars  = %w[- + / *]
  function_chars  = %w[h l]
  separator_chars = %w[,]
  space_chars     = [' ']

  recognized_chars = digit_chars + bracket_chars + operator_chars + function_chars + separator_chars + space_chars

  for_state Dhaka::TOKENIZER_IDLE_STATE do
    # Operators, brackets, functions and separators: one token per char.
    for_characters(recognized_chars - (digit_chars + space_chars)) do
      create_token(curr_char, nil)
      advance
    end
    # A digit starts a number token and shifts into literal accumulation.
    for_characters digit_chars do
      create_token('n', '')
      switch_to :get_integer_literal
    end
    # Whitespace delimits tokens but is not emitted.
    for_character space_chars do
      advance
    end
  end

  for_state :get_integer_literal do
    # Any non-digit ends the literal; hand the char back to the idle state.
    for_characters recognized_chars - digit_chars do
      switch_to Dhaka::TOKENIZER_IDLE_STATE
    end
    # Keep appending digits onto the current number token's value.
    for_characters digit_chars do
      curr_token.value << curr_char
      advance
    end
  end

end
require File.dirname(__FILE__) + '/../dhaka_test_helper'
require File.dirname(__FILE__) + "/arithmetic_tokenizer"

# Exercises ArithmeticTokenizer on empty, simple, nested, multi-digit and
# malformed inputs.
class TestArithmeticTokenizer < Test::Unit::TestCase
  def test_returns_end_of_input_token_for_empty_input
    assert_equal([token(Dhaka::END_SYMBOL_NAME, nil)], ArithmeticTokenizer.tokenize([]).to_a)
  end

  def test_tokenizes_given_a_string_input
    expected = [token('n', 2), token('-', nil), token('n', 4), token(Dhaka::END_SYMBOL_NAME, nil)]
    assert_equal(expected, ArithmeticTokenizer.tokenize('2 - 4').to_a)
  end

  def test_a_longer_input
    expected = [token('n', 2), token('+', nil), token('(', nil), token('n', 3), token('/', nil),
                token('(', nil), token('n', 7), token('-', nil), token('n', 5), token(')', nil),
                token(')', nil), token(Dhaka::END_SYMBOL_NAME, nil)]
    assert_equal(expected, ArithmeticTokenizer.tokenize('2+(3 / (7 - 5))').to_a)
  end

  def test_another_input_with_multi_digit_numbers
    expected = [token('n', 2034), token('+', nil), token('(', nil), token('n', 3433), token('/', nil),
                token('(', nil), token('n', 7), token('-', nil), token('n', 5), token(')', nil),
                token(')', nil), token(Dhaka::END_SYMBOL_NAME, nil)]
    assert_equal(expected, ArithmeticTokenizer.tokenize('2034 +(3433 / (7 - 5))').to_a)
  end

  def test_an_input_with_unrecognized_characters
    result = ArithmeticTokenizer.tokenize('2+(3 / (7 -& 5))')
    assert(result.has_error?)
    assert_equal(11, result.unexpected_char_index)
  end

  def test_another_input_with_illegal_characters
    result = ArithmeticTokenizer.tokenize('2034 +(34b3 / (7 - 5))')
    assert(result.has_error?)
    assert_equal(9, result.unexpected_char_index)
  end

  # Builds an expected token; values are stringified because the tokenizer
  # accumulates literal values as strings.
  def token(symbol_name, value)
    Dhaka::Token.new(symbol_name, value ? value.to_s : nil, nil)
  end
end