foreverman-dhaka 2.2.1
Sign up to get free protection for your applications and to get access to all the features.
- data/Rakefile +64 -0
- data/lib/dhaka.rb +62 -0
- data/lib/dhaka/dot/dot.rb +29 -0
- data/lib/dhaka/evaluator/evaluator.rb +133 -0
- data/lib/dhaka/grammar/closure_hash.rb +15 -0
- data/lib/dhaka/grammar/grammar.rb +236 -0
- data/lib/dhaka/grammar/grammar_symbol.rb +27 -0
- data/lib/dhaka/grammar/precedence.rb +19 -0
- data/lib/dhaka/grammar/production.rb +36 -0
- data/lib/dhaka/lexer/accept_actions.rb +36 -0
- data/lib/dhaka/lexer/alphabet.rb +21 -0
- data/lib/dhaka/lexer/compiled_lexer.rb +46 -0
- data/lib/dhaka/lexer/dfa.rb +121 -0
- data/lib/dhaka/lexer/lexeme.rb +32 -0
- data/lib/dhaka/lexer/lexer.rb +70 -0
- data/lib/dhaka/lexer/lexer_run.rb +78 -0
- data/lib/dhaka/lexer/regex_grammar.rb +393 -0
- data/lib/dhaka/lexer/regex_parser.rb +2010 -0
- data/lib/dhaka/lexer/regex_tokenizer.rb +14 -0
- data/lib/dhaka/lexer/specification.rb +96 -0
- data/lib/dhaka/lexer/state.rb +68 -0
- data/lib/dhaka/lexer/state_machine.rb +37 -0
- data/lib/dhaka/parser/action.rb +55 -0
- data/lib/dhaka/parser/channel.rb +58 -0
- data/lib/dhaka/parser/compiled_parser.rb +51 -0
- data/lib/dhaka/parser/conflict.rb +54 -0
- data/lib/dhaka/parser/item.rb +43 -0
- data/lib/dhaka/parser/parse_result.rb +50 -0
- data/lib/dhaka/parser/parse_tree.rb +66 -0
- data/lib/dhaka/parser/parser.rb +165 -0
- data/lib/dhaka/parser/parser_methods.rb +11 -0
- data/lib/dhaka/parser/parser_run.rb +39 -0
- data/lib/dhaka/parser/parser_state.rb +74 -0
- data/lib/dhaka/parser/token.rb +22 -0
- data/lib/dhaka/runtime.rb +51 -0
- data/lib/dhaka/tokenizer/tokenizer.rb +190 -0
- data/test/all_tests.rb +5 -0
- data/test/arithmetic/arithmetic_evaluator.rb +64 -0
- data/test/arithmetic/arithmetic_evaluator_test.rb +43 -0
- data/test/arithmetic/arithmetic_grammar.rb +41 -0
- data/test/arithmetic/arithmetic_grammar_test.rb +9 -0
- data/test/arithmetic/arithmetic_test_methods.rb +9 -0
- data/test/arithmetic/arithmetic_tokenizer.rb +39 -0
- data/test/arithmetic/arithmetic_tokenizer_test.rb +38 -0
- data/test/arithmetic_precedence/arithmetic_precedence_evaluator.rb +43 -0
- data/test/arithmetic_precedence/arithmetic_precedence_grammar.rb +24 -0
- data/test/arithmetic_precedence/arithmetic_precedence_grammar_test.rb +30 -0
- data/test/arithmetic_precedence/arithmetic_precedence_lexer_specification.rb +23 -0
- data/test/arithmetic_precedence/arithmetic_precedence_parser_test.rb +33 -0
- data/test/brackets/bracket_grammar.rb +23 -0
- data/test/brackets/bracket_tokenizer.rb +22 -0
- data/test/brackets/brackets_test.rb +28 -0
- data/test/chittagong/chittagong_driver.rb +46 -0
- data/test/chittagong/chittagong_driver_test.rb +276 -0
- data/test/chittagong/chittagong_evaluator.rb +284 -0
- data/test/chittagong/chittagong_evaluator_test.rb +38 -0
- data/test/chittagong/chittagong_grammar.rb +104 -0
- data/test/chittagong/chittagong_lexer.rb +109 -0
- data/test/chittagong/chittagong_lexer_specification.rb +37 -0
- data/test/chittagong/chittagong_lexer_test.rb +58 -0
- data/test/chittagong/chittagong_parser.rb +879 -0
- data/test/chittagong/chittagong_parser_test.rb +55 -0
- data/test/chittagong/chittagong_test.rb +170 -0
- data/test/core/another_lalr_but_not_slr_grammar.rb +20 -0
- data/test/core/compiled_parser_test.rb +44 -0
- data/test/core/dfa_test.rb +170 -0
- data/test/core/evaluator_test.rb +22 -0
- data/test/core/grammar_test.rb +83 -0
- data/test/core/lalr_but_not_slr_grammar.rb +19 -0
- data/test/core/lexer_test.rb +139 -0
- data/test/core/malformed_grammar.rb +7 -0
- data/test/core/malformed_grammar_test.rb +8 -0
- data/test/core/nullable_grammar.rb +21 -0
- data/test/core/parse_result_test.rb +44 -0
- data/test/core/parser_state_test.rb +24 -0
- data/test/core/parser_test.rb +131 -0
- data/test/core/precedence_grammar.rb +17 -0
- data/test/core/precedence_grammar_test.rb +9 -0
- data/test/core/rr_conflict_grammar.rb +21 -0
- data/test/core/simple_grammar.rb +22 -0
- data/test/core/sr_conflict_grammar.rb +16 -0
- data/test/dhaka_test_helper.rb +18 -0
- data/test/fake_logger.rb +17 -0
- metadata +137 -0
@@ -0,0 +1,190 @@
|
|
1
|
+
module Dhaka
|
2
|
+
# Reserved constant used to identify the idle state of the tokenizer.
|
3
|
+
TOKENIZER_IDLE_STATE = :idle_state
|
4
|
+
|
5
|
+
# Returned on successful tokenizing of the input stream. Supports iteration by
# including Enumerable, so it can be passed in directly to the parser.
class TokenizerSuccessResult
  include Enumerable

  # +tokens+ is the complete list of tokens shifted during the run.
  def initialize(tokens)
    @token_list = tokens
  end

  # A success result never carries an error.
  def has_error?
    false
  end

  # Yields every token in the order it was shifted.
  def each(&block)
    @token_list.each(&block)
  end
end
|
23
|
+
|
24
|
+
# Returned when tokenizing fails due to an unexpected character in the input stream.
class TokenizerErrorResult
  # The index of the character that caused the error.
  attr_reader :unexpected_char_index

  # +char_index+ is the zero-based position of the offending character.
  def initialize(char_index)
    @unexpected_char_index = char_index
  end

  # An error result always carries an error.
  def has_error?
    true
  end
end
|
38
|
+
|
39
|
+
# A tokenizer state encapsulates actions that should be performed upon
# encountering each permissible character for that state.
class TokenizerState
  attr_reader :actions, :default_action

  def initialize
    @actions = {}
  end

  # Register +blk+ as the action to run when any of +characters+ is
  # encountered in the token stream.
  def for_characters(characters, &blk)
    characters.each { |ch| @actions[ch] = blk }
  end

  alias for_character for_characters

  # Register +blk+ as the fallback action for characters that have no
  # action of their own.
  def for_default(&blk)
    @default_action = blk
  end

  def to_s #:nodoc:
    @actions.inspect
  end
end
|
67
|
+
|
68
|
+
# This abstract class contains a DSL for hand-coding tokenizers. Subclass it to implement tokenizers for specific grammars.
#
# Tokenizers are state machines. Each state of a tokenizer is identified
# by a Ruby symbol. The constant Dhaka::TOKENIZER_IDLE_STATE is reserved for the idle state of the tokenizer (the one
# that it starts in).
#
# The following is a tokenizer for arithmetic expressions with integer terms. The tokenizer starts in the idle state
# creating single-character tokens for all characters except digits and whitespace. It shifts to
# <tt>:get_integer_literal</tt> when it encounters a digit character and creates a token on the stack on which it
# accumulates the value of the literal. When it again encounters a non-digit character, it shifts back to idle.
# Whitespace is treated as a delimiter, but not shifted as a token.
#
#   class ArithmeticPrecedenceTokenizer < Dhaka::Tokenizer
#
#     digits = ('0'..'9').to_a
#     parenths = ['(', ')']
#     operators = ['-', '+', '/', '*', '^']
#     functions = ['h', 'l']
#     arg_separator = [',']
#     whitespace = [' ']
#
#     all_characters = digits + parenths + operators + functions + arg_separator + whitespace
#
#     for_state Dhaka::TOKENIZER_IDLE_STATE do
#       for_characters(all_characters - (digits + whitespace)) do
#         create_token(curr_char, nil)
#         advance
#       end
#       for_characters digits do
#         create_token('n', '')
#         switch_to :get_integer_literal
#       end
#       for_character whitespace do
#         advance
#       end
#     end
#
#     for_state :get_integer_literal do
#       for_characters all_characters - digits do
#         switch_to Dhaka::TOKENIZER_IDLE_STATE
#       end
#       for_characters digits do
#         curr_token.value << curr_char
#         advance
#       end
#     end
#
#   end
#
# For languages where the lexical structure is very complicated, it may be too tedious to implement a Tokenizer by hand.
# In such cases, it's a lot easier to write a LexerSpecification using regular expressions and create a Lexer from that.
class Tokenizer
  class << self
    # Define the action for the state named +state_name+. The block is
    # instance_eval-ed against that state's TokenizerState, so the DSL
    # methods for_characters/for_character/for_default are available inside it.
    def for_state(state_name, &blk)
      states[state_name].instance_eval(&blk)
    end

    # Tokenizes a string +input+ and returns a TokenizerErrorResult on failure or a TokenizerSuccessResult on success.
    def tokenize(input)
      new(input).run
    end

    private

    # Each subclass gets its own states registry (states are created lazily
    # on first access) and its own grammar accessor.
    def inherited(tokenizer)
      class << tokenizer
        attr_accessor :states, :grammar
      end
      tokenizer.states = Hash.new {|hash, key| hash[key] = TokenizerState.new}
    end
  end

  # The tokens shifted so far.
  attr_reader :tokens

  def initialize(input) #:nodoc:
    @input = input
    @current_state = self.class.states[TOKENIZER_IDLE_STATE]
    @curr_char_index = 0
    @tokens = []
  end

  # The character currently being processed, or nil once input is exhausted.
  def curr_char
    @input[@curr_char_index] and @input[@curr_char_index].chr
  end

  # Advance to the next character.
  def advance
    @curr_char_index += 1
  end

  def inspect
    # BUG FIX: the original interpolated the bare name +grammar+, but the
    # grammar accessor is defined on the subclass's singleton class (see
    # inherited above), not as an instance method, so inspect raised
    # NameError. Reaching it through self.class works for all subclasses.
    "<Dhaka::Tokenizer grammar : #{self.class.grammar}>"
  end

  # The token currently on top of the stack.
  def curr_token
    tokens.last
  end

  # Push a new token on to the stack with symbol corresponding to +symbol_name+ and a value of +value+.
  def create_token(symbol_name, value)
    new_token = Dhaka::Token.new(symbol_name, value, @curr_char_index)
    tokens << new_token
  end

  # Change the active state of the tokenizer to the state identified by the symbol +state_name+.
  def switch_to state_name
    @current_state = self.class.states[state_name]
  end

  # Drives the state machine over the input: looks up the action for the
  # current character (falling back to the state's default action), and fails
  # with a TokenizerErrorResult if neither exists. On completion an
  # end-of-input token is appended.
  def run #:nodoc:
    while curr_char
      blk = @current_state.actions[curr_char] || @current_state.default_action
      return TokenizerErrorResult.new(@curr_char_index) unless blk
      instance_eval(&blk)
    end
    tokens << Dhaka::Token.new(Dhaka::END_SYMBOL_NAME, nil, nil)
    TokenizerSuccessResult.new(tokens)
  end
end
|
190
|
+
end
|
data/test/all_tests.rb
ADDED
@@ -0,0 +1,64 @@
|
|
1
|
+
require File.dirname(__FILE__) + '/arithmetic_grammar'

# Evaluator for parse trees produced from ArithmeticGrammar. Each for_* rule
# below corresponds to the production of the same name in ArithmeticGrammar
# and computes a node's value from its children (child_nodes is indexed to
# match the production's right-hand side).
class ArithmeticEvaluator < Dhaka::Evaluator

  self.grammar = ArithmeticGrammar

  define_evaluation_rules do

    # E -> E - T
    for_subtraction do
      evaluate(child_nodes[0]) - evaluate(child_nodes[2])
    end

    # E -> E + T
    for_addition do
      evaluate(child_nodes[0]) + evaluate(child_nodes[2])
    end

    # T -> T / F (numerator coerced to Float so division doesn't truncate)
    for_division do
      evaluate(child_nodes[0]).to_f/evaluate(child_nodes[2])
    end

    # T -> T * F
    for_multiplication do
      evaluate(child_nodes[0]) * evaluate(child_nodes[2])
    end

    # F -> n : returns the literal token's value as-is
    for_getting_literals do
      child_nodes[0].token.value
    end

    # F -> ( E )
    for_unpacking_parenthetized_expression do
      evaluate(child_nodes[1])
    end

    # Args -> (empty production)
    for_empty_args do
      []
    end

    # Function -> FunctionName ( Args ) : FunctionName evaluates to a callable
    # which is applied to the evaluated argument list
    for_evaluating_function do
      evaluate(child_nodes[0]).call evaluate(child_nodes[2])
    end

    # Args -> E , Args
    for_concatenating_args do
      [evaluate(child_nodes[0])]+evaluate(child_nodes[2])
    end

    # Args -> E
    for_single_args do
      [evaluate(child_nodes[0])]
    end

    # FunctionName -> l
    for_min_function do
      @min_function
    end

    # FunctionName -> h
    for_max_function do
      @max_function
    end

  end

  # +min_function+ and +max_function+ are callables (e.g. Procs taking an
  # array of arguments) bound to the grammar's 'l' and 'h' function symbols.
  def initialize(min_function, max_function)
    @min_function = min_function
    @max_function = max_function
  end

end
|
@@ -0,0 +1,43 @@
|
|
1
|
+
require File.dirname(__FILE__) + '/../dhaka_test_helper'
require File.dirname(__FILE__) + '/arithmetic_evaluator'
require File.dirname(__FILE__) + '/arithmetic_test_methods'
# Compile a parser for ArithmeticGrammar to Ruby source and load it as
# CompiledArithmeticParser (presumably consumed by ArithmeticTestMethods#parse
# — confirm against arithmetic_test_methods.rb).
eval(Dhaka::Parser.new(ArithmeticGrammar).compile_to_ruby_source_as(:CompiledArithmeticParser))

# End-to-end checks of ArithmeticEvaluator on hand-built token streams.
class TestArithmeticEvaluator < Test::Unit::TestCase
  include ArithmeticTestMethods

  def setup
    # Callables bound to the grammar's function names: 'l' -> min, 'h' -> max.
    @min_func = Proc.new {|args| args.inject {|min, elem| min = (elem < min ? elem : min)}}
    @max_func = Proc.new {|args| args.inject {|max, elem| max = (elem > max ? elem : max)}}
  end

  # 2 - 4 => -2
  def test_results_simple_arithmetic_given_tokens_and_parse_tree_1
    token_stream = [token('n', 2), token('-', nil), token('n', 4), token(Dhaka::END_SYMBOL_NAME, nil)]
    parse_tree = parse(token_stream)
    assert_equal -2, ArithmeticEvaluator.new(@min_func, @max_func).evaluate(parse_tree)
  end

  # 2 - (3 / 4) => 1.25 (division yields a Float)
  def test_results_simple_arithmetic_given_tokens_and_parse_tree_2
    token_stream = [token('n', 2), token('-', nil), token('(', nil), token('n', 3), token('/', nil), token('n', 4), token(')', nil), token(Dhaka::END_SYMBOL_NAME, nil)]
    parse_tree = parse(token_stream)
    assert_equal 1.25, ArithmeticEvaluator.new(@min_func, @max_func).evaluate(parse_tree)
  end

  # 2 + (3 / (7 - 5)) => 3.5
  def test_results_simple_arithmetic_given_tokens_and_parse_tree_3
    token_stream = [token('n', 2), token('+', nil), token('(', nil), token('n', 3), token('/', nil), token('(', nil), token('n', 7), token('-', nil), token('n', 5), token(')', nil) , token(')', nil), token(Dhaka::END_SYMBOL_NAME, nil)]
    parse_tree = parse(token_stream)
    assert_equal 3.5, ArithmeticEvaluator.new(@min_func, @max_func).evaluate(parse_tree)
  end

  # 2 + h(3, 4) => 6 ('h' is bound to max)
  def test_results_simple_arithmetic_given_tokens_and_parse_tree_4
    token_stream = [token('n', 2), token('+', nil), token('h', nil), token('(', nil), token('n', 3), token(',', nil), token('n', 4), token(')', nil), token(Dhaka::END_SYMBOL_NAME, nil)]
    parse_tree = parse(token_stream)
    assert_equal 6, ArithmeticEvaluator.new(@min_func, @max_func).evaluate(parse_tree)
  end

  # 2 + l(3, 4) => 5 ('l' is bound to min)
  def test_results_simple_arithmetic_given_tokens_and_parse_tree_5
    token_stream = [token('n', 2), token('+', nil), token('l', nil), token('(', nil), token('n', 3), token(',', nil), token('n', 4), token(')', nil), token(Dhaka::END_SYMBOL_NAME, nil)]
    parse_tree = parse(token_stream)
    assert_equal 5, ArithmeticEvaluator.new(@min_func, @max_func).evaluate(parse_tree)
  end
end
|
@@ -0,0 +1,41 @@
|
|
1
|
+
# Grammar for arithmetic expressions over integer literals ('n') with
# + - * /, parentheses, and two function symbols 'h' and 'l' taking a
# comma-separated argument list (ArithmeticEvaluator binds 'h' to max and
# 'l' to min). Each production's name (subtraction, addition, ...) is the
# hook referenced by the evaluator's for_* rules.
class ArithmeticGrammar < Dhaka::Grammar

  # Start symbol derives a single expression.
  for_symbol(Dhaka::START_SYMBOL_NAME) do
    expression %w| E |
  end

  # E: additive level; left recursion makes + and - left-associative.
  for_symbol('E') do
    subtraction %w| E - T |
    addition %w| E + T |
    term %w| T |
  end

  # T: multiplicative level, binds tighter than E.
  for_symbol('T') do
    factor %w| F |
    division %w| T / F |
    multiplication %w| T * F |
  end

  # F: literals, parenthesized expressions and function calls.
  for_symbol('F') do
    getting_literals %w| n |
    unpacking_parenthetized_expression %w| ( E ) |
    function %w| Function |
  end

  for_symbol('Function') do
    evaluating_function %w| FunctionName ( Args ) |
  end

  for_symbol('FunctionName') do
    max_function %w| h |
    min_function %w| l |
  end

  # Args is nullable (empty_args), so functions may be called with no
  # arguments; otherwise it is a comma-separated list of expressions.
  for_symbol('Args') do
    empty_args %w||
    single_args %w| E |
    concatenating_args %w| E , Args |
  end

end
|
41
|
+
|
@@ -0,0 +1,9 @@
|
|
1
|
+
require File.dirname(__FILE__) + '/../dhaka_test_helper'
require File.dirname(__FILE__) + '/arithmetic_grammar'

class ArithmeticGrammarTest < Test::Unit::TestCase
  # 'Args' is nullable (empty_args production), so its FIRST set is built from
  # the first sets of its non-empty alternatives beginning with E:
  # '(' and 'n' directly, plus 'h' and 'l' via FunctionName.
  def test_first_with_nullable_non_terminals
    grammar = ArithmeticGrammar
    assert_equal(Set.new(['(', 'n', 'h', 'l']), Set.new(grammar.first(grammar.symbol_for_name('Args')).collect { |symbol| symbol.name }))
  end
end
|
@@ -0,0 +1,39 @@
|
|
1
|
+
require File.dirname(__FILE__) + '/arithmetic_grammar'

# Hand-coded tokenizer for ArithmeticGrammar, built with the Dhaka::Tokenizer
# DSL. Operators, parentheses, function names and ',' become single-character
# tokens; runs of digits accumulate into one 'n' token; spaces delimit tokens
# but are not shifted.
class ArithmeticTokenizer < Dhaka::Tokenizer

  digits = ('0'..'9').to_a
  parenths = %w| ( ) |
  operators = %w| - + / * |
  functions = %w| h l |
  arg_separator = %w| , |
  whitespace = [' ']

  # The tokenizer's full alphabet; any other character is a tokenizing error.
  all_characters = digits + parenths + operators + functions + arg_separator + whitespace

  for_state Dhaka::TOKENIZER_IDLE_STATE do
    # Every accepted non-digit, non-space character is its own token whose
    # symbol name is the character itself.
    for_characters(all_characters - (digits + whitespace)) do
      create_token(curr_char, nil)
      advance
    end
    # A digit opens an integer literal token; its value is accumulated in
    # the :get_integer_literal state below.
    for_characters digits do
      create_token('n', '')
      switch_to :get_integer_literal
    end
    # Whitespace is consumed without producing a token.
    for_character whitespace do
      advance
    end
  end

  for_state :get_integer_literal do
    # A non-digit ends the literal; do not advance, so the character is
    # reprocessed by the idle state.
    for_characters all_characters - digits do
      switch_to Dhaka::TOKENIZER_IDLE_STATE
    end
    # Append consecutive digits to the current 'n' token's value.
    for_characters digits do
      curr_token.value << curr_char
      advance
    end
  end

end
|
39
|
+
|
@@ -0,0 +1,38 @@
|
|
1
|
+
require File.dirname(__FILE__) + '/../dhaka_test_helper'
require File.dirname(__FILE__) + "/arithmetic_tokenizer"

# Unit tests for ArithmeticTokenizer.
class TestArithmeticTokenizer < Test::Unit::TestCase
  # Even empty input yields the end-of-input marker token.
  def test_returns_end_of_input_token_for_empty_input
    assert_equal([token(Dhaka::END_SYMBOL_NAME, nil)], ArithmeticTokenizer.tokenize([]).to_a)
  end

  def test_tokenizes_given_a_string_input
    assert_equal([token('n', 2), token('-', nil), token('n', 4), token(Dhaka::END_SYMBOL_NAME, nil)], ArithmeticTokenizer.tokenize('2 - 4').to_a)
  end

  def test_a_longer_input
    actual = ArithmeticTokenizer.tokenize('2+(3 / (7 - 5))').to_a
    assert_equal([token('n', 2), token('+', nil), token('(', nil), token('n', 3), token('/', nil), token('(', nil), token('n', 7), token('-', nil), token('n', 5), token(')', nil) , token(')', nil), token(Dhaka::END_SYMBOL_NAME, nil)], actual)
  end

  # Multi-digit runs must collapse into a single 'n' token each.
  def test_another_input_with_multi_digit_numbers
    actual = ArithmeticTokenizer.tokenize('2034 +(3433 / (7 - 5))').to_a
    assert_equal([token('n', 2034), token('+', nil), token('(', nil), token('n', 3433), token('/', nil), token('(', nil), token('n', 7), token('-', nil), token('n', 5), token(')', nil) , token(')', nil), token(Dhaka::END_SYMBOL_NAME, nil)], actual)
  end

  # '&' is outside the tokenizer's alphabet; it sits at index 11 of the input.
  def test_an_input_with_unrecognized_characters
    result = ArithmeticTokenizer.tokenize('2+(3 / (7 -& 5))')
    assert(result.has_error?)
    assert_equal(11, result.unexpected_char_index)
  end

  # 'b' is illegal; it sits at index 9 of the input.
  def test_another_input_with_illegal_characters
    result = ArithmeticTokenizer.tokenize('2034 +(34b3 / (7 - 5))')
    assert(result.has_error?)
    assert_equal(9, result.unexpected_char_index)
  end

  # Builds an expected token. Values are stringified because the tokenizer
  # accumulates literal values character by character into strings.
  def token(symbol_name, value)
    Dhaka::Token.new(symbol_name, value ? value.to_s : nil, nil)
  end
end
|