dhaka 2.0.0 → 2.0.1

Files changed (68)
  1. data/lib/dhaka.rb +24 -22
  2. data/lib/evaluator/evaluator.rb +42 -44
  3. data/lib/grammar/closure_hash.rb +4 -3
  4. data/lib/grammar/grammar.rb +113 -110
  5. data/lib/grammar/grammar_symbol.rb +6 -3
  6. data/lib/grammar/precedence.rb +3 -2
  7. data/lib/grammar/production.rb +5 -6
  8. data/lib/parser/action.rb +16 -11
  9. data/lib/parser/channel.rb +22 -16
  10. data/lib/parser/compiled_parser.rb +28 -22
  11. data/lib/parser/conflict.rb +54 -0
  12. data/lib/parser/item.rb +19 -19
  13. data/lib/parser/parse_result.rb +16 -1
  14. data/lib/parser/parse_tree.rb +15 -9
  15. data/lib/parser/parser.rb +51 -80
  16. data/lib/parser/parser_run.rb +6 -6
  17. data/lib/parser/parser_state.rb +16 -18
  18. data/lib/parser/token.rb +6 -4
  19. data/lib/tokenizer/tokenizer.rb +34 -31
  20. data/test/all_tests.rb +4 -18
  21. data/test/another_lalr_but_not_slr_grammar.rb +9 -5
  22. data/test/{arithmetic_evaluator.rb → arithmetic/arithmetic_evaluator.rb} +1 -2
  23. data/test/{arithmetic_evaluator_test.rb → arithmetic/arithmetic_evaluator_test.rb} +9 -20
  24. data/test/arithmetic/arithmetic_grammar.rb +41 -0
  25. data/test/{arithmetic_grammar_test.rb → arithmetic/arithmetic_grammar_test.rb} +2 -4
  26. data/test/{arithmetic_test_methods.rb → arithmetic/arithmetic_test_methods.rb} +1 -3
  27. data/test/{arithmetic_tokenizer.rb → arithmetic/arithmetic_tokenizer.rb} +8 -10
  28. data/test/{arithmetic_tokenizer_test.rb → arithmetic/arithmetic_tokenizer_test.rb} +4 -2
  29. data/test/{arithmetic_precedence_evaluator.rb → arithmetic_precedence/arithmetic_precedence_evaluator.rb} +1 -2
  30. data/test/arithmetic_precedence/arithmetic_precedence_grammar.rb +24 -0
  31. data/test/{arithmetic_precedence_grammar_test.rb → arithmetic_precedence/arithmetic_precedence_grammar_test.rb} +2 -3
  32. data/test/arithmetic_precedence/arithmetic_precedence_parser_test.rb +31 -0
  33. data/test/{arithmetic_precedence_tokenizer.rb → arithmetic_precedence/arithmetic_precedence_tokenizer.rb} +8 -10
  34. data/test/brackets/bracket_grammar.rb +23 -0
  35. data/test/{bracket_tokenizer.rb → brackets/bracket_tokenizer.rb} +2 -4
  36. data/test/{brackets_test.rb → brackets/brackets_test.rb} +3 -4
  37. data/test/chittagong/chittagong_driver.rb +47 -0
  38. data/test/{chittagong_driver_test.rb → chittagong/chittagong_driver_test.rb} +66 -58
  39. data/test/{chittagong_evaluator.rb → chittagong/chittagong_evaluator.rb} +28 -13
  40. data/test/{chittagong_evaluator_test.rb → chittagong/chittagong_evaluator_test.rb} +6 -10
  41. data/test/chittagong/chittagong_grammar.rb +110 -0
  42. data/test/{chittagong_parser_test.rb → chittagong/chittagong_parser_test.rb} +5 -7
  43. data/test/{chittagong_test.rb → chittagong/chittagong_test.rb} +27 -36
  44. data/test/{chittagong_tokenizer.rb → chittagong/chittagong_tokenizer.rb} +17 -17
  45. data/test/{chittagong_tokenizer_test.rb → chittagong/chittagong_tokenizer_test.rb} +2 -3
  46. data/test/compiled_parser_test.rb +9 -42
  47. data/test/dhaka_test_helper.rb +17 -0
  48. data/test/evaluator_test.rb +18 -3
  49. data/test/grammar_test.rb +10 -15
  50. data/test/lalr_but_not_slr_grammar.rb +10 -8
  51. data/test/malformed_grammar.rb +2 -4
  52. data/test/malformed_grammar_test.rb +2 -3
  53. data/test/nullable_grammar.rb +11 -8
  54. data/test/parse_result_test.rb +44 -0
  55. data/test/parser_state_test.rb +36 -0
  56. data/test/parser_test.rb +53 -103
  57. data/test/precedence_grammar.rb +6 -6
  58. data/test/precedence_grammar_test.rb +2 -3
  59. data/test/rr_conflict_grammar.rb +5 -7
  60. data/test/simple_grammar.rb +6 -8
  61. data/test/sr_conflict_grammar.rb +6 -6
  62. metadata +30 -26
  63. data/test/arithmetic_grammar.rb +0 -35
  64. data/test/arithmetic_precedence_grammar.rb +0 -24
  65. data/test/arithmetic_precedence_parser_test.rb +0 -33
  66. data/test/bracket_grammar.rb +0 -25
  67. data/test/chittagong_grammar.rb +0 -104
  68. data/test/incomplete_arithmetic_evaluator.rb +0 -60
data/lib/parser/parser.rb

@@ -1,7 +1,3 @@
-#!/usr/bin/env ruby
-require 'set'
-require 'logger'
-
 module Dhaka
   # The parser generator. To generate a parser from a grammar specification +ArithmeticPrecedenceGrammar+, one would
   # write:
@@ -12,31 +8,26 @@ module Dhaka
   # which returns a string of Ruby code.
   class Parser
     include ParserMethods
-    attr_reader :grammar, :start_state
+    attr_reader :grammar

     # Creates a new parser from the given grammar. Messages are logged by default to STDOUT
     # and the log level is WARN. Shift-reduce conflicts are reported at WARN and reduce-reduce conflicts
     # at ERROR. You may pass in your own logger. Logging at DEBUG shows a lot of progress output.
     def initialize(grammar, logger = nil)
-      if logger
-        @logger = logger
-      else
-        @logger = Logger.new(STDOUT)
-        @logger.level = Logger::WARN
-      end
+      @logger      = logger || default_logger
       @transitions = Hash.new {|hash, state| hash[state] = {}}
-      @grammar = grammar
-      @channels = []
+      @grammar     = grammar
+      @channels    = []
       @states = Hash.new do |hash, kernel|
-        channels, closure = @grammar.closure(kernel)
-        @channels += channels.to_a
-        new_state = ParserState.new(self, closure)
+        channels, closure = grammar.closure(kernel)
+        @channels.concat channels.to_a
+        new_state         = ParserState.new(self, closure)
         hash[kernel] = new_state
-        @logger.debug("Created #{new_state}.")
+        @logger.debug("Created #{new_state.unique_name}.")
        new_state.transition_items.each do |symbol, items|
          destination_kernel = ItemSet.new(items.collect{|item| item.next_item})
-          destination_state = hash[destination_kernel]
-          items.each { |item| @channels << @grammar.passive_channel(item, destination_state.items[item.next_item]) }
+          destination_state  = hash[destination_kernel]
+          items.each { |item| @channels << grammar.passive_channel(item, destination_state.items[item.next_item]) }
          @transitions[new_state][symbol] = destination_state
        end
        new_state
@@ -47,7 +38,7 @@ module Dhaka
     # Returns the Ruby source of the generated parser compiled as +parser_class_name+. This can be written out to a file.
     def compile_to_ruby_source_as parser_class_name
       result = "class #{parser_class_name} < Dhaka::CompiledParser\n\n"
-      result << "  self.grammar = #{@grammar.name}\n\n"
+      result << "  self.grammar = #{grammar.name}\n\n"
       result << "  start_with #{start_state.id}\n\n"
       states.each do |state|
         result << "#{state.compile_to_ruby_source}\n\n"
@@ -60,29 +51,40 @@ module Dhaka
     # options hash, lookaheads are not written out to the parser states, which is helpful when there are dozens
     # of lookahead symbols for every item in every state.
     def to_dot(options = {})
-      result = ["digraph x {", "node [fontsize=\"10\" shape=box size=\"5\"]"]
-      result += states.collect { |state| state.to_dot(options) }
-      states.each { |state|
-        @transitions[state].each { |symbol, dest_state|
-          result << "#{state.dot_name} -> #{dest_state.dot_name} [label=\"#{symbol.name}\"]"
-        }
-      }
+      result = ["digraph x {", %(node [fontsize="10" shape=box size="5"])]
+      result.concat states.collect { |state| state.to_dot(options) }
+      states.each do |state|
+        @transitions[state].each do |symbol, dest_state|
+          result << %(#{state.unique_name} -> #{dest_state.unique_name} [label="#{symbol.name}"])
+        end
+      end
       result << ['}']
       result.join("\n")
     end
+
+    def inspect
+      "<Dhaka::Parser grammar : #{grammar}>"
+    end

-    private :start_state
     private
-
+    attr_reader :start_state
+
     def states
       @states.values
     end
+
+    def default_logger
+      logger = Logger.new(STDOUT)
+      logger.level = Logger::WARN
+      logger.formatter = ParserLogOutputFormatter.new
+      logger
+    end

     def initialize_states
-      start_productions = @grammar.productions_for_symbol(@grammar.start_symbol)
-      raise NoStartProductionsError.new(@grammar) if start_productions.empty?
+      start_productions = grammar.productions_for_symbol(grammar.start_symbol)
+      raise NoStartProductionsError.new(grammar) if start_productions.empty?
       start_items = ItemSet.new(start_productions.collect {|production| Item.new(production, 0)})
-      start_items.each {|start_item| start_item.lookaheadset << @grammar.end_symbol}
+      start_items.each {|start_item| start_item.lookaheadset << grammar.end_symbol}
       @start_state = @states[start_items]
       @logger.debug("Pumping #{@channels.size} channels...")
       pump_channels
@@ -94,9 +96,9 @@ module Dhaka

     def generate_shift_actions
       @states.values.each do |state|
-        @transitions[state].keys.each { |symbol|
-          state.actions[symbol.name] = ShiftAction.new(@transitions[state][symbol])
-        }
+        @transitions[state].keys.each do |symbol|
+          state.actions[symbol.name] = ShiftAction.new(@transitions[state][symbol])
+        end
       end
     end

@@ -113,9 +115,11 @@ module Dhaka
         new_action = ReduceAction.new(item.production)
         if existing_action = state.actions[lookahead.name]
           if ReduceAction === existing_action
-            @logger.error(build_conflict_message(state, lookahead, new_action).join("\n"))
+            message = ReduceReduceConflict.new(state, lookahead, new_action).resolve
+            @logger.error(message)
           else
-            resolve_conflict state, lookahead, new_action
+            message = ShiftReduceConflict.new(state, lookahead, new_action).resolve
+            @logger.warn(message)
           end
         else
           state.actions[lookahead.name] = new_action
@@ -123,56 +127,16 @@ module Dhaka
       end
     end

-
-    def resolve_conflict state, lookahead, new_action
-      message = build_conflict_message(state, lookahead, new_action)
-      shift_precedence = lookahead.precedence
-      reduce_precedence = new_action.production.precedence
-      if (shift_precedence && reduce_precedence)
-        if (shift_precedence > reduce_precedence)
-          message << "Resolving with precedence. Choosing shift over reduce."
-        elsif (shift_precedence < reduce_precedence)
-          message << "Resolving with precedence. Choosing reduce over shift."
-          state.actions[lookahead.name] = new_action
-        else
-          case shift_precedence.associativity
-          when :left
-            message << "Resolving with left associativity. Choosing reduce over shift."
-            state.actions[lookahead.name] = new_action
-          when :right
-            message << "Resolving with right associativity. Choosing shift over reduce."
-          when :nonassoc
-            message << "Resolving with non-associativity. Eliminating action."
-            state.actions.delete(lookahead.name)
-          end
-        end
-      else
-        message << "No precedence rule. Choosing shift over reduce."
-      end
-      @logger.warn(message.join("\n"))
-    end
-
-    def build_conflict_message state, lookahead, new_action
-      message = ["Parser Conflict at State:"] + state.items.values.collect{|it| it.to_s(:hide_lookaheads => true)}
-      message << "Existing: #{state.actions[lookahead.name]}"
-      message << "New: #{new_action}"
-      message << "Lookahead: #{lookahead}"
-      message
-    end
-
     def pump_channels
-      while true
+      loop do
         unstable_count = 0
         @channels.each do |channel|
-          if channel.pump
-            unstable_count += 1
-          end
+          unstable_count += 1 if channel.pump
         end
-        break if unstable_count == 0
+        break if unstable_count.zero?
         @logger.debug("#{unstable_count} unstable channels...")
       end
     end
-
   end

   # Raised when trying to create a Parser for a grammar that has no productions for the start symbol
@@ -184,5 +148,12 @@ module Dhaka
       "No start productions defined for #{@grammar.name}"
     end
   end
+
+  class ParserLogOutputFormatter < Logger::Formatter #:nodoc:
+    def call(severity, time, progname, msg)
+      "\n%s -- %s: %s\n" % [ severity, progname, msg2str(msg)]
+    end
+  end
+
 end
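
The Parser API touched above (Parser.new with an optional logger, compile_to_ruby_source_as, and to_dot, which forwards options such as :hide_lookaheads to the items) is typically driven as in the sketch below. This is an illustration only: ArithmeticPrecedenceGrammar is the grammar named in the doc comment, and the output filenames are placeholders.

    # Sketch: generate parser source and a DOT graph of the parser states.
    # Only methods visible in this diff are used; filenames are placeholders.
    require 'dhaka'
    require 'logger'

    logger = Logger.new(STDOUT)
    logger.level = Logger::DEBUG   # DEBUG shows the parser-generation progress output

    parser = Dhaka::Parser.new(ArithmeticPrecedenceGrammar, logger)

    File.open('arithmetic_precedence_parser.rb', 'w') do |file|
      file << parser.compile_to_ruby_source_as('ArithmeticPrecedenceParser')
    end

    File.open('parser_states.dot', 'w') do |file|
      file << parser.to_dot(:hide_lookaheads => true)   # omit lookaheads in state labels
    end
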
data/lib/parser/parser_run.rb

@@ -2,9 +2,9 @@ module Dhaka
   class ParserRun #:nodoc:

     def initialize(grammar, start_state, token_stream)
-      @grammar = grammar
-      @node_stack = []
-      @state_stack = [start_state]
+      @grammar      = grammar
+      @node_stack   = []
+      @state_stack  = [start_state]
       @token_stream = token_stream
       @symbol_queue = []
     end
@@ -17,7 +17,7 @@ module Dhaka
         return error if error
         node_stack << ParseTreeLeafNode.new(@current_token)
       end
-      ParseSuccessResult.new(node_stack[0])
+      ParseSuccessResult.new(node_stack.first)
     end

     private
@@ -26,9 +26,9 @@ module Dhaka

     def execute_actions
       while symbol_name = @symbol_queue.pop
-        action = state_stack[-1].actions[symbol_name]
+        action = state_stack.last.actions[symbol_name]
         return ParseErrorResult.new(@current_token) unless action
-        self.instance_eval(&action.action_code)
+        instance_eval(&action.action_code)
       end
       nil
     end
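
ParserRun is the internal LR driver (token stream in, parse tree or error result out) and is not used directly. End to end, a parse typically looks like the sketch below; the arithmetic fixtures come from the gem's test suite, and the parse and has_error? calls are assumptions about the public API rather than anything shown in this hunk.

    # Sketch under assumptions: ArithmeticTokenizer/ArithmeticParser are test
    # fixtures, and parse/has_error? are assumed entry points, not shown here.
    result = ArithmeticParser.parse(ArithmeticTokenizer.tokenize('2+3*4'))
    puts(result.has_error? ? 'parse error' : 'parsed OK')
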
data/lib/parser/parser_state.rb

@@ -1,43 +1,40 @@
-#!/usr/bin/env ruby
-require 'set'
 module Dhaka
   class ParserState #:nodoc:
-
     attr_accessor :items, :actions, :id

     @@state_id = 0

     def self.next_state_id
-      result = @@state_id
+      result      = @@state_id
       @@state_id += 1
       result
     end

     def initialize(parser, items, id=nil)
-      @parser = parser
-      @items = items
+      @parser  = parser
+      @items   = items
       @actions = {}
-      @id = id ? id : ParserState.next_state_id
+      @id      = id || ParserState.next_state_id
     end

     def transition_items
       result = Hash.new {|h, k| h[k] = ItemSet.new()}
-      for item in @items.values
-        (result[item.next_symbol] << item) if item.next_symbol
+      items.values.each do |item|
+        result[item.next_symbol] << item if item.next_symbol
       end
       result
     end

-    def dot_name
-      self.to_s
+    def unique_name
+      "State#{id}"
     end

     def to_dot(options = {})
-      label = self.items.values.collect{|item| item.to_s(options)}.join('\n')
-      "#{dot_name} [label=\"#{label}\"]"
+      %(#{unique_name} [label="#{items.values.collect{|item| item.to_s(options)}.join('\n')}"])
     end
+
     def compile_to_ruby_source
-      result = "  at_state(#{@id}) {\n"
+      result = "  at_state(#{id}) {\n"
       actions.each do |symbol_name, action|
         result << "    for_symbol('#{symbol_name}') { #{action.compile_to_ruby_source} }\n"
       end
@@ -49,18 +46,19 @@ module Dhaka
       actions[symbol_name] = @parser.instance_eval(&blk)
     end

-    def to_s
-      "State#{id}"
+    def to_s(options = {})
+      items.values.collect{|item| item.to_s(options)}.join("\n")
     end

   end

   class ItemSet < Set #:nodoc:
     def hash
-      self.collect{|item| item.hash}.inject{|result, hashcode| result ^ hashcode}
+      collect{|item| item.hash}.inject{|result, hashcode| result ^ hashcode}
     end
+
     def eql? other
       self == other
     end
   end
-end
+end
data/lib/parser/token.rb

@@ -6,15 +6,17 @@ module Dhaka
   class Token
     attr_accessor :symbol_name, :value, :input_position
     def initialize(symbol_name, value, input_position)
-      @symbol_name = symbol_name
-      @value = value
+      @symbol_name    = symbol_name
+      @value          = value
       @input_position = input_position
     end
+
     def to_s #:nodoc:
-      "#{symbol_name}"
+      value ? "#{symbol_name} : #{value}" : "#{symbol_name}"
     end
+
     def == other
-      (symbol_name == other.symbol_name) && (value == other.value)
+      symbol_name == other.symbol_name && value == other.value
     end
   end
 end
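
The Token changes are mostly cosmetic plus two small tweaks: to_s now includes the value when one is present, and == still compares only symbol_name and value. A quick illustration, using arbitrary example data:

    # Sketch: constructor arguments are arbitrary example values.
    a = Dhaka::Token.new('number', '42', 0)
    b = Dhaka::Token.new('number', '42', 7)

    puts a        # "number : 42" under the new to_s ("number" if the value were nil)
    puts a == b   # true: == ignores input_position
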
data/lib/tokenizer/tokenizer.rb

@@ -1,5 +1,4 @@
 module Dhaka
-
   # Reserved constant used to identify the idle state of the tokenizer.
   TOKENIZER_IDLE_STATE = :idle_state

@@ -7,17 +6,18 @@ module Dhaka
   # be passed in directly to the parser.
   class TokenizerSuccessResult
     include Enumerable
+
     def initialize(tokens)
       @tokens = tokens
     end
+
     # Returns false.
     def has_error?
       false
     end
-    def each
-      @tokens.each do |token|
-        yield token
-      end
+
+    def each(&block)
+      @tokens.each(&block)
     end
   end

@@ -25,9 +25,11 @@ module Dhaka
   class TokenizerErrorResult
     # The index of the character that caused the error.
     attr_reader :unexpected_char_index
+
     def initialize(unexpected_char_index)
       @unexpected_char_index = unexpected_char_index
     end
+
     # Returns true.
     def has_error?
       true
@@ -101,34 +103,41 @@ module Dhaka
   #       switch_to Dhaka::TOKENIZER_IDLE_STATE
   #     end
   #     for_characters digits do
-  #       curr_token.value += curr_char
+  #       curr_token.value << curr_char
   #       advance
   #     end
   #   end
   #
   # end
-
-
   class Tokenizer
+    class << self
+      # Define the action for the state named +state_name+.
+      def for_state(state_name, &blk)
+        states[state_name].instance_eval(&blk)
+      end

-    # Define the action for the state named +state_name+.
-    def self.for_state(state_name, &blk)
-      states[state_name].instance_eval(&blk)
-    end
-
-    # Tokenizes a string +input+ and returns a TokenizerErrorResult on failure or a TokenizerSuccessResult on sucess.
-    def self.tokenize(input)
-      self.new(input).run
+      # Tokenizes a string +input+ and returns a TokenizerErrorResult on failure or a TokenizerSuccessResult on sucess.
+      def tokenize(input)
+        new(input).run
+      end
+
+      private
+      def inherited(tokenizer)
+        class << tokenizer
+          attr_accessor :states, :grammar
+        end
+        tokenizer.states = Hash.new {|hash, key| hash[key] = TokenizerState.new}
+      end
     end

     # The tokens shifted so far.
     attr_reader :tokens

     def initialize(input) #:nodoc:
-      @input = input
-      @current_state = self.class.states[TOKENIZER_IDLE_STATE]
+      @input           = input
+      @current_state   = self.class.states[TOKENIZER_IDLE_STATE]
       @curr_char_index = 0
-      @tokens = []
+      @tokens          = []
     end

     # The character currently being processed.
@@ -140,10 +149,14 @@ module Dhaka
     def advance
       @curr_char_index += 1
     end
+
+    def inspect
+      "<Dhaka::Tokenizer grammar : #{grammar}>"
+    end

     # The token currently on top of the stack.
     def curr_token
-      tokens[-1]
+      tokens.last
     end

     # Push a new token on to the stack with symbol corresponding to +symbol_name+ and a value of +value+.
@@ -164,17 +177,7 @@ module Dhaka
         instance_eval(&blk)
       end
       tokens << Dhaka::Token.new(Dhaka::END_SYMBOL_NAME, nil, nil)
-      return TokenizerSuccessResult.new(tokens)
+      TokenizerSuccessResult.new(tokens)
     end
-
-    private
-    def self.inherited(tokenizer)
-      class << tokenizer
-        attr_accessor :states, :grammar
-      end
-      tokenizer.states = Hash.new {|hash, key| hash[key] = TokenizerState.new}
-    end
-
   end
-
 end
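
The class-level DSL (for_state, tokenize) now lives in a class << self block and inherited becomes a private class method, but tokenizers are still written the way the documentation comment above shows. A minimal sketch follows; the create_token call and the 'number' symbol name are assumptions, while for_state, for_characters, switch_to, curr_token, curr_char, advance, tokenize, each, and has_error? all appear in this file.

    # Sketch: create_token and the 'number' symbol are assumed; the rest of the
    # DSL is taken from the documentation comment and methods in this file.
    class DigitsTokenizer < Dhaka::Tokenizer
      digits = ('0'..'9').to_a

      for_state Dhaka::TOKENIZER_IDLE_STATE do
        for_characters(digits) do
          create_token('number', '')       # start a new token without consuming the digit
          switch_to :get_number_state
        end
      end

      for_state :get_number_state do
        for_characters(digits) do
          curr_token.value << curr_char    # append the digit to the current token
          advance
        end
      end
    end

    result = DigitsTokenizer.tokenize('123')
    result.each { |token| puts token } unless result.has_error?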