dhaka 2.0.0 → 2.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68) hide show
  1. data/lib/dhaka.rb +24 -22
  2. data/lib/evaluator/evaluator.rb +42 -44
  3. data/lib/grammar/closure_hash.rb +4 -3
  4. data/lib/grammar/grammar.rb +113 -110
  5. data/lib/grammar/grammar_symbol.rb +6 -3
  6. data/lib/grammar/precedence.rb +3 -2
  7. data/lib/grammar/production.rb +5 -6
  8. data/lib/parser/action.rb +16 -11
  9. data/lib/parser/channel.rb +22 -16
  10. data/lib/parser/compiled_parser.rb +28 -22
  11. data/lib/parser/conflict.rb +54 -0
  12. data/lib/parser/item.rb +19 -19
  13. data/lib/parser/parse_result.rb +16 -1
  14. data/lib/parser/parse_tree.rb +15 -9
  15. data/lib/parser/parser.rb +51 -80
  16. data/lib/parser/parser_run.rb +6 -6
  17. data/lib/parser/parser_state.rb +16 -18
  18. data/lib/parser/token.rb +6 -4
  19. data/lib/tokenizer/tokenizer.rb +34 -31
  20. data/test/all_tests.rb +4 -18
  21. data/test/another_lalr_but_not_slr_grammar.rb +9 -5
  22. data/test/{arithmetic_evaluator.rb → arithmetic/arithmetic_evaluator.rb} +1 -2
  23. data/test/{arithmetic_evaluator_test.rb → arithmetic/arithmetic_evaluator_test.rb} +9 -20
  24. data/test/arithmetic/arithmetic_grammar.rb +41 -0
  25. data/test/{arithmetic_grammar_test.rb → arithmetic/arithmetic_grammar_test.rb} +2 -4
  26. data/test/{arithmetic_test_methods.rb → arithmetic/arithmetic_test_methods.rb} +1 -3
  27. data/test/{arithmetic_tokenizer.rb → arithmetic/arithmetic_tokenizer.rb} +8 -10
  28. data/test/{arithmetic_tokenizer_test.rb → arithmetic/arithmetic_tokenizer_test.rb} +4 -2
  29. data/test/{arithmetic_precedence_evaluator.rb → arithmetic_precedence/arithmetic_precedence_evaluator.rb} +1 -2
  30. data/test/arithmetic_precedence/arithmetic_precedence_grammar.rb +24 -0
  31. data/test/{arithmetic_precedence_grammar_test.rb → arithmetic_precedence/arithmetic_precedence_grammar_test.rb} +2 -3
  32. data/test/arithmetic_precedence/arithmetic_precedence_parser_test.rb +31 -0
  33. data/test/{arithmetic_precedence_tokenizer.rb → arithmetic_precedence/arithmetic_precedence_tokenizer.rb} +8 -10
  34. data/test/brackets/bracket_grammar.rb +23 -0
  35. data/test/{bracket_tokenizer.rb → brackets/bracket_tokenizer.rb} +2 -4
  36. data/test/{brackets_test.rb → brackets/brackets_test.rb} +3 -4
  37. data/test/chittagong/chittagong_driver.rb +47 -0
  38. data/test/{chittagong_driver_test.rb → chittagong/chittagong_driver_test.rb} +66 -58
  39. data/test/{chittagong_evaluator.rb → chittagong/chittagong_evaluator.rb} +28 -13
  40. data/test/{chittagong_evaluator_test.rb → chittagong/chittagong_evaluator_test.rb} +6 -10
  41. data/test/chittagong/chittagong_grammar.rb +110 -0
  42. data/test/{chittagong_parser_test.rb → chittagong/chittagong_parser_test.rb} +5 -7
  43. data/test/{chittagong_test.rb → chittagong/chittagong_test.rb} +27 -36
  44. data/test/{chittagong_tokenizer.rb → chittagong/chittagong_tokenizer.rb} +17 -17
  45. data/test/{chittagong_tokenizer_test.rb → chittagong/chittagong_tokenizer_test.rb} +2 -3
  46. data/test/compiled_parser_test.rb +9 -42
  47. data/test/dhaka_test_helper.rb +17 -0
  48. data/test/evaluator_test.rb +18 -3
  49. data/test/grammar_test.rb +10 -15
  50. data/test/lalr_but_not_slr_grammar.rb +10 -8
  51. data/test/malformed_grammar.rb +2 -4
  52. data/test/malformed_grammar_test.rb +2 -3
  53. data/test/nullable_grammar.rb +11 -8
  54. data/test/parse_result_test.rb +44 -0
  55. data/test/parser_state_test.rb +36 -0
  56. data/test/parser_test.rb +53 -103
  57. data/test/precedence_grammar.rb +6 -6
  58. data/test/precedence_grammar_test.rb +2 -3
  59. data/test/rr_conflict_grammar.rb +5 -7
  60. data/test/simple_grammar.rb +6 -8
  61. data/test/sr_conflict_grammar.rb +6 -6
  62. metadata +30 -26
  63. data/test/arithmetic_grammar.rb +0 -35
  64. data/test/arithmetic_precedence_grammar.rb +0 -24
  65. data/test/arithmetic_precedence_parser_test.rb +0 -33
  66. data/test/bracket_grammar.rb +0 -25
  67. data/test/chittagong_grammar.rb +0 -104
  68. data/test/incomplete_arithmetic_evaluator.rb +0 -60
@@ -1,7 +1,3 @@
1
- #!/usr/bin/env ruby
2
- require 'set'
3
- require 'logger'
4
-
5
1
  module Dhaka
6
2
  # The parser generator. To generate a parser from a grammar specification +ArithmeticPrecedenceGrammar+, one would
7
3
  # write:
@@ -12,31 +8,26 @@ module Dhaka
12
8
  # which returns a string of Ruby code.
13
9
  class Parser
14
10
  include ParserMethods
15
- attr_reader :grammar, :start_state
11
+ attr_reader :grammar
16
12
 
17
13
  # Creates a new parser from the given grammar. Messages are logged by default to STDOUT
18
14
  # and the log level is WARN. Shift-reduce conflicts are reported at WARN and reduce-reduce conflicts
19
15
  # at ERROR. You may pass in your own logger. Logging at DEBUG shows a lot of progress output.
20
16
  def initialize(grammar, logger = nil)
21
- if logger
22
- @logger = logger
23
- else
24
- @logger = Logger.new(STDOUT)
25
- @logger.level = Logger::WARN
26
- end
17
+ @logger = logger || default_logger
27
18
  @transitions = Hash.new {|hash, state| hash[state] = {}}
28
- @grammar = grammar
29
- @channels = []
19
+ @grammar = grammar
20
+ @channels = []
30
21
  @states = Hash.new do |hash, kernel|
31
- channels, closure = @grammar.closure(kernel)
32
- @channels += channels.to_a
33
- new_state = ParserState.new(self, closure)
22
+ channels, closure = grammar.closure(kernel)
23
+ @channels.concat channels.to_a
24
+ new_state = ParserState.new(self, closure)
34
25
  hash[kernel] = new_state
35
- @logger.debug("Created #{new_state}.")
26
+ @logger.debug("Created #{new_state.unique_name}.")
36
27
  new_state.transition_items.each do |symbol, items|
37
28
  destination_kernel = ItemSet.new(items.collect{|item| item.next_item})
38
- destination_state = hash[destination_kernel]
39
- items.each { |item| @channels << @grammar.passive_channel(item, destination_state.items[item.next_item]) }
29
+ destination_state = hash[destination_kernel]
30
+ items.each { |item| @channels << grammar.passive_channel(item, destination_state.items[item.next_item]) }
40
31
  @transitions[new_state][symbol] = destination_state
41
32
  end
42
33
  new_state
@@ -47,7 +38,7 @@ module Dhaka
47
38
  # Returns the Ruby source of the generated parser compiled as +parser_class_name+. This can be written out to a file.
48
39
  def compile_to_ruby_source_as parser_class_name
49
40
  result = "class #{parser_class_name} < Dhaka::CompiledParser\n\n"
50
- result << " self.grammar = #{@grammar.name}\n\n"
41
+ result << " self.grammar = #{grammar.name}\n\n"
51
42
  result << " start_with #{start_state.id}\n\n"
52
43
  states.each do |state|
53
44
  result << "#{state.compile_to_ruby_source}\n\n"
@@ -60,29 +51,40 @@ module Dhaka
60
51
  # options hash, lookaheads are not written out to the parser states, which is helpful when there are dozens
61
52
  # of lookahead symbols for every item in every state.
62
53
  def to_dot(options = {})
63
- result = ["digraph x {", "node [fontsize=\"10\" shape=box size=\"5\"]"]
64
- result += states.collect { |state| state.to_dot(options) }
65
- states.each { |state|
66
- @transitions[state].each { |symbol, dest_state|
67
- result << "#{state.dot_name} -> #{dest_state.dot_name} [label=\"#{symbol.name}\"]"
68
- }
69
- }
54
+ result = ["digraph x {", %(node [fontsize="10" shape=box size="5"])]
55
+ result.concat states.collect { |state| state.to_dot(options) }
56
+ states.each do |state|
57
+ @transitions[state].each do |symbol, dest_state|
58
+ result << %(#{state.unique_name} -> #{dest_state.unique_name} [label="#{symbol.name}"])
59
+ end
60
+ end
70
61
  result << ['}']
71
62
  result.join("\n")
72
63
  end
64
+
65
+ def inspect
66
+ "<Dhaka::Parser grammar : #{grammar}>"
67
+ end
73
68
 
74
- private :start_state
75
69
  private
76
-
70
+ attr_reader :start_state
71
+
77
72
  def states
78
73
  @states.values
79
74
  end
75
+
76
+ def default_logger
77
+ logger = Logger.new(STDOUT)
78
+ logger.level = Logger::WARN
79
+ logger.formatter = ParserLogOutputFormatter.new
80
+ logger
81
+ end
80
82
 
81
83
  def initialize_states
82
- start_productions = @grammar.productions_for_symbol(@grammar.start_symbol)
83
- raise NoStartProductionsError.new(@grammar) if start_productions.empty?
84
+ start_productions = grammar.productions_for_symbol(grammar.start_symbol)
85
+ raise NoStartProductionsError.new(grammar) if start_productions.empty?
84
86
  start_items = ItemSet.new(start_productions.collect {|production| Item.new(production, 0)})
85
- start_items.each {|start_item| start_item.lookaheadset << @grammar.end_symbol}
87
+ start_items.each {|start_item| start_item.lookaheadset << grammar.end_symbol}
86
88
  @start_state = @states[start_items]
87
89
  @logger.debug("Pumping #{@channels.size} channels...")
88
90
  pump_channels
@@ -94,9 +96,9 @@ module Dhaka
94
96
 
95
97
  def generate_shift_actions
96
98
  @states.values.each do |state|
97
- @transitions[state].keys.each { |symbol|
98
- state.actions[symbol.name] = ShiftAction.new(@transitions[state][symbol])
99
- }
99
+ @transitions[state].keys.each do |symbol|
100
+ state.actions[symbol.name] = ShiftAction.new(@transitions[state][symbol])
101
+ end
100
102
  end
101
103
  end
102
104
 
@@ -113,9 +115,11 @@ module Dhaka
113
115
  new_action = ReduceAction.new(item.production)
114
116
  if existing_action = state.actions[lookahead.name]
115
117
  if ReduceAction === existing_action
116
- @logger.error(build_conflict_message(state, lookahead, new_action).join("\n"))
118
+ message = ReduceReduceConflict.new(state, lookahead, new_action).resolve
119
+ @logger.error(message)
117
120
  else
118
- resolve_conflict state, lookahead, new_action
121
+ message = ShiftReduceConflict.new(state, lookahead, new_action).resolve
122
+ @logger.warn(message)
119
123
  end
120
124
  else
121
125
  state.actions[lookahead.name] = new_action
@@ -123,56 +127,16 @@ module Dhaka
123
127
  end
124
128
  end
125
129
 
126
-
127
- def resolve_conflict state, lookahead, new_action
128
- message = build_conflict_message(state, lookahead, new_action)
129
- shift_precedence = lookahead.precedence
130
- reduce_precedence = new_action.production.precedence
131
- if (shift_precedence && reduce_precedence)
132
- if (shift_precedence > reduce_precedence)
133
- message << "Resolving with precedence. Choosing shift over reduce."
134
- elsif (shift_precedence < reduce_precedence)
135
- message << "Resolving with precedence. Choosing reduce over shift."
136
- state.actions[lookahead.name] = new_action
137
- else
138
- case shift_precedence.associativity
139
- when :left
140
- message << "Resolving with left associativity. Choosing reduce over shift."
141
- state.actions[lookahead.name] = new_action
142
- when :right
143
- message << "Resolving with right associativity. Choosing shift over reduce."
144
- when :nonassoc
145
- message << "Resolving with non-associativity. Eliminating action."
146
- state.actions.delete(lookahead.name)
147
- end
148
- end
149
- else
150
- message << "No precedence rule. Choosing shift over reduce."
151
- end
152
- @logger.warn(message.join("\n"))
153
- end
154
-
155
- def build_conflict_message state, lookahead, new_action
156
- message = ["Parser Conflict at State:"] + state.items.values.collect{|it| it.to_s(:hide_lookaheads => true)}
157
- message << "Existing: #{state.actions[lookahead.name]}"
158
- message << "New: #{new_action}"
159
- message << "Lookahead: #{lookahead}"
160
- message
161
- end
162
-
163
130
  def pump_channels
164
- while true
131
+ loop do
165
132
  unstable_count = 0
166
133
  @channels.each do |channel|
167
- if channel.pump
168
- unstable_count += 1
169
- end
134
+ unstable_count += 1 if channel.pump
170
135
  end
171
- break if unstable_count == 0
136
+ break if unstable_count.zero?
172
137
  @logger.debug("#{unstable_count} unstable channels...")
173
138
  end
174
139
  end
175
-
176
140
  end
177
141
 
178
142
  # Raised when trying to create a Parser for a grammar that has no productions for the start symbol
@@ -184,5 +148,12 @@ module Dhaka
184
148
  "No start productions defined for #{@grammar.name}"
185
149
  end
186
150
  end
151
+
152
+ class ParserLogOutputFormatter < Logger::Formatter #:nodoc:
153
+ def call(severity, time, progname, msg)
154
+ "\n%s -- %s: %s\n" % [ severity, progname, msg2str(msg)]
155
+ end
156
+ end
157
+
187
158
  end
188
159
 
@@ -2,9 +2,9 @@ module Dhaka
2
2
  class ParserRun #:nodoc:
3
3
 
4
4
  def initialize(grammar, start_state, token_stream)
5
- @grammar = grammar
6
- @node_stack = []
7
- @state_stack = [start_state]
5
+ @grammar = grammar
6
+ @node_stack = []
7
+ @state_stack = [start_state]
8
8
  @token_stream = token_stream
9
9
  @symbol_queue = []
10
10
  end
@@ -17,7 +17,7 @@ module Dhaka
17
17
  return error if error
18
18
  node_stack << ParseTreeLeafNode.new(@current_token)
19
19
  end
20
- ParseSuccessResult.new(node_stack[0])
20
+ ParseSuccessResult.new(node_stack.first)
21
21
  end
22
22
 
23
23
  private
@@ -26,9 +26,9 @@ module Dhaka
26
26
 
27
27
  def execute_actions
28
28
  while symbol_name = @symbol_queue.pop
29
- action = state_stack[-1].actions[symbol_name]
29
+ action = state_stack.last.actions[symbol_name]
30
30
  return ParseErrorResult.new(@current_token) unless action
31
- self.instance_eval(&action.action_code)
31
+ instance_eval(&action.action_code)
32
32
  end
33
33
  nil
34
34
  end
@@ -1,43 +1,40 @@
1
- #!/usr/bin/env ruby
2
- require 'set'
3
1
  module Dhaka
4
2
  class ParserState #:nodoc:
5
-
6
3
  attr_accessor :items, :actions, :id
7
4
 
8
5
  @@state_id = 0
9
6
 
10
7
  def self.next_state_id
11
- result = @@state_id
8
+ result = @@state_id
12
9
  @@state_id += 1
13
10
  result
14
11
  end
15
12
 
16
13
  def initialize(parser, items, id=nil)
17
- @parser = parser
18
- @items = items
14
+ @parser = parser
15
+ @items = items
19
16
  @actions = {}
20
- @id = id ? id : ParserState.next_state_id
17
+ @id = id || ParserState.next_state_id
21
18
  end
22
19
 
23
20
  def transition_items
24
21
  result = Hash.new {|h, k| h[k] = ItemSet.new()}
25
- for item in @items.values
26
- (result[item.next_symbol] << item) if item.next_symbol
22
+ items.values.each do |item|
23
+ result[item.next_symbol] << item if item.next_symbol
27
24
  end
28
25
  result
29
26
  end
30
27
 
31
- def dot_name
32
- self.to_s
28
+ def unique_name
29
+ "State#{id}"
33
30
  end
34
31
 
35
32
  def to_dot(options = {})
36
- label = self.items.values.collect{|item| item.to_s(options)}.join('\n')
37
- "#{dot_name} [label=\"#{label}\"]"
33
+ %(#{unique_name} [label="#{items.values.collect{|item| item.to_s(options)}.join('\n')}"])
38
34
  end
35
+
39
36
  def compile_to_ruby_source
40
- result = " at_state(#{@id}) {\n"
37
+ result = " at_state(#{id}) {\n"
41
38
  actions.each do |symbol_name, action|
42
39
  result << " for_symbol('#{symbol_name}') { #{action.compile_to_ruby_source} }\n"
43
40
  end
@@ -49,18 +46,19 @@ module Dhaka
49
46
  actions[symbol_name] = @parser.instance_eval(&blk)
50
47
  end
51
48
 
52
- def to_s
53
- "State#{id}"
49
+ def to_s(options = {})
50
+ items.values.collect{|item| item.to_s(options)}.join("\n")
54
51
  end
55
52
 
56
53
  end
57
54
 
58
55
  class ItemSet < Set #:nodoc:
59
56
  def hash
60
- self.collect{|item| item.hash}.inject{|result, hashcode| result ^ hashcode}
57
+ collect{|item| item.hash}.inject{|result, hashcode| result ^ hashcode}
61
58
  end
59
+
62
60
  def eql? other
63
61
  self == other
64
62
  end
65
63
  end
66
- end
64
+ end
@@ -6,15 +6,17 @@ module Dhaka
6
6
  class Token
7
7
  attr_accessor :symbol_name, :value, :input_position
8
8
  def initialize(symbol_name, value, input_position)
9
- @symbol_name = symbol_name
10
- @value = value
9
+ @symbol_name = symbol_name
10
+ @value = value
11
11
  @input_position = input_position
12
12
  end
13
+
13
14
  def to_s #:nodoc:
14
- "#{symbol_name}"
15
+ value ? "#{symbol_name} : #{value}" : "#{symbol_name}"
15
16
  end
17
+
16
18
  def == other
17
- (symbol_name == other.symbol_name) && (value == other.value)
19
+ symbol_name == other.symbol_name && value == other.value
18
20
  end
19
21
  end
20
22
  end
@@ -1,5 +1,4 @@
1
1
  module Dhaka
2
-
3
2
  # Reserved constant used to identify the idle state of the tokenizer.
4
3
  TOKENIZER_IDLE_STATE = :idle_state
5
4
 
@@ -7,17 +6,18 @@ module Dhaka
7
6
  # be passed in directly to the parser.
8
7
  class TokenizerSuccessResult
9
8
  include Enumerable
9
+
10
10
  def initialize(tokens)
11
11
  @tokens = tokens
12
12
  end
13
+
13
14
  # Returns false.
14
15
  def has_error?
15
16
  false
16
17
  end
17
- def each
18
- @tokens.each do |token|
19
- yield token
20
- end
18
+
19
+ def each(&block)
20
+ @tokens.each(&block)
21
21
  end
22
22
  end
23
23
 
@@ -25,9 +25,11 @@ module Dhaka
25
25
  class TokenizerErrorResult
26
26
  # The index of the character that caused the error.
27
27
  attr_reader :unexpected_char_index
28
+
28
29
  def initialize(unexpected_char_index)
29
30
  @unexpected_char_index = unexpected_char_index
30
31
  end
32
+
31
33
  # Returns true.
32
34
  def has_error?
33
35
  true
@@ -101,34 +103,41 @@ module Dhaka
101
103
  # switch_to Dhaka::TOKENIZER_IDLE_STATE
102
104
  # end
103
105
  # for_characters digits do
104
- # curr_token.value += curr_char
106
+ # curr_token.value << curr_char
105
107
  # advance
106
108
  # end
107
109
  # end
108
110
  #
109
111
  # end
110
-
111
-
112
112
  class Tokenizer
113
+ class << self
114
+ # Define the action for the state named +state_name+.
115
+ def for_state(state_name, &blk)
116
+ states[state_name].instance_eval(&blk)
117
+ end
113
118
 
114
- # Define the action for the state named +state_name+.
115
- def self.for_state(state_name, &blk)
116
- states[state_name].instance_eval(&blk)
117
- end
118
-
119
- # Tokenizes a string +input+ and returns a TokenizerErrorResult on failure or a TokenizerSuccessResult on success.
120
- def self.tokenize(input)
121
- self.new(input).run
119
+ # Tokenizes a string +input+ and returns a TokenizerErrorResult on failure or a TokenizerSuccessResult on success.
120
+ def tokenize(input)
121
+ new(input).run
122
+ end
123
+
124
+ private
125
+ def inherited(tokenizer)
126
+ class << tokenizer
127
+ attr_accessor :states, :grammar
128
+ end
129
+ tokenizer.states = Hash.new {|hash, key| hash[key] = TokenizerState.new}
130
+ end
122
131
  end
123
132
 
124
133
  # The tokens shifted so far.
125
134
  attr_reader :tokens
126
135
 
127
136
  def initialize(input) #:nodoc:
128
- @input = input
129
- @current_state = self.class.states[TOKENIZER_IDLE_STATE]
137
+ @input = input
138
+ @current_state = self.class.states[TOKENIZER_IDLE_STATE]
130
139
  @curr_char_index = 0
131
- @tokens = []
140
+ @tokens = []
132
141
  end
133
142
 
134
143
  # The character currently being processed.
@@ -140,10 +149,14 @@ module Dhaka
140
149
  def advance
141
150
  @curr_char_index += 1
142
151
  end
152
+
153
+ def inspect
154
+ "<Dhaka::Tokenizer grammar : #{grammar}>"
155
+ end
143
156
 
144
157
  # The token currently on top of the stack.
145
158
  def curr_token
146
- tokens[-1]
159
+ tokens.last
147
160
  end
148
161
 
149
162
  # Push a new token on to the stack with symbol corresponding to +symbol_name+ and a value of +value+.
@@ -164,17 +177,7 @@ module Dhaka
164
177
  instance_eval(&blk)
165
178
  end
166
179
  tokens << Dhaka::Token.new(Dhaka::END_SYMBOL_NAME, nil, nil)
167
- return TokenizerSuccessResult.new(tokens)
180
+ TokenizerSuccessResult.new(tokens)
168
181
  end
169
-
170
- private
171
- def self.inherited(tokenizer)
172
- class << tokenizer
173
- attr_accessor :states, :grammar
174
- end
175
- tokenizer.states = Hash.new {|hash, key| hash[key] = TokenizerState.new}
176
- end
177
-
178
182
  end
179
-
180
183
  end