dhaka 2.0.0 → 2.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68):
  1. data/lib/dhaka.rb +24 -22
  2. data/lib/evaluator/evaluator.rb +42 -44
  3. data/lib/grammar/closure_hash.rb +4 -3
  4. data/lib/grammar/grammar.rb +113 -110
  5. data/lib/grammar/grammar_symbol.rb +6 -3
  6. data/lib/grammar/precedence.rb +3 -2
  7. data/lib/grammar/production.rb +5 -6
  8. data/lib/parser/action.rb +16 -11
  9. data/lib/parser/channel.rb +22 -16
  10. data/lib/parser/compiled_parser.rb +28 -22
  11. data/lib/parser/conflict.rb +54 -0
  12. data/lib/parser/item.rb +19 -19
  13. data/lib/parser/parse_result.rb +16 -1
  14. data/lib/parser/parse_tree.rb +15 -9
  15. data/lib/parser/parser.rb +51 -80
  16. data/lib/parser/parser_run.rb +6 -6
  17. data/lib/parser/parser_state.rb +16 -18
  18. data/lib/parser/token.rb +6 -4
  19. data/lib/tokenizer/tokenizer.rb +34 -31
  20. data/test/all_tests.rb +4 -18
  21. data/test/another_lalr_but_not_slr_grammar.rb +9 -5
  22. data/test/{arithmetic_evaluator.rb → arithmetic/arithmetic_evaluator.rb} +1 -2
  23. data/test/{arithmetic_evaluator_test.rb → arithmetic/arithmetic_evaluator_test.rb} +9 -20
  24. data/test/arithmetic/arithmetic_grammar.rb +41 -0
  25. data/test/{arithmetic_grammar_test.rb → arithmetic/arithmetic_grammar_test.rb} +2 -4
  26. data/test/{arithmetic_test_methods.rb → arithmetic/arithmetic_test_methods.rb} +1 -3
  27. data/test/{arithmetic_tokenizer.rb → arithmetic/arithmetic_tokenizer.rb} +8 -10
  28. data/test/{arithmetic_tokenizer_test.rb → arithmetic/arithmetic_tokenizer_test.rb} +4 -2
  29. data/test/{arithmetic_precedence_evaluator.rb → arithmetic_precedence/arithmetic_precedence_evaluator.rb} +1 -2
  30. data/test/arithmetic_precedence/arithmetic_precedence_grammar.rb +24 -0
  31. data/test/{arithmetic_precedence_grammar_test.rb → arithmetic_precedence/arithmetic_precedence_grammar_test.rb} +2 -3
  32. data/test/arithmetic_precedence/arithmetic_precedence_parser_test.rb +31 -0
  33. data/test/{arithmetic_precedence_tokenizer.rb → arithmetic_precedence/arithmetic_precedence_tokenizer.rb} +8 -10
  34. data/test/brackets/bracket_grammar.rb +23 -0
  35. data/test/{bracket_tokenizer.rb → brackets/bracket_tokenizer.rb} +2 -4
  36. data/test/{brackets_test.rb → brackets/brackets_test.rb} +3 -4
  37. data/test/chittagong/chittagong_driver.rb +47 -0
  38. data/test/{chittagong_driver_test.rb → chittagong/chittagong_driver_test.rb} +66 -58
  39. data/test/{chittagong_evaluator.rb → chittagong/chittagong_evaluator.rb} +28 -13
  40. data/test/{chittagong_evaluator_test.rb → chittagong/chittagong_evaluator_test.rb} +6 -10
  41. data/test/chittagong/chittagong_grammar.rb +110 -0
  42. data/test/{chittagong_parser_test.rb → chittagong/chittagong_parser_test.rb} +5 -7
  43. data/test/{chittagong_test.rb → chittagong/chittagong_test.rb} +27 -36
  44. data/test/{chittagong_tokenizer.rb → chittagong/chittagong_tokenizer.rb} +17 -17
  45. data/test/{chittagong_tokenizer_test.rb → chittagong/chittagong_tokenizer_test.rb} +2 -3
  46. data/test/compiled_parser_test.rb +9 -42
  47. data/test/dhaka_test_helper.rb +17 -0
  48. data/test/evaluator_test.rb +18 -3
  49. data/test/grammar_test.rb +10 -15
  50. data/test/lalr_but_not_slr_grammar.rb +10 -8
  51. data/test/malformed_grammar.rb +2 -4
  52. data/test/malformed_grammar_test.rb +2 -3
  53. data/test/nullable_grammar.rb +11 -8
  54. data/test/parse_result_test.rb +44 -0
  55. data/test/parser_state_test.rb +36 -0
  56. data/test/parser_test.rb +53 -103
  57. data/test/precedence_grammar.rb +6 -6
  58. data/test/precedence_grammar_test.rb +2 -3
  59. data/test/rr_conflict_grammar.rb +5 -7
  60. data/test/simple_grammar.rb +6 -8
  61. data/test/sr_conflict_grammar.rb +6 -6
  62. metadata +30 -26
  63. data/test/arithmetic_grammar.rb +0 -35
  64. data/test/arithmetic_precedence_grammar.rb +0 -24
  65. data/test/arithmetic_precedence_parser_test.rb +0 -33
  66. data/test/bracket_grammar.rb +0 -25
  67. data/test/chittagong_grammar.rb +0 -104
  68. data/test/incomplete_arithmetic_evaluator.rb +0 -60
@@ -1,4 +1,3 @@
1
- #!/usr/bin/env ruby
2
1
  module Dhaka
3
2
  # Each grammar symbol is uniquely identified by a string name. The name of a symbol can
4
3
  # be anything and need not correspond to its character representation. For example, an ampersand in the
@@ -7,17 +6,21 @@ module Dhaka
7
6
  class GrammarSymbol
8
7
  attr_reader :name
9
8
  attr_accessor :non_terminal, :nullable, :precedence, :associativity
9
+
10
10
  def initialize(name)
11
11
  @name = name
12
12
  end
13
+
13
14
  def terminal
14
15
  !non_terminal
15
16
  end
17
+
16
18
  def to_s #:nodoc:
17
- name
19
+ name.dup
18
20
  end
21
+
19
22
  def <=> other
20
- self.name <=> other.name
23
+ name <=> other.name
21
24
  end
22
25
  end
23
26
  end
@@ -2,13 +2,14 @@ module Dhaka
2
2
  class Precedence #:nodoc:
3
3
  include Comparable
4
4
  attr_reader :precedence_level, :associativity
5
+
5
6
  def initialize(precedence_level, associativity)
6
7
  @precedence_level = precedence_level
7
- @associativity = associativity
8
+ @associativity = associativity
8
9
  end
9
10
 
10
11
  def <=> other
11
- self.precedence_level <=> other.precedence_level
12
+ precedence_level <=> other.precedence_level
12
13
  end
13
14
 
14
15
  def to_s
@@ -1,19 +1,18 @@
1
- #!/usr/bin/env ruby
2
1
  module Dhaka
3
2
  class Production
4
3
 
5
4
  attr_reader :symbol, :expansion, :name
6
5
 
7
6
  def initialize(symbol, expansion, name, precedence = nil)
8
- @symbol = symbol
9
- @expansion = expansion
10
- @name = name
7
+ @symbol = symbol
8
+ @expansion = expansion
9
+ @name = name
11
10
  @precedence = precedence
12
11
  end
13
12
 
14
13
  def precedence
15
14
  unless @precedence
16
- @expansion.reverse_each do |symbol|
15
+ expansion.reverse_each do |symbol|
17
16
  if symbol.terminal
18
17
  @precedence = symbol.precedence
19
18
  break
@@ -24,7 +23,7 @@ module Dhaka
24
23
  end
25
24
 
26
25
  def to_s #:nodoc:
27
- "#{@name} #{@symbol} ::= #{@expansion.join(' ')}"
26
+ "#{name} #{symbol} ::= #{expansion.join(' ')}"
28
27
  end
29
28
 
30
29
  end
@@ -8,43 +8,48 @@ module Dhaka
8
8
  attr_reader :destination_state
9
9
  def initialize destination_state
10
10
  @destination_state = destination_state
11
- @action_code = Proc.new do
11
+ @action_code = Proc.new do
12
12
  state_stack << destination_state
13
13
  []
14
14
  end
15
15
  end
16
+
16
17
  def compile_to_ruby_source
17
- "shift_to #{@destination_state.id}"
18
+ "shift_to #{destination_state.id}"
18
19
  end
20
+
19
21
  def to_s
20
- "Shift to #{@destination_state}"
22
+ "Shift"
21
23
  end
22
24
  end
23
25
 
24
26
  class ReduceAction < Action #:nodoc:
25
27
  attr_reader :production
28
+
26
29
  def initialize(production)
27
- @production = production
30
+ @production = production
28
31
  @action_code = Proc.new do
29
32
  composite_node = ParseTreeCompositeNode.new(production)
30
33
 
31
- production.expansion.each { |symbol|
32
- state_stack.pop
33
- composite_node.child_nodes.unshift(node_stack.pop)
34
- }
34
+ production.expansion.each do |symbol|
35
+ state_stack.pop
36
+ composite_node.child_nodes.unshift(node_stack.pop)
37
+ end
35
38
 
36
39
  node_stack << composite_node
37
40
 
38
41
  unless composite_node.head_node?
39
- @symbol_queue += [@current_token.symbol_name, production.symbol.name]
42
+ @symbol_queue.concat [@current_token.symbol_name, production.symbol.name]
40
43
  end
41
44
  end
42
45
  end
46
+
43
47
  def compile_to_ruby_source
44
- "reduce_with '#{@production.name}'"
48
+ "reduce_with '#{production.name}'"
45
49
  end
50
+
46
51
  def to_s
47
52
  "Reduce with #{production}"
48
53
  end
49
54
  end
50
- end
55
+ end
@@ -1,52 +1,58 @@
1
- #!/usr/bin/env ruby
2
1
  module Dhaka
3
2
  # Represents channels for pumping of lookaheads between items
4
3
  class Channel #:nodoc:
5
4
  attr_reader :start_item, :end_item
5
+
6
6
  def initialize(grammar, start_item, end_item)
7
- @grammar = grammar
7
+ @grammar = grammar
8
8
  @start_item = start_item
9
- @end_item = end_item
9
+ @end_item = end_item
10
10
  end
11
+
11
12
  def propagate cargo
12
- diff = cargo - @end_item.lookaheadset
13
- @end_item.lookaheadset.merge(diff)
13
+ diff = cargo - end_item.lookaheadset
14
+ end_item.lookaheadset.merge(diff)
14
15
  !diff.empty?
15
16
  end
17
+
16
18
  def to_s
17
- "Channel from #{@start_item} to #{@end_item}"
19
+ "Channel from #{start_item} to #{end_item}"
18
20
  end
21
+
19
22
  def eql? other
20
- @start_item.eql?(other.start_item) and @end_item.eql?(other.end_item)
23
+ start_item.eql?(other.start_item) and end_item.eql?(other.end_item)
21
24
  end
25
+
22
26
  def hash
23
- @start_item.hash ^ @end_item.hash
27
+ start_item.hash ^ end_item.hash
24
28
  end
25
29
  end
26
30
 
27
31
  class SpontaneousChannel < Channel #:nodoc:
28
32
  def to_s
29
- "Spontaneous " + super.to_s
33
+ "Spontaneous " + super
30
34
  end
35
+
31
36
  def pump
32
- follow_index = @start_item.next_item_index + 1
33
- cargo = Set.new
34
- while follow_symbol = @start_item.production.expansion[follow_index]
35
- cargo += @grammar.first(follow_symbol)
37
+ follow_index = start_item.next_item_index + 1
38
+ cargo = Set.new
39
+ while follow_symbol = start_item.production.expansion[follow_index]
40
+ cargo.merge @grammar.first(follow_symbol)
36
41
  return propagate(cargo) unless follow_symbol.nullable
37
42
  follow_index += 1
38
43
  end
39
- cargo += @start_item.lookaheadset
44
+ cargo.merge start_item.lookaheadset
40
45
  propagate cargo
41
46
  end
42
47
  end
43
48
 
44
49
  class PassiveChannel < Channel #:nodoc:
45
50
  def to_s
46
- "Passive " + super.to_s
51
+ "Passive " + super
47
52
  end
53
+
48
54
  def pump
49
- propagate @start_item.lookaheadset
55
+ propagate start_item.lookaheadset
50
56
  end
51
57
  end
52
58
  end
@@ -2,35 +2,41 @@ module Dhaka
2
2
  # This is the superclass of all compiled Parsers. It is only used by generated code.
3
3
  class CompiledParser
4
4
 
5
- private
6
- def self.inherited(compiled_parser)
7
- class << compiled_parser
8
- attr_accessor :states, :grammar, :start_state_id
9
- end
10
- compiled_parser.states = Hash.new {|hash, state_id| hash[state_id] = ParserState.new(compiled_parser, {}, state_id)}
11
- end
5
+ class << self
6
+ private
7
+ def inherited(compiled_parser)
8
+ class << compiled_parser
9
+ attr_accessor :states, :grammar, :start_state_id
10
+ end
11
+ compiled_parser.states = Hash.new {|hash, state_id| hash[state_id] = ParserState.new(compiled_parser, {}, state_id)}
12
+ end
12
13
 
13
- def self.at_state x, &blk
14
- self.states[x].instance_eval(&blk)
15
- end
14
+ def at_state x, &blk
15
+ states[x].instance_eval(&blk)
16
+ end
16
17
 
17
- def self.start_state
18
- states[start_state_id]
19
- end
18
+ def start_state
19
+ states[start_state_id]
20
+ end
20
21
 
21
- def self.start_with start_state_id
22
- self.start_state_id = start_state_id
23
- end
22
+ def start_with start_state_id
23
+ self.start_state_id = start_state_id
24
+ end
24
25
 
25
- def self.reduce_with production_name
26
- ReduceAction.new(grammar.production_named(production_name))
27
- end
26
+ def reduce_with production_name
27
+ ReduceAction.new(grammar.production_named(production_name))
28
+ end
28
29
 
29
- def self.shift_to state_id
30
- ShiftAction.new(states[state_id])
30
+ def shift_to state_id
31
+ ShiftAction.new(states[state_id])
32
+ end
33
+
34
+ def inspect
35
+ "<Dhaka::CompiledParser grammar : #{grammar}>"
36
+ end
31
37
  end
32
38
 
33
- self.extend(ParserMethods)
39
+ extend(ParserMethods)
34
40
 
35
41
  end
36
42
 
@@ -0,0 +1,54 @@
1
+ module Dhaka
2
+ # Represents parser shift-reduce and reduce-reduce conflicts and encapsulates logic for resolving them.
3
+ class Conflict #:nodoc:
4
+ def initialize(state, lookahead, new_action)
5
+ @lookahead, @new_action, @state = lookahead, new_action, state
6
+ end
7
+
8
+ def build_conflict_message
9
+ lines = ["Parser Conflict at State:", @state.to_s(:hide_lookaheads => true)]
10
+ lines << "Existing: #{@state.actions[@lookahead.name]}"
11
+ lines << "New: #{@new_action}"
12
+ lines << "Lookahead: #{@lookahead}"
13
+ lines.join("\n")
14
+ end
15
+ end
16
+
17
+
18
+ class ReduceReduceConflict < Conflict #:nodoc:
19
+ def resolve
20
+ build_conflict_message
21
+ end
22
+ end
23
+
24
+ class ShiftReduceConflict < Conflict #:nodoc:
25
+
26
+ def resolve
27
+ lines = [build_conflict_message]
28
+ shift_precedence = @lookahead.precedence
29
+ reduce_precedence = @new_action.production.precedence
30
+ if shift_precedence && reduce_precedence
31
+ if shift_precedence > reduce_precedence
32
+ lines << "Resolving with precedence. Choosing shift over reduce."
33
+ elsif shift_precedence < reduce_precedence
34
+ lines << "Resolving with precedence. Choosing reduce over shift."
35
+ @state.actions[@lookahead.name] = @new_action
36
+ else
37
+ case shift_precedence.associativity
38
+ when :left
39
+ lines << "Resolving with left associativity. Choosing reduce over shift."
40
+ @state.actions[@lookahead.name] = @new_action
41
+ when :right
42
+ lines << "Resolving with right associativity. Choosing shift over reduce."
43
+ when :nonassoc
44
+ lines << "Resolving with non-associativity. Eliminating action."
45
+ @state.actions.delete(@lookahead.name)
46
+ end
47
+ end
48
+ else
49
+ lines << "No precedence rule. Choosing shift over reduce."
50
+ end
51
+ lines.join("\n")
52
+ end
53
+ end
54
+ end
@@ -1,42 +1,42 @@
1
- #!/usr/bin/env ruby
2
1
  module Dhaka
3
2
  # Represents parser state items
4
3
  class Item #:nodoc:
5
4
  attr_reader :production, :next_item_index, :lookaheadset
5
+
6
6
  def initialize(production, next_item_index)
7
- @production = production
7
+ @production = production
8
8
  @next_item_index = next_item_index
9
- @lookaheadset = Set.new
9
+ @lookaheadset = Set.new
10
10
  end
11
+
11
12
  def next_symbol
12
- if @next_item_index < @production.expansion.size
13
- @production.expansion[@next_item_index]
14
- else
15
- nil
16
- end
13
+ production.expansion[next_item_index]
17
14
  end
15
+
18
16
  def next_item
19
- Item.new(@production, @next_item_index+1)
17
+ Item.new(production, @next_item_index + 1)
20
18
  end
19
+
21
20
  def to_s(options = {})
22
- expansion_symbols = @production.expansion.collect {|symbol| symbol.name}
23
- if @next_item_index < expansion_symbols.size
24
- expansion_symbols.insert(@next_item_index, '->')
21
+ expansion_symbols = production.expansion.collect {|symbol| symbol.name}
22
+ if next_item_index < expansion_symbols.size
23
+ expansion_symbols.insert(next_item_index, '->')
25
24
  else
26
25
  expansion_symbols << '->'
27
26
  end
28
27
  expansion_repr = expansion_symbols.join(' ')
29
- if options[:hide_lookaheads]
30
- "#{@production.symbol} ::= #{expansion_repr}"
31
- else
32
- "#{@production.symbol} ::= #{expansion_repr} [#{@lookaheadset.collect.sort}]"
33
- end
28
+
29
+ item = "#{production.symbol} ::= #{expansion_repr}"
30
+ item << " [#{lookaheadset.collect.sort}]" unless options[:hide_lookaheads]
31
+ item
34
32
  end
33
+
35
34
  def eql?(other)
36
- @production == other.production && @next_item_index==other.next_item_index
35
+ production == other.production && next_item_index == other.next_item_index
37
36
  end
37
+
38
38
  def hash
39
- @production.hash ^ @next_item_index.hash
39
+ production.hash ^ next_item_index.hash
40
40
  end
41
41
  end
42
42
  end
@@ -1,15 +1,28 @@
1
1
  module Dhaka
2
2
  # Returned on successful parsing of the input token stream.
3
- class ParseSuccessResult
3
+ class ParseSuccessResult < DelegateClass(ParseTreeCompositeNode)
4
4
  # Contains the parse result.
5
5
  attr_accessor :parse_tree
6
+
6
7
  def initialize(parse_tree) #:nodoc:
8
+ super
7
9
  @parse_tree = parse_tree
8
10
  end
11
+
9
12
  # This is false.
10
13
  def has_error?
11
14
  false
12
15
  end
16
+
17
+ # Returns the dot representation of the parse tree
18
+ def to_dot
19
+ result = []
20
+ result << ["digraph x {", %(node [fontsize="10" shape=box size="5"])]
21
+ result << parse_tree.to_dot
22
+ result << ['}']
23
+ result.join("\n")
24
+ end
25
+
13
26
  # Deprecated. Use the +parse_tree+ accessor.
14
27
  alias syntax_tree parse_tree
15
28
  end
@@ -18,9 +31,11 @@ module Dhaka
18
31
  class ParseErrorResult
19
32
  # The token that caused the parse error.
20
33
  attr_reader :unexpected_token
34
+
21
35
  def initialize(unexpected_token) #:nodoc:
22
36
  @unexpected_token = unexpected_token
23
37
  end
38
+
24
39
  # This is true.
25
40
  def has_error?
26
41
  true
@@ -2,30 +2,32 @@ module Dhaka
2
2
  # These are composite nodes of the syntax tree returned by the successful parsing of a token stream.
3
3
  class ParseTreeCompositeNode
4
4
  attr_reader :production, :child_nodes
5
+
5
6
  def initialize(production) #:nodoc:
6
- @production = production
7
+ @production = production
7
8
  @child_nodes = []
8
9
  end
9
10
  def linearize #:nodoc:
10
11
  child_nodes.collect {|child_node| child_node.linearize}.flatten + [self]
11
12
  end
13
+
12
14
  def tokens
13
15
  child_nodes.collect{|child_node| child_node.tokens}.flatten
14
16
  end
17
+
15
18
  def to_s #:nodoc:
16
19
  "CompositeNode: #{production.symbol} --> [#{child_nodes.join(", ")}]"
17
20
  end
18
- # Returns the dot representation of the syntax tree.
21
+
22
+ # Returns the dot representation of this node.
19
23
  def to_dot
20
24
  result = []
21
- result << ["digraph x {", "node [fontsize=\"10\" shape=box size=\"5\"]"] if head_node?
22
- label = self.production
23
- result << "#{dot_name} [label=\"#{label}\"]"
25
+ label = production
26
+ result << %(#{dot_name} [label="#{label}"])
24
27
  child_nodes.each do |child|
25
28
  result << "#{dot_name} -> #{child.dot_name}"
26
29
  result << "#{child.to_dot}"
27
30
  end
28
- result << ['}'] if head_node?
29
31
  result.join("\n")
30
32
  end
31
33
 
@@ -42,23 +44,28 @@ module Dhaka
42
44
  # These are leaf nodes of syntax trees. They contain tokens.
43
45
  class ParseTreeLeafNode
44
46
  attr_reader :token
47
+
45
48
  def initialize(token) #:nodoc:
46
49
  @token = token
47
50
  end
51
+
48
52
  def linearize #:nodoc:
49
53
  []
50
54
  end
55
+
51
56
  def tokens
52
57
  [token]
53
58
  end
59
+
54
60
  def to_s #:nodoc:
55
61
  "LeafNode: #{token}"
56
62
  end
63
+
57
64
  # Returns the dot representation of this node.
58
65
  def to_dot
59
- label = "#{token}#{' : '+token.value.to_s if token.value}"
60
- "#{dot_name} [label=\"#{label}\"]"
66
+ %(#{dot_name} [label="#{token.to_s}"])
61
67
  end
68
+
62
69
  def head_node? #:nodoc:
63
70
  false
64
71
  end
@@ -66,6 +73,5 @@ module Dhaka
66
73
  def dot_name #:nodoc:
67
74
  "Node#{object_id}"
68
75
  end
69
-
70
76
  end
71
77
  end