dendroid 0.1.00 → 0.2.00

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +3 -0
  3. data/lib/dendroid/formatters/ascii_tree.rb +142 -0
  4. data/lib/dendroid/formatters/base_formatter.rb +25 -0
  5. data/lib/dendroid/formatters/bracket_notation.rb +50 -0
  6. data/lib/dendroid/grm_analysis/dotted_item.rb +46 -30
  7. data/lib/dendroid/grm_analysis/grm_analyzer.rb +2 -4
  8. data/lib/dendroid/grm_analysis/{choice_items.rb → rule_items.rb} +10 -10
  9. data/lib/dendroid/grm_dsl/base_grm_builder.rb +3 -4
  10. data/lib/dendroid/parsing/and_node.rb +56 -0
  11. data/lib/dendroid/parsing/chart_walker.rb +293 -0
  12. data/lib/dendroid/parsing/composite_parse_node.rb +21 -0
  13. data/lib/dendroid/parsing/empty_rule_node.rb +28 -0
  14. data/lib/dendroid/parsing/or_node.rb +51 -0
  15. data/lib/dendroid/parsing/parse_node.rb +26 -0
  16. data/lib/dendroid/parsing/parse_tree_visitor.rb +127 -0
  17. data/lib/dendroid/parsing/parser.rb +185 -0
  18. data/lib/dendroid/parsing/terminal_node.rb +32 -0
  19. data/lib/dendroid/parsing/walk_progress.rb +117 -0
  20. data/lib/dendroid/recognizer/chart.rb +8 -0
  21. data/lib/dendroid/recognizer/e_item.rb +21 -2
  22. data/lib/dendroid/recognizer/item_set.rb +7 -2
  23. data/lib/dendroid/recognizer/recognizer.rb +33 -20
  24. data/lib/dendroid/syntax/grammar.rb +1 -1
  25. data/lib/dendroid/syntax/rule.rb +71 -13
  26. data/spec/dendroid/grm_analysis/dotted_item_spec.rb +59 -47
  27. data/spec/dendroid/grm_analysis/{choice_items_spec.rb → rule_items_spec.rb} +5 -6
  28. data/spec/dendroid/parsing/chart_walker_spec.rb +223 -0
  29. data/spec/dendroid/parsing/terminal_node_spec.rb +36 -0
  30. data/spec/dendroid/recognizer/e_item_spec.rb +5 -5
  31. data/spec/dendroid/recognizer/item_set_spec.rb +16 -8
  32. data/spec/dendroid/recognizer/recognizer_spec.rb +57 -5
  33. data/spec/dendroid/support/sample_grammars.rb +2 -0
  34. data/spec/dendroid/syntax/grammar_spec.rb +16 -21
  35. data/spec/dendroid/syntax/rule_spec.rb +56 -7
  36. data/version.txt +1 -1
  37. metadata +20 -13
  38. data/lib/dendroid/grm_analysis/alternative_item.rb +0 -70
  39. data/lib/dendroid/grm_analysis/production_items.rb +0 -55
  40. data/lib/dendroid/syntax/choice.rb +0 -95
  41. data/lib/dendroid/syntax/production.rb +0 -82
  42. data/spec/dendroid/grm_analysis/alternative_item_spec.rb +0 -12
  43. data/spec/dendroid/grm_analysis/production_items_spec.rb +0 -68
  44. data/spec/dendroid/syntax/choice_spec.rb +0 -68
  45. data/spec/dendroid/syntax/production_spec.rb +0 -92
@@ -0,0 +1,185 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'weakref'
4
+ require_relative '../grm_dsl/base_grm_builder'
5
+ require_relative '../utils/base_tokenizer'
6
+ require_relative '../recognizer/recognizer'
7
+ require_relative 'chart_walker'
8
+ require_relative 'parse_tree_visitor'
9
+ require_relative '../formatters/bracket_notation'
10
+ require_relative '../formatters/ascii_tree'
11
+
12
+ module Dendroid
13
+ module Parsing
14
+ class Parser
15
+ end # class
16
+ end # module
17
+ end # module
18
+
19
+ def grammar_l1
20
+ builder = Dendroid::GrmDSL::BaseGrmBuilder.new do
21
+ # Grammar inspired from Wikipedia entry on Earley parsing
22
+ declare_terminals('PLUS', 'STAR', 'INTEGER')
23
+ rule('p' => 's')
24
+ rule('s' => ['s PLUS m', 'm'])
25
+ rule('m' => ['m STAR t', 't'])
26
+ rule('t' => 'INTEGER')
27
+ end
28
+
29
+ builder.grammar
30
+ end
31
+
32
+ def grammar_l31
33
+ builder = Dendroid::GrmDSL::BaseGrmBuilder.new do
34
+ # Ambiguous arithmetical expression language
35
+ # This language is compatible with tokenizer L1
36
+ declare_terminals('PLUS', 'STAR', 'INTEGER')
37
+ rule('p' => 's')
38
+ rule('s' => ['s PLUS s', 's STAR s', 'INTEGER'])
39
+ end
40
+
41
+ builder.grammar
42
+ end
43
+
44
+ def tokenizer_l1
45
+ Dendroid::Utils::BaseTokenizer.new do
46
+ map_verbatim2terminal({ '+' => :PLUS, '*' => :STAR })
47
+
48
+ scan_verbatim(['+', '*'])
49
+ scan_value(/\d+/, :INTEGER, ->(txt) { txt.to_i })
50
+ end
51
+ end
52
+
53
+ def retrieve_success_item(chart, grammar)
54
+ last_item_set = chart.item_sets.last
55
+ result = nil
56
+ last_item_set.items.reverse_each do |itm|
57
+ if itm.origin.zero? && itm.dotted_item.completed? && itm.dotted_item.rule.lhs == grammar.start_symbol
58
+ result = itm
59
+ break
60
+ end
61
+ end
62
+
63
+ result
64
+ end
65
+
66
+ def grammar_l10
67
+ # Grammar with left recursive rule
68
+ builder = Dendroid::GrmDSL::BaseGrmBuilder.new do
69
+ declare_terminals('a')
70
+
71
+ rule 'A' => ['A a', '']
72
+ end
73
+
74
+ builder.grammar
75
+ end
76
+
77
+ def tokenizer_l10
78
+ Dendroid::Utils::BaseTokenizer.new do
79
+ map_verbatim2terminal({ 'a' => :a })
80
+
81
+ scan_verbatim(['a'])
82
+ end
83
+ end
84
+
85
+ def grammar_l11
86
+ builder = Dendroid::GrmDSL::BaseGrmBuilder.new do
87
+ # Grammar with right-recursive rule
88
+ declare_terminals('a')
89
+
90
+ rule 'A' => ['a A', '']
91
+ end
92
+
93
+ builder.grammar
94
+ end
95
+
96
+ def tokenizer_l11
97
+ Dendroid::Utils::BaseTokenizer.new do
98
+ map_verbatim2terminal({ 'a' => :a })
99
+
100
+ scan_verbatim(['a'])
101
+ end
102
+ end
103
+
104
+ def grammar_l8
105
+ builder = Dendroid::GrmDSL::BaseGrmBuilder.new do
106
+ # (based on grammar G2 from paper Masaru Tomita "An Efficient Context-Free Parsing Algorithm
107
+ # for Natural Languages")
108
+ declare_terminals('x')
109
+
110
+ rule 'S' => ['S S', 'x']
111
+ end
112
+
113
+ builder.grammar
114
+ end
115
+
116
+ def tokenizer_l8
117
+ Dendroid::Utils::BaseTokenizer.new do
118
+ map_verbatim2terminal({ 'x' => :x })
119
+
120
+ scan_verbatim(['x'])
121
+ end
122
+ end
123
+
124
+ ########################################
125
+ # Entry point
126
+ ########################################
127
+ kode = 3
128
+
129
+ case kode
130
+ when 0
131
+ recognizer = Dendroid::Recognizer::Recognizer.new(grammar_l1, tokenizer_l1)
132
+ chart = recognizer.run('2 + 3 * 4')
133
+ succ_item = retrieve_success_item(chart, recognizer.grm_analysis.grammar)
134
+ walker = Dendroid::Parsing::ChartWalker.new(chart)
135
+ root = walker.walk(succ_item)
136
+
137
+ # formatter = BracketNotation.new($stdout)
138
+ formatter = Asciitree.new($stdout)
139
+ visitor = ParseTreeVisitor.new(root)
140
+ formatter.render(visitor)
141
+
142
+ when 1
143
+ recognizer = Dendroid::Recognizer::Recognizer.new(grammar_l10, tokenizer_l10)
144
+ chart = recognizer.run('a a a a a')
145
+ succ_item = retrieve_success_item(chart, recognizer.grm_analysis.grammar)
146
+ walker = Dendroid::Parsing::ChartWalker.new(chart)
147
+ root = walker.walk(succ_item)
148
+
149
+ formatter = Asciitree.new($stdout)
150
+ visitor = ParseTreeVisitor.new(root)
151
+ formatter.render(visitor)
152
+
153
+ when 2
154
+ recognizer = Dendroid::Recognizer::Recognizer.new(grammar_l11, tokenizer_l11)
155
+ chart = recognizer.run('a a a a a')
156
+ succ_item = retrieve_success_item(chart, recognizer.grm_analysis.grammar)
157
+ walker = Dendroid::Parsing::ChartWalker.new(chart)
158
+ root = walker.walk(succ_item)
159
+
160
+ formatter = Asciitree.new($stdout)
161
+ visitor = ParseTreeVisitor.new(root)
162
+ formatter.render(visitor)
163
+
164
+ when 3
165
+ recognizer = Dendroid::Recognizer::Recognizer.new(grammar_l31, tokenizer_l1)
166
+ chart = recognizer.run('2 + 3 * 4')
167
+ succ_item = retrieve_success_item(chart, recognizer.grm_analysis.grammar)
168
+ walker = Dendroid::Parsing::ChartWalker.new(chart)
169
+ root = walker.walk(succ_item)
170
+
171
+ formatter = Asciitree.new($stdout)
172
+ visitor = ParseTreeVisitor.new(root)
173
+ formatter.render(visitor)
174
+
175
+ when 4
176
+ recognizer = Dendroid::Recognizer::Recognizer.new(grammar_l8, tokenizer_l8)
177
+ chart = recognizer.run('x x x x')
178
+ succ_item = retrieve_success_item(chart, recognizer.grm_analysis.grammar)
179
+ walker = Dendroid::Parsing::ChartWalker.new(chart)
180
+ root = walker.walk(succ_item)
181
+
182
+ formatter = Asciitree.new($stdout)
183
+ visitor = ParseTreeVisitor.new(root)
184
+ formatter.render(visitor)
185
+ end
@@ -0,0 +1,32 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'parse_node'
4
+
5
+ module Dendroid
6
+ module Parsing
7
+ class TerminalNode < ParseNode
8
+ # @return [Dendroid::Syntax::Terminal] Terminal symbol of matching token.
9
+ attr_reader :symbol
10
+
11
+ # @return [Dendroid::Lexical::Token] Matching input token object.
12
+ attr_reader :token
13
+
14
+ def initialize(sym, tok, rank)
15
+ super(rank, rank + 1)
16
+ @symbol = sym
17
+ @token = tok
18
+ end
19
+
20
+ # Part of the 'visitee' role in Visitor design pattern.
21
+ # @param aVisitor [ParseTreeVisitor] the visitor
22
+ def accept(aVisitor)
23
+ aVisitor.visit_terminal(self)
24
+ end
25
+
26
+ def to_s()
27
+ display_val = token.is_a?(Dendroid::Lexical::Literal) ? ": #{token.value}" : ''
28
+ "#{symbol.name}#{display_val} #{super}"
29
+ end
30
+ end # class
31
+ end # module
32
+ end # module
@@ -0,0 +1,117 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'and_node'
4
+ require_relative 'or_node'
5
+ require_relative 'terminal_node'
6
+ require_relative 'empty_rule_node'
7
+
8
+ module Dendroid
9
+ module Parsing
10
+ class WalkProgress
11
+ attr_accessor :state
12
+ attr_accessor :curr_rank
13
+ attr_reader :curr_item
14
+ attr_accessor :predecessor
15
+ attr_reader :parents
16
+
17
+ def initialize(start_rank, start_item, parents)
18
+ @state = :New
19
+ @curr_rank = start_rank
20
+ @curr_item = start_item
21
+ @predecessor = nil
22
+ @parents = parents
23
+ end
24
+
25
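+ # Copy hook (invoked by `dup`/`clone`): takes over state, rank and item of `orig`, resets the predecessor and duplicates the parents array.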
26
+ def initialize_copy(orig)
27
+ @state = orig.state
28
+ @curr_rank = orig.curr_rank
29
+ @curr_item = orig.curr_item
30
+ @predecessor = nil
31
+ @parents = orig.parents.dup
32
+ end
33
+
34
+ def fork(thePredecessor)
35
+ @state = :Forking
36
+ @predecessor = thePredecessor
37
+ end
38
+
39
+ def curr_item=(anEntry)
40
+ if anEntry.nil?
41
+ raise StandardError
42
+ else
43
+ @curr_item = anEntry
44
+ end
45
+ end
46
+
47
+ def add_node_empty(anEntry)
48
+ node_empty = EmptyRuleNode.new(anEntry, curr_rank)
49
+ add_child_node(node_empty)
50
+ end
51
+
52
+ # Add a terminal node for terminal at current rank as a child of last parent
53
+ def add_terminal_node(token)
54
+ @curr_rank -= 1
55
+ term_node = TerminalNode.new(curr_item.prev_symbol, token, curr_rank)
56
+ add_child_node(term_node)
57
+ end
58
+
59
+ # Add an AND node for given entry as a child of last parent
60
+ def push_and_node(anEntry)
61
+ node = ANDNode.new(anEntry, curr_rank)
62
+ raise StandardError unless anEntry.rule == node.rule # Fails
63
+ add_child_node(node)
64
+ parents.push(node)
65
+
66
+ node
67
+ end
68
+
69
+ def push_or_node(origin, arity)
70
+ node = OrNode.new(curr_item.prev_symbol, origin, curr_rank, arity)
71
+ add_child_node(node)
72
+ parents.push(node)
73
+
74
+ node
75
+ end
76
+
77
+ def add_child_node(aNode)
78
+ parents.last.add_child(aNode, curr_item.position - 1)
79
+ aNode
80
+ end
81
+
82
+ # Do the given EItems match one of the parents?
83
+ # Matching = corresponds to the same rule and range
84
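+ # @return [Array<Array(EItem, Integer)>] pairs of a matching entry and the offset of the matched parent, counted from the last parent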
85
+ def match_parent?(entries, stop_at_first)
86
+ matching = []
87
+ min_origin = entries[0].origin
88
+ first_iteration = true
89
+ offset = 0
90
+
91
+ parents.reverse_each do |node|
92
+ if node.is_a?(OrNode)
93
+ offset += 1
94
+ next
95
+ end
96
+ entries.each do |ent|
97
+ if first_iteration
98
+ min_origin = ent.origin if ent.origin < min_origin
99
+ end
100
+ next unless node.match(ent)
101
+
102
+ matching << [ent, offset]
103
+ break if stop_at_first
104
+ end
105
+ first_iteration = false
106
+ break if stop_at_first && !matching.empty?
107
+
108
+ # Stop loop when parent.origin < min(entries.origin)
109
+ break if node.range[0] < min_origin
110
+ offset += 1
111
+ end
112
+
113
+ matching
114
+ end
115
+ end # class
116
+ end # module
117
+ end # module
@@ -15,6 +15,9 @@ module Dendroid
15
15
  # @return [Array<Recognizer::ItemSet>] The array of item sets
16
16
  attr_reader :item_sets
17
17
 
18
+ # @return [Array<Dendroid::Lexical::Token>] The input tokens
19
+ attr_reader :tokens
20
+
18
21
  # @return [Boolean] Indicates whether the recognizer successfully processed the whole input
19
22
  attr_writer :success
20
23
 
@@ -45,6 +48,11 @@ module Dendroid
45
48
  item_sets.last.add_item(e_item)
46
49
  end
47
50
 
51
+ # @param input_tokens [Array<Dendroid::Lexical::Token>] The input tokens
52
+ def tokens=(input_tokens)
53
+ @tokens = input_tokens
54
+ end
55
+
48
56
  # Return true if the input text is valid according to the grammar.
49
57
  # @return [Boolean]
50
58
  def successful?
@@ -1,6 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require 'forwardable'
4
+ require 'weakref'
4
5
 
5
6
  module Dendroid
6
7
  module Recognizer
@@ -9,19 +10,27 @@ module Dendroid
9
10
  class EItem
10
11
  extend Forwardable
11
12
 
13
+ # (Weak) reference to the dotted item
12
14
  # @return [Dendroid::GrmAnalysis::DottedItem]
13
15
  attr_reader :dotted_item
14
16
 
15
17
  # @return [Integer] the rank of the token that correspond to the start of the rule.
16
18
  attr_reader :origin
17
19
 
18
- def_delegators :@dotted_item, :completed?, :expecting?, :next_symbol, :pre_scan?
20
+ # TODO: :predictor, :completer, :scanner
21
+ attr_accessor :algo
22
+
23
+ # @return [Array<WeakRef>] predecessors sorted by decreasing origin value
24
+ attr_accessor :predecessors
25
+
26
+ def_delegators :@dotted_item, :completed?, :expecting?, :next_symbol, :pre_scan?, :position, :prev_symbol, :rule
19
27
 
20
28
  # @param aDottedItem [Dendroid::GrmAnalysis::DottedItem]
21
29
  # @param origin [Integer]
22
30
  def initialize(aDottedItem, origin)
23
- @dotted_item = aDottedItem
31
+ @dotted_item = WeakRef.new(aDottedItem)
24
32
  @origin = origin
33
+ @predecessors = []
25
34
  end
26
35
 
27
36
  # @return [Dendroid::Syntax::NonTerminal] the head of the production rule
@@ -42,6 +51,16 @@ module Dendroid
42
51
  def to_s
43
52
  "#{dotted_item} @ #{origin}"
44
53
  end
54
+
55
+ alias inspect to_s
56
+
57
+ def add_predecessor(pred)
58
+ if predecessors.size > 1 && pred.origin < predecessors[0].origin
59
+ predecessors.insert(2, WeakRef.new(pred))
60
+ else
61
+ predecessors.unshift(WeakRef.new(pred))
62
+ end
63
+ end
45
64
  end # class
46
65
  end # module
47
66
  end # module
@@ -15,10 +15,15 @@ module Dendroid
15
15
  @items = []
16
16
  end
17
17
 
18
- # Add an Early item to the set
18
+ # Add an Earley item to the set if not yet present.
19
19
  # @param anItem [Recognizer::EItem]
20
+ # @return [Recognizer::EItem] the item in the set
20
21
  def add_item(anItem)
21
- @items << anItem unless items.include? anItem
22
+ idx = items.find_index anItem
23
+ return items[idx] if idx
24
+
25
+ @items << anItem
26
+ anItem
22
27
  end
23
28
 
24
29
  # Find the items that expect a given grammar symbol
@@ -39,7 +39,7 @@ module Dendroid
39
39
  end
40
40
 
41
41
  # Run the Earley algorithm
42
- # @param initial_token [Dednroid::Lexical::Token]
42
+ # @param initial_token [Dendroid::Lexical::Token]
43
43
  def earley_parse(initial_token)
44
44
  chart = new_chart
45
45
  tokens = [initial_token]
@@ -64,7 +64,8 @@ module Dendroid
64
64
  break unless advance
65
65
  end
66
66
 
67
- determine_outcome(chart, tokens)
67
+ chart.tokens = tokens
68
+ determine_outcome(chart)
68
69
  chart
69
70
  end
70
71
 
@@ -76,7 +77,11 @@ module Dendroid
76
77
  prd = grm_analysis.grammar.nonterm2production[top_symbol]
77
78
  chart = Chart.new
78
79
  seed_items = prd.predicted_items
79
- seed_items.each { |item| chart.seed_last_set(EItem.new(item, 0)) }
80
+ seed_items.each do |item|
81
+ entry = EItem.new(item, 0)
82
+ entry.algo = :predictor
83
+ chart.seed_last_set(entry)
84
+ end
80
85
 
81
86
  chart
82
87
  end
@@ -117,27 +122,26 @@ module Dendroid
117
122
  # Error case: next actual token matches none of the expected tokens.
118
123
  def predictor(chart, item, rank, tokens, mode, predicted_symbols)
119
124
  next_symbol = item.next_symbol
120
- if mode == :genuine
121
- predicted_symbols << Set.new if rank == predicted_symbols.size
122
- predicted = predicted_symbols[rank]
123
- return if predicted.include?(next_symbol)
124
-
125
- predicted.add(next_symbol)
126
- end
125
+ # if mode == :genuine
126
+ # predicted_symbols << Set.new if rank == predicted_symbols.size
127
+ # predicted = predicted_symbols[rank]
128
+ # return if predicted.include?(next_symbol)
129
+ #
130
+ # predicted.add(next_symbol)
131
+ # end
127
132
 
128
133
  curr_set = chart[rank]
129
134
  next_token = tokens[rank]
130
135
  prd = grm_analysis.symbol2production(next_symbol)
131
136
  entry_items = prd.predicted_items
137
+ added = []
132
138
  entry_items.each do |entry|
133
139
  member = entry.next_symbol
134
140
  if member&.terminal?
135
141
  next unless next_token
136
142
  next if (member.name != next_token.terminal) && mode == :genuine
137
143
  end
138
-
139
- new_item = EItem.new(entry, rank)
140
- curr_set.add_item(new_item)
144
+ added << add_item(curr_set, entry, rank, item, :predictor)
141
145
  end
142
146
  # Use trick from paper John Aycock and R. Nigel Horspool: "Practical Earley Parsing"
143
147
  return unless next_symbol.nullable?
@@ -145,8 +149,9 @@ module Dendroid
145
149
  next_item = grm_analysis.next_item(item.dotted_item)
146
150
  return unless next_item
147
151
 
148
- new_item = EItem.new(next_item, item.origin)
149
- curr_set.add_item(new_item)
152
+ special = add_item(curr_set, next_item, item.origin, nil, :predictor)
153
+ # special = add_item(curr_set, next_item, item.origin, added.shift, :predictor)
154
+ # added.each { |e| special.add_predecessor(e) }
150
155
  end
151
156
 
152
157
  # procedure SCANNER((A → α•aβ, j), k, words)
@@ -161,8 +166,7 @@ module Dendroid
161
166
  new_rank = rank + 1
162
167
  chart.append_new_set if chart[new_rank].nil?
163
168
  next_dotted_item = grm_analysis.next_item(dit)
164
- new_item = EItem.new(next_dotted_item, scan_item.origin)
165
- chart[new_rank].add_item(new_item)
169
+ add_item(chart[new_rank], next_dotted_item, scan_item.origin, scan_item, :scanner)
166
170
  advance = true
167
171
  end
168
172
 
@@ -190,8 +194,7 @@ module Dendroid
190
194
  next if member.name != next_token.terminal
191
195
  end
192
196
 
193
- new_item = EItem.new(return_item, call_item.origin)
194
- curr_set.add_item(new_item)
197
+ add_item(curr_set, return_item, call_item.origin, item, :completer)
195
198
  end
196
199
  end
197
200
 
@@ -206,8 +209,18 @@ module Dendroid
206
209
  end
207
210
  end
208
211
 
209
- def determine_outcome(chart, tokens)
212
+ def add_item(item_set, dotted_item, origin, predecessor, procedure)
213
+ new_item = EItem.new(dotted_item, origin)
214
+ added = item_set.add_item(new_item)
215
+ added.add_predecessor(predecessor) if predecessor
216
+ new_item.algo = procedure
217
+
218
+ added
219
+ end
220
+
221
+ def determine_outcome(chart)
210
222
  success = false
223
+ tokens = chart.tokens
211
224
  if chart.size == tokens.size + 1
212
225
  top_symbol = grm_analysis.grammar.start_symbol
213
226
  top_rule = grm_analysis.grammar.nonterm2production[top_symbol]
@@ -211,7 +211,7 @@ module Dendroid
211
211
  backlog.subtract(to_remove)
212
212
  end
213
213
 
214
- backlog.each { |i| rules[i].non_productive }
214
+ # backlog.each { |i| rules[i].non_productive }
215
215
  non_productive = symbols.reject(&:productive?)
216
216
  non_productive.each { |symb| symb.productive = false }
217
217
  non_productive
@@ -2,28 +2,60 @@
2
2
 
3
3
  module Dendroid
4
4
  module Syntax
5
- # In a context-free grammar, a rule has its left-hand side (LHS)
6
- # that consists solely of one non-terminal symbol.
7
- # and the right-hand side (RHS) consists of one or more sequence of symbols.
8
- # The symbols in RHS can be either terminal or non-terminal symbols.
9
- # The rule stipulates that the LHS is equivalent to the RHS,
10
- # in other words every occurrence of the LHS can be substituted to
11
- # corresponding RHS.
5
+ # A context-free grammar rule: a head (left-hand side) non-terminal
6
+ # together with one or more alternative right-hand sides (symbol sequences).
12
7
  class Rule
13
8
  # @return [Dendroid::Syntax::NonTerminal] The left-hand side of the rule.
14
9
  attr_reader :head
15
10
  alias lhs head
16
11
 
17
- # Create a Rule instance.
18
- # @param lhs [Dendroid::Syntax::NonTerminal] The left-hand side of the rule.
19
- def initialize(lhs)
20
- @head = valid_head(lhs)
12
+ # @return [Array<Dendroid::Syntax::SymbolSeq>]
13
+ attr_reader :alternatives
14
+
15
+ # Create a Rule instance.
16
+ # @param theLhs [Dendroid::Syntax::NonTerminal] The left-hand side of the rule.
17
+ # @param alt [Array<Dendroid::Syntax::SymbolSeq>] the alternatives (each as a sequence of symbols).
18
+ def initialize(theLhs, alt)
19
+ @head = valid_head(theLhs)
20
+ @alternatives = valid_alternatives(alt)
21
21
  end
22
22
 
23
- # Return the text representation of the rule
23
+ # Return the text representation of the rule
24
24
  # @return [String]
25
25
  def to_s
26
- head.to_s
26
+ "#{head} => #{alternatives.join(' | ')}"
27
+ end
28
+
29
+ # Predicate method to check whether the choice rule body is productive.
30
+ # It is productive when at least one of its alternatives is productive.
31
+ # @return [Boolean]
32
+ def productive?
33
+ productive_alts = alternatives.select(&:productive?)
34
+ return false if productive_alts.empty?
35
+
36
+ @productive = Set.new(productive_alts)
37
+ head.productive = true
38
+ end
39
+
40
+ # Predicate method to check whether the rule has at least one empty alternative.
41
+ # @return [Boolean]
42
+ def empty?
43
+ alternatives.any?(&:empty?)
44
+ end
45
+
46
+ # Returns an array with the symbol sequence of its alternatives
47
+ # @return [Array<Dendroid::Syntax::SymbolSeq>]
48
+ def rhs
49
+ alternatives
50
+ end
51
+
52
+ # Equality operator
53
+ # Two production rules are equal when their head and alternatives are equal.
54
+ # @return [Boolean]
55
+ def ==(other)
56
+ return true if equal?(other)
57
+
58
+ (head == other.head) && (alternatives == other.alternatives)
27
59
  end
28
60
 
29
61
  # The set of all grammar symbols that occur in the rhs.
@@ -70,6 +102,32 @@ module Dendroid
70
102
 
71
103
  lhs
72
104
  end
105
+
106
+ def valid_alternatives(alt)
107
+ raise StandardError, "Expecting an Array, found a #{rhs.class} instead." unless alt.is_a?(Array)
108
+
109
+ if alt.size.zero?
110
+ # A choice must have at least two alternatives
111
+ raise StandardError, "The choice for `#{head}` must have at least one alternative."
112
+ end
113
+
114
+ # Verify that each array element is a valid symbol sequence
115
+ alt.each { |elem| valid_sequence(elem) }
116
+
117
+ # Fail when duplicate rhs found
118
+ alt_texts = alt.map(&:to_s)
119
+ no_duplicate = alt_texts.uniq
120
+ if alt_texts.size > no_duplicate.size
121
+ alt_texts.each_with_index do |str, i|
122
+ next if str == no_duplicate[i]
123
+
124
+ err_msg = "Duplicate alternatives: #{head} => #{alt_texts[i]}"
125
+ raise StandardError, err_msg
126
+ end
127
+ end
128
+
129
+ alt
130
+ end
73
131
  end # class
74
132
  end # module
75
133
  end # module