dendroid 0.0.11 → 0.1.00
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +1 -1
- data/CHANGELOG.md +10 -0
- data/lib/dendroid/grm_analysis/grm_analyzer.rb +22 -57
- data/lib/dendroid/recognizer/chart.rb +16 -6
- data/lib/dendroid/recognizer/e_item.rb +0 -1
- data/lib/dendroid/recognizer/item_set.rb +1 -0
- data/lib/dendroid/recognizer/recognizer.rb +73 -82
- data/lib/dendroid/syntax/grammar.rb +71 -59
- data/spec/dendroid/recognizer/chart_spec.rb +0 -1
- data/spec/dendroid/recognizer/e_item_spec.rb +4 -0
- data/spec/dendroid/recognizer/item_set_spec.rb +1 -1
- data/spec/dendroid/recognizer/recognizer_spec.rb +594 -19
- data/spec/dendroid/support/sample_grammars.rb +249 -6
- data/spec/dendroid/syntax/grammar_spec.rb +165 -5
- data/version.txt +1 -1
- metadata +2 -2
@@ -17,47 +17,48 @@ module Dendroid
|
|
17
17
|
# @return [Array<Dendroid::Syntax::GrmSymbol>] The terminal and non-terminal symbols.
|
18
18
|
attr_reader :symbols
|
19
19
|
|
20
|
+
# A Hash that maps symbol names to their grammar symbols
|
21
|
+
# @return [Hash{String|Symbol => Dendroid::Syntax::GrmSymbol}]
|
22
|
+
attr_reader :name2symbol
|
23
|
+
|
20
24
|
# The list of production rules for the language.
|
21
25
|
# @return [Array<Dendroid::Syntax::Rule>] Array of rules for the grammar.
|
22
26
|
attr_reader :rules
|
23
27
|
|
24
|
-
# A Hash that maps symbol names to their grammar symbols
|
25
|
-
# @return [Hash{String => Dendroid::Syntax::GrmSymbol}]
|
26
|
-
attr_reader :name2symbol
|
27
|
-
|
28
|
-
# TODO: make nonterminal - rules one-to-one
|
29
28
|
# A Hash that maps each non-terminal symbol to its (single) production rule
|
30
29
|
# @return [Hash{Dendroid::Syntax::GrmSymbol => Dendroid::Syntax::Rule}]
|
31
|
-
attr_reader :
|
30
|
+
attr_reader :nonterm2production
|
32
31
|
|
33
32
|
# Constructor.
|
34
33
|
# @param terminals [Array<Dendroid::Syntax::Terminal>]
|
35
34
|
def initialize(terminals)
|
36
35
|
@symbols = []
|
37
36
|
@name2symbol = {}
|
37
|
+
@rules = []
|
38
|
+
@nonterm2production = {}
|
38
39
|
add_terminals(terminals)
|
39
40
|
end
|
40
41
|
|
41
|
-
# Add a rule to the grammar
|
42
|
+
# Add a rule to the grammar.
|
42
43
|
# @param rule [Dendroid::Syntax::Rule]
|
43
44
|
def add_rule(rule)
|
44
|
-
if
|
45
|
-
|
46
|
-
|
45
|
+
if lhs_already_defined?(rule)
|
46
|
+
msg = "Non-terminal '#{rule.head}' is on left-hand side of more than one rule."
|
47
|
+
raise StandardError, msg
|
47
48
|
end
|
48
|
-
|
49
|
-
|
50
|
-
raise StandardError, "Production rule '#{production}' appears more than once in the grammar."
|
49
|
+
if duplicate_rule?(rule)
|
50
|
+
raise StandardError, "Duplicate production rule '#{rule}'."
|
51
51
|
end
|
52
52
|
|
53
53
|
add_symbol(rule.head)
|
54
54
|
rule.nonterminals.each { |nonterm| add_symbol(nonterm) }
|
55
55
|
rules << rule
|
56
|
-
|
57
|
-
nonterm2productions[rule.head] << rule
|
56
|
+
nonterm2production[rule.head] = rule
|
58
57
|
end
|
59
58
|
|
60
|
-
# Return the start symbol for the language
|
59
|
+
# Return the start symbol for the language, that is,
|
60
|
+
# the non-terminal symbol used to denote the top-level
|
61
|
+
# construct of the language being defined.
|
61
62
|
# @return [Dendroid::Syntax::NonTerminal]
|
62
63
|
def start_symbol
|
63
64
|
rules.first.lhs
|
@@ -73,10 +74,14 @@ module Dendroid
|
|
73
74
|
|
74
75
|
private
|
75
76
|
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
77
|
+
def lhs_already_defined?(rule)
|
78
|
+
nonterm2production.include? rule.head
|
79
|
+
end
|
80
|
+
|
81
|
+
def duplicate_rule?(rule)
|
82
|
+
nonterm2production[rule.head]&.include? rule
|
83
|
+
end
|
84
|
+
|
80
85
|
def add_terminals(terminals)
|
81
86
|
terminals.each { |term| add_symbol(term) }
|
82
87
|
end
|
@@ -89,6 +94,15 @@ module Dendroid
|
|
89
94
|
name2symbol[symb.name.to_s] = symb
|
90
95
|
end
|
91
96
|
|
97
|
+
def all_terminals
|
98
|
+
Set.new(symbols.select(&:terminal?))
|
99
|
+
end
|
100
|
+
|
101
|
+
def all_nonterminals
|
102
|
+
Set.new(symbols.reject(&:terminal?))
|
103
|
+
end
|
104
|
+
|
105
|
+
# Perform correctness checks of the grammar.
|
92
106
|
def validate
|
93
107
|
at_least_one_terminal
|
94
108
|
are_terminals_referenced?
|
@@ -104,7 +118,6 @@ module Dendroid
|
|
104
118
|
# Does the grammar contain at least one terminal symbol?
|
105
119
|
def at_least_one_terminal
|
106
120
|
found = symbols.any?(&:terminal?)
|
107
|
-
|
108
121
|
return true if found
|
109
122
|
|
110
123
|
err_msg = "Grammar doesn't contain any terminal symbol."
|
@@ -114,37 +127,28 @@ module Dendroid
|
|
114
127
|
# Does every terminal symbol appear at least once
|
115
128
|
# in a rhs of a production rule?
|
116
129
|
def are_terminals_referenced?
|
117
|
-
all_terminals = Set.new(symbols.select(&:terminal?))
|
118
130
|
terms_in_rhs = rules.reduce(Set.new) do |collected, prd|
|
119
131
|
found = prd.terminals
|
120
132
|
collected.merge(found)
|
121
133
|
end
|
122
|
-
|
123
|
-
unless check_ok
|
124
|
-
unused_terms = all_terminals.difference(terms_in_rhs)
|
125
|
-
text = unused_terms.map(&:name).join("', '")
|
126
|
-
err_msg = "Terminal symbols '#{text}' never appear in production rules."
|
127
|
-
raise StandardError, err_msg
|
128
|
-
end
|
134
|
+
return true if all_terminals == terms_in_rhs
|
129
135
|
|
130
|
-
|
136
|
+
unused_terms = all_terminals.difference(terms_in_rhs)
|
137
|
+
text = unused_terms.map(&:name).join("', '")
|
138
|
+
err_msg = "Terminal symbols '#{text}' never appear in production rules."
|
139
|
+
raise StandardError, err_msg
|
131
140
|
end
|
132
141
|
|
133
142
|
def are_nonterminals_rewritten?
|
134
|
-
all_nonterminals = Set.new(symbols.reject(&:terminal?))
|
135
|
-
|
136
143
|
symbs_in_lhs = rules.reduce(Set.new) do |collected, prd|
|
137
144
|
collected.add(prd.head)
|
138
145
|
end
|
139
|
-
|
140
|
-
unless check_ok
|
141
|
-
undefined_nterms = all_nonterminals.difference(symbs_in_lhs)
|
142
|
-
text = undefined_nterms.map(&:name).join("', '")
|
143
|
-
err_msg = "Non-terminal symbols '#{text}' never appear in head of any production rule."
|
144
|
-
raise StandardError, err_msg
|
145
|
-
end
|
146
|
+
return true if all_nonterminals == symbs_in_lhs
|
146
147
|
|
147
|
-
|
148
|
+
undefined_nterms = all_nonterminals.difference(symbs_in_lhs)
|
149
|
+
text = undefined_nterms.map(&:name).join("', '")
|
150
|
+
err_msg = "Non-terminal symbols '#{text}' never appear in head of any production rule."
|
151
|
+
raise StandardError, err_msg
|
148
152
|
end
|
149
153
|
|
150
154
|
def are_symbols_reachable?
|
@@ -165,28 +169,31 @@ module Dendroid
|
|
165
169
|
raise StandardError, err_msg
|
166
170
|
end
|
167
171
|
|
172
|
+
# rubocop: disable Metrics/AbcSize
|
173
|
+
# rubocop: disable Metrics/CyclomaticComplexity
|
174
|
+
# rubocop: disable Metrics/PerceivedComplexity
|
175
|
+
|
168
176
|
# Are all symbols reachable from start symbol?
|
177
|
+
# @return [Set<Dendroid::Syntax::GrmSymbol>] Set of unreachable symbols (terminals or non-terminals)
|
169
178
|
def unreachable_symbols
|
170
179
|
backlog = [start_symbol]
|
171
180
|
set_reachable = Set.new(backlog.dup)
|
172
181
|
|
173
|
-
|
182
|
+
loop do
|
174
183
|
reachable_sym = backlog.pop
|
175
|
-
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
backlog.push(member)
|
180
|
-
end
|
181
|
-
set_reachable.add(member)
|
182
|
-
end
|
184
|
+
prd = nonterm2production[reachable_sym]
|
185
|
+
prd.rhs_symbols.each do |member|
|
186
|
+
backlog.push(member) unless member.terminal? || set_reachable.include?(member)
|
187
|
+
set_reachable.add(member)
|
183
188
|
end
|
184
|
-
|
189
|
+
break if backlog.empty?
|
190
|
+
end
|
185
191
|
|
186
192
|
all_symbols = Set.new(symbols)
|
187
193
|
all_symbols - set_reachable
|
188
194
|
end
|
189
195
|
|
196
|
+
# @return [Array<Dendroid::Syntax::NonTerminal>]
|
190
197
|
def mark_non_productive_symbols
|
191
198
|
prod_count = rules.size
|
192
199
|
backlog = Set.new(0...prod_count)
|
@@ -214,11 +221,11 @@ module Dendroid
|
|
214
221
|
nullable_found = false
|
215
222
|
sym2seqs = {}
|
216
223
|
|
217
|
-
|
218
|
-
if
|
224
|
+
nonterm2production.each_pair do |sym, prod|
|
225
|
+
if prod.empty?
|
219
226
|
sym.nullable = nullable_found = true
|
220
227
|
else
|
221
|
-
sym2seqs[sym] =
|
228
|
+
sym2seqs[sym] = prod.rhs
|
222
229
|
end
|
223
230
|
end
|
224
231
|
|
@@ -228,7 +235,7 @@ module Dendroid
|
|
228
235
|
seqs.each { |sq| backlog[sq] = [0, sym] }
|
229
236
|
end
|
230
237
|
|
231
|
-
|
238
|
+
loop do
|
232
239
|
seqs_done = []
|
233
240
|
backlog.each_pair do |sq, (elem_index, lhs)|
|
234
241
|
member = sq[elem_index]
|
@@ -256,18 +263,23 @@ module Dendroid
|
|
256
263
|
backlog.delete(sq)
|
257
264
|
end
|
258
265
|
end
|
259
|
-
|
266
|
+
break if backlog.empty? || seqs_done.empty?
|
267
|
+
end
|
260
268
|
end
|
261
269
|
|
270
|
+
# symbols.each do |sym|
|
271
|
+
# next if sym.terminal?
|
272
|
+
#
|
273
|
+
# sym.nullable = false if sym.nullable.nil?
|
274
|
+
# end
|
262
275
|
symbols.each do |sym|
|
263
|
-
next if sym.terminal?
|
276
|
+
next if sym.terminal? || sym.nullable?
|
264
277
|
|
265
|
-
sym.nullable = false
|
278
|
+
sym.nullable = false
|
266
279
|
end
|
267
280
|
end
|
268
281
|
# rubocop: enable Metrics/AbcSize
|
269
|
-
# rubocop: enable Metrics/
|
270
|
-
# rubocop: enable Metrics/MethodLength
|
282
|
+
# rubocop: enable Metrics/CyclomaticComplexity
|
271
283
|
# rubocop: enable Metrics/PerceivedComplexity
|
272
284
|
end # class
|
273
285
|
end # module
|
@@ -41,6 +41,8 @@ describe Dendroid::Recognizer::EItem do
|
|
41
41
|
expect(subject.lhs).to eq(expr_symb)
|
42
42
|
end # context
|
43
43
|
|
44
|
+
# rubocop: disable Lint/BinaryOperatorWithIdenticalOperands
|
45
|
+
|
44
46
|
it 'can compare with another EItem' do
|
45
47
|
expect(subject == subject).to be_truthy
|
46
48
|
expect(subject == described_class.new(sample_dotted, sample_origin)).to be_truthy
|
@@ -48,6 +50,8 @@ describe Dendroid::Recognizer::EItem do
|
|
48
50
|
expect(subject == described_class.new(other_dotted, sample_origin)).to be_falsey
|
49
51
|
end
|
50
52
|
|
53
|
+
# rubocop: enable Lint/BinaryOperatorWithIdenticalOperands
|
54
|
+
|
51
55
|
it 'can renders a String representation of itself' do
|
52
56
|
expect(subject.to_s).to eq("#{sample_dotted} @ #{sample_origin}")
|
53
57
|
end
|