dendroid 0.0.12 → 0.1.00
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +3 -0
- data/lib/dendroid/grm_analysis/grm_analyzer.rb +22 -57
- data/lib/dendroid/recognizer/chart.rb +10 -2
- data/lib/dendroid/recognizer/recognizer.rb +43 -56
- data/lib/dendroid/syntax/grammar.rb +71 -59
- data/spec/dendroid/syntax/grammar_spec.rb +28 -13
- data/version.txt +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 722b27a6f20e87c43de339b3f0c45e2bcc77c464d5dd9ecd56bbb686c4857b61
|
4
|
+
data.tar.gz: ce6ffd0c100ea7b7c336044e2877617eebb85c7bb306d5de9d1d4395200320aa
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 69870ade1f77e7fe0b9faf20b7943a500abdf2b41d383a4e048438e431e6f65bf4b418806ec4ba325a6839ee4eb1337085772fe7fa5c594b59663cd653cdeac6
|
7
|
+
data.tar.gz: 137bbf46a71dcb603f3866f51b5be25a8557fb574937e2e1b3f8b40980cd6e7fd6a851c84b229b7b7ecb692d2899d721e3bcc041fe09e213318f46efdb041ea7
|
data/CHANGELOG.md
CHANGED
data/lib/dendroid/grm_analysis/grm_analyzer.rb
CHANGED
@@ -13,7 +13,6 @@ module Dendroid
|
|
13
13
|
attr_reader :grammar
|
14
14
|
attr_reader :items
|
15
15
|
attr_reader :production2items
|
16
|
-
attr_reader :symbol2productions
|
17
16
|
|
18
17
|
# @return [Dendroid::Syntax::Terminal] The pseudo-terminal `__epsilon` (for empty string)
|
19
18
|
attr_reader :epsilon
|
@@ -37,7 +36,6 @@ module Dendroid
|
|
37
36
|
@grammar = aGrammar
|
38
37
|
@items = []
|
39
38
|
@production2items = {}
|
40
|
-
@symbol2productions = {}
|
41
39
|
@epsilon = Syntax::Terminal.new(:__epsilon)
|
42
40
|
@endmarker = Syntax::Terminal.new(:"$$")
|
43
41
|
@first_sets = {}
|
@@ -56,14 +54,14 @@ module Dendroid
|
|
56
54
|
prod.next_item(aDottedItem)
|
57
55
|
end
|
58
56
|
|
57
|
+
def symbol2production(sym)
|
58
|
+
grammar.nonterm2production[sym]
|
59
|
+
end
|
60
|
+
|
59
61
|
private
|
60
62
|
|
61
63
|
def build_dotted_items
|
62
64
|
grammar.rules.each do |prod|
|
63
|
-
lhs = prod.head
|
64
|
-
symbol2productions[lhs] = [] unless symbol2productions.include? lhs
|
65
|
-
symbol2productions[lhs] << prod
|
66
|
-
# production2items[prod] = []
|
67
65
|
mixin = prod.choice? ? ChoiceItems : ProductionItems
|
68
66
|
prod.extend(mixin)
|
69
67
|
prod.build_items
|
@@ -76,33 +74,31 @@ module Dendroid
|
|
76
74
|
def build_first_sets
|
77
75
|
initialize_first_sets
|
78
76
|
|
79
|
-
|
77
|
+
loop do
|
80
78
|
changed = false
|
81
79
|
grammar.rules.each do |prod|
|
82
80
|
head = prod.head
|
83
81
|
first_head = first_sets[head]
|
84
82
|
pre_first_size = first_head.size
|
85
|
-
|
86
|
-
|
87
|
-
first_head.merge(sequence_first(alt.members))
|
88
|
-
end
|
89
|
-
else
|
90
|
-
first_head.merge(sequence_first(prod.body.members))
|
83
|
+
prod.rhs.each do |seq|
|
84
|
+
first_head.merge(sequence_first(seq.members))
|
91
85
|
end
|
92
86
|
changed = true if first_head.size > pre_first_size
|
93
87
|
end
|
94
|
-
|
88
|
+
break unless changed
|
89
|
+
end
|
95
90
|
end
|
96
91
|
|
97
92
|
def initialize_first_sets
|
98
93
|
grammar.symbols.each do |symb|
|
99
|
-
if symb.terminal?
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
94
|
+
set_arg = if symb.terminal?
|
95
|
+
[symb]
|
96
|
+
elsif symb.nullable?
|
97
|
+
[epsilon]
|
98
|
+
else
|
99
|
+
nil
|
100
|
+
end
|
101
|
+
first_sets[symb] = Set.new(set_arg)
|
106
102
|
end
|
107
103
|
end
|
108
104
|
|
@@ -122,43 +118,11 @@ module Dendroid
|
|
122
118
|
def build_follow_sets
|
123
119
|
initialize_follow_sets
|
124
120
|
|
125
|
-
|
121
|
+
loop do
|
126
122
|
changed = false
|
127
123
|
grammar.rules.each do |prod|
|
128
|
-
|
129
|
-
|
130
|
-
body = alt.members
|
131
|
-
next if body.empty?
|
132
|
-
|
133
|
-
head = prod.head
|
134
|
-
head_follow = follow_sets[head]
|
135
|
-
# trailer = Set.new
|
136
|
-
last = true
|
137
|
-
last_index = body.size - 1
|
138
|
-
last_index.downto(0) do |i|
|
139
|
-
symbol = body[i]
|
140
|
-
next if symbol.terminal?
|
141
|
-
|
142
|
-
follow_symbol = follow_sets[symbol]
|
143
|
-
size_before = follow_symbol.size
|
144
|
-
if last
|
145
|
-
# Rule: if last non-terminal member (symbol) is nullable
|
146
|
-
# then add FOLLOW(head) to FOLLOW(symbol)
|
147
|
-
follow_sets[symbol].merge(head_follow) if symbol.nullable?
|
148
|
-
last = false
|
149
|
-
else
|
150
|
-
symbol_seq = body.slice(i + 1, last_index - i)
|
151
|
-
trailer_first = sequence_first(symbol_seq)
|
152
|
-
contains_epsilon = trailer_first.include? epsilon
|
153
|
-
trailer_first.delete(epsilon) if contains_epsilon
|
154
|
-
follow_sets[symbol].merge(trailer_first)
|
155
|
-
follow_sets[symbol].merge(head_follow) if contains_epsilon
|
156
|
-
end
|
157
|
-
changed = true if follow_sets[symbol].size > size_before
|
158
|
-
end
|
159
|
-
end
|
160
|
-
else
|
161
|
-
body = prod.body.members
|
124
|
+
prod.rhs.each do |alt|
|
125
|
+
body = alt.members
|
162
126
|
next if body.empty?
|
163
127
|
|
164
128
|
head = prod.head
|
@@ -189,7 +153,8 @@ module Dendroid
|
|
189
153
|
end
|
190
154
|
end
|
191
155
|
end
|
192
|
-
|
156
|
+
break unless changed
|
157
|
+
end
|
193
158
|
end
|
194
159
|
|
195
160
|
def initialize_follow_sets
|
data/lib/dendroid/recognizer/chart.rb
CHANGED
@@ -19,10 +19,10 @@ module Dendroid
|
|
19
19
|
attr_writer :success
|
20
20
|
|
21
21
|
# @return [StandardError] The exception class in case of an error found by the recognizer
|
22
|
-
|
22
|
+
attr_reader :failure_class
|
23
23
|
|
24
24
|
# @return [String] The error message
|
25
|
-
|
25
|
+
attr_reader :failure_reason
|
26
26
|
|
27
27
|
def_delegators :@item_sets, :[], :last, :size
|
28
28
|
|
@@ -50,6 +50,14 @@ module Dendroid
|
|
50
50
|
def successful?
|
51
51
|
@success
|
52
52
|
end
|
53
|
+
|
54
|
+
# Set the error cause.
|
55
|
+
# @param exception_class [StandardError] Exception class
|
56
|
+
# @param message [String] Error message
|
57
|
+
def failure(exception_class, message)
|
58
|
+
@failure_class = exception_class
|
59
|
+
@failure_reason = message
|
60
|
+
end
|
53
61
|
end # class
|
54
62
|
end # module
|
55
63
|
end # module
|
data/lib/dendroid/recognizer/recognizer.rb
CHANGED
@@ -31,8 +31,7 @@ module Dendroid
|
|
31
31
|
tok = tokenizer.next_token
|
32
32
|
if tok.nil? && !grm_analysis.grammar.start_symbol.nullable?
|
33
33
|
chart = new_chart
|
34
|
-
chart.
|
35
|
-
chart.failure_reason = 'Error: Input may not be empty nor blank.'
|
34
|
+
chart.failure(StandardError, 'Error: Input may not be empty nor blank.')
|
36
35
|
chart
|
37
36
|
else
|
38
37
|
earley_parse(tok)
|
@@ -62,7 +61,7 @@ module Dendroid
|
|
62
61
|
|
63
62
|
rank += 1 if advance
|
64
63
|
break if eos_reached && !advance
|
65
|
-
break
|
64
|
+
break unless advance
|
66
65
|
end
|
67
66
|
|
68
67
|
determine_outcome(chart, tokens)
|
@@ -74,13 +73,10 @@ module Dendroid
|
|
74
73
|
def new_chart
|
75
74
|
top_symbol = grm_analysis.grammar.start_symbol
|
76
75
|
|
77
|
-
|
78
|
-
prods = grm_analysis.grammar.nonterm2productions[top_symbol]
|
76
|
+
prd = grm_analysis.grammar.nonterm2production[top_symbol]
|
79
77
|
chart = Chart.new
|
80
|
-
|
81
|
-
|
82
|
-
seed_items.each { |item| chart.seed_last_set(EItem.new(item, 0)) }
|
83
|
-
end
|
78
|
+
seed_items = prd.predicted_items
|
79
|
+
seed_items.each { |item| chart.seed_last_set(EItem.new(item, 0)) }
|
84
80
|
|
85
81
|
chart
|
86
82
|
end
|
@@ -103,12 +99,10 @@ module Dendroid
|
|
103
99
|
|
104
100
|
if entry.completed?
|
105
101
|
completer(chart, entry, rank, tokens, mode)
|
102
|
+
elsif entry.next_symbol.terminal?
|
103
|
+
advance = scanner(chart, entry, rank, tokens)
|
106
104
|
else
|
107
|
-
|
108
|
-
advance = scanner(chart, entry, rank, tokens)
|
109
|
-
else
|
110
|
-
predictor(chart, entry, rank, tokens, mode, predicted_symbols)
|
111
|
-
end
|
105
|
+
predictor(chart, entry, rank, tokens, mode, predicted_symbols)
|
112
106
|
end
|
113
107
|
|
114
108
|
advance
|
@@ -131,31 +125,28 @@ module Dendroid
|
|
131
125
|
predicted.add(next_symbol)
|
132
126
|
end
|
133
127
|
|
134
|
-
prods = grm_analysis.symbol2productions[next_symbol]
|
135
128
|
curr_set = chart[rank]
|
136
129
|
next_token = tokens[rank]
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
end
|
145
|
-
|
146
|
-
new_item = EItem.new(entry, rank)
|
147
|
-
curr_set.add_item(new_item)
|
130
|
+
prd = grm_analysis.symbol2production(next_symbol)
|
131
|
+
entry_items = prd.predicted_items
|
132
|
+
entry_items.each do |entry|
|
133
|
+
member = entry.next_symbol
|
134
|
+
if member&.terminal?
|
135
|
+
next unless next_token
|
136
|
+
next if (member.name != next_token.terminal) && mode == :genuine
|
148
137
|
end
|
149
|
-
end
|
150
138
|
|
151
|
-
|
152
|
-
|
153
|
-
next_item = grm_analysis.next_item(item.dotted_item)
|
154
|
-
if next_item
|
155
|
-
new_item = EItem.new(next_item, item.origin)
|
156
|
-
curr_set.add_item(new_item)
|
157
|
-
end
|
139
|
+
new_item = EItem.new(entry, rank)
|
140
|
+
curr_set.add_item(new_item)
|
158
141
|
end
|
142
|
+
# Use trick from paper John Aycock and R. Nigel Horspool: "Practical Earley Parsing"
|
143
|
+
return unless next_symbol.nullable?
|
144
|
+
|
145
|
+
next_item = grm_analysis.next_item(item.dotted_item)
|
146
|
+
return unless next_item
|
147
|
+
|
148
|
+
new_item = EItem.new(next_item, item.origin)
|
149
|
+
curr_set.add_item(new_item)
|
159
150
|
end
|
160
151
|
|
161
152
|
# procedure SCANNER((A → α•aβ, j), k, words)
|
@@ -219,13 +210,11 @@ module Dendroid
|
|
219
210
|
success = false
|
220
211
|
if chart.size == tokens.size + 1
|
221
212
|
top_symbol = grm_analysis.grammar.start_symbol
|
222
|
-
|
223
|
-
final_items =
|
224
|
-
items.concat(rule.reduce_items)
|
225
|
-
end
|
213
|
+
top_rule = grm_analysis.grammar.nonterm2production[top_symbol]
|
214
|
+
final_items = top_rule.reduce_items
|
226
215
|
last_set = chart.item_sets.last
|
227
216
|
last_set.each do |entry|
|
228
|
-
next if
|
217
|
+
next if !entry.origin.zero? || !final_items.include?(entry.dotted_item)
|
229
218
|
|
230
219
|
success = true
|
231
220
|
end
|
@@ -239,39 +228,37 @@ module Dendroid
|
|
239
228
|
offending_token = tokens[chart.size - 1]
|
240
229
|
pos = offending_token.position
|
241
230
|
(line, col) = [pos.lineno, pos.column]
|
242
|
-
|
243
|
-
terminals = last_set.items.reduce([]) do |result, ent|
|
244
|
-
result << ent.next_symbol if ent.pre_scan?
|
245
|
-
result
|
246
|
-
end
|
247
|
-
terminals.uniq!
|
231
|
+
terminals = expected_terminals(chart)
|
248
232
|
prefix = "Syntax error at or near token line #{line}, column #{col} >>>#{offending_token.source}<<<"
|
249
233
|
expectation = terminals.size == 1 ? terminals[0].name.to_s : "one of: [#{terminals.map(&:name).join(', ')}]"
|
250
234
|
err_msg = "#{prefix} Expected #{expectation}, found a #{offending_token.terminal} instead."
|
251
|
-
chart.
|
252
|
-
chart.failure_reason = err_msg
|
235
|
+
chart.failure(StandardError, err_msg)
|
253
236
|
elsif chart.size == tokens.size + 1
|
254
237
|
# EOS unexpected...
|
255
238
|
last_token = tokens.last
|
256
239
|
pos = last_token.position
|
257
240
|
(line, col) = [pos.lineno, pos.column]
|
258
|
-
|
259
|
-
terminals = last_set.items.reduce([]) do |result, ent|
|
260
|
-
result << ent.next_symbol if ent.pre_scan?
|
261
|
-
result
|
262
|
-
end
|
263
|
-
terminals.uniq!
|
264
|
-
|
241
|
+
terminals = expected_terminals(chart)
|
265
242
|
prefix = "Line #{line}, column #{col}: Premature end of input after '#{last_token.source}'"
|
266
243
|
expectation = terminals.size == 1 ? terminals[0].name.to_s : "one of: [#{terminals.map(&:name).join(', ')}]"
|
267
244
|
err_msg = "#{prefix}, expected: #{expectation}."
|
268
|
-
chart.
|
269
|
-
chart.failure_reason = err_msg
|
245
|
+
chart.failure(StandardError, err_msg)
|
270
246
|
end
|
271
247
|
end
|
272
248
|
chart.success = success
|
273
249
|
end
|
274
250
|
|
251
|
+
def expected_terminals(chart)
|
252
|
+
last_set = chart.last
|
253
|
+
terminals = last_set.items.reduce([]) do |result, ent|
|
254
|
+
result << ent.next_symbol if ent.pre_scan?
|
255
|
+
result
|
256
|
+
end
|
257
|
+
terminals.uniq!
|
258
|
+
|
259
|
+
terminals
|
260
|
+
end
|
261
|
+
|
275
262
|
def replay_last_set(chart, tokens)
|
276
263
|
rank = chart.size - 1
|
277
264
|
seed_set(chart, rank) # Re-initialize last set with scan entries
|
data/lib/dendroid/syntax/grammar.rb
CHANGED
@@ -17,47 +17,48 @@ module Dendroid
|
|
17
17
|
# @return [Array<Dendroid::Syntax::GrmSymbol>] The terminal and non-terminal symbols.
|
18
18
|
attr_reader :symbols
|
19
19
|
|
20
|
+
# A Hash that maps symbol names to their grammar symbols
|
21
|
+
# @return [Hash{String|Symbol => Dendroid::Syntax::GrmSymbol}]
|
22
|
+
attr_reader :name2symbol
|
23
|
+
|
20
24
|
# The list of production rules for the language.
|
21
25
|
# @return [Array<Dendroid::Syntax::Rule>] Array of rules for the grammar.
|
22
26
|
attr_reader :rules
|
23
27
|
|
24
|
-
# A Hash that maps symbol names to their grammar symbols
|
25
|
-
# @return [Hash{String => Dendroid::Syntax::GrmSymbol}]
|
26
|
-
attr_reader :name2symbol
|
27
|
-
|
28
|
-
# TODO: make nonterminal - rules one-to-one
|
29
28
|
# A Hash that maps symbol names to their grammar symbols
|
30
29
|
# @return [Hash{Dendroid::Syntax::GrmSymbol => Dendroid::Syntax::Rule}]
|
31
|
-
attr_reader :
|
30
|
+
attr_reader :nonterm2production
|
32
31
|
|
33
32
|
# Constructor.
|
34
33
|
# @param terminals [Array<Dendroid::Syntax::Terminal>]
|
35
34
|
def initialize(terminals)
|
36
35
|
@symbols = []
|
37
36
|
@name2symbol = {}
|
37
|
+
@rules = []
|
38
|
+
@nonterm2production = {}
|
38
39
|
add_terminals(terminals)
|
39
40
|
end
|
40
41
|
|
41
|
-
# Add a rule to the grammar
|
42
|
+
# Add a rule to the grammar.
|
42
43
|
# @param rule [Dendroid::Syntax::Rule]
|
43
44
|
def add_rule(rule)
|
44
|
-
if
|
45
|
-
|
46
|
-
|
45
|
+
if lhs_already_defined?(rule)
|
46
|
+
msg = "Non-terminal '#{rule.head}' is on left-hand side of more than one rule."
|
47
|
+
raise StandardError, msg
|
47
48
|
end
|
48
|
-
|
49
|
-
|
50
|
-
raise StandardError, "Production rule '#{rule}' appears more than once in the grammar."
|
49
|
+
if duplicate_rule?(rule)
|
50
|
+
raise StandardError, "Duplicate production rule '#{rule}'."
|
51
51
|
end
|
52
52
|
|
53
53
|
add_symbol(rule.head)
|
54
54
|
rule.nonterminals.each { |nonterm| add_symbol(nonterm) }
|
55
55
|
rules << rule
|
56
|
-
|
57
|
-
nonterm2productions[rule.head] << rule
|
56
|
+
nonterm2production[rule.head] = rule
|
58
57
|
end
|
59
58
|
|
60
|
-
# Return the start symbol for the language
|
59
|
+
# Return the start symbol for the language, that is,
|
60
|
+
# the non-terminal symbol used to denote the top-level
|
61
|
+
# construct of the language being defined.
|
61
62
|
# @return [Dendroid::Syntax::NonTerminal]
|
62
63
|
def start_symbol
|
63
64
|
rules.first.lhs
|
@@ -73,10 +74,14 @@ module Dendroid
|
|
73
74
|
|
74
75
|
private
|
75
76
|
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
77
|
+
def lhs_already_defined?(rule)
|
78
|
+
nonterm2production.include? rule.head
|
79
|
+
end
|
80
|
+
|
81
|
+
def duplicate_rule?(rule)
|
82
|
+
nonterm2production[rule.head]&.include? rule
|
83
|
+
end
|
84
|
+
|
80
85
|
def add_terminals(terminals)
|
81
86
|
terminals.each { |term| add_symbol(term) }
|
82
87
|
end
|
@@ -89,6 +94,15 @@ module Dendroid
|
|
89
94
|
name2symbol[symb.name.to_s] = symb
|
90
95
|
end
|
91
96
|
|
97
|
+
def all_terminals
|
98
|
+
Set.new(symbols.select(&:terminal?))
|
99
|
+
end
|
100
|
+
|
101
|
+
def all_nonterminals
|
102
|
+
Set.new(symbols.reject(&:terminal?))
|
103
|
+
end
|
104
|
+
|
105
|
+
# Perform correctness checks of the grammar.
|
92
106
|
def validate
|
93
107
|
at_least_one_terminal
|
94
108
|
are_terminals_referenced?
|
@@ -104,7 +118,6 @@ module Dendroid
|
|
104
118
|
# Does the grammar contain at least one terminal symbol?
|
105
119
|
def at_least_one_terminal
|
106
120
|
found = symbols.any?(&:terminal?)
|
107
|
-
|
108
121
|
return true if found
|
109
122
|
|
110
123
|
err_msg = "Grammar doesn't contain any terminal symbol."
|
@@ -114,37 +127,28 @@ module Dendroid
|
|
114
127
|
# Does every terminal symbol appear at least once
|
115
128
|
# in a rhs of a production rule?
|
116
129
|
def are_terminals_referenced?
|
117
|
-
all_terminals = Set.new(symbols.select(&:terminal?))
|
118
130
|
terms_in_rhs = rules.reduce(Set.new) do |collected, prd|
|
119
131
|
found = prd.terminals
|
120
132
|
collected.merge(found)
|
121
133
|
end
|
122
|
-
|
123
|
-
unless check_ok
|
124
|
-
unused_terms = all_terminals.difference(terms_in_rhs)
|
125
|
-
text = unused_terms.map(&:name).join("', '")
|
126
|
-
err_msg = "Terminal symbols '#{text}' never appear in production rules."
|
127
|
-
raise StandardError, err_msg
|
128
|
-
end
|
134
|
+
return true if all_terminals == terms_in_rhs
|
129
135
|
|
130
|
-
|
136
|
+
unused_terms = all_terminals.difference(terms_in_rhs)
|
137
|
+
text = unused_terms.map(&:name).join("', '")
|
138
|
+
err_msg = "Terminal symbols '#{text}' never appear in production rules."
|
139
|
+
raise StandardError, err_msg
|
131
140
|
end
|
132
141
|
|
133
142
|
def are_nonterminals_rewritten?
|
134
|
-
all_nonterminals = Set.new(symbols.reject(&:terminal?))
|
135
|
-
|
136
143
|
symbs_in_lhs = rules.reduce(Set.new) do |collected, prd|
|
137
144
|
collected.add(prd.head)
|
138
145
|
end
|
139
|
-
|
140
|
-
unless check_ok
|
141
|
-
undefined_nterms = all_nonterminals.difference(symbs_in_lhs)
|
142
|
-
text = undefined_nterms.map(&:name).join("', '")
|
143
|
-
err_msg = "Non-terminal symbols '#{text}' never appear in head of any production rule."
|
144
|
-
raise StandardError, err_msg
|
145
|
-
end
|
146
|
+
return true if all_nonterminals == symbs_in_lhs
|
146
147
|
|
147
|
-
|
148
|
+
undefined_nterms = all_nonterminals.difference(symbs_in_lhs)
|
149
|
+
text = undefined_nterms.map(&:name).join("', '")
|
150
|
+
err_msg = "Non-terminal symbols '#{text}' never appear in head of any production rule."
|
151
|
+
raise StandardError, err_msg
|
148
152
|
end
|
149
153
|
|
150
154
|
def are_symbols_reachable?
|
@@ -165,28 +169,31 @@ module Dendroid
|
|
165
169
|
raise StandardError, err_msg
|
166
170
|
end
|
167
171
|
|
172
|
+
# rubocop: disable Metrics/AbcSize
|
173
|
+
# rubocop: disable Metrics/CyclomaticComplexity
|
174
|
+
# rubocop: disable Metrics/PerceivedComplexity
|
175
|
+
|
168
176
|
# Are all symbols reachable from start symbol?
|
177
|
+
# @return [Set<NonTerminal>] Set of unreachable symbols
|
169
178
|
def unreachable_symbols
|
170
179
|
backlog = [start_symbol]
|
171
180
|
set_reachable = Set.new(backlog.dup)
|
172
181
|
|
173
|
-
|
182
|
+
loop do
|
174
183
|
reachable_sym = backlog.pop
|
175
|
-
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
backlog.push(member)
|
180
|
-
end
|
181
|
-
set_reachable.add(member)
|
182
|
-
end
|
184
|
+
prd = nonterm2production[reachable_sym]
|
185
|
+
prd.rhs_symbols.each do |member|
|
186
|
+
backlog.push(member) unless member.terminal? || set_reachable.include?(member)
|
187
|
+
set_reachable.add(member)
|
183
188
|
end
|
184
|
-
|
189
|
+
break if backlog.empty?
|
190
|
+
end
|
185
191
|
|
186
192
|
all_symbols = Set.new(symbols)
|
187
193
|
all_symbols - set_reachable
|
188
194
|
end
|
189
195
|
|
196
|
+
# @return [Array<Dendroid::Syntax::NonTerminal>]
|
190
197
|
def mark_non_productive_symbols
|
191
198
|
prod_count = rules.size
|
192
199
|
backlog = Set.new(0...prod_count)
|
@@ -214,11 +221,11 @@ module Dendroid
|
|
214
221
|
nullable_found = false
|
215
222
|
sym2seqs = {}
|
216
223
|
|
217
|
-
|
218
|
-
if
|
224
|
+
nonterm2production.each_pair do |sym, prod|
|
225
|
+
if prod.empty?
|
219
226
|
sym.nullable = nullable_found = true
|
220
227
|
else
|
221
|
-
sym2seqs[sym] =
|
228
|
+
sym2seqs[sym] = prod.rhs
|
222
229
|
end
|
223
230
|
end
|
224
231
|
|
@@ -228,7 +235,7 @@ module Dendroid
|
|
228
235
|
seqs.each { |sq| backlog[sq] = [0, sym] }
|
229
236
|
end
|
230
237
|
|
231
|
-
|
238
|
+
loop do
|
232
239
|
seqs_done = []
|
233
240
|
backlog.each_pair do |sq, (elem_index, lhs)|
|
234
241
|
member = sq[elem_index]
|
@@ -256,18 +263,23 @@ module Dendroid
|
|
256
263
|
backlog.delete(sq)
|
257
264
|
end
|
258
265
|
end
|
259
|
-
|
266
|
+
break if backlog.empty? || seqs_done.empty?
|
267
|
+
end
|
260
268
|
end
|
261
269
|
|
270
|
+
# symbols.each do |sym|
|
271
|
+
# next if sym.terminal?
|
272
|
+
#
|
273
|
+
# sym.nullable = false if sym.nullable.nil?
|
274
|
+
# end
|
262
275
|
symbols.each do |sym|
|
263
|
-
next if sym.terminal?
|
276
|
+
next if sym.terminal? || sym.nullable?
|
264
277
|
|
265
|
-
sym.nullable = false
|
278
|
+
sym.nullable = false
|
266
279
|
end
|
267
280
|
end
|
268
281
|
# rubocop: enable Metrics/AbcSize
|
269
|
-
# rubocop: enable Metrics/
|
270
|
-
# rubocop: enable Metrics/MethodLength
|
282
|
+
# rubocop: enable Metrics/CyclomaticComplexity
|
271
283
|
# rubocop: enable Metrics/PerceivedComplexity
|
272
284
|
end # class
|
273
285
|
end # module
|
data/spec/dendroid/syntax/grammar_spec.rb
CHANGED
@@ -58,8 +58,8 @@ describe Dendroid::Syntax::Grammar do
|
|
58
58
|
expect(subject.symbols).to eq(all_terminals)
|
59
59
|
end
|
60
60
|
|
61
|
-
it '
|
62
|
-
expect(subject.rules).to
|
61
|
+
it 'does not have rules after initialization' do
|
62
|
+
expect(subject.rules).to be_empty
|
63
63
|
end
|
64
64
|
|
65
65
|
it 'maps a terminal name to one GrmSymbol object' do
|
@@ -109,14 +109,15 @@ describe Dendroid::Syntax::Grammar do
|
|
109
109
|
end
|
110
110
|
end
|
111
111
|
|
112
|
-
it 'maps every non-terminal to its defining
|
112
|
+
it 'maps every non-terminal to its defining production' do
|
113
113
|
rules = build_all_rules
|
114
114
|
rules.each { |rl| subject.add_rule(rl) }
|
115
115
|
%i[p s m t].each do |symb_name|
|
116
116
|
symb = subject.name2symbol[symb_name]
|
117
117
|
expected_prods = subject.rules.select { |prd| prd.head == symb }
|
118
|
-
|
119
|
-
|
118
|
+
expect(expected_prods.size).to eq(1)
|
119
|
+
related_prod = subject.nonterm2production[symb]
|
120
|
+
expect(related_prod).to eq(expected_prods[0])
|
120
121
|
end
|
121
122
|
end
|
122
123
|
end # context
|
@@ -230,8 +231,7 @@ describe Dendroid::Syntax::Grammar do
|
|
230
231
|
# No terminal symbol explicitly declared => all symbols are non-terminals
|
231
232
|
|
232
233
|
rule 'S' => 'A'
|
233
|
-
rule 'A' => 'a A c'
|
234
|
-
rule 'A' => 'b'
|
234
|
+
rule 'A' => ['a A c', 'b']
|
235
235
|
end
|
236
236
|
|
237
237
|
builder.grammar
|
@@ -243,8 +243,7 @@ describe Dendroid::Syntax::Grammar do
|
|
243
243
|
|
244
244
|
# # Wrong: terminals 'd' and 'e' never appear in rules
|
245
245
|
rule 'S' => 'A'
|
246
|
-
rule 'A' => 'a A c'
|
247
|
-
rule 'A' => 'b'
|
246
|
+
rule 'A' => ['a A c', 'b']
|
248
247
|
end
|
249
248
|
|
250
249
|
builder.grammar
|
@@ -282,13 +281,24 @@ describe Dendroid::Syntax::Grammar do
|
|
282
281
|
builder.grammar
|
283
282
|
end
|
284
283
|
|
284
|
+
def grm_multiple_defs
|
285
|
+
builder = Dendroid::GrmDSL::BaseGrmBuilder.new do
|
286
|
+
declare_terminals('a b c')
|
287
|
+
|
288
|
+
rule 'A' => %w[a B]
|
289
|
+
rule 'B' => ['b', '']
|
290
|
+
rule 'A' => 'c'
|
291
|
+
end
|
292
|
+
|
293
|
+
builder.grammar
|
294
|
+
end
|
295
|
+
|
285
296
|
def duplicate_production
|
286
297
|
builder = Dendroid::GrmDSL::BaseGrmBuilder.new do
|
287
|
-
declare_terminals('a'
|
298
|
+
declare_terminals('a')
|
288
299
|
|
289
300
|
rule 'S' => 'A'
|
290
|
-
rule 'A' => [
|
291
|
-
rule 'S' => 'A' # Duplicate rule
|
301
|
+
rule 'A' => %w[a a] # Duplicate alternatives
|
292
302
|
end
|
293
303
|
|
294
304
|
builder.grammar
|
@@ -329,8 +339,13 @@ describe Dendroid::Syntax::Grammar do
|
|
329
339
|
expect { grm_undefined_nterm }.to raise_error(StandardError, err_msg)
|
330
340
|
end
|
331
341
|
|
342
|
+
it 'raises an error when a non-terminal is defined multiple times' do
|
343
|
+
err_msg = "Non-terminal 'A' is on left-hand side of more than one rule."
|
344
|
+
expect { grm_multiple_defs }.to raise_error(StandardError, err_msg)
|
345
|
+
end
|
346
|
+
|
332
347
|
it 'raises an error when a production is duplicated' do
|
333
|
-
err_msg =
|
348
|
+
err_msg = 'Duplicate alternatives: A => a'
|
334
349
|
expect { duplicate_production }.to raise_error(StandardError, err_msg)
|
335
350
|
end
|
336
351
|
|
data/version.txt
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.0.12
|
1
|
+
0.1.00
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: dendroid
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.12
|
4
|
+
version: 0.1.00
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Dimitri Geshef
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-11-
|
11
|
+
date: 2023-11-03 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: WIP. A Ruby implementation of an Earley parser
|
14
14
|
email: famished.tiger@yahoo.com
|