dendroid 0.0.4 → 0.0.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +13 -1
- data/CHANGELOG.md +43 -0
- data/dendroid.gemspec +1 -0
- data/lib/dendroid/syntax/choice.rb +84 -0
- data/lib/dendroid/syntax/grammar.rb +275 -0
- data/lib/dendroid/syntax/production.rb +1 -2
- data/spec/dendroid/syntax/choice_spec.rb +54 -0
- data/spec/dendroid/syntax/grammar_spec.rb +205 -0
- data/version.txt +1 -1
- metadata +8 -3
- /data/lib/{dendroid/dendroid.rb → dendroid.rb} +0 -0
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: a762fb52e0c8ff6116f41f481b5cb7346b99050d90f77c4e133da37a31dda3b3
|
4
|
+
data.tar.gz: ace142c5221c038eedaab8ea9882a3fbffa34cb424be08083ee722e9e5d8c6ca
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 2746b3e1cd03e07e0045d7e41b54ed4c8a2f003df89ea45166fa0b66013f37aa9c1cf0044adc71bbe75d5958d99334b7f612d15c116954fe451f8e1e6756bfaa
|
7
|
+
data.tar.gz: 4917fd27358d80719b722c14a6df8636917ad827dfc29fb8201920b068393dba09dd2b9750d8b5946031a1d821bd6922c84dd27572b4a635e709faa4bfe31ceb
|
data/.rubocop.yml
CHANGED
@@ -2,9 +2,21 @@ Layout/EndOfLine:
|
|
2
2
|
Enabled: true
|
3
3
|
EnforcedStyle: lf
|
4
4
|
|
5
|
+
Metrics/AbcSize:
|
6
|
+
Enabled: true
|
7
|
+
Max: 25
|
8
|
+
|
5
9
|
Metrics/BlockLength:
|
6
10
|
Enabled: true
|
7
|
-
Max:
|
11
|
+
Max: 75
|
12
|
+
|
13
|
+
Metrics/ClassLength:
|
14
|
+
Enabled: true
|
15
|
+
Max: 200
|
16
|
+
|
17
|
+
Metrics/MethodLength:
|
18
|
+
Enabled: true
|
19
|
+
Max: 20
|
8
20
|
|
9
21
|
Naming/MethodParameterName:
|
10
22
|
Enabled: false
|
data/CHANGELOG.md
ADDED
@@ -0,0 +1,43 @@
|
|
1
|
+
# Changelog
|
2
|
+
|
3
|
+
## [Unreleased]
|
4
|
+
|
5
|
+
|
6
|
+
## [0.0.6] - 2023-10-30
|
7
|
+
### Added
|
8
|
+
- Class `Grammar` and its spec file
|
9
|
+
|
10
|
+
## [0.0.5] - 2023-10-28
|
11
|
+
### Added
|
12
|
+
- Class `Choice` and its spec file
|
13
|
+
|
14
|
+
### Fixed
|
15
|
+
- File `dendroid.gemspec`: added missing `CHANGELOG.md` in the package
|
16
|
+
|
17
|
+
## [0.0.4] - 2023-10-28
|
18
|
+
### Added
|
19
|
+
- Class `Production` and its spec file
|
20
|
+
|
21
|
+
## [0.0.3] - 2023-10-28
|
22
|
+
### Added
|
23
|
+
- Class `Rule` and its spec file
|
24
|
+
|
25
|
+
## [0.0.2] - 2023-10-28
|
26
|
+
### Added
|
27
|
+
- Class `SymbolSeq` and its spec file
|
28
|
+
- File `CHANGELOG.md`; the file you're reading now.
|
29
|
+
|
30
|
+
### Changed
|
31
|
+
- Line separator set to lf (line feed)
|
32
|
+
- Code re-styling to please Rubocop 1.57.1
|
33
|
+
|
34
|
+
## [0.0.1] - 2023-10-27
|
35
|
+
### Added
|
36
|
+
- Class `NonTerminal` and its spec file
|
37
|
+
|
38
|
+
## [0.0.0] - 2023-10-27
|
39
|
+
- Initial commit
|
40
|
+
|
41
|
+
### Added
|
42
|
+
- Class `GrmSymbol` and its spec file
|
43
|
+
- Class `Terminal` and its spec file
|
data/dendroid.gemspec
CHANGED
@@ -0,0 +1,84 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative 'rule'
|
4
|
+
|
5
|
+
module Dendroid
|
6
|
+
module Syntax
|
7
|
+
# A specialization of the Rule class.
# A choice is a rule with multiple rhs (its alternatives).
class Choice < Rule
  # @return [Array<Dendroid::Syntax::SymbolSeq>] the alternative right-hand sides
  attr_reader :alternatives

  # Create a Choice instance.
  # @param lhs [Dendroid::Syntax::NonTerminal] The left-hand side of the rule.
  # @param alt [Array<Dendroid::Syntax::SymbolSeq>] the alternatives (each as a sequence of symbols).
  # @raise [StandardError] when fewer than two alternatives are given, or when
  #   one alternative consists of the lhs alone (cyclic rule such as A => A).
  def initialize(lhs, alt)
    super(lhs)
    @alternatives = valid_alternatives(alt)
  end

  # Predicate method to check whether the rule has alternatives
  # @return [TrueClass]
  def choice?
    true
  end

  # Return the text representation of the choice
  # @return [String] the head, then ' => ', then the alternatives joined by ' | '
  def to_s
    "#{head} => #{alternatives.join(' | ')}"
  end

  # Predicate method to check whether the choice rule body is productive.
  # It is productive when at least one of its alternatives is productive.
  # Side effect: when productive, the productive alternatives are remembered
  # in @productive and the head symbol is flagged as productive.
  # @return [Boolean]
  def productive?
    productive_alts = alternatives.select(&:productive?)
    return false if productive_alts.empty?

    @productive = Set.new(productive_alts)
    head.productive = true # the assignment evaluates to true
  end

  # Predicate method to check whether the rule has at least one empty alternative.
  # @return [Boolean]
  def empty?
    alternatives.any?(&:empty?)
  end

  # Returns an array with the symbol sequence of its alternatives
  # @return [Array<Dendroid::Syntax::SymbolSeq>]
  def rhs
    alternatives
  end

  # Equality operator
  # Two choice rules are equal when their head and alternatives are equal.
  # Anything that is not a Choice (a Production, nil, ...) is never equal.
  # @param other [Object] the object to compare with
  # @return [Boolean]
  def ==(other)
    return true if equal?(other)
    # Testing against our own class avoids referencing the Production constant
    # (not required by this file) and avoids calling #alternatives on objects
    # that do not implement it.
    return false unless other.is_a?(Choice)

    (head == other.head) && (alternatives == other.alternatives)
  end

  private

  # Check the alternatives given to the constructor.
  # @param alt [Array<Dendroid::Syntax::SymbolSeq>]
  # @return [Array<Dendroid::Syntax::SymbolSeq>] the argument itself when valid
  # @raise [StandardError] when the alternatives are invalid
  def valid_alternatives(alt)
    # A choice must have at least two alternatives
    if alt.size < 2
      raise StandardError, "The choice for #{lhs} must have at least two alternatives."
    end

    # Forbid cyclic rules (e.g. A => A)
    if alt.any? { |a| a.size == 1 && lhs == a.first }
      raise StandardError, "Cyclic rule of the kind #{lhs} => #{lhs} is not allowed."
    end

    alt
  end
end # class
|
83
|
+
end # module
|
84
|
+
end # module
|
@@ -0,0 +1,275 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'set'
|
4
|
+
|
5
|
+
module Dendroid
|
6
|
+
module Syntax
|
7
|
+
# A grammar specifies the syntax of a language.
# Formally, a grammar has:
# * One start symbol,
# * One or more other production rules,
# * Each production has a rhs that is a sequence of grammar symbols.
# * Grammar symbols are categorized into:
#   -terminal symbols
#   -non-terminal symbols
class Grammar
  # The list of grammar symbols in the language.
  # @return [Array<Dendroid::Syntax::GrmSymbol>] The terminal and non-terminal symbols.
  attr_reader :symbols

  # The list of production rules for the language.
  # @return [Array<Dendroid::Syntax::Rule>, nil] Array of rules for the grammar;
  #   nil as long as no rule was added.
  attr_reader :rules

  # A Hash that maps symbol names to their grammar symbols
  # @return [Hash{String => Dendroid::Syntax::GrmSymbol}]
  attr_reader :name2symbol

  # TODO: make nonterminal - rules one-to-one
  # A Hash that maps a non-terminal to the rules having it as head
  # @return [Hash{Dendroid::Syntax::GrmSymbol => Array<Dendroid::Syntax::Rule>}, nil]
  #   nil as long as no rule was added.
  attr_reader :nonterm2productions

  # Constructor.
  # @param terminals [Array<Dendroid::Syntax::Terminal>]
  def initialize(terminals)
    @symbols = []
    @name2symbol = {}
    add_terminals(terminals)
  end

  # Add a rule to the grammar.
  # The head of the rule and the non-terminals from its rhs are
  # registered as grammar symbols.
  # @param rule [Dendroid::Syntax::Rule]
  # @raise [StandardError] when an equal rule was already added
  def add_rule(rule)
    # Both containers are created lazily: they stay nil until the first rule
    @rules ||= []
    @nonterm2productions ||= {}

    # TODO: add test for duplicate productions
    if nonterm2productions[rule.head]&.include?(rule)
      raise StandardError, "Production rule '#{rule}' appears more than once in the grammar."
    end

    add_symbol(rule.head)
    rule.nonterminals.each { |nonterm| add_symbol(nonterm) }
    rules << rule
    (nonterm2productions[rule.head] ||= []) << rule
  end

  # Return the start symbol for the language, that is,
  # the left-hand side of the first rule added.
  # @return [Dendroid::Syntax::NonTerminal]
  def start_symbol
    rules.first.lhs
  end

  # An event method to notify the grammar that all grammar rules
  # have been entered. The grammar, in turn, reacts by validating the
  # production rules and then by marking the nullable symbols.
  # @raise [StandardError] when one validation check fails
  def complete!
    validate
    analyze
  end

  private

  # rubocop: disable Metrics/AbcSize
  # rubocop: disable Metrics/BlockNesting
  # rubocop: disable Metrics/MethodLength
  # rubocop: disable Metrics/PerceivedComplexity

  # Register every given terminal as a grammar symbol.
  def add_terminals(terminals)
    terminals.each { |term| add_symbol(term) }
  end

  # Register a grammar symbol unless a symbol with the same name is known.
  # The symbol is retrievable by its name and by the String form of its name.
  def add_symbol(symb)
    return if name2symbol.include?(symb.name)

    symbols.push(symb)
    name2symbol[symb.name] = symb
    name2symbol[symb.name.to_s] = symb
  end

  # Run all the consistency checks; the first failing check raises.
  def validate
    at_least_one_terminal
    are_terminals_referenced?
    are_nonterminals_rewritten?
    are_symbols_productive?
    are_symbols_reachable?
  end

  # Derive properties of the (validated) grammar.
  def analyze
    mark_nullable_symbols
  end

  # Does the grammar contain at least one terminal symbol?
  # @raise [StandardError] when it does not
  def at_least_one_terminal
    return if symbols.any?(&:terminal?)

    raise StandardError, "Grammar doesn't contain any terminal symbol."
  end

  # Does every terminal symbol appear at least once
  # in a rhs of a production rule?
  # @return [TrueClass]
  # @raise [StandardError] when the declared terminals and the terminals
  #   used in the rules differ
  def are_terminals_referenced?
    all_terminals = Set.new(symbols.select(&:terminal?))
    terms_in_rhs = rules.each_with_object(Set.new) do |prd, collected|
      collected.merge(prd.terminals)
    end
    return true if all_terminals == terms_in_rhs

    text = all_terminals.difference(terms_in_rhs).map(&:name).join("', '")
    raise StandardError, "Terminal symbols '#{text}' never appear in production rules."
  end

  # Is every non-terminal symbol the head of at least one rule?
  # @return [TrueClass]
  # @raise [StandardError] when the known non-terminals and the rule heads differ
  def are_nonterminals_rewritten?
    all_nonterminals = Set.new(symbols.reject(&:terminal?))
    symbs_in_lhs = Set.new(rules.map(&:head))
    return true if all_nonterminals == symbs_in_lhs

    text = all_nonterminals.difference(symbs_in_lhs).map(&:name).join("', '")
    raise StandardError, "Non-terminal symbols '#{text}' never appear in head of any production rule."
  end

  # @return [TrueClass]
  # @raise [StandardError] when some symbol cannot be reached from the start symbol
  def are_symbols_reachable?
    unreachable = unreachable_symbols
    return true if unreachable.empty?

    text = unreachable.to_a.map(&:name).join("', '")
    raise StandardError, "Symbols '#{text}' are unreachable from start symbol."
  end

  # @return [TrueClass]
  # @raise [StandardError] when some symbol is non-productive
  def are_symbols_productive?
    non_productive = mark_non_productive_symbols
    return true if non_productive.empty?

    text = non_productive.to_a.map(&:name).join("', '")
    raise StandardError, "Symbols '#{text}' are non-productive."
  end

  # Determine the symbols that cannot be reached from the start symbol.
  # @return [Set<Dendroid::Syntax::GrmSymbol>] empty when all symbols are reachable
  def unreachable_symbols
    backlog = [start_symbol]
    reachable = Set.new(backlog)

    until backlog.empty?
      nonterm = backlog.pop
      # A non-terminal without any rule simply leads nowhere
      nonterm2productions.fetch(nonterm, []).each do |prd|
        prd.rhs_symbols.each do |member|
          # Only non-terminals seen for the first time need to be expanded
          backlog.push(member) unless member.terminal? || reachable.include?(member)
          reachable.add(member)
        end
      end
    end

    Set.new(symbols) - reachable
  end

  # Flag the symbols as productive or not.
  # The rules that aren't productive yet are re-evaluated as long as a pass
  # discovers at least one new productive rule (asking a rule whether it is
  # productive may flag its head as productive).
  # @return [Array<Dendroid::Syntax::GrmSymbol>] the non-productive symbols
  def mark_non_productive_symbols
    pending = rules.reject(&:productive?)
    until pending.empty?
      resolved, pending = pending.partition(&:productive?)
      break if resolved.empty? # no progress: remaining rules are non-productive
    end

    pending.each(&:non_productive)
    non_productive = symbols.reject(&:productive?)
    non_productive.each { |symb| symb.productive = false }
    non_productive
  end

  # Flag every non-terminal as nullable or not.
  # A non-terminal is nullable when one of its rules is empty or when
  # one of its rhs consists solely of nullable non-terminals.
  def mark_nullable_symbols
    # Non-terminals not known to be nullable => their rhs sequences
    candidates = {}
    nonterm2productions.each_pair do |sym, prods|
      if prods.any?(&:empty?)
        sym.nullable = true
      else
        candidates[sym] = prods.map(&:rhs).flatten
      end
    end

    # Fixpoint: iterate until a full pass finds no new nullable symbol
    loop do
      promoted = candidates.select do |_sym, seqs|
        seqs.any? { |sq| nullable_sequence?(sq) }
      end.keys
      break if promoted.empty?

      promoted.each do |sym|
        sym.nullable = true
        candidates.delete(sym)
      end
    end

    # Non-terminals left undecided are not nullable
    symbols.each do |sym|
      next if sym.terminal?

      sym.nullable = false if sym.nullable.nil?
    end
  end

  # Are all the members of the sequence non-terminals flagged as nullable?
  def nullable_sequence?(seq)
    seq.members.all? { |member| !member.terminal? && member.nullable? }
  end
  # rubocop: enable Metrics/AbcSize
  # rubocop: enable Metrics/BlockNesting
  # rubocop: enable Metrics/MethodLength
  # rubocop: enable Metrics/PerceivedComplexity
end # class
|
274
|
+
end # module
|
275
|
+
end # module
|
@@ -5,8 +5,7 @@ require_relative 'rule'
|
|
5
5
|
module Dendroid
|
6
6
|
module Syntax
|
7
7
|
# A specialization of the Rule class.
|
8
|
-
# A production is
|
9
|
-
# at its left-hand side (lhs).
|
8
|
+
# A production is a rule with a single rhs
|
10
9
|
class Production < Rule
|
11
10
|
# @return [Dendroid::Syntax::SymbolSeq]
|
12
11
|
attr_reader :body
|
@@ -0,0 +1,54 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# Forward slashes resolve on every platform; backslashes only work on Windows.
require_relative '../../spec_helper'
require_relative '../../../lib/dendroid/syntax/terminal'
require_relative '../../../lib/dendroid/syntax/non_terminal'
require_relative '../../../lib/dendroid/syntax/symbol_seq'
require_relative '../../../lib/dendroid/syntax/choice'

describe Dendroid::Syntax::Choice do
  let(:num_symb) { Dendroid::Syntax::Terminal.new('NUMBER') }
  let(:plus_symb) { Dendroid::Syntax::Terminal.new('PLUS') }
  let(:minus_symb) { Dendroid::Syntax::Terminal.new('MINUS') }
  let(:expr_symb) { Dendroid::Syntax::NonTerminal.new('expression') }
  let(:foo_symb) { Dendroid::Syntax::NonTerminal.new('foo') }
  let(:alt1) { Dendroid::Syntax::SymbolSeq.new([num_symb, plus_symb, num_symb]) }
  let(:alt2) { Dendroid::Syntax::SymbolSeq.new([num_symb, minus_symb, num_symb]) }
  let(:empty_body) { Dendroid::Syntax::SymbolSeq.new([]) }

  # Implements a choice rule:
  # expression => NUMBER PLUS NUMBER
  #             | NUMBER MINUS NUMBER
  #             | epsilon
  subject { described_class.new(expr_symb, [alt1, alt2, empty_body]) }

  context 'Initialization:' do
    it 'is initialized with a head and alternatives' do
      expect { described_class.new(expr_symb, [alt1, alt2, empty_body]) }.not_to raise_error
    end

    it 'knows its alternatives' do
      expect(subject.alternatives).to eq([alt1, alt2, empty_body])
    end

    it 'renders a String representation of itself' do
      # The empty alternative renders as an empty string after the last bar
      expectation = 'expression => NUMBER PLUS NUMBER | NUMBER MINUS NUMBER | '
      expect(subject.to_s).to eq(expectation)
    end
  end # context

  context 'Provided services:' do
    it 'knows its terminal members' do
      expect(subject.terminals).to eq([num_symb, plus_symb, minus_symb])
    end

    it 'knows its non-terminal members' do
      expect(subject.nonterminals).to be_empty

      my_alt1 = Dendroid::Syntax::SymbolSeq.new([expr_symb, plus_symb, expr_symb])
      my_alt2 = Dendroid::Syntax::SymbolSeq.new([foo_symb, minus_symb, expr_symb])
      instance = described_class.new(foo_symb, [my_alt1, my_alt2])
      expect(instance.nonterminals).to eq([expr_symb, foo_symb])
    end
  end # context
end # describe
|
@@ -0,0 +1,205 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# Forward slashes resolve on every platform; backslashes only work on Windows.
require_relative '../../spec_helper'
require_relative '../../../lib/dendroid/syntax/terminal'
require_relative '../../../lib/dendroid/syntax/non_terminal'
require_relative '../../../lib/dendroid/syntax/symbol_seq'
require_relative '../../../lib/dendroid/syntax/production'
require_relative '../../../lib/dendroid/syntax/choice'
require_relative '../../../lib/dendroid/syntax/grammar'

describe Dendroid::Syntax::Grammar do
  let(:int_symb) { build_terminal('INTEGER') }
  let(:plus_symb) { build_terminal('PLUS') }
  let(:star_symb) { build_terminal('STAR') }
  let(:p_symb) { build_nonterminal('p') }
  let(:s_symb) { build_nonterminal('s') }
  let(:m_symb) { build_nonterminal('m') }
  let(:t_symb) { build_nonterminal('t') }
  let(:all_terminals) { [int_symb, plus_symb, star_symb] }

  subject { described_class.new(all_terminals) }

  def build_terminal(name)
    Dendroid::Syntax::Terminal.new(name)
  end

  def build_nonterminal(name)
    Dendroid::Syntax::NonTerminal.new(name)
  end

  def build_symbol_seq(symbols)
    Dendroid::Syntax::SymbolSeq.new(symbols)
  end

  def build_production(lhs, symbols)
    Dendroid::Syntax::Production.new(lhs, build_symbol_seq(symbols))
  end

  def build_choice(lhs, sequences)
    Dendroid::Syntax::Choice.new(lhs, sequences.map { |arr| build_symbol_seq(arr) })
  end

  # Build the rules of a small arithmetic grammar.
  # rubocop: disable Metrics/AbcSize
  def build_all_rules
    rule1 = build_production(p_symb, [s_symb]) # p => s
    rule2 = build_choice(s_symb, [[s_symb, plus_symb, m_symb], [m_symb]]) # s => s + m | m
    rule3 = build_choice(m_symb, [[m_symb, star_symb, t_symb], [t_symb]]) # m => m * t | t
    rule4 = build_production(t_symb, [int_symb]) # t => INTEGER
    [rule1, rule2, rule3, rule4]
  end
  # rubocop: enable Metrics/AbcSize

  context 'Initialization:' do
    it 'is initialized with an array of terminal symbols' do
      expect { described_class.new(all_terminals) }.not_to raise_error
    end

    it 'knows its terminal symbols' do
      expect(subject.symbols).to eq(all_terminals)
    end

    it 'ignores about productions after initialization' do
      expect(subject.rules).to be_nil
    end

    it 'maps a terminal name to one GrmSymbol object' do
      # Every symbol is registered twice: under its name and under the String form of it
      expect(subject.name2symbol.values.uniq.size).to eq(all_terminals.size)
      expect(subject.name2symbol.values.size).to eq(2 * all_terminals.size)
      expect(subject.name2symbol[:PLUS]).to eq(plus_symb)
      expect(subject.name2symbol['PLUS']).to eq(plus_symb)
    end
  end # context

  context 'Adding productions:' do
    it 'allows the addition of one production rule' do
      rule = build_production(p_symb, [s_symb])
      expect { subject.add_rule(rule) }.not_to raise_error
      expect(subject.rules.size).to eq(1)
      expect(subject.rules.first).to eq(rule)
    end

    it 'allows the addition of multiple production rules' do
      rules = build_all_rules
      rules.each { |rl| subject.add_rule(rl) }
      expect(subject.rules.size).to eq(4)
      expect(subject.rules.first).to eq(rules.first)
      expect(subject.rules.last).to eq(rules.last)
    end

    it 'updates the set of symbols when adding production rules' do
      rules = build_all_rules
      rules.each { |rl| subject.add_rule(rl) }
      [p_symb, s_symb, m_symb, t_symb].each do |symb|
        expect(subject.symbols.include?(symb)).to be_truthy
      end
    end

    it 'maps name of every non-terminal to its related GrmSymbol' do
      rules = build_all_rules
      rules.each { |rl| subject.add_rule(rl) }
      # Each non-terminal must be retrievable by Symbol and by String
      [[:p, p_symb],
       ['p', p_symb],
       [:s, s_symb],
       ['s', s_symb],
       [:m, m_symb],
       ['m', m_symb],
       [:t, t_symb],
       ['t', t_symb]].each do |(name, symb)|
        expect(subject.name2symbol[name]).to eq(symb)
      end
    end

    it 'maps every non-terminal to its defining productions' do
      rules = build_all_rules
      rules.each { |rl| subject.add_rule(rl) }
      %i[p s m t].each do |symb_name|
        symb = subject.name2symbol[symb_name]
        expected_prods = subject.rules.select { |prd| prd.head == symb }
        related_prods = subject.nonterm2productions[symb]
        expect(related_prods).to eq(expected_prods)
      end
    end
  end # context

  context 'Grammar completion:' do
    it 'detects and marks nullable symbols (I)' do
      # Case: grammar without nullable symbols
      rules = build_all_rules
      rules.each { |rl| subject.add_rule(rl) }
      subject.complete!
      expect(subject.symbols.none?(&:nullable?)).to be_truthy
    end

    it 'detects and marks nullable symbols (II)' do
      # Case: grammar with only nullable symbols
      # Grammar inspired by the paper "Practical Earley Parsing"
      terminal_a = build_terminal('a')
      nterm_s_prime = build_nonterminal("S'")
      nterm_s = build_nonterminal('S')
      nterm_a = build_nonterminal('A')
      nterm_e = build_nonterminal('E')

      instance = described_class.new([terminal_a])
      instance.add_rule(build_production(nterm_s_prime, [nterm_s]))
      instance.add_rule(build_production(nterm_s, [nterm_a, nterm_a, nterm_a, nterm_a]))
      instance.add_rule(build_choice(nterm_a, [[terminal_a], [nterm_e]]))
      instance.add_rule(build_production(nterm_e, []))

      instance.complete!
      # Inspect the grammar under test (not `subject`, which holds terminals
      # only and would make the `all?` check vacuously true).
      all_nonterminals = instance.symbols.reject(&:terminal?)
      expect(all_nonterminals).not_to be_empty
      expect(all_nonterminals.all?(&:nullable?)).to be_truthy
    end

    it 'detects unreachable symbols' do
      # Case: grammar without unreachable symbols
      rules = build_all_rules
      rules.each { |rl| subject.add_rule(rl) }
      expect(subject.send(:unreachable_symbols)).to be_empty

      # Let's add unreachable symbols
      zed_symb = build_nonterminal('Z')
      question_symb = build_nonterminal('?')
      bad_rule = build_production(zed_symb, [zed_symb, question_symb, int_symb]) # Z => Z ? INTEGER
      subject.add_rule(bad_rule)
      unreachable = subject.send(:unreachable_symbols)
      expect(unreachable).not_to be_empty
      expect(unreachable).to eq(Set.new([zed_symb, question_symb]))
    end

    it 'detects non-productive symbols' do
      # Case: grammar without non-productive symbols
      rules = build_all_rules
      rules.each { |rl| subject.add_rule(rl) }
      expect(subject.send(:mark_non_productive_symbols)).to be_empty
      expect(t_symb).to be_productive
      expect(p_symb).to be_productive

      # Grammar with non-productive symbols
      term_a = build_terminal('a')
      term_b = build_terminal('b')
      term_c = build_terminal('c')
      term_d = build_terminal('d')
      term_e = build_terminal('e')
      term_f = build_terminal('f')
      nterm_A = build_nonterminal('A')
      nterm_B = build_nonterminal('B')
      nterm_C = build_nonterminal('C')
      nterm_D = build_nonterminal('D')
      nterm_E = build_nonterminal('E')
      nterm_F = build_nonterminal('F')
      nterm_S = build_nonterminal('S')
      instance = described_class.new([term_a, term_b, term_c, term_d, term_e, term_f])
      instance.add_rule(build_choice(nterm_S, [[nterm_A, nterm_B], [nterm_D, nterm_E]]))
      instance.add_rule(build_production(nterm_A, [term_a]))
      instance.add_rule(build_production(nterm_B, [term_b, nterm_C]))
      instance.add_rule(build_production(nterm_C, [term_c]))
      instance.add_rule(build_production(nterm_D, [term_d, nterm_F]))
      instance.add_rule(build_production(nterm_E, [term_e]))
      instance.add_rule(build_production(nterm_F, [term_f, nterm_D]))
      # D and F only refer to each other: neither can derive a terminal string
      nonproductive = instance.send(:mark_non_productive_symbols)
      expect(nonproductive).not_to be_empty
      expect(nonproductive).to eq([nterm_D, nterm_F])
    end
  end # context
end # describe
|
data/version.txt
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.0.
|
1
|
+
0.0.6
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: dendroid
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.6
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Dimitri Geshef
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-10-
|
11
|
+
date: 2023-10-30 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: WIP. A Ruby implementation of a Earley parser
|
14
14
|
email: famished.tiger@yahoo.com
|
@@ -17,18 +17,23 @@ extensions: []
|
|
17
17
|
extra_rdoc_files: []
|
18
18
|
files:
|
19
19
|
- ".rubocop.yml"
|
20
|
+
- CHANGELOG.md
|
20
21
|
- LICENSE
|
21
22
|
- README.md
|
22
23
|
- Rakefile
|
23
24
|
- bin/dendroid
|
24
25
|
- dendroid.gemspec
|
25
|
-
- lib/dendroid
|
26
|
+
- lib/dendroid.rb
|
27
|
+
- lib/dendroid/syntax/choice.rb
|
28
|
+
- lib/dendroid/syntax/grammar.rb
|
26
29
|
- lib/dendroid/syntax/grm_symbol.rb
|
27
30
|
- lib/dendroid/syntax/non_terminal.rb
|
28
31
|
- lib/dendroid/syntax/production.rb
|
29
32
|
- lib/dendroid/syntax/rule.rb
|
30
33
|
- lib/dendroid/syntax/symbol_seq.rb
|
31
34
|
- lib/dendroid/syntax/terminal.rb
|
35
|
+
- spec/dendroid/syntax/choice_spec.rb
|
36
|
+
- spec/dendroid/syntax/grammar_spec.rb
|
32
37
|
- spec/dendroid/syntax/grm_symbol_spec.rb
|
33
38
|
- spec/dendroid/syntax/non_terminal_spec.rb
|
34
39
|
- spec/dendroid/syntax/production_spec.rb
|
File without changes
|