dendroid 0.0.5 → 0.0.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +13 -1
- data/CHANGELOG.md +8 -0
- data/lib/dendroid/grm_dsl/base_grm_builder.rb +169 -0
- data/lib/dendroid/syntax/grammar.rb +275 -0
- data/lib/dendroid/syntax/grm_symbol.rb +1 -0
- data/spec/dendroid/grm_dsl/base_grm_builder_spec.rb +106 -0
- data/spec/dendroid/syntax/grammar_spec.rb +203 -0
- data/version.txt +1 -1
- metadata +7 -3
- /data/lib/{dendroid/dendroid.rb → dendroid.rb} +0 -0
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: '06667077966c902b083c8e73debebc0808bbc0952243712d19a34f26f9d0c1ba'
|
4
|
+
data.tar.gz: 56e29eb75f509f37212d9bc384c139b6e903b730308fa320e84dc4f352c93f2e
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: e135fc4c2ce34cf54d226d6ed82e188174d99220645abb127877b8d7a97ea77bbce77f8b8a731c390b362e592f961f758078c1af8618c339e41b6564052f1bf3
|
7
|
+
data.tar.gz: 8b30951295517fbf46fa2a1560b4e27dbae13b101cff1841142b641711b4be6696cb50e16333e03dbcb472fa4803380a8e706c85ddab69837db148b69f8ffae4
|
data/.rubocop.yml
CHANGED
@@ -2,9 +2,21 @@ Layout/EndOfLine:
|
|
2
2
|
Enabled: true
|
3
3
|
EnforcedStyle: lf
|
4
4
|
|
5
|
+
Metrics/AbcSize:
|
6
|
+
Enabled: true
|
7
|
+
Max: 25
|
8
|
+
|
5
9
|
Metrics/BlockLength:
|
6
10
|
Enabled: true
|
7
|
-
Max:
|
11
|
+
Max: 75
|
12
|
+
|
13
|
+
Metrics/ClassLength:
|
14
|
+
Enabled: true
|
15
|
+
Max: 200
|
16
|
+
|
17
|
+
Metrics/MethodLength:
|
18
|
+
Enabled: true
|
19
|
+
Max: 20
|
8
20
|
|
9
21
|
Naming/MethodParameterName:
|
10
22
|
Enabled: false
|
data/CHANGELOG.md
CHANGED
@@ -2,6 +2,14 @@
|
|
2
2
|
|
3
3
|
## [Unreleased]
|
4
4
|
|
5
|
+
## [0.0.7] - 2023-10-30
|
6
|
+
### Added
|
7
|
+
- Class `BaseGrmBuilder` and its spec file
|
8
|
+
|
9
|
+
## [0.0.6] - 2023-10-30
|
10
|
+
### Added
|
11
|
+
- Class `Grammar` and its spec file
|
12
|
+
|
5
13
|
## [0.0.5] - 2023-10-28
|
6
14
|
### Added
|
7
15
|
- Class `Choice` and its spec file
|
@@ -0,0 +1,169 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative '..\syntax\terminal'
|
4
|
+
require_relative '..\syntax\non_terminal'
|
5
|
+
require_relative '..\syntax\symbol_seq'
|
6
|
+
require_relative '..\syntax\production'
|
7
|
+
require_relative '..\syntax\choice'
|
8
|
+
require_relative '..\syntax\grammar'
|
9
|
+
|
10
|
+
module Dendroid
  # This module contains classes that define a Domain-Specific Language
  # specialized in grammar definition.
  module GrmDSL
    # Builder GoF pattern: a Builder assembles a complex object step by step.
    # Here the builder creates a grammar from simpler objects
    # (grammar symbols and production rules).
    #
    # The builder goes through three states:
    # - `:declaring` (initial): terminal symbols may be declared,
    # - `:building`: entered on the first call to {#rule},
    # - `:complete`: entered by {#grammar_complete!}; no rule may be added anymore.
    class BaseGrmBuilder
      # @return [Symbol] one of: :declaring, :building, :complete
      attr_reader :state

      # @return [Hash{Object => Dendroid::Syntax::GrmSymbol}] The mapping of grammar symbol names
      #   to the matching grammar symbol object. Every symbol is registered twice:
      #   under its `name` and under the argument it was created from.
      attr_reader :symbols

      # @return [Array<Dendroid::Syntax::Rule>] The list of rules of the grammar
      attr_reader :rules

      # Creates a new grammar builder object.
      # When a block is given, it is evaluated in the context of the builder
      # and the builder is then put in state :complete.
      # @param aBlock [Proc] code block used to build the grammar.
      # @example Building a tiny English grammar
      #   builder = Dendroid::GrmDSL::BaseGrmBuilder.new do
      #     declare_terminals('n', 'v', 'adj', 'det')
      #     rule 'S' => 'NP VP'
      #     rule 'VP' => 'v NP'
      #     rule 'NP' => ['det n', 'adj NP']
      #   end
      #   # Now with `builder`, let's create the grammar
      #   tiny_eng = builder.grammar
      def initialize(&aBlock)
        @symbols = {}
        @rules = []
        @state = :declaring

        return unless block_given?

        instance_exec(&aBlock)
        grammar_complete!
      end

      # Add the given terminal symbols to the grammar of the language
      # @param terminalSymbols [String, Terminal] 1..* terminal symbols.
      # @return [void]
      # @raise [StandardError] when the builder is no longer in state :declaring
      def declare_terminals(*terminalSymbols)
        err_msg = "Terminal symbols may only be declared in state :declaring, current state is: #{state}"
        raise StandardError, err_msg unless state == :declaring

        symbols.merge!(build_symbols(Dendroid::Syntax::Terminal, terminalSymbols))
      end

      # Add a production rule in the grammar given one
      # key-value pair of the form: String => String.
      # Where the key is the name of the non-terminal appearing in the
      # left side of the rule.
      # When the value is a String, it is a sequence of grammar symbol names separated by space.
      # When the value is an array of String, each element represents an alternative rhs.
      # The rule is created and inserted in the grammar.
      # Symbol names that weren't declared as terminals are taken as non-terminals.
      # @example
      #   builder.rule('sentence' => 'noun_phrase verb_phrase')
      #   builder.rule('noun_phrase' => ['noun', 'adj noun'])
      # @param productionRuleRepr [Hash{String, String|Array<String>}]
      #   A Hash-based representation of a production.
      # @return [Dendroid::Syntax::Rule, nil] The created Production or Choice instance;
      #   nil when the argument isn't a Hash.
      # @raise [StandardError] when the builder is in state :complete or
      #   when a terminal symbol is used as the head of the rule.
      def rule(productionRuleRepr)
        # StandardError (not Exception) so that a plain `rescue` catches it,
        # in line with the other errors raised by this class.
        raise StandardError, 'Cannot add a production rule in state :complete' if state == :complete

        @state = :building
        return unless productionRuleRepr.is_a?(Hash)

        lhs = head_symbol(productionRuleRepr.keys.first)
        raw_rhs = productionRuleRepr.values.first

        new_prod = if raw_rhs.is_a?(String)
                     Dendroid::Syntax::Production.new(lhs, build_symbol_seq(raw_rhs))
                   else
                     rhs = raw_rhs.map { |raw| build_symbol_seq(raw) }
                     Dendroid::Syntax::Choice.new(lhs, rhs)
                   end
        rules << new_prod
        new_prod
      end

      # A method used to notify the builder that the grammar is complete
      # (i.e. all rules were entered).
      def grammar_complete!
        @state = :complete
      end

      # Generate the grammar according to the specifications.
      # @return [Dendroid::Syntax::Grammar]
      def grammar
        terminals = symbols.values.select(&:terminal?)
        grm = Dendroid::Syntax::Grammar.new(terminals)
        rules.each { |prod| grm.add_rule(prod) }
        grm.complete!
        grm
      end

      private

      # Return the grammar symbol to use as head of a rule.
      # An unknown name results in the creation of a new non-terminal.
      # @param head_name [String] name of the symbol at the left side of a rule
      # @return [Dendroid::Syntax::GrmSymbol]
      # @raise [StandardError] when the name refers to a terminal symbol
      def head_symbol(head_name)
        if symbols.include?(head_name)
          err_msg = "Terminal symbol '#{head_name}' may not be on left-side of a rule."
          raise StandardError, err_msg if symbols[head_name].is_a?(Dendroid::Syntax::Terminal)
        else
          register_nonterminal(head_name)
        end

        symbols[head_name]
      end

      # Create a non-terminal symbol with the given name and add it to the
      # known symbols.
      # @param symb_name [String]
      def register_nonterminal(symb_name)
        symbols.merge!(build_symbols(Dendroid::Syntax::NonTerminal, [symb_name]))
      end

      # Convert a text with symbol names separated by spaces or tabs
      # into a sequence of grammar symbols. Unknown names become non-terminals.
      # A blank text results in an empty sequence.
      # @param raw_symbols [String]
      # @return [Dendroid::Syntax::SymbolSeq]
      def build_symbol_seq(raw_symbols)
        raw_stripped = raw_symbols.strip
        return Dendroid::Syntax::SymbolSeq.new([]) if raw_stripped.empty?

        symb_array = raw_stripped.split(/(?: |\t)+/).map do |symb_name|
          register_nonterminal(symb_name) unless symbols.include?(symb_name)
          symbols[symb_name]
        end

        Dendroid::Syntax::SymbolSeq.new(symb_array)
      end

      # Add the given grammar symbols.
      # @param aClass [Class] The class of grammar symbols to instantiate.
      # @param theSymbols [Array] array of elements are treated as follows:
      #   if the element is already a grammar symbol, then it added as is,
      #   otherwise it is considered as the name of a grammar symbol
      #   of the specified class to build.
      # @return [Hash] the symbols, each one keyed by its `name` and by the
      #   corresponding element from `theSymbols`.
      def build_symbols(aClass, theSymbols)
        theSymbols.each_with_object({}) do |s, symbs|
          new_symbol = build_symbol(aClass, s)
          symbs[new_symbol.name] = new_symbol
          symbs[s] = new_symbol
        end
      end

      # If the argument is already a grammar symbol object then it is
      # returned as is. Otherwise, the argument is treated as a name
      # for a new instance of the given class.
      # @param aClass [Class] The class of grammar symbols to instantiate
      # @param aSymbolArg [GrmSymbol-like or String]
      # @return [Dendroid::Syntax::GrmSymbol] the given or the newly created grammar symbol
      def build_symbol(aClass, aSymbolArg)
        return aSymbolArg if aSymbolArg.is_a?(Dendroid::Syntax::GrmSymbol)

        aClass.new(aSymbolArg)
      end
    end # class
  end # module
end # module
|
@@ -0,0 +1,275 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'set'
|
4
|
+
|
5
|
+
module Dendroid
  module Syntax
    # A grammar specifies the syntax of a language.
    #   Formally, a grammar has:
    #   * One start symbol,
    #   * One or more other production rules,
    #   * Each production has a rhs that is a sequence of grammar symbols.
    #   * Grammar symbols are categorized into:
    #     -terminal symbols
    #     -non-terminal symbols
    class Grammar
      # The list of grammar symbols in the language.
      # @return [Array<Dendroid::Syntax::GrmSymbol>] The terminal and non-terminal symbols.
      attr_reader :symbols

      # The list of production rules for the language.
      # @return [Array<Dendroid::Syntax::Rule>, nil] Array of rules for the grammar;
      #   nil as long as no rule was added.
      attr_reader :rules

      # A Hash that maps symbol names to their grammar symbols.
      # Every symbol is registered under its `name` and under `name.to_s`.
      # @return [Hash{String => Dendroid::Syntax::GrmSymbol}]
      attr_reader :name2symbol

      # TODO: make nonterminal - rules one-to-one
      # A Hash that maps each rule head to the rules having that symbol as head.
      # @return [Hash{Dendroid::Syntax::GrmSymbol => Array<Dendroid::Syntax::Rule>}, nil]
      #   nil as long as no rule was added.
      attr_reader :nonterm2productions

      # Constructor.
      # @param terminals [Array<Dendroid::Syntax::Terminal>]
      def initialize(terminals)
        @symbols = []
        @name2symbol = {}
        add_terminals(terminals)
      end

      # Add a rule to the grammar.
      # The head of the rule and the non-terminals it refers to are
      # added to the symbols of the grammar.
      # @param rule [Dendroid::Syntax::Rule]
      # @raise [StandardError] when the rule was already added
      def add_rule(rule)
        # Both collections are created lazily: they stay nil until the first rule.
        @rules ||= []
        @nonterm2productions ||= {}

        # TODO: add test for duplicate productions
        if nonterm2productions[rule.head]&.include? rule
          raise StandardError, "Production rule '#{rule}' appears more than once in the grammar."
        end

        add_symbol(rule.head)
        rule.nonterminals.each { |nonterm| add_symbol(nonterm) }
        rules << rule
        (nonterm2productions[rule.head] ||= []) << rule
      end

      # Return the start symbol for the language, that is,
      # the head of the first rule added to the grammar.
      # @return [Dendroid::Syntax::NonTerminal]
      def start_symbol
        rules.first.head
      end

      # A event method to notify the grammar that all grammar rules
      # have been entered. The grammar, in turn, reacts by validating the
      # production rules and then by marking the nullable symbols.
      # @raise [StandardError] when one of the validation checks fails
      def complete!
        validate
        analyze
      end

      private

      # rubocop: disable Metrics/AbcSize
      # rubocop: disable Metrics/BlockNesting
      # rubocop: disable Metrics/MethodLength
      # rubocop: disable Metrics/PerceivedComplexity
      def add_terminals(terminals)
        terminals.each { |term| add_symbol(term) }
      end

      # Register the symbol unless a symbol with the same name is already known.
      def add_symbol(symb)
        return if name2symbol.include? symb.name

        symbols.push(symb)
        name2symbol[symb.name] = symb
        name2symbol[symb.name.to_s] = symb
      end

      # Run all the consistency checks; each one raises on failure.
      def validate
        at_least_one_terminal
        are_terminals_referenced?
        are_nonterminals_rewritten?
        are_symbols_productive?
        are_symbols_reachable?
      end

      def analyze
        mark_nullable_symbols
      end

      # Does the grammar contain at least one terminal symbol?
      # @raise [StandardError] when it doesn't
      def at_least_one_terminal
        return if symbols.any?(&:terminal?)

        err_msg = "Grammar doesn't contain any terminal symbol."
        raise StandardError, err_msg
      end

      # Does every terminal symbol appear at least once
      # in a rhs of a production rule?
      # @return [true]
      # @raise [StandardError] when some terminals are never used
      def are_terminals_referenced?
        all_terminals = Set.new(symbols.select(&:terminal?))
        terms_in_rhs = rules.each_with_object(Set.new) do |prd, collected|
          collected.merge(prd.terminals)
        end
        return true if all_terminals == terms_in_rhs

        unused_terms = all_terminals.difference(terms_in_rhs)
        text = unused_terms.map(&:name).join("', '")
        err_msg = "Terminal symbols '#{text}' never appear in production rules."
        raise StandardError, err_msg
      end

      # Is every non-terminal symbol the head of at least one rule?
      # @return [true]
      # @raise [StandardError] when some non-terminals are never defined
      def are_nonterminals_rewritten?
        all_nonterminals = Set.new(symbols.reject(&:terminal?))
        symbs_in_lhs = Set.new(rules.map(&:head))
        return true if all_nonterminals == symbs_in_lhs

        undefined_nterms = all_nonterminals.difference(symbs_in_lhs)
        text = undefined_nterms.map(&:name).join("', '")
        err_msg = "Non-terminal symbols '#{text}' never appear in head of any production rule."
        raise StandardError, err_msg
      end

      # Are all symbols reachable from start symbol?
      # @return [true]
      # @raise [StandardError] when some symbols are unreachable
      def are_symbols_reachable?
        unreachable = unreachable_symbols
        return true if unreachable.empty?

        text = unreachable.to_a.map(&:name).join("', '")
        err_msg = "Symbols '#{text}' are unreachable from start symbol."
        raise StandardError, err_msg
      end

      # Are all symbols productive?
      # @return [true]
      # @raise [StandardError] when some symbols are non-productive
      def are_symbols_productive?
        non_productive = mark_non_productive_symbols
        return true if non_productive.empty?

        text = non_productive.to_a.map(&:name).join("', '")
        err_msg = "Symbols '#{text}' are non-productive."
        raise StandardError, err_msg
      end

      # Determine the symbols that cannot be reached from the start symbol
      # by following the rhs symbols of the rules.
      # @return [Set<Dendroid::Syntax::GrmSymbol>]
      def unreachable_symbols
        backlog = [start_symbol]
        set_reachable = Set.new(backlog.dup)

        loop do
          reachable_sym = backlog.pop
          # A non-terminal without any rule has nothing to expand;
          # such symbols are reported by `are_nonterminals_rewritten?`.
          prods = nonterm2productions.fetch(reachable_sym, [])
          prods.each do |prd|
            prd.rhs_symbols.each do |member|
              backlog.push(member) unless member.terminal? || set_reachable.include?(member)
              set_reachable.add(member)
            end
          end
          break if backlog.empty?
        end

        Set.new(symbols) - set_reachable
      end

      # Determine the non-productive symbols and flag them as such.
      # A rule is asked repeatedly whether it is productive, until a
      # complete pass over the pending rules brings no change.
      # @return [Array<Dendroid::Syntax::GrmSymbol>] the non-productive symbols
      def mark_non_productive_symbols
        backlog = Set.new(0...rules.size)
        rules.each_with_index do |prd, i|
          backlog.delete(i) if prd.productive?
        end

        until backlog.empty?
          to_remove = backlog.select { |i| rules[i].productive? }
          break if to_remove.empty?

          backlog.subtract(to_remove)
        end

        backlog.each { |i| rules[i].non_productive }
        non_productive = symbols.reject(&:productive?)
        non_productive.each { |symb| symb.productive = false }
        non_productive
      end

      # Set the `nullable` flag of every non-terminal symbol.
      # Heads having an empty rule are nullable; from there on, the
      # nullability is propagated to the heads of the other rules.
      # Non-terminals that are still undecided at the end are marked non-nullable.
      def mark_nullable_symbols
        nullable_found = false
        sym2seqs = {}

        nonterm2productions.each_pair do |sym, prods|
          if prods.any?(&:empty?)
            sym.nullable = nullable_found = true
          else
            sym2seqs[sym] = prods.map(&:rhs).flatten
          end
        end

        propagate_nullability(sym2seqs) if nullable_found

        symbols.each do |sym|
          next if sym.terminal?

          sym.nullable = false if sym.nullable.nil?
        end
      end

      # Walk through the given sequences, from left to right, as long as
      # their members are nullable. A head is marked nullable as soon as the
      # end of one of its sequences is reached.
      # @param sym2seqs [Hash] maps a head to its (non-empty) rhs sequences
      def propagate_nullability(sym2seqs)
        backlog = {} # { SymbolSequence => [Integer, Symbol] }
        sym2seqs.each do |sym, seqs|
          seqs.each { |sq| backlog[sq] = [0, sym] }
        end

        loop do
          seqs_done = []
          advanced = false
          backlog.each_pair do |sq, (elem_index, lhs)|
            member = sq.members[elem_index]
            if member.terminal?
              seqs_done << sq # stop with this sequence: it is non-nullable
              backlog[sq] = [-1, lhs]
            elsif member.nullable?
              if elem_index == sq.size - 1
                seqs_done << sq # end of sequence reached...
                backlog[sq] = [-1, lhs]
                lhs.nullable = true
              else
                backlog[sq] = [elem_index + 1, lhs]
                advanced = true
              end
            end
          end

          seqs_done.each do |sq|
            next unless backlog.include? sq

            (_, lhs) = backlog[sq]
            if lhs.nullable?
              # The head is settled: its other sequences needn't be inspected
              sym2seqs[lhs].each { |seq| backlog.delete(seq) }
            else
              backlog.delete(sq)
            end
          end

          # A pass may only move cursors forward without finishing any sequence
          # (e.g. S => A A A A with A nullable): that's progress too. Stop only
          # when nothing is pending or when a pass changed nothing at all.
          break if backlog.empty? || (seqs_done.empty? && !advanced)
        end
      end
      # rubocop: enable Metrics/AbcSize
      # rubocop: enable Metrics/BlockNesting
      # rubocop: enable Metrics/MethodLength
      # rubocop: enable Metrics/PerceivedComplexity
    end # class
  end # module
end # module
|
@@ -0,0 +1,106 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative '..\..\spec_helper'
|
4
|
+
require_relative '..\..\..\lib\dendroid\grm_dsl\base_grm_builder'
|
5
|
+
|
6
|
+
describe Dendroid::GrmDSL::BaseGrmBuilder do
  # Builds ingredients for a grammar inspired from https://en.wikipedia.org/wiki/Earley_parser
  subject do
    described_class.new.tap do |builder|
      builder.declare_terminals('PLUS', 'STAR', 'INTEGER')
    end
  end

  context 'Initialization:' do
    it 'is initialized with an optional code block' do
      expect { described_class.new }.not_to raise_error
    end

    it 'is in "declaring" state by default' do
      fresh = described_class.new
      expect(fresh.state).to eq(:declaring)
    end

    it 'has no grammar symbol by default' do
      fresh = described_class.new
      expect(fresh.symbols).to be_empty
    end

    it 'has no production rule by default' do
      fresh = described_class.new
      expect(fresh.rules).to be_empty
    end
  end # context

  context 'Provided services:' do
    # Text representation of the rules of the sample arithmetic grammar
    def expected_rule_texts
      [
        'p => s',
        's => s PLUS m | m',
        'm => m STAR t | t',
        't => INTEGER'
      ]
    end

    # Checks shared by the examples that build the complete sample grammar
    def check_sample_grammar(builder)
      grm = builder.grammar
      expect(grm).to be_kind_of(Dendroid::Syntax::Grammar)
      (terms, nonterms) = grm.symbols.partition(&:terminal?)
      expect(terms.map(&:name)).to eq(%i[PLUS STAR INTEGER])
      expect(nonterms.map(&:name)).to eq(%i[p s m t])
      expect(builder.rules.map(&:to_s)).to eq(expected_rule_texts)
    end

    it 'builds declared terminal symbols' do
      terminals = %w[PLUS STAR INTEGER]
      instance = described_class.new
      instance.declare_terminals(*terminals)
      known = instance.symbols

      # Each terminal is reachable by its Symbol name and by its String name
      expect(known.size).to eq(2 * terminals.size)
      terminals.each do |text|
        key = text.to_sym
        expect(known[key]).to be_kind_of(Dendroid::Syntax::Terminal)
        expect(known[text]).to eq(known[key])
        expect(known[key].name).to eq(key)
      end
      expect(instance.state).to eq(:declaring)
    end

    it 'builds production rules' do
      subject.rule('p' => 's')
      expect(subject.state).to eq(:building)

      # Undeclared symbols in production represent non-terminals
      %w[p s].each do |symb_name|
        expect(subject.symbols[symb_name]).to be_kind_of(Dendroid::Syntax::NonTerminal)
      end

      expect(subject.rules.size).to eq(1)
      expect(subject.rules.first.to_s).to eq('p => s')
    end

    it 'builds a grammar' do
      subject.rule('p' => 's')
      subject.rule('s' => ['s PLUS m', 'm'])
      subject.rule('m' => ['m STAR t', 't'])
      subject.rule('t' => 'INTEGER')
      subject.grammar_complete!

      check_sample_grammar(subject)
    end

    it 'provides a simple DSL' do
      instance = described_class.new do
        declare_terminals('PLUS', 'STAR', 'INTEGER')
        rule('p' => 's')
        rule('s' => ['s PLUS m', 'm'])
        rule('m' => ['m STAR t', 't'])
        rule('t' => 'INTEGER')
      end

      check_sample_grammar(instance)
    end
  end # context
end # describe
|
@@ -0,0 +1,203 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative '..\..\spec_helper'
|
4
|
+
require_relative '..\..\..\lib\dendroid\syntax\terminal'
|
5
|
+
require_relative '..\..\..\lib\dendroid\syntax\non_terminal'
|
6
|
+
require_relative '..\..\..\lib\dendroid\syntax\symbol_seq'
|
7
|
+
require_relative '..\..\..\lib\dendroid\syntax\production'
|
8
|
+
require_relative '..\..\..\lib\dendroid\syntax\choice'
|
9
|
+
require_relative '..\..\..\lib\dendroid\syntax\grammar'
|
10
|
+
|
11
|
+
describe Dendroid::Syntax::Grammar do
  let(:int_symb) { build_terminal('INTEGER') }
  let(:plus_symb) { build_terminal('PLUS') }
  let(:star_symb) { build_terminal('STAR') }
  let(:p_symb) { build_nonterminal('p') }
  let(:s_symb) { build_nonterminal('s') }
  let(:m_symb) { build_nonterminal('m') }
  let(:t_symb) { build_nonterminal('t') }
  let(:all_terminals) { [int_symb, plus_symb, star_symb] }

  # A grammar that knows the terminal symbols only (no rule yet)
  subject { described_class.new(all_terminals) }

  def build_terminal(name)
    Dendroid::Syntax::Terminal.new(name)
  end

  def build_nonterminal(name)
    Dendroid::Syntax::NonTerminal.new(name)
  end

  def build_symbol_seq(symbols)
    Dendroid::Syntax::SymbolSeq.new(symbols)
  end

  def build_production(lhs, symbols)
    Dendroid::Syntax::Production.new(lhs, build_symbol_seq(symbols))
  end

  def build_choice(lhs, sequences)
    Dendroid::Syntax::Choice.new(lhs, sequences.map { |arr| build_symbol_seq(arr) })
  end

  # Rules of a small arithmetic grammar
  def build_all_rules
    rule1 = build_production(p_symb, [s_symb]) # p => s
    rule2 = build_choice(s_symb, [[s_symb, plus_symb, m_symb], [m_symb]]) # s => s + m | m
    rule3 = build_choice(m_symb, [[m_symb, star_symb, t_symb], [t_symb]]) # m => m * t | t
    rule4 = build_production(t_symb, [int_symb]) # t => INTEGER
    [rule1, rule2, rule3, rule4]
  end

  context 'Initialization:' do
    it 'is initialized with an array of terminal symbols' do
      expect { described_class.new(all_terminals) }.not_to raise_error
    end

    it 'knows its terminal symbols' do
      expect(subject.symbols).to eq(all_terminals)
    end

    it 'ignores about productions after initialization' do
      expect(subject.rules).to be_nil
    end

    it 'maps a terminal name to one GrmSymbol object' do
      # Every symbol is registered twice: by Symbol name and by String name
      expect(subject.name2symbol.values.uniq.size).to eq(all_terminals.size)
      expect(subject.name2symbol.values.size).to eq(2 * all_terminals.size)
      expect(subject.name2symbol[:PLUS]).to eq(plus_symb)
      expect(subject.name2symbol['PLUS']).to eq(plus_symb)
    end
  end # context

  context 'Adding productions:' do
    it 'allows the addition of one production rule' do
      rule = build_production(p_symb, [s_symb])
      expect { subject.add_rule(rule) }.not_to raise_error
      expect(subject.rules.size).to eq(1)
      expect(subject.rules.first).to eq(rule)
    end

    it 'allows the addition of multiple production rules' do
      rules = build_all_rules
      rules.each { |rl| subject.add_rule(rl) }
      expect(subject.rules.size).to eq(4)
      expect(subject.rules.first).to eq(rules.first)
      expect(subject.rules.last).to eq(rules.last)
    end

    it 'updates the set of symbols when adding production rules' do
      rules = build_all_rules
      rules.each { |rl| subject.add_rule(rl) }
      [p_symb, s_symb, m_symb, t_symb].each do |symb|
        expect(subject.symbols.include?(symb)).to be_truthy
      end
    end

    it 'maps name of every non-terminal to its related GrmSymbol' do
      rules = build_all_rules
      rules.each { |rl| subject.add_rule(rl) }
      # Each non-terminal must be found by its Symbol and by its String name
      [[:p, p_symb],
       ['p', p_symb],
       [:s, s_symb],
       ['s', s_symb],
       [:m, m_symb],
       ['m', m_symb],
       [:t, t_symb],
       ['t', t_symb]].each do |(name, symb)|
        expect(subject.name2symbol[name]).to eq(symb)
      end
    end

    it 'maps every non-terminal to its defining productions' do
      rules = build_all_rules
      rules.each { |rl| subject.add_rule(rl) }
      %i[p s m t].each do |symb_name|
        symb = subject.name2symbol[symb_name]
        expected_prods = subject.rules.select { |prd| prd.head == symb }
        related_prods = subject.nonterm2productions[symb]
        expect(related_prods).to eq(expected_prods)
      end
    end
  end # context

  context 'Grammar completion:' do
    it 'detects and marks nullable symbols (I)' do
      # Case: grammar without nullable symbols
      rules = build_all_rules
      rules.each { |rl| subject.add_rule(rl) }
      subject.complete!
      expect(subject.symbols.none?(&:nullable?)).to be_truthy
    end

    it 'detects and marks nullable symbols (II)' do
      # Case: grammar with only nullable symbols
      # Grammar inspired by the paper "Practical Earley Parsing"
      terminal_a = build_terminal('a')
      nterm_s_prime = build_nonterminal("S'")
      nterm_s = build_nonterminal('S')
      nterm_a = build_nonterminal('A')
      nterm_e = build_nonterminal('E')

      instance = described_class.new([terminal_a])
      instance.add_rule(build_production(nterm_s_prime, [nterm_s]))
      instance.add_rule(build_production(nterm_s, [nterm_a, nterm_a, nterm_a, nterm_a]))
      instance.add_rule(build_choice(nterm_a, [[terminal_a], [nterm_e]]))
      instance.add_rule(build_production(nterm_e, []))

      instance.complete!
      # Inspect `instance` (not `subject`, which holds terminals only and
      # would make the expectation vacuously true)
      all_nonterminals = instance.symbols.reject(&:terminal?)
      expect(all_nonterminals).not_to be_empty
      expect(all_nonterminals.all?(&:nullable?)).to be_truthy
    end

    it 'detects unreachable symbols' do
      # Case: grammar without unreachable symbols
      rules = build_all_rules
      rules.each { |rl| subject.add_rule(rl) }
      expect(subject.send(:unreachable_symbols)).to be_empty

      # Let's add unreachable symbols
      zed_symb = build_nonterminal('Z')
      question_symb = build_nonterminal('?')
      bad_rule = build_production(zed_symb, [zed_symb, question_symb, int_symb]) # Z => Z ? INTEGER
      subject.add_rule(bad_rule)
      unreachable = subject.send(:unreachable_symbols)
      expect(unreachable).not_to be_empty
      expect(unreachable).to eq(Set.new([zed_symb, question_symb]))
    end

    it 'detects non-productive symbols' do
      # Case: grammar without non-productive symbols
      rules = build_all_rules
      rules.each { |rl| subject.add_rule(rl) }
      expect(subject.send(:mark_non_productive_symbols)).to be_empty
      expect(t_symb).to be_productive
      expect(p_symb).to be_productive

      # Grammar with non-productive symbols: D and F only refer to each other
      term_a = build_terminal('a')
      term_b = build_terminal('b')
      term_c = build_terminal('c')
      term_d = build_terminal('d')
      term_e = build_terminal('e')
      term_f = build_terminal('f')
      nterm_A = build_nonterminal('A')
      nterm_B = build_nonterminal('B')
      nterm_C = build_nonterminal('C')
      nterm_D = build_nonterminal('D')
      nterm_E = build_nonterminal('E')
      nterm_F = build_nonterminal('F')
      nterm_S = build_nonterminal('S')
      instance = described_class.new([term_a, term_b, term_c, term_d, term_e, term_f])
      instance.add_rule(build_choice(nterm_S, [[nterm_A, nterm_B], [nterm_D, nterm_E]]))
      instance.add_rule(build_production(nterm_A, [term_a]))
      instance.add_rule(build_production(nterm_B, [term_b, nterm_C]))
      instance.add_rule(build_production(nterm_C, [term_c]))
      instance.add_rule(build_production(nterm_D, [term_d, nterm_F]))
      instance.add_rule(build_production(nterm_E, [term_e]))
      instance.add_rule(build_production(nterm_F, [term_f, nterm_D]))
      nonproductive = instance.send(:mark_non_productive_symbols)
      expect(nonproductive).not_to be_empty
      expect(nonproductive).to eq([nterm_D, nterm_F])
    end
  end # context
end # describe
|
data/version.txt
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.0.
|
1
|
+
0.0.7
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: dendroid
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.7
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Dimitri Geshef
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-10-
|
11
|
+
date: 2023-10-30 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: WIP. A Ruby implementation of a Earley parser
|
14
14
|
email: famished.tiger@yahoo.com
|
@@ -23,15 +23,19 @@ files:
|
|
23
23
|
- Rakefile
|
24
24
|
- bin/dendroid
|
25
25
|
- dendroid.gemspec
|
26
|
-
- lib/dendroid
|
26
|
+
- lib/dendroid.rb
|
27
|
+
- lib/dendroid/grm_dsl/base_grm_builder.rb
|
27
28
|
- lib/dendroid/syntax/choice.rb
|
29
|
+
- lib/dendroid/syntax/grammar.rb
|
28
30
|
- lib/dendroid/syntax/grm_symbol.rb
|
29
31
|
- lib/dendroid/syntax/non_terminal.rb
|
30
32
|
- lib/dendroid/syntax/production.rb
|
31
33
|
- lib/dendroid/syntax/rule.rb
|
32
34
|
- lib/dendroid/syntax/symbol_seq.rb
|
33
35
|
- lib/dendroid/syntax/terminal.rb
|
36
|
+
- spec/dendroid/grm_dsl/base_grm_builder_spec.rb
|
34
37
|
- spec/dendroid/syntax/choice_spec.rb
|
38
|
+
- spec/dendroid/syntax/grammar_spec.rb
|
35
39
|
- spec/dendroid/syntax/grm_symbol_spec.rb
|
36
40
|
- spec/dendroid/syntax/non_terminal_spec.rb
|
37
41
|
- spec/dendroid/syntax/production_spec.rb
|
File without changes
|