dendroid 0.0.5 → 0.0.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: e94a9721000d8e9a45184107ec3488a3dad0c92337507c6753d4cbfa973400fd
4
- data.tar.gz: 03bbe70d73b5e12de42b5dca4f008f1850fb9efcc4272115e5123c7397984f4a
3
+ metadata.gz: '06667077966c902b083c8e73debebc0808bbc0952243712d19a34f26f9d0c1ba'
4
+ data.tar.gz: 56e29eb75f509f37212d9bc384c139b6e903b730308fa320e84dc4f352c93f2e
5
5
  SHA512:
6
- metadata.gz: 8dde1d9509f2d713db092e5730a1c37f1aaab2a1a58b45cb46c3e28011e672d6c27d9e630262d96a5e5597de35b9278aab8b6e2b87934bf4840e024eee0d6bea
7
- data.tar.gz: d927b08bf68f6c2a128c92c4724a533991799acf207cd13c381d4abea4cbbf8b340de17771c0a41c351bed3a0d6ac2cd523990d1c0bb699824ce14b9ccd97fe6
6
+ metadata.gz: e135fc4c2ce34cf54d226d6ed82e188174d99220645abb127877b8d7a97ea77bbce77f8b8a731c390b362e592f961f758078c1af8618c339e41b6564052f1bf3
7
+ data.tar.gz: 8b30951295517fbf46fa2a1560b4e27dbae13b101cff1841142b641711b4be6696cb50e16333e03dbcb472fa4803380a8e706c85ddab69837db148b69f8ffae4
data/.rubocop.yml CHANGED
@@ -2,9 +2,21 @@ Layout/EndOfLine:
2
2
  Enabled: true
3
3
  EnforcedStyle: lf
4
4
 
5
+ Metrics/AbcSize:
6
+ Enabled: true
7
+ Max: 25
8
+
5
9
  Metrics/BlockLength:
6
10
  Enabled: true
7
- Max: 50
11
+ Max: 75
12
+
13
+ Metrics/ClassLength:
14
+ Enabled: true
15
+ Max: 200
16
+
17
+ Metrics/MethodLength:
18
+ Enabled: true
19
+ Max: 20
8
20
 
9
21
  Naming/MethodParameterName:
10
22
  Enabled: false
data/CHANGELOG.md CHANGED
@@ -2,6 +2,14 @@
2
2
 
3
3
  ## [Unreleased]
4
4
 
5
+ ## [0.0.7] - 2023-10-30
6
+ ### Added
7
+ - Class `BaseGrmBuilder` and its spec file
8
+
9
+ ## [0.0.6] - 2023-10-30
10
+ ### Added
11
+ - Class `Grammar` and its spec file
12
+
5
13
  ## [0.0.5] - 2023-10-28
6
14
  ### Added
7
15
  - Class `Choice` and its spec file
@@ -0,0 +1,169 @@
1
+ # frozen_string_literal: true
2
+
3
# Dependencies of the grammar builder.
# Forward slashes are used as path separator: they are accepted on every
# platform, whereas backslashes are only resolved on Windows.
require_relative '../syntax/terminal'
require_relative '../syntax/non_terminal'
require_relative '../syntax/symbol_seq'
require_relative '../syntax/production'
require_relative '../syntax/choice'
require_relative '../syntax/grammar'
9
+
10
module Dendroid
  # This module contains classes that define a Domain-Specific Language specialized
  # in grammar definition.
  module GrmDSL
    # Builder GoF pattern: a builder constructs a complex object.
    # Here the builder creates a grammar from simpler objects
    # (symbols and production rules) using a step-by-step approach.
    class BaseGrmBuilder
      # @return [Symbol] one of: :declaring, :building, :complete
      attr_reader :state

      # Each symbol is registered under its `name` and under the argument
      # that was used to create it.
      # @return [Hash{Object => Dendroid::Syntax::GrmSymbol}] The mapping of grammar symbol names
      #   to the matching grammar symbol object.
      attr_reader :symbols

      # @return [Array<Dendroid::Syntax::Rule>] The list of rules of the grammar
      attr_reader :rules

      # Creates a new grammar builder object.
      # When a block is given, it is evaluated in the context of the new builder
      # and the builder then switches to state :complete.
      # @param aBlock [Proc] code block used to build the grammar.
      # @example Building a tiny English grammar
      #   builder = Dendroid::GrmDSL::BaseGrmBuilder.new do
      #     declare_terminals('n', 'v', 'adj', 'det')
      #     rule 'S' => 'NP VP'
      #     rule 'VP' => 'v NP'
      #     rule 'NP' => ['det n', 'adj NP']
      #   end
      #   # Now with `builder`, let's create the grammar
      #   tiny_eng = builder.grammar
      def initialize(&aBlock)
        @symbols = {}
        @rules = []
        @state = :declaring

        return unless block_given?

        instance_exec(&aBlock)
        grammar_complete!
      end

      # Add the given terminal symbols to the grammar of the language
      # @param terminalSymbols [String, Terminal] 1..* terminal symbols.
      # @return [Hash] the updated `symbols` mapping
      # @raise [StandardError] when the builder is no more in state :declaring
      def declare_terminals(*terminalSymbols)
        err_msg = "Terminal symbols may only be declared in state :declaring, current state is: #{state}"
        raise StandardError, err_msg unless state == :declaring

        new_symbs = build_symbols(Dendroid::Syntax::Terminal, terminalSymbols)
        symbols.merge!(new_symbs)
      end

      # Add a production rule in the grammar given one
      # key-value pair of the form: String => String.
      # Where the key is the name of the non-terminal appearing in the
      # left side of the rule.
      # When the value is a String, it is a sequence of grammar symbol names separated by space.
      # When the value is an array of String, the elements represent an alternative rhs
      # The rule is created and inserted in the grammar.
      # Symbol names that weren't declared before are registered as non-terminals.
      # @example
      #   builder.rule('sentence' => 'noun_phrase verb_phrase')
      #   builder.rule('noun_phrase' => ['noun', 'adj noun'])
      # @param productionRuleRepr [Hash{String => String|Array<String>}]
      #   A Hash-based representation of a production.
      # @return [Dendroid::Syntax::Rule, nil] The created Production or Choice instance;
      #   nil when the argument isn't a Hash (no rule is added then).
      # @raise [StandardError] when the builder is in state :complete or
      #   when a terminal symbol is used as head of the rule.
      def rule(productionRuleRepr)
        # StandardError (and not Exception) so that a plain `rescue` can handle it.
        raise StandardError, 'Cannot add a production rule in state :complete' if state == :complete

        @state = :building
        return unless productionRuleRepr.is_a?(Hash)

        head_name = productionRuleRepr.keys.first
        if symbols.include? head_name
          err_msg = "Terminal symbol '#{head_name}' may not be on left-side of a rule."
          raise StandardError, err_msg if symbols[head_name].is_a?(Dendroid::Syntax::Terminal)
        else
          symbols.merge!(build_symbols(Dendroid::Syntax::NonTerminal, [head_name]))
        end
        lhs = symbols[head_name]
        raw_rhs = productionRuleRepr.values.first

        new_prod = if raw_rhs.is_a? String
                     Dendroid::Syntax::Production.new(lhs, build_symbol_seq(raw_rhs))
                   else
                     alternatives = raw_rhs.map { |raw| build_symbol_seq(raw) }
                     Dendroid::Syntax::Choice.new(lhs, alternatives)
                   end
        rules << new_prod
        new_prod
      end

      # A method used to notify the builder that the grammar is complete
      # (i.e. all rules were entered).
      # @return [Symbol] the new state, that is, :complete
      def grammar_complete!
        @state = :complete
      end

      # Generate the grammar according to the specifications:
      # a grammar is created from the declared terminals, then every rule
      # is added to it and finally the grammar is notified of its completion.
      # @return [Dendroid::Syntax::Grammar]
      def grammar
        terminals = symbols.values.select(&:terminal?)
        grm = Dendroid::Syntax::Grammar.new(terminals)
        rules.each { |prod| grm.add_rule(prod) }
        grm.complete!
        grm
      end

      private

      # Convert a text with symbol names into a sequence of grammar symbols.
      # Names are separated by spaces and/or tabs; unknown names are
      # registered on the fly as non-terminal symbols.
      # @param raw_symbols [String] zero, one or more symbol names
      # @return [Dendroid::Syntax::SymbolSeq]
      def build_symbol_seq(raw_symbols)
        raw_stripped = raw_symbols.strip
        return Dendroid::Syntax::SymbolSeq.new([]) if raw_stripped.empty?

        symb_array = raw_stripped.split(/(?: |\t)+/).map do |symb_name|
          unless symbols.include? symb_name
            symbols.merge!(build_symbols(Dendroid::Syntax::NonTerminal, [symb_name]))
          end
          symbols[symb_name]
        end

        Dendroid::Syntax::SymbolSeq.new(symb_array)
      end

      # Add the given grammar symbols.
      # @param aClass [Class] The class of grammar symbols to instantiate.
      # @param theSymbols [Array] array of elements are treated as follows:
      #   if the element is already a grammar symbol, then it added as is,
      #   otherwise it is considered as the name of a grammar symbol
      #   of the specified class to build.
      # @return [Hash] the symbols, each one stored under its `name`
      #   and under the element it was built from.
      def build_symbols(aClass, theSymbols)
        theSymbols.each_with_object({}) do |s, symbs|
          new_symbol = build_symbol(aClass, s)
          symbs[new_symbol.name] = new_symbol
          symbs[s] = new_symbol
        end
      end

      # If the argument is already a grammar symbol object then it is
      # returned as is. Otherwise, the argument is treated as a name
      # for a new instance of the given class.
      # @param aClass [Class] The class of grammar symbols to instantiate
      # @param aSymbolArg [GrmSymbol-like or String]
      # @return [Dendroid::Syntax::GrmSymbol] the given or the newly created grammar symbol
      def build_symbol(aClass, aSymbolArg)
        return aSymbolArg if aSymbolArg.is_a?(Dendroid::Syntax::GrmSymbol)

        aClass.new(aSymbolArg)
      end
    end # class
  end # module
end # module
@@ -0,0 +1,275 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'set'
4
+
5
module Dendroid
  module Syntax
    # A grammar specifies the syntax of a language.
    # Formally, a grammar has:
    # * One start symbol,
    # * One or more production rules,
    # * Each production has a rhs that is a sequence of grammar symbols.
    # * Grammar symbols are categorized into:
    #   - terminal symbols
    #   - non-terminal symbols
    class Grammar
      # The list of grammar symbols in the language.
      # @return [Array<Dendroid::Syntax::GrmSymbol>] The terminal and non-terminal symbols.
      attr_reader :symbols

      # The list of production rules for the language.
      # It is nil as long as no rule was added with `add_rule`.
      # @return [Array<Dendroid::Syntax::Rule>, nil] Array of rules for the grammar.
      attr_reader :rules

      # A Hash that maps symbol names to their grammar symbols.
      # Every symbol is registered under `name` and under `name.to_s`.
      # @return [Hash{Object => Dendroid::Syntax::GrmSymbol}]
      attr_reader :name2symbol

      # TODO: make nonterminal - rules one-to-one
      # A Hash that maps the head of the rules to the rules added for that head.
      # It is nil as long as no rule was added with `add_rule`.
      # @return [Hash{Dendroid::Syntax::GrmSymbol => Array<Dendroid::Syntax::Rule>}, nil]
      attr_reader :nonterm2productions

      # Constructor.
      # @param terminals [Array<Dendroid::Syntax::Terminal>]
      def initialize(terminals)
        @symbols = []
        @name2symbol = {}
        add_terminals(terminals)
      end

      # Add a rule to the grammar.
      # The head of the rule and the non-terminals it refers to are
      # registered as symbols of the grammar.
      # @param rule [Dendroid::Syntax::Rule]
      # @return [Array<Dendroid::Syntax::Rule>] the rules sharing the head of the added rule
      # @raise [StandardError] when the rule was already added for the same head
      def add_rule(rule)
        @rules ||= []
        @nonterm2productions ||= {}

        # TODO: add test for duplicate productions
        if nonterm2productions[rule.head]&.include? rule
          raise StandardError, "Production rule '#{rule}' appears more than once in the grammar."
        end

        add_symbol(rule.head)
        rule.nonterminals.each { |nonterm| add_symbol(nonterm) }
        rules << rule
        (nonterm2productions[rule.head] ||= []) << rule
      end

      # Return the start symbol for the language, that is,
      # the left-hand side of the first rule that was added.
      # @return [Dendroid::Syntax::NonTerminal]
      def start_symbol
        rules.first.lhs
      end

      # An event method to notify the grammar that all grammar rules
      # have been entered. The grammar, in turn, reacts by validating the
      # production rules and then by marking the nullable symbols.
      # @raise [StandardError] when one validation check fails
      def complete!
        validate
        analyze
      end

      private

      # rubocop: disable Metrics/AbcSize
      # rubocop: disable Metrics/BlockNesting
      # rubocop: disable Metrics/MethodLength
      # rubocop: disable Metrics/PerceivedComplexity

      # Register every given terminal as a symbol of the grammar.
      def add_terminals(terminals)
        terminals.each { |term| add_symbol(term) }
      end

      # Register the symbol unless a symbol with the same name is already known.
      def add_symbol(symb)
        return if name2symbol.include? symb.name

        symbols.push(symb)
        name2symbol[symb.name] = symb
        name2symbol[symb.name.to_s] = symb
      end

      # Run all the consistency checks; each check raises an error on failure.
      def validate
        at_least_one_terminal
        are_terminals_referenced?
        are_nonterminals_rewritten?
        are_symbols_productive?
        are_symbols_reachable?
      end

      def analyze
        mark_nullable_symbols
      end

      # Does the grammar contain at least one terminal symbol?
      # @raise [StandardError] when there is no terminal symbol
      def at_least_one_terminal
        return if symbols.any?(&:terminal?)

        err_msg = "Grammar doesn't contain any terminal symbol."
        raise StandardError, err_msg
      end

      # Does every terminal symbol appear at least once
      # in a rhs of a production rule?
      # @return [true]
      # @raise [StandardError] when the terminals of the grammar differ
      #   from the terminals used in the rules
      def are_terminals_referenced?
        all_terminals = Set.new(symbols.select(&:terminal?))
        terms_in_rhs = rules.each_with_object(Set.new) do |prd, collected|
          collected.merge(prd.terminals)
        end
        return true if all_terminals == terms_in_rhs

        unused_terms = all_terminals.difference(terms_in_rhs)
        text = unused_terms.map(&:name).join("', '")
        err_msg = "Terminal symbols '#{text}' never appear in production rules."
        raise StandardError, err_msg
      end

      # Is every non-terminal symbol the head of at least one rule?
      # @return [true]
      # @raise [StandardError] when the non-terminals of the grammar differ
      #   from the heads of the rules
      def are_nonterminals_rewritten?
        all_nonterminals = Set.new(symbols.reject(&:terminal?))
        symbs_in_lhs = Set.new(rules.map(&:head))
        return true if all_nonterminals == symbs_in_lhs

        undefined_nterms = all_nonterminals.difference(symbs_in_lhs)
        text = undefined_nterms.map(&:name).join("', '")
        err_msg = "Non-terminal symbols '#{text}' never appear in head of any production rule."
        raise StandardError, err_msg
      end

      # Are all symbols reachable from start symbol?
      # @return [true]
      # @raise [StandardError] when at least one symbol is unreachable
      def are_symbols_reachable?
        unreachable = unreachable_symbols
        return true if unreachable.empty?

        text = unreachable.to_a.map(&:name).join("', '")
        err_msg = "Symbols '#{text}' are unreachable from start symbol."
        raise StandardError, err_msg
      end

      # Are all symbols productive?
      # @return [true]
      # @raise [StandardError] when at least one symbol is non-productive
      def are_symbols_productive?
        non_productive = mark_non_productive_symbols
        return true if non_productive.empty?

        text = non_productive.to_a.map(&:name).join("', '")
        err_msg = "Symbols '#{text}' are non-productive."
        raise StandardError, err_msg
      end

      # Determine the symbols that cannot be reached from the start symbol
      # by walking through the right-hand sides of the rules.
      # A non-terminal without any rule is simply not expanded.
      # @return [Set<Dendroid::Syntax::GrmSymbol>] the unreachable symbols
      def unreachable_symbols
        backlog = [start_symbol]
        set_reachable = Set.new(backlog)

        until backlog.empty?
          reachable_sym = backlog.pop
          nonterm2productions.fetch(reachable_sym, []).each do |prd|
            prd.rhs_symbols.each do |member|
              # Only non-terminals seen for the first time must be expanded
              backlog.push(member) unless member.terminal? || set_reachable.include?(member)
              set_reachable.add(member)
            end
          end
        end

        Set.new(symbols) - set_reachable
      end

      # Ask repeatedly the rules whether they are productive until no
      # further rule answers positively. The remaining rules are notified
      # with `non_productive`.
      # @return [Array<Dendroid::Syntax::GrmSymbol>] the symbols for which
      #   `productive?` is falsy
      def mark_non_productive_symbols
        backlog = Set.new(0...rules.size)
        rules.each_with_index do |prd, i|
          backlog.delete(i) if prd.productive?
        end

        until backlog.empty?
          to_remove = backlog.select { |i| rules[i].productive? }
          break if to_remove.empty? # No change in last pass: stop here

          backlog.subtract(to_remove)
        end

        backlog.each { |i| rules[i].non_productive }
        non_productive = symbols.reject(&:productive?)
        non_productive.each { |symb| symb.productive = false }
        non_productive
      end

      # Set the nullable flag of every non-terminal symbol.
      # The head of a rule with an empty rhs is directly nullable; the
      # other heads are nullable when one of their rhs sequences consists
      # of nullable symbols only.
      def mark_nullable_symbols
        nullable_found = false
        sym2seqs = {}

        nonterm2productions.each_pair do |sym, prods|
          if prods.any?(&:empty?)
            sym.nullable = nullable_found = true
          else
            sym2seqs[sym] = prods.map(&:rhs).flatten
          end
        end

        propagate_nullability(sym2seqs) if nullable_found

        symbols.each do |sym|
          next if sym.terminal?

          sym.nullable = false if sym.nullable.nil?
        end
      end

      # Scan the given sequences from left to right, one member per pass,
      # as long as the member at the current position is nullable.
      # Passes are repeated until every sequence is settled or until a pass
      # neither settles a sequence nor moves forward in any sequence.
      # @param sym2seqs [Hash{NonTerminal => Array<SymbolSeq>}] for every
      #   candidate symbol, the sequences of its rules
      def propagate_nullability(sym2seqs)
        backlog = {} # { SymbolSeq => [Integer, NonTerminal] }
        sym2seqs.each do |sym, seqs|
          seqs.each { |sq| backlog[sq] = [0, sym] }
        end

        until backlog.empty?
          advanced = false
          seqs_done = []
          backlog.each_pair do |sq, (elem_index, lhs)|
            member = sq.members[elem_index]
            if member.terminal?
              seqs_done << sq # stop with this sequence: it is non-nullable
            elsif member.nullable?
              if elem_index == sq.size - 1
                seqs_done << sq # end of sequence reached...
                lhs.nullable = true
              else
                # Only the value of an existing key is replaced during the iteration
                backlog[sq] = [elem_index + 1, lhs]
                advanced = true
              end
            end
          end

          seqs_done.each do |sq|
            next unless backlog.include? sq

            (_, lhs) = backlog[sq]
            if lhs.nullable?
              # Nullability is established: the other sequences are of no use
              sym2seqs[lhs].each { |seq| backlog.delete(seq) }
            else
              backlog.delete(sq)
            end
          end

          break if seqs_done.empty? && !advanced
        end
      end
      # rubocop: enable Metrics/AbcSize
      # rubocop: enable Metrics/BlockNesting
      # rubocop: enable Metrics/MethodLength
      # rubocop: enable Metrics/PerceivedComplexity
    end # class
  end # module
end # module
@@ -1,6 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Dendroid
4
+ # The namespace for all classes used to build a grammar.
4
5
  module Syntax
5
6
  # Abstract class for grammar symbols.
6
7
  # A grammar symbol is an element that appears in grammar rules.
@@ -0,0 +1,106 @@
1
+ # frozen_string_literal: true
2
+
3
# Forward slashes are used as path separator: they are accepted on every
# platform, whereas backslashes are only resolved on Windows.
require_relative '../../spec_helper'
require_relative '../../../lib/dendroid/grm_dsl/base_grm_builder'
5
+
6
describe Dendroid::GrmDSL::BaseGrmBuilder do
  # Ingredients for a grammar inspired from https://en.wikipedia.org/wiki/Earley_parser
  let(:terminal_names) { %w[PLUS STAR INTEGER] }
  let(:expected_rule_texts) do
    [
      'p => s',
      's => s PLUS m | m',
      'm => m STAR t | t',
      't => INTEGER'
    ]
  end

  # A builder for which the terminal symbols are already declared
  subject do
    described_class.new.tap do |builder|
      builder.declare_terminals('PLUS', 'STAR', 'INTEGER')
    end
  end

  # Check that the given grammar has the symbols of the sample grammar
  def expect_sample_grammar(grm)
    expect(grm).to be_kind_of(Dendroid::Syntax::Grammar)
    (terms, nonterms) = grm.symbols.partition(&:terminal?)
    expect(terms.map(&:name)).to eq(%i[PLUS STAR INTEGER])
    expect(nonterms.map(&:name)).to eq(%i[p s m t])
  end

  context 'Initialization:' do
    it 'is initialized with an optional code block' do
      expect { described_class.new }.not_to raise_error
    end

    it 'is in "declaring" state by default' do
      expect(described_class.new.state).to eq(:declaring)
    end

    it 'has no grammar symbol by default' do
      expect(described_class.new.symbols).to be_empty
    end

    it 'has no production rule by default' do
      expect(described_class.new.rules).to be_empty
    end
  end # context

  context 'Provided services:' do
    it 'builds declared terminal symbols' do
      instance = described_class.new
      instance.declare_terminals(*terminal_names)

      # Every terminal is retrievable by its name as a Symbol and as a String
      expect(instance.symbols.size).to eq(2 * terminal_names.size)
      terminal_names.each do |text|
        key = text.to_sym
        expect(instance.symbols[key]).to be_kind_of(Dendroid::Syntax::Terminal)
        expect(instance.symbols[text]).to eq(instance.symbols[key])
        expect(instance.symbols[key].name).to eq(key)
      end
      expect(instance.state).to eq(:declaring)
    end

    it 'builds production rules' do
      subject.rule('p' => 's')
      expect(subject.state).to eq(:building)

      # Undeclared symbols in production represent non-terminals
      %w[p s].each do |text|
        expect(subject.symbols[text]).to be_kind_of(Dendroid::Syntax::NonTerminal)
      end

      expect(subject.rules.size).to eq(1)
      expect(subject.rules.first.to_s).to eq('p => s')
    end

    it 'builds a grammar' do
      subject.rule('p' => 's')
      subject.rule('s' => ['s PLUS m', 'm'])
      subject.rule('m' => ['m STAR t', 't'])
      subject.rule('t' => 'INTEGER')
      subject.grammar_complete!

      expect_sample_grammar(subject.grammar)
      expect(subject.rules.map(&:to_s)).to eq(expected_rule_texts)
    end

    it 'provides a simple DSL' do
      instance = described_class.new do
        declare_terminals('PLUS', 'STAR', 'INTEGER')
        rule('p' => 's')
        rule('s' => ['s PLUS m', 'm'])
        rule('m' => ['m STAR t', 't'])
        rule('t' => 'INTEGER')
      end

      expect_sample_grammar(instance.grammar)
      expect(instance.rules.map(&:to_s)).to eq(expected_rule_texts)
    end
  end # context
end # describe
@@ -0,0 +1,203 @@
1
+ # frozen_string_literal: true
2
+
3
# Forward slashes are used as path separator: they are accepted on every
# platform, whereas backslashes are only resolved on Windows.
require_relative '../../spec_helper'
require_relative '../../../lib/dendroid/syntax/terminal'
require_relative '../../../lib/dendroid/syntax/non_terminal'
require_relative '../../../lib/dendroid/syntax/symbol_seq'
require_relative '../../../lib/dendroid/syntax/production'
require_relative '../../../lib/dendroid/syntax/choice'
require_relative '../../../lib/dendroid/syntax/grammar'
10
+
11
describe Dendroid::Syntax::Grammar do
  let(:int_symb) { build_terminal('INTEGER') }
  let(:plus_symb) { build_terminal('PLUS') }
  let(:star_symb) { build_terminal('STAR') }
  let(:p_symb) { build_nonterminal('p') }
  let(:s_symb) { build_nonterminal('s') }
  let(:m_symb) { build_nonterminal('m') }
  let(:t_symb) { build_nonterminal('t') }
  let(:all_terminals) { [int_symb, plus_symb, star_symb] }

  # A grammar that knows its terminal symbols only
  subject { described_class.new(all_terminals) }

  def build_terminal(name)
    Dendroid::Syntax::Terminal.new(name)
  end

  def build_nonterminal(name)
    Dendroid::Syntax::NonTerminal.new(name)
  end

  def build_symbol_seq(symbols)
    Dendroid::Syntax::SymbolSeq.new(symbols)
  end

  def build_production(lhs, symbols)
    Dendroid::Syntax::Production.new(lhs, build_symbol_seq(symbols))
  end

  def build_choice(lhs, sequences)
    Dendroid::Syntax::Choice.new(lhs, sequences.map { |arr| build_symbol_seq(arr) })
  end

  # Build the rules of a grammar for simple arithmetic expressions
  def build_all_rules
    rule1 = build_production(p_symb, [s_symb]) # p => s
    rule2 = build_choice(s_symb, [[s_symb, plus_symb, m_symb], [m_symb]]) # s => s + m | m
    rule3 = build_choice(m_symb, [[m_symb, star_symb, t_symb], [t_symb]]) # m => m * t | t
    rule4 = build_production(t_symb, [int_symb]) # t => INTEGER
    [rule1, rule2, rule3, rule4]
  end

  context 'Initialization:' do
    it 'is initialized with an array of terminal symbols' do
      expect { described_class.new(all_terminals) }.not_to raise_error
    end

    it 'knows its terminal symbols' do
      expect(subject.symbols).to eq(all_terminals)
    end

    it 'ignores about productions after initialization' do
      expect(subject.rules).to be_nil
    end

    it 'maps a terminal name to one GrmSymbol object' do
      # Every symbol is registered twice: by Symbol name and by String name
      expect(subject.name2symbol.values.uniq.size).to eq(all_terminals.size)
      expect(subject.name2symbol.values.size).to eq(2 * all_terminals.size)
      expect(subject.name2symbol[:PLUS]).to eq(plus_symb)
      expect(subject.name2symbol['PLUS']).to eq(plus_symb)
    end
  end # context

  context 'Adding productions:' do
    it 'allows the addition of one production rule' do
      rule = build_production(p_symb, [s_symb])
      expect { subject.add_rule(rule) }.not_to raise_error
      expect(subject.rules.size).to eq(1)
      expect(subject.rules.first).to eq(rule)
    end

    it 'allows the addition of multiple production rules' do
      rules = build_all_rules
      rules.each { |rl| subject.add_rule(rl) }
      expect(subject.rules.size).to eq(4)
      expect(subject.rules.first).to eq(rules.first)
      expect(subject.rules.last).to eq(rules.last)
    end

    it 'updates the set of symbols when adding production rules' do
      rules = build_all_rules
      rules.each { |rl| subject.add_rule(rl) }
      [p_symb, s_symb, m_symb, t_symb].each do |symb|
        expect(subject.symbols.include?(symb)).to be_truthy
      end
    end

    it 'maps name of every non-terminal to its related GrmSymbol' do
      rules = build_all_rules
      rules.each { |rl| subject.add_rule(rl) }
      # Each non-terminal must be found by its Symbol name and its String name
      [[:p, p_symb],
       ['p', p_symb],
       [:s, s_symb],
       ['s', s_symb],
       [:m, m_symb],
       ['m', m_symb],
       [:t, t_symb],
       ['t', t_symb]].each do |(name, symb)|
        expect(subject.name2symbol[name]).to eq(symb)
      end
    end

    it 'maps every non-terminal to its defining productions' do
      rules = build_all_rules
      rules.each { |rl| subject.add_rule(rl) }
      %i[p s m t].each do |symb_name|
        symb = subject.name2symbol[symb_name]
        expected_prods = subject.rules.select { |prd| prd.head == symb }
        related_prods = subject.nonterm2productions[symb]
        expect(related_prods).to eq(expected_prods)
      end
    end
  end # context

  context 'Grammar completion:' do
    it 'detects and marks nullable symbols (I)' do
      # Case: grammar without nullable symbols
      rules = build_all_rules
      rules.each { |rl| subject.add_rule(rl) }
      subject.complete!
      expect(subject.symbols.none?(&:nullable?)).to be_truthy
    end

    it 'detects and marks nullable symbols (II)' do
      # Case: grammar with only nullable symbols
      # Grammar inspired by the paper "Practical Earley Parsing"
      terminal_a = build_terminal('a')
      nterm_s_prime = build_nonterminal("S'")
      nterm_s = build_nonterminal('S')
      nterm_a = build_nonterminal('A')
      nterm_e = build_nonterminal('E')

      instance = described_class.new([terminal_a])
      instance.add_rule(build_production(nterm_s_prime, [nterm_s]))
      instance.add_rule(build_production(nterm_s, [nterm_a, nterm_a, nterm_a, nterm_a]))
      instance.add_rule(build_choice(nterm_a, [[terminal_a], [nterm_e]]))
      instance.add_rule(build_production(nterm_e, []))

      instance.complete!
      # The check must bear on `instance`: `subject` holds terminals only,
      # which would make the `all?` expectation vacuously true.
      all_nonterminals = instance.symbols.reject(&:terminal?)
      expect(all_nonterminals).not_to be_empty
      expect(all_nonterminals.all?(&:nullable?)).to be_truthy
    end

    it 'detects unreachable symbols' do
      # Case: grammar without unreachable symbols
      rules = build_all_rules
      rules.each { |rl| subject.add_rule(rl) }
      expect(subject.send(:unreachable_symbols)).to be_empty

      # Let's add unreachable symbols
      zed_symb = build_nonterminal('Z')
      question_symb = build_nonterminal('?')
      bad_rule = build_production(zed_symb, [zed_symb, question_symb, int_symb]) # Z => Z ? INTEGER
      subject.add_rule(bad_rule)
      unreachable = subject.send(:unreachable_symbols)
      expect(unreachable).not_to be_empty
      expect(unreachable).to eq(Set.new([zed_symb, question_symb]))
    end

    it 'detects non-productive symbols' do
      # Case: grammar without non-productive symbols
      rules = build_all_rules
      rules.each { |rl| subject.add_rule(rl) }
      expect(subject.send(:mark_non_productive_symbols)).to be_empty
      expect(t_symb).to be_productive
      expect(p_symb).to be_productive

      # Grammar with non-productive symbols
      term_a = build_terminal('a')
      term_b = build_terminal('b')
      term_c = build_terminal('c')
      term_d = build_terminal('d')
      term_e = build_terminal('e')
      term_f = build_terminal('f')
      nterm_a = build_nonterminal('A')
      nterm_b = build_nonterminal('B')
      nterm_c = build_nonterminal('C')
      nterm_d = build_nonterminal('D')
      nterm_e = build_nonterminal('E')
      nterm_f = build_nonterminal('F')
      nterm_s = build_nonterminal('S')
      instance = described_class.new([term_a, term_b, term_c, term_d, term_e, term_f])
      instance.add_rule(build_choice(nterm_s, [[nterm_a, nterm_b], [nterm_d, nterm_e]]))
      instance.add_rule(build_production(nterm_a, [term_a]))
      instance.add_rule(build_production(nterm_b, [term_b, nterm_c]))
      instance.add_rule(build_production(nterm_c, [term_c]))
      instance.add_rule(build_production(nterm_d, [term_d, nterm_f]))
      instance.add_rule(build_production(nterm_e, [term_e]))
      instance.add_rule(build_production(nterm_f, [term_f, nterm_d]))
      # D and F refer to each other without any terminating alternative
      nonproductive = instance.send(:mark_non_productive_symbols)
      expect(nonproductive).not_to be_empty
      expect(nonproductive).to eq([nterm_d, nterm_f])
    end
  end # context
end # describe
data/version.txt CHANGED
@@ -1 +1 @@
1
- 0.0.5
1
+ 0.0.7
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: dendroid
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.5
4
+ version: 0.0.7
5
5
  platform: ruby
6
6
  authors:
7
7
  - Dimitri Geshef
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2023-10-28 00:00:00.000000000 Z
11
+ date: 2023-10-30 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: WIP. A Ruby implementation of a Earley parser
14
14
  email: famished.tiger@yahoo.com
@@ -23,15 +23,19 @@ files:
23
23
  - Rakefile
24
24
  - bin/dendroid
25
25
  - dendroid.gemspec
26
- - lib/dendroid/dendroid.rb
26
+ - lib/dendroid.rb
27
+ - lib/dendroid/grm_dsl/base_grm_builder.rb
27
28
  - lib/dendroid/syntax/choice.rb
29
+ - lib/dendroid/syntax/grammar.rb
28
30
  - lib/dendroid/syntax/grm_symbol.rb
29
31
  - lib/dendroid/syntax/non_terminal.rb
30
32
  - lib/dendroid/syntax/production.rb
31
33
  - lib/dendroid/syntax/rule.rb
32
34
  - lib/dendroid/syntax/symbol_seq.rb
33
35
  - lib/dendroid/syntax/terminal.rb
36
+ - spec/dendroid/grm_dsl/base_grm_builder_spec.rb
34
37
  - spec/dendroid/syntax/choice_spec.rb
38
+ - spec/dendroid/syntax/grammar_spec.rb
35
39
  - spec/dendroid/syntax/grm_symbol_spec.rb
36
40
  - spec/dendroid/syntax/non_terminal_spec.rb
37
41
  - spec/dendroid/syntax/production_spec.rb
File without changes