dendroid 0.0.6 → 0.0.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +10 -2
- data/CHANGELOG.md +7 -0
- data/README.md +3 -1
- data/lib/dendroid/grm_analysis/dotted_item.rb +125 -0
- data/lib/dendroid/grm_dsl/base_grm_builder.rb +168 -0
- data/lib/dendroid/syntax/choice.rb +16 -5
- data/lib/dendroid/syntax/grammar.rb +15 -16
- data/lib/dendroid/syntax/grm_symbol.rb +2 -1
- data/lib/dendroid/syntax/production.rb +1 -12
- data/lib/dendroid/syntax/rule.rb +13 -0
- data/lib/dendroid/syntax/symbol_seq.rb +1 -1
- data/spec/dendroid/grm_analysis/dotted_item_spec.rb +101 -0
- data/spec/dendroid/grm_dsl/base_grm_builder_spec.rb +106 -0
- data/spec/dendroid/syntax/choice_spec.rb +14 -0
- data/spec/dendroid/syntax/grammar_spec.rb +0 -2
- data/spec/dendroid/syntax/production_spec.rb +15 -0
- data/spec/dendroid/syntax/rule_spec.rb +7 -0
- data/version.txt +1 -1
- metadata +5 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 989c88ea00734b7145dfc64ded1318c296a3678039213904c15ead0a306942a2
|
4
|
+
data.tar.gz: 73aa3efff885fcf1f71f23c7907b7c7b953fd3b174aefb9324a1326663652a9c
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 906746623d20819ce0f1923c8156a27c7ce58b2d926174e0a1dff611407bd873378272c2a2125bb665c8aec20c3beef787942c9fc7f0376aff22e7db4fb6a235
|
7
|
+
data.tar.gz: 34f3aabf74b7598a506e3acd38c9df03a7040203e8a9bda2fa02804b96ea0054d17c3044c7ca1384380bdb55af2cd80f1b9e88baa1b5c985642797cc68d80783
|
data/.rubocop.yml
CHANGED
@@ -4,7 +4,7 @@ Layout/EndOfLine:
|
|
4
4
|
|
5
5
|
Metrics/AbcSize:
|
6
6
|
Enabled: true
|
7
|
-
Max:
|
7
|
+
Max: 29
|
8
8
|
|
9
9
|
Metrics/BlockLength:
|
10
10
|
Enabled: true
|
@@ -13,10 +13,18 @@ Metrics/BlockLength:
|
|
13
13
|
Metrics/ClassLength:
|
14
14
|
Enabled: true
|
15
15
|
Max: 200
|
16
|
+
|
17
|
+
Metrics/CyclomaticComplexity:
|
18
|
+
Enabled: true
|
19
|
+
Max: 10
|
16
20
|
|
17
21
|
Metrics/MethodLength:
|
18
22
|
Enabled: true
|
19
|
-
Max:
|
23
|
+
Max: 30
|
24
|
+
|
25
|
+
Metrics/PerceivedComplexity:
|
26
|
+
Enabled: true
|
27
|
+
Max: 10
|
20
28
|
|
21
29
|
Naming/MethodParameterName:
|
22
30
|
Enabled: false
|
data/CHANGELOG.md
CHANGED
data/README.md
CHANGED
@@ -0,0 +1,125 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Dendroid
|
4
|
+
module GrmAnalysis
|
5
|
+
# For a given production rule, a dotted item represents a recognition state.
|
6
|
+
# The dot partitions the rhs of the rule in two parts:
|
7
|
+
# a) the left part consists of the symbols in the rhs that are matched
|
8
|
+
# by the input tokens.
|
9
|
+
# b) The right part consists of symbols that are predicted to match the
|
10
|
+
# input tokens.
|
11
|
+
# The terminology stems from the traditional way to visualize the partition
|
12
|
+
# by using a fat dot character as a separator between the left and right
|
13
|
+
# parts.
|
14
|
+
# An item with the dot at the beginning (i.e. before any rhs symbol)
|
15
|
+
# is called a predicted item.
|
16
|
+
# An item with the dot at the end (i.e. after all rhs symbols)
|
17
|
+
# is called a reduce item.
|
18
|
+
# An item with a dot in front of a terminal is called a shift item.
|
19
|
+
# An item with the dot not at the beginning is sometimes referred to as a kernel item
|
20
|
+
class DottedItem
|
21
|
+
# Reference to the production rule
|
22
|
+
# @return [Dendroid::Syntax::Production]
|
23
|
+
attr_reader :rule
|
24
|
+
|
25
|
+
# @return [Integer] the dot position
|
26
|
+
attr_reader :position
|
27
|
+
|
28
|
+
# Constructor.
|
29
|
+
# @param aRule [Dendroid::Syntax::Rule]
|
30
|
+
# @param aPosition [Integer] Position of the dot in rhs of production.
|
31
|
+
def initialize(aRule, aPosition)
|
32
|
+
@rule = aRule
|
33
|
+
@position = valid_position(aPosition)
|
34
|
+
end
|
35
|
+
|
36
|
+
# Return a String representation of the dotted item.
|
37
|
+
# @return [String]
|
38
|
+
def to_s
|
39
|
+
rhs_names = rule.body.map(&:to_s)
|
40
|
+
dotted_rhs = rhs_names.insert(position, '.')
|
41
|
+
"#{rule.head} => #{dotted_rhs.join(' ')}"
|
42
|
+
end
|
43
|
+
|
44
|
+
# Indicate whether the rhs of the rule is empty
|
45
|
+
# @return [Boolean]
|
46
|
+
def empty?
|
47
|
+
rule.empty?
|
48
|
+
end
|
49
|
+
|
50
|
+
# Terminology inspired from Luger's book
|
51
|
+
# @return [Symbol] one of: :initial, :initial_and_completed, :partial, :completed
|
52
|
+
def state
|
53
|
+
return :initial_and_completed if empty?
|
54
|
+
return :initial if position.zero?
|
55
|
+
|
56
|
+
position == rule.body.size ? :completed : :partial
|
57
|
+
end
|
58
|
+
|
59
|
+
# Indicate whether the dot is at the start of rhs
|
60
|
+
# @return [Boolean]
|
61
|
+
def initial_pos?
|
62
|
+
position.zero? || empty?
|
63
|
+
end
|
64
|
+
|
65
|
+
# Indicate whether the dot is at the end of rhs
|
66
|
+
# @return [Boolean]
|
67
|
+
def final_pos?
|
68
|
+
empty? || position == rule.body.size
|
69
|
+
end
|
70
|
+
|
71
|
+
alias completed? final_pos?
|
72
|
+
|
73
|
+
# Indicate the dot isn't at start nor at end position
|
74
|
+
# @return [Boolean]
|
75
|
+
def intermediate_pos?
|
76
|
+
return false if empty? || position.zero?
|
77
|
+
|
78
|
+
position < rule.body.size
|
79
|
+
end
|
80
|
+
|
81
|
+
# Return the symbol right after the dot (if any)
|
82
|
+
# @return [Dendroid::Syntax::GrmSymbol, NilClass]
|
83
|
+
def next_symbol
|
84
|
+
return nil if empty? || completed?
|
85
|
+
|
86
|
+
rule.body[position]
|
87
|
+
end
|
88
|
+
|
89
|
+
# Check whether the given symbol is the same as after the dot.
|
90
|
+
# @param [Dendroid::Syntax::GrmSymbol]
|
91
|
+
# @return [Boolean]
|
92
|
+
def expecting?(aSymbol)
|
93
|
+
actual = next_symbol
|
94
|
+
return false if actual.nil?
|
95
|
+
|
96
|
+
actual == aSymbol
|
97
|
+
end
|
98
|
+
|
99
|
+
# Check whether the dotted item is a shift item.
|
100
|
+
# In other words, it expects a terminal to be next symbol
|
101
|
+
# @return [Boolean]
|
102
|
+
def pre_scan?
|
103
|
+
next_symbol&.terminal?
|
104
|
+
end
|
105
|
+
|
106
|
+
# Test for equality with another dotted item.
|
107
|
+
# Two dotted items are equal if they refer to the same rule and
|
108
|
+
# have both the same rhs and dot positions.
|
109
|
+
# @return [Boolean]
|
110
|
+
def ==(other)
|
111
|
+
return true if eql?(other)
|
112
|
+
|
113
|
+
(position == other.position) && rule.eql?(other.rule)
|
114
|
+
end
|
115
|
+
|
116
|
+
private
|
117
|
+
|
118
|
+
def valid_position(aPosition)
|
119
|
+
raise StandardError if aPosition.negative? || aPosition > rule.body.size
|
120
|
+
|
121
|
+
aPosition
|
122
|
+
end
|
123
|
+
end # class
|
124
|
+
end # module
|
125
|
+
end # module
|
@@ -0,0 +1,168 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative '..\syntax\terminal'
|
4
|
+
require_relative '..\syntax\non_terminal'
|
5
|
+
require_relative '..\syntax\symbol_seq'
|
6
|
+
require_relative '..\syntax\production'
|
7
|
+
require_relative '..\syntax\choice'
|
8
|
+
require_relative '..\syntax\grammar'
|
9
|
+
|
10
|
+
module Dendroid
|
11
|
+
# This module contains classes that define Domain-Specific Language specialized
|
12
|
+
# in grammar definition.
|
13
|
+
module GrmDSL
|
14
|
+
# Builder GoF pattern: Builder builds a complex object.
|
15
|
+
# here the builder creates a grammar from simpler objects
|
16
|
+
# (symbols and production rules)
|
17
|
+
# and using a step by step approach.
|
18
|
+
class BaseGrmBuilder
|
19
|
+
# @return [Symbol] one of: :declaring, :building, :complete
|
20
|
+
attr_reader :state
|
21
|
+
|
22
|
+
# @return [Hash{String, Dendroid::Syntax::GrmSymbol}] The mapping of grammar symbol names
|
23
|
+
# to the matching grammar symbol object.
|
24
|
+
attr_reader :symbols
|
25
|
+
|
26
|
+
# @return [Array<Dendroid::Syntax::Rule>] The list of rules of the grammar
|
27
|
+
attr_reader :rules
|
28
|
+
|
29
|
+
# Creates a new grammar builder object.
|
30
|
+
# @param aBlock [Proc] code block used to build the grammar.
|
31
|
+
# @example Building a tiny English grammar
|
32
|
+
# builder = Rley::Syntax::GrammarBuilder.new do
|
33
|
+
# declare_terminals('n', 'v', 'adj', 'det')
|
34
|
+
# rule 'S' => 'NP VP'
|
35
|
+
# rule 'VP' => 'v NP'
|
36
|
+
# rule 'NP' => ['det n', 'adj NP']
|
37
|
+
# end
|
38
|
+
# # Now with `builder`, let's create the grammar
|
39
|
+
# tiny_eng = builder.grammar
|
40
|
+
def initialize(&aBlock)
|
41
|
+
@symbols = {}
|
42
|
+
@rules = []
|
43
|
+
@state = :declaring
|
44
|
+
|
45
|
+
return unless block_given?
|
46
|
+
|
47
|
+
instance_exec(&aBlock)
|
48
|
+
grammar_complete!
|
49
|
+
end
|
50
|
+
|
51
|
+
# Add the given terminal symbols to the grammar of the language
|
52
|
+
# @param terminalSymbols [String, Terminal] 1..* terminal symbols.
|
53
|
+
# @return [void]
|
54
|
+
def declare_terminals(*terminalSymbols)
|
55
|
+
err_msg = "Terminal symbols may only be declared in state :declaring, current state is: #{state}"
|
56
|
+
raise StandardError, err_msg unless state == :declaring
|
57
|
+
|
58
|
+
new_symbs = build_symbols(Dendroid::Syntax::Terminal, terminalSymbols)
|
59
|
+
symbols.merge!(new_symbs)
|
60
|
+
end
|
61
|
+
|
62
|
+
# Add a production rule in the grammar given one
|
63
|
+
# key-value pair of the form: String => String.
|
64
|
+
# Where the key is the name of the non-terminal appearing in the
|
65
|
+
# left side of the rule.
|
66
|
+
# When the value is a String, it is a sequence of grammar symbol names separated by space.
|
67
|
+
# When the value is an array of String, the elements represent an alternative rhs
|
68
|
+
# The rule is created and inserted in the grammar.
|
69
|
+
# @example
|
70
|
+
# builder.rule('sentence' => 'noun_phrase verb_phrase')
|
71
|
+
# builder.rule('noun_phrase' => ['noun', 'adj noun'])
|
72
|
+
# @param productionRuleRepr [Hash{String, String|Array<String>}]
|
73
|
+
# A Hash-based representation of a production.
|
74
|
+
# @return [Dendroid::Syntax::Rule] The created Production or Choice instance
|
75
|
+
def rule(productionRuleRepr)
|
76
|
+
raise StandardError, 'Cannot add a production rule in state :complete' if state == :complete
|
77
|
+
|
78
|
+
@state = :building
|
79
|
+
|
80
|
+
return nil unless productionRuleRepr.is_a?(Hash)
|
81
|
+
|
82
|
+
head_name = productionRuleRepr.keys.first
|
83
|
+
if symbols.include? head_name
|
84
|
+
err_msg = "Terminal symbol '#{head_name}' may not be on left-side of a rule."
|
85
|
+
raise StandardError, err_msg if symbols[head_name].is_a?(Dendroid::Syntax::Terminal)
|
86
|
+
else
|
87
|
+
symbols.merge!(build_symbols(Dendroid::Syntax::NonTerminal, [head_name]))
|
88
|
+
end
|
89
|
+
lhs = symbols[head_name]
|
90
|
+
raw_rhs = productionRuleRepr.values.first
|
91
|
+
|
92
|
+
if raw_rhs.is_a? String
|
93
|
+
new_prod = Dendroid::Syntax::Production.new(lhs, build_symbol_seq(raw_rhs))
|
94
|
+
else
|
95
|
+
rhs = raw_rhs.map { |raw| build_symbol_seq(raw) }
|
96
|
+
new_prod = Dendroid::Syntax::Choice.new(lhs, rhs)
|
97
|
+
end
|
98
|
+
rules << new_prod
|
99
|
+
new_prod
|
100
|
+
end
|
101
|
+
|
102
|
+
# A method used to notify the builder that the grammar is complete
|
103
|
+
# (i.e. all rules were entered).
|
104
|
+
def grammar_complete!
|
105
|
+
@state = :complete
|
106
|
+
end
|
107
|
+
|
108
|
+
# Generate the grammar according to the specifications.
|
109
|
+
# @return [Dendroid::Syntax::Grammar]
|
110
|
+
def grammar
|
111
|
+
terminals = symbols.values.select(&:terminal?)
|
112
|
+
grm = Dendroid::Syntax::Grammar.new(terminals)
|
113
|
+
rules.each { |prod| grm.add_rule(prod) }
|
114
|
+
grm.complete!
|
115
|
+
grm
|
116
|
+
end
|
117
|
+
|
118
|
+
private
|
119
|
+
|
120
|
+
def build_symbol_seq(raw_symbols)
|
121
|
+
symb_array = []
|
122
|
+
raw_stripped = raw_symbols.strip
|
123
|
+
return Dendroid::Syntax::SymbolSeq.new([]) if raw_stripped.empty?
|
124
|
+
|
125
|
+
symbol_names = raw_stripped.split(/(?: |\t)+/)
|
126
|
+
symbol_names.each do |symb_name|
|
127
|
+
unless symbols.include?(symb_name)
|
128
|
+
symbols.merge!(build_symbols(Dendroid::Syntax::NonTerminal, [symb_name]))
|
129
|
+
end
|
130
|
+
symb_array << symbols[symb_name]
|
131
|
+
end
|
132
|
+
|
133
|
+
Dendroid::Syntax::SymbolSeq.new(symb_array)
|
134
|
+
end
|
135
|
+
|
136
|
+
# Add the given grammar symbols.
|
137
|
+
# @param aClass [Class] The class of grammar symbols to instantiate.
|
138
|
+
# @param theSymbols [Array] array of elements are treated as follows:
|
139
|
+
# if the element is already a grammar symbol, then it added as is,
|
140
|
+
# otherwise it is considered as the name of a grammar symbol
|
141
|
+
# of the specified class to build.
|
142
|
+
def build_symbols(aClass, theSymbols)
|
143
|
+
symbs = {}
|
144
|
+
theSymbols.each do |s|
|
145
|
+
new_symbol = build_symbol(aClass, s)
|
146
|
+
symbs[new_symbol.name] = new_symbol
|
147
|
+
symbs[s] = new_symbol
|
148
|
+
end
|
149
|
+
|
150
|
+
symbs
|
151
|
+
end
|
152
|
+
|
153
|
+
# If the argument is already a grammar symbol object then it is
|
154
|
+
# returned as is. Otherwise, the argument is treated as a name
|
155
|
+
# for a new instance of the given class.
|
156
|
+
# @param aClass [Class] The class of grammar symbols to instantiate
|
157
|
+
# @param aSymbolArg [GrmSymbol-like or String]
|
158
|
+
# @return [Array] list of grammar symbols
|
159
|
+
def build_symbol(aClass, aSymbolArg)
|
160
|
+
if aSymbolArg.is_a?(Dendroid::Syntax::GrmSymbol)
|
161
|
+
aSymbolArg
|
162
|
+
else
|
163
|
+
aClass.new(aSymbolArg)
|
164
|
+
end
|
165
|
+
end
|
166
|
+
end # class
|
167
|
+
end # module
|
168
|
+
end # module
|
@@ -66,15 +66,26 @@ module Dendroid
|
|
66
66
|
private
|
67
67
|
|
68
68
|
def valid_alternatives(alt)
|
69
|
+
raise StandardError, "Expecting an Array, found a #{rhs.class} instead." unless alt.is_a?(Array)
|
70
|
+
|
69
71
|
if alt.size < 2
|
70
72
|
# A choice must have at least two alternatives
|
71
|
-
raise StandardError
|
73
|
+
raise StandardError, "The choice for `#{head}` must have at least two alternatives."
|
72
74
|
end
|
73
75
|
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
76
|
+
# Verify that each array element is a valid symbol sequence
|
77
|
+
alt.each { |elem| valid_sequence(elem) }
|
78
|
+
|
79
|
+
# Fail when duplicate rhs found
|
80
|
+
alt_texts = alt.map(&:to_s)
|
81
|
+
no_duplicate = alt_texts.uniq
|
82
|
+
if alt_texts.size > no_duplicate.size
|
83
|
+
alt_texts.each_with_index do |str, i|
|
84
|
+
next if str == no_duplicate[i]
|
85
|
+
|
86
|
+
err_msg = "Duplicate alternatives: #{head} => #{alt_texts[i]}"
|
87
|
+
raise StandardError, err_msg
|
88
|
+
end
|
78
89
|
end
|
79
90
|
|
80
91
|
alt
|
@@ -49,6 +49,7 @@ module Dendroid
|
|
49
49
|
if nonterm2productions[rule.head]&.include? rule
|
50
50
|
raise StandardError, "Production rule '#{production}' appears more than once in the grammar."
|
51
51
|
end
|
52
|
+
|
52
53
|
add_symbol(rule.head)
|
53
54
|
rule.nonterminals.each { |nonterm| add_symbol(nonterm) }
|
54
55
|
rules << rule
|
@@ -104,10 +105,10 @@ module Dendroid
|
|
104
105
|
def at_least_one_terminal
|
105
106
|
found = symbols.any?(&:terminal?)
|
106
107
|
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
108
|
+
return true if found
|
109
|
+
|
110
|
+
err_msg = "Grammar doesn't contain any terminal symbol."
|
111
|
+
raise StandardError, err_msg
|
111
112
|
end
|
112
113
|
|
113
114
|
# Does every terminal symbol appear at least once
|
@@ -173,7 +174,6 @@ module Dendroid
|
|
173
174
|
reachable_sym = backlog.pop
|
174
175
|
prods = nonterm2productions[reachable_sym]
|
175
176
|
prods.each do |prd|
|
176
|
-
# prd.body.members.each do |member|
|
177
177
|
prd.rhs_symbols.each do |member|
|
178
178
|
unless member.terminal? || set_reachable.include?(member)
|
179
179
|
backlog.push(member)
|
@@ -184,7 +184,7 @@ module Dendroid
|
|
184
184
|
end until backlog.empty?
|
185
185
|
|
186
186
|
all_symbols = Set.new(symbols)
|
187
|
-
|
187
|
+
all_symbols - set_reachable
|
188
188
|
end
|
189
189
|
|
190
190
|
def mark_non_productive_symbols
|
@@ -194,7 +194,6 @@ module Dendroid
|
|
194
194
|
backlog.delete(i) if prd.productive?
|
195
195
|
end
|
196
196
|
until backlog.empty?
|
197
|
-
size_before = backlog.size
|
198
197
|
to_remove = []
|
199
198
|
backlog.each do |i|
|
200
199
|
prd = rules[i]
|
@@ -232,7 +231,7 @@ module Dendroid
|
|
232
231
|
begin
|
233
232
|
seqs_done = []
|
234
233
|
backlog.each_pair do |sq, (elem_index, lhs)|
|
235
|
-
member = sq
|
234
|
+
member = sq[elem_index]
|
236
235
|
if member.terminal?
|
237
236
|
seqs_done << sq # stop with this sequence: it is non-nullable
|
238
237
|
backlog[sq] = [-1, lhs]
|
@@ -247,14 +246,14 @@ module Dendroid
|
|
247
246
|
end
|
248
247
|
end
|
249
248
|
seqs_done.each do |sq|
|
250
|
-
|
251
|
-
|
252
|
-
|
253
|
-
|
254
|
-
|
255
|
-
|
256
|
-
|
257
|
-
|
249
|
+
next unless backlog.include? sq
|
250
|
+
|
251
|
+
(_, lhs) = backlog[sq]
|
252
|
+
if lhs.nullable?
|
253
|
+
to_drop = sym2seqs[lhs]
|
254
|
+
to_drop.each { |seq| backlog.delete(seq) }
|
255
|
+
else
|
256
|
+
backlog.delete(sq)
|
258
257
|
end
|
259
258
|
end
|
260
259
|
end until backlog.empty? || seqs_done.empty?
|
@@ -1,6 +1,7 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
module Dendroid
|
4
|
+
# The namespace for all classes used to build a grammar.
|
4
5
|
module Syntax
|
5
6
|
# Abstract class for grammar symbols.
|
6
7
|
# A grammar symbol is an element that appears in grammar rules.
|
@@ -20,7 +21,7 @@ module Dendroid
|
|
20
21
|
name.to_s
|
21
22
|
end
|
22
23
|
|
23
|
-
# Equality testing (based on symbol
|
24
|
+
# Equality testing (based on symbol names)
|
24
25
|
# @return [Boolean]
|
25
26
|
def ==(other)
|
26
27
|
name == other.name
|
@@ -15,7 +15,7 @@ module Dendroid
|
|
15
15
|
# @param rhs [Dendroid::Syntax::SymbolSeq] the sequence of symbols on rhs.
|
16
16
|
def initialize(lhs, rhs)
|
17
17
|
super(lhs)
|
18
|
-
@body =
|
18
|
+
@body = valid_sequence(rhs)
|
19
19
|
end
|
20
20
|
|
21
21
|
# Predicate method to check whether the rule body (its rhs) is empty.
|
@@ -73,17 +73,6 @@ module Dendroid
|
|
73
73
|
|
74
74
|
private
|
75
75
|
|
76
|
-
def valid_body(rhs)
|
77
|
-
raise StandardError unless rhs.is_a?(SymbolSeq)
|
78
|
-
|
79
|
-
if rhs.size == 1 && lhs == rhs.first
|
80
|
-
# Forbid cyclic rules (e.g. A => A)
|
81
|
-
raise StandardError.new, "Cyclic rule of the kind #{lhs} => #{lhs} is not allowed."
|
82
|
-
end
|
83
|
-
|
84
|
-
rhs
|
85
|
-
end
|
86
|
-
|
87
76
|
def productive=(val)
|
88
77
|
@productive = val
|
89
78
|
lhs.productive = val
|
data/lib/dendroid/syntax/rule.rb
CHANGED
@@ -47,6 +47,19 @@ module Dendroid
|
|
47
47
|
rhs_symbols.select(&:terminal?)
|
48
48
|
end
|
49
49
|
|
50
|
+
protected
|
51
|
+
|
52
|
+
def valid_sequence(rhs)
|
53
|
+
raise StandardError, "Expecting a SymbolSeq, found a #{rhs.class} instead." unless rhs.is_a?(SymbolSeq)
|
54
|
+
|
55
|
+
if rhs.size == 1 && lhs == rhs.first
|
56
|
+
# Forbid cyclic rules (e.g. A => A)
|
57
|
+
raise StandardError.new, "Cyclic rules of the kind #{lhs} => #{lhs} are not allowed."
|
58
|
+
end
|
59
|
+
|
60
|
+
rhs
|
61
|
+
end
|
62
|
+
|
50
63
|
private
|
51
64
|
|
52
65
|
def valid_head(lhs)
|
@@ -13,7 +13,7 @@ module Dendroid
|
|
13
13
|
# @return [Array<Dendroid::Syntax::GrmSymbol>] The sequence of symbols
|
14
14
|
attr_reader :members
|
15
15
|
|
16
|
-
def_delegators(:@members, :empty?, :first, :map, :size)
|
16
|
+
def_delegators(:@members, :[], :empty?, :first, :map, :size)
|
17
17
|
|
18
18
|
# Create a sequence of grammar symbols (as in right-hand side of
|
19
19
|
# a production rule).
|
@@ -0,0 +1,101 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative '..\..\spec_helper'
|
4
|
+
require_relative '..\..\..\lib\dendroid\syntax\terminal'
|
5
|
+
require_relative '..\..\..\lib\dendroid\syntax\non_terminal'
|
6
|
+
require_relative '..\..\..\lib\dendroid\syntax\symbol_seq'
|
7
|
+
require_relative '..\..\..\lib\dendroid\syntax\production'
|
8
|
+
require_relative '..\..\..\lib\dendroid\grm_analysis\dotted_item'
|
9
|
+
|
10
|
+
describe Dendroid::GrmAnalysis::DottedItem do
|
11
|
+
let(:num_symb) { Dendroid::Syntax::Terminal.new('NUMBER') }
|
12
|
+
let(:plus_symb) { Dendroid::Syntax::Terminal.new('PLUS') }
|
13
|
+
let(:expr_symb) { Dendroid::Syntax::NonTerminal.new('expression') }
|
14
|
+
let(:rhs) { Dendroid::Syntax::SymbolSeq.new([num_symb, plus_symb, num_symb]) }
|
15
|
+
let(:empty_body) { Dendroid::Syntax::SymbolSeq.new([]) }
|
16
|
+
let(:prod) { Dendroid::Syntax::Production.new(expr_symb, rhs) }
|
17
|
+
let(:empty_prod) { Dendroid::Syntax::Production.new(expr_symb, empty_body) }
|
18
|
+
|
19
|
+
# Implements a dotted item: expression => NUMBER . PLUS NUMBER
|
20
|
+
subject { described_class.new(prod, 1) }
|
21
|
+
|
22
|
+
context 'Initialization:' do
|
23
|
+
it 'is initialized with a production and a dot position' do
|
24
|
+
expect { described_class.new(prod, 1) }.not_to raise_error
|
25
|
+
end
|
26
|
+
|
27
|
+
it 'knows its related production' do
|
28
|
+
expect(subject.rule).to eq(prod)
|
29
|
+
end
|
30
|
+
|
31
|
+
it 'knows its position' do
|
32
|
+
expect(subject.position).to eq(1)
|
33
|
+
end
|
34
|
+
end # context
|
35
|
+
|
36
|
+
context 'Provided services:' do
|
37
|
+
it 'renders a String representation of itself' do
|
38
|
+
expect(subject.to_s).to eq('expression => NUMBER . PLUS NUMBER')
|
39
|
+
end
|
40
|
+
|
41
|
+
it 'knows its state' do
|
42
|
+
expect(described_class.new(prod, 0).state).to eq(:initial)
|
43
|
+
expect(described_class.new(prod, 1).state).to eq(:partial)
|
44
|
+
expect(described_class.new(prod, 3).state).to eq(:completed)
|
45
|
+
|
46
|
+
# Case of an empty production
|
47
|
+
expect(described_class.new(empty_prod, 0).state).to eq(:initial_and_completed)
|
48
|
+
end
|
49
|
+
|
50
|
+
it 'knows whether it is in the initial position' do
|
51
|
+
expect(described_class.new(prod, 0)).to be_initial_pos
|
52
|
+
expect(described_class.new(prod, 2)).not_to be_initial_pos
|
53
|
+
expect(described_class.new(prod, 3)).not_to be_initial_pos
|
54
|
+
|
55
|
+
# Case of an empty production
|
56
|
+
expect(described_class.new(empty_prod, 0)).to be_initial_pos
|
57
|
+
end
|
58
|
+
|
59
|
+
it 'knows whether it is in the final position' do
|
60
|
+
expect(described_class.new(prod, 0)).not_to be_final_pos
|
61
|
+
expect(described_class.new(prod, 2)).not_to be_final_pos
|
62
|
+
expect(described_class.new(prod, 3)).to be_final_pos
|
63
|
+
expect(described_class.new(prod, 3)).to be_completed
|
64
|
+
|
65
|
+
# Case of an empty production
|
66
|
+
expect(described_class.new(empty_prod, 0)).to be_final_pos
|
67
|
+
end
|
68
|
+
|
69
|
+
it 'knows whether it is in an intermediate position' do
|
70
|
+
expect(described_class.new(prod, 0)).not_to be_intermediate_pos
|
71
|
+
expect(described_class.new(prod, 2)).to be_intermediate_pos
|
72
|
+
expect(described_class.new(prod, 3)).not_to be_intermediate_pos
|
73
|
+
|
74
|
+
# Case of an empty production
|
75
|
+
expect(described_class.new(empty_prod, 0)).not_to be_intermediate_pos
|
76
|
+
end
|
77
|
+
|
78
|
+
it 'knows the symbol after the dot (if any)' do
|
79
|
+
expect(described_class.new(prod, 0).next_symbol.name).to eq(:NUMBER)
|
80
|
+
expect(described_class.new(prod, 1).next_symbol.name).to eq(:PLUS)
|
81
|
+
expect(described_class.new(prod, 2).next_symbol.name).to eq(:NUMBER)
|
82
|
+
expect(described_class.new(prod, 3).next_symbol).to be_nil
|
83
|
+
|
84
|
+
# Case of an empty production
|
85
|
+
expect(described_class.new(empty_prod, 0).next_symbol).to be_nil
|
86
|
+
end
|
87
|
+
|
88
|
+
it 'can compare a given symbol to the expected one' do
|
89
|
+
expect(described_class.new(prod, 0)).to be_expecting(num_symb)
|
90
|
+
expect(described_class.new(prod, 0)).not_to be_expecting(plus_symb)
|
91
|
+
expect(described_class.new(prod, 1)).to be_expecting(plus_symb)
|
92
|
+
expect(described_class.new(prod, 2)).to be_expecting(num_symb)
|
93
|
+
expect(described_class.new(prod, 3)).not_to be_expecting(num_symb)
|
94
|
+
expect(described_class.new(prod, 3)).not_to be_expecting(plus_symb)
|
95
|
+
|
96
|
+
# Case of an empty production
|
97
|
+
expect(described_class.new(empty_prod, 0)).not_to be_expecting(num_symb)
|
98
|
+
expect(described_class.new(empty_prod, 0)).not_to be_expecting(plus_symb)
|
99
|
+
end
|
100
|
+
end # context
|
101
|
+
end # describe
|
@@ -0,0 +1,106 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative '..\..\spec_helper'
|
4
|
+
require_relative '..\..\..\lib\dendroid\grm_dsl\base_grm_builder'
|
5
|
+
|
6
|
+
describe Dendroid::GrmDSL::BaseGrmBuilder do
|
7
|
+
# Builds ingredients for a grammar inspired from https://en.wikipedia.org/wiki/Earley_parser
|
8
|
+
subject do
|
9
|
+
instance = described_class.new
|
10
|
+
instance.declare_terminals('PLUS', 'STAR', 'INTEGER')
|
11
|
+
instance
|
12
|
+
end
|
13
|
+
|
14
|
+
context 'Initialization:' do
|
15
|
+
it 'is initialized with an optional code block' do
|
16
|
+
expect { described_class.new }.not_to raise_error
|
17
|
+
end
|
18
|
+
|
19
|
+
it 'is in "declaring" state by default' do
|
20
|
+
expect(described_class.new.state).to eq(:declaring)
|
21
|
+
end
|
22
|
+
|
23
|
+
it 'has no grammar symbol by default' do
|
24
|
+
expect(described_class.new.symbols).to be_empty
|
25
|
+
end
|
26
|
+
|
27
|
+
it 'has no production rule by default' do
|
28
|
+
expect(described_class.new.rules).to be_empty
|
29
|
+
end
|
30
|
+
end # context
|
31
|
+
|
32
|
+
context 'Provided services:' do
|
33
|
+
it 'builds declared terminal symbols' do
|
34
|
+
instance = described_class.new
|
35
|
+
terminals = %w[PLUS STAR INTEGER]
|
36
|
+
instance.declare_terminals(*terminals)
|
37
|
+
expect(instance.symbols.size).to eq(2 * terminals.size)
|
38
|
+
expect(instance.symbols[:PLUS]).to be_kind_of(Dendroid::Syntax::Terminal)
|
39
|
+
expect(instance.symbols['PLUS']).to eq(instance.symbols[:PLUS])
|
40
|
+
expect(instance.symbols[:PLUS].name).to eq(:PLUS)
|
41
|
+
expect(instance.symbols[:STAR]).to be_kind_of(Dendroid::Syntax::Terminal)
|
42
|
+
expect(instance.symbols['STAR']).to eq(instance.symbols[:STAR])
|
43
|
+
expect(instance.symbols[:STAR].name).to eq(:STAR)
|
44
|
+
expect(instance.symbols[:INTEGER]).to be_kind_of(Dendroid::Syntax::Terminal)
|
45
|
+
expect(instance.symbols['INTEGER']).to eq(instance.symbols[:INTEGER])
|
46
|
+
expect(instance.symbols[:INTEGER].name).to eq(:INTEGER)
|
47
|
+
expect(instance.state).to eq(:declaring)
|
48
|
+
end
|
49
|
+
|
50
|
+
it 'builds production rules' do
|
51
|
+
subject.rule('p' => 's')
|
52
|
+
expect(subject.state).to eq(:building)
|
53
|
+
|
54
|
+
# Undeclared symbols in production represent non-terminals
|
55
|
+
expect(subject.symbols['p']).to be_kind_of(Dendroid::Syntax::NonTerminal)
|
56
|
+
expect(subject.symbols['s']).to be_kind_of(Dendroid::Syntax::NonTerminal)
|
57
|
+
|
58
|
+
expect(subject.rules.size).to eq(1)
|
59
|
+
expect(subject.rules.first.to_s).to eq('p => s')
|
60
|
+
end
|
61
|
+
|
62
|
+
it 'builds a grammar' do
|
63
|
+
subject.rule('p' => 's')
|
64
|
+
subject.rule('s' => ['s PLUS m', 'm'])
|
65
|
+
subject.rule('m' => ['m STAR t', 't'])
|
66
|
+
subject.rule('t' => 'INTEGER')
|
67
|
+
subject.grammar_complete!
|
68
|
+
|
69
|
+
grm = subject.grammar
|
70
|
+
expect(grm).to be_kind_of(Dendroid::Syntax::Grammar)
|
71
|
+
(terms, nonterms) = grm.symbols.partition(&:terminal?)
|
72
|
+
expect(terms.map(&:name)).to eq(%i[PLUS STAR INTEGER])
|
73
|
+
expect(nonterms.map(&:name)).to eq(%i[p s m t])
|
74
|
+
grammar_rules = [
|
75
|
+
'p => s',
|
76
|
+
's => s PLUS m | m',
|
77
|
+
'm => m STAR t | t',
|
78
|
+
't => INTEGER'
|
79
|
+
]
|
80
|
+
expect(subject.rules.map(&:to_s)).to eq(grammar_rules)
|
81
|
+
end
|
82
|
+
|
83
|
+
it 'provides a simple DSL' do
|
84
|
+
instance = described_class.new do
|
85
|
+
declare_terminals('PLUS', 'STAR', 'INTEGER')
|
86
|
+
rule('p' => 's')
|
87
|
+
rule('s' => ['s PLUS m', 'm'])
|
88
|
+
rule('m' => ['m STAR t', 't'])
|
89
|
+
rule('t' => 'INTEGER')
|
90
|
+
end
|
91
|
+
|
92
|
+
grm = instance.grammar
|
93
|
+
expect(grm).to be_kind_of(Dendroid::Syntax::Grammar)
|
94
|
+
(terms, nonterms) = grm.symbols.partition(&:terminal?)
|
95
|
+
expect(terms.map(&:name)).to eq(%i[PLUS STAR INTEGER])
|
96
|
+
expect(nonterms.map(&:name)).to eq(%i[p s m t])
|
97
|
+
grammar_rules = [
|
98
|
+
'p => s',
|
99
|
+
's => s PLUS m | m',
|
100
|
+
'm => m STAR t | t',
|
101
|
+
't => INTEGER'
|
102
|
+
]
|
103
|
+
expect(instance.rules.map(&:to_s)).to eq(grammar_rules)
|
104
|
+
end
|
105
|
+
end # context
|
106
|
+
end # describe
|
@@ -51,4 +51,18 @@ describe Dendroid::Syntax::Choice do
|
|
51
51
|
expect(instance.nonterminals).to eq([expr_symb, foo_symb])
|
52
52
|
end
|
53
53
|
end # context
|
54
|
+
|
55
|
+
context 'Errors:' do
|
56
|
+
it 'fails when initialized with one alternative only' do
|
57
|
+
err = StandardError
|
58
|
+
err_msg = 'The choice for `expression` must have at least two alternatives.'
|
59
|
+
expect { described_class.new(expr_symb, [alt1]) }.to raise_error(err, err_msg)
|
60
|
+
end
|
61
|
+
|
62
|
+
it 'fails in presence of duplicate rhs' do
|
63
|
+
err = StandardError
|
64
|
+
err_msg = 'Duplicate alternatives: expression => NUMBER PLUS NUMBER'
|
65
|
+
expect { described_class.new(expr_symb, [alt1, alt2, alt1]) }.to raise_error(err, err_msg)
|
66
|
+
end
|
67
|
+
end # context
|
54
68
|
end # describe
|
@@ -40,7 +40,6 @@ describe Dendroid::Syntax::Grammar do
|
|
40
40
|
Dendroid::Syntax::Choice.new(lhs, sequences.map { |arr| build_symbol_seq(arr) })
|
41
41
|
end
|
42
42
|
|
43
|
-
# rubocop: disable Metrics/AbcSize
|
44
43
|
def build_all_rules
|
45
44
|
rule1 = build_production(p_symb, [s_symb]) # p => s
|
46
45
|
rule2 = build_choice(s_symb, [[s_symb, plus_symb, m_symb], [m_symb]]) # s => s + m | m
|
@@ -48,7 +47,6 @@ describe Dendroid::Syntax::Grammar do
|
|
48
47
|
rule4 = build_production(t_symb, [int_symb]) # t => INTEGER
|
49
48
|
[rule1, rule2, rule3, rule4]
|
50
49
|
end
|
51
|
-
# rubocop: enable Metrics/AbcSize
|
52
50
|
|
53
51
|
context 'Initialization:' do
|
54
52
|
it 'is initialized with an array of terminal symbols' do
|
@@ -12,6 +12,7 @@ describe Dendroid::Syntax::Production do
|
|
12
12
|
let(:expr_symb) { Dendroid::Syntax::NonTerminal.new('expression') }
|
13
13
|
let(:foo_symb) { Dendroid::Syntax::NonTerminal.new('foo') }
|
14
14
|
let(:rhs) { Dendroid::Syntax::SymbolSeq.new([num_symb, plus_symb, num_symb]) }
|
15
|
+
let(:cyclic_rhs) { Dendroid::Syntax::SymbolSeq.new([foo_symb]) }
|
15
16
|
let(:empty_body) { Dendroid::Syntax::SymbolSeq.new([]) }
|
16
17
|
|
17
18
|
# Implements a production rule: expression => NUMBER PLUS NUMBER
|
@@ -74,4 +75,18 @@ describe Dendroid::Syntax::Production do
|
|
74
75
|
end
|
75
76
|
# rubocop: enable Lint/BinaryOperatorWithIdenticalOperands
|
76
77
|
end # context
|
78
|
+
|
79
|
+
context 'Errors:' do
|
80
|
+
it "fails when rhs isn't initialized with a SymbolSeq" do
|
81
|
+
err = StandardError
|
82
|
+
err_msg = 'Expecting a SymbolSeq, found a String instead.'
|
83
|
+
expect { described_class.new(foo_symb, 'bad') }.to raise_error err, err_msg
|
84
|
+
end
|
85
|
+
|
86
|
+
it 'fails when the production is cyclic' do
|
87
|
+
err = StandardError
|
88
|
+
err_msg = 'Cyclic rules of the kind foo => foo are not allowed.'
|
89
|
+
expect { described_class.new(foo_symb, cyclic_rhs) }.to raise_error err, err_msg
|
90
|
+
end
|
91
|
+
end # context
|
77
92
|
end # describe
|
@@ -20,4 +20,11 @@ describe Dendroid::Syntax::Rule do
|
|
20
20
|
expect(subject.head).to eq(expr_symb)
|
21
21
|
end
|
22
22
|
end # context
|
23
|
+
|
24
|
+
context 'Errors:' do
|
25
|
+
it 'fails when initialized with a terminal' do
|
26
|
+
msg = "Terminal symbol 'NUMBER' may not be on left-side of a rule."
|
27
|
+
expect { described_class.new(num_symb) }.to raise_error(StandardError, msg)
|
28
|
+
end
|
29
|
+
end
|
23
30
|
end # describe
|
data/version.txt
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.0.
|
1
|
+
0.0.8
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: dendroid
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.8
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Dimitri Geshef
|
@@ -24,6 +24,8 @@ files:
|
|
24
24
|
- bin/dendroid
|
25
25
|
- dendroid.gemspec
|
26
26
|
- lib/dendroid.rb
|
27
|
+
- lib/dendroid/grm_analysis/dotted_item.rb
|
28
|
+
- lib/dendroid/grm_dsl/base_grm_builder.rb
|
27
29
|
- lib/dendroid/syntax/choice.rb
|
28
30
|
- lib/dendroid/syntax/grammar.rb
|
29
31
|
- lib/dendroid/syntax/grm_symbol.rb
|
@@ -32,6 +34,8 @@ files:
|
|
32
34
|
- lib/dendroid/syntax/rule.rb
|
33
35
|
- lib/dendroid/syntax/symbol_seq.rb
|
34
36
|
- lib/dendroid/syntax/terminal.rb
|
37
|
+
- spec/dendroid/grm_analysis/dotted_item_spec.rb
|
38
|
+
- spec/dendroid/grm_dsl/base_grm_builder_spec.rb
|
35
39
|
- spec/dendroid/syntax/choice_spec.rb
|
36
40
|
- spec/dendroid/syntax/grammar_spec.rb
|
37
41
|
- spec/dendroid/syntax/grm_symbol_spec.rb
|