rley 0.4.05 → 0.4.06
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +14 -0
- data/README.md +6 -6
- data/examples/NLP/mini_en_demo.rb +1 -1
- data/lib/rley/constants.rb +1 -1
- data/lib/rley/formatter/asciitree.rb +4 -4
- data/lib/rley/formatter/bracket_notation.rb +3 -3
- data/lib/rley/gfg/call_edge.rb +1 -1
- data/lib/rley/gfg/grm_flow_graph.rb +112 -2
- data/lib/rley/parser/parse_entry_set.rb +1 -1
- data/lib/rley/rley_error.rb +1 -1
- data/lib/rley/syntax/grammar.rb +107 -8
- data/lib/rley/syntax/grammar_builder.rb +56 -27
- data/lib/rley/syntax/grm_symbol.rb +18 -9
- data/lib/rley/syntax/non_terminal.rb +21 -0
- data/lib/rley/syntax/production.rb +12 -0
- data/lib/rley/syntax/terminal.rb +1 -0
- data/spec/rley/gfg/grm_flow_graph_spec.rb +96 -0
- data/spec/rley/syntax/grammar_builder_spec.rb +17 -12
- data/spec/rley/syntax/grammar_spec.rb +70 -11
- data/spec/rley/syntax/non_terminal_spec.rb +16 -0
- data/spec/rley/syntax/terminal_spec.rb +4 -0
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 57136a1796f3625b841a2f1fdb0965b4314b6224
|
4
|
+
data.tar.gz: 80fd87d6ad9f68314278d5b4c15ef85a241647c3
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 3922fe824b8d721892b71022e64a79f4117ed0135f0b02fdaf76f5edfef083d345192328f3086a80fc0484e7dac14f7ec188a51c942bab2b14c8a196e4c9ffc4
|
7
|
+
data.tar.gz: 836625169012043daeba92c142fda1e80e6f4eaf085437c2d1bc006e57ca8de99d70619fce22f2d6d8f000468e9c93c65b8e073d80f68084619d7c2070e18197
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,17 @@
|
|
1
|
+
### 0.4.06 / 2017-05-25
|
2
|
+
* [FIX] File `formatter\asciitree.rb` fixed inconsistency in comments that caused Yard warnings.
|
3
|
+
* [FIX] File `formatter\bracket_notation.rb` fixed inconsistency in comments that caused Yard warnings.
|
4
|
+
* [FIX] File `parser\parse_entry_set.rb` fixed inconsistency in comments that caused Yard warnings.
|
5
|
+
* [NEW] Method `Grammar#diagnose` performs a number of checks on the grammar. It detects whether:
|
6
|
+
there are undefined non-terminals (i.e. non-terminals without a rule that defines them)
|
7
|
+
there are non-productive non-terminals (i.e. non-terminals that don't derive a string of terminals)
|
8
|
+
there are nullable productions and non-terminals.
|
9
|
+
* [NEW] Method `GrmFlowGraph#traverse_df` performs depth-first traversal of the GFG.
|
10
|
+
* [NEW] Method `GrmFlowGraph#diagnose` determines which terminals are reachable from the start symbol.
|
11
|
+
* [NEW] Method `GrmSymbol#generative?` indicates whether the grammar symbol can produce a sequence of terminals.
|
12
|
+
* [CHANGE] Class `GrammarBuilder` Improved the API documentation.
|
13
|
+
|
14
|
+
|
1
15
|
### 0.4.05 / 2017-05-06
|
2
16
|
* [CHANGE] File `README.md` Added documentation on how to build parse trees and manipulate them.
|
3
17
|
* [CHANGE] File `examples\NLP\mini_en_demo.rb` now emits different parse tree representations.
|
data/README.md
CHANGED
@@ -182,9 +182,9 @@ creating a lexicon and tokenizer from scratch. Here are a few Ruby Part-of-Speec
|
|
182
182
|
|
183
183
|
At this stage, we're done with parsing. What we need next are convenient means
|
184
184
|
to exploit the parse result. As it is, the `result` variable in the last code snippet
|
185
|
-
above is a data structure ("Earley item sets") that is
|
185
|
+
above is a data structure ("Earley item sets") that is highly dependent on the intricate details
|
186
186
|
of the Earley's parsing algorithm. Obviously, it contains all the necessary data to exploit
|
187
|
-
the parsing results but it is
|
187
|
+
the parsing results but it is rather low-level and inconvenient from a programming viewpoint.
|
188
188
|
Therefore, __Rley__ provides out of the box two convenient data structures for
|
189
189
|
representing the parse outcome:
|
190
190
|
- Parse tree (optimal when the parse is unambiguous)
|
@@ -201,10 +201,10 @@ OK. Now that we have the parse tree, what we can do with it?
|
|
201
201
|
One option is to manipulate the parse tree and its node directly. For instance,
|
202
202
|
one could write code to customize and transform the parse tree. This approach gives
|
203
203
|
most of the flexibility needed for advanced applications. The other, more common
|
204
|
-
option is to
|
204
|
+
option is to use an `Rley::ParseTreeVisitor` instance.
|
205
205
|
Such a visitor walks over the parse tree nodes and generates visit events that
|
206
206
|
are dispatched to subscribed event listeners. All this may, at first, sound
|
207
|
-
|
207
|
+
complicated but the coming code snippets show it otherwise.
|
208
208
|
|
209
209
|
Let's do it by:
|
210
210
|
- Creating a parse tree visitor
|
@@ -327,7 +327,7 @@ by yet another one:
|
|
327
327
|
|
328
328
|
```ruby
|
329
329
|
# Let's create a formatter that will render the parse tree in labelled bracket notation
|
330
|
-
renderer = Rley::Formatter::BracketNotation
|
330
|
+
renderer = Rley::Formatter::BracketNotation.new($stdout)
|
331
331
|
|
332
332
|
# Subscribe the formatter to the visitor's event and launch the visit
|
333
333
|
renderer.render(visitor)
|
@@ -338,7 +338,7 @@ This results in the strange-looking output:
|
|
338
338
|
[S [NP [Proper-Noun John]][VP [Verb saw][NP [Proper-Noun Mary]][PP [Preposition with][NP [Determiner a][Noun telescope]]]]]
|
339
339
|
```
|
340
340
|
|
341
|
-
This output is in a format that is recognized by many NLP
|
341
|
+
This output is in a format that is recognized by many NLP software tools.
|
342
342
|
The next diagram was created by copy-pasting the output above in the online tool
|
343
343
|
[RSyntaxTree](http://yohasebe.com/rsyntaxtree/).
|
344
344
|
By the way, this tool is also a Ruby gem, [rsyntaxtree](https://rubygems.org/gems/rsyntaxtree).
|
@@ -103,7 +103,7 @@ visitor = Rley::ParseTreeVisitor.new(ptree)
|
|
103
103
|
renderer = Rley::Formatter::Asciitree.new($stdout)
|
104
104
|
|
105
105
|
# Let's create a formatter that will render the parse tree in labelled bracket notation
|
106
|
-
# renderer = Rley::Formatter::BracketNotation
|
106
|
+
# renderer = Rley::Formatter::BracketNotation.new($stdout)
|
107
107
|
|
108
108
|
# Subscribe the formatter to the visitor's event and launch the visit
|
109
109
|
renderer.render(visitor)
|
data/lib/rley/constants.rb
CHANGED
@@ -36,8 +36,8 @@ module Rley # This module is used as a namespace
|
|
36
36
|
# Method called by a ParseTreeVisitor to which the formatter subscribed.
|
37
37
|
# Notification of a visit event: the visitor is about to visit
|
38
38
|
# the children of a non-terminal node
|
39
|
-
# @param
|
40
|
-
# @param
|
39
|
+
# @param parent [NonTerminalNode]
|
40
|
+
# @param children [Array] array of children nodes
|
41
41
|
def before_subnodes(parent, children)
|
42
42
|
rank_of(parent)
|
43
43
|
curr_path << parent
|
@@ -47,7 +47,7 @@ module Rley # This module is used as a namespace
|
|
47
47
|
# Method called by a ParseTreeVisitor to which the formatter subscribed.
|
48
48
|
# Notification of a visit event: the visitor is about to visit
|
49
49
|
# a non-terminal node
|
50
|
-
# @param
|
50
|
+
# @param aNonTerm [NonTerminalNode]
|
51
51
|
def before_non_terminal(aNonTerm)
|
52
52
|
emit(aNonTerm)
|
53
53
|
end
|
@@ -56,7 +56,7 @@ module Rley # This module is used as a namespace
|
|
56
56
|
# Method called by a ParseTreeVisitor to which the formatter subscribed.
|
57
57
|
# Notification of a visit event: the visitor is about to visit
|
58
58
|
# a terminal node
|
59
|
-
# @param
|
59
|
+
# @param aTerm [TerminalNode]
|
60
60
|
def before_terminal(aTerm)
|
61
61
|
emit(aTerm, ": '#{aTerm.token.lexeme}'")
|
62
62
|
end
|
@@ -23,7 +23,7 @@ module Rley # This module is used as a namespace
|
|
23
23
|
# Method called by a ParseTreeVisitor to which the formatter subscribed.
|
24
24
|
# Notification of a visit event: the visitor is about to visit
|
25
25
|
# a non-terminal node
|
26
|
-
# @param
|
26
|
+
# @param aNonTerm [NonTerminalNode]
|
27
27
|
def before_non_terminal(aNonTerm)
|
28
28
|
write("[#{aNonTerm.symbol.name} ")
|
29
29
|
end
|
@@ -32,7 +32,7 @@ module Rley # This module is used as a namespace
|
|
32
32
|
# Method called by a ParseTreeVisitor to which the formatter subscribed.
|
33
33
|
# Notification of a visit event: the visitor is about to visit
|
34
34
|
# a terminal node
|
35
|
-
# @param
|
35
|
+
# @param aTerm [TerminalNode]
|
36
36
|
def before_terminal(aTerm)
|
37
37
|
write("[#{aTerm.symbol.name} ")
|
38
38
|
end
|
@@ -40,7 +40,7 @@ module Rley # This module is used as a namespace
|
|
40
40
|
# Method called by a ParseTreeVisitor to which the formatter subscribed.
|
41
41
|
# Notification of a visit event: the visitor completed the visit of
|
42
42
|
# a terminal node.
|
43
|
-
# @param
|
43
|
+
# @param aTerm [TerminalNode]
|
44
44
|
def after_terminal(aTerm)
|
45
45
|
# Escape all opening and closing square brackets
|
46
46
|
escape_lbrackets = aTerm.token.lexeme.gsub(/\[/, "\\[")
|
data/lib/rley/gfg/call_edge.rb
CHANGED
@@ -14,7 +14,7 @@ module Rley # This module is used as a namespace
|
|
14
14
|
# Pre-condition: theSuccessor is a StartVertex
|
15
15
|
def initialize(thePredecessor, theSuccessor)
|
16
16
|
super(thePredecessor, theSuccessor)
|
17
|
-
do_set_key(thePredecessor, theSuccessor)
|
17
|
+
do_set_key(thePredecessor, theSuccessor)
|
18
18
|
end
|
19
19
|
|
20
20
|
private
|
@@ -1,3 +1,4 @@
|
|
1
|
+
require 'set'
|
1
2
|
require_relative 'start_vertex'
|
2
3
|
require_relative 'end_vertex'
|
3
4
|
require_relative 'item_vertex'
|
@@ -36,6 +37,76 @@ module Rley # This module is used as a namespace
|
|
36
37
|
vertices.find { |a_vertex| a_vertex.label == aVertexLabel }
|
37
38
|
end
|
38
39
|
|
40
|
+
# Perform a diagnosis of the grammar elements (symbols and rules)
|
41
|
+
# in order to detect:
|
42
|
+
# If one wants to remove useless rules, then do first:
|
43
|
+
# elimination of non-generating symbols
|
44
|
+
# then elimination of unreachable symbols
|
45
|
+
def diagnose
|
46
|
+
mark_unreachable_symbols
|
47
|
+
end
|
48
|
+
|
49
|
+
Branching = Struct.new(:vertex, :to_visit, :visited) do
|
50
|
+
def initialize(aVertex)
|
51
|
+
super(aVertex)
|
52
|
+
self.to_visit = aVertex.edges.dup
|
53
|
+
self.visited = []
|
54
|
+
end
|
55
|
+
|
56
|
+
def done?
|
57
|
+
self.to_visit.empty?
|
58
|
+
end
|
59
|
+
|
60
|
+
def next_edge
|
61
|
+
next_one = self.to_visit.shift
|
62
|
+
self.visited << next_one.successor unless next_one.nil?
|
63
|
+
|
64
|
+
return next_one
|
65
|
+
end
|
66
|
+
end
|
67
|
+
|
68
|
+
# Walk over all the vertices of the graph that are reachable from a given
|
69
|
+
# start vertex. This is a depth-first graph traversal.
|
70
|
+
# @param aStartVertex [StartVertex] the depth-first traversal begins from here
|
71
|
+
# @param visitAction [Proc] block code called when a new graph vertex is found
|
72
|
+
def traverse_df(aStartVertex, &visitAction)
|
73
|
+
visited = Set.new
|
74
|
+
stack = []
|
75
|
+
visitee = aStartVertex
|
76
|
+
|
77
|
+
begin
|
78
|
+
first_time = !visited.include?(visitee)
|
79
|
+
if first_time
|
80
|
+
visitAction.call(visitee)
|
81
|
+
visited << visitee
|
82
|
+
end
|
83
|
+
|
84
|
+
case visitee
|
85
|
+
when Rley::GFG::StartVertex
|
86
|
+
if first_time
|
87
|
+
stack.push(Branching.new(visitee))
|
88
|
+
curr_edge = stack.last.next_edge
|
89
|
+
else
|
90
|
+
# Skip start and end vertices
|
91
|
+
# Retrieve the corresponding return edge
|
92
|
+
curr_edge = get_matching_return(curr_edge)
|
93
|
+
end
|
94
|
+
|
95
|
+
when Rley::GFG::EndVertex
|
96
|
+
stack.pop if stack.last.done?
|
97
|
+
break if stack.empty?
|
98
|
+
curr_edge = stack.last.next_edge
|
99
|
+
|
100
|
+
else
|
101
|
+
# All other vertex types have only one successor
|
102
|
+
curr_edge = visitee.edges[0]
|
103
|
+
end
|
104
|
+
visitee = curr_edge.successor unless curr_edge.nil?
|
105
|
+
end until stack.empty?
|
106
|
+
# Now process the end vertex matching the initial start vertex
|
107
|
+
visitAction.call(end_vertex_for[aStartVertex.non_terminal])
|
108
|
+
end
|
109
|
+
|
39
110
|
private
|
40
111
|
|
41
112
|
def add_vertex(aVertex)
|
@@ -68,7 +139,15 @@ module Rley # This module is used as a namespace
|
|
68
139
|
def build_all_starts_ends(theDottedItems)
|
69
140
|
productions_raw = theDottedItems.map(&:production)
|
70
141
|
productions = productions_raw.uniq
|
71
|
-
|
142
|
+
all_nterms = Set.new
|
143
|
+
productions.each do |prod|
|
144
|
+
all_nterms << prod.lhs
|
145
|
+
nterms_of_rhs = prod.rhs.members.select do |symb|
|
146
|
+
symb.kind_of?(Syntax::NonTerminal)
|
147
|
+
end
|
148
|
+
all_nterms.merge(nterms_of_rhs)
|
149
|
+
end
|
150
|
+
all_nterms.each { |nterm| build_start_end_for(nterm) }
|
72
151
|
end
|
73
152
|
|
74
153
|
# if there is not yet a start vertex labelled .N in the GFG:
|
@@ -179,12 +258,43 @@ module Rley # This module is used as a namespace
|
|
179
258
|
# Retrieve corresponding end vertex
|
180
259
|
end_vertex = end_vertex_for[nt_symbol]
|
181
260
|
# Create an edge end vertex -> return vertex
|
182
|
-
ReturnEdge.new(end_vertex, return_vertex)
|
261
|
+
ReturnEdge.new(end_vertex, return_vertex) if end_vertex
|
183
262
|
end
|
184
263
|
|
185
264
|
def build_shortcut_edge(fromVertex, toVertex)
|
186
265
|
ShortcutEdge.new(fromVertex, toVertex)
|
187
266
|
end
|
267
|
+
|
268
|
+
|
269
|
+
# Retrieve the return edge that matches the given
|
270
|
+
# call edge.
|
271
|
+
def get_matching_return(aCallEdge)
|
272
|
+
# Calculate key of return edge from the key of call edge
|
273
|
+
ret_key = aCallEdge.key.sub(/CALL/, 'RET')
|
274
|
+
|
275
|
+
# Retrieve the corresponding end vertex
|
276
|
+
end_vertex = end_vertex_for[aCallEdge.successor.non_terminal]
|
277
|
+
|
278
|
+
# Retrieve the return edge with specified key
|
279
|
+
return_edge = end_vertex.edges.find { |edge| edge.key == ret_key }
|
280
|
+
end
|
281
|
+
|
282
|
+
# Mark non-terminal symbols that cannot be derived from the start symbol.
|
283
|
+
# In a GFG, a non-terminal symbol N is unreachable if there is no path
|
284
|
+
# from the start symbol to the start node .N
|
285
|
+
def mark_unreachable_symbols()
|
286
|
+
# Mark all non-terminals as unreachable
|
287
|
+
start_vertex_for.values.each do |a_vertex|
|
288
|
+
a_vertex.non_terminal.unreachable = true
|
289
|
+
end
|
290
|
+
|
291
|
+
# Now traverse graph from start vertex
|
292
|
+
# and mark all visited non-terminals as reachable
|
293
|
+
traverse_df(start_vertex) do |a_vertex|
|
294
|
+
next unless a_vertex.kind_of?(StartVertex)
|
295
|
+
a_vertex.non_terminal.unreachable = false
|
296
|
+
end
|
297
|
+
end
|
188
298
|
end # class
|
189
299
|
end # module
|
190
300
|
end # module
|
@@ -39,7 +39,7 @@ module Rley # This module is used as a namespace
|
|
39
39
|
|
40
40
|
# Append the given entry (if it isn't yet in the set)
|
41
41
|
# to the list of parse entries
|
42
|
-
# @param
|
42
|
+
# @param anEntry [ParseEntry] the parse entry to push.
|
43
43
|
# @return [ParseEntry] the passed parse entry it doesn't added
|
44
44
|
def push_entry(anEntry)
|
45
45
|
match = entries.find { |entry| entry == anEntry }
|
data/lib/rley/rley_error.rb
CHANGED
data/lib/rley/syntax/grammar.rb
CHANGED
@@ -1,15 +1,16 @@
|
|
1
1
|
require 'set'
|
2
|
+
require_relative '../rley_error'
|
2
3
|
|
3
4
|
module Rley # This module is used as a namespace
|
4
5
|
module Syntax # This module is used as a namespace
|
5
6
|
# A grammar specifies the syntax of a language.
|
6
|
-
# Formally, a grammar has:
|
7
|
+
# Formally, a grammar has:
|
7
8
|
# * One start symbol,
|
8
9
|
# * One or more other production rules,
|
9
10
|
# * Each production has a rhs that is a sequence of grammar symbols.
|
10
|
-
# * Grammar symbols are categorized into
|
11
|
-
#
|
12
|
-
#
|
11
|
+
# * Grammar symbols are categorized into:
|
12
|
+
# -terminal symbols
|
13
|
+
# -non-terminal symbols
|
13
14
|
class Grammar
|
14
15
|
# A non-terminal symbol that represents all the possible strings
|
15
16
|
# in the language.
|
@@ -30,15 +31,16 @@ module Rley # This module is used as a namespace
|
|
30
31
|
@symbols = []
|
31
32
|
@name2symbol = {}
|
32
33
|
valid_productions = validate_productions(theProductions)
|
34
|
+
valid_productions.each { |prod| add_production(prod) }
|
35
|
+
diagnose
|
36
|
+
|
33
37
|
# TODO: use topological sorting
|
34
38
|
@start_symbol = valid_productions[0].lhs
|
35
|
-
valid_productions.each { |prod| add_production(prod) }
|
36
|
-
compute_nullable
|
37
39
|
end
|
38
40
|
|
39
41
|
# @return [Array] The list of non-terminals in the grammar.
|
40
42
|
def non_terminals()
|
41
|
-
|
43
|
+
@non_terminals ||= symbols.select { |s| s.kind_of?(NonTerminal) }
|
42
44
|
end
|
43
45
|
|
44
46
|
# @return [Production] The start production of the grammar (i.e.
|
@@ -61,10 +63,101 @@ module Rley # This module is used as a namespace
|
|
61
63
|
the_lhs = aProduction.lhs
|
62
64
|
add_symbol(the_lhs)
|
63
65
|
|
64
|
-
# TODO: remove quadratic execution time
|
65
66
|
aProduction.rhs.each { |symb| add_symbol(symb) }
|
66
67
|
end
|
67
68
|
|
69
|
+
# Perform some check of the grammar.
|
70
|
+
def diagnose()
|
71
|
+
mark_undefined
|
72
|
+
mark_generative
|
73
|
+
compute_nullable
|
74
|
+
end
|
75
|
+
|
76
|
+
# Check that each non-terminal appears at least once in lhs.
|
77
|
+
# If it is not the case, then mark it as undefined
|
78
|
+
def mark_undefined
|
79
|
+
defined = Set.new
|
80
|
+
|
81
|
+
# Defined non-terminals appear at least once as lhs of a production
|
82
|
+
rules.each { |prod| defined << prod.lhs }
|
83
|
+
defined.each { |n_term| n_term.undefined = false }
|
84
|
+
|
85
|
+
# Retrieve all non-terminals that aren't marked as non-undefined
|
86
|
+
undefined = non_terminals.select { |n_term| n_term.undefined?.nil? }
|
87
|
+
|
88
|
+
undefined.each { |n_term| n_term.undefined = true }
|
89
|
+
end
|
90
|
+
|
91
|
+
|
92
|
+
|
93
|
+
# Mark all non-terminals and production rules as
|
94
|
+
# generative or not.
|
95
|
+
# A production is generative when it can derive a string of terminals.
|
96
|
+
# A production is therefore generative when all its rhs members are
|
97
|
+
# themselves generative.
|
98
|
+
# A non-terminal is generative if at least one of its defining productions
|
99
|
+
# is itself generative.
|
100
|
+
def mark_generative
|
101
|
+
curr_marked = []
|
102
|
+
|
103
|
+
# Iterate until no new rule can be marked.
|
104
|
+
begin
|
105
|
+
prev_marked = curr_marked.dup
|
106
|
+
|
107
|
+
rules.each do |a_rule|
|
108
|
+
next unless a_rule.generative?.nil?
|
109
|
+
if a_rule.empty?
|
110
|
+
a_rule.generative = false
|
111
|
+
curr_marked << a_rule
|
112
|
+
could_mark_nterm_generative(a_rule)
|
113
|
+
next
|
114
|
+
end
|
115
|
+
|
116
|
+
last_considered = nil
|
117
|
+
a_rule.rhs.members.each do |symbol|
|
118
|
+
last_considered = symbol
|
119
|
+
break unless symbol.generative?
|
120
|
+
end
|
121
|
+
next if last_considered.generative?.nil?
|
122
|
+
a_rule.generative = last_considered.generative?
|
123
|
+
curr_marked << a_rule
|
124
|
+
could_mark_nterm_generative(a_rule)
|
125
|
+
end
|
126
|
+
end until prev_marked.size == curr_marked.size
|
127
|
+
|
128
|
+
# The nonterminals that are not marked yet are non-generative
|
129
|
+
non_terminals.each do |nterm|
|
130
|
+
nterm.generative = false if nterm.generative?.nil?
|
131
|
+
end
|
132
|
+
end
|
133
|
+
|
134
|
+
# Given a production rule with given non-terminal
|
135
|
+
# Check whether that non-terminal should be marked
|
136
|
+
# as generative or not.
|
137
|
+
# A non-terminal may be marked as generative if at
|
138
|
+
# least one of its defining productions is generative.
|
139
|
+
def could_mark_nterm_generative(aRule)
|
140
|
+
nterm = aRule.lhs
|
141
|
+
|
142
|
+
# non-terminal already marked? If yes, nothing more to do...
|
143
|
+
return unless nterm.generative?.nil?
|
144
|
+
|
145
|
+
defining_rules = rules_for(nterm) # Retrieve all defining productions
|
146
|
+
|
147
|
+
all_false = true
|
148
|
+
defining_rules.each do |prod|
|
149
|
+
if prod.generative?
|
150
|
+
# One generative rule found!
|
151
|
+
nterm.generative = true
|
152
|
+
all_false = false
|
153
|
+
break
|
154
|
+
else
|
155
|
+
all_false = false if prod.generative?.nil?
|
156
|
+
end
|
157
|
+
end
|
158
|
+
nterm.generative = false if all_false
|
159
|
+
end
|
160
|
+
|
68
161
|
|
69
162
|
# For each non-terminal determine whether it is nullable or not.
|
70
163
|
# A nullable nonterminal is a nonterminal that can match an empty string.
|
@@ -118,6 +211,12 @@ module Rley # This module is used as a namespace
|
|
118
211
|
@symbols << aSymbol
|
119
212
|
@name2symbol[its_name] = aSymbol
|
120
213
|
end
|
214
|
+
|
215
|
+
# Retrieve all the production rules that share the same symbol in lhs
|
216
|
+
def rules_for(aNonTerm)
|
217
|
+
rules.select { |a_rule| a_rule.lhs == aNonTerm }
|
218
|
+
end
|
219
|
+
|
121
220
|
end # class
|
122
221
|
end # module
|
123
222
|
end # module
|
@@ -1,56 +1,74 @@
|
|
1
|
+
require 'set'
|
1
2
|
require_relative 'verbatim_symbol'
|
2
3
|
require_relative 'literal'
|
4
|
+
require_relative 'terminal'
|
3
5
|
require_relative 'non_terminal'
|
4
6
|
require_relative 'production'
|
5
7
|
require_relative 'grammar'
|
6
8
|
|
7
9
|
module Rley # This module is used as a namespace
|
8
10
|
module Syntax # This module is used as a namespace
|
9
|
-
# Builder GoF pattern. Builder
|
10
|
-
#
|
11
|
-
#
|
11
|
+
# Builder GoF pattern. Builder builds a complex object
|
12
|
+
# (say, a grammar) from simpler objects (terminals and productions)
|
13
|
+
# and using a step by step approach.
|
12
14
|
class GrammarBuilder
|
13
|
-
# The
|
14
|
-
#
|
15
|
-
# and non-terminal (symbol).
|
15
|
+
# @return [Hash{String, GrmSymbol}] The mapping of grammar symbol names
|
16
|
+
# to the matching grammar symbol object.
|
16
17
|
attr_reader(:symbols)
|
17
18
|
|
18
|
-
# The list of production rules for
|
19
|
+
# @return [Array<Production>] The list of production rules for
|
20
|
+
# the grammar to build.
|
19
21
|
attr_reader(:productions)
|
20
22
|
|
21
|
-
|
23
|
+
# Creates a new grammar builder.
|
24
|
+
# @param aBlock [Proc] code block used to build the grammar.
|
25
|
+
# @example Building a tiny English grammar
|
26
|
+
# builder = Rley::Syntax::GrammarBuilder.new do
|
27
|
+
# add_terminals('n', 'v', 'adj', 'det')
|
28
|
+
# rule 'S' => %w(NP VP)
|
29
|
+
# rule 'VP' => %w(v NP)
|
30
|
+
# rule 'NP' => %w(det n)
|
31
|
+
# rule 'NP' => %w(adj NP)
|
32
|
+
# end
|
33
|
+
# tiny_eng = builder.grammar
|
22
34
|
def initialize(&aBlock)
|
23
35
|
@symbols = {}
|
24
36
|
@productions = []
|
25
37
|
|
26
38
|
instance_exec(&aBlock) if block_given?
|
27
39
|
end
|
28
|
-
|
40
|
+
|
29
41
|
# Retrieve a grammar symbol from its name.
|
30
42
|
# Raise an exception if not found.
|
31
|
-
# @param aSymbolName [String] the name of a symbol
|
32
|
-
# @return [GrmSymbol] the retrieved symbol.
|
43
|
+
# @param aSymbolName [String] the name of a grammar symbol.
|
44
|
+
# @return [GrmSymbol] the retrieved symbol object.
|
33
45
|
def [](aSymbolName)
|
34
46
|
return symbols[aSymbolName]
|
35
47
|
end
|
36
48
|
|
37
49
|
# Add the given terminal symbols to the grammar of the language
|
38
50
|
# @param terminalSymbols [String or Terminal] 1..* terminal symbols.
|
51
|
+
# @return [void]
|
39
52
|
def add_terminals(*terminalSymbols)
|
40
53
|
new_symbs = build_symbols(Terminal, terminalSymbols)
|
41
54
|
symbols.merge!(new_symbs)
|
42
55
|
end
|
43
56
|
|
44
57
|
|
45
|
-
# Add a production rule in the grammar given one
|
58
|
+
# Add a production rule in the grammar given one
|
46
59
|
# key-value pair of the form: String => Array.
|
47
|
-
# Where the key is the name of the non-terminal appearing in the
|
48
|
-
# left side of the rule.
|
60
|
+
# Where the key is the name of the non-terminal appearing in the
|
61
|
+
# left side of the rule.
|
49
62
|
# The value, an Array, is a sequence of grammar symbol names.
|
50
63
|
# The rule is created and inserted in the grammar.
|
51
|
-
#
|
52
|
-
# builder.add_production('A' => ['a', 'A', 'c'])
|
53
|
-
#
|
64
|
+
# @example Equivalent call syntaxes
|
65
|
+
# builder.add_production('A' => ['a', 'A', 'c'])
|
66
|
+
# builder.rule('A' => ['a', 'A', 'c']) # 'rule' is a synonym
|
67
|
+
# builder.rule('A' => %w(a A c)) # Use %w syntax for Array of String
|
68
|
+
# builder.rule 'A' => %w(a A c) # Call parentheses are optional
|
69
|
+
# @param aProductionRepr [Hash{String, Array<String>}] A Hash-based representation
|
70
|
+
# of a production.
|
71
|
+
# @return [void]
|
54
72
|
def add_production(aProductionRepr)
|
55
73
|
aProductionRepr.each_pair do |(lhs_name, rhs_repr)|
|
56
74
|
lhs = get_nonterminal(lhs_name)
|
@@ -69,26 +87,37 @@ module Rley # This module is used as a namespace
|
|
69
87
|
|
70
88
|
# Given the grammar symbols and productions added to the builder,
|
71
89
|
# build the resulting grammar (if not yet done).
|
90
|
+
# @return [Grammar] the created grammar object.
|
72
91
|
def grammar()
|
73
92
|
unless @grammar
|
74
93
|
raise StandardError, 'No symbol found for grammar' if symbols.empty?
|
75
94
|
if productions.empty?
|
76
95
|
raise StandardError, 'No production found for grammar'
|
77
96
|
end
|
78
|
-
|
79
|
-
# Check that each
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
97
|
+
|
98
|
+
# Check that each terminal appears at least in a rhs of a production
|
99
|
+
all_terminals = symbols.values.select do |a_symb|
|
100
|
+
a_symb.kind_of?(Terminal)
|
101
|
+
end
|
102
|
+
in_use = Set.new
|
103
|
+
productions.each do |prod|
|
104
|
+
prod.rhs.members.each do |symb|
|
105
|
+
in_use << symb if symb.kind_of?(Syntax::Terminal)
|
106
|
+
end
|
107
|
+
end
|
108
|
+
|
109
|
+
unused = all_terminals.reject { |a_term| in_use.include?(a_term) }
|
110
|
+
unless unused.empty?
|
111
|
+
suffix = "#{unused.map(&:name).join(', ')}."
|
112
|
+
raise StandardError, 'Useless terminal symbol(s): ' + suffix
|
84
113
|
end
|
85
114
|
|
86
115
|
@grammar = Grammar.new(productions.dup)
|
87
116
|
end
|
88
|
-
|
117
|
+
|
89
118
|
return @grammar
|
90
119
|
end
|
91
|
-
|
120
|
+
|
92
121
|
alias rule add_production
|
93
122
|
|
94
123
|
private
|
@@ -125,7 +154,7 @@ module Rley # This module is used as a namespace
|
|
125
154
|
|
126
155
|
return a_symbol
|
127
156
|
end
|
128
|
-
|
157
|
+
|
129
158
|
# Retrieve the non-terminal symbol with given name.
|
130
159
|
# If it doesn't exist yet, then it is created on the fly.
|
131
160
|
# @param aSymbolName [String] the name of the grammar symbol to retrieve
|
@@ -136,7 +165,7 @@ module Rley # This module is used as a namespace
|
|
136
165
|
end
|
137
166
|
return symbols[aSymbolName]
|
138
167
|
end
|
139
|
-
|
168
|
+
|
140
169
|
end # class
|
141
170
|
end # module
|
142
171
|
end # module
|
@@ -2,28 +2,37 @@ module Rley # This module is used as a namespace
|
|
2
2
|
module Syntax # This module is used as a namespace
|
3
3
|
# Abstract class for grammar symbols.
|
4
4
|
# A grammar symbol is an element that appears in grammar rules.
|
5
|
-
class GrmSymbol
|
5
|
+
class GrmSymbol
|
6
6
|
# The name of the grammar symbol
|
7
7
|
attr_reader(:name)
|
8
8
|
|
9
|
+
# An indicator that tells whether the grammar symbol can generate a
|
10
|
+
# non-empty string of terminals.
|
11
|
+
attr_writer(:generative)
|
12
|
+
|
9
13
|
# Constructor.
|
10
14
|
# aName [String] The name of the grammar symbol.
|
11
15
|
def initialize(aName)
|
12
16
|
@name = aName.dup
|
13
17
|
end
|
14
|
-
|
15
|
-
# Return true iff the symbol is a terminal
|
16
|
-
def terminal?()
|
17
|
-
# Default implementation to override if necessary
|
18
|
-
return false
|
19
|
-
end
|
20
|
-
|
18
|
+
|
21
19
|
# The String representation of the grammar symbol
|
22
20
|
# @return [String]
|
23
21
|
def to_s()
|
24
22
|
return name.to_s
|
25
23
|
end
|
26
|
-
|
24
|
+
|
25
|
+
# @return [Boolean] true iff the symbol is a terminal
|
26
|
+
def terminal?()
|
27
|
+
# Default implementation to override if necessary
|
28
|
+
return false
|
29
|
+
end
|
30
|
+
|
31
|
+
# @return [Boolean] true iff the symbol is generative.
|
32
|
+
def generative?()
|
33
|
+
return @generative
|
34
|
+
end
|
35
|
+
|
27
36
|
end # class
|
28
37
|
end # module
|
29
38
|
end # module
|
@@ -5,7 +5,16 @@ module Rley # This module is used as a namespace
|
|
5
5
|
# A non-terminal symbol (sometimes called a syntactic variable) represents
|
6
6
|
# a composition of terminal or non-terminal symbols
|
7
7
|
class NonTerminal < GrmSymbol
|
8
|
+
# A non-terminal symbol is nullable if it can match an empty string.
|
8
9
|
attr_writer(:nullable)
|
10
|
+
|
11
|
+
# A non-terminal symbol is undefined if no production rule in the grammar
|
12
|
+
# has that non-terminal symbol in its left-hand side.
|
13
|
+
attr_writer(:undefined)
|
14
|
+
|
15
|
+
# A non-terminal symbol is unreachable if it cannot be reached (derived)
|
16
|
+
# from the start symbol.
|
17
|
+
attr_writer(:unreachable)
|
9
18
|
|
10
19
|
# Constructor.
|
11
20
|
# @param aName [String] The name of the grammar symbol.
|
@@ -21,6 +30,18 @@ module Rley # This module is used as a namespace
|
|
21
30
|
def nullable?()
|
22
31
|
return @nullable
|
23
32
|
end
|
33
|
+
|
34
|
+
# @return [false/true] Return true if the symbol doesn't appear
|
35
|
+
# on the left-hand side of any production rule.
|
36
|
+
def undefined?()
|
37
|
+
return @undefined
|
38
|
+
end
|
39
|
+
|
40
|
+
# @return [false/true] Return true if the symbol cannot be derived
|
41
|
+
# from the start symbol.
|
42
|
+
def unreachable?()
|
43
|
+
return @unreachable
|
44
|
+
end
|
24
45
|
end # class
|
25
46
|
end # module
|
26
47
|
end # module
|
@@ -15,6 +15,10 @@ module Rley # This module is used as a namespace
|
|
15
15
|
|
16
16
|
# The left-hand side of the rule. It must be a non-terminal symbol
|
17
17
|
attr_reader(:lhs)
|
18
|
+
|
19
|
+
# A production is generative when all of its rhs members are generative (that is, they
|
20
|
+
# can each generate/derive a non-empty string of terminals).
|
21
|
+
attr_writer(:generative)
|
18
22
|
|
19
23
|
# Provide common alternate names to lhs and rhs accessors
|
20
24
|
|
@@ -31,6 +35,14 @@ module Rley # This module is used as a namespace
|
|
31
35
|
def empty?()
|
32
36
|
return rhs.empty?
|
33
37
|
end
|
38
|
+
|
39
|
+
# Return true iff the production is generative
|
40
|
+
def generative?()
|
41
|
+
if @generative.nil?
|
42
|
+
end
|
43
|
+
|
44
|
+
return @generative
|
45
|
+
end
|
34
46
|
|
35
47
|
private
|
36
48
|
|
data/lib/rley/syntax/terminal.rb
CHANGED
@@ -154,6 +154,102 @@ module Rley # Open this namespace to avoid module qualifier prefixes
|
|
154
154
|
end
|
155
155
|
end
|
156
156
|
end # context
|
157
|
+
|
158
|
+
context 'Provided services:' do
|
159
|
+
let(:problematic_grammar) do
|
160
|
+
# Based on grammar example in book
|
161
|
+
# C. Fisher, R. LeBlanc, "Crafting a Compiler"; page 98
|
162
|
+
builder = Rley::Syntax::GrammarBuilder.new
|
163
|
+
builder.add_terminals('a', 'b', 'c')
|
164
|
+
builder.add_production('S' => 'A')
|
165
|
+
builder.add_production('S' => 'B')
|
166
|
+
builder.add_production('A' => 'a')
|
167
|
+
# There is no edge between .B and B => B . b => non-generative
|
168
|
+
builder.add_production('B' => %w(B b))
|
169
|
+
|
170
|
+
# Non-terminal symbol C is unreachable
|
171
|
+
builder.add_production('C' => 'c')
|
172
|
+
|
173
|
+
# And now build the grammar...
|
174
|
+
builder.grammar
|
175
|
+
end
|
176
|
+
|
177
|
+
it 'should provide depth-first traversal' do
|
178
|
+
result = []
|
179
|
+
subject.traverse_df(subject.start_vertex) do |vertex|
|
180
|
+
result << vertex.label
|
181
|
+
end
|
182
|
+
|
183
|
+
expected = [
|
184
|
+
'.S',
|
185
|
+
'S => . A',
|
186
|
+
'.A',
|
187
|
+
'A => . a A c',
|
188
|
+
'A => a . A c',
|
189
|
+
'A => a A . c',
|
190
|
+
'A => a A c .',
|
191
|
+
'A.',
|
192
|
+
'A => . b',
|
193
|
+
'A => b .',
|
194
|
+
'S.'
|
195
|
+
]
|
196
|
+
expect(result).to eq(expected)
|
197
|
+
end
|
198
|
+
|
199
|
+
it 'should perform a diagnosis of a correct grammar' do
|
200
|
+
expect { subject.diagnose }.not_to raise_error
|
201
|
+
grammar_abc.non_terminals.each do |nterm|
|
202
|
+
expect(nterm).not_to be_undefined
|
203
|
+
expect(nterm).not_to be_unreachable
|
204
|
+
end
|
205
|
+
end
|
206
|
+
|
207
|
+
it 'should detect when a non-terminal is unreachable' do
|
208
|
+
grammar = problematic_grammar
|
209
|
+
items = build_items_for_grammar(grammar)
|
210
|
+
|
211
|
+
graph = GrmFlowGraph.new(items)
|
212
|
+
expect { graph.diagnose }.not_to raise_error
|
213
|
+
grammar.non_terminals.each do |nterm|
|
214
|
+
expect(nterm).not_to be_undefined
|
215
|
+
end
|
216
|
+
|
217
|
+
unreachable = grammar.non_terminals.select do |nterm|
|
218
|
+
nterm.unreachable?
|
219
|
+
end
|
220
|
+
expect(unreachable.size).to eq(1)
|
221
|
+
expect(unreachable[0].name).to eq('C')
|
222
|
+
end
|
223
|
+
end # context
|
224
|
+
|
225
|
+
=begin
|
226
|
+
context 'Grammar without undefined symbols:' do
|
227
|
+
it 'should mark all its nonterminals as not undefined' do
|
228
|
+
nonterms = subject.non_terminals
|
229
|
+
nonterms.each do |nterm|
|
230
|
+
expect(nterm).not_to be_undefined
|
231
|
+
end
|
232
|
+
end
|
233
|
+
end # context
|
234
|
+
|
235
|
+
context 'Grammar with undefined symbols:' do
|
236
|
+
subject do
|
237
|
+
productions = [prod_S, prod_A1, prod_A2, prod_A3]
|
238
|
+
Grammar.new(productions)
|
239
|
+
end
|
240
|
+
|
241
|
+
it 'should detect its nonterminals that are undefined' do
|
242
|
+
nonterms = subject.non_terminals
|
243
|
+
culprits = nonterms.select do |nterm|
|
244
|
+
nterm.undefined?
|
245
|
+
end
|
246
|
+
|
247
|
+
expect(culprits.size).to eq(1)
|
248
|
+
expect(culprits[0]).to eq(nt_C)
|
249
|
+
end
|
250
|
+
end # context
|
251
|
+
=end
|
252
|
+
|
157
253
|
end # describe
|
158
254
|
end # module
|
159
255
|
end # module
|
@@ -9,7 +9,7 @@ module Rley # Open this namespace to avoid module qualifier prefixes
|
|
9
9
|
context 'Initialization without argument:' do
|
10
10
|
it 'could be created without argument' do
|
11
11
|
expect { GrammarBuilder.new }.not_to raise_error
|
12
|
-
end
|
12
|
+
end
|
13
13
|
|
14
14
|
it 'should have no grammar symbols at start' do
|
15
15
|
expect(subject.symbols).to be_empty
|
@@ -19,12 +19,12 @@ module Rley # Open this namespace to avoid module qualifier prefixes
|
|
19
19
|
expect(subject.productions).to be_empty
|
20
20
|
end
|
21
21
|
end # context
|
22
|
-
|
22
|
+
|
23
23
|
context 'Initialization with argument:' do
|
24
24
|
it 'could be created with a block argument' do
|
25
25
|
expect do GrammarBuilder.new { nil }
|
26
26
|
end.not_to raise_error
|
27
|
-
end
|
27
|
+
end
|
28
28
|
|
29
29
|
it 'could have grammar symbols from block argument' do
|
30
30
|
instance = GrammarBuilder.new do
|
@@ -36,7 +36,7 @@ module Rley # Open this namespace to avoid module qualifier prefixes
|
|
36
36
|
it 'should have no productions at start' do
|
37
37
|
expect(subject.productions).to be_empty
|
38
38
|
end
|
39
|
-
end # context
|
39
|
+
end # context
|
40
40
|
|
41
41
|
context 'Adding symbols:' do
|
42
42
|
it 'should build terminals from their names' do
|
@@ -122,8 +122,8 @@ module Rley # Open this namespace to avoid module qualifier prefixes
|
|
122
122
|
expect(subject.grammar).to be_kind_of(Grammar)
|
123
123
|
grm = subject.grammar
|
124
124
|
expect(grm.rules).to eq(subject.productions)
|
125
|
-
|
126
|
-
# Invoking the factory method again should return
|
125
|
+
|
126
|
+
# Invoking the factory method again should return
|
127
127
|
# the same grammar object
|
128
128
|
second_time = subject.grammar
|
129
129
|
expect(second_time).to eq(grm)
|
@@ -144,13 +144,18 @@ module Rley # Open this namespace to avoid module qualifier prefixes
|
|
144
144
|
expect { instance.grammar }.to raise_error(err, msg)
|
145
145
|
end
|
146
146
|
|
147
|
-
it 'should complain
|
148
|
-
|
149
|
-
|
150
|
-
|
147
|
+
it 'should complain if one or more terminals are useless' do
|
148
|
+
# Add one useless terminal symbol
|
149
|
+
subject.add_terminals('d')
|
150
|
+
|
151
151
|
err = StandardError
|
152
|
-
msg = '
|
153
|
-
expect {
|
152
|
+
msg = 'Useless terminal symbol(s): d.'
|
153
|
+
expect { subject.grammar }.to raise_error(err, msg)
|
154
|
+
|
155
|
+
# Add another useless terminal
|
156
|
+
subject.add_terminals('e')
|
157
|
+
msg = 'Useless terminal symbol(s): d, e.'
|
158
|
+
expect { subject.grammar }.to raise_error(err, msg)
|
154
159
|
end
|
155
160
|
|
156
161
|
it 'should build a grammar with nullable nonterminals' do
|
@@ -70,12 +70,16 @@ module Rley # Open this namespace to avoid module qualifier prefixes
|
|
70
70
|
# A ::= "b".
|
71
71
|
let(:nt_S) { NonTerminal.new('S') }
|
72
72
|
let(:nt_A) { NonTerminal.new('A') }
|
73
|
+
let(:nt_B) { NonTerminal.new('B') }
|
74
|
+
let(:nt_C) { NonTerminal.new('C') }
|
75
|
+
let(:nt_D) { NonTerminal.new('D') }
|
73
76
|
let(:a_) { VerbatimSymbol.new('a') }
|
74
77
|
let(:b_) { VerbatimSymbol.new('b') }
|
75
78
|
let(:c_) { VerbatimSymbol.new('c') }
|
76
79
|
let(:prod_S) { Production.new(nt_S, [nt_A]) }
|
77
80
|
let(:prod_A1) { Production.new(nt_A, [a_, nt_A, c_]) }
|
78
81
|
let(:prod_A2) { Production.new(nt_A, [b_]) }
|
82
|
+
let(:prod_A3) {Production.new(nt_A, [c_, nt_C] ) }
|
79
83
|
|
80
84
|
=begin
|
81
85
|
# Non-terminals that specify the lexicon of the language
|
@@ -137,16 +141,16 @@ module Rley # Open this namespace to avoid module qualifier prefixes
|
|
137
141
|
it 'should know all its symbols' do
|
138
142
|
expect(subject.symbols).to eq([nt_S, nt_A, a_, c_, b_])
|
139
143
|
end
|
140
|
-
|
144
|
+
|
141
145
|
it 'should know all its non-terminal symbols' do
|
142
|
-
expect(subject.non_terminals).to eq([nt_S, nt_A])
|
146
|
+
expect(subject.non_terminals).to eq([nt_S, nt_A])
|
143
147
|
end
|
144
|
-
|
148
|
+
|
145
149
|
it 'should know its start production' do
|
146
150
|
expect(subject.start_production).to eq(prod_S)
|
147
151
|
end
|
148
152
|
end # context
|
149
|
-
|
153
|
+
|
150
154
|
context 'Provided services:' do
|
151
155
|
it 'should retrieve its symbols from their name' do
|
152
156
|
expect(subject.name2symbol['S']).to eq(nt_S)
|
@@ -156,9 +160,64 @@ module Rley # Open this namespace to avoid module qualifier prefixes
|
|
156
160
|
expect(subject.name2symbol['c']).to eq(c_)
|
157
161
|
end
|
158
162
|
end # context
|
159
|
-
|
163
|
+
|
164
|
+
context 'Grammar diagnosis:' do
|
165
|
+
it 'should mark any non-terminal that has no production' do
|
166
|
+
# S ::= A.
|
167
|
+
# S ::= B.
|
168
|
+
# A ::= "a" .
|
169
|
+
# B ::= C "b". # C doesn't appear on lhs of a rule
|
170
|
+
prod_S1 = Rley::Syntax::Production.new(nt_S, [nt_A])
|
171
|
+
prod_S2 = Rley::Syntax::Production.new(nt_S, [nt_B])
|
172
|
+
prod_A = Rley::Syntax::Production.new(nt_A, [a_])
|
173
|
+
prod_B = Rley::Syntax::Production.new(nt_B, [nt_C, b_]) # C is undefined
|
174
|
+
instance = Grammar.new([prod_S1, prod_S2, prod_A, prod_B])
|
175
|
+
undefineds = instance.non_terminals.select(&:undefined?)
|
176
|
+
expect(undefineds.size).to eq(1)
|
177
|
+
expect(undefineds.first).to eq(nt_C)
|
178
|
+
end
|
179
|
+
|
180
|
+
it 'should mark any non-terminal as generative or not' do
|
181
|
+
# S ::= A.
|
182
|
+
# S ::= B.
|
183
|
+
# A ::= "a" .
|
184
|
+
# B ::= C "b". # C doesn't appear on lhs of a rule
|
185
|
+
prod_S1 = Rley::Syntax::Production.new(nt_S, [nt_A])
|
186
|
+
prod_S2 = Rley::Syntax::Production.new(nt_S, [nt_B])
|
187
|
+
prod_A = Rley::Syntax::Production.new(nt_A, [a_])
|
188
|
+
prod_B = Rley::Syntax::Production.new(nt_B, [nt_C, b_]) # C is undefined
|
189
|
+
instance = Grammar.new([prod_S1, prod_S2, prod_A, prod_B])
|
190
|
+
partitioning = instance.non_terminals.partition(&:generative?)
|
191
|
+
expect(partitioning[0].size).to eq(2)
|
192
|
+
expect(partitioning[0]).to eq([nt_S, nt_A])
|
193
|
+
expect(partitioning[1]).to eq([nt_B, nt_C])
|
194
|
+
end
|
195
|
+
|
196
|
+
it "should do a diagnosis even for 'loopy' grammars" do
|
197
|
+
# 'S' => 'A'
|
198
|
+
# 'S' => 'B'
|
199
|
+
# 'A' => 'a'
|
200
|
+
# 'B' => 'C'
|
201
|
+
# 'C' => 'D'
|
202
|
+
# 'D' => 'B'
|
203
|
+
prod_S1 = Rley::Syntax::Production.new(nt_S, [nt_A])
|
204
|
+
prod_S2 = Rley::Syntax::Production.new(nt_S, [nt_B])
|
205
|
+
prod_A = Rley::Syntax::Production.new(nt_A, [a_])
|
206
|
+
prod_B = Rley::Syntax::Production.new(nt_B, [nt_C])
|
207
|
+
prod_C = Rley::Syntax::Production.new(nt_C, [nt_D])
|
208
|
+
prod_D = Rley::Syntax::Production.new(nt_D, [nt_B])
|
209
|
+
instance = Grammar.new([prod_S1, prod_S2, prod_A, prod_B, prod_C, prod_D])
|
210
|
+
partitioning = instance.non_terminals.partition(&:generative?)
|
211
|
+
expect(partitioning[0].size).to eq(2)
|
212
|
+
expect(partitioning[0]).to eq([nt_S, nt_A])
|
213
|
+
expect(partitioning[1]).to eq([nt_B, nt_C, nt_D])
|
214
|
+
|
215
|
+
undefined = instance.non_terminals.select(&:undefined?)
|
216
|
+
expect(undefined).to be_empty
|
217
|
+
end
|
218
|
+
end # context
|
219
|
+
|
160
220
|
context 'Non-nullable grammar:' do
|
161
|
-
|
162
221
|
it 'should mark all its nonterminals as non-nullable' do
|
163
222
|
nonterms = subject.non_terminals
|
164
223
|
nonterms.each do |nterm|
|
@@ -166,14 +225,14 @@ module Rley # Open this namespace to avoid module qualifier prefixes
|
|
166
225
|
end
|
167
226
|
end
|
168
227
|
end # context
|
169
|
-
|
228
|
+
|
170
229
|
context 'Nullable grammars:' do
|
171
230
|
subject do
|
172
|
-
|
173
|
-
productions = [prod_S, prod_A1, prod_A2,
|
231
|
+
prod_A4 = Production.new(nt_A, [])
|
232
|
+
productions = [prod_S, prod_A1, prod_A2, prod_A4]
|
174
233
|
Grammar.new(productions)
|
175
234
|
end
|
176
|
-
|
235
|
+
|
177
236
|
it 'should mark its nullable nonterminals' do
|
178
237
|
# In the default grammar, all nonterminals are nullable
|
179
238
|
nonterms = subject.non_terminals
|
@@ -181,8 +240,8 @@ module Rley # Open this namespace to avoid module qualifier prefixes
|
|
181
240
|
expect(nterm).to be_nullable
|
182
241
|
end
|
183
242
|
end
|
184
|
-
|
185
243
|
end # context
|
244
|
+
|
186
245
|
end # describe
|
187
246
|
end # module
|
188
247
|
end # module
|
@@ -32,6 +32,22 @@ module Rley # Open this namespace to avoid module qualifier prefixes
|
|
32
32
|
subject.nullable = false
|
33
33
|
expect(subject).not_to be_nullable
|
34
34
|
end
|
35
|
+
|
36
|
+
it 'should know whether it is defined' do
|
37
|
+
expect(subject.undefined?).to be_nil
|
38
|
+
subject.undefined = true
|
39
|
+
expect(subject).to be_undefined
|
40
|
+
subject.undefined = false
|
41
|
+
expect(subject).not_to be_undefined
|
42
|
+
end
|
43
|
+
|
44
|
+
it 'should know whether it is generative' do
|
45
|
+
expect(subject.generative?).to be_nil
|
46
|
+
subject.generative = true
|
47
|
+
expect(subject).to be_generative
|
48
|
+
subject.generative = false
|
49
|
+
expect(subject).not_to be_generative
|
50
|
+
end
|
35
51
|
end # context
|
36
52
|
end # describe
|
37
53
|
end # module
|
@@ -25,6 +25,10 @@ module Rley # Open this namespace to avoid module qualifier prefixes
|
|
25
25
|
it "should know that isn't nullable" do
|
26
26
|
expect(subject).not_to be_nullable
|
27
27
|
end
|
28
|
+
|
29
|
+
it "should know that it is generative" do
|
30
|
+
expect(subject).to be_generative
|
31
|
+
end
|
28
32
|
end # context
|
29
33
|
end # describe
|
30
34
|
end # module
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rley
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.4.
|
4
|
+
version: 0.4.06
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Dimitri Geshef
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2017-05-
|
11
|
+
date: 2017-05-25 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake
|