rley 0.3.01 → 0.3.04
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +17 -3
- data/Gemfile +1 -1
- data/examples/parsers/parsing_groucho.rb +1 -3
- data/lib/rley/constants.rb +1 -1
- data/lib/rley/parser/gfg_parsing.rb +5 -179
- data/lib/rley/parser/parse_forest_builder.rb +62 -77
- data/lib/rley/parser/parse_walker_factory.rb +26 -17
- data/lib/rley.rb +1 -0
- data/spec/rley/gfg/end_vertex_spec.rb +1 -1
- data/spec/rley/gfg/shortcut_edge_spec.rb +6 -6
- data/spec/rley/gfg/start_vertex_spec.rb +1 -1
- data/spec/rley/parse_forest_visitor_spec.rb +1 -1
- data/spec/rley/parse_tree_visitor_spec.rb +1 -1
- data/spec/rley/parser/gfg_parsing_spec.rb +3 -6
- data/spec/rley/parser/parse_entry_set_spec.rb +19 -19
- data/spec/rley/parser/parse_entry_spec.rb +6 -6
- data/spec/rley/parser/parse_forest_builder_spec.rb +374 -79
- data/spec/rley/parser/parse_forest_factory_spec.rb +1 -1
- data/spec/rley/parser/parse_walker_factory_spec.rb +60 -40
- data/spec/rley/parser/state_set_spec.rb +8 -8
- data/spec/rley/support/grammar_L0_helper.rb +81 -0
- metadata +11 -13
- data/spec/rley/sppf/antecedence_graph.rb +0 -87
- data/spec/rley/sppf/forest_representation.rb +0 -136
- data/spec/rley/sppf/gfg_representation.rb +0 -111
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 4c5e18168fe007f5fb819f2df4f1f1944bae971d
|
4
|
+
data.tar.gz: ccb8abe6ea9d8d135a0fc896ebfa4dbf5fe6f674
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: d2eab437aba2c78676529651cca3a0a1fd9cf0b23f14f7003bf631add424a50295a14ddc1065e63db907c3003a415ae1c20427a1d6594243f5724a25203f248b
|
7
|
+
data.tar.gz: 4fc6fa566e1f8c847dd34a862e27d73d7dc758bfcd557e84343b41951f6f52dd2993d0be4d92c60ec40241887065021ca006ef4b96dd1e609264b9976da6a433
|
data/CHANGELOG.md
CHANGED
@@ -1,7 +1,21 @@
|
|
1
|
+
### 0.3.04 / 2016-11-01
|
2
|
+
* [FIX] File `state_set_spec.rb` : Failing mock tests. Reverted `expect` to `allow` expectations.
|
3
|
+
|
4
|
+
### 0.3.03 / 2016-11-01
|
5
|
+
* [FIX] File `parse_forest_factory_spec.rb`: Commented out reference to local files.
|
6
|
+
* [FIX] Files `*_spec.rb` : Replaced most `allow` expectations by `expect`
|
7
|
+
* [CHANGE] Updated development dependency upon RSpec version 3.5
|
8
|
+
|
9
|
+
### 0.3.02 / 2016-11-01
|
10
|
+
* [FIX] Method `ParseWalkerFactory#visit_entry` didn't generate events for entries with a start vertex. This caused an issue in parse forest generation.
|
11
|
+
* [NEW] File `parse_forest_builder_spec.rb`: added more parse forest building tests.
|
12
|
+
* [CHANGE] Method `ParseWalkerFactory#antecedent_of`. Code refactoring.
|
13
|
+
* [CHANGE] Method `ParseForestBuilder#receive_event`. Code refactoring.
|
14
|
+
|
1
15
|
### 0.3.01 / 2016-10-23
|
2
|
-
* [
|
3
|
-
* [
|
4
|
-
* [
|
16
|
+
* [CHANGE] Method `ParseWalkerFactory#build_walker`. Signature change in order to prevent direct dependency on `GFGParsing` class.
|
17
|
+
* [CHANGE] Class `ParseForestBuilder`. Removal of `parsing` attribute, no direct dependency on `GFGParsing` class.
|
18
|
+
* [CHANGE] Internal changes to `ParseForestFactory` class.
|
5
19
|
|
6
20
|
### 0.3.00 / 2016-10-23
|
7
21
|
* [CHANGE] Many new classes. The gem bundles a second parser that copes with ambiguous grammars.
|
data/Gemfile
CHANGED
@@ -1,7 +1,6 @@
|
|
1
1
|
# Purpose: to demonstrate how to parse an emblematic ambiguous sentence
|
2
2
|
# Based on example found at: http://www.nltk.org/book_1ed/ch08.html
|
3
3
|
|
4
|
-
require 'pp'
|
5
4
|
require 'rley' # Load the gem
|
6
5
|
|
7
6
|
# Steps to render a parse tree (of a valid parsed input):
|
@@ -10,8 +9,7 @@ require 'rley' # Load the gem
|
|
10
9
|
# 3. Create a parser for that grammar
|
11
10
|
# 4. Tokenize the input
|
12
11
|
# 5. Let the parser process the input
|
13
|
-
# 6. Generate a parse
|
14
|
-
# 7. Render the parse tree (in JSON)
|
12
|
+
# 6. Generate a parse forest from the parse result
|
15
13
|
|
16
14
|
########################################
|
17
15
|
# Step 1. Define a grammar for a micro English-like language
|
data/lib/rley/constants.rb
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
require_relative 'gfg_chart'
|
2
2
|
require_relative 'parse_entry_tracker'
|
3
|
-
require_relative '
|
3
|
+
require_relative 'parse_forest_factory'
|
4
4
|
|
5
5
|
|
6
6
|
module Rley # This module is used as a namespace
|
@@ -128,33 +128,14 @@ module Rley # This module is used as a namespace
|
|
128
128
|
end
|
129
129
|
|
130
130
|
|
131
|
-
|
131
|
+
|
132
132
|
# Factory method. Builds a ParseForest from the parse result.
|
133
|
-
# @return [ParseForest]
|
134
|
-
# Algorithm:
|
135
|
-
# set state_set_index = index of last entry set in chart
|
136
|
-
# Search the completed parse state that corresponds to the full parse
|
133
|
+
# @return [ParseForest]
|
137
134
|
def parse_forest()
|
138
|
-
|
139
|
-
builder = forest_builder(state_tracker.state_set_index)
|
140
|
-
|
141
|
-
loop do
|
142
|
-
state_tracker.symbol_on_left
|
143
|
-
# match_symbol = state_tracker.symbol_on_left
|
144
|
-
# puts '--------------------'
|
145
|
-
# puts "Active parse state: #{state_tracker.parse_state}"
|
146
|
-
# puts "Matching symbol: #{match_symbol}"
|
147
|
-
# puts 'Parse tree:'
|
148
|
-
# puts builder.root.to_string(0)
|
149
|
-
|
150
|
-
# Place the symbol on left of the dot in the parse tree
|
151
|
-
done = insert_matched_symbol(state_tracker, builder)
|
152
|
-
break if done
|
153
|
-
end
|
135
|
+
factory = ParseForestFactory.new(self)
|
154
136
|
|
155
|
-
return
|
137
|
+
return factory.build_parse_forest
|
156
138
|
end
|
157
|
-
=end
|
158
139
|
|
159
140
|
# Retrieve the very first parse entry added to the chart.
|
160
141
|
# This entry corresponds to the start vertex of the GF graph
|
@@ -171,108 +152,7 @@ module Rley # This module is used as a namespace
|
|
171
152
|
return chart.accepting_entry
|
172
153
|
end
|
173
154
|
|
174
|
-
=begin
|
175
|
-
|
176
|
-
|
177
|
-
|
178
|
-
# This method is called when a parse entry for chart entry at position
|
179
|
-
# 'pos' expects a terminal as next symbol.
|
180
|
-
# If the input token matches the terminal symbol then:
|
181
|
-
# Retrieve all parse entrys for chart entry at 'aPosition'
|
182
|
-
# that have the given terminal as next symbol.
|
183
|
-
# For each s of the above entrys, push to chart entry aPosition + 1
|
184
|
-
# a new entry like: <next dotted rule, s.origin, aPosition + 1>
|
185
|
-
# In other words, we place the dotted rules in the next entry set
|
186
|
-
# such that the dot appears after terminal.
|
187
|
-
# @param aTerminal [Terminal] a terminal symbol that
|
188
|
-
# immediately follows a dot
|
189
|
-
# @param aPosition [Fixnum] position in the input token sequence.
|
190
|
-
# @param nextMapping [Proc or Lambda] code to evaluate in order to
|
191
|
-
# determine the "next" dotted rule for a given one.
|
192
|
-
def scanning(aTerminal, aPosition, &nextMapping)
|
193
|
-
curr_token = tokens[aPosition]
|
194
|
-
return unless curr_token.terminal == aTerminal
|
195
|
-
|
196
|
-
entrys = entrys_expecting(aTerminal, aPosition, false)
|
197
|
-
entrys.each do |s|
|
198
|
-
next_item = nextMapping.call(s.dotted_rule)
|
199
|
-
push_entry(next_item, s.origin, aPosition + 1, :scanning)
|
200
|
-
end
|
201
|
-
end
|
202
|
-
|
203
|
-
|
204
|
-
|
205
|
-
# This method is called when a parse entry at chart entry reaches the end
|
206
|
-
# of a production.
|
207
|
-
# For every entry in chart[aPosition] that is complete
|
208
|
-
# (i.e. of the form: { dotted_rule: X -> γ •, origin: j}),
|
209
|
-
# Find entrys s in chart[j] of the form
|
210
|
-
# {dotted_rule: Y -> α • X β, origin: i}
|
211
|
-
# In other words, rules that predicted the non-terminal X.
|
212
|
-
# For each s, add to chart[aPosition] a entry of the form
|
213
|
-
# { dotted_rule: Y → α X • β, origin: i})
|
214
|
-
def completion(aState, aPosition, &nextMapping)
|
215
|
-
curr_origin = aState.origin
|
216
|
-
curr_lhs = aState.dotted_rule.lhs
|
217
|
-
entrys = entrys_expecting(curr_lhs, curr_origin, false)
|
218
|
-
entrys.each do |s|
|
219
|
-
next_item = nextMapping.call(s.dotted_rule)
|
220
|
-
push_entry(next_item, s.origin, aPosition, :completion)
|
221
|
-
end
|
222
|
-
end
|
223
|
-
|
224
|
-
|
225
|
-
# The list of ParseState from the chart entry at given position
|
226
|
-
# that expect the given terminal
|
227
|
-
def entrys_expecting(aTerminal, aPosition, toSort)
|
228
|
-
expecting = chart[aPosition].entrys_expecting(aTerminal)
|
229
|
-
return expecting if !toSort || expecting.size < 2
|
230
|
-
|
231
|
-
# Put predicted entrys ahead
|
232
|
-
(predicted, others) = expecting.partition(&:predicted?)
|
233
|
-
|
234
|
-
# Sort entry in reverse order of their origin value
|
235
|
-
[predicted, others].each do |set|
|
236
|
-
set.sort! { |a, b| b.origin <=> a.origin }
|
237
|
-
end
|
238
|
-
|
239
|
-
return predicted + others
|
240
|
-
end
|
241
|
-
|
242
|
-
|
243
|
-
|
244
|
-
|
245
|
-
# Insert in a parse tree the symbol on the left of the
|
246
|
-
# current dotted rule.
|
247
|
-
def insert_matched_symbol(aStateTracker, aBuilder)
|
248
|
-
# Retrieve symbol before the dot in active parse entry
|
249
|
-
match_symbol = aStateTracker.symbol_on_left
|
250
155
|
|
251
|
-
# Retrieve tree node being processed
|
252
|
-
tree_node = aBuilder.current_node
|
253
|
-
|
254
|
-
done = false
|
255
|
-
case [match_symbol.class, tree_node.class]
|
256
|
-
when [Syntax::Terminal, PTree::TerminalNode]
|
257
|
-
aStateTracker.to_prev_entry_set
|
258
|
-
predecessor_entry_terminal(match_symbol, aStateTracker, aBuilder)
|
259
|
-
|
260
|
-
when [NilClass, Rley::PTree::TerminalNode],
|
261
|
-
[NilClass, PTree::NonTerminalNode]
|
262
|
-
# Retrieve all parse entrys that expect the lhs
|
263
|
-
new_entrys = entrys_expecting_lhs(aStateTracker, aBuilder)
|
264
|
-
done = true if new_entrys.empty?
|
265
|
-
# Select an unused parse entry
|
266
|
-
aStateTracker.select_entry(new_entrys)
|
267
|
-
|
268
|
-
when [Syntax::NonTerminal, PTree::NonTerminalNode]
|
269
|
-
completed_entry_for(match_symbol, aStateTracker, aBuilder)
|
270
|
-
end
|
271
|
-
|
272
|
-
done ||= aBuilder.root == aBuilder.current_node
|
273
|
-
return done
|
274
|
-
end
|
275
|
-
=end
|
276
156
|
private
|
277
157
|
|
278
158
|
# Raise an exception to indicate a syntax error.
|
@@ -322,61 +202,7 @@ module Rley # This module is used as a namespace
|
|
322
202
|
|
323
203
|
return instance
|
324
204
|
end
|
325
|
-
=begin
|
326
|
-
|
327
|
-
# A terminal symbol is on the left of dot.
|
328
|
-
# Go to the predecessor entry for the given terminal
|
329
|
-
def predecessor_entry_terminal(_a_symb, aStateTracker, aTreeBuilder)
|
330
|
-
index = aStateTracker.entry_set_index
|
331
|
-
aTreeBuilder.current_node.range = { low: index, high: index + 1 }
|
332
|
-
link_node_to_token(aTreeBuilder, aStateTracker.entry_set_index)
|
333
|
-
unless aTreeBuilder.current_node.is_a?(PTree::TerminalNode)
|
334
|
-
fail StandardError, 'Expected terminal node'
|
335
|
-
end
|
336
|
-
aTreeBuilder.move_back
|
337
|
-
entry_set = chart[aStateTracker.entry_set_index]
|
338
|
-
previous_entry = entry_set.predecessor_entry(aStateTracker.parse_entry)
|
339
|
-
aStateTracker.parse_entry = previous_entry
|
340
|
-
end
|
341
|
-
|
342
|
-
|
343
|
-
# Retrieve a complete entry with given terminal symbol as lhs.
|
344
|
-
def completed_entry_for(a_symb, aTracker, aTreeBuilder)
|
345
|
-
new_entrys = chart[aTracker.entry_set_index].entrys_rewriting(a_symb)
|
346
|
-
aTracker.select_entry(new_entrys)
|
347
|
-
aTreeBuilder.range = { high: aTracker.entry_set_index }
|
348
|
-
aTreeBuilder.use_complete_entry(aTracker.parse_entry)
|
349
|
-
link_node_to_token(aTreeBuilder, aTracker.entry_set_index - 1)
|
350
|
-
aTreeBuilder.move_down
|
351
|
-
end
|
352
|
-
|
353
|
-
|
354
|
-
def entrys_expecting_lhs(aStateTracker, aTreeBuilder)
|
355
|
-
lhs = aStateTracker.curr_dotted_item.production.lhs
|
356
|
-
new_entrys = entrys_expecting(lhs, aStateTracker.entry_set_index, true)
|
357
|
-
new_entrys.reject! { |st| st == aStateTracker.parse_entry }
|
358
|
-
# Filter out parse entrys with incompatible range
|
359
|
-
if new_entrys.size > 1
|
360
|
-
previous_node = aTreeBuilder.current_path[-3]
|
361
|
-
new_entrys.select! do |parse_entry|
|
362
|
-
parse_entry.dotted_rule.production.lhs == previous_node.symbol
|
363
|
-
end
|
364
|
-
end
|
365
|
-
|
366
|
-
return new_entrys
|
367
|
-
end
|
368
|
-
|
369
|
-
# If the current node is a terminal node
|
370
|
-
# then link the token to that node
|
371
|
-
def link_node_to_token(aTreeBuilder, aStateSetIndex)
|
372
|
-
return unless aTreeBuilder.current_node.is_a?(PTree::TerminalNode)
|
373
|
-
return unless aTreeBuilder.current_node.token.nil?
|
374
|
-
|
375
|
-
a_node = aTreeBuilder.current_node
|
376
|
-
a_node.token = tokens[aStateSetIndex] unless a_node.token
|
377
|
-
end
|
378
205
|
|
379
|
-
=end
|
380
206
|
end # class
|
381
207
|
end # module
|
382
208
|
end # module
|
@@ -1,3 +1,8 @@
|
|
1
|
+
require_relative '../syntax/terminal'
|
2
|
+
require_relative '../syntax/non_terminal'
|
3
|
+
require_relative '../gfg/end_vertex'
|
4
|
+
require_relative '../gfg/item_vertex'
|
5
|
+
require_relative '../gfg/start_vertex'
|
1
6
|
require_relative '../sppf/epsilon_node'
|
2
7
|
require_relative '../sppf/non_terminal_node'
|
3
8
|
require_relative '../sppf/alternative_node'
|
@@ -9,7 +14,7 @@ module Rley # This module is used as a namespace
|
|
9
14
|
# (say, a parse forest) from simpler objects (terminal and non-terminal
|
10
15
|
# nodes) and using a step by step approach.
|
11
16
|
class ParseForestBuilder
|
12
|
-
# The sequence of input tokens
|
17
|
+
# The sequence of input tokens
|
13
18
|
attr_reader(:tokens)
|
14
19
|
|
15
20
|
# Link to forest object
|
@@ -18,6 +23,9 @@ module Rley # This module is used as a namespace
|
|
18
23
|
# Link to current path
|
19
24
|
attr_reader(:curr_path)
|
20
25
|
|
26
|
+
# The last parse entry visited
|
27
|
+
attr_reader(:last_visitee)
|
28
|
+
|
21
29
|
# A hash with pairs of the form: visited parse entry => forest node
|
22
30
|
attr_reader(:entry2node)
|
23
31
|
|
@@ -34,18 +42,17 @@ module Rley # This module is used as a namespace
|
|
34
42
|
|
35
43
|
def receive_event(anEvent, anEntry, anIndex)
|
36
44
|
# puts "Event: #{anEvent} #{anEntry} #{anIndex}"
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
process_end_entry(anEvent, anEntry, anIndex)
|
46
|
-
else
|
47
|
-
fail NotImplementedError
|
45
|
+
if anEntry.dotted_entry?
|
46
|
+
process_item_entry(anEvent, anEntry, anIndex)
|
47
|
+
elsif anEntry.start_entry?
|
48
|
+
process_start_entry(anEvent, anEntry, anIndex)
|
49
|
+
elsif anEntry.end_entry?
|
50
|
+
process_end_entry(anEvent, anEntry, anIndex)
|
51
|
+
else
|
52
|
+
fail NotImplementedError
|
48
53
|
end
|
54
|
+
|
55
|
+
@last_visitee = anEntry
|
49
56
|
end
|
50
57
|
|
51
58
|
# Return the current_parent node
|
@@ -56,32 +63,20 @@ module Rley # This module is used as a namespace
|
|
56
63
|
private
|
57
64
|
|
58
65
|
def process_start_entry(anEvent, anEntry, anIndex)
|
59
|
-
self.curr_path.pop while curr_parent.kind_of?(SPPF::AlternativeNode)
|
60
66
|
self.curr_path.pop
|
61
67
|
end
|
62
68
|
|
63
69
|
def process_end_entry(anEvent, anEntry, anIndex)
|
64
70
|
case anEvent
|
65
71
|
when :visit
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
@entry2node[anEntry] = self.curr_parent
|
75
|
-
if anEntry.antecedents.size > 1
|
76
|
-
# Store current path for later backtracking
|
77
|
-
# puts "Store backtrack context #{anEntry}"
|
78
|
-
# puts "path [#{curr_path.join(', ')}]"
|
79
|
-
self.entry2path_to_alt[anEntry] = curr_path.dup
|
80
|
-
curr_parent.refinement = :or
|
81
|
-
|
82
|
-
create_alternative_node(anEntry.antecedents.first)
|
83
|
-
end
|
84
|
-
end
|
72
|
+
# create a node with the non-terminal
|
73
|
+
# with same right extent as curr_entry_set_index
|
74
|
+
# add the new node as first child of current_parent
|
75
|
+
# append the new node to the curr_path
|
76
|
+
range = { low: anEntry.origin, high: anIndex }
|
77
|
+
non_terminal = anEntry.vertex.non_terminal
|
78
|
+
create_non_terminal_node(anEntry, range, non_terminal)
|
79
|
+
@forest = create_forest(curr_parent) unless @last_visitee
|
85
80
|
|
86
81
|
when :backtrack
|
87
82
|
# Restore path
|
@@ -90,15 +85,9 @@ private
|
|
90
85
|
antecedent_index = curr_parent.subnodes.size
|
91
86
|
# puts "Current parent #{curr_parent.to_string(0)}"
|
92
87
|
# puts "Antecedent index #{antecedent_index}"
|
93
|
-
create_alternative_node(anEntry.antecedents[antecedent_index])
|
94
|
-
|
95
|
-
when :revisit
|
96
|
-
# Remove most recent entry in path
|
97
|
-
@curr_path.pop
|
98
88
|
|
99
|
-
# Remove also its reference in parent
|
100
|
-
curr_parent.subnodes.pop
|
101
89
|
|
90
|
+
when :revisit
|
102
91
|
# Retrieve the already existing node corresponding to re-visited entry
|
103
92
|
popular = @entry2node[anEntry]
|
104
93
|
|
@@ -110,55 +99,45 @@ private
|
|
110
99
|
end
|
111
100
|
end
|
112
101
|
|
113
|
-
|
114
|
-
if it is a dotted item entry (pattern is: X => α . β):
|
115
|
-
if there is at least one symbol before the dot
|
116
|
-
if that symbol is a non-terminal:
|
117
|
-
|
118
|
-
if that symbol is a terminal # else
|
119
|
-
create a token node,
|
120
|
-
with same origin as token,
|
121
|
-
with same right extent = origin + 1
|
122
|
-
add the new node as first child of current_parent
|
123
|
-
set curr_entry_set_index to curr_entry_set_index - 1
|
124
|
-
if it is a dotted item entry with a beginning dot: # else
|
125
|
-
if current_parent node matches the lhs non-terminal of anEntry
|
126
|
-
set its origin to the origin of its first child (if not yet assigned)
|
127
|
-
remove this node from the path
|
128
|
-
=end
|
102
|
+
|
129
103
|
def process_item_entry(anEvent, anEntry, anIndex)
|
104
|
+
if anEntry.exit_entry?
|
105
|
+
# Previous entry was an end entry (X. pattern)
|
106
|
+
# Does the previous entry have multiple antecedents?
|
107
|
+
if last_visitee.end_entry? && last_visitee.antecedents.size > 1
|
108
|
+
# Store current path for later backtracking
|
109
|
+
# puts "Store backtrack context #{last_visitee}"
|
110
|
+
# puts "path [#{curr_path.join(', ')}]"
|
111
|
+
self.entry2path_to_alt[last_visitee] = curr_path.dup
|
112
|
+
curr_parent.refinement = :or
|
113
|
+
|
114
|
+
create_alternative_node(anEntry)
|
115
|
+
end
|
116
|
+
end
|
117
|
+
|
130
118
|
# Retrieve the grammar symbol before the dot (if any)
|
131
119
|
prev_symbol = anEntry.prev_symbol
|
132
120
|
case prev_symbol
|
133
121
|
when Syntax::Terminal
|
134
|
-
#
|
135
|
-
# with same origin as token,
|
136
|
-
# with same right extent = origin + 1
|
137
|
-
# add the new node as first child of current_parent
|
122
|
+
# Add node without changing current path
|
138
123
|
create_token_node(anEntry, anIndex)
|
139
124
|
|
140
|
-
|
141
125
|
when Syntax::NonTerminal
|
142
|
-
#
|
143
|
-
# with same right extent as curr_entry_set_index
|
144
|
-
# add the new node as first child of current_parent
|
145
|
-
# append the new node to the curr_path
|
146
|
-
range = { high: anIndex }
|
147
|
-
create_non_terminal_node(anEntry, range, prev_symbol)
|
126
|
+
# Do nothing
|
148
127
|
|
149
128
|
when NilClass # Dot at the beginning of production
|
150
129
|
if anEntry.vertex.dotted_item.production.empty?
|
151
|
-
# Empty rhs => create an epsilon node
|
130
|
+
# Empty rhs => create an epsilon node ...
|
131
|
+
# ... without changing current path
|
152
132
|
create_epsilon_node(anEntry, anIndex)
|
153
133
|
end
|
134
|
+
self.curr_path.pop if curr_parent.kind_of?(SPPF::AlternativeNode)
|
154
135
|
end
|
155
136
|
end
|
156
|
-
|
137
|
+
|
157
138
|
# Create an empty parse forest
|
158
|
-
def create_forest(
|
159
|
-
|
160
|
-
root_node = create_non_terminal_node(anEntry, full_range)
|
161
|
-
return Rley::SPPF::ParseForest.new(root_node)
|
139
|
+
def create_forest(aRootNode)
|
140
|
+
return Rley::SPPF::ParseForest.new(aRootNode)
|
162
141
|
end
|
163
142
|
|
164
143
|
|
@@ -167,6 +146,7 @@ private
|
|
167
146
|
non_terminal = nonTSymb.nil? ? anEntry.vertex.non_terminal : nonTSymb
|
168
147
|
new_node = Rley::SPPF::NonTerminalNode.new(non_terminal, aRange)
|
169
148
|
entry2node[anEntry] = new_node
|
149
|
+
# puts "FOREST ADD #{curr_parent.key if curr_parent}/#{new_node.key}"
|
170
150
|
add_subnode(new_node)
|
171
151
|
|
172
152
|
return new_node
|
@@ -177,29 +157,34 @@ private
|
|
177
157
|
def create_alternative_node(anEntry)
|
178
158
|
alternative = Rley::SPPF::AlternativeNode.new(anEntry.vertex, curr_parent.range)
|
179
159
|
add_subnode(alternative)
|
160
|
+
# puts "FOREST ADD #{alternative.key}"
|
180
161
|
|
181
162
|
return alternative
|
182
163
|
end
|
183
164
|
|
165
|
+
# create a token node,
|
166
|
+
# with same origin as token,
|
167
|
+
# with same right extent = origin + 1
|
168
|
+
# add the new node as first child of current_parent
|
184
169
|
def create_token_node(anEntry, anIndex)
|
185
170
|
token_position = anIndex - 1
|
186
171
|
curr_token = tokens[token_position]
|
187
172
|
new_node = SPPF::TokenNode.new(curr_token, token_position)
|
188
173
|
candidate = add_node_to_forest(new_node)
|
189
|
-
entry2node[anEntry] = candidate
|
174
|
+
entry2node[anEntry] = candidate
|
190
175
|
|
191
|
-
return candidate
|
176
|
+
return candidate
|
192
177
|
end
|
193
178
|
|
194
179
|
|
195
180
|
def create_epsilon_node(anEntry, anIndex)
|
196
181
|
new_node = SPPF::EpsilonNode.new(anIndex)
|
197
182
|
candidate = add_node_to_forest(new_node)
|
198
|
-
entry2node[anEntry] = candidate
|
183
|
+
entry2node[anEntry] = candidate
|
199
184
|
|
200
185
|
return candidate
|
201
186
|
end
|
202
|
-
|
187
|
+
|
203
188
|
# Add the given node if not yet present in parse forest
|
204
189
|
def add_node_to_forest(aNode)
|
205
190
|
key_node = aNode.key
|
@@ -210,7 +195,7 @@ private
|
|
210
195
|
forest.key2node[key_node] = new_node
|
211
196
|
# puts "FOREST ADD #{key_node}"
|
212
197
|
end
|
213
|
-
add_subnode(new_node, false)
|
198
|
+
add_subnode(new_node, false)
|
214
199
|
|
215
200
|
return new_node
|
216
201
|
end
|
@@ -1,4 +1,10 @@
|
|
1
1
|
require 'set'
|
2
|
+
require_relative '../gfg/call_edge'
|
3
|
+
require_relative '../gfg/scan_edge'
|
4
|
+
require_relative '../gfg/epsilon_edge'
|
5
|
+
require_relative '../gfg/end_vertex'
|
6
|
+
require_relative '../gfg/item_vertex'
|
7
|
+
require_relative '../gfg/start_vertex'
|
2
8
|
|
3
9
|
module Rley # This module is used as a namespace
|
4
10
|
module Parser # This module is used as a namespace
|
@@ -87,8 +93,7 @@ private
|
|
87
93
|
aContext.nterm2start[anEntry.vertex.non_terminal] = anEntry
|
88
94
|
end
|
89
95
|
|
90
|
-
if aContext.visitees.include?(anEntry)
|
91
|
-
# multiple time visit
|
96
|
+
if aContext.visitees.include?(anEntry) # Already visited?...
|
92
97
|
case anEntry.vertex
|
93
98
|
when GFG::EndVertex
|
94
99
|
# Jump to related start entry...
|
@@ -98,8 +103,7 @@ private
|
|
98
103
|
event = [:revisit, anEntry, index]
|
99
104
|
|
100
105
|
when GFG::StartVertex
|
101
|
-
|
102
|
-
event = nil
|
106
|
+
event = [:revisit, anEntry, index]
|
103
107
|
|
104
108
|
when GFG::ItemVertex
|
105
109
|
# Skip item entries while revisiting
|
@@ -142,18 +146,23 @@ private
|
|
142
146
|
new_entry = aContext.curr_entry.antecedents.first
|
143
147
|
events = [new_entry]
|
144
148
|
traversed_edge = new_entry.vertex.edges.first
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
149
|
+
if new_entry.vertex.kind_of?(GFG::EndVertex)
|
150
|
+
# Return edge encountered
|
151
|
+
# Push current entry onto stack
|
152
|
+
# puts "Push on return stack #{aContext.curr_entry}"
|
153
|
+
aContext.return_stack << aContext.curr_entry
|
154
|
+
elsif traversed_edge.kind_of?(GFG::CallEdge)
|
155
|
+
# Pop top of stack
|
156
|
+
tos = aContext.return_stack.pop
|
157
|
+
# puts "Pop from return stack matching entry #{new_entry}"
|
158
|
+
elsif traversed_edge.kind_of?(GFG::ScanEdge)
|
159
|
+
# Scan edge encountered, decrease sigma set index
|
160
|
+
aContext.entry_set_index -= 1
|
161
|
+
elsif traversed_edge.kind_of?(GFG::EpsilonEdge)
|
162
|
+
# Do nothing
|
163
|
+
else
|
164
|
+
fail NotImplementedError, "edge is a #{traversed_edge.class}"
|
165
|
+
end
|
157
166
|
|
158
167
|
return events
|
159
168
|
end
|
@@ -169,7 +178,6 @@ private
|
|
169
178
|
new_entry = bp.visitee.antecedents[bp.antecedent_index]
|
170
179
|
|
171
180
|
when GFG::StartVertex
|
172
|
-
# An start vertex with multiple requires a backtrack point
|
173
181
|
new_entry = select_calling_entry(aContext)
|
174
182
|
else
|
175
183
|
fail StandardError, "Internal error"
|
@@ -204,6 +212,7 @@ private
|
|
204
212
|
if bp.antecedent_index == bp.visitee.antecedents.size - 1
|
205
213
|
aContext.backtrack_points.pop
|
206
214
|
end
|
215
|
+
# puts "Backtracking to #{bp.visitee}"
|
207
216
|
|
208
217
|
# Emit a backtrack event
|
209
218
|
return [:backtrack, bp.visitee, aContext.entry_set_index]
|
data/lib/rley.rb
CHANGED
@@ -6,6 +6,7 @@ require_relative './rley/constants'
|
|
6
6
|
require_relative './rley/syntax/grammar_builder'
|
7
7
|
require_relative './rley/parser/token'
|
8
8
|
require_relative './rley/parser/earley_parser'
|
9
|
+
require_relative './rley/parser/gfg_earley_parser'
|
9
10
|
require_relative './rley/parse_tree_visitor'
|
10
11
|
require_relative './rley/formatter/debug'
|
11
12
|
require_relative './rley/formatter/json'
|
@@ -15,7 +15,7 @@ module Rley # Open this namespace to avoid module qualifier prefixes
|
|
15
15
|
end
|
16
16
|
|
17
17
|
it 'should know its label' do
|
18
|
-
|
18
|
+
expect(sample_nt).to receive(:to_s).and_return('NT')
|
19
19
|
expect(subject.label).to eq('NT.')
|
20
20
|
end
|
21
21
|
|
@@ -15,23 +15,23 @@ module Rley # Open this namespace to avoid module qualifier prefixes
|
|
15
15
|
|
16
16
|
context 'Initialization:' do
|
17
17
|
it 'should be created with two vertice arguments & a non-terminal' do
|
18
|
-
|
19
|
-
|
18
|
+
expect(vertex1).to receive(:shortcut=)
|
19
|
+
expect(vertex1).to receive(:next_symbol).and_return(nt_b_sequence)
|
20
20
|
|
21
21
|
expect { ShortcutEdge.new(vertex1, vertex2) }
|
22
22
|
.not_to raise_error
|
23
23
|
end
|
24
24
|
|
25
25
|
it 'should know the successor vertex' do
|
26
|
-
|
27
|
-
|
26
|
+
expect(vertex1).to receive(:shortcut=)
|
27
|
+
expect(vertex1).to receive(:next_symbol).and_return(nt_b_sequence)
|
28
28
|
|
29
29
|
expect(subject.successor).to eq(vertex2)
|
30
30
|
end
|
31
31
|
|
32
32
|
it 'should know the related terminal' do
|
33
|
-
|
34
|
-
|
33
|
+
expect(vertex1).to receive(:shortcut=)
|
34
|
+
expect(vertex1).to receive(:next_symbol).and_return(nt_b_sequence)
|
35
35
|
|
36
36
|
expect(subject.nonterminal).to eq(nt_b_sequence)
|
37
37
|
end
|