rley 0.2.01 → 0.2.02
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +8 -8
- data/CHANGELOG.md +5 -0
- data/README.md +9 -6
- data/examples/parsers/parsing_L0.rb +1 -1
- data/examples/parsers/parsing_L1.rb +136 -0
- data/examples/parsers/parsing_abc.rb +1 -2
- data/examples/parsers/parsing_ambig.rb +85 -0
- data/lib/rley/constants.rb +1 -1
- data/lib/rley/formatter/json.rb +1 -4
- data/lib/rley/parser/parse_state_tracker.rb +1 -1
- data/lib/rley/parser/parse_tree_builder.rb +12 -13
- data/lib/rley/parser/parsing.rb +61 -44
- data/lib/rley/ptree/non_terminal_node.rb +13 -0
- data/lib/rley/ptree/parse_tree_node.rb +7 -0
- data/lib/rley/ptree/terminal_node.rb +7 -0
- data/lib/rley/ptree/token_range.rb +18 -0
- data/spec/rley/formatter/json_spec.rb +1 -2
- data/spec/rley/parser/earley_parser_spec.rb +77 -4
- data/spec/rley/parser/parse_tree_builder_spec.rb +28 -27
- data/spec/rley/parser/parsing_spec.rb +230 -73
- data/spec/rley/ptree/non_terminal_node_spec.rb +42 -3
- data/spec/rley/ptree/terminal_node_spec.rb +39 -0
- data/spec/rley/ptree/token_range_spec.rb +45 -0
- data/spec/rley/support/ambiguous_grammar_helper.rb +36 -0
- data/spec/rley/support/grammar_b_expr_helper.rb +1 -1
- metadata +7 -2
checksums.yaml
CHANGED
@@ -1,15 +1,15 @@
|
|
1
1
|
---
|
2
2
|
!binary "U0hBMQ==":
|
3
3
|
metadata.gz: !binary |-
|
4
|
-
|
4
|
+
MzY1NzQxNzdkNGEzMDFiYzhhNmQ0M2Q5MTU3NzQxOGMyMDkxYTM4Ng==
|
5
5
|
data.tar.gz: !binary |-
|
6
|
-
|
6
|
+
OGUzMzg4YmE5ZGRiZmRiOWEzMTFiYjhiOTJlYmQ2M2FkNzBkOWVjOQ==
|
7
7
|
!binary "U0hBNTEy":
|
8
8
|
metadata.gz: !binary |-
|
9
|
-
|
10
|
-
|
11
|
-
|
9
|
+
MzAzY2FkM2VkNzY2NDVhZDEwNmIyYWVkZDg3MDEwYzQ4ZGM3NzMzMmMxZTdk
|
10
|
+
YzYyZDZhMjNmNDQyZDc5ZmRlYzBjYTQ5ZTMyMmQzZTk2YjBiMjI3N2IxOTYw
|
11
|
+
YjZiZGYzZWQxZjUxM2ExMTQxMzExZmIzNTJmZTZjNDhjYzcyOTk=
|
12
12
|
data.tar.gz: !binary |-
|
13
|
-
|
14
|
-
|
15
|
-
|
13
|
+
MWQ5MGM3YjVjNjNhY2JjMzE0MjlkMWIxZTE0NDI2NWQyMWU1MDBjMGE0ZTc2
|
14
|
+
MjI1NzU4OGM3NDczNWQ4ZWNjMzIzZGFjNzM1ODE0MzljZTM2MzJjYjY4Mjgz
|
15
|
+
YmM2ZThiODQ0Njg5N2ZiYzYzODNlZjRkNGJlZTgyY2U3YzYyNWE=
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,8 @@
|
|
1
|
+
### 0.2.02 / 2015-02-02
|
2
|
+
* [NEW] Exemplar file `parsing_L1.rb`: demo using a (highly simplified) English grammar.
|
3
|
+
* [NEW] Exemplar file `parsing_ambig.rb`: demo using an ambiguous grammar.
|
4
|
+
* [FIX] Method `Parsing#parse_tree` now produces correct parse trees for all the examples.
|
5
|
+
|
1
6
|
### 0.2.01 / 2015-01-03
|
2
7
|
* [CHANGE] File `.rubocop.yml`: AbcMetric setting relaxed.
|
3
8
|
* [CHANGE] Fixed most style offenses reported by Rubocop.
|
data/README.md
CHANGED
@@ -8,9 +8,9 @@ Rley
|
|
8
8
|
[](http://badge.fury.io/rb/rley)
|
9
9
|
[](https://gemnasium.com/famished-tiger/Rley)
|
10
10
|
|
11
|
-
|
12
|
-
|
13
|
-
|
11
|
+
__Rley__ is a Ruby implementation of an [Earley parser](http://en.wikipedia.org/wiki/Earley_parser).
|
12
|
+
The project aims to build a parser convenient for lightweight NLP (Natural Language Processing) purposes.
|
13
|
+
|
14
14
|
|
15
15
|
Yet another parser?
|
16
16
|
Yes and no. Rley doesn't aim to replace other very good programming language parsers for Ruby.
|
@@ -19,19 +19,22 @@ in the grammar/language they support.
|
|
19
19
|
Earley's algorithm, being more general, is able to parse input without imposing restrictions on the context-free grammar.
|
20
20
|
Consult Wikipedia to learn more about Earley's parsing algorithm.
|
21
21
|
|
22
|
-
This project is in "
|
22
|
+
This project is in "earley" stage.
|
23
23
|
####Roadmap:
|
24
|
+
- Add examples (including small NLP grammar)
|
25
|
+
- Document the parser API
|
24
26
|
- Add more validation tests and sample grammars
|
25
27
|
- Add AST generation (and semantic actions?)
|
26
28
|
- Add DSL for grammar specification
|
27
29
|
- Add grammar validations
|
28
30
|
- Add error reporting
|
29
|
-
|
31
|
+
|
30
32
|
- Add a command-line interface
|
31
33
|
- Provide documentation and examples
|
32
34
|
|
33
35
|
|
36
|
+
|
34
37
|
Copyright
|
35
38
|
---------
|
36
|
-
Copyright (c) 2014, Dimitri Geshef.
|
39
|
+
Copyright (c) 2014-2015, Dimitri Geshef.
|
37
40
|
__Rley__ is released under the MIT License see [LICENSE.txt](https://github.com/famished-tiger/Rley/blob/master/LICENSE.txt) for details.
|
@@ -14,7 +14,7 @@ require 'rley' # Load the gem
|
|
14
14
|
|
15
15
|
########################################
|
16
16
|
# Step 1. Define a grammar for a micro English-like language
|
17
|
-
# based on Jurafky & Martin L0 language.
|
17
|
+
# based on Jurafsky & Martin L0 language (chapter 12 of the book).
|
18
18
|
# It defines the syntax of a sentence in a language with a
|
19
19
|
# very limited syntax and lexicon in the context of airline reservation.
|
20
20
|
builder = Rley::Syntax::GrammarBuilder.new
|
@@ -0,0 +1,136 @@
|
|
1
|
+
# Purpose: to demonstrate how to build and render a parse tree for the L1
|
2
|
+
# language
|
3
|
+
require 'pp'
|
4
|
+
require 'rley' # Load the gem
|
5
|
+
|
6
|
+
# Steps to render a parse tree (of a valid parsed input):
|
7
|
+
# 1. Define a grammar
|
8
|
+
# 2. Create a tokenizer for the language
|
9
|
+
# 3. Create a parser for that grammar
|
10
|
+
# 4. Tokenize the input
|
11
|
+
# 5. Let the parser process the input
|
12
|
+
# 6. Generate a parse tree from the parse result
|
13
|
+
# 7. Render the parse tree (in JSON)
|
14
|
+
|
15
|
+
########################################
|
16
|
+
# Step 1. Define a grammar for a miniature English-like language
|
17
|
+
# based on Jurafsky & Martin L1 language (chapter 13).
|
18
|
+
# It defines the syntax of a sentence in a language with a
|
19
|
+
# very limited syntax and lexicon in the context of airline reservation.
|
20
|
+
builder = Rley::Syntax::GrammarBuilder.new
|
21
|
+
builder.add_terminals('Noun', 'Verb', 'Pronoun', 'Proper-Noun')
|
22
|
+
builder.add_terminals('Aux', 'Determiner', 'Preposition')
|
23
|
+
builder.add_production('Sentence' => 'S')
|
24
|
+
builder.add_production('S' => %w[Aux NP VP])
|
25
|
+
builder.add_production('S' => %w[NP VP])
|
26
|
+
builder.add_production('S' => %w[VP])
|
27
|
+
builder.add_production('NP' => 'Pronoun')
|
28
|
+
builder.add_production('NP' => 'Proper-Noun')
|
29
|
+
builder.add_production('NP' => %w[Determiner Nominal])
|
30
|
+
builder.add_production('Nominal' => %w[Nominal Noun])
|
31
|
+
builder.add_production('Nominal' => %w[Nominal PP])
|
32
|
+
builder.add_production('Nominal' => 'Noun')
|
33
|
+
builder.add_production('VP' => 'Verb')
|
34
|
+
builder.add_production('VP' => %w[Verb NP])
|
35
|
+
builder.add_production('VP' => %w[Verb NP PP])
|
36
|
+
builder.add_production('VP' => %w[Verb PP])
|
37
|
+
builder.add_production('VP' => %w[VP PP])
|
38
|
+
builder.add_production('PP' => %w[Preposition NP])
|
39
|
+
|
40
|
+
# And now build the grammar...
|
41
|
+
grammar_l1 = builder.grammar
|
42
|
+
|
43
|
+
|
44
|
+
########################################
|
45
|
+
# 2. Create a tokenizer for the language
|
46
|
+
# The tokenizer transforms the input into an array of tokens
|
47
|
+
# This is a very simplistic implementation for demo purposes.
|
48
|
+
|
49
|
+
# The lexicon is just a Hash with pairs of the form:
|
50
|
+
# word =>terminal symbol name
|
51
|
+
L1_lexicon = {
|
52
|
+
'does' => 'Aux',
|
53
|
+
'flight' => 'Noun',
|
54
|
+
'trip' => 'Noun',
|
55
|
+
'meal' => 'Noun',
|
56
|
+
'money' => 'Noun',
|
57
|
+
'morning' => 'Noun',
|
58
|
+
'is' => 'Verb',
|
59
|
+
'book' => 'Verb',
|
60
|
+
'prefer' => 'Verb',
|
61
|
+
'like' => 'Verb',
|
62
|
+
'need' => 'Verb',
|
63
|
+
'want' => 'Verb',
|
64
|
+
'fly' => 'Verb',
|
65
|
+
'show' => 'Verb',
|
66
|
+
'me' => 'Pronoun',
|
67
|
+
'I' => 'Pronoun',
|
68
|
+
'she' => 'Pronoun',
|
69
|
+
'you' => 'Pronoun',
|
70
|
+
'it' => 'Pronoun',
|
71
|
+
'Alaska' => 'Proper-Noun',
|
72
|
+
'Baltimore' => 'Proper-Noun',
|
73
|
+
'Chicago' => 'Proper-Noun',
|
74
|
+
'Houston' => 'Proper-Noun',
|
75
|
+
'NWA' => 'Proper-Noun',
|
76
|
+
'United' => 'Proper-Noun',
|
77
|
+
'American' => 'Proper-Noun',
|
78
|
+
'the' => 'Determiner',
|
79
|
+
'a' => 'Determiner',
|
80
|
+
'an' => 'Determiner',
|
81
|
+
'this' => 'Determiner',
|
82
|
+
'these' => 'Determiner',
|
83
|
+
'that' => 'Determiner',
|
84
|
+
'from' => 'Preposition',
|
85
|
+
'to' => 'Preposition',
|
86
|
+
'on' => 'Preposition',
|
87
|
+
'near' => 'Preposition',
|
88
|
+
'through' => 'Preposition'
|
89
|
+
}
|
90
|
+
|
91
|
+
# Highly simplified tokenizer implementation.
|
92
|
+
def tokenizer(aText, aGrammar)
|
93
|
+
tokens = aText.scan(/\S+/).map do |word|
|
94
|
+
term_name = L1_lexicon[word]
|
95
|
+
if term_name.nil?
|
96
|
+
fail StandardError, "Word '#{word}' not found in lexicon"
|
97
|
+
end
|
98
|
+
terminal = aGrammar.name2symbol[term_name]
|
99
|
+
Rley::Parser::Token.new(word, terminal)
|
100
|
+
end
|
101
|
+
|
102
|
+
return tokens
|
103
|
+
end
|
104
|
+
|
105
|
+
########################################
|
106
|
+
# Step 3. Create a parser for that grammar
|
107
|
+
parser = Rley::Parser::EarleyParser.new(grammar_l1)
|
108
|
+
|
109
|
+
########################################
|
110
|
+
# Step 4. Tokenize the input
|
111
|
+
valid_input = 'I want the flight from Alaska through Chicago to Houston'
|
112
|
+
# Another sentence: it is a flight from Chicago
|
113
|
+
tokens = tokenizer(valid_input, grammar_l1)
|
114
|
+
|
115
|
+
########################################
|
116
|
+
# Step 5. Let the parser process the input
|
117
|
+
result = parser.parse(tokens)
|
118
|
+
puts "Parsing success? #{result.success?}"
|
119
|
+
|
120
|
+
|
121
|
+
########################################
|
122
|
+
# Step 6. Generate a parse tree from the parse result
|
123
|
+
ptree = result.parse_tree
|
124
|
+
|
125
|
+
########################################
|
126
|
+
# Step 7. Render the parse tree (in JSON)
|
127
|
+
# Let's create a parse tree visitor
|
128
|
+
visitor = Rley::ParseTreeVisitor.new(ptree)
|
129
|
+
|
130
|
+
#Here we create a renderer object...
|
131
|
+
renderer = Rley::Formatter::Json.new(STDOUT)
|
132
|
+
|
133
|
+
# Now emit the parse tree as JSON on the console output
|
134
|
+
puts "JSON rendering of the parse tree for '#{valid_input}' input:"
|
135
|
+
renderer.render(visitor)
|
136
|
+
# End of file
|
@@ -1,5 +1,4 @@
|
|
1
1
|
# Purpose: to demonstrate how to build and render a parse tree
|
2
|
-
|
3
2
|
require 'rley' # Load the gem
|
4
3
|
|
5
4
|
# Steps to render a parse tree (of a valid parsed input):
|
@@ -57,6 +56,7 @@ result = parser.parse(tokens)
|
|
57
56
|
# Step 6. Generate a parse tree from the parse result
|
58
57
|
ptree = result.parse_tree
|
59
58
|
|
59
|
+
|
60
60
|
########################################
|
61
61
|
# Step 7. Render the parse tree (in JSON)
|
62
62
|
# Let's create a parse tree visitor
|
@@ -68,5 +68,4 @@ renderer = Rley::Formatter::Json.new(STDOUT)
|
|
68
68
|
# Now emit the parse tree as JSON on the console output
|
69
69
|
puts "JSON rendering of the parse tree for '#{valid_input}' input:"
|
70
70
|
renderer.render(visitor)
|
71
|
-
|
72
71
|
# End of file
|
@@ -0,0 +1,85 @@
|
|
1
|
+
# Purpose: to demonstrate how to build and render a parse tree
|
2
|
+
|
3
|
+
require 'pp' # TODO remove this dependency
|
4
|
+
require 'rley' # Load the gem
|
5
|
+
|
6
|
+
# Steps to render a parse tree (of a valid parsed input):
|
7
|
+
# 1. Define a grammar
|
8
|
+
# 2. Create a tokenizer for the language
|
9
|
+
# 3. Create a parser for that grammar
|
10
|
+
# 4. Tokenize the input
|
11
|
+
# 5. Let the parser process the input
|
12
|
+
# 6. Generate a parse tree from the parse result
|
13
|
+
# 7. Render the parse tree (in JSON)
|
14
|
+
|
15
|
+
########################################
|
16
|
+
# Step 1. Define a grammar for a very simple language
|
17
|
+
# Grammar 3: An ambiguous arithmetic expression language
|
18
|
+
# (based on example in article on Earley's algorithm in Wikipedia)
|
19
|
+
# Let's create the grammar step-by-step with the grammar builder:
|
20
|
+
builder = Rley::Syntax::GrammarBuilder.new
|
21
|
+
builder.add_terminals('integer', '+', '*')
|
22
|
+
builder.add_production('P' => 'S')
|
23
|
+
builder.add_production('S' => %w(S + S))
|
24
|
+
builder.add_production('S' => %w(S * S))
|
25
|
+
builder.add_production('S' => 'L')
|
26
|
+
builder.add_production('L' => 'integer')
|
27
|
+
|
28
|
+
# And now build the grammar...
|
29
|
+
grammar_amb = builder.grammar
|
30
|
+
|
31
|
+
|
32
|
+
########################################
|
33
|
+
# 2. Create a tokenizer for the language
|
34
|
+
# The tokenizer transforms the input into an array of tokens
|
35
|
+
def tokenizer(aText, aGrammar)
|
36
|
+
tokens = aText.scan(/\S+/).map do |lexeme|
|
37
|
+
case lexeme
|
38
|
+
when '+', '*'
|
39
|
+
terminal = aGrammar.name2symbol[lexeme]
|
40
|
+
when /^[-+]?\d+$/
|
41
|
+
terminal = aGrammar.name2symbol['integer']
|
42
|
+
else
|
43
|
+
msg = "Unknown input text '#{lexeme}'"
|
44
|
+
fail StandardError, msg
|
45
|
+
end
|
46
|
+
Rley::Parser::Token.new(lexeme, terminal)
|
47
|
+
end
|
48
|
+
|
49
|
+
return tokens
|
50
|
+
end
|
51
|
+
|
52
|
+
########################################
|
53
|
+
# Step 3. Create a parser for that grammar
|
54
|
+
parser = Rley::Parser::EarleyParser.new(grammar_amb)
|
55
|
+
|
56
|
+
########################################
|
57
|
+
# Step 4. Tokenize the input
|
58
|
+
valid_input = '2 + 3 * 4'
|
59
|
+
tokens = tokenizer(valid_input, grammar_amb)
|
60
|
+
|
61
|
+
########################################
|
62
|
+
# Step 5. Let the parser process the input
|
63
|
+
result = parser.parse(tokens)
|
64
|
+
puts "Parsing success? #{result.success?}"
|
65
|
+
pp result
|
66
|
+
|
67
|
+
|
68
|
+
########################################
|
69
|
+
# Step 6. Generate a parse tree from the parse result
|
70
|
+
ptree = result.parse_tree
|
71
|
+
pp ptree
|
72
|
+
=begin
|
73
|
+
########################################
|
74
|
+
# Step 7. Render the parse tree (in JSON)
|
75
|
+
# Let's create a parse tree visitor
|
76
|
+
visitor = Rley::ParseTreeVisitor.new(ptree)
|
77
|
+
|
78
|
+
#Here we create a renderer object...
|
79
|
+
renderer = Rley::Formatter::Json.new(STDOUT)
|
80
|
+
|
81
|
+
# Now emit the parse tree as JSON on the console output
|
82
|
+
puts "JSON rendering of the parse tree for '#{valid_input}' input:"
|
83
|
+
renderer.render(visitor)
|
84
|
+
=end
|
85
|
+
# End of file
|
data/lib/rley/constants.rb
CHANGED
data/lib/rley/formatter/json.rb
CHANGED
@@ -64,10 +64,7 @@ module Rley # This module is used as a namespace
|
|
64
64
|
def before_terminal(term_node)
|
65
65
|
separator = sibling_flags[-1] ? ",\n" : "\n"
|
66
66
|
name = term_node.symbol.name
|
67
|
-
|
68
|
-
msg = "No token associated with #{name}"
|
69
|
-
fail StandardError, msg
|
70
|
-
end
|
67
|
+
|
71
68
|
lexeme = term_node.token.lexeme
|
72
69
|
print_text(separator, "{\"#{name}\": \"#{lexeme}\"}")
|
73
70
|
sibling_flags[-1] = true
|
@@ -24,7 +24,7 @@ module Rley # This module is used as a namespace
|
|
24
24
|
def current_node()
|
25
25
|
return current_path.last
|
26
26
|
end
|
27
|
-
|
27
|
+
|
28
28
|
# Factory method.
|
29
29
|
def parse_tree()
|
30
30
|
return PTree::ParseTree.new(root)
|
@@ -59,12 +59,9 @@ module Rley # This module is used as a namespace
|
|
59
59
|
|
60
60
|
# Make the predecessor of current node the
|
61
61
|
# new current node.
|
62
|
-
def move_back()
|
63
|
-
|
64
|
-
if current_path.length == 1
|
65
|
-
msg = 'Cannot move further back'
|
66
|
-
fail StandardError, msg
|
67
|
-
end
|
62
|
+
def move_back()
|
63
|
+
loop do
|
64
|
+
break if current_path.length == 1
|
68
65
|
(parent, pos) = current_path[-3, 2]
|
69
66
|
current_path.pop(2)
|
70
67
|
if pos > 0
|
@@ -73,7 +70,8 @@ module Rley # This module is used as a namespace
|
|
73
70
|
current_path << new_pos
|
74
71
|
current_path << new_curr_node
|
75
72
|
end
|
76
|
-
|
73
|
+
break if pos > 0 || new_curr_node.is_a?(PTree::TerminalNode)
|
74
|
+
end
|
77
75
|
end
|
78
76
|
|
79
77
|
|
@@ -94,10 +92,10 @@ module Rley # This module is used as a namespace
|
|
94
92
|
lower = low_bound(aRange)
|
95
93
|
unless lower.nil?
|
96
94
|
current_node.range = lower
|
97
|
-
if curr_node.is_a?(PTree::TerminalNode)
|
95
|
+
if curr_node.is_a?(PTree::TerminalNode) && lower[:low]
|
98
96
|
current_node.range = high_bound(lower[:low] + 1)
|
99
97
|
end
|
100
|
-
end
|
98
|
+
end
|
101
99
|
upper = high_bound(aRange)
|
102
100
|
current_node.range = upper unless upper.nil?
|
103
101
|
end
|
@@ -122,8 +120,9 @@ module Rley # This module is used as a namespace
|
|
122
120
|
curr_node = current_node
|
123
121
|
|
124
122
|
if curr_node.symbol != prod.lhs
|
125
|
-
|
126
|
-
|
123
|
+
snapshot = root.to_string(0)
|
124
|
+
msg = "Current node is a #{curr_node.symbol} instead of #{prod.lhs}."
|
125
|
+
fail StandardError, msg + "\n" + snapshot
|
127
126
|
end
|
128
127
|
self.range = aRange
|
129
128
|
prod.rhs.each { |symb| add_node(symb, {}) }
|
@@ -131,7 +130,7 @@ module Rley # This module is used as a namespace
|
|
131
130
|
return if curr_node.children.empty?
|
132
131
|
curr_node.children.first.range.assign(low: curr_node.range.low)
|
133
132
|
curr_node.children.last.range.assign(high: curr_node.range.high)
|
134
|
-
end
|
133
|
+
end
|
135
134
|
|
136
135
|
# Add the given node as child node of current node
|
137
136
|
def add_child(aNode)
|
data/lib/rley/parser/parsing.rb
CHANGED
@@ -10,7 +10,7 @@ module Rley # This module is used as a namespace
|
|
10
10
|
|
11
11
|
# The sequence of input token to parse
|
12
12
|
attr_reader(:tokens)
|
13
|
-
|
13
|
+
|
14
14
|
def initialize(startDottedRule, theTokens)
|
15
15
|
@tokens = theTokens.dup
|
16
16
|
@chart = Chart.new(startDottedRule, tokens.size)
|
@@ -25,7 +25,7 @@ module Rley # This module is used as a namespace
|
|
25
25
|
found = end_parse_state
|
26
26
|
return !found.nil?
|
27
27
|
end
|
28
|
-
|
28
|
+
|
29
29
|
# Factory method. Builds a ParseTree from the parse result.
|
30
30
|
# @return [ParseTree]
|
31
31
|
# Algorithm:
|
@@ -34,27 +34,14 @@ module Rley # This module is used as a namespace
|
|
34
34
|
def parse_tree()
|
35
35
|
state_tracker = new_state_tracker
|
36
36
|
builder = tree_builder(state_tracker.state_set_index)
|
37
|
-
|
37
|
+
|
38
38
|
loop do
|
39
|
-
#
|
39
|
+
# Retrieve the symbol on left of the dot
|
40
40
|
curr_symbol = state_tracker.symbol_on_left
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
predecessor_state_terminal(curr_symbol, state_tracker, builder)
|
46
|
-
|
47
|
-
when Syntax::NonTerminal
|
48
|
-
completed_state_for(curr_symbol, state_tracker, builder)
|
49
|
-
|
50
|
-
when NilClass # No symbol on the left of dot
|
51
|
-
# Retrieve all parse states that expect the lhs
|
52
|
-
new_states = states_expecting_lhs(state_tracker)
|
53
|
-
break if new_states.empty?
|
54
|
-
|
55
|
-
select_expecting_state(new_states, state_tracker, builder)
|
56
|
-
break if builder.root == builder.current_node
|
57
|
-
end
|
41
|
+
|
42
|
+
# Place the symbol in the parse tree
|
43
|
+
done = insert_matched_symbol(state_tracker, builder)
|
44
|
+
break if done
|
58
45
|
end
|
59
46
|
|
60
47
|
return builder.parse_tree
|
@@ -145,8 +132,40 @@ module Rley # This module is used as a namespace
|
|
145
132
|
return candidate_states.find(&:complete?)
|
146
133
|
end
|
147
134
|
|
135
|
+
|
136
|
+
# Insert in a parse tree the symbol on the left of the
|
137
|
+
# current dotted rule.
|
138
|
+
def insert_matched_symbol(aStateTracker, aBuilder)
|
139
|
+
# Retrieve symbol before the dot in active parse state
|
140
|
+
match_symbol = aStateTracker.symbol_on_left
|
141
|
+
|
142
|
+
# Retrieve tree node being processed
|
143
|
+
tree_node = aBuilder.current_node
|
144
|
+
|
145
|
+
done = false
|
146
|
+
case [match_symbol.class, tree_node.class]
|
147
|
+
when [Syntax::Terminal, PTree::TerminalNode]
|
148
|
+
aStateTracker.to_prev_state_set
|
149
|
+
predecessor_state_terminal(match_symbol, aStateTracker, aBuilder)
|
150
|
+
|
151
|
+
when [NilClass, Rley::PTree::TerminalNode],
|
152
|
+
[NilClass, PTree::NonTerminalNode]
|
153
|
+
# Retrieve all parse states that expect the lhs
|
154
|
+
new_states = states_expecting_lhs(aStateTracker, aBuilder)
|
155
|
+
done = true if new_states.empty?
|
156
|
+
# Select an unused parse state
|
157
|
+
aStateTracker.select_state(new_states)
|
158
|
+
|
159
|
+
when [Syntax::NonTerminal, PTree::NonTerminalNode]
|
160
|
+
completed_state_for(match_symbol, aStateTracker, aBuilder)
|
161
|
+
end
|
162
|
+
|
163
|
+
done ||= aBuilder.root == aBuilder.current_node
|
164
|
+
return done
|
165
|
+
end
|
166
|
+
|
148
167
|
private
|
149
|
-
|
168
|
+
|
150
169
|
# Factory method. Creates and initializes a ParseStateTracker instance.
|
151
170
|
def new_state_tracker()
|
152
171
|
instance = ParseStateTracker.new(chart.last_index)
|
@@ -154,16 +173,15 @@ module Rley # This module is used as a namespace
|
|
154
173
|
|
155
174
|
return instance
|
156
175
|
end
|
157
|
-
|
158
|
-
|
176
|
+
|
177
|
+
|
159
178
|
# A terminal symbol is on the left of dot.
|
160
179
|
# Go to the predecessor state for the given terminal
|
161
180
|
def predecessor_state_terminal(_a_symb, aStateTracker, aTreeBuilder)
|
162
|
-
|
181
|
+
index = aStateTracker.state_set_index
|
182
|
+
aTreeBuilder.current_node.range = { low: index, high: index + 1 }
|
163
183
|
link_node_to_token(aTreeBuilder, aStateTracker.state_set_index)
|
164
184
|
unless aTreeBuilder.current_node.is_a?(PTree::TerminalNode)
|
165
|
-
pp aTreeBuilder.root
|
166
|
-
pp aTreeBuilder.current_node
|
167
185
|
fail StandardError, 'Expected terminal node'
|
168
186
|
end
|
169
187
|
aTreeBuilder.move_back
|
@@ -171,40 +189,39 @@ module Rley # This module is used as a namespace
|
|
171
189
|
previous_state = state_set.predecessor_state(aStateTracker.parse_state)
|
172
190
|
aStateTracker.parse_state = previous_state
|
173
191
|
end
|
174
|
-
|
175
|
-
|
176
|
-
# Retrieve a complete state with given symbol as lhs.
|
192
|
+
|
193
|
+
|
194
|
+
# Retrieve a complete state with given non-terminal symbol as lhs.
|
177
195
|
def completed_state_for(a_symb, aTracker, aTreeBuilder)
|
178
196
|
new_states = chart[aTracker.state_set_index].states_rewriting(a_symb)
|
179
|
-
aTracker.select_state(new_states)
|
197
|
+
aTracker.select_state(new_states)
|
180
198
|
aTreeBuilder.range = { high: aTracker.state_set_index }
|
181
199
|
aTreeBuilder.use_complete_state(aTracker.parse_state)
|
182
200
|
link_node_to_token(aTreeBuilder, aTracker.state_set_index - 1)
|
183
201
|
aTreeBuilder.move_down
|
184
202
|
end
|
185
|
-
|
186
|
-
|
187
|
-
def states_expecting_lhs(aStateTracker)
|
203
|
+
|
204
|
+
|
205
|
+
def states_expecting_lhs(aStateTracker, aTreeBuilder)
|
188
206
|
lhs = aStateTracker.curr_dotted_item.production.lhs
|
189
207
|
new_states = states_expecting(lhs, aStateTracker.state_set_index, true)
|
208
|
+
new_states.reject! { |st| st == aStateTracker.parse_state }
|
209
|
+
# Filter out parse states with incompatible range
|
210
|
+
if new_states.size > 1
|
211
|
+
previous_node = aTreeBuilder.current_path[-3]
|
212
|
+
new_states.select! do |parse_state|
|
213
|
+
parse_state.dotted_rule.production.lhs == previous_node.symbol
|
214
|
+
end
|
215
|
+
end
|
190
216
|
|
191
217
|
return new_states
|
192
218
|
end
|
193
|
-
|
194
|
-
def select_expecting_state(theStates, aStateTracker, aTreeBuilder)
|
195
|
-
# Select an unused parse state
|
196
|
-
aStateTracker.select_state(theStates)
|
197
|
-
|
198
|
-
aTreeBuilder.range = { low: aStateTracker.state_set_index }
|
199
|
-
aTreeBuilder.move_back
|
200
|
-
aTreeBuilder.range = { low: aStateTracker.parse_state.origin }
|
201
|
-
end
|
202
|
-
|
203
219
|
|
204
220
|
# If the current node is a terminal node
|
205
221
|
# then link the token to that node
|
206
222
|
def link_node_to_token(aTreeBuilder, aStateSetIndex)
|
207
223
|
return unless aTreeBuilder.current_node.is_a?(PTree::TerminalNode)
|
224
|
+
return unless aTreeBuilder.current_node.token.nil?
|
208
225
|
|
209
226
|
a_node = aTreeBuilder.current_node
|
210
227
|
a_node.token = tokens[aStateSetIndex] unless a_node.token
|