rley 0.2.01 → 0.2.02
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +8 -8
- data/CHANGELOG.md +5 -0
- data/README.md +9 -6
- data/examples/parsers/parsing_L0.rb +1 -1
- data/examples/parsers/parsing_L1.rb +136 -0
- data/examples/parsers/parsing_abc.rb +1 -2
- data/examples/parsers/parsing_ambig.rb +85 -0
- data/lib/rley/constants.rb +1 -1
- data/lib/rley/formatter/json.rb +1 -4
- data/lib/rley/parser/parse_state_tracker.rb +1 -1
- data/lib/rley/parser/parse_tree_builder.rb +12 -13
- data/lib/rley/parser/parsing.rb +61 -44
- data/lib/rley/ptree/non_terminal_node.rb +13 -0
- data/lib/rley/ptree/parse_tree_node.rb +7 -0
- data/lib/rley/ptree/terminal_node.rb +7 -0
- data/lib/rley/ptree/token_range.rb +18 -0
- data/spec/rley/formatter/json_spec.rb +1 -2
- data/spec/rley/parser/earley_parser_spec.rb +77 -4
- data/spec/rley/parser/parse_tree_builder_spec.rb +28 -27
- data/spec/rley/parser/parsing_spec.rb +230 -73
- data/spec/rley/ptree/non_terminal_node_spec.rb +42 -3
- data/spec/rley/ptree/terminal_node_spec.rb +39 -0
- data/spec/rley/ptree/token_range_spec.rb +45 -0
- data/spec/rley/support/ambiguous_grammar_helper.rb +36 -0
- data/spec/rley/support/grammar_b_expr_helper.rb +1 -1
- metadata +7 -2
checksums.yaml
CHANGED
@@ -1,15 +1,15 @@
|
|
1
1
|
---
|
2
2
|
!binary "U0hBMQ==":
|
3
3
|
metadata.gz: !binary |-
|
4
|
-
|
4
|
+
MzY1NzQxNzdkNGEzMDFiYzhhNmQ0M2Q5MTU3NzQxOGMyMDkxYTM4Ng==
|
5
5
|
data.tar.gz: !binary |-
|
6
|
-
|
6
|
+
OGUzMzg4YmE5ZGRiZmRiOWEzMTFiYjhiOTJlYmQ2M2FkNzBkOWVjOQ==
|
7
7
|
!binary "U0hBNTEy":
|
8
8
|
metadata.gz: !binary |-
|
9
|
-
|
10
|
-
|
11
|
-
|
9
|
+
MzAzY2FkM2VkNzY2NDVhZDEwNmIyYWVkZDg3MDEwYzQ4ZGM3NzMzMmMxZTdk
|
10
|
+
YzYyZDZhMjNmNDQyZDc5ZmRlYzBjYTQ5ZTMyMmQzZTk2YjBiMjI3N2IxOTYw
|
11
|
+
YjZiZGYzZWQxZjUxM2ExMTQxMzExZmIzNTJmZTZjNDhjYzcyOTk=
|
12
12
|
data.tar.gz: !binary |-
|
13
|
-
|
14
|
-
|
15
|
-
|
13
|
+
MWQ5MGM3YjVjNjNhY2JjMzE0MjlkMWIxZTE0NDI2NWQyMWU1MDBjMGE0ZTc2
|
14
|
+
MjI1NzU4OGM3NDczNWQ4ZWNjMzIzZGFjNzM1ODE0MzljZTM2MzJjYjY4Mjgz
|
15
|
+
YmM2ZThiODQ0Njg5N2ZiYzYzODNlZjRkNGJlZTgyY2U3YzYyNWE=
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,8 @@
|
|
1
|
+
### 0.2.02 / 2015-02-02
|
2
|
+
* [NEW] Example file `parsing_L1.rb`: demo using a (highly simplified) English grammar.
|
3
|
+
* [NEW] Example file `parsing_ambig.rb`: demo using an ambiguous grammar.
|
4
|
+
* [FIX] Method `Parsing#parse_tree` now produces correct parse trees for all the examples.
|
5
|
+
|
1
6
|
### 0.2.01 / 2015-01-03
|
2
7
|
* [CHANGE] File `.rubocop.yml`: AbcMetric setting relaxed.
|
3
8
|
* [CHANGE] Fixed most style offenses reported by Rubocop.
|
data/README.md
CHANGED
@@ -8,9 +8,9 @@ Rley
|
|
8
8
|
[![Gem Version](https://badge.fury.io/rb/rley.svg)](http://badge.fury.io/rb/rley)
|
9
9
|
[![Dependency Status](https://gemnasium.com/famished-tiger/Rley.svg)](https://gemnasium.com/famished-tiger/Rley)
|
10
10
|
|
11
|
-
|
12
|
-
|
13
|
-
|
11
|
+
__Rley__ is a Ruby implementation of an [Earley parser](http://en.wikipedia.org/wiki/Earley_parser).
|
12
|
+
The project aims to build a parser convenient for lightweight NLP (Natural Language Processing) purposes.
|
13
|
+
|
14
14
|
|
15
15
|
Yet another parser?
|
16
16
|
Yes and no. Rley doesn't aim to replace other very good programming language parsers for Ruby.
|
@@ -19,19 +19,22 @@ in the grammar/language they support.
|
|
19
19
|
Earley's algorithm, being more general, is able to parse input without imposing restrictions on the context-free grammar.
|
20
20
|
Consult Wikipedia to learn more about Earley's parsing algorithm.
|
21
21
|
|
22
|
-
This project is in "
|
22
|
+
This project is in "earley" stage.
|
23
23
|
####Roadmap:
|
24
|
+
- Add examples (including small NLP grammar)
|
25
|
+
- Document the parser API
|
24
26
|
- Add more validation tests and sample grammars
|
25
27
|
- Add AST generation (and semantic actions?)
|
26
28
|
- Add DSL for grammar specification
|
27
29
|
- Add grammar validations
|
28
30
|
- Add error reporting
|
29
|
-
|
31
|
+
|
30
32
|
- Add a command-line interface
|
31
33
|
- Provide documentation and examples
|
32
34
|
|
33
35
|
|
36
|
+
|
34
37
|
Copyright
|
35
38
|
---------
|
36
|
-
Copyright (c) 2014, Dimitri Geshef.
|
39
|
+
Copyright (c) 2014-2015, Dimitri Geshef.
|
37
40
|
__Rley__ is released under the MIT License; see [LICENSE.txt](https://github.com/famished-tiger/Rley/blob/master/LICENSE.txt) for details.
|
@@ -14,7 +14,7 @@ require 'rley' # Load the gem
|
|
14
14
|
|
15
15
|
########################################
|
16
16
|
# Step 1. Define a grammar for a micro English-like language
|
17
|
-
# based on Jurafky & Martin L0 language.
|
17
|
+
# based on Jurafsky & Martin L0 language (chapter 12 of the book).
|
18
18
|
# It defines the syntax of a sentence in a language with a
|
19
19
|
# very limited syntax and lexicon in the context of airline reservation.
|
20
20
|
builder = Rley::Syntax::GrammarBuilder.new
|
@@ -0,0 +1,136 @@
|
|
1
|
+
# Purpose: to demonstrate how to build and render a parse tree for the L1
|
2
|
+
# language
|
3
|
+
require 'pp'
|
4
|
+
require 'rley' # Load the gem
|
5
|
+
|
6
|
+
# Steps to render a parse tree (of a valid parsed input):
|
7
|
+
# 1. Define a grammar
|
8
|
+
# 2. Create a tokenizer for the language
|
9
|
+
# 3. Create a parser for that grammar
|
10
|
+
# 4. Tokenize the input
|
11
|
+
# 5. Let the parser process the input
|
12
|
+
# 6. Generate a parse tree from the parse result
|
13
|
+
# 7. Render the parse tree (in JSON)
|
14
|
+
|
15
|
+
########################################
|
16
|
+
# Step 1. Define a grammar for a miniature English-like language
|
17
|
+
# based on Jurafsky & Martin L1 language (chapter 13).
|
18
|
+
# It defines the syntax of a sentence in a language with a
|
19
|
+
# very limited syntax and lexicon in the context of airline reservation.
|
20
|
+
builder = Rley::Syntax::GrammarBuilder.new
|
21
|
+
builder.add_terminals('Noun', 'Verb', 'Pronoun', 'Proper-Noun')
|
22
|
+
builder.add_terminals('Aux', 'Determiner', 'Preposition')
|
23
|
+
builder.add_production('Sentence' => 'S')
|
24
|
+
builder.add_production('S' => %w[Aux NP VP])
|
25
|
+
builder.add_production('S' => %w[NP VP])
|
26
|
+
builder.add_production('S' => %w[VP])
|
27
|
+
builder.add_production('NP' => 'Pronoun')
|
28
|
+
builder.add_production('NP' => 'Proper-Noun')
|
29
|
+
builder.add_production('NP' => %w[Determiner Nominal])
|
30
|
+
builder.add_production('Nominal' => %w[Nominal Noun])
|
31
|
+
builder.add_production('Nominal' => %w[Nominal PP])
|
32
|
+
builder.add_production('Nominal' => 'Noun')
|
33
|
+
builder.add_production('VP' => 'Verb')
|
34
|
+
builder.add_production('VP' => %w[Verb NP])
|
35
|
+
builder.add_production('VP' => %w[Verb NP PP])
|
36
|
+
builder.add_production('VP' => %w[Verb PP])
|
37
|
+
builder.add_production('VP' => %w[VP PP])
|
38
|
+
builder.add_production('PP' => %w[Preposition NP])
|
39
|
+
|
40
|
+
# And now build the grammar...
|
41
|
+
grammar_l1 = builder.grammar
|
42
|
+
|
43
|
+
|
44
|
+
########################################
|
45
|
+
# Step 2. Create a tokenizer for the language
|
46
|
+
# The tokenizer transforms the input into an array of tokens
|
47
|
+
# This is a very simplistic implementation for demo purposes.
|
48
|
+
|
49
|
+
# The lexicon is just a Hash with pairs of the form:
|
50
|
+
# word =>terminal symbol name
|
51
|
+
L1_lexicon = {
|
52
|
+
'does' => 'Aux',
|
53
|
+
'flight' => 'Noun',
|
54
|
+
'trip' => 'Noun',
|
55
|
+
'meal' => 'Noun',
|
56
|
+
'money' => 'Noun',
|
57
|
+
'morning' => 'Noun',
|
58
|
+
'is' => 'Verb',
|
59
|
+
'book' => 'Verb',
|
60
|
+
'prefer' => 'Verb',
|
61
|
+
'like' => 'Verb',
|
62
|
+
'need' => 'Verb',
|
63
|
+
'want' => 'Verb',
|
64
|
+
'fly' => 'Verb',
|
65
|
+
'show' => 'Verb',
|
66
|
+
'me' => 'Pronoun',
|
67
|
+
'I' => 'Pronoun',
|
68
|
+
'she' => 'Pronoun',
|
69
|
+
'you' => 'Pronoun',
|
70
|
+
'it' => 'Pronoun',
|
71
|
+
'Alaska' => 'Proper-Noun',
|
72
|
+
'Baltimore' => 'Proper-Noun',
|
73
|
+
'Chicago' => 'Proper-Noun',
|
74
|
+
'Houston' => 'Proper-Noun',
|
75
|
+
'NWA' => 'Proper-Noun',
|
76
|
+
'United' => 'Proper-Noun',
|
77
|
+
'American' => 'Proper-Noun',
|
78
|
+
'the' => 'Determiner',
|
79
|
+
'a' => 'Determiner',
|
80
|
+
'an' => 'Determiner',
|
81
|
+
'this' => 'Determiner',
|
82
|
+
'these' => 'Determiner',
|
83
|
+
'that' => 'Determiner',
|
84
|
+
'from' => 'Preposition',
|
85
|
+
'to' => 'Preposition',
|
86
|
+
'on' => 'Preposition',
|
87
|
+
'near' => 'Preposition',
|
88
|
+
'through' => 'Preposition'
|
89
|
+
}
|
90
|
+
|
91
|
+
# Highly simplified tokenizer implementation.
|
92
|
+
def tokenizer(aText, aGrammar)
|
93
|
+
tokens = aText.scan(/\S+/).map do |word|
|
94
|
+
term_name = L1_lexicon[word]
|
95
|
+
if term_name.nil?
|
96
|
+
fail StandardError, "Word '#{word}' not found in lexicon"
|
97
|
+
end
|
98
|
+
terminal = aGrammar.name2symbol[term_name]
|
99
|
+
Rley::Parser::Token.new(word, terminal)
|
100
|
+
end
|
101
|
+
|
102
|
+
return tokens
|
103
|
+
end
|
104
|
+
|
105
|
+
########################################
|
106
|
+
# Step 3. Create a parser for that grammar
|
107
|
+
parser = Rley::Parser::EarleyParser.new(grammar_l1)
|
108
|
+
|
109
|
+
########################################
|
110
|
+
# Step 4. Tokenize the input
|
111
|
+
valid_input = 'I want the flight from Alaska through Chicago to Houston'
|
112
|
+
# Another sentence: it is a flight from Chicago
|
113
|
+
tokens = tokenizer(valid_input, grammar_l1)
|
114
|
+
|
115
|
+
########################################
|
116
|
+
# Step 5. Let the parser process the input
|
117
|
+
result = parser.parse(tokens)
|
118
|
+
puts "Parsing success? #{result.success?}"
|
119
|
+
|
120
|
+
|
121
|
+
########################################
|
122
|
+
# Step 6. Generate a parse tree from the parse result
|
123
|
+
ptree = result.parse_tree
|
124
|
+
|
125
|
+
########################################
|
126
|
+
# Step 7. Render the parse tree (in JSON)
|
127
|
+
# Let's create a parse tree visitor
|
128
|
+
visitor = Rley::ParseTreeVisitor.new(ptree)
|
129
|
+
|
130
|
+
#Here we create a renderer object...
|
131
|
+
renderer = Rley::Formatter::Json.new(STDOUT)
|
132
|
+
|
133
|
+
# Now emit the parse tree as JSON on the console output
|
134
|
+
puts "JSON rendering of the parse tree for '#{valid_input}' input:"
|
135
|
+
renderer.render(visitor)
|
136
|
+
# End of file
|
@@ -1,5 +1,4 @@
|
|
1
1
|
# Purpose: to demonstrate how to build and render a parse tree
|
2
|
-
|
3
2
|
require 'rley' # Load the gem
|
4
3
|
|
5
4
|
# Steps to render a parse tree (of a valid parsed input):
|
@@ -57,6 +56,7 @@ result = parser.parse(tokens)
|
|
57
56
|
# Step 6. Generate a parse tree from the parse result
|
58
57
|
ptree = result.parse_tree
|
59
58
|
|
59
|
+
|
60
60
|
########################################
|
61
61
|
# Step 7. Render the parse tree (in JSON)
|
62
62
|
# Let's create a parse tree visitor
|
@@ -68,5 +68,4 @@ renderer = Rley::Formatter::Json.new(STDOUT)
|
|
68
68
|
# Now emit the parse tree as JSON on the console output
|
69
69
|
puts "JSON rendering of the parse tree for '#{valid_input}' input:"
|
70
70
|
renderer.render(visitor)
|
71
|
-
|
72
71
|
# End of file
|
@@ -0,0 +1,85 @@
|
|
1
|
+
# Purpose: to demonstrate how to build and render a parse tree
|
2
|
+
|
3
|
+
require 'pp' # TODO remove this dependency
|
4
|
+
require 'rley' # Load the gem
|
5
|
+
|
6
|
+
# Steps to render a parse tree (of a valid parsed input):
|
7
|
+
# 1. Define a grammar
|
8
|
+
# 2. Create a tokenizer for the language
|
9
|
+
# 3. Create a parser for that grammar
|
10
|
+
# 4. Tokenize the input
|
11
|
+
# 5. Let the parser process the input
|
12
|
+
# 6. Generate a parse tree from the parse result
|
13
|
+
# 7. Render the parse tree (in JSON)
|
14
|
+
|
15
|
+
########################################
|
16
|
+
# Step 1. Define a grammar for a very simple language
|
17
|
+
# Grammar 3: An ambiguous arithmetic expression language
|
18
|
+
# (based on example in article on Earley's algorithm in Wikipedia)
|
19
|
+
# Let's create the grammar step-by-step with the grammar builder:
|
20
|
+
builder = Rley::Syntax::GrammarBuilder.new
|
21
|
+
builder.add_terminals('integer', '+', '*')
|
22
|
+
builder.add_production('P' => 'S')
|
23
|
+
builder.add_production('S' => %w(S + S))
|
24
|
+
builder.add_production('S' => %w(S * S))
|
25
|
+
builder.add_production('S' => 'L')
|
26
|
+
builder.add_production('L' => 'integer')
|
27
|
+
|
28
|
+
# And now build the grammar...
|
29
|
+
grammar_amb = builder.grammar
|
30
|
+
|
31
|
+
|
32
|
+
########################################
|
33
|
+
# Step 2. Create a tokenizer for the language
|
34
|
+
# The tokenizer transforms the input into an array of tokens
|
35
|
+
def tokenizer(aText, aGrammar)
|
36
|
+
tokens = aText.scan(/\S+/).map do |lexeme|
|
37
|
+
case lexeme
|
38
|
+
when '+', '*'
|
39
|
+
terminal = aGrammar.name2symbol[lexeme]
|
40
|
+
when /^[-+]?\d+$/
|
41
|
+
terminal = aGrammar.name2symbol['integer']
|
42
|
+
else
|
43
|
+
msg = "Unknown input text '#{lexeme}'"
|
44
|
+
fail StandardError, msg
|
45
|
+
end
|
46
|
+
Rley::Parser::Token.new(lexeme, terminal)
|
47
|
+
end
|
48
|
+
|
49
|
+
return tokens
|
50
|
+
end
|
51
|
+
|
52
|
+
########################################
|
53
|
+
# Step 3. Create a parser for that grammar
|
54
|
+
parser = Rley::Parser::EarleyParser.new(grammar_amb)
|
55
|
+
|
56
|
+
########################################
|
57
|
+
# Step 4. Tokenize the input
|
58
|
+
valid_input = '2 + 3 * 4'
|
59
|
+
tokens = tokenizer(valid_input, grammar_amb)
|
60
|
+
|
61
|
+
########################################
|
62
|
+
# Step 5. Let the parser process the input
|
63
|
+
result = parser.parse(tokens)
|
64
|
+
puts "Parsing success? #{result.success?}"
|
65
|
+
pp result
|
66
|
+
|
67
|
+
|
68
|
+
########################################
|
69
|
+
# Step 6. Generate a parse tree from the parse result
|
70
|
+
ptree = result.parse_tree
|
71
|
+
pp ptree
|
72
|
+
=begin
|
73
|
+
########################################
|
74
|
+
# Step 7. Render the parse tree (in JSON)
|
75
|
+
# Let's create a parse tree visitor
|
76
|
+
visitor = Rley::ParseTreeVisitor.new(ptree)
|
77
|
+
|
78
|
+
#Here we create a renderer object...
|
79
|
+
renderer = Rley::Formatter::Json.new(STDOUT)
|
80
|
+
|
81
|
+
# Now emit the parse tree as JSON on the console output
|
82
|
+
puts "JSON rendering of the parse tree for '#{valid_input}' input:"
|
83
|
+
renderer.render(visitor)
|
84
|
+
=end
|
85
|
+
# End of file
|
data/lib/rley/constants.rb
CHANGED
data/lib/rley/formatter/json.rb
CHANGED
@@ -64,10 +64,7 @@ module Rley # This module is used as a namespace
|
|
64
64
|
def before_terminal(term_node)
|
65
65
|
separator = sibling_flags[-1] ? ",\n" : "\n"
|
66
66
|
name = term_node.symbol.name
|
67
|
-
|
68
|
-
msg = "No token associated with #{name}"
|
69
|
-
fail StandardError, msg
|
70
|
-
end
|
67
|
+
|
71
68
|
lexeme = term_node.token.lexeme
|
72
69
|
print_text(separator, "{\"#{name}\": \"#{lexeme}\"}")
|
73
70
|
sibling_flags[-1] = true
|
@@ -24,7 +24,7 @@ module Rley # This module is used as a namespace
|
|
24
24
|
def current_node()
|
25
25
|
return current_path.last
|
26
26
|
end
|
27
|
-
|
27
|
+
|
28
28
|
# Factory method.
|
29
29
|
def parse_tree()
|
30
30
|
return PTree::ParseTree.new(root)
|
@@ -59,12 +59,9 @@ module Rley # This module is used as a namespace
|
|
59
59
|
|
60
60
|
# Make the predecessor of current node the
|
61
61
|
# new current node.
|
62
|
-
def move_back()
|
63
|
-
|
64
|
-
if current_path.length == 1
|
65
|
-
msg = 'Cannot move further back'
|
66
|
-
fail StandardError, msg
|
67
|
-
end
|
62
|
+
def move_back()
|
63
|
+
loop do
|
64
|
+
break if current_path.length == 1
|
68
65
|
(parent, pos) = current_path[-3, 2]
|
69
66
|
current_path.pop(2)
|
70
67
|
if pos > 0
|
@@ -73,7 +70,8 @@ module Rley # This module is used as a namespace
|
|
73
70
|
current_path << new_pos
|
74
71
|
current_path << new_curr_node
|
75
72
|
end
|
76
|
-
|
73
|
+
break if pos > 0 || new_curr_node.is_a?(PTree::TerminalNode)
|
74
|
+
end
|
77
75
|
end
|
78
76
|
|
79
77
|
|
@@ -94,10 +92,10 @@ module Rley # This module is used as a namespace
|
|
94
92
|
lower = low_bound(aRange)
|
95
93
|
unless lower.nil?
|
96
94
|
current_node.range = lower
|
97
|
-
if curr_node.is_a?(PTree::TerminalNode)
|
95
|
+
if curr_node.is_a?(PTree::TerminalNode) && lower[:low]
|
98
96
|
current_node.range = high_bound(lower[:low] + 1)
|
99
97
|
end
|
100
|
-
end
|
98
|
+
end
|
101
99
|
upper = high_bound(aRange)
|
102
100
|
current_node.range = upper unless upper.nil?
|
103
101
|
end
|
@@ -122,8 +120,9 @@ module Rley # This module is used as a namespace
|
|
122
120
|
curr_node = current_node
|
123
121
|
|
124
122
|
if curr_node.symbol != prod.lhs
|
125
|
-
|
126
|
-
|
123
|
+
snapshot = root.to_string(0)
|
124
|
+
msg = "Current node is a #{curr_node.symbol} instead of #{prod.lhs}."
|
125
|
+
fail StandardError, msg + "\n" + snapshot
|
127
126
|
end
|
128
127
|
self.range = aRange
|
129
128
|
prod.rhs.each { |symb| add_node(symb, {}) }
|
@@ -131,7 +130,7 @@ module Rley # This module is used as a namespace
|
|
131
130
|
return if curr_node.children.empty?
|
132
131
|
curr_node.children.first.range.assign(low: curr_node.range.low)
|
133
132
|
curr_node.children.last.range.assign(high: curr_node.range.high)
|
134
|
-
end
|
133
|
+
end
|
135
134
|
|
136
135
|
# Add the given node as child node of current node
|
137
136
|
def add_child(aNode)
|
data/lib/rley/parser/parsing.rb
CHANGED
@@ -10,7 +10,7 @@ module Rley # This module is used as a namespace
|
|
10
10
|
|
11
11
|
# The sequence of input tokens to parse
|
12
12
|
attr_reader(:tokens)
|
13
|
-
|
13
|
+
|
14
14
|
def initialize(startDottedRule, theTokens)
|
15
15
|
@tokens = theTokens.dup
|
16
16
|
@chart = Chart.new(startDottedRule, tokens.size)
|
@@ -25,7 +25,7 @@ module Rley # This module is used as a namespace
|
|
25
25
|
found = end_parse_state
|
26
26
|
return !found.nil?
|
27
27
|
end
|
28
|
-
|
28
|
+
|
29
29
|
# Factory method. Builds a ParseTree from the parse result.
|
30
30
|
# @return [ParseTree]
|
31
31
|
# Algorithm:
|
@@ -34,27 +34,14 @@ module Rley # This module is used as a namespace
|
|
34
34
|
def parse_tree()
|
35
35
|
state_tracker = new_state_tracker
|
36
36
|
builder = tree_builder(state_tracker.state_set_index)
|
37
|
-
|
37
|
+
|
38
38
|
loop do
|
39
|
-
#
|
39
|
+
# Retrieve the symbol on left of the dot
|
40
40
|
curr_symbol = state_tracker.symbol_on_left
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
predecessor_state_terminal(curr_symbol, state_tracker, builder)
|
46
|
-
|
47
|
-
when Syntax::NonTerminal
|
48
|
-
completed_state_for(curr_symbol, state_tracker, builder)
|
49
|
-
|
50
|
-
when NilClass # No symbol on the left of dot
|
51
|
-
# Retrieve all parse states that expect the lhs
|
52
|
-
new_states = states_expecting_lhs(state_tracker)
|
53
|
-
break if new_states.empty?
|
54
|
-
|
55
|
-
select_expecting_state(new_states, state_tracker, builder)
|
56
|
-
break if builder.root == builder.current_node
|
57
|
-
end
|
41
|
+
|
42
|
+
# Place the symbol in the parse tree
|
43
|
+
done = insert_matched_symbol(state_tracker, builder)
|
44
|
+
break if done
|
58
45
|
end
|
59
46
|
|
60
47
|
return builder.parse_tree
|
@@ -145,8 +132,40 @@ module Rley # This module is used as a namespace
|
|
145
132
|
return candidate_states.find(&:complete?)
|
146
133
|
end
|
147
134
|
|
135
|
+
|
136
|
+
# Insert in a parse tree the symbol on the left of the
|
137
|
+
# current dotted rule.
|
138
|
+
def insert_matched_symbol(aStateTracker, aBuilder)
|
139
|
+
# Retrieve symbol before the dot in active parse state
|
140
|
+
match_symbol = aStateTracker.symbol_on_left
|
141
|
+
|
142
|
+
# Retrieve tree node being processed
|
143
|
+
tree_node = aBuilder.current_node
|
144
|
+
|
145
|
+
done = false
|
146
|
+
case [match_symbol.class, tree_node.class]
|
147
|
+
when [Syntax::Terminal, PTree::TerminalNode]
|
148
|
+
aStateTracker.to_prev_state_set
|
149
|
+
predecessor_state_terminal(match_symbol, aStateTracker, aBuilder)
|
150
|
+
|
151
|
+
when [NilClass, Rley::PTree::TerminalNode],
|
152
|
+
[NilClass, PTree::NonTerminalNode]
|
153
|
+
# Retrieve all parse states that expect the lhs
|
154
|
+
new_states = states_expecting_lhs(aStateTracker, aBuilder)
|
155
|
+
done = true if new_states.empty?
|
156
|
+
# Select an unused parse state
|
157
|
+
aStateTracker.select_state(new_states)
|
158
|
+
|
159
|
+
when [Syntax::NonTerminal, PTree::NonTerminalNode]
|
160
|
+
completed_state_for(match_symbol, aStateTracker, aBuilder)
|
161
|
+
end
|
162
|
+
|
163
|
+
done ||= aBuilder.root == aBuilder.current_node
|
164
|
+
return done
|
165
|
+
end
|
166
|
+
|
148
167
|
private
|
149
|
-
|
168
|
+
|
150
169
|
# Factory method. Creates and initializes a ParseStateTracker instance.
|
151
170
|
def new_state_tracker()
|
152
171
|
instance = ParseStateTracker.new(chart.last_index)
|
@@ -154,16 +173,15 @@ module Rley # This module is used as a namespace
|
|
154
173
|
|
155
174
|
return instance
|
156
175
|
end
|
157
|
-
|
158
|
-
|
176
|
+
|
177
|
+
|
159
178
|
# A terminal symbol is on the left of dot.
|
160
179
|
# Go to the predecessor state for the given terminal
|
161
180
|
def predecessor_state_terminal(_a_symb, aStateTracker, aTreeBuilder)
|
162
|
-
|
181
|
+
index = aStateTracker.state_set_index
|
182
|
+
aTreeBuilder.current_node.range = { low: index, high: index + 1 }
|
163
183
|
link_node_to_token(aTreeBuilder, aStateTracker.state_set_index)
|
164
184
|
unless aTreeBuilder.current_node.is_a?(PTree::TerminalNode)
|
165
|
-
pp aTreeBuilder.root
|
166
|
-
pp aTreeBuilder.current_node
|
167
185
|
fail StandardError, 'Expected terminal node'
|
168
186
|
end
|
169
187
|
aTreeBuilder.move_back
|
@@ -171,40 +189,39 @@ module Rley # This module is used as a namespace
|
|
171
189
|
previous_state = state_set.predecessor_state(aStateTracker.parse_state)
|
172
190
|
aStateTracker.parse_state = previous_state
|
173
191
|
end
|
174
|
-
|
175
|
-
|
176
|
-
# Retrieve a complete state with given symbol as lhs.
|
192
|
+
|
193
|
+
|
194
|
+
# Retrieve a complete state with given non-terminal symbol as lhs.
|
177
195
|
def completed_state_for(a_symb, aTracker, aTreeBuilder)
|
178
196
|
new_states = chart[aTracker.state_set_index].states_rewriting(a_symb)
|
179
|
-
aTracker.select_state(new_states)
|
197
|
+
aTracker.select_state(new_states)
|
180
198
|
aTreeBuilder.range = { high: aTracker.state_set_index }
|
181
199
|
aTreeBuilder.use_complete_state(aTracker.parse_state)
|
182
200
|
link_node_to_token(aTreeBuilder, aTracker.state_set_index - 1)
|
183
201
|
aTreeBuilder.move_down
|
184
202
|
end
|
185
|
-
|
186
|
-
|
187
|
-
def states_expecting_lhs(aStateTracker)
|
203
|
+
|
204
|
+
|
205
|
+
def states_expecting_lhs(aStateTracker, aTreeBuilder)
|
188
206
|
lhs = aStateTracker.curr_dotted_item.production.lhs
|
189
207
|
new_states = states_expecting(lhs, aStateTracker.state_set_index, true)
|
208
|
+
new_states.reject! { |st| st == aStateTracker.parse_state }
|
209
|
+
# Filter out parse states with incompatible range
|
210
|
+
if new_states.size > 1
|
211
|
+
previous_node = aTreeBuilder.current_path[-3]
|
212
|
+
new_states.select! do |parse_state|
|
213
|
+
parse_state.dotted_rule.production.lhs == previous_node.symbol
|
214
|
+
end
|
215
|
+
end
|
190
216
|
|
191
217
|
return new_states
|
192
218
|
end
|
193
|
-
|
194
|
-
def select_expecting_state(theStates, aStateTracker, aTreeBuilder)
|
195
|
-
# Select an unused parse state
|
196
|
-
aStateTracker.select_state(theStates)
|
197
|
-
|
198
|
-
aTreeBuilder.range = { low: aStateTracker.state_set_index }
|
199
|
-
aTreeBuilder.move_back
|
200
|
-
aTreeBuilder.range = { low: aStateTracker.parse_state.origin }
|
201
|
-
end
|
202
|
-
|
203
219
|
|
204
220
|
# If the current node is a terminal node
|
205
221
|
# then link the token to that node
|
206
222
|
def link_node_to_token(aTreeBuilder, aStateSetIndex)
|
207
223
|
return unless aTreeBuilder.current_node.is_a?(PTree::TerminalNode)
|
224
|
+
return unless aTreeBuilder.current_node.token.nil?
|
208
225
|
|
209
226
|
a_node = aTreeBuilder.current_node
|
210
227
|
a_node.token = tokens[aStateSetIndex] unless a_node.token
|