rley 0.2.03 → 0.2.04
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +8 -8
- data/examples/parsers/parsing_err_expr.rb +85 -0
- data/examples/parsers/parsing_groucho.rb +99 -0
- data/examples/parsers/parsing_tricky.rb +53 -0
- data/lib/rley/constants.rb +1 -1
- data/lib/rley/parser/chart.rb +27 -4
- data/lib/rley/parser/earley_parser.rb +54 -16
- data/lib/rley/parser/parse_state_tracker.rb +1 -0
- data/lib/rley/parser/parse_tracer.rb +100 -0
- data/lib/rley/parser/parsing.rb +18 -6
- data/lib/rley/parser/state_set.rb +9 -1
- data/spec/rley/parser/chart_spec.rb +71 -4
- data/spec/rley/parser/earley_parser_spec.rb +33 -0
- data/spec/rley/parser/parse_tracer_spec.rb +193 -0
- data/spec/rley/parser/parsing_spec.rb +28 -11
- metadata +9 -3
checksums.yaml
CHANGED
@@ -1,15 +1,15 @@
|
|
1
1
|
---
|
2
2
|
!binary "U0hBMQ==":
|
3
3
|
metadata.gz: !binary |-
|
4
|
-
|
4
|
+
YmIzNmI1ZmM0N2QyOWM5NmQyYjlmOWRlNzllZmZjMmMxZmNmNmQ4Yg==
|
5
5
|
data.tar.gz: !binary |-
|
6
|
-
|
6
|
+
MmJjMDU3ZTMwYTA2NzY1YzJjOWQ3ZDk1MGZjYmFmMGMyMjgzOWZhYQ==
|
7
7
|
!binary "U0hBNTEy":
|
8
8
|
metadata.gz: !binary |-
|
9
|
-
|
10
|
-
|
11
|
-
|
9
|
+
OTgxZTIxZWZhMWRlZTU1ZmVmZDhlYjllOTk2YjYwOTE5NDZjMDgzNzVlMmE3
|
10
|
+
YTIyYzNlNDU3MWE2OTZjM2I4MzAxNzhmMDFjNWU5YmI2N2QyNzQ2NTcxYjg1
|
11
|
+
ZjZkOTU2MWU4ZjM0NWUyMWM5ZDdiNDE1NzM2YTk0NDdlOThhMmE=
|
12
12
|
data.tar.gz: !binary |-
|
13
|
-
|
14
|
-
|
15
|
-
|
13
|
+
NDRhZmM5Mzc2ZmFhOWI1OTBiNzMwODA0OTE5NTk4ZjUzYzQ0ZjgyOTAzYTg2
|
14
|
+
YTE0YjFjZDRjM2M0NDYwZDk4Nzg3NGM0OTM4NWRjMzk4NTY4Nzg0OTdkNzAx
|
15
|
+
NjgxOTAxMmIyZWFjMzY0Y2M3MTU4NzRhZjA5MzdlMjUzYzdhNmI=
|
@@ -0,0 +1,85 @@
|
|
1
|
+
# Purpose: to demonstrate how to handle parsing errors
|
2
|
+
# and render a parse tree
|
3
|
+
require 'pp' # TODO remove this dependency
|
4
|
+
require 'rley' # Load the gem
|
5
|
+
|
6
|
+
# Steps to render a parse tree (of a valid parsed input):
|
7
|
+
# 1. Define a grammar
|
8
|
+
# 2. Create a tokenizer for the language
|
9
|
+
# 3. Create a parser for that grammar
|
10
|
+
# 4. Tokenize the input
|
11
|
+
# 5. Let the parser process the input
|
12
|
+
# 6. Generate a parse tree from the parse result
|
13
|
+
# 7. Render the parse tree (in JSON)
|
14
|
+
|
15
|
+
########################################
|
16
|
+
# Step 1. Define a grammar for a very simple arithmetic expression language
|
17
|
+
# (based on example in article on Earley's algorithm in Wikipedia)
|
18
|
+
|
19
|
+
# Let's create the grammar piece by piece
|
20
|
+
builder = Rley::Syntax::GrammarBuilder.new
|
21
|
+
builder.add_terminals('+', '*', 'integer')
|
22
|
+
builder.add_production('P' => 'S')
|
23
|
+
builder.add_production('S' => %w(S + M))
|
24
|
+
builder.add_production('S' => 'M')
|
25
|
+
builder.add_production('M' => %w(M * T))
|
26
|
+
builder.add_production('M' => 'T')
|
27
|
+
builder.add_production('T' => 'integer')
|
28
|
+
|
29
|
+
# And now build the grammar...
|
30
|
+
grammar_s_expr = builder.grammar
|
31
|
+
|
32
|
+
|
33
|
+
########################################
|
34
|
+
# Step 2. Create a tokenizer for the language
|
35
|
+
# The tokenizer transforms the input into an array of tokens
|
36
|
+
def tokenizer(aText, aGrammar)
|
37
|
+
tokens = aText.scan(/\S+/).map do |lexeme|
|
38
|
+
case lexeme
|
39
|
+
when '+', '*'
|
40
|
+
terminal = aGrammar.name2symbol[lexeme]
|
41
|
+
when /^[-+]?\d+$/
|
42
|
+
terminal = aGrammar.name2symbol['integer']
|
43
|
+
else
|
44
|
+
msg = "Unknown input text '#{lexeme}'"
|
45
|
+
fail StandardError, msg
|
46
|
+
end
|
47
|
+
Rley::Parser::Token.new(lexeme, terminal)
|
48
|
+
end
|
49
|
+
|
50
|
+
return tokens
|
51
|
+
end
|
52
|
+
|
53
|
+
########################################
|
54
|
+
# Step 3. Create a parser for that grammar
|
55
|
+
parser = Rley::Parser::EarleyParser.new(grammar_s_expr)
|
56
|
+
|
57
|
+
########################################
|
58
|
+
# Step 4. Tokenize the invalid input
|
59
|
+
invalid_input = '2 + 3 * * 4'
|
60
|
+
tokens = tokenizer(invalid_input, grammar_s_expr)
|
61
|
+
|
62
|
+
########################################
|
63
|
+
# Step 5. Let the parser process the input
|
64
|
+
result = parser.parse(tokens)
|
65
|
+
puts "Parse successful? #{result.success?}"
|
66
|
+
pp result
|
67
|
+
|
68
|
+
########################################
|
69
|
+
# Step 6. Generate a parse tree from the parse result
|
70
|
+
ptree = result.parse_tree
|
71
|
+
pp ptree
|
72
|
+
|
73
|
+
########################################
|
74
|
+
# Step 7. Render the parse tree (in JSON)
|
75
|
+
# Let's create a parse tree visitor
|
76
|
+
visitor = Rley::ParseTreeVisitor.new(ptree)
|
77
|
+
|
78
|
+
# Here we create a renderer object...
|
79
|
+
renderer = Rley::Formatter::Json.new(STDOUT)
|
80
|
+
|
81
|
+
# Now emit the parse tree as JSON on the console output
|
82
|
+
puts "JSON rendering of the parse tree for '#{invalid_input}' input:"
|
83
|
+
renderer.render(visitor)
|
84
|
+
|
85
|
+
# End of file
|
@@ -0,0 +1,99 @@
|
|
1
|
+
# Purpose: to demonstrate how to parse an emblematic ambiguous sentence
|
2
|
+
# Based on example found at: http://www.nltk.org/book_1ed/ch08.html
|
3
|
+
|
4
|
+
require 'pp'
|
5
|
+
require 'rley' # Load the gem
|
6
|
+
|
7
|
+
# Steps to render a parse tree (of a valid parsed input):
|
8
|
+
# 1. Define a grammar
|
9
|
+
# 2. Create a tokenizer for the language
|
10
|
+
# 3. Create a parser for that grammar
|
11
|
+
# 4. Tokenize the input
|
12
|
+
# 5. Let the parser process the input
|
13
|
+
# 6. Generate a parse tree from the parse result
|
14
|
+
# 7. Render the parse tree (in JSON)
|
15
|
+
|
16
|
+
########################################
|
17
|
+
# Step 1. Define a grammar for a micro English-like language
|
18
|
+
# based on Jurafsky & Martin L0 language (chapter 12 of the book).
|
19
|
+
# It defines the syntax of a sentence in a language with a
|
20
|
+
# very limited syntax and lexicon in the context of airline reservation.
|
21
|
+
builder = Rley::Syntax::GrammarBuilder.new
|
22
|
+
builder.add_terminals('N', 'V', 'Pro') # N(oun), V(erb), Pro(noun)
|
23
|
+
builder.add_terminals('Det', 'P') # Det(erminer), P(reposition)
|
24
|
+
builder.add_production('S' => %w[NP VP])
|
25
|
+
builder.add_production('NP' => %w[Det N])
|
26
|
+
builder.add_production('NP' => %w[Det N PP])
|
27
|
+
builder.add_production('NP' => 'Pro')
|
28
|
+
builder.add_production('VP' => %w[V NP])
|
29
|
+
builder.add_production('VP' => %w[VP PP])
|
30
|
+
builder.add_production('PP' => %w[P NP])
|
31
|
+
|
32
|
+
# And now build the grammar...
|
33
|
+
groucho_grammar = builder.grammar
|
34
|
+
|
35
|
+
|
36
|
+
########################################
|
37
|
+
# Step 2. Create a tokenizer for the language
|
38
|
+
# The tokenizer transforms the input into an array of tokens
|
39
|
+
# This is a very simplistic implementation for demo purposes.
|
40
|
+
|
41
|
+
# The lexicon is just a Hash with pairs of the form:
|
42
|
+
# word => terminal symbol name
|
43
|
+
Groucho_lexicon = {
|
44
|
+
'elephant' => 'N',
|
45
|
+
'pajamas' => 'N',
|
46
|
+
'shot' => 'V',
|
47
|
+
'I' => 'Pro',
|
48
|
+
'an' => 'Det',
|
49
|
+
'my' => 'Det',
|
50
|
+
'in' => 'P',
|
51
|
+
}
|
52
|
+
|
53
|
+
# Highly simplified tokenizer implementation.
|
54
|
+
def tokenizer(aText, aGrammar)
|
55
|
+
tokens = aText.scan(/\S+/).map do |word|
|
56
|
+
term_name = Groucho_lexicon[word]
|
57
|
+
if term_name.nil?
|
58
|
+
fail StandardError, "Word '#{word}' not found in lexicon"
|
59
|
+
end
|
60
|
+
terminal = aGrammar.name2symbol[term_name]
|
61
|
+
Rley::Parser::Token.new(word, terminal)
|
62
|
+
end
|
63
|
+
|
64
|
+
return tokens
|
65
|
+
end
|
66
|
+
|
67
|
+
########################################
|
68
|
+
# Step 3. Create a parser for that grammar
|
69
|
+
parser = Rley::Parser::EarleyParser.new(groucho_grammar)
|
70
|
+
|
71
|
+
########################################
|
72
|
+
# Step 4. Tokenize the input
|
73
|
+
valid_input = 'I shot an elephant in my pajamas'
|
74
|
+
tokens = tokenizer(valid_input, groucho_grammar)
|
75
|
+
|
76
|
+
########################################
|
77
|
+
# Step 5. Let the parser process the input
|
78
|
+
result = parser.parse(tokens)
|
79
|
+
|
80
|
+
puts "Parsing success? #{result.success?}"
|
81
|
+
|
82
|
+
#=begin
|
83
|
+
########################################
|
84
|
+
# Step 6. Generate a parse tree from the parse result
|
85
|
+
ptree = result.parse_tree
|
86
|
+
|
87
|
+
########################################
|
88
|
+
# Step 7. Render the parse tree (in JSON)
|
89
|
+
# Let's create a parse tree visitor
|
90
|
+
visitor = Rley::ParseTreeVisitor.new(ptree)
|
91
|
+
|
92
|
+
# Here we create a renderer object...
|
93
|
+
renderer = Rley::Formatter::Json.new(STDOUT)
|
94
|
+
|
95
|
+
# Now emit the parse tree as JSON on the console output
|
96
|
+
puts "JSON rendering of the parse tree for '#{valid_input}' input:"
|
97
|
+
renderer.render(visitor)
|
98
|
+
#=end
|
99
|
+
# End of file
|
@@ -0,0 +1,53 @@
|
|
1
|
+
# Purpose: to use a grammar that causes some Earley parsers to fail.
|
2
|
+
# See: http://stackoverflow.com/questions/22311323/earley-parser-recursion
|
3
|
+
require 'rley' # Load the gem
|
4
|
+
|
5
|
+
# Steps to parse some valid input:
|
6
|
+
# 1. Define a grammar
|
7
|
+
# 2. Create a tokenizer for the language
|
8
|
+
# 3. Create a parser for that grammar
|
9
|
+
# 4. Tokenize the input
|
10
|
+
# 5. Let the parser process the input & trace its progress
|
11
|
+
|
12
|
+
|
13
|
+
########################################
|
14
|
+
# Step 1. Define a grammar that might cause infinite recursion
|
15
|
+
# Let's create the grammar step-by-step with the grammar builder:
|
16
|
+
builder = Rley::Syntax::GrammarBuilder.new
|
17
|
+
builder.add_terminals('ident')
|
18
|
+
builder.add_production('S' => 'E')
|
19
|
+
builder.add_production('E' => ['E', 'E'] )
|
20
|
+
builder.add_production('E' => 'ident')
|
21
|
+
|
22
|
+
# And now build the grammar...
|
23
|
+
grammar_tricky = builder.grammar
|
24
|
+
|
25
|
+
|
26
|
+
########################################
|
27
|
+
# Step 2. Create a tokenizer for the language
|
28
|
+
# The tokenizer transforms the input into an array of tokens
|
29
|
+
def tokenizer(aText, aGrammar)
|
30
|
+
terminal = aGrammar.name2symbol['ident']
|
31
|
+
|
32
|
+
tokens = aText.chars.map do |ch|
|
33
|
+
Rley::Parser::Token.new(ch, terminal)
|
34
|
+
end
|
35
|
+
|
36
|
+
return tokens
|
37
|
+
end
|
38
|
+
|
39
|
+
########################################
|
40
|
+
# Step 3. Create a parser for that grammar
|
41
|
+
parser = Rley::Parser::EarleyParser.new(grammar_tricky)
|
42
|
+
|
43
|
+
########################################
|
44
|
+
# Step 4. Tokenize the input
|
45
|
+
valid_input = 'abcdefg'
|
46
|
+
tokens = tokenizer(valid_input, grammar_tricky)
|
47
|
+
|
48
|
+
########################################
|
49
|
+
# Step 5. Let the parser process the input, set trace level to 1
|
50
|
+
result = parser.parse(tokens, 1)
|
51
|
+
puts "Parsing success? #{result.success?}"
|
52
|
+
|
53
|
+
# End of file
|
data/lib/rley/constants.rb
CHANGED
data/lib/rley/parser/chart.rb
CHANGED
@@ -7,10 +7,19 @@ module Rley # This module is used as a namespace
|
|
7
7
|
# A one-dimensional array with n + 1 entries (n = number of input tokens).
|
8
8
|
class Chart
|
9
9
|
attr_reader(:state_sets)
|
10
|
+
|
11
|
+
# The level of trace details reported on stdout during the parse.
|
12
|
+
# The possible values are:
|
13
|
+
# 0: No trace output (default case)
|
14
|
+
# 1: Show trace of scanning and completion rules
|
15
|
+
# 2: Same as 1, with the addition of the prediction rules
|
16
|
+
attr_reader(:tracer)
|
10
17
|
|
11
|
-
|
18
|
+
# @param aTracer [ParseTracer] A tracer object.
|
19
|
+
def initialize(startDottedItem, tokenCount, aTracer)
|
20
|
+
@tracer = aTracer
|
12
21
|
@state_sets = Array.new(tokenCount + 1) { |_| StateSet.new }
|
13
|
-
push_state(startDottedItem, 0, 0)
|
22
|
+
push_state(startDottedItem, 0, 0, :start_rule)
|
14
23
|
end
|
15
24
|
|
16
25
|
# The dotted item/rule used to seed the parse chart.
|
@@ -38,9 +47,23 @@ module Rley # This module is used as a namespace
|
|
38
47
|
end
|
39
48
|
|
40
49
|
# Push a parse state for the chart entry with given index
|
41
|
-
def push_state(aDottedItem, anOrigin, anIndex)
|
50
|
+
def push_state(aDottedItem, anOrigin, anIndex, aReason)
|
42
51
|
new_state = ParseState.new(aDottedItem, anOrigin)
|
43
|
-
self[anIndex].push_state(new_state)
|
52
|
+
pushed = self[anIndex].push_state(new_state)
|
53
|
+
if pushed && tracer.level > 0
|
54
|
+
case aReason
|
55
|
+
when :start_rule, :prediction
|
56
|
+
tracer.trace_prediction(anIndex, new_state)
|
57
|
+
|
58
|
+
when :scanning
|
59
|
+
tracer.trace_scanning(anIndex, new_state)
|
60
|
+
|
61
|
+
when :completion
|
62
|
+
tracer.trace_completion(anIndex, new_state)
|
63
|
+
else
|
64
|
+
raise NotImplementedError, "Unknown push_state mode #{aReason}"
|
65
|
+
end
|
66
|
+
end
|
44
67
|
end
|
45
68
|
end # class
|
46
69
|
end # module
|
@@ -1,4 +1,5 @@
|
|
1
1
|
require_relative '../syntax/grammar'
|
2
|
+
require_relative 'parse_tracer'
|
2
3
|
require_relative 'dotted_item'
|
3
4
|
require_relative 'parsing'
|
4
5
|
|
@@ -19,9 +20,7 @@ module Rley # This module is used as a namespace
|
|
19
20
|
# In other words, the 'next_mapping' allows to find the dotted item
|
20
21
|
# after "advancing" the dot
|
21
22
|
attr_reader(:next_mapping)
|
22
|
-
|
23
|
-
# @param aGrammar [Grammar] The grammar of the language
|
24
|
-
# (to use by the parser).
|
23
|
+
|
25
24
|
def initialize(aGrammar)
|
26
25
|
@grammar = aGrammar
|
27
26
|
@dotted_items = build_dotted_items(grammar)
|
@@ -29,29 +28,57 @@ module Rley # This module is used as a namespace
|
|
29
28
|
@next_mapping = build_next_mapping(dotted_items)
|
30
29
|
end
|
31
30
|
|
31
|
+
=begin
|
32
|
+
You can optionally specify a tracing level, for how much output you
|
33
|
+
want to see:
|
34
|
+
|
35
|
+
0: No output.
|
36
|
+
1: Show edges from scanner and completer rules (not predictor).
|
37
|
+
2: Show all edges as they are added to the chart.
|
38
|
+
|
39
|
+
- For each index I{end} in [0, 1, ..., N]:
|
40
|
+
- For each I{edge} s.t. I{edge}.end = I{end}:
|
41
|
+
- If I{edge} is incomplete, and I{edge}.next is not a part
|
42
|
+
of speech:
|
43
|
+
- Apply PredictorRule to I{edge}
|
44
|
+
- If I{edge} is incomplete, and I{edge}.next is a part of
|
45
|
+
speech:
|
46
|
+
- Apply ScannerRule to I{edge}
|
47
|
+
- If I{edge} is complete:
|
48
|
+
- Apply CompleterRule to I{edge}
|
49
|
+
- Return any complete parses in the chart
|
50
|
+
=end
|
51
|
+
|
32
52
|
# Parse a sequence of input tokens.
|
33
53
|
# @param aTokenSequence [Array] Array of Tokens objects returned by a
|
34
54
|
# tokenizer/scanner/lexer.
|
55
|
+
# @param aGrammar [Grammar] The grammar of the language
|
56
|
+
# (to use by the parser).
|
57
|
+
# @param aTraceLevel [Fixnum] The specified trace level.
|
58
|
+
# The possible values are:
|
59
|
+
# 0: No trace output (default case)
|
60
|
+
# 1: Show trace of scanning and completion rules
|
61
|
+
# 2: Same as 1, with the addition of the prediction rules
|
35
62
|
# @return [Parsing] an object that embeds the parse results.
|
36
|
-
def parse(aTokenSequence)
|
37
|
-
|
63
|
+
def parse(aTokenSequence, aTraceLevel = 0)
|
64
|
+
tracer = ParseTracer.new(aTraceLevel, $stdout, aTokenSequence)
|
65
|
+
result = Parsing.new(start_dotted_item, aTokenSequence, tracer)
|
38
66
|
last_token_index = aTokenSequence.size
|
39
67
|
(0..last_token_index).each do |i|
|
40
68
|
predicted = Set.new
|
41
69
|
result.chart[i].each do |state|
|
42
|
-
if state.complete?
|
43
|
-
|
44
|
-
completion(result, state, i)
|
70
|
+
if state.complete? # End of production reached?
|
71
|
+
completion(result, state, i, tracer)
|
45
72
|
else
|
46
73
|
next_symbol = state.next_symbol
|
47
74
|
if next_symbol.kind_of?(Syntax::NonTerminal)
|
48
75
|
unless predicted.include? next_symbol
|
49
|
-
prediction(result, state, next_symbol, i)
|
76
|
+
prediction(result, state, next_symbol, i, tracer)
|
50
77
|
predicted << next_symbol # Avoid repeated predictions
|
51
78
|
end
|
52
79
|
elsif i < last_token_index
|
53
80
|
# Expecting a terminal symbol
|
54
|
-
scanning(result, next_symbol, i)
|
81
|
+
scanning(result, next_symbol, i, tracer)
|
55
82
|
end
|
56
83
|
end
|
57
84
|
end
|
@@ -134,18 +161,21 @@ module Rley # This module is used as a namespace
|
|
134
161
|
# immediately follows a dot
|
135
162
|
# (= is expected/predicted by the production rule)
|
136
163
|
# @param aPosition [Fixnum] position in the input token sequence.
|
137
|
-
def prediction(aParsing, aState, aNonTerminal, aPosition)
|
164
|
+
def prediction(aParsing, aState, aNonTerminal, aPosition, aTracer)
|
165
|
+
if aTracer.level > 1
|
166
|
+
puts "Chart[#{aPosition}] Prediction(s) from #{aState}:"
|
167
|
+
end
|
138
168
|
# Retrieve all start dotted items for productions
|
139
169
|
# with aNonTerminal as its lhs
|
140
170
|
items = start_mapping[aNonTerminal]
|
141
171
|
items.each do |an_item|
|
142
|
-
aParsing.push_state(an_item, aPosition, aPosition)
|
172
|
+
aParsing.push_state(an_item, aPosition, aPosition, :prediction)
|
143
173
|
end
|
144
174
|
|
145
175
|
return unless aNonTerminal.nullable?
|
146
176
|
# Ayock-Horspool trick for nullable rules
|
147
177
|
next_item = next_mapping[aState.dotted_rule]
|
148
|
-
aParsing.push_state(next_item, aState.origin, aPosition)
|
178
|
+
aParsing.push_state(next_item, aState.origin, aPosition, :prediction)
|
149
179
|
end
|
150
180
|
|
151
181
|
# This method is called when a parse state for chart entry at position
|
@@ -162,7 +192,12 @@ module Rley # This module is used as a namespace
|
|
162
192
|
# @param aTerminal [Terminal] a terminal symbol that
|
163
193
|
# immediately follows a dot
|
164
194
|
# @param aPosition [Fixnum] position in the input token sequence.
|
165
|
-
def scanning(aParsing, aTerminal, aPosition)
|
195
|
+
def scanning(aParsing, aTerminal, aPosition, aTracer)
|
196
|
+
if aTracer.level > 1
|
197
|
+
prefix = "Chart[#{aPosition}] Scanning of terminal "
|
198
|
+
suffix = "#{aTerminal.name}:"
|
199
|
+
puts prefix + suffix
|
200
|
+
end
|
166
201
|
aParsing.scanning(aTerminal, aPosition) do |item|
|
167
202
|
next_mapping[item]
|
168
203
|
end
|
@@ -173,11 +208,14 @@ module Rley # This module is used as a namespace
|
|
173
208
|
# For every state in chart[aPosition] that is
|
174
209
|
# complete (i.e. of the form: { dotted_rule: X -> γ •, origin: j}),
|
175
210
|
# Find states s in chart[j] of the
|
176
|
-
# form {dotted_rule: Y -> α • X β, origin: i}
|
211
|
+
# form { dotted_rule: Y -> α • X β, origin: i}
|
177
212
|
# In other words, rules that predicted the non-terminal X.
|
178
213
|
# For each s, add to chart[aPosition] a state of the form
|
179
214
|
# { dotted_rule: Y → α X • β, origin: i})
|
180
|
-
def completion(aParsing, aState, aPosition)
|
215
|
+
def completion(aParsing, aState, aPosition, aTracer)
|
216
|
+
if aTracer.level > 1
|
217
|
+
puts "Chart[#{aPosition}] Completion of state #{aState}:"
|
218
|
+
end
|
181
219
|
aParsing.completion(aState, aPosition) do |item|
|
182
220
|
next_mapping[item]
|
183
221
|
end
|
@@ -25,6 +25,7 @@ module Rley # This module is used as a namespace
|
|
25
25
|
|
26
26
|
# Write accessor. Set the given parse state as the current one.
|
27
27
|
def parse_state=(aParseState)
|
28
|
+
fail StandardError, "Nil parse state" if aParseState.nil?
|
28
29
|
@parse_state = aParseState
|
29
30
|
processed_states[parse_state] = true
|
30
31
|
end
|
@@ -0,0 +1,100 @@
|
|
1
|
+
require 'ostruct'
|
2
|
+
|
3
|
+
module Rley # This module is used as a namespace
|
4
|
+
module Parser # This module is used as a namespace
|
5
|
+
# Utility class used to trace the parsing of a token sequence.
|
6
|
+
class ParseTracer
|
7
|
+
# The stream where the trace output is sent
|
8
|
+
attr_reader(:ostream)
|
9
|
+
|
10
|
+
# The trace level
|
11
|
+
attr_reader(:level)
|
12
|
+
|
13
|
+
attr_reader(:lexemes)
|
14
|
+
|
15
|
+
attr_reader(:col_width)
|
16
|
+
|
17
|
+
def initialize(aTraceLevel, anIO, aTokenSequence)
|
18
|
+
@level = aTraceLevel <= 0 ? 0 : [aTraceLevel, 2].min
|
19
|
+
@ostream = anIO
|
20
|
+
@lexemes = aTokenSequence.map(&:lexeme)
|
21
|
+
|
22
|
+
emit_tokens
|
23
|
+
emit_heading
|
24
|
+
end
|
25
|
+
|
26
|
+
# Emit the trace text to the output IO
|
27
|
+
# if the given trace level is less than or equal to the
|
28
|
+
# trace level of the tracer instance.
|
29
|
+
def print_if(aLevel, text)
|
30
|
+
ostream.print(text) if level >= aLevel
|
31
|
+
end
|
32
|
+
|
33
|
+
# Emit the trace of a scanning step.
|
34
|
+
def trace_scanning(aStatesetIndex, aParseState)
|
35
|
+
return unless level
|
36
|
+
|
37
|
+
scan_picture = '[' + '-' * (col_width-1) + ']'
|
38
|
+
org = OpenStruct.new(origin: aStatesetIndex - 1,
|
39
|
+
dotted_rule: aParseState.dotted_rule)
|
40
|
+
trace_diagram(aStatesetIndex, org, scan_picture)
|
41
|
+
end
|
42
|
+
|
43
|
+
def trace_prediction(aStatesetIndex, aParseState)
|
44
|
+
return unless level
|
45
|
+
|
46
|
+
trace_diagram(aStatesetIndex, aParseState, '>')
|
47
|
+
end
|
48
|
+
|
49
|
+
def trace_completion(aStatesetIndex, aParseState)
|
50
|
+
return unless level
|
51
|
+
|
52
|
+
if aStatesetIndex == lexemes.size && aParseState.origin == 0 && aParseState.complete?
|
53
|
+
picture = '=' * (col_width * lexemes.size - 1)
|
54
|
+
else
|
55
|
+
count = col_width * (aStatesetIndex - aParseState.origin) - 1
|
56
|
+
picture = '-' * count
|
57
|
+
end
|
58
|
+
completion_picture = '[' + picture + (aParseState.complete? ? ']' : '>')
|
59
|
+
trace_diagram(aStatesetIndex, aParseState, completion_picture)
|
60
|
+
end
|
61
|
+
|
62
|
+
private
|
63
|
+
|
64
|
+
def emit_tokens()
|
65
|
+
literals = lexemes.map { |lx| "'#{lx}'" }
|
66
|
+
print_if 1, '[' + literals.join(', ') + "]\n"
|
67
|
+
end
|
68
|
+
|
69
|
+
def emit_heading()
|
70
|
+
longest = lexemes.map(&:length).max
|
71
|
+
@col_width = longest + 3
|
72
|
+
headers = lexemes.map { |l| "#{l.center(col_width-1, ' ')}" }
|
73
|
+
print_if 1, '|.' + headers.join('.') + ".|\n"
|
74
|
+
end
|
75
|
+
|
76
|
+
def padding(aStatesetIndex, aParseState, aPicture)
|
77
|
+
l_pad_pattern = '.' + ' ' * (col_width-1)
|
78
|
+
left_padding = l_pad_pattern * ([0, aParseState.origin].max)
|
79
|
+
r_pad_pattern = ' ' * (col_width-1) + '.'
|
80
|
+
right_padding = r_pad_pattern * (lexemes.size - aStatesetIndex)
|
81
|
+
return left_padding + aPicture + right_padding
|
82
|
+
end
|
83
|
+
|
84
|
+
def parse_state_str(aStatesetIndex, aParseState)
|
85
|
+
"[#{aParseState.origin}:#{aStatesetIndex}] #{aParseState.dotted_rule}"
|
86
|
+
end
|
87
|
+
|
88
|
+
def trace_diagram(aStatesetIndex, aParseState, aPicture)
|
89
|
+
diagram = padding(aStatesetIndex, aParseState, aPicture)
|
90
|
+
prefix = '|'
|
91
|
+
suffix = '| ' + parse_state_str(aStatesetIndex, aParseState)
|
92
|
+
trace = prefix + diagram + suffix
|
93
|
+
|
94
|
+
print_if 1, trace + "\n"
|
95
|
+
end
|
96
|
+
end # class
|
97
|
+
end # module
|
98
|
+
end # module
|
99
|
+
|
100
|
+
# End of file
|
data/lib/rley/parser/parsing.rb
CHANGED
@@ -11,9 +11,14 @@ module Rley # This module is used as a namespace
|
|
11
11
|
# The sequence of input token to parse
|
12
12
|
attr_reader(:tokens)
|
13
13
|
|
14
|
-
|
14
|
+
# @param aTracer [ParseTracer] A tracer object carrying the trace level.
|
15
|
+
# The possible values are:
|
16
|
+
# 0: No trace output (default case)
|
17
|
+
# 1: Show trace of scanning and completion rules
|
18
|
+
# 2: Same as 1, with the addition of the prediction rules
|
19
|
+
def initialize(startDottedRule, theTokens, aTracer)
|
15
20
|
@tokens = theTokens.dup
|
16
|
-
@chart = Chart.new(startDottedRule, tokens.size)
|
21
|
+
@chart = Chart.new(startDottedRule, tokens.size, aTracer)
|
17
22
|
end
|
18
23
|
|
19
24
|
# Return true if the parse was successful (= input tokens
|
@@ -36,6 +41,13 @@ module Rley # This module is used as a namespace
|
|
36
41
|
builder = tree_builder(state_tracker.state_set_index)
|
37
42
|
|
38
43
|
loop do
|
44
|
+
match_symbol = state_tracker.symbol_on_left
|
45
|
+
# puts '--------------------'
|
46
|
+
# puts "Active parse state: #{state_tracker.parse_state}"
|
47
|
+
# puts "Matching symbol: #{match_symbol}"
|
48
|
+
# puts 'Parse tree:'
|
49
|
+
# puts builder.root.to_string(0)
|
50
|
+
|
39
51
|
# Place the symbol on left of the dot in the parse tree
|
40
52
|
done = insert_matched_symbol(state_tracker, builder)
|
41
53
|
break if done
|
@@ -47,9 +59,9 @@ module Rley # This module is used as a namespace
|
|
47
59
|
|
48
60
|
# Push a parse state (dotted item + origin) to the
|
49
61
|
# chart entry with given index if it isn't yet in the chart entry.
|
50
|
-
def push_state(aDottedItem, anOrigin, aChartIndex)
|
62
|
+
def push_state(aDottedItem, anOrigin, aChartIndex, aReason)
|
51
63
|
fail StandardError, 'Dotted item may not be nil' if aDottedItem.nil?
|
52
|
-
chart.push_state(aDottedItem, anOrigin, aChartIndex)
|
64
|
+
chart.push_state(aDottedItem, anOrigin, aChartIndex, aReason)
|
53
65
|
end
|
54
66
|
|
55
67
|
|
@@ -74,7 +86,7 @@ module Rley # This module is used as a namespace
|
|
74
86
|
states = states_expecting(aTerminal, aPosition, false)
|
75
87
|
states.each do |s|
|
76
88
|
next_item = nextMapping.call(s.dotted_rule)
|
77
|
-
push_state(next_item, s.origin, aPosition + 1)
|
89
|
+
push_state(next_item, s.origin, aPosition + 1, :scanning)
|
78
90
|
end
|
79
91
|
end
|
80
92
|
|
@@ -95,7 +107,7 @@ module Rley # This module is used as a namespace
|
|
95
107
|
states = states_expecting(curr_lhs, curr_origin, false)
|
96
108
|
states.each do |s|
|
97
109
|
next_item = nextMapping.call(s.dotted_rule)
|
98
|
-
push_state(next_item, s.origin, aPosition)
|
110
|
+
push_state(next_item, s.origin, aPosition, :completion)
|
99
111
|
end
|
100
112
|
end
|
101
113
|
|
@@ -17,8 +17,16 @@ module Rley # This module is used as a namespace
|
|
17
17
|
# Append the given state (if it isn't yet in the set)
|
18
18
|
# to the list of states
|
19
19
|
# @param aState [ParseState] the state to push.
|
20
|
+
# @return [TrueClass/FalseClass] true when the state is really added
|
20
21
|
def push_state(aState)
|
21
|
-
|
22
|
+
if include?(aState)
|
23
|
+
result = false
|
24
|
+
else
|
25
|
+
@states << aState
|
26
|
+
result = true
|
27
|
+
end
|
28
|
+
|
29
|
+
return result
|
22
30
|
end
|
23
31
|
|
24
32
|
# The list of ParseState that expect the given symbol.
|
@@ -1,5 +1,13 @@
|
|
1
1
|
require_relative '../../spec_helper'
|
2
|
+
require 'stringio'
|
2
3
|
|
4
|
+
require_relative '../../../lib/rley/syntax/terminal'
|
5
|
+
require_relative '../../../lib/rley/syntax/non_terminal'
|
6
|
+
require_relative '../../../lib/rley/syntax/production'
|
7
|
+
require_relative '../../../lib/rley/parser/token'
|
8
|
+
require_relative '../../../lib/rley/parser/dotted_item'
|
9
|
+
require_relative '../../../lib/rley/parser/parse_state'
|
10
|
+
require_relative '../../../lib/rley/parser/parse_tracer'
|
3
11
|
|
4
12
|
# Load the class under test
|
5
13
|
require_relative '../../../lib/rley/parser/chart'
|
@@ -10,12 +18,21 @@ module Rley # Open this namespace to avoid module qualifier prefixes
|
|
10
18
|
let(:count_token) { 20 }
|
11
19
|
let(:dotted_rule) { double('fake-dotted-item') }
|
12
20
|
|
21
|
+
let(:output) { StringIO.new('', 'w') }
|
22
|
+
|
23
|
+
let(:token_seq) do
|
24
|
+
literals = ['I', 'saw', 'John', 'with', 'a', 'dog']
|
25
|
+
literals.map {|lexeme| Token.new(lexeme, nil)}
|
26
|
+
end
|
27
|
+
|
28
|
+
let(:sample_tracer) { ParseTracer.new(0, output, token_seq) }
|
29
|
+
|
13
30
|
context 'Initialization:' do
|
14
31
|
# Default instantiation rule
|
15
|
-
subject { Chart.new(dotted_rule, count_token) }
|
32
|
+
subject { Chart.new(dotted_rule, count_token, sample_tracer) }
|
16
33
|
|
17
|
-
it 'should be created with
|
18
|
-
expect { Chart.new(dotted_rule, count_token) }.not_to raise_error
|
34
|
+
it 'should be created with start dotted rule, token count, tracer' do
|
35
|
+
expect { Chart.new(dotted_rule, count_token, sample_tracer) }.not_to raise_error
|
19
36
|
end
|
20
37
|
|
21
38
|
it 'should have a seed state in first state_set' do
|
@@ -33,10 +50,60 @@ module Rley # Open this namespace to avoid module qualifier prefixes
|
|
33
50
|
it 'should know the start dotted rule' do
|
34
51
|
expect(subject.start_dotted_rule).to eq(dotted_rule)
|
35
52
|
end
|
36
|
-
|
53
|
+
|
37
54
|
it 'should have at least one non-empty state set' do
|
38
55
|
expect(subject.last_index).to eq(0)
|
39
56
|
end
|
57
|
+
|
58
|
+
it 'should reference a tracer' do
|
59
|
+
expect(subject.tracer).to eq(sample_tracer)
|
60
|
+
end
|
61
|
+
end # context
|
62
|
+
|
63
|
+
context 'Provided services:' do
|
64
|
+
let(:t_a) { Syntax::Terminal.new('A') }
|
65
|
+
let(:t_b) { Syntax::Terminal.new('B') }
|
66
|
+
let(:t_c) { Syntax::Terminal.new('C') }
|
67
|
+
let(:nt_sentence) { Syntax::NonTerminal.new('sentence') }
|
68
|
+
|
69
|
+
let(:sample_prod) do
|
70
|
+
Syntax::Production.new(nt_sentence, [t_a, t_b, t_c])
|
71
|
+
end
|
72
|
+
|
73
|
+
let(:origin_val) { 3 }
|
74
|
+
let(:dotted_rule) { DottedItem.new(sample_prod, 2) }
|
75
|
+
let(:complete_rule) { DottedItem.new(sample_prod, 3) }
|
76
|
+
let(:sample_parse_state) { ParseState.new(dotted_rule, origin_val) }
|
77
|
+
let(:sample_tracer) { ParseTracer.new(1, output, token_seq) }
|
78
|
+
|
79
|
+
# Factory method.
|
80
|
+
def parse_state(origin, aDottedRule)
|
81
|
+
ParseState.new(aDottedRule, origin)
|
82
|
+
end
|
83
|
+
|
84
|
+
subject { Chart.new(dotted_rule, count_token, sample_tracer) }
|
85
|
+
|
86
|
+
it 'should trace its initialization' do
|
87
|
+
subject[0] # Force constructor call here
|
88
|
+
expectation = <<-SNIPPET
|
89
|
+
['I', 'saw', 'John', 'with', 'a', 'dog']
|
90
|
+
|. I . saw . John . with . a . dog .|
|
91
|
+
|> . . . . . .| [0:0] sentence => A B . C
|
92
|
+
SNIPPET
|
93
|
+
expect(output.string).to eq(expectation)
|
94
|
+
|
95
|
+
end
|
96
|
+
|
97
|
+
it 'should trace parse state pushing' do
|
98
|
+
subject[0] # Force constructor call here
|
99
|
+
output.string = ''
|
100
|
+
|
101
|
+
subject.push_state(dotted_rule, 3, 5, :prediction)
|
102
|
+
expectation = <<-SNIPPET
|
103
|
+
|. . . > .| [3:5] sentence => A B . C
|
104
|
+
SNIPPET
|
105
|
+
expect(output.string).to eq(expectation)
|
106
|
+
end
|
40
107
|
end # context
|
41
108
|
end # describe
|
42
109
|
end # module
|
@@ -1,4 +1,5 @@
|
|
1
1
|
require_relative '../../spec_helper'
|
2
|
+
require 'stringio'
|
2
3
|
|
3
4
|
require_relative '../../../lib/rley/syntax/verbatim_symbol'
|
4
5
|
require_relative '../../../lib/rley/syntax/non_terminal'
|
@@ -226,6 +227,38 @@ module Rley # Open this namespace to avoid module qualifier prefixes
|
|
226
227
|
compare_state_texts(state_set_5, expected)
|
227
228
|
end
|
228
229
|
|
230
|
+
it 'should trace a parse with level 1' do
|
231
|
+
# Substitute temporarily $stdout by a StringIO
|
232
|
+
prev_ostream = $stdout
|
233
|
+
$stdout = StringIO.new('', 'w')
|
234
|
+
|
235
|
+
trace_level = 1
|
236
|
+
parse_result = subject.parse(grm1_tokens, trace_level)
|
237
|
+
expectations = <<-SNIPPET
|
238
|
+
['a', 'a', 'b', 'c', 'c']
|
239
|
+
|. a . a . b . c . c .|
|
240
|
+
|> . . . . .| [0:0] S => . A
|
241
|
+
|> . . . . .| [0:0] A => . 'a' A 'c'
|
242
|
+
|> . . . . .| [0:0] A => . 'b'
|
243
|
+
|[---] . . . .| [0:1] A => 'a' . A 'c'
|
244
|
+
|. > . . . .| [1:1] A => . 'a' A 'c'
|
245
|
+
|. > . . . .| [1:1] A => . 'b'
|
246
|
+
|. [---] . . .| [1:2] A => 'a' . A 'c'
|
247
|
+
|. . > . . .| [2:2] A => . 'a' A 'c'
|
248
|
+
|. . > . . .| [2:2] A => . 'b'
|
249
|
+
|. . [---] . .| [2:3] A => 'b' .
|
250
|
+
|. [-------> . .| [1:3] A => 'a' A . 'c'
|
251
|
+
|. . . [---] .| [3:4] A => 'a' A 'c' .
|
252
|
+
|[---------------> .| [0:4] A => 'a' A . 'c'
|
253
|
+
|. . . . [---]| [4:5] A => 'a' A 'c' .
|
254
|
+
|[===================]| [0:5] S => A .
|
255
|
+
SNIPPET
|
256
|
+
expect($stdout.string).to eq(expectations)
|
257
|
+
|
258
|
+
# Restore standard output stream
|
259
|
+
$stdout = prev_ostream
|
260
|
+
end
|
261
|
+
|
229
262
|
it 'should parse a valid simple expression' do
|
230
263
|
instance = EarleyParser.new(grammar_expr)
|
231
264
|
parse_result = instance.parse(grm2_tokens)
|
@@ -0,0 +1,193 @@
|
|
1
|
+
require_relative '../../spec_helper'
|
2
|
+
require 'stringio'
|
3
|
+
|
4
|
+
require_relative '../../../lib/rley/syntax/terminal'
|
5
|
+
require_relative '../../../lib/rley/syntax/non_terminal'
|
6
|
+
require_relative '../../../lib/rley/syntax/production'
|
7
|
+
require_relative '../../../lib/rley/parser/dotted_item'
|
8
|
+
require_relative '../../../lib/rley/parser/parse_state'
|
9
|
+
require_relative '../../../lib/rley/parser/token'
|
10
|
+
|
11
|
+
# Load the class under test
|
12
|
+
require_relative '../../../lib/rley/parser/parse_tracer'
|
13
|
+
|
14
|
+
module Rley # Open this namespace to avoid module qualifier prefixes
|
15
|
+
module Parser # Open this namespace to avoid module qualifier prefixes
|
16
|
+
describe ParseTracer do
|
17
|
+
let(:output) { StringIO.new('', 'w') }
|
18
|
+
|
19
|
+
let(:token_seq) do
|
20
|
+
literals = ['I', 'saw', 'John', 'with', 'a', 'dog']
|
21
|
+
literals.map {|lexeme| Token.new(lexeme, nil)}
|
22
|
+
end
|
23
|
+
|
24
|
+
subject { ParseTracer.new(1, output, token_seq) }
|
25
|
+
|
26
|
+
context 'Creation & initialization:' do
|
27
|
+
it 'should accept trace level 0' do
|
28
|
+
expect { ParseTracer.new(0, output, token_seq) }.not_to raise_error
|
29
|
+
expect(output.string).to eq('')
|
30
|
+
end
|
31
|
+
|
32
|
+
# |. I . saw . John . with . a . dog .|
|
33
|
+
|
34
|
+
it 'should accept trace level 1' do
|
35
|
+
expect { ParseTracer.new(1, output, token_seq) }.not_to raise_error
|
36
|
+
expectations = <<-SNIPPET
|
37
|
+
['I', 'saw', 'John', 'with', 'a', 'dog']
|
38
|
+
|. I . saw . John . with . a . dog .|
|
39
|
+
SNIPPET
|
40
|
+
expect(output.string).to eq(expectations)
|
41
|
+
end
|
42
|
+
|
43
|
+
it 'should accept trace level 2' do
|
44
|
+
expect { ParseTracer.new(2, output, token_seq) }.not_to raise_error
|
45
|
+
expectations = <<-SNIPPET
|
46
|
+
['I', 'saw', 'John', 'with', 'a', 'dog']
|
47
|
+
|. I . saw . John . with . a . dog .|
|
48
|
+
SNIPPET
|
49
|
+
expect(output.string).to eq(expectations)
|
50
|
+
end
|
51
|
+
|
52
|
+
it 'should know the trace level' do
|
53
|
+
expect(subject.level).to eq(1)
|
54
|
+
end
|
55
|
+
|
56
|
+
it 'should know the output stream' do
|
57
|
+
expect(subject.ostream).to eq(output)
|
58
|
+
end
|
59
|
+
end # context
|
60
|
+
|
61
|
+
context 'Provided services:' do
|
62
|
+
let(:t_a) { Syntax::Terminal.new('A') }
|
63
|
+
let(:t_b) { Syntax::Terminal.new('B') }
|
64
|
+
let(:t_c) { Syntax::Terminal.new('C') }
|
65
|
+
let(:nt_sentence) { Syntax::NonTerminal.new('sentence') }
|
66
|
+
|
67
|
+
let(:sample_prod) do
|
68
|
+
Syntax::Production.new(nt_sentence, [t_a, t_b, t_c])
|
69
|
+
end
|
70
|
+
|
71
|
+
let(:origin_val) { 3 }
|
72
|
+
let(:dotted_rule) { DottedItem.new(sample_prod, 2) }
|
73
|
+
let(:complete_rule) { DottedItem.new(sample_prod, 3) }
|
74
|
+
let(:sample_parse_state) { ParseState.new(dotted_rule, origin_val) }
|
75
|
+
|
76
|
+
# Factory method.
|
77
|
+
def parse_state(origin, aDottedRule)
|
78
|
+
ParseState.new(aDottedRule, origin)
|
79
|
+
end
|
80
|
+
|
81
|
+
it 'should render a scanning step' do
|
82
|
+
# Case: token at the beginning
|
83
|
+
subject.ostream.string = ''
|
84
|
+
subject.trace_scanning(1, parse_state(0, dotted_rule))
|
85
|
+
expectations = <<-SNIPPET
|
86
|
+
|[------] . . . . .| [0:1] sentence => A B . C
|
87
|
+
SNIPPET
|
88
|
+
|
89
|
+
# Case: token in the middle
|
90
|
+
subject.ostream.string = ''
|
91
|
+
subject.trace_scanning(4, sample_parse_state)
|
92
|
+
expectations = <<-SNIPPET
|
93
|
+
|. . . [------] . .| [3:4] sentence => A B . C
|
94
|
+
SNIPPET
|
95
|
+
|
96
|
+
# Case: token at the end
|
97
|
+
subject.ostream.string = ''
|
98
|
+
subject.trace_scanning(6, parse_state(5, dotted_rule))
|
99
|
+
expectations = <<-SNIPPET
|
100
|
+
|. . . . . [------]| [5:6] sentence => A B . C
|
101
|
+
SNIPPET
|
102
|
+
end
|
103
|
+
|
104
|
+
|
105
|
+
it 'should render a prediction step' do
|
106
|
+
# Case: initial stateset
|
107
|
+
subject.ostream.string = ''
|
108
|
+
subject.trace_prediction(0, parse_state(0, dotted_rule))
|
109
|
+
expectations = <<-SNIPPET
|
110
|
+
|> . . . . . .| [0:0] sentence => A B . C
|
111
|
+
SNIPPET
|
112
|
+
expect(output.string).to eq(expectations)
|
113
|
+
|
114
|
+
# Case: stateset in the middle
|
115
|
+
subject.ostream.string = ''
|
116
|
+
subject.trace_prediction(3, sample_parse_state)
|
117
|
+
expectations = <<-SNIPPET
|
118
|
+
|. . . > . . .| [3:3] sentence => A B . C
|
119
|
+
SNIPPET
|
120
|
+
expect(output.string).to eq(expectations)
|
121
|
+
|
122
|
+
# Case: final stateset
|
123
|
+
subject.ostream.string = ''
|
124
|
+
subject.trace_prediction(6, parse_state(6, dotted_rule))
|
125
|
+
expectations = <<-SNIPPET
|
126
|
+
|. . . . . . >| [6:6] sentence => A B . C
|
127
|
+
SNIPPET
|
128
|
+
expect(output.string).to eq(expectations)
|
129
|
+
end
|
130
|
+
|
131
|
+
it 'should render a completion step' do
|
132
|
+
# Case: full parse completed
|
133
|
+
subject.ostream.string = ''
|
134
|
+
subject.trace_completion(6, parse_state(0, complete_rule))
|
135
|
+
expectations = <<-SNIPPET
|
136
|
+
|[=========================================]| [0:6] sentence => A B C .
|
137
|
+
SNIPPET
|
138
|
+
expect(output.string).to eq(expectations)
|
139
|
+
|
140
|
+
# Case: step at the start (complete)
|
141
|
+
subject.ostream.string = ''
|
142
|
+
subject.trace_completion(1, parse_state(0, complete_rule))
|
143
|
+
expectations = <<-SNIPPET
|
144
|
+
|[------] . . . . .| [0:1] sentence => A B C .
|
145
|
+
SNIPPET
|
146
|
+
expect(output.string).to eq(expectations)
|
147
|
+
|
148
|
+
# Case: step at the start (not complete)
|
149
|
+
subject.ostream.string = ''
|
150
|
+
subject.trace_completion(1, parse_state(0, dotted_rule))
|
151
|
+
expectations = <<-SNIPPET
|
152
|
+
|[------> . . . . .| [0:1] sentence => A B . C
|
153
|
+
SNIPPET
|
154
|
+
expect(output.string).to eq(expectations)
|
155
|
+
|
156
|
+
# Case: step at the middle (complete)
|
157
|
+
subject.ostream.string = ''
|
158
|
+
subject.trace_completion(4, parse_state(2, complete_rule))
|
159
|
+
expectations = <<-SNIPPET
|
160
|
+
|. . [-------------] . .| [2:4] sentence => A B C .
|
161
|
+
SNIPPET
|
162
|
+
expect(output.string).to eq(expectations)
|
163
|
+
|
164
|
+
# Case: step at the middle (not complete)
|
165
|
+
subject.ostream.string = ''
|
166
|
+
subject.trace_completion(4, parse_state(2, dotted_rule))
|
167
|
+
expectations = <<-SNIPPET
|
168
|
+
|. . [-------------> . .| [2:4] sentence => A B . C
|
169
|
+
SNIPPET
|
170
|
+
expect(output.string).to eq(expectations)
|
171
|
+
|
172
|
+
# Case: step at the end (complete)
|
173
|
+
subject.ostream.string = ''
|
174
|
+
subject.trace_completion(6, parse_state(3, complete_rule))
|
175
|
+
expectations = <<-SNIPPET
|
176
|
+
|. . . [--------------------]| [3:6] sentence => A B C .
|
177
|
+
SNIPPET
|
178
|
+
expect(output.string).to eq(expectations)
|
179
|
+
|
180
|
+
# Case: step at the end (not complete)
|
181
|
+
subject.ostream.string = ''
|
182
|
+
subject.trace_completion(6, parse_state(3, dotted_rule))
|
183
|
+
expectations = <<-SNIPPET
|
184
|
+
|. . . [-------------------->| [3:6] sentence => A B . C
|
185
|
+
SNIPPET
|
186
|
+
expect(output.string).to eq(expectations)
|
187
|
+
end
|
188
|
+
end # context
|
189
|
+
end # describe
|
190
|
+
end # module
|
191
|
+
end # module
|
192
|
+
|
193
|
+
# End of file
|
@@ -1,4 +1,5 @@
|
|
1
1
|
require_relative '../../spec_helper'
|
2
|
+
require 'stringio'
|
2
3
|
|
3
4
|
require_relative '../../../lib/rley/syntax/non_terminal'
|
4
5
|
require_relative '../../../lib/rley/syntax/verbatim_symbol'
|
@@ -6,6 +7,7 @@ require_relative '../../../lib/rley/syntax/production'
|
|
6
7
|
require_relative '../../../lib/rley/syntax/grammar_builder'
|
7
8
|
require_relative '../../../lib/rley/parser/dotted_item'
|
8
9
|
require_relative '../../../lib/rley/parser/token'
|
10
|
+
require_relative '../../../lib/rley/parser/parse_tracer'
|
9
11
|
require_relative '../../../lib/rley/parser/earley_parser'
|
10
12
|
require_relative '../support/grammar_abc_helper'
|
11
13
|
require_relative '../support/grammar_b_expr_helper'
|
@@ -48,15 +50,18 @@ module Rley # Open this namespace to avoid module qualifier prefixes
|
|
48
50
|
|
49
51
|
|
50
52
|
let(:start_dotted_rule) { DottedItem.new(prod_S, 0) }
|
53
|
+
let(:output) { StringIO.new('', 'w') }
|
54
|
+
let(:sample_tracer) { ParseTracer.new(0, output, grm1_tokens) }
|
51
55
|
|
52
56
|
# Default instantiation rule
|
53
|
-
subject { Parsing.new(start_dotted_rule, grm1_tokens) }
|
57
|
+
subject { Parsing.new(start_dotted_rule, grm1_tokens, sample_tracer) }
|
54
58
|
|
55
59
|
context 'Initialization:' do
|
56
|
-
it 'should be created with list of tokens
|
60
|
+
it 'should be created with list of tokens, start dotted rule, trace' do
|
57
61
|
start_rule = start_dotted_rule
|
58
62
|
tokens = grm1_tokens
|
59
|
-
|
63
|
+
tracer = sample_tracer
|
64
|
+
expect { Parsing.new(start_rule, tokens, tracer) }.not_to raise_error
|
60
65
|
end
|
61
66
|
|
62
67
|
it 'should know the input tokens' do
|
@@ -66,6 +71,17 @@ module Rley # Open this namespace to avoid module qualifier prefixes
|
|
66
71
|
it 'should know its chart object' do
|
67
72
|
expect(subject.chart).to be_kind_of(Chart)
|
68
73
|
end
|
74
|
+
|
75
|
+
it 'should emit trace level 1 info' do
|
76
|
+
tracer = ParseTracer.new(1, output, grm1_tokens)
|
77
|
+
instance = Parsing.new(start_dotted_rule, grm1_tokens, tracer)
|
78
|
+
expectations = <<-SNIPPET
|
79
|
+
['a', 'a', 'b', 'c', 'c']
|
80
|
+
|. a . a . b . c . c .|
|
81
|
+
|> . . . . .| [0:0] S => . A
|
82
|
+
SNIPPET
|
83
|
+
expect(output.string).to eq(expectations)
|
84
|
+
end
|
69
85
|
end # context
|
70
86
|
|
71
87
|
context 'Parsing:' do
|
@@ -73,27 +89,27 @@ module Rley # Open this namespace to avoid module qualifier prefixes
|
|
73
89
|
expect(subject.chart[1]).to be_empty
|
74
90
|
item = DottedItem.new(prod_A1, 1)
|
75
91
|
|
76
|
-
subject.push_state(item, 1, 1)
|
92
|
+
subject.push_state(item, 1, 1, :scanning)
|
77
93
|
expect(subject.chart[1]).not_to be_empty
|
78
94
|
expect(subject.chart[1].first.dotted_rule).to eq(item)
|
79
95
|
|
80
96
|
# Pushing twice the same state must be no-op
|
81
|
-
subject.push_state(item, 1, 1)
|
97
|
+
subject.push_state(item, 1, 1, :scanning)
|
82
98
|
expect(subject.chart[1].size).to eq(1)
|
83
99
|
end
|
84
100
|
|
85
101
|
it 'should complain when trying to push a nil dotted item' do
|
86
102
|
err = StandardError
|
87
103
|
msg = 'Dotted item may not be nil'
|
88
|
-
expect
|
104
|
+
expect{ subject.push_state(nil, 1, 1, :prediction) }.to raise_error(err, msg)
|
89
105
|
end
|
90
106
|
|
91
107
|
|
92
108
|
it 'should retrieve the parse states that expect a given terminal' do
|
93
109
|
item1 = DottedItem.new(prod_A1, 2)
|
94
110
|
item2 = DottedItem.new(prod_A1, 1)
|
95
|
-
subject.push_state(item1, 2, 2)
|
96
|
-
subject.push_state(item2, 2, 2)
|
111
|
+
subject.push_state(item1, 2, 2, :scanning)
|
112
|
+
subject.push_state(item2, 2, 2, :scanning)
|
97
113
|
states = subject.states_expecting(c_, 2, false)
|
98
114
|
expect(states.size).to eq(1)
|
99
115
|
expect(states[0].dotted_rule).to eq(item1)
|
@@ -106,8 +122,8 @@ module Rley # Open this namespace to avoid module qualifier prefixes
|
|
106
122
|
|
107
123
|
item1 = DottedItem.new(prod_A1, 0)
|
108
124
|
item2 = DottedItem.new(prod_A2, 0)
|
109
|
-
subject.push_state(item1, 0, 0)
|
110
|
-
subject.push_state(item2, 0, 0)
|
125
|
+
subject.push_state(item1, 0, 0, :completion)
|
126
|
+
subject.push_state(item2, 0, 0, :completion)
|
111
127
|
subject.scanning(a_, 0) { |i| i } # Code block is mock
|
112
128
|
|
113
129
|
# Expected side effect: a new state at chart[1]
|
@@ -117,7 +133,7 @@ module Rley # Open this namespace to avoid module qualifier prefixes
|
|
117
133
|
expect(new_state.origin).to eq(0)
|
118
134
|
end
|
119
135
|
end # context
|
120
|
-
|
136
|
+
=begin
|
121
137
|
context 'Parse tree building:' do
|
122
138
|
let(:sample_grammar1) do
|
123
139
|
builder = grammar_abc_builder
|
@@ -385,6 +401,7 @@ SNIPPET
|
|
385
401
|
expect(actual).to eq(expected_text.chomp)
|
386
402
|
end
|
387
403
|
end # context
|
404
|
+
=end
|
388
405
|
end # describe
|
389
406
|
end # module
|
390
407
|
end # module
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rley
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.03
|
4
|
+
version: 0.2.04
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Dimitri Geshef
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-
|
11
|
+
date: 2015-03-04 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake
|
@@ -80,7 +80,7 @@ dependencies:
|
|
80
80
|
- - ! '>='
|
81
81
|
- !ruby/object:Gem::Version
|
82
82
|
version: 2.0.0
|
83
|
-
description: A
|
83
|
+
description: A general parser using the Earley algorithm.
|
84
84
|
email: famished.tiger@yahoo.com
|
85
85
|
executables: []
|
86
86
|
extensions: []
|
@@ -104,8 +104,11 @@ files:
|
|
104
104
|
- examples/parsers/parsing_abc.rb
|
105
105
|
- examples/parsers/parsing_ambig.rb
|
106
106
|
- examples/parsers/parsing_b_expr.rb
|
107
|
+
- examples/parsers/parsing_err_expr.rb
|
108
|
+
- examples/parsers/parsing_groucho.rb
|
107
109
|
- examples/parsers/parsing_L0.rb
|
108
110
|
- examples/parsers/parsing_L1.rb
|
111
|
+
- examples/parsers/parsing_tricky.rb
|
109
112
|
- examples/recognizers/recognizer_abc.rb
|
110
113
|
- lib/rley.rb
|
111
114
|
- lib/rley/constants.rb
|
@@ -117,6 +120,7 @@ files:
|
|
117
120
|
- lib/rley/parser/earley_parser.rb
|
118
121
|
- lib/rley/parser/parse_state.rb
|
119
122
|
- lib/rley/parser/parse_state_tracker.rb
|
123
|
+
- lib/rley/parser/parse_tracer.rb
|
120
124
|
- lib/rley/parser/parse_tree_builder.rb
|
121
125
|
- lib/rley/parser/parsing.rb
|
122
126
|
- lib/rley/parser/state_set.rb
|
@@ -142,6 +146,7 @@ files:
|
|
142
146
|
- spec/rley/parser/dotted_item_spec.rb
|
143
147
|
- spec/rley/parser/earley_parser_spec.rb
|
144
148
|
- spec/rley/parser/parse_state_spec.rb
|
149
|
+
- spec/rley/parser/parse_tracer_spec.rb
|
145
150
|
- spec/rley/parser/parse_tree_builder_spec.rb
|
146
151
|
- spec/rley/parser/parsing_spec.rb
|
147
152
|
- spec/rley/parser/state_set_spec.rb
|
@@ -203,6 +208,7 @@ test_files:
|
|
203
208
|
- spec/rley/parser/dotted_item_spec.rb
|
204
209
|
- spec/rley/parser/earley_parser_spec.rb
|
205
210
|
- spec/rley/parser/parse_state_spec.rb
|
211
|
+
- spec/rley/parser/parse_tracer_spec.rb
|
206
212
|
- spec/rley/parser/parse_tree_builder_spec.rb
|
207
213
|
- spec/rley/parser/parsing_spec.rb
|
208
214
|
- spec/rley/parser/state_set_spec.rb
|